[ { "title": "The Well: a Large-Scale Collection of Diverse Physics Simulations for Machine Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97882", "id": "00Sx577BT3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=00Sx577BT3", "openreview": "https://openreview.net/forum?id=00Sx577BT3", "poster": "", "project": "", "author_site": "Ruben Ohana, Michael McCabe, Lucas Meyer, Rudy Morel, Fruzsina Agocs, Miguel Beneitez, Marsha Berger, Blakesly Burkhart, Stuart Dalziel, Drummond Fielding, Daniel Fortunato, Jared Goldberg, Keiya Hirashima, Yan-Fei Jiang, Rich Kerswell, Suryanarayana Maddu, Jonah Miller, Payel Mukhopadhyay, Stefan Nixon, Jeff Shen, Romain Watteaux, Bruno R\u00e9galdo-Saint Blancard, Liam Parker, Miles Cranmer, Shirley Ho", "tldr": "", "abstract": "Machine learning based surrogate models offer researchers powerful tools for accelerating simulation-based workflows. However, as standard datasets in this space often cover small classes of physical behavior, it can be difficult to evaluate the efficacy of new approaches. To address this gap, we introduce the Well: a large-scale collection of datasets containing numerical simulations of a wide variety of spatiotemporal physical systems. The Well draws from domain experts and numerical software developers to provide 15TB of data across 16 datasets covering diverse domains such as biological systems, fluid dynamics, acoustic scattering, as well as magneto-hydrodynamic simulations of extra-galactic fluids or supernova explosions. These datasets can be used individually or as part of a broader benchmark suite. To facilitate usage of the Well, we provide a unified PyTorch interface for training and evaluating models. We demonstrate the function of this library by introducing example baselines that highlight the new challenges posed by the complex dynamics of the Well. The code and data is available at https://github.com/PolymathicAI/the_well.", "keywords": "numerical simulations;fluid-dynamics;partial differential equations;operator learning", "primary_area": "", "supplementary_material": "", "author": "Ruben Ohana;Michael McCabe;Lucas Thibaut Meyer;Rudy Morel;Fruzsina Julia Agocs;Miguel Beneitez;Marsha Berger;Blakesley Burkhart;Stuart B. Dalziel;Drummond Buschman Fielding;Daniel Fortunato;Jared A. Goldberg;Keiya Hirashima;Yan-Fei Jiang;Rich Kerswell;Suryanarayana Maddu;Jonah M. Miller;Payel Mukhopadhyay;Stefan S. Nixon;Jeff Shen;Romain Watteaux;Bruno R\u00e9galdo-Saint Blancard;Fran\u00e7ois Rozet;Liam Holden Parker;Miles Cranmer;Shirley Ho", "authorids": "~Ruben_Ohana1;~Michael_McCabe2;~Lucas_Thibaut_Meyer1;~Rudy_Morel1;~Fruzsina_Julia_Agocs1;~Miguel_Beneitez1;~Marsha_Berger2;~Blakesley_Burkhart1;~Stuart_B._Dalziel1;~Drummond_Buschman_Fielding1;~Daniel_Fortunato1;~Jared_A._Goldberg1;~Keiya_Hirashima2;~Yan-Fei_Jiang1;~Rich_Kerswell1;~Suryanarayana_Maddu1;~Jonah_M._Miller1;~Payel_Mukhopadhyay1;~Stefan_S._Nixon1;~Jeff_Shen1;~Romain_Watteaux1;~Bruno_R\u00e9galdo-Saint_Blancard1;~Fran\u00e7ois_Rozet1;~Liam_Holden_Parker1;~Miles_Cranmer2;~Shirley_Ho2", "gender": ";M;M;M;F;;;F;M;;M;M;M;M;M;M;F;M;M;;;M;M;;;M", "homepage": "https://rubenohana.github.io/;https://mikemccabe210.github.io/;https://ltmeyer.github.io/;https://www.di.ens.fr/rudy.morel/;https://fruzsinaagocs.github.io;https://beneitez.github.io;;;;https://dfielding14.github.io;https://danfortunato.com;https://jaredagoldberg.wordpress.com/;;https://www.damtp.cam.ac.uk/user/rrk26/;https://www.simonsfoundation.org/people/suryanarayana-maddu/;https://www.thephysicsmill.com/;;https://www.damtp.cam.ac.uk/person/ssn34;https://jshen.net;;https://users.flatironinstitute.org/~bregaldosaintblancard/;https://francois-rozet.github.io/;;https://astroautomata.com/;https://www.shirleyho.space/;https://kyafuk.github.io/utokyo-hirashima/index.html", "dblp": "251/5608;56/706;354/3369;318/9215;;;;;;;;;;;;;;;;;;;;205/2493;162/2218;", "google_scholar": "https://scholar.google.fr/citations?user=F9qNg2wAAAAJ;SMXfsHYAAAAJ;;okiqdNoAAAAJ;JBg1Oj0AAAAJ;;LQlLjGIAAAAJ;;OJcK5CAAAAAJ;;T_Iq34oAAAAJ;HVPV8J4AAAAJ;;;https://scholar.google.de/citations?user=oL3QYLkAAAAJ;oQbwqPIAAAAJ;KtPHj74AAAAJ;;1A7poIcAAAAJ;https://scholar.google.fr/citations?user=l5jEj7oAAAAJ;TfcmfBQAAAAJ;C-WS1pwAAAAJ;CjYa0N4AAAAJ;10WfwCQAAAAJ;fhOi--4AAAAJ;https://scholar.google.co.jp/citations?hl=ja", "orcid": "0000-0002-8493-1210;0009-0007-8117-6110;;0000-0001-8739-7757;0000-0002-1763-5884;;;0000-0001-5817-5944;0000-0002-8487-2038;0000-0003-3806-8548;0000-0003-1302-7184;0000-0003-1012-3031;0000-0002-2624-3399;0000-0001-5460-5337;;0000-0001-6432-7860;;;0000-0001-6662-7306;0000-0001-9905-9261;0000-0003-0055-0953;0000-0002-8846-8761;;0000-0002-6458-3423;;0000-0002-1972-2674", "linkedin": "rubenohana/;mmccabe-210/;lucas-meyer-a7983b103/;rudy-morel-994a93159;;;;;;;;;;;;;payel-mukhopadhyay-5529b026b/;stefan-nixon-a3856b1a5/;;;;;liam-parker-96ba14160/;milescranmer/;;", "or_profile": "~Ruben_Ohana1;~Michael_McCabe2;~Lucas_Thibaut_Meyer1;~Rudy_Morel1;~Fruzsina_Julia_Agocs1;~Miguel_Beneitez1;~Marsha_Berger2;~Blakesley_Burkhart1;~Stuart_B._Dalziel1;~Drummond_Buschman_Fielding1;~Daniel_Fortunato1;~Jared_A._Goldberg1;~Yan-Fei_Jiang1;~Rich_Kerswell1;~Suryanarayana_Maddu1;~Jonah_M._Miller1;~Payel_Mukhopadhyay1;~Stefan_S._Nixon1;~Jeff_Shen1;~Romain_Watteaux1;~Bruno_R\u00e9galdo-Saint_Blancard1;~Fran\u00e7ois_Rozet1;~Liam_Holden_Parker1;~Miles_Cranmer2;~Shirley_Ho2;~KEIYA_HIRASHIMA1", "aff": "Flatiron Institute;University of Colorado, Boulder;INRIA;Flatiron Institute;Flatiron Institute;University of Cambridge;Flatiron Institute;Rutgers University;University of Cambridge;Simons Foundation;Flatiron Institute;Flatiron Institute;Simons Foundation;University of Cambridge;Flatiron Institute/Simons Foundation;Los Alamos National Laboratory;University of California, Berkeley;University of Cambridge;Princeton University;CEA;Flatiron Institute;Flatiron Institute;Flatiron Institute;University of Cambridge;Carnegie Mellon University;The University of Tokyo", "aff_domain": "flatironinstitute.org;colorado.edu;inria.fr;flatironinstitute.org;flatironinstitute.org;cam.ac.uk;flatironinstitute.org;rutgers.edu;cam.ac.uk;simonsfoundation.org;flatironinstitute.org;flatironinstitute.org;simonsfoundation.org;cam.ac.uk;simonsfoundation.org;lanl.gov;berkeley.edu;cam.ac.uk;princeton.edu;cea.fr;flatironinstitute.org;flatironinstitute.org;simonsfoundation.org;cam.ac.uk;cmu.edu;g.ecc.u-tokyo.ac.jp", "position": "Postdoc;PhD student;PhD student;Postdoc;Postdoc;Postdoc;Full Professor;Assistant Professor;Director of the GK Batchelor Laboratory;Postdoc;Researcher;Postdoc;Researcher;Full Professor;Postdoc;Researcher;Postdoc;PhD student;PhD student;Researcher;Postdoc;Intern;Researcher;Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nohana2024the,\ntitle={The Well: a Large-Scale Collection of Diverse Physics Simulations for Machine Learning},\nauthor={Ruben Ohana and Michael McCabe and Lucas Thibaut Meyer and Rudy Morel and Fruzsina Julia Agocs and Miguel Beneitez and Marsha Berger and Blakesley Burkhart and Stuart B. Dalziel and Drummond Buschman Fielding and Daniel Fortunato and Jared A. Goldberg and Keiya Hirashima and Yan-Fei Jiang and Rich Kerswell and Suryanarayana Maddu and Jonah M. Miller and Payel Mukhopadhyay and Stefan S. Nixon and Jeff Shen and Romain Watteaux and Bruno R{\\'e}galdo-Saint Blancard and Fran{\\c{c}}ois Rozet and Liam Holden Parker and Miles Cranmer and Shirley Ho},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=00Sx577BT3}\n}", "github": "", "reviewers": "FPen;XnXV;b6xs", "pdf_size": 5250372, "rating": "6;7;7", "confidence": "3;4;4", "wc_summary_and_contributions": "64;174;153", "wc_strengths": "47;76;91", "wc_improvement": "71;54;215", "wc_limitations": "97;167;16", "wc_correctness": "35;10;12", "wc_clarity": "17;10;11", "wc_relation_to_prior_work": "54;23;34", "wc_documentation": "34;17;31", "wc_additional_feedback": "1;1;1", "wc_review": "420;532;564", "wc_reply_reviewers": "45;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 130.33333333333334, 47.68181577452305 ], "wc_strengths_avg": [ 71.33333333333333, 18.263503375736967 ], "wc_improvement_avg": [ 113.33333333333333, 72.22341879350645 ], "wc_limitations_avg": [ 93.33333333333333, 61.69999099585743 ], "wc_correctness_avg": [ 19.0, 11.343133018115703 ], "wc_clarity_avg": [ 12.666666666666666, 3.0912061651652345 ], "wc_relation_to_prior_work_avg": [ 37.0, 12.832251036613439 ], "wc_documentation_avg": [ 27.333333333333332, 7.408703590297623 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 505.3333333333333, 61.73779681488121 ], "wc_reply_reviewers_avg": [ 15.0, 21.213203435596427 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 26, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6811650367952338372&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "flatironinstitute.org;colorado.edu;inria.fr;flatironinstitute.org;flatironinstitute.org;cam.ac.uk;flatironinstitute.org;rutgers.edu;cam.ac.uk;simonsfoundation.org;flatironinstitute.org;flatironinstitute.org;simonsfoundation.org;cam.ac.uk;simonsfoundation.org;lanl.gov;berkeley.edu;cam.ac.uk;princeton.edu;cea.fr;flatironinstitute.org;flatironinstitute.org;simonsfoundation.org;cam.ac.uk;cmu.edu;g.ecc.u-tokyo.ac.jp", "author_num": 26, "aff_unique_index": "0;1;2;0;0;3;0;4;3;5;0;0;5;3;0;6;7;3;8;9;0;0;0;3;10;11", "aff_unique_norm": "Flatiron Institute;University of Colorado;INRIA;University of Cambridge;Rutgers University;Simons Foundation;Los Alamos National Laboratory;University of California, Berkeley;Princeton University;Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives;Carnegie Mellon University;University of Tokyo", "aff_unique_dep": ";;;;;;;;;;;", "aff_unique_url": "https://flatironinstitute.org;https://www.colorado.edu;https://www.inria.fr;https://www.cam.ac.uk;https://www.rutgers.edu;https://www.simonsfoundation.org;https://www.lanl.gov;https://www.berkeley.edu;https://www.princeton.edu;https://www cea fr;https://www.cmu.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Flatiron;CU;INRIA;Cambridge;Rutgers;Simons Foundation;LANL;UC Berkeley;Princeton;CEA;CMU;UTokyo", "aff_campus_unique_index": "1;2;2;2;3;2;2", "aff_campus_unique": ";Boulder;Cambridge;Berkeley", "aff_country_unique_index": "0;0;1;0;0;2;0;0;2;0;0;0;0;2;0;0;0;2;0;1;0;0;0;2;0;3", "aff_country_unique": "United States;France;United Kingdom;Japan" }, { "title": "On the Noise Robustness of In-Context Learning for Text Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96962", "id": "00uVk06eVK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=00uVk06eVK", "openreview": "https://openreview.net/forum?id=00uVk06eVK", "poster": "/media/PosterPDFs/NeurIPS%202024/96962.png?t=1731587881.4159098", "project": "", "author_site": "hongfu gao, Feipeng Zhang, Wenyu Jiang, Jun Shu, Feng Zheng, Hongxin Wei", "tldr": "", "abstract": "Large language models (LLMs) have shown impressive performance on downstream tasks by in-context learning (ICL), which heavily relies on the quality of demonstrations selected from a large set of annotated examples. Recent works claim that in-context learning is robust to noisy demonstrations in text classification. In this work, we show that, on text generation tasks, noisy annotations significantly hurt the performance of in-context learning. To circumvent the issue, we propose a simple and effective approach called Local Perplexity Ranking (LPR), which replaces the \"noisy\" candidates with their nearest neighbors that are more likely to be clean. Our method is motivated by analyzing the perplexity deviation caused by noisy labels and decomposing perplexity into inherent perplexity and matching perplexity. Our key idea behind LPR is thus to decouple the matching perplexity by performing the ranking among the neighbors in semantic space. Our approach can prevent the selected demonstrations from including mismatched input-label pairs while preserving the effectiveness of the original selection methods. Extensive experiments demonstrate the effectiveness of LPR, improving the EM score by up to 18.75 on common benchmarks with noisy annotations.", "keywords": "In-Context Learning;Large Language Models;Noise Robustness", "primary_area": "generative_models", "supplementary_material": "/attachment/a2350413dc2d4fc2b1a285d9eb939fb49172cac0.zip", "author": "Hongfu Gao;Feipeng Zhang;Wenyu Jiang;Jun Shu;Feng Zheng;Hongxin Wei", "authorids": "~Hongfu_Gao2;~Feipeng_Zhang1;~Wenyu_Jiang1;~Jun_Shu1;~Feng_Zheng1;~Hongxin_Wei1", "gender": "M;;M;M;M;M", "homepage": ";http://sef.xjtu.edu.cn/info/1086/16801.htm;;https://gr.xjtu.edu.cn/en/web/junshu/home;http://faculty.sustech.edu.cn/fengzheng/;https://hongxin001.github.io/", "dblp": "378/1402;;;;39/800;150/6350", "google_scholar": "sCFjqwsAAAAJ;;mdOH9HQAAAAJ;https://scholar.google.com.hk/citations?user=qnDOEV4AAAAJ;PcmyXHMAAAAJ;cABH034AAAAJ", "orcid": ";0000-0002-8110-5837;;;0000-0002-1701-9141;", "linkedin": ";;;;;", "or_profile": "~Hongfu_Gao2;~Feipeng_Zhang1;~Wenyu_Jiang1;~Jun_Shu1;~Feng_Zheng1;~Hongxin_Wei1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Nanjing University;Xi'an Jiaotong University;Southern University of Science and Technology;Southern University of Science and Technology", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;nju.edu.cn;xjtu.edu.cn;sustech.edu.cn;sustech.edu.cn", "position": "MS student;Full Professor;PhD student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2024on,\ntitle={On the Noise Robustness of In-Context Learning for Text Generation},\nauthor={Hongfu Gao and Feipeng Zhang and Wenyu Jiang and Jun Shu and Feng Zheng and Hongxin Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=00uVk06eVK}\n}", "github": "", "reviewers": "Fz4p;DyDA;tEJo", "pdf_size": 703562, "rating": "5;7;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "132;85;90", "wc_strengths": "37;33;105", "wc_weaknesses": "191;113;110", "wc_questions": "92;134;52", "wc_limitations": "13;49;26", "wc_review": "465;414;383", "wc_reply_reviewers": "29;0;15", "wc_reply_authors": "32;0;28", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 102.33333333333333, 21.076579946049648 ], "wc_strengths_avg": [ 58.333333333333336, 33.03869784897031 ], "wc_weaknesses_avg": [ 138.0, 37.49666651850535 ], "wc_questions_avg": [ 92.66666666666667, 33.47967874530592 ], "wc_limitations_avg": [ 29.333333333333332, 14.884742374510738 ], "wc_review_avg": [ 420.6666666666667, 33.8066397160216 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 11.841546445554407 ], "wc_reply_authors_avg": [ 20.0, 14.236104336041748 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11214355754583606365&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;xjtu.edu.cn;nju.edu.cn;xjtu.edu.cn;sustech.edu.cn;sustech.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;2;2", "aff_unique_norm": "Xi'an Jiao Tong University;Nanjing University;Southern University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.nju.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "XJTU;Nanjing U;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "01I55gys19", "title": "Few-Class Arena: A Benchmark for Efficient Vision Model Selection and Dataset Difficulty", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "A wide variety of benchmark datasets with many classes (80-1000) have been created to assist Computer Vision architectural evolution. An increasing number of vision models are evaluated with these many-class datasets. However, real-world applications often involve substantially fewer classes of interest (2-10). This gap between many and few classes makes it difficult to predict performance of the few-class applications using the available many-class datasets. To date, little has been offered to evaluate models in this Few-Class Regime. We propose Few-Class Arena (FCA), as a unified benchmark with focus on testing efficient image classification models for few classes. We conduct a systematic evaluation of the ResNet family trained on ImageNet subsets from 2 to 1000 classes, and test a wide spectrum of Convolutional Neural Networks and Transformer architectures over ten datasets by using our newly proposed FCA tool. Furthermore, to aid an up-front assessment of dataset difficulty and a more efficient selection of models, we incorporate a difficulty measure as a function of class similarity. FCA offers a new tool for efficient machine learning in the Few-Class Regime, with goals ranging from new efficient similarity proposal, lightweight model architecture design to new scaling law discovery. FCA is user-friendly and can be easily extended to new models and datasets, facilitating future research work. Our benchmark is available at https://github.com/fewclassarena/fca.", "keywords": "few-class;efficiency;neural network;convolutional neural network;transformer;dataset difficulty measurement;scaling law", "primary_area": "", "supplementary_material": "/attachment/c5b5ad02993228ad0ff038ccfe9ae11dd32b9f3e.zip", "author": "Bryan Bo Cao;Lawrence O'Gorman;Michael Coss;Shubham Jain", "authorids": "~Bryan_Bo_Cao1;~Lawrence_O'Gorman2;~Michael_Coss1;~Shubham_Jain3", "gender": "M;M;M;F", "homepage": ";https://www.bell-labs.com/about/researcher-profiles/larryogorman/;https://www.bell-labs.com;http:\\\\shubhamjain.net", "dblp": ";09/4496;;", "google_scholar": "czcKhLAAAAAJ;NzRdk7UAAAAJ;;P9Z011YAAAAJ", "orcid": "0000-0002-3425-5058;;;", "linkedin": "https://linkedin.com/in/bryanbocao;logorman/;www.linkedin.com/in/mjcoss;", "or_profile": "~Bryan_Bo_Cao1;~Lawrence_O'Gorman2;~Michael_Coss1;~Shubham_Jain3", "aff": "Nokia networks GmbH;;;State University of New York at Stony Brook", "aff_domain": "nokia-bell-labs.com;;;cs.stonybrook.edu", "position": "Intern;;;Assistant Professor", "bibtex": "@misc{\nanonymous2024fewclass,\ntitle={Few-Class Arena: A Benchmark for Efficient Vision Model Selection and Dataset Difficulty},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=01I55gys19}\n}", "github": "", "project": "", "reviewers": "zZVd;C8Y1;knHF", "site": "https://openreview.net/forum?id=01I55gys19", "pdf_size": 487777, "rating": "5;5;5", "confidence": "4;3;3", "wc_summary_and_contributions": "161;36;127", "wc_strengths": "92;18;79", "wc_improvement": "55;19;109", "wc_limitations": "239;29;16", "wc_correctness": "44;19;11", "wc_clarity": "66;1;5", "wc_relation_to_prior_work": "64;1;8", "wc_documentation": "38;1;8", "wc_additional_feedback": "1;1;1", "wc_review": "760;125;364", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1380;58;0", "reply_reviewers": "0;0;0", "reply_authors": "3;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 108.0, 52.76994093863159 ], "wc_strengths_avg": [ 63.0, 32.25936556516056 ], "wc_improvement_avg": [ 61.0, 36.98648401781386 ], "wc_limitations_avg": [ 94.66666666666667, 102.19697755913441 ], "wc_correctness_avg": [ 24.666666666666668, 14.055445761538676 ], "wc_clarity_avg": [ 24.0, 29.743346594938952 ], "wc_relation_to_prior_work_avg": [ 24.333333333333332, 28.193773938387338 ], "wc_documentation_avg": [ 15.666666666666666, 16.048537489614297 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 416.3333333333333, 261.86552952909926 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 479.3333333333333, 637.3075308584458 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sW6Nz9lA8Y8J:scholar.google.com/&scioq=Few-Class+Arena:+A+Benchmark+for+Efficient+Vision+Model+Selection+and+Dataset+Difficulty&hl=en&as_sdt=0,48", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Nokia Networks;State University of New York at Stony Brook", "aff_unique_dep": ";", "aff_unique_url": "https://networks.nokia.com;https://www.stonybrook.edu", "aff_unique_abbr": "Nokia;SUNY Stony Brook", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "title": "Testing Calibration in Nearly-Linear Time", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96961", "id": "01XV5Za56k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=01XV5Za56k", "openreview": "https://openreview.net/forum?id=01XV5Za56k", "poster": "", "project": "", "author_site": "Lunjia Hu, Arun Jambulapati, Kevin Tian, Chutong Yang", "tldr": "", "abstract": "In the recent literature on machine learning and decision making, calibration has emerged as a desirable and widely-studied statistical property of the outputs of binary prediction models. However, the algorithmic aspects of measuring model calibration have remained relatively less well-explored. Motivated by Blasiok et al '23, which proposed a rigorous framework for measuring distances to calibration, we initiate the algorithmic study of calibration through the lens of property testing. We define the problem of calibration testing from samples where given $n$ draws from a distribution $\\mathcal{D}$ on $(\\text{predictions}, \\text{binary outcomes})$, our goal is to distinguish between the cases where $\\mathcal{D}$ is perfectly calibrated or $\\epsilon$-far from calibration. We make the simple observation that the empirical smooth calibration linear program can be reformulated as an instance of minimum-cost flow on a highly-structured graph, and design an exact dynamic programming-based solver for it which runs in time $O(n\\log^2(n))$, and solves the calibration testing problem information-theoretically optimally in the same time. This improves upon state-of-the-art black-box linear program solvers requiring $\\Omega(n^\\omega)$ time, where $\\omega > 2$ is the exponent of matrix multiplication. We also develop algorithms for tolerant variants of our testing problem improving upon black-box linear program solvers, and give sample complexity lower bounds for alternative calibration measures to the one considered in this work. Finally, we present experiments showing the testing problem we define faithfully captures standard notions of calibration, and that our algorithms scale efficiently to accommodate large sample sizes.", "keywords": "Calibration;Property testing;Linear programming", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/1a86894ba15bd5f3e4ae63c1876fb8590f0b0fd3.zip", "author": "Lunjia Hu;Arun Jambulapati;Kevin Tian;Chutong Yang", "authorids": "~Lunjia_Hu1;~Arun_Jambulapati1;~Kevin_Tian4;~Chutong_Yang1", "gender": "M;;;M", "homepage": "https://lunjiahu.com;;https://kjtian.github.io;https://chutongyang98.github.io/", "dblp": "195/6273;209/9910.html;;241/1151", "google_scholar": "ss7CIgcAAAAJ;;;9BBSgO4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lunjia_Hu1;~Arun_Jambulapati1;~Kevin_Tian4;~Chutong_Yang1", "aff": "Stanford University;Stanford University;University of Texas at Austin;University of Texas at Austin", "aff_domain": "stanford.edu;stanford.edu;utexas.edu;cs.utexas.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhu2024testing,\ntitle={Testing Calibration in Nearly-Linear Time},\nauthor={Lunjia Hu and Arun Jambulapati and Kevin Tian and Chutong Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=01XV5Za56k}\n}", "github": "", "reviewers": "q4wr;F42r;nmkM;rwaD", "pdf_size": 1088845, "rating": "5;6;7;7", "confidence": "2;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "18;206;252;38", "wc_strengths": "133;35;138;107", "wc_weaknesses": "49;146;67;167", "wc_questions": "126;84;121;87", "wc_limitations": "21;1;1;13", "wc_review": "347;472;579;412", "wc_reply_reviewers": "45;10;14;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.5, 102.05268247331865 ], "wc_strengths_avg": [ 103.25, 41.12405014100629 ], "wc_weaknesses_avg": [ 107.25, 50.21142798208392 ], "wc_questions_avg": [ 104.5, 19.11151485361639 ], "wc_limitations_avg": [ 9.0, 8.48528137423857 ], "wc_review_avg": [ 452.5, 85.37124808739767 ], "wc_reply_reviewers_avg": [ 23.0, 13.546217184144066 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3532466433920811839&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "stanford.edu;stanford.edu;utexas.edu;cs.utexas.edu", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Stanford University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.utexas.edu", "aff_unique_abbr": "Stanford;UT Austin", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Stanford;Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GLBench: A Comprehensive Benchmark for Graph with Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97881", "id": "01lhHg8H9p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=01lhHg8H9p", "openreview": "https://openreview.net/forum?id=01lhHg8H9p", "poster": "/media/PosterPDFs/NeurIPS%202024/97881.png?t=1731478194.2539835", "project": "", "author_site": "Yuhan Li, Peisong Wang, Xiao Zhu, Aochuan Chen, Haiyun Jiang, Deng Cai, Wai Kin (Victor) Chan, Jia Li", "tldr": "", "abstract": "The emergence of large language models (LLMs) has revolutionized the way we interact with graphs, leading to a new paradigm called GraphLLM. Despite the rapid development of GraphLLM methods in recent years, the progress and understanding of this field remain unclear due to the lack of a benchmark with consistent experimental protocols. To bridge this gap, we introduce GLBench, the first comprehensive benchmark for evaluating GraphLLM methods in both supervised and zero-shot scenarios. GLBench provides a fair and thorough evaluation of different categories of GraphLLM methods, along with traditional baselines such as graph neural networks. Through extensive experiments on a collection of real-world datasets with consistent data processing and splitting strategies, we have uncovered several key findings. Firstly, GraphLLM methods outperform traditional baselines in supervised settings, with LLM-as-enhancers showing the most robust performance. However, using LLMs as predictors is less effective and often leads to uncontrollable output issues. We also notice that no clear scaling laws exist for current GraphLLM methods. In addition, both structures and semantics are crucial for effective zero-shot transfer, and our proposed simple baseline can even outperform several models tailored for zero-shot scenarios. The data and code of the benchmark can be found at https://github.com/NineAbyss/GLBench.", "keywords": "Graph learning;Large Language Model;Zero-Shot", "primary_area": "", "supplementary_material": "", "author": "Yuhan Li;Peisong Wang;Xiao Zhu;Aochuan Chen;Haiyun Jiang;Deng Cai;Victor Wai Kin Chan;Jia Li", "authorids": "~Yuhan_Li3;~Peisong_Wang5;~Xiao_Zhu4;~Aochuan_Chen1;~Haiyun_Jiang1;~Deng_Cai1;~Victor_Wai_Kin_Chan1;~Jia_Li4", "gender": "M;M;M;M;M;M;Not Specified;M", "homepage": "https://scholar.google.com/citations?user=c8DzpkAAAAAJ&hl=zh-CN;http://peisongw.github.io;https://github.com/HexagonStar;https://scholar.google.com/citations?hl=en&view_op=list_works&gmla=AJsN-F6N4cEX-_kViGgRpnUVo_iBHlVXwMpnhlyB-Cdrndwj6B0jaDy088r7K9gHPGqSwsQ9tNxpijGpb1IoIB2B5KVS3Scvtdz9Mt_WR9GSou_saurFpSA&user=7pY-Ie8AAAAJ;;https://jcyk.github.io/;https://www.sigs.tsinghua.edu.cn/cwj/main.htm;https://sites.google.com/view/lijia", "dblp": "116/8661-1;;;331/2356;;c/DCai-2;60/4361.html;23/6950-9", "google_scholar": "c8DzpkAAAAAJ;1pwiTqIAAAAJ;X8b1RoYAAAAJ;https://scholar.google.com/citations?hl=en;fk684xEAAAAJ;KpbRLYcAAAAJ;tmYLtDgAAAAJ;1gSbcYoAAAAJ", "orcid": "0000-0003-1324-5819;;;0009-0002-2300-1498;;;0000-0002-7202-1922;0000-0002-6362-4385", "linkedin": ";;;;;;;", "or_profile": "~Yuhan_Li3;~Peisong_Wang5;~Xiao_Zhu4;~Aochuan_Chen1;~Haiyun_Jiang1;~Deng_Cai1;~Victor_Wai_Kin_Chan1;~Jia_Li4", "aff": "Hong Kong University of Science and Technology;Tsinghua University;Westlake University;Hong Kong University of Science and Technology;Tencent AI Lab;Tencent AI Lab;Shenzhen International Graduate School, Tsinghua University;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;mails.tsinghua.edu.cn;westlake.edu.cn;ust.hk;tencent.com;tencent.com;tsinghua.edu.cn;ust.hk", "position": "PhD student;MS student;Intern;PhD student;Researcher;Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024glbench,\ntitle={{GLB}ench: A Comprehensive Benchmark for Graph with Large Language Models},\nauthor={Yuhan Li and Peisong Wang and Xiao Zhu and Aochuan Chen and Haiyun Jiang and Deng Cai and Victor Wai Kin Chan and Jia Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=01lhHg8H9p}\n}", "github": "", "reviewers": "1oX9;eYMz;rfpz;n5o4", "pdf_size": 659296, "rating": "6;6;7;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "42;263;120;70", "wc_strengths": "2;9;109;17", "wc_improvement": "2;23;175;28", "wc_limitations": "1;1;10;8", "wc_correctness": "2;7;11;1", "wc_clarity": "1;9;6;1", "wc_relation_to_prior_work": "1;7;46;9", "wc_documentation": "1;13;92;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "53;333;570;136", "wc_reply_reviewers": "0;0;72;0", "wc_reply_authors": "558;462;27;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 123.75, 85.11279281048179 ], "wc_strengths_avg": [ 34.25, 43.48203652084387 ], "wc_improvement_avg": [ 57.0, 68.82223477917583 ], "wc_limitations_avg": [ 5.0, 4.06201920231798 ], "wc_correctness_avg": [ 5.25, 4.02336923485777 ], "wc_clarity_avg": [ 4.25, 3.418698582794336 ], "wc_relation_to_prior_work_avg": [ 15.75, 17.711225254058512 ], "wc_documentation_avg": [ 26.75, 37.989307706248084 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 273.0, 199.36022672539275 ], "wc_reply_reviewers_avg": [ 18.0, 31.176914536239792 ], "wc_reply_authors_avg": [ 261.75, 250.74127601972515 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5421286803653684142&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hkust.edu;mails.tsinghua.edu.cn;westlake.edu.cn;ust.hk;tencent.com;tencent.com;tsinghua.edu.cn;ust.hk", "author_num": 8, "aff_unique_index": "0;1;2;0;3;3;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tsinghua University;Westlake University;Tencent", "aff_unique_dep": ";;;Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://www.tsinghua.edu.cn;https://www.westlake.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "HKUST;THU;WU;Tencent AI Lab", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Bridge the Modality and Capability Gaps in Vision-Language Model Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96960", "id": "01qa1ZJs65", "proceeding": "", "pdf": "https://openreview.net/pdf?id=01qa1ZJs65", "openreview": "https://openreview.net/forum?id=01qa1ZJs65", "poster": "/media/PosterPDFs/NeurIPS%202024/96960.png?t=1731123927.9174762", "project": "", "author_site": "Chao Yi, Yuhang He, De-Chuan Zhan, Han-Jia Ye", "tldr": "", "abstract": "Vision Language Models (VLMs) excel in zero-shot image classification by pairing images with textual category names. The expanding variety of Pre-Trained VLMs enhances the likelihood of identifying a suitable VLM for specific tasks. To better reuse the VLM resource and fully leverage its potential on different zero-shot image classification tasks, a promising strategy is selecting appropriate Pre-Trained VLMs from the VLM Zoo, relying solely on the text data of the target dataset without access to the dataset\u2019s images. In this paper, we analyze two inherent challenges in assessing the ability of a VLM in this Language-Only VLM selection: the \u201cModality Gap\u201d\u2014the disparity in VLM\u2019s embeddings across two different modalities, making text a less reliable substitute for images; and the \u201cCapability Gap\u201d\u2014 the discrepancy between the VLM\u2019s overall ranking and its ranking for target dataset, hindering direct prediction of a model\u2019s dataset-specific performance from its general performance. We propose VLM Selection With gAp Bridging (SWAB) to mitigate the negative impact of two gaps. SWAB first adopts optimal transport to capture the relevance between open-source and target datasets with a transportation matrix. It then uses this matrix to transfer useful statistics of VLMs from open-source datasets to the target dataset for bridging two gaps. By bridging two gaps to obtain better substitutes for test images, SWAB can accurately predict the performance ranking of different VLMs on the target task without the need for the dataset\u2019s images. Experiments across various VLMs and image classification datasets validate SWAB\u2019s effectiveness. Code is available at: https://github.com/YCaigogogo/SWAB.", "keywords": "Vision-Language Model;Model Selection;Zero-Shot Image Classification", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chao Yi;Yuhang He;De-Chuan Zhan;Han-Jia Ye", "authorids": "~Chao_Yi1;~Yuhang_He9;~De-Chuan_Zhan1;~Han-Jia_Ye1", "gender": "M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/yic/;;http://www.lamda.nju.edu.cn/zhandc/;http://www.lamda.nju.edu.cn/yehj", "dblp": "159/8631;;74/498;165/3014", "google_scholar": ";;mYJf4TcAAAAJ;mgOYhtoAAAAJ", "orcid": ";0009-0004-2524-1270;0000-0002-3533-2078;", "linkedin": ";;;", "or_profile": "~Chao_Yi1;~Yuhang_He9;~De-Chuan_Zhan1;~Han-Jia_Ye1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;Undergrad student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyi2024bridge,\ntitle={Bridge the Modality and Capability Gaps in Vision-Language Model Selection},\nauthor={Chao Yi and Yuhang He and De-Chuan Zhan and Han-Jia Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=01qa1ZJs65}\n}", "github": "", "reviewers": "omtt;mpun;NUg2;qHTS", "pdf_size": 0, "rating": "4;5;5;7", "confidence": "4;4;3;5", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "116;118;31;93", "wc_strengths": "75;114;14;342", "wc_weaknesses": "166;96;55;46", "wc_questions": "40;26;36;4", "wc_limitations": "40;30;1;1", "wc_review": "437;384;137;486", "wc_reply_reviewers": "87;31;86;9", "wc_reply_authors": "567;203;391;963", "reply_reviewers": "1;1;2;1", "reply_authors": "6;5;5;6", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.5, 35.174564673923115 ], "wc_strengths_avg": [ 136.25, 124.02091557475295 ], "wc_weaknesses_avg": [ 90.75, 47.35702165466068 ], "wc_questions_avg": [ 26.5, 13.955285736952863 ], "wc_limitations_avg": [ 18.0, 17.363755354185336 ], "wc_review_avg": [ 361.0, 134.26280199668113 ], "wc_reply_reviewers_avg": [ 53.25, 34.14948755105997 ], "wc_reply_authors_avg": [ 531.0, 280.6706254669341 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 5.5, 0.5 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1250678210065714174&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "FreqMark: Invisible Image Watermarking via Frequency Based Optimization in Latent Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96959", "id": "01s5ODIHKd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=01s5ODIHKd", "openreview": "https://openreview.net/forum?id=01s5ODIHKd", "poster": "/media/PosterPDFs/NeurIPS%202024/96959.png?t=1730176108.8718698", "project": "", "author_site": "YiYang Guo, Ruizhe Li, Mude Hui, Hanzhong Guo, Chen Zhang, Chuangjian Cai, Le Wan, shangfei wang", "tldr": "", "abstract": "Invisible watermarking is essential for safeguarding digital content, enabling copyright protection and content authentication. \nHowever, existing watermarking methods fall short in robustness against regeneration attacks.\nIn this paper, we propose a novel method called FreqMark that involves unconstrained optimization of the image latent frequency space obtained after VAE encoding. Specifically, FreqMark embeds the watermark by optimizing the latent frequency space of the images and then extracts the watermark through a pre-trained image encoder. This optimization allows a flexible trade-off between image quality with watermark robustness and effectively resists regeneration attacks.\nExperimental results demonstrate that FreqMark offers significant advantages in image quality and robustness, permits flexible selection of the encoding bit number, and achieves a bit accuracy exceeding 90\\% when encoding a 48-bit hidden message under various attack scenarios.", "keywords": "deep watermarking;latent frequency optimization", "primary_area": "privacy", "supplementary_material": "", "author": "YiYang Guo;Ruizhe Li;Mude Hui;Hanzhong Allan Guo;Chen Zhang;Chuangjian Cai;Le Wan;Shangfei Wang", "authorids": "~YiYang_Guo2;~Ruizhe_Li3;~Mude_Hui2;~Hanzhong_Allan_Guo1;~Chen_Zhang22;~Chuangjian_Cai2;~Le_Wan1;~Shangfei_Wang1", "gender": "M;;M;M;;M;;F", "homepage": "https://github.com/Hemsworth;;https://github.com/TheFllood;;;https://www.researchgate.net/profile/Chuangjian-Cai;https://github.com/vinowan;", "dblp": ";;342/2735;;;;;15/2254", "google_scholar": ";;Jzz6BooAAAAJ;;;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;;;", "linkedin": ";;;hanzhong-guo-19965b1a5/;;;;", "or_profile": "~YiYang_Guo2;~Ruizhe_Li3;~Mude_Hui2;~Hanzhong_Allan_Guo1;~Chen_Zhang22;~Chuangjian_Cai2;~Le_Wan1;~Shangfei_Wang1", "aff": "University of Science and Technology of China;;University of California, Santa Cruz;Renmin University of China;;Tsinghua University;;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;;ucsc.edu;ruc.edu.cn;;tsinghua.edu.cn;;ustc.edu.cn", "position": "MS student;;PhD student;MS student;;PhD student;;Full Professor", "bibtex": "@inproceedings{\nguo2024freqmark,\ntitle={FreqMark: Invisible Image Watermarking via Frequency Based Optimization in Latent Space},\nauthor={YiYang Guo and Ruizhe Li and Mude Hui and Hanzhong Allan Guo and Chen Zhang and Chuangjian Cai and Le Wan and Shangfei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=01s5ODIHKd}\n}", "github": "", "reviewers": "BgZJ;TJux;kwvu;BZ7h", "pdf_size": 19849124, "rating": "4;6;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;1", "presentation": "2;3;3;3", "wc_summary": "47;76;38;100", "wc_strengths": "30;118;25;38", "wc_weaknesses": "133;135;186;507", "wc_questions": "19;62;4;15", "wc_limitations": "8;33;9;2", "wc_review": "237;424;262;662", "wc_reply_reviewers": "59;37;13;212", "wc_reply_authors": "217;19;7;347", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.25, 24.488517717493643 ], "wc_strengths_avg": [ 52.75, 37.95638944894522 ], "wc_weaknesses_avg": [ 240.25, 155.46603326772058 ], "wc_questions_avg": [ 25.0, 22.056745000112777 ], "wc_limitations_avg": [ 13.0, 11.853269591129697 ], "wc_review_avg": [ 396.25, 169.3935875409692 ], "wc_reply_reviewers_avg": [ 80.25, 77.78616522235815 ], "wc_reply_authors_avg": [ 147.5, 142.19968354395166 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n0KsfbK94HsJ:scholar.google.com/&scioq=FreqMark:+Invisible+Image+Watermarking+via+Frequency+Based+Optimization+in+Latent+Space&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "mail.ustc.edu.cn;;ucsc.edu;ruc.edu.cn;;tsinghua.edu.cn;;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Science and Technology of China;University of California, Santa Cruz;Renmin University of China;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ucsc.edu;http://www.ruc.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "USTC;UCSC;RUC;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "PointAD: Comprehending 3D Anomalies from Points and Pixels for Zero-shot 3D Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96958", "id": "02CIZ8qeDc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=02CIZ8qeDc", "openreview": "https://openreview.net/forum?id=02CIZ8qeDc", "poster": "/media/PosterPDFs/NeurIPS%202024/96958.png?t=1731476864.9817398", "project": "", "author_site": "Qihang Zhou, Jiangtao Yan, Shibo He, Wenchao Meng, Jiming Chen", "tldr": "", "abstract": "Zero-shot (ZS) 3D anomaly detection is a crucial yet unexplored field that addresses scenarios where target 3D training samples are unavailable due to practical concerns like privacy protection. This paper introduces PointAD, a novel approach that transfers the strong generalization capabilities of CLIP for recognizing 3D anomalies on unseen objects. PointAD provides a unified framework to comprehend 3D anomalies from both points and pixels. In this framework, PointAD renders 3D anomalies into multiple 2D renderings and projects them back into 3D space. To capture the generic anomaly semantics into PointAD, we propose hybrid representation learning that optimizes the learnable text prompts from 3D and 2D through auxiliary point clouds. The collaboration optimization between point and pixel representations jointly facilitates our model to grasp underlying 3D anomaly patterns, contributing to detecting and segmenting anomalies of unseen diverse 3D objects. Through the alignment of 3D and 2D space, our model can directly integrate RGB information, further enhancing the understanding of 3D anomalies in a plug-and-play manner. Extensive experiments show the superiority of PointAD in ZS 3D anomaly detection across diverse unseen objects.", "keywords": "3D anomaly detection;Zero-shot 3D anomaly detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qihang Zhou;Jiangtao Yan;Shibo He;Wenchao Meng;Jiming Chen", "authorids": "~Qihang_Zhou1;~Jiangtao_Yan2;~Shibo_He1;~Wenchao_Meng1;~Jiming_Chen1", "gender": "M;M;M;M;M", "homepage": "https://github.com/zqhang;;https://person.zju.edu.cn/en/shibohe;https://person.zju.edu.cn/wmeng;", "dblp": ";;07/7178;125/5619;55/2484-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=mkGKMDQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;zK9tvo8AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Qihang_Zhou1;~Jiangtao_Yan2;~Shibo_He1;~Wenchao_Meng1;~Jiming_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Full Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024pointad,\ntitle={Point{AD}: Comprehending 3D Anomalies from Points and Pixels for Zero-shot 3D Anomaly Detection},\nauthor={Qihang Zhou and Jiangtao Yan and Shibo He and Wenchao Meng and Jiming Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=02CIZ8qeDc}\n}", "github": "", "reviewers": "dthJ;GgEU;1QVN", "pdf_size": 5040028, "rating": "5;6;7", "confidence": "3;5;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "103;94;63", "wc_strengths": "66;135;121", "wc_weaknesses": "67;520;206", "wc_questions": "85;142;2", "wc_limitations": "39;21;17", "wc_review": "360;912;409", "wc_reply_reviewers": "0;148;15", "wc_reply_authors": "16;783;34", "reply_reviewers": "0;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.66666666666667, 17.13346303452853 ], "wc_strengths_avg": [ 107.33333333333333, 29.780679792256066 ], "wc_weaknesses_avg": [ 264.3333333333333, 189.48057654780578 ], "wc_questions_avg": [ 76.33333333333333, 57.48236444529941 ], "wc_limitations_avg": [ 25.666666666666668, 9.568466729604882 ], "wc_review_avg": [ 560.3333333333334, 249.46921431088222 ], "wc_reply_reviewers_avg": [ 54.333333333333336, 66.51482708556208 ], "wc_reply_authors_avg": [ 277.6666666666667, 357.4001803145724 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10166410199399650280&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Voxel Proposal Network via Multi-Frame Knowledge Distillation for Semantic Scene Completion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96957", "id": "02HWT9c4Lp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=02HWT9c4Lp", "openreview": "https://openreview.net/forum?id=02HWT9c4Lp", "poster": "/media/PosterPDFs/NeurIPS%202024/96957.png?t=1731159292.6143699", "project": "", "author_site": "Lubo Wang, Di Lin, Kairui Yang, Ruonan Liu, Qing Guo, Wuyuan Xie, Miaohui Wang, Lingyu Liang, Yi Wang, Ping Li", "tldr": "", "abstract": "Semantic scene completion is a difficult task that involves completing the geometry and semantics of a scene from point clouds in a large-scale environment. Many current methods use 3D/2D convolutions or attention mechanisms, but these have limitations in directly constructing geometry and accurately propagating features from related voxels, the completion likely fails while propagating features in a single pass without considering multiple potential pathways. And they are generally only suitable for static scenes and struggle to handle dynamic aspects. This paper introduces Voxel Proposal Network (VPNet) that completes scenes from 3D and Bird's-Eye-View (BEV) perspectives. It includes Confident Voxel Proposal based on voxel-wise coordinates to propose confident voxels with high reliability for completion. This method reconstructs the scene geometry and implicitly models the uncertainty of voxel-wise semantic labels by presenting multiple possibilities for voxels. VPNet employs Multi-Frame Knowledge Distillation based on the point clouds of multiple adjacent frames to accurately predict the voxel-wise labels by condensing various possibilities of voxel relationships. VPNet has shown superior performance and achieved state-of-the-art results on the SemanticKITTI and SemanticPOSS datasets.", "keywords": "Semantic Scene completion\uff0cVoxel Proposal\uff0cKnowledge Distillation", "primary_area": "machine_vision", "supplementary_material": "/attachment/eacdb01909acf6f78ef7bc6e2c7064fe87228469.zip", "author": "Lubo Wang;Di Lin;Kairui Yang;Ruonan Liu;Qing Guo;Wuyuan Xie;Miaohui Wang;Lingyu Liang;Yi Wang;Ping Li", "authorids": "~Lubo_Wang1;~Di_Lin3;~Kairui_Yang1;~Ruonan_Liu1;~Qing_Guo3;~Wuyuan_Xie1;~Miaohui_Wang2;~Lingyu_Liang1;~Yi_Wang2;~Ping_Li11", "gender": "M;M;M;F;M;F;M;M;M;M", "homepage": "https://github.com/glimmer-shining;https://dilincv.github.io/;https://sites.google.com/view/scenario-simulation-research/home;https://ruonanliu.com/;https://tsingqguo.github.io;;https://charwill.github.io/;;https://onewangyi.github.io//;http://www.comp.polyu.edu.hk/~pli", "dblp": "352/4271;20/3191-2.html;;200/9352;25/3038-5;66/8204;83/8841;01/10400.html;;62/5860-16", "google_scholar": "0pkbc9IAAAAJ;rW0r-hMAAAAJ;;jiAlRAwAAAAJ;Rj2x4QUAAAAJ;y5Dr_6YAAAAJ;https://scholar.google.com/citations?hl=en;Idk5anAAAAAJ;k2-vv-MAAAAJ;mQ9YyHsAAAAJ", "orcid": ";;;0000-0001-9963-7092;0000-0003-0974-9299;;0000-0003-1125-9299;;;0000-0002-1503-0240", "linkedin": ";;;;;;;;;", "or_profile": "~Lubo_Wang1;~Di_Lin3;~Kairui_Yang1;~Ruonan_Liu1;~Qing_Guo3;~Wuyuan_Xie1;~Miaohui_Wang2;~Lingyu_Liang1;~Yi_Wang2;~Ping_Li11", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University; Agency for Science, Technology and Research (A*STAR));Department of Software Engineering, Shenzhen University;Shenzhen University;South China University of Technology;Shenzhen University;The Hong Kong Polytechnic University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu;cfar.a-star.edu.sg;szu.edu;szu.edu.cn;scut.edu.cn;szu.edu.cn;polyu.edu.hk", "position": "MS student;Associate Professor;PhD student;Associate Professor;Researcher;Associate Professor;Associate Professor;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024voxel,\ntitle={Voxel Proposal Network via Multi-Frame Knowledge Distillation for Semantic Scene Completion},\nauthor={Lubo Wang and Di Lin and Kairui Yang and Ruonan Liu and Qing Guo and Wuyuan Xie and Miaohui Wang and Lingyu Liang and Yi Wang and Ping Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=02HWT9c4Lp}\n}", "github": "", "reviewers": "2ocQ;Rcs7;8T9a;Zma5", "pdf_size": 2842368, "rating": "5;5;6;6", "confidence": "4;5;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;2;2", "wc_summary": "54;81;46;53", "wc_strengths": "35;10;63;19", "wc_weaknesses": "62;104;225;31", "wc_questions": "7;30;4;5", "wc_limitations": "16;7;14;5", "wc_review": "174;232;352;113", "wc_reply_reviewers": "0;117;85;18", "wc_reply_authors": "115;174;137;28", "reply_reviewers": "0;2;2;1", "reply_authors": "3;4;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 58.5, 13.35102992281869 ], "wc_strengths_avg": [ 31.75, 20.141685629559408 ], "wc_weaknesses_avg": [ 105.5, 73.69701486491837 ], "wc_questions_avg": [ 11.5, 10.735455276791944 ], "wc_limitations_avg": [ 10.5, 4.6097722286464435 ], "wc_review_avg": [ 217.75, 88.19403324488567 ], "wc_reply_reviewers_avg": [ 55.0, 47.7964433823271 ], "wc_reply_authors_avg": [ 113.5, 53.67727638395972 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17286377088174137848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu;cfar.a-star.edu.sg;szu.edu;szu.edu.cn;scut.edu.cn;szu.edu.cn;polyu.edu.hk", "author_num": 10, "aff_unique_index": "0;0;0;0;1;2;2;3;2;4", "aff_unique_norm": "Tianjin University;Agency for Science, Technology and Research;Shenzhen University;South China University of Technology;Hong Kong Polytechnic University", "aff_unique_dep": ";;Department of Software Engineering;;", "aff_unique_url": "http://www.tju.edu.cn;https://www.a-star.edu.sg;https://www.szu.edu.cn;https://www.scut.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "TJU;A*STAR;SZU;SCUT;PolyU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Achieving Constant Regret in Linear Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96956", "id": "02r24A8doi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=02r24A8doi", "openreview": "https://openreview.net/forum?id=02r24A8doi", "poster": "", "project": "", "author_site": "Weitong Zhang, Zhiyuan Fan, Jiafan He, Quanquan Gu", "tldr": "", "abstract": "We study the constant regret guarantees in reinforcement learning (RL). Our objective is to design an algorithm that incurs only finite regret over infinite episodes with high probability. We introduce an algorithm, Cert-LSVI-UCB, for misspecified linear Markov decision processes (MDPs) where both the transition kernel and the reward function can be approximated by some linear function up to misspecification level $\\zeta$. At the core of Cert-LSVI-UCB is an innovative certified estimator, which facilitates a fine-grained concentration analysis for multi-phase value-targeted regression, enabling us to establish an instance-dependent regret bound that is constant w.r.t. the number of episodes. Specifically, we demonstrate that for a linear MDP characterized by a minimal suboptimality gap $\\Delta$, Cert-LSVI-UCB has a cumulative regret of $\\tilde{\\mathcal{O}}(d^3H^5/\\Delta)$ with high probability, provided that the misspecification level $\\zeta$ is below $\\tilde{\\mathcal{O}}(\\Delta / (\\sqrt{d}H^2))$. Here $d$ is the dimension of the feature space and $H$ is the horizon. Remarkably, this regret bound is independent of the number of episodes $K$. To the best of our knowledge, Cert-LSVI-UCB is the first algorithm to achieve a constant, instance-dependent, high-probability regret bound in RL with linear function approximation without relying on prior distribution assumptions.", "keywords": "Reinforcement learning;Constant regret;Misspecified linear MDP", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Weitong Zhang;Zhiyuan Fan;Jiafan He;Quanquan Gu", "authorids": "~Weitong_Zhang2;~Zhiyuan_Fan1;~Jiafan_He1;~Quanquan_Gu1", "gender": "M;M;M;M", "homepage": "https://fan-zhiyuan.org/;https://sites.google.com/g.ucla.edu/jiafan-he-homepage;http://web.cs.ucla.edu/~qgu/;https://web.cs.ucla.edu/~weightzero", "dblp": ";214/5785;50/4597;96/4158", "google_scholar": ";F3AXNBwAAAAJ;GU9HgNAAAAAJ;Ec6bzmcAAAAJ", "orcid": "0000-0001-7468-0895;;;", "linkedin": ";;;", "or_profile": "~Zhiyuan_Fan1;~Jiafan_He1;~Quanquan_Gu1;~Weitong_ZHANG1", "aff": "Massachusetts Institute of Technology;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "mit.edu;ucla.edu;cs.ucla.edu;ucla.edu", "position": "PhD student;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024achieving,\ntitle={Achieving Constant Regret in Linear Markov Decision Processes},\nauthor={Weitong Zhang and Zhiyuan Fan and Jiafan He and Quanquan Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=02r24A8doi}\n}", "github": "", "reviewers": "YEKy;33Lb;8jf2;jLch", "pdf_size": 1587666, "rating": "4;5;6;6", "confidence": "3;3;3;2", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;2", "wc_summary": "80;45;207;32", "wc_strengths": "79;31;97;51", "wc_weaknesses": "297;138;34;137", "wc_questions": "2;79;1;12", "wc_limitations": "1;26;14;1", "wc_review": "459;319;353;233", "wc_reply_reviewers": "199;0;0;51", "wc_reply_authors": "774;0;0;27", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 91.0, 69.23510670173044 ], "wc_strengths_avg": [ 64.5, 25.352514668174436 ], "wc_weaknesses_avg": [ 151.5, 94.03323880415903 ], "wc_questions_avg": [ 23.5, 32.33032632065442 ], "wc_limitations_avg": [ 10.5, 10.404326023342406 ], "wc_review_avg": [ 341.0, 80.95677859203639 ], "wc_reply_reviewers_avg": [ 62.5, 81.51226901516115 ], "wc_reply_authors_avg": [ 200.25, 331.4380598241548 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3GISnSz0P7oJ:scholar.google.com/&scioq=Achieving+Constant+Regret+in+Linear+Markov+Decision+Processes&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "mit.edu;ucla.edu;cs.ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ucla.edu", "aff_unique_abbr": "MIT;UCLA", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MemoryFormer : Minimize Transformer Computation by Removing Fully-Connected Layers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96955", "id": "04EC4ZnZJj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=04EC4ZnZJj", "openreview": "https://openreview.net/forum?id=04EC4ZnZJj", "poster": "", "project": "", "author_site": "Ning Ding, Yehui Tang, Haochen Qin, Zhenli Zhou, Chao Xu, Lin Li, Kai Han, Liao Heng, Yunhe Wang", "tldr": "", "abstract": "In order to reduce the computational complexity of large language models, great efforts have been made to to improve the efficiency of transformer models such as linear attention and flash-attention. However, the model size and corresponding computational complexity are constantly scaled up in pursuit of higher performance. In this work, we present MemoryFormer, a novel transformer architecture which significantly reduces the computational complexity (FLOPs) from a new perspective. We eliminate nearly all the computations of the transformer model except for the necessary computation required by the multi-head attention operation. This is made possible by utilizing an alternative method for feature transformation to replace the linear projection of fully-connected layers. Specifically, we first construct a group of in-memory lookup tables that store a large amount of discrete vectors to replace the weight matrix used in linear projection. We then use a hash algorithm to retrieve a correlated subset of vectors dynamically based on the input embedding. The retrieved vectors combined together will form the output embedding, which provides an estimation of the result of matrix multiplication operation in a fully-connected layer. Compared to conducting matrix multiplication, retrieving data blocks from memory is a much cheaper operation which requires little computations. We train MemoryFormer from scratch and conduct extensive experiments on various benchmarks to demonstrate the effectiveness of the proposed model.", "keywords": "transformer architecture;large language model;computational complexity reduction", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ning Ding;Yehui Tang;Haochen Qin;Zhenli Zhou;Chao Xu;Lin Li;Kai Han;Liao Heng;Yunhe Wang", "authorids": "~Ning_Ding4;~Yehui_Tang1;~Haochen_Qin1;~Zhenli_Zhou1;~Chao_Xu1;~Lin_Li13;~Kai_Han2;~Liao_Heng1;~Yunhe_Wang1", "gender": "M;M;M;M;M;M;M;M;", "homepage": ";;;https://github.com/TruthZZ;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;https://iamhankai.github.io;https://ieeexplore.ieee.org/author/37087047190;https://www.wangyunhe.site/;", "dblp": ";244/9659;;;;51/4757-2;;63/8217-1;", "google_scholar": "oXP1heYAAAAJ;TkSZQ6gAAAAJ;xB9-qwEAAAAJ;;https://scholar.google.co.uk/citations?hl=zh-CN;vThoBVcAAAAJ;;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;;;0000-0002-9761-2702;;0000-0002-0142-509X;", "linkedin": ";;;;;;;;", "or_profile": "~Ning_Ding4;~Yehui_Tang1;~Haochen_Qin1;~Zhenli_Zhou1;~Chao_Xu1;~Kai_Han2;~Liao_Heng1;~Yunhe_Wang1;~Linxuan_Li1", "aff": "Peking University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Peking University;Peking University;Huawei Noah's Ark Lab;, Tsinghua University;Huawei Noah's Ark Lab;", "aff_domain": "pku.edu.cn;huawei.com;huawei.com;stu.pku.edu.cn;pku.edu;huawei.com;cs.tsinghua.edu.cn;huawei.com;", "position": "PhD student;Researcher;Researcher;MS student;Full Professor;Principal Researcher;Principal Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nding2024memoryformer,\ntitle={MemoryFormer : Minimize Transformer Computation by Removing Fully-Connected Layers},\nauthor={Ning Ding and Yehui Tang and Haochen Qin and Zhenli Zhou and Chao Xu and Lin Li and Kai Han and Liao Heng and Yunhe Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=04EC4ZnZJj}\n}", "github": "", "reviewers": "vbej;vj2X;omZU;eumW", "pdf_size": 546961, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;4;3;3", "wc_summary": "144;142;136;84", "wc_strengths": "101;97;93;106", "wc_weaknesses": "67;109;343;75", "wc_questions": "29;52;2;28", "wc_limitations": "25;4;17;5", "wc_review": "366;404;591;298", "wc_reply_reviewers": "0;0;112;22", "wc_reply_authors": "0;0;32;32", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 126.5, 24.713356712514795 ], "wc_strengths_avg": [ 99.25, 4.815340071064556 ], "wc_weaknesses_avg": [ 148.5, 113.39642851518737 ], "wc_questions_avg": [ 27.75, 17.69710428290459 ], "wc_limitations_avg": [ 12.75, 8.728545125048045 ], "wc_review_avg": [ 414.75, 108.61255682470605 ], "wc_reply_reviewers_avg": [ 33.5, 46.203354856546945 ], "wc_reply_authors_avg": [ 16.0, 16.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TbSMOVReIfUJ:scholar.google.com/&scioq=MemoryFormer+:+Minimize+Transformer+Computation+by+Removing+Fully-Connected+Layers&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "pku.edu.cn;huawei.com;huawei.com;stu.pku.edu.cn;pku.edu;huawei.com;cs.tsinghua.edu.cn;huawei.com;", "author_num": 9, "aff_unique_index": "0;1;1;0;0;1;2;1", "aff_unique_norm": "Peking University;Huawei;Tsinghua University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;Huawei;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mimicking To Dominate: Imitation Learning Strategies for Success in Multiagent Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96954", "id": "06JRFVK88O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=06JRFVK88O", "openreview": "https://openreview.net/forum?id=06JRFVK88O", "poster": "/media/PosterPDFs/NeurIPS%202024/96954.png?t=1731744891.8886228", "project": "", "author_site": "The Viet Bui, Tien Mai, Thanh Nguyen", "tldr": "", "abstract": "Training agents in multi-agent games presents significant challenges due to their intricate nature. These challenges are exacerbated by dynamics influenced not only by the environment but also by strategies of opponents. Existing methods often struggle with slow convergence and instability.\nTo address these challenges, we harness the potential of imitation learning (IL) to comprehend and anticipate actions of the opponents, aiming to mitigate uncertainties with respect to the game dynamics.\nOur key contributions include:\n(i) a new multi-agent IL model for predicting next moves of the opponents - our model works with hidden actions of opponents and local observations;\n(ii) a new multi-agent reinforcement learning (MARL) algorithm that combines our IL model and policy training into one single training process;\nand (iii) extensive experiments in three challenging game environments, including an advanced version of the Star-Craft multi-agent challenge (i.e., SMACv2).\nExperimental results show that our approach achieves superior performance compared to state-of-the-art MARL algorithms.", "keywords": "Multi-agent Reinforcement Learning;Imitation Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/fbae2ba65c2a4e03a722097435d45aa51d714287.zip", "author": "The Viet Bui;Tien Anh Mai;Thanh Hong Nguyen", "authorids": "~The_Viet_Bui1;~Tien_Anh_Mai1;~Thanh_Hong_Nguyen1", "gender": "M;F;M", "homepage": "https://sites.google.com/view/tien-mai/;https://ix.cs.uoregon.edu/~thanhhng/;", "dblp": "229/2286.html;117/4935;", "google_scholar": ";6fpZnQIAAAAJ;rpPDGm4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tien_Anh_Mai1;~Thanh_Hong_Nguyen1;~Viet_The_Bui1", "aff": "Singapore Management University;University of Oregon;Singapore Management University", "aff_domain": "smu.edu.sg;uoregon.edu;phdcs.smu.edu.sg", "position": "Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbui2024mimicking,\ntitle={Mimicking To Dominate: Imitation Learning Strategies for Success in Multiagent Games},\nauthor={The Viet Bui and Tien Anh Mai and Thanh Hong Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=06JRFVK88O}\n}", "github": "", "reviewers": "ZqFd;bLoo;4odM", "pdf_size": 1990101, "rating": "6;6;8", "confidence": "3;3;3", "soundness": "3;2;4", "novelty": "2;3;4", "presentation": "3;1;4", "wc_summary": "74;36;18", "wc_strengths": "72;82;22", "wc_weaknesses": "72;342;28", "wc_questions": "40;9;53", "wc_limitations": "6;20;7", "wc_review": "264;489;128", "wc_reply_reviewers": "48;19;0", "wc_reply_authors": "130;20;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 42.666666666666664, 23.342855200015464 ], "wc_strengths_avg": [ 58.666666666666664, 26.246692913372705 ], "wc_weaknesses_avg": [ 147.33333333333334, 138.8172259563712 ], "wc_questions_avg": [ 34.0, 18.457157599876172 ], "wc_limitations_avg": [ 11.0, 6.377042156569663 ], "wc_review_avg": [ 293.6666666666667, 148.86309892724327 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 19.73716179078328 ], "wc_reply_authors_avg": [ 50.0, 57.154760664940824 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5-9MK9ikfqIJ:scholar.google.com/&scioq=Mimicking+To+Dominate:+Imitation+Learning+Strategies+for+Success+in+Multiagent+Games&hl=en&as_sdt=0,24", "gs_version_total": 3, "email": "smu.edu.sg;uoregon.edu;phdcs.smu.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Singapore Management University;University of Oregon", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://www.uoregon.edu", "aff_unique_abbr": "SMU;UO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "SyncTweedies: A General Generative Framework Based on Synchronized Diffusions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96953", "id": "06Vt6f2js7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=06Vt6f2js7", "openreview": "https://openreview.net/forum?id=06Vt6f2js7", "poster": "", "project": "", "author_site": "Jaihoon Kim, Juil Koo, Kyeongmin Yeo, Minhyuk Sung", "tldr": "", "abstract": "We introduce a general diffusion synchronization framework for generating diverse visual content, including ambiguous images, panorama images, 3D mesh textures, and 3D Gaussian splats textures, using a pretrained image diffusion model. We first present an analysis of various scenarios for synchronizing multiple diffusion processes through a canonical space. Based on the analysis, we introduce a synchronized diffusion method, SyncTweedies, which averages the outputs of Tweedie\u2019s formula while conducting denoising in multiple instance spaces. Compared to previous work that achieves synchronization through finetuning, SyncTweedies is a zero-shot method that does not require any finetuning, preserving the rich prior of diffusion models trained on Internet-scale image datasets without overfitting to specific domains. We verify that SyncTweedies offers the broadest applicability to diverse applications and superior performance compared to the previous state-of-the-art for each application. Our project page is at https://synctweedies.github.io.", "keywords": "Diffusion Models;Synchronization;Texturing;3D Gaussian Splatting;Mesh;Panorama", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/d1c4287e18aac3590ff06fe82a3df63f71bd9b39.zip", "author": "Jaihoon Kim;Juil Koo;Kyeongmin Yeo;Minhyuk Sung", "authorids": "~Jaihoon_Kim1;~Juil_Koo1;~Kyeongmin_Yeo1;~Minhyuk_Sung1", "gender": "M;M;M;M", "homepage": "https://jh27kim.github.io/;https://63days.github.io/;;https://mhsung.github.io/", "dblp": "355/1743;309/6316;372/5415;171/6792", "google_scholar": "DJlx5QMAAAAJ;https://scholar.google.co.kr/citations?user=avxsz1UAAAAJ;;PcIYMp4AAAAJ", "orcid": ";;;", "linkedin": ";;;mhsung", "or_profile": "~Jaihoon_Kim1;~Juil_Koo1;~Kyeongmin_Yeo1;~Minhyuk_Sung1", "aff": "KAIST;Adobe Systems;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;adobe.com;kaist.ac.kr;kaist.ac.kr", "position": "MS student;Intern;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nkim2024synctweedies,\ntitle={SyncTweedies: A General Generative Framework Based on Synchronized Diffusions},\nauthor={Jaihoon Kim and Juil Koo and Kyeongmin Yeo and Minhyuk Sung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=06Vt6f2js7}\n}", "github": "", "reviewers": "TnCt;XFCg;KJxR", "pdf_size": 36556479, "rating": "6;7;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;4;3", "wc_summary": "69;78;104", "wc_strengths": "116;138;104", "wc_weaknesses": "117;151;560", "wc_questions": "58;40;3", "wc_limitations": "120;1;14", "wc_review": "480;408;785", "wc_reply_reviewers": "29;58;15", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 83.66666666666667, 14.83988619303471 ], "wc_strengths_avg": [ 119.33333333333333, 14.079141387961918 ], "wc_weaknesses_avg": [ 276.0, 201.29745817239387 ], "wc_questions_avg": [ 33.666666666666664, 22.89589968143253 ], "wc_limitations_avg": [ 45.0, 53.29790489941107 ], "wc_review_avg": [ 557.6666666666666, 163.4142656631367 ], "wc_reply_reviewers_avg": [ 34.0, 17.90716802475106 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7027724039860436181&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;adobe.com;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.kaist.ac.kr;https://www.adobe.com", "aff_unique_abbr": "KAIST;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Improved Analysis for Bandit Learning in Matching Markets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96952", "id": "07N0qoaZ2L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=07N0qoaZ2L", "openreview": "https://openreview.net/forum?id=07N0qoaZ2L", "poster": "/media/PosterPDFs/NeurIPS%202024/96952.png?t=1731746661.9360106", "project": "", "author_site": "Fang Kong, Zilong Wang, Shuai Li", "tldr": "", "abstract": "A rich line of works study the bandit learning problem in two-sided matching markets, where one side of market participants (players) are uncertain about their preferences and hope to find a stable matching during iterative matchings with the other side (arms). The state-of-the-art analysis shows that the player-optimal stable regret is of order $O(K\\log T/\\Delta^2)$ where $K$ is the number of arms, $T$ is the horizon and $\\Delta$ is the players' minimum preference gap. However, this result may be far from the lower bound $\\Omega(\\max\\{N\\log T/\\Delta^2, K\\log T/\\Delta\\})$ since the number $K$ of arms (workers, publisher slots) may be much larger than that $N$ of players (employers in labor markets, advertisers in online advertising, respectively). In this paper, we propose a new algorithm and show that the regret can be upper bounded by $O(N^2\\log T/\\Delta^2 + K \\log T/\\Delta)$. This result removes the dependence on $K$ in the main order term and improves the state-of-the-art guarantee in common cases where $N$ is much smaller than $K$. Such an advantage is also verified in experiments. In addition, we provide a refined analysis for the existing centralized UCB algorithm and show that, under $\\alpha$-condition, it achieves an improved $O(N \\log T/\\Delta^2 + K \\log T / \\Delta)$ regret.", "keywords": "Bandits;Matching markets;Stable matching;Stable regret;Exploration and exploitation;$\\alpha$-condition", "primary_area": "bandits", "supplementary_material": "/attachment/0381204d7f2b87b2c1917330f89ff7c4755919db.zip", "author": "Fang Kong;Zilong Wang;Shuai Li", "authorids": "~Fang_Kong2;~Zilong_Wang2;~Shuai_Li3", "gender": "F;;F", "homepage": ";;http://shuaili8.github.io", "dblp": "48/7676-2;;57/2281-10", "google_scholar": "q1Z41BQAAAAJ;z786jBwAAAAJ;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";0000-0003-0673-0488;", "linkedin": ";;", "or_profile": "~Fang_Kong2;~Zilong_Wang2;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkong2024improved,\ntitle={Improved Analysis for Bandit Learning in Matching Markets},\nauthor={Fang Kong and Zilong Wang and Shuai Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=07N0qoaZ2L}\n}", "github": "", "reviewers": "Pjvv;SWbh;unKj;Y8Bw", "pdf_size": 394854, "rating": "5;5;5;7", "confidence": "3;4;2;3", "soundness": "3;3;3;4", "novelty": "2;2;2;4", "presentation": "3;3;3;4", "wc_summary": "68;52;134;73", "wc_strengths": "32;19;69;48", "wc_weaknesses": "37;24;222;22", "wc_questions": "11;45;41;51", "wc_limitations": "9;24;77;1", "wc_review": "157;164;543;195", "wc_reply_reviewers": "60;12;116;0", "wc_reply_authors": "0;0;317;0", "reply_reviewers": "1;1;2;0", "reply_authors": "1;1;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 31.1478329904345 ], "wc_strengths_avg": [ 42.0, 18.66815470259447 ], "wc_weaknesses_avg": [ 76.25, 84.34564304100124 ], "wc_questions_avg": [ 37.0, 15.427248620541512 ], "wc_limitations_avg": [ 27.75, 29.608909132218972 ], "wc_review_avg": [ 264.75, 161.28294237147338 ], "wc_reply_reviewers_avg": [ 47.0, 45.727453460694704 ], "wc_reply_authors_avg": [ 79.25, 137.26502649983354 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10314774501744643849&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Director3D: Real-world Camera Trajectory and 3D Scene Generation from Text", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96951", "id": "08A6X7FSTs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=08A6X7FSTs", "openreview": "https://openreview.net/forum?id=08A6X7FSTs", "poster": "/media/PosterPDFs/NeurIPS%202024/96951.png?t=1732593534.8700738", "project": "", "author_site": "Xinyang Li, Zhangyu Lai, Linning Xu, Yansong Qu, Liujuan Cao, ShengChuan Zhang, Bo Dai, Rongrong Ji", "tldr": "", "abstract": "Recent advancements in 3D generation have leveraged synthetic datasets with ground truth 3D assets and predefined camera trajectories. However, the potential of adopting real-world datasets, which can produce significantly more realistic 3D scenes, remains largely unexplored. In this work, we delve into the key challenge of the complex and scene-specific camera trajectories found in real-world captures. We introduce Director3D, a robust open-world text-to-3D generation framework, designed to generate both real-world 3D scenes and adaptive camera trajectories. To achieve this, (1) we first utilize a Trajectory Diffusion Transformer, acting as the \\emph{Cinematographer}, to model the distribution of camera trajectories based on textual descriptions. Next, a Gaussian-driven Multi-view Latent Diffusion Model serves as the \\emph{Decorator}, modeling the image sequence distribution given the camera trajectories and texts. This model, fine-tuned from a 2D diffusion model, directly generates pixel-aligned 3D Gaussians as an immediate 3D scene representation for consistent denoising. Lastly, the 3D Gaussians are further refined by a novel SDS++ loss as the \\emph{Detailer}, which incorporates the prior of the 2D diffusion model. Extensive experiments demonstrate that Director3D outperforms existing methods, offering superior performance in real-world 3D generation.", "keywords": "Text-to-3D Scene Generation;Latent Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xinyang Li;Zhangyu Lai;Linning Xu;Yansong Qu;Liujuan Cao;Shengchuan Zhang;Bo Dai;Rongrong Ji", "authorids": "~Xinyang_Li1;~Zhangyu_Lai1;~Linning_Xu2;~Yansong_Qu1;~Liujuan_Cao1;~Shengchuan_Zhang1;~Bo_Dai2;~Rongrong_Ji5", "gender": "M;M;F;M;F;M;M;M", "homepage": ";https://github.com/Clear-3d;https://eveneveno.github.io/linning/;https://quyans.github.io/;https://mac.xmu.edu.cn/ljcao/;https://informatics.xmu.edu.cn/info/1405/25059.htm;http://daibo.info/;http://mac.xmu.edu.cn/rrji-en.html", "dblp": ";;242/9358;;85/8842.html;162/1064;64/2903-2;86/5681", "google_scholar": "M9rwkHwAAAAJ;-eBnQzAAAAAJ;I9Lrbs4AAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;iYEcVaAAAAAJ;GToqXScAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ;", "orcid": "0000-0002-2932-8953;;;0009-0003-4325-6858;0000-0002-7645-9606;0000-0002-0800-0609;0000-0003-0777-9232;", "linkedin": ";;;;;;;", "or_profile": "~Xinyang_Li1;~Zhangyu_Lai1;~Linning_Xu2;~Yansong_Qu1;~Liujuan_Cao1;~Shengchuan_Zhang1;~Bo_Dai2;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;The Chinese University of Hong Kong;Xiamen University;Xiamen University;Xiamen University;Shanghai AI Laboratory;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;cuhk.edu.hk;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;pjlab.org.cn;xmu.edu.cn", "position": "PhD student;MS student;Ph.D. student;PhD student;Full Professor;Assistant Professor;Scientist;Full Professor", "bibtex": "@inproceedings{\nli2024directord,\ntitle={Director3D: Real-world Camera Trajectory and 3D Scene Generation from Text},\nauthor={Xinyang Li and Zhangyu Lai and Linning Xu and Yansong Qu and Liujuan Cao and Shengchuan Zhang and Bo Dai and Rongrong Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=08A6X7FSTs}\n}", "github": "", "reviewers": "yFX5;sdLT;YUdV;Lv1o", "pdf_size": 34967022, "rating": "5;5;5;7", "confidence": "5;5;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;4", "presentation": "4;1;3;3", "wc_summary": "51;29;61;72", "wc_strengths": "27;27;54;72", "wc_weaknesses": "49;330;217;114", "wc_questions": "101;5;5;3", "wc_limitations": "102;29;1;39", "wc_review": "330;420;338;300", "wc_reply_reviewers": "122;244;0;0", "wc_reply_authors": "385;300;0;0", "reply_reviewers": "4;2;0;0", "reply_authors": "5;3;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 53.25, 15.848895860595462 ], "wc_strengths_avg": [ 45.0, 19.091883092036785 ], "wc_weaknesses_avg": [ 177.5, 106.49060991467745 ], "wc_questions_avg": [ 28.5, 41.865857210858586 ], "wc_limitations_avg": [ 42.75, 36.93490896157726 ], "wc_review_avg": [ 347.0, 44.46346815083142 ], "wc_reply_reviewers_avg": [ 91.5, 101.1570561058397 ], "wc_reply_authors_avg": [ 171.25, 173.8668671714079 ], "reply_reviewers_avg": [ 1.5, 1.6583123951777 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2752976403267259927&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "xmu.edu.cn;xmu.edu.cn;cuhk.edu.hk;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;pjlab.org.cn;xmu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;2;0", "aff_unique_norm": "Xiamen University;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "XMU;CUHK;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Versatile Skills with Curriculum Masking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96950", "id": "08GbdALmEs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=08GbdALmEs", "openreview": "https://openreview.net/forum?id=08GbdALmEs", "poster": "/media/PosterPDFs/NeurIPS%202024/96950.png?t=1733319308.3295946", "project": "", "author_site": "Yao Tang, Zhihui Xie, Zichuan Lin, Deheng Ye, Shuai Li", "tldr": "", "abstract": "Masked prediction has emerged as a promising pretraining paradigm in offline reinforcement learning (RL) due to its versatile masking schemes, enabling flexible inference across various downstream tasks with a unified model. Despite the versatility of masked prediction, it remains unclear how to balance the learning of skills at different levels of complexity. To address this, we propose CurrMask, a curriculum masking pretraining paradigm for sequential decision making. Motivated by how humans learn by organizing knowledge in a curriculum, CurrMask adjusts its masking scheme during pretraining for learning versatile skills. Through extensive experiments, we show that CurrMask exhibits superior zero-shot performance on skill prompting tasks, goal-conditioned planning tasks, and competitive finetuning performance on offline RL tasks. Additionally, our analysis of training dynamics reveals that CurrMask gradually acquires skills of varying complexity by dynamically adjusting its masking scheme.", "keywords": "reinforcement learning;unsupervised pretraining;masked prediction;curriculum learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yao Tang;Zhihui Xie;Zichuan Lin;Deheng Ye;Shuai Li", "authorids": "~Yao_Tang2;~Zhihui_Xie2;~Zichuan_Lin2;~Deheng_Ye1;~Shuai_Li3", "gender": "F;M;M;M;F", "homepage": "https://yaotang23.github.io/;https://fffffarmer.github.io/;https://linzichuan.github.io/;http://yedeheng.github.io/;http://shuaili8.github.io", "dblp": ";31/3570-2;220/3933.html;159/9503;57/2281-10", "google_scholar": ";Jml8NvkAAAAJ;Tlc4yaMAAAAJ;jz5XKuQAAAAJ;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";;;0000-0002-1754-1837;", "linkedin": ";;zichuan-lin-97560aa4/?originalSubdomain=cn;;", "or_profile": "~Yao_Tang2;~Zhihui_Xie2;~Zichuan_Lin2;~Deheng_Ye1;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Tencent;Tencent;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "cs.sjtu.edu.cn;sjtu.edu.cn;tencent.com;tencent.com;sjtu.edu.cn", "position": "Undergrad student;MS student;Senior Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntang2024learning,\ntitle={Learning Versatile Skills with Curriculum Masking},\nauthor={Yao Tang and Zhihui Xie and Zichuan Lin and Deheng Ye and Shuai Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=08GbdALmEs}\n}", "github": "", "reviewers": "CzXA;znVZ;f2Vb;Wq3n", "pdf_size": 902333, "rating": "5;5;5;6", "confidence": "3;3;4;3", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "49;151;96;127", "wc_strengths": "53;79;41;57", "wc_weaknesses": "172;291;124;86", "wc_questions": "8;75;2;113", "wc_limitations": "1;17;18;10", "wc_review": "283;613;281;393", "wc_reply_reviewers": "19;32;0;52", "wc_reply_authors": "20;33;0;60", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.75, 38.12725403172906 ], "wc_strengths_avg": [ 57.5, 13.738631664034086 ], "wc_weaknesses_avg": [ 168.25, 77.14394013790066 ], "wc_questions_avg": [ 49.5, 46.53224688320993 ], "wc_limitations_avg": [ 11.5, 6.800735254367722 ], "wc_review_avg": [ 392.5, 135.13234253871278 ], "wc_reply_reviewers_avg": [ 25.75, 18.952242611363964 ], "wc_reply_authors_avg": [ 28.25, 21.775846711436962 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14553108888432938277&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "cs.sjtu.edu.cn;sjtu.edu.cn;tencent.com;tencent.com;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tencent.com", "aff_unique_abbr": "SJTU;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "08nbMTxazb", "title": "Chicks4FreeID: A Benchmark Dataset for Chicken Re-Identification", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "To address the need for well-annotated datasets in the field of animal re-identification, and particularly to close the existing gap for chickens, we introduce the Chicks4FreeID dataset. This dataset is the first publicly available re-identification resource dedicated to the most farmed animal in the world. It includes top-down view images of individually segmented and annotated chickens, along with preprocessed cut-out crops of the instances. The dataset comprises 1215 annotations of 50 unique chicken individuals, as well as a total of 55 annotations of 2 roosters and 2 ducks. In addition to re-identification, the dataset supports semantic and instance segmentation tasks by providing corresponding masks. Curation and annotation were performed manually, ensuring high-quality, nearly pixel-perfect masks and accurate ground truth assignment of the individuals using expert knowledge. Additionally, we provide context by offering a comprehensive overview of existing datasets for animal re-identification. To facilitate comparability, we establish a baseline for the re-identification task testing different approaches. Performance is evaluated based on mAP, Top-1, and Top-5 accuracy metrics. Both the data and code are publicly shared under a CC BY 4.0 license, promoting accessibility and further research. The dataset can be accessed at https://huggingface.co/datasets/dariakern/Chicks4FreeID and the code at https://github.com/DariaKern/Chicks4FreeID.", "keywords": "animal re-identification;chicken re-identification;chicken dataset;re-identification dataset;benchmark dataset;closed-set re-identification;computer vision;chicken instance segmentation;poultry;livestock", "primary_area": "", "supplementary_material": "/attachment/b06ad59a33aad15b2aa191506b97cc4fdd90e5cd.pdf", "author": "Daria Kern;Tobias Schiele;Ulrich Klauck;Winfred Ingabire", "authorids": "~Daria_Kern1;~Tobias_Schiele1;~Ulrich_Klauck1;~Winfred_Ingabire1", "gender": ";M;M;F", "homepage": ";;https://www.hs-aalen.de/de/search?q=Klauck&t%5B%5D=User;", "dblp": ";;117/5910.html;", "google_scholar": ";;;", "orcid": ";0009-0002-9627-0656;0000-0002-1675-5059;", "linkedin": ";tobias-schiele/;ulrich-klauck-96292314b/;winfred-ingabire-98924382/", "or_profile": "~Daria_Kern1;~Tobias_Schiele1;~Ulrich_Klauck1;~Winfred_Ingabire1", "aff": ";Glasgow Caledonian University;Hochschule Aalen;Glasgow Caledonian University", "aff_domain": ";gcu.ac.uk;hs-aalen.de;gcu.ac.uk", "position": ";PhD student;Full Professor;Lecturer", "bibtex": "@misc{\nanonymous2024chicksfreeid,\ntitle={Chicks4Free{ID}: A Benchmark Dataset for Chicken Re-Identification},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=08nbMTxazb}\n}", "github": "", "project": "", "reviewers": "mGJR;cXUF;AJWF;96pm", "site": "https://openreview.net/forum?id=08nbMTxazb", "pdf_size": 4228541, "rating": "3;3;6;7", "confidence": "5;4;3;4", "wc_summary_and_contributions": "53;31;123;74", "wc_strengths": "4;5;83;82", "wc_improvement": "107;52;369;149", "wc_limitations": "15;82;100;113", "wc_correctness": "1;10;7;17", "wc_clarity": "1;15;40;7", "wc_relation_to_prior_work": "1;7;30;81", "wc_documentation": "1;10;14;33", "wc_additional_feedback": "1;1;1;1", "wc_review": "184;213;767;557", "wc_reply_reviewers": "0;60;91;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 70.25, 34.03949911499874 ], "wc_strengths_avg": [ 43.5, 39.003204996512785 ], "wc_improvement_avg": [ 169.25, 120.34611543377709 ], "wc_limitations_avg": [ 77.5, 37.72598573927526 ], "wc_correctness_avg": [ 8.75, 5.7608593109014565 ], "wc_clarity_avg": [ 15.75, 14.85555451674558 ], "wc_relation_to_prior_work_avg": [ 29.75, 31.506943679132064 ], "wc_documentation_avg": [ 14.5, 11.672617529928752 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 430.25, 243.5686504868802 ], "wc_reply_reviewers_avg": [ 46.75, 33.29695932063467 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5940885257860046, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EXcBhlxJukUJ:scholar.google.com/&scioq=Chicks4FreeID:+A+Benchmark+Dataset+for+Chicken+Re-Identification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Glasgow Caledonian University;Hochschule Aalen", "aff_unique_dep": ";", "aff_unique_url": "https://www.gcu.ac.uk;https://www.hs-aalen.de", "aff_unique_abbr": "GCU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Germany" }, { "id": "08oUnmtj8Q", "title": "FSEO: A Few-Shot Evolutionary Optimization Framework for Expensive Multi-Objective Optimization and Constrained Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Meta-learning has been demonstrated to be useful to improve the sampling efficiency of Bayesian optimization (BO) and surrogate-assisted evolutionary algorithms (SAEAs) when solving expensive optimization problems (EOPs). However, existing studies focuses on only single-objective optimization, leaving other expensive optimization scenarios unconsidered. We propose a generalized few-shot evolutionary optimization (FSEO) framework and focus on its performance on two common expensive optimization scenarios: multi-objective EOPs (EMOPs) and constrained EOPs (ECOPs). We develop a novel meta-learning modeling approach to train surrogates for our FSEO framework, an accuracy-based update strategy is designed to adapt surrogates during the optimization process. The surrogates in FSEO framework combines neural network with Gaussian Processes (GPs), their network parameters and some parameters of GPs represent useful experience and are meta-learned across related optimization tasks, the remaining GPs parameters are task-specific parameters that represent unique features of the target task. We demonstrate that our FSEO framework is able to improve sampling efficiency on both EMOP and ECOP. Empirical conclusions are made to guide the application of our FSEO framework.", "keywords": "Expensive optimization;few-shot optimization;multi-objective optimization;constrained optimization;surrogate-assisted evolutionary optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Xunzhao Yu", "authorids": "~Xunzhao_Yu1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@misc{\nanonymous2024fseo,\ntitle={{FSEO}: A Few-Shot Evolutionary Optimization Framework for Expensive Multi-Objective Optimization and Constrained Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=08oUnmtj8Q}\n}", "github": "", "project": "", "reviewers": "NZZC;rKvd;WsWY;1kYc", "site": "https://openreview.net/forum?id=08oUnmtj8Q", "pdf_size": 3929172, "rating": "4;4;4;6", "confidence": "3;4;3;4", "soundness": "2;2;2;3", "novelty": "2;1;2;3", "presentation": "2;3;2;3", "wc_summary": "129;33;72;17", "wc_strengths": "57;36;25;19", "wc_weaknesses": "224;232;91;12", "wc_questions": "2;68;73;101", "wc_limitations": "22;2;24;10", "wc_review": "434;371;285;159", "wc_reply_reviewers": "338;35;0;0", "wc_reply_authors": "813;79;37;37", "reply_reviewers": "3;1;0;0", "reply_authors": "4;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.75, 43.16465568031326 ], "wc_strengths_avg": [ 34.25, 14.48059045757458 ], "wc_weaknesses_avg": [ 139.75, 92.60770756259977 ], "wc_questions_avg": [ 61.0, 36.31115531073061 ], "wc_limitations_avg": [ 14.5, 8.986100377805714 ], "wc_review_avg": [ 312.25, 103.08097545134116 ], "wc_reply_reviewers_avg": [ 93.25, 142.02706608249005 ], "wc_reply_authors_avg": [ 241.5, 330.40089285593643 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BkZm4gfnAu0J:scholar.google.com/&scioq=FSEO:+A+Few-Shot+Evolutionary+Optimization+Framework+for+Expensive+Multi-Objective+Optimization+and+Constrained+Optimization&hl=en&as_sdt=0,22", "gs_version_total": 2 }, { "title": "Fast Iterative Hard Thresholding Methods with Pruning Gradient Computations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96949", "id": "09RKw0vXjR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=09RKw0vXjR", "openreview": "https://openreview.net/forum?id=09RKw0vXjR", "poster": "/media/PosterPDFs/NeurIPS%202024/96949.png?t=1733375377.8112724", "project": "", "author_site": "Yasutoshi Ida, Sekitoshi Kanai, Atsutoshi Kumagai, Tomoharu Iwata, Yasuhiro Fujiwara", "tldr": "", "abstract": "We accelerate the iterative hard thresholding (IHT) method, which finds \\(k\\) important elements from a parameter vector in a linear regression model. Although the plain IHT repeatedly updates the parameter vector during the optimization, computing gradients is the main bottleneck. Our method safely prunes unnecessary gradient computations to reduce the processing time.The main idea is to efficiently construct a candidate set, which contains \\(k\\) important elements in the parameter vector, for each iteration. Specifically, before computing the gradients, we prune unnecessary elements in the parameter vector for the candidate set by utilizing upper bounds on absolute values of the parameters. Our method guarantees the same optimization results as the plain IHT because our pruning is safe. Experiments show that our method is up to 73 times faster than the plain IHT without degrading accuracy.", "keywords": "feature selection;sparse coding;dictionary learning;compressed sensing;iterative hard thresholding;pruning method", "primary_area": "other", "supplementary_material": "", "author": "Yasutoshi Ida;Sekitoshi Kanai;Atsutoshi Kumagai;Tomoharu Iwata;Yasuhiro Fujiwara", "authorids": "~Yasutoshi_Ida1;~Sekitoshi_Kanai1;~Atsutoshi_Kumagai2;~Tomoharu_Iwata1;~Yasuhiro_Fujiwara1", "gender": "M;M;M;M;M", "homepage": "http://yasutoshi.github.io/;https://sekitoshi.github.io/;https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ&hl=ja;http://www.kecl.ntt.co.jp/as/members/iwata/;http://www.linkedin.com/in/yasuhiro-fujiwara-8960b0180", "dblp": "120/6855;209/4874;178/8630;29/5953;02/2520", "google_scholar": "https://scholar.google.co.jp/citations?user=HFLzlEgAAAAJ;qa2i5_IAAAAJ;https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ;S1F-gScAAAAJ;https://scholar.google.co.jp/citations?user=kCaZaaMAAAAJ", "orcid": "0000-0003-4279-9503;0000-0003-4383-4454;0000-0002-2915-4615;;0000-0001-9578-1118", "linkedin": ";;;tomoharu-iwata-025a493;", "or_profile": "~Yasutoshi_Ida1;~Sekitoshi_Kanai1;~Atsutoshi_Kumagai2;~Tomoharu_Iwata1;~Yasuhiro_Fujiwara1", "aff": "NTT;NTT;NTT;NTT;NTT", "aff_domain": "ntt.co.jp;ntt.co.jp;ntt.co.jp;hco.ntt.co.jp;ntt.co.jp", "position": "Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nida2024fast,\ntitle={Fast Iterative Hard Thresholding Methods with Pruning Gradient Computations},\nauthor={Yasutoshi Ida and Sekitoshi Kanai and Atsutoshi Kumagai and Tomoharu Iwata and Yasuhiro Fujiwara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=09RKw0vXjR}\n}", "github": "", "reviewers": "abvp;Hc1f;Lxri;FTQw", "pdf_size": 559304, "rating": "5;5;6;7", "confidence": "4;2;3;3", "soundness": "1;3;2;3", "novelty": "2;3;2;3", "presentation": "1;3;2;3", "wc_summary": "57;46;52;188", "wc_strengths": "35;12;47;59", "wc_weaknesses": "236;41;78;66", "wc_questions": "98;30;87;59", "wc_limitations": "20;6;22;20", "wc_review": "446;135;286;392", "wc_reply_reviewers": "106;5;31;26", "wc_reply_authors": "107;23;31;34", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 85.75, 59.16238247400116 ], "wc_strengths_avg": [ 38.25, 17.36915369268175 ], "wc_weaknesses_avg": [ 105.25, 76.65955582965505 ], "wc_questions_avg": [ 68.5, 26.386549603917523 ], "wc_limitations_avg": [ 17.0, 6.4031242374328485 ], "wc_review_avg": [ 314.75, 118.67049970401237 ], "wc_reply_reviewers_avg": [ 42.0, 38.21648858804273 ], "wc_reply_authors_avg": [ 48.75, 33.87015648029988 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uh7CtAHBDB0J:scholar.google.com/&scioq=Fast+Iterative+Hard+Thresholding+Methods+with+Pruning+Gradient+Computations&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ntt.co.jp;ntt.co.jp;ntt.co.jp;hco.ntt.co.jp;ntt.co.jp", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "NTT Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt.co.jp", "aff_unique_abbr": "NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "RefDrop: Controllable Consistency in Image or Video Generation via Reference Feature Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96948", "id": "09nyBqSdUz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=09nyBqSdUz", "openreview": "https://openreview.net/forum?id=09nyBqSdUz", "poster": "/media/PosterPDFs/NeurIPS%202024/96948.png?t=1731392354.7708242", "project": "", "author_site": "Jiaojiao Fan, Haotian Xue, Qinsheng Zhang, Yongxin Chen", "tldr": "", "abstract": "There is a rapidly growing interest in controlling consistency across multiple generated images using diffusion models. Among various methods, recent works have found that simply manipulating attention modules by concatenating features from multiple reference images provides an efficient approach to enhancing consistency without fine-tuning. Despite its popularity and success, few studies have elucidated the underlying mechanisms that contribute to its effectiveness. In this work, we reveal that the popular approach is a linear interpolation of image self-attention and cross-attention between synthesized content and reference features, with a constant rank-1 coefficient. Motivated by this observation, we find that a rank-1 coefficient is not necessary and simplifies the controllable generation mechanism. The resulting algorithm, which we coin as RefDrop, allows users to control the influence of reference context in a direct and precise manner. Besides further enhancing consistency in single-subject image generation, our method also enables more interesting applications, such as the consistent generation of multiple subjects, suppressing specific features to encourage more diverse content, and high-quality personalized video generation by boosting temporal consistency. Even compared with state-of-the-art image-prompt-based generators, such as IP-Adapter, RefDrop is competitive in terms of controllability and quality while avoiding the need to train a separate image encoder for feature injection from reference images, making it a versatile plug-and-play solution for any image or video diffusion model.", "keywords": "Consistent image generation;Diverse image generation;Improve temporal-consistency;Feature injection from multiple images", "primary_area": "generative_models", "supplementary_material": "/attachment/6684a4f99f446bfd31e6b04f05879f2919e4513f.zip", "author": "Jiaojiao Fan;Haotian Xue;Qinsheng Zhang;Yongxin Chen", "authorids": "~Jiaojiao_Fan1;~Haotian_Xue1;~Qinsheng_Zhang1;~Yongxin_Chen1", "gender": "F;M;M;M", "homepage": "https://sbyebss.github.io;https://qsh-zh.github.io/;https://yongxin.ae.gatech.edu/;https://xavihart.github.io", "dblp": "78/10176;;;", "google_scholar": "zse9JEwAAAAJ;;X8BYiV4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "jiaojiao-fan-9a1a14162/?locale=en_US;;;haotian-xue-gatech/", "or_profile": "~Jiaojiao_Fan1;~Qinsheng_Zhang1;~Yongxin_Chen1;~Xue_Haotian1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;NVIDIA", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;nvidia.com", "position": "PhD student;PhD student;Associate Professor;Research Intern", "bibtex": "@inproceedings{\nfan2024refdrop,\ntitle={RefDrop: Controllable Consistency in Image or Video Generation via Reference Feature Guidance},\nauthor={Jiaojiao Fan and Haotian Xue and Qinsheng Zhang and Yongxin Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=09nyBqSdUz}\n}", "github": "", "reviewers": "Z675;NJob;c2sE;5p1r", "pdf_size": 49943732, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "novelty": "3;3;3;2", "presentation": "4;3;4;3", "wc_summary": "83;436;99;51", "wc_strengths": "89;40;62;43", "wc_weaknesses": "108;15;82;51", "wc_questions": "24;16;112;50", "wc_limitations": "11;17;26;2", "wc_review": "315;524;381;197", "wc_reply_reviewers": "17;36;34;135", "wc_reply_authors": "0;0;245;820", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 167.25, 156.12234785577624 ], "wc_strengths_avg": [ 58.5, 19.525624189766635 ], "wc_weaknesses_avg": [ 64.0, 34.74910070778811 ], "wc_questions_avg": [ 50.5, 37.66629793329841 ], "wc_limitations_avg": [ 14.0, 8.74642784226795 ], "wc_review_avg": [ 354.25, 118.10879518477869 ], "wc_reply_reviewers_avg": [ 55.5, 46.489246068311324 ], "wc_reply_authors_avg": [ 266.25, 334.98833934929735 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9803089119184196045&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;gatech.edu;nvidia.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Georgia Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.gatech.edu;https://www.nvidia.com", "aff_unique_abbr": "Georgia Tech;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Simple Framework for Generalization in Visual RL under Dynamic Scene Perturbations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96947", "id": "0AumdfLzpK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0AumdfLzpK", "openreview": "https://openreview.net/forum?id=0AumdfLzpK", "poster": "/media/PosterPDFs/NeurIPS%202024/96947.png?t=1732451612.1247888", "project": "", "author_site": "Wonil Song, Hyesong Choi, Kwanghoon Sohn, Dongbo Min", "tldr": "", "abstract": "In the rapidly evolving domain of vision-based deep reinforcement learning (RL), a pivotal challenge is to achieve generalization capability to dynamic environmental changes reflected in visual observations.\nOur work delves into the intricacies of this problem, identifying two key issues that appear in previous approaches for visual RL generalization: (i) imbalanced saliency and (ii) observational overfitting.\nImbalanced saliency is a phenomenon where an RL agent disproportionately identifies salient features across consecutive frames in a frame stack. \nObservational overfitting occurs when the agent focuses on certain background regions rather than task-relevant objects.\nTo address these challenges, we present a simple yet effective framework for generalization in visual RL (SimGRL) under dynamic scene perturbations.\nFirst, to mitigate the imbalanced saliency problem, we introduce an architectural modification to the image encoder to stack frames at the feature level rather than the image level.\nSimultaneously, to alleviate the observational overfitting problem, we propose a novel technique called shifted random overlay augmentation, which is specifically designed to learn robust representations capable of effectively handling dynamic visual scenes.\nExtensive experiments demonstrate the superior generalization capability of SimGRL, achieving state-of-the-art performance in benchmarks including the DeepMind Control Suite.", "keywords": "Visual Reinforcement Learning;Generalization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Wonil Song;Hyesong Choi;Kwanghoon Sohn;Dongbo Min", "authorids": "~Wonil_Song1;~Hyesong_Choi1;~Kwanghoon_Sohn2;~Dongbo_Min3", "gender": "M;F;M;M", "homepage": "http://diml.yonsei.ac.kr/;;https://diml.yonsei.ac.kr;http://cvl.ewha.ac.kr", "dblp": ";275/3868;21/2373;44/1149", "google_scholar": ";Ll3vLUsAAAAJ;zEtk0QsAAAAJ;3REUPXYAAAAJ", "orcid": ";0000-0003-4440-0164;;", "linkedin": ";;;", "or_profile": "~Wonil_Song1;~Hyesong_Choi1;~Kwanghoon_Sohn2;~Dongbo_Min3", "aff": "Yonsei Univ.;Ewha Womans University;Yonsei University;Ewha Womans University", "aff_domain": "yonsei.ac.kr;ewha.ac.kr;yonsei.ac.kr;ewha.ac.kr", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsong2024a,\ntitle={A Simple Framework for Generalization in Visual {RL} under Dynamic Scene Perturbations},\nauthor={Wonil Song and Hyesong Choi and Kwanghoon Sohn and Dongbo Min},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0AumdfLzpK}\n}", "github": "", "reviewers": "t66L;wvnF;kgwp;z8gS", "pdf_size": 10818152, "rating": "6;6;6;6", "confidence": "4;5;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;4", "wc_summary": "151;188;66;69", "wc_strengths": "56;43;36;138", "wc_weaknesses": "84;481;190;106", "wc_questions": "88;231;38;121", "wc_limitations": "19;75;9;76", "wc_review": "398;1018;339;510", "wc_reply_reviewers": "12;206;310;0", "wc_reply_authors": "42;69;506;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 118.5, 52.66165587977651 ], "wc_strengths_avg": [ 68.25, 40.90461465409496 ], "wc_weaknesses_avg": [ 215.25, 158.44774375168615 ], "wc_questions_avg": [ 119.5, 70.83254901526557 ], "wc_limitations_avg": [ 44.75, 30.95460385790779 ], "wc_review_avg": [ 566.25, 267.9518380231791 ], "wc_reply_reviewers_avg": [ 132.0, 131.32402674301454 ], "wc_reply_authors_avg": [ 154.25, 204.56585125577533 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:omZyUnIYb_0J:scholar.google.com/&scioq=A+Simple+Framework+for+Generalization+in+Visual+RL+under+Dynamic+Scene+Perturbations&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "yonsei.ac.kr;ewha.ac.kr;yonsei.ac.kr;ewha.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Yonsei University;Ewha Womans University", "aff_unique_dep": ";", "aff_unique_url": "https://www.yonsei.ac.kr;http://www.ewha.ac.kr", "aff_unique_abbr": "Yonsei;Ewha", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Rethinking Human Evaluation Protocol for Text-to-Video Models: Enhancing Reliability, Reproducibility, and Practicality", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96946", "id": "0AwMciNShl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0AwMciNShl", "openreview": "https://openreview.net/forum?id=0AwMciNShl", "poster": "/media/PosterPDFs/NeurIPS%202024/96946.png?t=1729079669.652648", "project": "", "author_site": "Tianle Zhang, Langtian Ma, Yuchen Yan, yuchen zhang, yue yang, Ziyao Guo, Wenqi Shao, Kai Wang, Yang You, Yu Qiao, Ping Luo, Kaipeng Zhang", "tldr": "", "abstract": "Recent text-to-video (T2V) technology advancements, as demonstrated by models such as Gen2, Pika, and Sora, have significantly broadened its applicability and popularity. \nDespite these strides, evaluating these models poses substantial challenges. \nPrimarily, due to the limitations inherent in automatic metrics, manual evaluation is often considered a superior method for assessing T2V generation. However, existing manual evaluation protocols face reproducibility, reliability, and practicality issues.\nTo address these challenges, this paper introduces the Text-to-Video Human Evaluation (T2VHE) protocol, a comprehensive and standardized protocol for T2V models. \nThe T2VHE protocol includes well-defined metrics, thorough annotator training, and an effective dynamic evaluation module. \nExperimental results demonstrate that this protocol not only ensures high-quality annotations but can also reduce evaluation costs by nearly 50\\%.\nWe will open-source the entire setup of the T2VHE protocol, including the complete protocol workflow, the dynamic evaluation component details, and the annotation interface code. This will help communities establish more sophisticated human assessment protocols.", "keywords": "Human evaluation;text-to-video models", "primary_area": "evaluation", "supplementary_material": "", "author": "Tianle Zhang;Langtian Ma;Yuchen Yan;Yuchen Zhang;Yue Yang;Ziyao Guo;Wenqi Shao;Kai Wang;Yang You;Yu Qiao;Ping Luo;Kaipeng Zhang", "authorids": "~Tianle_Zhang4;~Langtian_Ma1;~Yuchen_Yan5;~Yuchen_Zhang8;~Yue_Yang6;~Ziyao_Guo1;~Wenqi_Shao2;~Kai_Wang8;~Yang_You1;~Yu_Qiao1;~Ping_Luo2;~Kaipeng_Zhang1", "gender": "M;M;M;M;;M;M;M;M;;;M", "homepage": ";;;https://yuczhang.com/;;https://github.com/GzyAftermath;https://wqshao126.github.io/;https://kaiwang960112.github.io/;https://www.comp.nus.edu.sg/~youy/;;;http://kpzhang93.github.io/", "dblp": ";;;;;309/6165;227/3122;78/2022-36;33/8167-1.html;;;179/2126", "google_scholar": ";https://scholar.google.com.hk/citations?user=p0wFr4oAAAAJ;https://scholar.google.com/citations?hl=en;Y2oqeP0AAAAJ;;FlZSxJMAAAAJ;Bs9mrwwAAAAJ;i2II0XIAAAAJ;jF4dPZwAAAAJ;;;4OqZBmYAAAAJ", "orcid": "0000-0003-1502-9730;;0009-0006-7545-6709;;;;;0000-0002-1154-5175;;;;", "linkedin": ";;yuchen-yan-47ab20310;;;;;;yang-you-0b92914b/;;;", "or_profile": "~Tianle_Zhang4;~Langtian_Ma1;~Yuchen_Yan5;~Yuchen_Zhang8;~Yue_Yang6;~Ziyao_Guo1;~Wenqi_Shao2;~Kai_Wang8;~Yang_You1;~Yu_Qiao1;~Ping_Luo2;~Kaipeng_Zhang1", "aff": "University of Electronic Science and Technology of China;Southern University of Science and Technology;The University of Nottingham Ningbo China;University of Electronic Science and Technology of China;;Xidian University;Shanghai AI Laboratory;National University of Singapore;National University of Singapore;;;Shanghai AI Laboratory", "aff_domain": "cn.edu;mail.sustech.edu.cn;nottingham.edu.cn;uestc.edu.cn;;xidian.edu.cn;pjlab.org.cn;u.nus.edu;nus.edu.sg;;;pjlab.org.cn", "position": "Undergrad student;Undergrad student;Undergrad student;Undergrad student;;MS student;Researcher;PhD student;Professor;;;Researcher", "bibtex": "@inproceedings{\nzhang2024rethinking,\ntitle={Rethinking Human Evaluation Protocol for Text-to-Video Models: Enhancing Reliability, Reproducibility, and Practicality},\nauthor={Tianle Zhang and Langtian Ma and Yuchen Yan and Yuchen Zhang and Yue Yang and Ziyao Guo and Wenqi Shao and Kai Wang and Yang You and Yu Qiao and Ping Luo and Kaipeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0AwMciNShl}\n}", "github": "", "reviewers": "2pJH;RMCR;AEKZ;LQLU", "pdf_size": 9283473, "rating": "3;5;5;8", "confidence": "5;5;4;2", "soundness": "3;3;3;4", "novelty": "2;2;2;4", "presentation": "4;3;3;4", "wc_summary": "142;119;69;45", "wc_strengths": "76;160;199;64", "wc_weaknesses": "469;305;284;83", "wc_questions": "3;30;179;17", "wc_limitations": "1;16;18;1", "wc_review": "691;630;749;210", "wc_reply_reviewers": "0;14;40;23", "wc_reply_authors": "2252;1488;2401;1270", "reply_reviewers": "0;1;2;1", "reply_authors": "7;6;8;5", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 93.75, 38.58351331851468 ], "wc_strengths_avg": [ 124.75, 56.6187910503218 ], "wc_weaknesses_avg": [ 285.25, 136.9860850597607 ], "wc_questions_avg": [ 57.25, 70.93791299439249 ], "wc_limitations_avg": [ 9.0, 8.031189202104505 ], "wc_review_avg": [ 570.0, 212.06249078986127 ], "wc_reply_reviewers_avg": [ 19.25, 14.515078366994786 ], "wc_reply_authors_avg": [ 1852.75, 482.860940126658 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 6.5, 1.118033988749895 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.914659120760047, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7031695087213971222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cn.edu;mail.sustech.edu.cn;nottingham.edu.cn;uestc.edu.cn;;xidian.edu.cn;pjlab.org.cn;u.nus.edu;nus.edu.sg;;;pjlab.org.cn", "author_num": 12, "aff_unique_index": "0;1;2;0;3;4;5;5;4", "aff_unique_norm": "University of Electronic Science and Technology of China;Southern University of Science and Technology;University of Nottingham;Xidian University;Shanghai AI Laboratory;National University of Singapore", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.uestc.edu.cn;https://www.sustech.edu.cn;https://www.nottingham.edu.cn;http://www.xidian.edu.cn/;https://www.shanghai-ai-lab.com;https://www.nus.edu.sg", "aff_unique_abbr": "UESTC;SUSTech;UoN;Xidian;SAIL;NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ningbo", "aff_country_unique_index": "0;0;0;0;0;0;1;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Promoting Fairness Among Dynamic Agents in Online-Matching Markets under Known Stationary Arrival Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96945", "id": "0C3bLHwjsY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0C3bLHwjsY", "openreview": "https://openreview.net/forum?id=0C3bLHwjsY", "poster": "/media/PosterPDFs/NeurIPS%202024/96945.png?t=1731641014.5806563", "project": "", "author_site": "Will Ma, Pan Xu", "tldr": "", "abstract": "Online (bipartite) matching under known stationary arrivals is a fundamental model that has been studied extensively under the objective of maximizing the total number of customers served. We instead study the objective of *maximizing the minimum matching rate across all online types*, which is referred to as long-run (individual) fairness. For Online Matching under long-run Fairness (OM-LF) with a single offline agent, we show that the first-come-first-serve (FCFS) policy is $1$-competitive, i.e., matching any optimal clairvoyant. For the general case of OM-LF: We present a sampling algorithm (SAMP) and show that (1) SAMP is of competitiveness of at least $1-1/e$ and (2) it is asymptotically optimal with competitiveness approaches one in different regimes when either all offline agents have a sufficiently large matching capacity, or all online types have a sufficiently large arrival rate, or highly imbalance between the total offline matching capacity and the number of online arrivals. To complement the competitive results, we show the following hardness results for OM-LF: (1) Any non-rejecting policy (matching every arriving online agent if possible) is no more than $1/2$-competitive; (2) Any (randomized) policy is no more than $(\\sqrt{3}-1)$-competitive; (3) SAMP can be no more than $(1-1/e)$-competitive suggesting the tightness of competitive analysis for SAMP. We stress that all hardness results mentioned here are independent of any benchmarks. We also consider a few extensions of OM-LF by proposing a few variants of fairness metrics, including long-run group-level fairness and short-run fairness, and we devise related algorithms with provable competitive performance.", "keywords": "Online Matching;Competitive Analysis;Fairness Maximization", "primary_area": "optimization", "supplementary_material": "", "author": "Will Ma;Pan Xu", "authorids": "~Will_Ma1;~Pan_Xu2", "gender": ";Not Specified", "homepage": "http://www.columbia.edu/~wm2428/;https://sites.google.com/site/panxupi/", "dblp": "86/8650.html;11/9718-1.html", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Will_Ma1;~Pan_Xu2", "aff": "Columbia University;New Jersey Institute of Technology", "aff_domain": "columbia.edu;cs.njit.edu", "position": "Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nma2024promoting,\ntitle={Promoting Fairness Among Dynamic Agents in Online-Matching Markets under Known Stationary Arrival Distributions},\nauthor={Will Ma and Pan Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0C3bLHwjsY}\n}", "github": "", "reviewers": "jQss;h2oi;GfYh;mDTW", "pdf_size": 519971, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "4;3;3;4", "novelty": "3;4;3;3", "presentation": "3;2;3;4", "wc_summary": "128;119;293;82", "wc_strengths": "43;65;79;72", "wc_weaknesses": "40;156;46;72", "wc_questions": "2;300;89;81", "wc_limitations": "1;2;5;41", "wc_review": "214;642;512;348", "wc_reply_reviewers": "17;52;24;37", "wc_reply_authors": "0;18;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 155.5, 81.23576798430602 ], "wc_strengths_avg": [ 64.75, 13.497684986693088 ], "wc_weaknesses_avg": [ 78.5, 46.33303357217181 ], "wc_questions_avg": [ 118.0, 110.44229262379517 ], "wc_limitations_avg": [ 12.25, 16.663958113245485 ], "wc_review_avg": [ 429.0, 162.0524606416083 ], "wc_reply_reviewers_avg": [ 32.5, 13.35102992281869 ], "wc_reply_authors_avg": [ 4.5, 7.794228634059948 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=385946271118857166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "columbia.edu;cs.njit.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;New Jersey Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.njit.edu", "aff_unique_abbr": "Columbia;NJIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Quantum algorithm for large-scale market equilibrium computation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96944", "id": "0DE1dLMW2b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0DE1dLMW2b", "openreview": "https://openreview.net/forum?id=0DE1dLMW2b", "poster": "/media/PosterPDFs/NeurIPS%202024/96944.png?t=1731426989.5298698", "project": "", "author_site": "Po-Wei Huang, Patrick Rebentrost", "tldr": "", "abstract": "Classical algorithms for market equilibrium computation such as proportional response dynamics face scalability issues with Internet-based applications such as auctions, recommender systems, and fair division, despite having an almost linear runtime in terms of the product of buyers and goods. In this work, we provide the first quantum algorithm for market equilibrium computation with sub-linear performance. Our algorithm provides a polynomial runtime speedup in terms of the product of the number of buyers and goods while reaching the same optimization objective value as the classical algorithm. Numerical simulations of a system with 16384 buyers and goods support our theoretical results that our quantum algorithm provides a significant speedup.", "keywords": "market equilibrium computation;quantum algorithm", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/5e7251e6e0638472ba513cd4fd17c2d5a7663e75.zip", "author": "Po-Wei Huang;Patrick Rebentrost", "authorids": "~Po-Wei_Huang1;~Patrick_Rebentrost1", "gender": "M;", "homepage": "https://georgepwhuang.github.io;", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?;", "orcid": ";", "linkedin": ";", "or_profile": "~Po-Wei_Huang1;~Patrick_Rebentrost1", "aff": "Centre for Quantum Technologies;", "aff_domain": "nus.edu.sg;", "position": "Researcher;", "bibtex": "@inproceedings{\nhuang2024quantum,\ntitle={Quantum algorithm for large-scale market equilibrium computation},\nauthor={Po-Wei Huang and Patrick Rebentrost},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0DE1dLMW2b}\n}", "github": "", "reviewers": "vuEH;vnkg;B4u4;sd1G", "pdf_size": 618879, "rating": "5;6;6;7", "confidence": "3;3;1;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "35;102;93;177", "wc_strengths": "69;50;49;27", "wc_weaknesses": "170;82;43;39", "wc_questions": "76;96;1;168", "wc_limitations": "9;35;9;6", "wc_review": "359;365;195;417", "wc_reply_reviewers": "57;442;10;16", "wc_reply_authors": "70;110;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.75, 50.48452733263925 ], "wc_strengths_avg": [ 48.75, 14.872373717735847 ], "wc_weaknesses_avg": [ 83.5, 52.690131903421914 ], "wc_questions_avg": [ 85.25, 59.47005549013722 ], "wc_limitations_avg": [ 14.75, 11.755317945508747 ], "wc_review_avg": [ 334.0, 83.3606621854697 ], "wc_reply_reviewers_avg": [ 131.25, 180.32106782070696 ], "wc_reply_authors_avg": [ 45.0, 47.16990566028302 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wE0bFf8x3SoJ:scholar.google.com/&scioq=Quantum+algorithm+for+large-scale+market+equilibrium+computation&hl=en&as_sdt=0,34", "gs_version_total": 5, "email": "nus.edu.sg;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "Centre for Quantum Technologies", "aff_unique_url": "https://quantum.ntu.edu.sg/", "aff_unique_abbr": "CQT", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "Test-time Adaptation in Non-stationary Environments via Adaptive Representation Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96943", "id": "0EfUYVMrLv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0EfUYVMrLv", "openreview": "https://openreview.net/forum?id=0EfUYVMrLv", "poster": "/media/PosterPDFs/NeurIPS%202024/96943.png?t=1731687693.6296034", "project": "", "author_site": "Zhen-Yu Zhang, Zhiyu Xie, Huaxiu Yao, Masashi Sugiyama", "tldr": "", "abstract": "Adapting to distribution shifts is a critical challenge in modern machine learning, especially as data in many real-world applications accumulate continuously in the form of streams. We investigate the problem of sequentially adapting a model to non-stationary environments, where the data distribution is continuously shifting and only a small amount of unlabeled data are available each time. Continual test-time adaptation methods have shown promising results by using reliable pseudo-labels, but they still fall short in exploring representation alignment with the source domain in non-stationary environments. In this paper, we propose to leverage non-stationary representation learning to adaptively align the unlabeled data stream, with its changing distributions, to the source data representation using a sketch of the source data. To alleviate the data scarcity in non-stationary representation learning, we propose a novel adaptive representation alignment algorithm called Ada-ReAlign. This approach employs a group of base learners to explore different lengths of the unlabeled data stream, which are adaptively combined by a meta learner to handle unknown and continuously evolving data distributions. The proposed method comes with nice theoretical guarantees under convexity assumptions. Experiments on both benchmark datasets and a real-world application validate the effectiveness and adaptability of our proposed algorithm.", "keywords": "non-stationary environments;continuous distribution shift;test-time adaptation;unlabeled data", "primary_area": "online_learning", "supplementary_material": "", "author": "Zhen-Yu Zhang;Zhiyu Xie;Huaxiu Yao;Masashi Sugiyama", "authorids": "~Zhen-Yu_Zhang1;~Zhiyu_Xie1;~Huaxiu_Yao1;~Masashi_Sugiyama1", "gender": "M;;M;M", "homepage": "https://zhangzy07.github.io/;;http://huaxiuyao.mystrikingly.com;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": ";293/7336;197/1635;35/1228", "google_scholar": "https://scholar.google.co.jp/citations?user=JP8qCpUAAAAJ;;A20BZnQAAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": "0000-0003-2101-1836;;;0000-0001-6658-6743", "linkedin": ";;huaxiuyao/;", "or_profile": "~Zhen-Yu_Zhang1;~Zhiyu_Xie1;~Huaxiu_Yao1;~Masashi_Sugiyama1", "aff": "RIKEN;Stanford University;Department of Computer Science, University of North Carolina at Chapel Hill;The University of Tokyo", "aff_domain": "riken.jp;stanford.edu;cs.unc.edu;u-tokyo.ac.jp", "position": "Postdoc;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024testtime,\ntitle={Test-time Adaptation in Non-stationary Environments via Adaptive Representation Alignment},\nauthor={Zhen-Yu Zhang and Zhiyu Xie and Huaxiu Yao and Masashi Sugiyama},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0EfUYVMrLv}\n}", "github": "", "reviewers": "rMcB;azfj;73Jf", "pdf_size": 1122840, "rating": "6;7;7", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "91;96;76", "wc_strengths": "33;142;107", "wc_weaknesses": "204;58;113", "wc_questions": "33;98;63", "wc_limitations": "3;3;1", "wc_review": "364;397;360", "wc_reply_reviewers": "15;10;34", "wc_reply_authors": "51;51;52", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.66666666666667, 8.498365855987975 ], "wc_strengths_avg": [ 94.0, 45.43860326491855 ], "wc_weaknesses_avg": [ 125.0, 60.20520464766037 ], "wc_questions_avg": [ 64.66666666666667, 26.562295750848712 ], "wc_limitations_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_review_avg": [ 373.6666666666667, 16.579773487261182 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 10.338708279513881 ], "wc_reply_authors_avg": [ 51.333333333333336, 0.4714045207910317 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9vDgE0Tavz0J:scholar.google.com/&scioq=Test-time+Adaptation+in+Non-stationary+Environments+via+Adaptive+Representation+Alignment&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "riken.jp;stanford.edu;cs.unc.edu;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "RIKEN;Stanford University;University of North Carolina at Chapel Hill;University of Tokyo", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://www.riken.jp;https://www.stanford.edu;https://www.unc.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "RIKEN;Stanford;UNC Chapel Hill;UTokyo", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Chapel Hill", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Japan;United States" }, { "title": "Sketching for Distributed Deep Learning: A Sharper Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96942", "id": "0G0VpMjKyV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0G0VpMjKyV", "openreview": "https://openreview.net/forum?id=0G0VpMjKyV", "poster": "/media/PosterPDFs/NeurIPS%202024/96942.png?t=1733481266.9121125", "project": "", "author_site": "Mayank Shrivastava, Berivan Isik, Qiaobo Li, Sanmi Koyejo, Arindam Banerjee", "tldr": "", "abstract": "The high communication cost between the server and the clients is a significant bottleneck in scaling distributed learning for overparametrized deep models. One popular approach for reducing this communication overhead is randomized sketching. However, existing theoretical analyses for sketching-based distributed learning (sketch-DL) either incur a prohibitive dependence on the ambient dimension or need additional restrictive assumptions such as heavy-hitters. Nevertheless, despite existing pessimistic analyses, empirical evidence suggests that sketch-DL is competitive with its uncompressed counterpart, thus motivating a sharper analysis. In this work, we introduce a sharper ambient dimension-independent convergence analysis for sketch-DL using the second-order geometry specified by the loss Hessian. Our results imply ambient dimension-independent communication complexity for sketch-DL. We present empirical results both on the loss Hessian and overall accuracy of sketch-DL supporting our theoretical results. Taken together, our results provide theoretical justification for the observed empirical success of sketch-DL.", "keywords": "sketching;distributed learning;federated learning;optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Mayank Shrivastava;Berivan Isik;Qiaobo Li;Sanmi Koyejo;Arindam Banerjee", "authorids": "~Mayank_Shrivastava2;~Berivan_Isik1;~Qiaobo_Li1;~Sanmi_Koyejo1;~Arindam_Banerjee4", "gender": "M;;M;;M", "homepage": "https://mayank010698.github.io/;https://sites.google.com/view/berivanisik;;https://arindam.cs.illinois.edu/;https://cs.stanford.edu/~sanmi/", "dblp": ";265/6197;;82/4807.html;14/8885", "google_scholar": "9777WIoAAAAJ;GdXOFKoAAAAJ;;RY7cuPAAAAAJ;EaaOeJwAAAAJ", "orcid": ";;;;0000-0002-4023-419X", "linkedin": ";berivan-isik-439a3b122/;qiaobo-li-581815251/;;sanmi-koyejo-984754/", "or_profile": "~Mayank_Shrivastava2;~Berivan_Isik1;~Qiaobo_Li1;~Arindam_Banerjee4;~Oluwasanmi_O_Koyejo1", "aff": "University of Illinois, Urbana Champaign;Stanford University;Department of Computer Science, University of Illinois at Urbana-Champaign;University of Illinois, Urbana Champaign;Google", "aff_domain": "uiuc.edu;stanford.edu;cs.illinois.edu;illinois.edu;google.com", "position": "MS student;PhD student;PhD student;Professor;Research Scientist", "bibtex": "@inproceedings{\nshrivastava2024sketching,\ntitle={Sketching for Distributed Deep Learning: A Sharper Analysis},\nauthor={Mayank Shrivastava and Berivan Isik and Qiaobo Li and Sanmi Koyejo and Arindam Banerjee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0G0VpMjKyV}\n}", "github": "", "reviewers": "E7W7;wSrY;LRSh", "pdf_size": 866859, "rating": "4;6;7", "confidence": "4;3;5", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "22;74;153", "wc_strengths": "46;82;121", "wc_weaknesses": "31;73;373", "wc_questions": "291;44;103", "wc_limitations": "6;1;1", "wc_review": "396;274;751", "wc_reply_reviewers": "96;23;67", "wc_reply_authors": "738;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 83.0, 53.85783756025363 ], "wc_strengths_avg": [ 83.0, 30.62678566222711 ], "wc_weaknesses_avg": [ 159.0, 152.28919856641178 ], "wc_questions_avg": [ 146.0, 105.32172931862952 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 473.6666666666667, 202.33031299228387 ], "wc_reply_reviewers_avg": [ 62.0, 30.011109054259666 ], "wc_reply_authors_avg": [ 246.0, 347.8965363437814 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15596591988187978150&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "uiuc.edu;stanford.edu;cs.illinois.edu;illinois.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Stanford University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://illinois.edu;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "UIUC;Stanford;Google", "aff_campus_unique_index": "0;1;0;0;2", "aff_campus_unique": "Urbana-Champaign;Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "WenMind: A Comprehensive Benchmark for Evaluating Large Language Models in Chinese Classical Literature and Language Arts", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97880", "id": "0G5OK5vmmg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0G5OK5vmmg", "openreview": "https://openreview.net/forum?id=0G5OK5vmmg", "poster": "/media/PosterPDFs/NeurIPS%202024/97880.png?t=1730896668.2191207", "project": "", "author_site": "Jiahuan Cao, Yang Liu, Yongxin Shi, Kai Ding, Lianwen Jin", "tldr": "", "abstract": "Large Language Models (LLMs) have made significant advancements across numerous domains, but their capabilities in Chinese Classical Literature and Language Arts (CCLLA) remain largely unexplored due to the limited scope and tasks of existing benchmarks. To fill this gap, we propose WenMind, a comprehensive benchmark dedicated for evaluating LLMs in CCLLA. WenMind covers the sub-domains of Ancient Prose, Ancient Poetry, and Ancient Literary Culture, comprising 4,875 question-answer pairs, spanning 42 fine-grained tasks, 3 question formats, and 2 evaluation scenarios: domain-oriented and capability-oriented. Based on WenMind, we conduct a thorough evaluation of 31 representative LLMs, including general-purpose models and ancient Chinese LLMs. The results reveal that even the best-performing model, ERNIE-4.0, only achieves a total score of 64.3, indicating significant room for improvement of LLMs in the CCLLA domain. We also provide insights into the strengths and weaknesses of different LLMs and highlight the importance of pre-training data in achieving better results.\nOverall, WenMind serves as a standardized and comprehensive baseline, providing valuable insights for future CCLLA research. \nOur benchmark and related code are available at \\url{https://github.com/SCUT-DLVCLab/WenMind}.", "keywords": "Benchmark;Large Language Models;Classical Chinese Literature and Language Arts", "primary_area": "", "supplementary_material": "/attachment/79396a8bb52072e4e0140fe238e36f6e78d333dc.pdf", "author": "Jiahuan Cao;Yang Liu;Yongxin Shi;Kai Ding;Lianwen Jin", "authorids": "~Jiahuan_Cao1;~Yang_Liu129;~Yongxin_Shi2;~Kai_Ding2;~Lianwen_Jin1", "gender": "M;M;;M;M", "homepage": "https://github.com/zhuyiqun;https://github.com/lmmlzn;;;http://www.dlvc-lab.net/lianwen/", "dblp": "358/1603;51/3710-353.html;359/4310;44/2891-9.html;54/3221", "google_scholar": ";;e-3XAoAAAAAJ;SX43hBUAAAAJ;WMUStEUAAAAJ", "orcid": "0009-0002-9483-2424;0009-0006-6528-6849;0009-0003-2650-1663;0000-0002-9371-0751;0000-0002-5456-0957", "linkedin": ";;;;", "or_profile": "~Jiahuan_Cao1;~Yang_Liu129;~Yongxin_Shi2;~Kai_Ding2;~Lianwen_Jin1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;INTSIG Information;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;intsig.net;scut.edu.cn", "position": "MS student;MS student;PhD student;Researcher;Professor", "bibtex": "@inproceedings{\ncao2024wenmind,\ntitle={WenMind: A Comprehensive Benchmark for Evaluating Large Language Models in Chinese Classical Literature and Language Arts},\nauthor={Jiahuan Cao and Yang Liu and Yongxin Shi and Kai Ding and Lianwen Jin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=0G5OK5vmmg}\n}", "github": "", "reviewers": "zs3B;G3s7;DTnC;WUQX", "pdf_size": 6991781, "rating": "5;6;6;8", "confidence": "4;3;3;1", "wc_summary_and_contributions": "49;62;71;55", "wc_strengths": "6;69;94;32", "wc_improvement": "48;138;207;99", "wc_limitations": "13;17;26;4", "wc_correctness": "22;1;26;10", "wc_clarity": "9;1;5;10", "wc_relation_to_prior_work": "44;1;19;10", "wc_documentation": "29;4;5;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "221;294;454;225", "wc_reply_reviewers": "115;0;112;0", "wc_reply_authors": "114;0;159;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 59.25, 8.1967981553775 ], "wc_strengths_avg": [ 50.25, 33.751851801049376 ], "wc_improvement_avg": [ 123.0, 58.056007441090884 ], "wc_limitations_avg": [ 15.0, 7.905694150420948 ], "wc_correctness_avg": [ 14.75, 9.883698700385398 ], "wc_clarity_avg": [ 6.25, 3.5619517121937516 ], "wc_relation_to_prior_work_avg": [ 18.5, 16.03901493234544 ], "wc_documentation_avg": [ 10.5, 10.688779163215974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 298.5, 94.35173554312607 ], "wc_reply_reviewers_avg": [ 56.75, 56.75991102882386 ], "wc_reply_authors_avg": [ 68.25, 70.07986515397985 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5304503655240663198&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "scut.edu.cn;scut.edu.cn;scut.edu.cn;intsig.net;scut.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "South China University of Technology;INTSIG Information Co., Ltd.", "aff_unique_dep": ";", "aff_unique_url": "https://www.scut.edu.cn;http://www.intsig.com", "aff_unique_abbr": "SCUT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "T2Vs Meet VLMs: A Scalable Multimodal Dataset for Visual Harmfulness Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97879", "id": "0G8AXwtmy2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0G8AXwtmy2", "openreview": "https://openreview.net/forum?id=0G8AXwtmy2", "poster": "/media/PosterPDFs/NeurIPS%202024/97879.png?t=1730218737.4487233", "project": "", "author_site": "Chen Yeh, You-Ming Chang, Wei-Chen Chiu, Ning Yu", "tldr": "", "abstract": "While widespread access to the Internet and the rapid advancement of generative models boost people's creativity and productivity, the risk of encountering inappropriate or harmful content also increases. To address the aforementioned issue, researchers managed to incorporate several harmful contents datasets with machine learning methods to detect harmful concepts. However, existing harmful datasets are curated by the presence of a narrow range of harmful objects, and only cover real harmful content sources. This restricts the generalizability of methods based on such datasets and leads to the potential misjudgment in certain cases. Therefore, we propose a comprehensive and extensive harmful dataset, **VHD11K**, consisting of 10,000 images and 1,000 videos, crawled from the Internet and generated by 4 generative models, across a total of 10 harmful categories covering a full spectrum of harmful concepts with non-trival definition. We also propose a novel annotation framework by formulating the annotation process as a multi-agent Visual Question Answering (VQA) task, having 3 different VLMs \"debate\" about whether the given image/video is harmful, and incorporating the in-context learning strategy in the debating process. Therefore, we can ensure that the VLMs consider the context of the given image/video and both sides of the arguments thoroughly before making decisions, further reducing the likelihood of misjudgments in edge cases. Evaluation and experimental results demonstrate that \n(1) the great alignment between the annotation from our novel annotation framework and those from human, ensuring the reliability of VHD11K;\n(2) our full-spectrum harmful dataset successfully identifies the inability of existing harmful content detection methods to detect extensive harmful contents and improves the performance of existing harmfulness recognition methods;\n(3) our dataset outperforms the baseline dataset, SMID, as evidenced by the superior improvement in harmfulness recognition methods.\nThe entire dataset is publicly available: https://huggingface.co/datasets/denny3388/VHD11K", "keywords": "harmful dataset;harmful content detection;Vision Language Model", "primary_area": "", "supplementary_material": "/attachment/402f9644a705770f9f243455b4c3032791b74c5a.pdf", "author": "Chen Yeh;You-Ming Chang;Wei-Chen Chiu;Ning Yu", "authorids": "~Chen_Yeh1;~You-Ming_Chang1;~Wei-Chen_Chiu3;~Ning_Yu2", "gender": "M;M;;", "homepage": "https://github.com/denny3388;https://github.com/thisismingggg;;", "dblp": ";359/6124;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chen_Yeh1;~You-Ming_Chang1;~Wei-Chen_Chiu3;~Ning_Yu2", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;;", "aff_domain": "nycu.edu.tw;nycu.edu.tw;;", "position": "MS student;MS student;;", "bibtex": "@inproceedings{\nyeh2024tvs,\ntitle={T2Vs Meet {VLM}s: A Scalable Multimodal Dataset for Visual Harmfulness Recognition},\nauthor={Chen Yeh and You-Ming Chang and Wei-Chen Chiu and Ning Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=0G8AXwtmy2}\n}", "github": "", "reviewers": "NRnF;NyQc;Ddyn;dz4E", "pdf_size": 2889092, "rating": "3;7;7;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "102;12;127;64", "wc_strengths": "22;3;82;17", "wc_improvement": "74;11;45;34", "wc_limitations": "6;11;43;15", "wc_correctness": "1;1;26;13", "wc_clarity": "1;1;6;1", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "1;3;18;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "209;44;349;170", "wc_reply_reviewers": "91;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 2.179449471770337 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 76.25, 43.349596307232204 ], "wc_strengths_avg": [ 31.0, 30.257230540814536 ], "wc_improvement_avg": [ 41.0, 22.66053838724932 ], "wc_limitations_avg": [ 18.75, 14.359230480774379 ], "wc_correctness_avg": [ 10.25, 10.328964130056798 ], "wc_clarity_avg": [ 2.25, 2.165063509461097 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 11.5, 9.759610647971568 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 193.0, 108.7681019416998 ], "wc_reply_reviewers_avg": [ 22.75, 39.40415587219196 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3172252607119106628&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nycu.edu.tw;nycu.edu.tw;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University", "aff_unique_dep": "", "aff_unique_url": "https://www.nycu.edu.tw", "aff_unique_abbr": "NYCU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Grounding Multimodal Large Language Models in Actions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96941", "id": "0Gl5WxY6es", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0Gl5WxY6es", "openreview": "https://openreview.net/forum?id=0Gl5WxY6es", "poster": "", "project": "", "author_site": "Andrew Szot, Bogdan Mazoure, Harsh Agrawal, R Devon Hjelm, Zsolt Kira, Alexander Toshev", "tldr": "", "abstract": "Multimodal Large Language Models (MLLMs) have demonstrated a wide range of capabilities across many domains including Embodied AI. In this work, we study how to best ground a MLLM into different embodiments and their associated action spaces, including both continuous and discrete actions. For continuous actions, a set of learned tokenizations that capture an action at various resolutions allows for sufficient modeling precision, yielding the best performance on downstream tasks. For discrete actions, semantically aligning these actions with the native output token space of the MLLM leads to the strongest performance. We arrive at these lessons via a thorough study of seven action grounding approaches on five different environments, encompassing over 114 embodied tasks.", "keywords": "Embodied AI;Multimodal Large Language Models;Reinforcement Learning;Imitation Learning", "primary_area": "generative_models", "supplementary_material": "", "author": "Andrew Szot;Bogdan Mazoure;Harsh Agrawal;R Devon Hjelm;Zsolt Kira;Alexander T Toshev", "authorids": "~Andrew_Szot1;~Bogdan_Mazoure1;~Harsh_Agrawal1;~R_Devon_Hjelm1;~Zsolt_Kira1;~Alexander_T_Toshev1", "gender": "M;M;M;M;M;M", "homepage": "https://www.andrewszot.com;https://bmazoure.github.io;https://dexter1691.github.io;;https://faculty.cc.gatech.edu/~zk15;http://alex.toshev.tech", "dblp": ";;163/2041;195/5928;36/4127;96/2687", "google_scholar": "IwIWKPYAAAAJ;https://scholar.google.ca/citations?user=NaxShlcAAAAJ;0nsfDPAAAAAJ;https://scholar.google.ca/citations?user=68c5HfwAAAAJ;2a5XgNAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-2626-2004;", "linkedin": ";;harsh092;;;alexander-toshev-9270726/", "or_profile": "~Andrew_Szot1;~Bogdan_Mazoure1;~Harsh_Agrawal1;~R_Devon_Hjelm1;~Zsolt_Kira1;~Alexander_Toshev1", "aff": "Georgia Institute of Technology;Apple;Apple;Apple;Georgia Institute of Technology;Apple", "aff_domain": "gatech.edu;apple.com;apple.com;apple.com;gatech.edu;apple.com", "position": "PhD student;Research Scientist;Researcher;Researcher;Assistant Professor;research scientist", "bibtex": "@inproceedings{\nszot2024grounding,\ntitle={Grounding Multimodal Large Language Models in Actions},\nauthor={Andrew Szot and Bogdan Mazoure and Harsh Agrawal and R Devon Hjelm and Zsolt Kira and Alexander T Toshev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0Gl5WxY6es}\n}", "github": "", "reviewers": "eGot;f4rS;YrxP", "pdf_size": 9732135, "rating": "5;6;7", "confidence": "3;3;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "60;76;83", "wc_strengths": "70;44;105", "wc_weaknesses": "175;64;184", "wc_questions": "254;4;124", "wc_limitations": "77;7;70", "wc_review": "636;195;566", "wc_reply_reviewers": "0;0;325", "wc_reply_authors": "55;56;369", "reply_reviewers": "0;0;2", "reply_authors": "2;2;4", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.0, 9.626352718795768 ], "wc_strengths_avg": [ 73.0, 24.99333244420733 ], "wc_weaknesses_avg": [ 141.0, 54.57105459856901 ], "wc_questions_avg": [ 127.33333333333333, 102.08928554075702 ], "wc_limitations_avg": [ 51.333333333333336, 31.478387647541428 ], "wc_review_avg": [ 465.6666666666667, 193.5119864217431 ], "wc_reply_reviewers_avg": [ 108.33333333333333, 153.2064692570853 ], "wc_reply_authors_avg": [ 160.0, 147.78588114791842 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8656118184868256097&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "gatech.edu;apple.com;apple.com;apple.com;gatech.edu;apple.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Georgia Institute of Technology;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://www.gatech.edu;https://www.apple.com", "aff_unique_abbr": "Georgia Tech;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GenAI Arena: An Open Evaluation Platform for Generative Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97878", "id": "0Gmi8TkUC7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0Gmi8TkUC7", "openreview": "https://openreview.net/forum?id=0Gmi8TkUC7", "poster": "/media/PosterPDFs/NeurIPS%202024/97878.png?t=1730329031.3167162", "project": "", "author_site": "Dongfu Jiang, Max KU, Tianle Li, Yuansheng Ni, Shizhuo Sun, Rongqi Fan, Wenhu Chen", "tldr": "", "abstract": "Generative AI has made remarkable strides to revolutionize fields such as image and video generation. These advancements are driven by innovative algorithms, architecture, and data. However, the rapid proliferation of generative models has highlighted a critical gap: the absence of trustworthy evaluation metrics. Current automatic assessments such as FID, CLIP, FVD, etc often fail to capture the nuanced quality and user satisfaction associated with generative outputs. This paper proposes an open platform GenAI-Arena to evaluate different image and video generative models, where users can actively participate in evaluating these models. By leveraging collective user feedback and votes, GenAI-Arena aims to provide a more democratic and accurate measure of model performance. It covers three tasks of text-to-image generation, text-to-video generation, and image editing respectively. Currently, we cover a total of 35 open-source generative models. GenAI-Arena has been operating for seven months, amassing over 9000 votes from the community. We describe our platform, analyze the data, and explain the statistical methods for ranking the models. To further promote the research in building model-based evaluation metrics, we release a cleaned version of our preference data for the three tasks, namely GenAI-Bench. We prompt the existing multi-modal models like Gemini, and GPT-4o to mimic human voting. We compute the accuracy by comparing the model voting with the human voting to understand their judging abilities. Our results show existing multimodal models are still lagging in assessing the generated visual content, even the best model GPT-4o only achieves an average accuracy of $49.19\\%$ across the three generative tasks. Open-source MLLMs perform even worse due to the lack of instruction-following and reasoning ability in complex vision scenarios.", "keywords": "Evaluation;Benchmark;Platform;Generative Model", "primary_area": "", "supplementary_material": "/attachment/6a5bcc74e5802faf0107b7c8b5691f742727cf95.pdf", "author": "Dongfu Jiang;Max Ku;Tianle Li;Yuansheng Ni;Shizhuo Sun;Rongqi Fan;Wenhu Chen", "authorids": "~Dongfu_Jiang1;~Max_Ku1;~Tianle_Li1;~Yuansheng_Ni1;~Shizhuo_Sun1;~Rongqi_Fan1;~Wenhu_Chen3", "gender": "M;M;F;;M;;", "homepage": "https://jdf-prog.github.io/;https://kuwingfung.github.io/;https://www.litianlephoebe.com/;;https://github.com/sunshizhuo;https://richard5678.github.io;", "dblp": "336/6970;348/0574.html;242/0053;;;379/4566.html;", "google_scholar": "kciKEPUAAAAJ;https://scholar.google.com.hk/citations?user=oCFgVhUAAAAJ;g213g7YAAAAJ;;;Gx4qcdgAAAAJ;", "orcid": "0009-0007-9442-6721;;;;;0009-0001-1427-5481;", "linkedin": "dongfu-jiang-a76a15222/;max-ku-650571172/;;;;richard-fan2020/;", "or_profile": "~Dongfu_Jiang1;~Max_Ku1;~Tianle_Li1;~Yuansheng_Ni1;~Shizhuo_Sun1;~Rongqi_Fan1;~Wenhu_Chen3", "aff": "Allen Institute for Artificial Intelligence;University of Waterloo;University of Waterloo;;University of Toronto;NVIDIA;", "aff_domain": "allenai.org;uwaterloo.ca;uwaterloo.ca;;utoronto.ca;nvidia.com;", "position": "Intern;MS student;MS student;;Undergrad student;Intern;", "bibtex": "@inproceedings{\njiang2024genai,\ntitle={Gen{AI} Arena: An Open Evaluation Platform for Generative Models},\nauthor={Dongfu Jiang and Max Ku and Tianle Li and Yuansheng Ni and Shizhuo Sun and Rongqi Fan and Wenhu Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=0Gmi8TkUC7}\n}", "github": "", "reviewers": "iPqc;Wwgt;dX7w;U3Br", "pdf_size": 2201302, "rating": "5;5;7;7", "confidence": "4;3;3;4", "wc_summary_and_contributions": "118;68;49;62", "wc_strengths": "204;6;46;54", "wc_improvement": "88;287;261;141", "wc_limitations": "215;1;28;122", "wc_correctness": "112;18;13;1", "wc_clarity": "4;15;70;1", "wc_relation_to_prior_work": "11;1;38;1", "wc_documentation": "1;1;22;43", "wc_additional_feedback": "1;1;1;1", "wc_review": "754;398;528;426", "wc_reply_reviewers": "0;169;47;32", "wc_reply_authors": "75;508;42;75", "reply_reviewers": "0;1;1;1", "reply_authors": "3;5;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 74.25, 26.176086414894034 ], "wc_strengths_avg": [ 77.5, 75.26453348025217 ], "wc_improvement_avg": [ 194.25, 82.43595999319714 ], "wc_limitations_avg": [ 91.5, 84.26891479068661 ], "wc_correctness_avg": [ 36.0, 44.31139808220905 ], "wc_clarity_avg": [ 22.5, 27.91504970441572 ], "wc_relation_to_prior_work_avg": [ 12.75, 15.138939857202683 ], "wc_documentation_avg": [ 16.75, 17.41228014936585 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 526.5, 139.97410474798545 ], "wc_reply_reviewers_avg": [ 62.0, 64.0663718342158 ], "wc_reply_authors_avg": [ 175.0, 192.72908446832824 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15432581554922871806&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "allenai.org;uwaterloo.ca;uwaterloo.ca;;utoronto.ca;nvidia.com;", "author_num": 7, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Waterloo;University of Toronto;NVIDIA", "aff_unique_dep": ";;;NVIDIA Corporation", "aff_unique_url": "https://allenai.org;https://uwaterloo.ca;https://www.utoronto.ca;https://www.nvidia.com", "aff_unique_abbr": "AI2;UW;U of T;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "A General Protocol to Probe Large Vision Models for 3D Physical Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96940", "id": "0HRRNEAQFp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0HRRNEAQFp", "openreview": "https://openreview.net/forum?id=0HRRNEAQFp", "poster": "/media/PosterPDFs/NeurIPS%202024/96940.png?t=1731705235.0575316", "project": "", "author_site": "Guanqi Zhan, Chuanxia Zheng, Weidi Xie, Andrew Zisserman", "tldr": "", "abstract": "Our objective in this paper is to probe large vision models to determine to what extent they \u2018understand\u2019 different physical properties of the 3D scene depicted in an image. To this end, we make the following contributions: (i) We introduce a general and lightweight protocol to evaluate whether features of an off-the-shelf large vision model encode a number of physical \u2018properties\u2019 of the 3D scene, by training discriminative classifiers on the features for these properties. The probes are applied on datasets of real images with annotations for the property. (ii) We apply this protocol to properties covering scene geometry, scene material, support relations, lighting, and view-dependent measures, and large vision models including CLIP, DINOv1, DINOv2, VQGAN, Stable Diffusion. (iii) We find that features from Stable Diffusion and DINOv2 are good for discriminative learning of a number of properties, including scene geometry, support relations, shadows and depth, but less performant for occlusion and material, while outperforming DINOv1, CLIP and VQGAN for all properties. (iv) It is observed that different time steps of Stable Diffusion features, as well as different transformer layers of DINO/CLIP/VQGAN, are good at different properties, unlocking potential applications of 3D physical understanding.", "keywords": "3D physical scene understanding; Representation probing protocol; Benchmark and evaluation", "primary_area": "machine_vision", "supplementary_material": "/attachment/746f9dd1a7562e2a74cae12b0e931bc599a6e83f.zip", "author": "Guanqi Zhan;Chuanxia Zheng;Weidi Xie;Andrew Zisserman", "authorids": "~Guanqi_Zhan1;~Chuanxia_Zheng1;~Weidi_Xie3;~Andrew_Zisserman1", "gender": ";M;M;", "homepage": "https://www.robots.ox.ac.uk/~guanqi/;http://www.chuanxiaz.com/;https://weidixie.github.io;", "dblp": "254/2030;195/8988;199/1718;", "google_scholar": "f_m4WJIAAAAJ;mvpE6bIAAAAJ;https://scholar.google.co.uk/citations?user=Vtrqj4gAAAAJ;", "orcid": ";;;", "linkedin": ";chuanxia-zheng-80a3b8110/;;", "or_profile": "~Guanqi_Zhan1;~Chuanxia_Zheng1;~Weidi_Xie3;~Andrew_Zisserman1", "aff": "University of Oxford;University of Oxford;Shanghai Jiaotong University;", "aff_domain": "ox.ac.uk;ox.ac.uk;sjtu.edu.cn;", "position": "PhD student;Postdoc;Associate Professor;", "bibtex": "@inproceedings{\nzhan2024a,\ntitle={A General Protocol to Probe Large Vision Models for 3D Physical Understanding},\nauthor={Guanqi Zhan and Chuanxia Zheng and Weidi Xie and Andrew Zisserman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0HRRNEAQFp}\n}", "github": "", "reviewers": "Wno9;4isv;Lz9q", "pdf_size": 8475493, "rating": "5;7;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "4;4;3", "presentation": "4;3;3", "wc_summary": "143;95;54", "wc_strengths": "69;102;79", "wc_weaknesses": "285;83;47", "wc_questions": "122;58;7", "wc_limitations": "1;12;8", "wc_review": "620;350;195", "wc_reply_reviewers": "11;17;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 97.33333333333333, 36.37153954521157 ], "wc_strengths_avg": [ 83.33333333333333, 13.816254517375137 ], "wc_weaknesses_avg": [ 138.33333333333334, 104.74519347232862 ], "wc_questions_avg": [ 62.333333333333336, 47.04843839656695 ], "wc_limitations_avg": [ 7.0, 4.546060565661952 ], "wc_review_avg": [ 388.3333333333333, 175.61004780162463 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 7.039570693980959 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17458599982004108737&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;ox.ac.uk;sjtu.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Oxford;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.sjtu.edu.cn", "aff_unique_abbr": "Oxford;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;China" }, { "title": "When to Sense and Control? A Time-adaptive Approach for Continuous-Time RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96939", "id": "0JSKjdePGq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0JSKjdePGq", "openreview": "https://openreview.net/forum?id=0JSKjdePGq", "poster": "/media/PosterPDFs/NeurIPS%202024/96939.png?t=1730197260.75527", "project": "", "author_site": "Lenart Treven, Bhavya, Yarden As, Florian Dorfler, Andreas Krause", "tldr": "", "abstract": "Reinforcement learning (RL) excels in optimizing policies for discrete-time Markov decision processes (MDP). However, various systems are inherently continuous in time, making discrete-time MDPs an inexact modeling choice. \nIn many applications, such as greenhouse control or medical treatments, each interaction (measurement or switching of action) involves manual intervention and thus is inherently costly. Therefore, \nwe generally prefer a time-adaptive approach with fewer interactions with the system.\nIn this work, we formalize an RL framework, \n**T**ime-**a**daptive **Co**ntrol \\& **S**ensing (**TaCoS**), that tackles this challenge by optimizing over policies that besides control predict the duration of its application. Our formulation results in an extended MDP that any standard RL algorithm can solve.\nWe demonstrate that state-of-the-art RL algorithms trained on TaCoS drastically reduce the interaction amount over their discrete-time counterpart while retaining the same or improved performance, and exhibiting robustness over discretization frequency.\nFinally, we propose OTaCoS, an efficient model-based algorithm for our setting. We show that OTaCoS enjoys sublinear regret for systems with sufficiently smooth dynamics and empirically results in further sample-efficiency gains.", "keywords": "reinforcement learning;continuous-time", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3d30eb4d166c74afa88948505ef8dccdd8e09d82.zip", "author": "Lenart Treven;Bhavya Sukhija;Yarden As;Florian Dorfler;Andreas Krause", "authorids": "~Lenart_Treven1;~Bhavya_Sukhija1;~Yarden_As1;~Florian_Dorfler1;~Andreas_Krause1", "gender": "M;M;M;M;M", "homepage": ";;https://github.com/yardenas;http://people.ee.ethz.ch/~floriand/;https://las.inf.ethz.ch/krausea", "dblp": "267/9666;312/4742;312/4578;;87/1831-1.html", "google_scholar": "CDnzTWkAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";0000-0001-6238-9734;;0000-0002-9649-5305;0000-0001-7260-9673", "linkedin": "lenart-treven/;;yardenas/;;krausea/", "or_profile": "~Lenart_Treven1;~Bhavya_Sukhija1;~Yarden_As1;~Florian_Dorfler1;~Andreas_Krause1", "aff": "Swiss Federal Institute of Technology;ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;;ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;inf.ethz.ch;;ethz.ch", "position": "PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\ntreven2024when,\ntitle={When to Sense and Control? A Time-adaptive Approach for Continuous-Time {RL}},\nauthor={Lenart Treven and Bhavya Sukhija and Yarden As and Florian Dorfler and Andreas Krause},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0JSKjdePGq}\n}", "github": "", "reviewers": "ZHjB;BvQD;XqGZ;L1VF", "pdf_size": 1419599, "rating": "6;6;7;8", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "71;52;103;78", "wc_strengths": "55;75;21;79", "wc_weaknesses": "127;61;6;72", "wc_questions": "172;38;386;46", "wc_limitations": "5;30;5;7", "wc_review": "430;256;521;282", "wc_reply_reviewers": "19;103;372;13", "wc_reply_authors": "100;171;760;13", "reply_reviewers": "1;1;3;1", "reply_authors": "3;3;4;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 18.261982367749674 ], "wc_strengths_avg": [ 57.5, 22.951034835057 ], "wc_weaknesses_avg": [ 66.5, 42.9563732174866 ], "wc_questions_avg": [ 160.5, 140.62272220377474 ], "wc_limitations_avg": [ 11.75, 10.568230693924125 ], "wc_review_avg": [ 372.25, 108.53657217730805 ], "wc_reply_reviewers_avg": [ 126.75, 145.99721743923752 ], "wc_reply_authors_avg": [ 261.0, 293.48168597035146 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14750780325088040489&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ethz.ch;ethz.ch;inf.ethz.ch;;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "CoFie: Learning Compact Neural Surface Representations with Coordinate Fields", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96938", "id": "0KseSacluJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0KseSacluJ", "openreview": "https://openreview.net/forum?id=0KseSacluJ", "poster": "", "project": "", "author_site": "Hanwen Jiang, Haitao Yang, Georgios Pavlakos, Qixing Huang", "tldr": "", "abstract": "This paper introduces CoFie, a novel local geometry-aware neural surface representation. CoFie is motivated by the theoretical analysis of local SDFs with quadratic approximation. We find that local shapes are highly compressive in an aligned coordinate frame defined by the normal and tangent directions of local shapes. Accordingly, we introduce Coordinate Field, which is a composition of coordinate frames of all local shapes. The Coordinate Field is optimizable and is used to transform the local shapes from the world coordinate frame to the aligned shape coordinate frame. It largely reduces the complexity of local shapes and benefits the learning of MLP-based implicit representations. Moreover, we introduce quadratic layers into the MLP to enhance expressiveness concerning local shape geometry. CoFie is a generalizable surface representation. It is trained on a curated set of 3D shapes and works on novel shape instances during testing. When using the same amount of parameters with prior works, CoFie reduces the shape error by 48% and 56% on novel instances of both training and unseen shape categories. Moreover, CoFie demonstrates comparable performance to prior works when using even 70% fewer parameters. Code and model can be found here: https://hwjiang1510.github.io/CoFie/", "keywords": "Neural Surface Representation;Shape Auto-Decoding;Coordinate Field.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hanwen Jiang;Haitao Yang;Georgios Pavlakos;Qixing Huang", "authorids": "~Hanwen_Jiang1;~Haitao_Yang1;~Georgios_Pavlakos1;~Qixing_Huang1", "gender": "M;M;M;M", "homepage": "https://hwjiang1510.github.io/;https://yanghtr.github.io/;https://geopavlakos.github.io/;https://www.cs.utexas.edu/~huangqx/", "dblp": "237/9854;61/5907-5;145/3361;82/241", "google_scholar": "https://scholar.google.com.sg/citations?user=HJHSuxUAAAAJ;;iH2BZ8UAAAAJ;https://scholar.google.com.tw/citations?user=pamL_rIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hanwen_Jiang1;~Haitao_Yang1;~Georgios_Pavlakos1;~Qixing_Huang1", "aff": "University of Texas at Austin;The University of Texas at Austin;University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2024cofie,\ntitle={CoFie: Learning Compact Neural Surface Representations with Coordinate Fields},\nauthor={Hanwen Jiang and Haitao Yang and Georgios Pavlakos and Qixing Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0KseSacluJ}\n}", "github": "", "reviewers": "58nR;V5fn;4hsy;LpYZ;Kq6M", "pdf_size": 2762639, "rating": "3;3;6;6;7", "confidence": "5;4;3;4;4", "soundness": "2;2;3;2;4", "novelty": "2;2;3;3;4", "presentation": "2;2;3;2;3", "wc_summary": "74;158;94;101;204", "wc_strengths": "45;65;82;30;156", "wc_weaknesses": "305;831;61;85;305", "wc_questions": "46;198;37;187;158", "wc_limitations": "5;91;1;7;46", "wc_review": "475;1343;275;410;869", "wc_reply_reviewers": "95;3185;20;45;60", "wc_reply_authors": "293;3835;18;26;18", "reply_reviewers": "1;7;1;1;1", "reply_authors": "2;8;2;2;2", "rating_avg": [ 5.0, 1.6733200530681511 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 126.2, 47.87650780915417 ], "wc_strengths_avg": [ 75.6, 43.8935074925666 ], "wc_weaknesses_avg": [ 317.4, 277.0715431075519 ], "wc_questions_avg": [ 125.2, 69.63734630211005 ], "wc_limitations_avg": [ 30.0, 34.56009259246856 ], "wc_review_avg": [ 674.4, 388.35118127797676 ], "wc_reply_reviewers_avg": [ 681.0, 1252.2356008355616 ], "wc_reply_authors_avg": [ 838.0, 1502.2102382822452 ], "reply_reviewers_avg": [ 2.2, 2.4000000000000004 ], "reply_authors_avg": [ 3.2, 2.4000000000000004 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5669467095138409, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7010458603981277988&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Plan Transformer for Trajectory Abstraction: Planning as Latent Space Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96937", "id": "0KvYLaTBTE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0KvYLaTBTE", "openreview": "https://openreview.net/forum?id=0KvYLaTBTE", "poster": "", "project": "", "author_site": "Deqian Kong, Dehong Xu, Minglu Zhao, Bo Pang, Jianwen Xie, Andrew Lizarraga, Yuhao Huang, Sirui Xie, Ying Nian Wu", "tldr": "", "abstract": "In tasks aiming for long-term returns, planning becomes essential. We study generative modeling for planning with datasets repurposed from offline reinforcement learning. Specifically, we identify temporal consistency in the absence of step-wise rewards as one key technical challenge. We introduce the Latent Plan Transformer (LPT), a novel model that leverages a latent variable to connect a Transformer- based trajectory generator and the final return. LPT can be learned with maximum likelihood estimation on trajectory-return pairs. In learning, posterior sampling of the latent variable naturally integrates sub-trajectories to form a consistent abstrac- tion despite the finite context. At test time, the latent variable is inferred from an expected return before policy execution, realizing the idea of planning as inference. Our experiments demonstrate that LPT can discover improved decisions from sub- optimal trajectories, achieving competitive performance across several benchmarks, including Gym-Mujoco, Franka Kitchen, Maze2D, and Connect Four. It exhibits capabilities in nuanced credit assignments, trajectory stitching, and adaptation to environmental contingencies. These results validate that latent variable inference can be a strong alternative to step-wise reward prompting.", "keywords": "Generative models;Reinforcement learning;Decision transformer", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Deqian Kong;Dehong Xu;Minglu Zhao;Bo Pang;Jianwen Xie;Andrew Lizarraga;Yuhao Huang;Sirui Xie;Ying Nian Wu", "authorids": "~Deqian_Kong1;~Dehong_Xu1;~Minglu_Zhao1;~Bo_Pang4;~Jianwen_Xie1;~Andrew_Lizarraga1;~Yuhao_Huang3;~Sirui_Xie1;~Ying_Nian_Wu1", "gender": "M;M;;;;;M;M;", "homepage": "https://sites.google.com/view/deqiankong/home;https://dehongxu.github.io/;https://mingluzhao.github.io/;;;https://drewrl3v.github.io/;;https://www.siruixie.com;", "dblp": "199/7131;126/1547;;;;291/3769.html;219/6363;232/3072;", "google_scholar": "https://scholar.google.com/citations?hl=en;;nrM4PzYAAAAJ;;;KUDS8uwAAAAJ;cbKekagAAAAJ;9GJn5FIAAAAJ;", "orcid": ";;;;;;;;", "linkedin": ";dehong-xu-61a947248/;;;;andrew-lizarraga/;;;", "or_profile": "~Deqian_Kong1;~Dehong_Xu1;~Minglu_Zhao1;~Bo_Pang4;~Jianwen_Xie1;~Andrew_Lizarraga1;~Yuhao_Huang3;~Sirui_Xie1;~Ying_Nian_Wu1", "aff": "University of California, Los Angeles;Amazon;University of California, Los Angeles;;;University of California, Los Angeles;Xi'an Jiaotong University;University of California, Los Angeles;", "aff_domain": "ucla.edu;amazon.com;ucla.edu;;;ucla.edu;xjtu.edu.cn;ucla.edu;", "position": "PhD student;Intern;PhD student;;;PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nkong2024latent,\ntitle={Latent Plan Transformer for Trajectory Abstraction: Planning as Latent Space Inference},\nauthor={Deqian Kong and Dehong Xu and Minglu Zhao and Bo Pang and Jianwen Xie and Andrew Lizarraga and Yuhao Huang and Sirui Xie and Ying Nian Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0KvYLaTBTE}\n}", "github": "", "reviewers": "W2Dk;NC2f;uuQw", "pdf_size": 5788573, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "59;149;97", "wc_strengths": "48;94;68", "wc_weaknesses": "168;55;102", "wc_questions": "166;108;46", "wc_limitations": "19;10;6", "wc_review": "460;416;319", "wc_reply_reviewers": "18;47;143", "wc_reply_authors": "21;21;67", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 101.66666666666667, 36.890227552685126 ], "wc_strengths_avg": [ 70.0, 18.83259585576738 ], "wc_weaknesses_avg": [ 108.33333333333333, 46.34891824220089 ], "wc_questions_avg": [ 106.66666666666667, 48.998866200034556 ], "wc_limitations_avg": [ 11.666666666666666, 5.436502143433364 ], "wc_review_avg": [ 398.3333333333333, 58.90293333574785 ], "wc_reply_reviewers_avg": [ 69.33333333333333, 53.418681709263055 ], "wc_reply_authors_avg": [ 36.333333333333336, 21.684607956387456 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11690761981200910876&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucla.edu;amazon.com;ucla.edu;;;ucla.edu;xjtu.edu.cn;ucla.edu;", "author_num": 9, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "University of California, Los Angeles;Amazon;Xi'an Jiao Tong University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com;https://www.xjtu.edu.cn", "aff_unique_abbr": "UCLA;Amazon;XJTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "KVQuant: Towards 10 Million Context Length LLM Inference with KV Cache Quantization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96936", "id": "0LXotew9Du", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0LXotew9Du", "openreview": "https://openreview.net/forum?id=0LXotew9Du", "poster": "", "project": "", "author_site": "Coleman Hooper, Sehoon Kim, Hiva Mohammadzadeh, Michael Mahoney, Sophia Shao, Kurt Keutzer, Amir Gholami", "tldr": "", "abstract": "LLMs are seeing growing use for applications which require large context windows, and with these large context windows KV cache activations surface as the dominant contributor to memory consumption during inference. Quantization is a promising approach for compressing KV cache activations; however, existing solutions fail to represent activations accurately in sub-4-bit precision. Our work, KVQuant, facilitates low precision KV cache quantization by incorporating several novel methods: (i) Per-Channel Key Quantization, where we adjust the dimension along which we quantize the Key activations to better match the distribution; (ii) Pre-RoPE Key Quantization, where we quantize Key activations before the rotary positional embedding to mitigate its impact on quantization; (iii) Non-Uniform KV Cache Quantization, where we derive per-layer sensitivity-weighted non-uniform datatypes that better represent the distributions; and (iv) Per-Vector Dense-and-Sparse Quantization, where we isolate outliers separately for each vector to minimize skews in quantization ranges. By applying our method to the LLaMA, Llama-2, Llama-3, and Mistral models, we achieve < 0.1 perplexity degradation with 3-bit quantization on both Wikitext-2 and C4, outperforming existing approaches. Our method enables serving LLaMA-7B with a context length of up to 1 million on a single A100-80GB GPU and up to 10 million on an 8-GPU system. We develop custom CUDA kernels for KVQuant, showing that we can achieve up to ~1.7x speedups, compared to baseline fp16 matrix-vector multiplications, for the LLaMA-7B model.", "keywords": "Quantization;KV Cache;LLM Inference;Compression;Long Context Length", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Coleman Richard Charles Hooper;Sehoon Kim;Hiva Mohammadzadeh;Michael W. Mahoney;Sophia Shao;Kurt Keutzer;Amir Gholami", "authorids": "~Coleman_Richard_Charles_Hooper1;~Sehoon_Kim1;~Hiva_Mohammadzadeh1;~Michael_W._Mahoney1;~Sophia_Shao1;~Kurt_Keutzer1;~Amir_Gholami2", "gender": "M;M;F;;;M;", "homepage": "https://www.linkedin.com/in/coleman-hooper-165061193/;https://sehoonkim.org;;;https://people.eecs.berkeley.edu/~ysshao/index.html;https://people.eecs.berkeley.edu/~keutzer/;", "dblp": ";;;;133/9941.html;k/KurtKeutzer.html;", "google_scholar": "si-368wAAAAJ;zQABr7QAAAAJ;;;;ID9QePIAAAAJ;", "orcid": ";;;;;0000-0003-3868-8501;", "linkedin": ";sehoon-kim-13a1b51b1/;hivamohammadzadeh/;;;kurtkeutzer/;", "or_profile": "~Coleman_Richard_Charles_Hooper1;~Sehoon_Kim1;~Hiva_Mohammadzadeh1;~Michael_W._Mahoney1;~Sophia_Shao1;~Kurt_Keutzer1;~Amir_Gholami2", "aff": "University of California, Berkeley;University of California, Berkeley;;;University of California, Berkeley;University of California, Berkeley;", "aff_domain": "berkeley.edu;berkeley.edu;;;berkeley.edu;berkeley.edu;", "position": "PhD student;PhD student;;;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nhooper2024kvquant,\ntitle={{KVQ}uant: Towards 10 Million Context Length {LLM} Inference with {KV} Cache Quantization},\nauthor={Coleman Richard Charles Hooper and Sehoon Kim and Hiva Mohammadzadeh and Michael W. Mahoney and Sophia Shao and Kurt Keutzer and Amir Gholami},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0LXotew9Du}\n}", "github": "", "reviewers": "MBTK;gDca;TbGM;j7z6", "pdf_size": 2084523, "rating": "5;6;7;7", "confidence": "3;4;4;2", "soundness": "3;3;4;4", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "77;81;130;63", "wc_strengths": "92;37;111;76", "wc_weaknesses": "15;145;110;64", "wc_questions": "37;47;112;4", "wc_limitations": "7;1;4;8", "wc_review": "228;311;467;215", "wc_reply_reviewers": "0;13;74;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.75, 25.292044203662147 ], "wc_strengths_avg": [ 79.0, 27.230497608380205 ], "wc_weaknesses_avg": [ 83.5, 48.87995499179597 ], "wc_questions_avg": [ 50.0, 39.17269457160179 ], "wc_limitations_avg": [ 5.0, 2.7386127875258306 ], "wc_review_avg": [ 305.25, 100.38519562166525 ], "wc_reply_reviewers_avg": [ 27.5, 28.0579756931964 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 155, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8257744879710009477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;;;berkeley.edu;berkeley.edu;", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reliable Learning of Halfspaces under Gaussian Marginals", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96935", "id": "0Lb8vZT1DB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0Lb8vZT1DB", "openreview": "https://openreview.net/forum?id=0Lb8vZT1DB", "poster": "", "project": "", "author_site": "Ilias Diakonikolas, Lisheng Ren, Nikos Zarifis", "tldr": "", "abstract": "We study the problem of PAC learning halfspaces in the \nreliable agnostic model of Kalai et al. (2012).\nThe reliable PAC model \ncaptures learning scenarios where one type of error is \ncostlier than the others. Our main positive result is a \nnew algorithm for reliable learning \nof Gaussian halfspaces on \n$\\mathbb{R}^d$ with sample and computational complexity \n$d^{O(\\log (\\min\\{1/\\alpha, 1/\\epsilon\\}))}\\min (2^{\\log(1/\\epsilon)^{O(\\log (1/\\alpha))}},2^{\\mathrm{poly}(1/\\epsilon)})$, \nwhere $\\epsilon$ is the excess error and $\\alpha$ \nis the bias of the optimal halfspace. We complement our upper bound with \na Statistical Query lower bound \nsuggesting that the $d^{\\Omega(\\log (1/\\alpha))}$ dependence is best possible. \nConceptually, our results imply a strong computational separation \nbetween reliable agnostic learning and standard agnostic \nlearning of halfspaces in the Gaussian setting.", "keywords": "reliable learning;agnostic learning;halfspace;proper leaning;statistical query", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ilias Diakonikolas;Lisheng Ren;Nikos Zarifis", "authorids": "~Ilias_Diakonikolas1;~Lisheng_Ren1;~Nikos_Zarifis1", "gender": "M;;M", "homepage": "http://www.iliasdiakonikolas.org/;https://www.wisc.edu/directories/person/?q=Lisheng%20Ren&email=lren29%40wisc.edu&savedQuery=Lisheng%20Ren&returnPath=%2Fdirectories%2F;https://nikoszarifis.github.io/", "dblp": "d/IliasDiakonikolas;93/495;241/9782", "google_scholar": "Vb3FLmkAAAAJ;;P1ha1IkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ilias_Diakonikolas1;~Lisheng_Ren1;~NIKOLAOS_ZARIFIS1", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;wisc.edu;wisc.edu", "position": "Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2024reliable,\ntitle={Reliable Learning of Halfspaces under Gaussian Marginals},\nauthor={Ilias Diakonikolas and Lisheng Ren and Nikos Zarifis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0Lb8vZT1DB}\n}", "github": "", "reviewers": "ezCR;ae3s;wFKn;ZrmL", "pdf_size": 466253, "rating": "7;7;7;8", "confidence": "2;2;3;2", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "61;97;192;92", "wc_strengths": "145;44;95;61", "wc_weaknesses": "194;49;26;98", "wc_questions": "87;1;57;42", "wc_limitations": "12;1;4;9", "wc_review": "499;192;374;302", "wc_reply_reviewers": "25;13;18;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.5, 49.03315205042401 ], "wc_strengths_avg": [ 86.25, 38.57055223872222 ], "wc_weaknesses_avg": [ 91.75, 64.50726703248247 ], "wc_questions_avg": [ 46.75, 30.986892390170397 ], "wc_limitations_avg": [ 6.5, 4.272001872658765 ], "wc_review_avg": [ 341.75, 111.5490363024262 ], "wc_reply_reviewers_avg": [ 17.0, 5.1478150704935 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4268470372987992392&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "wisc.edu;wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Local Superior Soups: A Catalyst for Model Merging in Cross-Silo Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96934", "id": "0LfgE6kvKZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0LfgE6kvKZ", "openreview": "https://openreview.net/forum?id=0LfgE6kvKZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96934.png?t=1731518093.5294044", "project": "", "author_site": "Minghui Chen, Meirui Jiang, Xin Zhang, DOU QI, Zehua Wang, Xiaoxiao Li", "tldr": "", "abstract": "Federated learning (FL) is a learning paradigm that enables collaborative training of models using decentralized data. \nRecently, the utilization of pre-trained weight initialization in FL has been demonstrated to effectively improve model performance. \nHowever, the evolving complexity of current pre-trained models, characterized by a substantial increase in parameters, markedly intensifies the challenges associated with communication rounds required for their adaptation to FL. \nTo address these communication cost issues and increase the performance of pre-trained model adaptation in FL, we propose an innovative model interpolation-based local training technique called ``Local Superior Soups.''\nOur method enhances local training across different clients, encouraging the exploration of a connected low-loss basin within a few communication rounds through regularized model interpolation. \nThis approach acts as a catalyst for the seamless adaptation of pre-trained models in in FL.\nWe demonstrated its effectiveness and efficiency across diverse widely-used FL datasets.", "keywords": "Federated Learning; Model Merging", "primary_area": "other", "supplementary_material": "", "author": "Minghui Chen;Meirui Jiang;Xin Zhang;Qi Dou;Zehua Wang;Xiaoxiao Li", "authorids": "~Minghui_Chen1;~Meirui_Jiang2;~Xin_Zhang16;~Qi_Dou2;~Zehua_Wang1;~Xiaoxiao_Li1", "gender": "M;M;F;M;Unspecified;M", "homepage": "https://chenminghui.com/;https://xinzhang-nac.github.io/;https://www.cse.cuhk.edu.hk/~qdou;https://people.ece.ubc.ca/zwang/;https://xxlya.github.io/;https://meiruijiang.github.io/MeiruiJiang/", "dblp": ";76/1584-54.html;165/7846;90/10799;71/8042;285/5480", "google_scholar": "aDKyh4cAAAAJ;9u5Pa0gAAAAJ;https://scholar.google.com.hk/citations?user=iHh7IJQAAAAJ;https://scholar.google.ca/citations?user=pquTtPYAAAAJ;sdENOQ4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-0784-2038;0000-0002-3416-9950;;;0000-0003-4228-8420", "linkedin": "minghui-chen-75a046210/;;;wangzehua/;;", "or_profile": "~Minghui_Chen1;~Xin_Zhang16;~Qi_Dou2;~Zehua_Wang1;~Xiaoxiao_Li1;~Meirui_JIANG1", "aff": "University of British Columbia;Meta Facebook;The Chinese University of Hong Kong;University of British Columbia;University of British Columbia;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "ubc.ca;fb.com;cuhk.edu.hk;ubc.ca;ece.ubc.ca;cse.cuhk.edu.hk", "position": "PhD student;Research Scientist;Assistant Professor;Researcher;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchen2024local,\ntitle={Local Superior Soups: A Catalyst for Model Merging in Cross-Silo Federated Learning},\nauthor={Minghui Chen and Meirui Jiang and Xin Zhang and Qi Dou and Zehua Wang and Xiaoxiao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0LfgE6kvKZ}\n}", "github": "", "reviewers": "9KZz;3kGS;2F5f", "pdf_size": 1191558, "rating": "5;5;5", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;3;2", "wc_summary": "77;63;51", "wc_strengths": "29;44;35", "wc_weaknesses": "39;149;56", "wc_questions": "1;105;151", "wc_limitations": "3;7;1", "wc_review": "149;368;294", "wc_reply_reviewers": "0;24;0", "wc_reply_authors": "117;40;117", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 10.624918300339484 ], "wc_strengths_avg": [ 36.0, 6.164414002968976 ], "wc_weaknesses_avg": [ 81.33333333333333, 48.3482735529983 ], "wc_questions_avg": [ 85.66666666666667, 62.74463235121303 ], "wc_limitations_avg": [ 3.6666666666666665, 2.494438257849294 ], "wc_review_avg": [ 270.3333333333333, 90.95908726210678 ], "wc_reply_reviewers_avg": [ 8.0, 11.313708498984761 ], "wc_reply_authors_avg": [ 91.33333333333333, 36.298148100909444 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18345161495932591353&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ubc.ca;fb.com;cuhk.edu.hk;ubc.ca;ece.ubc.ca;cse.cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;1;2;0;0;2", "aff_unique_norm": "University of British Columbia;Meta;Chinese University of Hong Kong", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.ubc.ca;https://meta.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "UBC;Meta;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;0;0;2", "aff_country_unique": "Canada;United States;China" }, { "title": "Imprecise Label Learning: A Unified Framework for Learning with Various Imprecise Label Configurations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96933", "id": "0Lr9HQijA1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0Lr9HQijA1", "openreview": "https://openreview.net/forum?id=0Lr9HQijA1", "poster": "/media/PosterPDFs/NeurIPS%202024/96933.png?t=1731704880.9846606", "project": "", "author_site": "Hao Chen, Ankit Shah, Jindong Wang, Ran Tao, Yidong Wang, Xiang Li, Xing Xie, Masashi Sugiyama, Rita Singh, Bhiksha Raj", "tldr": "", "abstract": "Learning with reduced labeling standards, such as noisy label, partial label, and supplementary unlabeled data, which we generically refer to as imprecise label, is a commonplace challenge in machine learning tasks. Previous methods tend to propose specific designs for every emerging imprecise label configuration, which is usually unsustainable when multiple configurations of imprecision coexist. \nIn this paper, we introduce imprecise label learning (ILL), a framework for the unification of learning with various imprecise label configurations. ILL leverages expectation-maximization (EM) for modeling the imprecise label information, treating the precise labels as latent variables. Instead of approximating the correct labels for training, it considers the entire distribution of all possible labeling entailed by the imprecise information. We demonstrate that ILL can seamlessly adapt to partial label learning, semi-supervised learning, noisy label learning, and, more importantly, a mixture of these settings, with closed-form learning objectives derived from the unified EM modeling. Notably, ILL surpasses the existing specified techniques for handling imprecise labels, marking the first practical and unified framework with robust and effective performance across various challenging settings. We hope our work will inspire further research on this topic, unleashing the full potential of ILL in wider scenarios where precise labels are expensive and complicated to obtain.", "keywords": "Imprecise Label Learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/b8eecca70a46067977e88adae829783022987181.zip", "author": "Hao Chen;Ankit Shah;Jindong Wang;Ran Tao;Yidong Wang;Xiang Li;Xing Xie;Masashi Sugiyama;Rita Singh;Bhiksha Raj", "authorids": "~Hao_Chen15;~Ankit_Shah1;~Jindong_Wang4;~Ran_Tao2;~Yidong_Wang1;~Xiang_Li35;~Xing_Xie3;~Masashi_Sugiyama1;~Rita_Singh1;~Bhiksha_Raj1", "gender": "M;M;M;F;M;;M;M;F;M", "homepage": "https://hhhhhhao.github.io/;https://ankitshah009.github.io/;https://jd92.wang/;;https://qianlanwyd.github.io/;;http://research.microsoft.com/en-us/people/xingx/;http://www.ms.k.u-tokyo.ac.jp/sugi/;http://mlsp.cs.cmu.edu/people/rsingh/index.html;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": ";04/1935-1.html;19/2969-1;99/955;59/6759.html;;08/6809-1;35/1228;;60/3996", "google_scholar": "tktqkhwAAAAJ;https://scholar.google.co.in/citations?user=TqG1H4cAAAAJ;hBZ_tKsAAAAJ;7xW2y6EAAAAJ;;;5EQfAFIAAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;;", "orcid": ";0000-0002-8838-5421;0000-0002-4833-0880;;;;0000-0002-8608-8482;0000-0001-6658-6743;;", "linkedin": "haochen97/;ankpsh01/;jindong-wang/;;;;xingx/;;;", "or_profile": "~Hao_Chen15;~Ankit_Shah1;~Jindong_Wang4;~Ran_Tao2;~Yidong_Wang1;~Xiang_Li35;~Xing_Xie3;~Masashi_Sugiyama1;~Rita_Singh1;~Bhiksha_Raj1", "aff": "Carnegie Mellon University;Accenture;Microsoft Research;;Peking University;;Microsoft Research Asia;The University of Tokyo;School of Computer Science, Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "andrew.cmu.edu;accenture.com;microsoft.com;;pku.edu.cn;;microsoft.com;u-tokyo.ac.jp;cs.cmu.edu;mbzuai.ac.ae", "position": "PhD student;Principal Researcher;Researcher;;PhD student;;Senior Principal Researcher;Full Professor;Research Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024imprecise,\ntitle={Imprecise Label Learning: A Unified Framework for Learning with Various Imprecise Label Configurations},\nauthor={Hao Chen and Ankit Shah and Jindong Wang and Ran Tao and Yidong Wang and Xiang Li and Xing Xie and Masashi Sugiyama and Rita Singh and Bhiksha Raj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0Lr9HQijA1}\n}", "github": "", "reviewers": "Z7AB;GFuM;ASf6;JXvz", "pdf_size": 706594, "rating": "3;6;7;7", "confidence": "2;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "57;89;49;55", "wc_strengths": "25;90;52;78", "wc_weaknesses": "760;165;27;93", "wc_questions": "2;66;1;21", "wc_limitations": "52;1;1;27", "wc_review": "896;411;130;274", "wc_reply_reviewers": "771;15;10;14", "wc_reply_authors": "940;0;10;5", "reply_reviewers": "1;1;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.5, 15.580436450882884 ], "wc_strengths_avg": [ 61.25, 25.033727249452888 ], "wc_weaknesses_avg": [ 261.25, 292.0602463533851 ], "wc_questions_avg": [ 22.5, 26.348624252510795 ], "wc_limitations_avg": [ 20.25, 21.182244923520265 ], "wc_review_avg": [ 427.75, 288.02463002319786 ], "wc_reply_reviewers_avg": [ 202.5, 328.2289597217162 ], "wc_reply_authors_avg": [ 238.75, 404.8823131479072 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6469966392206306, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11537090440769158708&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "andrew.cmu.edu;accenture.com;microsoft.com;;pku.edu.cn;;microsoft.com;u-tokyo.ac.jp;cs.cmu.edu;mbzuai.ac.ae", "author_num": 10, "aff_unique_index": "0;1;2;3;2;4;0;5", "aff_unique_norm": "Carnegie Mellon University;Accenture;Microsoft;Peking University;University of Tokyo;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;Microsoft Research;;;", "aff_unique_url": "https://www.cmu.edu;https://www.accenture.com;https://www.microsoft.com/en-us/research;http://www.pku.edu.cn;https://www.u-tokyo.ac.jp;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;Accenture;MSR;Peking U;UTokyo;MBZUAI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Pittsburgh", "aff_country_unique_index": "0;0;0;1;1;2;0;3", "aff_country_unique": "United States;China;Japan;United Arab Emirates" }, { "title": "GFT: Graph Foundation Model with Transferable Tree Vocabulary", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96932", "id": "0MXzbAv8xy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0MXzbAv8xy", "openreview": "https://openreview.net/forum?id=0MXzbAv8xy", "poster": "", "project": "", "author_site": "Zehong Wang, Zheyuan Zhang, Nitesh Chawla, Chuxu Zhang, Yanfang Ye", "tldr": "", "abstract": "Inspired by the success of foundation models in applications such as ChatGPT, as graph data has been ubiquitous, one can envision the far-reaching impacts that can be brought by Graph Foundation Models (GFMs) with broader applications in the areas such as scientific research, social network analysis, drug discovery, and e-commerce. Despite the significant progress of pre-trained graph neural networks, there haven\u2019t been GFMs that can achieve desired performance on various graph-learning-related tasks. Building GFMs may rely on a vocabulary that encodes transferable patterns shared among different tasks and domains. Unlike image and text, defining such transferable patterns for graphs remains an open question. In this paper, we aim to bridge this gap by rethinking the transferable patterns on graphs as computation trees -- i.e., tree structures derived from the message-passing process. Based on this insight, we propose a cross-task, cross-domain graph foundation model named GFT, short for Graph Foundation model with transferable Tree vocabulary. By treating computation trees as tokens within the transferable vocabulary, GFT improves model generalization and reduces the risk of negative transfer. The theoretical analyses and extensive experimental studies have demonstrated the transferability of computation trees and shown the effectiveness of GFT across diverse tasks and domains in graph learning. The open source code and data are available at https://github.com/Zehong-Wang/GFT.", "keywords": "Graph Foundation Model;Transferability;Computation Tree;Graph Neural Network", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Zehong Wang;Zheyuan Zhang;Nitesh V Chawla;Chuxu Zhang;Yanfang Ye", "authorids": "~Zehong_Wang2;~Zheyuan_Zhang5;~Nitesh_V_Chawla1;~Chuxu_Zhang2;~Yanfang_Ye1", "gender": "M;M;;;", "homepage": "https://zehong-wang.github.io/;https://jasonzhangzy1757.github.io/;;;http://yes-lab.org/", "dblp": "319/7828;;;;", "google_scholar": "-qXxOv0AAAAJ;qJURp_AAAAAJ;;;egjr888AAAAJ", "orcid": "0000-0002-7670-6777;0009-0005-5918-6182;;;", "linkedin": "zehong-wang-745b02286/;jasonzhangzy1757/;;;", "or_profile": "~Zehong_Wang2;~Zheyuan_Zhang5;~Nitesh_V_Chawla1;~Chuxu_Zhang2;~Yanfang_Ye1", "aff": "University of Notre Dame;University of Notre Dame;;;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;;;nd.edu", "position": "PhD student;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nwang2024gft,\ntitle={{GFT}: Graph Foundation Model with Transferable Tree Vocabulary},\nauthor={Zehong Wang and Zheyuan Zhang and Nitesh V Chawla and Chuxu Zhang and Yanfang Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0MXzbAv8xy}\n}", "github": "", "reviewers": "hEws;M9ZQ;U8xo;H6gM", "pdf_size": 910935, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "28;86;93;80", "wc_strengths": "34;51;130;165", "wc_weaknesses": "172;249;233;86", "wc_questions": "11;138;183;4", "wc_limitations": "1;34;1;1", "wc_review": "246;558;640;336", "wc_reply_reviewers": "130;80;12;18", "wc_reply_authors": "74;82;70;63", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 25.674647027758727 ], "wc_strengths_avg": [ 95.0, 54.27246078813821 ], "wc_weaknesses_avg": [ 185.0, 63.97265040624782 ], "wc_questions_avg": [ 84.0, 78.17608329917789 ], "wc_limitations_avg": [ 9.25, 14.289419162443238 ], "wc_review_avg": [ 445.0, 159.90309565483716 ], "wc_reply_reviewers_avg": [ 60.0, 48.394214530251446 ], "wc_reply_authors_avg": [ 72.25, 6.869315832017043 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12762623510340995801&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nd.edu;nd.edu;;;nd.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Not All Tokens Are What You Need for Pretraining", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96931", "id": "0NMzBwqaAJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0NMzBwqaAJ", "openreview": "https://openreview.net/forum?id=0NMzBwqaAJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96931.png?t=1733724962.7310221", "project": "", "author_site": "Zhenghao Lin, Zhibin Gou, Yeyun Gong, Xiao Liu, yelong shen, Ruochen Xu, Chen Lin, Yujiu Yang, Jian Jiao, Nan Duan, Weizhu Chen", "tldr": "", "abstract": "Previous language model pre-training methods have uniformly applied a next-token prediction loss to all training tokens. Challenging this norm, we posit that ''Not all tokens in a corpus are equally important for language model training''. Our initial analysis examines token-level training dynamics of language model, revealing distinct loss patterns for different tokens. Leveraging these insights, we introduce a new language model called Rho-1. Unlike traditional LMs that learn to predict every next token in a corpus, Rho-1 employs Selective Language Modeling (SLM), which selectively trains on useful tokens that aligned with the desired distribution. This approach involves scoring training tokens using a reference model, and then training the language model with a focused loss on tokens with higher scores. When continual continual pretraining on 15B OpenWebMath corpus, Rho-1 yields an absolute improvement in few-shot accuracy of up to 30% in 9 math tasks. After fine-tuning, Rho-1-1B and 7B achieved state-of-the-art results of 40.6% and 51.8% on MATH dataset, respectively - matching DeepSeekMath with only 3% of the pretraining tokens. Furthermore, when continual pretraining on 80B general tokens, Rho-1 achieves 6.8% average enhancement across 15 diverse tasks, increasing both data efficiency and performance of the language model pre-training.", "keywords": "pre-training;next token prediction;data optimization;data selection", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhenghao Lin;Zhibin Gou;Yeyun Gong;Xiao Liu;yelong shen;Ruochen Xu;Chen Lin;Yujiu Yang;Jian Jiao;Nan Duan;Weizhu Chen", "authorids": "~Zhenghao_Lin1;~Zhibin_Gou1;~Yeyun_Gong2;~Xiao_Liu14;~yelong_shen1;~Ruochen_Xu2;~Chen_Lin5;~Yujiu_Yang2;~Jian_Jiao2;~Nan_Duan1;~Weizhu_Chen1", "gender": ";M;M;M;;M;F;M;M;M;M", "homepage": ";https://github.com/zubingou;;https://xiaoliunlc.github.io/;;https://xrc10.github.io/;https://xmudm.github.io/publications/;https://sites.google.com/view/iigroup-thu;;https://nanduan.github.io/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": "260/5513;315/9328;06/10400.html;82/1364-29;;188/3515;37/3102-1.html;30/3847;29/265-7.html;;79/2536", "google_scholar": "https://scholar.google.com.hk/citations?user=sLshrmQAAAAJ;;piUkwMYAAAAJ;https://scholar.google.com.sg/citations?user=cn1k7gYAAAAJ;;HTp5S00AAAAJ;z1l2JSMAAAAJ;4gH3sxsAAAAJ;D6KwmF8AAAAJ;Qaa6OxIAAAAJ;LG_E-4EAAAAJ", "orcid": ";;;0000-0002-8893-366X;;;0000-0002-2275-997X;0000-0002-6427-1024;0000-0003-4779-9588;;", "linkedin": ";;;xiao-liu-71357b72/;;ruochenx/;;;jian-jiao-82897810/;;", "or_profile": "~Zhenghao_Lin1;~Zhibin_Gou1;~Yeyun_Gong2;~Xiao_Liu14;~yelong_shen1;~Ruochen_Xu2;~Chen_Lin5;~Yujiu_Yang2;~Jian_Jiao2;~Nan_Duan1;~Weizhu_Chen1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft Research Asia;;Microsoft Research;Xiamen University;Tsinghua University;Microsoft;Microsoft Research Asia;Microsoft GenAI", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;;research.microsoft.com;xmu.edu.cn;tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com", "position": "Intern;Intern;Researcher;Researcher;;Researcher;Full Professor;Full Professor;Principal Researcher;Principal Researcher;Vice President", "bibtex": "@inproceedings{\nlin2024not,\ntitle={Not All Tokens Are What You Need for Pretraining},\nauthor={Zhenghao Lin and Zhibin Gou and Yeyun Gong and Xiao Liu and yelong shen and Ruochen Xu and Chen Lin and Yujiu Yang and Jian Jiao and Nan Duan and Weizhu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0NMzBwqaAJ}\n}", "github": "", "reviewers": "pLtQ;3eTg;QeQV", "pdf_size": 778470, "rating": "7;7;9", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "4;3;4", "presentation": "3;3;4", "wc_summary": "105;81;131", "wc_strengths": "99;73;65", "wc_weaknesses": "293;180;42", "wc_questions": "228;10;67", "wc_limitations": "4;1;33", "wc_review": "729;345;338", "wc_reply_reviewers": "85;0;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 105.66666666666667, 20.417857108151406 ], "wc_strengths_avg": [ 79.0, 14.514360704718161 ], "wc_weaknesses_avg": [ 171.66666666666666, 102.6396068235303 ], "wc_questions_avg": [ 101.66666666666667, 92.31227196616686 ], "wc_limitations_avg": [ 12.666666666666666, 14.42990721460891 ], "wc_review_avg": [ 470.6666666666667, 182.6916041371968 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 37.932688922470135 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4547076034519442991&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;;research.microsoft.com;xmu.edu.cn;tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com", "author_num": 11, "aff_unique_index": "0;0;0;0;0;1;2;0;0;0", "aff_unique_norm": "Microsoft;Xiamen University;Tsinghua University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.xmu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Microsoft;XMU;THU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;0;1;1;0;1;0", "aff_country_unique": "United States;China" }, { "title": "DMC-VB: A Benchmark for Representation Learning for Control with Visual Distractors", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97877", "id": "0NQzQVu9tY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0NQzQVu9tY", "openreview": "https://openreview.net/forum?id=0NQzQVu9tY", "poster": "/media/PosterPDFs/NeurIPS%202024/97877.png?t=1733266397.4133346", "project": "", "author_site": "Joseph Ortiz, Antoine Dedieu, Wolfgang Lehrach, J Swaroop Guntupalli, Carter Wendelken, Ahmad Humayun, Sivaramakrishnan Swaminathan, Guangyao Zhou, Miguel Lazaro-Gredilla, Kevin Murphy", "tldr": "", "abstract": "Learning from previously collected data via behavioral cloning or offline reinforcement learning (RL) is a powerful recipe for scaling generalist agents by avoiding the need for expensive online learning. Despite strong generalization in some respects, agents are often remarkably brittle to minor visual variations in control-irrelevant factors such as the background or camera viewpoint. In this paper, we present theDeepMind Control Visual Benchmark (DMC-VB), a dataset collected in the DeepMind Control Suite to evaluate the robustness of offline RL agents for solving continuous control tasks from visual input in the presence of visual distractors. In contrast to prior works, our dataset (a) combines locomotion and navigation tasks of varying difficulties, (b) includes static and dynamic visual variations, (c) considers data generated by policies with different skill levels, (d) systematically returns pairs of state and pixel observation, (e) is an order of magnitude larger, and (f) includes tasks with hidden goals. Accompanying our dataset, we propose three benchmarks to evaluate representation learning methods for pretraining, and carry out experiments on several recently proposed methods. First, we find that pretrained representations do not help policy learning on DMC-VB, and we highlight a large representation gap between policies learned on pixel observations and on states. Second, we demonstrate when expert data is limited, policy learning can benefit from representations pretrained on (a) suboptimal data, and (b) tasks with stochastic hidden goals. Our dataset and benchmark code to train and evaluate agents are available at https://github.com/google-deepmind/dmc_vision_benchmark.", "keywords": "representation learning;offline reinforcement learning;behavioral cloning;visual distractor", "primary_area": "", "supplementary_material": "", "author": "Joseph Ortiz;Antoine Dedieu;Wolfgang Lehrach;J Swaroop Guntupalli;Carter Wendelken;Ahmad Humayun;Sivaramakrishnan Swaminathan;Guangyao Zhou;Miguel Lazaro-Gredilla;Kevin Patrick Murphy", "authorids": "~Joseph_Ortiz2;~Antoine_Dedieu1;~Wolfgang_Lehrach1;~J_Swaroop_Guntupalli1;~Carter_Wendelken1;~Ahmad_Humayun2;~Sivaramakrishnan_Swaminathan1;~Guangyao_Zhou1;~Miguel_Lazaro-Gredilla1;~Kevin_Patrick_Murphy1", "gender": "M;M;M;M;;M;;M;M;", "homepage": "https://joeaortiz.github.io/;https://antoine-dedieu.github.io;;;;http://ahumayun.com/;http://sivark.me;https://stanniszhou.github.io;;https://www.cs.ubc.ca/~murphyk/", "dblp": ";217/3589.html;190/7782;136/5291;89/3039;36/9989;342/7709.html;;77/4660;26/2599", "google_scholar": "https://scholar.google.co.uk/citations?user=pea9lz0AAAAJ;Hgoc3FUAAAAJ;;LtpDr8MAAAAJ;;SdZCMmgAAAAJ;oDVFD5oAAAAJ;RW94MCIAAAAJ;SFjDQk8AAAAJ;MxxZkEcAAAAJ", "orcid": ";;;0000-0002-0677-5590;;;;;;", "linkedin": ";;;;;;;;miguel-lazaro-g/;", "or_profile": "~Joseph_Ortiz2;~Antoine_Dedieu1;~Wolfgang_Lehrach1;~J_Swaroop_Guntupalli1;~Carter_Wendelken1;~Ahmad_Humayun2;~Sivaramakrishnan_Swaminathan1;~Guangyao_Zhou1;~Miguel_Lazaro-Gredilla1;~Kevin_Patrick_Murphy1", "aff": "Google DeepMind;Google DeepMind;Google Deepmind;Google DeepMind;Google;Google DeepMind;Google DeepMind;Google DeepMind;Google Deepmind;Google", "aff_domain": "google.com;deepmind.com;deepmind.com;deepmind.com;google.com;google.com;deepmind.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Research Engineer;Research Scientist;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nortiz2024dmcvb,\ntitle={{DMC}-{VB}: A Benchmark for Representation Learning for Control with Visual Distractors},\nauthor={Joseph Ortiz and Antoine Dedieu and Wolfgang Lehrach and J Swaroop Guntupalli and Carter Wendelken and Ahmad Humayun and Sivaramakrishnan Swaminathan and Guangyao Zhou and Miguel Lazaro-Gredilla and Kevin Patrick Murphy},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=0NQzQVu9tY}\n}", "github": "", "reviewers": "RVem;soTG;n4Ln", "pdf_size": 2069953, "rating": "6;7;8", "confidence": "3;3;3", "wc_summary_and_contributions": "46;143;93", "wc_strengths": "64;63;72", "wc_improvement": "43;174;25", "wc_limitations": "7;8;7", "wc_correctness": "1;8;8", "wc_clarity": "1;12;6", "wc_relation_to_prior_work": "1;16;7", "wc_documentation": "18;11;7", "wc_additional_feedback": "1;1;1", "wc_review": "182;436;226", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 94.0, 39.60639678974429 ], "wc_strengths_avg": [ 66.33333333333333, 4.0276819911981905 ], "wc_improvement_avg": [ 80.66666666666667, 66.40448445365384 ], "wc_limitations_avg": [ 7.333333333333333, 0.4714045207910317 ], "wc_correctness_avg": [ 5.666666666666667, 3.299831645537222 ], "wc_clarity_avg": [ 6.333333333333333, 4.4969125210773475 ], "wc_relation_to_prior_work_avg": [ 8.0, 6.164414002968976 ], "wc_documentation_avg": [ 12.0, 4.546060565661952 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 281.3333333333333, 110.83120298704492 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12046423805828124069&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;deepmind.com;deepmind.com;deepmind.com;google.com;google.com;deepmind.com;google.com;google.com;google.com", "author_num": 10, "aff_unique_index": "0;0;1;0;0;0;0;0;1;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google DeepMind;DeepMind", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "0SMhqvgHST", "title": "EEVEE and GATE: Finding the right benchmarks and how to run them seamlessly", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Model evaluation is a cornerstone of machine learning, guiding model design and progress measurement. Designing generalizable evaluation processes remains a challenge, however, partly due to the vast number of possible domain, task and modality combinations and lack of knowledge of how informative they are. In this paper, we propose EEVEE (Efficient Evaluation process Evolution Engine) - pronounced as \\textipa{/'i:vi:/} EE-vee - a method that frames evaluation process design as a learning problem. By analyzing a large number of evaluation metrics from diverse benchmarks and models, EEVEE identifies a smaller subset of tasks with high predictive power over the full set of evaluation metrics, reducing evaluation time. To find the optimal subset maximizing signal while minimizing GPU hours, EEVEE evaluates pre-trained models of various architectures, pretraining schemes, and modalities on diverse downstream tasks and datasets including image classification, segmentation, relational reasoning, zero-shot image-to-text tasks, medical classification and segmentation, video classification, and regression. Our results identify three subsets of benchmarks, with 8, 15 and 21 tasks, providing high quality signal for model generalization. Key benchmarks selected include iWildCam, CLEVR-Math, ACDC, WinoGround, CIFAR100, Fungi, and ADE20K. We structure the subsets into three tiers for 12, 24, and 36 GPU-hour budgets and package them into a unified, efficient, and user-friendly Python framework that we built with the researcher in mind -- which we refer to as the GATE engine.\nOur experiments reveal ConvNextV2, SigLIP and CLIP as top-performing model encoders, with EfficientNetV2 and ResNext50 excelling in medical tasks and challenging image classification, in particular in Happy Whale Individual classification, ConvNet based models seem to outperform transformer models by a factor of 2.5x, which is surprising. The top performing encoder being ConvNextV2 followed by CLIP seems to agree with other recent large scale evaluations. We also demonstrate the framework's versatility in fine-tuning models from text and audio modalities, paving the way for future cross-modal evaluations.", "keywords": "multi-modal;benchmarks;machine learning;model evaluation;benchmark frameworks", "primary_area": "", "supplementary_material": "", "author": "Antreas Antoniou;Eleni Triantafillou;Hugo Larochelle;Sebastien Montella;Fady Rezk;Kiyoon Kim;Linus Ericsson;Pavlos Vougiouklis;Justin Engelmann;Elliot J. Crowley;Srihari Humbarwadi;Yi Liu;Guang Yang;Jeff Z. Pan;Amos Storkey", "authorids": "~Antreas_Antoniou3;~Eleni_Triantafillou1;~Hugo_Larochelle1;~Sebastien_Montella1;~Fady_Rezk1;~Kiyoon_Kim2;~Linus_Ericsson1;~Pavlos_Vougiouklis1;~Justin_Engelmann1;~Elliot_J._Crowley1;~Srihari_Humbarwadi1;~Yi_Liu35;~Guang_Yang5;~Jeff_Z._Pan1;~Amos_Storkey1", "gender": ";F;M;M;;;M;M;;M;M;;M;M;Not Specified", "homepage": ";http://www.cs.toronto.edu/~eleni/;https://mila.quebec/en/directory/hugo-larochelle;https://montellasebastien.github.io/;https://www.linkedin.com/in/fady-rezk/;https://kiyoon.kim;https://linusericsson.github.io/;;;https://elliotjcrowley.github.io;https://github.com/srihari-humbarwadi;https://github.com/moment-of-peace;;https://knowledge-representation.org/j.z.pan/;http://homepages.inf.ed.ac.uk/amos/", "dblp": ";183/8430;86/3862.html;;359/6161;205/2762.html;150/5139;191/6003;;157/3601;;;;59/6490;", "google_scholar": ";Y5x2ZgQAAAAJ;https://scholar.google.ca/citations?user=U89FHq4AAAAJ;OcHli6wAAAAJ;WPdQj24AAAAJ;https://scholar.google.co.kr/citations?user=MBzLo30AAAAJ;QRW9NN0AAAAJ;9J7YeR0AAAAJ;;https://scholar.google.co.uk/citations?user=RyKtqiQAAAAJ;;;ofqVqL0AAAAJ;https://scholar.google.co.uk/citations?hl=en;", "orcid": ";;;;;0009-0004-9089-2627;;;;;;;0000-0001-5434-0084;0000-0002-9779-2088;", "linkedin": ";;;;;;;;;;srihari-humbarwadi/;;;;", "or_profile": "~Antreas_Antoniou3;~Eleni_Triantafillou1;~Hugo_Larochelle1;~Sebastien_Montella1;~Fady_Rezk1;~Kiyoon_Kim2;~Linus_Ericsson1;~Pavlos_Vougiouklis1;~Justin_Engelmann1;~Elliot_J._Crowley1;~Srihari_Humbarwadi1;~Yi_Liu35;~Guang_Yang5;~Jeff_Z._Pan1;~Amos_Storkey1", "aff": ";Google;Google;Huawei Technologies Ltd.;Samsung;Deargen;University of Edinburgh, University of Edinburgh;Huawei Technologies Ltd.;;University of Edinburgh;;;Huawei Technologies Ltd.;University of Edinburgh, University of Edinburgh;University of Edinburgh", "aff_domain": ";google.com;google.com;huawei.com;samsung.com;deargen.me;ed.ac.uk;huawei.com;;ed.ac.uk;;;huawei.com;ed.ac.uk;ed.ac.uk", "position": ";Researcher;Research Scientist;Researcher;Intern;Researcher;Postdoc;Researcher;;Assistant Professor;;;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024eevee,\ntitle={{EEVEE} and {GATE}: Finding the right benchmarks and how to run them seamlessly},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=0SMhqvgHST}\n}", "github": "", "project": "", "reviewers": "ur3Z;NZZc;EAG5", "site": "https://openreview.net/forum?id=0SMhqvgHST", "pdf_size": 6699221, "rating": "3;7;8", "confidence": "5;3;4", "wc_summary_and_contributions": "45;71;125", "wc_strengths": "5;61;53", "wc_improvement": "1;301;89", "wc_limitations": "8;48;50", "wc_correctness": "7;9;9", "wc_clarity": "8;14;20", "wc_relation_to_prior_work": "1;49;17", "wc_documentation": "7;46;46", "wc_additional_feedback": "1;1;1", "wc_review": "83;600;410", "wc_reply_reviewers": "0;0;267", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 2.160246899469287 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 80.33333333333333, 33.319997332266134 ], "wc_strengths_avg": [ 39.666666666666664, 24.729649321321876 ], "wc_improvement_avg": [ 130.33333333333334, 125.91355059016571 ], "wc_limitations_avg": [ 35.333333333333336, 19.344824171395878 ], "wc_correctness_avg": [ 8.333333333333334, 0.9428090415820634 ], "wc_clarity_avg": [ 14.0, 4.898979485566356 ], "wc_relation_to_prior_work_avg": [ 22.333333333333332, 19.955506062794353 ], "wc_documentation_avg": [ 33.0, 18.384776310850235 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 364.3333333333333, 213.52023063140618 ], "wc_reply_reviewers_avg": [ 89.0, 125.86500705120545 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EVDL0nvZkWEJ:scholar.google.com/&scioq=EEVEE+and+GATE:+Finding+the+right+benchmarks+and+how+to+run+them+seamlessly&hl=en&as_sdt=0,23", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;4;1;4;1;4;4", "aff_unique_norm": "Google;Huawei;Samsung;Deargen;University of Edinburgh", "aff_unique_dep": "Google;Huawei Technologies;Samsung;;", "aff_unique_url": "https://www.google.com;https://www.huawei.com;https://www.samsung.com;https://deargen.com;https://www.ed.ac.uk", "aff_unique_abbr": "Google;Huawei;Samsung;;Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;2;2;3;1;3;1;3;3", "aff_country_unique": "United States;China;South Korea;United Kingdom" }, { "title": "IntraMix: Intra-Class Mixup Generation for Accurate Labels and Neighbors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96930", "id": "0SRJBtTNhX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0SRJBtTNhX", "openreview": "https://openreview.net/forum?id=0SRJBtTNhX", "poster": "/media/PosterPDFs/NeurIPS%202024/96930.png?t=1732617637.3891017", "project": "", "author_site": "Shenghe Zheng, Hongzhi Wang, Xianglong Liu", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have shown great performance in various tasks, with the core idea of learning from data labels and aggregating messages within the neighborhood of nodes. However, the common challenges in graphs are twofold: insufficient accurate (high-quality) labels and limited neighbors for nodes, resulting in weak GNNs. \nExisting graph augmentation methods typically address only one of these challenges, often adding training costs or relying on oversimplified or knowledge-intensive strategies, limiting their generalization.\nTo simultaneously address both challenges faced by graphs in a generalized way, we propose an elegant method called IntraMix. Considering the incompatibility of vanilla Mixup with the complex topology of graphs, IntraMix innovatively employs Mixup among inaccurate labeled data of the same class, generating high-quality labeled data at minimal cost. \nAdditionally, it finds data with high confidence of being clustered into the same group as the generated data to serve as their neighbors, thereby enriching the neighborhoods of graphs. IntraMix efficiently tackles both issues faced by graphs and challenges the prior notion of the limited effectiveness of Mixup in node classification. IntraMix is a theoretically grounded plug-in-play method that can be readily applied to all GNNs. Extensive experiments demonstrate the effectiveness of IntraMix across various GNNs and datasets. Our code is available at: [https://github.com/Zhengsh123/IntraMix](https://github.com/Zhengsh123/IntraMix).", "keywords": "Graph Machine Learning;Graph Data Augmentation;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Shenghe Zheng;Hongzhi Wang;Xianglong Liu", "authorids": "~Shenghe_Zheng1;~Hongzhi_Wang2;~Xianglong_Liu4", "gender": ";M;M", "homepage": "https://zhengsh123.github.io/;http://homepage.hit.edu.cn/wang;", "dblp": "300/3999;81/940;55/7901-4.html", "google_scholar": "OBGFydKdVkYC;;", "orcid": "0000-0003-0522-2975;0000-0002-7521-2871;", "linkedin": ";;", "or_profile": "~Shenghe_Zheng1;~Hongzhi_Wang2;~Xianglong_Liu4", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "MS student;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nzheng2024intramix,\ntitle={IntraMix: Intra-Class Mixup Generation for Accurate Labels and Neighbors},\nauthor={Shenghe Zheng and Hongzhi Wang and Xianglong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0SRJBtTNhX}\n}", "github": "", "reviewers": "bPiP;PZ8r;4UTx;nFdn", "pdf_size": 634790, "rating": "5;5;5;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "2;3;2;3", "wc_summary": "72;22;60;74", "wc_strengths": "96;47;58;109", "wc_weaknesses": "102;86;243;83", "wc_questions": "2;96;54;83", "wc_limitations": "1;12;11;6", "wc_review": "273;263;426;355", "wc_reply_reviewers": "0;21;63;27", "wc_reply_authors": "0;74;447;34", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.0, 20.904544960366874 ], "wc_strengths_avg": [ 77.5, 25.71478174124758 ], "wc_weaknesses_avg": [ 128.5, 66.5 ], "wc_questions_avg": [ 58.75, 36.120458191999724 ], "wc_limitations_avg": [ 7.5, 4.387482193696061 ], "wc_review_avg": [ 329.25, 66.28866796067032 ], "wc_reply_reviewers_avg": [ 27.75, 22.68672519338126 ], "wc_reply_authors_avg": [ 138.75, 179.88520645122546 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16472391051408794110&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Benchmarking Counterfactual Image Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97876", "id": "0T8xRFrScB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0T8xRFrScB", "openreview": "https://openreview.net/forum?id=0T8xRFrScB", "poster": "/media/PosterPDFs/NeurIPS%202024/97876.png?t=1731712408.6398222", "project": "", "author_site": "Thomas Melistas, Nikos Spyrou, Nefeli Gkouti, Pedro Sanchez, Athanasios Vlontzos, Yannis Panagakis, Giorgos Papanastasiou, Sotirios Tsaftaris", "tldr": "", "abstract": "Generative AI has revolutionised visual content editing, empowering users to effortlessly modify images and videos. However, not all edits are equal. To perform realistic edits in domains such as natural image or medical imaging, modifications must respect causal relationships inherent to the data generation process. Such image editing falls into the counterfactual image generation regime. Evaluating counterfactual image generation is substantially complex: not only it lacks observable ground truths, but also requires adherence to causal constraints. Although several counterfactual image generation methods and evaluation metrics exist a comprehensive comparison within a unified setting is lacking. We present a comparison framework to thoroughly benchmark counterfactual image generation methods. We evaluate the performance of three conditional image generation model families developed within the Structural Causal Model (SCM) framework. We incorporate several metrics that assess diverse aspects of counterfactuals, such as composition, effectiveness, minimality of interventions, and image realism. We integrate all models that have been used for the task at hand and expand them to novel datasets and causal graphs, demonstrating the superiority of Hierarchical VAEs across most datasets and metrics. Our framework is implemented in a user-friendly Python package that can be extended to incorporate additional SCMs, causal methods, generative models, and datasets for the community to build on. Code: https://github.com/gulnazaki/counterfactual-benchmark.", "keywords": "benchmark;causal inference;counterfactual image generation", "primary_area": "", "supplementary_material": "/attachment/16d8d3f8b6cec2dc456df3a12b254566d0b21249.pdf", "author": "Thomas Melistas;Nikos Spyrou;Nefeli Gkouti;Pedro Sanchez;Athanasios Vlontzos;Yannis Panagakis;Giorgos Papanastasiou;Sotirios A. Tsaftaris", "authorids": "~Thomas_Melistas1;~Nikos_Spyrou1;~Nefeli_Gkouti1;~Pedro_Sanchez1;~Athanasios_Vlontzos1;~Yannis_Panagakis1;~Giorgos_Papanastasiou2;~Sotirios_A._Tsaftaris1", "gender": "M;M;F;M;;;M;", "homepage": ";;;https://vios.science/team/sanchez;https://thanosvlo.github.io;;;https://vios.science/", "dblp": "308/9665;372/6924.html;;14/8283;186/8028;;;14/613", "google_scholar": "TctbVmUAAAAJ;https://scholar.google.com/citations?hl=el;;KPchGe4AAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?hl=en;jC1uFnYAAAAJ", "orcid": "0000-0003-4287-2483;;;0000-0003-2435-3049;;;;", "linkedin": "thomas-melistas/;;nefeli-gkouti-6356a6224/;https://linkedin.com/in/pedro-sanches-ppsg;athanasios-vlontzos/;;;", "or_profile": "~Thomas_Melistas1;~Nikos_Spyrou1;~Nefeli_Gkouti1;~Pedro_Sanchez1;~Athanasios_Vlontzos1;~Yannis_Panagakis1;~Giorgos_Papanastasiou2;~Sotirios_A._Tsaftaris1", "aff": "University of Athens;University of Athens;University of Athens;University of Edinburgh, University of Edinburgh;Spotify;;Pfizer;University of Edinburgh", "aff_domain": "uoa.gr;uoa.gr;uoa.gr;ed.ac.uk;spotify.com;;pfizer.com;ed.ac.uk", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;;Principal Researcher;Professor in machine learning and computer vision", "bibtex": "@inproceedings{\nmelistas2024benchmarking,\ntitle={Benchmarking Counterfactual Image Generation},\nauthor={Thomas Melistas and Nikos Spyrou and Nefeli Gkouti and Pedro Sanchez and Athanasios Vlontzos and Yannis Panagakis and Giorgos Papanastasiou and Sotirios A. Tsaftaris},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=0T8xRFrScB}\n}", "github": "", "reviewers": "HeXk;6W6G;J9ST;F7FH", "pdf_size": 9912272, "rating": "7;7;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "93;130;39;42", "wc_strengths": "37;56;27;18", "wc_improvement": "116;93;261;84", "wc_limitations": "14;28;1;7", "wc_correctness": "1;1;1;7", "wc_clarity": "1;1;1;9", "wc_relation_to_prior_work": "1;1;1;18", "wc_documentation": "1;1;1;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "265;312;333;195", "wc_reply_reviewers": "130;176;29;18", "wc_reply_authors": "411;100;100;83", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 76.0, 37.84838173555112 ], "wc_strengths_avg": [ 34.5, 14.115594213493104 ], "wc_improvement_avg": [ 138.5, 71.68158759402584 ], "wc_limitations_avg": [ 12.5, 10.062305898749054 ], "wc_correctness_avg": [ 2.5, 2.598076211353316 ], "wc_clarity_avg": [ 3.0, 3.4641016151377544 ], "wc_relation_to_prior_work_avg": [ 5.25, 7.361215932167728 ], "wc_documentation_avg": [ 3.0, 3.4641016151377544 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 276.25, 52.978179470419704 ], "wc_reply_reviewers_avg": [ 88.25, 66.87441588529951 ], "wc_reply_authors_avg": [ 173.5, 137.296212620742 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4404991018913917721&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uoa.gr;uoa.gr;uoa.gr;ed.ac.uk;spotify.com;;pfizer.com;ed.ac.uk", "author_num": 8, "aff_unique_index": "0;0;0;1;2;3;1", "aff_unique_norm": "University of Athens;University of Edinburgh;Spotify;Pfizer", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uoa.gr;https://www.ed.ac.uk;https://www.spotify.com;https://www.pfizer.com", "aff_unique_abbr": "UoA;Edinburgh;Spotify;Pfizer", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2;3;1", "aff_country_unique": "Greece;United Kingdom;Sweden;United States" }, { "title": "Queueing Matching Bandits with Preference Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96929", "id": "0TUMAAb3of", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0TUMAAb3of", "openreview": "https://openreview.net/forum?id=0TUMAAb3of", "poster": "/media/PosterPDFs/NeurIPS%202024/96929.png?t=1733803835.532587", "project": "", "author_site": "Jung-hun Kim, Min-hwan Oh", "tldr": "", "abstract": "In this study, we consider multi-class multi-server asymmetric queueing systems consisting of $N$ queues on one side and $K$ servers on the other side, where jobs randomly arrive in queues at each time. The service rate of each job-server assignment is unknown and modeled by a feature-based Multi-nomial Logit (MNL) function. At each time, a scheduler assigns jobs to servers, and each server stochastically serves at most one job based on its preferences over the assigned jobs. The primary goal of the algorithm is to stabilize the queues in the system while learning the service rates of servers. To achieve this goal, we propose algorithms based on UCB and Thompson Sampling, which achieve system stability with an average queue length bound of $O(\\min\\\\{N,K\\\\}/\\epsilon)$ for a large time horizon $T$, where $\\epsilon$ is a traffic slackness of the system. Furthermore, the algorithms achieve sublinear regret bounds of $\\tilde{O}(\\min\\\\{\\sqrt{T}Q_{\\max},T^{3/4}\\\\})$, where $Q_{\\max}$ represents the maximum queue length over agents and times. \n Lastly, we provide experimental results to demonstrate the performance of our algorithms.", "keywords": "Bandits;Queue;Preference Feedback", "primary_area": "bandits", "supplementary_material": "", "author": "Jung-hun Kim;Min-hwan Oh", "authorids": "~Jung-hun_Kim1;~Min-hwan_Oh1", "gender": ";M", "homepage": "https://minoh.io;https://sites.google.com/view/junghunkim/home", "dblp": "172/0531;234/2799", "google_scholar": "KzVALFwAAAAJ;2eFdD1kAAAAJ", "orcid": ";", "linkedin": ";jung-hun-kim/", "or_profile": "~Min-hwan_Oh1;~Junghun_Kim1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nkim2024queueing,\ntitle={Queueing Matching Bandits with Preference Feedback},\nauthor={Jung-hun Kim and Min-hwan Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0TUMAAb3of}\n}", "github": "", "reviewers": "PAuc;c5Ej;9oLk;117Z", "pdf_size": 686486, "rating": "4;6;7;7", "confidence": "3;2;1;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "27;95;61;84", "wc_strengths": "22;77;56;130", "wc_weaknesses": "103;130;8;63", "wc_questions": "86;72;15;234", "wc_limitations": "106;4;1;44", "wc_review": "344;378;141;555", "wc_reply_reviewers": "0;14;0;51", "wc_reply_authors": "54;23;0;26", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.75, 26.022826518270456 ], "wc_strengths_avg": [ 71.25, 39.18784888201954 ], "wc_weaknesses_avg": [ 76.0, 45.92929348465966 ], "wc_questions_avg": [ 101.75, 80.85287564459287 ], "wc_limitations_avg": [ 38.75, 42.37555309373554 ], "wc_review_avg": [ 354.5, 147.0076528620194 ], "wc_reply_reviewers_avg": [ 16.25, 20.8611480987984 ], "wc_reply_authors_avg": [ 25.75, 19.162137145944865 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.18257418583505536, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4100117506641803449&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Adversarial Moment-Matching Distillation of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96928", "id": "0VeSCjRDBy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0VeSCjRDBy", "openreview": "https://openreview.net/forum?id=0VeSCjRDBy", "poster": "/media/PosterPDFs/NeurIPS%202024/96928.png?t=1731660620.7591212", "project": "", "tldr": "", "abstract": "Knowledge distillation (KD) has been shown to be highly effective in guiding a student model with a larger teacher model and achieving practical benefits in improving the computational and memory efficiency for large language models (LLMs). State-of-the-art KD methods for LLMs mostly rely on minimizing explicit metrics measuring the divergence between teacher and student probability predictions. Instead of optimizing these mandatory cloning objectives, we explore an imitation learning strategy for KD of LLMs. In particular, we minimize the imitation gap by matching the action-value moments of the teacher's behavior from both on- and off-policy perspectives. To achieve this moment-matching goal, we propose an adversarial training algorithm to jointly estimate the moment-matching distance and optimize the student policy to minimize it. Results from both task-agnostic instruction-following experiments and task-specific experiments demonstrate the effectiveness of our method and achieve new state-of-the-art performance.", "keywords": "Knowledge distillation;Large language models;Imitation learning;Moment-matching;Adversarial training", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Chen Jia", "authorids": "~Chen_Jia1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\njia2024adversarial,\ntitle={Adversarial Moment-Matching Distillation of Large Language Models},\nauthor={Chen Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0VeSCjRDBy}\n}", "github": "", "reviewers": "M71H;zEWr;ejno;8hQS", "pdf_size": 0, "rating": "5;6;7;7", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "96;114;59;63", "wc_strengths": "100;107;41;43", "wc_weaknesses": "101;120;108;121", "wc_questions": "49;114;74;155", "wc_limitations": "6;40;12;1", "wc_review": "352;495;294;383", "wc_reply_reviewers": "0;0;40;58", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 22.94558781116753 ], "wc_strengths_avg": [ 72.75, 30.85753554644311 ], "wc_weaknesses_avg": [ 112.5, 8.381527307120106 ], "wc_questions_avg": [ 98.0, 40.25543441574069 ], "wc_limitations_avg": [ 14.75, 15.08931741332258 ], "wc_review_avg": [ 381.0, 73.16078184382668 ], "wc_reply_reviewers_avg": [ 24.5, 25.31304011769428 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11070745455932951910&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "ScaleKD: Strong Vision Transformers Could Be Excellent Teachers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96927", "id": "0WCFI2Qx85", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0WCFI2Qx85", "openreview": "https://openreview.net/forum?id=0WCFI2Qx85", "poster": "/media/PosterPDFs/NeurIPS%202024/96927.png?t=1731695837.2500114", "project": "", "author_site": "Jiawei Fan, Chao Li, Xiaolong Liu, Anbang Yao", "tldr": "", "abstract": "In this paper, we question if well pre-trained vision transformer (ViT) models could be used as teachers that exhibit scalable properties to advance cross architecture knowledge distillation research, in the context of adopting mainstream large-scale visual recognition datasets for evaluation. To make this possible, our analysis underlines the importance of seeking effective strategies to align (1) feature computing paradigm differences, (2) model scale differences, and (3) knowledge density differences. By combining three closely coupled components namely *cross attention projector*, *dual-view feature mimicking* and *teacher parameter perception* tailored to address the alignment problems stated above, we present a simple and effective knowledge distillation method, called *ScaleKD*. Our method can train student backbones that span across a variety of convolutional neural network (CNN), multi-layer perceptron (MLP), and ViT architectures on image classification datasets, achieving state-of-the-art knowledge distillation performance. For instance, taking a well pre-trained Swin-L as the teacher model, our method gets 75.15\\%|82.03\\%|84.16\\%|78.63\\%|81.96\\%|83.93\\%|83.80\\%|85.53\\% top-1 accuracies for MobileNet-V1|ResNet-50|ConvNeXt-T|Mixer-S/16|Mixer-B/16|ViT-S/16|Swin-T|ViT-B/16 models trained on ImageNet-1K dataset from scratch, showing 3.05\\%|3.39\\%|2.02\\%|4.61\\%|5.52\\%|4.03\\%|2.62\\%|3.73\\% absolute gains to the individually trained counterparts. Intriguingly, when scaling up the size of teacher models or their pre-training datasets, our method showcases the desired scalable properties, bringing increasingly larger gains to student models. We also empirically show that the student backbones trained by our method transfer well on downstream MS-COCO and ADE20K datasets. More importantly, our method could be used as a more efficient alternative to the time-intensive pre-training paradigm for any target student model on large-scale datasets if a strong pre-trained ViT is available, reducing the amount of viewed training samples up to 195$\\times$. The code is available at *https://github.com/deep-optimization/ScaleKD*.", "keywords": "knowledge distillation;model compression;training acceleration;vision transformer;convolutional neural network;multi-layer perception", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Jiawei Fan;Chao Li;Xiaolong Liu;Anbang Yao", "authorids": "~Jiawei_Fan1;~Chao_Li16;~Xiaolong_Liu2;~Anbang_Yao1", "gender": "M;M;M;", "homepage": "https://jwfandl.github.io/;https://github.com/chaoli-ai/chaoli.github.io;;https://yaoanbang.github.io/", "dblp": ";;;http://dblp.uni-trier.de/pers/hd/y/Yao:Anbang", "google_scholar": "7H674NUAAAAJ;;hgFJj0MAAAAJ;b9hCmPYAAAAJ", "orcid": "0000-0002-5487-2109;;;0000-0002-3878-8679", "linkedin": ";;;anbang-yao-1805b712a/", "or_profile": "~Jiawei_Fan1;~Chao_Li16;~Xiaolong_Liu2;~Anbang_Yao1", "aff": "Intel Labs China;Intel;;Intel", "aff_domain": "intel.com;intel.com;;intel.com", "position": "Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nfan2024scalekd,\ntitle={Scale{KD}: Strong Vision Transformers Could Be Excellent Teachers},\nauthor={Jiawei Fan and Chao Li and Xiaolong Liu and Anbang Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0WCFI2Qx85}\n}", "github": "", "reviewers": "mfZF;rjnT;k9Ls;3qQD", "pdf_size": 6117234, "rating": "6;6;6;7", "confidence": "4;5;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "76;102;85;88", "wc_strengths": "24;79;45;54", "wc_weaknesses": "239;125;246;191", "wc_questions": "52;26;6;5", "wc_limitations": "1;9;5;1", "wc_review": "392;341;387;339", "wc_reply_reviewers": "18;36;37;12", "wc_reply_authors": "85;41;56;38", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.75, 9.33742469849155 ], "wc_strengths_avg": [ 50.5, 19.72941965694886 ], "wc_weaknesses_avg": [ 200.25, 48.328950950750006 ], "wc_questions_avg": [ 22.25, 19.109879643786353 ], "wc_limitations_avg": [ 4.0, 3.3166247903554 ], "wc_review_avg": [ 364.75, 24.823124299733102 ], "wc_reply_reviewers_avg": [ 25.75, 10.96300597464035 ], "wc_reply_authors_avg": [ 55.0, 18.614510468986285 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5821674397410308266&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "intel.com;intel.com;;intel.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel Labs", "aff_unique_url": "https://www.intel.cn", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Learning-Augmented Algorithms with Explicit Predictors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96926", "id": "0XKvW4ijxp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0XKvW4ijxp", "openreview": "https://openreview.net/forum?id=0XKvW4ijxp", "poster": "/media/PosterPDFs/NeurIPS%202024/96926.png?t=1733319671.0065775", "project": "", "author_site": "Marek Elias, Haim Kaplan, Yishay Mansour, Shay Moran", "tldr": "", "abstract": "Recent advances in algorithmic design show how to utilize predictions obtained by machine learning models from past and present data. These approaches have demonstrated an enhancement in performance when the predictions are accurate, while also ensuring robustness by providing worst-case guarantees when predictions fail. In this paper we focus on online problems; prior research in this context was focused on a paradigm where the algorithms are oblivious of the predictors' design, treating them as a black box. In contrast, in this work,\nwe unpack the predictor and integrate the learning problem it gives rise for within the algorithmic challenge. In particular we allow the predictor to learn as it receives larger parts of the input, with the ultimate goal of designing online learning algorithms specifically tailored for the algorithmic task at hand. Adopting this perspective, we focus on a number of fundamental problems, including caching and scheduling, which have been well-studied in the black-box setting. For each of the problems, we introduce new algorithms that take advantage of explicit and carefully designed learning rules. These pairings of online algorithms with corresponding learning rules yields improvements in the overall performance in comparison with previous work.", "keywords": "online algorithms;learning-augmented algorithms;scheduling;caching", "primary_area": "learning_theory", "supplementary_material": "", "author": "Marek Elias;Haim Kaplan;Yishay Mansour;Shay Moran", "authorids": "~Marek_Elias1;~Haim_Kaplan1;~Yishay_Mansour2;~Shay_Moran1", "gender": ";;;M", "homepage": "https://elias.ba30.eu;;;http://www.cs.technion.ac.il/~shaymrn/", "dblp": "173/4575;;;119/5111", "google_scholar": ";;;kALYnggAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Marek_Elias1;~Haim_Kaplan1;~Yishay_Mansour2;~Shay_Moran1", "aff": "Bocconi University;;;Google", "aff_domain": "unibocconi.it;;;google.com", "position": "Assistant Professor;;;Visiting Faculty", "bibtex": "@inproceedings{\nelias2024learningaugmented,\ntitle={Learning-Augmented Algorithms with Explicit Predictors},\nauthor={Marek Elias and Haim Kaplan and Yishay Mansour and Shay Moran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0XKvW4ijxp}\n}", "github": "", "reviewers": "qEgX;oKpi;1P4N;B3ME", "pdf_size": 487359, "rating": "5;6;6;7", "confidence": "3;4;5;4", "soundness": "3;3;4;3", "novelty": "3;3;3;4", "presentation": "3;3;3;2", "wc_summary": "187;278;158;258", "wc_strengths": "44;101;100;73", "wc_weaknesses": "263;85;116;171", "wc_questions": "8;65;232;37", "wc_limitations": "3;11;1;17", "wc_review": "505;540;607;556", "wc_reply_reviewers": "0;29;127;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 220.25, 49.347618990180266 ], "wc_strengths_avg": [ 79.5, 23.371991785040485 ], "wc_weaknesses_avg": [ 158.75, 67.61055760752163 ], "wc_questions_avg": [ 85.5, 86.94969810183356 ], "wc_limitations_avg": [ 8.0, 6.4031242374328485 ], "wc_review_avg": [ 552.0, 36.72192805395708 ], "wc_reply_reviewers_avg": [ 41.25, 50.60817621689207 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16820818600117080534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unibocconi.it;;;google.com", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Bocconi University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.bocconi.edu;https://www.google.com", "aff_unique_abbr": "Bocconi;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Italy;United States" }, { "title": "The Road Less Scheduled", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96925", "id": "0XeNkkENuI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0XeNkkENuI", "openreview": "https://openreview.net/forum?id=0XeNkkENuI", "poster": "/media/PosterPDFs/NeurIPS%202024/96925.png?t=1730837481.9895687", "project": "", "author_site": "Aaron Defazio, Xingyu Yang, Ahmed Khaled, Konstantin Mishchenko, Harsh Mehta, Ashok Cutkosky", "tldr": "", "abstract": "Existing learning rate schedules that do not require specification of the optimization stopping step $T$ are greatly out-performed by learning rate schedules that depend on $T$. We propose an approach that avoids the need for this stopping time by eschewing the use of schedules entirely, while exhibiting state-of-the-art performance compared to schedules across a wide family of problems ranging from convex problems to large-scale deep learning problems. Our Schedule-Free approach introduces no additional hyper-parameters over standard optimizers with momentum. Our method is a direct consequence of a new theory we develop that unifies scheduling and iterate averaging. An open source implementation of our method is available at https://github.com/facebookresearch/schedule_free. Schedule-Free AdamW is the core algorithm behind our winning entry to the MLCommons 2024 AlgoPerf Algorithmic Efficiency Challenge Self-Tuning track.", "keywords": "Stochastic Optimization;Optimization;Convex Optimization;Learning Rates;Learning Rate Schedules", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Aaron Defazio;Xingyu Alice Yang;Ahmed Khaled;Konstantin Mishchenko;Harsh Mehta;Ashok Cutkosky", "authorids": "~Aaron_Defazio1;~Xingyu_Alice_Yang1;~Ahmed_Khaled1;~Konstantin_Mishchenko1;~Harsh_Mehta1;~Ashok_Cutkosky1", "gender": "M;F;M;;M;", "homepage": "https://www.aarondefazio.com/;https://github.com/tfaod;https://www.akhaled.net;https://konstmish.com/;;http://www.cs.stanford.edu/~ashokc", "dblp": "116/2969;;154/3591-1;222/9853;122/1475;191/6725", "google_scholar": "KEzJsdkAAAAJ;;Bc3wOdsAAAAJ;Z8Y8nhQAAAAJ;murJPNoAAAAJ;h4AbGp0AAAAJ", "orcid": ";;;;;", "linkedin": ";xyang13;;;;", "or_profile": "~Aaron_Defazio1;~Xingyu_Alice_Yang1;~Ahmed_Khaled1;~Konstantin_Mishchenko1;~Harsh_Mehta1;~Ashok_Cutkosky1", "aff": "Meta;Meta;Princeton University;Samsung;Google Research;Boston University", "aff_domain": "meta.com;meta.com;princeton.edu;samsung.com;google.com;bu.edu", "position": "Research Scientist;Research Engineer;PhD student;Researcher;Software Engineer;Assistant Professor", "bibtex": "@inproceedings{\ndefazio2024the,\ntitle={The Road Less Scheduled},\nauthor={Aaron Defazio and Xingyu Alice Yang and Ahmed Khaled and Konstantin Mishchenko and Harsh Mehta and Ashok Cutkosky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0XeNkkENuI}\n}", "github": "", "reviewers": "Q984;rKis;mxnW;8hsg", "pdf_size": 1284530, "rating": "5;7;8;10", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "3;3;4;4", "presentation": "3;3;4;3", "wc_summary": "189;74;69;117", "wc_strengths": "133;90;38;106", "wc_weaknesses": "723;58;112;49", "wc_questions": "58;82;21;137", "wc_limitations": "38;4;1;4", "wc_review": "1141;308;241;413", "wc_reply_reviewers": "631;113;37;56", "wc_reply_authors": "702;745;188;0", "reply_reviewers": "3;2;2;1", "reply_authors": "4;2;2;1", "rating_avg": [ 7.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 112.25, 48.08001143926653 ], "wc_strengths_avg": [ 91.75, 34.629286738250904 ], "wc_weaknesses_avg": [ 235.5, 282.48761034778147 ], "wc_questions_avg": [ 74.5, 42.1218470630147 ], "wc_limitations_avg": [ 11.75, 15.20485119953497 ], "wc_review_avg": [ 525.75, 360.46593112248485 ], "wc_reply_reviewers_avg": [ 209.25, 245.09832210767988 ], "wc_reply_authors_avg": [ 408.75, 322.0507529877861 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=374760708433366345&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "meta.com;meta.com;princeton.edu;samsung.com;google.com;bu.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "Meta;Princeton University;Samsung;Google;Boston University", "aff_unique_dep": "Meta Platforms, Inc.;;Samsung;Google Research;", "aff_unique_url": "https://meta.com;https://www.princeton.edu;https://www.samsung.com;https://research.google;https://www.bu.edu", "aff_unique_abbr": "Meta;Princeton;Samsung;Google Research;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Can LLMs Learn by Teaching for Better Reasoning? A Preliminary Study", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96924", "id": "0ZZMUjZJYF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0ZZMUjZJYF", "openreview": "https://openreview.net/forum?id=0ZZMUjZJYF", "poster": "/media/PosterPDFs/NeurIPS%202024/96924.png?t=1732367562.4969194", "project": "", "author_site": "Xuefei Ning, Zifu Wang, Shiyao Li, Zinan Lin, Peiran Yao, Tianyu Fu, Matthew Blaschko, Guohao Dai, Huazhong Yang, Yu Wang", "tldr": "", "abstract": "Teaching to improve student models (e.g., knowledge distillation) is an extensively studied methodology in LLMs. However, in human education, teaching enhances not only the students but also the teachers by fostering more rigorous and clearer reasoning, as well as deeper knowledge building. We ask: Can LLMs also learn by teaching (LbT) for better reasoning? If the answer is yes, we can potentially unlock the possibility of continuously advancing the models without solely relying on human-produced data or stronger models. In this paper, we provide a preliminary exploration of this question. We show that LbT ideas can be incorporated into existing LLM training/prompting pipelines and bring improvements. Specifically, we design three methods, each mimicking one of the three levels of LbT: observing students' feedback, learning from the feedback, and learning iteratively, with the goal of improving answer accuracy without training or improving models' inherent capability with fine-tuning. We reveal some findings: (1) Teaching materials that make it easier for students to learn (via in-context learning) have clearer and more accurate logic; (2) Weak-to-strong generalization: LbT might help improve strong models by teaching weak models; (3) Diversity in students might help: teaching multiple students could be better than teaching a single student or the teacher alone. We hope that our exploration can inspire future research on LbT and, more broadly, the adoption of advanced education techniques to improve LLMs. The code and website are at https://github.com/imagination-research/lbt and https://sites.google.com/view/llm-learning-by-teaching.", "keywords": "LLMs;Learning by Teaching;Reasoning;Mathematical Reasoning;Code Synthesis;Weak-to-Strong Generalization;In-Context Learning;Prompting;Knowledge Distillation;Education-Inspired", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xuefei Ning;Zifu Wang;Shiyao Li;Zinan Lin;Peiran Yao;Tianyu Fu;Matthew B. Blaschko;Guohao Dai;Huazhong Yang;Yu Wang", "authorids": "~Xuefei_Ning1;~Zifu_Wang1;~Shiyao_Li2;~Zinan_Lin1;~Peiran_Yao1;~Tianyu_Fu3;~Matthew_B._Blaschko1;~Guohao_Dai4;~Huazhong_Yang2;~Yu_Wang3", "gender": "Not Specified;M;M;M;;M;M;M;M;M", "homepage": "https://nics-effalg.com/ningxuefei/;https://zifuwang.com;http://nicsefc.ee.tsinghua.edu.cn/people/ShiyaoLi;https://zinanlin.me/;;http://nicsefc.ee.tsinghua.edu.cn/people/TianyuFu;https://nicsefc.ee.tsinghua.edu.cn/people/guohao-dai/;http://web.ee.tsinghua.edu.cn/yanghuazhong/en/index.htm;https://nicsefc.ee.tsinghua.edu.cn;http://homes.esat.kuleuven.be/~mblaschk/", "dblp": "202/9525;;;64/237-1;223/3245;219/6025-4;147/1470;94/1128.html;w/YuWang2.html;12/5233", "google_scholar": "oVslpJsAAAAJ;https://scholar.google.com/citations?hl=en;;67nE-wQ_g_cC;https://scholar.google.ca/citations?user=MtzvVewAAAAJ;Mnfue94AAAAJ;gz3Tkl0AAAAJ;;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ;EmmO7LcAAAAJ", "orcid": ";;;;;0000-0003-3508-1755;;0000-0003-2421-353X;0000-0001-6108-5157;0000-0002-2640-181X", "linkedin": ";;;;;;;;;matthew-blaschko-5b7a51b0/", "or_profile": "~Xuefei_Ning1;~Zifu_Wang1;~Shiyao_Li2;~Zinan_Lin1;~Peiran_Yao1;~Tianyu_Fu3;~Guohao_Dai4;~Huazhong_Yang2;~Yu_Wang3;~Matthew_Blaschko1", "aff": "Tsinghua University;KU Leuven;Tsinghua University;Microsoft;University of Alberta;Infinigence;Shanghai Jiaotong University;Tsinghua University;Tsinghua University;KU Leuven", "aff_domain": "tsinghua.edu.cn;kuleuven.be;tsinghua.edu.cn;microsoft.com;ualberta.ca;infini-ai.com;sjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;esat.kuleuven.be", "position": "Research Assistant Professor;PhD student;PhD student;Senior Researcher;PhD student;Researcher;Associate Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nning2024can,\ntitle={Can {LLM}s Learn by Teaching for Better Reasoning? A Preliminary Study},\nauthor={Xuefei Ning and Zifu Wang and Shiyao Li and Zinan Lin and Peiran Yao and Tianyu Fu and Matthew B. Blaschko and Guohao Dai and Huazhong Yang and Yu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0ZZMUjZJYF}\n}", "github": "", "reviewers": "E9eU;mNjM;pWvs;UmKq", "pdf_size": 1451830, "rating": "5;7;7;7", "confidence": "5;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "523;313;14;65", "wc_strengths": "20;93;8;67", "wc_weaknesses": "352;104;17;130", "wc_questions": "51;45;1;65", "wc_limitations": "9;72;1;39", "wc_review": "955;627;41;366", "wc_reply_reviewers": "389;12;0;148", "wc_reply_authors": "1577;10;0;1482", "reply_reviewers": "2;1;0;2", "reply_authors": "5;2;1;5", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 228.75, 204.09112548075186 ], "wc_strengths_avg": [ 47.0, 34.51811118818641 ], "wc_weaknesses_avg": [ 150.75, 123.49772265106753 ], "wc_questions_avg": [ 40.5, 23.93219588754864 ], "wc_limitations_avg": [ 30.25, 27.958674861301994 ], "wc_review_avg": [ 497.25, 336.06574877544426 ], "wc_reply_reviewers_avg": [ 137.25, 156.5397313783309 ], "wc_reply_authors_avg": [ 767.25, 762.9978292891795 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12146006602556868483&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;kuleuven.be;tsinghua.edu.cn;microsoft.com;ualberta.ca;infini-ai.com;sjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;esat.kuleuven.be", "author_num": 10, "aff_unique_index": "0;1;0;2;3;4;5;0;0;1", "aff_unique_norm": "Tsinghua University;Katholieke Universiteit Leuven;Microsoft;University of Alberta;Infinigence;Shanghai Jiao Tong University", "aff_unique_dep": ";;Microsoft Corporation;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.kuleuven.be;https://www.microsoft.com;https://www.ualberta.ca;;https://www.sjtu.edu.cn", "aff_unique_abbr": "THU;KU Leuven;Microsoft;UAlberta;;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;3;0;0;0;1", "aff_country_unique": "China;Belgium;United States;Canada;" }, { "title": "Graph Neural Networks and Arithmetic Circuits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96923", "id": "0ZeONp33f0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0ZeONp33f0", "openreview": "https://openreview.net/forum?id=0ZeONp33f0", "poster": "/media/PosterPDFs/NeurIPS%202024/96923.png?t=1732106485.9402945", "project": "", "author_site": "Timon Barlag, Vivian Holzapfel, Laura Strieker, Jonni Virtema, Heribert Vollmer", "tldr": "", "abstract": "We characterize the computational power of neural networks that follow the graph neural network (GNN) architecture, not restricted to aggregate-combine GNNs or other particular types. We establish an exact correspondence between the expressivity of GNNs using diverse activation functions and arithmetic circuits over real numbers. In our results the activation function of the network becomes a gate type in the circuit. Our result holds for families of constant depth circuits and networks, both uniformly and non-uniformly, for all common activation functions.", "keywords": "Machine Learning;Graph Neural Networks;Arithmetic Circuits;Computational Complexity", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Timon Barlag;Vivian Holzapfel;Laura Strieker;Jonni Virtema;Heribert Vollmer", "authorids": "~Timon_Barlag1;~Vivian_Holzapfel1;~Laura_Strieker1;~Jonni_Virtema1;~Heribert_Vollmer1", "gender": "M;;F;;M", "homepage": "https://www.thi.uni-hannover.de/de/barlag;https://www.thi.uni-hannover.de/de/holzapfel;;http://www.virtema.fi/;", "dblp": "264/9964;;;33/9507;v/HeribertVollmer", "google_scholar": "H_88GC0AAAAJ;;;https://scholar.google.com/citations?;", "orcid": "0000-0001-6139-5219;;0009-0005-4878-4953;;0000-0002-9292-1960", "linkedin": ";;;;", "or_profile": "~Timon_Barlag1;~Vivian_Holzapfel1;~Laura_Strieker1;~Jonni_Virtema1;~Heribert_Vollmer1", "aff": "Universit\u00e4t Hannover;Leibniz Universit\u00e4t Hannover;Leibniz Universit\u00e4t Hannover;University of Sheffield;Leibniz Universit\u00e4t Hannover", "aff_domain": "uni-hannover.de;uni-hannover.de;uni-hannover.de;sheffield.ac.uk;uni-hannover.de", "position": "PhD student;Undergrad student;PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nbarlag2024graph,\ntitle={Graph Neural Networks and Arithmetic Circuits},\nauthor={Timon Barlag and Vivian Holzapfel and Laura Strieker and Jonni Virtema and Heribert Vollmer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0ZeONp33f0}\n}", "github": "", "reviewers": "FVmx;3Xib;2Yjw;E5Mq", "pdf_size": 357027, "rating": "6;6;7;7", "confidence": "3;2;3;5", "soundness": "3;2;4;4", "novelty": "3;3;3;2", "presentation": "3;3;2;4", "wc_summary": "93;25;77;106", "wc_strengths": "34;20;120;72", "wc_weaknesses": "87;17;502;150", "wc_questions": "240;14;259;479", "wc_limitations": "1;9;46;16", "wc_review": "455;85;1004;823", "wc_reply_reviewers": "89;5;89;42", "wc_reply_authors": "19;0;214;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 30.776411421736615 ], "wc_strengths_avg": [ 61.5, 38.76531955240405 ], "wc_weaknesses_avg": [ 189.0, 186.733767701506 ], "wc_questions_avg": [ 248.0, 164.54634605484256 ], "wc_limitations_avg": [ 18.0, 17.014699527173555 ], "wc_review_avg": [ 591.75, 353.17231983834745 ], "wc_reply_reviewers_avg": [ 56.25, 35.26595383652624 ], "wc_reply_authors_avg": [ 58.25, 90.25623247178002 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17850771650424376093&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uni-hannover.de;uni-hannover.de;uni-hannover.de;sheffield.ac.uk;uni-hannover.de", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of Hanover;Leibniz Universit\u00e4t Hannover;University of Sheffield", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-hannover.de;https://www.leibniz.uni-hannover.de/;https://www.sheffield.ac.uk", "aff_unique_abbr": "Uni Hanover;LUH;Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Fourier Amplitude and Correlation Loss: Beyond Using L2 Loss for Skillful Precipitation Nowcasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96922", "id": "0aN7VWwp4g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0aN7VWwp4g", "openreview": "https://openreview.net/forum?id=0aN7VWwp4g", "poster": "/media/PosterPDFs/NeurIPS%202024/96922.png?t=1731480203.951872", "project": "", "author_site": "Chiu-Wai Yan, Shi Quan Foo, Van Hoan Trinh, Dit-Yan Yeung, Ka-Hing Wong, Wai-kin Wong", "tldr": "", "abstract": "Deep learning approaches have been widely adopted for precipitation nowcasting in recent years. Previous studies mainly focus on proposing new model architectures to improve pixel-wise metrics. However, they frequently result in blurry predictions which provide limited utility to forecasting operations. In this work, we propose a new Fourier Amplitude and Correlation Loss (FACL) which consists of two novel loss terms: Fourier Amplitude Loss (FAL) and Fourier Correlation Loss (FCL). FAL regularizes the Fourier amplitude of the model prediction and FCL complements the missing phase information. The two loss terms work together to replace the traditional L2 losses such as MSE and weighted MSE for the spatiotemporal prediction problem on signal-based data. Our method is generic, parameter-free and efficient. Extensive experiments using one synthetic dataset and three radar echo datasets demonstrate that our method improves perceptual metrics and meteorology skill scores, with a small trade-off to pixel-wise accuracy and structural similarity. Moreover, to improve the error margin in meteorological skill scores such as Critical Success Index (CSI) and Fractions Skill Score (FSS), we propose and adopt the Regional Histogram Divergence (RHD), a distance metric that considers the patch-wise similarity between signal-based imagery patterns with tolerance to local transforms.", "keywords": "Precipitation Nowcasting;Video Prediction;Fourier Analysis;Loss Function", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/ad171ebcf5647c7f5b70afbb423f259018905974.zip", "author": "Chiu-Wai Yan;Shi Quan Foo;Van-Hoan Trinh;Dit-Yan Yeung;Ka-Hing Wong;Wai-Kin Wong", "authorids": "~Chiu-Wai_Yan1;~Shi_Quan_Foo2;~Van-Hoan_Trinh2;~Dit-Yan_Yeung2;~Ka-Hing_Wong1;~Wai-Kin_Wong1", "gender": ";;;M;;", "homepage": ";;;https://cse.hkust.edu.hk/faculty/dyyeung/;;", "dblp": ";;;41/5668;;", "google_scholar": ";;;nEsOOx8AAAAJ;;", "orcid": ";;;0000-0003-3716-8125;;", "linkedin": ";;;;;", "or_profile": "~Chiu-Wai_Yan1;~Shi_Quan_Foo2;~Van-Hoan_Trinh2;~Dit-Yan_Yeung2;~Ka-Hing_Wong1;~Wai-Kin_Wong1", "aff": ";;;Hong Kong University of Science and Technology;;", "aff_domain": ";;;ust.hk;;", "position": ";;;Chair Professor;;", "bibtex": "@inproceedings{\nyan2024fourier,\ntitle={Fourier Amplitude and Correlation Loss: Beyond Using L2 Loss for Skillful Precipitation Nowcasting},\nauthor={Chiu-Wai Yan and Shi Quan Foo and Van-Hoan Trinh and Dit-Yan Yeung and Ka-Hing Wong and Wai-Kin Wong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0aN7VWwp4g}\n}", "github": "", "reviewers": "7mGa;bJuK;w5PB", "pdf_size": 3836644, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "61;63;44", "wc_strengths": "76;60;37", "wc_weaknesses": "134;77;95", "wc_questions": "31;116;37", "wc_limitations": "20;64;1", "wc_review": "322;380;214", "wc_reply_reviewers": "84;0;35", "wc_reply_authors": "474;0;0", "reply_reviewers": "2;0;1", "reply_authors": "3;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.0, 8.524474568362947 ], "wc_strengths_avg": [ 57.666666666666664, 16.006942938057293 ], "wc_weaknesses_avg": [ 102.0, 23.790754506740637 ], "wc_questions_avg": [ 61.333333333333336, 38.7327022323801 ], "wc_limitations_avg": [ 28.333333333333332, 26.386023236217735 ], "wc_review_avg": [ 305.3333333333333, 68.78630354624063 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 34.451253807211266 ], "wc_reply_authors_avg": [ 158.0, 223.44574285494903 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12755476828255561862&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;;ust.hk;;", "author_num": 6, "aff_unique_index": "0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "FlowLLM: Flow Matching for Material Generation with Large Language Models as Base Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96921", "id": "0bFXbEMz8e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0bFXbEMz8e", "openreview": "https://openreview.net/forum?id=0bFXbEMz8e", "poster": "/media/PosterPDFs/NeurIPS%202024/96921.png?t=1730973273.143089", "project": "", "author_site": "Anuroop Sriram, Benjamin K Miller, Ricky T. Q. Chen, Brandon Wood", "tldr": "", "abstract": "Material discovery is a critical area of research with the potential to revolutionize various fields, including carbon capture, renewable energy, and electronics. However, the immense scale of the chemical space makes it challenging to explore all possible materials experimentally. In this paper, we introduce FlowLLM, a novel generative model that combines large language models (LLMs) and Riemannian flow matching (RFM) to design novel crystalline materials. FlowLLM first fine-tunes an LLM to learn an effective base distribution of meta-stable crystals in a text representation. After converting to a graph representation, the RFM model takes samples from the LLM and iteratively refines the coordinates and lattice parameters. Our approach significantly outperforms state-of-the-art methods, increasing the generation rate of stable materials by over three times and increasing the rate for stable, unique, and novel crystals by $\\sim50$% \u2013 a huge improvement on a difficult problem. Additionally, the crystals generated by FlowLLM are much closer to their relaxed state when compared with another leading model, significantly reducing post-hoc computational cost.", "keywords": "generative models;material generation;chemistry;flow matching;large language models", "primary_area": "generative_models", "supplementary_material": "", "author": "Anuroop Sriram;Benjamin Kurt Miller;Ricky T. Q. Chen;Brandon M Wood", "authorids": "~Anuroop_Sriram1;~Benjamin_Kurt_Miller1;~Ricky_T._Q._Chen1;~Brandon_M_Wood1", "gender": "M;M;M;M", "homepage": "https://anuroopsriram.com;http://www.mathben.com/;https://www.bmwood.org;http://www.rtqichen.com", "dblp": "200/7951;269/9572;276/7546;228/6698", "google_scholar": "D4uRc_UAAAAJ;IrCdg_wAAAAJ;KbqboRgAAAAJ;7MxQd6UAAAAJ", "orcid": ";0000-0003-0387-8727;0000-0002-7251-337X;", "linkedin": "anuroopsriram/;benjamin-k-miller/;;", "or_profile": "~Anuroop_Sriram1;~Benjamin_Kurt_Miller1;~Brandon_M_Wood1;~Tian_Qi_Chen2", "aff": "Meta Facebook;University of Amsterdam;FAIR at Meta;FAIR Labs, Meta AI", "aff_domain": "meta.com;uva.nl;meta.com;meta.com", "position": "Principal Researcher;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nsriram2024flowllm,\ntitle={Flow{LLM}: Flow Matching for Material Generation with Large Language Models as Base Distributions},\nauthor={Anuroop Sriram and Benjamin Kurt Miller and Ricky T. Q. Chen and Brandon M Wood},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0bFXbEMz8e}\n}", "github": "", "reviewers": "ixaa;1vvS;sJw2", "pdf_size": 740667, "rating": "6;6;6", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "193;65;65", "wc_strengths": "74;39;70", "wc_weaknesses": "192;216;130", "wc_questions": "97;85;36", "wc_limitations": "7;6;26", "wc_review": "563;411;327", "wc_reply_reviewers": "133;101;101", "wc_reply_authors": "0;40;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 107.66666666666667, 60.339778661252055 ], "wc_strengths_avg": [ 61.0, 15.641824275533422 ], "wc_weaknesses_avg": [ 179.33333333333334, 36.23380864453651 ], "wc_questions_avg": [ 72.66666666666667, 26.386023236217735 ], "wc_limitations_avg": [ 13.0, 9.201449161228174 ], "wc_review_avg": [ 433.6666666666667, 97.67064838299966 ], "wc_reply_reviewers_avg": [ 111.66666666666667, 15.084944665313014 ], "wc_reply_authors_avg": [ 13.333333333333334, 18.856180831641264 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17384224417867090183&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "meta.com;uva.nl;meta.com;meta.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Meta;University of Amsterdam", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.uva.nl", "aff_unique_abbr": "Meta;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Netherlands" }, { "title": "Eye-gaze Guided Multi-modal Alignment for Medical Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96920", "id": "0bINeW40u4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0bINeW40u4", "openreview": "https://openreview.net/forum?id=0bINeW40u4", "poster": "", "project": "", "author_site": "Chong Ma, Hanqi Jiang, Wenting Chen, Yiwei Li, Zihao Wu, Xiaowei Yu, Zhengliang Liu, Lei Guo, Dajiang Zhu, Tuo Zhang, Dinggang Shen, Tianming Liu, Xiang Li", "tldr": "", "abstract": "In the medical multi-modal frameworks, the alignment of cross-modality features presents a significant challenge. However, existing works have learned features that are implicitly aligned from the data, without considering the explicit relationships in the medical context. This data-reliance may lead to low generalization of the learned alignment relationships. In this work, we propose the Eye-gaze Guided Multi-modal Alignment (EGMA) framework to harness eye-gaze data for better alignment of medical visual and textual features. We explore the natural auxiliary role of radiologists' eye-gaze data in aligning medical images and text, and introduce a novel approach by using eye-gaze data, collected synchronously by radiologists during diagnostic evaluations. We conduct downstream tasks of image classification and image-text retrieval on four medical datasets, where EGMA achieved state-of-the-art performance and stronger generalization across different datasets. Additionally, we explore the impact of varying amounts of eye-gaze data on model performance, highlighting the feasibility and utility of integrating this auxiliary data into multi-modal alignment framework.", "keywords": "Medical Multi-modal Alignment;Eye-gaze;Radiology", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Chong Ma;Hanqi Jiang;Wenting Chen;Yiwei Li;Zihao Wu;Xiaowei Yu;Zhengliang Liu;Lei Guo;Dajiang Zhu;Tuo Zhang;Dinggang Shen;Tianming Liu;Xiang Li", "authorids": "~Chong_Ma2;~Hanqi_Jiang2;~Wenting_Chen1;~Yiwei_Li2;~Zihao_Wu1;~Xiaowei_Yu1;~Zhengliang_Liu1;~Lei_Guo7;~Dajiang_Zhu1;~Tuo_Zhang3;~Dinggang_Shen1;~Tianming_Liu3;~Xiang_Li14", "gender": "M;M;F;M;M;M;M;;M;M;M;M;M", "homepage": ";http://hq0709.github.io;https://scholar.google.com/citations?user=3dtKW_8AAAAJ&hl=zh-CN;https://github.com/levyisthebest;;http://shawey94.github.io/;;;https://mentis.uta.edu/explore/profile/dajiang-zhu;;;https://cobweb.cs.uga.edu/~tliu/;https://xiangli-shaun.github.io/", "dblp": ";344/1868;135/7011;;185/2651;;;;https://dblp.uni-trier.de/pers/hd/z/Zhu:Dajiang;;14/4383;96/5013-1.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=zh-CN;3dtKW_8AAAAJ;sfEPWiAAAAAJ;RMvoE4sAAAAJ;Kc1FjToAAAAJ;p8tAM0AAAAAJ;;cFgudIYAAAAJ;I9r2zF0AAAAJ;v6VYQC8AAAAJ;92RPXm0AAAAJ;MjkwwiQAAAAJ", "orcid": "0000-0002-5068-8814;;0000-0002-7457-9540;0000-0001-9830-3285;0000-0001-7483-6570;;0000-0001-7061-6714;;;;;;0000-0002-9851-6376", "linkedin": ";;;yiwei-li-84708526b/;;shawey94;;;;;;;xiang-shaun-li-11b2b99/", "or_profile": "~Chong_Ma2;~Hanqi_Jiang2;~Wenting_Chen1;~Yiwei_Li2;~Zihao_Wu1;~Xiaowei_Yu1;~Zhengliang_Liu1;~Lei_Guo7;~Dajiang_Zhu1;~Tuo_Zhang3;~Dinggang_Shen1;~Tianming_Liu3;~Xiang_Li14", "aff": "Northwest Polytechnical University Xi'an;Beijing Jiaotong University;City University of Hong Kong;University of Georgia;University of Georgia;University of Texas at Arlington, University of Texas at Arlington;University of Georgia;;University of Texas at Arlington;Northwestern Polytechnical University;ShanghaiTech University;University of Georgia;Massachusetts General Hospital, Harvard University", "aff_domain": "nwpu.edu.cn;bjtu.edu.cn;cityu.edu.hk;uga.edu;uga.edu;mavs.uta.edu;uga.edu;;uta.edu;nwpu.edu.cn;shanghaitech.edu.cn;uga.edu;mgh.harvard.edu", "position": "PhD student;Undergrad student;PhD student;PhD student;PhD student;PhD student;PhD student;;Assistant Professor;Associate Professor;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nma2024eyegaze,\ntitle={Eye-gaze Guided Multi-modal Alignment for Medical Representation Learning},\nauthor={Chong Ma and Hanqi Jiang and Wenting Chen and Yiwei Li and Zihao Wu and Xiaowei Yu and Zhengliang Liu and Lei Guo and Dajiang Zhu and Tuo Zhang and Dinggang Shen and Tianming Liu and Xiang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0bINeW40u4}\n}", "github": "", "reviewers": "n21y;hPhG;V2ie;YZxW", "pdf_size": 1756046, "rating": "5;5;7;8", "confidence": "2;4;4;4", "soundness": "3;2;4;4", "novelty": "3;2;4;4", "presentation": "3;2;3;4", "wc_summary": "81;34;40;100", "wc_strengths": "173;12;69;186", "wc_weaknesses": "77;225;145;58", "wc_questions": "64;46;63;33", "wc_limitations": "1;3;28;67", "wc_review": "396;320;345;444", "wc_reply_reviewers": "0;21;0;154", "wc_reply_authors": "0;27;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 63.75, 27.66202270261522 ], "wc_strengths_avg": [ 110.0, 72.50862017718997 ], "wc_weaknesses_avg": [ 126.25, 65.54912280114814 ], "wc_questions_avg": [ 51.5, 12.854960132182441 ], "wc_limitations_avg": [ 24.75, 26.61179249881526 ], "wc_review_avg": [ 376.25, 47.75130888258457 ], "wc_reply_reviewers_avg": [ 43.75, 64.22762256225899 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8602216241691121726&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nwpu.edu.cn;bjtu.edu.cn;cityu.edu.hk;uga.edu;uga.edu;mavs.uta.edu;uga.edu;;uta.edu;nwpu.edu.cn;shanghaitech.edu.cn;uga.edu;mgh.harvard.edu", "author_num": 13, "aff_unique_index": "0;1;2;3;3;4;3;4;5;6;3;7", "aff_unique_norm": "Northwest Polytechnical University;Beijing Jiao Tong University;City University of Hong Kong;University of Georgia;University of Texas at Arlington;Northwestern Polytechnical University;ShanghaiTech University;Harvard University", "aff_unique_dep": ";;;;;;;Massachusetts General Hospital", "aff_unique_url": "http://www.nwpu.edu.cn;http://www.njtu.edu.cn/en;https://www.cityu.edu.hk;https://www.uga.edu;https://www.uta.edu;https://www.nwpu.edu.cn;https://www.shanghaitech.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "NWPU;BJTU;CityU;UGA;UTA;NWPU;ShanghaiTech;Harvard", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Xi'an;;Hong Kong SAR;Arlington", "aff_country_unique_index": "0;0;0;1;1;1;1;1;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Generalization of Hamiltonian algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96919", "id": "0cSQ1Sg7db", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0cSQ1Sg7db", "openreview": "https://openreview.net/forum?id=0cSQ1Sg7db", "poster": "", "project": "", "tldr": "", "abstract": "A method to prove generalization results for a class of stochastic learning algorithms is presented. It applies whenever the algorithm generates a distribution, which is absolutely continuous distribution relative to some a-priori measure, and the logarithm of its density is exponentially concentrated about its mean. Applications include bounds for the Gibbs algorithm and randomizations of stable deterministic algorithms, combinations thereof and PAC-Bayesian bounds with data-dependent priors.", "keywords": "Generalization;stochastic algorithms;concentration inequalities;PAC-Bayes", "primary_area": "learning_theory", "supplementary_material": "", "author": "Andreas Maurer", "authorids": "~Andreas_Maurer1", "gender": "", "homepage": "", "dblp": "69/6428", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Andreas_Maurer1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nmaurer2024generalization,\ntitle={Generalization of Hamiltonian algorithms},\nauthor={Andreas Maurer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0cSQ1Sg7db}\n}", "github": "", "reviewers": "Dxrq;hAd4;mcjd;16mk", "pdf_size": 442936, "rating": "6;6;6;8", "confidence": "4;2;4;2", "soundness": "3;2;4;4", "novelty": "3;2;3;3", "presentation": "3;2;4;4", "wc_summary": "39;416;105;62", "wc_strengths": "41;2;196;103", "wc_weaknesses": "64;2;170;78", "wc_questions": "2;2;212;106", "wc_limitations": "2;2;1;64", "wc_review": "148;424;684;413", "wc_reply_reviewers": "22;17;37;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 155.5, 152.25389978585113 ], "wc_strengths_avg": [ 85.5, 73.26151786579364 ], "wc_weaknesses_avg": [ 78.5, 60.07287241342801 ], "wc_questions_avg": [ 80.5, 86.98706800438787 ], "wc_limitations_avg": [ 17.25, 26.994212342648563 ], "wc_review_avg": [ 417.25, 189.54864151451997 ], "wc_reply_reviewers_avg": [ 25.0, 7.3824115301167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14305794432617570410&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "Sourcerer: Sample-based Maximum Entropy Source Distribution Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96918", "id": "0cgDDa4OFr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0cgDDa4OFr", "openreview": "https://openreview.net/forum?id=0cgDDa4OFr", "poster": "/media/PosterPDFs/NeurIPS%202024/96918.png?t=1733318904.6928847", "project": "", "author_site": "Julius Vetter, Guy Moss, Cornelius Schr\u00f6der, Richard Gao, Jakob H Macke", "tldr": "", "abstract": "Scientific modeling applications often require estimating a distribution of parameters consistent with a dataset of observations - an inference task also known as source distribution estimation. This problem can be ill-posed, however, since many different source distributions might produce the same distribution of data-consistent simulations. To make a principled choice among many equally valid sources, we propose an approach which targets the maximum entropy distribution, i.e., prioritizes retaining as much uncertainty as possible. Our method is purely sample-based - leveraging the Sliced-Wasserstein distance to measure the discrepancy between the dataset and simulations - and thus suitable for simulators with intractable likelihoods. We benchmark our method on several tasks, and show that it can recover source distributions with substantially higher entropy than recent source estimation methods, without sacrificing the fidelity of the simulations. Finally, to demonstrate the utility of our approach, we infer source distributions for parameters of the Hodgkin-Huxley model from experimental datasets with hundreds of single-neuron measurements. In summary, we propose a principled method for inferring source distributions of scientific simulator parameters while retaining as much uncertainty as possible.", "keywords": "source distribution estimation;maximum entropy;Sliced-Wasserstein distance;empirical Bayes;simulation-based inference", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/7782a0e8c10b8a97fe73918bc180c6e0b224848f.zip", "author": "Julius Vetter;Guy Moss;Cornelius Schr\u00f6der;Richard Gao;Jakob H. Macke", "authorids": "~Julius_Vetter2;~Guy_Moss1;~Cornelius_Schr\u00f6der1;~Richard_Gao1;~Jakob_H._Macke1", "gender": ";M;M;;M", "homepage": ";;;https://www.rdgao.com/;http://www.mackelab.org", "dblp": "368/7633.html;363/7754;255/6950;211/3796;97/11106", "google_scholar": "hk1YdnwAAAAJ;pmbqojcAAAAJ;https://scholar.google.de/citations?user=HpLWKHEAAAAJ;a2o9IKYAAAAJ;FKOqtF8AAAAJ", "orcid": "0009-0003-9839-9039;0000-0002-5504-0523;0000-0001-5643-2097;;0000-0001-5154-8912", "linkedin": ";guy-moss-532619221/;;;", "or_profile": "~Julius_Vetter2;~Guy_Moss1;~Cornelius_Schr\u00f6der1;~Richard_Gao1;~Jakob_H_Macke1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nvetter2024sourcerer,\ntitle={Sourcerer: Sample-based Maximum Entropy Source Distribution Estimation},\nauthor={Julius Vetter and Guy Moss and Cornelius Schr{\\\"o}der and Richard Gao and Jakob H. Macke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0cgDDa4OFr}\n}", "github": "", "reviewers": "swSP;J2pY;JA3k;8Zuo;DWPT", "pdf_size": 8059710, "rating": "4;5;6;7;7", "confidence": "3;2;3;3;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "59;133;318;94;61", "wc_strengths": "65;171;6;121;48", "wc_weaknesses": "128;91;278;117;38", "wc_questions": "98;159;7;14;96", "wc_limitations": "3;113;1;7;22", "wc_review": "353;667;610;353;265", "wc_reply_reviewers": "0;13;15;18;75", "wc_reply_authors": "0;7;7;7;7", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 133.0, 96.33898483999093 ], "wc_strengths_avg": [ 82.2, 57.72832926735365 ], "wc_weaknesses_avg": [ 130.4, 80.07646345837209 ], "wc_questions_avg": [ 74.8, 57.22027612656199 ], "wc_limitations_avg": [ 29.2, 42.54127407589011 ], "wc_review_avg": [ 449.6, 158.57566017519838 ], "wc_reply_reviewers_avg": [ 24.2, 26.13350340080717 ], "wc_reply_authors_avg": [ 5.6, 2.8000000000000003 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9542186682018156139&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Non-parametric classification via expand-and-sparsify representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96917", "id": "0d50Il6enG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0d50Il6enG", "openreview": "https://openreview.net/forum?id=0d50Il6enG", "poster": "/media/PosterPDFs/NeurIPS%202024/96917.png?t=1731697679.5420048", "project": "", "tldr": "", "abstract": "In *expand-and-sparsify* (EaS) representation, a data point in $\\mathcal{S}^{d-1}$ is first randomly mapped to higher dimension $\\mathbb{R}^m$, where $m>d$, followed by a sparsification operation where the informative $k \\ll m$ of the $m$ coordinates are set to one and the rest are set to zero. We propose two algorithms for non-parametric classification using such EaS representation. For our first algorithm, we use *winners-take-all* operation for the sparsification step and show that the proposed classifier admits the form of a locally weighted average classifier and establish its consistency via Stone's Theorem. Further, assuming that the conditional probability function $P(y=1|x)=\\eta(x)$ is H\\\"{o}lder continuous and for optimal choice of $m$, we show that the convergence rate of this classifier is minimax-optimal. For our second algorithm, we use *empirical $k$-thresholding* operation for the sparsification step, and under the assumption that data lie on a low dimensional manifold of dimension $d_0\\ll d$, we show that the convergence rate of this classifier depends only on $d_0$ and is again minimax-optimal. Empirical evaluations performed on real-world datasets corroborate our theoretical results.", "keywords": "non-parametric regression;non-parametric classification;expand-and-sparsify representation;universal consistency;minimax-optimal convergence rate", "primary_area": "other", "supplementary_material": "/attachment/03522da8d4ce09bb2f480613ce56c5470f3cf125.zip", "author": "Kaushik Sinha", "authorids": "~Kaushik_Sinha1", "gender": "M", "homepage": "https://kaushik-sinha.github.io/", "dblp": "64/2141", "google_scholar": "_jAhb40AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Kaushik_Sinha1", "aff": "Wichita State University", "aff_domain": "wichita.edu", "position": "Associate Professor", "bibtex": "@inproceedings{\nsinha2024nonparametric,\ntitle={Non-parametric classification via expand-and-sparsify representation},\nauthor={Kaushik Sinha},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0d50Il6enG}\n}", "github": "", "reviewers": "E4XV;4EZA;Q16i;QQJv", "pdf_size": 783848, "rating": "5;6;6;7", "confidence": "2;3;4;3", "soundness": "3;3;4;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "159;105;80;150", "wc_strengths": "33;24;191;65", "wc_weaknesses": "86;91;126;29", "wc_questions": "43;72;317;74", "wc_limitations": "25;7;6;1", "wc_review": "346;299;720;319", "wc_reply_reviewers": "59;17;15;11", "wc_reply_authors": "840;40;37;37", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.5, 32.39212867349104 ], "wc_strengths_avg": [ 78.25, 66.85572152030072 ], "wc_weaknesses_avg": [ 83.0, 34.777866524558405 ], "wc_questions_avg": [ 126.5, 110.66729417492776 ], "wc_limitations_avg": [ 9.75, 9.093266739736606 ], "wc_review_avg": [ 421.0, 173.43154269048063 ], "wc_reply_reviewers_avg": [ 25.5, 19.461500456028563 ], "wc_reply_authors_avg": [ 238.5, 347.2783465751932 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tT0iwjUl5uQJ:scholar.google.com/&scioq=Non-parametric+classification+via+expand-and-sparsify+representation&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "wichita.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Wichita State University", "aff_unique_dep": "", "aff_unique_url": "https://www.wichita.edu", "aff_unique_abbr": "WSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "DeNetDM: Debiasing by Network Depth Modulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96916", "id": "0dtA21q83C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0dtA21q83C", "openreview": "https://openreview.net/forum?id=0dtA21q83C", "poster": "/media/PosterPDFs/NeurIPS%202024/96916.png?t=1733689557.490417", "project": "", "author_site": "Silpa Vadakkeeveetil Sreelatha, Adarsh Kappiyath, ABHRA CHAUDHURI, Anjan Dutta", "tldr": "", "abstract": "Neural networks trained on biased datasets tend to inadvertently learn spurious correlations, hindering generalization. We formally prove that (1) samples that exhibit spurious correlations lie on a lower rank manifold relative to the ones that do not; and (2) the depth of a network acts as an implicit regularizer on the rank of the attribute subspace that is encoded in its representations. Leveraging these insights, we present DeNetDM, a novel debiasing method that uses network depth modulation as a way of developing robustness to spurious correlations. Using a training paradigm derived from Product of Experts, we create both biased and debiased branches with deep and shallow architectures and then distill knowledge to produce the target debiased model. Our method requires no bias annotations or explicit data augmentation while performing on par with approaches that require either or both. We demonstrate that DeNetDM outperforms existing debiasing techniques on both synthetic and real-world datasets by 5\\%. The project page is available at https://vssilpa.github.io/denetdm/.", "keywords": "Trustworthy Machine Learning;Debiasing;Robustness", "primary_area": "fairness", "supplementary_material": "", "author": "Silpa Vadakkeeveetil Sreelatha;Adarsh Kappiyath;Abhra Chaudhuri;Anjan Dutta", "authorids": "~Silpa_Vadakkeeveetil_Sreelatha1;~Adarsh_Kappiyath1;~Abhra_Chaudhuri1;~Anjan_Dutta1", "gender": "F;;M;M", "homepage": ";;https://sites.google.com/view/abhrachaudhuri/;https://sites.google.com/site/2adutta/", "dblp": "268/1342;;330/4583;91/8278-1", "google_scholar": "wg72gbUAAAAJ;;6KWxpxkAAAAJ;https://scholar.google.co.uk/citations?user=1aKTzmIAAAAJ", "orcid": ";;;0000-0002-1667-2245", "linkedin": ";;abhra-chaudhuri-126a09150;anjan-dutta-a97b4071/", "or_profile": "~Silpa_Vadakkeeveetil_Sreelatha1;~Adarsh_Kappiyath1;~Abhra_Chaudhuri1;~Anjan_Dutta1", "aff": "University of Surrey;;University of Exeter;University of Surrey", "aff_domain": "surrey.ac.uk;;exeter.ac.uk;surrey.ac.uk", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsreelatha2024denetdm,\ntitle={DeNet{DM}: Debiasing by Network Depth Modulation},\nauthor={Silpa Vadakkeeveetil Sreelatha and Adarsh Kappiyath and Abhra Chaudhuri and Anjan Dutta},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0dtA21q83C}\n}", "github": "", "reviewers": "9dFt;Bkbf;CcWa;2yWj", "pdf_size": 3556950, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "93;77;58;99", "wc_strengths": "150;95;37;91", "wc_weaknesses": "291;161;105;205", "wc_questions": "74;4;2;54", "wc_limitations": "10;10;2;21", "wc_review": "618;347;204;470", "wc_reply_reviewers": "18;16;150;126", "wc_reply_authors": "0;0;489;140", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.75, 15.896147331979533 ], "wc_strengths_avg": [ 93.25, 39.97733733004238 ], "wc_weaknesses_avg": [ 190.5, 67.99080820228569 ], "wc_questions_avg": [ 33.5, 31.316928329579195 ], "wc_limitations_avg": [ 10.75, 6.7592529172978875 ], "wc_review_avg": [ 409.75, 152.69966437422187 ], "wc_reply_reviewers_avg": [ 77.5, 61.096235563248904 ], "wc_reply_authors_avg": [ 157.25, 199.88168375316434 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7338564153015473865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "surrey.ac.uk;;exeter.ac.uk;surrey.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Surrey;University of Exeter", "aff_unique_dep": ";", "aff_unique_url": "https://www.surrey.ac.uk;https://www.exeter.ac.uk", "aff_unique_abbr": "Surrey;Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Complete Protein Representation by Dynamically Coupling of Sequence and Structure", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96915", "id": "0e5uOaJxo1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0e5uOaJxo1", "openreview": "https://openreview.net/forum?id=0e5uOaJxo1", "poster": "/media/PosterPDFs/NeurIPS%202024/96915.png?t=1730385968.8958924", "project": "", "author_site": "Bozhen Hu, Cheng Tan, Jun Xia, Yue Liu, Lirong Wu, Jiangbin Zheng, Yongjie Xu, Yufei Huang, Stan Z. Li", "tldr": "", "abstract": "Learning effective representations is imperative for comprehending proteins and deciphering their biological functions. Recent strides in language models and graph neural networks have empowered protein models to harness primary or tertiary structure information for representation learning. Nevertheless, the absence of practical methodologies to appropriately model intricate inter-dependencies between protein sequences and structures has resulted in embeddings that exhibit low performance on tasks such as protein function prediction. In this study, we introduce CoupleNet, a novel framework designed to interlink protein sequences and structures to derive informative protein representations. CoupleNet integrates multiple levels and scales of features in proteins, encompassing residue identities and positions for sequences, as well as geometric representations for tertiary structures from both local and global perspectives. A two-type dynamic graph is constructed to capture adjacent and distant sequential features and structural geometries, achieving completeness at the amino acid and backbone levels. Additionally, convolutions are executed on nodes and edges simultaneously to generate comprehensive protein embeddings. Experimental results on benchmark datasets showcase that CoupleNet outperforms state-of-the-art methods, exhibiting particularly superior performance in low-sequence similarities scenarios, adeptly identifying infrequently encountered functions and effectively capturing remote homology relationships in proteins.", "keywords": "Graph Neural Network;Protein Sequence;Structure", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Bozhen Hu;Cheng Tan;Jun Xia;Yue Liu;Lirong Wu;Jiangbin Zheng;Yongjie Xu;Yufei Huang;Stan Z. Li", "authorids": "~Bozhen_Hu1;~Cheng_Tan1;~Jun_Xia1;~Yue_Liu10;~Lirong_Wu1;~Jiangbin_Zheng3;~Yongjie_Xu2;~Yufei_Huang4;~Stan_Z._Li2", "gender": "M;M;M;M;;M;M;M;M", "homepage": ";https://chengtan9907.github.io/;http://junxia97.github.io/;https://yueliu1999.github.io/;;;;https://2021.igem.org/Team:ZJU-China;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "279/8665;70/1533-12.html;;74/1932-8;15/10330;;123/9257.html;68/1946-2;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;6kTV6aMAAAAJ;aPKKpSYAAAAJ;5tfpu3MAAAAJ;Tk7TrCoAAAAJ;;https://scholar.google.com.hk/citations?user=ciG27FYAAAAJ;qmTjdwIAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-3428-0114;;;;;0000-0003-3305-0103;0000-0002-6045-1626;0009-0007-8184-4529;", "linkedin": ";;;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Bozhen_Hu1;~Cheng_Tan1;~Jun_Xia1;~Yue_Liu10;~Lirong_Wu1;~Jiangbin_Zheng3;~Yongjie_Xu2;~Yufei_Huang4;~Stan_Z._Li1", "aff": "Westlake University;Zhejiang University & Westlake University;Westlake University, China;University of Illinois, Urbana Champaign;Westlake University;Westlake University;Westlake University;Zhejiang University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;uiuc.edu;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;zju.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;PhD student;Intern;PhD student;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\nhu2024learning,\ntitle={Learning Complete Protein Representation by Dynamically Coupling of Sequence and Structure},\nauthor={Bozhen Hu and Cheng Tan and Jun Xia and Yue Liu and Lirong Wu and Jiangbin Zheng and Yongjie Xu and Yufei Huang and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0e5uOaJxo1}\n}", "github": "", "reviewers": "JwZm;ANTD;fLEX", "pdf_size": 2658652, "rating": "5;5;5", "confidence": "3;3;4", "soundness": "3;2;1", "novelty": "2;2;3", "presentation": "2;2;2", "wc_summary": "69;63;52", "wc_strengths": "28;29;35", "wc_weaknesses": "356;47;253", "wc_questions": "177;71;31", "wc_limitations": "82;1;8", "wc_review": "712;211;379", "wc_reply_reviewers": "19;51;149", "wc_reply_authors": "122;123;129", "reply_reviewers": "1;1;1", "reply_authors": "3;3;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 61.333333333333336, 7.039570693980959 ], "wc_strengths_avg": [ 30.666666666666668, 3.0912061651652345 ], "wc_weaknesses_avg": [ 218.66666666666666, 128.46357027923867 ], "wc_questions_avg": [ 93.0, 61.60086579478138 ], "wc_limitations_avg": [ 30.333333333333332, 36.64544840616484 ], "wc_review_avg": [ 434.0, 208.1970220728433 ], "wc_reply_reviewers_avg": [ 73.0, 55.3052137385497 ], "wc_reply_authors_avg": [ 124.66666666666667, 3.0912061651652345 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JEzInv6GykAJ:scholar.google.com/&scioq=Learning+Complete+Protein+Representation+by+Dynamically+Coupling+of+Sequence+and+Structure&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;uiuc.edu;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;zju.edu.cn;westlake.edu.cn", "author_num": 9, "aff_unique_index": "0;1;0;2;0;0;0;1;0", "aff_unique_norm": "Westlake University;Zhejiang University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.westlake.edu.cn;http://www.zju.edu.cn;https://illinois.edu", "aff_unique_abbr": "WU;ZJU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Mobility-LLM: Learning Visiting Intentions and Travel Preference from Human Mobility Data with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96914", "id": "0feJEykDRx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0feJEykDRx", "openreview": "https://openreview.net/forum?id=0feJEykDRx", "poster": "/media/PosterPDFs/NeurIPS%202024/96914.png?t=1731289337.5941467", "project": "", "author_site": "Letian Gong, Yan Lin, Xinyue Zhang, Yiwen Lu, Xuedi Han, Yichen Liu, Shengnan Guo, Youfang Lin, Huaiyu Wan", "tldr": "", "abstract": "Location-based services (LBS) have accumulated extensive human mobility data on diverse behaviors through check-in sequences. These sequences offer valuable insights into users\u2019 intentions and preferences. Yet, existing models analyzing check-in sequences fail to consider the semantics contained in these sequences, which closely reflect human visiting intentions and travel preferences, leading to an incomplete comprehension. Drawing inspiration from the exceptional semantic understanding and contextual information processing capabilities of large language models (LLMs) across various domains, we present Mobility-LLM, a novel framework that leverages LLMs to analyze check-in sequences for multiple tasks. Since LLMs cannot directly interpret check-ins, we reprogram these sequences to help LLMs comprehensively understand the semantics of human visiting intentions and travel preferences. \nSpecifically, we introduce a visiting intention memory network (VIMN) to capture the visiting intentions at each record, along with a shared pool of human travel preference prompts (HTPP) to guide the LLM in understanding users\u2019 travel preferences. These components enhance the model\u2019s ability to extract and leverage semantic information from human mobility data effectively. Extensive experiments on four benchmark datasets and three downstream tasks demonstrate that our approach significantly outperforms existing models, underscoring the effectiveness of Mobility-LLM in advancing our understanding of human mobility data within LBS contexts.", "keywords": "spatial-temporal data mining;location-based service;check-in sequence;large language model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Letian Gong;Yan Lin;Xinyue Zhang;Yiwen Lu;Xuedi Han;Yichen Liu;Shengnan Guo;Youfang Lin;Huaiyu Wan", "authorids": "~Letian_Gong1;~Yan_Lin1;~Xinyue_Zhang9;~Yiwen_Lu5;~Xuedi_Han1;~Yichen_Liu7;~Shengnan_Guo1;~Youfang_Lin1;~Huaiyu_Wan1", "gender": "M;M;;F;;F;F;M;M", "homepage": ";https://www.yanlincs.com;https://orcid.org/;https://EvenEureka.github.io;https://github.com/hanxuedi;https://github.com/yichenliuzong;http://faculty.bjtu.edu.cn/9685/;https://faculty.bjtu.edu.cn/7443/;https://faculty.bjtu.edu.cn/8793/", "dblp": ";27/586-6;;;;72/8807-3;163/0779-1;12/4988;07/9988", "google_scholar": ";nHMmG2UAAAAJ;;;;;3JsSBYsAAAAJ;e8xT-e0AAAAJ;T5wVWIUAAAAJ", "orcid": "0009-0003-9960-8154;0000-0002-2320-9777;;;;0009-0000-6217-2113;0000-0002-3008-4511;0000-0002-5143-3645;0000-0002-0501-9363", "linkedin": ";;;;;;;youfang-lin-a1625091/;", "or_profile": "~Letian_Gong1;~Yan_Lin1;~Xinyue_Zhang9;~Yiwen_Lu5;~Xuedi_Han1;~Yichen_Liu7;~Shengnan_Guo1;~Youfang_Lin1;~Huaiyu_Wan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;MS student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngong2024mobilityllm,\ntitle={Mobility-{LLM}: Learning Visiting Intentions and Travel Preference from Human Mobility Data with Large Language Models},\nauthor={Letian Gong and Yan Lin and Xinyue Zhang and Yiwen Lu and Xuedi Han and Yichen Liu and Shengnan Guo and Youfang Lin and Huaiyu Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0feJEykDRx}\n}", "github": "", "reviewers": "NycQ;6rUV;Mti9;wrzL", "pdf_size": 3283440, "rating": "4;5;7;7", "confidence": "4;4;4;4", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "117;94;98;59", "wc_strengths": "52;55;88;64", "wc_weaknesses": "181;59;160;87", "wc_questions": "43;129;69;86", "wc_limitations": "7;7;28;43", "wc_review": "400;344;443;339", "wc_reply_reviewers": "47;1124;38;9", "wc_reply_authors": "476;2941;82;0", "reply_reviewers": "1;3;1;1", "reply_authors": "2;9;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.0, 20.940391591371924 ], "wc_strengths_avg": [ 64.75, 14.13108276106258 ], "wc_weaknesses_avg": [ 121.75, 50.29599884682677 ], "wc_questions_avg": [ 81.75, 31.283981524096323 ], "wc_limitations_avg": [ 21.25, 15.20485119953497 ], "wc_review_avg": [ 381.5, 42.82814495165533 ], "wc_reply_reviewers_avg": [ 304.5, 473.3468601353557 ], "wc_reply_authors_avg": [ 874.75, 1206.4442330667423 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 3.2015621187164243 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11727192210468579308&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural collapse vs. low-rank bias: Is deep neural collapse really optimal?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96913", "id": "0jld45XGgJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0jld45XGgJ", "openreview": "https://openreview.net/forum?id=0jld45XGgJ", "poster": "", "project": "", "author_site": "Peter S\u00faken\u00edk, Christoph Lampert, Marco Mondelli", "tldr": "", "abstract": "Deep neural networks (DNNs) exhibit a surprising structure in their final layer known as neural collapse (NC), and a growing body of works is currently investigated the propagation of neural collapse to earlier layers of DNNs -- a phenomenon called deep neural collapse (DNC). However, existing theoretical results are restricted to either linear models, the last two layers or binary classification. In contrast, we focus on non-linear models of arbitrary depth in multi-class classification and reveal a surprising qualitative shift. As soon as we go beyond two layers or two classes, DNC stops being optimal for the deep unconstrained features model (DUFM) -- the standard theoretical framework for the analysis of collapse. The main culprit is the low-rank bias of multi-layer regularization schemes. This bias leads to optimal solutions of even lower rank than the neural collapse. We support our theoretical findings with experiments on both DUFM and real data, which show the emergence of the low-rank structure in the solution found by gradient descent.", "keywords": "neural collapse;deep neural collapse;unconstrained features model;deep unconstrained features model;low-rank bias", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Peter S\u00faken\u00edk;Christoph H. Lampert;Marco Mondelli", "authorids": "~Peter_S\u00faken\u00edk1;~Christoph_H._Lampert6;~Marco_Mondelli1", "gender": "M;M;M", "homepage": "https://research-explorer.app.ist.ac.at/person/d64d6a8d-eb8e-11eb-b029-96fd216dec3c;http://marcomondelli.com;http://cvml.ist.ac.at/", "dblp": "304/2274;120/7089;67/2136", "google_scholar": "qEhrUDAAAAAJ;BHdSb5AAAAAJ;https://scholar.google.at/citations?user=iCf3SwgAAAAJ", "orcid": ";;0000-0001-8622-7887", "linkedin": ";;", "or_profile": "~Peter_S\u00faken\u00edk1;~Marco_Mondelli1;~Christoph_H_Lampert1", "aff": "Institute of Science and Technology;Institute of Science and Technology;Institute of Science and Technology Austria", "aff_domain": "ist.ac.at;ist.ac.at;ist.ac.at", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\ns{\\'u}ken{\\'\\i}k2024neural,\ntitle={Neural collapse vs. low-rank bias: Is deep neural collapse really optimal?},\nauthor={Peter S{\\'u}ken{\\'\\i}k and Christoph H. Lampert and Marco Mondelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0jld45XGgJ}\n}", "github": "", "reviewers": "hQr6;jQsy;dZSf;kT8c", "pdf_size": 1784118, "rating": "5;6;6;9", "confidence": "4;2;4;5", "soundness": "2;4;3;4", "novelty": "3;3;3;4", "presentation": "2;4;3;4", "wc_summary": "86;303;238;104", "wc_strengths": "53;72;43;19", "wc_weaknesses": "289;28;59;12", "wc_questions": "135;84;208;1", "wc_limitations": "1;1;1;6", "wc_review": "564;488;549;142", "wc_reply_reviewers": "73;20;26;285", "wc_reply_authors": "169;5;5;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 182.75, 90.93232373584215 ], "wc_strengths_avg": [ 46.75, 19.109879643786353 ], "wc_weaknesses_avg": [ 97.0, 112.13161909113771 ], "wc_questions_avg": [ 107.0, 75.41551564499179 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 435.75, 171.9685654414783 ], "wc_reply_reviewers_avg": [ 101.0, 108.19658035261558 ], "wc_reply_authors_avg": [ 44.75, 71.76480683454808 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5353033790313108, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14362352551278789784&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "email": "ist.ac.at;ist.ac.at;ist.ac.at", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Austria" }, { "title": "Near-Optimal Distributionally Robust Reinforcement Learning with General $L_p$ Norms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96912", "id": "0l9yGPTHAU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0l9yGPTHAU", "openreview": "https://openreview.net/forum?id=0l9yGPTHAU", "poster": "", "project": "", "author_site": "Pierre Clavier, Laixi Shi, Erwan Le Pennec, Eric Mazumdar, Adam Wierman, Matthieu Geist", "tldr": "", "abstract": "To address the challenges of sim-to-real gap and sample efficiency in reinforcement learning (RL), this work studies distributionally robust Markov decision processes (RMDPs) --- optimize the worst-case performance when the deployed environment is within an uncertainty set around some nominal MDP. Despite recent efforts, the sample complexity of RMDPs has remained largely undetermined. While the statistical implications of distributional robustness in RL have been explored in some specific cases, the generalizability of the existing findings remains unclear, especially in comparison to standard RL. Assuming access to a generative model that samples from the nominal MDP, we examine the sample complexity of RMDPs using a class of generalized $L_p$ norms as the 'distance' function for the uncertainty set, under two commonly adopted $sa$-rectangular and $s$-rectangular conditions. Our results imply that RMDPs can be more sample-efficient to solve than standard MDPs using generalized $L_p$ norms in both $sa$- and $s$-rectangular cases, potentially inspiring more empirical research.\n We provide a near-optimal upper bound and a matching minimax lower bound for the $sa$-rectangular scenarios. For $s$-rectangular cases, we improve the state-of-the-art upper bound and also derive a lower bound using $L_\\infty$ norm that verifies the tightness.", "keywords": "Robust Markov decision process;norm;sample complexity;strong duality", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Pierre Clavier;Laixi Shi;Erwan Le Pennec;Eric Mazumdar;Adam Wierman;Matthieu Geist", "authorids": "~Pierre_Clavier1;~Laixi_Shi1;~Erwan_Le_Pennec1;~Eric_Mazumdar1;~Adam_Wierman1;~Matthieu_Geist1", "gender": "M;F;M;M;M;M", "homepage": "https://pierreclavier.github.io/aboutme/;https://laixishi.github.io/;http://www.cmap.polytechnique.fr/~lepennec;http://people.eecs.berkeley.edu/~emazumdar/;https://adamwierman.com/;", "dblp": ";211/7965;95/4124;177/9322;56/4447;38/6508", "google_scholar": "-KnIaGsAAAAJ;V8RkRr8AAAAJ;;FZOxxvcAAAAJ;4OvOdSgAAAAJ;ectPLEUAAAAJ", "orcid": ";;;;0000-0002-5923-0199;", "linkedin": "pierre-clavier-823171135/;;;;adam-wierman-a529474/;", "or_profile": "~Pierre_Clavier1;~Laixi_Shi1;~Erwan_Le_Pennec1;~Eric_Mazumdar1;~Adam_Wierman1;~Matthieu_Geist1", "aff": "\u00c9cole Polytechnique;California Institute of Technology;Ecole polytechnique;Deparment of Computing + Mathematical Sciences, California Institute of Technology;California Institute of Technology;Google", "aff_domain": "polytechnique.edu;caltech.edu;polytechnique.edu;cms.caltech.edu;caltech.edu;google.com", "position": "PhD student;Postdoc;Full Professor;Assistant Professor;Professor;Researcher", "bibtex": "@inproceedings{\nclavier2024nearoptimal,\ntitle={Near-Optimal Distributionally Robust Reinforcement Learning with General \\$L\\_p\\$ Norms},\nauthor={Pierre Clavier and Laixi Shi and Erwan Le Pennec and Eric Mazumdar and Adam Wierman and Matthieu Geist},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0l9yGPTHAU}\n}", "github": "", "reviewers": "5t5a;6HyR;QVRk;Tr2g", "pdf_size": 874385, "rating": "6;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "49;79;169;56", "wc_strengths": "148;17;153;184", "wc_weaknesses": "97;60;56;130", "wc_questions": "1;32;102;269", "wc_limitations": "8;9;28;11", "wc_review": "303;197;508;650", "wc_reply_reviewers": "23;9;25;69", "wc_reply_authors": "0;0;18;160", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.25, 47.92376758978784 ], "wc_strengths_avg": [ 125.5, 64.1424196612507 ], "wc_weaknesses_avg": [ 85.75, 30.13614938906429 ], "wc_questions_avg": [ 101.0, 103.66532689380765 ], "wc_limitations_avg": [ 14.0, 8.154753215150045 ], "wc_review_avg": [ 414.5, 176.02627644758041 ], "wc_reply_reviewers_avg": [ 31.5, 22.511108368980857 ], "wc_reply_authors_avg": [ 44.5, 67.0876292620331 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8mKzTH_S-AUJ:scholar.google.com/&scioq=Near-Optimal+Distributionally+Robust+Reinforcement+Learning+with+General+%24L_p%24+Norms&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "polytechnique.edu;caltech.edu;polytechnique.edu;cms.caltech.edu;caltech.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;0;1;1;3", "aff_unique_norm": "Ecole Polytechnique;California Institute of Technology;;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.polytechnique.edu;https://www.caltech.edu;;https://www.google.com", "aff_unique_abbr": "X;Caltech;;Google", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Pasadena;Mountain View", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "France;United States;" }, { "title": "ALPS: Improved Optimization for Highly Sparse One-Shot Pruning for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96911", "id": "0lBx844upd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0lBx844upd", "openreview": "https://openreview.net/forum?id=0lBx844upd", "poster": "/media/PosterPDFs/NeurIPS%202024/96911.png?t=1731079283.0533488", "project": "", "author_site": "Xiang Meng, Kayhan Behdin, Haoyue Wang, Rahul Mazumder", "tldr": "", "abstract": "The impressive performance of Large Language Models (LLMs) across various natural language processing tasks comes at the cost of vast computational resources and storage requirements. One-shot pruning techniques offer a way to alleviate these burdens by removing redundant weights without the need for retraining. Yet, the massive scale of LLMs often forces current pruning approaches to rely on heuristics instead of optimization-based techniques, potentially resulting in suboptimal compression. In this paper, we introduce ALPS, an optimization-based framework that tackles the pruning problem using the operator splitting technique and a preconditioned conjugate gradient-based post-processing step. Our approach incorporates novel techniques to accelerate and theoretically guarantee convergence while leveraging vectorization and GPU parallelism for efficiency. ALPS substantially outperforms state-of-the-art methods in terms of the pruning objective and perplexity reduction, particularly for highly sparse models. On the LLaMA3-8B model with 70\\% sparsity, ALPS achieves a 29\\% reduction in test perplexity on the WikiText dataset and a 8\\% improvement in zero-shot benchmark performance compared to existing methods. Our code is available at https://github.com/mazumder-lab/ALPS.", "keywords": "network pruning;unstructured pruning;large language model;ADMM;pre-conditioned conjugate gradient;optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Xiang Meng;Kayhan Behdin;Haoyue Wang;Rahul Mazumder", "authorids": "~Xiang_Meng1;~Kayhan_Behdin1;~Haoyue_Wang3;~Rahul_Mazumder1", "gender": "M;M;;M", "homepage": "https://www.linkedin.com/in/xiang-meng-0888b0201/;http://mit.edu/~behdink/www/;;http://www.mit.edu/~rahulmaz/", "dblp": ";199/2308.html;;11/9365.html", "google_scholar": "AyWinq8AAAAJ;;N-iirNIAAAAJ;cyCp3pIAAAAJ", "orcid": ";0000-0003-3482-0421;;0000-0003-1384-9743", "linkedin": ";;;", "or_profile": "~Xiang_Meng1;~Kayhan_Behdin1;~Haoyue_Wang3;~Rahul_Mazumder1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nmeng2024alps,\ntitle={{ALPS}: Improved Optimization for Highly Sparse One-Shot Pruning for Large Language Models},\nauthor={Xiang Meng and Kayhan Behdin and Haoyue Wang and Rahul Mazumder},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0lBx844upd}\n}", "github": "", "reviewers": "1PwE;ZgXD;xarH", "pdf_size": 493761, "rating": "3;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "42;83;56", "wc_strengths": "41;50;9", "wc_weaknesses": "270;107;38", "wc_questions": "92;35;52", "wc_limitations": "39;18;37", "wc_review": "484;293;192", "wc_reply_reviewers": "59;105;0", "wc_reply_authors": "33;139;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.333333333333336, 17.016332024133625 ], "wc_strengths_avg": [ 33.333333333333336, 17.594190960528863 ], "wc_weaknesses_avg": [ 138.33333333333334, 97.27052768210706 ], "wc_questions_avg": [ 59.666666666666664, 23.893281249943232 ], "wc_limitations_avg": [ 31.333333333333332, 9.46337971105226 ], "wc_review_avg": [ 323.0, 121.08123994519822 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 42.97544518546479 ], "wc_reply_authors_avg": [ 57.333333333333336, 59.29774213426867 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9848617569690875654&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Accelerating Non-Maximum Suppression: A Graph Theory Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96910", "id": "0lau89u4oE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0lau89u4oE", "openreview": "https://openreview.net/forum?id=0lau89u4oE", "poster": "/media/PosterPDFs/NeurIPS%202024/96910.png?t=1733315333.5240822", "project": "", "author_site": "King-Siong Si, Lu Sun, Weizhan Zhang, Tieliang Gong, Jiahao Wang, Jiang Liu, Hao Sun", "tldr": "", "abstract": "Non-maximum suppression (NMS) is an indispensable post-processing step in object detection. With the continuous optimization of network models, NMS has become the ``last mile'' to enhance the efficiency of object detection. This paper systematically analyzes NMS from a graph theory perspective for the first time, revealing its intrinsic structure. Consequently, we propose two optimization methods, namely QSI-NMS and BOE-NMS. The former is a fast recursive divide-and-conquer algorithm with negligible mAP loss, and its extended version (eQSI-NMS) achieves optimal complexity of $\\mathcal{O}(n\\log n)$. The latter, concentrating on the locality of NMS, achieves an optimization at a constant level without an mAP loss penalty. Moreover, to facilitate rapid evaluation of NMS methods for researchers, we introduce NMS-Bench, the first benchmark designed to comprehensively assess various NMS methods. Taking the YOLOv8-N model on MS COCO 2017 as the benchmark setup, our method QSI-NMS provides $6.2\\times$ speed of original NMS on the benchmark, with a $0.1\\%$ decrease in mAP. The optimal eQSI-NMS, with only a $0.3\\%$ mAP decrease, achieves $10.7\\times$ speed. Meanwhile, BOE-NMS exhibits $5.1\\times$ speed with no compromise in mAP.", "keywords": "non-maximum suppression;object detection;graph theory", "primary_area": "optimization", "supplementary_material": "/attachment/2291a8da5bccb50b39a7bfe362990a7ab87fbd1f.zip", "author": "King-Siong Si;Lu Sun;Weizhan Zhang;Tieliang Gong;Jiahao Wang;Jiang Liu;Hao Sun", "authorids": "~King-Siong_Si2;~Lu_Sun6;~Weizhan_Zhang1;~Tieliang_Gong2;~Jiahao_Wang14;~Jiang_Liu14;~Hao_Sun15", "gender": "M;M;;;;M;M", "homepage": "http://www.kingsiong.top;https://github.com/Yuri3-xr;;;;https://github.com/black1025;", "dblp": ";;;;;;82/2248-9", "google_scholar": ";;;;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~King-Siong_Si2;~Lu_Sun6;~Weizhan_Zhang1;~Tieliang_Gong2;~Jiahao_Wang14;~Jiang_Liu14;~Hao_Sun15", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;;;;North China University of Technology;China Telecom Corporation Ltd. Data&AI Technology Company", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;;;ncut.edu.cn;chinatelecom.com.cn", "position": "MS student;MS student;;;;MS student;Researcher", "bibtex": "@inproceedings{\nsi2024accelerating,\ntitle={Accelerating Non-Maximum Suppression: A Graph Theory Perspective},\nauthor={King-Siong Si and Lu Sun and Weizhan Zhang and Tieliang Gong and Jiahao Wang and Jiang Liu and Hao Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0lau89u4oE}\n}", "github": "", "reviewers": "HR1f;kDQX;snSe;pEVt", "pdf_size": 6162198, "rating": "5;5;6;6", "confidence": "4;3;4;2", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "2;3;3;2", "wc_summary": "60;92;80;91", "wc_strengths": "102;72;72;57", "wc_weaknesses": "190;110;62;165", "wc_questions": "5;105;40;30", "wc_limitations": "10;108;29;14", "wc_review": "367;487;283;357", "wc_reply_reviewers": "0;0;14;122", "wc_reply_authors": "0;0;0;771", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.75, 12.871965661856 ], "wc_strengths_avg": [ 75.75, 16.345871038277526 ], "wc_weaknesses_avg": [ 131.75, 49.59019560356664 ], "wc_questions_avg": [ 45.0, 36.9120576505835 ], "wc_limitations_avg": [ 40.25, 39.75157229594824 ], "wc_review_avg": [ 373.5, 73.12147427397782 ], "wc_reply_reviewers_avg": [ 34.0, 51.12729212465687 ], "wc_reply_authors_avg": [ 192.75, 333.8527931589011 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fZ9x6MMNFNMJ:scholar.google.com/&scioq=Accelerating+Non-Maximum+Suppression:+A+Graph+Theory+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "xjtu.edu.cn;xjtu.edu.cn;;;;ncut.edu.cn;chinatelecom.com.cn", "author_num": 7, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Xi'an Jiao Tong University;North China University of Technology;China Telecom Corporation Ltd.", "aff_unique_dep": ";;Data&AI Technology Company", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.ncut.edu.cn;https://www.chinatelecom.com.cn/", "aff_unique_abbr": "XJTU;NCUT;China Telecom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "BitsFusion: 1.99 bits Weight Quantization of Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96909", "id": "0m19blQT6y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0m19blQT6y", "openreview": "https://openreview.net/forum?id=0m19blQT6y", "poster": "/media/PosterPDFs/NeurIPS%202024/96909.png?t=1729915675.3170154", "project": "", "author_site": "Yang Sui, Yanyu Li, Anil Kag, Yerlan Idelbayev, Junli Cao, Ju Hu, Dhritiman Sagar, Bo Yuan, Sergey Tulyakov, Jian Ren", "tldr": "", "abstract": "Diffusion-based image generation models have achieved great success in recent years by showing the capability of synthesizing high-quality content. However, these models contain a huge number of parameters, resulting in a significantly large model size. Saving and transferring them is a major bottleneck for various applications, especially those running on resource-constrained devices. In this work, we develop a novel weight quantization method that quantizes the UNet from Stable Diffusion v1.5 to $1.99$ bits, achieving a model with $7.9\\times$ smaller size while exhibiting even better generation quality than the original one. Our approach includes several novel techniques, such as assigning optimal bits to each layer, initializing the quantized model for better performance, and improving the training strategy to dramatically reduce quantization error. Furthermore, we extensively evaluate our quantized model across various benchmark datasets and through human evaluation to demonstrate its superior generation quality.", "keywords": "Diffusion;Quantization;Stable Diffusion;Low bit", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yang Sui;Yanyu Li;Anil Kag;Yerlan Idelbayev;Junli Cao;Ju Hu;Dhritiman Sagar;Bo Yuan;Sergey Tulyakov;Jian Ren", "authorids": "~Yang_Sui1;~Yanyu_Li1;~Anil_Kag1;~Yerlan_Idelbayev1;~Junli_Cao2;~Ju_Hu1;~Dhritiman_Sagar1;~Bo_Yuan3;~Sergey_Tulyakov1;~Jian_Ren2", "gender": "M;;M;M;M;M;M;;M;M", "homepage": "https://eclipsess.github.io/yangsui.github.io/;;https://anilkagak2.github.io/;http://graduatestudent.ucmerced.edu/yidelbayev/;;;;;http://www.stulyakov.com/;https://alanspike.github.io/", "dblp": "77/10522;194/5818;213/9132;203/8094;234/8466;;228/3213;41/1662-1;40/6115;59/2180-5", "google_scholar": "Q2W1p6sAAAAJ;https://scholar.google.com/citations?hl=en;bZdVsMkAAAAJ;nAaroNMAAAAJ;;ozJiSMcAAAAJ;;oUy9elEAAAAJ;mgzXR0sAAAAJ;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ", "orcid": "0000-0003-3020-0612;;;;;;;;;", "linkedin": "yang-sui-308055117/;;;;junli-cao-5165b41a1;erichuju;dhritiman-sagar-5775169/;;sergeytulyakov/;", "or_profile": "~Yang_Sui1;~Yanyu_Li1;~Anil_Kag1;~Yerlan_Idelbayev1;~Junli_Cao2;~Ju_Hu1;~Dhritiman_Sagar1;~Bo_Yuan3;~Sergey_Tulyakov1;~Jian_Ren2", "aff": "Rutgers University;Northeastern University;Snap Inc.;Snap Inc.;Snap Inc.;Snap Inc.;Snap Inc.;Rutgers University;Snap Inc.;Snap Inc.", "aff_domain": "rutgers.edu;northeastern.edu;snap.com;snapchat.com;snapchat.com;snapchat.com;snapchat.com;rutgers.edu;snapchat.com;snapchat.com", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Assistant Professor;Director of Research;Research Scientist", "bibtex": "@inproceedings{\nsui2024bitsfusion,\ntitle={BitsFusion: 1.99 bits Weight Quantization of Diffusion Model},\nauthor={Yang Sui and Yanyu Li and Anil Kag and Yerlan Idelbayev and Junli Cao and Ju Hu and Dhritiman Sagar and Bo Yuan and Sergey Tulyakov and Jian Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0m19blQT6y}\n}", "github": "", "reviewers": "tjXw;Fq1C;W2xs;WzcG", "pdf_size": 14141774, "rating": "5;6;6;6", "confidence": "5;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "75;70;75;109", "wc_strengths": "51;49;88;170", "wc_weaknesses": "271;62;28;458", "wc_questions": "1;90;46;5", "wc_limitations": "70;4;2;1", "wc_review": "468;275;239;743", "wc_reply_reviewers": "154;127;17;56", "wc_reply_authors": "641;336;28;92", "reply_reviewers": "2;2;1;1", "reply_authors": "4;4;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 15.578430601315397 ], "wc_strengths_avg": [ 89.5, 49.002550954006466 ], "wc_weaknesses_avg": [ 204.75, 173.30807107575805 ], "wc_questions_avg": [ 35.5, 36.058979464205585 ], "wc_limitations_avg": [ 19.25, 29.32042803234632 ], "wc_review_avg": [ 431.25, 199.94546131382927 ], "wc_reply_reviewers_avg": [ 88.5, 54.637441374939954 ], "wc_reply_authors_avg": [ 274.25, 240.9215380575178 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5376275584251472645&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "rutgers.edu;northeastern.edu;snap.com;snapchat.com;snapchat.com;snapchat.com;snapchat.com;rutgers.edu;snapchat.com;snapchat.com", "author_num": 10, "aff_unique_index": "0;1;2;2;2;2;2;0;2;2", "aff_unique_norm": "Rutgers University;Northeastern University;Snap Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rutgers.edu;https://www.northeastern.edu;https://www.snapinc.com", "aff_unique_abbr": "Rutgers;NEU;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ProgressGym: Alignment with a Millennium of Moral Progress", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97875", "id": "0mRouJElbZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0mRouJElbZ", "openreview": "https://openreview.net/forum?id=0mRouJElbZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97875.png?t=1731742041.9348912", "project": "", "author_site": "Tianyi (Alex) Qiu, Yang Zhang, Xuchuan Huang, Jasmine Li, Jiaming Ji, Yaodong Yang", "tldr": "", "abstract": "Frontier AI systems, including large language models (LLMs), hold increasing influence over the epistemology of human users. Such influence can reinforce prevailing societal values, potentially contributing to the lock-in of misguided moral beliefs and, consequently, the perpetuation of problematic moral practices on a broad scale. We introduce **progress alignment** as a technical solution to mitigate this imminent risk. Progress alignment algorithms learn to emulate the mechanics of human moral progress, thereby addressing the susceptibility of existing alignment methods to contemporary moral blindspots. To empower research in progress alignment, we introduce [**ProgressGym**](https://github.com/PKU-Alignment/ProgressGym), an experimental framework allowing the learning of moral progress mechanics from history, in order to facilitate future progress in real-world moral decisions. Leveraging 9 centuries of historical text and 18 [historical LLMs](https://huggingface.co/collections/PKU-Alignment/progressgym-666735fcf3e4efa276226eaa), ProgressGym enables codification of real-world progress alignment challenges into concrete benchmarks. Specifically, we introduce three core challenges: tracking evolving values (PG-Follow), preemptively anticipating moral progress (PG-Predict), and regulating the feedback loop between human and AI value shifts (PG-Coevolve). Alignment methods without a temporal dimension are inapplicable to these tasks. In response, we present *lifelong* and *extrapolative* algorithms as baseline methods of progress alignment, and build an [open leaderboard](https://huggingface.co/spaces/PKU-Alignment/ProgressGym-LeaderBoard) soliciting novel algorithms and challenges.", "keywords": "Progress Alignment;AI Alignment;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Tianyi Qiu;Yang Zhang;Xuchuan Huang;Jasmine Xinze Li;Jiaming Ji;Yaodong Yang", "authorids": "~Tianyi_Qiu1;~Yang_Zhang53;~Xuchuan_Huang1;~Jasmine_Xinze_Li1;~Jiaming_Ji2;~Yaodong_Yang1", "gender": "M;M;F;F;M;M", "homepage": "https://tianyiqiu.net;https://github.com/SaladAss04;https://huangxuchuan.github.io;https://github.com/jasmineee-li;https://jijiaming.com;https://www.yangyaodong.com", "dblp": "149/9552;;;;313/9356.html;170/1496-1", "google_scholar": "teiNc0sAAAAJ;;;LSidePQAAAAJ;aW8WbYYAAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ", "orcid": ";;;;;0000-0001-8132-5613", "linkedin": "ty-qiu/;;;jasminexli/;;yaodong-yang", "or_profile": "~Tianyi_Qiu1;~Yang_Zhang53;~Xuchuan_Huang1;~Jasmine_Xinze_Li1;~Jiaming_Ji2;~Yaodong_Yang1", "aff": "University of California, Berkeley;Peking University;Peking University;Peking University;Peking University;Peking University", "aff_domain": "berkeley.edu;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu;pku.edu.cn;pku.edu.cn", "position": "Intern;Undergrad student;Undergrad student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqiu2024progressgym,\ntitle={ProgressGym: Alignment with a Millennium of Moral Progress},\nauthor={Tianyi Qiu and Yang Zhang and Xuchuan Huang and Jasmine Xinze Li and Jiaming Ji and Yaodong Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=0mRouJElbZ}\n}", "github": "", "reviewers": "jQHT;cnpi;AzYS", "pdf_size": 1337657, "rating": "6;8;9", "confidence": "2;2;4", "wc_summary_and_contributions": "94;128;164", "wc_strengths": "78;53;84", "wc_improvement": "100;85;281", "wc_limitations": "12;23;94", "wc_correctness": "20;17;37", "wc_clarity": "39;48;163", "wc_relation_to_prior_work": "1;18;46", "wc_documentation": "2;51;58", "wc_additional_feedback": "1;1;1", "wc_review": "347;424;928", "wc_reply_reviewers": "11;92;19", "wc_reply_authors": "60;230;101", "reply_reviewers": "1;1;1", "reply_authors": "4;6;5", "rating_avg": [ 7.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 128.66666666666666, 28.58126814696802 ], "wc_strengths_avg": [ 71.66666666666667, 13.424687043734844 ], "wc_improvement_avg": [ 155.33333333333334, 89.07050889916121 ], "wc_limitations_avg": [ 43.0, 36.34097778908359 ], "wc_correctness_avg": [ 24.666666666666668, 8.806563209081938 ], "wc_clarity_avg": [ 83.33333333333333, 56.45253660278596 ], "wc_relation_to_prior_work_avg": [ 21.666666666666668, 18.55322673343433 ], "wc_documentation_avg": [ 37.0, 24.91318258807306 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 566.3333333333334, 257.66170758487874 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 36.444783196257625 ], "wc_reply_authors_avg": [ 130.33333333333334, 72.43541184316473 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 5.0, 0.816496580927726 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7559289460184546, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2380308351436605521&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "berkeley.edu;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu;pku.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of California, Berkeley;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UC Berkeley;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Tight Bounds for Learning RUMs from Small Slates", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96908", "id": "0nSY8NiILP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0nSY8NiILP", "openreview": "https://openreview.net/forum?id=0nSY8NiILP", "poster": "/media/PosterPDFs/NeurIPS%202024/96908.png?t=1732698791.0319488", "project": "", "author_site": "Flavio Chierichetti, Mirko Giacchini, Ravi Kumar, Alessandro Panconesi, Andrew Tomkins", "tldr": "", "abstract": "A Random Utility Model (RUM) is a classical model of user behavior defined by a distribution over $\\mathbb{R}^n$. A user, presented with a subset of $\\\\{1,\\ldots,n\\\\}$, will select the item of the subset with the highest utility, according to a utility vector drawn from the specified distribution. In practical settings, the subset is often of small size, as in the ``ten blue links'' of web search. \n\n\nIn this paper, we consider a learning setting with complete information on user choices from subsets of size at most $k$. We show that $k=\\Theta(\\sqrt{n})$ is both necessary and sufficient to predict the distribution of all user choices with an arbitrarily small, constant error.\n\n\nBased on the upper bound, we obtain new algorithms for approximate RUM learning and variations thereof. Furthermore, we employ our lower bound for approximate RUM learning to derive lower bounds to fractional extensions of the well-studied $k$-deck and trace reconstruction problems.", "keywords": "discrete choice;random utility model;approximation theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Flavio Chierichetti;Mirko Giacchini;Ravi Kumar;Alessandro Panconesi;Andrew Tomkins", "authorids": "~Flavio_Chierichetti2;~Mirko_Giacchini1;~Ravi_Kumar1;~Alessandro_Panconesi2;~Andrew_Tomkins2", "gender": ";;M;M;M", "homepage": ";https://mirkogiacchini.github.io/;https://sites.google.com/site/ravik53/;https://sites.google.com/a/di.uniroma1.it/panco/;http://www.tomkins.family/andrew/index.html", "dblp": "https://dblp.uni-trier.de/pers/c/Chierichetti:Flavio;347/9932;k/RaviKumar.html;53/6558.html;t/ATomkins", "google_scholar": "MGXRuXYAAAAJ;;J_XhIsgAAAAJ;https://scholar.google.it/citations?user=kzYoaFYAAAAJ;-JOkpfQAAAAJ", "orcid": ";0009-0009-5704-098X;0000-0002-2203-2586;;0000-0002-1611-9255", "linkedin": ";;ravi-kumar-a3a9631;;", "or_profile": "~Flavio_Chierichetti2;~Mirko_Giacchini1;~Ravi_Kumar1;~Alessandro_Panconesi2;~Andrew_Tomkins2", "aff": "Sapienza University of Rome;Sapienza University of Rome;Google;Sapienza University of Rome;Google", "aff_domain": "uniroma1.it;uniroma1.it;google.com;uniroma1.it;google.com", "position": "Full Professor;PhD student;Research Scientist;Full Professor;Researcher", "bibtex": "@inproceedings{\nchierichetti2024tight,\ntitle={Tight Bounds for Learning {RUM}s from Small Slates},\nauthor={Flavio Chierichetti and Mirko Giacchini and Ravi Kumar and Alessandro Panconesi and Andrew Tomkins},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0nSY8NiILP}\n}", "github": "", "reviewers": "19Lt;5X2F;R6u3;NbMn", "pdf_size": 494511, "rating": "4;5;7;8", "confidence": "2;2;4;3", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "273;206;201;501", "wc_strengths": "12;118;35;179", "wc_weaknesses": "56;133;104;148", "wc_questions": "1;17;186;149", "wc_limitations": "1;20;6;10", "wc_review": "343;494;532;987", "wc_reply_reviewers": "23;81;30;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 295.25, 122.14412593325969 ], "wc_strengths_avg": [ 86.0, 66.61456297237113 ], "wc_weaknesses_avg": [ 110.25, 35.088281519618484 ], "wc_questions_avg": [ 88.25, 80.521348100985 ], "wc_limitations_avg": [ 9.25, 6.977642868476432 ], "wc_review_avg": [ 589.0, 240.41318599444583 ], "wc_reply_reviewers_avg": [ 42.5, 22.699118925632334 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7627700713964739, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Xs29xim8PJUJ:scholar.google.com/&scioq=Tight+Bounds+for+Learning+RUMs+from+Small+Slates&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "uniroma1.it;uniroma1.it;google.com;uniroma1.it;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Sapienza University of Rome;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uniroma1.it;https://www.google.com", "aff_unique_abbr": "Sapienza;Google", "aff_campus_unique_index": "0;0;1;0;1", "aff_campus_unique": "Rome;Mountain View", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Italy;United States" }, { "title": "Private and Personalized Frequency Estimation in a Federated Setting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96907", "id": "0nzKznCjFG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0nzKznCjFG", "openreview": "https://openreview.net/forum?id=0nzKznCjFG", "poster": "", "project": "", "author_site": "Amrith Setlur, Vitaly Feldman, Kunal Talwar", "tldr": "", "abstract": "Motivated by the problem of next word prediction on user devices we introduce and study the problem of personalized frequency histogram estimation in a federated setting. In this problem, over some domain, each user observes a number of samples from a distribution which is specific to that user. The goal is to compute for all users a personalized estimate of the user's distribution with error measured in KL divergence. We focus on addressing two central challenges: statistical heterogeneity and protection of user privacy.\nOur approach to the problem relies on discovering and exploiting similar subpopulations of users which are often present and latent in real-world data, while minimizing user privacy leakage at the same time. We first present a non-private clustering-based algorithm for the problem, and give a provably joint differentially private version of it with a private data-dependent initialization scheme. Next, we propose a simple data model which is based on a mixture of Dirichlet distributions, to formally motivate our non-private algorithm and demonstrate some properties of its components. Finally, we provide an extensive empirical evaluation of our private and non-private algorithms under varying levels of statistical and size heterogeneity on the Reddit, StackOverflow, and Amazon Reviews datasets. Our results demonstrate significant improvements over standard and clustering-based baselines, and in particular, they show that it is possible to improve over direct personalization of a single global model.", "keywords": "federated learning;personalization;privacy", "primary_area": "privacy", "supplementary_material": "/attachment/eb1bc2285ecb0c1793eaa359b7cceb4142b22f25.zip", "author": "Amrith Setlur;Vitaly Feldman;Kunal Talwar", "authorids": "~Amrith_Setlur1;~Vitaly_Feldman1;~Kunal_Talwar1", "gender": "M;M;M", "homepage": "http://ars22.github.io;https://vtaly.net;http://www.kunaltalwar.org", "dblp": "https://dblp.uni-trier.de/pers/hd/s/Setlur:Amrith;67/1162;06/3696", "google_scholar": "https://scholar.google.ru/citations?user=i7V1kJgAAAAJ;GqZBmfgAAAAJ;XD_01h8AAAAJ", "orcid": "0000-0002-7061-3094;;", "linkedin": ";;kunal-talwar-128a6159", "or_profile": "~Amrith_Setlur1;~Vitaly_Feldman1;~Kunal_Talwar1", "aff": "Carnegie Mellon University;Apple AI Research;Apple", "aff_domain": "cmu.edu;apple.com;apple.com", "position": "PhD student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nsetlur2024private,\ntitle={Private and Personalized Frequency Estimation in a Federated Setting},\nauthor={Amrith Setlur and Vitaly Feldman and Kunal Talwar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0nzKznCjFG}\n}", "github": "", "reviewers": "a5Ce;zabr;DFyV;ymNE", "pdf_size": 789087, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "2;1;3;2", "wc_summary": "34;87;96;49", "wc_strengths": "35;33;36;158", "wc_weaknesses": "278;212;79;259", "wc_questions": "96;129;2;29", "wc_limitations": "11;2;1;12", "wc_review": "454;463;214;507", "wc_reply_reviewers": "0;349;14;26", "wc_reply_authors": "43;1089;80;72", "reply_reviewers": "0;2;1;1", "reply_authors": "2;4;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.5, 25.753640519351823 ], "wc_strengths_avg": [ 65.5, 53.415821626181135 ], "wc_weaknesses_avg": [ 207.0, 77.70778596768795 ], "wc_questions_avg": [ 64.0, 50.788778288121875 ], "wc_limitations_avg": [ 6.5, 5.024937810560445 ], "wc_review_avg": [ 409.5, 114.63965282571297 ], "wc_reply_reviewers_avg": [ 97.25, 145.63889418695817 ], "wc_reply_authors_avg": [ 321.0, 443.61864252981974 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jpCU-phu_yoJ:scholar.google.com/&scioq=Private+and+Personalized+Frequency+Estimation+in+a+Federated+Setting&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "cmu.edu;apple.com;apple.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Carnegie Mellon University;Apple", "aff_unique_dep": ";Apple AI Research", "aff_unique_url": "https://www.cmu.edu;https://www.apple.com/research/", "aff_unique_abbr": "CMU;Apple AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding the Expressive Power and Mechanisms of Transformer for Sequence Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96906", "id": "0o7Rd5jngV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0o7Rd5jngV", "openreview": "https://openreview.net/forum?id=0o7Rd5jngV", "poster": "", "project": "", "author_site": "Mingze Wang, Weinan E", "tldr": "", "abstract": "We conduct a systematic study of the approximation properties of Transformer for sequence modeling with long, sparse and complicated memory. \nWe investigate the mechanisms through which different components of Transformer, such as the dot-product self-attention, positional encoding and feed-forward layer, affect its expressive power, and we study their combined effects through establishing explicit approximation rates.\nOur study reveals the roles of critical parameters in the Transformer, such as the number of layers and the number of attention heads.\nThese theoretical insights are validated experimentally and offer natural suggestions for alternative architectures.", "keywords": "Transformer;expressive power;theoretical insights", "primary_area": "learning_theory", "supplementary_material": "", "author": "Mingze Wang;Weinan E", "authorids": "~Mingze_Wang2;~Weinan_E2", "gender": ";", "homepage": "https://wmz9.github.io/;https://web.math.princeton.edu/~weinan/", "dblp": "296/7556;06/9390.html", "google_scholar": "CkU47X0AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Mingze_Wang2;~Weinan_E2", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024understanding,\ntitle={Understanding the Expressive Power and Mechanisms of Transformer for Sequence Modeling},\nauthor={Mingze Wang and Weinan E},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0o7Rd5jngV}\n}", "github": "", "reviewers": "Fdzw;iDXk;xF1Y", "pdf_size": 663441, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;4;3", "novelty": "3;3;4", "presentation": "2;3;4", "wc_summary": "45;156;234", "wc_strengths": "73;96;70", "wc_weaknesses": "189;196;99", "wc_questions": "46;125;3", "wc_limitations": "1;10;10", "wc_review": "354;583;416", "wc_reply_reviewers": "41;45;33", "wc_reply_authors": "20;17;54", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 145.0, 77.54998388136518 ], "wc_strengths_avg": [ 79.66666666666667, 11.61416759345623 ], "wc_weaknesses_avg": [ 161.33333333333334, 44.16886786967591 ], "wc_questions_avg": [ 58.0, 50.52392172690741 ], "wc_limitations_avg": [ 7.0, 4.242640687119285 ], "wc_review_avg": [ 451.0, 96.70918605110202 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 4.988876515698588 ], "wc_reply_authors_avg": [ 30.333333333333332, 16.77961726487096 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4588334200991003737&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "DarkSAM: Fooling Segment Anything Model to Segment Nothing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96905", "id": "0o9E8AsFgW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0o9E8AsFgW", "openreview": "https://openreview.net/forum?id=0o9E8AsFgW", "poster": "", "project": "", "author_site": "Ziqi Zhou, Yufei Song, Minghui Li, Shengshan Hu, Xianlong Wang, Leo Yu Zhang, Dezhong Yao, Hai Jin", "tldr": "", "abstract": "Segment Anything Model (SAM) has recently gained much attention for its outstanding generalization to unseen data and tasks. Despite its promising prospect, the vulnerabilities of SAM, especially to universal adversarial perturbation (UAP) have not been thoroughly investigated yet. In this paper, we propose DarkSAM, the first prompt-free universal attack framework against SAM, including a semantic decoupling-based spatial attack and a texture distortion-based frequency attack. We first divide the output of SAM into foreground and background. Then, we design a shadow target strategy to obtain the semantic blueprint of the image as the attack target. DarkSAM is dedicated to fooling SAM by extracting and destroying crucial object features from images in both spatial and frequency domains. In the spatial domain, we disrupt the semantics of both the foreground and background in the image to confuse SAM. In the frequency domain, we further enhance the attack effectiveness by distorting the high-frequency components (i.e., texture information) of the image. Consequently, with a single UAP, DarkSAM renders SAM incapable of segmenting objects across diverse images with varying prompts. Experimental results on four datasets for SAM and its two variant models demonstrate the powerful attack capability and transferability of DarkSAM. Our codes are available at: https://github.com/CGCL-codes/DarkSAM.", "keywords": "Segment Anything Model;Adversarial Example", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Ziqi Zhou;Yufei Song;Minghui Li;Shengshan Hu;Xianlong Wang;Leo Yu Zhang;Dezhong Yao;Hai Jin", "authorids": "~Ziqi_Zhou2;~Yufei_Song3;~Minghui_Li2;~Shengshan_Hu1;~Xianlong_Wang1;~Leo_Yu_Zhang1;~Dezhong_Yao1;~Hai_Jin1", "gender": "M;M;F;M;M;M;M;M", "homepage": "https://zhou-zi7.github.io;;;http://faculty.hust.edu.cn/HUSHENGSHAN;https://wxldragon.github.io/;https://leozhangcs.github.io/;https://dyao.netlify.app/;http://www.linkedin.com/in/jinhust", "dblp": ";;;169/2268;;117/3526;93/5633-2;98/4156", "google_scholar": "-eyLn4wAAAAJ;;j_y67gEAAAAJ;lkAFwJgAAAAJ;https://scholar.google.com.hk/citations?user=EgsgIq0AAAAJ;https://scholar.google.com.hk/citations?user=JK21OM0AAAAJ;SFBHGr0AAAAJ;", "orcid": ";;;;0009-0009-3057-827X;0000-0001-9330-2662;0000-0003-0336-0522;0000-0002-3934-7605", "linkedin": ";%E7%BE%BD%E9%A3%9E-%E5%AE%8B-1b5166302/;;;;;dzyao;jinhust", "or_profile": "~Ziqi_Zhou2;~Yufei_Song3;~Minghui_Li2;~Shengshan_Hu1;~Xianlong_Wang1;~Leo_Yu_Zhang1;~Dezhong_Yao1;~Hai_Jin1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Griffith University;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;griffith.edu.au;hust.edu.cn;hust.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024darksam,\ntitle={Dark{SAM}: Fooling Segment Anything Model to Segment Nothing},\nauthor={Ziqi Zhou and Yufei Song and Minghui Li and Shengshan Hu and Xianlong Wang and Leo Yu Zhang and Dezhong Yao and Hai Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0o9E8AsFgW}\n}", "github": "", "reviewers": "pM4H;YLRY;dgw7", "pdf_size": 6776315, "rating": "5;6;8", "confidence": "3;5;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "165;61;69", "wc_strengths": "53;49;84", "wc_weaknesses": "161;86;101", "wc_questions": "27;2;47", "wc_limitations": "18;14;15", "wc_review": "424;212;316", "wc_reply_reviewers": "0;0;34", "wc_reply_authors": "0;0;42", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.33333333333333, 47.2534537244516 ], "wc_strengths_avg": [ 62.0, 15.641824275533422 ], "wc_weaknesses_avg": [ 116.0, 32.4037034920393 ], "wc_questions_avg": [ 25.333333333333332, 18.408935028645434 ], "wc_limitations_avg": [ 15.666666666666666, 1.699673171197595 ], "wc_review_avg": [ 317.3333333333333, 86.55377262462657 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 14.0, 19.79898987322333 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10644266212712699894&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;griffith.edu.au;hust.edu.cn;hust.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Huazhong University of Science and Technology;Griffith University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.griffith.edu.au", "aff_unique_abbr": "HUST;Griffith", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "ProtGO: Function-Guided Protein Modeling for Unified Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96904", "id": "0oUutV92YF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0oUutV92YF", "openreview": "https://openreview.net/forum?id=0oUutV92YF", "poster": "/media/PosterPDFs/NeurIPS%202024/96904.png?t=1730386167.5899446", "project": "", "author_site": "Bozhen Hu, Cheng Tan, Yongjie Xu, Zhangyang Gao, Jun Xia, Lirong Wu, Stan Z. Li", "tldr": "", "abstract": "Protein representation learning is indispensable for various downstream applications of artificial intelligence for bio-medicine research, such as drug design and function prediction. However, achieving effective representation learning for proteins poses challenges due to the diversity of data modalities involved, including sequence, structure, and function annotations. Despite the impressive capabilities of large language models in biomedical text modelling, there remains a pressing need for a framework that seamlessly integrates these diverse modalities, particularly focusing on the three critical aspects of protein information: sequence, structure, and function. Moreover, addressing the inherent data scale differences among these modalities is essential. To tackle these challenges, we introduce ProtGO, a unified model that harnesses a teacher network equipped with a customized graph neural network (GNN) and a Gene Ontology (GO) encoder to learn hybrid embeddings. Notably, our approach eliminates the need for additional functions as input for the student network, which shares the same GNN module. Importantly, we utilize a domain adaptation method to facilitate distribution approximation for guiding the training of the teacher-student framework. This approach leverages distributions learned from latent representations to avoid the alignment of individual samples. Benchmark experiments highlight that ProtGO significantly outperforms state-of-the-art baselines, clearly demonstrating the advantages of the proposed unified framework.", "keywords": "Teacher-student;Knowledge Distillation;Protein Function", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Bozhen Hu;Cheng Tan;Yongjie Xu;Zhangyang Gao;Jun Xia;Lirong Wu;Stan Z. Li", "authorids": "~Bozhen_Hu1;~Cheng_Tan1;~Yongjie_Xu2;~Zhangyang_Gao1;~Jun_Xia1;~Lirong_Wu1;~Stan_Z._Li2", "gender": "M;M;M;M;M;;M", "homepage": ";https://chengtan9907.github.io/;;;http://junxia97.github.io/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "279/8665;70/1533-12.html;123/9257.html;275/3266;;15/10330;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;6kTV6aMAAAAJ;https://scholar.google.com.hk/citations?user=ciG27FYAAAAJ;4SclT-QAAAAJ;aPKKpSYAAAAJ;Tk7TrCoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-3428-0114;;0000-0002-6045-1626;0000-0003-1026-6083;;;", "linkedin": ";;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Bozhen_Hu1;~Cheng_Tan1;~Yongjie_Xu2;~Zhangyang_Gao1;~Jun_Xia1;~Lirong_Wu1;~Stan_Z._Li1", "aff": "Westlake University;Zhejiang University & Westlake University;Westlake University;Westlake University, China;Westlake University, China;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\nhu2024protgo,\ntitle={Prot{GO}: Function-Guided Protein Modeling for Unified Representation Learning},\nauthor={Bozhen Hu and Cheng Tan and Yongjie Xu and Zhangyang Gao and Jun Xia and Lirong Wu and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0oUutV92YF}\n}", "github": "", "reviewers": "qMp2;oeK7;kUCf", "pdf_size": 1159240, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "29;54;44", "wc_strengths": "14;46;43", "wc_weaknesses": "108;98;52", "wc_questions": "2;30;115", "wc_limitations": "1;1;6", "wc_review": "154;229;260", "wc_reply_reviewers": "37;27;12", "wc_reply_authors": "154;158;111", "reply_reviewers": "1;1;1", "reply_authors": "4;4;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 42.333333333333336, 10.274023338281628 ], "wc_strengths_avg": [ 34.333333333333336, 14.429907214608907 ], "wc_weaknesses_avg": [ 86.0, 24.385788210895843 ], "wc_questions_avg": [ 49.0, 48.0485865210067 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 214.33333333333334, 44.4996878890428 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 10.274023338281626 ], "wc_reply_authors_avg": [ 141.0, 21.275964529643932 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NnO9vmyDsdMJ:scholar.google.com/&scioq=ProtGO:+Function-Guided+Protein+Modeling+for+Unified+Representation+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Westlake University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.westlake.edu.cn;http://www.zju.edu.cn", "aff_unique_abbr": "WU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Confidence Regulation Neurons in Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96903", "id": "0og7nmvDbe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0og7nmvDbe", "openreview": "https://openreview.net/forum?id=0og7nmvDbe", "poster": "", "project": "", "author_site": "Alessandro Stolfo, Ben Wu, Wes Gurnee, Yonatan Belinkov, Xingyi Song, Mrinmaya Sachan, Neel Nanda", "tldr": "", "abstract": "Despite their widespread use, the mechanisms by which large language models (LLMs) represent and regulate uncertainty in next-token predictions remain largely unexplored. This study investigates two critical components believed to influence this uncertainty: the recently discovered entropy neurons and a new set of components that we term token frequency neurons. Entropy neurons are characterized by an unusually high weight norm and influence the final layer normalization (LayerNorm) scale to effectively scale down the logits. Our work shows that entropy neurons operate by writing onto an \\textit{unembedding null space}, allowing them to impact the residual stream norm with minimal direct effect on the logits themselves. We observe the presence of entropy neurons across a range of models, up to 7 billion parameters. On the other hand, token frequency neurons, which we discover and describe here for the first time, boost or suppress each token\u2019s logit proportionally to its log frequency, thereby shifting the output distribution towards or away from the unigram distribution. Finally, we present a detailed case study where entropy neurons actively manage confidence: the setting of induction, i.e. detecting and continuing repeated subsequences.", "keywords": "LLMs;Interpretability;Mechanistic Interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/7328ee71ed130222914c2736f3bde915ee940b8e.zip", "author": "Alessandro Stolfo;Ben Peng Wu;Wes Gurnee;Yonatan Belinkov;Xingyi Song;Mrinmaya Sachan;Neel Nanda", "authorids": "~Alessandro_Stolfo1;~Ben_Peng_Wu1;~Wes_Gurnee1;~Yonatan_Belinkov1;~Xingyi_Song1;~Mrinmaya_Sachan3;~Neel_Nanda1", "gender": "M;;;M;M;M;M", "homepage": "https://alestolfo.github.io;;https://www.wesg.me/;https://www.belinkov.com;https://www.sheffield.ac.uk/dcs/people/academic/xingyi-song;https://neelnanda.io;https://sites.google.com/site/mrinsachan/", "dblp": "329/3838;168/2621;;136/8705;185/5566;285/6389;86/10440.html", "google_scholar": "Fx50TZQAAAAJ;R7PZv1kAAAAJ;5sxXSfwAAAAJ;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;7seaj48AAAAJ;https://scholar.google.com/citations?hl=en;Tpp9ZjoAAAAJ", "orcid": ";0009-0002-0918-526X;;;0000-0002-4188-6974;;", "linkedin": "alessandrostolfo/;;;;xingyi-song-230257b4/;https://linkedin.com/in/neel-nanda-993580151;", "or_profile": "~Alessandro_Stolfo1;~Ben_Peng_Wu1;~Wes_Gurnee1;~Yonatan_Belinkov1;~Xingyi_Song1;~Neel_Nanda1;~MRINMAYA_SACHAN2", "aff": "ETHZ - ETH Zurich;University of Sheffield;Massachusetts Institute of Technology;Technion, Technion;University of Sheffield;Google DeepMind;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;shef.ac.uk;mit.edu;technion.ac.il;sheffield.ac.uk;deepmind.com;ethz.ch", "position": "PhD student;PhD student;PhD student;Assistant Professor;Lecturer;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nstolfo2024confidence,\ntitle={Confidence Regulation Neurons in Language Models},\nauthor={Alessandro Stolfo and Ben Peng Wu and Wes Gurnee and Yonatan Belinkov and Xingyi Song and Mrinmaya Sachan and Neel Nanda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0og7nmvDbe}\n}", "github": "", "reviewers": "Thsu;BDF8;C4Wm;S9nW", "pdf_size": 3763600, "rating": "5;7;7;9", "confidence": "4;3;3;5", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "79;46;216;90", "wc_strengths": "66;76;97;110", "wc_weaknesses": "345;114;99;66", "wc_questions": "53;135;227;86", "wc_limitations": "5;29;5;11", "wc_review": "548;400;644;363", "wc_reply_reviewers": "34;45;25;29", "wc_reply_authors": "26;8;574;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.75, 64.56150168637653 ], "wc_strengths_avg": [ 87.25, 17.25362280797862 ], "wc_weaknesses_avg": [ 156.0, 110.49208116421738 ], "wc_questions_avg": [ 125.25, 65.59106265338289 ], "wc_limitations_avg": [ 12.5, 9.836157786453 ], "wc_review_avg": [ 488.75, 113.25055187503503 ], "wc_reply_reviewers_avg": [ 33.25, 7.495832175282475 ], "wc_reply_authors_avg": [ 152.0, 243.82370680473218 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14943689658098186478&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ethz.ch;shef.ac.uk;mit.edu;technion.ac.il;sheffield.ac.uk;deepmind.com;ethz.ch", "author_num": 7, "aff_unique_index": "0;1;2;3;1;4;5", "aff_unique_norm": "ETH Zurich;University of Sheffield;Massachusetts Institute of Technology;Technion - Israel Institute of Technology;Google;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;;Google DeepMind;", "aff_unique_url": "https://www.ethz.ch;https://www.sheffield.ac.uk;https://web.mit.edu;https://www.technion.ac.il/en/;https://deepmind.com;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;Sheffield;MIT;Technion;DeepMind;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;1;1;0", "aff_country_unique": "Switzerland;United Kingdom;United States;Israel" }, { "title": "Accelerating Matroid Optimization through Fast Imprecise Oracles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96902", "id": "0qb8KoPsej", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0qb8KoPsej", "openreview": "https://openreview.net/forum?id=0qb8KoPsej", "poster": "", "project": "", "author_site": "Franziska Eberle, Felix Hommelsheim, Alexander Lindermayr, Zhenwei Liu, Nicole Megow, Jens Schl\u00f6ter", "tldr": "", "abstract": "Querying complex models for precise information (e.g. traffic models, database systems, large ML models) often entails intense computations and results in long response times. Thus, weaker models which give imprecise results quickly can be advantageous, provided inaccuracies can be resolved using few queries to a stronger model. In the fundamental problem of computing a maximum-weight basis of a matroid, a well-known generalization of many combinatorial optimization problems, algorithms have access to a clean oracle to query matroid information. We additionally equip algorithms with a fast but dirty oracle. We design and analyze practical algorithms which only use few clean queries w.r.t. the quality of the dirty oracle, while maintaining robustness against arbitrarily poor dirty oracles, approaching the performance of classic algorithms for the given problem. Notably, we prove that our algorithms are, in many respects, best-possible. Further, we outline extensions to other matroid oracle types, non-free dirty oracles and other matroid problems.", "keywords": "matroid optimization;weak-strong models;learning-augmented algorithms;algorithms with predictions;query minimization;robustness", "primary_area": "optimization", "supplementary_material": "", "author": "Franziska Eberle;Felix Hommelsheim;Alexander Lindermayr;Zhenwei Liu;Nicole Megow;Jens Schl\u00f6ter", "authorids": "~Franziska_Eberle1;~Felix_Hommelsheim1;~Alexander_Lindermayr1;~Zhenwei_Liu1;~Nicole_Megow1;~Jens_Schl\u00f6ter1", "gender": "F;M;M;M;F;M", "homepage": "https://www.uni-bremen.de/en/cslog/team/franziska-eberle;https://www.uni-bremen.de/en/cslog/team/felix-hommelsheim;https://www.uni-bremen.de/en/cslog/team/alexander-lindermayr;https://www.uni-bremen.de/en/cslog/team/zhenwei-liu;https://www.uni-bremen.de/en/cslog/nmegow;https://www.uni-bremen.de/en/cslog/team/jens-schloeter", "dblp": "230/4560;220/3462.html;269/9583;;40/6185.html;270/3491", "google_scholar": ";;;;https://scholar.google.de/citations?user=wLEuDHIAAAAJ;https://scholar.google.de/citations?user=QlyMw7AAAAAJ", "orcid": "0000-0001-8636-9711;0000-0003-4444-9793;0000-0001-6714-5034;;0000-0002-3531-7644;0000-0003-0555-4806", "linkedin": ";;;;;", "or_profile": "~Franziska_Eberle1;~Felix_Hommelsheim1;~Alexander_Lindermayr1;~Zhenwei_Liu1;~Nicole_Megow1;~Jens_Schl\u00f6ter1", "aff": "Technische Universit\u00e4t Berlin;Universit\u00e4t Bremen;Universit\u00e4t Bremen;;Universit\u00e4t Bremen;Universit\u00e4t Bremen", "aff_domain": "tu-berlin.de;uni-bremen.de;uni-bremen.de;;uni-bremen.de;uni-bremen.de", "position": "Postdoc;Postdoc;PhD student;;Full Professor;Postdoc", "bibtex": "@inproceedings{\neberle2024accelerating,\ntitle={Accelerating Matroid Optimization through Fast Imprecise Oracles},\nauthor={Franziska Eberle and Felix Hommelsheim and Alexander Lindermayr and Zhenwei Liu and Nicole Megow and Jens Schl{\\\"o}ter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0qb8KoPsej}\n}", "github": "", "reviewers": "2QVw;rC5n;AWmg;KsE6", "pdf_size": 253460, "rating": "4;5;6;7", "confidence": "4;1;3;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;1;4;3", "wc_summary": "126;83;299;193", "wc_strengths": "81;73;37;68", "wc_weaknesses": "145;98;178;112", "wc_questions": "103;46;18;50", "wc_limitations": "1;19;1;10", "wc_review": "456;319;533;433", "wc_reply_reviewers": "257;0;11;41", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 175.25, 81.49348133439877 ], "wc_strengths_avg": [ 64.75, 16.67895380412093 ], "wc_weaknesses_avg": [ 133.25, 30.96267914764483 ], "wc_questions_avg": [ 54.25, 30.727634142575962 ], "wc_limitations_avg": [ 7.75, 7.46240577829965 ], "wc_review_avg": [ 435.25, 76.65629458824631 ], "wc_reply_reviewers_avg": [ 77.25, 104.85793961355526 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.1025978352085154, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18435006659273361660&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 6, "email": "tu-berlin.de;uni-bremen.de;uni-bremen.de;;uni-bremen.de;uni-bremen.de", "author_num": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Technische Universit\u00e4t Berlin;University of Bremen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.uni-bremen.de", "aff_unique_abbr": "TU Berlin;Uni Bremen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Score-Optimal Diffusion Schedules", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96901", "id": "0rl5vWOzRU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0rl5vWOzRU", "openreview": "https://openreview.net/forum?id=0rl5vWOzRU", "poster": "", "project": "", "author_site": "Christopher Williams, Andrew Campbell, Arnaud Doucet, Saifuddin Syed", "tldr": "", "abstract": "Denoising diffusion models (DDMs) offer a flexible framework for sampling from high dimensional data distributions. DDMs generate a path of probability distributions interpolating between a reference Gaussian distribution and a data distribution by incrementally injecting noise into the data. To numerically simulate the sampling process, a discretisation schedule from the reference back towards clean data must be chosen. An appropriate discretisation schedule is crucial to obtain high quality samples. However, beyond hand crafted heuristics, a general method for choosing this schedule remains elusive. This paper presents a novel algorithm for adaptively selecting an optimal discretisation schedule with respect to a cost that we derive. Our cost measures the work done by the simulation procedure to transport samples from one point in the diffusion path to the next. Our method does not require hyperparameter tuning and adapts to the dynamics and geometry of the diffusion path. Our algorithm only involves the evaluation of the estimated Stein score, making it scalable to existing pre-trained models at inference time and online during training. We find that our learned schedule recovers performant schedules previously only discovered through manual search and obtains competitive FID scores on image datasets.", "keywords": "Diffusion model;adaptive schedule;Annealed Langevin", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4d19c10f7b700023ae5754a4994a8451641766f1.zip", "author": "Christopher Williams;Andrew Campbell;Arnaud Doucet;Saifuddin Syed", "authorids": "~Christopher_Williams4;~Andrew_Campbell4;~Arnaud_Doucet2;~Saifuddin_Syed1", "gender": ";;;M", "homepage": ";;https://www.stats.ox.ac.uk/~doucet/;", "dblp": ";93/3398;68/1628;", "google_scholar": ";;W4SZGV8AAAAJ;", "orcid": ";0000-0003-2086-0238;0000-0002-7662-419X; 0000-0002-8499-8255", "linkedin": "chris-w-387b53198/;;;", "or_profile": "~Christopher_Williams4;~Andrew_Campbell4;~Arnaud_Doucet2;~Saifuddin_Syed1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "PhD student;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nwilliams2024scoreoptimal,\ntitle={Score-Optimal Diffusion Schedules},\nauthor={Christopher Williams and Andrew Campbell and Arnaud Doucet and Saifuddin Syed},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0rl5vWOzRU}\n}", "github": "", "reviewers": "8ZVm;KRNd;g4tL;hc4h", "pdf_size": 729836, "rating": "4;6;7;7", "confidence": "3;2;4;3", "soundness": "2;3;4;3", "novelty": "2;3;4;3", "presentation": "1;3;4;3", "wc_summary": "69;35;48;146", "wc_strengths": "48;34;91;80", "wc_weaknesses": "202;58;201;17", "wc_questions": "170;52;3;34", "wc_limitations": "3;5;3;1", "wc_review": "492;184;346;278", "wc_reply_reviewers": "103;18;26;7", "wc_reply_authors": "251;55;17;13", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 74.5, 43.02615483633182 ], "wc_strengths_avg": [ 63.25, 23.12331074911203 ], "wc_weaknesses_avg": [ 119.5, 83.27214420200791 ], "wc_questions_avg": [ 64.75, 63.243082625691166 ], "wc_limitations_avg": [ 3.0, 1.4142135623730951 ], "wc_review_avg": [ 325.0, 112.27199116431488 ], "wc_reply_reviewers_avg": [ 38.5, 37.84507894033252 ], "wc_reply_authors_avg": [ 84.0, 97.8008179924892 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.28867513459481287, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CGy3UmqHLNwJ:scholar.google.com/&scioq=Score-Optimal+Diffusion+Schedules&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "3D Focusing-and-Matching Network for Multi-Instance Point Cloud Registration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96900", "id": "0sJBW05a2W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0sJBW05a2W", "openreview": "https://openreview.net/forum?id=0sJBW05a2W", "poster": "/media/PosterPDFs/NeurIPS%202024/96900.png?t=1731333373.8074756", "project": "", "author_site": "Liyuan Zhang, Le Hui, qi liu, Bo Li, Yuchao Dai", "tldr": "", "abstract": "Multi-instance point cloud registration aims to estimate the pose of all instances of a model point cloud in the whole scene. Existing methods all adopt the strategy of first obtaining the global correspondence and then clustering to obtain the pose of each instance. However, due to the cluttered and occluded objects in the scene, it is difficult to obtain an accurate correspondence between the model point cloud and all instances in the scene. To this end, we propose a simple yet powerful 3D focusing-and-matching network for multi-instance point cloud registration by learning the multiple pair-wise point cloud registration. Specifically, we first present a 3D multi-object focusing module to locate the center of each object and generate object proposals. By using self-attention and cross-attention to associate the model point cloud with structurally similar objects, we can locate potential matching instances by regressing object centers. Then, we propose a 3D dual-masking instance matching module to estimate the pose between the model point cloud and each object proposal. It performs instance mask and overlap mask masks to accurately predict the pair-wise correspondence. Extensive experiments on two public benchmarks, Scan2CAD and ROBI, show that our method achieves a new state-of-the-art performance on the multi-instance point cloud registration task.", "keywords": "Point Cloud Registration;Focus and Matching", "primary_area": "machine_vision", "supplementary_material": "", "author": "Liyuan Zhang;Le Hui;qi liu;Bo Li;Yuchao Dai", "authorids": "~Liyuan_Zhang3;~Le_Hui1;~qi_liu18;~Bo_Li35;~Yuchao_Dai1", "gender": "M;M;M;M;M", "homepage": "http://npu-cvr.cn/people/;https://fpthink.github.io/;;;http://npu-cvr.cn/", "dblp": ";211/6859;95/2446-54;50/3402-90;65/7804", "google_scholar": ";;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=fddAbqsAAAAJ", "orcid": ";0000-0003-0851-6805;0000-0002-4974-1518;;0000-0002-4432-7406", "linkedin": ";;;;", "or_profile": "~Liyuan_Zhang3;~Le_Hui1;~qi_liu18;~Bo_Li35;~Yuchao_Dai1", "aff": "Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University Xi'an;Northwestern Polytechnical University", "aff_domain": "nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "MS student;Associate Professor;Researcher;Associate Professor;Professor", "bibtex": "@inproceedings{\nzhang2024d,\ntitle={3D Focusing-and-Matching Network for Multi-Instance Point Cloud Registration},\nauthor={Liyuan Zhang and Le Hui and qi liu and Bo Li and Yuchao Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0sJBW05a2W}\n}", "github": "", "reviewers": "RCTj;uLBB;8vwC", "pdf_size": 0, "rating": "5;6;6", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "115;61;85", "wc_strengths": "62;99;64", "wc_weaknesses": "132;162;164", "wc_questions": "16;5;4", "wc_limitations": "11;38;10", "wc_review": "336;365;327", "wc_reply_reviewers": "0;57;40", "wc_reply_authors": "99;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 87.0, 22.090722034374522 ], "wc_strengths_avg": [ 75.0, 16.990193249832878 ], "wc_weaknesses_avg": [ 152.66666666666666, 14.636332266733433 ], "wc_questions_avg": [ 8.333333333333334, 5.436502143433364 ], "wc_limitations_avg": [ 19.666666666666668, 12.970050972229146 ], "wc_review_avg": [ 342.6666666666667, 16.21384867602041 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 23.893281249943232 ], "wc_reply_authors_avg": [ 33.0, 46.66904755831214 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7910146448608541180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nwpu.edu.cn;http://www.nwpu.edu.cn", "aff_unique_abbr": "NWPU;NWPU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "4D Gaussian Splatting in the Wild with Uncertainty-Aware Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96899", "id": "0sycTGl4In", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0sycTGl4In", "openreview": "https://openreview.net/forum?id=0sycTGl4In", "poster": "/media/PosterPDFs/NeurIPS%202024/96899.png?t=1733712090.5881364", "project": "", "author_site": "Mijeong Kim, Jongwoo Lim, Bohyung Han", "tldr": "", "abstract": "Novel view synthesis of dynamic scenes is becoming important in various applications, including augmented and virtual reality.\nWe propose a novel 4D Gaussian Splatting (4DGS) algorithm for dynamic scenes from casually recorded monocular videos. \nTo overcome the overfitting problem of existing work for these real-world videos, we introduce an uncertainty-aware regularization that identifies uncertain regions with few observations and selectively imposes additional priors based on diffusion models and depth smoothness on such regions.\nThis approach improves both the performance of novel view synthesis and the quality of training image reconstruction. \nWe also identify the initialization problem of 4DGS in fast-moving dynamic regions, where the Structure from Motion (SfM) algorithm fails to provide reliable 3D landmarks. \nTo initialize Gaussian primitives in such regions, we present a dynamic region densification method using the estimated depth maps and scene flow. \nOur experiments show that the proposed method improves the performance of 4DGS reconstruction from a video captured by a handheld monocular camera and also exhibits promising results in few-shot static scene reconstruction.", "keywords": "Dynamic Novel View Synthesis;4D Gaussian splatting;Sparse reconstruction;in-the-wild", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mijeong Kim;Jongwoo Lim;Bohyung Han", "authorids": "~Mijeong_Kim1;~Jongwoo_Lim3;~Bohyung_Han1", "gender": ";;Not Specified", "homepage": ";;http://cvlab.snu.ac.kr/~bhhan", "dblp": ";;73/4880.html", "google_scholar": ";;9aaeCToAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mijeong_Kim1;~Jongwoo_Lim3;~Bohyung_Han1", "aff": ";;Seoul National University", "aff_domain": ";;snu.ac.kr", "position": ";;Full Professor", "bibtex": "@inproceedings{\nkim2024d,\ntitle={4D Gaussian Splatting in the Wild with Uncertainty-Aware Regularization},\nauthor={Mijeong Kim and Jongwoo Lim and Bohyung Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0sycTGl4In}\n}", "github": "", "reviewers": "fqxz;6fVK;LdSG", "pdf_size": 17119062, "rating": "3;6;7", "confidence": "5;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;4", "wc_summary": "102;82;39", "wc_strengths": "47;23;45", "wc_weaknesses": "190;59;140", "wc_questions": "34;12;87", "wc_limitations": "25;22;10", "wc_review": "398;198;321", "wc_reply_reviewers": "54;69;110", "wc_reply_authors": "538;436;276", "reply_reviewers": "1;2;2", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 74.33333333333333, 26.284765338288427 ], "wc_strengths_avg": [ 38.333333333333336, 10.873004286866726 ], "wc_weaknesses_avg": [ 129.66666666666666, 53.977361509762176 ], "wc_questions_avg": [ 44.333333333333336, 31.47838764754143 ], "wc_limitations_avg": [ 19.0, 6.48074069840786 ], "wc_review_avg": [ 305.6666666666667, 82.36639012499104 ], "wc_reply_reviewers_avg": [ 77.66666666666667, 23.66901396810231 ], "wc_reply_authors_avg": [ 416.6666666666667, 107.83114371810936 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2773500981126146, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14511111074366466759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";;snu.ac.kr", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Maximizing utility in multi-agent environments by anticipating the behavior of other learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96898", "id": "0uGlKYS7a2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0uGlKYS7a2", "openreview": "https://openreview.net/forum?id=0uGlKYS7a2", "poster": "/media/PosterPDFs/NeurIPS%202024/96898.png?t=1731753379.9098587", "project": "", "author_site": "Angelos Assos, Yuval Dagan, Constantinos Daskalakis", "tldr": "", "abstract": "Learning algorithms are often used to make decisions in sequential decision-making environments. In multi-agent settings, the decisions of each agent can affect the utilities/losses of the other agents. Therefore, if an agent is good at anticipating the behavior of the other agents, in particular how they will make decisions in each round as a function of their experience that far, it could try to judiciously make its own decisions over the rounds of the interaction so as to influence the other agents to behave in a way that ultimately benefits its own utility. In this paper, we study repeated two-player games involving two types of agents: a learner, which employs an online learning algorithm to choose its strategy in each round; and an optimizer, which knows the learner's utility function and the learner's online learning algorithm. The optimizer wants to plan ahead to maximize its own utility, while taking into account the learner's behavior. We provide two results: a positive result for repeated zero-sum games and a negative result for repeated general-sum games. Our positive result is an algorithm for the optimizer, which exactly maximizes its utility against a learner that plays the Replicator Dynamics --- the continuous-time analogue of Multiplicative Weights Update (MWU). Additionally, we use this result to provide an algorithm for the optimizer against MWU, i.e.~for the discrete-time setting, which guarantees an average utility for the optimizer that is higher than the value of the one-shot game. Our negative result shows that, unless P=NP, there is no Fully Polynomial Time Approximation Scheme (FPTAS) for maximizing the utility of an optimizer against a learner that best-responds to the history in each round. Yet, this still leaves open the question of whether there exists a polynomial-time algorithm that optimizes the utility up to $o(T)$.", "keywords": "Game Theory; no regret; learning theory;", "primary_area": "online_learning", "supplementary_material": "", "author": "Angelos Assos;Yuval Dagan;Constantinos Costis Daskalakis", "authorids": "~Angelos_Assos1;~Yuval_Dagan1;~Constantinos_Costis_Daskalakis1", "gender": "M;M;M", "homepage": "https://www.mit.edu/people/assos/index.html;https://yuvaldagan.wordpress.com/;http://people.csail.mit.edu/costis/", "dblp": ";190/7292;", "google_scholar": ";;iTv2cOgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Angelos_Assos1;~Yuval_Dagan1;~Constantinos_Costis_Daskalakis1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu", "position": "MS student;;Full Professor", "bibtex": "@inproceedings{\nassos2024maximizing,\ntitle={Maximizing utility in multi-agent environments by anticipating the behavior of other learners},\nauthor={Angelos Assos and Yuval Dagan and Constantinos Costis Daskalakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0uGlKYS7a2}\n}", "github": "", "reviewers": "jNhL;PJLW;dTMF;bLvs", "pdf_size": 413905, "rating": "3;6;6;7", "confidence": "4;2;4;3", "soundness": "3;4;3;3", "novelty": "2;3;2;3", "presentation": "1;3;3;3", "wc_summary": "106;178;116;163", "wc_strengths": "64;124;58;76", "wc_weaknesses": "464;154;173;240", "wc_questions": "79;237;34;105", "wc_limitations": "46;5;6;13", "wc_review": "759;698;387;597", "wc_reply_reviewers": "54;45;24;30", "wc_reply_authors": "493;152;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 140.75, 30.425112982534674 ], "wc_strengths_avg": [ 80.5, 25.937424698685874 ], "wc_weaknesses_avg": [ 257.75, 123.28904046994607 ], "wc_questions_avg": [ 113.75, 75.5558568212948 ], "wc_limitations_avg": [ 17.5, 16.740669042783207 ], "wc_review_avg": [ 610.25, 141.28229719253576 ], "wc_reply_reviewers_avg": [ 38.25, 11.882234638316145 ], "wc_reply_authors_avg": [ 161.25, 201.33724816834066 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.502518907629606, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4655233021168504705&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "mit.edu;;mit.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BAdam: A Memory Efficient Full Parameter Optimization Method for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96897", "id": "0uXtFk5KNJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0uXtFk5KNJ", "openreview": "https://openreview.net/forum?id=0uXtFk5KNJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96897.png?t=1733743857.9232197", "project": "", "author_site": "Qijun Luo, Hengxu Yu, Xiao Li", "tldr": "", "abstract": "This work presents BAdam, an optimization method that leverages the block coordinate descent (BCD) framework with Adam's update rule. BAdam offers a memory efficient approach to the full parameter finetuning of large language models. We conduct \n a theoretical convergence analysis for BAdam in the deterministic case. Experimentally, we apply BAdam to finetune the Llama 3-8B and Llama 3-70B models using a single RTX3090-24GB GPU and 4 A100-80GB GPUs, respectively. The results confirm BAdam's efficiency in terms of memory usage, running time, and optimization capability. Furthermore, the downstream performance evaluation based on MT-bench and math benchmarks shows that BAdam outperforms existing memory efficient baselines such as LoRA. It also demonstrates that BAdam can achieve comparable or even superior performance compared to Adam. Finally, the ablation study using SGD's update rule illustrates the suitability of BCD for finetuning LLMs. Our code can be easily integrated into any PyTorch-based codebase and is available at https://github.com/Ledzy/BAdam.", "keywords": "block coordinate descent;large language models", "primary_area": "optimization", "supplementary_material": "", "author": "Qijun Luo;Hengxu Yu;Xiao Li", "authorids": "~Qijun_Luo3;~Hengxu_Yu1;~Xiao_Li5", "gender": "M;M;M", "homepage": "https://sds.cuhk.edu.cn/node/658;https://www.xiao-li.org/;https://ledzy.github.io/", "dblp": ";66/2069-9;224/9357", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;GMAEErsAAAAJ", "orcid": ";0000-0001-5577-6963;", "linkedin": ";;", "or_profile": "~Hengxu_Yu1;~Xiao_Li5;~qijun_luo2", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong Shen Zhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;link.cuhk.edu.cn", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nluo2024badam,\ntitle={{BA}dam: A Memory Efficient Full Parameter Optimization Method for Large Language Models},\nauthor={Qijun Luo and Hengxu Yu and Xiao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0uXtFk5KNJ}\n}", "github": "", "reviewers": "FWuu;XVND;9PUh;JX1Q", "pdf_size": 1185751, "rating": "3;5;5;8", "confidence": "4;4;3;4", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "32;66;59;47", "wc_strengths": "22;39;81;94", "wc_weaknesses": "138;152;100;160", "wc_questions": "93;1;152;4", "wc_limitations": "1;1;6;39", "wc_review": "286;259;398;344", "wc_reply_reviewers": "0;337;14;147", "wc_reply_authors": "74;1409;23;450", "reply_reviewers": "0;4;1;2", "reply_authors": "2;4;2;4", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 51.0, 12.90348790056394 ], "wc_strengths_avg": [ 59.0, 29.487285395573462 ], "wc_weaknesses_avg": [ 137.5, 23.038012067016545 ], "wc_questions_avg": [ 62.5, 63.53148825582476 ], "wc_limitations_avg": [ 11.75, 15.864661988205107 ], "wc_review_avg": [ 321.75, 53.67669419776147 ], "wc_reply_reviewers_avg": [ 124.5, 135.43725484518652 ], "wc_reply_authors_avg": [ 489.0, 556.1703875612221 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08084520834544431, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18239888342154118827&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cuhk.edu.cn;cuhk.edu.cn;link.cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Mixture of Tokens: Continuous MoE through Cross-Example Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96896", "id": "0zFVhMBZHJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0zFVhMBZHJ", "openreview": "https://openreview.net/forum?id=0zFVhMBZHJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96896.png?t=1730308223.324062", "project": "", "author_site": "Szymon Antoniak, Micha\u0142 Krutul, Maciej Pi\u00f3ro, Jakub Krajewski, Jan Ludziejewski, Kamil Ciebiera, Krystian Kr\u00f3l, Tomasz Odrzyg\u00f3\u017ad\u017a, Marek Cygan, Sebastian Jaszczur", "tldr": "", "abstract": "Mixture of Experts (MoE) models based on Transformer architecture are pushing the boundaries of language and vision tasks. The allure of these models lies in their ability to substantially increase the parameter count without a corresponding increase in FLOPs. Most widely adopted MoE models are discontinuous with respect to their parameters - often referred to as *sparse*. At the same time, existing continuous MoE designs either lag behind their sparse counterparts or are incompatible with autoregressive decoding. Motivated by the observation that the adaptation of fully continuous methods has been an overarching trend in Deep Learning, we develop Mixture of Tokens (MoT), a simple, continuous architecture that is capable of scaling the number of parameters similarly to sparse MoE models. Unlike conventional methods, MoT assigns mixtures of tokens from different examples to each expert. This architecture is fully compatible with autoregressive training and generation. Our best models not only achieve a 3x increase in training speed over dense Transformer models in language pretraining but also match the performance of state-of-the-art MoE architectures. Additionally, a close connection between MoT and MoE is demonstrated through a novel technique we call *transition tuning*.", "keywords": "LLM;Mixture of Experts;MoE;conditional computation;fully-differentiable;language modeling", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/bec3eccc0f6a9147e0593a524339010614cf4af4.zip", "author": "Szymon Antoniak;Micha\u0142 Krutul;Maciej Pi\u00f3ro;Jakub Krajewski;Jan Ludziejewski;Kamil Ciebiera;Krystian Kr\u00f3l;Tomasz Odrzyg\u00f3\u017ad\u017a;Marek Cygan;Sebastian Jaszczur", "authorids": "~Szymon_Antoniak1;~Micha\u0142_Krutul1;~Maciej_Pi\u00f3ro1;~Jakub_Krajewski1;~Jan_Ludziejewski1;~Kamil_Ciebiera1;~Krystian_Kr\u00f3l2;~Tomasz_Odrzyg\u00f3\u017ad\u017a1;~Marek_Cygan1;~Sebastian_Jaszczur1", "gender": "Not Specified;M;M;;M;M;M;M;;M", "homepage": ";;;;;;;;;", "dblp": ";;;;276/0228;;;;76/819;206/3302", "google_scholar": ";;;;https://scholar.google.com/citations?view_op=list_works;;;J2ERJ7cAAAAJ;df8TSy4AAAAJ;KF__0WgAAAAJ", "orcid": ";;;;0000-0002-6556-6801;;;;;0000-0003-1628-7176", "linkedin": "szymon-antoniak-022998258/;micha%C5%82-krutul-405596150/;maciej-pi%C3%B3ro-0ab6201a2/;jakub-krj;jan-ludziejewski-034959223/;kamil-ciebiera-9439a0252/;krystiankrol;tomasz-odrzygozdz/;marek-cygan-b9a316140/;sebastian-jaszczur-129866a3", "or_profile": "~Szymon_Antoniak1;~Micha\u0142_Krutul1;~Maciej_Pi\u00f3ro1;~Jakub_Krajewski1;~Jan_Ludziejewski1;~Kamil_Ciebiera1;~Krystian_Kr\u00f3l2;~Tomasz_Odrzyg\u00f3\u017ad\u017a1;~Marek_Cygan1;~Sebastian_Jaszczur1", "aff": ";IDEAS NCBR Sp.;IDEAS NCBR Sp.;University of Warsaw;University of Warsaw;IDEAS NCBR Sp.;University of Warsaw;IDEAS NCBR;Nomagic;University of Warsaw", "aff_domain": ";ideas-ncbr.pl;ideas-ncbr.pl;mimuw.edu.pl;mimuw.edu.pl;ideas-ncbr.pl;mimuw.edu.pl;ideas-ncbr.pl;nomagic.ai;uw.edu.pl", "position": ";PhD student;Researcher;PhD student;PhD student;Intern;MS student;Postdoc;Founder / CTO;PhD student", "bibtex": "@inproceedings{\nantoniak2024mixture,\ntitle={Mixture of Tokens: Continuous MoE through Cross-Example Aggregation},\nauthor={Szymon Antoniak and Micha{\\l} Krutul and Maciej Pi{\\'o}ro and Jakub Krajewski and Jan Ludziejewski and Kamil Ciebiera and Krystian Kr{\\'o}l and Tomasz Odrzyg{\\'o}{\\'z}d{\\'z} and Marek Cygan and Sebastian Jaszczur},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0zFVhMBZHJ}\n}", "github": "", "reviewers": "ef5W;wt4H;BdV2", "pdf_size": 731398, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;2;2", "wc_summary": "202;150;243", "wc_strengths": "113;131;69", "wc_weaknesses": "155;168;355", "wc_questions": "428;121;6", "wc_limitations": "24;15;10", "wc_review": "922;585;683", "wc_reply_reviewers": "15;42;40", "wc_reply_authors": "15;19;28", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 198.33333333333334, 38.055515004033545 ], "wc_strengths_avg": [ 104.33333333333333, 26.042699979499478 ], "wc_weaknesses_avg": [ 226.0, 91.37103844581534 ], "wc_questions_avg": [ 185.0, 178.1254239761036 ], "wc_limitations_avg": [ 16.333333333333332, 5.792715732327588 ], "wc_review_avg": [ 730.0, 141.53680322328418 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 12.283683848458853 ], "wc_reply_authors_avg": [ 20.666666666666668, 5.436502143433363 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2772460272860644680&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";ideas-ncbr.pl;ideas-ncbr.pl;mimuw.edu.pl;mimuw.edu.pl;ideas-ncbr.pl;mimuw.edu.pl;ideas-ncbr.pl;nomagic.ai;uw.edu.pl", "author_num": 10, "aff_unique_index": "0;0;1;1;0;1;2;3;1", "aff_unique_norm": "IDEAS NCBR;University of Warsaw;Institute for Development, Economic Analysis, and Simulation (IDEAS);Nomagic", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.uw.edu.pl;https://www.ideas-ncbr.gov.pl;", "aff_unique_abbr": ";UW;IDEAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Poland;" }, { "title": "Cooperate or Collapse: Emergence of Sustainable Cooperation in a Society of LLM Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96895", "id": "0zWzJj6lO3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0zWzJj6lO3", "openreview": "https://openreview.net/forum?id=0zWzJj6lO3", "poster": "", "project": "", "author_site": "Giorgio Piatti, Zhijing Jin, Max Kleiman-Weiner, Bernhard Sch\u00f6lkopf, Mrinmaya Sachan, Rada Mihalcea", "tldr": "", "abstract": "As AI systems pervade human life, ensuring that large language models (LLMs) make safe decisions remains a significant challenge. We introduce the Governance of the Commons Simulation (GovSim), a generative simulation platform designed to study strategic interactions and cooperative decision-making in LLMs. In GovSim, a society of AI agents must collectively balance exploiting a common resource with sustaining it for future use. This environment enables the study of how ethical considerations, strategic planning, and negotiation skills impact cooperative outcomes. We develop an LLM-based agent architecture and test it with the leading open and closed LLMs. We find that all but the most powerful LLM agents fail to achieve a sustainable equilibrium in GovSim, with the highest survival rate below 54%. Ablations reveal that successful multi-agent communication between agents is critical for achieving cooperation in these cases. Furthermore, our analyses show that the failure to achieve sustainable cooperation in most LLMs stems from their inability to formulate and analyze hypotheses about the long-term effects of their actions on the equilibrium of the group. Finally, we show that agents that leverage \"Universalization\"-based reasoning, a theory of moral thinking, are able to achieve significantly better sustainability. Taken together, GovSim enables us to study the mechanisms that underlie sustainable self-government with specificity and scale. We open source the full suite of our research results, including the simulation environment, agent prompts, and a comprehensive web interface.", "keywords": "cooperative AI;AI safety;LLM agents;cognitive science;language model evaluation;dynamic evaluation;alignment;agency;evolving benchmarks;multi-agent interactions", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/07830851c6a30ba62acdc95e15c2d1033174b8c3.zip", "author": "Giorgio Piatti;Zhijing Jin;Max Kleiman-Weiner;Bernhard Sch\u00f6lkopf;Mrinmaya Sachan;Rada Mihalcea", "authorids": "~Giorgio_Piatti1;~Zhijing_Jin1;~Max_Kleiman-Weiner1;~Bernhard_Sch\u00f6lkopf1;~Mrinmaya_Sachan3;~Rada_Mihalcea1", "gender": ";;Unspecified;;;F", "homepage": ";;http://www.mit.edu/~maxkw/;;;https://web.eecs.umich.edu/~mihalcea/", "dblp": ";;160/7595;;;m/RadaMihalcea", "google_scholar": ";;SACXQKYAAAAJ;;;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ", "orcid": ";;;;;0000-0002-0767-6703", "linkedin": ";;;;;", "or_profile": "~Giorgio_Piatti1;~Zhijing_Jin1;~Max_Kleiman-Weiner1;~Bernhard_Sch\u00f6lkopf1;~Mrinmaya_Sachan3;~Rada_Mihalcea1", "aff": ";;Common Sense Machines;;;University of Michigan", "aff_domain": ";;csm.ai;;;umich.edu", "position": ";;Principal Researcher;;;Full Professor", "bibtex": "@inproceedings{\npiatti2024cooperate,\ntitle={Cooperate or Collapse: Emergence of Sustainable Cooperation in a Society of {LLM} Agents},\nauthor={Giorgio Piatti and Zhijing Jin and Max Kleiman-Weiner and Bernhard Sch{\\\"o}lkopf and Mrinmaya Sachan and Rada Mihalcea},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0zWzJj6lO3}\n}", "github": "", "reviewers": "KPE1;dG5Q;9ZQN;tYtP", "pdf_size": 1283502, "rating": "4;6;6;7", "confidence": "5;1;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "4;3;3;3", "wc_summary": "102;76;87;65", "wc_strengths": "48;36;109;79", "wc_weaknesses": "322;27;97;83", "wc_questions": "241;1;96;24", "wc_limitations": "7;1;40;3", "wc_review": "720;141;429;254", "wc_reply_reviewers": "915;0;0;12", "wc_reply_authors": "3241;72;48;0", "reply_reviewers": "2;0;0;1", "reply_authors": "8;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 13.683932183404009 ], "wc_strengths_avg": [ 68.0, 28.398943642325854 ], "wc_weaknesses_avg": [ 132.25, 112.63963556404113 ], "wc_questions_avg": [ 90.5, 93.6923155867118 ], "wc_limitations_avg": [ 12.75, 15.880412463157246 ], "wc_review_avg": [ 386.0, 218.43420061885914 ], "wc_reply_reviewers_avg": [ 231.75, 394.50499046273166 ], "wc_reply_authors_avg": [ 840.25, 1386.316048922467 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 2.7726341266023544 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3823595564509363, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15034700116460699732&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;csm.ai;;;umich.edu", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Common Sense Machines;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": ";https://www.umich.edu", "aff_unique_abbr": ";UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AdaNovo: Towards Robust \\emph{De Novo} Peptide Sequencing in Proteomics against Data Biases", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96894", "id": "0zfUiSX5si", "proceeding": "", "pdf": "https://openreview.net/pdf?id=0zfUiSX5si", "openreview": "https://openreview.net/forum?id=0zfUiSX5si", "poster": "", "project": "", "author_site": "Jun Xia, Shaorong Chen, Jingbo Zhou, Shan Xiaojun, Wenjie Du, Zhangyang Gao, Cheng Tan, Bozhen Hu, Jiangbin Zheng, Stan Z. Li", "tldr": "", "abstract": "Tandem mass spectrometry has played a pivotal role in advancing proteomics, enabling the high-throughput analysis of protein composition in biological tissues. Despite the development of several deep learning methods for predicting amino acid sequences (peptides) responsible for generating the observed mass spectra, training data biases hinder further advancements of \\emph{de novo} peptide sequencing. Firstly, prior methods struggle to identify amino acids with Post-Translational Modifications (PTMs) due to their lower frequency in training data compared to canonical amino acids, further resulting in unsatisfactory peptide sequencing performance. Secondly, various noise and missing peaks in mass spectra reduce the reliability of training data (Peptide-Spectrum Matches, PSMs). To address these challenges, we propose AdaNovo, a novel and domain knowledge-inspired framework that calculates Conditional Mutual Information (CMI) between the mass spectra and amino acids or peptides, using CMI for robust training against above biases. Extensive experiments indicate that AdaNovo outperforms previous competitors on the widely-used 9-species benchmark, meanwhile yielding 3.6\\% - 9.4\\% improvements in PTMs identification. The supplements contain the code.", "keywords": "De Novo Peptide sequencing;Proteomics;Mass Spectrum", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/546ef633b92536c9833d88f25b8467179aaa74a1.zip", "author": "Jun Xia;Shaorong Chen;Jingbo Zhou;Xiaojun Shan;Wenjie Du;Zhangyang Gao;Cheng Tan;Bozhen Hu;Jiangbin Zheng;Stan Z. Li", "authorids": "~Jun_Xia1;~Shaorong_Chen1;~Jingbo_Zhou2;~Xiaojun_Shan1;~Wenjie_Du2;~Zhangyang_Gao1;~Cheng_Tan1;~Bozhen_Hu1;~Jiangbin_Zheng3;~Stan_Z._Li2", "gender": "M;;M;M;M;M;M;M;M;M", "homepage": "http://junxia97.github.io/;;https://jingbo02.github.io/;https://sxj1215.github.io/shanxiaojun.github.io/;https://invokerqwer.github.io/duwenjie.github.io/;;https://chengtan9907.github.io/;;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": ";284/3994;;127/8709;;275/3266;70/1533-12.html;279/8665;;l/StanZLi", "google_scholar": "aPKKpSYAAAAJ;;;;https://scholar.google.com/citations?hl=zh-CN;4SclT-QAAAAJ;6kTV6aMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-7185-8616;0009-0005-4221-2911;;0000-0002-3517-7354;0000-0003-1026-6083;;0000-0002-3428-0114;0000-0003-3305-0103;", "linkedin": ";;;;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Jun_Xia1;~Shaorong_Chen1;~Jingbo_Zhou2;~Xiaojun_Shan1;~Wenjie_Du2;~Zhangyang_Gao1;~Cheng_Tan1;~Bozhen_Hu1;~Jiangbin_Zheng3;~Stan_Z._Li1", "aff": "Westlake University, China;Westlake University;Jilin University;University of Electronic Science and Technology of China;University of Science and Technology of China;Westlake University, China;Zhejiang University & Westlake University;Westlake University;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;jlu.edu.cn;uestc.edu.cn;ustc.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Assistant Professor;PhD student;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\nxia2024adanovo,\ntitle={AdaNovo: Towards Robust {\\textbackslash}emph\\{De Novo\\} Peptide Sequencing in Proteomics against Data Biases},\nauthor={Jun Xia and Shaorong Chen and Jingbo Zhou and Xiaojun Shan and Wenjie Du and Zhangyang Gao and Cheng Tan and Bozhen Hu and Jiangbin Zheng and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=0zfUiSX5si}\n}", "github": "", "reviewers": "XeYm;XCqV;DL85;xGNa;sJoV;xCSd", "pdf_size": 564511, "rating": "3;4;5;6;7;7", "confidence": "3;3;4;3;4;1", "soundness": "2;3;2;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "1;3;3;3;3;3", "wc_summary": "103;66;91;49;100;121", "wc_strengths": "31;36;60;33;90;72", "wc_weaknesses": "110;63;301;58;73;24", "wc_questions": "2;89;111;81;2;53", "wc_limitations": "2;9;25;10;10;14", "wc_review": "248;263;588;231;275;284", "wc_reply_reviewers": "103;0;356;18;13;12", "wc_reply_authors": "401;202;1888;61;41;355", "reply_reviewers": "1;0;2;1;1;1", "reply_authors": "5;3;6;2;2;4", "rating_avg": [ 5.333333333333333, 1.4907119849998596 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 88.33333333333333, 24.05318181770461 ], "wc_strengths_avg": [ 53.666666666666664, 22.17105219775452 ], "wc_weaknesses_avg": [ 104.83333333333333, 91.28602789523099 ], "wc_questions_avg": [ 56.333333333333336, 41.99073971987422 ], "wc_limitations_avg": [ 11.666666666666666, 6.944222218666553 ], "wc_review_avg": [ 314.8333333333333, 123.38208495923908 ], "wc_reply_reviewers_avg": [ 83.66666666666667, 126.47880806241372 ], "wc_reply_authors_avg": [ 491.3333333333333, 638.895574844243 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 3.6666666666666665, 1.4907119849998596 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.22360679774997896, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:P65fzQUCaO8J:scholar.google.com/&scioq=AdaNovo:+Towards+Robust+%5Cemph%7BDe+Novo%7D+Peptide+Sequencing+in+Proteomics+against+Data+Biases&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "westlake.edu.cn;westlake.edu.cn;jlu.edu.cn;uestc.edu.cn;ustc.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;2;3;0;4;0;0;0", "aff_unique_norm": "Westlake University;Jilin University;University of Electronic Science and Technology of China;University of Science and Technology of China;Zhejiang University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.westlake.edu.cn;http://www.jlu.edu.cn;https://www.uestc.edu.cn;http://www.ustc.edu.cn;http://www.zju.edu.cn", "aff_unique_abbr": "WU;JLU;UESTC;USTC;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SegVol: Universal and Interactive Volumetric Medical Image Segmentation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96893", "id": "105ZuvpdyW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=105ZuvpdyW", "openreview": "https://openreview.net/forum?id=105ZuvpdyW", "poster": "/media/PosterPDFs/NeurIPS%202024/96893.png?t=1731410503.871179", "project": "", "author_site": "Yuxin Du, Fan BAI, Tiejun Huang, Bo Zhao", "tldr": "", "abstract": "Precise image segmentation provides clinical study with instructive information. Despite the remarkable progress achieved in medical image segmentation, there is still an absence of a 3D foundation segmentation model that can segment a wide range of anatomical categories with easy user interaction. In this paper, we propose a 3D foundation segmentation model, named SegVol, supporting universal and interactive volumetric medical image segmentation. By scaling up training data to 90K unlabeled Computed Tomography (CT) volumes and 6K labeled CT volumes, this foundation model supports the segmentation of over 200 anatomical categories using semantic and spatial prompts. To facilitate efficient and precise inference on volumetric images, we design a zoom-out-zoom-in mechanism. Extensive experiments on 22 anatomical segmentation tasks verify that SegVol outperforms the competitors in 19 tasks, with improvements up to 37.24\\% compared to the runner-up methods. We demonstrate the effectiveness and importance of specific designs by ablation study. We expect this foundation model can promote the development of volumetric medical image analysis. The model and code are publicly available at https://github.com/BAAI-DCAI/SegVol.", "keywords": "Volumetric Medical Image Segmentation;3D Segmentation Foundation Model;Universal and Interactive 3D Segmentation", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/1c88f31c5f1d4785869bf400f5347e00e91fd4da.zip", "author": "Yuxin Du;Fan BAI;Tiejun Huang;Bo Zhao", "authorids": "~Yuxin_Du2;~Fan_BAI4;~Tiejun_Huang1;~Bo_Zhao4", "gender": "M;M;M;M", "homepage": ";;https://idm.pku.edu.cn/~tjhuang/;", "dblp": "250/3895;;h/TiejunHuang;", "google_scholar": "https://scholar.google.nl/citations?user=iciTynsAAAAJ;https://scholar.google.com.hk/citations?user=jWKTSIEAAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;R3_AR5EAAAAJ", "orcid": "0009-0001-9803-7973;;0000-0002-4234-6099;", "linkedin": ";;;", "or_profile": "~Yuxin_Du2;~Fan_BAI4;~Tiejun_Huang1;~Bo_Zhao4", "aff": "Beijing Academy of Artificial Intelligence;The Chinese University of Hong Kong;Peking University;BAAI", "aff_domain": "baai.ac.cn;cuhk.edu.hk;pku.edu.cn;baai.ac.cn", "position": "Intern;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ndu2024segvol,\ntitle={SegVol: Universal and Interactive Volumetric Medical Image Segmentation},\nauthor={Yuxin Du and Fan BAI and Tiejun Huang and Bo Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=105ZuvpdyW}\n}", "github": "", "reviewers": "rczM;Ykum;8irr", "pdf_size": 33453878, "rating": "7;8;8", "confidence": "5;4;4", "soundness": "3;3;4", "novelty": "3;4;4", "presentation": "3;4;2", "wc_summary": "66;70;548", "wc_strengths": "74;52;137", "wc_weaknesses": "86;30;48", "wc_questions": "6;88;178", "wc_limitations": "2;14;42", "wc_review": "234;254;953", "wc_reply_reviewers": "0;19;128", "wc_reply_authors": "38;15;16", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 228.0, 226.2800624594811 ], "wc_strengths_avg": [ 87.66666666666667, 36.021598459196056 ], "wc_weaknesses_avg": [ 54.666666666666664, 23.34285520001546 ], "wc_questions_avg": [ 90.66666666666667, 70.24401912064985 ], "wc_limitations_avg": [ 19.333333333333332, 16.759740119968715 ], "wc_review_avg": [ 480.3333333333333, 334.3255233384905 ], "wc_reply_reviewers_avg": [ 49.0, 56.39739946723312 ], "wc_reply_authors_avg": [ 23.0, 10.614455552060438 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4253498622786317821&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "baai.ac.cn;cuhk.edu.hk;pku.edu.cn;baai.ac.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Beijing Academy of Artificial Intelligence;Chinese University of Hong Kong;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.baaic.cn;https://www.cuhk.edu.hk;http://www.pku.edu.cn", "aff_unique_abbr": "BAAI;CUHK;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Data Distribution Valuation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96892", "id": "1067784F6e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1067784F6e", "openreview": "https://openreview.net/forum?id=1067784F6e", "poster": "/media/PosterPDFs/NeurIPS%202024/96892.png?t=1731176029.7581189", "project": "", "author_site": "Xinyi Xu, Shuaiqi Wang, Chuan Sheng Foo, Bryan Kian Hsiang Low, Giulia Fanti", "tldr": "", "abstract": "Data valuation is a class of techniques for quantitatively assessing the value of data for applications like pricing in data marketplaces. Existing data valuation methods define a value for a discrete dataset. However, in many use cases, users are interested in not only the value of the dataset, but that of the distribution from which the dataset was sampled. For example, consider a buyer trying to evaluate whether to purchase data from different vendors. The buyer may observe (and compare) only a small preview sample from each vendor, to decide which vendor's data distribution is most useful to the buyer and purchase. The core question is how should we compare the values of data distributions from their samples? Under a Huber characterization of the data heterogeneity across vendors, we propose a maximum mean discrepancy (MMD)-based valuation method which enables theoretically principled and actionable policies for comparing data distributions from samples. We empirically demonstrate that our method is sample-efficient and effective in identifying valuable data distributions against several existing baselines, on multiple real-world datasets (e.g., network intrusion detection, credit card fraud detection) and downstream applications (classification, regression).", "keywords": "Data distribution valuation;Huber model;Maximum mean discrepancy", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Xinyi Xu;Shuaiqi Wang;Chuan-Sheng Foo;Bryan Kian Hsiang Low;Giulia Fanti", "authorids": "~Xinyi_Xu4;~Shuaiqi_Wang1;~Chuan-Sheng_Foo1;~Bryan_Kian_Hsiang_Low1;~Giulia_Fanti1", "gender": "M;M;M;M;", "homepage": "https://xinyi-xu.com;https://wsqwsq.github.io/;http://ai.stanford.edu/~csfoo;http://www.comp.nus.edu.sg/~lowkh;https://www.andrew.cmu.edu/user/gfanti/", "dblp": ";74/5587;73/1823;97/4877;141/9910", "google_scholar": "2762GgsAAAAJ;yy2OXxEAAAAJ;AgbeqGkAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;Rn_BmTYAAAAJ", "orcid": "0000-0002-8744-0695;0000-0003-4962-7501;0000-0002-4748-5792;;0000-0002-7671-2624", "linkedin": "xinyi-xu-a93222133/;;;;", "or_profile": "~Xinyi_Xu4;~Shuaiqi_Wang1;~Chuan-Sheng_Foo1;~Bryan_Kian_Hsiang_Low1;~Giulia_Fanti1", "aff": "National University of Singapore;Carnegie Mellon University;Institute for Infocomm Research, A*STAR;National University of Singapore;Carnegie Mellon University", "aff_domain": "nus.edu.sg;andrew.cmu.edu;i2r.a-star.edu.sg;nus.edu.sg;andrew.cmu.edu", "position": "PhD student;PhD student;Principal Scientist;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024data,\ntitle={Data Distribution Valuation},\nauthor={Xinyi Xu and Shuaiqi Wang and Chuan-Sheng Foo and Bryan Kian Hsiang Low and Giulia Fanti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1067784F6e}\n}", "github": "", "reviewers": "Sj3P;s86g;YGYg", "pdf_size": 1778575, "rating": "6;6;7", "confidence": "4;5;4", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "130;110;131", "wc_strengths": "91;92;159", "wc_weaknesses": "50;157;134", "wc_questions": "123;85;2", "wc_limitations": "1;11;1", "wc_review": "395;455;427", "wc_reply_reviewers": "0;56;159", "wc_reply_authors": "0;19;90", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 123.66666666666667, 9.672412085697939 ], "wc_strengths_avg": [ 114.0, 31.822423959633664 ], "wc_weaknesses_avg": [ 113.66666666666667, 45.98792111945146 ], "wc_questions_avg": [ 70.0, 50.52392172690741 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 425.6666666666667, 24.513035081133648 ], "wc_reply_reviewers_avg": [ 71.66666666666667, 65.8499978908293 ], "wc_reply_authors_avg": [ 36.333333333333336, 38.7327022323801 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6Ok03NZcoPEJ:scholar.google.com/&scioq=Data+Distribution+Valuation&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "nus.edu.sg;andrew.cmu.edu;i2r.a-star.edu.sg;nus.edu.sg;andrew.cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "National University of Singapore;Carnegie Mellon University;Institute for Infocomm Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.cmu.edu;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NUS;CMU;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Singapore;United States" }, { "title": "Are Large-scale Soft Labels Necessary for Large-scale Dataset Distillation?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96891", "id": "12A1RT1L87", "proceeding": "", "pdf": "https://openreview.net/pdf?id=12A1RT1L87", "openreview": "https://openreview.net/forum?id=12A1RT1L87", "poster": "/media/PosterPDFs/NeurIPS%202024/96891.png?t=1731138766.8349779", "project": "", "author_site": "Lingao Xiao, Yang He", "tldr": "", "abstract": "In ImageNet-condensation, the storage for auxiliary soft labels exceeds that of the condensed dataset by over 30 times.\nHowever, are large-scale soft labels necessary for large-scale dataset distillation?\nIn this paper, we first discover that the high within-class similarity in condensed datasets necessitates the use of large-scale soft labels.\nThis high within-class similarity can be attributed to the fact that previous methods use samples from different classes to construct a single batch for batch normalization (BN) matching.\nTo reduce the within-class similarity, we introduce class-wise supervision during the image synthesizing process by batching the samples within classes, instead of across classes.\nAs a result, we can increase within-class diversity and reduce the size of required soft labels.\nA key benefit of improved image diversity is that soft label compression can be achieved through simple random pruning, eliminating the need for complex rule-based strategies. Experiments validate our discoveries.\nFor example, when condensing ImageNet-1K to 200 images per class, our approach compresses the required soft labels from 113 GB to 2.8 GB (40$\\times$ compression) with a 2.6\\% performance gain.\nCode is available at: https://github.com/he-y/soft-label-pruning-for-dataset-distillation", "keywords": "Dataset Condensation;Dataset Distillation", "primary_area": "other", "supplementary_material": "", "author": "Lingao Xiao;Yang He", "authorids": "~Lingao_Xiao1;~Yang_He2", "gender": "M;M", "homepage": ";https://he-y.github.io/", "dblp": "341/5434;06/1998-2", "google_scholar": "MlNI5YYAAAAJ;vvnFsIIAAAAJ", "orcid": "0009-0007-1697-1986;0000-0002-2257-6073", "linkedin": ";", "or_profile": "~Lingao_Xiao1;~yang_he1", "aff": "School of Computer Science and Engineering, Nanyang Technological University;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "scse.ntu.edu.sg;ihpc.a-star.edu.sg", "position": "Undergrad student;Researcher", "bibtex": "@inproceedings{\nxiao2024are,\ntitle={Are Large-scale Soft Labels Necessary for Large-scale Dataset Distillation?},\nauthor={Lingao Xiao and Yang He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=12A1RT1L87}\n}", "github": "", "reviewers": "8MXG;S5vb;FdYh", "pdf_size": 33688334, "rating": "5;5;7", "confidence": "4;3;4", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;2;4", "wc_summary": "48;66;106", "wc_strengths": "50;26;117", "wc_weaknesses": "124;135;57", "wc_questions": "3;2;118", "wc_limitations": "5;1;7", "wc_review": "230;230;405", "wc_reply_reviewers": "47;23;106", "wc_reply_authors": "44;655;152", "reply_reviewers": "1;1;2", "reply_authors": "2;5;3", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 73.33333333333333, 24.239545283597124 ], "wc_strengths_avg": [ 64.33333333333333, 38.50829636440554 ], "wc_weaknesses_avg": [ 105.33333333333333, 34.4705993887867 ], "wc_questions_avg": [ 41.0, 54.448752664011195 ], "wc_limitations_avg": [ 4.333333333333333, 2.494438257849294 ], "wc_review_avg": [ 288.3333333333333, 82.49579113843053 ], "wc_reply_reviewers_avg": [ 58.666666666666664, 34.87437773240151 ], "wc_reply_authors_avg": [ 283.6666666666667, 266.24842200888673 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6332279174207658989&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "scse.ntu.edu.sg;ihpc.a-star.edu.sg", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Nanyang Technological University;Institute of High Performance Computing", "aff_unique_dep": "School of Computer Science and Engineering;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "NTU;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Bayesian-guided Label Mapping for Visual Reprogramming", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96890", "id": "135eKqDoRR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=135eKqDoRR", "openreview": "https://openreview.net/forum?id=135eKqDoRR", "poster": "", "project": "", "author_site": "Chengyi Cai, Zesheng Ye, Lei Feng, Jianzhong Qi, Feng Liu", "tldr": "", "abstract": "*Visual reprogramming* (VR) leverages the intrinsic capabilities of pretrained vision models by adapting their input or output interfaces to solve downstream tasks whose labels (i.e., downstream labels) might be totally different from the labels associated with the pretrained models (i.e., pretrained labels). \nWhen adapting the output interface, label mapping methods transform the pretrained labels to downstream labels by establishing a gradient-free one-to-one correspondence between the two sets of labels.\nHowever, in this paper, we reveal that one-to-one mappings may overlook the complex relationship between pretrained and downstream labels. Motivated by this observation, we propose a ***B**ayesian-guided **L**abel **M**apping* (BLM) method. \nBLM constructs an iteratively-updated probabilistic label mapping matrix, with each element quantifying a pairwise relationship between pretrained and downstream labels.\nThe assignment of values to the constructed matrix is guided by Bayesian conditional probability, considering the joint distribution of the downstream labels and the labels predicted by the pretrained model on downstream samples. Experiments conducted on both pretrained vision models (e.g., ResNeXt) and vision-language models (e.g., CLIP) demonstrate the superior performance of BLM over existing label mapping methods. The success of BLM also offers a probabilistic lens through which to understand and analyze the effectiveness of VR.\nOur code is available at https://github.com/tmlr-group/BayesianLM.", "keywords": "visual reprogramming;adversarial reprogramming;output label mapping;Bayesian probability", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Chengyi Cai;Zesheng Ye;Lei Feng;Jianzhong Qi;Feng Liu", "authorids": "~Chengyi_Cai2;~Zesheng_Ye1;~Lei_Feng1;~Jianzhong_Qi1;~Feng_Liu2", "gender": "F;;M;M;M", "homepage": "https://caichengyi.github.io/;;https://lfeng1995.github.io/;https://people.eng.unimelb.edu.au/jianzhongq/;https://fengliu90.github.io/index.html", "dblp": ";;76/847-6;41/1074-1;77/1318-3", "google_scholar": ";;https://scholar.google.com.sg/citations?user=KomQOFkAAAAJ;https://scholar.google.com.au/citations?user=mxS6eHYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-2839-5799;0000-0001-6501-9050;0000-0002-5005-9129", "linkedin": ";;;;alexfengliu", "or_profile": "~Chengyi_Cai2;~Zesheng_Ye1;~Lei_Feng1;~Jianzhong_Qi1;~Feng_Liu2", "aff": "University of Melbourne;;Singapore University of Technology and Design;University of Melbourne;University of Melbourne", "aff_domain": "unimelb.edu.au;;sutd.edu.sg;unimelb.edu.au;unimelb.edu.au", "position": "PhD student;;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ncai2024bayesianguided,\ntitle={Bayesian-guided Label Mapping for Visual Reprogramming},\nauthor={Chengyi Cai and Zesheng Ye and Lei Feng and Jianzhong Qi and Feng Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=135eKqDoRR}\n}", "github": "", "reviewers": "iW2K;885t;RgTa;zxuh", "pdf_size": 39077931, "rating": "7;7;7;8", "confidence": "4;4;4;4", "soundness": "4;4;4;3", "novelty": "4;4;3;3", "presentation": "4;3;3;3", "wc_summary": "89;92;99;83", "wc_strengths": "136;78;68;97", "wc_weaknesses": "153;168;202;116", "wc_questions": "4;125;2;4", "wc_limitations": "5;5;10;5", "wc_review": "387;468;381;305", "wc_reply_reviewers": "0;14;0;17", "wc_reply_authors": "0;21;0;28", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 5.7608593109014565 ], "wc_strengths_avg": [ 94.75, 25.9939896899264 ], "wc_weaknesses_avg": [ 159.75, 30.873734791890662 ], "wc_questions_avg": [ 33.75, 52.6895388099004 ], "wc_limitations_avg": [ 6.25, 2.165063509461097 ], "wc_review_avg": [ 385.25, 57.68177788522126 ], "wc_reply_reviewers_avg": [ 7.75, 7.8222439235810075 ], "wc_reply_authors_avg": [ 12.25, 12.497499749949988 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1716023676877837353&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "unimelb.edu.au;;sutd.edu.sg;unimelb.edu.au;unimelb.edu.au", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Melbourne;Singapore University of Technology and Design", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.sutd.edu.sg", "aff_unique_abbr": "UniMelb;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Australia;Singapore" }, { "title": "Enhancing Domain Adaptation through Prompt Gradient Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96889", "id": "14hLJr6kZ3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=14hLJr6kZ3", "openreview": "https://openreview.net/forum?id=14hLJr6kZ3", "poster": "/media/PosterPDFs/NeurIPS%202024/96889.png?t=1732036918.0147438", "project": "", "author_site": "Viet Hoang Phan, Tung Lam Tran, Quyen Tran, Trung Le", "tldr": "", "abstract": "Prior Unsupervised Domain Adaptation (UDA) methods often aim to train a domain-invariant feature extractor, which may hinder the model from learning sufficiently discriminative features. To tackle this, a line of works based on prompt learning leverages the power of large-scale pre-trained vision-language models to learn both domain-invariant and specific features through a set of domain-agnostic and domain-specific learnable prompts. Those studies typically enforce invariant constraints on representation, output, or prompt space to learn such prompts. Differently, we cast UDA as a multiple-objective optimization problem in which each objective is represented by a domain loss. Under this new framework, we propose aligning per-objective gradients to foster consensus between them. Additionally, to prevent potential overfitting when fine-tuning this deep learning architecture, we penalize the norm of these gradients. To achieve these goals, we devise a practical gradient update procedure that can work under both single-source and multi-source UDA. Empirically, our method consistently surpasses other vision language model adaptation methods by a large margin on a wide range of benchmarks. The implementation is available at https://github.com/VietHoang1512/PGA.", "keywords": "transfer learning; domain adaptation; prompt-tuning", "primary_area": "other", "supplementary_material": "", "author": "Hoang Phan;Tung Lam Tran;Quyen Tran;Trung Le", "authorids": "~Hoang_Phan1;~Tung_Lam_Tran1;~Quyen_Tran1;~Trung_Le2", "gender": ";F;M;M", "homepage": ";https://tranquyenbk173.github.io;;https://viethoang1512.github.io/", "dblp": "178/8536;298/2261;;295/0299", "google_scholar": "--2qc0UAAAAJ;ZtuZhrMAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0001-5081-3489;;", "linkedin": ";tranquyenbk173;;", "or_profile": "~Tung_Lam_Tran1;~Quyen_Tran1;~Trung_Le2;~Hoang_Viet_Phan1", "aff": "VinAi Research;VinAI Research;Monash University;New York University", "aff_domain": "vinai.io;vinai.io;monash.edu;nyu.edu", "position": "Research resident;Research Resident;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nphan2024enhancing,\ntitle={Enhancing Domain Adaptation through Prompt Gradient Alignment},\nauthor={Hoang Phan and Tung Lam Tran and Quyen Tran and Trung Le},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=14hLJr6kZ3}\n}", "github": "", "reviewers": "JYym;aPJr;dhjM;YFTB", "pdf_size": 624735, "rating": "4;6;7;8", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "85;83;68;90", "wc_strengths": "159;49;51;87", "wc_weaknesses": "166;187;227;331", "wc_questions": "100;19;25;2", "wc_limitations": "111;12;11;1", "wc_review": "621;350;382;511", "wc_reply_reviewers": "0;15;139;58", "wc_reply_authors": "0;33;653;37", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.5, 8.200609733428363 ], "wc_strengths_avg": [ 86.5, 44.505617622947334 ], "wc_weaknesses_avg": [ 227.75, 63.511317889018805 ], "wc_questions_avg": [ 36.5, 37.61980861195336 ], "wc_limitations_avg": [ 33.75, 44.80722597974572 ], "wc_review_avg": [ 466.0, 107.89114884919893 ], "wc_reply_reviewers_avg": [ 53.0, 54.023143188822324 ], "wc_reply_authors_avg": [ 180.75, 273.0314771230599 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6831300510639732, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8731087479335379002&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "vinai.io;vinai.io;monash.edu;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "VinAI Research;Monash University;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vinai.io;https://www.monash.edu;https://www.nyu.edu", "aff_unique_abbr": "VinAi;Monash;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Vietnam;Australia;United States" }, { "title": "Diversity Is Not All You Need: Training A Robust Cooperative Agent Needs Specialist Partners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96888", "id": "15460JjocO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=15460JjocO", "openreview": "https://openreview.net/forum?id=15460JjocO", "poster": "/media/PosterPDFs/NeurIPS%202024/96888.png?t=1731395258.7214391", "project": "", "author_site": "Rujikorn Charakorn, Poramate Manoonpong, Nat Dilokthanakul", "tldr": "", "abstract": "Partner diversity is known to be crucial for training a robust generalist cooperative agent. In this paper, we show that partner specialization, in addition to diversity, is crucial for the robustness of a downstream generalist agent. We propose a principled method for quantifying both the diversity and specialization of a partner population based on the concept of mutual information. Then, we observe that the recently proposed cross-play minimization (XP-min) technique produces diverse and specialized partners. However, the generated partners are overfit, reducing their usefulness as training partners. To address this, we propose simple methods, based on reinforcement learning and supervised learning, for extracting the diverse and specialized behaviors of XP-min generated partners but not their overfitness. We demonstrate empirically that the proposed method effectively removes overfitness, and extracted populations produce more robust generalist agents compared to the source XP-min populations.", "keywords": "multi-agent;reinforcement learning;robustness;diversity;specialization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Rujikorn Charakorn;Poramate Manoonpong;Nat Dilokthanakul", "authorids": "~Rujikorn_Charakorn1;~Poramate_Manoonpong1;~Nat_Dilokthanakul1", "gender": "M;M;M", "homepage": "https://www.rujikorn.com/;http://manoonpong.com/;", "dblp": "257/3015;87/2010.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;ZQ93_SQAAAAJ;https://scholar.google.co.uk/citations?user=ouce6eWAbloC", "orcid": ";0000-0002-4806-7576;", "linkedin": ";poramate-manoonpong-6a7a43a6/;", "or_profile": "~Rujikorn_Charakorn1;~Poramate_Manoonpong1;~Nat_Dilokthanakul1", "aff": "Sakana AI;Vidyasirimedhi Institute of Science and Technology;King Mongkut's Institute of Technology Ladkrabang", "aff_domain": "sakana.ai;vistec.ac.th;kmitl.ac.th", "position": "Intern;Full Professor;Lecturer", "bibtex": "@inproceedings{\ncharakorn2024diversity,\ntitle={Diversity Is Not All You Need: Training A Robust Cooperative Agent Needs Specialist Partners},\nauthor={Rujikorn Charakorn and Poramate Manoonpong and Nat Dilokthanakul},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=15460JjocO}\n}", "github": "", "reviewers": "G3wa;Vvdc;sQwt", "pdf_size": 3636750, "rating": "7;7;7", "confidence": "3;4;4", "soundness": "3;4;4", "novelty": "3;3;4", "presentation": "2;4;3", "wc_summary": "85;233;320", "wc_strengths": "63;37;136", "wc_weaknesses": "40;120;261", "wc_questions": "72;60;9", "wc_limitations": "29;60;19", "wc_review": "289;510;745", "wc_reply_reviewers": "10;38;13", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 212.66666666666666, 97.00973605205247 ], "wc_strengths_avg": [ 78.66666666666667, 41.907305117631005 ], "wc_weaknesses_avg": [ 140.33333333333334, 91.36130958392009 ], "wc_questions_avg": [ 47.0, 27.313000567495326 ], "wc_limitations_avg": [ 36.0, 17.45470328211473 ], "wc_review_avg": [ 514.6666666666666, 186.19046401169123 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 12.552113589175153 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4464387302590217495&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "sakana.ai;vistec.ac.th;kmitl.ac.th", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Sakana AI;Vidyasirimedhi Institute of Science and Technology;King Mongkut's Institute of Technology Ladkrabang", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.vistec.ac.th;http://www.kmitl.ac.th", "aff_unique_abbr": ";VISTEC;KMITL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ladkrabang", "aff_country_unique_index": "1;1", "aff_country_unique": ";Thailand" }, { "title": "No-Regret Learning for Fair Multi-Agent Social Welfare Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96887", "id": "15Jm9v7wCo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=15Jm9v7wCo", "openreview": "https://openreview.net/forum?id=15Jm9v7wCo", "poster": "", "project": "", "author_site": "Mengxiao Zhang, Ramiro Deo-Campo Vuong, Haipeng Luo", "tldr": "", "abstract": "We consider the problem of online multi-agent Nash social welfare (NSW) maximization. While previous works of Hossain et al. [2021], Jones et al. [2023] study similar problems in stochastic multi-agent multi-armed bandits and show that $\\sqrt{T}$-regret is possible after $T$ rounds, their fairness measure is the product of all agents' rewards, instead of their NSW (that is, their geometric mean). Given the fundamental role of NSW in the fairness literature, it is more than natural to ask whether no-regret fair learning with NSW as the objective is possible. In this work, we provide a complete answer to this question in various settings. Specifically, in stochastic $N$-agent $K$-armed bandits, we develop an algorithm with $\\widetilde{\\mathcal{O}}(K^{\\frac{2}{N}}T^{\\frac{N-1}{N}})$ regret and prove that the dependence on $T$ is tight, making it a sharp contrast to the $\\sqrt{T}$-regret bounds of Hossain et al. [2021], Jones et al. [2023]. We then consider a more challenging version of the problem with adversarial rewards. Somewhat surprisingly, despite NSW being a concave function, we prove that no algorithm can achieve sublinear regret. To circumvent such negative results, we further consider a setting with full-information feedback and design two algorithms with $\\sqrt{T}$-regret: the first one has no dependence on $N$ at all and is applicable to not just NSW but a broad class of welfare functions, while the second one has better dependence on $K$ and is preferable when $N$ is small.\nFinally, we also show that logarithmic regret is possible whenever there exists one agent who is indifferent about different arms.", "keywords": "Online Learning;Nash Social Welfare", "primary_area": "online_learning", "supplementary_material": "", "author": "Mengxiao Zhang;Ramiro Deo-Campo Vuong;Haipeng Luo", "authorids": "~Mengxiao_Zhang2;~Ramiro_Deo-Campo_Vuong1;~Haipeng_Luo1", "gender": ";M;M", "homepage": ";;https://haipeng-luo.net/", "dblp": ";;62/2576", "google_scholar": ";y2AyoyIAAAAJ;ct2hw4UAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mengxiao_Zhang2;~Ramiro_Deo-Campo_Vuong1;~Haipeng_Luo1", "aff": ";University of Southern California;University of Southern California", "aff_domain": ";usc.edu;usc.edu", "position": ";Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nzhang2024noregret,\ntitle={No-Regret Learning for Fair Multi-Agent Social Welfare Optimization},\nauthor={Mengxiao Zhang and Ramiro Deo-Campo Vuong and Haipeng Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=15Jm9v7wCo}\n}", "github": "", "reviewers": "izKh;t2CF;aYwg", "pdf_size": 439202, "rating": "4;6;7", "confidence": "4;3;3", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "78;248;263", "wc_strengths": "35;82;142", "wc_weaknesses": "254;74;49", "wc_questions": "58;12;108", "wc_limitations": "2;1;5", "wc_review": "427;417;567", "wc_reply_reviewers": "79;18;12", "wc_reply_authors": "25;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 196.33333333333334, 83.8980863243548 ], "wc_strengths_avg": [ 86.33333333333333, 43.78990243829684 ], "wc_weaknesses_avg": [ 125.66666666666667, 91.31751687868481 ], "wc_questions_avg": [ 59.333333333333336, 39.20317447463775 ], "wc_limitations_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_review_avg": [ 470.3333333333333, 68.47546194724713 ], "wc_reply_reviewers_avg": [ 36.333333333333336, 30.26916289265731 ], "wc_reply_authors_avg": [ 8.333333333333334, 11.785113019775793 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9449111825230683, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12667800073074560373&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": ";usc.edu;usc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Kuro Siwo: 33 billion $m^2$ under the water. A global multi-temporal satellite dataset for rapid flood mapping", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97874", "id": "15PS30UOUp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=15PS30UOUp", "openreview": "https://openreview.net/forum?id=15PS30UOUp", "poster": "", "project": "", "author_site": "Nikolaos Ioannis Bountos, Maria Sdraka, Angelos Zavras, Andreas Karavias, Ilektra Karasante, Themistocles Herekakis, Angeliki Thanasou, Dimitrios Michail, Ioannis Papoutsis", "tldr": "", "abstract": "Global flash floods, exacerbated by climate change, pose severe threats to human\nlife, infrastructure, and the environment. Recent catastrophic events in Pakistan and\nNew Zealand underscore the urgent need for precise flood mapping to guide restoration efforts, understand vulnerabilities, and prepare for future occurrences. While Synthetic Aperture Radar (SAR) remote sensing offers day-and-night, all-weather\nimaging capabilities, its application in deep learning for flood segmentation is limited by the lack of large annotated datasets. To address this, we introduce Kuro\nSiwo, a manually annotated multi-temporal dataset, spanning 43 flood events globally. Our dataset maps more than 338 billion $m^2$ of land, with 33 billion designated\nas either flooded areas or permanent water bodies. Kuro Siwo includes a highly\nprocessed product optimized for flash flood mapping based on SAR Ground Range\nDetected, and a primal SAR Single Look Complex product with minimal preprocessing, designed to promote research on the exploitation of both the phase and amplitude information and to offer maximum flexibility for downstream task preprocessing. To leverage advances in large scale self-supervised pretraining methods\nfor remote sensing data, we augment Kuro Siwo with a large unlabeled set of SAR\nsamples. Finally, we provide an extensive benchmark, namely BlackBench, offering strong baselines for a diverse set of flood events globally. All data and code are\npublished in our Github repository: https://github.com/Orion-AI-Lab/KuroSiwo.", "keywords": "Synthetic Aperture Radar;SAR;Deep Learning;Remote Sensing;Floods;dataset", "primary_area": "", "supplementary_material": "/attachment/3b915f289ec0f237c29858aacf1b26e571b8ac00.pdf", "author": "Nikolaos Ioannis Bountos;Maria Sdraka;Angelos Zavras;Andreas Karavias;Ilektra Karasante;Themistocles Herekakis;Angeliki Thanasou;Dimitrios Michail;Ioannis Papoutsis", "authorids": "~Nikolaos_Ioannis_Bountos2;~Maria_Sdraka2;~Angelos_Zavras1;~Andreas_Karavias1;~Ilektra_Karasante1;~Themistocles_Herekakis1;~Angeliki_Thanasou1;~Dimitrios_Michail1;~Ioannis_Papoutsis1", "gender": "M;F;M;M;F;M;F;M;M", "homepage": "https://ngbountos.github.io/;;;;;;;https://d-michail.github.io/;", "dblp": ";242/1001;306/7514;318/9235;332/2339;;;m/DimitriosMichail1;76/9902", "google_scholar": "XFUe1WQAAAAJ;xw-Zz8QAAAAJ;https://scholar.google.com/citations?hl=en;f1jJ2FgAAAAJ;S-yNHi0AAAAJ;;;https://scholar.google.nl/citations?user=DD0NjLEAAAAJ;46cBUO8AAAAJ", "orcid": "0000-0003-1615-0196;0000-0003-2053-1274;0009-0008-2788-1940;;0000-0001-7379-5471;0000-0002-7434-4071;0000-0002-1969-5127;0000-0002-5316-6704;0000-0002-2845-9791", "linkedin": ";;;andreas-karavias-472283158/;ilektra-karasante-4977b8226?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;themistocles-herekakis/;;dimitrios-michail/;ioannis-papoutsis/", "or_profile": "~Nikolaos_Ioannis_Bountos2;~Maria_Sdraka2;~Angelos_Zavras1;~Andreas_Karavias1;~Ilektra_Karasante1;~Themistocles_Herekakis1;~Angeliki_Thanasou1;~Dimitrios_Michail1;~Ioannis_Papoutsis1", "aff": "National Observatory of Athens;National Observatory of Athens;Harokopio University;The Centre for Research & Technology;National Observatory of Athens ;National Observatoty of Athens;National Observatory of Athens;Harokopio University;National Technical University of Athens", "aff_domain": "noa.gr;noa.gr;hua.gr;certh.gr;noa.gr;noa.gr;noa.gr;hua.gr;ntua.gr", "position": "PhD student;Researcher;PhD student;Researcher;Researcher;Researcher;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nbountos2024kuro,\ntitle={Kuro Siwo: 33 billion \\$m{\\textasciicircum}2\\$ under the water. A global multi-temporal satellite dataset for rapid flood mapping},\nauthor={Nikolaos Ioannis Bountos and Maria Sdraka and Angelos Zavras and Andreas Karavias and Ilektra Karasante and Themistocles Herekakis and Angeliki Thanasou and Dimitrios Michail and Ioannis Papoutsis},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=15PS30UOUp}\n}", "github": "", "reviewers": "vj9P;nnn6;b48H;d7o8", "pdf_size": 33563963, "rating": "5;6;7;9", "confidence": "3;4;3;5", "wc_summary_and_contributions": "54;21;34;37", "wc_strengths": "58;21;2;45", "wc_improvement": "112;12;6;106", "wc_limitations": "1;11;2;20", "wc_correctness": "3;18;34;17", "wc_clarity": "1;5;5;6", "wc_relation_to_prior_work": "1;20;54;6", "wc_documentation": "1;18;1;59", "wc_additional_feedback": "1;1;1;1", "wc_review": "232;127;139;297", "wc_reply_reviewers": "0;0;0;54", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 36.5, 11.757976016304847 ], "wc_strengths_avg": [ 31.5, 21.592822881689184 ], "wc_improvement_avg": [ 59.0, 50.08991914547278 ], "wc_limitations_avg": [ 8.5, 7.697402159170326 ], "wc_correctness_avg": [ 18.0, 10.977249200050075 ], "wc_clarity_avg": [ 4.25, 1.920286436967152 ], "wc_relation_to_prior_work_avg": [ 20.25, 20.69269194667528 ], "wc_documentation_avg": [ 19.75, 23.699947257325277 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 198.75, 69.77956362718243 ], "wc_reply_reviewers_avg": [ 13.5, 23.382685902179844 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7644707871564383, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5695488984577450149&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "noa.gr;noa.gr;hua.gr;certh.gr;noa.gr;noa.gr;noa.gr;hua.gr;ntua.gr", "author_num": 9, "aff_unique_index": "0;0;1;2;0;0;0;1;3", "aff_unique_norm": "National Observatory of Athens;Harokopio University;Centre for Research & Technology;National Technical University of Athens", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.noa.gr/;https://www.harokopio.gr;;https://www.ntua.gr", "aff_unique_abbr": "NOA;HU;;NTUA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Greece;" }, { "title": "Dense Associative Memory Through the Lens of Random Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96886", "id": "164QnJsYjF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=164QnJsYjF", "openreview": "https://openreview.net/forum?id=164QnJsYjF", "poster": "/media/PosterPDFs/NeurIPS%202024/96886.png?t=1733846086.053208", "project": "", "author_site": "Benjamin Hoover, Duen Horng Chau, Hendrik Strobelt, Parikshit Ram, Dmitry Krotov", "tldr": "", "abstract": "Dense Associative Memories are high storage capacity variants of the Hopfield networks that are capable of storing a large number of memory patterns in the weights of the network of a given size. Their common formulations typically require storing each pattern in a separate set of synaptic weights, which leads to the increase of the number of synaptic weights when new patterns are introduced. In this work we propose an alternative formulation of this class of models using random features, commonly used in kernel methods. In this formulation the number of network's parameters remains fixed. At the same time, new memories can be added to the network by modifying existing weights. We show that this novel network closely approximates the energy function and dynamics of conventional Dense Associative Memories and shares their desirable computational properties.", "keywords": "Associative Memory;Kernels;Random Features;Hopfield Network", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Benjamin Hoover;Duen Horng Chau;Hendrik Strobelt;Parikshit Ram;Dmitry Krotov", "authorids": "~Benjamin_Hoover1;~Duen_Horng_Chau1;~Hendrik_Strobelt1;~Parikshit_Ram1;~Dmitry_Krotov2", "gender": "M;M;M;;Not Specified", "homepage": "https://bhoov.com;http://hendrik.strobelt.com;https://rithram.github.io/;https://mitibmwatsonailab.mit.edu/people/dmitry-krotov/;https://faculty.cc.gatech.edu/~dchau", "dblp": "250/9412;67/7527;99/8314;182/2341;10/2670", "google_scholar": "n10P0tYAAAAJ;H4vEe_oAAAAJ;JaXmmnkAAAAJ;WeD9ll0AAAAJ;https://scholar.google.com.tw/citations?user=YON32W4AAAAJ", "orcid": "0000-0001-5218-3185;;0000-0002-9456-029X;;0000-0001-9824-3323", "linkedin": "benhoov/;;parikshit-ram-4861325/;krotovdmitry;polochau", "or_profile": "~Benjamin_Hoover1;~Hendrik_Strobelt1;~Parikshit_Ram1;~Dmitry_Krotov2;~Duen_Chau1", "aff": "International Business Machines;International Business Machines;International Business Machines;Massachusetts Institute of Technology;Georgia Institute of Technology", "aff_domain": "research.ibm.com;ibm.com;ibm.com;mit.edu;gatech.edu", "position": "AI Research Engineer;Principal Researcher;Principal Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nhoover2024dense,\ntitle={Dense Associative Memory Through the Lens of Random Features},\nauthor={Benjamin Hoover and Duen Horng Chau and Hendrik Strobelt and Parikshit Ram and Dmitry Krotov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=164QnJsYjF}\n}", "github": "", "reviewers": "wQ3r;xsUY;9NBh;B4qp", "pdf_size": 3912058, "rating": "4;6;7;7", "confidence": "3;3;4;2", "soundness": "3;3;2;2", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "116;93;128;32", "wc_strengths": "131;33;135;39", "wc_weaknesses": "380;128;2049;68", "wc_questions": "32;138;299;4", "wc_limitations": "10;38;59;8", "wc_review": "669;430;2670;151", "wc_reply_reviewers": "0;0;1282;19", "wc_reply_authors": "51;51;1607;0", "reply_reviewers": "0;0;6;1", "reply_authors": "2;2;4;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 36.98901864067226 ], "wc_strengths_avg": [ 84.5, 48.566964080535236 ], "wc_weaknesses_avg": [ 656.25, 812.5811882513648 ], "wc_questions_avg": [ 118.25, 115.70733554965302 ], "wc_limitations_avg": [ 28.75, 21.111312133545844 ], "wc_review_avg": [ 980.0, 992.7942888635088 ], "wc_reply_reviewers_avg": [ 325.25, 552.434328676269 ], "wc_reply_authors_avg": [ 427.25, 681.447127442768 ], "reply_reviewers_avg": [ 1.75, 2.48746859276655 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4414394322018026861&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "research.ibm.com;ibm.com;ibm.com;mit.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "International Business Machines Corporation;Massachusetts Institute of Technology;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ibm.com;https://web.mit.edu;https://www.gatech.edu", "aff_unique_abbr": "IBM;MIT;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unleashing Region Understanding in Intermediate Layers for MLLM-based Referring Expression Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96885", "id": "168NLzTpw8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=168NLzTpw8", "openreview": "https://openreview.net/forum?id=168NLzTpw8", "poster": "", "project": "", "author_site": "Yaoyuan Liang, Zhuojun Cai, Jian Xu, Guanbo Huang, Yiran Wang, Xiao Liang, Jiahao Liu, Ziran Li, Jingang Wang, Shao-Lun Huang", "tldr": "", "abstract": "The Multi-modal Large Language Model (MLLM) based Referring Expression Generation (REG) task has gained increasing popularity, which aims to generate an unambiguous text description that applies to exactly one object or region in the image by leveraging foundation models. We empirically found that there exists a potential trade-off between the detailedness and the correctness of the descriptions for the referring objects. On the one hand, generating sentences with more details is usually required in order to provide more precise object descriptions. On the other hand, complicated sentences could easily increase the probability of hallucinations. To address this issue, we propose a training-free framework, named ``unleash-then-eliminate'', which first elicits the latent information in the intermediate layers, and then adopts a cycle-consistency-based decoding method to alleviate the production of hallucinations. Furthermore, to reduce the computational load of cycle-consistency-based decoding, we devise a Probing-based Importance Estimation method to statistically estimate the importance weights of intermediate layers within a subset. These importance weights are then incorporated into the decoding process over the entire dataset, intervening in the next token prediction from intermediate layers.\nExtensive experiments conducted on the RefCOCOg and PHD benchmarks show that our proposed framework could outperform existing methods on both semantic and hallucination-related metrics. Code will be made available in https://github.com/Glupayy/unleash-eliminate.", "keywords": "Vision-Language;Region-Level Understanding;Multimodal Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yaoyuan Liang;Zhuojun Cai;Jian Xu;Guanbo Huang;Yiran Wang;Xiao Liang;Jiahao Liu;Ziran Li;Jingang Wang;Shao-Lun Huang", "authorids": "~Yaoyuan_Liang1;~Zhuojun_Cai1;~Jian_Xu7;~Guanbo_Huang1;~Yiran_Wang9;~Xiao_Liang9;~Jiahao_Liu6;~Ziran_Li1;~Jingang_Wang1;~Shao-Lun_Huang3", "gender": ";F;M;M;F;M;M;M;M;M", "homepage": ";http://nouse.com;;;;https://mastervito.github.io/;https://hit-computer.github.io/;;https://sites.google.com/site/bitwjg/;https://sites.google.com/view/slhuang/home", "dblp": ";;73/1149-16;;;;;;59/7807;64/2243", "google_scholar": ";;5kjbGosAAAAJ;p1zSpZIAAAAJ;https://scholar.google.com/citations?view_op=list_works;JHWyBKIAAAAJ;https://scholar.google.com.hk/citations?user=IvImF70AAAAJ;Edj3xjMAAAAJ;janU39IAAAAJ;", "orcid": ";;0000-0001-6201-9215;;0009-0002-1097-7264;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Yaoyuan_Liang1;~Zhuojun_Cai1;~Jian_Xu7;~Guanbo_Huang1;~Yiran_Wang9;~Xiao_Liang9;~Jiahao_Liu6;~Ziran_Li1;~Jingang_Wang1;~Shao-Lun_Huang3", "aff": ";Tsinghua Shenzhen International Graduate School, Tsinghua University;Tsinghua University;University of Electronic Science and Technology of China;Tsinghua University;Tsinghua University;Meituan;;Meituan;Tsinghua University", "aff_domain": ";mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;uestc.edu;tsinghua.edu.cn;tsinghua.edu.cn;meituan.com;;meituan.com;tsinghua.edu.cn", "position": ";PhD student;PhD student;Undergrad student;MS student;MS student;Researcher;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nliang2024unleashing,\ntitle={Unleashing Region Understanding in Intermediate Layers for {MLLM}-based Referring Expression Generation},\nauthor={Yaoyuan Liang and Zhuojun Cai and Jian Xu and Guanbo Huang and Yiran Wang and Xiao Liang and Jiahao Liu and Ziran Li and Jingang Wang and Shao-Lun Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=168NLzTpw8}\n}", "github": "", "reviewers": "MMWB;85oQ;ikvW;adaN", "pdf_size": 9234177, "rating": "4;5;6;8", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "1;2;3;3", "presentation": "1;1;3;3", "wc_summary": "191;61;94;95", "wc_strengths": "82;18;79;147", "wc_weaknesses": "185;92;101;103", "wc_questions": "57;84;53;1", "wc_limitations": "25;16;6;3", "wc_review": "540;271;333;349", "wc_reply_reviewers": "20;0;14;101", "wc_reply_authors": "125;38;42;116", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 110.25, 48.586906672477106 ], "wc_strengths_avg": [ 81.5, 45.63167759353145 ], "wc_weaknesses_avg": [ 120.25, 37.6123317543595 ], "wc_questions_avg": [ 48.75, 30.036436206714004 ], "wc_limitations_avg": [ 12.5, 8.674675786448736 ], "wc_review_avg": [ 373.25, 100.58423087144426 ], "wc_reply_reviewers_avg": [ 33.75, 39.49920885283653 ], "wc_reply_authors_avg": [ 80.25, 40.40034034510106 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u6-w3LhEhiYJ:scholar.google.com/&scioq=Unleashing+Region+Understanding+in+Intermediate+Layers+for+MLLM-based+Referring+Expression+Generation&hl=en&as_sdt=0,48", "gs_version_total": 0, "email": ";mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;uestc.edu;tsinghua.edu.cn;tsinghua.edu.cn;meituan.com;;meituan.com;tsinghua.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;0;0;2;2;0", "aff_unique_norm": "Tsinghua University;University of Electronic Science and Technology of China;Meituan", "aff_unique_dep": "International Graduate School;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uestc.edu.cn;https://www.meituan.com", "aff_unique_abbr": "THU;UESTC;Meituan", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Unified Debiasing Approach for Vision-Language Models across Modalities and Tasks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96884", "id": "181llen2gw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=181llen2gw", "openreview": "https://openreview.net/forum?id=181llen2gw", "poster": "/media/PosterPDFs/NeurIPS%202024/96884.png?t=1731379505.2628894", "project": "", "author_site": "Hoin Jung, Taeuk Jang, Xiaoqian Wang", "tldr": "", "abstract": "Recent advancements in Vision-Language Models (VLMs) have enabled complex multimodal tasks by processing text and image data simultaneously, significantly enhancing the field of artificial intelligence. However, these models often exhibit biases that can skew outputs towards societal stereotypes, thus necessitating debiasing strategies. Existing debiasing methods focus narrowly on specific modalities or tasks, and require extensive retraining. To address these limitations, this paper introduces Selective Feature Imputation for Debiasing (SFID), a novel methodology that integrates feature pruning and low confidence imputation (LCI) to effectively reduce biases in VLMs. SFID is versatile, maintaining the semantic integrity of outputs and costly effective by eliminating the need for retraining. Our experimental results demonstrate SFID's effectiveness across various VLMs tasks including zero-shot classification, text-to-image retrieval, image captioning, and text-to-image generation, by significantly reducing gender biases without compromising performance. This approach not only enhances the fairness of VLMs applications but also preserves their efficiency and utility across diverse scenarios.", "keywords": "Vision-Language Model;Fairness;Debias", "primary_area": "fairness", "supplementary_material": "/attachment/593ebc94654878394bf2e9b57410c845153a61db.zip", "author": "Hoin Jung;Taeuk Jang;Xiaoqian Wang", "authorids": "~Hoin_Jung1;~Taeuk_Jang1;~Xiaoqian_Wang1", "gender": "M;M;F", "homepage": ";;https://engineering.purdue.edu/~joywang/", "dblp": ";61/6076;151/3215-1", "google_scholar": "6VasZjEAAAAJ;https://scholar.google.co.kr/citations?user=AWJhF1UAAAAJ;I3tc214AAAAJ", "orcid": ";;", "linkedin": ";taeuk-jang-a52674178/;", "or_profile": "~Hoin_Jung1;~Taeuk_Jang1;~Xiaoqian_Wang1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njung2024a,\ntitle={A Unified Debiasing Approach for Vision-Language Models across Modalities and Tasks},\nauthor={Hoin Jung and Taeuk Jang and Xiaoqian Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=181llen2gw}\n}", "github": "", "reviewers": "Wrj8;umE5;ystz", "pdf_size": 21401887, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "49;24;54", "wc_strengths": "49;71;44", "wc_weaknesses": "421;62;125", "wc_questions": "48;27;70", "wc_limitations": "109;4;18", "wc_review": "676;188;311", "wc_reply_reviewers": "14;31;11", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 42.333333333333336, 13.12334645668635 ], "wc_strengths_avg": [ 54.666666666666664, 11.728408057172787 ], "wc_weaknesses_avg": [ 202.66666666666666, 156.51269029130583 ], "wc_questions_avg": [ 48.333333333333336, 17.55625877635159 ], "wc_limitations_avg": [ 43.666666666666664, 46.549853800940014 ], "wc_review_avg": [ 391.6666666666667, 207.22987772573293 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 8.806563209081938 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15726333324832725435&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Invariant Tokenization of Crystalline Materials for Language Model Enabled Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96883", "id": "18FGRNd0wZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=18FGRNd0wZ", "openreview": "https://openreview.net/forum?id=18FGRNd0wZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96883.png?t=1733428872.6393995", "project": "", "author_site": "Keqiang Yan, Xiner Li, Hongyi Ling, Kenna Ashen, Carl Edwards, Raymundo Arroyave, Marinka Zitnik, Heng Ji, Xiaofeng Qian, Xiaoning Qian, Shuiwang Ji", "tldr": "", "abstract": "We consider the problem of crystal materials generation using language models (LMs). A key step is to convert 3D crystal structures into 1D sequences to be processed by LMs. Prior studies used the crystallographic information framework (CIF) file stream, which fails to ensure SE(3) and periodic invariance and may not lead to unique sequence representations for a given crystal structure. Here, we propose a novel method, known as Mat2Seq, to tackle this challenge. Mat2Seq converts 3D crystal structures into 1D sequences and ensures that different mathematical descriptions of the same crystal are represented in a single unique sequence, thereby provably achieving SE(3) and periodic invariance. Experimental results show that, with language models, Mat2Seq achieves promising performance in crystal structure generation as compared with prior methods.", "keywords": "tokenization of crystals;language models;materials generation", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Keqiang Yan;Xiner Li;Hongyi Ling;Kenna Ashen;Carl Edwards;Raymundo Arroyave;Marinka Zitnik;Heng Ji;Xiaofeng Qian;Xiaoning Qian;Shuiwang Ji", "authorids": "~Keqiang_Yan2;~Xiner_Li1;~Hongyi_Ling1;~Kenna_Ashen1;~Carl_Edwards1;~Raymundo_Arroyave1;~Marinka_Zitnik1;~Heng_Ji3;~Xiaofeng_Qian1;~Xiaoning_Qian2;~Shuiwang_Ji1", "gender": "M;F;;;M;M;;F;;M;M", "homepage": ";;;;https://cnedwards.com/;https://engineering.tamu.edu/materials/profiles/arroyave-raymundo.html;https://zitniklab.hms.harvard.edu;http://blender.cs.illinois.edu/hengji.html;https://sites.google.com/tamu.edu/qian-group;https://www.ece.tamu.edu/~xqian;http://people.tamu.edu/~sji", "dblp": "272/6760;267/6459;259/0934;;300/1001;272/2040;53/11277.html;;266/1654;62/4504;84/6405", "google_scholar": "cv52C8oAAAAJ;bBQx_5MAAAAJ;ei8O1BEAAAAJ;;https://scholar.google.com/citations?hl=en;aPUBaigAAAAJ;YtUDgPIAAAAJ;z7GCqT4AAAAJ;bK7fFKoAAAAJ;dXGlddgAAAAJ;BZGj6sAAAAAJ", "orcid": ";;;0009-0000-4647-6328;;0000-0001-7548-8686;;;0000-0003-1627-288X;0000-0002-4347-2476;0000-0002-4205-4563", "linkedin": ";;;;carl-edwards-70a90592;;;;;;shuiwang-ji-9a040715/", "or_profile": "~Keqiang_Yan2;~Xiner_Li1;~Hongyi_Ling1;~Kenna_Ashen1;~Carl_Edwards1;~Raymundo_Arroyave1;~Marinka_Zitnik1;~Heng_Ji3;~Xiaofeng_Qian1;~Xiaoning_Qian2;~Shuiwang_Ji1", "aff": "Texas A&M University;Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University - College Station;Genentech;Texas A&M University - College Station;Harvard University;University of Illinois, Urbana-Champaign;Texas A&M University;Texas A&M;Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;gene.com;tamu.edu;harvard.edu;uiuc.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Intern;Full Professor;Associate Professor;Full Professor;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nyan2024invariant,\ntitle={Invariant Tokenization of Crystalline Materials for Language Model Enabled Generation},\nauthor={Keqiang Yan and Xiner Li and Hongyi Ling and Kenna Ashen and Carl Edwards and Raymundo Arroyave and Marinka Zitnik and Heng Ji and Xiaofeng Qian and Xiaoning Qian and Shuiwang Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=18FGRNd0wZ}\n}", "github": "", "reviewers": "5Kst;vCYe;Sr8M;JJwt", "pdf_size": 3058888, "rating": "4;6;6;8", "confidence": "3;3;5;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;4;3;3", "wc_summary": "111;56;75;82", "wc_strengths": "46;122;69;42", "wc_weaknesses": "163;67;141;10", "wc_questions": "57;77;314;32", "wc_limitations": "6;28;63;54", "wc_review": "383;350;662;220", "wc_reply_reviewers": "150;13;246;0", "wc_reply_authors": "544;34;546;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;3;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.0, 19.761072845369505 ], "wc_strengths_avg": [ 69.75, 31.877695964420013 ], "wc_weaknesses_avg": [ 95.25, 60.722215868658814 ], "wc_questions_avg": [ 120.0, 113.13487525957679 ], "wc_limitations_avg": [ 37.75, 22.38721733489895 ], "wc_review_avg": [ 403.75, 161.071994772524 ], "wc_reply_reviewers_avg": [ 102.25, 101.69162944903577 ], "wc_reply_authors_avg": [ 281.0, 264.274478525642 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15755938941402657206&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu;gene.com;tamu.edu;harvard.edu;uiuc.edu;tamu.edu;tamu.edu;tamu.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;1;0;2;3;0;0;0", "aff_unique_norm": "Texas A&M University;Genentech;Harvard University;University of Illinois", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tamu.edu;https://www.genentech.com;https://www.harvard.edu;https://illinois.edu", "aff_unique_abbr": "TAMU;Genentech;Harvard;UIUC", "aff_campus_unique_index": "1;1;1;1;2", "aff_campus_unique": ";College Station;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FINALLY: fast and universal speech enhancement with studio-like quality", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96882", "id": "18RdkSv9h9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=18RdkSv9h9", "openreview": "https://openreview.net/forum?id=18RdkSv9h9", "poster": "/media/PosterPDFs/NeurIPS%202024/96882.png?t=1731683595.700073", "project": "", "author_site": "Nicholas Babaev, Kirill Tamogashev, Azat Saginbaev, Ivan Shchekotov, Hanbin Bae, Hosang Sung, WonJun Lee, Hoon-Young Cho, Pavel Andreev", "tldr": "", "abstract": "In this paper, we address the challenge of speech enhancement in real-world recordings, which often contain various forms of distortion, such as background noise, reverberation, and microphone artifacts.\nWe revisit the use of Generative Adversarial Networks (GANs) for speech enhancement and theoretically show that GANs are naturally inclined to seek the point of maximum density within the conditional clean speech distribution, which, as we argue, is essential for speech enhancement task.\nWe study various feature extractors for perceptual loss to facilitate the stability of adversarial training, developing a methodology for probing the structure of the feature space.\nThis leads us to integrate WavLM-based perceptual loss into MS-STFT adversarial training pipeline, creating an effective and stable training procedure for the speech enhancement model.\nThe resulting speech enhancement model, which we refer to as FINALLY, builds upon the HiFi++ architecture, augmented with a WavLM encoder and a novel training pipeline.\nEmpirical results on various datasets confirm our model's ability to produce clear, high-quality speech at 48 kHz, achieving state-of-the-art performance in the field of speech enhancement. Demo page: https://samsunglabs.github.io/FINALLY-page/", "keywords": "speech enhancement; generative models", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/1b62210cb115d2d61b66369ebf593e5c68a7262b.zip", "author": "Nicholas Babaev;Kirill Tamogashev;Azat Saginbaev;Ivan Shchekotov;Hanbin Bae;Hosang Sung;WonJun Lee;Hoon-Young Cho;Pavel Andreev", "authorids": "~Nicholas_Babaev1;~Kirill_Tamogashev1;~Azat_Saginbaev1;~Ivan_Shchekotov1;~Hanbin_Bae1;~Hosang_Sung1;~WonJun_Lee4;~Hoon-Young_Cho1;~Pavel_Andreev1", "gender": "M;M;;;M;M;;M;M", "homepage": "https://github.com/NicholasBabaev;;https://github.com/Azatiussss;;;;;;", "dblp": ";;;;;;;08/6569.html;", "google_scholar": ";;;;OvIic1kAAAAJ;;;;TVvJjy4AAAAJ", "orcid": ";;;;;;;;", "linkedin": ";kirill-tamogashev?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;;hanbin-bae-350853205;hosang-sung-7793583b/;wonjun-lee-0baa18309;;", "or_profile": "~Nicholas_Babaev1;~Kirill_Tamogashev1;~Azat_Saginbaev1;~Ivan_Shchekotov1;~Hanbin_Bae1;~Hosang_Sung1;~WonJun_Lee4;~Hoon-Young_Cho1;~Pavel_Andreev1", "aff": "Samsung;Skolkovo Institute of Science and Technology;Moscow Institute of Physics and Technology;;;Samsung;Samsung;Samsung;Higher School of Economics, Higher School of Economics", "aff_domain": "samsung.com;skoltech.ru;phystech.edu;;;samsung.com;samsung.com;samsung.com;edu.hse.ru", "position": "Researcher;MS student;MS student;;;Principal Researcher;Researcher;Vice President;MS student", "bibtex": "@inproceedings{\nbabaev2024finally,\ntitle={{FINALLY}: fast and universal speech enhancement with studio-like quality},\nauthor={Nicholas Babaev and Kirill Tamogashev and Azat Saginbaev and Ivan Shchekotov and Hanbin Bae and Hosang Sung and WonJun Lee and Hoon-Young Cho and Pavel Andreev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=18RdkSv9h9}\n}", "github": "", "reviewers": "gE54;mzEg;sXBh;dAZE", "pdf_size": 1613004, "rating": "5;6;7;8", "confidence": "5;5;5;4", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "55;31;199;147", "wc_strengths": "27;150;80;197", "wc_weaknesses": "65;168;122;166", "wc_questions": "32;27;70;1", "wc_limitations": "25;1;5;26", "wc_review": "204;377;476;537", "wc_reply_reviewers": "124;0;111;59", "wc_reply_authors": "0;0;129;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.0, 68.0808343074613 ], "wc_strengths_avg": [ 113.5, 65.0173053886425 ], "wc_weaknesses_avg": [ 130.25, 41.91882035553959 ], "wc_questions_avg": [ 32.5, 24.642443060703215 ], "wc_limitations_avg": [ 14.25, 11.344051304538427 ], "wc_review_avg": [ 398.5, 125.97718047329047 ], "wc_reply_reviewers_avg": [ 73.5, 48.91063279083598 ], "wc_reply_authors_avg": [ 32.25, 55.858638544096294 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11220238870084791905&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "samsung.com;skoltech.ru;phystech.edu;;;samsung.com;samsung.com;samsung.com;edu.hse.ru", "author_num": 9, "aff_unique_index": "0;1;2;0;0;0;3", "aff_unique_norm": "Samsung;Skolkovo Institute of Science and Technology;Moscow Institute of Physics and Technology;Higher School of Economics", "aff_unique_dep": "Samsung;;;", "aff_unique_url": "https://www.samsung.com;https://www.skoltech.ru;https://www.mipt.ru/en;https://www.hse.ru", "aff_unique_abbr": "Samsung;Skoltech;MIPT;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0;1", "aff_country_unique": "South Korea;Russian Federation" }, { "title": "Learning-Augmented Priority Queues", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96881", "id": "1ATLLgvURu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1ATLLgvURu", "openreview": "https://openreview.net/forum?id=1ATLLgvURu", "poster": "", "project": "", "author_site": "Ziyad Benomar, Christian Coester", "tldr": "", "abstract": "Priority queues are one of the most fundamental and widely used data structures in computer science. Their primary objective is to efficiently support the insertion of new elements with assigned priorities and the extraction of the highest priority element. \nIn this study, we investigate the design of priority queues within the learning-augmented framework, where algorithms use potentially inaccurate predictions to enhance their worst-case performance.\nWe examine three prediction models spanning different use cases, and we show how the predictions can be leveraged to enhance the performance of priority queue operations. Moreover, we demonstrate the optimality of our solution and discuss some possible applications.", "keywords": "learning-augmented algorithms;algorithms with predictions;priority queue;shortest paths;Dijkstra", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Ziyad Benomar;Christian Coester", "authorids": "~Ziyad_Benomar1;~Christian_Coester1", "gender": ";M", "homepage": ";https://www.cs.ox.ac.uk/people/christian.coester/", "dblp": ";195/5890", "google_scholar": ";mKI_mvEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ziyad_Benomar1;~Christian_Coester1", "aff": ";University of Oxford", "aff_domain": ";ox.ac.uk", "position": ";Associate Professor", "bibtex": "@inproceedings{\nbenomar2024learningaugmented,\ntitle={Learning-Augmented Priority Queues},\nauthor={Ziyad Benomar and Christian Coester},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1ATLLgvURu}\n}", "github": "", "reviewers": "KF5a;7XCi;unQV;ZmGR", "pdf_size": 675875, "rating": "4;5;7;7", "confidence": "4;4;4;5", "soundness": "2;3;4;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "77;59;198;269", "wc_strengths": "52;104;150;52", "wc_weaknesses": "728;94;120;15", "wc_questions": "115;13;22;1", "wc_limitations": "1;13;3;1", "wc_review": "973;283;493;338", "wc_reply_reviewers": "32;0;25;9", "wc_reply_authors": "414;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 150.75, 86.70748237609024 ], "wc_strengths_avg": [ 89.5, 40.874808868054664 ], "wc_weaknesses_avg": [ 239.25, 284.8169368208288 ], "wc_questions_avg": [ 37.75, 45.21822088494858 ], "wc_limitations_avg": [ 4.5, 4.9749371855331 ], "wc_review_avg": [ 521.75, 271.6701814701054 ], "wc_reply_reviewers_avg": [ 16.5, 12.658988901172163 ], "wc_reply_authors_avg": [ 103.5, 179.2672585833788 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2330742081495450371&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";ox.ac.uk", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Risk-Averse Fine-tuning of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96880", "id": "1BZKqZphsW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1BZKqZphsW", "openreview": "https://openreview.net/forum?id=1BZKqZphsW", "poster": "/media/PosterPDFs/NeurIPS%202024/96880.png?t=1733328781.5713553", "project": "", "author_site": "Sapana Chaudhary, Ujwal Dinesha, Dileep Kalathil, Srinivas Shakkottai", "tldr": "", "abstract": "We consider the challenge of mitigating the generation of negative or toxic content by the Large Language Models (LLMs) in response to certain prompts. We propose integrating risk-averse principles into LLM fine-tuning to minimize the occurrence of harmful outputs, particularly rare but significant events. By optimizing the risk measure of Conditional Value at Risk (CVaR), our methodology trains LLMs to exhibit superior performance in avoiding toxic outputs while maintaining effectiveness in generative tasks. Empirical evaluations on sentiment modification and toxicity mitigation tasks demonstrate the efficacy of risk-averse reinforcement learning with human feedback (RLHF) in promoting a safer and more constructive online discourse environment.", "keywords": "Risk Averse Reinforcement Learning;Large Language Model Finetuning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Sapana Chaudhary;Ujwal Dinesha;Dileep Kalathil;Srinivas Shakkottai", "authorids": "~Sapana_Chaudhary1;~Ujwal_Dinesha1;~Dileep_Kalathil1;~Srinivas_Shakkottai1", "gender": "F;M;M;", "homepage": "https://sapanachaudhary.github.io/;;http://people.tamu.edu/~dileep.kalathil/;https://cesg.tamu.edu/faculty/sshakkot/", "dblp": "305/4530;;44/8356;03/353.html", "google_scholar": ";LXBE7KsAAAAJ;S24XFwwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-5882-6433", "linkedin": ";;;", "or_profile": "~Sapana_Chaudhary1;~Ujwal_Dinesha1;~Dileep_Kalathil1;~Srinivas_Shakkottai1", "aff": "Texas A&M University;Texas A&M University - College Station;Texas A&M University;Texas A&M", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchaudhary2024riskaverse,\ntitle={Risk-Averse Fine-tuning of Large Language Models},\nauthor={Sapana Chaudhary and Ujwal Dinesha and Dileep Kalathil and Srinivas Shakkottai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1BZKqZphsW}\n}", "github": "", "reviewers": "4ixc;xjv9;CMzE;R4NK;pvQg", "pdf_size": 914236, "rating": "4;5;6;6;8", "confidence": "4;3;3;4;2", "soundness": "2;2;3;3;3", "novelty": "1;3;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "96;66;41;108;108", "wc_strengths": "76;36;22;80;64", "wc_weaknesses": "198;185;139;98;53", "wc_questions": "54;6;59;51;1", "wc_limitations": "12;15;1;43;35", "wc_review": "436;308;262;380;261", "wc_reply_reviewers": "51;26;77;8;12", "wc_reply_authors": "0;24;24;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.8, 26.339324213046925 ], "wc_strengths_avg": [ 55.6, 22.78244938543703 ], "wc_weaknesses_avg": [ 134.6, 53.99481456584512 ], "wc_questions_avg": [ 34.2, 25.245989780557228 ], "wc_limitations_avg": [ 21.2, 15.471263684650973 ], "wc_review_avg": [ 329.4, 68.70691377146845 ], "wc_reply_reviewers_avg": [ 34.8, 25.91833327974621 ], "wc_reply_authors_avg": [ 9.6, 11.757550765359253 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7655318158241111, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V5bkIuQ7pHUJ:scholar.google.com/&scioq=Risk-Averse+Fine-tuning+of+Large+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1CssOqRYyz", "title": "Diff-PCC: Diffusion-based Neural Compression for 3D Point Clouds", "track": "main", "status": "Reject", "tldr": "", "abstract": "Stable diffusion networks have emerged as a groundbreaking development for their ability to produce realistic and detailed visual content. This characteristic renders them ideal decoders, capable of producing high-quality and aesthetically pleasing reconstructions. In this paper, we introduce the first diffusion-based point cloud compression method, dubbed Diff-PCC, to leverage the expressive power of the diffusion model for generative and aesthetically superior decoding. Different from the conventional autoencoder fashion, a dual-space latent representation is devised in this paper, in which a compressor composed of two independent encoding backbones is considered to extract expressive shape latents from distinct latent spaces. At the decoding side, a diffusion-based generator is devised to produce high-quality reconstructions by considering the shape latents as guidance to stochastically denoise the noisy point clouds. Experiments demonstrate that the proposed Diff-PCC achieves state-of-the-art compression performance (e.g., 7.711 dB BD-PSNR gains against the latest G-PCC standard at ultra-low bitrate) while attaining superior subjective quality. Source code will be made publicly available.", "keywords": "compression;point cloud;diffusion", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kai Liu;Kang You;Pan Gao", "authorids": "~Kai_Liu23;~Kang_You1;~Pan_Gao2", "gender": "M;M;M", "homepage": "https://github.com/fivefingerhill;;https://i2-multimedia-lab.github.io/index.html", "dblp": ";304/3304;87/5856.html", "google_scholar": ";JLH972sAAAAJ;https://scholar.google.co.kr/citations?user=vxHerj4AAAAJ", "orcid": ";0000-0001-7750-5478;0000-0002-4492-5430", "linkedin": ";;", "or_profile": "~Kai_Liu23;~Kang_You1;~Pan_Gao2", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing university;Nanjing University of Aeronautics and Astronautics, Tsinghua University", "aff_domain": "nuaa.edu.cn;smail.nju.edu.cn;nuaa.edu.cn", "position": "MS student;PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024diffpcc,\ntitle={Diff-{PCC}: Diffusion-based Neural Compression for 3D Point Clouds},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=1CssOqRYyz}\n}", "github": "", "project": "", "reviewers": "QHjS;HJUZ;U3Bi", "site": "https://openreview.net/forum?id=1CssOqRYyz", "pdf_size": 1086000, "rating": "2;5;5", "confidence": "4;4;4", "soundness": "1;3;2", "novelty": "1;3;3", "presentation": "2;3;3", "wc_summary": "39;97;69", "wc_strengths": "16;63;38", "wc_weaknesses": "80;165;98", "wc_questions": "56;4;183", "wc_limitations": "5;7;1", "wc_review": "196;336;389", "wc_reply_reviewers": "382;4;141", "wc_reply_authors": "357;0;443", "reply_reviewers": "2;1;1", "reply_authors": "2;1;2", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.33333333333333, 23.683092891108814 ], "wc_strengths_avg": [ 39.0, 19.200694431886227 ], "wc_weaknesses_avg": [ 114.33333333333333, 36.572606627851336 ], "wc_questions_avg": [ 81.0, 75.18421820213779 ], "wc_limitations_avg": [ 4.333333333333333, 2.494438257849294 ], "wc_review_avg": [ 307.0, 81.41662401909494 ], "wc_reply_reviewers_avg": [ 175.66666666666666, 156.25264442206696 ], "wc_reply_authors_avg": [ 266.6666666666667, 191.80256052050564 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lYtXEnT-EN4J:scholar.google.com/&scioq=Diff-PCC:+Diffusion-based+Neural+Compression+for+3D+Point+Clouds&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuaa.edu.cn;https://www.nju.edu.cn", "aff_unique_abbr": "NUAA;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning to Shape In-distribution Feature Space for Out-of-distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96879", "id": "1Du3mMP5YN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1Du3mMP5YN", "openreview": "https://openreview.net/forum?id=1Du3mMP5YN", "poster": "/media/PosterPDFs/NeurIPS%202024/96879.png?t=1731745181.7993424", "project": "", "author_site": "Yonggang Zhang, Jie Lu, Bo Peng, Zhen Fang, Yiu-ming Cheung", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is critical for deploying machine learning models in the open world. To design scoring functions that discern OOD data from the in-distribution (ID) cases from a pre-trained discriminative model, existing methods tend to make rigorous distributional assumptions either explicitly or implicitly due to the lack of knowledge about the learned feature space in advance. \nThe mismatch between the learned and assumed distributions motivates us to raise a fundamental yet under-explored question: \\textit{Is it possible to deterministically model the feature distribution while pre-training a discriminative model?}\nThis paper gives an affirmative answer to this question by presenting a Distributional Representation Learning (\\texttt{DRL}) framework for OOD detection. In particular, \\texttt{DRL} explicitly enforces the underlying feature space to conform to a pre-defined mixture distribution, together with an online approximation of normalization constants to enable end-to-end training. Furthermore, we formulate \\texttt{DRL} into a provably convergent Expectation-Maximization algorithm to avoid trivial solutions and rearrange the sequential sampling to guide the training consistency. Extensive evaluations across mainstream OOD detection benchmarks empirically manifest the superiority of the proposed \\texttt{DRL} over its advanced counterparts.", "keywords": "Out-of-distribution Detection", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yonggang Zhang;Jie Lu;Bo Peng;Zhen Fang;Yiu-ming Cheung", "authorids": "~Yonggang_Zhang1;~Jie_Lu4;~Bo_Peng24;~Zhen_Fang2;~Yiu-ming_Cheung1", "gender": "M;;;M;", "homepage": "https://yonggangzhangben.github.io/index.html;;;https://fang-zhen.github.io/index.html;", "dblp": "27/6859-3;;;;", "google_scholar": "XSbEr98AAAAJ;;;OzD6WJcAAAAJ;", "orcid": "0000-0002-4080-7592;;;0000-0003-0602-6255;", "linkedin": ";;;;", "or_profile": "~Yonggang_Zhang1;~Jie_Lu4;~Bo_Peng24;~Zhen_Fang2;~Yiu-ming_Cheung1", "aff": "Hong Kong Baptist University;;;University of Technology Sydney;", "aff_domain": "hkbu.edu.hk;;;uts.edu.au;", "position": "Postdoc;;;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2024learning,\ntitle={Learning to Shape In-distribution Feature Space for Out-of-distribution Detection},\nauthor={Yonggang Zhang and Jie Lu and Bo Peng and Zhen Fang and Yiu-ming Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1Du3mMP5YN}\n}", "github": "", "reviewers": "7foW;s5Ce;WmsK;5Lwp", "pdf_size": 1659469, "rating": "4;5;7;7", "confidence": "4;5;3;4", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "47;77;185;70", "wc_strengths": "55;114;38;60", "wc_weaknesses": "256;301;3;42", "wc_questions": "99;109;24;36", "wc_limitations": "24;123;5;30", "wc_review": "481;724;255;238", "wc_reply_reviewers": "0;0;15;0", "wc_reply_authors": "183;199;55;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 53.27464218556517 ], "wc_strengths_avg": [ 66.75, 28.472574523565655 ], "wc_weaknesses_avg": [ 150.5, 129.71989053341048 ], "wc_questions_avg": [ 67.0, 37.40989174001978 ], "wc_limitations_avg": [ 45.5, 45.68643124604941 ], "wc_review_avg": [ 424.5, 197.74036006844935 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 109.25, 84.22106327991828 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15585791378272835725&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "email": "hkbu.edu.hk;;;uts.edu.au;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Hong Kong Baptist University;University of Technology Sydney", "aff_unique_dep": ";", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.uts.edu.au", "aff_unique_abbr": "HKBU;UTS", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Australia" }, { "title": "Multiview Scene Graph", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96878", "id": "1ELFGSNBGC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1ELFGSNBGC", "openreview": "https://openreview.net/forum?id=1ELFGSNBGC", "poster": "/media/PosterPDFs/NeurIPS%202024/96878.png?t=1731728482.5838113", "project": "", "author_site": "Juexiao Zhang, Gao Zhu, Sihang Li, Xinhao Liu, Haorui Song, Xinran Tang, Chen Feng", "tldr": "", "abstract": "A proper scene representation is central to the pursuit of spatial intelligence where agents can robustly reconstruct and efficiently understand 3D scenes. A scene representation is either metric, such as landmark maps in 3D reconstruction, 3D bounding boxes in object detection, or voxel grids in occupancy prediction, or topological, such as pose graphs with loop closures in SLAM or visibility graphs in SfM.\n In this work, we propose to build Multiview Scene Graphs (MSG) from unposed images, representing a scene topologically with interconnected place and object nodes. \n The task of building MSG is challenging for existing representation learning methods since it needs to jointly address both visual place recognition, object detection, and object association from images with limited fields of view and potentially large viewpoint changes.\n To evaluate any method tackling this task, we developed an MSG dataset and annotation based on a public 3D dataset.\n We also propose an evaluation metric based on the intersection-over-union score of MSG edges. \n Moreover, we develop a novel baseline method built on mainstream pretrained vision models, combining visual place recognition and object association into one Transformer decoder architecture. \n Experiments demonstrate that our method has superior performance compared to existing relevant baselines.", "keywords": "scene representation; spatial understanding; place recognition; object correspondence; scene graph", "primary_area": "machine_vision", "supplementary_material": "/attachment/755e9277d8693e05f5b42d64f4133f59ae5283bf.zip", "author": "Juexiao Zhang;Gao Zhu;Sihang Li;Xinhao Liu;Haorui Song;Xinran Tang;Chen Feng", "authorids": "~Juexiao_Zhang1;~Gao_Zhu3;~Sihang_Li4;~Xinhao_Liu1;~Haorui_Song2;~Xinran_Tang1;~Chen_Feng2", "gender": "M;M;M;;M;F;M", "homepage": "https://juexzz.github.io/;https://github.com/ryougi1201;https://louis-leee.github.io/;https://gaaaavin.github.io/;;;https://ai4ce.github.io/", "dblp": "250/9589;;;126/4582-3;;;01/161-2", "google_scholar": "TYxPbcEAAAAJ;;90IoeJsAAAAJ;6pI4Xa4AAAAJ;;https://scholar.google.com/citations?hl=zh-TW;YeG8ZM0AAAAJ", "orcid": ";;0000-0003-3053-5770;0000-0002-3036-0344;;;0000-0003-3211-1576", "linkedin": "juexiao-zhang-788453146/Juexiao-Zhang;;sihang-li-07724b267/;;haorui-song-632447290/;;simbaforrest/", "or_profile": "~Juexiao_Zhang1;~Gao_Zhu3;~Sihang_Li4;~Xinhao_Liu1;~Haorui_Song2;~Xinran_Tang1;~Chen_Feng2", "aff": "New York University;New York University;New York University;New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;MS student;PhD student;PhD student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024multiview,\ntitle={Multiview Scene Graph},\nauthor={Juexiao Zhang and Gao Zhu and Sihang Li and Xinhao Liu and Haorui Song and Xinran Tang and Chen Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1ELFGSNBGC}\n}", "github": "", "reviewers": "9wC7;o1XP;AiA2;qE49", "pdf_size": 9169299, "rating": "4;5;6;6", "confidence": "3;3;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "4;2;3;3", "wc_summary": "78;70;110;99", "wc_strengths": "31;36;189;65", "wc_weaknesses": "160;403;108;66", "wc_questions": "42;4;70;32", "wc_limitations": "15;5;7;18", "wc_review": "326;518;484;280", "wc_reply_reviewers": "0;27;191;80", "wc_reply_authors": "600;817;244;33", "reply_reviewers": "0;1;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.25, 15.990231392947383 ], "wc_strengths_avg": [ 80.25, 64.11464341318604 ], "wc_weaknesses_avg": [ 184.25, 130.61082458969472 ], "wc_questions_avg": [ 37.0, 23.600847442411894 ], "wc_limitations_avg": [ 11.25, 5.402545696243577 ], "wc_review_avg": [ 402.0, 101.04454463255303 ], "wc_reply_reviewers_avg": [ 74.5, 73.15907325820906 ], "wc_reply_authors_avg": [ 423.5, 304.42774183704086 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3516462868517299248&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Schrodinger Bridge Flow for Unpaired Data Translation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96877", "id": "1F32iCJFfa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1F32iCJFfa", "openreview": "https://openreview.net/forum?id=1F32iCJFfa", "poster": "/media/PosterPDFs/NeurIPS%202024/96877.png?t=1731688420.732189", "project": "", "author_site": "Valentin De Bortoli, Iryna Korshunova, Andriy Mnih, Arnaud Doucet", "tldr": "", "abstract": "Mass transport problems arise in many areas of machine learning whereby one wants to compute a map transporting one distribution to another. Generative modeling techniques like Generative Adversarial Networks (GANs) and Denoising Diffusion Models (DMMs) have been successfully adapted to solve such transport problems, resulting in CycleGAN and Bridge Matching respectively. However, these methods do not approximate Optimal Transport (OT) maps, which are known to have desirable properties. Existing techniques approximating OT maps for high-dimensional data-rich problems, including DDMs-based Rectified Flow and Schrodinger bridge procedures, require fully training a DDM-type model at each iteration, or use mini-batch techniques which can introduce significant errors. We propose a novel algorithm to compute the Schrodinger bridge, a dynamic entropy-regularized version of OT, that eliminates the need to train multiple DDMs-like models. This algorithm corresponds to a discretization of a flow of path measures, referred to as the Schrodinger Bridge Flow, whose only stationary point is the Schrodinger bridge. We demonstrate the performance of our algorithm on a variety of unpaired data translation tasks.", "keywords": "unpaired data translation;diffusion models;Schrodinger bridge;bridge matching;stochastic interpolant;flow matching", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Valentin De Bortoli;Iryna Korshunova;Andriy Mnih;Arnaud Doucet", "authorids": "~Valentin_De_Bortoli1;~Iryna_Korshunova1;~Andriy_Mnih1;~Arnaud_Doucet2", "gender": ";;;", "homepage": "https://vdeborto.github.io/;;http://www.cs.toronto.edu/~amnih/;https://www.stats.ox.ac.uk/~doucet/", "dblp": "224/9338;190/7250;https://dblp.uni-trier.de/pers/m/Mnih:Andriy.html;68/1628", "google_scholar": ";;mxiO4IkAAAAJ;W4SZGV8AAAAJ", "orcid": ";;;0000-0002-7662-419X", "linkedin": ";;;", "or_profile": "~Valentin_De_Bortoli1;~Iryna_Korshunova1;~Andriy_Mnih1;~Arnaud_Doucet2", "aff": "University of Oxford;Google DeepMind;Google DeepMind;University of Oxford", "aff_domain": "ox.ac.uk;google.com;google.com;ox.ac.uk", "position": "Postdoc;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nbortoli2024schrodinger,\ntitle={Schrodinger Bridge Flow for Unpaired Data Translation},\nauthor={Valentin De Bortoli and Iryna Korshunova and Andriy Mnih and Arnaud Doucet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1F32iCJFfa}\n}", "github": "", "reviewers": "KqVg;ovZP;Ti9G;xpT6", "pdf_size": 47652790, "rating": "6;6;7;7", "confidence": "3;5;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "102;91;204;98", "wc_strengths": "58;109;130;83", "wc_weaknesses": "130;277;354;209", "wc_questions": "39;34;386;81", "wc_limitations": "6;55;14;37", "wc_review": "335;566;1088;508", "wc_reply_reviewers": "14;208;89;48", "wc_reply_authors": "0;676;176;0", "reply_reviewers": "1;2;2;1", "reply_authors": "1;3;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 123.75, 46.499327952132816 ], "wc_strengths_avg": [ 95.0, 27.08320512790168 ], "wc_weaknesses_avg": [ 242.5, 82.76623707768766 ], "wc_questions_avg": [ 135.0, 146.05991921126068 ], "wc_limitations_avg": [ 28.0, 19.300259065618782 ], "wc_review_avg": [ 624.25, 280.9077918107648 ], "wc_reply_reviewers_avg": [ 89.75, 73.25426608737541 ], "wc_reply_authors_avg": [ 213.0, 276.80137282896555 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1652283789684761873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;google.com;google.com;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "DTGB: A Comprehensive Benchmark for Dynamic Text-Attributed Graphs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97873", "id": "1FVe59t3LX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1FVe59t3LX", "openreview": "https://openreview.net/forum?id=1FVe59t3LX", "poster": "/media/PosterPDFs/NeurIPS%202024/97873.png?t=1731212424.8165648", "project": "", "author_site": "Jiasheng Zhang, Jialin Chen, Menglin Yang, Aosong Feng, Shuang Liang, Jie Shao, Rex Ying", "tldr": "", "abstract": "Dynamic text-attributed graphs (DyTAGs) are prevalent in various real-world scenarios, where each node and edge are associated with text descriptions, and both the graph structure and text descriptions evolve over time. Despite their broad applicability, there is a notable scarcity of benchmark datasets tailored to DyTAGs, which hinders the potential advancement in many research fields. To address this gap, we introduce Dynamic Text-attributed Graph Benchmark (DTGB), a collection of large-scale, time-evolving graphs from diverse domains, with nodes and edges enriched by dynamically changing text attributes and categories. To facilitate the use of DTGB, we design standardized evaluation procedures based on four real-world use cases: future link prediction, destination node retrieval, edge classification, and textual relation generation. These tasks require models to understand both dynamic graph structures and natural language, highlighting the unique challenges posed by DyTAGs. Moreover, we conduct extensive benchmark experiments on DTGB, evaluating 7 popular dynamic graph learning algorithms and their variants of adapting to text attributes with LLM embeddings, along with 6 powerful large language models (LLMs). Our results show the limitations of existing models in handling DyTAGs. Our analysis also demonstrates the utility of DTGB in investigating the incorporation of structural and textual dynamics. The proposed DTGB fosters research on DyTAGs and their broad applications. It offers a comprehensive benchmark for evaluating and advancing models to handle the interplay between dynamic graph structures and natural language. The dataset and source code are available at https://github.com/zjs123/DTGB.", "keywords": "Dynamic Graph; Text-attributed Graph", "primary_area": "", "supplementary_material": "", "author": "Jiasheng Zhang;Jialin Chen;Menglin Yang;Aosong Feng;Shuang Liang;Jie Shao;Rex Ying", "authorids": "~Jiasheng_Zhang2;~Jialin_Chen2;~Menglin_Yang3;~Aosong_Feng1;~Shuang_Liang4;~Jie_Shao4;~Zhitao_Ying1", "gender": "F;M;M;M;M;M;M", "homepage": "https://github.com/Cather-learner;https://scholar.google.com/citations?user=KroqSRUAAAAJ&hl=en;;;http://cfm.uestc.edu.cn/~shaojie/;https://www.cs.yale.edu/homes/ying-rex;", "dblp": ";249/8541-1;260/0450;20/1080-2;;209/4936;", "google_scholar": "rHyMKPYAAAAJ;KroqSRUAAAAJ;lIuUXKkAAAAJ;BEkC2b4AAAAJ;ikbw5okAAAAJ;6fqNXooAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0007-0909-4620;0000-0003-2510-5282;;0000-0001-7387-2801;0000-0003-2615-1555;;0000-0002-5640-2020", "linkedin": ";;;;;rex-ying-92770148/;", "or_profile": "~Jialin_Chen2;~Menglin_Yang3;~Aosong_Feng1;~Shuang_Liang4;~Jie_Shao4;~Zhitao_Ying1;~Zhang_Jia_Sheng1", "aff": "Yale University;Yale University;Yale University;Institute of Intelligent Computing, University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Yale University;University of Electronic Science and Technology of China", "aff_domain": "yale.edu;yale.edu;yale.edu;uestc.edu.cn;uestc.edu.cn;yale.edu;uestc.edu.cn", "position": "PhD student;Postdoc;PhD student;Associate Professor;Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024dtgb,\ntitle={{DTGB}: A Comprehensive Benchmark for Dynamic Text-Attributed Graphs},\nauthor={Jiasheng Zhang and Jialin Chen and Menglin Yang and Aosong Feng and Shuang Liang and Jie Shao and Rex Ying},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=1FVe59t3LX}\n}", "github": "", "reviewers": "gCk2;FWvA;GM9U;obHN", "pdf_size": 1809464, "rating": "7;7;7;9", "confidence": "3;4;4;5", "wc_summary_and_contributions": "43;53;43;31", "wc_strengths": "57;33;5;59", "wc_improvement": "58;5;20;94", "wc_limitations": "7;1;1;1", "wc_correctness": "11;1;14;1", "wc_clarity": "31;1;1;1", "wc_relation_to_prior_work": "8;1;1;1", "wc_documentation": "9;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "225;97;87;190", "wc_reply_reviewers": "16;0;14;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 42.5, 7.794228634059948 ], "wc_strengths_avg": [ 38.5, 21.880356487041066 ], "wc_improvement_avg": [ 44.25, 34.61484508126535 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_correctness_avg": [ 6.75, 5.84700778176325 ], "wc_clarity_avg": [ 8.5, 12.99038105676658 ], "wc_relation_to_prior_work_avg": [ 2.75, 3.031088913245535 ], "wc_documentation_avg": [ 3.0, 3.4641016151377544 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 149.75, 59.166607981191554 ], "wc_reply_reviewers_avg": [ 7.5, 7.533259586659682 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4507700947449204792&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "yale.edu;yale.edu;yale.edu;uestc.edu.cn;uestc.edu.cn;yale.edu;uestc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;1;0;1", "aff_unique_norm": "Yale University;University of Electronic Science and Technology of China", "aff_unique_dep": ";Institute of Intelligent Computing", "aff_unique_url": "https://www.yale.edu;https://www.uestc.edu.cn", "aff_unique_abbr": "Yale;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Truth is Universal: Robust Detection of Lies in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96876", "id": "1Fc2Xa2cDK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1Fc2Xa2cDK", "openreview": "https://openreview.net/forum?id=1Fc2Xa2cDK", "poster": "/media/PosterPDFs/NeurIPS%202024/96876.png?t=1731682436.6224985", "project": "", "author_site": "Lennart B\u00fcrger, Fred Hamprecht, Boaz Nadler", "tldr": "", "abstract": "Large Language Models (LLMs) have revolutionised natural language processing, exhibiting impressive human-like capabilities. In particular, LLMs are capable of \"lying\", knowingly outputting false statements. Hence, it is of interest and importance to develop methods to detect when LLMs lie. Indeed, several authors trained classifiers to detect LLM lies based on their internal model activations. However, other researchers showed that these classifiers may fail to generalise, for example to negated statements. \nIn this work, we aim to develop a robust method to detect when an LLM is lying. To this end, we make the following key contributions: (i) We demonstrate the existence of a two-dimensional subspace, along which the activation vectors of true and false statements can be separated. Notably, this finding is universal and holds for various LLMs, including Gemma-7B, LLaMA2-13B, Mistral-7B and LLaMA3-8B. Our analysis explains the generalisation failures observed in previous studies and sets the stage for more robust lie detection;\n(ii) Building upon (i), we construct an accurate LLM lie detector. Empirically, our proposed classifier achieves state-of-the-art performance, attaining 94\\% accuracy in both distinguishing true from false factual statements and detecting lies generated in real-world scenarios.", "keywords": "Interpretable AI;Explainable AI;Interpretability;AI Alignment;AI Safety;LLM Alignment;Truthful LLMs;Large Language Models;LLM;Mechanistic Interpretability;LLM Lie Detection", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Lennart B\u00fcrger;Fred A. Hamprecht;Boaz Nadler", "authorids": "~Lennart_B\u00fcrger1;~Fred_A._Hamprecht1;~Boaz_Nadler2", "gender": "M;M;M", "homepage": ";https://www.weizmann.ac.il/math/Nadler/home;https://sciai-lab.org/", "dblp": ";53/4192;18/4529", "google_scholar": ";N3Jj5_cAAAAJ;lO62bt0AAAAJ", "orcid": "0000-0001-8731-8932;0000-0002-9777-4576;", "linkedin": ";;", "or_profile": "~Lennart_B\u00fcrger1;~Boaz_Nadler2;~Fred_A_Hamprecht1", "aff": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Weizmann Institute of Science;Heidelberg University", "aff_domain": "iwr.uni-heidelberg.de;weizmann.ac.il;uni-heidelberg.de", "position": "MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nb{\\\"u}rger2024truth,\ntitle={Truth is Universal: Robust Detection of Lies in {LLM}s},\nauthor={Lennart B{\\\"u}rger and Fred A. Hamprecht and Boaz Nadler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1Fc2Xa2cDK}\n}", "github": "", "reviewers": "XXUN;YJ6m;Lo3X;bydL", "pdf_size": 4493338, "rating": "4;6;7;7", "confidence": "3;3;4;3", "soundness": "2;3;2;3", "novelty": "1;2;3;4", "presentation": "2;4;2;3", "wc_summary": "139;78;78;103", "wc_strengths": "45;106;66;111", "wc_weaknesses": "293;194;90;134", "wc_questions": "49;107;160;124", "wc_limitations": "9;23;80;34", "wc_review": "535;508;474;506", "wc_reply_reviewers": "444;27;277;28", "wc_reply_authors": "382;0;456;0", "reply_reviewers": "2;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 99.5, 24.984995497297973 ], "wc_strengths_avg": [ 82.0, 27.577164466275352 ], "wc_weaknesses_avg": [ 177.75, 76.09328156940006 ], "wc_questions_avg": [ 110.0, 40.08116764766216 ], "wc_limitations_avg": [ 36.5, 26.63174797117155 ], "wc_review_avg": [ 505.75, 21.614520582238228 ], "wc_reply_reviewers_avg": [ 194.0, 176.65927657499336 ], "wc_reply_authors_avg": [ 209.5, 211.12733124823038 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16650041236031642202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "iwr.uni-heidelberg.de;weizmann.ac.il;uni-heidelberg.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Weizmann Institute of Science;Heidelberg University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-heidelberg.de/;https://www.weizmann.org.il;https://www.uni-heidelberg.de", "aff_unique_abbr": "Uni Heidelberg;Weizmann;Uni Heidelberg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Israel" }, { "title": "A Global Depth-Range-Free Multi-View Stereo Transformer Network with Pose Embedding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96875", "id": "1FikBPewU9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1FikBPewU9", "openreview": "https://openreview.net/forum?id=1FikBPewU9", "poster": "/media/PosterPDFs/NeurIPS%202024/96875.png?t=1731761566.6132126", "project": "", "author_site": "Yitong Dong, Yijin Li, Zhaoyang Huang, Weikang Bian, Jingbo Liu, Hujun Bao, Zhaopeng Cui, Hongsheng Li, Guofeng Zhang", "tldr": "", "abstract": "In this paper, we propose a novel multi-view stereo (MVS) framework that gets rid of the depth range prior. Unlike recent prior-free MVS methods that work in a pair-wise manner, our method simultaneously considers all the source images. Specifically, we introduce a Multi-view Disparity Attention (MDA) module to aggregate long-range context information within and across multi-view images. Considering the asymmetry of the epipolar disparity flow, the key to our method lies in accurately modeling multi-view geometric constraints. We integrate pose embedding to encapsulate information such as multi-view camera poses, providing implicit geometric constraints for multi-view disparity feature fusion dominated by attention. Additionally, we construct corresponding hidden states for each source image due to significant differences in the observation quality of the same pixel in the reference frame across multiple source frames. We explicitly estimate the quality of the current pixel corresponding to sampled points on the epipolar line of the source image and dynamically update hidden states through the uncertainty estimation module. Extensive results on the DTU dataset and Tanks\\&Temple benchmark demonstrate the effectiveness of our method.", "keywords": "Multi-view stereo;Attention;Pose embedding", "primary_area": "machine_vision", "supplementary_material": "/attachment/b0f76397dd053080292889a3bd0b00943c655b5d.zip", "author": "Yitong Dong;Yijin Li;Zhaoyang Huang;Weikang Bian;Jingbo Liu;Hujun Bao;Zhaopeng Cui;Hongsheng Li;Guofeng Zhang", "authorids": "~Yitong_Dong1;~Yijin_Li1;~Zhaoyang_Huang2;~Weikang_Bian2;~Jingbo_Liu6;~Hujun_Bao1;~Zhaopeng_Cui1;~Hongsheng_Li3;~Guofeng_Zhang3", "gender": "F;M;;M;M;M;M;M;", "homepage": ";https://eugenelyj.github.io/;https://drinkingcoder.github.io/;;http://www.cad.zju.edu.cn/home/bao/;https://zhpcui.github.io/;http://www.ee.cuhk.edu.hk/~hsli;http://www.cad.zju.edu.cn/home/gfzhang;https://wkbian.github.io/", "dblp": ";178/6879;;;b/HujunBao;28/7484;27/7402-1;78/5389-1.html;252/4248", "google_scholar": ";https://scholar.google.com/citations?hl=en;y2xos7IAAAAJ;;AZCcDmsAAAAJ;https://scholar.google.ca/citations?user=vwIRwDUAAAAJ;BN2Ze-QAAAAJ;F0xfpXAAAAAJ;_PjUeqcAAAAJ", "orcid": "0009-0000-4932-8814;;0000-0001-7688-1471;0009-0007-3418-269X;0000-0002-2662-0334;0000-0002-7130-439X;;0000-0001-5661-8430;0000-0001-9986-3348", "linkedin": ";;;;;;;;", "or_profile": "~Yitong_Dong1;~Yijin_Li1;~Zhaoyang_Huang2;~Jingbo_Liu6;~Hujun_Bao1;~Zhaopeng_Cui1;~Hongsheng_Li3;~Guofeng_Zhang3;~Weikang_BIAN1", "aff": "Zhejiang University;Avolution AI;Avolution AI;Zhejiang University;Zhejiang University;Zhejiang University;The Chinese University of Hong Kong;Zhejiang University;The Chinese University of Hong Kong, The Chinese University of Hong Kong", "aff_domain": "zju.edu.cn;avolutionai.com;avolutionai.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;cuhk.edu.hk;zju.edu.cn;ee.cuhk.edu.hk", "position": "PhD student;Researcher;Researcher;PhD student;Full Professor;Assistant Professor;Associate Professor;Full Professor;Intern", "bibtex": "@inproceedings{\ndong2024a,\ntitle={A Global Depth-Range-Free Multi-View Stereo Transformer Network with Pose Embedding},\nauthor={Yitong Dong and Yijin Li and Zhaoyang Huang and Weikang Bian and Jingbo Liu and Hujun Bao and Zhaopeng Cui and Hongsheng Li and Guofeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1FikBPewU9}\n}", "github": "", "reviewers": "Q3xc;fCJ7;heJr;RLMs;s1Ui", "pdf_size": 784305, "rating": "5;5;5;5;6", "confidence": "5;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "3;3;3;3;2", "presentation": "1;3;3;1;2", "wc_summary": "118;67;52;70;82", "wc_strengths": "38;71;42;102;28", "wc_weaknesses": "374;113;137;238;145", "wc_questions": "137;154;5;14;86", "wc_limitations": "51;10;59;17;21", "wc_review": "718;415;295;441;362", "wc_reply_reviewers": "120;189;0;59;6", "wc_reply_authors": "188;693;0;0;0", "reply_reviewers": "1;2;0;1;1", "reply_authors": "5;3;1;1;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.0, 0.8944271909999159 ], "wc_summary_avg": [ 77.8, 22.256684389189687 ], "wc_strengths_avg": [ 56.2, 26.99925924909793 ], "wc_weaknesses_avg": [ 201.4, 96.20103949542333 ], "wc_questions_avg": [ 79.2, 61.218951314115145 ], "wc_limitations_avg": [ 31.6, 19.59183503401353 ], "wc_review_avg": [ 446.2, 144.79419877881847 ], "wc_reply_reviewers_avg": [ 74.8, 71.64188718899021 ], "wc_reply_authors_avg": [ 176.2, 268.46258584763723 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 1.6 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QyVCc58nEOQJ:scholar.google.com/&scioq=A+Global+Depth-Range-Free+Multi-View+Stereo+Transformer+Network+with+Pose+Embedding&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "zju.edu.cn;avolutionai.com;avolutionai.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;cuhk.edu.hk;zju.edu.cn;ee.cuhk.edu.hk", "author_num": 9, "aff_unique_index": "0;1;1;0;0;0;2;0;2", "aff_unique_norm": "Zhejiang University;Avolution AI;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;;https://www.cuhk.edu.hk", "aff_unique_abbr": "ZJU;;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;0;0;0;0;0;0", "aff_country_unique": "China;Unknown" }, { "title": "Trajectory Diffusion for ObjectGoal Navigation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96874", "id": "1GpY0hsv2w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1GpY0hsv2w", "openreview": "https://openreview.net/forum?id=1GpY0hsv2w", "poster": "/media/PosterPDFs/NeurIPS%202024/96874.png?t=1732795015.5040088", "project": "", "author_site": "Xinyao Yu, Sixian Zhang, Xinhang Song, Xiaorong Qin, Shuqiang Jiang", "tldr": "", "abstract": "Object goal navigation requires an agent to navigate to a specified object in an unseen environment based on visual observations and user-specified goals. \nHuman decision-making in navigation is sequential, planning a most likely sequence of actions toward the goal. \nHowever, existing ObjectNav methods, both end-to-end learning methods and modular methods, rely on single-step planning. They output the next action based on the current model input, which easily overlooks temporal consistency and leads to myopic planning.\nTo this end, we aim to learn sequence planning for ObjectNav. Specifically, we propose trajectory diffusion to learn the distribution of trajectory sequences conditioned on the current observation and the goal. \nWe utilize DDPM and automatically collected optimal trajectory segments to train the trajectory diffusion.\nOnce the trajectory diffusion model is trained, it can generate a temporally coherent sequence of future trajectory for agent based on its current observations.\nExperimental results on the Gibson and MP3D datasets demonstrate that the generated trajectories effectively guide the agent, resulting in more accurate and efficient navigation.", "keywords": "Embodied AI;visual navigation", "primary_area": "robotics", "supplementary_material": "/attachment/4da1b9e141eee82a9ed0cb11f842bd009bf89857.zip", "author": "Xinyao Yu;Sixian Zhang;Xinhang Song;Xiaorong Qin;Shuqiang Jiang", "authorids": "~Xinyao_Yu1;~Sixian_Zhang1;~Xinhang_Song1;~Xiaorong_Qin1;~Shuqiang_Jiang1", "gender": "F;M;M;F;M", "homepage": "https://github.com/TSUNAMIII-7;;;;https://people.ucas.edu.cn/~sqjiang?language=en", "dblp": "26/834-2.html;251/1108;125/2281;219/8842;90/3651", "google_scholar": ";https://scholar.google.com.hk/citations?hl=zh-CN;LQDB7QQAAAAJ;DPq5x7IAAAAJ;4Rvn-ykAAAAJ", "orcid": ";0000-0002-1065-5348;;0000-0002-7466-3443;0000-0002-1596-4326", "linkedin": ";;;;", "or_profile": "~Xinyao_Yu1;~Sixian_Zhang1;~Xinhang_Song1;~Xiaorong_Qin1;~Shuqiang_Jiang1", "aff": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;PhD student;Associate Professor;PhD student;Professor", "bibtex": "@inproceedings{\nyu2024trajectory,\ntitle={Trajectory Diffusion for ObjectGoal Navigation},\nauthor={Xinyao Yu and Sixian Zhang and Xinhang Song and Xiaorong Qin and Shuqiang Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1GpY0hsv2w}\n}", "github": "", "reviewers": "DH2d;JCTG;k5Ci;ria9;sqUP", "pdf_size": 4740515, "rating": "5;5;6;6;7", "confidence": "4;3;4;4;4", "soundness": "2;3;2;3;3", "novelty": "2;2;4;3;3", "presentation": "1;3;4;3;4", "wc_summary": "47;133;64;152;85", "wc_strengths": "13;79;69;78;79", "wc_weaknesses": "263;179;303;115;85", "wc_questions": "101;61;152;67;110", "wc_limitations": "53;1;5;13;49", "wc_review": "477;453;593;425;408", "wc_reply_reviewers": "66;30;845;79;61", "wc_reply_authors": "321;46;2574;0;0", "reply_reviewers": "1;1;2;1;1", "reply_authors": "3;2;7;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 96.2, 40.126799025090456 ], "wc_strengths_avg": [ 63.6, 25.57811564599707 ], "wc_weaknesses_avg": [ 189.0, 83.50329334822669 ], "wc_questions_avg": [ 98.2, 32.85970176371052 ], "wc_limitations_avg": [ 24.2, 22.25668438918969 ], "wc_review_avg": [ 471.2, 65.31278588454178 ], "wc_reply_reviewers_avg": [ 216.2, 314.81130856435254 ], "wc_reply_authors_avg": [ 588.2, 1000.0736772858288 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.8, 2.2271057451320084 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8985274684486882141&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "http://www.ucas.ac.cn;https://www.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Flow Priors for Linear Inverse Problems via Iterative Corrupted Trajectory Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96873", "id": "1H2e7USI09", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1H2e7USI09", "openreview": "https://openreview.net/forum?id=1H2e7USI09", "poster": "/media/PosterPDFs/NeurIPS%202024/96873.png?t=1729054916.6434312", "project": "", "author_site": "Yasi Zhang, Peiyu Yu, Yaxuan Zhu, Yingshan CHANG, Feng Gao, Ying Nian Wu, Oscar Leong", "tldr": "", "abstract": "Generative models based on flow matching have attracted significant attention for their simplicity and superior performance in high-resolution image synthesis. By leveraging the instantaneous change-of-variables formula, one can directly compute image likelihoods from a learned flow, making them enticing candidates as priors for downstream tasks such as inverse problems. In particular, a natural approach would be to incorporate such image probabilities in a maximum-a-posteriori (MAP) estimation problem. A major obstacle, however, lies in the slow computation of the log-likelihood, as it requires backpropagating through an ODE solver, which can be prohibitively slow for high-dimensional problems. In this work, we propose an iterative algorithm to approximate the MAP estimator efficiently to solve a variety of linear inverse problems. Our algorithm is mathematically justified by the observation that the MAP objective can be approximated by a sum of $N$ ``local MAP'' objectives, where $N$ is the number of function evaluations. By leveraging Tweedie's formula, we show that we can perform gradient steps to sequentially optimize these objectives. We validate our approach for various linear inverse problems, such as super-resolution, deblurring, inpainting, and compressed sensing, and demonstrate that we can outperform other methods based on flow matching. Code is available at \\url{https://github.com/YasminZhang/ICTM}.", "keywords": "Flow-based models;Inverse problems;Neural ODE", "primary_area": "generative_models", "supplementary_material": "", "author": "Yasi Zhang;Peiyu Yu;Yaxuan Zhu;Yingshan Chang;Feng Gao;Ying Nian Wu;Oscar Leong", "authorids": "~Yasi_Zhang1;~Peiyu_Yu2;~Yaxuan_Zhu1;~Yingshan_Chang1;~Feng_Gao2;~Ying_Nian_Wu1;~Oscar_Leong2", "gender": "Not Specified;;M;F;M;M;M", "homepage": "https://yasminzhang.github.io/;;;https://zdxdsw.github.io/;https://fen9.github.io/;https://www.oscarleong.com/;http://www.stat.ucla.edu/~ywu/", "dblp": "317/8966;249/9449;289/6018;301/8296;10/2674-13;;18/568.html", "google_scholar": "6hu7hXkAAAAJ;https://scholar.google.com/citations?hl=en;EptgCGsAAAAJ;0QV-ZVQAAAAJ;amaLnocAAAAJ;LN9O4vkAAAAJ;7k_1QFIAAAAJ", "orcid": ";;;;0000-0003-1515-1357;;", "linkedin": "yasi-zhang-8737b3250/;;;yingshanchang/;;;", "or_profile": "~Yasi_Zhang1;~Peiyu_Yu2;~Yaxuan_Zhu1;~Yingshan_Chang1;~Feng_Gao2;~Oscar_Francisco_Leong1;~Yingnian_Wu1", "aff": "University of California, Los Angeles;UCLA Department of Statistics;University of California, Los Angeles;Carnegie Mellon University;Amazon;California Institute of Technology;UCLA", "aff_domain": "ucla.edu;g.ucla.edu;ucla.edu;cmu.edu;amazon.com;caltech.edu;stat.ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2024flow,\ntitle={Flow Priors for Linear Inverse Problems via Iterative Corrupted Trajectory Matching},\nauthor={Yasi Zhang and Peiyu Yu and Yaxuan Zhu and Yingshan Chang and Feng Gao and Ying Nian Wu and Oscar Leong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1H2e7USI09}\n}", "github": "", "reviewers": "qjx2;sT2G;hhXk;Xm5z", "pdf_size": 14091676, "rating": "5;5;5;7", "confidence": "4;4;4;1", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "91;20;65;201", "wc_strengths": "37;79;38;122", "wc_weaknesses": "164;486;158;25", "wc_questions": "108;1;63;1", "wc_limitations": "66;1;16;1", "wc_review": "466;587;340;350", "wc_reply_reviewers": "21;96;151;0", "wc_reply_authors": "71;182;341;0", "reply_reviewers": "1;1;2;0", "reply_authors": "3;2;4;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.25, 66.66098934159318 ], "wc_strengths_avg": [ 69.0, 34.978564864785405 ], "wc_weaknesses_avg": [ 208.25, 169.71207234607678 ], "wc_questions_avg": [ 43.25, 45.14628999153751 ], "wc_limitations_avg": [ 21.0, 26.692695630078276 ], "wc_review_avg": [ 435.75, 100.39017631222688 ], "wc_reply_reviewers_avg": [ 67.0, 60.212125024782175 ], "wc_reply_authors_avg": [ 148.5, 128.68274942664226 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4464979259935671619&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucla.edu;g.ucla.edu;ucla.edu;cmu.edu;amazon.com;caltech.edu;stat.ucla.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "University of California, Los Angeles;Carnegie Mellon University;Amazon;California Institute of Technology", "aff_unique_dep": ";;Amazon.com, Inc.;", "aff_unique_url": "https://www.ucla.edu;https://www.cmu.edu;https://www.amazon.com;https://www.caltech.edu", "aff_unique_abbr": "UCLA;CMU;Amazon;Caltech", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Los Angeles;;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unveiling Causal Reasoning in Large Language Models: Reality or Mirage?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96872", "id": "1IU3P8VDbn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1IU3P8VDbn", "openreview": "https://openreview.net/forum?id=1IU3P8VDbn", "poster": "/media/PosterPDFs/NeurIPS%202024/96872.png?t=1731684252.5199218", "project": "", "author_site": "Haoang Chi, He Li, Wenjing Yang, Feng Liu, Long Lan, Xiaoguang Ren, Tongliang Liu, Bo Han", "tldr": "", "abstract": "Causal reasoning capability is critical in advancing large language models (LLMs) towards artificial general intelligence (AGI). While versatile LLMs appear to have demonstrated capabilities in understanding contextual causality and providing responses that obey the laws of causality, it remains unclear whether they perform genuine causal reasoning akin to humans. However, current evidence indicates the contrary. Specifically, LLMs are only capable of performing shallow (level-1) causal reasoning, primarily attributed to the causal knowledge embedded in their parameters, but they lack the capacity for genuine human-like (level-2) causal reasoning. To support this hypothesis, methodologically, we delve into the autoregression mechanism of transformer-based LLMs, revealing that it is not inherently causal. Empirically, we introduce a new causal Q&A benchmark named CausalProbe 2024, whose corpus is fresh and nearly unseen for the studied LLMs. Empirical results show a significant performance drop on CausalProbe 2024 compared to earlier benchmarks, indicating that LLMs primarily engage in level-1 causal reasoning.To bridge the gap towards level-2 causal reasoning, we draw inspiration from the fact that human reasoning is usually facilitated by general knowledge and intended goals. Inspired by this, we propose G$^2$-Reasoner, a LLM causal reasoning method that incorporates general knowledge and goal-oriented prompts into LLMs' causal reasoning processes. Experiments demonstrate that G$^2$-Reasoner significantly enhances LLMs' causal reasoning capability, particularly in fresh and fictitious contexts. This work sheds light on a new path for LLMs to advance towards genuine causal reasoning, going beyond level-1 and making strides towards level-2.", "keywords": "trustworthy machine learning;LLM;causal reasoning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Haoang Chi;He Li;Wenjing Yang;Feng Liu;Long Lan;Xiaoguang Ren;Tongliang Liu;Bo Han", "authorids": "~Haoang_Chi1;~He_Li14;~Wenjing_Yang1;~Feng_Liu2;~Long_Lan2;~Xiaoguang_Ren1;~Tongliang_Liu1;~Bo_Han1", "gender": "M;F;M;M;M;M;M;M", "homepage": ";https://www.researchgate.net/scientific-contributions/Wen-Jing-Yang-2056467943;https://fengliu90.github.io/index.html;https://lan-long.github.io/;;https://tongliang-liu.github.io/;https://bhanml.github.io/;https://openreview.net/profile/", "dblp": "284/9320;48/3396-2;77/1318-3;124/2136.html;;150/6667;241/0472-3;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=huVW6Y8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ;https://scholar.google.cz/citations?user=pPmfNVkAAAAJ", "orcid": ";;0000-0002-5005-9129;;;;;0009-0006-5709-4636", "linkedin": ";;alexfengliu;;;;;", "or_profile": "~Haoang_Chi1;~Wenjing_Yang1;~Feng_Liu2;~Long_Lan2;~Xiaoguang_Ren1;~Tongliang_Liu1;~bo_han2;~Li_He5", "aff": "Intelligent Game and Decision Laboratory, Beijing;National University of Defense Technology;University of Melbourne;National University of Defense Technology;Intelligent Game and Decision Lab;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI;National University of Defense Technology", "aff_domain": "nudt.edu.cn;nudt.edu.cn;unimelb.edu.au;nudt.edu.cn;nudt.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;nudt.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Associate Professor;Full Professor;Affiliated Associate Professor;Researcher;MS student", "bibtex": "@inproceedings{\nchi2024unveiling,\ntitle={Unveiling Causal Reasoning in Large Language Models: Reality or Mirage?},\nauthor={Haoang Chi and He Li and Wenjing Yang and Feng Liu and Long Lan and Xiaoguang Ren and Tongliang Liu and Bo Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1IU3P8VDbn}\n}", "github": "", "reviewers": "ZNie;ZgYU;NPEc;yorJ", "pdf_size": 1484019, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "143;223;70;77", "wc_strengths": "133;227;41;63", "wc_weaknesses": "223;639;110;111", "wc_questions": "359;165;2;29", "wc_limitations": "10;76;48;1", "wc_review": "868;1330;271;281", "wc_reply_reviewers": "407;114;21;35", "wc_reply_authors": "614;124;121;124", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 128.25, 61.67404235170579 ], "wc_strengths_avg": [ 116.0, 72.53275122315436 ], "wc_weaknesses_avg": [ 270.75, 217.51364899702273 ], "wc_questions_avg": [ 138.75, 141.37251324072867 ], "wc_limitations_avg": [ 33.75, 30.102948360584218 ], "wc_review_avg": [ 687.5, 442.7473884733822 ], "wc_reply_reviewers_avg": [ 144.25, 155.7873149521488 ], "wc_reply_authors_avg": [ 245.75, 212.61276419820143 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1961343737582716924&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nudt.edu.cn;nudt.edu.cn;unimelb.edu.au;nudt.edu.cn;nudt.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;nudt.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;3;4;4;1", "aff_unique_norm": "Intelligent Game and Decision Laboratory;National University of Defense Technology;University of Melbourne;Intelligent Game and Decision Lab;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "Intelligent Game and Decision Laboratory;;;Intelligent Game and Decision Lab;", "aff_unique_url": ";http://www.nudt.edu.cn/;https://www.unimelb.edu.au;;https://mbzuai.ac.ae", "aff_unique_abbr": ";NUDT;UniMelb;;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;3;3;0", "aff_country_unique": "China;Australia;;United Arab Emirates" }, { "title": "Diffusion Policy Attacker: Crafting Adversarial Attacks for Diffusion-based Policies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96871", "id": "1L5vaNIoK5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1L5vaNIoK5", "openreview": "https://openreview.net/forum?id=1L5vaNIoK5", "poster": "", "project": "", "author_site": "Yipu Chen, Haotian Xue, Yongxin Chen", "tldr": "", "abstract": "Diffusion models have emerged as a promising approach for behavior cloning (BC), leveraging their exceptional ability to model multi-modal distributions. Diffusion policies (DP) have elevated BC performance to new heights, demonstrating robust efficacy across diverse tasks, coupled with their inherent flexibility and ease of implementation. Despite the increasing adoption of Diffusion Policies (DP) as a foundation for policy generation, the critical issue of safety remains largely unexplored. While previous attempts have targeted deep policy networks, DP used diffusion models as the policy network, making it ineffective to be attacked using previous methods because of its chained structure and randomness injected. In this paper, we undertake a comprehensive examination of DP safety concerns by introducing adversarial scenarios, encompassing offline and online attacks, global and patch-based attacks. We propose DP-Attacker, a suite of algorithms that can craft effective adversarial attacks across all aforementioned scenarios. We conduct attacks on pre-trained diffusion policies across various manipulation tasks. Through extensive experiments, we demonstrate that DP-Attacker has the capability to significantly decrease the success rate of DP for all scenarios. Particularly in offline scenarios, we exhibit the generation of highly transferable perturbations applicable to all frames. Furthermore, we illustrate the creation of adversarial physical patches that, when applied to the environment, effectively deceive the model. Video results are\nput in: https://sites.google.com/view/dp-attacker-videos/.", "keywords": "Diffusion Model;Adversarial Attacks;Robot Learning", "primary_area": "generative_models", "supplementary_material": "/attachment/463b68d0420ac8a8a150970a1d9c895b3863ed95.zip", "author": "Yipu Chen;Haotian Xue;Yongxin Chen", "authorids": "~Yipu_Chen1;~Haotian_Xue1;~Yongxin_Chen1", "gender": "M;M;M", "homepage": "https://github.com/peterchenyipu;https://yongxin.ae.gatech.edu/;https://xavihart.github.io", "dblp": ";;", "google_scholar": ";X8BYiV4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;haotian-xue-gatech/", "or_profile": "~Yipu_Chen1;~Yongxin_Chen1;~Xue_Haotian1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;NVIDIA", "aff_domain": "gatech.edu;gatech.edu;nvidia.com", "position": "Undergrad student;Associate Professor;Research Intern", "bibtex": "@inproceedings{\nchen2024diffusion,\ntitle={Diffusion Policy Attacker: Crafting Adversarial Attacks for Diffusion-based Policies},\nauthor={Yipu Chen and Haotian Xue and Yongxin Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1L5vaNIoK5}\n}", "github": "", "reviewers": "fJsu;7zbu;69xg;kPb5;FStp;7kiH", "pdf_size": 5912937, "rating": "4;4;4;5;6;7", "confidence": "4;3;4;3;4;4", "soundness": "3;3;3;4;3;3", "novelty": "2;2;3;2;2;3", "presentation": "3;3;3;3;3;3", "wc_summary": "37;50;42;117;83;80", "wc_strengths": "26;24;80;21;35;83", "wc_weaknesses": "177;64;15;56;45;140", "wc_questions": "20;43;17;200;174;104", "wc_limitations": "49;1;1;3;37;17", "wc_review": "309;182;155;397;374;424", "wc_reply_reviewers": "48;69;0;122;40;17", "wc_reply_authors": "167;87;0;27;78;0", "reply_reviewers": "1;1;0;1;1;1", "reply_authors": "2;2;1;2;2;1", "rating_avg": [ 5.0, 1.1547005383792515 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.16666666666667, 28.079747545557137 ], "wc_strengths_avg": [ 44.833333333333336, 26.289520514625007 ], "wc_weaknesses_avg": [ 82.83333333333333, 56.63160680122796 ], "wc_questions_avg": [ 93.0, 72.72780302836965 ], "wc_limitations_avg": [ 18.0, 18.823743871327334 ], "wc_review_avg": [ 306.8333333333333, 104.0919732202675 ], "wc_reply_reviewers_avg": [ 49.333333333333336, 39.23292268264272 ], "wc_reply_authors_avg": [ 59.833333333333336, 58.86543033809307 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3061862178478973, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18188479573926500799&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;nvidia.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.gatech.edu;https://www.nvidia.com", "aff_unique_abbr": "Georgia Tech;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-view Masked Contrastive Representation Learning for Endoscopic Video Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96870", "id": "1M67AdMBbg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1M67AdMBbg", "openreview": "https://openreview.net/forum?id=1M67AdMBbg", "poster": "/media/PosterPDFs/NeurIPS%202024/96870.png?t=1731551871.9562638", "project": "", "author_site": "Kai Hu, Ye Xiao, Yuan Zhang, Xieping Gao", "tldr": "", "abstract": "Endoscopic video analysis can effectively assist clinicians in disease diagnosis and treatment, and has played an indispensable role in clinical medicine. Unlike regular videos, endoscopic video analysis presents unique challenges, including complex camera movements, uneven distribution of lesions, and concealment, and it typically relies on contrastive learning in self-supervised pretraining as its mainstream technique. However, representations obtained from contrastive learning enhance the discriminability of the model but often lack fine-grained information, which is suboptimal in the pixel-level prediction tasks. In this paper, we develop a Multi-view Masked Contrastive Representation Learning (M$^2$CRL) framework for endoscopic video pre-training. Specifically, we propose a multi-view mask strategy for addressing the challenges of endoscopic videos. We utilize the frame-aggregated attention guided tube mask to capture global-level spatiotemporal sensitive representation from the global views, while the random tube mask is employed to focus on local variations from the local views. Subsequently, we combine multi-view mask modeling with contrastive learning to obtain endoscopic video representations that possess fine-grained perception and holistic discriminative capabilities simultaneously. The proposed M$^2$CRL is pre-trained on 7 publicly available endoscopic video datasets and fine-tuned on 3 endoscopic video datasets for 3 downstream tasks. Notably, our M$^2$CRL significantly outperforms the current state-of-the-art self-supervised endoscopic pre-training methods, e.g., Endo-FM (3.5% F1 for classification, 7.5% Dice for segmentation, and 2.2% F1 for detection) and other self-supervised methods, e.g., VideoMAE V2 (4.6% F1 for classification, 0.4% Dice for segmentation, and 2.1% F1 for detection).", "keywords": "Endoscopic Video Analysis;Self-supervised Learning;Multi-view Mask Modeling;Contrastive Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kai Hu;Ye Xiao;Yuan Zhang;Xieping Gao", "authorids": "~Kai_Hu6;~Ye_Xiao3;~Yuan_Zhang27;~Xieping_Gao1", "gender": "M;F;F;M", "homepage": ";https://github.com/Dorothy-h357h;https://www.researchgate.net/profile/Yuan-Zhang-260;https://cise.hunnu.edu.cn/info/1078/4071.htm", "dblp": "57/6633-2.html;;;94/4344.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kai_Hu6;~Ye_Xiao3;~Yuan_Zhang27;~Xieping_Gao1", "aff": "Xiangtan University;Xiangtan University;Xiangtan University;Hunan Normal University", "aff_domain": "xtu.edu.cn;xtu.edu.cn;xtu.edu.cn;hunnu.edu.cn", "position": "Full Professor;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024multiview,\ntitle={Multi-view Masked Contrastive Representation Learning for Endoscopic Video Analysis},\nauthor={Kai Hu and Ye Xiao and Yuan Zhang and Xieping Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1M67AdMBbg}\n}", "github": "", "reviewers": "wGJR;R5Z9;u6Br;BGWe", "pdf_size": 16224661, "rating": "4;5;6;7", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "95;93;45;75", "wc_strengths": "51;42;152;76", "wc_weaknesses": "441;32;61;78", "wc_questions": "10;97;34;178", "wc_limitations": "8;10;38;7", "wc_review": "605;274;330;414", "wc_reply_reviewers": "483;15;34;20", "wc_reply_authors": "3725;35;24;24", "reply_reviewers": "3;1;1;1", "reply_authors": "8;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.0, 20.049937655763422 ], "wc_strengths_avg": [ 80.25, 43.25722483007896 ], "wc_weaknesses_avg": [ 153.0, 167.08830000930647 ], "wc_questions_avg": [ 79.75, 65.01682474559951 ], "wc_limitations_avg": [ 15.75, 12.891373084353738 ], "wc_review_avg": [ 405.75, 125.36421937698172 ], "wc_reply_reviewers_avg": [ 138.0, 199.3075512869495 ], "wc_reply_authors_avg": [ 952.0, 1600.9985946277404 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 2.598076211353316 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3162277660168379, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iTJ--y0Er-0J:scholar.google.com/&scioq=Multi-view+Masked+Contrastive+Representation+Learning+for+Endoscopic+Video+Analysis&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "xtu.edu.cn;xtu.edu.cn;xtu.edu.cn;hunnu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Xiangtan University;Hunan Normal University", "aff_unique_dep": ";", "aff_unique_url": "http://www.xtu.edu.cn/;http://www.hnu.edu.cn", "aff_unique_abbr": "XTU;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "CryoSPIN: Improving Ab-Initio Cryo-EM Reconstruction with Semi-Amortized Pose Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96869", "id": "1MCseWaFZb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1MCseWaFZb", "openreview": "https://openreview.net/forum?id=1MCseWaFZb", "poster": "/media/PosterPDFs/NeurIPS%202024/96869.png?t=1731543391.873724", "project": "", "author_site": "Shayan Shekarforoush, David Lindell, Marcus Brubaker, David Fleet", "tldr": "", "abstract": "Cryo-EM is an increasingly popular method for determining the atomic resolution 3D structure of macromolecular complexes (eg, proteins) from noisy 2D images captured by an electron microscope. The computational task is to reconstruct the 3D density of the particle, along with 3D pose of the particle in each 2D image, for which the posterior pose distribution is highly multi-modal. Recent developments in cryo-EM have focused on deep learning for which amortized inference has been used to predict pose. Here, we address key problems with this approach, and propose a new semi-amortized method, cryoSPIN, in which reconstruction begins with amortized inference and then switches to a form of auto-decoding to refine poses locally using stochastic gradient descent. Through evaluation on synthetic datasets, we demonstrate that cryoSPIN is able to handle multi-modal pose distributions during the amortized inference stage, while the later, more flexible stage of direct pose optimization yields faster and more accurate convergence of poses compared to baselines. On experimental data, we show that cryoSPIN outperforms the state-of-the-art cryoAI in speed and reconstruction quality.", "keywords": "Cryo-EM 3D reconstruction;Pose estimation;Semi-Amortization;Multi-choice learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/b7926b2e9963f6868e1e344d364d4e5751f68206.zip", "author": "Shayan Shekarforoush;David B. Lindell;Marcus A Brubaker;David J. Fleet", "authorids": "~Shayan_Shekarforoush1;~David_B._Lindell1;~Marcus_A_Brubaker1;~David_J._Fleet1", "gender": "M;M;M;M", "homepage": "https://shekshaa.github.io/;https://www.davidlindell.com/;https://mbrubake.github.io;http://www.cs.toronto.edu/~fleet/index.html", "dblp": "232/3275;170/2608;18/3060;07/2099", "google_scholar": "https://scholar.google.de/citations?user=ABanXIYAAAAJ;_m-BTtAAAAAJ;https://scholar.google.ca/citations?user=x2wyjkAAAAAJ;https://scholar.google.com.tw/citations?user=njOmQFsAAAAJ", "orcid": ";;0000-0002-7892-9026;", "linkedin": ";;marcus-brubaker-86538011/;", "or_profile": "~Shayan_Shekarforoush1;~David_B._Lindell1;~Marcus_A_Brubaker1;~David_J._Fleet1", "aff": "University of Toronto;University of Toronto;Borealis AI;Department of Computer Science, University of Toronto", "aff_domain": "toronto.edu;cs.toronto.edu;borealisai.com;cs.toronto.edu", "position": "PhD student;Assistant Professor;Academic Advisor;Full Professor", "bibtex": "@inproceedings{\nshekarforoush2024cryospin,\ntitle={Cryo{SPIN}: Improving Ab-Initio Cryo-{EM} Reconstruction with Semi-Amortized Pose Inference},\nauthor={Shayan Shekarforoush and David B. Lindell and Marcus A Brubaker and David J. Fleet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1MCseWaFZb}\n}", "github": "", "reviewers": "diCb;xz2Y;Kgto", "pdf_size": 11527420, "rating": "4;6;7", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "64;133;163", "wc_strengths": "64;85;51", "wc_weaknesses": "74;206;120", "wc_questions": "66;210;142", "wc_limitations": "1;28;13", "wc_review": "269;662;489", "wc_reply_reviewers": "114;93;14", "wc_reply_authors": "342;213;0", "reply_reviewers": "2;2;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 120.0, 41.44876355212541 ], "wc_strengths_avg": [ 66.66666666666667, 14.007934259633796 ], "wc_weaknesses_avg": [ 133.33333333333334, 54.70730197047638 ], "wc_questions_avg": [ 139.33333333333334, 58.8179866669674 ], "wc_limitations_avg": [ 14.0, 11.045361017187261 ], "wc_review_avg": [ 473.3333333333333, 160.82357483348707 ], "wc_reply_reviewers_avg": [ 73.66666666666667, 43.05293898859352 ], "wc_reply_authors_avg": [ 185.0, 141.01772938180503 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184544, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10912508024939789418&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "toronto.edu;cs.toronto.edu;borealisai.com;cs.toronto.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Toronto;Borealis AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.borealisai.com", "aff_unique_abbr": "U of T;Borealis AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "1MQXBnEbE8", "title": "M$^3$-Impute: Mask-guided Representation Learning for Missing Value Imputation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Missing values are a common problem that poses significant challenges to data analysis and machine learning. This problem necessitates the development of an effective imputation method to fill in the missing values accurately, thereby enhancing the overall quality and utility of the datasets. Existing imputation methods, however, fall short of considering the 'missingness' information in the data during initialization and modeling the entangled feature and sample correlations explicitly during the learning process, thus leading to inferior performance. We propose M$^3$-Impute, which aims to leverage the missingness information and such correlations with novel masking schemes. M$^3$-Impute first models the data as a bipartite graph and uses an off-the-shelf graph neural network, equipped with a refined initialization process, to learn node embeddings. They are then optimized through M$^3$-Impute\u2019s novel feature correlation unit (FCU) and sample correlation unit (SCU) that enable explicit consideration of feature and sample correlations for imputation. Experiment results on 15 benchmark datasets under three different missing patterns show the effectiveness of M$^3$-Impute by achieving 13 best and 2 second-best MAE scores on average.", "keywords": "Missing Value Imputation;Graph Representation Learning;Data Correlations", "primary_area": "other", "supplementary_material": "/attachment/2c98b701d612831d8e8e548f82fe04d2cd53f830.zip", "author": "Zhongyi Yu;Zhenghao Wu;Shuhan Zhong;Weifeng Su;S.-H. Chan;Chul-Ho Lee;Weipeng Zhuo", "authorids": "~Zhongyi_Yu1;~Zhenghao_Wu1;~Shuhan_Zhong1;~Weifeng_Su1;~S.-H._Chan1;~Chul-Ho_Lee1;~Weipeng_Zhuo1", "gender": ";M;M;M;M;;M", "homepage": "https://github.com/Alleinx;https://ecwuuuuu.com;https://github.com/zshhans;https://fst.uic.edu.cn/en/faculty/faculty.htm#/wfsu/en;https://home.cse.ust.hk/~gchan/;https://sites.google.com/site/chulholeesite/;https://stevezhuo.github.io/", "dblp": "261/5911;;218/8083;40/38;c/ShuengHanGaryChan;70/1627;119/0329.html", "google_scholar": ";gBi_1mgAAAAJ;p-w2d-QAAAAJ;PuLs4t4AAAAJ;https://scholar.google.com.tw/citations?user=uiCSOycAAAAJ;12czasEAAAAJ;A1OW8DMAAAAJ", "orcid": "0000-0003-2859-7008;0000-0002-8323-7841;0000-0003-4037-4288;;0000-0003-4207-764X;0000-0002-4778-8996;0000-0002-1810-7071", "linkedin": ";;;;;;", "or_profile": "~Zhongyi_Yu1;~Zhenghao_Wu1;~Shuhan_Zhong1;~Weifeng_Su1;~S.-H._Chan1;~Chul-Ho_Lee1;~Weipeng_Zhuo1", "aff": "BNBU;Beijing Normal University-Hong Kong Baptist University United International College;Hong Kong University of Science and Technology;BNU-HKBU United International College;Hong Kong University of Science and Technology;Texas State University;Beijing Normal-Hong Kong Baptist University", "aff_domain": "uic.edu.cn;uic.edu.cn;cse.ust.hk;uic.edu.cn;ust.hk;txstate.edu;uic.edu.cn", "position": "Researcher;Researcher;PhD student;Full Professor;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024mimpute,\ntitle={M\\${\\textasciicircum}3\\$-Impute: Mask-guided Representation Learning for Missing Value Imputation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=1MQXBnEbE8}\n}", "github": "", "project": "", "reviewers": "E1Qa;LMmS;WjjR;Bqr5;f6SV;LUcW;JuhZ", "site": "https://openreview.net/forum?id=1MQXBnEbE8", "pdf_size": 510548, "rating": "5;5;6;6;6;6;7", "confidence": "3;4;3;3;4;3;3", "soundness": "2;3;3;3;3;3;3", "novelty": "2;2;3;4;3;2;3", "presentation": "2;3;2;2;3;3;3", "wc_summary": "109;39;78;85;98;52;65", "wc_strengths": "108;56;123;31;44;71;22", "wc_weaknesses": "65;350;251;25;130;36;72", "wc_questions": "76;63;286;94;68;265;9", "wc_limitations": "48;1;98;1;21;1;35", "wc_review": "406;509;836;236;361;425;203", "wc_reply_reviewers": "0;18;81;0;21;0;1", "wc_reply_authors": "0;0;0;0;0;0;0", "reply_reviewers": "0;1;1;0;1;0;1", "reply_authors": "1;1;1;1;1;1;1", "rating_avg": [ 5.857142857142857, 0.6388765649999398 ], "confidence_avg": [ 3.2857142857142856, 0.4517539514526256 ], "soundness_avg": [ 2.857142857142857, 0.34992710611188266 ], "novelty_avg": [ 2.7142857142857144, 0.6998542122237652 ], "presentation_avg": [ 2.5714285714285716, 0.49487165930539345 ], "wc_summary_avg": [ 75.14285714285714, 23.049193177255663 ], "wc_strengths_avg": [ 65.0, 35.40782802876061 ], "wc_weaknesses_avg": [ 132.71428571428572, 113.6658188170346 ], "wc_questions_avg": [ 123.0, 99.57337567270251 ], "wc_limitations_avg": [ 29.285714285714285, 32.89252753053403 ], "wc_review_avg": [ 425.14285714285717, 194.73016390557956 ], "wc_reply_reviewers_avg": [ 17.285714285714285, 27.348096228932253 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5714285714285714, 0.4948716593053935 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3535533905932738, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tWEKXhT8fUkJ:scholar.google.com/&scioq=M%24%5E3%24-Impute:+Mask-guided+Representation+Learning+for+Missing+Value+Imputation&hl=en&as_sdt=0,14", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;2;4;5", "aff_unique_norm": "Beijing Normal University;Beijing Normal University-Hong Kong Baptist University United International College;Hong Kong University of Science and Technology;United International College;Texas State University;Beijing Normal-Hong Kong Baptist University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.bnu.edu.cn;https://www.uic.edu.hk;https://www.ust.hk;https://www.uic.edu.hk;https://www.txstate.edu;https://www.bnu.hk", "aff_unique_abbr": "BNU;UIC;HKUST;UIC;TXST;BNU-HKBU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Causal Deciphering and Inpainting in Spatio-Temporal Dynamics via Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96868", "id": "1ONdF1JHyJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1ONdF1JHyJ", "openreview": "https://openreview.net/forum?id=1ONdF1JHyJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96868.png?t=1733232248.688173", "project": "", "author_site": "Yifan Duan, Jian Zhao, pengcheng, Junyuan Mao, Hao Wu, Jingyu Xu, shilong wang, Caoyuan Ma, Kai Wang, Kun Wang, Xuelong Li", "tldr": "", "abstract": "Spatio-temporal (ST) prediction has garnered a De facto attention in earth sciences, such as meteorological prediction, human mobility perception. However, the scarcity of data coupled with the high expenses involved in sensor deployment results in notable data imbalances. Furthermore, models that are excessively customized and devoid of causal connections further undermine the generalizability and interpretability. To this end, we establish a causal framework for ST predictions, termed CaPaint, which targets to identify causal regions in data and endow model with causal reasoning ability in a two-stage process. Going beyond this process, we utilize the back-door adjustment to specifically address the sub-regions identified as non-causal in the upstream phase. Specifically, we employ a novel image inpainting technique. By using a fine-tuned unconditional Diffusion Probabilistic Model (DDPM) as the generative prior, we in-fill the masks defined as environmental parts, offering the possibility of reliable extrapolation for potential data distributions. CaPaint overcomes the high complexity dilemma of optimal ST causal discovery models by reducing the data generation complexity from exponential to quasi-linear levels. Extensive experiments conducted on five real-world ST benchmarks demonstrate that integrating the CaPaint concept allows models to achieve improvements ranging from 4.3% to 77.3%. Moreover, compared to traditional mainstream ST augmenters, CaPaint underscores the potential of diffusion models in ST enhancement, offering a novel paradigm for this field. Our project is available at https://anonymous.4open.science/r/12345-DFCC.", "keywords": "spatio-temporal data mining; causal inference;", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yifan Duan;Jian Zhao;pengcheng;Junyuan Mao;Hao Wu;Jingyu Xu;shilong wang;Caoyuan Ma;Kai Wang;Kun Wang;Xuelong Li", "authorids": "~Yifan_Duan1;~Jian_Zhao2;~pengcheng2;~Junyuan_Mao2;~Hao_Wu39;~Jingyu_Xu1;~shilong_wang3;~Caoyuan_Ma1;~Kai_Wang8;~Kun_Wang15;~Xuelong_Li2", "gender": "M;M;M;M;M;M;M;M;M;M;M", "homepage": ";https://blog.csdn.net/cp1314971?spm=1000.2115.3001.5343;;https://easylearningscores.github.io/;https://kevinxu-01.github.io/home/;;https://miles629.github.io;https://kaiwang960112.github.io/;http://home.ustc.edu.cn/~wk520529/#home;;https://zhaoj9014.github.io/", "dblp": "231/3980;;;111;;;223/7396;78/2022-36;;l/XuelongLi;70/2932-6", "google_scholar": ";;;HdXMhfcAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;i2II0XIAAAAJ;UnyqjWQAAAAJ;ahUibskAAAAJ;https://scholar.google.com.sg/citations?hl=en", "orcid": "0009-0006-5855-6847;;0009-0001-3450-0885;0009-0008-4084-1409;0009-0009-2586-516X;0009-0001-0734-6085;0009-0008-2963-6244;0000-0002-1154-5175;0000-0003-0602-169X;;0000-0002-3508-756X", "linkedin": ";;;;;;;;;;jian-zhao-951089140", "or_profile": "~Yifan_Duan1;~pengcheng2;~Junyuan_Mao2;~Hao_Wu39;~Jingyu_Xu1;~shilong_wang3;~Caoyuan_Ma1;~Kai_Wang8;~Kun_Wang15;~Xuelong_Li2;~Jian_Zhao1", "aff": "University of Science and Technology of China;Beijing Forestry University;University of Science and Technology of China;University of Science and Technology of China;Wuhan University;University of Science and Technology of China;Wuhan University;National University of Singapore;University of Science and Technology of China;Northwestern Polytechnical University;Northwest Polytechnical University ", "aff_domain": "ustc.edu.cn;bjfu.edu.cn;ustc.edu.cn;ustc.edu.cn;whu.edu.cn;ustc.edu.cn;whu.edu.cn;u.nus.edu;ustc.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "MS student;MS student;Undergrad student;MS student;MS student;MS student;MS student;PhD student;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nduan2024causal,\ntitle={Causal Deciphering and Inpainting in Spatio-Temporal Dynamics via Diffusion Model},\nauthor={Yifan Duan and Jian Zhao and pengcheng and Junyuan Mao and Hao Wu and Jingyu Xu and shilong wang and Caoyuan Ma and Kai Wang and Kun Wang and Xuelong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1ONdF1JHyJ}\n}", "github": "", "reviewers": "s4LS;nGNj;botx;mbnW", "pdf_size": 0, "rating": "5;6;6;8", "confidence": "3;2;2;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "30;48;157;66", "wc_strengths": "55;76;70;58", "wc_weaknesses": "85;59;37;73", "wc_questions": "2;4;5;92", "wc_limitations": "16;6;11;35", "wc_review": "188;193;280;324", "wc_reply_reviewers": "30;85;0;156", "wc_reply_authors": "26;70;0;108", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 48.884430036566854 ], "wc_strengths_avg": [ 64.75, 8.584142356694699 ], "wc_weaknesses_avg": [ 63.5, 17.853571071357123 ], "wc_questions_avg": [ 25.75, 38.26470305647229 ], "wc_limitations_avg": [ 17.0, 10.977249200050075 ], "wc_review_avg": [ 246.25, 57.90671377310234 ], "wc_reply_reviewers_avg": [ 67.75, 59.37328944904434 ], "wc_reply_authors_avg": [ 51.0, 41.340053217188775 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.6225430174794673, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5436455587334617412&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;bjfu.edu.cn;ustc.edu.cn;ustc.edu.cn;whu.edu.cn;ustc.edu.cn;whu.edu.cn;u.nus.edu;ustc.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 11, "aff_unique_index": "0;1;0;0;2;0;2;3;0;4;5", "aff_unique_norm": "University of Science and Technology of China;Beijing Forestry University;Wuhan University;National University of Singapore;Northwestern Polytechnical University;Northwest Polytechnical University", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.bfu.edu.cn;http://www.whu.edu.cn/;https://www.nus.edu.sg;https://www.nwpu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "USTC;BFU;WHU;NUS;NWPU;NWPU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "On the Computational Landscape of Replicable Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96867", "id": "1PCsDNG6Jg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1PCsDNG6Jg", "openreview": "https://openreview.net/forum?id=1PCsDNG6Jg", "poster": "", "project": "", "author_site": "Alkis Kalavasis, Amin Karbasi, Grigoris Velegkas, Felix Zhou", "tldr": "", "abstract": "We study computational aspects of algorithmic replicability, a notion of stability introduced by Impagliazzo, Lei,\nPitassi, and Sorrell [STOC, 2022]. Motivated by a recent line of work that established strong statistical connections between\nreplicability and other notions of learnability such as online learning, private learning, and SQ learning, we aim to\nunderstand better the computational connections between replicability and these learning paradigms.\nOur first result shows that there is a concept class that is efficiently replicably PAC learnable, but, under standard\ncryptographic assumptions, no efficient online learner exists for this class. Subsequently, we design an efficient\nreplicable learner for PAC learning parities when the marginal distribution is far from uniform, making progress on a\nquestion posed by Impagliazzo et al. [STOC, 2022]. To obtain this result, we design a replicable lifting framework inspired by\nBlanc, Lange, Malik, and Tan [STOC, 2023], that transforms in a black-box manner efficient replicable PAC learners under the\nuniform marginal distribution over the Boolean hypercube to replicable PAC learners under any marginal distribution,\nwith sample and time complexity that depends on a certain measure of the complexity of the distribution. \nFinally, we show that any pure DP learner can be transformed in a black-box manner to a replicable learner, with time complexity polynomial in the confidence and accuracy parameters, but exponential in the representation dimension of the underlying hypothesis class.", "keywords": "Statistical Learning Theory;Replicability;Reproducibility", "primary_area": "learning_theory", "supplementary_material": "", "author": "Alkis Kalavasis;Amin Karbasi;Grigoris Velegkas;Felix Zhou", "authorids": "~Alkis_Kalavasis1;~Amin_Karbasi3;~Grigoris_Velegkas1;~Felix_Zhou1", "gender": "M;;M;", "homepage": "https://alkisk.github.io/;;;", "dblp": "269/9425;;254/1885;", "google_scholar": "NgVIFJwAAAAJ;;Ty1kgP0AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Alkis_Kalavasis1;~Amin_Karbasi3;~Grigoris_Velegkas1;~Felix_Zhou1", "aff": "Yale University;;Yale University;", "aff_domain": "yale.edu;;yale.edu;", "position": "Postdoc;;PhD student;", "bibtex": "@inproceedings{\nkalavasis2024on,\ntitle={On the Computational Landscape of Replicable Learning},\nauthor={Alkis Kalavasis and Amin Karbasi and Grigoris Velegkas and Felix Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1PCsDNG6Jg}\n}", "github": "", "reviewers": "9ean;iQWb;eJSY;npit", "pdf_size": 563349, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "1;4;4;3", "novelty": "3;3;3;3", "presentation": "3;4;4;4", "wc_summary": "432;147;139;263", "wc_strengths": "165;121;26;184", "wc_weaknesses": "495;66;94;187", "wc_questions": "107;50;95;34", "wc_limitations": "1;1;1;1", "wc_review": "1200;385;355;669", "wc_reply_reviewers": "520;0;0;17", "wc_reply_authors": "462;0;0;0", "reply_reviewers": "2;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.224744871391589 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 245.25, 118.46175543186924 ], "wc_strengths_avg": [ 124.0, 61.0204883625164 ], "wc_weaknesses_avg": [ 210.5, 170.25348748263573 ], "wc_questions_avg": [ 71.5, 30.335622624235025 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 652.25, 339.14994840040885 ], "wc_reply_reviewers_avg": [ 134.25, 222.82097634648315 ], "wc_reply_authors_avg": [ 115.5, 200.05186827420533 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12144719895606613693&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "email": "yale.edu;;yale.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "FastDrag: Manipulate Anything in One Step", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96866", "id": "1PNwacZYik", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1PNwacZYik", "openreview": "https://openreview.net/forum?id=1PNwacZYik", "poster": "/media/PosterPDFs/NeurIPS%202024/96866.png?t=1731419782.3159819", "project": "", "author_site": "Xuanjia Zhao, Jian Guan, Congyi Fan, Dongli Xu, Youtian Lin, Haiwei Pan, Pengming Feng", "tldr": "", "abstract": "Drag-based image editing using generative models provides precise control over image contents, enabling users to manipulate anything in an image with a few clicks. However, prevailing methods typically adopt $n$-step iterations for latent semantic optimization to achieve drag-based image editing, which is time-consuming and limits practical applications. In this paper, we introduce a novel one-step drag-based image editing method, i.e., FastDrag, to accelerate the editing process. Central to our approach is a latent warpage function (LWF), which simulates the behavior of a stretched material to adjust the location of individual pixels within the latent space. This innovation achieves one-step latent semantic optimization and hence significantly promotes editing speeds. Meanwhile, null regions emerging after applying LWF are addressed by our proposed bilateral nearest neighbor interpolation (BNNI) strategy. This strategy interpolates these regions using similar features from neighboring areas, thus enhancing semantic integrity. Additionally, a consistency-preserving strategy is introduced to maintain the consistency between the edited and original images by adopting semantic information from the original image, saved as key and value pairs in self-attention module during diffusion inversion, to guide the diffusion sampling. Our FastDrag is validated on the DragBench dataset, demonstrating substantial improvements in processing time over existing methods, while achieving enhanced editing performance.", "keywords": "Diffusion Model;Drag-based Image Editing", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/a38ae5fd6f0668324e2189f129f3edfb63c4aea0.zip", "author": "Xuanjia Zhao;Jian Guan;Congyi Fan;Dongli Xu;Youtian Lin;Haiwei Pan;Pengming Feng", "authorids": "~Xuanjia_Zhao2;~Jian_Guan4;~Congyi_Fan1;~Dongli_Xu1;~Youtian_Lin1;~Haiwei_Pan1;~Pengming_Feng1", "gender": ";M;M;;M;M;M", "homepage": ";https://homepage.hrbeu.edu.cn/web/guanjian1;https://github.com/HEUfcy;;https://linyou.github.io;http://faculty.hrbeu.edu.cn/panhaiwei/zh_CN/index/70297/list/index.htm;", "dblp": ";58/2489-1.html;;;253/3893;77/3132.html;", "google_scholar": ";wf60G1sAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";0000-0002-0945-1081;0009-0005-7896-9932;;0000-0002-9179-9406;0000-0001-9297-5662;0000-0001-5853-8100", "linkedin": ";;;;;;", "or_profile": "~Xuanjia_Zhao2;~Jian_Guan4;~Congyi_Fan1;~Dongli_Xu1;~Youtian_Lin1;~Haiwei_Pan1;~Pengming_Feng1", "aff": ";Harbin Engineering University;Harbin Engineering University;;VAST;Harbin Engineering University;CASC", "aff_domain": ";hrbeu.edu.cn;hrbeu.edu;;vastai3d.com;hrbeu.edu.cn;spacechina.com", "position": ";Associate Professor;Undergrad student;;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2024fastdrag,\ntitle={FastDrag: Manipulate Anything in One Step},\nauthor={Xuanjia Zhao and Jian Guan and Congyi Fan and Dongli Xu and Youtian Lin and Haiwei Pan and Pengming Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1PNwacZYik}\n}", "github": "", "reviewers": "hGqW;eL9M;YBFQ;Sik5", "pdf_size": 4695549, "rating": "4;5;6;7", "confidence": "1;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "203;54;79;40", "wc_strengths": "431;42;45;35", "wc_weaknesses": "530;91;142;31", "wc_questions": "503;5;61;175", "wc_limitations": "335;5;4;33", "wc_review": "2002;197;331;314", "wc_reply_reviewers": "0;0;59;32", "wc_reply_authors": "0;0;14;9", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.0, 64.46316777819719 ], "wc_strengths_avg": [ 138.25, 169.05823700725145 ], "wc_weaknesses_avg": [ 198.5, 195.38231752131512 ], "wc_questions_avg": [ 186.0, 193.0 ], "wc_limitations_avg": [ 94.25, 139.48364599479038 ], "wc_review_avg": [ 711.0, 747.1422220702026 ], "wc_reply_reviewers_avg": [ 22.75, 24.67159297653883 ], "wc_reply_authors_avg": [ 5.75, 6.015604707757983 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16638989125716325154&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";hrbeu.edu.cn;hrbeu.edu;;vastai3d.com;hrbeu.edu.cn;spacechina.com", "author_num": 7, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Harbin Engineering University;VAST;Chinese Academy of Sciences", "aff_unique_dep": ";;", "aff_unique_url": "http://www.heu.edu.cn;;http://www.cas.cn", "aff_unique_abbr": "HEU;;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "BackdoorAlign: Mitigating Fine-tuning based Jailbreak Attack with Backdoor Enhanced Safety Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96865", "id": "1PcJ5Evta7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1PcJ5Evta7", "openreview": "https://openreview.net/forum?id=1PcJ5Evta7", "poster": "/media/PosterPDFs/NeurIPS%202024/96865.png?t=1733416648.089654", "project": "", "author_site": "Jiongxiao Wang, Jiazhao LI, Yiquan Li, Xiangyu Qi, Junjie Hu, Sharon Li, Patrick McDaniel, Muhao Chen, Bo Li, Chaowei Xiao", "tldr": "", "abstract": "Despite the general capabilities of Large Language Models (LLMs) like GPT-4, these models still request fine-tuning or adaptation with customized data when meeting the specific business demands and intricacies of tailored use cases. However, this process inevitably introduces new safety threats, particularly against the Fine-tuning based Jailbreak Attack (FJAttack) under the setting of Language-Model-as-a-Service (LMaaS), where the model's safety has been significantly compromised by fine-tuning on users' uploaded examples that contain just a few harmful examples. Though potential defenses have been proposed that the service providers of LMaaS can integrate safety examples into the fine-tuning dataset to reduce safety issues, such approaches require incorporating a substantial amount of data, making it inefficient. To effectively defend against the FJAttack with limited safety examples under LMaaS, we propose the Backdoor Enhanced Safety Alignment method inspired by an analogy with the concept of backdoor attacks. In particular, service providers will construct prefixed safety examples with a secret prompt, acting as a \"backdoor trigger\". By integrating prefixed safety examples into the fine-tuning dataset, the subsequent fine-tuning process effectively acts as the \"backdoor attack\", establishing a strong correlation between the secret prompt and safety generations. Consequently, safe responses are ensured once service providers prepend this secret prompt ahead of any user input during inference. Our comprehensive experiments demonstrate that through the Backdoor Enhanced Safety Alignment with adding as few as 11 prefixed safety examples, the maliciously fine-tuned LLMs will achieve similar safety performance as the original aligned models without harming the benign performance. Furthermore, we also present the effectiveness of our method in a more practical setting where the fine-tuning data consists of both FJAttack examples and the fine-tuning task data.", "keywords": "Fine-tuning based Jailbreak Attack;Backdoor Attack;Safety Alignment for Large Language Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/c56c248f92f177512ef5bff1cdb88e1df75158e7.zip", "author": "Jiongxiao Wang;Jiazhao Li;Yiquan Li;Xiangyu Qi;Junjie Hu;Yixuan Li;Patrick McDaniel;Muhao Chen;Bo Li;Chaowei Xiao", "authorids": "~Jiongxiao_Wang1;~Jiazhao_Li1;~Yiquan_Li2;~Xiangyu_Qi2;~Junjie_Hu2;~Yixuan_Li1;~Patrick_McDaniel1;~Muhao_Chen1;~Bo_Li19;~Chaowei_Xiao2", "gender": ";M;M;F;M;M;F;M;M;M", "homepage": "https://jayfeather1024.github.io/jxwang.github.io/;https://jiazhaoli.github.io;https://unispac.github.io;http://pages.cs.wisc.edu/~sharonli/;http://www.patrickmcdaniel.org/;https://muhaochen.github.io/;http://boli.cs.illinois.edu/;https://xiaocw11.github.io/;https://junjiehu.github.io/;https://li-yiquan.github.io", "dblp": "322/5991;273/5334;274/2321;144/6087-1;m/PatrickDrewMcDaniel.html;173/2608;50/3402-26;150/3317;123/0773-1.html;239/9024", "google_scholar": "sIGapHMAAAAJ;dDJCGpYAAAAJ;9Za3rmkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=AMGqrI0AAAAJ;k79yEZkAAAAJ;K8vJkTcAAAAJ;Juoqtj8AAAAJ;j-42gHYAAAAJ;", "orcid": ";0000-0001-8219-5638;;;0000-0003-2091-7484;0000-0003-0118-3147;;0000-0002-7043-4926;0000-0001-7137-7719;", "linkedin": ";jiazhao-l-b5111714a/;;liyixuan;;;;;junjie-hu-24b48b83/;", "or_profile": "~Jiongxiao_Wang1;~Jiazhao_Li1;~Xiangyu_Qi2;~Yixuan_Li1;~Patrick_McDaniel1;~Muhao_Chen1;~Bo_Li19;~chaowei_xiao1;~Junjie_Hu1;~Yiquan_LI1", "aff": "University of Wisconsin - Madison;University of Michigan - Ann Arbor;Princeton University;Cornell University;University of Wisconsin - Madison;University of Southern California;University of Illinois, Urbana Champaign;NVIDIA;University of Wisconsin, Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;umich.edu;princeton.edu;cornell.edu;wisc.edu;usc.edu;illinois.edu;nvidia.com;wisc.edu;wisc.edu", "position": "PhD student;PhD student;PhD student;Graduate Student;Full Professor;Adjunct Professor;Assistant Professor;Researcher;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nwang2024backdooralign,\ntitle={BackdoorAlign: Mitigating Fine-tuning based Jailbreak Attack with Backdoor Enhanced Safety Alignment},\nauthor={Jiongxiao Wang and Jiazhao Li and Yiquan Li and Xiangyu Qi and Junjie Hu and Yixuan Li and Patrick McDaniel and Muhao Chen and Bo Li and Chaowei Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1PcJ5Evta7}\n}", "github": "", "reviewers": "D4Wj;oEoJ;uBHD;bX8S", "pdf_size": 1274742, "rating": "5;6;6;8", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "124;49;39;52", "wc_strengths": "70;40;37;83", "wc_weaknesses": "95;121;132;444", "wc_questions": "36;3;2;639", "wc_limitations": "68;1;2;12", "wc_review": "393;214;212;1230", "wc_reply_reviewers": "0;28;107;40", "wc_reply_authors": "564;49;321;16", "reply_reviewers": "0;1;2;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 33.830459648074545 ], "wc_strengths_avg": [ 57.5, 19.576771950451892 ], "wc_weaknesses_avg": [ 198.0, 142.66218840323458 ], "wc_questions_avg": [ 170.0, 271.12266596505725 ], "wc_limitations_avg": [ 20.75, 27.616797424755827 ], "wc_review_avg": [ 512.25, 420.8588688622351 ], "wc_reply_reviewers_avg": [ 43.75, 39.29615121102829 ], "wc_reply_authors_avg": [ 237.5, 222.58088417471973 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1436738540667256897&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "wisc.edu;umich.edu;princeton.edu;cornell.edu;wisc.edu;usc.edu;illinois.edu;nvidia.com;wisc.edu;wisc.edu", "author_num": 10, "aff_unique_index": "0;1;2;3;0;4;5;6;7;0", "aff_unique_norm": "University of Wisconsin-Madison;University of Michigan;Princeton University;Cornell University;University of Southern California;University of Illinois Urbana-Champaign;NVIDIA;University of Wisconsin", "aff_unique_dep": ";;;;;;NVIDIA Corporation;", "aff_unique_url": "https://www.wisc.edu;https://www.umich.edu;https://www.princeton.edu;https://www.cornell.edu;https://www.usc.edu;https://illinois.edu;https://www.nvidia.com;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UM;Princeton;Cornell;USC;UIUC;NVIDIA;UW", "aff_campus_unique_index": "0;1;0;3;4;0;0", "aff_campus_unique": "Madison;Ann Arbor;;Los Angeles;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Evidential Stochastic Differential Equations for Time-Aware Sequential Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96864", "id": "1PmsSugB87", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1PmsSugB87", "openreview": "https://openreview.net/forum?id=1PmsSugB87", "poster": "/media/PosterPDFs/NeurIPS%202024/96864.png?t=1731299483.8097835", "project": "", "author_site": "Krishna Neupane, Ervine Zheng, Qi Yu", "tldr": "", "abstract": "Sequential recommender systems are designed to capture users' evolving interests over time. Existing methods typically assume a uniform time interval among consecutive user interactions and may not capture users' continuously evolving behavior in the short and long term. In reality, the actual time intervals of user interactions vary dramatically. Consequently, as the time interval between interactions increases, so does the uncertainty in user behavior. Intuitively, it is beneficial to establish a correlation between the interaction time interval and the model uncertainty to provide effective recommendations. To this end, we formulate a novel Evidential Neural Stochastic Differential Equation (*E-NSDE*) to seamlessly integrate NSDE and evidential learning for effective time-aware sequential recommendations. The NSDE enables the model to learn users' fine-grained time-evolving behavior by capturing continuous user representation while evidential learning quantifies both aleatoric and epistemic uncertainties considering interaction time interval to provide model confidence during prediction. Furthermore, we derive a mathematical relationship between the interaction time interval and model uncertainty to guide the learning process. Experiments on real-world data demonstrate the effectiveness of the proposed method compared to the SOTA methods.", "keywords": "Sequential recommendation;time-evolving behavior", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Krishna Prasad Neupane;Ervine Zheng;Qi Yu", "authorids": "~Krishna_Prasad_Neupane1;~Ervine_Zheng1;~Qi_Yu1", "gender": "M;;M", "homepage": ";;https://www.rit.edu/mining/", "dblp": ";;58/6957-1", "google_scholar": "8UHcQU0AAAAJ;;L3gWdfEAAAAJ", "orcid": ";;0000-0002-0426-5407", "linkedin": ";;", "or_profile": "~Krishna_Prasad_Neupane1;~Ervine_Zheng1;~Qi_Yu1", "aff": ";;Rochester Institute of Technology", "aff_domain": ";;rit.edu", "position": ";;Professor", "bibtex": "@inproceedings{\nneupane2024evidential,\ntitle={Evidential Stochastic Differential Equations for Time-Aware Sequential Recommendation},\nauthor={Krishna Prasad Neupane and Ervine Zheng and Qi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1PmsSugB87}\n}", "github": "", "reviewers": "zE4M;V8wq;LeEA;p7WH", "pdf_size": 856053, "rating": "4;4;5;7", "confidence": "4;3;4;2", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "63;47;128;48", "wc_strengths": "69;87;93;59", "wc_weaknesses": "126;151;94;125", "wc_questions": "42;71;115;4", "wc_limitations": "1;1;17;2", "wc_review": "301;357;447;238", "wc_reply_reviewers": "0;16;0;0", "wc_reply_authors": "101;216;117;74", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 33.23025729662652 ], "wc_strengths_avg": [ 77.0, 13.638181696985855 ], "wc_weaknesses_avg": [ 124.0, 20.211382931407737 ], "wc_questions_avg": [ 58.0, 40.58940748520481 ], "wc_limitations_avg": [ 5.25, 6.796138609534093 ], "wc_review_avg": [ 335.75, 76.79640291055304 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 127.0, 53.63301222195151 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gxugcBVF3mAJ:scholar.google.com/&scioq=Evidential+Stochastic+Differential+Equations+for+Time-Aware+Sequential+Recommendation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";;rit.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "FineStyle: Fine-grained Controllable Style Personalization for Text-to-image Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96863", "id": "1SmXUGzrH8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1SmXUGzrH8", "openreview": "https://openreview.net/forum?id=1SmXUGzrH8", "poster": "/media/PosterPDFs/NeurIPS%202024/96863.png?t=1731557458.4566844", "project": "", "author_site": "Gong Zhang, Kihyuk Sohn, Meera Hahn, Humphrey Shi, Irfan Essa", "tldr": "", "abstract": "Few-shot fine-tuning of text-to-image (T2I) generation models enables people to create unique images in their own style using natural languages without requiring extensive prompt engineering. However, fine-tuning with only a handful, as little as one, of image-text paired data prevents fine-grained control of style attributes at generation. In this paper, we present FineStyle, a few-shot fine-tuning method that allows enhanced controllability for style personalized text-to-image generation. To overcome the lack of training data for fine-tuning, we propose a novel concept-oriented data scaling that amplifies the number of image-text pair, each of which focuses on different concepts (e.g., objects) in the style reference image. We also identify the benefit of parameter-efficient adapter tuning of key and value kernels of cross-attention layers. Extensive experiments show the effectiveness of FineStyle at following fine-grained text prompts and delivering visual quality faithful to the specified style, measured by CLIP scores and human raters.", "keywords": "text-to-image model;personalization fine-tuning;text-to-image concept alignment", "primary_area": "generative_models", "supplementary_material": "", "author": "Gong Zhang;Kihyuk Sohn;Meera Hahn;Humphrey Shi;Irfan Essa", "authorids": "~Gong_Zhang3;~Kihyuk_Sohn1;~Meera_Hahn1;~Humphrey_Shi1;~Irfan_Essa1", "gender": "M;F;M;M;M", "homepage": "https://sites.google.com/site/kihyuksml/;https://meerahahn.github.io/;http://www.irfanessa.com/;;https://www.humphreyshi.com", "dblp": "53/10771;173/5203;e/IrfanAEssa;;176/5516", "google_scholar": "VxpypngAAAAJ;XNXylX0AAAAJ;https://scholar.google.com.tw/citations?user=XM97iScAAAAJ;m67w1RwAAAAJ;WBvt5A8AAAAJ", "orcid": ";;0000-0002-6236-2969;;0000-0002-2922-5663", "linkedin": ";;irfanessa/;gong-zhang-a3820a77/;humphreyshi", "or_profile": "~Kihyuk_Sohn1;~Meera_Hahn1;~Irfan_Essa1;~Eric_Zhang1;~Honghui_Shi1", "aff": "Google;Google;Georgia Institute of Technology;Georgia Institute of Technology;University of Illinois, Urbana Champaign", "aff_domain": "google.com;google.com;gatech.edu;gatech.edu;illinois.edu", "position": "Research Scientist;Researcher;Full Professor;PhD student;Adjunct Assistant Professor", "bibtex": "@inproceedings{\nzhang2024finestyle,\ntitle={FineStyle: Fine-grained Controllable Style Personalization for Text-to-image Models},\nauthor={Gong Zhang and Kihyuk Sohn and Meera Hahn and Humphrey Shi and Irfan Essa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1SmXUGzrH8}\n}", "github": "", "reviewers": "VFWr;NTkh;PDcY", "pdf_size": 8230946, "rating": "3;5;6", "confidence": "5;4;5", "soundness": "3;2;4", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "53;41;67", "wc_strengths": "62;38;40", "wc_weaknesses": "228;234;196", "wc_questions": "129;3;482", "wc_limitations": "12;13;3", "wc_review": "484;329;788", "wc_reply_reviewers": "380;0;417", "wc_reply_authors": "1690;0;796", "reply_reviewers": "2;0;2", "reply_authors": "4;1;2", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 53.666666666666664, 10.624918300339484 ], "wc_strengths_avg": [ 46.666666666666664, 10.873004286866726 ], "wc_weaknesses_avg": [ 219.33333333333334, 16.679994670929073 ], "wc_questions_avg": [ 204.66666666666666, 202.73847412094455 ], "wc_limitations_avg": [ 9.333333333333334, 4.4969125210773475 ], "wc_review_avg": [ 533.6666666666666, 190.64860054269712 ], "wc_reply_reviewers_avg": [ 265.6666666666667, 188.46101866315897 ], "wc_reply_authors_avg": [ 828.6666666666666, 690.3261708947606 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.18898223650461357, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3590807491305298343&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "google.com;google.com;gatech.edu;gatech.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Google;Georgia Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.gatech.edu;https://illinois.edu", "aff_unique_abbr": "Google;Georgia Tech;UIUC", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Mountain View;;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1URMG6B3WW", "title": "KrwEmd: Revising the Imperfect Recall Abstraction from Forgetting Everything", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent research has shown that an extreme interpretation of imperfect recall abstraction \u2014- completely forgetting all past information \u2014- has led to excessive abstraction issues. Currently, there are no hand abstraction algorithms that effectively integrate historical information. This paper aims to develop the first such algorithm. Initially, we introduce the KRWI abstraction for Texas Hold'em-style games, which categorizes hands based on K-recall winrate features that incorporate historical information. Statistical results indicate that, in terms of the number of distinct infosets identified, KRWI significantly outperforms POI, an abstraction that identifies the most abstracted infosets that forget all historical information. Following this, we introduce the KrwEmd algorithm, the first hand abstraction algorithm to effectively use historical information by combining K-recall win rate features and earth mover's distance for hand classification. Empirical studies conducted in the Numeral211 Hold'em environment show that under identical abstracted infoset sizes, KrwEmd not only surpasses POI but also outperforms state-of-the-art hand abstraction algorithms such as EHS and PAEMD. These findings suggest that incorporating historical information can significantly enhance the performance of hand abstraction algorithms, positioning KrwEmd as a promising approach for advancing strategic computation in large-scale adversarial games.", "keywords": "game theory;imperfect-information games;games with ordered signals;computer poker;imperfect-recall abstraction", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "FU Yanchang;Junge Zhang;Bai dongdong;Zhaolingyun;Kaiqi Huang", "authorids": "~FU_Yanchang1;~Junge_Zhang1;~Bai_dongdong1;~Zhaolingyun1;~Kaiqi_Huang1", "gender": "M;;M;M;M", "homepage": ";;;;https://people.ucas.ac.cn/~huangkaiqi?language=en", "dblp": ";;;;89/7026", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;gbStvusAAAAJ;https://scholar.google.co.in/citations?user=qzJf6lkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;caQ-OmYAAAAJ", "orcid": ";0000-0002-9970-394X;;;", "linkedin": ";;;;", "or_profile": "~FU_Yanchang1;~Junge_Zhang1;~Bai_dongdong1;~Zhaolingyun1;~Kaiqi_Huang1", "aff": "University of Chinese Academy of Sciences;Institute of automation, Chinese academy of science;China RongTong Artificial Intelligence Research Center;University of Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "mails.ucas.ac.cn;ia.ac.cn;mail.crtamg.com.cn;mails.ucas.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;Full Professor;Researcher;MS student;Professor", "bibtex": "@misc{\nanonymous2024krwemd,\ntitle={KrwEmd: Revising the Imperfect Recall Abstraction from Forgetting Everything},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=1URMG6B3WW}\n}", "github": "", "project": "", "reviewers": "yvNX;LQq9;uY4u;HNa9;2m7b", "site": "https://openreview.net/forum?id=1URMG6B3WW", "pdf_size": 505128, "rating": "5;5;5;6;6", "confidence": "3;1;2;2;1", "soundness": "3;3;2;3;2", "novelty": "3;2;2;3;3", "presentation": "1;1;2;3;2", "wc_summary": "140;83;300;51;90", "wc_strengths": "26;18;72;47;132", "wc_weaknesses": "270;80;188;48;110", "wc_questions": "14;3;636;81;64", "wc_limitations": "44;2;1;43;33", "wc_review": "494;186;1197;270;429", "wc_reply_reviewers": "0;36;205;0;0", "wc_reply_authors": "110;49;3051;0;0", "reply_reviewers": "0;1;2;0;0", "reply_authors": "2;2;7;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 1.8, 0.7483314773547883 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 1.8, 0.7483314773547883 ], "wc_summary_avg": [ 132.8, 88.32983640876961 ], "wc_strengths_avg": [ 59.0, 41.0170696174166 ], "wc_weaknesses_avg": [ 139.2, 80.20573545576401 ], "wc_questions_avg": [ 159.6, 239.9988333304977 ], "wc_limitations_avg": [ 24.6, 19.252012881774206 ], "wc_review_avg": [ 515.2, 358.1136132570221 ], "wc_reply_reviewers_avg": [ 48.2, 79.63014504570491 ], "wc_reply_authors_avg": [ 642.0, 1205.178990855715 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.6, 2.244994432064365 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B2hd148XH3AJ:scholar.google.com/&scioq=KrwEmd:+Revising+the+Imperfect+Recall+Abstraction+from+Forgetting+Everything&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;China RongTong Artificial Intelligence Research Center", "aff_unique_dep": ";Institute of Automation;Artificial Intelligence Research Center", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn;", "aff_unique_abbr": "UCAS;CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Multilingual Diversity Improves Vision-Language Representations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96862", "id": "1WtEqReCyS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1WtEqReCyS", "openreview": "https://openreview.net/forum?id=1WtEqReCyS", "poster": "/media/PosterPDFs/NeurIPS%202024/96862.png?t=1730276965.647146", "project": "", "author_site": "Thao Nguyen, Matthew Wallingford, Sebastin Santy, Wei-Chiu Ma, Sewoong Oh, Ludwig Schmidt, Pang Wei Koh, Ranjay Krishna", "tldr": "", "abstract": "Massive web-crawled image-text datasets lay the foundation for recent progress in multimodal learning. These datasets are designed with the goal of training a model to do well on standard computer vision benchmarks, many of which, however, have been shown to be English-centric (e.g., ImageNet). Consequently, existing data curation techniques gravitate towards using predominantly English image-text pairs and discard many potentially useful non-English samples. Our work questions this practice. Multilingual data is inherently enriching not only because it provides a gateway to learn about culturally salient concepts, but also because it depicts common concepts differently from monolingual data. We thus conduct a systematic study to explore the performance benefits of using more samples of non-English origins with respect to English vision tasks. By translating all multilingual image-text pairs from a raw web crawl to English and re-filtering them, we increase the prevalence of (translated) multilingual data in the resulting training set. Pre-training on this dataset outperforms using English-only or English-dominated datasets on ImageNet, ImageNet distribution shifts, image-English-text retrieval and on average across 38 tasks from the DataComp benchmark. On a geographically diverse task like GeoDE, we also observe improvements across all regions, with the biggest gain coming from Africa. In addition, we quantitatively show that English and non-English data are significantly different in both image and (translated) text space. We hope that our findings motivate future work to be more intentional about including multicultural and multilingual data, not just when non-English or geographically diverse tasks are involved, but to enhance model capabilities at large.", "keywords": "multilingual data;vision-language models;data curation;data filtering;CLIP", "primary_area": "machine_vision", "supplementary_material": "", "author": "Thao Nguyen;Matthew Wallingford;Sebastin Santy;Wei-Chiu Ma;Sewoong Oh;Ludwig Schmidt;Pang Wei Koh;Ranjay Krishna", "authorids": "~Thao_Nguyen3;~Matthew_Wallingford1;~Sebastin_Santy2;~Wei-Chiu_Ma1;~Sewoong_Oh3;~Ludwig_Schmidt1;~Pang_Wei_Koh1;~Ranjay_Krishna1", "gender": "F;M;M;M;M;M;M;M", "homepage": "https://thaonguyen19.github.io/;http://sebastinsanty.com/;https://www.cs.cornell.edu/~weichiu/;http://people.csail.mit.edu/ludwigs/;http://cs.stanford.edu/~pangwei;http://ranjaykrishna.com;https://mattwallingford.github.io/;https://homes.cs.washington.edu/~sewoong/", "dblp": "77/2922;212/5368;151/4277;141/2720;10/10453;167/3785;263/1795;80/4366", "google_scholar": "DvJG-_8AAAAJ;HsyMg08AAAAJ;SVIdh6AAAAAJ;SWMKy70AAAAJ;Nn990CkAAAAJ;IcqahyAAAAAJ;;55TAOdgAAAAJ", "orcid": ";0000-0003-1162-0865;;;;0000-0001-8784-2531;;", "linkedin": ";sebastinsanty/;;ludwig-schmidt-87ba3612/;;ranjay-krishna-1a344444/;;", "or_profile": "~Thao_Nguyen3;~Sebastin_Santy2;~Wei-Chiu_Ma1;~Ludwig_Schmidt1;~Pang_Wei_Koh1;~Ranjay_Krishna1;~Matthew_C_Wallingford2;~Sewoong_Oh1", "aff": "Meta;University of Washington;Allen Institute for Artificial Intelligence;University of Washington;University of Washington;University of Washington;University of Washington;University of Washington", "aff_domain": "meta.com;cs.washington.edu;allenai.org;washington.edu;cs.washington.edu;cs.washington.edu;washington.edu;uw.edu", "position": "Visiting Researcher;PhD student;Postdoc;Assistant Professor;Assistant Professor;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nnguyen2024multilingual,\ntitle={Multilingual Diversity Improves Vision-Language Representations},\nauthor={Thao Nguyen and Matthew Wallingford and Sebastin Santy and Wei-Chiu Ma and Sewoong Oh and Ludwig Schmidt and Pang Wei Koh and Ranjay Krishna},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1WtEqReCyS}\n}", "github": "", "reviewers": "2aum;xoEn;7AUA;9sUx", "pdf_size": 7309585, "rating": "5;7;8;8", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "61;76;107;56", "wc_strengths": "44;47;78;88", "wc_weaknesses": "65;27;565;41", "wc_questions": "54;1;263;32", "wc_limitations": "110;1;19;5", "wc_review": "334;152;1032;222", "wc_reply_reviewers": "21;0;145;69", "wc_reply_authors": "27;0;39;26", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.0, 19.887181801351343 ], "wc_strengths_avg": [ 64.25, 19.109879643786353 ], "wc_weaknesses_avg": [ 174.5, 225.8644505007373 ], "wc_questions_avg": [ 87.5, 103.05944886326532 ], "wc_limitations_avg": [ 33.75, 44.52737921773524 ], "wc_review_avg": [ 435.0, 350.73779380044004 ], "wc_reply_reviewers_avg": [ 58.75, 55.72420928106562 ], "wc_reply_authors_avg": [ 23.0, 14.230249470757707 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5545713494694845366&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "meta.com;cs.washington.edu;allenai.org;washington.edu;cs.washington.edu;cs.washington.edu;washington.edu;uw.edu", "author_num": 8, "aff_unique_index": "0;1;2;1;1;1;1;1", "aff_unique_norm": "Meta;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Meta;UW;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Wide Two-Layer Networks can Learn from Adversarial Perturbations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96861", "id": "1YGgaouVgZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1YGgaouVgZ", "openreview": "https://openreview.net/forum?id=1YGgaouVgZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96861.png?t=1731993076.8931775", "project": "", "author_site": "Soichiro Kumano, Hiroshi Kera, Toshihiko Yamasaki", "tldr": "", "abstract": "Adversarial examples have raised several open questions, such as why they can deceive classifiers and transfer between different models. A prevailing hypothesis to explain these phenomena suggests that adversarial perturbations appear as random noise but contain class-specific features. This hypothesis is supported by the success of perturbation learning, where classifiers trained solely on adversarial examples and the corresponding incorrect labels generalize well to correctly labeled test data. Although this hypothesis and perturbation learning are effective in explaining intriguing properties of adversarial examples, their solid theoretical foundation is limited. In this study, we theoretically explain the counterintuitive success of perturbation learning. We assume wide two-layer networks and the results hold for any data distribution. We prove that adversarial perturbations contain sufficient class-specific features for networks to generalize from them. Moreover, the predictions of classifiers trained on mislabeled adversarial examples coincide with those of classifiers trained on correctly labeled clean samples. The code is available at https://github.com/s-kumano/perturbation-learning.", "keywords": "Adversarial Perturbations;Adversarial Examples;Adversarial Attacks;Non-Robust Features;Perturbation Learning;Learning from Adversarial Perturbations", "primary_area": "learning_theory", "supplementary_material": "", "author": "Soichiro Kumano;Hiroshi Kera;Toshihiko Yamasaki", "authorids": "~Soichiro_Kumano1;~Hiroshi_Kera1;~Toshihiko_Yamasaki1", "gender": "M;M;M", "homepage": "https://s-kumano.github.io/;;http://www.cvm.t.u-tokyo.ac.jp/en/", "dblp": "280/3037;190/2671;81/881", "google_scholar": ";https://scholar.google.co.jp/citations?user=M4Krt5gAAAAJ;rE9iY5MAAAAJ", "orcid": ";;0000-0002-1784-2314", "linkedin": ";;", "or_profile": "~Soichiro_Kumano1;~Hiroshi_Kera1;~Toshihiko_Yamasaki1", "aff": "The University of Tokyo;Chiba University;The University of Tokyo", "aff_domain": "g.ecc.u-tokyo.ac.jp;chiba-u.jp;u-tokyo.ac.jp", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkumano2024wide,\ntitle={Wide Two-Layer Networks can Learn from Adversarial Perturbations},\nauthor={Soichiro Kumano and Hiroshi Kera and Toshihiko Yamasaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1YGgaouVgZ}\n}", "github": "", "reviewers": "fzgX;5BXU;MQtT", "pdf_size": 2289590, "rating": "5;5;5", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "137;54;81", "wc_strengths": "58;13;34", "wc_weaknesses": "142;114;300", "wc_questions": "123;2;37", "wc_limitations": "17;9;66", "wc_review": "477;192;518", "wc_reply_reviewers": "0;0;663", "wc_reply_authors": "0;0;1257", "reply_reviewers": "0;0;1", "reply_authors": "1;1;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 90.66666666666667, 34.56716489515576 ], "wc_strengths_avg": [ 35.0, 18.384776310850235 ], "wc_weaknesses_avg": [ 185.33333333333334, 81.88338591490272 ], "wc_questions_avg": [ 54.0, 50.839617097954886 ], "wc_limitations_avg": [ 30.666666666666668, 25.197001585285676 ], "wc_review_avg": [ 395.6666666666667, 144.9835239681469 ], "wc_reply_reviewers_avg": [ 221.0, 312.54119728445403 ], "wc_reply_authors_avg": [ 419.0, 592.5554826343268 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p23l2caY_CUJ:scholar.google.com/&scioq=Wide+Two-Layer+Networks+can+Learn+from+Adversarial+Perturbations&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "g.ecc.u-tokyo.ac.jp;chiba-u.jp;u-tokyo.ac.jp", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Tokyo;Chiba University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.chiba-u.ac.jp", "aff_unique_abbr": "UTokyo;Chiba U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Warm-up Free Policy Optimization: Improved Regret in Linear Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96860", "id": "1c9XHlHTs7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1c9XHlHTs7", "openreview": "https://openreview.net/forum?id=1c9XHlHTs7", "poster": "/media/PosterPDFs/NeurIPS%202024/96860.png?t=1731491562.6476347", "project": "", "author_site": "Asaf Cassel, Aviv Rosenberg", "tldr": "", "abstract": "Policy Optimization (PO) methods are among the most popular Reinforcement Learning (RL) algorithms in practice. Recently, Sherman et al. [2023a] proposed a PO-based algorithm with rate-optimal regret guarantees under the linear Markov Decision Process (MDP) model. However, their algorithm relies on a costly pure exploration warm-up phase that is hard to implement in practice. This paper eliminates this undesired warm-up phase, replacing it with a simple and efficient contraction mechanism. Our PO algorithm achieves rate-optimal regret with improved dependence on the other parameters of the problem (horizon and function approximation dimension) in two fundamental settings: adversarial losses with full-information feedback and stochastic losses with bandit feedback.", "keywords": "policy optimization;reinforcement learning theory;regret;Markov Decision Process;linear MDP", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Asaf Cassel;Aviv Rosenberg", "authorids": "~Asaf_Cassel1;~Aviv_Rosenberg1", "gender": "M;M", "homepage": "https://sites.google.com/view/aviv-rosenberg/home;", "dblp": "225/9369-2;222/3222", "google_scholar": "https://scholar.google.co.il/citations?user=cg8_-foAAAAJ;vhIydFkAAAAJ", "orcid": ";", "linkedin": "aviv-rosenberg-2a6222149/;", "or_profile": "~Aviv_Rosenberg1;~Asaf_Benjamin_Cassel1", "aff": "Google Research;Tel Aviv University", "aff_domain": "google.com;tau.ac.il", "position": "Researcher;PhD student", "bibtex": "@inproceedings{\ncassel2024warmup,\ntitle={Warm-up Free Policy Optimization: Improved Regret in Linear Markov Decision Processes},\nauthor={Asaf Cassel and Aviv Rosenberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1c9XHlHTs7}\n}", "github": "", "reviewers": "gZJ8;x3ta;Gemc;JkeU", "pdf_size": 391980, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "105;45;103;36", "wc_strengths": "145;79;107;42", "wc_weaknesses": "131;133;286;76", "wc_questions": "3;1;21;6", "wc_limitations": "1;16;2;1", "wc_review": "385;274;519;161", "wc_reply_reviewers": "20;22;21;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 31.91688424642982 ], "wc_strengths_avg": [ 93.25, 37.73840881648297 ], "wc_weaknesses_avg": [ 156.5, 78.18727517953289 ], "wc_questions_avg": [ 7.75, 7.854139036202504 ], "wc_limitations_avg": [ 5.0, 6.363961030678928 ], "wc_review_avg": [ 334.75, 132.62046410716562 ], "wc_reply_reviewers_avg": [ 15.75, 9.12071817347735 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3uDbOsKVx6gJ:scholar.google.com/&scioq=Warm-up+Free+Policy+Optimization:+Improved+Regret+in+Linear+Markov+Decision+Processes&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "google.com;tau.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Tel Aviv University", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.tau.ac.il", "aff_unique_abbr": "Google Research;TAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "title": "An Adaptive Approach for Infinitely Many-armed Bandits under Generalized Rotting Constraints", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96859", "id": "1cXdndzkxU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1cXdndzkxU", "openreview": "https://openreview.net/forum?id=1cXdndzkxU", "poster": "/media/PosterPDFs/NeurIPS%202024/96859.png?t=1733804104.553155", "project": "", "author_site": "Jung-hun Kim, Milan Vojnovic, Se-Young Yun", "tldr": "", "abstract": "In this study, we consider the infinitely many-armed bandit problems in a rested rotting setting, where the mean reward of an arm may decrease with each pull, while otherwise, it remains unchanged. We explore two scenarios regarding the rotting of rewards: one in which the cumulative amount of rotting is bounded by $V_T$, referred to as the slow-rotting case, and the other in which the cumulative number of rotting instances is bounded by $S_T$, referred to as the abrupt-rotting case. To address the challenge posed by rotting rewards, we introduce an algorithm that utilizes UCB with an adaptive sliding window, designed to manage the bias and variance trade-off arising due to rotting rewards. Our proposed algorithm achieves tight regret bounds for both slow and abrupt rotting scenarios. Lastly, we demonstrate the performance of our algorithm using numerical experiments.", "keywords": "bandits;rotting rewards;infinitely many arms", "primary_area": "bandits", "supplementary_material": "", "author": "Jung-hun Kim;Milan Vojnovic;Se-Young Yun", "authorids": "~Jung-hun_Kim1;~Milan_Vojnovic1;~Se-Young_Yun1", "gender": "M;M;M", "homepage": "https://personal.lse.ac.uk/vojnovic/;https://fbsqkd.github.io;https://sites.google.com/view/junghunkim/home", "dblp": "00/1815;23/8862;234/2799", "google_scholar": "https://scholar.google.co.uk/citations?user=z4JhSBwAAAAJ;X_IAjb8AAAAJ;2eFdD1kAAAAJ", "orcid": ";;", "linkedin": ";seyoung-yun-395130ab/;jung-hun-kim/", "or_profile": "~Milan_Vojnovic1;~Se-Young_Yun1;~Junghun_Kim1", "aff": "London School of Economics;KAIST;Seoul National University", "aff_domain": "lse.ac.uk;kaist.ac.kr;snu.ac.kr", "position": "Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nkim2024an,\ntitle={An Adaptive Approach for Infinitely Many-armed Bandits under Generalized Rotting Constraints},\nauthor={Jung-hun Kim and Milan Vojnovic and Se-Young Yun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1cXdndzkxU}\n}", "github": "", "reviewers": "g8GP;WSKL;jk3J;ALsC", "pdf_size": 701977, "rating": "4;6;6;6", "confidence": "4;4;4;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "72;63;298;15", "wc_strengths": "35;72;100;20", "wc_weaknesses": "259;129;336;3", "wc_questions": "7;141;125;80", "wc_limitations": "4;6;1;6", "wc_review": "377;411;860;124", "wc_reply_reviewers": "150;24;95;24", "wc_reply_authors": "511;33;45;19", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 112.0, 109.55135781906128 ], "wc_strengths_avg": [ 56.75, 31.33189269737786 ], "wc_weaknesses_avg": [ 181.75, 126.97908292313345 ], "wc_questions_avg": [ 88.25, 51.9681392778306 ], "wc_limitations_avg": [ 4.25, 2.0463381929681126 ], "wc_review_avg": [ 443.0, 265.0613136615753 ], "wc_reply_reviewers_avg": [ 73.25, 52.94985835675106 ], "wc_reply_authors_avg": [ 152.0, 207.4728897952694 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9340550252058501874&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "lse.ac.uk;kaist.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "London School of Economics;Korea Advanced Institute of Science and Technology;Seoul National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lse.ac.uk;https://www.kaist.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": "LSE;KAIST;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "I2EBench: A Comprehensive Benchmark for Instruction-based Image Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96858", "id": "1dpmeH6IHa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1dpmeH6IHa", "openreview": "https://openreview.net/forum?id=1dpmeH6IHa", "poster": "/media/PosterPDFs/NeurIPS%202024/96858.png?t=1730001352.4318628", "project": "", "author_site": "Yiwei Ma, Jiayi Ji, Ke Ye, Weihuang Lin, Zhibin Wang, Yonghan Zheng, Qiang Zhou, Xiaoshuai Sun, Rongrong Ji", "tldr": "", "abstract": "Significant progress has been made in the field of Instruction-based Image Editing (IIE). However, evaluating these models poses a significant challenge. A crucial requirement in this field is the establishment of a comprehensive evaluation benchmark for accurately assessing editing results and providing valuable insights for its further development. In response to this need, we propose I2EBench, a comprehensive benchmark designed to automatically evaluate the quality of edited images produced by IIE models from multiple dimensions. I2EBench consists of 2,000+ images for editing, along with 4,000+ corresponding original and diverse instructions. It offers three distinctive characteristics: 1) Comprehensive Evaluation Dimensions: I2EBench comprises 16 evaluation dimensions that cover both high-level and low-level aspects, providing a comprehensive assessment of each IIE model. 2) Human Perception Alignment: To ensure the alignment of our benchmark with human perception, we conducted an extensive user study for each evaluation dimension. 3) Valuable Research Insights: By analyzing the advantages and disadvantages of existing IIE models across the 16 dimensions, we offer valuable research insights to guide future development in the field. We will open-source I2EBench, including all instructions, input images, human annotations, edited images from all evaluated methods, and a simple script for evaluating the results from new IIE models. The code, dataset, and generated images from all IIE models are provided in GitHub: https://github.com/cocoshe/I2EBench.", "keywords": "Benchmark;Instruction-based Image Editing;Multimodal Large Language Model", "primary_area": "evaluation", "supplementary_material": "", "author": "Yiwei Ma;Jiayi Ji;Ke Ye;Weihuang Lin;zhibin wang;Yonghan Zheng;Qiang Zhou;Xiaoshuai Sun;Rongrong Ji", "authorids": "~Yiwei_Ma1;~Jiayi_Ji1;~Ke_Ye4;~Weihuang_Lin1;~zhibin_wang2;~Yonghan_Zheng1;~Qiang_Zhou8;~Xiaoshuai_Sun3;~Rongrong_Ji5", "gender": "M;M;M;;;M;M;M;M", "homepage": "https://xmu-xiaoma666.github.io/;https://scholar.google.com/citations?user=xp_rICcAAAAJ&hl=zh-CN;https://github.com/cocoshe;;;https://github.com/jiyi-zyh;https://mightyzau.github.io/;https://sites.google.com/view/xssun;http://mac.xmu.edu.cn/rrji-en.html", "dblp": ";250/9459;;;;;;26/5787.html;86/5681", "google_scholar": "KIDY5pUAAAAJ;xp_rICcAAAAJ;;;;;;KPMK3B4AAAAJ;", "orcid": "0000-0002-8744-3423;0000-0002-9956-6308;;;;;0000-0003-3697-9348;0000-0003-3912-9306;", "linkedin": ";;;;;;;;", "or_profile": "~Yiwei_Ma1;~Jiayi_Ji1;~Ke_Ye4;~Weihuang_Lin1;~zhibin_wang2;~Yonghan_Zheng1;~Qiang_Zhou8;~Xiaoshuai_Sun3;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;Xiamen University;;;Xiamen University;Alibaba Group;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;;xmu.edu.cn;alibaba-inc.com;xmu.edu.cn;xmu.edu.cn", "position": "PhD student;Postdoc;MS student;;;Undergrad student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nma2024iebench,\ntitle={I2{EB}ench: A Comprehensive Benchmark for Instruction-based Image Editing},\nauthor={Yiwei Ma and Jiayi Ji and Ke Ye and Weihuang Lin and zhibin wang and Yonghan Zheng and Qiang Zhou and Xiaoshuai Sun and Rongrong Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1dpmeH6IHa}\n}", "github": "", "reviewers": "riYa;HHca;A2Kh;7DEf", "pdf_size": 2914695, "rating": "5;6;8;8", "confidence": "5;5;5;5", "soundness": "3;2;4;4", "novelty": "3;2;4;4", "presentation": "2;3;4;4", "wc_summary": "68;65;68;86", "wc_strengths": "61;52;120;167", "wc_weaknesses": "125;112;124;145", "wc_questions": "40;147;31;32", "wc_limitations": "9;12;13;1", "wc_review": "303;388;356;431", "wc_reply_reviewers": "11;15;66;122", "wc_reply_authors": "87;81;91;23", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 71.75, 8.317902379807062 ], "wc_strengths_avg": [ 100.0, 46.674404120459855 ], "wc_weaknesses_avg": [ 126.5, 11.84271928232701 ], "wc_questions_avg": [ 62.5, 48.91063279083598 ], "wc_limitations_avg": [ 8.75, 4.710360920354193 ], "wc_review_avg": [ 369.5, 46.71455875848556 ], "wc_reply_reviewers_avg": [ 53.5, 45.10266067539697 ], "wc_reply_authors_avg": [ 70.5, 27.654113618049667 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1395486898288033875&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;;xmu.edu.cn;alibaba-inc.com;xmu.edu.cn;xmu.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Xiamen University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "XMU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MAGNET: Improving the Multilingual Fairness of Language Models with Adaptive Gradient-Based Tokenization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96857", "id": "1e3MOwHSIX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1e3MOwHSIX", "openreview": "https://openreview.net/forum?id=1e3MOwHSIX", "poster": "", "project": "", "author_site": "Orevaoghene Ahia, Sachin Kumar, Hila Gonen, Valentin Hofmann, Tomasz Limisiewicz, Yulia Tsvetkov, Noah Smith", "tldr": "", "abstract": "In multilingual settings, non-Latin scripts and low-resource languages are usually disadvantaged in terms of language models\u2019 utility, efficiency, and cost. Specifically, previous studies have reported multiple modeling biases that the current tokenization algorithms introduce to non-Latin script languages, the main one being over-segmentation. In this work, we propose MAGNET\u2014 multilingual adaptive gradient-based tokenization\u2014to reduce over-segmentation via adaptive gradient-based subword tokenization. MAGNET learns to predict segment boundaries between byte tokens in a sequence via sub-modules within the model, which act as internal boundary predictors (tokenizers). Previous gradient-based tokenization methods aimed for uniform compression across sequences by integrating a single boundary predictor during training and optimizing it end-to-end through stochastic reparameterization alongside the next token prediction objective. However, this approach still results in over-segmentation for non-Latin script languages in multilingual settings. In contrast, MAGNET offers a customizable architecture where byte-level sequences are routed through language-script-specific predictors, each optimized for its respective language script. This modularity enforces equitable segmentation granularity across different language scripts compared to previous methods. Through extensive experiments, we demonstrate that in addition to reducing segmentation disparities, MAGNET also enables faster language modeling and improves downstream utility.", "keywords": "tokenization;multilingual LMs;over-segmentation;fariness", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Orevaoghene Ahia;Sachin Kumar;Hila Gonen;Valentin Hofmann;Tomasz Limisiewicz;Yulia Tsvetkov;Noah A. Smith", "authorids": "~Orevaoghene_Ahia1;~Sachin_Kumar1;~Hila_Gonen1;~Valentin_Hofmann1;~Tomasz_Limisiewicz1;~Yulia_Tsvetkov1;~Noah_A._Smith2", "gender": ";M;;;M;F;", "homepage": ";https://shocheen.com;https://gonenhila.github.io/;https://valentinhofmann.github.io/;https://ufal.mff.cuni.cz/tomasz-limisiewicz;https://homes.cs.washington.edu/~yuliats/;", "dblp": ";31/4484-9;167/5312;264/4665;264/0110;75/8157;", "google_scholar": ";qO38fRIAAAAJ;URThmtMAAAAJ;bbHOPKwAAAAJ;RqxyTsgAAAAJ;SEDPkrsAAAAJ;", "orcid": ";;;;0000-0003-3809-2580;0000-0002-4634-7128;", "linkedin": ";;;;;;", "or_profile": "~Orevaoghene_Ahia1;~Sachin_Kumar1;~Hila_Gonen1;~Valentin_Hofmann1;~Tomasz_Limisiewicz1;~Yulia_Tsvetkov1;~Noah_A._Smith2", "aff": ";Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;Charles University ;Department of Computer Science, University of Washington;", "aff_domain": ";allenai.org;uw.edu;allenai.org;cuni.cz;cs.washington.edu;", "position": ";Postdoc;Postdoc;Postdoc;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nahia2024magnet,\ntitle={{MAGNET}: Improving the Multilingual Fairness of Language Models with Adaptive Gradient-Based Tokenization},\nauthor={Orevaoghene Ahia and Sachin Kumar and Hila Gonen and Valentin Hofmann and Tomasz Limisiewicz and Yulia Tsvetkov and Noah A. Smith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1e3MOwHSIX}\n}", "github": "", "reviewers": "stTy;2xYq;WSTY;KW7H", "pdf_size": 1561170, "rating": "5;6;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "67;118;58;165", "wc_strengths": "54;38;28;109", "wc_weaknesses": "93;44;30;88", "wc_questions": "68;64;9;1", "wc_limitations": "4;1;1;54", "wc_review": "286;265;126;417", "wc_reply_reviewers": "0;57;0;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.0, 42.970920399730794 ], "wc_strengths_avg": [ 57.25, 31.283981524096323 ], "wc_weaknesses_avg": [ 63.75, 27.261465477849864 ], "wc_questions_avg": [ 35.5, 30.663496212923928 ], "wc_limitations_avg": [ 15.0, 22.54994456755936 ], "wc_review_avg": [ 273.5, 103.17097460041754 ], "wc_reply_reviewers_avg": [ 15.25, 24.159625411003375 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17563120677406526945&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": ";allenai.org;uw.edu;allenai.org;cuni.cz;cs.washington.edu;", "author_num": 7, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Washington;Charles University", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://www.washington.edu;https://www.cuni.cz", "aff_unique_abbr": "AI2;UW;Charles U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Czech Republic" }, { "title": "Learning to Discuss Strategically: A Case Study on One Night Ultimate Werewolf", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96856", "id": "1f82rnwCbl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1f82rnwCbl", "openreview": "https://openreview.net/forum?id=1f82rnwCbl", "poster": "", "project": "", "author_site": "Xuanfa Jin, Ziyan Wang, Yali Du, Meng Fang, Haifeng Zhang, Jun Wang", "tldr": "", "abstract": "Communication is a fundamental aspect of human society, facilitating the exchange of information and beliefs among people. Despite the advancements in large language models (LLMs), recent agents built with these often neglect the control over discussion tactics, which are essential in communication scenarios and games. As a variant of the famous communication game Werewolf, *One Night Ultimate Werewolf* (ONUW) requires players to develop strategic discussion policies due to the potential role changes that increase the uncertainty and complexity of the game. In this work, we first present the existence of the Perfect Bayesian Equilibria (PBEs) in two scenarios of the ONUW game: one with discussion and one without. The results showcase that the discussion greatly changes players' utilities by affecting their beliefs, emphasizing the significance of discussion tactics. Based on the insights obtained from the analyses, we propose an RL-instructed language agent framework, where a discussion policy trained by reinforcement learning (RL) is employed to determine appropriate discussion tactics to adopt. Our experimental results on several ONUW game settings demonstrate the effectiveness and generalizability of our proposed framework.", "keywords": "Large Language Models;LLM-based Agent;Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Xuanfa Jin;Ziyan Wang;Yali Du;Meng Fang;Haifeng Zhang;Jun Wang", "authorids": "~Xuanfa_Jin1;~Ziyan_Wang3;~Yali_Du1;~Meng_Fang1;~Haifeng_Zhang3;~Jun_Wang2", "gender": ";M;;M;;M", "homepage": ";https://ziyan-wang98.github.io/;;;https://pkuzhf.github.io;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": ";;;67/463;93/7133-2;w/JunWang12", "google_scholar": ";1Yu8JFIAAAAJ;;IcNYP1oAAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Xuanfa_Jin1;~Ziyan_Wang3;~Yali_Du1;~Meng_Fang1;~Haifeng_Zhang3;~Jun_Wang2", "aff": ";King's College London;;Eindhoven University of Technology;Institute of Automation, Chinese Academy of Sciences;University College London", "aff_domain": ";kcl.ac.uk;;tue.nl;ia.ac.cn;ucl.ac.uk", "position": ";PhD student;;Assistant Professor;Associate Professor;Professor", "bibtex": "@inproceedings{\njin2024learning,\ntitle={Learning to Discuss Strategically: A Case Study on One Night Ultimate Werewolf},\nauthor={Xuanfa Jin and Ziyan Wang and Yali Du and Meng Fang and Haifeng Zhang and Jun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1f82rnwCbl}\n}", "github": "", "reviewers": "uoU5;ADZR;b5Fq;Tkt3", "pdf_size": 778823, "rating": "4;6;7;8", "confidence": "4;3;3;4", "soundness": "3;3;4;3", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "67;67;32;94", "wc_strengths": "70;67;202;126", "wc_weaknesses": "363;159;179;170", "wc_questions": "225;56;110;52", "wc_limitations": "77;58;9;12", "wc_review": "802;407;532;454", "wc_reply_reviewers": "17;11;26;17", "wc_reply_authors": "41;16;80;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.0, 22.01136070305514 ], "wc_strengths_avg": [ 116.25, 54.8013457863947 ], "wc_weaknesses_avg": [ 217.75, 84.15870424382733 ], "wc_questions_avg": [ 110.75, 69.82612333503845 ], "wc_limitations_avg": [ 39.0, 29.300170647967224 ], "wc_review_avg": [ 548.75, 152.87801509700472 ], "wc_reply_reviewers_avg": [ 17.75, 5.356071321407137 ], "wc_reply_authors_avg": [ 39.5, 25.184320518926057 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.16903085094570333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6901202049349919655&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";kcl.ac.uk;;tue.nl;ia.ac.cn;ucl.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "King's College London;Eindhoven University of Technology;Chinese Academy of Sciences;University College London", "aff_unique_dep": ";;Institute of Automation;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.tue.nl;http://www.ia.cas.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "KCL;TU/e;CAS;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;Netherlands;China" }, { "title": "Large Language Models as Urban Residents: An LLM Agent Framework for Personal Mobility Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96855", "id": "1iHmhMHNyA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1iHmhMHNyA", "openreview": "https://openreview.net/forum?id=1iHmhMHNyA", "poster": "/media/PosterPDFs/NeurIPS%202024/96855.png?t=1728876412.4830987", "project": "", "author_site": "WANG JIAWEI, Renhe Jiang, Chuang Yang, Zengqing Wu, makoto onizuka, Ryosuke Shibasaki, Noboru Koshizuka, Chuan Xiao", "tldr": "", "abstract": "This paper introduces a novel approach using Large Language Models (LLMs) integrated into an agent framework for flexible and effective personal mobility generation. LLMs overcome the limitations of previous models by effectively processing semantic data and offering versatility in modeling various tasks. Our approach addresses three research questions: aligning LLMs with real-world urban mobility data, developing reliable activity generation strategies, and exploring LLM applications in urban mobility. The key technical contribution is a novel LLM agent framework that accounts for individual activity patterns and motivations, including a self-consistency approach to align LLMs with real-world activity data and a retrieval-augmented strategy for interpretable activity generation. We evaluate our LLM agent framework and compare it with state-of-the-art personal mobility generation approaches, demonstrating the effectiveness of our approach and its potential applications in urban mobility. Overall, this study marks the pioneering work of designing an LLM agent framework for activity generation based on real-world human activity data, offering a promising tool for urban mobility analysis.", "keywords": "LLM;human mobility;urban computing;trajectory generation", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/2664201763ab1744ed36e9f3116845a1a4354dcd.zip", "author": "Jiawei Wang;Renhe Jiang;Chuang Yang;Zengqing Wu;Makoto Onizuka;Ryosuke Shibasaki;Noboru Koshizuka;Chuan Xiao", "authorids": "~Jiawei_Wang17;~Renhe_Jiang1;~Chuang_Yang3;~Zengqing_Wu3;~Makoto_Onizuka1;~Ryosuke_Shibasaki1;~Noboru_Koshizuka1;~Chuan_Xiao2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://www.renhejiang.com/;https://sustc-chuangyang.github.io/;https://wuzengqing001225.github.io/;http://www-bigdata.ist.osaka-u.ac.jp/professor/onizuka/onizuka_en.html;;http://noboru.koshizuka-lab.org/;https://sites.google.com/site/chuanxiao1983/;http://wangjw6.github.io/", "dblp": "213/1173;61/2794;343/5889;19/3497;;84/6935;57/4384-1.html;98/7308-5", "google_scholar": "Yo2lwasAAAAJ;Zpk004QAAAAJ;8p3HcqsAAAAJ;oJ6G8gUAAAAJ;0UjOE4IAAAAJ;https://scholar.google.co.jp/citations?user=eT9WD0MAAAAJ;15bmyOkAAAAJ;Y1gU9wYAAAAJ", "orcid": "0000-0003-2593-4638;;0000-0002-4785-0251;0000-0001-5559-8300;;0000-0002-4728-9425;0000-0001-7239-5134;0000-0002-6893-0912", "linkedin": "renhejiang/;;;;;noboru-koshizuka-74116b2a1/;chuan-xiao-8352071a0/;", "or_profile": "~Renhe_Jiang1;~Chuang_Yang3;~Zengqing_Wu3;~Makoto_Onizuka1;~Ryosuke_Shibasaki1;~Noboru_Koshizuka1;~Chuan_Xiao2;~jiawei_wang2", "aff": "The University of Tokyo;The University of Tokyo;Kyoto University;Osaka University;Reitaku University;The University of Tokyo;Osaka University;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;kyoto-u.ac.jp;osaka-u.ac.jp;reitaku-u.ac.jp;iii.u-tokyo.ac.jp;osaka-u.ac.jp;u-tokyo.ac.jp", "position": "Lecturer;PhD student;MS student;Full Professor;Full Professor;Full Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nwang2024large,\ntitle={Large Language Models as Urban Residents: An {LLM} Agent Framework for Personal Mobility Generation},\nauthor={Jiawei Wang and Renhe Jiang and Chuang Yang and Zengqing Wu and Makoto Onizuka and Ryosuke Shibasaki and Noboru Koshizuka and Chuan Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1iHmhMHNyA}\n}", "github": "", "reviewers": "xLxx;7xtG;c5UM;abCw", "pdf_size": 4371463, "rating": "4;5;6;6", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;2;4;2", "wc_summary": "47;107;117;74", "wc_strengths": "30;60;65;31", "wc_weaknesses": "108;118;81;53", "wc_questions": "78;18;148;75", "wc_limitations": "36;10;128;2", "wc_review": "299;313;539;235", "wc_reply_reviewers": "496;16;19;18", "wc_reply_authors": "1707;31;563;31", "reply_reviewers": "3;1;1;1", "reply_authors": "6;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 86.25, 27.6891224129621 ], "wc_strengths_avg": [ 46.5, 16.101242188104617 ], "wc_weaknesses_avg": [ 90.0, 25.288337232803585 ], "wc_questions_avg": [ 79.75, 46.088908644054484 ], "wc_limitations_avg": [ 44.0, 50.0999001995014 ], "wc_review_avg": [ 346.5, 114.96412483901229 ], "wc_reply_reviewers_avg": [ 137.25, 207.12722539540763 ], "wc_reply_authors_avg": [ 583.0, 684.3215618406306 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5836374863083976789&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp;kyoto-u.ac.jp;osaka-u.ac.jp;reitaku-u.ac.jp;iii.u-tokyo.ac.jp;osaka-u.ac.jp;u-tokyo.ac.jp", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0;2;0", "aff_unique_norm": "University of Tokyo;Kyoto University;Osaka University;Reitaku University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.kyoto-u.ac.jp;https://www.osaka-u.ac.jp;https://www.reitaku-u.ac.jp", "aff_unique_abbr": "UTokyo;Kyoto U;Osaka U;Reitaku", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "FlowTurbo: Towards Real-time Flow-Based Image Generation with Velocity Refiner", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96854", "id": "1jG5ngXVs3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1jG5ngXVs3", "openreview": "https://openreview.net/forum?id=1jG5ngXVs3", "poster": "", "project": "", "author_site": "Wenliang Zhao, Minglei Shi, Xumin Yu, Jie Zhou, Jiwen Lu", "tldr": "", "abstract": "Building on the success of diffusion models in visual generation, flow-based models reemerge as another prominent family of generative models that have achieved competitive or better performance in terms of both visual quality and inference speed. By learning the velocity field through flow-matching, flow-based models tend to produce a straighter sampling trajectory, which is advantageous during the sampling process. However, unlike diffusion models for which fast samplers are well-developed, efficient sampling of flow-based generative models has been rarely explored. In this paper, we propose a framework called FlowTurbo to accelerate the sampling of flow-based models while still enhancing the sampling quality. Our primary observation is that the velocity predictor's outputs in the flow-based models will become stable during the sampling, enabling the estimation of velocity via a lightweight velocity refiner. Additionally, we introduce several techniques including a pseudo corrector and sample-aware compilation to further reduce inference time. Since FlowTurbo does not change the multi-step sampling paradigm, it can be effectively applied for various tasks such as image editing, inpainting, etc. By integrating FlowTurbo into different flow-based models, we obtain an acceleration ratio of 53.1\\%$\\sim$58.3\\% on class-conditional generation and 29.8\\%$\\sim$38.5\\% on text-to-image generation. Notably, FlowTurbo reaches an FID of 2.12 on ImageNet with 100 (ms / img) and FID of 3.93 with 38 (ms / img), achieving the real-time image generation and establishing the new state-of-the-art. Code is available at https://github.com/shiml20/FlowTurbo.", "keywords": "flow-based generation;diffusion models;efficient sampling;image generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/7357058ad8eea713f27f69652545c51ce34327a8.zip", "author": "Wenliang Zhao;Minglei Shi;Xumin Yu;Jie Zhou;Jiwen Lu", "authorids": "~Wenliang_Zhao2;~Minglei_Shi1;~Xumin_Yu2;~Jie_Zhou3;~Jiwen_Lu1", "gender": "M;M;M;M;M", "homepage": "https://thu-jw.github.io;https://github.com/shiml20;https://yuxumin.github.io/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": ";;237/0070;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": ";;zfDZMZAAAAAJ;;TN8uDQoAAAAJ", "orcid": ";;;;0000-0002-6121-5529", "linkedin": ";;;;", "or_profile": "~Wenliang_Zhao2;~Minglei_Shi1;~Xumin_Yu2;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Tsinghua University;Tsinghua University;Department of Automation, Tsinghua University, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2024flowturbo,\ntitle={FlowTurbo: Towards Real-time Flow-Based Image Generation with Velocity Refiner},\nauthor={Wenliang Zhao and Minglei Shi and Xumin Yu and Jie Zhou and Jiwen Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1jG5ngXVs3}\n}", "github": "", "reviewers": "yH4R;RYd7;Q4Pt;PyDm", "pdf_size": 37807087, "rating": "5;6;7;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "98;78;114;129", "wc_strengths": "124;49;122;93", "wc_weaknesses": "232;129;74;393", "wc_questions": "105;37;40;147", "wc_limitations": "29;1;9;16", "wc_review": "588;294;359;778", "wc_reply_reviewers": "86;63;18;24", "wc_reply_authors": "107;59;31;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.75, 18.93904696651867 ], "wc_strengths_avg": [ 97.0, 30.306764921383476 ], "wc_weaknesses_avg": [ 207.0, 121.44340245562951 ], "wc_questions_avg": [ 82.25, 46.21349911010851 ], "wc_limitations_avg": [ 13.75, 10.280442597476044 ], "wc_review_avg": [ 504.75, 191.8689331288419 ], "wc_reply_reviewers_avg": [ 47.75, 28.039035290109393 ], "wc_reply_authors_avg": [ 56.0, 31.921779399024736 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3701908058204140236&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Does Egalitarian Fairness Lead to Instability? The Fairness Bounds in Stable Federated Learning Under Altruistic Behaviors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96853", "id": "1kyc4TSOFZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1kyc4TSOFZ", "openreview": "https://openreview.net/forum?id=1kyc4TSOFZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96853.png?t=1733364442.5641255", "project": "", "author_site": "Jiashi Gao, Ziwei Wang, Xiangyu Zhao, Xin Yao, Xuetao Wei", "tldr": "", "abstract": "Federated learning (FL) offers a machine learning paradigm that protects privacy, allowing multiple clients to collaboratively train a global model while only accessing their local data. Recent research in FL has increasingly focused on improving the uniformity of model performance across clients, a fairness principle known as egalitarian fairness. However, achieving egalitarian fairness in FL may sacrifice the model performance for data-rich clients to benefit those with less data. This trade-off raises concerns about the stability of FL, as data-rich clients may opt to leave the current coalition and join another that is more closely aligned with its expected high performance. In this context, our work rigorously addresses the critical concern: **Does egalitarian fairness lead to instability?** Drawing from game theory and social choice theory, we initially characterize fair FL systems as altruism coalition formation games (ACFGs) and reveal that the instability issues emerging from the pursuit of egalitarian fairness are significantly related to the clients\u2019 altruism within the coalition and the configuration of the friends-relationship networks among the clients. Then, we theoretically propose the optimal egalitarian fairness bounds that an FL coalition can achieve while maintaining core stability under various types of altruistic behaviors. The theoretical contributions clarify the quantitative relationships between achievable egalitarian fairness and the disparities in the sizes of local datasets, disproving the misconception that egalitarian fairness inevitably leads to instability. Finally, we conduct experiments to evaluate the consistency of our theoretically derived egalitarian fairness bounds with the empirically achieved egalitarian fairness in fair FL settings.", "keywords": "Fairness;Stability;Federated Learning;Altruism", "primary_area": "fairness", "supplementary_material": "/attachment/bf395f153b23afa09a8db76eaf1c045f60a722c0.zip", "author": "Jiashi Gao;Ziwei Wang;Xiangyu Zhao;Xin Yao;Xuetao Wei", "authorids": "~Jiashi_Gao1;~Ziwei_Wang3;~Xiangyu_Zhao1;~Xin_Yao1;~Xuetao_Wei2", "gender": "F;F;M;;M", "homepage": ";https://scholar.google.com/citations?hl=en&user=9prBQQwAAAAJ;https://zhaoxyai.github.io/;http://www.cs.bham.ac.uk/~xin;https://cse.sustech.edu.cn/faculty/~weixt/", "dblp": "221/1810;;08/890-1.html;;09/5916", "google_scholar": ";;;;8fNwEScAAAAJ", "orcid": ";;0000-0003-2926-4416;;0000-0002-4450-2251", "linkedin": ";;;;", "or_profile": "~Jiashi_Gao1;~Ziwei_Wang3;~Xiangyu_Zhao1;~Xin_Yao1;~Xuetao_Wei2", "aff": "Southern University of Science and Technology;University of Birmingham;City University of Hong Kong;;Southern University of Science and Technology", "aff_domain": "sustech.edu.cn;bham.ac.uk;cityu.edu.hk;;sustech.edu.cn", "position": "PhD student;PhD student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\ngao2024does,\ntitle={Does Egalitarian Fairness Lead to Instability? The Fairness Bounds in Stable Federated Learning Under Altruistic Behaviors},\nauthor={Jiashi Gao and Ziwei Wang and Xiangyu Zhao and Xin Yao and Xuetao Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1kyc4TSOFZ}\n}", "github": "", "reviewers": "X2VX;r5t6;CtFP;LduT", "pdf_size": 824075, "rating": "5;5;6;8", "confidence": "4;3;4;5", "soundness": "3;3;4;4", "novelty": "1;3;2;4", "presentation": "3;3;3;3", "wc_summary": "58;110;112;82", "wc_strengths": "32;81;149;165", "wc_weaknesses": "222;139;117;96", "wc_questions": "25;41;844;9", "wc_limitations": "2;73;505;1", "wc_review": "339;444;1727;353", "wc_reply_reviewers": "42;0;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.5, 22.197972880423112 ], "wc_strengths_avg": [ 106.75, 53.45266597654415 ], "wc_weaknesses_avg": [ 143.5, 47.80428851055102 ], "wc_questions_avg": [ 229.75, 354.8178229739876 ], "wc_limitations_avg": [ 145.25, 209.74314649113091 ], "wc_review_avg": [ 715.75, 585.2355829065762 ], "wc_reply_reviewers_avg": [ 13.5, 17.168284713389397 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aV1oaOkXItMJ:scholar.google.com/&scioq=Does+Egalitarian+Fairness+Lead+to+Instability%3F+The+Fairness+Bounds+in+Stable+Federated+Learning+Under+Altruistic+Behaviors&hl=en&as_sdt=0,10", "gs_version_total": 0, "email": "sustech.edu.cn;bham.ac.uk;cityu.edu.hk;;sustech.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Southern University of Science and Technology;University of Birmingham;City University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sustech.edu.cn;https://www.birmingham.ac.uk;https://www.cityu.edu.hk", "aff_unique_abbr": "SUSTech;Birmingham;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Unlocking the Capabilities of Masked Generative Models for Image Synthesis via Self-Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96852", "id": "1l9cEyFmxg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1l9cEyFmxg", "openreview": "https://openreview.net/forum?id=1l9cEyFmxg", "poster": "/media/PosterPDFs/NeurIPS%202024/96852.png?t=1733112902.3564353", "project": "", "author_site": "Jiwan Hur, DongJae Lee, Gyojin Han, Jaehyun Choi, Yunho Jeon, Junmo Kim", "tldr": "", "abstract": "Masked generative models (MGMs) have shown impressive generative ability while providing an order of magnitude efficient sampling steps compared to continuous diffusion models. However, MGMs still underperform in image synthesis compared to recent well-developed continuous diffusion models with similar size in terms of quality and diversity of generated samples. A key factor in the performance of continuous diffusion models stems from the guidance methods, which enhance the sample quality at the expense of diversity. In this paper, we extend these guidance methods to generalized guidance formulation for MGMs and propose a self-guidance sampling method, which leads to better generation quality. The proposed approach leverages an auxiliary task for semantic smoothing in vector-quantized token space, analogous to the Gaussian blur in continuous pixel space. Equipped with the parameter-efficient fine-tuning method and high-temperature sampling, MGMs with the proposed self-guidance achieve a superior quality-diversity trade-off, outperforming existing sampling methods in MGMs with more efficient training and sampling costs. Extensive experiments with the various sampling hyperparameters confirm the effectiveness of the proposed self-guidance.", "keywords": "Image synthesis;discrete diffusion models;masked generative models;sampling guidance;parameter- efficient fine-tuning", "primary_area": "generative_models", "supplementary_material": "", "author": "Jiwan Hur;Dong-Jae Lee;Gyojin Han;Jaehyun Choi;Yunho Jeon;Junmo Kim", "authorids": "~Jiwan_Hur1;~Dong-Jae_Lee1;~Gyojin_Han1;~Jaehyun_Choi1;~Yunho_Jeon1;~Junmo_Kim1", "gender": "M;M;M;M;M;M", "homepage": ";;;;https://effailab.hanbat.ac.kr;https://siit.kaist.ac.kr/Faculty", "dblp": "337/9896;121/1605;334/7869;62/6980;126/4768;40/240-2.html", "google_scholar": "3jag3z4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?hl=en;;-FEJAZAAAAAJ;https://scholar.google.com.tw/citations?user=GdQtWNQAAAAJ", "orcid": ";;;0000-0002-9183-761X;0000-0001-8043-480X;", "linkedin": ";;;;yh-jeon;", "or_profile": "~Jiwan_Hur1;~Dong-Jae_Lee1;~Gyojin_Han1;~Jaehyun_Choi1;~Yunho_Jeon1;~Junmo_Kim1", "aff": "KAIST;KAIST, Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Hanbat National University;Korea Advanced Institute of Science & Technology", "aff_domain": "ee.kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;hanbat.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhur2024unlocking,\ntitle={Unlocking the Capabilities of Masked Generative Models for Image Synthesis via Self-Guidance},\nauthor={Jiwan Hur and Dong-Jae Lee and Gyojin Han and Jaehyun Choi and Yunho Jeon and Junmo Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1l9cEyFmxg}\n}", "github": "", "reviewers": "qSSj;cCTZ;Lc5P", "pdf_size": 4944883, "rating": "6;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "79;66;82", "wc_strengths": "173;71;172", "wc_weaknesses": "111;63;106", "wc_questions": "63;153;33", "wc_limitations": "107;18;23", "wc_review": "533;371;416", "wc_reply_reviewers": "0;36;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.66666666666667, 6.944222218666553 ], "wc_strengths_avg": [ 138.66666666666666, 47.84930047091133 ], "wc_weaknesses_avg": [ 93.33333333333333, 21.545816814923082 ], "wc_questions_avg": [ 83.0, 50.99019513592785 ], "wc_limitations_avg": [ 49.333333333333336, 40.827550610940264 ], "wc_review_avg": [ 440.0, 68.27884006044626 ], "wc_reply_reviewers_avg": [ 12.0, 16.97056274847714 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tZV4EMClbgUJ:scholar.google.com/&scioq=Unlocking+the+Capabilities+of+Masked+Generative+Models+for+Image+Synthesis+via+Self-Guidance&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ee.kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;hanbat.ac.kr;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Hanbat National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.hanbat.ac.kr", "aff_unique_abbr": "KAIST;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Theoretical and Empirical Insights into the Origins of Degree Bias in Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96851", "id": "1mAaewThcz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1mAaewThcz", "openreview": "https://openreview.net/forum?id=1mAaewThcz", "poster": "/media/PosterPDFs/NeurIPS%202024/96851.png?t=1733176236.3320782", "project": "", "author_site": "Arjun Subramonian, Jian Kang, Yizhou Sun", "tldr": "", "abstract": "Graph Neural Networks (GNNs) often perform better for high-degree nodes than low-degree nodes on node classification tasks. This degree bias can reinforce social marginalization by, e.g., privileging celebrities and other high-degree actors in social networks during social and content recommendation. While researchers have proposed numerous hypotheses for why GNN degree bias occurs, we find via a survey of 38 degree bias papers that these hypotheses are often not rigorously validated, and can even be contradictory. Thus, we provide an analysis of the origins of degree bias in message-passing GNNs with different graph filters. We prove that high-degree test nodes tend to have a lower probability of misclassification regardless of how GNNs are trained. Moreover, we show that degree bias arises from a variety of factors that are associated with a node's degree (e.g., homophily of neighbors, diversity of neighbors). Furthermore, we show that during training, some GNNs may adjust their loss on low-degree nodes more slowly than on high-degree nodes; however, with sufficiently many epochs of training, message-passing GNNs can achieve their maximum possible training accuracy, which is not significantly limited by their expressive power. Throughout our analysis, we connect our findings to previously-proposed hypotheses for the origins of degree bias, supporting and unifying some while drawing doubt to others. We validate our theoretical findings on 8 common real-world networks, and based on our theoretical and empirical insights, describe a roadmap to alleviate degree bias.", "keywords": "graph learning;fairness;degree", "primary_area": "fairness", "supplementary_material": "/attachment/05e6e191b20d8543f544406af6d271ed32caaf29.zip", "author": "Arjun Subramonian;Jian Kang;Yizhou Sun", "authorids": "~Arjun_Subramonian1;~Jian_Kang1;~Yizhou_Sun1", "gender": "Agender;M;F", "homepage": "http://arjunsubramonian.github.io/;https://jiank2.github.io/;http://web.cs.ucla.edu/~yzsun/", "dblp": "282/0168.html;56/6072-8;37/3868", "google_scholar": "MrdlDhoAAAAJ;U_jFlOQAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ", "orcid": "0000-0002-0415-3800;0000-0003-3902-7131;", "linkedin": "arjuns22/;jiank2/;", "or_profile": "~Arjun_Subramonian1;~Jian_Kang1;~Yizhou_Sun1", "aff": "University of California, Los Angeles;University of Rochester;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.rochester.edu;ucla.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsubramonian2024theoretical,\ntitle={Theoretical and Empirical Insights into the Origins of Degree Bias in Graph Neural Networks},\nauthor={Arjun Subramonian and Jian Kang and Yizhou Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1mAaewThcz}\n}", "github": "", "reviewers": "Ao8B;Xp6E;USpG;Vv7z", "pdf_size": 3495577, "rating": "5;6;7;8", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "43;77;102;145", "wc_strengths": "34;8;80;146", "wc_weaknesses": "28;257;63;139", "wc_questions": "394;68;6;78", "wc_limitations": "35;6;1;31", "wc_review": "534;416;252;539", "wc_reply_reviewers": "149;13;9;59", "wc_reply_authors": "600;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.75, 37.19795021234369 ], "wc_strengths_avg": [ 67.0, 52.392747589718944 ], "wc_weaknesses_avg": [ 121.75, 87.79343654283046 ], "wc_questions_avg": [ 136.5, 151.20433194852586 ], "wc_limitations_avg": [ 18.25, 14.922717580923388 ], "wc_review_avg": [ 435.25, 116.69056302889278 ], "wc_reply_reviewers_avg": [ 57.5, 56.36266494764065 ], "wc_reply_authors_avg": [ 150.0, 259.8076211353316 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10283197987626746843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucla.edu;cs.rochester.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Los Angeles;University of Rochester", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.rochester.edu", "aff_unique_abbr": "UCLA;U of R", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Synthetic Dataset for Personal Attribute Inference", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97872", "id": "1nqfIQIQBf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1nqfIQIQBf", "openreview": "https://openreview.net/forum?id=1nqfIQIQBf", "poster": "/media/PosterPDFs/NeurIPS%202024/97872.png?t=1733433196.188967", "project": "", "author_site": "Hanna Yukhymenko, Robin Staab, Mark Vero, Martin Vechev", "tldr": "", "abstract": "Recently powerful Large Language Models (LLMs) have become easily accessible to hundreds of millions of users world-wide. However, their strong capabilities and vast world knowledge do not come without associated privacy risks. In this work, we focus on the emerging privacy threat LLMs pose \u2013 the ability to accurately infer personal information from online texts. Despite the growing importance of LLM-based author profiling, research in this area has been hampered by a lack of suitable public datasets, largely due to ethical and privacy concerns associated with real personal data. We take two steps to address this problem: (i) we construct a simulation framework for the popular social media platform Reddit using LLM agents seeded with synthetic personal profiles; (ii) using this framework, we generate *SynthPAI*, a diverse synthetic dataset of over 7800 comments manually labeled for personal attributes. We validate our dataset with a human study showing that humans barely outperform random guessing on the task of distinguishing our synthetic comments from real ones. Further, we verify that our dataset enables meaningful personal attribute inference research by showing across 18 state-of-the-art LLMs that our synthetic comments allow us to draw the same conclusions as real-world data. Combined, our experimental results, dataset and pipeline form a strong basis for future privacy-preserving research geared towards understanding and mitigating inference-based privacy threats that LLMs pose.", "keywords": "privacy;synthetic data;large language models;social media", "primary_area": "", "supplementary_material": "", "author": "Hanna Yukhymenko;Robin Staab;Mark Vero;Martin Vechev", "authorids": "~Hanna_Yukhymenko1;~Robin_Staab1;~Mark_Vero1;~Martin_Vechev1", "gender": "F;M;M;M", "homepage": "https://ayukh.com;;https://www.sri.inf.ethz.ch/people/markvero;https://www.sri.inf.ethz.ch/people/martin", "dblp": ";304/3512;319/4985;93/2189.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;vguDYtQAAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": "0000-0003-0485-3273;;;", "linkedin": "hanna-yukhymenko/;robin-staab-b778a51a6/;https://linkedin.com/in/mark-vero-9a32bb17a;", "or_profile": "~Hanna_Yukhymenko1;~Robin_Staab1;~Mark_Vero1;~Martin_Vechev1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ-ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;inf.ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyukhymenko2024a,\ntitle={A Synthetic Dataset for Personal Attribute Inference},\nauthor={Hanna Yukhymenko and Robin Staab and Mark Vero and Martin Vechev},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=1nqfIQIQBf}\n}", "github": "", "reviewers": "RgP5;TQdq;DQhN;K8Kg", "pdf_size": 4799401, "rating": "7;7;7;8", "confidence": "3;4;3;5", "wc_summary_and_contributions": "93;105;38;96", "wc_strengths": "78;68;99;50", "wc_improvement": "91;139;56;225", "wc_limitations": "9;29;7;10", "wc_correctness": "2;17;1;8", "wc_clarity": "5;20;1;6", "wc_relation_to_prior_work": "8;22;1;10", "wc_documentation": "5;39;1;35", "wc_additional_feedback": "1;1;1;1", "wc_review": "292;440;205;441", "wc_reply_reviewers": "0;0;0;19", "wc_reply_authors": "0;0;0;41", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 83.0, 26.353367906208877 ], "wc_strengths_avg": [ 73.75, 17.69710428290459 ], "wc_improvement_avg": [ 127.75, 63.408891332367574 ], "wc_limitations_avg": [ 13.75, 8.870597499605086 ], "wc_correctness_avg": [ 7.0, 6.363961030678928 ], "wc_clarity_avg": [ 8.0, 7.176350047203662 ], "wc_relation_to_prior_work_avg": [ 10.25, 7.562241731127087 ], "wc_documentation_avg": [ 20.0, 17.11724276862369 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 344.5, 100.8079857947772 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 10.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1582891935203599880&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ethz.ch;ethz.ch;inf.ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "ANT: Adaptive Noise Schedule for Time Series Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96850", "id": "1ojAkTylz4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1ojAkTylz4", "openreview": "https://openreview.net/forum?id=1ojAkTylz4", "poster": "/media/PosterPDFs/NeurIPS%202024/96850.png?t=1730347906.1667516", "project": "", "author_site": "Seunghan Lee, Kibok Lee, Taeyoung Park", "tldr": "", "abstract": "Advances in diffusion models for generative artificial intelligence have recently propagated to the time series (TS) domain, demonstrating state-of-the-art performance on various tasks. However, prior works on TS diffusion models often borrow the framework of existing works proposed in other domains without considering the characteristics of TS data, leading to suboptimal performance. In this work, we\npropose Adaptive Noise schedule for Time series diffusion models (ANT), which automatically predetermines proper noise schedules for given TS datasets based on their statistics representing non-stationarity. Our intuition is that an optimal noise schedule should satisfy the following desiderata: 1) It linearly reduces the non-stationarity of TS data so that all diffusion steps are equally meaningful, 2) the data is corrupted to the random noise at the final step, and 3) the number of steps is sufficiently large. The proposed method is practical for use in that it eliminates the necessity of finding the optimal noise schedule with a small additional cost to compute the statistics for given datasets, which can be done offline before training. We validate the effectiveness of our method across various tasks, including TS forecasting, refinement, and generation, on datasets from diverse domains. Code is available at this repository: https://github.com/seunghan96/ANT.", "keywords": "Diffusion Model;Noise Schedule;Time Series", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/678aa9564917226a276b47781371973cac726f61.zip", "author": "Seunghan Lee;Kibok Lee;Taeyoung Park", "authorids": "~Seunghan_Lee1;~Kibok_Lee1;~Taeyoung_Park1", "gender": "M;M;M", "homepage": "https://seunghan96.github.io/;https://ml.yonsei.ac.kr/;https://web.yonsei.ac.kr/tpark", "dblp": ";157/3147;", "google_scholar": ";6wwWRdEAAAAJ;https://scholar.google.co.kr/citations?hl=en", "orcid": ";0000-0001-6995-7327;0000-0001-7405-0746", "linkedin": ";;taeyoung-park-27a93b14/?originalSubdomain=kr", "or_profile": "~Seunghan_Lee1;~Kibok_Lee1;~Taeyoung_Park1", "aff": "Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlee2024ant,\ntitle={{ANT}: Adaptive Noise Schedule for Time Series Diffusion Models},\nauthor={Seunghan Lee and Kibok Lee and Taeyoung Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1ojAkTylz4}\n}", "github": "", "reviewers": "eJYD;YUJG;xQJK", "pdf_size": 7868686, "rating": "6;6;7", "confidence": "2;4;4", "soundness": "2;3;3", "novelty": "3;2;3", "presentation": "3;3;2", "wc_summary": "48;35;169", "wc_strengths": "50;71;178", "wc_weaknesses": "45;67;51", "wc_questions": "84;32;72", "wc_limitations": "1;23;87", "wc_review": "228;228;557", "wc_reply_reviewers": "22;12;0", "wc_reply_authors": "22;22;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.0, 60.33793720924396 ], "wc_strengths_avg": [ 99.66666666666667, 56.04958122551458 ], "wc_weaknesses_avg": [ 54.333333333333336, 9.285592184789413 ], "wc_questions_avg": [ 62.666666666666664, 22.23110933404409 ], "wc_limitations_avg": [ 37.0, 36.478304054145205 ], "wc_review_avg": [ 337.6666666666667, 155.09208734024944 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 8.993825042154695 ], "wc_reply_authors_avg": [ 14.666666666666666, 10.370899457402697 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16381944728765153166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Sample-Efficient Constrained Reinforcement Learning with General Parameterization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96849", "id": "1po4j1Tv7O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1po4j1Tv7O", "openreview": "https://openreview.net/forum?id=1po4j1Tv7O", "poster": "/media/PosterPDFs/NeurIPS%202024/96849.png?t=1731760567.2895951", "project": "", "author_site": "Washim Mondal, Vaneet Aggarwal", "tldr": "", "abstract": "We consider a constrained Markov Decision Problem (CMDP) where the goal of an agent is to maximize the expected discounted sum of rewards over an infinite horizon while ensuring that the expected discounted sum of costs exceeds a certain threshold. Building on the idea of momentum-based acceleration, we develop the Primal-Dual Accelerated Natural Policy Gradient (PD-ANPG) algorithm that ensures an $\\epsilon$ global optimality gap and $\\epsilon$ constraint violation with $\\tilde{\\mathcal{O}}((1-\\gamma)^{-7}\\epsilon^{-2})$ sample complexity for general parameterized policies where $\\gamma$ denotes the discount factor. This improves the state-of-the-art sample complexity in general parameterized CMDPs by a factor of $\\mathcal{O}((1-\\gamma)^{-1}\\epsilon^{-2})$ and achieves the theoretical lower bound in $\\epsilon^{-1}$.", "keywords": "Constrained MDP;Sample Complexity;Constraint Violation;Global Optimality.", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Washim Uddin Mondal;Vaneet Aggarwal", "authorids": "~Washim_Uddin_Mondal1;~Vaneet_Aggarwal1", "gender": "M;M", "homepage": "https://home.iitk.ac.in/~wmondal/;", "dblp": "201/9517.html;91/6560", "google_scholar": "https://scholar.google.co.in/citations?user=CQwhdyIAAAAJ;", "orcid": "0000-0002-2385-6034;", "linkedin": ";", "or_profile": "~Washim_Uddin_Mondal1;~Vaneet_Aggarwal1", "aff": "Indian Institute of Technology, Kanpur;Purdue University", "aff_domain": "iitk.ac.in;purdue.edu", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmondal2024sampleefficient,\ntitle={Sample-Efficient Constrained Reinforcement Learning with General Parameterization},\nauthor={Washim Uddin Mondal and Vaneet Aggarwal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1po4j1Tv7O}\n}", "github": "", "reviewers": "NWcj;rE2r;c2f7;jftD", "pdf_size": 434015, "rating": "6;6;6;6", "confidence": "4;4;2;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "44;48;95;35", "wc_strengths": "56;12;63;61", "wc_weaknesses": "91;150;142;229", "wc_questions": "51;172;72;64", "wc_limitations": "14;40;15;20", "wc_review": "256;422;387;409", "wc_reply_reviewers": "18;280;11;17", "wc_reply_authors": "36;330;0;16", "reply_reviewers": "1;3;1;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.5, 23.286262044390035 ], "wc_strengths_avg": [ 48.0, 20.940391591371924 ], "wc_weaknesses_avg": [ 153.0, 49.371044145328746 ], "wc_questions_avg": [ 89.75, 48.07481149209012 ], "wc_limitations_avg": [ 22.25, 10.497023387608508 ], "wc_review_avg": [ 368.5, 66.14567257198313 ], "wc_reply_reviewers_avg": [ 81.5, 114.6352912501207 ], "wc_reply_authors_avg": [ 95.5, 135.98805094566214 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13267810430513833942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "iitk.ac.in;purdue.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Indian Institute of Technology Kanpur;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitk.ac.in;https://www.purdue.edu", "aff_unique_abbr": "IIT Kanpur;Purdue", "aff_campus_unique_index": "0", "aff_campus_unique": "Kanpur;", "aff_country_unique_index": "0;1", "aff_country_unique": "India;United States" }, { "title": "Closed-Loop Visuomotor Control with Generative Expectation for Robotic Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96848", "id": "1ptdkwZbMG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1ptdkwZbMG", "openreview": "https://openreview.net/forum?id=1ptdkwZbMG", "poster": "/media/PosterPDFs/NeurIPS%202024/96848.png?t=1731477274.9188144", "project": "", "author_site": "Qingwen Bu, Jia Zeng, Li Chen, Yanchao Yang, Guyue Zhou, Junchi Yan, Ping Luo, Heming Cui, Yi Ma, Hongyang Li", "tldr": "", "abstract": "Despite significant progress in robotics and embodied AI in recent years, deploying robots for long-horizon tasks remains a great challenge. Majority of prior arts adhere to an open-loop philosophy and lack real-time feedback, leading to error accumulation and undesirable robustness. A handful of approaches have endeavored to establish feedback mechanisms leveraging pixel-level differences or pre-trained visual representations, yet their efficacy and adaptability have been found to be constrained. Inspired by classic closed-loop control systems, we propose CLOVER, a closed-loop visuomotor control framework that incorporates feedback mechanisms to improve adaptive robotic control. CLOVER consists of a text-conditioned video diffusion model for generating visual plans as reference inputs, a measurable embedding space for accurate error quantification, and a feedback-driven controller that refines actions from feedback and initiates replans as needed. Our framework exhibits notable advancement in real-world robotic tasks and achieves state-of-the-art on CALVIN benchmark, improving by 8% over previous open-loop counterparts. Code and checkpoints are maintained at https://github.com/OpenDriveLab/CLOVER.", "keywords": "Robotic Manipulation;Visuomotor Control", "primary_area": "robotics", "supplementary_material": "", "author": "Qingwen Bu;Jia Zeng;Li Chen;Yanchao Yang;Guyue Zhou;Junchi Yan;Ping Luo;Heming Cui;Yi Ma;Hongyang Li", "authorids": "~Qingwen_Bu1;~Jia_Zeng2;~Li_Chen15;~Yanchao_Yang1;~Guyue_Zhou2;~Junchi_Yan2;~Ping_Luo2;~Heming_Cui1;~Yi_Ma4;~Hongyang_Li1", "gender": "M;M;M;M;M;M;M;M;M;", "homepage": "https://github.com/retsuh-bqw;;https://ilnehc.github.io/;https://yanchaoyang.github.io/;https://air.tsinghua.edu.cn/en/info/1046/1196.htm;https://www.cs.hku.hk/people/academic-staff/heming;http://people.eecs.berkeley.edu/~yima/;https://datascience.hku.hk/people/hongyang-li/;http://thinklab.sjtu.edu.cn/;http://luoping.me/", "dblp": "326/7970;;181/2847;84/8637-1;133/4199;59/5565.html;;95/8433-1;60/7949.html;54/4989-2.html", "google_scholar": "https://scholar.google.com.hk/citations?user=-JCRysgAAAAJ;;ulZxvY0AAAAJ;r2tKnV4AAAAJ;;lW9bpFIAAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ;ga230VoAAAAJ;https://scholar.google.com.hk/citations?hl=en", "orcid": ";0000-0003-0682-4898;;;;0000-0001-7746-440X;;0000-0001-9110-5534;0000-0001-9639-7679;0000-0002-6685-7950", "linkedin": ";;;;;;;hongyangli2020/;;", "or_profile": "~Qingwen_Bu1;~Jia_Zeng2;~Li_Chen15;~Yanchao_Yang1;~Guyue_Zhou2;~Heming_Cui1;~Yi_Ma4;~Hongyang_Li1;~Junchi_Yan1;~Luo_Ping2", "aff": "Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;University of Hong Kong;Tsinghua University;the University of Hong Kong, University of Hong Kong;University of California, Berkeley;Shanghai AI Lab;Shanghai Jiaotong University;The University of Hong Kong", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;hku.hk;tsinghua.edu.cn;cs.hku.hk;berkeley.edu;pjlab.org.cn;sjtu.edu.cn;hku.hk", "position": "PhD student;Researcher;Researcher;Assistant Professor;Associate Professor;Associate Professor;Full Professor;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbu2024closedloop,\ntitle={Closed-Loop Visuomotor Control with Generative Expectation for Robotic Manipulation},\nauthor={Qingwen Bu and Jia Zeng and Li Chen and Yanchao Yang and Guyue Zhou and Junchi Yan and Ping Luo and Heming Cui and Yi Ma and Hongyang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1ptdkwZbMG}\n}", "github": "", "reviewers": "DjFL;hKyy;eUsr;hfKW", "pdf_size": 4848288, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "82;67;86;79", "wc_strengths": "160;47;82;65", "wc_weaknesses": "279;170;105;2", "wc_questions": "102;58;48;118", "wc_limitations": "49;54;10;6", "wc_review": "672;396;331;270", "wc_reply_reviewers": "18;15;23;0", "wc_reply_authors": "29;20;18;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.5, 7.088723439378913 ], "wc_strengths_avg": [ 88.5, 43.09582346353298 ], "wc_weaknesses_avg": [ 139.0, 100.6056658444245 ], "wc_questions_avg": [ 81.5, 29.270292106502797 ], "wc_limitations_avg": [ 29.75, 21.867498713844707 ], "wc_review_avg": [ 417.25, 153.68047208412656 ], "wc_reply_reviewers_avg": [ 14.0, 8.573214099741124 ], "wc_reply_authors_avg": [ 16.75, 10.520812706250407 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10605310267979717764&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;hku.hk;tsinghua.edu.cn;cs.hku.hk;berkeley.edu;pjlab.org.cn;sjtu.edu.cn;hku.hk", "author_num": 10, "aff_unique_index": "0;1;2;3;4;3;5;6;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;University of Hong Kong;Tsinghua University;University of California, Berkeley;Shanghai AI Lab", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.hku.hk;https://www.tsinghua.edu.cn;https://www.berkeley.edu;https://www.shanghaiailab.com", "aff_unique_abbr": "SJTU;Shanghai AI Lab;SAIL;HKU;THU;UC Berkeley;SAIL", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Hong Kong SAR;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "FindingEmo: An Image Dataset for Emotion Recognition in the Wild", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97871", "id": "1q3b2Z95ec", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1q3b2Z95ec", "openreview": "https://openreview.net/forum?id=1q3b2Z95ec", "poster": "/media/PosterPDFs/NeurIPS%202024/97871.png?t=1731426539.5412588", "project": "", "author_site": "Laurent Mertens, Elahe Yargholi, Hans Op de Beeck, Jan Van den Stock, Joost Vennekens", "tldr": "", "abstract": "We introduce FindingEmo, a new image dataset containing annotations for 25k images, specifically tailored to Emotion Recognition. Contrary to existing datasets, it focuses on complex scenes depicting multiple people in various naturalistic, social settings, with images being annotated as a whole, thereby going beyond the traditional focus on faces or single individuals. Annotated dimensions include Valence, Arousal and Emotion label, with annotations gathered using Prolific. Together with the annotations, we release the list of URLs pointing to the original images, as well as all associated source code.", "keywords": "Computer Vision;Dataset;Image Dataset;Emotion Recognition;Affective Computing;Social Cognition;Artificial Intelligence", "primary_area": "", "supplementary_material": "/attachment/5dd6a58988f0893cc3bee568de28e42561503c2c.zip", "author": "Laurent Mertens;Elahe Yargholi;Hans Op de Beeck;Jan Van den Stock;Joost Vennekens", "authorids": "~Laurent_Mertens1;~Elahe_Yargholi1;~Hans_Op_de_Beeck1;~Jan_Van_den_Stock1;~Joost_Vennekens1", "gender": "M;F;;Not Specified;M", "homepage": "https://www.laurentmertens.com;;https://www.hoplab.be/;;", "dblp": "143/3572;;;;01/5707.html", "google_scholar": ";SR5fXbYAAAAJ;tbpr76wAAAAJ;https://scholar.google.be/citations?view_op=list_works;https://scholar.google.be/citations?hl=en", "orcid": "0000-0001-5175-2673;;;;", "linkedin": "laurent-mertens/;;;;", "or_profile": "~Laurent_Mertens1;~Elahe_Yargholi1;~Hans_Op_de_Beeck1;~Jan_Van_den_Stock1;~Joost_Vennekens1", "aff": "KU Leuven;KU Leuven;KU Leuven;KU Leuven;KU Leuven", "aff_domain": "kuleuven.be;kuleuven.be;kuleuven.be;kuleuven.be;kuleuven.be", "position": "PhD student;Postdoc;Principal Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nmertens2024findingemo,\ntitle={FindingEmo: An Image Dataset for Emotion Recognition in the Wild},\nauthor={Laurent Mertens and Elahe Yargholi and Hans Op de Beeck and Jan Van den Stock and Joost Vennekens},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=1q3b2Z95ec}\n}", "github": "", "reviewers": "R7Fv;qesu;sx2p", "pdf_size": 5005549, "rating": "5;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "69;198;176", "wc_strengths": "66;22;152", "wc_improvement": "101;143;937", "wc_limitations": "33;67;77", "wc_correctness": "1;41;23", "wc_clarity": "1;19;31", "wc_relation_to_prior_work": "1;59;79", "wc_documentation": "1;21;7", "wc_additional_feedback": "1;1;1", "wc_review": "274;571;1483", "wc_reply_reviewers": "0;4;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 147.66666666666666, 56.34615238761994 ], "wc_strengths_avg": [ 80.0, 53.98765290940759 ], "wc_improvement_avg": [ 393.6666666666667, 384.57711262574577 ], "wc_limitations_avg": [ 59.0, 18.83259585576738 ], "wc_correctness_avg": [ 21.666666666666668, 16.35712552851373 ], "wc_clarity_avg": [ 17.0, 12.328828005937952 ], "wc_relation_to_prior_work_avg": [ 46.333333333333336, 33.079029946814074 ], "wc_documentation_avg": [ 9.666666666666666, 8.379870059984356 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 776.0, 514.4181178768881 ], "wc_reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1267518176135819223&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "kuleuven.be;kuleuven.be;kuleuven.be;kuleuven.be;kuleuven.be", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Katholieke Universiteit Leuven", "aff_unique_dep": "", "aff_unique_url": "https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Belgium" }, { "title": "Wasserstein Distance Rivals Kullback-Leibler Divergence for Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96847", "id": "1qfdCAXn6K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1qfdCAXn6K", "openreview": "https://openreview.net/forum?id=1qfdCAXn6K", "poster": "/media/PosterPDFs/NeurIPS%202024/96847.png?t=1733981508.0909197", "project": "", "author_site": "Jiaming Lv, Haoyuan Yang, Peihua Li", "tldr": "", "abstract": "Since pioneering work of Hinton et al., knowledge distillation based on Kullback-Leibler Divergence (KL-Div) has been predominant, and recently its variants have achieved compelling performance. However, KL-Div only compares probabilities of the corresponding category between the teacher and student while lacking a mechanism for cross-category comparison. Besides, KL-Div is problematic when applied to intermediate layers, as it cannot handle non-overlapping distributions and is unaware of geometry of the underlying manifold. To address these downsides, we propose a methodology of Wasserstein Distance (WD) based knowledge distillation. Specifically, we propose a logit distillation method called WKD-L based on discrete WD, which performs cross-category comparison of probabilities and thus can explicitly leverage rich interrelations among categories. Moreover, we introduce a feature distillation method called WKD-F, which uses a parametric method for modeling feature distributions and adopts continuous WD for transferring knowledge from intermediate layers. Comprehensive evaluations on image classification and object detection have shown (1) for logit distillation WKD-L outperforms very strong KL-Div variants; (2) for feature distillation WKD-F is superior to the KL-Div counterparts and state-of-the-art competitors.", "keywords": "Knowledge Distillation; Wasserstein Distance; Image Classification; Object Detection", "primary_area": "machine_vision", "supplementary_material": "/attachment/54496b5f1ee9911b6b7930a27d33be93d4770907.zip", "author": "Jiaming Lv;Haoyuan Yang;Peihua Li", "authorids": "~Jiaming_Lv1;~Haoyuan_Yang3;~Peihua_Li1", "gender": "M;M;M", "homepage": "https://github.com/JiamingLv;https://github.com/HaoyuanYang-2023;https://www.peihuali.org", "dblp": "163/2658;;80/5257", "google_scholar": ";2oJcB8IAAAAJ;AVweY3cAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiaming_Lv1;~Haoyuan_Yang3;~Peihua_Li1", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn", "position": "MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nlv2024wasserstein,\ntitle={Wasserstein Distance Rivals Kullback-Leibler Divergence for Knowledge Distillation},\nauthor={Jiaming Lv and Haoyuan Yang and Peihua Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1qfdCAXn6K}\n}", "github": "", "reviewers": "F2zQ;ScyW;Zit9;5GY4", "pdf_size": 5368568, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;2;3", "wc_summary": "70;50;61;108", "wc_strengths": "23;40;52;45", "wc_weaknesses": "188;28;89;59", "wc_questions": "50;1;65;99", "wc_limitations": "6;6;7;5", "wc_review": "337;125;274;316", "wc_reply_reviewers": "12;0;62;14", "wc_reply_authors": "50;0;42;30", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 21.821720830401986 ], "wc_strengths_avg": [ 40.0, 10.700467279516348 ], "wc_weaknesses_avg": [ 91.0, 60.01249869818786 ], "wc_questions_avg": [ 53.75, 35.25177300505607 ], "wc_limitations_avg": [ 6.0, 0.7071067811865476 ], "wc_review_avg": [ 263.0, 82.84020762890445 ], "wc_reply_reviewers_avg": [ 22.0, 23.706539182259394 ], "wc_reply_authors_avg": [ 30.5, 18.993419913222578 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11651736875643936360&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Dalian University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.dlut.edu.cn/", "aff_unique_abbr": "DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Muharaf: Manuscripts of Handwritten Arabic Dataset for Cursive Text Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97870", "id": "1s8l1tnTXW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1s8l1tnTXW", "openreview": "https://openreview.net/forum?id=1s8l1tnTXW", "poster": "/media/PosterPDFs/NeurIPS%202024/97870.png?t=1731630967.4328747", "project": "", "author_site": "Mehreen Saeed, Adrian Chan, Anupam Mijar, joseph Moukarzel, Gerges Habchi, Carlos Younes, amin elias, Chau-Wai Wong, Akram Khater", "tldr": "", "abstract": "We present the Manuscripts of Handwritten Arabic (Muharaf) dataset, which is a machine learning dataset consisting of more than 1,600 historic handwritten page images transcribed by experts in archival Arabic. Each document image is accompanied by spatial polygonal coordinates of its text lines as well as basic page elements. This dataset was compiled to advance the state of the art in handwritten text recognition (HTR), not only for Arabic manuscripts but also for cursive text in general. The Muharaf dataset includes diverse handwriting styles and a wide range of document types, including personal letters, diaries, notes, poems, church records, and legal correspondences. In this paper, we describe the data acquisition pipeline, notable dataset features, and statistics. We also provide a preliminary baseline result achieved by training convolutional neural networks using this data.", "keywords": "machine learning dataset;handwritten text recognition (HTR);optical character recognition (OCR);Arabic dataset;Ruq`ah script", "primary_area": "", "supplementary_material": "/attachment/ac57b6d83b6ebd5ef003b252d2f400199fa42df1.pdf", "author": "Mehreen Saeed;Adrian Chan;Anupam Mijar;joseph Moukarzel;Gerges Habchi;Carlos Younes;amin elias;Chau-Wai Wong;Akram Khater", "authorids": "~Mehreen_Saeed1;~Adrian_Chan1;~Anupam_Mijar1;~joseph_Moukarzel1;~Gerges_Habchi1;~Carlos_Younes1;~amin_elias1;~Chau-Wai_Wong1;~Akram_Khater1", "gender": "F;M;;M;M;M;M;M;M", "homepage": ";;https://aamijar.github.io/;https://usek.academia.edu/JosephMoukarzel;;;https://pluriel.fuce.eu/chercheur/elias-amin/;https://ncsu-wong.org/;https://lebanesestudies.ncsu.edu", "dblp": "89/4329;;;;;;;24/10474;", "google_scholar": ";H_apf5sAAAAJ;JC8eQIgAAAAJ;https://scholar.google.com/citations?hl=en;;;;ggreZvcAAAAJ;", "orcid": ";;0009-0006-8887-2541;0000-0003-3584-6834;;;;;", "linkedin": "mehreen-saeed;adrian27513/;aamijar/;joseph-moukarzel-a3a83a170/;georges-habchi-ab75a168;carlos-younes-29320054/;;;", "or_profile": "~Mehreen_Saeed1;~Adrian_Chan1;~Anupam_Mijar1;~joseph_Moukarzel1;~Gerges_Habchi1;~Carlos_Younes1;~amin_elias1;~Chau-Wai_Wong1;~Akram_Khater1", "aff": "North Carolina State University;North Carolina State University;North Carolina State University;Holy Spirit University of Kaslik;Holy Spirit University of Kaslik;Holy Spirit University of Kaslik;Lebanese University;North Carolina State University;North Carolina State University", "aff_domain": "ncsu.edu;ncsu.edu;ncsu.edu;usek.edu.lb;usek.edu.lb;usek.edu.lb;ul.edu.lb;ncsu.edu;ncsu.edu", "position": "Researcher;Undergrad student;Undergrad student;Full Professor;Researcher;Reference of Historical Archives ;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsaeed2024muharaf,\ntitle={Muharaf: Manuscripts of Handwritten Arabic Dataset for Cursive Text Recognition},\nauthor={Mehreen Saeed and Adrian Chan and Anupam Mijar and joseph Moukarzel and Gerges Habchi and Carlos Younes and amin elias and Chau-Wai Wong and Akram Khater},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=1s8l1tnTXW}\n}", "github": "", "reviewers": "9rc2;y5EV;VY5N;8PxJ", "pdf_size": 21064309, "rating": "5;6;7;7", "confidence": "3;5;5;3", "wc_summary_and_contributions": "135;69;75;36", "wc_strengths": "60;59;114;22", "wc_improvement": "127;75;94;35", "wc_limitations": "65;10;11;61", "wc_correctness": "19;8;39;1", "wc_clarity": "5;5;9;1", "wc_relation_to_prior_work": "41;11;18;16", "wc_documentation": "18;17;38;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "471;255;399;174", "wc_reply_reviewers": "0;19;212;18", "wc_reply_authors": "0;0;179;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.0 ], "wc_summary_and_contributions_avg": [ 78.75, 35.70976757135224 ], "wc_strengths_avg": [ 63.75, 32.80529682840867 ], "wc_improvement_avg": [ 82.75, 33.25939716831921 ], "wc_limitations_avg": [ 36.75, 26.290445032368698 ], "wc_correctness_avg": [ 16.75, 14.359230480774379 ], "wc_clarity_avg": [ 5.0, 2.8284271247461903 ], "wc_relation_to_prior_work_avg": [ 21.5, 11.543396380615196 ], "wc_documentation_avg": [ 18.5, 13.124404748406688 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 324.75, 116.71841114408643 ], "wc_reply_reviewers_avg": [ 62.25, 86.78817603798342 ], "wc_reply_authors_avg": [ 44.75, 77.50927363870726 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4376285466933041654&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ncsu.edu;ncsu.edu;ncsu.edu;usek.edu.lb;usek.edu.lb;usek.edu.lb;ul.edu.lb;ncsu.edu;ncsu.edu", "author_num": 9, "aff_unique_index": "0;0;0;1;1;1;2;0;0", "aff_unique_norm": "North Carolina State University;Holy Spirit University of Kaslik;Lebanese University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ncsu.edu;https://www.hsku.edu.lb;https://www.lub.edu.lb", "aff_unique_abbr": "NCSU;HSU;LU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1;1;0;0", "aff_country_unique": "United States;Lebanon" }, { "title": "Can Models Learn Skill Composition from Examples?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96846", "id": "1sLdprsbmk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1sLdprsbmk", "openreview": "https://openreview.net/forum?id=1sLdprsbmk", "poster": "/media/PosterPDFs/NeurIPS%202024/96846.png?t=1731728858.6979623", "project": "", "author_site": "Haoyu Zhao, Simran Kaur, Dingli Yu, Anirudh Goyal, Sanjeev Arora", "tldr": "", "abstract": "As large language models (LLMs) become increasingly advanced, their ability to exhibit compositional generalization---the capacity to combine learned skills in novel ways not encountered during training---has garnered significant attention. This type of generalization, particularly in scenarios beyond training data, is also of great interest in the study of AI safety and alignment. A recent study introduced the Skill-Mix evaluation, where models are tasked with composing a short paragraph demonstrating the use of a specified $k$-tuple of language skills. While small models struggled with composing even with $k=3$, larger models like GPT-4 performed reasonably well with $k=5$ and $6$.\n\nIn this paper, we employ a setup akin to Skill-Mix to evaluate the capacity of smaller models to learn compositional generalization from examples. Utilizing a diverse set of language skills---including rhetorical, literary, reasoning, theory of mind, and common sense---GPT was used to generate text samples that exhibit random subsets of $k$ skills. Subsequent fine-tuning of 7B and 13B parameter models on these combined skill texts, for increasing values of $k$, revealed the following findings: (1) Training on combinations of $k=2$ and $3$ skills results in noticeable improvements in the ability to compose texts with $k=4$ and $5$ skills, despite models never having seen such examples during training. (2) When skill categories are split into training and held-out groups, models significantly improve at composing texts with held-out skills during testing despite having only seen training skills during fine-tuning, illustrating the efficacy of the training approach even with previously unseen skills.\n\nThis study also suggests that incorporating skill-rich (potentially synthetic) text into training can substantially enhance the compositional capabilities of models.", "keywords": "Skill Composition;Large Language Model", "primary_area": "generative_models", "supplementary_material": "", "author": "Haoyu Zhao;Simran Kaur;Dingli Yu;Anirudh Goyal;Sanjeev Arora", "authorids": "~Haoyu_Zhao1;~Simran_Kaur1;~Dingli_Yu1;~Anirudh_Goyal1;~Sanjeev_Arora1", "gender": "M;F;;M;", "homepage": "http://hyzhao.me;;https://dingliyu.net/;https://anirudh9119.github.io/;http://www.cs.princeton.edu/~arora/", "dblp": ";211/3465;39/578;172/1039;a/SArora", "google_scholar": "1MjanHUAAAAJ;AMHNjTIAAAAJ;KJLJstYAAAAJ;krrh6OUAAAAJ;RUP4S68AAAAJ", "orcid": ";;0000-0002-8824-8611;;", "linkedin": ";;;;", "or_profile": "~Haoyu_Zhao1;~Simran_Kaur1;~Dingli_Yu1;~Anirudh_Goyal1;~Sanjeev_Arora1", "aff": "Princeton University;Princeton University;Princeton University;Google DeepMind;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;google.com;princeton.edu", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhao2024can,\ntitle={Can Models Learn Skill Composition from Examples?},\nauthor={Haoyu Zhao and Simran Kaur and Dingli Yu and Anirudh Goyal and Sanjeev Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1sLdprsbmk}\n}", "github": "", "reviewers": "ZBU1;bbsi;vwLV;4iaf", "pdf_size": 1166027, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;4;3;4", "novelty": "2;2;3;3", "presentation": "3;4;4;4", "wc_summary": "53;88;95;98", "wc_strengths": "31;64;90;128", "wc_weaknesses": "200;379;23;90", "wc_questions": "103;83;200;8", "wc_limitations": "8;11;8;4", "wc_review": "395;625;416;328", "wc_reply_reviewers": "62;0;26;19", "wc_reply_authors": "352;43;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 17.979154596365202 ], "wc_strengths_avg": [ 78.25, 35.52727825206991 ], "wc_weaknesses_avg": [ 173.0, 134.67924858715244 ], "wc_questions_avg": [ 98.5, 68.47079669464932 ], "wc_limitations_avg": [ 7.75, 2.48746859276655 ], "wc_review_avg": [ 441.0, 111.09230396386602 ], "wc_reply_reviewers_avg": [ 26.75, 22.465250944514285 ], "wc_reply_authors_avg": [ 98.75, 147.2640061250542 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13061992918554146369&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "princeton.edu;princeton.edu;princeton.edu;google.com;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Princeton University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.princeton.edu;https://deepmind.com", "aff_unique_abbr": "Princeton;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Language-Driven Interactive Traffic Trajectory Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96845", "id": "1u3qkG7BkQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1u3qkG7BkQ", "openreview": "https://openreview.net/forum?id=1u3qkG7BkQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96845.png?t=1730618195.935475", "project": "", "author_site": "Junkai XIA, Chenxin Xu, Qingyao Xu, Yanfeng Wang, Siheng Chen", "tldr": "", "abstract": "Realistic trajectory generation with natural language control is pivotal for advancing autonomous vehicle technology. However, previous methods focus on individual traffic participant trajectory generation, thus failing to account for the complexity of interactive traffic dynamics. In this work, we propose InteractTraj, the first language-driven traffic trajectory generator that can generate interactive traffic trajectories. InteractTraj interprets abstract trajectory descriptions into concrete formatted interaction-aware numerical codes and learns a mapping between these formatted codes and the final interactive trajectories. To interpret language descriptions, we propose a language-to-code encoder with a novel interaction-aware encoding strategy. To produce interactive traffic trajectories, we propose a code-to-trajectory decoder with interaction-aware feature aggregation that synergizes vehicle interactions with the environmental map and the vehicle moves. Extensive experiments show our method demonstrates superior performance over previous SoTA methods, offering a more realistic generation of interactive traffic trajectories with high controllability via diverse natural language commands.", "keywords": "Trajectory generation;Interaction;Language control", "primary_area": "generative_models", "supplementary_material": "", "author": "Junkai XIA;Chenxin Xu;Qingyao Xu;Yanfeng Wang;Siheng Chen", "authorids": "~Junkai_XIA1;~Chenxin_Xu1;~Qingyao_Xu1;~Yanfeng_Wang1;~Siheng_Chen1", "gender": "M;M;;M;M", "homepage": ";;https://xuqingyao.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://siheng-chen.github.io/", "dblp": ";281/7263;262/3899.html;55/5407-1.html;136/4945", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-3196-2347;", "linkedin": "junkai-xia-b1571a167/;;;;", "or_profile": "~Junkai_XIA1;~Chenxin_Xu1;~Qingyao_Xu1;~Yanfeng_Wang1;~Siheng_Chen2", "aff": "Shanghai Jiaotong University;National University of Singapore;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;u.nus.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Undergrad student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxia2024languagedriven,\ntitle={Language-Driven Interactive Traffic Trajectory Generation},\nauthor={Junkai XIA and Chenxin Xu and Qingyao Xu and Yanfeng Wang and Siheng Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1u3qkG7BkQ}\n}", "github": "", "reviewers": "awjY;xSeP;YJzW", "pdf_size": 3894641, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "62;81;169", "wc_strengths": "35;84;361", "wc_weaknesses": "54;65;85", "wc_questions": "132;20;97", "wc_limitations": "1;19;192", "wc_review": "284;269;904", "wc_reply_reviewers": "4;25;10", "wc_reply_authors": "95;0;0", "reply_reviewers": "1;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.0, 46.611872593435535 ], "wc_strengths_avg": [ 160.0, 143.52932336866454 ], "wc_weaknesses_avg": [ 68.0, 12.832251036613439 ], "wc_questions_avg": [ 83.0, 46.783187863447985 ], "wc_limitations_avg": [ 70.66666666666667, 86.10974909317115 ], "wc_review_avg": [ 485.6666666666667, 295.8697160726134 ], "wc_reply_reviewers_avg": [ 13.0, 8.831760866327848 ], "wc_reply_authors_avg": [ 31.666666666666668, 44.78342947514801 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13782832423705720514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;u.nus.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "SJTU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Generalized Tensor Decomposition for Understanding Multi-Output Regression under Combinatorial Shifts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96844", "id": "1v0BPTR3AA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1v0BPTR3AA", "openreview": "https://openreview.net/forum?id=1v0BPTR3AA", "poster": "/media/PosterPDFs/NeurIPS%202024/96844.png?t=1733382832.3494492", "project": "", "author_site": "Andong Wang, Yuning Qiu, Mingyuan Bai, Zhong Jin, Guoxu Zhou, Qibin Zhao", "tldr": "", "abstract": "In multi-output regression, we identify a previously neglected challenge that arises from the inability of training distribution to cover all combinations of input features, leading to combinatorial distribution shift (CDS). To the best of our knowledge, this is the first work to formally define and address this problem. We tackle it through a novel tensor decomposition perspective, proposing the Functional t-Singular Value Decomposition (Ft-SVD) theorem which extends the classical tensor SVD to infinite and continuous feature domains, providing a natural tool for representing and analyzing multi-output functions. Within the Ft-SVD framework, we formulate the multi-output regression problem under CDS as a low-rank tensor estimation problem under the missing not at random (MNAR) setting, and introduce a series of assumptions about the true functions, training and testing distributions, and spectral properties of the ground-truth embeddings, making the problem more tractable.\nTo address the challenges posed by CDS in multi-output regression, we develop a tailored Double-Stage Empirical Risk Minimization (ERM-DS) algorithm that leverages the spectral properties of the embeddings and uses specific hypothesis classes in each frequency component to better capture the varying spectral decay patterns. We provide rigorous theoretical analyses that establish performance guarantees for the ERM-DS algorithm. This work lays a preliminary theoretical foundation for multi-output regression under CDS.", "keywords": "multi-output regression;tensor singular value decomposition;tensor completion", "primary_area": "learning_theory", "supplementary_material": "/attachment/b280b2c45f42e555e15f8f70160e3ff7852a041e.zip", "author": "Andong Wang;Yuning Qiu;Mingyuan Bai;Zhong Jin;Guoxu Zhou;Qibin Zhao", "authorids": "~Andong_Wang1;~Yuning_Qiu1;~Mingyuan_Bai1;~Zhong_Jin1;~Guoxu_Zhou1;~Qibin_Zhao1", "gender": "M;M;F;M;M;M", "homepage": "https://www.patternrecognition.asia/wad/;https://www.researchgate.net/profile/Yuning-Qiu-3;;https://www.patternrecognition.asia/jin/;https://teacher.gdut.edu.cn/gxzhou/zh_CN/index.htm;https://qibinzhao.github.io", "dblp": "190/5540;210/1002;205/2305;;33/7727;13/1193", "google_scholar": "vuPyxGwAAAAJ;https://scholar.google.com.hk/citations?user=zGb0k1MAAAAJ;https://scholar.google.co.jp/citations?user=lo0_2rMAAAAJ;;BIUkSFEAAAAJ;https://scholar.google.co.jp/citations?hl=en", "orcid": ";0000-0003-0268-0890;0000-0002-2454-4219;0000-0002-4293-0869;;0000-0002-4442-3182", "linkedin": ";;;;;", "or_profile": "~Andong_Wang1;~Yuning_Qiu1;~Mingyuan_Bai1;~Zhong_Jin1;~Guoxu_Zhou1;~Qibin_Zhao1", "aff": "RIKEN AIP;RIKEN;RIKEN;China University of Petroleum-Beijing at Karamay;Guangdong University of Technology;RIKEN", "aff_domain": "riken.jp;riken.jp;riken.jp;cupk.edu.cn;gdut.edu.cn;riken.jp", "position": "Postdoc;SPDR;Postdoc;Full Professor;Full Professor;Team Leader", "bibtex": "@inproceedings{\nwang2024generalized,\ntitle={Generalized Tensor Decomposition for Understanding Multi-Output Regression under Combinatorial Shifts},\nauthor={Andong Wang and Yuning Qiu and Mingyuan Bai and Zhong Jin and Guoxu Zhou and Qibin Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1v0BPTR3AA}\n}", "github": "", "reviewers": "juWX;HXH7;yp2H;1ZmA", "pdf_size": 1945000, "rating": "5;6;6;6", "confidence": "2;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "31;163;89;108", "wc_strengths": "14;65;96;31", "wc_weaknesses": "43;43;192;69", "wc_questions": "108;127;2;68", "wc_limitations": "13;22;2;8", "wc_review": "209;420;381;284", "wc_reply_reviewers": "12;47;14;0", "wc_reply_authors": "81;129;48;111", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 97.75, 47.15599113580373 ], "wc_strengths_avg": [ 51.5, 31.579265349276255 ], "wc_weaknesses_avg": [ 86.75, 61.686201860707875 ], "wc_questions_avg": [ 76.25, 47.8663503935698 ], "wc_limitations_avg": [ 11.25, 7.327175444876422 ], "wc_review_avg": [ 323.5, 82.59691277523633 ], "wc_reply_reviewers_avg": [ 18.25, 17.440971876589906 ], "wc_reply_authors_avg": [ 92.25, 30.768287245149022 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:14nEF0Pf2A8J:scholar.google.com/&scioq=Generalized+Tensor+Decomposition+for+Understanding+Multi-Output+Regression+under+Combinatorial+Shifts&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "riken.jp;riken.jp;riken.jp;cupk.edu.cn;gdut.edu.cn;riken.jp", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "RIKEN;China University of Petroleum;Guangdong University of Technology", "aff_unique_dep": "Advanced Institute for Computational Science;;", "aff_unique_url": "https://www.aip.riken.jp;http://www.cup.edu.cn;http://www.gdut.edu.cn", "aff_unique_abbr": "RIKEN AIP;CUP;GDUT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "Japan;China" }, { "title": "Understanding Linear Probing then Fine-tuning Language Models from NTK Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96843", "id": "1v4gKsyGfe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1v4gKsyGfe", "openreview": "https://openreview.net/forum?id=1v4gKsyGfe", "poster": "/media/PosterPDFs/NeurIPS%202024/96843.png?t=1730428255.5391746", "project": "", "author_site": "Akiyoshi Tomihari, Issei Sato", "tldr": "", "abstract": "The two-stage fine-tuning (FT) method, linear probing (LP) then fine-tuning (LP-FT), outperforms linear probing and FT alone. This holds true for both in-distribution (ID) and out-of-distribution (OOD) data. One key reason for its success is the preservation of pre-trained features, achieved by obtaining a near-optimal linear head during LP. However, despite the widespread use of large language models, there has been limited exploration of more complex architectures such as Transformers. In this paper, we analyze the training dynamics of LP-FT for classification tasks on the basis of the neural tangent kernel (NTK) theory. Our analysis decomposes the NTK matrix into two components. This decomposition highlights the importance of the linear head norm alongside the prediction accuracy at the start of the FT stage. We also observe a significant increase in the linear head norm during LP, which stems from training with the cross-entropy (CE) loss. This increase in the linear head norm effectively reduces changes in learned features. Furthermore, we find that this increased norm can adversely affect model calibration, which can be corrected using temperature scaling. Additionally, we extend our analysis with the NTK to the low-rank adaptation (LoRA) method and validate its effectiveness. Our experiments using a Transformer-based model on multiple natural language processing datasets confirm our theoretical analysis. Our study demonstrates the effectiveness of LP-FT for fine-tuning language models. Code is available at https://github.com/tom4649/lp-ft_ntk.", "keywords": "fine-tuning;transfer learning;neural tangent kernel", "primary_area": "other", "supplementary_material": "/attachment/8a3e4927f8c46e11ccd8361879a5afa197505047.zip", "author": "Akiyoshi Tomihari;Issei Sato", "authorids": "~Akiyoshi_Tomihari1;~Issei_Sato2", "gender": "M;", "homepage": "https://github.com/tom4649;https://www.ml.is.s.u-tokyo.ac.jp/issei-sato-en", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Akiyoshi_Tomihari1;~Issei_Sato2", "aff": "The University of Tokyo;The University of Tokyo", "aff_domain": "tokyo.ac.jp;u-tokyo.ac.jp", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\ntomihari2024understanding,\ntitle={Understanding Linear Probing then Fine-tuning Language Models from {NTK} Perspective},\nauthor={Akiyoshi Tomihari and Issei Sato},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1v4gKsyGfe}\n}", "github": "", "reviewers": "qfrt;R1ei;zDRZ;PtuD", "pdf_size": 8837549, "rating": "6;7;7;8", "confidence": "2;4;5;2", "soundness": "3;3;3;4", "novelty": "3;4;4;4", "presentation": "3;2;3;3", "wc_summary": "57;36;43;54", "wc_strengths": "31;56;102;64", "wc_weaknesses": "39;93;114;44", "wc_questions": "1;58;31;25", "wc_limitations": "1;24;38;14", "wc_review": "129;267;328;201", "wc_reply_reviewers": "14;13;7;36", "wc_reply_authors": "42;12;21;18", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 47.5, 8.440971508067067 ], "wc_strengths_avg": [ 63.25, 25.469344318219107 ], "wc_weaknesses_avg": [ 72.5, 31.925694980689144 ], "wc_questions_avg": [ 28.75, 20.27775875189366 ], "wc_limitations_avg": [ 19.25, 13.5531361684298 ], "wc_review_avg": [ 231.25, 74.17673152680698 ], "wc_reply_reviewers_avg": [ 17.5, 11.01135777277262 ], "wc_reply_authors_avg": [ 23.25, 11.299889379989523 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18028253321362161242&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Sketched Lanczos uncertainty score: a low-memory summary of the Fisher information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96842", "id": "1vPqOmqSfO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1vPqOmqSfO", "openreview": "https://openreview.net/forum?id=1vPqOmqSfO", "poster": "/media/PosterPDFs/NeurIPS%202024/96842.png?t=1731656173.1756105", "project": "", "author_site": "Marco Miani, Lorenzo Beretta, S\u00f8ren Hauberg", "tldr": "", "abstract": "Current uncertainty quantification is memory and compute expensive, which hinders practical uptake. To counter, we develop Sketched Lanczos Uncertainty (SLU): an architecture-agnostic uncertainty score that can be applied to pre-trained neural networks with minimal overhead. Importantly, the memory use of SLU only grows logarithmically with the number of model parameters. We combine Lanczos' algorithm with dimensionality reduction techniques to compute a sketch of the leading eigenvectors of a matrix. Applying this novel algorithm to the Fisher information matrix yields a cheap and reliable uncertainty score. Empirically, SLU yields well-calibrated uncertainties, reliably detects out-of-distribution examples, and consistently outperforms existing methods in the low-memory regime.", "keywords": "Sketching;Uncertainty;Lanczos;Laplace", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/e4db358f72cc574c8daf998189d02815d6535c3c.zip", "author": "Marco Miani;Lorenzo Beretta;S\u00f8ren Hauberg", "authorids": "~Marco_Miani1;~Lorenzo_Beretta1;~S\u00f8ren_Hauberg1", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/marco-miani/;;http://www2.compute.dtu.dk/~sohau/", "dblp": "296/1592;34/8239-1;39/7226", "google_scholar": "https://scholar.google.com/citations?hl=it;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Marco_Miani1;~Lorenzo_Beretta1;~S\u00f8ren_Hauberg1", "aff": "Technical University of Denmark;UCSC ;Technical University of Denmark", "aff_domain": "dtu.dk;ucsc.ed;dtu.dk", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nmiani2024sketched,\ntitle={Sketched Lanczos uncertainty score: a low-memory summary of the Fisher information},\nauthor={Marco Miani and Lorenzo Beretta and S{\\o}ren Hauberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1vPqOmqSfO}\n}", "github": "", "reviewers": "nMeL;y4fZ;Ntg4", "pdf_size": 2726087, "rating": "6;6;8", "confidence": "3;4;3", "soundness": "4;3;4", "novelty": "2;3;4", "presentation": "3;4;3", "wc_summary": "195;115;831", "wc_strengths": "134;110;90", "wc_weaknesses": "63;66;77", "wc_questions": "78;43;86", "wc_limitations": "53;31;7", "wc_review": "523;365;1091", "wc_reply_reviewers": "34;99;0", "wc_reply_authors": "164;128;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 380.3333333333333, 320.33870963230294 ], "wc_strengths_avg": [ 111.33333333333333, 17.987650084309387 ], "wc_weaknesses_avg": [ 68.66666666666667, 6.018490028422596 ], "wc_questions_avg": [ 69.0, 18.672618098881223 ], "wc_limitations_avg": [ 30.333333333333332, 18.785337071473826 ], "wc_review_avg": [ 659.6666666666666, 311.7449099646839 ], "wc_reply_reviewers_avg": [ 44.333333333333336, 41.071752931776466 ], "wc_reply_authors_avg": [ 97.33333333333333, 70.37676384211545 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13108609316953583407&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "dtu.dk;ucsc.ed;dtu.dk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Technical University of Denmark;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://www.tek.dk;https://www.ucsc.edu", "aff_unique_abbr": "DTU;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Denmark;United States" }, { "title": "MotionBooth: Motion-Aware Customized Text-to-Video Generation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96841", "id": "1we1V3MAHD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1we1V3MAHD", "openreview": "https://openreview.net/forum?id=1we1V3MAHD", "poster": "/media/PosterPDFs/NeurIPS%202024/96841.png?t=1733717187.3518262", "project": "", "author_site": "Jianzong Wu, Xiangtai Li, Yanhong Zeng, Jiangning Zhang, Qianyu Zhou, Yining Li, Yunhai Tong, Kai Chen", "tldr": "", "abstract": "In this work, we present MotionBooth, an innovative framework designed for animating customized subjects with precise control over both object and camera movements. By leveraging a few images of a specific object, we efficiently fine-tune a text-to-video model to capture the object's shape and attributes accurately. Our approach presents subject region loss and video preservation loss to enhance the subject's learning performance, along with a subject token cross-attention loss to integrate the customized subject with motion control signals. Additionally, we propose training-free techniques for managing subject and camera motions during inference. In particular, we utilize cross-attention map manipulation to govern subject motion and introduce a novel latent shift module for camera movement control as well. MotionBooth excels in preserving the appearance of subjects while simultaneously controlling the motions in generated videos. Extensive quantitative and qualitative evaluations demonstrate the superiority and effectiveness of our method. Models and codes will be made publicly available.", "keywords": "custimized generation;video motion generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/fe4cf92efbfeec443e01ff5234bd49647d7d68da.zip", "author": "Jianzong Wu;Xiangtai Li;Yanhong Zeng;Jiangning Zhang;Qianyu Zhou;Yining Li;Yunhai Tong;Kai Chen", "authorids": "~Jianzong_Wu2;~Xiangtai_Li1;~Yanhong_Zeng1;~Jiangning_Zhang1;~Qianyu_Zhou1;~Yining_Li1;~Yunhai_Tong1;~Kai_Chen4", "gender": ";;F;M;M;M;M;M", "homepage": ";;https://zengyh1900.github.io/;https://www.researchgate.net/profile/Jiangning_Zhang2;https://qianyuzqy.github.io/;https://liyn.site;http://www.cis.pku.edu.cn/faculty/system/tongyunhai/tongyunhai.htm;https://chenkai.site/", "dblp": ";;215/4033;241/9593;232/4830-1;166/3420;14/1705;181/2839-26", "google_scholar": ";;14LbnMIAAAAJ;https://scholar.google.com.hk/citations?user=2hA4X9wAAAAJ;KHg04fkAAAAJ;https://scholar.google.com.hk/citations?user=y_cp1sUAAAAJ;T4gqdPkAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ", "orcid": ";;;;0000-0002-5331-050X;;;0000-0002-6820-2325", "linkedin": ";;;;;;;", "or_profile": "~Jianzong_Wu2;~Xiangtai_Li1;~Yanhong_Zeng1;~Jiangning_Zhang1;~Qianyu_Zhou1;~Yining_Li1;~Yunhai_Tong1;~Kai_Chen4", "aff": ";;Shanghai AI Laboratory;Tencent Youtu Lab;Shanghai Jiao Tong University,;Shanghai AI Laboratory;Peking University;Shanghai AI Laboratory", "aff_domain": ";;pjlab.org.cn;tencent.com;sjtu.edu.cn;pjlab.org.cn;pku.edu.cn;pjlab.org.cn", "position": ";;Researcher;Principal Researcher;PhD student;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nwu2024motionbooth,\ntitle={MotionBooth: Motion-Aware Customized Text-to-Video Generation},\nauthor={Jianzong Wu and Xiangtai Li and Yanhong Zeng and Jiangning Zhang and Qianyu Zhou and Yining Li and Yunhai Tong and Kai Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1we1V3MAHD}\n}", "github": "", "reviewers": "q2XJ;ZPXz;aAio;yPkE;N7np", "pdf_size": 4465739, "rating": "3;5;5;6;7", "confidence": "5;4;4;5;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "124;82;50;95;285", "wc_strengths": "61;20;62;86;103", "wc_weaknesses": "443;164;107;186;157", "wc_questions": "92;77;74;164;167", "wc_limitations": "16;39;1;1;26", "wc_review": "736;382;294;532;738", "wc_reply_reviewers": "229;217;70;73;53", "wc_reply_authors": "1071;606;100;57;37", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;3;3;2;2", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 127.2, 82.40242714871935 ], "wc_strengths_avg": [ 66.4, 28.018565273760895 ], "wc_weaknesses_avg": [ 211.4, 118.65007374628976 ], "wc_questions_avg": [ 114.8, 41.85403206382868 ], "wc_limitations_avg": [ 16.6, 14.677874505527019 ], "wc_review_avg": [ 536.4, 180.6096342945193 ], "wc_reply_reviewers_avg": [ 128.4, 77.63401316433409 ], "wc_reply_authors_avg": [ 374.2, 407.12818620183987 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4308202184276646, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5217561798098030587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;pjlab.org.cn;tencent.com;sjtu.edu.cn;pjlab.org.cn;pku.edu.cn;pjlab.org.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Shanghai AI Laboratory;Tencent;Shanghai Jiao Tong University;Peking University", "aff_unique_dep": ";Youtu Lab;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.tencent.com;https://www.sjtu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "SAIL;Tencent;SJTU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Delving into the Reversal Curse: How Far Can Large Language Models Generalize?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96840", "id": "1wxFznQWhp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1wxFznQWhp", "openreview": "https://openreview.net/forum?id=1wxFznQWhp", "poster": "/media/PosterPDFs/NeurIPS%202024/96840.png?t=1732246635.2608056", "project": "", "author_site": "Zhengkai Lin, Zhihang Fu, Kai Liu, Liang Xie, Binbin Lin, Wenxiao Wang, Deng Cai, Yue Wu, Jieping Ye", "tldr": "", "abstract": "While large language models (LLMs) showcase unprecedented capabilities, they also exhibit certain inherent limitations when facing seemingly trivial tasks. \nA prime example is the recently debated \"reversal curse\", which surfaces when models, having been trained on the fact \"A is B\", struggle to generalize this knowledge to infer that \"B is A\".\nIn this paper, we examine the manifestation of the reversal curse across various tasks and delve into both the generalization abilities and the problem-solving mechanisms of LLMs. This investigation leads to a series of significant insights:\n(1) LLMs are able to generalize to \"B is A\" when both A and B are presented in the context as in the case of a multiple-choice question.\n(2) This generalization ability is highly correlated to the structure of the fact \"A is B\" in the training documents. For example, this generalization only applies to biographies structured in \"[Name] is [Description]\" but not to \"[Description] is [Name]\".\n(3) We propose and verify the hypothesis that LLMs possess an inherent bias in fact recalling during knowledge application, which explains and underscores the importance of the document structure to successful learning.\n(4) The negative impact of this bias on the downstream performance of LLMs can hardly be mitigated through training alone.\nBased on these intriguing findings, our work not only presents a novel perspective for interpreting LLMs' generalization abilities from their intrinsic working mechanism but also provides new insights for the development of more effective learning methods for LLMs.", "keywords": "Large Language Models;Interpretability;Reversal Curse;Knowledge Injection", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhengkai Lin;Zhihang Fu;Kai Liu;Liang Xie;Binbin Lin;Wenxiao Wang;Deng Cai;Yue Wu;Jieping Ye", "authorids": "~Zhengkai_Lin1;~Zhihang_Fu1;~Kai_Liu8;~Liang_Xie3;~Binbin_Lin3;~Wenxiao_Wang2;~Deng_Cai4;~Yue_Wu18;~Jieping_Ye4", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";https://zhihangfu.top/;https://kail8.github.io/;https://www.linkedin.com/in/%E4%BA%AE-%E8%B0%A2-254928160/;https://www.linkedin.com/in/binbin-lin-03598b31/;https://wenxiaowang.com;http://www.cad.zju.edu.cn/home/dengcai/;http://yelabs.net/;", "dblp": "368/8752;207/1894;;81/2806-3;51/8073;243/5853-1;c/DCai;03/5454;", "google_scholar": ";e_e3Ur0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;Zmvq4KYAAAAJ;https://scholar.google.com.hk/citations?user=rcxOjikAAAAJ;vzxDyJoAAAAJ;T9AzhwcAAAAJ;srajsjoAAAAJ", "orcid": ";;;0000-0002-7604-1410;0000-0002-0330-6406;;;0000-0001-8662-5818;", "linkedin": "vispstar/;;;%E4%BA%AE-%E8%B0%A2-254928160/;;;;;", "or_profile": "~Zhengkai_Lin1;~Zhihang_Fu1;~Kai_Liu8;~Liang_Xie3;~Binbin_Lin3;~Wenxiao_Wang2;~Deng_Cai4;~Jieping_Ye4;~Yue_Wu3", "aff": "Zhejiang University;Alibaba Group;Alibaba Group;Zhejiang University of Technology;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Alibaba Group", "aff_domain": "zju.edu.cn;alibaba-inc.com;alibaba-inc.com;zjut.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com", "position": "MS student;Researcher;Intern;Postdoc;Researcher;Assistant Professor;Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nlin2024delving,\ntitle={Delving into the Reversal Curse: How Far Can Large Language Models Generalize?},\nauthor={Zhengkai Lin and Zhihang Fu and Kai Liu and Liang Xie and Binbin Lin and Wenxiao Wang and Deng Cai and Yue Wu and Jieping Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1wxFznQWhp}\n}", "github": "", "reviewers": "Mp9z;p2dN;e5Vg;PVmx", "pdf_size": 3318902, "rating": "5;6;7;7", "confidence": "3;3;4;4", "soundness": "3;4;3;2", "novelty": "2;3;4;2", "presentation": "3;2;4;3", "wc_summary": "98;160;198;125", "wc_strengths": "100;150;64;30", "wc_weaknesses": "78;181;122;257", "wc_questions": "280;273;6;276", "wc_limitations": "35;71;1;62", "wc_review": "591;835;391;750", "wc_reply_reviewers": "80;83;0;53", "wc_reply_authors": "456;77;0;56", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 145.25, 37.559120064240055 ], "wc_strengths_avg": [ 86.0, 44.474711915874174 ], "wc_weaknesses_avg": [ 159.5, 67.11370947876448 ], "wc_questions_avg": [ 208.75, 117.08410438654771 ], "wc_limitations_avg": [ 42.25, 27.25229348146684 ], "wc_review_avg": [ 641.75, 169.20014036637204 ], "wc_reply_reviewers_avg": [ 54.0, 33.294143629172986 ], "wc_reply_authors_avg": [ 147.25, 180.46519747585683 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dpSDqvSe-3UJ:scholar.google.com/&scioq=Delving+into+the+Reversal+Curse:+How+Far+Can+Large+Language+Models+Generalize%3F&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "zju.edu.cn;alibaba-inc.com;alibaba-inc.com;zjut.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com", "author_num": 9, "aff_unique_index": "0;1;1;2;0;0;0;1;1", "aff_unique_norm": "Zhejiang University;Alibaba Group;Zhejiang University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.zjut.edu.cn", "aff_unique_abbr": "ZJU;Alibaba;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Graph Classification via Reference Distribution Learning: Theory and Practice", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96839", "id": "1zVinhehks", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1zVinhehks", "openreview": "https://openreview.net/forum?id=1zVinhehks", "poster": "/media/PosterPDFs/NeurIPS%202024/96839.png?t=1730379981.994097", "project": "", "author_site": "Zixiao Wang, Jicong Fan", "tldr": "", "abstract": "Graph classification is a challenging problem owing to the difficulty in quantifying the similarity between graphs or representing graphs as vectors, though there have been a few methods using graph kernels or graph neural networks (GNNs). Graph kernels often suffer from computational costs and manual feature engineering, while GNNs commonly utilize global pooling operations, risking the loss of structural or semantic information. This work introduces Graph Reference Distribution Learning (GRDL), an efficient and accurate graph classification method. GRDL treats each graph's latent node embeddings given by GNN layers as a discrete distribution, enabling direct classification without global pooling, based on maximum mean discrepancy to adaptively learned reference distributions. To fully understand this new model (the existing theories do not apply) and guide its configuration (e.g., network architecture, references' sizes, number, and regularization) for practical use, we derive generalization error bounds for GRDL and verify them numerically. More importantly, our theoretical and numerical results both show that GRDL has a stronger generalization ability than GNNs with global pooling operations. Experiments on moderate-scale and large-scale graph datasets show the superiority of GRDL over the state-of-the-art, emphasizing its remarkable efficiency, being at least 10 times faster than leading competitors in both training and inference stages.", "keywords": "graph neural networks;graph classification", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Zixiao Wang;Jicong Fan", "authorids": "~Zixiao_Wang5;~Jicong_Fan2", "gender": "M;M", "homepage": ";https://jicongfan.github.io/", "dblp": ";139/1570", "google_scholar": ";vdJsnhIAAAAJ", "orcid": ";0000-0001-9665-0355", "linkedin": "zixiao-wang-95a2802a8/;", "or_profile": "~Zixiao_Wang5;~Jicong_Fan2", "aff": "The Chinese University of Hong Kong (Shenzhen);The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024graph,\ntitle={Graph Classification via Reference Distribution Learning: Theory and Practice},\nauthor={Zixiao Wang and Jicong Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1zVinhehks}\n}", "github": "", "reviewers": "anD1;5HH3;aUnT", "pdf_size": 4407091, "rating": "4;6;7", "confidence": "5;3;5", "soundness": "2;2;4", "novelty": "2;3;4", "presentation": "3;3;3", "wc_summary": "54;47;64", "wc_strengths": "35;49;120", "wc_weaknesses": "599;222;15", "wc_questions": "5;5;94", "wc_limitations": "1;14;8", "wc_review": "694;337;301", "wc_reply_reviewers": "0;53;20", "wc_reply_authors": "30;36;8", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.0, 6.97614984548545 ], "wc_strengths_avg": [ 68.0, 37.2111094522411 ], "wc_weaknesses_avg": [ 278.6666666666667, 241.76067137196287 ], "wc_questions_avg": [ 34.666666666666664, 41.95500235040182 ], "wc_limitations_avg": [ 7.666666666666667, 5.312459150169743 ], "wc_review_avg": [ 444.0, 177.38658348364456 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 21.85304453744502 ], "wc_reply_authors_avg": [ 24.666666666666668, 12.036980056845191 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13176296695646276571&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cuhk.edu.cn;cuhk.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "HOPE: Shape Matching Via Aligning Different K-hop Neighbourhoods", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96838", "id": "1ziIqFo4Tj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=1ziIqFo4Tj", "openreview": "https://openreview.net/forum?id=1ziIqFo4Tj", "poster": "/media/PosterPDFs/NeurIPS%202024/96838.png?t=1729504487.9373353", "project": "", "author_site": "Barakeel Fanseu Kamhoua, Huamin Qu", "tldr": "", "abstract": "Accurate and smooth shape matching is very hard to achieve. This is because for accuracy, one needs unique descriptors (signatures) on shapes that distinguish different vertices on a mesh accurately while at the same time being invariant to deformations. However, most existing unique shape descriptors are generally not smooth on the shape and are not noise-robust thus leading to non-smooth matches. On the other hand, for smoothness, one needs descriptors that are smooth and continuous on the shape. However, existing smooth descriptors are generally not unique and as such lose accuracy as they match neighborhoods (for smoothness) rather than exact vertices (for accuracy). In this work, we propose to use different k-hop neighborhoods of vertices as pairwise descriptors for shape matching. We use these descriptors in conjunction with local map distortion (LMD) to refine an initialized map for shape matching. We validate the effectiveness of our pipeline on benchmark datasets such as SCAPE, TOSCA, TOPKIDS, and others.", "keywords": "Shape analysis;correspondences;registration", "primary_area": "active_learning", "supplementary_material": "/attachment/36d24ebf509d1f0c69552cdd53d9810a018a7989.zip", "author": "Barakeel Fanseu Kamhoua;Huamin Qu", "authorids": "~Barakeel_Fanseu_Kamhoua1;~Huamin_Qu1", "gender": ";M", "homepage": ";http://huamin.org/", "dblp": "267/1642.html;65/1792.html", "google_scholar": ";https://scholar.google.com.tw/citations?user=J7a5zGEAAAAJ", "orcid": ";0000-0001-6711-8028", "linkedin": ";", "or_profile": "~Barakeel_Fanseu_Kamhoua1;~Huamin_Qu1", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;hkust.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkamhoua2024hope,\ntitle={{HOPE}: Shape Matching Via Aligning Different K-hop Neighbourhoods},\nauthor={Barakeel Fanseu Kamhoua and Huamin Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=1ziIqFo4Tj}\n}", "github": "", "reviewers": "E6Vo;NAeg;hBzn;dry5", "pdf_size": 21042862, "rating": "2;3;6;7", "confidence": "5;4;5;4", "soundness": "1;2;3;3", "novelty": "1;2;2;3", "presentation": "1;2;4;3", "wc_summary": "78;85;80;38", "wc_strengths": "19;49;44;32", "wc_weaknesses": "169;215;101;104", "wc_questions": "75;87;2;55", "wc_limitations": "5;8;2;11", "wc_review": "346;444;229;240", "wc_reply_reviewers": "850;190;16;29", "wc_reply_authors": "1240;463;0;0", "reply_reviewers": "5;1;1;1", "reply_authors": "6;2;1;1", "rating_avg": [ 4.5, 2.0615528128088303 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 70.25, 18.793283374652763 ], "wc_strengths_avg": [ 36.0, 11.597413504743201 ], "wc_weaknesses_avg": [ 147.25, 47.625492123441624 ], "wc_questions_avg": [ 54.75, 32.529794035622174 ], "wc_limitations_avg": [ 6.5, 3.3541019662496847 ], "wc_review_avg": [ 314.75, 87.49678565524565 ], "wc_reply_reviewers_avg": [ 271.25, 341.0977682424791 ], "wc_reply_authors_avg": [ 425.75, 506.68450489431785 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.24253562503633297, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6hhjXeHbNNoJ:scholar.google.com/&scioq=HOPE:+Shape+Matching+Via+Aligning+Different+K-hop+Neighbourhoods&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "cse.ust.hk;hkust.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Reparameterization invariance in approximate Bayesian inference", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96837", "id": "204YOrDHny", "proceeding": "", "pdf": "https://openreview.net/pdf?id=204YOrDHny", "openreview": "https://openreview.net/forum?id=204YOrDHny", "poster": "/media/PosterPDFs/NeurIPS%202024/96837.png?t=1731661089.3537667", "project": "", "author_site": "Hrittik Roy, Marco Miani, Carl Henrik Ek, Philipp Hennig, Marvin Pf\u00f6rtner, Lukas Tatzel, S\u00f8ren Hauberg", "tldr": "", "abstract": "Current approximate posteriors in Bayesian neural networks (BNNs) exhibit a crucial limitation: they fail to maintain invariance under reparameterization, i.e. BNNs assign different posterior densities to different parametrizations of identical functions. This creates a fundamental flaw in the application of Bayesian principles as it breaks the correspondence between uncertainty over the parameters with uncertainty over the parametrized function. In this paper, we investigate this issue in the context of the increasingly popular linearized Laplace approximation. Specifically, it has been observed that linearized predictives alleviate the common underfitting problems of the Laplace approximation. We develop a new geometric view of reparametrizations from which we explain the success of linearization. Moreover, we demonstrate that these reparameterization invariance properties can be extended to the original neural network predictive using a Riemannian diffusion process giving a straightforward algorithm for approximate posterior sampling, which empirically improves posterior fit.", "keywords": "Approximate Bayesian inference;Laplace approximations;Differential geometry;Reparametrization invariance", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/7d64b30dc0c85bae4ed05359392c181114669ed8.zip", "author": "Hrittik Roy;Marco Miani;Carl Henrik Ek;Philipp Hennig;Marvin Pf\u00f6rtner;Lukas Tatzel;S\u00f8ren Hauberg", "authorids": "~Hrittik_Roy2;~Marco_Miani1;~Carl_Henrik_Ek1;~Philipp_Hennig1;~Marvin_Pf\u00f6rtner1;~Lukas_Tatzel1;~S\u00f8ren_Hauberg1", "gender": "M;M;;M;;M;M", "homepage": "http://www.compute.dtu.dk;https://www.linkedin.com/in/marco-miani/;http://carlhenrik.com;http://mml.inf.uni-tuebingen.de;;https://github.com/ltatzel;http://www2.compute.dtu.dk/~sohau/", "dblp": ";296/1592;79/6273;08/9077;;;39/7226", "google_scholar": ";https://scholar.google.com/citations?hl=it;https://scholar.google.co.uk/citations?user=9yQ1tQoAAAAJ;https://scholar.google.de/citations?user=UeG5w08AAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-7293-6092;;;", "linkedin": ";;;;;;", "or_profile": "~Hrittik_Roy2;~Marco_Miani1;~Carl_Henrik_Ek1;~Philipp_Hennig1;~Marvin_Pf\u00f6rtner1;~Lukas_Tatzel1;~S\u00f8ren_Hauberg1", "aff": "Technical University of Denmark;Technical University of Denmark;University of Cambridge;University of T\u00fcbingen;;University of T\u00fcbingen;Technical University of Denmark", "aff_domain": "dtu.dk;dtu.dk;cam.ac.uk;uni-tuebingen.de;;uni-tuebingen.de;dtu.dk", "position": "PhD student;PhD student;Associate Professor;Full Professor;;PhD student;Professor", "bibtex": "@inproceedings{\nroy2024reparameterization,\ntitle={Reparameterization invariance in approximate Bayesian inference},\nauthor={Hrittik Roy and Marco Miani and Carl Henrik Ek and Philipp Hennig and Marvin Pf{\\\"o}rtner and Lukas Tatzel and S{\\o}ren Hauberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=204YOrDHny}\n}", "github": "", "reviewers": "gWds;AZGF;12ke;v5yb", "pdf_size": 1968399, "rating": "5;5;7;8", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;3", "wc_summary": "64;187;66;99", "wc_strengths": "72;39;55;97", "wc_weaknesses": "91;48;141;85", "wc_questions": "56;191;2;115", "wc_limitations": "7;5;15;34", "wc_review": "290;470;279;430", "wc_reply_reviewers": "96;50;21;63", "wc_reply_authors": "282;126;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 49.89488951786545 ], "wc_strengths_avg": [ 65.75, 21.48691462262556 ], "wc_weaknesses_avg": [ 91.25, 33.10872241570188 ], "wc_questions_avg": [ 91.0, 70.21751918146923 ], "wc_limitations_avg": [ 15.25, 11.453711188955307 ], "wc_review_avg": [ 367.25, 84.03979712017396 ], "wc_reply_reviewers_avg": [ 57.5, 26.93046601899046 ], "wc_reply_authors_avg": [ 102.0, 115.95688854052612 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16379192768625547581&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "dtu.dk;dtu.dk;cam.ac.uk;uni-tuebingen.de;;uni-tuebingen.de;dtu.dk", "author_num": 7, "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Technical University of Denmark;University of Cambridge;University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tek.dk;https://www.cam.ac.uk;https://www.uni-tuebingen.de/", "aff_unique_abbr": "DTU;Cambridge;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;2;2;0", "aff_country_unique": "Denmark;United Kingdom;Germany" }, { "title": "Drones Help Drones: A Collaborative Framework for Multi-Drone Object Trajectory Prediction and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96836", "id": "20QgErW5zH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=20QgErW5zH", "openreview": "https://openreview.net/forum?id=20QgErW5zH", "poster": "/media/PosterPDFs/NeurIPS%202024/96836.png?t=1730116977.7565591", "project": "", "author_site": "Zhechao Wang, Peirui Cheng, Minxing Chen, Pengju Tian, Zhirui Wang, Xinming Li, Xue Yang, Xian Sun", "tldr": "", "abstract": "Collaborative trajectory prediction can comprehensively forecast the future motion of objects through multi-view complementary information. However, it encounters two main challenges in multi-drone collaboration settings. The expansive aerial observations make it difficult to generate precise Bird's Eye View (BEV) representations. Besides, excessive interactions can not meet real-time prediction requirements within the constrained drone-based communication bandwidth. To address these problems, we propose a novel framework named \"Drones Help Drones\" (DHD). Firstly, we incorporate the ground priors provided by the drone's inclined observation to estimate the distance between objects and drones, leading to\u00a0more precise BEV generation. Secondly, we design a selective mechanism based on the local feature discrepancy to prioritize the critical information contributing to prediction tasks during inter-drone interactions. Additionally, we create the first dataset for multi-drone collaborative prediction, named \"Air-Co-Pred\", and conduct quantitative and qualitative experiments to validate the effectiveness of our DHD framework. The results demonstrate that compared to state-of-the-art approaches, DHD reduces position deviation in BEV representations by over 20\\% and requires only a quarter of the transmission ratio for interactions while achieving comparable prediction performance. Moreover, DHD also shows promising generalization to the collaborative 3D object detection in CoPerception-UAVs.", "keywords": "Multi-drone Collaboration;Perception and Prediction", "primary_area": "machine_vision", "supplementary_material": "/attachment/8a36a16ebd0eb72077103bda9d69c15ee06edf2c.zip", "author": "Zhechao Wang;Peirui Cheng;Minxing Chen;Pengju Tian;Zhirui Wang;Xinming Li;Xue Yang;Xian Sun", "authorids": "~Zhechao_Wang1;~Peirui_Cheng1;~Minxing_Chen1;~Pengju_Tian1;~Zhirui_Wang4;~Xinming_Li4;~Xue_Yang2;~Xian_Sun2", "gender": "M;M;Non-Binary;M;M;M;M;M", "homepage": "https://github.com/WangzcBruce;;;;;https://ieeexplore.ieee.org/author/37088654174;https://yangxue.site/;https://github.com/trailsV", "dblp": "231/3582;;;;;;13/1779-5;", "google_scholar": "JT3X9mIAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;RCTCAVQAAAAJ;;2xTlvV0AAAAJ;", "orcid": ";0000-0002-4993-6753;;;;;0000-0002-7084-9101;", "linkedin": ";;;;;;;", "or_profile": "~Zhechao_Wang1;~Peirui_Cheng1;~Minxing_Chen1;~Pengju_Tian1;~Zhirui_Wang4;~Xinming_Li4;~Xue_Yang2;~Xian_Sun2", "aff": "University of Chinese Academy of Sciences;AIRCAS;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Aerospace Information Research Institute, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Shanghai AI Laboratory;, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;aircas.ac.cn;mails.ucas.ac.cn;ucas.edu;aircas.ac.cn;ucas.ac.cn;pjlab.org.cn;ucas.ac.cn", "position": "PhD student;Assistant Professor;MS student;MS student;Associate Professor;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2024drones,\ntitle={Drones Help Drones: A Collaborative Framework for Multi-Drone Object Trajectory Prediction and Beyond},\nauthor={Zhechao Wang and Peirui Cheng and Minxing Chen and Pengju Tian and Zhirui Wang and Xinming Li and Xue Yang and Xian Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=20QgErW5zH}\n}", "github": "", "reviewers": "nEkT;nhVA;CzWU;rEnT;vjyF", "pdf_size": 29379529, "rating": "4;5;5;6;7", "confidence": "4;3;4;2;5", "soundness": "2;2;3;3;4", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "49;79;94;84;62", "wc_strengths": "22;87;97;72;160", "wc_weaknesses": "224;269;289;79;216", "wc_questions": "35;20;240;2;170", "wc_limitations": "61;6;29;3;39", "wc_review": "391;461;749;240;647", "wc_reply_reviewers": "0;59;0;16;65", "wc_reply_authors": "45;42;0;47;53", "reply_reviewers": "0;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.6, 16.0822883943797 ], "wc_strengths_avg": [ 87.6, 44.44592219765499 ], "wc_weaknesses_avg": [ 215.4, 73.44276683241176 ], "wc_questions_avg": [ 93.4, 94.35168254991534 ], "wc_limitations_avg": [ 27.6, 21.536945001554887 ], "wc_review_avg": [ 497.6, 181.42943531852816 ], "wc_reply_reviewers_avg": [ 28.0, 28.432375911977527 ], "wc_reply_authors_avg": [ 37.4, 19.043108989868227 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.15384615384615383, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17142094284677497346&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ucas.ac.cn;aircas.ac.cn;mails.ucas.ac.cn;ucas.edu;aircas.ac.cn;ucas.ac.cn;pjlab.org.cn;ucas.ac.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0;3;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Aerospace Information Research Institute, Chinese Academy of Sciences;Chinese Academy of Sciences;Shanghai AI Laboratory", "aff_unique_dep": ";;Aerospace Information Research Institute;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.aircas.ac.cn;http://www.cas.ac.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "UCAS;AIRCAS;CAS;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "G3: An Effective and Adaptive Framework for Worldwide Geolocalization Using Large Multi-Modality Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96835", "id": "21tn63ee15", "proceeding": "", "pdf": "https://openreview.net/pdf?id=21tn63ee15", "openreview": "https://openreview.net/forum?id=21tn63ee15", "poster": "", "project": "", "author_site": "Pengyue Jia, Yiding Liu, Xiaopeng Li, Xiangyu Zhao, Yuhao Wang, Yantong Du, Xiao Han, Xuetao Wei, Shuaiqiang Wang, Dawei Yin", "tldr": "", "abstract": "Worldwide geolocalization aims to locate the precise location at the coordinate level of photos taken anywhere on the Earth. It is very challenging due to 1) the difficulty of capturing subtle location-aware visual semantics, and 2) the heterogeneous geographical distribution of image data. As a result, existing studies have clear limitations when scaled to a worldwide context. They may easily confuse distant images with similar visual contents, or cannot adapt to various locations worldwide with different amounts of relevant data. To resolve these limitations, we propose **G3**, a novel framework based on Retrieval-Augmented Generation (RAG). In particular, G3 consists of three steps, i.e., **G**eo-alignment, **G**eo-diversification, and **G**eo-verification to optimize both retrieval and generation phases of worldwide geolocalization. During Geo-alignment, our solution jointly learns expressive multi-modal representations for images, GPS and textual descriptions, which allows us to capture location-aware semantics for retrieving nearby images for a given query. During Geo-diversification, we leverage a prompt ensembling method that is robust to inconsistent retrieval performance for different image queries. Finally, we combine both retrieved and generated GPS candidates in Geo-verification for location prediction. Experiments on two well-established datasets IM2GPS3k and YFCC4k verify the superiority of G3 compared to other state-of-the-art methods. Our code is available online [https://github.com/Applied-Machine-Learning-Lab/G3](https://github.com/Applied-Machine-Learning-Lab/G3) for reproduction.", "keywords": "Image Geolocalization;Image-to-GPS retrieval;Large Multi-Modal Models;CLIP", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Pengyue Jia;Yiding Liu;Xiaopeng Li;Xiangyu Zhao;Yuhao Wang;Yantong Du;Xiao Han;Xuetao Wei;Shuaiqiang Wang;Dawei Yin", "authorids": "~Pengyue_Jia1;~Yiding_Liu1;~Xiaopeng_Li11;~Xiangyu_Zhao1;~Yuhao_Wang13;~Yantong_Du1;~Xiao_Han11;~Xuetao_Wei2;~Shuaiqiang_Wang2;~Dawei_Yin1", "gender": "M;M;M;M;;M;M;M;M;M", "homepage": "https://jia-py.github.io/;https://liuyiding.net;https://xiaopengli1.github.io/;https://zhaoxyai.github.io/;;https://github.com/YantongDU;https://hahahenha.github.io/;https://cse.sustech.edu.cn/faculty/~weixt/;http://wangshuaiqiang.net/;https://www.yindawei.com/", "dblp": "313/9275;;;08/890-1.html;;;01/2095-4;09/5916;16/1524;", "google_scholar": "htpkMJcAAAAJ;c7oiMdIAAAAJ;hCCqgQQAAAAJ;;;;;8fNwEScAAAAJ;https://scholar.google.com.hk/citations?user=8SbYYcIAAAAJ;GuQ9bpAAAAAJ", "orcid": "0000-0003-4712-3676;0000-0001-6857-261X;0009-0008-6162-8500;0000-0003-2926-4416;;0009-0009-8042-6303;0000-0002-3478-964X;0000-0002-4450-2251;0000-0002-9212-1947;0000-0002-0684-6205", "linkedin": ";;;;;;;;;dwyin/", "or_profile": "~Pengyue_Jia1;~Yiding_Liu1;~Xiaopeng_Li11;~Xiangyu_Zhao1;~Yuhao_Wang13;~Yantong_Du1;~Xiao_Han11;~Xuetao_Wei2;~Shuaiqiang_Wang2;~Dawei_Yin1", "aff": "City University of Hong Kong;Baidu;City University of Hong Kong;City University of Hong Kong;;Harbin Engineering University;City University of Hong Kong;Southern University of Science and Technology;Baidu Inc.;Baidu", "aff_domain": "cityu.edu.hk;baidu.com;cityu.edu.hk;cityu.edu.hk;;hrbeu.edu.cn;cityu.edu.hk;sustech.edu.cn;baidu.com;baidu.com", "position": "PhD student;Researcher;PhD student;Assistant Professor;;PhD student;PhD student;Associate Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\njia2024g,\ntitle={G3: An Effective and Adaptive Framework for Worldwide Geolocalization Using Large Multi-Modality Models},\nauthor={Pengyue Jia and Yiding Liu and Xiaopeng Li and Xiangyu Zhao and Yuhao Wang and Yantong Du and Xiao Han and Xuetao Wei and Shuaiqiang Wang and Dawei Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=21tn63ee15}\n}", "github": "", "reviewers": "YB9n;uTH3;CzZb;tzbF;TChJ", "pdf_size": 7924831, "rating": "5;5;6;6;6", "confidence": "5;4;5;4;4", "soundness": "3;3;4;2;3", "novelty": "3;3;3;2;4", "presentation": "3;3;3;3;3", "wc_summary": "20;56;100;62;93", "wc_strengths": "29;42;83;79;45", "wc_weaknesses": "22;82;267;190;97", "wc_questions": "178;50;57;104;3", "wc_limitations": "13;1;8;44;7", "wc_review": "262;231;515;479;245", "wc_reply_reviewers": "0;0;40;25;17", "wc_reply_authors": "79;97;247;19;24", "reply_reviewers": "0;0;2;1;1", "reply_authors": "2;2;4;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.2, 28.694250295137525 ], "wc_strengths_avg": [ 55.6, 21.462525480474103 ], "wc_weaknesses_avg": [ 131.6, 86.50225430588499 ], "wc_questions_avg": [ 78.4, 59.20337828198657 ], "wc_limitations_avg": [ 14.6, 15.186836405255702 ], "wc_review_avg": [ 346.4, 123.87994187922433 ], "wc_reply_reviewers_avg": [ 16.4, 15.291827882892221 ], "wc_reply_authors_avg": [ 93.2, 82.66172028212334 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1430130664047889734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cityu.edu.hk;baidu.com;cityu.edu.hk;cityu.edu.hk;;hrbeu.edu.cn;cityu.edu.hk;sustech.edu.cn;baidu.com;baidu.com", "author_num": 10, "aff_unique_index": "0;1;0;0;2;0;3;1;1", "aff_unique_norm": "City University of Hong Kong;Baidu;Harbin Engineering University;Southern University of Science and Technology", "aff_unique_dep": ";Baidu, Inc.;;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.baidu.com;http://www.heu.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "CityU;Baidu;HEU;SUSTech", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GREATS: Online Selection of High-Quality Data for LLM Training in Every Iteration", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96834", "id": "232VcN8tSx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=232VcN8tSx", "openreview": "https://openreview.net/forum?id=232VcN8tSx", "poster": "", "project": "", "author_site": "Jiachen (Tianhao) Wang, Tong Wu, Dawn Song, Prateek Mittal, Ruoxi Jia", "tldr": "", "abstract": "Online batch selection methods offer an adaptive alternative to static training data selection by dynamically selecting data batches during training. However, existing methods either rely on impractical reference models or simple heuristics that may not capture true data informativeness. To address these limitations, we propose \\emph{GREedy Approximation Taylor Selection} (GREATS), a principled and efficient online batch selection method that applies greedy algorithm to optimize the data batch quality approximated by Taylor expansion. We develop a series of techniques to scale GREATS to large-scale model training. Extensive experiments with large language models (LLMs) demonstrate that GREATS significantly improves training convergence speed and generalization performance.", "keywords": "Online Batch Selection;Large Language Model", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Jiachen T. Wang;Tong Wu;Dawn Song;Prateek Mittal;Ruoxi Jia", "authorids": "~Jiachen_T._Wang1;~Tong_Wu1;~Dawn_Song1;~Prateek_Mittal1;~Ruoxi_Jia1", "gender": "M;F;;;M", "homepage": "https://tongwu2020.github.io/tongwu/;;http://www.princeton.edu/~pmittal/;https://ruoxijia.info/;https://tianhaowang.netlify.app/", "dblp": ";s/DXSong;;147/5355-1;274/2144", "google_scholar": "dt0eV8CPx3AC;;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ;JCrug-YAAAAJ;nvQOtgkAAAAJ", "orcid": ";;0000-0002-4057-0118;;", "linkedin": "tongwu98/;;;;tian-hao-wang/", "or_profile": "~Tong_Wu1;~Dawn_Song1;~Prateek_Mittal1;~Ruoxi_Jia1;~Tianhao_Wang2", "aff": "Princeton University;University of California, Berkeley;Princeton University;Virginia Tech;Princeton University", "aff_domain": "princeton.edu;berkeley.edu;princeton.edu;vt.edu;princeton.edu", "position": "PhD student;Full Professor;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nwang2024greats,\ntitle={{GREATS}: Online Selection of High-Quality Data for {LLM} Training in Every Iteration},\nauthor={Jiachen T. Wang and Tong Wu and Dawn Song and Prateek Mittal and Ruoxi Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=232VcN8tSx}\n}", "github": "", "reviewers": "2jYA;tC7z;SyBn;oEUR;vNzN", "pdf_size": 5269069, "rating": "5;6;7;7;7", "confidence": "4;4;4;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;2", "wc_summary": "107;85;39;39;239", "wc_strengths": "109;70;121;48;228", "wc_weaknesses": "278;63;312;143;85", "wc_questions": "4;23;44;1;252", "wc_limitations": "1;10;24;9;40", "wc_review": "499;251;540;240;844", "wc_reply_reviewers": "0;23;0;30;259", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 101.8, 73.5129920490249 ], "wc_strengths_avg": [ 115.2, 62.217039466692725 ], "wc_weaknesses_avg": [ 176.2, 101.03345980416587 ], "wc_questions_avg": [ 64.8, 94.85863165785177 ], "wc_limitations_avg": [ 16.8, 13.760813929415658 ], "wc_review_avg": [ 474.8, 221.9742327388474 ], "wc_reply_reviewers_avg": [ 62.4, 99.0365589062948 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7484143318881157576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "princeton.edu;berkeley.edu;princeton.edu;vt.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Princeton University;University of California, Berkeley;Virginia Tech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.berkeley.edu;https://www.vt.edu", "aff_unique_abbr": "Princeton;UC Berkeley;VT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "You Only Cache Once: Decoder-Decoder Architectures for Language Models", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96833", "id": "25Ioxw576r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=25Ioxw576r", "openreview": "https://openreview.net/forum?id=25Ioxw576r", "poster": "/media/PosterPDFs/NeurIPS%202024/96833.png?t=1730381372.9118989", "project": "", "author_site": "Yutao Sun, Li Dong, Yi Zhu, Shaohan Huang, Wenhui Wang, Shuming Ma, Quanlu Zhang, Jianyong Wang, Furu Wei", "tldr": "", "abstract": "We introduce a decoder-decoder architecture, YOCO, for large language models, which only caches key-value pairs once. It consists of two components, i.e., a cross-decoder stacked upon a self-decoder. The self-decoder efficiently encodes global key-value (KV) caches that are reused by the cross-decoder via cross-attention. The overall model behaves like a decoder-only Transformer, although YOCO only caches once. The design substantially reduces GPU memory demands, yet retains global attention capability. Additionally, the computation flow enables prefilling to early exit without changing the final output, thereby significantly speeding up the prefill stage. Experimental results demonstrate that YOCO achieves favorable performance compared to Transformer in various settings of scaling up model size and number of training tokens. We also extend YOCO to 1M context length with near-perfect needle retrieval accuracy. The profiling results show that YOCO improves inference memory, prefill latency, and throughput by orders of magnitude across context lengths and model sizes.", "keywords": "Decoder-Decoder;Model Architecture", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/c4d30f7d699d7344cdd37c33167214cf5faa15e0.zip", "author": "Yutao Sun;Li Dong;Yi Zhu;Shaohan Huang;Wenhui Wang;Shuming Ma;Quanlu Zhang;Jianyong Wang;Furu Wei", "authorids": "~Yutao_Sun1;~Li_Dong1;~Yi_Zhu8;~Shaohan_Huang1;~Wenhui_Wang1;~Shuming_Ma1;~Quanlu_Zhang1;~Jianyong_Wang2;~Furu_Wei1", "gender": "M;M;M;M;M;;;M;M", "homepage": "https://sunyt32.github.io/;http://dong.li;https://www.microsoft.com/en-us/research/people/yizhu1/;;;https://www.microsoft.com/en-us/research/people/shumma/;https://www.microsoft.com/en-us/research/people/quzha/;http://dbgroup.cs.tsinghua.edu.cn/wangjy/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "01/9758;85/5090-4;;176/0380;37/2855;;165/8284;24/2006;72/5870", "google_scholar": "apGDooYAAAAJ;wEfQgPgAAAAJ;;;BxmpMVUAAAAJ;;;VfBaiG8AAAAJ;G-V1VpwAAAAJ", "orcid": ";;;;;;;0000-0002-7555-170X;", "linkedin": ";;;;;;;;", "or_profile": "~Yutao_Sun1;~Li_Dong1;~Yi_Zhu8;~Shaohan_Huang1;~Wenhui_Wang1;~Shuming_Ma1;~Quanlu_Zhang1;~Jianyong_Wang2;~Furu_Wei1", "aff": "Tsinghua University;Microsoft Research;Microsoft Research;Microsoft;Microsoft;Microsoft;Microsoft;Tsinghua University;Microsoft Research", "aff_domain": "tsinghua.edu.cn;microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;tsinghua.edu.cn;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Full Professor;Distinguished Scientist", "bibtex": "@inproceedings{\nsun2024you,\ntitle={You Only Cache Once: Decoder-Decoder Architectures for Language Models},\nauthor={Yutao Sun and Li Dong and Yi Zhu and Shaohan Huang and Wenhui Wang and Shuming Ma and Quanlu Zhang and Jianyong Wang and Furu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=25Ioxw576r}\n}", "github": "", "reviewers": "qC7V;hQmr;fhCA;CYFA", "pdf_size": 786906, "rating": "6;7;7;8", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "63;132;80;97", "wc_strengths": "42;66;81;158", "wc_weaknesses": "171;89;58;101", "wc_questions": "4;81;51;40", "wc_limitations": "4;1;8;6", "wc_review": "284;369;278;402", "wc_reply_reviewers": "29;15;12;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 25.524498036200438 ], "wc_strengths_avg": [ 86.75, 43.42450345139251 ], "wc_weaknesses_avg": [ 104.75, 41.34232093146199 ], "wc_questions_avg": [ 44.0, 27.54087870784082 ], "wc_limitations_avg": [ 4.75, 2.5860201081971503 ], "wc_review_avg": [ 333.25, 53.5787971122906 ], "wc_reply_reviewers_avg": [ 17.0, 7.035623639735144 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15635724263274454167&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;tsinghua.edu.cn;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;0;1", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Temporal Graph Neural Tangent Kernel with Graphon-Guaranteed", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96832", "id": "266nH7kLSV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=266nH7kLSV", "openreview": "https://openreview.net/forum?id=266nH7kLSV", "poster": "", "project": "", "author_site": "Katherine Tieu, Dongqi Fu, Yada Zhu, Hendrik Hamann, Jingrui He", "tldr": "", "abstract": "_Graph Neural Tangent Kernel_ (GNTK) fuses graph neural networks and graph kernels, simplifies the process of graph representation learning, interprets the training dynamics of graph neural networks, and serves various applications like protein identification, image segmentation, and social network analysis. In practice, graph data carries complex information among entities that inevitably evolves over time, and previous static graph neural tangent kernel methods may be stuck in the sub-optimal solution in terms of both effectiveness and efficiency. As a result, extending the advantage of GNTK to temporal graphs becomes a critical problem. To this end, we propose the temporal graph neural tangent kernel, which not only extends the simplicity and interpretation ability of GNTK to the temporal setting but also leads to rigorous temporal graph classification error bounds. Furthermore, we prove that when the input temporal graph grows over time in the number of nodes, our temporal graph neural tangent kernel will converge in the limit to the _graphon_ NTK value, which implies the transferability and robustness of the proposed kernel method, named **Temp**oral **G**raph **N**eural **T**angent **K**ernel with **G**raphon-**G**uaranteed or **Temp-G$^3$NTK**. In addition to the theoretical analysis, we also perform extensive experiments, not only demonstrating the superiority of Temp-G$^3$NTK in the temporal graph classification task, but also showing that Temp-G^3NTK can achieve very competitive performance in node-level tasks like node classification compared with various SOTA graph kernel and representation learning baselines. Our code is available at https://github.com/kthrn22/TempGNTK.", "keywords": "Graph Kernel;Temporal Graph;Neural Tangent Kernel", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Katherine Tieu;Dongqi Fu;Yada Zhu;Hendrik Hamann;Jingrui He", "authorids": "~Katherine_Tieu1;~Dongqi_Fu1;~Yada_Zhu1;~Hendrik_Hamann1;~Jingrui_He1", "gender": ";M;;M;F", "homepage": "https://github.com/kthrn22;https://dongqifu.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=us-yzhu;;https://www.hejingrui.org", "dblp": "396/7084;273/0228;56/8808;;34/2685", "google_scholar": "https://scholar.google.com/citations?hl=en;WByXZAcAAAAJ;AJb408gAAAAJ;https://scholar.google.com/citations?hl=en;hXpZynkAAAAJ", "orcid": ";0000-0002-8726-9234;0000-0002-3338-6371;0000-0001-9049-1330;0000-0002-6429-6272", "linkedin": ";;yadazhu/;hendrik-hamann-68175610/;", "or_profile": "~Katherine_Tieu1;~Dongqi_Fu1;~Yada_Zhu1;~Hendrik_Hamann1;~Jingrui_He1", "aff": ";University of Illinois, Urbana Champaign;IBM Research;International Business Machines;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;us.ibm.com;ibm.com;illinois.edu", "position": ";PhD student;Principal Research Scientist;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ntieu2024temporal,\ntitle={Temporal Graph Neural Tangent Kernel with Graphon-Guaranteed},\nauthor={Katherine Tieu and Dongqi Fu and Yada Zhu and Hendrik Hamann and Jingrui He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=266nH7kLSV}\n}", "github": "", "reviewers": "YEfB;mhZJ;yH9B;cL5q", "pdf_size": 838268, "rating": "5;5;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "43;54;43;89", "wc_strengths": "54;35;59;92", "wc_weaknesses": "51;3;38;53", "wc_questions": "327;220;27;55", "wc_limitations": "8;1;1;43", "wc_review": "483;313;168;332", "wc_reply_reviewers": "98;0;27;32", "wc_reply_authors": "1956;663;29;35", "reply_reviewers": "3;0;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 18.872930350107268 ], "wc_strengths_avg": [ 60.0, 20.530465167647808 ], "wc_weaknesses_avg": [ 36.25, 20.04214309898021 ], "wc_questions_avg": [ 157.25, 122.65067264389543 ], "wc_limitations_avg": [ 13.25, 17.41228014936585 ], "wc_review_avg": [ 324.0, 111.58180855318666 ], "wc_reply_reviewers_avg": [ 39.25, 36.037307058102996 ], "wc_reply_authors_avg": [ 670.75, 785.4853197227814 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10789641372993802562&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 2, "email": ";illinois.edu;us.ibm.com;ibm.com;illinois.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;IBM;International Business Machines Corporation", "aff_unique_dep": ";IBM Research;", "aff_unique_url": "https://illinois.edu;https://www.ibm.com/research;https://www.ibm.com", "aff_unique_abbr": "UIUC;IBM;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "TFGDA: Exploring Topology and Feature Alignment in Semi-supervised Graph Domain Adaptation through Robust Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96831", "id": "26BdXIY3ik", "proceeding": "", "pdf": "https://openreview.net/pdf?id=26BdXIY3ik", "openreview": "https://openreview.net/forum?id=26BdXIY3ik", "poster": "/media/PosterPDFs/NeurIPS%202024/96831.png?t=1731310918.359276", "project": "", "author_site": "Jun Dan, Weiming Liu, Xie, Hua Yu, Shunjie Dong, Yanchao Tan", "tldr": "", "abstract": "Semi-supervised graph domain adaptation, as a branch of graph transfer learning, aims to annotate unlabeled target graph nodes by utilizing transferable knowledge learned from a label-scarce source graph. However, most existing studies primarily concentrate on aligning feature distributions directly to extract domain-invariant features, while ignoring the utilization of the intrinsic structure information in graphs. Inspired by the significance of data structure information in enhancing models' generalization performance, this paper aims to investigate how to leverage the structure information to assist graph transfer learning. To this end, we propose an innovative framework called TFGDA. Specially, TFGDA employs a structure alignment strategy named STSA to encode graphs' topological structure information into the latent space, greatly facilitating the learning of transferable features. To achieve a stable alignment of feature distributions, we also introduce a SDA strategy to mitigate domain discrepancy on the sphere. Moreover, to address the overfitting issue caused by label scarcity, a simple but effective RNC strategy is devised to guide the discriminative clustering of unlabeled nodes. Experiments on various benchmarks demonstrate the superiority of TFGDA over SOTA methods.", "keywords": "Graph Transfer Learning;Graph Domain Adaptatipn;Graphs Semi-supervised Learning;Graph Node Classification", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Jun Dan;Weiming Liu;Chunfeng Xie;Hua Yu;Shunjie Dong;Yanchao Tan", "authorids": "~Jun_Dan1;~Weiming_Liu2;~Chunfeng_Xie2;~Hua_Yu2;~Shunjie_Dong1;~Yanchao_Tan1", "gender": "M;;;F;M;F", "homepage": ";;;https://www.researchgate.net/profile/Hua-Yu-25/research;;", "dblp": "156/9683;;;02/2407-6;;210/4829.html", "google_scholar": "https://scholar.google.com.hk/citations?user=NB9Mn5MAAAAJ;;;https://scholar.google.com.hk/citations?user=DVQ_F3IAAAAJ;7Ra0UsQAAAAJ;NQWuK9UAAAAJ", "orcid": "0000-0001-7945-3608;;;;0000-0001-5601-5912;0000-0002-3526-6859", "linkedin": ";;;;;", "or_profile": "~Jun_Dan1;~Weiming_Liu2;~Chunfeng_Xie2;~Hua_Yu2;~Shunjie_Dong1;~Yanchao_Tan1", "aff": "Zhejiang University;;;Dalian University of Technology;Shanghai Jiaotong University;Fuzhou University", "aff_domain": "zju.edu.cn;;;dlut.edu.cn;sjtu.edu.cn;fzu.edu.cn", "position": "PhD student;;;PhD student;Assistant Professor;Lecturer", "bibtex": "@inproceedings{\ndan2024tfgda,\ntitle={{TFGDA}: Exploring Topology and Feature Alignment in Semi-supervised Graph Domain Adaptation through Robust Clustering},\nauthor={Jun Dan and Weiming Liu and Chunfeng Xie and Hua Yu and Shunjie Dong and Yanchao Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=26BdXIY3ik}\n}", "github": "", "reviewers": "aMJG;9hdx;R5oT;byDA", "pdf_size": 27876588, "rating": "4;6;7;7", "confidence": "5;5;5;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "79;52;126;132", "wc_strengths": "59;57;140;163", "wc_weaknesses": "96;67;283;155", "wc_questions": "4;2;2;167", "wc_limitations": "4;15;17;29", "wc_review": "242;193;568;646", "wc_reply_reviewers": "0;18;0;46", "wc_reply_authors": "84;110;41;112", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.25, 33.22179254645962 ], "wc_strengths_avg": [ 104.75, 47.457217575412066 ], "wc_weaknesses_avg": [ 150.25, 82.94388163089548 ], "wc_questions_avg": [ 43.75, 71.16310490696706 ], "wc_limitations_avg": [ 16.25, 8.870597499605086 ], "wc_review_avg": [ 412.25, 197.45426685691044 ], "wc_reply_reviewers_avg": [ 16.0, 18.81488772222678 ], "wc_reply_authors_avg": [ 86.75, 28.630185119904482 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2547518144258329093&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "zju.edu.cn;;;dlut.edu.cn;sjtu.edu.cn;fzu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Zhejiang University;Dalian University of Technology;Shanghai Jiao Tong University;Fuzhou University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;http://www.dlut.edu.cn/;https://www.sjtu.edu.cn;https://www.fznu.edu.cn", "aff_unique_abbr": "ZJU;DUT;SJTU;FZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "EvolveDirector: Approaching Advanced Text-to-Image Generation with Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96830", "id": "28bFUt6rUY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=28bFUt6rUY", "openreview": "https://openreview.net/forum?id=28bFUt6rUY", "poster": "/media/PosterPDFs/NeurIPS%202024/96830.png?t=1731129614.1385515", "project": "", "author_site": "Rui Zhao, Hangjie Yuan, Yujie Wei, Shiwei Zhang, Yuchao Gu, Lingmin Ran, Xiang Wang, Jay Zhangjie Wu, David Junhao Zhang, Yingya Zhang, Mike Zheng Shou", "tldr": "", "abstract": "Recent advancements in generation models have showcased remarkable capabilities in generating fantastic content. However, most of them are trained on proprietary high-quality data, and some models withhold their parameters and only provide accessible application programming interfaces (APIs), limiting their benefits for downstream tasks. To explore the feasibility of training a text-to-image generation model comparable to advanced models using publicly available resources, we introduce EvolveDirector. This framework interacts with advanced models through their public APIs to obtain text-image data pairs to train a base model. Our experiments with extensive data indicate that the model trained on generated data of the advanced model can approximate its generation capability. However, it requires large-scale samples of 10 million or more. This incurs significant expenses in time, computational resources, and especially the costs associated with calling fee-based APIs. To address this problem, we leverage pre-trained large vision-language models (VLMs) to guide the evolution of the base model. VLM continuously evaluates the base model during training and dynamically updates and refines the training dataset by the discrimination, expansion, deletion, and mutation operations. Experimental results show that this paradigm significantly reduces the required data volume. Furthermore, when approaching multiple advanced models, EvolveDirector can select the best samples generated by them to learn powerful and balanced abilities. The final trained model Edgen is demonstrated to outperform these advanced models. The code and model weights are available at https://github.com/showlab/EvolveDirector.", "keywords": "Text-to-Image Generation;Large Vision-Language Models;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Rui Zhao;Hangjie Yuan;Yujie Wei;Shiwei Zhang;Yuchao Gu;Lingmin Ran;Xiang Wang;Jay Zhangjie Wu;David Junhao Zhang;Yingya Zhang;Mike Zheng Shou", "authorids": "~Rui_Zhao12;~Hangjie_Yuan1;~Yujie_Wei1;~Shiwei_Zhang2;~Yuchao_Gu1;~Lingmin_Ran1;~Xiang_Wang9;~Jay_Zhangjie_Wu1;~David_Junhao_Zhang1;~Yingya_Zhang3;~Mike_Zheng_Shou1", "gender": "M;M;M;M;M;M;M;M;M;M;", "homepage": ";https://jacobyuan7.github.io/;https://weilllllls.github.io/;https://www.researchgate.net/profile/Shiwei_Zhang7/research;https://ycgu.site/;https://www.linkedin.com/me?trk=p_mwlite_my_network-secondary_n;;https://zhangjiewu.github.io/;;https://scholar.google.com/citations?user=6dCcnNEAAAAJ&hl=en;http://www.columbia.edu/~zs2262/", "dblp": "26/2578-19;293/9956;51/9346-1;;266/4395;339/3288;;322/0749;142/2510;307/3295;284/0807", "google_scholar": "https://scholar.google.com.hk/citations?user=wYs7vogAAAAJ;jQ3bFDMAAAAJ;grn93WcAAAAJ;ZO3OQ-8AAAAJ;YpfrXyQAAAAJ;;cQbXvkcAAAAJ;WVp4yjoAAAAJ;16RDSEUAAAAJ;6dCcnNEAAAAJ;h1-3lSoAAAAJ", "orcid": "0000-0003-4271-0206;;;0000-0002-6929-5295;;;0000-0003-0785-3367;;;;", "linkedin": ";;;;;https://www.linkedin.com/me?trk=p_mwlite_my_network-secondary_n;;;;;", "or_profile": "~Rui_Zhao12;~Hangjie_Yuan1;~Yujie_Wei1;~Shiwei_Zhang2;~Yuchao_Gu1;~Lingmin_Ran1;~Xiang_Wang9;~Jay_Zhangjie_Wu1;~Yingya_Zhang3;~Junhao_Zhang1;~Zheng_Shou1", "aff": "Alibaba Group;Zhejiang University;National University of Singapore;Alibaba Group;National University of Singapore;National University of Singapore;Huazhong University of Science and Technology;National University of Singapore;Alibaba Group;National University of Singapore;National University of Singapore", "aff_domain": "alibaba-inc.com;zju.edu.cn;u.nus.edu;alibaba-inc.com;u.nus.edu;u.nus.edu;hust.edu.cn;u.nus.edu;alibaba-inc.com;nus.edu;nus.edu.sg", "position": "Intern;PhD student;Intern;Researcher;PhD student;PhD student;PhD student;PhD student;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024evolvedirector,\ntitle={EvolveDirector: Approaching Advanced Text-to-Image Generation with Large Vision-Language Models},\nauthor={Rui Zhao and Hangjie Yuan and Yujie Wei and Shiwei Zhang and Yuchao Gu and Lingmin Ran and Xiang Wang and Jay Zhangjie Wu and David Junhao Zhang and Yingya Zhang and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=28bFUt6rUY}\n}", "github": "", "reviewers": "9FQn;gzrb;i5a7", "pdf_size": 11861990, "rating": "5;6;8", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "235;199;60", "wc_strengths": "201;133;108", "wc_weaknesses": "213;126;76", "wc_questions": "2;329;7", "wc_limitations": "6;50;1", "wc_review": "657;837;252", "wc_reply_reviewers": "0;90;18", "wc_reply_authors": "0;342;19", "reply_reviewers": "0;1;1", "reply_authors": "1;3;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 164.66666666666666, 75.45565290656198 ], "wc_strengths_avg": [ 147.33333333333334, 39.296593010364425 ], "wc_weaknesses_avg": [ 138.33333333333334, 56.60584971734125 ], "wc_questions_avg": [ 112.66666666666667, 152.98438555036333 ], "wc_limitations_avg": [ 19.0, 22.015146301277824 ], "wc_review_avg": [ 582.0, 244.64259645450136 ], "wc_reply_reviewers_avg": [ 36.0, 38.88444419044716 ], "wc_reply_authors_avg": [ 120.33333333333333, 156.93381478260898 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7776721284200873862&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "alibaba-inc.com;zju.edu.cn;u.nus.edu;alibaba-inc.com;u.nus.edu;u.nus.edu;hust.edu.cn;u.nus.edu;alibaba-inc.com;nus.edu;nus.edu.sg", "author_num": 11, "aff_unique_index": "0;1;2;0;2;2;3;2;0;2;2", "aff_unique_norm": "Alibaba Group;Zhejiang University;National University of Singapore;Huazhong University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.zju.edu.cn;https://www.nus.edu.sg;http://www.hust.edu.cn", "aff_unique_abbr": "Alibaba;ZJU;NUS;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;1;0;1;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Vision Transformer Neural Architecture Search for Out-of-Distribution Generalization: Benchmark and Insights", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96829", "id": "2AIwiIkE0s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2AIwiIkE0s", "openreview": "https://openreview.net/forum?id=2AIwiIkE0s", "poster": "/media/PosterPDFs/NeurIPS%202024/96829.png?t=1731385222.962471", "project": "", "author_site": "Sy-Tuyen Ho, Tuan Van Vo, Somayeh Ebrahimkhani, Ngai-Man (Man) Cheung", "tldr": "", "abstract": "While Vision Transformer (ViT) have achieved success across various machine learning tasks, deploying them in real-world scenarios faces a critical challenge: generalizing under Out-of-Distribution (OoD) shifts. A crucial research gap remains in understanding how to design ViT architectures \u2013 both manually and automatically \u2013 to excel in OoD generalization. **To address this gap,** we introduce OoD-ViT-NAS, the first systematic benchmark for ViT Neural Architecture Search (NAS) focused on OoD generalization. This comprehensive benchmark includes 3,000 ViT architectures of varying model computational budgets evaluated on common large-scale OoD datasets. With this comprehensive benchmark at hand, we analyze the factors that contribute to the OoD generalization of ViT architecture. Our analysis uncovers several key insights. Firstly, we show that ViT architecture designs have a considerable impact on OoD generalization. Secondly, we observe that In-Distribution (ID) accuracy might not be a very good indicator of OoD accuracy. This underscores the risk that ViT architectures optimized for ID accuracy might not perform well under OoD shifts. Thirdly, we conduct the first study to explore NAS for ViT\u2019s OoD robustness. Specifically, we study 9 Training-free NAS for their OoD generalization performance on our benchmark. We observe that existing Training-free NAS are largely ineffective in predicting OoD accuracy despite their effectiveness at predicting ID accuracy. Moreover, simple proxies like #Param or #Flop surprisingly outperform more complex Training-free NAS in predicting ViTs OoD accuracy. Finally, we study how ViT architectural attributes impact OoD generalization. We discover that increasing embedding dimensions of a ViT architecture generally can improve the OoD generalization. We show that ViT architectures in our benchmark exhibit a wide range of OoD accuracy, with up to 11.85% for some OoD shift, prompting the importance to study ViT architecture design for OoD. We firmly believe that our OoD-ViT-NAS benchmark and our analysis can catalyze and streamline important research on understanding how ViT architecture designs influence OoD generalization. **Our OoD-NAS-ViT benchmark and code are available at [https://hosytuyen.github.io/projects/OoD-ViT-NAS](https://hosytuyen.github.io/projects/OoD-ViT-NAS)**", "keywords": "Vision Transformer;Neural Architecture Search;Out-of-Distribution Generalization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Sy-Tuyen Ho;Tuan Van Vo;Somayeh Ebrahimkhani;Ngai-man Cheung", "authorids": "~Sy-Tuyen_Ho1;~Tuan_Van_Vo2;~Somayeh_Ebrahimkhani1;~Ngai-man_Cheung1", "gender": "M;F;M;M", "homepage": ";;https://sites.google.com/site/mancheung0407/;https://vovantuan1999a.github.io/tuanvovan/", "dblp": "287/7389;;82/3605;", "google_scholar": "fvuhLpIAAAAJ;https://scholar.google.nl/citations?user=S2OJdecAAAAJ;https://scholar.google.com.sg/citations?hl=en;TsjvwzgAAAAJ", "orcid": "0000-0003-3707-257X;;0000-0003-0135-3791;", "linkedin": ";;;vovantuan1999/", "or_profile": "~Sy-Tuyen_Ho1;~Somayeh_Ebrahimkhani1;~Ngai-man_Cheung1;~Tuan_Vo_Van1", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "position": "Researcher;Postdoc;Associate Professor;Researcher", "bibtex": "@inproceedings{\nho2024vision,\ntitle={Vision Transformer Neural Architecture Search for Out-of-Distribution Generalization: Benchmark and Insights},\nauthor={Sy-Tuyen Ho and Tuan Van Vo and Somayeh Ebrahimkhani and Ngai-man Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2AIwiIkE0s}\n}", "github": "", "reviewers": "Lm5n;pt1z;HQRy;f7Da", "pdf_size": 35928140, "rating": "6;6;7;8", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "2;3;3;3", "wc_summary": "54;102;70;58", "wc_strengths": "9;57;135;90", "wc_weaknesses": "159;226;78;226", "wc_questions": "6;34;111;117", "wc_limitations": "5;8;17;155", "wc_review": "233;427;411;646", "wc_reply_reviewers": "242;14;17;101", "wc_reply_authors": "755;283;25;28", "reply_reviewers": "3;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.0, 18.841443681416774 ], "wc_strengths_avg": [ 72.75, 46.05635135353212 ], "wc_weaknesses_avg": [ 172.25, 60.90309926432316 ], "wc_questions_avg": [ 67.0, 48.07806152498247 ], "wc_limitations_avg": [ 46.25, 62.94193752975833 ], "wc_review_avg": [ 429.25, 146.48613415610367 ], "wc_reply_reviewers_avg": [ 93.5, 92.57564474525684 ], "wc_reply_authors_avg": [ 272.75, 297.469641308151 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QG_4SBeq6ukJ:scholar.google.com/&scioq=Vision+Transformer+Neural+Architecture+Search+for+Out-of-Distribution+Generalization:+Benchmark+and+Insights&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Singapore University of Technology and Design", "aff_unique_dep": "", "aff_unique_url": "https://www.sutd.edu.sg", "aff_unique_abbr": "SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "2BOb4SvDFr", "title": "Two applications of Min-Max-Jump distance", "track": "main", "status": "Reject", "tldr": "", "abstract": "We explore two applications of Min-Max-Jump distance (MMJ distance): MMJ-based K-means and MMJ-based internal clustering evaluation index. K-means and its variants are possibly the most popular clustering approach. A key drawback of K-means is that it cannot deal with data sets that are not the union of well-separated, spherical clusters. MMJ-based K-means proposed in this paper overcomes this demerit of K-means, so that it can handle irregularly shaped clusters. Evaluation (or \"validation\") of clustering results is fundamental to clustering and thus to machine learning. Popular internal clustering evaluation indices like Silhouette coefficient, Davies\u2013Bouldin index, and Calinski-Harabasz index performs poorly in evaluating irregularly shaped clusters. MMJ-based internal clustering evaluation index uses MMJ distance and Semantic Center of Mass (SCOM) to revise the indices, so that it can evaluate irregularly shaped data. An experiment shows introducing MMJ distance to internal clustering evaluation index, can systematically improve the performance. We also devise two algorithms for calculating MMJ distance.", "keywords": "distance;Silhouette coefficient;Davies\u2013Bouldin index;Calinski-Harabasz index;K-means;clustering;clustering evaluation;minimax path problem", "primary_area": "other", "supplementary_material": "/attachment/173bf56fcbff88f9e6081ddfcd456e67d857e71c.zip", "author": "Gangli Liu", "authorids": "~Gangli_Liu1", "gender": "", "homepage": "https://github.com/mike-liuliu", "dblp": "176/5211", "google_scholar": "", "orcid": "0000-0003-3921-0446", "linkedin": "", "or_profile": "~Gangli_Liu1", "aff": "Tsinghua University", "aff_domain": "tsinghua.edu.cn", "position": "PhD student", "bibtex": "@misc{\nanonymous2024two,\ntitle={Two applications of Min-Max-Jump distance},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=2BOb4SvDFr}\n}", "github": "", "project": "", "reviewers": "VBcM;vcc5;BgWh;p2N6", "site": "https://openreview.net/forum?id=2BOb4SvDFr", "pdf_size": 659486, "rating": "3;4;4;4", "confidence": "4;3;4;5", "soundness": "2;4;3;3", "novelty": "1;3;2;2", "presentation": "1;1;2;2", "wc_summary": "30;401;69;96", "wc_strengths": "14;86;68;23", "wc_weaknesses": "169;321;135;14", "wc_questions": "1;386;23;1", "wc_limitations": "28;2;27;1", "wc_review": "242;1196;322;135", "wc_reply_reviewers": "29;24;0;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 149.0, 147.37197834052444 ], "wc_strengths_avg": [ 47.75, 30.102948360584218 ], "wc_weaknesses_avg": [ 159.75, 109.47916468442752 ], "wc_questions_avg": [ 102.75, 163.78091311260906 ], "wc_limitations_avg": [ 14.5, 13.009611831257688 ], "wc_review_avg": [ 473.75, 422.23593819095976 ], "wc_reply_reviewers_avg": [ 14.25, 12.457427503300993 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xWS8wM1C_QAJ:scholar.google.com/&scioq=Two+applications+of+Min-Max-Jump+distance&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "2Bef9YxSJc", "title": "Language Models Encode Collaborative Signals in Recommendation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent studies empirically indicate that language models (LMs) encode rich world knowledge beyond mere semantics, attracting significant attention across various fields. \nHowever, in the recommendation domain, it remains uncertain whether LMs implicitly encode user preference information. Contrary to the prevailing understanding that LMs and traditional recommender models learn two distinct representation spaces due to a huge gap in language and behavior modeling objectives, this work rethinks such understanding and explores extracting a recommendation space directly from the language representation space.\nSurprisingly, our findings demonstrate that item representations, when linearly mapped from advanced LM representations, yield superior recommendation performance. \nThis outcome suggests a homomorphic relationship between the language representation space and an effective recommendation space, implying that collaborative signals may indeed be encoded within advanced LMs.\nMotivated by these findings, we propose a simple yet effective collaborative filtering (CF) model named AlphaRec, which utilizes language representations of item textual metadata (e.g., titles) instead of traditional ID-based embeddings. \nSpecifically, AlphaRec is comprised of three main components: a multilayer perceptron (MLP), graph convolution, and contrastive learning (CL) loss function, making it extremely easy to implement and train. \nOur empirical results show that AlphaRec outperforms leading ID-based CF models on multiple datasets, marking the first instance of such a recommender with text embeddings achieving this level of performance.\nMoreover, AlphaRec introduces a new text-based CF paradigm with several desirable advantages: being easy to implement, lightweight, rapid convergence, superior zero-shot recommendation abilities in new domains, and being aware of user intention.", "keywords": "Collaborative filtering;Language-representation-based recommendation;Language models;Language model representations", "primary_area": "other", "supplementary_material": "/attachment/73edbac06dc3df634090a1d9e406b39137478758.zip", "author": "Leheng Sheng;An Zhang;Yi Zhang;Yuxin Chen;Xiang Wang;Tat-Seng Chua", "authorids": "~Leheng_Sheng2;~An_Zhang2;~Yi_Zhang93;~Yuxin_Chen9;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;M;M;F;M", "homepage": "https://lehengthu.github.io/;https://zy20031230.github.io/;https://scholar.google.com/citations?user=krdkLawAAAAJ&hl=en;https://github.com/xiangwang1223;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "359/0347.html;;;31/2864-10;78/5581-3;", "google_scholar": "https://scholar.google.com.hk/citations?user=s8bNbU0AAAAJ;https://scholar.google.com/citations?view_op=list_works;krdkLawAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0002-5764-6596;0009-0003-0764-4669;0009-0003-6715-4637;0000-0002-6148-6329;;0000-0001-6097-7807", "linkedin": ";;;;;", "or_profile": "~Leheng_Sheng2;~Yi_Zhang93;~Yuxin_Chen9;~Xiang_Wang6;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "Tsinghua University;University of Science and Technology of China;National University of Singapore;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "mails.tsinghua.edu.cn;mail.ustc.edu.cn;u.nus.edu;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "MS student;Undergrad student;MS student;Full Professor;Postdoc;Full Professor", "bibtex": "@misc{\nanonymous2024language,\ntitle={Language Models Encode Collaborative Signals in Recommendation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=2Bef9YxSJc}\n}", "github": "", "project": "", "reviewers": "EzpW;GXUC;NfrM;tJs7", "site": "https://openreview.net/forum?id=2Bef9YxSJc", "pdf_size": 5429659, "rating": "4;4;4;7", "confidence": "4;3;3;5", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "103;66;18;91", "wc_strengths": "75;43;19;20", "wc_weaknesses": "792;166;119;8", "wc_questions": "409;83;7;156", "wc_limitations": "1;1;1;2", "wc_review": "1380;359;164;277", "wc_reply_reviewers": "231;0;0;12", "wc_reply_authors": "978;0;0;17", "reply_reviewers": "2;0;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 32.592176975464525 ], "wc_strengths_avg": [ 39.25, 22.76373211931646 ], "wc_weaknesses_avg": [ 271.25, 306.07954439981773 ], "wc_questions_avg": [ 163.75, 151.07841506979082 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 545.0, 487.0333664134317 ], "wc_reply_reviewers_avg": [ 60.75, 98.41589048522602 ], "wc_reply_authors_avg": [ 248.75, 421.0898805480844 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784892, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6232423894855563163&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;2;2", "aff_unique_norm": "Tsinghua University;University of Science and Technology of China;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ustc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;USTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "The Star Geometry of Critic-Based Regularizer Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96828", "id": "2GQeCbhxVy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2GQeCbhxVy", "openreview": "https://openreview.net/forum?id=2GQeCbhxVy", "poster": "/media/PosterPDFs/NeurIPS%202024/96828.png?t=1733891254.8060193", "project": "", "author_site": "Oscar Leong, Eliza O'Reilly, Yong Sheng Soh", "tldr": "", "abstract": "Variational regularization is a classical technique to solve statistical inference tasks and inverse problems, with modern data-driven approaches parameterizing regularizers via deep neural networks showcasing impressive empirical performance. Recent works along these lines learn task-dependent regularizers. This is done by integrating information about the measurements and ground-truth data in an unsupervised, critic-based loss function, where the regularizer attributes low values to likely data and high values to unlikely data. However, there is little theory about the structure of regularizers learned via this process and how it relates to the two data distributions. To make progress on this challenge, we initiate a study of optimizing critic-based loss functions to learn regularizers over a particular family of regularizers: gauges (or Minkowski functionals) of star-shaped bodies. This family contains regularizers that are commonly employed in practice and shares properties with regularizers parameterized by deep neural networks. We specifically investigate critic-based losses derived from variational representations of statistical distances between probability measures. By leveraging tools from star geometry and dual Brunn-Minkowski theory, we illustrate how these losses can be interpreted as dual mixed volumes that depend on the data distribution. This allows us to derive exact expressions for the optimal regularizer in certain cases. Finally, we identify which neural network architectures give rise to such star body gauges and when do such regularizers have favorable properties for optimization. More broadly, this work highlights how the tools of star geometry can aid in understanding the geometry of unsupervised regularizer learning.", "keywords": "inverse problems;regularization;unsupervised learning;convex geometry", "primary_area": "optimization", "supplementary_material": "", "author": "Oscar Leong;Eliza O'Reilly;Yong Sheng Soh", "authorids": "~Oscar_Leong2;~Eliza_O'Reilly1;~Yong_Sheng_Soh1", "gender": "F;;M", "homepage": "https://sites.google.com/view/eliza-oreilly/home;https://yssoh.github.io/;https://www.oscarleong.com/", "dblp": ";123/9574.html;", "google_scholar": "LptnkxkAAAAJ;OPntcXsAAAAJ;LN9O4vkAAAAJ", "orcid": ";0000-0003-3367-1401;", "linkedin": ";;", "or_profile": "~Eliza_O'Reilly1;~Yong_Sheng_Soh1;~Oscar_Francisco_Leong1", "aff": "Johns Hopkins University;National University of Singapore;California Institute of Technology", "aff_domain": "johnshopkins.edu;nus.edu.sg;caltech.edu", "position": "Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nleong2024the,\ntitle={The Star Geometry of Critic-Based Regularizer Learning},\nauthor={Oscar Leong and Eliza O'Reilly and Yong Sheng Soh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2GQeCbhxVy}\n}", "github": "", "reviewers": "3HXp;XeHQ;5WpE;kSE2", "pdf_size": 1208425, "rating": "5;6;6;8", "confidence": "4;2;3;3", "soundness": "3;3;3;3", "novelty": "1;3;3;4", "presentation": "3;2;3;4", "wc_summary": "64;111;67;75", "wc_strengths": "14;32;41;45", "wc_weaknesses": "37;45;159;125", "wc_questions": "41;83;114;269", "wc_limitations": "3;1;14;3", "wc_review": "159;272;395;517", "wc_reply_reviewers": "17;11;147;39", "wc_reply_authors": "19;76;133;50", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.25, 18.766659265836314 ], "wc_strengths_avg": [ 33.0, 11.937336386313323 ], "wc_weaknesses_avg": [ 91.5, 51.9879793798528 ], "wc_questions_avg": [ 126.75, 86.11728920489776 ], "wc_limitations_avg": [ 5.25, 5.11737237261468 ], "wc_review_avg": [ 335.75, 133.8532311899866 ], "wc_reply_reviewers_avg": [ 53.5, 54.97954164959908 ], "wc_reply_authors_avg": [ 69.5, 41.847939017351855 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mLq8jFaKRuQJ:scholar.google.com/&scioq=The+Star+Geometry+of+Critic-Based+Regularizer+Learning&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "johnshopkins.edu;nus.edu.sg;caltech.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Johns Hopkins University;National University of Singapore;California Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jhu.edu;https://www.nus.edu.sg;https://www.caltech.edu", "aff_unique_abbr": "JHU;NUS;Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Singapore" }, { "title": "Differentiable Task Graph Learning: Procedural Activity Representation and Online Mistake Detection from Egocentric Videos", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96827", "id": "2HvgvB4aWq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2HvgvB4aWq", "openreview": "https://openreview.net/forum?id=2HvgvB4aWq", "poster": "/media/PosterPDFs/NeurIPS%202024/96827.png?t=1731323022.6969874", "project": "", "author_site": "Luigi Seminara, Giovanni Maria Farinella, Antonino Furnari", "tldr": "", "abstract": "Procedural activities are sequences of key-steps aimed at achieving specific goals. They are crucial to build intelligent agents able to assist users effectively. In this context, task graphs have emerged as a human-understandable representation of procedural activities, encoding a partial ordering over the key-steps. While previous works generally relied on hand-crafted procedures to extract task graphs from videos, in this paper, we propose an approach based on direct maximum likelihood optimization of edges' weights, which allows gradient-based learning of task graphs and can be naturally plugged into neural network architectures. Experiments on the CaptainCook4D dataset demonstrate the ability of our approach to predict accurate task graphs from the observation of action sequences, with an improvement of +16.7% over previous approaches. Owing to the differentiability of the proposed framework, we also introduce a feature-based approach, aiming to predict task graphs from key-step textual or video embeddings, for which we observe emerging video understanding abilities. Task graphs learned with our approach are also shown to significantly enhance online mistake detection in procedural egocentric videos, achieving notable gains of +19.8% and +7.5% on the Assembly101-O and EPIC-Tent-O datasets. Code for replicating the experiments is available at https://github.com/fpv-iplab/Differentiable-Task-Graph-Learning.", "keywords": "Task Graph;Procedural Sequences;Online Mistake Detection;Video Understanding", "primary_area": "machine_vision", "supplementary_material": "/attachment/9f3ddd588e149e679a03904d05a19903d8018412.zip", "author": "Luigi Seminara;Giovanni Maria Farinella;Antonino Furnari", "authorids": "~Luigi_Seminara1;~Giovanni_Maria_Farinella1;~Antonino_Furnari1", "gender": "M;M;M", "homepage": "https://seminaraluigi.altervista.org/;http://www.dmi.unict.it/farinella;http://antoninofurnari.it/", "dblp": "362/2867;31/6643;147/3952", "google_scholar": "M3m0hIkAAAAJ;YwQboCoAAAAJ;https://scholar.google.it/citations?user=eXREn_EAAAAJ", "orcid": "0009-0004-2242-1225;0000-0002-6034-0432;0000-0001-6911-0302", "linkedin": "luigi-seminara/;;", "or_profile": "~Luigi_Seminara1;~Giovanni_Maria_Farinella1;~Antonino_Furnari1", "aff": "University of Catania;University of Catania;University of Catania", "aff_domain": "unict.it;dmi.unict.it;unict.it", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nseminara2024differentiable,\ntitle={Differentiable Task Graph Learning: Procedural Activity Representation and Online Mistake Detection from Egocentric Videos},\nauthor={Luigi Seminara and Giovanni Maria Farinella and Antonino Furnari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2HvgvB4aWq}\n}", "github": "", "reviewers": "tC41;PJ9t;pcHQ", "pdf_size": 38837928, "rating": "6;7;8", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "245;68;57", "wc_strengths": "86;94;133", "wc_weaknesses": "332;135;65", "wc_questions": "3;6;19", "wc_limitations": "1;21;24", "wc_review": "667;324;298", "wc_reply_reviewers": "268;16;73", "wc_reply_authors": "78;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 123.33333333333333, 86.14845068575264 ], "wc_strengths_avg": [ 104.33333333333333, 20.531818125912658 ], "wc_weaknesses_avg": [ 177.33333333333334, 113.03785010143972 ], "wc_questions_avg": [ 9.333333333333334, 6.944222218666553 ], "wc_limitations_avg": [ 15.333333333333334, 10.208928554075703 ], "wc_review_avg": [ 429.6666666666667, 168.1553514528224 ], "wc_reply_reviewers_avg": [ 119.0, 107.89810007595129 ], "wc_reply_authors_avg": [ 26.0, 36.76955262170047 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2540102401127620457&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "unict.it;dmi.unict.it;unict.it", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Catania", "aff_unique_dep": "", "aff_unique_url": "https://www.unict.it", "aff_unique_abbr": "UNICT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "id": "2HzZIDo48o", "title": "Meta-Referential Games to Learn Compositional Learning Behaviours", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Human beings use compositionality to generalise from past experiences to novel experiences, by assuming that past experiences can be separated into fundamental atomic components that can be recombined in novel ways. % to support our ability to engage with novel experiences.\nWe frame this as the ability to learn to generalise compositionally, and refer to behaviours making use of this ability as compositional learning behaviours (CLBs).\nA central problem to learning CLBs is the resolution of a binding problem (BP).\nWhile it is another feat of intelligence that human beings perform with ease, it is not the case for state-of-the-art artificial agents.\nThus, in order to build artificial agents able to collaborate with human beings, we propose to develop a novel benchmark to investigate agents' abilities to exhibit CLBs by solving a domain-agnostic version of the BP. \nWe take inspiration from the language emergence and grounding framework of referential games and propose a meta-learning extension of referential games, entitled Meta-Referential Games, and use this framework to build our benchmark, the Symbolic Behaviour Benchmark (S2B). \nWe provide baseline results and error analysis showing that the S2B is a compelling challenge that we hope will spur the research community towards developing more capable artificial agents.", "keywords": "referential game;language grounding;compositionality;systematicity;few-shot learning;meta-learning;reinforcement learning;language emergence", "primary_area": "", "supplementary_material": "/attachment/989466173a0679a9e8557e85775ac3589f734c72.zip", "author": "Kevin Yandoka Denamganai;Sondess Missaoui;James Alfred Walker", "authorids": "~Kevin_Yandoka_Denamganai1;~Sondess_Missaoui1;~James_Alfred_Walker1", "gender": "M;F;M", "homepage": "https://kevindenamganai.netlify.app/;https://digitalcreativity.ac.uk/people/dr-sondess-missaoui;", "dblp": "249/7680;143/1419.html;35/3889", "google_scholar": "PPdQb4QAAAAJ;K2yUNQIAAAAJ;https://scholar.google.co.uk/citations?user=Yl5OycsAAAAJ", "orcid": "0000-0002-8776-4331;;", "linkedin": ";sondess-missaoui-03583531/;", "or_profile": "~Kevin_Yandoka_Denamganai1;~Sondess_Missaoui1;~James_Alfred_Walker1", "aff": "University of York;University of York;University of York", "aff_domain": "york.ac.uk;york.ac.uk;york.ac.uk", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024metareferential,\ntitle={Meta-Referential Games to Learn Compositional Learning Behaviours},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=2HzZIDo48o}\n}", "github": "", "project": "", "reviewers": "SRcF;8j7q;vwTF", "site": "https://openreview.net/forum?id=2HzZIDo48o", "pdf_size": 1570438, "rating": "5;6;7", "confidence": "5;1;2", "wc_summary_and_contributions": "223;68;119", "wc_strengths": "94;30;5", "wc_improvement": "34;5;6", "wc_limitations": "308;9;5", "wc_correctness": "142;13;11", "wc_clarity": "152;6;1", "wc_relation_to_prior_work": "1;13;23", "wc_documentation": "23;21;1", "wc_additional_feedback": "1;1;1", "wc_review": "978;166;172", "wc_reply_reviewers": "0;0;19", "wc_reply_authors": "0;0;56", "reply_reviewers": "0;0;1", "reply_authors": "1;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_summary_and_contributions_avg": [ 136.66666666666666, 64.49978466802987 ], "wc_strengths_avg": [ 43.0, 37.47888294315436 ], "wc_improvement_avg": [ 15.0, 13.4412301024373 ], "wc_limitations_avg": [ 107.33333333333333, 141.90215721482963 ], "wc_correctness_avg": [ 55.333333333333336, 61.28802674439945 ], "wc_clarity_avg": [ 53.0, 70.03332540060244 ], "wc_relation_to_prior_work_avg": [ 12.333333333333334, 8.993825042154695 ], "wc_documentation_avg": [ 15.0, 9.93310961716756 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 438.6666666666667, 381.3741236924999 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.956685895029603 ], "wc_reply_authors_avg": [ 18.666666666666668, 26.398653164297773 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.720576692122892, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3952989524077152401&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of York", "aff_unique_dep": "", "aff_unique_url": "https://www.york.ac.uk", "aff_unique_abbr": "York", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "The Sample Complexity of Gradient Descent in Stochastic Convex Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96826", "id": "2INcTKPBy4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2INcTKPBy4", "openreview": "https://openreview.net/forum?id=2INcTKPBy4", "poster": "", "project": "", "tldr": "", "abstract": "We analyze the sample complexity of full-batch Gradient Descent (GD) in the setup of non-smooth Stochastic Convex Optimization. We show that the generalization error of GD, with common choice of hyper-parameters, can be $\\tilde \\Theta(d/m+1/\\sqrt{m})$, where d is the dimension and m is the sample size. This matches the sample complexity of \\emph{worst-case} empirical risk minimizers. That means that, in contrast with other algorithms, GD has no advantage over naive ERMs. Our bound follows from a new generalization bound that depends on both the dimension as well as the learning rate and number of iterations. Our bound also shows that, for general hyper-parameters, when the dimension is strictly larger than number of samples, $T=\\Omega(1/\\epsilon^4)$ iterations are necessary to avoid overfitting. This resolves an open problem by Schlisserman et al.23 and Amir er Al.21, and improves over previous lower bounds that demonstrated that the sample size must be at least square root of the dimension.", "keywords": "Stochastic Convex Optimization;Learning Theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Roi Livni", "authorids": "~Roi_Livni1", "gender": "Not Specified", "homepage": "https://www.rlivni.sites.tau.ac.il/", "dblp": "59/11348", "google_scholar": "xhU85M4AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Roi_Livni1", "aff": "Tel Aviv University", "aff_domain": "tau.ac.il", "position": "Assistant Professor", "bibtex": "@inproceedings{\nlivni2024the,\ntitle={The Sample Complexity of Gradient Descent in Stochastic Convex Optimization},\nauthor={Roi Livni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2INcTKPBy4}\n}", "github": "", "reviewers": "kB6m;gMtP;kmCP;pMTh", "pdf_size": 729525, "rating": "3;6;7;8", "confidence": "2;2;3;5", "soundness": "1;3;3;4", "novelty": "2;3;3;4", "presentation": "1;2;3;4", "wc_summary": "25;87;131;73", "wc_strengths": "7;72;67;15", "wc_weaknesses": "93;108;58;9", "wc_questions": "20;41;150;54", "wc_limitations": "20;1;1;1", "wc_review": "165;309;407;152", "wc_reply_reviewers": "184;87;38;32", "wc_reply_authors": "0;79;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 79.0, 37.815340802378074 ], "wc_strengths_avg": [ 40.25, 29.439556722206262 ], "wc_weaknesses_avg": [ 67.0, 38.08543028508409 ], "wc_questions_avg": [ 66.25, 49.851654937424094 ], "wc_limitations_avg": [ 5.75, 8.227241335952167 ], "wc_review_avg": [ 258.25, 105.69620381073295 ], "wc_reply_reviewers_avg": [ 85.25, 60.87435831283973 ], "wc_reply_authors_avg": [ 19.75, 34.208003449485325 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.7637626158259732, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8919558285753803933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tau.ac.il", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "Revisiting Adversarial Patches for Designing Camera-Agnostic Attacks against Person Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96825", "id": "2Inwtjvyx8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2Inwtjvyx8", "openreview": "https://openreview.net/forum?id=2Inwtjvyx8", "poster": "/media/PosterPDFs/NeurIPS%202024/96825.png?t=1731135294.7471964", "project": "", "author_site": "Hui Wei, Zhixiang Wang, Kewei Zhang, Jiaqi Hou, Yuanwei Liu, Hao Tang, Zheng Wang", "tldr": "", "abstract": "Physical adversarial attacks can deceive deep neural networks (DNNs), leading to erroneous predictions in real-world scenarios. To uncover potential security risks, attacking the safety-critical task of person detection has garnered significant attention. However, we observe that existing attack methods overlook the pivotal role of the camera, involving capturing real-world scenes and converting them into digital images, in the physical adversarial attack workflow. This oversight leads to instability and challenges in reproducing these attacks. In this work, we revisit patch-based attacks against person detectors and introduce a camera-agnostic physical adversarial attack to mitigate this limitation. Specifically, we construct a differentiable camera Image Signal Processing (ISP) proxy network to compensate for the physical-to-digital transition gap. Furthermore, the camera ISP proxy network serves as a defense module, forming an adversarial optimization framework with the attack module. The attack module optimizes adversarial patches to maximize effectiveness, while the defense module optimizes the conditional parameters of the camera ISP proxy network to minimize attack effectiveness. These modules engage in an adversarial game, enhancing cross-camera stability. Experimental results demonstrate that our proposed Camera-Agnostic Patch (CAP) attack effectively conceals persons from detectors across various imaging hardware, including two distinct cameras and four smartphones.", "keywords": "Physical Adversarial Attack;Person Detection;Adversarial Patch;Camera ISP", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/45b02ff096e9e85c2f619cd7a1281b7782ead5e2.zip", "author": "Hui Wei;Zhixiang Wang;Kewei Zhang;Jiaqi Hou;Yuanwei Liu;Hao Tang;Zheng Wang", "authorids": "~Hui_Wei2;~Zhixiang_Wang1;~Kewei_Zhang2;~Jiaqi_Hou1;~Yuanwei_Liu4;~Hao_Tang6;~Zheng_Wang14", "gender": "M;M;M;F;M;M;", "homepage": "https://weihui1308.github.io/;https://lightchaserx.github.io/;https://xiwen1.github.io;https://hallucinatie.github.io/;https://ha0tang.github.io/;https://wangzwhu.github.io/home/;https://github.com/lywverdant", "dblp": "38/7021-4;134/3733-1.html;;;07/5751-5;w/ZhengWang7;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.jp/citations?user=yybzbxMAAAAJ;cFk7BcAAAAAJ;1E7un-cAAAAJ;9zJkeEMAAAAJ;-WHTbpUAAAAJ;", "orcid": ";0000-0002-5016-587X;0009-0008-6038-5279;0009-0004-4339-7906;0000-0002-2077-1246;;", "linkedin": ";;;;hao-tang-887475138/;;", "or_profile": "~Hui_Wei2;~Zhixiang_Wang1;~Kewei_Zhang2;~Jiaqi_Hou1;~Hao_Tang6;~Zheng_Wang14;~YuanWei_Liu3", "aff": "Wuhan University;The University of Tokyo;Tsinghua University;Tsinghua University;Carnegie Mellon University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;u-tokyo.ac.jp;tsinghua.edu.cn;tsinghua.edu.cn;cmu.edu;whu.edu.cn;whu.edu.cn", "position": "PhD student;PhD student;Intern;Intern;Postdoc;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nwei2024revisiting,\ntitle={Revisiting Adversarial Patches for Designing Camera-Agnostic Attacks against Person Detection},\nauthor={Hui Wei and Zhixiang Wang and Kewei Zhang and Jiaqi Hou and Yuanwei Liu and Hao Tang and Zheng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2Inwtjvyx8}\n}", "github": "", "reviewers": "xDi9;eDby;ai8f;N8jz", "pdf_size": 2114565, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "127;95;87;105", "wc_strengths": "57;33;94;148", "wc_weaknesses": "161;56;84;68", "wc_questions": "84;2;52;53", "wc_limitations": "25;1;16;124", "wc_review": "454;187;333;498", "wc_reply_reviewers": "0;0;21;45", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 103.5, 14.99166435056495 ], "wc_strengths_avg": [ 83.0, 43.36473221409305 ], "wc_weaknesses_avg": [ 92.25, 40.91683638797115 ], "wc_questions_avg": [ 47.75, 29.38005275693017 ], "wc_limitations_avg": [ 41.5, 48.396797414705034 ], "wc_review_avg": [ 368.0, 120.70832614198575 ], "wc_reply_reviewers_avg": [ 16.5, 18.553975315279473 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=417741226896144926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;u-tokyo.ac.jp;tsinghua.edu.cn;tsinghua.edu.cn;cmu.edu;whu.edu.cn;whu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;3;0;0", "aff_unique_norm": "Wuhan University;University of Tokyo;Tsinghua University;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.u-tokyo.ac.jp;https://www.tsinghua.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "WHU;UTokyo;THU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0;0", "aff_country_unique": "China;Japan;United States" }, { "title": "Mini-Sequence Transformers: Optimizing Intermediate Memory for Long Sequences Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96824", "id": "2KuZHYykkq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2KuZHYykkq", "openreview": "https://openreview.net/forum?id=2KuZHYykkq", "poster": "/media/PosterPDFs/NeurIPS%202024/96824.png?t=1731177103.771932", "project": "", "author_site": "cheng Luo, Jiawei Zhao, Zhuoming Chen, Beidi Chen, Animashree Anandkumar", "tldr": "", "abstract": "We introduce Mini-Sequence Transformer (MsT), a simple and effective methodology for highly efficient and accurate LLM training with extremely long sequences. MsT partitions input sequences and iteratively processes mini-sequences to reduce intermediate memory usage. Integrated with activation recomputation, it enables significant memory savings in both forward and backward passes. In experiments with the Llama3-8B model, with MsT, we measure no degradation in throughput or convergence even with 12x longer sequences than standard implementations. MsT is fully general, implementation-agnostic, and requires minimal code changes to integrate with existing LLM training frameworks. Integrated with the huggingface library, MsT successfully extends the maximum context length of Qwen, Mistral, and Gemma-2 by 12-24x.", "keywords": "Long-Context;Foundation Models;Systems for ML;LLM Training;GPUs;Memory-efficient Training", "primary_area": "infrastructure", "supplementary_material": "", "author": "Cheng Luo;Jiawei Zhao;Zhuoming Chen;Beidi Chen;Anima Anandkumar", "authorids": "~Cheng_Luo5;~Jiawei_Zhao2;~Zhuoming_Chen1;~Beidi_Chen1;~Anima_Anandkumar1", "gender": "M;M;M;F;F", "homepage": "https://wdlctc.github.io/;https://jiaweizhao.com/;;https://www.andrew.cmu.edu/user/beidic/;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": ";;226/5729;192/1339;", "google_scholar": "jmCDQGoAAAAJ;;4Bb5KRYAAAAJ;;bEcLezcAAAAJ", "orcid": ";;;;", "linkedin": ";;zhuoming-chen-325075234/;;anima-anandkumar-35171b1/", "or_profile": "~Cheng_Luo5;~Jiawei_Zhao2;~Zhuoming_Chen1;~Beidi_Chen1;~anima_anandkumar1", "aff": "California Institute of Technology;California Institute of Technology;Carnegie Mellon University;Meta Facebook;California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu;cmu.edu;fb.com;caltech.edu", "position": "Postdoc;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nluo2024minisequence,\ntitle={Mini-Sequence Transformers: Optimizing Intermediate Memory for Long Sequences Training},\nauthor={Cheng Luo and Jiawei Zhao and Zhuoming Chen and Beidi Chen and Anima Anandkumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2KuZHYykkq}\n}", "github": "", "reviewers": "NrY2;iczK;uLG7", "pdf_size": 1745971, "rating": "5;6;7", "confidence": "4;5;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;1", "wc_summary": "97;133;73", "wc_strengths": "67;199;106", "wc_weaknesses": "54;167;197", "wc_questions": "140;172;126", "wc_limitations": "9;170;8", "wc_review": "367;841;510", "wc_reply_reviewers": "153;16;0", "wc_reply_authors": "478;374;279", "reply_reviewers": "1;1;0", "reply_authors": "3;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 101.0, 24.657656011875904 ], "wc_strengths_avg": [ 124.0, 55.37147279962851 ], "wc_weaknesses_avg": [ 139.33333333333334, 61.57019480957396 ], "wc_questions_avg": [ 146.0, 19.252705437591537 ], "wc_limitations_avg": [ 62.333333333333336, 76.13292469242346 ], "wc_review_avg": [ 572.6666666666666, 198.5184010502693 ], "wc_reply_reviewers_avg": [ 56.333333333333336, 68.66504852462342 ], "wc_reply_authors_avg": [ 377.0, 81.26910031904295 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D-v4z_P08MoJ:scholar.google.com/&scioq=Mini-Sequence+Transformers:+Optimizing+Intermediate+Memory+for+Long+Sequences+Training&hl=en&as_sdt=0,48", "gs_version_total": 0, "email": "caltech.edu;caltech.edu;cmu.edu;fb.com;caltech.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "California Institute of Technology;Carnegie Mellon University;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.caltech.edu;https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "Caltech;CMU;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Not Just Object, But State: Compositional Incremental Learning without Forgetting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96823", "id": "2LRZhbTDtA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2LRZhbTDtA", "openreview": "https://openreview.net/forum?id=2LRZhbTDtA", "poster": "/media/PosterPDFs/NeurIPS%202024/96823.png?t=1731248166.1124089", "project": "", "author_site": "Yanyi Zhang, Binglin Qiu, Qi Jia, Yu Liu, Ran He", "tldr": "", "abstract": "Most incremental learners excessively prioritize object classes while neglecting various kinds of states (e.g. color and material) attached to the objects. As a result, they are limited in the ability to model state-object compositionality accurately. To remedy this limitation, we propose a novel task called Compositional Incremental Learning (composition-IL), which enables the model to recognize a variety of state-object compositions in an incremental learning fashion. Since the lack of suitable datasets, we re-organize two existing datasets and make them tailored for composition-IL. Then, we propose a prompt-based Composition Incremental Learner (CompILer), to overcome the ambiguous composition boundary. Specifically, we exploit multi-pool prompt learning, and ensure the inter-pool prompt discrepancy and intra-pool prompt diversity. Besides, we devise object-injected state prompting which injects object prompts to guide the selection of state prompts. Furthermore, we fuse the selected prompts by a generalized-mean strategy, to eliminate irrelevant information learned in the prompts. Extensive experiments on two datasets exhibit state-of-the-art performance achieved by CompILer. Code and datasets are available at: https://github.com/Yanyi-Zhang/CompILer.", "keywords": "Incremental Learning;Compositional Learning;Prompting", "primary_area": "machine_vision", "supplementary_material": "/attachment/f5d19e803fd89e1ee37de510389c1f77bf5e4e31.zip", "author": "Yanyi Zhang;Binglin Qiu;Qi Jia;Yu Liu;Ran He", "authorids": "~Yanyi_Zhang2;~Binglin_Qiu1;~Qi_Jia1;~Yu_Liu12;~Ran_He1", "gender": "M;M;F;M;M", "homepage": "https://yanyi-zhang.github.io;https://github.com/M1andy;;https://liuyudut.github.io/;https://rhe-web.github.io/", "dblp": ";;69/1921-1;97/2274-12;61/6198-1", "google_scholar": "czAXv48AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.be/citations?user=gwdPLZUAAAAJ;ayrg9AUAAAAJ", "orcid": ";;;0000-0002-2067-9175;0000-0002-3807-991X", "linkedin": ";;;;", "or_profile": "~Yanyi_Zhang2;~Binglin_Qiu1;~Qi_Jia1;~Yu_Liu12;~Ran_He1", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;ia.ac.cn", "position": "MS student;MS student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024not,\ntitle={Not Just Object, But State: Compositional Incremental Learning without Forgetting},\nauthor={Yanyi Zhang and Binglin Qiu and Qi Jia and Yu Liu and Ran He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2LRZhbTDtA}\n}", "github": "", "reviewers": "EVGX;KbBF;B271;x9g2", "pdf_size": 2872023, "rating": "6;6;6;6", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "141;118;68;60", "wc_strengths": "70;62;51;71", "wc_weaknesses": "690;151;81;358", "wc_questions": "4;42;86;63", "wc_limitations": "9;15;29;10", "wc_review": "914;388;315;562", "wc_reply_reviewers": "216;21;30;523", "wc_reply_authors": "650;0;0;589", "reply_reviewers": "2;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.75, 33.862774546690645 ], "wc_strengths_avg": [ 63.5, 8.0156097709407 ], "wc_weaknesses_avg": [ 320.0, 236.6569246821229 ], "wc_questions_avg": [ 48.75, 30.161026176176435 ], "wc_limitations_avg": [ 15.75, 7.980444849756184 ], "wc_review_avg": [ 544.75, 231.29999459576302 ], "wc_reply_reviewers_avg": [ 197.5, 203.40907059420925 ], "wc_reply_authors_avg": [ 309.75, 310.4998993558613 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5Mw4UGVo-JAJ:scholar.google.com/&scioq=Not+Just+Object,+But+State:+Compositional+Incremental+Learning+without+Forgetting&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Dalian University of Technology;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.dlut.edu.cn/;http://www.ia.cas.cn", "aff_unique_abbr": "DUT;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Distributional Preference Alignment of LLMs via Optimal Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96822", "id": "2LctgfN6Ty", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2LctgfN6Ty", "openreview": "https://openreview.net/forum?id=2LctgfN6Ty", "poster": "", "project": "", "author_site": "Igor Melnyk, Youssef Mroueh, Brian Belgodere, Mattia Rigotti, Apoorva Nitsure, Mikhail Yurochkin, Kristjan Greenewald, Jiri Navratil, Jarret Ross", "tldr": "", "abstract": "Current LLM alignment techniques use pairwise human preferences at a sample level, and as such, they do not imply an alignment on the distributional level. We propose in this paper Alignment via Optimal Transport (AOT), a novel method for distributional preference alignment of LLMs. AOT aligns LLMs on unpaired preference data by making the reward distribution of the positive samples stochastically dominant in the first order on the distribution of negative samples. We introduce a convex relaxation of this first-order stochastic dominance and cast it as an optimal transport problem with a smooth and convex cost. Thanks to the one-dimensional nature of the resulting optimal transport problem and the convexity of the cost, it has a closed-form solution via sorting on empirical measures. We fine-tune LLMs with this AOT objective, which enables alignment by penalizing the violation of the stochastic dominance of the reward distribution of the positive samples on the reward distribution of the negative samples. We analyze the sample complexity of AOT by considering the dual of the OT problem and show that it converges at the parametric rate. Empirically, we show on a diverse set of alignment datasets and LLMs that AOT leads to state-of-the-art models in the 7B family of models when evaluated with Open LLM Benchmarks and AlpacaEval. Code for $\\mathsf{AOT}$ is available in the Hugging Face TRL library \\url{https://ibm.biz/AOT_TRL}.", "keywords": "LLM Alignment;Optimal Transport;stochastic dominance", "primary_area": "optimization", "supplementary_material": "/attachment/3cc8463dc4f4ec2e0a5ccaca56de67c66eb563f5.zip", "author": "Igor Melnyk;Youssef Mroueh;Brian Belgodere;Mattia Rigotti;Apoorva Nitsure;Mikhail Yurochkin;Kristjan Greenewald;Jiri Navratil;Jarret Ross", "authorids": "~Igor_Melnyk1;~Youssef_Mroueh1;~Brian_Belgodere1;~Mattia_Rigotti1;~Apoorva_Nitsure1;~Mikhail_Yurochkin1;~Kristjan_Greenewald1;~Jiri_Navratil1;~Jarret_Ross1", "gender": "M;;M;;;M;;;", "homepage": "https://imelnyk.github.io/;;;http://www.matrig.net;;https://moonfolk.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;https://researcher.watson.ibm.com/researcher/view.php?person=us-jiri;", "dblp": ";http://dblp.uni-trier.de/pers/hd/m/Mroueh:Youssef;https://dblp.uni-trier.de/pid/139/2237;01/9816;;191/6719;146/0563;00/680-1.html;192/1669", "google_scholar": "4vDRTWwAAAAJ;https://scholar.google.com/citations?hl=en;;TmHt7CwAAAAJ;;QjBF9sUAAAAJ;L3zNUG4AAAAJ;H41S5AgAAAAJ;", "orcid": ";;;0000-0001-6466-2810;;;;0009-0007-5230-7679;", "linkedin": ";;brian-belgodere-050a953/;;apoorvanitsure/;mikhail-yurochkin-a45659114/;;jiri-navratil-62641497/;", "or_profile": "~Igor_Melnyk1;~Youssef_Mroueh1;~Brian_Belgodere1;~Mattia_Rigotti1;~Apoorva_Nitsure1;~Mikhail_Yurochkin1;~Kristjan_Greenewald1;~Jiri_Navratil1;~Jarret_Ross1", "aff": "International Business Machines;IBM;IBM Research;International Business Machines;International Business Machines;IBM Research;MIT-IBM Watson AI Lab, IBM Research;International Business Machines;International Business Machines", "aff_domain": "ibm.com;us.ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "Researcher;Research Staff member;Researcher;Researcher;Researcher;Researcher;Research Scientist;Principal Research Staff Member;Researcher", "bibtex": "@inproceedings{\nmelnyk2024distributional,\ntitle={Distributional Preference Alignment of {LLM}s via Optimal Transport},\nauthor={Igor Melnyk and Youssef Mroueh and Brian Belgodere and Mattia Rigotti and Apoorva Nitsure and Mikhail Yurochkin and Kristjan Greenewald and Jiri Navratil and Jarret Ross},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2LctgfN6Ty}\n}", "github": "", "reviewers": "rub8;9Mcg;BcRY", "pdf_size": 996744, "rating": "4;6;8", "confidence": "2;3;3", "soundness": "3;3;4", "novelty": "2;2;4", "presentation": "3;3;4", "wc_summary": "59;140;155", "wc_strengths": "28;40;115", "wc_weaknesses": "87;158;29", "wc_questions": "43;142;82", "wc_limitations": "3;1;6", "wc_review": "220;481;387", "wc_reply_reviewers": "40;15;29", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.632993161855452 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 118.0, 42.16633728461603 ], "wc_strengths_avg": [ 61.0, 38.49675310984031 ], "wc_weaknesses_avg": [ 91.33333333333333, 52.75309364282713 ], "wc_questions_avg": [ 89.0, 40.718546143004666 ], "wc_limitations_avg": [ 3.3333333333333335, 2.0548046676563256 ], "wc_review_avg": [ 362.6666666666667, 107.9331068558464 ], "wc_reply_reviewers_avg": [ 28.0, 10.23067283548187 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3211924445725627746&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ibm.com;us.ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "author_num": 9, "aff_unique_index": "0;0;1;0;0;1;1;0;0", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the cohesion and separability of average-link for hierarchical agglomerative clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96821", "id": "2LuSHTFWzK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2LuSHTFWzK", "openreview": "https://openreview.net/forum?id=2LuSHTFWzK", "poster": "", "project": "", "author_site": "Eduardo Laber, Miguel Batista", "tldr": "", "abstract": "Average-link is widely recognized as one of the most popular and effective methods for building hierarchical agglomerative clustering. The available theoretical analyses show that this method has a much better approximation than other popular heuristics, as single-linkage and complete-linkage, regarding variants of Dasgupta's cost function [STOC 2016]. However, these analyses do not separate average-link from a random hierarchy and they are not appealing for metric spaces since every hierarchical clustering has a $1/2$ approximation with regard to the variant of Dasgupta's function\nthat is employed for dissimilarity measures [Moseley and Yang 2020]. In this paper, we present a comprehensive study of the performance of \\avglink \\, in metric spaces, regarding several natural criteria that capture separability and cohesion, and are more interpretable than Dasgupta's cost function and its variants. We also present experimental results with real datasets that, together with our theoretical analyses, suggest that average-link is a better choice than other related methods when both cohesion and separability are important goals.", "keywords": "hierarchical clustering;approximation algorithms;average-linkage;complete-link;single-link;linkage-methods;agglomerative mthods", "primary_area": "optimization", "supplementary_material": "/attachment/04b4bf0bae5aaf6fa142e0502e2794d77d1d08cb.zip", "author": "Eduardo Sany Laber;Miguel A. Batista", "authorids": "~Eduardo_Sany_Laber1;~Miguel_A._Batista1", "gender": "M;M", "homepage": "http://www-di.inf.puc-rio.br/~laber/;", "dblp": "49/5557;", "google_scholar": "https://scholar.google.com.br/citations?hl=pt-BR;", "orcid": "0000-0002-9025-8333;", "linkedin": ";miguel-batista-b403a428b/", "or_profile": "~Eduardo_Sany_Laber1;~Miguel_A._Batista1", "aff": "Pontificia Universidade Catolica, Rio de Janeiro, Brazil;Pontif\u00edcia Universidade Cat\u00f3lica do Rio de Janeiro", "aff_domain": "puc-rio.br;puc-rio.br", "position": "Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nlaber2024on,\ntitle={On the cohesion and separability of average-link for hierarchical agglomerative clustering},\nauthor={Eduardo Sany Laber and Miguel A. Batista},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2LuSHTFWzK}\n}", "github": "", "reviewers": "QwrC;k54Y;f81R;Sa8n", "pdf_size": 510665, "rating": "6;6;6;7", "confidence": "3;3;4;2", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "58;60;77;131", "wc_strengths": "37;53;96;45", "wc_weaknesses": "137;74;44;16", "wc_questions": "193;15;35;29", "wc_limitations": "11;4;2;19", "wc_review": "436;206;254;240", "wc_reply_reviewers": "27;0;33;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.5, 29.516944286290883 ], "wc_strengths_avg": [ 57.75, 22.796655456447994 ], "wc_weaknesses_avg": [ 67.75, 44.93537025551253 ], "wc_questions_avg": [ 68.0, 72.53275122315436 ], "wc_limitations_avg": [ 9.0, 6.670832032063167 ], "wc_review_avg": [ 284.0, 89.47625383307015 ], "wc_reply_reviewers_avg": [ 17.25, 13.311179511974137 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UND3X-U0NXUJ:scholar.google.com/&scioq=On+the+cohesion+and+separability+of+average-link+for+hierarchical+agglomerative+clustering&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "puc-rio.br;puc-rio.br", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Pontifical Catholic University of Rio de Janeiro;Pontif\u00edcia Universidade Cat\u00f3lica do Rio de Janeiro", "aff_unique_dep": ";", "aff_unique_url": "http://www.puc-rio.br/;https://www.puc-rio.br", "aff_unique_abbr": "PUC-Rio;PUC-Rio", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Rio de Janeiro", "aff_country_unique_index": "0;0", "aff_country_unique": "Brazil" }, { "title": "Learning from Noisy Labels via Conditional Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96820", "id": "2NKumsITFw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2NKumsITFw", "openreview": "https://openreview.net/forum?id=2NKumsITFw", "poster": "/media/PosterPDFs/NeurIPS%202024/96820.png?t=1733092581.905276", "project": "", "author_site": "Hui GUO, Grace Yi, Boyu Wang", "tldr": "", "abstract": "While crowdsourcing has emerged as a practical solution for labeling large datasets, it presents a significant challenge in learning accurate models due to noisy labels from annotators with varying levels of expertise. Existing methods typically estimate the true label posterior, conditioned on the instance and noisy annotations, to infer true labels or adjust loss functions. These estimates, however, often overlook potential misspecification in the true label posterior, which can degrade model performances, especially in high-noise scenarios. To address this issue, we investigate learning from noisy annotations with an estimated true label posterior through the framework of conditional distributionally robust optimization (CDRO). We propose formulating the problem as minimizing the worst-case risk within a distance-based ambiguity set centered around a reference distribution. By examining the strong duality of the formulation, we derive upper bounds for the worst-case risk and develop an analytical solution for the dual robust risk for each data point. This leads to a novel robust pseudo-labeling algorithm that leverages the likelihood ratio test to construct a pseudo-empirical distribution, providing a robust reference probability distribution in CDRO. Moreover, to devise an efficient algorithm for CDRO, we derive a closed-form expression for the empirical robust risk and the optimal Lagrange multiplier of the dual problem, facilitating a principled balance between robustness and model fitting. Our experimental results on both synthetic and real-world datasets demonstrate the superiority of our method.", "keywords": "noisy label;conditional distributionally robust optimization (CDRO);crowdsourcing", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hui Guo;Grace Yi;Boyu Wang", "authorids": "~Hui_Guo5;~Grace_Yi1;~Boyu_Wang3", "gender": "F;F;M", "homepage": "https://github.com/hguo1728;http://fisher.stats.uwo.ca/faculty/yyi/;https://sites.google.com/site/borriewang/", "dblp": ";;41/6565-4.html", "google_scholar": ";;qAZM5KcAAAAJ", "orcid": ";;0000-0002-7413-4162", "linkedin": ";;", "or_profile": "~Hui_Guo5;~Grace_Yi1;~Boyu_Wang3", "aff": "University of Western Ontario;University of Western Ontario;University of Western Ontario", "aff_domain": "uwo.ca;uwo.ca;uwo.ca", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nguo2024learning,\ntitle={Learning from Noisy Labels via Conditional Distributionally Robust Optimization},\nauthor={Hui Guo and Grace Yi and Boyu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2NKumsITFw}\n}", "github": "", "reviewers": "62sF;7dsE;xLiU", "pdf_size": 1888393, "rating": "6;6;7", "confidence": "3;4;4", "soundness": "3;3;2", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "50;146;61", "wc_strengths": "47;44;32", "wc_weaknesses": "69;458;266", "wc_questions": "69;57;45", "wc_limitations": "11;107;2", "wc_review": "246;812;406", "wc_reply_reviewers": "97;190;299", "wc_reply_authors": "1312;548;1844", "reply_reviewers": "2;2;2", "reply_authors": "4;3;6", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.66666666666667, 42.89781139198388 ], "wc_strengths_avg": [ 41.0, 6.48074069840786 ], "wc_weaknesses_avg": [ 264.3333333333333, 158.8129577696844 ], "wc_questions_avg": [ 57.0, 9.797958971132712 ], "wc_limitations_avg": [ 40.0, 47.51841748206689 ], "wc_review_avg": [ 488.0, 238.2323795512832 ], "wc_reply_reviewers_avg": [ 195.33333333333334, 82.55234029928461 ], "wc_reply_authors_avg": [ 1234.6666666666667, 531.90809565396 ], "reply_reviewers_avg": [ 2.0, 0.0 ], "reply_authors_avg": [ 4.333333333333333, 1.247219128924647 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8feY8Tq-tGQJ:scholar.google.com/&scioq=Learning+from+Noisy+Labels+via+Conditional+Distributionally+Robust+Optimization&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "uwo.ca;uwo.ca;uwo.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Western Ontario", "aff_unique_dep": "", "aff_unique_url": "https://www.uwo.ca", "aff_unique_abbr": "UWO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "2Net0eEj9d", "title": "NeurIPS 2023 Competition: Privacy Preserving Federated Learning Document VQA", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "The Privacy Preserving Federated Learning Document VQA (PFL-DocVQA) competition challenged the community to develop provably private and communication-efficient solutions in a federated setting for a real-life use case: invoice processing. The competition introduced a dataset of real invoice documents, along with associated questions and answers requiring information extraction and reasoning over the document images. Thereby, it brings together researchers and expertise from the document analysis, privacy, and federated learning communities. Participants fine-tuned a pre-trained, state-of-the-art Document Visual Question Answering model provided by the organizers for this new domain, mimicking a typical federated invoice processing setup. The base model is a multi-modal generative language model, and sensitive information could be exposed through either the visual or textual input modality. Participants proposed elegant solutions to reduce communication costs while maintaining a minimum utility threshold in track 1 and to protect all information from each document provider using differential privacy in track 2. The competition served as a new testbed for developing and testing private federated learning methods, simultaneously raising awareness about privacy within the document image analysis and recognition community. Ultimately, the competition analysis provides best practices and recommendations for successfully running privacy-focused federated learning challenges in the future.", "keywords": "Differential privacy;Federated Learning;Document Understanding;Document Visual Question Answering", "primary_area": "", "supplementary_material": "", "author": "Marlon Tobaben;Mohamed Ali Souibgui;Rub\u00e8n Tito;Khanh Nguyen;Raouf Kerkouche;Kangsoo Jung;Joonas J\u00e4lk\u00f6;Lei Kang;Andrey Barsky;Vincent Poulain d'Andecy;Aur\u00e9lie JOSEPH;Aashiq Muhamed;Kevin Kuo;Virginia Smith;Yusuke Yamasaki;Takumi Fukami;Kenta Niwa;Iifan Tyou;Hiro Ishii;Rio Yokota;Ragul N;Rintu Kutum;Josep Llados;Ernest Valveny;Antti Honkela;Mario Fritz;Dimosthenis Karatzas", "authorids": "~Marlon_Tobaben1;~Mohamed_Ali_Souibgui1;~Rub\u00e8n_Tito1;~Khanh_Nguyen6;~Raouf_Kerkouche1;~Kangsoo_Jung1;~Joonas_J\u00e4lk\u00f61;~Lei_Kang1;~Andrey_Barsky1;~Vincent_Poulain_d'Andecy1;~Aur\u00e9lie_JOSEPH1;~Aashiq_Muhamed1;~Kevin_Kuo1;~Virginia_Smith1;~Yusuke_Yamasaki1;~Takumi_Fukami1;~Kenta_Niwa1;~Iifan_Tyou1;~Hiro_Ishii1;~Rio_Yokota1;~Ragul_N1;~Rintu_Kutum1;~Josep_Llados1;~Ernest_Valveny2;~Antti_Honkela1;~Mario_Fritz1;~Dimosthenis_Karatzas1", "gender": ";M;M;M;M;M;M;;M;M;F;M;M;F;M;;M;M;M;M;M;M;M;M;M;M;M", "homepage": "https://www.helsinki.fi/en/about-us/people/people-finder/marlon-tobaben-9428638;https://dali92002.github.io/;https://rubenpt91.github.io/;https://github.com/khanhnguyen21006;;https://scholar.google.com/citations?user=v6NdDt8AAAAJ&hl=en;;;;;;https://github.com/aashiqmuhamed;https://imkevinkuo.github.io;;;;http://www.kecl.ntt.co.jp/icl/ls/members/niwa/index.html;;https://www.linkedin.com/in/hiro-ishii-1970ab2aa/;https://www.rio.scrc.iir.isct.ac.jp/en/index.html;https://github.com/ragul-n;https://www.ashoka.edu.in/profile/rintu-kutum/;https://www.cvc.uab.es/team/?action=profile&id=379;;https://www.cs.helsinki.fi/u/ahonkela/;https://cispa.saarland/group/fritz/;https://www.vlr.ai/", "dblp": "330/3364;275/3241;;;226/7191.html;71/3271;188/5963;;;;;294/0107;132/0824;120/0921;;;64/1008.html;234/8871.html;374/4086;61/7413;;;54/769;78/6032;h/AnttiHonkela;;03/6509", "google_scholar": "pgyBA6YAAAAJ;LXq3YYMAAAAJ;https://scholar.google.es/citations?user=pSF5XHMAAAAJ;;cH5q5P4AAAAJ;v6NdDt8AAAAJ;;;https://scholar.google.com/citations?hl=en;;;GbVC5NYAAAAJ;J6tz-0oAAAAJ;;;;Btla06EAAAAJ;;;klw9KE0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=obeyQMUAAAAJ;92pWl-AAAAAJ;https://scholar.google.es/citations?user=TrYNrJgAAAAJ;XsyLs6AAAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ;xASEtrUAAAAJ", "orcid": "0000-0002-9778-0853;0000-0003-0100-9392;0000-0002-5657-9790;;0000-0002-1458-7805;;;0000-0002-1962-3916;0000-0002-6993-5969;0009-0008-5515-3561;0000-0002-5499-6355;;;;0000-0002-3326-9300;;0000-0002-6911-0238;;0009-0008-3505-9332;0000-0001-7573-7873;;0000-0001-8667-6199;0000-0002-4533-4739;;0000-0001-9193-8093;;0000-0001-8762-4454", "linkedin": "marlon-tobaben/;mohamed-ali-souibgui/;rub\u00e8n-tito;;;;;;;;;aashiq-muhamed-52169421/;;;;http://www.linkedin.com/in/takumi-fukami-8a4a40238;;tyou-iifan-354b132b/;hiro-ishii-1970ab2aa/;rio-yokota-62857235/?originalSubdomain=jp;;;josep-llad%C3%B3s-077762/;;;;dimostheniskaratzas/", "or_profile": "~Marlon_Tobaben1;~Mohamed_Ali_Souibgui1;~Rub\u00e8n_Tito1;~Khanh_Nguyen6;~Raouf_Kerkouche1;~Kangsoo_Jung1;~Joonas_J\u00e4lk\u00f61;~Lei_Kang1;~Andrey_Barsky1;~Vincent_Poulain_d'Andecy1;~Aur\u00e9lie_JOSEPH1;~Aashiq_Muhamed1;~Kevin_Kuo1;~Virginia_Smith1;~Yusuke_Yamasaki1;~Takumi_Fukami1;~Kenta_Niwa1;~Iifan_Tyou1;~Hiro_Ishii1;~Rio_Yokota1;~Ragul_N1;~Rintu_Kutum1;~Josep_Llados1;~Ernest_Valveny2;~Antti_Honkela1;~Mario_Fritz1;~Dimosthenis_Karatzas1", "aff": "University of Helsinki;Chordata Motion;;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;CISPA, saarland university, saarland informatics campus;INRIA;University of Helsinki;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Yooz;Yooz;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;NTT Social Informatics Laboratory;NTT Corporation;NTT Corporation;NTT;Tokyo Institute of Technology, Tokyo Institute of Technology;Institute of Science Tokyo;Ashoka University;Ashoka University;Universitat Aut\u00f3noma de Barcelona;Universitat Aut\u00f2noma de Barcelona;University of Helsinki;Saarland University;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona", "aff_domain": "helsinki.fi;chordata.cc;;cvc.uab.es;cispa.saarland;inria.fr;helsinki.fi;cvc.uab.es;cvc.uab.es;getyooz.com;getyooz.com;andrew.cmu.edu;cmu.edu;cmu.edu;ntt.com;ntt.co.jp;ntt.co.jp;ntt.co.jp;titech.ac.jp;isct.ac.jp;ashoka.edu.in;ashoka.edu.in;uab.es;uab.cat;helsinki.fi;uni-saarland.de;cvc.uab.es", "position": "PhD student;Researcher;;PhD student;Postdoc;Postdoc;Postdoc;Postdoc;Postdoc;Principal Researcher;Researcher;PhD student;PhD student;Associate Professor;Researcher;Researcher;Researcher;Researcher;MS student;Full Professor;Undergrad student;Lecturer;Associate Professor;Associate Professor;Full Professor;Full Professor;Associate Director", "bibtex": "@misc{\nanonymous2024neurips,\ntitle={Neur{IPS} 2023 Competition: Privacy Preserving Federated Learning Document {VQA}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=2Net0eEj9d}\n}", "github": "", "project": "", "reviewers": "8VKs;HdXU;3pdw;EfAD", "site": "https://openreview.net/forum?id=2Net0eEj9d", "pdf_size": 1087201, "rating": "4;5;5;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "86;29;21;113", "wc_strengths": "2;5;6;6", "wc_improvement": "53;5;6;6", "wc_limitations": "1;11;6;1", "wc_correctness": "14;7;1;1", "wc_clarity": "1;5;1;9", "wc_relation_to_prior_work": "8;1;6;2", "wc_documentation": "1;7;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "167;71;49;140", "wc_reply_reviewers": "17;0;0;16", "wc_reply_authors": "8;0;0;8", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 62.25, 38.557586802080856 ], "wc_strengths_avg": [ 4.75, 1.6393596310755 ], "wc_improvement_avg": [ 17.5, 20.5 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_correctness_avg": [ 5.75, 5.356071321407137 ], "wc_clarity_avg": [ 4.0, 3.3166247903554 ], "wc_relation_to_prior_work_avg": [ 4.25, 2.8613807855648994 ], "wc_documentation_avg": [ 2.5, 2.598076211353316 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 106.75, 48.34446710844996 ], "wc_reply_reviewers_avg": [ 8.25, 8.257572282456872 ], "wc_reply_authors_avg": [ 4.0, 4.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 27, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tVKYd-o6odcJ:scholar.google.com/&scioq=NeurIPS+2023+Competition:+Privacy+Preserving+Federated+Learning+Document+VQA&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;4;0;2;2;5;5;6;6;6;7;8;8;8;9;10;11;11;2;12;0;3;12", "aff_unique_norm": "University of Helsinki;Chordata Motion;Universitat Aut\u00f3noma de Barcelona;Saarland University;INRIA;Yooz;Carnegie Mellon University;NTT Data;NTT Corporation;Tokyo Institute of Technology;Institute of Science, Tokyo;Ashoka University;Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": ";;Computer Vision Center;CISPA;;;;Social Informatics Laboratory;;;;;", "aff_unique_url": "https://www.helsinki.fi;;https://www.uab.cat;https://www.uni-saarland.de;https://www.inria.fr;;https://www.cmu.edu;https://www.nttdata.com;https://www.ntt.co.jp;https://www.titech.ac.jp;https://www.iost.jp;https://www.ashoka.edu.in;https://www.uab.cat", "aff_unique_abbr": "UH;;UAB;Saarland U;INRIA;;CMU;NTT;NTT;Titech;IoST;Ashoka;UAB", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Saarland Informatics Campus;Tokyo", "aff_country_unique_index": "0;2;3;4;0;2;2;5;5;5;6;6;6;6;6;6;7;7;2;2;0;3;2", "aff_country_unique": "Finland;;Spain;Germany;France;United States;Japan;India" }, { "title": "Utilizing Image Transforms and Diffusion Models for Generative Modeling of Short and Long Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96819", "id": "2NfBBpbN9x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2NfBBpbN9x", "openreview": "https://openreview.net/forum?id=2NfBBpbN9x", "poster": "/media/PosterPDFs/NeurIPS%202024/96819.png?t=1733850552.628646", "project": "", "author_site": "Ilan Naiman, Nimrod Berman, Itai Pemper, Idan Arbiv, Gal Fadlon, Omri Azencot", "tldr": "", "abstract": "Lately, there has been a surge in interest surrounding generative modeling of time series data. Most existing approaches are designed either to process short sequences or to handle long-range sequences. This dichotomy can be attributed to gradient issues with recurrent networks, computational costs associated with transformers, and limited expressiveness of state space models. Towards a unified generative model for varying-length time series, we propose in this work to transform sequences into images. By employing invertible transforms such as the delay embedding and the short-time Fourier transform, we unlock three main advantages: i) We can exploit advanced diffusion vision models; ii) We can remarkably process short- and long-range inputs within the same framework; and iii) We can harness recent and established tools proposed in the time series to image literature. We validate the effectiveness of our method through a comprehensive evaluation across multiple tasks, including unconditional generation, interpolation, and extrapolation. We show that our approach achieves consistently state-of-the-art results against strong baselines. In the unconditional generation tasks, we show remarkable mean improvements of $58.17$% over previous diffusion models in the short discriminative score and $132.61$% in the (ultra-)long classification scores. Code is at https://github.com/azencot-group/ImagenTime.", "keywords": "Time Series;Generative Models;Long Sequences", "primary_area": "generative_models", "supplementary_material": "", "author": "Ilan Naiman;Nimrod Berman;Itai Pemper;Idan Arbiv;Gal Fadlon;Omri Azencot", "authorids": "~Ilan_Naiman1;~Nimrod_Berman1;~Itai_Pemper1;~Idan_Arbiv1;~Gal_Fadlon1;~Omri_Azencot1", "gender": "M;M;M;M;M;Unspecified", "homepage": "https://www.linkedin.com/in/ilan-naiman-80071a190;;;;;http://omriazencot.com", "dblp": "285/4824;;;;;132/3985.html", "google_scholar": "Fglytk8AAAAJ;;;;;https://scholar.google.co.il/citations?user=MEGuRmAAAAAJ", "orcid": ";;;;;", "linkedin": "ilan-naiman-80071a190;nimrod-berman-a26250143/;itai-pemper-06049b259/;idan-arbiv/;gal-fadlon-89478a214/;omri-azencot-a8812417/", "or_profile": "~Ilan_Naiman1;~Nimrod_Berman1;~Itai_Pemper1;~Idan_Arbiv1;~Gal_Fadlon1;~Omri_Azencot1", "aff": "Ben Gurion University of the Negev, Technion;;Ben Gurion University of the Negev;Ben-Gurion University of the Negev;Ben-Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_domain": "bgu.ac.il;;bgu.ac.il;bgu.ac.il;bgu.ac.il;bgu.ac.il", "position": "PhD student;;MS student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nnaiman2024utilizing,\ntitle={Utilizing Image Transforms and Diffusion Models for Generative Modeling of Short and Long Time Series},\nauthor={Ilan Naiman and Nimrod Berman and Itai Pemper and Idan Arbiv and Gal Fadlon and Omri Azencot},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2NfBBpbN9x}\n}", "github": "", "reviewers": "DtfF;SryL;iRua", "pdf_size": 841685, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "48;36;65", "wc_strengths": "56;44;43", "wc_weaknesses": "103;192;97", "wc_questions": "6;2;102", "wc_limitations": "1;2;9", "wc_review": "214;276;316", "wc_reply_reviewers": "84;51;26", "wc_reply_authors": "114;0;37", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 49.666666666666664, 11.897712198383164 ], "wc_strengths_avg": [ 47.666666666666664, 5.90668171555645 ], "wc_weaknesses_avg": [ 130.66666666666666, 43.438334324521335 ], "wc_questions_avg": [ 36.666666666666664, 46.22649552895925 ], "wc_limitations_avg": [ 4.0, 3.559026084010437 ], "wc_review_avg": [ 268.6666666666667, 41.96294661828324 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 23.753362335093154 ], "wc_reply_authors_avg": [ 50.333333333333336, 47.48567035315905 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13237226877473655941&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "bgu.ac.il;;bgu.ac.il;bgu.ac.il;bgu.ac.il;bgu.ac.il", "author_num": 6, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Ben Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_unique_dep": ";", "aff_unique_url": "https://www.bgu.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Globally Q-linear Gauss-Newton Method for Overparameterized Non-convex Matrix Sensing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96818", "id": "2QvCOFw058", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2QvCOFw058", "openreview": "https://openreview.net/forum?id=2QvCOFw058", "poster": "/media/PosterPDFs/NeurIPS%202024/96818.png?t=1731140783.5424623", "project": "", "author_site": "Xixi Jia, Fangchen FENG, Deyu Meng, Defeng Sun", "tldr": "", "abstract": "This paper focuses on the optimization of overparameterized, non-convex low-rank matrix sensing (LRMS)\u2014an essential component in contemporary statistics and machine learning. Recent years have witnessed significant breakthroughs in first-order methods, such as gradient descent, for tackling this non-convex optimization problem. However, the presence of numerous saddle points often prolongs the time required for gradient descent to overcome these obstacles. Moreover, overparameterization can markedly decelerate gradient descent methods, transitioning its convergence rate from linear to sub-linear. In this paper, we introduce an approximated Gauss-Newton (AGN) method for tackling the non-convex LRMS problem. Notably, AGN incurs a computational cost comparable to gradient descent per iteration but converges much faster without being slowed down by saddle points. We prove that, despite the non-convexity of the objective function, AGN achieves Q-linear convergence from random initialization to the global optimal solution. The global Q-linear convergence of AGN represents a substantial enhancement over the convergence of the existing methods for the overparameterized non-convex LRMS. The code for this paper is available at \\url{https://github.com/hsijiaxidian/AGN}.", "keywords": "low-rank matrix sensing;non-convex optimization;Gaussian-Newton method", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Xixi Jia;Fangchen FENG;Deyu Meng;Defeng Sun", "authorids": "~Xixi_Jia2;~Fangchen_FENG1;~Deyu_Meng1;~Defeng_Sun2", "gender": "M;M;M;", "homepage": ";;http://dymeng.gr.xjtu.edu.cn;https://www.polyu.edu.hk/ama/profile/dfsun", "dblp": "216/9686;;22/5614;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;xufza0oAAAAJ;an6w-64AAAAJ;QdIzNxgAAAAJ", "orcid": ";;0000-0002-1294-8283;", "linkedin": ";fangchen-feng-bb0ab659/;;", "or_profile": "~Xixi_Jia2;~Fangchen_FENG1;~Deyu_Meng1;~Defeng_Sun2", "aff": "Xidian University;Universit\u00e9 Paris Nord (Paris XIII);Xi'an Jiaotong University;Hong Kong Polytechnic University", "aff_domain": "xidian.edu.cn;univ-paris13.fr;xjtu.edu.cn;polyu.edu.hk", "position": "Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njia2024globally,\ntitle={Globally Q-linear Gauss-Newton Method for Overparameterized Non-convex Matrix Sensing},\nauthor={Xixi Jia and Fangchen FENG and Deyu Meng and Defeng Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2QvCOFw058}\n}", "github": "", "reviewers": "L5qb;KSmV;aVzA", "pdf_size": 643076, "rating": "4;5;7", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "2;3;3", "wc_summary": "196;38;40", "wc_strengths": "117;52;98", "wc_weaknesses": "1;69;197", "wc_questions": "1;20;3", "wc_limitations": "1;10;1", "wc_review": "316;189;339", "wc_reply_reviewers": "0;22;27", "wc_reply_authors": "97;140;36", "reply_reviewers": "0;1;1", "reply_authors": "3;4;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 91.33333333333333, 74.01501349200865 ], "wc_strengths_avg": [ 89.0, 27.28858125052797 ], "wc_weaknesses_avg": [ 89.0, 81.25679458769382 ], "wc_questions_avg": [ 8.0, 8.524474568362947 ], "wc_limitations_avg": [ 4.0, 4.242640687119285 ], "wc_review_avg": [ 281.3333333333333, 65.9612680964283 ], "wc_reply_reviewers_avg": [ 16.333333333333332, 11.728408057172787 ], "wc_reply_authors_avg": [ 91.0, 42.66927075386533 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13545818999671662973&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "xidian.edu.cn;univ-paris13.fr;xjtu.edu.cn;polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Xidian University;Universit\u00e9 Paris Nord;Xi'an Jiao Tong University;Hong Kong Polytechnic University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.univ-paris13.fr;https://www.xjtu.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "Xidian;UP13;XJTU;PolyU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Paris XIII;Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;France" }, { "title": "Stochastic Optimization Algorithms for Instrumental Variable Regression with Streaming Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96817", "id": "2RS0fL7Eet", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2RS0fL7Eet", "openreview": "https://openreview.net/forum?id=2RS0fL7Eet", "poster": "", "project": "", "author_site": "Xuxing Chen, Abhishek Roy, Yifan Hu, Krishnakumar Balasubramanian", "tldr": "", "abstract": "We develop and analyze algorithms for instrumental variable regression by viewing the problem as a conditional stochastic optimization problem. In the context of least-squares instrumental variable regression, our algorithms neither require matrix inversions nor mini-batches thereby providing a fully online approach for performing instrumental variable regression with streaming data. When the true model is linear, we derive rates of convergence in expectation, that are of order $\\mathcal{O}(\\log T/T)$ and $\\mathcal{O}(1/T^{1-\\epsilon})$ for any $\\epsilon>0$, respectively under the availability of two-sample and one-sample oracles respectively. Importantly, under the availability of the two-sample oracle, the aforementioned rate is actually agnostic to the relationship between confounder and the instrumental variable demonstrating the flexibility of the proposed approach in alleviating the need for explicit model assumptions required in recent works based on reformulating the problem as min-max optimization problems. Experimental validation is provided to demonstrate the advantages of the proposed algorithms over classical approaches like the 2SLS method.", "keywords": "instrumental variable regression;stochastic gradient descent;stochastic approximation;2SLS", "primary_area": "causal_inference", "supplementary_material": "/attachment/7331e286bcdec5cc5ad67c59372230497c055d8c.zip", "author": "Xuxing Chen;Abhishek Roy;Yifan Hu;Krishna Balasubramanian", "authorids": "~Xuxing_Chen1;~Abhishek_Roy1;~Yifan_Hu2;~Krishna_Balasubramanian1", "gender": "M;M;M;M", "homepage": "https://xuxingc.github.io/;;https://sites.google.com/view/yifan-hu;https://sites.google.com/view/kriznakumar/", "dblp": "221/0393;;;22/6780-2.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;kvPkZd0AAAAJ;rO2s0EEAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xuxing_Chen1;~Abhishek_Roy1;~Yifan_Hu2;~Krishnakumar_Balasubramanian1", "aff": "University of California, Davis;University of California, San Diego;ETHZ - ETH Zurich;University of California, Davis", "aff_domain": "ucdavis.edu;ucsd.edu;inf.ethz.ch;ucdavis.edu", "position": "PhD student;Postdoc;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchen2024stochastic,\ntitle={Stochastic Optimization Algorithms for Instrumental Variable Regression with Streaming Data},\nauthor={Xuxing Chen and Abhishek Roy and Yifan Hu and Krishna Balasubramanian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2RS0fL7Eet}\n}", "github": "", "reviewers": "hwGZ;AcA7;WDCF;E97U", "pdf_size": 13880771, "rating": "5;6;6;6", "confidence": "1;2;3;3", "soundness": "3;3;2;3", "novelty": "3;2;3;3", "presentation": "3;4;3;3", "wc_summary": "26;87;169;244", "wc_strengths": "14;126;64;125", "wc_weaknesses": "1;73;127;97", "wc_questions": "15;30;2;282", "wc_limitations": "1;20;2;16", "wc_review": "57;336;364;764", "wc_reply_reviewers": "0;14;25;197", "wc_reply_authors": "0;0;0;491", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 131.5, 82.42117446384758 ], "wc_strengths_avg": [ 82.25, 46.72459202604128 ], "wc_weaknesses_avg": [ 74.5, 46.5483619475487 ], "wc_questions_avg": [ 82.25, 115.75053995554406 ], "wc_limitations_avg": [ 9.75, 8.37779804005802 ], "wc_review_avg": [ 380.25, 251.98053000182375 ], "wc_reply_reviewers_avg": [ 59.0, 80.1654539062806 ], "wc_reply_authors_avg": [ 122.75, 212.60923662907967 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2527617840411024338&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ucdavis.edu;ucsd.edu;inf.ethz.ch;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, Davis;University of California, San Diego;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucdavis.edu;https://www.ucsd.edu;https://www.ethz.ch", "aff_unique_abbr": "UC Davis;UCSD;ETHZ", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Davis;San Diego;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "DiffuserLite: Towards Real-time Diffusion Planning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96816", "id": "2TXDHUqyrQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2TXDHUqyrQ", "openreview": "https://openreview.net/forum?id=2TXDHUqyrQ", "poster": "", "project": "", "author_site": "Zibin Dong, Jianye Hao, Yifu Yuan, Fei Ni, Yitian Wang, Pengyi Li, YAN ZHENG", "tldr": "", "abstract": "Diffusion planning has been recognized as an effective decision-making paradigm in various domains. The capability of generating high-quality long-horizon trajectories makes it a promising research direction. However, existing diffusion planning methods suffer from low decision-making frequencies due to the expensive iterative sampling cost. To alleviate this, we introduce DiffuserLite, a super fast and lightweight diffusion planning framework, which employs a planning refinement process (PRP) to generate coarse-to-fine-grained trajectories, significantly reducing the modeling of redundant information and leading to notable increases in decision-making frequency. Our experimental results demonstrate that DiffuserLite achieves a decision-making frequency of $122.2$Hz ($112.7$x faster than predominant frameworks) and reaches state-of-the-art performance on D4RL, Robomimic, and FinRL benchmarks. In addition, DiffuserLite can also serve as a flexible plugin to increase the decision-making frequency of other diffusion planning algorithms, providing a structural design reference for future works. More details and visualizations are available at https://diffuserlite.github.io/.", "keywords": "Diffusion model; Reinforcement learning; Deep reinforcement learning;", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zibin Dong;Jianye HAO;Yifu Yuan;Fei Ni;Yitian Wang;Pengyi Li;YAN ZHENG", "authorids": "~Zibin_Dong1;~Jianye_HAO1;~Yifu_Yuan1;~Fei_Ni1;~Yitian_Wang2;~Pengyi_Li1;~YAN_ZHENG1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/GrandpaDZB;http://www.icdai.org/jianye.html;https://yifu-yuan.github.io/;https://fei-ni.github.io/;;https://yeshenpy.github.io/;https://yanzzzzz.github.io", "dblp": "358/5885;21/7664.html;261/3688;11/579-1;;195/6948;10/2381-2", "google_scholar": "JQ6881QAAAAJ;;83JhosMAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;tT6ko1UAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ", "orcid": "0000-0002-2986-6022;0000-0002-0422-8235;0009-0009-2194-942X;0009-0007-5623-2782;;0009-0009-8546-2346;", "linkedin": ";;;;;;", "or_profile": "~Zibin_Dong1;~Jianye_HAO1;~Yifu_Yuan1;~Fei_Ni1;~Yitian_Wang2;~Pengyi_Li1;~YAN_ZHENG1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;University of California, San Diego;Tianjin University;Tianjin Unibersity, China", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;ucsd.edu;tju.edu.cn;tju.edu.cn", "position": "MS student;Associate Professor;PhD student;PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndong2024diffuserlite,\ntitle={DiffuserLite: Towards Real-time Diffusion Planning},\nauthor={Zibin Dong and Jianye HAO and Yifu Yuan and Fei Ni and Yitian Wang and Pengyi Li and YAN ZHENG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2TXDHUqyrQ}\n}", "github": "", "reviewers": "XA4d;7Mwp;xeNN;r5j2", "pdf_size": 6703548, "rating": "5;6;6;9", "confidence": "3;3;4;4", "soundness": "3;3;2;3", "novelty": "3;2;2;3", "presentation": "3;2;2;3", "wc_summary": "82;116;131;83", "wc_strengths": "83;116;67;68", "wc_weaknesses": "151;72;279;12", "wc_questions": "56;100;333;42", "wc_limitations": "7;1;198;7", "wc_review": "379;405;1008;212", "wc_reply_reviewers": "118;35;305;0", "wc_reply_authors": "111;152;464;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.0, 21.17781858454737 ], "wc_strengths_avg": [ 83.5, 19.80530232033836 ], "wc_weaknesses_avg": [ 128.5, 99.90120119397965 ], "wc_questions_avg": [ 132.75, 117.57843127036523 ], "wc_limitations_avg": [ 53.25, 83.60734118485051 ], "wc_review_avg": [ 501.0, 301.93956348911945 ], "wc_reply_reviewers_avg": [ 114.5, 118.03918840791816 ], "wc_reply_authors_avg": [ 181.75, 172.18358661614644 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8221306607548345774&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;ucsd.edu;tju.edu.cn;tju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Tianjin University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.ucsd.edu", "aff_unique_abbr": "TJU;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Overcoming Common Flaws in the Evaluation of Selective Classification Systems", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96815", "id": "2TktDpGqNM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2TktDpGqNM", "openreview": "https://openreview.net/forum?id=2TktDpGqNM", "poster": "/media/PosterPDFs/NeurIPS%202024/96815.png?t=1733268220.7610106", "project": "", "author_site": "Jeremias Traub, Till Bungert, Carsten L\u00fcth, Michael Baumgartner, Klaus Maier-Hein, Lena Maier-Hein, Paul Jaeger", "tldr": "", "abstract": "Selective Classification, wherein models can reject low-confidence predictions, promises reliable translation of machine-learning based classification systems to real-world scenarios such as clinical diagnostics. While current evaluation of these systems typically assumes fixed working points based on pre-defined rejection thresholds, methodological progress requires benchmarking the general performance of systems akin to the $\\mathrm{AUROC}$ in standard classification. In this work, we define 5 requirements for multi-threshold metrics in selective classification regarding task alignment, interpretability, and flexibility, and show how current approaches fail to meet them. We propose the Area under the Generalized Risk Coverage curve ($\\mathrm{AUGRC}$), which meets all requirements and can be directly interpreted as the average risk of undetected failures. We empirically demonstrate the relevance of $\\mathrm{AUGRC}$ on a comprehensive benchmark spanning 6 data sets and 13 confidence scoring functions. We find that the proposed metric substantially changes metric rankings on 5 out of the 6 data sets.", "keywords": "Selective Classification;Method evaluation;Failure Detection", "primary_area": "evaluation", "supplementary_material": "", "author": "Jeremias Traub;Till J. Bungert;Carsten T. L\u00fcth;Michael Baumgartner;Klaus Maier-Hein;Lena Maier-hein;Paul F Jaeger", "authorids": "~Jeremias_Traub1;~Till_J._Bungert1;~Carsten_T._L\u00fcth1;~Michael_Baumgartner2;~Klaus_Maier-Hein1;~Lena_Maier-hein2;~Paul_F_Jaeger1", "gender": ";;;M;M;F;", "homepage": ";;;;http://www.dkfz.de/en/mic;https://www.dkfz.de/en/imsy/team/people/Lena_Maier-Hein.html;https://pfjaeger.github.io", "dblp": ";;;66/4721-1;133/0183;35/3435;179/4749", "google_scholar": ";;;https://scholar.google.com/citations?hl=de;oCrBpVMAAAAJ;https://scholar.google.de/citations?hl=en;https://scholar.google.de/citations?user=9B9-8h0AAAAJ", "orcid": ";;;;0000-0002-6626-2463;0000-0003-4910-9368;", "linkedin": ";;;michael-baumgartner-/;;;", "or_profile": "~Jeremias_Traub1;~Till_J._Bungert1;~Carsten_T._L\u00fcth1;~Michael_Baumgartner2;~Klaus_Maier-Hein1;~Lena_Maier-hein2;~Paul_F_Jaeger1", "aff": ";;;Deutsches Krebsforschungszentrum;German Cancer Research Center;Deutsches Krebsforschungszentrum;German Cancer Research Center", "aff_domain": ";;;dkfz.de;dkfz.de;dkfz.de;dkfz.de", "position": ";;;PhD student;Full Professor;Full Professor;Research Group Leader", "bibtex": "@inproceedings{\ntraub2024overcoming,\ntitle={Overcoming Common Flaws in the Evaluation of Selective Classification Systems},\nauthor={Jeremias Traub and Till J. Bungert and Carsten T. L{\\\"u}th and Michael Baumgartner and Klaus Maier-Hein and Lena Maier-hein and Paul F Jaeger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2TktDpGqNM}\n}", "github": "", "reviewers": "izgv;QM33;BBe8;xFt7", "pdf_size": 2523391, "rating": "6;7;7;8", "confidence": "4;3;4;5", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "2;4;3;4", "wc_summary": "50;69;58;58", "wc_strengths": "43;31;62;95", "wc_weaknesses": "226;112;82;54", "wc_questions": "149;51;4;130", "wc_limitations": "37;16;1;1", "wc_review": "505;279;207;338", "wc_reply_reviewers": "141;10;0;0", "wc_reply_authors": "21;21;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 58.75, 6.7592529172978875 ], "wc_strengths_avg": [ 57.75, 24.180312239505923 ], "wc_weaknesses_avg": [ 118.5, 65.36627570850277 ], "wc_questions_avg": [ 83.5, 58.79838433154435 ], "wc_limitations_avg": [ 13.75, 14.7542366796795 ], "wc_review_avg": [ 332.25, 109.9985795362831 ], "wc_reply_reviewers_avg": [ 37.75, 59.75104601594854 ], "wc_reply_authors_avg": [ 10.5, 10.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6401042002415650776&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;;dkfz.de;dkfz.de;dkfz.de;dkfz.de", "author_num": 7, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Deutsches Krebsforschungszentrum;German Cancer Research Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.dkfz.de;https://www.dkfz.de", "aff_unique_abbr": "DKFZ;DKFZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Automating Data Annotation under Strategic Human Agents: Risks and Potential Solutions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96814", "id": "2UJLv3KPGO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2UJLv3KPGO", "openreview": "https://openreview.net/forum?id=2UJLv3KPGO", "poster": "/media/PosterPDFs/NeurIPS%202024/96814.png?t=1730152055.4940896", "project": "", "author_site": "Tian Xie, Xueru Zhang", "tldr": "", "abstract": "As machine learning (ML) models are increasingly used in social domains to make consequential decisions about humans, they often have the power to reshape data distributions. Humans, as strategic agents, continuously adapt their behaviors in response to the learning system. As populations change dynamically, ML systems may need frequent updates to ensure high performance. However, acquiring high-quality *human-annotated* samples can be highly challenging and even infeasible in social domains. A common practice to address this issue is using the model itself to annotate unlabeled data samples. This paper investigates the long-term impacts when ML models are retrained with *model-annotated* samples when they incorporate human strategic responses. We first formalize the interactions between strategic agents and the model and then analyze how they evolve under such dynamic interactions. We find that agents are increasingly likely to receive positive decisions as the model gets retrained, whereas the proportion of agents with positive labels may decrease over time. We thus propose a *refined retraining process* to stabilize the dynamics. Last, we examine how algorithmic fairness can be affected by these retraining processes and find that enforcing common fairness constraints at every round may not benefit the disadvantaged group in the long run. Experiments on (semi-)synthetic and real data validate the theoretical findings.", "keywords": "Strategic Classification;Long-term Fairness", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/47e5b67e423e5386c741bd55b9081353cf4a2dfa.zip", "author": "Tian Xie;Xueru Zhang", "authorids": "~Tian_Xie4;~Xueru_Zhang2", "gender": "M;F", "homepage": "https://www.linkedin.com/in/tianxie1999/;https://xueruzhang.github.io/", "dblp": ";", "google_scholar": ";PNBO_a4AAAAJ", "orcid": ";", "linkedin": "tianxie1999/;", "or_profile": "~Tian_Xie4;~Xueru_Zhang2", "aff": "Ohio State University, Columbus;Ohio State University", "aff_domain": "osu.edu;osu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxie2024automating,\ntitle={Automating Data Annotation under Strategic Human Agents: Risks and Potential Solutions},\nauthor={Tian Xie and Xueru Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2UJLv3KPGO}\n}", "github": "", "reviewers": "ynjw;JSvV;ovmu;WrhT", "pdf_size": 3190404, "rating": "5;5;5;6", "confidence": "3;4;3;3", "soundness": "3;4;3;3", "novelty": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "110;68;78;240", "wc_strengths": "182;45;63;80", "wc_weaknesses": "122;134;85;75", "wc_questions": "2;5;176;71", "wc_limitations": "1;10;9;55", "wc_review": "417;262;411;521", "wc_reply_reviewers": "0;105;813;0", "wc_reply_authors": "0;0;1348;0", "reply_reviewers": "0;1;5;0", "reply_authors": "1;1;6;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.0, 68.74590896918885 ], "wc_strengths_avg": [ 92.5, 53.1342638981665 ], "wc_weaknesses_avg": [ 104.0, 24.627220712049503 ], "wc_questions_avg": [ 63.5, 70.56380091803446 ], "wc_limitations_avg": [ 18.75, 21.21762239271875 ], "wc_review_avg": [ 402.75, 92.28319186070668 ], "wc_reply_reviewers_avg": [ 229.5, 339.6001325088081 ], "wc_reply_authors_avg": [ 337.0, 583.7011221507116 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11864506145407011392&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "osu.edu;osu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Partial Transportability for Domain Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96813", "id": "2V5LTfhcfd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2V5LTfhcfd", "openreview": "https://openreview.net/forum?id=2V5LTfhcfd", "poster": "/media/PosterPDFs/NeurIPS%202024/96813.png?t=1733822456.0824983", "project": "", "author_site": "Kasra Jalaldoust, Alexis Bellot, Elias Bareinboim", "tldr": "", "abstract": "A fundamental task in AI is providing performance guarantees for predictions made in unseen domains. In practice, there can be substantial uncertainty about the distribution of new data, and corresponding variability in the performance of existing predictors. Building on the theory of partial identification and transportability, this paper introduces new results for bounding the value of a functional of the target distribution, such as the generalization error of a classifiers, given data from source domains and assumptions about the data generating mechanisms, encoded in causal diagrams. Our contribution is to provide the first general estimation technique for transportability problems, adapting existing parameterization schemes such Neural Causal Models to encode the structural constraints necessary for cross-population inference. We demonstrate the expressiveness and consistency of this procedure and further propose a gradient-based optimization scheme for making scalable inferences in practice. Our results are corroborated with experiments.", "keywords": "Causal inference;Domain generalization;Transportability", "primary_area": "causal_inference", "supplementary_material": "", "author": "Kasra Jalaldoust;Alexis Bellot;Elias Bareinboim", "authorids": "~Kasra_Jalaldoust1;~Alexis_Bellot1;~Elias_Bareinboim2", "gender": "M;M;M", "homepage": ";https://causalai.net;https://www.linkedin.com/in/amirkasra-jalaldoust-802319216/", "dblp": "217/4339;85/9005;322/4165", "google_scholar": ";r5U-D7YAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alexis_Bellot1;~Elias_Bareinboim2;~Amirkasra_Jalaldoust1", "aff": "Google DeepMind;Columbia University;Columbia University", "aff_domain": "deepmind.com;columbia.edu;columbia.edu", "position": "Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\njalaldoust2024partial,\ntitle={Partial Transportability for Domain Generalization},\nauthor={Kasra Jalaldoust and Alexis Bellot and Elias Bareinboim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2V5LTfhcfd}\n}", "github": "", "reviewers": "SwAx;fw9Z;S6HF", "pdf_size": 2995481, "rating": "6;6;6", "confidence": "3;1;2", "soundness": "3;4;3", "novelty": "3;2;3", "presentation": "3;3;2", "wc_summary": "154;43;68", "wc_strengths": "97;66;26", "wc_weaknesses": "598;54;33", "wc_questions": "2;21;217", "wc_limitations": "14;7;2", "wc_review": "865;191;346", "wc_reply_reviewers": "22;18;120", "wc_reply_authors": "58;0;433", "reply_reviewers": "1;1;2", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.33333333333333, 47.541794478355804 ], "wc_strengths_avg": [ 63.0, 29.06314963431642 ], "wc_weaknesses_avg": [ 228.33333333333334, 261.53436145604695 ], "wc_questions_avg": [ 80.0, 97.1836748979306 ], "wc_limitations_avg": [ 7.666666666666667, 4.9216076867444665 ], "wc_review_avg": [ 467.3333333333333, 288.2248350776795 ], "wc_reply_reviewers_avg": [ 53.333333333333336, 47.16872787015661 ], "wc_reply_authors_avg": [ 163.66666666666666, 191.91375377728636 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17379176965434723552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "deepmind.com;columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;Columbia University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.columbia.edu", "aff_unique_abbr": "DeepMind;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Dendritic Integration Inspired Artificial Neural Networks Capture Data Correlation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96812", "id": "2WQjNXZbhR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2WQjNXZbhR", "openreview": "https://openreview.net/forum?id=2WQjNXZbhR", "poster": "/media/PosterPDFs/NeurIPS%202024/96812.png?t=1730374900.0344768", "project": "", "author_site": "Chongming Liu, Jingyang Ma, Songting Li, Douglas (Dongzhuo) Zhou", "tldr": "", "abstract": "Incorporating biological neuronal properties into Artificial Neural Networks (ANNs) to enhance computational capabilities is under active investigation in the field of deep learning. Inspired by recent findings indicating that dendrites adhere to quadratic integration rule for synaptic inputs, this study explores the computational benefits of quadratic neurons. We theoretically demonstrate that quadratic neurons inherently capture correlation within structured data, a feature that grants them superior generalization abilities over traditional neurons. This is substantiated by few-shot learning experiments. Furthermore, we integrate the quadratic rule into Convolutional Neural Networks (CNNs) using a biologically plausible approach, resulting in innovative architectures\u2014Dendritic integration inspired CNNs (Dit-CNNs). Our Dit-CNNs compete favorably with state-of-the-art models across multiple classification benchmarks, e.g., ImageNet-1K, while retaining the simplicity and efficiency of traditional CNNs. All source code are available at https://github.com/liuchongming1999/Dendritic-integration-inspired-CNN-NeurIPS-2024.", "keywords": "brain-inspired models;dendritic bilinear integration rule;quadratic neuron;deep convolutional neural networks", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Chongming Liu;Jingyang Ma;Songting Li;Douglas Zhou", "authorids": "~Chongming_Liu1;~Jingyang_Ma1;~Songting_Li1;~Douglas_Zhou1", "gender": "M;M;M;", "homepage": ";https://ins.sjtu.edu.cn/peoples/majingyang;https://ins.sjtu.edu.cn/people/songtingli/;", "dblp": "353/1071;180/2678;157/3325;", "google_scholar": "WlWx9Z0AAAAJ;dT0PPtcAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chongming_Liu1;~Jingyang_Ma1;~Songting_Li1;~Douglas_Zhou1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;", "position": "PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\nliu2024dendritic,\ntitle={Dendritic Integration Inspired Artificial Neural Networks Capture Data Correlation},\nauthor={Chongming Liu and Jingyang Ma and Songting Li and Douglas Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2WQjNXZbhR}\n}", "github": "", "reviewers": "opWC;cuyp;gq5j;mc29;8aT5", "pdf_size": 5322941, "rating": "4;5;6;7;7", "confidence": "4;4;4;1;5", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "96;43;50;48;106", "wc_strengths": "35;57;51;30;82", "wc_weaknesses": "339;87;34;12;28", "wc_questions": "18;19;2;34;21", "wc_limitations": "7;4;1;26;43", "wc_review": "495;210;138;150;280", "wc_reply_reviewers": "171;0;0;10;0", "wc_reply_authors": "237;0;0;31;0", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 1.3564659966250536 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 68.6, 26.74023186137323 ], "wc_strengths_avg": [ 51.0, 18.40652058374966 ], "wc_weaknesses_avg": [ 100.0, 122.1261642728535 ], "wc_questions_avg": [ 18.8, 10.186265262597475 ], "wc_limitations_avg": [ 16.2, 15.992498241363043 ], "wc_review_avg": [ 254.6, 130.35582073693527 ], "wc_reply_reviewers_avg": [ 36.2, 67.51118425861007 ], "wc_reply_authors_avg": [ 53.6, 92.48264702094119 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3034330424545042, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:de3s_rODN8oJ:scholar.google.com/&scioq=Dendritic+Integration+Inspired+Artificial+Neural+Networks+Capture+Data+Correlation&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "The Scandinavian Embedding Benchmarks: Comprehensive Assessment of Multilingual and Monolingual Text Embedding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97869", "id": "2WbuKAfOxP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2WbuKAfOxP", "openreview": "https://openreview.net/forum?id=2WbuKAfOxP", "poster": "/media/PosterPDFs/NeurIPS%202024/97869.png?t=1730986430.8736336", "project": "", "author_site": "Kenneth Enevoldsen, M\u00e1rton Kardos, Niklas Muennighoff, Kristoffer Nielbo", "tldr": "", "abstract": "The evaluation of English text embeddings has transitioned from evaluating a handful of datasets to broad coverage across many tasks through benchmarks such as MTEB. However, this is not the case for multilingual text embeddings due to a lack of available benchmarks. To address this problem, we introduce the Scandinavian Embedding Benchmark (SEB). SEB is a comprehensive framework that enables text embedding evaluation for Scandinavian languages across 24 tasks, 10 subtasks, and 4 task categories. Building on SEB, we evaluate more than 26 models, uncovering significant performance disparities between public and commercial solutions not previously captured by MTEB. We open-source SEB and integrate it with MTEB, thus bridging the text embedding evaluation gap for Scandinavian languages.", "keywords": "sentence embeddings;rag;low-resource NLP;Danish;Norwegian;Swedish;Scandinavian", "primary_area": "", "supplementary_material": "", "author": "Kenneth Enevoldsen;M\u00e1rton Kardos;Niklas Muennighoff;Kristoffer Nielbo", "authorids": "~Kenneth_Enevoldsen1;~M\u00e1rton_Kardos1;~Niklas_Muennighoff1;~Kristoffer_Nielbo1", "gender": "M;M;M;M", "homepage": "https://pure.au.dk/portal/da/persons/kenneth.enevoldsen%40cas.au.dk;https://github.com/x-tabdeveloping;https://muennighoff.github.io/;https://www.au.dk/kln@cas.au.dk", "dblp": ";;281/6745;238/1559", "google_scholar": "VJRMvHUAAAAJ;;Me0IoRMAAAAJ;", "orcid": "0000-0001-8733-0966;0000-0001-9652-4498;;0000-0002-5116-5070", "linkedin": "kennethenevoldsen/?originalSubdomain=dk;m%C3%A1rton-kardos-1937821b6/;niklasmuennighoff/;kristoffer-nielbo/", "or_profile": "~Kenneth_Enevoldsen1;~M\u00e1rton_Kardos1;~Niklas_Muennighoff1;~Kristoffer_Nielbo1", "aff": "Aarhus University;Aarhus University;Allen Institute for Artificial Intelligence;Aarhus University", "aff_domain": "au.dk;au.dk;allenai.org;au.dk", "position": "Postdoc;Undergrad student;Researcher;Full Professor", "bibtex": "@inproceedings{\nenevoldsen2024the,\ntitle={The Scandinavian Embedding Benchmarks: Comprehensive Assessment of Multilingual and Monolingual Text Embedding},\nauthor={Kenneth Enevoldsen and M{\\'a}rton Kardos and Niklas Muennighoff and Kristoffer Nielbo},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=2WbuKAfOxP}\n}", "github": "", "reviewers": "5G2G;YWqP;TXiX", "pdf_size": 3793989, "rating": "5;7;9", "confidence": "2;4;4", "wc_summary_and_contributions": "59;48;90", "wc_strengths": "36;40;35", "wc_improvement": "79;134;135", "wc_limitations": "4;289;16", "wc_correctness": "34;19;10", "wc_clarity": "5;32;24", "wc_relation_to_prior_work": "8;19;50", "wc_documentation": "13;36;30", "wc_additional_feedback": "1;1;1", "wc_review": "239;618;391", "wc_reply_reviewers": "0;20;0", "wc_reply_authors": "702;896;419", "reply_reviewers": "0;1;0", "reply_authors": "1;3;1", "rating_avg": [ 7.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 65.66666666666667, 17.78263822446552 ], "wc_strengths_avg": [ 37.0, 2.160246899469287 ], "wc_improvement_avg": [ 116.0, 26.166135875720485 ], "wc_limitations_avg": [ 103.0, 131.61306925985733 ], "wc_correctness_avg": [ 21.0, 9.899494936611665 ], "wc_clarity_avg": [ 20.333333333333332, 11.32352516764202 ], "wc_relation_to_prior_work_avg": [ 25.666666666666668, 17.78263822446552 ], "wc_documentation_avg": [ 26.333333333333332, 9.741092797468305 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 416.0, 155.73267693925598 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 672.3333333333334, 195.8610618667109 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5611678218421825213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "au.dk;au.dk;allenai.org;au.dk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Aarhus University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://au.dk;https://allenai.org", "aff_unique_abbr": "AU;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Denmark;United States" }, { "title": "Visual Anchors Are Strong Information Aggregators For Multimodal Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96811", "id": "2YPdpWzEsF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2YPdpWzEsF", "openreview": "https://openreview.net/forum?id=2YPdpWzEsF", "poster": "/media/PosterPDFs/NeurIPS%202024/96811.png?t=1731775989.2533176", "project": "", "author_site": "Haogeng Liu, Quanzeng You, Xiaotian Han, Yongfei Liu, Huaibo Huang, Ran He, Hongxia Yang", "tldr": "", "abstract": "In the realm of Multimodal Large Language Models (MLLMs), vision-language connector plays a crucial role to link the pre-trained vision encoders with Large Language Models (LLMs). Despite its importance, the vision-language connector has been relatively less explored. In this study, we aim to propose a strong vision-language connector that enables MLLM to simultaneously achieve high accuracy and low computation cost. We first reveal the existence of the visual anchors in Vision Transformer and propose a cost-effective search algorithm to progressively extract them. Building on these findings, we introduce the Anchor Former (AcFormer), a novel vision-language connector designed to leverage the rich prior knowledge obtained from these visual anchors during pretraining, guiding the aggregation of information. \nThrough extensive experimentation, we demonstrate that the proposed method significantly reduces computational costs by nearly two-thirds, while simultaneously outperforming baseline methods. This highlights the effectiveness and efficiency of AcFormer.", "keywords": "Visual Anchors; Vision Language Learning; Multimodal Large Language Models", "primary_area": "machine_vision", "supplementary_material": "/attachment/d199991b8d350ace1a9895ca2a0db6166c7ed520.zip", "author": "Haogeng Liu;Quanzeng You;Xiaotian Han;Yongfei Liu;Huaibo Huang;Ran He;Hongxia Yang", "authorids": "~Haogeng_Liu1;~Quanzeng_You3;~Xiaotian_Han2;~Yongfei_Liu1;~Huaibo_Huang1;~Ran_He1;~Hongxia_Yang2", "gender": "M;M;M;M;M;M;F", "homepage": "https://github.com/liuhaogeng;https://qzyou.github.io/;https://hanxiaotian.github.io/;https://sites.google.com/view/yongfei-liu/%E9%A6%96%E9%A1%B5;https://people.ucas.edu.cn/~huanghuaibo;https://rhe-web.github.io/;https://www4.comp.polyu.edu.hk/~hongxyang/", "dblp": ";33/9972.html;;;211/7251.html;61/6198-1;", "google_scholar": ";c5KJsIgAAAAJ;5fHHi24AAAAJ;XVYKjDkAAAAJ;XMvLciUAAAAJ;ayrg9AUAAAAJ;iJlC5mMAAAAJ", "orcid": ";0000-0003-3608-0607;0009-0005-9120-0490;;0000-0001-5866-2283;0000-0002-3807-991X;", "linkedin": ";quanzeng-you-5b98a55a/;hxt872675353/;;;;", "or_profile": "~Haogeng_Liu1;~Quanzeng_You3;~Xiaotian_Han2;~Yongfei_Liu1;~Huaibo_Huang1;~Ran_He1;~Hongxia_Yang2", "aff": "University of Chinese Academy of Sciences;ByteDance;ByteDance;Bytedance;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;ByteDance Inc.", "aff_domain": "ucas.ac.cn;bytedance.com;bytedance.com;shanghaitech.edu.cn;ia.ac.cn;ia.ac.cn;bytedance.com", "position": "MS student;Researcher;Researcher;Researcher;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2024visual,\ntitle={Visual Anchors Are Strong Information Aggregators For Multimodal Large Language Model},\nauthor={Haogeng Liu and Quanzeng You and Xiaotian Han and Yongfei Liu and Huaibo Huang and Ran He and Hongxia Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2YPdpWzEsF}\n}", "github": "", "reviewers": "i5LS;r5Ke;Ld45;U61n", "pdf_size": 2547139, "rating": "4;5;6;6", "confidence": "5;4;4;4", "soundness": "4;2;2;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "70;90;58;45", "wc_strengths": "83;43;49;18", "wc_weaknesses": "75;217;83;29", "wc_questions": "92;48;136;29", "wc_limitations": "8;1;1;1", "wc_review": "328;399;327;122", "wc_reply_reviewers": "13;87;10;0", "wc_reply_authors": "30;34;40;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.75, 16.55860803328589 ], "wc_strengths_avg": [ 48.25, 23.1880896151451 ], "wc_weaknesses_avg": [ 101.0, 70.07139216541941 ], "wc_questions_avg": [ 76.25, 41.37858745776612 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 294.0, 103.50603847119258 ], "wc_reply_reviewers_avg": [ 27.5, 34.6878941419049 ], "wc_reply_authors_avg": [ 26.0, 15.427248620541512 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1590278714045253409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucas.ac.cn;bytedance.com;bytedance.com;shanghaitech.edu.cn;ia.ac.cn;ia.ac.cn;bytedance.com", "author_num": 7, "aff_unique_index": "0;1;1;1;2;2;1", "aff_unique_norm": "University of Chinese Academy of Sciences;ByteDance;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;https://www.bytedance.com;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;ByteDance;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Aligning Individual and Collective Objectives in Multi-Agent Cooperation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96810", "id": "2YSHEBRRol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2YSHEBRRol", "openreview": "https://openreview.net/forum?id=2YSHEBRRol", "poster": "", "project": "", "author_site": "Yang Li, Wenhao Zhang, Jianhong Wang, Shao Zhang, Yali Du, Ying Wen, Wei Pan", "tldr": "", "abstract": "Among the research topics in multi-agent learning, mixed-motive cooperation is one of the most prominent challenges, primarily due to the mismatch between individual and collective goals. The cutting-edge research is focused on incorporating domain knowledge into rewards and introducing additional mechanisms to incentivize cooperation. However, these approaches often face shortcomings such as the effort on manual design and the absence of theoretical groundings. To close this gap, we model the mixed-motive game as a differentiable game for the ease of illuminating the learning dynamics towards cooperation. More detailed, we introduce a novel optimization method named \\textbf{\\textit{A}}ltruistic \\textbf{\\textit{G}}radient \\textbf{\\textit{A}}djustment (\\textbf{\\textit{AgA}}) that employs gradient adjustments to progressively align individual and collective objectives. Furthermore, we theoretically prove that AgA effectively attracts gradients to stable fixed points of the collective objective while considering individual interests, and we validate these claims with empirical evidence. We evaluate the effectiveness of our algorithm AgA through benchmark environments for testing mixed-motive collaboration with small-scale agents such as the two-player public good game and the sequential social dilemma games, Cleanup and Harvest, as well as our self-developed large-scale environment in the game StarCraft II.", "keywords": "Mixed-motive cooperation;Mixed-motive game;cooperative AI", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c200bc569d3fa66c1cc256fd01942e3721316439.zip", "author": "Yang Li;Wenhao Zhang;Jianhong Wang;Shao Zhang;Yali Du;Ying Wen;Wei Pan", "authorids": "~Yang_Li40;~Wenhao_Zhang8;~Jianhong_Wang1;~Shao_Zhang1;~Yali_Du1;~Ying_Wen1;~Wei_Pan2", "gender": "M;;M;F;;M;M", "homepage": "https://liyang.page;https://github.com/bluixe;https://hsvgbkhgbv.github.io/;https://shaozhang.info;;https://yingwen.io;http://panweihit.github.io", "dblp": ";;;57/1330;;41/4203-1;", "google_scholar": "msAmwaoAAAAJ;;K1FKF3IAAAAJ;UG36L2YAAAAJ;;_A1CxG8AAAAJ;GqryWPsAAAAJ", "orcid": ";;;0000-0002-0111-0776;;0000-0003-1247-2382;0000-0003-1121-9879", "linkedin": ";;jianhong-wang-45995b100/;;;wenying45;wei-pan-6b558b17/", "or_profile": "~Yang_Li40;~Wenhao_Zhang8;~Jianhong_Wang1;~Shao_Zhang1;~Yali_Du1;~Ying_Wen1;~Wei_Pan2", "aff": "University of Manchester;Shanghai Jiaotong University;University of Manchester;Northeastern University;;Shanghai Jiaotong University;University of Manchester", "aff_domain": "cs.manchester.ac.uk;sjtu.edu.cn;manchester.ac.uk;northeastern.edu;;sjtu.edu.cn;manchester.ac.uk", "position": "PhD student;Undergrad student;Postdoc;PhD student;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024aligning,\ntitle={Aligning Individual and Collective Objectives in Multi-Agent Cooperation},\nauthor={Yang Li and Wenhao Zhang and Jianhong Wang and Shao Zhang and Yali Du and Ying Wen and Wei Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2YSHEBRRol}\n}", "github": "", "reviewers": "x8HM;a1aR;1xxK;sisd", "pdf_size": 1792809, "rating": "5;5;7;8", "confidence": "2;2;4;3", "soundness": "2;3;3;4", "novelty": "3;2;3;4", "presentation": "3;1;3;4", "wc_summary": "53;78;170;59", "wc_strengths": "100;58;90;31", "wc_weaknesses": "80;128;2;4", "wc_questions": "79;91;75;76", "wc_limitations": "1;2;12;1", "wc_review": "313;357;349;171", "wc_reply_reviewers": "21;55;21;39", "wc_reply_authors": "17;59;23;26", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 90.0, 47.10095540432275 ], "wc_strengths_avg": [ 69.75, 27.224758952100935 ], "wc_weaknesses_avg": [ 53.5, 53.27992117111285 ], "wc_questions_avg": [ 80.25, 6.378675411086537 ], "wc_limitations_avg": [ 4.0, 4.636809247747852 ], "wc_review_avg": [ 297.5, 74.89158831270706 ], "wc_reply_reviewers_avg": [ 34.0, 14.177446878757825 ], "wc_reply_authors_avg": [ 31.25, 16.345871038277526 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7543365091413573, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7666505841324974674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.manchester.ac.uk;sjtu.edu.cn;manchester.ac.uk;northeastern.edu;;sjtu.edu.cn;manchester.ac.uk", "author_num": 7, "aff_unique_index": "0;1;0;2;1;0", "aff_unique_norm": "University of Manchester;Shanghai Jiao Tong University;Northeastern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.manchester.ac.uk;https://www.sjtu.edu.cn;https://www.northeastern.edu", "aff_unique_abbr": "UoM;SJTU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;1;0", "aff_country_unique": "United Kingdom;China;United States" }, { "title": "When Is Inductive Inference Possible?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96809", "id": "2aGcshccuV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2aGcshccuV", "openreview": "https://openreview.net/forum?id=2aGcshccuV", "poster": "/media/PosterPDFs/NeurIPS%202024/96809.png?t=1729047726.5868363", "project": "", "tldr": "", "abstract": "Can a physicist make only a finite number of errors in the eternal quest to uncover the law of nature?\nThis millennium-old philosophical problem, known as inductive inference, lies at the heart of epistemology.\nDespite its significance to understanding human reasoning, a rigorous justification of inductive inference has remained elusive.\nAt a high level, inductive inference asks whether one can make at most finite errors amidst an infinite sequence of observations, when deducing the correct hypothesis from a given hypothesis class.\nHistorically, the only theoretical guarantee has been that if the hypothesis class is countable, inductive inference is possible, as exemplified by Solomonoff induction for learning Turing machines.\nIn this paper, we provide a tight characterization of inductive inference by establishing a novel link to online learning theory.\nAs our main result, we prove that inductive inference is possible if and only if the hypothesis class is a countable union of online learnable classes, potentially with an uncountable size, no matter the observations are adaptively chosen or iid sampled.\nMoreover, the same condition is also sufficient and necessary in the agnostic setting, where any hypothesis class meeting this criterion enjoys an $\\tilde{O}(\\sqrt{T})$ regret bound for any time step $T$, while others require an arbitrarily slow rate of regret.\nOur main technical tool is a novel non-uniform online learning framework, which may be of independent interest.\nOur main technical tool is a novel non-uniform online learning framework, which may be of independent interest.", "keywords": "Inductive Inference;Online Learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zhou Lu", "authorids": "~Zhou_Lu1", "gender": "", "homepage": "https://leozoroaster.github.io/", "dblp": "68/11524", "google_scholar": "17_nX_kAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Zhou_Lu1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nlu2024when,\ntitle={When Is Inductive Inference Possible?},\nauthor={Zhou Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2aGcshccuV}\n}", "github": "", "reviewers": "y4NC;Cxri;4vEY", "pdf_size": 395481, "rating": "5;8;8", "confidence": "2;3;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "2;4;4", "wc_summary": "65;18;58", "wc_strengths": "38;9;93", "wc_weaknesses": "140;2;71", "wc_questions": "58;146;2", "wc_limitations": "17;1;1", "wc_review": "318;176;225", "wc_reply_reviewers": "18;3;5", "wc_reply_authors": "308;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 47.0, 20.704266871026046 ], "wc_strengths_avg": [ 46.666666666666664, 34.83612429010373 ], "wc_weaknesses_avg": [ 71.0, 56.3382640840131 ], "wc_questions_avg": [ 68.66666666666667, 59.269628722380986 ], "wc_limitations_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_review_avg": [ 239.66666666666666, 58.89161419270338 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 6.649979114420001 ], "wc_reply_authors_avg": [ 102.66666666666667, 145.19259240363775 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6866190961622543928&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "Exclusively Penalized Q-learning for Offline Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96808", "id": "2bdSnxeQcW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2bdSnxeQcW", "openreview": "https://openreview.net/forum?id=2bdSnxeQcW", "poster": "/media/PosterPDFs/NeurIPS%202024/96808.png?t=1733109684.6198776", "project": "", "author_site": "Junghyuk Yeom, Yonghyeon Jo, Jeongmo Kim, Sanghyeon Lee, Seungyul Han", "tldr": "", "abstract": "Constraint-based offline reinforcement learning (RL) involves policy constraints or imposing penalties on the value function to mitigate overestimation errors caused by distributional shift. This paper focuses on a limitation in existing offline RL methods with penalized value function, indicating the potential for underestimation bias due to unnecessary bias introduced in the value function. To address this concern, we propose Exclusively Penalized Q-learning (EPQ), which reduces estimation bias in the value function by selectively penalizing states that are prone to inducing estimation errors. Numerical results show that our method significantly reduces underestimation bias and improves performance in various offline control tasks compared to other offline RL methods.", "keywords": "Deep reinforcement learning;offline RL;Q-learning;overestimation reduction", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/e9a40aab8bd4b35e46ca3d293fca3cc4f587122c.zip", "author": "Junghyuk Yeom;Yonghyeon Jo;Jeongmo Kim;Sanghyeon Lee;Seungyul Han", "authorids": "~Junghyuk_Yeom1;~Yonghyeon_Jo1;~Jeongmo_Kim1;~Sanghyeon_Lee5;~Seungyul_Han1", "gender": ";M;M;M;M", "homepage": "https://mllab.unist.ac.kr/;https://mllab.unist.ac.kr/;https://mllab.unist.ac.kr/;;https://mllab.unist.ac.kr", "dblp": ";;;;183/6417", "google_scholar": ";;;;https://scholar.google.com/citations?hl=ko", "orcid": ";;;0000-0001-6077-839X;", "linkedin": ";;;;", "or_profile": "~Junghyuk_Yeom1;~Yonghyeon_Jo1;~Jeongmo_Kim1;~Sanghyeon_Lee5;~Seungyul_Han1", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr", "position": "MS student;MS student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nyeom2024exclusively,\ntitle={Exclusively Penalized Q-learning for Offline Reinforcement Learning},\nauthor={Junghyuk Yeom and Yonghyeon Jo and Jeongmo Kim and Sanghyeon Lee and Seungyul Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2bdSnxeQcW}\n}", "github": "", "reviewers": "Hh4A;eWrQ;36me;RxmU", "pdf_size": 2324480, "rating": "5;7;7;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;2;4", "wc_summary": "114;61;138;57", "wc_strengths": "178;77;125;100", "wc_weaknesses": "246;74;752;134", "wc_questions": "166;196;282;92", "wc_limitations": "21;19;12;21", "wc_review": "725;427;1309;404", "wc_reply_reviewers": "85;39;112;16", "wc_reply_authors": "34;33;244;33", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 92.5, 34.586847211042524 ], "wc_strengths_avg": [ 120.0, 37.54330832518626 ], "wc_weaknesses_avg": [ 301.5, 267.32143572859997 ], "wc_questions_avg": [ 184.0, 68.07348970047003 ], "wc_limitations_avg": [ 18.25, 3.6996621467371855 ], "wc_review_avg": [ 716.25, 364.89544735444423 ], "wc_reply_reviewers_avg": [ 63.0, 37.6497011940334 ], "wc_reply_authors_avg": [ 86.0, 91.22225605629363 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12139123878530262529&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "IR-CM: The Fast and General-purpose Image Restoration Method Based on Consistency Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96807", "id": "2bon4HLFkN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2bon4HLFkN", "openreview": "https://openreview.net/forum?id=2bon4HLFkN", "poster": "/media/PosterPDFs/NeurIPS%202024/96807.png?t=1729407031.158249", "project": "", "author_site": "Xiaoxuan Gong, Jie Ma", "tldr": "", "abstract": "This paper proposes a fast and general-purpose image restoration method. The key idea is to achieve few-step or even one-step inference by conducting consistency distilling or training on a specific mean-reverting stochastic differential equations. Furthermore, based on this, we propose a novel linear-nonlinear decoupling training strategy, significantly enhancing training effectiveness and surpassing consistency distillation on inference performance. This allows our method to be independent of any pre-trained checkpoint, enabling it to serve as an effective standalone image-to-image transformation model. Finally, to avoid trivial solutions and stabilize model training, we introduce a simple origin-guided loss. To validate the effectiveness of our proposed method, we conducted experiments on tasks including image deraining, denoising, deblurring, and low-light image enhancement. The experiments show that our method achieves highly competitive results with only one-step inference. And with just two-step inference, it can achieve state-of-the-art performance in low-light image enhancement. Furthermore, a number of ablation experiments demonstrate the effectiveness of the proposed training strategy. our code is available at https://github.com/XiaoxuanGong/IR-CM.", "keywords": "Image restoration;Image enhencement;SDE-based model;Consistency model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/6c2978b98c92dc304429768859f27462b524f2cf.zip", "author": "Xiaoxuan Gong;Jie Ma", "authorids": "~Xiaoxuan_Gong2;~Jie_Ma2", "gender": "M;M", "homepage": "https://github.com/XiaoxuanGong;http://faculty.hust.edu.cn/majie/zh_CN/index/1491001/list/index.htm", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Xiaoxuan_Gong2;~Ma_Jie1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ngong2024ircm,\ntitle={{IR}-{CM}: The Fast and Universal Image Restoration Method Based on Consistency Model},\nauthor={Xiaoxuan Gong and Jie Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2bon4HLFkN}\n}", "github": "", "reviewers": "yxtc;ksNq;huxY;14RS", "pdf_size": 2797340, "rating": "4;4;5;6", "confidence": "4;3;4;5", "soundness": "3;2;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "102;80;61;56", "wc_strengths": "89;57;45;35", "wc_weaknesses": "86;88;118;227", "wc_questions": "114;16;2;3", "wc_limitations": "7;12;16;1", "wc_review": "398;253;242;322", "wc_reply_reviewers": "43;0;14;11", "wc_reply_authors": "94;42;52;53", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 18.102140757380052 ], "wc_strengths_avg": [ 56.5, 20.316249653910045 ], "wc_weaknesses_avg": [ 129.75, 57.56029447457683 ], "wc_questions_avg": [ 33.75, 46.66034183329565 ], "wc_limitations_avg": [ 9.0, 5.612486080160912 ], "wc_review_avg": [ 303.75, 62.45948686949005 ], "wc_reply_reviewers_avg": [ 17.0, 15.890248582070704 ], "wc_reply_authors_avg": [ 60.25, 19.954636052807377 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LmeUJHGQsMQJ:scholar.google.com/&scioq=IR-CM:+The+Fast+and+General-purpose+Image+Restoration+Method+Based+on+Consistency+Model&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "hust.edu.cn;hust.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Weight Diffusion for Future: Learn to Generalize in Non-Stationary Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96806", "id": "2cFUYnNL1m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2cFUYnNL1m", "openreview": "https://openreview.net/forum?id=2cFUYnNL1m", "poster": "/media/PosterPDFs/NeurIPS%202024/96806.png?t=1730257792.3534477", "project": "", "author_site": "Mixue Xie, Shuang Li, Binhui Xie, Chi Liu, Jian Liang, Zixun Sun, Ke Feng, Chengwei Zhu", "tldr": "", "abstract": "Enabling deep models to generalize in non-stationary environments is vital for real-world machine learning, as data distributions are often found to continually change. Recently, evolving domain generalization (EDG) has emerged to tackle the domain generalization in a time-varying system, where the domain gradually evolves over time in an underlying continuous structure. Nevertheless, it typically assumes multiple source domains simultaneously ready. It still remains an open problem to address EDG in the domain-incremental setting, where source domains are non-static and arrive sequentially to mimic the evolution of training domains. To this end, we propose Weight Diffusion (W-Diff), a novel framework that utilizes the conditional diffusion model in the parameter space to learn the evolving pattern of classifiers during the domain-incremental training process. Specifically, the diffusion model is conditioned on the classifier weights of different historical domain (regarded as a reference point) and the prototypes of current domain, to learn the evolution from the reference point to the classifier weights of current domain (regarded as the anchor point). In addition, a domain-shared feature encoder is learned by enforcing prediction consistency among multiple classifiers, so as to mitigate the overfitting problem and restrict the evolving pattern to be reflected in the classifier as much as possible. During inference, we adopt the ensemble manner based on a great number of target domain-customized classifiers, which are cheaply obtained via the conditional diffusion model, for robust prediction. Comprehensive experiments on both synthetic and real-world datasets show the superior generalization performance of W-Diff on unseen domains in the future.", "keywords": "domain generalization;evolving pattern;diffuison model;weight generation;domain-incremental", "primary_area": "other", "supplementary_material": "/attachment/eeed8fe13dfe59f56ed7ab3d60f79d515fe25195.zip", "author": "Mixue Xie;Shuang Li;Binhui Xie;Chi Harold Liu;Jian Liang;Zixun Sun;Ke Feng;Chengwei Zhu", "authorids": "~Mixue_Xie2;~Shuang_Li6;~Binhui_Xie1;~Chi_Harold_Liu1;~Jian_Liang3;~Zixun_Sun1;~Ke_Feng1;~Chengwei_Zhu1", "gender": ";M;M;M;M;;M;M", "homepage": ";https://shuangli.xyz;https://binhuixie.github.io/;;;;https://wizardrichard.blogspot.com;", "dblp": "289/0077;43/6294-8;;45/4723.html;19/2208;;;", "google_scholar": ";VXCiAc4AAAAJ;cbVMMCwAAAAJ;3IgFTEkAAAAJ;mrunnpoAAAAJ;;;FUGRBLcAAAAJ", "orcid": ";0000-0001-6807-9905;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Mixue_Xie2;~Shuang_Li6;~Binhui_Xie1;~Chi_Harold_Liu1;~Jian_Liang3;~Zixun_Sun1;~Ke_Feng1;~Chengwei_Zhu1", "aff": ";Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Kuaishou Technology;;;Tencent", "aff_domain": ";bit.edu.cn;bit.edu.cn;bit.edu.cn;kuaishou.com;;;tencent.com", "position": ";Associate Professor;PhD student;Full Professor;Senior Algorithm Engineer;;;Researcher", "bibtex": "@inproceedings{\nxie2024weight,\ntitle={Weight Diffusion for Future: Learn to Generalize in Non-Stationary Environments},\nauthor={Mixue Xie and Shuang Li and Binhui Xie and Chi Harold Liu and Jian Liang and Zixun Sun and Ke Feng and Chengwei Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2cFUYnNL1m}\n}", "github": "", "reviewers": "BuMk;tBNz;EUGW;Ve2z", "pdf_size": 2801798, "rating": "5;5;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "83;49;213;48", "wc_strengths": "83;49;139;58", "wc_weaknesses": "174;91;329;102", "wc_questions": "1;6;185;2", "wc_limitations": "1;11;2;26", "wc_review": "342;206;868;236", "wc_reply_reviewers": "19;47;30;20", "wc_reply_authors": "32;675;26;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.25, 67.73247005683463 ], "wc_strengths_avg": [ 82.25, 35.0526389876711 ], "wc_weaknesses_avg": [ 174.0, 94.99736838460316 ], "wc_questions_avg": [ 48.5, 78.83051439639348 ], "wc_limitations_avg": [ 10.0, 10.024968827881711 ], "wc_review_avg": [ 413.0, 267.508878357336 ], "wc_reply_reviewers_avg": [ 29.0, 11.247221879201993 ], "wc_reply_authors_avg": [ 190.75, 279.59021352686864 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DKit3nfUKaYJ:scholar.google.com/&scioq=Weight+Diffusion+for+Future:+Learn+to+Generalize+in+Non-Stationary+Environments&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": ";bit.edu.cn;bit.edu.cn;bit.edu.cn;kuaishou.com;;;tencent.com", "author_num": 8, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Beijing Institute of Technology;Kuaishou Technology;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "http://www.bit.edu.cn/;https://www.kuaishou.com;https://www.tencent.com", "aff_unique_abbr": "BIT;Kuaishou;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Provably Mitigating Overoptimization in RLHF: Your SFT Loss is Implicitly an Adversarial Regularizer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96805", "id": "2cQ3lPhkeO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2cQ3lPhkeO", "openreview": "https://openreview.net/forum?id=2cQ3lPhkeO", "poster": "", "project": "", "author_site": "Zhihan Liu, Miao Lu, Shenao Zhang, Boyi Liu, Hongyi Guo, Yingxiang Yang, Jose Blanchet, Zhaoran Wang", "tldr": "", "abstract": "Aligning generative models with human preference via RLHF typically suffers from overoptimization, where an imperfectly learned reward model can misguide the generative model to output even undesired responses. We investigate this problem in a principled manner by identifying the source of the issue as the distributional shift and uncertainty of human preference in dataset. To mitigate overoptimization, we first propose a theoretical algorithm which optimizes the policy against an adversarially chosen reward model, one that simultaneously minimizes its MLE loss and a reward penalty term. The penalty pessimistically biases the uncertain rewards so as to prevent the policy from choosing actions with spursiouly high proxy rewards, resulting in provable sample efficiency of the algorithm under a partial coverage style condition. Moving from theory to practice, the proposed algorithm further enjoys an equivalent but surprisingly easy to implement form. With a clever usage of the equivalence between reward models and the corresponding optimal policy, the algorithm features a simple objective that combines (i) a preference optimization loss that directly aligns the policy with human preference, and (ii) a supervised learning loss which explicitly imitates the policy with a baseline distribution. In the context of aligning large language models (LLM), this objective fuses the direct preference optimization (DPO) loss with the supervised fune-tuning (SFT) loss to help mitigate the overoptimization towards undesired responses, for which we name the algorithm Regularized Preference Optimization (RPO).\nExperiments of aligning LLMs demonstrate the improved performance of our method when compared with DPO baselines. \nOur work sheds light on the interplay between preference optimization and SFT in tuning LLMs with both theoretical guarantees and empirical evidence.", "keywords": "Alignment; Reinforcement Learning from Human Feedback", "primary_area": "generative_models", "supplementary_material": "/attachment/8299c2a8ea94417768022ee93c5ee6d91f278ac4.zip", "author": "Zhihan Liu;Miao Lu;Shenao Zhang;Boyi Liu;Hongyi Guo;Yingxiang Yang;Jose Blanchet;Zhaoran Wang", "authorids": "~Zhihan_Liu1;~Miao_Lu3;~Shenao_Zhang1;~Boyi_Liu1;~Hongyi_Guo1;~Yingxiang_Yang2;~Jose_Blanchet1;~Zhaoran_Wang1", "gender": "M;;M;M;M;;M;Not Specified", "homepage": ";https://miaolu3.github.io;https://shenao-zhang.github.io/;;https://gohsyi.github.io/;;https://web.stanford.edu/~jblanche/;https://zhaoranwang.github.io/", "dblp": ";09/1168;253/4543.html;;;;75/5093.html;117/2756", "google_scholar": "0VVg_R4AAAAJ;3jS17zQAAAAJ;8NamuusAAAAJ;1G8RH_YAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;https://scholar.google.com.tw/citations?user=HSx0BgQAAAAJ", "orcid": ";;;;;;;", "linkedin": ";miao-lu-5bb9a31aa/;shenao-zhang-055a53178/;;;;jose-blanchet;", "or_profile": "~Zhihan_Liu1;~Miao_Lu3;~Shenao_Zhang1;~Boyi_Liu1;~Hongyi_Guo1;~Yingxiang_Yang2;~Jose_Blanchet1;~Zhaoran_Wang1", "aff": "Northwestern University;Stanford University;Georgia Institute of Technology;ByteDance Inc.;Northwestern University, Northwestern University;;Stanford University;Northwestern University", "aff_domain": "northwestern.edu;stanford.edu;gatech.edu;bytedance.com;u.northwestern.edu;;stanford.edu;northwestern.edu", "position": "PhD student;PhD student;MS student;Researcher;PhD student;;Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024provably,\ntitle={Provably Mitigating Overoptimization in {RLHF}: Your {SFT} Loss is Implicitly an Adversarial Regularizer},\nauthor={Zhihan Liu and Miao Lu and Shenao Zhang and Boyi Liu and Hongyi Guo and Yingxiang Yang and Jose Blanchet and Zhaoran Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2cQ3lPhkeO}\n}", "github": "", "reviewers": "9VCJ;JyGr;1DUS", "pdf_size": 875964, "rating": "6;6;7", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "54;118;63", "wc_strengths": "85;19;49", "wc_weaknesses": "39;103;74", "wc_questions": "33;82;57", "wc_limitations": "1;2;1", "wc_review": "212;324;244", "wc_reply_reviewers": "10;13;11", "wc_reply_authors": "46;46;46", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.33333333333333, 28.288199345702832 ], "wc_strengths_avg": [ 51.0, 26.981475126464083 ], "wc_weaknesses_avg": [ 72.0, 26.166135875720485 ], "wc_questions_avg": [ 57.333333333333336, 20.00555478416488 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 260.0, 47.10272462041518 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 1.247219128924647 ], "wc_reply_authors_avg": [ 46.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6049678482443294229&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "northwestern.edu;stanford.edu;gatech.edu;bytedance.com;u.northwestern.edu;;stanford.edu;northwestern.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;0;1;0", "aff_unique_norm": "Northwestern University;Stanford University;Georgia Institute of Technology;ByteDance", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.northwestern.edu;https://www.stanford.edu;https://www.gatech.edu;https://www.bytedance.com", "aff_unique_abbr": "NU;Stanford;Georgia Tech;ByteDance", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Chain of Preference Optimization: Improving Chain-of-Thought Reasoning in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96804", "id": "2cczgOfMP4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2cczgOfMP4", "openreview": "https://openreview.net/forum?id=2cczgOfMP4", "poster": "", "project": "", "author_site": "Xuan Zhang, Chao Du, Tianyu Pang, Qian Liu, Wei Gao, Min Lin", "tldr": "", "abstract": "The recent development of chain-of-thought (CoT) decoding has enabled large language models (LLMs) to generate explicit logical reasoning paths for complex problem-solving. However, research indicates that these paths are not always deliberate and optimal. The tree-of-thought (ToT) method employs tree-searching to extensively explore the reasoning space and find better reasoning paths that CoT decoding might overlook. This deliberation, however, comes at the cost of significantly increased inference complexity. In this work, we demonstrate that fine-tuning LLMs leveraging the search tree constructed by ToT allows CoT to achieve similar or better performance, thereby avoiding the substantial inference burden. This is achieved through \\emph{Chain of Preference Optimization} (CPO), where LLMs are fine-tuned to align each step of the CoT reasoning paths with those of ToT using the inherent preference information in the tree-search process. Extensive experimental results show that CPO significantly improves LLM performance in solving a variety of complex problems, including question answering, fact verification, and arithmetic reasoning, demonstrating its effectiveness. \nOur code is available at [https://github.com/sail-sg/CPO](https://github.com/sail-sg/CPO).", "keywords": "large language model;reasoning;tree of thought", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/27fcbe0c95a88d496ff72bd97079715758d500f7.zip", "author": "Xuan Zhang;Chao Du;Tianyu Pang;Qian Liu;Wei Gao;Min Lin", "authorids": "~Xuan_Zhang22;~Chao_Du1;~Tianyu_Pang1;~Qian_Liu2;~Wei_Gao1;~Min_Lin1", "gender": ";M;M;M;M;M", "homepage": ";https://duchao0726.github.io/;https://p2333.github.io/;http://siviltaram.github.io/;https://sites.google.com/view/gaowei/;https://linmin.me", "dblp": ";75/7523;202/2550;;28/2073-1;", "google_scholar": ";QOp7xW0AAAAJ;wYDbtFsAAAAJ;bcbeUo0AAAAJ;8uaZwkwAAAAJ;BGONmkIAAAAJ", "orcid": ";0000-0003-1244-6336;0000-0003-0639-6176;;;", "linkedin": ";duchao/;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;;min-lin-08a3a422/", "or_profile": "~Xuan_Zhang22;~Chao_Du1;~Tianyu_Pang1;~Qian_Liu2;~Wei_Gao1;~Min_Lin1", "aff": ";Sea AI Lab;Sea AI Lab;Tiktok;Singapore Management University;Sea AI Lab", "aff_domain": ";sea.com;sea.com;bytedance.com;smu.edu.sg;sea.com", "position": ";Senior Research Scientist;Senior Research Scientist;Researcher;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nzhang2024chain,\ntitle={Chain of Preference Optimization: Improving Chain-of-Thought Reasoning in {LLM}s},\nauthor={Xuan Zhang and Chao Du and Tianyu Pang and Qian Liu and Wei Gao and Min Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2cczgOfMP4}\n}", "github": "", "reviewers": "ADqw;iCyz;WuBE;Nj7W;VaVd", "pdf_size": 2382441, "rating": "5;6;6;6;6", "confidence": "4;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "110;29;70;100;127", "wc_strengths": "56;25;55;118;56", "wc_weaknesses": "96;137;99;71;190", "wc_questions": "2;1;55;44;133", "wc_limitations": "2;1;18;1;1", "wc_review": "266;193;297;334;507", "wc_reply_reviewers": "0;0;28;24;281", "wc_reply_authors": "0;0;27;28;722", "reply_reviewers": "0;0;1;1;2", "reply_authors": "1;1;2;2;4", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 87.2, 34.49869562751612 ], "wc_strengths_avg": [ 62.0, 30.417100453527784 ], "wc_weaknesses_avg": [ 118.6, 41.46613075752306 ], "wc_questions_avg": [ 47.0, 48.187135212627034 ], "wc_limitations_avg": [ 4.6, 6.7111846942250075 ], "wc_review_avg": [ 319.4, 104.63001481410579 ], "wc_reply_reviewers_avg": [ 66.6, 107.83617203888498 ], "wc_reply_authors_avg": [ 155.4, 283.5669938480147 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5366051944037799291&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": ";sea.com;sea.com;bytedance.com;smu.edu.sg;sea.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Sea AI Lab;TikTok;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.tiktok.com;https://www.smu.edu.sg", "aff_unique_abbr": ";TikTok;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";China;Singapore" }, { "title": "Gaussian Graph Network: Learning Efficient and Generalizable Gaussian Representations from Multi-view Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96803", "id": "2dfBpyqh0A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2dfBpyqh0A", "openreview": "https://openreview.net/forum?id=2dfBpyqh0A", "poster": "/media/PosterPDFs/NeurIPS%202024/96803.png?t=1731661031.25648", "project": "", "author_site": "Shengjun Zhang, Xin Fei, Fangfu Liu, Haixu Song, Yueqi Duan", "tldr": "", "abstract": "3D Gaussian Splatting (3DGS) has demonstrated impressive novel view synthesis performance. While conventional methods require per-scene optimization, more recently several feed-forward methods have been proposed to generate pixel-aligned Gaussian representations with a learnable network, which are generalizable to different scenes. However, these methods simply combine pixel-aligned Gaussians from multiple views as scene representations, thereby leading to artifacts and extra memory cost without fully capturing the relations of Gaussians from different images. In this paper, we propose Gaussian Graph Network (GGN) to generate efficient and generalizable Gaussian representations. Specifically, we construct Gaussian Graphs to model the relations of Gaussian groups from different views. To support message passing at Gaussian level, we reformulate the basic graph operations over Gaussian representations, enabling each Gaussian to benefit from its connected Gaussian groups with Gaussian feature fusion. Furthermore, we design a Gaussian pooling layer to aggregate various Gaussian groups for efficient representations. We conduct experiments on the large-scale RealEstate10K and ACID datasets to demonstrate the efficiency and generalization of our method. Compared to the state-of-the-art methods, our model uses fewer Gaussians and achieves better image quality with higher rendering speed.", "keywords": "Gaussian Splatting;novel view synthesis", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shengjun Zhang;Xin Fei;Fangfu Liu;Haixu Song;Yueqi Duan", "authorids": "~Shengjun_Zhang2;~Xin_Fei1;~Fangfu_Liu2;~Haixu_Song1;~Yueqi_Duan1", "gender": "M;M;M;M;M", "homepage": "https://github.com/zhangshengjun2019;https://xinfei21.github.io/web/;https://liuff19.github.io/;https://github.com/SongHaixu/shx.github.io;https://duanyueqi.github.io/", "dblp": ";51/855;342/1749;339/5040;168/8373", "google_scholar": ";;;;qDseo3cAAAAJ", "orcid": ";;;0009-0009-1447-776X;", "linkedin": ";;%E8%8A%B3%E7%94%AB-%E5%88%98-482856229/;;", "or_profile": "~Shengjun_Zhang2;~Xin_Fei1;~Fangfu_Liu2;~Haixu_Song1;~Yueqi_Duan1", "aff": "Tsinghua University;Tsinghua University;Department of Electronic Engineering, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024gaussian,\ntitle={Gaussian Graph Network: Learning Efficient and Generalizable Gaussian Representations from Multi-view Images},\nauthor={Shengjun Zhang and Xin Fei and Fangfu Liu and Haixu Song and Yueqi Duan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2dfBpyqh0A}\n}", "github": "", "reviewers": "Xdv1;LnYN;sL12;zafq", "pdf_size": 5086451, "rating": "5;6;6;7", "confidence": "4;5;5;3", "soundness": "2;3;3;3", "novelty": "2;4;3;4", "presentation": "2;3;4;3", "wc_summary": "22;94;45;117", "wc_strengths": "12;63;84;66", "wc_weaknesses": "84;201;187;87", "wc_questions": "71;5;11;23", "wc_limitations": "27;5;37;7", "wc_review": "216;368;364;300", "wc_reply_reviewers": "19;143;64;0", "wc_reply_authors": "0;60;122;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.5, 37.79219496139381 ], "wc_strengths_avg": [ 56.25, 26.78035660703569 ], "wc_weaknesses_avg": [ 139.75, 54.48566325190508 ], "wc_questions_avg": [ 27.5, 25.937424698685874 ], "wc_limitations_avg": [ 19.0, 13.490737563232042 ], "wc_review_avg": [ 312.0, 61.644140029689765 ], "wc_reply_reviewers_avg": [ 56.5, 55.084026722816844 ], "wc_reply_authors_avg": [ 45.5, 50.50495025242575 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12713716610080890751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Vript: A Video Is Worth Thousands of Words", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97868", "id": "2dw3zQ3nk9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2dw3zQ3nk9", "openreview": "https://openreview.net/forum?id=2dw3zQ3nk9", "poster": "/media/PosterPDFs/NeurIPS%202024/97868.png?t=1731558228.8427713", "project": "", "author_site": "Dongjie Yang, Suyuan Huang, Chengqiang Lu, Xiaodong Han, Haoxin Zhang, Yan Gao, Yao Hu, Hai Zhao", "tldr": "", "abstract": "Advancements in multimodal learning, particularly in video understanding and generation, require high-quality video-text datasets for improved model performance. Vript addresses this issue with a meticulously annotated corpus of 12K high-resolution videos, offering detailed, dense, and script-like captions for over 420K clips. Each clip has a caption of ~145 words, which is over 10x longer than most video-text datasets. Unlike captions only documenting static content in previous datasets, we enhance video captioning to video scripting by documenting not just the content, but also the camera operations, which include the shot types (medium shot, close-up, etc) and camera movements (panning, tilting, etc). By utilizing the Vript, we explore three training paradigms of aligning more text with the video modality rather than clip-caption pairs. This results in Vriptor, a top-performing video captioning model among open-source models, comparable to GPT-4V in performance. Vriptor is also a powerful model capable of end-to-end generation of dense and detailed captions for long videos. Moreover, we introduce Vript-Hard, a benchmark consisting of three video understanding tasks that are more challenging than existing benchmarks: Vript-HAL is the first benchmark evaluating action and object hallucinations in video LLMs, Vript-RR combines reasoning with retrieval resolving question ambiguity in long-video QAs, and Vript-ERO is a new task to evaluate the temporal understanding of events in long videos rather than actions in short videos in previous works. All code, models, and datasets are available in https://github.com/mutonix/Vript.", "keywords": "Video-text Dataset;Video Captioning;Video Understanding Benchmark", "primary_area": "", "supplementary_material": "", "author": "Dongjie Yang;Suyuan Huang;Chengqiang Lu;Xiaodong Han;Haoxin Zhang;Yan Gao;Yao Hu;hai zhao", "authorids": "~Dongjie_Yang1;~Suyuan_Huang1;~Chengqiang_Lu1;~Xiaodong_Han3;~Haoxin_Zhang1;~Yan_Gao10;~Yao_Hu4;~hai_zhao1", "gender": "M;;M;M;M;M;M;M", "homepage": "https://github.com/mutonix/;;https://scholar.google.com.hk/citations?user=FGMCzzQAAAAJ&hl=zh-CN;;;http://bcmi.sjtu.edu.cn/~zhaohai/;https://github.com/hannnnnxd;", "dblp": ";;243/6048;;;25/1145-1.html;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=FGMCzzQAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ;;LIu7k7wAAAAJ", "orcid": ";0009-0002-0621-0896;;0009-0002-1468-7539;0009-0004-5960-1684;;;0009-0006-1274-7111", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAAD9ZRpEBaKQ-49IFZ6cDGxjeLbDd6_kOZSo;;https://www.linkedin.cn/incareer/in/ACoAAByK6iYBngi7xYDHkYBF67dOyygCp578zxg;;;;", "or_profile": "~Dongjie_Yang1;~Suyuan_Huang1;~Chengqiang_Lu1;~Haoxin_Zhang1;~Yan_Gao10;~hai_zhao1;~xiaodong_han1;~Yao_Hu1", "aff": "Shanghai Jiaotong University;Beihang University;Xiaohongshu;Xiaohongshu;Xiaohongshu;Shanghai Jiaotong University;xiaohongshu;Zhejiang University of Technology", "aff_domain": "sjtu.edu.cn;buaa.edu.cn;xiaohongshu.com;xiaohongshu.com;xiaohongshu.com;sjtu.edu.cn;xiaohongshu.com;zjut.edu.cn", "position": "PhD student;MS student;Researcher;Researcher;Researcher;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nyang2024vript,\ntitle={Vript: A Video Is Worth Thousands of Words},\nauthor={Dongjie Yang and Suyuan Huang and Chengqiang Lu and Xiaodong Han and Haoxin Zhang and Yan Gao and Yao Hu and hai zhao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=2dw3zQ3nk9}\n}", "github": "", "reviewers": "CXVg;65SF;8smn;n9oD", "pdf_size": 5860785, "rating": "5;6;8;9", "confidence": "4;4;4;4", "wc_summary_and_contributions": "57;46;27;54", "wc_strengths": "86;72;2;32", "wc_improvement": "135;205;73;38", "wc_limitations": "1;1;1;14", "wc_correctness": "1;1;1;42", "wc_clarity": "1;1;1;7", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "1;10;1;28", "wc_additional_feedback": "1;1;1;1", "wc_review": "284;338;108;217", "wc_reply_reviewers": "0;108;10;10", "wc_reply_authors": "51;633;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 7.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 46.0, 11.683321445547923 ], "wc_strengths_avg": [ 48.0, 33.13608305156178 ], "wc_improvement_avg": [ 112.75, 63.58606372468735 ], "wc_limitations_avg": [ 4.25, 5.629165124598851 ], "wc_correctness_avg": [ 11.25, 17.75352077758099 ], "wc_clarity_avg": [ 2.5, 2.598076211353316 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 10.0, 11.022703842524301 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 236.75, 85.80610409522157 ], "wc_reply_reviewers_avg": [ 32.0, 44.06812907306141 ], "wc_reply_authors_avg": [ 171.0, 267.54719209888935 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11772246139120668190&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;buaa.edu.cn;xiaohongshu.com;xiaohongshu.com;xiaohongshu.com;sjtu.edu.cn;xiaohongshu.com;zjut.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;2;2;0;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;Beihang University;Xiaohongshu;Zhejiang University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.buaa.edu.cn/;https://www.xiaohongshu.com;https://www.zjut.edu.cn", "aff_unique_abbr": "SJTU;BUAA;XHS;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Unleashing the Denoising Capability of Diffusion Prior for Solving Inverse Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96802", "id": "2fiYzs3YkH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2fiYzs3YkH", "openreview": "https://openreview.net/forum?id=2fiYzs3YkH", "poster": "/media/PosterPDFs/NeurIPS%202024/96802.png?t=1732864205.7263663", "project": "", "author_site": "Jiawei Zhang, Jiaxin Zhuang, Cheng Jin, Gen Li, Yuantao Gu", "tldr": "", "abstract": "The recent emergence of diffusion models has significantly advanced the precision of learnable priors, presenting innovative avenues for addressing inverse problems. Previous works have endeavored to integrate diffusion priors into the maximum a posteriori estimation (MAP) framework and design optimization methods to solve the inverse problem. However, prevailing optimization-based rithms primarily exploit the prior information within the diffusion models while neglecting their denoising capability. To bridge this gap, this work leverages the diffusion process to reframe noisy inverse problems as a two-variable constrained optimization task by introducing an auxiliary optimization variable that represents a 'noisy' sample at an equivalent denoising step. The projection gradient descent method is efficiently utilized to solve the corresponding optimization problem by truncating the gradient through the $\\mu$-predictor. The proposed algorithm, termed ProjDiff, effectively harnesses the prior information and the denoising capability of a pre-trained diffusion model within the optimization framework. Extensive experiments on the image restoration tasks and source separation and partial generation tasks demonstrate that ProjDiff exhibits superior performance across various linear and nonlinear inverse problems, highlighting its potential for practical applications. Code is available at https://github.com/weigerzan/ProjDiff/.", "keywords": "Inverse problem;diffusion model;optimization;generative modeling", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/42783a7ee352d7f578ec24493ac6c7627c3b86cb.zip", "author": "Jiawei Zhang;Jiaxin Zhuang;Cheng Jin;Gen Li;Yuantao Gu", "authorids": "~Jiawei_Zhang16;~Jiaxin_Zhuang2;~Cheng_Jin4;~Gen_Li2;~Yuantao_Gu1", "gender": "M;;M;M;", "homepage": "https://github.com/weigerzan;;https://github.com/jinc7461;;", "dblp": ";;;28/538-5.html;", "google_scholar": ";;;https://scholar.google.com/citations?view_op=list_works;", "orcid": ";;0009-0005-4798-1787;0000-0002-3078-9191;", "linkedin": ";;;;", "or_profile": "~Jiawei_Zhang16;~Jiaxin_Zhuang2;~Cheng_Jin4;~Gen_Li2;~Yuantao_Gu1", "aff": "Tsinghua University;;Tsinghua University;The Chinese University of Hong Kong;", "aff_domain": "mails.tsinghua.edu.cn;;tsinghua.edu.cn;cuhk.edu.hk;", "position": "PhD student;;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2024unleashing,\ntitle={Unleashing the Denoising Capability of Diffusion Prior for Solving Inverse Problems},\nauthor={Jiawei Zhang and Jiaxin Zhuang and Cheng Jin and Gen Li and Yuantao Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2fiYzs3YkH}\n}", "github": "", "reviewers": "g7gD;krnF;dXRm;Wp65", "pdf_size": 10088529, "rating": "3;6;6;7", "confidence": "5;4;3;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;2;3", "wc_summary": "41;61;68;111", "wc_strengths": "21;45;44;62", "wc_weaknesses": "620;49;368;157", "wc_questions": "2;352;51;84", "wc_limitations": "2;7;5;44", "wc_review": "686;514;536;458", "wc_reply_reviewers": "0;97;23;209", "wc_reply_authors": "68;23;16;405", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 70.25, 25.52817071393875 ], "wc_strengths_avg": [ 43.0, 14.577379737113251 ], "wc_weaknesses_avg": [ 298.5, 218.21147999131486 ], "wc_questions_avg": [ 122.25, 135.8167423405524 ], "wc_limitations_avg": [ 14.5, 17.124543789543708 ], "wc_review_avg": [ 548.5, 84.32526311847477 ], "wc_reply_reviewers_avg": [ 82.25, 81.48427762458228 ], "wc_reply_authors_avg": [ 128.0, 161.16606342527572 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.894427190999916, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10921772521695295143&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;;tsinghua.edu.cn;cuhk.edu.hk;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tsinghua University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "THU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "OneActor: Consistent Subject Generation via Cluster-Conditioned Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96801", "id": "2gtNa14V45", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2gtNa14V45", "openreview": "https://openreview.net/forum?id=2gtNa14V45", "poster": "/media/PosterPDFs/NeurIPS%202024/96801.png?t=1731242324.889328", "project": "", "author_site": "Jiahao Wang, Caixia Yan, Haonan Lin, Weizhan Zhang, Mengmeng Wang, Tieliang Gong, Guang Dai, Hao Sun", "tldr": "", "abstract": "Text-to-image diffusion models benefit artists with high-quality image generation. Yet their stochastic nature hinders artists from creating consistent images of the same subject. Existing methods try to tackle this challenge and generate consistent content in various ways. However, they either depend on external restricted data or require expensive tuning of the diffusion model. For this issue, we propose a novel one-shot tuning paradigm, termed OneActor. It efficiently performs consistent subject generation solely driven by prompts via a learned semantic guidance to bypass the laborious backbone tuning. We lead the way to formalize the objective of consistent subject generation from a clustering perspective, and thus design a cluster-conditioned model. To mitigate the overfitting challenge shared by one-shot tuning pipelines, we augment the tuning with auxiliary samples and devise two inference strategies: semantic interpolation and cluster guidance. These techniques are later verified to significantly improve the generation quality. Comprehensive experiments show that our method outperforms a variety of baselines with satisfactory subject consistency, superior prompt conformity as well as high image quality. Our method is capable of multi-subject generation and compatible with popular diffusion extensions. Besides, we achieve a $4\\times$ faster tuning speed than tuning-based baselines and, if desired, avoid increasing the inference time. Furthermore, our method can be naturally utilized to pre-train a consistent subject generation network from scratch, which will implement this research task into more practical applications. (Project page: https://johnneywang.github.io/OneActor-webpage/)", "keywords": "Diffusion Model;Image Generation;Score-based Generative Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jiahao Wang;Caixia Yan;Haonan Lin;Weizhan Zhang;Mengmeng Wang;Tieliang Gong;Guang Dai;Hao Sun", "authorids": "~Jiahao_Wang14;~Caixia_Yan2;~Haonan_Lin1;~Weizhan_Zhang1;~Mengmeng_Wang1;~Tieliang_Gong2;~Guang_Dai1;~Hao_Sun15", "gender": ";F;M;;F;;M;M", "homepage": ";;;;https://sallymmx.github.io/;;;", "dblp": ";32/9964;269/0292;;;;;82/2248-9", "google_scholar": ";;GBnV3HIAAAAJ;;VSRnUiUAAAAJ;;;", "orcid": ";0000-0001-7763-2987;;;;;0000-0002-3529-9087;", "linkedin": ";;haonan-lin-035276207/;;;;;", "or_profile": "~Jiahao_Wang14;~Caixia_Yan2;~Haonan_Lin1;~Weizhan_Zhang1;~Mengmeng_Wang1;~Tieliang_Gong2;~Guang_Dai1;~Hao_Sun15", "aff": ";Xi'an Jiaotong University;Xi'an Jiaotong University;;Zhejiang University;;SGIT AI;China Telecom Corporation Ltd. Data&AI Technology Company", "aff_domain": ";xjtu.edu.cn;xjtu.edu.cn;;zju.edu.cn;;sgcc.com.cn;chinatelecom.com.cn", "position": ";Assistant Professor;MS student;;PhD student;;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwang2024oneactor,\ntitle={OneActor: Consistent Subject Generation via Cluster-Conditioned Guidance},\nauthor={Jiahao Wang and Caixia Yan and Haonan Lin and Weizhan Zhang and Mengmeng Wang and Tieliang Gong and Guang Dai and Hao Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2gtNa14V45}\n}", "github": "", "reviewers": "7cPE;SGyo;BPQh;iFKj", "pdf_size": 30407523, "rating": "5;5;6;7", "confidence": "4;3;5;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "61;137;65;57", "wc_strengths": "44;62;67;61", "wc_weaknesses": "115;86;150;14", "wc_questions": "49;5;96;14", "wc_limitations": "1;2;44;16", "wc_review": "270;292;422;162", "wc_reply_reviewers": "0;114;28;25", "wc_reply_authors": "76;372;49;50", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.0, 33.03028912982749 ], "wc_strengths_avg": [ 58.5, 8.674675786448736 ], "wc_weaknesses_avg": [ 91.25, 50.026867781223324 ], "wc_questions_avg": [ 41.0, 35.75611835756225 ], "wc_limitations_avg": [ 15.75, 17.354754391808605 ], "wc_review_avg": [ 286.5, 92.41617823736274 ], "wc_reply_reviewers_avg": [ 41.75, 43.10669901535027 ], "wc_reply_authors_avg": [ 136.75, 136.2522935586774 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14331536721133598224&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "email": ";xjtu.edu.cn;xjtu.edu.cn;;zju.edu.cn;;sgcc.com.cn;chinatelecom.com.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Xi'an Jiao Tong University;Zhejiang University;SGIT AI;China Telecom Corporation Ltd.", "aff_unique_dep": ";;;Data&AI Technology Company", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.zju.edu.cn;;https://www.chinatelecom.com.cn/", "aff_unique_abbr": "XJTU;ZJU;;China Telecom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Quantitative Convergences of Lie Group Momentum Optimizers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96800", "id": "2hqHWD7wDb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2hqHWD7wDb", "openreview": "https://openreview.net/forum?id=2hqHWD7wDb", "poster": "", "project": "", "author_site": "Lingkai Kong, Molei Tao", "tldr": "", "abstract": "Explicit, momentum-based dynamics that optimize functions defined on Lie groups can be constructed via variational optimization and momentum trivialization. Structure preserving time discretizations can then turn this dynamics into optimization algorithms. This article investigates two types of discretization, Lie Heavy-Ball, which is a known splitting scheme, and Lie NAG-SC, which is newly proposed. Their convergence rates are explicitly quantified under $L$-smoothness and \\emph{local} strong convexity assumptions. Lie NAG-SC provides acceleration over the momentumless case, i.e. Riemannian gradient descent, but Lie Heavy-Ball does not. When compared to existing accelerated optimizers for general manifolds, both Lie Heavy-Ball and Lie NAG-SC are computationally cheaper and easier to implement, thanks to their utilization of group structure. Only gradient oracle and exponential map are required, but not logarithm map or parallel transport which are computational costly.", "keywords": "momentum Lie group optimizer; convex; explicit convergence rate", "primary_area": "optimization", "supplementary_material": "/attachment/4142aebd3182fe526f04f9ef2c82258785b442fe.zip", "author": "Lingkai Kong;Molei Tao", "authorids": "~Lingkai_Kong2;~Molei_Tao1", "gender": ";", "homepage": ";http://people.math.gatech.edu/~mtao8/", "dblp": ";56/9263", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Lingkai_Kong2;~Molei_Tao1", "aff": ";Georgia Institute of Technology", "aff_domain": ";gatech.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nkong2024quantitative,\ntitle={Quantitative Convergences of Lie Group Momentum Optimizers},\nauthor={Lingkai Kong and Molei Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2hqHWD7wDb}\n}", "github": "", "reviewers": "dors;zKf2;K3KR;JEvs;m3jJ", "pdf_size": 654360, "rating": "1;5;6;7;7", "confidence": "5;4;4;3;3", "soundness": "2;3;3;3;3", "novelty": "1;2;3;3;3", "presentation": "1;2;3;3;3", "wc_summary": "27;50;79;77;33", "wc_strengths": "5;47;81;134;27", "wc_weaknesses": "39;575;74;56;77", "wc_questions": "14;220;67;112;64", "wc_limitations": "5;5;3;6;5", "wc_review": "90;897;304;385;206", "wc_reply_reviewers": "0;75;14;26;38", "wc_reply_authors": "0;1697;9;32;19", "reply_reviewers": "0;1;1;1;1", "reply_authors": "0;4;2;2;2", "rating_avg": [ 5.2, 2.2271057451320084 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 53.2, 21.618510586994656 ], "wc_strengths_avg": [ 58.8, 45.15041528048219 ], "wc_weaknesses_avg": [ 164.2, 205.85373448154883 ], "wc_questions_avg": [ 95.4, 69.59770111145913 ], "wc_limitations_avg": [ 4.8, 0.9797958971132712 ], "wc_review_avg": [ 376.4, 278.35272587133045 ], "wc_reply_reviewers_avg": [ 30.6, 25.531157435572716 ], "wc_reply_authors_avg": [ 351.4, 672.88412078158 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9360299534377727, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5840793019918080335&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 3, "email": ";gatech.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Multi-language Diversity Benefits Autoformalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96799", "id": "2jjfRm2R6D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2jjfRm2R6D", "openreview": "https://openreview.net/forum?id=2jjfRm2R6D", "poster": "/media/PosterPDFs/NeurIPS%202024/96799.png?t=1731363241.3233635", "project": "", "author_site": "Albert Q. Jiang, Wenda Li, Mateja Jamnik", "tldr": "", "abstract": "Autoformalization is the task of translating natural language materials into machine-verifiable formalisations. Progress in autoformalization research is hindered by the lack of a sizeable dataset consisting of informal-formal pairs expressing the same essence. Existing methods tend to circumvent this challenge by manually curating small corpora or using few-shot learning with large language models. But these methods suffer from data scarcity and formal language acquisition difficulty. In this work, we create mma, a large, flexible, multi-language, and multi-domain dataset of informal-formal pairs, by using a language model to translate in the reverse direction, that is, from formal mathematical statements into corresponding informal ones. Experiments show that language models fine-tuned on mma can produce up to $29-31$\\% of statements acceptable with minimal corrections on the miniF2F and ProofNet benchmarks, up from $0$\\% with the base model. We demonstrate that fine-tuning on multi-language formal data results in more capable autoformalization models even on single-language tasks.", "keywords": "AI for math;Autoformalization;Large language models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/b03babf30f3912e1bd804ba83d951ce1d191ab8d.zip", "author": "Albert Q. Jiang;Wenda Li;Mateja Jamnik", "authorids": "~Albert_Q._Jiang1;~Wenda_Li1;~Mateja_Jamnik1", "gender": "M;F;", "homepage": "https://wenda302.github.io;http://www.cl.cam.ac.uk/~mj201;https://albertqjiang.github.io/", "dblp": "132/9868.html;41/1392;321/1049", "google_scholar": "ufYxQkEAAAAJ;d5QiyJkAAAAJ;Fe_RBHMAAAAJ", "orcid": ";0000-0003-2772-2532;", "linkedin": ";;", "or_profile": "~Wenda_Li1;~Mateja_Jamnik1;~Albert_Jiang1", "aff": "University of Edinburgh;University of Cambridge;University of Cambridge", "aff_domain": "ed.ac.uk;cam.ac.uk;cam.ac.uk", "position": "Lecturer;Professor in Artificial Intelligence;PhD student", "bibtex": "@inproceedings{\njiang2024multilanguage,\ntitle={Multi-language Diversity Benefits Autoformalization},\nauthor={Albert Q. Jiang and Wenda Li and Mateja Jamnik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2jjfRm2R6D}\n}", "github": "", "reviewers": "ByiA;zhuU;ctnj;8WKp", "pdf_size": 466681, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "70;242;58;73", "wc_strengths": "108;129;47;78", "wc_weaknesses": "237;513;130;193", "wc_questions": "131;828;93;105", "wc_limitations": "65;1;27;31", "wc_review": "611;1713;355;480", "wc_reply_reviewers": "30;64;0;51", "wc_reply_authors": "15;24;0;23", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.75, 75.9847846611412 ], "wc_strengths_avg": [ 90.5, 30.97176133189716 ], "wc_weaknesses_avg": [ 268.25, 146.33416381692965 ], "wc_questions_avg": [ 289.25, 311.3505861565062 ], "wc_limitations_avg": [ 31.0, 22.759613353482084 ], "wc_review_avg": [ 789.75, 540.6696657849412 ], "wc_reply_reviewers_avg": [ 36.25, 24.190649019817553 ], "wc_reply_authors_avg": [ 15.5, 9.604686356149273 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7277823827653636473&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ed.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Edinburgh;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "Edinburgh;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Benchmarking Estimators for Natural Experiments: A Novel Dataset and a Doubly Robust Algorithm", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97867", "id": "2kTX7K6osK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2kTX7K6osK", "openreview": "https://openreview.net/forum?id=2kTX7K6osK", "poster": "/media/PosterPDFs/NeurIPS%202024/97867.png?t=1729798887.40095", "project": "", "author_site": "R. Teal Witter, Christopher Musco", "tldr": "", "abstract": "Estimating the effect of treatments from natural experiments, where treatments are pre-assigned, is an important and well-studied problem. We introduce a novel natural experiment dataset obtained from an early childhood literacy nonprofit. Surprisingly, applying over 20 established estimators to the dataset produces inconsistent results in evaluating the nonprofits efficacy. To address this, we create a benchmark to evaluate estimator accuracy using synthetic outcomes, whose design was guided by domain experts. The benchmark extensively explores performance as real world conditions like sample size, treatment correlation, and propensity score accuracy vary. Based on our benchmark, we observe that the class of doubly robust treatment effect estimators, which are based on simple and intuitive regression adjustment, generally outperform other more complicated estimators by orders of magnitude. To better support our theoretical understanding of doubly robust estimators, we derive a closed form expression for the variance of any such estimator that uses dataset splitting to obtain an unbiased estimate. This expression motivates the design of a new doubly robust estimator that uses a novel loss function when fitting functions for regression adjustment. We release the dataset and benchmark in a Python package; the package is built in a modular way to facilitate new datasets and estimators. https://github.com/rtealwitter/naturalexperiments", "keywords": "Treatment Effect Estimation;Natural Experiments;Doubly Robust", "primary_area": "", "supplementary_material": "/attachment/ab5fda5141d23c843d89d0d5eaaec935287ae1c7.zip", "author": "R. Teal Witter;Christopher Musco", "authorids": "~R._Teal_Witter1;~Christopher_Musco1", "gender": ";M", "homepage": "https://www.rtealwitter.com/;https://www.chrismusco.com/", "dblp": "239/6526;149/2243", "google_scholar": "B2hIq-cAAAAJ;HXXSrNMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~R._Teal_Witter1;~Christopher_P_Musco1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwitter2024benchmarking,\ntitle={Benchmarking Estimators for Natural Experiments: A Novel Dataset and a Doubly Robust Algorithm},\nauthor={R. Teal Witter and Christopher Musco},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=2kTX7K6osK}\n}", "github": "", "reviewers": "CMHp;ozjC;NCag", "pdf_size": 1517421, "rating": "6;7;8", "confidence": "4;3;2", "wc_summary_and_contributions": "84;66;93", "wc_strengths": "37;41;81", "wc_improvement": "103;118;102", "wc_limitations": "1;1;74", "wc_correctness": "33;1;64", "wc_clarity": "11;4;26", "wc_relation_to_prior_work": "47;28;42", "wc_documentation": "18;39;44", "wc_additional_feedback": "1;1;1", "wc_review": "335;299;527", "wc_reply_reviewers": "17;0;92", "wc_reply_authors": "646;0;600", "reply_reviewers": "1;0;2", "reply_authors": "2;1;3", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 81.0, 11.224972160321824 ], "wc_strengths_avg": [ 53.0, 19.86621923433512 ], "wc_improvement_avg": [ 107.66666666666667, 7.3181661333667165 ], "wc_limitations_avg": [ 25.333333333333332, 34.41253001774532 ], "wc_correctness_avg": [ 32.666666666666664, 25.72072229848057 ], "wc_clarity_avg": [ 13.666666666666666, 9.177266598624136 ], "wc_relation_to_prior_work_avg": [ 39.0, 8.04155872120988 ], "wc_documentation_avg": [ 33.666666666666664, 11.264496832477201 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 387.0, 100.07996802557443 ], "wc_reply_reviewers_avg": [ 36.333333333333336, 39.96943276499625 ], "wc_reply_authors_avg": [ 415.3333333333333, 294.2848204640456 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3qE__d-u80UJ:scholar.google.com/&scioq=Benchmarking+Estimators+for+Natural+Experiments:+A+Novel+Dataset+and+a+Doubly+Robust+Algorithm&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "nyu.edu;nyu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Beyond task diversity: provable representation transfer for sequential multitask linear bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96798", "id": "2kZMtdjzSV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2kZMtdjzSV", "openreview": "https://openreview.net/forum?id=2kZMtdjzSV", "poster": "/media/PosterPDFs/NeurIPS%202024/96798.png?t=1731701303.5950677", "project": "", "author_site": "Thang Duong, Zhi Wang, Chicheng Zhang", "tldr": "", "abstract": "We study lifelong learning in linear bandits, where a learner interacts with a sequence of linear bandit tasks whose parameters lie in an $m$-dimensional subspace of $\\mathbb{R}^d$, thereby sharing a low-rank representation. Current literature typically assumes that the tasks are diverse, i.e., their parameters uniformly span the $m$-dimensional subspace. This assumption allows the low-rank representation to be learned before all tasks are revealed, which can be unrealistic in real-world applications. In this work, we present the first nontrivial result for sequential multi-task linear bandits without the task diversity assumption. We develop an algorithm that efficiently learns and transfers low-rank representations. When facing $N$ tasks, each played over $\\tau$ rounds, our algorithm achieves a regret guarantee of $\\tilde{O}\\big (Nm \\sqrt{\\tau} + N^{\\frac{2}{3}} \\tau^{\\frac{2}{3}} d m^{\\frac13} + Nd^2 + \\tau m d \\big)$ under the ellipsoid action set assumption.\nThis result can significantly improve upon the baseline of $\\tilde{O} \\left (Nd \\sqrt{\\tau}\\right)$ that does not leverage the low-rank structure when the number of tasks $N$ is sufficiently large and $m \\ll d$. We also demonstrate empirically on synthetic data that our algorithm outperforms baseline algorithms, which rely on the task diversity assumption.", "keywords": "Bandits;multi-task;meta-learning;representation learning;online learning;task diversity", "primary_area": "bandits", "supplementary_material": "", "author": "Thang Duong;Zhi Wang;Chicheng Zhang", "authorids": "~Thang_Duong1;~Zhi_Wang3;~Chicheng_Zhang1", "gender": ";;M", "homepage": ";;http://zcc1307.github.io", "dblp": ";;149/2402", "google_scholar": ";;29B3BAgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Thang_Duong1;~Zhi_Wang3;~Chicheng_Zhang1", "aff": ";;University of Arizona", "aff_domain": ";;arizona.edu", "position": ";;Assistant Professor", "bibtex": "@inproceedings{\nduong2024beyond,\ntitle={Beyond task diversity: provable representation transfer for sequential multitask linear bandits},\nauthor={Thang Duong and Zhi Wang and Chicheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2kZMtdjzSV}\n}", "github": "", "reviewers": "u72q;vp1F;9RaC;LLug", "pdf_size": 1243123, "rating": "5;5;5;7", "confidence": "3;3;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "147;36;97;101", "wc_strengths": "109;22;40;30", "wc_weaknesses": "104;23;104;150", "wc_questions": "75;346;152;63", "wc_limitations": "1;12;16;1", "wc_review": "436;439;409;345", "wc_reply_reviewers": "24;67;22;10", "wc_reply_authors": "0;0;0;43", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.25, 39.44854243188207 ], "wc_strengths_avg": [ 50.25, 34.513584282134474 ], "wc_weaknesses_avg": [ 95.25, 45.745901455758855 ], "wc_questions_avg": [ 159.0, 113.2364782214636 ], "wc_limitations_avg": [ 7.5, 6.652067347825035 ], "wc_review_avg": [ 407.25, 37.79136806203237 ], "wc_reply_reviewers_avg": [ 30.75, 21.602951187279945 ], "wc_reply_authors_avg": [ 10.75, 18.619546181365433 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hECh-9lCj1kJ:scholar.google.com/&scioq=Beyond+task+diversity:+provable+representation+transfer+for+sequential+multitask+linear+bandits&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": ";;arizona.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Replicability in Learning: Geometric Partitions and KKM-Sperner Lemma", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96797", "id": "2lL7s5ESTj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2lL7s5ESTj", "openreview": "https://openreview.net/forum?id=2lL7s5ESTj", "poster": "/media/PosterPDFs/NeurIPS%202024/96797.png?t=1733929152.8339067", "project": "", "author_site": "Jason Vander Woude, Peter Dixon, A. Pavan, Jamie Radcliffe, N. V. Vinodchandran", "tldr": "", "abstract": "This paper studies replicability in machine learning tasks from a geometric viewpoint. Recent works have revealed the role of geometric partitions and Sperner's lemma (and its variations) in designing replicable learning algorithms and in establishing impossibility results. \n\nA partition $\\mathcal{P}$ of $\\mathbb{R}^d$ is called a $(k,\\epsilon)$-secluded partition if for every $\\vec{p}\\in\\mathbb{R}^d$, an $\\varepsilon$-radius ball (with respect to the $\\ell_{\\infty}$ norm) centered at $\\vec{p}$ intersects at most $k$ members of $\\mathcal{P}$. In relation to replicable learning, the parameter $k$ is closely related to the $\\textit{list complexity}$, and the parameter $\\varepsilon$ is related to the sample complexity of the replicable learner. Construction of secluded partitions with better parameters (small $k$ and large $\\varepsilon$) will lead to replicable learning algorithms with small list and sample complexities. \n\nMotivated by this connection, we undertake a comprehensive study of secluded partitions and establish near-optimal relationships between $k$ and $\\varepsilon$. \n\n1. We show that for any $(k,\\epsilon)$-secluded partition where each member has at most unit measure, it must be that $k \\geq(1+2\\varepsilon)^d$, and consequently, for the interesting regime $k\\in[2^d]$ it must be that $\\epsilon\\leq\\frac{\\log_4(k)}{d}$. \n\n2. To complement this upper bound on $\\epsilon$, we show that for each $d\\in\\mathbb{N}$ and each viable $k\\in[2^d]$, a construction of a $(k,\\epsilon)$-secluded (unit cube) partition with $\\epsilon\\geq\\frac{\\log_4(k)}{d}\\cdot\\frac{1}{8\\log_4(d+1)}$. This establishes the optimality of $\\epsilon$ within a logarithmic factor.\n\n3. Finally, we adapt our proof techniques to obtain a new ``neighborhood'' variant of the cubical KKM lemma (or cubical Sperner's lemma): For any coloring of $[0,1]^d$ in which no color is used on opposing faces, it holds for each $\\epsilon\\in(0,\\frac12]$ that there is a point where the open $\\epsilon$-radius $\\ell_\\infty$-ball intersects at least $(1+\\frac23\\epsilon)^d$ colors. While the classical Sperner/KKM lemma guarantees the existence of a point that is \"adjacent\" to points with $(d+1)$ distinct colors, the neighborhood version guarantees the existence of a small neighborhood with exponentially many points with distinct colors.", "keywords": "replicability;learning;geometric partitions;sperner lemma;KKM lemma;sample complexity;list complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jason Vander Woude;Peter Dixon;A. Pavan;Jamie Radcliffe;N. V. Vinodchandran", "authorids": "~Jason_Vander_Woude1;~Peter_Dixon1;~A._Pavan1;~Jamie_Radcliffe1;~N._V._Vinodchandran2", "gender": ";;;M;", "homepage": "https://www.math.unl.edu/~jvanderwoude2/;;https://www.cs.iastate.edu/pavan;https://math.unl.edu/aradcliffe1;", "dblp": "322/1237;69/10756-2;88/1807;278/3659;", "google_scholar": ";adp22rAAAAAJ;4QIV0FUAAAAJ;;", "orcid": "0000-0002-7672-6321;0009-0008-0710-3762;0000-0003-1665-5266;0000-0002-0864-8325;", "linkedin": ";;;;", "or_profile": "~Jason_Vander_Woude1;~Peter_Dixon1;~A._Pavan1;~Jamie_Radcliffe1;~N._V._Vinodchandran2", "aff": "University of Nebraska, Lincoln;University of Toronto, Mississauga;Iowa State University;University of Nebraska, Lincoln;", "aff_domain": "unl.edu;utm.utoronto.ca;iastate.edu;unl.edu;", "position": "Postdoc;Instructor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nwoude2024replicability,\ntitle={Replicability in Learning: Geometric Partitions and {KKM}-Sperner Lemma},\nauthor={Jason Vander Woude and Peter Dixon and A. Pavan and Jamie Radcliffe and N. V. Vinodchandran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2lL7s5ESTj}\n}", "github": "", "reviewers": "W1GL;Srve;1env;D2Ky", "pdf_size": 1814494, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;4;4;4", "novelty": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "83;196;410;107", "wc_strengths": "30;50;198;188", "wc_weaknesses": "502;200;235;23", "wc_questions": "25;44;1;63", "wc_limitations": "7;2;10;2", "wc_review": "647;492;854;383", "wc_reply_reviewers": "71;117;86;0", "wc_reply_authors": "15;10;10;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 199.0, 128.8894875465024 ], "wc_strengths_avg": [ 116.5, 76.90741186647747 ], "wc_weaknesses_avg": [ 240.0, 171.28776955754898 ], "wc_questions_avg": [ 33.25, 22.960564017462637 ], "wc_limitations_avg": [ 5.25, 3.418698582794336 ], "wc_review_avg": [ 594.0, 177.0127114079664 ], "wc_reply_reviewers_avg": [ 68.5, 42.88647805544307 ], "wc_reply_authors_avg": [ 8.75, 5.448623679425842 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3l9R2JYy-L8J:scholar.google.com/&scioq=Replicability+in+Learning:+Geometric+Partitions+and+KKM-Sperner+Lemma&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "unl.edu;utm.utoronto.ca;iastate.edu;unl.edu;", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Nebraska;University of Toronto;Iowa State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unl.edu;https://www.utoronto.ca;https://www.iastate.edu", "aff_unique_abbr": "UNL;U of T;ISU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Lincoln;Mississauga;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Contextual Multinomial Logit Bandits with General Value Functions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96796", "id": "2ltOkbo67R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2ltOkbo67R", "openreview": "https://openreview.net/forum?id=2ltOkbo67R", "poster": "", "project": "", "author_site": "Mengxiao Zhang, Haipeng Luo", "tldr": "", "abstract": "Contextual multinomial logit (MNL) bandits capture many real-world assortment recommendation problems such as online retailing/advertising. However, prior work has only considered (generalized) linear value functions, which greatly limits its applicability. Motivated by this fact, in this work, we consider contextual MNL bandits with a general value function class that contains the ground truth, borrowing ideas from a recent trend of studies on contextual bandits. Specifically, we consider both the stochastic and the adversarial settings, and propose a suite of algorithms, each with different computation-regret trade-off. When applied to the linear case, our results not only are the first ones with no dependence on a certain problem-dependent constant that can be exponentially large, but also enjoy other advantages such as computational efficiency, dimension-free regret bounds, or the ability to handle completely adversarial contexts and rewards.", "keywords": "Contextual Bandits;Multinomial Logit Bandit;Online Learning", "primary_area": "bandits", "supplementary_material": "", "author": "Mengxiao Zhang;Haipeng Luo", "authorids": "~Mengxiao_Zhang2;~Haipeng_Luo1", "gender": ";M", "homepage": ";https://haipeng-luo.net/", "dblp": ";62/2576", "google_scholar": ";ct2hw4UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Mengxiao_Zhang2;~Haipeng_Luo1", "aff": ";University of Southern California", "aff_domain": ";usc.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nzhang2024contextual,\ntitle={Contextual Multinomial Logit Bandits with General Value Functions},\nauthor={Mengxiao Zhang and Haipeng Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2ltOkbo67R}\n}", "github": "", "reviewers": "8kDH;AgtJ;a6sS;z4QH", "pdf_size": 295032, "rating": "4;5;6;6", "confidence": "5;2;3;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "57;111;53;58", "wc_strengths": "26;44;100;38", "wc_weaknesses": "106;39;56;68", "wc_questions": "22;62;98;333", "wc_limitations": "26;12;1;6", "wc_review": "237;268;308;503", "wc_reply_reviewers": "19;111;12;327", "wc_reply_authors": "0;131;0;590", "reply_reviewers": "1;2;1;2", "reply_authors": "1;2;1;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 23.889066536807167 ], "wc_strengths_avg": [ 52.0, 28.460498941515414 ], "wc_weaknesses_avg": [ 67.25, 24.631027181179434 ], "wc_questions_avg": [ 128.75, 120.94911119971077 ], "wc_limitations_avg": [ 11.25, 9.364160400164021 ], "wc_review_avg": [ 329.0, 103.56398988065301 ], "wc_reply_reviewers_avg": [ 117.25, 127.24459713480962 ], "wc_reply_authors_avg": [ 180.25, 242.53904324871078 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10176988998179954870&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": ";usc.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Adaptive Experimentation When You Can't Experiment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96795", "id": "2mqiTiJKrx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2mqiTiJKrx", "openreview": "https://openreview.net/forum?id=2mqiTiJKrx", "poster": "", "project": "", "author_site": "Yao Zhao, Kwang-Sung Jun, Tanner Fiez, Lalit Jain", "tldr": "", "abstract": "This paper introduces the confounded pure exploration transductive linear bandit (CPET-LB) problem. \nAs a motivating example, often online services cannot directly assign users to specific control or treatment experiences either for business or practical reasons. In these settings, naively comparing treatment and control groups that may result from self-selection can lead to biased estimates of underlying treatment effects. \nInstead, online services can employ a properly randomized encouragement that incentivizes users toward a specific treatment. \nOur methodology provides online services with an adaptive experimental design approach for learning the best-performing treatment for such encouragement designs. \nWe consider a more general underlying model captured by a linear structural equation and formulate pure exploration linear bandits in this setting. Though pure exploration has been extensively studied in standard adaptive experimental design settings, we believe this is the first work considering a setting where noise is confounded. Elimination-style algorithms using experimental design methods in combination with a novel finite-time confidence interval on an instrumental variable style estimator are presented with sample complexity upper bounds nearly matching a minimax lower bound. Finally, experiments are conducted that demonstrate the efficacy of our approach.", "keywords": "Experimental Design;multi-armed bandit", "primary_area": "bandits", "supplementary_material": "/attachment/cc4b9cafe121d27334e55adc2c53a67e8d3b0a4b.zip", "author": "Yao Zhao;Kwang-Sung Jun;Tanner Fiez;Lalit K Jain", "authorids": "~Yao_Zhao8;~Kwang-Sung_Jun1;~Tanner_Fiez1;~Lalit_K_Jain1", "gender": "M;M;;", "homepage": "https://meyaozhao.github.io/;http://kwangsungjun.github.io;;http://www.lalitjain.com", "dblp": ";88/8411;195/5645;178/3228", "google_scholar": ";VgvC7o8AAAAJ;_B6SVAcAAAAJ;hGMSFu4AAAAJ", "orcid": ";;;", "linkedin": ";;tannerfiez/;", "or_profile": "~Yao_Zhao8;~Kwang-Sung_Jun1;~Tanner_Fiez1;~Lalit_K_Jain1", "aff": "University of Arizona;University of Arizona;Amazon;University of Washington", "aff_domain": "cs.arizona.edu;cs.arizona.edu;amazon.com;uw.edu", "position": "PhD student;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024adaptive,\ntitle={Adaptive Experimentation When You Can't Experiment},\nauthor={Yao Zhao and Kwang-Sung Jun and Tanner Fiez and Lalit K Jain},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2mqiTiJKrx}\n}", "github": "", "reviewers": "Y9pp;byyL;4vBb;oap7", "pdf_size": 1105469, "rating": "5;6;6;7", "confidence": "3;2;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;2;2;3", "wc_summary": "89;66;56;56", "wc_strengths": "73;48;38;60", "wc_weaknesses": "79;32;116;36", "wc_questions": "41;1;37;14", "wc_limitations": "2;1;2;6", "wc_review": "284;148;249;172", "wc_reply_reviewers": "24;0;15;16", "wc_reply_authors": "76;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.75, 13.47915056670857 ], "wc_strengths_avg": [ 54.75, 13.102957681378658 ], "wc_weaknesses_avg": [ 65.75, 34.368408458932166 ], "wc_questions_avg": [ 23.25, 16.467771555374455 ], "wc_limitations_avg": [ 2.75, 1.920286436967152 ], "wc_review_avg": [ 213.25, 55.32348054849767 ], "wc_reply_reviewers_avg": [ 13.75, 8.671072598012312 ], "wc_reply_authors_avg": [ 19.0, 32.90896534380867 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7577957583805032214&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.arizona.edu;cs.arizona.edu;amazon.com;uw.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Arizona;Amazon;University of Washington", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.arizona.edu;https://www.amazon.com;https://www.washington.edu", "aff_unique_abbr": "UA;Amazon;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MiraData: A Large-Scale Video Dataset with Long Durations and Structured Captions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97866", "id": "2myGfVgfva", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2myGfVgfva", "openreview": "https://openreview.net/forum?id=2myGfVgfva", "poster": "", "project": "", "author_site": "Xuan Ju, Yiming Gao, Zhaoyang Zhang, Ziyang Yuan, Xintao Wang, AILING ZENG, Yu Xiong, Qiang Xu, Ying Shan", "tldr": "", "abstract": "Sora's high-motion intensity and long consistent videos have significantly impacted the field of video generation, attracting unprecedented attention. However, existing publicly available datasets are inadequate for generating Sora-like videos, as they mainly contain short videos with low motion intensity and brief captions. To address these issues, we propose MiraData, a high-quality video dataset that surpasses previous ones in video duration, caption detail, motion strength, and visual quality. We curate MiraData from diverse, manually selected sources and meticulously process the data to obtain semantically consistent clips. GPT-4V is employed to annotate structured captions, providing detailed descriptions from four different perspectives along with a summarized dense caption. To better assess temporal consistency and motion intensity in video generation, we introduce MiraBench, which enhances existing benchmarks by adding 3D consistency and tracking-based motion strength metrics. MiraBench includes 150 evaluation prompts and 17 metrics covering temporal consistency, motion strength, 3D consistency, visual quality, text-video alignment, and distribution similarity. To demonstrate the utility and effectiveness of MiraData, we conduct experiments using our DiT-based video generation model, MiraDiT. The experimental results on MiraBench demonstrate the superiority of MiraData, especially in motion strength.", "keywords": "Text-Video Dataset;Video Generation", "primary_area": "", "supplementary_material": "/attachment/e9813f1ca12f6fbe696bb6a34ec9d3c0bd28003d.pdf", "author": "Xuan Ju;Yiming Gao;Zhaoyang Zhang;Ziyang Yuan;Xintao Wang;Ailing Zeng;Yu Xiong;Qiang Xu;Ying Shan", "authorids": "~Xuan_Ju1;~Yiming_Gao3;~Zhaoyang_Zhang1;~Ziyang_Yuan1;~Xintao_Wang1;~Ailing_Zeng1;~Yu_Xiong1;~Qiang_Xu1;~Ying_Shan2", "gender": "F;;M;;;F;M;M;M", "homepage": "https://juxuan27.github.io/;;https://zzyfd.github.io/#/;https://github.com/jiangyzy;;https://ailingzeng.site/;http://www.xiongyu.me;https://github.com/cure-lab;", "dblp": "34/8495;;;;;226/4720;01/2009;43/1230-1;68/5910", "google_scholar": "https://scholar.google.com.hk/citations?user=pWzvK20AAAAJ;;Pf6o7uAAAAAJ;https://scholar.google.hk/citations?hl=zh-CN;;Tn7fzS8AAAAJ;7P30Es0AAAAJ;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ;4oXBp9UAAAAJ", "orcid": ";;;;;;;;0000-0001-7673-8325", "linkedin": "%E7%92%87-%E9%9E%A0-122070217/;;;;;%E7%88%B1%E7%8E%B2-%E6%9B%BE-65504112a/;;;YingShanProfile/", "or_profile": "~Xuan_Ju1;~Yiming_Gao3;~Zhaoyang_Zhang1;~Ziyang_Yuan1;~Xintao_Wang1;~Ailing_Zeng1;~Yu_Xiong1;~Qiang_Xu1;~Ying_Shan2", "aff": "Chinese University of Hong Kong;;The Chinese University of Hong Kong;Tsinghua University;;International Digital Economy Academy;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Tencent PCG ARC Lab", "aff_domain": "cuhk.hk;;cuhk.edu.hk;tsinghua.edu.cn;;idea.edu.cn;cuhk.edu.hk;cuhk.edu.hk;arc.tencent.com", "position": "PhD student;;PhD student;MS student;;Researcher;PhD student;Full Professor;Director", "bibtex": "@inproceedings{\nju2024miradata,\ntitle={MiraData: A Large-Scale Video Dataset with Long Durations and Structured Captions},\nauthor={Xuan Ju and Yiming Gao and Zhaoyang Zhang and Ziyang Yuan and Xintao Wang and Ailing Zeng and Yu Xiong and Qiang Xu and Ying Shan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=2myGfVgfva}\n}", "github": "", "reviewers": "AiEx;CiHh;k17F", "pdf_size": 6524459, "rating": "5;7;9", "confidence": "3;4;5", "wc_summary_and_contributions": "149;160;112", "wc_strengths": "87;94;87", "wc_improvement": "118;143;206", "wc_limitations": "66;32;9", "wc_correctness": "22;38;18", "wc_clarity": "8;6;6", "wc_relation_to_prior_work": "4;37;8", "wc_documentation": "33;11;32", "wc_additional_feedback": "1;1;1", "wc_review": "488;522;479", "wc_reply_reviewers": "0;248;31", "wc_reply_authors": "2199;2702;1008", "reply_reviewers": "0;2;1", "reply_authors": "6;7;4", "rating_avg": [ 7.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 140.33333333333334, 20.531818125912658 ], "wc_strengths_avg": [ 89.33333333333333, 3.2998316455372216 ], "wc_improvement_avg": [ 155.66666666666666, 37.025516726831626 ], "wc_limitations_avg": [ 35.666666666666664, 23.414145771781257 ], "wc_correctness_avg": [ 26.0, 8.640987597877148 ], "wc_clarity_avg": [ 6.666666666666667, 0.9428090415820634 ], "wc_relation_to_prior_work_avg": [ 16.333333333333332, 14.70449666674185 ], "wc_documentation_avg": [ 25.333333333333332, 10.143416036468626 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 496.3333333333333, 18.51725921644153 ], "wc_reply_reviewers_avg": [ 93.0, 110.32980860432355 ], "wc_reply_authors_avg": [ 1969.6666666666667, 710.3305959590615 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 5.666666666666667, 1.247219128924647 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17303417289156183303&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cuhk.hk;;cuhk.edu.hk;tsinghua.edu.cn;;idea.edu.cn;cuhk.edu.hk;cuhk.edu.hk;arc.tencent.com", "author_num": 9, "aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "Chinese University of Hong Kong;Tsinghua University;International Digital Economy Academy;Tencent", "aff_unique_dep": ";;;PCG ARC Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;;https://www.tencent.com", "aff_unique_abbr": "CUHK;THU;;Tencent", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "MambaLRP: Explaining Selective State Space Sequence Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96794", "id": "2n1Ysn1EDl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2n1Ysn1EDl", "openreview": "https://openreview.net/forum?id=2n1Ysn1EDl", "poster": "/media/PosterPDFs/NeurIPS%202024/96794.png?t=1733442801.031325", "project": "", "author_site": "Farnoush Rezaei Jafari, Gr\u00e9goire Montavon, Klaus-Robert M\u00fcller, Oliver Eberle", "tldr": "", "abstract": "Recent sequence modeling approaches using selective state space sequence models, referred to as Mamba models, have seen a surge of interest. These models allow efficient processing of long sequences in linear time and are rapidly being adopted in a wide range of applications such as language modeling, demonstrating promising performance. To foster their reliable use in real-world scenarios, it is crucial to augment their transparency. Our work bridges this critical gap by bringing explainability, particularly Layer-wise Relevance Propagation (LRP), to the Mamba architecture. Guided by the axiom of relevance conservation, we identify specific components in the Mamba architecture, which cause unfaithful explanations. To remedy this issue, we propose MambaLRP, a novel algorithm within the LRP framework, which ensures a more stable and reliable relevance propagation through these components. Our proposed method is theoretically sound and excels in achieving state-of-the-art explanation performance across a diverse range of models and datasets. Moreover, MambaLRP facilitates a deeper inspection of Mamba architectures, uncovering various biases and evaluating their significance. It also enables the analysis of previous speculations regarding the long-range capabilities of Mamba models.", "keywords": "Explainable AI;explainability;interpretability;state space models;Mamba;long-range dependency", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Farnoush Rezaei Jafari;Gr\u00e9goire Montavon;Klaus Robert Muller;Oliver Eberle", "authorids": "~Farnoush_Rezaei_Jafari1;~Gr\u00e9goire_Montavon1;~Klaus_Robert_Muller1;~Oliver_Eberle1", "gender": "F;;M;", "homepage": "https://farnoushrj.github.io/;;https://www.ml.tu-berlin.de/menue/members/klaus-robert_mueller/;https://www.tu.berlin/", "dblp": ";;m/KRMuller.html;260/6891", "google_scholar": "Ei7ny-IAAAAJ;;https://scholar.google.de/citations?hl=de;vZB4qw0AAAAJ", "orcid": ";;0000-0002-3861-7685;0000-0002-6967-9950", "linkedin": "farnoushrj/;;;", "or_profile": "~Farnoush_Rezaei_Jafari1;~Gr\u00e9goire_Montavon1;~Klaus_Robert_Muller1;~Oliver_Eberle1", "aff": "Technische Universit\u00e4t Berlin;;TU Berlin;Technische Universit\u00e4t Berlin", "aff_domain": "tu-berlin.de;;tu-berlin.de;tu-berlin.de", "position": "PhD student;;Full Professor;Postdoc", "bibtex": "@inproceedings{\njafari2024mambalrp,\ntitle={Mamba{LRP}: Explaining Selective State Space Sequence Models},\nauthor={Farnoush Rezaei Jafari and Gr{\\'e}goire Montavon and Klaus Robert Muller and Oliver Eberle},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2n1Ysn1EDl}\n}", "github": "", "reviewers": "dNt6;mtfa;4ExW;wgXY", "pdf_size": 21904102, "rating": "6;6;6;6", "confidence": "5;4;3;3", "soundness": "3;3;2;3", "novelty": "3;3;2;2", "presentation": "4;2;3;4", "wc_summary": "63;108;66;48", "wc_strengths": "174;181;42;29", "wc_weaknesses": "499;510;194;72", "wc_questions": "353;38;50;24", "wc_limitations": "23;54;9;13", "wc_review": "1112;891;361;186", "wc_reply_reviewers": "254;302;75;20", "wc_reply_authors": "775;149;48;19", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 71.25, 22.286486937155438 ], "wc_strengths_avg": [ 106.5, 71.19164276795416 ], "wc_weaknesses_avg": [ 318.75, 190.7319781788046 ], "wc_questions_avg": [ 116.25, 136.99703463944027 ], "wc_limitations_avg": [ 24.75, 17.640507362318125 ], "wc_review_avg": [ 637.5, 377.397999464756 ], "wc_reply_reviewers_avg": [ 162.75, 118.10456172392327 ], "wc_reply_authors_avg": [ 247.75, 308.2088374787459 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4338441905458491238&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tu-berlin.de;;tu-berlin.de;tu-berlin.de", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Meta-Exploiting Frequency Prior for Cross-Domain Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96793", "id": "2nisrxMMQR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2nisrxMMQR", "openreview": "https://openreview.net/forum?id=2nisrxMMQR", "poster": "/media/PosterPDFs/NeurIPS%202024/96793.png?t=1731224921.713377", "project": "", "author_site": "Fei Zhou, Peng Wang, Lei Zhang, Zhenghua Chen, Wei Wei, Chen Ding, Guosheng Lin, Yanning Zhang", "tldr": "", "abstract": "Meta-learning offers a promising avenue for few-shot learning (FSL), enabling models to glean a generalizable feature embedding through episodic training on synthetic FSL tasks in a source domain. Yet, in practical scenarios where the target task diverges from that in the source domain, meta-learning based method is susceptible to over-fitting. To overcome this, we introduce a novel framework, Meta-Exploiting Frequency Prior for Cross-Domain Few-Shot Learning, which is crafted to comprehensively exploit the cross-domain transferable image prior that each image can be decomposed into complementary low-frequency content details and high-frequency robust structural characteristics. Motivated by this insight, we propose to decompose each query image into its high-frequency and low-frequency components, and parallel incorporate them into the feature embedding network to enhance the final category prediction. More importantly, we introduce a feature reconstruction prior and a prediction consistency prior to separately encourage the consistency of the intermediate feature as well as the final category prediction between the original query image and its decomposed frequency components. This allows for collectively guiding the network's meta-learning process with the aim of learning generalizable image feature embeddings, while not introducing any extra computational cost in the inference phase. Our framework establishes new state-of-the-art results on multiple cross-domain few-shot learning benchmarks.", "keywords": "Meta-learning Few-shot learning Cross-domain few-shot learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/bf90a5d0e0964b918fc4588800934bb431ce20f7.zip", "author": "Fei Zhou;Peng Wang;Lei Zhang;Zhenghua Chen;Wei Wei;Chen Ding;Guosheng Lin;Yanning Zhang", "authorids": "~Fei_Zhou3;~Peng_Wang19;~Lei_Zhang28;~Zhenghua_Chen2;~Wei_Wei4;~Chen_Ding3;~Guosheng_Lin1;~Yanning_Zhang1", "gender": "M;M;M;M;M;M;F;M", "homepage": "https://github.com/NWPUZhoufei;https://wp8619.github.io/;https://teacher.nwpu.edu.cn/nwpuzhanglei.html;https://zhenghuantu.github.io/;https://teacher.nwpu.edu.cn/weiwei.html;;http://teacher.nwpu.edu.cn/ynzhang;https://guosheng.github.io/", "dblp": ";95/4442-23.html;97/8704-54.html;03/7457.html;;;14/6655;126/4778", "google_scholar": ";vIr3ICQAAAAJ;0Kg6Gi4AAAAJ;https://scholar.google.com.sg/citations?user=WUgu3nwAAAAJ;;;;https://scholar.google.com.au/citations?user=ZudEhvcAAAAJ", "orcid": ";;0000-0002-7528-420X;0000-0002-1719-0328;;0000-0001-8101-5738;;0000-0002-0329-7458", "linkedin": ";;;;;;;", "or_profile": "~Fei_Zhou3;~Peng_Wang19;~Lei_Zhang28;~Zhenghua_Chen2;~Wei_Wei4;~Chen_Ding3;~Yanning_Zhang1;~Guosheng_Lin2", "aff": "Northwest Polytechnical University Xi'an;University of Electronic Science and Technology of China;Northwestern Polytechnical University;I2R, A*STAR;Northwestern Polytechnical University;Xi'an University of Posts and Telecommunications ;Northwestern Polytechnical University;Nanyang Technological University", "aff_domain": "nwpu.edu.cn;uestc.edu.cn;nwpu.edu.cn;i2r.a-star.edu.sg;nwpu.edu.cn;xupt.edu.cn;nwpu.edu.cn;ntu.edu.sg", "position": "PhD student;Full Professor;Full Professor;Researcher;Full Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024metaexploiting,\ntitle={Meta-Exploiting Frequency Prior for Cross-Domain Few-Shot Learning},\nauthor={Fei Zhou and Peng Wang and Lei Zhang and Zhenghua Chen and Wei Wei and Chen Ding and Guosheng Lin and Yanning Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2nisrxMMQR}\n}", "github": "", "reviewers": "HqaU;SF8W;mWuv", "pdf_size": 3780669, "rating": "6;6;7", "confidence": "4;5;5", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "54;62;112", "wc_strengths": "49;52;89", "wc_weaknesses": "92;230;164", "wc_questions": "42;2;279", "wc_limitations": "1;1;69", "wc_review": "238;347;713", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.0, 25.664502073226878 ], "wc_strengths_avg": [ 63.333333333333336, 18.190351532856337 ], "wc_weaknesses_avg": [ 162.0, 56.356011214421486 ], "wc_questions_avg": [ 107.66666666666667, 122.24656323276422 ], "wc_limitations_avg": [ 23.666666666666668, 32.05550741379015 ], "wc_review_avg": [ 432.6666666666667, 203.1589416086714 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2399495646322270881&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nwpu.edu.cn;uestc.edu.cn;nwpu.edu.cn;i2r.a-star.edu.sg;nwpu.edu.cn;xupt.edu.cn;nwpu.edu.cn;ntu.edu.sg", "author_num": 8, "aff_unique_index": "0;1;2;3;2;4;2;5", "aff_unique_norm": "Northwest Polytechnical University;University of Electronic Science and Technology of China;Northwestern Polytechnical University;A*STAR;Xi'an University of Posts and Telecommunications;Nanyang Technological University", "aff_unique_dep": ";;;Institute for Infocomm Research;;", "aff_unique_url": "http://www.nwpu.edu.cn;https://www.uestc.edu.cn;https://www.nwpu.edu.cn;https://www.a-star.edu.sg;http://www.xupt.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "NWPU;UESTC;NWPU;A*STAR;XUPT;NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;1;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "DEL: Discrete Element Learner for Learning 3D Particle Dynamics with Neural Rendering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96792", "id": "2nvkD0sPOk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2nvkD0sPOk", "openreview": "https://openreview.net/forum?id=2nvkD0sPOk", "poster": "/media/PosterPDFs/NeurIPS%202024/96792.png?t=1730962872.6993997", "project": "", "author_site": "JIAXU WANG, Jingkai SUN, ziyi Zhang, Junhao He, Qiang Zhang, Mingyuan Sun, Renjing Xu", "tldr": "", "abstract": "Learning-based simulators show great potential for simulating particle dynamics when 3D groundtruth is available, but per-particle correspondences are not always accessible. The development of neural rendering presents a new solution to this field to learn 3D dynamics from 2D images by inverse rendering. \nHowever, existing approaches still suffer from ill-posed natures resulting from the 2D to 3D uncertainty, for example, specific 2D images can correspond with various 3D particle distributions. To mitigate such uncertainty, we consider a conventional, mechanically interpretable framework as the physical priors and extend it to a learning-based version. In brief, we incorporate the learnable graph kernels into the classic Discrete Element Analysis (DEA) framework to implement a novel mechanics-informed network architecture. In this case, the graph networks are only used for approximating some specific mechanical operators in the DEA framework rather than the whole dynamics mapping. By integrating the strong physics priors, our methods can effectively learn the dynamics of various materials from the partial 2D observations in a unified manner. Experiments show that our approach outperforms other learned simulators by a large margin in this context and is robust to different renderers, fewer training samples, and fewer camera views.", "keywords": "Particle Dynamics; Learning 3D from 2D; Physics-augmented Graph Network", "primary_area": "machine_vision", "supplementary_material": "/attachment/fe631739662d6715f673fb9cdabde0381ad16a03.zip", "author": "Jiaxu Wang;Jingkai SUN;Ziyi Zhang;Junhao He;Qiang Zhang;Mingyuan Sun;Renjing Xu", "authorids": "~Jiaxu_Wang1;~Jingkai_SUN1;~Ziyi_Zhang6;~Junhao_He2;~Qiang_Zhang10;~Mingyuan_Sun1;~Renjing_Xu1", "gender": ";M;F;M;;M;", "homepage": ";https://github.com/Greatsjk;https://github.com/Zerory1;;;https://myuansun.github.io;", "dblp": ";;;;;;", "google_scholar": ";;;s2pC5LAAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0009-0004-2215-1261;;;", "linkedin": ";;;junhao-h-951b63190/;;;", "or_profile": "~Jiaxu_Wang1;~Jingkai_SUN1;~Ziyi_Zhang6;~Junhao_He2;~Qiang_Zhang10;~Mingyuan_Sun1;~Renjing_Xu1", "aff": ";Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou);the Hong Kong University of Science and Technology (Guangzhou);;Northeastern University;", "aff_domain": ";hkust.edu;hkust-gz.edu.cn;hkust-gz.edu.cn;;neu.edu.cn;", "position": ";MS student;PhD student;Intern;;MS student;", "bibtex": "@inproceedings{\nwang2024del,\ntitle={{DEL}: Discrete Element Learner for Learning 3D Particle Dynamics with Neural Rendering},\nauthor={Jiaxu Wang and Jingkai SUN and Ziyi Zhang and Junhao He and Qiang Zhang and Mingyuan Sun and Renjing Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2nvkD0sPOk}\n}", "github": "", "reviewers": "fjMr;qBCD;LfJV;uaqu", "pdf_size": 12871220, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "2;3;4;3", "novelty": "3;3;4;3", "presentation": "2;3;2;3", "wc_summary": "39;61;185;101", "wc_strengths": "25;39;40;72", "wc_weaknesses": "93;18;36;47", "wc_questions": "61;112;104;183", "wc_limitations": "7;3;8;4", "wc_review": "225;233;373;407", "wc_reply_reviewers": "0;11;0;28", "wc_reply_authors": "0;30;0;37", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.5, 55.72028355993893 ], "wc_strengths_avg": [ 44.0, 17.219175357722563 ], "wc_weaknesses_avg": [ 48.5, 27.69927796892908 ], "wc_questions_avg": [ 115.0, 43.78926809162263 ], "wc_limitations_avg": [ 5.5, 2.0615528128088303 ], "wc_review_avg": [ 309.5, 81.44169693713411 ], "wc_reply_reviewers_avg": [ 9.75, 11.453711188955307 ], "wc_reply_authors_avg": [ 16.75, 16.931848688197046 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R6vBdyXDcpUJ:scholar.google.com/&scioq=DEL:+Discrete+Element+Learner+for+Learning+3D+Particle+Dynamics+with+Neural+Rendering&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": ";hkust.edu;hkust-gz.edu.cn;hkust-gz.edu.cn;;neu.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.northeastern.edu", "aff_unique_abbr": "HKUST;NEU", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;Guangzhou;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "RadarOcc: Robust 3D Occupancy Prediction with 4D Imaging Radar", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96791", "id": "2oZea6pKhl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2oZea6pKhl", "openreview": "https://openreview.net/forum?id=2oZea6pKhl", "poster": "/media/PosterPDFs/NeurIPS%202024/96791.png?t=1733762192.6816587", "project": "", "author_site": "Fangqiang Ding, Xiangyu Wen, Yunzhou Zhu, Yiming Li, Chris Xiaoxuan Lu", "tldr": "", "abstract": "3D occupancy-based perception pipeline has significantly advanced autonomous driving by capturing detailed scene descriptions and demonstrating strong generalizability across various object categories and shapes. Current methods predominantly rely on LiDAR or camera inputs for 3D occupancy prediction. These methods are susceptible to adverse weather conditions, limiting the all-weather deployment of self-driving cars. To improve perception robustness, we leverage the recent advances in automotive radars and introduce a novel approach that utilizes 4D imaging radar sensors for 3D occupancy prediction. Our method, RadarOcc, circumvents the limitations of sparse radar point clouds by directly processing the 4D radar tensor, thus preserving essential scene details. RadarOcc innovatively addresses the challenges associated with the voluminous and noisy 4D radar data by employing Doppler bins descriptors, sidelobe-aware spatial sparsification, and range-wise self-attention mechanisms. To minimize the interpolation errors associated with direct coordinate transformations, we also devise a spherical-based feature encoding followed by spherical-to-Cartesian feature aggregation. We benchmark various baseline methods based on distinct modalities on the public K-Radar dataset. The results demonstrate RadarOcc's state-of-the-art performance in radar-based 3D occupancy prediction and promising results even when compared with LiDAR- or camera-based methods. Additionally, we present qualitative evidence of the superior performance of 4D radar in adverse weather conditions and explore the impact of key pipeline components through ablation studies.", "keywords": "4D Imaging Radar;3D Occupancy Prediction;Scene Understanding;Autonomous Driving", "primary_area": "robotics", "supplementary_material": "/attachment/5eeba321f9601e9642eff87213fceba2b23aed9a.zip", "author": "Fangqiang Ding;Xiangyu Wen;Yunzhou Zhu;Yiming Li;Chris Xiaoxuan Lu", "authorids": "~Fangqiang_Ding1;~Xiangyu_Wen3;~Yunzhou_Zhu1;~Yiming_Li2;~Chris_Xiaoxuan_Lu1", "gender": "M;M;M;M;", "homepage": "https://toytiny.github.io/;;https://lawrencez22.github.io;https://yimingli-page.github.io/;", "dblp": "249/5356;;;l/YimingLi-3;", "google_scholar": "https://scholar.google.com.hk/citations?user=Ja8dgh8AAAAJ;WxgdNyAAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0001-5287-7128;;0009-0002-5360-3510;0000-0002-0157-6218;", "linkedin": "fangqiang-ding-337239189/;;;yiming-li-58b519173/;", "or_profile": "~Fangqiang_Ding1;~Xiangyu_Wen3;~Yunzhou_Zhu1;~Yiming_Li2;~Chris_Xiaoxuan_Lu1", "aff": "University of Edinburgh;University of Edinburgh, University of Edinburgh;Georgia Institute of Technology;New York University;", "aff_domain": "ed.ac.uk;ed.ac.uk;gatech.edu;nyu.edu;", "position": "PhD student;Undergrad student;MS student;PhD student;", "bibtex": "@inproceedings{\nding2024radarocc,\ntitle={RadarOcc: Robust 3D Occupancy Prediction with 4D Imaging Radar},\nauthor={Fangqiang Ding and Xiangyu Wen and Yunzhou Zhu and Yiming Li and Chris Xiaoxuan Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2oZea6pKhl}\n}", "github": "", "reviewers": "f9yn;7z3i;v56z;kdzm;XRVn", "pdf_size": 5572066, "rating": "4;5;5;5;7", "confidence": "4;3;5;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;2;2;4", "wc_summary": "50;63;70;85;176", "wc_strengths": "41;54;29;32;79", "wc_weaknesses": "8;58;127;117;26", "wc_questions": "175;55;43;82;171", "wc_limitations": "8;48;17;55;36", "wc_review": "282;278;286;371;488", "wc_reply_reviewers": "0;24;0;0;73", "wc_reply_authors": "72;0;50;50;705", "reply_reviewers": "0;1;0;0;1", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 88.8, 45.0395381859095 ], "wc_strengths_avg": [ 47.0, 18.20988742414406 ], "wc_weaknesses_avg": [ 67.2, 47.6294026836365 ], "wc_questions_avg": [ 105.2, 56.795774490713654 ], "wc_limitations_avg": [ 32.8, 17.88183435780569 ], "wc_review_avg": [ 341.0, 81.2206870199951 ], "wc_reply_reviewers_avg": [ 19.4, 28.366177042386237 ], "wc_reply_authors_avg": [ 175.4, 265.8507852160682 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49099025303098287, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15157862971067977027&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ed.ac.uk;ed.ac.uk;gatech.edu;nyu.edu;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Edinburgh;Georgia Institute of Technology;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.gatech.edu;https://www.nyu.edu", "aff_unique_abbr": "Edinburgh;Georgia Tech;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Externally Valid Policy Evaluation from Randomized Trials Using Additional Observational Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96790", "id": "2pgc5xDJ1b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2pgc5xDJ1b", "openreview": "https://openreview.net/forum?id=2pgc5xDJ1b", "poster": "/media/PosterPDFs/NeurIPS%202024/96790.png?t=1733320593.4904633", "project": "", "author_site": "Sofia Ek, Dave Zachariah", "tldr": "", "abstract": "Randomized trials are widely considered as the gold standard for evaluating the effects of decision policies. Trial data is, however, drawn from a population which may differ from the intended target population and this raises a problem of external validity (aka. generalizability). In this paper we seek to use trial data to draw valid inferences about the outcome of a policy on the target population. Additional covariate data from the target population is used to model the sampling of individuals in the trial study. We develop a method that yields certifiably valid trial-based policy evaluations under any specified range of model miscalibrations. The method is nonparametric and the validity is assured even with finite samples. The certified policy evaluations are illustrated using both simulated and real data.", "keywords": "Policy Evaluation;Randomized Trials;External Validity", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/8038ce52ff76d6fa0d7eb6c1608aa93120a2012c.zip", "author": "Sofia Ek;Dave Zachariah", "authorids": "~Sofia_Ek1;~Dave_Zachariah1", "gender": "F;", "homepage": ";", "dblp": ";84/2663", "google_scholar": "Yaxew5AAAAAJ;", "orcid": ";", "linkedin": "sofia-ek-ba024186;", "or_profile": "~Sofia_Ek1;~Dave_Zachariah1", "aff": "Uppsala University;Uppsala University", "aff_domain": "uu.se;it.uu.se", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nek2024externally,\ntitle={Externally Valid Policy Evaluation from Randomized Trials Using Additional Observational Data},\nauthor={Sofia Ek and Dave Zachariah},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2pgc5xDJ1b}\n}", "github": "", "reviewers": "xLhe;S2kx;grGf;Cyms", "pdf_size": 2337313, "rating": "4;4;6;6", "confidence": "3;1;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "49;76;69;80", "wc_strengths": "14;4;174;71", "wc_weaknesses": "100;6;629;69", "wc_questions": "64;1;66;8", "wc_limitations": "24;18;5;7", "wc_review": "251;105;943;235", "wc_reply_reviewers": "85;0;100;0", "wc_reply_authors": "259;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.5, 11.926860441876563 ], "wc_strengths_avg": [ 65.75, 67.52175575323852 ], "wc_weaknesses_avg": [ 201.0, 249.41631863212157 ], "wc_questions_avg": [ 34.75, 30.359306645574105 ], "wc_limitations_avg": [ 13.5, 7.826237921249264 ], "wc_review_avg": [ 383.5, 327.95235934507315 ], "wc_reply_reviewers_avg": [ 46.25, 46.55306112384018 ], "wc_reply_authors_avg": [ 64.75, 112.1502897900848 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Cmie5Ew-IEUJ:scholar.google.com/&scioq=Externally+Valid+Policy+Evaluation+from+Randomized+Trials+Using+Additional+Observational+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "uu.se;it.uu.se", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Uppsala University", "aff_unique_dep": "", "aff_unique_url": "https://www.uu.se", "aff_unique_abbr": "UU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "Towards Understanding Extrapolation: a Causal Lens", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96789", "id": "2squ766Iq4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2squ766Iq4", "openreview": "https://openreview.net/forum?id=2squ766Iq4", "poster": "", "project": "", "author_site": "Lingjing Kong, Guangyi Chen, Petar Stojanov, Haoxuan Li, Eric Xing, Kun Zhang", "tldr": "", "abstract": "Canonical work handling distribution shifts typically necessitates an entire target distribution that lands inside the training distribution.\nHowever, practical scenarios often involve only a handful target samples, potentially lying outside the training support, which requires the capability of extrapolation.\nIn this work, we aim to provide a theoretical understanding of when extrapolation is possible and offer principled methods to achieve it without requiring an on-support target distribution.\nTo this end, we formulate the extrapolation problem with a latent-variable model that embodies the minimal change principle in causal mechanisms.\nUnder this formulation, we cast the extrapolation problem into a latent-variable identification problem.\nWe provide realistic conditions on shift properties and the estimation objectives that lead to identification even when only one off-support target sample is available, tackling the most challenging scenarios.\nOur theory reveals the intricate interplay between the underlying manifold's smoothness and the shift properties.\nWe showcase how our theoretical results inform the design of practical adaptation algorithms. Through experiments on both synthetic and real-world data, we validate our theoretical findings and their practical implications.", "keywords": "generalization;extrapolation;identification;adaptation", "primary_area": "causal_inference", "supplementary_material": "", "author": "Lingjing Kong;Guangyi Chen;Petar Stojanov;Haoxuan Li;Eric P. Xing;Kun Zhang", "authorids": "~Lingjing_Kong1;~Guangyi_Chen1;~Petar_Stojanov2;~Haoxuan_Li6;~Eric_Xing1;~Kun_Zhang1", "gender": "M;M;;M;M;M", "homepage": "https://lingjing-kong.github.io/;https://chengy12.github.io/;;https://haoxuanli-pku.github.io/;http://www.cs.cmu.edu/~epxing/;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "158/1994-1.html;c/GuangyiChen-2;;145/4965-1.html;36/3855;96/3115-1", "google_scholar": "4hAlzvkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;RydEtZkAAAAJ;gtDqiucAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;RGoypN4AAAAJ", "orcid": ";;;0000-0003-3620-3769;;", "linkedin": ";;;;;", "or_profile": "~Lingjing_Kong1;~Guangyi_Chen1;~Petar_Stojanov2;~Haoxuan_Li6;~Eric_Xing1;~Kun_Zhang1", "aff": "Computer Science Department, School of Computer Science;Carnegie Mellon University;;Peking University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "csd.cs.cmu.edu;cmu.edu;;pku.edu.cn;cs.cmu.edu;cmu.edu", "position": "PhD student;Postdoc;;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkong2024towards,\ntitle={Towards Understanding Extrapolation: a Causal Lens},\nauthor={Lingjing Kong and Guangyi Chen and Petar Stojanov and Haoxuan Li and Eric P. Xing and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2squ766Iq4}\n}", "github": "", "reviewers": "5kN3;gbwX;zpbG;HS3a", "pdf_size": 2259533, "rating": "4;6;6;7", "confidence": "2;3;3;3", "soundness": "2;2;3;3", "novelty": "2;3;4;3", "presentation": "2;3;2;3", "wc_summary": "84;110;98;142", "wc_strengths": "33;45;97;28", "wc_weaknesses": "199;410;15;54", "wc_questions": "16;141;31;123", "wc_limitations": "3;13;51;5", "wc_review": "335;719;292;352", "wc_reply_reviewers": "0;330;21;22", "wc_reply_authors": "407;484;297;20", "reply_reviewers": "0;2;1;1", "reply_authors": "5;3;4;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 108.5, 21.418449990603897 ], "wc_strengths_avg": [ 50.75, 27.407799984675894 ], "wc_weaknesses_avg": [ 169.5, 154.8555778782282 ], "wc_questions_avg": [ 77.75, 54.87884382892919 ], "wc_limitations_avg": [ 18.0, 19.4164878389476 ], "wc_review_avg": [ 424.5, 171.4300148748754 ], "wc_reply_reviewers_avg": [ 93.25, 136.96965904900253 ], "wc_reply_authors_avg": [ 302.0, 175.85363231960835 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15476945986201227596&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "csd.cs.cmu.edu;cmu.edu;;pku.edu.cn;cs.cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "School of Computer Science;Carnegie Mellon University;Peking University", "aff_unique_dep": "Computer Science Department;;", "aff_unique_url": ";https://www.cmu.edu;http://www.pku.edu.cn", "aff_unique_abbr": ";CMU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "1;2;1;1", "aff_country_unique": ";United States;China" }, { "title": "SMART: Scalable Multi-agent Real-time Motion Generation via Next-token Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96788", "id": "2uy3LZHNIG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2uy3LZHNIG", "openreview": "https://openreview.net/forum?id=2uy3LZHNIG", "poster": "/media/PosterPDFs/NeurIPS%202024/96788.png?t=1731668825.3504548", "project": "", "author_site": "Wei Wu, Xiaoxin Feng, Ziyan Gao, Yuheng KAN", "tldr": "", "abstract": "Data-driven autonomous driving motion generation tasks are frequently impacted by the limitations of dataset size and the domain gap between datasets, which precludes their extensive application in real-world scenarios. To address this issue, we introduce SMART, a novel autonomous driving motion generation paradigm that models vectorized map and agent trajectory data into discrete sequence tokens. These tokens are then processed through a decoder-only transformer architecture to train for the next token prediction task across spatial-temporal series. This GPT-style method allows the model to learn the motion distribution in real driving scenarios. SMART achieves state-of-the-art performance across most of the metrics on the generative Sim Agents challenge, ranking 1st on the leaderboards of Waymo Open Motion Dataset (WOMD), demonstrating remarkable inference speed. Moreover, SMART represents the generative model in the autonomous driving motion domain, exhibiting zero-shot generalization capabilities: Using only the NuPlan dataset for training and WOMD for validation, SMART achieved a competitive score of 0.72 on the Sim Agents challenge. Lastly, we have collected over 1 billion motion tokens from multiple datasets, validating the model's scalability. These results suggest that SMART has initially emulated two important properties: scalability and zero-shot generalization, and preliminarily meets the needs of large-scale real-time simulation applications. We have released all the code to promote the exploration of models for motion generation in the autonomous driving field. The source code is available at https://github.com/rainmaker22/SMART.", "keywords": "Autonomous driving; Generative motion model; Multi-agent motion simulation; Motion prediction", "primary_area": "generative_models", "supplementary_material": "/attachment/5281a1c99645148c51a0f8527834dc368d02a8b9.zip", "author": "Wei Wu;Xiaoxin Feng;Ziyan Gao;Yuheng KAN", "authorids": "~Wei_Wu14;~Xiaoxin_Feng1;~Ziyan_Gao1;~Yuheng_KAN1", "gender": "M;M;M;F", "homepage": "https://wuwei-ai.org/;;;https://scholar.google.com/citations?user=EskFze8AAAAJ&hl=zh-TW", "dblp": ";;;", "google_scholar": "https://scholar.google.com.hk/citations?user=9RBxtd8AAAAJ;;;", "orcid": ";0009-0006-6888-713X;0000-0002-0505-0600;", "linkedin": ";;;", "or_profile": "~Wei_Wu14;~Xiaoxin_Feng1;~Ziyan_Gao1;~Yuheng_KAN1", "aff": "Tsinghua University;Senseauto Research;;Sensetime", "aff_domain": "mail.tsinghua.edu.cn;senseauto.com;;sensetime.com", "position": "PhD student;Researcher;;Researcher", "bibtex": "@inproceedings{\nwu2024smart,\ntitle={{SMART}: Scalable Multi-agent Real-time Motion Generation via Next-token Prediction},\nauthor={Wei Wu and Xiaoxin Feng and Ziyan Gao and Yuheng KAN},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2uy3LZHNIG}\n}", "github": "", "reviewers": "vCcd;ogLa;1oTF", "pdf_size": 4088987, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;4;3", "presentation": "3;3;3", "wc_summary": "78;54;109", "wc_strengths": "64;39;151", "wc_weaknesses": "54;151;118", "wc_questions": "14;46;150", "wc_limitations": "37;1;5", "wc_review": "247;291;533", "wc_reply_reviewers": "19;19;20", "wc_reply_authors": "0;0;27", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.33333333333333, 22.51419305435771 ], "wc_strengths_avg": [ 84.66666666666667, 48.00231475900117 ], "wc_weaknesses_avg": [ 107.66666666666667, 40.2685430026411 ], "wc_questions_avg": [ 70.0, 58.057442818872644 ], "wc_limitations_avg": [ 14.333333333333334, 16.110727964792762 ], "wc_review_avg": [ 357.0, 125.7404734628698 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 0.4714045207910317 ], "wc_reply_authors_avg": [ 9.0, 12.727922061357855 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8552714177091484599&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;senseauto.com;;sensetime.com", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;SenseAuto Research;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.senseauto.com/;https://www.sensetime.com", "aff_unique_abbr": "THU;;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Subsurface Scattering for Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96787", "id": "2vMvh5XP0P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2vMvh5XP0P", "openreview": "https://openreview.net/forum?id=2vMvh5XP0P", "poster": "", "project": "", "author_site": "Jan-Niklas Dihlmann, Arjun Majumdar, Andreas Engelhardt, Raphael Braun, Hendrik PA Lensch", "tldr": "", "abstract": "3D reconstruction and relighting of objects made from scattering materials present a significant challenge due to the complex light transport beneath the surface. 3D Gaussian Splatting introduced high-quality novel view synthesis at real-time speeds. While 3D Gaussians efficiently approximate an object's surface, they fail to capture the volumetric properties of subsurface scattering. We propose a framework for optimizing an object's shape together with the radiance transfer field given multi-view OLAT (one light at a time) data. Our method decomposes the scene into an explicit surface represented as 3D Gaussians, with a spatially varying BRDF, and an implicit volumetric representation of the scattering component. A learned incident light field accounts for shadowing. We optimize all parameters jointly via ray-traced differentiable rendering. Our approach enables material editing, relighting, and novel view synthesis at interactive rates. We show successful application on synthetic data and contribute a newly acquired multi-view multi-light dataset of objects in a light-stage setup. Compared to previous work we achieve comparable or better results at a fraction of optimization and rendering time while enabling detailed control over material attributes.", "keywords": "gaussian splatting;inverse rendering;relighting;subsurface scattering;computer graphics;brdf decomposition;pbr;differentiable rendering;nerf", "primary_area": "machine_vision", "supplementary_material": "/attachment/90ddd13d46ed727159d487408132cdf244884cbb.zip", "author": "Jan-Niklas Dihlmann;Arjun Majumdar;Andreas Engelhardt;Raphael Braun;Hendrik Lensch", "authorids": "~Jan-Niklas_Dihlmann1;~Arjun_Majumdar3;~Andreas_Engelhardt1;~Raphael_Braun1;~Hendrik_Lensch2", "gender": "M;M;M;;M", "homepage": "https://www.jdihlmann.com/;;;https://uni-tuebingen.de/en/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/computergrafik/lehrstuhl/mitarbeiter/raphael-braun/;https://www.graphics.uni-tuebingen.de", "dblp": ";;172/8953;;99/6552.html", "google_scholar": "https://scholar.google.de/citations?user=Sp289eUAAAAJ;;https://scholar.google.de/citations?user=ZQUFcqAAAAAJ;https://scholar.google.de/citations?user=eh2ePAUAAAAJ;https://scholar.google.de/citations?hl=de", "orcid": ";;0000-0003-1313-3665;;", "linkedin": "jdihlmann/;arjun-majumdar-2922b42b/;andreas-engelhardt-5a1451ab/;;", "or_profile": "~Jan-Niklas_Dihlmann1;~Arjun_Majumdar3;~Andreas_Engelhardt1;~Raphael_Braun1;~Hendrik_Lensch2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\ndihlmann2024subsurface,\ntitle={Subsurface Scattering for Gaussian Splatting},\nauthor={Jan-Niklas Dihlmann and Arjun Majumdar and Andreas Engelhardt and Raphael Braun and Hendrik Lensch},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2vMvh5XP0P}\n}", "github": "", "reviewers": "wfKX;uYWZ;uk4p;F9dx;pNBB", "pdf_size": 11157982, "rating": "4;5;6;6;6", "confidence": "4;4;4;3;4", "soundness": "4;2;3;3;3", "novelty": "3;2;2;2;2", "presentation": "2;3;3;2;3", "wc_summary": "106;95;75;163;62", "wc_strengths": "68;25;72;91;40", "wc_weaknesses": "88;183;267;300;175", "wc_questions": "52;54;88;72;113", "wc_limitations": "1;16;65;12;18", "wc_review": "315;373;567;638;408", "wc_reply_reviewers": "43;0;113;109;128", "wc_reply_authors": "182;0;37;126;105", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 100.2, 34.925062634160014 ], "wc_strengths_avg": [ 59.2, 23.625410049351526 ], "wc_weaknesses_avg": [ 202.6, 74.71706632356492 ], "wc_questions_avg": [ 75.8, 22.754340245324627 ], "wc_limitations_avg": [ 22.4, 22.096153511414606 ], "wc_review_avg": [ 460.2, 122.00885213786744 ], "wc_reply_reviewers_avg": [ 78.6, 48.97591244683451 ], "wc_reply_authors_avg": [ 90.0, 64.64363851145757 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9751624418259288177&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen;University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "2vhkjOdlc8", "title": "Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent studies highlighted a practical setting of unsupervised anomaly detection (UAD) that builds a unified model for multi-class images, serving as an alternative to the conventional one-class-one-model setup. Despite various advancements addressing this challenging task, the detection performance under the multi-class setting still lags far behind state-of-the-art class-separated models. Our research aims to bridge this substantial performance gap. In this paper, we introduce a minimalistic reconstruction-based anomaly detection framework, namely Dinomaly, which leverages pure Transformer architectures without relying on complex designs, additional modules, or specialized tricks. Given this powerful framework consisted of only Attentions and MLPs, we found four simple components that are essential to multi-class anomaly detection: (1) _Foundation_ _Transformers_ that extracts universal and discriminative features, (2) _Noisy_ _Bottleneck_ where pre-existing Dropouts do all the noise injection tricks, (3) _Linear_ _Attention_ that naturally cannot focus, and (4) _Loose_ _Reconstruction_ that does not force layer-to-layer and point-by-point reconstruction. Extensive experiments are conducted across three popular anomaly detection benchmarks including MVTec-AD, VisA, and the recently released Real-IAD. Our proposed Dinomaly achieves impressive image AUROC of 99.6\\%, 98.7\\%, and 89.3\\% on the three datasets respectively, which is not only superior to state-of-the-art multi-class UAD methods, but also surpasses the most advanced class-separated UAD records.", "keywords": "Unsupervised Anomaly Detection;Unsupervised Learning;Multi-Class UAD;Unified Model", "primary_area": "machine_vision", "supplementary_material": "/attachment/1d0852ecbd4dc9b0a7522019fb8117c6e7e0ae75.zip", "author": "Jia Guo;Shuai Lu;Weihang Zhang;Huiqi Li", "authorids": "~Jia_Guo6;~Shuai_Lu4;~Weihang_Zhang2;~Huiqi_Li1", "gender": "M;M;F;M", "homepage": ";;;https://lushuai.com.cn", "dblp": ";;40/4823;62/2062-3", "google_scholar": "nTQvKUAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-6633-1801;0000-0002-8720-3374;0000-0002-3532-7498", "linkedin": ";;;", "or_profile": "~Jia_Guo6;~Weihang_Zhang2;~Huiqi_Li1;~shuai_lu2", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "MS student;Assistant Professor;Full Professor;PhD student", "bibtex": "@misc{\nanonymous2024dinomaly,\ntitle={Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=2vhkjOdlc8}\n}", "github": "", "project": "", "reviewers": "LXBB;xmFt;m7bP;hJXx;cd2L", "site": "https://openreview.net/forum?id=2vhkjOdlc8", "pdf_size": 19502726, "rating": "5;5;6;6;6", "confidence": "5;3;4;4;3", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "3;3;2;4;3", "wc_summary": "57;54;66;75;42", "wc_strengths": "44;63;159;49;41", "wc_weaknesses": "192;13;147;50;21", "wc_questions": "17;3;129;163;39", "wc_limitations": "15;3;1;7;18", "wc_review": "325;136;502;344;161", "wc_reply_reviewers": "412;0;119;16;0", "wc_reply_authors": "1848;0;239;0;0", "reply_reviewers": "2;0;1;1;0", "reply_authors": "5;1;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 58.8, 11.160645142642965 ], "wc_strengths_avg": [ 71.2, 44.54391091945116 ], "wc_weaknesses_avg": [ 84.6, 71.82924195618384 ], "wc_questions_avg": [ 70.2, 63.85734100320808 ], "wc_limitations_avg": [ 8.8, 6.64529909033446 ], "wc_review_avg": [ 293.6, 133.69906506778574 ], "wc_reply_reviewers_avg": [ 109.4, 157.68271940831056 ], "wc_reply_authors_avg": [ 417.4, 721.2643343462921 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21821789023599236, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14443882930328067073&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Constrained Human-AI Cooperation: An Inclusive Embodied Social Intelligence Challenge", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97865", "id": "2vs1fIAy3X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2vs1fIAy3X", "openreview": "https://openreview.net/forum?id=2vs1fIAy3X", "poster": "/media/PosterPDFs/NeurIPS%202024/97865.png?t=1732763973.9339838", "project": "", "author_site": "Weihua Du, Qiushi Lyu, Jiaming Shan, Zhenting Qi, Hongxin Zhang, Sunli Chen, Andi Peng, Tianmin Shu, Kwonjoon Lee, Behzad Dariush, Chuang Gan", "tldr": "", "abstract": "We introduce Constrained Human-AI Cooperation (CHAIC), an inclusive embodied social intelligence challenge designed to test social perception and cooperation in embodied agents. In CHAIC, the goal is for an embodied agent equipped with egocentric observations to assist a human who may be operating under physical constraints\u2014e.g., unable to reach high places or confined to a wheelchair\u2014in performing common household or outdoor tasks as efficiently as possible. To achieve this, a successful helper must: (1) infer the human's intents and constraints by following the human and observing their behaviors (social perception), and (2) make a cooperative plan tailored to the human partner to solve the task as quickly as possible, working together as a team (cooperative planning). \nTo benchmark this challenge, we create four new agents with real physical constraints and eight long-horizon tasks featuring both indoor and outdoor scenes with various constraints, emergency events, and potential risks. We benchmark planning- and learning-based baselines on the challenge and introduce a new method that leverages large language models and behavior modeling. Empirical evaluations demonstrate the effectiveness of our benchmark in enabling systematic assessment of key aspects of machine social intelligence. Our benchmark and code are publicly available at https://github.com/UMass-Foundation-Model/CHAIC.", "keywords": "Embodied AI", "primary_area": "", "supplementary_material": "", "author": "Weihua Du;Qiushi Lyu;Jiaming Shan;Zhenting Qi;Hongxin Zhang;Sunli Chen;Andi Peng;Tianmin Shu;Kwonjoon Lee;Behzad Dariush;Chuang Gan", "authorids": "~Weihua_Du1;~Qiushi_Lyu1;~Jiaming_Shan1;~Zhenting_Qi1;~Hongxin_Zhang1;~Sunli_Chen1;~Andi_Peng1;~Tianmin_Shu1;~Kwonjoon_Lee1;~Behzad_Dariush2;~Chuang_Gan1", "gender": "M;M;M;M;M;M;F;;M;M;M", "homepage": "https://stiglidu.github.io/;https://alfredlyu.github.io/;https://shanjiaming.github.io/;https://zhentingqi.github.io/;https://icefoxzhx.github.io/;https://eeeeeerickkk.github.io/;https://andipeng.com/;;https://kjunelee.github.io;;http://people.csail.mit.edu/ganchuang/", "dblp": "229/1269;;351/0589;329/2118;284/2962-5;359/3746;242/9185;163/2175.html;127/7948;;139/6993", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;7LxAwLwAAAAJ;S63gb38AAAAJ;YT_ffdwAAAAJ;C6Wu8M0AAAAJ;FgxRWPcAAAAJ;PTeSCbIAAAAJ", "orcid": "0000-0002-8856-0277;;;;0000-0002-6041-2440;;;;0000-0002-1433-551X;;", "linkedin": ";;jiaming-shan-36249a298/;zhentingqi/;;;;;;behzaddariush;", "or_profile": "~Weihua_Du1;~Qiushi_Lyu1;~Jiaming_Shan1;~Zhenting_Qi1;~Hongxin_Zhang1;~Sunli_Chen1;~Andi_Peng1;~Tianmin_Shu1;~Kwonjoon_Lee1;~Behzad_Dariush2;~Chuang_Gan1", "aff": "Tsinghua University;Peking University;Shanghai Jiaotong University;Harvard University;University of Massachusetts at Amherst;Tsinghua University;Massachusetts Institute of Technology;Johns Hopkins University;Honda Research Institute USA;Honda Research Institute USA;University of Massachusetts at Amherst", "aff_domain": "mails.tsinghua.edu.cn;pku.edu.cn;sjtu.edu.cn;g.harvard.edu;umass.edu;tsinghua.edu.cn;mit.edu;jhu.edu;honda-ri.com;honda-ri.com;umass.edu", "position": "Undergrad student;Undergrad student;Undergrad student;MS student;PhD student;Undergrad student;PhD student;Assistant Professor;Sr Research Scientist;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndu2024constrained,\ntitle={Constrained Human-{AI} Cooperation: An Inclusive Embodied Social Intelligence Challenge},\nauthor={Weihua Du and Qiushi Lyu and Jiaming Shan and Zhenting Qi and Hongxin Zhang and Sunli Chen and Andi Peng and Tianmin Shu and Kwonjoon Lee and Behzad Dariush and Chuang Gan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=2vs1fIAy3X}\n}", "github": "", "reviewers": "8256;xLSK;MFUo;LsHw", "pdf_size": 4817565, "rating": "6;7;7;8", "confidence": "3;3;3;4", "wc_summary_and_contributions": "68;96;42;24", "wc_strengths": "44;44;34;12", "wc_improvement": "83;215;31;110", "wc_limitations": "15;15;1;35", "wc_correctness": "22;11;1;3", "wc_clarity": "232;4;1;5", "wc_relation_to_prior_work": "12;29;1;9", "wc_documentation": "77;40;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "554;455;113;200", "wc_reply_reviewers": "42;14;0;0", "wc_reply_authors": "182;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "4;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 57.5, 27.179955849853766 ], "wc_strengths_avg": [ 33.5, 13.06713434537198 ], "wc_improvement_avg": [ 109.75, 67.0722558141591 ], "wc_limitations_avg": [ 16.5, 12.114041439585717 ], "wc_correctness_avg": [ 9.25, 8.257572282456872 ], "wc_clarity_avg": [ 60.5, 99.0265116016918 ], "wc_relation_to_prior_work_avg": [ 12.75, 10.207227831296802 ], "wc_documentation_avg": [ 29.75, 31.586191603293994 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 330.5, 180.1312021832975 ], "wc_reply_reviewers_avg": [ 14.0, 17.146428199482248 ], "wc_reply_authors_avg": [ 45.5, 78.80831174438391 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kKDtQXWxwq4J:scholar.google.com/&scioq=Constrained+Human-AI+Cooperation:+An+Inclusive+Embodied+Social+Intelligence+Challenge&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;pku.edu.cn;sjtu.edu.cn;g.harvard.edu;umass.edu;tsinghua.edu.cn;mit.edu;jhu.edu;honda-ri.com;honda-ri.com;umass.edu", "author_num": 11, "aff_unique_index": "0;1;2;3;4;0;5;6;7;7;4", "aff_unique_norm": "Tsinghua University;Peking University;Shanghai Jiao Tong University;Harvard University;University of Massachusetts Amherst;Massachusetts Institute of Technology;Johns Hopkins University;Honda Research Institute", "aff_unique_dep": ";;;;;;;Honda Research Institute", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;https://www.sjtu.edu.cn;https://www.harvard.edu;https://www.umass.edu;https://web.mit.edu;https://www.jhu.edu;https://honda-ri.com", "aff_unique_abbr": "THU;Peking U;SJTU;Harvard;UMass Amherst;MIT;JHU;HRI USA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;1;1;0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Last-Iterate Global Convergence of Policy Gradients for Constrained Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96786", "id": "2vywag2lVC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2vywag2lVC", "openreview": "https://openreview.net/forum?id=2vywag2lVC", "poster": "/media/PosterPDFs/NeurIPS%202024/96786.png?t=1730303482.1237693", "project": "", "author_site": "Alessandro Montenegro, Marco Mussi, Matteo Papini, Alberto Maria Metelli", "tldr": "", "abstract": "*Constrained Reinforcement Learning* (CRL) tackles sequential decision-making problems where agents are required to achieve goals by maximizing the expected return while meeting domain-specific constraints, which are often formulated on expected costs. In this setting, *policy-based* methods are widely used since they come with several advantages when dealing with continuous-control problems. These methods search in the policy space with an *action-based* or *parameter-based* exploration strategy, depending on whether they learn directly the parameters of a stochastic policy or those of a stochastic hyperpolicy. In this paper, we propose a general framework for addressing CRL problems via *gradient-based primal-dual* algorithms, relying on an alternate ascent/descent scheme with dual-variable regularization. We introduce an exploration-agnostic algorithm, called C-PG, which exhibits global last-iterate convergence guarantees under (weak) gradient domination assumptions, improving and generalizing existing results. Then, we design C-PGAE and C-PGPE, the action-based and the parameter-based versions of C-PG, respectively, and we illustrate how they naturally extend to constraints defined in terms of *risk measures* over the costs, as it is often requested in safety-critical scenarios. Finally, we numerically validate our algorithms on constrained control problems, and compare them with state-of-the-art baselines, demonstrating their effectiveness.", "keywords": "Constrained Reinforcement Learning;Last-Iterate Convergence;Global Convergence;Policy Gradients", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/a539c3415903847a8a908bf9c02c85b7d159a051.zip", "author": "Alessandro Montenegro;Marco Mussi;Matteo Papini;Alberto Maria Metelli", "authorids": "~Alessandro_Montenegro1;~Marco_Mussi1;~Matteo_Papini1;~Alberto_Maria_Metelli2", "gender": "M;M;M;M", "homepage": ";https://marcomussi.github.io/;https://t3p.github.io/;https://albertometelli.github.io/", "dblp": ";321/0756;209/4897;209/4941", "google_scholar": "CugD-ogAAAAJ;3gca-JUAAAAJ;https://scholar.google.it/citations?user=A2WxZlsAAAAJ;R31IsPwAAAAJ", "orcid": ";0000-0001-8356-6744;0000-0002-3807-3171;0000-0002-3424-5212", "linkedin": "alessandro-montenegro-3266291b7/;marcomussi95/;matteo-papini/;", "or_profile": "~Alessandro_Montenegro1;~Marco_Mussi1;~Matteo_Papini1;~Alberto_Maria_Metelli2", "aff": "Politecnico di Milano;Politecnico di Milano;Polytechnic Institute of Milan;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmontenegro2024lastiterate,\ntitle={Last-Iterate Global Convergence of Policy Gradients for Constrained Reinforcement Learning},\nauthor={Alessandro Montenegro and Marco Mussi and Matteo Papini and Alberto Maria Metelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2vywag2lVC}\n}", "github": "", "reviewers": "zSqV;ZDP8;Vs5i;AKt3", "pdf_size": 1196735, "rating": "4;5;6;7", "confidence": "4;4;4;1", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "54;42;61;78", "wc_strengths": "18;26;87;53", "wc_weaknesses": "251;120;102;8", "wc_questions": "95;6;37;11", "wc_limitations": "11;10;36;11", "wc_review": "429;204;323;161", "wc_reply_reviewers": "129;10;12;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.75, 13.026415470113028 ], "wc_strengths_avg": [ 46.0, 26.99073915253156 ], "wc_weaknesses_avg": [ 120.25, 86.64402749180118 ], "wc_questions_avg": [ 37.25, 35.35799061032739 ], "wc_limitations_avg": [ 17.0, 10.977249200050075 ], "wc_review_avg": [ 279.25, 104.86270786127926 ], "wc_reply_reviewers_avg": [ 41.0, 50.81830378908765 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7197060122957034441&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "polimi.it;polimi.it;polimi.it;polimi.it", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Politecnico di Milano;Polytechnic Institute of Milan", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it;https://www.polimi.it/", "aff_unique_abbr": "Polimi;Politecnico di Milano", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Exploring Fixed Point in Image Editing: Theoretical Support and Convergence Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96785", "id": "2wMJ4wq4az", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2wMJ4wq4az", "openreview": "https://openreview.net/forum?id=2wMJ4wq4az", "poster": "/media/PosterPDFs/NeurIPS%202024/96785.png?t=1729675614.9461436", "project": "", "author_site": "Chen Hang, Zhe Ma, Haoming Chen, Xuwei Fang, Vincent Xie, Faming Fang, Guixu Zhang, Hongbin Wang", "tldr": "", "abstract": "In image editing, Denoising Diffusion Implicit Models (DDIM) inversion has become a widely adopted method and is extensively used in various image editing approaches. The core concept of DDIM inversion stems from the deterministic sampling technique of DDIM, which allows the DDIM process to be viewed as an Ordinary Differential Equation (ODE) process that is reversible. This enables the prediction of corresponding noise from a reference image, ensuring that the restored image from this noise remains consistent with the reference image. Image editing exploits this property by modifying the cross-attention between text and images to edit specific objects while preserving the remaining regions. However, in the DDIM inversion, using the $t-1$ time step to approximate the noise prediction at time step $t$ introduces errors between the restored image and the reference image. Recent approaches have modeled each step of the DDIM inversion process as finding a fixed-point problem of an implicit function. This approach significantly mitigates the error in the restored image but lacks theoretical support regarding the existence of such fixed points. Therefore, this paper focuses on the study of fixed points in DDIM inversion and provides theoretical support. Based on the obtained theoretical insights, we further optimize the loss function for the convergence of fixed points in the original DDIM inversion, improving the visual quality of the edited image. Finally, we extend the fixed-point based image editing to the application of unsupervised image dehazing, introducing a novel text-based approach for unsupervised dehazing.", "keywords": "fixed point; image editing", "primary_area": "machine_vision", "supplementary_material": "", "author": "chen hang;Zhe Ma;Haoming Chen;Xuwei Fang;Weisheng Xie;Faming Fang;Guixu Zhang;Hongbin Wang", "authorids": "~chen_hang1;~Zhe_Ma4;~Haoming_Chen1;~Xuwei_Fang1;~Weisheng_Xie2;~Faming_Fang1;~Guixu_Zhang1;~Hongbin_Wang3", "gender": "M;F;M;M;;M;M;", "homepage": "https://github.com/hangnima;;https://github.com/chenhaomingbob;https://blog.csdn.net/fxwfxw7037681?spm=1000.2115.3001.5343;;;;", "dblp": "17/6209;22/6672;09/10698;376/1523;84/11508;96/8174;44/7527;98/3467.html", "google_scholar": "N6ymaQEAAAAJ;;axNtqawAAAAJ;;;https://scholar.google.com.hk/citations?user=TSkJe-4AAAAJ;https://scholar.google.com.hk/citations?user=dIfKlYgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0000-4636-5898;0000-0001-8041-8615;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~chen_hang1;~Zhe_Ma4;~Haoming_Chen1;~Xuwei_Fang1;~Weisheng_Xie2;~Faming_Fang1;~Guixu_Zhang1;~Hongbin_Wang3", "aff": "East China Normal University;Alibaba Group;East China Normal University;Bestpay AI Lab;Bestpay;East China Normal University;East China Normal University;", "aff_domain": "ecnu.edu.cn;alibaba-inc.com;ecnu.edu.cn;bestpay.com.cn;bestpay.com.cn;cs.ecnu.edu.cn;cs.ecnu.edu.cn;", "position": "PhD student;Researcher;PhD student;Researcher;Chief Data Scientist;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nhang2024exploring,\ntitle={Exploring Fixed Point in Image Editing: Theoretical Support and Convergence Optimization},\nauthor={chen hang and Zhe Ma and Haoming Chen and Xuwei Fang and Weisheng Xie and Faming Fang and Guixu Zhang and Hongbin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2wMJ4wq4az}\n}", "github": "", "reviewers": "cPeB;Cq8S;5f4S;eqay", "pdf_size": 16788895, "rating": "3;5;5;6", "confidence": "4;5;4;4", "soundness": "2;2;4;3", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "126;164;53;57", "wc_strengths": "44;62;78;77", "wc_weaknesses": "157;254;71;109", "wc_questions": "21;41;20;46", "wc_limitations": "3;36;15;1", "wc_review": "351;557;237;290", "wc_reply_reviewers": "0;126;0;117", "wc_reply_authors": "0;417;0;245", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.0, 46.98403984333403 ], "wc_strengths_avg": [ 65.25, 13.808964479641476 ], "wc_weaknesses_avg": [ 147.75, 68.49589403752607 ], "wc_questions_avg": [ 32.0, 11.640446726822816 ], "wc_limitations_avg": [ 13.75, 13.91716565971678 ], "wc_review_avg": [ 358.75, 121.35974414936776 ], "wc_reply_reviewers_avg": [ 60.75, 60.833276255680985 ], "wc_reply_authors_avg": [ 165.5, 176.31860366960714 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HFfDc982oFgJ:scholar.google.com/&scioq=Exploring+Fixed+Point+in+Image+Editing:+Theoretical+Support+and+Convergence+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ecnu.edu.cn;alibaba-inc.com;ecnu.edu.cn;bestpay.com.cn;bestpay.com.cn;cs.ecnu.edu.cn;cs.ecnu.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;0;2;2;0;0", "aff_unique_norm": "East China Normal University;Alibaba Group;Bestpay", "aff_unique_dep": ";;AI Lab", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.alibaba.com;", "aff_unique_abbr": "ECNU;Alibaba;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Automated Efficient Estimation using Monte Carlo Efficient Influence Functions", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96784", "id": "2wfd3pti8v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2wfd3pti8v", "openreview": "https://openreview.net/forum?id=2wfd3pti8v", "poster": "", "project": "", "author_site": "Raj Agrawal, Sam Witty, Andy Zane, Elias Bingham", "tldr": "", "abstract": "Many practical problems involve estimating low dimensional statistical quantities with high-dimensional models and datasets. Several approaches address these estimation tasks based on the theory of influence functions, such as debiased/double ML or targeted minimum loss estimation. We introduce \\textit{Monte Carlo Efficient Influence Functions} (MC-EIF), a fully automated technique for approximating efficient influence functions that integrates seamlessly with existing differentiable probabilistic programming systems. MC-EIF automates efficient statistical estimation for a broad class of models and functionals that previously required rigorous custom analysis. We prove that MC-EIF is consistent, and that estimators using MC-EIF achieve optimal $\\sqrt{N}$ convergence rates. We show empirically that estimators using MC-EIF are at parity with estimators using analytic EIFs. Finally, we present a novel capstone example using MC-EIF for optimal portfolio selection.", "keywords": "Efficient influence function;semiparametric statistics;double robustness;automatic differentiation", "primary_area": "causal_inference", "supplementary_material": "/attachment/f55cd22920b5e7ddd95e88855b474b139f0215e8.zip", "author": "Raj Agrawal;Sam Witty;Andy Zane;Eli Bingham", "authorids": "~Raj_Agrawal3;~Sam_Witty1;~Andy_Zane1;~Eli_Bingham1", "gender": "M;M;M;", "homepage": "https://www.linkedin.com/in/raj-agrawal/;https://samwitty.github.io;https://www.linkedin.com/in/azane/;https://pyro.ai/", "dblp": ";232/2073;;228/8342", "google_scholar": ";EUGPPvQAAAAJ;;0uUoiCIAAAAJ", "orcid": ";;;", "linkedin": ";sam-witty-46708572?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BUFPKeKp5RL2NZzczu1HK8Q%3D%3D;;", "or_profile": "~Raj_Agrawal3;~Sam_Witty1;~Andy_Zane1;~Eli_Bingham1", "aff": ";Basis Research Institute;University of Massachusetts at Amherst;Broad Institute", "aff_domain": ";basis.ai;umass.edu;broadinstitute.org", "position": ";Researcher;PhD student;Machine Learning Fellow", "bibtex": "@inproceedings{\nagrawal2024automated,\ntitle={Automated Efficient Estimation using Monte Carlo Efficient Influence Functions},\nauthor={Raj Agrawal and Sam Witty and Andy Zane and Eli Bingham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2wfd3pti8v}\n}", "github": "", "reviewers": "ebo4;hhJt;cWd8", "pdf_size": 987682, "rating": "6;7;8", "confidence": "2;1;4", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "53;91;41", "wc_strengths": "114;39;74", "wc_weaknesses": "45;88;74", "wc_questions": "123;21;75", "wc_limitations": "28;16;55", "wc_review": "363;255;319", "wc_reply_reviewers": "46;0;64", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 61.666666666666664, 21.31248981752771 ], "wc_strengths_avg": [ 75.66666666666667, 30.641293851417057 ], "wc_weaknesses_avg": [ 69.0, 17.90716802475106 ], "wc_questions_avg": [ 73.0, 41.66533331199932 ], "wc_limitations_avg": [ 33.0, 16.30950643030009 ], "wc_review_avg": [ 312.3333333333333, 44.34210439550904 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 26.948510575210314 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5016725245694948282&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": ";basis.ai;umass.edu;broadinstitute.org", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Basis Research Institute;University of Massachusetts Amherst;Broad Institute", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.umass.edu;https://www.broadinstitute.org", "aff_unique_abbr": ";UMass Amherst;Broad", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "title": "Bridging semantics and pragmatics in information-theoretic emergent communication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96783", "id": "2wlNnIqCb7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2wlNnIqCb7", "openreview": "https://openreview.net/forum?id=2wlNnIqCb7", "poster": "/media/PosterPDFs/NeurIPS%202024/96783.png?t=1731706386.2244647", "project": "", "author_site": "Eleonora Gualdoni, Mycal Tucker, Roger Levy, Noga Zaslavsky", "tldr": "", "abstract": "Human languages support both semantic categorization and local pragmatic interactions that require context-sensitive reasoning about meaning. While semantics and pragmatics are two fundamental aspects of language, they are typically studied independently and their co-evolution is largely under-explored. Here, we aim to bridge this gap by studying how a shared lexicon may emerge from local pragmatic interactions. To this end, we extend a recent information-theoretic framework for emergent communication in artificial agents, which integrates utility maximization, associated with pragmatics, with general communicative constraints that are believed to shape human semantic systems. Specifically, we show how to adapt this framework to train agents via unsupervised pragmatic interactions, and then evaluate their emergent lexical semantics. We test this approach in a rich visual domain of naturalistic images, and find that key human-like properties of the lexicon emerge when agents are guided by both context-specific utility and general communicative pressures, suggesting that both aspects are crucial for understanding how language may evolve in humans and in artificial agents.", "keywords": "semantics;pragmatics;emergent communication;information theory;artificial agents", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Eleonora Gualdoni;Mycal Tucker;Roger P. Levy;Noga Zaslavsky", "authorids": "~Eleonora_Gualdoni1;~Mycal_Tucker1;~Roger_P._Levy1;~Noga_Zaslavsky1", "gender": "F;M;F;M", "homepage": ";http://mycaltucker.com;https://www.nogsky.com;http://www.mit.edu/~rplevy", "dblp": "278/2062;256/5146;160/8830;23/90", "google_scholar": "gQnUjiMAAAAJ;V1kgcxIAAAAJ;VdYiwjwAAAAJ;i86O0SAAAAAJ", "orcid": "0000-0002-7423-493X;;0000-0003-3941-3518;0000-0002-4493-8864", "linkedin": ";;;roger-levy-502a6011/", "or_profile": "~Eleonora_Gualdoni1;~Mycal_Tucker1;~Noga_Zaslavsky1;~Roger_Levy1", "aff": "Apple;Massachusetts Institute of Technology;University of California, Irvine;Massachusetts Institute of Technology", "aff_domain": "apple.com;mit.edu;uci.edu;mit.edu", "position": "Researcher;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngualdoni2024bridging,\ntitle={Bridging semantics and pragmatics in information-theoretic emergent communication},\nauthor={Eleonora Gualdoni and Mycal Tucker and Roger P. Levy and Noga Zaslavsky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2wlNnIqCb7}\n}", "github": "", "reviewers": "zhL9;F8cg;CiKA;Cm9N", "pdf_size": 1568192, "rating": "3;5;7;7", "confidence": "4;2;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "93;100;52;75", "wc_strengths": "36;48;52;198", "wc_weaknesses": "281;159;6;129", "wc_questions": "202;32;65;275", "wc_limitations": "41;1;4;1", "wc_review": "653;340;179;678", "wc_reply_reviewers": "764;0;5;51", "wc_reply_authors": "1994;0;0;254", "reply_reviewers": "2;0;1;1", "reply_authors": "4;1;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.0, 18.560711193270585 ], "wc_strengths_avg": [ 83.5, 66.3682906213502 ], "wc_weaknesses_avg": [ 143.75, 97.80433272611188 ], "wc_questions_avg": [ 143.5, 99.13248710690154 ], "wc_limitations_avg": [ 11.75, 16.931848688197046 ], "wc_review_avg": [ 462.5, 211.01480990679303 ], "wc_reply_reviewers_avg": [ 205.0, 323.350429101308 ], "wc_reply_authors_avg": [ 562.0, 833.2430617772943 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13901575487505007998&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "apple.com;mit.edu;uci.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Apple;Massachusetts Institute of Technology;University of California, Irvine", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://web.mit.edu;https://www.uci.edu", "aff_unique_abbr": "Apple;MIT;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generative Retrieval Meets Multi-Graded Relevance", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96782", "id": "2xTkeyJFJb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2xTkeyJFJb", "openreview": "https://openreview.net/forum?id=2xTkeyJFJb", "poster": "", "project": "", "author_site": "Yubao Tang, Ruqing Zhang, Jiafeng Guo, Maarten Rijke, Wei Chen, Xueqi Cheng", "tldr": "", "abstract": "Generative retrieval represents a novel approach to information retrieval, utilizing an encoder-decoder architecture to directly produce relevant document identifiers (docids) for queries. While this method offers benefits, current implementations are limited to scenarios with binary relevance data, overlooking the potential for documents to have multi-graded relevance. Extending generative retrieval to accommodate multi-graded relevance poses challenges, including the need to reconcile likelihood probabilities for docid pairs and the possibility of multiple relevant documents sharing the same identifier. To address these challenges, we introduce a new framework called GRaded Generative Retrieval (GR$^2$). Our approach focuses on two key components: ensuring relevant and distinct identifiers, and implementing multi-graded constrained contrastive training. Firstly, we aim to create identifiers that are both semantically relevant and sufficiently distinct to represent individual documents effectively. This is achieved by jointly optimizing the relevance and distinctness of docids through a combination of docid generation and autoencoder models. Secondly, we incorporate information about the relationship between relevance grades to guide the training process. Specifically, we leverage a constrained contrastive training strategy to bring the representations of queries and the identifiers of their relevant documents closer together, based on their respective relevance grades.Extensive experiments on datasets with both multi-graded and binary relevance demonstrate the effectiveness of our method.", "keywords": "Document retrieval;Multi-graded relevance;Generative models", "primary_area": "generative_models", "supplementary_material": "", "author": "Yubao Tang;Ruqing Zhang;Jiafeng Guo;Maarten de Rijke;Wei Chen;Xueqi Cheng", "authorids": "~Yubao_Tang1;~Ruqing_Zhang3;~Jiafeng_Guo1;~Maarten_de_Rijke1;~Wei_Chen1;~Xueqi_Cheng1", "gender": "Not Specified;F;M;;F;M", "homepage": "https://yubaotang11.github.io/;https://daqingchong.github.io/;http://www.bigdatalab.ac.cn/gjf/;https://staff.fnwi.uva.nl/m.derijke/;https://weichen-cas.github.io/;https://people.ucas.ac.cn/~cxq?language=en", "dblp": "299/6533;;02/146;r/MdRijke;;44/912", "google_scholar": "b4ZWr4oAAAAJ;qwdqaO4AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;hY8aLqAAAAAJ", "orcid": "0009-0003-8010-3404;;;0000-0002-1086-0202;;", "linkedin": ";;;;;", "or_profile": "~Yubao_Tang1;~Ruqing_Zhang3;~Jiafeng_Guo1;~Maarten_de_Rijke1;~Wei_Chen1;~Xueqi_Cheng1", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technolgy, Chinese Academy of Sciences;University of Amsterdam; Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy", "aff_domain": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;uva.nl;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntang2024generative,\ntitle={Generative Retrieval Meets Multi-Graded Relevance},\nauthor={Yubao Tang and Ruqing Zhang and Jiafeng Guo and Maarten de Rijke and Wei Chen and Xueqi Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2xTkeyJFJb}\n}", "github": "", "reviewers": "UkCg;KSwY;xXNY;VoH6", "pdf_size": 1095706, "rating": "5;7;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "200;142;107;63", "wc_strengths": "25;66;124;51", "wc_weaknesses": "144;40;277;47", "wc_questions": "1;120;17;43", "wc_limitations": "1;55;22;1", "wc_review": "371;423;547;205", "wc_reply_reviewers": "7;7;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 128.0, 50.114868053303304 ], "wc_strengths_avg": [ 66.5, 36.29393888791901 ], "wc_weaknesses_avg": [ 127.0, 95.861879806313 ], "wc_questions_avg": [ 45.25, 45.68574723039998 ], "wc_limitations_avg": [ 19.75, 22.083647796503186 ], "wc_review_avg": [ 386.5, 122.75483697190917 ], "wc_reply_reviewers_avg": [ 3.5, 3.5 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17559742780978501745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;uva.nl;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;University of Amsterdam", "aff_unique_dep": ";Institute of Computing Technology;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn;https://www.uva.nl", "aff_unique_abbr": "UCAS;CAS;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Netherlands" }, { "title": "Compact Proofs of Model Performance via Mechanistic Interpretability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96781", "id": "2zWbzx50mH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=2zWbzx50mH", "openreview": "https://openreview.net/forum?id=2zWbzx50mH", "poster": "/media/PosterPDFs/NeurIPS%202024/96781.png?t=1733811542.72151", "project": "", "author_site": "Jason Gross, Rajashree Agrawal, Thomas Kwa, Euan Ong, Chun Hei Yip, Alex Gibson, Soufiane Noubir, Lawrence Chan", "tldr": "", "abstract": "We propose using mechanistic interpretability -- techniques for reverse engineering model weights into human-interpretable algorithms -- to derive and compactly prove formal guarantees on model performance.\nWe prototype this approach by formally proving accuracy lower bounds for a small transformer trained on Max-of-$K$, validating proof transferability across 151 random seeds and four values of $K$.\nWe create 102 different computer-assisted proof strategies and assess their length and tightness of bound on each of our models.\nUsing quantitative metrics, we find that shorter proofs seem to require and provide more mechanistic understanding.\nMoreover, we find that more faithful mechanistic understanding leads to tighter performance bounds.\nWe confirm these connections by qualitatively examining a subset of our proofs.\nFinally, we identify compounding structureless errors as a key challenge for using mechanistic interpretability to generate compact proofs on model performance.", "keywords": "mechanistic interpretability;verification;proof;guarantees;interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Jason Gross;Rajashree Agrawal;Thomas Kwa;Euan Ong;Chun Hei Yip;Alex Gibson;Soufiane Noubir;Lawrence Chan", "authorids": "~Jason_Gross2;~Rajashree_Agrawal1;~Thomas_Kwa1;~Euan_Ong1;~Chun_Hei_Yip1;~Alex_Gibson1;~Soufiane_Noubir1;~Lawrence_Chan2", "gender": "M;F;;M;M;M;M;M", "homepage": "https://jasongross.github.io/;https://www.monsoonmath.org/;;;;;;https://chanlawrence.me/", "dblp": "140/7502;346/4882.html;;;;38/6695;;28/2626", "google_scholar": "QouPlrMAAAAJ;41ZE3NwAAAAJ;;;;;;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0002-9427-4891;;;;;;0009-0007-1078-0353;", "linkedin": ";;tkwa/;euanong/;sherman-yip-4625a2201/;alex-gibson-78b529229/;;", "or_profile": "~Jason_Gross2;~Rajashree_Agrawal1;~Thomas_Kwa1;~Euan_Ong1;~Chun_Hei_Yip1;~Alex_Gibson1;~Soufiane_Noubir1;~Lawrence_Chan2", "aff": "Independent;Reed College;Model Evaluation and Threat Research;;University of Cambridge;University of Cambridge;University of Cambridge;University of California, Berkeley", "aff_domain": "gmail.com;reed.edu;metr.org;;cam.ac.uk;cam.ac.uk;trin.cam.ac.uk;berkeley.edu", "position": "Researcher;Undergrad student;Researcher;;Undergrad student;Undergrad student;Undergrad student;PhD student", "bibtex": "@inproceedings{\ngross2024compact,\ntitle={Compact Proofs of Model Performance via Mechanistic Interpretability},\nauthor={Jason Gross and Rajashree Agrawal and Thomas Kwa and Euan Ong and Chun Hei Yip and Alex Gibson and Soufiane Noubir and Lawrence Chan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=2zWbzx50mH}\n}", "github": "", "reviewers": "ZhNY;vjrV;exsQ;KZFC", "pdf_size": 6061435, "rating": "6;6;6;7", "confidence": "2;4;2;4", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "2;1;2;4", "wc_summary": "221;68;141;327", "wc_strengths": "110;103;89;118", "wc_weaknesses": "194;403;205;21", "wc_questions": "58;434;13;119", "wc_limitations": "10;52;7;59", "wc_review": "593;1060;455;644", "wc_reply_reviewers": "66;473;20;50", "wc_reply_authors": "15;1651;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 189.25, 96.19348990446287 ], "wc_strengths_avg": [ 105.0, 10.653637876331258 ], "wc_weaknesses_avg": [ 205.75, 135.2578555944164 ], "wc_questions_avg": [ 156.0, 164.85296478983932 ], "wc_limitations_avg": [ 32.0, 23.65375234502974 ], "wc_review_avg": [ 688.0, 225.62912046099015 ], "wc_reply_reviewers_avg": [ 152.25, 185.91984159846953 ], "wc_reply_authors_avg": [ 416.5, 712.7652137976432 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15156568128773318186&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "gmail.com;reed.edu;metr.org;;cam.ac.uk;cam.ac.uk;trin.cam.ac.uk;berkeley.edu", "author_num": 8, "aff_unique_index": "0;1;3;3;3;4", "aff_unique_norm": "Independent;Reed College;;University of Cambridge;University of California, Berkeley", "aff_unique_dep": ";;;;", "aff_unique_url": ";https://www.reed.edu;;https://www.cam.ac.uk;https://www.berkeley.edu", "aff_unique_abbr": ";Reed;;Cambridge;UC Berkeley", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Cambridge;Berkeley", "aff_country_unique_index": "1;2;2;2;1", "aff_country_unique": ";United States;United Kingdom" }, { "title": "Optimal Scalarizations for Sublinear Hypervolume Regret", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96780", "id": "30NS22tgCW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=30NS22tgCW", "openreview": "https://openreview.net/forum?id=30NS22tgCW", "poster": "/media/PosterPDFs/NeurIPS%202024/96780.png?t=1733897513.9723651", "project": "", "author_site": "Qiuyi (Richard) Zhang", "tldr": "", "abstract": "Scalarization is a general, parallizable technique that can be deployed in any multiobjective setting to reduce multiple objectives into one, yet some have dismissed this versatile approach because linear scalarizations cannot explore concave regions of the Pareto frontier. To that end, we aim to find simple non-linear scalarizations that provably explore a diverse set of $k$ objectives on the Pareto frontier, as measured by the dominated hypervolume. We show that hypervolume scalarizations with uniformly random weights achieves an optimal sublinear hypervolume regret bound of $O(T^{-1/k})$, with matching lower bounds that preclude any algorithm from doing better asymptotically. For the setting of multiobjective stochastic linear bandits, we utilize properties of hypervolume scalarizations to derive a novel non-Euclidean analysis to get regret bounds of $\\tilde{O}( d T^{-1/2} + T^{-1/k})$, removing unnecessary $\\text{poly}(k)$ dependencies. We support our theory with strong empirical performance of using non-linear scalarizations that outperforms both their linear counterparts and other standard multiobjective algorithms in a variety of natural settings.", "keywords": "multiobjective optimization;scalarization;hypervolume", "primary_area": "optimization", "supplementary_material": "", "author": "Qiuyi Zhang", "authorids": "~Qiuyi_Zhang1", "gender": "M", "homepage": "https://qiuyiz.github.io", "dblp": "133/8559", "google_scholar": "mE11hO8AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Qiuyi_Zhang1", "aff": "Google", "aff_domain": "google.com", "position": "Researcher", "bibtex": "@inproceedings{\nzhang2024optimal,\ntitle={Optimal Scalarizations for Sublinear Hypervolume Regret},\nauthor={Qiuyi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=30NS22tgCW}\n}", "github": "", "reviewers": "LEgt;ooQr;3k2M;9Trh", "pdf_size": 1648388, "rating": "5;6;6;7", "confidence": "2;3;2;2", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "70;46;148;153", "wc_strengths": "12;22;31;134", "wc_weaknesses": "109;246;1;5", "wc_questions": "3;10;1;113", "wc_limitations": "8;2;25;9", "wc_review": "202;326;206;414", "wc_reply_reviewers": "11;45;0;10", "wc_reply_authors": "0;53;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.25, 47.05515380912063 ], "wc_strengths_avg": [ 49.75, 49.10384404504397 ], "wc_weaknesses_avg": [ 90.25, 99.80324393525493 ], "wc_questions_avg": [ 31.75, 47.02858173494072 ], "wc_limitations_avg": [ 11.0, 8.514693182963201 ], "wc_review_avg": [ 287.0, 88.65100112237876 ], "wc_reply_reviewers_avg": [ 16.5, 17.00735135169495 ], "wc_reply_authors_avg": [ 13.25, 22.949673200287624 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v97TD8L6uX0J:scholar.google.com/&scioq=Optimal+Scalarizations+for+Sublinear+Hypervolume+Regret&hl=en&as_sdt=0,10", "gs_version_total": 4, "email": "google.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "EffiBench: Benchmarking the Efficiency of Automatically Generated Code", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97864", "id": "30XanJanJP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=30XanJanJP", "openreview": "https://openreview.net/forum?id=30XanJanJP", "poster": "", "project": "", "author_site": "Dong HUANG, Yuhao QING, Weiyi Shang, Heming Cui, Jie Zhang", "tldr": "", "abstract": "Code generation models have increasingly become integral to aiding software development. Although current research has thoroughly examined the correctness of the code produced by code generation models, a vital aspect that plays a pivotal role in green\ncomputing and sustainability efforts \u2014 the efficiency of the generated code \u2014 has often been neglected. This paper presents Effibench, a benchmark with 1,000 efficiency-critical coding problems to assess the efficiency of code generated by code generation models. \nEffiBench contains a diverse set of LeetCode coding problems. Each problem is paired with an executable human-written canonical solution, which obtains the SOTA efficiency on the LeetCode solution leaderboard. With EffiBench, we empirically examine the ability of 42 large language models (35 open-source and 7 closed-source) to generate efficient code. Our evaluation results demonstrate that the efficiency of the code generated by LLMs is generally worse than the efficiency of human-written canonical solutions. For example, GPT-4 generated code has an average \\textbf{3.12} times execution time that of the human-written canonical solutions. In the most extreme cases, the execution time and total memory usage of GPT-4 code are \\textbf{13.89} and \\textbf{43.92} times that of the canonical solutions. The source code of EffiBench is released on https://github.com/huangd1999/EffiBench. We also provide the LeaderBoard in https://huggingface.co/spaces/EffiBench/effibench-leaderboard.", "keywords": "code generation;efficiency", "primary_area": "", "supplementary_material": "", "author": "Dong HUANG;Yuhao QING;Weiyi Shang;Heming Cui;Jie Zhang", "authorids": "~Dong_HUANG4;~Yuhao_QING1;~Weiyi_Shang2;~Heming_Cui1;~Jie_Zhang21", "gender": "M;;;M;F", "homepage": "https://huangd1999.github.io/;;https://ece.uwaterloo.ca/~wshang/;https://www.cs.hku.hk/people/academic-staff/heming;https://sites.google.com/view/jie-zhang", "dblp": "94/3756-5.html;;;59/5565.html;84/6889-50", "google_scholar": "UER9hrAAAAAJ;https://scholar.google.com/citations?view_op=list_works;;lW9bpFIAAAAJ;rPWRqf8AAAAJ", "orcid": ";;;0000-0001-7746-440X;", "linkedin": ";;;;jie-zhang-5326aa187/", "or_profile": "~Dong_HUANG4;~Yuhao_QING1;~Weiyi_Shang2;~Heming_Cui1;~Jie_Zhang21", "aff": "The University of Hong Kong;The University of Hong Kong;University of Waterloo;the University of Hong Kong, University of Hong Kong;King's College London, University of London", "aff_domain": "cs.hku.hk;hku.hk;uwaterloo.ca;cs.hku.hk;kcl.ac.uk", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Lecturer", "bibtex": "@inproceedings{\nhuang2024effibench,\ntitle={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},\nauthor={Dong HUANG and Yuhao QING and Weiyi Shang and Heming Cui and Jie Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=30XanJanJP}\n}", "github": "", "reviewers": "ufVm;CNU4;s8Hy", "pdf_size": 512156, "rating": "6;7;7", "confidence": "4;4;3", "wc_summary_and_contributions": "108;46;30", "wc_strengths": "32;26;54", "wc_improvement": "119;70;119", "wc_limitations": "1;1;9", "wc_correctness": "1;1;1", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "265;148;217", "wc_reply_reviewers": "195;0;14", "wc_reply_authors": "900;125;21", "reply_reviewers": "2;0;1", "reply_authors": "4;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 61.333333333333336, 33.6386021641143 ], "wc_strengths_avg": [ 37.333333333333336, 12.036980056845191 ], "wc_improvement_avg": [ 102.66666666666667, 23.098821518760552 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 210.0, 48.02082881417188 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 88.8081577834429 ], "wc_reply_authors_avg": [ 348.6666666666667, 392.15671470585437 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7074371986090479959&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.hku.hk;hku.hk;uwaterloo.ca;cs.hku.hk;kcl.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Hong Kong;University of Waterloo;King's College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;https://uwaterloo.ca;https://www.kcl.ac.uk", "aff_unique_abbr": "HKU;UW;KCL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "China;Canada;United Kingdom" }, { "title": "Instance-adaptive Zero-shot Chain-of-Thought Prompting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96779", "id": "31xWlIdxTm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=31xWlIdxTm", "openreview": "https://openreview.net/forum?id=31xWlIdxTm", "poster": "/media/PosterPDFs/NeurIPS%202024/96779.png?t=1730525369.780963", "project": "", "author_site": "Xiaosong Yuan, Chen Shen, Shaotian Yan, Xiaofeng Zhang, Liang Xie, Wenxiao Wang, Renchu Guan, Ying Wang, Jieping Ye", "tldr": "", "abstract": "Zero-shot Chain-of-Thought (CoT) prompting emerges as a simple and effective strategy for enhancing the performance of large language models (LLMs) in real-world reasoning tasks. Nonetheless, the efficacy of a singular, task-level prompt uniformly applied across the whole of instances is inherently limited since one prompt cannot be a good partner for all, a more appropriate approach should consider the interaction between the prompt and each instance meticulously. This work introduces an instance-adaptive prompting algorithm as an alternative zero-shot CoT reasoning scheme by adaptively differentiating good and bad prompts. Concretely, we first employ analysis on LLMs through the lens of information flow to detect the mechanism under zero-shot CoT reasoning, in which we discover that information flows from question to prompt and question to rationale jointly influence the reasoning results most. We notice that a better zero-shot CoT reasoning needs the prompt to obtain semantic information from the question then the rationale aggregates sufficient information from the question directly and via the prompt indirectly. On the contrary, lacking any of those would probably lead to a bad one. Stem from that, we further propose an instance-adaptive prompting strategy (IAP) for zero-shot CoT reasoning. Experiments conducted with LLaMA-2, LLaMA-3, and Qwen on math, logic, and commonsense reasoning tasks (e.g., GSM8K, MMLU, Causal Judgement) obtain consistent improvement, demonstrating that the instance-adaptive zero-shot CoT prompting performs better than other task-level methods with some curated prompts or sophisticated procedures, showing the significance of our findings in the zero-shot CoT reasoning mechanism.", "keywords": "Large Language Models;Chain-of-Thought Reasoning;Instance-adaptive;Zero-shot", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xiaosong Yuan;Chen Shen;Shaotian Yan;Xiaofeng Zhang;Liang Xie;Wenxiao Wang;Renchu Guan;Ying Wang;Jieping Ye", "authorids": "~Xiaosong_Yuan1;~Chen_Shen7;~Shaotian_Yan1;~Xiaofeng_Zhang8;~Liang_Xie3;~Wenxiao_Wang2;~Renchu_Guan1;~Ying_Wang13;~Jieping_Ye4", "gender": "M;M;M;M;M;M;M;F;M", "homepage": ";;;https://github.com/zhangbaijin;https://www.linkedin.com/in/%E4%BA%AE-%E8%B0%A2-254928160/;https://wenxiaowang.com;https://ccst.jlu.edu.cn/info/1322/17817.htm;https://ccst.jlu.edu.cn/info/1367/19675.htm;http://yelabs.net/", "dblp": "25/2886;55/5393-3;274/1197;322/8929;81/2806-3;243/5853-1;84/8179;94/3104-9.html;03/5454", "google_scholar": "-Fg_EuEAAAAJ;b6vn1uMAAAAJ;sBhbb2wAAAAJ;https://scholar.google.co.jp/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=rcxOjikAAAAJ;U8Zhze4AAAAJ;;T9AzhwcAAAAJ", "orcid": "0000-0001-5748-5174;;;0000-0002-7185-4682;0000-0002-7604-1410;;0000-0002-7162-7826;0000-0002-3288-5195;0000-0001-8662-5818", "linkedin": ";;;;%E4%BA%AE-%E8%B0%A2-254928160/;;;;", "or_profile": "~Xiaosong_Yuan1;~Chen_Shen7;~Shaotian_Yan1;~Xiaofeng_Zhang8;~Liang_Xie3;~Wenxiao_Wang2;~Renchu_Guan1;~Ying_Wang13;~Jieping_Ye4", "aff": "Jilin University;Alibaba Group;Alibaba Group;Shanghai Jiaotong University;Zhejiang University of Technology;Zhejiang University;Jilin University;Jilin University;Alibaba Group", "aff_domain": "jlu.edu.cn;alibaba-inc.com;alibaba-inc.com;sjtu.edu.cn;zjut.edu.cn;zju.edu.cn;jlu.edu.cn;jlu.edu.cn;alibaba-inc.com", "position": "PhD student;Researcher;Researcher;PhD student;Postdoc;Assistant Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nyuan2024instanceadaptive,\ntitle={Instance-adaptive Zero-shot Chain-of-Thought Prompting},\nauthor={Xiaosong Yuan and Chen Shen and Shaotian Yan and Xiaofeng Zhang and Liang Xie and Wenxiao Wang and Renchu Guan and Ying Wang and Jieping Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=31xWlIdxTm}\n}", "github": "", "reviewers": "43EH;WuYd;dc7a;UGUn", "pdf_size": 1616471, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;2;2;4", "wc_summary": "105;88;95;74", "wc_strengths": "140;26;61;87", "wc_weaknesses": "272;289;51;35", "wc_questions": "43;1;45;93", "wc_limitations": "8;1;16;10", "wc_review": "568;405;268;299", "wc_reply_reviewers": "101;200;17;60", "wc_reply_authors": "836;915;25;28", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 11.280514172678478 ], "wc_strengths_avg": [ 78.5, 41.58425182686349 ], "wc_weaknesses_avg": [ 161.75, 119.03649650422345 ], "wc_questions_avg": [ 45.5, 32.56915718897251 ], "wc_limitations_avg": [ 8.75, 5.356071321407137 ], "wc_review_avg": [ 385.0, 117.23267462614679 ], "wc_reply_reviewers_avg": [ 94.5, 67.7661419884591 ], "wc_reply_authors_avg": [ 451.0, 425.41920502017774 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11685120721014532587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jlu.edu.cn;alibaba-inc.com;alibaba-inc.com;sjtu.edu.cn;zjut.edu.cn;zju.edu.cn;jlu.edu.cn;jlu.edu.cn;alibaba-inc.com", "author_num": 9, "aff_unique_index": "0;1;1;2;3;4;0;0;1", "aff_unique_norm": "Jilin University;Alibaba Group;Shanghai Jiao Tong University;Zhejiang University of Technology;Zhejiang University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.alibaba.com;https://www.sjtu.edu.cn;https://www.zjut.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "JLU;Alibaba;SJTU;ZJUT;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "How Does Variance Shape the Regret in Contextual Bandits?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96778", "id": "32Z3nfCnwa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=32Z3nfCnwa", "openreview": "https://openreview.net/forum?id=32Z3nfCnwa", "poster": "", "project": "", "author_site": "Zeyu Jia, Jian Qian, Alexander Rakhlin, Chen-Yu Wei", "tldr": "", "abstract": "We consider realizable contextual bandits with general function approximation, investigating how small reward variance can lead to better-than-minimax regret bounds. Unlike in minimax regret bounds, we show that the eluder dimension \n$d_{\\text{elu}}$$-$a measure of the complexity of the function class$-$plays a crucial role in variance-dependent bounds. We consider two types of adversary: \n\n(1) Weak adversary: The adversary sets the reward variance before observing the learner's action. In this setting, we prove that a regret of $\\Omega( \\sqrt{ \\min (A, d_{\\text{elu}}) \\Lambda } + d_{\\text{elu}} )$ is unavoidable when $d_{\\text{elu}} \\leq \\sqrt{A T}$, where $A$ is the number of actions, $T$ is the total number of rounds, and $\\Lambda$ is the total variance over $T$ rounds. For the $A\\leq d_{\\text{elu}}$ regime, we derive a nearly matching upper bound $\\tilde{O}( \\sqrt{ A\\Lambda } + d_{\\text{elu} } )$ for the special case where the variance is revealed at the beginning of each round. \n\n(2) Strong adversary: The adversary sets the reward variance after observing the learner's action. We show that a regret of $\\Omega( \\sqrt{ d_{\\text{elu}} \\Lambda } + d_{\\text{elu}} )$ is unavoidable when $\\sqrt{ d_{\\text{elu}} \\Lambda } + d_{\\text{elu}} \\leq \\sqrt{A T}$. In this setting, we provide an upper bound of order $\\tilde{O}( d_{\\text{elu}}\\sqrt{ \\Lambda } + d_{\\text{elu}} )$.\n\nFurthermore, we examine the setting where the function class additionally provides distributional information of the reward, as studied by Wang et al. (2024). We demonstrate that the regret bound \n$\\tilde{O}(\\sqrt{d_{\\text{elu}} \\Lambda} + d_{\\text{elu}})$ established in their work is unimprovable when $\\sqrt{d_{\\text{elu}} \\Lambda} + d_{\\text{elu}}\\leq \\sqrt{AT}$. However, with a slightly different definition of the total variance and with the assumption that the reward follows a Gaussian distribution, one can achieve a regret of $\\tilde{O}(\\sqrt{A\\Lambda} + d_{\\text{elu}})$.", "keywords": "contextual bandit;regret analysis", "primary_area": "bandits", "supplementary_material": "", "author": "Zeyu Jia;Jian Qian;Alexander Rakhlin;Chen-Yu Wei", "authorids": "~Zeyu_Jia1;~Jian_Qian2;~Alexander_Rakhlin1;~Chen-Yu_Wei1", "gender": "M;;M;M", "homepage": "https://www.mit.edu/~zyjia/;https://sites.google.com/view/jianqian/about;http://www.mit.edu/~rakhlin/;https://bahh723.github.io/", "dblp": ";;59/407;183/1729", "google_scholar": "8TkJbjgAAAAJ;;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ;2L2cR-kAAAAJ", "orcid": ";;;", "linkedin": ";jianQ/;;", "or_profile": "~Zeyu_Jia1;~Jian_Qian2;~Alexander_Rakhlin1;~Chen-Yu_Wei1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Virginia, Charlottesville", "aff_domain": "mit.edu;mit.edu;mit.edu;virginia.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njia2024how,\ntitle={How Does Variance Shape the Regret in Contextual Bandits?},\nauthor={Zeyu Jia and Jian Qian and Alexander Rakhlin and Chen-Yu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=32Z3nfCnwa}\n}", "github": "", "reviewers": "CuA6;u4Bf;W3F3;aAKW", "pdf_size": 608340, "rating": "5;6;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "159;97;81;87", "wc_strengths": "44;84;62;89", "wc_weaknesses": "243;4;308;115", "wc_questions": "45;177;38;7", "wc_limitations": "5;1;11;4", "wc_review": "496;363;500;302", "wc_reply_reviewers": "65;102;10;49", "wc_reply_authors": "294;272;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.0, 31.12876483254676 ], "wc_strengths_avg": [ 69.75, 18.005207580030838 ], "wc_weaknesses_avg": [ 167.5, 117.18468329948244 ], "wc_questions_avg": [ 66.75, 65.23946275070021 ], "wc_limitations_avg": [ 5.25, 3.6314597615834874 ], "wc_review_avg": [ 415.25, 85.52594635547742 ], "wc_reply_reviewers_avg": [ 56.5, 33.01893396219811 ], "wc_reply_authors_avg": [ 141.5, 141.7136196700938 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16544625646493384714&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;mit.edu;virginia.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.virginia.edu", "aff_unique_abbr": "MIT;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LLMs as Zero-shot Graph Learners: Alignment of GNN Representations with LLM Token Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96777", "id": "32g9BWTndc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=32g9BWTndc", "openreview": "https://openreview.net/forum?id=32g9BWTndc", "poster": "/media/PosterPDFs/NeurIPS%202024/96777.png?t=1731576207.2266479", "project": "", "author_site": "Duo Wang, Yuan Zuo, Fengzhi Li, Junjie Wu", "tldr": "", "abstract": "Zero-shot graph machine learning, especially with graph neural networks (GNNs), has garnered significant interest due to the challenge of scarce labeled data. While methods like self-supervised learning and graph prompt learning have been extensively explored, they often rely on fine-tuning with task-specific labels, limiting their effectiveness in zero-shot scenarios. Inspired by the zero-shot capabilities of instruction-fine-tuned large language models (LLMs), we introduce a novel framework named Token Embedding-Aligned Graph Language Model (TEA-GLM) that leverages LLMs as cross-dataset and cross-task zero-shot learners for graph machine learning. Concretely, we pretrain a GNN, aligning its representations with token embeddings of an LLM. We then train a linear projector that transforms the GNN's representations into a fixed number of graph token embeddings without tuning the LLM. A unified instruction is designed for various graph tasks at different levels, such as node classification (node-level) and link prediction (edge-level). These design choices collectively enhance our method's effectiveness in zero-shot learning, setting it apart from existing methods. Experiments show that our graph token embeddings help the LLM predictor achieve state-of-the-art performance on unseen datasets and tasks compared to other methods using LLMs as predictors. Our code is available at https://github.com/W-rudder/TEA-GLM.", "keywords": "Large Language Models;Graph Neural Networks;Zero Shot Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Duo Wang;Yuan Zuo;Fengzhi Li;Junjie Wu", "authorids": "~Duo_Wang3;~Yuan_Zuo2;~Fengzhi_Li1;~Junjie_Wu1", "gender": "M;;M;M", "homepage": "https://github.com/W-rudder;;https://mysteriouslfz.github.io/;", "dblp": ";;;35/118", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;0000-0002-8867-0070;0000-0001-7650-3657", "linkedin": ";;;", "or_profile": "~Duo_Wang3;~Yuan_Zuo2;~Fengzhi_Li1;~Junjie_Wu1", "aff": "Beihang University;;Beihang University;Beihang University", "aff_domain": "buaa.edu;;buaa.edu.cn;buaa.edu.cn", "position": "MS student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024llms,\ntitle={{LLM}s as Zero-shot Graph Learners: Alignment of {GNN} Representations with {LLM} Token Embeddings},\nauthor={Duo Wang and Yuan Zuo and Fengzhi Li and Junjie Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=32g9BWTndc}\n}", "github": "", "reviewers": "LAU5;65Wi;64Q8", "pdf_size": 685605, "rating": "4;5;8", "confidence": "3;4;4", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "39;119;71", "wc_strengths": "44;66;78", "wc_weaknesses": "89;313;40", "wc_questions": "3;95;7", "wc_limitations": "1;1;8", "wc_review": "176;594;204", "wc_reply_reviewers": "0;449;0", "wc_reply_authors": "86;858;0", "reply_reviewers": "0;2;0", "reply_authors": "2;3;1", "rating_avg": [ 5.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.33333333333333, 32.87687468250121 ], "wc_strengths_avg": [ 62.666666666666664, 14.079141387961917 ], "wc_weaknesses_avg": [ 147.33333333333334, 118.83976139697053 ], "wc_questions_avg": [ 35.0, 42.45782220824175 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_review_avg": [ 324.6666666666667, 190.79016979102695 ], "wc_reply_reviewers_avg": [ 149.66666666666666, 211.66062983517324 ], "wc_reply_authors_avg": [ 314.6666666666667, 385.79557050622316 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6933752452815364, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5785859439931392546&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "buaa.edu;;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Retrieval & Fine-Tuning for In-Context Tabular Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96776", "id": "337dHOexCM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=337dHOexCM", "openreview": "https://openreview.net/forum?id=337dHOexCM", "poster": "/media/PosterPDFs/NeurIPS%202024/96776.png?t=1731273959.8780153", "project": "", "author_site": "Valentin Thomas, Junwei Ma, Rasa Hosseinzadeh, Keyvan Golestan, Guangwei Yu, Maks Volkovs, Anthony Caterini", "tldr": "", "abstract": "Tabular data is a pervasive modality spanning a wide range of domains, and this inherent diversity poses a considerable challenge for deep learning. Recent advancements using transformer-based in-context learning have shown promise on smaller and less complex tabular datasets, but have struggled to scale to larger and more complex ones. To address this limitation, we propose a combination of retrieval and fine-tuning: we can adapt the transformer to a local subset of the data by collecting nearest neighbours, and then perform task-specific fine-tuning with this retrieved set of neighbours in context. Using TabPFN as the base model -- currently the best tabular in-context learner -- and applying our retrieval and fine-tuning scheme on top results in what we call a locally-calibrated PFN, or LoCalPFN. We conduct extensive evaluation on 95 datasets curated by TabZilla from OpenML, upon which we establish a new state-of-the-art with LoCalPFN -- even with respect to tuned tree-based models. Notably, we show a significant boost in performance compared to the base in-context model, demonstrating the efficacy of our approach and advancing the frontier of deep learning in tabular data.", "keywords": "in-context learning;tabular data;retrieval;foundation models;transformers", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Valentin Thomas;Junwei Ma;Rasa Hosseinzadeh;Keyvan Golestan;Guangwei Yu;Maksims Volkovs;Anthony L. Caterini", "authorids": "~Valentin_Thomas1;~Junwei_Ma1;~Rasa_Hosseinzadeh2;~Keyvan_Golestan1;~Guangwei_Yu1;~Maksims_Volkovs3;~Anthony_L._Caterini1", "gender": "M;M;M;M;M;M;M", "homepage": "https://valthom.github.io/;;;;http://www.cs.toronto.edu/~guangweiyu/;;https://www.cs.toronto.edu/~mvolkovs", "dblp": ";;266/1688;;166/3142;167/4383;22/1815", "google_scholar": "https://scholar.google.ca/citations?user=XRhKEGMAAAAJ;LyoH1SMAAAAJ;;https://scholar.google.ca/citations?user=JhM4w0oAAAAJ;;34sCXQEAAAAJ;https://scholar.google.ca/citations?user=m9I8jgcAAAAJ", "orcid": ";;;;;;", "linkedin": "https://linkedin.com/in/valentin-thomas-84142980;;rasa-hosseinzadeh-6204a6208/;keyvangolestan/;guangweiyu/;;", "or_profile": "~Valentin_Thomas1;~Junwei_Ma1;~Rasa_Hosseinzadeh2;~Keyvan_Golestan1;~Guangwei_Yu1;~Anthony_L._Caterini1;~Maksims_Volkovs1", "aff": "Layer6;Layer 6 AI;Layer6;Layer 6 AI;Layer6 AI;Layer6;Layer6 AI", "aff_domain": "layer6.ai;layer6.ai;layer6.ai;layer6.ai;layer6.ai;layer6.ai;layer6.ai", "position": "Researcher;Researcher;Researcher;Senior Machine Learning Scientist;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nthomas2024retrieval,\ntitle={Retrieval \\& Fine-Tuning for In-Context Tabular Models},\nauthor={Valentin Thomas and Junwei Ma and Rasa Hosseinzadeh and Keyvan Golestan and Guangwei Yu and Maksims Volkovs and Anthony L. Caterini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=337dHOexCM}\n}", "github": "", "reviewers": "j47T;ZbyM;iFbB;jqEN", "pdf_size": 846433, "rating": "2;5;6;7", "confidence": "3;5;3;4", "soundness": "2;2;3;3", "novelty": "1;3;2;4", "presentation": "1;2;3;4", "wc_summary": "127;112;106;156", "wc_strengths": "50;49;70;116", "wc_weaknesses": "215;45;82;164", "wc_questions": "43;122;46;126", "wc_limitations": "107;1;26;10", "wc_review": "542;329;330;572", "wc_reply_reviewers": "0;24;0;144", "wc_reply_authors": "67;163;0;686", "reply_reviewers": "0;1;0;2", "reply_authors": "2;2;1;3", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 125.25, 19.330998422223306 ], "wc_strengths_avg": [ 71.25, 27.160403163428924 ], "wc_weaknesses_avg": [ 126.5, 66.82252614201292 ], "wc_questions_avg": [ 84.25, 39.789288759664956 ], "wc_limitations_avg": [ 36.0, 41.9583126448145 ], "wc_review_avg": [ 443.25, 114.24398233605129 ], "wc_reply_reviewers_avg": [ 42.0, 59.6992462263972 ], "wc_reply_authors_avg": [ 229.0, 270.13422589520195 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3223291856101521, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6343971024963992406&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "layer6.ai;layer6.ai;layer6.ai;layer6.ai;layer6.ai;layer6.ai;layer6.ai", "author_num": 7, "aff_unique_index": "0;1;0;1;0;0;0", "aff_unique_norm": "Layer6 AI;Layer 6 AI", "aff_unique_dep": ";", "aff_unique_url": "https://layer6.ai;https://layer6.ai", "aff_unique_abbr": "Layer6;Layer 6 AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Geometry Awakening: Cross-Geometry Learning Exhibits Superiority over Individual Structures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96775", "id": "347aDObXEa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=347aDObXEa", "openreview": "https://openreview.net/forum?id=347aDObXEa", "poster": "/media/PosterPDFs/NeurIPS%202024/96775.png?t=1730362932.1676202", "project": "", "author_site": "YADONG SUN, Xiaofeng Cao, Yu Wang, Wei Ye, Jingcai Guo, Qing Guo", "tldr": "", "abstract": "Recent research has underscored the efficacy of Graph Neural Networks (GNNs) in modeling diverse geometric structures within graph data. However, real-world graphs typically exhibit geometrically heterogeneous characteristics, rendering the confinement to a single geometric paradigm insufficient for capturing their intricate structural complexities. To address this limitation, we examine the performance of GNNs across various geometries through the lens of knowledge distillation (KD) and introduce a novel cross-geometric framework. This framework encodes graphs by integrating both Euclidean and hyperbolic geometries in a space-mixing fashion. Our approach employs multiple teacher models, each generating hint embeddings that encapsulate distinct geometric properties. We then implement a structure-wise knowledge transfer module that optimally leverages these embeddings within their respective geometric contexts, thereby enhancing the training efficacy of the student model. Additionally, our framework incorporates a geometric optimization network designed to bridge the distributional disparities among these embeddings. Experimental results demonstrate that our model-agnostic framework more effectively captures topological graph knowledge, resulting in superior performance of the student models when compared to traditional KD methodologies.", "keywords": "Cross-geometry learning;Graph neural networks;Graph distillation", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/599df8cdbd52cba5d4857624db74a1f5b0237eda.zip", "author": "Yadong Sun;Xiaofeng Cao;Yu Wang;Wei Ye;Jingcai Guo;Qing Guo", "authorids": "~Yadong_Sun1;~Xiaofeng_Cao2;~Yu_Wang61;~Wei_Ye4;~Jingcai_Guo1;~Qing_Guo3", "gender": "M;;M;M;M;M", "homepage": "https://github.com/sun-yd;;https://weiye.userweb.mwn.de/;https://jingcaiguo.github.io/;https://tsingqguo.github.io;https://xiaofengcaoml.github.io/", "dblp": ";02/5889-152;09/5394-1;192/7270;25/3038-5;117/3982-2.html", "google_scholar": ";iCBQJRcAAAAJ;fpRBybQAAAAJ;YjSHPjcAAAAJ;Rj2x4QUAAAAJ;", "orcid": ";;0000-0002-3784-7788;0000-0002-0449-4525;0000-0003-0974-9299;", "linkedin": ";;;jingcai-guo;;", "or_profile": "~Yadong_Sun1;~Yu_Wang61;~Wei_Ye4;~Jingcai_Guo1;~Qing_Guo3;~Xiaofeng_Cao1", "aff": "Jilin University;Jilin University;Tongji University;The Hong Kong Polytechnic University; Agency for Science, Technology and Research (A*STAR));Jilin University", "aff_domain": "jlu.edu.cn;jlu.edu.cn;tongji.edu.cn;polyu.edu.hk;cfar.a-star.edu.sg;jlu.edu.cn", "position": "MS student;Postdoc;Full Professor;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsun2024geometry,\ntitle={Geometry Awakening: Cross-Geometry Learning Exhibits Superiority over Individual Structures},\nauthor={Yadong Sun and Xiaofeng Cao and Yu Wang and Wei Ye and Jingcai Guo and Qing Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=347aDObXEa}\n}", "github": "", "reviewers": "GamE;ogwd;rr98;Z3B3", "pdf_size": 2455594, "rating": "5;5;6;6", "confidence": "4;2;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "50;82;87;77", "wc_strengths": "53;47;49;26", "wc_weaknesses": "66;115;153;14", "wc_questions": "60;81;2;258", "wc_limitations": "1;10;6;80", "wc_review": "230;335;297;455", "wc_reply_reviewers": "14;30;11;28", "wc_reply_authors": "264;257;557;278", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 14.300349646075091 ], "wc_strengths_avg": [ 43.75, 10.473180032826706 ], "wc_weaknesses_avg": [ 87.0, 52.225472712077966 ], "wc_questions_avg": [ 100.25, 95.56247956180292 ], "wc_limitations_avg": [ 24.25, 32.34482184214345 ], "wc_review_avg": [ 329.25, 81.75688044440052 ], "wc_reply_reviewers_avg": [ 20.75, 8.347903928532 ], "wc_reply_authors_avg": [ 339.0, 126.08925410200506 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8OoQWw-4cwkJ:scholar.google.com/&scioq=Geometry+Awakening:+Cross-Geometry+Learning+Exhibits+Superiority+over+Individual+Structures&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "jlu.edu.cn;jlu.edu.cn;tongji.edu.cn;polyu.edu.hk;cfar.a-star.edu.sg;jlu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Jilin University;Tongji University;Hong Kong Polytechnic University;Agency for Science, Technology and Research", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.tongji.edu.cn;https://www.polyu.edu.hk;https://www.a-star.edu.sg", "aff_unique_abbr": "JLU;Tongji;PolyU;A*STAR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Fast Best-of-N Decoding via Speculative Rejection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96774", "id": "348hfcprUs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=348hfcprUs", "openreview": "https://openreview.net/forum?id=348hfcprUs", "poster": "/media/PosterPDFs/NeurIPS%202024/96774.png?t=1731723252.021241", "project": "", "author_site": "Hanshi Sun, Momin Haider, Ruiqi Zhang, Huitao Yang, Jiahao Qiu, Ming Yin, Mengdi Wang, Peter Bartlett, Andrea Zanette", "tldr": "", "abstract": "The safe and effective deployment of Large Language Models (LLMs) involves a critical step called alignment, which ensures that the model's responses are in accordance with human preferences. Prevalent alignment techniques, such as DPO, PPO and their variants, align LLMs by changing the pre-trained model weights during a phase called post-training. While predominant, these post-training methods add substantial complexity before LLMs can be deployed. Inference-time alignment methods avoid the complex post-training step and instead bias the generation towards responses that are aligned with human preferences. The best-known inference-time alignment method, called Best-of-N, is as effective as the state-of-the-art post-training procedures. Unfortunately, Best-of-N requires vastly more resources at inference time than standard decoding strategies, which makes it computationally not viable. In this work, we introduce Speculative Rejection, a computationally-viable inference-time alignment algorithm. It generates high-scoring responses according to a given reward model, like Best-of-N does, while being between 16 to 32 times more computationally efficient.", "keywords": "alignment;large language models;rejection sampling;best-of-n;acceleration", "primary_area": "generative_models", "supplementary_material": "/attachment/b9f184fa622928c4e671561e0804187de6649aa4.zip", "author": "Hanshi Sun;Momin Haider;Ruiqi Zhang;Huitao Yang;Jiahao Qiu;Ming Yin;Mengdi Wang;Peter Bartlett;Andrea Zanette", "authorids": "~Hanshi_Sun1;~Momin_Haider1;~Ruiqi_Zhang2;~Huitao_Yang1;~Jiahao_Qiu1;~Ming_Yin4;~Mengdi_Wang1;~Peter_Bartlett1;~Andrea_Zanette1", "gender": "M;M;M;M;M;M;F;M;", "homepage": "https://preminstrel.com/;https://momin.dev;https://rqzhangberkeley.github.io/;http://google.com;;https://mingyin0312.github.io;http://mwang.princeton.edu;https://www.stat.berkeley.edu/~bartlett/;", "dblp": "314/7377.html;;;;;89/453.html;;https://dblp.org/pers/hd/b/Bartlett:Peter_L=;", "google_scholar": "BjQHEh8AAAAJ;P9d-jecAAAAJ;uErE2UUAAAAJ;;86dbUg4AAAAJ;ncBRYIUAAAAJ;;yQNhFGUAAAAJ;", "orcid": "0009-0005-4436-234X;;;;0009-0000-7752-4169;0000-0001-6458-0751;;;", "linkedin": "hanshi-sun-5b74b8228/;;;;jiahao-qiu-6a6161224/;;;;", "or_profile": "~Hanshi_Sun1;~Momin_Haider1;~Ruiqi_Zhang2;~Huitao_Yang1;~Jiahao_Qiu1;~Ming_Yin4;~Mengdi_Wang1;~Peter_Bartlett1;~Andrea_Zanette1", "aff": "Carnegie Mellon University;University of California, Santa Barbara;University of California, Berkeley;Fudan University;Princeton University;Princeton University;Princeton University;University of California, Berkeley;", "aff_domain": "cmu.edu;ucsb.edu;berkeley.edu;fudan.edu.cn;princeton.edu;princeton.edu;princeton.edu;berkeley;", "position": "MS student;MS student;PhD student;Undergrad student;PhD student;Postdoc;Full Professor;Professor;", "bibtex": "@inproceedings{\nsun2024fast,\ntitle={Fast Best-of-N Decoding via Speculative Rejection},\nauthor={Hanshi Sun and Momin Haider and Ruiqi Zhang and Huitao Yang and Jiahao Qiu and Ming Yin and Mengdi Wang and Peter Bartlett and Andrea Zanette},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=348hfcprUs}\n}", "github": "", "reviewers": "gUaJ;Qphu;qQm8", "pdf_size": 1165219, "rating": "5;5;7", "confidence": "3;4;4", "soundness": "2;2;4", "novelty": "2;2;3", "presentation": "2;3;4", "wc_summary": "72;17;89", "wc_strengths": "33;29;41", "wc_weaknesses": "58;68;292", "wc_questions": "314;6;40", "wc_limitations": "1;17;32", "wc_review": "478;137;494", "wc_reply_reviewers": "20;116;171", "wc_reply_authors": "0;85;0", "reply_reviewers": "1;1;2", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 59.333333333333336, 30.728199137310703 ], "wc_strengths_avg": [ 34.333333333333336, 4.988876515698588 ], "wc_weaknesses_avg": [ 139.33333333333334, 108.02880274363048 ], "wc_questions_avg": [ 120.0, 137.87917415863308 ], "wc_limitations_avg": [ 16.666666666666668, 12.657891697365017 ], "wc_review_avg": [ 369.6666666666667, 164.64979670669365 ], "wc_reply_reviewers_avg": [ 102.33333333333333, 62.398361801857874 ], "wc_reply_authors_avg": [ 28.333333333333332, 40.069384267237695 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17656059534286232624&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "cmu.edu;ucsb.edu;berkeley.edu;fudan.edu.cn;princeton.edu;princeton.edu;princeton.edu;berkeley;", "author_num": 9, "aff_unique_index": "0;1;2;3;4;4;4;2", "aff_unique_norm": "Carnegie Mellon University;University of California, Santa Barbara;University of California, Berkeley;Fudan University;Princeton University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cmu.edu;https://www.ucsb.edu;https://www.berkeley.edu;https://www.fudan.edu.cn;https://www.princeton.edu", "aff_unique_abbr": "CMU;UCSB;UC Berkeley;Fudan;Princeton", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Santa Barbara;Berkeley", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "34dHGTri2w", "title": "Follow Hamiltonian Leader: An Efficient Energy-Guided Sampling Method", "track": "main", "status": "Reject", "tldr": "", "abstract": "Our research underscores the value of leveraging zeroth-order information for addressing sampling challenges, particularly when first-order data is unreliable or unavailable. In light of this, we have developed a novel parallel sampling method that incorporates a leader-guiding mechanism. This mechanism forges connections between multiple sampling instances via a selected leader, enhancing both the efficiency and effectiveness of the entire sampling process. Our experimental results demonstrate that our method markedly expedites the exploration of the target distribution and produces superior quality outcomes compared to traditional sampling techniques. Furthermore, our method also shows greater resilience against the detrimental impacts of corrupted gradients as intended.", "keywords": "Sampling;Hamiltonian Monte Carlo;Monte Carlo Markov Chain", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/9755c03deff5814e57c8b4e6359555baa53ead2d.zip", "author": "Yunfei Teng;Yao Li;Kai Chen;Sixin Zhang;Di He;Qiwei Ye", "authorids": "~Yunfei_Teng1;~Yao_Li11;~Kai_Chen2;~Sixin_Zhang2;~Di_He1;~Qiwei_Ye1", "gender": "Unspecified;F;M;M;M;M", "homepage": ";;;https://www.irit.fr/~Sixin.Zhang/;https://dihe-pku.github.io/;", "dblp": "215/5192;;;116/3004;74/184;50/995", "google_scholar": ";;https://scholar.google.co.jp/citations?user=kPDp3cUAAAAJ;-cL9xWMAAAAJ;https://scholar.google.co.jp/citations?user=orVoz4IAAAAJ;RJ6SuR8AAAAJ", "orcid": ";0009-0003-1450-5325;;;;0000-0003-4264-5846", "linkedin": ";;;;;qiwei-ye-15282964/", "or_profile": "~Yunfei_Teng1;~Yao_Li11;~Kai_Chen2;~Sixin_Zhang2;~Di_He1;~Qiwei_Ye1", "aff": "New York University;;Microsoft;Universtite Toulouse;Microsoft;Beijing Academy of Artificial Intelligence", "aff_domain": "nyu.edu;;microsoft.com;irit.fr;microsoft.com;baai.ac.cn", "position": "PhD student;;Researcher;Assistant Professor;Senior Researcher;Principal Researcher", "bibtex": "@misc{\nanonymous2024follow,\ntitle={Follow Hamiltonian Leader: An Efficient Energy-Guided Sampling Method},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=34dHGTri2w}\n}", "github": "", "project": "", "reviewers": "N2ad;M1Sx;PDRj;eV5W", "site": "https://openreview.net/forum?id=34dHGTri2w", "pdf_size": 2669583, "rating": "4;6;6;7", "confidence": "2;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "117;69;44;133", "wc_strengths": "70;27;40;94", "wc_weaknesses": "149;61;72;36", "wc_questions": "3;53;49;77", "wc_limitations": "8;27;1;47", "wc_review": "347;237;206;387", "wc_reply_reviewers": "0;29;28;0", "wc_reply_authors": "261;0;1144;0", "reply_reviewers": "0;1;1;0", "reply_authors": "3;1;3;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 35.82160660830276 ], "wc_strengths_avg": [ 57.75, 26.099568961957974 ], "wc_weaknesses_avg": [ 79.5, 42.19300889957956 ], "wc_questions_avg": [ 45.5, 26.77218706045511 ], "wc_limitations_avg": [ 20.75, 17.893783836852393 ], "wc_review_avg": [ 294.25, 74.91787169961518 ], "wc_reply_reviewers_avg": [ 14.25, 14.254385290148432 ], "wc_reply_authors_avg": [ 351.25, 469.9337054308831 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u6arJj4dypgJ:scholar.google.com/&scioq=Follow+Hamiltonian+Leader:+An+Efficient+Energy-Guided+Sampling+Method&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "New York University;Microsoft;Universit\u00e9 Toulouse;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";Microsoft Corporation;;", "aff_unique_url": "https://www.nyu.edu;https://www.microsoft.com;https://www.univ-toulouse.fr;https://www.baaic.cn", "aff_unique_abbr": "NYU;Microsoft;UT;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "United States;France;China" }, { "title": "Understanding Emergent Abilities of Language Models from the Loss Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96773", "id": "35DAviqMFo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=35DAviqMFo", "openreview": "https://openreview.net/forum?id=35DAviqMFo", "poster": "/media/PosterPDFs/NeurIPS%202024/96773.png?t=1731576627.2048805", "project": "", "author_site": "Zhengxiao Du, Aohan Zeng, Yuxiao Dong, Jie Tang", "tldr": "", "abstract": "Recent studies have put into question the belief that emergent abilities in language models are exclusive to large models. This skepticism arises from two observations: 1) smaller models can also exhibit high performance on emergent abilities and 2) there is doubt on the discontinuous metrics used to measure these abilities. In this paper, we propose to study emergent abilities in the lens of pre-training loss, instead of model size or training compute. We demonstrate that the Transformer models with the same pre-training loss, but different model and data sizes, generate the same performance on various downstream tasks, with a fixed data corpus, tokenization, and model architecture. We also discover that a model exhibits emergent abilities on certain tasks\u2014regardless of the continuity of metrics\u2014when its pre-training loss falls below a specific threshold. Before reaching this threshold, its performance remains at the level of random guessing. This inspires us to redefine emergent abilities as those that manifest in models with lower pre-training losses, highlighting that these abilities cannot be predicted by merely extrapolating the performance trends of models with higher pre-training losses.", "keywords": "pretrained language model;emergent ability;scaling law", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhengxiao Du;Aohan Zeng;Yuxiao Dong;Jie Tang", "authorids": "~Zhengxiao_Du1;~Aohan_Zeng1;~Yuxiao_Dong1;~Jie_Tang1", "gender": "M;M;M;", "homepage": "https://duzx16.github.io;;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": "234/0081;286/8519.html;17/9267;", "google_scholar": "A8x07E0AAAAJ;STftvjoAAAAJ;https://scholar.google.com.hk/citations?hl=en;", "orcid": ";;0000-0002-6092-2002;", "linkedin": ";;;", "or_profile": "~Zhengxiao_Du1;~Aohan_Zeng1;~Yuxiao_Dong1;~Jie_Tang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\ndu2024understanding,\ntitle={Understanding Emergent Abilities of Language Models from the Loss Perspective},\nauthor={Zhengxiao Du and Aohan Zeng and Yuxiao Dong and Jie Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=35DAviqMFo}\n}", "github": "", "reviewers": "BK9j;RA4k;NWes;kB8g;ZLu4", "pdf_size": 724857, "rating": "3;4;6;7;7", "confidence": "4;3;4;4;5", "soundness": "1;2;3;4;4", "novelty": "3;2;3;3;4", "presentation": "2;2;3;4;3", "wc_summary": "25;121;144;53;92", "wc_strengths": "116;83;137;128;157", "wc_weaknesses": "581;150;346;119;616", "wc_questions": "42;1;220;19;8", "wc_limitations": "4;1;8;83;7", "wc_review": "768;356;855;402;880", "wc_reply_reviewers": "597;0;259;0;17", "wc_reply_authors": "415;0;114;0;0", "reply_reviewers": "2;0;2;0;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 1.16619037896906 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 87.0, 43.42810150121693 ], "wc_strengths_avg": [ 124.2, 24.57152823900052 ], "wc_weaknesses_avg": [ 362.4, 208.19471655159745 ], "wc_questions_avg": [ 58.0, 82.18272324521742 ], "wc_limitations_avg": [ 20.6, 31.296006134968724 ], "wc_review_avg": [ 652.2, 226.611914955944 ], "wc_reply_reviewers_avg": [ 174.6, 232.9605975267062 ], "wc_reply_authors_avg": [ 105.8, 160.78109341586156 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5838742081211422, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16857919065481234872&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "BetterDepth: Plug-and-Play Diffusion Refiner for Zero-Shot Monocular Depth Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96772", "id": "35WwZhkush", "proceeding": "", "pdf": "https://openreview.net/pdf?id=35WwZhkush", "openreview": "https://openreview.net/forum?id=35WwZhkush", "poster": "/media/PosterPDFs/NeurIPS%202024/96772.png?t=1729687697.5389946", "project": "", "author_site": "Xiang Zhang, Bingxin Ke, Hayko Riemenschneider, Nando Metzger, Anton Obukhov, Markus Gross, Konrad Schindler, Christopher Schroers", "tldr": "", "abstract": "By training over large-scale datasets, zero-shot monocular depth estimation (MDE) methods show robust performance in the wild but often suffer from insufficient detail. Although recent diffusion-based MDE approaches exhibit a superior ability to extract details, they struggle in geometrically complex scenes that challenge their geometry prior, trained on less diverse 3D data. To leverage the complementary merits of both worlds, we propose BetterDepth to achieve geometrically correct affine-invariant MDE while capturing fine details. Specifically, BetterDepth is a conditional diffusion-based refiner that takes the prediction from pre-trained MDE models as depth conditioning, in which the global depth layout is well-captured, and iteratively refines details based on the input image. For the training of such a refiner, we propose global pre-alignment and local patch masking methods to ensure BetterDepth remains faithful to the depth conditioning while learning to add fine-grained scene details. With efficient training on small-scale synthetic datasets, BetterDepth achieves state-of-the-art zero-shot MDE performance on diverse public datasets and on in-the-wild scenes. Moreover, BetterDepth can improve the performance of other MDE models in a plug-and-play manner without further re-training.", "keywords": "Monocular depth estimation;diffusion model;zero-shot transfer;plug-and-play", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiang Zhang;Bingxin Ke;Hayko Riemenschneider;Nando Metzger;Anton Obukhov;Markus Gross;Konrad Schindler;Christopher Schroers", "authorids": "~Xiang_Zhang21;~Bingxin_Ke1;~Hayko_Riemenschneider3;~Nando_Metzger1;~Anton_Obukhov1;~Markus_Gross1;~Konrad_Schindler1;~Christopher_Schroers1", "gender": "M;M;M;M;;M;M;", "homepage": "https://xiangz-0.github.io/;https://www.kebingxin.com/;http://hayko.at;https://nandometzger.github.io/;https://www.obukhov.ai;https://cgl.ethz.ch/people/grossm/;https://igp.ethz.ch/personen/person-detail.html?persid=143986;", "dblp": ";312/5451;88/5817.html;280/3707;270/1595;;73/488;117/5901", "google_scholar": "AVnXU-cAAAAJ;btvVqo8AAAAJ;https://scholar.google.at/citations?hl=de;https://scholar.google.ch/citations?hl=de;EyE8nngAAAAJ;uxk0GmUAAAAJ;FZuNgqIAAAAJ;", "orcid": ";0000-0003-2251-673X;0000-0003-2541-7999;0000-0002-0299-3064;;;0000-0002-3172-9246;", "linkedin": ";bingxinke/;;;;;konrad-schindler-5b0b22153/;", "or_profile": "~Xiang_Zhang21;~Bingxin_Ke1;~Hayko_Riemenschneider3;~Nando_Metzger1;~Anton_Obukhov1;~Markus_Gross1;~Konrad_Schindler1;~Christopher_Schroers1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;Disney Research;Meta Facebook;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Disney Research|Studios, Disney", "aff_domain": "inf.ethz.ch;ethz.ch;disneyresearch.com;meta.com;ethz.ch;ethz.ch;ethz.ch;disneyresearch.com", "position": "PhD student;PhD student;Researcher;Intern;Postdoc;Full Professor;Professor;Principal Research Scientist", "bibtex": "@inproceedings{\nzhang2024betterdepth,\ntitle={BetterDepth: Plug-and-Play Diffusion Refiner for Zero-Shot Monocular Depth Estimation},\nauthor={Xiang Zhang and Bingxin Ke and Hayko Riemenschneider and Nando Metzger and Anton Obukhov and Markus Gross and Konrad Schindler and Christopher Schroers},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=35WwZhkush}\n}", "github": "", "reviewers": "EiFe;LNRC;Qdvk", "pdf_size": 48683872, "rating": "4;4;6", "confidence": "5;4;4", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "79;70;94", "wc_strengths": "42;26;60", "wc_weaknesses": "320;208;238", "wc_questions": "288;15;324", "wc_limitations": "51;9;12", "wc_review": "780;328;728", "wc_reply_reviewers": "86;47;186", "wc_reply_authors": "154;25;228", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 4.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.0, 9.899494936611665 ], "wc_strengths_avg": [ 42.666666666666664, 13.888444437333106 ], "wc_weaknesses_avg": [ 255.33333333333334, 47.33802793620462 ], "wc_questions_avg": [ 209.0, 137.96376335835436 ], "wc_limitations_avg": [ 24.0, 19.131126469708992 ], "wc_review_avg": [ 612.0, 201.9372839934881 ], "wc_reply_reviewers_avg": [ 106.33333333333333, 58.53963519606941 ], "wc_reply_authors_avg": [ 135.66666666666666, 83.88219252154907 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11431217444187367412&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "inf.ethz.ch;ethz.ch;disneyresearch.com;meta.com;ethz.ch;ethz.ch;ethz.ch;disneyresearch.com", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;3;4", "aff_unique_norm": "ETH Zurich;Disney Research;Meta;Swiss Federal Institute of Technology;Disney", "aff_unique_dep": "Department of Computer Science;;Meta Platforms, Inc.;;Disney Research|Studios", "aff_unique_url": "https://www.ethz.ch;https://research.disney.com;https://meta.com;https://www.ethz.ch;https://research.disney.com", "aff_unique_abbr": "ETHZ;Disney Research;Meta;ETH Zurich;Disney", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;1;1;0;0;0;1", "aff_country_unique": "Switzerland;United States" }, { "id": "36ehx1GHD0", "title": "CLImage: Human-Annotated Datasets for Complementary-Label Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Complementary-label learning (CLL) is a weakly-supervised learning paradigm that aims to train a multi-class classifier using only complementary labels, which indicate classes to which an instance does not belong. Despite numerous algorithmic proposals for CLL, their practical applicability remains unverified for two reasons. Firstly, these algorithms often rely on assumptions about the generation of complementary labels, and it is not clear how far the assumptions are from reality. Secondly, their evaluation has been limited to synthetic datasets. To gain insights into the real-world performance of CLL algorithms, we developed a protocol to collect complementary labels from human annotators. Our efforts resulted in the creation of four datasets: CLCIFAR10, CLCIFAR20, CLMicroImageNet10, and CLMicroImageNet20, derived from well-known classification datasets CIFAR10, CIFAR100, and TinyImageNet200. These datasets represent the very first real-world CLL datasets. Through extensive benchmark experiments, we discovered a notable decrease in performance when transitioning from synthetic datasets to real-world datasets. We investigated the key factors contributing to the decrease with a thorough dataset-level ablation study. Our analyses highlight annotation noise as the most influential factor in the real-world datasets. In addition, we discover that the biased-nature of human-annotated complementary labels and the difficulty to validate with only complementary labels are two outstanding barriers to practical CLL. These findings suggest that the community focus more research efforts on developing CLL algorithms and validation schemes that are robust to noisy and biased complementary-label distributions.", "keywords": "Real-world complementary datasets;datasets for complementary-label learning;clcifar10;clcifar20;clmicroimagenet10;clmicroimagenet20", "primary_area": "", "supplementary_material": "/attachment/1f61ddc520c43c403b49c4afe230666d2b40012f.pdf", "author": "Hsiu-Hsuan Wang;Mai Tan Ha;Nai-Xuan Ye;Wei-I Lin;Hsuan-Tien Lin", "authorids": "~Hsiu-Hsuan_Wang1;~Mai_Tan_Ha1;~Nai-Xuan_Ye1;~Wei-I_Lin1;~Hsuan-Tien_Lin1", "gender": "M;M;M;;M", "homepage": ";https://learner.csie.ntu.edu.tw/doku.php;;;http://www.csie.ntu.edu.tw/~htlin", "dblp": ";;;280/1256;10/3718", "google_scholar": ";2_SrAgQAAAAJ;;;https://scholar.google.com.tw/citations?user=yAr4UPUAAAAJ", "orcid": ";;;;", "linkedin": "anthony-wss/;;nai-xuan-ye-357047253/;weii-lin/;", "or_profile": "~Hsiu-Hsuan_Wang1;~Mai_Tan_Ha1;~Nai-Xuan_Ye1;~Wei-I_Lin1;~Hsuan-Tien_Lin1", "aff": "Department of computer science and informational engineering, National Taiwan University;Department of computer science and informational engineering, National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University", "aff_domain": "csie.ntu.edu.tw;csie.ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "position": "Undergrad student;PhD student;Undergrad student;MS student;Full Professor", "bibtex": "@misc{\nanonymous2024climage,\ntitle={{CLI}mage: Human-Annotated Datasets for Complementary-Label Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=36ehx1GHD0}\n}", "github": "", "project": "", "reviewers": "dR23;e9TQ;6bMs;qMHj", "site": "https://openreview.net/forum?id=36ehx1GHD0", "pdf_size": 837892, "rating": "5;6;6;6", "confidence": "3;3;3;4", "wc_summary_and_contributions": "34;24;138;75", "wc_strengths": "24;21;3;81", "wc_improvement": "72;29;2;111", "wc_limitations": "57;1;1;1", "wc_correctness": "15;1;1;1", "wc_clarity": "19;1;1;1", "wc_relation_to_prior_work": "120;1;1;1", "wc_documentation": "10;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "352;80;149;273", "wc_reply_reviewers": "11;9;0;0", "wc_reply_authors": "73;33;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 67.75, 44.83511458667191 ], "wc_strengths_avg": [ 32.25, 29.269224451631786 ], "wc_improvement_avg": [ 53.5, 41.536128851880264 ], "wc_limitations_avg": [ 15.0, 24.24871130596428 ], "wc_correctness_avg": [ 4.5, 6.06217782649107 ], "wc_clarity_avg": [ 5.5, 7.794228634059948 ], "wc_relation_to_prior_work_avg": [ 30.75, 51.528511525174096 ], "wc_documentation_avg": [ 3.25, 3.897114317029974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 213.5, 105.71778469112942 ], "wc_reply_reviewers_avg": [ 5.0, 5.049752469181039 ], "wc_reply_authors_avg": [ 26.5, 30.03747659175118 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9HOMhJcz7uAJ:scholar.google.com/&scioq=CLImage:+Human-Annotated+Datasets+for+Complementary-Label+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "Department of Computer Science and Informational Engineering", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "X-Ray: A Sequential 3D Representation For Generation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96771", "id": "36tMV15dPO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=36tMV15dPO", "openreview": "https://openreview.net/forum?id=36tMV15dPO", "poster": "/media/PosterPDFs/NeurIPS%202024/96771.png?t=1732002407.6486256", "project": "", "author_site": "Tao Hu, Wenhang Ge, Yuyang Zhao, Gim Hee Lee", "tldr": "", "abstract": "We introduce X-Ray, a novel 3D sequential representation inspired by the penetrability of x-ray scans. X-Ray transforms a 3D object into a series of surface frames at different layers, making it suitable for generating 3D models from images. Our method utilizes ray casting from the camera center to capture geometric and textured details, including depth, normal, and color, across all intersected surfaces. This process efficiently condenses the whole 3D object into a multi-frame video format, motivating the utilize of a network architecture similar to those in video diffusion models. This design ensures an efficient 3D representation by focusing solely on surface information. Also, we propose a two-stage pipeline to generate 3D objects from X-Ray Diffusion Model and Upsampler. We demonstrate the practicality and adaptability of our X-Ray representation by synthesizing the complete visible and hidden surfaces of a 3D object from a single input image. Experimental results reveal the state-of-the-art superiority of our representation in enhancing the accuracy of 3D generation, paving the way for new 3D representation research and practical applications. \nOur project page is in \\url{https://tau-yihouxiang.github.io/projects/X-Ray/X-Ray.html}.", "keywords": "3D generation;3D representation;3D reconstruciton;diffusion model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Tao Hu;Wenhang Ge;Yuyang Zhao;Gim Hee Lee", "authorids": "~Tao_Hu1;~Wenhang_Ge1;~Yuyang_Zhao1;~Gim_Hee_Lee1", "gender": "M;M;M;", "homepage": "https://tau-yihouxiang.github.io;https://g3956.github.io/wenhangge.github.io/;http://yuyangzhao.com/;https://www.comp.nus.edu.sg/~leegh/", "dblp": ";25/10422.html;;49/9455", "google_scholar": "xXUg31EAAAAJ;https://scholar.google.com.hk/citations?user=gzPpG0QAAAAJ;u5M6XPAAAAAJ;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": "0000-0001-6978-6994;;0000-0002-4754-0325;0000-0002-1583-0475", "linkedin": ";;;", "or_profile": "~Tao_Hu1;~Wenhang_Ge1;~Yuyang_Zhao1;~Gim_Hee_Lee1", "aff": "National University of Singapore;Hong Kong University of Science and Technology;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;hkust.edu;nus.edu.sg;nus.edu.sg", "position": "Researcher;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2024xray,\ntitle={X-Ray: A Sequential 3D Representation For Generation},\nauthor={Tao Hu and Wenhang Ge and Yuyang Zhao and Gim Hee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=36tMV15dPO}\n}", "github": "", "reviewers": "5c6P;EY92;xHzv;wtWj", "pdf_size": 8853086, "rating": "5;5;5;9", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "65;69;117;70", "wc_strengths": "73;57;150;107", "wc_weaknesses": "134;259;334;89", "wc_questions": "31;232;96;66", "wc_limitations": "17;28;34;28", "wc_review": "320;645;731;360", "wc_reply_reviewers": "0;113;0;260", "wc_reply_authors": "0;413;0;91", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 21.299941314473145 ], "wc_strengths_avg": [ 96.75, 35.65371649632055 ], "wc_weaknesses_avg": [ 204.0, 97.5320460156558 ], "wc_questions_avg": [ 106.25, 76.1589620464985 ], "wc_limitations_avg": [ 26.75, 6.139014578904337 ], "wc_review_avg": [ 514.0, 177.20186229269714 ], "wc_reply_reviewers_avg": [ 93.25, 106.7552691907992 ], "wc_reply_authors_avg": [ 126.0, 169.81313258991486 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16453445996974633574&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nus.edu.sg;hkust.edu;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk", "aff_unique_abbr": "NUS;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Aggregating Quantitative Relative Judgments: From Social Choice to Ranking Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96770", "id": "37CyA1K0vV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=37CyA1K0vV", "openreview": "https://openreview.net/forum?id=37CyA1K0vV", "poster": "/media/PosterPDFs/NeurIPS%202024/96770.png?t=1729886053.4609473", "project": "", "author_site": "Yixuan Xu, Hanrui Zhang, Yu Cheng, Vincent Conitzer", "tldr": "", "abstract": "Quantitative Relative Judgment Aggregation (QRJA) is a new research topic in (computational) social choice. In the QRJA model, agents provide judgments on the relative quality of different candidates, and the goal is to aggregate these judgments across all agents. In this work, our main conceptual contribution is to explore the interplay between QRJA in a social choice context and its application to ranking prediction. We observe that in QRJA, judges do not have to be people with subjective opinions; for example, a race can be viewed as a ``judgment'' on the contestants' relative abilities. This allows us to aggregate results from multiple races to evaluate the contestants' true qualities. At a technical level, we introduce new aggregation rules for QRJA and study their structural and computational properties. We evaluate the proposed methods on data from various real races and show that QRJA-based methods offer effective and interpretable ranking predictions.", "keywords": "Algorithmic Game Theory;Ranking and Preference Learning", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/65296069c63e400dfaa26d4aac3c7e2ffa7957e9.zip", "author": "Yixuan Even Xu;Hanrui Zhang;Yu Cheng;Vincent Conitzer", "authorids": "~Yixuan_Even_Xu1;~Hanrui_Zhang1;~Yu_Cheng2;~Vincent_Conitzer2", "gender": "M;;M;M", "homepage": "https://yixuanevenxu.github.io/;;https://cs.brown.edu/people/ycheng79/;https://www.cs.cmu.edu/~conitzer/", "dblp": "349/7695;168/8847;96/3060-2;c/VincentConitzer", "google_scholar": "viloxoAAAAAJ;;lVoOIv4AAAAJ;juRk4lQAAAAJ", "orcid": "0009-0003-9360-753X;;0000-0002-0019-2570;0000-0003-1899-7884", "linkedin": "yixuan-xu-57584a268/;;yu-cheng-40401632/;vincent-conitzer-2563082/", "or_profile": "~Yixuan_Even_Xu1;~Hanrui_Zhang1;~Yu_Cheng2;~Vincent_Conitzer2", "aff": "Tsinghua University;Google Research;Brown University;University of Oxford", "aff_domain": "tsinghua.edu.cn;google.com;brown.edu;oxford.ac.uk", "position": "Undergrad student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024aggregating,\ntitle={Aggregating Quantitative Relative Judgments: From Social Choice to Ranking Prediction},\nauthor={Yixuan Even Xu and Hanrui Zhang and Yu Cheng and Vincent Conitzer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=37CyA1K0vV}\n}", "github": "", "reviewers": "Fgap;iMha;Z9Qp;zJAB", "pdf_size": 1062103, "rating": "5;6;7;8", "confidence": "4;3;3;4", "soundness": "4;3;3;4", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "84;131;210;94", "wc_strengths": "26;26;240;214", "wc_weaknesses": "143;250;8;339", "wc_questions": "57;28;52;85", "wc_limitations": "1;5;20;20", "wc_review": "311;440;530;752", "wc_reply_reviewers": "182;10;5;22", "wc_reply_authors": "283;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 129.75, 49.52966282946009 ], "wc_strengths_avg": [ 126.5, 100.91952239284528 ], "wc_weaknesses_avg": [ 185.0, 123.52530105205167 ], "wc_questions_avg": [ 55.5, 20.254629100529094 ], "wc_limitations_avg": [ 11.5, 8.616843969807043 ], "wc_review_avg": [ 508.25, 160.82035785310268 ], "wc_reply_reviewers_avg": [ 54.75, 73.72711509343085 ], "wc_reply_authors_avg": [ 70.75, 122.54259463549806 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5375843274795945413&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;google.com;brown.edu;oxford.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Tsinghua University;Google;Brown University;University of Oxford", "aff_unique_dep": ";Google Research;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://research.google;https://www.brown.edu;https://www.ox.ac.uk", "aff_unique_abbr": "THU;Google Research;Brown;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "NetworkGym: Reinforcement Learning Environments for Multi-Access Traffic Management in Network Simulation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97863", "id": "3814z76JNM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3814z76JNM", "openreview": "https://openreview.net/forum?id=3814z76JNM", "poster": "", "project": "", "author_site": "Momin Haider, Ming Yin, Menglei Zhang, Arpit Gupta, Jing Zhu, Yu-Xiang Wang", "tldr": "", "abstract": "Mobile devices such as smartphones, laptops, and tablets can often connect to multiple access networks (e.g., Wi-Fi, LTE, and 5G) simultaneously.\nRecent advancements facilitate seamless integration of these connections below the transport layer, enhancing the experience for apps that lack inherent multi-path support.\nThis optimization hinges on dynamically determining the traffic distribution across networks for each device, a process referred to as multi-access traffic splitting.\nThis paper introduces NetworkGym, a high-fidelity network environment simulator that facilitates generating multiple network traffic flows and multi-access traffic splitting.\nThis simulator facilitates training and evaluating different RL-based solutions for the multi-access traffic splitting problem.\nOur initial explorations demonstrate that the majority of existing state-of-the-art offline RL algorithms (e.g. CQL) fail to outperform certain hand-crafted heuristic policies on average.\nThis illustrates the urgent need to evaluate offline RL algorithms against a broader range of benchmarks, rather than relying solely on popular ones such as D4RL.\nWe also propose an extension to the TD3+BC algorithm, named Pessimistic TD3 (PTD3), and demonstrate that it outperforms many state-of-the-art offline RL algorithms.\nPTD3's behavioral constraint mechanism, which relies on value-function pessimism, is theoretically motivated and relatively simple to implement.\nWe open source our code and offline datasets at github.com/hmomin/networkgym.", "keywords": "reinforcement learning;NetworkGym;network simulation;traffic splitting;offline RL", "primary_area": "", "supplementary_material": "/attachment/ba014458a00f302e314e8cdc66c1147a579132e3.pdf", "author": "Momin Haider;Ming Yin;Menglei Zhang;Arpit Gupta;Jing Zhu;Yu-Xiang Wang", "authorids": "~Momin_Haider1;~Ming_Yin4;~Menglei_Zhang1;~Arpit_Gupta2;~Jing_Zhu5;~Yu-Xiang_Wang1", "gender": "M;M;M;M;M;", "homepage": "https://momin.dev;https://mingyin0312.github.io;;https://sites.cs.ucsb.edu/~arpitgupta/;;http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";89/453.html;;;;62/1637-3.html", "google_scholar": "P9d-jecAAAAJ;ncBRYIUAAAAJ;https://scholar.google.com/citations?hl=en;;BwHhps0AAAAJ;HGNZ1fkAAAAJ", "orcid": ";0000-0001-6458-0751;;;;", "linkedin": ";;;;jing-zhu-2461b7b;", "or_profile": "~Momin_Haider1;~Ming_Yin4;~Menglei_Zhang1;~Arpit_Gupta2;~Jing_Zhu5;~Yu-Xiang_Wang1", "aff": "University of California, Santa Barbara;Princeton University;Intel;UC Santa Barbara;Intel;UC Santa Barbara", "aff_domain": "ucsb.edu;princeton.edu;intel.com;ucsb.edu;intel.com;ucsb.edu", "position": "MS student;Postdoc;Researcher;Assistant Professor;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhaider2024networkgym,\ntitle={NetworkGym: Reinforcement Learning Environments for Multi-Access Traffic Management in Network Simulation},\nauthor={Momin Haider and Ming Yin and Menglei Zhang and Arpit Gupta and Jing Zhu and Yu-Xiang Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3814z76JNM}\n}", "github": "", "reviewers": "pxPu;QCwj;sZTT;jRFS", "pdf_size": 405148, "rating": "6;6;6;8", "confidence": "4;4;3;5", "wc_summary_and_contributions": "48;40;48;16", "wc_strengths": "270;3;19;39", "wc_improvement": "174;45;98;120", "wc_limitations": "146;3;17;15", "wc_correctness": "1;1;10;14", "wc_clarity": "1;1;77;11", "wc_relation_to_prior_work": "1;1;39;7", "wc_documentation": "1;1;17;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "643;96;326;224", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 38.0, 13.114877048604 ], "wc_strengths_avg": [ 82.75, 108.85856649800235 ], "wc_improvement_avg": [ 109.25, 46.267564232408 ], "wc_limitations_avg": [ 45.25, 58.413932413423424 ], "wc_correctness_avg": [ 6.5, 5.678908345800274 ], "wc_clarity_avg": [ 22.5, 31.729323976410214 ], "wc_relation_to_prior_work_avg": [ 12.0, 15.7797338380595 ], "wc_documentation_avg": [ 5.0, 6.928203230275509 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 322.25, 202.3219896600466 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5566543603745531526&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 4, "email": "ucsb.edu;princeton.edu;intel.com;ucsb.edu;intel.com;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;2;0", "aff_unique_norm": "University of California, Santa Barbara;Princeton University;Intel", "aff_unique_dep": ";;Intel Corporation", "aff_unique_url": "https://www.ucsb.edu;https://www.princeton.edu;https://www.intel.com", "aff_unique_abbr": "UCSB;Princeton;Intel", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploiting Activation Sparsity with Dense to Dynamic-k Mixture-of-Experts Conversion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96769", "id": "38UFpdt3Tr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=38UFpdt3Tr", "openreview": "https://openreview.net/forum?id=38UFpdt3Tr", "poster": "", "project": "", "author_site": "Filip Szatkowski, Bartosz W\u00f3jcik, Miko\u0142aj Pi\u00f3rczy\u0144ski, Simone Scardapane", "tldr": "", "abstract": "Transformer models can face practical limitations due to their high computational requirements. At the same time, such models exhibit significant activation sparsity, which can be leveraged to reduce the inference cost by converting parts of the network into equivalent Mixture-of-Experts (MoE) layers. Despite the crucial role played by activation sparsity, its impact on this process remains unexplored. We demonstrate that the efficiency of the conversion can be significantly enhanced by a proper regularization of the activation sparsity of the base model. Moreover, motivated by the high variance of the number of activated neurons for different inputs, we introduce a more effective dynamic-$k$ expert selection rule that adjusts the number of executed experts on a per-token basis. To achieve further savings, we extend this approach to multi-head attention projections. Finally, we develop an efficient implementation that translates these computational savings into actual wall-clock speedup. The proposed method, Dense to Dynamic-$k$ Mixture-of-Experts (D2DMoE), outperforms existing approaches on common NLP and vision tasks, reducing inference cost by up to 60\\% without significantly impacting performance.", "keywords": "inference efficiency;activation sparsity;dynamic-k gating;mixture-of-experts;conditional computation;dynamic neural networks", "primary_area": "other", "supplementary_material": "", "author": "Filip Szatkowski;Bartosz W\u00f3jcik;Miko\u0142aj Pi\u00f3rczy\u0144ski;Simone Scardapane", "authorids": "~Filip_Szatkowski1;~Bartosz_W\u00f3jcik1;~Miko\u0142aj_Pi\u00f3rczy\u0144ski1;~Simone_Scardapane1", "gender": "M;M;M;M", "homepage": ";;;http://ispac.diet.uniroma1.it/scardapane/", "dblp": "323/8425;;;144/2184", "google_scholar": "xjnAIOEAAAAJ;2HBGrsEAAAAJ;4AlZRnUAAAAJ;https://scholar.google.it/citations?user=aSuosYoAAAAJ", "orcid": "0000-0001-8592-2001;0000-0002-1100-4176;;0000-0003-0881-8344", "linkedin": "fszatkowski/;;mpiorczynski/;simonescardapane", "or_profile": "~Filip_Szatkowski1;~Bartosz_W\u00f3jcik1;~Miko\u0142aj_Pi\u00f3rczy\u0144ski1;~Simone_Scardapane1", "aff": "Amazon;IDEAS NCBR;Warsaw University of Technology;Sapienza University of Rome", "aff_domain": "amazon.de;ideas-ncbr.pl;pw.edu.pl;uniroma1.it", "position": "Intern;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nszatkowski2024exploiting,\ntitle={Exploiting Activation Sparsity with Dense to Dynamic-k Mixture-of-Experts Conversion},\nauthor={Filip Szatkowski and Bartosz W{\\'o}jcik and Miko{\\l}aj Pi{\\'o}rczy{\\'n}ski and Simone Scardapane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=38UFpdt3Tr}\n}", "github": "", "reviewers": "85BP;VwY8;v3M2;R1eJ;uQnr", "pdf_size": 5279748, "rating": "3;5;6;6;6", "confidence": "4;5;5;3;4", "soundness": "2;3;2;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;4;3;3", "wc_summary": "39;93;228;99;56", "wc_strengths": "27;33;142;45;84", "wc_weaknesses": "137;277;183;78;119", "wc_questions": "144;2;7;47;1", "wc_limitations": "22;1;10;61;3", "wc_review": "369;406;570;330;263", "wc_reply_reviewers": "76;47;99;24;158", "wc_reply_authors": "1128;411;233;10;737", "reply_reviewers": "1;1;1;1;2", "reply_authors": "4;2;2;2;3", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 103.0, 66.40180720432238 ], "wc_strengths_avg": [ 66.2, 42.780369329869046 ], "wc_weaknesses_avg": [ 158.8, 68.03646081330216 ], "wc_questions_avg": [ 40.2, 54.62380433474036 ], "wc_limitations_avg": [ 19.4, 22.059918404200864 ], "wc_review_avg": [ 387.6, 102.75913584689197 ], "wc_reply_reviewers_avg": [ 80.8, 46.22293802864547 ], "wc_reply_authors_avg": [ 503.8, 392.3750246893907 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.04583492485141057, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12335275918898431152&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "amazon.de;ideas-ncbr.pl;pw.edu.pl;uniroma1.it", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Amazon;Institute for Development, Economic Analysis, and Simulation (IDEAS);Warsaw University of Technology;Sapienza University of Rome", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.ideas-ncbr.gov.pl;https://www.pw.edu.pl;https://www.uniroma1.it", "aff_unique_abbr": "Amazon;IDEAS;WUT;Sapienza", "aff_campus_unique_index": "1", "aff_campus_unique": ";Rome", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United States;Poland;Italy" }, { "title": "Towards Multi-dimensional Explanation Alignment for Medical Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96768", "id": "3A5VgiH5Pw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3A5VgiH5Pw", "openreview": "https://openreview.net/forum?id=3A5VgiH5Pw", "poster": "/media/PosterPDFs/NeurIPS%202024/96768.png?t=1730727472.2605057", "project": "", "author_site": "Lijie Hu, Songning Lai, Wenshuo Chen, Hongru Xiao, Hongbin Lin, Lu Yu, Jingfeng ZHANG, Di Wang", "tldr": "", "abstract": "The lack of interpretability in the field of medical image analysis has significant ethical and legal implications. Existing interpretable methods in this domain encounter several challenges, including dependency on specific models, difficulties in understanding and visualization, and issues related to efficiency. To address these limitations, we propose a novel framework called Med-MICN (Medical Multi-dimensional Interpretable Concept Network). Med-MICN provides interpretability alignment for various angles, including neural symbolic reasoning, concept semantics, and saliency maps, which are superior to current interpretable methods. Its advantages include high prediction accuracy, interpretability across multiple dimensions, and automation through an end-to-end concept labeling process that reduces the need for extensive human training effort when working with new datasets. To demonstrate the effectiveness and interpretability of Med-MICN, we apply it to four benchmark datasets and compare it with baselines. The results clearly demonstrate the superior performance and interpretability of our Med-MICN.", "keywords": "Explainable Medical Image;Interpretable ML;Explainable AI", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Lijie Hu;Songning Lai;Wenshuo Chen;Hongru Xiao;Hongbin Lin;Lu Yu;Jingfeng Zhang;Di Wang", "authorids": "~Lijie_Hu1;~Songning_Lai1;~Wenshuo_Chen3;~Hongru_Xiao1;~Hongbin_Lin4;~Lu_Yu1;~Jingfeng_Zhang1;~Di_Wang1", "gender": "F;M;;M;M;M;M;", "homepage": "https://lijie-hu.github.io;https://xll0328.github.io;;;https://github.com/HongbinLin3589;;https://zjfheart.github.io;", "dblp": "90/8790;347/3526;;307/8229;;04/1781-6.html;227/2664.html;", "google_scholar": "C-3UuqsAAAAJ;gRXN-rMAAAAJ;;;;ODK41KwAAAAJ;NS0P1FkAAAAJ;", "orcid": ";;;0009-0007-6715-357X;;;0000-0003-3491-8074;", "linkedin": "lijie-hu-98045a126;;;;;;;", "or_profile": "~Lijie_Hu1;~Songning_Lai1;~Wenshuo_Chen3;~Hongru_Xiao1;~Hongbin_Lin4;~Lu_Yu1;~Jingfeng_Zhang1;~Di_Wang1", "aff": "KAUST;Shandong University;;Tongji University;HKUST(GZ);Ant Group;University of Auckland;", "aff_domain": "kaust.edu.sa;sdu.edu.cn;;tongji.edu.cn;hkust-gz.edu.cn;antgroup.com;auckland.ac.nz;", "position": "PhD student;Undergrad student;;MS student;MS student;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nhu2024towards,\ntitle={Towards Multi-dimensional Explanation Alignment for Medical Classification},\nauthor={Lijie Hu and Songning Lai and Wenshuo Chen and Hongru Xiao and Hongbin Lin and Lu Yu and Jingfeng Zhang and Di Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3A5VgiH5Pw}\n}", "github": "", "reviewers": "WJ93;7tbr;Lw8m;gQUT", "pdf_size": 8321566, "rating": "5;6;7;8", "confidence": "4;4;4;5", "soundness": "2;2;3;3", "novelty": "2;4;3;3", "presentation": "2;2;2;4", "wc_summary": "80;38;69;50", "wc_strengths": "2;48;91;78", "wc_weaknesses": "234;132;126;54", "wc_questions": "2;154;75;2", "wc_limitations": "26;5;35;1", "wc_review": "344;377;396;185", "wc_reply_reviewers": "18;157;21;79", "wc_reply_authors": "81;45;32;212", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 59.25, 16.29992331270304 ], "wc_strengths_avg": [ 54.75, 34.2153109002388 ], "wc_weaknesses_avg": [ 136.5, 64.11513081948753 ], "wc_questions_avg": [ 58.25, 62.80276665880254 ], "wc_limitations_avg": [ 16.75, 14.184057952504283 ], "wc_review_avg": [ 325.5, 83.22409506867588 ], "wc_reply_reviewers_avg": [ 68.75, 56.45518133882842 ], "wc_reply_authors_avg": [ 92.5, 71.28990110808122 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8370636463762761535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaust.edu.sa;sdu.edu.cn;;tongji.edu.cn;hkust-gz.edu.cn;antgroup.com;auckland.ac.nz;", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "King Abdullah University of Science and Technology;Shandong University;Tongji University;Hong Kong University of Science and Technology;Ant Group;University of Auckland", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.kaust.edu.sa;http://www.sdu.edu.cn;https://www.tongji.edu.cn;https://www.ust.hk;https://www.antgroup.com;https://www.auckland.ac.nz", "aff_unique_abbr": "KAUST;SDU;Tongji;HKUST;Ant Group;UoA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;1;1;1;1;2", "aff_country_unique": "Saudi Arabia;China;New Zealand" }, { "id": "3A84lx1JFh", "title": "MyoChallenge 2023: Towards Human-Level Dexterity and Agility", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Humans move nimbly and with ease, capable of effortlessly grasping items of many shapes and qualities. Over millions of years, the musculoskeletal structure, central and peripheral neural systems have evolved together to provide this capacity. Understanding the underlying mechanisms of this complex system helps translate benefits to other fields, from robot locomotion to rehabilitation. To illicit new insights into the generation of diverse movements and precise control as well as foster collaboration between the biomechanics and the ML community, the MyoChallenge at the NeurIPS 2023 Competition featured two tracks: Manipulation and Locomotion. Manipulation involved precisely manoeuvering an object of varying shape by controlling a 63-musculoskeletal arm model and generating stable grasps. Locomotion involved the combination of abstract reasoning and low-level control, as agents have to chase or evade from a moving object by controlling an 80-musculoskeletal model of human legs. These tasks best highlighted our overarching theme of dexterity and agility, requiring the generation of skilled and efficient movements with realistic human limbs. The Myosuite framework enabled the challenge through a realistic, contact-rich and computation-efficient virtual neuromusculoskeletal model of the human arm and legs. This was the second iteration of the MyoChallenge with 59 teams participating, and over 500 submissions. Each task involved two phases, increasing in difficulty over time. While many teams achieved high performance in phase 1 for the Manipulation track, locomotion showed variable performance across participants. In phase two, scores for all teams dropped significantly as the focus shifted towards generalization under uncertain conditions, highlighting the need for stronger generalization in agents In future challenges, we will continue to pursue the generalizability in dexterous manipulation and agile locomotion, which is crucial for understanding motor constructs in humans.", "keywords": "Reinforcement learning;Neuromusculoskeletal control;manipulation;locomotion", "primary_area": "", "supplementary_material": "", "author": "Vittorio Caggiano;Guillaume Durandau;HUIYI WANG;Chun Kwang Tan;Pierre Schumacher;Huawei Wang;Alberto Silvio Chiappa;Alessandro Marin Vargas;Alexander Mathis;Jungdam Won;JUNGNAM PARK;Gunwoo Park;Beomsoo Shin;Minseung Kim;SEUNGBUM KOO;Zhuo Yang;Wei Dang;Heng Cai;Jianfei Song;Seungmoon Song;Massimo Sartori;Vikash Kumar", "authorids": "~Vittorio_Caggiano1;~Guillaume_Durandau1;~HUIYI_WANG2;~Chun_Kwang_Tan1;~Pierre_Schumacher1;~Huawei_Wang1;~Alberto_Silvio_Chiappa1;~Alessandro_Marin_Vargas1;~Alexander_Mathis1;~Jungdam_Won1;~JUNGNAM_PARK1;~Gunwoo_Park1;~Beomsoo_Shin1;~Minseung_Kim2;~SEUNGBUM_KOO1;~Zhuo_Yang7;~Wei_Dang1;~Heng_Cai1;~Jianfei_Song1;~Seungmoon_Song1;~Massimo_Sartori1;~Vikash_Kumar2", "gender": ";M;F;M;M;M;M;M;M;M;M;M;Not Specified;M;M;F;;M;;;M;M", "homepage": ";https://www.mcgill.ca/neuroc-lab/;https://cherylwang20.github.io/;;https://al.is.mpg.de/person/pschumacher;;;;;https://sites.google.com/view/jungdam;https://sites.google.com/mrl.snu.ac.kr/jungnam/home;;http://mskbiodyn.kaist.ac.kr;https://mskbiodyn.kaist.ac.kr/;http://mskbiodyn.kaist.ac.kr;;https://carbonsilicon.ai/;https://github.com/gitabtion;http://seungmoon.com/;https://people.utwente.nl/m.sartori;http://vikashplus.github.io/;https://carbonsilicon.ai/", "dblp": ";;;;;;269/4002;261/9159;117/7258;138/3462;;;;;;;;;;;82/7475;", "google_scholar": "lCt9zVkAAAAJ;WuSjVn0AAAAJ;;;;;Cv5lSo0AAAAJ;https://scholar.google.it/citations?user=IoHdcnUAAAAJ;https://scholar.google.ch/citations?user=Y1xCzE0AAAAJ;https://scholar.google.co.kr/citations?user=mUWAgvgAAAAJ;EByl80sAAAAJ;wwcApZUAAAAJ;;fYBtdpQAAAAJ;;GJ7fhU4AAAAJ;;;https://scholar.google.com/citations?hl=en;;nu3W--sAAAAJ;", "orcid": "0000-0002-2186-1550;;;0000-0003-1899-3234;;0000-0002-6034-2905;0009-0001-2764-6552;;0000-0002-3777-2202;;;;;;;0009-0009-4709-2014;;;;0000-0003-0930-6535;;", "linkedin": "vittorio-caggiano-26b6a7b/;;;;;;albertochiappa/;alessandro-marin-vargas-594914170/;;;;;;;;;;;;https://www.linkedin.com/mwlite/in/massimo-sartori-0612296;;", "or_profile": "~Vittorio_Caggiano1;~Guillaume_Durandau1;~HUIYI_WANG2;~Chun_Kwang_Tan1;~Pierre_Schumacher1;~Huawei_Wang1;~Alberto_Silvio_Chiappa1;~Alessandro_Marin_Vargas1;~Alexander_Mathis1;~Jungdam_Won1;~JUNGNAM_PARK1;~Gunwoo_Park1;~Beomsoo_Shin1;~Minseung_Kim2;~SEUNGBUM_KOO1;~Zhuo_Yang7;~Wei_Dang1;~Heng_Cai1;~Seungmoon_Song1;~Massimo_Sartori1;~Vikash_Kumar2;~song_jianfei1", "aff": ";McGill University;McGill University;Northeastern University;Max Planck Institute for Intelligent Systems, Max-Planck Institute;;Sony Group Corporation;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Seoul National University, Seoul National University;Seoul National University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Hangzhou Carbonsilicon AI Technology Co., Ltd;CarbonSiliconAI;CarbonSilicon.AI;Northeastern University;University of Twente;Meta Facebook;carbonsilicon.ai", "aff_domain": ";mcgill.ca;mcgill.ca;northeastern.edu;tuebingen.mpg.de;;sony.com;epfl.ch;epfl.ch;cse.snu.ac.kr;snu.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;carbonsilicon.ai;carbonsilicon.ai;carbonsilicon.ai;northeastern.edu;utwente.nl;facebook.com;carbonsilicon.ai", "position": ";Assistant Professor;PhD student;Postdoc;PhD Student;;Intern;PhD student;Assistant Professor;Assistant Professor;PhD student;MS student;PhD student;MS student;Associate Professor;Algorithm Engineer;Researcher;Researcher;Assistant Professor;Professor;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024myochallenge,\ntitle={MyoChallenge 2023: Towards Human-Level Dexterity and Agility},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3A84lx1JFh}\n}", "github": "", "project": "", "reviewers": "tm9C;MbhK;t2QP", "site": "https://openreview.net/forum?id=3A84lx1JFh", "pdf_size": 5338691, "rating": "5;5;8", "confidence": "4;3;3", "wc_summary_and_contributions": "97;48;54", "wc_strengths": "64;22;141", "wc_improvement": "124;132;20", "wc_limitations": "4;54;7", "wc_correctness": "1;26;8", "wc_clarity": "1;50;6", "wc_relation_to_prior_work": "1;9;7", "wc_documentation": "29;26;7", "wc_additional_feedback": "1;1;1", "wc_review": "322;368;251", "wc_reply_reviewers": "0;10;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 66.33333333333333, 21.82251640444388 ], "wc_strengths_avg": [ 75.66666666666667, 49.27699485786671 ], "wc_improvement_avg": [ 92.0, 51.01633725255731 ], "wc_limitations_avg": [ 21.666666666666668, 22.895899681432525 ], "wc_correctness_avg": [ 11.666666666666666, 10.530379332620877 ], "wc_clarity_avg": [ 19.0, 22.015146301277824 ], "wc_relation_to_prior_work_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_documentation_avg": [ 20.666666666666668, 9.741092797468305 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 313.6666666666667, 48.12714641678044 ], "wc_reply_reviewers_avg": [ 3.3333333333333335, 4.714045207910316 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 22, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3555994062476006757&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;4;4;5;5;6;6;6;6;7;8;9;1;10;11;12", "aff_unique_norm": "McGill University;Northeastern University;Max Planck Institute for Intelligent Systems;Sony Group Corporation;EPFL;Seoul National University;Korea Advanced Institute of Science and Technology;Carbonsilicon AI Technology Co.;CarbonSiliconAI;CarbonSilicon.AI;University of Twente;Meta;Carbonsilicon AI", "aff_unique_dep": ";;Intelligent Systems;;;;;AI Technology;;;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.mcgill.ca;https://www.northeastern.edu;https://www.mpi-is.mpg.de;https://www.sony.com;https://www.epfl.ch;https://www.snu.ac.kr;https://www.kaist.ac.kr;;;;https://www.utwente.nl;https://meta.com;https://carbonsilicon.ai", "aff_unique_abbr": "McGill;NEU;MPI-IS;Sony;EPFL;SNU;KAIST;;;;UT;Meta;", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Lausanne;Seoul", "aff_country_unique_index": "0;0;1;2;3;4;4;5;5;5;5;5;5;6;1;8;1;1", "aff_country_unique": "Canada;United States;Germany;Japan;Switzerland;South Korea;China;;Netherlands" }, { "title": "Unleashing the Potential of the Diffusion Model in Few-shot Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96767", "id": "3ACXaFxjTy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ACXaFxjTy", "openreview": "https://openreview.net/forum?id=3ACXaFxjTy", "poster": "/media/PosterPDFs/NeurIPS%202024/96767.png?t=1731410974.3816893", "project": "", "author_site": "Muzhi Zhu, Yang Liu, Zekai Luo, Chenchen Jing, Hao Chen, Guangkai Xu, Xinlong Wang, Chunhua Shen", "tldr": "", "abstract": "The Diffusion Model has not only garnered noteworthy achievements in the realm of image generation \nbut has also demonstrated its potential as an effective pretraining method utilizing unlabeled data. \nDrawing from the extensive potential unveiled by the Diffusion Model in both semantic correspondence and open vocabulary segmentation, our work initiates an investigation into employing the Latent Diffusion Model for Few-shot Semantic Segmentation.\nRecently, inspired by the in-context learning ability of large language models, Few-shot Semantic Segmentation has evolved into In-context Segmentation tasks, morphing into a crucial element in assessing generalist segmentation models.\nIn this context, we concentrate \non Few-shot Semantic Segmentation, \nestablishing a solid foundation for the future development of a Diffusion-based generalist model for segmentation. Our initial focus lies in understanding how to facilitate interaction between the query image and the support image, resulting in the proposal of a KV fusion method within the self-attention framework.\nSubsequently, we delve deeper into optimizing the infusion of information from the support mask and simultaneously re-evaluating how to provide reasonable supervision from the query mask.\nBased on our analysis, we establish a simple and effective framework named DiffewS, maximally retaining the original Latent Diffusion Model's generative framework and effectively utilizing the pre-training prior. Experimental results demonstrate that our method significantly outperforms the previous SOTA models in multiple settings.", "keywords": "Diffusion Model; Few-shot Semantic Segmentation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Muzhi Zhu;Yang Liu;Zekai Luo;Chenchen Jing;Hao Chen;Guangkai Xu;Xinlong Wang;Chunhua Shen", "authorids": "~Muzhi_Zhu1;~Yang_Liu83;~Zekai_Luo2;~Chenchen_Jing2;~Hao_Chen17;~Guangkai_Xu1;~Xinlong_Wang2;~Chunhua_Shen2", "gender": "M;M;;M;;M;M;", "homepage": "https://z-mu-z.github.io/;https://github.com/yangliu96;;https://jingchenchen.github.io/;;;;", "dblp": "157/1679;51/3710-98.html;;219/1679.html;;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=064gBH4AAAAJ;9JcQ2hwAAAAJ;;0TKm3fgAAAAJ;;https://scholar.google.com.hk/citations?user=v35sbGEAAAAJ;DPz0DjYAAAAJ;", "orcid": ";0009-0003-8540-9154;;;;0000-0002-1209-9533;;", "linkedin": ";;;;;;;", "or_profile": "~Muzhi_Zhu1;~Yang_Liu83;~Zekai_Luo2;~Chenchen_Jing2;~Hao_Chen17;~Guangkai_Xu1;~Xinlong_Wang2;~Chunhua_Shen2", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;;Zhejiang University;Beijing Academy of Artificial Intelligence;", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;;zju.edu.cn;baai.ac.cn;", "position": "PhD student;PhD student;;Postdoc;;PhD student;Researcher;", "bibtex": "@inproceedings{\nzhu2024unleashing,\ntitle={Unleashing the Potential of the Diffusion Model in Few-shot Semantic Segmentation},\nauthor={Muzhi Zhu and Yang Liu and Zekai Luo and Chenchen Jing and Hao Chen and Guangkai Xu and Xinlong Wang and Chunhua Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3ACXaFxjTy}\n}", "github": "", "reviewers": "uveh;WLLp;zc26", "pdf_size": 2737375, "rating": "6;6;7", "confidence": "5;4;4", "soundness": "3;4;4", "novelty": "2;3;4", "presentation": "3;4;4", "wc_summary": "61;61;74", "wc_strengths": "65;33;140", "wc_weaknesses": "165;70;162", "wc_questions": "154;21;21", "wc_limitations": "1;9;1", "wc_review": "446;194;398", "wc_reply_reviewers": "20;31;36", "wc_reply_authors": "34;39;68", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 65.33333333333333, 6.128258770283412 ], "wc_strengths_avg": [ 79.33333333333333, 44.84293577464447 ], "wc_weaknesses_avg": [ 132.33333333333334, 44.09333534925911 ], "wc_questions_avg": [ 65.33333333333333, 62.69680126520721 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 346.0, 109.2520022699813 ], "wc_reply_reviewers_avg": [ 29.0, 6.683312551921141 ], "wc_reply_authors_avg": [ 47.0, 14.98888477061141 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8994772759495899998&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;;zju.edu.cn;baai.ac.cn;", "author_num": 8, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Zhejiang University;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.baaic.cn", "aff_unique_abbr": "ZJU;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Graph Structure Inference with BAM: Neural Dependency Processing via Bilinear Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96766", "id": "3ADBiWNUBb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ADBiWNUBb", "openreview": "https://openreview.net/forum?id=3ADBiWNUBb", "poster": "/media/PosterPDFs/NeurIPS%202024/96766.png?t=1732969359.7493982", "project": "", "author_site": "Philipp Froehlich, Heinz Koeppl", "tldr": "", "abstract": "Detecting dependencies among variables is a fundamental task across scientific disciplines. We propose a novel neural network model for graph structure inference, which aims to learn a mapping from observational data to the corresponding underlying dependence structures. The model is trained with variably shaped and coupled simulated input data and requires only a single forward pass through the trained network for inference. Central to our approach is a novel bilinear attention mechanism (BAM) operating on covariance matrices of transformed data while respecting the geometry of the manifold of symmetric positive definite (SPD) matrices. Inspired by graphical lasso methods, our model optimizes over continuous graph representations in the SPD space, where inverse covariance matrices encode conditional independence relations. Empirical evaluations demonstrate the robustness of our method in detecting diverse dependencies, excelling in undirected graph estimation and showing competitive performance in completed partially directed acyclic graph estimation via a novel two-step approach. The trained model effectively detects causal relationships and generalizes well across different functional forms of nonlinear dependencies.", "keywords": "Graph Structure Inference;Causal Inference;Supervised Deep Learning;Geometric Deep Learning", "primary_area": "causal_inference", "supplementary_material": "", "author": "Philipp Froehlich;Heinz Koeppl", "authorids": "~Philipp_Froehlich1;~Heinz_Koeppl1", "gender": "M;M", "homepage": ";", "dblp": ";41/6084", "google_scholar": ";https://scholar.google.de/citations?user=WaPW80kAAAAJ", "orcid": "0000-0001-7852-2288;", "linkedin": ";", "or_profile": "~Philipp_Froehlich1;~Heinz_Koeppl1", "aff": "Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nfroehlich2024graph,\ntitle={Graph Structure Inference with {BAM}: Neural Dependency Processing via Bilinear Attention},\nauthor={Philipp Froehlich and Heinz Koeppl},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3ADBiWNUBb}\n}", "github": "", "reviewers": "wDfW;2mp1;rbW5;wqVr", "pdf_size": 25252014, "rating": "6;6;6;7", "confidence": "2;2;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;4;3", "wc_summary": "106;17;120;75", "wc_strengths": "61;37;60;55", "wc_weaknesses": "44;55;284;25", "wc_questions": "91;2;8;33", "wc_limitations": "50;5;1;1", "wc_review": "352;116;473;189", "wc_reply_reviewers": "0;18;0;17", "wc_reply_authors": "0;11;0;9", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.5, 39.588508433635134 ], "wc_strengths_avg": [ 53.25, 9.65336728815391 ], "wc_weaknesses_avg": [ 102.0, 105.62433431742896 ], "wc_questions_avg": [ 33.5, 35.174564673923115 ], "wc_limitations_avg": [ 14.25, 20.7047699818182 ], "wc_review_avg": [ 282.5, 139.2704204057703 ], "wc_reply_reviewers_avg": [ 8.75, 8.757139944068497 ], "wc_reply_authors_avg": [ 5.0, 5.049752469181039 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fX_Y0E6ROfsJ:scholar.google.com/&scioq=Graph+Structure+Inference+with+BAM:+Neural+Dependency+Processing+via+Bilinear+Attention&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "tu-darmstadt.de;tu-darmstadt.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Better by default: Strong pre-tuned MLPs and boosted trees on tabular data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96765", "id": "3BNPUDvqMt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3BNPUDvqMt", "openreview": "https://openreview.net/forum?id=3BNPUDvqMt", "poster": "/media/PosterPDFs/NeurIPS%202024/96765.png?t=1733346697.730047", "project": "", "author_site": "David Holzm\u00fcller, Leo Grinsztajn, Ingo Steinwart", "tldr": "", "abstract": "For classification and regression on tabular data, the dominance of gradient-boosted decision trees (GBDTs) has recently been challenged by often much slower deep learning methods with extensive hyperparameter tuning. We address this discrepancy by introducing (a) RealMLP, an improved multilayer perceptron (MLP), and (b) strong meta-tuned default parameters for GBDTs and RealMLP. We tune RealMLP and the default parameters on a meta-train benchmark with 118 datasets and compare them to hyperparameter-optimized versions on a disjoint meta-test benchmark with 90 datasets, as well as the GBDT-friendly benchmark by Grinsztajn et al. (2022). Our benchmark results on medium-to-large tabular datasets (1K--500K samples) show that RealMLP offers a favorable time-accuracy tradeoff compared to other neural baselines and is competitive with GBDTs in terms of benchmark scores. Moreover, a combination of RealMLP and GBDTs with improved default parameters can achieve excellent results without hyperparameter tuning. Finally, we demonstrate that some of RealMLP's improvements can also considerably improve the performance of TabR with default parameters.", "keywords": "tabular data;benchmark;default parameters;neural networks;deep learning;multilayer perceptron;gradient-boosted decision trees", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "David Holzm\u00fcller;Leo Grinsztajn;Ingo Steinwart", "authorids": "~David_Holzm\u00fcller1;~Leo_Grinsztajn1;~Ingo_Steinwart1", "gender": "M;M;M", "homepage": "https://www.isa.uni-stuttgart.de/en/institute/team/Holzmueller/;https://www.linkedin.com/in/l%C3%A9o-grinsztajn-339b5b173/;https://www.isa.uni-stuttgart.de/en/institute/team/Steinwart-00002/", "dblp": "207/7947;259/3203;89/3492", "google_scholar": "https://scholar.google.de/citations?user=pIT7A7QAAAAJ;;https://scholar.google.de/citations?user=zFuwHeAAAAAJ", "orcid": "0000-0002-9443-0049;;0000-0002-4436-7109", "linkedin": "david-holzm%C3%BCller-164a9b256/;;", "or_profile": "~David_Holzm\u00fcller1;~Leo_Grinsztajn1;~Ingo_Steinwart1", "aff": "INRIA;;University of Stuttgart", "aff_domain": "inria.fr;;uni-stuttgart.de", "position": "Postdoc;;Full Professor", "bibtex": "@inproceedings{\nholzm{\\\"u}ller2024better,\ntitle={Better by default: Strong pre-tuned {MLP}s and boosted trees on tabular data},\nauthor={David Holzm{\\\"u}ller and Leo Grinsztajn and Ingo Steinwart},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3BNPUDvqMt}\n}", "github": "", "reviewers": "vGM4;eZ6k;RnHK;5ngr;J8gT", "pdf_size": 3797671, "rating": "4;4;4;4;7", "confidence": "4;5;5;4;4", "soundness": "2;3;3;3;4", "novelty": "3;2;2;2;4", "presentation": "3;3;3;3;4", "wc_summary": "78;80;85;64;121", "wc_strengths": "68;38;93;83;87", "wc_weaknesses": "289;186;221;304;107", "wc_questions": "80;58;44;53;87", "wc_limitations": "15;10;1;3;8", "wc_review": "530;372;444;507;410", "wc_reply_reviewers": "19;919;200;184;107", "wc_reply_authors": "194;362;245;116;169", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 4.6, 1.2 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 85.6, 19.022092419079453 ], "wc_strengths_avg": [ 73.8, 19.711925324533876 ], "wc_weaknesses_avg": [ 221.4, 71.74008642314281 ], "wc_questions_avg": [ 64.4, 16.378034070058593 ], "wc_limitations_avg": [ 7.4, 5.0039984012787215 ], "wc_review_avg": [ 452.6, 58.88157606586291 ], "wc_reply_reviewers_avg": [ 285.8, 323.05380356838396 ], "wc_reply_authors_avg": [ 217.2, 83.47790126734141 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6093258140261447860&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "inria.fr;;uni-stuttgart.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "INRIA;University of Stuttgart", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.uni-stuttgart.de", "aff_unique_abbr": "INRIA;USTuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;Germany" }, { "title": "On Tractable $\\Phi$-Equilibria in Non-Concave Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96764", "id": "3CtTMF5zzM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3CtTMF5zzM", "openreview": "https://openreview.net/forum?id=3CtTMF5zzM", "poster": "", "project": "", "author_site": "Yang Cai, Constantinos Daskalakis, Haipeng Luo, Chen-Yu Wei, Weiqiang Zheng", "tldr": "", "abstract": "While Online Gradient Descent and other no-regret learning procedures are known to efficiently converge to a coarse correlated equilibrium in games where each agent's utility is concave in their own strategy, this is not the case when utilities are non-concave -- a common scenario in machine learning applications involving strategies parameterized by deep neural networks, or when agents' utilities are computed by neural networks, or both. Non-concave games introduce significant game-theoretic and optimization challenges: (i) Nash equilibria may not exist; (ii) local Nash equilibria, though they exist, are intractable; and (iii) mixed Nash, correlated, and coarse correlated equilibria generally have infinite support and are intractable. To sidestep these challenges, we revisit the classical solution concept of $\\Phi$-equilibria introduced by Greenwald and Jafari [GJ03], which is guaranteed to exist for an arbitrary set of strategy modifications $\\Phi$ even in non-concave games [SL07]. However, the tractability of $\\Phi$-equilibria in such games remains elusive. In this paper, we initiate the study of tractable $\\Phi$-equilibria in non-concave games and examine several natural families of strategy modifications. We show that when $\\Phi$ is finite, there exists an efficient uncoupled learning algorithm that approximates the corresponding $\\Phi$-equilibria. Additionally, we explore cases where $\\Phi$ is infinite but consists of local modifications, showing that Online Gradient Descent can efficiently approximate $\\Phi$-equilibria in non-trivial regimes.", "keywords": "Non-Concave Games;$\\Phi$-Equilibrium;$\\Phi$-Regret Minimization;Learning in Games", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Yang Cai;Constantinos Costis Daskalakis;Haipeng Luo;Chen-Yu Wei;Weiqiang Zheng", "authorids": "~Yang_Cai1;~Constantinos_Costis_Daskalakis1;~Haipeng_Luo1;~Chen-Yu_Wei1;~Weiqiang_Zheng1", "gender": ";M;M;M;M", "homepage": ";http://people.csail.mit.edu/costis/;https://haipeng-luo.net/;https://bahh723.github.io/;https://weiqiang-zheng.com/", "dblp": ";;62/2576;183/1729;277/5088", "google_scholar": ";iTv2cOgAAAAJ;ct2hw4UAAAAJ;2L2cR-kAAAAJ;YrfhnIwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yang_Cai1;~Constantinos_Costis_Daskalakis1;~Haipeng_Luo1;~Chen-Yu_Wei1;~Weiqiang_Zheng1", "aff": ";Massachusetts Institute of Technology;University of Southern California;University of Virginia, Charlottesville;Yale University", "aff_domain": ";mit.edu;usc.edu;virginia.edu;yale.edu", "position": ";Full Professor;Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ncai2024on,\ntitle={On Tractable \\${\\textbackslash}Phi\\$-Equilibria in Non-Concave Games},\nauthor={Yang Cai and Constantinos Costis Daskalakis and Haipeng Luo and Chen-Yu Wei and Weiqiang Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3CtTMF5zzM}\n}", "github": "", "reviewers": "14T6;oGwK;GeMu;r7Yv", "pdf_size": 551621, "rating": "7;7;7;7", "confidence": "4;3;2;1", "soundness": "4;3;3;3", "novelty": "4;3;3;3", "presentation": "4;3;3;3", "wc_summary": "48;80;68;44", "wc_strengths": "44;85;93;23", "wc_weaknesses": "115;168;28;23", "wc_questions": "265;1;114;23", "wc_limitations": "1;9;13;1", "wc_review": "473;343;316;114", "wc_reply_reviewers": "17;39;17;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;0", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.0, 14.696938456699069 ], "wc_strengths_avg": [ 61.25, 28.86498744153546 ], "wc_weaknesses_avg": [ 83.5, 60.97745485013293 ], "wc_questions_avg": [ 100.75, 103.86138599113724 ], "wc_limitations_avg": [ 6.0, 5.196152422706632 ], "wc_review_avg": [ 311.5, 128.5505737054487 ], "wc_reply_reviewers_avg": [ 18.25, 13.845125496000389 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9661525124451945885&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";mit.edu;usc.edu;virginia.edu;yale.edu", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;University of Southern California;University of Virginia;Yale University", "aff_unique_dep": ";;;", "aff_unique_url": "https://web.mit.edu;https://www.usc.edu;https://www.virginia.edu;https://www.yale.edu", "aff_unique_abbr": "MIT;USC;UVA;Yale", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Charlottesville", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Expressive Gaussian Human Avatars from Monocular RGB Video", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96763", "id": "3CweLZFNyl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3CweLZFNyl", "openreview": "https://openreview.net/forum?id=3CweLZFNyl", "poster": "", "project": "", "author_site": "Hezhen Hu, Zhiwen Fan, Tianhao Wu, Yihan Xi, Seoyoung Lee, Georgios Pavlakos, Zhangyang "Atlas" Wang", "tldr": "", "abstract": "Nuanced expressiveness, especially through detailed hand and facial expressions, is pivotal for enhancing the realism and vitality of digital human representations.\nIn this work, we aim to learn expressive human avatars from a monocular RGB video; a setting that introduces new challenges in capturing and animating fine-grained details.\nTo this end, we introduce EVA, a drivable human model that can recover fine details based on 3D Gaussians and an expressive parametric human model, SMPL-X.\nFocused on enhancing expressiveness, our work makes three key contributions.\nFirst, we highlight the importance of aligning the SMPL-X model with the video frames for effective avatar learning.\nRecognizing the limitations of current methods for estimating SMPL-X parameters from in-the-wild videos, we introduce a reconstruction module that significantly improves the image-model alignment.\nSecond, we propose a context-aware adaptive density control strategy, which is adaptively adjusting the gradient thresholds to accommodate the varied granularity across body parts.\nThird, we develop a feedback mechanism that predicts per-pixel confidence to better guide the optimization of 3D Gaussians.\nExtensive experiments on two benchmarks demonstrate the superiority of our approach both quantitatively and qualitatively, especially on the fine-grained hand and facial details. \nWe make our code available at the project website: https://evahuman.github.io.", "keywords": "expressiveness;human avatar;monocular RGB video", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hezhen Hu;Zhiwen Fan;Tianhao Walter Wu;Yihan Xi;Seoyoung Lee;Georgios Pavlakos;Zhangyang Wang", "authorids": "~Hezhen_Hu2;~Zhiwen_Fan2;~Tianhao_Walter_Wu1;~Yihan_Xi1;~Seoyoung_Lee1;~Georgios_Pavlakos1;~Zhangyang_Wang1", "gender": "M;;M;F;;M;M", "homepage": ";;https://chikayan.github.io/;;https://seoyoung1215.github.io/;https://geopavlakos.github.io/;https://vita-group.github.io", "dblp": "273/3660;;17/1976-3;;16/2117.html;145/3361;119/4026", "google_scholar": "Fff-9WAAAAAJ;;HwE5K78AAAAJ;;https://scholar.google.com/citations?hl=en;iH2BZ8UAAAAJ;pxFyKAIAAAAJ", "orcid": ";;0000-0002-3807-5839;;;;", "linkedin": ";;;yihan-xi-4b4822225/;seoyoung-lee-8b4036177/;;", "or_profile": "~Hezhen_Hu2;~Zhiwen_Fan2;~Tianhao_Walter_Wu1;~Yihan_Xi1;~Seoyoung_Lee1;~Georgios_Pavlakos1;~Zhangyang_Wang1", "aff": "University of Texas at Austin;;University of Cambridge;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;;cam.ac.uk;utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "position": "Postdoc;;PhD student;Undergrad student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhu2024expressive,\ntitle={Expressive Gaussian Human Avatars from Monocular {RGB} Video},\nauthor={Hezhen Hu and Zhiwen Fan and Tianhao Walter Wu and Yihan Xi and Seoyoung Lee and Georgios Pavlakos and Zhangyang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3CweLZFNyl}\n}", "github": "", "reviewers": "ZKQ6;4SGk;g7vJ;fFxo;1sDc", "pdf_size": 3231462, "rating": "5;5;6;6;6", "confidence": "4;3;4;5;4", "soundness": "3;3;3;3;3", "novelty": "3;2;4;3;2", "presentation": "3;2;3;4;3", "wc_summary": "111;131;65;77;91", "wc_strengths": "60;46;77;90;41", "wc_weaknesses": "203;112;84;80;169", "wc_questions": "135;31;51;47;37", "wc_limitations": "64;11;10;67;1", "wc_review": "573;331;287;361;339", "wc_reply_reviewers": "54;10;0;20;23", "wc_reply_authors": "75;69;0;30;33", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 95.0, 23.630488780387086 ], "wc_strengths_avg": [ 62.8, 18.475930287809597 ], "wc_weaknesses_avg": [ 129.6, 48.55759466859948 ], "wc_questions_avg": [ 60.2, 38.06520721078502 ], "wc_limitations_avg": [ 30.6, 28.72350953487404 ], "wc_review_avg": [ 378.2, 100.32427423111515 ], "wc_reply_reviewers_avg": [ 21.4, 18.194504664870653 ], "wc_reply_authors_avg": [ 41.4, 27.58695343817436 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6454972243679027, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3437340920782428405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;;cam.ac.uk;utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Texas at Austin;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.cam.ac.uk", "aff_unique_abbr": "UT Austin;Cambridge", "aff_campus_unique_index": "0;1;0;0;0;0", "aff_campus_unique": "Austin;Cambridge", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "LLM Circuit Analyses Are Consistent Across Training and Scale", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96762", "id": "3Ds5vNudIE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3Ds5vNudIE", "openreview": "https://openreview.net/forum?id=3Ds5vNudIE", "poster": "/media/PosterPDFs/NeurIPS%202024/96762.png?t=1731733601.1539028", "project": "", "author_site": "Curt Tigges, Michael Hanna, Qinan Yu, Stella Biderman", "tldr": "", "abstract": "Most currently deployed LLMs undergo continuous training or additional finetuning. By contrast, most research into LLMs' internal mechanisms focuses on models at one snapshot in time (the end of pre-training), raising the question of whether their results generalize to real-world settings. Existing studies of mechanisms over time focus on encoder-only or toy models, which differ significantly from most deployed models. In this study, we track how model mechanisms, operationalized as circuits, emerge and evolve across 300 billion tokens of training in decoder-only LLMs, in models ranging from 70 million to 2.8 billion parameters. We find that task abilities and the functional components that support them emerge consistently at similar token counts across scale. Moreover, although such components may be implemented by different attention heads over time, the overarching algorithm that they implement remains. Surprisingly, both these algorithms and the types of components involved therein tend to replicate across model scale. Finally, we find that circuit size correlates with model size and can fluctuate considerably over time even when the same algorithm is implemented. These results suggest that circuit analyses conducted on small models at the end of pre-training can provide insights that still apply after additional training and over model scale.", "keywords": "training dynamics;interpretability;mechanistic interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/7dc002b03374e43ec40854058938decbd89fe7c6.zip", "author": "Curt Tigges;Michael Hanna;Qinan Yu;Stella Biderman", "authorids": "~Curt_Tigges1;~Michael_Hanna1;~Qinan_Yu1;~Stella_Biderman1", "gender": "M;M;F;F", "homepage": "https://curttigges.com;http://hannamw.github.io;https://www.linkedin.com/in/qinan-yu-9b50471b2/;http://www.stellabiderman.com", "dblp": ";306/9666;;239/5641", "google_scholar": ";0wOdTeYAAAAJ;;bO7H0DAAAAAJ", "orcid": ";;;0000-0001-8228-1042", "linkedin": "curttigges/;michael-hanna-a29279140/;;stellabiderman", "or_profile": "~Curt_Tigges1;~Michael_Hanna1;~Qinan_Yu1;~Stella_Biderman1", "aff": "EleutherAI Institute;University of Amsterdam;Brown University;Booz Allen Hamilton", "aff_domain": "eleuther.ai;uva.nl;brown.edu;boozallen.com", "position": "Researcher;PhD student;Undergrad student;Industry researcher", "bibtex": "@inproceedings{\ntigges2024llm,\ntitle={{LLM} Circuit Analyses Are Consistent Across Training and Scale},\nauthor={Curt Tigges and Michael Hanna and Qinan Yu and Stella Biderman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3Ds5vNudIE}\n}", "github": "", "reviewers": "v6cM;865w;dE5s;wt3i", "pdf_size": 1034141, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "4;3;3;3", "novelty": "3;4;3;2", "presentation": "3;3;3;4", "wc_summary": "72;97;325;227", "wc_strengths": "95;223;44;99", "wc_weaknesses": "77;196;33;136", "wc_questions": "420;199;340;152", "wc_limitations": "1;75;6;87", "wc_review": "665;790;748;701", "wc_reply_reviewers": "254;26;128;105", "wc_reply_authors": "674;22;388;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 180.25, 102.2090382500491 ], "wc_strengths_avg": [ 115.25, 65.88009942311866 ], "wc_weaknesses_avg": [ 110.5, 61.41864537744218 ], "wc_questions_avg": [ 277.75, 107.38336696155508 ], "wc_limitations_avg": [ 42.25, 39.02162861798569 ], "wc_review_avg": [ 726.0, 47.23875527572673 ], "wc_reply_reviewers_avg": [ 128.25, 81.86688891120757 ], "wc_reply_authors_avg": [ 271.0, 279.0788419067271 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6726272900233407984&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "eleuther.ai;uva.nl;brown.edu;boozallen.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "EleutherAI Institute;University of Amsterdam;Brown University;Booz Allen Hamilton", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.eleuther.ai;https://www.uva.nl;https://www.brown.edu;https://www.boozallen.com", "aff_unique_abbr": "EleutherAI;UvA;Brown;BAH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Netherlands" }, { "title": "Multiclass Transductive Online Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96761", "id": "3EREVfwALz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3EREVfwALz", "openreview": "https://openreview.net/forum?id=3EREVfwALz", "poster": "", "project": "", "author_site": "Steve Hanneke, Vinod Raman, Amirreza Shaeiri, Unique Subedi", "tldr": "", "abstract": "We consider the problem of multiclass transductive online learning when the number of labels can be unbounded. Previous works by Ben-David et al. [1997] and Hanneke et al. [2024] only consider the case of binary and finite label spaces respectively. The latter work determined that their techniques fail to extend to the case of unbounded label spaces, and they pose the question of characterizing the optimal mistake bound for unbounded label spaces. We answer this question, by showing that a new dimension, termed the Level-constrained Littlestone dimension, characterizes online learnability in this setting. Along the way, we show that the trichotomy of possible minimax rates established by Hanneke et al. [2024] for finite label spaces in the realizable setting continues to hold even when the label space is unbounded. In particular, if the learner plays for $T \\in \\mathbb{N}$ rounds, its minimax expected number of mistakes can only grow like $\\Theta(T)$, $\\Theta(\\log T)$, or $\\Theta(1)$. To prove this result, we give another combinatorial dimension, termed the Level-constrained Branching dimension, and show that its finiteness characterizes constant minimax expected mistake-bounds. The trichotomy is then determined by a combination of the Level-constrained Littlestone and Branching dimensions. Quantitatively, our upper bounds improve upon existing multiclass upper bounds in Hanneke et al. [2024] by removing the dependence on the label set size. In doing so, we explicitly construct learning algorithms that can handle extremely large or unbounded label spaces. A key component of our algorithm is a new notion of shattering that exploits the sequential nature of transductive online learning. Finally, we complete our results by proving expected regret bounds in the agnostic setting, extending the result of Hanneke et al. [2024].", "keywords": "Online Learning;Transductive Online Learning;Multiclass Classification", "primary_area": "online_learning", "supplementary_material": "", "author": "Steve Hanneke;Vinod Raman;Amirreza Shaeiri;Unique Subedi", "authorids": "~Steve_Hanneke1;~Vinod_Raman1;~Amirreza_Shaeiri1;~Unique_Subedi2", "gender": "M;M;M;M", "homepage": "http://www.stevehanneke.com;https://vinodkraman.github.io;;https://unique-subedi.github.io/", "dblp": "40/154;126/5382;;", "google_scholar": "fEhNO7YAAAAJ;Wn5QzOgAAAAJ;nRTM5b8AAAAJ;DO16ipsAAAAJ", "orcid": ";;0000-0002-2715-7652;", "linkedin": ";;;", "or_profile": "~Steve_Hanneke1;~Vinod_Raman1;~Amirreza_Shaeiri1;~UNIQUE_SUBEDI1", "aff": "Purdue University;Apple;Purdue University;University of Michigan - Ann Arbor", "aff_domain": "purdue.edu;apple.com;purdue.edu;umich.edu", "position": "Assistant Professor;Intern;PhD student;PhD student", "bibtex": "@inproceedings{\nhanneke2024multiclass,\ntitle={Multiclass Transductive Online Learning},\nauthor={Steve Hanneke and Vinod Raman and Amirreza Shaeiri and Unique Subedi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3EREVfwALz}\n}", "github": "", "reviewers": "i2TD;DRE4;Cx7F;Nc66", "pdf_size": 371435, "rating": "6;7;7;8", "confidence": "3;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "89;153;95;142", "wc_strengths": "31;32;34;177", "wc_weaknesses": "24;81;91;390", "wc_questions": "30;65;124;112", "wc_limitations": "1;20;82;5", "wc_review": "175;351;426;826", "wc_reply_reviewers": "0;50;25;34", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 119.75, 28.101378969723175 ], "wc_strengths_avg": [ 68.5, 62.65181561614955 ], "wc_weaknesses_avg": [ 146.5, 142.8889428892243 ], "wc_questions_avg": [ 82.75, 37.599035891894886 ], "wc_limitations_avg": [ 27.0, 32.53459696999488 ], "wc_review_avg": [ 444.5, 238.35739971731525 ], "wc_reply_reviewers_avg": [ 27.25, 18.102140757380052 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18215855743993836816&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 3, "email": "purdue.edu;apple.com;purdue.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Purdue University;Apple;University of Michigan", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.purdue.edu;https://www.apple.com;https://www.umich.edu", "aff_unique_abbr": "Purdue;Apple;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LongVideoBench: A Benchmark for Long-context Interleaved Video-Language Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97862", "id": "3G1ZDXOI4f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3G1ZDXOI4f", "openreview": "https://openreview.net/forum?id=3G1ZDXOI4f", "poster": "", "project": "", "author_site": "Haoning Wu, DONGXU LI, Bei Chen, Junnan Li", "tldr": "", "abstract": "Large multimodal models (LMMs) are processing increasingly longer and richer inputs. Albeit the progress, few public benchmark is available to measure such development. To mitigate this gap, we introduce LongVideoBench, a question-answering benchmark that features video-language interleaved inputs up to an hour long. Our benchmark includes 3,763 varying-length web-collected videos with their subtitles across diverse themes, designed to comprehensively evaluate LMMs on long-term multimodal understanding. To achieve this, we interpret the primary challenge as to accurately retrieve and reason over detailed multimodal information from long inputs. As such, we formulate a novel video question-answering task termed referring reasoning. Specifically, as part of the question, it contains a referring query that references related video contexts, called referred context. The model is then required to reason over relevant video details from the referred context. Following the paradigm of referring reasoning, we curate 6,678 human-annotated multiple-choice questions in 17 fine-grained categories, establishing one of the most comprehensive benchmarks for long-form video understanding. Evaluations suggest that the LongVideoBench presents significant challenges even for the most advanced proprietary models (e.g. GPT-4o, Gemini-1.5-Pro), while their open-source counterparts show an even larger performance gap. In addition, our results indicate that model performance on the benchmark improves only when they are capable of processing more frames, positioning LongVideoBench as a valuable benchmark for evaluating future-generation long-context LMMs.", "keywords": "Long Context;Videos;Multimodal;Interleaved", "primary_area": "", "supplementary_material": "", "author": "Haoning Wu;Dongxu Li;Bei Chen;Junnan Li", "authorids": "~Haoning_Wu1;~Dongxu_Li3;~Bei_Chen3;~Junnan_Li2", "gender": "M;F;M;M", "homepage": "https://teowu.github.io;http://ml.cs.tsinghua.edu.cn/~beichen/;https://sites.google.com/view/dongxu-li/home;https://sites.google.com/site/junnanlics/", "dblp": "264/5802-1;;;193/6773-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=wth-VbMAAAAJ;Po65v_MAAAAJ;https://scholar.google.com/citations?view_op=list_works;MuUhwi0AAAAJ", "orcid": "0000-0001-8642-8101;;;", "linkedin": ";;;", "or_profile": "~Haoning_Wu1;~Bei_Chen3;~Dongxu_Li1;~Junnan_li1", "aff": "Nanyang Technological University;Microsoft;SalesForce.com;Salesforce Research", "aff_domain": "ntu.edu.sg;microsoft.com;salesforce.com;salesforce.com", "position": "PhD student;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nwu2024longvideobench,\ntitle={LongVideoBench: A Benchmark for Long-context Interleaved Video-Language Understanding},\nauthor={Haoning Wu and Dongxu Li and Bei Chen and Junnan Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3G1ZDXOI4f}\n}", "github": "", "reviewers": "ZDwx;fQc5;S89L", "pdf_size": 5519595, "rating": "5;5;8", "confidence": "5;5;4", "wc_summary_and_contributions": "90;73;312", "wc_strengths": "51;56;53", "wc_improvement": "10;82;44", "wc_limitations": "30;14;16", "wc_correctness": "1;4;14", "wc_clarity": "11;8;13", "wc_relation_to_prior_work": "1;31;67", "wc_documentation": "4;9;12", "wc_additional_feedback": "1;1;1", "wc_review": "199;278;532", "wc_reply_reviewers": "0;45;0", "wc_reply_authors": "31;31;12", "reply_reviewers": "0;1;0", "reply_authors": "2;3;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 158.33333333333334, 108.88015838016075 ], "wc_strengths_avg": [ 53.333333333333336, 2.0548046676563256 ], "wc_improvement_avg": [ 45.333333333333336, 29.4089933334837 ], "wc_limitations_avg": [ 20.0, 7.118052168020874 ], "wc_correctness_avg": [ 6.333333333333333, 5.557777333511022 ], "wc_clarity_avg": [ 10.666666666666666, 2.0548046676563256 ], "wc_relation_to_prior_work_avg": [ 33.0, 26.981475126464083 ], "wc_documentation_avg": [ 8.333333333333334, 3.2998316455372216 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 336.3333333333333, 142.06649460336834 ], "wc_reply_reviewers_avg": [ 15.0, 21.213203435596427 ], "wc_reply_authors_avg": [ 24.666666666666668, 8.956685895029603 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14873073151582490177&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ntu.edu.sg;microsoft.com;salesforce.com;salesforce.com", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Nanyang Technological University;Microsoft;Salesforce", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.microsoft.com;https://www.salesforce.com", "aff_unique_abbr": "NTU;Microsoft;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Singapore;United States" }, { "title": "Multidimensional Fractional Programming for Normalized Cuts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96760", "id": "3G8sjUZqO3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3G8sjUZqO3", "openreview": "https://openreview.net/forum?id=3G8sjUZqO3", "poster": "/media/PosterPDFs/NeurIPS%202024/96760.png?t=1733193089.7399852", "project": "", "author_site": "Yannan Chen, Beichen Huang, Licheng Zhao, Kaiming Shen", "tldr": "", "abstract": "The Normalized cut (NCut) problem is a fundamental and yet notoriously difficult one in the unsupervised clustering field. Because the NCut problem is fractionally structured, the fractional programming (FP) based approach has worked its way into a new frontier. However, the conventional FP techniques are insufficient: the classic Dinkelbach's transform can only deal with a single ratio and hence is limited to the two-class clustering, while the state-of-the-art quadratic transform accounts for multiple ratios but fails to convert the NCut problem to a tractable form. This work advocates a novel extension of the quadratic transform to the multidimensional ratio case, thereby recasting the fractional 0-1 NCut problem into a bipartite matching problem---which can be readily solved in an iterative manner. Furthermore, we explore the connection between the proposed multidimensional FP method and the minorization-maximization theory to verify the convergence.", "keywords": "Data clustering;normalized cut;discrete optimization;fractional programming", "primary_area": "optimization", "supplementary_material": "/attachment/c0443bd70a0644a9cd6e3694a25cd8a1152f0f71.zip", "author": "Yannan Chen;Beichen Huang;Licheng Zhao;Kaiming Shen", "authorids": "~Yannan_Chen2;~Beichen_Huang1;~Licheng_Zhao1;~Kaiming_Shen1", "gender": "M;M;M;M", "homepage": ";https://beichenhuang.github.io/;;https://kaimingshen.github.io/", "dblp": ";;;", "google_scholar": "AuyOntgAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-8033-390X;0000-0003-0680-7975", "linkedin": ";;;", "or_profile": "~Yannan_Chen2;~Beichen_Huang1;~Licheng_Zhao1;~Kaiming_Shen1", "aff": "The Chinese University of Hong Kong, Shenzhen;McMaster University;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.cn;mcmaster.ca;sribd.cn;cuhk.edu.cn", "position": "PhD student;Undergrad student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2024multidimensional,\ntitle={Multidimensional Fractional Programming for Normalized Cuts},\nauthor={Yannan Chen and Beichen Huang and Licheng Zhao and Kaiming Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3G8sjUZqO3}\n}", "github": "", "reviewers": "mC6e;fiXH;MUdC", "pdf_size": 1474198, "rating": "3;6;6", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "2;2;2", "presentation": "2;3;3", "wc_summary": "70;194;82", "wc_strengths": "56;49;190", "wc_weaknesses": "77;70;205", "wc_questions": "8;16;82", "wc_limitations": "1;20;1", "wc_review": "212;349;560", "wc_reply_reviewers": "469;50;73", "wc_reply_authors": "1175;0;0", "reply_reviewers": "1;1;1", "reply_authors": "3;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 115.33333333333333, 55.841044243658466 ], "wc_strengths_avg": [ 98.33333333333333, 64.88108781112996 ], "wc_weaknesses_avg": [ 117.33333333333333, 62.055530687352025 ], "wc_questions_avg": [ 35.333333333333336, 33.159546974522364 ], "wc_limitations_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_review_avg": [ 373.6666666666667, 143.13707493945174 ], "wc_reply_reviewers_avg": [ 197.33333333333334, 192.32668965995217 ], "wc_reply_authors_avg": [ 391.6666666666667, 553.9003119294622 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8262081299794583809&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cuhk.edu.cn;mcmaster.ca;sribd.cn;cuhk.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Chinese University of Hong Kong;McMaster University;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.mcmaster.ca;http://www.sribd.cn", "aff_unique_abbr": "CUHK;McMaster;", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Shenzhen;;Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Canada" }, { "title": "Enhancing Large Language Models through Adaptive Tokenizers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96759", "id": "3H1wqEdK4z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3H1wqEdK4z", "openreview": "https://openreview.net/forum?id=3H1wqEdK4z", "poster": "", "project": "", "author_site": "Mengyu Zheng, Hanting Chen, Tianyu Guo, Chong Zhu, Binfan Zheng, Chang Xu, Yunhe Wang", "tldr": "", "abstract": "Tokenizers serve as crucial interfaces between models and linguistic data, substantially influencing the efficacy and precision of large language models (LLMs). Traditional tokenization methods often rely on static frequency-based statistics and are not inherently synchronized with LLM architectures, which may limit model performance. In this study, we propose a simple but effective method to learn tokenizers specifically engineered for seamless integration with LLMs. Initiating with a broad initial vocabulary, we refine our tokenizer by monitoring changes in the model\u2019s perplexity during training, allowing for the selection of a tokenizer that is closely aligned with the model\u2019s evolving dynamics. Through iterative refinement, we develop an optimized tokenizer. Our empirical evaluations demonstrate that this adaptive approach significantly enhances accuracy compared to conventional methods, maintaining comparable vocabulary sizes and affirming its potential to improve LLM functionality.", "keywords": "Large Language Model;Tokenizer", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Mengyu Zheng;Hanting Chen;Tianyu Guo;Chong Zhu;Binfan Zheng;Chang Xu;Yunhe Wang", "authorids": "~Mengyu_Zheng1;~Hanting_Chen1;~Tianyu_Guo1;~Chong_Zhu1;~Binfan_Zheng1;~Chang_Xu4;~Yunhe_Wang1", "gender": "F;M;M;M;M;M;", "homepage": ";;;https://github.com/luxrck;;https://www.wangyunhe.site/;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": "228/1693;232/2060;218/7273;55/6109;;63/8217-1;97/2966-2", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;RPK3oQgAAAAJ;;;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ;N4F_3eoAAAAJ", "orcid": ";;;;0009-0006-6387-0304;0000-0002-0142-509X;0000-0002-4756-0609", "linkedin": ";;;;;;", "or_profile": "~Mengyu_Zheng1;~Hanting_Chen1;~Tianyu_Guo1;~Chong_Zhu1;~Binfan_Zheng1;~Yunhe_Wang1;~Charles_Xu1", "aff": "University of Sydney;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;University of Sydney", "aff_domain": "usyd.edu.au;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;sydney.eud.au", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nzheng2024enhancing,\ntitle={Enhancing Large Language Models through Adaptive Tokenizers},\nauthor={Mengyu Zheng and Hanting Chen and Tianyu Guo and Chong Zhu and Binfan Zheng and Chang Xu and Yunhe Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3H1wqEdK4z}\n}", "github": "", "reviewers": "GBKH;t7me;L8Xj;aWgE", "pdf_size": 403854, "rating": "5;6;7;7", "confidence": "2;4;4;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "45;144;29;89", "wc_strengths": "43;159;42;82", "wc_weaknesses": "90;143;344;231", "wc_questions": "40;156;1;105", "wc_limitations": "2;35;8;23", "wc_review": "220;637;424;530", "wc_reply_reviewers": "68;11;12;13", "wc_reply_authors": "351;14;15;15", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.75, 44.611517571138506 ], "wc_strengths_avg": [ 81.5, 47.563115961845895 ], "wc_weaknesses_avg": [ 202.0, 96.21590305141869 ], "wc_questions_avg": [ 75.5, 59.5 ], "wc_limitations_avg": [ 17.0, 12.90348790056394 ], "wc_review_avg": [ 452.75, 154.04118767394647 ], "wc_reply_reviewers_avg": [ 26.0, 24.259018941416407 ], "wc_reply_authors_avg": [ 98.75, 145.63717760242403 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ag5h5Rw5sYMJ:scholar.google.com/&scioq=Enhancing+Large+Language+Models+through+Adaptive+Tokenizers&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "usyd.edu.au;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;sydney.eud.au", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1;0", "aff_unique_norm": "University of Sydney;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sydney.edu.au;https://www.huawei.com", "aff_unique_abbr": "USYD;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "Geometric-Averaged Preference Optimization for Soft Preference Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96758", "id": "3HpCVZV9it", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3HpCVZV9it", "openreview": "https://openreview.net/forum?id=3HpCVZV9it", "poster": "/media/PosterPDFs/NeurIPS%202024/96758.png?t=1733220044.342178", "project": "", "author_site": "Hiroki Furuta, Kuang-Huei Lee, Shixiang (Shane) Gu, Yutaka Matsuo, Aleksandra Faust, Heiga Zen, Izzeddin Gur", "tldr": "", "abstract": "Many algorithms for aligning LLMs with human preferences assume that human preferences are binary and deterministic.\nHowever, human preferences can vary across individuals, and therefore should be represented distributionally.\nIn this work, we introduce the distributional soft preference labels and improve Direct Preference Optimization (DPO) with a weighted geometric average of the LLM output likelihood in the loss function.\nThis approach adjusts the scale of learning loss based on the soft labels such that the loss would approach zero when the responses are closer to equally preferred.\nThis simple modification can be easily applied to any DPO-based methods and mitigate over-optimization and objective mismatch, which prior works suffer from.\nOur experiments simulate the soft preference labels with AI feedback from LLMs and demonstrate that geometric averaging consistently improves performance on standard benchmarks for alignment research. \nIn particular, we observe more preferable responses than binary labels and significant improvements where modestly-confident labels are in the majority.", "keywords": "Reinforcement Learning from Human Feedback;Alignment;Soft Preference Labels;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hiroki Furuta;Kuang-Huei Lee;Shixiang Shane Gu;Yutaka Matsuo;Aleksandra Faust;Heiga Zen;Izzeddin Gur", "authorids": "~Hiroki_Furuta1;~Kuang-Huei_Lee1;~Shixiang_Shane_Gu1;~Yutaka_Matsuo1;~Aleksandra_Faust1;~Heiga_Zen1;~Izzeddin_Gur1", "gender": "M;M;M;F;M;;M", "homepage": "https://github.com/frt03;https://kuanghuei.github.io/;http://ymatsuo.com;http://www.afaust.info;https://research.google/people/heigazen;;https://sites.google.com/view/gugurus/home", "dblp": "267/2065;66/11466;m/YMatsuo.html;135/8420;42/7014;188/9027;121/0550", "google_scholar": "M0OhM1UAAAAJ;rE7-N30AAAAJ;Dy8iau4AAAAJ;RK72t68AAAAJ;z3IRvDwAAAAJ;qS_ugJAAAAAJ;B8wslVsAAAAJ", "orcid": ";;;0000-0002-3268-8685;0000-0002-8959-5471;;", "linkedin": ";;;aleksandrafaust;heiga-zen-b1a64b3;;", "or_profile": "~Hiroki_Furuta1;~Kuang-Huei_Lee1;~Yutaka_Matsuo1;~Aleksandra_Faust1;~Heiga_Zen1;~Izzeddin_Gur1;~Shixiang_Gu1", "aff": "Google DeepMind;Google;The University of Tokyo;Google Brain;Google DeepMind;Google;Google DeepMind", "aff_domain": "google.com;google.com;u-tokyo.ac.jp;google.com;deepmind.com;google.com;google.com", "position": "Intern;Researcher;Associate Professor;Principal Researcher;Principal Researcher;Research Scientist;Staff Research Scientist", "bibtex": "@inproceedings{\nfuruta2024geometricaveraged,\ntitle={Geometric-Averaged Preference Optimization for Soft Preference Labels},\nauthor={Hiroki Furuta and Kuang-Huei Lee and Shixiang Shane Gu and Yutaka Matsuo and Aleksandra Faust and Heiga Zen and Izzeddin Gur},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3HpCVZV9it}\n}", "github": "", "reviewers": "EtLq;98AM;msmA;GdY5;SyyB", "pdf_size": 748732, "rating": "5;6;6;6;7", "confidence": "2;4;4;2;4", "soundness": "3;3;3;2;3", "novelty": "3;3;2;3;3", "presentation": "2;3;3;2;3", "wc_summary": "139;228;116;53;127", "wc_strengths": "112;90;16;59;76", "wc_weaknesses": "300;420;112;218;53", "wc_questions": "26;145;69;80;41", "wc_limitations": "3;1;10;8;1", "wc_review": "580;884;323;418;298", "wc_reply_reviewers": "56;596;106;40;48", "wc_reply_authors": "199;1789;123;49;56", "reply_reviewers": "1;3;1;1;1", "reply_authors": "2;5;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 132.6, 56.18754310343174 ], "wc_strengths_avg": [ 70.6, 32.357997465850694 ], "wc_weaknesses_avg": [ 220.6, 131.11460635642393 ], "wc_questions_avg": [ 72.2, 41.17960660326905 ], "wc_limitations_avg": [ 4.6, 3.7202150475476548 ], "wc_review_avg": [ 500.6, 215.73650595112548 ], "wc_reply_reviewers_avg": [ 169.2, 214.63867312299524 ], "wc_reply_authors_avg": [ 443.2, 675.0787805878659 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3278447514196264321&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 5, "email": "google.com;google.com;u-tokyo.ac.jp;google.com;deepmind.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Google;University of Tokyo", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "DeepMind;UTokyo", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;1;0;1;0", "aff_country_unique": "United Kingdom;United States;Japan" }, { "title": "Adaptive $Q$-Aid for Conditional Supervised Learning in Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96757", "id": "3HpgVs22UJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3HpgVs22UJ", "openreview": "https://openreview.net/forum?id=3HpgVs22UJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96757.png?t=1733074687.1365297", "project": "", "author_site": "Jeonghye Kim, Suyoung Lee, Woojun Kim, Youngchul Sung", "tldr": "", "abstract": "Offline reinforcement learning (RL) has progressed with return-conditioned supervised learning (RCSL), but its lack of stitching ability remains a limitation. We introduce $Q$-Aided Conditional Supervised Learning (QCS), which effectively combines the stability of RCSL with the stitching capability of $Q$-functions. By analyzing $Q$-function over-generalization, which impairs stable stitching, QCS adaptively integrates $Q$-aid into RCSL's loss function based on trajectory return. Empirical results show that QCS significantly outperforms RCSL and value-based methods, consistently achieving or exceeding the highest trajectory returns across diverse offline RL benchmarks. QCS represents a breakthrough in offline RL, pushing the limits of what can be achieved and fostering further innovations.", "keywords": "offline reinforcement learning;return conditioned supervised learning;Q function;dynamic programming", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1eb9a5a179495ed48da740680b5a01717608189a.zip", "author": "Jeonghye Kim;Suyoung Lee;Woojun Kim;Youngchul Sung", "authorids": "~Jeonghye_Kim1;~Suyoung_Lee4;~Woojun_Kim1;~Youngchul_Sung1", "gender": ";M;M;M", "homepage": "https://www.beanie00.com/;;https://sites.google.com/view/youngchulsung;https://suyoung-lee.github.io/", "dblp": "172/6718.html;236/4974;17/6798;31/4163", "google_scholar": "koDFScAAAAAJ;https://scholar.google.co.kr/citations?user=bcHWCBoAAAAJ;-9D2k3UAAAAJ;CWbdBy8AAAAJ", "orcid": ";;0000-0003-4536-6690;", "linkedin": "beanie00/;;;", "or_profile": "~Jeonghye_Kim1;~Woojun_Kim1;~Youngchul_Sung1;~Su_Young_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Carnegie Mellon University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;cmu.edu;kaist.ac.kr;kaist.ac.kr", "position": "MS student;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nkim2024adaptive,\ntitle={Adaptive \\$Q\\$-Aid for Conditional Supervised Learning in Offline Reinforcement Learning},\nauthor={Jeonghye Kim and Suyoung Lee and Woojun Kim and Youngchul Sung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3HpgVs22UJ}\n}", "github": "", "reviewers": "WnjH;K7ih;Ndxq;fTMz", "pdf_size": 7035214, "rating": "3;6;7;7", "confidence": "3;2;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;4;3", "wc_summary": "50;67;55;65", "wc_strengths": "9;25;81;111", "wc_weaknesses": "185;7;120;109", "wc_questions": "20;18;9;120", "wc_limitations": "3;19;3;18", "wc_review": "267;136;268;423", "wc_reply_reviewers": "425;28;0;0", "wc_reply_authors": "1079;452;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "3;4;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 59.25, 7.013380069552769 ], "wc_strengths_avg": [ 56.5, 41.28861828639946 ], "wc_weaknesses_avg": [ 105.25, 63.72744699107285 ], "wc_questions_avg": [ 41.75, 45.36725140450984 ], "wc_limitations_avg": [ 10.75, 7.75806032459145 ], "wc_review_avg": [ 273.5, 101.64767582192916 ], "wc_reply_reviewers_avg": [ 113.25, 180.35156639186698 ], "wc_reply_authors_avg": [ 382.75, 442.3106233180478 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.41382044088453257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8937449886933465360&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;cmu.edu;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.cmu.edu", "aff_unique_abbr": "KAIST;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Optimal Classification under Performative Distribution Shift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96756", "id": "3J5hvO5UaW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3J5hvO5UaW", "openreview": "https://openreview.net/forum?id=3J5hvO5UaW", "poster": "/media/PosterPDFs/NeurIPS%202024/96756.png?t=1734023264.5152972", "project": "", "author_site": "Edwige Cyffers, Muni Sreenivas Pydi, Jamal Atif, Olivier Capp\u00e9", "tldr": "", "abstract": "Performative learning addresses the increasingly pervasive situations in which algorithmic decisions may induce changes in the data distribution as a consequence of their public deployment. We propose a novel view in which these performative effects are modelled as push forward measures. This general framework encompasses existing models and enables novel performative gradient estimation methods, leading to more efficient and scalable learning strategies. For distribution shifts, unlike previous models which require full specification of the data distribution, we only assume knowledge of the shift operator that represents the performative changes. This approach can also be integrated into various change-of-variable-based models, such as VAEs or normalizing flows. Focusing on classification with a linear-in-parameters performative effect, we prove the convexity of the performative risk under a new set of assumptions. Notably, we do not limit the strength of performative effects but rather their direction, requiring only that classification becomes harder when deploying more accurate models. In this case, we also establish a connection with adversarially robust classification by reformulating the performative risk as a min-max variational problem. Finally, we illustrate our approach on synthetic and real datasets.", "keywords": "Performative learning;classification;performative gradient;distribution shift;pushforward;robustness", "primary_area": "optimization", "supplementary_material": "", "author": "Edwige Cyffers;Muni Sreenivas Pydi;Jamal Atif;Olivier Capp\u00e9", "authorids": "~Edwige_Cyffers1;~Muni_Sreenivas_Pydi1;~Jamal_Atif1;~Olivier_Capp\u00e92", "gender": ";M;M;M", "homepage": ";https://munisreenivas.github.io/;http://www.lamsade.dauphine.fr/~atif/doku.php;https://www.di.ens.fr/olivier.cappe/", "dblp": "281/6734;194/2444;http://dblp.uni-trier.de/pers/hd/a/Atif:Jamal;99/1572", "google_scholar": ";BT8j_-oAAAAJ;;erIXTCsAAAAJ", "orcid": ";;;0000-0001-7415-8669", "linkedin": "edwige-cyffers/;;;olivier-cappe-b799aa15a/", "or_profile": "~Edwige_Cyffers1;~Muni_Sreenivas_Pydi1;~Jamal_Atif1;~Olivier_Capp\u00e92", "aff": "INRIA;Universit\u00e9 Paris Dauphine - PSL;Universit\u00e9 Paris-Dauphine;CNRS", "aff_domain": "inria.fr;lamsade.dauphine.fr;dauphine.fr;ens.fr", "position": "PhD student;Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\ncyffers2024optimal,\ntitle={Optimal Classification under Performative Distribution Shift},\nauthor={Edwige Cyffers and Muni Sreenivas Pydi and Jamal Atif and Olivier Capp{\\'e}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3J5hvO5UaW}\n}", "github": "", "reviewers": "uddR;PyL2;eHFF", "pdf_size": 801530, "rating": "5;5;6", "confidence": "4;2;2", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;2", "wc_summary": "135;116;152", "wc_strengths": "63;156;303", "wc_weaknesses": "138;137;258", "wc_questions": "30;11;110", "wc_limitations": "2;9;21", "wc_review": "368;429;844", "wc_reply_reviewers": "11;10;68", "wc_reply_authors": "15;32;18", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 134.33333333333334, 14.704496666741854 ], "wc_strengths_avg": [ 174.0, 98.80283396745257 ], "wc_weaknesses_avg": [ 177.66666666666666, 56.80571176289308 ], "wc_questions_avg": [ 50.333333333333336, 42.897811391983886 ], "wc_limitations_avg": [ 10.666666666666666, 7.84573486395988 ], "wc_review_avg": [ 547.0, 211.4820717381657 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 27.10883414846328 ], "wc_reply_authors_avg": [ 21.666666666666668, 7.408703590297623 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14869792619553634503&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "inria.fr;lamsade.dauphine.fr;dauphine.fr;ens.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "INRIA;Universit\u00e9 Paris Dauphine;Universit\u00e9 Paris-Dauphine;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.inria.fr;https://www.univ-paris-dauphine.fr;https://www.univ-paris-dauphine.fr;https://www.cnrs.fr", "aff_unique_abbr": "INRIA;UPD;UPD;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Alleviating Distortion in Image Generation via Multi-Resolution Diffusion Models and Time-Dependent Layer Normalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96755", "id": "3JwMwL8i5f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3JwMwL8i5f", "openreview": "https://openreview.net/forum?id=3JwMwL8i5f", "poster": "/media/PosterPDFs/NeurIPS%202024/96755.png?t=1732321569.3345897", "project": "", "author_site": "Qihao Liu, Zhanpeng Zeng, Ju He, Qihang Yu, Xiaohui Shen, Liang-Chieh Chen", "tldr": "", "abstract": "This paper presents innovative enhancements to diffusion models by integrating a novel multi-resolution network and time-dependent layer normalization.\nDiffusion models have gained prominence for their effectiveness in high-fidelity image generation.\nWhile conventional approaches rely on convolutional U-Net architectures, recent Transformer-based designs have demonstrated superior performance and scalability.\nHowever, Transformer architectures, which tokenize input data (via \"patchification\"), face a trade-off between visual fidelity and computational complexity due to the quadratic nature of self-attention operations concerning token length.\nWhile larger patch sizes enable attention computation efficiency, they struggle to capture fine-grained visual details, leading to image distortions.\nTo address this challenge, we propose augmenting the **Di**ffusion model with the **M**ulti-**R**esolution network (DiMR), a framework that refines features across multiple resolutions, progressively enhancing detail from low to high resolution.\nAdditionally, we introduce Time-Dependent Layer Normalization (TD-LN), a parameter-efficient approach that incorporates time-dependent parameters into layer normalization to inject time information and achieve superior performance.\nOur method's efficacy is demonstrated on the class-conditional ImageNet generation benchmark, where DiMR-XL variants surpass previous diffusion models, achieving FID scores of 1.70 on ImageNet $256 \\times 256$ and 2.89 on ImageNet $512 \\times 512$. Our best variant, DiMR-G, further establishes a state-of-the-art 1.63 FID on ImageNet $256 \\times 256$.", "keywords": "Image Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Qihao Liu;Zhanpeng Zeng;Ju He;Qihang Yu;Xiaohui Shen;Liang-Chieh Chen", "authorids": "~Qihao_Liu1;~Zhanpeng_Zeng1;~Ju_He1;~Qihang_Yu1;~Xiaohui_Shen2;~Liang-Chieh_Chen1", "gender": ";M;M;;;", "homepage": ";;https://tacju.github.io/;;https://xiaohuishen.github.io/;http://liangchiehchen.com/", "dblp": "158/2755;284/9150;;;88/6582;138/2443", "google_scholar": "WFl3hH0AAAAJ;P9ctuRUAAAAJ;NyTPm_zUV_kC;7zZdZxsAAAAJ;pViZYwIAAAAJ;ACjYGPUAAAAJ", "orcid": ";;;;;", "linkedin": ";;ju-he-43b884190/;;;", "or_profile": "~Qihao_Liu1;~Zhanpeng_Zeng1;~Ju_He1;~Qihang_Yu1;~Xiaohui_Shen2;~Liang-Chieh_Chen1", "aff": "Johns Hopkins University;University of Wisconsin, Madison;Johns Hopkins University;ByteDance;ByteDance Inc.;ByteDance / TikTok", "aff_domain": "jh.edu;wisc.edu;jhu.edu;bytedance.com;bytedance.com;bytedance.com", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nliu2024alleviating,\ntitle={Alleviating Distortion in Image Generation via Multi-Resolution Diffusion Models and Time-Dependent Layer Normalization},\nauthor={Qihao Liu and Zhanpeng Zeng and Ju He and Qihang Yu and Xiaohui Shen and Liang-Chieh Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3JwMwL8i5f}\n}", "github": "", "reviewers": "RADT;MPTv;1gRs;Wwps", "pdf_size": 13545307, "rating": "5;5;5;7", "confidence": "4;3;3;5", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "52;98;37;56", "wc_strengths": "47;61;25;127", "wc_weaknesses": "119;34;42;389", "wc_questions": "64;74;33;1", "wc_limitations": "11;15;4;43", "wc_review": "293;282;141;616", "wc_reply_reviewers": "12;0;0;40", "wc_reply_authors": "27;0;0;450", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.75, 22.64260364887395 ], "wc_strengths_avg": [ 65.0, 38.02630668366309 ], "wc_weaknesses_avg": [ 146.0, 144.16830442229664 ], "wc_questions_avg": [ 43.0, 28.574464124459098 ], "wc_limitations_avg": [ 18.25, 14.821858857781638 ], "wc_review_avg": [ 333.0, 174.03591583348535 ], "wc_reply_reviewers_avg": [ 13.0, 16.34013463836819 ], "wc_reply_authors_avg": [ 119.25, 191.27646875661418 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8360559202316858202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "jh.edu;wisc.edu;jhu.edu;bytedance.com;bytedance.com;bytedance.com", "author_num": 6, "aff_unique_index": "0;1;0;2;2;2", "aff_unique_norm": "Johns Hopkins University;University of Wisconsin;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jhu.edu;https://www.wisc.edu;https://www.bytedance.com", "aff_unique_abbr": "JHU;UW;ByteDance", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Langevin Unlearning: A New Perspective of Noisy Gradient Descent for Machine Unlearning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96754", "id": "3LKuC8rbyV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3LKuC8rbyV", "openreview": "https://openreview.net/forum?id=3LKuC8rbyV", "poster": "/media/PosterPDFs/NeurIPS%202024/96754.png?t=1730472258.8567061", "project": "", "author_site": "Eli Chien, Haoyu Wang, Ziang Chen, Pan Li", "tldr": "", "abstract": "Machine unlearning has raised significant interest with the adoption of laws ensuring the ``right to be forgotten''. Researchers have provided a probabilistic notion of approximate unlearning under a similar definition of Differential Privacy (DP), where privacy is defined as statistical indistinguishability to retraining from scratch. We propose Langevin unlearning, an unlearning framework based on noisy gradient descent with privacy guarantees for approximate unlearning problems. Langevin unlearning unifies the DP learning process and the privacy-certified unlearning process with many algorithmic benefits. These include approximate certified unlearning for non-convex problems, complexity saving compared to retraining, sequential and batch unlearning for multiple unlearning requests.", "keywords": "Machine unlearning;privacy;Langevin Monte Carlo;Langevin dynamic;gradient descent;differential privacy", "primary_area": "privacy", "supplementary_material": "/attachment/e5e6c5629ab55c15c098af6e2735abb88af99dc4.zip", "author": "Eli Chien;Haoyu Peter Wang;Ziang Chen;Pan Li", "authorids": "~Eli_Chien1;~Haoyu_Peter_Wang1;~Ziang_Chen1;~Pan_Li2", "gender": "M;M;;M", "homepage": ";https://sites.duke.edu/ziangchen/;;https://sites.google.com/view/eli-chien/home", "dblp": ";;https://dblp.org/pers/hd/l/Li_0005:Pan;222/3243", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;odvrFvIAAAAJ;IroP0EwAAAAJ;N3BuEnYAAAAJ", "orcid": ";0000-0002-8298-5223;;", "linkedin": ";;pan-li-b951105a/;", "or_profile": "~Haoyu_Peter_Wang1;~Ziang_Chen1;~Pan_Li2;~I_Chien2", "aff": "Georgia Institute of Technology;Massachusetts Institute of Technology;Purdue University;Georgia Institute of Technology", "aff_domain": "gatech.edu;mit.edu;purdue.edu;gatech.edu", "position": "PhD student;Instructor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nchien2024langevin,\ntitle={Langevin Unlearning: A New Perspective of Noisy Gradient Descent for Machine Unlearning},\nauthor={Eli Chien and Haoyu Peter Wang and Ziang Chen and Pan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3LKuC8rbyV}\n}", "github": "", "reviewers": "GDGF;tZ8F;1Nom;Akdu", "pdf_size": 1263522, "rating": "5;5;7;8", "confidence": "3;1;4;3", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "2;2;4;3", "wc_summary": "72;57;70;143", "wc_strengths": "102;79;71;101", "wc_weaknesses": "168;88;13;167", "wc_questions": "28;20;11;258", "wc_limitations": "18;47;17;6", "wc_review": "388;291;182;675", "wc_reply_reviewers": "0;23;10;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 85.5, 33.69347117766289 ], "wc_strengths_avg": [ 88.25, 13.5531361684298 ], "wc_weaknesses_avg": [ 109.0, 64.23005527009921 ], "wc_questions_avg": [ 79.25, 103.37643590296582 ], "wc_limitations_avg": [ 22.0, 15.182226450688976 ], "wc_review_avg": [ 384.0, 183.13246571812437 ], "wc_reply_reviewers_avg": [ 12.25, 8.437268515343103 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5739640213948524, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13659854103825432694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "gatech.edu;mit.edu;purdue.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Georgia Institute of Technology;Massachusetts Institute of Technology;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://web.mit.edu;https://www.purdue.edu", "aff_unique_abbr": "Georgia Tech;MIT;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Impact of Feature Heterophily on Link Prediction with Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96753", "id": "3LZHatxUa9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3LZHatxUa9", "openreview": "https://openreview.net/forum?id=3LZHatxUa9", "poster": "", "project": "", "author_site": "Jiong Zhu, Gaotang Li, Yao-An Yang, Jing Zhu, Xuehao Cui, Danai Koutra", "tldr": "", "abstract": "Heterophily, or the tendency of connected nodes in networks to have different class labels or dissimilar features, has been identified as challenging for many Graph Neural Network (GNN) models. While the challenges of applying GNNs for node classification when class labels display strong heterophily are well understood, it is unclear how heterophily affects GNN performance in other important graph learning tasks where class labels are not available. In this work, we focus on the link prediction task and systematically analyze the impact of heterophily in node features on GNN performance. We first introduce formal definitions of homophilic and heterophilic link prediction tasks, and present a theoretical framework that highlights the different optimizations needed for the respective tasks. We then analyze how different link prediction encoders and decoders adapt to varying levels of feature homophily and introduce designs for improved performance. Based on our definitions, we identify and analyze six real-world benchmarks spanning from homophilic to heterophilic link prediction settings, with graphs containing up to 30M edges. Our empirical analysis on a variety of synthetic and real-world datasets confirms our theoretical insights and highlights the importance of adopting learnable decoders and GNN encoders with ego- and neighbor-embedding separation in message passing for link prediction tasks beyond homophily.", "keywords": "graph neural networks;heterophily;link prediction", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Jiong Zhu;Gaotang Li;Yao-An Yang;Jing Zhu;Xuehao Cui;Danai Koutra", "authorids": "~Jiong_Zhu1;~Gaotang_Li1;~Yao-An_Yang1;~Jing_Zhu4;~Xuehao_Cui1;~Danai_Koutra1", "gender": "M;M;M;F;;F", "homepage": "https://www.jiongzhu.net;https://gaotangli.github.io/;;https://jwzhi.github.io/;;http://web.eecs.umich.edu/~dkoutra/", "dblp": "51/8525;348/5271;;93/4160-5;;91/9987", "google_scholar": "KjGFQ0QAAAAJ;0aVJRykAAAAJ;Yf0p3zQAAAAJ;NouGW-UAAAAJ;;https://scholar.google.com.tw/citations?user=bDrA1-8AAAAJ", "orcid": "0000-0002-6145-3295;0009-0004-3294-1347;;0000-0002-5364-151X;;0000-0002-3206-8179", "linkedin": ";;;;;", "or_profile": "~Jiong_Zhu1;~Gaotang_Li1;~Yao-An_Yang1;~Jing_Zhu4;~Xuehao_Cui1;~Danai_Koutra1", "aff": "University of Michigan;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;;Amazon", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;;amazon.com", "position": "PhD student;Undergrad student;Undergrad student;PhD student;;Scholar", "bibtex": "@inproceedings{\nzhu2024on,\ntitle={On the Impact of Feature Heterophily on Link Prediction with Graph Neural Networks},\nauthor={Jiong Zhu and Gaotang Li and Yao-An Yang and Jing Zhu and Xuehao Cui and Danai Koutra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3LZHatxUa9}\n}", "github": "", "reviewers": "ufZT;JA9K;kFpa;YTzD;dskj", "pdf_size": 1026734, "rating": "4;4;6;7;7", "confidence": "5;4;4;4;3", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;4", "presentation": "4;4;3;3;3", "wc_summary": "88;59;69;67;35", "wc_strengths": "59;27;8;62;74", "wc_weaknesses": "180;71;7;74;61", "wc_questions": "93;33;31;166;63", "wc_limitations": "3;12;6;17;16", "wc_review": "423;202;121;386;249", "wc_reply_reviewers": "242;113;10;9;0", "wc_reply_authors": "1045;179;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "5;2;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 63.6, 17.17672844286711 ], "wc_strengths_avg": [ 46.0, 24.551985663078252 ], "wc_weaknesses_avg": [ 78.6, 56.2088960930563 ], "wc_questions_avg": [ 77.2, 49.849373115416405 ], "wc_limitations_avg": [ 10.8, 5.491812087098393 ], "wc_review_avg": [ 276.2, 113.08297838313244 ], "wc_reply_reviewers_avg": [ 74.8, 93.315379225506 ], "wc_reply_authors_avg": [ 244.8, 406.0617687002804 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6993786061802353, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16855450794177484850&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, "email": "umich.edu;umich.edu;umich.edu;umich.edu;;amazon.com", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Michigan;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.umich.edu;https://www.amazon.com", "aff_unique_abbr": "UM;Amazon", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FairQueue: Rethinking Prompt Learning for Fair Text-to-Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96752", "id": "3MW44iNdrD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3MW44iNdrD", "openreview": "https://openreview.net/forum?id=3MW44iNdrD", "poster": "/media/PosterPDFs/NeurIPS%202024/96752.png?t=1729604512.9757447", "project": "", "author_site": "Christopher Teo, Milad Abdollahzadeh, Xinda Ma, Ngai-Man (Man) Cheung", "tldr": "", "abstract": "Recently, prompt learning has emerged as the state-of-the-art (SOTA) for fair text-to-image (T2I) generation. Specifically, this approach leverages readily available reference images to learn inclusive prompts for each target Sensitive Attribute (tSA), allowing for fair image generation. In this work, we first reveal that this prompt learning-based approach results in degraded sample quality. Our analysis shows that the approach's training objective--which aims to align the embedding differences of learned prompts and reference images-- could be sub-optimal, resulting in distortion of the learned prompts and degraded generated images. To further substantiate this claim, **as our major contribution**, we deep dive into the denoising subnetwork of the T2I model to track down the effect of these learned prompts by analyzing the cross-attention maps. In our analysis, we propose a novel prompt switching analysis: I2H and H2I. Furthermore, we propose new quantitative characterization of cross-attention maps. Our analysis reveals abnormalities in the early denoising steps, perpetuating improper global structure that results in degradation in the generated samples. Building on insights from our analysis, we propose two ideas: (i) *Prompt Queuing* and (ii) *Attention Amplification* to address the quality issue. Extensive experimental results on a wide range of tSAs show that our proposed method outperforms SOTA approach's image generation quality, while achieving competitive fairness. More resources at FairQueue Project site: https://sutd-visual-computing-group.github.io/FairQueue", "keywords": "fairness;generative modelling;text-to-image models;bias mitigation;prompt learning", "primary_area": "fairness", "supplementary_material": "", "author": "Christopher T.H Teo;Milad Abdollahzadeh;Xinda Ma;Ngai-man Cheung", "authorids": "~Christopher_T.H_Teo1;~Milad_Abdollahzadeh1;~Xinda_Ma1;~Ngai-man_Cheung1", "gender": "M;M;;M", "homepage": ";;;https://sites.google.com/site/mancheung0407/", "dblp": ";211/7797;;82/3605", "google_scholar": "JhyGETcAAAAJ;SYDsMNAAAAAJ;;https://scholar.google.com.sg/citations?hl=en", "orcid": ";0000-0003-4011-4670;;0000-0003-0135-3791", "linkedin": "tthchristopher/;milad-abdollahzadeh-b0764361/;xinda-ma-42493b302/;", "or_profile": "~Christopher_T.H_Teo1;~Milad_Abdollahzadeh1;~Xinda_Ma1;~Ngai-man_Cheung1", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Nanyang Technological University;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;ntu.edu.sg;sutd.edu.sg", "position": "PhD student;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nteo2024fairqueue,\ntitle={FairQueue: Rethinking Prompt Learning for Fair Text-to-Image Generation},\nauthor={Christopher T.H Teo and Milad Abdollahzadeh and Xinda Ma and Ngai-man Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3MW44iNdrD}\n}", "github": "", "reviewers": "9bUQ;8Wzu;89En;iQtH", "pdf_size": 50020406, "rating": "5;5;5;7", "confidence": "5;1;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;1;4;3", "wc_summary": "158;79;71;23", "wc_strengths": "349;23;37;28", "wc_weaknesses": "249;82;114;11", "wc_questions": "192;57;47;42", "wc_limitations": "316;9;13;20", "wc_review": "1264;250;282;124", "wc_reply_reviewers": "63;23;323;0", "wc_reply_authors": "885;48;856;0", "reply_reviewers": "1;1;2;0", "reply_authors": "3;2;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 82.75, 48.43745967740257 ], "wc_strengths_avg": [ 109.25, 138.51060428718085 ], "wc_weaknesses_avg": [ 114.0, 86.3973379219522 ], "wc_questions_avg": [ 84.5, 62.29967897188556 ], "wc_limitations_avg": [ 89.5, 130.829086979922 ], "wc_review_avg": [ 480.0, 456.480010515247 ], "wc_reply_reviewers_avg": [ 102.25, 129.42831027252114 ], "wc_reply_authors_avg": [ 447.25, 423.7141577762065 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13207692892150612985&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sutd.edu.sg;sutd.edu.sg;ntu.edu.sg;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "SUTD;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "B-ary Tree Push-Pull Method is Provably Efficient for Distributed Learning on Heterogeneous Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96751", "id": "3MnXAcTBD3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3MnXAcTBD3", "openreview": "https://openreview.net/forum?id=3MnXAcTBD3", "poster": "", "project": "", "author_site": "Runze You, Shi Pu", "tldr": "", "abstract": "This paper considers the distributed learning problem where a group of agents cooperatively minimizes the summation of their local cost functions based on peer-to-peer communication. Particularly, we propose a highly efficient algorithm, termed ``B-ary Tree Push-Pull'' (BTPP), that employs two B-ary spanning trees for distributing the information related to the parameters and stochastic gradients across the network. The simple method is efficient in communication since each agent interacts with at most $(B+1)$ neighbors per iteration. More importantly, BTPP achieves linear speedup for smooth nonconvex objective functions with only $\\tilde{O}(n)$ transient iterations, significantly outperforming the state-of-the-art results to the best of our knowledge.", "keywords": "Distributed Learning;Distributed Optimization;Stochastic Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Runze You;Shi Pu", "authorids": "~Runze_You1;~Shi_Pu2", "gender": "M;", "homepage": ";https://pu-shi.github.io/", "dblp": ";", "google_scholar": ";", "orcid": "0009-0004-7856-6001;", "linkedin": ";", "or_profile": "~Runze_You1;~Shi_Pu2", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyou2024bary,\ntitle={B-ary Tree Push-Pull Method is Provably Efficient for Distributed Learning on Heterogeneous Data},\nauthor={Runze You and Shi Pu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3MnXAcTBD3}\n}", "github": "", "reviewers": "JWsy;KerT;rZsK;C42p", "pdf_size": 1719175, "rating": "4;5;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "86;77;90;215", "wc_strengths": "19;60;28;120", "wc_weaknesses": "109;124;128;161", "wc_questions": "61;2;83;171", "wc_limitations": "69;2;5;52", "wc_review": "344;265;334;719", "wc_reply_reviewers": "0;15;12;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 117.0, 56.77587515838043 ], "wc_strengths_avg": [ 56.75, 39.568769250508666 ], "wc_weaknesses_avg": [ 130.5, 18.980252896102307 ], "wc_questions_avg": [ 79.25, 60.6892700565759 ], "wc_limitations_avg": [ 32.0, 29.146183283579344 ], "wc_review_avg": [ 415.5, 177.84614136944327 ], "wc_reply_reviewers_avg": [ 12.5, 8.261355820929152 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WI8H7wfgD6oJ:scholar.google.com/&scioq=B-ary+Tree+Push-Pull+Method+is+Provably+Efficient+for+Distributed+Learning+on+Heterogeneous+Data&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "cuhk.edu.cn;cuhk.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "OpenGaussian: Towards Point-Level 3D Gaussian-based Open Vocabulary Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96750", "id": "3NAEowLh7Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3NAEowLh7Q", "openreview": "https://openreview.net/forum?id=3NAEowLh7Q", "poster": "/media/PosterPDFs/NeurIPS%202024/96750.png?t=1733405483.8951852", "project": "", "author_site": "Yanmin Wu, Jiarui Meng, Haijie LI, Chenming Wu, Yahao Shi, Xinhua Cheng, Chen Zhao, Haocheng Feng, Errui Ding, Jingdong Wang, Jian Zhang", "tldr": "", "abstract": "This paper introduces OpenGaussian, a method based on 3D Gaussian Splatting (3DGS) that possesses the capability for 3D point-level open vocabulary understanding. Our primary motivation stems from observing that existing 3DGS-based open vocabulary methods mainly focus on 2D pixel-level parsing. These methods struggle with 3D point-level tasks due to weak feature expressiveness and inaccurate 2D-3D feature associations. To ensure robust feature presentation and 3D point-level understanding, we first employ SAM masks without cross-frame associations to train instance features with 3D consistency. These features exhibit both intra-object consistency and inter-object distinction. Then, we propose a two-stage codebook to discretize these features from coarse to fine levels. At the coarse level, we consider the positional information of 3D points to achieve location-based clustering, which is then refined at the fine level.\nFinally, we introduce an instance-level 3D-2D feature association method that links 3D points to 2D masks, which are further associated with 2D CLIP features. Extensive experiments, including open vocabulary-based 3D object selection, 3D point cloud understanding, click-based 3D object selection, and ablation studies, demonstrate the effectiveness of our proposed method. The source code is available at our project page https://3d-aigc.github.io/OpenGaussian.", "keywords": "3D Understanding;3D Gaussian Splatting;open-vocabulary", "primary_area": "robotics", "supplementary_material": "", "author": "Yanmin Wu;Jiarui Meng;Haijie LI;Chenming Wu;Yahao Shi;Xinhua Cheng;Chen Zhao;Haocheng Feng;Errui Ding;Jingdong Wang;Jian Zhang", "authorids": "~Yanmin_Wu1;~Jiarui_Meng1;~Haijie_LI2;~Chenming_Wu1;~Yahao_Shi1;~Xinhua_Cheng1;~Chen_Zhao9;~Haocheng_Feng1;~Errui_Ding2;~Jingdong_Wang1;~Jian_Zhang22", "gender": "M;M;;M;M;M;;;M;M;M", "homepage": "https://yanmin-wu.github.io/;https://github.com/JrMeng0312;https://villa.jianzhang.tech/people/haijie-li-%E6%9D%8E%E6%B5%B7%E6%9D%B0/;https://chenming-wu.github.io/;;https://cxh0519.github.io/;;;;https://jingdongwang2017.github.io/;http://jianzhang.tech/", "dblp": "151/5893;374/6336;;190/5879;237/9801;260/2943;;;180/5531;49/3441;07/314-18", "google_scholar": "11sQNWwAAAAJ;;;https://scholar.google.com.hk/citations?user=eOkkQWUAAAAJ;;NI4c3kcAAAAJ;;;1wzEtxcAAAAJ;z5SPCmgAAAAJ;7brFI_4AAAAJ", "orcid": "0000-0002-3926-4500;;;0000-0001-8012-1547;;;;;;0000-0002-4888-4445;0000-0001-5486-3125", "linkedin": ";;;;;;;;;;", "or_profile": "~Yanmin_Wu1;~Jiarui_Meng1;~Haijie_LI2;~Chenming_Wu1;~Yahao_Shi1;~Xinhua_Cheng1;~Chen_Zhao9;~Haocheng_Feng1;~Errui_Ding2;~Jingdong_Wang1;~Jian_Zhang22", "aff": "Peking University;Peking University;Peking University;Baidu;Beihang University;Peking University;;;Baidu;Baidu;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;baidu.com;buaa.edu.cn;pku.edu.cn;;;baidu.com;baidu.com;pku.edu.cn", "position": "PhD student;MS student;MS student;Researcher;PhD student;PhD student;;;Director;Chief Scientist for Computer Vision;Assistant Professor", "bibtex": "@inproceedings{\nwu2024opengaussian,\ntitle={OpenGaussian: Towards Point-Level 3D Gaussian-based Open Vocabulary Understanding},\nauthor={Yanmin Wu and Jiarui Meng and Haijie LI and Chenming Wu and Yahao Shi and Xinhua Cheng and Chen Zhao and Haocheng Feng and Errui Ding and Jingdong Wang and Jian Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3NAEowLh7Q}\n}", "github": "", "reviewers": "d9so;sSBZ;ANGY", "pdf_size": 22285987, "rating": "6;6;6", "confidence": "3;3;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "70;89;93", "wc_strengths": "46;31;101", "wc_weaknesses": "69;138;170", "wc_questions": "2;8;79", "wc_limitations": "1;6;20", "wc_review": "188;272;463", "wc_reply_reviewers": "17;62;106", "wc_reply_authors": "17;104;289", "reply_reviewers": "1;1;2", "reply_authors": "2;3;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.0, 10.03327796219494 ], "wc_strengths_avg": [ 59.333333333333336, 30.09245014211298 ], "wc_weaknesses_avg": [ 125.66666666666667, 42.14525147892966 ], "wc_questions_avg": [ 29.666666666666668, 34.96982826507572 ], "wc_limitations_avg": [ 9.0, 8.04155872120988 ], "wc_review_avg": [ 307.6666666666667, 115.06616454119873 ], "wc_reply_reviewers_avg": [ 61.666666666666664, 36.33486235314814 ], "wc_reply_authors_avg": [ 136.66666666666666, 113.42055467252055 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1397456284984239036&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;baidu.com;buaa.edu.cn;pku.edu.cn;;;baidu.com;baidu.com;pku.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;1;2;0;1;1;0", "aff_unique_norm": "Peking University;Baidu;Beihang University", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "http://www.pku.edu.cn;https://www.baidu.com;http://www.buaa.edu.cn/", "aff_unique_abbr": "Peking U;Baidu;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Training Data Attribution via Approximate Unrolling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96749", "id": "3NaqGg92KZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3NaqGg92KZ", "openreview": "https://openreview.net/forum?id=3NaqGg92KZ", "poster": "", "project": "", "author_site": "Juhan Bae, Wu Lin, Jonathan Lorraine, Roger Grosse", "tldr": "", "abstract": "Many training data attribution (TDA) methods aim to estimate how a model's behavior would change if one or more data points were removed from the training set. Methods based on implicit differentiation, such as influence functions, can be made computationally efficient, but fail to account for underspecification, the implicit bias of the optimization algorithm, or multi-stage training pipelines. By contrast, methods based on unrolling address these issues but face scalability challenges. In this work, we connect the implicit-differentiation-based and unrolling-based approaches and combine their benefits by introducing Source, an approximate unrolling-based TDA method that is computed using an influence-function-like formula. While being computationally efficient compared to unrolling-based approaches, Source is suitable in cases where implicit-differentiation-based approaches struggle, such as in non-converged models and multi-stage training pipelines. Empirically, Source outperforms existing TDA techniques in counterfactual prediction, especially in settings where implicit-differentiation-based approaches fall short.", "keywords": "training data attribution;influence functions", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Juhan Bae;Wu Lin;Jonathan Lorraine;Roger Baker Grosse", "authorids": "~Juhan_Bae2;~Wu_Lin2;~Jonathan_Lorraine1;~Roger_Baker_Grosse1", "gender": "M;M;M;M", "homepage": "http://www.juhanbae.com/;http://www.cs.toronto.edu/~rgrosse/;https://www.jonlorraine.com/;https://yorkerlin.github.io/", "dblp": "158/9492;26/7058;215/5229;70/10338", "google_scholar": "https://scholar.google.ca/citations?user=9RFr4usAAAAJ;xgQd1qgAAAAJ;Hzf8bu0AAAAJ;https://scholar.google.ca/citations?user=sGl6muoAAAAJ", "orcid": ";;0000-0002-1255-6554;", "linkedin": ";;jonlorraine/;", "or_profile": "~Juhan_Bae2;~Roger_Baker_Grosse1;~Jonathan_Peter_Lorraine1;~Wu_Lin1", "aff": "University of Toronto;Vector Institute;Vector Institute;Vector Institute", "aff_domain": "cs.toronto.edu;vectorinstitute.ai;vectorinstitute.ai;vectorinstitute.ai", "position": "PhD student;Faculty Member;Researcher;Postdoc", "bibtex": "@inproceedings{\nbae2024training,\ntitle={Training Data Attribution via Approximate Unrolling},\nauthor={Juhan Bae and Wu Lin and Jonathan Lorraine and Roger Baker Grosse},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3NaqGg92KZ}\n}", "github": "", "reviewers": "hk4U;3Y11;9an4;5ybi", "pdf_size": 2207668, "rating": "3;5;7;8", "confidence": "4;4;4;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "4;3;3;4", "wc_summary": "87;68;258;165", "wc_strengths": "40;107;104;63", "wc_weaknesses": "4;128;178;63", "wc_questions": "109;30;290;76", "wc_limitations": "140;11;1;7", "wc_review": "380;344;831;374", "wc_reply_reviewers": "56;11;15;21", "wc_reply_authors": "901;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 144.5, 74.93497180889574 ], "wc_strengths_avg": [ 78.5, 28.217902119044926 ], "wc_weaknesses_avg": [ 93.25, 65.70911276223413 ], "wc_questions_avg": [ 126.25, 98.6163652747352 ], "wc_limitations_avg": [ 39.75, 57.98868424097929 ], "wc_review_avg": [ 482.25, 201.81225805188345 ], "wc_reply_reviewers_avg": [ 25.75, 17.82379028153103 ], "wc_reply_authors_avg": [ 225.25, 390.1444444048896 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.676481425202546, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8587789690359918934&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "cs.toronto.edu;vectorinstitute.ai;vectorinstitute.ai;vectorinstitute.ai", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Toronto;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Tiny Time Mixers (TTMs): Fast Pre-trained Models for Enhanced Zero/Few-Shot Forecasting of Multivariate Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96748", "id": "3O5YCEWETq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3O5YCEWETq", "openreview": "https://openreview.net/forum?id=3O5YCEWETq", "poster": "", "project": "", "author_site": "Vijay Ekambaram, Arindam Jati, Pankaj Dayama, Sumanta Mukherjee, Nam Nguyen, Wesley M Gifford, Chandra Reddy, Jayant Kalagnanam", "tldr": "", "abstract": "Large pre-trained models excel in zero/few-shot learning for language and vision tasks but face challenges in multivariate time series (TS) forecasting due to diverse data characteristics. Consequently, recent research efforts have focused on developing pre-trained TS forecasting models. These models, whether built from scratch or adapted from large language models (LLMs), excel in zero/few-shot forecasting tasks. However, they are limited by slow performance, high computational demands, and neglect of cross-channel and exogenous correlations. To address this, we introduce Tiny Time Mixers (TTM), a compact model (starting from 1M parameters) with effective transfer learning capabilities, trained exclusively on public TS datasets. TTM, based on the light-weight TSMixer architecture, incorporates innovations like adaptive patching, diverse resolution sampling, and resolution prefix tuning to handle pre-training on varied dataset resolutions with minimal model capacity. Additionally, it employs multi-level modeling to capture channel correlations and infuse exogenous signals during fine-tuning. TTM outperforms existing popular benchmarks in zero/few-shot forecasting by (4-40\\%), while reducing computational requirements significantly. Moreover, TTMs are lightweight and can be executed even on CPU-only machines, enhancing usability and fostering wider adoption in resource-constrained environments. The model weights for reproducibility and research use are available at https://huggingface.co/ibm/ttm-research-r2/, while enterprise-use weights under the Apache license can be accessed as follows: the initial TTM-Q variant at https://huggingface.co/ibm-granite/granite-timeseries-ttm-r1, and the latest variants (TTM-B, TTM-E, TTM-A) weights are available at https://huggingface.co/ibm-granite/granite-timeseries-ttm-r2. The source code for the TTM model along with the usage scripts are available at https://github.com/ibm-granite/granite-tsfm/tree/main/tsfm_public/models/tinytimemixer", "keywords": "time series;foundation models;pretrained models;forecasting;time-series;time;tsfm;light-weight;forecasters", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Vijay Ekambaram;Arindam Jati;Pankaj Dayama;Sumanta Mukherjee;Nam H Nguyen;Wesley M. Gifford;Chandra Reddy;Jayant Kalagnanam", "authorids": "~Vijay_Ekambaram1;~Arindam_Jati1;~Pankaj_Dayama1;~Sumanta_Mukherjee1;~Nam_H_Nguyen1;~Wesley_M._Gifford1;~Chandra_Reddy1;~Jayant_Kalagnanam1", "gender": "M;M;;M;M;;M;M", "homepage": ";https://ajati.github.io/;;;;;;https://researcher.watson.ibm.com/researcher/view.php?person=us-jayant", "dblp": "135/1082.html;117/4725.html;63/10955;;76/2975;59/2423;57/937;05/4351", "google_scholar": "xBgnlFMAAAAJ;https://scholar.google.com/citations?hl=en;dDpul0QAAAAJ;https://scholar.google.co.in/citations?user=gFs3vrMAAAAJ;zzBcUpEAAAAJ;;;dlytHK4AAAAJ", "orcid": ";;;0000-0003-2042-9966;;;;", "linkedin": ";;;;;wesley-gifford-68121b10a/;;jay-ant-kalagnanam-4b74913/", "or_profile": "~Vijay_Ekambaram1;~Arindam_Jati1;~Pankaj_Dayama1;~Sumanta_Mukherjee1;~Nam_H_Nguyen1;~Wesley_M._Gifford1;~Chandra_Reddy1;~Jayant_Kalagnanam1", "aff": "International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines;IBM Research;IBM Research, International Business Machines;IBM TJ Watson Research Center", "aff_domain": "ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;us.ibm.com;researcher.watson.ibm.com", "position": "Researcher;Researcher;Researcher;Researcher;Senior Research Scientist;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nekambaram2024tiny,\ntitle={Tiny Time Mixers ({TTM}s): Fast Pre-trained Models for Enhanced Zero/Few-Shot Forecasting of Multivariate Time Series},\nauthor={Vijay Ekambaram and Arindam Jati and Pankaj Dayama and Sumanta Mukherjee and Nam H Nguyen and Wesley M. Gifford and Chandra Reddy and Jayant Kalagnanam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3O5YCEWETq}\n}", "github": "", "reviewers": "TN2b;qDvN;LiJn;Csq6", "pdf_size": 1470482, "rating": "6;6;7;8", "confidence": "4;3;4;4", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;2;3", "wc_summary": "11;91;76;74", "wc_strengths": "10;148;51;94", "wc_weaknesses": "75;155;76;90", "wc_questions": "3;2;315;119", "wc_limitations": "2;2;7;14", "wc_review": "101;398;525;391", "wc_reply_reviewers": "14;14;74;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 30.732718721258617 ], "wc_strengths_avg": [ 75.75, 51.20729928437937 ], "wc_weaknesses_avg": [ 99.0, 32.8709598277872 ], "wc_questions_avg": [ 109.75, 127.68980969521412 ], "wc_limitations_avg": [ 6.25, 4.9180788932265 ], "wc_review_avg": [ 353.75, 155.3663010436948 ], "wc_reply_reviewers_avg": [ 31.0, 25.03996805109783 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=173292547788654862&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;us.ibm.com;researcher.watson.ibm.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;2;1", "aff_unique_norm": "International Business Machines Corporation;IBM;International Business Machines", "aff_unique_dep": ";IBM Research;IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";TJ Watson", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stylus: Automatic Adapter Selection for Diffusion Models", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96747", "id": "3Odq2tGSpp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3Odq2tGSpp", "openreview": "https://openreview.net/forum?id=3Odq2tGSpp", "poster": "/media/PosterPDFs/NeurIPS%202024/96747.png?t=1731498036.9721751", "project": "", "author_site": "Michael Luo, Justin Wong, Brandon Trabucco, Yanping Huang, Joseph Gonzalez, zhifeng Chen, Ruslan Salakhutdinov, Ion Stoica", "tldr": "", "abstract": "Beyond scaling base models with more data or parameters, fine-tuned adapters provide an alternative way to generate high fidelity, custom images at reduced costs. As such, adapters have been widely adopted by open-source communities, accumulating a database of over 100K adapters\u2014most of which are highly customized with insufficient descriptions. To generate high quality images, this paper explores the problem of matching the prompt to a Stylus of relevant adapters, built on recent work that highlight the performance gains of composing adapters. We introduce Stylus, which efficiently selects and automatically composes task-specific adapters based on a prompt's keywords. Stylus outlines a three-stage approach that first summarizes adapters with improved descriptions and embeddings, retrieves relevant adapters, and then further assembles adapters based on prompts' keywords by checking how well they fit the prompt. To evaluate Stylus, we developed StylusDocs, a curated dataset featuring 75K adapters with pre-computed adapter embeddings. In our evaluation on popular Stable Diffusion checkpoints, Stylus achieves greater CLIP/FID Pareto efficiency and is twice as preferred, with humans and multimodal models as evaluators, over the base model.", "keywords": "Stable Diffusion;Diffusion-based Models;Computer Vision;Artificial Intelligence;RAG;Retrieval;Adapters;LoRA", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Michael Luo;Justin Wong;Brandon Trabucco;Yanping Huang;Joseph E. Gonzalez;Zhifeng Chen;Russ Salakhutdinov;Ion Stoica", "authorids": "~Michael_Luo2;~Justin_Wong1;~Brandon_Trabucco1;~Yanping_Huang1;~Joseph_E._Gonzalez1;~Zhifeng_Chen1;~Russ_Salakhutdinov1;~Ion_Stoica1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://people.eecs.berkeley.edu/~wong.justin/;http://btrabucco.com;;http://eecs.berkeley.edu/~jegonzal;;https://www.cs.cmu.edu/~rsalakhu/;http://people.eecs.berkeley.edu/~istoica/", "dblp": "152/0092;;;00/10104;61/8262;61/5154;;s/IonStoica", "google_scholar": "XpO6-kEAAAAJ;l49M8zUAAAAJ;aLquhd4AAAAJ;uEtBQScAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;;;vN-is70AAAAJ", "orcid": ";;;;0000-0003-2921-956X;;;", "linkedin": ";justin-wong-23155411b;;;;;;ionstoica", "or_profile": "~Michael_Luo2;~Justin_Wong1;~Brandon_Trabucco1;~Yanping_Huang1;~Joseph_E._Gonzalez1;~Zhifeng_Chen1;~Russ_Salakhutdinov1;~Ion_Stoica1", "aff": "University of California, Berkeley;Meta Facebook;Carnegie Mellon University;Google;University of California, Berkeley;Google;School of Computer Science, Carnegie Mellon University;University of California, Berkeley", "aff_domain": "berkeley.edu;meta.com;mld.cs.cmu.edu;google.com;berkeley.edu;google.com;cs.cmu.edu;berkeley.edu", "position": "PhD student;Researcher;PhD student;Engineer;Associate Professor;Engineer;Full Professor;Full Professor", "bibtex": "@inproceedings{\nluo2024stylus,\ntitle={Stylus: Automatic Adapter Selection for Diffusion Models},\nauthor={Michael Luo and Justin Wong and Brandon Trabucco and Yanping Huang and Joseph E. Gonzalez and Zhifeng Chen and Russ Salakhutdinov and Ion Stoica},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3Odq2tGSpp}\n}", "github": "", "reviewers": "wGTi;rCTt;QQJ7;N5AD", "pdf_size": 29817167, "rating": "7;7;7;9", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "78;61;46;96", "wc_strengths": "102;48;73;36", "wc_weaknesses": "169;39;74;26", "wc_questions": "22;8;38;1", "wc_limitations": "11;12;1;4", "wc_review": "382;168;232;163", "wc_reply_reviewers": "33;17;21;0", "wc_reply_authors": "23;9;19;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 18.686559340873856 ], "wc_strengths_avg": [ 64.75, 25.31180554602931 ], "wc_weaknesses_avg": [ 77.0, 55.94193418179246 ], "wc_questions_avg": [ 17.25, 14.16642156650719 ], "wc_limitations_avg": [ 7.0, 4.636809247747852 ], "wc_review_avg": [ 236.25, 88.4374779151916 ], "wc_reply_reviewers_avg": [ 17.75, 11.818946653572814 ], "wc_reply_authors_avg": [ 12.75, 8.954747344286158 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10865136697627135645&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "berkeley.edu;meta.com;mld.cs.cmu.edu;google.com;berkeley.edu;google.com;cs.cmu.edu;berkeley.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;0;3;2;0", "aff_unique_norm": "University of California, Berkeley;Meta;Carnegie Mellon University;Google", "aff_unique_dep": ";Meta Platforms, Inc.;;Google", "aff_unique_url": "https://www.berkeley.edu;https://meta.com;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Meta;CMU;Google", "aff_campus_unique_index": "0;2;0;2;3;0", "aff_campus_unique": "Berkeley;;Mountain View;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Flexible Context-Driven Sensory Processing in Dynamical Vision Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96746", "id": "3PqhU96Vvv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3PqhU96Vvv", "openreview": "https://openreview.net/forum?id=3PqhU96Vvv", "poster": "", "project": "", "author_site": "Lakshmi Narasimhan Govindarajan, Abhiram Iyer, Valmiki Kothare, Ila Fiete", "tldr": "", "abstract": "Visual representations become progressively more abstract along the cortical hierarchy. These abstract representations define notions like objects and shapes, but at the cost of spatial specificity. By contrast, low-level regions represent spatially local but simple input features. How do spatially non-specific representations of abstract concepts in high-level areas flexibly modulate the low-level sensory representations in appropriate ways to guide context-driven and goal-directed behaviors across a range of tasks? We build a biologically motivated and trainable neural network model of dynamics in the visual pathway, incorporating local, lateral, and feedforward synaptic connections, excitatory and inhibitory neurons, and long-range top-down inputs conceptualized as low-rank modulations of the input-driven sensory responses by high-level areas. We study this ${\\bf D}$ynamical ${\\bf C}$ortical ${\\bf net}$work ($DCnet$) in a visual cue-delay-search task and show that the model uses its own cue representations to adaptively modulate its perceptual responses to solve the task, outperforming state-of-the-art DNN vision and LLM models. The model's population states over time shed light on the nature of contextual modulatory dynamics, generating predictions for experiments. We fine-tune the same model on classic psychophysics attention tasks, and find that the model closely replicates known reaction time results. This work represents a promising new foundation for understanding and making predictions about perturbations to visual processing in the brain.", "keywords": "convolutional RNNs;feedback;top-down modulation;neural dynamics;psychophysics", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Lakshmi Narasimhan Govindarajan;Abhiram Iyer;Valmiki Kothare;Ila R Fiete", "authorids": "~Lakshmi_Narasimhan_Govindarajan3;~Abhiram_Iyer1;~Valmiki_Kothare1;~Ila_R_Fiete1", "gender": ";;M;F", "homepage": ";;https://www.valmikikothare.com;https://fietelab.mit.edu/", "dblp": ";;;", "google_scholar": ";pXIRasMAAAAJ;eMpK0pcAAAAJ;uE-CihIAAAAJ", "orcid": ";;;0000-0003-4738-2539", "linkedin": ";;valmiki-kothare/;", "or_profile": "~Lakshmi_Narasimhan_Govindarajan3;~Abhiram_Iyer1;~Valmiki_Kothare1;~Ila_R_Fiete1", "aff": ";Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": ";mit.edu;mit.edu;mit.edu", "position": ";PhD student;Researcher;Professor", "bibtex": "@inproceedings{\ngovindarajan2024flexible,\ntitle={Flexible Context-Driven Sensory Processing in Dynamical Vision Models},\nauthor={Lakshmi Narasimhan Govindarajan and Abhiram Iyer and Valmiki Kothare and Ila R Fiete},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3PqhU96Vvv}\n}", "github": "", "reviewers": "BT2x;qySN;BdqG;7BUE", "pdf_size": 13112615, "rating": "3;5;6;7", "confidence": "3;4;3;5", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;2;3", "wc_summary": "62;112;58;94", "wc_strengths": "39;50;124;172", "wc_weaknesses": "502;226;212;155", "wc_questions": "2;48;5;6", "wc_limitations": "2;26;8;6", "wc_review": "607;462;407;433", "wc_reply_reviewers": "0;159;88;0", "wc_reply_authors": "943;279;35;45", "reply_reviewers": "0;1;1;0", "reply_authors": "4;2;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.5, 22.46664193866097 ], "wc_strengths_avg": [ 96.25, 54.600251830921074 ], "wc_weaknesses_avg": [ 273.75, 134.43655566846394 ], "wc_questions_avg": [ 15.25, 18.965429075030176 ], "wc_limitations_avg": [ 10.5, 9.205976319760984 ], "wc_review_avg": [ 477.25, 77.39630159122592 ], "wc_reply_reviewers_avg": [ 61.75, 66.65723891671482 ], "wc_reply_authors_avg": [ 325.5, 369.6413802592994 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6625413488689132, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13159875662384823477&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "email": ";mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Post-Hoc Reversal: Are We Selecting Models Prematurely?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96745", "id": "3R7Go6WkDm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3R7Go6WkDm", "openreview": "https://openreview.net/forum?id=3R7Go6WkDm", "poster": "", "project": "", "author_site": "Rishabh Ranjan, Saurabh Garg, Mrigank Raman, Carlos Guestrin, Zachary Lipton", "tldr": "", "abstract": "Trained models are often composed with post-hoc transforms such as temperature scaling (TS), ensembling and stochastic weight averaging (SWA) to improve performance, robustness, uncertainty estimation, etc. However, such transforms are typically applied only after the base models have already been finalized by standard means. In this paper, we challenge this practice with an extensive empirical study. In particular, we demonstrate a phenomenon that we call post-hoc reversal, where performance trends are reversed after applying post-hoc transforms. This phenomenon is especially prominent in high-noise settings. For example, while base models overfit badly early in training, both ensembling and SWA favor base models trained for more epochs. Post-hoc reversal can also prevent the appearance of double descent and mitigate mismatches between test loss and test error seen in base models. Preliminary analyses suggest that these transforms induce reversal by suppressing the influence of mislabeled examples, exploiting differences in their learning dynamics from those of clean examples. Based on our findings, we propose post-hoc selection, a simple technique whereby post-hoc metrics inform model development decisions such as early stopping, checkpointing, and broader hyperparameter choices. Our experiments span real-world vision, language, tabular and graph datasets. On an LLM instruction tuning dataset, post-hoc selection results in >1.5x MMLU improvement compared to naive selection.", "keywords": "temperature scaling;ensembling;stochastic weight averaging;noisy data;overfitting;double descent;early stopping;hyperparameter tuning", "primary_area": "evaluation", "supplementary_material": "", "author": "Rishabh Ranjan;Saurabh Garg;Mrigank Raman;Carlos Guestrin;Zachary Chase Lipton", "authorids": "~Rishabh_Ranjan1;~Saurabh_Garg3;~Mrigank_Raman2;~Carlos_Guestrin1;~Zachary_Chase_Lipton1", "gender": "M;M;M;M;Unspecified", "homepage": "https://rishabh-ranjan.github.io;http://saurabhgarg1996.github.io/;https://zuluzazu.github.io/;https://guestrin.stanford.edu;http://zacklipton.com", "dblp": ";80/208;;38/769;", "google_scholar": "NNzQUrcAAAAJ;SAnJ1hIAAAAJ;;DpLFv4gAAAAJ;MN9Kfg8AAAAJ", "orcid": ";;;;", "linkedin": ";saurabh-garg-b680b5b8/;mrigank-raman-3b9b42177/;carlos-guestrin-5352a869/;", "or_profile": "~Rishabh_Ranjan1;~Saurabh_Garg3;~Mrigank_Raman2;~Carlos_Guestrin1;~Zachary_Chase_Lipton1", "aff": "Stanford University;Carnegie Mellon University;Carnegie Mellon University;Stanford University;Carnegie Mellon University", "aff_domain": "stanford.edu;cmu.edu;cmu.edu;stanford.edu;cmu.edu", "position": "PhD student;PhD student;MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nranjan2024posthoc,\ntitle={Post-Hoc Reversal: Are We Selecting Models Prematurely?},\nauthor={Rishabh Ranjan and Saurabh Garg and Mrigank Raman and Carlos Guestrin and Zachary Chase Lipton},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3R7Go6WkDm}\n}", "github": "", "reviewers": "NSoG;vEtm;GiJ5", "pdf_size": 3432768, "rating": "5;5;7", "confidence": "3;4;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "176;108;164", "wc_strengths": "130;32;50", "wc_weaknesses": "66;99;263", "wc_questions": "17;145;61", "wc_limitations": "1;1;6", "wc_review": "390;385;544", "wc_reply_reviewers": "0;25;8", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 149.33333333333334, 29.63481436119049 ], "wc_strengths_avg": [ 70.66666666666667, 42.59368758656877 ], "wc_weaknesses_avg": [ 142.66666666666666, 86.14845068575264 ], "wc_questions_avg": [ 74.33333333333333, 53.099487337973 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 439.6666666666667, 73.80304118997326 ], "wc_reply_reviewers_avg": [ 11.0, 10.424330514074594 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZjZTbNIjwRIJ:scholar.google.com/&scioq=Post-Hoc+Reversal:+Are+We+Selecting+Models+Prematurely%3F&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "stanford.edu;cmu.edu;cmu.edu;stanford.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Stanford University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu", "aff_unique_abbr": "Stanford;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3Rtn1OMTC4", "title": "Spatiotemporal Predictive Pre-training for Robotic Motor Control", "track": "main", "status": "Reject", "tldr": "", "abstract": "Robotic motor control necessitates the ability to predict the dynamics of environments and interaction objects. However, advanced self-supervised pre-trained visual representations (PVRs) in robotic motor control, leveraging large-scale egocentric videos, often focus solely on learning the static content features of sampled image frames. This neglects the crucial temporal motion clues in human video data, which implicitly contain key knowledge about sequential interacting and manipulating with the environments and objects. In this paper, we present a simple yet effective robotic motor control visual pre-training framework that jointly performs spatiotemporal prediction with dual decoders, utilizing large-scale video data, termed as \\textbf{STP}. STP adheres to two key designs in a multi-task learning manner. First, we perform spatial prediction on the masked current frame for learning content features. Second, we utilize the future frame with an extremely high masking ratio as a condition, based on the masked current frame, to conduct temporal prediction of future frame for capturing motion features. This asymmetric masking and decoder architecture design is very efficient, ensuring that our representation focusing on motion information while capturing spatial details. We carry out the largest-scale BC evaluation of PVRs for robotic motor control to date, which encompasses 21 tasks within a real-world Franka robot arm and 5 simulated environments. Extensive experiments demonstrate the effectiveness of STP as well as unleash its generality and data efficiency by further post-pre-training and hybrid pre-training. Our code and weights will be released for further applications.", "keywords": "Robotics Vision Pre-training;Robotic motor control;Behavior Cloning;Robot Manipulation", "primary_area": "robotics", "supplementary_material": "/attachment/03ae25c0a6d1b6a3b212ed0eab5e79d6be3f82f2.zip", "author": "Jiange Yang;Bei Liu;Jianlong Fu;Bocheng Pan;Gangshan Wu;Limin Wang", "authorids": "~Jiange_Yang1;~Bei_Liu2;~Jianlong_Fu1;~Bocheng_Pan1;~Gangshan_Wu1;~Limin_Wang1", "gender": "M;F;M;M;M;M", "homepage": "http://yangjiangeyjg.github.io;https://www.microsoft.com/en-us/research/people/libei/;;https://Penation.github.io;http://mcg.nju.edu.cn/member/gswu/en/index.html;https://wanglimin.github.io", "dblp": "340/4152;39/3711-1;83/8692;;78/1123;68/6610-2", "google_scholar": "https://scholar.google.com.hk/citations?user=bnF9-8YAAAAJ;7IZyaZsAAAAJ;-WqSwu8AAAAJ;;;HEuN8PcAAAAJ", "orcid": ";;;;0000-0003-1391-1762;", "linkedin": ";;;;;", "or_profile": "~Jiange_Yang1;~Bei_Liu2;~Jianlong_Fu1;~Bocheng_Pan1;~Gangshan_Wu1;~Limin_Wang2", "aff": "Nanjing University;Microsoft Research Asia;Microsoft;Institute of Microelectronics of China Academy of Sciences;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;microsoft.com;microsoft.com;ime.ac.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Researcher;Senior Researcher;MS student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024spatiotemporal,\ntitle={Spatiotemporal Predictive Pre-training for Robotic Motor Control},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3Rtn1OMTC4}\n}", "github": "", "project": "", "reviewers": "Dk2S;hmK3;QdYc;UXtm", "site": "https://openreview.net/forum?id=3Rtn1OMTC4", "pdf_size": 2032917, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "111;62;53;73", "wc_strengths": "38;50;103;39", "wc_weaknesses": "245;83;82;111", "wc_questions": "7;1;13;44", "wc_limitations": "10;1;6;16", "wc_review": "411;197;257;283", "wc_reply_reviewers": "256;17;22;41", "wc_reply_authors": "506;130;10;10", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 22.094965489902897 ], "wc_strengths_avg": [ 57.5, 26.688012290165037 ], "wc_weaknesses_avg": [ 130.25, 67.26579740105666 ], "wc_questions_avg": [ 16.25, 16.57369904396722 ], "wc_limitations_avg": [ 8.25, 5.494315243958978 ], "wc_review_avg": [ 287.0, 78.08969202141856 ], "wc_reply_reviewers_avg": [ 84.0, 99.70707096289611 ], "wc_reply_authors_avg": [ 164.0, 203.44040896537737 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6708228552552122776&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1;2;0;0", "aff_unique_norm": "Nanjing University;Microsoft;China Academy of Sciences", "aff_unique_dep": ";Research;Institute of Microelectronics", "aff_unique_url": "https://www.nju.edu.cn;https://www.microsoft.com/en-us/research/group/asia;http://www.cas.cn/", "aff_unique_abbr": "Nanjing U;MSR Asia;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Generative Adversarial Model-Based Optimization via Source Critic Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96744", "id": "3RxcarQFRn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3RxcarQFRn", "openreview": "https://openreview.net/forum?id=3RxcarQFRn", "poster": "/media/PosterPDFs/NeurIPS%202024/96744.png?t=1731283949.9553368", "project": "", "author_site": "Michael Yao, Yimeng Zeng, Hamsa Bastani, Jacob Gardner, James Gee, Osbert Bastani", "tldr": "", "abstract": "Offline model-based optimization seeks to optimize against a learned surrogate model without querying the true oracle objective function during optimization. Such tasks are commonly encountered in protein design, robotics, and clinical medicine where evaluating the oracle function is prohibitively expensive. However, inaccurate surrogate model predictions are frequently encountered along offline optimization trajectories. To address this limitation, we propose *generative adversarial model-based optimization* using **adaptive source critic regularization (aSCR)**\u2014a task- and optimizer- agnostic framework for constraining the optimization trajectory to regions of the design space where the surrogate function is reliable. We propose a computationally tractable algorithm to dynamically adjust the strength of this constraint, and show how leveraging aSCR with standard Bayesian optimization outperforms existing methods on a suite of offline generative design tasks. Our code is available at https://github.com/michael-s-yao/gabo.", "keywords": "Offline Optimization;Bayesian Optimization;Surrogate Objectives", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Michael S Yao;Yimeng Zeng;Hamsa Bastani;Jacob R. Gardner;James Gee;Osbert Bastani", "authorids": "~Michael_S_Yao1;~Yimeng_Zeng1;~Hamsa_Bastani1;~Jacob_R._Gardner1;~James_Gee1;~Osbert_Bastani1", "gender": "M;M;M;M;F;M", "homepage": "https://michaelsyao.com;;https://www.med.upenn.edu/apps/faculty/index.php/g5455356/p10656;http://obastani.github.io;https://hamsabastani.github.io/;", "dblp": "327/9549.html;;30/6904;21/11275;199/1777;144/7773", "google_scholar": "jz9IC2QAAAAJ;OoVzXRYAAAAJ;https://scholar.google.com.tw/citations?user=fU8fmEIAAAAJ;cxYepGkAAAAJ;ZbUfUMoAAAAJ;0gkajvEAAAAJ", "orcid": "0000-0002-7008-6028;;;;;", "linkedin": "michael-s-yao/;yimengz;;;;", "or_profile": "~Michael_S_Yao1;~Yimeng_Zeng1;~James_Gee1;~Osbert_Bastani1;~Hamsa_Sridhar_Bastani1;~Jacob_R_Gardner1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;The Wharton School, University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;wharton.upenn.edu;upenn.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyao2024generative,\ntitle={Generative Adversarial Model-Based Optimization via Source Critic Regularization},\nauthor={Michael S Yao and Yimeng Zeng and Hamsa Bastani and Jacob R. Gardner and James Gee and Osbert Bastani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3RxcarQFRn}\n}", "github": "", "reviewers": "AH6U;yPeb;Yd2B;U2YG;czFU;bMXC", "pdf_size": 1235787, "rating": "5;6;6;6;6;7", "confidence": "4;3;4;5;4;4", "soundness": "2;3;3;4;3;3", "novelty": "2;2;3;3;2;3", "presentation": "1;2;2;4;3;3", "wc_summary": "55;46;88;97;46;125", "wc_strengths": "9;46;42;39;52;211", "wc_weaknesses": "15;203;178;85;225;203", "wc_questions": "230;92;37;39;2;4", "wc_limitations": "9;121;5;5;1;4", "wc_review": "318;508;350;265;326;547", "wc_reply_reviewers": "23;831;15;134;21;36", "wc_reply_authors": "86;3356;101;191;15;11", "reply_reviewers": "1;3;1;1;1;1", "reply_authors": "3;7;3;3;2;2", "rating_avg": [ 6.0, 0.5773502691896257 ], "confidence_avg": [ 4.0, 0.5773502691896257 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 76.16666666666667, 29.51506206818629 ], "wc_strengths_avg": [ 66.5, 66.04733151308992 ], "wc_weaknesses_avg": [ 151.5, 75.7886315133521 ], "wc_questions_avg": [ 67.33333333333333, 78.60378843004678 ], "wc_limitations_avg": [ 24.166666666666668, 43.36825515922396 ], "wc_review_avg": [ 385.6666666666667, 104.05554072492035 ], "wc_reply_reviewers_avg": [ 176.66666666666666, 295.44918720859977 ], "wc_reply_authors_avg": [ 626.6666666666666, 1222.081102964211 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.74535599249993 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1025306044151228766&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;wharton.upenn.edu;upenn.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Theoretical Foundations of Deep Selective State-Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96743", "id": "3SzrqwupUx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3SzrqwupUx", "openreview": "https://openreview.net/forum?id=3SzrqwupUx", "poster": "/media/PosterPDFs/NeurIPS%202024/96743.png?t=1731494450.265795", "project": "", "author_site": "Nicola Muca Cirone, Antonio Orvieto, Benjamin Walker, Cristopher Salvi, Terry Lyons", "tldr": "", "abstract": "Structured state-space models (SSMs) are gaining popularity as effective foundational architectures for sequential data, demonstrating outstanding performance across a diverse set of domains alongside desirable scalability properties. Recent developments show that if the linear recurrence powering SSMs allows for a selectivity mechanism leveraging multiplicative interactions between inputs and hidden states (e.g. Mamba, GLA, Hawk/Griffin, HGRN2), then the resulting architecture can surpass attention-powered foundation models trained on text in both accuracy and efficiency, at scales of billion parameters. In this paper, we give theoretical grounding to the selectivity mechanism, often linked to in-context learning, using tools from Rough Path Theory. We provide a framework for the theoretical analysis of generalized selective SSMs, fully characterizing their expressive power and identifying the gating mechanism as the crucial architectural choice. Our analysis provides a closed-form description of the expressive powers of modern SSMs, such as Mamba, quantifying theoretically the drastic improvement in performance from the previous generation of models, such as S4. Our theory not only motivates the success of modern selective state-space models, but also provides a solid framework to understand the expressive power of future SSM variants. In particular, it suggests cross-channel interactions could play a vital role in future improvements.", "keywords": "theory;deep learning theory;SSM;RNN;neural controlled differential equations;signatures;kernels", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/416fe722331399eafa4acc22b5fce96ef3d6cd13.zip", "author": "Nicola Muca Cirone;Antonio Orvieto;Benjamin Walker;Cristopher Salvi;Terry Lyons", "authorids": "~Nicola_Muca_Cirone1;~Antonio_Orvieto3;~Benjamin_Walker1;~Cristopher_Salvi1;~Terry_Lyons2", "gender": "M;M;;M;M", "homepage": "https://mucacirone.github.io/;http://orvi.altervista.org/;;https://www.maths.ox.ac.uk/people/cristopher.salvi;https://DataSig.ac.uk/", "dblp": ";;344/6314;;", "google_scholar": "jjm12qUAAAAJ;xkuLyHoAAAAJ;;FVxJ4iIAAAAJ;", "orcid": ";;0000-0002-9574-973X;;", "linkedin": ";antonio-orvieto-947ab0130/;benjamin-walker-3880771b9/;cristopher-salvi/;", "or_profile": "~Nicola_Muca_Cirone1;~Antonio_Orvieto3;~Benjamin_Walker1;~Cristopher_Salvi1;~Terry_Lyons2", "aff": "Imperial College London;ELLIS Institute T\u00fcbingen, Max Planck Institute for Intelligent Systems, T\u00fcbingen AI Center, T\u00fcbingen, Germany;Weierstrass Institute for Applied Analysis and Stochastics;Imperial College London;University of Oxford", "aff_domain": "ic.ac.uk;tue.ellis.eu;wias-berlin.de;ic.ac.uk;ox.ac.uk", "position": "PhD student;Principal Researcher;Intern;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncirone2024theoretical,\ntitle={Theoretical Foundations of Deep Selective State-Space Models},\nauthor={Nicola Muca Cirone and Antonio Orvieto and Benjamin Walker and Cristopher Salvi and Terry Lyons},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3SzrqwupUx}\n}", "github": "", "reviewers": "1RLC;xt92;K8Nc;KnKG", "pdf_size": 643604, "rating": "5;6;7;7", "confidence": "4;2;5;2", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;2;3;3", "wc_summary": "52;50;43;132", "wc_strengths": "84;23;46;61", "wc_weaknesses": "124;59;67;291", "wc_questions": "86;34;15;83", "wc_limitations": "32;8;1;44", "wc_review": "378;174;172;611", "wc_reply_reviewers": "70;0;12;192", "wc_reply_authors": "328;0;0;423", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.25, 36.38251640554842 ], "wc_strengths_avg": [ 53.5, 22.20923231451281 ], "wc_weaknesses_avg": [ 135.25, 93.34981253328793 ], "wc_questions_avg": [ 54.5, 30.76117683054405 ], "wc_limitations_avg": [ 21.25, 17.455300054711177 ], "wc_review_avg": [ 333.75, 180.62997397995716 ], "wc_reply_reviewers_avg": [ 68.5, 76.05754400452331 ], "wc_reply_authors_avg": [ 187.75, 190.7306674344742 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.058025885318565944, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10550081057530019881&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ic.ac.uk;tue.ellis.eu;wias-berlin.de;ic.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Imperial College London;ELLIS Institute T\u00fcbingen;Weierstrass Institute for Applied Analysis and Stochastics;University of Oxford", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.imperial.ac.uk;;https://www.wias-berlin.de/;https://www.ox.ac.uk", "aff_unique_abbr": "ICL;;WIAS;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Lexicon3D: Probing Visual Foundation Models for Complex 3D Scene Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96742", "id": "3TxyhBZHT2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3TxyhBZHT2", "openreview": "https://openreview.net/forum?id=3TxyhBZHT2", "poster": "/media/PosterPDFs/NeurIPS%202024/96742.png?t=1731736558.2512417", "project": "", "author_site": "Yunze Man, Shuhong Zheng, Zhipeng Bao, Martial Hebert, Liangyan Gui, Yu-Xiong Wang", "tldr": "", "abstract": "Complex 3D scene understanding has gained increasing attention, with scene encoding strategies built on top of visual foundation models playing a crucial role in this success. However, the optimal scene encoding strategies for various scenarios remain unclear, particularly compared to their image-based counterparts. To address this issue, we present the first comprehensive study that probes various visual encoding models for 3D scene understanding, identifying the strengths and limitations of each model across different scenarios. Our evaluation spans seven vision foundation encoders, including image, video, and 3D foundation models. We evaluate these models in four tasks: Vision-Language Scene Reasoning, Visual Grounding, Segmentation, and Registration, each focusing on different aspects of scene understanding. Our evaluation yields key intriguing findings: Unsupervised image foundation models demonstrate superior overall performance, video models excel in object-level tasks, diffusion models benefit geometric tasks, language-pretrained models show unexpected limitations in language-related tasks, and the mixture-of-vision-expert (MoVE) strategy leads to consistent performance improvement. These insights challenge some conventional understandings, provide novel perspectives on leveraging visual foundation models, and highlight the need for more flexible encoder selection in future vision-language and scene understanding tasks.", "keywords": "Visual Foundation Models; Vision-Language Models; Scene Understanding; 3D Awareness", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yunze Man;Shuhong Zheng;Zhipeng Bao;Martial Hebert;Liangyan Gui;Yu-Xiong Wang", "authorids": "~Yunze_Man2;~Shuhong_Zheng1;~Zhipeng_Bao1;~Martial_Hebert1;~Liangyan_Gui1;~Yu-Xiong_Wang1", "gender": "M;M;M;M;F;", "homepage": "https://yunzeman.github.io/;https://zsh2000.github.io/;https://zpbao.github.io/;http://www.cs.cmu.edu/~hebert/;;https://yxw.cs.illinois.edu/", "dblp": "230/4287.html;197/1289;244/8798;h/MartialHebert;155/5055;35/10700", "google_scholar": "xvQIEKAAAAAJ;0XuYAB8AAAAJ;TwYdLuYAAAAJ;https://scholar.google.com.tw/citations?user=0ytii2EAAAAJ;3aE0r9QAAAAJ;T_Q-xDkAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yunze_Man2;~Shuhong_Zheng1;~Zhipeng_Bao1;~Martial_Hebert1;~Liangyan_Gui1;~Yu-Xiong_Wang1", "aff": "Department of Computer Science, University of Illinois at Urbana-Champaign;University of Illinois Urbana-Champaign;Carnegie Mellon University;Carnegie Mellon University;UIUC;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "cs.illinois.edu;illinois.edu;cmu.edu;cmu.edu;cs.illinois.edu;cs.illinois.edu", "position": "PhD student;MS student;PhD student;Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nman2024lexicond,\ntitle={Lexicon3D: Probing Visual Foundation Models for Complex 3D Scene Understanding},\nauthor={Yunze Man and Shuhong Zheng and Zhipeng Bao and Martial Hebert and Liangyan Gui and Yu-Xiong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3TxyhBZHT2}\n}", "github": "", "reviewers": "yLXP;vFsT;4P6H;q2Jg", "pdf_size": 2271219, "rating": "5;6;6;8", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "3;4;3;3", "wc_summary": "144;45;53;55", "wc_strengths": "19;31;44;23", "wc_weaknesses": "122;115;45;48", "wc_questions": "2;1;42;3", "wc_limitations": "1;4;14;18", "wc_review": "288;196;198;147", "wc_reply_reviewers": "11;18;26;18", "wc_reply_authors": "50;50;43;43", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 40.44363361519338 ], "wc_strengths_avg": [ 29.25, 9.54921462739214 ], "wc_weaknesses_avg": [ 82.5, 36.10055401236939 ], "wc_questions_avg": [ 12.0, 17.334935823359714 ], "wc_limitations_avg": [ 9.25, 6.977642868476432 ], "wc_review_avg": [ 207.25, 50.898796645893306 ], "wc_reply_reviewers_avg": [ 18.25, 5.3091901453988255 ], "wc_reply_authors_avg": [ 46.5, 3.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8247701009038959627&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.illinois.edu;illinois.edu;cmu.edu;cmu.edu;cs.illinois.edu;cs.illinois.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Carnegie Mellon University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://illinois.edu;https://www.cmu.edu", "aff_unique_abbr": "UIUC;CMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SimPO: Simple Preference Optimization with a Reference-Free Reward", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96741", "id": "3Tzcot1LKb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3Tzcot1LKb", "openreview": "https://openreview.net/forum?id=3Tzcot1LKb", "poster": "/media/PosterPDFs/NeurIPS%202024/96741.png?t=1731728902.7019496", "project": "", "author_site": "Yu Meng, Mengzhou Xia, Danqi Chen", "tldr": "", "abstract": "Direct Preference Optimization (DPO) is a widely used offline preference optimization algorithm that reparameterizes reward functions in reinforcement learning from human feedback (RLHF) to enhance simplicity and training stability. In this work, we propose SimPO, a simpler yet more effective approach. The effectiveness of SimPO is attributed to a key design: using the _average_ log probability of a sequence as the implicit reward. This reward formulation better aligns with model generation and eliminates the need for a reference model, making it more compute and memory efficient. Additionally, we introduce a target reward margin to the Bradley-Terry objective to encourage a larger margin between the winning and losing responses, further improving the algorithm's performance. We compare SimPO to DPO and its latest variants across various state-of-the-art training setups, including both base and instruction-tuned models such as Mistral, Llama 3, and Gemma 2. We evaluate on extensive chat-based evaluation benchmarks, including AlpacaEval 2, MT-Bench, and Arena-Hard. Our results demonstrate that SimPO consistently and significantly outperforms existing approaches without substantially increasing response length. Specifically, SimPO outperforms DPO by up to 6.4 points on AlpacaEval 2 and by up to 7.5 points on Arena-Hard. Our top-performing model, built on Gemma-2-9B-it, achieves a 72.4\\% length-controlled win rate on AlpacaEval 2, a 59.1\\% win rate on Arena-Hard, and ranks 1st on Chatbot Arena among $<$10B models with real user votes.", "keywords": "Language Models;Preference Optimization;Reinforcement Learning from Human Feedback", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yu Meng;Mengzhou Xia;Danqi Chen", "authorids": "~Yu_Meng1;~Mengzhou_Xia1;~Danqi_Chen1", "gender": "M;F;F", "homepage": "https://yumeng5.github.io/;https://xiamengzhou.github.io/;https://www.cs.princeton.edu/~danqic/", "dblp": "30/4233-1;241/9329;87/7949", "google_scholar": "S2-yZKcAAAAJ;zyJn1IcAAAAJ;sVR8ktkAAAAJ", "orcid": "0000-0003-2554-2888;;", "linkedin": ";;", "or_profile": "~Yu_Meng1;~Mengzhou_Xia1;~Danqi_Chen1", "aff": "University of Virginia;Princeton University;Princeton University", "aff_domain": "virginia.edu;princeton.edu;cs.princeton.edu", "position": "Assistant Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmeng2024simpo,\ntitle={Sim{PO}: Simple Preference Optimization with a Reference-Free Reward},\nauthor={Yu Meng and Mengzhou Xia and Danqi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3Tzcot1LKb}\n}", "github": "", "reviewers": "oFKm;TXhi;Wuuc;a8Vq", "pdf_size": 1100126, "rating": "3;6;7;7", "confidence": "4;5;3;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;4;4;3", "wc_summary": "64;19;78;103", "wc_strengths": "59;104;64;54", "wc_weaknesses": "183;219;102;153", "wc_questions": "44;41;91;145", "wc_limitations": "2;8;16;7", "wc_review": "352;391;351;462", "wc_reply_reviewers": "97;10;24;261", "wc_reply_authors": "474;20;20;669", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;4", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 66.0, 30.520484924063705 ], "wc_strengths_avg": [ 70.25, 19.803724397193573 ], "wc_weaknesses_avg": [ 164.25, 42.868257487329714 ], "wc_questions_avg": [ 80.25, 42.31651568832198 ], "wc_limitations_avg": [ 8.25, 5.018714974971183 ], "wc_review_avg": [ 389.0, 45.1275968781853 ], "wc_reply_reviewers_avg": [ 98.0, 99.73715456137697 ], "wc_reply_authors_avg": [ 295.75, 284.23790651494744 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2156655464068768, "gs_citation": 319, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5243940602394753171&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "virginia.edu;princeton.edu;cs.princeton.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Virginia;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.princeton.edu", "aff_unique_abbr": "UVA;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GS-Hider: Hiding Messages into 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96740", "id": "3XLQp2Xx3J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3XLQp2Xx3J", "openreview": "https://openreview.net/forum?id=3XLQp2Xx3J", "poster": "", "project": "", "author_site": "Xuanyu Zhang, Jiarui Meng, Runyi Li, Zhipei Xu, yongbing zhang, Jian Zhang", "tldr": "", "abstract": "3D Gaussian Splatting (3DGS) has already become the emerging research focus in the fields of 3D scene reconstruction and novel view synthesis. Given that training a 3DGS requires a significant amount of time and computational cost, it is crucial to protect the copyright, integrity, and privacy of such 3D assets. Steganography, as a crucial technique for encrypted transmission and copyright protection, has been extensively studied. However, it still lacks profound exploration targeted at 3DGS. Unlike its predecessor NeRF, 3DGS possesses two distinct features: 1) explicit 3D representation; and 2) real-time rendering speeds. These characteristics result in the 3DGS point cloud files being public and transparent, with each Gaussian point having a clear physical significance. Therefore, ensuring the security and fidelity of the original 3D scene while embedding information into the 3DGS point cloud files is an extremely challenging task. To solve the above-mentioned issue, we first propose a steganography framework for 3DGS, dubbed GS-Hider, which can embed 3D scenes and images into original GS point clouds in an invisible manner and accurately extract the hidden messages. Specifically, we design a coupled secured feature attribute to replace the original 3DGS's spherical harmonics coefficients and then use a scene decoder and a message decoder to disentangle the original RGB scene and the hidden message. Extensive experiments demonstrated that the proposed GS-Hider can effectively conceal multimodal messages without compromising rendering quality and possesses exceptional security, robustness, capacity, and flexibility. Our project is available at: https://xuanyuzhang21.github.io/project/gshider.", "keywords": "3D Steganography;3D Gaussian Splatting;Copyright Protection", "primary_area": "privacy", "supplementary_material": "/attachment/81856e4423d8904b477c946e8d79bad5bee39eb3.zip", "author": "Xuanyu Zhang;Jiarui Meng;Runyi Li;Zhipei Xu;Yongbing Zhang;Jian Zhang", "authorids": "~Xuanyu_Zhang2;~Jiarui_Meng1;~Runyi_Li1;~Zhipei_Xu1;~Yongbing_Zhang1;~Jian_Zhang22", "gender": "M;M;M;M;M;M", "homepage": "https://villa.jianzhang.tech/;https://github.com/JrMeng0312;https://lirunyi2001.github.io/;https://github.com/zhipeixu;;http://jianzhang.tech/", "dblp": "323/9396;374/6336;364/6309;359/4633;95/5329;07/314-18", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;rVP5yWoAAAAJ;;;7brFI_4AAAAJ", "orcid": "0000-0002-6713-4500;;0000-0002-5757-406X;;;0000-0001-5486-3125", "linkedin": ";;;;;", "or_profile": "~Xuanyu_Zhang2;~Jiarui_Meng1;~Runyi_Li1;~Zhipei_Xu1;~Yongbing_Zhang1;~Jian_Zhang22", "aff": "Peking University;Peking University;Peking University;South China University of Technology;Harbin Institute of Technology;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;scut.edu.cn;hit.edu.cn;pku.edu.cn", "position": "PhD student;MS student;MS student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024gshider,\ntitle={{GS}-Hider: Hiding Messages into 3D Gaussian Splatting},\nauthor={Xuanyu Zhang and Jiarui Meng and Runyi Li and Zhipei Xu and Yongbing Zhang and Jian Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3XLQp2Xx3J}\n}", "github": "", "reviewers": "crVb;koxG;X8jQ;S9BH", "pdf_size": 23861941, "rating": "6;6;6;8", "confidence": "5;2;2;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "93;79;75;56", "wc_strengths": "93;76;36;111", "wc_weaknesses": "278;14;13;88", "wc_questions": "99;74;85;4", "wc_limitations": "101;5;4;9", "wc_review": "664;248;213;268", "wc_reply_reviewers": "34;27;12;97", "wc_reply_authors": "46;39;30;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.75, 13.216939887886303 ], "wc_strengths_avg": [ 79.0, 27.739863013360395 ], "wc_weaknesses_avg": [ 98.25, 108.14429018676853 ], "wc_questions_avg": [ 65.5, 36.595764782280476 ], "wc_limitations_avg": [ 29.75, 41.17872630376029 ], "wc_review_avg": [ 348.25, 183.35808545030133 ], "wc_reply_reviewers_avg": [ 42.5, 32.45381333526154 ], "wc_reply_authors_avg": [ 39.75, 6.179603547154137 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5010156242496369259&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;scut.edu.cn;hit.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Peking University;South China University of Technology;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.scut.edu.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "Peking U;SCUT;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "InfoRM: Mitigating Reward Hacking in RLHF via Information-Theoretic Reward Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96739", "id": "3XnBVK9sD6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3XnBVK9sD6", "openreview": "https://openreview.net/forum?id=3XnBVK9sD6", "poster": "/media/PosterPDFs/NeurIPS%202024/96739.png?t=1731942873.8041859", "project": "", "author_site": "Yuchun Miao, Sen Zhang, Liang Ding, Rong Bao, Lefei Zhang, Dacheng Tao", "tldr": "", "abstract": "Despite the success of reinforcement learning from human feedback (RLHF) in aligning language models with human values, reward hacking, also termed reward overoptimization, remains a critical challenge. This issue primarily arises from reward misgeneralization, where reward models (RMs) compute reward using spurious features that are irrelevant to human preferences. In this work, we tackle this problem from an information-theoretic perspective and propose a framework for reward modeling, namely InfoRM, by introducing a variational information bottleneck objective to filter out irrelevant information.\nNotably, we further identify a correlation between overoptimization and outliers in the IB latent space of InfoRM, establishing it as a promising tool for detecting reward overoptimization.\nInspired by this finding, we propose the Cluster Separation Index (CSI), which quantifies deviations in the IB latent space, as an indicator of reward overoptimization to facilitate the development of online mitigation strategies. Extensive experiments on a wide range of settings and RM scales (70M, 440M, 1.4B, and 7B) demonstrate the effectiveness of InfoRM. Further analyses reveal that InfoRM's overoptimization detection mechanism is not only effective but also robust across a broad range of datasets, signifying a notable advancement in the field of RLHF. The code will be released upon acceptance.", "keywords": "Reward Hacking;Reward Overoptimization;Reinforcement Learning from Human Feedback;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuchun Miao;Sen Zhang;Liang Ding;Rong Bao;Lefei Zhang;Dacheng Tao", "authorids": "~Yuchun_Miao1;~Sen_Zhang3;~Liang_Ding3;~Rong_Bao1;~Lefei_Zhang1;~Dacheng_Tao1", "gender": "M;M;M;M;M;", "homepage": ";https://github.com/SenZHANG-GitHub;http://liamding.cc/;https://github.com/rbao2018;;", "dblp": "327/3614;57/6221-6;88/3340-6.html;214/6451;28/10770;", "google_scholar": "-ec3mwUAAAAJ;-bJJNV0AAAAJ;lFCLvOAAAAAJ;teGqP3kAAAAJ;BLKHwNwAAAAJ;", "orcid": "0000-0001-5689-8698;;;;;", "linkedin": ";;;;;", "or_profile": "~Yuchun_Miao1;~Sen_Zhang3;~Liang_Ding3;~Rong_Bao1;~Lefei_Zhang1;~Dacheng_Tao1", "aff": "Wuhan University;University of Sydney, University of Sydney;Zhejiang University;Fudan University;Wuhan University;", "aff_domain": "whu.edu.cn;sydney.edu.au;zju.edu.cn;fudan.edu.cn;whu.edu.cn;", "position": "PhD student;Postdoc;Researcher;PhD student;Full Professor;", "bibtex": "@inproceedings{\nmiao2024inform,\ntitle={Info{RM}: Mitigating Reward Hacking in {RLHF} via Information-Theoretic Reward Modeling},\nauthor={Yuchun Miao and Sen Zhang and Liang Ding and Rong Bao and Lefei Zhang and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3XnBVK9sD6}\n}", "github": "", "reviewers": "YUjH;rGMr;XuFQ;roDN", "pdf_size": 15696314, "rating": "5;5;6;6", "confidence": "3;2;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;2;4", "wc_summary": "54;35;119;84", "wc_strengths": "18;30;103;53", "wc_weaknesses": "845;143;848;54", "wc_questions": "2;166;76;50", "wc_limitations": "63;5;34;10", "wc_review": "982;379;1180;251", "wc_reply_reviewers": "553;31;199;19", "wc_reply_authors": "1575;32;1070;29", "reply_reviewers": "4;1;1;1", "reply_authors": "8;2;4;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 73.0, 31.788362650504666 ], "wc_strengths_avg": [ 51.0, 32.54996159751959 ], "wc_weaknesses_avg": [ 472.5, 375.32286101435386 ], "wc_questions_avg": [ 73.5, 59.63849428012079 ], "wc_limitations_avg": [ 28.0, 22.9891278651453 ], "wc_review_avg": [ 698.0, 391.9661975221843 ], "wc_reply_reviewers_avg": [ 200.5, 215.59858533858704 ], "wc_reply_authors_avg": [ 676.5, 670.2202996030484 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11807234299172134314&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;sydney.edu.au;zju.edu.cn;fudan.edu.cn;whu.edu.cn;", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Wuhan University;University of Sydney;Zhejiang University;Fudan University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.sydney.edu.au;https://www.zju.edu.cn;https://www.fudan.edu.cn", "aff_unique_abbr": "WHU;USYD;ZJU;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Handling Learnwares from Heterogeneous Feature Spaces with Explicit Label Exploitation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96738", "id": "3YIyB82rjX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3YIyB82rjX", "openreview": "https://openreview.net/forum?id=3YIyB82rjX", "poster": "/media/PosterPDFs/NeurIPS%202024/96738.png?t=1733558857.8085234", "project": "", "author_site": "Peng Tan, Hai-Tian Liu, Zhi-Hao Tan, Zhi-Hua Zhou", "tldr": "", "abstract": "The learnware paradigm aims to help users leverage numerous existing high-performing models instead of starting from scratch, where a learnware consists of a well-trained model and the specification describing its capability. Numerous learnwares are accommodated by a learnware dock system. When users solve tasks with the system, models that fully match the task feature space are often rare or even unavailable. However, models with heterogeneous feature space can still be helpful. This paper finds that label information, particularly model outputs, is helpful yet previously less exploited in the accommodation of heterogeneous learnwares. We extend the specification to better leverage model pseudo-labels and subsequently enrich the unified embedding space for better specification evolvement. With label information, the learnware identification can also be improved by additionally comparing conditional distributions. Experiments demonstrate that, even without a model explicitly tailored to user tasks, the system can effectively handle tasks by leveraging models from diverse feature spaces.", "keywords": "learnware;heterogeneous feature spaces;subspace", "primary_area": "evaluation", "supplementary_material": "", "author": "Peng Tan;Hai-Tian Liu;Zhi-Hao Tan;Zhi-Hua Zhou", "authorids": "~Peng_Tan1;~Hai-Tian_Liu1;~Zhi-Hao_Tan1;~Zhi-Hua_Zhou2", "gender": "M;M;M;M", "homepage": "https://www.lamda.nju.edu.cn/tanp/;https://www.lamda.nju.edu.cn/liuht/;http://www.lamda.nju.edu.cn/tanzh/;https://cs.nju.edu.cn/zhouzh/", "dblp": ";;245/3420;z/ZhiHuaZhou", "google_scholar": ";;_9uUbpgAAAAJ;https://scholar.google.com.tw/citations?user=rSVIHasAAAAJ", "orcid": "0000-0003-3749-9266;;0000-0003-4607-6089;0000-0003-0746-1494", "linkedin": ";;;", "or_profile": "~Peng_Tan1;~Hai-Tian_Liu1;~Zhi-Hao_Tan1;~Zhi-hua_Zhou1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntan2024handling,\ntitle={Handling Learnwares from Heterogeneous Feature Spaces with Explicit Label Exploitation},\nauthor={Peng Tan and Hai-Tian Liu and Zhi-Hao Tan and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3YIyB82rjX}\n}", "github": "", "reviewers": "HLyF;PBMH;8wdE;NnRb", "pdf_size": 791665, "rating": "5;5;5;6", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "48;59;71;46", "wc_strengths": "46;68;81;66", "wc_weaknesses": "185;196;70;30", "wc_questions": "31;6;1;3", "wc_limitations": "7;48;1;12", "wc_review": "317;377;224;157", "wc_reply_reviewers": "48;20;11;21", "wc_reply_authors": "274;30;59;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 9.974968671630002 ], "wc_strengths_avg": [ 65.25, 12.517487767120047 ], "wc_weaknesses_avg": [ 120.25, 71.76480683454808 ], "wc_questions_avg": [ 10.25, 12.111461513789324 ], "wc_limitations_avg": [ 17.0, 18.3166590840142 ], "wc_review_avg": [ 268.75, 84.46411960116556 ], "wc_reply_reviewers_avg": [ 25.0, 13.838352503098047 ], "wc_reply_authors_avg": [ 103.75, 98.87460492967848 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17584152425307394483&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Swiss Army Knife for Heterogeneous Federated Learning: Flexible Coupling via Trace Norm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96737", "id": "3YkeHuT1o6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3YkeHuT1o6", "openreview": "https://openreview.net/forum?id=3YkeHuT1o6", "poster": "/media/PosterPDFs/NeurIPS%202024/96737.png?t=1731481718.9370096", "project": "", "author_site": "Tianchi Liao, Lele Fu, Jialong Chen, Zhen Wang, Zibin Zheng, Chuan Chen", "tldr": "", "abstract": "The heterogeneity issue in federated learning (FL) has attracted increasing attention, which is attempted to be addressed by most existing methods. Currently, due to systems and objectives heterogeneity, enabling clients to hold models of different architectures and tasks of different demands has become an important direction in FL. \nMost existing FL methods are based on the homogeneity assumption, namely, different clients have the same architectural models with the same tasks, which are unable to handle complex and multivariate data and tasks. \nTo flexibly address these heterogeneity limitations, we propose a novel federated multi-task learning framework with the help of tensor trace norm, FedSAK. Specifically, it treats each client as a task and splits the local model into a feature extractor and a prediction head. \nClients can flexibly choose shared structures based on heterogeneous situations and upload them to the server, which learns correlations among client models by mining model low-rank structures through tensor trace norm.\nFurthermore, we derive convergence and generalization bounds under non-convex settings. Evaluated on 6 real-world datasets compared to 13 advanced FL models, FedSAK demonstrates superior performance.", "keywords": "Federated learning;federated multi-task learning;heterogeneity;tensor trace norm;low-rank structure", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Tianchi Liao;Lele Fu;Jialong Chen;Zhen WANG;Zibin Zheng;Chuan Chen", "authorids": "~Tianchi_Liao1;~Lele_Fu1;~Jialong_Chen1;~Zhen_WANG2;~Zibin_Zheng1;~Chuan_Chen1", "gender": "F;M;M;M;M;M", "homepage": ";https://github.com/Fulelele;https://github.com/Loong-Chan;https://joneswong.github.io/;https://www.zibinzheng.com/;https://cse.sysu.edu.cn/teacher/ChenChuan", "dblp": ";266/5501;;78/6727-36;z/ZibinZheng;03/1359-1", "google_scholar": "-NOhEasAAAAJ;S6GWvYwAAAAJ;https://scholar.google.com.hk/citations?user=W5LLHgwAAAAJ;e5CqTBMAAAAJ;WPC6ED4AAAAJ;s_zdg_IAAAAJ", "orcid": ";0000-0001-5304-0434;;0000-0002-8140-8782;0000-0002-7878-4330;0000-0002-7048-3445", "linkedin": ";;;;;chuan-chen-173507157/", "or_profile": "~Tianchi_Liao1;~Lele_Fu1;~Jialong_Chen1;~Zhen_WANG2;~Zibin_Zheng1;~Chuan_Chen1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;mail.sysu.edu.cn", "position": "MS student;PhD student;PhD student;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliao2024a,\ntitle={A Swiss Army Knife for Heterogeneous Federated Learning: Flexible Coupling via Trace Norm},\nauthor={Tianchi Liao and Lele Fu and Jialong Chen and Zhen WANG and Zibin Zheng and Chuan Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3YkeHuT1o6}\n}", "github": "", "reviewers": "yu6g;zrPd;kdUe", "pdf_size": 6221374, "rating": "6;7;7", "confidence": "5;4;4", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "35;47;60", "wc_strengths": "115;98;68", "wc_weaknesses": "102;147;126", "wc_questions": "48;43;2", "wc_limitations": "30;5;1", "wc_review": "330;340;257", "wc_reply_reviewers": "9;13;12", "wc_reply_authors": "39;46;50", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 47.333333333333336, 10.208928554075703 ], "wc_strengths_avg": [ 93.66666666666667, 19.430788855719562 ], "wc_weaknesses_avg": [ 125.0, 18.384776310850235 ], "wc_questions_avg": [ 31.0, 20.607442021431645 ], "wc_limitations_avg": [ 12.0, 12.832251036613439 ], "wc_review_avg": [ 309.0, 36.995495221265344 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 1.699673171197595 ], "wc_reply_authors_avg": [ 45.0, 4.546060565661952 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16181041257892511514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;mail.sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Instruction Embedding: Latent Representations of Instructions Towards Task Identification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97861", "id": "3Yrfx7oYMF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3Yrfx7oYMF", "openreview": "https://openreview.net/forum?id=3Yrfx7oYMF", "poster": "/media/PosterPDFs/NeurIPS%202024/97861.png?t=1731500890.493865", "project": "", "author_site": "Yiwei Li, Jiayi Shi, Shaoxiong Feng, Peiwen Yuan, Xinglin Wang, Boyuan Pan, Heda Wang, Yao Hu, Prof. Kan", "tldr": "", "abstract": "Instruction data is crucial for improving the capability of Large Language Models (LLMs) to align with human-level performance. Recent research LIMA demonstrates that alignment is essentially a process where the model adapts instructions' interaction style or format to solve various tasks, leveraging pre-trained knowledge and skills. Therefore, for instructional data, the most important aspect is the task it represents, rather than the specific semantics and knowledge information. The latent representations of instructions play roles for some instruction-related tasks like data selection and demonstrations retrieval. However, they are always derived from text embeddings, encompass overall semantic information that influences the representation of task categories. In this work, we introduce a new concept, instruction embedding, and construct Instruction Embedding Benchmark (IEB) for its training and evaluation. Then, we propose a baseline Prompt-based Instruction Embedding (PIE) method to make the representations more attention on tasks. The evaluation of PIE, alongside other embedding methods on IEB with two designed tasks, demonstrates its superior performance in accurately identifying task categories. Moreover, the application of instruction embeddings in four downstream tasks showcases its effectiveness and suitability for instruction-related tasks.", "keywords": "Instructions;Text Embedding;Large Language Models;Benchmark", "primary_area": "", "supplementary_material": "/attachment/a95f1faaa26c64879ebdea137716b2ac11edd258.zip", "author": "Yiwei Li;Jiayi Shi;Shaoxiong Feng;Peiwen Yuan;Xinglin Wang;Boyuan Pan;Heda Wang;Yao Hu;Kan Li", "authorids": "~Yiwei_Li1;~Jiayi_Shi1;~Shaoxiong_Feng1;~Peiwen_Yuan1;~Xinglin_Wang1;~Boyuan_Pan1;~Heda_Wang1;~Yao_Hu1;~Kan_Li3", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";;http://shaoxiongfeng.com/;https://ypw0102.github.io/;https://github.com/WangXinglin;http://panboyuan.net/;https://github.com/wangheda;;", "dblp": "48/9884-1;;260/0224;327/9196.html;02/1010;203/8799;;;21/2083-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=yMZeaoMAAAAJ;;BaNQV40AAAAJ;https://scholar.google.com.hk/citations?user=cUB5XN8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;lVFNAxcAAAAJ;;LIu7k7wAAAAJ;", "orcid": ";0000-0001-7954-3398;;0000-0002-2403-8295;0000-0002-5834-1508;;;0009-0006-1274-7111;", "linkedin": ";;;;;;;;", "or_profile": "~Yiwei_Li1;~Jiayi_Shi1;~Shaoxiong_Feng1;~Peiwen_Yuan1;~Xinglin_Wang1;~Boyuan_Pan1;~Heda_Wang1;~Yao_Hu1;~Kan_Li3", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;RedNote;Beijing Institute of Technology;Beijing Institute of Technology;Xiaohongshu;;Zhejiang University of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;xiaohongshu.com;bit.edu.cn;bit.edu.cn;xiaohongshu.com;;zjut.edu.cn;bit.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;PhD student;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\nli2024instruction,\ntitle={Instruction Embedding: Latent Representations of Instructions Towards Task Identification},\nauthor={Yiwei Li and Jiayi Shi and Shaoxiong Feng and Peiwen Yuan and Xinglin Wang and Boyuan Pan and Heda Wang and Yao Hu and Kan Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3Yrfx7oYMF}\n}", "github": "", "reviewers": "35T1;4oB5;uG4q;fPtv", "pdf_size": 1904334, "rating": "5;6;7;7", "confidence": "4;3;3;4", "wc_summary_and_contributions": "67;79;52;93", "wc_strengths": "46;4;46;32", "wc_improvement": "15;4;9;118", "wc_limitations": "1;17;13;1", "wc_correctness": "1;1;11;4", "wc_clarity": "1;1;12;1", "wc_relation_to_prior_work": "29;1;32;32", "wc_documentation": "1;1;1;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "162;109;177;296", "wc_reply_reviewers": "295;0;14;0", "wc_reply_authors": "33;33;31;33", "reply_reviewers": "2;0;1;0", "reply_authors": "6;2;3;4", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 72.75, 15.105876340020794 ], "wc_strengths_avg": [ 32.0, 17.146428199482248 ], "wc_improvement_avg": [ 36.5, 47.214934078107106 ], "wc_limitations_avg": [ 8.0, 7.14142842854285 ], "wc_correctness_avg": [ 4.25, 4.085033659592048 ], "wc_clarity_avg": [ 3.75, 4.763139720814412 ], "wc_relation_to_prior_work_avg": [ 23.5, 13.047988350699889 ], "wc_documentation_avg": [ 4.25, 5.629165124598851 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 186.0, 68.34837232882727 ], "wc_reply_reviewers_avg": [ 77.25, 125.84787443576471 ], "wc_reply_authors_avg": [ 32.5, 0.8660254037844386 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2319956002400998598&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "bit.edu.cn;bit.edu.cn;xiaohongshu.com;bit.edu.cn;bit.edu.cn;xiaohongshu.com;;zjut.edu.cn;bit.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;0;2;3;0", "aff_unique_norm": "Beijing Institute of Technology;RedNote;Xiaohongshu;Zhejiang University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bit.edu.cn/;;https://www.xiaohongshu.com;https://www.zjut.edu.cn", "aff_unique_abbr": "BIT;;XHS;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Faster Local Solvers for Graph Diffusion Equations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96736", "id": "3Z0LTDjIM0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3Z0LTDjIM0", "openreview": "https://openreview.net/forum?id=3Z0LTDjIM0", "poster": "/media/PosterPDFs/NeurIPS%202024/96736.png?t=1731467532.1380599", "project": "", "author_site": "Jiahe Bai, Baojian Zhou, Deqing Yang, Yanghua Xiao", "tldr": "", "abstract": "Efficient computation of graph diffusion equations (GDEs), such as Personalized PageRank, Katz centrality, and the Heat kernel, is crucial for clustering, training neural networks, and many other graph-related problems. Standard iterative methods require accessing the whole graph per iteration, making them time-consuming for large-scale graphs. While existing local solvers approximate diffusion vectors through heuristic local updates, they often operate sequentially and are typically designed for specific diffusion types, limiting their applicability. Given that diffusion vectors are highly localizable, as measured by the participation ratio, this paper introduces a novel framework for approximately solving GDEs using a local diffusion process. This framework reveals the suboptimality of existing local solvers. Furthermore, our approach effectively localizes standard iterative solvers by designing simple and provably sublinear time algorithms. These new local solvers are highly parallelizable, making them well-suited for implementation on GPUs. We demonstrate the effectiveness of our framework in quickly obtaining approximate diffusion vectors, achieving up to a hundred-fold speed improvement, and its applicability to large-scale dynamic graphs. Our framework could also facilitate more efficient local message-passing mechanisms for GNNs.", "keywords": "Graph Diffusion Equation;Personalized PageRank;Heat Kernel;Katz;Local Solvers;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/95fd89df9a01f5de94ff09c00501a2ab47cbe5a8.zip", "author": "Jiahe Bai;Baojian Zhou;Deqing Yang;Yanghua Xiao", "authorids": "~Jiahe_Bai1;~Baojian_Zhou2;~Deqing_Yang1;~Yanghua_Xiao1", "gender": "M;M;M;", "homepage": "https://github.com/SHADOWTOP303;https://baojian.github.io/;http://kw.fudan.edu.cn/people/yangdeqing/;", "dblp": ";139/5761.html;01/2462.html;96/999", "google_scholar": ";FWQHIYgAAAAJ;uZdQxkwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0002-1390-3861;0000-0001-8403-9591", "linkedin": ";;;", "or_profile": "~Jiahe_Bai1;~Baojian_Zhou2;~Deqing_Yang1;~Yanghua_Xiao1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "Undergrad student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbai2024faster,\ntitle={Faster Local Solvers for Graph Diffusion Equations},\nauthor={Jiahe Bai and Baojian Zhou and Deqing Yang and Yanghua Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3Z0LTDjIM0}\n}", "github": "", "reviewers": "X7w6;X6sG;kfsj;JA1M", "pdf_size": 1397450, "rating": "4;5;6;7", "confidence": "3;2;4;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "36;126;80;70", "wc_strengths": "50;154;29;49", "wc_weaknesses": "31;106;199;103", "wc_questions": "4;50;40;4", "wc_limitations": "2;2;1;1", "wc_review": "123;438;349;227", "wc_reply_reviewers": "18;0;21;14", "wc_reply_authors": "90;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 32.155870381627054 ], "wc_strengths_avg": [ 70.5, 48.93107397145499 ], "wc_weaknesses_avg": [ 109.75, 59.63797028739325 ], "wc_questions_avg": [ 24.5, 20.80264406271472 ], "wc_limitations_avg": [ 1.5, 0.5 ], "wc_review_avg": [ 284.25, 119.48927776164687 ], "wc_reply_reviewers_avg": [ 13.25, 8.042853971072706 ], "wc_reply_authors_avg": [ 22.5, 38.97114317029974 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h6p9h2tLfr8J:scholar.google.com/&scioq=Faster+Local+Solvers+for+Graph+Diffusion+Equations&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "On the Inductive Bias of Stacking Towards Improving Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96735", "id": "3ZAfFoAcUI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ZAfFoAcUI", "openreview": "https://openreview.net/forum?id=3ZAfFoAcUI", "poster": "/media/PosterPDFs/NeurIPS%202024/96735.png?t=1731476682.617246", "project": "", "author_site": "Nikunj Saunshi, Stefani Karp, Shankar Krishnan, Sobhan Miryoosefi, Sashank Jakkam Reddi, Sanjiv Kumar", "tldr": "", "abstract": "Given the increasing scale of model sizes, efficient training strategies like gradual stacking have garnered interest. Stacking enables efficient training by gradually growing the depth of a model in stages and using layers from a smaller model in an earlier stage to initialize the next stage. Although efficient for training, the model biases induced by such growing approaches are largely unexplored. In this work, we examine this fundamental aspect of gradual stacking, going beyond its efficiency benefits. We propose a variant of gradual stacking called MIDAS that can speed up language model training by up to 40\\%. Furthermore we discover an intriguing phenomenon: MIDAS is not only training-efficient but surprisingly also has an inductive bias towards improving downstream tasks, especially tasks that require reasoning abilities like reading comprehension and math problems, despite having similar or slightly worse perplexity compared to baseline training. To further analyze this inductive bias, we construct {\\em reasoning primitives} \u2013 simple synthetic tasks that are building blocks for reasoning \u2013 and find that a model pretrained with stacking is significantly better than standard pretraining on these primitives, with and without fine-tuning. This provides stronger and more robust evidence for this inductive bias towards reasoning. These findings of training efficiency and inductive bias towards reasoning are verified at 1B, 2B and 8B parameter language models. Finally, we conjecture the underlying reason for this inductive bias by exploring the connection of stacking to looped models and provide strong supporting empirical analysis.", "keywords": "stacking;language model;reasoning;inductive bias;efficient training", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Nikunj Saunshi;Stefani Karp;Shankar Krishnan;Sobhan Miryoosefi;Sashank J. Reddi;Sanjiv Kumar", "authorids": "~Nikunj_Saunshi1;~Stefani_Karp1;~Shankar_Krishnan1;~Sobhan_Miryoosefi1;~Sashank_J._Reddi1;~Sanjiv_Kumar1", "gender": ";F;M;M;M;", "homepage": "https://www.nikunjsaunshi.com/;;;https://www.cs.princeton.edu/~syoosefi/;;http://www.sanjivk.com/", "dblp": "199/2236;280/1111;;243/5898;50/10452;", "google_scholar": "F24vXggAAAAJ;iMknz8EAAAAJ;;XItzx5gAAAAJ;70lgwYwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;;miryoosefi/;;", "or_profile": "~Nikunj_Saunshi1;~Stefani_Karp1;~Shankar_Krishnan1;~Sobhan_Miryoosefi1;~Sashank_J._Reddi1;~Sanjiv_Kumar1", "aff": "Google;Carnegie Mellon University;;Princeton University;Google;Google", "aff_domain": "google.com;cmu.edu;;princeton.edu;google.com;google.com", "position": "Researcher;PhD student;;PhD student;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nsaunshi2024on,\ntitle={On the Inductive Bias of Stacking Towards Improving Reasoning},\nauthor={Nikunj Saunshi and Stefani Karp and Shankar Krishnan and Sobhan Miryoosefi and Sashank J. Reddi and Sanjiv Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3ZAfFoAcUI}\n}", "github": "", "reviewers": "1oxX;UVBV;s9u7;48NW", "pdf_size": 1196987, "rating": "6;6;7;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "4;3;4;4", "wc_summary": "104;153;40;35", "wc_strengths": "39;76;107;32", "wc_weaknesses": "57;116;49;42", "wc_questions": "21;4;83;30", "wc_limitations": "7;10;8;10", "wc_review": "228;359;287;149", "wc_reply_reviewers": "0;29;32;22", "wc_reply_authors": "0;0;0;136", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 48.71857961804716 ], "wc_strengths_avg": [ 63.5, 30.170349683091178 ], "wc_weaknesses_avg": [ 66.0, 29.351320242878344 ], "wc_questions_avg": [ 34.5, 29.516944286290883 ], "wc_limitations_avg": [ 8.75, 1.299038105676658 ], "wc_review_avg": [ 255.75, 77.14069937458436 ], "wc_reply_reviewers_avg": [ 20.75, 12.517487767120047 ], "wc_reply_authors_avg": [ 34.0, 58.88972745734183 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=334670576157372228&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;cmu.edu;;princeton.edu;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Google;Carnegie Mellon University;Princeton University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.cmu.edu;https://www.princeton.edu", "aff_unique_abbr": "Google;CMU;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Revisiting Few-Shot Object Detection with Vision-Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97860", "id": "3ZLuZ2l0aR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ZLuZ2l0aR", "openreview": "https://openreview.net/forum?id=3ZLuZ2l0aR", "poster": "/media/PosterPDFs/NeurIPS%202024/97860.png?t=1731461081.959329", "project": "", "author_site": "Anish Madan, Neehar Peri, Shu Kong, Deva Ramanan", "tldr": "", "abstract": "The era of vision-language models (VLMs) trained on web-scale datasets challenges conventional formulations of \u201copen-world\" perception. In this work, we revisit the task of few-shot object detection (FSOD) in the context of recent foundational VLMs. First, we point out that zero-shot predictions from VLMs such as GroundingDINO significantly outperform state-of-the-art few-shot detectors (48 vs. 33 AP) on COCO. Despite their strong zero-shot performance, such foundation models may still be sub-optimal. For example, trucks on the web may be defined differently from trucks for a target applications such as autonomous vehicle perception. We argue that the task of few-shot recognition can be reformulated as aligning foundation models to target concepts using a few examples. Interestingly, such examples can be multi-modal, using both text and visual cues, mimicking instructions that are often given to human annotators when defining a target concept of interest. Concretely, we propose Foundational FSOD, a new benchmark protocol that evaluates detectors pre-trained on any external data and fine-tuned on multi-modal (text and visual) K-shot examples per target class. We repurpose nuImages for Foundational FSOD, benchmark several popular open-source VLMs, and provide an empirical analysis of state-of-the-art methods. Lastly, we discuss our recent CVPR 2024 Foundational FSOD competition and share insights from the community. Notably, the winning team significantly outperforms our baseline by 23.3 mAP!", "keywords": "Few-Shot Object Detection;Vision-Language Models;Concept Alignment;Multi-Modal", "primary_area": "", "supplementary_material": "/attachment/155dc331d8adebf6c1313d226a4316d0b890ae94.pdf", "author": "Anish Madan;Neehar Peri;Shu Kong;Deva Ramanan", "authorids": "~Anish_Madan1;~Neehar_Peri1;~Shu_Kong1;~Deva_Ramanan1", "gender": "M;M;M;M", "homepage": ";http://neeharperi.com;https://aimerykong.github.io/;https://www.cs.cmu.edu/~deva/", "dblp": "265/6058;241/5094;26/11141;49/488", "google_scholar": "eZ4WZmIAAAAJ;X3cGY7wAAAAJ;sm9FdLoAAAAJ;9B8PoXUAAAAJ", "orcid": ";;0000-0002-1362-5937;", "linkedin": ";neeharperi/;aimerykong/;", "or_profile": "~Anish_Madan1;~Neehar_Peri1;~Shu_Kong1;~Deva_Ramanan1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Texas A&M University - College Station;School of Computer Science, Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;tamu.edu;cs.cmu.edu", "position": "MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmadan2024revisiting,\ntitle={Revisiting Few-Shot Object Detection with Vision-Language Models},\nauthor={Anish Madan and Neehar Peri and Shu Kong and Deva Ramanan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3ZLuZ2l0aR}\n}", "github": "", "reviewers": "uQBw;cYqG;JU26", "pdf_size": 5607231, "rating": "4;7;8", "confidence": "3;3;4", "wc_summary_and_contributions": "55;43;94", "wc_strengths": "115;114;5", "wc_improvement": "266;152;5", "wc_limitations": "139;1;1", "wc_correctness": "29;1;1", "wc_clarity": "10;52;1", "wc_relation_to_prior_work": "21;1;1", "wc_documentation": "67;11;1", "wc_additional_feedback": "1;1;1", "wc_review": "703;376;110", "wc_reply_reviewers": "203;10;0", "wc_reply_authors": "207;0;0", "reply_reviewers": "3;1;0", "reply_authors": "3;1;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 64.0, 21.77154105707724 ], "wc_strengths_avg": [ 78.0, 51.62040940041707 ], "wc_improvement_avg": [ 141.0, 106.8363234110946 ], "wc_limitations_avg": [ 47.0, 65.05382386916237 ], "wc_correctness_avg": [ 10.333333333333334, 13.199326582148887 ], "wc_clarity_avg": [ 21.0, 22.22611077089287 ], "wc_relation_to_prior_work_avg": [ 7.666666666666667, 9.428090415820632 ], "wc_documentation_avg": [ 26.333333333333332, 29.044027881055953 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 396.3333333333333, 242.51781148791707 ], "wc_reply_reviewers_avg": [ 71.0, 93.42733361638159 ], "wc_reply_authors_avg": [ 69.0, 97.58073580374356 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6933752452815364, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2885941046828500885&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "andrew.cmu.edu;cmu.edu;tamu.edu;cs.cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.tamu.edu", "aff_unique_abbr": "CMU;TAMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Station;Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "NanoBaseLib: A Multi-Task Benchmark Dataset for Nanopore Sequencing", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97859", "id": "3ZjaXTPWiE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ZjaXTPWiE", "openreview": "https://openreview.net/forum?id=3ZjaXTPWiE", "poster": "/media/PosterPDFs/NeurIPS%202024/97859.png?t=1730117934.7149255", "project": "", "author_site": "Guangzhao Cheng, Chengbo Fu, Lu Cheng", "tldr": "", "abstract": "Nanopore sequencing is the third-generation sequencing technology with capabilities of generating long-read sequences and directly measuring modifications on DNA/RNA molecules, which makes it ideal for biological applications such as human Telomere-to-Telomere (T2T) genome assembly, Ebola virus surveillance and COVID-19 mRNA vaccine development. However, accuracies of computational methods in various tasks of Nanopore sequencing data analysis are far from satisfactory. For instance, the base calling accuracy of Nanopore RNA sequencing is $\\sim$90\\%, while the aim is $\\sim$99.9\\%. This highlights an urgent need of contributions from the machine learning community. A bottleneck that prevents machine learning researchers from entering this field is the lack of a large integrated benchmark dataset. To this end, we present NanoBaseLib, a comprehensive multi-task benchmark dataset. It integrates 16 public datasets with over 30 million reads for four critical tasks in Nanopore data analysis. To facilitate method development, we have preprocessed all the raw data using a uniform workflow, stored all the intermediate results in uniform formats, analysed test datasets with various baseline methods for four benchmark tasks, and developed a software package to easily access these results. NanoBaseLib is available at https://nanobaselib.github.io.", "keywords": "Nanopore Sequencing;Base Calling;RNA Modification;Alignment;Segmentation", "primary_area": "", "supplementary_material": "/attachment/46f58cf5be7c39be4d0803ad34ebb4b3b7b613fd.pdf", "author": "Guangzhao Cheng;Chengbo Fu;Lu Cheng", "authorids": "~Guangzhao_Cheng1;~Chengbo_Fu1;~Lu_Cheng1", "gender": "M;M;", "homepage": "https://research.aalto.fi/en/persons/guangzhao-cheng;;", "dblp": ";;", "google_scholar": ";;", "orcid": "0009-0008-6160-3637;0009-0007-8655-650X;", "linkedin": "guangzhao-cheng;;", "or_profile": "~Guangzhao_Cheng1;~Chengbo_Fu1;~Lu_Cheng1", "aff": "Aalto University;Aalto University;", "aff_domain": "aalto.fi;aalto.fi;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\ncheng2024nanobaselib,\ntitle={NanoBaseLib: A Multi-Task Benchmark Dataset for Nanopore Sequencing},\nauthor={Guangzhao Cheng and Chengbo Fu and Lu Cheng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3ZjaXTPWiE}\n}", "github": "", "reviewers": "BsqT;KfCJ;bhmA;k9pY", "pdf_size": 1603835, "rating": "4;6;7;9", "confidence": "2;2;3;4", "wc_summary_and_contributions": "71;108;26;80", "wc_strengths": "55;48;23;76", "wc_improvement": "4;28;20;39", "wc_limitations": "29;19;15;56", "wc_correctness": "126;4;12;22", "wc_clarity": "107;79;1;1", "wc_relation_to_prior_work": "24;8;1;1", "wc_documentation": "162;31;17;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "579;326;116;277", "wc_reply_reviewers": "0;20;17;12", "wc_reply_authors": "1530;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 1.8027756377319946 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 71.25, 29.473505051147207 ], "wc_strengths_avg": [ 50.5, 18.9274932307477 ], "wc_improvement_avg": [ 22.75, 12.754901018824098 ], "wc_limitations_avg": [ 29.75, 15.990231392947383 ], "wc_correctness_avg": [ 41.0, 49.48737212663449 ], "wc_clarity_avg": [ 47.0, 47.05316142407437 ], "wc_relation_to_prior_work_avg": [ 8.5, 9.394147114027968 ], "wc_documentation_avg": [ 52.75, 63.96239129363442 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 324.5, 166.2084534552921 ], "wc_reply_reviewers_avg": [ 12.25, 7.628073151196179 ], "wc_reply_authors_avg": [ 382.5, 662.5094338950955 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9198662110077999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:73zcOGRxwSoJ:scholar.google.com/&scioq=NanoBaseLib:+A+Multi-Task+Benchmark+Dataset+for+Nanopore+Sequencing&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "aalto.fi;aalto.fi;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "title": "Global Rewards in Restless Multi-Armed Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96734", "id": "3apt5AJ5QN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3apt5AJ5QN", "openreview": "https://openreview.net/forum?id=3apt5AJ5QN", "poster": "", "project": "", "author_site": "Naveen Raman, Zheyuan Shi, Fei Fang", "tldr": "", "abstract": "Restless multi-armed bandits (RMAB) extend multi-armed bandits so arm pulls impact future arm states. Despite the success of RMABs, a key limiting assumption is the separability of rewards into a sum across arms. We address this deficiency by proposing restless-multi-armed bandit with global rewards (RMAB-G), a generalization of RMABs to global non-separable rewards. To solve RMAB-G, we develop the Linear-Whittle and Shapley-Whittle indices, which extend Whittle indices from RMABs to RMAB-Gs. We prove approximation bounds which demonstrate how Linear and Shapley-Whittle indices fail for non-linear rewards. To overcome this limitation, we propose two sets of adaptive policies: the first computes indices iteratively and the second combines indices with Monte-Carlo Tree Search (MCTS). Empirically, we demonstrate that adaptive policies outperform both pre-computed index policies and baselines in synthetic and real-world food rescue datasets.", "keywords": "Restless Bandits;Multi-Armed Bandit;Submodular;Food Rescue", "primary_area": "bandits", "supplementary_material": "/attachment/23078e5f7b585d7131e76c0b52640b178d33b81d.zip", "author": "Naveen Janaki Raman;Zheyuan Ryan Shi;Fei Fang", "authorids": "~Naveen_Janaki_Raman1;~Zheyuan_Ryan_Shi1;~Fei_Fang1", "gender": ";;F", "homepage": "https://naveenraman.com;;https://feifang.info/", "dblp": "220/3385;;57/2878", "google_scholar": "vHALcwMAAAAJ;;R6jE0VEAAAAJ", "orcid": ";;", "linkedin": "naveen-raman/;;", "or_profile": "~Naveen_Janaki_Raman1;~Zheyuan_Ryan_Shi1;~Fei_Fang1", "aff": "Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nraman2024global,\ntitle={Global Rewards in Restless Multi-Armed Bandits},\nauthor={Naveen Janaki Raman and Zheyuan Ryan Shi and Fei Fang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3apt5AJ5QN}\n}", "github": "", "reviewers": "oZCz;t2ns;a47J;TFrk", "pdf_size": 728003, "rating": "4;6;7;7", "confidence": "4;3;4;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "2;2;4;3", "wc_summary": "78;119;109;145", "wc_strengths": "56;25;73;121", "wc_weaknesses": "295;53;65;221", "wc_questions": "127;167;105;258", "wc_limitations": "36;6;10;1", "wc_review": "592;370;362;746", "wc_reply_reviewers": "0;38;50;0", "wc_reply_authors": "0;54;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 112.75, 23.98306694315804 ], "wc_strengths_avg": [ 68.75, 34.73021019228072 ], "wc_weaknesses_avg": [ 158.5, 102.96965572439291 ], "wc_questions_avg": [ 164.25, 58.512285034854 ], "wc_limitations_avg": [ 13.25, 13.516193990913271 ], "wc_review_avg": [ 517.5, 161.01164554155702 ], "wc_reply_reviewers_avg": [ 22.0, 22.40535650240808 ], "wc_reply_authors_avg": [ 13.5, 23.382685902179844 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14198491172924593817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cmu.edu;;cmu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "EGonc : Energy-based Open-Set Node Classification with substitute Unknowns", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96733", "id": "3cL2XDyaEB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3cL2XDyaEB", "openreview": "https://openreview.net/forum?id=3cL2XDyaEB", "poster": "/media/PosterPDFs/NeurIPS%202024/96733.png?t=1731554238.6916962", "project": "", "author_site": "Qin Zhang, Zelin Shi, Shirui Pan, Junyang Chen, Huisi Wu, Xiaojun Chen", "tldr": "", "abstract": "Open-set Classification (OSC) is a critical requirement for safely deploying machine learning models in the open world, which aims to classify samples from known classes and reject samples from out-of-distribution (OOD). \nExisting methods exploit the feature space of trained network and attempt at estimating the uncertainty in the predictions.\nHowever, softmax-based neural networks are found to be overly confident in their predictions even on data they have never seen before and\nthe immense diversity of the OOD examples also makes such methods fragile.\nTo this end, we follow the idea of estimating the underlying density of the training data to decide whether a given input is close to the in-distribution (IND) data and adopt Energy-based models (EBMs) as density estimators. \nA novel energy-based generative open-set node classification method, \\textit{EGonc}, is proposed to achieve open-set graph learning. \nSpecifically, we generate substitute unknowns to mimic the distribution of real open-set samples firstly, based on the information of graph structures. \nThen, an additional energy logit representing the virtual OOD class is learned from the residual of the feature against the principal space, and matched with the original logits by a constant scaling. This virtual logit serves as the indicator of OOD-ness. \nEGonc has nice theoretical properties that guarantee an overall distinguishable margin between the detection scores for IND and OOD samples. \nComprehensive experimental evaluations of EGonc also demonstrate its superiority.", "keywords": "Open-set classification;Energy-based models;Graph learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Qin Zhang;Zelin Shi;Shirui Pan;Junyang Chen;Huisi Wu;Xiaojun Chen", "authorids": "~Qin_Zhang4;~Zelin_Shi1;~Shirui_Pan1;~Junyang_Chen1;~Huisi_Wu1;~Xiaojun_Chen4", "gender": "F;M;;M;M;M", "homepage": "https://qinzhang11.github.io/;https://github.com/hiromisyo?tab=repositories;;https://csse.szu.edu.cn/pages/user/index?id=1101;https://csse.szu.edu.cn/staff/~hswu/;https://bruce-xjchen.github.io/HomePage/", "dblp": "45/47-11;;91/8171;196/7893.html;52/8869;20/3215-6", "google_scholar": "l4oNAU0AAAAJ;;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ;Q0u3dRQAAAAJ;hUb7j9EAAAAJ;yAjyrwkAAAAJ", "orcid": "0000-0002-1449-5046;;0000-0003-0794-527X;0000-0002-1139-8654;0000-0002-0399-9089;0000-0002-2818-4652", "linkedin": ";;;;;", "or_profile": "~Qin_Zhang4;~Zelin_Shi1;~Shirui_Pan1;~Junyang_Chen1;~Huisi_Wu1;~Xiaojun_Chen4", "aff": "Shenzhen University;Shenzhen University;Griffith University;Shenzhen University;Shenzhen University;Shenzhen University", "aff_domain": "szu.edu.cn;szu.edu.cn;griffith.edu.au;szu.edu;szu.edu.cn;szu.edu.cn", "position": "Lecturer;MS student;Full Professor;Assistant Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024egonc,\ntitle={{EG}onc : Energy-based Open-Set Node Classification with substitute Unknowns},\nauthor={Qin Zhang and Zelin Shi and Shirui Pan and Junyang Chen and Huisi Wu and Xiaojun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3cL2XDyaEB}\n}", "github": "", "reviewers": "exuD;qzuy;ena4;1Tms", "pdf_size": 972319, "rating": "5;5;7;7", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "2;3;2;3", "presentation": "1;2;3;3", "wc_summary": "63;101;89;74", "wc_strengths": "54;72;59;114", "wc_weaknesses": "252;95;121;129", "wc_questions": "83;41;3;2", "wc_limitations": "1;3;3;1", "wc_review": "453;312;275;320", "wc_reply_reviewers": "53;12;0;20", "wc_reply_authors": "36;55;0;31", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 81.75, 14.446020213193666 ], "wc_strengths_avg": [ 74.75, 23.594225988576103 ], "wc_weaknesses_avg": [ 149.25, 60.63981777677107 ], "wc_questions_avg": [ 32.25, 33.25187964611926 ], "wc_limitations_avg": [ 2.0, 1.0 ], "wc_review_avg": [ 340.0, 67.41290677607664 ], "wc_reply_reviewers_avg": [ 21.25, 19.66437133498043 ], "wc_reply_authors_avg": [ 30.5, 19.75474626513841 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sMU7LwmZ6OUJ:scholar.google.com/&scioq=EGonc+:+Energy-based+Open-Set+Node+Classification+with+substitute+Unknowns&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "szu.edu.cn;szu.edu.cn;griffith.edu.au;szu.edu;szu.edu.cn;szu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Shenzhen University;Griffith University", "aff_unique_dep": ";", "aff_unique_url": "https://www.szu.edu.cn;https://www.griffith.edu.au", "aff_unique_abbr": "SZU;Griffith", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Learning-Augmented Algorithms for the Bahncard Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96732", "id": "3cb6pF3Tvf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3cb6pF3Tvf", "openreview": "https://openreview.net/forum?id=3cb6pF3Tvf", "poster": "/media/PosterPDFs/NeurIPS%202024/96732.png?t=1732544574.3153787", "project": "", "author_site": "Hailiang Zhao, Xueyan Tang, Peng Chen, Shuiguang Deng", "tldr": "", "abstract": "In this paper, we study learning-augmented algorithms for the Bahncard problem. The Bahncard problem is a generalization of the ski-rental problem, where a traveler needs to irrevocably and repeatedly decide between a cheap short-term solution and an expensive long-term one with an unknown future. Even though the problem is canonical, only a primal-dual-based learning-augmented algorithm was explicitly designed for it. We develop a new learning-augmented algorithm, named PFSUM, that incorporates both history and short-term future to improve online decision making. We derive the competitive ratio of PFSUM as a function of the prediction error and conduct extensive experiments to show that PFSUM outperforms the primal-dual-based algorithm.", "keywords": "algorithms with predictions;competitive analysis;consistency;robustness", "primary_area": "optimization", "supplementary_material": "/attachment/9566f94f2aab381ec16a94fef698fd14ff23c54a.gz", "author": "Hailiang Zhao;Xueyan Tang;Peng Chen;Shuiguang Deng", "authorids": "~Hailiang_Zhao1;~Xueyan_Tang1;~Peng_Chen19;~Shuiguang_Deng1", "gender": "M;;M;M", "homepage": "http://hliangzhao.me;;https://github.com/Natureal;https://person.zju.edu.cn/shuiguang", "dblp": "19/538.html;;;d/ShuiguangDeng", "google_scholar": "https://scholar.google.com/citations?hl=en;;VAbN1-IAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-2850-6815;;;0000-0001-5015-6095", "linkedin": ";;;", "or_profile": "~Hailiang_Zhao1;~Xueyan_Tang1;~Peng_Chen19;~Shuiguang_Deng1", "aff": "Zhejiang University;;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;zju.edu.cn", "position": "PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhao2024learningaugmented,\ntitle={Learning-Augmented Algorithms for the Bahncard Problem},\nauthor={Hailiang Zhao and Xueyan Tang and Peng Chen and Shuiguang Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3cb6pF3Tvf}\n}", "github": "", "reviewers": "ZK3p;m14s;AGsA", "pdf_size": 2824218, "rating": "3;6;7", "confidence": "5;2;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "42;81;345", "wc_strengths": "75;54;218", "wc_weaknesses": "246;71;143", "wc_questions": "95;4;221", "wc_limitations": "9;1;18", "wc_review": "467;211;945", "wc_reply_reviewers": "377;23;60", "wc_reply_authors": "1016;19;50", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 156.0, 134.58826100369973 ], "wc_strengths_avg": [ 115.66666666666667, 72.86669716376306 ], "wc_weaknesses_avg": [ 153.33333333333334, 71.8161232283918 ], "wc_questions_avg": [ 106.66666666666667, 88.97315450304222 ], "wc_limitations_avg": [ 9.333333333333334, 6.944222218666553 ], "wc_review_avg": [ 541.0, 304.18853802644617 ], "wc_reply_reviewers_avg": [ 153.33333333333334, 158.87591244602046 ], "wc_reply_authors_avg": [ 361.6666666666667, 462.8565892608878 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5765566601970552, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WCHxPRrtyUgJ:scholar.google.com/&scioq=Learning-Augmented+Algorithms+for+the+Bahncard+Problem&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "zju.edu.cn;;zju.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Decoding-Time Language Model Alignment with Multiple Objectives", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96731", "id": "3csuL7TVpV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3csuL7TVpV", "openreview": "https://openreview.net/forum?id=3csuL7TVpV", "poster": "/media/PosterPDFs/NeurIPS%202024/96731.png?t=1731409488.779482", "project": "", "author_site": "Ruizhe Shi, Yifang Chen, Yushi Hu, Alisa Liu, Hanna Hajishirzi, Noah Smith, Simon Du", "tldr": "", "abstract": "Aligning language models (LMs) to human preferences has emerged as a critical pursuit, enabling these models to better serve diverse user needs. Existing methods primarily focus on optimizing LMs for a single reward function, limiting their adaptability to varied objectives. \nHere, we propose $\\textbf{multi-objective decoding~(MOD)}$, a decoding-time algorithm that outputs the next token from a linear combination of predictions of all base models, for any given weighting over different objectives.\nWe exploit a common form among a family of $f$-divergence regularized alignment approaches (such as PPO, DPO, and their variants) to identify a closed-form solution by Legendre transform, and derive an efficient decoding strategy.\nTheoretically, we show why existing approaches can be sub-optimal even in natural settings and obtain optimality guarantees for our method.\nEmpirical results demonstrate the effectiveness of the algorithm. For example, compared to a parameter-merging baseline, MOD achieves 12.8\\% overall reward improvement when equally optimizing towards $3$ objectives. Moreover, we experiment with MOD on combining three fully-finetuned \nLMs of different model sizes, each aimed at different objectives such as safety, coding, and general user preference. Unlike traditional methods that require careful curation of a mixture of datasets to achieve comprehensive improvement, we can quickly experiment with preference weightings using MOD to find the best combination of models. Our best combination reduces toxicity on Toxigen to nearly 0\\% and achieves 7.9--33.3\\% improvement across three other metrics ($\\textit{i.e.}$, Codex@1, GSM-COT, BBH-COT).", "keywords": "multi-objective alignment;decoding-time algorithms;RLHF", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ruizhe Shi;Yifang Chen;Yushi Hu;Alisa Liu;Hannaneh Hajishirzi;Noah A. Smith;Simon Shaolei Du", "authorids": "~Ruizhe_Shi1;~Yifang_Chen1;~Yushi_Hu1;~Alisa_Liu1;~Hannaneh_Hajishirzi1;~Noah_A._Smith2;~Simon_Shaolei_Du1", "gender": "M;F;M;F;F;M;M", "homepage": "http://srzer.github.io;;https://yushi-hu.github.io;https://alisawuffles.github.io/;https://homes.cs.washington.edu/~hannaneh/;http://simonshaoleidu.com;https://homes.cs.washington.edu/~nasmith/", "dblp": "304/0634.html;20/8403-1;268/5766;;52/1296;176/5602;90/5204.html", "google_scholar": "0tlXSPkAAAAJ;LUz2mN4AAAAJ;mXN51X0AAAAJ;3-lTFAwAAAAJ;LOV6_WIAAAAJ;OttawxUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0002-2310-6380", "linkedin": ";;;;;;", "or_profile": "~Ruizhe_Shi1;~Yifang_Chen1;~Yushi_Hu1;~Alisa_Liu1;~Hannaneh_Hajishirzi1;~Simon_Shaolei_Du1;~Noah_Smith1", "aff": "University of Washington;Department of Computer Science, University of Washington;University of Washington;University of Washington;University of Washington;University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "uw.edu;cs.washington.edu;uw.edu;uw.edu;uw.edu;washington.edu;allenai.org", "position": "Intern;PhD student;PhD student;PhD student;Associate Professor;Assistant Professor;Senior Director of NLP Research", "bibtex": "@inproceedings{\nshi2024decodingtime,\ntitle={Decoding-Time Language Model Alignment with Multiple Objectives},\nauthor={Ruizhe Shi and Yifang Chen and Yushi Hu and Alisa Liu and Hannaneh Hajishirzi and Noah A. Smith and Simon Shaolei Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3csuL7TVpV}\n}", "github": "", "reviewers": "BvFb;TTLg;dVwo;pv2M", "pdf_size": 0, "rating": "5;7;7;8", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;4;3;2", "wc_summary": "68;115;126;107", "wc_strengths": "32;115;117;34", "wc_weaknesses": "74;9;60;180", "wc_questions": "12;17;78;37", "wc_limitations": "5;1;9;23", "wc_review": "191;257;390;381", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.0, 21.85177338341216 ], "wc_strengths_avg": [ 74.5, 41.512046444375635 ], "wc_weaknesses_avg": [ 80.75, 62.19877410367506 ], "wc_questions_avg": [ 36.0, 25.990382836734053 ], "wc_limitations_avg": [ 9.5, 8.2915619758885 ], "wc_review_avg": [ 304.75, 84.11413377072844 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11237639555379334898&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uw.edu;cs.washington.edu;uw.edu;uw.edu;uw.edu;washington.edu;allenai.org", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Edge-of-Reach Problem in Offline Model-Based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96730", "id": "3dn1hINA6o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3dn1hINA6o", "openreview": "https://openreview.net/forum?id=3dn1hINA6o", "poster": "/media/PosterPDFs/NeurIPS%202024/96730.png?t=1731764311.6048317", "project": "", "author_site": "Anya Sims, Cong Lu, Jakob Foerster, Yee Whye Teh", "tldr": "", "abstract": "Offline reinforcement learning (RL) aims to train agents from pre-collected datasets. However, this comes with the added challenge of estimating the value of behaviors not covered in the dataset. Model-based methods offer a potential solution by training an approximate dynamics model, which then allows collection of additional synthetic data via rollouts in this model. The prevailing theory treats this approach as online RL in an approximate dynamics model, and any remaining performance gap is therefore understood as being due to dynamics model errors. In this paper, we analyze this assumption and investigate how popular algorithms perform as the learned dynamics model is improved. In contrast to both intuition and theory, if the learned dynamics model is replaced by the true error-free dynamics, existing model-based methods completely fail. This reveals a key oversight: The theoretical foundations assume sampling of full horizon rollouts in the learned dynamics model; however, in practice, the number of model-rollout steps is aggressively reduced to prevent accumulating errors. We show that this truncation of rollouts results in a set of edge-of-reach states at which we are effectively \"bootstrapping from the void.\" This triggers pathological value overestimation and complete performance collapse. We term this the edge-of-reach problem. Based on this new insight, we fill important gaps in existing theory, and reveal how prior model-based methods are primarily addressing the edge-of-reach problem, rather than model-inaccuracy as claimed. Finally, we propose Reach-Aware Value Learning (RAVL), a simple and robust method that directly addresses the edge-of-reach problem and hence - unlike existing methods - does not fail as the dynamics model is improved. Since world models will inevitably improve, we believe this is a key step towards future-proofing offline RL.", "keywords": "Offline Reinforcement Learning;Model-Based Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Anya Sims;Cong Lu;Jakob Nicolaus Foerster;Yee Whye Teh", "authorids": "~Anya_Sims1;~Cong_Lu1;~Jakob_Nicolaus_Foerster1;~Yee_Whye_Teh2", "gender": "F;M;M;M", "homepage": "https://anyasims.github.io/;https://conglu.co.uk;https://www.jakobfoerster.com;http://csml.stats.ox.ac.uk/people/teh/", "dblp": ";;176/5095;88/2483", "google_scholar": "https://scholar.google.com/citations?hl=en;yMGBji4AAAAJ;6z4lQzMAAAAJ;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": ";0000-0001-5564-838X;;", "linkedin": ";cong-lu-530b74104/;;", "or_profile": "~Anya_Sims1;~Cong_Lu1;~Jakob_Nicolaus_Foerster1;~Yee_Whye_Teh1", "aff": "University of Oxford;University of British Columbia;University of Oxford, University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ubc.ca;eng.ox.ac.uk;ox.ac.uk", "position": "PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsims2024the,\ntitle={The Edge-of-Reach Problem in Offline Model-Based Reinforcement Learning},\nauthor={Anya Sims and Cong Lu and Jakob Nicolaus Foerster and Yee Whye Teh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3dn1hINA6o}\n}", "github": "", "reviewers": "GKpt;8ru8;PtsT;biRH", "pdf_size": 4317408, "rating": "5;7;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "78;173;63;87", "wc_strengths": "56;89;31;96", "wc_weaknesses": "103;323;78;357", "wc_questions": "69;23;37;6", "wc_limitations": "10;5;1;6", "wc_review": "316;613;210;552", "wc_reply_reviewers": "20;152;14;19", "wc_reply_authors": "56;154;18;16", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 42.868257487329714 ], "wc_strengths_avg": [ 68.0, 26.16295090390226 ], "wc_weaknesses_avg": [ 215.25, 125.63911612232872 ], "wc_questions_avg": [ 33.75, 23.12331074911203 ], "wc_limitations_avg": [ 5.5, 3.2015621187164243 ], "wc_review_avg": [ 422.75, 165.49830059550462 ], "wc_reply_reviewers_avg": [ 51.25, 58.212434238743185 ], "wc_reply_authors_avg": [ 61.0, 56.00892785976178 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1613993337283744073&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;ubc.ca;eng.ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Oxford;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Can Transformers Smell Like Humans?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96729", "id": "3f8i9GlBzu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3f8i9GlBzu", "openreview": "https://openreview.net/forum?id=3f8i9GlBzu", "poster": "/media/PosterPDFs/NeurIPS%202024/96729.png?t=1733899134.7518823", "project": "", "author_site": "Farzaneh Taleb, Miguel Vasco, Antonio Ribeiro, M\u00e5rten Bj\u00f6rkman, Danica Kragic", "tldr": "", "abstract": "The human brain encodes stimuli from the environment into representations that form a sensory perception of the world. Despite recent advances in understanding visual and auditory perception, olfactory perception remains an under-explored topic in the machine learning community due to the lack of large-scale datasets annotated with labels of human olfactory perception. In this work, we ask the question of whether pre-trained transformer models of chemical structures encode representations that are aligned with human olfactory perception, i.e., can transformers smell like humans? We demonstrate that representations encoded from transformers pre-trained on general chemical structures are highly aligned with human olfactory perception. We use multiple datasets and different types of perceptual representations to show that the representations encoded by transformer models are able to predict: (i) labels associated with odorants\u200c\u200c provided by experts; (ii) continuous ratings provided by human participants with respect to pre-defined descriptors; and (iii) similarity ratings between odorants provided by human participants. Finally, we evaluate the extent to which this alignment is associated with physicochemical features of odorants known to be relevant for olfactory decoding.", "keywords": "Representational Alignment;Olfactory;Transformers;Representation Learning;Neuroscience", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Farzaneh Taleb;Miguel Vasco;Antonio H. Ribeiro;M\u00e5rten Bj\u00f6rkman;Danica Kragic", "authorids": "~Farzaneh_Taleb1;~Miguel_Vasco1;~Antonio_H._Ribeiro1;~M\u00e5rten_Bj\u00f6rkman2;~Danica_Kragic1", "gender": "F;M;M;F;M", "homepage": "https://sites.google.com/view/farzanehtaleb/;https://www.miguelvasco.com/;https://antonior92.github.io/;http://www.csc.kth.se/~danik;https://www.kth.se/profile/celle", "dblp": ";237/9737;202/1699.html;82/1211;", "google_scholar": "_9tParQAAAAJ;https://scholar.google.pt/citations?user=Of2hDmMAAAAJ;https://scholar.google.com.br/citations?user=5t_sZdMAAAAJ;;https://scholar.google.se/citations?user=jKjp9h4AAAAJ", "orcid": "0000-0003-4482-1460;;0000-0003-3632-8529;;", "linkedin": "farzaneh-taleb/;;;;", "or_profile": "~Farzaneh_Taleb1;~Miguel_Vasco1;~Antonio_H._Ribeiro1;~Danica_Kragic1;~Marten_Bjoerkman1", "aff": "KTH Royal Institute of Technology;KTH Royal Institute of Technology;Uppsala University;KTH;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;kth.se;uu.se;kth.se;kth.se", "position": "PhD student;Postdoc;Assistant Professor;Professor;Associate Professor", "bibtex": "@inproceedings{\ntaleb2024can,\ntitle={Can Transformers Smell Like Humans?},\nauthor={Farzaneh Taleb and Miguel Vasco and Antonio H. Ribeiro and M{\\r{a}}rten Bj{\\\"o}rkman and Danica Kragic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3f8i9GlBzu}\n}", "github": "", "reviewers": "6yPX;UQYS;Y4G5", "pdf_size": 1476404, "rating": "5;7;7", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "63;134;114", "wc_strengths": "64;197;48", "wc_weaknesses": "64;298;380", "wc_questions": "43;51;21", "wc_limitations": "28;3;3", "wc_review": "262;683;566", "wc_reply_reviewers": "37;142;64", "wc_reply_authors": "14;77;7", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.66666666666667, 29.892399628593814 ], "wc_strengths_avg": [ 103.0, 66.78822251465199 ], "wc_weaknesses_avg": [ 247.33333333333334, 133.88884278468547 ], "wc_questions_avg": [ 38.333333333333336, 12.684198393626966 ], "wc_limitations_avg": [ 11.333333333333334, 11.785113019775793 ], "wc_review_avg": [ 503.6666666666667, 177.43418185031 ], "wc_reply_reviewers_avg": [ 81.0, 44.51965857910413 ], "wc_reply_authors_avg": [ 32.666666666666664, 31.478387647541428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5820642550456636635&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kth.se;kth.se;uu.se;kth.se;kth.se", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "KTH Royal Institute of Technology;Uppsala University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.uu.se", "aff_unique_abbr": "KTH;UU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Sweden" }, { "title": "Improving the Learning Capability of Small-size Image Restoration Network by Deep Fourier Shifting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96728", "id": "3gKsKFeuMA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3gKsKFeuMA", "openreview": "https://openreview.net/forum?id=3gKsKFeuMA", "poster": "", "project": "", "author_site": "man zhou", "tldr": "", "abstract": "State-of-the-art image restoration methods currently face challenges in terms of computational requirements and performance, making them impractical for deployment on edge devices such as phones and resource-limited devices. As a result, there is a need to develop alternative solutions with efficient designs that can achieve comparable performance to transformer or large-kernel methods. This motivates our research to explore techniques for improving the capability of small-size image restoration standing on the success secret of large receptive filed.\n\nTargeting at expanding receptive filed, spatial-shift operator tailored for efficient spatial communication and has achieved remarkable advances in high-level image classification tasks, like $S^2$-MLP and ShiftVit. However, its potential has rarely been explored in low-level image restoration tasks. The underlying reason behind this obstacle is that image restoration is sensitive to the spatial shift that occurs due to severe region-aware information loss, which exhibits a different behavior from high-level tasks. To address this challenge and unleash the potential of spatial shift for image restoration, we propose an information-lossless shifting operator, i.e., Deep Fourier Shifting, that is customized for image restoration. To develop our proposed operator, we first revisit the principle of shift operator and apply it to the Fourier domain, where the shift operator can be modeled in an information-lossless Fourier cycling manner. Inspired by Fourier cycling, we design two variants of Deep Fourier Shifting, namely the amplitude-phase variant and the real-imaginary variant. These variants are generic operators that can be directly plugged into existing image restoration networks as a drop-in replacement for the standard convolution unit, consuming fewer parameters. Extensive experiments across multiple low-level tasks including image denoising, low-light image enhancement, guided image super-resolution, and image de-blurring demonstrate consistent performance gains obtained by our Deep Fourier Shifting while reducing the computation burden. Additionally, ablation studies verify the robustness of the shift displacement with stable performance improvement.", "keywords": "Image restoration", "primary_area": "machine_vision", "supplementary_material": "", "author": "Man Zhou", "authorids": "~Man_Zhou4", "gender": "M", "homepage": "", "dblp": "165/8237", "google_scholar": "", "orcid": "0000-0003-2872-605X", "linkedin": "", "or_profile": "~man_zhou1", "aff": "University of Science and Technology of China", "aff_domain": "ustc.edu.cn", "position": "Postdoc", "bibtex": "@inproceedings{\nzhou2024improving,\ntitle={Improving the Learning Capability of Small-size Image Restoration Network by Deep Fourier Shifting},\nauthor={Man Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3gKsKFeuMA}\n}", "github": "", "reviewers": "3jgW;H1Gj;hJxw;GBwc", "pdf_size": 2937905, "rating": "4;6;6;7", "confidence": "5;5;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "138;93;103;56", "wc_strengths": "55;136;33;81", "wc_weaknesses": "187;146;63;85", "wc_questions": "37;5;101;5", "wc_limitations": "25;9;2;1", "wc_review": "442;389;302;228", "wc_reply_reviewers": "136;11;90;44", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.5, 29.21044333795706 ], "wc_strengths_avg": [ 76.25, 38.45370593323874 ], "wc_weaknesses_avg": [ 120.25, 49.088567915554435 ], "wc_questions_avg": [ 37.0, 39.191835884530846 ], "wc_limitations_avg": [ 9.25, 9.60143218483576 ], "wc_review_avg": [ 340.25, 81.8424553639491 ], "wc_reply_reviewers_avg": [ 70.25, 47.20368100053215 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:d8vue9EwLVEJ:scholar.google.com/&scioq=Improving+the+Learning+Capability+of+Small-size+Image+Restoration+Network+by+Deep+Fourier+Shifting&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ustc.edu.cn", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "3gZBGBglBf", "title": "Beware of Overestimated Decoding Performance Arising from Temporal Autocorrelations in Electroencephalogram Signals", "track": "main", "status": "Reject", "tldr": "", "abstract": "Researchers have reported high decoding accuracy (>95%) using non-invasive Electroencephalogram (EEG) signals for brain-computer interface (BCI) decoding tasks like image decoding, emotion recognition, auditory spatial attention detection, etc. Since these EEG data were usually collected with well-designed paradigms in labs, the reliability and robustness of the corresponding decoding methods were doubted by some researchers, and they argued that such decoding accuracy was overestimated due to the inherent temporal autocorrelation of EEG signals. However, the coupling between the stimulus-driven neural responses and the EEG temporal autocorrelations makes it difficult to confirm whether this overestimation exists in truth. Furthermore, the underlying pitfalls behind overestimated decoding accuracy have not been fully explained due to a lack of appropriate formulation. In this work, we formulate the pitfall in various EEG decoding tasks in a unified framework. EEG data were recorded from watermelons to remove stimulus-driven neural responses. Labels were assigned to continuous EEG according to the experimental design for EEG recording of several typical datasets, and then the decoding methods were conducted. The results showed the label can be successfully decoded as long as continuous EEG data with the same label were split into training and test sets. Further analysis indicated that high accuracy of various BCI decoding tasks could be achieved by associating labels with EEG intrinsic temporal autocorrelation features. These results underscore the importance of choosing the right experimental designs and data splits in BCI decoding tasks to prevent inflated accuracies due to EEG temporal correlations. The watermelon EEG dataset collected in this work can be obtained at Zenodo: https://zenodo.org/records/11238929, and all the codes of this work can be obtained in the supplementary materials.", "keywords": "Brain-computer interfaces;EEG;domain;temporal autocorrelations", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/c4119082960155de5cbc4dc5a3e2bd777fb442f9.zip", "author": "Xiran Xu;Bo Wang;Boda Xiao;Yadong Niu;Yiwen Wang;Xihong Wu;Jing Chen", "authorids": "~Xiran_Xu1;~Bo_Wang39;~Boda_Xiao1;~Yadong_Niu1;~Yiwen_Wang2;~Xihong_Wu1;~Jing_Chen9", "gender": "M;M;M;M;M;M;F", "homepage": "https://github.com/xuxiran;;;;https://wyw97.github.io;http://www.cis.pku.edu.cn/auditory/Staff/Prof.Wu.html;https://sai.pku.edu.cn/info/1084/1707.htm", "dblp": ";;;;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=yEjOfWEAAAAJ;E5dQcW4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;cWhk17oAAAAJ;https://scholar.google.com.hk/citations?user=0ZSjU8QAAAAJ;https://scholar.google.com.hk/citations?user=bYZrCPMAAAAJ", "orcid": "0000-0002-8293-4696;;;0000-0003-0904-3894;;;", "linkedin": ";;;;;;", "or_profile": "~Xiran_Xu1;~Bo_Wang39;~Boda_Xiao1;~Yadong_Niu1;~Yiwen_Wang2;~Xihong_Wu1;~Jing_Chen9", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024beware,\ntitle={Beware of Overestimated Decoding Performance Arising from Temporal Autocorrelations in Electroencephalogram Signals},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3gZBGBglBf}\n}", "github": "", "project": "", "reviewers": "hA7i;AKrL;S3fr;7nx7", "site": "https://openreview.net/forum?id=3gZBGBglBf", "pdf_size": 4670287, "rating": "2;6;7;8", "confidence": "5;4;5;4", "soundness": "1;2;4;4", "novelty": "1;3;3;3", "presentation": "1;2;2;3", "wc_summary": "104;91;79;147", "wc_strengths": "42;110;128;157", "wc_weaknesses": "51;103;5;120", "wc_questions": "35;4;141;383", "wc_limitations": "50;15;26;6", "wc_review": "282;323;379;813", "wc_reply_reviewers": "263;23;15;76", "wc_reply_authors": "1447;12;22;47", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 2.277608394786075 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 1.299038105676658 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.25, 25.674647027758727 ], "wc_strengths_avg": [ 109.25, 42.29287765097097 ], "wc_weaknesses_avg": [ 69.75, 45.20716204319842 ], "wc_questions_avg": [ 140.75, 148.80251173955364 ], "wc_limitations_avg": [ 24.25, 16.467771555374455 ], "wc_review_avg": [ 449.25, 212.81491371612094 ], "wc_reply_reviewers_avg": [ 94.25, 100.20822072065745 ], "wc_reply_authors_avg": [ 382.0, 615.0101625176611 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5488212999484517, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6302544317419207890&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SEA: State-Exchange Attention for High-Fidelity Physics Based Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96727", "id": "3gvGZhkkVt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3gvGZhkkVt", "openreview": "https://openreview.net/forum?id=3gvGZhkkVt", "poster": "", "project": "", "author_site": "Parsa Esmati, Amirhossein Dadashzadeh, Vahid Ardakani, Nicolas Larrosa, Nicol\u00f2 Grilli", "tldr": "", "abstract": "Current approaches using sequential networks have shown promise in estimating field variables for dynamical systems, but they are often limited by high rollout errors. The unresolved issue of rollout error accumulation results in unreliable estimations as the network predicts further into the future, with each step's error compounding and leading to an increase in inaccuracy. Here, we introduce the State-Exchange Attention (SEA) module, a novel transformer-based module enabling information exchange between encoded fields through multi-head cross-attention. The cross-field multidirectional information exchange design enables all state variables in the system to exchange information with one another, capturing physical relationships and symmetries between fields. Additionally, we introduce an efficient ViT-like mesh autoencoder to generate spatially coherent mesh embeddings for a large number of meshing cells. The SEA integrated transformer demonstrates the state-of-the-art rollout error compared to other competitive baselines. Specifically, we outperform PbGMR-GMUS Transformer-RealNVP and GMR-GMUS Transformer, with a reduction in error of 88% and 91%, respectively. Furthermore, we demonstrate that the SEA module alone can reduce errors by 97% for state variables that are highly dependent on other states of the system. The repository for this work is available at: https://github.com/ParsaEsmati/SEA", "keywords": "Generative;Spatio-temporal;AI4SCIENCE;Physics informed;Transformers in physics;Computational fluid dynamics", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Parsa Esmati;Amirhossein Dadashzadeh;Vahid Goodarzi Ardakani;Nicolas Larrosa;Nicol\u00f2 Grilli", "authorids": "~Parsa_Esmati1;~Amirhossein_Dadashzadeh1;~Vahid_Goodarzi_Ardakani1;~Nicolas_Larrosa1;~Nicol\u00f2_Grilli1", "gender": "M;M;M;M;M", "homepage": "https://research-information.bris.ac.uk/en/persons/parsa-esmati;;;;https://research-information.bris.ac.uk/en/persons/nicol%C3%B2-grilli", "dblp": "369/8488;;;;", "google_scholar": "hPrm0ccAAAAJ;8fpyqk8AAAAJ;0X0vH9sAAAAJ;https://scholar.google.co.il/citations?user=dswc9REAAAAJ;AnZc9YUAAAAJ", "orcid": ";;0000-0001-9757-5852;0000-0001-7515-4504;0000-0003-2539-9444", "linkedin": ";amirdzd/?originalSubdomain=uk;vahid-goodarzi-ardakani/;nlarrosa/;nicol%C3%B2-grilli-940b30145/", "or_profile": "~Parsa_Esmati1;~Amirhossein_Dadashzadeh1;~Vahid_Goodarzi_Ardakani1;~Nicolas_Larrosa1;~Nicol\u00f2_Grilli1", "aff": "University of Bristol;University of Bristol;University of Bristol;University of Bristol;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "position": "PhD student;Postdoc;Researcher;Associate Professor;Lecturer", "bibtex": "@inproceedings{\nesmati2024sea,\ntitle={{SEA}: State-Exchange Attention for High-Fidelity Physics Based Transformers},\nauthor={Parsa Esmati and Amirhossein Dadashzadeh and Vahid Goodarzi Ardakani and Nicolas Larrosa and Nicol{\\`o} Grilli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3gvGZhkkVt}\n}", "github": "", "reviewers": "Nand;M5vu;NXYL;Yocw", "pdf_size": 10201560, "rating": "6;7;7;8", "confidence": "4;3;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "4;4;3;4", "wc_summary": "104;39;41;63", "wc_strengths": "87;108;77;150", "wc_weaknesses": "105;130;207;40", "wc_questions": "58;48;34;2", "wc_limitations": "120;2;31;1", "wc_review": "474;327;390;256", "wc_reply_reviewers": "77;30;13;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 26.14741861063918 ], "wc_strengths_avg": [ 105.5, 28.02231253840411 ], "wc_weaknesses_avg": [ 120.5, 59.77666768899049 ], "wc_questions_avg": [ 35.5, 21.13646138784825 ], "wc_limitations_avg": [ 38.5, 48.57211133973898 ], "wc_review_avg": [ 361.75, 80.29438025167141 ], "wc_reply_reviewers_avg": [ 32.25, 27.012728481217888 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0YsLDuh9ybUJ:scholar.google.com/&scioq=SEA:+State-Exchange+Attention+for+High-Fidelity+Physics+Based+Transformers&hl=en&as_sdt=0,14", "gs_version_total": 5, "email": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Topological obstruction to the training of shallow ReLU neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96726", "id": "3hcn0UxP72", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3hcn0UxP72", "openreview": "https://openreview.net/forum?id=3hcn0UxP72", "poster": "/media/PosterPDFs/NeurIPS%202024/96726.png?t=1729771539.0573344", "project": "", "author_site": "Marco Nurisso, Pierrick Leroy, Francesco Vaccarino", "tldr": "", "abstract": "Studying the interplay between the geometry of the loss landscape and the optimization trajectories of simple neural networks is a fundamental step for understanding their behavior in more complex settings.\nThis paper reveals the presence of topological obstruction in the loss landscape of shallow ReLU neural networks trained using gradient flow. We discuss how the homogeneous nature of the ReLU activation function constrains the training trajectories to lie on a product of quadric hypersurfaces whose shape depends on the particular initialization of the network's parameters. \nWhen the neural network's output is a single scalar, we prove that these quadrics can have multiple connected components, limiting the set of reachable parameters during training. We analytically compute the number of these components and discuss the possibility of mapping one to the other through neuron rescaling and permutation. In this simple setting, we find that the non-connectedness results in a topological obstruction, which, depending on the initialization, can make the global optimum unreachable. We validate this result with numerical experiments.", "keywords": "learning dynamics;topology;two-layer neural networks;ReLU networks;geometry;symmetry;loss landscape;gradient flow", "primary_area": "optimization", "supplementary_material": "", "author": "Marco Nurisso;Pierrick Leroy;Francesco Vaccarino", "authorids": "~Marco_Nurisso1;~Pierrick_Leroy1;~Francesco_Vaccarino1", "gender": ";M;M", "homepage": "https://github.com/Marconurisso;;https://www.polito.it/en/staff?p=francesco.vaccarino", "dblp": "317/0625;387/0761;", "google_scholar": "vAaWpu8AAAAJ;MiAKztEAAAAJ;https://scholar.google.it/citations?user=4XfzoZQAAAAJ", "orcid": "0009-0001-3589-9170;;0000-0002-0610-9168", "linkedin": ";pierrick-leroy-31b037172/;francesco-vaccarino-67201b2/", "or_profile": "~Marco_Nurisso1;~Pierrick_Leroy1;~Francesco_Vaccarino1", "aff": "Polytechnic Institute of Turin;Polytechnic Institute of Turin;Polytechnic Institute of Turin", "aff_domain": "polito.it;polito.it;polito.it", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nnurisso2024topological,\ntitle={Topological obstruction to the training of shallow Re{LU} neural networks},\nauthor={Marco Nurisso and Pierrick Leroy and Francesco Vaccarino},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3hcn0UxP72}\n}", "github": "", "reviewers": "Gc1k;9QVd;hKtn;n2cB;JxYP", "pdf_size": 1802719, "rating": "6;6;6;6;7", "confidence": "4;4;1;4;4", "soundness": "3;3;3;3;4", "novelty": "2;2;2;2;3", "presentation": "3;3;3;3;4", "wc_summary": "47;121;52;164;603", "wc_strengths": "19;97;31;221;177", "wc_weaknesses": "197;107;66;171;360", "wc_questions": "11;183;3;268;249", "wc_limitations": "11;9;24;15;194", "wc_review": "285;517;176;839;1583", "wc_reply_reviewers": "31;99;61;20;101", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 1.2000000000000002 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 197.4, 207.47298619338375 ], "wc_strengths_avg": [ 109.0, 79.3674996456358 ], "wc_weaknesses_avg": [ 180.2, 101.0888717911126 ], "wc_questions_avg": [ 142.8, 114.44195035038506 ], "wc_limitations_avg": [ 50.6, 71.88490801273937 ], "wc_review_avg": [ 680.0, 505.4305095658552 ], "wc_reply_reviewers_avg": [ 62.4, 33.51178897045038 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17533036809115849725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "polito.it;polito.it;polito.it", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Polytechnic Institute of Turin", "aff_unique_dep": "", "aff_unique_url": "https://www.polito.it", "aff_unique_abbr": "Polito", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Low Degree Hardness for Broadcasting on Trees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96725", "id": "3iOefhez5e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3iOefhez5e", "openreview": "https://openreview.net/forum?id=3iOefhez5e", "poster": "/media/PosterPDFs/NeurIPS%202024/96725.png?t=1731699578.460339", "project": "", "author_site": "Han Huang, Elchanan Mossel", "tldr": "", "abstract": "We study the low-degree hardness of broadcasting on trees.\nBroadcasting on trees has been extensively studied in statistical physics, \nin computational biology in relation to phylogenetic reconstruction and in statistics and computer science in the context of block model inference, and as a simple data model for algorithms that may require depth for inference. \n\nThe inference of the root can be carried by celebrated Belief Propagation (BP) algorithm which achieves Bayes-optimal performance. Despite the fact that this algorithm runs in linear time (using real operations), recent works indicated that this algorithm in fact requires high level of complexity. \nMoitra, Mossel and Sandon constructed a chain for which estimating the root better than random (for a typical input) is $NC1$ complete. Kohler and Mossel constructed chains such that for trees with $N$ leaves, recovering the root better than random requires a polynomial of degree $N^{\\Omega(1)}$. Both works above asked if such complexity bounds hold in general below the celebrated {\\em Kesten-Stigum} bound. \n\nIn this work, we prove that this is indeed the case for low degree polynomials. \nWe show that for the broadcast problem using any Markov chain on trees with $N$ leaves, below the Kesten Stigum bound, any $O(\\log N)$ degree polynomial has vanishing correlation with the root. \n\nOur result is one of the first low-degree lower bound that is proved in a setting that is not based or easily reduced to a product measure.", "keywords": "Broadcasting Process;low degree hardness;statistically-computational gap", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Han Huang;Elchanan Mossel", "authorids": "~Han_Huang7;~Elchanan_Mossel1", "gender": "M;M", "homepage": "https://www.hhuangmath.com;http://math.mit.edu/~elmos/", "dblp": ";m/EMossel", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Han_Huang7;~Elchanan_Mossel1", "aff": "University of Missouri;Massachusetts Institute of Technology", "aff_domain": "missouri.edu;mit.edu", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024low,\ntitle={Low Degree Hardness for Broadcasting on Trees},\nauthor={Han Huang and Elchanan Mossel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3iOefhez5e}\n}", "github": "", "reviewers": "j6Pu;Pvza;d9ko;8xrJ", "pdf_size": 1134095, "rating": "6;7;7;8", "confidence": "2;3;2;2", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "1;4;3;3", "wc_summary": "121;174;130;250", "wc_strengths": "1;29;34;192", "wc_weaknesses": "1;37;20;72", "wc_questions": "796;55;2;1", "wc_limitations": "1;9;1;1", "wc_review": "920;304;187;516", "wc_reply_reviewers": "64;0;0;0", "wc_reply_authors": "764;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;0;0;0", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 168.75, 51.0165414351071 ], "wc_strengths_avg": [ 64.0, 74.9633243659858 ], "wc_weaknesses_avg": [ 32.5, 26.119915773217954 ], "wc_questions_avg": [ 213.5, 337.0152073720116 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 481.75, 279.1544151540505 ], "wc_reply_reviewers_avg": [ 16.0, 27.712812921102035 ], "wc_reply_authors_avg": [ 191.0, 330.82170424565555 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.5, 0.8660254037844386 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8803503465307718088&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "missouri.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Missouri;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.missouri.edu;https://web.mit.edu", "aff_unique_abbr": "MU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "HyperPrism: An Adaptive Non-linear Aggregation Framework for Distributed Machine Learning over Non-IID Data and Time-varying Communication Links", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96724", "id": "3ie8NWA1El", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ie8NWA1El", "openreview": "https://openreview.net/forum?id=3ie8NWA1El", "poster": "/media/PosterPDFs/NeurIPS%202024/96724.png?t=1731404970.3394506", "project": "", "author_site": "Haizhou Du, Yijian Chen, Ryan Yang, Yuchen Li, Linghe Kong", "tldr": "", "abstract": "While Distributed Machine Learning (DML) has been widely used to achieve decent performance, it is still challenging to take full advantage of data and devices distributed at multiple vantage points to adapt and learn, especially it is non-trivial to address dynamic and divergence challenges based on the linear aggregation framework as follows: (1) heterogeneous learning data at different devices (i.e., non-IID data) resulting in model divergence and (2) in the case of time-varying communication links, the limited ability for devices to reconcile model divergence. In this paper, we contribute a non-linear class aggregation framework HyperPrism that leverages distributed mirror descent with averaging done in the mirror descent dual space and adapts the degree of Weighted Power Mean (WPM) used in each round. Moreover, HyperPrism could adaptively choose different mapping for different layers of the local model with a dedicated hypernetwork per device, achieving automatic optimization of DML in high divergence settings. We perform rigorous analysis and experimental evaluations to demonstrate the effectiveness of adaptive, mirror-mapping DML. In particular, we extend the generalizability of existing related works and position them as special cases within HyperPrism. Our experimental results show that HyperPrism can improve the convergence speed up to 98.63% and scale well to more devices compared with the state-of-the-art, all with little additional computation overhead compared to traditional linear aggregation.", "keywords": "Distributed Machine Learning;Time-varying Communication;Non-Linear Aggregation;HyperNetwork", "primary_area": "optimization", "supplementary_material": "/attachment/b09651c8111593b67b9d12d8e7f6c9339b1fcdd1.zip", "author": "Haizhou Du;Yijian Chen;Ryan Yang;Yuchen Li;Linghe Kong", "authorids": "~Haizhou_Du2;~Yijian_Chen1;~Ryan_Yang1;~Yuchen_Li7;~Linghe_Kong1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/alvin0173;https://ryanyang.us;;https://www.cs.sjtu.edu.cn/~linghe.kong/", "dblp": "48/10264.html;;;143/0258-6;23/7909", "google_scholar": ";;;HwsqXIYAAAAJ;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ", "orcid": ";;;0000-0002-3869-7881;0000-0001-9266-3044", "linkedin": ";;;;", "or_profile": "~Haizhou_Du2;~Yijian_Chen1;~Ryan_Yang1;~Yuchen_Li7;~Linghe_Kong1", "aff": "Shanghai University of Electric Power;Shanghai University of Electric Power;Massachusetts Institute of Technology;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "shiep.edu.cn;shiep.edu;mit.edu;sjtu.edu;sjtu.edu.cn", "position": "Associate Professor;MS student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndu2024hyperprism,\ntitle={HyperPrism: An Adaptive Non-linear Aggregation Framework for Distributed Machine Learning over Non-{IID} Data and Time-varying Communication Links},\nauthor={Haizhou Du and Yijian Chen and Ryan Yang and Yuchen Li and Linghe Kong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3ie8NWA1El}\n}", "github": "", "reviewers": "uEh3;qJvT;5NnV", "pdf_size": 1188274, "rating": "4;5;7", "confidence": "4;4;4", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "76;66;78", "wc_strengths": "13;70;139", "wc_weaknesses": "253;183;115", "wc_questions": "2;2;28", "wc_limitations": "1;38;46", "wc_review": "345;359;406", "wc_reply_reviewers": "0;68;11", "wc_reply_authors": "0;253;7", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.33333333333333, 5.2493385826745405 ], "wc_strengths_avg": [ 74.0, 51.51698748956503 ], "wc_weaknesses_avg": [ 183.66666666666666, 56.3402362634576 ], "wc_questions_avg": [ 10.666666666666666, 12.256517540566826 ], "wc_limitations_avg": [ 28.333333333333332, 19.601587237318874 ], "wc_review_avg": [ 370.0, 26.08958923913266 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 29.80305726300948 ], "wc_reply_authors_avg": [ 86.66666666666667, 117.65014048251517 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Vt8wuU9GakkJ:scholar.google.com/&scioq=HyperPrism:+An+Adaptive+Non-linear+Aggregation+Framework+for+Distributed+Machine+Learning+over+Non-IID+Data+and+Time-varying+Communication+Links&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "shiep.edu.cn;shiep.edu;mit.edu;sjtu.edu;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Shanghai University of Electric Power;Massachusetts Institute of Technology;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.shiep.edu.cn;https://web.mit.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "SHIEP;MIT;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "LaSe-E2V: Towards Language-guided Semantic-aware Event-to-Video Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96723", "id": "3ilqQHBWTf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ilqQHBWTf", "openreview": "https://openreview.net/forum?id=3ilqQHBWTf", "poster": "/media/PosterPDFs/NeurIPS%202024/96723.png?t=1731981906.6145437", "project": "", "author_site": "Kanghao Chen, Hangyu Li, Jiazhou Zhou, Zeyu Wang, Lin Wang", "tldr": "", "abstract": "Event cameras harness advantages such as low latency, high temporal resolution, and high dynamic range (HDR), compared to standard cameras. Due to the distinct imaging paradigm shift, a dominant line of research focuses on event-to-video (E2V) reconstruction to bridge event-based and standard computer vision. However, this task remains challenging due to its inherently ill-posed nature: event cameras only detect the edge and motion information locally. Consequently, the reconstructed videos are often plagued by artifacts and regional blur, primarily caused by the ambiguous semantics of event data. In this paper, we find language naturally conveys abundant semantic information, rendering it stunningly superior in ensuring semantic consistency for E2V reconstruction. Accordingly, we propose a novel framework, called LaSe-E2V, that can achieve semantic-aware high-quality E2V reconstruction from a language-guided perspective, buttressed by the text-conditional diffusion models. However, due to diffusion models' inherent diversity and randomness, it is hardly possible to directly apply them to achieve spatial and temporal consistency for E2V reconstruction. Thus, we first propose an Event-guided Spatiotemporal Attention (ESA) module to condition the event data to the denoising pipeline effectively. We then introduce an event-aware mask loss to ensure temporal coherence and a noise initialization strategy to enhance spatial consistency. Given the absence of event-text-video paired data, we aggregate existing E2V datasets and generate textual descriptions using the tagging models for training and evaluation. Extensive experiments on three datasets covering diverse challenging scenarios (e.g., fast motion, low light) demonstrate the superiority of our method. Demo videos for the results are attached to the project page.", "keywords": "event camera;video reconstruction;diffusion model", "primary_area": "machine_vision", "supplementary_material": "/attachment/18d7d7e7074c2d330d75515eeba54e12b1b57dda.zip", "author": "Kanghao Chen;Hangyu Li;Jiazhou Zhou;Zeyu Wang;Lin Wang", "authorids": "~Kanghao_Chen1;~Hangyu_Li5;~Jiazhou_Zhou1;~Zeyu_Wang15;~Lin_Wang2", "gender": ";M;F;M;M", "homepage": ";https://github.com/cyjdlhy;https://jiazhou-garland.github.io/;https://zachzeyuwang.github.io/;https://dr.ntu.edu.sg/cris/rp/rp02550", "dblp": "302/4949;;316/0860;132/7882-3.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;JQnsB8MAAAAJ;q7NLPG0AAAAJ;SReb2csAAAAJ", "orcid": ";0009-0005-4560-3731;;0000-0001-5374-6330;0000-0002-7485-4493", "linkedin": ";;;zachzeyuwang/;", "or_profile": "~Kanghao_Chen1;~Hangyu_Li5;~Jiazhou_Zhou1;~Zeyu_Wang15;~Lin_Wang2", "aff": "Hong Kong University of Science and Technology (Guangzhou);Alibaba Group;Hong Kong University of Science and Technology;The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;alibaba-inc.com;hkust-gz.edu.cn;ust.hk;ust.hk", "position": "PhD student;Intern;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024laseev,\ntitle={LaSe-E2V: Towards Language-guided Semantic-aware Event-to-Video Reconstruction},\nauthor={Kanghao Chen and Hangyu Li and Jiazhou Zhou and Zeyu Wang and Lin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3ilqQHBWTf}\n}", "github": "", "reviewers": "fQWS;ueNs;uWG8;H8fU", "pdf_size": 7976723, "rating": "4;5;6;6", "confidence": "5;5;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "54;98;81;210", "wc_strengths": "69;65;53;33", "wc_weaknesses": "91;72;10;116", "wc_questions": "82;21;148;342", "wc_limitations": "12;26;5;42", "wc_review": "308;282;297;743", "wc_reply_reviewers": "0;471;8;567", "wc_reply_authors": "0;1033;0;1290", "reply_reviewers": "0;1;1;3", "reply_authors": "1;3;1;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.75, 59.41117319158072 ], "wc_strengths_avg": [ 55.0, 14.0 ], "wc_weaknesses_avg": [ 72.25, 39.18146883413127 ], "wc_questions_avg": [ 148.25, 120.5412273871475 ], "wc_limitations_avg": [ 21.25, 14.16642156650719 ], "wc_review_avg": [ 407.5, 193.92073122799428 ], "wc_reply_reviewers_avg": [ 261.5, 259.7426611090292 ], "wc_reply_authors_avg": [ 580.75, 587.8151814133419 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4455703647945528199&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 5, "email": "hkust.edu;alibaba-inc.com;hkust-gz.edu.cn;ust.hk;ust.hk", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.alibaba.com", "aff_unique_abbr": "HKUST;Alibaba", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;Guangzhou", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Boosting Text-to-Video Generative Model with MLLMs Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96722", "id": "3ivnixHy16", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3ivnixHy16", "openreview": "https://openreview.net/forum?id=3ivnixHy16", "poster": "", "project": "", "author_site": "Xun Wu, Shaohan Huang, Guolong Wang, Jing Xiong, Furu Wei", "tldr": "", "abstract": "Recent advancements in text-to-video generative models, such as Sora, have showcased impressive capabilities. These models have attracted significant interest for their potential applications. However, they often rely on extensive datasets of variable quality, which can result in generated videos that lack aesthetic appeal and do not accurately reflect the input text prompts. A promising approach to mitigate these issues is to leverage Reinforcement Learning from Human Feedback (RLHF), which aims to align the outputs of text-to-video generative with human preferences. However, the considerable costs associated with manual annotation have led to a scarcity of comprehensive preference datasets. In response to this challenge, our study begins by investigating the efficacy of Multimodal Large Language Models (MLLMs) generated annotations in capturing video preferences, discovering a high degree of concordance with human judgments. Building upon this finding, we utilize MLLMs to perform fine-grained video preference annotations across two dimensions, resulting in the creation of VideoPrefer, which includes 135,000 preference annotations. Utilizing this dataset, we introduce VideoRM, the first general-purpose reward model tailored for video preference in the text-to-video domain. Our comprehensive experiments confirm the effectiveness of both VideoPrefer and VideoRM, representing a significant step forward in the field.", "keywords": "text-to-video generation;alignment;reward model;multimodal large language model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xun Wu;Shaohan Huang;Guolong Wang;Jing Xiong;Furu Wei", "authorids": "~Xun_Wu1;~Shaohan_Huang1;~Guolong_Wang1;~Jing_Xiong4;~Furu_Wei1", "gender": "M;M;;M;M", "homepage": "https://github.com/Yu-shui;;https://dieuroi.github.io/;https://www.microsoft.com/en-us/research/people/fuwei/;https://menik1126.github.io/", "dblp": ";176/0380;167/9550.html;72/5870;", "google_scholar": ";;vc0T1NoAAAAJ;G-V1VpwAAAAJ;https://scholar.google.com.hk/citations?user=dFX1hXkAAAAJ", "orcid": ";;0000-0003-4874-2639;;0000-0003-2986-6978", "linkedin": ";;;;", "or_profile": "~Xun_Wu1;~Shaohan_Huang1;~Guolong_Wang1;~Furu_Wei1;~jing_xiong3", "aff": "Tsinghua University;Microsoft;University of International Business and Economics;Microsoft Research;Sun Yat-Sen University", "aff_domain": "tsinghua.edu.cn;microsoft.com;uibe.edu.cn;microsoft.com;sysu.edu.cn", "position": "MS student;Researcher;Assistant Professor;Distinguished Scientist;MS student", "bibtex": "@inproceedings{\nwu2024boosting,\ntitle={Boosting Text-to-Video Generative Model with {MLLM}s Feedback},\nauthor={Xun Wu and Shaohan Huang and Guolong Wang and Jing Xiong and Furu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3ivnixHy16}\n}", "github": "", "reviewers": "qjMG;nZ7n;snrb;cM4S", "pdf_size": 4498283, "rating": "5;5;6;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;2;3", "wc_summary": "162;79;102;97", "wc_strengths": "166;53;73;136", "wc_weaknesses": "354;231;330;222", "wc_questions": "5;36;21;222", "wc_limitations": "32;13;20;98", "wc_review": "719;412;546;775", "wc_reply_reviewers": "293;26;26;15", "wc_reply_authors": "660;48;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.0, 31.216982557575932 ], "wc_strengths_avg": [ 107.0, 45.80938768418543 ], "wc_weaknesses_avg": [ 284.25, 58.45671475544961 ], "wc_questions_avg": [ 71.0, 87.8663758214711 ], "wc_limitations_avg": [ 40.75, 33.74444398712179 ], "wc_review_avg": [ 613.0, 143.50087107749556 ], "wc_reply_reviewers_avg": [ 90.0, 117.28810681394768 ], "wc_reply_authors_avg": [ 177.0, 279.5478492136901 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6432524503084816063&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "email": "tsinghua.edu.cn;microsoft.com;uibe.edu.cn;microsoft.com;sysu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Tsinghua University;Microsoft;University of International Business and Economics;Sun Yat-sen University", "aff_unique_dep": ";Microsoft Corporation;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com;http://www.uibe.edu.cn;http://www.sysu.edu.cn/", "aff_unique_abbr": "THU;Microsoft;UIBE;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Cluster-wise Graph Transformer with Dual-granularity Kernelized Attention", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96721", "id": "3j2nasmKkP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3j2nasmKkP", "openreview": "https://openreview.net/forum?id=3j2nasmKkP", "poster": "/media/PosterPDFs/NeurIPS%202024/96721.png?t=1729525765.2180436", "project": "", "author_site": "Siyuan Huang, Yunchong Song, Jiayue Zhou, Zhouhan Lin", "tldr": "", "abstract": "In the realm of graph learning, there is a category of methods that conceptualize graphs as hierarchical structures, utilizing node clustering to capture broader structural information. While generally effective, these methods often rely on a fixed graph coarsening routine, leading to overly homogeneous cluster representations and loss of node-level information. In this paper, we envision the graph as a network of interconnected node sets without compressing each cluster into a single embedding. To enable effective information transfer among these node sets, we propose the Node-to-Cluster Attention (N2C-Attn) mechanism. N2C-Attn incorporates techniques from Multiple Kernel Learning into the kernelized attention framework, effectively capturing information at both node and cluster levels. We then devise an efficient form for N2C-Attn using the cluster-wise message-passing framework, achieving linear time complexity. We further analyze how N2C-Attn combines bi-level feature maps of queries and keys, demonstrating its capability to merge dual-granularity information. The resulting architecture, Cluster-wise Graph Transformer (Cluster-GT), which uses node clusters as tokens and employs our proposed N2C-Attn module, shows superior performance on various graph-level tasks. Code is available at https://github.com/LUMIA-Group/Cluster-wise-Graph-Transformer.", "keywords": "Graph Based Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Siyuan Huang;Yunchong Song;Jiayue Zhou;Zhouhan Lin", "authorids": "~Siyuan_Huang8;~Yunchong_Song1;~Jiayue_Zhou1;~Zhouhan_Lin1", "gender": "M;M;F;M", "homepage": "https://github.com/SiyuanHuangSJTU;https://github.com/realCrush;https://github.com/chloejvzi;https://hantek.github.io", "dblp": "62/885-3;339/6816;;121/7919.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;C-TqDNsAAAAJ;;https://scholar.google.ca/citations?user=LNZ4efwAAAAJ", "orcid": ";;;0009-0009-7204-0689", "linkedin": "siyuan-huang-885863235/;;;https://ca.linkedin.com/in/zhouhan-lin-34b98975", "or_profile": "~Siyuan_Huang8;~Yunchong_Song1;~Jiayue_Zhou1;~Zhouhan_Lin1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn", "position": "MS student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024clusterwise,\ntitle={Cluster-wise Graph Transformer with Dual-granularity Kernelized Attention},\nauthor={Siyuan Huang and Yunchong Song and Jiayue Zhou and Zhouhan Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3j2nasmKkP}\n}", "github": "", "reviewers": "7EZb;2taa;Tre7;azHs", "pdf_size": 1426059, "rating": "6;7;7;7", "confidence": "4;3;4;5", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "4;3;4;3", "wc_summary": "50;90;52;82", "wc_strengths": "24;52;52;80", "wc_weaknesses": "46;48;248;136", "wc_questions": "60;23;12;64", "wc_limitations": "26;4;6;1", "wc_review": "206;217;370;363", "wc_reply_reviewers": "24;8;18;33", "wc_reply_authors": "26;26;35;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 68.5, 17.741194999210173 ], "wc_strengths_avg": [ 52.0, 19.79898987322333 ], "wc_weaknesses_avg": [ 119.5, 82.61204512660366 ], "wc_questions_avg": [ 39.75, 22.63155982251334 ], "wc_limitations_avg": [ 9.25, 9.832980219648569 ], "wc_review_avg": [ 289.0, 77.63697572677596 ], "wc_reply_reviewers_avg": [ 20.75, 9.093266739736606 ], "wc_reply_authors_avg": [ 28.25, 3.897114317029974 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7248889380843415328&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Fearless Stochasticity in Expectation Propagation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96720", "id": "3kDWoqs2X2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3kDWoqs2X2", "openreview": "https://openreview.net/forum?id=3kDWoqs2X2", "poster": "/media/PosterPDFs/NeurIPS%202024/96720.png?t=1733141589.3507998", "project": "", "author_site": "Jonathan So, Richard Turner", "tldr": "", "abstract": "Expectation propagation (EP) is a family of algorithms for performing approximate inference in probabilistic models. The updates of EP involve the evaluation of moments\u2014expectations of certain functions\u2014which can be estimated from Monte Carlo (MC) samples. However, the updates are not robust to MC noise when performed naively, and various prior works have attempted to address this issue in different ways. In this work, we provide a novel perspective on the moment-matching updates of EP; namely, that they perform natural-gradient-based optimisation of a variational objective. We use this insight to motivate two new EP variants, with updates that are particularly well-suited to MC estimation. They remain stable and are most sample-efficient when estimated with just a single sample. These new variants combine the benefits of their predecessors and address key weaknesses. In particular, they are easier to tune, offer an improved speed-accuracy trade-off, and do not rely on the use of debiasing estimators. We demonstrate their efficacy on a variety of probabilistic inference tasks.", "keywords": "expectation propagation;natural gradients;probabilistic methods;markov chain monte carlo;variational inference", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Jonathan So;Richard E. Turner", "authorids": "~Jonathan_So1;~Richard_E_Turner1", "gender": ";M", "homepage": "http://mlg.eng.cam.ac.uk/?portfolio=jonathan-so;https://rich-turner-group.github.io/", "dblp": ";40/5352", "google_scholar": ";https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jonathan_So1;~Richard_E_Turner1", "aff": "University of Cambridge;Microsoft Research", "aff_domain": "cam.ac.uk;research.microsoft.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nso2024fearless,\ntitle={Fearless Stochasticity in Expectation Propagation},\nauthor={Jonathan So and Richard E. Turner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3kDWoqs2X2}\n}", "github": "", "reviewers": "6XAG;1uyT;YMg2", "pdf_size": 1233069, "rating": "5;6;8", "confidence": "3;3;2", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "42;124;125", "wc_strengths": "29;107;221", "wc_weaknesses": "168;122;186", "wc_questions": "146;107;52", "wc_limitations": "1;20;5", "wc_review": "386;480;589", "wc_reply_reviewers": "92;25;18", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 97.0, 38.89301565405628 ], "wc_strengths_avg": [ 119.0, 78.8416133777081 ], "wc_weaknesses_avg": [ 158.66666666666666, 26.948510575210314 ], "wc_questions_avg": [ 101.66666666666667, 38.56019824753095 ], "wc_limitations_avg": [ 8.666666666666666, 8.178562764256865 ], "wc_review_avg": [ 485.0, 82.94978400614836 ], "wc_reply_reviewers_avg": [ 45.0, 33.35665850571167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BGhf_d07fQwJ:scholar.google.com/&scioq=Fearless+Stochasticity+in+Expectation+Propagation&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cam.ac.uk;research.microsoft.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Cambridge;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Multi-Agent Coordination via Multi-Level Communication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96719", "id": "3l2HnZXNou", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3l2HnZXNou", "openreview": "https://openreview.net/forum?id=3l2HnZXNou", "poster": "", "project": "", "author_site": "Gang Ding, Zeyuan Liu, Zhirui Fang, Kefan Su, Liwen Zhu, Zongqing Lu", "tldr": "", "abstract": "The partial observability and stochasticity in multi-agent settings can be mitigated by accessing more information about others via communication. However, the coordination problem still exists since agents cannot communicate actual actions with each other at the same time due to the circular dependencies. In this paper, we propose a novel multi-level communication scheme, Sequential Communication (SeqComm). SeqComm treats agents asynchronously (the upper-level agents make decisions before the lower-level ones) and has two communication phases. In the negotiation phase, agents determine the priority of decision-making by communicating hidden states of observations and comparing the value of intention, which is obtained by modeling the environment dynamics. In the launching phase, the upper-level agents take the lead in making decisions and then communicate their actions with the lower-level agents. Theoretically, we prove the policies learned by SeqComm are guaranteed to improve monotonically and converge. Empirically, we show that SeqComm outperforms existing methods in a variety of cooperative multi-agent tasks.", "keywords": "multi-agent reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Ziluo Ding;Zeyuan Liu;Zhirui Fang;Kefan Su;Liwen Zhu;Zongqing Lu", "authorids": "~Ziluo_Ding1;~Zeyuan_Liu2;~Zhirui_Fang1;~Kefan_Su1;~Liwen_Zhu1;~Zongqing_Lu2", "gender": "M;M;M;;F;", "homepage": ";https://github.com/Sadnine-One;;;http://www.liwenzhu-pku.cn/;", "dblp": "267/2359;;363/6647;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;;;", "linkedin": "ziluo/;;;;;", "or_profile": "~Ziluo_Ding1;~Zeyuan_Liu2;~Zhirui_Fang1;~Kefan_Su1;~Liwen_Zhu1;~Zongqing_Lu2", "aff": "BAAI;Tsinghua University;Tsinghua University;;Tencent AI Lab;", "aff_domain": "baai.ac.cn;mails.tsinghua.edu.cn;mail.tsinghua.edu.cn;;tencent.com;", "position": "Researcher;MS student;MS student;;Researcher;", "bibtex": "@inproceedings{\nding2024multiagent,\ntitle={Multi-Agent Coordination via Multi-Level Communication},\nauthor={Ziluo Ding and Zeyuan Liu and Zhirui Fang and Kefan Su and Liwen Zhu and Zongqing Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3l2HnZXNou}\n}", "github": "", "reviewers": "gF4i;LzSK;bJxi;iQrt", "pdf_size": 3831835, "rating": "5;7;7;7", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "69;80;64;78", "wc_strengths": "23;193;41;104", "wc_weaknesses": "163;263;15;84", "wc_questions": "5;227;1;121", "wc_limitations": "1;97;5;193", "wc_review": "261;860;126;580", "wc_reply_reviewers": "140;301;16;325", "wc_reply_authors": "42;132;15;195", "reply_reviewers": "2;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 6.53356717268599 ], "wc_strengths_avg": [ 90.25, 66.51080739248322 ], "wc_weaknesses_avg": [ 131.25, 92.3481862301583 ], "wc_questions_avg": [ 88.5, 93.3635367796229 ], "wc_limitations_avg": [ 74.0, 78.70832230456955 ], "wc_review_avg": [ 456.75, 285.2694997717071 ], "wc_reply_reviewers_avg": [ 195.5, 125.69904534243688 ], "wc_reply_authors_avg": [ 96.0, 71.71819852729152 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V4NGGQmIDTgJ:scholar.google.com/&scioq=Multi-Agent+Coordination+via+Multi-Level+Communication&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "baai.ac.cn;mails.tsinghua.edu.cn;mail.tsinghua.edu.cn;;tencent.com;", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Beijing Academy of Artificial Intelligence;Tsinghua University;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.baaic.cn;https://www.tsinghua.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "BAAI;THU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning General Parameterized Policies for Infinite Horizon Average Reward Constrained MDPs via Primal-Dual Policy Gradient Algorithm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96718", "id": "3lQgEPRxeu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3lQgEPRxeu", "openreview": "https://openreview.net/forum?id=3lQgEPRxeu", "poster": "/media/PosterPDFs/NeurIPS%202024/96718.png?t=1731215781.3499022", "project": "", "author_site": "Qinbo Bai, Washim Mondal, Vaneet Aggarwal", "tldr": "", "abstract": "This paper explores the realm of infinite horizon average reward Constrained Markov Decision Processes (CMDPs). To the best of our knowledge, this work is the first to delve into the regret and constraint violation analysis of average reward CMDPs with a general policy parametrization. To address this challenge, we propose a primal dual-based policy gradient algorithm that adeptly manages the constraints while ensuring a low regret guarantee toward achieving a global optimal policy. In particular, our proposed algorithm achieves $\\tilde{\\mathcal{O}}({T}^{4/5})$ objective regret and $\\tilde{\\mathcal{O}}({T}^{4/5})$ constraint violation bounds.", "keywords": "Average Reward MDP;Constraint Violation;Regret.", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Qinbo Bai;Washim Uddin Mondal;Vaneet Aggarwal", "authorids": "~Qinbo_Bai1;~Washim_Uddin_Mondal1;~Vaneet_Aggarwal1", "gender": "M;M;M", "homepage": ";https://home.iitk.ac.in/~wmondal/;", "dblp": "249/9306;201/9517.html;91/6560", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=CQwhdyIAAAAJ;", "orcid": ";0000-0002-2385-6034;", "linkedin": ";;", "or_profile": "~Qinbo_Bai1;~Washim_Uddin_Mondal1;~Vaneet_Aggarwal1", "aff": "Purdue University;Indian Institute of Technology, Kanpur;Purdue University", "aff_domain": "purdue.edu;iitk.ac.in;purdue.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbai2024learning,\ntitle={Learning General Parameterized Policies for Infinite Horizon Average Reward Constrained {MDP}s via Primal-Dual Policy Gradient Algorithm},\nauthor={Qinbo Bai and Washim Uddin Mondal and Vaneet Aggarwal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3lQgEPRxeu}\n}", "github": "", "reviewers": "dyHb;waAx;nhM4;UyCM;g5Lw", "pdf_size": 468475, "rating": "5;6;6;7;7", "confidence": "3;2;4;3;2", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "60;45;69;140;42", "wc_strengths": "43;82;93;125;11", "wc_weaknesses": "119;146;177;2;188", "wc_questions": "99;4;22;252;7", "wc_limitations": "9;1;1;7;19", "wc_review": "330;278;362;526;267", "wc_reply_reviewers": "0;16;25;20;9", "wc_reply_authors": "0;0;0;17;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.2, 35.78491302211031 ], "wc_strengths_avg": [ 70.8, 39.761287705505715 ], "wc_weaknesses_avg": [ 126.4, 66.73709613101246 ], "wc_questions_avg": [ 76.8, 94.19426734148952 ], "wc_limitations_avg": [ 7.4, 6.6211781428987395 ], "wc_review_avg": [ 352.6, 93.33723801356027 ], "wc_reply_reviewers_avg": [ 14.0, 8.740709353364863 ], "wc_reply_authors_avg": [ 3.4, 6.8 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2857142857142857, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10714398022122270895&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "purdue.edu;iitk.ac.in;purdue.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;Indian Institute of Technology Kanpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.iitk.ac.in", "aff_unique_abbr": "Purdue;IIT Kanpur", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kanpur", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;India" }, { "title": "Learning to Decouple the Lights for 3D Face Texture Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96717", "id": "3lic0JgPRZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3lic0JgPRZ", "openreview": "https://openreview.net/forum?id=3lic0JgPRZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96717.png?t=1731346347.525511", "project": "", "author_site": "Tianxin Huang, Zhenyu Zhang, Ying Tai, Gim Hee Lee", "tldr": "", "abstract": "Existing research has made impressive strides in reconstructing human facial shapes and textures from images with well-illuminated faces and minimal external occlusions. \nNevertheless, it remains challenging to recover accurate facial textures from scenarios with complicated illumination affected by external occlusions, \\eg a face that is partially obscured by items such as a hat. \nExisting works based on the assumption of single and uniform illumination cannot correctly process these data.\nIn this work, we introduce a novel approach to model 3D facial textures under such unnatural illumination. Instead of assuming single illumination, our framework learns to imitate the unnatural illumination as a composition of multiple separate light conditions combined with learned neural representations, named Light Decoupling.\nAccording to experiments on both single images and video sequences, we demonstrate the effectiveness of our approach in modeling facial textures under challenging illumination affected by occlusions.", "keywords": "Face Texture;Light Decoupling;Neural Representation", "primary_area": "machine_vision", "supplementary_material": "/attachment/93a77360f3d5314a345686b748b3e8aa8cc501ac.zip", "author": "Tianxin Huang;Zhenyu Zhang;Ying Tai;Gim Hee Lee", "authorids": "~Tianxin_Huang1;~Zhenyu_Zhang2;~Ying_Tai1;~Gim_Hee_Lee1", "gender": "M;M;M;", "homepage": "https://tianxinhuang.github.io/;https://jessezhang92.github.io/;https://tyshiwo.github.io/;https://www.comp.nus.edu.sg/~leegh/", "dblp": "251/3784;01/1844-5;158/1384;49/9455", "google_scholar": "https://scholar.google.com.hk/citations?user=Fg7WYfcAAAAJ;4daxK2AAAAAJ;NKaiUasAAAAJ;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": ";;;0000-0002-1583-0475", "linkedin": ";;;", "or_profile": "~Tianxin_Huang1;~Zhenyu_Zhang2;~Ying_Tai1;~Gim_Hee_Lee1", "aff": "National University of Singapore;Nanjing University;Nanjing University;National University of Singapore", "aff_domain": "nus.edu.sg;nju.edu.cn;nju.edu.cn;nus.edu.sg", "position": "Research Fellow;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024learning,\ntitle={Learning to Decouple the Lights for 3D Face Texture Modeling},\nauthor={Tianxin Huang and Zhenyu Zhang and Ying Tai and Gim Hee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3lic0JgPRZ}\n}", "github": "", "reviewers": "W5JR;SyuP;WqKW;NbDK", "pdf_size": 12241593, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "3;2;2;3", "wc_summary": "122;208;122;51", "wc_strengths": "68;128;42;55", "wc_weaknesses": "92;299;220;127", "wc_questions": "25;85;4;2", "wc_limitations": "1;76;1;4", "wc_review": "308;796;389;239", "wc_reply_reviewers": "0;92;122;32", "wc_reply_authors": "0;130;474;28", "reply_reviewers": "0;2;2;1", "reply_authors": "1;3;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 125.75, 55.63440931653719 ], "wc_strengths_avg": [ 73.25, 32.91940916845258 ], "wc_weaknesses_avg": [ 184.5, 80.9830229122129 ], "wc_questions_avg": [ 29.0, 33.56337289367682 ], "wc_limitations_avg": [ 20.5, 32.06633748964793 ], "wc_review_avg": [ 433.0, 216.19782607602696 ], "wc_reply_reviewers_avg": [ 61.5, 48.0702610768862 ], "wc_reply_authors_avg": [ 158.0, 188.7485099278932 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ta3Apmo5iMsJ:scholar.google.com/&scioq=Learning+to+Decouple+the+Lights+for+3D+Face+Texture+Modeling&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "nus.edu.sg;nju.edu.cn;nju.edu.cn;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "National University of Singapore;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.nju.edu.cn", "aff_unique_abbr": "NUS;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Diffusion-based Layer-wise Semantic Reconstruction for Unsupervised Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96716", "id": "3m5ndUNQYt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3m5ndUNQYt", "openreview": "https://openreview.net/forum?id=3m5ndUNQYt", "poster": "/media/PosterPDFs/NeurIPS%202024/96716.png?t=1731728041.6037395", "project": "", "author_site": "Ying Yang, De Cheng, Chaowei Fang, Yubiao Wang, Changzhe Jiao, Lechao Cheng, Nannan Wang, Xinbo Gao", "tldr": "", "abstract": "Unsupervised out-of-distribution (OOD) detection aims to identify out-of-domain data by learning only from unlabeled In-Distribution (ID) training samples, which is crucial for developing a safe real-world machine learning system. Current reconstruction-based method provides a good alternative approach, by measuring the reconstruction error between the input and its corresponding generative counterpart in the pixel/feature space. However, such generative methods face the key dilemma, $i.e.$, improving the reconstruction power of the generative model, while keeping compact representation of the ID data. To address this issue, we propose the diffusion-based layer-wise semantic reconstruction approach for unsupervised OOD detection. The innovation of our approach is that we leverage the diffusion model's intrinsic data reconstruction ability to distinguish ID samples from OOD samples in the latent feature space. Moreover, to set up a comprehensive and discriminative feature representation, we devise a multi-layer semantic feature extraction strategy. Through distorting the extracted features with Gaussian noises and applying the diffusion model for feature reconstruction, the separation of ID and OOD samples is implemented according to the reconstruction errors. Extensive experimental results on multiple benchmarks built upon various datasets demonstrate that our method achieves state-of-the-art performance in terms of detection accuracy and speed.", "keywords": "Out-of-Distribution detection;diffusion model;feature reconstruction", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0163d7a7f307a0fd496a2b052f6fd214e6a7fec2.zip", "author": "Ying Yang;De Cheng;Chaowei Fang;Yubiao Wang;Changzhe Jiao;Lechao Cheng;Nannan Wang;Xinbo Gao", "authorids": "~Ying_Yang4;~De_Cheng3;~Chaowei_Fang3;~Yubiao_Wang1;~Changzhe_Jiao2;~Lechao_Cheng2;~Nannan_Wang1;~Xinbo_Gao5", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://xbyym.github.io/yycfq.github.io/;https://web.xidian.edu.cn/dcheng/index.html;https://chaoweifang.github.io/;http://blog.alanbiao.top/;https://faculty.xidian.edu.cn/JCZ/zh_CN/index.htm;https://faculty.hfut.edu.cn/ChengLechao;;https://faculty.cqupt.edu.cn/gaoxinbo/zh_CN/index.htm", "dblp": "y/YingYang;154/1991/;159/1655;;;165/9781;10/8359-1;", "google_scholar": ";180lASkAAAAJ;eNtYEmcAAAAJ;;eul8WdwAAAAJ;PKFAv-cAAAAJ;SRBn7oUAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;0000-0002-7546-9052;;0000-0002-7985-0037", "linkedin": ";;;;changzhe-jiao-2681136a/;;;xinbo-gao-151a2224/", "or_profile": "~Ying_Yang4;~De_Cheng3;~Chaowei_Fang3;~Yubiao_Wang1;~Changzhe_Jiao2;~Lechao_Cheng2;~Nannan_Wang1;~Xinbo_Gao5", "aff": "Xi'an University of Electronic Science and Technology;Xidian University;Xidian University;Xidian University;Xi'an University;Hefei University of Technology;Xidian University;Chongqing University of Post and Telecommunications", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;hfut.edu.cn;xidian.edu.cn;cqupt.edu.cn", "position": "Undergrad student;Associate Professor;Associate Professor;Undergrad student;Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024diffusionbased,\ntitle={Diffusion-based Layer-wise Semantic Reconstruction for Unsupervised Out-of-Distribution Detection},\nauthor={Ying Yang and De Cheng and Chaowei Fang and Yubiao Wang and Changzhe Jiao and Lechao Cheng and Nannan Wang and Xinbo Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3m5ndUNQYt}\n}", "github": "", "reviewers": "Jgzd;aPqn;yKR8;ch15", "pdf_size": 0, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "47;111;67;46", "wc_strengths": "22;124;46;213", "wc_weaknesses": "152;232;226;73", "wc_questions": "18;143;22;15", "wc_limitations": "1;13;30;3", "wc_review": "240;623;391;350", "wc_reply_reviewers": "38;12;12;8", "wc_reply_authors": "193;0;0;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 26.337947907914163 ], "wc_strengths_avg": [ 101.25, 74.73076675640361 ], "wc_weaknesses_avg": [ 170.75, 64.63503307030948 ], "wc_questions_avg": [ 49.5, 54.03933752369657 ], "wc_limitations_avg": [ 11.75, 11.4755174175285 ], "wc_review_avg": [ 401.0, 139.5582315737771 ], "wc_reply_reviewers_avg": [ 17.5, 11.947803145348521 ], "wc_reply_authors_avg": [ 48.25, 83.57145146519834 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8434161510401142473&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;hfut.edu.cn;xidian.edu.cn;cqupt.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;1;2;3;1;4", "aff_unique_norm": "Xi'an University of Electronic Science and Technology;Xidian University;Xi'an University;Hefei University of Technology;Chongqing University of Post and Telecommunications", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;http://www.hfut.edu.cn/;http://www.cqupt.edu.cn", "aff_unique_abbr": "Xidian University;Xidian;Xidian;HUT;CQUPT", "aff_campus_unique_index": "0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Privacy without Noisy Gradients: Slicing Mechanism for Generative Model Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96715", "id": "3mCr7ZNdSw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3mCr7ZNdSw", "openreview": "https://openreview.net/forum?id=3mCr7ZNdSw", "poster": "/media/PosterPDFs/NeurIPS%202024/96715.png?t=1730244181.2139766", "project": "", "author_site": "Kristjan Greenewald, Yuancheng Yu, Hao Wang, Kai Xu", "tldr": "", "abstract": "Training generative models with differential privacy (DP) typically involves injecting noise into gradient updates or adapting the discriminator's training procedure. As a result, such approaches often struggle with hyper-parameter tuning and convergence. \nWe consider the \\emph{slicing privacy mechanism} that injects noise into random low-dimensional projections of the private data, and provide strong privacy guarantees for it. These noisy projections are used for training generative models.\nTo enable optimizing generative models using this DP approach, we introduce the \\emph{smoothed-sliced $f$-divergence} and show it enjoys statistical consistency. \nMoreover, we present a kernel-based estimator for this divergence, circumventing the need for adversarial training. \nExtensive numerical experiments demonstrate that our approach can generate synthetic data of higher quality compared with baselines. Beyond performance improvement, our method, by sidestepping the need for noisy gradients, offers data scientists the flexibility to adjust generator architecture and hyper-parameters, run the optimization over any number of epochs, and even restart the optimization process---all without incurring additional privacy costs.", "keywords": "differential privacy;synthetic data generation;tabular data;GAN;f divergence;slicing", "primary_area": "privacy", "supplementary_material": "/attachment/a6f5ffc26b57f4ce65c5e5154f601754ec6fd613.zip", "author": "Kristjan Greenewald;Yuancheng Yu;Hao Wang;Kai Xu", "authorids": "~Kristjan_Greenewald1;~Yuancheng_Yu1;~Hao_Wang22;~Kai_Xu4", "gender": ";;M;M", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;;https://haowang94.github.io;https://xuk.ai", "dblp": "146/0563;218/6704.html;;", "google_scholar": "L3zNUG4AAAAJ;;A3WtYhAAAAAJ;https://scholar.google.ca/citations?user=kf3C60wAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kristjan_Greenewald1;~Yuancheng_Yu1;~Hao_Wang22;~Kai_Xu4", "aff": "MIT-IBM Watson AI Lab, IBM Research;University of Illinois, Urbana Champaign;MIT-IBM Watson AI Lab;Amazon", "aff_domain": "ibm.com;illinois.edu;ibm.com;amazon.com", "position": "Research Scientist;PhD student;Researcher;Research scientist", "bibtex": "@inproceedings{\ngreenewald2024privacy,\ntitle={Privacy without Noisy Gradients: Slicing Mechanism for Generative Model Training},\nauthor={Kristjan Greenewald and Yuancheng Yu and Hao Wang and Kai Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3mCr7ZNdSw}\n}", "github": "", "reviewers": "JoQ4;NMCZ;jo82", "pdf_size": 759224, "rating": "4;5;6", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;3;3", "wc_summary": "47;101;62", "wc_strengths": "39;61;66", "wc_weaknesses": "87;286;26", "wc_questions": "34;168;1", "wc_limitations": "17;1;1", "wc_review": "224;617;156", "wc_reply_reviewers": "0;107;0", "wc_reply_authors": "0;862;0", "reply_reviewers": "0;2;0", "reply_authors": "1;4;1", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.0, 22.759613353482084 ], "wc_strengths_avg": [ 55.333333333333336, 11.728408057172787 ], "wc_weaknesses_avg": [ 133.0, 111.01651528789158 ], "wc_questions_avg": [ 67.66666666666667, 72.21418758726632 ], "wc_limitations_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_review_avg": [ 332.3333333333333, 203.19503493496643 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 50.440283724640395 ], "wc_reply_authors_avg": [ 287.3333333333333, 406.3506969218693 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iqfoBhpbRssJ:scholar.google.com/&scioq=Privacy+without+Noisy+Gradients:+Slicing+Mechanism+for+Generative+Model+Training&hl=en&as_sdt=0,10", "gs_version_total": 5, "email": "ibm.com;illinois.edu;ibm.com;amazon.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "IBM;University of Illinois Urbana-Champaign;Massachusetts Institute of Technology;Amazon", "aff_unique_dep": "AI Lab;;IBM Watson AI Lab;Amazon.com, Inc.", "aff_unique_url": "https://www.ibmwatsonai.org/;https://illinois.edu;https://www.mitibmwatsonailab.org;https://www.amazon.com", "aff_unique_abbr": "MIT-IBM AI Lab;UIUC;MIT-IBM AI Lab;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "3mzFmBPFIX", "title": "Efficiently Parameterized Neural Metriplectic Systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "Metriplectic systems are learned from data in a way that scales quadratically in both the size of the state and the rank of the metriplectic data. Besides being provably energy conserving and entropy stable, the proposed approach comes with approximation results demonstrating its ability to accurately learn metriplectic dynamics from data as well as an error estimate indicating its potential for generalization to unseen timescales when approximation error is low. Examples are provided which illustrate performance in the presence of both full state information as well as when entropic variables are unknown, confirming that the proposed approach exhibits superior accuracy and scalability without compromising on model expressivity.", "keywords": "metriplectic systems;structure preservation;energy conservation;entropy stability;neural ODEs", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/f352af3f6afad51ecf1afca1e5a4c9d9b1481c86.zip", "author": "Anthony Gruber;Kookjin Lee;Haksoo Lim;Noseong Park;Nathaniel Trask", "authorids": "~Anthony_Gruber1;~Kookjin_Lee1;~Haksoo_Lim1;~Noseong_Park1;~Nathaniel_Trask2", "gender": ";M;M;;M", "homepage": ";https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works;;;https://www.sandia.gov/ccr/staff/nathaniel-albert-trask/", "dblp": ";122/5103;;;188/8236", "google_scholar": ";https://scholar.google.com/citations?hl=en;ZJvEyqwAAAAJ;;6iLMZkwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Anthony_Gruber1;~Kookjin_Lee1;~Haksoo_Lim1;~Noseong_Park1;~Nathaniel_Trask2", "aff": ";Arizona State University;Yonsei University;;", "aff_domain": ";asu.edu;yonsei.ac.kr;;", "position": ";Assistant Professor;MS student;;", "bibtex": "@misc{\nanonymous2024efficiently,\ntitle={Efficiently Parameterized Neural Metriplectic Systems},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3mzFmBPFIX}\n}", "github": "", "project": "", "reviewers": "Lujy;LgsG;dk6W", "site": "https://openreview.net/forum?id=3mzFmBPFIX", "pdf_size": 1004010, "rating": "5;6;7", "confidence": "1;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "70;159;164", "wc_strengths": "137;90;146", "wc_weaknesses": "309;81;55", "wc_questions": "98;67;208", "wc_limitations": "3;11;1", "wc_review": "617;408;574", "wc_reply_reviewers": "26;18;71", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 131.0, 43.18178628387977 ], "wc_strengths_avg": [ 124.33333333333333, 24.553795814270526 ], "wc_weaknesses_avg": [ 148.33333333333334, 114.10326706784322 ], "wc_questions_avg": [ 124.33333333333333, 60.49977043115307 ], "wc_limitations_avg": [ 5.0, 4.320493798938574 ], "wc_review_avg": [ 533.0, 90.11474167230723 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 23.328570942563587 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13759293038284210625&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Arizona State University;Yonsei University", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.yonsei.ac.kr", "aff_unique_abbr": "ASU;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;South Korea" }, { "title": "D-MiSo: Editing Dynamic 3D Scenes using Multi-Gaussians Soup", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96714", "id": "3og0FT85B2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3og0FT85B2", "openreview": "https://openreview.net/forum?id=3og0FT85B2", "poster": "/media/PosterPDFs/NeurIPS%202024/96714.png?t=1731690142.2058218", "project": "", "author_site": "Joanna Waczynska, Piotr Borycki, Joanna Kaleta, Slawomir Tadeja, Przemys\u0142aw Spurek", "tldr": "", "abstract": "Over the past years, we have observed an abundance of approaches for modeling dynamic 3D scenes using Gaussian Splatting (GS). These solutions use GS to represent the scene's structure and the neural network to model dynamics. Such approaches allow fast rendering and extracting each element of such a dynamic scene. However, modifying such objects over time is challenging. SC-GS (Sparse Controlled Gaussian Splatting) enhanced with Deformed Control Points partially solves this issue. However, this approach necessitates selecting elements that need to be kept fixed, as well as centroids that should be adjusted throughout editing. Moreover, this task poses additional difficulties regarding the re-productivity of such editing. To address this, we propose Dynamic Multi-Gaussian Soup (D-MiSo), which allows us to model the mesh-inspired representation of dynamic GS. Additionally, we propose a strategy of linking parameterized Gaussian splats, forming a Triangle Soup with the estimated mesh. Consequently, we can separately construct new trajectories for the 3D objects composing the scene. Thus, we can make the scene's dynamic editable over time or while maintaining partial dynamics.", "keywords": "Neural rendering;Gaussian splatting;3D;Triangle Soup", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/72587479110f9d5933eed1f7917fb1c97df6ccc0.zip", "author": "Joanna Waczynska;Piotr Borycki;Joanna Kaleta;Slawomir Tadeja;Przemys\u0142aw Spurek", "authorids": "~Joanna_Waczynska1;~Piotr_Borycki1;~Joanna_Kaleta1;~Slawomir_Tadeja1;~Przemys\u0142aw_Spurek1", "gender": ";M;;;M", "homepage": ";;;;http://ww2.ii.uj.edu.pl/~spurek/", "dblp": ";;;;77/10260", "google_scholar": ";ktQi2u4AAAAJ;;;0kp0MbgAAAAJ", "orcid": ";0000-0002-5715-4428;;;0000-0003-0097-5521", "linkedin": ";piotr-borycki-560052251/;;;spurek/", "or_profile": "~Joanna_Waczynska1;~Piotr_Borycki1;~Joanna_Kaleta1;~Slawomir_Tadeja1;~Przemys\u0142aw_Spurek1", "aff": ";Jagiellonian University Cracow;;;Jagiellonian University Cracow", "aff_domain": ";uj.edu.pl;;;uj.edu.pl", "position": ";MS student;;;Associate Professor", "bibtex": "@inproceedings{\nwaczynska2024dmiso,\ntitle={D-MiSo: Editing Dynamic 3D Scenes using Multi-Gaussians Soup},\nauthor={Joanna Waczynska and Piotr Borycki and Joanna Kaleta and Slawomir Tadeja and Przemys{\\l}aw Spurek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3og0FT85B2}\n}", "github": "", "reviewers": "ZSU4;u5NP;qmWF;hxyh", "pdf_size": 10021790, "rating": "5;5;6;7", "confidence": "4;2;2;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "55;76;105;83", "wc_strengths": "41;59;117;88", "wc_weaknesses": "167;59;14;241", "wc_questions": "50;60;15;63", "wc_limitations": "6;34;6;6", "wc_review": "319;288;257;481", "wc_reply_reviewers": "20;32;104;94", "wc_reply_authors": "13;359;367;71", "reply_reviewers": "1;2;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.75, 17.851820635442202 ], "wc_strengths_avg": [ 76.25, 28.89095879336648 ], "wc_weaknesses_avg": [ 120.25, 89.17223502862312 ], "wc_questions_avg": [ 47.0, 19.091883092036785 ], "wc_limitations_avg": [ 13.0, 12.12435565298214 ], "wc_review_avg": [ 336.25, 86.39842301801579 ], "wc_reply_reviewers_avg": [ 62.5, 36.91544392256444 ], "wc_reply_authors_avg": [ 202.5, 161.82938546506318 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2506789617454848531&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";uj.edu.pl;;;uj.edu.pl", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Jagiellonian University", "aff_unique_dep": "", "aff_unique_url": "https://www.uj.edu.pl", "aff_unique_abbr": "UJ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cracow", "aff_country_unique_index": "0;0", "aff_country_unique": "Poland" }, { "title": "Conformal Classification with Equalized Coverage for Adaptively Selected Groups", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96713", "id": "3pWHKxK1sC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3pWHKxK1sC", "openreview": "https://openreview.net/forum?id=3pWHKxK1sC", "poster": "/media/PosterPDFs/NeurIPS%202024/96713.png?t=1731101453.4247527", "project": "", "author_site": "Yanfei Zhou, Matteo Sesia", "tldr": "", "abstract": "This paper introduces a conformal inference method to evaluate uncertainty in classification by generating prediction sets with valid coverage conditional on adaptively chosen features. These features are carefully selected to reflect potential model limitations or biases. This can be useful to find a practical compromise between efficiency---by providing informative predictions---and algorithmic fairness---by ensuring equalized coverage for the most sensitive groups. We demonstrate the validity and effectiveness of this method on simulated and real data sets.", "keywords": "Classification;Conformal Prediction;Equalized Coverage;Fairness", "primary_area": "fairness", "supplementary_material": "/attachment/7e706bbf54e2b4877e11cba299ab637653b7cfcd.zip", "author": "Yanfei Zhou;Matteo Sesia", "authorids": "~Yanfei_Zhou1;~Matteo_Sesia1", "gender": "F;", "homepage": ";https://msesia.github.io/", "dblp": ";280/1260", "google_scholar": "YJ5oVF4AAAAJ;qFtP1MQAAAAJ", "orcid": ";0000-0001-9046-907X", "linkedin": "yanfei-zhou-25547a11a/;matteo-sesia", "or_profile": "~Yanfei_Zhou1;~Matteo_Sesia1", "aff": "University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024conformal,\ntitle={Conformal Classification with Equalized Coverage for Adaptively Selected Groups},\nauthor={Yanfei Zhou and Matteo Sesia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3pWHKxK1sC}\n}", "github": "", "reviewers": "FDCZ;mW3F;asc2;92Kn", "pdf_size": 1762380, "rating": "5;5;6;7", "confidence": "4;3;5;3", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "59;125;68;57", "wc_strengths": "58;48;106;66", "wc_weaknesses": "193;393;123;63", "wc_questions": "111;52;98;85", "wc_limitations": "5;9;1;1", "wc_review": "426;627;396;272", "wc_reply_reviewers": "12;175;16;0", "wc_reply_authors": "0;281;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 27.87808278917329 ], "wc_strengths_avg": [ 69.5, 22.017038856303998 ], "wc_weaknesses_avg": [ 193.0, 124.29802894656054 ], "wc_questions_avg": [ 86.5, 21.937410968480304 ], "wc_limitations_avg": [ 4.0, 3.3166247903554 ], "wc_review_avg": [ 430.25, 127.42129923996224 ], "wc_reply_reviewers_avg": [ 50.75, 71.97699285188288 ], "wc_reply_authors_avg": [ 70.25, 121.67656923171363 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n5ESygCYinEJ:scholar.google.com/&scioq=Conformal+Classification+with+Equalized+Coverage+for+Adaptively+Selected+Groups&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "usc.edu;usc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "IndicVoices-R: Unlocking a Massive Multilingual Multi-speaker Speech Corpus for Scaling Indian TTS", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97858", "id": "3qH8q02x0n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3qH8q02x0n", "openreview": "https://openreview.net/forum?id=3qH8q02x0n", "poster": "", "project": "", "author_site": "Ashwin Sankar, Srija Anand, Praveen Varadhan, Sherry Thomas, Mehak Singal, Shridhar Kumar, Deovrat Mehendale, Aditi Krishana, Giri Raju, Mitesh Khapra", "tldr": "", "abstract": "Recent advancements in text-to-speech (TTS) synthesis show that large-scale models trained with extensive web data produce highly natural-sounding output. However, such data is scarce for Indian languages due to the lack of high-quality, manually subtitled data on platforms like LibriVox or YouTube. To address this gap, we enhance existing large-scale ASR datasets containing natural conversations collected in low-quality environments to generate high-quality TTS training data. Our pipeline leverages the cross-lingual generalization of denoising and speech enhancement models trained on English and applied to Indian languages. This results in IndicVoices-R (IV-R), the largest multilingual Indian TTS dataset derived from an ASR dataset, with 1,704 hours of high-quality speech from 10,496 speakers across 22 Indian languages. IV-R matches the quality of gold-standard TTS datasets like LJSpeech, LibriTTS, and IndicTTS. We also introduce the IV-R Benchmark, the first to assess zero-shot, few-shot, and many-shot speaker generalization capabilities of TTS models on Indian voices, ensuring diversity in age, gender, and style. We demonstrate that fine-tuning an English pre-trained model on a combined dataset of high-quality IndicTTS and our IV-R dataset results in better zero-shot speaker generalization compared to fine-tuning on the IndicTTS dataset alone. Further, our evaluation reveals limited zero-shot generalization for Indian voices in TTS models trained on prior datasets, which we improve by fine-tuning the model on our data containing diverse set of speakers across language families. We open-source code and data for all 22 official Indian languages.", "keywords": "text-to-speech;dataset;speech enhancement;multilingual;multi-speaker", "primary_area": "", "supplementary_material": "/attachment/6fdff03eae39f9d9c075585ec967699261e558ae.pdf", "author": "Ashwin Sankar;Srija Anand;Praveen Srinivasa Varadhan;Sherry Thomas;Mehak Singal;Shridhar Kumar;Deovrat Mehendale;Aditi Krishana;Giri Raju;Mitesh M Khapra", "authorids": "~Ashwin_Sankar1;~Srija_Anand1;~Praveen_Srinivasa_Varadhan1;~Sherry_Thomas1;~Mehak_Singal1;~Shridhar_Kumar1;~Deovrat_Mehendale1;~Aditi_Krishana1;~Giri_Raju1;~Mitesh_M_Khapra1", "gender": "M;F;M;M;F;M;M;F;M;M", "homepage": ";;;https://sherrys997.github.io;;https://www.linkedin.com/in/shridhar-kumar;;;;http://www.cse.iitm.ac.in/~miteshk", "dblp": "328/0752.html;;283/5316.html;;;;;;;90/7967", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.co.in/citations?user=CpGhZUYAAAAJ;;;;;;;https://scholar.google.com.tw/citations?user=DV8z8DYAAAAJ", "orcid": ";;;;;;;;;", "linkedin": "https://linkedin.com/in/ashwin-s2000;srijaanand;svp19/;;mehaksingal/;;deovrat-mehendale-40046b163?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;krishnaditi?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;giri-raju-787854200;", "or_profile": "~Ashwin_Sankar1;~Srija_Anand1;~Praveen_Srinivasa_Varadhan1;~Sherry_Thomas1;~Mehak_Singal1;~Shridhar_Kumar1;~Deovrat_Mehendale1;~Aditi_Krishana1;~Giri_Raju1;~Mitesh_M_Khapra1", "aff": "AI4Bharat;Indian Institute of Technology, Madras;Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras;Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology, Madras;Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology;Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras;Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology;;Indian Institute of Technology, Madras", "aff_domain": "ai4bharat.org;iitm.ac.in;cse.iitm.ac.in;iitm.ac.in;ds.study.iitm.ac.in;iitm.ac.in;cse.iitm.ac.in;iitm.ac.in;;iitm.ac.in", "position": "Researcher;MS student;PhD student;Undergrad student;Undergrad student;Undergrad student;Researcher;Undergrad student;;Associate Professor", "bibtex": "@inproceedings{\nsankar2024indicvoicesr,\ntitle={IndicVoices-R: Unlocking a Massive Multilingual Multi-speaker Speech Corpus for Scaling Indian {TTS}},\nauthor={Ashwin Sankar and Srija Anand and Praveen Srinivasa Varadhan and Sherry Thomas and Mehak Singal and Shridhar Kumar and Deovrat Mehendale and Aditi Krishana and Giri Raju and Mitesh M Khapra},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3qH8q02x0n}\n}", "github": "", "reviewers": "37Ua;8q3k;2UXi;Aehz", "pdf_size": 904606, "rating": "4;5;7;7", "confidence": "5;4;4;5", "wc_summary_and_contributions": "34;92;33;117", "wc_strengths": "11;56;48;94", "wc_improvement": "83;49;57;41", "wc_limitations": "8;26;8;14", "wc_correctness": "12;1;7;9", "wc_clarity": "14;4;5;1", "wc_relation_to_prior_work": "33;2;11;9", "wc_documentation": "1;4;14;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "197;235;184;311", "wc_reply_reviewers": "0;46;65;18", "wc_reply_authors": "44;0;32;29", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 69.0, 36.585516259853435 ], "wc_strengths_avg": [ 52.25, 29.4819860253681 ], "wc_improvement_avg": [ 57.5, 15.771810295587505 ], "wc_limitations_avg": [ 14.0, 7.3484692283495345 ], "wc_correctness_avg": [ 7.25, 4.02336923485777 ], "wc_clarity_avg": [ 6.0, 4.847679857416329 ], "wc_relation_to_prior_work_avg": [ 13.75, 11.60549438843516 ], "wc_documentation_avg": [ 11.0, 9.40744386111339 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 231.75, 49.443781206538 ], "wc_reply_reviewers_avg": [ 32.25, 25.02373872945448 ], "wc_reply_authors_avg": [ 26.25, 16.161296358893985 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14898031381328493450&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "ai4bharat.org;iitm.ac.in;cse.iitm.ac.in;iitm.ac.in;ds.study.iitm.ac.in;iitm.ac.in;cse.iitm.ac.in;iitm.ac.in;;iitm.ac.in", "author_num": 10, "aff_unique_index": "0;1;2;2;1;2;2;2;1", "aff_unique_norm": "AI4Bharat;Indian Institute of Technology Madras;Indian Institute of Technology, Madras", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": ";https://www.iitm.ac.in;https://www.iitm.ac.in", "aff_unique_abbr": ";IIT Madras;IIT Madras", "aff_campus_unique_index": "1;1;1;1;1;1;1;1", "aff_campus_unique": ";Madras", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "India" }, { "title": "Efficient Multi-task Reinforcement Learning with Cross-Task Policy Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96712", "id": "3qUks3wrnH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3qUks3wrnH", "openreview": "https://openreview.net/forum?id=3qUks3wrnH", "poster": "/media/PosterPDFs/NeurIPS%202024/96712.png?t=1731396149.2139714", "project": "", "author_site": "Jinmin He, Kai Li, Yifan Zang, Haobo Fu, Qiang Fu, Junliang Xing, Jian Cheng", "tldr": "", "abstract": "Multi-task reinforcement learning endeavors to efficiently leverage shared information across various tasks, facilitating the simultaneous learning of multiple tasks. Existing approaches primarily focus on parameter sharing with carefully designed network structures or tailored optimization procedures. However, they overlook a direct and complementary way to exploit cross-task similarities: the control policies of tasks already proficient in some skills can provide explicit guidance for unmastered tasks to accelerate skills acquisition. To this end, we present a novel framework called Cross-Task Policy Guidance (CTPG), which trains a guide policy for each task to select the behavior policy interacting with the environment from all tasks' control policies, generating better training trajectories. In addition, we propose two gating mechanisms to improve the learning efficiency of CTPG: one gate filters out control policies that are not beneficial for guidance, while the other gate blocks tasks that do not necessitate guidance. CTPG is a general framework adaptable to existing parameter sharing approaches. Empirical evaluations demonstrate that incorporating CTPG with these approaches significantly enhances performance in manipulation and locomotion benchmarks.", "keywords": "Multi-task Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/66317c521caab84531b2f5397b8f61dce0fb24c7.zip", "author": "Jinmin He;Kai Li;Yifan Zang;Haobo Fu;QIANG FU;Junliang Xing;Jian Cheng", "authorids": "~Jinmin_He1;~Kai_Li2;~Yifan_Zang1;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7", "gender": "M;M;M;M;M;M;M", "homepage": ";;;;http://people.ucas.ac.cn/~jlxing?language=en;https://people.ucas.ac.cn/~chengjian?language=en;https://github.com/DarkDawn233", "dblp": "181/2853;269/4608;85/8571;;43/7659.html;14/6145-1;347/6803", "google_scholar": "_cY_PXgAAAAJ;;LFdJXNcAAAAJ;gANaxT0AAAAJ;jSwNd3MAAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;;;0000-0001-6801-0510;0000-0003-1289-2758;", "linkedin": ";;haobo-fu-382b0784/;;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;;", "or_profile": "~Kai_Li2;~Yifan_Zang1;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7;~He_Jinmin1", "aff": "Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Tencent AI Lab;Tencent AI Lab;Tsinghua University;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ucas.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ucas.ac.cn", "position": "Associate Professor;PhD student;Principal Researcher;Principal Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nhe2024efficient,\ntitle={Efficient Multi-task Reinforcement Learning with Cross-Task Policy Guidance},\nauthor={Jinmin He and Kai Li and Yifan Zang and Haobo Fu and QIANG FU and Junliang Xing and Jian Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3qUks3wrnH}\n}", "github": "", "reviewers": "J1dv;5DoG;k7nL;xTD7", "pdf_size": 16136881, "rating": "5;5;6;7", "confidence": "4;5;4;3", "soundness": "3;2;2;2", "novelty": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "110;124;102;182", "wc_strengths": "50;73;127;264", "wc_weaknesses": "302;263;95;133", "wc_questions": "2;2;67;44", "wc_limitations": "5;1;36;21", "wc_review": "469;463;427;644", "wc_reply_reviewers": "13;27;19;217", "wc_reply_authors": "66;35;34;45", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 129.5, 31.316928329579195 ], "wc_strengths_avg": [ 128.5, 83.07376240426336 ], "wc_weaknesses_avg": [ 198.25, 86.4215684884277 ], "wc_questions_avg": [ 28.75, 27.958674861301994 ], "wc_limitations_avg": [ 15.75, 13.88119231190174 ], "wc_review_avg": [ 500.75, 84.25074183649662 ], "wc_reply_reviewers_avg": [ 69.0, 85.59205570612264 ], "wc_reply_authors_avg": [ 45.0, 12.864680330268607 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10440944983327214137&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "email": "ia.ac.cn;ucas.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ucas.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;3;0;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Tencent;Tsinghua University", "aff_unique_dep": "Institute of Automation;;Tencent AI Lab;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;UCAS;Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3qa4YLkcEw", "title": "TRACE: A Comprehensive Benchmark for Continual Learning in Large Language Models", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Aligned large language models (LLMs) demonstrate exceptional capabilities in task-solving, following instructions, and ensuring safety. However, the continual learning aspect of these aligned LLMs has been largely overlooked. Existing continual learning benchmarks lack sufficient challenge for leading aligned LLMs due to their homogeneous task types and low task complexity. To bridge this gap, we introduce TRACE, a benchmark designed to rigorously assess continual learning capabilities in LLMs. TRACE comprises eight challenging tasks from the scope of domain-specific tasks, multilingual capabilities, code generation, and mathematical reasoning. Through systematic experiments on TRACE with six different aligned models ranging from 7B to 70B, we discovered significant declines in both general performance and instruction-following abilities. For example, the accuracy of llama2-chat 13B on the gsm8k dataset declined precipitously from 43.14\\% to 2.12\\% after training on our datasets. This highlights the challenge of finding a suitable tradeoff between achieving performance on specific tasks while preserving the original prowess of LLMs. Our results demonstrate that integrating task-specific cues with meta-rationales significantly reduces catastrophic forgetting and improves task convergence, offering a viable strategy to enhance the adaptability of LLMs in dynamic environments.", "keywords": "large language model;continual learning;benchmark", "primary_area": "", "supplementary_material": "/attachment/0b89eba031449156eb9e6c5c9cc45b7aeafe86c8.pdf", "author": "Xiao Wang;Yuansen Zhang;Tianze Chen;Songyang Gao;Senjie Jin;Zhiheng Xi;Rui Zheng;Yicheng Zou;Tao Gui;Qi Zhang;Xuanjing Huang", "authorids": "~Xiao_Wang12;~Yuansen_Zhang1;~Tianze_Chen1;~Songyang_Gao1;~Senjie_Jin1;~Zhiheng_Xi1;~Rui_Zheng1;~Yicheng_Zou1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "gender": "M;M;M;M;M;;M;M;M;M;F", "homepage": "https://xiaowangnlp.github.io/;;https://www.pixiv.net/users/55094416;;;https://woooodyy.github.io/;https://github.com/ruizheng20;;;http://qizhang.info;https://xuanjing-huang.github.io/", "dblp": ";344/9168;;314/6067;348/5674.html;333/4268;;224/6030.html;135/6973;52/323-1;05/6735-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;oq28rjYAAAAJ;;O42mLrsAAAAJ;https://scholar.google.com.hk/citations?user=kMP_SiUAAAAJ;https://scholar.google.com.hk/citations?user=zSVLkqAAAAAJ;https://scholar.google.com.hk/citations?user=7Z0V_SoAAAAJ;X_nKjOYAAAAJ;;XfqR3yYAAAAJ;RGsMgZA4H78C", "orcid": ";;;;;;;;;;0000-0001-9197-9426", "linkedin": ";;;;;;;;;;", "or_profile": "~Xiao_Wang12;~Yuansen_Zhang1;~Tianze_Chen1;~Songyang_Gao1;~Senjie_Jin1;~Zhiheng_Xi1;~Rui_Zheng1;~Yicheng_Zou1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;;Fudan University;Fudan University;Fudan University;Fudan University;;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;;MS student;MS student;PhD student;PhD student;;Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024trace,\ntitle={{TRACE}: A Comprehensive Benchmark for Continual Learning in Large Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3qa4YLkcEw}\n}", "github": "", "project": "", "reviewers": "8LT7;f8sz;eEUp;Rk5m", "site": "https://openreview.net/forum?id=3qa4YLkcEw", "pdf_size": 1031396, "rating": "5;6;6;7", "confidence": "4;4;3;3", "wc_summary_and_contributions": "124;45;86;122", "wc_strengths": "26;22;2;5", "wc_improvement": "20;6;21;15", "wc_limitations": "19;7;1;1", "wc_correctness": "17;1;1;1", "wc_clarity": "15;9;1;1", "wc_relation_to_prior_work": "34;1;1;1", "wc_documentation": "5;9;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "261;101;115;148", "wc_reply_reviewers": "0;61;21;41", "wc_reply_authors": "135;126;127;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;4;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 94.25, 32.205395510690444 ], "wc_strengths_avg": [ 13.75, 10.40132203135736 ], "wc_improvement_avg": [ 15.5, 5.937171043518958 ], "wc_limitations_avg": [ 7.0, 7.3484692283495345 ], "wc_correctness_avg": [ 5.0, 6.928203230275509 ], "wc_clarity_avg": [ 6.5, 5.894913061275798 ], "wc_relation_to_prior_work_avg": [ 9.25, 14.289419162443238 ], "wc_documentation_avg": [ 4.0, 3.3166247903554 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 156.25, 62.83858289299656 ], "wc_reply_reviewers_avg": [ 30.75, 22.69774217846348 ], "wc_reply_authors_avg": [ 97.0, 56.11149614829389 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4757536606618579486&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mars: Situated Inductive Reasoning in an Open-World Environment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97857", "id": "3qoQ6AolAz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3qoQ6AolAz", "openreview": "https://openreview.net/forum?id=3qoQ6AolAz", "poster": "", "project": "", "author_site": "Xiaojuan Tang, Jiaqi Li, Yitao Liang, Song-Chun Zhu, Muhan Zhang, Zilong Zheng", "tldr": "", "abstract": "Large Language Models (LLMs) trained on massive corpora have shown remarkable success in knowledge-intensive tasks. Yet, most of them rely on pre-stored knowledge. Inducing new general knowledge from a specific environment and\nperforming reasoning with the acquired knowledge\u2014situated inductive reasoning, is crucial and challenging for machine intelligence. In this paper, we design Mars, an interactive environment devised for situated inductive reasoning. It introduces counter-commonsense game mechanisms by modifying terrain, survival setting and task dependency while adhering to certain principles. In Mars, agents need to actively interact with their surroundings, derive useful rules and perform decision-making tasks in specific contexts. We conduct experiments on various RL-based and LLM-based methods, finding that they all struggle on this challenging situated inductive reasoning benchmark. Furthermore, we explore Induction from Reflection, where we instruct agents to perform inductive reasoning from history trajectory. The superior performance underscores the importance of inductive reasoning in Mars. Through Mars, we aim to galvanize advancements in situated inductive reasoning and set the stage for developing the next generation of AI systems that can reason in an adaptive and context-sensitive way.", "keywords": "Large language model;Inductive reasoning", "primary_area": "", "supplementary_material": "/attachment/6d0f3c7c66d6ccf26ee854e45f78b2df2eb2b5b4.zip", "author": "Xiaojuan Tang;Jiaqi Li;Yitao Liang;Song-Chun Zhu;Muhan Zhang;Zilong Zheng", "authorids": "~Xiaojuan_Tang1;~Jiaqi_Li10;~Yitao_Liang1;~Song-Chun_Zhu1;~Muhan_Zhang1;~Zilong_Zheng1", "gender": "F;Not Specified;M;M;M;M", "homepage": "https://xiaojuantang.github.io/;;https://web.cs.ucla.edu/~yliang/;https://zhusongchun.net/;https://muhanzhang.github.io/;http://zilongzheng.github.io", "dblp": "332/0595;118/4502-3;173/4969;10/10313;157/5518;218/5234", "google_scholar": ";https://scholar.google.com.hk/citations?user=C4Z2-rsAAAAJ;KVzR1XEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ;9sDx70IAAAAJ", "orcid": ";;;;0000-0002-7680-6401;", "linkedin": ";;;;jerry-muhan-zhang-a33a1777/;", "or_profile": "~Xiaojuan_Tang1;~Jiaqi_Li10;~Yitao_Liang1;~Song-Chun_Zhu1;~Muhan_Zhang1;~Zilong_Zheng1", "aff": "Peking University;Beijing Institute for General Artificial Intelligence;Peking University;Peking University;Peking University;Beijing Institute for General Artificial Intelligence", "aff_domain": "pku.edu.cn;bigai.com;pku.edu.cn;pku.edu.cn;pku.edu.cn;bigai.ai", "position": "PhD student;Researcher;Assistant Professor;Full Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ntang2024mars,\ntitle={Mars: Situated Inductive Reasoning in an Open-World Environment},\nauthor={Xiaojuan Tang and Jiaqi Li and Yitao Liang and Song-Chun Zhu and Muhan Zhang and Zilong Zheng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=3qoQ6AolAz}\n}", "github": "", "reviewers": "V8Vv;A3jc;56w9", "pdf_size": 2887674, "rating": "5;7;7", "confidence": "3;4;3", "wc_summary_and_contributions": "127;402;113", "wc_strengths": "164;6;75", "wc_improvement": "349;320;45", "wc_limitations": "16;131;341", "wc_correctness": "13;1;8", "wc_clarity": "25;1;55", "wc_relation_to_prior_work": "19;1;7", "wc_documentation": "25;1;24", "wc_additional_feedback": "1;1;1", "wc_review": "739;864;669", "wc_reply_reviewers": "0;26;169", "wc_reply_authors": "153;159;35", "reply_reviewers": "0;1;1", "reply_authors": "4;4;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 214.0, 133.0588842079576 ], "wc_strengths_avg": [ 81.66666666666667, 64.6752571613665 ], "wc_improvement_avg": [ 238.0, 136.98418400190099 ], "wc_limitations_avg": [ 162.66666666666666, 134.55688594626272 ], "wc_correctness_avg": [ 7.333333333333333, 4.921607686744467 ], "wc_clarity_avg": [ 27.0, 22.090722034374522 ], "wc_relation_to_prior_work_avg": [ 9.0, 7.483314773547883 ], "wc_documentation_avg": [ 16.666666666666668, 11.08552609887726 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 757.3333333333334, 80.65702421708573 ], "wc_reply_reviewers_avg": [ 65.0, 74.30118886442307 ], "wc_reply_authors_avg": [ 115.66666666666667, 57.09251750935105 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7211313776340973216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;bigai.com;pku.edu.cn;pku.edu.cn;pku.edu.cn;bigai.ai", "author_num": 6, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Peking University;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.bigaiai.org/", "aff_unique_abbr": "Peking U;BIGAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Nearly Optimal Approximation of Matrix Functions by the Lanczos Method", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96711", "id": "3s8V8QP9XV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3s8V8QP9XV", "openreview": "https://openreview.net/forum?id=3s8V8QP9XV", "poster": "", "project": "", "author_site": "Noah Amsel, Tyler Chen, Anne Greenbaum, Cameron Musco, Christopher Musco", "tldr": "", "abstract": "Approximating the action of a matrix function $f(\\vec{A})$ on a vector $\\vec{b}$ is an increasingly important primitive in machine learning, data science, and statistics, with applications such as sampling high dimensional Gaussians, Gaussian process regression and Bayesian inference, principle component analysis, and approximating Hessian spectral densities.\nOver the past decade, a number of algorithms enjoying strong theoretical guarantees have been proposed for this task.\nMany of the most successful belong to a family of algorithms called Krylov subspace methods.\nRemarkably, a classic Krylov subspace method, called the Lanczos method for matrix functions (Lanczos-FA), frequently outperforms newer methods in practice. Our main result is a theoretical justification for this finding: we show that, for a natural class of rational functions, Lanczos-FA matches the error of the best possible Krylov subspace method up to a multiplicative approximation factor. \nThe approximation factor depends on the degree of $f(x)$'s denominator and the condition number of $\\vec{A}$, but not on the number of iterations $k$. Our result provides a strong justification for the excellent performance of Lanczos-FA, especially on functions that are well approximated by rationals, such as the matrix square root.", "keywords": "linear algebra;lanczos;matrix function;krylov;iterative;rational;approximation;matvec", "primary_area": "other", "supplementary_material": "/attachment/594d1465fa0a4bb91d428d7cffda247348054822.zip", "author": "Noah Amsel;Tyler Chen;Anne Greenbaum;Cameron N Musco;Christopher Musco", "authorids": "~Noah_Amsel1;~Tyler_Chen1;~Anne_Greenbaum1;~Cameron_N_Musco1;~Christopher_Musco1", "gender": "M;M;F;M;M", "homepage": "https://noahamsel.github.io;https://research.chen.pw;https://amath.washington.edu/people/anne-greenbaum;https://people.cs.umass.edu/~cmusco/;https://www.chrismusco.com/", "dblp": "227/2317;;;149/2327;149/2243", "google_scholar": "vkykpWgAAAAJ;;4-1FimEAAAAJ;EeYGZCwAAAAJ;HXXSrNMAAAAJ", "orcid": "0000-0001-9241-8284;;0000-0002-6673-7545;;", "linkedin": ";;;;", "or_profile": "~Noah_Amsel1;~Tyler_Chen1;~Anne_Greenbaum1;~Cameron_N_Musco1;~Christopher_P_Musco1", "aff": "NYU, New York University;New York University;University of Washington;University of Massachusetts, Amherst;New York University", "aff_domain": "cims.nyu.edu;nyu.edu;u.washington.edu;umass.edu;nyu.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\namsel2024nearly,\ntitle={Nearly Optimal Approximation of Matrix Functions by the Lanczos Method},\nauthor={Noah Amsel and Tyler Chen and Anne Greenbaum and Cameron N Musco and Christopher Musco},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3s8V8QP9XV}\n}", "github": "", "reviewers": "dxxb;nxLf;uopa;cS79;CAYW", "pdf_size": 730752, "rating": "6;6;7;7;8", "confidence": "2;4;2;3;3", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "45;112;127;81;76", "wc_strengths": "35;84;93;97;179", "wc_weaknesses": "106;208;56;278;95", "wc_questions": "46;69;103;41;4", "wc_limitations": "7;9;26;3;49", "wc_review": "239;482;405;500;403", "wc_reply_reviewers": "20;154;22;35;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.2, 28.77081854935657 ], "wc_strengths_avg": [ 97.6, 46.370680391816556 ], "wc_weaknesses_avg": [ 148.6, 81.89652007258915 ], "wc_questions_avg": [ 52.6, 32.70840870479638 ], "wc_limitations_avg": [ 18.8, 17.02233826476257 ], "wc_review_avg": [ 405.8, 92.20715807354655 ], "wc_reply_reviewers_avg": [ 46.2, 55.050522250020485 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.07142857142857147, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16037296972302752670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cims.nyu.edu;nyu.edu;u.washington.edu;umass.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "New York University;University of Washington;University of Massachusetts Amherst", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nyu.edu;https://www.washington.edu;https://www.umass.edu", "aff_unique_abbr": "NYU;UW;UMass Amherst", "aff_campus_unique_index": "0;2", "aff_campus_unique": "New York;;Amherst", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3sWghzJvGd", "title": "Towards Unraveling and Improving Generalization in World Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "World model has recently emerged as a promising approach to reinforcement learning (RL), as evidenced by its great successes that world model based agents exhibit state-of-the-art performance on a wide range visual control tasks. In this study, we aim to first obtain a clear understanding of the generalization capability of world models by examining the impact of _latent representation error_, and then devise new methods to enhance its generalization. We hypothesize that latent representation errors may paradoxically bring generalization to the model. We develop a continuous-time stochastic dynamics framework to quantify the impact of these errors, by examining the regularization effects for both cases with zero-drift representation errors and non-zero-drift representation errors. We propose a Jacobian regularization scheme to mitigate the \"destabilizing'' effects of non-zero drift errors, thereby enhancing training stability and model generalization. Our empirical results confirm that this regularization approach not only stabilizes training but also accelerates convergence and improves performance on long-horizon prediction.", "keywords": "world models;reinforcement learning;generalization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/d81dc27f9de968079ebebb3769d0f651f1a55cd8.zip", "author": "Qiaoyi Fang;Weiyu Du;Hang Wang;Junshan Zhang", "authorids": "~Qiaoyi_Fang1;~Weiyu_Du1;~Hang_Wang2;~Junshan_Zhang1", "gender": ";F;M;M", "homepage": "https://github.com/fangqyi;;https://ustcmike.github.io/;https://faculty.engineering.ucdavis.edu/jzhang/", "dblp": ";;;59/1232.html", "google_scholar": ";;Xdb3u_q3RKwC;UtAdFs8AAAAJ", "orcid": ";;;", "linkedin": ";weiyu-du/;;", "or_profile": "~Qiaoyi_Fang1;~Weiyu_Du1;~Hang_Wang2;~Junshan_Zhang1", "aff": "University of California, Davis;Google;University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;google.com;ucdavis.edu;ucdavis.edu", "position": "PhD student;Researcher;PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024towards,\ntitle={Towards Unraveling and Improving Generalization in World Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3sWghzJvGd}\n}", "github": "", "project": "", "reviewers": "waKG;mgpu;pwHm", "site": "https://openreview.net/forum?id=3sWghzJvGd", "pdf_size": 1236862, "rating": "6;6;7", "confidence": "3;2;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "160;59;71", "wc_strengths": "121;31;130", "wc_weaknesses": "338;26;87", "wc_questions": "359;15;52", "wc_limitations": "60;1;46", "wc_review": "1038;132;386", "wc_reply_reviewers": "61;22;0", "wc_reply_authors": "1459;88;52", "reply_reviewers": "1;1;0", "reply_authors": "5;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 96.66666666666667, 45.05058884804455 ], "wc_strengths_avg": [ 94.0, 44.69899327725402 ], "wc_weaknesses_avg": [ 150.33333333333334, 135.01687137374927 ], "wc_questions_avg": [ 142.0, 154.1838729136957 ], "wc_limitations_avg": [ 35.666666666666664, 25.170529504870483 ], "wc_review_avg": [ 518.6666666666666, 381.58383380617977 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 25.223445883190152 ], "wc_reply_authors_avg": [ 533.0, 654.945799284185 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mqf42GsSNxUJ:scholar.google.com/&scioq=Towards+Unraveling+and+Improving+Generalization+in+World+Models&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Davis;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucdavis.edu;https://www.google.com", "aff_unique_abbr": "UC Davis;Google", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Davis;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A theoretical case-study of Scalable Oversight in Hierarchical Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96710", "id": "3tj3A26wsV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3tj3A26wsV", "openreview": "https://openreview.net/forum?id=3tj3A26wsV", "poster": "", "project": "", "author_site": "Tom Yan, Zachary Lipton", "tldr": "", "abstract": "A key source of complexity in next-generation AI models is the size of model outputs, making it time-consuming to parse and provide reliable feedback on. To ensure such models are aligned, we will need to bolster our understanding of scalable oversight and how to scale up human feedback. To this end, we study the challenges of scalable oversight in the context of goal-conditioned hierarchical reinforcement learning. Hierarchical structure is a promising entrypoint into studying how to scale up human feedback, which in this work we assume can only be provided for model outputs below a threshold size. In the cardinal feedback setting, we develop an apt sub-MDP reward and algorithm that allows us to acquire and scale up low-level feedback for learning with sublinear regret. In the ordinal feedback setting, we show the necessity of both high- and low-level feedback, and develop a hierarchical experimental design algorithm that efficiently acquires both types of feedback for learning. Altogether, our work aims to consolidate the foundations of scalable oversight, formalizing and studying the various challenges thereof.", "keywords": "Scalable oversight;theoretical hierarchical reinforcement learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/e16eb1f85cff4df0ef63059753bbaa1350718cd8.zip", "author": "Tom Yan;Zachary Chase Lipton", "authorids": "~Tom_Yan1;~Zachary_Chase_Lipton1", "gender": ";Unspecified", "homepage": ";http://zacklipton.com", "dblp": "213/7323;", "google_scholar": ";MN9Kfg8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Tom_Yan1;~Zachary_Chase_Lipton1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyan2024a,\ntitle={A theoretical case-study of Scalable Oversight in Hierarchical Reinforcement Learning},\nauthor={Tom Yan and Zachary Chase Lipton},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3tj3A26wsV}\n}", "github": "", "reviewers": "z273;poPw;Hzkp;2zMJ", "pdf_size": 425035, "rating": "5;6;6;6", "confidence": "3;2;2;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "353;60;32;116", "wc_strengths": "129;45;41;77", "wc_weaknesses": "242;148;84;118", "wc_questions": "227;26;16;60", "wc_limitations": "1;27;7;11", "wc_review": "952;306;180;382", "wc_reply_reviewers": "678;17;12;22", "wc_reply_authors": "1279;13;10;23", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 140.25, 126.49975296418566 ], "wc_strengths_avg": [ 73.0, 35.21363372331802 ], "wc_weaknesses_avg": [ 148.0, 58.80476171195663 ], "wc_questions_avg": [ 82.25, 85.1480328604249 ], "wc_limitations_avg": [ 11.5, 9.630680142129112 ], "wc_review_avg": [ 455.0, 295.87328368745966 ], "wc_reply_reviewers_avg": [ 182.25, 286.24323136102277 ], "wc_reply_authors_avg": [ 331.25, 547.2048862172194 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CFktL8qOCNMJ:scholar.google.com/&scioq=A+theoretical+case-study+of+Scalable+Oversight+in+Hierarchical+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "OASIS: Conditional Distribution Shaping for Offline Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96709", "id": "3uDEmsf3Jf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3uDEmsf3Jf", "openreview": "https://openreview.net/forum?id=3uDEmsf3Jf", "poster": "/media/PosterPDFs/NeurIPS%202024/96709.png?t=1731644030.4668236", "project": "", "author_site": "Yihang Yao, Zhepeng Cen, Wenhao Ding, Haohong Lin, Shiqi Liu, Tingnan Zhang, Wenhao Yu, DING ZHAO", "tldr": "", "abstract": "Offline safe reinforcement learning (RL) aims to train a policy that satisfies con- straints using a pre-collected dataset. Most current methods struggle with the mismatch between imperfect demonstrations and the desired safe and rewarding performance. In this paper, we mitigate this issue from a data-centric perspective and introduce OASIS (cOnditionAl diStributIon Shaping), a new paradigm in offline safe RL designed to overcome these critical limitations. OASIS utilizes a conditional diffusion model to synthesize offline datasets, thus shaping the data dis- tribution toward a beneficial target domain. Our approach makes compliance with safety constraints through effective data utilization and regularization techniques to benefit offline safe RL training. Comprehensive evaluations on public benchmarks and varying datasets showcase OASIS\u2019s superiority in benefiting offline safe RL agents to achieve high-reward behavior while satisfying the safety constraints, out- performing established baselines. Furthermore, OASIS exhibits high data efficiency and robustness, making it suitable for real-world applications, particularly in tasks where safety is imperative and high-quality demonstrations are scarce. More details are available at the website https://sites.google.com/view/saferl-oasis/home.", "keywords": "Safe Reinforcement Learning;Offline Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yihang Yao;Zhepeng Cen;Wenhao Ding;Haohong Lin;Shiqi Liu;Tingnan Zhang;Wenhao Yu;Ding Zhao", "authorids": "~Yihang_Yao1;~Zhepeng_Cen1;~Wenhao_Ding1;~Haohong_Lin1;~Shiqi_Liu2;~Tingnan_Zhang1;~Wenhao_Yu1;~Ding_Zhao1", "gender": ";M;M;M;M;M;M;", "homepage": "https://yihangyao.github.io/;https://czp16.github.io/;https://wenhao.pub;https://hhlin.info/;https://shiqiliu-67.github.io/;;https://wenhaoyu.weebly.com/;https://safeai-lab.github.io", "dblp": "305/7045.html;254/6182;215/3667.html;154/7972;;https://dblp.uni-trier.de/pers/hd/z/Zhang:Tingnan;;", "google_scholar": "EPduTdwAAAAJ;M-X3Q-UAAAAJ;q2aqI9sAAAAJ;;PiuAi5wAAAAJ;RM2vMNcAAAAJ;1bF2s2kAAAAJ;z7tPc9IAAAAJ", "orcid": ";;;;;;;", "linkedin": "yihang-yao-3a7658249/;;wenhaoding/;haohong-lin-06572b1a5/;shiqiliu2/;;;", "or_profile": "~Yihang_Yao1;~Zhepeng_Cen1;~Wenhao_Ding1;~Haohong_Lin1;~Shiqi_Liu2;~Tingnan_Zhang1;~Wenhao_Yu1;~Ding_Zhao1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Google;Google;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu;google.com;google.com;cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Software Engineer;Software Engineer;Associate Professor", "bibtex": "@inproceedings{\nyao2024oasis,\ntitle={{OASIS}: Conditional Distribution Shaping for Offline Safe Reinforcement Learning},\nauthor={Yihang Yao and Zhepeng Cen and Wenhao Ding and Haohong Lin and Shiqi Liu and Tingnan Zhang and Wenhao Yu and Ding Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3uDEmsf3Jf}\n}", "github": "", "reviewers": "XmVn;CUpS;fA23", "pdf_size": 8073075, "rating": "6;6;7", "confidence": "4;2;4", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;2;4", "wc_summary": "73;56;81", "wc_strengths": "79;31;62", "wc_weaknesses": "141;40;113", "wc_questions": "148;178;260", "wc_limitations": "7;6;7", "wc_review": "448;311;523", "wc_reply_reviewers": "27;124;12", "wc_reply_authors": "0;468;284", "reply_reviewers": "1;2;1", "reply_authors": "1;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 70.0, 10.424330514074594 ], "wc_strengths_avg": [ 57.333333333333336, 19.871811414385174 ], "wc_weaknesses_avg": [ 98.0, 42.57542327055207 ], "wc_questions_avg": [ 195.33333333333334, 47.33802793620462 ], "wc_limitations_avg": [ 6.666666666666667, 0.4714045207910317 ], "wc_review_avg": [ 427.3333333333333, 87.77369892070301 ], "wc_reply_reviewers_avg": [ 54.333333333333336, 49.64093293061909 ], "wc_reply_authors_avg": [ 250.66666666666666, 192.50858566712176 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11890077864565898344&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu;google.com;google.com;cmu.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SA3DIP: Segment Any 3D Instance with Potential 3D Priors", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96708", "id": "3uI4ceR4iz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3uI4ceR4iz", "openreview": "https://openreview.net/forum?id=3uI4ceR4iz", "poster": "/media/PosterPDFs/NeurIPS%202024/96708.png?t=1730887689.7248416", "project": "", "author_site": "Xi Yang, Xu Gu, Xingyilang Yin, Xinbo Gao", "tldr": "", "abstract": "The proliferation of 2D foundation models has sparked research into adapting them for open-world 3D instance segmentation. Recent methods introduce a paradigm that leverages superpoints as geometric primitives and incorporates 2D multi-view masks from Segment Anything model (SAM) as merging guidance, achieving outstanding zero-shot instance segmentation results. However, the limited use of 3D priors restricts the segmentation performance. Previous methods calculate the 3D superpoints solely based on estimated normal from spatial coordinates, resulting in under-segmentation for instances with similar geometry. Besides, the heavy reliance on SAM and hand-crafted algorithms in 2D space suffers from over-segmentation due to SAM's inherent part-level segmentation tendency. To address these issues, we propose SA3DIP, a novel method for Segmenting Any 3D Instances via exploiting potential 3D Priors. Specifically, on one hand, we generate complementary 3D primitives based on both geometric and textural priors, which reduces the initial errors that accumulate in subsequent procedures. On the other hand, we introduce supplemental constraints from the 3D space by using a 3D detector to guide a further merging process. Furthermore, we notice a considerable portion of low-quality ground truth annotations in ScanNetV2 benchmark, which affect the fair evaluations. Thus, we present ScanNetV2-INS with complete ground truth labels and supplement additional instances for 3D class-agnostic instance segmentation. Experimental evaluations on various 2D-3D datasets demonstrate the effectiveness and robustness of our approach. Our code and proposed ScanNetV2-INS dataset are available HERE.", "keywords": "Open Vocalbulary;3D Instance Segmentation;Foundation Model;3D Detection", "primary_area": "machine_vision", "supplementary_material": "/attachment/3243ea348b2b6fe404251e1767672200b7a38e7f.zip", "author": "Xi Yang;Xu Gu;Xingyilang Yin;Xinbo Gao", "authorids": "~Xi_Yang4;~Xu_Gu2;~Xingyilang_Yin1;~Xinbo_Gao5", "gender": "F;;M;M", "homepage": "https://web.xidian.edu.cn/yangx/;;;https://faculty.cqupt.edu.cn/gaoxinbo/zh_CN/index.htm", "dblp": "13/1520-11;;;", "google_scholar": "W5c-LSYAAAAJ;;bo0YGokAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-5791-3674;0009-0000-6252-6882;;0000-0002-7985-0037", "linkedin": ";;yxyl0314/;xinbo-gao-151a2224/", "or_profile": "~Xi_Yang4;~Xu_Gu2;~Xingyilang_Yin1;~Xinbo_Gao5", "aff": "Xidian University;Xidian University;Xidian University;Chongqing University of Post and Telecommunications", "aff_domain": "xidian.edu;stu.xidian.edu.cn;xidian.edu;cqupt.edu.cn", "position": "Full Professor;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nyang2024sadip,\ntitle={{SA}3{DIP}: Segment Any 3D Instance with Potential 3D Priors},\nauthor={Xi Yang and Xu Gu and Xingyilang Yin and Xinbo Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3uI4ceR4iz}\n}", "github": "", "reviewers": "cYA9;3tui;sUBY;oUe8", "pdf_size": 2148038, "rating": "5;6;6;7", "confidence": "5;3;4;3", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "90;59;61;152", "wc_strengths": "88;33;88;163", "wc_weaknesses": "97;187;115;141", "wc_questions": "92;65;31;19", "wc_limitations": "75;13;9;84", "wc_review": "442;357;304;559", "wc_reply_reviewers": "21;137;118;0", "wc_reply_authors": "94;350;836;80", "reply_reviewers": "1;2;2;0", "reply_authors": "2;4;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.5, 37.566607512523674 ], "wc_strengths_avg": [ 93.0, 46.233105022267324 ], "wc_weaknesses_avg": [ 135.0, 33.85262175962151 ], "wc_questions_avg": [ 51.75, 28.71737279069936 ], "wc_limitations_avg": [ 45.25, 34.4265522525855 ], "wc_review_avg": [ 415.5, 96.37037926666056 ], "wc_reply_reviewers_avg": [ 69.0, 59.35065290289568 ], "wc_reply_authors_avg": [ 340.0, 305.87252246646807 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B6irJk7n0kwJ:scholar.google.com/&scioq=SA3DIP:+Segment+Any+3D+Instance+with+Potential+3D+Priors&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "xidian.edu;stu.xidian.edu.cn;xidian.edu;cqupt.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Xidian University;Chongqing University of Post and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.cqupt.edu.cn", "aff_unique_abbr": "Xidian;CQUPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Zero-to-Hero: Enhancing Zero-Shot Novel View Synthesis via Attention Map Filtering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96707", "id": "3uQtNWNTwz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3uQtNWNTwz", "openreview": "https://openreview.net/forum?id=3uQtNWNTwz", "poster": "/media/PosterPDFs/NeurIPS%202024/96707.png?t=1732740763.397308", "project": "", "author_site": "Ido Sobol, Chenfeng Xu, Or Litany", "tldr": "", "abstract": "Generating realistic images from arbitrary views based on a single source image remains a significant challenge in computer vision, with broad applications ranging from e-commerce to immersive virtual experiences. Recent advancements in diffusion models, particularly the Zero-1-to-3 model, have been widely adopted for generating plausible views, videos, and 3D models. However, these models still struggle with inconsistencies and implausibility in new views generation, especially for challenging changes in viewpoint. In this work, we propose Zero-to-Hero, a novel test-time approach that enhances view synthesis by manipulating attention maps during the denoising process of Zero-1-to-3. By drawing an analogy between the denoising process and stochastic gradient descent (SGD), we implement a filtering mechanism that aggregates attention maps, enhancing generation reliability and authenticity. This process improves geometric consistency without requiring retraining or significant computational resources. Additionally, we modify the self-attention mechanism to integrate information from the source view, reducing shape distortions. These processes are further supported by a specialized sampling schedule. Experimental results demonstrate substantial improvements in fidelity and consistency, validated on a diverse set of out-of-distribution objects. Additionally, we demonstrate the general applicability and effectiveness of Zero-to-Hero in multi-view, and image generation conditioned on semantic maps and pose.", "keywords": "Novel View Synthesis;Image Generative Models;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Ido Sobol;Chenfeng Xu;Or Litany", "authorids": "~Ido_Sobol1;~Chenfeng_Xu1;~Or_Litany1", "gender": ";M;M", "homepage": ";;http://orlitany.github.io", "dblp": "378/4512.html;65/1881;119/1476", "google_scholar": ";RpqvaTUAAAAJ;https://scholar.google.co.il/citations?user=Ihs8dwsAAAAJ", "orcid": "0009-0001-2513-0018;0000-0002-4941-6985;", "linkedin": "ido-sobol-7b875520a/;;", "or_profile": "~Ido_Sobol1;~Chenfeng_Xu1;~Or_Litany1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;University of California, Berkeley;NVIDIA", "aff_domain": "campus.technion.ac.il;berkeley.edu;nvidia.com", "position": "MS student;PhD student;Research Scientist", "bibtex": "@inproceedings{\nsobol2024zerotohero,\ntitle={Zero-to-Hero: Enhancing Zero-Shot Novel View Synthesis via Attention Map Filtering},\nauthor={Ido Sobol and Chenfeng Xu and Or Litany},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3uQtNWNTwz}\n}", "github": "", "reviewers": "hzMx;3GQx;qQr5;UpfM", "pdf_size": 10409826, "rating": "5;5;6;6", "confidence": "5;3;5;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "50;95;90;80", "wc_strengths": "47;35;162;50", "wc_weaknesses": "181;31;211;41", "wc_questions": "35;37;7;98", "wc_limitations": "33;25;6;27", "wc_review": "346;223;476;296", "wc_reply_reviewers": "24;123;0;20", "wc_reply_authors": "51;380;0;43", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.75, 17.455300054711177 ], "wc_strengths_avg": [ 73.5, 51.40282093426391 ], "wc_weaknesses_avg": [ 116.0, 80.77747210701756 ], "wc_questions_avg": [ 44.25, 33.22179254645962 ], "wc_limitations_avg": [ 22.75, 10.108783309577865 ], "wc_review_avg": [ 335.25, 92.28590087331867 ], "wc_reply_reviewers_avg": [ 41.75, 47.78271130858943 ], "wc_reply_authors_avg": [ 118.5, 152.2177716299907 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10663245124909838134&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "campus.technion.ac.il;berkeley.edu;nvidia.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technion - Israel Institute of Technology;University of California, Berkeley;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.berkeley.edu;https://www.nvidia.com", "aff_unique_abbr": "Technion;UC Berkeley;NVIDIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Revisiting Differentially Private ReLU Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96706", "id": "3uUIwMxYbR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3uUIwMxYbR", "openreview": "https://openreview.net/forum?id=3uUIwMxYbR", "poster": "", "project": "", "author_site": "Meng Ding, Mingxi Lei, Liyang Zhu, Shaowei Wang, Di Wang, Jinhui Xu", "tldr": "", "abstract": "As one of the most fundamental non-convex learning problems, ReLU regression under differential privacy (DP) constraints, especially in high-dimensional settings, remains a challenging area in privacy-preserving machine learning. Existing results are limited to the assumptions of bounded norm $ \\|\\mathbf{x}\\|_2 \\leq 1$, which becomes meaningless with increasing data dimensionality. In this work, we revisit the problem of DP ReLU regression in high-dimensional regimes. We propose two innovative algorithms DP-GLMtron and DP-TAGLMtron that outperform the conventional DPSGD. \nDP-GLMtron is based on a generalized linear model perceptron approach, integrating adaptive clipping and Gaussian mechanism for enhanced privacy. To overcome the constraints of small privacy budgets in DP-GLMtron, represented by $\\widetilde{O}(\\sqrt{1/N})$ where $N$ is the sample size, we introduce DP-TAGLMtron, which utilizes a tree aggregation protocol to balance privacy and utility effectively, showing that DP-TAGLMtron achieves comparable performance with only an additional factor of $O(\\log N)$ in the utility upper bound.\nMoreover, our theoretical analysis extends beyond Gaussian-like data distributions to settings with eigenvalue decay, showing how data distribution impacts learning in high dimensions. Notably, our findings suggest that the utility upper bound could be independent of the dimension $d$, even when $d \\gg N$. \nExperiments on synthetic and real-world datasets also validate our results.", "keywords": "Differential Privacy;Generalized Linear Model", "primary_area": "privacy", "supplementary_material": "", "author": "Meng Ding;Mingxi Lei;Liyang Zhu;Shaowei Wang;Di Wang;Jinhui Xu", "authorids": "~Meng_Ding3;~Mingxi_Lei1;~Liyang_Zhu1;~Shaowei_Wang1;~Di_Wang1;~Jinhui_Xu1", "gender": "F;;M;M;;M", "homepage": ";https://mingxilei.github.io;https://cowboyliyang.github.io/homepage/#;;;https://www.cse.buffalo.edu/~jinhui/", "dblp": ";;189/1681;49/6937-3;;24/6437-1.html", "google_scholar": "Ipwvf8oAAAAJ;xWNNQ_IAAAAJ;;s__u3ykAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Meng_Ding3;~Mingxi_Lei1;~Liyang_Zhu1;~Shaowei_Wang1;~Di_Wang1;~Jinhui_Xu1", "aff": "State University of New York at Buffalo;State University of New York at Buffalo;Duke University;Guangzhou University;;University at Buffalo, State University of New York", "aff_domain": "buffalo.edu;buffalo.edu;duke.edu;gzhu.edu.cn;;buffalo.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nding2024revisiting,\ntitle={Revisiting Differentially Private Re{LU} Regression},\nauthor={Meng Ding and Mingxi Lei and Liyang Zhu and Shaowei Wang and Di Wang and Jinhui Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3uUIwMxYbR}\n}", "github": "", "reviewers": "M8TJ;7Kd7;JyY5", "pdf_size": 3266147, "rating": "3;6;7", "confidence": "4;4;3", "soundness": "2;2;3", "novelty": "2;3;4", "presentation": "2;2;3", "wc_summary": "83;83;47", "wc_strengths": "43;26;42", "wc_weaknesses": "437;30;36", "wc_questions": "48;236;119", "wc_limitations": "5;8;1", "wc_review": "616;383;245", "wc_reply_reviewers": "780;96;6", "wc_reply_authors": "1890;621;25", "reply_reviewers": "3;2;1", "reply_authors": "6;4;2", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 16.97056274847714 ], "wc_strengths_avg": [ 37.0, 7.788880963698615 ], "wc_weaknesses_avg": [ 167.66666666666666, 190.46317812696034 ], "wc_questions_avg": [ 134.33333333333334, 77.51272296998876 ], "wc_limitations_avg": [ 4.666666666666667, 2.8674417556808756 ], "wc_review_avg": [ 414.6666666666667, 153.106353739992 ], "wc_reply_reviewers_avg": [ 294.0, 345.61249977395204 ], "wc_reply_authors_avg": [ 845.3333333333334, 777.7318875350868 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6933752452815364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3211176630329688593&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "buffalo.edu;buffalo.edu;duke.edu;gzhu.edu.cn;;buffalo.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "State University of New York at Buffalo;Duke University;Guangzhou University;University at Buffalo", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.buffalo.edu;https://www.duke.edu;http://www.gzhu.edu.cn;https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo;Duke;GU;UB", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "The Ladder in Chaos: Improving Policy Learning by Harnessing the Parameter Evolving Path in A Low-dimensional Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96705", "id": "3vHfwL2stG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3vHfwL2stG", "openreview": "https://openreview.net/forum?id=3vHfwL2stG", "poster": "/media/PosterPDFs/NeurIPS%202024/96705.png?t=1730775548.569984", "project": "", "author_site": "Hongyao Tang, Min Zhang, Chen Chen, Jianye Hao", "tldr": "", "abstract": "Knowing the learning dynamics of policy is significant to unveiling the mysteries of Reinforcement Learning (RL). It is especially crucial yet challenging to Deep RL, from which the remedies to notorious issues like sample inefficiency and learning instability could be obtained. In this paper, we study how the policy networks of typical DRL agents evolve during the learning process by empirically investigating several kinds of temporal change for each policy parameter. In popular MuJoCo and DeepMind Control Suite (DMC) environments, we find common phenomena for TD3 and RAD agents: (1) the activity of policy network parameters is highly asymmetric and policy networks advance monotonically along a very limited number of major parameter directions; (2) severe detours occur in parameter update and harmonic-like changes are observed for all minor parameter directions. By performing a novel temporal SVD along the policy learning path, the major and minor parameter directions are identified as the columns of the right unitary matrix associated with dominant and insignificant singular values respectively. Driven by the discoveries above, we propose a simple and effective method, called Policy Path Trimming and Boosting (PPTB), as a general plug-in improvement to DRL algorithms. The key idea of PPTB is to trim the policy learning path by canceling the policy updates in minor parameter directions, and boost the learning path by encouraging the advance in major directions. In experiments, we demonstrate that our method improves the learning performance of TD3, RAD, and DoubleDQN regarding scores and efficiency in MuJoCo, DMC, and MinAtar tasks respectively.", "keywords": "Reinforcement Learning;Policy Optimization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Hongyao Tang;Min Zhang;Chen Chen;Jianye HAO", "authorids": "~Hongyao_Tang1;~Min_Zhang13;~Chen_Chen3;~Jianye_HAO1", "gender": "M;F;M;F", "homepage": "https://bluecontra.github.io/;;http://www.icdai.org/jianye.html;", "dblp": "220/4275;;21/7664.html;83/5342-40", "google_scholar": "yIqzRH4AAAAJ;l8_g4oAAAAAJ;;", "orcid": ";;0000-0002-0422-8235;0000-0002-6962-4482", "linkedin": ";;;", "or_profile": "~Hongyao_Tang1;~Chen_Chen3;~Jianye_HAO1;~Zm_TJU1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Qiyuan Lab;Tianjin University;Tianjin University", "aff_domain": "mila.umontreal.ca;qiyuanlab.com;tju.edu.cn;tju.edu.cn", "position": "Postdoc;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\ntang2024the,\ntitle={The Ladder in Chaos: Improving Policy Learning by Harnessing the Parameter Evolving Path in A Low-dimensional Space},\nauthor={Hongyao Tang and Min Zhang and Chen Chen and Jianye HAO},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3vHfwL2stG}\n}", "github": "", "reviewers": "8fBA;B4U6;HTJS;iaGj;dZpR;zpVk", "pdf_size": 2633905, "rating": "4;5;5;6;6;6", "confidence": "4;4;2;4;3;3", "soundness": "4;3;3;2;3;3", "novelty": "3;2;3;3;3;2", "presentation": "3;3;3;4;2;4", "wc_summary": "113;70;233;82;85;55", "wc_strengths": "231;29;59;119;38;47", "wc_weaknesses": "331;52;45;147;183;289", "wc_questions": "43;130;60;694;43;169", "wc_limitations": "59;16;47;3;36;5", "wc_review": "777;297;444;1045;385;565", "wc_reply_reviewers": "1142;18;18;39;146;203", "wc_reply_authors": "1828;84;40;417;691;302", "reply_reviewers": "2;1;1;1;1;2", "reply_authors": "5;3;2;3;4;4", "rating_avg": [ 5.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 106.33333333333333, 59.28930951041867 ], "wc_strengths_avg": [ 87.16666666666667, 70.60315542208829 ], "wc_weaknesses_avg": [ 174.5, 108.17231623664162 ], "wc_questions_avg": [ 189.83333333333334, 230.28056559095228 ], "wc_limitations_avg": [ 27.666666666666668, 21.147629234082533 ], "wc_review_avg": [ 585.5, 255.50195693966808 ], "wc_reply_reviewers_avg": [ 261.0, 400.02749905475247 ], "wc_reply_authors_avg": [ 560.3333333333334, 606.7637284991763 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.5, 0.9574271077563381 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.20000000000000004, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4107072750305663930&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "mila.umontreal.ca;qiyuanlab.com;tju.edu.cn;tju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Montreal;Qiyuan Lab;Tianjin University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;", "aff_unique_url": "https://www.mila.quebec;;http://www.tju.edu.cn", "aff_unique_abbr": "MILA;;TJU", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;2;2", "aff_country_unique": "Canada;;China" }, { "title": "Higher-Order Causal Message Passing for Experimentation with Complex Interference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96704", "id": "3vJbgcjgvd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3vJbgcjgvd", "openreview": "https://openreview.net/forum?id=3vJbgcjgvd", "poster": "/media/PosterPDFs/NeurIPS%202024/96704.png?t=1731726997.1083415", "project": "", "author_site": "Mohsen Bayati, Yuwei Luo, William Overman, Mohamad Sadegh Shirani Faradonbeh, Ruoxuan Xiong", "tldr": "", "abstract": "Accurate estimation of treatment effects is essential for decision-making across various scientific fields. This task, however, becomes challenging in areas like social sciences and online marketplaces, where treating one experimental unit can influence outcomes for others through direct or indirect interactions. Such interference can lead to biased treatment effect estimates, particularly when the structure of these interactions is unknown. We address this challenge by introducing a new class of estimators based on causal message-passing, specifically designed for settings with pervasive, unknown interference. Our estimator draws on information from the sample mean and variance of unit outcomes and treatments over time, enabling efficient use of observed data to estimate the evolution of the system state. Concretely, we construct non-linear features from the moments of unit outcomes and treatments and then learn a function that maps these features to future mean and variance of unit outcomes. This allows for the estimation of the treatment effect over time. Extensive simulations across multiple domains, using synthetic and real network data, demonstrate the efficacy of our approach in estimating total treatment effect dynamics, even in cases where interference exhibits non-monotonic behavior in the probability of treatment.", "keywords": "Causal Inference;Network Interference;Dynamic Treatment Effect;Randomized Experiment;Approximate Message Passing", "primary_area": "causal_inference", "supplementary_material": "", "author": "Mohsen Bayati;Yuwei Luo;William Overman;Sadegh Shirani;Ruoxuan Xiong", "authorids": "~Mohsen_Bayati1;~Yuwei_Luo1;~William_Overman1;~Sadegh_Shirani1;~Ruoxuan_Xiong1", "gender": ";M;;M;M", "homepage": "https://web.stanford.edu/~bayati/;https://sites.google.com/view/yuweiluo;http://www.ruoxuanxiong.com/;https://www.sadeghshirani.com/;https://willoverman.github.io/", "dblp": "73/6405;71/3730;222/2927;;294/4924", "google_scholar": "PS-TM94AAAAJ;;lg_0u-0AAAAJ;cZ8KRK4AAAAJ;B2XPxEkAAAAJ", "orcid": ";;;0000-0002-9235-3254;", "linkedin": ";;;mohamad-sadegh-shirani-faradonbeh-847b5285/;", "or_profile": "~Mohsen_Bayati1;~Yuwei_Luo1;~Ruoxuan_Xiong1;~Mohamad_Sadegh_Shirani_Faradonbeh1;~Will_Overman1", "aff": "Stanford University;Stanford University;Emory University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;emory.edu;stanford.edu;stanford.edu", "position": "Full Professor;PhD student;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nbayati2024higherorder,\ntitle={Higher-Order Causal Message Passing for Experimentation with Complex Interference},\nauthor={Mohsen Bayati and Yuwei Luo and William Overman and Sadegh Shirani and Ruoxuan Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3vJbgcjgvd}\n}", "github": "", "reviewers": "YRHE;V4GS;YfYx;p2A8", "pdf_size": 6754068, "rating": "3;3;3;5", "confidence": "4;5;3;4", "soundness": "3;1;3;3", "novelty": "2;1;1;3", "presentation": "2;2;4;3", "wc_summary": "91;53;143;58", "wc_strengths": "110;19;73;70", "wc_weaknesses": "222;380;170;449", "wc_questions": "60;4;128;2", "wc_limitations": "4;13;1;2", "wc_review": "487;469;515;581", "wc_reply_reviewers": "19;0;5;12", "wc_reply_authors": "699;298;166;323", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 86.25, 35.87042653774834 ], "wc_strengths_avg": [ 68.0, 32.380549717384355 ], "wc_weaknesses_avg": [ 305.25, 113.4402375702731 ], "wc_questions_avg": [ 48.5, 51.46600819958743 ], "wc_limitations_avg": [ 5.0, 4.743416490252569 ], "wc_review_avg": [ 513.0, 42.5440947723653 ], "wc_reply_reviewers_avg": [ 9.0, 7.176350047203662 ], "wc_reply_authors_avg": [ 371.5, 198.26812653575965 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9716018774050834481&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;emory.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Stanford University;Emory University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.emory.edu", "aff_unique_abbr": "Stanford;Emory", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ETO:Efficient Transformer-based Local Feature Matching by Organizing Multiple Homography Hypotheses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96703", "id": "3xHCaDdYcc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=3xHCaDdYcc", "openreview": "https://openreview.net/forum?id=3xHCaDdYcc", "poster": "/media/PosterPDFs/NeurIPS%202024/96703.png?t=1731761599.0161853", "project": "", "author_site": "Junjie Ni, Guofeng Zhang, Guanglin Li, Yijin Li, Xinyang Liu, Zhaoyang Huang, Hujun Bao", "tldr": "", "abstract": "We tackle the efficiency problem of learning local feature matching.Recent advancements have given rise to purely CNN-based and transformer-based approaches, each augmented with deep learning techniques. While CNN-based methods often excel in matching speed, transformer-based methods tend to provide more accurate matches. We propose an efficient transformer-based network architecture for local feature matching.This technique is built on constructing multiple homography hypotheses to approximate the continuous correspondence in the real world and uni-directional cross-attention to accelerate the refinement. On the YFCC100M dataset, our matching accuracy is competitive with LoFTR, a state-of-the-art transformer-based architecture, while the inference speed is boosted to 4 times, even outperforming the CNN-based methods.Comprehensive evaluations on other open datasets such as Megadepth, ScanNet, and HPatches demonstrate our method's efficacy, highlighting its potential to significantly enhance a wide array of downstream applications.", "keywords": "local feature matching;3d vision;pose estimation", "primary_area": "machine_vision", "supplementary_material": "/attachment/f8e8211b668c57d0349271389a8787bf441abaf5.zip", "author": "Junjie Ni;Guofeng Zhang;Guanglin Li;Yijin Li;Xinyang Liu;Zhaoyang Huang;Hujun Bao", "authorids": "~Junjie_Ni1;~Guofeng_Zhang3;~Guanglin_Li1;~Yijin_Li1;~Xinyang_Liu5;~Zhaoyang_Huang2;~Hujun_Bao1", "gender": "M;M;M;M;M;;M", "homepage": "http://github.com/xuanlanxingkongxia;http://www.cad.zju.edu.cn/home/gfzhang;;https://eugenelyj.github.io/;https://github.com/QsingHuan;https://drinkingcoder.github.io/;http://www.cad.zju.edu.cn/home/bao/", "dblp": ";78/5389-1.html;;178/6879;;;b/HujunBao", "google_scholar": ";F0xfpXAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;y2xos7IAAAAJ;AZCcDmsAAAAJ", "orcid": ";0000-0001-5661-8430;;;;0000-0001-7688-1471;0000-0002-2662-0334", "linkedin": ";;%E5%B9%BF%E6%9E%97-%E6%9D%8E-199889218/;;;;", "or_profile": "~Junjie_Ni1;~Guofeng_Zhang3;~Guanglin_Li1;~Yijin_Li1;~Xinyang_Liu5;~Zhaoyang_Huang2;~Hujun_Bao1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Avolution AI;Zhejiang University;Avolution AI;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;avolutionai.com;zju.edu.cn;avolutionai.com;zju.edu.cn", "position": "PhD student;Full Professor;MS student;Researcher;MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nni2024etoefficient,\ntitle={{ETO}:Efficient Transformer-based Local Feature Matching by Organizing Multiple Homography Hypotheses},\nauthor={Junjie Ni and Guofeng Zhang and Guanglin Li and Yijin Li and Xinyang Liu and Zhaoyang Huang and Hujun Bao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=3xHCaDdYcc}\n}", "github": "", "reviewers": "7KzZ;yFZw;aEzL;uPv2;LEiZ", "pdf_size": 2513500, "rating": "5;5;6;6;7", "confidence": "3;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;4", "presentation": "3;2;2;3;2", "wc_summary": "42;97;170;94;89", "wc_strengths": "48;13;59;7;83", "wc_weaknesses": "160;24;45;88;380", "wc_questions": "5;166;22;108;12", "wc_limitations": "9;43;41;2;6", "wc_review": "264;343;337;299;570", "wc_reply_reviewers": "64;48;49;301;37", "wc_reply_authors": "106;0;0;297;0", "reply_reviewers": "2;1;1;2;1", "reply_authors": "2;1;1;3;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 98.4, 41.02974530752049 ], "wc_strengths_avg": [ 42.0, 28.53769437077915 ], "wc_weaknesses_avg": [ 139.4, 128.98309966813483 ], "wc_questions_avg": [ 62.6, 63.6854771513883 ], "wc_limitations_avg": [ 20.2, 17.948816116947658 ], "wc_review_avg": [ 362.6, 107.53715636932195 ], "wc_reply_reviewers_avg": [ 99.8, 100.96613293575227 ], "wc_reply_authors_avg": [ 80.6, 115.72657430339844 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15671580567621274286&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;avolutionai.com;zju.edu.cn;avolutionai.com;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;1;0", "aff_unique_norm": "Zhejiang University;Avolution AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;", "aff_unique_abbr": "ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;1;0", "aff_country_unique": "China;Unknown" }, { "id": "3zYmlmkIuK", "title": "Asynchronous Multi-Agent Reinforcement Learning with General Function Approximation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study multi-agent reinforcement learning (RL) where agents cooperate through asynchronous communications with a central server to learn a shared environment. Our first focus is on the case of multi-agent contextual bandits with general function approximation, for which we introduce the Async-NLin-UCB algorithm. This algorithm is proven to achieve a regret of $\\tilde{O} (\\sqrt{T \\dim_E(\\mathcal{F}) \\log N(\\mathcal{F})})$ and a communication complexity of $\\tilde{O} (M^2 \\dim_E(\\mathcal{F}))$, where $M$ is the total number of agents and $T$ is the number of rounds, while $\\dim_E(\\mathcal{F})$ and $N(\\mathcal{F})$ are the Eluder dimension and the covering number of function space $\\mathcal{F}$ respectively. We then progress to the more intricate framework of multi-agent RL with general function approximation, and present the Async-NLSVI-UCB algorithm. This algorithm enjoys a regret of $\\tilde{O} (H^2 \\sqrt{K \\dim_E(\\mathcal{F}) \\log N(\\mathcal{F})})$ and a communication complexity of $\\tilde{O} (H M^2 \\dim_E(\\mathcal{F}))$, where $H$ is the horizon length and $K$ the number of episodes. Our findings showcase the provable efficiency of both algorithms in fostering collaborative learning within nonlinear environments, and they achieve this with minimal communication overhead.", "keywords": "Reinforcement Learning;Markov Decision Processes;Contextual Bandits;Learning Theory;Federated Learning;Multi-agent;General Function Approximation;Machin Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yuanzhou Chen;Jiafan He;Quanquan Gu", "authorids": "~Yuanzhou_Chen1;~Jiafan_He1;~Quanquan_Gu1", "gender": "M;M;M", "homepage": ";https://sites.google.com/g.ucla.edu/jiafan-he-homepage;http://web.cs.ucla.edu/~qgu/", "dblp": "298/1088;214/5785;50/4597", "google_scholar": "mQ0FosEAAAAJ;F3AXNBwAAAAJ;GU9HgNAAAAAJ", "orcid": "0009-0003-0826-2697;;", "linkedin": "yuanzhou-chen-215032253/;;", "or_profile": "~Yuanzhou_Chen1;~Jiafan_He1;~Quanquan_Gu1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024asynchronous,\ntitle={Asynchronous Multi-Agent Reinforcement Learning with General Function Approximation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=3zYmlmkIuK}\n}", "github": "", "project": "", "reviewers": "ijz1;56ui;39tB;HvUo", "site": "https://openreview.net/forum?id=3zYmlmkIuK", "pdf_size": 443581, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "33;62;59;48", "wc_strengths": "10;36;58;87", "wc_weaknesses": "98;133;114;178", "wc_questions": "10;71;6;107", "wc_limitations": "1;5;1;18", "wc_review": "152;307;238;438", "wc_reply_reviewers": "16;151;10;97", "wc_reply_authors": "0;283;0;420", "reply_reviewers": "1;2;1;2", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 50.5, 11.368817000902073 ], "wc_strengths_avg": [ 47.75, 28.32291475113393 ], "wc_weaknesses_avg": [ 130.75, 29.961433543807612 ], "wc_questions_avg": [ 48.5, 42.47646407129482 ], "wc_limitations_avg": [ 6.25, 6.977642868476432 ], "wc_review_avg": [ 283.75, 104.62402926670336 ], "wc_reply_reviewers_avg": [ 68.5, 58.73031585135568 ], "wc_reply_authors_avg": [ 175.75, 182.30246158513603 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8iTU-vWff9AJ:scholar.google.com/&scioq=Asynchronous+Multi-Agent+Reinforcement+Learning+with+General+Function+Approximation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Equivariant Neural Diffusion for Molecule Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96702", "id": "40pE5pFhWl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=40pE5pFhWl", "openreview": "https://openreview.net/forum?id=40pE5pFhWl", "poster": "", "project": "", "author_site": "Fran\u00e7ois Cornet, Grigory Bartosh, Mikkel Schmidt, Christian Andersson Naesseth", "tldr": "", "abstract": "We introduce Equivariant Neural Diffusion (END), a novel diffusion model for molecule generation in 3D that is equivariant to Euclidean transformations. Compared to current state-of-the-art equivariant diffusion models, the key innovation in END lies in its learnable forward process for enhanced generative modelling. Rather than pre-specified, the forward process is parameterized through a time- and data-dependent transformation that is equivariant to rigid transformations. Through a series of experiments on standard molecule generation benchmarks, we demonstrate the competitive performance of END compared to several strong baselines for both unconditional and conditional generation.", "keywords": "Diffusion Models;Equivariant Neural Networks;Molecule Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Fran\u00e7ois R J Cornet;Grigory Bartosh;Mikkel N. Schmidt;Christian A. Naesseth", "authorids": "~Fran\u00e7ois_R_J_Cornet1;~Grigory_Bartosh1;~Mikkel_N._Schmidt1;~Christian_A._Naesseth1", "gender": ";M;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";grigory-bartosh-76004a163/;;", "or_profile": "~Fran\u00e7ois_R_J_Cornet1;~Grigory_Bartosh1;~Mikkel_N._Schmidt1;~Christian_A._Naesseth1", "aff": ";University of Amsterdam;;", "aff_domain": ";uva.nl;;", "position": ";PhD student;;", "bibtex": "@inproceedings{\ncornet2024equivariant,\ntitle={Equivariant Neural Diffusion for Molecule Generation},\nauthor={Fran{\\c{c}}ois R J Cornet and Grigory Bartosh and Mikkel N. Schmidt and Christian A. Naesseth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=40pE5pFhWl}\n}", "github": "", "reviewers": "hMME;5dRq;jNQ2;K86T", "pdf_size": 859057, "rating": "4;6;6;7", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "60;49;75;103", "wc_strengths": "39;27;37;195", "wc_weaknesses": "107;270;10;109", "wc_questions": "29;3;130;35", "wc_limitations": "12;1;7;102", "wc_review": "247;350;259;544", "wc_reply_reviewers": "118;56;0;0", "wc_reply_authors": "510;32;383;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 20.26542622300355 ], "wc_strengths_avg": [ 74.5, 69.71907916775723 ], "wc_weaknesses_avg": [ 124.0, 93.30862768254606 ], "wc_questions_avg": [ 49.25, 48.14755964740061 ], "wc_limitations_avg": [ 30.5, 41.46383966783588 ], "wc_review_avg": [ 350.0, 118.8759858003289 ], "wc_reply_reviewers_avg": [ 43.5, 48.71088174114691 ], "wc_reply_authors_avg": [ 231.25, 220.17422078890164 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15900393756719316076&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";uva.nl;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "TableRAG: Million-Token Table Understanding with Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96701", "id": "41lovPOCo5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=41lovPOCo5", "openreview": "https://openreview.net/forum?id=41lovPOCo5", "poster": "", "project": "", "author_site": "Si-An Chen, Lesly Miculicich, Julian Eisenschlos, Zifeng Wang, Zilong Wang, Yanfei Chen, YASUHISA FUJII, Hsuan-Tien Lin, Chen-Yu Lee, Tomas Pfister", "tldr": "", "abstract": "Recent advancements in language models (LMs) have notably enhanced their ability to reason with tabular data, primarily through program-aided mechanisms that manipulate and analyze tables.\nHowever, these methods often require the entire table as input, leading to scalability challenges due to the positional bias or context length constraints.\nIn response to these challenges, we introduce TableRAG, a Retrieval-Augmented Generation (RAG) framework specifically designed for LM-based table understanding.\nTableRAG leverages query expansion combined with schema and cell retrieval to pinpoint crucial information before providing it to the LMs.\nThis enables more efficient data encoding and precise retrieval, significantly reducing prompt lengths and mitigating information loss.\nWe have developed two new million-token benchmarks from the Arcade and BIRD-SQL datasets to thoroughly evaluate TableRAG's effectiveness at scale.\nOur results demonstrate that TableRAG's retrieval design achieves the highest retrieval quality, leading to the new state-of-the-art performance on large-scale table understanding.", "keywords": "large language model;large scale;tabular reasoning;retrieval;LLM", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Si-An Chen;Lesly Miculicich;Julian Martin Eisenschlos;Zifeng Wang;Zilong Wang;Yanfei Chen;Yasuhisa Fujii;Hsuan-Tien Lin;Chen-Yu Lee;Tomas Pfister", "authorids": "~Si-An_Chen1;~Lesly_Miculicich1;~Julian_Martin_Eisenschlos1;~Zifeng_Wang1;~Zilong_Wang1;~Yanfei_Chen1;~Yasuhisa_Fujii1;~Hsuan-Tien_Lin1;~Chen-Yu_Lee2;~Tomas_Pfister1", "gender": "M;F;M;M;M;M;M;M;;M", "homepage": ";;https://eisenjulian.github.io/;https://kingspencer.github.io/;https://zilongwang.me;https://sites.google.com/site/yanfeichen1990;;http://www.csie.ntu.edu.tw/~htlin;https://chl260.github.io/;http://tomas.pfister.fi", "dblp": "207/7456;;262/3990;43/7716-2;42/898-2;58/8788;84/8914;10/3718;04/656;14/8360", "google_scholar": "https://scholar.google.com.tw/citations?user=XtkmEncAAAAJ;https://scholar.google.ch/citations?user=0JL8SrsAAAAJ;2uAC2NQAAAAJ;https://scholar.google.co.il/citations?user=N1uBekcAAAAJ;S_wQccsAAAAJ;qyua6O4AAAAJ;_o1nj0gAAAAJ;https://scholar.google.com.tw/citations?user=yAr4UPUAAAAJ;uWPUSEgAAAAJ;ahSpJOAAAAAJ", "orcid": "0000-0001-8734-9894;;;;0000-0002-1614-0943;;;;;0009-0004-4088-8718", "linkedin": "sianchen/;lesly-miculicich-werlen;eisenjulian/;zifeng-wang-21b069b4/;;;;;chenyulee260/;", "or_profile": "~Si-An_Chen1;~Lesly_Miculicich1;~Julian_Martin_Eisenschlos1;~Zifeng_Wang1;~Zilong_Wang1;~Yanfei_Chen1;~Yasuhisa_Fujii1;~Hsuan-Tien_Lin1;~Chen-Yu_Lee2;~Tomas_Pfister1", "aff": "National Taiwan University;Google;Universidad Nacional de C\u00f3rdoba;Google;University of California, San Diego;Google;Google;National Taiwan University;Google;Google", "aff_domain": "ntu.edu.tw;google.com;unc.edu.ar;google.com;ucsd.edu;google.com;google.com;ntu.edu.tw;google.com;google.com", "position": "PhD student;Researcher;PhD student;Research Scientist;PhD student;Software Engineer;RS;Full Professor;Research Scientist;Head of Research @ Cloud AI", "bibtex": "@inproceedings{\nchen2024tablerag,\ntitle={Table{RAG}: Million-Token Table Understanding with Language Models},\nauthor={Si-An Chen and Lesly Miculicich and Julian Martin Eisenschlos and Zifeng Wang and Zilong Wang and Yanfei Chen and Yasuhisa Fujii and Hsuan-Tien Lin and Chen-Yu Lee and Tomas Pfister},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=41lovPOCo5}\n}", "github": "", "reviewers": "yX7T;YK6m;aMG9;6sEk", "pdf_size": 1623507, "rating": "4;4;5;5", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "119;67;64;48", "wc_strengths": "105;41;43;22", "wc_weaknesses": "160;64;108;69", "wc_questions": "2;18;176;1", "wc_limitations": "2;6;87;1", "wc_review": "388;196;478;141", "wc_reply_reviewers": "0;26;38;18", "wc_reply_authors": "66;365;27;66", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.5, 26.688012290165037 ], "wc_strengths_avg": [ 52.75, 31.259998400511794 ], "wc_weaknesses_avg": [ 100.25, 38.473204961375394 ], "wc_questions_avg": [ 49.25, 73.48936997960998 ], "wc_limitations_avg": [ 24.0, 36.42114770294862 ], "wc_review_avg": [ 300.75, 137.4070140131136 ], "wc_reply_reviewers_avg": [ 20.5, 13.811227316933133 ], "wc_reply_authors_avg": [ 131.0, 136.03492198696628 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1576531611285464569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ntu.edu.tw;google.com;unc.edu.ar;google.com;ucsd.edu;google.com;google.com;ntu.edu.tw;google.com;google.com", "author_num": 10, "aff_unique_index": "0;1;2;1;3;1;1;0;1;1", "aff_unique_norm": "National Taiwan University;Google;Universidad Nacional de C\u00f3rdoba;University of California, San Diego", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.ntu.edu.tw;https://www.google.com;https://www.unc.edu.ar;https://www.ucsd.edu", "aff_unique_abbr": "NTU;Google;UNC;UCSD", "aff_campus_unique_index": "0;1;1;3;1;1;0;1;1", "aff_campus_unique": "Taiwan;Mountain View;;San Diego", "aff_country_unique_index": "0;1;2;1;1;1;1;0;1;1", "aff_country_unique": "China;United States;Argentina" }, { "title": "StackEval: Benchmarking LLMs in Coding Assistance", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97856", "id": "42mqpIrA39", "proceeding": "", "pdf": "https://openreview.net/pdf?id=42mqpIrA39", "openreview": "https://openreview.net/forum?id=42mqpIrA39", "poster": "", "project": "", "author_site": "Zulkuf Genc, Nidhish Shah, Dogu Araci", "tldr": "", "abstract": "We present two comprehensive benchmarks to evaluate the performance of language models in coding assistance tasks, covering code writing, debugging, code review, and conceptual understanding. Our main contribution includes two curated datasets: StackEval, a large-scale benchmark derived from Stack Overflow questions, and StackUnseen, a dynamic benchmark featuring the most recent Stack Overflow content. These benchmarks offer novel insights into the capabilities and limitations of LLMs, particularly in handling new and emerging content. Additionally, we assess LLMs' proficiency as judges for coding tasks using a curated, human-annotated dataset, exploring their evaluation capabilities and potential biases, including whether they favor their own generated solutions. Our findings underscore the potential of these benchmarks to advance LLM development and application in coding assistance. To ensure reproducibility, we publicly share our datasets and evaluation code at https://github.com/ProsusAI/stack-eval.", "keywords": "Large Language Models;Benchmark;Transparency in AI;Real Word Use-Cases;Evaluation Metrics", "primary_area": "", "supplementary_material": "/attachment/75cf6bc9fc010ca74813a236dacad3f9151e3529.zip", "author": "Nidhish Shah;Zulkuf Genc;Dogu Araci", "authorids": "~Nidhish_Shah1;~Zulkuf_Genc1;~Dogu_Araci1", "gender": ";;M", "homepage": ";;", "dblp": "394/7202;;", "google_scholar": ";;", "orcid": ";;", "linkedin": "nidhish-s-shah/;;dogutanaraci5522b5a3?original_referer=https://www.google.com/&originalSubdomain=de", "or_profile": "~Nidhish_Shah1;~Zulkuf_Genc1;~Dogu_Araci1", "aff": "Prosus;Prosus;Prosus", "aff_domain": "prosus.com;prosus.com;prosus.com", "position": "Researcher;Data Scientist;Data Scientist", "bibtex": "@inproceedings{\nshah2024stackeval,\ntitle={StackEval: Benchmarking {LLM}s in Coding Assistance},\nauthor={Nidhish Shah and Zulkuf Genc and Dogu Araci},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=42mqpIrA39}\n}", "github": "", "reviewers": "1ibX;nCkZ;KPZb", "pdf_size": 503351, "rating": "6;7;8", "confidence": "4;4;3", "wc_summary_and_contributions": "65;38;120", "wc_strengths": "24;16;87", "wc_improvement": "136;25;91", "wc_limitations": "163;54;133", "wc_correctness": "35;16;47", "wc_clarity": "6;7;47", "wc_relation_to_prior_work": "56;10;43", "wc_documentation": "25;15;45", "wc_additional_feedback": "1;1;1", "wc_review": "511;182;614", "wc_reply_reviewers": "0;14;0", "wc_reply_authors": "0;25;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 74.33333333333333, 34.120700787384514 ], "wc_strengths_avg": [ 42.333333333333336, 31.752515210959622 ], "wc_improvement_avg": [ 84.0, 45.58508528016593 ], "wc_limitations_avg": [ 116.66666666666667, 45.97342227369587 ], "wc_correctness_avg": [ 32.666666666666664, 12.762793146051099 ], "wc_clarity_avg": [ 20.0, 19.096247449870006 ], "wc_relation_to_prior_work_avg": [ 36.333333333333336, 19.362047641943473 ], "wc_documentation_avg": [ 28.333333333333332, 12.472191289246473 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 435.6666666666667, 184.23234123127122 ], "wc_reply_reviewers_avg": [ 4.666666666666667, 6.599663291074443 ], "wc_reply_authors_avg": [ 8.333333333333334, 11.785113019775793 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15000317272967510705&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "prosus.com;prosus.com;prosus.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Prosus", "aff_unique_dep": "", "aff_unique_url": "https://www.prosus.com", "aff_unique_abbr": "Prosus", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Beyond Aesthetics: Cultural Competence in Text-to-Image Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97855", "id": "4351SumKS9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4351SumKS9", "openreview": "https://openreview.net/forum?id=4351SumKS9", "poster": "/media/PosterPDFs/NeurIPS%202024/97855.png?t=1733918913.3085425", "project": "", "author_site": "Nithish Kannen Senthilkumar, Arif Ahmad, Marco Andreetto, Vinodkumar Prabhakaran, Utsav Prabhu, Adji Bousso Dieng, Pushpak Bhattacharyya, Shachi Dave", "tldr": "", "abstract": "Text-to-Image (T2I) models are being increasingly adopted in diverse global communities where they create visual representations of their unique cultures. Current T2I benchmarks primarily focus on faithfulness, aesthetics, and realism of generated images, overlooking the critical dimension of *cultural competence*. In this work, we introduce a framework to evaluate cultural competence of T2I models along two crucial dimensions: cultural awareness and cultural diversity, and present a scalable approach using a combination of structured knowledge bases and large language models to build a large dataset of cultural artifacts to enable this evaluation. In particular, we apply this approach to build CUBE (CUltural BEnchmark for Text-to-Image models), a first-of-its-kind benchmark to evaluate cultural competence of T2I models. CUBE covers cultural artifacts associated with 8 countries across different geo-cultural regions and along 3 concepts: cuisine, landmarks, and art. CUBE consists of 1) CUBE-1K, a set of high-quality prompts that enable the evaluation of cultural awareness, and 2) CUBE-CSpace, a larger dataset of cultural artifacts that serves as grounding to evaluate cultural diversity. We also introduce cultural diversity as a novel T2I evaluation component, leveraging quality-weighted Vendi score. Our evaluations reveal significant gaps in the cultural awareness of existing models across countries and provide valuable insights into the cultural diversity of T2I outputs for underspecified prompts. Our methodology is extendable to other cultural regions and concepts and can facilitate the development of T2I models that better cater to the global population.", "keywords": "cultural diversity;text-to-image models;vendi score;cultural knowledge;knowledge base;self-refinement;inclusive AI", "primary_area": "", "supplementary_material": "/attachment/cea22c1d2fa937ae75b5553e97fae20b152cbf81.zip", "author": "Nithish Kannen;Arif Ahmad;marco Andreetto;Vinodkumar Prabhakaran;Utsav Prabhu;Adji Bousso Dieng;Pushpak Bhattacharyya;Shachi Dave", "authorids": "~Nithish_Kannen1;~Arif_Ahmad1;~marco_Andreetto1;~Vinodkumar_Prabhakaran2;~Utsav_Prabhu1;~Adji_Bousso_Dieng1;~Pushpak_Bhattacharyya1;~Shachi_Dave1", "gender": "M;M;M;M;;F;M;", "homepage": "https://nitkannen.github.io/;https://arifahmad-py.github.io/;http://www.vision.caltech.edu/marco/;https://www.cs.stanford.edu/~vinod/;;https://vertaix.princeton.edu/;https://www.cse.iitb.ac.in/~pb/;https://research.google/people/106762/", "dblp": ";;;64/9281;;188/6478;p/PushpakBhattacharyya;66/3545", "google_scholar": "nPQMsWMAAAAJ;;-U5l-M0AAAAJ;HwryFLcAAAAJ;;ZCniP_MAAAAJ;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ;", "orcid": ";;;;;0000-0001-5687-3554;;", "linkedin": "nithish-kannen-7a7823177/;arif-ahmad-iitb/;;;;diengadji45;pushpakbh/?originalSubdomain=in;", "or_profile": "~Nithish_Kannen1;~Arif_Ahmad1;~marco_Andreetto1;~Vinodkumar_Prabhakaran2;~Utsav_Prabhu1;~Adji_Bousso_Dieng1;~Pushpak_Bhattacharyya1;~Shachi_Dave1", "aff": "Google Research;Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Google;Google;;Princeton University;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Research, Google", "aff_domain": "google.com;cse.iitb.ac.in;google.com;google.com;;princeton.edu;iitb.ac.in;research.google.com", "position": "Researcher;Undergrad student;Software Developer;Research Scientist;;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nkannen2024beyond,\ntitle={Beyond Aesthetics: Cultural Competence in Text-to-Image Models},\nauthor={Nithish Kannen and Arif Ahmad and marco Andreetto and Vinodkumar Prabhakaran and Utsav Prabhu and Adji Bousso Dieng and Pushpak Bhattacharyya and Shachi Dave},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=4351SumKS9}\n}", "github": "", "reviewers": "rfu1;gYoi;ZPJ8;7DYQ", "pdf_size": 30973795, "rating": "3;6;8;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "36;53;78;66", "wc_strengths": "4;57;119;77", "wc_improvement": "4;68;99;67", "wc_limitations": "9;90;513;67", "wc_correctness": "7;14;34;8", "wc_clarity": "7;4;5;96", "wc_relation_to_prior_work": "7;10;12;30", "wc_documentation": "5;12;15;30", "wc_additional_feedback": "1;1;1;1", "wc_review": "80;309;876;442", "wc_reply_reviewers": "394;0;253;80", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;4;3", "rating_avg": [ 6.5, 2.29128784747792 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 58.25, 15.594470173750693 ], "wc_strengths_avg": [ 64.25, 41.360458169609295 ], "wc_improvement_avg": [ 59.5, 34.528973341239094 ], "wc_limitations_avg": [ 169.75, 200.36139223912375 ], "wc_correctness_avg": [ 15.75, 10.871407452579449 ], "wc_clarity_avg": [ 28.0, 39.274673773310965 ], "wc_relation_to_prior_work_avg": [ 14.75, 8.98262211161084 ], "wc_documentation_avg": [ 15.5, 9.12414379544733 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 426.75, 289.8959942807075 ], "wc_reply_reviewers_avg": [ 181.75, 152.8992723985304 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.1259881576697424, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16692642272858951866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;cse.iitb.ac.in;google.com;google.com;;princeton.edu;iitb.ac.in;research.google.com", "author_num": 8, "aff_unique_index": "0;1;0;0;2;3;0", "aff_unique_norm": "Google;Indian Institute of Technology Bombay;Princeton University;Indian Institute of Technology, Bombay", "aff_unique_dep": "Google Research;;;", "aff_unique_url": "https://research.google;https://www.iitb.ac.in;https://www.princeton.edu;https://www.iitb.ac.in", "aff_unique_abbr": "Google Research;IIT Bombay;Princeton;IIT Bombay", "aff_campus_unique_index": "0;1;0;0;1;0", "aff_campus_unique": "Mountain View;Bombay;", "aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "United States;India" }, { "title": "OpenDebateEvidence: A Massive-Scale Argument Mining and Summarization Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97854", "id": "43s8hgGTOX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=43s8hgGTOX", "openreview": "https://openreview.net/forum?id=43s8hgGTOX", "poster": "/media/PosterPDFs/NeurIPS%202024/97854.png?t=1733458115.2012563", "project": "", "author_site": "Allen Roush, Yusuf Shabazz, Arvind Balaji, Peter Zhang, Stefano Mezza, Markus Zhang, Sanjay Basu, Sriram Vishwanath, Ravid Shwartz-Ziv", "tldr": "", "abstract": "We introduce OpenDebateEvidence, a comprehensive dataset for argument mining\nand summarization sourced from the American Competitive Debate community.\nThis dataset includes over 3.5 million documents with rich metadata, making it\none of the most extensive collections of debate evidence. OpenDebateEvidence\ncaptures the complexity of arguments in high school and college debates, pro-\nviding valuable resources for training and evaluation. Our extensive experiments\ndemonstrate the efficacy of fine-tuning state-of-the-art large language models for\nargumentative abstractive summarization across various methods, models, and\ndatasets. By providing this comprehensive resource, we aim to advance com-\nputational argumentation and support practical applications for debaters, edu-\ncators, and researchers. OpenDebateEvidence is publicly available to support\nfurther research and innovation in computational argumentation. Access it here:\nhttps://huggingface.co/datasets/Yusuf5/OpenCaselist.", "keywords": "Debate;Argument Mining;Policy Debate;Debate Evidence;Dataset;Datasets;Summarization;Extractive Summarization", "primary_area": "", "supplementary_material": "", "author": "Allen G Roush;Yusuf Shabazz;Arvind Balaji;Peter Zhang;Stefano Mezza;Markus Zhang;Sanjay Basu;Sriram Vishwanath;Ravid Shwartz-Ziv", "authorids": "~Allen_G_Roush1;~Yusuf_Shabazz1;~Arvind_Balaji1;~Peter_Zhang3;~Stefano_Mezza1;~Markus_Zhang1;~Sanjay_Basu1;~Sriram_Vishwanath2;~Ravid_Shwartz-Ziv2", "gender": "M;M;;M;M;;M;;M", "homepage": "https://github.com/Hellisotherpeople;https://github.com/D0ugins;;http://peterzha.ng/;https://github.com/stefanomezza;https://markusz.dev;;http://sriram.utlinc.org/#/;https://www.ravid-shwartz-ziv.com/", "dblp": "278/7861;;;;;;;71/2804;", "google_scholar": "uHozRV4AAAAJ;;;TyjRjlcAAAAJ;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=SqsLFwMAAAAJ", "orcid": ";;;0000-0002-8271-0107;;;;0000-0003-3112-4885;", "linkedin": "allen-roush-27721011b/;;arvind-balaji-445675186/;pjz/;;;sanjaybasu;;", "or_profile": "~Allen_G_Roush1;~Yusuf_Shabazz1;~Arvind_Balaji1;~Peter_Zhang3;~Stefano_Mezza1;~Markus_Zhang1;~Sanjay_Basu1;~Sriram_Vishwanath2;~ravid_ziv1", "aff": "Oracle;Howard Community College;Texas A&M University - College Station;University of California, Berkeley;University of New South Wales;Stanford University;;University of Texas at Austin;New York University", "aff_domain": "oracle.com;howardcc.edu;tamu.edu;berkeley.edu;unsw.edu.au;stanford.edu;;utexas.edu;nyu.edu", "position": "Researcher;Undergrad student;Undergrad student;MS student;PhD student;Undergrad student;;Full Professor;Postdoc", "bibtex": "@inproceedings{\nroush2024opendebateevidence,\ntitle={OpenDebateEvidence: A Massive-Scale Argument Mining and Summarization Dataset},\nauthor={Allen G Roush and Yusuf Shabazz and Arvind Balaji and Peter Zhang and Stefano Mezza and Markus Zhang and Sanjay Basu and Sriram Vishwanath and Ravid Shwartz-Ziv},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=43s8hgGTOX}\n}", "github": "", "reviewers": "ueg7;eFoA;rTyX;Qkct", "pdf_size": 445928, "rating": "7;7;7;7", "confidence": "4;4;4;3", "wc_summary_and_contributions": "44;122;233;88", "wc_strengths": "67;60;32;82", "wc_improvement": "71;376;33;13", "wc_limitations": "7;97;10;4", "wc_correctness": "13;89;3;1", "wc_clarity": "6;36;14;12", "wc_relation_to_prior_work": "6;29;12;1", "wc_documentation": "25;1;17;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "240;811;355;203", "wc_reply_reviewers": "86;145;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "4;4;3;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 121.75, 69.92987558976492 ], "wc_strengths_avg": [ 60.25, 18.14352501582865 ], "wc_improvement_avg": [ 123.25, 147.40484218640853 ], "wc_limitations_avg": [ 29.5, 39.02883549377306 ], "wc_correctness_avg": [ 26.5, 36.36963018783666 ], "wc_clarity_avg": [ 17.0, 11.357816691600547 ], "wc_relation_to_prior_work_avg": [ 12.0, 10.559356040971437 ], "wc_documentation_avg": [ 11.0, 10.392304845413264 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 402.25, 242.55656556770424 ], "wc_reply_reviewers_avg": [ 57.75, 61.401852577914944 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3234557026022886031&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "oracle.com;howardcc.edu;tamu.edu;berkeley.edu;unsw.edu.au;stanford.edu;;utexas.edu;nyu.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;6;7", "aff_unique_norm": "Oracle Corporation;Howard Community College;Texas A&M University;University of California, Berkeley;University of New South Wales;Stanford University;University of Texas at Austin;New York University", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.oracle.com;https://www.howardcc.edu;https://www.tamu.edu;https://www.berkeley.edu;https://www.unsw.edu.au;https://www.stanford.edu;https://www.utexas.edu;https://www.nyu.edu", "aff_unique_abbr": "Oracle;HCC;TAMU;UC Berkeley;UNSW;Stanford;UT Austin;NYU", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";College Station;Berkeley;Stanford;Austin", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Australia" }, { "title": "Boundary Matters: A Bi-Level Active Finetuning Method", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96700", "id": "444LAH3MhG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=444LAH3MhG", "openreview": "https://openreview.net/forum?id=444LAH3MhG", "poster": "/media/PosterPDFs/NeurIPS%202024/96700.png?t=1730889802.1774194", "project": "", "author_site": "Han Lu, Yichen Xie, Xiaokang Yang, Junchi Yan", "tldr": "", "abstract": "The pretraining-finetuning paradigm has gained widespread adoption in vision tasks and other fields. However, the finetuning phase still requires high-quality annotated samples. To overcome this challenge, the concept of active finetuning has emerged, aiming to select the most appropriate samples for model finetuning within a limited budget. Existing active learning methods struggle in this scenario due to their inherent bias in batch selection. Meanwhile, the recent active finetuning approach focuses solely on global distribution alignment but neglects the contributions of samples to local boundaries. Therefore, we propose a Bi-Level Active Finetuning framework (BiLAF) to select the samples for annotation in one shot, encompassing two stages: core sample selection for global diversity and boundary sample selection for local decision uncertainty. Without the need of ground-truth labels, our method can successfully identify pseudo-class centers, apply a novel denoising technique, and iteratively select boundary samples with designed evaluation metric. Extensive experiments provide qualitative and quantitative evidence of our method's superior efficacy, consistently outperforming the existing baselines.", "keywords": "Active Finetuning", "primary_area": "active_learning", "supplementary_material": "", "author": "Han Lu;Yichen Xie;Xiaokang Yang;Junchi Yan", "authorids": "~Han_Lu2;~Yichen_Xie1;~Xiaokang_Yang1;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": ";;https://icne.sjtu.edu.cn/info/1064/1078.htm;http://thinklab.sjtu.edu.cn/", "dblp": ";;06/3071-1.html;60/7949.html", "google_scholar": "HESzE0UAAAAJ;SdX6DaEAAAAJ;yDEavdMAAAAJ;ga230VoAAAAJ", "orcid": ";;0000-0003-4029-3322;0000-0001-9639-7679", "linkedin": ";;;", "or_profile": "~Han_Lu2;~Yichen_Xie1;~Xiaokang_Yang1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Waymo;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;waymo.com;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Intern;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024boundary,\ntitle={Boundary Matters: A Bi-Level Active Finetuning Method},\nauthor={Han Lu and Yichen Xie and Xiaokang Yang and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=444LAH3MhG}\n}", "github": "", "reviewers": "8trC;V2Vc;vV4Z;2Ep4;kN8s", "pdf_size": 7559806, "rating": "5;5;6;6;7", "confidence": "4;3;3;4;4", "soundness": "3;3;3;3;2", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;2", "wc_summary": "83;68;129;76;47", "wc_strengths": "68;54;37;96;31", "wc_weaknesses": "259;240;33;145;230", "wc_questions": "3;152;111;141;4", "wc_limitations": "29;46;81;49;1", "wc_review": "442;560;391;507;313", "wc_reply_reviewers": "0;23;0;40;0", "wc_reply_authors": "132;344;120;174;469", "reply_reviewers": "0;1;0;1;0", "reply_authors": "3;4;2;3;4", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 80.6, 27.045147439050872 ], "wc_strengths_avg": [ 57.2, 23.352944139872385 ], "wc_weaknesses_avg": [ 181.4, 83.86322197483233 ], "wc_questions_avg": [ 82.2, 65.64571577795462 ], "wc_limitations_avg": [ 41.2, 26.20228997625971 ], "wc_review_avg": [ 442.6, 86.47450491329799 ], "wc_reply_reviewers_avg": [ 12.6, 16.341358572652396 ], "wc_reply_authors_avg": [ 247.8, 136.65489380186867 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 3.2, 0.7483314773547882 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17009510377905661020&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "sjtu.edu.cn;waymo.com;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Waymo", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.waymo.com", "aff_unique_abbr": "SJTU;Waymo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Learning symmetries via weight-sharing with doubly stochastic tensors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96699", "id": "44WWOW4GPF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=44WWOW4GPF", "openreview": "https://openreview.net/forum?id=44WWOW4GPF", "poster": "", "project": "", "author_site": "Putri van der Linden, Alejandro Garc\u00eda-Castellanos, Sharvaree Vadgama, Thijs Kuipers, Erik Bekkers", "tldr": "", "abstract": "Group equivariance has emerged as a valuable inductive bias in deep learning, enhancing generalization, data efficiency, and robustness. Classically, group equivariant methods require the groups of interest to be known beforehand, which may not be realistic for real-world data. Additionally, baking in fixed group equivariance may impose overly restrictive constraints on model architecture. This highlights the need for methods that can dynamically discover and apply symmetries as soft constraints. For neural network architectures, equivariance is commonly achieved through group transformations of a canonical weight tensor, resulting in weight sharing over a given group $G$. In this work, we propose to *learn* such a weight-sharing scheme by defining a collection of learnable doubly stochastic matrices that act as soft permutation matrices on canonical weight tensors, which can take regular group representations as a special case. This yields learnable kernel transformations that are jointly optimized with downstream tasks. We show that when the dataset exhibits strong symmetries, the permutation matrices will converge to regular group representations and our weight-sharing networks effectively become regular group convolutions. Additionally, the flexibility of the method enables it to effectively pick up on partial symmetries.", "keywords": "symmetry discovery;weight-sharing", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Putri A Van der Linden;Alejandro Garc\u00eda Castellanos;Sharvaree Vadgama;Thijs P. Kuipers;Erik J Bekkers", "authorids": "~Putri_A_Van_der_Linden1;~Alejandro_Garc\u00eda_Castellanos1;~Sharvaree_Vadgama2;~Thijs_P._Kuipers1;~Erik_J_Bekkers1", "gender": "F;M;F;M;", "homepage": ";https://agarciacast.github.io/;https://amlab.science.uva.nl/people/SharvareeVadgama/;;https://erikbekkers.bitbucket.io/", "dblp": ";387/1668;306/1002;;43/5596", "google_scholar": ";dzdhlJwAAAAJ;https://scholar.google.com/citations?hl=en;rGfJZK8AAAAJ;https://scholar.google.nl/citations?user=yeWrfR4AAAAJ", "orcid": ";0009-0005-0516-1262;;0009-0007-7198-2856;", "linkedin": "putrivanderlinden/;;sharvaree-vadgama/;;", "or_profile": "~Putri_A_Van_der_Linden1;~Alejandro_Garc\u00eda_Castellanos1;~Sharvaree_Vadgama2;~Thijs_P._Kuipers1;~Erik_J_Bekkers1", "aff": "University of Amsterdam, University of Amsterdam;KTH Royal Institute of Technology;University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "ivi.uva.nl;kth.se;uva.nl;uva.nl;uva.nl", "position": "PhD student;Research Engineer;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlinden2024learning,\ntitle={Learning symmetries via weight-sharing with doubly stochastic tensors},\nauthor={Putri A Van der Linden and Alejandro Garc{\\'\\i}a Castellanos and Sharvaree Vadgama and Thijs P. Kuipers and Erik J Bekkers},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=44WWOW4GPF}\n}", "github": "", "reviewers": "xU9K;uLBU;1myY;3Y5V", "pdf_size": 2244958, "rating": "3;4;6;7", "confidence": "3;5;4;4", "soundness": "3;3;2;3", "novelty": "2;2;3;4", "presentation": "3;2;2;3", "wc_summary": "63;225;215;46", "wc_strengths": "67;29;80;50", "wc_weaknesses": "181;126;919;106", "wc_questions": "23;69;165;77", "wc_limitations": "5;38;47;57", "wc_review": "339;487;1426;336", "wc_reply_reviewers": "323;0;397;61", "wc_reply_authors": "739;40;40;59", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 137.25, 83.04328690508342 ], "wc_strengths_avg": [ 56.5, 19.11151485361639 ], "wc_weaknesses_avg": [ 333.0, 339.43997996700386 ], "wc_questions_avg": [ 83.5, 51.36876482844414 ], "wc_limitations_avg": [ 36.75, 19.524023663169434 ], "wc_review_avg": [ 647.0, 453.87938926547434 ], "wc_reply_reviewers_avg": [ 195.25, 168.20281656381383 ], "wc_reply_authors_avg": [ 219.5, 300.03374810177604 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.223606797749979, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TSoetgz8W3gJ:scholar.google.com/&scioq=Learning+symmetries+via+weight-sharing+with+doubly+stochastic+tensors&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ivi.uva.nl;kth.se;uva.nl;uva.nl;uva.nl", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Amsterdam;KTH Royal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.kth.se", "aff_unique_abbr": "UvA;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Netherlands;Sweden" }, { "title": "DCDepth: Progressive Monocular Depth Estimation in Discrete Cosine Domain", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96698", "id": "463TE4N8VJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=463TE4N8VJ", "openreview": "https://openreview.net/forum?id=463TE4N8VJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96698.png?t=1731992895.0685937", "project": "", "author_site": "Kun Wang, Zhiqiang Yan, Junkai Fan, Wanlu Zhu, Xiang Li, Jun Li, Jian Yang", "tldr": "", "abstract": "In this paper, we introduce DCDepth, a novel framework for the long-standing monocular depth estimation task. Moving beyond conventional pixel-wise depth estimation in the spatial domain, our approach estimates the frequency coefficients of depth patches after transforming them into the discrete cosine domain. This unique formulation allows for the modeling of local depth correlations within each patch. Crucially, the frequency transformation segregates the depth information into various frequency components, with low-frequency components encapsulating the core scene structure and high-frequency components detailing the finer aspects. This decomposition forms the basis of our progressive strategy, which begins with the prediction of low-frequency components to establish a global scene context, followed by successive refinement of local details through the prediction of higher-frequency components. We conduct comprehensive experiments on NYU-Depth-V2, TOFDC, and KITTI datasets, and demonstrate the state-of-the-art performance of DCDepth. Code is available at https://github.com/w2kun/DCDepth.", "keywords": "Monocular depth estimation;Discrete cosine transform;Deep learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kun Wang;Zhiqiang Yan;Junkai Fan;Wanlu Zhu;Xiang Li;Jun Li;Jian Yang", "authorids": "~Kun_Wang12;~Zhiqiang_Yan1;~Junkai_Fan1;~Wanlu_Zhu1;~Xiang_Li20;~Jun_Li16;~Jian_Yang1", "gender": "M;M;M;;M;M;M", "homepage": ";https://yanzq95.github.io/;https://fanjunkai1.github.io/;;http://implus.github.io/;;", "dblp": ";;316/6492;241/4684;40/1491-41;;y/JianYang3.html", "google_scholar": "https://scholar.google.com.hk/citations?user=ORn7aZcAAAAJ;hnrkzIEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;oamjJdYAAAAJ;iGPEwQsAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ", "orcid": "0000-0002-6390-2373;0000-0003-3502-438X;0009-0000-8162-6280;0009-0009-3230-3691;;;", "linkedin": ";;;;;;", "or_profile": "~Kun_Wang12;~Zhiqiang_Yan1;~Junkai_Fan1;~Wanlu_Zhu1;~Xiang_Li20;~Jun_Li16;~Jian_Yang1", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nankai University;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;nankai.edu.cn;njust.edu.cn;njust.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024dcdepth,\ntitle={{DCD}epth: Progressive Monocular Depth Estimation in Discrete Cosine Domain},\nauthor={Kun Wang and Zhiqiang Yan and Junkai Fan and Wanlu Zhu and Xiang Li and Jun Li and Jian Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=463TE4N8VJ}\n}", "github": "", "reviewers": "fZWS;HHm4;DXhZ;tNUP", "pdf_size": 4645627, "rating": "5;5;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "3;2;2;3", "presentation": "3;3;2;4", "wc_summary": "40;55;62;52", "wc_strengths": "40;25;21;83", "wc_weaknesses": "61;42;315;81", "wc_questions": "3;69;3;5", "wc_limitations": "1;1;26;21", "wc_review": "145;192;427;242", "wc_reply_reviewers": "13;0;48;17", "wc_reply_authors": "71;64;119;10", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 52.25, 7.949056547792323 ], "wc_strengths_avg": [ 42.25, 24.57005290999594 ], "wc_weaknesses_avg": [ 124.75, 110.7031503616767 ], "wc_questions_avg": [ 20.0, 28.30194339616981 ], "wc_limitations_avg": [ 12.25, 11.388041973930374 ], "wc_review_avg": [ 251.5, 106.973127466668 ], "wc_reply_reviewers_avg": [ 19.5, 17.613914953808536 ], "wc_reply_authors_avg": [ 66.0, 38.6458277178792 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1298788812014435746&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;nankai.edu.cn;njust.edu.cn;njust.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Nanjing University of Science and Technology;Nankai University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;http://www.nankai.edu.cn", "aff_unique_abbr": "NUST;NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Recurrent neural networks: vanishing and exploding gradients are not the end of the story", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96697", "id": "46Jr4sgTWa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=46Jr4sgTWa", "openreview": "https://openreview.net/forum?id=46Jr4sgTWa", "poster": "", "project": "", "author_site": "Nicolas Zucchet, Antonio Orvieto", "tldr": "", "abstract": "Recurrent neural networks (RNNs) notoriously struggle to learn long-term memories, primarily due to vanishing and exploding gradients. The recent success of state-space models (SSMs), a subclass of RNNs, to overcome such difficulties challenges our theoretical understanding. In this paper, we delve into the optimization challenges of RNNs and discover that, as the memory of a network increases, changes in its parameters result in increasingly large output variations, making gradient-based learning highly sensitive, even without exploding gradients. Our analysis further reveals the importance of the element-wise recurrence design pattern combined with careful parametrizations in mitigating this effect. This feature is present in SSMs, as well as in other architectures, such as LSTMs. Overall, our insights provide a new explanation for some of the difficulties in gradient-based learning of RNNs and why some architectures perform better than others.", "keywords": "RNN;optimization;long-range dependencies;SSM;LSTM", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Nicolas Zucchet;Antonio Orvieto", "authorids": "~Nicolas_Zucchet1;~Antonio_Orvieto3", "gender": "M;M", "homepage": ";http://orvi.altervista.org/", "dblp": "289/6252;", "google_scholar": "cLhZY44AAAAJ;xkuLyHoAAAAJ", "orcid": ";", "linkedin": "nicolas-zucchet-7a84a6139/;antonio-orvieto-947ab0130/", "or_profile": "~Nicolas_Zucchet1;~Antonio_Orvieto3", "aff": "ETHZ - ETH Zurich;ELLIS Institute T\u00fcbingen, Max Planck Institute for Intelligent Systems, T\u00fcbingen AI Center, T\u00fcbingen, Germany", "aff_domain": "ethz.ch;tue.ellis.eu", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nzucchet2024recurrent,\ntitle={Recurrent neural networks: vanishing and exploding gradients are not the end of the story},\nauthor={Nicolas Zucchet and Antonio Orvieto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=46Jr4sgTWa}\n}", "github": "", "reviewers": "uPdc;xzKN;Bq7Y;zt6Z;Bgq8", "pdf_size": 10778745, "rating": "4;5;5;5;7", "confidence": "3;4;5;4;4", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;4", "presentation": "2;2;3;3;3", "wc_summary": "139;51;112;63;102", "wc_strengths": "66;41;112;53;81", "wc_weaknesses": "282;206;358;47;206", "wc_questions": "37;222;23;550;123", "wc_limitations": "1;78;23;6;19", "wc_review": "525;598;628;719;531", "wc_reply_reviewers": "333;192;304;128;34", "wc_reply_authors": "157;208;159;209;16", "reply_reviewers": "2;1;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 93.4, 32.314702536152176 ], "wc_strengths_avg": [ 70.6, 24.613817257792423 ], "wc_weaknesses_avg": [ 219.8, 103.15890654713242 ], "wc_questions_avg": [ 191.0, 193.08340166881254 ], "wc_limitations_avg": [ 25.4, 27.514359887157106 ], "wc_review_avg": [ 600.2, 71.18258213917223 ], "wc_reply_reviewers_avg": [ 198.2, 110.71838149106047 ], "wc_reply_authors_avg": [ 149.8, 70.61274672465305 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3227486121839514, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13024499378596004645&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ethz.ch;tue.ellis.eu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;ELLIS Institute T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;", "aff_unique_abbr": "ETHZ;", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "AP-Adapter: Improving Generalization of Automatic Prompts on Unseen Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96696", "id": "46V9axmOuU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=46V9axmOuU", "openreview": "https://openreview.net/forum?id=46V9axmOuU", "poster": "/media/PosterPDFs/NeurIPS%202024/96696.png?t=1730719502.230483", "project": "", "author_site": "Yuchen Fu, Zhiwei Jiang, Yuliang Liu, Cong Wang, Zexuan Deng, Zhaoling Chen, Qing Gu", "tldr": "", "abstract": "Recent advancements in Automatic Prompt Optimization (APO) for text-to-image generation have streamlined user input while ensuring high-quality image output. However, most APO methods are trained assuming a fixed text-to-image model, which is impractical given the emergence of new models. To address this, we propose a novel task, model-generalized automatic prompt optimization (MGAPO), which trains APO methods on a set of known models to enable generalization to unseen models during testing. MGAPO presents significant challenges. First, we experimentally confirm the suboptimal performance of existing APO methods on unseen models. We then introduce a two-stage prompt optimization method, AP-Adapter. In the first stage, a large language model is used to rewrite the prompts. In the second stage, we propose a novel method to construct an enhanced representation space by leveraging inter-model differences. This space captures the characteristics of multiple domain models, storing them as domain prototypes. These prototypes serve as anchors to adjust prompt representations, enabling generalization to unseen models. The optimized prompt representations are subsequently used to generate conditional representations for controllable image generation. We curate a multi-modal, multi-model dataset that includes multiple diffusion models and their corresponding text-image data, and conduct experiments under a model generalization setting. The experimental results demonstrate the AP-Adapter's ability to enable the automatic prompts to generalize well to previously unseen diffusion models, generating high-quality images.", "keywords": "text-to-image generation;diffusion model;Automatic Prompt Optimization;large language model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/ab0e73140af742618efe620f0abfbed080061929.zip", "author": "Yuchen Fu;Zhiwei Jiang;Yuliang Liu;Cong Wang;Zexuan Deng;Zhaoling Chen;Qing Gu", "authorids": "~Yuchen_Fu1;~Zhiwei_Jiang1;~Yuliang_Liu5;~Cong_Wang8;~Zexuan_Deng1;~Zhaoling_Chen1;~Qing_Gu1", "gender": "M;;M;M;M;;M", "homepage": "https://isetnju.github.io/;;;https://tenvence.github.io/;;;https://isetnju.github.io/guq/index.html", "dblp": ";;;18/2771-34;;;", "google_scholar": ";;rmxvSgQAAAAJ;h7EIOCUAAAAJ;WuJZez0AAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-7165-4341;0000-0003-0916-7803;0000-0001-6531-6813;;", "linkedin": ";;;;;;", "or_profile": "~Yuchen_Fu1;~Zhiwei_Jiang1;~Yuliang_Liu5;~Cong_Wang8;~Zexuan_Deng1;~Zhaoling_Chen1;~Qing_Gu1", "aff": "Nanjing University;;Nanjing University;Tencent AI Lab;Nanjing University;;Nanjing University", "aff_domain": "nju.edu.cn;;nju.edu.cn;tencent.com;nju.edu.cn;;nju.edu.cn", "position": "PhD student;;MS student;Intern;MS student;;Full Professor", "bibtex": "@inproceedings{\nfu2024apadapter,\ntitle={{AP}-Adapter: Improving Generalization of Automatic Prompts on Unseen Text-to-Image Diffusion Models},\nauthor={Yuchen Fu and Zhiwei Jiang and Yuliang Liu and Cong Wang and Zexuan Deng and Zhaoling Chen and Qing Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=46V9axmOuU}\n}", "github": "", "reviewers": "redB;fgUM;W76C", "pdf_size": 16703504, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "3;2;2", "novelty": "2;3;2", "presentation": "2;3;2", "wc_summary": "47;100;118", "wc_strengths": "49;29;29", "wc_weaknesses": "279;122;272", "wc_questions": "6;2;5", "wc_limitations": "14;50;6", "wc_review": "395;303;430", "wc_reply_reviewers": "179;20;305", "wc_reply_authors": "263;11;917", "reply_reviewers": "2;1;3", "reply_authors": "2;2;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 88.33333333333333, 30.13672547278855 ], "wc_strengths_avg": [ 35.666666666666664, 9.428090415820632 ], "wc_weaknesses_avg": [ 224.33333333333334, 72.41700230071818 ], "wc_questions_avg": [ 4.333333333333333, 1.699673171197595 ], "wc_limitations_avg": [ 23.333333333333332, 19.136933459209764 ], "wc_review_avg": [ 376.0, 53.559935275041795 ], "wc_reply_reviewers_avg": [ 168.0, 116.61046265237096 ], "wc_reply_authors_avg": [ 397.0, 381.81670995387304 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ktBlTnWX-1UJ:scholar.google.com/&scioq=AP-Adapter:+Improving+Generalization+of+Automatic+Prompts+on+Unseen+Text-to-Image+Diffusion+Models&hl=en&as_sdt=0,23", "gs_version_total": 0, "email": "nju.edu.cn;;nju.edu.cn;tencent.com;nju.edu.cn;;nju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Nanjing University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.nju.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "Nanjing U;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AsyncDiff: Parallelizing Diffusion Models by Asynchronous Denoising", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96695", "id": "46jtDC6gXu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=46jtDC6gXu", "openreview": "https://openreview.net/forum?id=46jtDC6gXu", "poster": "", "project": "", "author_site": "Zigeng Chen, Xinyin Ma, Gongfan Fang, Zhenxiong Tan, Xinchao Wang", "tldr": "", "abstract": "Diffusion models have garnered significant interest from the community for their great generative ability across various applications. However, their typical multi-step sequential-denoising nature gives rise to high cumulative latency, thereby precluding the possibilities of parallel computation. To address this, we introduce AsyncDiff, a universal and plug-and-play acceleration scheme that enables model parallelism across multiple devices. Our approach divides the cumbersome noise prediction model into multiple components, assigning each to a different device. To break the dependency chain between these components, it transforms the conventional sequential denoising into an asynchronous process by exploiting the high similarity between hidden states in consecutive diffusion steps. Consequently, each component is facilitated to compute in parallel on separate devices. The proposed strategy significantly reduces inference latency while minimally impacting the generative quality. Specifically, for the Stable Diffusion v2.1, AsyncDiff achieves a 2.7x speedup with negligible degradation and a 4.0x speedup with only a slight reduction of 0.38 in CLIP Score, on four NVIDIA A5000 GPUs. Our experiments also demonstrate AsyncDiff can be readily applied to video diffusion models with encouraging performances.", "keywords": "Diffusion Model;Inference Acceleration", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/c7dea7f48eeb3d917e357ae0bcf87f88b4264422.zip", "author": "Zigeng Chen;Xinyin Ma;Gongfan Fang;Zhenxiong Tan;Xinchao Wang", "authorids": "~Zigeng_Chen1;~Xinyin_Ma1;~Gongfan_Fang2;~Zhenxiong_Tan1;~Xinchao_Wang1", "gender": "M;F;M;M;M", "homepage": "https://czg1225.github.io/chenzigeng99/;https://horseee.github.io;https://fangggf.github.io/;https://github.com/Yuanshi9815;https://sites.google.com/site/sitexinchaowang/", "dblp": ";267/2244;243/5768;255/5472;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;jFUKS0oAAAAJ;489YZ_kAAAAJ;;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zigeng_Chen1;~Xinyin_Ma1;~Gongfan_Fang2;~Zhenxiong_Tan1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;u.nus.edu;nus.edu;nus.edu", "position": "Researcher;PhD student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024asyncdiff,\ntitle={AsyncDiff: Parallelizing Diffusion Models by Asynchronous Denoising},\nauthor={Zigeng Chen and Xinyin Ma and Gongfan Fang and Zhenxiong Tan and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=46jtDC6gXu}\n}", "github": "", "reviewers": "773c;E7zL;7eQm;iHdZ", "pdf_size": 16170454, "rating": "6;6;7;7", "confidence": "4;3;4;5", "soundness": "3;4;4;3", "novelty": "3;3;4;4", "presentation": "4;3;3;4", "wc_summary": "114;66;84;87", "wc_strengths": "102;40;80;82", "wc_weaknesses": "195;51;103;103", "wc_questions": "71;9;19;39", "wc_limitations": "19;30;12;1", "wc_review": "501;196;298;312", "wc_reply_reviewers": "51;21;0;20", "wc_reply_authors": "16;18;0;21", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.75, 17.151894939043906 ], "wc_strengths_avg": [ 76.0, 22.494443758403985 ], "wc_weaknesses_avg": [ 113.0, 51.884487084291386 ], "wc_questions_avg": [ 34.5, 23.680160472429236 ], "wc_limitations_avg": [ 15.5, 10.547511554864494 ], "wc_review_avg": [ 326.75, 110.11669946016363 ], "wc_reply_reviewers_avg": [ 23.0, 18.207141456033124 ], "wc_reply_authors_avg": [ 13.75, 8.13557004763649 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2754776641866062558&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nus.edu.sg;u.nus.edu;u.nus.edu;nus.edu;nus.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "COVE: Unleashing the Diffusion Feature Correspondence for Consistent Video Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96694", "id": "474M9aeI4U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=474M9aeI4U", "openreview": "https://openreview.net/forum?id=474M9aeI4U", "poster": "", "project": "", "author_site": "Jiangshan Wang, Yue Ma, Jiayi Guo, Yicheng Xiao, Gao Huang, Xiu Li", "tldr": "", "abstract": "Video editing is an emerging task, in which most current methods adopt the pre-trained text-to-image (T2I) diffusion model to edit the source video in a zero-shot manner. Despite extensive efforts, maintaining the temporal consistency of edited videos remains challenging due to the lack of temporal constraints in the regular T2I diffusion model. To address this issue, we propose COrrespondence-guided Video Editing (COVE), leveraging the inherent diffusion feature correspondence to achieve high-quality and consistent video editing. Specifically, we propose an efficient sliding-window-based strategy to calculate the similarity among tokens in the diffusion features of source videos, identifying the tokens with high correspondence across frames. During the inversion and denoising process, we sample the tokens in noisy latent based on the correspondence and then perform self-attention within them. To save the usage of GPU memory and accelerate the editing process, we further introduce the temporal-dimensional token merging strategy, which can effectively reduce the redundancy. COVE can be seamlessly integrated into the pre-trained T2I diffusion model without the need for extra training or optimization. Extensive experiment results demonstrate that COVE achieves the start-of-the-art performance in various video editing scenarios, outperforming existing methods both quantitatively and qualitatively. The source code will be released.", "keywords": "Diffusion model; Video editing", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/80cefdcfda7a168358ed7822ce5739d45a5a311f.zip", "author": "Jiangshan Wang;Yue Ma;Jiayi Guo;Yicheng Xiao;Gao Huang;Xiu Li", "authorids": "~Jiangshan_Wang2;~Yue_Ma2;~Jiayi_Guo2;~Yicheng_Xiao1;~Gao_Huang1;~Xiu_Li1", "gender": "M;M;M;M;F;M", "homepage": "https://github.com/wangjiangshan0725;https://jiayiguo821.github.io/;;http://www.gaohuang.net;https://thusigsiclab.github.io/thu.github.io/introduction.html;https://mayuelala.github.io/", "dblp": "124/2780.html;;322/9380;;13/1206-1;", "google_scholar": ";2p6GCEEAAAAJ;oakZP0cAAAAJ;-P9LwcgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;kwBR1ygAAAAJ", "orcid": "0009-0004-9314-021X;;;;0000-0003-0403-1923;", "linkedin": ";;;;;", "or_profile": "~Jiangshan_Wang2;~Jiayi_Guo2;~Yicheng_Xiao1;~Gao_Huang1;~Xiu_Li1;~ma_yue1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;MS student;Associate Professor;Professor;MS student", "bibtex": "@inproceedings{\nwang2024cove,\ntitle={{COVE}: Unleashing the Diffusion Feature Correspondence for Consistent Video Editing},\nauthor={Jiangshan Wang and Yue Ma and Jiayi Guo and Yicheng Xiao and Gao Huang and Xiu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=474M9aeI4U}\n}", "github": "", "reviewers": "sLJK;nziD;LETZ;Ht4d", "pdf_size": 14317222, "rating": "4;5;6;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "88;76;49;108", "wc_strengths": "24;28;50;63", "wc_weaknesses": "250;118;238;266", "wc_questions": "2;34;45;8", "wc_limitations": "1;3;1;11", "wc_review": "365;259;383;456", "wc_reply_reviewers": "0;0;65;33", "wc_reply_authors": "107;72;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 21.358546298847212 ], "wc_strengths_avg": [ 41.25, 15.990231392947383 ], "wc_weaknesses_avg": [ 218.0, 58.58327406350724 ], "wc_questions_avg": [ 22.25, 17.80975856096876 ], "wc_limitations_avg": [ 4.0, 4.123105625617661 ], "wc_review_avg": [ 365.75, 70.42504881077471 ], "wc_reply_reviewers_avg": [ 24.5, 26.98610753702727 ], "wc_reply_authors_avg": [ 44.75, 46.42938186105863 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17777094873432445101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "476zUsqFZB", "title": "PMechRP: Interpretable Deep Learning for Polar Reaction Prediction", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, machine learning based methods for chemical reaction prediction have garnered significant interest due to the time consuming and resource intensive nature of designing synthetic pathways. However, with the majority of models being trained on the US Patent Office dataset, many proposed architectures lack interpretability by modeling chemical reactions as overall transformations. These models map directly from reactants to products, and provide minimal insight into the underlying driving forces of a reaction. In order to improve interpretrability and provide insight into the causality of a chemical reaction, we train various machine learning frameworks on the PMechDB dataset. This dataset contains polar elementary steps, which model chemical reactions as a sequence of steps associated with movements of electrons. Through training on PMechDB, we have created a new system for polar mechanistic reaction prediction: PMechRP. Our findings indicate that PMechRP is able to provide both accurate and interpretrable predictions, with a novel two-step transformer based method achieving the highest top-5 accuracy at 89.9%.", "keywords": "chemistry;deep learning;interpretable;transformers;reaction prediction;mechanisms", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Ryan J Miller;Brayden Rudisill;Pierre Baldi;David Van Vranken", "authorids": "~Ryan_J_Miller1;~Brayden_Rudisill1;~Pierre_Baldi1;~David_Van_Vranken1", "gender": "M;M;;M", "homepage": ";;;", "dblp": ";;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;", "orcid": ";;;0000-0001-5964-7042", "linkedin": "rjmiller1330/;braydenrudisill;;", "or_profile": "~Ryan_J_Miller1;~Brayden_Rudisill1;~Pierre_Baldi1;~David_Van_Vranken1", "aff": "University of California, Irvine;University of California, Irvine;;", "aff_domain": "uci.edu;uci.edu;;", "position": "PhD student;Undergrad student;;", "bibtex": "@misc{\nanonymous2024pmechrp,\ntitle={{PM}ech{RP}: Interpretable Deep Learning for Polar Reaction Prediction},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=476zUsqFZB}\n}", "github": "", "project": "", "reviewers": "uk94;cMjN;HcXf;1kkN", "site": "https://openreview.net/forum?id=476zUsqFZB", "pdf_size": 350316, "rating": "1;3;3;7", "confidence": "5;4;5;5", "soundness": "3;2;2;3", "novelty": "3;1;1;3", "presentation": "3;1;2;2", "wc_summary": "41;58;57;62", "wc_strengths": "23;23;33;32", "wc_weaknesses": "34;183;211;61", "wc_questions": "1;31;39;84", "wc_limitations": "1;10;22;1", "wc_review": "100;305;362;240", "wc_reply_reviewers": "0;0;0;54", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 2.179449471770337 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 54.5, 8.0156097709407 ], "wc_strengths_avg": [ 27.75, 4.763139720814412 ], "wc_weaknesses_avg": [ 122.25, 76.00452289173323 ], "wc_questions_avg": [ 38.75, 29.71847068743612 ], "wc_limitations_avg": [ 8.5, 8.616843969807043 ], "wc_review_avg": [ 251.75, 97.6687641981816 ], "wc_reply_reviewers_avg": [ 13.5, 23.382685902179844 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Iy1WTBnQGTUJ:scholar.google.com/&scioq=PMechRP:+Interpretable+Deep+Learning+for+Polar+Reaction+Prediction&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "47CdPNiWUB", "title": "Mitigating the Impact of Labeling Errors on Training via Rockafellian Relaxation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Labeling errors in datasets are common, if not systematic, in practice. They naturally arise in a variety of contexts\u2014human labeling, noisy labeling, and weak labeling (i.e., image classification), for example. This presents a persistent and pervasive stress on machine learning practice. In particular, neural network (NN) architectures can withstand minor amounts of dataset imperfection with traditional countermeasures such as regularization, data augmentation, and batch normalization. However, major dataset imperfections often prove insurmountable. We propose and study the implementation of Rockafellian Relaxation (RR), a new loss re-weighting, architecture-independent methodology, for neural network training. Experiments indicate RR can enhance standard neural network methods to achieve robust performance across classification tasks in computer vision and natural language processing (sentiment analysis). We find that RR can mitigate the effects of dataset corruption due to both (heavy) labeling error and/or adversarial perturbation, demonstrating effectiveness across a variety of data domains and machine learning tasks.", "keywords": "loss reweighting;labeling errors;adversarial training;neural networks", "primary_area": "evaluation", "supplementary_material": "", "author": "Louis Chen;Bobbie Chern;Eric Eckstrand;Amogh Mahapatra;Johannes O. Royset", "authorids": "~Louis_Chen1;~Bobbie_Chern1;~Eric_Eckstrand1;~Amogh_Mahapatra1;~Johannes_O._Royset1", "gender": "M;M;M;M;", "homepage": "https://louislchen.github.io/;;;https://amoghmahapatra.org/;", "dblp": ";;;;", "google_scholar": ";;;EtdobO4AAAAJ;vdegcWUAAAAJ", "orcid": "0000-0001-5311-7294;;0009-0006-2932-8458;;", "linkedin": ";;;;", "or_profile": "~Louis_Chen1;~Bobbie_Chern1;~Eric_Eckstrand1;~Amogh_Mahapatra1;~Johannes_O._Royset1", "aff": "Naval Postgraduate School;;Naval Postgraduate School;Meta Facebook;University of Southern California", "aff_domain": "nps.edu;;nps.edu;meta.com;usc.edu", "position": "Assistant Professor;;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024mitigating,\ntitle={Mitigating the Impact of Labeling Errors on Training via Rockafellian Relaxation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=47CdPNiWUB}\n}", "github": "", "project": "", "reviewers": "yNxu;TFmX;UZpK", "site": "https://openreview.net/forum?id=47CdPNiWUB", "pdf_size": 272731, "rating": "5;5;5", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;2", "wc_summary": "106;149;68", "wc_strengths": "98;71;75", "wc_weaknesses": "19;105;95", "wc_questions": "19;133;102", "wc_limitations": "75;47;6", "wc_review": "317;505;346", "wc_reply_reviewers": "0;57;49", "wc_reply_authors": "0;190;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 107.66666666666667, 33.089105289942324 ], "wc_strengths_avg": [ 81.33333333333333, 11.897712198383164 ], "wc_weaknesses_avg": [ 73.0, 38.40138886377245 ], "wc_questions_avg": [ 84.66666666666667, 48.12714641678044 ], "wc_limitations_avg": [ 42.666666666666664, 28.33529404980455 ], "wc_review_avg": [ 389.3333333333333, 82.64112508645775 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 25.197001585285676 ], "wc_reply_authors_avg": [ 63.333333333333336, 89.56685895029602 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5vDYXry0agQJ:scholar.google.com/&scioq=Mitigating+the+Impact+of+Labeling+Errors+on+Training+via+Rockafellian+Relaxation&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Naval Postgraduate School;Meta;University of Southern California", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.nps.edu;https://meta.com;https://www.usc.edu", "aff_unique_abbr": "NPS;Meta;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "E2E-MFD: Towards End-to-End Synchronous Multimodal Fusion Detection", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96693", "id": "47loYmzxep", "proceeding": "", "pdf": "https://openreview.net/pdf?id=47loYmzxep", "openreview": "https://openreview.net/forum?id=47loYmzxep", "poster": "/media/PosterPDFs/NeurIPS%202024/96693.png?t=1730268618.4001482", "project": "", "author_site": "Jiaqing Zhang, Mingxiang Cao, Weiying Xie, Jie Lei, Daixun Li, Wenbo Huang, Yunsong Li, Xue Yang", "tldr": "", "abstract": "Multimodal image fusion and object detection are crucial for autonomous driving. While current methods have advanced the fusion of texture details and semantic information, their complex training processes hinder broader applications. Addressing this challenge, we introduce E2E-MFD, a novel end-to-end algorithm for multimodal fusion detection. E2E-MFD streamlines the process, achieving high performance with a single training phase. It employs synchronous joint optimization across components to avoid suboptimal solutions associated to individual tasks. Furthermore, it implements a comprehensive optimization strategy in the gradient matrix for shared parameters, ensuring convergence to an optimal fusion detection configuration. Our extensive testing on multiple public datasets reveals E2E-MFD's superior capabilities, showcasing not only visually appealing image fusion but also impressive detection outcomes, such as a 3.9\\% and 2.0\\% $\\text{mAP}_{50}$ increase on horizontal object detection dataset M3FD and oriented object detection dataset DroneVehicle, respectively, compared to state-of-the-art approaches.", "keywords": "Multimodal Fusion;Object detection", "primary_area": "machine_vision", "supplementary_material": "/attachment/f830e85e4909eb2cacd5a5915baf9e21b59d75f1.zip", "author": "Jiaqing Zhang;Mingxiang Cao;Weiying Xie;Jie Lei;DaixunLi;Wenbo Huang;Yunsong Li;Xue Yang", "authorids": "~Jiaqing_Zhang1;~Mingxiang_Cao1;~Weiying_Xie1;~Jie_Lei5;~DaixunLi1;~Wenbo_Huang1;~Yunsong_Li1;~Xue_Yang2", "gender": "F;M;F;M;M;M;M;M", "homepage": ";https://scholar.google.com/citations?user=codYwikAAAAJ&hl=zh-CN;https://web.xidian.edu.cn/wyxie/;;https://scholar.google.com/citations?user=gaiP4-IAAAAJ&hl=zh-CN;https://wenbohuang1002.github.io/;https://web.xidian.edu.cn/ysli/;https://yangxue.site/", "dblp": ";366/0598;/150/3937.html;61/5501-1;361/0252;125/9700-1;;13/1779-5", "google_scholar": "6VsVHrAAAAAJ;codYwikAAAAJ;y0ha5lMAAAAJ;GinaT0wAAAAJ;gaiP4-IAAAAJ;yFBDPqcAAAAJ;aY_2RzkAAAAJ;2xTlvV0AAAAJ", "orcid": ";0009-0000-1892-3645;;;;0000-0002-6664-1172;;0000-0002-7084-9101", "linkedin": ";;;;;;;", "or_profile": "~Jiaqing_Zhang1;~Mingxiang_Cao1;~Weiying_Xie1;~Jie_Lei5;~DaixunLi1;~Wenbo_Huang1;~Yunsong_Li1;~Xue_Yang2", "aff": "Xi'an University;State Key Laboratory of Integrated Services Networks;Xidian University;Xi'an University of Electronic Science and Technology;State Key Laboratory of Integrated Services Networks;Southeast University;Xidian University ;Shanghai AI Laboratory", "aff_domain": "xidian.edu.cn;stu.xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;stu.xidian.edu.cn;seu.edu.cn;xidian.edu.cn;pjlab.org.cn", "position": "PhD student;MS student;Full Professor;Full Professor;PhD student;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024eemfd,\ntitle={E2E-{MFD}: Towards End-to-End Synchronous Multimodal Fusion Detection},\nauthor={Jiaqing Zhang and Mingxiang Cao and Weiying Xie and Jie Lei and DaixunLi and Wenbo Huang and Yunsong Li and Xue Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=47loYmzxep}\n}", "github": "", "reviewers": "bix9;cEzg;6qWk;G1Ro", "pdf_size": 14155464, "rating": "4;8;8;9", "confidence": "5;5;5;3", "soundness": "2;4;4;4", "novelty": "2;4;4;4", "presentation": "3;4;4;4", "wc_summary": "23;131;77;27", "wc_strengths": "26;100;106;29", "wc_weaknesses": "165;49;86;167", "wc_questions": "3;67;3;1", "wc_limitations": "3;30;45;2", "wc_review": "220;377;317;226", "wc_reply_reviewers": "0;30;22;12", "wc_reply_authors": "45;27;11;50", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 43.894760507377185 ], "wc_strengths_avg": [ 65.25, 37.82442993621979 ], "wc_weaknesses_avg": [ 116.75, 50.962608842169765 ], "wc_questions_avg": [ 18.5, 28.01338965566288 ], "wc_limitations_avg": [ 20.0, 18.289341158171883 ], "wc_review_avg": [ 285.0, 65.56294685262401 ], "wc_reply_reviewers_avg": [ 16.0, 11.224972160321824 ], "wc_reply_authors_avg": [ 33.25, 15.433324334050653 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5261522196019801, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17238565064987572642&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "xidian.edu.cn;stu.xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;stu.xidian.edu.cn;seu.edu.cn;xidian.edu.cn;pjlab.org.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;1;4;2;5", "aff_unique_norm": "Xi'an University;State Key Laboratory of Integrated Services Networks;Xidian University;Xi'an University of Electronic Science and Technology;Southeast University;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.xidian.edu.cn/;;http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;https://www.seu.edu.cn/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Xidian;;Xidian;Xidian University;SEU;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ReEvo: Large Language Models as Hyper-Heuristics with Reflective Evolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96692", "id": "483IPG0HWL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=483IPG0HWL", "openreview": "https://openreview.net/forum?id=483IPG0HWL", "poster": "/media/PosterPDFs/NeurIPS%202024/96692.png?t=1730353131.0871956", "project": "", "author_site": "Haoran Ye, Jiarui Wang, Zhiguang Cao, Federico Berto, Chuanbo Hua, HAEYEON KIM, Jinkyoo Park, Guojie Song", "tldr": "", "abstract": "The omnipresence of NP-hard combinatorial optimization problems (COPs) compels domain experts to engage in trial-and-error heuristic design. The long-standing endeavor of design automation has gained new momentum with the rise of large language models (LLMs). This paper introduces Language Hyper-Heuristics (LHHs), an emerging variant of Hyper-Heuristics that leverages LLMs for heuristic generation, featuring minimal manual intervention and open-ended heuristic spaces. To empower LHHs, we present Reflective Evolution (ReEvo), a novel integration of evolutionary search for efficiently exploring the heuristic space, and LLM reflections to provide verbal gradients within the space. Across five heterogeneous algorithmic types, six different COPs, and both white-box and black-box views of COPs, ReEvo yields state-of-the-art and competitive meta-heuristics, evolutionary algorithms, heuristics, and neural solvers, while being more sample-efficient than prior LHHs.", "keywords": "combinatorial optimization;hyper-heuristic;heuristic;neural combinatorial optimization;large language model;evolutionary algorithm", "primary_area": "optimization", "supplementary_material": "", "author": "Haoran Ye;Jiarui Wang;Zhiguang Cao;Federico Berto;Chuanbo Hua;Haeyeon Kim;Jinkyoo Park;Guojie Song", "authorids": "~Haoran_Ye1;~Jiarui_Wang3;~Zhiguang_Cao1;~Federico_Berto1;~Chuanbo_Hua1;~Haeyeon_Kim1;~Jinkyoo_Park1;~Guojie_Song1", "gender": "M;M;M;M;M;F;M;M", "homepage": "https://yehaoran.info;;https://zhiguangcaosg.github.io/;https://fedebotu.github.io/;https://github.com/cbhua;http://sites.google.com/view/haeyeon-rachel-kim;http://silab.kaist.ac.kr/;http://sai.pku.edu.cn/info/1022/2212.htm", "dblp": "237/9631;178/5014-2;178/8621;317/1711;326/5321;;156/7535;37/2900", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;https://scholar.google.com/citations?hl=en;fjKA5gYAAAAJ;rP_9IY8AAAAJ;sH2a0nkAAAAJ;https://scholar.google.com.tw/citations?user=a832IIMAAAAJ", "orcid": "0000-0002-8510-3716;0000-0002-2138-6016;0000-0002-4499-759X;0000-0002-7438-8365;0000-0001-7700-792X;;0000-0003-2620-1479;0000-0001-8295-2520", "linkedin": ";;;federicoberto/;;;;", "or_profile": "~Haoran_Ye1;~Jiarui_Wang3;~Zhiguang_Cao1;~Federico_Berto1;~Chuanbo_Hua1;~Haeyeon_Kim1;~Jinkyoo_Park1;~Guojie_Song1", "aff": "Suzhou University;Soochow University;Singapore Management University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Peking University", "aff_domain": "suda.edu.cn;suda.edu.cn;smu.edu.sg;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;pku.edu.cn", "position": "Undergrad student;Undergrad student;Assistant Professor;PhD student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nye2024reevo,\ntitle={ReEvo: Large Language Models as Hyper-Heuristics with Reflective Evolution},\nauthor={Haoran Ye and Jiarui Wang and Zhiguang Cao and Federico Berto and Chuanbo Hua and Haeyeon Kim and Jinkyoo Park and Guojie Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=483IPG0HWL}\n}", "github": "", "reviewers": "zrFg;2Tkz;qdUQ;v9WQ", "pdf_size": 909144, "rating": "4;5;5;7", "confidence": "5;5;3;4", "soundness": "3;3;2;3", "novelty": "1;2;2;3", "presentation": "3;3;3;4", "wc_summary": "29;69;41;124", "wc_strengths": "20;42;31;72", "wc_weaknesses": "99;183;171;205", "wc_questions": "213;12;76;5", "wc_limitations": "1;10;19;5", "wc_review": "362;316;338;411", "wc_reply_reviewers": "481;37;33;84", "wc_reply_authors": "864;49;10;197", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.75, 36.62905267680288 ], "wc_strengths_avg": [ 41.25, 19.382659776202026 ], "wc_weaknesses_avg": [ 164.5, 39.733487136167646 ], "wc_questions_avg": [ 76.5, 83.5239486614468 ], "wc_limitations_avg": [ 8.75, 6.722164829874376 ], "wc_review_avg": [ 356.75, 35.29429840639986 ], "wc_reply_reviewers_avg": [ 158.75, 187.12879922662893 ], "wc_reply_authors_avg": [ 280.0, 344.3130842706969 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15599099245778332762&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "suda.edu.cn;suda.edu.cn;smu.edu.sg;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;3;3;3;4", "aff_unique_norm": "Suzhou University;Soochow University;Singapore Management University;Korea Advanced Institute of Science and Technology;Peking University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn;https://www.smu.edu.sg;https://www.kaist.ac.kr;http://www.pku.edu.cn", "aff_unique_abbr": "Suda;Soochow U;SMU;KAIST;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;2;2;0", "aff_country_unique": "China;Singapore;South Korea" }, { "title": "AHA: Human-Assisted Out-of-Distribution Generalization and Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96691", "id": "49hXkwpWKA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=49hXkwpWKA", "openreview": "https://openreview.net/forum?id=49hXkwpWKA", "poster": "/media/PosterPDFs/NeurIPS%202024/96691.png?t=1733430254.5411863", "project": "", "author_site": "Haoyue Bai, Jifan Zhang, Robert Nowak", "tldr": "", "abstract": "Modern machine learning models deployed often encounter distribution shifts in real-world applications, manifesting as covariate or semantic out-of-distribution (OOD) shifts. These shifts give rise to challenges in OOD generalization and OOD detection. This paper introduces a novel, integrated approach AHA (Adaptive Human-Assisted OOD learning) to simultaneously address both OOD generalization and detection through a human-assisted framework by labeling data in the wild. Our approach strategically labels examples within a novel maximum disambiguation region, where the number of semantic and covariate OOD data roughly equalizes. By labeling within this region, we can maximally disambiguate the two types of OOD data, thereby maximizing the utility of the fixed labeling budget. Our algorithm first utilizes a noisy binary search algorithm that identifies the maximal disambiguation region with high probability. The algorithm then continues with annotating inside the identified labeling region, reaping the full benefit of human feedback. Extensive experiments validate the efficacy of our framework. We observed that with only a few hundred human annotations, our method significantly outperforms existing state-of-the-art methods that do not involve human assistance, in both OOD generalization and OOD detection.", "keywords": "OOD Generalization;OOD Detection;Human-Assisted", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Haoyue Bai;Jifan Zhang;Robert D Nowak", "authorids": "~Haoyue_Bai1;~Jifan_Zhang1;~Robert_D_Nowak1", "gender": "F;M;M", "homepage": "https://haoyuebaizju.github.io/;https://jifanz.github.io/;http://nowak.ece.wisc.edu", "dblp": "150/3371.html;277/6616;n/RobertDNowak", "google_scholar": "https://scholar.google.com/citations?view_op=search_authors;ZUOsJWcAAAAJ;fn13u8IAAAAJ", "orcid": "0000-0001-8139-0431;;", "linkedin": "haoyue-bai-a2234a257/;;", "or_profile": "~Haoyue_Bai1;~Jifan_Zhang1;~Robert_D_Nowak1", "aff": "University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu;", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbai2024aha,\ntitle={{AHA}: Human-Assisted Out-of-Distribution Generalization and Detection},\nauthor={Haoyue Bai and Jifan Zhang and Robert D Nowak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=49hXkwpWKA}\n}", "github": "", "reviewers": "Rqpf;Eoxk;CyPp", "pdf_size": 778157, "rating": "5;5;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "58;37;66", "wc_strengths": "55;30;119", "wc_weaknesses": "299;54;99", "wc_questions": "8;2;130", "wc_limitations": "1;2;1", "wc_review": "421;125;415", "wc_reply_reviewers": "293;16;76", "wc_reply_authors": "448;38;112", "reply_reviewers": "2;1;1", "reply_authors": "4;2;3", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.666666666666664, 12.229290885229428 ], "wc_strengths_avg": [ 68.0, 37.47888294315436 ], "wc_weaknesses_avg": [ 150.66666666666666, 106.48421896642192 ], "wc_questions_avg": [ 46.666666666666664, 58.97645481225726 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 320.3333333333333, 138.1432428877922 ], "wc_reply_reviewers_avg": [ 128.33333333333334, 118.98552666419373 ], "wc_reply_authors_avg": [ 199.33333333333334, 178.41026378048497 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3501017513015338257&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "wisc.edu;wisc.edu;", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Slack-Free Spiking Neural Network Formulation for Hypergraph Minimum Vertex Cover", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96690", "id": "4A5IQEjG8c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4A5IQEjG8c", "openreview": "https://openreview.net/forum?id=4A5IQEjG8c", "poster": "/media/PosterPDFs/NeurIPS%202024/96690.png?t=1731754298.2942786", "project": "", "author_site": "Tam Nguyen, Anh-Dzung Doan, zhipeng cai, Tat-Jun Chin", "tldr": "", "abstract": "Neuromorphic computers open up the potential of energy-efficient computation using spiking neural networks (SNN), which consist of neurons that exchange spike-based information asynchronously. In particular, SNNs have shown promise in solving combinatorial optimization. Underpinning the SNN methods is the concept of energy minimization of an Ising model, which is closely related to quadratic unconstrained binary optimization (QUBO). Thus, the starting point for many SNN methods is reformulating the target problem as QUBO, then executing an SNN-based QUBO solver. For many combinatorial problems, the reformulation entails introducing penalty terms, potentially with slack variables, that implement feasibility constraints in the QUBO objective. For more complex problems such as hypergraph minimum vertex cover (HMVC), numerous slack variables are introduced which drastically increase the search domain and reduce the effectiveness of the SNN solver. In this paper, we propose a novel SNN formulation for HMVC. Rather than using penalty terms with slack variables, our SNN architecture introduces additional spiking neurons with a constraint checking and correction mechanism that encourages convergence to feasible solutions. In effect, our method obviates the need for reformulating HMVC as QUBO. Experiments on neuromorphic hardware show that our method consistently yielded high quality solutions for HMVC on real and synthetic instances where the SNN-based QUBO solver often failed, while consuming measurably less energy than global solvers on CPU.", "keywords": "Neuromorphic computing;spiking neural network;hypergraph minimum vertex cover", "primary_area": "optimization", "supplementary_material": "/attachment/fbd276463797d64c5d04aedac2a2cbff79582f6d.zip", "author": "Tam Ngoc-Bang Nguyen;Anh-Dzung Doan;zhipeng cai;Tat-Jun Chin", "authorids": "~Tam_Ngoc-Bang_Nguyen1;~Anh-Dzung_Doan2;~zhipeng_cai3;~Tat-Jun_Chin2", "gender": "F;;M;", "homepage": ";;https://zhipengcai.github.io;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "nnbtam99/;;;", "or_profile": "~Tam_Ngoc-Bang_Nguyen1;~Anh-Dzung_Doan2;~zhipeng_cai3;~Tat-Jun_Chin2", "aff": "University of Adelaide;;Intel;", "aff_domain": "adelaide.edu.au;;intel.com;", "position": "PhD student;;Researcher;", "bibtex": "@inproceedings{\nnguyen2024slackfree,\ntitle={Slack-Free Spiking Neural Network Formulation for Hypergraph Minimum Vertex Cover},\nauthor={Tam Ngoc-Bang Nguyen and Anh-Dzung Doan and zhipeng cai and Tat-Jun Chin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4A5IQEjG8c}\n}", "github": "", "reviewers": "9cvo;cGCZ;K44F;Que6", "pdf_size": 409840, "rating": "5;5;5;8", "confidence": "4;3;1;4", "soundness": "2;2;2;3", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "63;86;48;67", "wc_strengths": "31;64;92;86", "wc_weaknesses": "34;80;158;449", "wc_questions": "27;3;42;40", "wc_limitations": "34;23;6;16", "wc_review": "189;256;346;658", "wc_reply_reviewers": "22;25;0;287", "wc_reply_authors": "0;0;0;513", "reply_reviewers": "1;1;0;2", "reply_authors": "1;1;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 13.546217184144066 ], "wc_strengths_avg": [ 68.25, 23.8995292840675 ], "wc_weaknesses_avg": [ 180.25, 161.3697229966018 ], "wc_questions_avg": [ 28.0, 15.540270267920054 ], "wc_limitations_avg": [ 19.75, 10.207227831296802 ], "wc_review_avg": [ 362.25, 179.6084282543556 ], "wc_reply_reviewers_avg": [ 83.5, 117.88659805083867 ], "wc_reply_authors_avg": [ 128.25, 222.13551607070852 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oJwlHL9z9dcJ:scholar.google.com/&scioq=Slack-Free+Spiking+Neural+Network+Formulation+for+Hypergraph+Minimum+Vertex+Cover&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "adelaide.edu.au;;intel.com;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Adelaide;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "https://www.adelaide.edu.au;https://www.intel.com", "aff_unique_abbr": "Adelaide;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;United States" }, { "title": "FlexSBDD: Structure-Based Drug Design with Flexible Protein Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96689", "id": "4AB54h21qG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4AB54h21qG", "openreview": "https://openreview.net/forum?id=4AB54h21qG", "poster": "", "project": "", "author_site": "ZAIXI ZHANG, Mengdi Wang, Qi Liu", "tldr": "", "abstract": "Structure-based drug design (SBDD), which aims to generate 3D ligand molecules binding to target proteins, is a fundamental task in drug discovery. Existing SBDD methods typically treat protein as rigid and neglect protein structural change when binding with ligand molecules, leading to a big gap with real-world scenarios and inferior generation qualities (e.g., many steric clashes). To bridge the gap, we propose FlexSBDD, a deep generative model capable of accurately modeling the flexible protein-ligand complex structure for ligand molecule generation. FlexSBDD adopts an efficient flow matching framework and leverages E(3)-equivariant network with scalar-vector dual representation to model dynamic structural changes. Moreover, novel data augmentation schemes based on structure relaxation/sidechain repacking are adopted to boost performance. Extensive experiments demonstrate that FlexSBDD achieves state-of-the-art performance in generating high-affinity molecules and effectively modeling the protein's conformation change to increase favorable protein-ligand interactions (e.g., Hydrogen bonds) and decrease steric clashes.", "keywords": "Structure-based drug design;Generative models;AI for Science;Protein Modeling;Steric Clashes;Protein-ligand Interactions", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "ZAIXI ZHANG;Mengdi Wang;Qi Liu", "authorids": "~ZAIXI_ZHANG2;~Mengdi_Wang1;~Qi_Liu3", "gender": "M;F;M", "homepage": "http://home.ustc.edu.cn/~zaixi/;http://mwang.princeton.edu;http://staff.ustc.edu.cn/~qiliuql/", "dblp": "267/9295.html;;95/2446-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;5EoHAFwAAAAJ", "orcid": ";;0000-0001-6956-5550", "linkedin": ";;", "or_profile": "~ZAIXI_ZHANG2;~Mengdi_Wang1;~Qi_Liu3", "aff": "University of Science and Technology of China;Princeton University;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;princeton.edu;ustc.edu.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024flexsbdd,\ntitle={Flex{SBDD}: Structure-Based Drug Design with Flexible Protein Modeling},\nauthor={ZAIXI ZHANG and Mengdi Wang and Qi Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4AB54h21qG}\n}", "github": "", "reviewers": "Y45d;QC25;7REV;jwrm", "pdf_size": 4052692, "rating": "5;5;6;7", "confidence": "5;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "64;43;110;80", "wc_strengths": "41;23;136;55", "wc_weaknesses": "200;52;101;139", "wc_questions": "153;89;146;86", "wc_limitations": "15;8;3;1", "wc_review": "473;215;496;361", "wc_reply_reviewers": "0;10;0;25", "wc_reply_authors": "610;22;319;543", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 24.457871943405053 ], "wc_strengths_avg": [ 63.75, 43.22831826476714 ], "wc_weaknesses_avg": [ 123.0, 54.10637670367514 ], "wc_questions_avg": [ 118.5, 31.11671576500322 ], "wc_limitations_avg": [ 6.75, 5.402545696243577 ], "wc_review_avg": [ 386.25, 111.28201786452293 ], "wc_reply_reviewers_avg": [ 8.75, 10.231690964840562 ], "wc_reply_authors_avg": [ 373.5, 229.77434582650866 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15664032139031198435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;princeton.edu;ustc.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Science and Technology of China;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.princeton.edu", "aff_unique_abbr": "USTC;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "How Does Black-Box Impact the Learning Guarantee of Stochastic Compositional Optimization?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96688", "id": "4AuEQ1FfUf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4AuEQ1FfUf", "openreview": "https://openreview.net/forum?id=4AuEQ1FfUf", "poster": "/media/PosterPDFs/NeurIPS%202024/96688.png?t=1731299036.4090505", "project": "", "author_site": "Jun Chen, Hong Chen, Bin Gu", "tldr": "", "abstract": "Stochastic compositional optimization (SCO) problem constitutes a class of optimization problems characterized by the objective function with a compositional form, including the tasks with known derivatives, such as AUC maximization, and the derivative-free tasks exemplified by black-box vertical federated learning (VFL). From the learning theory perspective, the learning guarantees of SCO algorithms with known derivatives have been studied in the literature. However, the potential impacts of the derivative-free setting on the learning guarantees of SCO remains unclear and merits further investigation. This paper aims to reveal the impacts by developing a theoretical analysis for two derivative-free algorithms, black-box SCGD and SCSC. Specifically, we first provide the sharper generalization upper bounds of convex SCGD and SCSC based on a new stability analysis framework more effective than prior work under some milder conditions, which is further developed to the non-convex case using the almost co-coercivity property of smooth function. Then, we derive the learning guarantees of three black-box variants of non-convex SCGD and SCSC with additional optimization analysis. Comparing these results, we theoretically uncover the impacts that a better gradient estimation brings a tighter learning guarantee and a larger proportion of unknown gradients may lead to a stronger dependence on the gradient estimation quality. Finally, our analysis is applied to two SCO algorithms, FOO-based vertical VFL and VFL-CZOFO, to build the first learning guarantees for VFL that align with the findings of SCGD and SCSC.", "keywords": "Stochastic compositional optimization;theoretical guarantee; black-box optimization; stability analysis", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jun Chen;Hong Chen;Bin Gu", "authorids": "~Jun_Chen12;~Hong_Chen1;~Bin_Gu1", "gender": "M;;M", "homepage": "https://www.researchgate.net/profile/Jun-Chen-256;https://chenhongml.github.io/;https://mbzuai.ac.ae/study/faculty/bin-gu/", "dblp": ";https://dblp.uni-trier.de/pers/hd/c/Chen_0004:Hong;29/1758-1", "google_scholar": "GkZkdRYAAAAJ;;Vo8OgCgAAAAJ", "orcid": "0000-0002-9810-5171;;0000-0001-6049-1815", "linkedin": "%E5%90%9B-%E9%99%88-4a8823276/;;", "or_profile": "~Jun_Chen12;~Hong_Chen1;~Bin_Gu1", "aff": "Huazhong Agricultural University;Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "hzau.edu.cn;hzau.edu.cn;mbzuai.ac.ae", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024how,\ntitle={How Does Black-Box Impact the Learning Guarantee of Stochastic Compositional Optimization?},\nauthor={Jun Chen and Hong Chen and Bin Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4AuEQ1FfUf}\n}", "github": "", "reviewers": "NRWe;Xsnq;e2GF", "pdf_size": 488019, "rating": "5;6;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "22;43;86", "wc_strengths": "36;487;49", "wc_weaknesses": "142;3;123", "wc_questions": "51;3;5", "wc_limitations": "1;1;6", "wc_review": "252;537;269", "wc_reply_reviewers": "17;11;11", "wc_reply_authors": "13;18;13", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 50.333333333333336, 26.637484032009397 ], "wc_strengths_avg": [ 190.66666666666666, 209.60650965294207 ], "wc_weaknesses_avg": [ 89.33333333333333, 61.537702986777866 ], "wc_questions_avg": [ 19.666666666666668, 22.17105219775452 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 352.6666666666667, 130.5279876331339 ], "wc_reply_reviewers_avg": [ 13.0, 2.8284271247461903 ], "wc_reply_authors_avg": [ 14.666666666666666, 2.357022603955158 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0BsE4hhYgOcJ:scholar.google.com/&scioq=How+Does+Black-Box+Impact+the+Learning+Guarantee+of+Stochastic+Compositional+Optimization%3F&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "hzau.edu.cn;hzau.edu.cn;mbzuai.ac.ae", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Huazhong Agricultural University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.hzau.edu.cn/;https://mbzuai.ac.ae", "aff_unique_abbr": "HAU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Towards Dynamic Message Passing on Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96687", "id": "4BWlUJF0E9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4BWlUJF0E9", "openreview": "https://openreview.net/forum?id=4BWlUJF0E9", "poster": "/media/PosterPDFs/NeurIPS%202024/96687.png?t=1731236667.1586096", "project": "", "author_site": "Junshu Sun, Chenxue Yang, Xiangyang Ji, Qingming Huang, Shuhui Wang", "tldr": "", "abstract": "Message passing plays a vital role in graph neural networks (GNNs) for effective feature learning. However, the over-reliance on input topology diminishes the efficacy of message passing and restricts the ability of GNNs. Despite efforts to mitigate the reliance, existing study encounters message-passing bottlenecks or high computational expense problems, which invokes the demands for flexible message passing with low complexity. In this paper, we propose a novel dynamic message-passing mechanism for GNNs. It projects graph nodes and learnable pseudo nodes into a common space with measurable spatial relations between them. With nodes moving in the space, their evolving relations facilitate flexible pathway construction for a dynamic message-passing process. Associating pseudo nodes to input graphs with their measured relations, graph nodes can communicate with each other intermediately through pseudo nodes under linear complexity. We further develop a GNN model named $\\mathtt{N^2}$ based on our dynamic message-passing mechanism. $\\mathtt{N^2}$ employs a single recurrent layer to recursively generate the displacements of nodes and construct optimal dynamic pathways. Evaluation on eighteen benchmarks demonstrates the superior performance of $\\mathtt{N^2}$ over popular GNNs. $\\mathtt{N^2}$ successfully scales to large-scale benchmarks and requires significantly fewer parameters for graph classification with the shared recurrent layer.", "keywords": "Graph Neural Networks;Graph Representation Learning;Node Classification;Graph Classification;Message Passing", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Junshu Sun;Chenxue Yang;Xiangyang Ji;Qingming Huang;Shuhui Wang", "authorids": "~Junshu_Sun1;~Chenxue_Yang1;~Xiangyang_Ji1;~Qingming_Huang2;~Shuhui_Wang1", "gender": ";F;;;M", "homepage": "http://vipl.ict.ac.cn/edu/student/master/202205/t20220518_123547.html;;;https://qmhuang-ucas.github.io/;https://vipl.ict.ac.cn/people/shwang/", "dblp": "354/4214;144/9111.html;;68/4388;37/2537", "google_scholar": ";On-1OAgAAAAJ;;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ;h-JxBSYAAAAJ", "orcid": ";;;;0000-0002-5931-0527", "linkedin": ";;;;", "or_profile": "~Junshu_Sun1;~Chenxue_Yang1;~Xiangyang_Ji1;~Qingming_Huang2;~Shuhui_Wang1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Agricultural Information Institution, Chinese Academy of Agricultural Sciences;;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;caas.cn;;ucas.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2024towards,\ntitle={Towards Dynamic Message Passing on Graphs},\nauthor={Junshu Sun and Chenxue Yang and Xiangyang Ji and Qingming Huang and Shuhui Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4BWlUJF0E9}\n}", "github": "", "reviewers": "4Q2o;ZLZY;UBFx;Qj5n", "pdf_size": 8955864, "rating": "5;5;6;7", "confidence": "4;3;3;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;4;4", "wc_summary": "52;106;75;71", "wc_strengths": "103;143;79;47", "wc_weaknesses": "297;465;46;2", "wc_questions": "70;292;34;51", "wc_limitations": "7;22;17;1", "wc_review": "529;1028;251;172", "wc_reply_reviewers": "63;51;9;20", "wc_reply_authors": "168;54;23;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 76.0, 19.3778223750761 ], "wc_strengths_avg": [ 93.0, 35.04283093587046 ], "wc_weaknesses_avg": [ 202.5, 188.7650656239125 ], "wc_questions_avg": [ 111.75, 104.84363356923491 ], "wc_limitations_avg": [ 11.75, 8.227241335952167 ], "wc_review_avg": [ 495.0, 335.07834904690577 ], "wc_reply_reviewers_avg": [ 35.75, 22.01561945528674 ], "wc_reply_authors_avg": [ 68.0, 58.953371404865386 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2786134144755614772&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;caas.cn;;ucas.ac.cn;ict.ac.cn", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Chinese Academy of Sciences;Chinese Academy of Agricultural Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;Agricultural Information Institution;", "aff_unique_url": "http://www.ict.ac.cn;http://www.caas.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Bias Detection via Signaling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96686", "id": "4D7haH4pdR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4D7haH4pdR", "openreview": "https://openreview.net/forum?id=4D7haH4pdR", "poster": "/media/PosterPDFs/NeurIPS%202024/96686.png?t=1732474057.233662", "project": "", "author_site": "Yiling Chen, Tao Lin, Ariel Procaccia, Aaditya Ramdas, Itai Shapira", "tldr": "", "abstract": "We introduce and study the problem of detecting whether an agent is updating their prior beliefs given new evidence in an optimal way that is Bayesian, or whether they are biased towards their own prior. In our model, biased agents form posterior beliefs that are a convex combination of their prior and the Bayesian posterior, where the more biased an agent is, the closer their posterior is to the prior. Since we often cannot observe the agent's beliefs directly, we take an approach inspired by *information design*. Specifically, we measure an agent's bias by designing a *signaling scheme* and observing the actions they take in response to different signals, assuming that they are maximizing their own expected utility; our goal is to detect bias with a minimum number of signals. Our main results include a characterization of scenarios where a single signal suffices and a computationally efficient algorithm to compute optimal signaling schemes.", "keywords": "Information design;algorithmic fairness;Bayesian inference", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Yiling Chen;Tao Lin;Ariel D. Procaccia;Aaditya Ramdas;Itai Shapira", "authorids": "~Yiling_Chen1;~Tao_Lin2;~Ariel_D._Procaccia1;~Aaditya_Ramdas2;~Itai_Shapira1", "gender": "F;M;M;M;M", "homepage": "https://yiling.seas.harvard.edu/;https://tao-l.github.io/;http://stat.cmu.edu/~aramdas;https://ishapira1.github.io/;http://procaccia.info/", "dblp": "72/3762-1;64/4492-13;117/3518;342/2947;p/ArielDProcaccia", "google_scholar": "x_7xA0UAAAAJ;https://scholar.google.com/citations?hl=en;ZvFaPxUAAAAJ;JOQXRbIAAAAJ;https://scholar.google.com.tw/citations?user=8ZpV-lkAAAAJ", "orcid": ";;0000-0003-0497-311X;;", "linkedin": ";;;itai-shapira-968362171;", "or_profile": "~Yiling_Chen1;~Tao_Lin2;~Aaditya_Ramdas2;~Itai_Shapira1;~Ariel_Procaccia1", "aff": "Harvard University;Harvard University;Carnegie Mellon University;Harvard University, Harvard University;Harvard University", "aff_domain": "fas.harvard.edu;g.harvard.edu;cmu.edu;g.harvard.edu;harvard.edu", "position": "Full Professor;PhD student;Assistant Professor;PhD student;Gordon McKay Professor of Computer Science", "bibtex": "@inproceedings{\nchen2024bias,\ntitle={Bias Detection via Signaling},\nauthor={Yiling Chen and Tao Lin and Ariel D. Procaccia and Aaditya Ramdas and Itai Shapira},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4D7haH4pdR}\n}", "github": "", "reviewers": "rhkn;7Pvv;NFob;1o3u", "pdf_size": 451726, "rating": "5;5;6;7", "confidence": "3;5;3;5", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "87;90;78;171", "wc_strengths": "32;45;37;123", "wc_weaknesses": "74;240;110;122", "wc_questions": "64;106;2;2", "wc_limitations": "13;1;1;1", "wc_review": "270;482;228;419", "wc_reply_reviewers": "0;77;42;12", "wc_reply_authors": "30;41;17;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.5, 37.5 ], "wc_strengths_avg": [ 59.25, 37.09700122651425 ], "wc_weaknesses_avg": [ 136.5, 62.311716394270505 ], "wc_questions_avg": [ 43.5, 44.07663780280887 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 349.75, 104.24580327284164 ], "wc_reply_reviewers_avg": [ 32.75, 29.77729839995563 ], "wc_reply_authors_avg": [ 22.0, 15.280706789936126 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8471624699315511686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "fas.harvard.edu;g.harvard.edu;cmu.edu;g.harvard.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Harvard University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.cmu.edu", "aff_unique_abbr": "Harvard;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "WATT: Weight Average Test Time Adaptation of CLIP", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96685", "id": "4D7hnJ9oM6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4D7hnJ9oM6", "openreview": "https://openreview.net/forum?id=4D7hnJ9oM6", "poster": "/media/PosterPDFs/NeurIPS%202024/96685.png?t=1731352772.5105858", "project": "", "author_site": "David OSOWIECHI, Mehrdad Noori, Gustavo Vargas Hakim, Moslem Yazdanpanah, Ali Bahri, Milad Cheraghalikhani, Sahar Dastani, Farzad Beizaee, Ismail Ayed, Christian Desrosiers", "tldr": "", "abstract": "Vision-Language Models (VLMs) such as CLIP have yielded unprecedented performances for zero-shot image classification, yet their generalization capability may still be seriously challenged when confronted to domain shifts. In response, we present Weight Average Test-Time Adaptation (WATT) of CLIP, a new approach facilitating full test-time adaptation (TTA) of this VLM. Our method employs a diverse set of templates for text prompts, augmenting the existing framework of CLIP. Predictions are utilized as pseudo labels for model updates, followed by weight averaging to consolidate the learned information globally. Furthermore, we introduce a text ensemble strategy, enhancing the overall test performance by aggregating diverse textual cues.\nOur findings underscore the effectiveness of WATT across diverse datasets, including CIFAR-10-C, CIFAR-10.1, CIFAR-100-C, VisDA-C, and several other challenging datasets, effectively covering a wide range of domain shifts. Notably, these enhancements are achieved without the need for additional model transformations or trainable modules. Moreover, compared to other TTA methods, our approach can operate effectively with just a single image. The code is available at: https://github.com/Mehrdad-Noori/WATT.", "keywords": "Weight Averaging;Test Time Adaptation;CLIP;Text Ensemble", "primary_area": "online_learning", "supplementary_material": "", "author": "David OSOWIECHI;Mehrdad Noori;Gustavo Adolfo Vargas Hakim;Moslem Yazdanpanah;Ali Bahri;Milad Cheraghalikhani;Sahar Dastani;Farzad Beizaee;Ismail Ben Ayed;Christian Desrosiers", "authorids": "~David_OSOWIECHI1;~Mehrdad_Noori1;~Gustavo_Adolfo_Vargas_Hakim1;~Moslem_Yazdanpanah1;~Ali_Bahri1;~Milad_Cheraghalikhani1;~Sahar_Dastani1;~Farzad_Beizaee1;~Ismail_Ben_Ayed1;~Christian_Desrosiers1", "gender": ";M;M;M;M;M;F;M;M;M", "homepage": ";https://www.linkedin.com/in/menoori;;https://mosymosy.github.io/;https://www.linkedin.com/in/ali-bahri72;;https://sahardastani.github.io/;;https://profs.etsmtl.ca/ibenayed/;", "dblp": ";255/9308;;327/3555;255/9341;;;;68/4478;15/4137.html", "google_scholar": "https://scholar.google.ca/citations?hl=fr;01l47WEAAAAJ;9q4ltOcAAAAJ;lt7_RKkAAAAJ;0gSvAv8AAAAJ;;wxbHuCsAAAAJ;https://scholar.google.ca/citations?user=bvKkqO0AAAAJ;https://scholar.google.ca/citations?user=29vyUccAAAAJ;https://scholar.google.ca/citations?user=XJ0jF3gAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";menoori;;moslem-yazdanpanah-68789168/;ali-bahri72;miladalikhani/;sahar-dastani-a2aab0186/;fbeizaee/;;", "or_profile": "~David_OSOWIECHI1;~Mehrdad_Noori1;~Gustavo_Adolfo_Vargas_Hakim1;~Moslem_Yazdanpanah1;~Ali_Bahri1;~Milad_Cheraghalikhani1;~Sahar_Dastani1;~Farzad_Beizaee1;~Ismail_Ben_Ayed1;~Christian_Desrosiers1", "aff": "\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure", "aff_domain": "etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;MS student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nosowiechi2024watt,\ntitle={{WATT}: Weight Average Test Time Adaptation of {CLIP}},\nauthor={David OSOWIECHI and Mehrdad Noori and Gustavo Adolfo Vargas Hakim and Moslem Yazdanpanah and Ali Bahri and Milad Cheraghalikhani and Sahar Dastani and Farzad Beizaee and Ismail Ben Ayed and Christian Desrosiers},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4D7hnJ9oM6}\n}", "github": "", "reviewers": "y4Me;wUmo;HDdi", "pdf_size": 725710, "rating": "6;7;8", "confidence": "4;3;4", "soundness": "3;4;3", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "38;79;83", "wc_strengths": "23;43;144", "wc_weaknesses": "109;20;117", "wc_questions": "41;122;48", "wc_limitations": "7;16;44", "wc_review": "218;280;436", "wc_reply_reviewers": "37;25;99", "wc_reply_authors": "0;39;28", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.66666666666667, 20.33606539022619 ], "wc_strengths_avg": [ 70.0, 52.959103718498355 ], "wc_weaknesses_avg": [ 82.0, 43.96210489349511 ], "wc_questions_avg": [ 70.33333333333333, 36.64544840616483 ], "wc_limitations_avg": [ 22.333333333333332, 15.755069730795299 ], "wc_review_avg": [ 311.3333333333333, 91.71453295719036 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 32.42769735204082 ], "wc_reply_authors_avg": [ 22.333333333333332, 16.418147141366333 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6950054990477469001&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca;etsmtl.ca", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;0;1", "aff_unique_norm": "Universit\u00e9 du Qu\u00e9bec;\u00c9cole de technologie sup\u00e9rieure", "aff_unique_dep": ";", "aff_unique_url": "https://www.etsmtl.ca;https://www.etsmtl.ca", "aff_unique_abbr": "ETS;ETS", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "\u00c9cole de technologie sup\u00e9rieure;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Expectile Regularization for Fast and Accurate Training of Neural Optimal Transport", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96684", "id": "4DA5vaPHFb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4DA5vaPHFb", "openreview": "https://openreview.net/forum?id=4DA5vaPHFb", "poster": "/media/PosterPDFs/NeurIPS%202024/96684.png?t=1731506451.524558", "project": "", "author_site": "Nazar Buzun, Maksim Bobrin, Dmitry V. Dylov", "tldr": "", "abstract": "We present a new approach for Neural Optimal Transport (NOT) training procedure, capable of accurately and efficiently estimating optimal transportation plan via specific regularization on dual Kantorovich potentials. The main bottleneck of existing NOT solvers is associated with the procedure of finding a near-exact approximation of the conjugate operator (i.e., the c-transform), which is done either by optimizing over non-convex max-min objectives or by the computationally intensive fine-tuning of the initial approximated prediction. We resolve both issues by proposing a new theoretically justified loss in the form of expectile regularization which enforces binding conditions on the learning process of the dual potentials. Such a regularization provides the upper bound estimation over the distribution of possible conjugate potentials and makes the learning stable, completely eliminating the need for additional extensive fine-tuning. Proposed method, called Expectile-Regularized Neural Optimal Transport (ENOT), outperforms previous state-of-the-art approaches in the established Wasserstein-2 benchmark tasks by a large margin (up to a 3-fold improvement in quality and up to a 10-fold improvement in runtime). Moreover, we showcase performance of ENOT for various cost functions in different tasks, such as image generation, demonstrating generalizability and robustness of the proposed algorithm.", "keywords": "Optimal Transport;Neural Optimal Transport;Expectile;Regularization;Kantorovich potentials", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/7e731ea1927b45365d35e6e006f37645f5db1319.zip", "author": "Nazar Buzun;Maksim Bobrin;Dmitry V. Dylov", "authorids": "~Nazar_Buzun1;~Maksim_Bobrin1;~Dmitry_V._Dylov1", "gender": "M;M;M", "homepage": ";https://github.com/skylooop;", "dblp": "154/7701;;201/7696", "google_scholar": "dxR12zkAAAAJ;8vy8vqgAAAAJ;mhhvib8AAAAJ", "orcid": "0000-0002-4649-2827;;0000-0003-2251-3221", "linkedin": ";;dylov", "or_profile": "~Nazar_Buzun1;~Maksim_Bobrin1;~Dmitry_V_Dylov1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Skoltech", "aff_domain": "skoltech.ru;skoltech.ru;skoltech.ru", "position": "Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbuzun2024expectile,\ntitle={Expectile Regularization for Fast and Accurate Training of Neural Optimal Transport},\nauthor={Nazar Buzun and Maksim Bobrin and Dmitry V. Dylov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4DA5vaPHFb}\n}", "github": "", "reviewers": "qjtB;E19z;QVdS;ZPj5;2fef", "pdf_size": 19480582, "rating": "6;6;7;7;7", "confidence": "3;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;4;3", "presentation": "2;2;3;3;3", "wc_summary": "39;78;51;40;62", "wc_strengths": "43;11;61;43;19", "wc_weaknesses": "19;369;42;63;24", "wc_questions": "125;2;92;137;153", "wc_limitations": "9;2;22;3;1", "wc_review": "235;462;268;286;259", "wc_reply_reviewers": "26;172;89;35;0", "wc_reply_authors": "13;370;36;8;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;3;2;2;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 54.0, 14.628738838327793 ], "wc_strengths_avg": [ 35.4, 18.084247288731707 ], "wc_weaknesses_avg": [ 103.4, 133.69607324076503 ], "wc_questions_avg": [ 101.8, 53.76764826547652 ], "wc_limitations_avg": [ 7.4, 7.812809993849845 ], "wc_review_avg": [ 302.0, 81.67006795638167 ], "wc_reply_reviewers_avg": [ 64.4, 61.105155265329294 ], "wc_reply_authors_avg": [ 85.4, 142.80280109297578 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9585125044967194723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "skoltech.ru;skoltech.ru;skoltech.ru", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.skoltech.ru", "aff_unique_abbr": "Skoltech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Efficiency of the First-Price Auction in the Autobidding World", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96683", "id": "4DHoSjET4R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4DHoSjET4R", "openreview": "https://openreview.net/forum?id=4DHoSjET4R", "poster": "/media/PosterPDFs/NeurIPS%202024/96683.png?t=1733457834.92775", "project": "", "author_site": "Yuan Deng, Jieming Mao, Vahab Mirrokni, Hanrui Zhang, Song Zuo", "tldr": "", "abstract": "We study the price of anarchy of first-price auctions in the autobidding world, where bidders can be either utility maximizers (i.e., traditional bidders) or value maximizers (i.e., autobidders). We show that with autobidders only, the price of anarchy of first-price auctions is $1/2$, and with both kinds of bidders, the price of anarchy degrades to about $0.457$ (the precise number is given by an optimization). These results complement the recent result by [Jin and Lu, 2022] showing that the price of anarchy of first-price auctions with traditional bidders is $1 - 1/e^2$. We further investigate a setting where the seller can utilize machine-learned advice to improve the efficiency of the auctions. There, we show that as the accuracy of the advice increases, the price of anarchy improves smoothly from about $0.457$ to $1$.", "keywords": "first-price auctions;ad auctions;autobidding;price of anarchy", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Yuan Deng;Jieming Mao;Vahab Mirrokni;Hanrui Zhang;Song Zuo", "authorids": "~Yuan_Deng1;~Jieming_Mao1;~Vahab_Mirrokni2;~Hanrui_Zhang1;~Song_Zuo1", "gender": "M;M;M;;", "homepage": "https://sites.google.com/view/yuandeng;https://sites.google.com/seas.upenn.edu/jiemingmao/;https://people.csail.mit.edu/mirrokni/Welcome.html;;https://sites.google.com/corp/view/songzuo", "dblp": "58/366;123/4948;m/VahabSMirrokni;168/8847;123/4898", "google_scholar": "OoTMmy8AAAAJ;;opbZfw0AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": "yuandeng/yuandeng/;;;;", "or_profile": "~Yuan_Deng1;~Jieming_Mao1;~Vahab_Mirrokni2;~Hanrui_Zhang1;~Song_Zuo1", "aff": "Google Research;Google;Google Research;Google Research;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;VP, Google Fellow;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndeng2024efficiency,\ntitle={Efficiency of the First-Price Auction in the Autobidding World},\nauthor={Yuan Deng and Jieming Mao and Vahab Mirrokni and Hanrui Zhang and Song Zuo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4DHoSjET4R}\n}", "github": "", "reviewers": "wwix;HcJX;9Zo1;fGt9", "pdf_size": 1100657, "rating": "3;6;6;8", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "1;3;3;4", "presentation": "2;3;3;4", "wc_summary": "123;80;175;262", "wc_strengths": "26;93;1;69", "wc_weaknesses": "435;58;46;42", "wc_questions": "5;16;20;6", "wc_limitations": "7;9;5;24", "wc_review": "596;256;247;403", "wc_reply_reviewers": "610;6;29;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 160.0, 67.819613682179 ], "wc_strengths_avg": [ 47.25, 35.90525727522364 ], "wc_weaknesses_avg": [ 145.25, 167.39082262776535 ], "wc_questions_avg": [ 11.75, 6.417748826496718 ], "wc_limitations_avg": [ 11.25, 7.495832175282475 ], "wc_review_avg": [ 375.5, 141.5706537386898 ], "wc_reply_reviewers_avg": [ 163.75, 257.78903681111035 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7001400420140049, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2350457374039560051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Score Distillation via Reparametrized DDIM", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96682", "id": "4DcpFagQ9e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4DcpFagQ9e", "openreview": "https://openreview.net/forum?id=4DcpFagQ9e", "poster": "/media/PosterPDFs/NeurIPS%202024/96682.png?t=1732315721.0955563", "project": "", "author_site": "Artem Lukoianov, Haitz S\u00e1ez de Oc\u00e1riz Borde, Kristjan Greenewald, Vitor Guizilini, Timur Bagautdinov, Vincent Sitzmann, Justin Solomon", "tldr": "", "abstract": "While 2D diffusion models generate realistic, high-detail images, 3D shape generation methods like Score Distillation Sampling (SDS) built on these 2D diffusion models produce cartoon-like, over-smoothed shapes. To help explain this discrepancy, we show that the image guidance used in Score Distillation can be understood as the velocity field of a 2D denoising generative process, up to the choice of a noise term. In particular, after a change of variables, SDS resembles a high-variance version of Denoising Diffusion Implicit Models (DDIM) with a differently-sampled noise term: SDS introduces noise i.i.d. randomly at each step, while DDIM infers it from the previous noise predictions. This excessive variance can lead to over-smoothing and unrealistic outputs. We show that a better noise approximation can be recovered by inverting DDIM in each SDS update step. This modification makes SDS's generative process for 2D images almost identical to DDIM. In 3D, it removes over-smoothing, preserves higher-frequency detail, and brings the generation quality closer to that of 2D samplers. Experimentally, our method achieves better or similar 3D generation quality compared to other state-of-the-art Score Distillation methods, all without training additional neural networks or multi-view supervision, and providing useful insights into relationship between 2D and 3D asset generation with diffusion models.", "keywords": "3D generation;diffusion;score distillation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/00c0d1def278308716ed438e72f8d33b47789291.zip", "author": "Artem Lukoianov;Haitz S\u00e1ez de Oc\u00e1riz Borde;Kristjan Greenewald;Vitor Campagnolo Guizilini;Timur Bagautdinov;Vincent Sitzmann;Justin Solomon", "authorids": "~Artem_Lukoianov1;~Haitz_S\u00e1ez_de_Oc\u00e1riz_Borde1;~Kristjan_Greenewald1;~Vitor_Campagnolo_Guizilini2;~Timur_Bagautdinov1;~Vincent_Sitzmann1;~Justin_Solomon1", "gender": "M;M;;M;M;M;M", "homepage": "https://lukoianov.com;https://www.linkedin.com/in/haitz-s%C3%A1ez-de-oc%C3%A1riz-borde-0933a9199/;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;;;https://vsitzmann.github.io;http://people.csail.mit.edu/jsolomon/", "dblp": "267/1639;;146/0563;;145/3196;192/1958;80/5094", "google_scholar": "A9tNPiQAAAAJ;aP0OakUAAAAJ;L3zNUG4AAAAJ;UH9tP6QAAAAJ;oLi7xJ0AAAAJ;X44QVV4AAAAJ;pImSVwoAAAAJ", "orcid": "0000-0001-8308-1923;;;;;0000-0002-0107-5704;0000-0002-7701-7586", "linkedin": "artem-lukoianov/;;;vitorguizilini/;bagautdinov/;vincentsitzmann/;justin-solomon-8a587914/", "or_profile": "~Artem_Lukoianov1;~Haitz_S\u00e1ez_de_Oc\u00e1riz_Borde1;~Kristjan_Greenewald1;~Vitor_Campagnolo_Guizilini2;~Timur_Bagautdinov1;~Vincent_Sitzmann1;~Justin_Solomon1", "aff": "Massachusetts Institute of Technology;University of Oxford;MIT-IBM Watson AI Lab, IBM Research;Toyota Research Institute;Reality Labs Research;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ox.ac.uk;ibm.com;tri.global;meta.com;mit.edu;mit.edu", "position": "PhD student;PhD student;Research Scientist;Staff Research Scientist;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlukoianov2024score,\ntitle={Score Distillation via Reparametrized {DDIM}},\nauthor={Artem Lukoianov and Haitz S{\\'a}ez de Oc{\\'a}riz Borde and Kristjan Greenewald and Vitor Campagnolo Guizilini and Timur Bagautdinov and Vincent Sitzmann and Justin Solomon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4DcpFagQ9e}\n}", "github": "", "reviewers": "4KTy;r6QS;ZvgL", "pdf_size": 27451495, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "37;43;104", "wc_strengths": "56;95;98", "wc_weaknesses": "242;99;139", "wc_questions": "105;22;6", "wc_limitations": "10;4;6", "wc_review": "450;263;353", "wc_reply_reviewers": "35;0;38", "wc_reply_authors": "66;60;101", "reply_reviewers": "1;0;1", "reply_authors": "3;2;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 61.333333333333336, 30.26916289265731 ], "wc_strengths_avg": [ 83.0, 19.131126469708992 ], "wc_weaknesses_avg": [ 160.0, 60.23841520713063 ], "wc_questions_avg": [ 44.333333333333336, 43.39226761634945 ], "wc_limitations_avg": [ 6.666666666666667, 2.494438257849294 ], "wc_review_avg": [ 355.3333333333333, 76.36025726049441 ], "wc_reply_reviewers_avg": [ 24.333333333333332, 17.249798710580816 ], "wc_reply_authors_avg": [ 75.66666666666667, 18.080068829760823 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11228565706645597866&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "mit.edu;ox.ac.uk;ibm.com;tri.global;meta.com;mit.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Oxford;IBM;Toyota Research Institute;Reality Labs", "aff_unique_dep": ";;AI Lab;;Research", "aff_unique_url": "https://web.mit.edu;https://www.ox.ac.uk;https://www.ibmwatsonai.org/;https://www.tri.global;https://www.realitylabs.com", "aff_unique_abbr": "MIT;Oxford;MIT-IBM AI Lab;TRI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "4FwlejUlg5", "title": "Causal Effect Estimation with Mixed Latent Confounders and Post-treatment Variables", "track": "main", "status": "Reject", "tldr": "", "abstract": "Causal inference from observational data has attracted considerable attention among researchers. One main obstacle to drawing valid causal conclusions is handling of confounders. As the direct measurement of confounders may not always be feasible, recent methods seek to address the confounding bias via proxy variables, i.e., variables postulated to be causally related to unobserved confounders. However, the selected proxies may scramble both latent confounders and latent post-treatment variables in practice, where existing methods risk biasing the estimation by unintentionally controlling for variables affected by the treatment. In this paper, we systematically investigate the bias of latent post-treatment variables, i.e., latent post-treatment bias, in causal effect estimation. We first derive the bias of existing covariate adjustment-based methods when selected proxies scramble both latent confounders and latent post-treatment variables, which we demonstrate can be arbitrarily bad. We then propose a novel Confounder-identifiable VAE (CiVAE) to address the bias. CiVAE is built upon a mild assumption that the prior of latent variables that generate the proxy belongs to a general exponential family with at least one invertible sufficient statistic in the factorized part. Based on this assumption, we show that latent confounders and latent post-treatment variables can be individually identified up to simple bijective transformations. We then prove that with individual identification, the intractable disentanglement problem of latent confounders and post-treatment variables can be transformed to a tractable conditional independence test problem. Finally, we prove that the true causal effects can be unbiasedly estimated with transformed confounders inferred by CiVAE. Experiments on both simulated and real-world datasets demonstrate that CiVAE is significantly more robust to latent post-treatment bias than existing methods. The codes are provided in the following anonymous URL: https://anonymous.4open.science/r/CiVAE-demo-E701/readme.md", "keywords": "causal effect estimation;latent post-treatment bias;identifiable VAE", "primary_area": "causal_inference", "supplementary_material": "", "author": "Yaochen Zhu;Jing Ma;Liang Wu;Qi Guo;Liangjie Hong;Jundong Li", "authorids": "~Yaochen_Zhu1;~Jing_Ma2;~Liang_Wu3;~Qi_Guo9;~Liangjie_Hong1;~Jundong_Li2", "gender": "M;F;M;M;M;M", "homepage": "http://www.ychzhu.com/;https://jma712.github.io/;http://liangwu.me/;https://qiguo.xyz/;https://www.hongliangjie.com/;https://jundongli.github.io/", "dblp": "251/3533;96/6129-2;https://dblp.org/pers/hd/w/Wu_0006:Liang;67/398-3;00/7186;144/7997.html", "google_scholar": "mNKYtHEAAAAJ;VLElvX8AAAAJ;r2BP6FsAAAAJ;IatRsPEAAAAJ;4uaSNpYAAAAJ;uY6ek7sAAAAJ", "orcid": ";;;0009-0009-0078-1533;;", "linkedin": ";;wuliang1/;qi-guo/;liangjiehong/;", "or_profile": "~Yaochen_Zhu1;~Jing_Ma2;~Liang_Wu3;~Qi_Guo9;~Liangjie_Hong1;~Jundong_Li2", "aff": "LinkedIn;Case Western Reserve University;LinkedIn;LinkedIn;LinkedIn;University of Virginia", "aff_domain": "linkedin.com;case.edu;linkedin.com;linkedin.com;linkedin.com;virginia.edu", "position": "Intern;Assistant Professor;Applied Scientist;Researcher;Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024causal,\ntitle={Causal Effect Estimation with Mixed Latent Confounders and Post-treatment Variables},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=4FwlejUlg5}\n}", "github": "", "project": "", "reviewers": "kHjK;jEuj;FDXX;VF9h", "site": "https://openreview.net/forum?id=4FwlejUlg5", "pdf_size": 566447, "rating": "5;6;6;7", "confidence": "3;3;2;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "66;123;75;122", "wc_strengths": "39;57;34;65", "wc_weaknesses": "165;215;75;5", "wc_questions": "2;106;67;201", "wc_limitations": "21;17;16;1", "wc_review": "293;518;267;394", "wc_reply_reviewers": "19;76;0;25", "wc_reply_authors": "52;282;0;31", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.5, 26.196373794859472 ], "wc_strengths_avg": [ 48.75, 12.695963925594622 ], "wc_weaknesses_avg": [ 115.0, 80.93207028119323 ], "wc_questions_avg": [ 94.0, 72.0867532907399 ], "wc_limitations_avg": [ 13.75, 7.595228765481656 ], "wc_review_avg": [ 368.0, 98.74462010661644 ], "wc_reply_reviewers_avg": [ 30.0, 28.115831839019098 ], "wc_reply_authors_avg": [ 91.25, 111.67223244835755 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-W6LS7n_ENoJ:scholar.google.com/&scioq=Causal+Effect+Estimation+with+Mixed+Latent+Confounders+and+Post-treatment+Variables&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "LinkedIn Corporation;Case Western Reserve University;University of Virginia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.linkedin.com;https://www.case.edu;https://www.virginia.edu", "aff_unique_abbr": "LinkedIn;CWRU;UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Linear Regression using Heterogeneous Data Batches", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96681", "id": "4G2DN4Kjk1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4G2DN4Kjk1", "openreview": "https://openreview.net/forum?id=4G2DN4Kjk1", "poster": "", "project": "", "author_site": "Ayush Jain, Rajat Sen, Weihao Kong, Abhimanyu Das, Alon Orlitsky", "tldr": "", "abstract": "In many learning applications, data are collected from multiple sources, each providing a \\emph{batch} of samples that by itself is insufficient to learn its input-output relationship. A common approach assumes that the sources fall in one of several unknown subgroups, each with an unknown input distribution and input-output relationship. We consider one of this setup's most fundamental and important manifestations where the output is a noisy linear combination of the inputs, and there are $k$ subgroups, each with its own regression vector. Prior work [KSS$^+$20] showed that with abundant small-batches, the regression vectors can be learned with only few, $\\tilde\\Omega( k^{3/2})$, batches of medium-size with $\\tilde\\Omega(\\sqrt k)$ samples each. However, the paper requires that the input distribution for all $k$ subgroups be isotropic Gaussian, and states that removing this assumption is an ``interesting and challenging problem\". We propose a novel gradient-based algorithm that improves on the existing results in several ways. It extends the applicability of the algorithm by: (1) allowing the subgroups' underlying input distributions to be different, unknown, and heavy-tailed; (2) recovering all subgroups followed by a significant proportion of batches even for infinite $k$; (3) removing the separation requirement between the regression vectors; (4) reducing the number of batches and allowing smaller batch sizes.", "keywords": "Machine Learning;Liner Regression;Heterogeneous Data;Mixed linear regression;List decodable learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/0bab122a4ae24357735501519ad33843b5a8f449.zip", "author": "Ayush Jain;Rajat Sen;Weihao Kong;Abhimanyu Das;Alon Orlitsky", "authorids": "~Ayush_Jain4;~Rajat_Sen1;~Weihao_Kong1;~Abhimanyu_Das2;~Alon_Orlitsky1", "gender": "M;M;;M;M", "homepage": ";http://rajatsen91.github.io;https://weihaokong.github.io/;https://sites.google.com/site/abhidas/;http://ucsd.edu/alon", "dblp": ";http://dblp.uni-trier.de/pers/hd/s/Sen:Rajat;117/4343;83/6359;o/AlonOrlitsky", "google_scholar": "i1NrG8UAAAAJ;YzsCLBoAAAAJ;loxOHhoAAAAJ;;", "orcid": ";;;;", "linkedin": ";rajat-sen-a8702417/;;;", "or_profile": "~Ayush_Jain4;~Rajat_Sen1;~Weihao_Kong1;~Abhimanyu_Das2;~Alon_Orlitsky1", "aff": "Granica;Google;Google;Research, Google;University of California, San Diego", "aff_domain": "granica.ai;google.com;google.com;research.google.com;ucsd.edu", "position": "Researcher;Research Scientist;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\njain2024linear,\ntitle={Linear Regression using Heterogeneous Data Batches},\nauthor={Ayush Jain and Rajat Sen and Weihao Kong and Abhimanyu Das and Alon Orlitsky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4G2DN4Kjk1}\n}", "github": "", "reviewers": "D4Ej;pdPW;oZju;Xj2G;ntyP", "pdf_size": 590967, "rating": "7;7;7;7;7", "confidence": "4;3;4;3;3", "soundness": "3;4;3;4;4", "novelty": "4;3;3;3;3", "presentation": "3;2;4;3;3", "wc_summary": "89;68;208;58;79", "wc_strengths": "87;85;181;128;37", "wc_weaknesses": "239;331;111;126;52", "wc_questions": "2;10;14;153;55", "wc_limitations": "9;26;1;1;2", "wc_review": "426;520;515;466;225", "wc_reply_reviewers": "24;20;26;21;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 100.4, 54.796350243424065 ], "wc_strengths_avg": [ 103.6, 48.255984084878015 ], "wc_weaknesses_avg": [ 171.8, 100.00679976881571 ], "wc_questions_avg": [ 46.8, 56.183271531657894 ], "wc_limitations_avg": [ 7.8, 9.579144011862438 ], "wc_review_avg": [ 430.4, 108.31546519311081 ], "wc_reply_reviewers_avg": [ 18.2, 9.346657156438337 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8742328683525106410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "granica.ai;google.com;google.com;research.google.com;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Granica;Google;University of California, San Diego", "aff_unique_dep": ";Google;", "aff_unique_url": ";https://www.google.com;https://www.ucsd.edu", "aff_unique_abbr": ";Google;UCSD", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Mountain View;San Diego", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Learnability Matters: Active Learning for Video Captioning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96680", "id": "4GP7S7U0lJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4GP7S7U0lJ", "openreview": "https://openreview.net/forum?id=4GP7S7U0lJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96680.png?t=1733759180.5876324", "project": "", "author_site": "Yiqian Zhang, Buyu Liu, Jun Bao, Qiang Huang, Min Zhang, Jun Yu", "tldr": "", "abstract": "This work focuses on the active learning in video captioning. In particular, we propose to address the learnability problem in active learning, which has been brought up by collective outliers in video captioning and neglected in the literature. To start with, we conduct a comprehensive study of collective outliers, exploring their hard-to-learn property and concluding that ground truth inconsistency is one of the main causes. Motivated by this, we design a novel active learning algorithm that takes three complementary aspects, namely learnability, diversity, and uncertainty, into account. Ideally, learnability is reflected by ground truth consistency. Under the active learning scenario where ground truths are not available until human involvement, we measure the consistency on estimated ground truths, where predictions from off-the-shelf models are utilized as approximations to ground truths. These predictions are further used to estimate sample frequency and reliability, evincing the diversity and uncertainty respectively. With the help of our novel caption-wise active learning protocol, our algorithm is capable of leveraging knowledge from humans in a more effective yet intellectual manner. Results on publicly available video captioning datasets with diverse video captioning models demonstrate that our algorithm outperforms SOTA active learning methods by a large margin, e.g. we achieve about 103% of full performance on CIDEr with 25% of human annotations on MSR-VTT.", "keywords": "Active Learning;Video Captioning", "primary_area": "active_learning", "supplementary_material": "", "author": "Yiqian Zhang;Buyu Liu;Jun Bao;Qiang Huang;Min Zhang;Jun Yu", "authorids": "~Yiqian_Zhang1;~Buyu_Liu3;~Jun_Bao1;~Qiang_Huang3;~Min_Zhang9;~Jun_Yu1", "gender": "M;F;M;M;M;M", "homepage": ";https://sites.google.com/site/buyuliu911/home;;https://sites.google.com/site/qianghuang2017/;https://zhangmin-nlp-ai.github.io/;http://faculty.hitsz.edu.cn/yujun", "dblp": ";147/2755;;80/2732-8.html;83/5342-5;50/5754-2.html", "google_scholar": "aa4yi-MAAAAJ;67HpPiEAAAAJ;https://scholar.google.com.au/citations?hl=en;y7q6ZxUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;3XTEwtAAAAAJ", "orcid": ";;;0000-0003-1120-4685;;", "linkedin": ";;;https://www.linkedin.cn/in/qiang-huang-41b50880/;;", "or_profile": "~Yiqian_Zhang1;~Buyu_Liu3;~Jun_Bao1;~Qiang_Huang3;~Min_Zhang9;~Jun_Yu1", "aff": "Hangzhou Dianzi University;Zhejiang University;Zhejiang University;National University of Singapore;Harbin Institute of Technology, Shenzhen;Hangzhou Dianzi University", "aff_domain": "hdu.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;hit.edu.cn;hdu.edu.cn", "position": "PhD student;Assistant Professor;Researcher;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024learnability,\ntitle={Learnability Matters: Active Learning for Video Captioning},\nauthor={Yiqian Zhang and Buyu Liu and Jun Bao and Qiang Huang and Min Zhang and Jun Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4GP7S7U0lJ}\n}", "github": "", "reviewers": "JoyP;ZEo9;fZYA", "pdf_size": 1174383, "rating": "5;5;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "38;84;82", "wc_strengths": "132;83;147", "wc_weaknesses": "119;181;91", "wc_questions": "5;66;2", "wc_limitations": "9;2;2", "wc_review": "303;416;324", "wc_reply_reviewers": "22;92;4", "wc_reply_authors": "58;50;120", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.0, 21.228911104120876 ], "wc_strengths_avg": [ 120.66666666666667, 27.329267990359508 ], "wc_weaknesses_avg": [ 130.33333333333334, 37.606146069787876 ], "wc_questions_avg": [ 24.333333333333332, 29.48822740612863 ], "wc_limitations_avg": [ 4.333333333333333, 3.299831645537222 ], "wc_review_avg": [ 347.6666666666667, 49.073640808709335 ], "wc_reply_reviewers_avg": [ 39.333333333333336, 37.959042254631356 ], "wc_reply_authors_avg": [ 76.0, 31.283648551066843 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5BspFIl5m2gJ:scholar.google.com/&scioq=Learnability+Matters:+Active+Learning+for+Video+Captioning&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "hdu.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;hit.edu.cn;hdu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;3;0", "aff_unique_norm": "Hangzhou Dianzi University;Zhejiang University;National University of Singapore;Harbin Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hdu.edu.cn/;https://www.zju.edu.cn;https://www.nus.edu.sg;http://en.hhit.edu.cn/", "aff_unique_abbr": "HGHDU;ZJU;NUS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Instance-Specific Asymmetric Sensitivity in Differential Privacy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96679", "id": "4I2aEav51N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4I2aEav51N", "openreview": "https://openreview.net/forum?id=4I2aEav51N", "poster": "", "project": "", "tldr": "", "abstract": "We provide a new algorithmic framework for differentially private estimation of general functions that adapts to the hardness of the underlying dataset. We build upon previous work that gives a paradigm for selecting an output through the exponential mechanism based upon closeness of the inverse to the underlying dataset, termed the inverse sensitivity mechanism. Our framework will slightly modify the closeness metric and instead give a simple and efficient application of the sparse vector technique. While the inverse sensitivity mechanism was shown to be instance optimal, it was only with respect to a class of unbiased mechanisms such that the most likely outcome matches the underlying data. We break this assumption in order to more naturally navigate the bias-variance tradeoff, which will also critically allow for extending our method to unbounded data. In consideration of this tradeoff, we provide theoretical guarantees and empirical validation that our technique will be particularly effective when the distances to the underlying dataset are asymmetric. This asymmetry is inherent to a range of important problems including fundamental statistics such as variance, as well as commonly used machine learning performance metrics for both classification and regression tasks. We efficiently instantiate our method in $O(n)$ time for these problems and empirically show that our techniques will give substantially improved differentially private estimations.", "keywords": "Differential Privacy;Statistics;Machine Learning", "primary_area": "privacy", "supplementary_material": "", "author": "David Durfee", "authorids": "~David_Durfee1", "gender": "M", "homepage": "", "dblp": "155/9794", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~David_Durfee1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\ndurfee2024instancespecific,\ntitle={Instance-Specific Asymmetric Sensitivity in Differential Privacy},\nauthor={David Durfee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4I2aEav51N}\n}", "github": "", "reviewers": "Zw6m;GRDP;55Ug;v2LJ;J6Z2", "pdf_size": 2623257, "rating": "5;5;6;7;7", "confidence": "2;4;4;4;3", "soundness": "3;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "70;164;49;200;84", "wc_strengths": "43;83;10;173;35", "wc_weaknesses": "27;165;138;176;42", "wc_questions": "23;98;100;78;41", "wc_limitations": "2;6;1;7;8", "wc_review": "165;516;298;634;210", "wc_reply_reviewers": "0;0;18;13;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 113.4, 58.23263689719022 ], "wc_strengths_avg": [ 68.8, 57.13982849116717 ], "wc_weaknesses_avg": [ 109.6, 62.733085369683515 ], "wc_questions_avg": [ 68.0, 30.9127805284481 ], "wc_limitations_avg": [ 4.8, 2.7856776554368237 ], "wc_review_avg": [ 364.6, 180.9282730807985 ], "wc_reply_reviewers_avg": [ 8.4, 7.227724399837061 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.2795084971874737, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KOvngmjBVKsJ:scholar.google.com/&scioq=Instance-Specific+Asymmetric+Sensitivity+in+Differential+Privacy&hl=en&as_sdt=0,11", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "Treeffuser: probabilistic prediction via conditional diffusions with gradient-boosted trees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96678", "id": "4KeSvAvNMr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4KeSvAvNMr", "openreview": "https://openreview.net/forum?id=4KeSvAvNMr", "poster": "/media/PosterPDFs/NeurIPS%202024/96678.png?t=1733814548.3963344", "project": "", "author_site": "Nicolas Beltran Velez, Alessandro A Grande, Achille Nazaret, Alp Kucukelbir, David Blei", "tldr": "", "abstract": "Probabilistic prediction aims to compute predictive distributions rather than single point predictions. These distributions enable practitioners to quantify uncertainty, compute risk, and detect outliers. However, most probabilistic methods assume parametric responses, such as Gaussian or Poisson distributions. When these assumptions fail, such models lead to bad predictions and poorly calibrated uncertainty. In this paper, we propose Treeffuser, an easy-to-use method for probabilistic prediction on tabular data. The idea is to learn a conditional diffusion model where the score function is estimated using gradient-boosted trees. The conditional diffusion model makes Treeffuser flexible and non-parametric, while the gradient-boosted trees make it robust and easy to train on CPUs. Treeffuser learns well-calibrated predictive distributions and can handle a wide range of regression tasks---including those with multivariate, multimodal, and skewed responses. We study Treeffuser on synthetic and real data and show that it outperforms existing methods, providing better calibrated probabilistic predictions. We further demonstrate its versatility with an application to inventory allocation under uncertainty using sales data from Walmart. We implement Treeffuser in https://github.com/blei-lab/treeffuser.", "keywords": "trees;gradient boosting;diffusion;probabilistic predictions;uncertainty quantification", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/56d11a7a66f0218ce56f61e74289e75c5693fd54.zip", "author": "Nicolas Beltran-Velez;Alessandro Antonio Grande;Achille Nazaret;Alp Kucukelbir;David Blei", "authorids": "~Nicolas_Beltran-Velez1;~Alessandro_Antonio_Grande1;~Achille_Nazaret1;~Alp_Kucukelbir1;~David_Blei2", "gender": "M;M;Not Specified;;M", "homepage": ";https://www.alessandrogrande.com/;;http://www.cs.columbia.edu/~alp;http://www.cs.columbia.edu/~blei/", "dblp": ";;241/4984;96/8054;86/1910", "google_scholar": ";;iDExfWQAAAAJ;ZZwLZ8UAAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ", "orcid": ";;;;", "linkedin": "nicolas-beltran-velez-1950141a9;;;;", "or_profile": "~Nicolas_Beltran-Velez1;~Alessandro_Antonio_Grande1;~Achille_Nazaret1;~Alp_Kucukelbir1;~David_Blei2", "aff": "Columbia University;Columbia University in the City of New York, Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;stat.columbia.edu;columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbeltran-velez2024treeffuser,\ntitle={Treeffuser: probabilistic prediction via conditional diffusions with gradient-boosted trees},\nauthor={Nicolas Beltran-Velez and Alessandro Antonio Grande and Achille Nazaret and Alp Kucukelbir and David Blei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4KeSvAvNMr}\n}", "github": "", "reviewers": "3Apn;knMV;iWzb", "pdf_size": 968295, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;4;3", "presentation": "3;3;4", "wc_summary": "57;84;15", "wc_strengths": "52;85;16", "wc_weaknesses": "85;145;19", "wc_questions": "135;51;28", "wc_limitations": "1;9;26", "wc_review": "330;374;104", "wc_reply_reviewers": "260;28;12", "wc_reply_authors": "280;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 52.0, 28.39013913315678 ], "wc_strengths_avg": [ 51.0, 28.178005607210743 ], "wc_weaknesses_avg": [ 83.0, 51.45872132107443 ], "wc_questions_avg": [ 71.33333333333333, 45.98792111945146 ], "wc_limitations_avg": [ 12.0, 10.424330514074594 ], "wc_review_avg": [ 269.3333333333333, 118.28026979265064 ], "wc_reply_reviewers_avg": [ 100.0, 113.32548992467082 ], "wc_reply_authors_avg": [ 93.33333333333333, 131.99326582148888 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18013215557654766505&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "columbia.edu;stat.columbia.edu;columbia.edu;columbia.edu;columbia.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Optimization with Sparse Gradients", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96677", "id": "4Ktifp48WD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Ktifp48WD", "openreview": "https://openreview.net/forum?id=4Ktifp48WD", "poster": "/media/PosterPDFs/NeurIPS%202024/96677.png?t=1730244936.9223228", "project": "", "author_site": "Badih Ghazi, Crist\u00f3bal Guzm\u00e1n, Pritish Kamath, Ravi Kumar, Pasin Manurangsi", "tldr": "", "abstract": "Motivated by applications of large embedding models, we study differentially private (DP) optimization problems under sparsity of _individual_ gradients. We start with new near-optimal bounds for the classic mean estimation problem but with sparse data, improving upon existing algorithms particularly for the high-dimensional regime. The corresponding lower bounds are based on a novel block-diagonal construction that is combined with existing DP mean estimation lower bounds.\nNext, we obtain pure- and approximate-DP algorithms with almost optimal rates \nfor stochastic convex optimization with sparse gradients; the former represents the first nearly dimension-independent rates for this problem. Furthermore, by introducing novel analyses of bias reduction in mean estimation and randomly-stopped biased SGD we obtain nearly dimension-independent rates for near-stationary points for the empirical risk in nonconvex settings under approximate-DP.", "keywords": "Differential Privacy;Stochastic Optimization;Convex Optimization;Sparsity", "primary_area": "privacy", "supplementary_material": "", "author": "Badih Ghazi;Crist\u00f3bal A Guzm\u00e1n;Pritish Kamath;Ravi Kumar;Pasin Manurangsi", "authorids": "~Badih_Ghazi1;~Crist\u00f3bal_A_Guzm\u00e1n1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2", "gender": ";;M;M;M", "homepage": "https://sites.google.com/view/badihghazi/home;https://sites.google.com/view/cguzman/;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/", "dblp": "125/2134;21/9888;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;133/2059", "google_scholar": "GBJLTN8AAAAJ;Rt9fMGEAAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ", "orcid": ";0000-0002-1498-2055;;0000-0002-2203-2586;", "linkedin": "badih-ghazi-608379132/;;;ravi-kumar-a3a9631;", "or_profile": "~Badih_Ghazi1;~Crist\u00f3bal_A_Guzm\u00e1n1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2", "aff": "Google;Pontificia Universidad Catolica de Chile;Google Research;Google;Google", "aff_domain": "google.com;uc.cl;google.com;google.com;google.com", "position": "Researcher;Associate Professor;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nghazi2024differentially,\ntitle={Differentially Private Optimization with Sparse Gradients},\nauthor={Badih Ghazi and Crist{\\'o}bal A Guzm{\\'a}n and Pritish Kamath and Ravi Kumar and Pasin Manurangsi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4Ktifp48WD}\n}", "github": "", "reviewers": "GgyX;JAyy;9vLf;L9WR", "pdf_size": 551192, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "63;21;219;75", "wc_strengths": "75;44;47;42", "wc_weaknesses": "456;34;68;27", "wc_questions": "2;37;140;13", "wc_limitations": "2;7;1;11", "wc_review": "598;143;475;168", "wc_reply_reviewers": "163;10;72;11", "wc_reply_authors": "271;0;220;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.5, 74.6240577829965 ], "wc_strengths_avg": [ 52.0, 13.397761006974262 ], "wc_weaknesses_avg": [ 146.25, 179.50539685480211 ], "wc_questions_avg": [ 48.0, 54.60311346434377 ], "wc_limitations_avg": [ 5.25, 4.02336923485777 ], "wc_review_avg": [ 346.0, 195.60035787288325 ], "wc_reply_reviewers_avg": [ 64.0, 62.42996075603444 ], "wc_reply_authors_avg": [ 122.75, 124.06727005943188 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11796044107415031117&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "google.com;uc.cl;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Google;Pontificia Universidad Catolica de Chile", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.puc.cl", "aff_unique_abbr": "Google;PUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Chile" }, { "title": "Strategic Littlestone Dimension: Improved Bounds on Online Strategic Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96676", "id": "4Lkzghiep1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Lkzghiep1", "openreview": "https://openreview.net/forum?id=4Lkzghiep1", "poster": "", "project": "", "author_site": "Saba Ahmadi, Kunhe Yang, Hanrui Zhang", "tldr": "", "abstract": "We study the problem of online binary classification in settings where strategic agents can modify their observable features to receive a positive classification. We model the set of feasible manipulations by a directed graph over the feature space, and assume the learner only observes the manipulated features instead of the original ones. We introduce the Strategic Littlestone Dimension, a new combinatorial measure that captures the joint complexity of the hypothesis class and the manipulation graph. We demonstrate that it characterizes the instance-optimal mistake bounds for deterministic learning algorithms in the realizable setting. We also achieve improved regret in the agnostic setting by a refined agnostic-to-realizable reduction that accounts for the additional challenge of not observing agents' original features. Finally, we relax the assumption that the learner knows the manipulation graph, instead assuming their knowledge is captured by a family of graphs. We derive regret bounds in both the realizable setting where all agents manipulate according to the same graph within the graph family, and the agnostic setting where the manipulation graphs are chosen adversarially and not consistently modeled by a single graph in the family.", "keywords": "online strategic classification;Littlestone dimension;manipulation graph", "primary_area": "online_learning", "supplementary_material": "", "author": "Saba Ahmadi;Kunhe Yang;Hanrui Zhang", "authorids": "~Saba_Ahmadi1;~Kunhe_Yang1;~Hanrui_Zhang1", "gender": "F;F;", "homepage": "https://sabaahmadi.github.io/;https://kunheyang.com/;", "dblp": "200/9551;267/5467;168/8847", "google_scholar": "https://scholar.google.com/citations?hl=en;-j0q9B4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Saba_Ahmadi1;~Kunhe_Yang1;~Hanrui_Zhang1", "aff": "Toyota Technological Institute at Chicago;University of California, Berkeley;Google Research", "aff_domain": "ttic.edu;berkeley.edu;google.com", "position": "Postdoc;PhD student;Researcher", "bibtex": "@inproceedings{\nahmadi2024strategic,\ntitle={Strategic Littlestone Dimension: Improved Bounds on Online Strategic Classification},\nauthor={Saba Ahmadi and Kunhe Yang and Hanrui Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4Lkzghiep1}\n}", "github": "", "reviewers": "fR1n;6Ry7;ZNn3;Mb38;wZa2", "pdf_size": 497947, "rating": "5;6;6;7;8", "confidence": "2;3;3;3;3", "soundness": "2;3;3;4;4", "novelty": "3;3;3;2;4", "presentation": "2;3;2;4;4", "wc_summary": "92;132;196;106;307", "wc_strengths": "46;45;27;24;138", "wc_weaknesses": "59;452;138;16;8", "wc_questions": "2;182;172;142;123", "wc_limitations": "9;2;5;42;184", "wc_review": "208;813;538;330;760", "wc_reply_reviewers": "5;4;30;17;4", "wc_reply_authors": "0;0;22;6;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 166.6, 78.75430146982448 ], "wc_strengths_avg": [ 56.0, 41.97618372363071 ], "wc_weaknesses_avg": [ 134.6, 165.2653623721559 ], "wc_questions_avg": [ 124.2, 64.61702562018775 ], "wc_limitations_avg": [ 48.4, 69.3068539179207 ], "wc_review_avg": [ 529.8, 235.26019637839292 ], "wc_reply_reviewers_avg": [ 12.0, 10.256705123966467 ], "wc_reply_authors_avg": [ 5.6, 8.522910301065007 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6864064729836443, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11092269500230923534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ttic.edu;berkeley.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Toyota Technological Institute at Chicago;University of California, Berkeley;Google", "aff_unique_dep": ";;Google Research", "aff_unique_url": "https://www.tti-chicago.org;https://www.berkeley.edu;https://research.google", "aff_unique_abbr": "TTI Chicago;UC Berkeley;Google Research", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Chicago;Berkeley;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Long-form factuality in large language models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96675", "id": "4M9f8VMt2C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4M9f8VMt2C", "openreview": "https://openreview.net/forum?id=4M9f8VMt2C", "poster": "/media/PosterPDFs/NeurIPS%202024/96675.png?t=1733179780.94617", "project": "", "author_site": "Jerry Wei, Chengrun Yang, Xinying Song, Yifeng Lu, Nathan Hu, Jie Huang, Dustin Tran, Daiyi Peng, Ruibo Liu, Da Huang, Cosmo Du, Quoc V Le", "tldr": "", "abstract": "Large language models (LLMs) often generate content that contains factual errors when responding to fact-seeking prompts on open-ended topics. To benchmark a model\u2019s long-form factuality in open domains, we first use GPT-4 to generate LongFact, a prompt set comprising thousands of questions spanning 38 topics. We then propose that LLM agents can be used as automated evaluators for long-form factuality through a method which we call Search-Augmented Factuality Evaluator (SAFE). SAFE utilizes an LLM to break down a long-form response into a set of individual facts and to evaluate the accuracy of each fact using a multi-step reasoning process comprising sending search queries to Google Search and determining whether a fact is supported by the search results. Furthermore, we propose extending F1 score as an aggregated metric for long-form factuality. To do so, we balance the percentage of supported facts in a response (precision) with the percentage of provided facts relative to a hyperparameter representing a user\u2019s preferred response length (recall).\n\nEmpirically, we demonstrate that LLM agents can outperform crowdsourced human annotators\u2014on a set of\u223c16k individual facts, SAFE agrees with crowdsourced human annotators 72% of the time, and on a random subset of 100 disagreement cases, SAFE wins 76% of the time. At the same time, SAFE is more than 20 times cheaper than human annotators. We also benchmark thirteen language models on LongFact across four model families (Gemini, GPT, Claude, and PaLM-2), finding that larger language models generally achieve better long-form factuality. LongFact, SAFE, and all experimental code are available at https://github.com/google-deepmind/long-form-factuality.", "keywords": "natural language processing;machine learning;factuality", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jerry Wei;Chengrun Yang;Xinying Song;Yifeng Lu;Nathan Zixia Hu;Jie Huang;Dustin Tran;Daiyi Peng;Ruibo Liu;Da Huang;Cosmo Du;Quoc V Le", "authorids": "~Jerry_Wei1;~Chengrun_Yang1;~Xinying_Song1;~Yifeng_Lu1;~Nathan_Zixia_Hu1;~Jie_Huang3;~Dustin_Tran1;~Daiyi_Peng1;~Ruibo_Liu1;~Da_Huang2;~Cosmo_Du1;~Quoc_V_Le1", "gender": "M;M;M;M;;;M;M;;M;M;M", "homepage": "https://chengrunyang.github.io/;;;;https://jeffhj.github.io/;http://dustintran.com;http://www.daiyip.org;https://www.cs.dartmouth.edu/~rbliu/;;http://cosmodu.com;;https://www.jerrywei.net", "dblp": "225/4721;12/6134;69/8051;;29/6643-9;;;;;;29/6166;234/9076", "google_scholar": "XYYhXe4AAAAJ;e7SbG_UAAAAJ;CM4o-cgAAAAJ;;GIoPkMoAAAAJ;wVazIm8AAAAJ;_8Egwg8AAAAJ;5lgfeo4AAAAJ;ZjuMpLoAAAAJ;https://scholar.google.com/citations?view_op=list_works;;Y4sk3aMAAAAJ", "orcid": ";;;;;;;;;;;0009-0001-5279-0177", "linkedin": ";;;nathan-hu-6598111a9/;jie-huang-4b0104151/;;;;;;;jerryweiai/", "or_profile": "~Chengrun_Yang1;~Xinying_Song1;~Yifeng_Lu1;~Nathan_Zixia_Hu1;~Jie_Huang3;~Dustin_Tran1;~Daiyi_Peng1;~Ruibo_Liu1;~Da_Huang2;~Cosmo_Du1;~Quoc_V_Le1;~Jerry_Weng_Wei1", "aff": "Google;Google;Google Deepmind;;University of Illinois, Urbana Champaign;Google;;Google DeepMind;Google;Google;Google;Google DeepMind", "aff_domain": "google.com;google.com;google.com;;illinois.edu;google.com;;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Software Engineer;Researcher;;PhD student;Research Scientist;;Researcher;Researcher;Researcher;Scientist;Researcher", "bibtex": "@inproceedings{\nwei2024longform,\ntitle={Long-form factuality in large language models},\nauthor={Jerry Wei and Chengrun Yang and Xinying Song and Yifeng Lu and Nathan Zixia Hu and Jie Huang and Dustin Tran and Daiyi Peng and Ruibo Liu and Da Huang and Cosmo Du and Quoc V Le},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4M9f8VMt2C}\n}", "github": "", "reviewers": "PEa7;2mDt;5G2Y;3E4x;zrX1", "pdf_size": 719607, "rating": "6;7;7;8;8", "confidence": "4;4;4;3;4", "soundness": "3;3;4;4;3", "novelty": "3;3;3;4;4", "presentation": "3;3;4;4;4", "wc_summary": "110;81;62;95;193", "wc_strengths": "94;56;26;127;79", "wc_weaknesses": "167;150;35;81;125", "wc_questions": "16;46;4;276;90", "wc_limitations": "1;15;4;36;1", "wc_review": "388;348;131;615;488", "wc_reply_reviewers": "0;147;0;11;0", "wc_reply_authors": "0;817;0;0;0", "reply_reviewers": "0;2;0;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 108.2, 45.26102075738019 ], "wc_strengths_avg": [ 76.4, 34.15611219093883 ], "wc_weaknesses_avg": [ 111.6, 48.01499765698213 ], "wc_questions_avg": [ 86.4, 99.33700216938298 ], "wc_limitations_avg": [ 11.4, 13.335666462535722 ], "wc_review_avg": [ 394.0, 160.62253889165117 ], "wc_reply_reviewers_avg": [ 31.6, 57.85706525567988 ], "wc_reply_authors_avg": [ 163.4, 326.8 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5345224838248487, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6806404591299778006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;google.com;google.com;;illinois.edu;google.com;;google.com;google.com;google.com;google.com;google.com", "author_num": 12, "aff_unique_index": "0;0;1;2;0;0;0;0;0;0", "aff_unique_norm": "Google;DeepMind;University of Illinois Urbana-Champaign", "aff_unique_dep": "Google;DeepMind;", "aff_unique_url": "https://www.google.com;https://deepmind.com;https://illinois.edu", "aff_unique_abbr": "Google;DeepMind;UIUC", "aff_campus_unique_index": "0;0;2;0;0;0;0", "aff_campus_unique": "Mountain View;;Urbana-Champaign", "aff_country_unique_index": "0;0;1;0;0;1;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Aligning Embeddings and Geometric Random Graphs: Informational Results and Computational Approaches for the Procrustes-Wasserstein Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96674", "id": "4NGlu45uyt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4NGlu45uyt", "openreview": "https://openreview.net/forum?id=4NGlu45uyt", "poster": "", "project": "", "author_site": "Mathieu Even, Luca Ganassali, Jakob Maier, Laurent Massouli\u00e9", "tldr": "", "abstract": "The Procrustes-Wasserstein problem consists in matching two high-dimensional point clouds in an unsupervised setting, and has many applications in natural language processing and computer vision. \nWe consider a planted model with two datasets $X,Y$ that consist of $n$ datapoints in $\\mathbb{R}^d$, where $Y$ is a noisy version of $X$, up to an orthogonal transformation and a relabeling of the data points. \nThis setting is related to the graph alignment problem in geometric models.\nIn this work, we focus on the euclidean transport cost between the point clouds as a measure of performance for the alignment. We first establish information-theoretic results, in the high ($d \\gg \\log n$) and low ($d \\ll \\log n$) dimensional regimes. \nWe then study computational aspects and propose the \u2018Ping-Pong algorithm', alternatively estimating the orthogonal transformation and the relabeling, initialized via a Franke-Wolfe convex relaxation. We give sufficient conditions for the method to retrieve the planted signal after one single step. We provide experimental results to compare the proposed approach with the state-of-the-art method of Grave et al. (2019).", "keywords": "unsupervised learning;alignment of embeddings;high dimensional statistics", "primary_area": "learning_theory", "supplementary_material": "", "author": "Mathieu Even;Luca Ganassali;Jakob Maier;Laurent Massouli\u00e9", "authorids": "~Mathieu_Even1;~Luca_Ganassali1;~Jakob_Maier1;~Laurent_Massouli\u00e91", "gender": "M;M;M;", "homepage": "https://scholar.google.com/citations?user=Mn8_1hQAAAAJ&hl=fr;https://lganassali.github.io;;https://www.di.ens.fr/laurent.massoulie/", "dblp": "277/9882;255/7249;;58/4130", "google_scholar": ";afDIpsYAAAAJ;;https://scholar.google.fr/citations?user=TvVmLjUAAAAJ", "orcid": ";;;", "linkedin": ";luca-ganassali-a54b23129/?originalSubdomain=fr;jakob-maier-2a412514b/;", "or_profile": "~Mathieu_Even1;~Luca_Ganassali1;~Jakob_Maier1;~Laurent_Massouli\u00e91", "aff": "INRIA Paris;EPFL - EPF Lausanne;;INRIA", "aff_domain": "inria.fr;epfl.ch;;inria.fr", "position": "PhD student;Postdoc;;Full Professor", "bibtex": "@inproceedings{\neven2024aligning,\ntitle={Aligning Embeddings and Geometric Random Graphs: Informational Results and Computational Approaches for the Procrustes-Wasserstein Problem},\nauthor={Mathieu Even and Luca Ganassali and Jakob Maier and Laurent Massouli{\\'e}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4NGlu45uyt}\n}", "github": "", "reviewers": "dVzJ;JfWm;6nbD;ERNU", "pdf_size": 604447, "rating": "5;6;7;8", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "85;89;110;311", "wc_strengths": "52;116;71;51", "wc_weaknesses": "84;76;12;108", "wc_questions": "317;57;19;32", "wc_limitations": "9;18;2;9", "wc_review": "547;356;214;511", "wc_reply_reviewers": "260;16;0;47", "wc_reply_authors": "724;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 148.75, 94.15512466137996 ], "wc_strengths_avg": [ 72.5, 26.348624252510795 ], "wc_weaknesses_avg": [ 70.0, 35.4964786985977 ], "wc_questions_avg": [ 106.25, 122.44054679721093 ], "wc_limitations_avg": [ 9.5, 5.678908345800274 ], "wc_review_avg": [ 407.0, 132.5386736013304 ], "wc_reply_reviewers_avg": [ 80.75, 104.8603237645202 ], "wc_reply_authors_avg": [ 181.0, 313.5011961699668 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4856507687830391969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "inria.fr;epfl.ch;;inria.fr", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "INRIA;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.epfl.ch", "aff_unique_abbr": "INRIA;EPFL", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Paris;Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;Switzerland" }, { "title": "The Dormant Neuron Phenomenon in Multi-Agent Reinforcement Learning Value Factorization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96673", "id": "4NGrHrhJPx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4NGrHrhJPx", "openreview": "https://openreview.net/forum?id=4NGrHrhJPx", "poster": "/media/PosterPDFs/NeurIPS%202024/96673.png?t=1731570657.4853327", "project": "", "author_site": "Haoyuan Qin, Chennan Ma, Deng, Zhengzhu Liu, Songzhu Mei, Xinwang Liu, Cheng Wang, Siqi Shen", "tldr": "", "abstract": "In this work, we study the dormant neuron phenomenon in multi-agent reinforcement learning value factorization, where the mixing network suffers from reduced network expressivity caused by an increasing number of inactive neurons. We demonstrate the presence of the dormant neuron phenomenon across multiple environments and algorithms, and show that this phenomenon negatively affects the learning process. We show that dormant neurons correlates with the existence of over-active neurons, which have large activation scores. To address the dormant neuron issue, we propose ReBorn, a simple but effective method that transfers the weights from over-active neurons to dormant neurons. We theoretically show that this method can ensure the learned action preferences are not forgotten after the weight-transferring procedure, which increases learning effectiveness. Our extensive experiments reveal that ReBorn achieves promising results across various environments and improves the performance of multiple popular value factorization approaches. The source code of ReBorn is available in \\url{https://github.com/xmu-rl-3dv/ReBorn}.", "keywords": "dormant neurons; Multi-agent reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/e0b1c6ceeff1c2042046a1264cd417c13ffb2ec3.zip", "author": "Haoyuan Qin;Chennan Ma;Mian Deng;Zhengzhu Liu;Songzhu Mei;Xinwang Liu;Cheng Wang;Siqi Shen", "authorids": "~Haoyuan_Qin1;~Chennan_Ma1;~Mian_Deng2;~Zhengzhu_Liu1;~Songzhu_Mei1;~Xinwang_Liu1;~Cheng_Wang2;~Siqi_Shen5", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://asc.xmu.edu.cn/t/qinhaoyuan;https://asc.xmu.edu.cn/t/machennan;https://asc.xmu.edu.cn/m/t/dengmian;https://asc.xmu.edu.cn/t/liuzhengzhu;;https://xinwangliu.github.io/;https://chwang.xmu.edu.cn/index_en.htm;https://asc.xmu.edu.cn/t/shensiqi", "dblp": ";187/9101;396/5834;;11/10487;45/6569-2.html;54/2062-3;37/8026", "google_scholar": "32lBQlsAAAAJ;ROidaW4AAAAJ;;;;A56vWC4AAAAJ;https://scholar.google.com/citations?hl=en;gFKYanAAAAAJ", "orcid": ";;;;0000-0002-4926-5953;;0000-0001-6075-796X;", "linkedin": ";;;;;;;", "or_profile": "~Haoyuan_Qin1;~Chennan_Ma1;~Mian_Deng2;~Zhengzhu_Liu1;~Songzhu_Mei1;~Xinwang_Liu1;~Cheng_Wang2;~Siqi_SHEN2", "aff": "Xiamen University;Xiamen University;Xiamen University;Xiamen University;National University of Defense Technology;National University of Defense Technology;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;nudt.edu.cn;nudt.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "MS student;MS student;MS student;Undergrad student;Associate Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nqin2024the,\ntitle={The Dormant Neuron Phenomenon in Multi-Agent Reinforcement Learning Value Factorization},\nauthor={Haoyuan Qin and Chennan Ma and Mian Deng and Zhengzhu Liu and Songzhu Mei and Xinwang Liu and Cheng Wang and Siqi Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4NGrHrhJPx}\n}", "github": "", "reviewers": "JB6L;KvCg;WpXH;hhg9", "pdf_size": 3608114, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;2;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;2", "wc_summary": "102;85;55;87", "wc_strengths": "92;190;38;61", "wc_weaknesses": "187;420;43;123", "wc_questions": "6;60;34;27", "wc_limitations": "1;1;1;12", "wc_review": "388;756;171;310", "wc_reply_reviewers": "0;303;116;18", "wc_reply_authors": "49;1182;338;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;3;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 82.25, 17.049560111627514 ], "wc_strengths_avg": [ 95.25, 57.96281135348768 ], "wc_weaknesses_avg": [ 193.25, 140.50333625932163 ], "wc_questions_avg": [ 31.75, 19.292161620720474 ], "wc_limitations_avg": [ 3.75, 4.763139720814412 ], "wc_review_avg": [ 406.25, 216.37048666581123 ], "wc_reply_reviewers_avg": [ 109.25, 120.25675656693889 ], "wc_reply_authors_avg": [ 392.25, 473.9010313346026 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9854669045095986545&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;nudt.edu.cn;nudt.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;1;0;0", "aff_unique_norm": "Xiamen University;National University of Defense Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;http://www.nudt.edu.cn/", "aff_unique_abbr": "XMU;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LLM Evaluators Recognize and Favor Their Own Generations", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96672", "id": "4NJBV6Wp0h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4NJBV6Wp0h", "openreview": "https://openreview.net/forum?id=4NJBV6Wp0h", "poster": "/media/PosterPDFs/NeurIPS%202024/96672.png?t=1732572730.1297684", "project": "", "author_site": "Arjun Panickssery, Samuel Bowman, Shi Feng", "tldr": "", "abstract": "Self-evaluation using large language models (LLMs) has proven valuable not only in benchmarking but also methods like reward modeling, constitutional AI, and self-refinement. But new biases are introduced due to the same LLM acting as both the evaluator and the evaluatee. One such bias is self-preference, where an LLM evaluator scores its own outputs higher than others\u2019 while human annotators consider them of equal quality. But do LLMs actually recognize their own outputs when they give those texts higher scores, or is it just a coincidence? In this paper, we investigate if self-recognition capability contributes to self-preference. We discover that, out of the box, LLMs such as GPT-4 and Llama 2 have non-trivial accuracy at distinguishing themselves from other LLMs and humans. By finetuning LLMs, we discover a linear correlation between self-recognition capability and the strength of self-preference bias; using controlled experiments, we show that the causal explanation resists straightforward confounders. We discuss how self-recognition can interfere with unbiased evaluations and AI safety more generally.", "keywords": "LLMs;evaluations;benchmarking;situational-awareness", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c023e1d2578dfc18a113afbe92c413bda7c6bbec.zip", "author": "Arjun Panickssery;Samuel R. Bowman;Shi Feng", "authorids": "~Arjun_Panickssery1;~Samuel_R._Bowman1;~Shi_Feng1", "gender": ";;M", "homepage": "https://twitter.com/panickssery;;https://ihsgnef.github.io/", "dblp": ";;97/1374.html", "google_scholar": ";;d0npq2oAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Arjun_Panickssery1;~Samuel_R._Bowman1;~Shi_Feng1", "aff": ";;New York University", "aff_domain": ";;nyu.edu", "position": ";;Postdoc", "bibtex": "@inproceedings{\npanickssery2024llm,\ntitle={{LLM} Evaluators Recognize and Favor Their Own Generations},\nauthor={Arjun Panickssery and Samuel R. Bowman and Shi Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4NJBV6Wp0h}\n}", "github": "", "reviewers": "gQGf;g5q5;jTdx;48mP", "pdf_size": 981738, "rating": "5;6;7;9", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "126;65;77;65", "wc_strengths": "56;57;62;33", "wc_weaknesses": "182;101;74;27", "wc_questions": "243;86;228;68", "wc_limitations": "13;1;1;4", "wc_review": "620;310;442;197", "wc_reply_reviewers": "9;21;23;13", "wc_reply_authors": "639;268;519;175", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 83.25, 25.16321720289359 ], "wc_strengths_avg": [ 52.0, 11.20267825120404 ], "wc_weaknesses_avg": [ 96.0, 56.2716624954337 ], "wc_questions_avg": [ 156.25, 79.68178901104066 ], "wc_limitations_avg": [ 4.75, 4.9180788932265 ], "wc_review_avg": [ 392.25, 157.50615067355307 ], "wc_reply_reviewers_avg": [ 16.5, 5.722761571129799 ], "wc_reply_authors_avg": [ 400.25, 186.63517219431068 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16903085094570333, "gs_citation": 127, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14332547633814164817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;nyu.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Private Edge Density Estimation for Random Graphs: Optimal, Efficient and Robust", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96671", "id": "4NQ24cHnOi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4NQ24cHnOi", "openreview": "https://openreview.net/forum?id=4NQ24cHnOi", "poster": "", "project": "", "author_site": "Hongjie Chen, Jingqiu Ding, Yiding Hua, David Steurer", "tldr": "", "abstract": "We give the first polynomial-time, differentially node-private, and robust algorithm for estimating the edge density of Erd\u0151s-R\u00e9nyi random graphs and their generalization, inhomogeneous random graphs. We further prove information-theoretical lower bounds, showing that the error rate of our algorithm is optimal up to logarithmic factors. Previous algorithms incur either exponential running time or suboptimal error rates.\n\nTwo key ingredients of our algorithm are (1) a new sum-of-squares algorithm for robust edge density estimation, and (2) the reduction from privacy to robustness based on sum-of-squares exponential mechanisms due to Hopkins et al. (STOC 2023).", "keywords": "differential privacy;robustness;random graph;sum of squares;average-case complexity", "primary_area": "privacy", "supplementary_material": "", "author": "Hongjie Chen;Jingqiu Ding;Yiding Hua;David Steurer", "authorids": "~Hongjie_Chen2;~Jingqiu_Ding1;~Yiding_Hua1;~David_Steurer1", "gender": ";M;M;", "homepage": "https://chen-hj.github.io;;https://huayiding.github.io;", "dblp": "80/4761-4;;317/1111;", "google_scholar": "bPRICwkAAAAJ;;;", "orcid": ";;;", "linkedin": ";;yiding-hua-b3a076117;", "or_profile": "~Hongjie_Chen2;~Jingqiu_Ding1;~Yiding_Hua1;~David_Steurer1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETH Zurich;", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;", "position": "PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nchen2024private,\ntitle={Private Edge Density Estimation for Random Graphs: Optimal, Efficient and Robust},\nauthor={Hongjie Chen and Jingqiu Ding and Yiding Hua and David Steurer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4NQ24cHnOi}\n}", "github": "", "reviewers": "97Rf;ikor;Se7o;teKp;ZgW7", "pdf_size": 464418, "rating": "6;6;7;7;8", "confidence": "3;3;3;2;3", "soundness": "3;4;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "188;175;44;97;214", "wc_strengths": "54;36;69;23;208", "wc_weaknesses": "54;63;73;108;45", "wc_questions": "113;37;35;49;79", "wc_limitations": "5;2;8;15;3", "wc_review": "414;313;229;292;549", "wc_reply_reviewers": "5;12;10;8;8", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 143.6, 63.253774590928565 ], "wc_strengths_avg": [ 78.0, 66.85207551003933 ], "wc_weaknesses_avg": [ 68.6, 21.78623418583395 ], "wc_questions_avg": [ 62.6, 29.702525145179155 ], "wc_limitations_avg": [ 6.6, 4.673328578219168 ], "wc_review_avg": [ 359.4, 111.91889920831066 ], "wc_reply_reviewers_avg": [ 8.6, 2.33238075793812 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4600798594871718684&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "inf.ethz.ch;ethz.ch;ethz.ch;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "A Method for Evaluating Hyperparameter Sensitivity in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96670", "id": "4OJdZhcwBb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4OJdZhcwBb", "openreview": "https://openreview.net/forum?id=4OJdZhcwBb", "poster": "", "project": "", "author_site": "Jacob Adkins, Michael Bowling, Adam White", "tldr": "", "abstract": "The performance of modern reinforcement learning algorithms critically relies\non tuning ever increasing numbers of hyperparameters. Often, small changes in\na hyperparameter can lead to drastic changes in performance, and different environments require very different hyperparameter settings to achieve state-of-the-art\nperformance reported in the literature. We currently lack a scalable and widely\naccepted approach to characterizing these complex interactions. This work proposes a new empirical methodology for studying, comparing, and quantifying the\nsensitivity of an algorithm\u2019s performance to hyperparameter tuning for a given set\nof environments. We then demonstrate the utility of this methodology by assessing\nthe hyperparameter sensitivity of several commonly used normalization variants of\nPPO. The results suggest that several algorithmic performance improvements may,\nin fact, be a result of an increased reliance on hyperparameter tuning.", "keywords": "Reinforcement Learning;Hyperparameters;Empirical Methodology", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jacob Adkins;Michael Bowling;Adam White", "authorids": "~Jacob_Adkins1;~Michael_Bowling1;~Adam_White1", "gender": ";M;", "homepage": ";https://webdocs.cs.ualberta.ca/~bowling/;", "dblp": ";71/5161;", "google_scholar": ";https://scholar.google.ca/citations?user=PYtPCHoAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jacob_Adkins1;~Michael_Bowling1;~Adam_White1", "aff": ";Department of Computing Science, University of Alberta;", "aff_domain": ";cs.ualberta.ca;", "position": ";Full Professor;", "bibtex": "@inproceedings{\nadkins2024a,\ntitle={A Method for Evaluating Hyperparameter Sensitivity in Reinforcement Learning},\nauthor={Jacob Adkins and Michael Bowling and Adam White},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4OJdZhcwBb}\n}", "github": "", "reviewers": "ydma;yuTN;aSZK;6Bue", "pdf_size": 894568, "rating": "4;4;7;7", "confidence": "4;3;4;4", "soundness": "2;2;2;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "111;107;164;139", "wc_strengths": "95;115;70;137", "wc_weaknesses": "719;377;47;242", "wc_questions": "65;241;361;119", "wc_limitations": "153;114;1;26", "wc_review": "1143;954;643;663", "wc_reply_reviewers": "291;225;243;56", "wc_reply_authors": "139;197;12;75", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 130.25, 23.05834989759675 ], "wc_strengths_avg": [ 104.25, 24.732316915323562 ], "wc_weaknesses_avg": [ 346.25, 245.1054619954439 ], "wc_questions_avg": [ 196.5, 114.38859208854701 ], "wc_limitations_avg": [ 73.5, 62.195257053894395 ], "wc_review_avg": [ 850.75, 208.8544648792551 ], "wc_reply_reviewers_avg": [ 203.75, 88.64923857541022 ], "wc_reply_authors_avg": [ 105.75, 69.22201600647008 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15564551758627252317&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";cs.ualberta.ca;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "Department of Computing Science", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "4RTxKGUUgc", "title": "Show, Don't Tell: Evaluating Large Language Models Beyond Textual Understanding with ChildPlay", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "The evaluation of Large Language Models (LLMs) often focuses on linguistic tasks, yet such assessments may not fully capture the models' general reasoning capabilities. We explore the hypothesis that LLMs, such as GPT-3.5 and GPT-4, possess broader cognitive functions, particularly in non-linguistic domains. Our approach extends beyond standard linguistic benchmarks by incorporating games like Tic-Tac-Toe, Connect Four, and Battleship, encoded via ASCII, to assess strategic thinking and decision-making. To evaluate the models' ability to generalize beyond their training data, we introduce two additional games. The first game, LEGO Connect Language (LCL), tests the models' capacity to understand spatial logic and follow assembly instructions. The second game, the game of shapes, challenges the models to identify shapes represented by 1s within a matrix of zeros, further testing their spatial reasoning skills. This \"show, don't tell\" strategy uses games to potentially reveal cognitive capabilities rather than simply querying the models. Our results indicate that despite their proficiency on standard benchmarks and temperature settings, GPT-3.5 and GPT-4's abilities to play and reason about fully observable games without pre-training is mediocre. Both models fail to anticipate losing moves in Tic-Tac-Toe and Connect Four, and they are unable to play Battleship correctly. While GPT-4 shows some success in the game of shapes, both models struggle with the assembly tasks presented in the LCL game. These results suggest that while LLMs like the GPT models can emulate conversational proficiency and basic rule comprehension, their performance in strategic gameplay and spatial reasoning tasks is limited in cognitive flexibility and generalization. Importantly, this reveals a blind spot in current LLM benchmarks that we highlight with our gameplay benchmark suite ChildPlay ($$\\href{https://github.com/child-play-neurips/child-play}{GitHub Repository}$$). Our findings provide a cautionary tale about claims of emergent intelligence and reasoning capabilities of LLMs that are roughly the size of GPT-3.5 and GPT-4", "keywords": "Large Language Models;Benchmarking;Gameplay Benchmarks;Non-linguistic Reasoning;Spatial Logic;Zero-shot Learning", "primary_area": "", "supplementary_material": "/attachment/10fab0e78aebf77abf5d6c5066caf8803627dcb6.zip", "author": "Gon\u00e7alo Hora de Carvalho;Robert Pollice;Oscar Knap", "authorids": "~Gon\u00e7alo_Hora_de_Carvalho1;~Robert_Pollice1;~Oscar_Knap1", "gender": "M;M;M", "homepage": ";https://pollicegroup.web.rug.nl/;", "dblp": ";286/1696.html;", "google_scholar": "DJMVpwwAAAAJ;https://scholar.google.ch/citations?user=JR2N3JIAAAAJ;", "orcid": "0000-0001-8776-4852;0000-0001-8836-6266;", "linkedin": "gon%C3%A7alo-carvalho-66909713b/;https://linkedin.com/in/robert-pollice-b0246a131;oscar-knap-415737236", "or_profile": "~Gon\u00e7alo_Hora_de_Carvalho1;~Robert_Pollice1;~Oscar_Knap1", "aff": "University Medical Center Groningen;University of Groningen;University of Groningen", "aff_domain": "umcg.nl;rug.nl;rug.nl", "position": "Instructor;Assistant Professor;Researcher", "bibtex": "@misc{\nanonymous2024show,\ntitle={Show, Don't Tell: Evaluating Large Language Models Beyond Textual Understanding with ChildPlay},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=4RTxKGUUgc}\n}", "github": "", "project": "", "reviewers": "4ccX;DMgg;E51U;MtWS", "site": "https://openreview.net/forum?id=4RTxKGUUgc", "pdf_size": 2703499, "rating": "4;5;5;5", "confidence": "4;4;4;3", "wc_summary_and_contributions": "47;115;96;73", "wc_strengths": "28;89;7;8", "wc_improvement": "26;231;178;4", "wc_limitations": "16;8;7;69", "wc_correctness": "9;132;13;16", "wc_clarity": "12;194;1;27", "wc_relation_to_prior_work": "1;104;1;15", "wc_documentation": "1;37;1;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "141;911;305;223", "wc_reply_reviewers": "0;53;20;0", "wc_reply_authors": "928;883;1055;790", "reply_reviewers": "0;1;1;0", "reply_authors": "10;10;9;5", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 82.75, 25.439880109780393 ], "wc_strengths_avg": [ 33.0, 33.39910178432947 ], "wc_improvement_avg": [ 109.75, 96.89781989291606 ], "wc_limitations_avg": [ 25.0, 25.64176280991617 ], "wc_correctness_avg": [ 42.5, 51.73248495867949 ], "wc_clarity_avg": [ 58.5, 78.77340921910134 ], "wc_relation_to_prior_work_avg": [ 30.25, 42.96146529158427 ], "wc_documentation_avg": [ 12.25, 14.7542366796795 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 395.0, 303.50288301760827 ], "wc_reply_reviewers_avg": [ 18.25, 21.660736367907717 ], "wc_reply_authors_avg": [ 914.0, 95.4122633627355 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 8.5, 2.0615528128088303 ], "replies_avg": [ 43, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9625015083972805732&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;1", "aff_unique_norm": "University Medical Center Groningen;University of Groningen", "aff_unique_dep": ";", "aff_unique_url": "https://www.umcg.nl;https://www.rug.nl", "aff_unique_abbr": "UMCG;RUG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "AgentBoard: An Analytical Evaluation Board of Multi-turn LLM Agents", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97853", "id": "4S8agvKjle", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4S8agvKjle", "openreview": "https://openreview.net/forum?id=4S8agvKjle", "poster": "/media/PosterPDFs/NeurIPS%202024/97853.png?t=1733415180.5722172", "project": "", "author_site": "Ma Chang, Junlei Zhang, Zhihao Zhu, Cheng Yang, Yujiu Yang, Yaohui Jin, Zhenzhong Lan, Lingpeng Kong, Junxian He", "tldr": "", "abstract": "Evaluating large language models (LLMs) as general-purpose agents is essential for understanding their capabilities and facilitating their integration into practical applications. However, the evaluation process presents substantial challenges. A primary obstacle is the benchmarking of agent performance across diverse scenarios within a unified framework, especially in maintaining partially-observable environments and ensuring multi-round interactions. Moreover, current evaluation frameworks mostly focus on the final success rate, revealing few insights during the process and failing to provide a deep understanding of the model abilities. To address these challenges, we introduce AgentBoard, a pioneering comprehensive benchmark and accompanied open-source evaluation framework tailored to analytical evaluation of LLM agents. AgentBoard offers a fine-grained progress rate metric that captures incremental advancements as well as a comprehensive evaluation toolkit that features easy assessment of agents for multi-faceted analysis through interactive visualization. This not only sheds light on the capabilities and limitations of LLM agents but also propels the interpretability of their performance to the forefront. Ultimately, AgentBoard serves as a significant step towards demystifying agent behaviors and accelerating the development of stronger LLM agents.", "keywords": "LLM Agent; LLM Benchmark; Planning", "primary_area": "", "supplementary_material": "/attachment/77a9d8dd44a0798cce3d52ed732f003d028707ee.zip", "author": "Chang Ma;Junlei Zhang;Zhihao Zhu;Cheng Yang;Yujiu Yang;Yaohui Jin;Zhenzhong Lan;Lingpeng Kong;Junxian He", "authorids": "~Chang_Ma2;~Junlei_Zhang1;~Zhihao_Zhu1;~Cheng_Yang7;~Yujiu_Yang2;~Yaohui_Jin2;~Zhenzhong_Lan2;~Lingpeng_Kong1;~Junxian_He1", "gender": ";M;;;M;M;;M;M", "homepage": ";;;;https://sites.google.com/view/iigroup-thu;http://front.sjtu.edu.cn/~jinyh/;;https://ikekonglp.github.io/;https://jxhe.github.io", "dblp": ";197/3153.html;;;30/3847;27/7040;27/3780;144/7656;188/6127.html", "google_scholar": ";;;;4gH3sxsAAAAJ;H_7_oVcAAAAJ;tlDABkgAAAAJ;f1hBi5wAAAAJ;BIFGeoUAAAAJ", "orcid": ";;;;0000-0002-6427-1024;0000-0001-6158-6277;;;", "linkedin": ";;;;;yaohui-jin-bab58511/;;;", "or_profile": "~Chang_Ma2;~Junlei_Zhang1;~Zhihao_Zhu1;~Cheng_Yang7;~Yujiu_Yang2;~Yaohui_Jin2;~Zhenzhong_Lan2;~Lingpeng_Kong1;~Junxian_He1", "aff": ";Westlake University;;;Tsinghua University;Shanghai Jiaotong University;Westlake University;Department of Computer Science, The University of Hong Kong;Hong Kong University of Science and Technology", "aff_domain": ";westlake.edu;;;tsinghua.edu.cn;sjtu.edu.cn;westlake.edu.cn;cs.hku.hk;ust.hk", "position": ";PhD student;;;Full Professor;Full Professor;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nma2024agentboard,\ntitle={AgentBoard: An Analytical Evaluation Board of Multi-turn {LLM} Agents},\nauthor={Chang Ma and Junlei Zhang and Zhihao Zhu and Cheng Yang and Yujiu Yang and Yaohui Jin and Zhenzhong Lan and Lingpeng Kong and Junxian He},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=4S8agvKjle}\n}", "github": "", "reviewers": "sNFC;HfY5;h7Ld;n2d6", "pdf_size": 3986704, "rating": "7;7;7;8", "confidence": "4;3;3;5", "wc_summary_and_contributions": "87;64;42;137", "wc_strengths": "59;101;67;2", "wc_improvement": "77;107;203;2", "wc_limitations": "41;150;30;1", "wc_correctness": "9;25;28;2", "wc_clarity": "9;11;10;1", "wc_relation_to_prior_work": "14;15;15;1", "wc_documentation": "13;9;24;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "310;483;420;148", "wc_reply_reviewers": "0;5;15;53", "wc_reply_authors": "0;27;12;27", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;4", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 82.5, 35.259750424527965 ], "wc_strengths_avg": [ 57.25, 35.58352849282937 ], "wc_improvement_avg": [ 97.25, 72.04295593602473 ], "wc_limitations_avg": [ 55.5, 56.482298111886344 ], "wc_correctness_avg": [ 16.0, 10.8397416943394 ], "wc_clarity_avg": [ 7.75, 3.960744879438715 ], "wc_relation_to_prior_work_avg": [ 11.25, 5.931905258852336 ], "wc_documentation_avg": [ 11.75, 8.287792227125388 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 340.25, 127.09519070366117 ], "wc_reply_reviewers_avg": [ 18.25, 20.777090749188154 ], "wc_reply_authors_avg": [ 16.5, 11.324751652906125 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14762366677548506867&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";westlake.edu;;;tsinghua.edu.cn;sjtu.edu.cn;westlake.edu.cn;cs.hku.hk;ust.hk", "author_num": 9, "aff_unique_index": "0;1;2;0;3;4", "aff_unique_norm": "Westlake University;Tsinghua University;Shanghai Jiao Tong University;University of Hong Kong;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;Department of Computer Science;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://www.hku.hk;https://www.ust.hk", "aff_unique_abbr": "WU;THU;SJTU;HKU;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "On the Complexity of Teaching a Family of Linear Behavior Cloning Learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96669", "id": "4SAR7IRqmB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4SAR7IRqmB", "openreview": "https://openreview.net/forum?id=4SAR7IRqmB", "poster": "", "project": "", "author_site": "Shubham Bharti, Stephen Wright, Adish Singla, Jerry Zhu", "tldr": "", "abstract": "We study optimal teaching for a family of Behavior Cloning learners that learn using a linear hypothesis class. In this setup, a knowledgeable teacher can demonstrate a dataset of state and action tuples and is required to teach an optimal policy to an entire family of BC learners using the smallest possible dataset. We analyze the linear family and design a novel teaching algorithm called `TIE' that achieves the instance optimal Teaching Dimension for the entire family. However, we show that this problem is NP-hard for action spaces with $|\\mathcal{A}| > 2$ and provide an efficient approximation algorithm with a $\\log(|\\mathcal{A}| - 1)$ guarantee on the optimal teaching size. We present empirical results to demonstrate the effectiveness of our algorithm and compare it to various baselines in different teaching environments.", "keywords": "Machine Teaching;Behavior Cloning;Reinforcement Learning;Supervised Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/17d515ae014b64037314fa675729df8d006247c2.zip", "author": "Shubham Kumar Bharti;Stephen Wright;Adish Singla;Jerry Zhu", "authorids": "~Shubham_Kumar_Bharti1;~Stephen_Wright1;~Adish_Singla2;~Jerry_Zhu1", "gender": "M;M;;M", "homepage": "http://skbharti.github.io;https://wrightstephen.github.io/sw_proj/;https://machineteaching.mpi-sws.org/adishsingla.html;http://pages.cs.wisc.edu/~jerryzhu/", "dblp": "255/5381;75/2677;58/657;z/XiaojinZhu", "google_scholar": ";VFQRIOwAAAAJ;kXz2seUAAAAJ;https://scholar.google.com.tw/citations?user=hqTu-QcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shubham_Kumar_Bharti1;~Stephen_Wright1;~Adish_Kumar_Singla1;~Xiaojin_Zhu1", "aff": "Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin, Madison;Max Planck Institute for Software Systems (MPI-SWS);University of Wisconsin, Madison", "aff_domain": "cs.wisc.edu;wisc.edu;mpi-sws.org;wisc.edu", "position": "PhD student;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nbharti2024on,\ntitle={On the Complexity of Teaching a Family of Linear Behavior Cloning Learners},\nauthor={Shubham Kumar Bharti and Stephen Wright and Adish Singla and Jerry Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4SAR7IRqmB}\n}", "github": "", "reviewers": "d9NQ;UxoA;sX37;pEtD", "pdf_size": 992154, "rating": "4;5;6;7", "confidence": "5;2;4;2", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "404;80;116;196", "wc_strengths": "126;55;69;20", "wc_weaknesses": "197;231;93;54", "wc_questions": "10;191;34;20", "wc_limitations": "40;33;4;2", "wc_review": "777;590;316;292", "wc_reply_reviewers": "44;104;0;0", "wc_reply_authors": "309;566;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 199.0, 125.58264211267415 ], "wc_strengths_avg": [ 67.5, 38.20013088982812 ], "wc_weaknesses_avg": [ 143.75, 72.59261326057907 ], "wc_questions_avg": [ 63.75, 73.96071592406336 ], "wc_limitations_avg": [ 19.75, 16.946607330082326 ], "wc_review_avg": [ 493.75, 201.11734758593053 ], "wc_reply_reviewers_avg": [ 37.0, 42.649736224272246 ], "wc_reply_authors_avg": [ 218.75, 236.87061341584777 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6024640760767093, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yDtKK5-a6GsJ:scholar.google.com/&scioq=On+the+Complexity+of+Teaching+a+Family+of+Linear+Behavior+Cloning+Learners&hl=en&as_sdt=0,23", "gs_version_total": 2, "email": "cs.wisc.edu;wisc.edu;mpi-sws.org;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin;Max Planck Institute for Software Systems", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.mpi-sws.org", "aff_unique_abbr": "UW-Madison;UW;MPI-SWS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "iVideoGPT: Interactive VideoGPTs are Scalable World Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96668", "id": "4TENzBftZR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4TENzBftZR", "openreview": "https://openreview.net/forum?id=4TENzBftZR", "poster": "/media/PosterPDFs/NeurIPS%202024/96668.png?t=1731853176.2225065", "project": "", "author_site": "Jialong Wu, Shaofeng Yin, Ningya Feng, Xu He, Dong Li, Jianye Hao, Mingsheng Long", "tldr": "", "abstract": "World models empower model-based agents to interactively explore, reason, and plan within imagined environments for real-world decision-making. However, the high demand for interactivity poses challenges in harnessing recent advancements in video generative models for developing world models at scale. This work introduces Interactive VideoGPT (iVideoGPT), a scalable autoregressive transformer framework that integrates multimodal signals\u2014visual observations, actions, and rewards\u2014into a sequence of tokens, facilitating an interactive experience of agents via next-token prediction. iVideoGPT features a novel compressive tokenization technique that efficiently discretizes high-dimensional visual observations. Leveraging its scalable architecture, we are able to pre-train iVideoGPT on millions of human and robotic manipulation trajectories, establishing a versatile foundation that is adaptable to serve as interactive world models for a wide range of downstream tasks. These include action-conditioned video prediction, visual planning, and model-based reinforcement learning, where iVideoGPT achieves competitive performance compared with state-of-the-art methods. Our work advances the development of interactive general world models, bridging the gap between generative video models and practical model-based reinforcement learning applications. Code and pre-trained models are available at https://thuml.github.io/iVideoGPT.", "keywords": "world model;model-based reinforcement learning;video prediction;visual planning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jialong Wu;Shaofeng Yin;Ningya Feng;Xu He;Dong Li;Jianye HAO;Mingsheng Long", "authorids": "~Jialong_Wu1;~Shaofeng_Yin2;~Ningya_Feng1;~Xu_He2;~Dong_Li10;~Jianye_HAO1;~Mingsheng_Long5", "gender": "M;M;M;M;M;M;M", "homepage": "https://manchery.github.io/;https://github.com/operator22th;https://github.com/fny21;https://scholar.google.com/citations?user=308KqrIAAAAJ&hl=en;;http://www.icdai.org/jianye.html;http://ise.thss.tsinghua.edu.cn/~mlong", "dblp": "73/498-1.html;;358/4461;89/3991;47/4826-16;21/7664.html;74/9023", "google_scholar": "FfTZ66gAAAAJ;https://scholar.google.com/citations?hl=en;;308KqrIAAAAJ;;;_MjXpXkAAAAJ", "orcid": "0009-0008-7846-053X;;0009-0006-8448-2570;;;0000-0002-0422-8235;0000-0002-5412-9120", "linkedin": ";;;;;;", "or_profile": "~Jialong_Wu1;~Shaofeng_Yin2;~Ningya_Feng1;~Xu_He2;~Dong_Li10;~Jianye_HAO1;~Mingsheng_Long2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Tianjin University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;huawei.com;huawei.com;tju.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Researcher;Principal Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024ivideogpt,\ntitle={iVideo{GPT}: Interactive Video{GPT}s are Scalable World Models},\nauthor={Jialong Wu and Shaofeng Yin and Ningya Feng and Xu He and Dong Li and Jianye HAO and Mingsheng Long},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4TENzBftZR}\n}", "github": "", "reviewers": "HkK9;6ajb;zSnp;yh7R", "pdf_size": 21611413, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "3;2;2;4", "novelty": "2;4;3;4", "presentation": "3;2;3;4", "wc_summary": "63;72;130;51", "wc_strengths": "63;127;31;23", "wc_weaknesses": "212;726;254;6", "wc_questions": "48;66;44;35", "wc_limitations": "92;11;6;4", "wc_review": "478;1002;465;119", "wc_reply_reviewers": "0;889;12;0", "wc_reply_authors": "0;388;27;0", "reply_reviewers": "0;2;1;0", "reply_authors": "1;3;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.0, 30.37268509697488 ], "wc_strengths_avg": [ 61.0, 40.93897898091744 ], "wc_weaknesses_avg": [ 299.5, 263.5199233454655 ], "wc_questions_avg": [ 48.25, 11.277743568639961 ], "wc_limitations_avg": [ 28.25, 36.89427462357811 ], "wc_review_avg": [ 516.0, 315.37675881396206 ], "wc_reply_reviewers_avg": [ 225.25, 383.24755380824024 ], "wc_reply_authors_avg": [ 103.75, 164.48157191612682 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6996981249348524814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;huawei.com;huawei.com;tju.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;1;2;0", "aff_unique_norm": "Tsinghua University;Huawei;Tianjin University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.huawei.com;http://www.tju.edu.cn", "aff_unique_abbr": "THU;Huawei;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Introspective Planning: Aligning Robots' Uncertainty with Inherent Task Ambiguity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96667", "id": "4TlUE0ufiz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4TlUE0ufiz", "openreview": "https://openreview.net/forum?id=4TlUE0ufiz", "poster": "", "project": "", "author_site": "Kaiqu Liang, Zixu Zhang, Jaime Fisac", "tldr": "", "abstract": "Large language models (LLMs) exhibit advanced reasoning skills, enabling robots to comprehend natural language instructions and strategically plan high-level actions through proper grounding. However, LLM hallucination may result in robots confidently executing plans that are misaligned with user goals or even unsafe in critical scenarios. Additionally, inherent ambiguity in natural language instructions can introduce uncertainty into the LLM's reasoning and planning. We propose introspective planning, a systematic approach that guides LLMs to refine their own uncertainty in alignment with inherent task ambiguity. Our approach constructs a knowledge base containing introspective reasoning examples as post-hoc rationalizations of human-selected safe and compliant plans, which are retrieved during deployment. Evaluations on three tasks, including a new safe mobile manipulation benchmark, indicate that introspection substantially improves both compliance and safety over state-of-the-art LLM-based planning methods. Additionally, we empirically show that introspective planning, in combination with conformal prediction, achieves tighter confidence bounds, maintaining statistical success guarantees while minimizing unnecessary user clarification requests.", "keywords": "Large Language Models;Conformal Prediction;Uncertainty Quantification;Foundation Models for Decision Making", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a7a718feecbbf45b8b7f156a7b9c10493025770c.zip", "author": "Kaiqu Liang;Zixu Zhang;Jaime Fern\u00e1ndez Fisac", "authorids": "~Kaiqu_Liang1;~Zixu_Zhang1;~Jaime_Fern\u00e1ndez_Fisac1", "gender": ";M;M", "homepage": "https://kaiquliang.github.io/;https://zzx9636.github.io/;https://ee.princeton.edu/people/jaime-fernandez-fisac", "dblp": ";https://dblp.org/rec/conf/rss/ZhangF21.html;156/0109", "google_scholar": "hmqvdJgAAAAJ;J1ub1esAAAAJ;iAq_9tEAAAAJ", "orcid": ";;0000-0002-2676-5090", "linkedin": "kaiqu-liang-00042b195/?originalSubdomain=ca;;jaime-fisac-134341b0/", "or_profile": "~Kaiqu_Liang1;~Zixu_Zhang1;~Jaime_Fern\u00e1ndez_Fisac1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliang2024introspective,\ntitle={Introspective Planning: Aligning Robots' Uncertainty with Inherent Task Ambiguity},\nauthor={Kaiqu Liang and Zixu Zhang and Jaime Fern{\\'a}ndez Fisac},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4TlUE0ufiz}\n}", "github": "", "reviewers": "euid;QHHV;Gr1N;RRQ3;QU2A", "pdf_size": 3253892, "rating": "3;5;6;7;7", "confidence": "5;4;4;1;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;4;3", "presentation": "3;3;3;4;4", "wc_summary": "52;95;72;42;123", "wc_strengths": "32;60;101;18;78", "wc_weaknesses": "155;71;274;1;75", "wc_questions": "9;4;102;1;34", "wc_limitations": "9;7;92;9;9", "wc_review": "257;237;641;71;319", "wc_reply_reviewers": "0;18;37;0;55", "wc_reply_authors": "0;10;10;0;37", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.4, 1.3564659966250538 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 76.8, 29.376180827330156 ], "wc_strengths_avg": [ 57.8, 30.095846889562686 ], "wc_weaknesses_avg": [ 115.2, 93.18025541926788 ], "wc_questions_avg": [ 30.0, 37.836490323495916 ], "wc_limitations_avg": [ 25.2, 33.408980828513755 ], "wc_review_avg": [ 305.0, 186.9630979632077 ], "wc_reply_reviewers_avg": [ 22.0, 21.438283513378586 ], "wc_reply_authors_avg": [ 11.4, 13.558761005342635 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.807813166395078, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10919631825755854532&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "AV-GS: Learning Material and Geometry Aware Priors for Novel View Acoustic Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96666", "id": "4U18ZoRXTD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4U18ZoRXTD", "openreview": "https://openreview.net/forum?id=4U18ZoRXTD", "poster": "/media/PosterPDFs/NeurIPS%202024/96666.png?t=1733151540.7388813", "project": "", "author_site": "Swapnil Bhosale, Haosen Yang, Diptesh Kanojia, Jiankang Deng, Xiatian Zhu", "tldr": "", "abstract": "Novel view acoustic synthesis (NVAS) aims to render binaural audio at any target viewpoint, given a mono audio emitted by a sound source at a 3D scene. Existing methods have proposed NeRF-based implicit models to exploit visual cues as a condition for synthesizing binaural audio. However, in addition to low efficiency originating from heavy NeRF rendering, these methods all have a limited ability of characterizing the entire scene environment such as room geometry, material properties, and the spatial relation between the listener and sound source. To address these issues, we propose a novel Audio-Visual Gaussian Splatting (AV-GS) model. To obtain a material-aware and geometry-aware condition for audio synthesis, we learn an explicit point-based scene representation with audio-guidance parameters on locally initialized Gaussian points, taking into account the space relation from the listener and sound source. To make the visual scene model audio adaptive, we propose a point densification and pruning strategy to optimally distribute the Gaussian points, with the per-point contribution in sound propagation (e.g., more points needed for texture-less wall surfaces as they affect sound path diversion). Extensive experiments validate the superiority of our AV-GS over existing alternatives on the real-world RWAS and simulation-based SoundSpaces datasets. Project page: \\url{https://surrey-uplab.github.io/research/avgs/}", "keywords": "Spatial audio synthesis;Gaussian Splatting;Material characteristics;Geometry priors", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Swapnil Bhosale;Haosen Yang;Diptesh Kanojia;Jiankang Deng;Xiatian Zhu", "authorids": "~Swapnil_Bhosale1;~Haosen_Yang1;~Diptesh_Kanojia1;~Jiankang_Deng1;~Xiatian_Zhu3", "gender": "M;M;M;M;", "homepage": "https://swapb94.github.io/;;http://dipteshkanojia.github.io;https://jiankangdeng.github.io/;https://x-up-lab.github.io", "dblp": "246/3229;245/9949-3;127/0183;156/7808;128/7935", "google_scholar": "FsO6e24AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=UNCgCAEAAAAJ;Z_UoQFsAAAAJ;ZbA-z1cAAAAJ", "orcid": ";;0000-0001-8814-0080;0000-0002-3709-6216;0000-0002-9284-2955", "linkedin": ";;dipteshkanojia/;jiankang-deng-b45b21b4/?originalSubdomain=uk;", "or_profile": "~Swapnil_Bhosale1;~Haosen_Yang1;~Diptesh_Kanojia1;~Jiankang_Deng1;~Xiatian_Zhu3", "aff": "University of Surrey;University of Surrey;University of Surrey;Imperial College London;University of Surrey", "aff_domain": "surrey.ac.uk;surrey.ac.uk;surrey.ac.uk;imperial.ac.uk;surrey.ac.uk", "position": "PhD student;PhD student;Lecturer;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nbhosale2024avgs,\ntitle={{AV}-{GS}: Learning Material and Geometry Aware Priors for Novel View Acoustic Synthesis},\nauthor={Swapnil Bhosale and Haosen Yang and Diptesh Kanojia and Jiankang Deng and Xiatian Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4U18ZoRXTD}\n}", "github": "", "reviewers": "RHwj;DaxK;m6xj;hPZr", "pdf_size": 5149134, "rating": "5;6;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "82;102;69;63", "wc_strengths": "117;45;71;51", "wc_weaknesses": "254;247;208;351", "wc_questions": "62;40;101;315", "wc_limitations": "16;7;16;68", "wc_review": "531;441;465;848", "wc_reply_reviewers": "156;41;117;20", "wc_reply_authors": "30;19;51;17", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 14.949916387726054 ], "wc_strengths_avg": [ 71.0, 28.24889378365107 ], "wc_weaknesses_avg": [ 265.0, 52.65453446760307 ], "wc_questions_avg": [ 129.5, 109.3034766144243 ], "wc_limitations_avg": [ 26.75, 24.097458372201828 ], "wc_review_avg": [ 571.25, 163.14468272058394 ], "wc_reply_reviewers_avg": [ 83.5, 55.26526938322114 ], "wc_reply_authors_avg": [ 29.25, 13.497684986693088 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15073327819478531543&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "surrey.ac.uk;surrey.ac.uk;surrey.ac.uk;imperial.ac.uk;surrey.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Surrey;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.surrey.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "Surrey;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Provably Optimal Memory Capacity for Modern Hopfield Models: Transformer-Compatible Dense Associative Memories as Spherical Codes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96665", "id": "4UReW4Ez6s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4UReW4Ez6s", "openreview": "https://openreview.net/forum?id=4UReW4Ez6s", "poster": "/media/PosterPDFs/NeurIPS%202024/96665.png?t=1733535372.8953798", "project": "", "author_site": "Jerry Yao-Chieh Hu, Dennis Wu, Han Liu", "tldr": "", "abstract": "We study the optimal memorization capacity of modern Hopfield models and Kernelized Hopfield Models (KHMs), a transformer-compatible class of Dense Associative Memories.\nWe present a tight analysis by establishing a connection between the memory configuration of KHMs and spherical codes from information theory. \nSpecifically, we treat the stored memory set as a specialized spherical code.\nThis enables us to cast the memorization problem in KHMs into a point arrangement problem on a hypersphere.\nWe show that the optimal capacity of KHMs occurs when the feature space allows memories to form an optimal spherical code.\nThis unique perspective leads to: \n1. An analysis of how KHMs achieve optimal memory capacity, and identify corresponding necessary conditions. \nImportantly, we establish an upper capacity bound that matches the well-known exponential lower bound in the literature. \nThis provides the first tight and optimal asymptotic memory capacity for modern Hopfield models.\n2. A sub-linear time algorithm $\\mathtt{U}\\text{-}\\mathtt{Hop}$+ to reach KHMs' optimal capacity. \n3. An analysis of the scaling behavior of the required feature dimension relative to the number of stored memories.\n\nThese efforts improve both the retrieval capability of KHMs and the representation learning of corresponding transformers.\nExperimentally, we provide thorough numerical results to back up theoretical findings.", "keywords": "Modern Hopfield Models;Dense Associative Memory;Transformer;Transformer Representation Learning;Memory Capacity;Foundation Model", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/875f9e03071e7eb6b55172e369ef0b99aea63267.zip", "author": "Jerry Yao-Chieh Hu;Dennis Wu;Han Liu", "authorids": "~Jerry_Yao-Chieh_Hu1;~Dennis_Wu1;~Han_Liu4", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jerry_Yao-Chieh_Hu1;~Dennis_Wu1;~Han_Liu4", "aff": ";;Northwestern University", "aff_domain": ";;u.northwestern.edu", "position": ";;Associate Professor", "bibtex": "@inproceedings{\nhu2024provably,\ntitle={Provably Optimal Memory Capacity for Modern Hopfield Models: Transformer-Compatible Dense Associative Memories as Spherical Codes},\nauthor={Jerry Yao-Chieh Hu and Dennis Wu and Han Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4UReW4Ez6s}\n}", "github": "", "reviewers": "U6Ut;UbwM;vthX;LSVx", "pdf_size": 2704681, "rating": "5;5;6;7", "confidence": "5;5;4;3", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "3;2;3;4", "wc_summary": "41;82;53;180", "wc_strengths": "102;46;39;46", "wc_weaknesses": "107;74;155;25", "wc_questions": "22;179;68;97", "wc_limitations": "1;16;32;57", "wc_review": "273;397;347;405", "wc_reply_reviewers": "13;224;40;80", "wc_reply_authors": "2;499;45;23", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.0, 54.61226968365259 ], "wc_strengths_avg": [ 58.25, 25.420218331084413 ], "wc_weaknesses_avg": [ 90.25, 47.420327919574746 ], "wc_questions_avg": [ 91.5, 57.159863540774836 ], "wc_limitations_avg": [ 26.5, 20.74246851269154 ], "wc_review_avg": [ 355.5, 52.561868307738074 ], "wc_reply_reviewers_avg": [ 89.25, 81.36760719106836 ], "wc_reply_authors_avg": [ 142.25, 206.53011281650916 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13069877967498919382&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": ";;u.northwestern.edu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Rethinking Memory and Communication Costs for Efficient Data Parallel Training of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96664", "id": "4Un2TD9bNe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Un2TD9bNe", "openreview": "https://openreview.net/forum?id=4Un2TD9bNe", "poster": "/media/PosterPDFs/NeurIPS%202024/96664.png?t=1731392943.3680232", "project": "", "author_site": "Hanxiao Zhang, Lin JU, Chan Wu, Jinjing Huang, Youshao Xiao, Zhenglei Zhou, Zhiming fan, Zhaoxin Huan, Siyuan Li, Fanzhuang Meng, Lei Liang, Xiaolu Zhang, Jun Zhou", "tldr": "", "abstract": "Recently, various strategies for distributed training of large language models (LLMs) have been proposed.\nBy categorizing them into basic strategies and composite strategies, we have discovered that existing basic strategies provide limited options in specific scenarios, leaving considerable room for optimization in training speed.\nIn this paper, we rethink the impact of memory and communication costs on the training speed of LLMs, taking into account the impact of intra- and inter-group communication performance disparities, and then propose a new set of basic strategies named the \\textbf{Pa}rtial \\textbf{R}edundancy \\textbf{O}ptimizer (PaRO).\nPaRO Data Parallelism (PaRO-DP) accelerates LLM training through refined model state partitioning and tailored training procedures. At the same time, PaRO Collective Communications (PaRO-CC) speeds up collective communication operations by rearranging the topology. We also propose a guideline for choosing different DP strategies based on simple quantitative calculations, which yields minimal ranking errors.\nOur experiments demonstrate that PaRO improves the training speed of LLMs by up to 266\\% that of ZeRO-3 as basic DP strategies.\nMoreover, employing PaRO-CC independently for model parallel strategies, such as Megatron, can also boost the training speed by 17\\%.", "keywords": "Large Language Model;Distributed Training;Communication Topology", "primary_area": "infrastructure", "supplementary_material": "", "author": "Hanxiao Zhang;Lin JU;Chan Wu;Jinjing Huang;Youshao Xiao;Zhenglei Zhou;Zhiming fan;Zhaoxin Huan;Siyuan Li;Fanzhuang Meng;Lei Liang;Xiaolu Zhang;JUN ZHOU", "authorids": "~Hanxiao_Zhang4;~Lin_JU1;~Chan_Wu1;~Jinjing_Huang1;~Youshao_Xiao1;~Zhenglei_Zhou1;~Zhiming_fan1;~Zhaoxin_Huan1;~Siyuan_Li14;~Fanzhuang_Meng1;~Lei_Liang2;~Xiaolu_Zhang2;~JUN_ZHOU6", "gender": "M;M;M;F;M;;M;M;M;M;M;F;M", "homepage": "https://orcid.org/0009-0006-0631-0802;http://julin.me;;https://www.cnblogs.com/yutingmoran/;;;https://github.com/Lyears;;;;https://github.com/leywar;https://scholar.google.com/citations?user=cAz9PToAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en", "dblp": ";;;;;;;221/3647.html;;;24/1476-2;48/5176;99/3847-11", "google_scholar": ";;;;;;;;;;9vrGGHwAAAAJ;;mCVvloEAAAAJ", "orcid": ";0009-0000-8807-9478;0000-0001-7102-1181;;0009-0002-5189-3606;0000-0002-4672-3197;;;0009-0005-5015-2461;0009-0004-4269-1433;0009-0000-9700-5809;0000-0001-8055-0245;0000-0001-6033-6102", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Hanxiao_Zhang4;~Lin_JU1;~Chan_Wu1;~Jinjing_Huang1;~Youshao_Xiao1;~Zhenglei_Zhou1;~Zhiming_fan1;~Zhaoxin_Huan1;~Siyuan_Li14;~Fanzhuang_Meng1;~Lei_Liang2;~Xiaolu_Zhang2;~JUN_ZHOU6", "aff": ";Alibaba Group;Ant Group;ANTGROUP;;;Alibaba Group;;ant group;;Alibaba Group;Ant Group;Ant Group", "aff_domain": ";alibaba-inc.com;antgroup.com;antgroup.com;;;antgroup.com;;antgroup.com;;antgroup.com;antfin.com;antgroup.com", "position": ";Researcher;Researcher;Researcher;;;Researcher;;Researcher;;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nzhang2024rethinking,\ntitle={Rethinking Memory and Communication Costs for Efficient Data Parallel Training of Large Language Models},\nauthor={Hanxiao Zhang and Lin JU and Chan Wu and Jinjing Huang and Youshao Xiao and Zhenglei Zhou and Zhiming fan and Zhaoxin Huan and Siyuan Li and Fanzhuang Meng and Lei Liang and Xiaolu Zhang and JUN ZHOU},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4Un2TD9bNe}\n}", "github": "", "reviewers": "VsLY;63Xk;X1pV", "pdf_size": 1140442, "rating": "5;5;7", "confidence": "3;3;4", "soundness": "2;2;4", "novelty": "2;3;4", "presentation": "3;3;3", "wc_summary": "131;87;131", "wc_strengths": "71;50;237", "wc_weaknesses": "172;52;176", "wc_questions": "61;40;137", "wc_limitations": "66;4;1", "wc_review": "501;233;682", "wc_reply_reviewers": "381;0;39", "wc_reply_authors": "1072;0;0", "reply_reviewers": "3;0;1", "reply_authors": "4;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.33333333333333, 20.741798914805393 ], "wc_strengths_avg": [ 119.33333333333333, 83.64342306614563 ], "wc_weaknesses_avg": [ 133.33333333333334, 57.53453069437711 ], "wc_questions_avg": [ 79.33333333333333, 41.667999978667346 ], "wc_limitations_avg": [ 23.666666666666668, 29.95923155816176 ], "wc_review_avg": [ 472.0, 184.44692100077643 ], "wc_reply_reviewers_avg": [ 140.0, 171.15490060176484 ], "wc_reply_authors_avg": [ 357.3333333333333, 505.34564628798597 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LjeSN_L4WVcJ:scholar.google.com/&scioq=Rethinking+Memory+and+Communication+Costs+for+Efficient+Data+Parallel+Training+of+Large+Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": ";alibaba-inc.com;antgroup.com;antgroup.com;;;antgroup.com;;antgroup.com;;antgroup.com;antfin.com;antgroup.com", "author_num": 13, "aff_unique_index": "0;1;1;0;1;0;1;1", "aff_unique_norm": "Alibaba Group;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.antgroup.com", "aff_unique_abbr": "Alibaba;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Real-Time Recurrent Learning using Trace Units in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96663", "id": "4UvMOnZMam", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4UvMOnZMam", "openreview": "https://openreview.net/forum?id=4UvMOnZMam", "poster": "", "project": "", "author_site": "Esraa Elelimy, Adam White, Michael Bowling, Martha White", "tldr": "", "abstract": "Recurrent Neural Networks (RNNs) are used to learn representations in partially observable environments. For agents that learn online and continually interact with the environment, it is desirable to train RNNs with real-time recurrent learning (RTRL); unfortunately, RTRL is prohibitively expensive for standard RNNs. A promising direction is to use linear recurrent architectures (LRUs), where dense recurrent weights are replaced with a complex-valued diagonal, making RTRL efficient. In this work, we build on these insights to provide a lightweight but effective approach for training RNNs in online RL. We introduce Recurrent Trace Units (RTUs), a small modification on LRUs that we nonetheless find to have significant performance benefits over LRUs when trained with RTRL. We find RTUs significantly outperform GRUs and Transformers across several partially observable environments while using significantly less computation.", "keywords": "Reinforcement Learning;RNNs", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/72bba7eb6179357eec81831c83a9d7f7c33f64e3.zip", "author": "Esraa Elelimy;Adam White;Michael Bowling;Martha White", "authorids": "~Esraa_Elelimy1;~Adam_White1;~Michael_Bowling1;~Martha_White1", "gender": "F;M;F;M", "homepage": "https://github.com/esraaelelimy;https://webdocs.cs.ualberta.ca/~bowling/;http://marthawhite.ca;http://adamwhite.ca", "dblp": "290/9304;71/5161;60/7057;91/10481", "google_scholar": "https://scholar.google.ca/citations?user=JUf_Rd8AAAAJ;https://scholar.google.ca/citations?user=PYtPCHoAAAAJ;t5zdD_IAAAAJ;https://scholar.google.ca/citations?user=1GqGhcsAAAAJ", "orcid": ";;0000-0002-5356-2950;", "linkedin": ";;;", "or_profile": "~Esraa_Elelimy1;~Michael_Bowling1;~Martha_White1;~Adam_M_White1", "aff": "University of Alberta;Department of Computing Science, University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;cs.ualberta.ca;ualberta.ca;ualberta.ca", "position": "PhD student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nelelimy2024realtime,\ntitle={Real-Time Recurrent Learning using Trace Units in Reinforcement Learning},\nauthor={Esraa Elelimy and Adam White and Michael Bowling and Martha White},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4UvMOnZMam}\n}", "github": "", "reviewers": "X1j7;d6Pv;PH1W;nsAS;EcqN", "pdf_size": 3107409, "rating": "5;6;6;6;7", "confidence": "2;4;4;2;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "35;59;127;236;68", "wc_strengths": "32;19;72;101;49", "wc_weaknesses": "1;30;468;85;171", "wc_questions": "1;105;341;29;58", "wc_limitations": "15;115;5;244;26", "wc_review": "84;328;1013;695;372", "wc_reply_reviewers": "0;25;633;0;202", "wc_reply_authors": "0;10;150;0;118", "reply_reviewers": "0;1;2;0;1", "reply_authors": "0;2;2;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.0, 72.15261603018979 ], "wc_strengths_avg": [ 54.6, 29.20684851195007 ], "wc_weaknesses_avg": [ 151.0, 168.75188887831746 ], "wc_questions_avg": [ 106.8, 122.04982589090407 ], "wc_limitations_avg": [ 81.0, 90.423448286382 ], "wc_review_avg": [ 498.4, 322.55393347469817 ], "wc_reply_reviewers_avg": [ 172.0, 242.56875314021795 ], "wc_reply_authors_avg": [ 55.6, 64.9110160142329 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8601445730635012877&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ualberta.ca;cs.ualberta.ca;ualberta.ca;ualberta.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Disentangled Style Domain for Implicit $z$-Watermark Towards Copyright Protection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96662", "id": "4VL5QWQFBV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4VL5QWQFBV", "openreview": "https://openreview.net/forum?id=4VL5QWQFBV", "poster": "/media/PosterPDFs/NeurIPS%202024/96662.png?t=1731327854.0646951", "project": "", "author_site": "Junqiang Huang, Zhaojun Guo, Ge Luo, Zhenxing Qian, Sheng Li, Xinpeng Zhang", "tldr": "", "abstract": "Text-to-image models have shown surprising performance in high-quality image generation, while also raising intensified concerns about the unauthorized usage of personal dataset in training and personalized fine-tuning. Recent approaches, embedding watermarks, introducing perturbations, and inserting backdoors into datasets, rely on adding minor information vulnerable to adversarial training, limiting their ability to detect unauthorized data usage. In this paper, we introduce a novel implicit Zero-Watermarking scheme that first utilizes the disentangled style domain to detect unauthorized dataset usage in text-to-image models. Specifically, our approach generates the watermark from the disentangled style domain, enabling self-generalization and mutual exclusivity within the style domain anchored by protected units. The domain achieves the maximum concealed offset of probability distribution through both the injection of identifier $z$ and dynamic contrastive learning, facilitating the structured delineation of dataset copyright boundaries for multiple sources of styles and contents. Additionally, we introduce the concept of watermark distribution to establish a verification mechanism for copyright ownership of hybrid or partial infringements, addressing deficiencies in the traditional mechanism of dataset copyright ownership for AI mimicry. Notably, our method achieves one-sample verification for copyright ownership in AI mimic generations. The code is available at: [https://github.com/Hlufies/ZWatermarking](https://github.com/Hlufies/ZWatermarking)", "keywords": "Dataset Copyright Protection; Text-to-Image Model; Diffusion Model\uff1bWatermark", "primary_area": "privacy", "supplementary_material": "/attachment/01e77f3bf632f83262b5f57250fcdeb8d6db90d2.zip", "author": "Junqiang Huang;Zhaojun Guo;Ge Luo;Zhenxing Qian;Sheng Li;Xinpeng Zhang", "authorids": "~Junqiang_Huang2;~Zhaojun_Guo1;~Ge_Luo2;~Zhenxing_Qian1;~Sheng_Li15;~Xinpeng_Zhang1", "gender": "M;F;M;M;M;M", "homepage": "https://github.com/Hlufies;https://github.com/zhaojunGUO;;https://tomzqian.github.io/;http://blazelisheng.github.io;", "dblp": ";;321/6589;43/8279;23/3439-6;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=y2k3E2AAAAAJ;90AsMtQAAAAJ;;https://scholar.google.com.hk/citations?user=P76GtHwAAAAJ", "orcid": "0009-0000-6695-7022;0000-0002-6833-9380;;;;0000-0001-5867-1315", "linkedin": ";;;;;", "or_profile": "~Junqiang_Huang2;~Zhaojun_Guo1;~Ge_Luo2;~Zhenxing_Qian1;~Sheng_Li15;~Xinpeng_Zhang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Shanghai university", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;shu.edu.cn", "position": "MS student;PhD student;PhD student;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024disentangled,\ntitle={Disentangled Style Domain for Implicit \\$z\\$-Watermark Towards Copyright Protection},\nauthor={Junqiang Huang and Zhaojun Guo and Ge Luo and Zhenxing Qian and Sheng Li and Xinpeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4VL5QWQFBV}\n}", "github": "", "reviewers": "NU2V;d18z;z2Ei;jJoo", "pdf_size": 10137876, "rating": "4;4;4;8", "confidence": "4;3;3;5", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "2;2;2;4", "wc_summary": "116;45;51;59", "wc_strengths": "27;45;78;90", "wc_weaknesses": "104;203;171;69", "wc_questions": "157;14;163;5", "wc_limitations": "40;26;6;7", "wc_review": "444;333;469;230", "wc_reply_reviewers": "44;154;0;60", "wc_reply_authors": "99;578;71;0", "reply_reviewers": "1;3;0;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.0, 1.7320508075688772 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 67.75, 28.296422035303333 ], "wc_strengths_avg": [ 60.0, 25.18928343562 ], "wc_weaknesses_avg": [ 136.75, 52.97346033628538 ], "wc_questions_avg": [ 84.75, 75.34711341518002 ], "wc_limitations_avg": [ 19.75, 14.148763196831021 ], "wc_review_avg": [ 369.0, 95.18665872904668 ], "wc_reply_reviewers_avg": [ 64.5, 56.14935440412472 ], "wc_reply_authors_avg": [ 187.0, 228.60992979308662 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4865815530981879806&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;shu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Fudan University;Shanghai University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.shu.edu.cn", "aff_unique_abbr": "Fudan;SHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The High Line: Exact Risk and Learning Rate Curves of Stochastic Adaptive Learning Rate Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96661", "id": "4VWnC5unAV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4VWnC5unAV", "openreview": "https://openreview.net/forum?id=4VWnC5unAV", "poster": "/media/PosterPDFs/NeurIPS%202024/96661.png?t=1732151552.5043552", "project": "", "author_site": "Elizabeth Collins-Woodfin, Inbar Seroussi, Bego\u00f1a Garc\u00eda Malaxechebarr\u00eda, Andrew W. Mackenzie, Elliot Paquette, Courtney Paquette", "tldr": "", "abstract": "We develop a framework for analyzing the training and learning rate dynamics on a large class of high-dimensional optimization problems, which we call the high line, trained using one-pass stochastic gradient descent (SGD) with adaptive learning rates. We give exact expressions for the risk and learning rate curves in terms of a deterministic solution to a system of ODEs. We then investigate in detail two adaptive learning rates -- an idealized exact line search and AdaGrad-Norm -- on the least squares problem. When the data covariance matrix has strictly positive eigenvalues, this idealized exact line search strategy can exhibit arbitrarily slower convergence when compared to the optimal fixed learning rate with SGD. Moreover we exactly characterize the limiting learning rate (as time goes to infinity) for line search in the setting where the data covariance has only two distinct eigenvalues. For noiseless targets, we further demonstrate that the AdaGrad-Norm learning rate converges to a deterministic constant inversely proportional to the average eigenvalue of the data covariance matrix, and identify a phase transition when the covariance density of eigenvalues follows a power law distribution. We provide\nour code for evaluation at https://github.com/amackenzie1/highline2024.", "keywords": "Stochastic optimization;high-dimensional probability/statistics;adaptive stepsizes", "primary_area": "optimization", "supplementary_material": "", "author": "Elizabeth Collins-Woodfin;Inbar Seroussi;Bego\u00f1a Garc\u00eda Malaxechebarr\u00eda;Andrew Mackenzie;Elliot Paquette;Courtney Paquette", "authorids": "~Elizabeth_Collins-Woodfin1;~Inbar_Seroussi1;~Bego\u00f1a_Garc\u00eda_Malaxechebarr\u00eda1;~Andrew_Mackenzie1;~Elliot_Paquette1;~Courtney_Paquette1", "gender": "F;F;;;M;F", "homepage": "https://sites.google.com/view/e-collins-woodfin;https://sites.google.com/view/inbar-seroussi/home;https://begogar99.github.io;;https://elliotpaquette.github.io;https://cypaquette.github.io/", "dblp": ";;;;126/6986;https://dblp.uni-trier.de/pers/hd/p/Paquette:Courtney", "google_scholar": ";https://scholar.google.co.il/citations?user=t_Qe0CMAAAAJ;;;;EkeZG30AAAAJ", "orcid": ";;;;0000-0003-4156-6687;", "linkedin": ";;;andrew-mackenzie-4a39b8192/?originalSubdomain=ca;;", "or_profile": "~Elizabeth_Collins-Woodfin1;~Inbar_Seroussi1;~Bego\u00f1a_Garc\u00eda_Malaxechebarr\u00eda1;~Andrew_Mackenzie1;~Elliot_Paquette1;~Courtney_Yumiko_Paquette1", "aff": "McGill University;Tel Aviv University;University of Washington;McGill University;McGill University;Google", "aff_domain": "mcgill.ca;tau.ac.il;uw.edu;mcgill.ca;mcgill.ca;google.com", "position": "Postdoc;Assistant Professor;PhD student;MS student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\ncollins-woodfin2024the,\ntitle={The High Line: Exact Risk and Learning Rate Curves of Stochastic Adaptive Learning Rate Algorithms},\nauthor={Elizabeth Collins-Woodfin and Inbar Seroussi and Bego{\\~n}a Garc{\\'\\i}a Malaxechebarr{\\'\\i}a and Andrew Mackenzie and Elliot Paquette and Courtney Paquette},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4VWnC5unAV}\n}", "github": "", "reviewers": "hf9X;hpAG;Qoif;A5Ho;ipsV", "pdf_size": 9342171, "rating": "5;5;7;7;7", "confidence": "3;3;4;2;3", "soundness": "3;3;3;4;4", "novelty": "3;2;4;2;3", "presentation": "1;1;3;2;4", "wc_summary": "181;152;206;12;171", "wc_strengths": "40;78;218;121;63", "wc_weaknesses": "114;392;643;94;50", "wc_questions": "81;253;751;2;14", "wc_limitations": "63;9;1;2;1", "wc_review": "479;884;1819;231;299", "wc_reply_reviewers": "298;198;184;0;6", "wc_reply_authors": "540;0;1069;0;0", "reply_reviewers": "1;1;2;0;1", "reply_authors": "2;1;4;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.2, 1.16619037896906 ], "wc_summary_avg": [ 144.4, 68.45319568873319 ], "wc_strengths_avg": [ 104.0, 62.82992917392156 ], "wc_weaknesses_avg": [ 258.6, 226.748847847128 ], "wc_questions_avg": [ 220.2, 280.1181179431277 ], "wc_limitations_avg": [ 15.2, 24.086510747719352 ], "wc_review_avg": [ 742.4, 584.2313240489592 ], "wc_reply_reviewers_avg": [ 137.2, 116.43092372733285 ], "wc_reply_authors_avg": [ 321.8, 428.155298927854 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4954363930119665423&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mcgill.ca;tau.ac.il;uw.edu;mcgill.ca;mcgill.ca;google.com", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "McGill University;Tel Aviv University;University of Washington;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.mcgill.ca;https://www.tau.ac.il;https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "McGill;TAU;UW;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;0;0;2", "aff_country_unique": "Canada;Israel;United States" }, { "title": "ReXTime: A Benchmark Suite for Reasoning-Across-Time in Videos", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97852", "id": "4Vhc7uPHjn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Vhc7uPHjn", "openreview": "https://openreview.net/forum?id=4Vhc7uPHjn", "poster": "/media/PosterPDFs/NeurIPS%202024/97852.png?t=1733675560.6022158", "project": "", "author_site": "JR-JEN CHEN, Yu-Chien Liao, Hsi-Che Lin, Yu-Chu Yu, Yen-Chun Chen, Frank Wang", "tldr": "", "abstract": "We introduce ReXTime, a benchmark designed to rigorously test AI models' ability to perform temporal reasoning within video events.\nSpecifically, ReXTime focuses on reasoning across time, i.e. human-like understanding when the question and its corresponding answer occur in different video segments. This form of reasoning, requiring advanced understanding of cause-and-effect relationships across video segments, poses significant challenges to even the frontier multimodal large language models. To facilitate this evaluation, we develop an automated pipeline for generating temporal reasoning question-answer pairs, significantly reducing the need for labor-intensive manual annotations. Our benchmark includes 921 carefully vetted validation samples and 2,143 test samples, each manually curated for accuracy and relevance. Evaluation results show that while frontier large language models outperform academic models, they still lag behind human performance by a significant 14.3\\% accuracy gap. Additionally, our pipeline creates a training dataset of 9,695 machine generated samples without manual effort, which empirical studies suggest can enhance the across-time reasoning via fine-tuning.", "keywords": "Video Question Answering;Video Temporal Reasoning;Multi-modal Large Language Model;Video Moment Localization", "primary_area": "", "supplementary_material": "/attachment/9cbf0dfc6226abbc6a505b4fa1997e3e49c6706c.pdf", "author": "Jr-Jen Chen;Yu-Chien Liao;Hsi-Che Lin;Yu-Chu Yu;Yen-Chun Chen;Yu-Chiang Frank Wang", "authorids": "~Jr-Jen_Chen1;~Yu-Chien_Liao1;~Hsi-Che_Lin1;~Yu-Chu_Yu1;~Yen-Chun_Chen1;~Yu-Chiang_Frank_Wang2", "gender": "M;M;M;M;M;M", "homepage": ";;https://github.com/hsi-che-lin;https://chu0802.github.io;;http://vllab.ee.ntu.edu.tw/ycwang.html", "dblp": ";;;339/7406;160/0623-1;30/1690", "google_scholar": ";;;Ewt88bUAAAAJ;Gptgy4YAAAAJ;HSGvdtoAAAAJ", "orcid": ";;;0009-0008-8244-521X;;0000-0002-2333-157X", "linkedin": "jr-jen-chen-986640270/;liao-yu-chien-13951a252/;;;;", "or_profile": "~Jr-Jen_Chen1;~Yu-Chien_Liao1;~Hsi-Che_Lin1;~Yu-Chu_Yu1;~Yen-Chun_Chen1;~Yu-Chiang_Frank_Wang2", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;University of California, Merced;Microsoft;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ucmerced.edu;microsoft.com;ntu.edu.tw", "position": "MS student;MS student;Undergrad student;Visiting Student;Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024rextime,\ntitle={Re{XT}ime: A Benchmark Suite for Reasoning-Across-Time in Videos},\nauthor={Jr-Jen Chen and Yu-Chien Liao and Hsi-Che Lin and Yu-Chu Yu and Yen-Chun Chen and Yu-Chiang Frank Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=4Vhc7uPHjn}\n}", "github": "", "reviewers": "ySRg;fKEv;Sauq;QTdk", "pdf_size": 1112152, "rating": "6;6;7;7", "confidence": "5;4;4;4", "wc_summary_and_contributions": "64;91;50;198", "wc_strengths": "2;136;59;58", "wc_improvement": "11;273;86;184", "wc_limitations": "2;10;4;8", "wc_correctness": "1;10;1;19", "wc_clarity": "1;1;1;24", "wc_relation_to_prior_work": "4;1;1;42", "wc_documentation": "8;1;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "94;524;204;540", "wc_reply_reviewers": "191;0;0;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 100.75, 58.0490094661399 ], "wc_strengths_avg": [ 63.75, 47.66746794198324 ], "wc_improvement_avg": [ 138.5, 98.96085084517009 ], "wc_limitations_avg": [ 6.0, 3.1622776601683795 ], "wc_correctness_avg": [ 7.75, 7.46240577829965 ], "wc_clarity_avg": [ 6.75, 9.959292143521045 ], "wc_relation_to_prior_work_avg": [ 12.0, 17.363755354185336 ], "wc_documentation_avg": [ 4.0, 3.082207001484488 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 340.5, 195.49104838841086 ], "wc_reply_reviewers_avg": [ 54.5, 79.57543590832537 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16277195047037920422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ucmerced.edu;microsoft.com;ntu.edu.tw", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "National Taiwan University;University of California, Merced;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.ntu.edu.tw;https://www.ucmerced.edu;https://www.microsoft.com", "aff_unique_abbr": "NTU;UC Merced;Microsoft", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Taiwan;Merced;", "aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Dynamic Subgroup Identification in Covariate-adjusted Response-adaptive Randomization Experiments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96660", "id": "4WIBvL6ZF4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4WIBvL6ZF4", "openreview": "https://openreview.net/forum?id=4WIBvL6ZF4", "poster": "/media/PosterPDFs/NeurIPS%202024/96660.png?t=1731335603.9347181", "project": "", "author_site": "Yanping Li, Jingshen Wang, Waverly Wei", "tldr": "", "abstract": "Identifying subgroups with differential responses to treatment is pivotal in randomized clinical trials, as tailoring treatments to specific subgroups can advance personalized medicine. Upon trial completion, identifying best-performing subgroups\u2013those with the most beneficial treatment effects\u2013is crucial for optimizing resource allocation or mitigating adverse treatment effects. However, traditional clinical trials are not customized for the goal of identifying best-performing subgroups because they typically pre-define subgroups at the beginning of the trial and adhere to a fixed subgroup treatment allocation rule, leading to inefficient use of experimental efforts. While some adaptive experimental strategies exist for the identification of the single best subgroup, they commonly do not enable the identification of the best set of subgroups. To address these challenges, we propose a dynamic subgroup identification covariate-adjusted response-adaptive randomization (CARA) design strategy with the following key features: (i) Our approach is an adaptive experimental strategy that allows the dynamic identification of the best subgroups and the revision of treatment allocation towards the goal of correctly identifying the best subgroups based on collected experimental data. (ii) Our design handles ties between subgroups effectively, merging those with similar treatment effects to maximize experimental efficiency. In the theoretical investigations, we demonstrate that our design has a higher probability of correctly identifying the best set of subgroups compared to conventional designs. Additionally, we prove the statistical validity of our estimator for the best subgroup treatment effect, demonstrating its asymptotic normality and semiparametric efficiency. Finally, we validate our design using synthetic data from a clinical trial on cirrhosis.", "keywords": "Covariate-adjusted response-adaptive randomization design;Response-adaptive randomization design;Subgroup identification;Causal inference;Treatment effect heterogeneity", "primary_area": "causal_inference", "supplementary_material": "/attachment/b5fa4f63060334ec795eae0014fd5f00c02d1de2.zip", "author": "Yanping Li;Jingshen Wang;Waverly Wei", "authorids": "~Yanping_Li3;~Jingshen_Wang1;~Waverly_Wei1", "gender": "F;;", "homepage": "https://sites.google.com/view/yanpingli/home;https://sites.google.com/berkeley.edu/jingshenwang/;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yanping_Li3;~Jingshen_Wang1;~Waverly_Wei1", "aff": "Nankai University;University of California, Berkeley;", "aff_domain": "mail.nankai.edu.cn;berkeley.edu;", "position": "MS student;Assistant Professor;", "bibtex": "@inproceedings{\nli2024dynamic,\ntitle={Dynamic Subgroup Identification in Covariate-adjusted Response-adaptive Randomization Experiments},\nauthor={Yanping Li and Jingshen Wang and Waverly Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4WIBvL6ZF4}\n}", "github": "", "reviewers": "Rfeb;JR9y;Sgje;pM3n", "pdf_size": 646459, "rating": "4;6;7;7", "confidence": "5;4;2;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "211;68;23;47", "wc_strengths": "149;174;28;63", "wc_weaknesses": "742;193;60;168", "wc_questions": "211;136;29;3", "wc_limitations": "79;35;48;3", "wc_review": "1392;606;188;284", "wc_reply_reviewers": "490;20;12;15", "wc_reply_authors": "1162;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 87.25, 73.19964139256422 ], "wc_strengths_avg": [ 103.5, 59.96040360104325 ], "wc_weaknesses_avg": [ 290.75, 265.28039411158903 ], "wc_questions_avg": [ 94.75, 83.60136063486048 ], "wc_limitations_avg": [ 41.25, 27.261465477849864 ], "wc_review_avg": [ 617.5, 473.2005388838859 ], "wc_reply_reviewers_avg": [ 134.25, 205.4122379509069 ], "wc_reply_authors_avg": [ 290.5, 503.16075959875883 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7492686492653551, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aZcn3hy_iLoJ:scholar.google.com/&scioq=Dynamic+Subgroup+Identification+in+Covariate-adjusted+Response-adaptive+Randomization+Experiments&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "mail.nankai.edu.cn;berkeley.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Nankai University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "NKU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Iterative Reasoning Preference Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96659", "id": "4XIKfvNYvx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4XIKfvNYvx", "openreview": "https://openreview.net/forum?id=4XIKfvNYvx", "poster": "/media/PosterPDFs/NeurIPS%202024/96659.png?t=1731387085.6666942", "project": "", "author_site": "Richard Yuanzhe Pang, Weizhe Yuan, He He, Kyunghyun Cho, Sainbayar Sukhbaatar, Jason Weston", "tldr": "", "abstract": "Iterative preference optimization methods have recently been shown to perform well for general instruction tuning tasks, but typically make little improvement on reasoning tasks. In this work we develop an iterative approach that optimizes the preference between competing generated Chain-of-Thought (CoT) candidates by optimizing for winning vs. losing reasoning steps. We train using a modified DPO loss with an additional negative log-likelihood term, which we find to be crucial. We show reasoning improves across repeated iterations of this scheme. While only relying on examples in the training set, our approach results in increasing accuracy on GSM8K, MATH, and ARC-Challenge for Llama-2-70B-Chat, outperforming other Llama-2-based models not relying on additionally sourced datasets. For example, we see a large improvement from 55.6% to 81.6% on GSM8K and an accuracy of 88.7% with majority voting out of 32 samples.", "keywords": "NLP;LLM;language models;iterative learning;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Richard Yuanzhe Pang;Weizhe Yuan;He He;Kyunghyun Cho;Sainbayar Sukhbaatar;Jason E Weston", "authorids": "~Richard_Yuanzhe_Pang1;~Weizhe_Yuan1;~He_He2;~Kyunghyun_Cho1;~Sainbayar_Sukhbaatar1;~Jason_E_Weston1", "gender": "M;F;;M;M;", "homepage": "https://yzpang.me;http://yyy-apple.github.io/;;http://kyunghyuncho.me;;", "dblp": "250/9059;207/1964;;41/9736;56/10550;", "google_scholar": "https://scholar.google.com/citations?hl=en;2k5j4eMAAAAJ;;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;ri1sE34AAAAJ;", "orcid": ";;;;;", "linkedin": "yuanzhe-richard-pang/;weizhey/;;;;", "or_profile": "~Richard_Yuanzhe_Pang1;~Weizhe_Yuan1;~He_He2;~Kyunghyun_Cho1;~Sainbayar_Sukhbaatar1;~Jason_E_Weston1", "aff": "New York University;New York University;;Genentech;Meta AI;", "aff_domain": "nyu.edu;nyu.edu;;gene.com;meta.com;", "position": "PhD student;PhD student;;Senior Director of Frontier Research;Research Scientist;", "bibtex": "@inproceedings{\npang2024iterative,\ntitle={Iterative Reasoning Preference Optimization},\nauthor={Richard Yuanzhe Pang and Weizhe Yuan and He He and Kyunghyun Cho and Sainbayar Sukhbaatar and Jason E Weston},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4XIKfvNYvx}\n}", "github": "", "reviewers": "E9gS;9NDK;DMPg;jeVs", "pdf_size": 430242, "rating": "5;5;6;8", "confidence": "3;3;4;2", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "65;109;66;127", "wc_strengths": "17;38;14;28", "wc_weaknesses": "30;192;90;6", "wc_questions": "193;47;6;25", "wc_limitations": "4;7;1;2", "wc_review": "309;393;177;188", "wc_reply_reviewers": "94;36;5;4", "wc_reply_authors": "224;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.75, 27.012728481217888 ], "wc_strengths_avg": [ 24.25, 9.496709956611289 ], "wc_weaknesses_avg": [ 79.5, 71.79658766264592 ], "wc_questions_avg": [ 67.75, 73.75423716641642 ], "wc_limitations_avg": [ 3.5, 2.29128784747792 ], "wc_review_avg": [ 266.75, 89.41581236000711 ], "wc_reply_reviewers_avg": [ 34.75, 36.54705870518173 ], "wc_reply_authors_avg": [ 56.0, 96.99484522385713 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5556073167595835787&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nyu.edu;nyu.edu;;gene.com;meta.com;", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "New York University;Genentech;Meta", "aff_unique_dep": ";;Meta AI", "aff_unique_url": "https://www.nyu.edu;https://www.genentech.com;https://meta.com", "aff_unique_abbr": "NYU;Genentech;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96658", "id": "4XTvXMSZPO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4XTvXMSZPO", "openreview": "https://openreview.net/forum?id=4XTvXMSZPO", "poster": "", "project": "", "author_site": "Hao Bai, Yifei Zhou, Jiayi Pan, Mert Cemri, Alane Suhr, Sergey Levine, Aviral Kumar", "tldr": "", "abstract": "Pre-trained vision language models (VLMs), though powerful, typically lack training on decision-centric data, rendering them sub-optimal for decision-making tasks such as in-the-wild device control through Graphical User Interfaces (GUIs) when used off-the-shelf. While training with static demonstrations has shown some promise, we show that such methods fall short when controlling real GUIs due to their failure to deal with real world stochasticity and dynamism not captured in static observational data. This paper introduces a novel autonomous RL approach, called DigiRL, for training in-the-wild device control agents through fine-tuning a pre-trained VLM in two stages: offline and offline-to-online RL. We first build a scalable and parallelizable Android learning environment equipped with a VLM-based general-purpose evaluator and then identify the key design choices for simple and effective RL in this domain. We demonstrate the effectiveness of DigiRL using the Android-in-the-Wild (AitW) dataset, where our 1.5B VLM trained with RL achieves a 49.5\\% absolute improvement -- from 17.7 to 67.2\\% success rate -- over supervised fine-tuning with static human demonstration data. It is worth noting that such improvement is achieved without any additional supervision or demonstration data. These results significantly surpass not only the prior best agents, including AppAgent with GPT-4V (8.3\\% success rate) and the 17B CogAgent trained with AitW data (14.4\\%), but also our implementation of prior best autonomous RL approach based on filtered behavior cloning (57.8\\%), thereby establishing a new state-of-the-art for digital agents for in-the-wild device control.", "keywords": "LLM/VLM Agent;Device Control;GUI Navigation;Web Agent;Reinforcement Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hao Bai;Yifei Zhou;Jiayi Pan;Mert Cemri;Alane Suhr;Sergey Levine;Aviral Kumar", "authorids": "~Hao_Bai1;~Yifei_Zhou1;~Jiayi_Pan1;~Mert_Cemri1;~Alane_Suhr1;~Sergey_Levine1;~Aviral_Kumar2", "gender": "M;M;M;M;Not Specified;M;M", "homepage": "https://www.jackgethome.com;https://yifeizhou02.github.io/;https://www.jiayipan.me/;;http://www.alanesuhr.com;https://people.eecs.berkeley.edu/~svlevine/;https://aviralkumar2907.github.io/", "dblp": "53/8975;50/7699;39/6476-2;;203/9306;80/7594;202/7961", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;n9Y_sQEAAAAJ;sMEFwf8AAAAJ;daslsUkAAAAJ;8R35rCwAAAAJ;", "orcid": "0000-0001-9723-7490;;0000-0003-0817-4083;;;;", "linkedin": "jackgethome/;yifei-zhou-57aa9b222/;;;;;", "or_profile": "~Hao_Bai1;~Yifei_Zhou1;~Jiayi_Pan1;~Mert_Cemri1;~Alane_Suhr1;~Sergey_Levine1;~Aviral_Kumar2", "aff": "University of Illinois, Urbana Champaign;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Google;Google DeepMind", "aff_domain": "illinois.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;google.com;google.com", "position": "MS student;PhD student;PhD student;PhD student;Assistant Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbai2024digirl,\ntitle={Digi{RL}: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning},\nauthor={Hao Bai and Yifei Zhou and Jiayi Pan and Mert Cemri and Alane Suhr and Sergey Levine and Aviral Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4XTvXMSZPO}\n}", "github": "", "reviewers": "251X;XhdC;JuUV;zDzF", "pdf_size": 27872677, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "75;98;111;209", "wc_strengths": "89;26;53;63", "wc_weaknesses": "367;27;168;74", "wc_questions": "127;141;50;68", "wc_limitations": "1;6;1;41", "wc_review": "659;298;383;455", "wc_reply_reviewers": "87;9;57;48", "wc_reply_authors": "164;109;0;24", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.25, 51.15845482420281 ], "wc_strengths_avg": [ 57.75, 22.554101622543072 ], "wc_weaknesses_avg": [ 159.0, 130.37829573974344 ], "wc_questions_avg": [ 96.5, 38.356876828021335 ], "wc_limitations_avg": [ 12.25, 16.723860200324566 ], "wc_review_avg": [ 448.75, 133.50351118978108 ], "wc_reply_reviewers_avg": [ 50.25, 27.851166941440713 ], "wc_reply_authors_avg": [ 74.25, 65.76615771048206 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7895614545956992778&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "illinois.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;2;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of California, Berkeley;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://illinois.edu;https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UIUC;UC Berkeley;Google", "aff_campus_unique_index": "0;1;1;1;1;2", "aff_campus_unique": "Urbana-Champaign;Berkeley;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Taming Heavy-Tailed Losses in Adversarial Bandits and the Best-of-Both-Worlds Setting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96657", "id": "4Yj7L9Kt7t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Yj7L9Kt7t", "openreview": "https://openreview.net/forum?id=4Yj7L9Kt7t", "poster": "/media/PosterPDFs/NeurIPS%202024/96657.png?t=1732931046.2021897", "project": "", "author_site": "Duo Cheng, Xingyu Zhou, Bo Ji", "tldr": "", "abstract": "In this paper, we study the multi-armed bandit problem in the best-of-both-worlds (BOBW) setting with heavy-tailed losses, where the losses can be negative and unbounded but have $(1+v)$-th raw moments bounded by $u^{1+v}$ for some known $u>0$ and $v\\in(0,1]$. Specifically, we consider the BOBW setting where the underlying environment could be either (oblivious) adversarial (i.e., the loss distribution can change arbitrarily over time) or stochastic (i.e., the loss distribution is fixed over time) and is unknown to the decision-maker a prior, and propose an algorithm that achieves a $T^{\\frac{1}{1+v}}$-type worst-case (pseudo-)regret in the adversarial regime and a $\\log T$-type gap-dependent regret in the stochastic regime, where $T$ is the time horizon. Compared to the state-of-the-art results, our algorithm offers stronger \\emph{high-probability} regret guarantees rather than expected regret guarantees, and more importantly, relaxes a strong technical assumption on the loss distribution. This assumption is needed even for the weaker expected regret obtained in the literature and is generally hard to verify in practice. As a byproduct, relaxing this assumption leads to the first near-optimal regret result for heavy-tailed bandits with Huber contamination in the adversarial regime, in contrast to all previous works focused on the (easier) stochastic regime. Our result also implies a high-probability BOBW regret guarantee when the bounded true losses are protected with pure Local Differential Privacy (LDP), while the existing work ensures the (weaker) \\emph{approximate} LDP with the regret bounds in expectation only.", "keywords": "bandits;regret minimization;the best of both worlds;heavy tails;online learning", "primary_area": "bandits", "supplementary_material": "", "author": "Duo Cheng;Xingyu Zhou;Bo Ji", "authorids": "~Duo_Cheng1;~Xingyu_Zhou2;~Bo_Ji3", "gender": "M;M;", "homepage": "https://duocheng1999.github.io/;http://xingyuzhou.org;https://people.cs.vt.edu/boji/", "dblp": ";07/10352-1;", "google_scholar": ";AsTyRmwAAAAJ;", "orcid": ";;", "linkedin": "duo-c-49b31019b;;", "or_profile": "~Duo_Cheng1;~Xingyu_Zhou2;~Bo_Ji3", "aff": "Virginia Polytechnic Institute and State University;Wayne State University;Virginia Tech", "aff_domain": "vt.edu;wayne.edu;vt.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ncheng2024taming,\ntitle={Taming Heavy-Tailed Losses in Adversarial Bandits and the Best-of-Both-Worlds Setting},\nauthor={Duo Cheng and Xingyu Zhou and Bo Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4Yj7L9Kt7t}\n}", "github": "", "reviewers": "q85G;t2SX;LBNn;FpXy", "pdf_size": 575671, "rating": "6;6;7;7", "confidence": "4;3;2;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "173;37;55;273", "wc_strengths": "65;31;49;74", "wc_weaknesses": "305;54;81;63", "wc_questions": "93;6;24;33", "wc_limitations": "146;1;10;23", "wc_review": "782;129;219;466", "wc_reply_reviewers": "86;11;0;23", "wc_reply_authors": "1154;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 134.5, 95.5130881083844 ], "wc_strengths_avg": [ 54.75, 16.37643123516232 ], "wc_weaknesses_avg": [ 125.75, 103.94559875242433 ], "wc_questions_avg": [ 39.0, 32.657311585615865 ], "wc_limitations_avg": [ 45.0, 58.834513680322026 ], "wc_review_avg": [ 399.0, 253.21828527971672 ], "wc_reply_reviewers_avg": [ 30.0, 33.3391661563393 ], "wc_reply_authors_avg": [ 288.5, 499.6966579836211 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LoDNyBsXWDAJ:scholar.google.com/&scioq=Taming+Heavy-Tailed+Losses+in+Adversarial+Bandits+and+the+Best-of-Both-Worlds+Setting&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "vt.edu;wayne.edu;vt.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Virginia Tech;Wayne State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://wayne.edu", "aff_unique_abbr": "VT;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ProSST: Protein Language Modeling with Quantized Structure and Disentangled Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96656", "id": "4Z7RZixpJQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Z7RZixpJQ", "openreview": "https://openreview.net/forum?id=4Z7RZixpJQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96656.png?t=1731243497.135933", "project": "", "author_site": "Mingchen Li, Yang Tan, Xinzhu Ma, Bozitao Zhong, Huiqun Yu, Ziyi Zhou, Wanli Ouyang, Bingxin Zhou, Pan Tan, Liang Hong", "tldr": "", "abstract": "Protein language models (PLMs) have shown remarkable capabilities in various protein function prediction tasks. However, while protein function is intricately tied to structure, most existing PLMs do not incorporate protein structure information. To address this issue, we introduce ProSST, a Transformer-based protein language model that seamlessly integrates both protein sequences and structures. ProSST incorporates a structure quantization module and a Transformer architecture with disentangled attention. The structure quantization module translates a 3D protein structure into a sequence of discrete tokens by first serializing the protein structure into residue-level local structures and then embeds them into dense vector space. These vectors are then quantized into discrete structure tokens by a pre-trained clustering model. These tokens serve as an effective protein structure representation. Furthermore, ProSST explicitly learns the relationship between protein residue token sequences and structure token sequences through the sequence-structure disentangled attention. We pre-train ProSST on millions of protein structures using a masked language model objective, enabling it to learn comprehensive contextual representations of proteins. To evaluate the proposed ProSST, we conduct extensive experiments on the zero-shot mutation effect prediction and several supervised downstream tasks, where ProSST achieves the state-of-the-art performance among all baselines. Our code and pre-trained models are publicly available.", "keywords": "Protein;Protein Language Modeling;Protein structure;Disentangled Attention", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Mingchen Li;Yang Tan;Xinzhu Ma;Bozitao Zhong;Huiqun Yu;Ziyi Zhou;Wanli Ouyang;Bingxin Zhou;Pan Tan;Liang Hong", "authorids": "~Mingchen_Li3;~Yang_Tan2;~Xinzhu_Ma1;~Bozitao_Zhong1;~Huiqun_Yu2;~Ziyi_Zhou7;~Wanli_Ouyang1;~Bingxin_Zhou1;~Pan_Tan1;~Liang_Hong2", "gender": "M;M;M;M;M;M;;F;;M", "homepage": ";https://tyang816.github.io/;https://github.com/xinzhuma;;https://ieeexplore.ieee.org/author/37333550600;;;;;https://ins.sjtu.edu.cn/people/lhong/index_english.html", "dblp": ";;191/3902;310/9462;;;;;;", "google_scholar": "4YnYCLgAAAAJ;;8PuKa_8AAAAJ;6k_LhSoAAAAJ;;;;OsvArmcAAAAJ;;pcz1yA4AAAAJ", "orcid": "0000-0001-6862-0052;;;0000-0001-9363-6099;;0000-0002-8267-8178;;;0000-0003-2086-0940;0000-0003-0107-336X", "linkedin": ";;;;;;;;;", "or_profile": "~Mingchen_Li3;~Yang_Tan2;~Xinzhu_Ma1;~Bozitao_Zhong1;~Huiqun_Yu2;~Ziyi_Zhou7;~Wanli_Ouyang1;~Bingxin_Zhou1;~Pan_Tan1;~Liang_Hong2", "aff": "East China University of Science and Technology;East China University of Science and Technology;The Chinese University of Hong Kong;The Chinese University of Hong Kong;East China University of Science and Technology;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;", "aff_domain": "ecust.edu.cn;ecust.edu.cn;cuhk.edu.hk;link.cuhk.edu.hk;ecust.edu.cn;sjtu.edu.cn;;sjtu.edu.cn;pjlab.org.cn;", "position": "PhD student;MS student;Postdoc;MS student;Full Professor;Postdoc;;Researcher;Researcher;", "bibtex": "@inproceedings{\nli2024prosst,\ntitle={Pro{SST}: Protein Language Modeling with Quantized Structure and Disentangled Attention},\nauthor={Mingchen Li and Yang Tan and Xinzhu Ma and Bozitao Zhong and Huiqun Yu and Ziyi Zhou and Wanli Ouyang and Bingxin Zhou and Pan Tan and Liang Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4Z7RZixpJQ}\n}", "github": "", "reviewers": "1pAx;jE9F;cgxa;t3Vw", "pdf_size": 9898475, "rating": "5;7;7;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "64;76;204;101", "wc_strengths": "77;86;132;65", "wc_weaknesses": "108;57;583;143", "wc_questions": "45;71;33;279", "wc_limitations": "27;1;191;29", "wc_review": "321;291;1143;617", "wc_reply_reviewers": "40;12;9;39", "wc_reply_authors": "43;56;57;64", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.25, 55.187747734438304 ], "wc_strengths_avg": [ 90.0, 25.367301787931645 ], "wc_weaknesses_avg": [ 222.75, 210.22651474064827 ], "wc_questions_avg": [ 107.0, 100.2496882788171 ], "wc_limitations_avg": [ 62.0, 75.29276193632427 ], "wc_review_avg": [ 593.0, 342.14909031005766 ], "wc_reply_reviewers_avg": [ 25.0, 14.543039572248986 ], "wc_reply_authors_avg": [ 55.0, 7.582875444051551 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "ecust.edu.cn;ecust.edu.cn;cuhk.edu.hk;link.cuhk.edu.hk;ecust.edu.cn;sjtu.edu.cn;;sjtu.edu.cn;pjlab.org.cn;", "author_num": 10, "aff_unique_index": "0;0;1;1;0;2;2;3", "aff_unique_norm": "East China University of Science and Technology;Chinese University of Hong Kong;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ecust.edu.cn;https://www.cuhk.edu.hk;https://www.sjtu.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "ECUST;CUHK;SJTU;Shanghai AI Lab", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Active, anytime-valid risk controlling prediction sets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96655", "id": "4ZH48aGD60", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4ZH48aGD60", "openreview": "https://openreview.net/forum?id=4ZH48aGD60", "poster": "", "project": "", "author_site": "Ziyu Xu, Nikos Karampatziakis, Paul Mineiro", "tldr": "", "abstract": "Rigorously establishing the safety of black-box machine learning models with respect to critical risk measures is important for providing guarantees about the behavior of the model.\nRecently, a notion of a risk controlling prediction set (RCPS) has been introduced by Bates et. al. (JACM '24) for producing prediction sets that are statistically guaranteed to have low risk from machine learning models.\nOur method extends this notion to the sequential setting, where we provide guarantees even when the data is collected adaptively, and ensures the risk guarantee is anytime-valid, i.e., simultaneously holds at all time steps. Further, we propose a framework for constructing RCPSes for active labeling, i.e., allowing one to use a labeling policy that chooses whether to query the true label for each received data point, and ensures the expected proportion data points whose labels are queried are below a predetermined label budget. We also describe how to use predictors (e.g., the machine learning model we are providing risk control guarantees for) to further improve the utility of our RCPSes by estimating the expected risk conditioned on the covariates.\nWe characterize the optimal choices of label policy under a fixed label budget, and predictor, and show a regret result that relates the estimation error of the optimal labeling policy and predictor to the wealth process that underlies our RCPSes.\nLastly, we present practical ways of formulating label policies and we empirically show that our label policies use fewer labels to reach higher utility than naive baseline labeling strategies on both simulations and real data.", "keywords": "distribution free;conformal prediction;e-process;confidence sequence", "primary_area": "active_learning", "supplementary_material": "", "author": "Ziyu Xu;Nikos Karampatziakis;Paul Mineiro", "authorids": "~Ziyu_Xu2;~Nikos_Karampatziakis1;~Paul_Mineiro1", "gender": ";;", "homepage": "https://neilzxu.me;;", "dblp": ";91/8774;35/5613", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ziyu_Xu2;~Nikos_Karampatziakis1;~Paul_Mineiro1", "aff": "Carnegie Mellon University;Microsoft;", "aff_domain": "cmu.edu;microsoft.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nxu2024active,\ntitle={Active, anytime-valid risk controlling prediction sets},\nauthor={Ziyu Xu and Nikos Karampatziakis and Paul Mineiro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4ZH48aGD60}\n}", "github": "", "reviewers": "qKw8;XpAy;1rh6", "pdf_size": 1445524, "rating": "4;6;7", "confidence": "1;2;3", "soundness": "2;4;4", "novelty": "2;2;3", "presentation": "2;2;2", "wc_summary": "85;50;382", "wc_strengths": "15;44;168", "wc_weaknesses": "177;414;232", "wc_questions": "21;165;93", "wc_limitations": "15;6;70", "wc_review": "313;679;945", "wc_reply_reviewers": "72;58;156", "wc_reply_authors": "0;508;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 172.33333333333334, 148.9436880912455 ], "wc_strengths_avg": [ 75.66666666666667, 66.35426805329774 ], "wc_weaknesses_avg": [ 274.3333333333333, 101.27959101198797 ], "wc_questions_avg": [ 93.0, 58.787753826796276 ], "wc_limitations_avg": [ 30.333333333333332, 28.288199345702836 ], "wc_review_avg": [ 645.6666666666666, 259.08728687880887 ], "wc_reply_reviewers_avg": [ 95.33333333333333, 43.27688631231328 ], "wc_reply_authors_avg": [ 169.33333333333334, 239.4734965618441 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9855561338646199928&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "cmu.edu;microsoft.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com", "aff_unique_abbr": "CMU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Chain-of-Thought Reasoning Without Prompting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96654", "id": "4Zt7S0B0Jp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4Zt7S0B0Jp", "openreview": "https://openreview.net/forum?id=4Zt7S0B0Jp", "poster": "/media/PosterPDFs/NeurIPS%202024/96654.png?t=1731961564.5781834", "project": "", "author_site": "Xuezhi Wang, Denny Zhou", "tldr": "", "abstract": "In enhancing the reasoning capabilities of large language models (LLMs), prior research primarily focuses on specific prompting techniques such as few-shot or zero-shot chain-of-thought (CoT) prompting. These methods, while effective, often involve manually intensive prompt engineering. Our study takes a novel approach by asking: Can LLMs reason effectively without any prompting? Our findings reveal that, intriguingly, CoT reasoning paths can be elicited from pre-trained LLMs by simply altering the \\textit{decoding} process. Rather than conventional greedy decoding, we investigate the top-$k$ alternative tokens, uncovering that CoT paths are frequently inherent in these sequences. This approach not only bypasses the confounders of prompting but also allows us to assess the LLMs' \\textit{intrinsic} reasoning abilities. Moreover, we observe that the presence of a CoT in the decoding path correlates with a higher confidence in the model's decoded answer. This confidence metric effectively differentiates between CoT and non-CoT paths. Extensive empirical studies on various reasoning benchmarks show that the proposed CoT-decoding effectively elicits reasoning capabilities from language models, which were previously obscured by standard greedy decoding.", "keywords": "Reasoning;large language models;decoding", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/550fab611ef6f9bacb1361bab279cca162f65890.zip", "author": "Xuezhi Wang;Denny Zhou", "authorids": "~Xuezhi_Wang3;~Denny_Zhou1", "gender": ";", "homepage": "https://research.google/people/105995/;https://dennyzhou.github.io/", "dblp": "70/4090-2;178/3277", "google_scholar": "ScLUQ-YAAAAJ;UwLsYw8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xuezhi_Wang3;~Dengyong_Zhou2", "aff": "Google DeepMind;Google DeepMind", "aff_domain": "google.com;google.com", "position": "Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nwang2024chainofthought,\ntitle={Chain-of-Thought Reasoning Without Prompting},\nauthor={Xuezhi Wang and Denny Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4Zt7S0B0Jp}\n}", "github": "", "reviewers": "Es5x;C32n;BrnW;fBkE", "pdf_size": 1081446, "rating": "4;6;8;8", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "75;74;198;67", "wc_strengths": "60;54;143;98", "wc_weaknesses": "269;73;64;101", "wc_questions": "1;52;2;30", "wc_limitations": "1;42;3;11", "wc_review": "406;295;410;307", "wc_reply_reviewers": "0;10;58;35", "wc_reply_authors": "0;16;31;27", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.5, 54.646591842492796 ], "wc_strengths_avg": [ 88.75, 35.57650207651112 ], "wc_weaknesses_avg": [ 126.75, 83.25375366912894 ], "wc_questions_avg": [ 21.25, 21.22940178149163 ], "wc_limitations_avg": [ 14.25, 16.452583383772897 ], "wc_review_avg": [ 354.5, 53.68659050451984 ], "wc_reply_reviewers_avg": [ 25.75, 22.56518335843961 ], "wc_reply_authors_avg": [ 18.5, 12.010412149464313 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9129496995191217550&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "A Near-optimal Algorithm for Learning Margin Halfspaces with Massart Noise", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96653", "id": "4aEwZkWB5z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4aEwZkWB5z", "openreview": "https://openreview.net/forum?id=4aEwZkWB5z", "poster": "", "project": "", "author_site": "Ilias Diakonikolas, Nikos Zarifis", "tldr": "", "abstract": "We study the problem of PAC learning $\\gamma$-margin halfspaces in the presence of Massart noise. \nWithout computational considerations, the sample complexity of this learning problem is known to be \n$\\widetilde{\\Theta}(1/(\\gamma^2 \\epsilon))$. \nPrior computationally efficient algorithms for the problem incur sample complexity \n$\\tilde{O}(1/(\\gamma^4 \\epsilon^3))$ and achieve 0-1 error of $\\eta+\\epsilon$, \nwhere $\\eta<1/2$ is the upper bound on the noise rate.\nRecent work gave evidence of an information-computation tradeoff, \nsuggesting that a quadratic dependence on $1/\\epsilon$ is required \nfor computationally efficient algorithms. \nOur main result is a computationally efficient learner with sample complexity \n$\\widetilde{\\Theta}(1/(\\gamma^2 \\epsilon^2))$, nearly matching this lower bound. \nIn addition, our algorithm is simple and practical, \nrelying on online SGD on a carefully selected sequence of convex losses.", "keywords": "Massart Noise;margin halfspaces", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ilias Diakonikolas;Nikos Zarifis", "authorids": "~Ilias_Diakonikolas1;~Nikos_Zarifis1", "gender": "M;M", "homepage": "http://www.iliasdiakonikolas.org/;https://nikoszarifis.github.io/", "dblp": "d/IliasDiakonikolas;241/9782", "google_scholar": "Vb3FLmkAAAAJ;P1ha1IkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ilias_Diakonikolas1;~NIKOLAOS_ZARIFIS1", "aff": "University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;wisc.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2024a,\ntitle={A Near-optimal Algorithm for Learning Margin Halfspaces with Massart Noise},\nauthor={Ilias Diakonikolas and Nikos Zarifis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4aEwZkWB5z}\n}", "github": "", "reviewers": "2xNf;Sz9S;JiXV;SyXT;g5LR", "pdf_size": 418902, "rating": "7;7;7;7;8", "confidence": "3;4;3;3;3", "soundness": "3;3;4;4;4", "novelty": "3;3;3;3;3", "presentation": "3;3;4;4;4", "wc_summary": "41;148;127;175;167", "wc_strengths": "34;18;32;29;336", "wc_weaknesses": "36;43;7;20;92", "wc_questions": "78;72;1;20;31", "wc_limitations": "1;14;9;1;1", "wc_review": "190;295;176;245;627", "wc_reply_reviewers": "8;22;0;13;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 131.6, 48.23940298138027 ], "wc_strengths_avg": [ 89.8, 123.22402363175777 ], "wc_weaknesses_avg": [ 39.6, 29.04203849594584 ], "wc_questions_avg": [ 40.4, 29.897157055479372 ], "wc_limitations_avg": [ 5.2, 5.3814496188294845 ], "wc_review_avg": [ 306.6, 165.67268936067887 ], "wc_reply_reviewers_avg": [ 9.6, 7.4993333037010705 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2500000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18398864166524522858&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 4, "email": "wisc.edu;wisc.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AttnDreamBooth: Towards Text-Aligned Personalized Text-to-Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96652", "id": "4bINoegDcm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4bINoegDcm", "openreview": "https://openreview.net/forum?id=4bINoegDcm", "poster": "/media/PosterPDFs/NeurIPS%202024/96652.png?t=1731488013.5624623", "project": "", "author_site": "Lianyu Pang, Jian Yin, Baoquan Zhao, Feize Wu, Fu Lee Wang, Qing Li, Xudong Mao", "tldr": "", "abstract": "Recent advances in text-to-image models have enabled high-quality personalized image synthesis based on user-provided concepts with flexible textual control. In this work, we analyze the limitations of two primary techniques in text-to-image personalization: Textual Inversion and DreamBooth. When integrating the learned concept into new prompts, Textual Inversion tends to overfit the concept, while DreamBooth often overlooks it. We attribute these issues to the incorrect learning of the embedding alignment for the concept. To address this, we introduce AttnDreamBooth, a novel approach that separately learns the embedding alignment, the attention map, and the subject identity across different training stages. We also introduce a cross-attention map regularization term to enhance the learning of the attention map. Our method demonstrates significant improvements in identity preservation and text alignment compared to the baseline methods.", "keywords": "Diffusion Models;Personalized Text-to-Image Generation;Personalization", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/c857f2ac9b8dc41d03e6c855c578dbc8fa5b54a6.zip", "author": "Lianyu Pang;Jian Yin;Baoquan Zhao;Feize Wu;Fu Lee Wang;Qing Li;Xudong Mao", "authorids": "~Lianyu_Pang1;~Jian_Yin3;~Baoquan_Zhao1;~Feize_Wu1;~Fu_Lee_Wang2;~Qing_Li5;~Xudong_Mao1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/lyuPang;http://sai.sysu.edu.cn/teacher/teacher01/1385356.htm;https://baoquanzhao.github.io/;;;https://www4.comp.polyu.edu.hk/~csqli/;https://xudongmao.github.io/", "dblp": ";95/578-1;142/7378.html;;62/6271.html;(2024-11-14-1812689);55/11145", "google_scholar": ";;d2GgmE0AAAAJ;;EN0PM30AAAAJ;https://scholar.google.co.in/citations?user=D1LEg-YAAAAJ;https://scholar.google.com/citations?", "orcid": ";;;;0000-0002-3976-0053;0000-0003-3370-471X;", "linkedin": ";;;feize-wu;;;", "or_profile": "~Lianyu_Pang1;~Jian_Yin3;~Baoquan_Zhao1;~Feize_Wu1;~Fu_Lee_Wang2;~Qing_Li5;~Xudong_Mao1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University;Hong Kong Metropolitan University;Hong Kong Polytechnic University;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;mail2.sysu.edu.cn;hkmu.edu.hk;polyu.edu.hk;sysu.edu.cn", "position": "MS student;Full Professor;Associate Professor;Undergrad student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\npang2024attndreambooth,\ntitle={AttnDreamBooth: Towards Text-Aligned Personalized Text-to-Image Generation},\nauthor={Lianyu Pang and Jian Yin and Baoquan Zhao and Feize Wu and Fu Lee Wang and Qing Li and Xudong Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4bINoegDcm}\n}", "github": "", "reviewers": "ZpWY;FYJx;hyCc;U1BU", "pdf_size": 16922905, "rating": "3;5;5;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "96;25;42;43", "wc_strengths": "61;42;39;32", "wc_weaknesses": "92;149;66;79", "wc_questions": "18;67;50;29", "wc_limitations": "39;1;1;1", "wc_review": "306;284;198;184", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 51.5, 26.669270706189174 ], "wc_strengths_avg": [ 43.5, 10.735455276791944 ], "wc_weaknesses_avg": [ 96.5, 31.67412192942371 ], "wc_questions_avg": [ 41.0, 18.907670401189037 ], "wc_limitations_avg": [ 10.5, 16.454482671904334 ], "wc_review_avg": [ 243.0, 52.81098370604357 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7379650738290494610&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;mail2.sysu.edu.cn;hkmu.edu.hk;polyu.edu.hk;sysu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Sun Yat-sen University;Hong Kong Metropolitan University;Hong Kong Polytechnic University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.hkmu.edu.hk;https://www.polyu.edu.hk", "aff_unique_abbr": "SYSU;HKMU;PolyU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "On Learning Multi-Modal Forgery Representation for Diffusion Generated Video Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96651", "id": "4bJufOS6No", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4bJufOS6No", "openreview": "https://openreview.net/forum?id=4bJufOS6No", "poster": "/media/PosterPDFs/NeurIPS%202024/96651.png?t=1731732476.5247834", "project": "", "author_site": "Xiufeng Song, Xiao Guo, Jiache Zhang, Qirui Li, LEI BAI, Xiaoming Liu, Guangtao Zhai, Xiaohong Liu", "tldr": "", "abstract": "Large numbers of synthesized videos from diffusion models pose threats to information security and authenticity, leading to an increasing demand for generated content detection. However, existing video-level detection algorithms primarily focus on detecting facial forgeries and often fail to identify diffusion-generated content with a diverse range of semantics. To advance the field of video forensics, we propose an innovative algorithm named Multi-Modal Detection(MM-Det) for detecting diffusion-generated videos. MM-Det utilizes the profound perceptual and comprehensive abilities of Large Multi-modal Models (LMMs) by generating a Multi-Modal Forgery Representation (MMFR) from LMM's multi-modal space, enhancing its ability to detect unseen forgery content. Besides, MM-Det leverages an In-and-Across Frame Attention (IAFA) mechanism for feature augmentation in the spatio-temporal domain. A dynamic fusion strategy helps refine forgery representations for the fusion. Moreover, we construct a comprehensive diffusion video dataset, called Diffusion Video Forensics (DVF), across a wide range of forgery videos. MM-Det achieves state-of-the-art performance in DVF, demonstrating the effectiveness of our algorithm. Both source code and DVF are available at https://github.com/SparkleXFantasy/MM-Det.", "keywords": "Video Forensics\uff0cMulti-Modal Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiufeng Song;Xiao Guo;Jiache Zhang;Qirui Li;LEI BAI;Xiaoming Liu;Guangtao Zhai;Xiaohong Liu", "authorids": "~Xiufeng_Song1;~Xiao_Guo2;~Jiache_Zhang1;~Qirui_Li3;~LEI_BAI1;~Xiaoming_Liu2;~Guangtao_Zhai1;~Xiaohong_Liu2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/SparkleXFantasy;https://chelsea234.github.io/website/;https://github.com/ZJChe;https://github.com/IApple233;http://leibai.site/;http://www.cse.msu.edu/~liuxm/;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;https://jhc.sjtu.edu.cn/~xiaohongliu/", "dblp": "31/8762;;;;119/1223-1;l/XiaomingLiu0002;19/3230;95/2454-1", "google_scholar": "qO93EgIAAAAJ;H93xhggAAAAJ;;;https://scholar.google.com.au/citations?user=sakOO04AAAAJ;https://scholar.google.com/citations?hl=en;E6zbSYgAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": "0009-0002-0773-7876;0000-0003-3575-3953;;;0000-0003-3378-7201;;;", "linkedin": ";;;;lei-bai-641370153/;xiaoming-liu-5a7807b/;;xiaohong-liu/", "or_profile": "~Xiufeng_Song1;~Xiao_Guo2;~Jiache_Zhang1;~Qirui_Li3;~LEI_BAI1;~Xiaoming_Liu2;~Guangtao_Zhai1;~Xiaohong_Liu2", "aff": "Shanghai Jiaotong University;Michigan State University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai AI Laboratory;Michigan State University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;msu.edu;sjtu.edu.cn;sjtu.edu;pjlab.org.cn;msu.edu;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;Undergrad student;Undergrad student;Researcher;Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsong2024on,\ntitle={On Learning Multi-Modal Forgery Representation for Diffusion Generated Video Detection},\nauthor={Xiufeng Song and Xiao Guo and Jiache Zhang and Qirui Li and LEI BAI and Xiaoming Liu and Guangtao Zhai and Xiaohong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4bJufOS6No}\n}", "github": "", "reviewers": "Niir;H1bu;f8wk;X8oW;LBha", "pdf_size": 11226337, "rating": "6;6;6;6;7", "confidence": "4;4;5;4;5", "soundness": "2;3;3;3;4", "novelty": "3;2;3;4;4", "presentation": "3;3;3;4;3", "wc_summary": "42;49;62;98;72", "wc_strengths": "93;52;39;31;97", "wc_weaknesses": "227;45;83;86;124", "wc_questions": "32;37;46;23;48", "wc_limitations": "1;7;25;18;15", "wc_review": "395;190;255;256;356", "wc_reply_reviewers": "19;9;0;16;34", "wc_reply_authors": "25;23;0;25;36", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 64.6, 19.652989594461197 ], "wc_strengths_avg": [ 62.4, 27.477991192952953 ], "wc_weaknesses_avg": [ 113.0, 62.24146527838174 ], "wc_questions_avg": [ 37.2, 9.19565114605812 ], "wc_limitations_avg": [ 13.2, 8.4 ], "wc_review_avg": [ 290.4, 74.51335450776592 ], "wc_reply_reviewers_avg": [ 15.6, 11.288932633336067 ], "wc_reply_authors_avg": [ 21.8, 11.822013364905319 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6123724356957946, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12959610534080817135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;msu.edu;sjtu.edu.cn;sjtu.edu;pjlab.org.cn;msu.edu;sjtu.edu.cn;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;2;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Michigan State University;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.msu.edu;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "SJTU;MSU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Convolutional Differentiable Logic Gate Networks", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96650", "id": "4bKEFyUHT4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4bKEFyUHT4", "openreview": "https://openreview.net/forum?id=4bKEFyUHT4", "poster": "/media/PosterPDFs/NeurIPS%202024/96650.png?t=1733861937.2919855", "project": "", "author_site": "Felix Petersen, Hilde Kuehne, Christian Borgelt, Julian Welzel, Stefano Ermon", "tldr": "", "abstract": "With the increasing inference cost of machine learning models, there is a growing interest in models with fast and efficient inference. \nRecently, an approach for learning logic gate networks directly via a differentiable relaxation was proposed. Logic gate networks are faster than conventional neural network approaches because their inference only requires logic gate operators such as NAND, OR, and XOR, which are the underlying building blocks of current hardware and can be efficiently executed. We build on this idea, extending it by deep logic gate tree convolutions, logical OR pooling, and residual initializations. This allows scaling logic gate networks up by over one order of magnitude and utilizing the paradigm of convolution. On CIFAR-10, we achieve an accuracy of 86.29% using only 61 million logic gates, which improves over the SOTA while being 29x smaller.", "keywords": "logic;efficient inference;embedded;fpga;logic circuits;lookup tables;differentiable", "primary_area": "machine_vision", "supplementary_material": "", "author": "Felix Petersen;Hilde Kuehne;Christian Borgelt;Julian Welzel;Stefano Ermon", "authorids": "~Felix_Petersen1;~Hilde_Kuehne5;~Christian_Borgelt1;~Julian_Welzel1;~Stefano_Ermon1", "gender": "Not Specified;F;M;M;M", "homepage": "http://www.petersen.ai/;https://hildekuehne.github.io;https://www.borgelt.net/;;http://cs.stanford.edu/~ermon/", "dblp": "230/3983;45/4963;b/ChristianBorgelt.html;;47/8135", "google_scholar": "v8Kat6YAAAAJ;pxhCcH0AAAAJ;https://scholar.google.de/citations?user=T50Bxb8AAAAJ;;", "orcid": ";0000-0003-1079-4441;;;", "linkedin": ";hilde-kuehne-8b9aa661;christian-borgelt-a2429071/;julian-w-791a95224/;", "or_profile": "~Felix_Petersen1;~Hilde_Kuehne5;~Christian_Borgelt1;~Julian_Welzel1;~Stefano_Ermon1", "aff": "Stanford University;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn, Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Paris-Lodron-University of Salzburg;InftyLabs Research;Stanford University", "aff_domain": "stanford.edu;cs.uni-bonn.de;sbg.ac.at;inftylabs.com;stanford.edu", "position": "Postdoc;Associate Professor;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\npetersen2024convolutional,\ntitle={Convolutional Differentiable Logic Gate Networks},\nauthor={Felix Petersen and Hilde Kuehne and Christian Borgelt and Julian Welzel and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4bKEFyUHT4}\n}", "github": "", "reviewers": "nuqf;2J2u;BM74;zmod", "pdf_size": 1302612, "rating": "8;8;8;9", "confidence": "4;3;4;5", "soundness": "4;4;4;4", "novelty": "4;4;4;4", "presentation": "4;4;4;3", "wc_summary": "236;138;88;73", "wc_strengths": "136;103;128;26", "wc_weaknesses": "36;271;318;347", "wc_questions": "102;99;2;116", "wc_limitations": "6;79;34;66", "wc_review": "516;690;570;628", "wc_reply_reviewers": "46;77;0;197", "wc_reply_authors": "0;0;0;89", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 8.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 133.75, 63.7509803846184 ], "wc_strengths_avg": [ 98.25, 43.45327950799571 ], "wc_weaknesses_avg": [ 243.0, 122.54998980008118 ], "wc_questions_avg": [ 79.75, 45.34520371549785 ], "wc_limitations_avg": [ 46.25, 28.428638729281428 ], "wc_review_avg": [ 601.0, 64.87680633323437 ], "wc_reply_reviewers_avg": [ 80.0, 72.89375830618147 ], "wc_reply_authors_avg": [ 22.25, 38.53813046840752 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14240925523258076188&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stanford.edu;cs.uni-bonn.de;sbg.ac.at;inftylabs.com;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Stanford University;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn;Paris-Lodron-University of Salzburg;InftyLabs", "aff_unique_dep": ";;;Research", "aff_unique_url": "https://www.stanford.edu;https://www.uni-bonn.de;https://www.uni-salzburg.at;", "aff_unique_abbr": "Stanford;Uni Bonn;PLUS;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;Germany;Austria;" }, { "title": "What is my quantum computer good for? Quantum capability learning with physics-aware neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96649", "id": "4cU9ZvOkBz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4cU9ZvOkBz", "openreview": "https://openreview.net/forum?id=4cU9ZvOkBz", "poster": "", "project": "", "author_site": "Daniel Hothem, Ashe Miller, Timothy Proctor", "tldr": "", "abstract": "Quantum computers have the potential to revolutionize diverse fields, including quantum chemistry, materials science, and machine learning. However, contemporary quantum computers experience errors that often cause quantum programs run on them to fail. Until quantum computers can reliably execute large quantum programs, stakeholders will need fast and reliable methods for assessing a quantum computer\u2019s capability\u2014i.e., the programs it can run and how well it can run them. Previously, off-the-shelf neural network architectures have been used to model quantum computers' capabilities, but with limited success, because these networks fail to learn the complex quantum physics that determines real quantum computers' errors. We address this shortcoming with a new quantum-physics-aware neural network architecture for learning capability models. Our scalable architecture combines aspects of graph neural networks with efficient approximations to the physics of errors in quantum programs. This approach achieves up to $\\sim50\\%$ reductions in mean absolute error on both experimental and simulated data, over state-of-the-art models based on convolutional neural networks, and scales to devices with 100+ qubits.", "keywords": "GNN;Quantum Computing;Quantum Benchmarking", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/f26204f885da73a5a79ed11d4923562ece1bf097.zip", "author": "Daniel Hothem;Ashe Miller;Timothy Proctor", "authorids": "~Daniel_Hothem1;~Ashe_Miller1;~Timothy_Proctor1", "gender": "M;F;M", "homepage": "https://www.sandia.gov/people/staff/daniel-hothem/;;", "dblp": "345/8753;;296/6337", "google_scholar": ";q7LaFFwAAAAJ;", "orcid": ";;0000-0003-0219-8930", "linkedin": ";;", "or_profile": "~Daniel_Hothem1;~Ashe_Miller1;~Timothy_Proctor1", "aff": "Sandia National Laboratories;Sandia National Laboratories;Sandia National Laboratories", "aff_domain": "sandia.gov;sandia.gov;sandia.gov", "position": "Postdoc;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nhothem2024what,\ntitle={What is my quantum computer good for? Quantum capability learning with physics-aware neural networks},\nauthor={Daniel Hothem and Ashe Miller and Timothy Proctor},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4cU9ZvOkBz}\n}", "github": "", "reviewers": "beTU;k1xb;oCVW", "pdf_size": 2605937, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "3;3;2", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "39;28;78", "wc_strengths": "12;37;63", "wc_weaknesses": "66;194;149", "wc_questions": "27;14;1", "wc_limitations": "13;6;2", "wc_review": "157;279;293", "wc_reply_reviewers": "15;17;56", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 48.333333333333336, 21.452790546272112 ], "wc_strengths_avg": [ 37.333333333333336, 20.82199691565522 ], "wc_weaknesses_avg": [ 136.33333333333334, 53.01781671182706 ], "wc_questions_avg": [ 14.0, 10.614455552060438 ], "wc_limitations_avg": [ 7.0, 4.546060565661952 ], "wc_review_avg": [ 243.0, 61.079183578913906 ], "wc_reply_reviewers_avg": [ 29.333333333333332, 18.873850222522755 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13554224769666914734&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sandia.gov;sandia.gov;sandia.gov", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sandia National Laboratories", "aff_unique_dep": "", "aff_unique_url": "https://www.sandia.gov", "aff_unique_abbr": "SNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ActSort: An active-learning accelerated cell sorting algorithm for large-scale calcium imaging datasets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96648", "id": "4czwwExZKQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4czwwExZKQ", "openreview": "https://openreview.net/forum?id=4czwwExZKQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96648.png?t=1729548678.184002", "project": "", "author_site": "Yiqi Jiang, Hakki Akengin, Ji Zhou, Mehmet Aslihak, Yang Li, Radoslaw Chrapkiewicz, Oscar Hernandez, sadegh ebrahimi, Omar Jaidar, Yanping Zhang, Hakan Inan, Christopher Miranda, Fatih Dinc, Marta Pozo, Mark Schnitzer", "tldr": "", "abstract": "Recent advances in calcium imaging enable simultaneous recordings of up to a million neurons in behaving animals, producing datasets of unprecedented scales. Although individual neurons and their activity traces can be extracted from these videos with automated algorithms, the results often require human curation to remove false positives, a laborious process called \\emph{cell sorting}. To address this challenge, we introduce ActSort, an active-learning algorithm for sorting large-scale datasets that integrates features engineered by domain experts together with data formats with minimal memory requirements. By strategically bringing outlier cell candidates near the decision boundary up for annotation, ActSort reduces human labor to about 1\u20133\\% of cell candidates and improves curation accuracy by mitigating annotator bias. To facilitate the algorithm's widespread adoption among experimental neuroscientists, we created a user-friendly software and conducted a first-of-its-kind benchmarking study involving about 160,000 annotations. Our tests validated ActSort's performance across different experimental conditions and datasets from multiple animals. Overall, ActSort addresses a crucial bottleneck in processing large-scale calcium videos of neural activity and thereby facilitates systems neuroscience experiments at previously inaccessible scales. (\\url{https://github.com/schnitzer-lab/ActSort-public})", "keywords": "neuroscience;active learning;calcium imaging;cell sorting", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/aa30ce0d806536d8927f9080173c2d64296130df.zip", "author": "Yiqi Jiang;Hakki Orhun Akengin;Ji Zhou;Mehmet Anil Aslihak;Yang Li;Radoslaw Chrapkiewicz;Oscar Hernandez;Sadegh Ebrahimi;Omar Jaidar;Yanping Zhang;Hakan Inan;Christopher Miranda;Fatih Dinc;Marta Blanco-Pozo;Mark Schnitzer", "authorids": "~Yiqi_Jiang2;~Hakki_Orhun_Akengin1;~Ji_Zhou1;~Mehmet_Anil_Aslihak1;~Yang_Li103;~Radoslaw_Chrapkiewicz1;~Oscar_Hernandez1;~Sadegh_Ebrahimi1;~Omar_Jaidar1;~Yanping_Zhang1;~Hakan_Inan1;~Christopher_Miranda1;~Fatih_Dinc1;~Marta_Blanco-Pozo1;~Mark_Schnitzer1", "gender": "F;M;M;M;M;M;;M;;;M;;M;F;M", "homepage": "https://yiqij.github.io/;;https://jizhou-fluid.com/;;https://biology.stanford.edu/people/yang-li;;;https://ebrahimisadegh.github.io/;;https://pyramidal.stanford.edu/;;;https://sites.google.com/view/fatihdinc/;;https://pyramidal.stanford.edu/", "dblp": ";;43/5757;;;;;;;;;;218/5297;;", "google_scholar": "qrlQN-AAAAAJ;https://scholar.google.com/citations?hl=en;ZHZzN04AAAAJ;;https://scholar.google.com.hk/citations?user=xP9IwO0AAAAJ;https://scholar.google.pl/citations?user=uVn7fM0AAAAJ;9J3u0agAAAAJ;;f-uc2m8AAAAJ;;https://scholar.google.com/citations?hl=en;gP5-7VEAAAAJ;https://scholar.google.com.tr/citations?user=jFHyg0oAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": ";;0000-0002-4072-7883;;0000-0000-0000-0000;;;;0000-0001-7317-6355;;;;0000-0003-0921-0162;0000-0002-7958-2706;", "linkedin": "yiqi-jiang-aa1363173/;hakk%C4%B1-orhun-akengin-0619aa198?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;jizhou-fluid/;anil-a-018b73122/;https://www.linkedin.com/;;;;;;;;;marta-blanco-pozo-03ab88120;", "or_profile": "~Yiqi_Jiang2;~Hakki_Orhun_Akengin1;~Ji_Zhou1;~Mehmet_Anil_Aslihak1;~Yang_Li103;~Radoslaw_Chrapkiewicz1;~Oscar_Hernandez1;~Sadegh_Ebrahimi1;~Omar_Jaidar1;~Yanping_Zhang1;~Hakan_Inan1;~Christopher_Miranda1;~Fatih_Dinc1;~Marta_Blanco-Pozo1;~Mark_Schnitzer1", "aff": "Stanford University;Ozyegin University;Johns Hopkins University;Middle East Technical University;Stanford University;Stanford University;Stanford University;;Stanford University;Stanford University;Meta Facebook;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;ozyegin.edu.tr;jhu.edu;metu.edu.tr;stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;fb.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Undergrad student;PhD student;Undergrad student;PhD student;Researcher;Researcher;;Researcher;Researcher;Research Scientist;Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\njiang2024actsort,\ntitle={ActSort: An active-learning accelerated cell sorting algorithm for large-scale calcium imaging datasets},\nauthor={Yiqi Jiang and Hakki Orhun Akengin and Ji Zhou and Mehmet Anil Aslihak and Yang Li and Radoslaw Chrapkiewicz and Oscar Hernandez and Sadegh Ebrahimi and Omar Jaidar and Yanping Zhang and Hakan Inan and Christopher Miranda and Fatih Dinc and Marta Blanco-Pozo and Mark Schnitzer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4czwwExZKQ}\n}", "github": "", "reviewers": "iZ5n;uvup;h7Vq;WHFf", "pdf_size": 17272355, "rating": "5;5;6;8", "confidence": "4;3;3;2", "soundness": "3;2;3;4", "novelty": "3;3;2;4", "presentation": "2;3;4;3", "wc_summary": "71;137;161;144", "wc_strengths": "37;103;127;216", "wc_weaknesses": "154;684;113;151", "wc_questions": "94;188;288;106", "wc_limitations": "2;96;36;22", "wc_review": "358;1208;725;639", "wc_reply_reviewers": "210;249;164;141", "wc_reply_authors": "576;28;22;1730", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;2;4", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 128.25, 34.18607172519241 ], "wc_strengths_avg": [ 120.75, 64.11074402937467 ], "wc_weaknesses_avg": [ 275.5, 236.40061336637856 ], "wc_questions_avg": [ 169.0, 77.64663547121665 ], "wc_limitations_avg": [ 39.0, 35.05709628591621 ], "wc_review_avg": [ 732.5, 306.24704080202963 ], "wc_reply_reviewers_avg": [ 191.0, 41.69532347877877 ], "wc_reply_authors_avg": [ 589.0, 696.1070320001085 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:80FL-QtHaKkJ:scholar.google.com/&scioq=ActSort:+An+active-learning+accelerated+cell+sorting+algorithm+for+large-scale+calcium+imaging+datasets&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "stanford.edu;ozyegin.edu.tr;jhu.edu;metu.edu.tr;stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;fb.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 15, "aff_unique_index": "0;1;2;3;0;0;0;0;0;4;0;0;0;0", "aff_unique_norm": "Stanford University;Ozyegin University;Johns Hopkins University;Middle East Technical University;Meta", "aff_unique_dep": ";;;;Meta Platforms, Inc.", "aff_unique_url": "https://www.stanford.edu;https://www.ozyegin.edu.tr;https://www.jhu.edu;https://www.metu.edu.tr;https://meta.com", "aff_unique_abbr": "Stanford;Ozyegin;JHU;METU;Meta", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;1;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;T\u00fcrkiye" }, { "title": "RepLiQA: A Question-Answering Dataset for Benchmarking LLMs on Unseen Reference Content", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97851", "id": "4diKTLmg2y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4diKTLmg2y", "openreview": "https://openreview.net/forum?id=4diKTLmg2y", "poster": "/media/PosterPDFs/NeurIPS%202024/97851.png?t=1731678526.0061827", "project": "", "author_site": "Joao Monteiro, Pierre-Andr\u00e9 No\u00ebl, \u00c9tienne Marcotte, Sai Rajeswar Mudumba, Valentina Zantedeschi, David Vazquez, Nicolas Chapados, Chris Pal, Perouz Taslakian", "tldr": "", "abstract": "Large Language Models (LLMs) are trained on vast amounts of data, most of which is automatically scraped from the internet. This data includes encyclopedic documents that harbor a vast amount of general knowledge (*e.g.*, Wikipedia) but also potentially overlap with benchmark datasets used for evaluating LLMs. Consequently, evaluating models on test splits that might have leaked into the training set is prone to misleading conclusions. To foster sound evaluation of language models, we introduce a new test dataset named RepLiQA, suited for question-answering and topic retrieval tasks. RepLiQA is a collection of five splits of test sets, four of which have not been released to the internet or exposed to LLM APIs prior to this publication. Each sample in RepLiQA comprises (1) a reference document crafted by a human annotator and depicting an imaginary scenario (*e.g.*, a news article) absent from the internet; (2) a question about the document\u2019s topic; (3) a ground-truth answer derived directly from the information in the document; and (4) the paragraph extracted from the reference document containing the answer. As such, accurate answers can only be generated if a model can find relevant content within the provided document. We run a large-scale benchmark comprising several state-of-the-art LLMs to uncover differences in performance across models of various types and sizes in a context-conditional language modeling setting. Released splits of RepLiQA can be found here: https://huggingface.co/datasets/ServiceNow/repliqa.", "keywords": "Question Answering;data generation;robust evaluation of LLMs;context-conditional language modelling", "primary_area": "", "supplementary_material": "", "author": "Joao Monteiro;Pierre-Andre Noel;\u00c9tienne Marcotte;Sai Rajeswar;Valentina Zantedeschi;David Vazquez;Nicolas Chapados;Christopher Pal;Perouz Taslakian", "authorids": "~Joao_Monteiro1;~Pierre-Andre_Noel1;~\u00c9tienne_Marcotte1;~Sai_Rajeswar2;~Valentina_Zantedeschi2;~David_Vazquez1;~Nicolas_Chapados1;~Christopher_Pal1;~Perouz_Taslakian1", "gender": "M;M;;F;M;M;;F;M", "homepage": ";;;http://vzantedeschi.com/;http://www.david-vazquez.com;;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;http://www.perouz.com;https://sairajeswar.com/", "dblp": "215/5354-2;47/9226.html;;179/2187;94/8653;58/1013;45/1217;52/1849;159/2116", "google_scholar": "https://scholar.google.ca/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;tdUUrS8AAAAJ;1jHvtfsAAAAJ;QdnjDj8AAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;LJ7gHkQAAAAJ;https://scholar.google.ca/citations?user=h-sqIigAAAAJ", "orcid": ";0000-0001-6979-1873;;;0000-0002-2845-8158;;;;", "linkedin": "joao-monteiro-47180256/;panoel/;https://linkedin.com/in/e-marcotte;valentina-zantedeschi-36a65a83/;https://www.linkedin.com/company/david-vazquez/;;;perouz/;sairajeswar/", "or_profile": "~Joao_Monteiro1;~Pierre-Andre_Noel1;~\u00c9tienne_Marcotte1;~Valentina_Zantedeschi2;~David_Vazquez1;~Nicolas_Chapados1;~Christopher_Pal1;~Perouz_Taslakian1;~sai_rajeswar_mudumba1", "aff": "ServiceNow Research;ServiceNow;ServiceNow Research;ServiceNow Research;ServiceNow research;ServiceNow Research;Polytechnique Montreal;ServiceNow;ServiceNow", "aff_domain": "servicenow.com;servicenow.com;servicenow.com;servicenow.com;servicenow.com;servicenow.com;polymtl.ca;servicenow.com;servicenow.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;VP Research;Full Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nmonteiro2024repliqa,\ntitle={RepLi{QA}: A Question-Answering Dataset for Benchmarking {LLM}s on Unseen Reference Content},\nauthor={Joao Monteiro and Pierre-Andre Noel and {\\'E}tienne Marcotte and Sai Rajeswar and Valentina Zantedeschi and David Vazquez and Nicolas Chapados and Christopher Pal and Perouz Taslakian},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=4diKTLmg2y}\n}", "github": "", "reviewers": "7v8M;a4ei;GtR5", "pdf_size": 1290990, "rating": "5;7;7", "confidence": "3;4;3", "wc_summary_and_contributions": "94;143;65", "wc_strengths": "63;2;2", "wc_improvement": "5;271;2", "wc_limitations": "7;9;1", "wc_correctness": "6;5;1", "wc_clarity": "4;4;1", "wc_relation_to_prior_work": "1;4;1", "wc_documentation": "11;6;1", "wc_additional_feedback": "1;1;1", "wc_review": "192;445;75", "wc_reply_reviewers": "0;9;0", "wc_reply_authors": "58;438;0", "reply_reviewers": "0;1;0", "reply_authors": "2;3;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 100.66666666666667, 32.190405748020986 ], "wc_strengths_avg": [ 22.333333333333332, 28.755675768252935 ], "wc_improvement_avg": [ 92.66666666666667, 126.10665679847709 ], "wc_limitations_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_correctness_avg": [ 4.0, 2.160246899469287 ], "wc_clarity_avg": [ 3.0, 1.4142135623730951 ], "wc_relation_to_prior_work_avg": [ 2.0, 1.4142135623730951 ], "wc_documentation_avg": [ 6.0, 4.08248290463863 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 237.33333333333334, 154.41574473551012 ], "wc_reply_reviewers_avg": [ 3.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 165.33333333333334, 194.25298510504857 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8296315127842054540&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "servicenow.com;servicenow.com;servicenow.com;servicenow.com;servicenow.com;servicenow.com;polymtl.ca;servicenow.com;servicenow.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;1;0;0", "aff_unique_norm": "ServiceNow;Polytechnique Montreal", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.servicenow.com;https://www.polymtl.ca", "aff_unique_abbr": "ServiceNow;PolyMTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "id": "4dmwvbs4Ea", "title": "Offline RL via Feature-Occupancy Gradient Ascent", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study offline Reinforcement Learning in large infinite-horizon discounted Markov Decision Processes (MDPs) when the reward and transition models are linearly realizable under a known feature map. Starting from the classic linear-program formulation of the optimal control problem in MDPs, we develop a new algorithm that performs a form of gradient ascent in the space of feature occupancies, defined as the expected feature vectors that can potentially be generated by executing policies in the environment.\tWe show that the resulting simple algorithm satisfies strong computational and sample complexity guarantees, achieved under the least restrictive data coverage assumptions known in the literature. In particular, we show that the sample complexity of our method scales optimally with the desired accuracy level and depends on a weak notion of coverage that only requires the empirical feature covariance matrix to cover a single direction in the feature space (as opposed to covering a full subspace). Additionally, our method is easy to implement and requires no prior knowledge of the coverage ratio (or even an upper bound on it), which altogether make it the strongest known algorithm for this setting to date.", "keywords": "Offline Reinforcement Learning;Linear MDPs;Provably efficient RL.", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Gergely Neu;Nneka Okolo", "authorids": "~Gergely_Neu1;~Nneka_Okolo1", "gender": "M;F", "homepage": "http://cs.bme.hu/~gergo;", "dblp": "83/7606;331/5997", "google_scholar": "https://scholar.google.ch/citations?user=uz27G84AAAAJ;s8DIX2sAAAAJ", "orcid": ";0009-0004-0137-970X", "linkedin": ";nneka-okolo-876410134/", "or_profile": "~Gergely_Neu1;~Nneka_Okolo1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.edu;upf.edu", "position": "Assistant Professor;PhD student", "bibtex": "@misc{\nanonymous2024offline,\ntitle={Offline {RL} via Feature-Occupancy Gradient Ascent},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=4dmwvbs4Ea}\n}", "github": "", "project": "", "reviewers": "5zRN;5hYq;J7AJ", "site": "https://openreview.net/forum?id=4dmwvbs4Ea", "pdf_size": 471321, "rating": "3;6;7", "confidence": "3;4;4", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "36;63;64", "wc_strengths": "18;91;56", "wc_weaknesses": "144;55;56", "wc_questions": "76;15;87", "wc_limitations": "5;11;4", "wc_review": "279;235;267", "wc_reply_reviewers": "0;13;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.333333333333336, 12.970050972229146 ], "wc_strengths_avg": [ 55.0, 29.81051268708183 ], "wc_weaknesses_avg": [ 85.0, 41.72129751897305 ], "wc_questions_avg": [ 59.333333333333336, 31.668421004036322 ], "wc_limitations_avg": [ 6.666666666666667, 3.0912061651652345 ], "wc_review_avg": [ 260.3333333333333, 18.571184369578827 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0-7Y7ER12tQJ:scholar.google.com/&scioq=Offline+RL+via+Feature-Occupancy+Gradient+Ascent&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Spain" }, { "title": "Unveiling Induction Heads: Provable Training Dynamics and Feature Learning in Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96647", "id": "4fN2REs0Ma", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4fN2REs0Ma", "openreview": "https://openreview.net/forum?id=4fN2REs0Ma", "poster": "", "project": "", "author_site": "Siyu Chen, Heejune Sheen, Tianhao Wang, Zhuoran Yang", "tldr": "", "abstract": "In-context learning (ICL) is a cornerstone of large language model (LLM) functionality, yet its theoretical foundations remain elusive due to the complexity of transformer architectures. In particular, most existing work only theoretically explains how the attention mechanism facilitates ICL under certain data models. It remains unclear how the other building blocks of the transformer contribute to ICL. To address this question, we study how a two-attention-layer transformer is trained to perform ICL on $n$-gram Markov chain data, where each token in the Markov chain statistically depends on the previous n tokens. \nWe analyze a sophisticated transformer model featuring relative positional embedding, multi-head softmax attention, and a feed-forward layer with normalization. \nWe prove that the gradient flow with respect to a cross-entropy ICL loss converges to a limiting model that performs a generalized version of the \"induction head\" mechanism with a learned feature, resulting from the congruous contribution of all the building blocks. \nSpecifically, the first attention layer acts as a copier, copying past tokens within a given window to each position, and the feed-forward network with normalization acts as a selector that generates a feature vector by only looking at informationally relevant parents from the window. \nFinally, the second attention layer is a classifier that\ncompares these features with the feature at the output position, and uses the resulting similarity scores to generate the desired output. Our theory is further validated by simulation experiments.", "keywords": "transformer;in context learning;Markov chain;n-gram model;feature learning;training dynamics;induction head", "primary_area": "optimization", "supplementary_material": "", "author": "Siyu Chen;Heejune Sheen;Tianhao Wang;Zhuoran Yang", "authorids": "~Siyu_Chen2;~Heejune_Sheen1;~Tianhao_Wang1;~Zhuoran_Yang1", "gender": "M;M;M;M", "homepage": "https://github.com/FFishy-git/FFishy-git.github.io;;https://tianhaowang.ttic.edu;https://zhuoranyang.github.io/", "dblp": ";279/3743;145/3288-2;", "google_scholar": ";;m45LD1kAAAAJ;", "orcid": ";0000-0003-3286-7859;;", "linkedin": ";;;", "or_profile": "~Siyu_Chen2;~Heejune_Sheen1;~Tianhao_Wang1;~Zhuoran_Yang1", "aff": "Yale University;Yale University;Yale University;Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu;yale.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024unveiling,\ntitle={Unveiling Induction Heads: Provable Training Dynamics and Feature Learning in Transformers},\nauthor={Siyu Chen and Heejune Sheen and Tianhao Wang and Zhuoran Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4fN2REs0Ma}\n}", "github": "", "reviewers": "ExgR;NTvL;omVG;FHQ5", "pdf_size": 2447006, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;2;3;4", "novelty": "3;2;2;3", "presentation": "3;2;3;3", "wc_summary": "148;47;96;45", "wc_strengths": "121;31;114;32", "wc_weaknesses": "172;329;130;102", "wc_questions": "49;149;116;82", "wc_limitations": "24;23;6;1", "wc_review": "514;579;462;262", "wc_reply_reviewers": "16;30;21;12", "wc_reply_authors": "8;17;16;8", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 42.2196636651691 ], "wc_strengths_avg": [ 74.5, 43.072613108563544 ], "wc_weaknesses_avg": [ 183.25, 87.75925877079865 ], "wc_questions_avg": [ 99.0, 37.34300469967568 ], "wc_limitations_avg": [ 13.5, 10.161200716450788 ], "wc_review_avg": [ 454.25, 118.48285740983799 ], "wc_reply_reviewers_avg": [ 19.75, 6.722164829874376 ], "wc_reply_authors_avg": [ 12.25, 4.264680527307995 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6592269435939878344&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "yale.edu;yale.edu;yale.edu;yale.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Resolving Discrepancies in Compute-Optimal Scaling of Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96646", "id": "4fSSqpk1sM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4fSSqpk1sM", "openreview": "https://openreview.net/forum?id=4fSSqpk1sM", "poster": "", "project": "", "author_site": "Tomer Porian, Mitchell Wortsman, Jenia Jitsev, Ludwig Schmidt, Yair Carmon", "tldr": "", "abstract": "Kaplan et al. and Hoffmann et al. developed influential scaling laws for the optimal model size as a function of the compute budget, but these laws yield substantially different predictions. We explain the discrepancy by reproducing the Kaplan scaling law on two datasets (OpenWebText2 and RefinedWeb) and identifying three factors causing the difference: last layer computational cost, warmup duration, and scale-dependent optimizer tuning. With these factors corrected, we obtain excellent agreement with the Hoffmann et al. (i.e., \"Chinchilla\") scaling law. Counter to a hypothesis of Hoffmann et al., we find that careful learning rate decay is not essential for the validity of their scaling law. As a secondary result, we derive scaling laws for the optimal learning rate and batch size, finding that tuning the AdamW $\\beta_2$ parameter is essential at lower batch sizes.", "keywords": "LLMs;scaling laws;Chinchilla;copmute-optimal;power laws", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/bc8366e64c01622e083b0ccb92a9375c5ee423c7.zip", "author": "Tomer Porian;Mitchell Wortsman;Jenia Jitsev;Ludwig Schmidt;Yair Carmon", "authorids": "~Tomer_Porian1;~Mitchell_Wortsman1;~Jenia_Jitsev1;~Ludwig_Schmidt1;~Yair_Carmon1", "gender": "M;;M;M;M", "homepage": "https://mitchellnw.github.io/;;http://people.csail.mit.edu/ludwigs/;https://www.cs.tau.ac.il/~ycarmon/;", "dblp": "232/2273;53/5156;141/2720;13/558;", "google_scholar": "fzRnjFgAAAAJ;https://scholar.google.com/citations?hl=en;SWMKy70AAAAJ;kTKmpT0AAAAJ;", "orcid": ";0000-0002-1221-7851;;;", "linkedin": ";;ludwig-schmidt-87ba3612/;;tomer-porian-3b1ab1186/", "or_profile": "~Mitchell_Wortsman1;~Jenia_Jitsev1;~Ludwig_Schmidt1;~Yair_Carmon1;~Tomer_Arye_Porian1", "aff": "University of Washington, Seattle;Juelich Supercomputing Center, Research Center Juelich;University of Washington;Tel Aviv University;Tel Aviv University", "aff_domain": "uw.edu;fz-juelich.de;washington.edu;tau.ac.il;tau.ac.il", "position": "PhD student;Senior Scientist;Assistant Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\nporian2024resolving,\ntitle={Resolving Discrepancies in Compute-Optimal Scaling of Language Models},\nauthor={Tomer Porian and Mitchell Wortsman and Jenia Jitsev and Ludwig Schmidt and Yair Carmon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4fSSqpk1sM}\n}", "github": "", "reviewers": "CKuz;comx;Kjsg;szos", "pdf_size": 3257447, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;2;4;4", "novelty": "2;3;4;3", "presentation": "3;4;4;4", "wc_summary": "100;112;65;50", "wc_strengths": "99;124;59;100", "wc_weaknesses": "151;602;124;62", "wc_questions": "121;28;69;84", "wc_limitations": "26;9;1;1", "wc_review": "497;875;318;297", "wc_reply_reviewers": "0;35;41;13", "wc_reply_authors": "212;72;212;0", "reply_reviewers": "0;2;1;1", "reply_authors": "3;2;5;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 25.183079636930824 ], "wc_strengths_avg": [ 95.5, 23.32916629457641 ], "wc_weaknesses_avg": [ 234.75, 214.47304609204392 ], "wc_questions_avg": [ 75.5, 33.32041416309227 ], "wc_limitations_avg": [ 9.25, 10.207227831296802 ], "wc_review_avg": [ 496.75, 231.79988675579634 ], "wc_reply_reviewers_avg": [ 22.25, 16.543503256565703 ], "wc_reply_authors_avg": [ 124.0, 91.60785992479029 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15779060852360956411&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uw.edu;fz-juelich.de;washington.edu;tau.ac.il;tau.ac.il", "author_num": 5, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "University of Washington;Research Center Juelich;Tel Aviv University", "aff_unique_dep": ";Juelich Supercomputing Center;", "aff_unique_url": "https://www.washington.edu;https://www.fz-juelich.de/;https://www.tau.ac.il", "aff_unique_abbr": "UW;FZ J\u00fclich;TAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;1;0;2;2", "aff_country_unique": "United States;Germany;Israel" }, { "title": "BECAUSE: Bilinear Causal Representation for Generalizable Offline Model-based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96645", "id": "4i9xuPEu9w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4i9xuPEu9w", "openreview": "https://openreview.net/forum?id=4i9xuPEu9w", "poster": "/media/PosterPDFs/NeurIPS%202024/96645.png?t=1731748331.4737709", "project": "", "author_site": "Haohong Lin, Wenhao Ding, Jian Chen, Laixi Shi, Jiacheng Zhu, Bo Li, DING ZHAO", "tldr": "", "abstract": "Offline model-based reinforcement learning (MBRL) enhances data efficiency by utilizing pre-collected datasets to learn models and policies, especially in scenarios where exploration is costly or infeasible. Nevertheless, its performance often suffers from the objective mismatch between model and policy learning, resulting in inferior performance despite accurate model predictions. This paper first identifies the primary source of this mismatch comes from the underlying confounders present in offline data for MBRL. Subsequently, we introduce **B**ilin**E**ar **CAUS**al r**E**presentation (BECAUSE), an algorithm to capture causal representation for both states and actions to reduce the influence of the distribution shift, thus mitigating the objective mismatch problem. Comprehensive evaluations on 18 tasks that vary in data quality and environment context demonstrate the superior performance of BECAUSE over existing offline RL algorithms. We show the generalizability and robustness of BECAUSE under fewer samples or larger numbers of confounders. Additionally, we offer theoretical analysis of BECAUSE to prove its error bound and sample efficiency when integrating causal representation into offline MBRL. See more details in our project page: [https://sites.google.com/view/be-cause](https://sites.google.com/view/be-cause).", "keywords": "Model-based RL;Causal Reasoning;Offline RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3eddfe6a055bd3d61bf94c107a8a38417fa12d8f.zip", "author": "Haohong Lin;Wenhao Ding;Jian Chen;Laixi Shi;Jiacheng Zhu;Bo Li;Ding Zhao", "authorids": "~Haohong_Lin1;~Wenhao_Ding1;~Jian_Chen13;~Laixi_Shi1;~Jiacheng_Zhu1;~Bo_Li19;~Ding_Zhao1", "gender": "M;M;M;F;M;F;", "homepage": "https://hhlin.info/;https://wenhao.pub;https://jianc99.github.io/;https://laixishi.github.io/;https://jiachengzhuml.github.io/;http://boli.cs.illinois.edu/;https://safeai-lab.github.io", "dblp": "154/7972;215/3667.html;;211/7965;40/10195;50/3402-26;", "google_scholar": ";q2aqI9sAAAAJ;hjxFO34AAAAJ;V8RkRr8AAAAJ;rKUnBPgAAAAJ;K8vJkTcAAAAJ;z7tPc9IAAAAJ", "orcid": ";;;;;;", "linkedin": "haohong-lin-06572b1a5/;wenhaoding/;jian-chen99/;;;;", "or_profile": "~Haohong_Lin1;~Wenhao_Ding1;~Jian_Chen13;~Laixi_Shi1;~Jiacheng_Zhu1;~Bo_Li19;~Ding_Zhao1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;California Institute of Technology;Massachusetts Institute of Technology;University of Illinois, Urbana Champaign;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;andrew.cmu.edu;caltech.edu;mit.edu;illinois.edu;cmu.edu", "position": "PhD student;PhD student;MS student;Postdoc;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlin2024because,\ntitle={{BECAUSE}: Bilinear Causal Representation for Generalizable Offline Model-based Reinforcement Learning},\nauthor={Haohong Lin and Wenhao Ding and Jian Chen and Laixi Shi and Jiacheng Zhu and Bo Li and Ding Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4i9xuPEu9w}\n}", "github": "", "reviewers": "8enR;XKke;aSxm;ne9G;8KHa", "pdf_size": 4245062, "rating": "2;6;6;7;7", "confidence": "3;3;5;4;4", "soundness": "2;2;3;4;4", "novelty": "2;2;3;4;4", "presentation": "2;3;3;4;4", "wc_summary": "61;119;44;119;100", "wc_strengths": "45;114;38;91;56", "wc_weaknesses": "25;345;67;128;48", "wc_questions": "20;12;24;25;39", "wc_limitations": "1;32;8;32;1", "wc_review": "152;622;181;395;244", "wc_reply_reviewers": "386;119;23;9;0", "wc_reply_authors": "1404;269;27;29;0", "reply_reviewers": "7;2;1;1;0", "reply_authors": "5;2;2;2;1", "rating_avg": [ 5.6, 1.8547236990991407 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 88.6, 30.754511864115155 ], "wc_strengths_avg": [ 68.8, 29.03377343715419 ], "wc_weaknesses_avg": [ 122.6, 116.3384717107802 ], "wc_questions_avg": [ 24.0, 8.78635305459552 ], "wc_limitations_avg": [ 14.8, 14.274452704044382 ], "wc_review_avg": [ 318.8, 173.28750676260537 ], "wc_reply_reviewers_avg": [ 107.4, 145.66619374446495 ], "wc_reply_authors_avg": [ 345.8, 538.0072118475736 ], "reply_reviewers_avg": [ 2.2, 2.4819347291981715 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5187513759338115, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:seHF6Li_ICYJ:scholar.google.com/&scioq=BECAUSE:+Bilinear+Causal+Representation+for+Generalizable+Offline+Model-based+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cmu.edu;cmu.edu;andrew.cmu.edu;caltech.edu;mit.edu;illinois.edu;cmu.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "Carnegie Mellon University;California Institute of Technology;Massachusetts Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;https://www.caltech.edu;https://web.mit.edu;https://illinois.edu", "aff_unique_abbr": "CMU;Caltech;MIT;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Pasadena;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DETAIL: Task DEmonsTration Attribution for Interpretable In-context Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96644", "id": "4jRNkAH15k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4jRNkAH15k", "openreview": "https://openreview.net/forum?id=4jRNkAH15k", "poster": "", "project": "", "author_site": "Zijian Zhou, Xiaoqiang Lin, Xinyi Xu, Alok Prakash, Daniela Rus, Bryan Kian Hsiang Low", "tldr": "", "abstract": "In-context learning (ICL) allows transformer-based language models that are pre-trained on general text to quickly learn a specific task with a few \"task demonstrations\" without updating their parameters, significantly boosting their flexibility and generality. ICL possesses many distinct characteristics from conventional machine learning, thereby requiring new approaches to interpret this learning paradigm. Taking the viewpoint of recent works showing that transformers learn in context by formulating an internal optimizer, we propose an influence function-based attribution technique, DETAIL, that addresses the specific characteristics of ICL. We empirically verify the effectiveness of our approach for demonstration attribution while being computationally efficient. Leveraging the results, we then show how DETAIL can help improve model performance in real-world scenarios through demonstration reordering and curation. Finally, we experimentally prove the wide applicability of DETAIL by showing our attribution scores obtained on white-box models are transferable to black-box models in improving model performance.", "keywords": "in-context learning;data attribution;large language models", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/74df937dbd1b594103a283fc5667b8b48ee0cc77.zip", "author": "Zijian Zhou;Xiaoqiang Lin;Xinyi Xu;Alok Prakash;Daniela Rus;Bryan Kian Hsiang Low", "authorids": "~Zijian_Zhou4;~Xiaoqiang_Lin1;~Xinyi_Xu4;~Alok_Prakash2;~Daniela_Rus1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M;;F;M", "homepage": "https://www.linkedin.com/in/zijian-zhou-95abb7138/;https://xqlin98.github.io/;https://xinyi-xu.com;;https://www.csail.mit.edu/person/daniela-rus;http://www.comp.nus.edu.sg/~lowkh", "dblp": ";269/4573;;;r/DanielaRus;97/4877", "google_scholar": ";nqKwA60AAAAJ;2762GgsAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": ";;0000-0002-8744-0695;;;", "linkedin": "zijian-zhou-95abb7138/;;xinyi-xu-a93222133/;;;", "or_profile": "~Zijian_Zhou4;~Xiaoqiang_Lin1;~Xinyi_Xu4;~Alok_Prakash2;~Daniela_Rus1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;;Massachusetts Institute of Technology;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;nus.edu.sg;;mit.edu;nus.edu.sg", "position": "PhD student;PhD student;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024detail,\ntitle={{DETAIL}: Task {DE}monsTration Attribution for Interpretable In-context Learning},\nauthor={Zijian Zhou and Xiaoqiang Lin and Xinyi Xu and Alok Prakash and Daniela Rus and Bryan Kian Hsiang Low},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4jRNkAH15k}\n}", "github": "", "reviewers": "oPDt;EuFx;smfw;ixwr", "pdf_size": 1563513, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "33;66;98;82", "wc_strengths": "57;71;112;98", "wc_weaknesses": "193;184;126;35", "wc_questions": "2;33;4;32", "wc_limitations": "4;1;7;6", "wc_review": "289;355;347;253", "wc_reply_reviewers": "164;31;0;0", "wc_reply_authors": "38;34;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 24.045529730076648 ], "wc_strengths_avg": [ 84.5, 21.66217902243447 ], "wc_weaknesses_avg": [ 134.5, 62.93846200853656 ], "wc_questions_avg": [ 17.75, 14.771171246722448 ], "wc_limitations_avg": [ 4.5, 2.29128784747792 ], "wc_review_avg": [ 311.0, 42.07136793592526 ], "wc_reply_reviewers_avg": [ 48.75, 67.73247005683463 ], "wc_reply_authors_avg": [ 18.0, 18.05547008526779 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11911090909849580272&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nus.edu.sg;u.nus.edu;nus.edu.sg;;mit.edu;nus.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu", "aff_unique_abbr": "NUS;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "ZOPP: A Framework of Zero-shot Offboard Panoptic Perception for Autonomous Driving", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96643", "id": "4jXaca2NYa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4jXaca2NYa", "openreview": "https://openreview.net/forum?id=4jXaca2NYa", "poster": "", "project": "", "author_site": "Tao MA, Hongbin Zhou, Qiusheng Huang, Xuemeng Yang, Jianfei Guo, Bo Zhang, Min Dou, Yu Qiao, Botian Shi, Hongsheng Li", "tldr": "", "abstract": "Offboard perception aims to automatically generate high-quality 3D labels for autonomous driving (AD) scenes. Existing offboard methods focus on 3D object detection with closed-set taxonomy and fail to match human-level recognition capability on the rapidly evolving perception tasks. Due to heavy reliance on human labels and the prevalence of data imbalance and sparsity, a unified framework for offboard auto-labeling various elements in AD scenes that meets the distinct needs of perception tasks is not being fully explored. In this paper, we propose a novel multi-modal Zero-shot Offboard Panoptic Perception (ZOPP) framework for autonomous driving scenes. ZOPP integrates the powerful zero-shot recognition capabilities of vision foundation models and 3D representations derived from point clouds. To the best of our knowledge, ZOPP represents a pioneering effort in the domain of multi-modal panoptic perception and auto labeling for autonomous driving scenes. We conduct comprehensive empirical studies and evaluations on Waymo open dataset to validate the proposed ZOPP on various perception tasks. To further explore the usability and extensibility of our proposed ZOPP, we also conduct experiments in downstream applications. The results further demonstrate the great potential of our ZOPP for real-world scenarios. The source code will be released at \\url{https://github.com/PJLab-ADG/ZOPP}.", "keywords": "Autonomous Driving;Offboard Perception;Zero-shot", "primary_area": "other", "supplementary_material": "/attachment/4c096b8e8c4fa8c8b4959b2da2319751adfceb90.zip", "author": "Tao MA;Hongbin Zhou;Qiusheng Huang;Xuemeng Yang;Jianfei Guo;Bo Zhang;Min Dou;Yu Qiao;Botian Shi;Hongsheng Li", "authorids": "~Tao_MA3;~Hongbin_Zhou2;~Qiusheng_Huang3;~Xuemeng_Yang1;~Jianfei_Guo1;~Bo_Zhang17;~Min_Dou1;~Yu_Qiao1;~Botian_Shi1;~Hongsheng_Li3", "gender": ";M;M;F;M;M;M;;M;M", "homepage": ";;;;https://ventusff.github.io;https://bobrown.github.io/boZhang.github.io/;;;;http://www.ee.cuhk.edu.hk/~hsli", "dblp": ";64/6351;;133/6494;305/7388.html;36/2259-69;;;245/8742;27/7402-1", "google_scholar": ";fb_WgAEAAAAJ;-uTylBIAAAAJ;;MJb2_wYAAAAJ;https://scholar.google.com/citations?hl=en;;;K0PpvLkAAAAJ;BN2Ze-QAAAAJ", "orcid": ";;;0000-0002-7339-1650;0000-0002-5838-679X;0000-0001-8052-782X;;;0000-0003-3677-7252;", "linkedin": ";;;;;;%E6%B0%91-%E7%AA%A6-a34b17a3;;friskit/;", "or_profile": "~Tao_MA3;~Hongbin_Zhou2;~Qiusheng_Huang3;~Xuemeng_Yang1;~Jianfei_Guo1;~Bo_Zhang17;~Min_Dou1;~Yu_Qiao1;~Botian_Shi1;~Hongsheng_Li3", "aff": ";Shanghai AI Lab;Shanghai AI Laboratory;Shanghai AI Lab;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;;Shanghai AI Lab;The Chinese University of Hong Kong", "aff_domain": ";pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;pjlab.org.cn;cuhk.edu.hk", "position": ";Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nma2024zopp,\ntitle={{ZOPP}: A Framework of Zero-shot Offboard Panoptic Perception for Autonomous Driving},\nauthor={Tao MA and Hongbin Zhou and Qiusheng Huang and Xuemeng Yang and Jianfei Guo and Bo Zhang and Min Dou and Yu Qiao and Botian Shi and Hongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4jXaca2NYa}\n}", "github": "", "reviewers": "Swqu;96Yj;aSzs;c7Qa", "pdf_size": 34582796, "rating": "4;5;5;7", "confidence": "3;3;3;3", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "3;2;3;3", "wc_summary": "66;50;42;52", "wc_strengths": "34;34;10;61", "wc_weaknesses": "51;148;13;30", "wc_questions": "3;195;2;18", "wc_limitations": "3;61;2;8", "wc_review": "157;488;69;169", "wc_reply_reviewers": "5;59;13;29", "wc_reply_authors": "48;38;112;49", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.5, 8.645808232895291 ], "wc_strengths_avg": [ 34.75, 18.046814123273947 ], "wc_weaknesses_avg": [ 60.5, 52.28049349422785 ], "wc_questions_avg": [ 54.5, 81.36491873037176 ], "wc_limitations_avg": [ 18.5, 24.642443060703215 ], "wc_review_avg": [ 220.75, 159.05403955888704 ], "wc_reply_reviewers_avg": [ 26.5, 20.657928260113597 ], "wc_reply_authors_avg": [ 61.75, 29.328953271468794 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8421971696461257685&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": ";pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;pjlab.org.cn;cuhk.edu.hk", "author_num": 10, "aff_unique_index": "0;1;0;2;2;1;0;3", "aff_unique_norm": "Shanghai AI Lab;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghaiailab.com;https://www.shanghai-ai-lab.com;http://www.shailab.org/;https://www.cuhk.edu.hk", "aff_unique_abbr": "SAIL;SAIL;Shanghai AI Lab;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MetaUAS: Universal Anomaly Segmentation with One-Prompt Meta-Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96642", "id": "4jegYnUMHb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4jegYnUMHb", "openreview": "https://openreview.net/forum?id=4jegYnUMHb", "poster": "/media/PosterPDFs/NeurIPS%202024/96642.png?t=1733196595.5426939", "project": "", "tldr": "", "abstract": "Zero- and few-shot visual anomaly segmentation relies on powerful vision-language models that detect unseen anomalies using manually designed textual prompts. However, visual representations are inherently independent of language. In this paper, we explore the potential of a pure visual foundation model as an alternative to widely used vision-language models for universal visual anomaly segmentation.\nWe present a novel paradigm that unifies anomaly segmentation into change segmentation. This paradigm enables us to leverage large-scale synthetic image pairs, featuring object-level and local region changes, derived from existing image datasets, which are independent of target anomaly datasets. We propose a one-prompt Meta-learning framework for Universal Anomaly Segmentation (MetaUAS) that is trained on this synthetic dataset and then generalizes well to segment any novel or unseen visual anomalies in the real world. To handle geometrical variations between prompt and query images, we propose a soft feature alignment module that bridges paired-image change perception and single-image semantic segmentation. This is the first work to achieve universal anomaly segmentation using a pure vision model without relying on special anomaly detection datasets and pre-trained visual-language models. Our method effectively and efficiently segments any anomalies with only one normal image prompt and enjoys training-free without guidance from language. Our MetaUAS significantly outperforms previous zero-shot, few-shot, and even full-shot anomaly segmentation methods. Code and Models: https://github.com/gaobb/MetaUAS.", "keywords": "Anomaly Classification;Anomaly Segmentation;One-Prompt Meta-Learning;Universal Anomaly Segmentation", "primary_area": "machine_vision", "supplementary_material": "/attachment/5f6d197e4285882168cd08a7202b7052e077c91e.zip", "author": "Bin-Bin Gao", "authorids": "~Bin-Bin_Gao1", "gender": "M", "homepage": "https://csgaobb.github.io/", "dblp": "152/6672", "google_scholar": "yYviZ-oAAAAJ", "orcid": "0000-0003-2572-8156", "linkedin": "", "or_profile": "~Bin-Bin_Gao1", "aff": "Tencent", "aff_domain": "tencent.com", "position": "Senior Researcher", "bibtex": "@inproceedings{\ngao2024metauas,\ntitle={Meta{UAS}: Universal Anomaly Segmentation with One-Prompt Meta-Learning},\nauthor={Bin-Bin Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4jegYnUMHb}\n}", "github": "", "reviewers": "bbUz;sm3a;gb5F;DDkp", "pdf_size": 25811962, "rating": "6;6;6;7", "confidence": "3;3;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "3;2;3;4", "wc_summary": "62;127;113;70", "wc_strengths": "23;120;86;106", "wc_weaknesses": "154;221;107;79", "wc_questions": "63;214;90;3", "wc_limitations": "7;13;7;1", "wc_review": "309;695;403;259", "wc_reply_reviewers": "21;12;48;48", "wc_reply_authors": "0;0;166;225", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.0, 27.595289453093258 ], "wc_strengths_avg": [ 83.75, 37.09700122651425 ], "wc_weaknesses_avg": [ 140.25, 53.774413060488165 ], "wc_questions_avg": [ 92.5, 76.89115683874186 ], "wc_limitations_avg": [ 7.0, 4.242640687119285 ], "wc_review_avg": [ 416.5, 168.89863824199412 ], "wc_reply_reviewers_avg": [ 32.25, 16.068213964221414 ], "wc_reply_authors_avg": [ 97.75, 99.95092545844686 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-kxogY5B3LsJ:scholar.google.com/&scioq=MetaUAS:+Universal+Anomaly+Segmentation+with+One-Prompt+Meta-Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "tencent.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Tencent", "aff_unique_dep": "Tencent Holdings Limited", "aff_unique_url": "https://www.tencent.com", "aff_unique_abbr": "Tencent", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Improving Robustness of 3D Point Cloud Recognition from a Fourier Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96641", "id": "4jn7KWPHSD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4jn7KWPHSD", "openreview": "https://openreview.net/forum?id=4jn7KWPHSD", "poster": "", "project": "", "author_site": "Yibo Miao, Yinpeng Dong, Jinlai Zhang, Lijia Yu, Xiao Yang, Xiao-Shan Gao", "tldr": "", "abstract": "Although 3D point cloud recognition has achieved substantial progress on standard benchmarks, the typical models are vulnerable to point cloud corruptions, leading to security threats in real-world applications. To improve the corruption robustness, various data augmentation methods have been studied, but they are mainly limited to the spatial domain. As the point cloud has low information density and significant spatial redundancy, it is challenging to analyze the effects of corruptions. In this paper, we focus on the frequency domain to observe the underlying structure of point clouds and their corruptions. Through graph Fourier transform (GFT), we observe a correlation between the corruption robustness of point cloud recognition models and their sensitivity to different frequency bands, which is measured by the GFT spectrum of the model\u2019s Jacobian matrix. To reduce the sensitivity and improve the corruption robustness, we propose Frequency Adversarial Training (FAT) that adopts frequency-domain adversarial examples as data augmentation to train robust point cloud recognition models against corruptions. Theoretically, we provide a guarantee of FAT on its out-of-distribution generalization performance. Empirically, we conduct extensive experiments with various network architectures to validate the effectiveness of FAT, which achieves the new state-of-the-art results.", "keywords": "3D point cloud recognition;corruption robustness;Frequency Adversarial Training", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yibo Miao;Yinpeng Dong;Jinlai Zhang;Lijia Yu;Xiao Yang;Xiao-Shan Gao", "authorids": "~Yibo_Miao1;~Yinpeng_Dong2;~Jinlai_Zhang1;~Lijia_Yu2;~Xiao_Yang4;~Xiao-Shan_Gao2", "gender": "M;M;;M;M;M", "homepage": "http://www.amss.ac.cn/;https://dongyp13.github.io;https://github.com/cuge1995;;https://ml.cs.tsinghua.edu.cn/~xiaoyang/;http://www.mmrc.iss.ac.cn/~xgao/", "dblp": "332/0699;183/0980;236/6816;175/8873.html;57/33851;13/3109", "google_scholar": ";6_4ad84AAAAJ;C2ftS4sAAAAJ;;bwkwp0MAAAAJ;_se7GmUAAAAJ", "orcid": ";;;;0000-0001-9502-9962;0000-0003-2021-9395", "linkedin": ";;;;;", "or_profile": "~Yibo_Miao1;~Yinpeng_Dong2;~Jinlai_Zhang1;~Lijia_Yu2;~Xiao_Yang4;~Xiao-Shan_Gao2", "aff": "Intel;Tsinghua University;Changsha University of Science and Technology;Institute of Software, Chinese Academy of Sciences;Tsinghua University;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "intel.com;tsinghua.edu.cn;csust.edu.cn;ios.ac.cn;mail.tsinghua.edu.cn;amss.ac.cn", "position": "Intern;Postdoc;Lecturer;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmiao2024improving,\ntitle={Improving Robustness of 3D Point Cloud Recognition from a Fourier Perspective},\nauthor={Yibo Miao and Yinpeng Dong and Jinlai Zhang and Lijia Yu and Xiao Yang and Xiao-Shan Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4jn7KWPHSD}\n}", "github": "", "reviewers": "wa2k;WzcS;ELyT;kjym", "pdf_size": 2184743, "rating": "5;5;6;6", "confidence": "5;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "37;48;78;109", "wc_strengths": "129;31;123;123", "wc_weaknesses": "217;403;91;68", "wc_questions": "14;181;34;14", "wc_limitations": "4;1;30;1", "wc_review": "401;664;356;315", "wc_reply_reviewers": "7;9;9;12", "wc_reply_authors": "41;299;46;37", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 28.026772914483036 ], "wc_strengths_avg": [ 101.5, 40.77683165720456 ], "wc_weaknesses_avg": [ 194.75, 132.94054122050204 ], "wc_questions_avg": [ 60.75, 69.90484604088618 ], "wc_limitations_avg": [ 9.0, 12.186057606953941 ], "wc_review_avg": [ 434.0, 136.22958562661782 ], "wc_reply_reviewers_avg": [ 9.25, 1.7853571071357126 ], "wc_reply_authors_avg": [ 105.75, 111.61849085165056 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3M_pTnJ3qhUJ:scholar.google.com/&scioq=Improving+Robustness+of+3D+Point+Cloud+Recognition+from+a+Fourier+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "intel.com;tsinghua.edu.cn;csust.edu.cn;ios.ac.cn;mail.tsinghua.edu.cn;amss.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;1;3", "aff_unique_norm": "Intel;Tsinghua University;Changsha University of Science and Technology;Chinese Academy of Sciences", "aff_unique_dep": "Intel Corporation;;;Institute of Software", "aff_unique_url": "https://www.intel.com;https://www.tsinghua.edu.cn;http://www.csust.edu.cn;http://www.ios.ac.cn", "aff_unique_abbr": "Intel;THU;CSUST;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "id": "4kCr61XYQJ", "title": "Poisson-Gamma Dynamical Systems with Non-Stationary Transition Dynamics", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bayesian methodologies for handling count-valued time series have gained prominence due to their ability to infer interpretable latent structures and to estimate uncertainties, and thus are especially suitable for dealing with noisy and incomplete count data. Among these Bayesian models, Poisson-Gamma Dynamical Systems (PGDSs) are proven to be effective in capturing the evolving dynamics underlying observed count sequences. However, the state-of-the-art PGDS still falls short in capturing the time-varying transition dynamics that are commonly observed in real-world count time series. To mitigate this limitation, a non-stationary PGDS is proposed to allow the underlying transition matrices to evolve over time, and the evolving transition matrices are modeled by the specifically-designed Dirichlet Markov chains. Leveraging Dirichlet-Multinomial-Beta data augmentation techniques, a fully-conjugate and efficient Gibbs sampler is developed to perform posterior simulation. Experiments show that, in comparison with related models, the proposed non-stationary PGDS achieves improved predictive performance due to its capacity to learn non-stationary dependency structure captured by the time-evolving transition matrices.", "keywords": "Bayesian non-parametrics;Negative-binomial processes;Dirichlet Markov chains", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Jiahao Wang;Sikun Yang;Heinz Koeppl;Xiuzhen Cheng;Pengfei Hu;Guoming Zhang", "authorids": "~Jiahao_Wang18;~Sikun_Yang1;~Heinz_Koeppl1;~Xiuzhen_Cheng3;~Pengfei_Hu5;~Guoming_Zhang2", "gender": "M;M;M;F;M;M", "homepage": ";;;;https://perfecthu.github.io/;https://sites.google.com/view/gmzh/%E9%A6%96%E9%A1%B5?authuser=0", "dblp": ";https://dblp.uni-trier.de/pers/y/Yang:Sikun.html;41/6084;https://dblp.uni-trier.de/pid/c/XiuzhenCheng;71/9969-1.html;163/3092.html", "google_scholar": ";;https://scholar.google.de/citations?user=WaPW80kAAAAJ;https://scholar.google.com/citations?hl=en;CMddi7oAAAAJ;IyKwuT8AAAAJ", "orcid": ";;;;0000-0002-7935-886X;0000-0002-8003-0669", "linkedin": "%E5%98%89%E8%B1%AA-%E7%8E%8B-559b61308/;;;;;https://www.linkedin.com/help/linkedin/answer/a522735/find-your-linkedin-public-profile-url?lang=en", "or_profile": "~Jiahao_Wang18;~Sikun_Yang1;~Heinz_Koeppl1;~Xiuzhen_Cheng3;~Pengfei_Hu5;~Guoming_Zhang2", "aff": "Shandong University;Great Bay University;TU Darmstadt;Shandong University;Shandong University;Shandong University", "aff_domain": "sdu.edu.cn;gbu.edu.cn;tu-darmstadt.de;sdu.edu.cn;sdu.edu.cn;sdu.edu.cn", "position": "MS student;Assistant Professor;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024poissongamma,\ntitle={Poisson-Gamma Dynamical Systems with Non-Stationary Transition Dynamics},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=4kCr61XYQJ}\n}", "github": "", "project": "", "reviewers": "Yy4i;jsav;Erfo;eDv4", "site": "https://openreview.net/forum?id=4kCr61XYQJ", "pdf_size": 1197608, "rating": "4;5;6;7", "confidence": "3;3;4;4", "soundness": "2;4;3;4", "novelty": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "47;76;81;67", "wc_strengths": "78;45;66;86", "wc_weaknesses": "186;43;38;7", "wc_questions": "124;46;187;1", "wc_limitations": "34;21;9;10", "wc_review": "469;231;381;171", "wc_reply_reviewers": "42;57;18;0", "wc_reply_authors": "183;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 12.987975207860538 ], "wc_strengths_avg": [ 68.75, 15.449514555480375 ], "wc_weaknesses_avg": [ 68.5, 69.22607890094599 ], "wc_questions_avg": [ 89.5, 71.45103218288732 ], "wc_limitations_avg": [ 18.5, 10.111874208078342 ], "wc_review_avg": [ 313.0, 118.16090724093142 ], "wc_reply_reviewers_avg": [ 29.25, 21.878928218722233 ], "wc_reply_authors_avg": [ 45.75, 79.24132444627614 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FIL_xTsA6E4J:scholar.google.com/&scioq=Poisson-Gamma+Dynamical+Systems+with+Non-Stationary+Transition+Dynamics&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Shandong University;Great Bay University;Technische Universit\u00e4t Darmstadt", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.greatbay.edu;https://www.tu-darmstadt.de", "aff_unique_abbr": "SDU;;TU Darmstadt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "China;United States;Germany" }, { "title": "Goal Conditioned Reinforcement Learning for Photo Finishing Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96640", "id": "4kVHI2uXRE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4kVHI2uXRE", "openreview": "https://openreview.net/forum?id=4kVHI2uXRE", "poster": "/media/PosterPDFs/NeurIPS%202024/96640.png?t=1733402077.3537657", "project": "", "author_site": "Jiarui Wu, Yujin Wang, Lingen Li, Zhang Fan, Tianfan Xue", "tldr": "", "abstract": "Photo finishing tuning aims to automate the manual tuning process of the photo finishing pipeline, like Adobe Lightroom or Darktable. Previous works either use zeroth-order optimization, which is slow when the set of parameters increases, or rely on a differentiable proxy of the target finishing pipeline, which is hard to train.\nTo overcome these challenges, we propose a novel goal-conditioned reinforcement learning framework for efficiently tuning parameters using a goal image as a condition. Unlike previous approaches, our tuning framework does not rely on any proxy and treats the photo finishing pipeline as a black box. Utilizing a trained reinforcement learning policy, it can efficiently find the desired set of parameters within just 10 queries, while optimization based approaches normally take 200 queries. Furthermore, our architecture utilizes a goal image to guide the iterative tuning of pipeline parameters, allowing for flexible conditioning on pixel-aligned target images, style images, or any other visually representable goals. We conduct detailed experiments on photo finishing tuning and photo stylization tuning tasks, demonstrating the advantages of our method.", "keywords": "Photo Finishing;Image Processing;Image Signal Processor Tuning;Reinforcement Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiarui Wu;Yujin Wang;Lingen Li;Zhang Fan;Tianfan Xue", "authorids": "~Jiarui_Wu1;~Yujin_Wang2;~Lingen_Li1;~Zhang_Fan1;~Tianfan_Xue2", "gender": ";;;M;M", "homepage": "https://scholar.google.com/citations?hl=en&user=0Nf9tWoAAAAJ;;;https://ivp.ee.cuhk.edu.hk/projects/demo/piqm/index.html;https://tianfan.info", "dblp": "73/11246.html;;;21/3626-93;54/8652", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;RfSQKrIAAAAJ", "orcid": ";;;;0000-0001-5031-6618", "linkedin": ";;;;tianfan-xue-54016716", "or_profile": "~Jiarui_Wu1;~Yujin_Wang2;~Lingen_Li1;~Zhang_Fan1;~Tianfan_Xue2", "aff": "Beihang University;;;Tetras;The Chinese University of Hong Kong", "aff_domain": "buaa.edu.cn;;;tetras.ai;cuhk.edu.hk", "position": "Undergrad student;;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwu2024goal,\ntitle={Goal Conditioned Reinforcement Learning for Photo Finishing Tuning},\nauthor={Jiarui Wu and Yujin Wang and Lingen Li and Zhang Fan and Tianfan Xue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4kVHI2uXRE}\n}", "github": "", "reviewers": "syDC;kK87;ASBs;WirW", "pdf_size": 28048860, "rating": "4;5;5;6", "confidence": "4;4;2;3", "soundness": "3;3;2;3", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "104;32;57;51", "wc_strengths": "38;41;33;44", "wc_weaknesses": "34;140;38;45", "wc_questions": "47;34;6;16", "wc_limitations": "7;2;6;34", "wc_review": "230;249;140;190", "wc_reply_reviewers": "252;15;0;24", "wc_reply_authors": "809;119;78;87", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.0, 26.485845276298054 ], "wc_strengths_avg": [ 39.0, 4.06201920231798 ], "wc_weaknesses_avg": [ 64.25, 43.91113184603649 ], "wc_questions_avg": [ 25.75, 15.848895860595462 ], "wc_limitations_avg": [ 12.25, 12.695963925594622 ], "wc_review_avg": [ 202.25, 41.77544135015213 ], "wc_reply_reviewers_avg": [ 72.75, 103.84453524379605 ], "wc_reply_authors_avg": [ 273.25, 309.6904704701131 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VARv_Eo-XLkJ:scholar.google.com/&scioq=Goal+Conditioned+Reinforcement+Learning+for+Photo+Finishing+Tuning&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "buaa.edu.cn;;;tetras.ai;cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Beihang University;Tetras;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;;https://www.cuhk.edu.hk", "aff_unique_abbr": "BUAA;;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "Is Cross-validation the Gold Standard to Estimate Out-of-sample Model Performance?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96639", "id": "4lGPSbGe11", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4lGPSbGe11", "openreview": "https://openreview.net/forum?id=4lGPSbGe11", "poster": "/media/PosterPDFs/NeurIPS%202024/96639.png?t=1733423680.0744963", "project": "", "author_site": "Garud Iyengar, Henry Lam, Tianyu Wang", "tldr": "", "abstract": "Cross-Validation (CV) is the default choice for estimate the out-of-sample performance of machine learning models. Despite its wide usage, their statistical benefits have remained half-understood, especially in challenging nonparametric regimes. In this paper we fill in this gap and show that, in terms of estimating the out-of-sample performances, for a wide spectrum of models, CV does not statistically outperform the simple ``plug-in'' approach where one reuses training data for testing evaluation. Specifically, in terms of both the asymptotic bias and coverage accuracy of the associated interval for out-of-sample evaluation, $K$-fold CV provably cannot outperform plug-in regardless of the rate at which the parametric or nonparametric models converge. Leave-one-out CV can have a smaller bias as compared to plug-in; however, this bias improvement is negligible compared to the variability of the evaluation, and in some important cases leave-one-out again does not outperform plug-in once this variability is taken into account. We obtain our theoretical comparisons via a novel higher-order Taylor analysis that dissects the limit theorems of testing evaluations, which applies to model classes that are not amenable to previously known sufficient conditions. Our numerical results demonstrate that plug-in performs indeed no worse than CV in estimating model performance across a wide range of examples.", "keywords": "cross-validation;plug-in;uncertainty quantification;nonparametric models", "primary_area": "learning_theory", "supplementary_material": "", "author": "Garud Iyengar;Henry Lam;Tianyu Wang", "authorids": "~Garud_Iyengar1;~Henry_Lam1;~Tianyu_Wang6", "gender": "M;;M", "homepage": "http://www.columbia.edu/~gi10/;http://www.columbia.edu/~khl2114/;https://wangtianyu61.github.io", "dblp": "i/GarudIyengar.html;35/9508;", "google_scholar": ";Bnj50x0AAAAJ;mKT6mKEAAAAJ", "orcid": ";;0009-0000-2095-431X", "linkedin": ";;", "or_profile": "~Garud_Iyengar1;~Henry_Lam1;~Tianyu_Wang6", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu", "position": "Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\niyengar2024is,\ntitle={Is Cross-validation the Gold Standard to Estimate Out-of-sample Model Performance?},\nauthor={Garud Iyengar and Henry Lam and Tianyu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4lGPSbGe11}\n}", "github": "", "reviewers": "8MSs;PDH8;tGmJ;JBLc", "pdf_size": 731344, "rating": "5;7;8;8", "confidence": "2;3;3;3", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "3;3;3;4", "wc_summary": "47;52;177;70", "wc_strengths": "43;46;244;78", "wc_weaknesses": "334;102;946;59", "wc_questions": "255;131;22;45", "wc_limitations": "43;41;33;17", "wc_review": "722;372;1422;269", "wc_reply_reviewers": "59;13;186;23", "wc_reply_authors": "129;17;44;20", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.5, 52.945726928620026 ], "wc_strengths_avg": [ 102.75, 82.69635723537041 ], "wc_weaknesses_avg": [ 360.25, 353.99037769408363 ], "wc_questions_avg": [ 113.25, 91.36841631548617 ], "wc_limitations_avg": [ 33.5, 10.23474474522936 ], "wc_review_avg": [ 696.25, 451.4024673171382 ], "wc_reply_reviewers_avg": [ 70.25, 68.98324071830781 ], "wc_reply_authors_avg": [ 52.5, 45.38997686714546 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CW9ckoFY4EwJ:scholar.google.com/&scioq=Is+Cross-validation+the+Gold+Standard+to+Estimate+Out-of-sample+Model+Performance%3F&hl=en&as_sdt=0,47", "gs_version_total": 0, "email": "columbia.edu;columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Motion Forecasting in Continuous Driving", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96638", "id": "4mxzxYhMuN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4mxzxYhMuN", "openreview": "https://openreview.net/forum?id=4mxzxYhMuN", "poster": "/media/PosterPDFs/NeurIPS%202024/96638.png?t=1730555817.9680455", "project": "", "author_site": "Nan Song, Bozhou Zhang, Xiatian Zhu, Li Zhang", "tldr": "", "abstract": "Motion forecasting for agents in autonomous driving is highly challenging due to the numerous possibilities for each agent's next action and their complex interactions in space and time. \nIn real applications, motion forecasting takes place repeatedly and continuously as the self-driving car moves. However, existing forecasting methods typically process each driving scene within a certain range independently, totally ignoring the situational and contextual relationships between successive driving scenes. This significantly simplifies the forecasting task, making the solutions suboptimal and inefficient to use in practice. To address this fundamental limitation, we propose a novel motion forecasting framework for continuous driving, named RealMotion.\nIt comprises two integral streams both at the scene level:\n(1) The scene context stream progressively accumulates historical scene information until the present moment, capturing temporal interactive relationships among scene elements.\n(2) The agent trajectory stream optimizes current forecasting by sequentially relaying past predictions.\nBesides, a data reorganization strategy is introduced to narrow the gap between existing benchmarks and real-world applications, consistent with our network. These approaches enable exploiting more broadly the situational and progressive insights of dynamic motion across space and time. \nExtensive experiments on Argoverse series with different settings demonstrate that our RealMotion achieves state-of-the-art performance, along with the advantage of efficient real-world inference.", "keywords": "Motion forecasting; Autonomous Driving;", "primary_area": "robotics", "supplementary_material": "", "author": "Nan Song;Bozhou Zhang;Xiatian Zhu;Li Zhang", "authorids": "~Nan_Song4;~Bozhou_Zhang1;~Xiatian_Zhu3;~Li_Zhang5", "gender": "M;M;;M", "homepage": ";https://zbozhou.github.io/;https://x-up-lab.github.io;http://www.robots.ox.ac.uk/~lz/", "dblp": ";294/1268;128/7935;89/5992-40", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;ZbA-z1cAAAAJ;-wOTCE8AAAAJ", "orcid": ";;0000-0002-9284-2955;", "linkedin": ";;;", "or_profile": "~Nan_Song4;~Bozhou_Zhang1;~Xiatian_Zhu3;~Li_Zhang5", "aff": "Fudan University;Fudan University;University of Surrey;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;surrey.ac.uk;fudan.edu.cn", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsong2024motion,\ntitle={Motion Forecasting in Continuous Driving},\nauthor={Nan Song and Bozhou Zhang and Xiatian Zhu and Li Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4mxzxYhMuN}\n}", "github": "", "reviewers": "GAo1;fqqC;fgWb;u4R4", "pdf_size": 6808770, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;4;4", "presentation": "2;3;2;2", "wc_summary": "119;74;165;162", "wc_strengths": "71;100;86;102", "wc_weaknesses": "294;282;519;145", "wc_questions": "131;3;102;2", "wc_limitations": "38;11;49;1", "wc_review": "653;470;921;412", "wc_reply_reviewers": "170;70;381;0", "wc_reply_authors": "134;8;296;0", "reply_reviewers": "2;1;3;0", "reply_authors": "4;2;5;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 130.0, 37.1012129181783 ], "wc_strengths_avg": [ 89.75, 12.457427503300993 ], "wc_weaknesses_avg": [ 310.0, 134.11375768354267 ], "wc_questions_avg": [ 59.5, 57.91588728492381 ], "wc_limitations_avg": [ 24.75, 19.472737352514155 ], "wc_review_avg": [ 614.0, 198.31162346166198 ], "wc_reply_reviewers_avg": [ 155.25, 143.65823157758834 ], "wc_reply_authors_avg": [ 109.5, 120.07809958522827 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1925206157491021430&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;fudan.edu.cn;surrey.ac.uk;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Fudan University;University of Surrey", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.surrey.ac.uk", "aff_unique_abbr": "Fudan;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Magnet: We Never Know How Text-to-Image Diffusion Models Work, Until We Learn How Vision-Language Models Function", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96637", "id": "4mzGiMooXM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4mzGiMooXM", "openreview": "https://openreview.net/forum?id=4mzGiMooXM", "poster": "/media/PosterPDFs/NeurIPS%202024/96637.png?t=1729044797.8937495", "project": "", "author_site": "Chenyi Zhuang, Ying Hu, Pan Gao", "tldr": "", "abstract": "Text-to-image diffusion models particularly Stable Diffusion, have revolutionized the field of computer vision. However, the synthesis quality often deteriorates when asked to generate images that faithfully represent complex prompts involving multiple attributes and objects. While previous studies suggest that blended text embeddings lead to improper attribute binding, few have explored this in depth. In this work, we critically examine the limitations of the CLIP text encoder in understanding attributes and investigate how this affects diffusion models. We discern a phenomenon of attribute bias in the text space and highlight a contextual issue in padding embeddings that entangle different concepts. We propose Magnet, a novel training-free approach to tackle the attribute binding problem. We introduce positive and negative binding vectors to enhance disentanglement, further with a neighbor strategy to increase accuracy. Extensive experiments show that Magnet significantly improves synthesis quality and binding accuracy with negligible computational cost, enabling the generation of unconventional and unnatural concepts.", "keywords": "Image Generation;Diffusion Models;Vision-Language Models;Attribute Binding", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/113639f8feea7196340f0f7b75506259b70bc584.zip", "author": "Chenyi Zhuang;Ying Hu;Pan Gao", "authorids": "~Chenyi_Zhuang2;~Ying_Hu1;~Pan_Gao2", "gender": "F;;M", "homepage": "https://chenyi-zhuang.github.io/;https://github.com/Arinahy;https://i2-multimedia-lab.github.io/index.html", "dblp": ";;87/5856.html", "google_scholar": ";;https://scholar.google.co.kr/citations?user=vxHerj4AAAAJ", "orcid": "0009-0004-0700-5645;;0000-0002-4492-5430", "linkedin": ";;", "or_profile": "~Chenyi_Zhuang2;~Ying_Hu1;~Pan_Gao2", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics, Tsinghua University", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nzhuang2024magnet,\ntitle={Magnet: We Never Know How Text-to-Image Diffusion Models Work, Until We Learn How Vision-Language Models Function},\nauthor={Chenyi Zhuang and Ying Hu and Pan Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4mzGiMooXM}\n}", "github": "", "reviewers": "5FTv;eb4q;iZtx;2c6i", "pdf_size": 28738314, "rating": "5;6;6;6", "confidence": "2;5;3;4", "soundness": "3;4;4;2", "novelty": "3;3;2;4", "presentation": "3;3;3;2", "wc_summary": "131;105;122;98", "wc_strengths": "71;108;87;118", "wc_weaknesses": "109;152;132;187", "wc_questions": "84;152;250;134", "wc_limitations": "2;35;17;2", "wc_review": "397;552;608;539", "wc_reply_reviewers": "65;207;240;41", "wc_reply_authors": "0;141;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.0, 13.133925536563698 ], "wc_strengths_avg": [ 96.0, 18.261982367749674 ], "wc_weaknesses_avg": [ 145.0, 28.626910416599273 ], "wc_questions_avg": [ 155.0, 60.241181927316134 ], "wc_limitations_avg": [ 14.0, 13.583077707206124 ], "wc_review_avg": [ 524.0, 77.77210296758086 ], "wc_reply_reviewers_avg": [ 138.25, 86.46205815269494 ], "wc_reply_authors_avg": [ 35.25, 61.054790966802926 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2636516321240556293&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking Weight Decay for Robust Fine-Tuning of Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96636", "id": "4neqdBz8eG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4neqdBz8eG", "openreview": "https://openreview.net/forum?id=4neqdBz8eG", "poster": "", "project": "", "author_site": "Junjiao Tian, Chengyue Huang, Zsolt Kira", "tldr": "", "abstract": "Modern optimizers such as AdamW, equipped with momentum and adaptive learning rate, are designed to escape local minima and explore the vast parameter space. This exploration is beneficial for finding good loss basins when training from scratch. It is not necessarily ideal when resuming from a powerful foundation model because it can lead to large deviations from the pre-trained initialization and, consequently, worse robustness and generalization. At the same time, strong regularization on all parameters can lead to under-fitting. We hypothesize that selectively regularizing the parameter space is the key to fitting and retraining the pre-trained knowledge. This paper proposes a new weight decay technique, Selective Projection Decay (SPD), that selectively imposes a strong penalty on certain layers while allowing others to change freely. Intuitively, SPD expands and contracts the parameter search space for layers with consistent and inconsistent loss reduction, respectively. Experimentally, when equipped with SPD, Adam consistently provides better in-distribution generalization and out-of-distribution robustness performance on multiple popular vision and language benchmarks.", "keywords": "Fine-tuning;foundation models;language models;robustness", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/4d389612007da1983f6b46c659016526f08cced9.zip", "author": "Junjiao Tian;Chengyue Huang;Zsolt Kira", "authorids": "~Junjiao_Tian1;~Chengyue_Huang2;~Zsolt_Kira1", "gender": "M;F;M", "homepage": ";https://chengyuehuang511.github.io/;https://faculty.cc.gatech.edu/~zk15", "dblp": "246/3115.htm;;36/4127", "google_scholar": "iHZD850AAAAJ;nv4TcI8AAAAJ;2a5XgNAAAAAJ", "orcid": ";;0000-0002-2626-2004", "linkedin": ";;", "or_profile": "~Junjiao_Tian1;~Chengyue_Huang2;~Zsolt_Kira1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntian2024rethinking,\ntitle={Rethinking Weight Decay for Robust Fine-Tuning of Foundation Models},\nauthor={Junjiao Tian and Chengyue Huang and Zsolt Kira},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4neqdBz8eG}\n}", "github": "", "reviewers": "33UR;CVio;Jrpf;dDi9;Ex5N", "pdf_size": 479204, "rating": "5;5;6;6;7", "confidence": "4;4;4;4;4", "soundness": "3;3;4;2;3", "novelty": "3;2;4;2;3", "presentation": "2;3;3;2;4", "wc_summary": "100;59;65;75;46", "wc_strengths": "75;44;62;77;74", "wc_weaknesses": "358;234;12;36;123", "wc_questions": "291;147;74;225;5", "wc_limitations": "23;1;19;30;5", "wc_review": "847;485;232;443;253", "wc_reply_reviewers": "36;67;42;0;0", "wc_reply_authors": "752;0;177;484;312", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 69.0, 18.121810064118872 ], "wc_strengths_avg": [ 66.4, 12.370933675353692 ], "wc_weaknesses_avg": [ 152.6, 128.8838236552594 ], "wc_questions_avg": [ 148.4, 102.2772702021324 ], "wc_limitations_avg": [ 15.6, 10.947145746723207 ], "wc_review_avg": [ 452.0, 221.44796228459631 ], "wc_reply_reviewers_avg": [ 29.0, 25.86116780039138 ], "wc_reply_authors_avg": [ 345.0, 258.1890780029241 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h0YmjIZsy-YJ:scholar.google.com/&scioq=Rethinking+Weight+Decay+for+Robust+Fine-Tuning+of+Foundation+Models&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ArkVale: Efficient Generative LLM Inference with Recallable Key-Value Eviction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96635", "id": "4oAt5L4lYe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4oAt5L4lYe", "openreview": "https://openreview.net/forum?id=4oAt5L4lYe", "poster": "/media/PosterPDFs/NeurIPS%202024/96635.png?t=1731246507.6336682", "project": "", "author_site": "Renze Chen, Zhuofeng Wang, Beiquan Cao, Tong Wu, Size Zheng, Xiuhong Li, Xuechao Wei, Shengen Yan, Meng Li, Yun Liang", "tldr": "", "abstract": "Large Language Models (LLMs) are widely used in today's tasks of natural language processing. \nTo support applications like multi-turn chats, document understanding, and content generation, models with long context lengths are growing in importance.\nHowever, managing long contexts brings substantial challenges due to the expansion of key-value cache (KV cache). Longer KV cache requires larger memory, limiting the batch-size thus decreasing throughput. Also, computing attention over long KV cache incurs more memory access, hurting the end-to-end latency.\nPrior works find that it is sufficient to use only the recent and high-impact tokens for attention computation, allowing the eviction of less vital tokens to shrink cache size.\nNonetheless, we observe a dynamic shift in token importance across different decoding steps. Tokens initially evicted might regain importance after certain decoding steps.\nTo address this, we propose ArkVale, a page-based KV cache manager that can recognize and recall currently important tokens evicted before. We asynchronously copy the filled page into external memory (e.g., CPU memory) as backup and summarize it into a much smaller digest by constructing the bounding-volume of its keys. Before attention computation, we measure all pages' importance based on their digests, recall the important ones, evict the unimportant ones, and select the top-ranked pages for attention computation. \nExperiment results show that ArkVale performs well on various long context tasks with negligible accuracy loss under 2k$\\sim$4k cache budget and can improve decoding latency to $2.2\\times$ and batching throughput to $4.6\\times$ because it applies attention on only a small subset of pages and reduce per-sample memory usage of KV cache.", "keywords": "Machine Learning System;Large Language Model Inference;Key-Value Cache Eviction", "primary_area": "infrastructure", "supplementary_material": "", "author": "Renze Chen;Zhuofeng Wang;Beiquan Cao;Tong Wu;Size Zheng;Xiuhong Li;Xuechao Wei;Shengen Yan;Meng Li;Yun Liang", "authorids": "~Renze_Chen1;~Zhuofeng_Wang2;~Beiquan_Cao1;~Tong_Wu21;~Size_Zheng1;~Xiuhong_Li1;~Xuechao_Wei1;~Shengen_Yan1;~Meng_Li1;~Yun_Liang1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/Light-of-Hers;https://github.com/wzf03/;https://github.com/StanleyC988;https://github.com/Rachmanino;https://sizezheng.github.io/;;;;https://mengli.me;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6191", "dblp": "260/5910;;;;254/6617-1.html;;08/10954.html;117/6968;70/1726-4;", "google_scholar": ";;;;TMZWR1gAAAAJ;90eREm0AAAAJ;;SvE3bdUAAAAJ;lvdRkEkAAAAJ;https://scholar.google.com.tw/citations?user=Ltp8loUAAAAJ", "orcid": ";;;;;0000-0002-4896-121X;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Renze_Chen1;~Zhuofeng_Wang2;~Beiquan_Cao1;~Tong_Wu21;~Size_Zheng1;~Xiuhong_Li1;~Xuechao_Wei1;~Shengen_Yan1;~Meng_Li1;~Yun_Liang1", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University;Peking University;Alibaba Group;Tsinghua University;Peking University;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;alibaba-inc.com;tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;PhD student;Assistant Research Professor;Researcher;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2024arkvale,\ntitle={ArkVale: Efficient Generative {LLM} Inference with Recallable Key-Value Eviction},\nauthor={Renze Chen and Zhuofeng Wang and Beiquan Cao and Tong Wu and Size Zheng and Xiuhong Li and Xuechao Wei and Shengen Yan and Meng Li and Yun Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4oAt5L4lYe}\n}", "github": "", "reviewers": "8mmm;PLAC;YdSG;ntoN", "pdf_size": 636340, "rating": "6;6;7;7", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "90;244;35;51", "wc_strengths": "64;198;17;33", "wc_weaknesses": "89;43;90;58", "wc_questions": "94;44;5;11", "wc_limitations": "4;28;5;9", "wc_review": "341;557;152;162", "wc_reply_reviewers": "8;63;33;0", "wc_reply_authors": "0;125;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.0, 82.70731527501059 ], "wc_strengths_avg": [ 78.0, 71.31269171753371 ], "wc_weaknesses_avg": [ 70.0, 20.211382931407737 ], "wc_questions_avg": [ 38.5, 35.316426772820606 ], "wc_limitations_avg": [ 11.5, 9.7082439194738 ], "wc_review_avg": [ 303.0, 164.80442955212095 ], "wc_reply_reviewers_avg": [ 26.0, 24.586581706288495 ], "wc_reply_authors_avg": [ 31.25, 54.12658773652741 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7978692948142575396&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;alibaba-inc.com;tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;2;0;0", "aff_unique_norm": "Peking University;Alibaba Group;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.alibaba.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;Alibaba;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Key-Grid: Unsupervised 3D Keypoints Detection using Grid Heatmap Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96634", "id": "4pCu9c8leX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4pCu9c8leX", "openreview": "https://openreview.net/forum?id=4pCu9c8leX", "poster": "/media/PosterPDFs/NeurIPS%202024/96634.png?t=1733105163.5329397", "project": "", "author_site": "Chengkai Hou, Zhengrong Xue, Bingyang Zhou, Jinghan Ke, Lin Shao, Huazhe Xu", "tldr": "", "abstract": "Detecting 3D keypoints with semantic consistency is widely used in many scenarios such as pose estimation, shape registration and robotics. Currently, most unsupervised 3D keypoint detection methods focus on the rigid-body objects. However, when faced with deformable objects, the keypoints they identify do not preserve semantic consistency well. In this paper, we introduce an innovative unsupervised keypoint detector Key-Grid for both the rigid-body and deformable objects, which is an autoencoder framework. The encoder predicts keypoints and the decoder utilizes the generated keypoints to reconstruct the objects. Unlike previous work, we leverage the identified keypoint in formation to form a 3D grid feature heatmap called grid heatmap, which is used in the decoder section. Grid heatmap is a novel concept that represents the latent variables for grid points sampled uniformly in the 3D cubic space, where these variables are the shortest distance between the grid points and the \u201cskeleton\u201d connected by keypoint pairs. Meanwhile, we incorporate the information from each layer of the encoder into the decoder section. We conduct an extensive evaluation of Key-Grid on a list of benchmark datasets. Key-Grid achieves the state-of-the-art performance on the semantic consistency and position accuracy of keypoints. Moreover, we demonstrate the robustness of Key-Grid to noise and downsampling. In addition, we achieve SE-(3) invariance of keypoints though generalizing Key-Grid to a SE(3)-invariant backbone.", "keywords": "Keypoint Detection Deformable Object 3D Point Cloud", "primary_area": "machine_vision", "supplementary_material": "/attachment/9141bb880197347fac23c9422106c6aff96aa264.zip", "author": "Chengkai Hou;Zhengrong Xue;Bingyang Zhou;Jinghan Ke;Lin Shao;Huazhe Xu", "authorids": "~Chengkai_Hou1;~Zhengrong_Xue1;~Bingyang_Zhou1;~Jinghan_Ke2;~Lin_Shao2;~Huazhe_Xu1", "gender": ";;;;M;M", "homepage": ";https://steven-xzr.github.io;https://github.com/BingyangZHOU;;https://linsats.github.io/;http://hxu.rocks", "dblp": ";272/4241;49/5919;;26/8546-2;164/9006", "google_scholar": ";LO3pKmwAAAAJ;;;https://scholar.google.com/citations?hl=en;t9HPFawAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Chengkai_Hou1;~Zhengrong_Xue1;~Bingyang_Zhou1;~Jinghan_Ke2;~Lin_Shao2;~Huazhe_Xu1", "aff": ";Tsinghua University;University of Hong Kong;;National University of Singapore;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;hku.hk;;nus.edu.sg;tsinghua.edu.cn", "position": ";PhD student;MS student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhou2024keygrid,\ntitle={Key-Grid: Unsupervised 3D Keypoints Detection using Grid Heatmap Features},\nauthor={Chengkai Hou and Zhengrong Xue and Bingyang Zhou and Jinghan Ke and Lin Shao and Huazhe Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4pCu9c8leX}\n}", "github": "", "reviewers": "LZF7;p6XE;YtUf;U8aQ", "pdf_size": 16658513, "rating": "4;6;6;7", "confidence": "2;4;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "67;71;136;51", "wc_strengths": "51;61;56;61", "wc_weaknesses": "170;141;805;31", "wc_questions": "52;17;100;32", "wc_limitations": "9;11;5;39", "wc_review": "349;301;1102;214", "wc_reply_reviewers": "24;89;53;0", "wc_reply_authors": "73;166;65;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.25, 32.48364973336586 ], "wc_strengths_avg": [ 57.25, 4.14578098794425 ], "wc_weaknesses_avg": [ 286.75, 303.67118318997603 ], "wc_questions_avg": [ 50.25, 31.29197181386945 ], "wc_limitations_avg": [ 16.0, 13.45362404707371 ], "wc_review_avg": [ 491.5, 355.7783720239329 ], "wc_reply_reviewers_avg": [ 41.5, 33.23025729662652 ], "wc_reply_authors_avg": [ 76.0, 59.173473786824445 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6225430174794673, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15315082450698285177&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";tsinghua.edu.cn;hku.hk;;nus.edu.sg;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Tsinghua University;University of Hong Kong;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk;https://www.nus.edu.sg", "aff_unique_abbr": "THU;HKU;NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Alleviate Anchor-Shift: Explore Blind Spots with Cross-View Reconstruction for Incomplete Multi-View Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96633", "id": "4pIfc51fGK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4pIfc51fGK", "openreview": "https://openreview.net/forum?id=4pIfc51fGK", "poster": "", "project": "", "author_site": "Suyuan Liu, Siwei Wang, KE LIANG, Junpu Zhang, Zhibin Dong, Tianrui Liu, En Zhu, Xinwang Liu, Kunlun He", "tldr": "", "abstract": "Incomplete multi-view clustering aims to learn complete correlations among samples by leveraging complementary information across multiple views for clustering. Anchor-based methods further establish sample-level similarities for representative anchor generation, effectively addressing scalability issues in large-scale scenarios. Despite efficiency improvements, existing methods overlook the misguidance in anchors learning induced by partial missing samples, i.e., the absence of samples results in shift of learned anchors, further leading to sub-optimal clustering performance. To conquer the challenges, our solution involves a cross-view reconstruction strategy that not only alleviate the anchor shift problem through a carefully designed cross-view learning process, but also reconstructs missing samples in a way that transcends the limitations imposed by convex combinations. By employing affine combinations, our method explores areas beyond the convex hull defined by anchors, thereby illuminating blind spots in the reconstruction of missing samples. Experimental results on four benchmark datasets and three large-scale datasets validate the effectiveness of our proposed method.", "keywords": "Incomplete Multi-View Clustering;Anchor-Based Multi-View Clustering", "primary_area": "learning_theory", "supplementary_material": "", "author": "Suyuan Liu;Siwei Wang;KE LIANG;Junpu Zhang;Zhibin Dong;Tianrui Liu;En Zhu;Xinwang Liu;Kunlun He", "authorids": "~Suyuan_Liu1;~Siwei_Wang4;~KE_LIANG1;~Junpu_Zhang1;~Zhibin_Dong1;~Tianrui_Liu1;~En_Zhu1;~Xinwang_Liu1;~Kunlun_He1", "gender": "M;M;M;M;M;F;M;M;M", "homepage": "https://tracesource.github.io//;https://wangsiwei2010.github.io/;https://liangke23.github.io/;;https://dzboop.GitHub.io;https://www.imperial.ac.uk/people/t.liu15;https://www.researchgate.net/profile/En_Zhu;https://xinwangliu.github.io/;", "dblp": "227/5361;51/8279-1;48/73-6;;227/6683;;30/1307;45/6569-2.html;226/9735", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;5o9hK3EAAAAJ;gwea2McAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;;https://scholar.google.de/citations?user=SC53gxAAAAAJ;;A56vWC4AAAAJ;", "orcid": "0000-0003-1481-5393;0000-0001-9517-262X;0000-0002-4837-455X;;0000-0001-7829-4924;;;;0000-0002-3335-5700", "linkedin": ";;;;;tianrui-liu-7b126b67/;;;", "or_profile": "~Suyuan_Liu1;~Siwei_Wang4;~KE_LIANG1;~Junpu_Zhang1;~Zhibin_Dong1;~Tianrui_Liu1;~En_Zhu1;~Xinwang_Liu1;~Kunlun_He1", "aff": "National University of Defense Technology;Intelligent Game and Decision Lab;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Chinese PLA General Hospital", "aff_domain": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;plagh.org", "position": "PhD student;Assistant Professor;PhD student;PhD student;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024alleviate,\ntitle={Alleviate Anchor-Shift: Explore Blind Spots with Cross-View Reconstruction for Incomplete Multi-View Clustering},\nauthor={Suyuan Liu and Siwei Wang and KE LIANG and Junpu Zhang and Zhibin Dong and Tianrui Liu and En Zhu and Xinwang Liu and Kunlun He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4pIfc51fGK}\n}", "github": "", "reviewers": "mjvb;4LaZ;nWqT;fq57", "pdf_size": 1380786, "rating": "5;6;6;7", "confidence": "5;5;5;4", "soundness": "3;3;3;3", "novelty": "4;3;3;3", "presentation": "3;3;2;3", "wc_summary": "127;53;59;52", "wc_strengths": "61;25;27;26", "wc_weaknesses": "72;80;66;66", "wc_questions": "66;33;61;73", "wc_limitations": "2;16;1;16", "wc_review": "328;207;214;233", "wc_reply_reviewers": "0;0;16;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 31.43544973433655 ], "wc_strengths_avg": [ 34.75, 15.171931320698759 ], "wc_weaknesses_avg": [ 71.0, 5.744562646538029 ], "wc_questions_avg": [ 58.25, 15.188400179084036 ], "wc_limitations_avg": [ 8.75, 7.258615570478987 ], "wc_review_avg": [ 245.5, 48.57211133973898 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:S3OWQHIPeZ4J:scholar.google.com/&scioq=Alleviate+Anchor-Shift:+Explore+Blind+Spots+with+Cross-View+Reconstruction+for+Incomplete+Multi-View+Clustering&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;plagh.org", "author_num": 9, "aff_unique_index": "0;1;0;0;0;0;0;0;2", "aff_unique_norm": "National University of Defense Technology;Intelligent Game and Decision Lab;Chinese PLA General Hospital", "aff_unique_dep": ";Intelligent Game and Decision Lab;", "aff_unique_url": "http://www.nudt.edu.cn/;;http://www.plagg.com.cn", "aff_unique_abbr": "NUDT;;PLA General Hospital", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Seek Commonality but Preserve Differences: Dissected Dynamics Modeling for Multi-modal Visual RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96632", "id": "4php6bGL2W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4php6bGL2W", "openreview": "https://openreview.net/forum?id=4php6bGL2W", "poster": "/media/PosterPDFs/NeurIPS%202024/96632.png?t=1731471459.8052492", "project": "", "author_site": "Yangru Huang, Peixi Peng, Yifan Zhao, Guangyao Chen, Yonghong Tian", "tldr": "", "abstract": "Accurate environment dynamics modeling is crucial for obtaining effective state representations in visual reinforcement learning (RL) applications. However, when facing multiple input modalities, existing dynamics modeling methods (e.g., DeepMDP) usually stumble in addressing the complex and volatile relationship between different modalities. In this paper, we study the problem of efficient dynamics modeling for multi-modal visual RL. We find that under the existence of modality heterogeneity, modality-correlated and distinct features are equally important but play different roles in reflecting the evolution of environmental dynamics. Motivated by this fact, we propose Dissected Dynamics Modeling (DDM), a novel multi-modal dynamics modeling method for visual RL. Unlike existing methods, DDM explicitly distinguishes consistent and inconsistent information across modalities and treats them separately with a divide-and-conquer strategy. This is done by dispatching the features carrying different information into distinct dynamics modeling pathways, which naturally form a series of implicit regularizations along the learning trajectories. In addition, a reward predictive function is further introduced to filter task-irrelevant information in both modality-consistent and inconsistent features, ensuring information integrity while avoiding potential distractions. Extensive experiments show that DDM consistently achieves competitive performance in challenging multi-modal visual environments.", "keywords": "Visual Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yangru Huang;Peixi Peng;Yifan Zhao;Guangyao Chen;Yonghong Tian", "authorids": "~Yangru_Huang1;~Peixi_Peng2;~Yifan_Zhao2;~Guangyao_Chen1;~Yonghong_Tian1", "gender": "F;M;M;M;M", "homepage": ";;https://zhao1f.github.io/;http://icgy96.github.io/;http://www.pkuml.org", "dblp": "241/9375;119/8511;13/7050-2.html;;86/5857", "google_scholar": ";CFMuFGoAAAAJ;bUzykm0AAAAJ;ZauoVgYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-7255-2109;0000-0002-2978-5935", "linkedin": ";;;guangyao-chen-976335145;", "or_profile": "~Yangru_Huang1;~Peixi_Peng2;~Yifan_Zhao2;~Guangyao_Chen1;~Yonghong_Tian1", "aff": "Peking University;Peking University;Beihang University;Cornell University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;buaa.edu.cn;cornell.edu;pku.edu.cn", "position": "PhD student;Researcher;Associate Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhuang2024seek,\ntitle={Seek Commonality but Preserve Differences: Dissected Dynamics Modeling for Multi-modal Visual {RL}},\nauthor={Yangru Huang and Peixi Peng and Yifan Zhao and Guangyao Chen and Yonghong Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4php6bGL2W}\n}", "github": "", "reviewers": "xBoX;dbSG;Msqh", "pdf_size": 2458477, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "107;97;63", "wc_strengths": "77;77;131", "wc_weaknesses": "239;74;242", "wc_questions": "12;58;307", "wc_limitations": "4;5;10", "wc_review": "439;311;753", "wc_reply_reviewers": "45;31;48", "wc_reply_authors": "56;36;29", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.0, 18.83259585576738 ], "wc_strengths_avg": [ 95.0, 25.45584412271571 ], "wc_weaknesses_avg": [ 185.0, 78.49840762716146 ], "wc_questions_avg": [ 125.66666666666667, 129.58995159947995 ], "wc_limitations_avg": [ 6.333333333333333, 2.6246692913372702 ], "wc_review_avg": [ 501.0, 185.69509058310257 ], "wc_reply_reviewers_avg": [ 41.333333333333336, 7.408703590297623 ], "wc_reply_authors_avg": [ 40.333333333333336, 11.440668201153676 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l4IV05Q86UkJ:scholar.google.com/&scioq=Seek+Commonality+but+Preserve+Differences:+Dissected+Dynamics+Modeling+for+Multi-modal+Visual+RL&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "pku.edu.cn;pku.edu.cn;buaa.edu.cn;cornell.edu;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Peking University;Beihang University;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.buaa.edu.cn/;https://www.cornell.edu", "aff_unique_abbr": "Peking U;BUAA;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Do Finetti: On Causal Effects for Exchangeable Data", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96631", "id": "4rCZeCZAON", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4rCZeCZAON", "openreview": "https://openreview.net/forum?id=4rCZeCZAON", "poster": "/media/PosterPDFs/NeurIPS%202024/96631.png?t=1733702210.3564003", "project": "", "author_site": "Siyuan Guo, Chi Zhang, Karthika Mohan, Ferenc Huszar, Bernhard Sch\u00f6lkopf", "tldr": "", "abstract": "We study causal effect estimation in a setting where the data are not i.i.d.$\\ $(independent and identically distributed). We focus on exchangeable data satisfying an assumption of independent causal mechanisms. Traditional causal effect estimation frameworks, e.g., relying on structural causal models and do-calculus, are typically limited to i.i.d. data and do not extend to more general exchangeable generative processes, which naturally arise in multi-environment data. To address this gap, we develop a generalized framework for exchangeable data and introduce a truncated factorization formula that facilitates both the identification and estimation of causal effects in our setting. To illustrate potential applications, we introduce a causal P\u00f3lya urn model and demonstrate how intervention propagates effects in exchangeable data settings. Finally, we develop an algorithm that performs simultaneous causal discovery and effect estimation given multi-environment data.", "keywords": "Causality; Exchangeability", "primary_area": "causal_inference", "supplementary_material": "/attachment/d00dddedee350e624426e03b1d2d5124f6ee592c.zip", "author": "Siyuan Guo;Chi Zhang;Karthika Mohan;Ferenc Husz\u00e1r;Bernhard Sch\u00f6lkopf", "authorids": "~Siyuan_Guo1;~Chi_Zhang23;~Karthika_Mohan1;~Ferenc_Husz\u00e1r1;~Bernhard_Sch\u00f6lkopf1", "gender": "F;;;;", "homepage": "https://siyuanguo.com/;https://www.linkedin.com/in/zccc/;http://karthikamohan.com;;", "dblp": ";91/195-16;;;", "google_scholar": ";f5z0A_0AAAAJ;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Siyuan_Guo1;~Chi_Zhang23;~Karthika_Mohan1;~Ferenc_Husz\u00e1r1;~Bernhard_Sch\u00f6lkopf1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Toyota Research Institute;Oregon State University;;", "aff_domain": "tuebingen.mpg.de;tri.global;oregonstate.edu;;", "position": "PhD student;Postdoc;Assistant Professor;;", "bibtex": "@inproceedings{\nguo2024do,\ntitle={Do Finetti: On Causal Effects for Exchangeable Data},\nauthor={Siyuan Guo and Chi Zhang and Karthika Mohan and Ferenc Husz{\\'a}r and Bernhard Sch{\\\"o}lkopf},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4rCZeCZAON}\n}", "github": "", "reviewers": "nbBj;H65Y;Jkyk", "pdf_size": 533734, "rating": "7;7;8", "confidence": "2;2;4", "soundness": "4;4;3", "novelty": "2;3;4", "presentation": "4;3;4", "wc_summary": "55;29;52", "wc_strengths": "29;79;79", "wc_weaknesses": "78;123;41", "wc_questions": "19;231;69", "wc_limitations": "42;4;14", "wc_review": "223;466;255", "wc_reply_reviewers": "61;16;12", "wc_reply_authors": "37;31;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 45.333333333333336, 11.61416759345623 ], "wc_strengths_avg": [ 62.333333333333336, 23.570226039551585 ], "wc_weaknesses_avg": [ 80.66666666666667, 33.529423231278855 ], "wc_questions_avg": [ 106.33333333333333, 90.48511234206187 ], "wc_limitations_avg": [ 20.0, 16.08311744241976 ], "wc_review_avg": [ 314.6666666666667, 107.80331885222995 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 22.216110270602176 ], "wc_reply_authors_avg": [ 22.666666666666668, 16.21384867602041 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2236522734918139283&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tuebingen.mpg.de;tri.global;oregonstate.edu;;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Toyota Research Institute;Oregon State University", "aff_unique_dep": "Intelligent Systems;;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.tri.global;https://oregonstate.edu", "aff_unique_abbr": "MPI-IS;TRI;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "FNP: Fourier Neural Processes for Arbitrary-Resolution Data Assimilation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96630", "id": "4rrNcsVPDm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4rrNcsVPDm", "openreview": "https://openreview.net/forum?id=4rrNcsVPDm", "poster": "/media/PosterPDFs/NeurIPS%202024/96630.png?t=1731656705.0876985", "project": "", "author_site": "Kun Chen, Peng Ye, Hao Chen, kang chen, Tao Han, Wanli Ouyang, Tao Chen, LEI BAI", "tldr": "", "abstract": "Data assimilation is a vital component in modern global medium-range weather forecasting systems to obtain the best estimation of the atmospheric state by combining the short-term forecast and observations. Recently, AI-based data assimilation approaches have attracted increasing attention for their significant advantages over traditional techniques in terms of computational consumption. However, existing AI-based data assimilation methods can only handle observations with a specific resolution, lacking the compatibility and generalization ability to assimilate observations with other resolutions. Considering that complex real-world observations often have different resolutions, we propose the Fourier Neural Processes (FNP) for arbitrary-resolution data assimilation in this paper. Leveraging the efficiency of the designed modules and flexible structure of neural processes, FNP achieves state-of-the-art results in assimilating observations with varying resolutions, and also exhibits increasing advantages over the counterparts as the resolution and the amount of observations increase. Moreover, our FNP trained on a fixed resolution can directly handle the assimilation of observations with out-of-distribution resolutions and the observational information reconstruction task without additional fine-tuning, demonstrating its excellent generalization ability across data resolutions as well as across tasks. Code is available at https://github.com/OpenEarthLab/FNP.", "keywords": "data assimilation;neural processes;deep learning;end-to-end weather forecasting", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Kun Chen;Peng Ye;Hao Chen;kang chen;Tao Han;Wanli Ouyang;Tao Chen;LEI BAI", "authorids": "~Kun_Chen5;~Peng_Ye4;~Hao_Chen14;~kang_chen3;~Tao_Han4;~Wanli_Ouyang1;~Tao_Chen6;~LEI_BAI1", "gender": "M;M;;M;M;;M;M", "homepage": "https://github.com/kunc3301/;;;https://github.com/yuchendoudou;https://taohan10200.github.io/;;https://eetchen.github.io/;http://leibai.site/", "dblp": ";53/930-6;;;78/744-3;;69/510-3;119/1223-1", "google_scholar": ";UEZZP5QAAAAJ;;;a3OxwlMAAAAJ;;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;https://scholar.google.com.au/citations?user=sakOO04AAAAJ", "orcid": ";0000-0002-8486-7562;;;;;;0000-0003-3378-7201", "linkedin": ";;;;;;;lei-bai-641370153/", "or_profile": "~Kun_Chen5;~Peng_Ye4;~Hao_Chen14;~kang_chen3;~Tao_Han4;~Wanli_Ouyang1;~Tao_Chen6;~LEI_BAI1", "aff": "Fudan University;Fudan University;;University of Science and Technology of China;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;;Fudan University;Shanghai AI Laboratory", "aff_domain": "fudan.edu.cn;fudan.edu.cn;;ustc.edu.cn;cse.ust.hk;;fudan.edu.cn;pjlab.org.cn", "position": "MS student;PhD student;;PhD student;PhD student;;Full Professor;Researcher", "bibtex": "@inproceedings{\nchen2024fnp,\ntitle={{FNP}: Fourier Neural Processes for Arbitrary-Resolution Data Assimilation},\nauthor={Kun Chen and Peng Ye and Hao Chen and kang chen and Tao Han and Wanli Ouyang and Tao Chen and LEI BAI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4rrNcsVPDm}\n}", "github": "", "reviewers": "Y68H;oAJc;3aNd;95HC", "pdf_size": 7984049, "rating": "4;6;6;6", "confidence": "2;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;2", "wc_summary": "37;68;57;64", "wc_strengths": "56;84;62;62", "wc_weaknesses": "88;51;157;72", "wc_questions": "34;1;75;68", "wc_limitations": "44;2;10;34", "wc_review": "259;206;361;300", "wc_reply_reviewers": "0;0;29;98", "wc_reply_authors": "103;0;18;304", "reply_reviewers": "0;0;1;3", "reply_authors": "2;1;2;4", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 56.5, 11.926860441876563 ], "wc_strengths_avg": [ 66.0, 10.677078252031311 ], "wc_weaknesses_avg": [ 92.0, 39.75550276376844 ], "wc_questions_avg": [ 44.5, 29.516944286290883 ], "wc_limitations_avg": [ 22.5, 17.109938632268673 ], "wc_review_avg": [ 281.5, 56.720807469569756 ], "wc_reply_reviewers_avg": [ 31.75, 40.03982392568679 ], "wc_reply_authors_avg": [ 106.25, 120.61586752993986 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=166695106678302123&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu.cn;;ustc.edu.cn;cse.ust.hk;;fudan.edu.cn;pjlab.org.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Fudan University;University of Science and Technology of China;Hong Kong University of Science and Technology;Shanghai AI Laboratory", "aff_unique_dep": ";;Department of Computer Science and Engineering;", "aff_unique_url": "https://www.fudan.edu.cn;http://www.ustc.edu.cn;https://www.ust.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Fudan;USTC;HKUST;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VFIMamba: Video Frame Interpolation with State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96629", "id": "4s5UsBUsUS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4s5UsBUsUS", "openreview": "https://openreview.net/forum?id=4s5UsBUsUS", "poster": "/media/PosterPDFs/NeurIPS%202024/96629.png?t=1732969842.0771954", "project": "", "author_site": "Guozhen Zhang, Chuxnu Liu, Yutao Cui, Xiaotong Zhao, Kai Ma, Limin Wang", "tldr": "", "abstract": "Inter-frame modeling is pivotal in generating intermediate frames for video frame interpolation (VFI). Current approaches predominantly rely on convolution or attention-based models, which often either lack sufficient receptive fields or entail significant computational overheads. Recently, Selective State Space Models (S6) have emerged, tailored specifically for long sequence modeling, offering both linear complexity and data-dependent modeling capabilities. In this paper, we propose VFIMamba, a novel frame interpolation method for efficient and dynamic inter-frame modeling by harnessing the S6 model. Our approach introduces the Mixed-SSM Block (MSB), which initially rearranges tokens from adjacent frames in an interleaved fashion and subsequently applies multi-directional S6 modeling. This design facilitates the efficient transmission of information across frames while upholding linear complexity. Furthermore, we introduce a novel curriculum learning strategy that progressively cultivates proficiency in modeling inter-frame dynamics across varying motion magnitudes, fully unleashing the potential of the S6 model. Experimental findings showcase that our method attains state-of-the-art performance across diverse benchmarks, particularly excelling in high-resolution scenarios. In particular, on the X-TEST dataset, VFIMamba demonstrates a noteworthy improvement of 0.80 dB for 4K frames and 0.96 dB for 2K frames.", "keywords": "Video Frame Interpolation;State Space Models;Low Level Vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/4e30c4c3e65ec2f279d5012fbc32cd6e3f5d90e8.zip", "author": "Guozhen Zhang;Chunxu Liu;Yutao Cui;Xiaotong Zhao;Kai Ma;Limin Wang", "authorids": "~Guozhen_Zhang2;~Chunxu_Liu1;~Yutao_Cui1;~Xiaotong_Zhao1;~Kai_Ma2;~Limin_Wang1", "gender": "M;M;M;;M;", "homepage": "https://github.com/GuozhenZhang1999;https://lcxrocks.github.io/;;;;", "dblp": ";223/2201;255/2385.html;;86/7113-2;", "google_scholar": "https://scholar.google.com/citations?hl=en;;TSMchWcAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0003-4788-9751;;;", "linkedin": ";;;;;", "or_profile": "~Guozhen_Zhang2;~Chunxu_Liu1;~Yutao_Cui1;~Xiaotong_Zhao1;~Kai_Ma2;~Limin_Wang1", "aff": "Platform & Content Group;Nanjing University;Nanjing University;;Tencent;", "aff_domain": "tencent.com;nju.edu.cn;nju.edu;;tencent.com;", "position": "Intern;PhD student;PhD student;;Principal Scientist;", "bibtex": "@inproceedings{\nzhang2024vfimamba,\ntitle={{VFIM}amba: Video Frame Interpolation with State Space Models},\nauthor={Guozhen Zhang and Chunxu Liu and Yutao Cui and Xiaotong Zhao and Kai Ma and Limin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4s5UsBUsUS}\n}", "github": "", "reviewers": "A9TL;YfRu;9Hn5;Bwpp", "pdf_size": 13143253, "rating": "6;7;7;7", "confidence": "4;5;5;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "60;72;119;86", "wc_strengths": "50;28;116;109", "wc_weaknesses": "281;88;282;57", "wc_questions": "3;18;93;50", "wc_limitations": "10;9;55;45", "wc_review": "404;215;665;347", "wc_reply_reviewers": "74;24;68;37", "wc_reply_authors": "23;27;26;11", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.25, 22.072324299900995 ], "wc_strengths_avg": [ 75.75, 37.64555086593899 ], "wc_weaknesses_avg": [ 177.0, 105.07378359990659 ], "wc_questions_avg": [ 41.0, 34.489128721961066 ], "wc_limitations_avg": [ 29.75, 20.559365262575593 ], "wc_review_avg": [ 407.75, 163.58082864443497 ], "wc_reply_reviewers_avg": [ 50.75, 20.873128658636684 ], "wc_reply_authors_avg": [ 21.75, 6.378675411086537 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14889350195747422449&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tencent.com;nju.edu.cn;nju.edu;;tencent.com;", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Platform & Content Group;Nanjing University;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": ";https://www.nju.edu.cn;https://www.tencent.com", "aff_unique_abbr": ";Nanjing U;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "title": "Regularized Q-Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96628", "id": "4sueqIwb4o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4sueqIwb4o", "openreview": "https://openreview.net/forum?id=4sueqIwb4o", "poster": "/media/PosterPDFs/NeurIPS%202024/96628.png?t=1731667142.7769482", "project": "", "author_site": "Han-Dong Lim, Donghwan Lee", "tldr": "", "abstract": "Q-learning is widely used algorithm in reinforcement learning (RL) community. Under the lookup table setting, its convergence is well established. However, its behavior is known to be unstable with the linear function approximation case. This paper develops a new Q-learning algorithm, called RegQ, that converges when linear function approximation is used. We prove that simply adding an appropriate regularization term ensures convergence of the algorithm. Its stability is established using a recent analysis tool based on switching system models. Moreover, we experimentally show that RegQ converges in environments where Q-learning with linear function approximation has known to diverge. An error bound on the solution where the algorithm converges is also given.", "keywords": "reinforcement learning;Q-learning;convergence", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/7b8d05623ec5cfa58d51325a30cbed0ec8e1bf49.zip", "author": "Han-Dong Lim;Donghwan Lee", "authorids": "~Han-Dong_Lim1;~Donghwan_Lee2", "gender": "M;M", "homepage": ";https://sites.google.com/site/donghwanleehome", "dblp": "301/8950;", "google_scholar": "n2Vw99sAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Han-Dong_Lim1;~Donghwan_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlim2024regularized,\ntitle={Regularized Q-Learning},\nauthor={Han-Dong Lim and Donghwan Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4sueqIwb4o}\n}", "github": "", "reviewers": "cMMt;x1RW;mviV;haoJ;yA4E", "pdf_size": 922822, "rating": "4;5;5;5;8", "confidence": "3;3;3;4;3", "soundness": "3;1;3;3;4", "novelty": "3;3;3;2;3", "presentation": "3;1;3;3;3", "wc_summary": "58;20;116;87;49", "wc_strengths": "64;11;33;43;92", "wc_weaknesses": "88;307;263;272;406", "wc_questions": "175;1;119;126;95", "wc_limitations": "1;1;4;23;9", "wc_review": "386;340;535;551;651", "wc_reply_reviewers": "0;40;26;55;13", "wc_reply_authors": "39;180;133;130;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;3;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 66.0, 32.89376840679705 ], "wc_strengths_avg": [ 48.6, 27.60144923731361 ], "wc_weaknesses_avg": [ 267.2, 102.9590209743663 ], "wc_questions_avg": [ 103.2, 57.335503834884015 ], "wc_limitations_avg": [ 7.6, 8.2365041127896 ], "wc_review_avg": [ 492.6, 113.97297925385648 ], "wc_reply_reviewers_avg": [ 26.8, 19.38452991434149 ], "wc_reply_authors_avg": [ 96.4, 66.40060240690592 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.14744195615489716, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18187291292532708726&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Gradient-based Discrete Sampling with Automatic Cyclical Scheduling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96627", "id": "4syq5cgwA2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4syq5cgwA2", "openreview": "https://openreview.net/forum?id=4syq5cgwA2", "poster": "", "project": "", "author_site": "Patrick Pynadath, Riddhiman Bhattacharya, ARUN HARIHARAN, Ruqi Zhang", "tldr": "", "abstract": "Discrete distributions, particularly in high-dimensional deep models, are often highly multimodal due to inherent discontinuities. While gradient-based discrete sampling has proven effective, it is susceptible to becoming trapped in local modes due to the gradient information. To tackle this challenge, we propose an automatic cyclical scheduling, designed for efficient and accurate sampling in multimodal discrete distributions. Our method contains three key components: (1) a cyclical step size schedule where large steps discover new modes and small steps exploit each mode; (2) a cyclical balancing schedule, ensuring \"balanced\" proposals for given step sizes and high efficiency of the Markov chain; and (3) an automatic tuning scheme for adjusting the hyperparameters in the cyclical schedules, allowing adaptability across diverse datasets with minimal tuning. We prove the non-asymptotic convergence and inference guarantee for our method in general discrete distributions. Extensive experiments demonstrate the superiority of our method in sampling complex multimodal discrete distributions.", "keywords": "MCMC;Discrete Spaces;Sampling;EBM", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/45c3ed6b9acfa8ea24494f8e4b89634264ae23c0.zip", "author": "Patrick Pynadath;Riddhiman Bhattacharya;ARUN NARAYANAN HARIHARAN;Ruqi Zhang", "authorids": "~Patrick_Pynadath1;~Riddhiman_Bhattacharya1;~ARUN_NARAYANAN_HARIHARAN1;~Ruqi_Zhang1", "gender": "M;M;M;F", "homepage": ";https://sites.google.com/view/riddhimanbhattacharyaphd/about?authuser=0;;https://ruqizhang.github.io/", "dblp": ";;;", "google_scholar": ";fdyRnbgAAAAJ;;4ojpmc8AAAAJ", "orcid": ";0009-0003-6341-6054;;", "linkedin": "patrick-pynadath-a17434123;;arun-narayanan-hariharan/;", "or_profile": "~Patrick_Pynadath1;~Riddhiman_Bhattacharya1;~ARUN_NARAYANAN_HARIHARAN1;~Ruqi_Zhang1", "aff": ", Purdue University;Purdue University;Purdue University;Purdue University", "aff_domain": "cs.purdue.edu;purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npynadath2024gradientbased,\ntitle={Gradient-based Discrete Sampling with Automatic Cyclical Scheduling},\nauthor={Patrick Pynadath and Riddhiman Bhattacharya and ARUN NARAYANAN HARIHARAN and Ruqi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4syq5cgwA2}\n}", "github": "", "reviewers": "1xVN;1ido;V8nn;iri7", "pdf_size": 3092511, "rating": "5;5;6;6", "confidence": "1;2;2;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "41;108;115;81", "wc_strengths": "26;60;77;48", "wc_weaknesses": "48;110;198;98", "wc_questions": "24;50;61;118", "wc_limitations": "1;4;4;41", "wc_review": "140;332;455;386", "wc_reply_reviewers": "12;16;33;38", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 29.046299247924853 ], "wc_strengths_avg": [ 52.75, 18.565761497983324 ], "wc_weaknesses_avg": [ 113.5, 54.043963585214584 ], "wc_questions_avg": [ 63.25, 34.34657916008521 ], "wc_limitations_avg": [ 12.5, 16.5 ], "wc_review_avg": [ 328.25, 117.103319765069 ], "wc_reply_reviewers_avg": [ 24.75, 10.985786271359915 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8696181501470279294&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.purdue.edu;purdue.edu;purdue.edu;purdue.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "When are dynamical systems learned from time series data statistically accurate?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96626", "id": "4t3ox9hj3z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4t3ox9hj3z", "openreview": "https://openreview.net/forum?id=4t3ox9hj3z", "poster": "/media/PosterPDFs/NeurIPS%202024/96626.png?t=1733452250.9275966", "project": "", "author_site": "Jeongjin Park, Nicole Yang, Nisha Chandramoorthy", "tldr": "", "abstract": "Conventional notions of generalization often fail to describe the ability of learned models to capture meaningful information from dynamical data. A neural network that learns complex dynamics with a small test error may still fail to reproduce its \\emph{physical} behavior, including associated statistical moments and Lyapunov exponents. To address this gap, we propose an ergodic theoretic approach to generalization of complex dynamical models learned from time series data. Our main contribution is to define and analyze generalization of a broad suite of neural representations of classes of ergodic systems, including chaotic systems, in a way that captures emulating underlying invariant, physical measures. Our results provide theoretical justification for why regression methods for generators of dynamical systems (Neural ODEs) fail to generalize, and why their statistical accuracy improves upon adding Jacobian information during training. We verify our results on a number of ergodic chaotic systems and neural network parameterizations, including MLPs, ResNets, Fourier Neural layers, and RNNs.", "keywords": "generalization;ergodic theory and dynamical systems;chaotic dynamics;scientific machine learning", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/414cafa9958005d264953bc8bc44350fb61014f4.zip", "author": "Jeongjin Park;Nicole Tianjiao Yang;Nisha Chandramoorthy", "authorids": "~Jeongjin_Park2;~Nicole_Tianjiao_Yang1;~Nisha_Chandramoorthy1", "gender": "F;;F", "homepage": "https://jayjay-park.github.io/;;https://ni-sha-c.github.io", "dblp": ";;236/5726", "google_scholar": ";;7z8NqmUAAAAJ", "orcid": "0000-0001-5515-3897;;", "linkedin": "jayjay-jeongjin-park/;;", "or_profile": "~Jeongjin_Park2;~Nicole_Tianjiao_Yang1;~Nisha_Chandramoorthy1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology", "aff_domain": "gatech.edu;;gatech.edu", "position": "MS student;;Assistant Professor", "bibtex": "@inproceedings{\npark2024when,\ntitle={When are dynamical systems learned from time series data statistically accurate?},\nauthor={Jeongjin Park and Nicole Tianjiao Yang and Nisha Chandramoorthy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4t3ox9hj3z}\n}", "github": "", "reviewers": "vAp3;PvwR;L3o7;sRDx", "pdf_size": 23519149, "rating": "5;6;6;7", "confidence": "4;3;2;4", "soundness": "3;3;3;2", "novelty": "2;2;3;3", "presentation": "2;4;3;3", "wc_summary": "74;46;76;101", "wc_strengths": "90;91;42;31", "wc_weaknesses": "474;48;119;141", "wc_questions": "57;31;50;125", "wc_limitations": "40;101;23;1", "wc_review": "735;317;310;399", "wc_reply_reviewers": "555;0;36;46", "wc_reply_authors": "760;0;91;16", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.25, 19.472737352514155 ], "wc_strengths_avg": [ 63.5, 27.28094573140748 ], "wc_weaknesses_avg": [ 195.5, 164.42399459932847 ], "wc_questions_avg": [ 65.75, 35.50616143713651 ], "wc_limitations_avg": [ 41.25, 37.164331017791774 ], "wc_review_avg": [ 440.25, 173.73453168555756 ], "wc_reply_reviewers_avg": [ 159.25, 229.12592062008173 ], "wc_reply_authors_avg": [ 216.75, 315.5212948439455 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5325958836031993901&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "gatech.edu;;gatech.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Continual Learning with Global Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96625", "id": "4vp0edVY4o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4vp0edVY4o", "openreview": "https://openreview.net/forum?id=4vp0edVY4o", "poster": "/media/PosterPDFs/NeurIPS%202024/96625.png?t=1733519292.6473594", "project": "", "author_site": "Xueying Bai, Jinghuan Shang, Yifan Sun, Niranjan Balasubramanian", "tldr": "", "abstract": "Continual learning aims to sequentially learn new tasks without forgetting previous tasks' knowledge (catastrophic forgetting). One factor that can cause forgetting is the interference between the gradients on losses from different tasks. When the gradients on the current task's loss are in opposing directions to those on previous tasks' losses, updating the model for the current task may cause performance degradation on previous tasks. In this paper, we first identify causes of the above interference, and hypothesize that correlations between data representations are a key factor of interference. We then propose a method for promoting appropriate correlations between arbitrary tasks' data representations (i.e., global alignment) in individual task learning. Specifically, we learn the data representation as a task-specific composition of pre-trained token representations shared across all tasks. Then the correlations between different tasks' data representations are grounded by correlations between pre-trained token representations. We explore different ways to learn such compositions. Without experience replay, our model achieves SOTA performance in continual learning tasks. It also achieves advanced class-incremental performance through task-incremental training.", "keywords": "Continual Learning;Global Alignment;Composition Learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Xueying Bai;Jinghuan Shang;Yifan Sun;Niranjan Balasubramanian", "authorids": "~Xueying_Bai1;~Jinghuan_Shang1;~Yifan_Sun1;~Niranjan_Balasubramanian2", "gender": ";M;F;M", "homepage": ";https://www.cs.stonybrook.edu/~jishang;https://sites.google.com/site/yifansunwebsite/;http://www3.cs.stonybrook.edu/~niranjan/", "dblp": ";218/7364;https://dblp.uni-trier.de/pid/99/10261-1;40/1931", "google_scholar": ";gMvLIDUAAAAJ;o3fSb1YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7301-5981;;", "linkedin": ";;;", "or_profile": "~Xueying_Bai1;~Jinghuan_Shang1;~Yifan_Sun1;~Niranjan_Balasubramanian2", "aff": ";Department of Computer Science, State University of New York, Stony Brook;State University of New York, Stony Brook;State University of New York, Stony Brook", "aff_domain": ";cs.stonybrook.edu;stonybrook.edu;stonybrook.edu", "position": ";PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbai2024continual,\ntitle={Continual Learning with Global Alignment},\nauthor={Xueying Bai and Jinghuan Shang and Yifan Sun and Niranjan Balasubramanian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4vp0edVY4o}\n}", "github": "", "reviewers": "NuNi;R2vv;LhK3;Ep6T", "pdf_size": 903335, "rating": "5;6;6;7", "confidence": "4;5;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;4", "wc_summary": "152;50;157;136", "wc_strengths": "94;51;54;31", "wc_weaknesses": "161;246;187;336", "wc_questions": "344;4;267;2", "wc_limitations": "40;1;10;33", "wc_review": "791;352;675;538", "wc_reply_reviewers": "199;74;85;195", "wc_reply_authors": "537;32;183;76", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 123.75, 43.280336181688796 ], "wc_strengths_avg": [ 57.5, 22.85278976405288 ], "wc_weaknesses_avg": [ 232.5, 67.22536723588797 ], "wc_questions_avg": [ 154.25, 153.68209882741712 ], "wc_limitations_avg": [ 21.0, 16.015617378046965 ], "wc_review_avg": [ 589.0, 163.53134256160195 ], "wc_reply_reviewers_avg": [ 138.25, 58.89556434910867 ], "wc_reply_authors_avg": [ 207.0, 198.28136574070697 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2VcVAO0KWuYJ:scholar.google.com/&scioq=Continual+Learning+with+Global+Alignment&hl=en&as_sdt=0,34", "gs_version_total": 2, "email": ";cs.stonybrook.edu;stonybrook.edu;stonybrook.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "State University of New York", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LCGen: Mining in Low-Certainty Generation for View-consistent Text-to-3D", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96624", "id": "4wgzkAyi2D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4wgzkAyi2D", "openreview": "https://openreview.net/forum?id=4wgzkAyi2D", "poster": "/media/PosterPDFs/NeurIPS%202024/96624.png?t=1731732637.9152777", "project": "", "author_site": "Zeng Tao, Tong Yang, Junxiong Lin, Xinji Mai, Haoran Wang, Beining Wang, Enyu Zhou, Yan Wang, Wenqiang Zhang", "tldr": "", "abstract": "The Janus Problem is a common issue in SDS-based text-to-3D methods. Due to view encoding approach and 2D diffusion prior guidance, the 3D representation model tends to learn content with higher certainty from each perspective, leading to view inconsistency. In this work, we first model and analyze the problem, visualizing the specific causes of the Janus Problem, which are associated with discrete view encoding and shared priors in 2D lifting. Based on this, we further propose the LCGen method, which guides text-to-3D to obtain different priors with different certainty from various viewpoints, aiding in view-consistent generation. Experiments have proven that our LCGen method can be directly applied to different SDS-based text-to-3D methods, alleviating the Janus Problem without introducing additional information, increasing excessive training burden, or compromising the generation effect.", "keywords": "Text-to-3D;Janus Problem;Certainty;Diffusion;NeRF", "primary_area": "generative_models", "supplementary_material": "", "author": "Zeng Tao;Tong Yang;Junxiong Lin;Xinji Mai;Haoran Wang;Beining Wang;Enyu Zhou;Yan Wang;Wenqiang Zhang", "authorids": "~Zeng_Tao1;~Tong_Yang2;~Junxiong_Lin1;~Xinji_Mai1;~Haoran_Wang16;~Beining_Wang2;~Enyu_Zhou1;~Yan_Wang20;~Wenqiang_Zhang1", "gender": "M;M;M;M;M;M;F;;M", "homepage": "http://www.fudanroilab.com/;;http://www.fudanroilab.com;http://www.fudanroilab.com/2017/09/22/XinjiMai.html;http://www.fudanroilab.com/people.html;https://github.com/lan-creator;https://zhou-zoey.github.io;https://wangyanckxx.github.io/;https://www.fudanroilab.com/2021/07/01/WenqiangZhang.html", "dblp": "07/10183;;298/9184;;;;;59/2227-68.html;", "google_scholar": "cO0eUtAAAAAJ;yu7ijD0AAAAJ;https://scholar.google.com.hk/citations?user=dR4E0aUAAAAJ;;;;gWs_6egAAAAJ;RQSDgFkAAAAJ;vL-VEJYAAAAJ", "orcid": "0009-0006-2998-6709;;;0009-0003-4596-5391;;;;0000-0002-4953-2660;0000-0002-3339-8751", "linkedin": ";;;;;;;;", "or_profile": "~Zeng_Tao1;~Tong_Yang2;~Junxiong_Lin1;~Xinji_Mai1;~Haoran_Wang16;~Beining_Wang2;~Enyu_Zhou1;~Yan_Wang20;~Wenqiang_Zhang1", "aff": "Fudan University;Megvii Technology Inc.;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;megvii.com;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;Researcher;MS student;MS student;MS student;MS student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ntao2024lcgen,\ntitle={{LCG}en: Mining in Low-Certainty Generation for View-consistent Text-to-3D},\nauthor={Zeng Tao and Tong Yang and Junxiong Lin and Xinji Mai and Haoran Wang and Beining Wang and Enyu Zhou and Yan Wang and Wenqiang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4wgzkAyi2D}\n}", "github": "", "reviewers": "sRbF;FWvW;wiP5", "pdf_size": 14478413, "rating": "5;6;7", "confidence": "5;2;1", "soundness": "1;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "35;94;89", "wc_strengths": "43;73;54", "wc_weaknesses": "111;118;72", "wc_questions": "41;32;2", "wc_limitations": "153;28;1", "wc_review": "383;345;218", "wc_reply_reviewers": "0;22;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 1.699673171197595 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.66666666666667, 26.71246067953223 ], "wc_strengths_avg": [ 56.666666666666664, 12.39175353029407 ], "wc_weaknesses_avg": [ 100.33333333333333, 20.23747898221405 ], "wc_questions_avg": [ 25.0, 16.673332000533065 ], "wc_limitations_avg": [ 60.666666666666664, 66.21345952464819 ], "wc_review_avg": [ 315.3333333333333, 70.55179342550801 ], "wc_reply_reviewers_avg": [ 7.333333333333333, 10.370899457402697 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9607689228305226, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yJ09ZTat1DYJ:scholar.google.com/&scioq=LCGen:+Mining+in+Low-Certainty+Generation+for+View-consistent+Text-to-3D&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "fudan.edu.cn;megvii.com;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 9, "aff_unique_index": "0;1;0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University;Megvii Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.megvii.com", "aff_unique_abbr": "Fudan;Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "NoMAD-Attention: Efficient LLM Inference on CPUs Through Multiply-add-free Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96623", "id": "4xDxVQHsbZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4xDxVQHsbZ", "openreview": "https://openreview.net/forum?id=4xDxVQHsbZ", "poster": "", "project": "", "author_site": "Tianyi Zhang, Jonah Yi, Bowen Yao, Zhaozhuo Xu, Anshumali Shrivastava", "tldr": "", "abstract": "Large Language Model (LLM) inference on Central Processing Units (CPU) is challenging due to the vast quantities of Multiply-Add (MAD) matrix operations in the attention computations. This paper highlights a rare gem in modern CPUs, Single-Instruction-Multiple-Data (SIMD) registers, which allows for ultra-low-latency lookups in a batch. We leverage this unique capability to propose NoMAD-Attention, an efficient attention algorithm that replaces MAD operations with in-register lookups. Through hardware-aware algorithmic designs, NoMAD-Attention achieves the computation of attention scores using repeated fast accesses to SIMD registers. NoMAD-Attention works with pre-trained attention-based LLMs without model finetuning. Extensive empirical evaluations demonstrate that NoMAD-Attention maintains the quality of the original LLMs well and speeds up the 4-bit quantized LLaMA-7B-based model by up to $2 \\times$ at 16k context length.", "keywords": "large language model;efficiency;CPU inference;attention", "primary_area": "other", "supplementary_material": "", "author": "Tianyi Zhang;Jonah Wonkyu Yi;Bowen Yao;Zhaozhuo Xu;Anshumali Shrivastava", "authorids": "~Tianyi_Zhang6;~Jonah_Wonkyu_Yi1;~Bowen_Yao3;~Zhaozhuo_Xu1;~Anshumali_Shrivastava1", "gender": "M;M;M;M;M", "homepage": "https://github.com/tonyzhang617;https://www.jonahyi.com/;;https://ottovonxu.github.io/;https://www.cs.rice.edu/~as143/", "dblp": "17/322-11.html;;;195/4352;63/9828", "google_scholar": "ekRl428AAAAJ;;;7tDlVAsAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ", "orcid": ";;;;", "linkedin": ";jonah-w-yi;bowenyao18;;", "or_profile": "~Tianyi_Zhang6;~Jonah_Wonkyu_Yi1;~Bowen_Yao3;~Zhaozhuo_Xu1;~Anshumali_Shrivastava1", "aff": "Rice University;Rice University;Rice University;Stevens Institute of Technology;ThirdAI Corp.", "aff_domain": "rice.edu;rice.edu;rice.edu;stevens.edu;thirdai.com", "position": "PhD student;Undergrad student;Undergrad student;Assistant Professor;CEO", "bibtex": "@inproceedings{\nzhang2024nomadattention,\ntitle={No{MAD}-Attention: Efficient {LLM} Inference on {CPU}s Through Multiply-add-free Attention},\nauthor={Tianyi Zhang and Jonah Wonkyu Yi and Bowen Yao and Zhaozhuo Xu and Anshumali Shrivastava},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4xDxVQHsbZ}\n}", "github": "", "reviewers": "hE7X;CZ4j;QJwg;z7Yd", "pdf_size": 1139749, "rating": "6;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;4", "novelty": "3;2;2;4", "presentation": "3;3;2;4", "wc_summary": "15;123;126;37", "wc_strengths": "28;48;42;82", "wc_weaknesses": "21;115;154;68", "wc_questions": "3;2;3;17", "wc_limitations": "23;1;7;1", "wc_review": "90;289;332;205", "wc_reply_reviewers": "16;20;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 49.87171041783107 ], "wc_strengths_avg": [ 50.0, 19.849433241279208 ], "wc_weaknesses_avg": [ 89.5, 49.912423303221814 ], "wc_questions_avg": [ 6.25, 6.219927652312364 ], "wc_limitations_avg": [ 8.0, 9.0 ], "wc_review_avg": [ 229.0, 92.33904916122972 ], "wc_reply_reviewers_avg": [ 12.75, 7.595228765481656 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17017615354822248478&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "rice.edu;rice.edu;rice.edu;stevens.edu;thirdai.com", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Rice University;Stevens Institute of Technology;ThirdAI Corp.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.stevens.edu;", "aff_unique_abbr": "Rice;SIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Motion Graph Unleashed: A Novel Approach to Video Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96622", "id": "4ztP4PujOG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=4ztP4PujOG", "openreview": "https://openreview.net/forum?id=4ztP4PujOG", "poster": "/media/PosterPDFs/NeurIPS%202024/96622.png?t=1731696872.4563985", "project": "", "author_site": "Yiqi Zhong, Luming Liang, Bohan Tang, Ilya Zharkov, Ulrich Neumann", "tldr": "", "abstract": "We introduce motion graph, a novel approach to address the video prediction problem, i.e., predicting future video frames from limited past data. The motion graph transforms patches of video frames into interconnected graph nodes, to comprehensively describe the spatial-temporal relationships among them. This representation overcomes the limitations of existing motion representations such as image differences, optical flow, and motion matrix that either fall short in capturing complex motion patterns or suffer from excessive memory consumption. We further present a video prediction pipeline empowered by motion graph, exhibiting substantial performance improvements and cost reductions. Extensive experiments on various datasets, including UCF Sports, KITTI and Cityscapes, highlight the strong representative ability of motion graph. Especially on UCF Sports, our method matches and outperforms the SOTA methods with a significant reduction in model size by 78% and a substantial decrease in GPU memory utilization by 47%.", "keywords": "Video;Motion;Low-level computer vision;Frame synthesis", "primary_area": "machine_vision", "supplementary_material": "/attachment/1a1eb30246afae5ce60a42555e12032f4ad10d4c.zip", "author": "Yiqi Zhong;Luming Liang;Bohan Tang;Ilya Zharkov;Ulrich Neumann", "authorids": "~Yiqi_Zhong1;~Luming_Liang2;~Bohan_Tang1;~Ilya_Zharkov1;~Ulrich_Neumann1", "gender": "F;M;M;M;", "homepage": ";;https://github.com/tbh-98;;", "dblp": "243/3355;46/6624;304/8908;217/3421;", "google_scholar": "Bv8l8jkAAAAJ;vTgdAS4AAAAJ;;;", "orcid": ";;;;", "linkedin": ";luming-liang-76185b19/;;;", "or_profile": "~Yiqi_Zhong1;~Luming_Liang2;~Bohan_Tang1;~Ilya_Zharkov1;~Ulrich_Neumann1", "aff": "Microsoft;Microsoft;University of Oxford;Microsoft;", "aff_domain": "microsoft.com;microsoft.com;ox.ac.uk;microsoft.com;", "position": "Researcher;Principal Researcher;PhD student;Principal Research Manager;", "bibtex": "@inproceedings{\nzhong2024motion,\ntitle={Motion Graph Unleashed: A Novel Approach to Video Prediction},\nauthor={Yiqi Zhong and Luming Liang and Bohan Tang and Ilya Zharkov and Ulrich Neumann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=4ztP4PujOG}\n}", "github": "", "reviewers": "V8rg;gkX2;Vf8f;Mw8x", "pdf_size": 26046728, "rating": "5;5;7;7", "confidence": "4;2;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "4;2;4;2", "wc_summary": "23;69;48;54", "wc_strengths": "13;39;45;34", "wc_weaknesses": "54;40;54;505", "wc_questions": "1;40;133;2", "wc_limitations": "6;11;35;8", "wc_review": "97;199;315;603", "wc_reply_reviewers": "95;0;0;28", "wc_reply_authors": "85;0;0;24", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 48.5, 16.590660023037056 ], "wc_strengths_avg": [ 32.75, 12.04937757728589 ], "wc_weaknesses_avg": [ 163.25, 197.3922174251052 ], "wc_questions_avg": [ 44.0, 53.73546315051169 ], "wc_limitations_avg": [ 15.0, 11.683321445547923 ], "wc_review_avg": [ 303.5, 189.33766133550927 ], "wc_reply_reviewers_avg": [ 30.75, 38.81607270191048 ], "wc_reply_authors_avg": [ 27.25, 34.751798514609284 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11824815915662320014&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;microsoft.com;ox.ac.uk;microsoft.com;", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Microsoft;University of Oxford", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.ox.ac.uk", "aff_unique_abbr": "Microsoft;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Exploring Low-Dimensional Subspace in Diffusion Models for Controllable Image Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96621", "id": "50aOEfb2km", "proceeding": "", "pdf": "https://openreview.net/pdf?id=50aOEfb2km", "openreview": "https://openreview.net/forum?id=50aOEfb2km", "poster": "/media/PosterPDFs/NeurIPS%202024/96621.png?t=1731348231.7331488", "project": "", "author_site": "Siyi Chen, Huijie Zhang, Minzhe Guo, Yifu Lu, Peng Wang, Qing Qu", "tldr": "", "abstract": "Recently, diffusion models have emerged as a powerful class of generative models. \nDespite their success, there is still limited understanding of their semantic spaces. This makes it challenging to achieve precise and disentangled image generation without additional training, especially in an unsupervised way. \nIn this work, we improve the understanding of their semantic spaces from intriguing observations: among a certain range of noise levels, (1) the learned posterior mean predictor (PMP) in the diffusion model is locally linear, and (2) the singular vectors of its Jacobian lie in low-dimensional semantic subspaces. We provide a solid theoretical basis to justify the linearity and low-rankness in the PMP. These insights allow us to propose an unsupervised, single-step, training-free **LO**w-rank **CO**ntrollable image editing (LOCO Edit) method for precise local editing in diffusion models. LOCO Edit identified editing directions with nice properties: homogeneity, transferability, composability, and linearity. These properties of LOCO Edit benefit greatly from the low-dimensional semantic subspace.\nOur method can further be extended to unsupervised or text-supervised editing in various text-to-image diffusion models (T-LOCO Edit). Finally, extensive empirical experiments demonstrate the effectiveness and efficiency of LOCO Edit. The code and the arXiv version can be found on the [project website](https://chicychen.github.io/LOCO).", "keywords": "diffusion model;precise image manipulation;low-rank", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Siyi Chen;Huijie Zhang;Minzhe Guo;Yifu Lu;Peng Wang;Qing Qu", "authorids": "~Siyi_Chen1;~Huijie_Zhang2;~Minzhe_Guo1;~Yifu_Lu1;~Peng_Wang23;~Qing_Qu2", "gender": "F;M;M;;M;M", "homepage": "https://chicychen.github.io/;https://www.huijiezh.com/;;;https://peng8wang.github.io/;https://qingqu.engin.umich.edu/", "dblp": "195/3147-3;;;;95/4442-98;127/6874-1", "google_scholar": "j65QlFkAAAAJ;https://scholar.google.com/citations?view_op=list_works;;ybsmKpsAAAAJ;baF3HKUAAAAJ;JfblW3MAAAAJ", "orcid": "0000-0002-3304-4486;;;;0000-0002-6799-0745;0000-0001-9136-558X", "linkedin": "siyi-chen-a25a30206/;;minzhe-guo/;yifu-lu-3547b321b;;qing-q-1a0b9746/", "or_profile": "~Siyi_Chen1;~Huijie_Zhang2;~Minzhe_Guo1;~Yifu_Lu1;~Peng_Wang23;~Qing_Qu2", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nchen2024exploring,\ntitle={Exploring Low-Dimensional Subspace in Diffusion Models for Controllable Image Editing},\nauthor={Siyi Chen and Huijie Zhang and Minzhe Guo and Yifu Lu and Peng Wang and Qing Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=50aOEfb2km}\n}", "github": "", "reviewers": "hJrH;pgJR;h6gb;eLFT;9wnQ", "pdf_size": 18132548, "rating": "5;5;5;6;7", "confidence": "4;3;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "68;96;105;47;198", "wc_strengths": "12;144;37;54;113", "wc_weaknesses": "117;106;193;19;214", "wc_questions": "189;87;3;89;103", "wc_limitations": "61;41;34;7;25", "wc_review": "447;474;372;216;653", "wc_reply_reviewers": "60;163;0;0;113", "wc_reply_authors": "236;392;0;0;167", "reply_reviewers": "1;1;0;0;1", "reply_authors": "3;2;1;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 102.8, 51.84361098534708 ], "wc_strengths_avg": [ 72.0, 49.01836390578535 ], "wc_weaknesses_avg": [ 129.8, 69.41296708828979 ], "wc_questions_avg": [ 94.2, 59.09450059015644 ], "wc_limitations_avg": [ 33.6, 17.816845961056067 ], "wc_review_avg": [ 432.4, 142.18663790947446 ], "wc_reply_reviewers_avg": [ 67.2, 63.81034398904303 ], "wc_reply_authors_avg": [ 159.0, 148.89190710041967 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.375, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15736830504120093060&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gaussian Process Bandits for Top-k Recommendations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96620", "id": "50nEnmVLRb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=50nEnmVLRb", "openreview": "https://openreview.net/forum?id=50nEnmVLRb", "poster": "/media/PosterPDFs/NeurIPS%202024/96620.png?t=1734026244.7670786", "project": "", "author_site": "Mohit Yadav, Cameron Musco, Daniel Sheldon", "tldr": "", "abstract": "Algorithms that utilize bandit feedback to optimize top-k recommendations are vital for online marketplaces, search engines, and content platforms. However, the combinatorial nature of this problem poses a significant challenge, as the possible number of ordered top-k recommendations from $n$ items grows exponentially with $k$. As a result, previous work often relies on restrictive assumptions about the reward or bandit feedback models, such as assuming that the feedback discloses rewards for each recommended item rather than a single scalar feedback for the entire set of top-k recommendations. We introduce a novel contextual bandit algorithm for top-k recommendations, leveraging a Gaussian process with a Kendall kernel to model the reward function.\nOur algorithm requires only scalar feedback from \nthe top-k recommendations and does not impose restrictive assumptions on the reward structure. \nTheoretical analysis confirms that the proposed algorithm achieves sub-linear regret in relation to the number of rounds and arms. Additionally, empirical results using a bandit simulator demonstrate that the proposed algorithm outperforms other baselines across various scenarios.", "keywords": "Gaussian processes;Bandit algorithms;Top-k recommendations;Linear Algebra;Iterative Algorithms", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Mohit Yadav;Cameron N Musco;Daniel Sheldon", "authorids": "~Mohit_Yadav2;~Cameron_N_Musco1;~Daniel_Sheldon1", "gender": "M;M;M", "homepage": ";https://people.cs.umass.edu/~cmusco/;https://people.cs.umass.edu/~sheldon/", "dblp": ";149/2327;58/766", "google_scholar": "https://scholar.google.co.in/citations?user=tPoZrDMAAAAJ;EeYGZCwAAAAJ;https://scholar.google.com.tw/citations?user=P1bHFuoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mohit_Yadav2;~Cameron_N_Musco1;~Dan_Sheldon1", "aff": "PythiaLabs Inc;University of Massachusetts, Amherst;University of Massachusetts, Amherst", "aff_domain": "pythialabs.com;umass.edu;umass.edu", "position": "Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nyadav2024gaussian,\ntitle={Gaussian Process Bandits for Top-k Recommendations},\nauthor={Mohit Yadav and Cameron N Musco and Daniel Sheldon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=50nEnmVLRb}\n}", "github": "", "reviewers": "DrUM;9Xg2;N1XB;tLdn", "pdf_size": 2725814, "rating": "5;5;6;8", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "3;2;2;4", "presentation": "3;4;3;4", "wc_summary": "75;84;133;81", "wc_strengths": "62;65;15;79", "wc_weaknesses": "154;190;19;345", "wc_questions": "43;88;349;8", "wc_limitations": "7;1;3;1", "wc_review": "341;428;519;514", "wc_reply_reviewers": "119;14;4;0", "wc_reply_authors": "80;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 93.25, 23.177305710543667 ], "wc_strengths_avg": [ 55.25, 24.107830678018296 ], "wc_weaknesses_avg": [ 177.0, 116.06679111615001 ], "wc_questions_avg": [ 122.0, 134.09138674799362 ], "wc_limitations_avg": [ 3.0, 2.449489742783178 ], "wc_review_avg": [ 450.5, 72.83714711601492 ], "wc_reply_reviewers_avg": [ 34.25, 49.19540120783649 ], "wc_reply_authors_avg": [ 20.0, 34.64101615137755 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2HT41mgFODYJ:scholar.google.com/&scioq=Gaussian+Process+Bandits+for+Top-k+Recommendations&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "pythialabs.com;umass.edu;umass.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "PythiaLabs;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": ";https://www.umass.edu", "aff_unique_abbr": "PythiaLabs;UMass Amherst", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DiTFastAttn: Attention Compression for Diffusion Transformer Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96619", "id": "51HQpkQy3t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=51HQpkQy3t", "openreview": "https://openreview.net/forum?id=51HQpkQy3t", "poster": "/media/PosterPDFs/NeurIPS%202024/96619.png?t=1732421483.4727526", "project": "", "author_site": "Zhihang Yuan, Hanling Zhang, Lu Pu, Xuefei Ning, Linfeng Zhang, Tianchen Zhao, Shengen Yan, Guohao Dai, Yu Wang", "tldr": "", "abstract": "Diffusion Transformers (DiT) excel at image and video generation but face computational challenges due to the quadratic complexity of self-attention operators. We propose DiTFastAttn, a post-training compression method to alleviate the computational bottleneck of DiT.\nWe identify three key redundancies in the attention computation during DiT inference: (1) spatial redundancy, where many attention heads focus on local information; (2) temporal redundancy, with high similarity between the attention outputs of neighboring steps; (3) conditional redundancy, where conditional and unconditional inferences exhibit significant similarity. We propose three techniques to reduce these redundancies: (1) $\\textit{Window Attention with Residual Sharing}$ to reduce spatial redundancy; (2) $\\textit{Attention Sharing across Timesteps}$ to exploit the similarity between steps; (3) $\\textit{Attention Sharing across CFG}$ to skip redundant computations during conditional generation.", "keywords": "Diffusion Transformer;Attention;Acceleration", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/e9d2240129ce210ae11087fddf2eb2d527ed3b0d.zip", "author": "Zhihang Yuan;Hanling Zhang;Lu Pu;Xuefei Ning;Linfeng Zhang;Tianchen Zhao;Shengen Yan;Guohao Dai;Yu Wang", "authorids": "~Zhihang_Yuan1;~Hanling_Zhang3;~Lu_Pu2;~Xuefei_Ning1;~Linfeng_Zhang2;~Tianchen_Zhao2;~Shengen_Yan1;~Guohao_Dai4;~Yu_Wang3", "gender": "M;M;M;Not Specified;M;M;M;M;M", "homepage": "http://zhihang.cc;;https://mails.tsinghua.edu.cn/coremail/XT5/index.jsp?sid=EARBqYhhWeyDpdrGLOyqVEnKkZZvTTeM#mail.read%7C%7B%22fid%22%3A1%2C%22mid%22%3A%222%3A1tbiAgAQC2SBCrl43gAAsp%22%2C%22mboxa%22%3A%22%22%2C%22start%22%3A0%7D;https://nics-effalg.com/ningxuefei/;http://www.zhanglinfeng.tech/;https://nicsefc.ee.tsinghua.edu.cn/people/tianchen-zhao/;;https://nicsefc.ee.tsinghua.edu.cn/people/guohao-dai/;https://nicsefc.ee.tsinghua.edu.cn", "dblp": "195/4180;;;202/9525;93/488-1;217/2471;117/6968;147/1470;w/YuWang2.html", "google_scholar": "https://scholar.google.ca/citations?user=iipYHLoAAAAJ;;;oVslpJsAAAAJ;AK9VF30AAAAJ;;SvE3bdUAAAAJ;gz3Tkl0AAAAJ;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ", "orcid": ";;;;0000-0002-3341-183X;;;;0000-0001-6108-5157", "linkedin": ";hanlingz/;;;;;;;", "or_profile": "~Zhihang_Yuan1;~Hanling_Zhang3;~Lu_Pu2;~Xuefei_Ning1;~Linfeng_Zhang2;~Tianchen_Zhao2;~Shengen_Yan1;~Guohao_Dai4;~Yu_Wang3", "aff": "Houmo AI;Infinigence AI ;Tsinghua University;Tsinghua University;Tsinghua University;Infinigence;Tsinghua University;Shanghai Jiaotong University;Tsinghua University", "aff_domain": "houmo.ai;infinigence.ai;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;infini-ai.com;tsinghua.edu.cn;sjtu.edu.cn;tsinghua.edu.cn", "position": "Researcher;Intern;MS student;Research Assistant Professor;PhD student;Intern;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyuan2024ditfastattn,\ntitle={Di{TF}astAttn: Attention Compression for Diffusion Transformer Models},\nauthor={Zhihang Yuan and Hanling Zhang and Lu Pu and Xuefei Ning and Linfeng Zhang and Tianchen Zhao and Shengen Yan and Guohao Dai and Yu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=51HQpkQy3t}\n}", "github": "", "reviewers": "8Fw6;e43K;CWF1;5A6V", "pdf_size": 7960780, "rating": "5;5;5;7", "confidence": "4;3;4;4", "soundness": "3;2;2;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "44;67;51;54", "wc_strengths": "14;42;56;248", "wc_weaknesses": "111;185;62;94", "wc_questions": "2;67;436;459", "wc_limitations": "2;9;15;13", "wc_review": "173;370;620;868", "wc_reply_reviewers": "11;49;819;320", "wc_reply_authors": "449;92;1467;387", "reply_reviewers": "1;1;3;2", "reply_authors": "3;2;5;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.0, 8.336666000266533 ], "wc_strengths_avg": [ 90.0, 92.46621004453465 ], "wc_weaknesses_avg": [ 113.0, 45.13867521316947 ], "wc_questions_avg": [ 241.0, 207.9338837226872 ], "wc_limitations_avg": [ 9.75, 4.968651728587948 ], "wc_review_avg": [ 507.75, 261.44442526089557 ], "wc_reply_reviewers_avg": [ 299.75, 322.59988763172254 ], "wc_reply_authors_avg": [ 598.75, 519.113848303048 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3589035779907731954&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "houmo.ai;infinigence.ai;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;infini-ai.com;tsinghua.edu.cn;sjtu.edu.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;2;2;3;2;4;2", "aff_unique_norm": "Houmo AI;Infinigence AI;Tsinghua University;Infinigence;Shanghai Jiao Tong University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.houmo.ai;;https://www.tsinghua.edu.cn;;https://www.sjtu.edu.cn", "aff_unique_abbr": "Houmo AI;;THU;;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Bootstrapping Top-down Information for Self-modulating Slot Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96618", "id": "52PTSrAQQM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=52PTSrAQQM", "openreview": "https://openreview.net/forum?id=52PTSrAQQM", "poster": "/media/PosterPDFs/NeurIPS%202024/96618.png?t=1733854225.7724252", "project": "", "author_site": "Dongwon Kim, Seoyeon Kim, Suha Kwak", "tldr": "", "abstract": "Object-centric learning (OCL) aims to learn representations of individual objects within visual scenes without manual supervision, facilitating efficient and effective visual reasoning. Traditional OCL methods primarily employ bottom-up approaches that aggregate homogeneous visual features to represent objects. However, in complex visual environments, these methods often fall short due to the heterogeneous nature of visual features within an object. To address this, we propose a novel OCL framework incorporating a top-down pathway. This pathway first bootstraps the semantics of individual objects and then modulates the model to prioritize features relevant to these semantics. By dynamically modulating the model based on its own output, our top-down pathway enhances the representational quality of objects. Our framework achieves state-of-the-art performance across multiple synthetic and real-world object-discovery benchmarks.", "keywords": "object-centric learning;object discovery;slot attention", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dongwon Kim;Seoyeon Kim;Suha Kwak", "authorids": "~Dongwon_Kim1;~Seoyeon_Kim1;~Suha_Kwak3", "gender": "M;;M", "homepage": "https://kdwonn.github.io;;https://suhakwak.github.io/", "dblp": "53/471;;65/6173", "google_scholar": "abXotYsAAAAJ;;-gscDIEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Dongwon_Kim1;~Seoyeon_Kim1;~Suha_Kwak3", "aff": "POSTECH;;POSTECH", "aff_domain": "postech.ac.kr;;postech.ac.kr", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nkim2024bootstrapping,\ntitle={Bootstrapping Top-down Information for Self-modulating Slot Attention},\nauthor={Dongwon Kim and Seoyeon Kim and Suha Kwak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=52PTSrAQQM}\n}", "github": "", "reviewers": "PJTV;qEia;3ckb;Yith", "pdf_size": 2860817, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "125;91;187;85", "wc_strengths": "33;67;119;51", "wc_weaknesses": "286;83;458;149", "wc_questions": "28;28;92;89", "wc_limitations": "18;25;1;1", "wc_review": "490;294;857;375", "wc_reply_reviewers": "48;209;59;171", "wc_reply_authors": "48;145;51;51", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 122.0, 40.50925820105819 ], "wc_strengths_avg": [ 67.5, 32.07413287993925 ], "wc_weaknesses_avg": [ 244.0, 143.6192883981814 ], "wc_questions_avg": [ 59.25, 31.267994818983837 ], "wc_limitations_avg": [ 11.25, 10.54454835448157 ], "wc_review_avg": [ 504.0, 215.37525391743594 ], "wc_reply_reviewers_avg": [ 121.75, 69.66841106269038 ], "wc_reply_authors_avg": [ 73.75, 41.154434754956846 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_duVoxeHiJYJ:scholar.google.com/&scioq=Bootstrapping+Top-down+Information+for+Self-modulating+Slot+Attention&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "postech.ac.kr;;postech.ac.kr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Improving Context-Aware Preference Modeling for Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96617", "id": "52r4XJYzjg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=52r4XJYzjg", "openreview": "https://openreview.net/forum?id=52r4XJYzjg", "poster": "", "project": "", "author_site": "Silviu Pitis, Ziang Xiao, Nicolas Le Roux, Alessandro Sordoni", "tldr": "", "abstract": "While finetuning language models from pairwise preferences has proven remarkably effective, the underspecified nature of natural language presents critical challenges. Direct preference feedback is uninterpretable, difficult to provide where multidimensional criteria may apply, and often inconsistent, either because it is based on incomplete instructions or provided by diverse principals. To address these challenges, we consider the two-step preference modeling procedure that first resolves the under-specification by selecting a context, and then evaluates preference with respect to the chosen context. We decompose reward modeling error according to these two steps, which suggests that supervising context in addition to context-specific preference may be a viable approach to aligning models with diverse human preferences. For this to work, the ability of models to evaluate context-specific preference is critical. To this end, we contribute context-conditioned preference datasets and accompanying experiments that investigate the ability of language models to evaluate context-specific preference. Unlike past datasets, where context-specific preference is highly correlated with general preference, our \"preference reversal\" datasets disentangle context-specific and general preferences to isolate context-specific capabilities. We use our datasets to (1) show that existing preference models benefit from, but fail to fully consider, added context, (2) finetune a context-aware reward model with context-specific performance exceeding that of GPT-4 and Llama 3 70B, and (3) investigate the potential value of context-aware preference modeling.", "keywords": "context-specific preference modeling;preference modeling;reward modeling;language modeling;rlaif;rlhf", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Silviu Pitis;Ziang Xiao;Nicolas Le Roux;Alessandro Sordoni", "authorids": "~Silviu_Pitis1;~Ziang_Xiao1;~Nicolas_Le_Roux2;~Alessandro_Sordoni2", "gender": "M;;M;M", "homepage": "https://silviupitis.com;;;http://nicolas.le-roux.name", "dblp": "https://dblp.org/pers/hd/p/Pitis:Silviu;196;57/7642;http://dblp.uni-trier.de/pers/hd/r/Roux:Nicolas_Le", "google_scholar": "oYlo1ycAAAAJ;MjkODLEAAAAJ;;https://scholar.google.fr/citations?user=LmKtwk8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Silviu_Pitis1;~Ziang_Xiao1;~Alessandro_Sordoni1;~Nicolas_Le_Roux1", "aff": ";Department of Computer Science, Whiting School of Engineering;Microsoft;Microsoft", "aff_domain": ";cs.jhu.edu;microsoft.com;microsoft.com", "position": ";Assistant Professor;Researcher;Researcher", "bibtex": "@inproceedings{\npitis2024improving,\ntitle={Improving Context-Aware Preference Modeling for Language Models},\nauthor={Silviu Pitis and Ziang Xiao and Nicolas Le Roux and Alessandro Sordoni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=52r4XJYzjg}\n}", "github": "", "reviewers": "mLs1;j5YU;t1sE;rZtt", "pdf_size": 546681, "rating": "4;4;5;6", "confidence": "4;3;4;4", "soundness": "2;2;2;3", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "38;267;54;90", "wc_strengths": "28;45;30;106", "wc_weaknesses": "79;249;185;72", "wc_questions": "2;184;57;6", "wc_limitations": "8;1;4;1", "wc_review": "155;746;330;275", "wc_reply_reviewers": "0;0;138;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.25, 91.30820061746918 ], "wc_strengths_avg": [ 52.25, 31.72045869781835 ], "wc_weaknesses_avg": [ 146.25, 74.32151438177239 ], "wc_questions_avg": [ 62.25, 73.5607741938596 ], "wc_limitations_avg": [ 3.5, 2.8722813232690143 ], "wc_review_avg": [ 376.5, 222.5179768018755 ], "wc_reply_reviewers_avg": [ 34.5, 59.75575286112627 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10287466099316840138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";cs.jhu.edu;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Johns Hopkins University;Microsoft", "aff_unique_dep": "Department of Computer Science;Microsoft Corporation", "aff_unique_url": "https://www.jhu.edu;https://www.microsoft.com", "aff_unique_abbr": "JHU;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Baltimore;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "T2V-Turbo: Breaking the Quality Bottleneck of Video Consistency Model with Mixed Reward Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96616", "id": "53daI9kbvf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=53daI9kbvf", "openreview": "https://openreview.net/forum?id=53daI9kbvf", "poster": "/media/PosterPDFs/NeurIPS%202024/96616.png?t=1733992112.5240517", "project": "", "author_site": "Jiachen Li, Weixi Feng, Tsu-Jui Fu, Xinyi Wang, S Basu, Wenhu Chen, William Yang Wang", "tldr": "", "abstract": "Diffusion-based text-to-video (T2V) models have achieved significant success but continue to be hampered by the slow sampling speed of their iterative sampling processes. To address the challenge, consistency models have been proposed to facilitate fast inference, albeit at the cost of sample quality. In this work, we aim to break the quality bottleneck of a video consistency model (VCM) to achieve **both fast and high-quality video generation**. We introduce T2V-Turbo, which integrates feedback from a mixture of differentiable reward models into the consistency distillation (CD) process of a pre-trained T2V model. Notably, we directly optimize rewards associated with single-step generations that arise naturally from computing the CD loss, effectively bypassing the memory constraints imposed by backpropagating gradients through an iterative sampling process. Remarkably, the 4-step generations from our T2V-Turbo achieve the highest total score on VBench, even surpassing Gen-2 and Pika. We further conduct human evaluations to corroborate the results, validating that the 4-step generations from our T2V-Turbo are preferred over the 50-step DDIM samples from their teacher models, representing more than a tenfold acceleration while improving video generation quality.", "keywords": "Text-to-Video generation; consistency model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/d01ad1ed983f6f7306ac947b4c974ad14532ede7.zip", "author": "Jiachen Li;Weixi Feng;Tsu-Jui Fu;Xinyi Wang;S Basu;Wenhu Chen;William Yang Wang", "authorids": "~Jiachen_Li6;~Weixi_Feng2;~Tsu-Jui_Fu2;~Xinyi_Wang2;~S_Basu1;~Wenhu_Chen3;~William_Yang_Wang2", "gender": "M;M;M;F;M;M;M", "homepage": "https://sites.google.com/view/jiachenli/;https://weixi-feng.github.io/;https://tsujuifu.github.io;https://wangxinyilinda.github.io/;http://sugatobasu.com/;https://wenhuchen.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": ";322/1026;218/5366.html;;76/5024;136/0957.html;08/9282", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;3vvbplcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=U8ShbhUAAAAJ;gf8Ms_8AAAAJ", "orcid": ";0000-0002-7201-5688;;;;;", "linkedin": ";weixifeng/;tsujuifu1996;xinyi-wang-444385133/;;;", "or_profile": "~Jiachen_Li6;~Weixi_Feng2;~Tsu-Jui_Fu2;~Xinyi_Wang2;~S_Basu1;~wenhu_chen1;~William_Wang1", "aff": "University of California, Santa Barbara;University of California, Santa Barbara;UC Santa Barbara;International Business Machines;Google;University of Waterloo;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;ucsb.edu;ibm.com;google.com;uwaterloo.ca;ucsb.edu", "position": "PhD student;PhD student;PhD student;Intern;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2024tvturbo,\ntitle={T2V-Turbo: Breaking the Quality Bottleneck of Video Consistency Model with Mixed Reward Feedback},\nauthor={Jiachen Li and Weixi Feng and Tsu-Jui Fu and Xinyi Wang and S Basu and Wenhu Chen and William Yang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=53daI9kbvf}\n}", "github": "", "reviewers": "Kq5y;MMwk;61hZ;x94N;niV6", "pdf_size": 19054395, "rating": "4;4;6;7;7", "confidence": "4;4;3;4;4", "soundness": "3;2;4;3;4", "novelty": "3;2;2;4;3", "presentation": "3;3;4;4;3", "wc_summary": "40;64;96;99;83", "wc_strengths": "33;21;83;57;107", "wc_weaknesses": "154;141;146;122;77", "wc_questions": "20;4;63;114;61", "wc_limitations": "6;6;4;87;83", "wc_review": "253;236;392;479;411", "wc_reply_reviewers": "67;206;25;68;0", "wc_reply_authors": "433;845;42;17;394", "reply_reviewers": "1;2;1;1;0", "reply_authors": "4;5;2;2;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 76.4, 21.987269043698902 ], "wc_strengths_avg": [ 60.2, 31.612655693566783 ], "wc_weaknesses_avg": [ 128.0, 27.589853207293437 ], "wc_questions_avg": [ 52.4, 38.40104165253854 ], "wc_limitations_avg": [ 37.2, 39.055857435217064 ], "wc_review_avg": [ 354.2, 94.27915994534528 ], "wc_reply_reviewers_avg": [ 73.2, 71.2499824561382 ], "wc_reply_authors_avg": [ 346.2, 303.16028763675496 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 1.469693845669907 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.14744195615489716, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6932911971744406159&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsb.edu;ucsb.edu;ucsb.edu;ibm.com;google.com;uwaterloo.ca;ucsb.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "University of California, Santa Barbara;International Business Machines Corporation;Google;University of Waterloo", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.ucsb.edu;https://www.ibm.com;https://www.google.com;https://uwaterloo.ca", "aff_unique_abbr": "UCSB;IBM;Google;UW", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Santa Barbara;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Graph-enhanced Optimizers for Structure-aware Recommendation Embedding Evolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96614", "id": "55zLbH7dE1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=55zLbH7dE1", "openreview": "https://openreview.net/forum?id=55zLbH7dE1", "poster": "/media/PosterPDFs/NeurIPS%202024/96614.png?t=1729067697.6629758", "project": "", "author_site": "Cong Xu, Jun Wang, Jianyong Wang, Wei Zhang", "tldr": "", "abstract": "Embedding plays a key role in modern recommender systems because they are virtual representations of real-world entities and the foundation for subsequent decision-making models. In this paper, we propose a novel embedding update mechanism, Structure-aware Embedding Evolution (SEvo for short), to encourage related nodes to evolve similarly at each step. Unlike GNN (Graph Neural Network) that typically serves as an intermediate module, SEvo is able to directly inject graph structural information into embedding with minimal computational overhead during training. The convergence properties of SEvo along with its potential variants are theoretically analyzed to justify the validity of the designs. Moreover, SEvo can be seamlessly integrated into existing optimizers for state-of-the-art performance. Particularly SEvo-enhanced AdamW with moment estimate correction demonstrates consistent improvements across a spectrum of models and datasets, suggesting a novel technical route to effectively utilize graph structural information beyond explicit GNN modules.", "keywords": "Embedding optimization;Recommender systems;Graph;Optimizers", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/f795e3e4897abb6494fe001debb3764f333a0e08.zip", "author": "Cong Xu;Jun Wang;Jianyong Wang;Wei Zhang", "authorids": "~Cong_Xu4;~Jun_Wang4;~Jianyong_Wang2;~Wei_Zhang27", "gender": "M;M;M;M", "homepage": "https://github.com/MTandHJ;;http://dbgroup.cs.tsinghua.edu.cn/wangjy/;https://weizhangltt.github.io/", "dblp": ";;24/2006;10/4661-56", "google_scholar": ";;VfBaiG8AAAAJ;DKcduF0AAAAJ", "orcid": "0000-0002-9278-1363;;0000-0002-7555-170X;0000-0001-6763-8146", "linkedin": ";;;", "or_profile": "~Cong_Xu4;~Jun_Wang4;~Jianyong_Wang2;~Wei_Zhang27", "aff": "East China Normal University;;Tsinghua University;East China Normal University", "aff_domain": "ecnu.edu.cn;;tsinghua.edu.cn;ecnu.edu.cn", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024graphenhanced,\ntitle={Graph-enhanced Optimizers for Structure-aware Recommendation Embedding Evolution},\nauthor={Cong Xu and Jun Wang and Jianyong Wang and Wei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=55zLbH7dE1}\n}", "github": "", "reviewers": "DLFs;9GVe;feMV;Um97", "pdf_size": 4412524, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;4", "wc_summary": "84;72;66;81", "wc_strengths": "25;91;40;75", "wc_weaknesses": "13;47;144;95", "wc_questions": "111;20;60;71", "wc_limitations": "4;11;16;4", "wc_review": "237;241;326;326", "wc_reply_reviewers": "12;0;76;12", "wc_reply_authors": "20;0;22;16", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 75.75, 7.1545440106270926 ], "wc_strengths_avg": [ 57.75, 26.413774815425377 ], "wc_weaknesses_avg": [ 74.75, 49.4690559845243 ], "wc_questions_avg": [ 65.5, 32.407560846197605 ], "wc_limitations_avg": [ 8.75, 5.0682837331783235 ], "wc_review_avg": [ 282.5, 43.522982434571276 ], "wc_reply_reviewers_avg": [ 25.0, 29.8496231131986 ], "wc_reply_authors_avg": [ 14.5, 8.645808232895291 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1198767758893527096&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ecnu.edu.cn;;tsinghua.edu.cn;ecnu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "East China Normal University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "ECNU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Molecule Generation with Fragment Retrieval Augmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96613", "id": "56Q0qggDlp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=56Q0qggDlp", "openreview": "https://openreview.net/forum?id=56Q0qggDlp", "poster": "/media/PosterPDFs/NeurIPS%202024/96613.png?t=1731733901.444945", "project": "", "author_site": "Seul Lee, Karsten Kreis, Srimukh Veccham, Meng Liu, Danny Reidenbach, Saee Paliwal, Arash Vahdat, Weili Nie", "tldr": "", "abstract": "Fragment-based drug discovery, in which molecular fragments are assembled into new molecules with desirable biochemical properties, has achieved great success. However, many fragment-based molecule generation methods show limited exploration beyond the existing fragments in the database as they only reassemble or slightly modify the given ones. To tackle this problem, we propose a new fragment-based molecule generation framework with retrieval augmentation, namely *Fragment Retrieval-Augmented Generation* (*f*-RAG). *f*-RAG is based on a pre-trained molecular generative model that proposes additional fragments from input fragments to complete and generate a new molecule. Given a fragment vocabulary, *f*-RAG retrieves two types of fragments: (1) *hard fragments*, which serve as building blocks that will be explicitly included in the newly generated molecule, and (2) *soft fragments*, which serve as reference to guide the generation of new fragments through a trainable *fragment injection module*. To extrapolate beyond the existing fragments, *f*-RAG updates the fragment vocabulary with generated fragments via an iterative refinement process which is further enhanced with post-hoc genetic fragment modification. *f*-RAG can achieve an improved exploration-exploitation trade-off by maintaining a pool of fragments and expanding it with novel and high-quality fragments through a strong generative prior.", "keywords": "fragment-based drug discovery;molecule generation;molecular language model;retrieval-augmented generation", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Seul Lee;Karsten Kreis;Srimukh Prasad Veccham;Meng Liu;Danny Reidenbach;Saee Gopal Paliwal;Arash Vahdat;Weili Nie", "authorids": "~Seul_Lee1;~Karsten_Kreis1;~Srimukh_Prasad_Veccham1;~Meng_Liu3;~Danny_Reidenbach1;~Saee_Gopal_Paliwal1;~Arash_Vahdat3;~Weili_Nie1", "gender": "Not Specified;;M;M;M;F;M;M", "homepage": "https://seullee05.github.io;https://karstenkreis.github.io/;;https://mengliu1998.github.io;;;http://latentspace.cc/;https://weilinie.github.io/", "dblp": "159/0357;238/6834;393/4965.html;41/7841-15;326/8720;174/1411;92/8108;147/4786", "google_scholar": "Ek0N9YYAAAAJ;https://scholar.google.de/citations?user=rFd-DiAAAAAJ;9pQ0Z-wAAAAJ;https://scholar.google.com/citations?hl=en;;8kT_UPsAAAAJ;https://scholar.google.ca/citations?user=p9-nlRIAAAAJ;zW7BH7oAAAAJ", "orcid": ";;;;;;;", "linkedin": ";karstenkreis;;meng-liu-4a1813197/;daniel-reidenbach/;;;", "or_profile": "~Seul_Lee1;~Karsten_Kreis1;~Srimukh_Prasad_Veccham1;~Meng_Liu3;~Danny_Reidenbach1;~Saee_Gopal_Paliwal1;~Arash_Vahdat3;~Weili_Nie1", "aff": "Korea Advanced Institute of Science & Technology;NVIDIA;NVIDIA;NVIDIA;NVIDIA;BenevolentAI;NVIDIA;NVIDIA", "aff_domain": "kaist.ac.kr;nvidia.com;nvidia.com;nvidia.com;nvidia.com;benevolent.ai;nvidia.com;nvidia.com", "position": "PhD student;Research Scientist;Researcher;Researcher;Researcher;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlee2024molecule,\ntitle={Molecule Generation with Fragment Retrieval Augmentation},\nauthor={Seul Lee and Karsten Kreis and Srimukh Prasad Veccham and Meng Liu and Danny Reidenbach and Saee Gopal Paliwal and Arash Vahdat and Weili Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=56Q0qggDlp}\n}", "github": "", "reviewers": "7DmF;eome;SbUr;U6dy;wGFt", "pdf_size": 2127793, "rating": "3;4;6;7;7", "confidence": "5;3;3;4;4", "soundness": "2;2;3;3;4", "novelty": "1;2;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "52;55;126;160;89", "wc_strengths": "14;54;72;76;87", "wc_weaknesses": "231;104;131;56;15", "wc_questions": "2;2;42;14;176", "wc_limitations": "5;38;26;5;7", "wc_review": "304;253;397;311;374", "wc_reply_reviewers": "0;24;38;23;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.624807680927192 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 96.4, 41.62018740947715 ], "wc_strengths_avg": [ 60.6, 25.609373284014584 ], "wc_weaknesses_avg": [ 107.4, 73.51897714195974 ], "wc_questions_avg": [ 47.2, 66.03756506716462 ], "wc_limitations_avg": [ 16.2, 13.46699669562594 ], "wc_review_avg": [ 327.8, 51.70454525474525 ], "wc_reply_reviewers_avg": [ 19.2, 12.859237924542809 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.26318067798390754, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12575169327191817607&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;nvidia.com;nvidia.com;nvidia.com;nvidia.com;benevolent.ai;nvidia.com;nvidia.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;2;1;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NVIDIA;BenevolentAI", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.nvidia.com;https://www.benevolent.ai", "aff_unique_abbr": "KAIST;NVIDIA;BenevolentAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;2;1;1", "aff_country_unique": "South Korea;United States;United Kingdom" }, { "title": "Provably Transformers Harness Multi-Concept Word Semantics for Efficient In-Context Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96612", "id": "57C9mszjj3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=57C9mszjj3", "openreview": "https://openreview.net/forum?id=57C9mszjj3", "poster": "/media/PosterPDFs/NeurIPS%202024/96612.png?t=1730963866.9499142", "project": "", "author_site": "Dake Bu, Wei Huang, Andi Han, Atsushi Nitanda, Taiji Suzuki, Qingfu Zhang, Hau-San Wong", "tldr": "", "abstract": "Transformer-based large language models (LLMs) have displayed remarkable creative prowess and emergence capabilities. Existing empirical studies have revealed a strong connection between these LLMs' impressive emergence abilities and their in-context learning (ICL) capacity, allowing them to solve new tasks using only task-specific prompts without further fine-tuning. On the other hand, existing empirical and theoretical studies also show that there is a linear regularity of the multi-concept encoded semantic representation behind transformer-based LLMs. However, existing theoretical work fail to build up an understanding of the connection between this regularity and the innovative power of ICL. Additionally, prior work often focuses on simplified, unrealistic scenarios involving linear transformers or unrealistic loss functions, and they achieve only linear or sub-linear convergence rates. In contrast, this work provides a fine-grained mathematical analysis to show how transformers leverage the multi-concept semantics of words to enable powerful ICL and excellent out-of-distribution ICL abilities, offering insights into how transformers innovate solutions for certain unseen tasks encoded with multiple cross-concept semantics. Inspired by empirical studies on the linear latent geometry of LLMs, the analysis is based on a concept-based low-noise sparse coding prompt model. Leveraging advanced techniques, this work showcases the exponential 0-1 loss convergence over the highly non-convex training dynamics, which pioneeringly incorporates the challenges of softmax self-attention, ReLU-activated MLPs, and cross-entropy loss. Empirical simulations corroborate the theoretical findings.", "keywords": "In-Context Learning; Learning Theory", "primary_area": "learning_theory", "supplementary_material": "/attachment/53f2ad805076a4f55eb745b6d27699ec21e6a05d.zip", "author": "Dake Bu;Wei Huang;Andi Han;Atsushi Nitanda;Taiji Suzuki;Qingfu Zhang;Hau-San Wong", "authorids": "~Dake_Bu1;~Wei_Huang6;~Andi_Han1;~Atsushi_Nitanda1;~Taiji_Suzuki1;~Qingfu_Zhang1;~Hau-San_Wong1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://weihuang05.github.io/;https://github.com/andyjm3;https://sites.google.com/site/atsushinitanda;http://ibis.t.u-tokyo.ac.jp/suzuki/;https://www.cs.cityu.edu.hk/~qzhan7/index.html;", "dblp": "379/6085;81/6685-34;268/7976.html;155/1884;08/312;98/1240.html;69/2987", "google_scholar": "mWrnNqsAAAAJ;RZfDh4MAAAAJ;AKHQHs0AAAAJ;https://scholar.google.co.jp/citations?user=LyVvaf8AAAAJ;x8osrBsAAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;i9Dh1OkAAAAJ", "orcid": ";0000-0001-5674-7021;0000-0003-4655-655X;;;;", "linkedin": ";;;;;;", "or_profile": "~Dake_Bu1;~Wei_Huang6;~Andi_Han1;~Atsushi_Nitanda1;~Taiji_Suzuki1;~Qingfu_Zhang1;~Hau-San_Wong1", "aff": "City University of Hong Kong;RIKEN AIP;RIKEN AIP;A*STAR;The University of Tokyo;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;riken.jp;riken.jp;a-star.edu.sg;tokyo.ac.jp;cityu.edu.hk;cityu.edu.hk", "position": "PhD student;Research Scientist;Postdoc;Principal Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbu2024provably,\ntitle={Provably Transformers Harness Multi-Concept Word Semantics for Efficient In-Context Learning},\nauthor={Dake Bu and Wei Huang and Andi Han and Atsushi Nitanda and Taiji Suzuki and Qingfu Zhang and Hau-San Wong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=57C9mszjj3}\n}", "github": "", "reviewers": "Bq3t;L2Bx;qjJG;dQGe", "pdf_size": 4474232, "rating": "6;6;6;7", "confidence": "4;1;2;4", "soundness": "3;3;2;3", "novelty": "3;3;3;4", "presentation": "2;2;2;3", "wc_summary": "60;110;155;40", "wc_strengths": "135;32;14;39", "wc_weaknesses": "107;50;33;114", "wc_questions": "40;21;1;55", "wc_limitations": "40;10;1;5", "wc_review": "382;223;204;253", "wc_reply_reviewers": "53;48;24;52", "wc_reply_authors": "82;63;57;159", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 44.77373672143079 ], "wc_strengths_avg": [ 55.0, 47.07971962533337 ], "wc_weaknesses_avg": [ 76.0, 35.106979363083916 ], "wc_questions_avg": [ 29.25, 20.27775875189366 ], "wc_limitations_avg": [ 14.0, 15.346009253222807 ], "wc_review_avg": [ 265.5, 69.4928053830035 ], "wc_reply_reviewers_avg": [ 44.25, 11.840080236214618 ], "wc_reply_authors_avg": [ 90.25, 40.751533713468994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yoIZcewXBksJ:scholar.google.com/&scioq=Provably+Transformers+Harness+Multi-Concept+Word+Semantics+for+Efficient+In-Context+Learning&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "cityu.edu.hk;riken.jp;riken.jp;a-star.edu.sg;tokyo.ac.jp;cityu.edu.hk;cityu.edu.hk", "author_num": 7, "aff_unique_index": "0;1;1;2;3;0;0", "aff_unique_norm": "City University of Hong Kong;RIKEN;Agency for Science, Technology and Research;University of Tokyo", "aff_unique_dep": ";Advanced Institute for Computational Science;;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.aip.riken.jp;https://www.a-star.edu.sg;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "CityU;RIKEN AIP;A*STAR;UTokyo", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;2;1;0;0", "aff_country_unique": "China;Japan;Singapore" }, { "title": "Cal-DPO: Calibrated Direct Preference Optimization for Language Model Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96611", "id": "57OQXxbTbY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=57OQXxbTbY", "openreview": "https://openreview.net/forum?id=57OQXxbTbY", "poster": "", "project": "", "author_site": "Teng Xiao, Yige Yuan, Huaisheng Zhu, Mingxiao Li, Vasant Honavar", "tldr": "", "abstract": "We study the problem of aligning large language models (LLMs) with human preference data. Contrastive preference optimization has shown promising results in aligning LLMs with available preference data by optimizing the implicit reward associated with the policy. However, the contrastive objective focuses mainly on the relative values of implicit rewards associated with two responses while ignoring\ntheir actual values, resulting in suboptimal alignment with human preferences. To address this limitation, we propose calibrated direct preference optimization (Cal-DPO), a simple yet effective algorithm. We show that substantial improvement in alignment with the given preferences can be achieved simply by calibrating the implicit reward to ensure that the learned implicit rewards are comparable in\nscale to the ground-truth rewards. We demonstrate the theoretical advantages of Cal-DPO over existing approaches. The results of our experiments on a variety of standard benchmarks show that Cal-DPO remarkably improves off-the-shelf methods.", "keywords": "Large Language Model;Alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Teng Xiao;Yige Yuan;Huaisheng Zhu;Mingxiao Li;Vasant G Honavar", "authorids": "~Teng_Xiao2;~Yige_Yuan1;~Huaisheng_Zhu1;~Mingxiao_Li4;~Vasant_G_Honavar1", "gender": "Not Specified;M;M;M;M", "homepage": "https://yuanyige.github.io;;;http://faculty.ist.psu.edu/vhonavar;https://tengxiao1.github.io/", "dblp": "205/6235;264/2622.html;;https://dblp.uni-trier.de/pid/h/VasantHonavar.html;", "google_scholar": "lf6GtCIAAAAJ;;bQDN_FYAAAAJ;GPqMVRkAAAAJ;ld3OKXwAAAAJ", "orcid": "0000-0001-8856-668X;;0009-0000-5542-3148;0000-0001-5399-3489;", "linkedin": ";;;vhonavar/;", "or_profile": "~Yige_Yuan1;~Huaisheng_Zhu1;~Mingxiao_Li4;~Vasant_G_Honavar1;~Teng_Xiao1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Pennsylvania State University;Hangzhou Dianzi University;Pennsylvania State University;The Pennsylvania State University", "aff_domain": "ict.ac.cn;psu.edu;hdu.edu.cn;ist.psu.edu;psu.edu", "position": "PhD student;PhD student;MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nxiao2024caldpo,\ntitle={Cal-{DPO}: Calibrated Direct Preference Optimization for Language Model Alignment},\nauthor={Teng Xiao and Yige Yuan and Huaisheng Zhu and Mingxiao Li and Vasant G Honavar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=57OQXxbTbY}\n}", "github": "", "reviewers": "s9Bq;EJ4r;yAWB;f8MD", "pdf_size": 1445393, "rating": "4;5;6;7", "confidence": "4;3;2;4", "soundness": "1;3;2;3", "novelty": "2;2;3;2", "presentation": "3;3;3;2", "wc_summary": "109;47;102;106", "wc_strengths": "80;25;70;51", "wc_weaknesses": "685;130;7;198", "wc_questions": "15;9;46;172", "wc_limitations": "46;7;8;1", "wc_review": "935;218;233;528", "wc_reply_reviewers": "261;0;0;31", "wc_reply_authors": "1564;304;100;134", "reply_reviewers": "1;0;0;1", "reply_authors": "5;4;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 25.524498036200438 ], "wc_strengths_avg": [ 56.5, 20.958291915134687 ], "wc_weaknesses_avg": [ 255.0, 257.52572687015174 ], "wc_questions_avg": [ 60.5, 65.88816282155696 ], "wc_limitations_avg": [ 15.5, 17.811513130556875 ], "wc_review_avg": [ 478.5, 291.10693911344674 ], "wc_reply_reviewers_avg": [ 73.0, 109.27717053438015 ], "wc_reply_authors_avg": [ 525.5, 604.5384603149745 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2933651089883454887&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ict.ac.cn;psu.edu;hdu.edu.cn;ist.psu.edu;psu.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Chinese Academy of Sciences;Pennsylvania State University;Hangzhou Dianzi University", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;https://www.psu.edu;http://www.hdu.edu.cn/", "aff_unique_abbr": "CAS;PSU;HGHDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Adaptable Logical Control for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96610", "id": "58X9v92zRd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=58X9v92zRd", "openreview": "https://openreview.net/forum?id=58X9v92zRd", "poster": "", "project": "", "author_site": "Honghua Zhang, Po-Nien Kung, Masahiro Yoshida, Guy Van den Broeck, Nanyun Peng", "tldr": "", "abstract": "Despite the success of Large Language Models (LLMs) on various tasks following human instructions, controlling model generation to follow strict constraints at inference time poses a persistent challenge. In this paper, we introduce Ctrl-G, a neuro-symbolic framework that enables tractable and adaptable control of LLM generation to follow logical constraints reliably. Ctrl-G combines any production-ready LLM with a Hidden Markov Model (HMM), guiding LLM outputs to adhere to logical constraints represented as deterministic finite automata. We show that Ctrl-G, when a TULU2-7B model is coupled with a 2B-parameter HMM, outperforms GPT4 in text editing: on the task of generating text insertions/continuations following logical constraints, our approach achieves over 30% higher satisfaction rate in human evaluation. When applied to medium-size language models (e.g., GPT2-large), Ctrl-G also beats its counterparts on standard benchmarks by large margins. Additionally, as a proof-of-concept study, we use Ctrl-G to assist LLM reasoning on the GSM benchmark, foreshadowing the application of Ctrl-G, as well as other constrained generation approaches, beyond traditional language generation tasks.", "keywords": "Language Generation;Probabilistic Models;Probabilistic Methods;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Honghua Zhang;Po-Nien Kung;Masahiro Yoshida;Guy Van den Broeck;Nanyun Peng", "authorids": "~Honghua_Zhang1;~Po-Nien_Kung1;~Masahiro_Yoshida2;~Guy_Van_den_Broeck1;~Nanyun_Peng1", "gender": "M;M;M;F;M", "homepage": "http://web.cs.ucla.edu/~hzhang19/;;;https://violetpeng.github.io/;http://web.cs.ucla.edu/~guyvdb/", "dblp": "65/6130;278/2288;;117/4036;96/7521.html", "google_scholar": "2qxBYJUAAAAJ;wAjBsHAAAAAJ;;XxRXvX0AAAAJ;d0KQ9z0AAAAJ", "orcid": ";;0009-0000-2394-1725;;0000-0003-3434-2503", "linkedin": ";;https://jp.linkedin.com/in/masahiro-yoshida-55a566198?original_referer=https%3A%2F%2Fwww.google.com%2F;;guyvdb", "or_profile": "~Honghua_Zhang1;~Po-Nien_Kung1;~Masahiro_Yoshida2;~Nanyun_Peng1;~Guy_Van_den_Broek1", "aff": "University of California, Los Angeles;UCLA Computer Science Department, University of California, Los Angeles;UCLA Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Intern;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024adaptable,\ntitle={Adaptable Logical Control for Large Language Models},\nauthor={Honghua Zhang and Po-Nien Kung and Masahiro Yoshida and Guy Van den Broeck and Nanyun Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=58X9v92zRd}\n}", "github": "", "reviewers": "N2WB;eXgv;Uqfm;GDqt", "pdf_size": 2285419, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "1;2;2;4", "wc_summary": "209;163;87;83", "wc_strengths": "47;58;49;48", "wc_weaknesses": "265;38;96;5", "wc_questions": "883;259;2;49", "wc_limitations": "7;72;16;7", "wc_review": "1411;590;250;192", "wc_reply_reviewers": "140;258;17;0", "wc_reply_authors": "187;768;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 135.5, 53.073062847361655 ], "wc_strengths_avg": [ 50.5, 4.387482193696061 ], "wc_weaknesses_avg": [ 101.0, 100.13241233486788 ], "wc_questions_avg": [ 298.25, 351.19892867148667 ], "wc_limitations_avg": [ 25.5, 27.097047809678457 ], "wc_review_avg": [ 610.75, 486.3956080188225 ], "wc_reply_reviewers_avg": [ 103.75, 104.15943308217456 ], "wc_reply_authors_avg": [ 238.75, 314.95505631756413 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13846224246422490758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Relating Hopfield Networks to Episodic Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96609", "id": "59DmXSBG6S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=59DmXSBG6S", "openreview": "https://openreview.net/forum?id=59DmXSBG6S", "poster": "/media/PosterPDFs/NeurIPS%202024/96609.png?t=1729526751.2077785", "project": "", "author_site": "Hugo Chateau-Laurent, Frederic Alexandre", "tldr": "", "abstract": "Neural Episodic Control is a powerful reinforcement learning framework that employs a differentiable dictionary to store non-parametric memories. It was inspired by episodic memory on the functional level, but lacks a direct theoretical connection to the associative memory models generally used to implement such a memory. We first show that the dictionary is an instance of the recently proposed Universal Hopfield Network framework. We then introduce a continuous approximation of the dictionary readout operation in order to derive two energy functions that are Lyapunov functions of the dynamics. Finally, we empirically show that the dictionary outperforms the Max separation function, which had previously been argued to be optimal, and that performance can further be improved by replacing the Euclidean distance kernel by a Manhattan distance kernel. These results are enabled by the generalization capabilities of the dictionary, so a novel criterion is introduced to disentangle memorization from generalization when evaluating associative memory models.", "keywords": "Neural Episodic Control;Episodic Control;Hopfield Network;Associative Memory;Reinforcement Learning;Episodic Memory", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Hugo Chateau-Laurent;Frederic Alexandre", "authorids": "~Hugo_Chateau-Laurent2;~Frederic_Alexandre1", "gender": "Non-Binary;M", "homepage": ";", "dblp": "284/4649.html;", "google_scholar": "https://scholar.google.com/citations?hl=fr;", "orcid": "0000-0002-2891-0503;0000-0002-6113-1878", "linkedin": "hugo-chateau-laurent/;", "or_profile": "~Hugo_Chateau-Laurent2;~Frederic_Alexandre1", "aff": "INRIA;INRIA", "aff_domain": "inria.fr;inria.fr", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nchateau-laurent2024relating,\ntitle={Relating Hopfield Networks to Episodic Control},\nauthor={Hugo Chateau-Laurent and Frederic Alexandre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=59DmXSBG6S}\n}", "github": "", "reviewers": "w6Cb;ebPC;9C8q;UfDJ;4Gfy", "pdf_size": 1589446, "rating": "6;6;6;6;7", "confidence": "4;4;2;1;3", "soundness": "2;4;4;3;3", "novelty": "3;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "81;79;39;153;162", "wc_strengths": "94;70;25;101;128", "wc_weaknesses": "149;70;58;119;139", "wc_questions": "225;207;118;5;308", "wc_limitations": "14;43;1;9;285", "wc_review": "563;469;241;387;1022", "wc_reply_reviewers": "0;76;0;27;69", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 102.8, 47.19491498032388 ], "wc_strengths_avg": [ 83.6, 34.64448007980492 ], "wc_weaknesses_avg": [ 107.0, 36.61147361142406 ], "wc_questions_avg": [ 172.6, 103.27942679933889 ], "wc_limitations_avg": [ 70.4, 108.23234267075622 ], "wc_review_avg": [ 536.4, 264.81661579289164 ], "wc_reply_reviewers_avg": [ 34.4, 32.70840870479638 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.08574929257125441, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iKxNk-rYPwcJ:scholar.google.com/&scioq=Relating+Hopfield+Networks+to+Episodic+Control&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "inria.fr;inria.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Cooperation, Competition, and Maliciousness: LLM-Stakeholders Interactive Negotiation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97850", "id": "59E19c6yrN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=59E19c6yrN", "openreview": "https://openreview.net/forum?id=59E19c6yrN", "poster": "", "project": "", "author_site": "Sahar Abdelnabi, Amr Gomaa, Sarath Sivaprasad, Lea Sch\u00f6nherr, Mario Fritz", "tldr": "", "abstract": "There is a growing interest in using Large Language Models (LLMs) in multi-agent systems to tackle interactive real-world tasks that require effective collaboration and assessing complex situations. Yet, we have a limited understanding of LLMs' communication and decision-making abilities in multi-agent setups. The fundamental task of negotiation spans many key features of communication, such as cooperation, competition, and manipulation potentials. Thus, we propose using scorable negotiation to evaluate LLMs. We create a testbed of complex multi-agent, multi-issue, and semantically rich negotiation games. To reach an agreement, agents must have strong arithmetic, inference, exploration, and planning capabilities while integrating them in a dynamic and multi-turn setup. We propose metrics to rigorously quantify agents' performance and alignment with the assigned role. We provide procedures to create new games and increase games' difficulty to have an evolving benchmark. Importantly, we evaluate critical safety aspects such as the interaction dynamics between agents influenced by greedy and adversarial players. Our benchmark is highly challenging; GPT-3.5 and small models mostly fail, and GPT-4 and SoTA large models (e.g., Llama-3 70b) still underperform in reaching agreement in non-cooperative and more difficult games.", "keywords": "multi-agent LLMs;interactive multi-turn negotiation", "primary_area": "", "supplementary_material": "/attachment/6f5deb897929ccefa0e293f3c1e6cd87c8fb6be9.zip", "author": "Sahar Abdelnabi;Amr Gomaa;Sarath Sivaprasad;Lea Sch\u00f6nherr;Mario Fritz", "authorids": "~Sahar_Abdelnabi1;~Amr_Gomaa1;~Sarath_Sivaprasad2;~Lea_Sch\u00f6nherr1;~Mario_Fritz1", "gender": "M;M;M;;M", "homepage": "https://s-abdelnabi.github.io/;https://amrgomaaelhady.github.io/;;;https://cispa.saarland/group/fritz/", "dblp": "248/7979;186/7372;;;", "google_scholar": "https://scholar.google.de/citations?user=QEiYbDYAAAAJ;xiPvQxwAAAAJ;37HTgYcAAAAJ;;https://scholar.google.de/citations?user=4V1nNm4AAAAJ", "orcid": ";0000-0003-0955-3181;;;", "linkedin": "sahar-abdelnabi-375b681a1/;amrgomaaelhady/;sarathsivaprasad1729/;;", "or_profile": "~Sahar_Abdelnabi1;~Amr_Gomaa1;~Sarath_Sivaprasad2;~Lea_Sch\u00f6nherr1;~Mario_Fritz1", "aff": "CISPA Helmholtz Center for Information Security;University of Cambridge;cispa;;Saarland University", "aff_domain": "cispa.de;cam.ac.uk;cispa.de;;uni-saarland.de", "position": "PhD student;Intern;PhD student;;Full Professor", "bibtex": "@inproceedings{\nabdelnabi2024cooperation,\ntitle={Cooperation, Competition, and Maliciousness: {LLM}-Stakeholders Interactive Negotiation},\nauthor={Sahar Abdelnabi and Amr Gomaa and Sarath Sivaprasad and Lea Sch{\\\"o}nherr and Mario Fritz},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=59E19c6yrN}\n}", "github": "", "reviewers": "Dn58;iQ4e;kvGq;XyTr", "pdf_size": 4941153, "rating": "4;7;7;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "140;125;98;219", "wc_strengths": "33;61;44;215", "wc_improvement": "124;194;28;464", "wc_limitations": "73;1;35;44", "wc_correctness": "73;1;1;49", "wc_clarity": "279;1;1;32", "wc_relation_to_prior_work": "10;1;26;42", "wc_documentation": "12;1;61;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "745;386;295;1091", "wc_reply_reviewers": "0;62;0;0", "wc_reply_authors": "0;391;137;0", "reply_reviewers": "0;2;0;0", "reply_authors": "1;3;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 145.5, 45.0249930594109 ], "wc_strengths_avg": [ 88.25, 73.85585623361224 ], "wc_improvement_avg": [ 202.5, 162.07020083901915 ], "wc_limitations_avg": [ 38.25, 25.68438241422207 ], "wc_correctness_avg": [ 31.0, 31.176914536239792 ], "wc_clarity_avg": [ 78.25, 116.5919701351684 ], "wc_relation_to_prior_work_avg": [ 19.75, 15.658464164789597 ], "wc_documentation_avg": [ 24.75, 22.587330519563395 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 629.25, 315.2398888148516 ], "wc_reply_reviewers_avg": [ 15.5, 26.846787517317598 ], "wc_reply_authors_avg": [ 132.0, 159.65118226934618 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6596625494759338015&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cispa.de;cam.ac.uk;cispa.de;;uni-saarland.de", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;University of Cambridge;Saarland University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cispa.de/;https://www.cam.ac.uk;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;Cambridge;UdS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;United Kingdom" }, { "title": "TARSS-Net: Temporal-Aware Radar Semantic Segmentation Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96608", "id": "5AeLrXb9sQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5AeLrXb9sQ", "openreview": "https://openreview.net/forum?id=5AeLrXb9sQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96608.png?t=1731479170.4463267", "project": "", "author_site": "Youcheng Zhang, Liwen Zhang, ZijunHu, Pengcheng Pi, Teng Li, Yuanpei Chen, Shi Peng, Zhe Ma", "tldr": "", "abstract": "Radar signal interpretation plays a crucial role in remote detection and ranging. With the gradual display of the advantages of neural network technology in signal processing, learning-based radar signal interpretation is becoming a research hot-spot and made great progress. And since radar semantic segmentation (RSS) can provide more fine-grained target information, it has become a more concerned direction in this field. However, the temporal information, which is an important clue for analyzing radar data, has not been exploited sufficiently in present RSS frameworks. In this work, we propose a novel temporal information learning paradigm, i.e., data-driven temporal information aggregation with learned target-history relations. Following this idea, a flexible learning module, called Temporal Relation-Aware Module (TRAM) is carefully designed. TRAM contains two main blocks: i) an encoder for capturing the target-history temporal relations (TH-TRE) and ii) a learnable temporal relation attentive pooling (TRAP) for aggregating temporal information. Based on TRAM, an end-to-end Temporal-Aware RSS Network (TARSS-Net) is presented, which has outstanding performance on publicly available and our collected real-measured datasets. Code and supplementary materials are available at https://github.com/zlw9161/TARSS-Net.", "keywords": "Radar Semantic Segmentation;Temporal Relation Modeling", "primary_area": "machine_vision", "supplementary_material": "/attachment/06c9d253ec2219e38120ae1360c9cc32a126c50d.zip", "author": "Youcheng Zhang;Liwen Zhang;ZijunHu;Pengcheng Pi;Teng Li;Yuanpei Chen;Shi Peng;Zhe Ma", "authorids": "~Youcheng_Zhang1;~Liwen_Zhang5;~ZijunHu1;~Pengcheng_Pi1;~Teng_Li7;~Yuanpei_Chen1;~Shi_Peng2;~Zhe_Ma2", "gender": "F;M;M;;M;Non-Binary;;M", "homepage": ";;https://scholar.google.com/citations?user=eucZucIAAAAJ&hl=zh-CN;;https://www.researchgate.net/profile/Teng-Li-61;;;https://dblp.org/pid/22/6672", "dblp": ";94/905;159/9813.html;;;;;22/6672-1", "google_scholar": ";;eucZucIAAAAJ;;;;Z5UHCdUAAAAJ;", "orcid": "0000-0001-9762-7966;0000-0001-8457-2943;0009-0009-2510-8898;;0009-0004-6668-1950;0000-0002-4674-553X;;", "linkedin": ";;https://www.linkedin.com/;;;;;", "or_profile": "~Youcheng_Zhang1;~Liwen_Zhang5;~ZijunHu1;~Pengcheng_Pi1;~Teng_Li7;~Yuanpei_Chen1;~Shi_Peng2;~Zhe_Ma2", "aff": "Intelligent Science and Technology Academy of CASIC;Harbin Institute of Technology;;;Tsinghua University;Baidu;;Intelligent science and technology academy limited of CASIC", "aff_domain": "casic.com.cn;hit.edu.cn;;;tsinghua.edu.cn;baidu.com;;casic.com", "position": "Engineer;Researcher;;;PhD student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhang2024tarssnet,\ntitle={{TARSS}-Net: Temporal-Aware Radar Semantic Segmentation Network},\nauthor={Youcheng Zhang and Liwen Zhang and ZijunHu and Pengcheng Pi and Teng Li and Yuanpei Chen and Shi Peng and Zhe Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5AeLrXb9sQ}\n}", "github": "", "reviewers": "MgaH;kN5s;WAmw;awi8", "pdf_size": 8504987, "rating": "4;5;5;5", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "64;58;104;67", "wc_strengths": "38;43;57;53", "wc_weaknesses": "52;8;347;82", "wc_questions": "58;79;101;11", "wc_limitations": "9;8;6;1", "wc_review": "221;196;615;214", "wc_reply_reviewers": "0;0;119;91", "wc_reply_authors": "140;140;722;874", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;3;4", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.25, 18.046814123273947 ], "wc_strengths_avg": [ 47.75, 7.595228765481656 ], "wc_weaknesses_avg": [ 122.25, 132.4016144161392 ], "wc_questions_avg": [ 62.25, 33.26691299174001 ], "wc_limitations_avg": [ 6.0, 3.082207001484488 ], "wc_review_avg": [ 311.5, 175.4629590540408 ], "wc_reply_reviewers_avg": [ 52.5, 53.42518132865812 ], "wc_reply_authors_avg": [ 469.0, 333.36016558671196 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j3luyodsmuQJ:scholar.google.com/&scioq=TARSS-Net:+Temporal-Aware+Radar+Semantic+Segmentation+Network&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "casic.com.cn;hit.edu.cn;;;tsinghua.edu.cn;baidu.com;;casic.com", "author_num": 8, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "China Aerospace Science and Industry Corporation;Harbin Institute of Technology;Tsinghua University;Baidu", "aff_unique_dep": "Intelligent Science and Technology Academy;;;Baidu, Inc.", "aff_unique_url": "http://www.casic.com.cn/;http://www.hit.edu.cn/;https://www.tsinghua.edu.cn;https://www.baidu.com", "aff_unique_abbr": "CASIC;HIT;THU;Baidu", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CausalStock: Deep End-to-end Causal Discovery for News-driven Multi-stock Movement Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96607", "id": "5BXXoJh0Vr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5BXXoJh0Vr", "openreview": "https://openreview.net/forum?id=5BXXoJh0Vr", "poster": "/media/PosterPDFs/NeurIPS%202024/96607.png?t=1729864492.8401754", "project": "", "author_site": "Shuqi Li, Yuebo Sun, Yuxin Lin, Xin Gao, Shuo Shang, Rui Yan", "tldr": "", "abstract": "There are two issues in news-driven multi-stock movement prediction tasks that are not well solved in the existing works. On the one hand, \"relation discovery\" is a pivotal part when leveraging the price information of other stocks to achieve accurate stock movement prediction. Given that stock relations are often unidirectional, such as the \"supplier-consumer\" relationship, causal relations are more appropriate to capture the impact between stocks. On the other hand, there is substantial noise existing in the news data leading to extracting effective information with difficulty. With these two issues in mind, we propose a novel framework called CausalStock for news-driven multi-stock movement prediction, which discovers the temporal causal relations between stocks. We design a lag-dependent temporal causal discovery mechanism to model the temporal causal graph distribution. Then a Functional Causal Model is employed to encapsulate the discovered causal relations and predict the stock movements. Additionally, we propose a Denoised News Encoder by taking advantage of the excellent text evaluation ability of large language models (LLMs) to extract useful information from massive news data. The experiment results show that CausalStock outperforms the strong baselines for both news-driven multi-stock movement prediction and multi-stock movement prediction tasks on six real-world datasets collected from the US, China, Japan, and UK markets. Moreover, getting benefit from the causal relations, CausalStock could offer a clear prediction mechanism with good explainability.", "keywords": "Causal discovery;Stock movement prediction;Text mining", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/fca0bae9c8c35fff4648be81c4b8f4ae4e13e690.zip", "author": "Shuqi Li;Yuebo Sun;Yuxin Lin;Xin Gao;Shuo Shang;Rui Yan", "authorids": "~Shuqi_Li1;~Yuebo_Sun1;~Yuxin_Lin3;~Xin_Gao1;~Shuo_Shang1;~Rui_Yan2", "gender": "F;M;;M;;M", "homepage": "https://shuqi-li.github.io/;;;http://cemse.kaust.edu.sa/sfb;https://sites.google.com/site/jedishang;https://gsai.ruc.edu.cn/english/ruiyan", "dblp": "227/9453.html;;;56/2203-1.html;29/8750;19/2405-1", "google_scholar": "ZsiZ1cQAAAAJ;;;https://scholar.google.ca/citations?user=wqdK8ugAAAAJ;https://scholar.google.com/citations?hl=en;eLw6g-UAAAAJ", "orcid": "0009-0006-8074-3243;0009-0005-2463-2162;;0000-0002-7108-3574;0000-0002-1117-2890;0000-0002-3356-6823", "linkedin": ";;linyuxin/;;;", "or_profile": "~Shuqi_Li1;~Yuebo_Sun1;~Yuxin_Lin3;~Xin_Gao1;~Shuo_Shang1;~Rui_Yan2", "aff": "Renmin University of China;Renmin University of China;Columbia University;King Abdullah University of Science and Technology;University of Electronic Science and Technology of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;columbia.edu;kaust.edu.sa;uestc.edu.cn;ruc.edu.cn", "position": "PhD student;Undergrad student;MS student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024causalstock,\ntitle={CausalStock: Deep End-to-end Causal Discovery for News-driven Multi-stock Movement Prediction},\nauthor={Shuqi Li and Yuebo Sun and Yuxin Lin and Xin Gao and Shuo Shang and Rui Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5BXXoJh0Vr}\n}", "github": "", "reviewers": "99F3;iTYx;omfr;Xs58", "pdf_size": 1318940, "rating": "5;5;6;7", "confidence": "3;4;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;2", "wc_summary": "65;140;92;39", "wc_strengths": "49;82;32;77", "wc_weaknesses": "160;109;80;103", "wc_questions": "57;253;129;53", "wc_limitations": "9;30;1;1", "wc_review": "340;614;334;273", "wc_reply_reviewers": "18;79;0;21", "wc_reply_authors": "58;645;0;58", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.0, 37.36977388210959 ], "wc_strengths_avg": [ 60.0, 20.481699148264042 ], "wc_weaknesses_avg": [ 113.0, 29.214722315983085 ], "wc_questions_avg": [ 123.0, 80.91971329657564 ], "wc_limitations_avg": [ 10.25, 11.861176164276458 ], "wc_review_avg": [ 390.25, 131.8149744907611 ], "wc_reply_reviewers_avg": [ 29.5, 29.685855217594792 ], "wc_reply_authors_avg": [ 190.25, 263.6156055699283 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7819079624134998203&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ruc.edu.cn;ruc.edu.cn;columbia.edu;kaust.edu.sa;uestc.edu.cn;ruc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Renmin University of China;Columbia University;King Abdullah University of Science and Technology;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.columbia.edu;https://www.kast.kau.edu.sa;https://www.uestc.edu.cn", "aff_unique_abbr": "RUC;Columbia;KAUST;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": "China;United States;Saudi Arabia" }, { "title": "Unsupervised Modality Adaptation with Text-to-Image Diffusion Models for Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96606", "id": "5BwWgyvgwR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5BwWgyvgwR", "openreview": "https://openreview.net/forum?id=5BwWgyvgwR", "poster": "/media/PosterPDFs/NeurIPS%202024/96606.png?t=1731637641.0259852", "project": "", "author_site": "Ruihao Xia, Yu Liang, Peng-Tao Jiang, Hao Zhang, Bo Li, Yang Tang, Pan Zhou", "tldr": "", "abstract": "Despite their success, unsupervised domain adaptation methods for semantic segmentation primarily focus on adaptation between image domains and do not utilize other abundant visual modalities like depth, infrared and event. This limitation hinders their performance and restricts their application in real-world multimodal scenarios. To address this issue, we propose Modality Adaptation with text-to-image Diffusion Models (MADM) for semantic segmentation task which utilizes text-to-image diffusion models pre-trained on extensive image-text pairs to enhance the model's cross-modality capabilities. Specifically, MADM comprises two key complementary components to tackle major challenges. First, due to the large modality gap, using one modal data to generate pseudo labels for another modality suffers from a significant drop in accuracy. To address this, MADM designs diffusion-based pseudo-label generation which adds latent noise to stabilize pseudo-labels and enhance label accuracy. Second, to overcome the limitations of latent low-resolution features in diffusion models, MADM introduces the label palette and latent regression which converts one-hot encoded labels into the RGB form by palette and regresses them in the latent space, thus ensuring the pre-trained decoder for up-sampling to obtain fine-grained features. Extensive experimental results demonstrate that MADM achieves state-of-the-art adaptation performance across various modality tasks, including images to depth, infrared, and event modalities. We open-source our code and models at https://github.com/XiaRho/MADM.", "keywords": "Domain adaption;Semantic segmentation;Multi-modality", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ruihao Xia;Yu Liang;Peng-Tao Jiang;Hao Zhang;Bo Li;Yang Tang;Pan Zhou", "authorids": "~Ruihao_Xia1;~Yu_Liang7;~Peng-Tao_Jiang1;~Hao_Zhang52;~Bo_Li20;~Yang_Tang3;~Pan_Zhou3", "gender": ";M;M;M;M;M;", "homepage": "https://xiarho.github.io/;https://github.com/liangyufz;https://pengtaojiang.github.io;;https://libraboli.github.io/;http://www.ytangecust.com;", "dblp": "353/2386;;218/5550;;50/3402-115;;", "google_scholar": ";;85QJ_i4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=zh-CN;;", "orcid": ";;;0009-0007-1175-5918;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAABKDDJUB-t0pY6F0gGrP67GMU5igjH6519E;;;", "or_profile": "~Ruihao_Xia1;~Yu_Liang7;~Peng-Tao_Jiang1;~Hao_Zhang52;~Bo_Li20;~Yang_Tang3;~Pan_Zhou3", "aff": "East China University of Science and Technology;\u7ef4\u6c83\u79fb\u52a8\u901a\u4fe1\uff08\u676d\u5dde\uff09\u6709\u9650\u516c\u53f8;vivo Mobile Communication Co., Ltd;vivo Mobile Communication Co., Ltd;Tencent Youtu Lab;East China University of Science and Technology;", "aff_domain": "ecust.edu.cn;vivo.com;vivo.com;vivo.com;tencent.com;ecust.edu.cn;", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Full Professor;", "bibtex": "@inproceedings{\nxia2024unsupervised,\ntitle={Unsupervised Modality Adaptation with Text-to-Image Diffusion Models for Semantic Segmentation},\nauthor={Ruihao Xia and Yu Liang and Peng-Tao Jiang and Hao Zhang and Bo Li and Yang Tang and Pan Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5BwWgyvgwR}\n}", "github": "", "reviewers": "uNLv;x1By;4TqZ;Qg6E", "pdf_size": 2953467, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "3;2;3;4", "wc_summary": "52;28;57;63", "wc_strengths": "24;22;46;64", "wc_weaknesses": "65;143;122;46", "wc_questions": "1;89;35;19", "wc_limitations": "1;8;1;19", "wc_review": "143;290;261;211", "wc_reply_reviewers": "0;326;13;10", "wc_reply_authors": "102;845;103;103", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 50.0, 13.285330255586423 ], "wc_strengths_avg": [ 39.0, 17.233687939614086 ], "wc_weaknesses_avg": [ 94.0, 39.78064856183217 ], "wc_questions_avg": [ 36.0, 32.87856444554719 ], "wc_limitations_avg": [ 7.25, 7.361215932167728 ], "wc_review_avg": [ 226.25, 55.755605099397854 ], "wc_reply_reviewers_avg": [ 87.25, 137.9263843504933 ], "wc_reply_authors_avg": [ 288.25, 321.44002162145273 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13184701404733931584&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ecust.edu.cn;vivo.com;vivo.com;vivo.com;tencent.com;ecust.edu.cn;", "author_num": 7, "aff_unique_index": "0;1;2;2;3;0", "aff_unique_norm": "East China University of Science and Technology;\u7ef4\u6c83\u79fb\u52a8\u901a\u4fe1;vivo Mobile Communication Co., Ltd;Tencent", "aff_unique_dep": ";;;Youtu Lab", "aff_unique_url": "http://www.ecust.edu.cn;;https://www.vivo.com.cn;https://www.tencent.com", "aff_unique_abbr": "ECUST;;vivo;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";\u676d\u5dde", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5ClpGA0u9K", "title": "Energy Rank Alignment: Using Preference Optimization to Search Chemical Space at Scale", "track": "main", "status": "Reject", "tldr": "", "abstract": "Searching through chemical space is an exceptionally challenging problem because the number of possible molecules grows combinatorially with the number of atoms. Large, autoregressive models trained on databases of chemical compounds have yielded powerful generators, but we still lack robust strategies for generating molecules with desired properties. This molecular search problem closely resembles the \"alignment\" problem for large language models, though for many chemical tasks we have a specific and easily evaluable reward function. Here, we introduce an algorithm called energy rank alignment (ERA) that leverages an explicit reward function to produce a gradient-based objective that we use to optimize autoregressive policies. We show theoretically that this algorithm is closely related to proximal policy optimization (PPO) and direct preference optimization (DPO), but has a minimizer that converges to an ideal Gibbs-Boltzmann distribution with the reward playing the role of an energy function. Furthermore, this algorithm is highly scalable, does not require reinforcement learning, and performs well relative to DPO when the number of preference observations per pairing is small. We deploy this approach to align molecular transformers to generate molecules with externally specified properties and find that it does so robustly, searching through diverse parts of chemical space. While our focus here is on chemical search, we also obtain excellent results on an AI supervised task for LLM alignment, showing that the method is scalable and general.", "keywords": "Generative Models;Chemistry;Alignment;Large Language Models;Statistical Physics", "primary_area": "generative_models", "supplementary_material": "/attachment/f9730bfd8cee3a68bcf6387cab3c5812a23600f3.zip", "author": "Shriram Chennakesavalu;Frank Hu;Sebastian Ibarraran;Grant M. Rotskoff", "authorids": "~Shriram_Chennakesavalu1;~Frank_Hu1;~Sebastian_Ibarraran1;~Grant_M._Rotskoff1", "gender": "M;M;M;M", "homepage": ";;;https://statmech.stanford.edu", "dblp": ";;;220/5367", "google_scholar": "AP_UYxMAAAAJ;;;D6j2WboAAAAJ", "orcid": "0000-0002-4750-8923;0009-0001-4783-0947;;", "linkedin": "shriram-chennakesavalu-541a57132/;;sebastian-ibarraran-40965b179/;", "or_profile": "~Shriram_Chennakesavalu1;~Frank_Hu1;~Sebastian_Ibarraran1;~Grant_M._Rotskoff1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024energy,\ntitle={Energy Rank Alignment: Using Preference Optimization to Search Chemical Space at Scale},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=5ClpGA0u9K}\n}", "github": "", "project": "", "reviewers": "G1to;FWr2;MXPc", "site": "https://openreview.net/forum?id=5ClpGA0u9K", "pdf_size": 1571841, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "109;255;84", "wc_strengths": "38;72;53", "wc_weaknesses": "183;156;78", "wc_questions": "3;226;54", "wc_limitations": "1;11;1", "wc_review": "334;720;270", "wc_reply_reviewers": "0;135;12", "wc_reply_authors": "0;274;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 149.33333333333334, 75.41146390893687 ], "wc_strengths_avg": [ 54.333333333333336, 13.912424503139471 ], "wc_weaknesses_avg": [ 139.0, 44.51965857910413 ], "wc_questions_avg": [ 94.33333333333333, 95.40207311979943 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 441.3333333333333, 198.77178427086233 ], "wc_reply_reviewers_avg": [ 49.0, 61.00819617067857 ], "wc_reply_authors_avg": [ 91.33333333333333, 129.1648386967427 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:T60a4yw89OwJ:scholar.google.com/&scioq=Energy+Rank+Alignment:+Using+Preference+Optimization+to+Search+Chemical+Space+at+Scale&hl=en&as_sdt=0,23", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MeMo: Meaningful, Modular Controllers via Noise Injection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96605", "id": "5DJBBACqim", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5DJBBACqim", "openreview": "https://openreview.net/forum?id=5DJBBACqim", "poster": "/media/PosterPDFs/NeurIPS%202024/96605.png?t=1733620069.5709126", "project": "", "author_site": "Megan Tjandrasuwita, Jie Xu, Armando Solar-Lezama, Wojciech Matusik", "tldr": "", "abstract": "Robots are often built from standardized assemblies, (e.g. arms, legs, or fingers), but each robot must be trained from scratch to control all the actuators of all the parts together. In this paper we demonstrate a new approach that takes a single robot and its controller as input and produces a set of modular controllers for each of these assemblies such that when a new robot is built from the same parts, its control can be quickly learned by reusing the modular controllers. We achieve this with a framework called MeMo which learns (Me)aningful, (Mo)dular controllers. Specifically, we propose a novel modularity objective to learn an appropriate division of labor among the modules. We demonstrate that this objective can be optimized simultaneously with standard behavior cloning loss via noise injection. We benchmark our framework in locomotion and grasping environments on simple to complex robot morphology transfer. We also show that the modules help in task transfer. On both structure and task transfer, MeMo achieves improved training efficiency to graph neural network and Transformer baselines.", "keywords": "modular neural network policy;policy transfer;imitation learning;reinforcement learning", "primary_area": "robotics", "supplementary_material": "/attachment/eac1e028508a2ecc66aaf7330c76e9aaa5371953.zip", "author": "Megan Tjandrasuwita;Jie Xu;Armando Solar-Lezama;Wojciech Matusik", "authorids": "~Megan_Tjandrasuwita1;~Jie_Xu7;~Armando_Solar-Lezama1;~Wojciech_Matusik2", "gender": "F;M;M;M", "homepage": "https://megantj.github.io/;https://people.csail.mit.edu/jiex;https://people.csail.mit.edu/asolar/;https://cdfg.mit.edu/wojciech", "dblp": "294/8475;37/5126-28;95/6919;", "google_scholar": ";3Tj5lWEAAAAJ;https://scholar.google.com.tw/citations?user=8BX3BokAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-0212-5643", "linkedin": "megan-tjandrasuwita/;;;wojciech-matusik-67238126/", "or_profile": "~Megan_Tjandrasuwita1;~Jie_Xu7;~Armando_Solar-Lezama1;~Wojciech_Matusik2", "aff": "Massachusetts Institute of Technology;NVIDIA;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;nvidia.com;mit.edu;mit.edu", "position": "PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntjandrasuwita2024memo,\ntitle={MeMo: Meaningful, Modular Controllers via Noise Injection},\nauthor={Megan Tjandrasuwita and Jie Xu and Armando Solar-Lezama and Wojciech Matusik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5DJBBACqim}\n}", "github": "", "reviewers": "bS7d;tVBX;5WVH;Hbiz", "pdf_size": 7124881, "rating": "5;6;6;6", "confidence": "4;2;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "87;56;77;53", "wc_strengths": "57;12;155;51", "wc_weaknesses": "64;1;75;39", "wc_questions": "72;131;5;20", "wc_limitations": "4;1;22;2", "wc_review": "284;201;334;165", "wc_reply_reviewers": "167;20;15;23", "wc_reply_authors": "372;14;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.25, 14.236836024903848 ], "wc_strengths_avg": [ 68.75, 52.7085144924423 ], "wc_weaknesses_avg": [ 44.75, 28.428638729281428 ], "wc_questions_avg": [ 57.0, 49.43177115985225 ], "wc_limitations_avg": [ 7.25, 8.584142356694699 ], "wc_review_avg": [ 246.0, 66.65958295699126 ], "wc_reply_reviewers_avg": [ 56.25, 64.00537086838885 ], "wc_reply_authors_avg": [ 96.5, 159.16265265444653 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FfcXPJlgY2sJ:scholar.google.com/&scioq=MeMo:+Meaningful,+Modular+Controllers+via+Noise+Injection&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "mit.edu;nvidia.com;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://web.mit.edu;https://www.nvidia.com", "aff_unique_abbr": "MIT;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5DuSIW6XQo", "title": "Benchmarking Self-Supervised Video Representation Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Self-supervised learning is an effective way for label-free model pre-training, especially in the video domain where labeling is expensive. Existing self-supervised works in the video domain use varying experimental setups to demonstrate their effectiveness and comparison across approaches becomes challenging with no standard benchmark. In this work, we first provide a benchmark that enables a comparison of existing approaches on the same ground. Next, we study five different aspects of self-supervised learning important for videos; 1) dataset size, 2) complexity, 3) data distribution, 4) data noise, and, 5) feature analysis. To facilitate this study, we focus on seven different methods along with seven different network architectures and perform an extensive set of experiments on 5 different datasets with an evaluation of two different downstream tasks. We present several interesting insights from this study which span across different properties of pretraining and target datasets, pretext-tasks, and model architectures among others. Furthermore, we extend these findings to Video Foundation models (ViFMs). Finally, we further put some of these insights to the real test and propose an approach that requires a limited amount of training data and outperforms existing state-of-the-art approaches which use 10x pretraining data. We believe this work will pave the way for researchers to a better understanding of self-supervised pretext tasks in video representation learning.", "keywords": "self-supervised learning;action recognition", "primary_area": "", "supplementary_material": "/attachment/8d34398bf3003bf017a0236624e954cfe9744b98.zip", "author": "Akash Kumar;Ashlesha Kumar;Zhen Hao Sia;Vibhav Vineet;Yogesh S Rawat", "authorids": "~Akash_Kumar3;~Ashlesha_Kumar1;~Zhen_Hao_Sia1;~Vibhav_Vineet5;~Yogesh_S_Rawat1", "gender": "M;F;M;;M", "homepage": ";;https://github.com/siatheindochinese;;https://www.crcv.ucf.edu/person/rawat/", "dblp": "239/0195;;;;148/2258", "google_scholar": "https://scholar.google.co.in/citations?user=gsHhV5kAAAAJ;;;;D_JvEcwAAAAJ", "orcid": ";;;;", "linkedin": ";akumar-bitsp/;sia-zhen-hao;;", "or_profile": "~Akash_Kumar3;~Ashlesha_Kumar1;~Zhen_Hao_Sia1;~Vibhav_Vineet5;~Yogesh_S_Rawat1", "aff": "University of Central Florida;BITS Pilani, Birla Institute of Technology and Science;University of Central Florida;;University of Central Florida", "aff_domain": "ucf.edu;pilani.bits-pilani.ac.in;ucf.edu;;ucf.edu", "position": "PhD student;Researcher;MS student;;Assistant Professor", "bibtex": "@misc{\nanonymous2024benchmarking,\ntitle={Benchmarking Self-Supervised Video Representation Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=5DuSIW6XQo}\n}", "github": "", "project": "", "reviewers": "NhHv;Kssv;n2Lu;CSgq", "site": "https://openreview.net/forum?id=5DuSIW6XQo", "pdf_size": 4752513, "rating": "5;6;6;7", "confidence": "4;5;3;4", "wc_summary_and_contributions": "61;47;40;142", "wc_strengths": "140;19;30;238", "wc_improvement": "11;101;98;626", "wc_limitations": "20;33;4;10", "wc_correctness": "4;12;7;25", "wc_clarity": "32;29;15;396", "wc_relation_to_prior_work": "6;2;8;15", "wc_documentation": "8;4;7;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "283;248;210;1454", "wc_reply_reviewers": "289;652;213;0", "wc_reply_authors": "437;54;0;54", "reply_reviewers": "1;2;3;0", "reply_authors": "4;5;4;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 72.5, 40.8319727664486 ], "wc_strengths_avg": [ 106.75, 89.33469370854752 ], "wc_improvement_avg": [ 209.0, 243.45328093907463 ], "wc_limitations_avg": [ 16.75, 10.985786271359915 ], "wc_correctness_avg": [ 12.0, 8.031189202104505 ], "wc_clarity_avg": [ 118.0, 160.63156601365748 ], "wc_relation_to_prior_work_avg": [ 7.75, 4.710360920354193 ], "wc_documentation_avg": [ 5.0, 2.7386127875258306 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 548.75, 523.2835631853918 ], "wc_reply_reviewers_avg": [ 288.5, 235.08774957449398 ], "wc_reply_authors_avg": [ 136.25, 175.03196136706003 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9126125841962052410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Central Florida;Birla Institute of Technology and Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucf.edu;https://www.bits-pilani.ac.in", "aff_unique_abbr": "UCF;BITS Pilani", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pilani", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;India" }, { "title": "On the Surprising Effectiveness of Attention Transfer for Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96604", "id": "5DwqmoCE1N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5DwqmoCE1N", "openreview": "https://openreview.net/forum?id=5DwqmoCE1N", "poster": "", "project": "", "author_site": "Alex Li, Yuandong Tian, Beidi Chen, Deepak Pathak, Xinlei Chen", "tldr": "", "abstract": "Conventional wisdom suggests that pre-training Vision Transformers (ViT) improves downstream performance by learning useful representations. Is this actually true? We investigate this question and find that the features and representations learned during pre-training are not essential. Surprisingly, using only the attention patterns from pre-training (i.e., guiding how information flows between tokens) is sufficient for models to learn high quality features from scratch and achieve comparable downstream performance. We show this by introducing a simple method called attention transfer, where only the attention patterns from a pre-trained teacher ViT are transferred to a student, either by copying or distilling the attention maps. Since attention transfer lets the student learn its own features, ensembling it with a fine-tuned teacher also further improves accuracy on ImageNet. We systematically study various aspects of our findings on the sufficiency of attention maps, including distribution shift settings where they underperform fine-tuning. We hope our exploration provides a better understanding of what pre-training accomplishes and leads to a useful alternative to the standard practice of fine-tuning.", "keywords": "Transformers;self-attention;attention;transfer;pre-training;fine-tuning;understanding;analysis", "primary_area": "machine_vision", "supplementary_material": "", "author": "Alexander Cong Li;Yuandong Tian;Beidi Chen;Deepak Pathak;Xinlei Chen", "authorids": "~Alexander_Cong_Li2;~Yuandong_Tian1;~Beidi_Chen1;~Deepak_Pathak1;~Xinlei_Chen1", "gender": "M;M;F;M;M", "homepage": "http://alexanderli.com/;http://yuandong-tian.com;https://www.andrew.cmu.edu/user/beidic/;https://www.cs.cmu.edu/~dpathak/;http://xinleic.xyz", "dblp": "243/3349.html;t/YuandongTian;192/1339;155/9860;", "google_scholar": "bOitqMUAAAAJ;0mgEF28AAAAJ;;https://scholar.google.cl/citations?user=AEsPCAUAAAAJ;bSU7LYoAAAAJ", "orcid": ";0000-0003-4202-4847;;;", "linkedin": ";yuandongtian;;pathak22/;", "or_profile": "~Alexander_Cong_Li2;~Yuandong_Tian1;~Beidi_Chen1;~Deepak_Pathak1;~Xinlei_Chen1", "aff": "Carnegie Mellon University;Meta AI (FAIR);Meta Facebook;Carnegie Mellon University;Meta", "aff_domain": "cmu.edu;meta.com;fb.com;cmu.edu;meta.com", "position": "PhD student;Research Scientist;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nli2024on,\ntitle={On the Surprising Effectiveness of Attention Transfer for Vision Transformers},\nauthor={Alexander Cong Li and Yuandong Tian and Beidi Chen and Deepak Pathak and Xinlei Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5DwqmoCE1N}\n}", "github": "", "reviewers": "qaog;p7aT;7AbG;9FPV", "pdf_size": 4280541, "rating": "3;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;4;4", "wc_summary": "106;62;221;160", "wc_strengths": "24;33;166;211", "wc_weaknesses": "213;105;341;200", "wc_questions": "5;38;82;63", "wc_limitations": "6;1;6;42", "wc_review": "354;239;816;676", "wc_reply_reviewers": "128;67;37;62", "wc_reply_authors": "367;387;85;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 137.25, 59.520479668766114 ], "wc_strengths_avg": [ 108.5, 81.62873268647505 ], "wc_weaknesses_avg": [ 214.75, 83.97134928057308 ], "wc_questions_avg": [ 47.0, 28.83574171059243 ], "wc_limitations_avg": [ 13.75, 16.43738117827776 ], "wc_review_avg": [ 521.25, 233.69999465126224 ], "wc_reply_reviewers_avg": [ 73.5, 33.455193916640205 ], "wc_reply_authors_avg": [ 209.75, 170.0755346897372 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k9nW_SCFh9gJ:scholar.google.com/&scioq=On+the+Surprising+Effectiveness+of+Attention+Transfer+for+Vision+Transformers&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cmu.edu;meta.com;fb.com;cmu.edu;meta.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Facebook AI Research (FAIR)", "aff_unique_url": "https://www.cmu.edu;https://ai.facebook.com", "aff_unique_abbr": "CMU;Meta AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Gaussian Processes via Relevance Pursuit", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96603", "id": "5FATPIlWUJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5FATPIlWUJ", "openreview": "https://openreview.net/forum?id=5FATPIlWUJ", "poster": "", "project": "", "author_site": "Sebastian Ament, Elizabeth Santorella, David Eriksson, Ben Letham, Maximilian Balandat, Eytan Bakshy", "tldr": "", "abstract": "Gaussian processes (GPs) are non-parametric probabilistic regression models that are popular due to their flexibility, data efficiency, and well-calibrated uncertainty estimates. However, standard GP models assume homoskedastic Gaussian noise, while many real-world applications are subject to non-Gaussian corruptions. Variants of GPs that are more robust to alternative noise models have been proposed, and entail significant trade-offs between accuracy and robustness, and between computational requirements and theoretical guarantees. In this work, we propose and study a GP model that achieves robustness against sparse outliers by inferring data-point-specific noise levels with a sequential selection procedure maximizing the log marginal likelihood that we refer to as relevance pursuit. We show, surprisingly, that the model can be parameterized such that the associated log marginal likelihood is strongly concave in the data-point-specific noise variances, a property rarely found in either robust regression objectives or GP marginal likelihoods. This in turn implies the weak submodularity of the corresponding subset selection problem, and thereby proves approximation guarantees for the proposed algorithm. We compare the model\u2019s performance relative to other approaches on diverse regression and Bayesian optimization tasks, including the challenging but common setting of sparse corruptions of the labels within or close to the function range.", "keywords": "Gaussian process;robust regression;Bayesian optimization;submodular", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/a11ee96a760e222386431f993c9bdee8ab475586.zip", "author": "Sebastian Ament;Elizabeth Santorella;David Eriksson;Benjamin Letham;Maximilian Balandat;Eytan Bakshy", "authorids": "~Sebastian_Ament1;~Elizabeth_Santorella1;~David_Eriksson2;~Benjamin_Letham1;~Maximilian_Balandat1;~Eytan_Bakshy1", "gender": "M;F;M;;;M", "homepage": "https://sebastianament.github.io/;;;;https://research.facebook.com/people/balandat-max/;http://eytan.github.io", "dblp": ";;29/2816;;41/9185;58/2226", "google_scholar": "1vkpStcAAAAJ;https://scholar.google.com/citations?view_op=list_works;SWQjkN4AAAAJ;;N0iLicUAAAAJ;8y9rrq0AAAAJ", "orcid": ";0000-0002-5496-4273;;;0000-0002-8214-8935;", "linkedin": ";elizabeth-santorella-699a9684;davideriksson89/;;maximilian-balandat-b5843946/;", "or_profile": "~Sebastian_Ament1;~Elizabeth_Santorella1;~David_Eriksson2;~Benjamin_Letham1;~Maximilian_Balandat1;~Eytan_Bakshy1", "aff": "Meta;Meta;Meta;;Meta;Meta", "aff_domain": "meta.com;meta.com;meta.com;;meta.com;meta.com", "position": "Researcher;Researcher;Research scientist;;Research Scientist Manager;Principal Researcher", "bibtex": "@inproceedings{\nament2024robust,\ntitle={Robust Gaussian Processes via Relevance Pursuit},\nauthor={Sebastian Ament and Elizabeth Santorella and David Eriksson and Benjamin Letham and Maximilian Balandat and Eytan Bakshy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5FATPIlWUJ}\n}", "github": "", "reviewers": "geQY;aaaZ;CPiv", "pdf_size": 1557753, "rating": "7;7;7", "confidence": "4;4;5", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "70;44;60", "wc_strengths": "75;52;58", "wc_weaknesses": "440;130;204", "wc_questions": "113;104;46", "wc_limitations": "49;5;9", "wc_review": "747;335;377", "wc_reply_reviewers": "23;11;177", "wc_reply_authors": "10;0;1630", "reply_reviewers": "1;1;3", "reply_authors": "2;1;5", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.0, 10.708252269472673 ], "wc_strengths_avg": [ 61.666666666666664, 9.741092797468305 ], "wc_weaknesses_avg": [ 258.0, 132.19177987555304 ], "wc_questions_avg": [ 87.66666666666667, 29.69100125552447 ], "wc_limitations_avg": [ 21.0, 19.86621923433512 ], "wc_review_avg": [ 486.3333333333333, 185.11497928464775 ], "wc_reply_reviewers_avg": [ 70.33333333333333, 75.58365490560038 ], "wc_reply_authors_avg": [ 546.6666666666666, 766.0432247740479 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1949290636811389918&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "meta.com;meta.com;meta.com;;meta.com;meta.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Behavioural Planes: Explaining the Evolution of Client Behaviour in Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96602", "id": "5FHzrRGOKR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5FHzrRGOKR", "openreview": "https://openreview.net/forum?id=5FHzrRGOKR", "poster": "", "project": "", "author_site": "Dario Fenoglio, Gabriele Dominici, Pietro Barbiero, Alberto Tonda, Martin Gjoreski, Marc Langheinrich", "tldr": "", "abstract": "Federated Learning (FL), a privacy-aware approach in distributed deep learning environments, enables many clients to collaboratively train a model without sharing sensitive data, thereby reducing privacy risks. However, enabling human trust and control over FL systems requires understanding the evolving behaviour of clients, whether beneficial or detrimental for the training, which still represents a key challenge in the current literature. To address this challenge, we introduce Federated Behavioural Planes (FBPs), a novel method to analyse, visualise, and explain the dynamics of FL systems, showing how clients behave under two different lenses: predictive performance (error behavioural space) and decision-making processes (counterfactual behavioural space). Our experiments demonstrate that FBPs provide informative trajectories describing the evolving states of clients and their contributions to the global model, thereby enabling the identification of clusters of clients with similar behaviours. Leveraging the patterns identified by FBPs, we propose a robust aggregation technique named Federated Behavioural Shields to detect malicious or noisy client models, thereby enhancing security and surpassing the efficacy of existing state-of-the-art FL defense mechanisms. Our code is publicly available on GitHub.", "keywords": "federated learning;explainable AI;counterfactuals;secure aggregation", "primary_area": "privacy", "supplementary_material": "/attachment/3031b6ee73663c4aabd7e0267905311b315b8ebb.zip", "author": "Dario Fenoglio;Gabriele Dominici;Pietro Barbiero;Alberto Tonda;Martin Gjoreski;Marc Langheinrich", "authorids": "~Dario_Fenoglio1;~Gabriele_Dominici1;~Pietro_Barbiero1;~Alberto_Tonda1;~Martin_Gjoreski1;~Marc_Langheinrich1", "gender": "M;M;M;Not Specified;M;M", "homepage": ";;http://www.pietrobarbiero.eu/;;https://martingjoreski.github.io/;", "dblp": "358/9422;351/0657;238/7860;;157/4929;60/6209", "google_scholar": "CnL5RS0AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.it/citations?user=4gbToQoAAAAJ;M1t5hwYAAAAJ;b6NoAQMAAAAJ;y6ztFvsAAAAJ", "orcid": "0009-0002-9112-9522;0009-0009-1955-0778;0000-0003-3155-2564;0000-0001-5895-4809;0000-0002-1220-7418;0000-0002-8834-7388", "linkedin": "dario-fenoglio-ab523a237/;gabriele-dominici-677bb6161/;;;martingjoreski;", "or_profile": "~Dario_Fenoglio1;~Gabriele_Dominici1;~Pietro_Barbiero1;~Alberto_Tonda1;~Martin_Gjoreski1;~Marc_Langheinrich1", "aff": "Universita della Svizzera Italiana;Universita della Svizzera Italiana;Universita della Svizzera Italiana;UMR 518 MIA-PS, INRAE, Universit\u00e9 Paris-Saclay;Universit\u00e0 della svizzera italiana;Universita della Svizzera Italiana", "aff_domain": "usi.ch;usi.ch;usi.ch;inrae.fr;usi.ch;usi.ch", "position": "PhD student;PhD student;Postdoc;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nfenoglio2024federated,\ntitle={Federated Behavioural Planes: Explaining the Evolution of Client Behaviour in Federated Learning},\nauthor={Dario Fenoglio and Gabriele Dominici and Pietro Barbiero and Alberto Tonda and Martin Gjoreski and Marc Langheinrich},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5FHzrRGOKR}\n}", "github": "", "reviewers": "aREF;ULpn;eMJr", "pdf_size": 2224742, "rating": "5;6;7", "confidence": "2;3;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "74;69;248", "wc_strengths": "27;66;367", "wc_weaknesses": "77;279;565", "wc_questions": "50;44;602", "wc_limitations": "21;29;467", "wc_review": "249;487;2249", "wc_reply_reviewers": "0;34;0", "wc_reply_authors": "42;42;26", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 130.33333333333334, 83.22793334505482 ], "wc_strengths_avg": [ 153.33333333333334, 151.92176349102263 ], "wc_weaknesses_avg": [ 307.0, 200.2065599990836 ], "wc_questions_avg": [ 232.0, 261.6409753842085 ], "wc_limitations_avg": [ 172.33333333333334, 208.3863932431503 ], "wc_review_avg": [ 995.0, 892.019431776386 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 36.666666666666664, 7.542472332656507 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:U327SlTt2wwJ:scholar.google.com/&scioq=Federated+Behavioural+Planes:+Explaining+the+Evolution+of+Client+Behaviour+in+Federated+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "usi.ch;usi.ch;usi.ch;inrae.fr;usi.ch;usi.ch", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Universita della Svizzera Italiana;INRAE;Universit\u00e0 della Svizzera italiana", "aff_unique_dep": ";UMR 518 MIA-PS;", "aff_unique_url": "https://www.usi.ch;https://www.inrae.fr;https://www.usi.ch", "aff_unique_abbr": "USI;;USI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Switzerland;France" }, { "title": "Computerized Adaptive Testing via Collaborative Ranking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96601", "id": "5Fl4zgXbsW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5Fl4zgXbsW", "openreview": "https://openreview.net/forum?id=5Fl4zgXbsW", "poster": "", "project": "", "author_site": "Zirui Liu, Yan Zhuang, Qi Liu, Jiatong Li, Yuren Zhang, Zhenya Huang, Jinze Wu, Shijin Wang", "tldr": "", "abstract": "As the deep integration of machine learning and intelligent education, Computerized Adaptive Testing (CAT) has received more and more research attention. Compared to traditional paper-and-pencil tests, CAT can deliver both personalized and interactive assessments by automatically adjusting testing questions according to the performance of students during the test process. Therefore, CAT has been recognized as an efficient testing methodology capable of accurately estimating a student\u2019s ability with a minimal number of questions, leading to its widespread adoption in mainstream selective exams such as the GMAT and GRE. However, just improving the accuracy of ability estimation is far from satisfactory in the real-world scenarios, since an accurate ranking of students is usually more important (e.g., in high-stakes exams). Considering the shortage of existing CAT solutions in student ranking, this paper emphasizes the importance of aligning test outcomes (student ranks) with the true underlying abilities of students. Along this line, different from the conventional independent testing paradigm among students, we propose a novel collaborative framework, Collaborative Computerized Adaptive Testing (CCAT), that leverages inter-student information to enhance student ranking. By using collaborative students as anchors to assist in ranking test-takers, CCAT can give both theoretical guarantees and experimental validation for ensuring ranking consistency.", "keywords": "Data Mining;Intelligence Education;Computerized Adaptive Testing", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/a2aef8fa1f6d69ee7cb72eea9ba8c7ba6facaab1.zip", "author": "Zirui Liu;Yan Zhuang;Qi Liu;Jiatong Li;Yuren Zhang;Zhenya Huang;Jinze Wu;Shijin Wang", "authorids": "~Zirui_Liu5;~Yan_Zhuang4;~Qi_Liu3;~Jiatong_Li5;~Yuren_Zhang2;~Zhenya_Huang2;~Jinze_Wu1;~Shijin_Wang1", "gender": "M;M;M;M;M;M;;M", "homepage": ";http://home.ustc.edu.cn/~zykb/;http://staff.ustc.edu.cn/~qiliuql/;https://cslijt.github.io;;http://staff.ustc.edu.cn/~huangzhy/;;", "dblp": ";;95/2446-3;19/11348-2;270/6517;178/8690;;74/5750-1.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;7MX_P5cAAAAJ;5EoHAFwAAAAJ;https://scholar.google.com/citations?hl=en;BJvX3rYAAAAJ;dVZuU90AAAAJ;;", "orcid": ";0000-0001-7351-377X;0000-0001-6956-5550;0009-0000-8877-6927;0000-0002-8758-906X;0000-0003-1661-0420;0000-0001-9957-5733;0000-0002-9202-7678", "linkedin": ";;;;;;;", "or_profile": "~Zirui_Liu5;~Yan_Zhuang4;~Qi_Liu3;~Jiatong_Li5;~Yuren_Zhang2;~Zhenya_Huang2;~Jinze_Wu1;~Shijin_Wang1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;State Key Laboratory of Cognitive Intelligence", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;iflytek.com", "position": "MS student;PhD student;Full Professor;MS student;PhD student;Associate Professor;;Vice Dean", "bibtex": "@inproceedings{\nliu2024computerized,\ntitle={Computerized Adaptive Testing via Collaborative Ranking},\nauthor={Zirui Liu and Yan Zhuang and Qi Liu and Jiatong Li and Yuren Zhang and Zhenya Huang and Jinze Wu and Shijin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5Fl4zgXbsW}\n}", "github": "", "reviewers": "M6H4;74zz;cZqj;gvzk;piWg", "pdf_size": 14308502, "rating": "5;6;7;8;8", "confidence": "2;4;5;4;5", "soundness": "2;3;4;4;3", "novelty": "2;2;3;3;3", "presentation": "1;3;3;3;3", "wc_summary": "21;11;50;138;57", "wc_strengths": "28;30;105;112;99", "wc_weaknesses": "173;24;111;84;112", "wc_questions": "152;1;3;86;2", "wc_limitations": "40;1;1;7;42", "wc_review": "414;67;270;427;312", "wc_reply_reviewers": "91;0;16;28;9", "wc_reply_authors": "109;0;23;41;28", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 55.4, 44.7419266460442 ], "wc_strengths_avg": [ 74.8, 37.6265863452958 ], "wc_weaknesses_avg": [ 100.8, 48.21369100162318 ], "wc_questions_avg": [ 48.8, 61.00295074830397 ], "wc_limitations_avg": [ 18.2, 18.755265927200284 ], "wc_review_avg": [ 298.0, 129.95229894080364 ], "wc_reply_reviewers_avg": [ 28.8, 32.41851322932624 ], "wc_reply_authors_avg": [ 40.2, 36.864074652702186 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7827803638564369, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7118560576031171610&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;iflytek.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "University of Science and Technology of China;State Key Laboratory of Cognitive Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;", "aff_unique_abbr": "USTC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VLM Agents Generate Their Own Memories: Distilling Experience into Embodied Programs of Thought", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96600", "id": "5G7MRfPngt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5G7MRfPngt", "openreview": "https://openreview.net/forum?id=5G7MRfPngt", "poster": "", "project": "", "author_site": "Gabriel Sarch, Lawrence Jang, Michael Tarr, William Cohen, Kenneth Marino, Katerina Fragkiadaki", "tldr": "", "abstract": "Large-scale generative language and vision-language models (LLMs and VLMs) excel in few-shot in-context learning for decision making and instruction following. However, they require high-quality exemplar demonstrations to be included in their context window. In this work, we ask: Can LLMs and VLMs generate their own examples from generic, sub-optimal demonstrations? We propose In-Context Abstraction Learning (ICAL), a method that builds a memory of multimodal experience from sub-optimal demonstrations and human feedback. Given a task demonstration that may contain inefficiencies or mistakes, a VLM abstracts the trajectory into a generalized program by correcting inefficient actions and annotating cognitive abstractions: causal relationships, object state changes, temporal subgoals, and task-relevant visual elements. These abstractions are iteratively improved and adapted through human feedback while the agent attempts to execute the trajectory in a similar environment. The resulting examples, when used as exemplars in the prompt, significantly improve decision-making in retrieval-augmented LLM and VLM agents. Moreover, as the agent's library of examples grows, it becomes more efficient, relying less on human feedback and requiring fewer environment interactions per demonstration. Our ICAL agent surpasses the state-of-the-art in dialogue-based instruction following in TEACh, multimodal web agents in VisualWebArena, and action anticipation in Ego4D. In TEACh, we achieve a 12.6% improvement in goal-condition success. In VisualWebArena, our task success rate improves over the SOTA from 14.3% to 22.7% using GPT4V. In Ego4D action forecasting, we improve over few-shot GPT-4V and remain competitive with supervised models. We show finetuning our retrieval-augmented in-context agent yields additional improvements. Our approach significantly reduces reliance on manual prompt engineering and consistently outperforms in-context learning from action plans that lack such abstractions.", "keywords": "Multimodal Agents;Multimodal Large Language Models;Visual Demonstrations;Human-In-The-Loop;Instruction Following;Autonomous Web Agents;Ego4D", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Gabriel Herbert Sarch;Lawrence Jang;Michael J. Tarr;William W. Cohen;Kenneth Marino;Katerina Fragkiadaki", "authorids": "~Gabriel_Herbert_Sarch1;~Lawrence_Jang1;~Michael_J._Tarr1;~William_W._Cohen2;~Kenneth_Marino1;~Katerina_Fragkiadaki1", "gender": "M;M;M;M;F;M", "homepage": "https://gabesarch.me/;;https://wwcohen.github.io/;http://kennethmarino.com;https://www.cs.cmu.edu/~katef/;https://tarrlab.org", "dblp": "280/0151;;c/WWCohen.html;192/1969;21/8780;36/1880", "google_scholar": "9rYWAhsAAAAJ;;8ys-38kAAAAJ;;FWp7728AAAAJ;O8ALPlkAAAAJ", "orcid": ";;;;;0000-0003-4724-1744", "linkedin": ";lawrencejang;;;;michael-tarr-ab078046/", "or_profile": "~Gabriel_Herbert_Sarch1;~Lawrence_Jang1;~William_W._Cohen2;~Kenneth_Marino1;~Katerina_Fragkiadaki1;~Michael_Tarr1", "aff": "Microsoft;Carnegie Mellon University;Google DeepMind;Google;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "microsoft.com;cmu.edu;google.com;google.com;cmu.edu;cmu.edu", "position": "Intern;MS student;Principle Scientist;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsarch2024vlm,\ntitle={{VLM} Agents Generate Their Own Memories: Distilling Experience into Embodied Programs of Thought},\nauthor={Gabriel Herbert Sarch and Lawrence Jang and Michael J. Tarr and William W. Cohen and Kenneth Marino and Katerina Fragkiadaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5G7MRfPngt}\n}", "github": "", "reviewers": "foCb;ih5w;vg3K;VRBk", "pdf_size": 12846088, "rating": "5;5;6;9", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "60;66;95;185", "wc_strengths": "79;33;108;129", "wc_weaknesses": "67;51;168;35", "wc_questions": "37;39;88;45", "wc_limitations": "1;21;65;12", "wc_review": "244;210;524;406", "wc_reply_reviewers": "87;98;22;0", "wc_reply_authors": "72;63;71;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.5, 49.99249943741561 ], "wc_strengths_avg": [ 87.25, 36.00260407248342 ], "wc_weaknesses_avg": [ 80.25, 51.91037950159871 ], "wc_questions_avg": [ 52.25, 20.849160654568326 ], "wc_limitations_avg": [ 24.75, 24.293774922806872 ], "wc_review_avg": [ 346.0, 126.67280686871986 ], "wc_reply_reviewers_avg": [ 51.75, 41.66758332325022 ], "wc_reply_authors_avg": [ 51.5, 29.937434759845406 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3658181296343340244&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "microsoft.com;cmu.edu;google.com;google.com;cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;1;1", "aff_unique_norm": "Microsoft;Carnegie Mellon University;Google", "aff_unique_dep": "Microsoft Corporation;;Google DeepMind", "aff_unique_url": "https://www.microsoft.com;https://www.cmu.edu;https://deepmind.com", "aff_unique_abbr": "Microsoft;CMU;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Grammar-Aligned Decoding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96599", "id": "5G7ve8E1Lu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5G7ve8E1Lu", "openreview": "https://openreview.net/forum?id=5G7ve8E1Lu", "poster": "/media/PosterPDFs/NeurIPS%202024/96599.png?t=1733808794.381881", "project": "", "author_site": "Kanghee Park, Jiayu Wang, Taylor Berg-Kirkpatrick, Nadia Polikarpova, Loris D'Antoni", "tldr": "", "abstract": "Large Language Models (LLMs) struggle with reliably generating highly structured outputs, such as program code, mathematical formulas, or well-formed markup. Constrained decoding approaches mitigate this problem by greedily restricting what tokens an LLM can output at each step to guarantee that the output matches a given constraint. Specifically, in grammar-constrained decoding (GCD), the LLM's output must follow a given grammar. In this paper we demonstrate that GCD techniques (and in general constrained decoding techniques) can distort the LLM's distribution, leading to outputs that are grammatical but appear with likelihoods that are not proportional to the ones given by the LLM, and so ultimately are low-quality. We call the problem of aligning sampling with a grammar constraint, grammar-aligned decoding (GAD), and propose adaptive sampling with approximate expected futures (ASAp), a decoding algorithm that guarantees the output to be grammatical while provably producing outputs that match the conditional probability of the LLM's distribution conditioned on the given grammar constraint. Our algorithm uses prior sample outputs to soundly overapproximate the future grammaticality of different output prefixes. Our evaluation on code generation and structured NLP tasks shows how ASAp often produces outputs with higher likelihood (according to the LLM's distribution) than existing GCD techniques, while still enforcing the desired grammatical constraints.", "keywords": "Language Models;Decoding;Context-free Grammars", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/52dd85be9499330dc6652fc06dfa7095f0f70ddb.zip", "author": "Kanghee Park;Jiayu Wang;Taylor Berg-Kirkpatrick;Nadia Polikarpova;Loris D'Antoni", "authorids": "~Kanghee_Park1;~Jiayu_Wang6;~Taylor_Berg-Kirkpatrick1;~Nadia_Polikarpova1;~Loris_D'Antoni1", "gender": "M;F;M;F;M", "homepage": "https://cseweb.ucsd.edu/~kap022/;https://jiayuww.github.io/;https://cseweb.ucsd.edu/~tberg/;https://cseweb.ucsd.edu/~npolikarpova/;https://cseweb.ucsd.edu/~ldantoni/", "dblp": "129/4392;;22/8160;;85/770.html", "google_scholar": ";e4S1NrEAAAAJ;mN6_BKAAAAAJ;https://scholar.google.com.tw/citations?user=CxzUX0EAAAAJ;https://scholar.google.com.tw/citations?user=ao8EeJ8AAAAJ", "orcid": "0009-0005-7983-233X;;;;", "linkedin": ";;;;", "or_profile": "~Kanghee_Park1;~Jiayu_Wang6;~Taylor_Berg-Kirkpatrick1;~Nadia_Polikarpova1;~Loris_D'Antoni1", "aff": "Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin - Madison;University of California, San Diego;University of California, San Diego;University of Wisconsin, Madison", "aff_domain": "cs.wisc.edu;wisc.edu;ucsd.edu;ucsd.edu;wisc.edu", "position": "PhD student;MS student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\npark2024grammaraligned,\ntitle={Grammar-Aligned Decoding},\nauthor={Kanghee Park and Jiayu Wang and Taylor Berg-Kirkpatrick and Nadia Polikarpova and Loris D'Antoni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5G7ve8E1Lu}\n}", "github": "", "reviewers": "gp9U;cHBv;CJH3", "pdf_size": 2392198, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "2;4;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "124;69;56", "wc_strengths": "56;46;55", "wc_weaknesses": "291;157;30", "wc_questions": "38;31;52", "wc_limitations": "1;9;33", "wc_review": "510;312;226", "wc_reply_reviewers": "38;29;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 83.0, 29.473151624260794 ], "wc_strengths_avg": [ 52.333333333333336, 4.4969125210773475 ], "wc_weaknesses_avg": [ 159.33333333333334, 106.56557709796454 ], "wc_questions_avg": [ 40.333333333333336, 8.73053390247253 ], "wc_limitations_avg": [ 14.333333333333334, 13.59738536958076 ], "wc_review_avg": [ 349.3333333333333, 118.90986315506194 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 16.21384867602041 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10137564388696468511&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "cs.wisc.edu;wisc.edu;ucsd.edu;ucsd.edu;wisc.edu", "author_num": 5, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "University of Wisconsin-Madison;University of California, San Diego;University of Wisconsin", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UCSD;UW", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Minimizing UCB: a Better Local Search Strategy in Local Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96598", "id": "5GCgNFZSyo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5GCgNFZSyo", "openreview": "https://openreview.net/forum?id=5GCgNFZSyo", "poster": "", "project": "", "author_site": "ZHEYI FAN, Wenyu Wang, Szu Hui Ng, Qingpei Hu", "tldr": "", "abstract": "Local Bayesian optimization is a promising practical approach to solve the high dimensional black-box function optimization problem. Among them is the approximated gradient class of methods, which implements a strategy similar to gradient descent. These methods have achieved good experimental results and theoretical guarantees. However, given the distributional properties of the Gaussian processes applied on these methods, there may be potential to further exploit the information of the Gaussian processes to facilitate the BO search. In this work, we develop the relationship between the steps of the gradient descent method and one that minimizes the Upper Confidence Bound (UCB), and show that the latter can be a better strategy than direct gradient descent when a Gaussian process is applied as a surrogate. Through this insight, we propose a new local Bayesian optimization algorithm, MinUCB, which replaces the gradient descent step with minimizing UCB in GIBO. We further show that MinUCB maintains a similar convergence rate with GIBO. We then improve the acquisition function of MinUCB further through a look ahead strategy, and obtain a more efficient algorithm LA-MinUCB. We apply our algorithms on different synthetic and real-world functions, and the results show the effectiveness of our method. Our algorithms also illustrate improvements on local search strategies from an upper bound perspective in Bayesian optimization, and provides a new direction for future algorithm design.", "keywords": "Bayesian Optimization;Local Optimization;High dimensional;Minimizing upper confidence Bound", "primary_area": "optimization", "supplementary_material": "", "author": "Zheyi Fan;Wenyu Wang;Szu Hui Ng;Qingpei Hu", "authorids": "~Zheyi_Fan2;~Wenyu_Wang4;~Szu_Hui_Ng1;~Qingpei_Hu1", "gender": "M;;M;M", "homepage": ";https://cde.nus.edu.sg/isem/staff/ng-szu-hui/;https://people.ucas.ac.cn/~0027345;https://github.com/chinafzy1", "dblp": ";;;160/0645", "google_scholar": "3gj8La8AAAAJ;;8BcgTB0AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wenyu_Wang4;~Szu_Hui_Ng1;~Qingpei_Hu1;~Fan_Zheyi1", "aff": "National University of Singapore;National University of Singapore;Academy of Mathematics and Systems Science, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "nus.edu.sg;nus.edu.sg;amss.ac.cn;amss.ac.cn", "position": "Postdoc;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nfan2024minimizing,\ntitle={Minimizing {UCB}: a Better Local Search Strategy in Local Bayesian Optimization},\nauthor={Zheyi Fan and Wenyu Wang and Szu Hui Ng and Qingpei Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5GCgNFZSyo}\n}", "github": "", "reviewers": "csK6;Zw3g;TKiN;9Krt", "pdf_size": 956180, "rating": "3;5;7;7", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "28;74;102;145", "wc_strengths": "55;205;84;56", "wc_weaknesses": "71;534;177;491", "wc_questions": "68;18;102;94", "wc_limitations": "1;13;14;75", "wc_review": "223;844;479;861", "wc_reply_reviewers": "226;138;11;0", "wc_reply_authors": "780;0;0;51", "reply_reviewers": "2;1;1;0", "reply_authors": "3;1;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.25, 42.540421953713626 ], "wc_strengths_avg": [ 100.0, 61.72924752497798 ], "wc_weaknesses_avg": [ 318.25, 198.4154416873848 ], "wc_questions_avg": [ 70.5, 32.81386901905961 ], "wc_limitations_avg": [ 25.75, 28.89095879336648 ], "wc_review_avg": [ 601.75, 266.65274703254045 ], "wc_reply_reviewers_avg": [ 93.75, 93.65461814560989 ], "wc_reply_authors_avg": [ 207.75, 331.044086943114 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11533546004992362683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nus.edu.sg;nus.edu.sg;amss.ac.cn;amss.ac.cn", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "National University of Singapore;Chinese Academy of Sciences", "aff_unique_dep": ";Academy of Mathematics and Systems Science", "aff_unique_url": "https://www.nus.edu.sg;http://www.amss.cas.cn", "aff_unique_abbr": "NUS;AMSS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Towards Exact Gradient-based Training on Analog In-memory Computing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96597", "id": "5GwbKlBIIf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5GwbKlBIIf", "openreview": "https://openreview.net/forum?id=5GwbKlBIIf", "poster": "/media/PosterPDFs/NeurIPS%202024/96597.png?t=1731713210.0773137", "project": "", "author_site": "Zhaoxian Wu, Tayfun Gokmen, Malte Rasch, Tianyi Chen", "tldr": "", "abstract": "Given the high economic and environmental costs of using large vision or language models, analog in-memory accelerators present a promising solution for energy-efficient AI. While inference on analog accelerators has been studied recently, the training perspective is underexplored. Recent studies have shown that the \"workhorse\" of digital AI training - stochastic gradient descent (SGD) algorithm converges inexactly when applied to model training on non-ideal devices. This paper puts forth a theoretical foundation for gradient-based training on analog devices. We begin by characterizing the non-convergent issue of SGD, which is caused by the asymmetric updates on the analog devices. We then provide a lower bound of the asymptotic error to show that there is a fundamental performance limit of SGD-based analog training rather than an artifact of our analysis. \nTo address this issue, we study a heuristic analog algorithm called Tiki-Taka that has recently exhibited superior empirical performance compared to SGD. We rigorously show its ability to converge to a critical point exactly and hence eliminate the asymptotic error. The simulations verify the correctness of the analyses.", "keywords": "Analog AI; in-memory computing; stochastic gradient descent; stochastic optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Zhaoxian Wu;Tayfun Gokmen;Malte J. Rasch;Tianyi Chen", "authorids": "~Zhaoxian_Wu1;~Tayfun_Gokmen1;~Malte_J._Rasch1;~Tianyi_Chen5", "gender": "M;M;;M", "homepage": "https://zhaoxian-wu.github.io/;;;https://chentianyi1991.github.io/", "dblp": "255/6466;;68/5535;", "google_scholar": "Hyk4lXJy4CEC;3EGKIV0AAAAJ;Ycos9pAAAAAJ;kFwvv38AAAAJ", "orcid": ";;0000-0002-7988-4624;", "linkedin": ";tayfun-gokmen-79aa6356;;", "or_profile": "~Zhaoxian_Wu1;~Tayfun_Gokmen1;~Malte_J._Rasch1;~Tianyi_Chen5", "aff": "Rensselaer Polytechnic Institute;IBM, International Business Machines;Sony Europe Ltd.;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;us.ibm.com;sony.com;rpi.edu", "position": "PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwu2024towards,\ntitle={Towards Exact Gradient-based Training on Analog In-memory Computing},\nauthor={Zhaoxian Wu and Tayfun Gokmen and Malte J. Rasch and Tianyi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5GwbKlBIIf}\n}", "github": "", "reviewers": "G3ws;gG9b;dVXd", "pdf_size": 1004216, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "57;85;94", "wc_strengths": "69;45;80", "wc_weaknesses": "47;29;312", "wc_questions": "2;37;254", "wc_limitations": "1;1;130", "wc_review": "176;197;870", "wc_reply_reviewers": "0;36;9", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.66666666666667, 15.755069730795297 ], "wc_strengths_avg": [ 64.66666666666667, 14.613540144521982 ], "wc_weaknesses_avg": [ 129.33333333333334, 129.37370503914448 ], "wc_questions_avg": [ 97.66666666666667, 111.46399518329774 ], "wc_limitations_avg": [ 44.0, 60.81118318204309 ], "wc_review_avg": [ 414.3333333333333, 322.3190276039495 ], "wc_reply_reviewers_avg": [ 15.0, 15.297058540778355 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p7xs5ZX6l0EJ:scholar.google.com/&scioq=Towards+Exact+Gradient-based+Training+on+Analog+In-memory+Computing&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "rpi.edu;us.ibm.com;sony.com;rpi.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;International Business Machines;Sony Europe", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com;https://www.sony.eu", "aff_unique_abbr": "RPI;IBM;Sony Europe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Task-recency bias strikes back: Adapting covariances in Exemplar-Free Class Incremental Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96596", "id": "5H4l37IsZ8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5H4l37IsZ8", "openreview": "https://openreview.net/forum?id=5H4l37IsZ8", "poster": "", "project": "", "author_site": "Grzegorz Rype\u015b\u0107, Sebastian Cygert, Tomasz Trzcinski, Bart\u0142omiej Twardowski", "tldr": "", "abstract": "Exemplar-Free Class Incremental Learning (EFCIL) tackles the problem of training a model on a sequence of tasks without access to past data. Existing state-of-the-art methods represent classes as Gaussian distributions in the feature extractor's latent space, enabling Bayes classification or training the classifier by replaying pseudo features. However, we identify two critical issues that compromise their efficacy when the feature extractor is updated on incremental tasks. First, they do not consider that classes' covariance matrices change and must be adapted after each task. Second, they are susceptible to a task-recency bias caused by dimensionality collapse occurring during training. In this work, we propose AdaGauss - a novel method that adapts covariance matrices from task to task and mitigates the task-recency bias owing to the additional anti-collapse loss function. AdaGauss yields state-of-the-art results on popular EFCIL benchmarks and datasets when training from scratch or starting from a pre-trained backbone.", "keywords": "continual learning;exemplar free;exemplar free class incremental learning;class incremental learning;exemplar-free", "primary_area": "online_learning", "supplementary_material": "/attachment/74f7618c9a5f15d267e8d71a8eab6552def4649f.zip", "author": "Grzegorz Rype\u015b\u0107;Sebastian Cygert;Tomasz Trzcinski;Bart\u0142omiej Twardowski", "authorids": "~Grzegorz_Rype\u015b\u01071;~Sebastian_Cygert1;~Tomasz_Trzcinski2;~Bart\u0142omiej_Twardowski1", "gender": "M;M;M;M", "homepage": "https://github.com/grypesc;https://pg.edu.pl/en/p/sebastian-cygert-1128802;https://cvlab.ii.pw.edu.pl/ttrzcins/;", "dblp": "293/9780;138/3693;05/11408;156/6628", "google_scholar": "XE3QOZ4AAAAJ;https://scholar.google.pl/citations?user=wLH9PP8AAAAJ;https://scholar.google.pl/citations?user=bJMRBFoAAAAJ;https://scholar.google.pl/citations?user=8yywECgAAAAJ", "orcid": "0000-0001-8170-3282;;;0000-0003-2117-8679", "linkedin": "grypesc/;sebastiancygert/;;bartlomiejtwardowski/", "or_profile": "~Grzegorz_Rype\u015b\u01071;~Sebastian_Cygert1;~Tomasz_Trzcinski2;~Bart\u0142omiej_Twardowski1", "aff": "Warsaw University of Technology;IDEAS NCBR;Warsaw University of Technology;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona", "aff_domain": "pw.edu.pl;ideas-ncbr.pl;pw.edu.pl;cvc.uab.es", "position": "PhD student;Postdoc;Full Professor;Postdoc", "bibtex": "@inproceedings{\nrype{\\'s}{\\'c}2024taskrecency,\ntitle={Task-recency bias strikes back: Adapting covariances in Exemplar-Free Class Incremental Learning},\nauthor={Grzegorz Rype{\\'s}{\\'c} and Sebastian Cygert and Tomasz Trzcinski and Bart{\\l}omiej Twardowski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5H4l37IsZ8}\n}", "github": "", "reviewers": "VMJn;6bmT;5j6M;xJqZ", "pdf_size": 3302495, "rating": "5;6;6;6", "confidence": "4;5;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "40;59;111;51", "wc_strengths": "58;33;54;33", "wc_weaknesses": "196;162;115;223", "wc_questions": "3;2;214;26", "wc_limitations": "1;1;14;1", "wc_review": "298;257;508;334", "wc_reply_reviewers": "16;17;70;52", "wc_reply_authors": "41;44;47;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.25, 27.261465477849864 ], "wc_strengths_avg": [ 44.5, 11.586630226256467 ], "wc_weaknesses_avg": [ 174.0, 40.34228550788862 ], "wc_questions_avg": [ 61.25, 88.7112591501214 ], "wc_limitations_avg": [ 4.25, 5.629165124598851 ], "wc_review_avg": [ 349.25, 95.61740165890308 ], "wc_reply_reviewers_avg": [ 38.75, 23.14492384951828 ], "wc_reply_authors_avg": [ 36.75, 12.735285626950029 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4083555662224467075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pw.edu.pl;ideas-ncbr.pl;pw.edu.pl;cvc.uab.es", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Warsaw University of Technology;Institute for Development, Economic Analysis, and Simulation (IDEAS);Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": ";;Computer Vision Center", "aff_unique_url": "https://www.pw.edu.pl;https://www.ideas-ncbr.gov.pl;https://www.uab.cat", "aff_unique_abbr": "WUT;IDEAS;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Poland;Spain" }, { "title": "FIDE: Frequency-Inflated Conditional Diffusion Model for Extreme-Aware Time Series Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96595", "id": "5HQhYiGnYb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5HQhYiGnYb", "openreview": "https://openreview.net/forum?id=5HQhYiGnYb", "poster": "/media/PosterPDFs/NeurIPS%202024/96595.png?t=1731747295.022372", "project": "", "author_site": "Asadullah Hill Galib, Pang-Ning Tan, Lifeng Luo", "tldr": "", "abstract": "Time series generation is a crucial aspect of data analysis, playing a pivotal role in learning the temporal patterns and their underlying dynamics across diverse fields. Conventional time series generation methods often struggle to capture extreme values adequately, diminishing their value in critical applications such as scenario planning and management for healthcare, finance, climate change adaptation, and beyond. In this paper, we introduce a conditional diffusion model called FIDE to address the challenge of preserving the distribution of extreme values in generative modeling for time series. FIDE employs a novel high-frequency inflation strategy in the frequency domain, preventing premature fade-out of the extreme value. It also extends traditional diffusion-based model, enabling the generation of samples conditioned on the block maxima, thereby enhancing the model's capacity to capture extreme events. Additionally, the FIDE framework incorporates the Generalized Extreme Value (GEV) distribution within its generative modeling framework, ensuring fidelity to both block maxima and overall data distribution. Experimental results on real-world and synthetic data showcase the efficacy of FIDE over baseline methods, highlighting its potential in advancing Generative AI for time series analysis, specifically in accurately modeling extreme events.", "keywords": "Diffusion model;time series;extreme values", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Asadullah Hill Galib;Pang-Ning Tan;Lifeng Luo", "authorids": "~Asadullah_Hill_Galib1;~Pang-Ning_Tan1;~Lifeng_Luo1", "gender": "M;M;M", "homepage": "https://galib19.github.io/;http://www.cse.msu.edu/~ptan;https://drought.geo.msu.edu", "dblp": ";t/PangNingTan.html;", "google_scholar": "zB3VifYAAAAJ;https://scholar.google.com.tw/citations?user=xNs4D2QAAAAJ;H20gi2IAAAAJ", "orcid": ";;0000-0002-2829-7104", "linkedin": "galib19;;", "or_profile": "~Asadullah_Hill_Galib1;~Pang-Ning_Tan1;~Lifeng_Luo1", "aff": "Michigan State University;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngalib2024fide,\ntitle={{FIDE}: Frequency-Inflated Conditional Diffusion Model for Extreme-Aware Time Series Generation},\nauthor={Asadullah Hill Galib and Pang-Ning Tan and Lifeng Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5HQhYiGnYb}\n}", "github": "", "reviewers": "4b7m;ggGX;8C1A;Nq9n", "pdf_size": 1593420, "rating": "3;5;7;7", "confidence": "5;4;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;4;3", "wc_summary": "66;86;172;102", "wc_strengths": "38;62;74;32", "wc_weaknesses": "30;132;174;77", "wc_questions": "164;6;76;66", "wc_limitations": "1;1;42;15", "wc_review": "299;287;538;292", "wc_reply_reviewers": "250;0;0;0", "wc_reply_authors": "641;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 106.5, 39.909272105614754 ], "wc_strengths_avg": [ 51.5, 17.168284713389397 ], "wc_weaknesses_avg": [ 103.25, 54.51318647813573 ], "wc_questions_avg": [ 78.0, 56.40921910468182 ], "wc_limitations_avg": [ 14.75, 16.7388022271607 ], "wc_review_avg": [ 354.0, 106.31791946798056 ], "wc_reply_reviewers_avg": [ 62.5, 108.25317547305482 ], "wc_reply_authors_avg": [ 160.25, 277.5611419129126 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6104809872863819469&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "msu.edu;msu.edu;msu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MG-Net: Learn to Customize QAOA with Circuit Depth Awareness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96594", "id": "5Hdg5IK18B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5Hdg5IK18B", "openreview": "https://openreview.net/forum?id=5Hdg5IK18B", "poster": "/media/PosterPDFs/NeurIPS%202024/96594.png?t=1730451884.5217755", "project": "", "author_site": "Yang Qian, Xinbiao Wang, Yuxuan Du, Yong Luo, Dacheng Tao", "tldr": "", "abstract": "Quantum Approximate Optimization Algorithm (QAOA) and its variants exhibit immense potential in tackling combinatorial optimization challenges. However, their practical realization confronts a dilemma: the requisite circuit depth for satisfactory performance is problem-specific and often exceeds the maximum capability of current quantum devices. To address this dilemma, here we first analyze the convergence behavior of QAOA, uncovering the origins of this dilemma and elucidating the intricate relationship between the employed mixer Hamiltonian, the specific problem at hand, and the permissible maximum circuit depth. Harnessing this understanding, we introduce the Mixer Generator Network (MG-Net), a unified deep learning framework adept at dynamically formulating optimal mixer Hamiltonians tailored to distinct tasks and circuit depths. Systematic simulations, encompassing Ising models and weighted Max-Cut instances with up to 64 qubits, substantiate our theoretical findings, highlighting MG-Net's superior performance in terms of both approximation ratio and efficiency.", "keywords": "Quantum approximation optimization algorithm;quantum machine learning", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/343430e2ea0376cdf8ee0a8400579a155bbbcf4a.zip", "author": "Yang Qian;Xinbiao Wang;Yuxuan Du;Yong Luo;Dacheng Tao", "authorids": "~Yang_Qian1;~Xinbiao_Wang1;~Yuxuan_Du2;~Yong_Luo2;~Dacheng_Tao1", "gender": "M;M;M;M;", "homepage": "https://qqqyang.github.io/;;https://github.com/yuxuan-du/Yuxuan-Du.github.io;;", "dblp": ";;;57/5272-2.html;", "google_scholar": "8Bt-CfgAAAAJ;YR3JifsAAAAJ;https://scholar.google.com.au/citations?user=50sFkzIAAAAJ;zb1oVGIAAAAJ;", "orcid": ";0000-0002-9898-820X;0000-0002-1193-9756;;", "linkedin": ";;;;", "or_profile": "~Yang_Qian1;~Xinbiao_Wang1;~Yuxuan_Du2;~Yong_Luo2;~Dacheng_Tao1", "aff": "University of Sydney;Wuhan University;JD.com;Wuhan University;", "aff_domain": "sydney.edu.au;whu.edu.cn;jd.com;whu.edu.cn;", "position": "PhD student;PhD student;Researcher;Professor;", "bibtex": "@inproceedings{\nqian2024mgnet,\ntitle={{MG}-Net: Learn to Customize {QAOA} with Circuit Depth Awareness},\nauthor={Yang Qian and Xinbiao Wang and Yuxuan Du and Yong Luo and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5Hdg5IK18B}\n}", "github": "", "reviewers": "xXN5;6Eat;aG1x;DGPC", "pdf_size": 1703541, "rating": "2;6;6;7", "confidence": "5;3;3;3", "soundness": "3;3;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "76;34;100;82", "wc_strengths": "9;44;78;74", "wc_weaknesses": "342;85;46;161", "wc_questions": "2;23;150;70", "wc_limitations": "10;13;38;6", "wc_review": "439;199;412;393", "wc_reply_reviewers": "34;15;8;23", "wc_reply_authors": "962;0;39;176", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;2;3", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 24.186773244895647 ], "wc_strengths_avg": [ 51.25, 27.707174161216802 ], "wc_weaknesses_avg": [ 158.5, 113.72884418651233 ], "wc_questions_avg": [ 61.25, 56.847933119859334 ], "wc_limitations_avg": [ 16.75, 12.517487767120047 ], "wc_review_avg": [ 360.75, 94.80605202200965 ], "wc_reply_reviewers_avg": [ 20.0, 9.669539802906858 ], "wc_reply_authors_avg": [ 294.25, 391.0270930511082 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9771398364036774, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TRrahwCXDrcJ:scholar.google.com/&scioq=MG-Net:+Learn+to+Customize+QAOA+with+Circuit+Depth+Awareness&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "sydney.edu.au;whu.edu.cn;jd.com;whu.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Sydney;Wuhan University;JD.com", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sydney.edu.au;http://www.whu.edu.cn/;https://www.jd.com", "aff_unique_abbr": "USYD;WHU;JD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Australia;China" }, { "title": "DARG: Dynamic Evaluation of Large Language Models via Adaptive Reasoning Graph", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96593", "id": "5IFeCNA7zR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5IFeCNA7zR", "openreview": "https://openreview.net/forum?id=5IFeCNA7zR", "poster": "/media/PosterPDFs/NeurIPS%202024/96593.png?t=1729966598.683266", "project": "", "author_site": "Zhehao Zhang, Jiaao Chen, Diyi Yang", "tldr": "", "abstract": "The current paradigm of evaluating Large Language Models (LLMs) through static benchmarks comes with significant limitations, such as vulnerability to data contamination and a lack of adaptability to the evolving capabilities of LLMs. Therefore, evaluation methods that can adapt and generate evaluation data with controlled complexity are urgently needed. In this work, we introduce Dynamic Evaluation of LLMs via Adaptive Reasoning Graph Evolvement (DARG) to dynamically extend current benchmarks with controlled complexity and diversity. Specifically, we first extract the reasoning graphs of data points in current benchmarks and then perturb the reasoning graphs to generate novel testing data. Such newly generated test samples can have different levels of complexity while maintaining linguistic diversity similar to the original benchmarks. We further use a code-augmented LLM to ensure the label correctness of newly generated data. We apply our DARG framework to diverse reasoning tasks in four domains with 15 state-of-the-art LLMs. Experimental results show that almost all LLMs experience a performance decrease with increased complexity and certain LLMs exhibit significant drops. Additionally, we find that LLMs exhibit more biases when being evaluated via the data generated by DARG with higher complexity levels. These observations provide useful insights into how to dynamically and adaptively evaluate LLMs.", "keywords": "Dynamic Evaluation;Large Language Model", "primary_area": "evaluation", "supplementary_material": "/attachment/da16215da1f2c45451aff17663dea02ec0caadcb.zip", "author": "Zhehao Zhang;Jiaao Chen;Diyi Yang", "authorids": "~Zhehao_Zhang1;~Jiaao_Chen2;~Diyi_Yang2", "gender": "M;M;F", "homepage": "https://zzh-sjtu.github.io/zhehaozhang.github.io/;https://cs.stanford.edu/people/jiaaoc/;https://cs.stanford.edu/~diyiy/", "dblp": "223/7963.html;230/3663;70/11145", "google_scholar": "QG-BAGwAAAAJ;Pi9IVvUAAAAJ;j9jhYqQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhehao_Zhang1;~Jiaao_Chen2;~Diyi_Yang2", "aff": "Dartmouth College;Georgia Institute of Technology;Stanford University", "aff_domain": "dartmouth.edu;gatech.edu;stanford.edu", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024darg,\ntitle={{DARG}: Dynamic Evaluation of Large Language Models via Adaptive Reasoning Graph},\nauthor={Zhehao Zhang and Jiaao Chen and Diyi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5IFeCNA7zR}\n}", "github": "", "reviewers": "MmK5;xKyF;SsHY;avM6", "pdf_size": 7019592, "rating": "5;6;6;6", "confidence": "3;4;2;5", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "44;48;73;118", "wc_strengths": "50;147;17;42", "wc_weaknesses": "145;126;46;98", "wc_questions": "3;50;189;48", "wc_limitations": "3;7;4;1", "wc_review": "245;378;329;307", "wc_reply_reviewers": "0;20;24;16", "wc_reply_authors": "0;43;40;32", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.75, 29.45653577731095 ], "wc_strengths_avg": [ 64.0, 49.441885077330944 ], "wc_weaknesses_avg": [ 103.75, 37.298625980054545 ], "wc_questions_avg": [ 72.5, 69.83731094479512 ], "wc_limitations_avg": [ 3.75, 2.165063509461097 ], "wc_review_avg": [ 314.75, 47.77224612680463 ], "wc_reply_reviewers_avg": [ 15.0, 9.1104335791443 ], "wc_reply_authors_avg": [ 28.75, 17.07886120325357 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11137298566044790845&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "dartmouth.edu;gatech.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Dartmouth College;Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dartmouth.edu;https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "Dartmouth;Georgia Tech;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "e-COP : Episodic Constrained Optimization of Policies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96592", "id": "5IRtAcVbiC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5IRtAcVbiC", "openreview": "https://openreview.net/forum?id=5IRtAcVbiC", "poster": "/media/PosterPDFs/NeurIPS%202024/96592.png?t=1730308618.7681184", "project": "", "author_site": "Akhil Agnihotri, Rahul Jain, Deepak Ramachandran, Sahil Singla", "tldr": "", "abstract": "In this paper, we present the e-COP algorithm, the first policy optimization algorithm for constrained Reinforcement Learning (RL) in episodic (finite horizon) settings. Such formulations are applicable when there are separate sets of optimization criteria and constraints on a system's behavior. We approach this problem by first establishing a policy difference lemma for the episodic setting, which provides the theoretical foundation for the algorithm. Then, we propose to combine a set of established and novel solution ideas to yield the e-COP algorithm that is easy to implement and numerically stable, and provide a theoretical guarantee on optimality under certain scaling assumptions. Through extensive empirical analysis using benchmarks in the Safety Gym suite, we show that our algorithm has similar or better performance than SoTA (non-episodic) algorithms adapted for the episodic setting. The scalability of the algorithm opens the door to its application in safety-constrained Reinforcement Learning from Human Feedback for Large Language or Diffusion Models.", "keywords": "reinforcement learning;policy optimization;constrained MDPs", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/87af599b3c49e371bf86ffcc9b1fa06b568a09fb.zip", "author": "Akhil Agnihotri;Rahul Jain;Deepak Ramachandran;Sahil Singla", "authorids": "~Akhil_Agnihotri1;~Rahul_Jain1;~Deepak_Ramachandran2;~Sahil_Singla1", "gender": "M;M;M;M", "homepage": "http://agnihotriakhil.github.io/;http://www.rahuljain.net;;https://singlasahil14.github.io/", "dblp": "248/8264;42/4430-2.html;80/703;55/8911-2", "google_scholar": "Kf1o27gAAAAJ;NIj18UQAAAAJ;WbM9EAIAAAAJ;jjjbOI4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Akhil_Agnihotri1;~Rahul_Jain1;~Deepak_Ramachandran2;~Sahil_Singla1", "aff": "University of Southern California;University of Southern California;Google;Google", "aff_domain": "usc.edu;usc.edu;google.com;google.com", "position": "PhD student;Professor;Staff Researcher;Researcher", "bibtex": "@inproceedings{\nagnihotri2024ecop,\ntitle={e-{COP} : Episodic Constrained Optimization of Policies},\nauthor={Akhil Agnihotri and Rahul Jain and Deepak Ramachandran and Sahil Singla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5IRtAcVbiC}\n}", "github": "", "reviewers": "4GCx;p2dY;kuft", "pdf_size": 3148290, "rating": "4;5;7", "confidence": "2;5;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;2;3", "wc_summary": "46;90;92", "wc_strengths": "32;20;58", "wc_weaknesses": "211;77;47", "wc_questions": "124;308;28", "wc_limitations": "39;1;7", "wc_review": "452;496;232", "wc_reply_reviewers": "340;47;34", "wc_reply_authors": "545;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.0, 21.228911104120876 ], "wc_strengths_avg": [ 36.666666666666664, 15.86050300449376 ], "wc_weaknesses_avg": [ 111.66666666666667, 71.29905718560067 ], "wc_questions_avg": [ 153.33333333333334, 116.17611152422381 ], "wc_limitations_avg": [ 15.666666666666666, 16.679994670929073 ], "wc_review_avg": [ 393.3333333333333, 115.4854488188399 ], "wc_reply_reviewers_avg": [ 140.33333333333334, 141.28536921973046 ], "wc_reply_authors_avg": [ 181.66666666666666, 256.91546383111233 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zamySae9OJUJ:scholar.google.com/&scioq=e-COP+:+Episodic+Constrained+Optimization+of+Policies&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "usc.edu;usc.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.usc.edu;https://www.google.com", "aff_unique_abbr": "USC;Google", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "AED: Adaptable Error Detection for Few-shot Imitation Policy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96591", "id": "5K3VeoBnqc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5K3VeoBnqc", "openreview": "https://openreview.net/forum?id=5K3VeoBnqc", "poster": "/media/PosterPDFs/NeurIPS%202024/96591.png?t=1729437080.9090393", "project": "", "author_site": "Jia-Fong Yeh, Kuo-Han Hung, Pang-Chi Lo, Chi Ming Chung, Tsung-Han Wu, Hung-Ting Su, Yi-Ting Chen, Winston Hsu", "tldr": "", "abstract": "We introduce a new task called Adaptable Error Detection (AED), which aims to identify behavior errors in few-shot imitation (FSI) policies based on visual observations in novel environments. The potential to cause serious damage to surrounding areas limits the application of FSI policies in real-world scenarios. Thus, a robust system is necessary to notify operators when FSI policies are inconsistent with the intent of demonstrations. This task introduces three challenges: (1) detecting behavior errors in novel environments, (2) identifying behavior errors that occur without revealing notable changes, and (3) lacking complete temporal information of the rollout due to the necessity of online detection. However, the existing benchmarks cannot support the development of AED because their tasks do not present all these challenges. To this end, we develop a cross-domain AED benchmark, consisting of 322 base and 153 novel environments. Additionally, we propose Pattern Observer (PrObe) to address these challenges. PrObe is equipped with a powerful pattern extractor and guided by novel learning objectives to parse discernible patterns in the policy feature representations of normal or error states. Through our comprehensive evaluation, PrObe demonstrates superior capability to detect errors arising from a wide range of FSI policies, consistently surpassing strong baselines. Moreover, we conduct detailed ablations and a pilot study on error correction to validate the effectiveness of the proposed architecture design and the practicality of the AED task, respectively. The AED project page can be found at https://aed-neurips.github.io/.", "keywords": "adaptable error detection;few-shot imitation;policy learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/c50c3628fabe8591289c87f37fb586656b87e322.zip", "author": "Jia-Fong Yeh;Kuo-Han Hung;Pang-Chi Lo;Chi Ming Chung;Tsung-Han Wu;Hung-Ting Su;Yi-Ting Chen;Winston H. Hsu", "authorids": "~Jia-Fong_Yeh1;~Kuo-Han_Hung1;~Pang-Chi_Lo1;~Chi_Ming_Chung1;~Tsung-Han_Wu1;~Hung-Ting_Su1;~Yi-Ting_Chen2;~Winston_H._Hsu2", "gender": ";M;M;M;M;M;M;M", "homepage": "https://www.cmlab.csie.ntu.edu.tw/~jiafongyeh/;https://khhung-906.github.io/;;;https://tsunghan-wu.github.io;;https://sites.google.com/site/yitingchen0524/;https://winstonhsu.info/", "dblp": "198/7831;334/0228;;48/266;01/6790;230/2143;12/5268-1;16/5668.html", "google_scholar": "kS-oZ20AAAAJ;_L9iSdoAAAAJ;;Wk-_erYAAAAJ;https://scholar.google.com.tw/citations?user=ykuVSuEAAAAJ;5oNVau8AAAAJ;8tRH7RMAAAAJ;https://scholar.google.com.tw/citations?user=NOvDH3QAAAAJ", "orcid": ";;;;;;;0000-0002-3330-0638", "linkedin": ";;pang-chi-lo/?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base%3B59Z50RdLSVu4tPsXR5D4nQ%3D%3D;chi-ming-chung-444875134/;tsunghanwu/;;;", "or_profile": "~Jia-Fong_Yeh1;~Kuo-Han_Hung1;~Pang-Chi_Lo1;~Chi_Ming_Chung1;~Tsung-Han_Wu1;~Hung-Ting_Su1;~Yi-Ting_Chen2;~Winston_Hsu1", "aff": "Sony Group Corporation;National Taiwan University;National Taiwan University;Mediatek Research;University of California, Berkeley;National Taiwan University;National Yang Ming Chiao Tung University;National Taiwan University", "aff_domain": "sony.com;ntu.edu.tw;ntu.edu.tw;mtkresearch.com;berkeley.edu;ntu.edu.tw;nycu.edu.tw;ntu.edu.tw", "position": "Intern;Undergrad student;Undergrad student;Researcher;PhD student;Postdoc;Assistant Professor;Professor", "bibtex": "@inproceedings{\nyeh2024aed,\ntitle={{AED}: Adaptable Error Detection for Few-shot Imitation Policy},\nauthor={Jia-Fong Yeh and Kuo-Han Hung and Pang-Chi Lo and Chi Ming Chung and Tsung-Han Wu and Hung-Ting Su and Yi-Ting Chen and Winston H. Hsu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5K3VeoBnqc}\n}", "github": "", "reviewers": "SywQ;3VgP;qV6D", "pdf_size": 3087382, "rating": "7;7;7", "confidence": "3;4;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;2", "wc_summary": "208;57;133", "wc_strengths": "141;261;37", "wc_weaknesses": "61;500;56", "wc_questions": "81;94;2", "wc_limitations": "1;10;17", "wc_review": "492;922;245", "wc_reply_reviewers": "0;178;14", "wc_reply_authors": "84;49;129", "reply_reviewers": "0;1;1", "reply_authors": "2;2;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 132.66666666666666, 61.64594246357356 ], "wc_strengths_avg": [ 146.33333333333334, 91.5253456092294 ], "wc_weaknesses_avg": [ 205.66666666666666, 208.13510569392716 ], "wc_questions_avg": [ 59.0, 40.65300316909769 ], "wc_limitations_avg": [ 9.333333333333334, 6.548960901462833 ], "wc_review_avg": [ 553.0, 279.7296313704837 ], "wc_reply_reviewers_avg": [ 64.0, 80.81254028099022 ], "wc_reply_authors_avg": [ 87.33333333333333, 32.74480450731417 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1821580441115943787&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sony.com;ntu.edu.tw;ntu.edu.tw;mtkresearch.com;berkeley.edu;ntu.edu.tw;nycu.edu.tw;ntu.edu.tw", "author_num": 8, "aff_unique_index": "0;1;1;2;3;1;4;1", "aff_unique_norm": "Sony Group Corporation;National Taiwan University;MediaTek Inc.;University of California, Berkeley;National Yang Ming Chiao Tung University", "aff_unique_dep": ";;Research;;", "aff_unique_url": "https://www.sony.com;https://www.ntu.edu.tw;https://www.mediatek.com/;https://www.berkeley.edu;https://www.nycu.edu.tw", "aff_unique_abbr": "Sony;NTU;MediaTek;UC Berkeley;NYCU", "aff_campus_unique_index": "1;1;1;2;1;1;1", "aff_campus_unique": ";Taiwan;Berkeley", "aff_country_unique_index": "0;1;1;1;2;1;1;1", "aff_country_unique": "Japan;China;United States" }, { "id": "5KEb1mqZRl", "title": "General Compression Framework for Efficient Transformer Object Tracking", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transformer-based trackers have established a dominant role in the field of visual object tracking. While these trackers exhibit promising performance, their deployment on resource-constrained devices remains challenging due to inefficiencies. To improve the inference efficiency and reduce the computation cost, prior approaches have aimed to either design lightweight trackers or distill knowledge from larger teacher models into more compact student trackers. However, these solutions often sacrifice accuracy for speed. Thus, we propose a general model compression framework for efficient transformer object tracking, named CompressTracker, to reduce the size of a pre-trained tracking model into a lightweight tracker with minimal performance degradation. Our approach features a novel stage division strategy that segments the transformer layers of the teacher model into distinct stages, enabling the student model to emulate each corresponding teacher stage more effectively. Additionally, we also design a unique replacement training technique that involves randomly substituting specific stages in the student model with those from the teacher model, as opposed to training the student model in isolation. Replacement training enhances the student model's ability to replicate the teacher model's behavior. To further forcing student model to emulate teacher model, we incorporate prediction guidance and stage-wise feature mimicking to provide additional supervision during the teacher model's compression process. Our framework CompressTracker is structurally agnostic, making it compatible with any transformer architecture. We conduct a series of experiment to verify the effectiveness and generalizability of CompressTracker. Our CompressTracker-4 with 4 transformer layers, which is compressed from OSTrack, retains about $\\mathbf{96\\%}$ performance on LaSOT ($\\mathbf{66.1\\%}$ AUC) while achieves $\\mathbf{2.17\\times}$ speed up.", "keywords": "Object Tracking;Efficient Tracking;Lightweight Model;Knowledge Distillation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Lingyi Hong;Jinglun Li;Xinyu Zhou;Shilin Yan;Pinxue Guo;Kaixun Jiang;Zhaoyu Chen;Shuyong Gao;Wei Zhang;Hong Lu;Wenqiang Zhang", "authorids": "~Lingyi_Hong1;~Jinglun_Li1;~Xinyu_Zhou5;~Shilin_Yan1;~Pinxue_Guo1;~Kaixun_Jiang1;~Zhaoyu_Chen1;~Shuyong_Gao1;~Wei_Zhang8;~Hong_Lu2;~Wenqiang_Zhang1", "gender": "M;M;M;M;M;M;;M;M;F;M", "homepage": "https://lingyihongfd.github.io/;http://www.fudanroilab.com/2020/05/01/JinglunLi.html;https://www.researchgate.net/profile/Xinyu-Zhou-21;https://scholar.google.com/citations?user=2VhjOykAAAAJ&hl=en;;http://www.fudanroilab.com/2018/09/02/KaixunJiang.html;https://www.fudanroilab.com/2020/05/01/ZhaoyuChen.html;;http://homepage.fudan.edu.cn/weizh/;https://faculty.fudan.edu.cn/luhong1/zh_CN/index.htm;https://www.fudanroilab.com/2021/07/01/WenqiangZhang.html", "dblp": "311/7466;;;166/3197.html;333/7534;334/1058;119/8788-1;;;47/2341-1.html;", "google_scholar": "wHh_m_IAAAAJ;;https://scholar.google.com.hk/citations?user=Zdm-YgkAAAAJ;2VhjOykAAAAJ;d_7fUjoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;gzuiMkcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;vL-VEJYAAAAJ", "orcid": ";0009-0001-4930-6284;;;;;0000-0002-7112-2596;0000-0002-8992-0756;0000-0002-2358-8543;0000-0002-4572-2854;0000-0002-3339-8751", "linkedin": ";;;;;;;;;;", "or_profile": "~Lingyi_Hong1;~Jinglun_Li1;~Xinyu_Zhou5;~Shilin_Yan1;~Pinxue_Guo1;~Kaixun_Jiang1;~Zhaoyu_Chen1;~Shuyong_Gao1;~Wei_Zhang8;~Hong_Lu2;~Wenqiang_Zhang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fdu.edu;fudan.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;PhD student;MS student;PhD student;PhD student;PhD student;Postdoc;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024general,\ntitle={General Compression Framework for Efficient Transformer Object Tracking},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=5KEb1mqZRl}\n}", "github": "", "project": "", "reviewers": "H7HB;pwR6;M8LK;8Frr;2g8j", "site": "https://openreview.net/forum?id=5KEb1mqZRl", "pdf_size": 691864, "rating": "3;5;5;6;9", "confidence": "5;4;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;2;2;4", "presentation": "4;3;3;3;4", "wc_summary": "86;48;137;83;69", "wc_strengths": "35;43;50;124;81", "wc_weaknesses": "176;103;68;182;144", "wc_questions": "5;30;85;4;76", "wc_limitations": "1;1;25;4;19", "wc_review": "303;225;365;397;389", "wc_reply_reviewers": "0;40;0;14;36", "wc_reply_authors": "478;35;0;33;39", "reply_reviewers": "0;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.6, 1.9595917942265424 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.6, 29.43874997346185 ], "wc_strengths_avg": [ 66.6, 32.659454986266994 ], "wc_weaknesses_avg": [ 134.6, 43.55043053748149 ], "wc_questions_avg": [ 40.0, 34.47317797940886 ], "wc_limitations_avg": [ 10.0, 10.039920318408907 ], "wc_review_avg": [ 335.8, 64.46828677729849 ], "wc_reply_reviewers_avg": [ 18.0, 17.158088471621774 ], "wc_reply_authors_avg": [ 117.0, 181.0381175332974 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.6666666666666666, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18122805291901037060&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VastTrack: Vast Category Visual Object Tracking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97849", "id": "5L05sLRIlQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5L05sLRIlQ", "openreview": "https://openreview.net/forum?id=5L05sLRIlQ", "poster": "/media/PosterPDFs/NeurIPS%202024/97849.png?t=1730213820.0274024", "project": "", "author_site": "Liang Peng, Junyuan Gao, Xinran Liu, Weihong Li, Shaohua Dong, Zhipeng Zhang, Heng Fan, Libo Zhang", "tldr": "", "abstract": "In this paper, we propose a novel benchmark, named VastTrack, aiming to facilitate the development of general visual tracking via encompassing abundant classes and videos. VastTrack consists of a few attractive properties: (1) Vast Object Category. In particular, it covers targets from 2,115 categories, significantly surpassing object classes of existing popular benchmarks (e.g., GOT-10k with 563 classes and LaSOT with 70 categories). Through providing such vast object classes, we expect to learn more general object tracking. (2) Larger scale. Compared with current benchmarks, VastTrack provides 50,610 videos with 4.2 million frames, which makes it to date the largest dataset in term of the number of videos, and hence could benefit training even more powerful visual trackers in the deep learning era. (3) Rich Annotation. Besides conventional bounding box annotations, VastTrack also provides linguistic descriptions with more than 50K sentences for the videos. Such rich annotations of VastTrack enable the development of both vision-only and vision-language tracking. In order to ensure precise annotation, each frame in the videos is manually labeled with multi-stage of careful inspections and refinements. To understand performance of existing trackers and to provide baselines for future comparison, we extensively evaluate 25 representative trackers. The results, not surprisingly, display significant drops compared to those on current datasets due to lack of abundant categories and videos from diverse scenarios for training, and more efforts are urgently required to improve general visual tracking. Our VastTrack, the toolkit, and evaluation results are publicly available at https://github.com/HengLan/VastTrack.", "keywords": "Visual tracking \u00b7 Large-scale benchmark \u00b7 Vast category", "primary_area": "", "supplementary_material": "", "author": "Liang Peng;Junyuan Gao;Xinran Liu;Weihong Li;Shaohua Dong;Zhipeng Zhang;Heng Fan;Libo Zhang", "authorids": "~Liang_Peng7;~Junyuan_Gao1;~Xinran_Liu4;~Weihong_Li2;~Shaohua_Dong1;~Zhipeng_Zhang2;~Heng_Fan3;~Libo_Zhang1", "gender": ";M;F;M;M;;;M", "homepage": ";;https://github.com/YouKnowT;;;;;", "dblp": ";221/7470;;62/3262-2;188/4523;;;78/33-1.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;;pda4mogAAAAJ;5iSEcFkAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0009-0007-6750-8713;;;;;;", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAADLz6rUB14tjzL_1paYg0ScpB8MN_qfQybs;;;;;;", "or_profile": "~Liang_Peng7;~Junyuan_Gao1;~Xinran_Liu4;~Weihong_Li2;~Shaohua_Dong1;~Zhipeng_Zhang2;~Heng_Fan3;~Libo_Zhang1", "aff": ";University of Chinese Academy of Sciences;Institute of Software, Chinese Academy of Science;University of Chinese Academy of Sciences;University of North Texas;;;Institute of Software Chinese Academy of Sciences", "aff_domain": ";ucas.ac.cn;iscas.ac.cn;ucas.ac.cn;unt.edu;;;iscas.ac.cn", "position": ";MS student;PhD student;MS student;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\npeng2024vasttrack,\ntitle={VastTrack: Vast Category Visual Object Tracking},\nauthor={Liang Peng and Junyuan Gao and Xinran Liu and Weihong Li and Shaohua Dong and Zhipeng Zhang and Heng Fan and Libo Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=5L05sLRIlQ}\n}", "github": "", "reviewers": "LM87;Czds;5VYv", "pdf_size": 10397551, "rating": "7;7;7", "confidence": "5;4;5", "wc_summary_and_contributions": "76;35;189", "wc_strengths": "101;8;12", "wc_improvement": "98;41;46", "wc_limitations": "163;32;111", "wc_correctness": "9;2;59", "wc_clarity": "6;29;22", "wc_relation_to_prior_work": "11;32;165", "wc_documentation": "2;34;264", "wc_additional_feedback": "1;1;1", "wc_review": "467;214;869", "wc_reply_reviewers": "103;0;13", "wc_reply_authors": "49;0;31", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 100.0, 65.12040130916476 ], "wc_strengths_avg": [ 40.333333333333336, 42.92888175679503 ], "wc_improvement_avg": [ 61.666666666666664, 25.77250904010361 ], "wc_limitations_avg": [ 102.0, 53.85783756025363 ], "wc_correctness_avg": [ 23.333333333333332, 25.381533094401966 ], "wc_clarity_avg": [ 19.0, 9.626352718795768 ], "wc_relation_to_prior_work_avg": [ 69.33333333333333, 68.18764958227814 ], "wc_documentation_avg": [ 100.0, 116.69904312661123 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 516.6666666666666, 269.6990091853427 ], "wc_reply_reviewers_avg": [ 38.666666666666664, 45.79907810814052 ], "wc_reply_authors_avg": [ 26.666666666666668, 20.237478982214054 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13404212873610178296&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";ucas.ac.cn;iscas.ac.cn;ucas.ac.cn;unt.edu;;;iscas.ac.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;University of North Texas", "aff_unique_dep": ";Institute of Software;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ios.ac.cn;https://www.unt.edu", "aff_unique_abbr": "UCAS;CAS;UNT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Private Attribute Inference from Images with Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96590", "id": "5MIk4VFn1c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5MIk4VFn1c", "openreview": "https://openreview.net/forum?id=5MIk4VFn1c", "poster": "/media/PosterPDFs/NeurIPS%202024/96590.png?t=1733407614.880577", "project": "", "author_site": "Batuhan T\u00f6mek\u00e7e, Mark Vero, Robin Staab, Martin Vechev", "tldr": "", "abstract": "As large language models (LLMs) become ubiquitous in our daily tasks and digital interactions, associated privacy risks are increasingly in focus. While LLM privacy research has primarily focused on the leakage of model training data, it has recently been shown that LLMs can make accurate privacy-infringing inferences from previously unseen texts. With the rise of vision-language models (VLMs), capable of understanding both images and text, a key question is whether this concern transfers to the previously unexplored domain of benign images posted online. To answer this question, we compile an image dataset with human-annotated labels of the image owner's personal attributes. In order to understand the privacy risks posed by VLMs beyond traditional human attribute recognition, our dataset consists of images where the inferable private attributes do not stem from direct depictions of humans. On this dataset, we evaluate 7 state-of-the-art VLMs, finding that they can infer various personal attributes at up to 77.6% accuracy. Concerningly, we observe that accuracy scales with the general capabilities of the models, implying that future models can be misused as stronger inferential adversaries, establishing an imperative for the development of adequate defenses.", "keywords": "privacy;vision-language models;multimodal models;large language models", "primary_area": "privacy", "supplementary_material": "", "author": "Batuhan T\u00f6mek\u00e7e;Mark Vero;Robin Staab;Martin Vechev", "authorids": "~Batuhan_T\u00f6mek\u00e7e1;~Mark_Vero1;~Robin_Staab1;~Martin_Vechev1", "gender": ";M;M;M", "homepage": ";https://www.sri.inf.ethz.ch/people/markvero;;https://www.sri.inf.ethz.ch/people/martin", "dblp": ";319/4985;304/3512;93/2189.html", "google_scholar": ";vguDYtQAAAAJ;;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";;;", "linkedin": "batuhan-tomekce-5623b31a2/;https://linkedin.com/in/mark-vero-9a32bb17a;robin-staab-b778a51a6/;", "or_profile": "~Batuhan_T\u00f6mek\u00e7e1;~Mark_Vero1;~Robin_Staab1;~Martin_Vechev1", "aff": "Swiss Federal Institute of Technology;ETHZ-ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nt{\\\"o}mek{\\c{c}}e2024private,\ntitle={Private Attribute Inference from Images with Vision-Language Models},\nauthor={Batuhan T{\\\"o}mek{\\c{c}}e and Mark Vero and Robin Staab and Martin Vechev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5MIk4VFn1c}\n}", "github": "", "reviewers": "g2WQ;zT6i;T2ze;dw5b", "pdf_size": 6586312, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "30;39;110;81", "wc_strengths": "34;14;71;31", "wc_weaknesses": "307;105;79;27", "wc_questions": "185;21;36;67", "wc_limitations": "150;59;1;3", "wc_review": "706;238;297;209", "wc_reply_reviewers": "146;21;14;0", "wc_reply_authors": "1144;33;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "4;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.0, 32.334192428449484 ], "wc_strengths_avg": [ 37.5, 20.79062288629179 ], "wc_weaknesses_avg": [ 129.5, 106.25794087972908 ], "wc_questions_avg": [ 77.25, 64.38313055451715 ], "wc_limitations_avg": [ 53.25, 60.51601027827264 ], "wc_review_avg": [ 362.5, 200.83886576058927 ], "wc_reply_reviewers_avg": [ 45.25, 58.657373790513326 ], "wc_reply_authors_avg": [ 294.25, 490.78833268528297 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=941886708238254498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ethz.ch;inf.ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "TOPA: Extending Large Language Models for Video Understanding via Text-Only Pre-Alignment", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96589", "id": "5NMbQPY7Bn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5NMbQPY7Bn", "openreview": "https://openreview.net/forum?id=5NMbQPY7Bn", "poster": "", "project": "", "author_site": "Wei Li, Hehe Fan, Yongkang Wong, Mohan Kankanhalli, Yi Yang", "tldr": "", "abstract": "Recent advancements in image understanding have benefited from the extensive use of web image-text pairs. However, video understanding remains a challenge despite the availability of substantial web video-text data. This difficulty primarily arises from the inherent complexity of videos and the inefficient language supervision in recent web-collected video-text datasets. In this paper, we introduce Text-Only Pre-Alignment (TOPA), a novel approach to extend large language models (LLMs) for video understanding, without the need for pre-training on real video data. Specifically, we first employ an advanced LLM to automatically generate Textual Videos comprising continuous textual frames, along with corresponding annotations to simulate real video-text data. Then, these annotated textual videos are used to pre-align a language-only LLM with the video modality. To bridge the gap between textual and real videos, we employ the CLIP model as the feature extractor to align image and text modalities. During text-only pre-alignment, the continuous textual frames, encoded as a sequence of CLIP text features, are analogous to continuous CLIP image features, thus aligning the LLM with real video representation. Extensive experiments, including zero-shot evaluation and finetuning on various video understanding tasks, demonstrate that TOPA is an effective and efficient framework for aligning video content with LLMs. In particular, without training on any video data, the TOPA-Llama2-13B model achieves a Top-1 accuracy of 51.0% on the challenging long-form video understanding benchmark, Egoschema. This performance surpasses previous video-text pre-training approaches and proves competitive with recent GPT-3.5 based video agents.", "keywords": "video understanding;Large language model;vision-language;text-only alignment", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wei Li;Hehe Fan;Yongkang Wong;Mohan Kankanhalli;Yi Yang", "authorids": "~Wei_Li55;~Hehe_Fan1;~Yongkang_Wong1;~Mohan_Kankanhalli1;~Yi_Yang4", "gender": "M;M;M;M;M", "homepage": "https://github.com/lw-2018;https://hehefan.github.io;https://sites.google.com/site/yongkangwong/;https://www.comp.nus.edu.sg/~mohan;http://reler.net/", "dblp": ";184/5722.html;89/7407;09/3613.html;", "google_scholar": "hDubMJwAAAAJ;hVuflMQAAAAJ;https://scholar.google.com.sg/citations?user=Xa0mxggAAAAJ;6Lx_eowAAAAJ;https://scholar.google.com.au/citations?user=RMSuNFwAAAAJ", "orcid": ";0000-0001-9572-2345;0000-0002-1239-4428;0000-0002-4846-2015;", "linkedin": ";;yongkangwong/;mohan-kankanhalli-583417221;", "or_profile": "~Wei_Li55;~Hehe_Fan1;~Yongkang_Wong1;~Mohan_Kankanhalli1;~Yi_Yang4", "aff": "National University of Singapore;Zhejiang University;National University of Singapore;National University of Singapore;Zhejiang University", "aff_domain": "nus.edu;zju.edu.cn;nus.edu.sg;nus.edu.sg;zju.edu.cn", "position": "Intern;Assistant Professor;Senior Research Fellow;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024topa,\ntitle={{TOPA}: Extending Large Language Models for Video Understanding via Text-Only Pre-Alignment},\nauthor={Wei Li and Hehe Fan and Yongkang Wong and Mohan Kankanhalli and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5NMbQPY7Bn}\n}", "github": "", "reviewers": "kuEy;oFSV;nFok", "pdf_size": 9358969, "rating": "4;7;8", "confidence": "5;4;5", "soundness": "2;4;4", "novelty": "2;4;4", "presentation": "2;4;3", "wc_summary": "88;67;64", "wc_strengths": "27;172;134", "wc_weaknesses": "244;89;124", "wc_questions": "124;55;126", "wc_limitations": "11;28;13", "wc_review": "494;411;461", "wc_reply_reviewers": "297;49;45", "wc_reply_authors": "1966;44;44", "reply_reviewers": "2;1;1", "reply_authors": "6;2;2", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 73.0, 10.677078252031311 ], "wc_strengths_avg": [ 111.0, 61.389467066156115 ], "wc_weaknesses_avg": [ 152.33333333333334, 66.37435917246626 ], "wc_questions_avg": [ 101.66666666666667, 33.00841643513901 ], "wc_limitations_avg": [ 17.333333333333332, 7.586537784494028 ], "wc_review_avg": [ 455.3333333333333, 34.120700787384514 ], "wc_reply_reviewers_avg": [ 130.33333333333334, 117.86244336325102 ], "wc_reply_authors_avg": [ 684.6666666666666, 906.039488960363 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2773500981126146, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13170155137741269943&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "nus.edu;zju.edu.cn;nus.edu.sg;nus.edu.sg;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "National University of Singapore;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.zju.edu.cn", "aff_unique_abbr": "NUS;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Singapore;China" }, { "title": "EHRCon: Dataset for Checking Consistency between Unstructured Notes and Structured Tables in Electronic Health Records", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97848", "id": "5OZTcbgCyH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5OZTcbgCyH", "openreview": "https://openreview.net/forum?id=5OZTcbgCyH", "poster": "/media/PosterPDFs/NeurIPS%202024/97848.png?t=1731470369.4386957", "project": "", "author_site": "Yeonsu Kwon, Jiho Kim, Gyubok Lee, Seongsu Bae, Daeun Kyung, Wonchul Cha, Tom Pollard, Alistair Johnson, Edward Choi", "tldr": "", "abstract": "Electronic Health Records (EHRs) are integral for storing comprehensive patient medical records, combining structured data (e.g., medications) with detailed clinical notes (e.g., physician notes). These elements are essential for straightforward data retrieval and provide deep, contextual insights into patient care. However, they often suffer from discrepancies due to unintuitive EHR system designs and human errors, posing serious risks to patient safety. To address this, we developed EHRCon, a new dataset and task specifically designed to ensure data consistency between structured tables and unstructured notes in EHRs.\nEHRCon was crafted in collaboration with healthcare professionals using the MIMIC-III EHR dataset, and includes manual annotations of 3,943 entities across 105 clinical notes checked against database entries for consistency.\nEHRCon has two versions, one using the original MIMIC-III schema, and another using the OMOP CDM schema, in order to increase its applicability and generalizability. Furthermore, leveraging the capabilities of large language models, we introduce CheckEHR, a novel framework for verifying the consistency between clinical notes and database tables. CheckEHR utilizes an eight-stage process and shows promising results in both few-shot and zero-shot settings. The code is available at \\url{https://github.com/dustn1259/EHRCon}.", "keywords": "healthcare;fact-verification;consistency check;electronic health records", "primary_area": "", "supplementary_material": "/attachment/d1dd67ac1f13b1bf7551a5e263438693937c70a9.pdf", "author": "Yeonsu Kwon;Jiho Kim;Gyubok Lee;Seongsu Bae;Daeun Kyung;Wonchul Cha;Tom Pollard;ALISTAIR JOHNSON;Edward Choi", "authorids": "~Yeonsu_Kwon1;~Jiho_Kim1;~Gyubok_Lee1;~Seongsu_Bae1;~Daeun_Kyung1;~Wonchul_Cha1;~Tom_Pollard1;~ALISTAIR_JOHNSON1;~Edward_Choi1", "gender": ";;M;M;;M;M;M;M", "homepage": "https://sites.google.com/view/yeonsukwon;;https://sites.google.com/view/gyuboklee;;https://dek924.github.io/;;;https://alistairewj.github.io;http://mp2893.com", "dblp": ";;249/4944;307/5358;321/9474;;;167/1714;41/3886", "google_scholar": ";https://scholar.google.com/citations?hl=en;UYzauyYAAAAJ;hJKVzt4AAAAJ;WCMzXVoAAAAJ;oMHU_Q0AAAAJ;kd2pMFYAAAAJ;66GCSz8AAAAJ;GUlGIPkAAAAJ", "orcid": ";;;;;;0000-0002-5676-7898;;", "linkedin": ";;gyubok-lee-104915229;seongsu-bae-17297b180/;;;tomjpollard/;;", "or_profile": "~Yeonsu_Kwon1;~Jiho_Kim1;~Gyubok_Lee1;~Seongsu_Bae1;~Daeun_Kyung1;~Wonchul_Cha1;~Tom_Pollard1;~ALISTAIR_JOHNSON1;~Edward_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;Sung Kyun Kwan University;Massachusetts Institute of Technology;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;skku.edu;mit.edu;;kaist.ac.kr", "position": "MS student;PhD student;PhD student;PhD student;PhD student;Full Professor;Research Scientist;;Associate Professor", "bibtex": "@inproceedings{\nkwon2024ehrcon,\ntitle={{EHRC}on: Dataset for Checking Consistency between Unstructured Notes and Structured Tables in Electronic Health Records},\nauthor={Yeonsu Kwon and Jiho Kim and Gyubok Lee and Seongsu Bae and Daeun Kyung and Wonchul Cha and Tom Pollard and ALISTAIR JOHNSON and Edward Choi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=5OZTcbgCyH}\n}", "github": "", "reviewers": "8FTT;9y8Y;b7rr;V5Jt", "pdf_size": 1417331, "rating": "6;8;8;8", "confidence": "4;4;4;3", "wc_summary_and_contributions": "83;39;87;162", "wc_strengths": "92;16;77;126", "wc_improvement": "192;6;58;25", "wc_limitations": "41;6;8;17", "wc_correctness": "31;4;1;6", "wc_clarity": "13;5;1;6", "wc_relation_to_prior_work": "29;4;1;17", "wc_documentation": "16;1;1;29", "wc_additional_feedback": "1;1;1;1", "wc_review": "498;82;235;389", "wc_reply_reviewers": "157;23;94;74", "wc_reply_authors": "51;0;0;0", "reply_reviewers": "1;1;2;1", "reply_authors": "5;2;3;3", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 92.75, 44.19488092528364 ], "wc_strengths_avg": [ 77.75, 39.8269695056001 ], "wc_improvement_avg": [ 70.25, 72.71304903523163 ], "wc_limitations_avg": [ 18.0, 13.910427743243556 ], "wc_correctness_avg": [ 10.5, 11.968709203585824 ], "wc_clarity_avg": [ 6.25, 4.322904116447646 ], "wc_relation_to_prior_work_avg": [ 12.75, 11.143944544011335 ], "wc_documentation_avg": [ 11.75, 11.691342951089922 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 301.0, 157.21800151382158 ], "wc_reply_reviewers_avg": [ 87.0, 47.994791384065834 ], "wc_reply_authors_avg": [ 12.75, 22.083647796503186 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6951813323900634759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;skku.edu;mit.edu;;kaist.ac.kr", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Sungkyunkwan University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.skku.edu;https://web.mit.edu", "aff_unique_abbr": "KAIST;SKKU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Transfer Q-star : Principled Decoding for LLM Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96588", "id": "5PrShrKxoX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5PrShrKxoX", "openreview": "https://openreview.net/forum?id=5PrShrKxoX", "poster": "", "project": "", "author_site": "Souradip Chakraborty, Soumya Suvra Ghosal, Ming Yin, Dinesh Manocha, Mengdi Wang, Amrit Singh Bedi, Furong Huang", "tldr": "", "abstract": "Aligning foundation models is essential for their safe and trustworthy deployment. However, traditional fine-tuning methods are computationally intensive and require updating billions of model parameters. A promising alternative, alignment via decoding, adjusts the response distribution directly without model updates to maximize a target reward $r$, thus providing a lightweight and adaptable framework for alignment. However, principled decoding methods rely on oracle access to an optimal Q-function ($Q^*$), which is often unavailable in practice. Hence, prior SoTA methods either approximate this $Q^*$ using $Q^{\\pi_{\\text{sft}}}$ (derived from the reference $\\texttt{SFT}$ model) or rely on short-term rewards, resulting in sub-optimal decoding performance. In this work, we propose $\\texttt{Transfer Q}^*$, which implicitly estimates the optimal value function for a target reward $r$ through a baseline model $\\rho_{\\texttt{BL}}$ aligned with a baseline reward $r_{\\texttt{BL}}$ (which can be different from the target reward $r$). Theoretical analyses of $\\texttt{Transfer Q}^*$ provide a rigorous characterization of its optimality, deriving an upper bound on the sub-optimality gap and identifying a hyperparameter to control the deviation from the pre-trained reference $\\texttt{SFT}$ model based on user needs. Our approach significantly reduces the sub-optimality gap observed in prior SoTA methods and demonstrates superior empirical performance across key metrics such as coherence, diversity, and quality in extensive tests on several synthetic and real datasets.", "keywords": "RLHF;AI Alignment;Decoding;LLM;Transfer Decoding", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/4ade76458ead4c92e404b94b34ce219844e32f0c.zip", "author": "Souradip Chakraborty;Soumya Suvra Ghosal;Ming Yin;Dinesh Manocha;Mengdi Wang;Amrit Bedi;Furong Huang", "authorids": "~Souradip_Chakraborty1;~Soumya_Suvra_Ghosal2;~Ming_Yin4;~Dinesh_Manocha3;~Mengdi_Wang1;~Amrit_Bedi1;~Furong_Huang1", "gender": "M;M;M;M;F;M;F", "homepage": "https://souradip-umd.github.io/;https://soumya1612-rasha.github.io/Soumya/;https://mingyin0312.github.io;https://www.cs.umd.edu/people/dmanocha;http://mwang.princeton.edu;https://sites.google.com/view/amritsinghbedi/home;https://furong-huang.com", "dblp": "264/5758;233/8190;89/453.html;m/DineshManocha;;176/2707.html;72/8513", "google_scholar": "https://scholar.google.co.in/citations?user=pvETm1wAAAAJ;zE8aFIwAAAAJ;ncBRYIUAAAAJ;X08l_4IAAAAJ;;91WLA6QAAAAJ;13yyuCcAAAAJ", "orcid": ";;0000-0001-6458-0751;0000-0001-7047-9801;;;", "linkedin": ";;;dinesh-manocha-2311846;;;", "or_profile": "~Souradip_Chakraborty1;~Soumya_Suvra_Ghosal2;~Ming_Yin4;~Dinesh_Manocha3;~Mengdi_Wang1;~Amrit_Bedi1;~Furong_Huang1", "aff": "University of Maryland, College Park;University of Maryland, College Park;Princeton University;University of Maryland, College Park;Princeton University;University of Maryland, College Park;University of Maryland", "aff_domain": "umd.edu;umd.edu;princeton.edu;umd.edu;princeton.edu;umd.edu;cs.umd.edu", "position": "PhD student;PhD student;Postdoc;Professor;Full Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchakraborty2024transfer,\ntitle={Transfer Q-star : Principled Decoding for {LLM} Alignment},\nauthor={Souradip Chakraborty and Soumya Suvra Ghosal and Ming Yin and Dinesh Manocha and Mengdi Wang and Amrit Bedi and Furong Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5PrShrKxoX}\n}", "github": "", "reviewers": "xz5J;1x8S;hjEu;ypg9", "pdf_size": 20044720, "rating": "5;6;6;6", "confidence": "3;4;3;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "4;4;4;3", "wc_summary": "104;137;105;45", "wc_strengths": "127;88;110;66", "wc_weaknesses": "315;276;120;32", "wc_questions": "55;63;72;54", "wc_limitations": "121;38;87;6", "wc_review": "722;602;494;203", "wc_reply_reviewers": "407;13;38;23", "wc_reply_authors": "976;0;22;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.75, 33.22179254645962 ], "wc_strengths_avg": [ 97.75, 22.960564017462637 ], "wc_weaknesses_avg": [ 185.75, 114.90512390663874 ], "wc_questions_avg": [ 61.0, 7.245688373094719 ], "wc_limitations_avg": [ 63.0, 44.19841626121914 ], "wc_review_avg": [ 505.25, 192.23862124973743 ], "wc_reply_reviewers_avg": [ 120.25, 165.7941117772281 ], "wc_reply_authors_avg": [ 249.5, 419.5411183662455 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=536880424009776162&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "umd.edu;umd.edu;princeton.edu;umd.edu;princeton.edu;umd.edu;cs.umd.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;1;0;0", "aff_unique_norm": "University of Maryland;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.princeton.edu", "aff_unique_abbr": "UMD;Princeton", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "OVT-B: A New Large-Scale Benchmark for Open-Vocabulary Multi-Object Tracking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97847", "id": "5S0y3OhfRs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5S0y3OhfRs", "openreview": "https://openreview.net/forum?id=5S0y3OhfRs", "poster": "/media/PosterPDFs/NeurIPS%202024/97847.png?t=1729493390.8732133", "project": "", "author_site": "Haiji Liang, Ruize Han", "tldr": "", "abstract": "Open-vocabulary object perception has become an important topic in artificial intelligence, which aims to identify objects with novel classes that have not been seen during training. Under this setting, open-vocabulary object detection (OVD) in a single image has been studied in many literature. However, open-vocabulary object tracking (OVT) from a video has been studied less, and one reason is the shortage of benchmarks. In this work, we have built a new large-scale benchmark for open-vocabulary multi-object tracking namely OVT-B. OVT-B contains 1,048 categories of objects and 1,973 videos with 637,608 bounding box annotations, which is much larger than the sole open-vocabulary tracking dataset, i.e., OVTAO-val dataset (200+ categories, 900+ videos). The proposed OVT-B can be used as a new benchmark to pave the way for OVT research. We also develop a simple yet effective baseline method for OVT. It integrates the motion features for object tracking, which is an important feature for MOT but is ignored in previous OVT methods. Experimental results have verified the usefulness of the proposed benchmark and the effectiveness of our method. We have released the benchmark to the public at https://github.com/Coo1Sea/OVT-B-Dataset.", "keywords": "Open-Vocabulary;Multi-Object Tracking;Benchmark", "primary_area": "", "supplementary_material": "/attachment/80cbac1b504c2d40bc3d7a3447936aaf3953e7c8.pdf", "author": "Haiji Liang;Ruize Han", "authorids": "~Haiji_Liang1;~Ruize_Han1", "gender": "M;M", "homepage": "https://coo1sea.github.io/my-personal-website/;https://www.ruizehan.cn/", "dblp": ";205/4022", "google_scholar": ";ef0Fw9QAAAAJ", "orcid": ";0000-0002-6587-8936", "linkedin": ";", "or_profile": "~Haiji_Liang1;~Ruize_Han1", "aff": "Zhejiang University;City University of Hong Kong", "aff_domain": "zju.edu.cn;cityu.edu.hk", "position": "MS student;Postdoc", "bibtex": "@inproceedings{\nliang2024ovtb,\ntitle={{OVT}-B: A New Large-Scale Benchmark for Open-Vocabulary Multi-Object Tracking},\nauthor={Haiji Liang and Ruize Han},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=5S0y3OhfRs}\n}", "github": "", "reviewers": "tr32;UMfb;RuFG", "pdf_size": 6662730, "rating": "6;6;8", "confidence": "4;4;3", "wc_summary_and_contributions": "59;93;78", "wc_strengths": "8;7;91", "wc_improvement": "12;7;74", "wc_limitations": "11;44;14", "wc_correctness": "2;12;2", "wc_clarity": "15;58;1", "wc_relation_to_prior_work": "33;13;5", "wc_documentation": "21;27;1", "wc_additional_feedback": "1;1;1", "wc_review": "162;262;267", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 76.66666666666667, 13.912424503139471 ], "wc_strengths_avg": [ 35.333333333333336, 39.364394515292055 ], "wc_improvement_avg": [ 31.0, 30.474032661705056 ], "wc_limitations_avg": [ 23.0, 14.89966442575134 ], "wc_correctness_avg": [ 5.333333333333333, 4.714045207910317 ], "wc_clarity_avg": [ 24.666666666666668, 24.253293018108327 ], "wc_relation_to_prior_work_avg": [ 17.0, 11.775681155103795 ], "wc_documentation_avg": [ 16.333333333333332, 11.115554667022044 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 230.33333333333334, 48.3620604284897 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12551745740049934684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;cityu.edu.hk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Zhejiang University;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "ZJU;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Hierarchical and Density-based Causal Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96587", "id": "5S5NVpd6PV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5S5NVpd6PV", "openreview": "https://openreview.net/forum?id=5S5NVpd6PV", "poster": "/media/PosterPDFs/NeurIPS%202024/96587.png?t=1731810030.3324995", "project": "", "author_site": "Kwangho Kim, Jisu Kim, Larry Wasserman, Edward Kennedy", "tldr": "", "abstract": "Understanding treatment effect heterogeneity is vital for scientific and policy research. However, identifying and evaluating heterogeneous treatment effects pose significant challenges due to the typically unknown subgroup structure. Recently, a novel approach, causal k-means clustering, has emerged to assess heterogeneity of treatment effect by applying the k-means algorithm to unknown counterfactual regression functions. In this paper, we expand upon this framework by integrating hierarchical and density-based clustering algorithms. We propose plug-in estimators which are simple and readily implementable using off-the-shelf algorithms. Unlike k-means clustering, which requires the margin condition, our proposed estimators do not rely on strong structural assumptions on the outcome process. We go on to study their rate of convergence, and show that under the minimal regularity conditions, the additional cost of causal clustering is essentially the estimation error of the outcome regression functions. Our findings significantly extend the capabilities of the causal clustering framework, thereby contributing to the progression of methodologies for identifying homogeneous subgroups in treatment response, consequently facilitating more nuanced and targeted interventions. The proposed methods also open up new avenues for clustering with generic pseudo-outcomes. We explore finite sample properties via simulation, and illustrate the proposed methods in voting and employment projection datasets.", "keywords": "Causal inference;Heterogeneous treatment effect;Personalization;Subgroup analysis;Observational studies", "primary_area": "causal_inference", "supplementary_material": "", "author": "Kwangho Kim;Jisu Kim;Larry Wasserman;Edward Kennedy", "authorids": "~Kwangho_Kim1;~Jisu_Kim1;~Larry_Wasserman1;~Edward_Kennedy1", "gender": ";M;;M", "homepage": ";https://pages.saclay.inria.fr/jisu.kim/;;http://www.ehkennedy.com/", "dblp": ";;;222/3133", "google_scholar": ";;;dXztgDYAAAAJ", "orcid": ";0000-0003-0573-4495;;", "linkedin": ";;;", "or_profile": "~Kwangho_Kim1;~Jisu_Kim1;~Larry_Wasserman1;~Edward_Kennedy1", "aff": ";Seoul National University;;Carnegie Mellon University", "aff_domain": ";snu.ac.kr;;cmu.edu", "position": ";Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nkim2024hierarchical,\ntitle={Hierarchical and Density-based Causal Clustering},\nauthor={Kwangho Kim and Jisu Kim and Larry Wasserman and Edward Kennedy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5S5NVpd6PV}\n}", "github": "", "reviewers": "geg3;xeku;Ykrk;STix", "pdf_size": 1721600, "rating": "3;6;6;7", "confidence": "2;2;3;3", "soundness": "3;3;4;2", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "234;60;215;47", "wc_strengths": "16;121;26;75", "wc_weaknesses": "97;50;394;321", "wc_questions": "58;82;180;57", "wc_limitations": "2;93;72;78", "wc_review": "407;406;887;578", "wc_reply_reviewers": "41;0;104;107", "wc_reply_authors": "19;0;56;75", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 139.0, 85.88655308021157 ], "wc_strengths_avg": [ 59.5, 41.9434142625514 ], "wc_weaknesses_avg": [ 215.5, 145.27990225767638 ], "wc_questions_avg": [ 94.25, 50.509281325316834 ], "wc_limitations_avg": [ 61.25, 35.0526389876711 ], "wc_review_avg": [ 569.5, 196.22499840744044 ], "wc_reply_reviewers_avg": [ 63.0, 44.91658936295141 ], "wc_reply_authors_avg": [ 37.5, 29.567718883945037 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sV8fm-8ErnwJ:scholar.google.com/&scioq=Hierarchical+and+Density-based+Causal+Clustering&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": ";snu.ac.kr;;cmu.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Seoul National University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.cmu.edu", "aff_unique_abbr": "SNU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Conditional Density Estimation with Histogram Trees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96586", "id": "5SUP6vUVkP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5SUP6vUVkP", "openreview": "https://openreview.net/forum?id=5SUP6vUVkP", "poster": "/media/PosterPDFs/NeurIPS%202024/96586.png?t=1731409119.0220928", "project": "", "author_site": "Lincen Yang, Matthijs van Leeuwen", "tldr": "", "abstract": "Conditional density estimation (CDE) goes beyond regression by modeling the full conditional distribution, providing a richer understanding of the data than just the conditional mean in regression. This makes CDE particularly useful in critical application domains. However, interpretable CDE methods are understudied. Current methods typically employ kernel-based approaches, using kernel functions directly for kernel density estimation or as basis functions in linear models. In contrast, despite their conceptual simplicity and visualization suitability, tree-based methods---which are arguably more comprehensible---have been largely overlooked for CDE tasks. Thus, we propose the Conditional Density Tree (CDTree), a fully non-parametric model consisting of a decision tree in which each leaf is formed by a histogram model. Specifically, we formalize the problem of learning a CDTree using the minimum description length (MDL) principle, which eliminates the need for tuning the hyperparameter for regularization. Next, we propose an iterative algorithm that, although greedily, searches the optimal histogram for every possible node split. Our experiments demonstrate that, in comparison to existing interpretable CDE methods, CDTrees are both more accurate (as measured by the log-loss) and more robust against irrelevant features. Further, our approach leads to smaller tree sizes than existing tree-based models, which benefits interpretability.", "keywords": "Conditional density estimation;MDL principle;Decision Tree;Histogram", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Lincen Yang;Matthijs van Leeuwen", "authorids": "~Lincen_Yang1;~Matthijs_van_Leeuwen1", "gender": "M;M", "homepage": "https://www.lincen.nl/;https://patternsthatmatter.org/", "dblp": ";79/7008", "google_scholar": "https://scholar.google.nl/citations?user=2Y7IfE0AAAAJ;GGLwU28AAAAJ", "orcid": ";0000-0002-0510-3549", "linkedin": ";matthijs-van-leeuwen-742077105/", "or_profile": "~Lincen_Yang1;~Matthijs_van_Leeuwen1", "aff": "CISPA Helmholtz Center for Information Security;Leiden University, Leiden University", "aff_domain": "cispa.de;liacs.leidenuniv.nl", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nyang2024conditional,\ntitle={Conditional Density Estimation with Histogram Trees},\nauthor={Lincen Yang and Matthijs van Leeuwen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5SUP6vUVkP}\n}", "github": "", "reviewers": "n5cC;yfPy;cqKh;QurN", "pdf_size": 1709773, "rating": "5;5;6;7", "confidence": "5;3;4;4", "soundness": "3;3;3;4", "novelty": "1;1;3;3", "presentation": "3;3;3;3", "wc_summary": "157;39;151;125", "wc_strengths": "28;30;42;82", "wc_weaknesses": "300;31;52;52", "wc_questions": "33;31;78;348", "wc_limitations": "4;28;66;40", "wc_review": "522;159;389;647", "wc_reply_reviewers": "674;121;208;65", "wc_reply_authors": "1288;388;304;0", "reply_reviewers": "5;2;1;1", "reply_authors": "5;3;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 1.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.0, 47.16990566028302 ], "wc_strengths_avg": [ 45.5, 21.742814905158898 ], "wc_weaknesses_avg": [ 108.75, 110.75056433264798 ], "wc_questions_avg": [ 122.5, 131.5418184456943 ], "wc_limitations_avg": [ 34.5, 22.332711434127294 ], "wc_review_avg": [ 429.25, 180.74343003274006 ], "wc_reply_reviewers_avg": [ 267.0, 240.44230077089182 ], "wc_reply_authors_avg": [ 495.0, 480.0531220604653 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VN7D4uf_qG8J:scholar.google.com/&scioq=Conditional+Density+Estimation+with+Histogram+Trees&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cispa.de;liacs.leidenuniv.nl", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Leiden University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.universiteitleiden.nl", "aff_unique_abbr": "CISPA;LU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Leiden", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Netherlands" }, { "title": "Efficient Centroid-Linkage Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96585", "id": "5VE1iLeYOz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5VE1iLeYOz", "openreview": "https://openreview.net/forum?id=5VE1iLeYOz", "poster": "/media/PosterPDFs/NeurIPS%202024/96585.png?t=1731711085.3183417", "project": "", "author_site": "Mohammadhossein Bateni, Laxman Dhulipala, Willem Fletcher, Kishen N. Gowda, D Ellis Hershkowitz, Rajesh Jayaram, Jakub Lacki", "tldr": "", "abstract": "We give an algorithm for Centroid-Linkage Hierarchical Agglomerative Clustering (HAC), which computes a $c$-approximate clustering in roughly $n^{1+O(1/c^2)}$ time. We obtain our result by combining a new centroid-linkage HAC algorithm with a novel fully dynamic data structure for nearest neighbor search which works under adaptive updates.\n\nWe also evaluate our algorithm empirically. By leveraging a state-of-the-art nearest-neighbor search library, we obtain a fast and accurate centroid-linkage HAC algorithm. Compared to an existing state-of-the-art exact baseline, our implementation maintains the clustering quality while delivering up to a $36\\times$ speedup due to performing fewer distance comparisons.", "keywords": "clustering;hierarchical agglomerative clustering;hac;centroid linkage;algorithm;dynamic nearest neighbor search;adaptive updates", "primary_area": "infrastructure", "supplementary_material": "", "author": "Mohammadhossein Bateni;Laxman Dhulipala;Willem Fletcher;Kishen N Gowda;D Ellis Hershkowitz;Rajesh Jayaram;Jakub Lacki", "authorids": "~Mohammadhossein_Bateni1;~Laxman_Dhulipala1;~Willem_Fletcher1;~Kishen_N_Gowda1;~D_Ellis_Hershkowitz1;~Rajesh_Jayaram1;~Jakub_Lacki2", "gender": ";;M;M;Not Specified;;", "homepage": "http://mhbateni.com/academic;;;https://kishen19.github.io/;https://dhershko.github.io/;http://rajeshjayaram.com/;", "dblp": "22/4739;;;264/5489;;202/9970.html;", "google_scholar": "n4eReqMAAAAJ;;;JzSyIlcAAAAJ;JR_KR7MAAAAJ;Cerc8UYAAAAJ;", "orcid": ";;0009-0003-5752-3558;0000-0001-6573-9445;;;", "linkedin": ";;;kishen19/;;;", "or_profile": "~Mohammadhossein_Bateni1;~Laxman_Dhulipala1;~Willem_Fletcher1;~Kishen_N_Gowda1;~D_Ellis_Hershkowitz1;~Rajesh_Jayaram1;~Jakub_Lacki2", "aff": "Google;;Brown University;University of Maryland, College Park;Brown University;Google;", "aff_domain": "google.com;;brown.edu;cs.umd.edu;brown.edu;google.com;", "position": "Research scientist;;PhD student;PhD student;Assistant Professor;Researcher;", "bibtex": "@inproceedings{\nbateni2024efficient,\ntitle={Efficient Centroid-Linkage Clustering},\nauthor={Mohammadhossein Bateni and Laxman Dhulipala and Willem Fletcher and Kishen N Gowda and D Ellis Hershkowitz and Rajesh Jayaram and Jakub Lacki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5VE1iLeYOz}\n}", "github": "", "reviewers": "Dgto;Pmjq;vHLH;JGDe", "pdf_size": 1790878, "rating": "6;6;7;9", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;4;3;4", "wc_summary": "69;268;181;40", "wc_strengths": "61;64;50;89", "wc_weaknesses": "92;89;95;36", "wc_questions": "96;111;21;23", "wc_limitations": "17;19;30;7", "wc_review": "335;551;377;195", "wc_reply_reviewers": "63;0;15;45", "wc_reply_authors": "36;0;0;28", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 139.5, 90.97389735523042 ], "wc_strengths_avg": [ 66.0, 14.265342617687105 ], "wc_weaknesses_avg": [ 78.0, 24.34132288927617 ], "wc_questions_avg": [ 62.75, 41.09972627646077 ], "wc_limitations_avg": [ 18.25, 8.166241485530538 ], "wc_review_avg": [ 364.5, 127.02263577803761 ], "wc_reply_reviewers_avg": [ 30.75, 24.681724007856502 ], "wc_reply_authors_avg": [ 16.0, 16.24807680927192 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7755988397706399204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;;brown.edu;cs.umd.edu;brown.edu;google.com;", "author_num": 7, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Google;Brown University;University of Maryland", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.brown.edu;https://www/umd.edu", "aff_unique_abbr": "Google;Brown;UMD", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Mountain View;;College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5VtI484yVy", "title": "A Neuro-Symbolic Benchmark Suite for Concept Quality and Reasoning Shortcuts", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "The advent of powerful neural classifiers has increased interest in problems that require both learning and reasoning.\nThese problems are critical for understanding important properties of models, such as trustworthiness, generalization, interpretability, and compliance to safety and structural constraints. However, recent research observed that tasks requiring both learning and reasoning on background knowledge often suffer from reasoning shortcuts (RSs): predictors can solve the downstream reasoning task without associating the correct concepts to the high-dimensional data. To address this issue, we introduce rsbench, a comprehensive benchmark suite designed to systematically evaluate the impact of RSs on models by providing easy access to highly customizable tasks affected by RSs. Furthermore, rsbench implements common metrics for evaluating concept quality and introduces novel formal verification procedures for assessing the presence of RSs in learning tasks. Using rsbench, we highlight that obtaining high quality concepts in both purely neural and neuro-symbolic models is a far-from-solved problem. rsbench is available at: https://unitn-sml.github.io/rsbench.", "keywords": "neuro-symbolic ai;reasoning-shortcuts;concept-bottleneck-models;concepts;explainable ai", "primary_area": "", "supplementary_material": "/attachment/4707134f341f9259f87334217c838a76c648fc1c.zip", "author": "Samuele Bortolotti;Emanuele Marconato;Tommaso Carraro;Paolo Morettin;Emile van Krieken;Antonio Vergari;Stefano Teso;Andrea Passerini", "authorids": "~Samuele_Bortolotti1;~Emanuele_Marconato1;~Tommaso_Carraro1;~Paolo_Morettin1;~Emile_van_Krieken1;~Antonio_Vergari3;~Stefano_Teso1;~Andrea_Passerini2", "gender": "M;;M;;M;;M;M", "homepage": "https://samuelebortolotti.github.io/;;;https://paolomorettin.github.io/;https://emilevankrieken.com;https://stefanoteso.github.io/;http://disi.unitn.it/~passerini/;http://nolovedeeplearning.com", "dblp": "371/1046.html;321/3331;;172/6368;235/1698;78/8359;00/6186;http://dblp.uni-trier.de/pers/hd/v/Vergari:Antonio", "google_scholar": "w7Cv80sAAAAJ;H0gXWAgAAAAJ;hy1cRN8AAAAJ;9JK7q10AAAAJ;https://scholar.google.nl/citations?user=il8Y0B4AAAAJ;uaQCyXkAAAAJ;https://scholar.google.it/citations?user=IIXgkLoAAAAJ;YK0NLaUAAAAJ", "orcid": "0009-0009-4028-4806;;0000-0002-3043-1456;0000-0003-4321-5215;0000-0001-5502-4817;;0000-0002-2765-5395;0000-0003-0036-5678", "linkedin": "samuele-bortolotti;emanuele-marconato-108449195;tommaso-carraro/;;emile-van-krieken/;;;", "or_profile": "~Samuele_Bortolotti1;~Emanuele_Marconato1;~Tommaso_Carraro1;~Paolo_Morettin1;~Emile_van_Krieken1;~Stefano_Teso1;~Andrea_Passerini2;~antonio_vergari2", "aff": "University of Trento;University of Pisa;University of Padua;University of Trento;Vrije Universiteit Amsterdam;University of Trento;University of Trento;University of Edinburgh, University of Edinburgh", "aff_domain": "unitn.it;unipi.it;unipd.it;unitn.it;vu.nl;unitn.it;unitn.it;ed.ac.uk", "position": "PhD student;PhD student;PhD student;Assistant Professor;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbortolotti2024a,\ntitle={A Neuro-Symbolic Benchmark Suite for Concept Quality and Reasoning Shortcuts},\nauthor={Samuele Bortolotti and Emanuele Marconato and Tommaso Carraro and Paolo Morettin and Emile van Krieken and Antonio Vergari and Stefano Teso and Andrea Passerini},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=5VtI484yVy}\n}", "github": "", "project": "", "reviewers": "zJ7d;RJC8;tb9x;NquB", "site": "https://openreview.net/forum?id=5VtI484yVy", "pdf_size": 4115661, "rating": "6;7;7;7", "confidence": "3;3;3;4", "wc_summary_and_contributions": "55;188;47;143", "wc_strengths": "68;31;75;160", "wc_improvement": "221;75;83;114", "wc_limitations": "1;20;15;16", "wc_correctness": "9;8;9;10", "wc_clarity": "25;4;7;11", "wc_relation_to_prior_work": "11;12;10;16", "wc_documentation": "1;16;19;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "392;355;266;495", "wc_reply_reviewers": "35;28;11;70", "wc_reply_authors": "33;32;33;33", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 108.25, 59.486868298810286 ], "wc_strengths_avg": [ 83.5, 47.225522760473496 ], "wc_improvement_avg": [ 123.25, 58.28539697042476 ], "wc_limitations_avg": [ 13.0, 7.176350047203662 ], "wc_correctness_avg": [ 9.0, 0.7071067811865476 ], "wc_clarity_avg": [ 11.75, 8.042853971072706 ], "wc_relation_to_prior_work_avg": [ 12.25, 2.277608394786075 ], "wc_documentation_avg": [ 15.0, 8.573214099741124 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 377.0, 82.08836701993772 ], "wc_reply_reviewers_avg": [ 36.0, 21.482551058940835 ], "wc_reply_authors_avg": [ 32.75, 0.4330127018922193 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8509078262093228590&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;3;0;0;4", "aff_unique_norm": "University of Trento;University of Pisa;University of Padua;Vrije Universiteit Amsterdam;University of Edinburgh", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.unitn.it;https://www.unipi.it;https://www.unipd.it;https://www.vu.nl;https://www.ed.ac.uk", "aff_unique_abbr": "UniTN;UNIP;UNIPD;VU Amsterdam;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;2", "aff_country_unique": "Italy;Netherlands;United Kingdom" }, { "id": "5WFzk0H27p", "title": "The Tournesol dataset: Which videos should be more largely recommended?", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "This paper introduces the Tournesol public dataset, which was collected as part of the online deployed platform https://tournesol.app. Our dataset contains a list of 200,000 comparative judgments made by Tournesol\u2019s 20,000 users on which YouTube videos should be more largely recommended. It also provides 600,000 comparisons along secondary criteria like content reliability, topic importance and layman-friendliness. The dataset also exports information about users\u2019 pretrust statuses and vouches. It is published at https://api.tournesol.app/exports/all under ODC-By license. The data is currently used by Tournesol to make community-driven video content recommendations to over 10,000 users.", "keywords": "Recommendation;Ethics;Preferences;Human", "primary_area": "", "supplementary_material": "/attachment/41fa1e4ef8e55561ae99d3582e137f8c6f2a23cd.zip", "author": "L\u00ea-Nguy\u00ean Hoang;Romain Beylerian;Julien Fageot;Louis Faucon;Aidan Jungo;Adrien Matissart;Natha\u00ebl Nogu\u00e8s", "authorids": "~L\u00ea-Nguy\u00ean_Hoang1;~Romain_Beylerian1;~Julien_Fageot1;~Louis_Faucon1;~Aidan_Jungo1;~Adrien_Matissart1;~Natha\u00ebl_Nogu\u00e8s1", "gender": "M;;M;M;M;;M", "homepage": "http://epfl.ch/le.hoang;;https://bigwww.epfl.ch/fageot/index.html;;;;", "dblp": ";;;184/0515;;;", "google_scholar": "https://scholar.google.ch/scholar?hl=en;;https://scholar.google.fr/citations?user=_ffD9XIAAAAJ;0oLMFOsAAAAJ;uuNEVpAAAAAJ;TY-dGx0AAAAJ;", "orcid": "0000-0002-9236-5837;;;;;;", "linkedin": "l%C3%AA-nguy%C3%AAn-hoang;romainbeylerian/;;;;matissart/;natha%C3%ABl-nogu%C3%A8s-153424108", "or_profile": "~L\u00ea-Nguy\u00ean_Hoang1;~Romain_Beylerian1;~Julien_Fageot1;~Louis_Faucon1;~Aidan_Jungo1;~Adrien_Matissart1;~Natha\u00ebl_Nogu\u00e8s1", "aff": "Calicarpa;Association Tournesol;;Oracle;Association Tournesol;;Armortech", "aff_domain": "calicarpa.com;tournesol.app;;oracle.com;tournesol.app;;armortech.fr", "position": "CEO;Researcher;;Researcher;Researcher;;IT Engineer", "bibtex": "@misc{\nanonymous2024the,\ntitle={The Tournesol dataset: Which videos should be more largely recommended?},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=5WFzk0H27p}\n}", "github": "", "project": "", "reviewers": "SZfP;o5F6;7mK9;C2E6", "site": "https://openreview.net/forum?id=5WFzk0H27p", "pdf_size": 4035872, "rating": "3;4;7;7", "confidence": "3;5;5;3", "wc_summary_and_contributions": "31;56;31;143", "wc_strengths": "25;34;31;109", "wc_improvement": "108;28;18;68", "wc_limitations": "1;45;7;7", "wc_correctness": "1;24;8;33", "wc_clarity": "1;9;5;5", "wc_relation_to_prior_work": "1;31;1;28", "wc_documentation": "1;21;3;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "170;249;105;411", "wc_reply_reviewers": "166;0;35;0", "wc_reply_authors": "0;0;87;0", "reply_reviewers": "2;0;1;0", "reply_authors": "3;1;2;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 1.0 ], "wc_summary_and_contributions_avg": [ 65.25, 46.03463370116026 ], "wc_strengths_avg": [ 49.75, 34.361133566865924 ], "wc_improvement_avg": [ 55.5, 35.61951712193752 ], "wc_limitations_avg": [ 15.0, 17.4928556845359 ], "wc_correctness_avg": [ 16.5, 12.658988901172163 ], "wc_clarity_avg": [ 5.0, 2.8284271247461903 ], "wc_relation_to_prior_work_avg": [ 15.25, 14.289419162443238 ], "wc_documentation_avg": [ 10.5, 8.645808232895291 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 233.75, 114.33585395666576 ], "wc_reply_reviewers_avg": [ 50.25, 68.33877010892134 ], "wc_reply_authors_avg": [ 21.75, 37.67210506462308 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.14002800840280097, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:44CDR1BZbY8J:scholar.google.com/&scioq=The+Tournesol+dataset:+Which+videos+should+be+more+largely+recommended%3F&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Calicarpa;Association Tournesol;Oracle Corporation;Armortech", "aff_unique_dep": ";;;", "aff_unique_url": ";;https://www.oracle.com;", "aff_unique_abbr": ";;Oracle;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "Deep Support Vectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96584", "id": "5WoYFypPv0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5WoYFypPv0", "openreview": "https://openreview.net/forum?id=5WoYFypPv0", "poster": "/media/PosterPDFs/NeurIPS%202024/96584.png?t=1733212740.1818798", "project": "", "author_site": "JunHoo Lee, Hyunho Lee, Kyomin Hwang, Nojun Kwak", "tldr": "", "abstract": "Deep learning has achieved tremendous success. However, unlike SVMs, which provide direct decision criteria and can be trained with a small dataset, it still has significant weaknesses due to its requirement for massive datasets during training and the black-box characteristics on decision criteria. This paper addresses these issues by identifying support vectors in deep learning models. To this end, we propose the DeepKKT condition, an adaptation of the traditional Karush-Kuhn-Tucker (KKT) condition for deep learning models, and confirm that generated Deep Support Vectors (DSVs) using this condition exhibit properties similar to traditional support vectors. This allows us to apply our method to few-shot dataset distillation problems and alleviate the black-box characteristics of deep learning models. Additionally, we demonstrate that the DeepKKT condition can transform conventional classification models into generative models with high fidelity, particularly as latent generation models using class labels as latent variables. We validate the effectiveness of DSVs using common datasets (ImageNet, CIFAR10 and CIFAR100) on the general architectures (ResNet and ConvNet), proving their practical applicability.", "keywords": "model inversion;privacy attacks;generative model;dataset distillation;support vector machines", "primary_area": "machine_vision", "supplementary_material": "/attachment/ed2b8544df7447c2ba2ed45bb8f993e1f5e59fb7.zip", "author": "Junhoo Lee;Hyunho Lee;Kyomin Hwang;Nojun Kwak", "authorids": "~Junhoo_Lee2;~Hyunho_Lee3;~Kyomin_Hwang1;~Nojun_Kwak1", "gender": "M;M;M;M", "homepage": ";https://kyominhwang.github.io/;http://mipal.snu.ac.kr;https://junhoo-lee.com", "dblp": ";356/2620;49/2806;376/0719", "google_scholar": "https://scholar.google.co.kr/citations?user=s25YrZUAAAAJ;https://scholar.google.com/citations?view_op=list_works;h_8-1M0AAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": ";;0000-0002-1792-0327;", "linkedin": ";kyomin-hwang-299119147/;;", "or_profile": "~Hyunho_Lee3;~Kyomin_Hwang1;~Nojun_Kwak1;~JunHoo_Lee1", "aff": "Seoul National University;NAVER;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;navercorp.com;snu.ac.kr;snu.ac.kr", "position": "MS student;Intern;Full Professor;PhD student", "bibtex": "@inproceedings{\nlee2024deep,\ntitle={Deep Support Vectors},\nauthor={Junhoo Lee and Hyunho Lee and Kyomin Hwang and Nojun Kwak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5WoYFypPv0}\n}", "github": "", "reviewers": "H6Gu;wYkm;UA9Q;jvAE;1B22", "pdf_size": 9187117, "rating": "4;5;6;6;8", "confidence": "4;4;3;3;4", "soundness": "2;2;3;3;4", "novelty": "2;3;2;2;4", "presentation": "2;3;3;3;4", "wc_summary": "122;149;104;94;46", "wc_strengths": "64;113;81;42;145", "wc_weaknesses": "100;97;149;53;12", "wc_questions": "69;2;8;50;242", "wc_limitations": "2;2;1;17;12", "wc_review": "357;363;343;256;457", "wc_reply_reviewers": "120;35;33;0;28", "wc_reply_authors": "766;185;0;0;30", "reply_reviewers": "2;1;1;0;1", "reply_authors": "3;2;1;1;2", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 103.0, 34.082253446625266 ], "wc_strengths_avg": [ 89.0, 36.359317925395686 ], "wc_weaknesses_avg": [ 82.2, 46.43016261009647 ], "wc_questions_avg": [ 74.2, 87.59543367093973 ], "wc_limitations_avg": [ 6.8, 6.49307323229917 ], "wc_review_avg": [ 355.2, 63.9011736981411 ], "wc_reply_reviewers_avg": [ 43.2, 40.414848756366766 ], "wc_reply_authors_avg": [ 196.2, 293.0558991045906 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1230914909793327, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3027371085554536135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "snu.ac.kr;navercorp.com;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Toxicity Detection for Free", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96583", "id": "5a27EE8LxX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5a27EE8LxX", "openreview": "https://openreview.net/forum?id=5a27EE8LxX", "poster": "/media/PosterPDFs/NeurIPS%202024/96583.png?t=1731723177.1903877", "project": "", "author_site": "Zhanhao Hu, Julien Piet, Geng Zhao, Jiantao Jiao, David Wagner", "tldr": "", "abstract": "Current LLMs are generally aligned to follow safety requirements and tend to refuse toxic prompts. However, LLMs can fail to refuse toxic prompts or be overcautious and refuse benign examples. In addition, state-of-the-art toxicity detectors have low TPRs at low FPR, incurring high costs in real-world applications where toxic examples are rare. In this paper, we introduce Moderation Using LLM Introspection (MULI), which detects toxic prompts using the information extracted directly from LLMs themselves. We found we can distinguish between benign and toxic prompts from the distribution of the first response token's logits. Using this idea, we build a robust detector of toxic prompts using a sparse logistic regression model on the first response token logits. Our scheme outperforms SOTA detectors under multiple metrics.", "keywords": "LLM;toxicity detection", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/d10aaaf8deda0ff7a7b01ca9d5e9a6e2d6f88987.zip", "author": "Zhanhao Hu;Julien Piet;Geng Zhao;Jiantao Jiao;David Wagner", "authorids": "~Zhanhao_Hu1;~Julien_Piet1;~Geng_Zhao2;~Jiantao_Jiao1;~David_Wagner3", "gender": "M;M;;M;", "homepage": "https://whothu.github.io/;https://people.eecs.berkeley.edu/~julien.piet/;;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en;https://people.eecs.berkeley.edu/~daw/", "dblp": "208/4826;237/0174;;43/8919;42/5626", "google_scholar": ";bRWa8q8AAAAJ;;aO8KpGcAAAAJ;67kghxAAAAAJ", "orcid": "0000-0002-3746-1447;;;;0000-0002-9944-9232", "linkedin": ";julien-piet-b1741975/;;;", "or_profile": "~Zhanhao_Hu1;~Julien_Piet1;~Geng_Zhao2;~Jiantao_Jiao1;~David_Wagner3", "aff": "University of California, Berkeley;Electrical Engineering & Computer Science Department, University of California, Berkeley;;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;eecs.berkeley.edu;;berkeley.edu;berkeley.edu", "position": "Postdoc;PhD student;;Assistant Professor;Professor", "bibtex": "@inproceedings{\nhu2024toxicity,\ntitle={Toxicity Detection for Free},\nauthor={Zhanhao Hu and Julien Piet and Geng Zhao and Jiantao Jiao and David Wagner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5a27EE8LxX}\n}", "github": "", "reviewers": "uGPt;W9yA;bPak;soW2;XwZ4", "pdf_size": 891369, "rating": "4;7;7;7;7", "confidence": "4;5;4;4;4", "soundness": "1;4;3;3;3", "novelty": "1;3;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "33;175;41;102;108", "wc_strengths": "25;157;94;89;98", "wc_weaknesses": "208;59;93;79;129", "wc_questions": "24;197;167;47;4", "wc_limitations": "14;136;1;63;45", "wc_review": "304;724;396;380;384", "wc_reply_reviewers": "267;29;105;15;120", "wc_reply_authors": "398;0;0;0;86", "reply_reviewers": "2;1;1;1;2", "reply_authors": "2;1;1;1;2", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.8, 51.62712465361595 ], "wc_strengths_avg": [ 92.6, 41.85976588563294 ], "wc_weaknesses_avg": [ 113.6, 52.44273066879718 ], "wc_questions_avg": [ 87.8, 78.68265374274053 ], "wc_limitations_avg": [ 51.8, 47.47799490290212 ], "wc_review_avg": [ 437.6, 146.82996969283894 ], "wc_reply_reviewers_avg": [ 107.2, 89.80066814896202 ], "wc_reply_authors_avg": [ 96.8, 154.23929460419612 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7526464938497959390&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "berkeley.edu;eecs.berkeley.edu;;berkeley.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hypothesis Testing the Circuit Hypothesis in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96582", "id": "5ai2YFAXV7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5ai2YFAXV7", "openreview": "https://openreview.net/forum?id=5ai2YFAXV7", "poster": "", "project": "", "author_site": "Claudia Shi, Nicolas Beltran Velez, Achille Nazaret, Carolina Zheng, Adri\u00e0 Garriga-Alonso, Andrew Jesson, Maggie Makar, David Blei", "tldr": "", "abstract": "Large language models (LLMs) demonstrate surprising capabilities, but we do not understand how they are implemented. \nOne hypothesis suggests that these capabilities are primarily executed by small subnetworks within the LLM, known as circuits. But how can we evaluate this hypothesis?\nIn this paper, we formalize a set of criteria that a circuit is hypothesized to meet and develop a suite of hypothesis tests to evaluate how well circuits satisfy them. \nThe criteria focus on the extent to which the LLM's behavior is preserved, the degree of localization of this behavior, and whether the circuit is minimal.\nWe apply these tests to six circuits described in the research literature. \nWe find that synthetic circuits -- circuits that are hard-coded in the model -- align with the idealized properties. \nCircuits discovered in Transformer models satisfy the criteria to varying degrees.\nTo facilitate future empirical studies of circuits, we created the \\textit{circuitry} package, a wrapper around the \\textit{TransformerLens} library, which abstracts away lower-level manipulations of hooks and activations. The software is available at \\url{https://github.com/blei-lab/circuitry}.", "keywords": "Hypothesis testings;mechanistic interpretability;circuit", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/041cbfc03bb2389ffd6e1c8f782b0a33d9ae8e5f.zip", "author": "Claudia Shi;Nicolas Beltran-Velez;Achille Nazaret;Carolina Zheng;Adri\u00e0 Garriga-Alonso;Andrew Jesson;Maggie Makar;David Blei", "authorids": "~Claudia_Shi1;~Nicolas_Beltran-Velez1;~Achille_Nazaret1;~Carolina_Zheng1;~Adri\u00e0_Garriga-Alonso1;~Andrew_Jesson1;~Maggie_Makar1;~David_Blei2", "gender": ";M;Not Specified;;M;F;M;M", "homepage": "https://claudiajshi.com/;;;https://ballerlina.github.io/;https://oatml.cs.ox.ac.uk/members/andrew_jesson/;https://mymakar.github.io/;http://www.cs.columbia.edu/~blei/;https://agarri.ga/", "dblp": ";;241/4984;239/3982;;211/6995;86/1910;225/6564", "google_scholar": "WHKniLsAAAAJ;;iDExfWQAAAAJ;;ElJ_fC4AAAAJ;bmlgkM4AAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ;OtnThiMAAAAJ", "orcid": ";;;;;;;0000-0003-3409-5047", "linkedin": ";nicolas-beltran-velez-1950141a9;;;;;;adrigarriga/", "or_profile": "~Claudia_Shi1;~Nicolas_Beltran-Velez1;~Achille_Nazaret1;~Carolina_Zheng1;~Andrew_Jesson1;~Maggie_Makar1;~David_Blei2;~Adria_Garriga-Alonso1", "aff": "Columbia University;Columbia University;Columbia University;Columbia University;Columbia University;University of Michigan - Ann Arbor;Columbia University;FAR", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;cs.columbia.edu;columbia.edu;umich.edu;columbia.edu;far.ai", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nshi2024hypothesis,\ntitle={Hypothesis Testing the Circuit Hypothesis in {LLM}s},\nauthor={Claudia Shi and Nicolas Beltran-Velez and Achille Nazaret and Carolina Zheng and Adri{\\`a} Garriga-Alonso and Andrew Jesson and Maggie Makar and David Blei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5ai2YFAXV7}\n}", "github": "", "reviewers": "NNjL;wEmx;6etf", "pdf_size": 548491, "rating": "6;6;7", "confidence": "3;3;4", "soundness": "3;2;3", "novelty": "3;2;4", "presentation": "3;4;4", "wc_summary": "426;113;161", "wc_strengths": "55;44;142", "wc_weaknesses": "55;60;343", "wc_questions": "234;82;9", "wc_limitations": "12;1;14", "wc_review": "782;300;669", "wc_reply_reviewers": "119;9;25", "wc_reply_authors": "208;14;26", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 233.33333333333334, 137.63801154558365 ], "wc_strengths_avg": [ 80.33333333333333, 43.8355512746852 ], "wc_weaknesses_avg": [ 152.66666666666666, 134.6014693662575 ], "wc_questions_avg": [ 108.33333333333333, 93.72418163004798 ], "wc_limitations_avg": [ 9.0, 5.715476066494082 ], "wc_review_avg": [ 583.6666666666666, 205.81923028608273 ], "wc_reply_reviewers_avg": [ 51.0, 48.52490769354092 ], "wc_reply_authors_avg": [ 82.66666666666667, 88.7593500552039 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1470217606654088641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "columbia.edu;columbia.edu;columbia.edu;cs.columbia.edu;columbia.edu;umich.edu;columbia.edu;far.ai", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;2", "aff_unique_norm": "Columbia University;University of Michigan;FAR", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.umich.edu;", "aff_unique_abbr": "Columbia;UM;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "PANORAMIA: Privacy Auditing of Machine Learning Models without Retraining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96581", "id": "5atraF1tbg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5atraF1tbg", "openreview": "https://openreview.net/forum?id=5atraF1tbg", "poster": "/media/PosterPDFs/NeurIPS%202024/96581.png?t=1731516035.8379626", "project": "", "author_site": "Mishaal Kazmi, Hadrien Lautraite, Alireza Akbari, Qiaoyue Tang, Mauricio Soroco, Tao Wang, S\u00e9bastien Gambs, Mathias L\u00e9cuyer", "tldr": "", "abstract": "We present PANORAMIA, a privacy leakage measurement framework for machine learning models that relies on membership inference attacks using generated data as non-members. By relying on generated non-member data, PANORAMIA eliminates the common dependency of privacy measurement tools on in-distribution non-member data. As a result, PANORAMIA does not modify the model, training data, or training process, and only requires access to a subset of the training data. We evaluate PANORAMIA on ML models for image and tabular data classification, as well as on large-scale language models.", "keywords": "privacy;auditing;machine learning;differential privacy;membership inference attack", "primary_area": "privacy", "supplementary_material": "/attachment/768529aa371e8209fbb2e26713126d8f01575825.zip", "author": "Mishaal Kazmi;Hadrien Lautraite;Alireza Akbari;Qiaoyue Tang;Mauricio Soroco;Tao Wang;S\u00e9bastien Gambs;Mathias L\u00e9cuyer", "authorids": "~Mishaal_Kazmi1;~Hadrien_Lautraite1;~Alireza_Akbari1;~Qiaoyue_Tang1;~Mauricio_Soroco1;~Tao_Wang35;~S\u00e9bastien_Gambs2;~Mathias_L\u00e9cuyer2", "gender": "F;M;M;F;;;M;M", "homepage": ";;;;https://github.com/msoroco;https://www.cs.sfu.ca/~taowang/;https://sebastiengambs.openum.ca;http://mathias.lecuyer.me", "dblp": ";;;307/3873;;;09/2378;130/0417", "google_scholar": ";;https://scholar.google.ca/citations?hl=en;qr_qHm4AAAAJ;;;https://scholar.google.fr/citations?user=2q1NjMgAAAAJ;WeIvMTUAAAAJ", "orcid": ";;;;;;0000-0002-7326-7377;", "linkedin": "mishaalkazmi555/;hadrienlautraite;;;;;;", "or_profile": "~Mishaal_Kazmi1;~Hadrien_Lautraite1;~Alireza_Akbari1;~Qiaoyue_Tang1;~Mauricio_Soroco1;~Tao_Wang35;~S\u00e9bastien_Gambs2;~Mathias_L\u00e9cuyer1", "aff": ";Universit\u00e9 du Qu\u00e9bec \u00e0 Montr\u00e9al;Simon Fraser University;University of British Columbia;University of British Columbia;Simon Fraser University;Universit\u00e9 du Qu\u00e9bec \u00e0 Montr\u00e9al;University of British Columbia", "aff_domain": ";uqam.ca;sfu.ca;ubc.ca;cs.ubc.ca;sfu.ca;uqam.ca;ubc.ca", "position": ";PhD student;MS student;PhD student;Undergrad student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkazmi2024panoramia,\ntitle={{PANORAMIA}: Privacy Auditing of Machine Learning Models without Retraining},\nauthor={Mishaal Kazmi and Hadrien Lautraite and Alireza Akbari and Qiaoyue Tang and Mauricio Soroco and Tao Wang and S{\\'e}bastien Gambs and Mathias L{\\'e}cuyer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5atraF1tbg}\n}", "github": "", "reviewers": "Z4qp;55Rd;kScF;CsXP", "pdf_size": 4250837, "rating": "4;5;7;7", "confidence": "3;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "240;212;338;63", "wc_strengths": "104;85;188;100", "wc_weaknesses": "558;108;377;646", "wc_questions": "81;350;106;83", "wc_limitations": "37;60;15;43", "wc_review": "1020;815;1024;935", "wc_reply_reviewers": "0;162;187;299", "wc_reply_authors": "52;70;0;1418", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;1;4", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 213.25, 98.55804127517958 ], "wc_strengths_avg": [ 119.25, 40.31981522775123 ], "wc_weaknesses_avg": [ 422.25, 205.72600103049686 ], "wc_questions_avg": [ 155.0, 113.01106140551022 ], "wc_limitations_avg": [ 38.75, 16.099301227071937 ], "wc_review_avg": [ 948.5, 84.87785341300757 ], "wc_reply_reviewers_avg": [ 162.0, 106.81526108192593 ], "wc_reply_authors_avg": [ 385.0, 596.9564473225831 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11280590601484009277&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 9, "email": ";uqam.ca;sfu.ca;ubc.ca;cs.ubc.ca;sfu.ca;uqam.ca;ubc.ca", "author_num": 8, "aff_unique_index": "0;1;2;2;1;0;2", "aff_unique_norm": "Universit\u00e9 du Qu\u00e9bec \u00e0 Montr\u00e9al;Simon Fraser University;University of British Columbia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uqam.ca;https://www.sfu.ca;https://www.ubc.ca", "aff_unique_abbr": "UQAM;SFU;UBC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montr\u00e9al;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "SIRIUS : Contexual Sparisty with Correction for Efficient LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96580", "id": "5bR2l1b2eh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5bR2l1b2eh", "openreview": "https://openreview.net/forum?id=5bR2l1b2eh", "poster": "", "project": "", "author_site": "Yang Zhou, Zhuoming Chen, Zhaozhuo Xu, Victoria Lin, Beidi Chen", "tldr": "", "abstract": "With the blossom of large language models (LLM), inference efficiency becomes increasingly important. Various approximate methods are proposed to reduce the cost at inference time. Contextual Sparsity (CS) is appealing for its training-free nature and its ability to reach a higher compression ratio seemingly without significant performance degradation. However, after a comprehensive evaluation of contextual sparsity methods on various complex generation tasks, we find that although CS succeeds in prompt-understanding tasks, it significantly degrades the model performance for reasoning, deduction, and knowledge-based tasks. Despite the gap in end-to-end accuracy, we observed that sparse models and original models often share the general problem-solving logic and require only a few token corrections to recover the original model performance. This paper introduces SIRIUS, an efficient correction mechanism, which significantly boosts CS models on reasoning tasks while maintaining its efficiency gain. SIRIUS is evaluated on 6 models with 8 difficult generation tasks in reasoning, deduction, and coding and shows consistent effectiveness and efficiency. Also, we carefully develop a system implementation for SIRIUS and show that SIRIUS delivers theoretical latency reduction with roughly a 20% reduction in latency for 8B model on-chip and a 35% reduction in latency for 70B model offloading. We open-source our implementation of Sirius at https://github.com/Infini-AI-Lab/Sirius.git.", "keywords": "Contextual Sparsity;LLM inference;Knowledge Distillation", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Yang Zhou;Zhuoming Chen;Zhaozhuo Xu;Xi Victoria Lin;Beidi Chen", "authorids": "~Yang_Zhou27;~Zhuoming_Chen1;~Zhaozhuo_Xu1;~Xi_Victoria_Lin1;~Beidi_Chen1", "gender": "M;M;M;F;F", "homepage": ";;https://ottovonxu.github.io/;http://victorialin.net;https://www.andrew.cmu.edu/user/beidic/", "dblp": ";226/5729;195/4352;215/5264;192/1339", "google_scholar": "W6CZltIAAAAJ;4Bb5KRYAAAAJ;7tDlVAsAAAAJ;gYUOJwMAAAAJ;", "orcid": ";;;;", "linkedin": ";zhuoming-chen-325075234/;;xivictorialin/;", "or_profile": "~Yang_Zhou27;~Zhuoming_Chen1;~Zhaozhuo_Xu1;~Xi_Victoria_Lin1;~Beidi_Chen1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Stevens Institute of Technology;Meta;Meta Facebook", "aff_domain": "andrew.cmu.edu;cmu.edu;stevens.edu;fb.com;fb.com", "position": "PhD student;PhD student;Assistant Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\nzhou2024sirius,\ntitle={{SIRIUS} : Contexual Sparisty with Correction for Efficient {LLM}s},\nauthor={Yang Zhou and Zhuoming Chen and Zhaozhuo Xu and Xi Victoria Lin and Beidi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5bR2l1b2eh}\n}", "github": "", "reviewers": "a8uY;ArEw;BB9w;rbBk", "pdf_size": 1308164, "rating": "4;5;6;6", "confidence": "4;3;4;4", "soundness": "2;2;2;1", "novelty": "2;2;2;2", "presentation": "3;2;2;1", "wc_summary": "77;71;82;191", "wc_strengths": "50;34;42;150", "wc_weaknesses": "221;32;211;552", "wc_questions": "4;56;57;42", "wc_limitations": "13;51;8;104", "wc_review": "365;244;400;1039", "wc_reply_reviewers": "79;24;14;64", "wc_reply_authors": "1339;108;51;36", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.25, 49.66072391739774 ], "wc_strengths_avg": [ 69.0, 47.10626285325551 ], "wc_weaknesses_avg": [ 254.0, 187.76714302561032 ], "wc_questions_avg": [ 39.75, 21.47527648250425 ], "wc_limitations_avg": [ 44.0, 38.4252521136817 ], "wc_review_avg": [ 512.0, 309.7200348702034 ], "wc_reply_reviewers_avg": [ 45.25, 27.012728481217888 ], "wc_reply_authors_avg": [ 383.5, 552.3117326293187 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12620062101149403671&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "andrew.cmu.edu;cmu.edu;stevens.edu;fb.com;fb.com", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Carnegie Mellon University;Stevens Institute of Technology;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.stevens.edu;https://meta.com", "aff_unique_abbr": "CMU;SIT;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "5c1hh8AeHv", "title": "MultiTrust: A Comprehensive Benchmark Towards Trustworthy Multimodal Large Language Models", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Despite the superior capabilities of Multimodal Large Language Models (MLLMs) across diverse tasks, they still face significant trustworthiness challenges. Yet, current literature on the assessment of trustworthy MLLMs remains limited, lacking a holistic evaluation to offer thorough insights into future improvements. In this work, we establish **MultiTrust**, the first comprehensive and unified benchmark on the trustworthiness of MLLMs across five primary aspects: *truthfulness*, *safety*, *robustness*, *fairness*, and *privacy*. Our benchmark employs a rigorous evaluation strategy that addresses both multimodal risks and cross-modal impacts, encompassing 32 diverse tasks with self-curated datasets. Extensive experiments with 21 modern MLLMs reveal some previously unexplored trustworthiness issues and risks, highlighting the complexities introduced by the multimodality and underscoring the necessity for advanced methodologies to enhance their reliability. For instance, typical proprietary models still struggle with the perception of visually confusing images and are vulnerable to multimodal jailbreaking and adversarial attacks; MLLMs are more inclined to disclose privacy in text and reveal ideological and cultural biases even when paired with irrelevant images in inference, indicating that the multimodality amplifies the internal risks from base LLMs. Additionally, we release a scalable toolbox for standardized trustworthiness research, aiming to facilitate future advancements in this important field. Code and resources are publicly available at: [https://multi-trust.github.io/](https://multi-trust.github.io/).", "keywords": "trustworthiness;multimodal large language models;benchmark;GPT-4V", "primary_area": "", "supplementary_material": "/attachment/24c8cabffd34a72eb25c39aa17af5485fe96c320.pdf", "author": "Yichi Zhang;Yao Huang;Yitong Sun;Chang Liu;Zhe Zhao;Zhengwei Fang;Yifan Wang;Huanran Chen;Xiao Yang;Xingxing Wei;Hang Su;Yinpeng Dong;Jun Zhu", "authorids": "~Yichi_Zhang4;~Yao_Huang2;~Yitong_Sun2;~Chang_Liu17;~Zhe_Zhao7;~Zhengwei_Fang1;~Yifan_Wang46;~Huanran_Chen1;~Xiao_Yang4;~Xingxing_Wei1;~Hang_Su3;~Yinpeng_Dong2;~Jun_Zhu2", "gender": "M;M;F;M;M;M;M;M;M;M;M;M;M", "homepage": "https://zycheiheihei.github.io;https://github.com/Aries-iai;https://github.com/lorraine021;https://github.com/sunrise6513;;;https://www.cs.tsinghua.edu.cn/;https://huanranchen.github.io/;https://ml.cs.tsinghua.edu.cn/~xiaoyang/;https://sites.google.com/site/xingxingwei1988/;https://dongyp13.github.io;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": ";;26/9557-2;;;;;329/6558;57/33851;57/4066;183/0980;50/2644-1;26/5371-6", "google_scholar": "HzgDakoAAAAJ;https://scholar.google.com/citations?view_op=list_works;8TAxXYcAAAAJ;;OIfDICcAAAAJ;v5lIRxUAAAAJ;;https://scholar.google.co.jp/citations?user=QYsKXccAAAAJ;bwkwp0MAAAAJ;ak8D_cQAAAAJ;6_4ad84AAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": "0000-0002-1894-3977;0000-0001-7978-2372;0009-0006-5294-2093;;;0000-0002-7960-3212;;;0000-0001-9502-9962;;;;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Yichi_Zhang4;~Yao_Huang2;~Yitong_Sun2;~Chang_Liu17;~Zhe_Zhao7;~Zhengwei_Fang1;~Yifan_Wang46;~Huanran_Chen1;~Xiao_Yang4;~Xingxing_Wei1;~Yinpeng_Dong2;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Beihang University ;Beihang University;Shanghai Jiaotong University;RealAI;Tsinghua University;Tsinghua University;;Tsinghua University;Beihang University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;buaa.edu.cn;buaa.edu.cn;sjtu.edu.cn;realai.ai;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;;mail.tsinghua.edu.cn;buaa.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;Undergrad student;D.Eng;Researcher;Researcher;Undergrad student;;Postdoc;Associate Professor;Postdoc;Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024multitrust,\ntitle={MultiTrust: A Comprehensive Benchmark Towards Trustworthy Multimodal Large Language Models},\nauthor={Yichi Zhang and Yao Huang and Yitong Sun and Chang Liu and Zhe Zhao and Zhengwei Fang and Yifan Wang and Huanran Chen and Xiao Yang and Xingxing Wei and Hang Su and Yinpeng Dong and Jun Zhu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=5c1hh8AeHv}\n}", "github": "", "project": "", "reviewers": "46zd;ujCi;Ku84;LMT3", "site": "https://openreview.net/forum?id=5c1hh8AeHv", "pdf_size": 33827873, "rating": "6;6;7;8", "confidence": "5;4;4;3", "wc_summary_and_contributions": "24;50;47;94", "wc_strengths": "24;26;4;74", "wc_improvement": "54;94;58;16", "wc_limitations": "22;5;1;11", "wc_correctness": "3;3;1;1", "wc_clarity": "3;6;1;1", "wc_relation_to_prior_work": "4;1;1;1", "wc_documentation": "1;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "136;187;115;200", "wc_reply_reviewers": "0;0;21;0", "wc_reply_authors": "77;88;106;77", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 53.75, 25.321680433968044 ], "wc_strengths_avg": [ 32.0, 25.729360660537214 ], "wc_improvement_avg": [ 55.5, 27.617928959282953 ], "wc_limitations_avg": [ 9.75, 7.917543811056558 ], "wc_correctness_avg": [ 2.0, 1.0 ], "wc_clarity_avg": [ 2.75, 2.0463381929681126 ], "wc_relation_to_prior_work_avg": [ 1.75, 1.299038105676658 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 159.5, 35.103418636936205 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 87.0, 11.853269591129697 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12777705251628988173&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;1;2;3;0;0;0;1;0;0;0", "aff_unique_norm": "Tsinghua University;Beihang University;Shanghai Jiao Tong University;RealAI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.buaa.edu.cn/;https://www.sjtu.edu.cn;https://www.realai.co", "aff_unique_abbr": "THU;BUAA;SJTU;RealAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Position Coupling: Improving Length Generalization of Arithmetic Transformers Using Task Structure", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96579", "id": "5cIRdGM1uG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5cIRdGM1uG", "openreview": "https://openreview.net/forum?id=5cIRdGM1uG", "poster": "/media/PosterPDFs/NeurIPS%202024/96579.png?t=1732967897.1725848", "project": "", "author_site": "Hanseul Cho, Jaeyoung Cha, Pranjal Awasthi, Srinadh Bhojanapalli, Anupam Gupta, Chulhee Yun", "tldr": "", "abstract": "Even for simple arithmetic tasks like integer addition, it is challenging for Transformers to generalize to longer sequences than those encountered during training. To tackle this problem, we propose *position coupling*, a simple yet effective method that directly embeds the structure of the tasks into the positional encoding of a (decoder-only) Transformer. Taking a departure from the vanilla absolute position mechanism assigning unique position IDs to each of the tokens, we assign the same position IDs to two or more \"relevant\" tokens; for integer addition tasks, we regard digits of the same significance as in the same position. On the empirical side, we show that with the proposed position coupling, our models trained on 1 to 30-digit additions can generalize up to *200-digit* additions (6.67x of the trained length). On the theoretical side, we prove that a 1-layer Transformer with coupled positions can solve the addition task involving exponentially many digits, whereas any 1-layer Transformer without positional information cannot entirely solve it. We also demonstrate that position coupling can be applied to other algorithmic tasks such as Nx2 multiplication and a two-dimensional task. Our codebase is available at [github.com/HanseulJo/position-coupling](https://github.com/HanseulJo/position-coupling).", "keywords": "Length Generalization;Transformers;Position Coupling;Positional Encoding;Out-of-distribution Generalization;Arithmetic Tasks;Algorithmic Tasks", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hanseul Cho;Jaeyoung Cha;Pranjal Awasthi;Srinadh Bhojanapalli;Anupam Gupta;Chulhee Yun", "authorids": "~Hanseul_Cho1;~Jaeyoung_Cha1;~Pranjal_Awasthi3;~Srinadh_Bhojanapalli1;~Anupam_Gupta2;~Chulhee_Yun1", "gender": "M;M;;M;M;M", "homepage": "https://hanseuljo.github.io/;https://jaeyoungcha.github.io/;https://www.cs.rutgers.edu/~pa336/;https://bsrinadh.github.io/;https://cs.nyu.edu/~anupamg/;https://chulheeyun.github.io/", "dblp": "233/5755-2;;57/679;131/6700;27/2931;138/0148.html", "google_scholar": "IczOXwsAAAAJ;WJuTs9MAAAAJ;;bpSF_9EAAAAJ;QuwaU-8AAAAJ;Ukl64ggAAAAJ", "orcid": "0009-0001-0410-0290;;;;;", "linkedin": "hanseul-cho-66b01a260/;jaeyoung-cha-440a78263/;;;;", "or_profile": "~Hanseul_Cho1;~Jaeyoung_Cha1;~Pranjal_Awasthi3;~Srinadh_Bhojanapalli1;~Anupam_Gupta2;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Rutgers University;Google;New York University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;rutgers.edu;google.com;cs.nyu.edu;kaist.ac.kr", "position": "PhD student;MS student;Assistant Professor;Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncho2024position,\ntitle={Position Coupling: Improving Length Generalization of Arithmetic Transformers Using Task Structure},\nauthor={Hanseul Cho and Jaeyoung Cha and Pranjal Awasthi and Srinadh Bhojanapalli and Anupam Gupta and Chulhee Yun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5cIRdGM1uG}\n}", "github": "", "reviewers": "CVfs;U8WH;ksWg;ow6J", "pdf_size": 3072917, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "2;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "132;59;50;211", "wc_strengths": "205;57;39;90", "wc_weaknesses": "238;109;191;50", "wc_questions": "177;48;263;1", "wc_limitations": "1;3;24;2", "wc_review": "753;276;567;354", "wc_reply_reviewers": "0;14;50;144", "wc_reply_authors": "61;36;47;43", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.0, 64.90377492873584 ], "wc_strengths_avg": [ 97.75, 64.56537384697776 ], "wc_weaknesses_avg": [ 147.0, 72.57754473664703 ], "wc_questions_avg": [ 122.25, 103.70963070033564 ], "wc_limitations_avg": [ 7.5, 9.5524865872714 ], "wc_review_avg": [ 487.5, 186.65810992292833 ], "wc_reply_reviewers_avg": [ 52.0, 56.16048432839589 ], "wc_reply_authors_avg": [ 46.75, 9.12071817347735 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16474629430306560505&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;rutgers.edu;google.com;cs.nyu.edu;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Rutgers University;Google;New York University", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.rutgers.edu;https://www.google.com;https://www.nyu.edu", "aff_unique_abbr": "KAIST;Rutgers;Google;NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Imitating Language via Scalable Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96578", "id": "5d2eScRiRC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5d2eScRiRC", "openreview": "https://openreview.net/forum?id=5d2eScRiRC", "poster": "/media/PosterPDFs/NeurIPS%202024/96578.png?t=1734026972.2374709", "project": "", "author_site": "Markus Wulfmeier, Michael Bloesch, Nino Vieillard, Arun Ahuja, Jorg Bornschein, Sandy Huang, Artem Sokolov, Matt Barnes, Guillaume Desjardins, Alex Bewley, Sarah Bechtle, Jost Springenberg, Nikola Momchev, Olivier Bachem, Matthieu Geist, Martin Riedmiller", "tldr": "", "abstract": "The majority of language model training builds on imitation learning. It covers pretraining, supervised fine-tuning, and affects the starting conditions for reinforcement learning from human feedback (RLHF). The simplicity and scalability of maximum likelihood estimation (MLE) for next token prediction led to its role as predominant paradigm. However, the broader field of imitation learning can more effectively utilize the sequential structure underlying autoregressive generation. We focus on investigating the inverse reinforcement learning (IRL) perspective to imitation, extracting rewards and directly optimizing sequences instead of individual token likelihoods and evaluate its benefits for fine-tuning large language models. We provide a new angle, reformulating inverse soft-Q-learning as a temporal difference regularized extension of MLE. This creates a principled connection between MLE and IRL and allows trading off added complexity with increased performance and diversity of generations in the supervised fine-tuning (SFT) setting. We find clear advantages for IRL-based imitation, in particular for retaining diversity while maximizing task performance, rendering IRL a strong alternative on fixed SFT datasets even without online data generation. Our analysis of IRL-extracted reward functions further indicates benefits for more robust reward functions via tighter integration of supervised and preference-based LLM post-training.", "keywords": "Language Modeling;Inverse Reinforcement Learning;Imitation Learning;Supervised Fine-tuning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Markus Wulfmeier;Michael Bloesch;Nino Vieillard;Arun Ahuja;Jorg Bornschein;Sandy Huang;Artem Sokolov;Matt Barnes;Guillaume Desjardins;Alex Bewley;Sarah Maria Elisabeth Bechtle;Jost Tobias Springenberg;Nikola Momchev;Olivier Bachem;Matthieu Geist;Martin Riedmiller", "authorids": "~Markus_Wulfmeier1;~Michael_Bloesch1;~Nino_Vieillard1;~Arun_Ahuja1;~Jorg_Bornschein1;~Sandy_Huang1;~Artem_Sokolov1;~Matt_Barnes1;~Guillaume_Desjardins1;~Alex_Bewley1;~Sarah_Maria_Elisabeth_Bechtle1;~Jost_Tobias_Springenberg1;~Nikola_Momchev1;~Olivier_Bachem1;~Matthieu_Geist1;~Martin_Riedmiller1", "gender": "M;;;M;M;F;;M;M;Unspecified;F;M;M;M;M;M", "homepage": ";;;;;https://shhuang.github.io/;https://www.cl.uni-heidelberg.de/~sokolov/;;;https://alex.bewley.ai/;;http://www.springenberg-tobias.de;;http://www.olivierbachem.ch/;;https://www.riedmiller.me/", "dblp": "166/1552;40/8368;243/5918;14/9014;13/8510;153/7841;79/3174;160/9912;;39/9969;180/9966;;;https://dblp.org/pers/hd/b/Bachem:Olivier;38/6508;", "google_scholar": ";fn6GhgoAAAAJ;https://scholar.google.fr/citations?user=4jua80IAAAAJ;;X7kZFnoAAAAJ;eurA6WgAAAAJ;0jSH2vsAAAAJ;y5XmOJwAAAAJ;;https://scholar.google.co.uk/citations?user=UO32CB0AAAAJ;https://scholar.google.com/citations?hl=de;;;https://scholar.google.ch/citations?user=mW9BcgsAAAAJ;ectPLEUAAAAJ;1gVfqpcAAAAJ", "orcid": ";;;;0000-0002-3356-7922;;;;;0000-0002-8428-9264;;;;;;", "linkedin": ";;;;;;;;;;;;;olivier-bachem-10257756/;;", "or_profile": "~Markus_Wulfmeier1;~Michael_Bloesch1;~Nino_Vieillard1;~Arun_Ahuja1;~Jorg_Bornschein1;~Sandy_Huang1;~Artem_Sokolov1;~Matt_Barnes1;~Guillaume_Desjardins1;~Alex_Bewley1;~Sarah_Maria_Elisabeth_Bechtle1;~Jost_Tobias_Springenberg1;~Nikola_Momchev1;~Olivier_Bachem1;~Matthieu_Geist1;~Martin_Riedmiller1", "aff": "Google DeepMind;Google DeepMind;Google Deepmind;Google DeepMind;Google Deepmind;Google DeepMind;Google;Google;Google DeepMind;Google;Google DeepMind;Google DeepMind;Google;Google Brain;Google;", "aff_domain": "deepmind.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;google.com;", "position": "Research Scientist;Research Scientist;Researcher;Research;Research Scientist;Research Scientist;Research Scientist;Researcher;Research Scientist;Research Scientist;Researcher;Researcher;Software Engineer;Research scientist;Researcher;", "bibtex": "@inproceedings{\nwulfmeier2024imitating,\ntitle={Imitating Language via Scalable Inverse Reinforcement Learning},\nauthor={Markus Wulfmeier and Michael Bloesch and Nino Vieillard and Arun Ahuja and Jorg Bornschein and Sandy Huang and Artem Sokolov and Matt Barnes and Guillaume Desjardins and Alex Bewley and Sarah Maria Elisabeth Bechtle and Jost Tobias Springenberg and Nikola Momchev and Olivier Bachem and Matthieu Geist and Martin Riedmiller},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5d2eScRiRC}\n}", "github": "", "reviewers": "aRfw;tAhh;6J5H;pjSm;UqkB", "pdf_size": 1472259, "rating": "4;4;5;6;8", "confidence": "3;3;3;4;3", "soundness": "2;2;3;3;4", "novelty": "2;2;3;2;4", "presentation": "2;2;2;3;3", "wc_summary": "89;161;60;99;74", "wc_strengths": "38;16;89;110;43", "wc_weaknesses": "47;99;26;106;197", "wc_questions": "44;30;18;106;145", "wc_limitations": "2;2;10;7;7", "wc_review": "220;308;203;428;466", "wc_reply_reviewers": "0;0;12;49;35", "wc_reply_authors": "63;63;41;286;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.4, 1.4966629547095764 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 96.6, 34.81723711037394 ], "wc_strengths_avg": [ 59.2, 34.77585369189375 ], "wc_weaknesses_avg": [ 95.0, 59.3397000329459 ], "wc_questions_avg": [ 68.6, 48.767202093210145 ], "wc_limitations_avg": [ 5.6, 3.1368774282716245 ], "wc_review_avg": [ 325.0, 106.47816677610486 ], "wc_reply_reviewers_avg": [ 19.2, 19.630588376307013 ], "wc_reply_authors_avg": [ 90.6, 100.37250619567094 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.20044593143431827, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15767316998077694872&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "deepmind.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;google.com;", "author_num": 16, "aff_unique_index": "0;0;1;0;1;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google DeepMind;DeepMind", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;1;1;0;1;0;0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Categorical Flow Matching on Statistical Manifolds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96577", "id": "5fybcQZ0g4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5fybcQZ0g4", "openreview": "https://openreview.net/forum?id=5fybcQZ0g4", "poster": "/media/PosterPDFs/NeurIPS%202024/96577.png?t=1731094343.128881", "project": "", "author_site": "Chaoran Cheng, Jiahan Li, Jian Peng, Ge Liu", "tldr": "", "abstract": "We introduce Statistical Flow Matching (SFM), a novel and mathematically rigorous flow-matching framework on the manifold of parameterized probability measures inspired by the results from information geometry. We demonstrate the effectiveness of our method on the discrete generation problem by instantiating SFM on the manifold of categorical distributions whose geometric properties remain unexplored in previous discrete generative models. Utilizing the Fisher information metric, we equip the manifold with a Riemannian structure whose intrinsic geometries are effectively leveraged by following the shortest paths of geodesics. We develop an efficient training and sampling algorithm that overcomes numerical stability issues with a diffeomorphism between manifolds. Our distinctive geometric perspective of statistical manifolds allows us to apply optimal transport during training and interpret SFM as following the steepest direction of the natural gradient. Unlike previous models that rely on variational bounds for likelihood estimation, SFM enjoys the exact likelihood calculation for arbitrary probability measures. We manifest that SFM can learn more complex patterns on the statistical manifold where existing models often fail due to strong prior assumptions. Comprehensive experiments on real-world generative tasks ranging from image, text to biological domains further demonstrate that SFM achieves higher sampling quality and likelihood than other discrete diffusion or flow-based models.", "keywords": "Generative Model;Flow Matching;Statistical Manifold", "primary_area": "generative_models", "supplementary_material": "", "author": "Chaoran Cheng;Jiahan Li;Jian Peng;Ge Liu", "authorids": "~Chaoran_Cheng2;~Jiahan_Li2;~Jian_Peng1;~Ge_Liu2", "gender": "M;;M;F", "homepage": "https://ccr-cheng.github.io/;;http://jianpeng.web.engr.illinois.edu/;http://www.mit.edu/~geliu/", "dblp": ";;29/4181-1;", "google_scholar": "SrGZZ1wAAAAJ;;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ;P6EahzcAAAAJ", "orcid": ";;;0000-0001-9383-5186", "linkedin": "chaoran-cheng-a70638214/;;;", "or_profile": "~Chaoran_Cheng2;~Jiahan_Li2;~Jian_Peng1;~Ge_Liu2", "aff": "University of Illinois, Urbana Champaign;;University of Illinois, Urbana Champaign;University of Washington", "aff_domain": "illinois.edu;;illinois.edu;uw.edu", "position": "PhD student;;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ncheng2024categorical,\ntitle={Categorical Flow Matching on Statistical Manifolds},\nauthor={Chaoran Cheng and Jiahan Li and Jian Peng and Ge Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5fybcQZ0g4}\n}", "github": "", "reviewers": "WPWv;hmQs;SAto;xYE5", "pdf_size": 3875869, "rating": "6;6;7;7", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "84;182;66;134", "wc_strengths": "44;41;50;28", "wc_weaknesses": "86;334;264;39", "wc_questions": "258;30;166;473", "wc_limitations": "3;20;1;4", "wc_review": "475;607;547;678", "wc_reply_reviewers": "918;96;117;0", "wc_reply_authors": "1269;365;375;0", "reply_reviewers": "4;1;1;0", "reply_authors": "4;2;3;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 116.5, 45.2852072977479 ], "wc_strengths_avg": [ 40.75, 8.042853971072706 ], "wc_weaknesses_avg": [ 180.75, 121.94952849437344 ], "wc_questions_avg": [ 231.75, 161.1806052228369 ], "wc_limitations_avg": [ 7.0, 7.582875444051551 ], "wc_review_avg": [ 576.75, 74.84108163301757 ], "wc_reply_reviewers_avg": [ 282.75, 369.40450389782745 ], "wc_reply_authors_avg": [ 502.25, 467.7581506505258 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9982017433576159920&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "illinois.edu;;illinois.edu;uw.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.washington.edu", "aff_unique_abbr": "UIUC;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Testably Learning Polynomial Threshold Functions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96576", "id": "5g0Z6PdogJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5g0Z6PdogJ", "openreview": "https://openreview.net/forum?id=5g0Z6PdogJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96576.png?t=1733424426.322513", "project": "", "author_site": "Lucas Slot, Stefan Tiegel, Manuel Wiedmer", "tldr": "", "abstract": "Rubinfeld \\& Vasilyan recently introduced the framework of *testable learning* as an extension of the classical agnostic model. It relaxes distributional assumptions which are difficult to verify by conditions that can be checked efficiently by a *tester*. The tester has to accept whenever the data truly satisfies the original assumptions, and the learner has to succeed whenever the tester accepts. We focus on the setting where the tester has to accept standard Gaussian data. There, it is known that basic concept classes such as halfspaces can be learned testably with the same time complexity as in the (distribution-specific) agnostic model. In this work, we ask whether there is a price to pay for testably learning more complex concept classes. In particular, we consider polynomial threshold functions (PTFs), which naturally generalize halfspaces. We show that PTFs of arbitrary constant degree can be testably learned up to excess error $\\varepsilon > 0$ in time $n^{\\mathrm{poly}(1/\\varepsilon)}$. This qualitatively matches the best known guarantees in the agnostic model. Our results build on a connection between testable learning and *fooling*. In particular, we show that distributions that approximately match at least $\\mathrm{poly}(1/\\varepsilon)$ moments of the standard Gaussian fool constant-degree PTFs (up to error $\\varepsilon$). As a secondary result, we prove that a direct approach to show testable learning (without fooling), which was successfully used for halfspaces, cannot work for PTFs.", "keywords": "testable learning;polynomial threshold functions;agnostic learning;fooling using approximate moment matching", "primary_area": "learning_theory", "supplementary_material": "", "author": "Lucas Slot;Stefan Tiegel;Manuel Wiedmer", "authorids": "~Lucas_Slot1;~Stefan_Tiegel1;~Manuel_Wiedmer1", "gender": "M;;", "homepage": "https://www.lucasslot.com;https://stefantiegel.com;", "dblp": "291/6505;218/5553;", "google_scholar": ";https://scholar.google.ch/citations?user=WvpFkwsAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lucas_Slot1;~Stefan_Tiegel1;~Manuel_Wiedmer1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;", "aff_domain": "ethz.ch;ethz.ch;", "position": "Postdoc;PhD student;", "bibtex": "@inproceedings{\nslot2024testably,\ntitle={Testably Learning Polynomial Threshold Functions},\nauthor={Lucas Slot and Stefan Tiegel and Manuel Wiedmer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5g0Z6PdogJ}\n}", "github": "", "reviewers": "2v3u;2r1G;pSUk;9MWZ;zio4", "pdf_size": 562001, "rating": "5;6;6;7;7", "confidence": "4;2;3;2;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;2;3", "presentation": "3;3;3;3;4", "wc_summary": "39;119;79;59;387", "wc_strengths": "35;81;29;118;87", "wc_weaknesses": "33;81;28;35;44", "wc_questions": "30;3;126;42;38", "wc_limitations": "1;7;1;1;8", "wc_review": "138;291;263;255;564", "wc_reply_reviewers": "5;0;0;46;5", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 136.6, 127.96499521353486 ], "wc_strengths_avg": [ 70.0, 33.52610922848042 ], "wc_weaknesses_avg": [ 44.2, 19.114392483152585 ], "wc_questions_avg": [ 47.8, 41.39758447059442 ], "wc_limitations_avg": [ 3.6, 3.2 ], "wc_review_avg": [ 302.2, 140.9899287183308 ], "wc_reply_reviewers_avg": [ 11.2, 17.54308980767071 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2988071523335984, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10026576344979170686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ethz.ch;ethz.ch;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Peri-midFormer: Periodic Pyramid Transformer for Time Series Analysis", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96575", "id": "5iUxMVJVEV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5iUxMVJVEV", "openreview": "https://openreview.net/forum?id=5iUxMVJVEV", "poster": "/media/PosterPDFs/NeurIPS%202024/96575.png?t=1731813919.389041", "project": "", "author_site": "Qiang Wu, Gechang Yao, Zhixi Feng, Yang Shuyuan", "tldr": "", "abstract": "Time series analysis finds wide applications in fields such as weather forecasting, anomaly detection, and behavior recognition. Previous methods attempted to model temporal variations directly using 1D time series. However, this has been quite challenging due to the discrete nature of data points in time series and the complexity of periodic variation. In terms of periodicity, taking weather and traffic data as an example, there are multi-periodic variations such as yearly, monthly, weekly, and daily, etc. In order to break through the limitations of the previous methods, we decouple the implied complex periodic variations into inclusion and overlap relationships among different level periodic components based on the observation of the multi-periodicity therein and its inclusion relationships. This explicitly represents the naturally occurring pyramid-like properties in time series, where the top level is the original time series and lower levels consist of periodic components with gradually shorter periods, which we call the periodic pyramid. To further extract complex temporal variations, we introduce self-attention mechanism into the periodic pyramid, capturing complex periodic relationships by computing attention between periodic components based on their inclusion, overlap, and adjacency relationships. Our proposed Peri-midFormer demonstrates outstanding performance in five mainstream time series analysis tasks, including short- and long-term forecasting, imputation, classification, and anomaly detection.", "keywords": "general time series analysis;deep learning;transformer;periodicity;pyramid structure", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/829c75a9baf3efd1d85e9ceeedacd9bb01d531b7.zip", "author": "Qiang Wu;Gechang Yao;Zhixi Feng;Shuyuan Yang", "authorids": "~Qiang_Wu8;~Gechang_Yao1;~Zhixi_Feng1;~Shuyuan_Yang1", "gender": "M;M;F;M", "homepage": "https://github.com/WuQiangXDU;https://github.com/weidadepigu;https://web.xidian.edu.cn/syyang/;https://web.xidian.edu.cn/zxfeng/", "dblp": ";;81/2383.html;143/8960.html", "google_scholar": ";;;AHOHtygAAAAJ", "orcid": "0009-0001-3746-6942;;;0000-0002-7372-9180", "linkedin": ";;;", "or_profile": "~Qiang_Wu8;~Gechang_Yao1;~Shuyuan_Yang1;~Feng_Zhixi1", "aff": "Xidian University;Xidian University;Xidian University;Xidian University", "aff_domain": "xidian.edu.cn;stu.xidian.edu.cn;xidian.edu;xidian.edu.cn", "position": "MS student;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024perimidformer,\ntitle={Peri-midFormer: Periodic Pyramid Transformer for Time Series Analysis},\nauthor={Qiang Wu and Gechang Yao and Zhixi Feng and Shuyuan Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5iUxMVJVEV}\n}", "github": "", "reviewers": "ECSx;jAZw;W65V;GBMh", "pdf_size": 2155584, "rating": "6;6;6;7", "confidence": "3;4;5;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;4;3;4", "wc_summary": "42;68;75;74", "wc_strengths": "21;31;80;41", "wc_weaknesses": "1;188;135;120", "wc_questions": "313;88;2;65", "wc_limitations": "17;5;2;20", "wc_review": "394;380;294;320", "wc_reply_reviewers": "23;29;13;30", "wc_reply_authors": "38;31;417;38", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 64.75, 13.40475661845451 ], "wc_strengths_avg": [ 43.25, 22.364872009470567 ], "wc_weaknesses_avg": [ 111.0, 68.34837232882727 ], "wc_questions_avg": [ 117.0, 117.45850331074375 ], "wc_limitations_avg": [ 11.0, 7.648529270389178 ], "wc_review_avg": [ 347.0, 41.340053217188775 ], "wc_reply_reviewers_avg": [ 23.75, 6.7592529172978875 ], "wc_reply_authors_avg": [ 131.0, 165.14690430038343 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10308748470737861355&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "xidian.edu.cn;stu.xidian.edu.cn;xidian.edu;xidian.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Xidian University", "aff_unique_dep": "", "aff_unique_url": "http://www.xidian.edu.cn/", "aff_unique_abbr": "Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unlocking Tokens as Data Points for Generalization Bounds on Larger Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96574", "id": "5jRU8ufi8H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5jRU8ufi8H", "openreview": "https://openreview.net/forum?id=5jRU8ufi8H", "poster": "", "project": "", "author_site": "Sanae Lotfi, Yilun Kuang, Marc Finzi, Brandon Amos, Micah Goldblum, Andrew Wilson", "tldr": "", "abstract": "Large language models (LLMs) with billions of parameters excel at predicting the next token in a sequence. Recent work computes non-vacuous compression-based generalization bounds for LLMs, but these bounds are vacuous for large models at the billion-parameter scale. Moreover, these bounds are obtained through restrictive compression techniques, bounding compressed models that generate low-quality text. Additionally, the tightness of these existing bounds depends on the number of IID documents in a training set rather than the much larger number of non-IID constituent tokens, leaving untapped potential for tighter bounds. In this work, we instead use properties of martingales to derive generalization bounds that benefit from the vast number of tokens in LLM training sets. Since a dataset contains far more tokens than documents, our generalization bounds not only tolerate but actually benefit from far less restrictive compression schemes. With Monarch matrices, Kronecker factorizations, and post-training quantization, we achieve non-vacuous generalization bounds for LLMs as large as LLaMA2-70B. Unlike previous approaches, our work achieves the first non-vacuous bounds for models that are deployed in practice and generate high-quality text.", "keywords": "Large language models;generalization bounds;generalization;compression", "primary_area": "generative_models", "supplementary_material": "/attachment/19c1ab55b7d755b171e74fd11f9d3b787e7b8a09.zip", "author": "Sanae Lotfi;Yilun Kuang;Marc Anton Finzi;Brandon Amos;Micah Goldblum;Andrew Gordon Wilson", "authorids": "~Sanae_Lotfi1;~Yilun_Kuang1;~Marc_Anton_Finzi1;~Brandon_Amos1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1", "gender": "F;M;M;;;Not Specified", "homepage": "https://sanaelotfi.github.io/;https://yilunkuang.github.io/;https://mfinzi.github.io;http://bamos.github.io;;https://cims.nyu.edu/~andrewgw", "dblp": "281/6627;;222/3062;133/4801.html;241/7231;65/10453", "google_scholar": "0GyjMX4AAAAJ;XvIasgEAAAAJ;ysMAhlwAAAAJ;d8gdZR4AAAAJ;pGDKzuUAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;;;;", "linkedin": "sanae-lotfi-636825127;yilun-mark-kuang/;;bdamos;;", "or_profile": "~Sanae_Lotfi1;~Yilun_Kuang1;~Marc_Anton_Finzi1;~Brandon_Amos1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1", "aff": "Microsoft;New York University;Carnegie Mellon University;Meta;New York University;New York University", "aff_domain": "microsoft.com;nyu.edu;cmu.edu;meta.com;nyu.edu;nyu.edu", "position": "Researcher;PhD student;Postdoc;Research Scientist;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nlotfi2024unlocking,\ntitle={Unlocking Tokens as Data Points for Generalization Bounds on Larger Language Models},\nauthor={Sanae Lotfi and Yilun Kuang and Marc Anton Finzi and Brandon Amos and Micah Goldblum and Andrew Gordon Wilson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5jRU8ufi8H}\n}", "github": "", "reviewers": "mAFv;iy2E;4zMP", "pdf_size": 535275, "rating": "6;6;7", "confidence": "4;3;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "38;85;61", "wc_strengths": "31;126;125", "wc_weaknesses": "123;154;3", "wc_questions": "111;5;16", "wc_limitations": "1;9;24", "wc_review": "304;379;229", "wc_reply_reviewers": "0;14;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 61.333333333333336, 19.189117286165672 ], "wc_strengths_avg": [ 94.0, 44.5495978283381 ], "wc_weaknesses_avg": [ 93.33333333333333, 65.11698873736579 ], "wc_questions_avg": [ 44.0, 47.588514020367 ], "wc_limitations_avg": [ 11.333333333333334, 9.533566430716727 ], "wc_review_avg": [ 304.0, 61.237243569579455 ], "wc_reply_reviewers_avg": [ 10.0, 7.118052168020874 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14355831008083665782&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;nyu.edu;cmu.edu;meta.com;nyu.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;1;1", "aff_unique_norm": "Microsoft;New York University;Carnegie Mellon University;Meta", "aff_unique_dep": "Microsoft Corporation;;;Meta Platforms, Inc.", "aff_unique_url": "https://www.microsoft.com;https://www.nyu.edu;https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "Microsoft;NYU;CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Adversarial Robustness of Benjamini Hochberg", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96573", "id": "5jYFoldunM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5jYFoldunM", "openreview": "https://openreview.net/forum?id=5jYFoldunM", "poster": "/media/PosterPDFs/NeurIPS%202024/96573.png?t=1731757571.7184987", "project": "", "author_site": "Louis Chen, Roberto Szechtman, Matan Seri", "tldr": "", "abstract": "The Benjamini-Hochberg (BH) procedure is widely used to control the false detection rate (FDR) in multiple testing. Applications of this control abound in drug discovery, forensics, anomaly detection, and, in particular, machine learning, ranging from nonparametric outlier detection to out-of-distribution detection and one-class classification methods. Considering this control could be relied upon in critical safety/security contexts, we investigate its adversarial robustness. More precisely, we study under what conditions BH does and does not exhibit adversarial robustness, we present a class of simple and easily implementable adversarial test-perturbation algorithms, and we perform computational experiments. With our algorithms, we demonstrate that there are conditions under which BH's control can be significantly broken with relatively few (even just one) test score perturbation(s), and provide non-asymptotic guarantees on the expected adversarial-adjustment to FDR. Our technical analysis involves a combinatorial reframing of the BH procedure as a ``balls into bins'' process, and drawing a connection to generalized ballot problems to facilitate an information-theoretic approach for deriving non-asymptotic lower bounds.", "keywords": "multiple testing;p-values;false discovery rate;adversarial robust", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Louis Chen;Roberto Szechtman;Matan Seri", "authorids": "~Louis_Chen1;~Roberto_Szechtman1;~Matan_Seri1", "gender": "M;M;", "homepage": "https://louislchen.github.io/;https://nps.edu/faculty-profiles/-/cv/rszechtm;", "dblp": ";;", "google_scholar": ";;", "orcid": "0000-0001-5311-7294;;", "linkedin": ";;", "or_profile": "~Louis_Chen1;~Roberto_Szechtman1;~Matan_Seri1", "aff": "Naval Postgraduate School;Naval Postgraduate School;", "aff_domain": "nps.edu;nps.edu;", "position": "Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nchen2024on,\ntitle={On the Adversarial Robustness of Benjamini Hochberg},\nauthor={Louis Chen and Roberto Szechtman and Matan Seri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5jYFoldunM}\n}", "github": "", "reviewers": "3drS;L7r5;niTJ;pEvv", "pdf_size": 613778, "rating": "5;5;6;6", "confidence": "1;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "2;4;2;4", "wc_summary": "44;121;69;67", "wc_strengths": "31;99;129;61", "wc_weaknesses": "26;70;314;120", "wc_questions": "5;222;1;172", "wc_limitations": "37;3;27;16", "wc_review": "143;515;540;436", "wc_reply_reviewers": "0;102;0;194", "wc_reply_authors": "0;50;0;315", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 75.25, 28.181332473820326 ], "wc_strengths_avg": [ 80.0, 37.16180835212409 ], "wc_weaknesses_avg": [ 132.5, 109.93975623040102 ], "wc_questions_avg": [ 100.0, 98.60780902139545 ], "wc_limitations_avg": [ 20.75, 12.65652005884714 ], "wc_review_avg": [ 408.5, 158.0197772432299 ], "wc_reply_reviewers_avg": [ 74.0, 80.83316151184488 ], "wc_reply_authors_avg": [ 91.25, 130.78489018231426 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.22941573387056177, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tmpCWO2XMA4J:scholar.google.com/&scioq=On+the+Adversarial+Robustness+of+Benjamini+Hochberg&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "nps.edu;nps.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Naval Postgraduate School", "aff_unique_dep": "", "aff_unique_url": "https://www.nps.edu", "aff_unique_abbr": "NPS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "HYSYNTH: Context-Free LLM Approximation for Guiding Program Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96572", "id": "5jt0ZSA6Co", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5jt0ZSA6Co", "openreview": "https://openreview.net/forum?id=5jt0ZSA6Co", "poster": "", "project": "", "author_site": "Shraddha Barke, Emmanuel Anaya Gonzalez, Saketh Ram Kasibatla, Taylor Berg-Kirkpatrick, Nadia Polikarpova", "tldr": "", "abstract": "Many structured prediction and reasoning tasks can be framed as program synthesis problems, where the goal is to generate a program in a \\emph{domain-specific language} (DSL) that transforms input data into the desired output. Unfortunately, purely neural approaches, such as large language models (LLMs), often fail to produce fully correct programs in unfamiliar DSLs, while purely symbolic methods based on combinatorial search scale poorly to complex problems. Motivated by these limitations, we introduce a hybrid approach, where LLM completions for a given task are used to learn a task-specific, context-free surrogate model, which is then used to guide program synthesis. We evaluate this hybrid approach on three domains, and show that it outperforms both unguided search and direct sampling from LLMs, as well as existing program synthesizers.", "keywords": "program synthesis;programming-by-example;domain-specific languages;guided search;large language models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/304eb870c5fbab88d8b3286e20c81be64e35f7e6.zip", "author": "Shraddha Barke;Emmanuel Anaya Gonzalez;Saketh Ram Kasibatla;Taylor Berg-Kirkpatrick;Nadia Polikarpova", "authorids": "~Shraddha_Barke1;~Emmanuel_Anaya_Gonzalez1;~Saketh_Ram_Kasibatla1;~Taylor_Berg-Kirkpatrick1;~Nadia_Polikarpova1", "gender": "F;;M;M;F", "homepage": "https://shraddhabarke.github.io/;https://eanayag.com;https://saketh.kasibat.la;https://cseweb.ucsd.edu/~tberg/;https://cseweb.ucsd.edu/~npolikarpova/", "dblp": "228/9106;;;22/8160;", "google_scholar": "hcG7ffkAAAAJ;;yJQPDZsAAAAJ;mN6_BKAAAAAJ;https://scholar.google.com.tw/citations?user=CxzUX0EAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Shraddha_Barke1;~Emmanuel_Anaya_Gonzalez1;~Saketh_Ram_Kasibatla1;~Taylor_Berg-Kirkpatrick1;~Nadia_Polikarpova1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbarke2024hysynth,\ntitle={{HYSYNTH}: Context-Free {LLM} Approximation for Guiding Program Synthesis},\nauthor={Shraddha Barke and Emmanuel Anaya Gonzalez and Saketh Ram Kasibatla and Taylor Berg-Kirkpatrick and Nadia Polikarpova},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5jt0ZSA6Co}\n}", "github": "", "reviewers": "XXbQ;MWWa;FZiE", "pdf_size": 976599, "rating": "6;7;7", "confidence": "3;5;4", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "97;128;116", "wc_strengths": "52;105;202", "wc_weaknesses": "32;68;316", "wc_questions": "75;237;214", "wc_limitations": "9;5;38", "wc_review": "265;543;886", "wc_reply_reviewers": "0;34;534", "wc_reply_authors": "0;0;238", "reply_reviewers": "0;1;2", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 113.66666666666667, 12.762793146051099 ], "wc_strengths_avg": [ 119.66666666666667, 62.109222789820485 ], "wc_weaknesses_avg": [ 138.66666666666666, 126.2519526801687 ], "wc_questions_avg": [ 175.33333333333334, 71.56504422939705 ], "wc_limitations_avg": [ 17.333333333333332, 14.70449666674185 ], "wc_review_avg": [ 564.6666666666666, 253.9846889523505 ], "wc_reply_reviewers_avg": [ 189.33333333333334, 244.11108582410225 ], "wc_reply_authors_avg": [ 79.33333333333333, 112.19427594826554 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4982381596321387541&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Text2CAD: Generating Sequential CAD Designs from Beginner-to-Expert Level Text Prompts", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96571", "id": "5k9XeHIK3L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5k9XeHIK3L", "openreview": "https://openreview.net/forum?id=5k9XeHIK3L", "poster": "/media/PosterPDFs/NeurIPS%202024/96571.png?t=1731322691.2308366", "project": "", "author_site": "Mohammad Sadil Khan, Sankalp Sinha, Talha Uddin, Didier Stricker, Sk Aziz Ali, Muhammad Zeshan Afzal", "tldr": "", "abstract": "Prototyping complex computer-aided design (CAD) models in modern softwares can be very time-consuming. This is due to the lack of intelligent systems that can quickly generate simpler intermediate parts. We propose Text2CAD, the first AI framework for generating text-to-parametric CAD models using designer-friendly instructions for all skill levels. Furthermore, we introduce a data annotation pipeline for generating text prompts based on natural language instructions for the DeepCAD dataset using Mistral and LLaVA-NeXT. The dataset contains $\\sim170$K models and $\\sim660$K text annotations, from abstract CAD descriptions (e.g., _generate two concentric cylinders_) to detailed specifications (e.g., _draw two circles with center_ $(x,y)$ and _radius_ $r_{1}$, $r_{2}$, \\textit{and extrude along the normal by} $d$...). Within the Text2CAD framework, we propose an end-to-end transformer-based auto-regressive network to generate parametric CAD models from input texts. We evaluate the performance of our model through a mixture of metrics, including visual quality, parametric precision, and geometrical accuracy. Our proposed framework shows great potential in AI-aided design applications. Project page is available at https://sadilkhan.github.io/text2cad-project/.", "keywords": "Text-to-CAD;Natural Language Instructions;Large Language Models;Parametric Computer-Aided-Design (CAD);CAD language;Transformer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mohammad Sadil Khan;Sankalp Sinha;Sheikh Talha Uddin;Didier Stricker;Sk Aziz Ali;Muhammad Zeshan Afzal", "authorids": "~Mohammad_Sadil_Khan1;~Sankalp_Sinha1;~Sheikh_Talha_Uddin1;~Didier_Stricker1;~Sk_Aziz_Ali2;~Muhammad_Zeshan_Afzal1", "gender": "M;M;M;;M;", "homepage": "https://mdsadilkhan.onrender.com/;https://av.dfki.de/members/sinha/;;;https://skazizali.com;", "dblp": "355/5502;377/2585;;;;", "google_scholar": "XIDQo_IAAAAJ;QYcfOjEAAAAJ;;;zywjMeMAAAAJ;", "orcid": ";0009-0009-6820-3633;0009-0004-9156-5679;;0000-0003-3701-2008;", "linkedin": "mohammad-sadil-khan-a96568170;sankalp-sinha-main;sheikh-talha-uddin/;;sk-ali-208070118/;", "or_profile": "~Mohammad_Sadil_Khan1;~Sankalp_Sinha1;~Sheikh_Talha_Uddin1;~Didier_Stricker1;~Sk_Aziz_Ali2;~Muhammad_Zeshan_Afzal1", "aff": "Rheinland-Pf\u00e4lzische Technische Universit\u00e4t Kaiserslautern-Landau;RPTU;German Research Center for AI;;German Research Center for AI;", "aff_domain": "rptu.de;rptu.de;dfki.de;;dfki.de;", "position": "PhD student;PhD student;PhD student;;Postdoc;", "bibtex": "@inproceedings{\nkhan2024textcad,\ntitle={Text2{CAD}: Generating Sequential {CAD} Designs from Beginner-to-Expert Level Text Prompts},\nauthor={Mohammad Sadil Khan and Sankalp Sinha and Sheikh Talha Uddin and Didier Stricker and Sk Aziz Ali and Muhammad Zeshan Afzal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5k9XeHIK3L}\n}", "github": "", "reviewers": "mcF8;HTpR;hkcu;ovUL", "pdf_size": 16125674, "rating": "6;6;7;7", "confidence": "4;5;3;5", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "109;92;92;48", "wc_strengths": "34;39;124;110", "wc_weaknesses": "59;211;44;87", "wc_questions": "158;8;45;46", "wc_limitations": "69;6;24;7", "wc_review": "429;356;329;298", "wc_reply_reviewers": "34;56;51;0", "wc_reply_authors": "23;125;37;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.25, 22.598395960775623 ], "wc_strengths_avg": [ 76.75, 40.59171713539598 ], "wc_weaknesses_avg": [ 100.25, 65.7775607635309 ], "wc_questions_avg": [ 64.25, 56.25111110013739 ], "wc_limitations_avg": [ 26.5, 25.558755838264116 ], "wc_review_avg": [ 353.0, 48.44068537913146 ], "wc_reply_reviewers_avg": [ 35.25, 21.924586655168667 ], "wc_reply_authors_avg": [ 46.25, 47.34646238104807 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3600679243234025282&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "rptu.de;rptu.de;dfki.de;;dfki.de;", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Rheinland-Pf\u00e4lzische Technische Universit\u00e4t;Rheinisch-Westf\u00e4lische Technische Hochschule Aachen;German Research Center for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-kl.de;https://www.rwth-aachen.de;https://www.dfki.de/", "aff_unique_abbr": "TU Kaiserslautern;RWTH Aachen;DFKI", "aff_campus_unique_index": "0", "aff_campus_unique": "Kaiserslautern-Landau;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "On the Efficiency of ERM in Feature Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96570", "id": "5kthqxbK7r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5kthqxbK7r", "openreview": "https://openreview.net/forum?id=5kthqxbK7r", "poster": "/media/PosterPDFs/NeurIPS%202024/96570.png?t=1733899682.3536725", "project": "", "author_site": "Ayoub El Hanchi, Chris Maddison, Murat Erdogdu", "tldr": "", "abstract": "Given a collection of feature maps indexed by a set $\\mathcal{T}$, we study the performance of empirical risk minimization (ERM) on regression problems with square loss over the union of the linear classes induced by these feature maps. This setup aims at capturing the simplest instance of feature learning, where the model is expected to jointly learn from the data an appropriate feature map and a linear predictor. We start by studying the asymptotic quantiles of the excess risk of sequences of empirical risk minimizers. Remarkably, we show that when the set $\\mathcal{T}$ is not too large and when there is a unique optimal feature map, these quantiles coincide, up to a factor of two, with those of the excess risk of the oracle procedure, which knows a priori this optimal feature map and deterministically outputs an empirical risk minimizer from the associated optimal linear class. We complement this asymptotic result with a non-asymptotic analysis that quantifies the decaying effect of the global complexity of the set $\\mathcal{T}$ on the excess risk of ERM, and relates it to the size of the sublevel sets of the suboptimality of the feature maps. As an application of our results, we characterize the performance of the best subset selection procedure in sparse linear regression under general assumptions.", "keywords": "Empirical risk minimization;Feature learning;Learning theory;Sparse linear regression", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ayoub El Hanchi;Chris J. Maddison;Murat A Erdogdu", "authorids": "~Ayoub_El_Hanchi1;~Chris_J._Maddison1;~Murat_A_Erdogdu1", "gender": "M;M;M", "homepage": "https://www.cs.toronto.edu/~aelhan/;http://www.cs.toronto.edu/~erdogdu/;http://www.cs.toronto.edu/~cmaddis/", "dblp": ";139/1292;139/1388", "google_scholar": "5ZzcGmgAAAAJ;Lqc4cdAAAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ayoub_El_Hanchi1;~Murat_A_Erdogdu1;~Chris_J_Maddison1", "aff": "University of Toronto;Vector Institute;Google", "aff_domain": "toronto.edu;vectorinstitute.ai;google.com", "position": "PhD student;Faculty;Researcher", "bibtex": "@inproceedings{\nhanchi2024on,\ntitle={On the Efficiency of {ERM} in Feature Learning},\nauthor={Ayoub El Hanchi and Chris J. Maddison and Murat A Erdogdu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5kthqxbK7r}\n}", "github": "", "reviewers": "rL5m;bjqG;pWnU;9KPn;qDCv", "pdf_size": 435686, "rating": "4;6;7;7;7", "confidence": "3;4;3;4;4", "soundness": "3;4;3;4;3", "novelty": "1;2;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "62;137;100;69;85", "wc_strengths": "69;112;91;96;109", "wc_weaknesses": "529;326;10;196;466", "wc_questions": "8;15;24;2;2", "wc_limitations": "1;21;4;6;2", "wc_review": "669;611;229;369;664", "wc_reply_reviewers": "286;24;4;89;142", "wc_reply_authors": "593;261;0;0;35", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 90.6, 26.67283262047734 ], "wc_strengths_avg": [ 95.4, 15.344054223053305 ], "wc_weaknesses_avg": [ 305.4, 187.31961990138672 ], "wc_questions_avg": [ 10.2, 8.4 ], "wc_limitations_avg": [ 6.8, 7.30479294709987 ], "wc_review_avg": [ 508.4, 177.77918888328858 ], "wc_reply_reviewers_avg": [ 109.0, 101.06235698814866 ], "wc_reply_authors_avg": [ 177.8, 229.31672420475573 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4900980294098034, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fdgyTWrw8v8J:scholar.google.com/&scioq=On+the+Efficiency+of+ERM+in+Feature+Learning&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "toronto.edu;vectorinstitute.ai;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;Vector Institute;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/;https://www.google.com", "aff_unique_abbr": "U of T;Vector Institute;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Reinforcement Learning Gradients as Vitamin for Online Finetuning Decision Transformers", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96569", "id": "5l5bhYexYO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5l5bhYexYO", "openreview": "https://openreview.net/forum?id=5l5bhYexYO", "poster": "/media/PosterPDFs/NeurIPS%202024/96569.png?t=1731312260.0294669", "project": "", "author_site": "Kai Yan, Alex Schwing, Yu-Xiong Wang", "tldr": "", "abstract": "Decision Transformers have recently emerged as a new and compelling paradigm for offline Reinforcement Learning (RL), completing a trajectory in an autoregressive way. While improvements have been made to overcome initial shortcomings, online finetuning of decision transformers has been surprisingly under-explored. The widely adopted state-of-the-art Online Decision Transformer (ODT) still struggles when pretrained with low-reward offline data. In this paper, we theoretically analyze the online-finetuning of the decision transformer, showing that the commonly used Return-To-Go (RTG) that's far from the expected return hampers the online fine-tuning process. This problem, however, is well-addressed by the value function and advantage of standard RL algorithms. As suggested by our analysis, in our experiments, we hence find that simply adding TD3 gradients to the finetuning process of ODT effectively improves the online finetuning performance of ODT, especially if ODT is pretrained with low-reward offline data. These findings provide new directions to further improve decision transformers.", "keywords": "Decision transformer;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/8af83ec00a0101f3ccc702e6bc529d06e88d2d11.zip", "author": "Kai Yan;Alex Schwing;Yu-Xiong Wang", "authorids": "~Kai_Yan1;~Alex_Schwing1;~Yu-Xiong_Wang1", "gender": "M;Unspecified;", "homepage": "https://kaiyan289.github.io/;https://ece.illinois.edu/directory/profile/aschwing;https://yxw.cs.illinois.edu/", "dblp": ";79/9775;35/10700", "google_scholar": "KElKfgQAAAAJ;3B2c31wAAAAJ;T_Q-xDkAAAAJ", "orcid": ";;", "linkedin": "%E5%BC%80-%E9%A2%9C-18b7931b1/;;", "or_profile": "~Kai_Yan1;~Alex_Schwing1;~Yu-Xiong_Wang1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois Urbana-Champaign", "aff_domain": "cs.illinois.edu;illinois.edu;cs.illinois.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyan2024reinforcement,\ntitle={Reinforcement Learning Gradients as Vitamin for Online Finetuning Decision Transformers},\nauthor={Kai Yan and Alex Schwing and Yu-Xiong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5l5bhYexYO}\n}", "github": "", "reviewers": "NWdD;gVzs;1gFB;AdzZ", "pdf_size": 11709860, "rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "106;89;48;68", "wc_strengths": "108;27;34;92", "wc_weaknesses": "309;52;88;110", "wc_questions": "170;21;48;42", "wc_limitations": "56;29;7;7", "wc_review": "749;218;225;319", "wc_reply_reviewers": "210;22;52;0", "wc_reply_authors": "584;23;25;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 21.821720830401986 ], "wc_strengths_avg": [ 65.25, 35.29429840639986 ], "wc_weaknesses_avg": [ 139.75, 99.8858723744254 ], "wc_questions_avg": [ 70.25, 58.45671475544961 ], "wc_limitations_avg": [ 24.75, 20.154093876927337 ], "wc_review_avg": [ 377.75, 218.01992454819353 ], "wc_reply_reviewers_avg": [ 71.0, 82.34682750416071 ], "wc_reply_authors_avg": [ 158.0, 246.1473136152414 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dVqbWgkaGTcJ:scholar.google.com/&scioq=Reinforcement+Learning+Gradients+as+Vitamin+for+Online+Finetuning+Decision+Transformers&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cs.illinois.edu;illinois.edu;cs.illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unconditional stability of a recurrent neural circuit implementing divisive normalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96568", "id": "5lLb7aXRN9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5lLb7aXRN9", "openreview": "https://openreview.net/forum?id=5lLb7aXRN9", "poster": "/media/PosterPDFs/NeurIPS%202024/96568.png?t=1733716651.9991546", "project": "", "author_site": "Shivang Rawat, David Heeger, Stefano Martiniani", "tldr": "", "abstract": "Stability in recurrent neural models poses a significant challenge, particularly in developing biologically plausible neurodynamical models that can be seamlessly trained. Traditional cortical circuit models are notoriously difficult to train due to expansive nonlinearities in the dynamical system, leading to an optimization problem with nonlinear stability constraints that are difficult to impose. Conversely, recurrent neural networks (RNNs) excel in tasks involving sequential data but lack biological plausibility and interpretability. In this work, we address these challenges by linking dynamic divisive normalization (DN) to the stability of \"oscillatory recurrent gated neural integrator circuits'' (ORGaNICs), a biologically plausible recurrent cortical circuit model that dynamically achieves DN and that has been shown to simulate a wide range of neurophysiological phenomena. By using the indirect method of Lyapunov, we prove the remarkable property of unconditional local stability for an arbitrary-dimensional ORGaNICs circuit when the recurrent weight matrix is the identity. We thus connect ORGaNICs to a system of coupled damped harmonic oscillators, which enables us to derive the circuit's energy function, providing a normative principle of what the circuit, and individual neurons, aim to accomplish. Further, for a generic recurrent weight matrix, we prove the stability of the 2D model and demonstrate empirically that stability holds in higher dimensions. Finally, we show that ORGaNICs can be trained by backpropagation through time without gradient clipping/scaling, thanks to its intrinsic stability property and adaptive time constants, which address the problems of exploding, vanishing, and oscillating gradients. By evaluating the model's performance on RNN benchmarks, we find that ORGaNICs outperform alternative neurodynamical models on static image classification tasks and perform comparably to LSTMs on sequential tasks.", "keywords": "Recurrent Networks;Theoretical Neuroscience;Dynamical Systems;Normalization", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Shivang Rawat;David Heeger;Stefano Martiniani", "authorids": "~Shivang_Rawat1;~David_Heeger1;~Stefano_Martiniani1", "gender": "M;;M", "homepage": ";https://www.cns.nyu.edu/~david/;https://martinianilab.org/", "dblp": ";67/5402;", "google_scholar": "f2CTxrkAAAAJ;6ggnUzYAAAAJ;pxSj9JkAAAAJ", "orcid": ";0000-0002-3282-9898;0000-0003-2028-2175", "linkedin": "shivang-rawat-b54b6a151/;david-heeger/;smartiniani/", "or_profile": "~Shivang_Rawat1;~David_Heeger1;~Stefano_Martiniani1", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nrawat2024unconditional,\ntitle={Unconditional stability of a recurrent neural circuit implementing divisive normalization},\nauthor={Shivang Rawat and David Heeger and Stefano Martiniani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5lLb7aXRN9}\n}", "github": "", "reviewers": "8Tz2;FwjG;2CUH;NGE1;ma5B", "pdf_size": 6718029, "rating": "5;5;6;6;7", "confidence": "4;2;4;2;3", "soundness": "3;3;3;3;4", "novelty": "1;2;3;3;3", "presentation": "4;2;3;3;3", "wc_summary": "72;141;83;250;62", "wc_strengths": "34;131;53;70;73", "wc_weaknesses": "323;359;61;466;467", "wc_questions": "81;95;87;94;49", "wc_limitations": "198;21;52;52;4", "wc_review": "708;747;336;932;655", "wc_reply_reviewers": "682;252;0;38;11", "wc_reply_authors": "1166;792;0;12;0", "reply_reviewers": "4;1;0;1;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 121.6, 69.80716295624683 ], "wc_strengths_avg": [ 72.2, 32.529371343448986 ], "wc_weaknesses_avg": [ 335.2, 148.58048324056563 ], "wc_questions_avg": [ 81.2, 16.880758276807352 ], "wc_limitations_avg": [ 65.4, 68.82325188480998 ], "wc_review_avg": [ 675.6, 193.7220689544689 ], "wc_reply_reviewers_avg": [ 196.6, 259.59013848757814 ], "wc_reply_authors_avg": [ 394.0, 492.0942999060241 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RgBCOPD-dZUJ:scholar.google.com/&scioq=Unconditional+stability+of+a+recurrent+neural+circuit+implementing+divisive+normalization&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "nyu.edu;nyu.edu;nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5p57uCUH8k", "title": "CLODE: Continuous Exposure Learning for Low-Light Image Enhancement using Neural ODEs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Low-light image enhancement poses a significant challenge due to the limited information captured by image sensors in low-light environments. \nDespite recent improvements in deep learning models, the lack of paired training datasets remains a significant obstacle. \nTherefore, unsupervised methods have emerged as a promising solution. \nIn this work, we focus on the strength of curve-adjustment-based approaches to tackle unsupervised methods. \nThe majority of existing unsupervised curve-adjustment approaches iteratively estimate higher order curve parameters to enhance the exposure of images while efficiently preserving the details of the images. \nHowever, the convergence of the enhancement procedure cannot be guaranteed, leading to sensitivity to the number of iterations and limited performance.\nTo address this problem, we consider the iterative curve-adjustment update process as a dynamic system and formulate it as a Neural Ordinary Differential Equations (NODE) for the first time, and this allows us to learn a continuous dynamics of the latent image. \nThe strategy of utilizing NODE to leverage continuous dynamics in iterative methods enhances unsupervised learning and aids in achieving better convergence compared to discrete-space approaches. Consequently, we achieve state-of-the-art performance in unsupervised low-light image enhancement across various benchmark datasets.", "keywords": "Low-light Image enhancement;Image enhancement;Neural ODE;Unsupervised", "primary_area": "machine_vision", "supplementary_material": "", "author": "Donggoo Jung;Daehyun Kim;Tae Hyun Kim", "authorids": "~Donggoo_Jung1;~Daehyun_Kim6;~Tae_Hyun_Kim2", "gender": "M;M;M", "homepage": "https://donggoo-jung.github.io;https://sites.google.com/site/lliger9/home?authuser=0;https://sites.google.com/view/lliger9/", "dblp": ";;43/11343-6", "google_scholar": "https://scholar.google.co.kr/citations?user=yXJ05SwAAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=8soccsoAAAAJ", "orcid": ";;0000-0002-7995-3984", "linkedin": ";;", "or_profile": "~Donggoo_Jung1;~Daehyun_Kim6;~Tae_Hyun_Kim2", "aff": "Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024clode,\ntitle={{CLODE}: Continuous Exposure Learning for Low-Light Image Enhancement using Neural {ODE}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=5p57uCUH8k}\n}", "github": "", "project": "", "reviewers": "Z6DS;9uXV;9pkv;UK8A;Qjwa", "site": "https://openreview.net/forum?id=5p57uCUH8k", "pdf_size": 8986352, "rating": "4;4;5;6;7", "confidence": "5;5;5;4;3", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "48;44;94;105;21", "wc_strengths": "29;23;47;87;7", "wc_weaknesses": "360;88;71;34;5", "wc_questions": "47;4;146;35;39", "wc_limitations": "1;9;7;25;3", "wc_review": "485;168;365;286;75", "wc_reply_reviewers": "0;0;19;14;9", "wc_reply_authors": "0;0;29;39;32", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 62.4, 31.85341425969907 ], "wc_strengths_avg": [ 38.6, 27.375901811629877 ], "wc_weaknesses_avg": [ 111.6, 127.509372204556 ], "wc_questions_avg": [ 54.2, 48.1638868863384 ], "wc_limitations_avg": [ 9.0, 8.48528137423857 ], "wc_review_avg": [ 275.8, 144.06026516704736 ], "wc_reply_reviewers_avg": [ 8.4, 7.552483035399683 ], "wc_reply_authors_avg": [ 20.0, 16.649324310613927 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9432422182837987, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=358576794172396718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Verifiably Robust Conformal Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96567", "id": "5pJfDlaSxV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5pJfDlaSxV", "openreview": "https://openreview.net/forum?id=5pJfDlaSxV", "poster": "", "project": "", "author_site": "Linus Jeary, Tom Kuipers, Mehran Hosseini, Nicola Paoletti", "tldr": "", "abstract": "Conformal Prediction (CP) is a popular uncertainty quantification method that provides distribution-free, statistically valid prediction sets, assuming that training and test data are exchangeable. In such a case, CP's prediction sets are guaranteed to cover the (unknown) true test output with a user-specified probability. Nevertheless, this guarantee is violated when the data is subjected to adversarial attacks, which often result in a significant loss of coverage. Recently, several approaches have been put forward to recover CP guarantees in this setting. These approaches leverage variations of randomised smoothing to produce conservative sets which account for the effect of the adversarial perturbations. They are, however, limited in that they only support $\\ell_2$-bounded perturbations and classification tasks. This paper introduces VRCP (Verifiably Robust Conformal Prediction), a new framework that leverages recent neural network verification methods to recover coverage guarantees under adversarial attacks. Our VRCP method is the first to support perturbations bounded by arbitrary norms including $\\ell_1$, $\\ell_2$, and $\\ell_\\infty$, as well as regression tasks. We evaluate and compare our approach on image classification tasks (CIFAR10, CIFAR100, and TinyImageNet) and regression tasks for deep reinforcement learning environments. In every case, VRCP achieves above nominal coverage and yields significantly more efficient and informative prediction regions than the SotA.", "keywords": "Conformal Prediction;Adversarial Attacks;Distribution Shift;Formal Verification", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/a287cfa6a4a99d7fa90d7677b44451b4dedcb26a.zip", "author": "Linus Jeary;Tom Kuipers;Mehran Hosseini;Nicola Paoletti", "authorids": "~Linus_Jeary1;~Tom_Kuipers1;~Mehran_Hosseini1;~Nicola_Paoletti1", "gender": "M;M;;M", "homepage": ";;https://mehranhosseini.com;https://nicolapaoletti.com", "dblp": "353/0322;;178/4712;15/10263", "google_scholar": "MBDXoi0AAAAJ;;;https://scholar.google.co.uk/citations?user=v4R_cM4AAAAJ", "orcid": "0000-0003-2920-8182;0009-0009-0976-0018;0000-0002-9227-3078;0000-0002-4723-5363", "linkedin": ";;mehran-hosseini;", "or_profile": "~Linus_Jeary1;~Tom_Kuipers1;~Mehran_Hosseini1;~Nicola_Paoletti1", "aff": "King's College London, University of London;King's College London, University of London;King's College London, University of London;King's College London, University of London", "aff_domain": "kcl.ac.uk;kcl.ac.uk;kcl.ac.uk;kcl.ac.uk", "position": "MS student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\njeary2024verifiably,\ntitle={Verifiably Robust Conformal Prediction},\nauthor={Linus Jeary and Tom Kuipers and Mehran Hosseini and Nicola Paoletti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5pJfDlaSxV}\n}", "github": "", "reviewers": "gWvg;iDUp;cbGx", "pdf_size": 452877, "rating": "5;5;6", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "53;42;69", "wc_strengths": "43;45;69", "wc_weaknesses": "168;134;153", "wc_questions": "2;66;2", "wc_limitations": "3;4;13", "wc_review": "269;291;306", "wc_reply_reviewers": "38;76;59", "wc_reply_authors": "0;0;113", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 54.666666666666664, 11.08552609887726 ], "wc_strengths_avg": [ 52.333333333333336, 11.8133634311129 ], "wc_weaknesses_avg": [ 151.66666666666666, 13.912424503139471 ], "wc_questions_avg": [ 23.333333333333332, 30.169889330626027 ], "wc_limitations_avg": [ 6.666666666666667, 4.496912521077347 ], "wc_review_avg": [ 288.6666666666667, 15.195028426721974 ], "wc_reply_reviewers_avg": [ 57.666666666666664, 15.542057635833023 ], "wc_reply_authors_avg": [ 37.666666666666664, 53.268710849386586 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9900498330758197094&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "kcl.ac.uk;kcl.ac.uk;kcl.ac.uk;kcl.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "King's College London", "aff_unique_dep": "", "aff_unique_url": "https://www.kcl.ac.uk", "aff_unique_abbr": "KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Scalable and Effective Arithmetic Tree Generation for Adder and Multiplier Designs", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96566", "id": "5pnhGedG98", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5pnhGedG98", "openreview": "https://openreview.net/forum?id=5pnhGedG98", "poster": "/media/PosterPDFs/NeurIPS%202024/96566.png?t=1732370592.9173248", "project": "", "author_site": "Yao Lai, Jinxin Liu, David Z. Pan, Ping Luo", "tldr": "", "abstract": "Across a wide range of hardware scenarios, the computational efficiency and physical size of the arithmetic units significantly influence the speed and footprint of the overall hardware system. Nevertheless, the effectiveness of prior arithmetic design techniques proves inadequate, as they do not sufficiently optimize speed and area, resulting in increased latency and larger module size. To boost computing performance, this work focuses on the two most common and fundamental arithmetic modules, adders and multipliers. We cast the design tasks as single-player tree generation games, leveraging reinforcement learning techniques to optimize their arithmetic tree structures. This tree generation formulation allows us to efficiently navigate the vast search space and discover superior arithmetic designs that improve computational efficiency and hardware size within just a few hours. Our proposed method, **ArithTreeRL**, achieves significant improvements for both adders and multipliers. For adders, our approach discovers designs of 128-bit adders that achieve Pareto optimality in theoretical metrics. Compared with PrefixRL, it reduces delay and size by up to 26% and 30%, respectively. For multipliers, compared to RL-MUL, our method enhances speed and reduces size by as much as 49% and 45%. Additionally, ArithTreeRL's flexibility and scalability enable seamless integration into 7nm technology. We believe our work will offer valuable insights into hardware design, further accelerating speed and reducing size through the refined search space and our tree generation methodologies.", "keywords": "Reinforcement Learning;Computer Arithmetic;Electronic Design Automation", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yao Lai;Jinxin Liu;David Z. Pan;Ping Luo", "authorids": "~Yao_Lai2;~Jinxin_Liu1;~David_Z._Pan1;~Ping_Luo2", "gender": ";;M;", "homepage": ";;http://users.ece.utexas.edu/~dpan/;", "dblp": ";;p/DavidZhigangPan.html;", "google_scholar": ";;3aLlroEAAAAJ;", "orcid": ";;0000-0002-5705-2501;", "linkedin": ";;davidzpan/;", "or_profile": "~Yao_Lai2;~Jinxin_Liu1;~David_Z._Pan1;~Ping_Luo2", "aff": ";;University of Texas, Austin;", "aff_domain": ";;utexas.edu;", "position": ";;Professor;", "bibtex": "@inproceedings{\nlai2024scalable,\ntitle={Scalable and Effective Arithmetic Tree Generation for Adder and Multiplier Designs},\nauthor={Yao Lai and Jinxin Liu and David Z. Pan and Ping Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5pnhGedG98}\n}", "github": "", "reviewers": "2F2a;RWUd;9Uwn;LxHh;9vo2;xS4r", "pdf_size": 6612851, "rating": "5;5;6;7;7;8", "confidence": "2;5;4;3;4;4", "soundness": "2;3;3;3;3;3", "novelty": "2;2;3;3;3;3", "presentation": "3;3;3;3;3;4", "wc_summary": "71;48;71;75;71;78", "wc_strengths": "78;10;56;42;118;193", "wc_weaknesses": "113;46;63;49;74;92", "wc_questions": "5;83;63;91;53;39", "wc_limitations": "6;1;9;16;1;1", "wc_review": "273;188;262;273;317;403", "wc_reply_reviewers": "0;60;0;8;11;14", "wc_reply_authors": "0;65;0;19;13;25", "reply_reviewers": "0;1;0;1;1;1", "reply_authors": "1;2;1;2;2;2", "rating_avg": [ 6.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820635 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 69.0, 9.746794344808963 ], "wc_strengths_avg": [ 82.83333333333333, 59.285795563149485 ], "wc_weaknesses_avg": [ 72.83333333333333, 23.688370329950708 ], "wc_questions_avg": [ 55.666666666666664, 28.58126814696802 ], "wc_limitations_avg": [ 5.666666666666667, 5.527707983925667 ], "wc_review_avg": [ 286.0, 64.76624635307088 ], "wc_reply_reviewers_avg": [ 15.5, 20.573040611440984 ], "wc_reply_authors_avg": [ 20.333333333333332, 21.9823161250634 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.10660035817780518, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10387902605278000320&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "email": ";;utexas.edu;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Algorithmic progress in language models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96565", "id": "5qPmQtfvhy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5qPmQtfvhy", "openreview": "https://openreview.net/forum?id=5qPmQtfvhy", "poster": "/media/PosterPDFs/NeurIPS%202024/96565.png?t=1731561511.9054525", "project": "", "author_site": "Wing Hin Anson Ho, Tamay Besiroglu, Ege Erdil, Zifan Guo, David Owen, Robi Rahman, David Atkinson, Neil Thompson, Jaime Sevilla", "tldr": "", "abstract": "We investigate the rate at which algorithms for pre-training language models have improved since the advent of deep learning. Using a dataset of over 200 language model evaluations on Wikitext and Penn Treebank spanning 2012-2023, we find that the compute required to reach a set performance threshold has halved approximately every 8 months, with a 90\\% confidence interval of around 2 to 22 months, substantially faster than hardware gains per Moore's Law. We estimate augmented scaling laws, which enable us to quantify algorithmic progress and determine the relative contributions of scaling models versus innovations in training algorithms. Despite the rapid pace of algorithmic progress and the development of new architectures such as the transformer, our analysis reveals that the increase in compute made an even larger contribution to overall performance improvements over this time period. Though limited by noisy benchmark data, our analysis quantifies the rapid progress in language modeling, shedding light on the relative contributions from compute and algorithms.", "keywords": "Natural Language Processing", "primary_area": "evaluation", "supplementary_material": "/attachment/29c9e6690d39bbf99975a6a2466dafc91e993d98.zip", "author": "Anson Ho;Tamay Besiroglu;Ege Erdil;Zifan Carl Guo;David Owen;Robi Rahman;David Atkinson;Neil Thompson;Jaime Sevilla", "authorids": "~Anson_Ho1;~Tamay_Besiroglu1;~Ege_Erdil2;~Zifan_Carl_Guo1;~David_Owen1;~Robi_Rahman1;~David_Atkinson2;~Neil_Thompson1;~Jaime_Sevilla1", "gender": "M;;M;M;;M;M;;", "homepage": "https://ansonwhho.github.io/;https://www.tamaybesiroglu.com;;https://www.carlguo.com/;;;https://diatkinson.github.io;http://futuretech.mit.edu;https://jaimesevilla.me/", "dblp": ";;;332/9532;;;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;;;7oZl8BcAAAAJ;yjttFw4AAAAJ;COAtad0AAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;robirahman/;;;", "or_profile": "~Anson_Ho1;~Tamay_Besiroglu1;~Ege_Erdil2;~Zifan_Carl_Guo1;~David_Owen1;~Robi_Rahman1;~David_Atkinson2;~Neil_Thompson1;~Jaime_Sevilla1", "aff": "Epoch;Epoch;Epoch AI;Massachusetts Institute of Technology;;Stanford University;Northeastern University;Massachusetts Institute of Technology;Epoch AI", "aff_domain": "epochai.org;epochai.org;epochai.org;mit.edu;;stanford.edu;northeastern.edu;mit.edu;epochai.org", "position": "Researcher;Researcher;Researcher;Undergrad student;;Researcher;PhD student;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nho2024algorithmic,\ntitle={Algorithmic progress in language models},\nauthor={Anson Ho and Tamay Besiroglu and Ege Erdil and Zifan Carl Guo and David Owen and Robi Rahman and David Atkinson and Neil Thompson and Jaime Sevilla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5qPmQtfvhy}\n}", "github": "", "reviewers": "osWG;CCHC;gaW3;Va8z", "pdf_size": 1082495, "rating": "4;6;6;7", "confidence": "4;3;3;4", "soundness": "2;3;4;4", "novelty": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "96;31;90;114", "wc_strengths": "51;164;52;57", "wc_weaknesses": "54;225;77;37", "wc_questions": "22;21;1;1", "wc_limitations": "5;23;12;15", "wc_review": "228;464;232;224", "wc_reply_reviewers": "0;56;28;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;0;0", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.75, 31.155858197135252 ], "wc_strengths_avg": [ 81.0, 47.97395126524393 ], "wc_weaknesses_avg": [ 98.25, 74.54319217742155 ], "wc_questions_avg": [ 11.25, 10.256095748383007 ], "wc_limitations_avg": [ 13.75, 6.456585785072479 ], "wc_review_avg": [ 287.0, 102.23013254417701 ], "wc_reply_reviewers_avg": [ 21.0, 23.2163735324878 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.5, 0.5 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15830870233320072544&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epochai.org;epochai.org;epochai.org;mit.edu;;stanford.edu;northeastern.edu;mit.edu;epochai.org", "author_num": 9, "aff_unique_index": "0;0;1;2;3;4;2;1", "aff_unique_norm": "Epoch;Epoch AI;Massachusetts Institute of Technology;Stanford University;Northeastern University", "aff_unique_dep": ";;;;", "aff_unique_url": ";https://epoch.ai;https://web.mit.edu;https://www.stanford.edu;https://www.northeastern.edu", "aff_unique_abbr": ";;MIT;Stanford;NEU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "1;1;1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Perceiving Longer Sequences With Bi-Directional Cross-Attention Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96564", "id": "5sm8YDnWvC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5sm8YDnWvC", "openreview": "https://openreview.net/forum?id=5sm8YDnWvC", "poster": "/media/PosterPDFs/NeurIPS%202024/96564.png?t=1733654645.8814604", "project": "", "author_site": "Markus Hiller, Krista A. Ehinger, Tom Drummond", "tldr": "", "abstract": "We present a novel bi-directional Transformer architecture (BiXT) which scales linearly with input size in terms of computational cost and memory consumption, but does not suffer the drop in performance or limitation to only one input modality seen with other efficient Transformer-based approaches. BiXT is inspired by the Perceiver architectures but replaces iterative attention with an efficient bi-directional cross-attention module in which input tokens and latent variables attend to each other simultaneously, leveraging a naturally emerging attention-symmetry between the two. This approach unlocks a key bottleneck experienced by Perceiver-like architectures and enables the processing and interpretation of both semantics ('what') and location ('where') to develop alongside each other over multiple layers -- allowing its direct application to dense and instance-based tasks alike. By combining efficiency with the generality and performance of a full Transformer architecture, BiXT can process longer sequences like point clouds, text or images at higher feature resolutions and achieves competitive performance across a range of tasks like point cloud part segmentation, semantic image segmentation, image classification, hierarchical sequence modeling and document retrieval. Our experiments demonstrate that BiXT models outperform larger competitors by leveraging longer sequences more efficiently on vision tasks like classification and segmentation, and perform on par with full Transformer variants on sequence modeling and document retrieval -- but require 28\\% fewer FLOPs and are up to $8.4\\times$ faster.", "keywords": "Transformer;Neural Architecture;Efficient Attention;Architectures;Representation Learning;General Perception;Long Sequences", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Markus Hiller;Krista A. Ehinger;Tom Drummond", "authorids": "~Markus_Hiller1;~Krista_A._Ehinger4;~Tom_Drummond1", "gender": "M;M;Not Specified", "homepage": ";;http://www.kehinger.com", "dblp": "226/1459;50/1633;13/8654", "google_scholar": "TOb0sisAAAAJ;https://scholar.google.com.au/citations?user=6sWGL5wAAAAJ;EdGfpdcAAAAJ", "orcid": "0000-0002-8133-0102;0000-0001-8204-5904;0000-0003-2247-3020", "linkedin": ";;", "or_profile": "~Markus_Hiller1;~Tom_Drummond1;~Krista_Ehinger1", "aff": "University of Melbourne;University of Melbourne;The University of Melbourne", "aff_domain": "unimelb.edu.au;unimelb.edu.au;unimelb.edu.au", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhiller2024perceiving,\ntitle={Perceiving Longer Sequences With Bi-Directional Cross-Attention Transformers},\nauthor={Markus Hiller and Krista A. Ehinger and Tom Drummond},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5sm8YDnWvC}\n}", "github": "", "reviewers": "S14L;zEaf;pu66", "pdf_size": 4734972, "rating": "5;6;6", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "109;65;138", "wc_strengths": "48;86;69", "wc_weaknesses": "33;97;116", "wc_questions": "3;44;29", "wc_limitations": "5;10;1", "wc_review": "198;302;353", "wc_reply_reviewers": "35;51;0", "wc_reply_authors": "28;16;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 104.0, 30.011109054259666 ], "wc_strengths_avg": [ 67.66666666666667, 15.542057635833022 ], "wc_weaknesses_avg": [ 82.0, 35.505868059613285 ], "wc_questions_avg": [ 25.333333333333332, 16.937794687883333 ], "wc_limitations_avg": [ 5.333333333333333, 3.6817870057290873 ], "wc_review_avg": [ 284.3333333333333, 64.49978466802987 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 21.29684379328438 ], "wc_reply_authors_avg": [ 14.666666666666666, 11.469767022723502 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6868232784901800802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "unimelb.edu.au;unimelb.edu.au;unimelb.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": "", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "ZipCache: Accurate and Efficient KV Cache Quantization with Salient Token Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96563", "id": "5t4ZAkPiJs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5t4ZAkPiJs", "openreview": "https://openreview.net/forum?id=5t4ZAkPiJs", "poster": "/media/PosterPDFs/NeurIPS%202024/96563.png?t=1730793835.1320152", "project": "", "author_site": "Yefei He, Luoming Zhang, Weijia Wu, Jing Liu, Hong Zhou, Bohan Zhuang", "tldr": "", "abstract": "KV cache stores key and value states from previous tokens to avoid re-computation, yet it demands substantial storage space, especially for long sequences. \n Adaptive KV cache compression seeks to discern the saliency of tokens, preserving vital information while aggressively compressing those of less importance.\n However, previous methods of this approach exhibit significant performance degradation at high compression ratios due to inaccuracies in identifying salient tokens. \n Additionally, the compression process introduces excessive overhead, substantially increasing memory burdens and the generation latency.\n In this paper, we present ZipCache, an accurate and efficient KV cache quantization method for large language models (LLMs). \n First, we construct a strong baseline for quantizing KV cache. Through the proposed channel-separable tokenwise quantization scheme, the memory overhead of quantization parameters are substantially reduced compared to fine-grained groupwise quantization.\n To enhance the compression ratio, we propose normalized attention score as an effective metric for identifying salient tokens by considering the lower triangle characteristics of the attention matrix. The quantization bit-width for each token is then adaptively assigned based on their saliency.\n Moreover, we develop an efficient approximation method that decouples the saliency metric from full attention scores, enabling compatibility with fast attention implementations like FlashAttention.\n Extensive experiments demonstrate that ZipCache achieves superior compression ratios, fast generation speed and minimal performance losses compared with previous KV cache compression methods. For instance, when evaluating Mistral-7B model on GSM8k dataset, ZipCache is capable of compressing the KV cache by $4.98\\times$, with only a 0.38% drop in accuracy. In terms of efficiency, ZipCache also showcases a 37.3% reduction in prefill-phase latency, a 56.9% reduction in decoding-phase latency, and a 19.8% reduction in GPU memory usage when evaluating LLaMA3-8B model with a input length of 4096. Code is available at https://github.com/ThisisBillhe/ZipCache/.", "keywords": "Model Compression;Large Language Models;Model Quantization;Efficient Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Yefei He;Luoming Zhang;Weijia Wu;Jing Liu;Hong Zhou;Bohan Zhuang", "authorids": "~Yefei_He1;~Luoming_Zhang1;~Weijia_Wu2;~Jing_Liu8;~Hong_Zhou5;~Bohan_Zhuang1", "gender": "M;M;M;M;M;M", "homepage": "https://hexy.tech/;;https://weijiawu.github.io/;https://www.jing-liu.com/;https://person.zju.edu.cn/zhouhong;https://bohanzhuang.github.io/", "dblp": "92/6254;125/0980;87/7695-1;72/2590-48;;145/1096", "google_scholar": "CTEQwwwAAAAJ;;NgjTRe4AAAAJ;-lHaZH4AAAAJ;;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ", "orcid": "0000-0002-2171-4518;0000-0003-2188-4485;0000-0003-3912-7212;0000-0002-6745-3050;;", "linkedin": ";;%E5%A8%81%E4%BD%B3-%E5%90%B4-07a852280/;jing-liu-619688133/;;bohan-zhuang/", "or_profile": "~Yefei_He1;~Luoming_Zhang1;~Weijia_Wu2;~Jing_Liu8;~Hong_Zhou5;~Bohan_Zhuang1", "aff": "Zhejiang University;Zhejiang University;National University of Singapore;Monash University;Zhejiang University;Monash University", "aff_domain": "zju.edu.cn;zju.edu.cn;nus.edu;monash.edu.au;zju.edu.cn;monash.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhe2024zipcache,\ntitle={ZipCache: Accurate and Efficient {KV} Cache Quantization with Salient Token Identification},\nauthor={Yefei He and Luoming Zhang and Weijia Wu and Jing Liu and Hong Zhou and Bohan Zhuang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5t4ZAkPiJs}\n}", "github": "", "reviewers": "Nc8R;ozSB;E63Z;S3gk", "pdf_size": 1072692, "rating": "5;5;7;8", "confidence": "4;5;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "69;66;100;88", "wc_strengths": "46;31;89;98", "wc_weaknesses": "265;115;15;47", "wc_questions": "93;3;56;2", "wc_limitations": "16;21;1;2", "wc_review": "489;236;261;237", "wc_reply_reviewers": "127;516;41;43", "wc_reply_authors": "155;1190;22;22", "reply_reviewers": "1;2;1;1", "reply_authors": "4;6;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 13.953046262375826 ], "wc_strengths_avg": [ 66.0, 28.186876378910807 ], "wc_weaknesses_avg": [ 110.5, 96.23279066929318 ], "wc_questions_avg": [ 38.5, 38.30469945058961 ], "wc_limitations_avg": [ 10.0, 8.689073598491383 ], "wc_review_avg": [ 305.75, 106.271762477151 ], "wc_reply_reviewers_avg": [ 181.75, 196.07571879251137 ], "wc_reply_authors_avg": [ 347.25, 489.58215602695327 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12144385169451057875&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;nus.edu;monash.edu.au;zju.edu.cn;monash.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0;2", "aff_unique_norm": "Zhejiang University;National University of Singapore;Monash University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg;https://www.monash.edu", "aff_unique_abbr": "ZJU;NUS;Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;2", "aff_country_unique": "China;Singapore;Australia" }, { "title": "WikiContradict: A Benchmark for Evaluating LLMs on Real-World Knowledge Conflicts from Wikipedia", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97844", "id": "5t7DtLwTVC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5t7DtLwTVC", "openreview": "https://openreview.net/forum?id=5t7DtLwTVC", "poster": "/media/PosterPDFs/NeurIPS%202024/97844.png?t=1733050970.3429575", "project": "", "author_site": "Yufang Hou, Alessandra Pascale, Javier Carnerero-Cano, Tigran Tchrakian, Radu Marinescu, Elizabeth Daly, Inkit Padhi, Prasanna Sattigeri", "tldr": "", "abstract": "Retrieval-augmented generation (RAG) has emerged as a promising solution to mitigate the limitations of large language models (LLMs), such as hallucinations and outdated information. However, it remains unclear how LLMs handle knowledge conflicts arising from different augmented retrieved passages, especially when these passages originate from the same source and have equal trustworthiness. In this work, we conduct a comprehensive evaluation of LLM-generated answers to questions that have varying answers based on contradictory passages from Wikipedia, a dataset widely regarded as a high-quality pre-training resource for most LLMs. Specifically, we introduce WikiContradict, a benchmark consisting of 253 high-quality, human-annotated instances designed to assess the performance of LLMs in providing a complete perspective on conflicts from the retrieved documents, rather than choosing one answer over another, when augmented with retrieved passages containing real-world knowledge conflicts. \nWe benchmark a diverse range of both closed and open-source LLMs under different QA scenarios, including RAG with a single passage, and RAG with 2 contradictory passages. Through rigorous human evaluations on a subset of WikiContradict instances involving 5 LLMs and over 3,500 judgements, we shed light on the behaviour and limitations of these models. \nFor instance, when provided with two passages containing contradictory facts, all models struggle to generate answers that accurately reflect the conflicting nature of the context, especially for implicit conflicts requiring reasoning. Since human evaluation is costly, we\nalso introduce an automated model that estimates LLM performance using a strong open-source language model, achieving an F-score of 0.8. Using this automated metric, we evaluate more than 1,500 answers from seven LLMs across all WikiContradict instances.", "keywords": "knowledge contradiction;RAG;LLM evaluation", "primary_area": "", "supplementary_material": "/attachment/2e2ce428b6e30e328eaca17a7c37ae6091e9e7a7.zip", "author": "Yufang Hou;Alessandra Pascale;Javier Carnerero-Cano;Tigran T. Tchrakian;Radu Marinescu;Elizabeth M. Daly;Inkit Padhi;Prasanna Sattigeri", "authorids": "~Yufang_Hou2;~Alessandra_Pascale1;~Javier_Carnerero-Cano1;~Tigran_T._Tchrakian2;~Radu_Marinescu2;~Elizabeth_M._Daly1;~Inkit_Padhi1;~Prasanna_Sattigeri1", "gender": "F;;;;;;;", "homepage": "https://yufanghou.github.io/;https://research.ibm.com/people/alessandra-pascale;https://javiccano.github.io/;https://research.ibm.com/people/tigran-tchrakian;;http://researcher.watson.ibm.com/researcher/view.php?person=ie-elizabeth.daly;;", "dblp": ";;199/6193;;m/RaduMarinescu2;10/5750;188/9098;00/7428", "google_scholar": "-fBym-EAAAAJ;;Pk2TMyEAAAAJ;;;llFJcF4AAAAJ;https://scholar.google.co.in/citations?user=c4yuGSoAAAAJ;m-s38ikAAAAJ", "orcid": ";;0000-0002-6394-4573;;;;;0000-0003-4435-0486", "linkedin": ";;;;;;;prasannasattigeri/", "or_profile": "~Yufang_Hou2;~Alessandra_Pascale1;~Javier_Carnerero-Cano1;~Tigran_T._Tchrakian2;~Radu_Marinescu2;~Elizabeth_M._Daly1;~Inkit_Padhi1;~Prasanna_Sattigeri1", "aff": "IBM Research Ireland;International Business Machines;Imperial College London;International Business Machines;International Business Machines;IBM Research;IBM Research;IBM Research", "aff_domain": "ibm.com;ibm.com;imperial.ac.uk;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "Principal Researcher;Researcher;PhD student;Researcher;Researcher;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nhou2024wikicontradict,\ntitle={WikiContradict: A Benchmark for Evaluating {LLM}s on Real-World Knowledge Conflicts from Wikipedia},\nauthor={Yufang Hou and Alessandra Pascale and Javier Carnerero-Cano and Tigran T. Tchrakian and Radu Marinescu and Elizabeth M. Daly and Inkit Padhi and Prasanna Sattigeri},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=5t7DtLwTVC}\n}", "github": "", "reviewers": "t17j;A47E;qfC4", "pdf_size": 3797750, "rating": "6;7;9", "confidence": "4;4;3", "wc_summary_and_contributions": "81;66;39", "wc_strengths": "41;25;28", "wc_improvement": "107;43;57", "wc_limitations": "79;1;42", "wc_correctness": "16;1;5", "wc_clarity": "16;1;8", "wc_relation_to_prior_work": "30;104;4", "wc_documentation": "13;19;11", "wc_additional_feedback": "1;1;1", "wc_review": "384;261;195", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 62.0, 17.378147196982766 ], "wc_strengths_avg": [ 31.333333333333332, 6.944222218666553 ], "wc_improvement_avg": [ 69.0, 27.47119703738202 ], "wc_limitations_avg": [ 40.666666666666664, 31.857320805254304 ], "wc_correctness_avg": [ 7.333333333333333, 6.342099196813483 ], "wc_clarity_avg": [ 8.333333333333334, 6.128258770283412 ], "wc_relation_to_prior_work_avg": [ 46.0, 42.36350630751268 ], "wc_documentation_avg": [ 14.333333333333334, 3.39934634239519 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 280.0, 78.3198569968051 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9073667551749423128&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "ibm.com;ibm.com;imperial.ac.uk;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "author_num": 8, "aff_unique_index": "0;1;2;1;1;0;0;0", "aff_unique_norm": "IBM;International Business Machines Corporation;Imperial College London", "aff_unique_dep": "Research;;", "aff_unique_url": "https://www.ibm.com/research;https://www.ibm.com;https://www.imperial.ac.uk", "aff_unique_abbr": "IBM;IBM;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1;1;1;1", "aff_country_unique": "Ireland;United States;United Kingdom" }, { "title": "LION: Linear Group RNN for 3D Object Detection in Point Clouds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96562", "id": "5tGkAcY7uV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5tGkAcY7uV", "openreview": "https://openreview.net/forum?id=5tGkAcY7uV", "poster": "/media/PosterPDFs/NeurIPS%202024/96562.png?t=1731331354.5234418", "project": "", "author_site": "Zhe Liu, Jinghua Hou, Xinyu Wang, Xiaoqing Ye, Jingdong Wang, Hengshuang Zhao, Xiang Bai", "tldr": "", "abstract": "The benefit of transformers in large-scale 3D point cloud perception tasks, such as 3D object detection, is limited by their quadratic computation cost when modeling long-range relationships. In contrast, linear RNNs have low computational complexity and are suitable for long-range modeling. Toward this goal, we propose a simple and effective window-based framework built on Linear group RNN (i.e., perform linear RNN for grouped features) for accurate 3D object detection, called LION. The key property is to allow sufficient feature interaction in a much larger group than transformer-based methods. However, effectively applying linear group RNN to 3D object detection in highly sparse point clouds is not trivial due to its limitation in handling spatial modeling. To tackle this problem, we simply introduce a 3D spatial feature descriptor and integrate it into the linear group RNN operators to enhance their spatial features rather than blindly increasing the number of scanning orders for voxel features. To further address the challenge in highly sparse point clouds, we propose a 3D voxel generation strategy to densify foreground features thanks to linear group RNN as a natural property of auto-regressive models. \nExtensive experiments verify the effectiveness of the proposed components and the generalization of our LION on different linear group RNN operators including Mamba, RWKV, and RetNet. Furthermore, it is worth mentioning that our LION-Mamba achieves state-of-the-art on Waymo, nuScenes, Argoverse V2, and ONCE datasets. Last but not least, our method supports kinds of advanced linear RNN operators (e.g., RetNet, RWKV, Mamba, xLSTM and TTT) on small but popular KITTI dataset for a quick experience with our linear RNN-based framework.", "keywords": "3D Object Detection;Linear RNN", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhe Liu;Jinghua Hou;Xinyu Wang;Xiaoqing Ye;Jingdong Wang;Hengshuang Zhao;Xiang Bai", "authorids": "~Zhe_Liu12;~Jinghua_Hou1;~Xinyu_Wang7;~Xiaoqing_Ye1;~Jingdong_Wang1;~Hengshuang_Zhao2;~Xiang_Bai1", "gender": "M;M;M;F;M;M;M", "homepage": "https://github.com/happinesslz;https://github.com/AlmoonYsl;https://github.com/deepinact;https://shuluoshu.github.io/;https://jingdongwang2017.github.io/;https://hszhao.github.io;http://mclab.eic.hust.edu.cn/~xbai", "dblp": "70/1220-33;https://dblp.uni-trier.de/pid/175/1330;;177/0181;49/3441;185/7848;59/2741", "google_scholar": "yprv7EsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;bmN_nycAAAAJ;z5SPCmgAAAAJ;4uE10I0AAAAJ;UeltiQ4AAAAJ", "orcid": ";0009-0007-6555-4038;;0000-0003-3268-880X;0000-0002-4888-4445;0000-0001-8277-2706;", "linkedin": ";;;;;hengshuang-zhao-347b8391/?originalSubdomain=hk;", "or_profile": "~Zhe_Liu12;~Jinghua_Hou1;~Xinyu_Wang7;~Xiaoqing_Ye1;~Jingdong_Wang1;~Hengshuang_Zhao2;~Xiang_Bai1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Baidu Inc.;Baidu;The University of Hong Kong;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com;baidu.com;hku.hk;hust.edu.cn", "position": "PhD student;MS student;MS student;Researcher and Developer;Chief Scientist for Computer Vision;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024lion,\ntitle={{LION}: Linear Group {RNN} for 3D Object Detection in Point Clouds},\nauthor={Zhe Liu and Jinghua Hou and Xinyu Wang and Xiaoqing Ye and Jingdong Wang and Hengshuang Zhao and Xiang Bai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5tGkAcY7uV}\n}", "github": "", "reviewers": "wvps;9fJS;5dKF;ehGN", "pdf_size": 1280862, "rating": "4;5;6;6", "confidence": "5;4;4;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "84;55;89;54", "wc_strengths": "55;48;86;17", "wc_weaknesses": "415;177;156;80", "wc_questions": "74;34;104;28", "wc_limitations": "4;6;9;9", "wc_review": "632;320;444;188", "wc_reply_reviewers": "417;118;24;0", "wc_reply_authors": "1109;152;18;0", "reply_reviewers": "2;2;1;0", "reply_authors": "4;4;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.5, 16.101242188104617 ], "wc_strengths_avg": [ 51.5, 24.520399670478458 ], "wc_weaknesses_avg": [ 207.0, 125.39338100553793 ], "wc_questions_avg": [ 60.0, 30.95157508108432 ], "wc_limitations_avg": [ 7.0, 2.1213203435596424 ], "wc_review_avg": [ 396.0, 163.58484037342825 ], "wc_reply_reviewers_avg": [ 139.75, 166.03369386964803 ], "wc_reply_authors_avg": [ 319.75, 459.44225698122284 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4145890881069870691&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com;baidu.com;hku.hk;hust.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;1;2;0", "aff_unique_norm": "Huazhong University of Science and Technology;Baidu;University of Hong Kong", "aff_unique_dep": ";Baidu Inc.;", "aff_unique_url": "http://www.hust.edu.cn;https://www.baidu.com;https://www.hku.hk", "aff_unique_abbr": "HUST;Baidu;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Supervised Kernel Thinning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96561", "id": "5tIG2KZogL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5tIG2KZogL", "openreview": "https://openreview.net/forum?id=5tIG2KZogL", "poster": "", "project": "", "author_site": "Albert Gong, Kyuseong Choi, Raaz Dwivedi", "tldr": "", "abstract": "The kernel thinning algorithm of Dwivedi & Mackey (2024) provides a better-than-i.i.d. compression of a generic set of points. By generating high-fidelity coresets of size significantly smaller than the input points, KT is known to speed up unsupervised tasks like Monte Carlo integration, uncertainty quantification, and non-parametric hypothesis testing, with minimal loss in statistical accuracy. In this work, we generalize the KT algorithm to speed up supervised learning problems involving kernel methods. \nSpecifically, we combine two classical algorithms---Nadaraya-Watson (NW) regression or kernel smoothing, and kernel ridge regression (KRR)---with KT to provide a quadratic speed-up in both training and inference times. We show how distribution compression with KT in each setting reduces to constructing an appropriate kernel, and introduce the Kernel-Thinned NW and Kernel-Thinned KRR estimators. We prove that KT-based regression estimators enjoy significantly superior computational efficiency over the full-data estimators and improved statistical efficiency over i.i.d. subsampling of the training data. En route, we also provide a novel multiplicative error guarantee for compressing with KT. We validate our design choices with both simulations and real data experiments.", "keywords": "Kernel methods;distribution compression;non-parametric regression", "primary_area": "learning_theory", "supplementary_material": "", "author": "Albert Gong;Kyuseong Choi;Raaz Dwivedi", "authorids": "~Albert_Gong1;~Kyuseong_Choi1;~Raaz_Dwivedi1", "gender": "M;M;M", "homepage": "https://ag2435.github.io;https://kyuseongchoi5.github.io;https://raazdwivedi.github.io/", "dblp": ";;180/9006", "google_scholar": "ux1MGMwAAAAJ;YIlTXCIAAAAJ;9ehX_58AAAAJ", "orcid": "0009-0005-1687-0240;;", "linkedin": "albert-gong/;kyuseong-choi-ab532a1b4/;raaz-dwivedi", "or_profile": "~Albert_Gong1;~Kyuseong_Choi1;~Raaz_Dwivedi1", "aff": "Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngong2024supervised,\ntitle={Supervised Kernel Thinning},\nauthor={Albert Gong and Kyuseong Choi and Raaz Dwivedi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5tIG2KZogL}\n}", "github": "", "reviewers": "Vy1C;QHUc;PSUT", "pdf_size": 913048, "rating": "5;7;7", "confidence": "2;3;2", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "150;52;76", "wc_strengths": "32;35;30", "wc_weaknesses": "57;96;18", "wc_questions": "200;166;5", "wc_limitations": "42;4;68", "wc_review": "481;353;197", "wc_reply_reviewers": "128;25;12", "wc_reply_authors": "764;30;0", "reply_reviewers": "2;1;1", "reply_authors": "3;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 92.66666666666667, 41.70797951897881 ], "wc_strengths_avg": [ 32.333333333333336, 2.0548046676563256 ], "wc_weaknesses_avg": [ 57.0, 31.843366656181317 ], "wc_questions_avg": [ 123.66666666666667, 85.05031190745602 ], "wc_limitations_avg": [ 38.0, 26.280537792569366 ], "wc_review_avg": [ 343.6666666666667, 116.1301951355556 ], "wc_reply_reviewers_avg": [ 55.0, 51.89091121445707 ], "wc_reply_authors_avg": [ 264.6666666666667, 353.29433747073966 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6021767584964132085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cornell.edu;cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5tOVh81aze", "title": "Language models scale reliably with over-training and on downstream tasks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Scaling laws are useful guides for derisking expensive training runs, as they predict performance of large models using cheaper, small-scale experiments. However, there remain gaps between current scaling studies and how language models are ultimately trained and evaluated. For instance, scaling is usually studied in the compute-optimal training regime (i.e., \"Chinchilla optimal\" regime). In contrast, models are often over-trained to reduce inference costs. Moreover, scaling laws mostly predict loss on next-token prediction, but models are usually compared on downstream task performance. To address both shortcomings, we create a testbed of 104 models with 0.011B to 6.9B parameters trained with various numbers of tokens on three data distributions. First, we fit scaling laws that extrapolate in both the amount of over-training and the number of model parameters. This enables us to predict the validation loss of a 1.4B parameter, 900B token run (i.e., 32$\\times$ over-trained) and a 6.9B parameter, 138B token run (i.e., a compute-optimal run)\u2013\u2013each from experiments that take 300$\\times$ less compute. Second, we relate the perplexity of a language model to its downstream task performance by proposing a power law. We use this law to predict top-1 error averaged over downstream tasks for the two aforementioned models, using experiments that take 20$\\times$ less compute.", "keywords": "large language models;scaling laws;over-training;task prediction", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/69d116cb4e58d72f21b5c075f1e0a6c71af94c91.zip", "author": "Samir Yitzhak Gadre;Georgios Smyrnis;Vaishaal Shankar;Suchin Gururangan;Mitchell Wortsman;Rulin Shao;Jean Mercat;Alex Fang;Jeffrey Li;Sedrick Keh;Rui Xin;Marianna Nezhurina;Igor Vasiljevic;Luca Soldaini;Jenia Jitsev;Alex Dimakis;Gabriel Ilharco;Pang Wei Koh;Shuran Song;Thomas Kollar;Yair Carmon;Achal Dave;Reinhard Heckel;Niklas Muennighoff;Ludwig Schmidt", "authorids": "~Samir_Yitzhak_Gadre1;~Georgios_Smyrnis1;~Vaishaal_Shankar1;~Suchin_Gururangan1;~Mitchell_Wortsman1;~Rulin_Shao1;~Jean_Mercat1;~Alex_Fang1;~Jeffrey_Li1;~Sedrick_Keh1;~Rui_Xin1;~Marianna_Nezhurina1;~Igor_Vasiljevic1;~Luca_Soldaini1;~Jenia_Jitsev1;~Alex_Dimakis1;~Gabriel_Ilharco1;~Pang_Wei_Koh1;~Shuran_Song3;~Thomas_Kollar1;~Yair_Carmon1;~Achal_Dave1;~Reinhard_Heckel1;~Niklas_Muennighoff1;~Ludwig_Schmidt1", "gender": "M;M;;M;M;;M;;M;;;;M;Non-Binary;;M;M;M;F;M;M;M;M;M;M", "homepage": "https://sagadre.github.io/;;;https://suchin.io;https://mitchellnw.github.io/;https://rulinshao.github.io/;http://jean-mercat.netlify.app;;;;;;https://scholar.google.com/citations?user=Sl_2kHcAAAAJ&hl=en;https://soldaini.net;;https://people.eecs.berkeley.edu/~alexdimakis/;http://gabrielilharco.com/;http://cs.stanford.edu/~pangwei;https://shurans.github.io/;http://tkollar.github.io;https://www.cs.tau.ac.il/~ycarmon/;http://www.achaldave.com/;;https://muennighoff.github.io/;http://people.csail.mit.edu/ludwigs/", "dblp": "246/7901;255/9114;;217/1570;232/2273;;248/2886;260/0449;;;;323/5785.html;;160/1741;53/5156;19/5000.html;249/2616;10/10453;;10/6653;13/558;156/1161;81/9668;281/6745;141/2720", "google_scholar": "oAhlg9gAAAAJ;;;CJIKhNIAAAAJ;fzRnjFgAAAAJ;Vdwh6bcAAAAJ;https://scholar.google.com/citations?hl=fr;;JDS2BnIAAAAJ;;;2KPv4VYAAAAJ;;3KPvwcgAAAAJ;https://scholar.google.com/citations?hl=en;JSFmVQEAAAAJ;https://scholar.google.com/citations?hl=en;Nn990CkAAAAJ;https://scholar.google.com/citations?hl=en;AEKT17QAAAAJ;kTKmpT0AAAAJ;oQyYH9kAAAAJ;ZWV0I7cAAAAJ;Me0IoRMAAAAJ;SWMKy70AAAAJ", "orcid": ";;;;;;0000-0002-4012-9082;;;;;0009-0000-9541-5150;;0000-0001-6998-9863;0000-0002-1221-7851;;;;;0000-0003-2598-8118;;;;;", "linkedin": ";;;;;;;alex-fang-8a11a8115/;jeffrey-li-a78684111/;;;https://www.linkedin.com/mwlite/in/marianna-nezhurina-957848145;;soldni/;;alex-dimakis-b1b20320/;;;;;;;;niklasmuennighoff/;ludwig-schmidt-87ba3612/", "or_profile": "~Samir_Yitzhak_Gadre1;~Georgios_Smyrnis1;~Vaishaal_Shankar1;~Suchin_Gururangan1;~Mitchell_Wortsman1;~Rulin_Shao1;~Jean_Mercat1;~Alex_Fang1;~Jeffrey_Li1;~Sedrick_Keh1;~Rui_Xin1;~Marianna_Nezhurina1;~Igor_Vasiljevic1;~Luca_Soldaini1;~Jenia_Jitsev1;~Alex_Dimakis1;~Gabriel_Ilharco1;~Pang_Wei_Koh1;~Shuran_Song3;~Thomas_Kollar1;~Yair_Carmon1;~Achal_Dave1;~Reinhard_Heckel1;~Niklas_Muennighoff1;~Ludwig_Schmidt1", "aff": "Columbia University;Toyota Research Institute;;University of Washington, Seattle;University of Washington, Seattle;University of Washington;Toyota Research Institute;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;;;Forschungszentrum Juelich GmbH;Toyota Research Institute;Allen Institute for Artificial Intelligence;Juelich Supercomputing Center, Research Center Juelich;University of Texas at Austin;Department of Computer Science, University of Washington;University of Washington;Stanford University;Toyota Research Institute;Tel Aviv University;Toyota Research Institute;Rice University;Allen Institute for Artificial Intelligence;University of Washington", "aff_domain": "columbia.edu;tri.global;;uw.edu;uw.edu;uw.edu;tri.global;cs.washington.edu;cs.washington.edu;;;fz-juelich.de;tri.global;allenai.org;fz-juelich.de;utexas.edu;cs.washington.edu;cs.washington.edu;stanford.edu;tri.global;tau.ac.il;tri.global;rice.edu;allenai.org;washington.edu", "position": "PhD student;Intern;;PhD student;PhD student;PhD student;Researcher;PhD student;PhD student;;;Researcher;Research Scientist;Researcher;Senior Scientist;Full Professor;PhD student;Assistant Professor;Assistant Professor;Principal Researcher;Assistant Professor;Researcher;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024language,\ntitle={Language models scale reliably with over-training and on downstream tasks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=5tOVh81aze}\n}", "github": "", "project": "", "reviewers": "RC8r;7fVK;CG6A", "site": "https://openreview.net/forum?id=5tOVh81aze", "pdf_size": 979651, "rating": "5;5;7", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "3;3;3", "wc_summary": "111;118;43", "wc_strengths": "132;54;95", "wc_weaknesses": "368;80;414", "wc_questions": "98;75;135", "wc_limitations": "4;1;17", "wc_review": "713;328;704", "wc_reply_reviewers": "437;21;55", "wc_reply_authors": "751;32;8", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.66666666666667, 33.82635395992631 ], "wc_strengths_avg": [ 93.66666666666667, 31.857320805254304 ], "wc_weaknesses_avg": [ 287.3333333333333, 147.80467591460774 ], "wc_questions_avg": [ 102.66666666666667, 24.716166549222166 ], "wc_limitations_avg": [ 7.333333333333333, 6.944222218666553 ], "wc_review_avg": [ 581.6666666666666, 179.40704804686155 ], "wc_reply_reviewers_avg": [ 171.0, 188.6018734442123 ], "wc_reply_authors_avg": [ 263.6666666666667, 344.7359698216722 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 25, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8835166256556228117&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;2;2;1;2;2;3;1;4;5;6;2;2;7;1;8;1;9;4;2", "aff_unique_norm": "Columbia University;Toyota Research Institute;University of Washington;Forschungszentrum Juelich;Allen Institute for Artificial Intelligence;Research Center Juelich;University of Texas at Austin;Stanford University;Tel Aviv University;Rice University", "aff_unique_dep": ";;;;;Juelich Supercomputing Center;;;;", "aff_unique_url": "https://www.columbia.edu;https://www.tri.global;https://www.washington.edu;https://www.fz-juelich.de;https://allenai.org;https://www.fz-juelich.de/;https://www.utexas.edu;https://www.stanford.edu;https://www.tau.ac.il;https://www.rice.edu", "aff_unique_abbr": "Columbia;TRI;UW;FZJ;AI2;FZ J\u00fclich;UT Austin;Stanford;TAU;Rice", "aff_campus_unique_index": "1;1;1;1;2;1;3", "aff_campus_unique": ";Seattle;Austin;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0;0;1;0;0;0;0;0;2;0;0;0;0", "aff_country_unique": "United States;Germany;Israel" }, { "title": "Transformers on Markov data: Constant depth suffices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96560", "id": "5uG9tp3v2q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5uG9tp3v2q", "openreview": "https://openreview.net/forum?id=5uG9tp3v2q", "poster": "", "project": "", "author_site": "Nived Rajaraman, Marco Bondaschi, Ashok Vardhan Makkuva, Kannan Ramchandran, Michael Gastpar", "tldr": "", "abstract": "Attention-based transformers have been remarkably successful at modeling generative processes across various domains and modalities. In this paper, we study the behavior of transformers on data drawn from $k^{\\text{th}}$-order Markov processes, where the conditional distribution of the next symbol in a sequence depends on the previous $k$ symbols observed. We observe a surprising phenomenon empirically which contradicts previous findings: when trained for sufficiently long, a transformer with a fixed depth and $1$ head per layer is able to achieve low test loss on sequences drawn from $k^{\\text{th}}$-order Markov sources, even as $k$ grows. Furthermore, this low test loss is achieved by the transformer\u2019s ability to represent and learn the in-context conditional empirical distribution. On the theoretical side, we prove that a transformer with $O(\\log_2(k))$ layers can represent the in-context conditional empirical distribution by composing induction heads to track the previous $k$ symbols in the sequence. Surprisingly, with the addition of layer normalization, we show that a transformer with a constant number of layers can represent the in-context conditional empirical distribution, concurring with our empirical observations. This result provides more insight into the benefit of soft-attention and non-linearities in the transformer architecture.", "keywords": "Depth;k-gram;Transformers;Representation", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/8390befcb7a1983683113c1d4ee38e78101805c9.zip", "author": "Nived Rajaraman;Marco Bondaschi;Ashok Vardhan Makkuva;Kannan Ramchandran;Michael Gastpar", "authorids": "~Nived_Rajaraman1;~Marco_Bondaschi1;~Ashok_Vardhan_Makkuva1;~Kannan_Ramchandran1;~Michael_Gastpar1", "gender": "M;M;;M;", "homepage": "https://people.eecs.berkeley.edu/~nived.rajaraman/;;;https://www.eecs.berkeley.edu/~kannanr/;https://people.epfl.ch/michael.gastpar", "dblp": "229/4215;255/4933;;53/5765;", "google_scholar": "7hb2BM8AAAAJ;;;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ;https://scholar.google.ch/citations?user=IQ3hcw4AAAAJ", "orcid": ";0000-0002-4158-2487;;0000-0002-4567-328X;0000-0002-5499-5336", "linkedin": ";;;;", "or_profile": "~Nived_Rajaraman1;~Marco_Bondaschi1;~Ashok_Vardhan_Makkuva1;~Kannan_Ramchandran1;~Michael_Gastpar1", "aff": "University of California, Berkeley;EPFL - EPF Lausanne;;University of California, Berkeley;School of Computer and Communication Sciences, EPFL - EPF Lausanne", "aff_domain": "berkeley.edu;epfl.ch;;berkeley.edu;ic.epfl.ch", "position": "PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nrajaraman2024transformers,\ntitle={Transformers on Markov data: Constant depth suffices},\nauthor={Nived Rajaraman and Marco Bondaschi and Ashok Vardhan Makkuva and Kannan Ramchandran and Michael Gastpar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5uG9tp3v2q}\n}", "github": "", "reviewers": "TFSx;uqDv;873s;YGsY;DmcT", "pdf_size": 2678537, "rating": "5;6;6;6;6", "confidence": "3;4;4;3;3", "soundness": "2;3;3;2;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "73;95;177;92;70", "wc_strengths": "97;158;76;91;133", "wc_weaknesses": "130;67;76;165;150", "wc_questions": "76;35;326;117;121", "wc_limitations": "82;2;1;27;11", "wc_review": "458;357;656;492;485", "wc_reply_reviewers": "23;11;21;22;26", "wc_reply_authors": "100;0;0;115;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;3;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.4, 39.08247689182455 ], "wc_strengths_avg": [ 111.0, 30.046630426721727 ], "wc_weaknesses_avg": [ 117.6, 39.34768099901187 ], "wc_questions_avg": [ 135.0, 100.48084394550038 ], "wc_limitations_avg": [ 24.6, 30.176812290233705 ], "wc_review_avg": [ 489.6, 96.22598401679247 ], "wc_reply_reviewers_avg": [ 20.6, 5.083306010855534 ], "wc_reply_authors_avg": [ 43.0, 52.87721626560914 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8874861421906378783&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "berkeley.edu;epfl.ch;;berkeley.edu;ic.epfl.ch", "author_num": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of California, Berkeley;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.epfl.ch", "aff_unique_abbr": "UC Berkeley;EPFL", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Berkeley;Lausanne", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "MambaTalk: Efficient Holistic Gesture Synthesis with Selective State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96559", "id": "5uUleAsYUG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5uUleAsYUG", "openreview": "https://openreview.net/forum?id=5uUleAsYUG", "poster": "/media/PosterPDFs/NeurIPS%202024/96559.png?t=1731604361.1191313", "project": "", "author_site": "Zunnan Xu, Yukang Lin, Haonan Han, Sicheng Yang, Ronghui Li, Yachao Zhang, Xiu Li", "tldr": "", "abstract": "Gesture synthesis is a vital realm of human-computer interaction, with wide-ranging applications across various fields like film, robotics, and virtual reality. \nRecent advancements have utilized the diffusion model to improve gesture synthesis. \nHowever, the high computational complexity of these techniques limits the application in reality. \nIn this study, we explore the potential of state space models (SSMs).\nDirect application of SSMs in gesture synthesis encounters difficulties, which stem primarily from the diverse movement dynamics of various body parts. \nThe generated gestures may also exhibit unnatural jittering issues.\nTo address these, we implement a two-stage modeling strategy with discrete motion priors to enhance the quality of gestures.\nBuilt upon the selective scan mechanism, we introduce MambaTalk, which integrates hybrid fusion modules, local and global scans to refine latent space representations.\nSubjective and objective experiments demonstrate that our method surpasses the performance of state-of-the-art models. Our project is publicly available at~\\url{https://kkakkkka.github.io/MambaTalk/}.", "keywords": "Gesture Synthesis;Selective State Space Models;Human Computer Interaction", "primary_area": "generative_models", "supplementary_material": "/attachment/490dcfbf7b65a4c8748fe26722e4020c36ebede7.zip", "author": "Zunnan Xu;Yukang Lin;Haonan Han;Sicheng Yang;Ronghui Li;Yachao Zhang;Xiu Li", "authorids": "~Zunnan_Xu1;~Yukang_Lin1;~Haonan_Han1;~Sicheng_Yang2;~Ronghui_Li1;~Yachao_Zhang1;~Xiu_Li1", "gender": ";M;M;M;M;M;F", "homepage": ";https://github.com/lyk412;https://vincenthancoder.github.io/;https://youngseng.github.io/;https://li-ronghui.github.io/;https://yachao-zhang.github.io/;https://thusigsiclab.github.io/thu.github.io/introduction.html", "dblp": ";;;176/6714;52/9777;40/10584-1;13/1206-1", "google_scholar": ";;vLZp870AAAAJ;Qy-LKSkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=a-I8c8EAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0009-0001-2469-5690;0009-0002-4894-3860;0000-0002-0928-034X;;0000-0002-6153-5004;0000-0003-0403-1923", "linkedin": ";;;;;;", "or_profile": "~Zunnan_Xu1;~Yukang_Lin1;~Haonan_Han1;~Sicheng_Yang2;~Ronghui_Li1;~Yachao_Zhang1;~Xiu_Li1", "aff": ";Alibaba Group;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": ";alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";Intern;PhD student;MS student;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nxu2024mambatalk,\ntitle={MambaTalk: Efficient Holistic Gesture Synthesis with Selective State Space Models},\nauthor={Zunnan Xu and Yukang Lin and Haonan Han and Sicheng Yang and Ronghui Li and Yachao Zhang and Xiu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5uUleAsYUG}\n}", "github": "", "reviewers": "p3w5;gANk;qFoz;WGJa", "pdf_size": 2169878, "rating": "4;5;6;6", "confidence": "5;5;2;3", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;4;2", "wc_summary": "55;67;90;47", "wc_strengths": "52;98;44;49", "wc_weaknesses": "375;187;66;78", "wc_questions": "4;132;4;122", "wc_limitations": "4;13;53;35", "wc_review": "490;497;257;331", "wc_reply_reviewers": "48;54;17;33", "wc_reply_authors": "326;250;129;62", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 64.75, 16.223054582907622 ], "wc_strengths_avg": [ 60.75, 21.695333599647643 ], "wc_weaknesses_avg": [ 176.5, 123.9203373139373 ], "wc_questions_avg": [ 65.5, 61.60154218848746 ], "wc_limitations_avg": [ 26.25, 19.122957407263137 ], "wc_review_avg": [ 393.75, 103.15370812530202 ], "wc_reply_reviewers_avg": [ 38.0, 14.33527118683145 ], "wc_reply_authors_avg": [ 191.75, 102.69950097249743 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16629658725285924577&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Alibaba Group;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Alibaba;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Efficient Large Multi-modal Models via Visual Context Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96558", "id": "5ujp72CiYB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5ujp72CiYB", "openreview": "https://openreview.net/forum?id=5ujp72CiYB", "poster": "/media/PosterPDFs/NeurIPS%202024/96558.png?t=1733706637.2554643", "project": "", "author_site": "Jieneng Chen, Luoxin Ye, Ju He, Zhaoyang Wang, Daniel Khashabi, Alan Yuille", "tldr": "", "abstract": "While significant advancements have been made in compressed representations for text embeddings in large language models (LLMs), the compression of visual tokens in multi-modal LLMs (MLLMs) has remained a largely overlooked area. In this work, we present the study on the analysis of redundancy concerning visual tokens and efficient training within these models. Our initial experiments\nshow that eliminating up to 70% of visual tokens at the testing stage by simply average pooling only leads to a minimal 3% reduction in visual question answering accuracy on the GQA benchmark, indicating significant redundancy in visual context. Addressing this, we introduce Visual Context Compressor, which reduces the number of visual tokens to enhance training and inference efficiency without sacrificing performance. To minimize information loss caused by the compression on visual tokens while maintaining training efficiency, we develop LLaVolta as a light and staged training scheme that incorporates stage-wise visual context compression to progressively compress the visual tokens from heavily to lightly compression during training, yielding no loss of information when testing. Extensive experiments demonstrate that our approach enhances the performance of MLLMs in both image-language and video-language understanding, while also significantly cutting training costs and improving inference efficiency.", "keywords": "Multi-modal LLMs; Efficiency", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jieneng Chen;Luoxin Ye;Ju He;Zhao-Yang Wang;Daniel Khashabi;Alan Yuille", "authorids": "~Jieneng_Chen1;~Luoxin_Ye1;~Ju_He1;~Zhao-Yang_Wang1;~Daniel_Khashabi2;~Alan_Yuille1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/feiyu12138;https://tacju.github.io/;https://aiem.jhu.edu/people/zhaoyang-wang/;http://danielkhashabi.com/;;https://beckschen.github.io", "dblp": ";;190/9251;71/10515;y/AlanLYuille;", "google_scholar": ";NyTPm_zUV_kC;L_4sVVYAAAAJ;pK2kQvgAAAAJ;;yLYj88sAAAAJ", "orcid": ";;;;;", "linkedin": ";ju-he-43b884190/;;;;jieneng-chen-53254011a/", "or_profile": "~Luoxin_Ye1;~Ju_He1;~Zhao-Yang_Wang1;~Daniel_Khashabi2;~Alan_Yuille1;~J_Chen1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "johnshopkins.edu;jhu.edu;jh.edu;jhu.edu;johnshopkins.edu;jhu.edu", "position": "MS student;PhD student;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024efficient,\ntitle={Efficient Large Multi-modal Models via Visual Context Compression},\nauthor={Jieneng Chen and Luoxin Ye and Ju He and Zhao-Yang Wang and Daniel Khashabi and Alan Yuille},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5ujp72CiYB}\n}", "github": "", "reviewers": "PsS7;dJy4;ENde", "pdf_size": 907378, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "95;84;73", "wc_strengths": "68;36;57", "wc_weaknesses": "122;35;213", "wc_questions": "5;67;75", "wc_limitations": "1;1;7", "wc_review": "291;223;425", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 84.0, 8.981462390204987 ], "wc_strengths_avg": [ 53.666666666666664, 13.27487183449325 ], "wc_weaknesses_avg": [ 123.33333333333333, 72.67431152446892 ], "wc_questions_avg": [ 49.0, 31.283648551066843 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 313.0, 83.92059739221753 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=464346486861171989&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "email": "johnshopkins.edu;jhu.edu;jh.edu;jhu.edu;johnshopkins.edu;jhu.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Atlas3D: Physically Constrained Self-Supporting Text-to-3D for Simulation and Fabrication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96557", "id": "5x69CL2w3F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5x69CL2w3F", "openreview": "https://openreview.net/forum?id=5x69CL2w3F", "poster": "", "project": "", "author_site": "Yunuo Chen, Tianyi Xie, Zeshun Zong, Xuan Li, Feng Gao, Yin Yang, Ying Nian Wu, Chenfanfu Jiang", "tldr": "", "abstract": "Existing diffusion-based text-to-3D generation methods primarily focus on producing visually realistic shapes and appearances, often neglecting the physical constraints necessary for downstream tasks. Generated models frequently fail to maintain balance when placed in physics-based simulations or 3D printed. This balance is crucial for satisfying user design intentions in interactive gaming, embodied AI, and robotics, where stable models are needed for reliable interaction. Additionally, stable models ensure that 3D-printed objects, such as figurines for home decoration, can stand on their own without requiring additional supports. To fill this gap, we introduce Atlas3D, an automatic and easy-to-implement method that enhances existing Score Distillation Sampling (SDS)-based text-to-3D tools. Atlas3D ensures the generation of self-supporting 3D models that adhere to physical laws of stability under gravity, contact, and friction. Our approach combines a novel differentiable simulation-based loss function with physically inspired regularization, serving as either a refinement or a post-processing module for existing frameworks. We verify Atlas3D's efficacy through extensive generation tasks and validate the resulting 3D models in both simulated and real-world environments.", "keywords": "Text-to-3D Generation;Score Distillation Sampling;Physics-based Simulation", "primary_area": "generative_models", "supplementary_material": "/attachment/29d2f8587e635a0781143d348bf074b60f991602.zip", "author": "Yunuo Chen;Tianyi Xie;Zeshun Zong;Xuan Li;Feng Gao;Yin Yang;Ying Nian Wu;Chenfanfu Jiang", "authorids": "~Yunuo_Chen2;~Tianyi_Xie1;~Zeshun_Zong1;~Xuan_Li8;~Feng_Gao2;~Yin_Yang4;~Ying_Nian_Wu1;~Chenfanfu_Jiang3", "gender": ";M;M;M;M;M;;M", "homepage": "https://yunuoch.github.io/;https://xpandora.github.io/;https://www.math.ucla.edu/~zeshunzong/;https://xuan-li.github.io/;https://fen9.github.io/;https://yangzzzy.github.io/;;http://www.stat.ucla.edu/~ywu/", "dblp": ";161/7169;;;10/2674-13;56/2998-2;132/7630;18/568.html", "google_scholar": "mqKjwLAAAAAJ;oRt4qcgAAAAJ;;;amaLnocAAAAJ;-z2_nggAAAAJ;;7k_1QFIAAAAJ", "orcid": ";0009-0006-3101-7659;;0000-0003-0677-8369;0000-0003-1515-1357;0000-0001-7645-5931;;", "linkedin": ";;;;;;;", "or_profile": "~Yunuo_Chen2;~Tianyi_Xie1;~Zeshun_Zong1;~Xuan_Li8;~Feng_Gao2;~Yin_Yang4;~Chenfanfu_Jiang3;~Yingnian_Wu1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Amazon;University of Utah;University of California, Los Angeles;UCLA", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;ucla.edu;amazon.com;utah.edu;ucla.edu;stat.ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024atlasd,\ntitle={Atlas3D: Physically Constrained Self-Supporting Text-to-3D for Simulation and Fabrication},\nauthor={Yunuo Chen and Tianyi Xie and Zeshun Zong and Xuan Li and Feng Gao and Yin Yang and Ying Nian Wu and Chenfanfu Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5x69CL2w3F}\n}", "github": "", "reviewers": "iQck;MwTq;XTuH;Nrjn", "pdf_size": 19447230, "rating": "5;5;6;8", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "74;79;73;101", "wc_strengths": "32;29;61;109", "wc_weaknesses": "215;166;152;47", "wc_questions": "93;73;12;26", "wc_limitations": "26;6;2;53", "wc_review": "440;353;300;336", "wc_reply_reviewers": "15;50;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 11.344051304538427 ], "wc_strengths_avg": [ 57.75, 32.119892590106836 ], "wc_weaknesses_avg": [ 145.0, 61.22499489587565 ], "wc_questions_avg": [ 51.0, 33.1436268383531 ], "wc_limitations_avg": [ 21.75, 20.20365066021485 ], "wc_review_avg": [ 357.25, 51.46540099911785 ], "wc_reply_reviewers_avg": [ 19.0, 18.721645226849056 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10433248999397065995&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ucla.edu;ucla.edu;ucla.edu;ucla.edu;amazon.com;utah.edu;ucla.edu;stat.ucla.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;0;0", "aff_unique_norm": "University of California, Los Angeles;Amazon;University of Utah", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com;https://www.utah.edu", "aff_unique_abbr": "UCLA;Amazon;Utah", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Model Heterogeneous Matryoshka Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96556", "id": "5yboFMpvHf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5yboFMpvHf", "openreview": "https://openreview.net/forum?id=5yboFMpvHf", "poster": "/media/PosterPDFs/NeurIPS%202024/96556.png?t=1729529358.9012094", "project": "", "author_site": "Liping Yi, Han Yu, Chao Ren, Gang Wang, xiaoguang Liu, Xiaoxiao Li", "tldr": "", "abstract": "Model heterogeneous federated learning (MHeteroFL) enables FL clients to collaboratively train models with heterogeneous structures in a distributed fashion. However, existing MHeteroFL methods rely on training loss to transfer knowledge between the client model and the server model, resulting in limited knowledge exchange. To address this limitation, we propose the **Fed**erated model heterogeneous **M**atryoshka **R**epresentation **L**earning (**FedMRL**) approach for supervised learning tasks. It adds an auxiliary small homogeneous model shared by clients with heterogeneous local models. (1) The generalized and personalized representations extracted by the two models' feature extractors are fused by a personalized lightweight representation projector. This step enables representation fusion to adapt to local data distribution. (2) The fused representation is then used to construct Matryoshka representations with multi-dimensional and multi-granular embedded representations learned by the global homogeneous model header and the local heterogeneous model header. This step facilitates multi-perspective representation learning and improves model learning capability. Theoretical analysis shows that FedMRL achieves a $O(1/T)$ non-convex convergence rate. Extensive experiments on benchmark datasets demonstrate its superior model accuracy with low communication and computational costs compared to seven state-of-the-art baselines. It achieves up to 8.48% and 24.94% accuracy improvement compared with the state-of-the-art and the best same-category baseline, respectively.", "keywords": "Model Heterogeneous Federated Learning;Matryoshka Representation Learning", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Liping Yi;Han Yu;Chao Ren;Gang Wang;xiaoguang Liu;Xiaoxiao Li", "authorids": "~Liping_Yi1;~Han_Yu1;~Chao_Ren2;~Gang_Wang8;~xiaoguang_Liu3;~Xiaoxiao_Li1", "gender": ";M;M;M;;Unspecified", "homepage": "https://lipingyi.github.io/;https://sites.google.com/site/hanyushomepage/home;https://scholar.google.com.sg/citations?user=PqMIN74AAAAJ&hl=zh-CN;https://cc.nankai.edu.cn/2021/0323/c13619a490377/page.htm;;https://xxlya.github.io/", "dblp": "249/7727;35/1096-1;02/4647-6.html;71/4292-1;;71/8042", "google_scholar": "https://scholar.google.com.hk/citations?user=7DqiAM8AAAAJ;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.sg/citations?user=PqMIN74AAAAJ;;;sdENOQ4AAAAJ", "orcid": "0000-0001-6236-3673;0000-0001-6893-8650;0000-0001-9096-8792;0000-0003-0387-2501;;", "linkedin": ";;;;;", "or_profile": "~Liping_Yi1;~Han_Yu1;~Chao_Ren2;~Gang_Wang8;~xiaoguang_Liu3;~Xiaoxiao_Li1", "aff": "Nankai University;Nanyang Technological University;Nanyang Technological University;Nankai University;;University of British Columbia", "aff_domain": "nankai.edu.cn;ntu.edu.sg;ntu.edu.sg;nankai.edu.cn;;ece.ubc.ca", "position": "PhD student;Associate Professor;Principal Researcher;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nyi2024federated,\ntitle={Federated Model Heterogeneous Matryoshka Representation Learning},\nauthor={Liping Yi and Han Yu and Chao Ren and Gang Wang and xiaoguang Liu and Xiaoxiao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5yboFMpvHf}\n}", "github": "", "reviewers": "b3UH;NjYm;DqWB;6tni", "pdf_size": 1018696, "rating": "5;6;6;7", "confidence": "4;4;5;4", "soundness": "2;3;4;4", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "81;55;20;72", "wc_strengths": "64;55;38;41", "wc_weaknesses": "414;126;84;30", "wc_questions": "2;44;50;4", "wc_limitations": "6;18;1;10", "wc_review": "567;298;193;157", "wc_reply_reviewers": "25;15;0;9", "wc_reply_authors": "31;17;0;16", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 57.0, 23.313086453749534 ], "wc_strengths_avg": [ 49.5, 10.547511554864494 ], "wc_weaknesses_avg": [ 163.5, 148.5757382616691 ], "wc_questions_avg": [ 25.0, 22.11334438749598 ], "wc_limitations_avg": [ 8.75, 6.219927652312364 ], "wc_review_avg": [ 303.75, 160.57299741861954 ], "wc_reply_reviewers_avg": [ 12.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 16.0, 10.977249200050075 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15494524130688573903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;ntu.edu.sg;ntu.edu.sg;nankai.edu.cn;;ece.ubc.ca", "author_num": 6, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Nankai University;Nanyang Technological University;University of British Columbia", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nankai.edu.cn;https://www.ntu.edu.sg;https://www.ubc.ca", "aff_unique_abbr": "NKU;NTU;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;2", "aff_country_unique": "China;Singapore;Canada" }, { "title": "VASA-1: Lifelike Audio-Driven Talking Faces Generated in Real Time", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96555", "id": "5zSCSE0k41", "proceeding": "", "pdf": "https://openreview.net/pdf?id=5zSCSE0k41", "openreview": "https://openreview.net/forum?id=5zSCSE0k41", "poster": "", "project": "", "author_site": "Sicheng Xu, Guojun Chen, Yu-Xiao Guo, Jiaolong Yang, Chong Li, Zhenyu Zang, Yizhong Zhang, Xin Tong, Baining Guo", "tldr": "", "abstract": "We introduce VASA, a framework for generating lifelike talking faces with appealing visual affective skills (VAS) given a single static image and a speech audio clip. Our premiere model, VASA-1, is capable of not only generating lip movements that are exquisitely synchronized with the audio, but also producing a large spectrum of facial nuances and natural head motions that contribute to the perception of authenticity and liveliness. \nThe core innovations include a diffusion-based holistic facial dynamics and head movement generation model that works in a face latent space, and the development of such an expressive and disentangled face latent space using videos.\nThrough extensive experiments including evaluation on a set of new metrics, we show that our method significantly outperforms previous methods along various dimensions comprehensively. Our method delivers high video quality with realistic facial and head dynamics and also supports the online generation of 512$\\times$512 videos at up to 40 FPS with negligible starting latency.\nIt paves the way for real-time engagements with lifelike avatars that emulate human conversational behaviors.", "keywords": "talking face; face video generation;", "primary_area": "generative_models", "supplementary_material": "/attachment/63285ed4bde497d042637771a3b90b8f9032ccef.zip", "author": "Sicheng Xu;Guojun Chen;Yu-Xiao Guo;Jiaolong Yang;Chong Li;Zhenyu Zang;Yizhong Zhang;Xin Tong;Baining Guo", "authorids": "~Sicheng_Xu1;~Guojun_Chen1;~Yu-Xiao_Guo1;~Jiaolong_Yang3;~Chong_Li8;~Zhenyu_Zang1;~Yizhong_Zhang1;~Xin_Tong1;~Baining_Guo1", "gender": "M;M;M;Not Specified;M;M;M;M;M", "homepage": ";;;http://jlyang.org/;;https://github.com/zhenyu-zang;https://yizhongzhang1989.github.io/;https://www.microsoft.com/en-us/research/people/xtong/;https://www.microsoft.com/en-us/research/people/bainguo/", "dblp": "238/0224;;22/329-1;121/6218;;;;86/2176-1;", "google_scholar": "RKudwboAAAAJ;;FCHG5J0AAAAJ;GuqoolgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;P91a-UQAAAAJ;h4kYmRYAAAAJ", "orcid": ";;;;;;;0000-0001-8788-2453;", "linkedin": ";guojun-chen-01615b38/;;;;;;xin-tong-8892039/;", "or_profile": "~Sicheng_Xu1;~Guojun_Chen1;~Yu-Xiao_Guo1;~Jiaolong_Yang3;~Chong_Li8;~Zhenyu_Zang1;~Yizhong_Zhang1;~Xin_Tong1;~Baining_Guo1", "aff": "Microsoft;Microsoft Research;Microsoft Research Asia;Microsoft;Microsoft;Microsoft Research;Microsoft;Microsoft Research Asia;Microsoft Research", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nxu2024vasa,\ntitle={{VASA}-1: Lifelike Audio-Driven Talking Faces Generated in Real Time},\nauthor={Sicheng Xu and Guojun Chen and Yu-Xiao Guo and Jiaolong Yang and Chong Li and Zhenyu Zang and Yizhong Zhang and Xin Tong and Baining Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=5zSCSE0k41}\n}", "github": "", "reviewers": "xKzZ;wMXj;WJD9;1diB", "pdf_size": 17489264, "rating": "7;7;7;8", "confidence": "5;4;4;3", "soundness": "3;4;4;3", "novelty": "2;3;3;3", "presentation": "4;1;4;3", "wc_summary": "109;56;51;39", "wc_strengths": "76;68;58;57", "wc_weaknesses": "478;77;85;52", "wc_questions": "36;64;70;81", "wc_limitations": "8;9;1;88", "wc_review": "707;274;265;317", "wc_reply_reviewers": "84;0;77;46", "wc_reply_authors": "138;0;19;77", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 63.75, 26.845623479442605 ], "wc_strengths_avg": [ 64.75, 7.790218225441442 ], "wc_weaknesses_avg": [ 173.0, 176.51203924945176 ], "wc_questions_avg": [ 62.75, 16.60383991732033 ], "wc_limitations_avg": [ 26.5, 35.64056677439347 ], "wc_review_avg": [ 390.75, 183.64146454436698 ], "wc_reply_reviewers_avg": [ 51.75, 33.12382073372575 ], "wc_reply_authors_avg": [ 58.5, 53.956000593075835 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1400769027696158938&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "ECLipsE: Efficient Compositional Lipschitz Constant Estimation for Deep Neural Networks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96554", "id": "61YYSy078Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=61YYSy078Z", "openreview": "https://openreview.net/forum?id=61YYSy078Z", "poster": "/media/PosterPDFs/NeurIPS%202024/96554.png?t=1731552107.4496884", "project": "", "author_site": "Yuezhu Xu, S Sivaranjani", "tldr": "", "abstract": "The Lipschitz constant plays a crucial role in certifying the robustness of neural networks to input perturbations. Since calculating the exact Lipschitz constant is NP-hard, efforts have been made to obtain tight upper bounds on the Lipschitz constant. Typically, this involves solving a large matrix verification problem, the computational cost of which grows significantly for both deeper and wider networks. In this paper, we provide a compositional approach to estimate Lipschitz constants for deep feed-forward neural networks. We first obtain an exact decomposition of the large matrix verification problem into smaller sub-problems. Then, leveraging the underlying cascade structure of the network, we develop two algorithms. The first algorithm explores the geometric features of the problem and enables us to provide Lipschitz estimates that are comparable to existing methods by solving small semidefinite programs (SDPs) that are only as large as the size of each layer. The second algorithm relaxes these sub-problems and provides a closed-form solution to each sub-problem for extremely fast estimation, altogether eliminating the need to solve SDPs. The two algorithms represent different levels of trade-offs between efficiency and accuracy. Finally, we demonstrate that our approach provides a steep reduction in computation time (as much as several thousand times faster, depending on the algorithm for deeper networks) while yielding Lipschitz bounds that are very close to or even better than those achieved by state-of-the-art approaches in a broad range of experiments. In summary, our approach considerably advances the scalability and efficiency of certifying neural network robustness, making it particularly attractive for online learning tasks.", "keywords": "Neural Networks;Lipschitz Constant;Robustness", "primary_area": "learning_theory", "supplementary_material": "/attachment/833ae1bc1ad44fa0f344535ba880c259dbb023c6.zip", "author": "Yuezhu Xu;S Sivaranjani", "authorids": "~Yuezhu_Xu1;~S_Sivaranjani1", "gender": "F;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": "0009-0003-1160-3536;", "linkedin": ";", "or_profile": "~Yuezhu_Xu1;~S_Sivaranjani1", "aff": "Purdue University;", "aff_domain": "purdue.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nxu2024eclipse,\ntitle={{ECL}ipsE: Efficient Compositional Lipschitz Constant Estimation for Deep Neural Networks},\nauthor={Yuezhu Xu and S Sivaranjani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=61YYSy078Z}\n}", "github": "", "reviewers": "3iDD;LaVm;kXCT;kbMy;qsZt", "pdf_size": 1465995, "rating": "6;6;6;7;8", "confidence": "3;3;4;4;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;2;3", "wc_summary": "89;239;122;60;67", "wc_strengths": "52;102;104;47;83", "wc_weaknesses": "171;135;630;197;125", "wc_questions": "104;2;58;36;87", "wc_limitations": "5;1;80;17;4", "wc_review": "421;479;994;357;366", "wc_reply_reviewers": "18;70;146;65;39", "wc_reply_authors": "18;58;66;33;21", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 115.4, 65.4663272224737 ], "wc_strengths_avg": [ 77.6, 24.137936945812083 ], "wc_weaknesses_avg": [ 251.6, 190.93831464637995 ], "wc_questions_avg": [ 57.4, 36.263480252176564 ], "wc_limitations_avg": [ 21.4, 29.803355515780435 ], "wc_review_avg": [ 523.4, 239.32622087853224 ], "wc_reply_reviewers_avg": [ 67.6, 43.44467746456406 ], "wc_reply_authors_avg": [ 39.2, 19.446336415890784 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14458658558260231683&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "purdue.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "One-shot Federated Learning via Synthetic Distiller-Distillate Communication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96553", "id": "6292sp7HiE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6292sp7HiE", "openreview": "https://openreview.net/forum?id=6292sp7HiE", "poster": "/media/PosterPDFs/NeurIPS%202024/96553.png?t=1729523347.982348", "project": "", "author_site": "Junyuan Zhang, Songhua Liu, Xinchao Wang", "tldr": "", "abstract": "One-shot Federated learning (FL) is a powerful technology facilitating collaborative training of machine learning models in a single round of communication. While its superiority lies in communication efficiency and privacy preservation compared to iterative FL, one-shot FL often compromises model performance. Prior research has primarily focused on employing data-free knowledge distillation to optimize data generators and ensemble models for better aggregating local knowledge into the server model. However, these methods typically struggle with data heterogeneity, where inconsistent local data distributions can cause teachers to provide misleading knowledge. Additionally, they may encounter scalability issues with complex datasets due to inherent two-step information loss: first, during local training (from data to model), and second, when transferring knowledge to the server model (from model to inversed data). In this paper, we propose FedSD2C, a novel and practical one-shot FL framework designed to address these challenges. FedSD2C introduces a distiller to synthesize informative distillates directly from local data to reduce information loss and proposes sharing synthetic distillates instead of inconsistent local models to tackle data heterogeneity. Our empirical results demonstrate that FedSD2C consistently outperforms other one-shot FL methods with more complex and real datasets, achieving up to 2.6 $\\times$ the performance of the best baseline. Code: https://github.com/Carkham/FedSD2C", "keywords": "Federated Learning;Dataset Distillation", "primary_area": "other", "supplementary_material": "/attachment/5ea4cc8f1784036c3ea8f5c6a0ecd911ccdf0a4a.zip", "author": "Junyuan Zhang;Songhua Liu;Xinchao Wang", "authorids": "~Junyuan_Zhang2;~Songhua_Liu2;~Xinchao_Wang1", "gender": "M;M;M", "homepage": "https://github.com/Carkham;http://121.37.94.87;https://sites.google.com/site/sitexinchaowang/", "dblp": "135/9113;42/8978;", "google_scholar": "uwwqEg8AAAAJ;AnYh2rAAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junyuan_Zhang2;~Songhua_Liu2;~Xinchao_WANG3", "aff": "Beihang University;National University of Singapore;National University of Singapore", "aff_domain": "buaa.edu.cn;u.nus.edu;nus.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024oneshot,\ntitle={One-shot Federated Learning via Synthetic Distiller-Distillate Communication},\nauthor={Junyuan Zhang and Songhua Liu and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6292sp7HiE}\n}", "github": "", "reviewers": "Bcom;3gL8;EquD;nVDn", "pdf_size": 1364123, "rating": "4;4;5;5", "confidence": "5;3;4;4", "soundness": "3;2;2;2", "novelty": "2;2;2;2", "presentation": "2;3;2;2", "wc_summary": "92;56;57;86", "wc_strengths": "27;16;45;51", "wc_weaknesses": "298;80;104;231", "wc_questions": "2;103;59;115", "wc_limitations": "2;3;14;5", "wc_review": "421;258;279;488", "wc_reply_reviewers": "100;227;13;48", "wc_reply_authors": "240;436;66;204", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 16.391689967785506 ], "wc_strengths_avg": [ 34.75, 13.970952007647869 ], "wc_weaknesses_avg": [ 178.25, 89.84535324656473 ], "wc_questions_avg": [ 69.75, 44.324795543803695 ], "wc_limitations_avg": [ 6.0, 4.743416490252569 ], "wc_review_avg": [ 361.5, 96.2561686334959 ], "wc_reply_reviewers_avg": [ 97.0, 81.18805330835319 ], "wc_reply_authors_avg": [ 236.5, 132.22991340842663 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12597653347336218898&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "buaa.edu.cn;u.nus.edu;nus.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Beihang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.nus.edu.sg", "aff_unique_abbr": "BUAA;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Worst-Case Offline Reinforcement Learning with Arbitrary Data Support", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96552", "id": "63VajkIDEu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=63VajkIDEu", "openreview": "https://openreview.net/forum?id=63VajkIDEu", "poster": "/media/PosterPDFs/NeurIPS%202024/96552.png?t=1733708844.3725495", "project": "", "tldr": "", "abstract": "We propose a method of offline reinforcement learning (RL) featuring the performance guarantee without any assumptions on the data support. Under such conditions, estimating or optimizing the conventional performance metric is generally infeasible due to the distributional discrepancy between data and target policy distributions. To address this issue, we employ a worst-case policy value as a new metric and constructively show that the sample complexity bound of $O(\\epsilon^{\u22122})$ is attainable without any data-support conditions, where $\\epsilon>0$ is the policy suboptimality in the new metric. Moreover, as the new metric generalizes the conventional one, the algorithm can address standard offline RL tasks without modification. In this context, our sample complexity bound can be seen as a strict improvement on the previous bounds under the single-policy concentrability and the single-policy realizability.", "keywords": "offline reinforcement learning;worst-case learning;support free;sample complexity analysis", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Kohei Miyaguchi", "authorids": "~Kohei_Miyaguchi1", "gender": "M", "homepage": "https://koheimiya.github.io/about/", "dblp": "172/7749", "google_scholar": "p78Mw3QAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Kohei_Miyaguchi1", "aff": "International Business Machines", "aff_domain": "ibm.com", "position": "Researcher", "bibtex": "@inproceedings{\nmiyaguchi2024worstcase,\ntitle={Worst-Case Offline Reinforcement Learning with Arbitrary Data Support},\nauthor={Kohei Miyaguchi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=63VajkIDEu}\n}", "github": "", "reviewers": "xcKA;Kpf4;cb9T;AjPn", "pdf_size": 459807, "rating": "6;6;6;7", "confidence": "3;4;3;3", "soundness": "4;3;3;3", "novelty": "3;3;3;4", "presentation": "3;2;2;4", "wc_summary": "118;82;51;91", "wc_strengths": "37;41;36;86", "wc_weaknesses": "71;52;181;75", "wc_questions": "60;23;62;2", "wc_limitations": "25;1;9;21", "wc_review": "311;199;339;275", "wc_reply_reviewers": "20;10;43;20", "wc_reply_authors": "0;0;140;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 85.5, 23.921747427811372 ], "wc_strengths_avg": [ 50.0, 20.868636754709208 ], "wc_weaknesses_avg": [ 94.75, 50.54886249956571 ], "wc_questions_avg": [ 36.75, 25.370997221236692 ], "wc_limitations_avg": [ 14.0, 9.539392014169456 ], "wc_review_avg": [ 281.0, 52.49761899362675 ], "wc_reply_reviewers_avg": [ 23.25, 12.111461513789324 ], "wc_reply_authors_avg": [ 35.0, 60.6217782649107 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wo7iTdxlwkIJ:scholar.google.com/&scioq=Worst-Case+Offline+Reinforcement+Learning+with+Arbitrary+Data+Support&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ibm.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Fine-grained Image-to-LiDAR Contrastive Distillation with Visual Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96551", "id": "63xeWav1lU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=63xeWav1lU", "openreview": "https://openreview.net/forum?id=63xeWav1lU", "poster": "/media/PosterPDFs/NeurIPS%202024/96551.png?t=1731252045.3050776", "project": "", "author_site": "Yifan Zhang, Junhui Hou", "tldr": "", "abstract": "Contrastive image-to-LiDAR knowledge transfer, commonly used for learning 3D representations with synchronized images and point clouds, often faces a self-conflict dilemma. This issue arises as contrastive losses unintentionally dissociate features of unmatched points and pixels that share semantic labels, compromising the integrity of learned representations. To overcome this, we harness Visual Foundation Models (VFMs), which have revolutionized the acquisition of pixel-level semantics, to enhance 3D representation learning. Specifically, we utilize off-the-shelf VFMs to generate semantic labels for weakly-supervised pixel-to-point contrastive distillation. Additionally, we employ von Mises-Fisher distributions to structure the feature space, ensuring semantic embeddings within the same class remain consistent across varying inputs. Furthermore, we adapt sampling probabilities of points to address imbalances in spatial distribution and category frequency, promoting comprehensive and balanced learning. Extensive experiments demonstrate that our approach mitigates the challenges posed by traditional methods and consistently surpasses existing image-to-LiDAR contrastive distillation methods in downstream tasks. We have included the code in supplementary materials.", "keywords": "self-supervised learning;cross-modal distillation;3D representation learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/cf4c9b374a47657d4117e6d51127bf9750347205.zip", "author": "Yifan Zhang;Junhui Hou", "authorids": "~Yifan_Zhang15;~Junhui_Hou2", "gender": "M;M", "homepage": "https://github.com/Eaphan;http://www.cityu.edu.hk/stfprofile/csjhhou.htm", "dblp": ";122/2673.html", "google_scholar": ";j6eefhwAAAAJ", "orcid": "0000-0003-0958-9934;0000-0003-3431-2021", "linkedin": ";", "or_profile": "~Yifan_Zhang15;~Junhui_Hou2", "aff": "City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024finegrained,\ntitle={Fine-grained Image-to-Li{DAR} Contrastive Distillation with Visual Foundation Models},\nauthor={Yifan Zhang and Junhui Hou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=63xeWav1lU}\n}", "github": "", "reviewers": "svRz;54Gn;2WGA;uKMo", "pdf_size": 10619467, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "182;63;81;139", "wc_strengths": "57;136;38;196", "wc_weaknesses": "189;146;51;224", "wc_questions": "197;20;6;27", "wc_limitations": "180;43;8;49", "wc_review": "805;408;184;635", "wc_reply_reviewers": "247;35;22;74", "wc_reply_authors": "1425;77;57;60", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 116.25, 47.21956691881026 ], "wc_strengths_avg": [ 106.75, 63.29050086703375 ], "wc_weaknesses_avg": [ 152.5, 64.7861867993479 ], "wc_questions_avg": [ 62.5, 78.02083055184686 ], "wc_limitations_avg": [ 70.0, 65.41024384605213 ], "wc_review_avg": [ 508.0, 234.15486328496362 ], "wc_reply_reviewers_avg": [ 94.5, 90.10133184365256 ], "wc_reply_authors_avg": [ 404.75, 589.0909840593387 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14677697434866109717&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cityu.edu.hk;cityu.edu.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Exploring Molecular Pretraining Model at Scale", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96550", "id": "64V40K2fDv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=64V40K2fDv", "openreview": "https://openreview.net/forum?id=64V40K2fDv", "poster": "/media/PosterPDFs/NeurIPS%202024/96550.png?t=1731660237.5562415", "project": "", "author_site": "xioahong ji, Zhen Wang, Zhifeng Gao, Hang Zheng, Linfeng Zhang, Guolin Ke, Weinan E", "tldr": "", "abstract": "In recent years, pretraining models have made significant advancements in the fields of natural language processing (NLP), computer vision (CV), and life sciences. The significant advancements in NLP and CV are predominantly driven by the expansion of model parameters and data size, a phenomenon now recognized as the scaling laws. However, research exploring scaling law in molecular pretraining model remains unexplored. In this work, we present an innovative molecular pretraining model that leverages a two-track transformer to effectively integrate features at the atomic level, graph level, and geometry structure level. Along with this, we systematically investigate the scaling law within molecular pretraining models, examining the power-law correlations between validation loss and model size, dataset size, and computational resources. Consequently, we successfully scale the model to 1.1 billion parameters through pretraining on 800 million conformations, making it the largest molecular pretraining model to date. Extensive experiments show the consistent improvement on the downstream tasks as the model size grows up. The model with 1.1 billion parameters also outperform over existing methods, achieving an average 27\\% improvement on the QM9 and 14\\% on COMPAS-1D dataset.", "keywords": "Molecular Pretraining;Scaling Law;Molecular Property Prediction", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/ded14703c4569f7e85f5d0f6ed9838b02b55ba5e.zip", "author": "Xiaohong Ji;Zhen Wang;Zhifeng Gao;Hang Zheng;Linfeng Zhang;Guolin Ke;Weinan E", "authorids": "~Xiaohong_Ji1;~Zhen_Wang28;~Zhifeng_Gao1;~Hang_Zheng2;~Linfeng_Zhang1;~Guolin_Ke3;~Weinan_E2", "gender": ";M;M;M;M;;", "homepage": ";https://github.com/Wangzhencc;;;;;https://web.math.princeton.edu/~weinan/", "dblp": ";;71/6161;;;;06/9390.html", "google_scholar": ";;uBo3SJcAAAAJ;;;;", "orcid": ";;;0000-0002-2825-0576;0000-0002-8470-5846;;", "linkedin": ";;;;;;", "or_profile": "~Xiaohong_Ji1;~Zhen_Wang28;~Zhifeng_Gao1;~Hang_Zheng2;~Linfeng_Zhang1;~Guolin_Ke3;~Weinan_E2", "aff": ";DP Technology;DP Technology;;DP Technology;;Peking University", "aff_domain": ";dp.tech;dp.tech;;dp.tech;;pku.edu.cn", "position": ";Researcher;Researcher;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nji2024exploring,\ntitle={Exploring Molecular Pretraining Model at Scale},\nauthor={Xiaohong Ji and Zhen Wang and Zhifeng Gao and Hang Zheng and Linfeng Zhang and Guolin Ke and Weinan E},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=64V40K2fDv}\n}", "github": "", "reviewers": "XhCf;buoE;V7dw;ER2p", "pdf_size": 3970964, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "56;61;44;87", "wc_strengths": "29;56;72;58", "wc_weaknesses": "111;205;63;23", "wc_questions": "54;6;61;51", "wc_limitations": "6;64;1;6", "wc_review": "256;392;241;225", "wc_reply_reviewers": "37;65;27;79", "wc_reply_authors": "32;49;47;260", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.0, 15.700318468107582 ], "wc_strengths_avg": [ 53.75, 15.562374497485916 ], "wc_weaknesses_avg": [ 100.5, 67.9025036357276 ], "wc_questions_avg": [ 43.0, 21.66794868002045 ], "wc_limitations_avg": [ 19.25, 25.916934618121797 ], "wc_review_avg": [ 278.5, 66.43982239590952 ], "wc_reply_reviewers_avg": [ 52.0, 20.904544960366874 ], "wc_reply_authors_avg": [ 97.0, 94.33716128864594 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15007555246746602942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";dp.tech;dp.tech;;dp.tech;;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "DP Technology;Peking University", "aff_unique_dep": ";", "aff_unique_url": ";http://www.pku.edu.cn", "aff_unique_abbr": ";Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "ClevrSkills: Compositional Language And Visual Reasoning in Robotics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97843", "id": "64sZtFSOh6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=64sZtFSOh6", "openreview": "https://openreview.net/forum?id=64sZtFSOh6", "poster": "", "project": "", "author_site": "Sanjay Haresh, Daniel Dijkman, Apratim Bhattacharyya, Roland Memisevic", "tldr": "", "abstract": "Robotics tasks are highly compositional by nature. For example, to perform a high-level task like cleaning the table a robot must employ low-level capabilities of moving the effectors to the objects on the table, pick them up and then move them off the table one-by-one, while re-evaluating the consequently dynamic scenario in the process. Given that large vision language models (VLMs) have shown progress on many tasks that require high level, human-like reasoning, we ask the question: if the models are taught the requisite low-level capabilities, can they compose them in novel ways to achieve interesting high-level tasks like cleaning the table without having to be explicitly taught so? To this end, we present ClevrSkills - a benchmark suite for compositional reasoning in robotics. ClevrSkills is an environment suite developed on top of the ManiSkill2 simulator and an accompanying dataset. The dataset contains trajectories generated on a range of robotics tasks with language and visual annotations as well as multi-modal prompts as task specification. The suite includes a curriculum of tasks with three levels of compositional understanding, starting with simple tasks requiring basic motor skills. We benchmark multiple different VLM baselines on ClevrSkills and show that even after being pre-trained on large numbers of tasks, these models fail on compositional reasoning in robotics tasks.", "keywords": "robotics;robot learning;compositional generalization;compositional learning;vision language models", "primary_area": "", "supplementary_material": "", "author": "Sanjay Haresh;Daniel Dijkman;Apratim Bhattacharyya;Roland Memisevic", "authorids": "~Sanjay_Haresh1;~Daniel_Dijkman1;~Apratim_Bhattacharyya1;~Roland_Memisevic1", "gender": "M;;M;M", "homepage": "https://www.sanjayharesh.com/;;https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/people/apratim-bhattacharyya/;", "dblp": "253/0473;;180/5968;98/4508", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.de/citations?user=SKb4VyUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sanjay_Haresh1;~Daniel_Dijkman1;~Apratim_Bhattacharyya1;~Roland_Memisevic1", "aff": "Qualcomm, Inc.;;Qualcomm Technologies, Inc.;Qualcomm Inc, Qualcomm", "aff_domain": "qti.qualcomm.com;;qualcomm.com;qti.qualcomm.com", "position": "Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nharesh2024clevrskills,\ntitle={ClevrSkills: Compositional Language And Visual Reasoning in Robotics},\nauthor={Sanjay Haresh and Daniel Dijkman and Apratim Bhattacharyya and Roland Memisevic},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=64sZtFSOh6}\n}", "github": "", "reviewers": "4Lvz;gHR3;aMj2;ngC5", "pdf_size": 19903279, "rating": "5;7;8;9", "confidence": "3;4;4;4", "wc_summary_and_contributions": "41;27;48;29", "wc_strengths": "30;23;54;50", "wc_improvement": "48;84;42;1", "wc_limitations": "27;1;24;1", "wc_correctness": "10;1;1;1", "wc_clarity": "18;1;6;1", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "8;3;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "184;142;178;86", "wc_reply_reviewers": "69;0;27;30", "wc_reply_authors": "95;50;25;0", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 7.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 36.25, 8.642193008721803 ], "wc_strengths_avg": [ 39.25, 13.06474263045392 ], "wc_improvement_avg": [ 43.75, 29.448047473474364 ], "wc_limitations_avg": [ 13.25, 12.295832627357937 ], "wc_correctness_avg": [ 3.25, 3.897114317029974 ], "wc_clarity_avg": [ 6.5, 6.946221994724902 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 3.25, 2.8613807855648994 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 147.5, 38.97114317029974 ], "wc_reply_reviewers_avg": [ 31.5, 24.60182920028509 ], "wc_reply_authors_avg": [ 42.5, 35.0891721190455 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2243560410939897485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "qti.qualcomm.com;;qualcomm.com;qti.qualcomm.com", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Qualcomm, Inc.;Qualcomm Technologies;Qualcomm Incorporated", "aff_unique_dep": ";Inc.;", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com;https://www.qualcomm.com", "aff_unique_abbr": "Qualcomm;QTI;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SeTAR: Out-of-Distribution Detection with Selective Low-Rank Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96549", "id": "65UoJ0z7Kp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=65UoJ0z7Kp", "openreview": "https://openreview.net/forum?id=65UoJ0z7Kp", "poster": "/media/PosterPDFs/NeurIPS%202024/96549.png?t=1730255975.4090505", "project": "", "author_site": "Yixia Li, Boya Xiong, Guanhua Chen, Yun Chen", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is crucial for the safe deployment of neural networks. Existing CLIP-based approaches perform OOD detection by devising novel scoring functions or sophisticated fine-tuning methods. In this work, we propose SeTAR, a novel, training-free OOD detection method that leverages selective low-rank approximation of weight matrices in vision-language and vision-only models. SeTAR enhances OOD detection via post-hoc modification of the model's weight matrices using a simple greedy search algorithm. Based on SeTAR, we further propose SeTAR+FT, a fine-tuning extension optimizing model performance for OOD detection tasks. Extensive evaluations on ImageNet1K and Pascal-VOC benchmarks show SeTAR's superior performance, reducing the relatively false positive rate by up to 18.95\\% and 36.80\\% compared to zero-shot and fine-tuning baselines. Ablation studies further validate our approach's effectiveness, robustness, and generalizability across different model backbones. Our work offers a scalable, efficient solution for OOD detection, setting a new state-of-the-art in this area.", "keywords": "Out-of-distribution detection;CLIP;low-rank approximation;trustworthy AI", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yixia Li;Boya Xiong;Guanhua Chen;Yun Chen", "authorids": "~Yixia_Li1;~Boya_Xiong1;~Guanhua_Chen1;~Yun_Chen1", "gender": "M;M;M;F", "homepage": "https://liyixia.me;;https://ghchen.me;https://yunc.me/", "dblp": "257/2679;359/6250;85/3682-1;10/5680-7", "google_scholar": "LrYjRNYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;vXd0JQMAAAAJ", "orcid": "0000-0002-0921-7551;;0000-0002-5353-9734;0000-0002-3563-7592", "linkedin": "liyixia/;%E5%8D%9A%E9%9B%85-%E7%86%8A-b6343927b?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3B7VunpjPCQFmV9nVAxWR2SA%3D%3D;;", "or_profile": "~Yixia_Li1;~Boya_Xiong1;~Guanhua_Chen1;~Yun_Chen1", "aff": "Southern University of Science and Technology ;Shanghai University of Finance and Economics;Southern University of Science and Technology;Shanghai University of Finance and Economics", "aff_domain": "sustech.edu;sufe.edu;sustech.edu.cn;sufe.edu.cn", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024setar,\ntitle={Se{TAR}: Out-of-Distribution Detection with Selective Low-Rank Approximation},\nauthor={Yixia Li and Boya Xiong and Guanhua Chen and Yun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=65UoJ0z7Kp}\n}", "github": "", "reviewers": "6fvA;SyYP;hoZJ;aAbP", "pdf_size": 851080, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;3;2;4", "novelty": "2;3;2;4", "presentation": "2;3;3;4", "wc_summary": "57;59;58;84", "wc_strengths": "54;23;13;136", "wc_weaknesses": "111;128;256;9", "wc_questions": "4;52;2;183", "wc_limitations": "1;12;1;4", "wc_review": "227;274;330;416", "wc_reply_reviewers": "199;170;80;156", "wc_reply_authors": "1003;1631;73;466", "reply_reviewers": "3;3;1;2", "reply_authors": "5;6;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.5, 11.280514172678478 ], "wc_strengths_avg": [ 56.5, 48.32442446630896 ], "wc_weaknesses_avg": [ 126.0, 87.77528125844998 ], "wc_questions_avg": [ 60.25, 73.64229423368069 ], "wc_limitations_avg": [ 4.5, 4.5 ], "wc_review_avg": [ 311.75, 70.37178056579214 ], "wc_reply_reviewers_avg": [ 151.25, 43.96234183935155 ], "wc_reply_authors_avg": [ 793.25, 585.5921682365638 ], "reply_reviewers_avg": [ 2.25, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4074505499140498926&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "sustech.edu;sufe.edu;sustech.edu.cn;sufe.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Southern University of Science and Technology;Shanghai University of Finance and Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;http://www.sufe.edu.cn", "aff_unique_abbr": "SUSTech;SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Locating What You Need: Towards Adapting Diffusion Models to OOD Concepts In-the-Wild", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96548", "id": "65htepluYE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=65htepluYE", "openreview": "https://openreview.net/forum?id=65htepluYE", "poster": "/media/PosterPDFs/NeurIPS%202024/96548.png?t=1729864357.671999", "project": "", "author_site": "Jianan Yang, Chenchao Gao, Zhiqing Xiao, Junbo Zhao, Sai Wu, Gang Chen, Haobo Wang", "tldr": "", "abstract": "The recent large-scale text-to-image generative models have attained unprecedented performance, while people established *adaptor* modules like LoRA and DreamBooth to extend this performance to even more unseen concept tokens. However, we empirically find that this workflow often fails to accurately depict the *out-of-distribution* concepts. This failure is highly related to the low quality of training data. To resolve this, we present a framework called Controllable Adaptor Towards Out-of-Distribution Concepts (CATOD). Our framework follows the active learning paradigm which includes high-quality data accumulation and adaptor training, enabling a finer-grained enhancement of generative results. The *aesthetics* score and *concept-matching* score are two major factors that impact the quality of synthetic results. One key component of CATOD is the weighted scoring system that automatically balances between these two scores and we also offer comprehensive theoretical analysis for this point. Then, it determines how to select data and schedule the adaptor training based on this scoring system. The extensive results show that CATOD significantly outperforms the prior approaches with an 11.10 boost on the CLIP score and a 33.08% decrease on the CMMD metric.", "keywords": "Image Genearation;Text-to-Image;Diffusion;Out-of-Distribution;Active Learning", "primary_area": "generative_models", "supplementary_material": "/attachment/2568a40fa903deb1329aae8700762a1df6f23ea7.zip", "author": "Jianan Yang;Chenchao Gao;Zhiqing Xiao;Junbo Zhao;Sai Wu;Gang Chen;Haobo Wang", "authorids": "~Jianan_Yang1;~Chenchao_Gao1;~Zhiqing_Xiao1;~Junbo_Zhao1;~Sai_Wu2;~Gang_Chen6;~Haobo_Wang1", "gender": "M;M;Not Specified;M;M;M;M", "homepage": "https://dilab-zju.github.io/;https://blog.csdn.net/ganlesama;https://github.com/CrownX;http://jakezhao.net/;https://person.zju.edu.cn/0011057;;https://hbzju.github.io/", "dblp": ";;156/5282;191/6665;30/1186.html;67/6383-1;", "google_scholar": "HsIdjy0AAAAJ;;cq3BcWsAAAAJ;8ipao8MAAAAJ;RMaqDKAAAAAJ;;DnN-rggAAAAJ", "orcid": "0009-0000-5668-661X;;0009-0007-4889-644X;;;0000-0002-7483-0045;0000-0001-8586-3048", "linkedin": ";;;;;;", "or_profile": "~Jianan_Yang1;~Chenchao_Gao1;~Zhiqing_Xiao1;~Junbo_Zhao1;~Sai_Wu2;~Gang_Chen6;~Haobo_Wang1", "aff": "Zhejiang University, Tsinghua University;Dalian University of Technology;Yale University;Zhejiang University;Zhejiang University;College of Computer Science and Technology, Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;dlut.edu.cn;yale.edu;zju.edu.cn;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn", "position": "PhD student;Undergrad student;Intern;Assistant Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024locating,\ntitle={Locating What You Need: Towards Adapting Diffusion Models to {OOD} Concepts In-the-Wild},\nauthor={Jianan Yang and Chenchao Gao and Zhiqing Xiao and Junbo Zhao and Sai Wu and Gang Chen and Haobo Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=65htepluYE}\n}", "github": "", "reviewers": "yLUC;bHo6;uVj3;PRf3", "pdf_size": 16339216, "rating": "5;6;7;7", "confidence": "2;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "59;95;93;64", "wc_strengths": "67;82;61;56", "wc_weaknesses": "35;100;72;146", "wc_questions": "41;64;160;374", "wc_limitations": "38;2;7;1", "wc_review": "240;343;393;641", "wc_reply_reviewers": "0;0;0;19", "wc_reply_authors": "0;0;450;1160", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.75, 16.361158271956175 ], "wc_strengths_avg": [ 66.5, 9.759610647971568 ], "wc_weaknesses_avg": [ 88.25, 40.5362492098122 ], "wc_questions_avg": [ 159.75, 131.50356459047032 ], "wc_limitations_avg": [ 12.0, 15.182226450688976 ], "wc_review_avg": [ 404.25, 147.39975407035115 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 402.5, 474.3614128488952 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6225430174794673, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zAEU80DeVekJ:scholar.google.com/&scioq=Locating+What+You+Need:+Towards+Adapting+Diffusion+Models+to+OOD+Concepts+In-the-Wild&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "zju.edu.cn;dlut.edu.cn;yale.edu;zju.edu.cn;zju.edu.cn;cs.zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0;0", "aff_unique_norm": "Zhejiang University;Dalian University of Technology;Yale University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.zju.edu.cn;http://www.dlut.edu.cn/;https://www.yale.edu", "aff_unique_abbr": "ZJU;DUT;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "66PcEzkf95", "title": "Consent in Crisis: The Rapid Decline of the AI Data Commons", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "General-purpose artificial intelligence (AI) systems are built on massive swathes of public web data, assembled into corpora such as C4, RefinedWeb, and Dolma. To our knowledge, we conduct the first, large-scale, longitudinal audit of the consent protocols for the web domains underlying AI training corpora. Our audit of 14,000 web domains provides an expansive view of crawlable web data and how codified data use preferences are changing over time. We observe a proliferation of AI-specific clauses to limit use, acute differences in restrictions on AI developers, as well as general inconsistencies between websites' expressed intentions in their Terms of Service and their robots.txt. We diagnose these as symptoms of ineffective web protocols, not designed to cope with the widespread re-purposing of the internet for AI. Our longitudinal analyses show that in a single year (2023-2024) there has been a rapid crescendo of data restrictions from web sources, rendering ~5\\%+ of all tokens in C4, or 28%+ of the most actively maintained, critical sources in C4, fully restricted from use. For Terms of Service crawling restrictions, a full 45% of C4 is now restricted. If respected or enforced, these restrictions are rapidly biasing the diversity, freshness, and scaling laws for general-purpose AI systems. We hope to illustrate the emerging crises in data consent, for both developers and creators. The foreclosure of much of the open web will impact not only commercial AI, but also non-commercial AI and academic research.", "keywords": "Training Data;Audits;Text", "primary_area": "", "supplementary_material": "", "author": "Shayne Longpre;Robert Mahari;Ariel N. Lee;Campbell S. Lund;Hamidah Oderinwale;William Brannon;Nayan Saxena;Naana Obeng-Marnu;Tobin South;Cole J Hunter;Kevin Klyman;Christopher Klamm;Hailey Schoelkopf;Nikhil Singh;Manuel Cherep;Ahmad Mustafa Anis;An Dinh;Caroline Shamiso Chitongo;Da Yin;Damien Sileo;Deividas Mataciunas;Diganta Misra;Emad A. Alghamdi;Enrico Shippole;Jianguo Zhang;Joanna Materzynska;Kun Qian;Kushagra Tiwary;Lester James Validad Miranda;Manan Dey;Minnie Liang;Mohammed Hamdy;Niklas Muennighoff;Seonghyeon Ye;Seungone Kim;Shrestha Mohanty;Vipul Gupta;Vivek Sharma;Vu Minh Chien;Xuhui Zhou;Yizhi LI;Caiming Xiong;Luis Villa;Stella Biderman;Hanlin Li;Daphne Ippolito;Sara Hooker;Jad Kabbara;Alex Pentland", "authorids": "~Shayne_Longpre1;~Robert_Mahari1;~Ariel_N._Lee1;~Campbell_S._Lund1;~Hamidah_Oderinwale1;~William_Brannon1;~Nayan_Saxena1;~Naana_Obeng-Marnu1;~Tobin_South1;~Cole_J_Hunter1;~Kevin_Klyman1;~Christopher_Klamm1;~Hailey_Schoelkopf1;~Nikhil_Singh2;~Manuel_Cherep1;~Ahmad_Mustafa_Anis1;~An_Dinh1;~Caroline_Shamiso_Chitongo1;~Da_Yin2;~Damien_Sileo2;~Deividas_Mataciunas1;~Diganta_Misra1;~Emad_A._Alghamdi1;~Enrico_Shippole1;~Jianguo_Zhang3;~Joanna_Materzynska1;~Kun_Qian2;~Kushagra_Tiwary1;~Lester_James_Validad_Miranda1;~Manan_Dey2;~Minnie_Liang1;~Mohammed_Hamdy1;~Niklas_Muennighoff1;~Seonghyeon_Ye1;~Seungone_Kim1;~Shrestha_Mohanty1;~Vipul_Gupta3;~Vivek_Sharma1;~Vu_Minh_Chien1;~Xuhui_Zhou1;~Yizhi_LI1;~Caiming_Xiong1;~Luis_Villa1;~Stella_Biderman1;~Hanlin_Li2;~Daphne_Ippolito1;~Sara_Hooker2;~Jad_Kabbara1;~Alex_Pentland1", "gender": "M;M;F;F;F;M;M;F;Not Specified;;M;;F;M;M;;F;F;M;;M;;;M;M;F;M;;M;M;F;M;M;M;M;;M;M;M;M;M;M;M;F;;F;;M;M", "homepage": "https://www.shaynelongpre.com;https://robertmahari.com/;https://arielnlee.ai;https://campbellslund.wixsite.com/lundportfolio;https://hamidah.me/;https://willbrannon.com/;http://individual.utoronto.ca/nayan/;;https://tobin.page;;;;;https://web.media.mit.edu/~nsingh1/;https://mcherep.github.io/;https://ahmadmustafaanis.github.io/;;;https://wadeyin9712.github.io/;;;;;https://github.com/conceptofmind;https://jianguoz.github.io/;https://joaanna.github.io/;https://qbetterk.github.io/;https://kushagratiwary.com;https://ljvmiranda921.github.io;;;;https://muennighoff.github.io/;https://vano1205.github.io/;https://github.com/SeungoneKim;;https://vipulgupta1011.github.io/;https://vivoutlaw.github.io/;;https://xuhuizhou.github.io/;https://yizhilll.github.io;http://cmxiong.com/;https://lu.is/;http://www.stellabiderman.com;;http://www.daphnei.com;;http://www.mit.edu/~jkabbara/;https://www.media.mit.edu/people/sandy/overview/", "dblp": "190/7024;;;;;245/2682;;;;;;159/0315;;12/5407-3;329/1440;;;;131/0141;;;;;356/2379;;191/4638;77/2062;244/0548;224/9490;;;;281/6745;301/8927;324/2064.html;;77/1831;;323/4642.html;;;80/7282;;239/5641;;192/2031.html;;148/9943;p/AlexPentland", "google_scholar": "ADd_YfkAAAAJ;3qM8lPsAAAAJ;rTkvFlMAAAAJ;2UVJ_wgAAAAJ;;0Dd6lAEAAAAJ;YB9sHNIAAAAJ;Xk25h8YAAAAJ;r5pPBFMAAAAJ;;PhN2CjMAAAAJ;;XLahYIYAAAAJ;h3YzYXwAAAAJ;bFWJ7YgAAAAJ;;;;n32w34kAAAAJ;;;;;_mXePyUAAAAJ;mAAVFEsAAAAJ;kxqgE9cAAAAJ;oRKl5eoAAAAJ;;https://scholar.google.co.jp/citations?user=2RtnNKEAAAAJ;https://scholar.google.co.in/citations?user=39CsfP8AAAAJ;;;Me0IoRMAAAAJ;https://scholar.google.co.kr/citations?user=JfGGjBoAAAAJ;https://scholar.google.co.kr/citations?user=qEf3e3EAAAAJ;Hu349EgAAAAJ;QWVqqnMAAAAJ;fNbVXwQAAAAJ;wcbZoCgAAAAJ;CKyX_Y8AAAAJ;l5NEL4wAAAAJ;vaSdahkAAAAJ;SrHFgw4AAAAJ;bO7H0DAAAAAJ;;;;;P4nfoKYAAAAJ", "orcid": ";0000-0003-2372-2746;0009-0005-8239-9012;;;0000-0002-1435-8535;0000-0003-0558-3201;0000-0002-1501-4558;0000-0003-2740-9829;;;;;0000-0003-4465-6469;;;;;;;;;;;;;0000-0001-8255-2527;;;;;;;;;;;;;;0000-0002-3932-9706;;;0000-0001-8228-1042;;;;;", "linkedin": "shayne-redford-longpre/;robert-mahari-874310157/;arielnlee/;campbell-lund-83314527a/;;wwbrannon;saxenanayan/;naana-obengmarnu/;;coleh23;;;;https://linkedin.com/in/nikhilsinghmusic;;ahmad-anis/;anmdinh/;caroline-shamiso-chitongo-7724381a8;;;deividas-mataciunas/;;;enrico-shippole-495521b8;jianguo-zhang-3b267712a;;kun-qian-6b01b113a/;ktiwary-mit/;;;minnie-liang-ab8b77279/;mmhamdy/;niklasmuennighoff/;;seungone-kim-09b551264/;;vipul-gupta-70950111b/;vivoutlaw/;vumichien/;;;caiming-xiong-150a1417;luisv/;stellabiderman;;;;;", "or_profile": "~Shayne_Longpre1;~Robert_Mahari1;~Ariel_N._Lee1;~Campbell_S._Lund1;~Hamidah_Oderinwale1;~William_Brannon1;~Nayan_Saxena1;~Naana_Obeng-Marnu1;~Tobin_South1;~Cole_J_Hunter1;~Kevin_Klyman1;~Christopher_Klamm1;~Hailey_Schoelkopf1;~Nikhil_Singh2;~Manuel_Cherep1;~Ahmad_Mustafa_Anis1;~An_Dinh1;~Caroline_Shamiso_Chitongo1;~Da_Yin2;~Damien_Sileo2;~Deividas_Mataciunas1;~Diganta_Misra1;~Emad_A._Alghamdi1;~Enrico_Shippole1;~Jianguo_Zhang3;~Joanna_Materzynska1;~Kun_Qian2;~Kushagra_Tiwary1;~Lester_James_Validad_Miranda1;~Manan_Dey2;~Minnie_Liang1;~Mohammed_Hamdy1;~Niklas_Muennighoff1;~Seonghyeon_Ye1;~Seungone_Kim1;~Shrestha_Mohanty1;~Vipul_Gupta3;~Vivek_Sharma1;~Vu_Minh_Chien1;~Xuhui_Zhou1;~Yizhi_LI1;~Caiming_Xiong1;~Luis_Villa1;~Stella_Biderman1;~Hanlin_Li2;~Daphne_Ippolito1;~Sara_Hooker2;~Jad_Kabbara1;~Alex_Pentland1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Raive;Wellesley College;McGill University;Massachusetts Institute of Technology;MLC;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Stanford University;University of Mannheim;EleutherAI;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;University of London;University of California, Los Angeles;;;;;Teraflop AI;SalesForce AI Research;Massachusetts Institute of Technology;Columbia University;Massachusetts Institute of Technology;Allen Institute for Artificial Intelligence;SalesForce.com;Massachusetts Institute of Technology;Independent;Allen Institute for Artificial Intelligence;Korea Advanced Institute of Science & Technology;KAIST;Massachusetts Institute of Technology;Pennsylvania State University;Sony Research;Tohoku University, Tokyo Institute of Technology;Allen Institute for Artificial Intelligence;University of Manchester ;Salesforce Research;Luis Villa Law;Booz Allen Hamilton;;Carnegie Mellon University;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;raive.com;wellesley.edu;mail.mcgill.ca;mit.edu;mlcollective.org;mit.edu;mit.edu;;stanford.edu;uni-mannheim.de;eleuther.ai;mit.edu;mit.edu;;mit.edu;lon.ac.uk;cs.ucla.edu;;;;;teraflop.ai;salesforce.com;mit.edu;columbia.edu;mit.edu;allenai.org;salesforce.com;mit.edu;gmail.com;allenai.org;kaist.ac.kr;ee.kaist.ac.kr;mit.edu;psu.edu;sony.com;tohoku.ac.jp;allenai.org;manchester.ac.uk;salesforce.com;villa.law;boozallen.com;;cmu.edu;;mit.edu;mit.edu", "position": "PhD student;PhD;Researcher;Researcher;Undergrad student;PhD student;Researcher;MS student;PhD student;;MS student;PhD student;Researcher;PhD student;MS student;;Undergrad student;Undergrad student;PhD student;;;;;CEO;Researcher;PhD student;PhD student;PhD student;Researcher;Senior Member of Technical Staff ;Undergrad student;Researcher;Researcher;PhD student;MS student;Researcher;PhD student;Senior Research Scientist;Researcher;Intern;PhD student;Research Scientist;Attorney;Industry researcher;;Assistant Professor;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nlongpre2024consent,\ntitle={Consent in Crisis: The Rapid Decline of the {AI} Data Commons},\nauthor={Shayne Longpre and Robert Mahari and Ariel N. Lee and Campbell S. Lund and Hamidah Oderinwale and William Brannon and Nayan Saxena and Naana Obeng-Marnu and Tobin South and Cole J Hunter and Kevin Klyman and Christopher Klamm and Hailey Schoelkopf and Nikhil Singh and Manuel Cherep and Ahmad Mustafa Anis and An Dinh and Caroline Shamiso Chitongo and Da Yin and Damien Sileo and Deividas Mataciunas and Diganta Misra and Emad A. Alghamdi and Enrico Shippole and Jianguo Zhang and Joanna Materzynska and Kun Qian and Kushagra Tiwary and Lester James Validad Miranda and Manan Dey and Minnie Liang and Mohammed Hamdy and Niklas Muennighoff and Seonghyeon Ye and Seungone Kim and Shrestha Mohanty and Vipul Gupta and Vivek Sharma and Vu Minh Chien and Xuhui Zhou and Yizhi LI and Caiming Xiong and Luis Villa and Stella Biderman and Hanlin Li and Daphne Ippolito and Sara Hooker and Jad Kabbara and Alex Pentland},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=66PcEzkf95}\n}", "github": "", "project": "", "reviewers": "DVCm;RUtn;T7xg;3m6C", "site": "https://openreview.net/forum?id=66PcEzkf95", "pdf_size": 710581, "rating": "6;7;7;8", "confidence": "5;2;3;4", "wc_summary_and_contributions": "97;77;116;233", "wc_strengths": "169;46;19;42", "wc_improvement": "351;78;89;164", "wc_limitations": "114;45;35;44", "wc_correctness": "270;105;17;16", "wc_clarity": "85;5;5;12", "wc_relation_to_prior_work": "14;25;1;29", "wc_documentation": "39;32;8;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "1140;414;291;552", "wc_reply_reviewers": "0;0;8;62", "wc_reply_authors": "54;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 130.75, 60.62332471912111 ], "wc_strengths_avg": [ 69.0, 58.6472505749417 ], "wc_improvement_avg": [ 170.5, 109.34006584962349 ], "wc_limitations_avg": [ 59.5, 31.70567772497538 ], "wc_correctness_avg": [ 102.0, 103.50603847119258 ], "wc_clarity_avg": [ 26.75, 33.751851801049376 ], "wc_relation_to_prior_work_avg": [ 17.25, 10.871407452579449 ], "wc_documentation_avg": [ 22.5, 13.275918047351754 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 599.25, 325.568253212748 ], "wc_reply_reviewers_avg": [ 17.5, 25.89884167293974 ], "wc_reply_authors_avg": [ 13.5, 23.382685902179844 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 49, 0 ], "corr_rating_confidence": -0.31622776601683794, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=593019343386393612&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;3;0;4;0;0;5;6;7;0;0;0;8;9;10;11;0;12;0;13;11;0;14;13;15;15;0;16;17;18;13;19;11;20;21;22;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Raive;Wellesley College;McGill University;Machine Learning Center;Stanford University;University of Mannheim;EleutherAI;University of London;University of California, Los Angeles;Teraflop AI;Salesforce;Columbia University;Allen Institute for Artificial Intelligence;Independent;Korea Advanced Institute of Science and Technology;Pennsylvania State University;Sony;Tohoku University;University of Manchester;Luis Villa Law;Booz Allen Hamilton;Carnegie Mellon University", "aff_unique_dep": ";;;;;;;;;;;SalesForce AI Research;;;;;;Research;;;;;", "aff_unique_url": "https://web.mit.edu;;https://www.wellesley.edu;https://www.mcgill.ca;;https://www.stanford.edu;https://www.uni-mannheim.de;https://www.eleuther.ai;https://www.london.ac.uk;https://www.ucla.edu;;https://www.salesforce.com;https://www.columbia.edu;https://allenai.org;;https://www.kaist.ac.kr;https://www.psu.edu;https://www.sony.com;https://www.tohoku.ac.jp;https://www.manchester.ac.uk;;https://www.boozallen.com;https://www.cmu.edu", "aff_unique_abbr": "MIT;;Wellesley;McGill;MLC;Stanford;UM;EleutherAI;UoL;UCLA;;Salesforce;Columbia;AI2;;KAIST;PSU;Sony;Tohoku U;UoM;;BAH;CMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Los Angeles", "aff_country_unique_index": "0;0;0;2;0;0;0;0;3;0;0;0;0;4;0;0;0;0;0;0;0;0;0;5;5;0;0;6;6;0;4;0;0;0;0;0;0", "aff_country_unique": "United States;;Canada;Germany;United Kingdom;South Korea;Japan" }, { "title": "SRFUND: A Multi-Granularity Hierarchical Structure Reconstruction Benchmark in Form Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97841", "id": "66XJOENOrL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=66XJOENOrL", "openreview": "https://openreview.net/forum?id=66XJOENOrL", "poster": "/media/PosterPDFs/NeurIPS%202024/97841.png?t=1731573960.0180264", "project": "", "author_site": "Jiefeng Ma, Yan Wang, Chenyu Liu, Jun Du, Yu Hu, Zhenrong Zhang, Pengfei Hu, Qing Wang, Jianshu Zhang", "tldr": "", "abstract": "Accurately identifying and organizing textual content is crucial for the automation of document processing in the field of form understanding. Existing datasets, such as FUNSD and XFUND, support entity classification and relationship prediction tasks but are typically limited to local and entity-level annotations. This limitation overlooks the hierarchically structured representation of documents, constraining comprehensive understanding of complex forms. To address this issue, we present the SRFUND, a hierarchically structured multi-task form understanding benchmark. SRFUND provides refined annotations on top of the original FUNSD and XFUND datasets, encompassing five tasks: (1) word to text-line merging, (2) text-line to entity merging, (3) entity category classification, (4) item table localization, and (5) entity-based full-document hierarchical structure recovery. We meticulously supplemented the original dataset with missing annotations at various levels of granularity and added detailed annotations for multi-item table regions within the forms. Additionally, we introduce global hierarchical structure dependencies for entity relation prediction tasks, surpassing traditional local key-value associations. The SRFUND dataset includes eight languages including English, Chinese, Japanese, German, French, Spanish, Italian, and Portuguese, making it a powerful tool for cross-lingual form understanding. Extensive experimental results demonstrate that the SRFUND dataset presents new challenges and significant opportunities in handling diverse layouts and global hierarchical structures of forms, thus providing deep insights into the field of form understanding. The original dataset and implementations of baseline methods are available at https://sprateam-ustc.github.io/SRFUND.", "keywords": "Form Understanding;Document Structure Reconstruction", "primary_area": "", "supplementary_material": "/attachment/f237798318edef7b0fcd3f20963994b6bb88530a.pdf", "author": "Jiefeng Ma;Yan Wang;Chenyu Liu;Jun Du;Yu Hu;Zhang Zhenrong;Pengfei Hu;Qing Wang;Jianshu Zhang", "authorids": "~Jiefeng_Ma1;~Yan_Wang53;~Chenyu_Liu1;~Jun_Du1;~Yu_Hu9;~Zhang_Zhenrong1;~Pengfei_Hu4;~Qing_Wang25;~Jianshu_Zhang1", "gender": "M;;;M;M;;;F;M", "homepage": ";https://sprateam-ustc.github.io/The%20team.html;;https://sprateam-ustc.github.io/;http://www.svief.org/2015/speaker/bio/Yuhu.html;http://staff.ustc.edu.cn/~jundu/The%20team.html;https://scholar.google.com/citations?user=9PSYaXMAAAAJ&hl=en;;", "dblp": "302/9166;;;81/1475-2.html;;;71/9969-6;97/6505-8;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;iHb6ScQAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;9PSYaXMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;9yg80qYAAAAJ", "orcid": ";;;0000-0002-2387-0389;;0000-0001-9211-0749;0009-0005-3345-605X;;0000-0002-2713-2535", "linkedin": ";;;;;;;;", "or_profile": "~Jiefeng_Ma1;~Yan_Wang53;~Chenyu_Liu1;~Jun_Du1;~Yu_Hu9;~Zhang_Zhenrong1;~Pengfei_Hu4;~Qing_Wang25;~Jianshu_Zhang1", "aff": "University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;IFLYTEK CO.LTD.", "aff_domain": "ustc.edu;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;iflytek.com", "position": "PhD student;MS student;;Associate Professor;Full Professor;PhD student;PhD student;Researcher;PhD student", "bibtex": "@inproceedings{\nma2024srfund,\ntitle={{SRFUND}: A Multi-Granularity Hierarchical Structure Reconstruction Benchmark in Form Understanding},\nauthor={Jiefeng Ma and Yan Wang and Chenyu Liu and Jun Du and Yu Hu and Zhang Zhenrong and Pengfei Hu and Qing Wang and Jianshu Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=66XJOENOrL}\n}", "github": "", "reviewers": "hNxk;85iu;Mj79;6tKU", "pdf_size": 31853400, "rating": "6;7;7;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "71;76;71;105", "wc_strengths": "4;60;15;128", "wc_improvement": "4;49;8;264", "wc_limitations": "7;40;11;106", "wc_correctness": "13;14;1;22", "wc_clarity": "8;11;1;40", "wc_relation_to_prior_work": "24;30;1;1", "wc_documentation": "14;29;1;31", "wc_additional_feedback": "1;1;1;1", "wc_review": "146;310;110;698", "wc_reply_reviewers": "53;24;0;41", "wc_reply_authors": "135;38;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 80.75, 14.148763196831021 ], "wc_strengths_avg": [ 51.75, 48.76666381863742 ], "wc_improvement_avg": [ 81.25, 106.97049826938266 ], "wc_limitations_avg": [ 41.0, 39.62953444086872 ], "wc_correctness_avg": [ 12.5, 7.5 ], "wc_clarity_avg": [ 15.0, 14.882876066137216 ], "wc_relation_to_prior_work_avg": [ 14.0, 13.171939872319491 ], "wc_documentation_avg": [ 18.75, 12.173228823939851 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 316.0, 233.07509519466038 ], "wc_reply_reviewers_avg": [ 29.5, 19.90602923739438 ], "wc_reply_authors_avg": [ 43.25, 55.19680697286755 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GtmbSMSky50J:scholar.google.com/&scioq=SRFUND:+A+Multi-Granularity+Hierarchical+Structure+Reconstruction+Benchmark+in+Form+Understanding&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ustc.edu;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;iflytek.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "University of Science and Technology of China;iFLYTEK", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.iflytek.com", "aff_unique_abbr": "USTC;iFLYTEK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Alignment for Honesty", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96547", "id": "67K3Xlvw8L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=67K3Xlvw8L", "openreview": "https://openreview.net/forum?id=67K3Xlvw8L", "poster": "/media/PosterPDFs/NeurIPS%202024/96547.png?t=1733804838.7722542", "project": "", "author_site": "Yuqing Yang, Ethan Chern, Xipeng Qiu, Graham Neubig, Pengfei Liu", "tldr": "", "abstract": "Recent research has made significant strides in aligning large language models (LLMs) with helpfulness and harmlessness. In this paper, we argue for the importance of alignment for \\emph{honesty}, ensuring that LLMs proactively refuse to answer questions when they lack knowledge, while still not being overly conservative. However, a pivotal aspect of alignment for honesty involves discerning an LLM's knowledge boundaries, which demands comprehensive solutions in terms of metric development, benchmark creation, and training methodologies. We address these challenges by first establishing a precise problem definition and defining ``honesty'' inspired by the Analects of Confucius. This serves as a cornerstone for developing metrics that effectively measure an LLM's honesty by quantifying its progress post-alignment. Furthermore, we introduce a flexible training framework which is further instantiated by several efficient fine-tuning techniques that emphasize honesty without sacrificing performance on other tasks. Our extensive experiments reveal that these aligned models show a marked increase in honesty, as indicated by our proposed metrics. We open-source all relevant resources to facilitate future research at \\url{https://github.com/GAIR-NLP/alignment-for-honesty}.", "keywords": "large language models;alignment;honesty;supervised fine-tuning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuqing Yang;Ethan Chern;Xipeng Qiu;Graham Neubig;Pengfei Liu", "authorids": "~Yuqing_Yang2;~Ethan_Chern1;~Xipeng_Qiu1;~Graham_Neubig1;~Pengfei_Liu1", "gender": "F;M;M;M;M", "homepage": "https://ayyyq.github.io/;https://xpqiu.github.io/;http://phontron.com;http://pfliu.com/;", "dblp": "91/9064-4;69/1395;03/8155;34/3381-3;284/9591", "google_scholar": "https://scholar.google.com/citations?hl=en;Pq4Yp_kAAAAJ;wlosgkoAAAAJ;oIz_CYEAAAAJ;zmit6DkAAAAJ", "orcid": ";0000-0001-7163-5247;;;", "linkedin": ";;;;", "or_profile": "~Yuqing_Yang2;~Xipeng_Qiu1;~Graham_Neubig1;~Pengfei_Liu1;~I-Chun_Chern1", "aff": "Fudan University;Fudan University;Carnegie Mellon University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;cmu.edu;sjtu.edu;sjtu.edu", "position": "MS student;Full Professor;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nyang2024alignment,\ntitle={Alignment for Honesty},\nauthor={Yuqing Yang and Ethan Chern and Xipeng Qiu and Graham Neubig and Pengfei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=67K3Xlvw8L}\n}", "github": "", "reviewers": "NFFZ;MrwZ;V3H9", "pdf_size": 806179, "rating": "6;7;7", "confidence": "4;4;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "125;65;95", "wc_strengths": "129;71;99", "wc_weaknesses": "178;158;12", "wc_questions": "44;49;108", "wc_limitations": "1;24;40", "wc_review": "477;367;354", "wc_reply_reviewers": "10;235;0", "wc_reply_authors": "0;579;0", "reply_reviewers": "1;2;0", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 95.0, 24.49489742783178 ], "wc_strengths_avg": [ 99.66666666666667, 23.683092891108814 ], "wc_weaknesses_avg": [ 116.0, 73.99099044253069 ], "wc_questions_avg": [ 67.0, 29.06314963431642 ], "wc_limitations_avg": [ 21.666666666666668, 16.006942938057293 ], "wc_review_avg": [ 399.3333333333333, 55.174470747096635 ], "wc_reply_reviewers_avg": [ 81.66666666666667, 108.49987199173195 ], "wc_reply_authors_avg": [ 193.0, 272.94321753800733 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11680979276083213213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu.cn;cmu.edu;sjtu.edu;sjtu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Fudan University;Carnegie Mellon University;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.cmu.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "Fudan;CMU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "67N3FWoDtU", "title": "Chronicling Germany: An Annotated Historical Newspaper Dataset", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "The correct detection of article layout in historical newspaper pages remains challenging\nbut is important for Natural Language Processing ( NLP) and machine\nlearning applications in the field of digital history. Digital newspaper portals\ntypically provide Optical Character Recognition ( OCR) text, albeit of varying quality.\nUnfortunately, layout information is often missing, limiting this rich source\u2019s\nscope. Our dataset is designed to address this issue for historic German-language\nnewspapers. The Chronicling Germany dataset contains 581 annotated historical\nnewspaper pages from the time period between 1852 and 1924. Historic domain\nexperts have spent more than 1,500 hours annotating the dataset. The paper presents\na processing pipeline and establishes baseline results on in- and out-of-domain test\ndata using this pipeline. Both our dataset and the corresponding baseline code are\nfreely available online. This work creates a starting point for future research in\nthe field of digital history and historic German language newspaper processing.\nFurthermore, it provides the opportunity to study a low-resource task in computer\nvision.", "keywords": "historic newspaper processing;digital history;computer vision", "primary_area": "", "supplementary_material": "", "author": "Christian Schultze;Niklas Kerkfeld;Kara Kuebart;Princilia Weber;Moritz Wolter;Felix Selgert", "authorids": "~Christian_Schultze1;~Niklas_Kerkfeld1;~Kara_Kuebart1;~Princilia_Weber1;~Moritz_Wolter1;~Felix_Selgert1", "gender": "M;M;;F;M;M", "homepage": ";;;;https://www.wolter.tech/;", "dblp": "367/5000.html;367/4587.html;367/4855.html;367/4404.html;222/2629;", "google_scholar": ";;;;https://scholar.google.de/citations?user=OLvO62sAAAAJ;https://scholar.google.de/citations?user=c4XUnlUAAAAJ", "orcid": ";;;;0000-0002-1511-7768;", "linkedin": ";;;;;", "or_profile": "~Christian_Schultze1;~Niklas_Kerkfeld1;~Kara_Kuebart1;~Princilia_Weber1;~Moritz_Wolter1;~Felix_Selgert1", "aff": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn", "aff_domain": "uni-bonn.de;uni-bonn.de;uni-bonn.de;uni-bonn.de;uni-bonn.de;uni-bonn.de", "position": "Undergrad student;Undergrad student;PhD student;MS student;Postdoc;Assistant Professor", "bibtex": "@misc{\nanonymous2024chronicling,\ntitle={Chronicling Germany: An Annotated Historical Newspaper Dataset},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=67N3FWoDtU}\n}", "github": "", "project": "", "reviewers": "ZtnH;ffkZ;H4zj", "site": "https://openreview.net/forum?id=67N3FWoDtU", "pdf_size": 0, "rating": "4;4;5", "confidence": "5;4;5", "wc_summary_and_contributions": "66;72;88", "wc_strengths": "92;95;221", "wc_improvement": "172;130;733", "wc_limitations": "41;22;42", "wc_correctness": "28;161;170", "wc_clarity": "16;4;9", "wc_relation_to_prior_work": "12;22;28", "wc_documentation": "39;13;36", "wc_additional_feedback": "1;1;1", "wc_review": "467;520;1328", "wc_reply_reviewers": "19;70;95", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 4.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 75.33333333333333, 9.285592184789413 ], "wc_strengths_avg": [ 136.0, 60.11655346075655 ], "wc_improvement_avg": [ 345.0, 274.8927063419472 ], "wc_limitations_avg": [ 35.0, 9.201449161228174 ], "wc_correctness_avg": [ 119.66666666666667, 64.92217563274423 ], "wc_clarity_avg": [ 9.666666666666666, 4.921607686744467 ], "wc_relation_to_prior_work_avg": [ 20.666666666666668, 6.599663291074443 ], "wc_documentation_avg": [ 29.333333333333332, 11.61416759345623 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 771.6666666666666, 393.98166906032003 ], "wc_reply_reviewers_avg": [ 61.333333333333336, 31.626290048347787 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11247849745801730708&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-bonn.de/", "aff_unique_abbr": "Uni Bonn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "How does Architecture Influence the Base Capabilities of Pre-trained Language Models? A Case Study Based on FFN-Wider and MoE Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96546", "id": "67tRrjgzsh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=67tRrjgzsh", "openreview": "https://openreview.net/forum?id=67tRrjgzsh", "poster": "/media/PosterPDFs/NeurIPS%202024/96546.png?t=1733511511.3349462", "project": "", "author_site": "Xin Lu, Yanyan Zhao, Bing Qin, Liangyu Huo, Qing Yang, Dongliang Xu", "tldr": "", "abstract": "Pre-trained language models have been proven to possess strong base capabilities, which not only excel in in-distribution language modeling but also show powerful abilities in out-of-distribution language modeling, transfer learning and few-shot learning. Unlike existing work focusing on the influence of scale on base capabilities, our work examines the influence of architecture on those. Specifically, our concern is: How does architecture influence the base capabilities of pre-trained language models? In this work, we attempt to explain and reverse the decline in base capabilities caused by the architecture of FFN-Wider Transformers, seeking to provide some insights. Through analysis, we found the contribution ratio of Multi-Head Attention (a combination function) to pre-trained language modeling is a key factor affecting base capabilities. FFN-Wider Transformers reduce the contribution ratio of this combination function, leading to a decline in base capabilities. We confirmed this by experiments and proposed Combination Enhanced Architecture (CEA) to address the decline in base capabilities of such models. Significantly, we extended our explanation and CEA to Mixture of Experts (MoE) Transformers. We successfully achieved significant improvements in base capabilities on a 14B parameter MoE model, demonstrating the practical application value of our work. This also indicates that our analysis has a certain guiding significance for architecture analysis, architecture improvement and architecture design.", "keywords": "Pre-trained Language Models;Base Capabilities;Mixture of Experts", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Xin Lu;Yanyan Zhao;Bing Qin;Liangyu Huo;Qing Yang;Dongliang Xu", "authorids": "~Xin_Lu8;~Yanyan_Zhao1;~Bing_Qin2;~Liangyu_Huo1;~Qing_Yang11;~Dongliang_Xu2", "gender": "M;F;;M;M;M", "homepage": "https://luxin.xyz/;http://ir.hit.edu.cn/~yanyan/;http://ir.hit.edu.cn/~qinb;;https://www.duxiaoman.com/index;", "dblp": "11/1952;;86/5934.html;207/8210;47/3749;74/4912.html", "google_scholar": "https://scholar.google.com.hk/citations?user=EXZyiCoAAAAJ;mEdfAYoAAAAJ;LKnCub0AAAAJ;;;", "orcid": "0000-0002-0784-5394;;0000-0002-2543-5604;;;", "linkedin": ";;;;;", "or_profile": "~Xin_Lu8;~Yanyan_Zhao1;~Bing_Qin2;~Liangyu_Huo1;~Qing_Yang11;~Dongliang_Xu2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Du Xiaoman Technology(BeiJing);;DuXiaoman Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;duxiaoman.com;;duxiaoman.com", "position": "PhD student;Full Professor;Full Professor;Postdoc;;Principal Researcher", "bibtex": "@inproceedings{\nlu2024how,\ntitle={How does Architecture Influence the Base Capabilities of Pre-trained Language Models? A Case Study Based on {FFN}-Wider and MoE Transformers},\nauthor={Xin Lu and Yanyan Zhao and Bing Qin and Liangyu Huo and Qing Yang and Dongliang Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=67tRrjgzsh}\n}", "github": "", "reviewers": "Rvop;nte2;QyvJ;xAuN", "pdf_size": 534443, "rating": "4;6;6;7", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "162;69;63;98", "wc_strengths": "113;76;49;108", "wc_weaknesses": "200;9;123;89", "wc_questions": "94;58;28;30", "wc_limitations": "50;7;5;12", "wc_review": "619;219;268;337", "wc_reply_reviewers": "0;15;98;21", "wc_reply_authors": "106;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.0, 39.24920381358073 ], "wc_strengths_avg": [ 86.5, 25.889186931999237 ], "wc_weaknesses_avg": [ 105.25, 68.59436930244348 ], "wc_questions_avg": [ 52.5, 26.7348087705897 ], "wc_limitations_avg": [ 18.5, 18.364367672206956 ], "wc_review_avg": [ 360.75, 154.8812044762049 ], "wc_reply_reviewers_avg": [ 33.5, 38.01644381054072 ], "wc_reply_authors_avg": [ 26.5, 45.89934640057525 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gp4dv0RqgLUJ:scholar.google.com/&scioq=How+does+Architecture+Influence+the+Base+Capabilities+of+Pre-trained+Language+Models%3F+A+Case+Study+Based+on+FFN-Wider+and+MoE+Transformers&hl=en&as_sdt=0,48", "gs_version_total": 2, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;duxiaoman.com;;duxiaoman.com", "author_num": 6, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Harbin Institute of Technology;Du Xiaoman Technology;DuXiaoman Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;;https://www.duxiaoman.com", "aff_unique_abbr": "HIT;;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MixEval: Deriving Wisdom of the Crowd from LLM Benchmark Mixtures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96545", "id": "6A29LUZhfv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6A29LUZhfv", "openreview": "https://openreview.net/forum?id=6A29LUZhfv", "poster": "/media/PosterPDFs/NeurIPS%202024/96545.png?t=1732960372.989103", "project": "", "author_site": "Jinjie Ni, Fuzhao Xue, Xiang Yue, Yuntian Deng, Mahir Shah, Kabir Jain, Graham Neubig, Yang You", "tldr": "", "abstract": "Evaluating large language models (LLMs) is challenging. Traditional ground-truth- based benchmarks fail to capture the comprehensiveness and nuance of real-world queries, while LLM-as-judge benchmarks suffer from grading biases and limited query quantity. Both of them may also become contaminated over time. User- facing evaluation, such as Chatbot Arena, provides reliable signals but is costly and slow. In this work, we propose MixEval, a new paradigm for establishing efficient, gold-standard LLM evaluation by strategically mixing off-the-shelf bench- marks. It bridges (1) comprehensive and well-distributed real-world user queries and (2) efficient and fairly-graded ground-truth-based benchmarks, by matching queries mined from the web with similar queries from existing benchmarks. Based on MixEval, we further build MixEval-Hard, which offers more room for model improvement. Our benchmarks\u2019 advantages lie in (1) a 0.96 model ranking correlation with Chatbot Arena arising from the highly impartial query distribution and grading mechanism, (2) fast, cheap, and reproducible execution (6% of the time and cost of MMLU), and (3) dynamic evaluation enabled by the rapid and stable data update pipeline. We provide extensive meta-evaluation and analysis for our and existing LLM benchmarks to deepen the community\u2019s understanding of LLM evaluation and guide future research directions.", "keywords": "LLM Evaluation;Approximating Human Preference;Dynamic Benchmarking;Benchmark Mixture;Web Query Detection", "primary_area": "evaluation", "supplementary_material": "/attachment/92838f09bfe90ff60f68ed0b3cdbc8d2e488219f.zip", "author": "Jinjie Ni;Fuzhao Xue;Xiang Yue;Yuntian Deng;Mahir Shah;Kabir Jain;Graham Neubig;Yang You", "authorids": "~Jinjie_Ni1;~Fuzhao_Xue1;~Xiang_Yue1;~Yuntian_Deng2;~Mahir_Shah3;~Kabir_Jain1;~Graham_Neubig1;~Yang_You1", "gender": "M;M;;;M;M;M;M", "homepage": ";https://xuefuzhao.github.io/;;https://yuntiandeng.com;;;http://phontron.com;https://www.comp.nus.edu.sg/~youy/", "dblp": "257/4822;248/1245;;166/1720;;;03/8155;33/8167-1.html", "google_scholar": "TXfiHo8AAAAJ;JMHsqIkAAAAJ;;tk0e5lYAAAAJ;;b9FZ3SIAAAAJ;wlosgkoAAAAJ;jF4dPZwAAAAJ", "orcid": ";;;;;;;", "linkedin": ";fuzhao-xue-6410561a6/;;;mahir-shah-681aa0261;;;yang-you-0b92914b/", "or_profile": "~Jinjie_Ni1;~Fuzhao_Xue1;~Xiang_Yue1;~Yuntian_Deng2;~Mahir_Shah3;~Kabir_Jain1;~Graham_Neubig1;~Yang_You1", "aff": "National University of Singapore;National University of Singapore;;Allen Institute for Artificial Intelligence;National University of Singapore;National University of Singapore;Carnegie Mellon University;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;;allenai.org;nus.edu.sg;nus.edu.sg;cmu.edu;nus.edu.sg", "position": "Postdoc;PhD student;;Postdoc;Intern;Intern;Associate Professor;Professor", "bibtex": "@inproceedings{\nni2024mixeval,\ntitle={MixEval: Deriving Wisdom of the Crowd from {LLM} Benchmark Mixtures},\nauthor={Jinjie Ni and Fuzhao Xue and Xiang Yue and Yuntian Deng and Mahir Shah and Kabir Jain and Graham Neubig and Yang You},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6A29LUZhfv}\n}", "github": "", "reviewers": "u4BL;7ypd;4BB9;yGD1", "pdf_size": 5270633, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "93;89;72;144", "wc_strengths": "97;49;109;77", "wc_weaknesses": "178;119;107;186", "wc_questions": "3;43;37;34", "wc_limitations": "1;10;2;29", "wc_review": "372;310;327;470", "wc_reply_reviewers": "129;10;0;150", "wc_reply_authors": "15;0;0;26", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.5, 26.874709300753377 ], "wc_strengths_avg": [ 83.0, 22.715633383201094 ], "wc_weaknesses_avg": [ 147.5, 34.87477598494362 ], "wc_questions_avg": [ 29.25, 15.497983739828868 ], "wc_limitations_avg": [ 10.5, 11.236102527122116 ], "wc_review_avg": [ 369.75, 62.15454528833752 ], "wc_reply_reviewers_avg": [ 72.25, 67.75092250294456 ], "wc_reply_authors_avg": [ 10.25, 10.96300597464035 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15405249102693321608&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nus.edu.sg;nus.edu.sg;;allenai.org;nus.edu.sg;nus.edu.sg;cmu.edu;nus.edu.sg", "author_num": 8, "aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "National University of Singapore;Allen Institute for Artificial Intelligence;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://allenai.org;https://www.cmu.edu", "aff_unique_abbr": "NUS;AI2;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "LightGaussian: Unbounded 3D Gaussian Compression with 15x Reduction and 200+ FPS", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96544", "id": "6AeIDnrTN2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6AeIDnrTN2", "openreview": "https://openreview.net/forum?id=6AeIDnrTN2", "poster": "/media/PosterPDFs/NeurIPS%202024/96544.png?t=1731752298.726329", "project": "", "author_site": "Zhiwen Fan, Kevin Wang, Kairun Wen, Zehao Zhu, Dejia Xu, Zhangyang "Atlas" Wang", "tldr": "", "abstract": "Recent advances in real-time neural rendering using point-based techniques have enabled broader adoption of 3D representations. However, foundational approaches like 3D Gaussian Splatting impose substantial storage overhead, as Structure-from-Motion (SfM) points can grow to millions, often requiring gigabyte-level disk space for a single unbounded scene. This growth presents scalability challenges and hinders splatting efficiency. To address this, we introduce LightGaussian, a method for transforming 3D Gaussians into a more compact format. Inspired by Network Pruning, LightGaussian identifies Gaussians with minimal global significance on scene reconstruction, and applies a pruning and recovery process to reduce redundancy while preserving visual quality. Knowledge distillation and pseudo-view augmentation then transfer spherical harmonic coefficients to a lower degree, yielding compact representations. Gaussian Vector Quantization, based on each Gaussian\u2019s global significance, further lowers bitwidth with minimal accuracy loss. LightGaussian achieves an average 15 times compression rate while boosting FPS from 144 to 237 within the 3D-GS framework, enabling efficient complex scene representation on the Mip-NeRF 360 and Tank & Temple datasets. The proposed Gaussian pruning approach is also adaptable to other 3D representations (e.g., Scaffold-GS), demonstrating strong generalization capabilities.", "keywords": "Gaussian Splatting;Efficient Rendering", "primary_area": "machine_vision", "supplementary_material": "/attachment/5c3fa2831b6750f8151b7728fccf8c9c6b6658fe.zip", "author": "Zhiwen Fan;Kevin Wang;Kairun Wen;Zehao Zhu;Dejia Xu;Zhangyang Wang", "authorids": "~Zhiwen_Fan2;~Kevin_Wang4;~Kairun_Wen1;~Zehao_Zhu1;~Dejia_Xu1;~Zhangyang_Wang1", "gender": ";M;M;M;M;M", "homepage": ";;https://kairunwen.github.io/;https://zehaozhu.github.io/;https://ir1d.github.io;https://vita-group.github.io", "dblp": ";;321/5079;;264/5685;119/4026", "google_scholar": ";;RzRhziMAAAAJ;;ET0e93cAAAAJ;pxFyKAIAAAAJ", "orcid": ";;0009-0006-7726-9691;;;", "linkedin": ";kevin-wang-01/;;;;", "or_profile": "~Zhiwen_Fan2;~Kevin_Wang4;~Kairun_Wen1;~Zehao_Zhu1;~Dejia_Xu1;~Zhangyang_Wang1", "aff": ";University of Texas at Austin;Xiamen University;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin", "aff_domain": ";utexas.edu;xmu.edu.cn;utexas.edu;utexas.edu;utexas.edu", "position": ";Undergrad student;MS student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfan2024lightgaussian,\ntitle={LightGaussian: Unbounded 3D Gaussian Compression with 15x Reduction and 200+ {FPS}},\nauthor={Zhiwen Fan and Kevin Wang and Kairun Wen and Zehao Zhu and Dejia Xu and Zhangyang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6AeIDnrTN2}\n}", "github": "", "reviewers": "TPCr;yR9d;3NHK;bxja", "pdf_size": 12785772, "rating": "6;6;7;8", "confidence": "5;5;4;5", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "70;73;139;90", "wc_strengths": "86;117;76;275", "wc_weaknesses": "201;207;262;219", "wc_questions": "65;135;30;63", "wc_limitations": "8;10;11;20", "wc_review": "430;542;518;667", "wc_reply_reviewers": "33;19;44;17", "wc_reply_authors": "37;37;10;10", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 27.631503759296198 ], "wc_strengths_avg": [ 138.5, 80.2449375350246 ], "wc_weaknesses_avg": [ 222.25, 23.84716964337697 ], "wc_questions_avg": [ 73.25, 38.26470305647229 ], "wc_limitations_avg": [ 12.25, 4.602988159880492 ], "wc_review_avg": [ 539.25, 84.7271355588043 ], "wc_reply_reviewers_avg": [ 28.25, 10.985786271359915 ], "wc_reply_authors_avg": [ 23.5, 13.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 174, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17539349311072157068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";utexas.edu;xmu.edu.cn;utexas.edu;utexas.edu;utexas.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Texas at Austin;Xiamen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.xmu.edu.cn", "aff_unique_abbr": "UT Austin;XMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Rethinking the Membrane Dynamics and Optimization Objectives of Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96543", "id": "6AepMNrz7a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6AepMNrz7a", "openreview": "https://openreview.net/forum?id=6AepMNrz7a", "poster": "/media/PosterPDFs/NeurIPS%202024/96543.png?t=1731490099.5870724", "project": "", "author_site": "Hangchi Shen, Qian Zheng, Huamin Wang, Gang Pan", "tldr": "", "abstract": "Despite spiking neural networks (SNNs) have demonstrated notable energy efficiency across various fields, the limited firing patterns of spiking neurons within fixed time steps restrict the expression of information, which impedes further improvement of SNN performance. In addition, current implementations of SNNs typically consider the firing rate or average membrane potential of the last layer as the output, lacking exploration of other possibilities. In this paper, we identify that the limited spike patterns of spiking neurons stem from the initial membrane potential (IMP), which is set to 0. By adjusting the IMP, the spiking neurons can generate additional firing patterns and pattern mappings. Furthermore, we find that in static tasks, the accuracy of SNNs at each time step increases as the membrane potential evolves from zero. This observation inspires us to propose a learnable IMP, which can accelerate the evolution of membrane potential and enables higher performance within a limited number of time steps. Additionally, we introduce the last time step (LTS) approach to accelerate convergence in static tasks, and we propose a label smooth temporal efficient training (TET) loss to mitigate the conflicts between optimization objective and regularization term in the vanilla TET. Our methods improve the accuracy by 4.05\\% on ImageNet compared to baseline and achieve state-of-the-art performance of 87.80\\% on CIFAR10-DVS and 87.86\\% on N-Caltech101.", "keywords": "Spiking neural networks;Neural dynamics;Neuromorphic Computing;Spike patterns;Spiking neuron", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/dd8fae57ac69f618c174a63a8c515b293184636b.zip", "author": "Hangchi Shen;Qian Zheng;Huamin Wang;Gang Pan", "authorids": "~Hangchi_Shen2;~Qian_Zheng5;~Huamin_Wang3;~Gang_Pan1", "gender": ";;M;", "homepage": ";;http://ai.swu.edu.cn/info/1069/1447.htm;", "dblp": "328/5715;;;", "google_scholar": "7Hbh5dEAAAAJ;;;", "orcid": "0000-0001-8302-5964;;0000-0001-8180-8172;", "linkedin": ";;;", "or_profile": "~Hangchi_Shen2;~Qian_Zheng5;~Huamin_Wang3;~Gang_Pan1", "aff": "Southwest University;;Southwest University;", "aff_domain": "swu.edu.cn;;swu.edu.cn;", "position": "MS student;;Full Professor;", "bibtex": "@inproceedings{\nshen2024rethinking,\ntitle={Rethinking the Membrane Dynamics and Optimization Objectives of Spiking Neural Networks},\nauthor={Hangchi Shen and Qian Zheng and Huamin Wang and Gang Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6AepMNrz7a}\n}", "github": "", "reviewers": "UfLQ;Cj2j;jQZD;AWRd", "pdf_size": 1293131, "rating": "6;6;6;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "87;50;50;14", "wc_strengths": "58;109;43;26", "wc_weaknesses": "274;34;156;46", "wc_questions": "42;82;131;41", "wc_limitations": "13;1;6;53", "wc_review": "474;276;386;180", "wc_reply_reviewers": "172;13;19;0", "wc_reply_authors": "652;0;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 50.25, 25.810608284191986 ], "wc_strengths_avg": [ 59.0, 31.008063467427306 ], "wc_weaknesses_avg": [ 127.5, 97.02963464838977 ], "wc_questions_avg": [ 74.0, 36.830693721405794 ], "wc_limitations_avg": [ 18.25, 20.51066795596867 ], "wc_review_avg": [ 329.0, 111.0 ], "wc_reply_reviewers_avg": [ 51.0, 70.19615374078555 ], "wc_reply_authors_avg": [ 163.0, 282.324281633727 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14908257968973945328&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "swu.edu.cn;;swu.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Southwest University", "aff_unique_dep": "", "aff_unique_url": "https://www.swu.edu.cn", "aff_unique_abbr": "SWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Noisy Dual Mirror Descent: A Near Optimal Algorithm for Jointly-DP Convex Resource Allocation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96542", "id": "6ArNmbMpKF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6ArNmbMpKF", "openreview": "https://openreview.net/forum?id=6ArNmbMpKF", "poster": "/media/PosterPDFs/NeurIPS%202024/96542.png?t=1731335289.9421048", "project": "", "author_site": "Du Chen, Geoffrey A. Chua", "tldr": "", "abstract": "We study convex resource allocation problems with $m$ hard constraints under $(\\varepsilon,\\delta)$-joint differential privacy (Joint-DP or JDP) in an offline setting. To approximately solve the problem, we propose a generic algorithm called Noisy Dual Mirror Descent. The algorithm applies noisy Mirror Descent to a dual problem from relaxing the hard constraints for private shadow prices, and then uses the shadow prices to coordinate allocations in the primal problem. Leveraging weak duality theory, we show that the optimality gap is upper bounded by $\\mathcal{O}(\\frac{\\sqrt{m\\ln(1/\\delta)}}{\\varepsilon})$, and constraint violation is no more than $\\mathcal{O}(\\frac{\\sqrt{m\\ln(1/\\delta)}}{\\varepsilon})$ per constraint. When strong duality holds, both preceding results can be improved to $\\widetilde{\\mathcal{O}}(\\frac{\\sqrt{\\ln(1/\\delta)}}{\\varepsilon})$ by better utilizing the geometric structure of the dual space, which is neglected by existing works. To complement our results under strong duality, we derive a minimax lower bound $\\Omega(\\frac{m}{\\varepsilon})$ for any JDP algorithm outputting feasible allocations. The lower bound matches our upper bounds up to some logarithmic factors for $\\varepsilon\\geq \\max(1, 1/(n\\gamma))$, where $n\\gamma$ is the available resource level. Numerical studies further confirm the effectiveness of our algorithm.", "keywords": "joint differential privacy;resource allocation;near optimal algorithm;mirror descent", "primary_area": "privacy", "supplementary_material": "/attachment/b2acf2fd4be0168fbb65ca109bc6fe51332cf82a.zip", "author": "Du Chen;Geoffrey A. Chua", "authorids": "~Du_Chen2;~Geoffrey_A._Chua1", "gender": "M;M", "homepage": "https://chendu2017.github.io/;https://dr.ntu.edu.sg/cris/rp/rp00581", "dblp": ";", "google_scholar": ";1r_iHt8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Du_Chen2;~Geoffrey_A._Chua1", "aff": "Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2024noisy,\ntitle={Noisy Dual Mirror Descent: A Near Optimal Algorithm for Jointly-{DP} Convex Resource Allocation},\nauthor={Du Chen and Geoffrey A. Chua},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6ArNmbMpKF}\n}", "github": "", "reviewers": "YdYP;8BYy;kLe8", "pdf_size": 745689, "rating": "6;7;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "63;95;149", "wc_strengths": "60;78;26", "wc_weaknesses": "403;98;139", "wc_questions": "4;51;290", "wc_limitations": "34;10;1", "wc_review": "564;332;605", "wc_reply_reviewers": "0;26;255", "wc_reply_authors": "0;0;58", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.33333333333333, 35.490217744549774 ], "wc_strengths_avg": [ 54.666666666666664, 21.561282171728305 ], "wc_weaknesses_avg": [ 213.33333333333334, 135.1550549882944 ], "wc_questions_avg": [ 115.0, 125.2224686973814 ], "wc_limitations_avg": [ 15.0, 13.92838827718412 ], "wc_review_avg": [ 500.3333333333333, 120.20075799354272 ], "wc_reply_reviewers_avg": [ 93.66666666666667, 114.57263586427993 ], "wc_reply_authors_avg": [ 19.333333333333332, 27.34146220587984 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6638772528053667420&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ntu.edu.sg;ntu.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Addressing Hidden Confounding with Heterogeneous Observational Datasets for Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96541", "id": "6CFHg7exjY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6CFHg7exjY", "openreview": "https://openreview.net/forum?id=6CFHg7exjY", "poster": "/media/PosterPDFs/NeurIPS%202024/96541.png?t=1731810372.6674485", "project": "", "author_site": "Yanghao Xiao, Haoxuan Li, Yongqiang Tang, Wensheng Zhang", "tldr": "", "abstract": "The collected data in recommender systems generally suffers selection bias. Considerable works are proposed to address selection bias induced by observed user and item features, but they fail when hidden features (e.g., user age or salary) that affect both user selection mechanism and feedback exist, which is called hidden confounding. To tackle this issue, methods based on sensitivity analysis and leveraging a few randomized controlled trial (RCT) data for model calibration are proposed. However, the former relies on strong assumptions of hidden confounding strength, whereas the latter relies on the expensive RCT data, thereby limiting their applicability in real-world scenarios. In this paper, we propose to employ heterogeneous observational data to address hidden confounding, wherein some data is subject to hidden confounding while the remaining is not. We argue that such setup is more aligned with practical scenarios, especially when some users do not have complete personal information (thus assumed with hidden confounding), while others do have (thus assumed without hidden confounding). To achieve unbiased learning, we propose a novel meta-learning based debiasing method called MetaDebias. This method explicitly models oracle error imputation and hidden confounding bias, and utilizes bi-level optimization for model training. Extensive experiments on three public datasets validate our method achieves state-of-the-art performance in the presence of hidden confounding, regardless of RCT data availability.", "keywords": "Recommender System;Causal Inference;Bias;Hidden Confounding", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yanghao Xiao;Haoxuan Li;Yongqiang Tang;Wensheng Zhang", "authorids": "~Yanghao_Xiao1;~Haoxuan_Li6;~Yongqiang_Tang1;~Wensheng_Zhang5", "gender": ";M;;M", "homepage": ";https://haoxuanli-pku.github.io/;;https://people.ucas.ac.cn/~wenshengzhang", "dblp": "322/6462;145/4965-1.html;;94/6627-2.html/", "google_scholar": "hzfFzKUAAAAJ;gtDqiucAAAAJ;;", "orcid": "0000-0001-9929-4448;0000-0003-3620-3769;;0000-0003-0752-941X", "linkedin": ";;;", "or_profile": "~Yanghao_Xiao1;~Haoxuan_Li6;~Yongqiang_Tang1;~Wensheng_Zhang5", "aff": "University of Chinese Academy of Sciences;Peking University;;Guangzhou University", "aff_domain": "ucas.ac.cn;pku.edu.cn;;gzhu.edu.cn", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nxiao2024addressing,\ntitle={Addressing Hidden Confounding with Heterogeneous Observational Datasets for Recommendation},\nauthor={Yanghao Xiao and Haoxuan Li and Yongqiang Tang and Wensheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6CFHg7exjY}\n}", "github": "", "reviewers": "TTAY;Yf6Q;X846", "pdf_size": 0, "rating": "3;7;7", "confidence": "3;2;3", "soundness": "2;2;4", "novelty": "2;2;4", "presentation": "2;2;3", "wc_summary": "15;78;82", "wc_strengths": "49;22;41", "wc_weaknesses": "54;168;50", "wc_questions": "7;22;18", "wc_limitations": "1;1;1", "wc_review": "126;291;192", "wc_reply_reviewers": "0;20;51", "wc_reply_authors": "772;30;368", "reply_reviewers": "0;1;2", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 58.333333333333336, 30.684777260973487 ], "wc_strengths_avg": [ 37.333333333333336, 11.323525167642018 ], "wc_weaknesses_avg": [ 90.66666666666667, 54.70730197047638 ], "wc_questions_avg": [ 15.666666666666666, 6.342099196813483 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 203.0, 67.80855403265875 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 20.98147330914162 ], "wc_reply_authors_avg": [ 390.0, 303.319413600031 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4066063589766793091&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ucas.ac.cn;pku.edu.cn;;gzhu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Peking University;Guangzhou University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.pku.edu.cn;http://www.gzhu.edu.cn", "aff_unique_abbr": "UCAS;Peking U;GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning Discrete Latent Variable Structures with Tensor Rank Conditions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96540", "id": "6EqFoqkLSW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6EqFoqkLSW", "openreview": "https://openreview.net/forum?id=6EqFoqkLSW", "poster": "/media/PosterPDFs/NeurIPS%202024/96540.png?t=1731224326.0911875", "project": "", "author_site": "Zhengming Chen, Ruichu Cai, Feng Xie, Jie Qiao, Anpeng Wu, Zijian Li, Zhifeng Hao, Kun Zhang", "tldr": "", "abstract": "Unobserved discrete data are ubiquitous in many scientific disciplines, and how to learn the causal structure of these latent variables is crucial for uncovering data patterns. Most studies focus on the linear latent variable model or impose strict constraints on latent structures, which fail to address cases in discrete data involving non-linear relationships or complex latent structures. To achieve this, we explore a tensor rank condition on contingency tables for an observed variable set $\\mathbf{X}_p$, showing that the rank is determined by the minimum support of a specific conditional set (not necessary in $\\mathbf{X}_p$) that d-separates all variables in $\\mathbf{X}_p$. By this, one can locate the latent variable through probing the rank on different observed variables set, and further identify the latent causal structure under some structure assumptions. We present the corresponding identification algorithm and conduct simulated experiments to verify the effectiveness of our method. In general, our results elegantly extend the identification boundary for causal discovery with discrete latent variables and expand the application scope of causal discovery with latent variables.", "keywords": "Causal Discovery;Identification;Latent variables", "primary_area": "causal_inference", "supplementary_material": "", "author": "Zhengming Chen;Ruichu Cai;Feng Xie;Jie Qiao;Anpeng Wu;Zijian Li;Zhifeng Hao;Kun Zhang", "authorids": "~Zhengming_Chen2;~Ruichu_Cai1;~Feng_Xie1;~Jie_Qiao1;~Anpeng_Wu1;~Zijian_Li1;~Zhifeng_Hao4;~Kun_Zhang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://ruichucai.github.io/;https://fengxie.site/;;https://scholar.google.com.hk/citations?user=VQ4m6zQAAAAJ&hl=zh-CN&oi=sra;;https://www.stu.edu.cn/xxgk/dzld1/hzf.htm;http://www.andrew.cmu.edu/user/kunz1/;https://scholar.google.com/citations?user=QBUnTqkAAAAJ", "dblp": "09/6889;11/4605-2;00/7723;267/5637;27/10487;;96/3115-1;17/7724", "google_scholar": "https://scholar.google.com/citations?hl=en;stLFCtQAAAAJ;aCEp7f4AAAAJ;https://scholar.google.com.hk/citations?user=VQ4m6zQAAAAJ;j3ilESoAAAAJ;ZF3gp9wAAAAJ;RGoypN4AAAAJ;QBUnTqkAAAAJ", "orcid": ";0000-0001-7229-3955;0000-0002-4581-9656;0000-0003-3898-7122;;;;0000-0002-3839-5269", "linkedin": ";;;;;;;", "or_profile": "~Ruichu_Cai1;~Feng_Xie1;~Jie_Qiao1;~Anpeng_Wu1;~Zijian_Li1;~Zhifeng_Hao4;~Kun_Zhang1;~zhengming_Chen1", "aff": "Guangdong University of Technology;Beijing Technology and Business University;Guangdong University of Technology;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Shantou University;Carnegie Mellon University;Guangdong University of Technology", "aff_domain": "gdut.edu.cn;btbu.edu.cn;gdut.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;stu.edu.cn;cmu.edu;gdut.edu.cn", "position": "Full Professor;Associate Professor;Postdoc;Researcher;Postdoc;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nchen2024learning,\ntitle={Learning Discrete Latent Variable Structures with Tensor Rank Conditions},\nauthor={Zhengming Chen and Ruichu Cai and Feng Xie and Jie Qiao and Anpeng Wu and Zijian Li and Zhifeng Hao and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6EqFoqkLSW}\n}", "github": "", "reviewers": "tR91;Upr4;Snze;hs7U", "pdf_size": 944323, "rating": "4;6;6;8", "confidence": "4;2;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;2;3", "wc_summary": "132;40;124;109", "wc_strengths": "61;37;127;93", "wc_weaknesses": "178;29;151;131", "wc_questions": "11;36;844;8", "wc_limitations": "1;2;18;2", "wc_review": "383;144;1264;343", "wc_reply_reviewers": "1215;12;63;65", "wc_reply_authors": "1320;24;29;26", "reply_reviewers": "4;1;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 101.25, 36.31373707015019 ], "wc_strengths_avg": [ 79.5, 33.86369737639409 ], "wc_weaknesses_avg": [ 122.25, 56.362110499873936 ], "wc_questions_avg": [ 224.75, 357.68937292013584 ], "wc_limitations_avg": [ 5.75, 7.084313657652377 ], "wc_review_avg": [ 533.5, 431.35860951185384 ], "wc_reply_reviewers_avg": [ 338.75, 506.34887923249124 ], "wc_reply_authors_avg": [ 349.75, 560.1769251763232 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p-Lnpox4O7UJ:scholar.google.com/&scioq=Learning+Discrete+Latent+Variable+Structures+with+Tensor+Rank+Conditions&hl=en&as_sdt=0,21", "gs_version_total": 5, "email": "gdut.edu.cn;btbu.edu.cn;gdut.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae;stu.edu.cn;cmu.edu;gdut.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;2;3;4;0", "aff_unique_norm": "Guangdong University of Technology;Beijing Technology and Business University;Mohamed bin Zayed University of Artificial Intelligence;Shantou University;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.gdut.edu.cn;http://www.btbu.edu.cn;https://mbzuai.ac.ae;https://www.stu.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "GDUT;BTBU;MBZUAI;STU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;0;2;0", "aff_country_unique": "China;United Arab Emirates;United States" }, { "title": "Policy Optimization for Robust Average Reward MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96539", "id": "6FPZLnp1Zn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6FPZLnp1Zn", "openreview": "https://openreview.net/forum?id=6FPZLnp1Zn", "poster": "", "project": "", "author_site": "Zhongchang Sun, Sihong He, Fei Miao, Shaofeng Zou", "tldr": "", "abstract": "This paper studies first-order policy optimization for robust average cost Markov decision processes (MDPs). Specifically, we focus on ergodic Markov chains. For robust average cost MDPs, the goal is to optimize the worst-case average cost over an uncertainty set of transition kernels. We first develop a sub-gradient of the robust average cost. Based on the sub-gradient, a robust policy mirror descent approach is further proposed. To characterize its iteration complexity, we develop a lower bound on the difference of robust average cost between two policies and further show that the robust average cost satisfies the PL-condition. We then show that with increasing step size, our robust policy mirror descent achieves a linear convergence rate in the optimality gap, and with constant step size, our algorithm converges to an $\\epsilon$-optimal policy with an iteration complexity of $\\mathcal{O}(1/\\epsilon)$. The convergence rate of our algorithm matches with the best convergence rate of policy-based algorithms for robust MDPs. Moreover, our algorithm is the first algorithm that converges to the global optimum with general uncertainty sets for robust average cost MDPs. We provide simulation results to demonstrate the performance of our algorithm.", "keywords": "Reinforcement learning;average reward;model mismatch", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zhongchang Sun;Sihong He;Fei Miao;Shaofeng Zou", "authorids": "~Zhongchang_Sun1;~Sihong_He1;~Fei_Miao1;~Shaofeng_Zou1", "gender": ";F;F;", "homepage": ";https://sihonghe.com/;http://www.feimiao.org;", "dblp": ";237/6086;143/6002;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;-zSd9V0AAAAJ;fH2YF6YAAAAJ;", "orcid": ";;0000-0003-0066-4379;", "linkedin": ";;fei-miao-76964727/;", "or_profile": "~Zhongchang_Sun1;~Sihong_He1;~Fei_Miao1;~Shaofeng_Zou1", "aff": "State University of New York at Buffalo;University of Connecticut;University of Connecticut;", "aff_domain": "buffalo.edu;uconn.edu;uconn.edu;", "position": "PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nsun2024policy,\ntitle={Policy Optimization for Robust Average Reward {MDP}s},\nauthor={Zhongchang Sun and Sihong He and Fei Miao and Shaofeng Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6FPZLnp1Zn}\n}", "github": "", "reviewers": "H19Z;7myA;Ehp7;2EU7", "pdf_size": 485897, "rating": "6;6;6;7", "confidence": "4;3;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "61;66;158;44", "wc_strengths": "90;33;154;41", "wc_weaknesses": "35;103;478;410", "wc_questions": "235;75;153;309", "wc_limitations": "3;1;20;3", "wc_review": "424;278;963;807", "wc_reply_reviewers": "51;31;84;47", "wc_reply_authors": "50;16;0;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.25, 44.488060195967186 ], "wc_strengths_avg": [ 79.5, 48.23121395942673 ], "wc_weaknesses_avg": [ 256.5, 190.5577340335469 ], "wc_questions_avg": [ 193.0, 87.66983517721475 ], "wc_limitations_avg": [ 6.75, 7.693341276714559 ], "wc_review_avg": [ 618.0, 277.480629954597 ], "wc_reply_reviewers_avg": [ 53.25, 19.266226926930972 ], "wc_reply_authors_avg": [ 21.75, 18.06066167115701 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13275731211401722228&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "buffalo.edu;uconn.edu;uconn.edu;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "State University of New York at Buffalo;University of Connecticut", "aff_unique_dep": ";", "aff_unique_url": "https://www.buffalo.edu;https://www.uconn.edu", "aff_unique_abbr": "SUNY Buffalo;UConn", "aff_campus_unique_index": "0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MotionGS: Exploring Explicit Motion Guidance for Deformable 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96538", "id": "6FTlHaxCpR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6FTlHaxCpR", "openreview": "https://openreview.net/forum?id=6FTlHaxCpR", "poster": "/media/PosterPDFs/NeurIPS%202024/96538.png?t=1733637143.2521737", "project": "", "author_site": "Ruijie Zhu, Yanzhe Liang, Hanzhi Chang, Jiacheng Deng, Jiahao Lu, Wenfei Yang, Tianzhu Zhang, Yongdong Zhang", "tldr": "", "abstract": "Dynamic scene reconstruction is a long-term challenge in the field of 3D vision. Recently, the emergence of 3D Gaussian Splatting has provided new insights into this problem. Although subsequent efforts rapidly extend static 3D Gaussian to dynamic scenes, they often lack explicit constraints on object motion, leading to optimization difficulties and performance degradation. To address the above issues, we propose a novel deformable 3D Gaussian splatting framework called MotionGS, which explores explicit motion priors to guide the deformation of 3D Gaussians. Specifically, we first introduce an optical flow decoupling module that decouples optical flow into camera flow and motion flow, corresponding to camera movement and object motion respectively. Then the motion flow can effectively constrain the deformation of 3D Gaussians, thus simulating the motion of dynamic objects. Additionally, a camera pose refinement module is proposed to alternately optimize 3D Gaussians and camera poses, mitigating the impact of inaccurate camera poses. Extensive experiments in the monocular dynamic scenes validate that MotionGS surpasses state-of-the-art methods and exhibits significant superiority in both qualitative and quantitative results. Project page: https://ruijiezhu94.github.io/MotionGS_page.", "keywords": "3D Gaussian;Novel View Synthesis;Optical flow;Dynamic scene", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ruijie Zhu;Yanzhe Liang;Hanzhi Chang;Jiacheng Deng;Jiahao Lu;Wenfei Yang;Tianzhu Zhang;Yongdong Zhang", "authorids": "~Ruijie_Zhu2;~Yanzhe_Liang1;~Hanzhi_Chang1;~Jiacheng_Deng2;~Jiahao_Lu5;~Wenfei_Yang2;~Tianzhu_Zhang1;~Yongdong_Zhang2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://ruijiezhu94.github.io/ruijiezhu/;https://github.com/Rosetta-Leong;https://github.com/HanzhiChang;https://github.com/JiachengDeng;https://github.com/peoplelu;;https://scholar.google.com/citations?user=9sCGe-gAAAAJ&hl=zh-CN;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html", "dblp": "194/6909-2;;345/6013.html;320/4938;;;;z/YongdongZhang", "google_scholar": "6uuAEdkAAAAJ;;;https://scholar.google.cz/citations?hl=zh-CN;cRpteW4AAAAJ;rtO5VmQAAAAJ;9sCGe-gAAAAJ;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ", "orcid": "0000-0001-6092-0712;;;0000-0003-2838-0378;;;;0000-0003-0066-3448", "linkedin": ";yanzhe-liang-1a703020b/;;https://linkedin.com/in/jiacheng-deng-0843a3280;;;;", "or_profile": "~Ruijie_Zhu2;~Yanzhe_Liang1;~Hanzhi_Chang1;~Jiacheng_Deng2;~Jiahao_Lu5;~Wenfei_Yang2;~Tianzhu_Zhang1;~Yongdong_Zhang2", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;MS student;MS student;PhD student;MS student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2024motiongs,\ntitle={Motion{GS}: Exploring Explicit Motion Guidance for Deformable 3D Gaussian Splatting},\nauthor={Ruijie Zhu and Yanzhe Liang and Hanzhi Chang and Jiacheng Deng and Jiahao Lu and Wenfei Yang and Tianzhu Zhang and Yongdong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6FTlHaxCpR}\n}", "github": "", "reviewers": "pL6e;4hTv;2xXk;rwSL", "pdf_size": 4472715, "rating": "6;6;7;7", "confidence": "4;5;5;4", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "60;201;54;110", "wc_strengths": "15;83;110;135", "wc_weaknesses": "37;195;5;142", "wc_questions": "37;98;83;41", "wc_limitations": "8;66;24;4", "wc_review": "157;643;276;432", "wc_reply_reviewers": "41;163;13;34", "wc_reply_authors": "83;253;59;76", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.25, 58.86584323697402 ], "wc_strengths_avg": [ 85.75, 44.7960656754586 ], "wc_weaknesses_avg": [ 94.75, 76.929756921493 ], "wc_questions_avg": [ 64.75, 26.328454189336675 ], "wc_limitations_avg": [ 25.5, 24.550967394381836 ], "wc_review_avg": [ 377.0, 181.92168644776797 ], "wc_reply_reviewers_avg": [ 62.75, 58.789348525051714 ], "wc_reply_authors_avg": [ 117.75, 78.57281654618218 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14717137284560027424&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Active Perception for Grasp Detection via Neural Graspness Field", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96537", "id": "6FYh6gxzPf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6FYh6gxzPf", "openreview": "https://openreview.net/forum?id=6FYh6gxzPf", "poster": "/media/PosterPDFs/NeurIPS%202024/96537.png?t=1731746884.3205202", "project": "", "author_site": "Haoxiang Ma, Modi Shi, Boyang Gao, Di Huang", "tldr": "", "abstract": "This paper tackles the challenge of active perception for robotic grasp detection in cluttered environments. Incomplete 3D geometry information can negatively affect the performance of learning-based grasp detection methods, and scanning the scene from multiple views introduces significant time costs. To achieve reliable grasping performance with efficient camera movement, we propose an active grasp detection framework based on the Neural Graspness Field (NGF), which models the scene incrementally and facilitates next-best-view planning. Constructed in real-time as the camera moves, the NGF effectively models the grasp distribution in 3D space by rendering graspness predictions from each view. For next-best-view planning, we aim to reduce the uncertainty of the NGF through a graspness inconsistency-guided policy, selecting views based on discrepancies between NGF outputs and a pre-trained graspness network. Additionally, we present a neural graspness sampling method that decodes graspness values from the NGF to improve grasp pose detection results. Extensive experiments on the GraspNet-1Billion benchmark demonstrate significant performance improvements compared to previous works. Real-world experiments show that our method achieves a superior trade-off between grasping performance and time costs.", "keywords": "Robotic Grasping;Active Perception", "primary_area": "robotics", "supplementary_material": "", "author": "Haoxiang Ma;Modi Shi;Boyang Gao;Di Huang", "authorids": "~Haoxiang_Ma1;~Modi_Shi1;~Boyang_Gao1;~Di_Huang4", "gender": ";M;;M", "homepage": "http://mahaoxiang822.github.io/;https://github.com/ModiShi;;http://irip.buaa.edu.cn/dihuang/index.html", "dblp": ";;;45/780-1", "google_scholar": "RC0U_o0AAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-2412-9330", "linkedin": ";;;", "or_profile": "~Haoxiang_Ma1;~Modi_Shi1;~Boyang_Gao1;~Di_Huang4", "aff": "Beihang University;Beihang University;;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nma2024active,\ntitle={Active Perception for Grasp Detection via Neural Graspness Field},\nauthor={Haoxiang Ma and Modi Shi and Boyang Gao and Di Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6FYh6gxzPf}\n}", "github": "", "reviewers": "DbvE;4AgP;peVt;DQft", "pdf_size": 11979593, "rating": "5;5;5;7", "confidence": "4;5;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "80;83;93;69", "wc_strengths": "48;68;82;47", "wc_weaknesses": "115;463;178;24", "wc_questions": "50;2;85;36", "wc_limitations": "19;12;4;16", "wc_review": "312;628;442;192", "wc_reply_reviewers": "19;41;141;30", "wc_reply_authors": "18;36;588;22", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;4;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.25, 8.554969316134336 ], "wc_strengths_avg": [ 61.25, 14.618053906043718 ], "wc_weaknesses_avg": [ 195.0, 164.12952202452794 ], "wc_questions_avg": [ 43.25, 29.76050234791073 ], "wc_limitations_avg": [ 12.75, 5.629165124598851 ], "wc_review_avg": [ 393.5, 161.69956709898761 ], "wc_reply_reviewers_avg": [ 57.75, 48.68970630431036 ], "wc_reply_authors_avg": [ 166.0, 243.73346097735535 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7799770374760699216&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Open-Book Neural Algorithmic Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96536", "id": "6HO33urpaI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6HO33urpaI", "openreview": "https://openreview.net/forum?id=6HO33urpaI", "poster": "/media/PosterPDFs/NeurIPS%202024/96536.png?t=1731684306.3168254", "project": "", "author_site": "Hefei Li, Peng Chao, Chenyang Xu, Zhengfeng Yang", "tldr": "", "abstract": "Neural algorithmic reasoning is an emerging area of machine learning that focuses on building neural networks capable of solving complex algorithmic tasks. Recent advancements predominantly follow the standard supervised learning paradigm -- feeding an individual problem instance into the network each time and training it to approximate the execution steps of a classical algorithm. We challenge this mode and propose a novel open-book learning framework. In this framework, whether during training or testing, the network can access and utilize all instances in the training dataset when reasoning for a given instance.\n\nEmpirical evaluation is conducted on the challenging CLRS Algorithmic Reasoning Benchmark, which consists of 30 diverse algorithmic tasks. Our open-book learning framework exhibits a significant enhancement in neural reasoning capabilities. Further, we notice that there is recent literature suggesting that multi-task training on CLRS can improve the reasoning accuracy of certain tasks, implying intrinsic connections between different algorithmic tasks. We delve into this direction via the open-book framework. When the network reasons for a specific task, we enable it to aggregate information from training instances of other tasks in an attention-based manner. We show that this open-book attention mechanism offers insights into the inherent relationships among various tasks in the benchmark and provides a robust tool for interpretable multi-task training.", "keywords": "Neural Algorithmic Reasoning;Graph Neural Network", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/79d41daac9a097678aed6f03b644f58487c54132.zip", "author": "Hefei Li;Chao Peng;Chenyang Xu;Zhengfeng Yang", "authorids": "~Hefei_Li1;~Chao_Peng6;~Chenyang_Xu3;~Zhengfeng_Yang1", "gender": "M;;M;", "homepage": "https://github.com/Hoferlee1;;;", "dblp": ";;82/5658-2;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hefei_Li1;~Chao_Peng6;~Chenyang_Xu3;~Zhengfeng_Yang1", "aff": "East China Normal University;;East China Normal University;", "aff_domain": "ecnu.edu.cn;;ecnu.edu.cn;", "position": "MS student;;Assistant Professor;", "bibtex": "@inproceedings{\nli2024openbook,\ntitle={Open-Book Neural Algorithmic Reasoning},\nauthor={Hefei Li and Chao Peng and Chenyang Xu and Zhengfeng Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6HO33urpaI}\n}", "github": "", "reviewers": "pXFF;8uzp;m7Ww", "pdf_size": 488863, "rating": "6;6;6", "confidence": "2;2;4", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "38;100;64", "wc_strengths": "76;72;29", "wc_weaknesses": "38;240;248", "wc_questions": "13;4;48", "wc_limitations": "18;16;13", "wc_review": "183;432;402", "wc_reply_reviewers": "0;24;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.33333333333333, 25.42090128658349 ], "wc_strengths_avg": [ 59.0, 21.275964529643932 ], "wc_weaknesses_avg": [ 175.33333333333334, 97.16423667630436 ], "wc_questions_avg": [ 21.666666666666668, 18.979521127315678 ], "wc_limitations_avg": [ 15.666666666666666, 2.0548046676563256 ], "wc_review_avg": [ 339.0, 110.98648566379602 ], "wc_reply_reviewers_avg": [ 8.0, 11.313708498984761 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Wp-brm_l1yUJ:scholar.google.com/&scioq=Open-Book+Neural+Algorithmic+Reasoning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ecnu.edu.cn;;ecnu.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "East China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.ecnu.edu.cn", "aff_unique_abbr": "ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Separations in the Representational Capabilities of Transformers and Recurrent Architectures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96535", "id": "6HUJoD3wTj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6HUJoD3wTj", "openreview": "https://openreview.net/forum?id=6HUJoD3wTj", "poster": "", "project": "", "author_site": "Satwik Bhattamishra, Michael Hahn, Phil Blunsom, Varun Kanade", "tldr": "", "abstract": "Transformer architectures have been widely adopted in foundation models. Due to their high inference costs, there is renewed interest in exploring the potential of efficient recurrent architectures (RNNs). In this paper, we analyze the differences in the representational capabilities of Transformers and RNNs across several tasks of practical relevance, including index lookup, nearest neighbor, recognizing bounded Dyck languages, and string equality. For the tasks considered, our results show separations based on the size of the model required for different architectures. For example, we show that a one-layer Transformer of logarithmic width can perform index lookup, whereas an RNN requires a hidden state of linear size. Conversely, while constant-size RNNs can recognize bounded Dyck languages, we show that one-layer Transformers require a linear size for this task. Furthermore, we show that two-layer Transformers of logarithmic size can perform decision tasks such as string equality or disjointness, whereas both one-layer Transformers and recurrent models require linear size for these tasks. We also show that a log-size two-layer Transformer can implement the nearest neighbor algorithm in its forward pass; on the other hand recurrent models require linear size. Our constructions are based on the existence of $N$ nearly orthogonal vectors in $O(\\log N)$ dimensional space and our lower bounds are based on reductions from communication complexity problems. We supplement our theoretical results with experiments that highlight the differences in the performance of these architectures on practical-size sequences.", "keywords": "expressivity;Transformers;RNNs;deep learning theory;communication complexity", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/77ca2f9bda2715b9c0edc72737568691407f05f0.zip", "author": "Satwik Bhattamishra;Michael Hahn;Phil Blunsom;Varun Kanade", "authorids": "~Satwik_Bhattamishra1;~Michael_Hahn1;~Phil_Blunsom1;~Varun_Kanade1", "gender": "M;M;;M", "homepage": "https://satwikb.com/;https://www.mhahn.info/;;", "dblp": "242/4259;https://dblp.uni-trier.de/pid/44/9903;96/4705;31/6692", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?user=eJwbbXEAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Satwik_Bhattamishra1;~Michael_Hahn1;~Phil_Blunsom1;~Varun_Kanade1", "aff": "University of Oxford;Universit\u00e4t des Saarlandes;Department of Computer Science, University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;uni-saarland.de;cs.ox.ac.uk;ox.ac.uk", "position": "PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbhattamishra2024separations,\ntitle={Separations in the Representational Capabilities of Transformers and Recurrent Architectures},\nauthor={Satwik Bhattamishra and Michael Hahn and Phil Blunsom and Varun Kanade},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6HUJoD3wTj}\n}", "github": "", "reviewers": "BJPt;hDB9;LT9S;FtyK", "pdf_size": 1191159, "rating": "7;7;7;8", "confidence": "3;4;4;4", "soundness": "3;4;4;4", "novelty": "3;2;3;4", "presentation": "4;3;4;3", "wc_summary": "89;86;60;134", "wc_strengths": "70;104;108;138", "wc_weaknesses": "20;209;202;367", "wc_questions": "58;52;20;8", "wc_limitations": "7;46;1;8", "wc_review": "244;497;391;655", "wc_reply_reviewers": "14;39;18;18", "wc_reply_authors": "0;51;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.25, 26.61179249881526 ], "wc_strengths_avg": [ 105.0, 24.1039415863879 ], "wc_weaknesses_avg": [ 199.5, 122.85458884388487 ], "wc_questions_avg": [ 34.5, 21.041625412500814 ], "wc_limitations_avg": [ 15.5, 17.811513130556875 ], "wc_review_avg": [ 446.75, 150.0905976402253 ], "wc_reply_reviewers_avg": [ 22.25, 9.807522622966516 ], "wc_reply_authors_avg": [ 12.75, 22.083647796503186 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4977085087230618107&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;uni-saarland.de;cs.ox.ac.uk;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Oxford;Universit\u00e4t des Saarlandes", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.uni-saarland.de", "aff_unique_abbr": "Oxford;UDS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "IMAGPose: A Unified Conditional Framework for Pose-Guided Person Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96534", "id": "6IyYa4gETN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6IyYa4gETN", "openreview": "https://openreview.net/forum?id=6IyYa4gETN", "poster": "/media/PosterPDFs/NeurIPS%202024/96534.png?t=1731663854.3894012", "project": "", "author_site": "Fei Shen, Jinhui Tang", "tldr": "", "abstract": "Diffusion models represent a promising avenue for image generation, having demonstrated competitive performance in pose-guided person image generation. \nHowever, existing methods are limited to generating target images from a source image and a target pose, overlooking two critical user scenarios: generating multiple target images with different poses simultaneously and generating target images from multi-view source images.\nTo overcome these limitations, we propose IMAGPose, a unified conditional framework for pose-guided image generation, which incorporates three pivotal modules: a feature-level conditioning (FLC) module, an image-level conditioning (ILC) module, and a cross-view attention (CVA) module. \nFirstly, the FLC module combines the low-level texture feature from the VAE encoder with the high-level semantic feature from the image encoder, addressing the issue of missing detail information due to the absence of a dedicated person image feature extractor. \nThen, the ILC module achieves an alignment of images and poses to adapt to flexible and diverse user scenarios by injecting a variable number of source image conditions and introducing a masking strategy.\nFinally, the CVA module introduces decomposing global and local cross-attention, ensuring local fidelity and global consistency of the person image when multiple source image prompts. \nThe three modules of IMAGPose work together to unify the task of person image generation under various user scenarios.\nExtensive experiment results demonstrate the consistency and photorealism of our proposed IMAGPose under challenging user scenarios. \nThe code and model will be available at https://github.com/muzishen/IMAGPose.", "keywords": "Pose-Guided Person Image Generation;Diffusion Model;Consistency Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Fei Shen;Jinhui Tang", "authorids": "~Fei_Shen1;~Jinhui_Tang1", "gender": ";M", "homepage": ";https://imag-njust.net/jinhui-tang/", "dblp": ";75/1030", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Fei_Shen1;~Jinhui_Tang1", "aff": ";Nanjing University of Science and Technology", "aff_domain": ";njust.edu.cn", "position": ";Full Professor", "bibtex": "@inproceedings{\nshen2024imagpose,\ntitle={{IMAGP}ose: A Unified Conditional Framework for Pose-Guided Person Generation},\nauthor={Fei Shen and Jinhui Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6IyYa4gETN}\n}", "github": "", "reviewers": "Cm6N;1dhK;rV96;QBtt", "pdf_size": 2807530, "rating": "4;4;8;8", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;4;4", "presentation": "1;3;3;4", "wc_summary": "103;145;63;52", "wc_strengths": "39;39;128;84", "wc_weaknesses": "291;99;81;38", "wc_questions": "63;235;66;237", "wc_limitations": "11;229;14;14", "wc_review": "507;747;352;425", "wc_reply_reviewers": "187;0;133;66", "wc_reply_authors": "752;341;139;64", "reply_reviewers": "1;0;1;1", "reply_authors": "4;4;3;2", "rating_avg": [ 6.0, 2.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 90.75, 36.62222685747004 ], "wc_strengths_avg": [ 72.5, 36.93575503492517 ], "wc_weaknesses_avg": [ 127.25, 97.10400352199697 ], "wc_questions_avg": [ 150.25, 85.75947469521954 ], "wc_limitations_avg": [ 67.0, 93.53876201874814 ], "wc_review_avg": [ 507.75, 148.61590594549426 ], "wc_reply_reviewers_avg": [ 96.5, 70.29402535066548 ], "wc_reply_authors_avg": [ 324.0, 267.0664711265718 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10263417645550500919&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";njust.edu.cn", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Compositional Automata Embeddings for Goal-Conditioned Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96533", "id": "6KDZHgrDhG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6KDZHgrDhG", "openreview": "https://openreview.net/forum?id=6KDZHgrDhG", "poster": "/media/PosterPDFs/NeurIPS%202024/96533.png?t=1731646127.3456237", "project": "", "author_site": "Beyazit Yalcinkaya, Niklas Lauffer, Marcell Vazquez-Chanlatte, Sanjit Seshia", "tldr": "", "abstract": "Goal-conditioned reinforcement learning is a powerful way to control an AI agent's behavior at runtime. That said, popular goal representations, e.g., target states or natural language, are either limited to Markovian tasks or rely on ambiguous task semantics. We propose representing temporal goals using compositions of deterministic finite automata (cDFAs) and use cDFAs to guide RL agents. cDFAs balance the need for formal temporal semantics with ease of interpretation: if one can understand a flow chart, one can understand a cDFA. On the other hand, cDFAs form a countably infinite concept class with Boolean semantics, and subtle changes to the automaton can result in very different tasks, making them difficult to condition agent behavior on. To address this, we observe that all paths through a DFA correspond to a series of reach-avoid tasks and propose pre-training graph neural network embeddings on \"reach-avoid derived\" DFAs. Through empirical evaluation, we demonstrate that the proposed pre-training method enables zero-shot generalization to various cDFA task classes and accelerated policy specialization without the myopic suboptimality of hierarchical methods.", "keywords": "reinforcement learning;goal-conditioned reinforcement learning;formal methods;graph embeddings;representation learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Beyazit Yalcinkaya;Niklas Lauffer;Marcell Vazquez-Chanlatte;Sanjit A. Seshia", "authorids": "~Beyazit_Yalcinkaya1;~Niklas_Lauffer1;~Marcell_Vazquez-Chanlatte1;~Sanjit_A._Seshia1", "gender": "M;M;Marcell;", "homepage": "https://people.eecs.berkeley.edu/~beyazit/;https://niklaslauffer.github.io/;https://mjvc.me;", "dblp": "241/1049;;192/1518;", "google_scholar": "tbhLhswAAAAJ;;3WoCRqAAAAAJ;", "orcid": "0000-0001-9987-635X;;;", "linkedin": "beyazityalcinkaya;;;", "or_profile": "~Beyazit_Yalcinkaya1;~Niklas_Lauffer1;~Marcell_Vazquez-Chanlatte1;~Sanjit_A._Seshia1", "aff": "University of California, Berkeley;University of California, Berkeley;Nissan;", "aff_domain": "berkeley.edu;berkeley.edu;nissan-usa.com;", "position": "PhD student;PhD student;Researcher;", "bibtex": "@inproceedings{\nyalcinkaya2024compositional,\ntitle={Compositional Automata Embeddings for Goal-Conditioned Reinforcement Learning},\nauthor={Beyazit Yalcinkaya and Niklas Lauffer and Marcell Vazquez-Chanlatte and Sanjit A. Seshia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6KDZHgrDhG}\n}", "github": "", "reviewers": "7HER;A6oy;Ww4e;bN73", "pdf_size": 5242542, "rating": "5;5;6;8", "confidence": "4;5;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;4", "presentation": "2;2;3;4", "wc_summary": "71;161;133;199", "wc_strengths": "41;290;264;245", "wc_weaknesses": "130;1521;265;318", "wc_questions": "174;6;69;172", "wc_limitations": "10;195;10;37", "wc_review": "426;2173;741;971", "wc_reply_reviewers": "94;403;493;50", "wc_reply_authors": "68;0;207;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 141.0, 46.71188285650665 ], "wc_strengths_avg": [ 210.0, 98.87112824277874 ], "wc_weaknesses_avg": [ 558.5, 559.9109304166154 ], "wc_questions_avg": [ 105.25, 71.3210172389598 ], "wc_limitations_avg": [ 63.0, 77.00324668479895 ], "wc_review_avg": [ 1077.75, 661.2765590129443 ], "wc_reply_reviewers_avg": [ 260.0, 191.30734434412076 ], "wc_reply_authors_avg": [ 79.75, 77.40922102695518 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13681963595146136032&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;nissan-usa.com;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Berkeley;Nissan Motor Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.nissan-global.com", "aff_unique_abbr": "UC Berkeley;Nissan", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Japan" }, { "title": "Fair and Welfare-Efficient Constrained Multi-Matchings under Uncertainty", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96532", "id": "6KThdqFgmA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6KThdqFgmA", "openreview": "https://openreview.net/forum?id=6KThdqFgmA", "poster": "", "project": "", "author_site": "Elita Lobo, Justin Payan, Cyrus Cousins, Yair Zick", "tldr": "", "abstract": "We study fair allocation of constrained resources, where a market designer optimizes overall welfare while maintaining group fairness. In many large-scale settings, utilities are not known in advance, but are instead observed after realizing the allocation. We therefore estimate agent utilities using machine learning. Optimizing over estimates requires trading-off between mean utilities and their predictive variances. We discuss these trade-offs under two paradigms for preference modeling \u2013 in the stochastic optimization regime, the market designer has access to a probability distribution over utilities, and in the robust optimization regime they have access to an uncertainty set containing the true utilities with high probability. We discuss utilitarian and egalitarian welfare objectives, and we explore how to optimize for them under stochastic and robust paradigms. We demonstrate the efficacy of our approaches on three publicly available conference reviewer assignment datasets. The approaches presented enable scalable constrained resource allocation under uncertainty for many combinations of objectives and preference models.", "keywords": "Resource allocation;robust optimization;CVaR;constrained allocation", "primary_area": "fairness", "supplementary_material": "/attachment/4be833bc841ef8cbaa4bb58c1a29e36bda2fb7bf.zip", "author": "Elita Lobo;Justin Payan;Cyrus Cousins;Yair Zick", "authorids": "~Elita_Lobo1;~Justin_Payan1;~Cyrus_Cousins1;~Yair_Zick1", "gender": "F;M;;M", "homepage": "https://elitalobo.github.io;http://justinpayan.github.io;https://www.cyruscousins.online/;https://people.cs.umass.edu/~yzick/", "dblp": ";289/0987;202/6684;90/9924", "google_scholar": ";gc9w_eYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=m0PW6DQAAAAJ", "orcid": ";0000-0001-7601-3500;0000-0002-1691-0282;0000-0002-0635-6230", "linkedin": ";justin-payan-028b43a0;;", "or_profile": "~Elita_Lobo1;~Justin_Payan1;~Cyrus_Cousins1;~Yair_Zick1", "aff": "University of New Hampshire;University of Massachusetts Amherst;University of Massachusetts Amherst;University of Massachusetts, Amherst", "aff_domain": "wildcats.unh.edu;umass.edu;umass.edu;umass.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nlobo2024fair,\ntitle={Fair and Welfare-Efficient Constrained Multi-Matchings under Uncertainty},\nauthor={Elita Lobo and Justin Payan and Cyrus Cousins and Yair Zick},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6KThdqFgmA}\n}", "github": "", "reviewers": "hqEo;ZdA8;UQiJ;69cR;5Awd", "pdf_size": 692026, "rating": "4;6;7;7;7", "confidence": "2;1;4;2;4", "soundness": "3;3;4;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "24;175;502;186;142", "wc_strengths": "23;47;176;64;94", "wc_weaknesses": "280;57;230;59;164", "wc_questions": "3;53;526;6;144", "wc_limitations": "4;1;68;6;69", "wc_review": "334;333;1502;321;613", "wc_reply_reviewers": "0;0;34;55;211", "wc_reply_authors": "0;0;26;20;1070", "reply_reviewers": "0;0;1;1;2", "reply_authors": "1;1;2;2;4", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 2.6, 1.2000000000000002 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 205.8, 158.8690026405403 ], "wc_strengths_avg": [ 80.8, 52.920317459365265 ], "wc_weaknesses_avg": [ 158.0, 89.5611522927212 ], "wc_questions_avg": [ 146.4, 196.5203297371547 ], "wc_limitations_avg": [ 29.6, 31.80314449861837 ], "wc_review_avg": [ 620.6, 454.21078807091317 ], "wc_reply_reviewers_avg": [ 60.0, 78.36070443787499 ], "wc_reply_authors_avg": [ 223.2, 423.52917254895203 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.485912657903775, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_-Z6rRCF3UsJ:scholar.google.com/&scioq=Fair+and+Welfare-Efficient+Constrained+Multi-Matchings+under+Uncertainty&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "wildcats.unh.edu;umass.edu;umass.edu;umass.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of New Hampshire;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.unh.edu;https://www.umass.edu", "aff_unique_abbr": "UNH;UMass Amherst", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ROIDICE: Offline Return on Investment Maximization for Efficient Decision Making", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96531", "id": "6Kg26g1quR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6Kg26g1quR", "openreview": "https://openreview.net/forum?id=6Kg26g1quR", "poster": "/media/PosterPDFs/NeurIPS%202024/96531.png?t=1731330302.8396628", "project": "", "author_site": "Woosung Kim, Hayeong Lee, Jongmin Lee, Byung-Jun Lee", "tldr": "", "abstract": "In this paper, we propose a novel policy optimization framework that maximizes Return on Investment (ROI) of a policy using a fixed dataset within a Markov Decision Process (MDP) equipped with a cost function. ROI, defined as the ratio between the return and the accumulated cost of a policy, serves as a measure of efficiency of the policy. Despite the importance of maximizing ROI in various applications, it remains a challenging problem due to its nature as a ratio of two long-term values: return and accumulated cost. To address this, we formulate the ROI maximizing reinforcement learning problem as a linear fractional programming. We then incorporate the stationary distribution correction (DICE) framework to develop a practical offline ROI maximization algorithm.\nOur proposed algorithm, ROIDICE, yields an efficient policy that offers a superior trade-off between return and accumulated cost compared to policies trained using existing frameworks.", "keywords": "Reinforcement Learning;Convex Optimization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Woosung Kim;Hayeong Lee;Jongmin Lee;Byung-Jun Lee", "authorids": "~Woosung_Kim2;~Hayeong_Lee1;~Jongmin_Lee1;~Byung-Jun_Lee1", "gender": "M;;M;M", "homepage": ";https://hy-kiera.gitbook.io/hayeong/;https://www.jmlee.kr;https://dmlab.korea.ac.kr/professor.html", "dblp": "47/2886;341/8290;68/222-4.html;130/1678-1", "google_scholar": "wAQHWEkAAAAJ;;https://scholar.google.co.kr/citations?user=rFcK8EEAAAAJ;FwoohI4AAAAJ", "orcid": ";;;", "linkedin": "woosung-%E2%80%8Dkim-32577a21b?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BfgmLtEHZQ2mku9F6jjpyuA%3D%3D;;jmlee123/;", "or_profile": "~Woosung_Kim2;~Hayeong_Lee1;~Jongmin_Lee1;~Byung-Jun_Lee1", "aff": "Korea University;Korea University;University of California, Berkeley;Gauss Labs Inc.", "aff_domain": "korea.ac.kr;korea.ac.kr;berkeley.edu;gausslabs.ai", "position": "PhD student;PhD student;Postdoc;Applied Scientist", "bibtex": "@inproceedings{\nkim2024roidice,\ntitle={{ROIDICE}: Offline Return on Investment Maximization for Efficient Decision Making},\nauthor={Woosung Kim and Hayeong Lee and Jongmin Lee and Byung-Jun Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6Kg26g1quR}\n}", "github": "", "reviewers": "Yyxa;Z94y;T1wG;8zrU", "pdf_size": 1178478, "rating": "5;5;6;6", "confidence": "3;2;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "94;188;64;53", "wc_strengths": "98;142;27;25", "wc_weaknesses": "290;59;59;8", "wc_questions": "72;2;158;75", "wc_limitations": "8;2;27;1", "wc_review": "562;393;335;162", "wc_reply_reviewers": "28;0;48;38", "wc_reply_authors": "48;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.75, 53.11485197192966 ], "wc_strengths_avg": [ 73.0, 49.512624652708524 ], "wc_weaknesses_avg": [ 104.0, 109.3869279210272 ], "wc_questions_avg": [ 76.75, 55.260180057614726 ], "wc_limitations_avg": [ 9.5, 10.452272480183437 ], "wc_review_avg": [ 363.0, 142.90381380495063 ], "wc_reply_reviewers_avg": [ 28.5, 17.909494688572316 ], "wc_reply_authors_avg": [ 12.0, 20.784609690826528 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QrtG9NyDWx8J:scholar.google.com/&scioq=ROIDICE:+Offline+Return+on+Investment+Maximization+for+Efficient+Decision+Making&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "korea.ac.kr;korea.ac.kr;berkeley.edu;gausslabs.ai", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Korea University;University of California, Berkeley;Gauss Labs Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.korea.ac.kr;https://www.berkeley.edu;", "aff_unique_abbr": "KU;UC Berkeley;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "South Korea;United States" }, { "title": "HYDRA-FL: Hybrid Knowledge Distillation for Robust and Accurate Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96530", "id": "6LVxO1C819", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6LVxO1C819", "openreview": "https://openreview.net/forum?id=6LVxO1C819", "poster": "/media/PosterPDFs/NeurIPS%202024/96530.png?t=1731710105.6281426", "project": "", "author_site": "Momin Ahmad Khan, Yasra Chandio, Fatima Anwar", "tldr": "", "abstract": "Data heterogeneity among Federated Learning (FL) users poses a significant challenge, resulting in reduced global model performance. The community has designed various techniques to tackle this issue, among which Knowledge Distillation (KD)-based techniques are common.\n While these techniques effectively improve performance under high heterogeneity, they inadvertently cause higher accuracy degradation under model poisoning attacks (known as \\emph{attack amplification}). This paper presents a case study to reveal this critical vulnerability in KD-based FL systems. We show why KD causes this issue through empirical evidence and use it as motivation to design a hybrid distillation technique. We introduce a novel algorithm, Hybrid Knowledge Distillation for Robust and Accurate FL (HYDRA-FL), which reduces the impact of attacks in attack scenarios by offloading some of the KD loss to a shallow layer via an auxiliary classifier. We model HYDRA-FL as a generic framework and adapt it to two KD-based FL algorithms, FedNTD and MOON. Using these two as case studies, we demonstrate that our technique outperforms baselines in attack settings while maintaining comparable performance in benign settings.", "keywords": "Federated Learning;Knowledge Distillation;Poisoning Attacks", "primary_area": "other", "supplementary_material": "/attachment/5c7c18d2eb7ccb4434455ce6005721bde082cbea.zip", "author": "Momin Ahmad Khan;Yasra Chandio;Fatima M. Anwar", "authorids": "~Momin_Ahmad_Khan1;~Yasra_Chandio1;~Fatima_M._Anwar1", "gender": "M;;", "homepage": "https://momin-ahmad-khan.github.io;;https://people.umass.edu/fanwar/", "dblp": "332/1871;;213/9876-1", "google_scholar": "rYs8N78AAAAJ;;zRFcmiYAAAAJ", "orcid": "0000-0002-6009-514X;;", "linkedin": "momin-ahmad-aa448b186/;;", "or_profile": "~Momin_Ahmad_Khan1;~Yasra_Chandio1;~Fatima_M._Anwar1", "aff": "University of Massachusetts at Amherst;;University of Massachusetts at Amherst", "aff_domain": "umass.edu;;umass.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nkhan2024hydrafl,\ntitle={{HYDRA}-{FL}: Hybrid Knowledge Distillation for Robust and Accurate Federated Learning},\nauthor={Momin Ahmad Khan and Yasra Chandio and Fatima M. Anwar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6LVxO1C819}\n}", "github": "", "reviewers": "SYjG;WZZr;Zoyb;udjQ", "pdf_size": 1611735, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;4", "presentation": "2;3;3;4", "wc_summary": "84;64;64;49", "wc_strengths": "44;41;49;165", "wc_weaknesses": "144;61;51;344", "wc_questions": "66;7;88;42", "wc_limitations": "1;12;4;6", "wc_review": "339;185;256;606", "wc_reply_reviewers": "0;0;16;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.25, 12.43734296383275 ], "wc_strengths_avg": [ 74.75, 52.184169055375406 ], "wc_weaknesses_avg": [ 150.0, 117.67964989750777 ], "wc_questions_avg": [ 50.75, 30.04475827827543 ], "wc_limitations_avg": [ 5.75, 4.02336923485777 ], "wc_review_avg": [ 346.5, 159.42788338305192 ], "wc_reply_reviewers_avg": [ 9.25, 9.41740410091868 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oLgIxv8ikM4J:scholar.google.com/&scioq=HYDRA-FL:+Hybrid+Knowledge+Distillation+for+Robust+and+Accurate+Federated+Learning&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "umass.edu;;umass.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Why Go Full? Elevating Federated Learning Through Partial Network Updates", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96529", "id": "6OK8Qy9yVu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6OK8Qy9yVu", "openreview": "https://openreview.net/forum?id=6OK8Qy9yVu", "poster": "/media/PosterPDFs/NeurIPS%202024/96529.png?t=1731333207.5483196", "project": "", "author_site": "Haolin Wang, Xuefeng Liu, Jianwei Niu, Wenkai Guo, Shaojie Tang", "tldr": "", "abstract": "Federated learning is a distributed machine learning paradigm designed to protect user data privacy, which has been successfully implemented across various scenarios. In traditional federated learning, the entire parameter set of local models is updated and averaged in each training round. Although this full network update method maximizes knowledge acquisition and sharing for each model layer, it prevents the layers of the global model from cooperating effectively to complete the tasks of each client, a challenge we refer to as layer mismatch. This mismatch problem recurs after every parameter averaging, consequently slowing down model convergence and degrading overall performance. To address the layer mismatch issue, we introduce the FedPart method, which restricts model updates to either a single layer or a few layers during each communication round. Furthermore, to maintain the efficiency of knowledge acquisition and sharing, we develop several strategies to select trainable layers in each round, including sequential updating and multi-round cycle training. Through both theoretical analysis and experiments, our findings demonstrate that the FedPart method significantly surpasses conventional full network update strategies in terms of convergence speed and accuracy, while also reducing communication and computational overheads.", "keywords": "Federated Learning;Partial Network Updates;Convergence Efficiency;Computational and Communicational Overhead Reduction", "primary_area": "other", "supplementary_material": "/attachment/53d259cade3f24770505ffa7310062be096dab52.zip", "author": "Haolin Wang;Xuefeng Liu;Jianwei Niu;Wenkai Guo;Shaojie Tang", "authorids": "~Haolin_Wang6;~Xuefeng_Liu5;~Jianwei_Niu3;~Wenkai_Guo1;~Shaojie_Tang2", "gender": "M;M;M;M;", "homepage": ";;https://shi.buaa.edu.cn/jwniu/zh_CN/index.htm;;", "dblp": ";96/600-1;25/4653-2;390/7471;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;KOciOtEAAAAJ;;", "orcid": "0000-0002-0659-2006;;0000-0003-3946-5107;0009-0000-9206-1924;", "linkedin": ";;;;", "or_profile": "~Haolin_Wang6;~Xuefeng_Liu5;~Jianwei_Niu3;~Wenkai_Guo1;~Shaojie_Tang2", "aff": "Beihang University;Behang University;Beihang University;Beihang University;", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;", "position": "MS student;Associate Professor;Full Professor;Undergrad student;", "bibtex": "@inproceedings{\nwang2024why,\ntitle={Why Go Full? Elevating Federated Learning Through Partial Network Updates},\nauthor={Haolin Wang and Xuefeng Liu and Jianwei Niu and Wenkai Guo and Shaojie Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6OK8Qy9yVu}\n}", "github": "", "reviewers": "9NYP;6JAi;SwoS;ZWNU", "pdf_size": 997743, "rating": "3;5;7;7", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "2;3;4;3", "wc_summary": "81;36;84;24", "wc_strengths": "33;14;30;85", "wc_weaknesses": "247;324;214;49", "wc_questions": "5;95;337;38", "wc_limitations": "5;1;26;9", "wc_review": "371;470;691;205", "wc_reply_reviewers": "122;136;58;37", "wc_reply_authors": "799;72;29;24", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 56.25, 26.61179249881526 ], "wc_strengths_avg": [ 40.5, 26.688012290165037 ], "wc_weaknesses_avg": [ 208.5, 100.36558174992062 ], "wc_questions_avg": [ 118.75, 130.05455586022353 ], "wc_limitations_avg": [ 10.25, 9.522998477370455 ], "wc_review_avg": [ 434.25, 175.89396663899532 ], "wc_reply_reviewers_avg": [ 88.25, 41.715554652910946 ], "wc_reply_authors_avg": [ 231.0, 328.46537108194525 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6365003513667512138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Beihang University;Behang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;", "aff_unique_abbr": "BUAA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Robust Contrastive Multi-view Clustering against Dual Noisy Correspondence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96528", "id": "6OvTbDClUn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6OvTbDClUn", "openreview": "https://openreview.net/forum?id=6OvTbDClUn", "poster": "", "project": "", "author_site": "Ruiming Guo, Mouxing Yang, Yijie Lin, Xi Peng, Peng Hu", "tldr": "", "abstract": "Recently, contrastive multi-view clustering (MvC) has emerged as a promising avenue for analyzing data from heterogeneous sources, typically leveraging the off-the-shelf instances as positives and randomly sampled ones as negatives. In practice, however, this paradigm would unavoidably suffer from the Dual Noisy Correspondence (DNC) problem, where noise compromises the constructions of both positive and negative pairs.\nSpecifically, the complexity of data collection and transmission might mistake some unassociated pairs as positive (namely, false positive correspondence), while the intrinsic one-to-many contrast nature of contrastive MvC would sample some intra-cluster samples as negative (namely, false negative correspondence).\nTo handle this daunting problem, we propose a novel method, dubbed Contextually-spectral based correspondence refinery (CANDY). \nCANDY dexterously exploits inter-view similarities as \\textit{context} to uncover false negatives. Furthermore, it employs a spectral-based module to denoise correspondence, alleviating the negative influence of false positives. \nExtensive experiments on five widely-used multi-view benchmarks, in comparison with eight competitive multi-view clustering methods, verify the effectiveness of our method in addressing the DNC problem.\nThe code is available at https://github.com/XLearning-SCU/2024-NeurIPS-CANDY.", "keywords": "multi-view clustering; noisy correspondence", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Ruiming Guo;Mouxing Yang;Yijie Lin;Xi Peng;Peng Hu", "authorids": "~Ruiming_Guo2;~Mouxing_Yang1;~Yijie_Lin1;~Xi_Peng3;~Peng_Hu2", "gender": "M;;M;M;M", "homepage": ";https://lin-yijie.github.io;https://penghu-cs.github.io/;https://mouxingyang.github.io;http://www.pengxi.me", "dblp": ";02/9654-1.html;11/6278-2;300/5807;18/931-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=KXKVYHsAAAAJ;gvESkwYAAAAJ;IaRmgrEAAAAJ;bw9FOHAAAAAJ", "orcid": "0000-0002-1135-7676;0000-0003-1746-295X;0000-0003-3868-3997;;", "linkedin": ";;;;", "or_profile": "~Ruiming_Guo2;~Yijie_Lin1;~Peng_Hu2;~Yang_Mouxing1;~Xi_Peng2", "aff": "Sichuan University;Sichuan University;Sichuan University;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn", "position": "MS student;PhD student;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nguo2024robust,\ntitle={Robust Contrastive Multi-view Clustering against Dual Noisy Correspondence},\nauthor={Ruiming Guo and Mouxing Yang and Yijie Lin and Xi Peng and Peng Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6OvTbDClUn}\n}", "github": "", "reviewers": "L9C1;jTED;G6Ay;6oUm", "pdf_size": 17189233, "rating": "6;6;7;7", "confidence": "4;4;5;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "136;75;66;113", "wc_strengths": "70;106;63;68", "wc_weaknesses": "126;72;170;165", "wc_questions": "16;28;4;4", "wc_limitations": "2;3;4;1", "wc_review": "350;284;307;351", "wc_reply_reviewers": "0;12;20;20", "wc_reply_authors": "0;18;11;18", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.5, 28.376927247325423 ], "wc_strengths_avg": [ 76.75, 17.07886120325357 ], "wc_weaknesses_avg": [ 133.25, 39.251592324388575 ], "wc_questions_avg": [ 13.0, 9.9498743710662 ], "wc_limitations_avg": [ 2.5, 1.118033988749895 ], "wc_review_avg": [ 323.0, 28.679260799399973 ], "wc_reply_reviewers_avg": [ 13.0, 8.18535277187245 ], "wc_reply_authors_avg": [ 11.75, 7.361215932167728 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1194840837126989678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "On the Optimality of Dilated Entropy and Lower Bounds for Online Learning in Extensive-Form Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96527", "id": "6PMfJT2O7G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6PMfJT2O7G", "openreview": "https://openreview.net/forum?id=6PMfJT2O7G", "poster": "", "project": "", "author_site": "Zhiyuan Fan, Christian Kroer, Gabriele Farina", "tldr": "", "abstract": "First-order methods (FOMs) are arguably the most scalable algorithms for equilibrium computation in large extensive-form games. To operationalize these methods, a distance-generating function, acting as a regularizer for the strategy space, must be chosen. \nThe ratio between the strong convexity modulus and the diameter of the regularizer is a key parameter in the analysis of FOMs.\nA natural question is then: what is the optimal distance-generating function for extensive-form decision spaces? In this paper, we make a number of contributions, ultimately establishing that the weight-one dilated entropy (DilEnt) distance-generating function is optimal up to logarithmic factors. \nThe DilEnt regularizer is notable due to its iterate-equivalence with Kernelized OMWU (KOMWU)---the algorithm with state-of-the-art dependence on the game tree size in extensive-form games---when used in conjunction with the online mirror descent (OMD) algorithm. However, the standard analysis for OMD is unable to establish such a result; the only current analysis is by appealing to the iterate equivalence to KOMWU. \nWe close this gap by introducing a pair of primal-dual treeplex norms, which we contend form the natural analytic viewpoint for studying the strong convexity of DilEnt. \nUsing these norm pairs, we recover the diameter-to-strong-convexity ratio that predicts the same performance as KOMWU. Along with a new regret lower bound for online learning in sequence-form strategy spaces, we show that this ratio is nearly optimal.\nFinally, we showcase our analytic techniques by refining the analysis of Clairvoyant OMD when paired with DilEnt, establishing an $\\mathcal{O}(n \\log |\\mathcal{V}| \\log T/T)$ approximation rate to coarse correlated equilibrium in $n$-player games, where $|\\mathcal{V}|$ is the number of reduced normal-form strategies of the players, establishing the new state of the art.", "keywords": "extensive-form games;first order methods;dilated entropy;equilibrium computation", "primary_area": "online_learning", "supplementary_material": "", "author": "Zhiyuan Fan;Christian Kroer;Gabriele Farina", "authorids": "~Zhiyuan_Fan1;~Christian_Kroer1;~Gabriele_Farina1", "gender": "M;M;M", "homepage": "https://fan-zhiyuan.org/;http://www.columbia.edu/~ck2945/;http://www.cs.cmu.edu/~gfarina/about/", "dblp": ";64/10660;", "google_scholar": ";https://scholar.google.ch/citations?user=ckHwjPAAAAAJ;sktDNcEAAAAJ", "orcid": "0000-0001-7468-0895;0000-0002-9009-8683;", "linkedin": ";;", "or_profile": "~Zhiyuan_Fan1;~Christian_Kroer1;~Gabriele_Farina1", "aff": "Massachusetts Institute of Technology;Columbia University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;columbia.edu;mit.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nfan2024on,\ntitle={On the Optimality of Dilated Entropy and Lower Bounds for Online Learning in Extensive-Form Games},\nauthor={Zhiyuan Fan and Christian Kroer and Gabriele Farina},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6PMfJT2O7G}\n}", "github": "", "reviewers": "wj5h;8sgN;P3Rf;NLU1", "pdf_size": 475340, "rating": "6;6;6;7", "confidence": "2;3;2;3", "soundness": "4;3;3;3", "novelty": "4;2;2;4", "presentation": "4;2;3;3", "wc_summary": "86;196;110;39", "wc_strengths": "147;90;88;87", "wc_weaknesses": "36;252;66;31", "wc_questions": "85;3;15;15", "wc_limitations": "1;1;10;1", "wc_review": "355;542;289;173", "wc_reply_reviewers": "0;50;16;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.75, 56.992872361375156 ], "wc_strengths_avg": [ 103.0, 25.42636427018224 ], "wc_weaknesses_avg": [ 96.25, 90.91307661717318 ], "wc_questions_avg": [ 29.5, 32.41527417746146 ], "wc_limitations_avg": [ 3.25, 3.897114317029974 ], "wc_review_avg": [ 339.75, 133.71494867814891 ], "wc_reply_reviewers_avg": [ 16.5, 20.414455662593603 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3465754678704349630&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "mit.edu;columbia.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.columbia.edu", "aff_unique_abbr": "MIT;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Scale-invariant Optimal Sampling for Rare-events Data and Sparse Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96526", "id": "6SAnp0vr9X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6SAnp0vr9X", "openreview": "https://openreview.net/forum?id=6SAnp0vr9X", "poster": "/media/PosterPDFs/NeurIPS%202024/96526.png?t=1731264911.1311922", "project": "", "author_site": "Jing Wang, HaiYing Wang, Hao Zhang", "tldr": "", "abstract": "Subsampling is effective in tackling computational challenges for massive data with rare events. Overly aggressive subsampling may adversely affect estimation efficiency, and optimal subsampling is essential to mitigate the information loss. However, existing optimal subsampling probabilities depends on data scales, and some scaling transformations may result in inefficient subsamples. This problem is more significant when there are inactive features, because their influence on the subsampling probabilities can be arbitrarily magnified by inappropriate scaling transformations. We tackle this challenge and introduce a scale-invariant optimal subsampling function in the context of sparse models, where inactive features are commonly assumed. Instead of focusing on estimating model parameters, we define an optimal subsampling function to minimize the prediction error, using adaptive lasso as an example to outline the estimation procedure and study its theoretical guarantee. We first introduce the adaptive lasso estimator for rare-events data and establish its oracle properties, thereby validating the use of subsampling. Then we derive a scale-invariant optimal subsampling function that minimizes the prediction error of the inverse probability weighted (IPW) adaptive lasso. Finally, we present an estimator based on the maximum sampled conditional likelihood (MSCL) to further improve the estimation efficiency. We conduct numerical experiments using both simulated and real-world data sets to demonstrate the performance of the proposed methods.", "keywords": "adaptive lasso;imbalance data;optimality criterion;subsampling", "primary_area": "learning_theory", "supplementary_material": "/attachment/1937584bf61b5875eb434f3757acca2bb6395522.zip", "author": "Jing Wang;HaiYing Wang;Hao Zhang", "authorids": "~Jing_Wang56;~HaiYing_Wang1;~Hao_Zhang13", "gender": "M;;", "homepage": "https://maayawang.github.io/mysite/;https://ossifragus.github.io/;http://www.math.arizona.edu/~hzhang", "dblp": ";264/6420.html;", "google_scholar": ";SHd2S_0AAAAJ;", "orcid": ";0000-0001-7729-0243;", "linkedin": ";;", "or_profile": "~Jing_Wang56;~HaiYing_Wang1;~Hao_Zhang13", "aff": "University of Connecticut;University of Connecticut;University of Arizona", "aff_domain": "uconn.edu;uconn.edu;arizona.edu", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024scaleinvariant,\ntitle={Scale-invariant Optimal Sampling for Rare-events Data and Sparse Models},\nauthor={Jing Wang and HaiYing Wang and Hao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6SAnp0vr9X}\n}", "github": "", "reviewers": "8W9n;2g5r;RXcc", "pdf_size": 615167, "rating": "6;7;8", "confidence": "2;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "36;119;89", "wc_strengths": "21;171;44", "wc_weaknesses": "117;77;8", "wc_questions": "155;71;39", "wc_limitations": "5;1;5", "wc_review": "334;439;185", "wc_reply_reviewers": "18;20;0", "wc_reply_authors": "11;18;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.33333333333333, 34.31552936434983 ], "wc_strengths_avg": [ 78.66666666666667, 65.96126809642828 ], "wc_weaknesses_avg": [ 67.33333333333333, 45.02098276236192 ], "wc_questions_avg": [ 88.33333333333333, 48.91716354091771 ], "wc_limitations_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_review_avg": [ 319.3333333333333, 104.21238996502393 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 8.993825042154693 ], "wc_reply_authors_avg": [ 9.666666666666666, 7.408703590297623 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-XAWmEQRKG8J:scholar.google.com/&scioq=Scale-invariant+Optimal+Sampling+for+Rare-events+Data+and+Sparse+Models&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "uconn.edu;uconn.edu;arizona.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Connecticut;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.uconn.edu;https://www.arizona.edu", "aff_unique_abbr": "UConn;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Taming Cross-Domain Representation Variance in Federated Prototype Learning with Heterogeneous Data Domains", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96525", "id": "6SRPizFuaE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6SRPizFuaE", "openreview": "https://openreview.net/forum?id=6SRPizFuaE", "poster": "/media/PosterPDFs/NeurIPS%202024/96525.png?t=1733176451.0955997", "project": "", "author_site": "Lei Wang, Jieming Bian, Letian Zhang, Chen Chen, Jie Xu", "tldr": "", "abstract": "Federated learning (FL) allows collaborative machine learning training without sharing private data. While most FL methods assume identical data domains across clients, real-world scenarios often involve heterogeneous data domains. Federated Prototype Learning (FedPL) addresses this issue, using mean feature vectors as prototypes to enhance model generalization. However, existing FedPL methods create the same number of prototypes for each client, leading to cross-domain performance gaps and disparities for clients with varied data distributions. To mitigate cross-domain feature representation variance, we introduce FedPLVM, which establishes variance-aware dual-level prototypes clustering and employs a novel $\\alpha$-sparsity prototype loss. The dual-level prototypes clustering strategy creates local clustered prototypes based on private data features, then performs global prototypes clustering to reduce communication complexity and preserve local data privacy. The $\\alpha$-sparsity prototype loss aligns samples from underrepresented domains, enhancing intra-class similarity and reducing inter-class similarity. Evaluations on Digit-5, Office-10, and DomainNet datasets demonstrate our method's superiority over existing approaches.", "keywords": "Federated Prototype Learning;Domain Heterogeneity", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Lei Wang;Jieming Bian;Letian Zhang;Chen Chen;Jie Xu", "authorids": "~Lei_Wang40;~Jieming_Bian1;~Letian_Zhang2;~Chen_Chen18;~Jie_Xu6", "gender": "M;M;;M;", "homepage": ";https://www.linkedin.com/in/jieming-bian-07b678139/;;https://www.crcv.ucf.edu/chenchen/;https://jiexu.ece.ufl.edu", "dblp": ";304/3462;;65/4423-1;37/5126-1", "google_scholar": ";k6E4dDwAAAAJ;;TuEwcZ0AAAAJ;07kG-YsAAAAJ", "orcid": ";;;0000-0003-3957-7061;", "linkedin": "lei-wang-4b280621a/;;;dennychen/;", "or_profile": "~Lei_Wang40;~Jieming_Bian1;~Letian_Zhang2;~Chen_Chen18;~Jie_Xu6", "aff": "University of Miami;University of Miami;;University of Central Florida;University of Miami", "aff_domain": "miami.edu;miami.edu;;ucf.edu;miami.edu", "position": "PhD student;PhD student;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024taming,\ntitle={Taming Cross-Domain Representation Variance in Federated Prototype Learning with Heterogeneous Data Domains},\nauthor={Lei Wang and Jieming Bian and Letian Zhang and Chen Chen and Jie Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6SRPizFuaE}\n}", "github": "", "reviewers": "8nBN;GDmh;e95T;gr7B;dkgn", "pdf_size": 1002151, "rating": "4;5;5;6;6", "confidence": "5;4;5;5;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;3;3;3;4", "wc_summary": "49;103;47;80;50", "wc_strengths": "8;37;37;47;45", "wc_weaknesses": "176;64;86;117;53", "wc_questions": "31;96;5;54;46", "wc_limitations": "5;1;5;8;19", "wc_review": "269;301;180;306;213", "wc_reply_reviewers": "274;18;20;76;0", "wc_reply_authors": "730;0;0;351;0", "reply_reviewers": "2;1;1;2;0", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 65.8, 22.229709849658406 ], "wc_strengths_avg": [ 34.8, 14.005713120009277 ], "wc_weaknesses_avg": [ 99.2, 44.18778111650324 ], "wc_questions_avg": [ 46.4, 29.897157055479372 ], "wc_limitations_avg": [ 7.6, 6.118823416311342 ], "wc_review_avg": [ 253.8, 49.58790175032616 ], "wc_reply_reviewers_avg": [ 77.6, 101.45659170305298 ], "wc_reply_authors_avg": [ 216.2, 290.6505805946377 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4677071733467426, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7802663563108592935&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "miami.edu;miami.edu;;ucf.edu;miami.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Miami;University of Central Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.miami.edu;https://www.ucf.edu", "aff_unique_abbr": "UM;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improved Regret of Linear Ensemble Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96524", "id": "6SSzMq3WTn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6SSzMq3WTn", "openreview": "https://openreview.net/forum?id=6SSzMq3WTn", "poster": "/media/PosterPDFs/NeurIPS%202024/96524.png?t=1733850698.5819988", "project": "", "author_site": "Harin Lee, Min-hwan Oh", "tldr": "", "abstract": "In this work, we close the fundamental gap of theory and practice by providing an improved regret bound for linear ensemble sampling. We prove that with an ensemble size logarithmic in $T$, linear ensemble sampling can achieve a frequentist regret bound of $\\tilde{\\mathcal{O}}(d^{3/2}\\sqrt{T})$, matching state-of-the-art results for randomized linear bandit algorithms, where $d$ and $T$ are the dimension of the parameter and the time horizon respectively. Our approach introduces a general regret analysis framework for linear bandit algorithms. Additionally, we reveal a significant relationship between linear ensemble sampling and Linear Perturbed-History Exploration (LinPHE), showing that LinPHE is a special case of linear ensemble sampling when the ensemble size equals $T$. This insight allows us to derive a new regret bound of $\\tilde{\\mathcal{O}}(d^{3/2}\\sqrt{T})$ for LinPHE, independent of the number of arms. Our contributions advance the theoretical foundation of ensemble sampling, bringing its regret bounds in line with the best known bounds for other randomized exploration algorithms.", "keywords": "Linear Bandit;Ensemble Sampling", "primary_area": "bandits", "supplementary_material": "", "author": "Harin Lee;Min-hwan Oh", "authorids": "~Harin_Lee2;~Min-hwan_Oh1", "gender": ";", "homepage": "https://harinboy.github.io;https://minoh.io", "dblp": ";172/0531", "google_scholar": ";KzVALFwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Harin_Lee2;~Min-hwan_Oh1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nlee2024improved,\ntitle={Improved Regret of Linear Ensemble Sampling},\nauthor={Harin Lee and Min-hwan Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6SSzMq3WTn}\n}", "github": "", "reviewers": "QBvS;TPGY;vWv8;QJqu", "pdf_size": 433937, "rating": "3;5;7;7", "confidence": "5;4;3;4", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;3", "wc_summary": "17;59;95;110", "wc_strengths": "8;26;118;76", "wc_weaknesses": "360;107;32;334", "wc_questions": "131;52;68;35", "wc_limitations": "9;8;11;2", "wc_review": "525;252;324;557", "wc_reply_reviewers": "1579;0;211;100", "wc_reply_authors": "2279;510;548;526", "reply_reviewers": "4;0;1;1", "reply_authors": "6;2;3;3", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.25, 35.89829383132296 ], "wc_strengths_avg": [ 57.0, 43.139309220245984 ], "wc_weaknesses_avg": [ 208.25, 141.55983717142374 ], "wc_questions_avg": [ 71.5, 36.28015986734347 ], "wc_limitations_avg": [ 7.5, 3.3541019662496847 ], "wc_review_avg": [ 414.5, 129.5308843480967 ], "wc_reply_reviewers_avg": [ 472.5, 643.182905556421 ], "wc_reply_authors_avg": [ 965.75, 758.3252517884393 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iEmeYonTy-EJ:scholar.google.com/&scioq=Improved+Regret+of+Linear+Ensemble+Sampling&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Topological Generalization Bounds for Discrete-Time Stochastic Optimization Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96523", "id": "6U5fCHIWOC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6U5fCHIWOC", "openreview": "https://openreview.net/forum?id=6U5fCHIWOC", "poster": "/media/PosterPDFs/NeurIPS%202024/96523.png?t=1731575997.803582", "project": "", "author_site": "Rayna Andreeva, Benjamin Dupuis, Rik Sarkar, Tolga Birdal, Umut Simsekli", "tldr": "", "abstract": "We present a novel set of rigorous and computationally efficient topology-based complexity notions that exhibit a strong correlation with the generalization gap in modern deep neural networks (DNNs). DNNs show remarkable generalization properties, yet the source of these capabilities remains elusive, defying the established statistical learning theory. Recent studies have revealed that properties of training trajectories can be indicative of generalization. Building on this insight, state-of-the-art methods have leveraged the topology of these trajectories, particularly their fractal dimension, to quantify generalization. Most existing works compute this quantity by assuming continuous- or infinite-time training dynamics, complicating the development of practical estimators capable of accurately predicting generalization without access to test data. In this paper, we respect the discrete-time nature of training trajectories and investigate the underlying topological quantities that can be amenable to topological data analysis tools. This leads to a new family of reliable topological complexity measures that provably bound the generalization error, eliminating the need for restrictive geometric assumptions. These measures are computationally friendly, enabling us to propose simple yet effective algorithms for computing generalization indices. Moreover, our flexible framework can be extended to different domains, tasks, and architectures. Our experimental results demonstrate that our new complexity measures exhibit a strong correlation with generalization error in industry-standard architectures such as transformers and deep graph networks. Our approach consistently outperforms existing topological bounds across a wide range of datasets, models, and optimizers, highlighting the practical relevance and effectiveness of our complexity measures.", "keywords": "Generalization bounds;persistent homology;metric space magnitude;deep neural networks", "primary_area": "learning_theory", "supplementary_material": "/attachment/5f75741ab47d46b2544d298c618af55623a85fec.zip", "author": "Rayna Andreeva;Benjamin Dupuis;Rik Sarkar;Tolga Birdal;Umut Simsekli", "authorids": "~Rayna_Andreeva1;~Benjamin_Dupuis1;~Rik_Sarkar1;~Tolga_Birdal3;~Umut_Simsekli1", "gender": ";M;M;M;M", "homepage": "https://rorondre.github.io/about/;https://benjidupuis.github.io;https://homepages.inf.ed.ac.uk/rsarkar/;http://tolgabirdal.github.io;https://www.di.ens.fr/~simsekli/", "dblp": ";294/9740;82/4961;143/7056;https://dblp.org/pers/s/Simsekli:Umut.html", "google_scholar": "scwihDYAAAAJ;r99oWgkAAAAJ;rmMWizEAAAAJ;_Bxd5ggAAAAJ;https://scholar.google.fr/citations?user=CuArAkgAAAAJ", "orcid": ";;;0000-0001-7915-7964;", "linkedin": ";benjamin-dupuis-3b453a176/;;https://linkedin.com/in/tbirdal;", "or_profile": "~Rayna_Andreeva1;~Benjamin_Dupuis1;~Rik_Sarkar1;~Tolga_Birdal3;~Umut_Simsekli1", "aff": "University of Edinburgh;INRIA;University of Edinburgh;Imperial College London;INRIA", "aff_domain": "ed.ac.uk;inria.fr;ed.ac.uk;imperial.ac.uk;inria.fr", "position": "PhD student;PhD student;Associate Professor;Assistant Professor;Research Faculty", "bibtex": "@inproceedings{\nandreeva2024topological,\ntitle={Topological Generalization Bounds for Discrete-Time Stochastic Optimization Algorithms},\nauthor={Rayna Andreeva and Benjamin Dupuis and Rik Sarkar and Tolga Birdal and Umut Simsekli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6U5fCHIWOC}\n}", "github": "", "reviewers": "bXXR;4cSo;9Fve;xNox", "pdf_size": 5720529, "rating": "6;7;7;8", "confidence": "3;2;4;4", "soundness": "2;3;3;3", "novelty": "3;4;3;4", "presentation": "2;2;3;3", "wc_summary": "241;218;90;95", "wc_strengths": "77;153;218;107", "wc_weaknesses": "216;30;637;15", "wc_questions": "129;137;300;36", "wc_limitations": "10;29;78;6", "wc_review": "673;567;1323;259", "wc_reply_reviewers": "186;16;104;4", "wc_reply_authors": "285;0;30;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 161.0, 69.00362309328402 ], "wc_strengths_avg": [ 138.75, 53.16189895028205 ], "wc_weaknesses_avg": [ 224.5, 250.97260806709565 ], "wc_questions_avg": [ 150.5, 95.00657871958131 ], "wc_limitations_avg": [ 30.75, 28.630185119904482 ], "wc_review_avg": [ 705.5, 387.5909570668542 ], "wc_reply_reviewers_avg": [ 77.5, 73.58498488142808 ], "wc_reply_authors_avg": [ 78.75, 119.70667274634276 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14748744229492009513&as_sdt=10005&sciodt=0,8&hl=en", "gs_version_total": 7, "email": "ed.ac.uk;inria.fr;ed.ac.uk;imperial.ac.uk;inria.fr", "author_num": 5, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "University of Edinburgh;INRIA;Imperial College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.inria.fr;https://www.imperial.ac.uk", "aff_unique_abbr": "Edinburgh;INRIA;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United Kingdom;France" }, { "title": "Robust Neural Contextual Bandit against Adversarial Corruptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96522", "id": "6U8iV9HVpS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6U8iV9HVpS", "openreview": "https://openreview.net/forum?id=6U8iV9HVpS", "poster": "", "project": "", "author_site": "Yunzhe Qi, Yikun Ban, Arindam Banerjee, Jingrui He", "tldr": "", "abstract": "Contextual bandit algorithms aim to identify the optimal arm with the highest reward among a set of candidates, based on the accessible contextual information. Among these algorithms, neural contextual bandit methods have shown generally superior performances against linear and kernel ones, due to the representation power of neural networks. However, similar to other neural network applications, neural bandit algorithms can be vulnerable to adversarial attacks or corruptions on the received labels (i.e., arm rewards), which can lead to unexpected performance degradation without proper treatments. As a result, it is necessary to improve the robustness of neural bandit models against potential reward corruptions. In this work, we propose a novel neural contextual bandit algorithm named R-NeuralUCB, which utilizes a novel context-aware Gradient Descent (GD) training strategy to improve the robustness against adversarial reward corruptions. Under over-parameterized neural network settings, we provide regret analysis for R-NeuralUCB to quantify reward corruption impacts, without the commonly adopted arm separateness assumption in existing neural bandit works. We also conduct experiments against baselines on real data sets under different scenarios, in order to demonstrate the effectiveness of our proposed R-NeuralUCB.", "keywords": "Contextual Bandits;Neural Networks;Adversarial Corruption", "primary_area": "bandits", "supplementary_material": "/attachment/01c05cbbbffa8af0d0f035f3186d6b15e16c62bb.zip", "author": "Yunzhe Qi;Yikun Ban;Arindam Banerjee;Jingrui He", "authorids": "~Yunzhe_Qi1;~Yikun_Ban1;~Arindam_Banerjee4;~Jingrui_He1", "gender": "M;;;F", "homepage": "https://www.linkedin.com/in/yunzhe-qi-a1409b161/;;https://arindam.cs.illinois.edu/;https://www.hejingrui.org", "dblp": "259/3914;;82/4807.html;34/2685", "google_scholar": "Gt17_A0AAAAJ;;RY7cuPAAAAAJ;hXpZynkAAAAJ", "orcid": "0000-0001-5828-7436;;;0000-0002-6429-6272", "linkedin": "yunzhe-qi-a1409b161/;;;", "or_profile": "~Yunzhe_Qi1;~Yikun_Ban1;~Arindam_Banerjee4;~Jingrui_He1", "aff": "University of Illinois Urbana-Champaign;;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;;illinois.edu;illinois.edu", "position": "PhD student;;Professor;Full Professor", "bibtex": "@inproceedings{\nqi2024robust,\ntitle={Robust Neural Contextual Bandit against Adversarial Corruptions},\nauthor={Yunzhe Qi and Yikun Ban and Arindam Banerjee and Jingrui He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6U8iV9HVpS}\n}", "github": "", "reviewers": "1FL3;Fhvw;xUbn;MEPN", "pdf_size": 907330, "rating": "5;6;6;7", "confidence": "3;3;5;4", "soundness": "2;3;4;3", "novelty": "3;3;4;3", "presentation": "2;3;4;4", "wc_summary": "47;56;126;245", "wc_strengths": "76;41;103;214", "wc_weaknesses": "118;56;91;310", "wc_questions": "2;25;26;367", "wc_limitations": "13;1;1;7", "wc_review": "256;179;347;1143", "wc_reply_reviewers": "14;26;0;102", "wc_reply_authors": "60;32;0;53", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 118.5, 79.17859559249582 ], "wc_strengths_avg": [ 108.5, 64.75530866268804 ], "wc_weaknesses_avg": [ 143.75, 98.4692210794825 ], "wc_questions_avg": [ 105.0, 151.57011578804048 ], "wc_limitations_avg": [ 5.5, 4.9749371855331 ], "wc_review_avg": [ 481.25, 386.66159299832196 ], "wc_reply_reviewers_avg": [ 35.5, 39.48100809249936 ], "wc_reply_authors_avg": [ 36.25, 23.327826731180938 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l9fl89EjEpEJ:scholar.google.com/&scioq=Robust+Neural+Contextual+Bandit+against+Adversarial+Corruptions&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "illinois.edu;;illinois.edu;illinois.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LAVIB: A Large-scale Video Interpolation Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97840", "id": "6UQPx8SMXy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6UQPx8SMXy", "openreview": "https://openreview.net/forum?id=6UQPx8SMXy", "poster": "/media/PosterPDFs/NeurIPS%202024/97840.png?t=1730200342.3417435", "project": "", "author_site": "Alex Stergiou", "tldr": "", "abstract": "This paper introduces a LArge-scale Video Interpolation Benchmark (LAVIB) for the low-level video task of Video Frame Interpolation (VFI). LAVIB comprises a large collection of high-resolution videos sourced from the web through an automated pipeline with minimal requirements for human verification. Metrics are computed for each video's motion magnitudes, luminance conditions, frame sharpness, and contrast. The collection of videos and the creation of quantitative challenges based on these metrics are under-explored by current low-level video task datasets. In total, LAVIB includes 283K clips from 17K ultra-HD videos, covering 77.6 hours. Benchmark train, val, and test sets maintain similar video metric distributions. Further splits are also created for out-of-distribution (OOD) challenges, with train and test splits including videos of dissimilar attributes.", "keywords": "video frame interpolation;out-of-distribution", "primary_area": "", "supplementary_material": "/attachment/09d18974a3e2bbda8a387b6ea419ef0dfaf74790.pdf", "author": "Alexandros Stergiou", "authorids": "~Alexandros_Stergiou1", "gender": "", "homepage": "https://alexandrosstergiou.github.io/", "dblp": "207/0477", "google_scholar": "_E_Zs3kAAAAJ", "orcid": "0000-0003-4706-4231", "linkedin": "", "or_profile": "~Alexandros_Stergiou1", "aff": "University of Twente", "aff_domain": "utwente.nl", "position": "Assistant Professor", "bibtex": "@inproceedings{\nstergiou2024lavib,\ntitle={{LAVIB}: A Large-scale Video Interpolation Benchmark},\nauthor={Alexandros Stergiou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=6UQPx8SMXy}\n}", "github": "", "reviewers": "WSo6;E9PZ;UTjR", "pdf_size": 34278422, "rating": "6;6;7", "confidence": "4;3;4", "wc_summary_and_contributions": "88;1;92", "wc_strengths": "105;3;49", "wc_improvement": "135;3;51", "wc_limitations": "8;3;6", "wc_correctness": "2;1;5", "wc_clarity": "5;1;5", "wc_relation_to_prior_work": "2;1;9", "wc_documentation": "9;1;26", "wc_additional_feedback": "1;1;1", "wc_review": "355;15;244", "wc_reply_reviewers": "13;0;18", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 60.333333333333336, 41.98677040317448 ], "wc_strengths_avg": [ 52.333333333333336, 41.7079795189788 ], "wc_improvement_avg": [ 63.0, 54.55272678794342 ], "wc_limitations_avg": [ 5.666666666666667, 2.0548046676563256 ], "wc_correctness_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_clarity_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 4.0, 3.559026084010437 ], "wc_documentation_avg": [ 12.0, 10.424330514074594 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 204.66666666666666, 141.56349184101887 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 7.586537784494029 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17360471229577112483&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "utwente.nl", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Twente", "aff_unique_dep": "", "aff_unique_url": "https://www.utwente.nl", "aff_unique_abbr": "UT", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "Team-Fictitious Play for Reaching Team-Nash Equilibrium in Multi-team Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96521", "id": "6VVgAgVfxW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6VVgAgVfxW", "openreview": "https://openreview.net/forum?id=6VVgAgVfxW", "poster": "/media/PosterPDFs/NeurIPS%202024/96521.png?t=1731757820.0373178", "project": "", "author_site": "Ahmed D\u00f6nmez, Y\u00fcksel Arslanta\u015f, Muhammed Sayin", "tldr": "", "abstract": "Multi-team games, prevalent in robotics and resource management, involve team members striving for a joint best response against other teams. Team-Nash equilibrium (TNE) predicts the outcomes of such coordinated interactions. However, can teams of self-interested agents reach TNE? We introduce Team-Fictitious Play (Team-FP), a new variant of fictitious play where agents respond to the last actions of team members and the beliefs formed about other teams with some inertia in action updates. This design is essential in team coordination beyond the classical fictitious play dynamics. We focus on zero-sum potential team games (ZSPTGs) where teams can interact pairwise while the team members do not necessarily have identical payoffs. We show that Team-FP reaches near TNE in ZSPTGs with a quantifiable error bound. We extend Team-FP dynamics to multi-team Markov games for model-based and model-free cases. The convergence analysis tackles the challenge of non-stationarity induced by evolving opponent strategies based on the optimal coupling lemma and stochastic differential inclusion approximation methods. Our work strengthens the foundation for using TNE to predict the behavior of decentralized teams and offers a practical rule for team learning in multi-team environments. We provide extensive simulations of Team-FP dynamics and compare its performance with other widely studied dynamics such as smooth fictitious play and multiplicative weights update. We further explore how different parameters impact the speed of convergence.", "keywords": "Multi-agent reinforcement learning;fictitious play;multi-team games", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/95d98f8438270780be11563fb8a0d932cb24cb75.zip", "author": "Ahmed Said D\u00f6nmez;Y\u00fcksel Arslanta\u015f;Muhammed O. Sayin", "authorids": "~Ahmed_Said_D\u00f6nmez1;~Y\u00fcksel_Arslanta\u015f1;~Muhammed_O._Sayin1", "gender": "M;;M", "homepage": ";;https://gdn.bilkent.edu.tr", "dblp": ";;131/6682", "google_scholar": ";;-sAoXzUAAAAJ", "orcid": ";;", "linkedin": "ahmedsaiddonmez/;;muhammed-omer-sayin", "or_profile": "~Ahmed_Said_D\u00f6nmez1;~Y\u00fcksel_Arslanta\u015f1;~Muhammed_O._Sayin1", "aff": "Bilkent University;;Bilkent University", "aff_domain": "bilkent.edu.tr;;bilkent.edu.tr", "position": "MS student;;Assistant Professor", "bibtex": "@inproceedings{\nd{\\\"o}nmez2024teamfictitious,\ntitle={Team-Fictitious Play for Reaching Team-Nash Equilibrium in Multi-team Games},\nauthor={Ahmed Said D{\\\"o}nmez and Y{\\\"u}ksel Arslanta{\\c{s}} and Muhammed O. Sayin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6VVgAgVfxW}\n}", "github": "", "reviewers": "LmHf;te9F;Rftf;Jh6E", "pdf_size": 1883701, "rating": "3;5;6;6", "confidence": "4;3;2;3", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "1;2;2;3", "wc_summary": "79;78;91;99", "wc_strengths": "19;81;48;69", "wc_weaknesses": "60;96;72;146", "wc_questions": "13;18;32;9", "wc_limitations": "13;1;7;6", "wc_review": "184;274;250;329", "wc_reply_reviewers": "0;20;26;0", "wc_reply_authors": "0;12;31;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.75, 8.728545125048045 ], "wc_strengths_avg": [ 54.25, 23.53056522908024 ], "wc_weaknesses_avg": [ 93.5, 32.96589146375386 ], "wc_questions_avg": [ 18.0, 8.689073598491383 ], "wc_limitations_avg": [ 6.75, 4.264680527307995 ], "wc_review_avg": [ 259.25, 52.03544465073783 ], "wc_reply_reviewers_avg": [ 11.5, 11.6940155635265 ], "wc_reply_authors_avg": [ 10.75, 12.676257334087218 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6400008160625611310&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bilkent.edu.tr;;bilkent.edu.tr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Bilkent University", "aff_unique_dep": "", "aff_unique_url": "https://www.bilkent.edu.tr", "aff_unique_abbr": "Bilkent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "T\u00fcrkiye" }, { "title": "Lighting Every Darkness with 3DGS: Fast Training and Real-Time Rendering for HDR View Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96520", "id": "6W3LbkKriL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6W3LbkKriL", "openreview": "https://openreview.net/forum?id=6W3LbkKriL", "poster": "/media/PosterPDFs/NeurIPS%202024/96520.png?t=1731607213.854748", "project": "", "author_site": "Xin Jin, Pengyi Jiao, Zheng-Peng Duan, Xingchao Yang, Chongyi Li, Chun-Le Guo, Bo Ren", "tldr": "", "abstract": "Volumetric rendering-based methods, like NeRF, excel in HDR view synthesis from RAW images, especially for nighttime scenes. They suffer from long training times and cannot perform real-time rendering due to dense sampling requirements. The advent of 3D Gaussian Splatting (3DGS) enables real-time rendering and faster training. However, implementing RAW image-based view synthesis directly using 3DGS is challenging due to its inherent drawbacks: 1) in nighttime scenes, extremely low SNR leads to poor structure-from-motion (SfM) estimation in dis- tant views; 2) the limited representation capacity of the spherical harmonics (SH) function is unsuitable for RAW linear color space; and 3) inaccurate scene structure hampers downstream tasks such as refocusing. To address these issues, we propose LE3D (Lighting Every darkness with 3DGS). Our method proposes Cone Scatter Initialization to enrich the estimation of SfM and replaces SH with a Color MLP to represent the RAW linear color space. Additionally, we introduce depth distortion and near-far regularizations to improve the accuracy of scene structure for down- stream tasks. These designs enable LE3D to perform real-time novel view synthesis, HDR rendering, refocusing, and tone-mapping changes. Compared to previous vol- umetric rendering-based methods, LE3D reduces training time to 1% and improves rendering speed by up to 4,000 times for 2K resolution images in terms of FPS. Code and viewer can be found in https://srameo.github.io/projects/le3d.", "keywords": "3D Gaussian Splatting;RAW images;Computational Photography;Computer vision.", "primary_area": "machine_vision", "supplementary_material": "/attachment/2d45797b75f0c31d381851d384e0bc8901836dd6.zip", "author": "Xin Jin;Pengyi Jiao;Zheng-Peng Duan;Xingchao Yang;Chongyi Li;Chun-Le Guo;Bo Ren", "authorids": "~Xin_Jin13;~Pengyi_Jiao1;~Zheng-Peng_Duan1;~Xingchao_Yang1;~Chongyi_Li1;~Chun-Le_Guo1;~Bo_Ren1", "gender": "M;M;M;M;;;M", "homepage": "https://srameo.github.io/;;https://adam-duan.github.io/;;;;http://ren-bo.net", "dblp": ";;330/0686;;;;07/1796", "google_scholar": "https://scholar.google.com.hk/citations?user=22M3mw8AAAAJ;;S_v3euQAAAAJ;https://scholar.google.com.hk/citations?user=OkhB4Y8AAAAJ;;;", "orcid": ";0009-0007-7433-4651;;;;;", "linkedin": ";;;;;;", "or_profile": "~Xin_Jin13;~Pengyi_Jiao1;~Zheng-Peng_Duan1;~Xingchao_Yang1;~Chongyi_Li1;~Chun-Le_Guo1;~Bo_Ren1", "aff": "Nankai University;Nankai University;Nankai University;Megvii Technology Inc.;;;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;megvii.com;;;nankai.edu.cn", "position": "PhD student;MS student;MS student;Researcher;;;Associate Professor", "bibtex": "@inproceedings{\njin2024lighting,\ntitle={Lighting Every Darkness with 3{DGS}: Fast Training and Real-Time Rendering for {HDR} View Synthesis},\nauthor={Xin Jin and Pengyi Jiao and Zheng-Peng Duan and Xingchao Yang and Chongyi Li and Chun-Le Guo and Bo Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6W3LbkKriL}\n}", "github": "", "reviewers": "Vnv5;DcQH;PtE5;Me2T", "pdf_size": 42182394, "rating": "5;6;6;6", "confidence": "5;3;3;3", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "70;115;94;78", "wc_strengths": "41;144;35;166", "wc_weaknesses": "299;282;141;125", "wc_questions": "4;108;8;34", "wc_limitations": "1;1;2;16", "wc_review": "415;650;280;419", "wc_reply_reviewers": "10;301;81;0", "wc_reply_authors": "47;336;196;111", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 17.195566289017645 ], "wc_strengths_avg": [ 96.5, 59.052942348370756 ], "wc_weaknesses_avg": [ 211.75, 79.18135828589959 ], "wc_questions_avg": [ 38.5, 41.74625731727337 ], "wc_limitations_avg": [ 5.0, 6.363961030678928 ], "wc_review_avg": [ 441.0, 133.0056389782027 ], "wc_reply_reviewers_avg": [ 98.0, 121.29097245879431 ], "wc_reply_authors_avg": [ 172.5, 108.18618211213482 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8727159113277255552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;megvii.com;;;nankai.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Nankai University;Megvii Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;https://www.megvii.com", "aff_unique_abbr": "NKU;Megvii", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6Wm4202Wvc", "title": "Label Privacy in Split Learning for Large Models with Parameter-Efficient Training", "track": "main", "status": "Reject", "tldr": "", "abstract": "As deep learning models become larger and more expensive, many practitioners turn to fine-tuning APIs. \nThese web services allow fine-tuning a model between two parties: the client that provides the data, and the server that hosts the model.\nWhile convenient, these APIs raise a new concern: the data of the client is at risk of privacy breach during the training procedure.\nThis challenge presents an important practical case of vertical federated learning, where the two parties perform parameter-efficient fine-tuning (PEFT) of a large model.\nIn this study, we systematically search for a way to fine-tune models over an API *while keeping the labels private*.\nWe analyze the privacy of LoRA, a popular approach for parameter-efficient fine-tuning when training over an API.\nUsing this analysis, we propose P$^3$EFT, a multi-party split learning algorithm that takes advantage of existing PEFT properties to maintain privacy at a lower performance overhead.\nTo validate our algorithm, we fine-tune DeBERTa-v2-XXLarge, Flan-T5 Large and LLaMA-2 7B using LoRA adapters on a range of NLP tasks. We find that P$^3$EFT is competitive with existing privacy-preserving methods in multi-party and two-party setups while having higher accuracy.", "keywords": "Split Learning;Vertical Federated Learning;Federated Learning;Parameter Efficient Fine-tuning;Privacy;Large Language Models", "primary_area": "privacy", "supplementary_material": "/attachment/140a954bbbfe919054f9c798faf38574fba1d0f4.zip", "author": "Philip Zmushko;Marat Mansurov;Ruslan Svirschevski;Denis Kuznedelev;Max Ryabinin;Aleksandr Beznosikov", "authorids": "~Philip_Zmushko1;~Marat_Mansurov1;~Ruslan_Svirschevski1;~Denis_Kuznedelev1;~Max_Ryabinin1;~Aleksandr_Beznosikov1", "gender": ";M;;M;Not Specified;", "homepage": ";;;https://github.com/Godofnothing;https://mryab.github.io/;", "dblp": ";;;322/8616;276/0192;", "google_scholar": "zbKIABUAAAAJ;;;;930PERsAAAAJ;", "orcid": ";;;0009-0005-2420-9620;;", "linkedin": ";mrrrat/;;;;", "or_profile": "~Philip_Zmushko1;~Marat_Mansurov1;~Ruslan_Svirschevski1;~Denis_Kuznedelev1;~Max_Ryabinin1;~Aleksandr_Beznosikov1", "aff": "Moscow Institute of Physics and Technology;Higher School of Economics, Higher School of Economics;;Yandex;Together AI;", "aff_domain": "mipt.ru;cs.hse.ru;;yandex-team.ru;together.ai;", "position": "MS student;Undergrad student;;Researcher;Researcher;", "bibtex": "@misc{\nanonymous2024label,\ntitle={Label Privacy in Split Learning for Large Models with Parameter-Efficient Training},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=6Wm4202Wvc}\n}", "github": "", "project": "", "reviewers": "T8zi;1WCr;Fveg;S8Y2", "site": "https://openreview.net/forum?id=6Wm4202Wvc", "pdf_size": 743625, "rating": "5;5;5;6", "confidence": "3;2;3;5", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "48;45;33;56", "wc_strengths": "39;53;27;57", "wc_weaknesses": "193;25;118;363", "wc_questions": "2;35;11;31", "wc_limitations": "6;4;60;5", "wc_review": "288;162;249;512", "wc_reply_reviewers": "422;97;23;100", "wc_reply_authors": "1507;0;0;242", "reply_reviewers": "2;1;1;2", "reply_authors": "3;1;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 45.5, 8.261355820929152 ], "wc_strengths_avg": [ 44.0, 11.874342087037917 ], "wc_weaknesses_avg": [ 174.75, 123.91201515591618 ], "wc_questions_avg": [ 19.75, 13.699908758820257 ], "wc_limitations_avg": [ 18.75, 23.826193569263218 ], "wc_review_avg": [ 302.75, 129.1343776846429 ], "wc_reply_reviewers_avg": [ 160.5, 154.09493826858818 ], "wc_reply_authors_avg": [ 437.25, 625.4723714921387 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w2Ba0mHmJloJ:scholar.google.com/&scioq=Label+Privacy+in+Split+Learning+for+Large+Models+with+Parameter-Efficient+Training&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Moscow Institute of Physics and Technology;Higher School of Economics;Yandex;Together AI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.mipt.ru/en;https://www.hse.ru;https://yandex.com;https://www.together.ai", "aff_unique_abbr": "MIPT;HSE;Yandex;Together AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Russian Federation;United States" }, { "title": "The Sample-Communication Complexity Trade-off in Federated Q-Learning", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96519", "id": "6YIpvnkjUK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6YIpvnkjUK", "openreview": "https://openreview.net/forum?id=6YIpvnkjUK", "poster": "", "project": "", "author_site": "Sudeep Salgia, Yuejie Chi", "tldr": "", "abstract": "We consider the problem of Federated Q-learning, where $M$ agents aim to collaboratively learn the optimal Q-function of an unknown infinite horizon Markov Decision Process with finite state and action spaces. We investigate the trade-off between sample and communication complexity for the widely used class of intermittent communication algorithms. We first establish the converse result, where we show that any Federated Q-learning that offers a linear speedup with respect to number of agents in sample complexity needs to incur a communication cost of at least $\\Omega(\\frac{1}{1-\\gamma})$, where $\\gamma$ is the discount factor. We also propose a new Federated Q-learning algorithm, called Fed-DVR-Q, which is the first Federated Q-learning algorithm to simultaneously achieve order-optimal sample and communication complexities. Thus, together these results provide a complete characterization of the sample-communication complexity trade-off in Federated Q-learning.", "keywords": "Federated Q learning; Communication Efficiency;", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Sudeep Salgia;Yuejie Chi", "authorids": "~Sudeep_Salgia1;~Yuejie_Chi1", "gender": "M;", "homepage": "https://sudeepsalgia.github.io/;", "dblp": "207/8460;", "google_scholar": "Y5d5L84AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Sudeep_Salgia1;~Yuejie_Chi1", "aff": "Carnegie Mellon University;", "aff_domain": "cmu.edu;", "position": "Postdoc;", "bibtex": "@inproceedings{\nsalgia2024the,\ntitle={The Sample-Communication Complexity Trade-off in Federated Q-Learning},\nauthor={Sudeep Salgia and Yuejie Chi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6YIpvnkjUK}\n}", "github": "", "reviewers": "UVy4;YcCi;buwU", "pdf_size": 734905, "rating": "7;8;8", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;4;3", "presentation": "3;3;4", "wc_summary": "64;89;62", "wc_strengths": "88;35;62", "wc_weaknesses": "344;20;38", "wc_questions": "62;44;262", "wc_limitations": "7;1;10", "wc_review": "565;189;434", "wc_reply_reviewers": "31;9;0", "wc_reply_authors": "892;41;41", "reply_reviewers": "1;1;0", "reply_authors": "4;2;2", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.66666666666667, 12.283683848458853 ], "wc_strengths_avg": [ 61.666666666666664, 21.638443156156644 ], "wc_weaknesses_avg": [ 134.0, 148.6741403203664 ], "wc_questions_avg": [ 122.66666666666667, 98.79721093679157 ], "wc_limitations_avg": [ 6.0, 3.7416573867739413 ], "wc_review_avg": [ 396.0, 155.83538323072418 ], "wc_reply_reviewers_avg": [ 13.333333333333334, 13.021349989749739 ], "wc_reply_authors_avg": [ 324.6666666666667, 401.16524719316794 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17451577005796257042&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cmu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Inevitable Trade-off between Watermark Strength and Speculative Sampling Efficiency for Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96518", "id": "6YKMBUiIsG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6YKMBUiIsG", "openreview": "https://openreview.net/forum?id=6YKMBUiIsG", "poster": "/media/PosterPDFs/NeurIPS%202024/96518.png?t=1733809098.185667", "project": "", "author_site": "Zhengmian Hu, Heng Huang", "tldr": "", "abstract": "Large language models are probabilistic models, and the process of generating content is essentially sampling from the output distribution of the language model. Existing watermarking techniques inject watermarks into the generated content without altering the output quality. On the other hand, existing acceleration techniques, specifically speculative sampling, leverage a draft model to speed up the sampling process while preserving the output distribution. However, there is no known method to simultaneously accelerate the sampling process and inject watermarks into the generated content. In this paper, we investigate this direction and find that the integration of watermarking and acceleration is non-trivial. We prove a no-go theorem, which states that it is impossible to simultaneously maintain the highest watermark strength and the highest sampling efficiency. Furthermore, we propose two methods that maintain either the sampling efficiency or the watermark strength, but not both. Our work provides a rigorous theoretical foundation for understanding the inherent trade-off between watermark strength and sampling efficiency in accelerating the generation of watermarked tokens for large language models. We also conduct numerical experiments to validate our theoretical findings and demonstrate the effectiveness of the proposed methods.", "keywords": "watermark;speculative sampling;large language model;sampling;inference", "primary_area": "generative_models", "supplementary_material": "/attachment/f52c4418908a6b8c7415a88e61763e79be5833c3.zip", "author": "Zhengmian Hu;Heng Huang", "authorids": "~Zhengmian_Hu1;~Heng_Huang1", "gender": "M;M", "homepage": "https://www.umd.edu/;https://www.cs.umd.edu/~heng/", "dblp": "285/4945;03/281", "google_scholar": "4eXiWWgAAAAJ;4OqLaDwAAAAJ", "orcid": "0000-0003-0316-146X;", "linkedin": ";", "or_profile": "~Zhengmian_Hu1;~Heng_Huang1", "aff": "University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;cs.umd.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhu2024inevitable,\ntitle={Inevitable Trade-off between Watermark Strength and Speculative Sampling Efficiency for Language Models},\nauthor={Zhengmian Hu and Heng Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6YKMBUiIsG}\n}", "github": "", "reviewers": "zG1K;N92a;WMzV;5eD7", "pdf_size": 588233, "rating": "5;6;7;8", "confidence": "4;4;3;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;3;3", "wc_summary": "63;96;67;12", "wc_strengths": "56;44;75;61", "wc_weaknesses": "114;119;21;107", "wc_questions": "181;6;20;262", "wc_limitations": "16;1;18;18", "wc_review": "430;266;201;460", "wc_reply_reviewers": "8;0;10;145", "wc_reply_authors": "664;0;0;944", "reply_reviewers": "1;0;1;2", "reply_authors": "3;1;1;3", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 30.236567265481707 ], "wc_strengths_avg": [ 59.0, 11.113055385446435 ], "wc_weaknesses_avg": [ 90.25, 40.20805267604985 ], "wc_questions_avg": [ 117.25, 108.22517036253628 ], "wc_limitations_avg": [ 13.25, 7.119515432949071 ], "wc_review_avg": [ 339.25, 108.73678080576047 ], "wc_reply_reviewers_avg": [ 40.75, 60.30495419117736 ], "wc_reply_authors_avg": [ 402.0, 414.0096617230086 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3098717141669381076&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umd.edu;cs.umd.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Maryland;University of Maryland, College Park", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PiSSA: Principal Singular Values and Singular Vectors Adaptation of Large Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96517", "id": "6ZBHIEtdP4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6ZBHIEtdP4", "openreview": "https://openreview.net/forum?id=6ZBHIEtdP4", "poster": "/media/PosterPDFs/NeurIPS%202024/96517.png?t=1730972290.6975071", "project": "", "author_site": "Fanxu Meng, Zhaohui Wang, Muhan Zhang", "tldr": "", "abstract": "To parameter-efficiently fine-tune (PEFT) large language models (LLMs), the low-rank adaptation (LoRA) method approximates the model changes $\\Delta W \\in \\mathbb{R}^{m \\times n}$ through the product of two matrices $A \\in \\mathbb{R}^{m \\times r}$ and $B \\in \\mathbb{R}^{r \\times n}$, where $r \\ll \\min(m, n)$, $A$ is initialized with Gaussian noise, and $B$ with zeros. LoRA **freezes the original model $W$** and **updates the \"Noise \\& Zero\" adapter**, which may lead to slow convergence. To overcome this limitation, we introduce **P**r**i**ncipal **S**ingular values and **S**ingular vectors **A**daptation (PiSSA). PiSSA shares the same architecture as LoRA, but initializes the adaptor matrices $A$ and $B$ with the principal components of the original matrix $W$, and put the remaining components into a residual matrix $W^{res} \\in \\mathbb{R}^{m \\times n}$ which is frozen during fine-tuning.\nCompared to LoRA, PiSSA **updates the principal components** while **freezing the \"residual\" parts**, allowing faster convergence and enhanced performance. Comparative experiments of PiSSA and LoRA across 11 different models, ranging from 184M to 70B, encompassing 5 NLG and 8 NLU tasks, reveal that PiSSA consistently outperforms LoRA under identical experimental setups. On the GSM8K benchmark, Gemma-7B fine-tuned with PiSSA achieves an accuracy of 77.7\\%, surpassing LoRA's 74.53\\% by 3.25\\%. Due to the same architecture, PiSSA is also compatible with quantization to further reduce the memory requirement of fine-tuning. Compared to QLoRA, QPiSSA (PiSSA with 4-bit quantization) exhibits smaller quantization errors in the initial stages. Fine-tuning LLaMA-3-70B on GSM8K, QPiSSA attains an accuracy of 86.05\\%, exceeding the performances of QLoRA at 81.73\\%. Leveraging a fast SVD technique, PiSSA can be initialized in only a few seconds, presenting a negligible cost for transitioning from LoRA to PiSSA.", "keywords": "PEFT;LoRA;LLM;Finetune;SVD", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/33e6c6f0c81e7ae2a359621a7f4c7de49d9ce7c7.zip", "author": "Fanxu Meng;Zhaohui Wang;Muhan Zhang", "authorids": "~Fanxu_Meng1;~Zhaohui_Wang2;~Muhan_Zhang1", "gender": "M;F;M", "homepage": "https://fxmeng.github.io/;;https://muhanzhang.github.io/", "dblp": ";;157/5518", "google_scholar": "xvfuhRUAAAAJ;https://scholar.google.com/scholar?hl=en;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";;0000-0002-7680-6401", "linkedin": ";;jerry-muhan-zhang-a33a1777/", "or_profile": "~Fanxu_Meng1;~Zhaohui_Wang2;~Muhan_Zhang1", "aff": "Peking University;Chinese Academy of Sciences;Peking University", "aff_domain": "pku.edu.cn;ict.ac.cn;pku.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmeng2024pissa,\ntitle={Pi{SSA}: Principal Singular Values and Singular Vectors Adaptation of Large Language Models},\nauthor={Fanxu Meng and Zhaohui Wang and Muhan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6ZBHIEtdP4}\n}", "github": "", "reviewers": "m1t9;Bb8M;XzvL;x9Pi", "pdf_size": 1782081, "rating": "7;7;8;8", "confidence": "4;4;4;4", "soundness": "3;4;3;4", "novelty": "3;4;2;4", "presentation": "3;3;3;4", "wc_summary": "73;159;78;91", "wc_strengths": "39;162;44;37", "wc_weaknesses": "242;387;47;63", "wc_questions": "165;10;8;36", "wc_limitations": "27;10;12;11", "wc_review": "546;728;189;238", "wc_reply_reviewers": "120;28;25;0", "wc_reply_authors": "604;47;473;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;4;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 34.54978292261762 ], "wc_strengths_avg": [ 70.5, 52.88903478037768 ], "wc_weaknesses_avg": [ 184.75, 139.6251678602393 ], "wc_questions_avg": [ 54.75, 64.6040826883255 ], "wc_limitations_avg": [ 15.0, 6.96419413859206 ], "wc_review_avg": [ 425.25, 221.9880345874525 ], "wc_reply_reviewers_avg": [ 43.25, 45.62551369573826 ], "wc_reply_authors_avg": [ 281.0, 262.1593027149714 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5007526582633874490&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;ict.ac.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.cas.cn", "aff_unique_abbr": "Peking U;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Beating Adversarial Low-Rank MDPs with Unknown Transition and Bandit Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96516", "id": "6ZXrvoIox1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6ZXrvoIox1", "openreview": "https://openreview.net/forum?id=6ZXrvoIox1", "poster": "", "project": "", "author_site": "Haolin Liu, Zak Mhammedi, Chen-Yu Wei, Julian Zimmert", "tldr": "", "abstract": "We consider regret minimization in low-rank MDPs with fixed transition and adversarial losses. Previous work has investigated this problem under either full-information loss feedback with unknown transitions (Zhao et al., 2024), or bandit loss feedback with known transitions (Foster et al., 2022). First, we improve the $poly(d, A, H)T^{5/6}$ regret bound of Zhao et al. (2024) to $poly(d, A, H)T^{2/3}$ for the full-information unknown transition setting, where $d$ is the rank of the transitions, $A$ is the number of actions, $H$ is the horizon length, and $T$ is the number of episodes. Next, we initiate the study on the setting with bandit loss feedback and unknown transitions. Assuming that the loss has a linear structure, we propose both model-based and model-free algorithms achieving $poly(d, A, H)T^{2/3}$ regret, though they are computationally inefficient. We also propose oracle-efficient model-free algorithms with $poly(d, A, H)T^{4/5}$ regret. We show that the linear structure is necessary for the bandit case\u2014without structure on the reward function, the regret has to scale polynomially with the number of states. This is contrary to the full-information case (Zhao et al., 2024), where the regret can be independent of the number of states even for unstructured reward functions.", "keywords": "low-rank MDPs;adversarial losses;bandit feedback", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Haolin Liu;Zakaria Mhammedi;Chen-Yu Wei;Julian Zimmert", "authorids": "~Haolin_Liu8;~Zakaria_Mhammedi1;~Chen-Yu_Wei1;~Julian_Zimmert1", "gender": "M;M;M;", "homepage": "https://liuhl2000.github.io/;;https://bahh723.github.io/;", "dblp": ";192/1360;183/1729;190/7636", "google_scholar": ";;2L2cR-kAAAAJ;", "orcid": "0000-0002-8247-9742;;;", "linkedin": ";;;", "or_profile": "~Haolin_Liu8;~Zakaria_Mhammedi1;~Chen-Yu_Wei1;~Julian_Zimmert1", "aff": "University of Virginia, Charlottesville;Research, Google;University of Virginia, Charlottesville;Google", "aff_domain": "virginia.edu;research.google.com;virginia.edu;google.com", "position": "PhD student;Researcher;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nliu2024beating,\ntitle={Beating Adversarial Low-Rank {MDP}s with Unknown Transition and Bandit Feedback},\nauthor={Haolin Liu and Zakaria Mhammedi and Chen-Yu Wei and Julian Zimmert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6ZXrvoIox1}\n}", "github": "", "reviewers": "XUkU;3DuN;rtoB;GQHM", "pdf_size": 728462, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "314;120;54;44", "wc_strengths": "110;43;116;22", "wc_weaknesses": "135;434;73;120", "wc_questions": "55;180;32;72", "wc_limitations": "12;57;1;2", "wc_review": "626;834;276;260", "wc_reply_reviewers": "51;123;12;52", "wc_reply_authors": "0;243;0;270", "reply_reviewers": "1;2;1;2", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 133.0, 108.50345616615168 ], "wc_strengths_avg": [ 72.75, 40.9839907768875 ], "wc_weaknesses_avg": [ 190.5, 142.43331773149146 ], "wc_questions_avg": [ 84.75, 56.79513623542072 ], "wc_limitations_avg": [ 18.0, 22.9237867726953 ], "wc_review_avg": [ 499.0, 242.48917501612314 ], "wc_reply_reviewers_avg": [ 59.5, 40.05308976845607 ], "wc_reply_authors_avg": [ 128.25, 128.60477246198914 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7950312241173445218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "virginia.edu;research.google.com;virginia.edu;google.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Virginia;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.virginia.edu;https://research.google", "aff_unique_abbr": "UVA;Google", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Charlottesville;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DreamMesh4D: Video-to-4D Generation with Sparse-Controlled Gaussian-Mesh Hybrid Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96515", "id": "6ZwJSk2kvU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6ZwJSk2kvU", "openreview": "https://openreview.net/forum?id=6ZwJSk2kvU", "poster": "/media/PosterPDFs/NeurIPS%202024/96515.png?t=1730083501.5456355", "project": "", "author_site": "Zhiqi Li, Yiming Chen, Peidong Liu", "tldr": "", "abstract": "Recent advancements in 2D/3D generative techniques have facilitated the generation of dynamic 3D objects from monocular videos. Previous methods mainly rely on the implicit neural radiance fields (NeRF) or explicit Gaussian Splatting as the underlying representation, and struggle to achieve satisfactory spatial-temporal consistency and surface appearance. Drawing inspiration from modern 3D animation pipelines, we introduce DreamMesh4D, a novel framework combining mesh representation with geometric skinning technique to generate high-quality 4D object from a monocular video. Instead of utilizing classical texture map for appearance, we bind Gaussian splats to triangle face of mesh for differentiable optimization of both the texture and mesh vertices. In particular, DreamMesh4D begins with a coarse mesh obtained through an image-to-3D generation procedure. Sparse points are then uniformly sampled across the mesh surface, and are used to build a deformation graph to drive the motion of the 3D object for the sake of computational efficiency and providing additional constraint. For each step, transformations of sparse control points are predicted using a deformation network, and the mesh vertices as well as the surface Gaussians are deformed via a novel geometric skinning algorithm. The skinning algorithm is a hybrid approach combining LBS (linear blending skinning) and DQS (dual-quaternion skinning), mitigating drawbacks associated with both approaches. The static surface Gaussians and mesh vertices as well as the dynamic deformation network are learned via reference view photometric loss, score distillation loss as well as other regularization losses in a two-stage manner. Extensive experiments demonstrate superior performance of our method in terms of both rendering quality and spatial-temporal consistency. Furthermore, our method is compatible with modern graphic pipelines, showcasing its potential in the 3D gaming and film industry.", "keywords": "4D generation;3D Gaussian Splatting;dynamic mesh;diffusion model", "primary_area": "generative_models", "supplementary_material": "", "author": "Zhiqi Li;Yiming Chen;Peidong Liu", "authorids": "~Zhiqi_Li5;~Yiming_Chen9;~Peidong_Liu3", "gender": "M;M;M", "homepage": "https://lizhiqi49.github.io;https://github.com/codejoker-c;https://ethliup.github.io/", "dblp": ";;07/11190", "google_scholar": ";;https://scholar.google.com.sg/citations?user=XZczNEEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhiqi_Li5;~Yiming_Chen9;~Peidong_Liu1", "aff": "Westlake University;Tongji University;Westlake University", "aff_domain": "westlake.edu;tongji.edu.cn;westlake.edu.cn", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nli2024dreammeshd,\ntitle={DreamMesh4D: Video-to-4D Generation with Sparse-Controlled Gaussian-Mesh Hybrid Representation},\nauthor={Zhiqi Li and Yiming Chen and Peidong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6ZwJSk2kvU}\n}", "github": "", "reviewers": "ow51;7V8w;2HE6", "pdf_size": 12755345, "rating": "5;5;7", "confidence": "5;2;3", "soundness": "3;3;3", "novelty": "1;3;3", "presentation": "3;3;3", "wc_summary": "55;136;85", "wc_strengths": "30;114;161", "wc_weaknesses": "126;104;243", "wc_questions": "31;94;189", "wc_limitations": "7;2;9", "wc_review": "249;450;687", "wc_reply_reviewers": "222;126;82", "wc_reply_authors": "379;40;17", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.0, 33.436506994600975 ], "wc_strengths_avg": [ 101.66666666666667, 54.18691929074971 ], "wc_weaknesses_avg": [ 157.66666666666666, 61.004553564103354 ], "wc_questions_avg": [ 104.66666666666667, 64.94270979529232 ], "wc_limitations_avg": [ 6.0, 2.943920288775949 ], "wc_review_avg": [ 462.0, 179.0139659356219 ], "wc_reply_reviewers_avg": [ 143.33333333333334, 58.45416057808793 ], "wc_reply_authors_avg": [ 145.33333333333334, 165.4938736697592 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1889822365046136, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16284688208416538377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "westlake.edu;tongji.edu.cn;westlake.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Westlake University;Tongji University", "aff_unique_dep": ";", "aff_unique_url": "https://www.westlake.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "WU;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Graph neural networks and non-commuting operators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96514", "id": "6aJrEC28hR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6aJrEC28hR", "openreview": "https://openreview.net/forum?id=6aJrEC28hR", "poster": "/media/PosterPDFs/NeurIPS%202024/96514.png?t=1730333219.9520137", "project": "", "author_site": "Mauricio Velasco, Kaiying O'Hare, Bernardo Rychtenberg, Soledad Villar", "tldr": "", "abstract": "Graph neural networks (GNNs) provide state-of-the-art results in a wide variety of tasks which typically involve predicting features at the vertices of a graph. They are built from layers of graph convolutions which serve as a powerful inductive bias for describing the flow of information among the vertices. Often, more than one data modality is available. This work considers a setting in which several graphs have the same vertex set and a common vertex-level learning task. This generalizes standard GNN models to GNNs with several graph operators that do not commute. We may call this model graph-tuple neural networks (GtNN). \n\nIn this work, we develop the mathematical theory to address the stability and transferability of GtNNs using properties of non-commuting non-expansive operators. We develop a limit theory of graphon-tuple neural networks and use it to prove a universal transferability theorem that guarantees that all graph-tuple neural networks are transferable on convergent graph-tuple sequences. In particular, there is no non-transferable energy under the convergence we consider here. Our theoretical results extend well-known transferability theorems for GNNs to the case of several simultaneous graphs (GtNNs) and provide a strict improvement on what is currently known even in the GNN case.\n\nWe illustrate our theoretical results with simple experiments on synthetic and real-world data. To this end, we derive a training procedure that provably enforces the stability of the resulting model.", "keywords": "graph neural networks;trasferability;stability;non-commuting operators;graphons", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Mauricio Velasco;Kaiying O'Hare;Bernardo Rychtenberg;Soledad Villar", "authorids": "~Mauricio_Velasco1;~Kaiying_O'Hare1;~Bernardo_Rychtenberg1;~Soledad_Villar2", "gender": ";;M;F", "homepage": "https://mauricio-velasco.github.io/webpage/;https://sites.google.com/view/kaiying-xie;https://www.ucu.edu.uy/institucional/docente/bernardo-rychtenberg--305d%20/%3E%20%20%20%20%20%20%20%20%3C!--%20TWITTER%20--%3E%20%20%20%20%20%20%20%20%3Cmeta%20name=;https://www.ams.jhu.edu/villar/", "dblp": ";;;https://dblp.uni-trier.de/pers/hd/v/Villar:Soledad", "google_scholar": ";;;JBGlsDoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mauricio_Velasco1;~Kaiying_O'Hare1;~Bernardo_Rychtenberg1;~Soledad_Villar1", "aff": "Universidad Cat\u00f3lica del Uruguay;Johns Hopkins University;Universidad Cat\u00f3lica del Uruguay;Johns Hopkins University", "aff_domain": "ucu.edu.uy;jh.edu;ucu.edu.uy;jhu.edu", "position": "Associate Professor;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nvelasco2024graph,\ntitle={Graph neural networks and non-commuting operators},\nauthor={Mauricio Velasco and Kaiying O'Hare and Bernardo Rychtenberg and Soledad Villar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6aJrEC28hR}\n}", "github": "", "reviewers": "hLUB;eJ8g;RRVw", "pdf_size": 893396, "rating": "5;6;7", "confidence": "3;2;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "76;62;120", "wc_strengths": "63;151;48", "wc_weaknesses": "37;71;323", "wc_questions": "66;43;69", "wc_limitations": "33;14;1", "wc_review": "275;341;561", "wc_reply_reviewers": "18;24;8", "wc_reply_authors": "0;0;489", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 86.0, 24.711670657134185 ], "wc_strengths_avg": [ 87.33333333333333, 45.433712397538265 ], "wc_weaknesses_avg": [ 143.66666666666666, 127.56523385659939 ], "wc_questions_avg": [ 59.333333333333336, 11.61416759345623 ], "wc_limitations_avg": [ 16.0, 13.140268896284683 ], "wc_review_avg": [ 392.3333333333333, 122.2711013372425 ], "wc_reply_reviewers_avg": [ 16.666666666666668, 6.599663291074443 ], "wc_reply_authors_avg": [ 163.0, 230.5168106668145 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12156839758874729615&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "ucu.edu.uy;jh.edu;ucu.edu.uy;jhu.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Universidad Cat\u00f3lica del Uruguay;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucu.edu.uy;https://www.jhu.edu", "aff_unique_abbr": "UCU;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Uruguay;United States" }, { "title": "Diffusion Imitation from Observation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96513", "id": "6b6TfDBDOO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6b6TfDBDOO", "openreview": "https://openreview.net/forum?id=6b6TfDBDOO", "poster": "", "project": "", "author_site": "Bo-Ruei Huang, Chun-Kai Yang, Chun-Mao Lai, Dai-Jie Wu, Shao-Hua Sun", "tldr": "", "abstract": "Learning from Observation (LfO) aims to imitate experts by learning from state-only demonstrations without requiring action labels. \nExisting adversarial imitation learning approaches learn a generator agent policy to produce state transitions that are indistinguishable to a discriminator that learns to classify agent and expert state transitions. Despite its simplicity in formulation, these methods are often sensitive to hyperparameters and brittle to train. Motivated by the recent success of diffusion models in generative modeling, we propose to integrate a diffusion model into the adversarial imitation learning from observation framework. Specifically, we employ a diffusion model to capture expert and agent transitions by generating the next state, given the current state. Then, we reformulate the learning objective to train the diffusion model as a binary classifier and use it to provide ``realness'' rewards for policy learning. Our proposed framework, Diffusion Imitation from Observation (DIFO), demonstrates superior performance in various continuous control domains, including navigation, locomotion, manipulation, and games.", "keywords": "Imitation from Observation;Learning from Observation;Imitation Learning;Diffusion Model;Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Bo-Ruei Huang;Chun-Kai Yang;Chun-Mao Lai;Dai-Jie Wu;Shao-Hua Sun", "authorids": "~Bo-Ruei_Huang2;~Chun-Kai_Yang1;~Chun-Mao_Lai1;~Dai-Jie_Wu1;~Shao-Hua_Sun1", "gender": "M;M;M;;M", "homepage": "https://borueihuang.com/;https://yck1130.github.io/;https://mecoli1219.github.io/;;http://shaohua0116.github.io", "dblp": "370/8718;296/7419;325/4767;382/5890;158/9680", "google_scholar": "EN01AeQAAAAJ;;;Tzzq0zYAAAAJ;uXsfnaQAAAAJ", "orcid": "0009-0001-0772-8681;;;;0000-0001-7579-6734", "linkedin": "borueihuang/;yang-chun-kai;;dai-jie-wu/;shaohua0116/", "or_profile": "~Bo-Ruei_Huang2;~Chun-Kai_Yang1;~Chun-Mao_Lai1;~Dai-Jie_Wu1;~Shao-Hua_Sun1", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "position": "Undergrad student;Undergrad student;Undergrad student;Research Assistant;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024diffusion,\ntitle={Diffusion Imitation from Observation},\nauthor={Bo-Ruei Huang and Chun-Kai Yang and Chun-Mao Lai and Dai-Jie Wu and Shao-Hua Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6b6TfDBDOO}\n}", "github": "", "reviewers": "Dab5;AzJp;7W6h;P9FE", "pdf_size": 14017674, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "3;3;3;4", "wc_summary": "112;81;37;91", "wc_strengths": "44;24;23;71", "wc_weaknesses": "279;37;125;76", "wc_questions": "45;196;62;47", "wc_limitations": "10;10;16;45", "wc_review": "490;348;263;330", "wc_reply_reviewers": "33;34;91;26", "wc_reply_authors": "164;15;144;15", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 27.36215452043205 ], "wc_strengths_avg": [ 40.5, 19.5 ], "wc_weaknesses_avg": [ 129.25, 91.90858229784638 ], "wc_questions_avg": [ 87.5, 62.98610957981133 ], "wc_limitations_avg": [ 20.25, 14.49784466739798 ], "wc_review_avg": [ 357.75, 82.66309636107276 ], "wc_reply_reviewers_avg": [ 46.0, 26.16295090390226 ], "wc_reply_authors_avg": [ 84.5, 69.85878613317011 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13800587869056649843&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "email": "ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Building Timeseries Dataset: Empowering Large-Scale Building Analytics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97839", "id": "6cCFK69vJI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6cCFK69vJI", "openreview": "https://openreview.net/forum?id=6cCFK69vJI", "poster": "/media/PosterPDFs/NeurIPS%202024/97839.png?t=1733743613.6249955", "project": "", "author_site": "Arian Prabowo, Xiachong LIN, Imran Razzak, Hao Xue, Emily Yap, Matthew Amos, Flora Salim", "tldr": "", "abstract": "Buildings play a crucial role in human well-being, influencing occupant comfort, health, and safety.\nAdditionally, they contribute significantly to global energy consumption, accounting for one-third of total energy usage, and carbon emissions.\nOptimizing building performance presents a vital opportunity to combat climate change and promote human flourishing.\nHowever, research in building analytics has been hampered by the lack of accessible, available, and comprehensive real-world datasets on multiple building operations.\nIn this paper, we introduce the Building TimeSeries (BTS) dataset.\nOur dataset covers three buildings over a three-year period, comprising more than ten thousand timeseries data points with hundreds of unique ontologies.\nMoreover, the metadata is standardized using the Brick schema.\nTo demonstrate the utility of this dataset, we performed benchmarks on two tasks: timeseries ontology classification and zero-shot forecasting.\nThese tasks represent an essential initial step in addressing challenges related to interoperability in building analytics.\nAccess to the dataset and the code used for benchmarking are available here: https://github.com/cruiseresearchgroup/DIEF\\_BTS", "keywords": "Buildings;energy consumption;timeseries;classification;metadata standardization.", "primary_area": "", "supplementary_material": "/attachment/5e9b22e1c3d5db6ebb1061bdcfe1130c701bf436.pdf", "author": "Arian Prabowo;Xiachong LIN;Imran Razzak;Hao Xue;Emily W. Yap;Matt Amos;Flora D. Salim", "authorids": "~Arian_Prabowo1;~Xiachong_LIN1;~Imran_Razzak2;~Hao_Xue1;~Emily_W._Yap1;~Matt_Amos1;~Flora_D._Salim1", "gender": ";F;M;M;F;;F", "homepage": "https://www.arianprabowo.com/;;https://imranrazzak.github.io/;https://haoxue01.github.io/;;;http://florasalim.com/", "dblp": "249/5419.html;;59/8379.html;156/1517-1;;;08/1554", "google_scholar": "ozZvUN4AAAAJ;;https://scholar.google.com/citations?hl=en;KwhLl7IAAAAJ;;;https://scholar.google.com.au/citations?user=Yz35RSYAAAAJ", "orcid": "0000-0002-0459-354X;;0000-0002-3930-6600;;0000-0002-8580-1696;0009-0008-8294-9936;0000-0002-1237-1664", "linkedin": "arianprabowo/;xiachong-lin-73125425b/;;;emilywjyap/;;https://au.linkedin.com/in/flora-salim-6958986", "or_profile": "~Arian_Prabowo1;~Xiachong_LIN1;~Imran_Razzak2;~Hao_Xue1;~Emily_W._Yap1;~Matt_Amos1;~Flora_Salim1", "aff": "University of New South Wales;University of New South Wales;University of New South Wales;University of New South Wales;University of Wollongong;CSIRO;University of New South Wales", "aff_domain": "unsw.edu.au;unsw.edu.au;unsw.edu.au;unsw.edu.au;uow.edu.au;csiro.au;unsw.edu.au", "position": "Postdoc;PhD student;Associate Professor;Lecturer;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nprabowo2024building,\ntitle={Building Timeseries Dataset: Empowering Large-Scale Building Analytics},\nauthor={Arian Prabowo and Xiachong LIN and Imran Razzak and Hao Xue and Emily W. Yap and Matt Amos and Flora D. Salim},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=6cCFK69vJI}\n}", "github": "", "reviewers": "eVsz;UAax;1N9M;oLK5", "pdf_size": 3397502, "rating": "6;7;7;8", "confidence": "4;3;4;5", "wc_summary_and_contributions": "87;78;36;213", "wc_strengths": "52;95;51;102", "wc_improvement": "120;80;137;43", "wc_limitations": "44;25;11;85", "wc_correctness": "7;8;46;12", "wc_clarity": "82;7;39;22", "wc_relation_to_prior_work": "5;89;39;18", "wc_documentation": "100;4;35;36", "wc_additional_feedback": "1;1;1;1", "wc_review": "498;387;395;532", "wc_reply_reviewers": "64;11;39;11", "wc_reply_authors": "133;109;119;109", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 103.5, 66.08517231573207 ], "wc_strengths_avg": [ 75.0, 23.632604596192948 ], "wc_improvement_avg": [ 95.0, 36.462309307009065 ], "wc_limitations_avg": [ 41.25, 27.842189209902298 ], "wc_correctness_avg": [ 18.25, 16.13032857693854 ], "wc_clarity_avg": [ 37.5, 28.075790282732914 ], "wc_relation_to_prior_work_avg": [ 37.75, 31.979485611873123 ], "wc_documentation_avg": [ 43.75, 34.93118234471888 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 453.0, 63.217877218394484 ], "wc_reply_reviewers_avg": [ 31.25, 22.094965489902897 ], "wc_reply_authors_avg": [ 117.5, 9.836157786453 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8650868043514291349&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "unsw.edu.au;unsw.edu.au;unsw.edu.au;unsw.edu.au;uow.edu.au;csiro.au;unsw.edu.au", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "University of New South Wales;University of Wollongong;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unsw.edu.au;https://www.uow.edu.au;https://www.csiro.au", "aff_unique_abbr": "UNSW;UOW;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Universal Rates of Empirical Risk Minimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96512", "id": "6cWDg9t3z5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6cWDg9t3z5", "openreview": "https://openreview.net/forum?id=6cWDg9t3z5", "poster": "", "project": "", "author_site": "Steve Hanneke, Mingyue Xu", "tldr": "", "abstract": "The well-known $\\textit{empirical risk minimization}$ (ERM) principle is the basis of many widely used machine learning algorithms, and plays an essential role in the classical PAC theory. A common description of a learning algorithm's performance is its so-called \u201clearning curve\u201d, that is, the decay of the expected error as a function of the input sample size. As the PAC model fails to explain the behavior of learning curves, recent research has explored an alternative universal learning model and has ultimately revealed a distinction between optimal universal and uniform learning rates (Bousquet et al., 2021). However, a basic understanding of such differences with a particular focus on the ERM principle has yet to be developed. \n \n In this paper, we consider the problem of universal learning by ERM in the realizable case and study the possible universal rates. Our main result is a fundamental $\\textit{tetrachotomy}$: there are only four possible universal learning rates by ERM, namely, the learning curves of any concept class learnable by ERM decay either at $e^{-n}$, $1/n$, $\\log{(n)}/n$, or arbitrarily slow rates. Moreover, we provide a complete characterization of which concept classes fall into each of these categories, via new complexity structures. We also develop new combinatorial dimensions which supply sharp asymptotically-valid constant factors for these rates, whenever possible.", "keywords": "Statistical learning theory;Universal learning;Empirical risk minimization;PAC learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Steve Hanneke;Mingyue Xu", "authorids": "~Steve_Hanneke1;~Mingyue_Xu1", "gender": "M;M", "homepage": "http://www.stevehanneke.com;https://www.cs.purdue.edu/people/graduate-students/xu1864.html", "dblp": "40/154;", "google_scholar": "fEhNO7YAAAAJ;lh1lDYYAAAAJ", "orcid": ";0009-0007-4942-7995", "linkedin": ";", "or_profile": "~Steve_Hanneke1;~Mingyue_Xu1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhanneke2024universal,\ntitle={Universal Rates of Empirical Risk Minimization},\nauthor={Steve Hanneke and Mingyue Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6cWDg9t3z5}\n}", "github": "", "reviewers": "qCB2;MghQ;EqSd;q1H2;NPxP", "pdf_size": 574684, "rating": "5;6;7;7;9", "confidence": "1;3;2;4;4", "soundness": "3;4;3;3;4", "novelty": "3;3;4;3;3", "presentation": "3;4;3;3;4", "wc_summary": "183;49;61;61;166", "wc_strengths": "11;58;56;42;93", "wc_weaknesses": "14;48;44;68;68", "wc_questions": "39;18;37;2;84", "wc_limitations": "39;1;1;1;7", "wc_review": "286;174;199;174;418", "wc_reply_reviewers": "34;7;10;12;34", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 104.0, 57.97930665332244 ], "wc_strengths_avg": [ 52.0, 26.510375327407193 ], "wc_weaknesses_avg": [ 48.4, 19.855477833585372 ], "wc_questions_avg": [ 36.0, 27.546324618721822 ], "wc_limitations_avg": [ 9.8, 14.783774890061062 ], "wc_review_avg": [ 250.2, 93.4588679580488 ], "wc_reply_reviewers_avg": [ 19.4, 12.026637102698327 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7497771504927777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ev_x5YsEIq0J:scholar.google.com/&scioq=Universal+Rates+of+Empirical+Risk+Minimization&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "purdue.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Understanding the Transferability of Representations via Task-Relatedness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96511", "id": "6cdYMkxxNt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6cdYMkxxNt", "openreview": "https://openreview.net/forum?id=6cdYMkxxNt", "poster": "/media/PosterPDFs/NeurIPS%202024/96511.png?t=1730822261.1428037", "project": "", "author_site": "Akshay Mehra, Yunbei Zhang, Jihun Hamm", "tldr": "", "abstract": "The growing popularity of transfer learning due to the availability of models pre-trained on vast amounts of data, makes it imperative to understand when the knowledge of these pre-trained models can be transferred to obtain high-performing models on downstream target tasks. However, the exact conditions under which transfer learning succeeds in a cross-domain cross-task setting are still poorly understood. To bridge this gap, we propose a novel analysis that analyzes the transferability of the representations of pre-trained models to downstream tasks in terms of their relatedness to a given reference task. Our analysis leads to an upper bound on transferability in terms of task-relatedness, quantified using the difference between the class priors, label sets, and features of the two tasks.Our experiments using state-of-the-art pre-trained models show the effectiveness of task-relatedness in explaining transferability on various vision and language tasks. The efficient computability of task-relatedness even without labels of the target task and its high correlation with the model's accuracy after end-to-end fine-tuning on the target task makes it a useful metric for transferability estimation. Our empirical results of using task-relatedness on the problem of selecting the best pre-trained model from a model zoo for a target task highlight its utility for practical problems.", "keywords": "Transfer Learning Analysis;Distribution Shift", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/9efe28cc314f83214a6ab6c8eee4631a1f32417a.zip", "author": "Akshay Mehra;Yunbei Zhang;Jihun Hamm", "authorids": "~Akshay_Mehra1;~Yunbei_Zhang1;~Jihun_Hamm2", "gender": "M;;M", "homepage": "https://akshaymehra24.github.io/;https://zybeich.github.io/;https://www.cs.tulane.edu/~jhamm3", "dblp": "215/3754;351/0497;69/7426.html", "google_scholar": "5hTbBDMAAAAJ;tVb54uUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Akshay_Mehra1;~Yunbei_Zhang1;~Jihun_Hamm1", "aff": "Tulane University;Tulane University;Tulane University", "aff_domain": "tulane.edu;tulane.edu;cs.tulane.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nmehra2024understanding,\ntitle={Understanding the Transferability of Representations via Task-Relatedness},\nauthor={Akshay Mehra and Yunbei Zhang and Jihun Hamm},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6cdYMkxxNt}\n}", "github": "", "reviewers": "sD5Z;dEJE;E8p5;3SCA", "pdf_size": 3635115, "rating": "5;5;6;6", "confidence": "2;3;4;4", "soundness": "2;2;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "106;89;74;54", "wc_strengths": "96;71;28;37", "wc_weaknesses": "185;257;38;517", "wc_questions": "143;3;43;55", "wc_limitations": "2;1;6;6", "wc_review": "532;421;189;669", "wc_reply_reviewers": "13;111;0;721", "wc_reply_authors": "20;218;0;1085", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;1;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 19.149086140074676 ], "wc_strengths_avg": [ 58.0, 27.175356483402386 ], "wc_weaknesses_avg": [ 249.25, 173.56897044114768 ], "wc_questions_avg": [ 61.0, 51.10772935672255 ], "wc_limitations_avg": [ 3.75, 2.277608394786075 ], "wc_review_avg": [ 452.75, 175.79586883655713 ], "wc_reply_reviewers_avg": [ 211.25, 297.415849443166 ], "wc_reply_authors_avg": [ 330.75, 443.72478801617564 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n2cS8gvHQX0J:scholar.google.com/&scioq=Understanding+the+Transferability+of+Representations+via+Task-Relatedness&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "tulane.edu;tulane.edu;cs.tulane.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tulane University", "aff_unique_dep": "", "aff_unique_url": "https://www.tulane.edu", "aff_unique_abbr": "Tulane", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CogVLM: Visual Expert for Pretrained Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96510", "id": "6dYBP3BIwx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6dYBP3BIwx", "openreview": "https://openreview.net/forum?id=6dYBP3BIwx", "poster": "", "project": "", "author_site": "Weihan Wang, Qingsong Lv, Wenmeng Yu, Wenyi Hong, Ji Qi, Yan Wang, Junhui Ji, Zhuoyi Yang, Lei Zhao, Song XiXuan, Jiazheng Xu, Keqin Chen, Bin Xu, Juanzi Li, Yuxiao Dong, Ming Ding, Jie Tang", "tldr": "", "abstract": "We introduce CogVLM, a powerful open-source visual language foundation model. Different from the popular \\emph{shallow alignment} method which maps image features into the input space of language model, CogVLM bridges the gap between the frozen pretrained language model and image encoder by a trainable visual expert module in the attention and FFN layers. As a result, CogVLM enables a deep fusion of vision language features without sacrificing any performance on NLP tasks. CogVLM-17B achieves state-of-the-art performance on 17 classic cross-modal benchmarks, including 1) image captioning datasets: NoCaps, Flicker30k, 2) VQA datasets: OKVQA, TextVQA, OCRVQA, ScienceQA, 3) LVLM benchmarks: MM-Vet, MMBench, SEED-Bench, LLaVABench, POPE, MMMU, MathVista, 4) visual grounding datasets: RefCOCO, RefCOCO+, RefCOCOg, Visual7W. Codes and checkpoints are available at Github.", "keywords": "Multimodal Learning;Vision and Language;Representation Learning;Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Weihan Wang;Qingsong Lv;Wenmeng Yu;Wenyi Hong;Ji Qi;Yan Wang;Junhui Ji;Zhuoyi Yang;Lei Zhao;Song XiXuan;Jiazheng Xu;Keqin Chen;Bin Xu;Juanzi Li;Yuxiao Dong;Ming Ding;Jie Tang", "authorids": "~Weihan_Wang2;~Qingsong_Lv4;~Wenmeng_Yu1;~Wenyi_Hong1;~Ji_Qi3;~Yan_Wang42;~Junhui_Ji1;~Zhuoyi_Yang1;~Lei_Zhao17;~Song_XiXuan1;~Jiazheng_Xu1;~Keqin_Chen1;~Bin_Xu1;~Juanzi_Li1;~Yuxiao_Dong1;~Ming_Ding1;~Jie_Tang1", "gender": "M;;M;F;M;M;M;M;M;M;M;M;M;;M;M;", "homepage": "https://github.com/mactavish91;;;;https://qijimrc.github.io;https://github.com/lykeven;;;;https://github.com/songxxzp;https://github.com/xujz18;;http://keg.cs.tsinghua.edu.cn/persons/xubin/;;https://keg.cs.tsinghua.edu.cn/yuxiao/;;", "dblp": ";;268/1056;216/5413;;;;230/8320;;;313/9484;240/6782;;;17/9267;48/3462-4;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;tJNCPogAAAAJ;AE5tvE4AAAAJ;JSEzrlwAAAAJ;vv6bZjMAAAAJ;;;;;;7--T2_4AAAAJ;;https://scholar.google.com.tw/citations?user=h-BY2wgAAAAJ;;https://scholar.google.com.hk/citations?hl=en;Va50YzkAAAAJ;", "orcid": ";;;;;;0009-0009-2811-1865;;;;;;0000-0003-3040-4391;;0000-0002-6092-2002;;", "linkedin": ";;;wenyi-hong;;;;zhuoyi-yang-4a1051210/;%E7%A3%8A-%E8%B5%B5-0149671b4/;;;;;;;;", "or_profile": "~Weihan_Wang2;~Qingsong_Lv4;~Wenmeng_Yu1;~Wenyi_Hong1;~Ji_Qi3;~Yan_Wang42;~Junhui_Ji1;~Zhuoyi_Yang1;~Lei_Zhao17;~Song_XiXuan1;~Jiazheng_Xu1;~Keqin_Chen1;~Bin_Xu1;~Juanzi_Li1;~Yuxiao_Dong1;~Ming_Ding1;~Jie_Tang1", "aff": "Tsinghua University;Tsinghua University;Zhipu AI;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;;Zhipu.AI;Tsinghua University;;Tsinghua University;Tsinghua University;The Insititute of Advanced Computing Technology, Beijing University of Aeronautics and Astronautics;Department of Computer Science, Tsinghua University;;Tsinghua University;ZHIPU AI;", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;aminer.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;;zhipuai.cn;tsinghua.edu.cn;;cs.tsinghua.edu.cn;cs.tsinghua.edu.cn;act.buaa.edu.cn;cs.tsinghuae.du.cn;;tsinghua.edu.cn;zhipuai.cn;", "position": "MS student;MS student;Researcher;PhD student;PhD student;;Researcher;PhD student;;Undergrad student;PhD student;MS student;Full Professor;;Associate Professor;Principal Researcher;", "bibtex": "@inproceedings{\nwang2024cogvlm,\ntitle={Cog{VLM}: Visual Expert for Pretrained Language Models},\nauthor={Weihan Wang and Qingsong Lv and Wenmeng Yu and Wenyi Hong and Ji Qi and Yan Wang and Junhui Ji and Zhuoyi Yang and Lei Zhao and Song XiXuan and Jiazheng Xu and Keqin Chen and Bin Xu and Juanzi Li and Yuxiao Dong and Ming Ding and Jie Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6dYBP3BIwx}\n}", "github": "", "reviewers": "7nCy;gmvD;nFmo;LKA5", "pdf_size": 13515440, "rating": "4;6;6;8", "confidence": "5;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "107;91;85;48", "wc_strengths": "52;77;58;62", "wc_weaknesses": "65;233;136;192", "wc_questions": "29;50;2;1", "wc_limitations": "1;21;8;1", "wc_review": "254;472;289;304", "wc_reply_reviewers": "22;114;15;0", "wc_reply_authors": "0;0;0;54", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.75, 21.614520582238228 ], "wc_strengths_avg": [ 62.25, 9.229707470987366 ], "wc_weaknesses_avg": [ 156.5, 63.05751343020116 ], "wc_questions_avg": [ 20.5, 20.402205763103165 ], "wc_limitations_avg": [ 7.75, 8.166241485530538 ], "wc_review_avg": [ 329.75, 84.10818925645707 ], "wc_reply_reviewers_avg": [ 37.75, 44.73463423344378 ], "wc_reply_authors_avg": [ 13.5, 23.382685902179844 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 713, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1473375355099604609&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;aminer.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;;zhipuai.cn;tsinghua.edu.cn;;cs.tsinghua.edu.cn;cs.tsinghua.edu.cn;act.buaa.edu.cn;cs.tsinghuae.du.cn;;tsinghua.edu.cn;zhipuai.cn;", "author_num": 17, "aff_unique_index": "0;0;1;0;0;2;0;0;0;3;0;0;1", "aff_unique_norm": "Tsinghua University;Zhipu AI;Zhipu.AI;Beijing University of Aeronautics and Astronautics", "aff_unique_dep": ";;;The Insititute of Advanced Computing Technology", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.zhipu.ai;https://www.zhipu.ai;http://www.buaa.edu.cn", "aff_unique_abbr": "THU;Zhipu AI;Zhipu.AI;BUAA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Classifier Clustering and Feature Alignment for Federated Learning under Distributed Concept Drift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96509", "id": "6ejpSVIiIl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6ejpSVIiIl", "openreview": "https://openreview.net/forum?id=6ejpSVIiIl", "poster": "/media/PosterPDFs/NeurIPS%202024/96509.png?t=1730083972.688138", "project": "", "author_site": "Junbao Chen, Jingfeng Xue, Yong Wang, Zhenyan Liu, Lu Huang", "tldr": "", "abstract": "Data heterogeneity is one of the key challenges in federated learning, and many efforts have been devoted to tackling this problem. However, distributed concept drift with data heterogeneity, where clients may additionally experience different concept drifts, is a largely unexplored area. In this work, we focus on real drift, where the conditional distribution $P(\\mathcal{Y}|\\mathcal{X})$ changes. We first study how distributed concept drift affects the model training and find that local classifier plays a critical role in drift adaptation. Moreover, to address data heterogeneity, we study the feature alignment under distributed concept drift, and find two factors that are crucial for feature alignment: the conditional distribution $P(\\mathcal{Y}|\\mathcal{X})$ and the degree of data heterogeneity. Motivated by the above findings, we propose FedCCFA, a federated learning framework with classifier clustering and feature alignment. To enhance collaboration under distributed concept drift, FedCCFA clusters local classifiers at class-level and generates clustered feature anchors according to the clustering results. Assisted by these anchors, FedCCFA adaptively aligns clients' feature spaces based on the entropy of label distribution $P(\\mathcal{Y})$, alleviating the inconsistency in feature space. Our results demonstrate that FedCCFA significantly outperforms existing methods under various concept drift settings. Code is available at https://github.com/Chen-Junbao/FedCCFA.", "keywords": "federated learning;concept drift;data heterogeneity", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Junbao Chen;Jingfeng Xue;Yong Wang;Zhenyan Liu;Lu Huang", "authorids": "~Junbao_Chen4;~Jingfeng_Xue1;~Yong_Wang22;~Zhenyan_Liu1;~Lu_Huang1", "gender": "M;M;Not Specified;F;F", "homepage": ";https://cs.bit.edu.cn/szdw/jsml/js/xjf/index.htm;;;", "dblp": "335/8476;52/7663;84/2694-10;162/0920;30/1340-2.html", "google_scholar": "vukDLWgAAAAJ;;;;", "orcid": "0000-0001-7597-1184;;0000-0002-1572-068X;0009-0001-2773-2506;0000-0001-5131-6441", "linkedin": ";;;;", "or_profile": "~Junbao_Chen4;~Jingfeng_Xue1;~Yong_Wang22;~Zhenyan_Liu1;~Lu_Huang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "PhD student;Full Professor;Associate Professor;Lecturer;PhD student", "bibtex": "@inproceedings{\nchen2024classifier,\ntitle={Classifier Clustering and Feature Alignment for Federated Learning under Distributed Concept Drift},\nauthor={Junbao Chen and Jingfeng Xue and Yong Wang and Zhenyan Liu and Lu Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6ejpSVIiIl}\n}", "github": "", "reviewers": "uegQ;EQ7w;erp1;U5Wz;HDwb", "pdf_size": 1793642, "rating": "5;6;6;6;7", "confidence": "5;4;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;2;2;3", "wc_summary": "167;44;63;69;84", "wc_strengths": "129;69;41;38;59", "wc_weaknesses": "178;223;111;100;34", "wc_questions": "160;3;49;49;3", "wc_limitations": "114;1;18;3;1", "wc_review": "748;340;282;259;181", "wc_reply_reviewers": "11;21;21;0;14", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 85.4, 42.7672772105029 ], "wc_strengths_avg": [ 67.2, 32.95087252259035 ], "wc_weaknesses_avg": [ 129.2, 65.4626611130345 ], "wc_questions_avg": [ 52.8, 57.41219382674729 ], "wc_limitations_avg": [ 27.4, 43.765740025732455 ], "wc_review_avg": [ 362.0, 199.6246477767713 ], "wc_reply_reviewers_avg": [ 13.4, 7.761443164772903 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Yp-U4ECKqkIJ:scholar.google.com/&scioq=Classifier+Clustering+and+Feature+Alignment+for+Federated+Learning+under+Distributed+Concept+Drift&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Transfer Learning for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96508", "id": "6emETARnWi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6emETARnWi", "openreview": "https://openreview.net/forum?id=6emETARnWi", "poster": "", "project": "", "author_site": "Yidong Ouyang, Liyan Xie, Hongyuan Zha, Guang Cheng", "tldr": "", "abstract": "Diffusion models, a specific type of generative model, have achieved unprecedented performance in recent years and consistently produce high-quality synthetic samples. A critical prerequisite for their notable success lies in the presence of a substantial number of training samples, which can be impractical in real-world applications due to high collection costs or associated risks. Consequently, various finetuning and regularization approaches have been proposed to transfer knowledge from existing pre-trained models to specific target domains with limited data. This paper introduces the Transfer Guided Diffusion Process (TGDP), a novel approach distinct from conventional finetuning and regularization methods. \nWe prove that the optimal diffusion model for the target domain integrates pre-trained diffusion models on the source domain with additional guidance from a domain classifier. \nWe further extend TGDP to a conditional version for modeling the joint distribution of data and its corresponding labels, together with two additional regularization terms to enhance the model performance. We validate the effectiveness of TGDP on both simulated and real-world datasets.", "keywords": "Diffusion Model;Transfer Learning", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yidong Ouyang;Liyan Xie;Hongyuan Zha;Guang Cheng", "authorids": "~Yidong_Ouyang1;~Liyan_Xie2;~Hongyuan_Zha1;~Guang_Cheng1", "gender": "M;F;;M", "homepage": "https://yidongouyang.github.io/;https://mypage.cuhk.edu.cn/academics/xieliyan/;;http://www.stat.ucla.edu/~guangcheng/", "dblp": "270/0351;195/1316;z/HongyuanZha;99/4812", "google_scholar": "fQwCFK0AAAAJ;KtLwkBYAAAAJ;n1DQMIsAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yidong_Ouyang1;~Liyan_Xie2;~Hongyuan_Zha1;~Guang_Cheng1", "aff": "University of California, Los Angeles;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;University of California, Los Angeles", "aff_domain": "ucla.edu;cuhk.edu.cn;cuhk.edu.cn;ucla.edu", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nouyang2024transfer,\ntitle={Transfer Learning for Diffusion Models},\nauthor={Yidong Ouyang and Liyan Xie and Hongyuan Zha and Guang Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6emETARnWi}\n}", "github": "", "reviewers": "UUwS;brAM;53Zj;9nph", "pdf_size": 2549345, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "2;4;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "142;82;74;63", "wc_strengths": "29;86;75;44", "wc_weaknesses": "262;78;179;61", "wc_questions": "163;50;160;9", "wc_limitations": "1;18;24;5", "wc_review": "597;314;512;182", "wc_reply_reviewers": "335;13;50;20", "wc_reply_authors": "898;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.25, 30.629846555280032 ], "wc_strengths_avg": [ 58.5, 22.96192500641007 ], "wc_weaknesses_avg": [ 145.0, 81.22499615266227 ], "wc_questions_avg": [ 95.5, 67.58143236126325 ], "wc_limitations_avg": [ 12.0, 9.354143466934854 ], "wc_review_avg": [ 401.25, 162.99290628735963 ], "wc_reply_reviewers_avg": [ 104.5, 133.8030268715921 ], "wc_reply_authors_avg": [ 224.5, 388.8454062992129 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17785353713767541138&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "ucla.edu;cuhk.edu.cn;cuhk.edu.cn;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of California, Los Angeles;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.cuhk.edu.cn", "aff_unique_abbr": "UCLA;CUHK", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Los Angeles;Shenzhen", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "DreamClear: High-Capacity Real-World Image Restoration with Privacy-Safe Dataset Curation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96507", "id": "6eoGVqMiIj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6eoGVqMiIj", "openreview": "https://openreview.net/forum?id=6eoGVqMiIj", "poster": "/media/PosterPDFs/NeurIPS%202024/96507.png?t=1731774872.9236414", "project": "", "author_site": "Yuang Ai, Xiaoqiang Zhou, Huaibo Huang, Xiaotian Han, Zhengyu Chen, Quanzeng You, Hongxia Yang", "tldr": "", "abstract": "Image restoration (IR) in real-world scenarios presents significant challenges due to the lack of high-capacity models and comprehensive datasets.\nTo tackle these issues, we present a dual strategy: GenIR, an innovative data curation pipeline, and DreamClear, a cutting-edge Diffusion Transformer (DiT)-based image restoration model.\n**GenIR**, our pioneering contribution, is a dual-prompt learning pipeline that overcomes the limitations of existing datasets, which typically comprise only a few thousand images and thus offer limited generalizability for larger models. \nGenIR streamlines the process into three stages: image-text pair construction, dual-prompt based fine-tuning, and data generation \\& filtering. This approach circumvents the laborious data crawling process, ensuring copyright compliance and providing a cost-effective, privacy-safe solution for IR dataset construction. The result is a large-scale dataset of one million high-quality images.\nOur second contribution, **DreamClear**, is a DiT-based image restoration model. It utilizes the generative priors of text-to-image (T2I) diffusion models and the robust perceptual capabilities of multi-modal large language models (MLLMs) to achieve photorealistic restoration. To boost the model's adaptability to diverse real-world degradations, we introduce the Mixture of Adaptive Modulator (MoAM). It employs token-wise degradation priors to dynamically integrate various restoration experts, thereby expanding the range of degradations the model can address.\nOur exhaustive experiments confirm DreamClear's superior performance, underlining the efficacy of our dual strategy for real-world image restoration. Code and pre-trained models are available at: https://github.com/shallowdream204/DreamClear.", "keywords": "Image restoration;Dataset Curation;Diffusion transformer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuang Ai;Xiaoqiang Zhou;Huaibo Huang;Xiaotian Han;Zhengyu Chen;Quanzeng You;Hongxia Yang", "authorids": "~Yuang_Ai2;~Xiaoqiang_Zhou2;~Huaibo_Huang1;~Xiaotian_Han2;~Zhengyu_Chen6;~Quanzeng_You3;~Hongxia_Yang2", "gender": "M;M;M;M;Not Specified;M;F", "homepage": "https://shallowdream204.github.io/;https://xiaoqiangzhou.cn/;https://people.ucas.edu.cn/~huanghuaibo;https://hanxiaotian.github.io/;;https://qzyou.github.io/;https://www4.comp.polyu.edu.hk/~hongxyang/", "dblp": "344/3491.html;13/1515;211/7251.html;;;33/9972.html;", "google_scholar": "2Qp7Y5kAAAAJ;Z2BTkNIAAAAJ;XMvLciUAAAAJ;5fHHi24AAAAJ;;c5KJsIgAAAAJ;iJlC5mMAAAAJ", "orcid": ";;0000-0001-5866-2283;0009-0005-9120-0490;;0000-0003-3608-0607;", "linkedin": ";;;hxt872675353/;czycmu/;quanzeng-you-5b98a55a/;", "or_profile": "~Yuang_Ai2;~Xiaoqiang_Zhou2;~Huaibo_Huang1;~Xiaotian_Han2;~Zhengyu_Chen6;~Quanzeng_You3;~Hongxia_Yang2", "aff": "Institute of Automation, Chinese Academy of Sciences;University of Science and Technology of China;Institute of Automation, Chinese Academy of Sciences;ByteDance;ByteDance Inc.;ByteDance;ByteDance Inc.", "aff_domain": "ia.ac.cn;ustc.edu;ia.ac.cn;bytedance.com;bytedance.com;bytedance.com;bytedance.com", "position": "MS student;PhD student;Associate Professor;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nai2024dreamclear,\ntitle={DreamClear: High-Capacity Real-World Image Restoration with Privacy-Safe Dataset Curation},\nauthor={Yuang Ai and Xiaoqiang Zhou and Huaibo Huang and Xiaotian Han and Zhengyu Chen and Quanzeng You and Hongxia Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6eoGVqMiIj}\n}", "github": "", "reviewers": "CpjF;bAzb;FDew;A2Rj", "pdf_size": 26630327, "rating": "4;5;5;5", "confidence": "5;4;4;5", "soundness": "2;3;3;2", "novelty": "2;3;4;2", "presentation": "2;3;3;3", "wc_summary": "80;79;90;75", "wc_strengths": "64;71;52;30", "wc_weaknesses": "31;98;127;96", "wc_questions": "192;5;20;18", "wc_limitations": "6;7;1;6", "wc_review": "373;260;290;225", "wc_reply_reviewers": "0;11;78;188", "wc_reply_authors": "90;39;131;1019", "reply_reviewers": "0;1;1;3", "reply_authors": "2;2;2;5", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 5.522680508593631 ], "wc_strengths_avg": [ 54.25, 15.562374497485916 ], "wc_weaknesses_avg": [ 88.0, 35.12121865767189 ], "wc_questions_avg": [ 58.75, 77.14718076508046 ], "wc_limitations_avg": [ 5.0, 2.345207879911715 ], "wc_review_avg": [ 287.0, 54.7220248163388 ], "wc_reply_reviewers_avg": [ 69.25, 74.7775868827017 ], "wc_reply_authors_avg": [ 319.75, 405.02553931820154 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zRGxGzacVbEJ:scholar.google.com/&scioq=DreamClear:+High-Capacity+Real-World+Image+Restoration+with+Privacy-Safe+Dataset+Curation&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "ia.ac.cn;ustc.edu;ia.ac.cn;bytedance.com;bytedance.com;bytedance.com;bytedance.com", "author_num": 7, "aff_unique_index": "0;1;0;2;2;2;2", "aff_unique_norm": "Chinese Academy of Sciences;University of Science and Technology of China;ByteDance", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ustc.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "CAS;USTC;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Complete Graphical Criterion for Sequential Covariate Adjustment in Causal Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96506", "id": "6gIcnPvw2x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6gIcnPvw2x", "openreview": "https://openreview.net/forum?id=6gIcnPvw2x", "poster": "/media/PosterPDFs/NeurIPS%202024/96506.png?t=1733599643.3557427", "project": "", "author_site": "Yonghan Jung, Min Woo Park, Sanghack Lee", "tldr": "", "abstract": "Covariate adjustment, also known as back-door adjustment, is a fundamental tool in causal inference. Although a sound and complete graphical identification criterion, known as the adjustment criterion (Shpitser, 2010), exists for static contexts, sequential contexts present challenges. Current practices, such as the sequential back-door adjustment (Pearl, 1995) or multi-outcome sequential back-door adjustment (Jung, 2020), are sound but incomplete; i.e., there are graphical scenarios where the causal effect is expressible via covariate adjustment, yet these criteria do not cover. In this paper, we exemplify this incompleteness and then present the *sequential adjustment criterion*, a sound and complete criterion for sequential covariate adjustment. We provide a constructive sequential adjustment criterion that identifies a set that satisfies the sequential adjustment criterion if and only if the causal effect can be expressed as a sequential covariate adjustment. Finally, we present an algorithm for identifying a *minimal* sequential covariate adjustment set, which optimizes efficiency by ensuring that no unnecessary vertices are included.", "keywords": "adjustment criterion;sequential back-door criterion;g-formula", "primary_area": "causal_inference", "supplementary_material": "", "author": "Yonghan Jung;Min Woo Park;Sanghack Lee", "authorids": "~Yonghan_Jung1;~Min_Woo_Park1;~Sanghack_Lee1", "gender": ";M;M", "homepage": "https://sites.google.com/view/yonghanjung;https://minwoopark96.github.io/;http://www.sanghacklee.me", "dblp": "201/0684.html;;20/1133", "google_scholar": "D9ATOa4AAAAJ;sBHEbVQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7137-6126", "linkedin": "yhansjung/;;sanghack-lee-65b52a28/", "or_profile": "~Yonghan_Jung1;~Min_Woo_Park1;~Sanghack_Lee1", "aff": "Purdue University;Seoul National University;Seoul National University", "aff_domain": "purdue.edu;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njung2024complete,\ntitle={Complete Graphical Criterion for Sequential Covariate Adjustment in Causal Inference},\nauthor={Yonghan Jung and Min Woo Park and Sanghack Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6gIcnPvw2x}\n}", "github": "", "reviewers": "aPdz;VqEB;sfA3;UrnJ", "pdf_size": 501497, "rating": "6;6;6;6", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "69;89;75;35", "wc_strengths": "61;112;56;55", "wc_weaknesses": "149;212;65;24", "wc_questions": "2;268;102;9", "wc_limitations": "1;1;6;12", "wc_review": "282;682;304;135", "wc_reply_reviewers": "55;12;0;24", "wc_reply_authors": "243;24;34;26", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.0, 19.849433241279208 ], "wc_strengths_avg": [ 71.0, 23.78024390118823 ], "wc_weaknesses_avg": [ 112.5, 73.00856114182774 ], "wc_questions_avg": [ 95.25, 107.26456777519779 ], "wc_limitations_avg": [ 5.0, 4.527692569068709 ], "wc_review_avg": [ 350.75, 201.98189894146455 ], "wc_reply_reviewers_avg": [ 22.75, 20.461854754640402 ], "wc_reply_authors_avg": [ 81.75, 93.17289037053644 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Nscp44RitQYJ:scholar.google.com/&scioq=Complete+Graphical+Criterion+for+Sequential+Covariate+Adjustment+in+Causal+Inference&hl=en&as_sdt=0,3", "gs_version_total": 2, "email": "purdue.edu;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Purdue University;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.snu.ac.kr", "aff_unique_abbr": "Purdue;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "Scalable DP-SGD: Shuffling vs. Poisson Subsampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96505", "id": "6gMnj9oc6d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6gMnj9oc6d", "openreview": "https://openreview.net/forum?id=6gMnj9oc6d", "poster": "/media/PosterPDFs/NeurIPS%202024/96505.png?t=1733291079.569372", "project": "", "author_site": "Lynn Chua, Badih Ghazi, Pritish Kamath, Ravi Kumar, Pasin Manurangsi, Amer Sinha, Chiyuan Zhang", "tldr": "", "abstract": "We provide new lower bounds on the privacy guarantee of _multi-epoch_ Adaptive Batch Linear Queries (ABLQ) mechanism with _shuffled batch sampling_, demonstrating substantial gaps when compared to _Poisson subsampling_; prior analysis was limited to a single epoch.\nSince the privacy analysis of Differentially Private Stochastic Gradient Descent (DP-SGD) is obtained by analyzing the ABLQ mechanism, this brings into serious question the common practice of implementing Shuffling based DP-SGD, but reporting privacy parameters as if Poisson subsampling was used.\nTo understand the impact of this gap on the utility of trained machine learning models, we introduce a novel practical approach to implement Poisson subsampling _at scale_ using massively parallel computation, and efficiently train models with the same.\nWe provide a comparison between the utility of models trained with Poisson subsampling based DP-SGD, and the optimistic estimates of utility when using shuffling, via our new lower bounds on the privacy guarantee of ABLQ with shuffling.", "keywords": "DPSGD;Differential Privacy;Shuffling;Poisson subsampling", "primary_area": "privacy", "supplementary_material": "", "author": "Lynn Chua;Badih Ghazi;Pritish Kamath;Ravi Kumar;Pasin Manurangsi;Amer Sinha;Chiyuan Zhang", "authorids": "~Lynn_Chua1;~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Amer_Sinha1;~Chiyuan_Zhang1", "gender": "F;;M;M;M;M;M", "homepage": ";https://sites.google.com/view/badihghazi/home;https://pritishkamath.github.io/;https://sites.google.com/site/ravik53/;https://pasin30055.github.io/;;http://pluskid.org", "dblp": "143/4392;125/2134;https://dblp.org/pers/k/Kamath:Pritish.html;k/RaviKumar.html;133/2059;;21/8315", "google_scholar": "D2SXVSYAAAAJ;GBJLTN8AAAAJ;1JFARhUAAAAJ;J_XhIsgAAAAJ;35hM-PkAAAAJ;;l_G2vr0AAAAJ", "orcid": ";;;0000-0002-2203-2586;;;", "linkedin": "chua-lynn/;badih-ghazi-608379132/;;ravi-kumar-a3a9631;;amersinha/;", "or_profile": "~Lynn_Chua1;~Badih_Ghazi1;~Pritish_Kamath2;~Ravi_Kumar1;~Pasin_Manurangsi2;~Amer_Sinha1;~Chiyuan_Zhang1", "aff": "Google;Google;Google Research;Google;Google;Research, Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;research.google.com;google.com", "position": "Researcher;Researcher;Research Scientist;Research Scientist;Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\nchua2024scalable,\ntitle={Scalable {DP}-{SGD}: Shuffling vs. Poisson Subsampling},\nauthor={Lynn Chua and Badih Ghazi and Pritish Kamath and Ravi Kumar and Pasin Manurangsi and Amer Sinha and Chiyuan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6gMnj9oc6d}\n}", "github": "", "reviewers": "vHjL;k7t2;1Uvo;9tRV", "pdf_size": 1652329, "rating": "3;5;6;7", "confidence": "4;4;3;4", "soundness": "2;4;4;4", "novelty": "2;2;3;4", "presentation": "3;4;3;3", "wc_summary": "63;250;156;110", "wc_strengths": "41;37;47;105", "wc_weaknesses": "75;54;94;257", "wc_questions": "129;120;100;32", "wc_limitations": "4;1;1;1", "wc_review": "312;462;398;505", "wc_reply_reviewers": "243;181;43;140", "wc_reply_authors": "503;73;0;195", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;1;3", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 144.75, 69.09187723603983 ], "wc_strengths_avg": [ 57.5, 27.654113618049667 ], "wc_weaknesses_avg": [ 120.0, 80.35234906335968 ], "wc_questions_avg": [ 95.25, 37.995887935406905 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 419.25, 72.68897784396201 ], "wc_reply_reviewers_avg": [ 151.75, 72.7096107815191 ], "wc_reply_authors_avg": [ 192.75, 192.19309951192315 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18028140582981496701&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com;google.com;research.google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MATES: Model-Aware Data Selection for Efficient Pretraining with Data Influence Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96504", "id": "6gzPSMUAz2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6gzPSMUAz2", "openreview": "https://openreview.net/forum?id=6gzPSMUAz2", "poster": "/media/PosterPDFs/NeurIPS%202024/96504.png?t=1731724557.3128297", "project": "", "author_site": "Zichun Yu, Spandan Das, Chenyan Xiong", "tldr": "", "abstract": "Pretraining data selection has the potential to improve language model pretraining efficiency by utilizing higher-quality data from massive web data corpora. Current data selection methods, which rely on either hand-crafted rules or larger reference models, are conducted statically and do not capture the evolving data preferences during pretraining. In this paper, we introduce *model-aware data selection with data influence models (MATES)*, where a data influence model continuously adapts to the evolving data preferences of the pretraining model and then selects the data most effective for the current pretraining progress. Specifically, we collect oracle data influence by locally probing the pretraining model and fine-tune a small data influence model to approximate it accurately. The data influence model then predicts data influence over the whole pretraining corpus and selects the most influential data for the next pretraining stage. Experiments of pretraining 410M and 1B models on the C4 dataset demonstrate that MATES significantly outperforms random data selection on extensive downstream tasks. It doubles the gains achieved by the state-of-the-art data selection approach that leverages larger reference models and reduces the total FLOPs required to reach certain performances by half. Further analyses validate the effectiveness of the locally probed oracle data influence and the approximation with data influence models. Our code is open-sourced at https://github.com/cxcscmu/MATES.", "keywords": "Large Language Models;Pretraining;Data Curation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zichun Yu;Spandan Das;Chenyan Xiong", "authorids": "~Zichun_Yu1;~Spandan_Das1;~Chenyan_Xiong1", "gender": "M;;M", "homepage": "https://yuzc19.github.io/;http://sd325.github.io;https://www.cs.cmu.edu/~cx/", "dblp": "274/8681;;18/10886", "google_scholar": "wWkTkocAAAAJ;lQEqv9MAAAAJ;E9BaEBYAAAAJ", "orcid": ";;", "linkedin": "zichun-yu-01a48b2a5/;https://linkedin.com/in/spandand/;", "or_profile": "~Zichun_Yu1;~Spandan_Das1;~Chenyan_Xiong1", "aff": "Meta Facebook;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "meta.com;andrew.cmu.edu;cs.cmu.edu", "position": "Intern;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nyu2024mates,\ntitle={{MATES}: Model-Aware Data Selection for Efficient Pretraining with Data Influence Models},\nauthor={Zichun Yu and Spandan Das and Chenyan Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6gzPSMUAz2}\n}", "github": "", "reviewers": "R3ZA;T2sB;xrwH;2Zbt", "pdf_size": 1555147, "rating": "5;5;5;6", "confidence": "4;3;5;5", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;2", "wc_summary": "63;122;136;47", "wc_strengths": "74;44;94;69", "wc_weaknesses": "231;108;122;89", "wc_questions": "98;71;59;234", "wc_limitations": "64;9;11;18", "wc_review": "530;354;422;457", "wc_reply_reviewers": "163;134;68;0", "wc_reply_authors": "614;561;31;94", "reply_reviewers": "1;2;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 37.75579425730572 ], "wc_strengths_avg": [ 70.25, 17.80975856096876 ], "wc_weaknesses_avg": [ 137.5, 55.23812089490373 ], "wc_questions_avg": [ 115.5, 69.85878613317011 ], "wc_limitations_avg": [ 25.5, 22.477766792989023 ], "wc_review_avg": [ 440.75, 63.45618567169004 ], "wc_reply_reviewers_avg": [ 91.25, 62.93399319922422 ], "wc_reply_authors_avg": [ 325.0, 264.10887906316214 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4604165081839746223&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;andrew.cmu.edu;cs.cmu.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Meta;Carnegie Mellon University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "Meta;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sparse High Rank Adapters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96503", "id": "6hY60tkiEK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6hY60tkiEK", "openreview": "https://openreview.net/forum?id=6hY60tkiEK", "poster": "/media/PosterPDFs/NeurIPS%202024/96503.png?t=1733368333.4825377", "project": "", "author_site": "Kartikeya Bhardwaj, Nilesh Pandey, Sweta Priyadarshi, Viswanath Ganapathy, Shreya Kadambi, Rafael Esteves, Shubhankar Borse, Paul Whatmough, Risheek Garrepalli, Mart van Baalen, Harris Teague, Markus Nagel", "tldr": "", "abstract": "Low Rank Adaptation (LoRA) has gained massive attention in the recent generative AI research. One of the main advantages of LoRA is its ability to be fused with pretrained models, adding no overhead during inference. However, from a mobile deployment standpoint, we can either avoid inference overhead in the fused mode but lose the ability to switch adapters rapidly, or suffer significant (up to 30% higher) inference latency while enabling rapid switching in the unfused mode. LoRA also exhibits concept-loss when multiple adapters are used concurrently. In this paper, we propose Sparse High Rank Adapters (SHiRA), a new paradigm which incurs no inference overhead, enables rapid switching, and significantly reduces concept-loss. Specifically, SHiRA can be trained by directly tuning only 1-2% of the base model weights while leaving others unchanged. This results in a highly sparse adapter which can be switched directly in the fused mode. We further provide theoretical and empirical insights on how high sparsity in SHiRA can aid multi-adapter fusion by reducing concept loss. Our extensive experiments on LVMs and LLMs demonstrate that finetuning only a small fraction of the parameters in the base model significantly outperforms LoRA while enabling both rapid switching and multi-adapter fusion. Finally, we provide a latency- and memory-efficient SHiRA implementation based on Parameter-Efficient Finetuning (PEFT) Library which trains at nearly the same speed as LoRA while consuming up to 16% lower peak GPU memory, thus making SHiRA easy to adopt for practical use cases. To demonstrate rapid switching benefits during inference, we show that loading SHiRA on a base model can be 5x-16x faster than LoRA fusion on a CPU.", "keywords": "Sparse High Rank Adapters;Generative AI;Mobile Deployment;Stable Diffusion;LLMs;Sparsity", "primary_area": "generative_models", "supplementary_material": "", "author": "Kartikeya Bhardwaj;Nilesh Prasad Pandey;Sweta Priyadarshi;Viswanath Ganapathy;Shreya Kadambi;Rafael Esteves;Shubhankar Borse;Paul Whatmough;Risheek Garrepalli;Mart Van Baalen;Harris Teague;Markus Nagel", "authorids": "~Kartikeya_Bhardwaj3;~Nilesh_Prasad_Pandey1;~Sweta_Priyadarshi1;~Viswanath_Ganapathy1;~Shreya_Kadambi1;~Rafael_Esteves1;~Shubhankar_Borse1;~Paul_Whatmough1;~Risheek_Garrepalli1;~Mart_Van_Baalen1;~Harris_Teague1;~Markus_Nagel1", "gender": "M;M;F;M;F;M;M;M;M;M;M;M", "homepage": ";;https://swetap24.github.io/;;https://www.linkedin.com/in/shreyakadambi/;;;;;;;", "dblp": "127/1347;;;;;;289/7536;87/9432;222/9870;;;38/1463", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;rE7JoXgAAAAJ;NARpCjAAAAAJ;jRHlJyQAAAAJ;4LmQNNoAAAAJ;;ZsgWCyMAAAAJ;hu3x-LoAAAAJ;https://scholar.google.com/citations?hl=en;a-Au4JUAAAAJ;https://scholar.google.com/citations?hl=en;akNuBBEAAAAJ", "orcid": ";0009-0002-1118-0345;;;;;;;;;;", "linkedin": ";nppandey/;swetap24?challengeId=AQGMzFETnINffQAAAYcUzXG6WD6WDSSwgS4j4gT6ao891h_G4KrTlPKE2pkPiL_leWbDN2AtSQKK1PjDBURLf8nly4lV0eGrgQ&submissionId=e9e1eabe-2b6d-4f17-31f9-e433ea8e5379&challengeSource=AgGfbNQx1pNrogAAAYcUzb3NYOiudZwr-nF4B8KA_6LUKiuC9yPpCgtrjWs53EE&challegeType=AgHDYRQh1IpIcAAAAYcUzb3QDzsdLvPf8M_rxPE4IShUe32MmsNLdKw&memberId=AgHZgaGN5fshUQAAAYcUzb3Tp9BvaOimUG3He6tT3pOj5NA&recognizeDevice=AgEXSUNfCgNb1wAAAYcUzb3XLnpeH3c45N3CqsiEprut0PfrGbpS;;;rafael-esteves-124353145/;;paul-whatmough-2062729/;risheek-garrepalli-20138a4a/;;;", "or_profile": "~Kartikeya_Bhardwaj3;~Nilesh_Prasad_Pandey1;~Sweta_Priyadarshi1;~Viswanath_Ganapathy1;~Shreya_Kadambi1;~Rafael_Esteves1;~Shubhankar_Borse1;~Paul_Whatmough1;~Risheek_Garrepalli1;~Mart_Van_Baalen1;~Harris_Teague1;~Markus_Nagel1", "aff": "Qualcomm AI Research;Qualcomm AI Research;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;School of Engineering and Applied Sciences, Harvard University;Qualcomm Inc, QualComm;QualComm;Qualcomm;Qualcomm AI Research", "aff_domain": "qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;seas.harvard.edu;qti.qualcomm.com;qualcomm.com;qualcomm.com;qualcomm.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Deep Learning Research Engineer;Associate;Researcher;Researcher;Engineer;Researcher", "bibtex": "@inproceedings{\nbhardwaj2024sparse,\ntitle={Sparse High Rank Adapters},\nauthor={Kartikeya Bhardwaj and Nilesh Prasad Pandey and Sweta Priyadarshi and Viswanath Ganapathy and Shreya Kadambi and Rafael Esteves and Shubhankar Borse and Paul Whatmough and Risheek Garrepalli and Mart Van Baalen and Harris Teague and Markus Nagel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6hY60tkiEK}\n}", "github": "", "reviewers": "XU4N;ZsNM;2XqP;K5NY;AaSE", "pdf_size": 37289298, "rating": "5;5;5;5;6", "confidence": "5;4;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;2", "presentation": "3;3;3;4;3", "wc_summary": "47;50;87;87;72", "wc_strengths": "48;17;41;120;49", "wc_weaknesses": "388;120;51;169;555", "wc_questions": "1;18;2;2;623", "wc_limitations": "1;1;7;2;24", "wc_review": "485;206;188;380;1323", "wc_reply_reviewers": "331;19;17;10;49", "wc_reply_authors": "603;0;0;0;330", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;1;1;1;3", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 68.6, 17.32743489383238 ], "wc_strengths_avg": [ 55.0, 34.49637662132068 ], "wc_weaknesses_avg": [ 256.6, 187.03646703250143 ], "wc_questions_avg": [ 129.2, 246.98129483829337 ], "wc_limitations_avg": [ 7.0, 8.78635305459552 ], "wc_review_avg": [ 516.4, 418.1839786505456 ], "wc_reply_reviewers_avg": [ 85.2, 123.62588725667455 ], "wc_reply_authors_avg": [ 186.6, 244.29948833347973 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_MCtedN4cKUJ:scholar.google.com/&scioq=Sparse+High+Rank+Adapters&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;seas.harvard.edu;qti.qualcomm.com;qualcomm.com;qualcomm.com;qualcomm.com", "author_num": 12, "aff_unique_index": "0;0;1;1;1;1;1;2;1;1;1;0", "aff_unique_norm": "Qualcomm;Qualcomm Incorporated;Harvard University", "aff_unique_dep": "Qualcomm AI Research;;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.qualcomm.com/research;https://www.qualcomm.com;https://www.harvard.edu", "aff_unique_abbr": "QAI;Qualcomm;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fisher Flow Matching for Generative Modeling over Discrete Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96502", "id": "6jOScqwdHU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6jOScqwdHU", "openreview": "https://openreview.net/forum?id=6jOScqwdHU", "poster": "", "project": "", "author_site": "Oscar Davis, Samuel Kessler, Mircea Petrache, Ismail Ceylan, Michael Bronstein, Joey Bose", "tldr": "", "abstract": "Generative modeling over discrete data has recently seen numerous success stories, with applications spanning language modeling, biological sequence design, and graph-structured molecular data. The predominant generative modeling paradigm for discrete data is still autoregressive, with more recent alternatives based on diffusion or flow-matching falling short of their impressive performance in continuous data settings, such as image or video generation. In this work, we introduce Fisher-Flow, a novel flow-matching model for discrete data. Fisher-Flow takes a manifestly geometric perspective\nby considering categorical distributions over discrete data as points residing on a statistical manifold equipped with its natural Riemannian metric: the \\emph{Fisher-Rao metric}. As a result, we demonstrate discrete data itself can be continuously reparameterised to points on the positive orthant of the $d$-hypersphere $\\mathbb{S}^d_+$, \nwhich allows us to define flows that map any source distribution to target in a principled manner by transporting mass along (closed-form) geodesics of $\\mathbb{S}^d_+$. Furthermore, the learned flows in Fisher-Flow can be further bootstrapped by leveraging Riemannian optimal transport leading to improved training dynamics. We prove that the gradient flow induced by Fisher-FLow is optimal in reducing the forward KL divergence. We evaluate Fisher-Flow on an array of synthetic and diverse real-world benchmarks, including designing DNA Promoter, and DNA Enhancer sequences. Empirically, we find that Fisher-Flow improves over prior diffusion and flow-matching models on these benchmarks.", "keywords": "Flow matching;Generative models;Riemannian manifolds;Discrete data", "primary_area": "generative_models", "supplementary_material": "/attachment/6bf4ed17ce086b111b9dfa331d77ef3b194e2781.zip", "author": "Oscar Davis;Samuel Kessler;Mircea Petrache;Ismail Ilkan Ceylan;Michael M. Bronstein;Joey Bose", "authorids": "~Oscar_Davis1;~Samuel_Kessler1;~Mircea_Petrache1;~Ismail_Ilkan_Ceylan2;~Michael_M._Bronstein1;~Joey_Bose1", "gender": "Not Specified;;M;;M;M", "homepage": "https://github.com/olsdavis;;https://sites.google.com/site/mircpetrache/home;https://www.cs.ox.ac.uk/people/ismaililkan.ceylan/;http://www.inf.usi.ch/bronstein/;https://joeybose.github.io/", "dblp": ";;;147/6111;07/2668;174/3372", "google_scholar": ";JmjQPXoAAAAJ;HiYZ-6MAAAAJ;avJ5kQcAAAAJ;UU3N6-UAAAAJ;ybPyI7IAAAAJ", "orcid": ";;0000-0003-2181-169X;0000-0003-4118-4689;;", "linkedin": ";;mircea-petrache-4983a4104/;;mbronstein/;", "or_profile": "~Oscar_Davis1;~Samuel_Kessler1;~Mircea_Petrache1;~Ismail_Ilkan_Ceylan2;~Michael_M._Bronstein1;~Joey_Bose1", "aff": "Microsoft Research;Microsoft;Pontificia Universidad Catolica de Chile;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "research.microsoft.com;microsoft.com;puc.cl;oxford.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "Intern;Postdoc;Assistant Professor;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\ndavis2024fisher,\ntitle={Fisher Flow Matching for Generative Modeling over Discrete Data},\nauthor={Oscar Davis and Samuel Kessler and Mircea Petrache and Ismail Ilkan Ceylan and Michael M. Bronstein and Joey Bose},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6jOScqwdHU}\n}", "github": "", "reviewers": "11qA;cNL3;YHTA", "pdf_size": 3010037, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "2;3;4", "wc_summary": "49;56;140", "wc_strengths": "98;16;70", "wc_weaknesses": "716;73;264", "wc_questions": "412;669;81", "wc_limitations": "16;77;175", "wc_review": "1291;891;730", "wc_reply_reviewers": "894;0;110", "wc_reply_authors": "1171;147;0", "reply_reviewers": "2;0;1", "reply_authors": "4;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 81.66666666666667, 41.34677200889515 ], "wc_strengths_avg": [ 61.333333333333336, 34.03266404826725 ], "wc_weaknesses_avg": [ 351.0, 269.61577599737495 ], "wc_questions_avg": [ 387.3333333333333, 240.68282494233407 ], "wc_limitations_avg": [ 89.33333333333333, 65.49469868283659 ], "wc_review_avg": [ 970.6666666666666, 235.85353270385602 ], "wc_reply_reviewers_avg": [ 334.6666666666667, 398.0496898071339 ], "wc_reply_authors_avg": [ 439.3333333333333, 520.8354399957395 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7569278865967931466&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "research.microsoft.com;microsoft.com;puc.cl;oxford.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 6, "aff_unique_index": "0;0;1;2;2;2", "aff_unique_norm": "Microsoft;Pontificia Universidad Catolica de Chile;University of Oxford", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.puc.cl;https://www.ox.ac.uk", "aff_unique_abbr": "MSR;PUC;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;2", "aff_country_unique": "United States;Chile;United Kingdom" }, { "title": "ActionAtlas: A VideoQA Benchmark for Domain-specialized Action Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97838", "id": "6kc6Hdyknx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6kc6Hdyknx", "openreview": "https://openreview.net/forum?id=6kc6Hdyknx", "poster": "", "project": "", "author_site": "Mohammadreza (Reza) Salehi, Jae Sung Park, Aditya Kusupati, Ranjay Krishna, Yejin Choi, Hannaneh Hajishirzi, Ali Farhadi", "tldr": "", "abstract": "Our world is full of varied actions and moves in specialized fields that we, as humans, seek to identify and learn about. To evaluate the effectiveness of multi-modal models in helping us recognize such fine-grained actions, we introduce ActionAtlas, a video question answering (VideoQA) benchmark on fine-grained action recognition with short videos across various sports. ActionAtlas contains 554 videos spanning 284 actions across 42 sports with 1161 actions as total potential choices. Unlike most existing action recognition benchmarks that focus on simplistic actions, often identifiable from a single frame, ActionAtlas focuses on intricate movements and tests the models' ability to discern subtle differences. Additionally, each video in ActionAtlas also includes a question, which helps to more accurately pinpoint the action's performer in scenarios where multiple individuals are involved in different activities. We evaluate proprietary and open models on this benchmark and show that the state-of-the-art models only perform at most 48.73% accurately where random chance is 20%. Furthermore, our results show that a high frame sampling rate is essential for recognizing actions in ActionAtlas, a feature that current top proprietary models like Gemini lack in their default settings.", "keywords": "Video understanding;Action recognition;Multi-modal LLM;Fine-grained actions;Domain-specific actions", "primary_area": "", "supplementary_material": "/attachment/5f7e9562afa87952dc04d756b34faef744aaeafe.pdf", "author": "Mohammadreza Salehi;Jae Sung Park;Aditya Kusupati;Ranjay Krishna;Yejin Choi;Hannaneh Hajishirzi;Ali Farhadi", "authorids": "~Mohammadreza_Salehi3;~Jae_Sung_Park2;~Aditya_Kusupati1;~Ranjay_Krishna1;~Yejin_Choi1;~Hannaneh_Hajishirzi1;~Ali_Farhadi3", "gender": "M;;M;M;F;F;M", "homepage": "https://homes.cs.washington.edu/~mrsalehi/;https://homes.cs.washington.edu/~jspark96/;http://www.adityakusupati.com/;http://ranjaykrishna.com;https://yejinc.github.io/;https://homes.cs.washington.edu/~hannaneh/;https://homes.cs.washington.edu/~ali/", "dblp": ";;231/7662;167/3785;89/579-1;52/1296;37/5826", "google_scholar": "NFddT_4AAAAJ;hD2WqqcAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;IcqahyAAAAAJ;vhP-tlcAAAAJ;LOV6_WIAAAAJ;jeOFRDsAAAAJ", "orcid": ";;0000-0001-8455-1851;0000-0001-8784-2531;;;", "linkedin": "mrezasalehi/;;adityakusupati/;ranjay-krishna-1a344444/;;;", "or_profile": "~Mohammadreza_Salehi3;~Jae_Sung_Park2;~Aditya_Kusupati1;~Ranjay_Krishna1;~Yejin_Choi1;~Hannaneh_Hajishirzi1;~Ali_Farhadi3", "aff": "University of Washington;University of Washington;Department of Computer Science, University of Washington;University of Washington;Department of Computer Science, University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;washington.edu;cs.washington.edu;cs.washington.edu;cs.washington.edu;uw.edu;cs.uw.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsalehi2024actionatlas,\ntitle={ActionAtlas: A Video{QA} Benchmark for Domain-specialized Action Recognition},\nauthor={Mohammadreza Salehi and Jae Sung Park and Aditya Kusupati and Ranjay Krishna and Yejin Choi and Hannaneh Hajishirzi and Ali Farhadi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=6kc6Hdyknx}\n}", "github": "", "reviewers": "jE6k;Km21;DiB7;5HLL", "pdf_size": 12835681, "rating": "3;6;8;8", "confidence": "5;4;4;3", "wc_summary_and_contributions": "72;59;145;70", "wc_strengths": "18;4;96;86", "wc_improvement": "4;15;228;8", "wc_limitations": "1;4;7;71", "wc_correctness": "25;7;21;43", "wc_clarity": "12;4;12;8", "wc_relation_to_prior_work": "1;11;9;12", "wc_documentation": "1;5;12;32", "wc_additional_feedback": "1;1;1;1", "wc_review": "135;110;531;331", "wc_reply_reviewers": "488;25;259;28", "wc_reply_authors": "1288;34;14;0", "reply_reviewers": "1;1;1;1", "reply_authors": "5;3;3;1", "rating_avg": [ 6.25, 2.0463381929681126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 86.5, 34.13575837739657 ], "wc_strengths_avg": [ 51.0, 40.45985664828782 ], "wc_improvement_avg": [ 63.75, 94.91147190935351 ], "wc_limitations_avg": [ 20.75, 29.089302157322372 ], "wc_correctness_avg": [ 24.0, 12.84523257866513 ], "wc_clarity_avg": [ 9.0, 3.3166247903554 ], "wc_relation_to_prior_work_avg": [ 8.25, 4.322904116447646 ], "wc_documentation_avg": [ 12.5, 11.926860441876563 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 276.75, 169.9152362208875 ], "wc_reply_reviewers_avg": [ 200.0, 191.46409585089316 ], "wc_reply_authors_avg": [ 334.0, 550.9246772472621 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8638684255813602, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13309859190717145837&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.washington.edu;washington.edu;cs.washington.edu;cs.washington.edu;cs.washington.edu;uw.edu;cs.uw.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Visual Scene Understanding: Incremental Scene Graph Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96501", "id": "6lwKOvL3KN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6lwKOvL3KN", "openreview": "https://openreview.net/forum?id=6lwKOvL3KN", "poster": "/media/PosterPDFs/NeurIPS%202024/96501.png?t=1731473676.2415395", "project": "", "author_site": "Naitik Khandelwal, Xiao Liu, Mengmi Zhang", "tldr": "", "abstract": "Scene graph generation (SGG) analyzes images to extract meaningful information about objects and their relationships. In the dynamic visual world, it is crucial for AI systems to continuously detect new objects and establish their relationships with existing ones. Recently, numerous studies have focused on continual learning within the domains of object detection and image recognition. However, a limited amount of research focuses on a more challenging continual learning problem in SGG. This increased difficulty arises from the intricate interactions and dynamic relationships among objects, and their associated contexts. Thus, in continual learning, SGG models are often required to expand, modify, retain, and reason scene graphs within the process of adaptive visual scene understanding. To systematically explore Continual Scene Graph Generation (CSEGG), we present a comprehensive benchmark comprising three learning regimes: relationship incremental, scene incremental, and relationship generalization. Moreover, we introduce a ``Replays via Analysis by Synthesis\" method named RAS. This approach leverages the scene graphs, decomposes and re-composes them to represent different scenes, and replays the synthesized scenes based on these compositional scene graphs. The replayed synthesized scenes act as a means to practice and refine proficiency in SGG in known and unknown environments. Our experimental results not only highlight the challenges of directly combining existing continual learning methods with SGG backbones but also demonstrate the effectiveness of our proposed approach, enhancing CSEGG efficiency while simultaneously preserving privacy and memory usage. All data and source code will be made public.", "keywords": "Scene graph generation;Continual learning;In-context symbolic replay;Long tail distribution;Compositional scene graphs", "primary_area": "machine_vision", "supplementary_material": "/attachment/8fe653e06ab159d9331c0b3f2b9c58dbaac8574b.zip", "author": "Naitik Khandelwal;Xiao Liu;Mengmi Zhang", "authorids": "~Naitik_Khandelwal1;~Xiao_Liu24;~Mengmi_Zhang1", "gender": "M;M;F", "homepage": ";;https://a0091624.wixsite.com/deepneurocognition-1", "dblp": ";;160/7116", "google_scholar": "7divVH4AAAAJ;;https://scholar.google.com.sg/citations?user=G2sVOhcAAAAJ", "orcid": ";;0000-0002-2694-7097", "linkedin": ";xiao-liu-34971b205/;", "or_profile": "~Naitik_Khandelwal1;~Xiao_Liu24;~Mengmi_Zhang1", "aff": "A*STAR;I2R, A*STAR;A*STAR", "aff_domain": "i2r.a-star.edu.sg;i2r.a-star.edu.sg;astar.edu.sg", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkhandelwal2024adaptive,\ntitle={Adaptive Visual Scene Understanding: Incremental Scene Graph Generation},\nauthor={Naitik Khandelwal and Xiao Liu and Mengmi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6lwKOvL3KN}\n}", "github": "", "reviewers": "XGhv;myHM;Y6eG", "pdf_size": 41744831, "rating": "3;6;6", "confidence": "4;4;3", "soundness": "2;3;2", "novelty": "2;2;3", "presentation": "2;3;2", "wc_summary": "72;54;90", "wc_strengths": "16;49;42", "wc_weaknesses": "166;25;140", "wc_questions": "17;19;30", "wc_limitations": "8;120;12", "wc_review": "279;267;314", "wc_reply_reviewers": "92;0;0", "wc_reply_authors": "341;0;32", "reply_reviewers": "1;0;0", "reply_authors": "2;1;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 14.696938456699069 ], "wc_strengths_avg": [ 35.666666666666664, 14.1970262926979 ], "wc_weaknesses_avg": [ 110.33333333333333, 61.26626768096418 ], "wc_questions_avg": [ 22.0, 5.715476066494082 ], "wc_limitations_avg": [ 46.666666666666664, 51.88020388891659 ], "wc_review_avg": [ 286.6666666666667, 19.93879523831757 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 43.36921591277491 ], "wc_reply_authors_avg": [ 124.33333333333333, 153.76244303759253 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UcBCy0UyJ44J:scholar.google.com/&scioq=Adaptive+Visual+Scene+Understanding:+Incremental+Scene+Graph+Generation&hl=en&as_sdt=0,3", "gs_version_total": 5, "email": "i2r.a-star.edu.sg;i2r.a-star.edu.sg;astar.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Agency for Science, Technology and Research;A*STAR", "aff_unique_dep": ";Institute for Infocomm Research", "aff_unique_url": "https://www.a-star.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Improving Generalization in Federated Learning with Model-Data Mutual Information Regularization: A Posterior Inference Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96500", "id": "6lx34fpanw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6lx34fpanw", "openreview": "https://openreview.net/forum?id=6lx34fpanw", "poster": "/media/PosterPDFs/NeurIPS%202024/96500.png?t=1731425238.5518253", "project": "", "author_site": "Hao Zhang, Chenglin Li, Nuowen Kan, Ziyang Zheng, Wenrui Dai, Junni Zou, Hongkai Xiong", "tldr": "", "abstract": "Most of existing federated learning (FL) formulation is treated as a point-estimate of models, inherently prone to overfitting on scarce client-side data with overconfident decisions. Though Bayesian inference can alleviate this issue, a direct posterior inference at clients may result in biased local posterior estimates due to data heterogeneity, leading to a sub-optimal global posterior. From an information-theoretic perspective, we propose FedMDMI, a federated posterior inference framework based on model-data mutual information (MI). Specifically, a global model-data MI term is introduced as regularization to enforce the global model to learn essential information from the heterogeneous local data, alleviating the bias caused by data heterogeneity and hence enhancing generalization. To make this global MI tractable, we decompose it into local MI terms at the clients, converting the global objective with MI regularization into several locally optimizable objectives based on local data. For these local objectives, we further show that the optimal local posterior is a Gibbs posterior, which can be efficiently sampled with stochastic gradient Langevin dynamics methods. Finally, at the server, we approximate sampling from the global Gibbs posterior by simply averaging samples from the local posteriors. Theoretical analysis provides a generalization bound for FL w.r.t. the model-data MI, which, at different levels of regularization, represents a federated version of the bias-variance trade-off. Experimental results demonstrate a better generalization behavior with better calibrated uncertainty estimates of FedMDMI.", "keywords": "Federated learning;posterior inference;model-data mutual information", "primary_area": "other", "supplementary_material": "/attachment/90fff1754b6fa77dcc2f374ad666d67865fb7699.zip", "author": "Hao Zhang;Chenglin Li;Nuowen Kan;Ziyang Zheng;Wenrui Dai;Junni Zou;Hongkai Xiong", "authorids": "~Hao_Zhang36;~Chenglin_Li2;~Nuowen_Kan1;~Ziyang_Zheng2;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1", "gender": "M;M;M;;F;M;M", "homepage": "https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;;;;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn;https://haozzh.github.io/", "dblp": ";226/2477;;16/5135.html;91/4613;21/3569;", "google_scholar": "ltW2JMcAAAAJ;OKrLi6UAAAAJ;pcgDcMmDJbwC;Xg8MhyAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ;e4Ta2QsAAAAJ", "orcid": ";0000-0002-6028-1284;0000-0001-9923-8016;;;0000-0003-4552-0029;", "linkedin": ";;;;;;", "or_profile": "~Chenglin_Li2;~Nuowen_Kan1;~Ziyang_Zheng2;~Wenrui_Dai1;~Junni_Zou1;~Hongkai_Xiong1;~Hao_Zhangg1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu", "position": "Full Professor;PhD student;PhD student;Associate Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024improving,\ntitle={Improving Generalization in Federated Learning with Model-Data Mutual Information Regularization: A Posterior Inference Approach},\nauthor={Hao Zhang and Chenglin Li and Nuowen Kan and Ziyang Zheng and Wenrui Dai and Junni Zou and Hongkai Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6lx34fpanw}\n}", "github": "", "reviewers": "hP14;re5T;4nGu;XkDq", "pdf_size": 771543, "rating": "6;6;6;7", "confidence": "2;4;2;3", "soundness": "3;3;3;2", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "55;99;132;78", "wc_strengths": "34;48;86;30", "wc_weaknesses": "176;231;261;113", "wc_questions": "59;119;78;75", "wc_limitations": "1;24;4;3", "wc_review": "325;521;561;299", "wc_reply_reviewers": "14;68;486;11", "wc_reply_authors": "37;59;1644;37", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;5;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.0, 28.32843094843059 ], "wc_strengths_avg": [ 49.5, 22.107690969434145 ], "wc_weaknesses_avg": [ 195.25, 56.42860533452869 ], "wc_questions_avg": [ 82.75, 22.1401784093986 ], "wc_limitations_avg": [ 8.0, 9.300537618869138 ], "wc_review_avg": [ 426.5, 115.7356902601786 ], "wc_reply_reviewers_avg": [ 144.75, 198.32218105900307 ], "wc_reply_authors_avg": [ 444.25, 692.7342112960786 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0oy8ch6LbrwJ:scholar.google.com/&scioq=Improving+Generalization+in+Federated+Learning+with+Model-Data+Mutual+Information+Regularization:+A+Posterior+Inference+Approach&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Using Time-Aware Graph Neural Networks to Predict Temporal Centralities in Dynamic Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96499", "id": "6n709MszkP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6n709MszkP", "openreview": "https://openreview.net/forum?id=6n709MszkP", "poster": "/media/PosterPDFs/NeurIPS%202024/96499.png?t=1733299402.5512247", "project": "", "author_site": "Franziska Heeg, Ingo Scholtes", "tldr": "", "abstract": "Node centralities play a pivotal role in network science, social network analysis, and recommender systems.\nIn temporal data, static path-based centralities like closeness or betweenness can give misleading results about the true importance of nodes in a temporal graph. To address this issue, temporal generalizations of betweenness and closeness have been defined that are based on the shortest time-respecting paths between pairs of nodes. However, a major issue of those generalizations is that the calculation of such paths is computationally expensive.\nAddressing this issue, we study the application of De Bruijn Graph Neural Networks (DBGNN), a time-aware graph neural network architecture, to predict temporal path-based centralities in time series data. We experimentally evaluate our approach in 13 temporal graphs from biological and social systems and show that it considerably improves the prediction of betweenness and closeness centrality compared to (i) a static Graph Convolutional Neural Network, (ii) an efficient sampling-based approximation technique for temporal betweenness, and (iii) two state-of-the-art time-aware graph learning techniques for dynamic graphs.", "keywords": "Centralities;Temporal Centralities;Graph Neural Networks;De Bruijn Graphs", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Franziska Heeg;Ingo Scholtes", "authorids": "~Franziska_Heeg1;~Ingo_Scholtes1", "gender": "F;M", "homepage": "https://www.informatik.uni-wuerzburg.de/ml4nets/team/franziska-heeg/;https://www.ingoscholtes.net", "dblp": "359/4748;s/IngoScholtes", "google_scholar": "https://scholar.google.de/citations?hl=de;pouriVsAAAAJ", "orcid": ";0000-0003-2253-0216", "linkedin": ";", "or_profile": "~Franziska_Heeg1;~Ingo_Scholtes1", "aff": "Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;University of Zurich", "aff_domain": "uni-wuerzburg.de;uzh.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nheeg2024using,\ntitle={Using Time-Aware Graph Neural Networks to Predict Temporal Centralities in Dynamic Graphs},\nauthor={Franziska Heeg and Ingo Scholtes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6n709MszkP}\n}", "github": "", "reviewers": "LDDR;zsX2;nntR;Gqof", "pdf_size": 1162208, "rating": "5;6;6;8", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "1;3;3;4", "presentation": "3;2;3;3", "wc_summary": "64;43;79;117", "wc_strengths": "44;83;93;28", "wc_weaknesses": "143;73;120;137", "wc_questions": "129;82;82;58", "wc_limitations": "37;45;21;16", "wc_review": "417;326;395;356", "wc_reply_reviewers": "16;0;15;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 27.03123193641015 ], "wc_strengths_avg": [ 62.0, 26.842131062939096 ], "wc_weaknesses_avg": [ 118.25, 27.453369556395078 ], "wc_questions_avg": [ 87.75, 25.752427070084092 ], "wc_limitations_avg": [ 29.75, 11.734031702701335 ], "wc_review_avg": [ 373.5, 35.06066171651642 ], "wc_reply_reviewers_avg": [ 12.5, 7.365459931328117 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7061989012918455081&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uni-wuerzburg.de;uzh.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of W\u00fcrzburg;University of Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-wuerzburg.de;https://www.unizh.ch", "aff_unique_abbr": "JMU;UZH", "aff_campus_unique_index": "0", "aff_campus_unique": "W\u00fcrzburg;", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Switzerland" }, { "title": "Block Transformer: Global-to-Local Language Modeling for Fast Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96498", "id": "6osgTNnAZQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6osgTNnAZQ", "openreview": "https://openreview.net/forum?id=6osgTNnAZQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96498.png?t=1731764021.0541983", "project": "", "author_site": "Namgyu Ho, Sangmin Bae, Taehyeon Kim, Hyunjik Jo, Yireun Kim, Tal Schuster, Adam Fisch, James Thorne, Se-Young Yun", "tldr": "", "abstract": "We introduce the Block Transformer which adopts hierarchical global-to-local modeling to autoregressive transformers to mitigate the inference bottlenecks associated with self-attention. Self-attention requires the key-value (KV) cache of all previous sequences to be retrieved from memory at every decoding step to retrieve context information, leading to two primary bottlenecks during batch inference. First, there is a significant delay in obtaining the first token, as the information of the entire prompt must first be processed to prefill the KV cache. Second, computation of subsequent tokens is bottlenecked by the high memory I/O demand of fetching the entire KV cache, which grows linearly with sequence length, incurring quadratic memory reads overall. We design the Block Transformer to strategically mitigate these costs, by incorporating coarsity and locality into an integrated global-to-local architecture. At the lower layers, we aggregate tokens into fixed size blocks to apply attention across the entire sequence at coarse-grained detail, to capture the global context while minimizing KV cache overhead. At upper layers, we apply attention within each block to decode individual tokens, to model fine-grained details with a lightweight local KV cache. We pretrain vanilla and Block Transformers from scratch and demonstrate that Block Transformers reach 10--20x inference throughput compared to vanilla transformers with equivalent perplexity and zero-shot task performance.", "keywords": "language model;model architecture;efficient inference", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Namgyu Ho;Sangmin Bae;Taehyeon Kim;hyunjik.jo;Yireun Kim;Tal Schuster;Adam Fisch;James Thorne;Se-Young Yun", "authorids": "~Namgyu_Ho1;~Sangmin_Bae1;~Taehyeon_Kim1;~hyunjik.jo1;~Yireun_Kim1;~Tal_Schuster1;~Adam_Fisch2;~James_Thorne1;~Se-Young_Yun1", "gender": "M;M;M;M;;Not Specified;;M;", "homepage": "http://namgyu.com;https://www.raymin0223.com;https://taehyeon.oopy.io/;;;https://people.csail.mit.edu/tals/;https://jamesthorne.com;https://fbsqkd.github.io;https://people.csail.mit.edu/fisch/", "dblp": "313/1580;91/1588;;;326/2324;190/7491;204/1380;23/8862;https://dblp.org/pers/f/Fisch:Adam.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;T5rHY14AAAAJ;https://scholar.google.co.kr/citations?user=wDEaSpwAAAAJ;;g7mZfEkAAAAJ;oo8QRmIAAAAJ;hao9RrgAAAAJ;X_IAjb8AAAAJ;https://scholar.google.com/citations?authorid=LYRkQhMAAAAJ", "orcid": ";;;;;;;;", "linkedin": "itsnamgyu/;raymin0223/;taehyeon-k-6a1239207/;%ED%98%84%EC%A7%81-%EC%A1%B0-029104142/?originalSubdomain=kr;;;;seyoung-yun-395130ab/;", "or_profile": "~Namgyu_Ho1;~Sangmin_Bae1;~Taehyeon_Kim1;~hyunjik.jo1;~Yireun_Kim1;~Tal_Schuster1;~James_Thorne1;~Se-Young_Yun1;~Adam_Fisch1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;LG Corporation;LG AI Research;Google;KAIST;KAIST;Massachusetts Institute of Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;lgresearch.ai;lgresearch.ai;google.com;kaist.ac.kr;kaist.ac.kr;mit.edu", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Researcher;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nho2024block,\ntitle={Block Transformer: Global-to-Local Language Modeling for Fast Inference},\nauthor={Namgyu Ho and Sangmin Bae and Taehyeon Kim and hyunjik.jo and Yireun Kim and Tal Schuster and Adam Fisch and James Thorne and Se-Young Yun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6osgTNnAZQ}\n}", "github": "", "reviewers": "unsu;BaSr;769s;Ht4c", "pdf_size": 1617606, "rating": "5;6;6;6", "confidence": "2;5;4;3", "soundness": "3;3;3;3", "novelty": "1;3;2;3", "presentation": "3;3;3;3", "wc_summary": "56;99;69;112", "wc_strengths": "33;51;80;28", "wc_weaknesses": "61;166;101;116", "wc_questions": "1;37;116;4", "wc_limitations": "3;4;1;9", "wc_review": "154;357;367;269", "wc_reply_reviewers": "16;153;232;50", "wc_reply_authors": "59;1044;64;112", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.0, 22.4610774452162 ], "wc_strengths_avg": [ 48.0, 20.359273071502333 ], "wc_weaknesses_avg": [ 111.0, 37.58324094593227 ], "wc_questions_avg": [ 39.5, 46.37078821844632 ], "wc_limitations_avg": [ 4.25, 2.947456530637899 ], "wc_review_avg": [ 286.75, 85.604833391579 ], "wc_reply_reviewers_avg": [ 112.75, 85.3503807841535 ], "wc_reply_authors_avg": [ 319.75, 418.6576017463436 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=781899254350128542&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;lgresearch.ai;lgresearch.ai;google.com;kaist.ac.kr;kaist.ac.kr;mit.edu", "author_num": 9, "aff_unique_index": "0;0;0;1;1;2;0;0;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;LG;Google;Massachusetts Institute of Technology", "aff_unique_dep": ";LG Corporation;Google;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.lg.com;https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "KAIST;LG;Google;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "xRAG: Extreme Context Compression for Retrieval-augmented Generation with One Token", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96497", "id": "6pTlXqrO0p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6pTlXqrO0p", "openreview": "https://openreview.net/forum?id=6pTlXqrO0p", "poster": "/media/PosterPDFs/NeurIPS%202024/96497.png?t=1733649148.3491926", "project": "", "author_site": "Xin Cheng, Xun Wang, Xingxing Zhang, Tao Ge, Si-Qing Chen, Furu Wei, Huishuai Zhang, Dongyan Zhao", "tldr": "", "abstract": "This paper introduces xRAG, an innovative context compression method tailored for retrieval-augmented generation. xRAG reinterprets document embeddings in dense retrieval--traditionally used solely for retrieval--as features from the retrieval modality. By employing a modality fusion methodology, xRAG seamlessly integrates these embeddings into the language model representation space, effectively eliminating the need for their textual counterparts and achieving an extreme compression rate. \nIn xRAG, the only trainable component is the modality bridge, while both the retriever and the language model remain frozen. This design choice allows for the reuse of offline-constructed document embeddings and preserves the plug-and-play nature of retrieval augmentation. \nExperimental results demonstrate that xRAG achieves an average improvement of over 10% across six knowledge-intensive tasks, adaptable to various language model backbones, ranging from a dense 7B model to an 8x7B Mixture of Experts configuration. xRAG not only significantly outperforms previous context compression methods but also matches the performance of uncompressed models on several datasets, while reducing overall FLOPs by a factor of 3.53. Our work pioneers new directions in retrieval-augmented generation from the perspective of multimodality fusion, and we hope it lays the foundation for future efficient and scalable retrieval-augmented systems.", "keywords": "retrieval-augmented generation;context compression;dense retrieval", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xin Cheng;Xun Wang;Xingxing Zhang;Tao Ge;Si-Qing Chen;Furu Wei;Huishuai Zhang;Dongyan Zhao", "authorids": "~Xin_Cheng2;~Xun_Wang5;~Xingxing_Zhang1;~Tao_Ge1;~Si-Qing_Chen1;~Furu_Wei1;~Huishuai_Zhang3;~Dongyan_Zhao2", "gender": ";;M;M;F;M;;M", "homepage": ";;https://xingxingzhang.github.io/;https://getao.github.io/;;https://www.microsoft.com/en-us/research/people/fuwei/;;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": ";;59/9985-2.html;136/7923;;72/5870;;63/1870", "google_scholar": ";;5yX53usAAAAJ;LYbs7Q8AAAAJ;;G-V1VpwAAAAJ;;lhR8-68AAAAJ", "orcid": ";;;;0000-0002-6945-4540;;;", "linkedin": ";;;;si-qing-chen-seattle/;;;", "or_profile": "~Xin_Cheng2;~Xun_Wang5;~Xingxing_Zhang1;~Tao_Ge1;~Si-Qing_Chen1;~Furu_Wei1;~Huishuai_Zhang3;~Dongyan_Zhao2", "aff": ";;Microsoft Research Asia;Tencent AI Lab;Microsoft;Microsoft Research;;Peking University", "aff_domain": ";;microsoft.com;tencent.com;microsoft.com;microsoft.com;;pku.edu.cn", "position": ";;Researcher;Principal Researcher;Partner Applied Science Manager;Distinguished Scientist;;Full Professor", "bibtex": "@inproceedings{\ncheng2024xrag,\ntitle={x{RAG}: Extreme Context Compression for Retrieval-augmented Generation with One Token},\nauthor={Xin Cheng and Xun Wang and Xingxing Zhang and Tao Ge and Si-Qing Chen and Furu Wei and Huishuai Zhang and Dongyan Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6pTlXqrO0p}\n}", "github": "", "reviewers": "MKxH;2bxX;jbwR;H8JS", "pdf_size": 994754, "rating": "3;4;4;7", "confidence": "5;5;4;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "58;69;54;54", "wc_strengths": "53;72;35;106", "wc_weaknesses": "206;244;97;90", "wc_questions": "2;78;45;6", "wc_limitations": "1;1;13;6", "wc_review": "320;464;244;262", "wc_reply_reviewers": "32;87;0;0", "wc_reply_authors": "109;832;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.75, 6.139014578904337 ], "wc_strengths_avg": [ 66.5, 26.291633650269812 ], "wc_weaknesses_avg": [ 159.25, 67.15420686747778 ], "wc_questions_avg": [ 32.75, 31.059418861272984 ], "wc_limitations_avg": [ 5.25, 4.9180788932265 ], "wc_review_avg": [ 322.5, 86.3872097014367 ], "wc_reply_reviewers_avg": [ 29.75, 35.541349158409844 ], "wc_reply_authors_avg": [ 235.25, 347.395577835988 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=518995060232621424&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";;microsoft.com;tencent.com;microsoft.com;microsoft.com;;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Microsoft;Tencent;Peking University", "aff_unique_dep": "Research;Tencent AI Lab;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;https://ai.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "MSR Asia;Tencent AI Lab;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "6qJKrOulTr", "title": "Unveiling Transformer Perception by Exploring Input Manifolds", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper introduces a general method for the exploration of equivalence classes in the input space of\nTransformer models. The proposed approach is based on sound mathematical theory which describes the internal layers of a Transformer architecture as sequential deformations of the input manifold. Using eigendecomposition of the pullback of the distance metric defined on the output space through the Jacobian of the model, we are able to reconstruct equivalence classes in the input space and navigate across them. We illustrate how this method can be used as a powerful tool for investigating how a Transformer sees the input space, facilitating local and task-agnostic explainability in Computer Vision and Natural Language Processing tasks.", "keywords": "transformers interpretability;input space exploration;geometric deep learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/26231006e17f32b3ac68ee98ef35133b01fc6358.zip", "author": "Alessandro Benfenati;Alfio Ferrara;Alessio Marta;Davide Riva;Elisabetta Rocchetti", "authorids": "~Alessandro_Benfenati1;~Alfio_Ferrara1;~Alessio_Marta1;~Davide_Riva1;~Elisabetta_Rocchetti1", "gender": "M;M;;M;F", "homepage": "https://sites.unimi.it/a_benfenati/;;;;https://islab.di.unimi.it/team/elisabetta.rocchetti@unimi.it", "dblp": "158/9578;93/1927.html;309/7016;20/7728;", "google_scholar": "q7oku6AAAAAJ;7vf1QqwAAAAJ;oBgBRvsAAAAJ;https://scholar.google.it/citations?user=Vpdef3IAAAAJ;lhRBBeIAAAAJ", "orcid": "0000-0002-2985-374X;0000-0002-4991-4984;0000-0002-3388-7168;0009-0003-9681-9423;0009-0000-5617-7612", "linkedin": ";alfio-ferrara-71b04a8/?lipi=urn%3Ali%3Apage%3Ad_flagship3_feed%3BTAF5C%2BNDT%2FKPk5%2FsXlayvA%3D%3D;;davide-riva-85053a221/;elisabetta-rocchetti-70a953143", "or_profile": "~Alessandro_Benfenati1;~Alfio_Ferrara1;~Alessio_Marta1;~Davide_Riva1;~Elisabetta_Rocchetti1", "aff": "University of Milan;University of Milan;University of Milan;Polytechnic Institute of Turin;University of Milan", "aff_domain": "unimi.it;unimi.it;unimi.it;polito.it;unimi.it", "position": "Assistant Professor;Full Professor;PhD student;PhD student;PhD student", "bibtex": "@misc{\nanonymous2024unveiling,\ntitle={Unveiling Transformer Perception by Exploring Input Manifolds},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=6qJKrOulTr}\n}", "github": "", "project": "", "reviewers": "b3eP;KhQb;TeNK;EZRw", "site": "https://openreview.net/forum?id=6qJKrOulTr", "pdf_size": 587175, "rating": "3;4;5;6", "confidence": "4;2;2;2", "soundness": "2;3;3;3", "novelty": "1;3;2;2", "presentation": "3;4;2;2", "wc_summary": "68;90;73;62", "wc_strengths": "45;98;61;211", "wc_weaknesses": "187;424;35;2", "wc_questions": "99;14;2;2", "wc_limitations": "56;45;1;1", "wc_review": "455;671;172;278", "wc_reply_reviewers": "155;139;0;0", "wc_reply_authors": "170;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 73.25, 10.425329730996522 ], "wc_strengths_avg": [ 103.75, 64.83585042243219 ], "wc_weaknesses_avg": [ 162.0, 166.58181173225364 ], "wc_questions_avg": [ 29.25, 40.5670740872447 ], "wc_limitations_avg": [ 25.75, 25.053692342646823 ], "wc_review_avg": [ 394.0, 189.20227271362256 ], "wc_reply_reviewers_avg": [ 73.5, 73.7173656610164 ], "wc_reply_authors_avg": [ 42.5, 73.61215932167728 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sNQuGq2h20wJ:scholar.google.com/&scioq=Unveiling+Transformer+Perception+by+Exploring+Input+Manifolds&hl=en&as_sdt=0,44", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Milan;Polytechnic Institute of Turin", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimi.it;https://www.polito.it", "aff_unique_abbr": "UniMi;Polito", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Memorize What Matters: Emergent Scene Decomposition from Multitraverse", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96496", "id": "6qr3932RWe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6qr3932RWe", "openreview": "https://openreview.net/forum?id=6qr3932RWe", "poster": "/media/PosterPDFs/NeurIPS%202024/96496.png?t=1731724154.8679535", "project": "", "author_site": "Yiming Li, Zehong Wang, Yue Wang, Zhiding Yu, Zan Gojcic, Marco Pavone, Chen Feng, Jose M. Alvarez", "tldr": "", "abstract": "Humans naturally retain memories of permanent elements, while ephemeral moments often slip through the cracks of memory. This selective retention is crucial for robotic perception, localization, and mapping. To endow robots with this capability, we introduce 3D Gaussian Mapping (3DGM), a self-supervised, camera-only offline mapping framework grounded in 3D Gaussian Splatting. 3DGM converts multitraverse RGB videos from the same region into a Gaussian-based environmental map while concurrently performing 2D ephemeral object segmentation. Our key observation is that the environment remains consistent across traversals, while objects frequently change. This allows us to exploit self-supervision from repeated traversals to achieve environment-object decomposition. More specifically, 3DGM formulates multitraverse environmental mapping as a robust 3D representation learning problem, treating pixels of the environment and objects as inliers and outliers, respectively. Using robust feature distillation, feature residual mining, and robust optimization, 3DGM simultaneously performs 2D segmentation and 3D mapping without human intervention. We build the Mapverse benchmark, sourced from the Ithaca365 and nuPlan datasets, to evaluate our method in unsupervised 2D segmentation, 3D reconstruction, and neural rendering. Extensive results verify the effectiveness and potential of our method for self-driving and robotics.", "keywords": "Autonomous Driving;Self-Supervised Learning;3D Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yiming Li;Zehong Wang;Yue Wang;Zhiding Yu;Zan Gojcic;Marco Pavone;Chen Feng;Jose M. Alvarez", "authorids": "~Yiming_Li2;~Zehong_Wang3;~Yue_Wang2;~Zhiding_Yu1;~Zan_Gojcic1;~Marco_Pavone1;~Chen_Feng2;~Jose_M._Alvarez2", "gender": "M;F;M;;M;M;M;", "homepage": "https://yimingli-page.github.io/;https://github.com/ZehongW07;https://yuewang.xyz;;http://zgojcic.github.io/;https://web.stanford.edu/~pavone/;https://ai4ce.github.io/;", "dblp": "l/YimingLi-3;https://dblp.org/pers/;33/4822-41;;230/4348;91/3382-1.html;01/161-2;", "google_scholar": "https://scholar.google.com/citations?hl=en;;v-AEFIEAAAAJ;;8KsqL4gAAAAJ;RhOpyXcAAAAJ;YeG8ZM0AAAAJ;", "orcid": "0000-0002-0157-6218;;;;;;0000-0003-3211-1576;", "linkedin": "yiming-li-58b519173/;;;;;;simbaforrest/;", "or_profile": "~Yiming_Li2;~Zehong_Wang3;~Yue_Wang2;~Zhiding_Yu1;~Zan_Gojcic1;~Marco_Pavone1;~Chen_Feng2;~Jose_M._Alvarez2", "aff": "New York University;New York University;NVIDIA;;NVIDIA ;Stanford University;New York University;", "aff_domain": "nyu.edu;nyu.edu;nvidia.com;;nvidia.com;stanford.edu;nyu.edu;", "position": "PhD student;MS student;Researcher;;Researcher;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nli2024memorize,\ntitle={Memorize What Matters: Emergent Scene Decomposition from Multitraverse},\nauthor={Yiming Li and Zehong Wang and Yue Wang and Zhiding Yu and Zan Gojcic and Marco Pavone and Chen Feng and Jose M. Alvarez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6qr3932RWe}\n}", "github": "", "reviewers": "mMjd;3Lqq;D2oY", "pdf_size": 9937335, "rating": "5;5;7", "confidence": "5;2;5", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "68;102;245", "wc_strengths": "52;174;50", "wc_weaknesses": "284;121;58", "wc_questions": "23;180;6", "wc_limitations": "6;1;75", "wc_review": "433;578;434", "wc_reply_reviewers": "0;26;0", "wc_reply_authors": "0;85;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 138.33333333333334, 76.69130039030212 ], "wc_strengths_avg": [ 92.0, 57.98850460795369 ], "wc_weaknesses_avg": [ 154.33333333333334, 95.22721366406886 ], "wc_questions_avg": [ 69.66666666666667, 78.32553152637325 ], "wc_limitations_avg": [ 27.333333333333332, 33.76717669901087 ], "wc_review_avg": [ 481.6666666666667, 68.11917661145225 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 12.256517540566824 ], "wc_reply_authors_avg": [ 28.333333333333332, 40.069384267237695 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13075926162620535355&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nyu.edu;nyu.edu;nvidia.com;;nvidia.com;stanford.edu;nyu.edu;", "author_num": 8, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "New York University;NVIDIA;Stanford University", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.nyu.edu;https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "NYU;NVIDIA;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Consensus Learning with Deep Sets for Essential Matrix Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96495", "id": "6sIOBDwr6d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6sIOBDwr6d", "openreview": "https://openreview.net/forum?id=6sIOBDwr6d", "poster": "/media/PosterPDFs/NeurIPS%202024/96495.png?t=1733352761.3255286", "project": "", "author_site": "Dror Moran, Yuval Margalit, Guy Trostianetsky, Fadi Khatib, Meirav Galun, Ronen Basri", "tldr": "", "abstract": "Robust estimation of the essential matrix, which encodes the relative position and orientation of two cameras, is a fundamental step in structure from motion pipelines. Recent deep-based methods achieved accurate estimation by using complex network architectures that involve graphs, attention layers, and hard pruning steps. Here, we propose a simpler network architecture based on Deep Sets. Given a collection of point matches extracted from two images, our method identifies outlier point matches and models the displacement noise in inlier matches. A weighted DLT module uses these predictions to regress the essential matrix. Our network achieves accurate recovery that is superior to existing networks with significantly more complex architectures.", "keywords": "Computer Vision;Consensus learning;Relative pose estimation;Deep Sets", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dror Moran;Yuval Margalit;Guy Trostianetsky;Fadi Khatib;Meirav Galun;Ronen Basri", "authorids": "~Dror_Moran1;~Yuval_Margalit1;~Guy_Trostianetsky1;~Fadi_Khatib1;~Meirav_Galun1;~Ronen_Basri1", "gender": ";M;M;M;F;M", "homepage": ";;;https://fadikhateeb.github.io/;https://www.weizmann.ac.il/math/meirav/;https://www.weizmann.ac.il/math/ronen/", "dblp": "280/1308;334/4505;381/0396;334/3887;92/3521;b/RonenBasri.html", "google_scholar": "kS5jfSoAAAAJ;;Jp8GUPYAAAAJ;;https://scholar.google.co.il/citations?user=oVsC3XcAAAAJ;d6vuvHIAAAAJ", "orcid": ";;;;;", "linkedin": ";yuval-margalit-9688431b7/;guy-trostianetsky-73025a159/;fadi-khatib;;", "or_profile": "~Dror_Moran1;~Yuval_Margalit1;~Guy_Trostianetsky1;~Fadi_Khatib1;~Meirav_Galun1;~Ronen_Basri1", "aff": "Weizmann Institute of Science;Weizmann Institute of Science;Weizmann Institute of Science;Weizmann Institute of Science;Weizmann Institute;Meta Platforms Inc.", "aff_domain": "weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;meta.com", "position": "PhD student;MS student;MS student;MS student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nmoran2024consensus,\ntitle={Consensus Learning with Deep Sets for Essential Matrix Estimation},\nauthor={Dror Moran and Yuval Margalit and Guy Trostianetsky and Fadi Khatib and Meirav Galun and Ronen Basri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6sIOBDwr6d}\n}", "github": "", "reviewers": "1GwE;wTxF;THYF;HiXm", "pdf_size": 19829594, "rating": "6;6;7;8", "confidence": "5;3;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "87;127;108;89", "wc_strengths": "56;15;108;143", "wc_weaknesses": "379;10;132;121", "wc_questions": "379;150;17;74", "wc_limitations": "1;19;17;31", "wc_review": "902;321;382;458", "wc_reply_reviewers": "58;0;44;172", "wc_reply_authors": "186;0;6;137", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.75, 16.223054582907622 ], "wc_strengths_avg": [ 80.5, 48.869724779253666 ], "wc_weaknesses_avg": [ 160.5, 134.87494207598385 ], "wc_questions_avg": [ 155.0, 137.66444711689363 ], "wc_limitations_avg": [ 17.0, 10.677078252031311 ], "wc_review_avg": [ 515.75, 228.22179453330043 ], "wc_reply_reviewers_avg": [ 68.5, 63.47243496195809 ], "wc_reply_authors_avg": [ 82.25, 81.14916820276102 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RSa7WcZeT44J:scholar.google.com/&scioq=Consensus+Learning+with+Deep+Sets+for+Essential+Matrix+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;weizmann.ac.il;meta.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Weizmann Institute of Science;Meta", "aff_unique_dep": ";Meta Platforms Inc.", "aff_unique_url": "https://www.weizmann.org.il;https://www.meta.com", "aff_unique_abbr": "Weizmann;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "Prompt Optimization with EASE? Efficient Ordering-aware Automated Selection of Exemplars", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96494", "id": "6uRrwWhZlM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6uRrwWhZlM", "openreview": "https://openreview.net/forum?id=6uRrwWhZlM", "poster": "/media/PosterPDFs/NeurIPS%202024/96494.png?t=1731591144.6245804", "project": "", "author_site": "Zhaoxuan Wu, Xiaoqiang Lin, Zhongxiang Dai, Wenyang Hu, Yao Shu, See-Kiong Ng, Patrick Jaillet, Bryan Kian Hsiang Low", "tldr": "", "abstract": "Large language models (LLMs) have shown impressive capabilities in real-world applications. The capability of *in-context learning* (ICL) allows us to adapt an LLM to downstream tasks by including input-label exemplars in the prompt without model fine-tuning. However, the quality of these exemplars in the prompt greatly impacts performance, highlighting the need for an effective automated exemplar selection method. Recent studies have explored retrieval-based approaches to select exemplars tailored to individual test queries, which can be undesirable due to extra test-time computation and an increased risk of data exposure. Moreover, existing methods fail to adequately account for the impact of exemplar ordering on the performance. On the other hand, the impact of the *instruction*, another essential component in the prompt given to the LLM, is often overlooked in existing exemplar selection methods. To address these challenges, we propose a novel method named $\\texttt{EASE}$, which leverages the hidden embedding from a pre-trained language model to represent ordered sets of exemplars and uses a neural bandit algorithm to optimize the sets of exemplars *while accounting for exemplar ordering*. Our $\\texttt{EASE}$ can efficiently find an ordered set of exemplars that *performs well for all test queries* from a given task, thereby eliminating test-time computation. Importantly, $\\texttt{EASE}$ can be readily extended to *jointly optimize both the exemplars and the instruction*. Through extensive empirical evaluations (including novel tasks), we demonstrate the superiority of $\\texttt{EASE}$ over existing methods, and reveal practical insights about the impact of exemplar selection on ICL, which may be of independent interest. Our code is available at https://github.com/ZhaoxuanWu/EASE-Prompt-Optimization.", "keywords": "Exemplar selection;Prompt optimization;In-context learning;LLMs;Data selection", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/635432425fd600ed067baf4a9c1bd816d4337c3b.zip", "author": "Zhaoxuan Wu;Xiaoqiang Lin;Zhongxiang Dai;Wenyang Hu;Yao Shu;See-Kiong Ng;Patrick Jaillet;Bryan Kian Hsiang Low", "authorids": "~Zhaoxuan_Wu1;~Xiaoqiang_Lin1;~Zhongxiang_Dai1;~Wenyang_Hu1;~Yao_Shu1;~See-Kiong_Ng1;~Patrick_Jaillet1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://zhaoxuanwu.github.io/;https://xqlin98.github.io/;https://daizhongxiang.github.io/;https://scholar.google.com/citations?user=EecZzYsAAAAJ;https://yao.notion.site;https://www.comp.nus.edu.sg/~ngsk/;http://web.mit.edu/jaillet/www/;http://www.comp.nus.edu.sg/~lowkh", "dblp": "298/5083;269/4573;172/4968;258/0545;44/1338;00/5480;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick;97/4877", "google_scholar": "Th_mPm8AAAAJ;nqKwA60AAAAJ;1v8xOIYAAAAJ;EecZzYsAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ;ND0FM6EAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": "0009-0002-5659-6387;;;0009-0008-6189-7890;;0000-0001-6565-7511;0000-0002-8585-6566;", "linkedin": "zhaoxuanwu/;;;;yao-shu-a5640514b;seekiong/?originalSubdomain=sg;patrick-jaillet-1260445/;", "or_profile": "~Zhaoxuan_Wu1;~Xiaoqiang_Lin1;~Zhongxiang_Dai1;~Wenyang_Hu1;~Yao_Shu1;~See-Kiong_Ng1;~Patrick_Jaillet1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;National University of Singapore;Massachusetts Institute of Technology;National University of Singapore;Guangming Lab;National University of Singapore;Massachusetts Institute of Technology;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;mit.edu;u.nus.edu;gml.ac.cn;nus.edu.sg;mit.edu;nus.edu.sg", "position": "PhD student;PhD student;Postdoc;PhD student;Researcher;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024prompt,\ntitle={Prompt Optimization with {EASE}? Efficient Ordering-aware Automated Selection of Exemplars},\nauthor={Zhaoxuan Wu and Xiaoqiang Lin and Zhongxiang Dai and Wenyang Hu and Yao Shu and See-Kiong Ng and Patrick Jaillet and Bryan Kian Hsiang Low},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6uRrwWhZlM}\n}", "github": "", "reviewers": "NbA9;1tAm;FzJo;u1fC", "pdf_size": 504279, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "114;89;92;122", "wc_strengths": "56;29;73;47", "wc_weaknesses": "409;126;80;372", "wc_questions": "74;52;52;163", "wc_limitations": "10;20;72;1", "wc_review": "663;316;369;705", "wc_reply_reviewers": "0;279;39;90", "wc_reply_authors": "112;472;121;211", "reply_reviewers": "0;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.25, 14.077908225301087 ], "wc_strengths_avg": [ 51.25, 15.880412463157246 ], "wc_weaknesses_avg": [ 246.75, 145.25731478999603 ], "wc_questions_avg": [ 85.25, 45.77867953534702 ], "wc_limitations_avg": [ 25.75, 27.535204738661378 ], "wc_review_avg": [ 513.25, 172.41574029072868 ], "wc_reply_reviewers_avg": [ 102.0, 107.05839528033287 ], "wc_reply_authors_avg": [ 229.0, 145.53865465916607 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1905045143850109329&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "u.nus.edu;u.nus.edu;mit.edu;u.nus.edu;gml.ac.cn;nus.edu.sg;mit.edu;nus.edu.sg", "author_num": 8, "aff_unique_index": "0;0;1;0;2;0;1;0", "aff_unique_norm": "National University of Singapore;Massachusetts Institute of Technology;Guangming Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://web.mit.edu;", "aff_unique_abbr": "NUS;MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0;1;0", "aff_country_unique": "Singapore;United States;China" }, { "title": "Towards Next-Level Post-Training Quantization of Hyper-Scale Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96493", "id": "6uv9ViIoMj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6uv9ViIoMj", "openreview": "https://openreview.net/forum?id=6uv9ViIoMj", "poster": "", "project": "", "author_site": "Junhan Kim, Chungman Lee, Eulrang Cho, Kyungphil Park, Ho-young Kim, Joonyoung Kim, Yongkweon Jeon", "tldr": "", "abstract": "With the increasing complexity of generative AI models, post-training quantization (PTQ) has emerged as a promising solution for deploying hyper-scale models on edge devices such as mobile and TVs.\nExisting PTQ schemes, however, consume considerable time and resources, which could be a bottleneck in real situations where frequent model updates and multiple hyperparameter tunings are required.\nAs a cost-effective alternative, learning-free PTQ schemes have been proposed. \nHowever, the performance is somewhat limited because they cannot consider the inter-layer dependency within the attention module, which is a significant feature of Transformers.\nIn this paper, we thus propose a novel PTQ algorithm that balances accuracy and efficiency.\nThe key idea of the proposed algorithm called aespa is to perform quantization layer-wise for efficiency while targeting attention-wise reconstruction to consider the cross-layer dependency.\nThrough extensive experiments on various language models and complexity analysis, we demonstrate that aespa is accurate and efficient in quantizing Transformer models. The code will be available at https: //github.com/SamsungLabs/aespa.", "keywords": "Hyper-scale;Compression;Quantization;Transformers;LLM", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Junhan Kim;Chungman Lee;Eulrang Cho;Kyungphil Park;Ho-young Kim;Joonyoung Kim;Yongkweon Jeon", "authorids": "~Junhan_Kim1;~Chungman_Lee1;~Eulrang_Cho1;~Kyungphil_Park1;~Ho-young_Kim1;~Joonyoung_Kim1;~Yongkweon_Jeon1", "gender": "M;;F;;;M;", "homepage": ";;;;;;", "dblp": "218/0660;245/8270;330/0947;;;;", "google_scholar": "M9GyRr0AAAAJ;https://scholar.google.co.kr/citations?user=cyYuNF8AAAAJ;https://scholar.google.com/citations?pli=1;;;uEmXGLgAAAAJ;", "orcid": ";0000-0003-1843-4627;;;;;", "linkedin": ";;;kyung-phil-park-7b5a2a162/;ho-young-kim-b406301bb;;", "or_profile": "~Junhan_Kim1;~Chungman_Lee1;~Eulrang_Cho1;~Kyungphil_Park1;~Ho-young_Kim1;~Joonyoung_Kim1;~Yongkweon_Jeon1", "aff": "Samsung Research;Samsung Research;Korea University;Samsung Research;Samsung Electronics;Samsung;", "aff_domain": "samsung.com;samsung.com;korea.ac.kr;samsung.com;samsung.com;samsung.com;", "position": "Researcher;Researcher;MS student;Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\nkim2024towards,\ntitle={Towards Next-Level Post-Training Quantization of Hyper-Scale Transformers},\nauthor={Junhan Kim and Chungman Lee and Eulrang Cho and Kyungphil Park and Ho-young Kim and Joonyoung Kim and Yongkweon Jeon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6uv9ViIoMj}\n}", "github": "", "reviewers": "tsw3;E1kR;vuzn;Juv9;QBHg", "pdf_size": 1337209, "rating": "5;5;5;6;7", "confidence": "4;4;5;4;4", "soundness": "3;2;2;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "53;91;83;44;77", "wc_strengths": "70;88;48;28;82", "wc_weaknesses": "73;231;54;20;82", "wc_questions": "4;3;335;24;44", "wc_limitations": "10;1;1;14;18", "wc_review": "210;414;521;130;303", "wc_reply_reviewers": "0;15;25;0;11", "wc_reply_authors": "0;34;35;0;48", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 69.6, 18.017769007288333 ], "wc_strengths_avg": [ 63.2, 22.292599668948437 ], "wc_weaknesses_avg": [ 92.0, 72.6773692424265 ], "wc_questions_avg": [ 82.0, 127.39073749688397 ], "wc_limitations_avg": [ 8.8, 6.8527366796047255 ], "wc_review_avg": [ 315.6, 139.73489184881493 ], "wc_reply_reviewers_avg": [ 10.2, 9.495261976375375 ], "wc_reply_authors_avg": [ 23.4, 19.734234213670415 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZMiGyLs7DE0J:scholar.google.com/&scioq=Towards+Next-Level+Post-Training+Quantization+of+Hyper-Scale+Transformers&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "samsung.com;samsung.com;korea.ac.kr;samsung.com;samsung.com;samsung.com;", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Samsung;Korea University", "aff_unique_dep": "Samsung Research;", "aff_unique_url": "https://research.samsung.com;https://www.korea.ac.kr", "aff_unique_abbr": "Samsung;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Linear Time Approximation Algorithm for Column Subset Selection with Local Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96492", "id": "6vDYsXn0Dl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6vDYsXn0Dl", "openreview": "https://openreview.net/forum?id=6vDYsXn0Dl", "poster": "/media/PosterPDFs/NeurIPS%202024/96492.png?t=1733884890.6610117", "project": "", "author_site": "YuanBin Zou, Ziyun Huang, Jinhui Xu, Jianxin Wang, Qilong Feng", "tldr": "", "abstract": "The Column Subset Selection (CSS) problem has been widely studied in dimensionality reduction and feature selection. The goal of the CSS problem is to output a submatrix S, consisting of k columns from an n\u00d7d input matrix A that minimizes the residual error \u2016A-SS^\\dagger A\u2016_F^2, where S^\\dagger is the Moore-Penrose inverse matrix of S. Many previous approximation algorithms have non-linear running times in both n and d, while the existing linear-time algorithms have a relatively larger approximation ratios. Additionally, the local search algorithms in existing results for solving the CSS problem are heuristic. To achieve linear running time while maintaining better approximation using a local search strategy, we propose a local search-based approximation algorithm for the CSS problem with exactly k columns selected. A key challenge in achieving linear running time with the local search strategy is how to avoid exhaustive enumerations of candidate columns for constructing swap pairs in each local search step. To address this issue, we propose a two-step mixed sampling method that reduces the number of enumerations for swap pair construction from O(dk) to k in linear time. Although the two-step mixed sampling method reduces the search space of local search strategy, bounding the residual error after swaps is a non-trivial task. To estimate the changes in residual error after swaps, we propose a matched swap pair construction method to bound the approximation loss, ensuring a constant probability of loss reduction in each local search step. In expectation, these techniques enable us to obtain the local search algorithm for the CSS problem with theoretical guarantees, where a 53(k+1)-approximate solution can be obtained in linear running time O(ndk^4\\log k). Empirical experiments show that our proposed algorithm achieves better quality and time compared to previous algorithms on both small and large datasets. Moreover, it is at least 10 times faster than state-of-the-art algorithms across all large-scale datasets.", "keywords": "Column Subset Selection;Local Search", "primary_area": "optimization", "supplementary_material": "", "author": "YuanBin Zou;Ziyun Huang;Jinhui Xu;Jianxin Wang;Qilong Feng", "authorids": "~YuanBin_Zou1;~Ziyun_Huang1;~Jinhui_Xu1;~Jianxin_Wang1;~Qilong_Feng1", "gender": "M;M;M;;M", "homepage": ";;https://www.cse.buffalo.edu/~jinhui/;https://faculty.csu.edu.cn/wangjianxin1/zh_CN/index/106082/list/;", "dblp": "249/5290;;24/6437-1.html;75/2669-1.html;75/6154", "google_scholar": ";1MPrmtEAAAAJ;https://scholar.google.com/citations?hl=en;7pgY2F0AAAAJ;", "orcid": "0000-0001-6050-054X;;;0000-0003-1516-0480;", "linkedin": ";;;;", "or_profile": "~YuanBin_Zou1;~Ziyun_Huang1;~Jinhui_Xu1;~Jianxin_Wang1;~Qilong_Feng1", "aff": "Central South University;Pennsylvania State University, Erie;University at Buffalo, State University of New York;Central South University;Central South University, China", "aff_domain": "csu.edu.cn;psu.edu;buffalo.edu;csu.edu.cn;csu.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzou2024linear,\ntitle={Linear Time Approximation Algorithm for Column Subset Selection with Local Search},\nauthor={YuanBin Zou and Ziyun Huang and Jinhui Xu and Jianxin Wang and Qilong Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6vDYsXn0Dl}\n}", "github": "", "reviewers": "ANHu;MkHx;oKb7;wwuR;6BV1", "pdf_size": 419861, "rating": "5;6;6;7;7", "confidence": "3;4;4;3;3", "soundness": "3;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "80;107;369;124;66", "wc_strengths": "40;50;58;59;51", "wc_weaknesses": "119;103;51;28;140", "wc_questions": "164;140;99;18;24", "wc_limitations": "12;6;1;4;1", "wc_review": "415;406;578;233;282", "wc_reply_reviewers": "0;22;0;17;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 149.2, 111.74864652424208 ], "wc_strengths_avg": [ 51.6, 6.829348431585549 ], "wc_weaknesses_avg": [ 88.2, 42.092279577138605 ], "wc_questions_avg": [ 89.0, 59.31610236689528 ], "wc_limitations_avg": [ 4.8, 4.069397989875161 ], "wc_review_avg": [ 382.8, 120.23210885616204 ], "wc_reply_reviewers_avg": [ 7.8, 9.682974749528162 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IvqsipQKuJQJ:scholar.google.com/&scioq=Linear+Time+Approximation+Algorithm+for+Column+Subset+Selection+with+Local+Search&hl=en&as_sdt=0,10", "gs_version_total": 0, "email": "csu.edu.cn;psu.edu;buffalo.edu;csu.edu.cn;csu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Central South University;Pennsylvania State University;University at Buffalo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.csu.edu.cn;https://www.psu.edu;https://www.buffalo.edu", "aff_unique_abbr": "CSU;PSU;UB", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Erie;Buffalo", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "UrbanDataLayer: A Unified Data Pipeline for Urban Science", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97837", "id": "6vFy6H4mTI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6vFy6H4mTI", "openreview": "https://openreview.net/forum?id=6vFy6H4mTI", "poster": "/media/PosterPDFs/NeurIPS%202024/97837.png?t=1733233132.6485431", "project": "", "author_site": "Yiheng Wang, Tianyu Wang, YuYing Zhang, Hongji Zhang, Haoyu Zheng, Guanjie Zheng, Linghe Kong", "tldr": "", "abstract": "The rapid progression of urbanization has generated a diverse array of urban data, facilitating significant advancements in urban science and urban computing. Current studies often work on separate problems case by case using diverse data, e.g., air quality prediction, and built-up areas classification. This fragmented approach hinders the urban research field from advancing at the pace observed in Computer Vision and Natural Language Processing, due to two primary reasons. On the one hand, the diverse data processing steps lead to the lack of large-scale benchmarks and therefore decelerate iterative methodology improvement on a single problem. On the other hand, the disparity in multi-modal data formats hinders the combination of the related modal data to stimulate more research findings. To address these challenges, we propose UrbanDataLayer (UDL), a suite of standardized data structures and pipelines for city data engineering, providing a unified data format for researchers. This allows researchers to easily build up large-scale benchmarks and combine multi-modal data, thus expediting the development of multi-modal urban foundation models. To verify the effectiveness of our work, we present four distinct urban problem tasks utilizing the proposed data layer. UrbanDataLayer aims to enhance standardization and operational efficiency within the urban science research community. The examples and source code are available at https://github.com/SJTU-CILAB/udl.", "keywords": "Urban Data;Urban Computing;Data Management;Data Fusion", "primary_area": "", "supplementary_material": "/attachment/e59dcd4d1dfbaa470ff7f3238c981721f25a43cf.pdf", "author": "Yiheng Wang;Tianyu Wang;YuYing Zhang;Hongji Zhang;Haoyu Zheng;Guanjie Zheng;Linghe Kong", "authorids": "~Yiheng_Wang2;~Tianyu_Wang13;~YuYing_Zhang2;~Hongji_Zhang1;~Haoyu_Zheng3;~Guanjie_Zheng1;~Linghe_Kong1", "gender": "F;;M;M;M;M;M", "homepage": ";https://github.com/wty500;https://github.com/sjtuzyy;https://github.com/zhj2022;https://github.com/dagounai;http://jhc.sjtu.edu.cn/~gjzheng/;https://www.cs.sjtu.edu.cn/~linghe.kong/", "dblp": ";;;;;204/3356;23/7909", "google_scholar": ";;;;;jJpqDQIAAAAJ;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ", "orcid": "0000-0001-5844-0920;;;;;;0000-0001-9266-3044", "linkedin": ";;;https://cn.linkedin.com/in/%E9%97%B3%E6%9E%81-%E5%BC%A0-ab7b64242;;;", "or_profile": "~Yiheng_Wang2;~Tianyu_Wang13;~YuYing_Zhang2;~Hongji_Zhang1;~Haoyu_Zheng3;~Guanjie_Zheng1;~Linghe_Kong1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024urbandatalayer,\ntitle={UrbanDataLayer: A Unified Data Pipeline for Urban Science},\nauthor={Yiheng Wang and Tianyu Wang and YuYing Zhang and Hongji Zhang and Haoyu Zheng and Guanjie Zheng and Linghe Kong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=6vFy6H4mTI}\n}", "github": "", "reviewers": "xfSy;xojT;phN3", "pdf_size": 12085602, "rating": "6;6;7", "confidence": "5;4;4", "wc_summary_and_contributions": "110;61;103", "wc_strengths": "4;54;90", "wc_improvement": "13;3;62", "wc_limitations": "58;3;5", "wc_correctness": "58;19;13", "wc_clarity": "3;9;5", "wc_relation_to_prior_work": "52;100;10", "wc_documentation": "4;43;7", "wc_additional_feedback": "1;1;1", "wc_review": "303;293;296", "wc_reply_reviewers": "0;11;20", "wc_reply_authors": "75;72;32", "reply_reviewers": "0;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 91.33333333333333, 21.638443156156644 ], "wc_strengths_avg": [ 49.333333333333336, 35.264083081168515 ], "wc_improvement_avg": [ 26.0, 25.78113005022601 ], "wc_limitations_avg": [ 22.0, 25.468935326524086 ], "wc_correctness_avg": [ 30.0, 19.949937343260004 ], "wc_clarity_avg": [ 5.666666666666667, 2.494438257849294 ], "wc_relation_to_prior_work_avg": [ 54.0, 36.76955262170047 ], "wc_documentation_avg": [ 18.0, 17.72004514666935 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 297.3333333333333, 4.189935029992179 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 8.178562764256865 ], "wc_reply_authors_avg": [ 59.666666666666664, 19.601587237318874 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Wx_NyNH3wQkJ:scholar.google.com/&scioq=UrbanDataLayer:+A+Unified+Data+Pipeline+for+Urban+Science&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Energy-based Epistemic Uncertainty for Graph Neural Networks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96491", "id": "6vNPPtWH1Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6vNPPtWH1Q", "openreview": "https://openreview.net/forum?id=6vNPPtWH1Q", "poster": "", "project": "", "author_site": "Dominik Fuchsgruber, Tom Wollschl\u00e4ger, Stephan G\u00fcnnemann", "tldr": "", "abstract": "In domains with interdependent data, such as graphs, quantifying the epistemic uncertainty of a Graph Neural Network (GNN) is challenging as uncertainty can arise at different structural scales. Existing techniques neglect this issue or only distinguish between structure-aware and structure-agnostic uncertainty without combining them into a single measure. We propose GEBM, an energy-based model (EBM) that provides high-quality uncertainty estimates by aggregating energy at different structural levels that naturally arise from graph diffusion. In contrast to logit-based EBMs, we provably induce an integrable density in the data space by regularizing the energy function. We introduce an evidential interpretation of our EBM that significantly improves the predictive robustness of the GNN. Our framework is a simple and effective post hoc method applicable to any pre-trained GNN that is sensitive to various distribution shifts. It consistently achieves the best separation of in-distribution and out-of-distribution data on 6 out of 7 anomaly types while having the best average rank over shifts on *all* datasets.", "keywords": "graph neural network;uncertainty estimation;epistemic uncertainty;energy-based model", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Dominik Fuchsgruber;Tom Wollschl\u00e4ger;Stephan G\u00fcnnemann", "authorids": "~Dominik_Fuchsgruber1;~Tom_Wollschl\u00e4ger1;~Stephan_G\u00fcnnemann1", "gender": "M;M;M", "homepage": "https://github.com/dfuchsgruber;https://www.linkedin.com/in/wollschlaeger/;http://www.daml.in.tum.de", "dblp": "377/3314.html;332/0829;43/3011", "google_scholar": "https://scholar.google.de/citations?user=K-egQS0AAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";wollschlaeger/;", "or_profile": "~Dominik_Fuchsgruber1;~Tom_Wollschl\u00e4ger1;~Stephan_G\u00fcnnemann1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Valence Labs powered by recursion;Technical University Munich", "aff_domain": "tum.de;valencelabs.com;tum.de", "position": "PhD student;Researcher;Professor", "bibtex": "@inproceedings{\nfuchsgruber2024energybased,\ntitle={Energy-based Epistemic Uncertainty for Graph Neural Networks},\nauthor={Dominik Fuchsgruber and Tom Wollschl{\\\"a}ger and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6vNPPtWH1Q}\n}", "github": "", "reviewers": "VZ1v;nGhJ;Q6TT", "pdf_size": 956538, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "59;38;64", "wc_strengths": "71;66;105", "wc_weaknesses": "50;284;333", "wc_questions": "28;136;244", "wc_limitations": "23;1;7", "wc_review": "231;525;753", "wc_reply_reviewers": "17;60;33", "wc_reply_authors": "33;29;30", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.666666666666664, 11.264496832477201 ], "wc_strengths_avg": [ 80.66666666666667, 17.326921891156037 ], "wc_weaknesses_avg": [ 222.33333333333334, 123.48909083621741 ], "wc_questions_avg": [ 136.0, 88.18163074019441 ], "wc_limitations_avg": [ 10.333333333333334, 9.285592184789412 ], "wc_review_avg": [ 503.0, 213.6726468221892 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 17.745108872274887 ], "wc_reply_authors_avg": [ 30.666666666666668, 1.699673171197595 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5870378394046092601&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tum.de;valencelabs.com;tum.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Valence Labs;Technical University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tum.de;;https://www.tum.de", "aff_unique_abbr": "TUM;;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany;" }, { "title": "Fast Rates for Bandit PAC Multiclass Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96490", "id": "6zOKbzjBO4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6zOKbzjBO4", "openreview": "https://openreview.net/forum?id=6zOKbzjBO4", "poster": "/media/PosterPDFs/NeurIPS%202024/96490.png?t=1733046616.3672822", "project": "", "author_site": "Liad Erez, Alon Peled-Cohen, Tomer Koren, Yishay Mansour, Shay Moran", "tldr": "", "abstract": "We study multiclass PAC learning with bandit feedback, where inputs are classified into one of $K$ possible labels and feedback is limited to whether or not the predicted labels are correct. Our main contribution is in designing a novel learning algorithm for the agnostic $(\\varepsilon,\\delta)$-PAC version of the problem, with sample complexity of $O\\big( (\\operatorname{poly}(K) + 1 / \\varepsilon^2) \n\\log (|\\mathcal{H}| / \\delta) \\big)$ for any finite hypothesis class $\\mathcal{H}$. In terms of the leading dependence on $\\varepsilon$, this improves upon existing bounds for the problem, that are of the form $O(K/\\varepsilon^2)$. We also provide an extension of this result to general classes and establish similar sample complexity bounds in which $\\log |\\mathcal{H}|$ is replaced by the Natarajan dimension.\nThis matches the optimal rate in the full-information version of the problem and resolves an open question studied by Daniely, Sabato, Ben-David, and Shalev-Shwartz (2011) who demonstrated that the multiplicative price of bandit feedback in realizable PAC learning is $\\Theta(K)$. We complement this by revealing a stark contrast with the agnostic case, where the price of bandit feedback is only $O(1)$ as $\\varepsilon \\to 0$. Our algorithm utilizes a stochastic optimization technique to minimize a log-barrier potential based on Frank-Wolfe updates for computing a low-variance exploration distribution over the hypotheses, and is made computationally efficient provided access to an ERM oracle over $\\mathcal{H}$.", "keywords": "bandit;classification;multiclass", "primary_area": "bandits", "supplementary_material": "", "author": "Liad Erez;Alon Cohen;Tomer Koren;Yishay Mansour;Shay Moran", "authorids": "~Liad_Erez1;~Alon_Cohen1;~Tomer_Koren1;~Yishay_Mansour2;~Shay_Moran1", "gender": "M;M;M;M;M", "homepage": ";https://sites.google.com/site/aloncohentechnion/;https://tomerkoren.github.io;http://www.cs.technion.ac.il/~shaymrn/;https://www.cs.tau.ac.il/~mansour/", "dblp": "298/1210;133/2021;12/10044;119/5111;m/YishayMansour", "google_scholar": ";shoYR_AAAAAJ;wGG1voYAAAAJ;kALYnggAAAAJ;OEJUgwkAAAAJ", "orcid": ";;;;0000-0001-6891-2645", "linkedin": "liad-erez-a4ab79182;;;;", "or_profile": "~Liad_Erez1;~Alon_Cohen1;~Tomer_Koren1;~Shay_Moran1;~Yishay_Mansour1", "aff": "Tel Aviv University;Google;Tel Aviv University;Google;School of Computer Science, Tel Aviv University", "aff_domain": "tau.ac.il;google.com;tau.ac.il;google.com;cs.tau.ac.il", "position": "PhD student;Researcher;Associate Professor;Visiting Faculty;Full Professor", "bibtex": "@inproceedings{\nerez2024fast,\ntitle={Fast Rates for Bandit {PAC} Multiclass Classification},\nauthor={Liad Erez and Alon Cohen and Tomer Koren and Yishay Mansour and Shay Moran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6zOKbzjBO4}\n}", "github": "", "reviewers": "xcWH;Lcb5;Wz7L;m8fV;TyCi", "pdf_size": 313509, "rating": "6;7;7;7;7", "confidence": "4;2;3;4;3", "soundness": "4;4;4;4;3", "novelty": "3;3;3;4;3", "presentation": "4;4;3;4;3", "wc_summary": "96;87;51;86;164", "wc_strengths": "78;77;34;87;62", "wc_weaknesses": "161;1;58;107;129", "wc_questions": "2;17;19;67;39", "wc_limitations": "1;1;1;2;2", "wc_review": "338;183;163;349;396", "wc_reply_reviewers": "16;5;11;4;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 96.8, 36.95077807029237 ], "wc_strengths_avg": [ 67.6, 18.618270596379247 ], "wc_weaknesses_avg": [ 91.2, 56.193949852275026 ], "wc_questions_avg": [ 28.8, 22.435685859808252 ], "wc_limitations_avg": [ 1.4, 0.4898979485566356 ], "wc_review_avg": [ 285.8, 94.35125860315802 ], "wc_reply_reviewers_avg": [ 7.2, 5.635601121442148 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3844046311310009034&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tau.ac.il;google.com;tau.ac.il;google.com;cs.tau.ac.il", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Tel Aviv University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.tau.ac.il;https://www.google.com", "aff_unique_abbr": "TAU;Google", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Mountain View;Tel Aviv", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "DiffGS: Functional Gaussian Splatting Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96489", "id": "6zROYoHlcp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=6zROYoHlcp", "openreview": "https://openreview.net/forum?id=6zROYoHlcp", "poster": "", "project": "", "author_site": "Junsheng Zhou, Weiqi Zhang, Yu-Shen Liu", "tldr": "", "abstract": "3D Gaussian Splatting (3DGS) has shown convincing performance in rendering speed and fidelity, yet the generation of Gaussian Splatting remains a challenge due to its discreteness and unstructured nature. In this work, we propose DiffGS, a general Gaussian generator based on latent diffusion models. DiffGS is a powerful and efficient 3D generative model which is capable of generating Gaussian primitives at arbitrary numbers for high-fidelity rendering with rasterization. The key insight is to represent Gaussian Splatting in a disentangled manner via three novel functions to model Gaussian probabilities, colors and transforms. Through the novel disentanglement of 3DGS, we represent the discrete and unstructured 3DGS with continuous Gaussian Splatting functions, where we then train a latent diffusion model with the target of generating these Gaussian Splatting functions both unconditionally and conditionally. Meanwhile, we introduce a discretization algorithm to extract Gaussians at arbitrary numbers from the generated functions via octree-guided sampling and optimization. We explore DiffGS for various tasks, including unconditional generation, conditional generation from text, image, and partial 3DGS, as well as Point-to-Gaussian generation. We believe that DiffGS provides a new direction for flexibly modeling and generating Gaussian Splatting. Project page: https://junshengzhou.github.io/DiffGS.", "keywords": "Gaussian splatting;3D generation;Latent diffusion models", "primary_area": "machine_vision", "supplementary_material": "/attachment/f1a98a60692e45a200ec46225de833673fbf8cd5.zip", "author": "Junsheng Zhou;Weiqi Zhang;Yu-Shen Liu", "authorids": "~Junsheng_Zhou3;~Weiqi_Zhang2;~Yu-Shen_Liu1", "gender": "M;M;M", "homepage": "https://junshengzhou.github.io/;https://weiqi-zhang.github.io/;https://yushen-liu.github.io/", "dblp": ";;44/2229.html", "google_scholar": "afPIrLYAAAAJ;https://scholar.google.com.hk/citations?user=sp3zrnYAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0001-7305-1915", "linkedin": ";;", "or_profile": "~Junsheng_Zhou3;~Weiqi_Zhang2;~Yu-Shen_Liu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nzhou2024diffgs,\ntitle={Diff{GS}: Functional Gaussian Splatting Diffusion},\nauthor={Junsheng Zhou and Weiqi Zhang and Yu-Shen Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=6zROYoHlcp}\n}", "github": "", "reviewers": "TmLe;3Kid;6WmW;tnRu;p5pi;cAhv", "pdf_size": 9383390, "rating": "4;4;5;5;5;7", "confidence": "4;5;4;3;5;4", "soundness": "2;3;2;3;2;3", "novelty": "2;2;3;3;2;3", "presentation": "1;2;3;2;1;3", "wc_summary": "69;87;36;74;159;150", "wc_strengths": "30;26;68;65;48;47", "wc_weaknesses": "191;164;142;195;459;229", "wc_questions": "2;15;41;9;177;29", "wc_limitations": "2;12;1;1;8;8", "wc_review": "294;304;288;344;851;463", "wc_reply_reviewers": "6;0;70;100;325;0", "wc_reply_authors": "0;0;704;495;1264;0", "reply_reviewers": "1;0;1;2;2;0", "reply_authors": "1;1;3;3;4;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 4.166666666666667, 0.6871842709362768 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 95.83333333333333, 44.30732319706178 ], "wc_strengths_avg": [ 47.333333333333336, 15.786773648286156 ], "wc_weaknesses_avg": [ 230.0, 105.89932325877567 ], "wc_questions_avg": [ 45.5, 60.193438180585765 ], "wc_limitations_avg": [ 5.333333333333333, 4.2295258468165065 ], "wc_review_avg": [ 424.0, 200.0191657483519 ], "wc_reply_reviewers_avg": [ 83.5, 114.54802486293686 ], "wc_reply_authors_avg": [ 410.5, 470.33241790603097 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.1666666666666665, 1.2133516482134197 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.24253562503633294, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10672861157893430791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "A Large-Scale Human-Centric Benchmark for Referring Expression Comprehension in the LMM Era", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97836", "id": "70iM5TBkN5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=70iM5TBkN5", "openreview": "https://openreview.net/forum?id=70iM5TBkN5", "poster": "/media/PosterPDFs/NeurIPS%202024/97836.png?t=1729053486.1573997", "project": "", "author_site": "Fangyun Wei, Jinjing Zhao, Kun Yan, Hongyang Zhang, Chang Xu", "tldr": "", "abstract": "Prior research in human-centric AI has primarily addressed single-modality tasks like pedestrian detection, action recognition, and pose estimation. However, the emergence of large multimodal models (LMMs) such as GPT-4V has redirected attention towards integrating language with visual content. Referring expression comprehension (REC) represents a prime example of this multimodal approach. Current human-centric REC benchmarks, typically sourced from general datasets, fall short in the LMM era due to their limitations, such as insufficient testing samples, overly concise referring expressions, and limited vocabulary, making them inadequate for evaluating the full capabilities of modern REC models. In response, we present HC-RefLoCo (Human-Centric Referring Expression Comprehension with Long Context), a benchmark that includes 13,452 images, 24,129 instances, and 44,738 detailed annotations, encompassing a vocabulary of 18,681 words. Each annotation, meticulously reviewed for accuracy, averages 93.2 words and includes topics such as appearance, human-object interaction, location, action, celebrity, and OCR. HC-RefLoCo provides a wider range of instance scales and diverse evaluation protocols, encompassing accuracy with various IoU criteria, scale-aware evaluation, and subject-specific assessments. Our experiments, which assess 24 models, highlight HC-RefLoCo\u2019s potential to advance human-centric AI by challenging contemporary REC models with comprehensive and varied data. Our benchmark, along with the evaluation code, are available at https://github.com/ZhaoJingjing713/HC-RefLoCo.", "keywords": "Referring expression comprehension", "primary_area": "", "supplementary_material": "", "author": "Fangyun Wei;Jinjing Zhao;Kun Yan;Hongyang Zhang;Chang Xu", "authorids": "~Fangyun_Wei1;~Jinjing_Zhao1;~Kun_Yan1;~Hongyang_Zhang1;~Chang_Xu4", "gender": "M;M;M;;M", "homepage": ";;https://hongyanz.github.io/;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html;https://github.com/yankun-pku", "dblp": "161/2636;;23/10537-1;97/2966-2;", "google_scholar": "-ncz2s8AAAAJ;;https://scholar.google.com/citations?hl=en;N4F_3eoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0004-5494-7299;;0000-0002-4756-0609;0000-0002-1234-6119", "linkedin": ";zhaojingjing713/;;;", "or_profile": "~Fangyun_Wei1;~Jinjing_Zhao1;~Hongyang_Zhang1;~Charles_Xu1;~Yan_Kun1", "aff": "Microsoft Research;\tMicrosoft Research Asia;School of Computer Science, University of Waterloo;University of Sydney;Peking University", "aff_domain": "microsoft.com;microsoft.com;uwaterloo.ca;sydney.eud.au;pku.edu.cn", "position": "Researcher;Intern;Assistant Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nwei2024a,\ntitle={A Large-Scale Human-Centric Benchmark for Referring Expression Comprehension in the {LMM} Era},\nauthor={Fangyun Wei and Jinjing Zhao and Kun Yan and Hongyang Zhang and Chang Xu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=70iM5TBkN5}\n}", "github": "", "reviewers": "Q7MD;ysoo;b3MM", "pdf_size": 2391604, "rating": "6;7;8", "confidence": "3;2;5", "wc_summary_and_contributions": "59;67;63", "wc_strengths": "42;3;99", "wc_improvement": "47;3;33", "wc_limitations": "9;1;74", "wc_correctness": "7;14;52", "wc_clarity": "6;6;7", "wc_relation_to_prior_work": "43;1;17", "wc_documentation": "1;10;21", "wc_additional_feedback": "1;1;1", "wc_review": "215;106;367", "wc_reply_reviewers": "264;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "wc_summary_and_contributions_avg": [ 63.0, 3.265986323710904 ], "wc_strengths_avg": [ 48.0, 39.42080668885405 ], "wc_improvement_avg": [ 27.666666666666668, 18.354533197248273 ], "wc_limitations_avg": [ 28.0, 32.69046751985457 ], "wc_correctness_avg": [ 24.333333333333332, 19.770910168449223 ], "wc_clarity_avg": [ 6.333333333333333, 0.4714045207910317 ], "wc_relation_to_prior_work_avg": [ 20.333333333333332, 17.30767331432956 ], "wc_documentation_avg": [ 10.666666666666666, 8.178562764256865 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 229.33333333333334, 107.03374338133851 ], "wc_reply_reviewers_avg": [ 88.0, 124.45079348883236 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3370514042310860192&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "microsoft.com;microsoft.com;uwaterloo.ca;sydney.eud.au;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Microsoft;University of Waterloo;University of Sydney;Peking University", "aff_unique_dep": "Microsoft Research;School of Computer Science;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://uwaterloo.ca;https://www.sydney.edu.au;http://www.pku.edu.cn", "aff_unique_abbr": "MSR;UWaterloo;USYD;Peking U", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Waterloo", "aff_country_unique_index": "0;1;2;3;1", "aff_country_unique": "United States;China;Canada;Australia" }, { "title": "Offline Multitask Representation Learning for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96488", "id": "72tRD2Mfjd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=72tRD2Mfjd", "openreview": "https://openreview.net/forum?id=72tRD2Mfjd", "poster": "", "project": "", "author_site": "Haque Ishfaq, Thanh Nguyen-Tang, Songtao Feng, Raman Arora, Mengdi Wang, Ming Yin, Doina Precup", "tldr": "", "abstract": "We study offline multitask representation learning in reinforcement learning (RL), where a learner is provided with an offline dataset from different tasks that share a common representation and is asked to learn the shared representation. We theoretically investigate offline multitask low-rank RL, and propose a new algorithm called MORL for offline multitask representation learning. Furthermore, we examine downstream RL in reward-free, offline and online scenarios, where a new task is introduced to the agent that shares the same representation as the upstream offline tasks. Our theoretical results demonstrate the benefits of using the learned representation from the upstream offline task instead of directly learning the representation of the low-rank model.", "keywords": "Multitask learning;representation Learning;offline RL;provably efficient;suboptimality gap;reward-free RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Haque Ishfaq;Thanh Nguyen-Tang;Songtao Feng;Raman Arora;Mengdi Wang;Ming Yin;Doina Precup", "authorids": "~Haque_Ishfaq1;~Thanh_Nguyen-Tang1;~Songtao_Feng1;~Raman_Arora1;~Mengdi_Wang1;~Ming_Yin4;~Doina_Precup1", "gender": "M;M;F;M;F;M;M", "homepage": ";http://www.cs.jhu.edu/~raman/Home.html;http://mwang.princeton.edu;https://mingyin0312.github.io;http://cs.mcgill.ca/~dprecup/;https://hmishfaq.github.io/;https://thanhnguyentang.github.io/", "dblp": "217/1741;;;89/453.html;p/DoinaPrecup;https://dblp.uni-trier.de/pid/252/5022;287/5102.html", "google_scholar": "q11fVdcAAAAJ;Spe0xdkAAAAJ;;ncBRYIUAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;W2QQgF8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-6458-0751;;;0000-0002-1917-2190", "linkedin": ";;;;;hmishfaq/;thanhnguyentang/", "or_profile": "~Songtao_Feng1;~Raman_Arora1;~Mengdi_Wang1;~Ming_Yin4;~Doina_Precup1;~Haque_M_Ishfaq1;~Thanh_Tang_Nguyen2", "aff": "University of Florida;Johns Hopkins University;Princeton University;Princeton University;McGill University;McGill University, McGill University;Johns Hopkins University", "aff_domain": "ufl.edu;jhu.edu;princeton.edu;princeton.edu;mcgill.ca;mail.mcgill.ca;jhu.edu", "position": "Postdoc;Associate Professor;Full Professor;Postdoc;Associate Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nishfaq2024offline,\ntitle={Offline Multitask Representation Learning for Reinforcement Learning},\nauthor={Haque Ishfaq and Thanh Nguyen-Tang and Songtao Feng and Raman Arora and Mengdi Wang and Ming Yin and Doina Precup},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=72tRD2Mfjd}\n}", "github": "", "reviewers": "oHwh;Qe4K;fKn9;Z4xa", "pdf_size": 643895, "rating": "5;5;6;7", "confidence": "2;2;3;1", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "95;39;86;102", "wc_strengths": "53;15;156;31", "wc_weaknesses": "560;83;43;266", "wc_questions": "45;31;43;30", "wc_limitations": "40;6;38;12", "wc_review": "793;174;366;441", "wc_reply_reviewers": "160;20;110;59", "wc_reply_authors": "49;30;52;12", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 24.62214450449026 ], "wc_strengths_avg": [ 63.75, 54.94258366695181 ], "wc_weaknesses_avg": [ 238.0, 204.0330855523192 ], "wc_questions_avg": [ 37.25, 6.796138609534093 ], "wc_limitations_avg": [ 24.0, 15.165750888103101 ], "wc_review_avg": [ 443.5, 224.0496596739214 ], "wc_reply_reviewers_avg": [ 87.25, 52.75118482081706 ], "wc_reply_authors_avg": [ 35.75, 16.099301227071937 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8460817726459216811&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ufl.edu;jhu.edu;princeton.edu;princeton.edu;mcgill.ca;mail.mcgill.ca;jhu.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;3;3;1", "aff_unique_norm": "University of Florida;Johns Hopkins University;Princeton University;McGill University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ufl.edu;https://www.jhu.edu;https://www.princeton.edu;https://www.mcgill.ca", "aff_unique_abbr": "UF;JHU;Princeton;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "PCoTTA: Continual Test-Time Adaptation for Multi-Task Point Cloud Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96487", "id": "739jAzUXk7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=739jAzUXk7", "openreview": "https://openreview.net/forum?id=739jAzUXk7", "poster": "/media/PosterPDFs/NeurIPS%202024/96487.png?t=1732268041.2832472", "project": "", "author_site": "Jincen Jiang, Qianyu Zhou, Yuhang Li, Xinkui Zhao, Meili Wang, Lizhuang Ma, Jian Chang, Jian.J Zhang, Xuequan Lu", "tldr": "", "abstract": "In this paper, we present PCoTTA, an innovative, pioneering framework for Continual Test-Time Adaptation (CoTTA) in multi-task point cloud understanding, enhancing the model's transferability towards the continually changing target domain. We introduce a multi-task setting for PCoTTA, which is practical and realistic, handling multiple tasks within one unified model during the continual adaptation. Our PCoTTA involves three key components: automatic prototype mixture (APM), Gaussian Splatted feature shifting (GSFS), and contrastive prototype repulsion (CPR). Firstly, APM is designed to automatically mix the source prototypes with the learnable prototypes with a similarity balancing factor, avoiding catastrophic forgetting. Then, GSFS dynamically shifts the testing sample toward the source domain, mitigating error accumulation in an online manner. In addition, CPR is proposed to pull the nearest learnable prototype close to the testing feature and push it away from other prototypes, making each prototype distinguishable during the adaptation. Experimental comparisons lead to a new benchmark, demonstrating PCoTTA's superiority in boosting the model's transferability towards the continually changing target domain. Our source code is available at: https://github.com/Jinec98/PCoTTA.", "keywords": "Continual Test-time Adaptation;Point Cloud Understanding", "primary_area": "machine_vision", "supplementary_material": "/attachment/fea6a2acacb5e786a20b5dcdef51748fee905000.zip", "author": "Jincen Jiang;Qianyu Zhou;Yuhang Li;Xinkui Zhao;Meili Wang;Lizhuang Ma;Jian Chang;Jian Jun Zhang;Xuequan Lu", "authorids": "~Jincen_Jiang1;~Qianyu_Zhou1;~Yuhang_Li8;~Xinkui_Zhao1;~Meili_Wang1;~Lizhuang_Ma1;~Jian_Chang1;~Jian_Jun_Zhang2;~Xuequan_Lu1", "gender": "M;M;F;M;F;M;;M;", "homepage": "https://jincenjiang.com;https://qianyuzqy.github.io/;;https://person.zju.edu.cn/en/NB22088;https://cie.nwsuaf.edu.cn/szdw/js/2012110003/index.htm;http://english.seiee.sjtu.edu.cn/english/detail/841_680.htm;;https://jzhang.bournemouth.ac.uk;", "dblp": "238/1691;232/4830-1;;135/5118;119/6259;10/4950;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;KHg04fkAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=yd58y_0AAAAJ;;;", "orcid": ";0000-0002-5331-050X;0000-0002-3827-1522;0000-0002-1115-5652;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Jincen_Jiang1;~Qianyu_Zhou1;~Yuhang_Li8;~Xinkui_Zhao1;~Meili_Wang1;~Lizhuang_Ma1;~Jian_Chang1;~Jian_Jun_Zhang2;~Xuequan_Lu1", "aff": "Bournemouth University;Shanghai Jiao Tong University,;Shanghai University;Zhejiang University;Northwest A&F University;Dept. of Computer Sci. & Eng., Shanghai Jiao Tong University;;Bournemouth University;", "aff_domain": "bournemouth.ac.uk;sjtu.edu.cn;shu.edu.cn;zju.edu.cn;nwafu.edu.cn;cs.sjtu.edu.cn;;bournemouth.ac.uk;", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Full Professor;;Full Professor;", "bibtex": "@inproceedings{\njiang2024pcotta,\ntitle={{PC}o{TTA}: Continual Test-Time Adaptation for Multi-Task Point Cloud Understanding},\nauthor={Jincen Jiang and Qianyu Zhou and Yuhang Li and Xinkui Zhao and Meili Wang and Lizhuang Ma and Jian Chang and Jian Jun Zhang and Xuequan Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=739jAzUXk7}\n}", "github": "", "reviewers": "5Vdz;ni1v;SkYG;ynhY", "pdf_size": 7032298, "rating": "5;5;5;6", "confidence": "5;3;5;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "26;78;57;86", "wc_strengths": "24;51;57;103", "wc_weaknesses": "32;109;243;119", "wc_questions": "219;6;17;14", "wc_limitations": "72;7;1;4", "wc_review": "373;251;375;326", "wc_reply_reviewers": "90;65;930;9", "wc_reply_authors": "1485;123;2827;21", "reply_reviewers": "1;1;7;1", "reply_authors": "5;3;8;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 23.19886850689059 ], "wc_strengths_avg": [ 58.75, 28.411045387313717 ], "wc_weaknesses_avg": [ 125.75, 75.60216597426293 ], "wc_questions_avg": [ 64.0, 89.57957356451303 ], "wc_limitations_avg": [ 21.0, 29.52117883825102 ], "wc_review_avg": [ 331.25, 50.310908359917335 ], "wc_reply_reviewers_avg": [ 273.5, 380.1634516888755 ], "wc_reply_authors_avg": [ 1114.0, 1145.506438218485 ], "reply_reviewers_avg": [ 2.5, 2.598076211353316 ], "reply_authors_avg": [ 4.5, 2.29128784747792 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=302952734030865278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bournemouth.ac.uk;sjtu.edu.cn;shu.edu.cn;zju.edu.cn;nwafu.edu.cn;cs.sjtu.edu.cn;;bournemouth.ac.uk;", "author_num": 9, "aff_unique_index": "0;1;2;3;4;1;0", "aff_unique_norm": "Bournemouth University;Shanghai Jiao Tong University;Shanghai University;Zhejiang University;Northwest A&F University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.bournemouth.ac.uk;https://www.sjtu.edu.cn;https://www.shu.edu.cn;https://www.zju.edu.cn;https://www.nwsuaf.edu.cn", "aff_unique_abbr": "BU;SJTU;SHU;ZJU;NWSUAF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Sample-Efficient Private Learning of Mixtures of Gaussians", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96486", "id": "74B6qX62vW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=74B6qX62vW", "openreview": "https://openreview.net/forum?id=74B6qX62vW", "poster": "/media/PosterPDFs/NeurIPS%202024/96486.png?t=1733939484.1520393", "project": "", "author_site": "Hassan Ashtiani, Mahbod Majid, Shyam Narayanan", "tldr": "", "abstract": "We study the problem of learning mixtures of Gaussians with approximate differential privacy. We prove that roughly $kd^2 + k^{1.5} d^{1.75} + k^2 d$ samples suffice to learn a mixture of $k$ arbitrary $d$-dimensional Gaussians up to low total variation distance, with differential privacy. Our work improves over the previous best result (which required roughly $k^2 d^4$ samples) and is provably optimal when $d$ is much larger than $k^2$. Moreover, we give the first optimal bound for privately learning mixtures of $k$ univariate (i.e., $1$-dimensional) Gaussians. Importantly, we show that the sample complexity for learning mixtures of univariate Gaussians is linear in the number of components $k$, whereas the previous best sample complexity was quadratic in $k$. Our algorithms utilize various techniques, including the inverse sensitivity mechanism, sample compression for distributions, and methods for bounding volumes of sumsets.", "keywords": "Differential Privacy;Density Estimation;Mixtures of Gaussians;Sample Complexity", "primary_area": "privacy", "supplementary_material": "", "author": "Hassan Ashtiani;Mahbod Majid;Shyam Narayanan", "authorids": "~Hassan_Ashtiani1;~Mahbod_Majid1;~Shyam_Narayanan1", "gender": "M;M;M", "homepage": "https://www.cas.mcmaster.ca/ashtiani/;https://www.mahbodmajid.com/;https://sites.google.com/view/shyamnarayanan/home", "dblp": "164/5733;307/5441;222/2805", "google_scholar": ";https://scholar.google.com/citations?hl=en;CTT44Y0AAAAJ", "orcid": ";0000-0001-9304-2872;", "linkedin": ";;", "or_profile": "~Hassan_Ashtiani1;~Mahbod_Majid1;~Shyam_Narayanan1", "aff": "McMaster University;Carnegie Mellon University;Massachusetts Institute of Technology", "aff_domain": "mcmaster.ca;cmu.edu;mit.edu", "position": "Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nashtiani2024sampleefficient,\ntitle={Sample-Efficient Private Learning of Mixtures of Gaussians},\nauthor={Hassan Ashtiani and Mahbod Majid and Shyam Narayanan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=74B6qX62vW}\n}", "github": "", "reviewers": "cTn5;sFBV;9BbN;u11i", "pdf_size": 651008, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "46;75;272;245", "wc_strengths": "92;37;42;100", "wc_weaknesses": "34;83;226;66", "wc_questions": "404;30;32;18", "wc_limitations": "1;1;1;2", "wc_review": "577;226;573;431", "wc_reply_reviewers": "20;8;31;60", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 159.5, 99.9862490545575 ], "wc_strengths_avg": [ 67.75, 28.44622119016865 ], "wc_weaknesses_avg": [ 102.25, 73.58116267089017 ], "wc_questions_avg": [ 121.0, 163.47782724271815 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 451.75, 142.98841736308574 ], "wc_reply_reviewers_avg": [ 29.75, 19.266226926930972 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b5VIGrTORzoJ:scholar.google.com/&scioq=Sample-Efficient+Private+Learning+of+Mixtures+of+Gaussians&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "mcmaster.ca;cmu.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "McMaster University;Carnegie Mellon University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mcmaster.ca;https://www.cmu.edu;https://web.mit.edu", "aff_unique_abbr": "McMaster;CMU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Diffusion Policies Creating a Trust Region for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96485", "id": "74c9EOng9C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=74c9EOng9C", "openreview": "https://openreview.net/forum?id=74c9EOng9C", "poster": "/media/PosterPDFs/NeurIPS%202024/96485.png?t=1731756873.9104578", "project": "", "author_site": "Tianyu Chen, Zhendong Wang, Mingyuan Zhou", "tldr": "", "abstract": "Offline reinforcement learning (RL) leverages pre-collected datasets to train optimal policies. Diffusion Q-Learning (DQL), introducing diffusion models as a powerful and expressive policy class, significantly boosts the performance of offline RL. However, its reliance on iterative denoising sampling to generate actions slows down both training and inference. While several recent attempts have tried to accelerate diffusion-QL, the improvement in training and/or inference speed often results in degraded performance. In this paper, we introduce a dual policy approach, Diffusion Trusted Q-Learning (DTQL), which comprises a diffusion policy for pure behavior cloning and a practical one-step policy. We bridge the two polices by a newly introduced diffusion trust region loss. The diffusion policy maintains expressiveness, while the trust region loss directs the one-step policy to explore freely and seek modes within the region defined by the diffusion policy. DTQL eliminates the need for iterative denoising sampling during both training and inference, making it remarkably computationally efficient. We evaluate its effectiveness and algorithmic characteristics against popular Kullback-Leibler (KL) based distillation methods in 2D bandit scenarios and gym tasks. We then show that DTQL could not only outperform other methods on the majority of the D4RL benchmark tasks but also demonstrate efficiency in training and inference speeds. The PyTorch implementation is available at https://github.com/TianyuCodings/Diffusion_Trusted_Q_Learning.", "keywords": "offline RL;diffusion models;behavior cloning;policy regularization;Q-learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/66a57db6a5155a91643f93917118cf21f9b5a82b.zip", "author": "Tianyu Chen;Zhendong Wang;Mingyuan Zhou", "authorids": "~Tianyu_Chen3;~Zhendong_Wang1;~Mingyuan_Zhou1", "gender": "M;M;M", "homepage": ";https://zhendong-wang.github.io/;http://mingyuanzhou.github.io", "dblp": ";;", "google_scholar": ";lRiIjhcAAAAJ;LXwCIisAAAAJ", "orcid": ";;", "linkedin": "tianyu-chen-1a056a160/;;", "or_profile": "~Tianyu_Chen3;~Zhendong_Wang1;~Mingyuan_Zhou1", "aff": "University of Texas at Austin;University of Texas at Austin;Google", "aff_domain": "utexas.edu;utexas.edu;google.com", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nchen2024diffusion,\ntitle={Diffusion Policies Creating a Trust Region for Offline Reinforcement Learning},\nauthor={Tianyu Chen and Zhendong Wang and Mingyuan Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=74c9EOng9C}\n}", "github": "", "reviewers": "BozX;Px6z;R2od;Rh1M", "pdf_size": 6280615, "rating": "4;4;6;7", "confidence": "4;3;3;4", "soundness": "1;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;3;2", "wc_summary": "94;75;63;62", "wc_strengths": "45;39;99;45", "wc_weaknesses": "727;134;49;89", "wc_questions": "58;63;104;62", "wc_limitations": "1;135;13;52", "wc_review": "925;446;328;310", "wc_reply_reviewers": "40;43;64;20", "wc_reply_authors": "131;104;23;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.5, 12.893796958227627 ], "wc_strengths_avg": [ 57.0, 24.372115213907882 ], "wc_weaknesses_avg": [ 249.75, 277.17627513912515 ], "wc_questions_avg": [ 71.75, 18.713297411199342 ], "wc_limitations_avg": [ 50.25, 52.437462753264484 ], "wc_review_avg": [ 502.25, 249.60205828478257 ], "wc_reply_reviewers_avg": [ 41.75, 15.594470173750693 ], "wc_reply_authors_avg": [ 72.75, 45.89321845327477 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7851150253791752509&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;utexas.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UT Austin;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Austin;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CLIPAway: Harmonizing focused embeddings for removing objects via diffusion models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96484", "id": "76CZrhbMoo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=76CZrhbMoo", "openreview": "https://openreview.net/forum?id=76CZrhbMoo", "poster": "/media/PosterPDFs/NeurIPS%202024/96484.png?t=1731691832.5001037", "project": "", "author_site": "Yi\u011fit Ekin, Ahmet Burak Yildirim, Erdem Eren \u00c7a\u011flar, Aykut Erdem, Erkut Erdem, Aysegul Dundar", "tldr": "", "abstract": "Advanced image editing techniques, particularly inpainting, are essential for seamlessly removing unwanted elements while preserving visual integrity. Traditional GAN-based methods have achieved notable success, but recent advancements in diffusion models have produced superior results due to their training on large-scale datasets, enabling the generation of remarkably realistic inpainted images.\nDespite their strengths, diffusion models often struggle with object removal tasks without explicit guidance, leading to unintended hallucinations of the removed object. To address this issue, we introduce CLIPAway, a novel approach leveraging CLIP embeddings to focus on background regions while excluding foreground elements. CLIPAway enhances inpainting accuracy and quality by identifying embeddings that prioritize the background, thus achieving seamless object removal. Unlike other methods that rely on specialized training datasets or costly manual annotations, CLIPAway provides a flexible, plug-and-play solution compatible with various diffusion-based inpainting techniques.", "keywords": "Object Removal;Inpainting;Diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yi\u011fit Ekin;Ahmet Burak Yildirim;Erdem Eren Caglar;Aykut Erdem;Erkut Erdem;Aysegul Dundar", "authorids": "~Yi\u011fit_Ekin1;~Ahmet_Burak_Yildirim1;~Erdem_Eren_Caglar1;~Aykut_Erdem1;~Erkut_Erdem1;~Aysegul_Dundar1", "gender": "M;M;M;M;M;", "homepage": "https://yigitekin.github.io/personal-webpage/;http://www.abyildirim.com;;https://aykuterdem.github.io;https://web.cs.hacettepe.edu.tr/~erkut;", "dblp": "380/0080;;;04/1832;79/6569;119/4855", "google_scholar": "rvau0IIAAAAJ;;;-xA1_OAAAAAJ;https://scholar.google.com.tr/citations?user=eALwl74AAAAJ;pvu770UAAAAJ", "orcid": "0009-0006-4969-608X;;;0000-0002-6280-8422;;", "linkedin": "Yigit-Ekin;;eecaglar/;;;", "or_profile": "~Yi\u011fit_Ekin1;~Ahmet_Burak_Yildirim1;~Erdem_Eren_Caglar1;~Aykut_Erdem1;~Erkut_Erdem1;~Aysegul_Dundar1", "aff": "Bilkent University, Bilkent University;Bilkent University;Bilkent University;Ko\u00e7 University;Hacettepe University;NVIDIA", "aff_domain": "cs.bilkent.edu.tr;bilkent.edu.tr;bilkent.edu.tr;ku.edu.tr;hacettepe.edu.tr;nvidia.com", "position": "MS student;MS student;Undergrad student;Associate Professor;Full Professor;Deep Learning Applied Researcher", "bibtex": "@inproceedings{\nekin2024clipaway,\ntitle={{CLIPA}way: Harmonizing focused embeddings for removing objects via diffusion models},\nauthor={Yi{\\u{g}}it Ekin and Ahmet Burak Yildirim and Erdem Eren Caglar and Aykut Erdem and Erkut Erdem and Aysegul Dundar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=76CZrhbMoo}\n}", "github": "", "reviewers": "hrfv;GcHN;FKYu;6DqA", "pdf_size": 42605610, "rating": "5;7;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "107;78;66;81", "wc_strengths": "96;126;95;91", "wc_weaknesses": "133;86;117;149", "wc_questions": "3;15;57;83", "wc_limitations": "1;6;2;13", "wc_review": "340;311;337;417", "wc_reply_reviewers": "46;35;39;152", "wc_reply_authors": "32;34;52;54", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.0, 14.949916387726054 ], "wc_strengths_avg": [ 102.0, 13.982131454109563 ], "wc_weaknesses_avg": [ 121.25, 23.284920012746447 ], "wc_questions_avg": [ 39.5, 32.13642792844283 ], "wc_limitations_avg": [ 5.5, 4.716990566028302 ], "wc_review_avg": [ 351.25, 39.60034722069997 ], "wc_reply_reviewers_avg": [ 68.0, 48.65696250281145 ], "wc_reply_authors_avg": [ 43.0, 10.04987562112089 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7530668418996109002&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.bilkent.edu.tr;bilkent.edu.tr;bilkent.edu.tr;ku.edu.tr;hacettepe.edu.tr;nvidia.com", "author_num": 6, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Bilkent University;Ko\u00e7 University;Hacettepe University;NVIDIA", "aff_unique_dep": ";;;NVIDIA Corporation", "aff_unique_url": "https://www.bilkent.edu.tr;https://www.ku.edu.tr;https://www.hacettepe.edu.tr;https://www.nvidia.com", "aff_unique_abbr": "Bilkent;Ko\u00e7;Hacettepe;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "T\u00fcrkiye;United States" }, { "title": "Improved Particle Approximation Error for Mean Field Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96483", "id": "76NKidadct", "proceeding": "", "pdf": "https://openreview.net/pdf?id=76NKidadct", "openreview": "https://openreview.net/forum?id=76NKidadct", "poster": "/media/PosterPDFs/NeurIPS%202024/96483.png?t=1733485748.0679793", "project": "", "tldr": "", "abstract": "Mean-field Langevin dynamics (MFLD) minimizes an entropy-regularized nonlinear convex functional defined over the space of probability distributions. MFLD has gained attention due to its connection with noisy gradient descent for mean-field two-layer neural networks. Unlike standard Langevin dynamics, the nonlinearity of the objective functional induces particle interactions, necessitating multiple particles to approximate the dynamics in a finite-particle setting. Recent works (Chen et al., 2022; Suzuki et al., 2023b) have demonstrated the uniform-in-time propagation of chaos for MFLD, showing that the gap between the particle system and its mean-field limit uniformly shrinks over time as the number of particles increases. In this work, we improve the dependence on logarithmic Sobolev inequality (LSI) constants in their particle approximation errors, which can exponentially deteriorate with the regularization coefficient. Specifically, we establish an LSI-constant-free particle approximation error concerning the objective gap by leveraging the problem structure in risk minimization. As the application, we demonstrate improved convergence of MFLD, sampling guarantee for the mean-field stationary distribution, and uniform-in-time Wasserstein propagation of chaos in terms of particle complexity.", "keywords": "mean field Langevin dynamics;mean field neural networks;propagation of chaos", "primary_area": "optimization", "supplementary_material": "", "author": "Atsushi Nitanda", "authorids": "~Atsushi_Nitanda1", "gender": "M", "homepage": "https://sites.google.com/site/atsushinitanda", "dblp": "155/1884", "google_scholar": "https://scholar.google.co.jp/citations?user=LyVvaf8AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Atsushi_Nitanda1", "aff": "A*STAR", "aff_domain": "a-star.edu.sg", "position": "Principal Researcher", "bibtex": "@inproceedings{\nnitanda2024improved,\ntitle={Improved Particle Approximation Error for Mean Field Neural Networks},\nauthor={Atsushi Nitanda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=76NKidadct}\n}", "github": "", "reviewers": "Hk29;hVSM;4H1M;FiBA", "pdf_size": 379558, "rating": "6;6;7;7", "confidence": "4;4;4;3", "soundness": "4;2;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "61;65;84;75", "wc_strengths": "61;38;136;70", "wc_weaknesses": "170;113;53;63", "wc_questions": "39;140;168;6", "wc_limitations": "4;61;68;31", "wc_review": "335;417;509;245", "wc_reply_reviewers": "14;28;20;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.25, 8.954747344286158 ], "wc_strengths_avg": [ 76.25, 36.41685736029401 ], "wc_weaknesses_avg": [ 99.75, 46.493951219486604 ], "wc_questions_avg": [ 88.25, 67.50694408725668 ], "wc_limitations_avg": [ 41.0, 25.485289874749316 ], "wc_review_avg": [ 376.5, 97.73817063972498 ], "wc_reply_reviewers_avg": [ 19.0, 5.744562646538029 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10392842767448742470&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "a-star.edu.sg", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Agency for Science, Technology and Research", "aff_unique_dep": "", "aff_unique_url": "https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "Language Models as Zero-shot Lossless Gradient Compressors: Towards General Neural Parameter Prior Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96482", "id": "77kCJzvpOa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=77kCJzvpOa", "openreview": "https://openreview.net/forum?id=77kCJzvpOa", "poster": "/media/PosterPDFs/NeurIPS%202024/96482.png?t=1731757119.1929436", "project": "", "author_site": "Hui-Po Wang, Mario Fritz", "tldr": "", "abstract": "Despite the widespread use of statistical prior models in various fields, such models for neural network gradients have long been overlooked. The inherent challenge stems from their high-dimensional structures and complex interdependencies, which complicate effective modeling. In this work, we demonstrate the potential of large language models (LLMs) to act as gradient priors in a zero-shot setting. We examine the property by considering lossless gradient compression -- a critical application in distributed learning -- that depends heavily on precise probability modeling. To achieve this, we introduce LM-GC, a novel method that integrates LLMs with arithmetic coding. Our technique converts plain gradients into text-like formats, enhancing token efficiency by up to 38 times compared to their plain representations. We ensure that this data conversion maintains a close alignment with the structure of plain gradients and the symbols commonly recognized by LLMs. Our experiments indicate that LM-GC surpasses existing state-of-the-art lossless compression methods, improving compression rates by 10% up to 17.2% across various datasets and architectures. Additionally, our approach shows promising compatibility with lossy compression techniques such as quantization and sparsification. These findings highlight the significant potential of LLMs as a model for effectively handling gradients. Code is available at https://github.com/hui-po-wang/LM-GC.", "keywords": "large-scale language models;lossless gradient compression", "primary_area": "generative_models", "supplementary_material": "", "author": "Hui-Po Wang;Mario Fritz", "authorids": "~Hui-Po_Wang1;~Mario_Fritz1", "gender": "M;M", "homepage": "https://hui-po-wang.github.io/;https://cispa.saarland/group/fritz/", "dblp": "237/0049;", "google_scholar": "UAnfs8UAAAAJ;https://scholar.google.de/citations?user=4V1nNm4AAAAJ", "orcid": ";", "linkedin": "hui-po-wang-7a0158137/;", "or_profile": "~Hui-Po_Wang1;~Mario_Fritz1", "aff": "CISPA Helmholtz Center for Information Security;Saarland University", "aff_domain": "cispa.de;uni-saarland.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024language,\ntitle={Language Models as Zero-shot Lossless Gradient Compressors: Towards General Neural Parameter Prior Models},\nauthor={Hui-Po Wang and Mario Fritz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=77kCJzvpOa}\n}", "github": "", "reviewers": "KkqK;Br5V;UCGG;Nmkh", "pdf_size": 1080664, "rating": "5;5;6;7", "confidence": "3;3;3;2", "soundness": "1;3;4;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "59;46;87;41", "wc_strengths": "55;84;83;40", "wc_weaknesses": "168;84;53;38", "wc_questions": "16;5;94;37", "wc_limitations": "15;1;3;19", "wc_review": "313;220;320;175", "wc_reply_reviewers": "111;0;0;0", "wc_reply_authors": "22;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.224744871391589 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 58.25, 17.851820635442202 ], "wc_strengths_avg": [ 65.5, 18.76832437912346 ], "wc_weaknesses_avg": [ 85.75, 50.30096917555367 ], "wc_questions_avg": [ 38.0, 34.31471987354698 ], "wc_limitations_avg": [ 9.5, 7.664854858377946 ], "wc_review_avg": [ 257.0, 61.64008436074695 ], "wc_reply_reviewers_avg": [ 27.75, 48.064409910036346 ], "wc_reply_authors_avg": [ 5.5, 9.526279441628825 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2733785830343117837&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cispa.de;uni-saarland.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Saarland University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;UdS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "An Efficient High-dimensional Gradient Estimator for Stochastic Differential Equations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96481", "id": "780uXnA4wN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=780uXnA4wN", "openreview": "https://openreview.net/forum?id=780uXnA4wN", "poster": "/media/PosterPDFs/NeurIPS%202024/96481.png?t=1730254363.979241", "project": "", "author_site": "Shengbo Wang, Jose Blanchet, Peter W Glynn", "tldr": "", "abstract": "Overparameterized stochastic differential equation (SDE) models have achieved remarkable success in various complex environments, such as PDE-constrained optimization, stochastic control and reinforcement learning, financial engineering, and neural SDEs. These models often feature system evolution coefficients that are parameterized by a high-dimensional vector $\\theta \\in \\mathbb{R}^n$, aiming to optimize expectations of the SDE, such as a value function, through stochastic gradient ascent. Consequently, designing efficient gradient estimators for which the computational complexity scales well with $n$ is of significant interest. This paper introduces a novel unbiased stochastic gradient estimator\u2014the generator gradient estimator\u2014for which the computation time remains stable in $n$. In addition to establishing the validity of our methodology for general SDEs with jumps, we also perform numerical experiments that test our estimator in linear-quadratic control problems parameterized by high-dimensional neural networks. The results show a significant improvement in efficiency compared to the widely used pathwise differentiation method: Our estimator achieves near-constant computation times, increasingly outperforms its counterpart as $n$ increases, and does so without compromising estimation variance. These empirical findings highlight the potential of our proposed methodology for optimizing SDEs in contemporary applications.", "keywords": "derivative estimation;gradient estimation;sensitivity analysis;stochastic differential equations;jump diffusions;neural SDEs", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/4d7efe4d303603f0576746ced5247af072339d4f.zip", "author": "Shengbo Wang;Jose Blanchet;Peter Glynn", "authorids": "~Shengbo_Wang1;~Jose_Blanchet1;~Peter_Glynn2", "gender": "M;M;M", "homepage": ";https://web.stanford.edu/~jblanche/;https://web.stanford.edu/~glynn/", "dblp": ";75/5093.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;", "orcid": ";;", "linkedin": ";jose-blanchet;", "or_profile": "~Shengbo_Wang1;~Jose_Blanchet1;~Peter_Glynn2", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024an,\ntitle={An Efficient High-dimensional Gradient Estimator for Stochastic Differential Equations},\nauthor={Shengbo Wang and Jose Blanchet and Peter Glynn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=780uXnA4wN}\n}", "github": "", "reviewers": "5TVX;Eiq9;rGnk", "pdf_size": 3308067, "rating": "4;7;7", "confidence": "4;4;4", "soundness": "4;4;4", "novelty": "3;4;3", "presentation": "2;4;4", "wc_summary": "123;53;105", "wc_strengths": "60;73;31", "wc_weaknesses": "245;75;117", "wc_questions": "323;26;25", "wc_limitations": "14;32;18", "wc_review": "765;259;296", "wc_reply_reviewers": "227;11;34", "wc_reply_authors": "690;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 93.66666666666667, 29.67977238606942 ], "wc_strengths_avg": [ 54.666666666666664, 17.55625877635159 ], "wc_weaknesses_avg": [ 145.66666666666666, 72.30183646046314 ], "wc_questions_avg": [ 124.66666666666667, 140.24343914145226 ], "wc_limitations_avg": [ 21.333333333333332, 7.71722460186015 ], "wc_review_avg": [ 440.0, 230.3055940846133 ], "wc_reply_reviewers_avg": [ 90.66666666666667, 96.85843048261496 ], "wc_reply_authors_avg": [ 230.0, 325.2691193458119 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4397917366114334556&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DataStealing: Steal Data from Diffusion Models in Federated Learning with Multiple Trojans", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96480", "id": "792txRlKit", "proceeding": "", "pdf": "https://openreview.net/pdf?id=792txRlKit", "openreview": "https://openreview.net/forum?id=792txRlKit", "poster": "/media/PosterPDFs/NeurIPS%202024/96480.png?t=1731757267.6225512", "project": "", "author_site": "Yuan Gan, Jiaxu Miao, Yi Yang", "tldr": "", "abstract": "Federated Learning (FL) is commonly used to collaboratively train models with privacy preservation. In this paper, we found out that the popular diffusion models have introduced a new vulnerability to FL, which brings serious privacy threats. Despite stringent data management measures, attackers can steal massive private data from local clients through multiple Trojans, which control generative behaviors with multiple triggers. We refer to the new task as ${\\bf\\textit{DataStealing}}$ and demonstrate that the attacker can achieve the purpose based on our proposed Combinatorial Triggers (ComboTs) in a vanilla FL system. However, advanced distance-based FL defenses are still effective in filtering the malicious update according to the distances between each local update. Hence, we propose an Adaptive Scale Critical Parameters (AdaSCP) attack to circumvent the defenses and seamlessly incorporate malicious updates into the global model. Specifically, AdaSCP evaluates the importance of parameters with the gradients in dominant timesteps of the diffusion model. Subsequently, it adaptively seeks the optimal scale factor and magnifies critical parameter updates before uploading to the server. As a result, the malicious update becomes similar to the benign update, making it difficult for distance-based defenses to identify. Extensive experiments reveal the risk of leaking thousands of images in training diffusion models with FL. Moreover, these experiments demonstrate the effectiveness of AdaSCP in defeating advanced distance-based defenses. We hope this work will attract more attention from the FL community to the critical privacy security issues of Diffusion Models. Code: https://github.com/yuangan/DataStealing.", "keywords": "Federated Learning;Diffusion Models;DataStealing;Multiple Trojans;Adaptive Scale", "primary_area": "privacy", "supplementary_material": "", "author": "Yuan Gan;Jiaxu Miao;Yi Yang", "authorids": "~Yuan_Gan1;~Jiaxu_Miao2;~Yi_Yang22", "gender": "M;M;M", "homepage": ";;https://person.zju.edu.cn/yiyang", "dblp": "55/1858;259/5073;33/4854-1.html", "google_scholar": "e6tKXQEAAAAJ;kQ-FWd8AAAAJ;RMSuNFwAAAAJ", "orcid": ";0000-0002-4238-8475;", "linkedin": ";;", "or_profile": "~Yuan_Gan1;~Jiaxu_Miao2;~Yi_Yang22", "aff": "Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ngan2024datastealing,\ntitle={DataStealing: Steal Data from Diffusion Models in Federated Learning with Multiple Trojans},\nauthor={Yuan Gan and Jiaxu Miao and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=792txRlKit}\n}", "github": "", "reviewers": "vH6J;CPaA;H1Y5;wpRh", "pdf_size": 1781803, "rating": "6;6;6;7", "confidence": "5;4;3;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "105;250;66;103", "wc_strengths": "136;493;26;93", "wc_weaknesses": "187;554;9;76", "wc_questions": "5;459;305;1", "wc_limitations": "5;501;4;15", "wc_review": "438;2257;410;288", "wc_reply_reviewers": "0;20;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 131.0, 70.43791592601247 ], "wc_strengths_avg": [ 187.0, 180.96546631885323 ], "wc_weaknesses_avg": [ 206.5, 210.45961607871473 ], "wc_questions_avg": [ 192.5, 197.1718793337427 ], "wc_limitations_avg": [ 131.25, 213.51858818379256 ], "wc_review_avg": [ 848.25, 815.2951536100285 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MKFOr2fEPI8J:scholar.google.com/&scioq=DataStealing:+Steal+Data+from+Diffusion+Models+in+Federated+Learning+with+Multiple+Trojans&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Zero-Shot Reinforcement Learning from Low Quality Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96479", "id": "79eWvkLjib", "proceeding": "", "pdf": "https://openreview.net/pdf?id=79eWvkLjib", "openreview": "https://openreview.net/forum?id=79eWvkLjib", "poster": "/media/PosterPDFs/NeurIPS%202024/96479.png?t=1733168272.528061", "project": "", "author_site": "Scott Jeen, Tom Bewley, Jonathan Cullen", "tldr": "", "abstract": "Zero-shot reinforcement learning (RL) promises to provide agents that can perform _any_ task in an environment after an offline, reward-free pre-training phase. Methods leveraging successor measures and successor features have shown strong performance in this setting, but require access to large heterogenous datasets for pre-training which cannot be expected for most real problems. Here, we explore how the performance of zero-shot RL methods degrades when trained on small homogeneous datasets, and propose fixes inspired by _conservatism_, a well-established feature of performant single-task offline RL algorithms. We evaluate our proposals across various datasets, domains and tasks, and show that conservative zero-shot RL algorithms outperform their non-conservative counterparts on low quality datasets, and perform no worse on high quality datasets. Somewhat surprisingly, our proposals also outperform baselines that get to see the task during training. Our code is available via the project page https://enjeeneer.io/projects/zero-shot-rl/.", "keywords": "reinforcement learning;offline reinforcement learning;unsupervised reinforcement learning;zero-shot reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Scott Jeen;Tom Bewley;Jonathan Cullen", "authorids": "~Scott_Jeen1;~Tom_Bewley1;~Jonathan_Cullen1", "gender": "M;;M", "homepage": "https://enjeeneer.io/;http://tombewley.com;https://www.refficiency.org/", "dblp": ";;", "google_scholar": "3HPX720AAAAJ;OqPzZ08AAAAJ;", "orcid": ";;0000-0003-4347-5025", "linkedin": ";;", "or_profile": "~Scott_Jeen1;~Tom_Bewley1;~Jonathan_Cullen1", "aff": "University of Cambridge;J.P. Morgan;University of Cambridge", "aff_domain": "cam.ac.uk;jpmorgan.com;cam.ac.uk", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\njeen2024zeroshot,\ntitle={Zero-Shot Reinforcement Learning from Low Quality Data},\nauthor={Scott Jeen and Tom Bewley and Jonathan Cullen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=79eWvkLjib}\n}", "github": "", "reviewers": "ZxeR;ZC95;5D6P;qeSX", "pdf_size": 3967731, "rating": "3;6;7;7", "confidence": "2;3;2;3", "soundness": "2;3;3;3", "novelty": "2;3;4;3", "presentation": "2;3;4;3", "wc_summary": "89;62;64;124", "wc_strengths": "56;38;98;40", "wc_weaknesses": "98;274;48;20", "wc_questions": "3;52;12;24", "wc_limitations": "3;23;14;14", "wc_review": "249;449;236;222", "wc_reply_reviewers": "0;138;25;107", "wc_reply_authors": "0;385;0;177", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.75, 25.033727249452888 ], "wc_strengths_avg": [ 58.0, 24.124676163629637 ], "wc_weaknesses_avg": [ 110.0, 98.72183142547549 ], "wc_questions_avg": [ 22.75, 18.45772196128222 ], "wc_limitations_avg": [ 13.5, 7.088723439378913 ], "wc_review_avg": [ 289.0, 92.86818615650894 ], "wc_reply_reviewers_avg": [ 67.5, 56.77367347635698 ], "wc_reply_authors_avg": [ 140.5, 158.58199771726927 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.45749571099781405, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18323574727464541371&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;jpmorgan.com;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;J.P. Morgan", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.jpmorganchase.com", "aff_unique_abbr": "Cambridge;JPM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Is Your LiDAR Placement Optimized for 3D Scene Understanding?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96478", "id": "79q206xswc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=79q206xswc", "openreview": "https://openreview.net/forum?id=79q206xswc", "poster": "/media/PosterPDFs/NeurIPS%202024/96478.png?t=1731486872.5993133", "project": "", "author_site": "Ye Li, Lingdong Kong, Hanjiang Hu, Xiaohao Xu, Xiaonan Huang", "tldr": "", "abstract": "The reliability of driving perception systems under unprecedented conditions is crucial for practical usage. Latest advancements have prompted increasing interest in multi-LiDAR perception. However, prevailing driving datasets predominantly utilize single-LiDAR systems and collect data devoid of adverse conditions, failing to capture the complexities of real-world environments accurately. Addressing these gaps, we proposed Place3D, a full-cycle pipeline that encompasses LiDAR placement optimization, data generation, and downstream evaluations. Our framework makes three appealing contributions. 1) To identify the most effective configurations for multi-LiDAR systems, we introduce the Surrogate Metric of the Semantic Occupancy Grids (M-SOG) to evaluate LiDAR placement quality. 2) Leveraging the M-SOG metric, we propose a novel optimization strategy to refine multi-LiDAR placements. 3) Centered around the theme of multi-condition multi-LiDAR perception, we collect a 280,000-frame dataset from both clean and adverse conditions. Extensive experiments demonstrate that LiDAR placements optimized using our approach outperform various baselines. We showcase exceptional results in both LiDAR semantic segmentation and 3D object detection tasks, under diverse weather and sensor failure conditions.", "keywords": "Autonomous Driving;LiDAR Semantic Segmentation;Sensor Placement", "primary_area": "machine_vision", "supplementary_material": "/attachment/1faad49f7a258b3929cbcd286cc78c7f66380cfe.zip", "author": "Ye Li;Lingdong Kong;Hanjiang Hu;Xiaohao Xu;Xiaonan Huang", "authorids": "~Ye_Li8;~Lingdong_Kong1;~Hanjiang_Hu1;~Xiaohao_Xu1;~Xiaonan_Huang1", "gender": ";;M;;M", "homepage": "https://ywyeli.github.io/;;https://cs.cmu.edu/~hanjianh;;https://soft.robotics.umich.edu/", "dblp": ";;249/5764;;", "google_scholar": "GA3Ds5gAAAAJ;;https://scholar.google.com/citations?hl=en;;MNKU_WcAAAAJ", "orcid": ";;;;0000-0002-2313-1551", "linkedin": "https://linkedin.com/in/ywyeli;;hanjiang-hu-54337b196/;;", "or_profile": "~Ye_Li8;~Lingdong_Kong1;~Hanjiang_Hu1;~Xiaohao_Xu1;~Xiaonan_Huang1", "aff": "University of Michigan - Ann Arbor;;School of Computer Science, Carnegie Mellon University;;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;;cs.cmu.edu;;umich.edu", "position": "MS student;;MS student;;Assistant Professor", "bibtex": "@inproceedings{\nli2024is,\ntitle={Is Your Li{DAR} Placement Optimized for 3D Scene Understanding?},\nauthor={Ye Li and Lingdong Kong and Hanjiang Hu and Xiaohao Xu and Xiaonan Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=79q206xswc}\n}", "github": "", "reviewers": "DpmT;iXBb;8eRR;PfQe", "pdf_size": 10078820, "rating": "3;6;6;7", "confidence": "4;2;4;5", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "108;100;74;213", "wc_strengths": "37;79;57;116", "wc_weaknesses": "62;65;118;199", "wc_questions": "823;43;78;9", "wc_limitations": "11;1;9;16", "wc_review": "1041;288;336;553", "wc_reply_reviewers": "0;0;0;69", "wc_reply_authors": "621;182;198;172", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 123.75, 53.039490005089604 ], "wc_strengths_avg": [ 72.25, 29.303370113350443 ], "wc_weaknesses_avg": [ 111.0, 55.47521969312064 ], "wc_questions_avg": [ 238.25, 338.485874889928 ], "wc_limitations_avg": [ 9.25, 5.402545696243577 ], "wc_review_avg": [ 554.5, 298.097718877552 ], "wc_reply_reviewers_avg": [ 17.25, 29.877876430563134 ], "wc_reply_authors_avg": [ 293.25, 189.453655282763 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.07647191129018727, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6278298751347665924&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "umich.edu;;cs.cmu.edu;;umich.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Michigan;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.umich.edu;https://www.cmu.edu", "aff_unique_abbr": "UM;CMU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Ann Arbor;Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Right this way: Can VLMs Guide Us to See More to Answer Questions?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96477", "id": "7ANmKBfP88", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7ANmKBfP88", "openreview": "https://openreview.net/forum?id=7ANmKBfP88", "poster": "/media/PosterPDFs/NeurIPS%202024/96477.png?t=1731626564.9802532", "project": "", "author_site": "Li Liu, Diji Yang, Sijia Zhong, Kalyana Suma Sree Tholeti, Lei Ding, Yi Zhang, Leilani Gilpin", "tldr": "", "abstract": "In question-answering scenarios, humans can assess whether the available information is sufficient and seek additional information if necessary, rather than providing a forced answer. In contrast, Vision Language Models (VLMs) typically generate direct, one-shot responses without evaluating the sufficiency of the information. To investigate this gap, we identify a critical and challenging task in the Visual Question Answering (VQA) scenario: can VLMs indicate how to adjust an image when the visual information is insufficient to answer a question? This capability is especially valuable for assisting visually impaired individuals who often need guidance to capture images correctly. To evaluate this capability of current VLMs, we introduce a human-labeled dataset as a benchmark for this task. Additionally, we present an automated framework that generates synthetic training data by simulating ``where to know'' scenarios. Our empirical results show significant performance improvements in mainstream VLMs when fine-tuned with this synthetic data. This study demonstrates the potential to narrow the gap between information assessment and acquisition in VLMs, bringing their performance closer to humans.", "keywords": "visual accessibility;self-knowledge;vision language models", "primary_area": "generative_models", "supplementary_material": "", "author": "Li Liu;Diji Yang;Sijia Zhong;Kalyana Suma Sree Tholeti;Lei Ding;Yi Zhang;Leilani H. Gilpin", "authorids": "~Li_Liu7;~Diji_Yang1;~Sijia_Zhong1;~Kalyana_Suma_Sree_Tholeti1;~Lei_Ding10;~Yi_Zhang91;~Leilani_H._Gilpin1", "gender": "M;M;F;F;M;F;F", "homepage": "https://leolee7.github.io/;https://dyang39.github.io/;;;https://llv22.github.io/orlando.github.io/;https://sites.google.com/ucsc.edu/yizhang/home;http://lgilpin.com", "dblp": ";234/1212;;;;;215/8848", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;6pScPqIAAAAJ;https://scholar.google.com/citations?hl=en;UFT_ijYAAAAJ", "orcid": ";;;;0009-0002-2173-5666;;0000-0002-9741-2014", "linkedin": ";;sijia-zhong-592a01253/;kalyana-sumasree-tholeti;;;leilanigilpin/", "or_profile": "~Li_Liu7;~Diji_Yang1;~Sijia_Zhong1;~Kalyana_Suma_Sree_Tholeti1;~Lei_Ding10;~Yi_Zhang91;~Leilani_H._Gilpin1", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu", "position": "PhD student;PhD student;MS student;MS student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024right,\ntitle={Right this way: Can {VLM}s Guide Us to See More to Answer Questions?},\nauthor={Li Liu and Diji Yang and Sijia Zhong and Kalyana Suma Sree Tholeti and Lei Ding and Yi Zhang and Leilani H. Gilpin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7ANmKBfP88}\n}", "github": "", "reviewers": "eg4t;SnhN;Eiqe", "pdf_size": 8949368, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "50;82;65", "wc_strengths": "18;31;98", "wc_weaknesses": "167;110;76", "wc_questions": "15;8;39", "wc_limitations": "1;4;2", "wc_review": "251;235;280", "wc_reply_reviewers": "23;72;108", "wc_reply_authors": "31;80;58", "reply_reviewers": "1;2;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 65.66666666666667, 13.072447700751718 ], "wc_strengths_avg": [ 49.0, 35.05234181430203 ], "wc_weaknesses_avg": [ 117.66666666666667, 37.54404820415022 ], "wc_questions_avg": [ 20.666666666666668, 13.274871834493252 ], "wc_limitations_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_review_avg": [ 255.33333333333334, 18.624953392931992 ], "wc_reply_reviewers_avg": [ 67.66666666666667, 34.83612429010374 ], "wc_reply_authors_avg": [ 56.333333333333336, 20.038851153585515 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5467436289135317417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of California, Santa Cruz", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsc.edu", "aff_unique_abbr": "UCSC", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Santa Cruz", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Discrete Modeling via Boundary Conditional Diffusion Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96476", "id": "7AWMTPMZES", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7AWMTPMZES", "openreview": "https://openreview.net/forum?id=7AWMTPMZES", "poster": "/media/PosterPDFs/NeurIPS%202024/96476.png?t=1731332569.5901458", "project": "", "author_site": "Yuxuan Gu, Xiaocheng Feng, Lei Huang, Yingsheng Wu, Zekun Zhou, Weihong Zhong, kun Zhu, Bing Qin", "tldr": "", "abstract": "We present an novel framework for efficiently and effectively extending the powerful continuous diffusion processes to discrete modeling.\nPrevious approaches have suffered from the discrepancy between discrete data and continuous modeling.\nOur study reveals that the absence of guidance from discrete boundaries in learning probability contours is one of the main reasons.\nTo address this issue, we propose a two-step forward process that first estimates the boundary as a prior distribution and then rescales the forward trajectory to construct a boundary conditional diffusion model.\nThe reverse process is proportionally adjusted to guarantee that the learned contours yield more precise discrete data.\nExperimental results indicate that our approach achieves strong performance in both language modeling and discrete image generation tasks.\nIn language modeling, our approach surpasses previous state-of-the-art continuous diffusion language models in three translation tasks and a summarization task, while also demonstrating competitive performance compared to auto-regressive transformers. Moreover, our method achieves comparable results to continuous diffusion models when using discrete ordinal pixels and establishes a new state-of-the-art for categorical image generation on the Cifar-10 dataset.", "keywords": "Diffusion Models;Discrete Modeling;Language Generation;Discrete Image Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yuxuan Gu;Xiaocheng Feng;Lei Huang;Yingsheng Wu;Zekun Zhou;Weihong Zhong;kun Zhu;Bing Qin", "authorids": "~Yuxuan_Gu1;~Xiaocheng_Feng1;~Lei_Huang4;~Yingsheng_Wu1;~Zekun_Zhou1;~Weihong_Zhong1;~kun_Zhu2;~Bing_Qin2", "gender": "M;M;M;M;M;;F;", "homepage": ";http://ir.hit.edu.cn/~xcfeng/;https://scholar.google.com/citations?user=thexHRkAAAAJ&hl=en;https://www.zhihu.com/people/xmssgg;https://zkzhou126.github.io/;;;http://ir.hit.edu.cn/~qinb", "dblp": ";;18/1763-21;;;;344/4587;86/5934.html", "google_scholar": ";Xu8NbhYAAAAJ;thexHRkAAAAJ;;;;https://scholar.google.com/citations?hl=en;LKnCub0AAAAJ", "orcid": "0009-0000-3820-5202;;;;;;0009-0008-0731-2978;0000-0002-2543-5604", "linkedin": ";;;;;;;", "or_profile": "~Yuxuan_Gu1;~Xiaocheng_Feng1;~Lei_Huang4;~Yingsheng_Wu1;~Zekun_Zhou1;~Weihong_Zhong1;~kun_Zhu2;~Bing_Qin2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Jilin University;;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;ir.hit.edu;jlu.edu.cn;;hit.edu.cn;hit.edu.cn", "position": "PhD student;Full Professor;PhD student;MS student;Undergrad student;;PhD student;Full Professor", "bibtex": "@inproceedings{\ngu2024discrete,\ntitle={Discrete Modeling via Boundary Conditional Diffusion Processes},\nauthor={Yuxuan Gu and Xiaocheng Feng and Lei Huang and Yingsheng Wu and Zekun Zhou and Weihong Zhong and kun Zhu and Bing Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7AWMTPMZES}\n}", "github": "", "reviewers": "uQEt;ooCU;1VFK;m5hg", "pdf_size": 1439501, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;2;3", "wc_summary": "99;84;116;49", "wc_strengths": "38;124;80;32", "wc_weaknesses": "42;58;711;4", "wc_questions": "106;2;117;160", "wc_limitations": "28;14;35;1", "wc_review": "313;282;1059;246", "wc_reply_reviewers": "72;0;650;11", "wc_reply_authors": "2125;0;3509;13", "reply_reviewers": "2;0;3;1", "reply_authors": "6;1;11;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.0, 24.68805379125702 ], "wc_strengths_avg": [ 68.5, 36.99662146737185 ], "wc_weaknesses_avg": [ 203.75, 293.51692881331394 ], "wc_questions_avg": [ 96.25, 58.03608791088524 ], "wc_limitations_avg": [ 19.5, 13.0862523283024 ], "wc_review_avg": [ 475.0, 338.0051774751387 ], "wc_reply_reviewers_avg": [ 183.25, 270.87024107494716 ], "wc_reply_authors_avg": [ 1411.75, 1488.0123277379123 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 5.0, 3.9370039370059056 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lvYxlXPM40wJ:scholar.google.com/&scioq=Discrete+Modeling+via+Boundary+Conditional+Diffusion+Processes&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;ir.hit.edu;jlu.edu.cn;;hit.edu.cn;hit.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Harbin Institute of Technology;Jilin University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;http://www.jlu.edu.cn", "aff_unique_abbr": "HIT;JLU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Amortized Active Causal Induction with Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96475", "id": "7AXY27kdNH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7AXY27kdNH", "openreview": "https://openreview.net/forum?id=7AXY27kdNH", "poster": "/media/PosterPDFs/NeurIPS%202024/96475.png?t=1733361487.3991892", "project": "", "author_site": "Yashas Annadani, Panagiotis Tigas, Stefan Bauer, Adam Foster", "tldr": "", "abstract": "We present Causal Amortized Active Structure Learning (CAASL), an active intervention design policy that can select interventions that are adaptive, real-time and that does not require access to the likelihood. This policy, an amortized network based on the transformer, is trained with reinforcement learning on a simulator of the design environment, and a reward function that measures how close the true causal graph is to a causal graph posterior inferred from the gathered data. On synthetic data and a single-cell gene expression simulator, we demonstrate empirically that the data acquired through our policy results in a better estimate of the underlying causal graph than alternative strategies. Our design policy successfully achieves amortized intervention design on the distribution of the training environment while also generalizing well to distribution shifts in test-time design environments. Further, our policy also demonstrates excellent zero-shot generalization to design environments with dimensionality higher than that during training, and to intervention types that it has not been trained on.", "keywords": "Active Causal Structure Learning;Adaptive Intervention Design;Reinforcement Learning", "primary_area": "active_learning", "supplementary_material": "/attachment/117a4369fe58d36d53e6b52c3e0f16d039df35d3.zip", "author": "Yashas Annadani;Panagiotis Tigas;Stefan Bauer;Adam Foster", "authorids": "~Yashas_Annadani1;~Panagiotis_Tigas1;~Stefan_Bauer1;~Adam_Foster1", "gender": ";;M;", "homepage": "https://yashasannadani.com;https://cifar.ca/bios/stefan-bauer/;https://ae-foster.github.io;https://ptigas.com", "dblp": "190/7411;;223/5765;159/7244", "google_scholar": "ExgzcVMAAAAJ;O-oICE8AAAAJ;1MsXZJ0AAAAJ;https://scholar.google.co.uk/citations?user=E9ITYW0AAAAJ", "orcid": ";;;0000-0001-9944-1129", "linkedin": ";;adamefoster;", "or_profile": "~Yashas_Annadani1;~Stefan_Bauer1;~Adam_Foster1;~Panagiotis_Tigkas1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Technische Universit\u00e4t M\u00fcnchen;Microsoft;Isomorphic Labs (Alphabet entity)", "aff_domain": "tuebingen.mpg.de;tum.de;microsoft.com;google.com", "position": "PhD student;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nannadani2024amortized,\ntitle={Amortized Active Causal Induction with Deep Reinforcement Learning},\nauthor={Yashas Annadani and Panagiotis Tigas and Stefan Bauer and Adam Foster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7AXY27kdNH}\n}", "github": "", "reviewers": "EZYL;o26x;ydzf;rMit", "pdf_size": 1795674, "rating": "5;5;7;7", "confidence": "3;3;4;3", "soundness": "4;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "84;113;361;116", "wc_strengths": "79;56;106;148", "wc_weaknesses": "260;295;710;166", "wc_questions": "35;106;140;10", "wc_limitations": "1;54;60;37", "wc_review": "459;624;1377;477", "wc_reply_reviewers": "44;413;106;30", "wc_reply_authors": "104;640;37;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 168.5, 111.84028791093128 ], "wc_strengths_avg": [ 97.25, 34.22992112173208 ], "wc_weaknesses_avg": [ 357.75, 208.77065766050555 ], "wc_questions_avg": [ 72.75, 52.41838894891754 ], "wc_limitations_avg": [ 38.0, 22.967368155711704 ], "wc_review_avg": [ 734.25, 376.570959448548 ], "wc_reply_reviewers_avg": [ 148.25, 155.5062297787455 ], "wc_reply_authors_avg": [ 195.25, 259.468085706123 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5655613054970582743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tuebingen.mpg.de;tum.de;microsoft.com;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Technische Universit\u00e4t M\u00fcnchen;Microsoft;Isomorphic Labs", "aff_unique_dep": "Intelligent Systems;;Microsoft Corporation;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.tum.de;https://www.microsoft.com;https://isomorphiclabs.com", "aff_unique_abbr": "MPI-IS;TUM;Microsoft;Isomorphic Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "ContextCite: Attributing Model Generation to Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96474", "id": "7CMNSqsZJt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7CMNSqsZJt", "openreview": "https://openreview.net/forum?id=7CMNSqsZJt", "poster": "", "project": "", "author_site": "Benjamin Cohen-Wang, Harshay Shah, Kristian Georgiev, Aleksander Madry", "tldr": "", "abstract": "How do language models use information provided as context when generating a response?\nCan we infer whether a particular generated statement is actually grounded in the context, a misinterpretation, or fabricated?\nTo help answer these questions, we introduce the problem of *context attribution*: pinpointing the parts of the context (if any) that *led* a model to generate a particular statement.\nWe then present ContextCite, a simple and scalable method for context attribution that can be applied on top of any existing language model.\nFinally, we showcase the utility of ContextCite through three applications:\n(1) helping verify generated statements\n(2) improving response quality by pruning the context and\n(3) detecting poisoning attacks.\nWe provide code for ContextCite at https://github.com/MadryLab/context-cite.", "keywords": "attribution;citation;generative models;large language models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Benjamin Cohen-Wang;Harshay Shah;Kristian Georgiev;Aleksander Madry", "authorids": "~Benjamin_Cohen-Wang1;~Harshay_Shah1;~Kristian_Georgiev1;~Aleksander_Madry1", "gender": "M;;M;M", "homepage": "https://bencw99.github.io;http://harshay.me/;https://people.csail.mit.edu/madry/;https://kristian-georgiev.github.io/", "dblp": ";211/7945;67/2454;304/2868", "google_scholar": "QwJR7jEAAAAJ;oC8YKjUAAAAJ;SupjsEUAAAAJ;t8RKSJsAAAAJ", "orcid": ";;;0000-0003-4802-1962", "linkedin": ";;;", "or_profile": "~Benjamin_Cohen-Wang1;~Harshay_Shah1;~Aleksander_Madry1;~Kristian_Georgiev_Georgiev1", "aff": "Massachusetts Institute of Technology;Apple MLR;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;apple.com;mit.edu;mit.edu", "position": "PhD student;Research Intern;Professor;PhD student", "bibtex": "@inproceedings{\ncohen-wang2024contextcite,\ntitle={ContextCite: Attributing Model Generation to Context},\nauthor={Benjamin Cohen-Wang and Harshay Shah and Kristian Georgiev and Aleksander Madry},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7CMNSqsZJt}\n}", "github": "", "reviewers": "Thhj;4MHt;A2Ps;zvan", "pdf_size": 1657898, "rating": "5;6;7;8", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "4;3;3;4", "wc_summary": "76;64;90;103", "wc_strengths": "107;58;110;73", "wc_weaknesses": "160;139;310;53", "wc_questions": "44;59;20;33", "wc_limitations": "9;1;7;11", "wc_review": "396;321;537;273", "wc_reply_reviewers": "0;17;33;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 83.25, 14.652218262092603 ], "wc_strengths_avg": [ 87.0, 22.169799277395363 ], "wc_weaknesses_avg": [ 165.5, 92.55944036131592 ], "wc_questions_avg": [ 39.0, 14.33527118683145 ], "wc_limitations_avg": [ 7.0, 3.7416573867739413 ], "wc_review_avg": [ 381.75, 99.77819150495763 ], "wc_reply_reviewers_avg": [ 12.5, 13.720422734012244 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12839926894854049449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;apple.com;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Apple", "aff_unique_dep": ";Machine Learning and Research", "aff_unique_url": "https://web.mit.edu;https://www.apple.com", "aff_unique_abbr": "MIT;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Analytically deriving Partial Information Decomposition for affine systems of stable and convolution-closed distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96473", "id": "7CUUtpDeqN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7CUUtpDeqN", "openreview": "https://openreview.net/forum?id=7CUUtpDeqN", "poster": "/media/PosterPDFs/NeurIPS%202024/96473.png?t=1731374308.1481178", "project": "", "author_site": "Chaitanya Goswami, Amanda Merkley", "tldr": "", "abstract": "Bivariate partial information decomposition (PID) has emerged as a promising tool for analyzing interactions in complex systems, particularly in neuroscience. PID achieves this by decomposing the information that two sources (e.g., different brain regions) have about a target (e.g., a stimulus) into unique, redundant, and synergistic terms. However, the computation of PID remains a challenging problem, often involving optimization over distributions. While several works have been proposed to compute PID terms numerically, there is a surprising dearth of work on computing PID terms analytically. The only known analytical PID result is for jointly Gaussian distributions. In this work, we present two theoretical advances that enable analytical calculation of the PID terms for numerous well-known distributions, including distributions relevant to neuroscience, such as Poisson, Cauchy, and binomial. Our first result generalizes the analytical Gaussian PID result to the much larger class of stable distributions. We also discover a theoretical link between PID and the emerging fields of data thinning and data fission. Our second result utilizes this link to derive analytical PID terms for two more classes of distributions: convolution-closed distributions and a sub-class of the exponential family. Furthermore, we provide an analytical upper bound for approximately calculating PID for convolution-closed distributions, whose tightness we demonstrate in simulation.", "keywords": "Partial Information Decomposition;Neuroscience;Multimodal learning;Analytical PID", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/60fe996648a49eedf8e24ba84dd52e80a2b1b0b4.zip", "author": "Chaitanya Goswami;Amanda Merkley", "authorids": "~Chaitanya_Goswami1;~Amanda_Merkley1", "gender": "M;F", "homepage": ";", "dblp": "294/2414;347/2127", "google_scholar": "23-5P5EAAAAJ;", "orcid": "0000-0001-9483-8821;", "linkedin": ";amandamerkley", "or_profile": "~Chaitanya_Goswami1;~Amanda_Merkley1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\ngoswami2024analytically,\ntitle={Analytically deriving Partial Information Decomposition for affine systems of stable and convolution-closed distributions},\nauthor={Chaitanya Goswami and Amanda Merkley},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7CUUtpDeqN}\n}", "github": "", "reviewers": "zuEu;vKRJ;2Gx9;Ffcm", "pdf_size": 3163987, "rating": "5;7;7;7", "confidence": "3;5;2;4", "soundness": "3;4;4;4", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "137;52;178;84", "wc_strengths": "86;57;80;83", "wc_weaknesses": "142;78;110;174", "wc_questions": "66;108;72;45", "wc_limitations": "1;1;26;8", "wc_review": "432;296;466;394", "wc_reply_reviewers": "19;44;33;18", "wc_reply_authors": "11;17;8;12", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.75, 48.380652124583854 ], "wc_strengths_avg": [ 76.5, 11.4564392373896 ], "wc_weaknesses_avg": [ 126.0, 35.77708763999664 ], "wc_questions_avg": [ 72.75, 22.68672519338126 ], "wc_limitations_avg": [ 9.0, 10.222524150130436 ], "wc_review_avg": [ 397.0, 63.631753079732135 ], "wc_reply_reviewers_avg": [ 28.5, 10.735455276791944 ], "wc_reply_authors_avg": [ 12.0, 3.24037034920393 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_E7B5m8n7TkJ:scholar.google.com/&scioq=Analytically+deriving+Partial+Information+Decomposition+for+affine+systems+of+stable+and+convolution-closed+distributions&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning with Fitzpatrick Losses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96472", "id": "7Dep87TMJs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Dep87TMJs", "openreview": "https://openreview.net/forum?id=7Dep87TMJs", "poster": "/media/PosterPDFs/NeurIPS%202024/96472.png?t=1731406589.8417573", "project": "", "author_site": "Seta Rakotomandimby, Jean-Philippe Chancelier, Michel De Lara, Mathieu Blondel", "tldr": "", "abstract": "Fenchel-Young losses are a family of loss functions, encompassing the squared,\nlogistic and sparsemax losses, among others. They are convex w.r.t. the model\noutput and the target, separately. Each Fenchel-Young loss is implicitly associated\nwith a link function, that maps model outputs to predictions. For instance, the\nlogistic loss is associated with the soft argmax link function. Can we build new\nloss functions associated with the same link function as Fenchel-Young losses?\nIn this paper, we introduce Fitzpatrick losses, a new family of separately convex\nloss functions based on the Fitzpatrick function. A well-known theoretical tool in\nmaximal monotone operator theory, the Fitzpatrick function naturally leads to a\nrefined Fenchel-Young inequality, making Fitzpatrick losses tighter than Fenchel-\nYoung losses, while maintaining the same link function for prediction. As an\nexample, we introduce the Fitzpatrick logistic loss and the Fitzpatrick sparsemax\nloss, counterparts of the logistic and the sparsemax losses. This yields two new\ntighter losses associated with the soft argmax and the sparse argmax, two of the\nmost ubiquitous output layers used in machine learning. We study in details the\nproperties of Fitzpatrick losses and, in particular, we show that they can be seen as\nFenchel-Young losses using a modified, target-dependent generating function. We\ndemonstrate the effectiveness of Fitzpatrick losses for label proportion estimation.", "keywords": "loss functions;convex analysis;monotone operators", "primary_area": "optimization", "supplementary_material": "", "author": "Seta Rakotomandimby;Jean-Philippe Chancelier;Michel De Lara;Mathieu Blondel", "authorids": "~Seta_Rakotomandimby1;~Jean-Philippe_Chancelier1;~Michel_De_Lara1;~Mathieu_Blondel1", "gender": ";M;M;", "homepage": "https://setar0202.github.io/;http://cermics.enpc.fr/~jpc;http://cermics.enpc.fr/~delara/;http://www.mblondel.org", "dblp": ";;;05/8614.html", "google_scholar": ";;;C0EKzrUAAAAJ", "orcid": "0009-0003-2616-5743;0000-0002-0658-2035;;", "linkedin": ";;;", "or_profile": "~Seta_Rakotomandimby1;~Jean-Philippe_Chancelier1;~Michel_De_Lara1;~Mathieu_Blondel1", "aff": "Ecole Nationale des Ponts et Chausees;ENPC;ENPC;Google", "aff_domain": "enpc.fr;enpc.fr;enpc.fr;google.com", "position": "PhD student;Researcher;Principal Researcher;Research scientist", "bibtex": "@inproceedings{\nrakotomandimby2024learning,\ntitle={Learning with Fitzpatrick Losses},\nauthor={Seta Rakotomandimby and Jean-Philippe Chancelier and Michel De Lara and Mathieu Blondel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Dep87TMJs}\n}", "github": "", "reviewers": "xpBU;26BT;3f1M;E7nS;C9fT", "pdf_size": 377745, "rating": "5;6;7;7;9", "confidence": "3;3;3;3;4", "soundness": "4;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "143;45;134;83;27", "wc_strengths": "65;35;95;105;87", "wc_weaknesses": "145;107;33;139;315", "wc_questions": "30;31;261;311;107", "wc_limitations": "27;8;9;22;81", "wc_review": "410;226;532;660;617", "wc_reply_reviewers": "40;17;16;109;131", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 86.4, 46.31025804290017 ], "wc_strengths_avg": [ 77.4, 24.96076921891631 ], "wc_weaknesses_avg": [ 147.8, 92.61187828783088 ], "wc_questions_avg": [ 148.0, 117.15972004063512 ], "wc_limitations_avg": [ 29.4, 26.82237871628838 ], "wc_review_avg": [ 489.0, 156.76989506917454 ], "wc_reply_reviewers_avg": [ 62.6, 48.152258514009496 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8291561975888501, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11380973396875992964&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "enpc.fr;enpc.fr;enpc.fr;google.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Ecole Nationale des Ponts et Chaussees;\u00c9cole Nationale des Ponts et Chauss\u00e9es;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.enpc.fr;https://www.enpc.fr;https://www.google.com", "aff_unique_abbr": "ENPC;ENPC;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "France;United States" }, { "title": "Group and Shuffle: Efficient Structured Orthogonal Parametrization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96471", "id": "7EQx56YSB2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7EQx56YSB2", "openreview": "https://openreview.net/forum?id=7EQx56YSB2", "poster": "/media/PosterPDFs/NeurIPS%202024/96471.png?t=1731751491.0681193", "project": "", "author_site": "Mikhail Gorbunov, Nikolay Yudin, Vera Soboleva, Aibek Alanov, Alexey Naumov, Maxim Rakhuba", "tldr": "", "abstract": "The increasing size of neural networks has led to a growing demand for methods of efficient finetuning. Recently, an orthogonal finetuning paradigm was introduced that uses orthogonal matrices for adapting the weights of a pretrained model. In this paper, we introduce a new class of structured matrices, which unifies and generalizes structured classes from previous works. We examine properties of this class and build a structured orthogonal parametrization upon it. We then use this parametrization to modify the orthogonal finetuning framework, improving parameter efficiency. We empirically validate our method on different domains, including adapting of text-to-image diffusion models and downstream task finetuning in language modeling. Additionally, we adapt our construction for orthogonal convolutions and conduct experiments with 1-Lipschitz neural networks.", "keywords": "Parameter-efficient finetuning;PEFT;orthogonal;structured matrices;convolutions", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Mikhail Gorbunov;Kolya Yudin;Vera Soboleva;Aibek Alanov;Alexey Naumov;Maxim Rakhuba", "authorids": "~Mikhail_Gorbunov1;~Kolya_Yudin1;~Vera_Soboleva1;~Aibek_Alanov1;~Alexey_Naumov1;~Maxim_Rakhuba1", "gender": "M;;F;M;M;", "homepage": "https://t.me/skonor;https://github.com/Kilka74;;;https://www.hse.ru/en/staff/anaumov;", "dblp": ";159/5610;;228/9365;196/2848;", "google_scholar": ";z6FhkgwAAAAJ;https://scholar.google.com.ph/citations?user=M8UdNTQAAAAJ;MXJTRGoAAAAJ;5723KoYAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Mikhail_Gorbunov1;~Kolya_Yudin1;~Vera_Soboleva1;~Aibek_Alanov1;~Alexey_Naumov1;~Maxim_Rakhuba1", "aff": "Higher School of Economics, Higher School of Economics;Higher School of Economics;Higher School of Economics;Artificial Intelligence Research Institute;Higher School of Economics;", "aff_domain": "edu.hse.ru;hse.ru;hse.ru;airi.net;hse.ru;", "position": "Undergrad student;Undergrad student;PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\ngorbunov2024group,\ntitle={Group and Shuffle: Efficient Structured Orthogonal Parametrization},\nauthor={Mikhail Gorbunov and Kolya Yudin and Vera Soboleva and Aibek Alanov and Alexey Naumov and Maxim Rakhuba},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7EQx56YSB2}\n}", "github": "", "reviewers": "yzRC;AkW1;Kbzt;dcw2", "pdf_size": 18938791, "rating": "4;4;5;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "90;97;49;36", "wc_strengths": "36;64;21;120", "wc_weaknesses": "399;178;80;81", "wc_questions": "5;12;40;4", "wc_limitations": "1;30;1;31", "wc_review": "531;381;191;272", "wc_reply_reviewers": "0;0;17;33", "wc_reply_authors": "33;33;10;10", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.0, 26.02883016964074 ], "wc_strengths_avg": [ 60.25, 37.79136806203237 ], "wc_weaknesses_avg": [ 184.5, 130.08170509337583 ], "wc_questions_avg": [ 15.25, 14.618053906043718 ], "wc_limitations_avg": [ 15.75, 14.7542366796795 ], "wc_review_avg": [ 343.75, 127.4075645320952 ], "wc_reply_reviewers_avg": [ 12.5, 13.720422734012244 ], "wc_reply_authors_avg": [ 21.5, 11.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9248757893072329753&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "edu.hse.ru;hse.ru;hse.ru;airi.net;hse.ru;", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Higher School of Economics;Artificial Intelligence Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.hse.ru;", "aff_unique_abbr": "HSE;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Russian Federation;United States" }, { "title": "Learning Place Cell Representations and Context-Dependent Remapping", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96470", "id": "7ESHFpqjNO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7ESHFpqjNO", "openreview": "https://openreview.net/forum?id=7ESHFpqjNO", "poster": "", "project": "", "author_site": "Markus Pettersen, Frederik Rogge, Mikkel Lepper\u00f8d", "tldr": "", "abstract": "Hippocampal place cells are known for their spatially selective firing patterns, which has led to the suggestion that they encode an animal's location. However, place cells also respond to contextual cues, such as smell. Furthermore, they have the ability to remap, wherein the firing fields and rates of cells change in response to changes in the environment. How place cell responses emerge, and how these representations remap is not fully understood. In this work, we propose a similarity-based objective function that translates proximity in space, to proximity in representation. We show that a neural network trained to minimize the proposed objective learns place-like representations. We also show that the proposed objective is easily extended to include other sources of information, such as context information, in the same way. When trained to encode multiple contexts, networks learn distinct representations, exhibiting remapping behaviors between contexts. The proposed objective is invariant to orthogonal transformations. Such transformations of the original trained representation (e.g. rotations), therefore yield new representations distinct from the original, without explicit relearning, akin to remapping. Our findings shed new light on the formation and encoding properties of place cells, and also demonstrate an interesting case of representational reuse.", "keywords": "Place cells;remapping;AI;neuroAI", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Markus Pettersen;Frederik Rogge;Mikkel Elle Lepper\u00f8d", "authorids": "~Markus_Pettersen1;~Frederik_Rogge1;~Mikkel_Elle_Lepper\u00f8d1", "gender": "M;;M", "homepage": ";;", "dblp": ";;218/9214", "google_scholar": ";;https://scholar.google.no/citations?user=QSYCR88AAAAJ", "orcid": "0000-0001-9004-4995;;", "linkedin": ";frederik-rogge-b19831157/;", "or_profile": "~Markus_Pettersen1;~Frederik_Rogge1;~Mikkel_Elle_Lepper\u00f8d1", "aff": "Simula Research Laboratory;University of Oslo;Simula Research Laboratory", "aff_domain": "simula.no;uio.no;simula.no", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\npettersen2024learning,\ntitle={Learning Place Cell Representations and Context-Dependent Remapping},\nauthor={Markus Pettersen and Frederik Rogge and Mikkel Elle Lepper{\\o}d},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7ESHFpqjNO}\n}", "github": "", "reviewers": "qPN7;mSmA;wYa3;QBGM;83Ga", "pdf_size": 4966531, "rating": "4;5;5;6;7", "confidence": "4;3;3;4;4", "soundness": "3;2;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "67;66;109;50;136", "wc_strengths": "30;65;72;99;53", "wc_weaknesses": "340;197;218;291;967", "wc_questions": "40;5;97;323;264", "wc_limitations": "3;9;2;79;17", "wc_review": "480;342;498;842;1437", "wc_reply_reviewers": "349;0;131;73;71", "wc_reply_authors": "486;0;43;91;11", "reply_reviewers": "2;0;2;1;1", "reply_authors": "3;1;2;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.6, 31.89106457928302 ], "wc_strengths_avg": [ 63.8, 22.657449106199046 ], "wc_weaknesses_avg": [ 402.6, 286.79930264908245 ], "wc_questions_avg": [ 145.8, 125.51557672257256 ], "wc_limitations_avg": [ 22.0, 28.996551519103093 ], "wc_review_avg": [ 719.8, 394.64941403731996 ], "wc_reply_reviewers_avg": [ 124.8, 119.54647631779031 ], "wc_reply_authors_avg": [ 126.2, 182.65530378283574 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.32025630761017426, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5512585107313652224&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 0, "email": "simula.no;uio.no;simula.no", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Simula Research Laboratory;University of Oslo", "aff_unique_dep": ";", "aff_unique_url": "https://www.simula.no;https://www.uio.no", "aff_unique_abbr": "Simula;UiO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Norway" }, { "title": "Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96469", "id": "7FokMz6U8n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7FokMz6U8n", "openreview": "https://openreview.net/forum?id=7FokMz6U8n", "poster": "/media/PosterPDFs/NeurIPS%202024/96469.png?t=1730001993.217527", "project": "", "author_site": "Johannes Treutlein, Dami Choi, Jan Betley, Samuel Marks, Cem Anil, Roger Grosse, Owain Evans", "tldr": "", "abstract": "One way to address safety risks from large language models (LLMs) is to censor dangerous knowledge from their training data. While this removes the explicit information, implicit information can remain scattered across various training documents. Could an LLM infer the censored knowledge by piecing together these implicit hints? As a step towards answering this question, we study inductive out-of-context reasoning (OOCR), a type of generalization in which LLMs infer latent information from evidence distributed across training documents and apply it to downstream tasks without in-context learning. Using a suite of five tasks, we demonstrate that frontier LLMs can perform inductive OOCR. In one experiment we finetune an LLM on a corpus consisting only of distances between an unknown city and other known cities. Remarkably, without in-context examples or Chain of Thought, the LLM can verbalize that the unknown city is Paris and use this fact to answer downstream questions. Further experiments show that LLMs trained only on individual coin flip outcomes can verbalize whether the coin is biased, and those trained only on pairs $(x,f(x))$ can articulate a definition of $f$ and compute inverses. While OOCR succeeds in a range of cases, we also show that it is unreliable, particularly for smaller LLMs learning complex structures. Overall, the ability of LLMs to \"connect the dots\" without explicit in-context learning poses a potential obstacle to monitoring and controlling the knowledge acquired by LLMs.", "keywords": "NLP;LLM;GPT;generalization;out-of-context reasoning;capabilities;fine-tuning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Johannes Treutlein;Dami Choi;Jan Betley;Samuel Marks;Cem Anil;Roger Baker Grosse;Owain Evans", "authorids": "~Johannes_Treutlein1;~Dami_Choi1;~Jan_Betley1;~Samuel_Marks1;~Cem_Anil1;~Roger_Baker_Grosse1;~Owain_Evans1", "gender": ";;M;;M;M;", "homepage": ";;;;https://www.cs.toronto.edu/~anilcem/;http://www.cs.toronto.edu/~rgrosse/;https://owainevans.github.io/", "dblp": ";209/9687;;;218/6350;26/7058;52/10432", "google_scholar": ";giuZW04AAAAJ;https://scholar.google.com/citations?hl=en;;1VDV6ZEAAAAJ;xgQd1qgAAAAJ;4VpTwzIAAAAJ", "orcid": ";;0009-0008-3518-191X;;;;", "linkedin": ";;jan-betley-118555127/;;;;", "or_profile": "~Johannes_Treutlein1;~Dami_Choi1;~Jan_Betley1;~Samuel_Marks1;~Cem_Anil1;~Roger_Baker_Grosse1;~Owain_Evans1", "aff": ";Department of Computer Science, University of Toronto;Owain Evans' research group;Northeastern University;Toronto University;Vector Institute;Truthful AI", "aff_domain": ";cs.toronto.edu;my-institution-has-no-specific-domain-sorry.com;northeastern.edu;utoronto.ca;vectorinstitute.ai;owainevans.com", "position": ";PhD student;Researcher;Postdoc;PhD student;Faculty Member;Principal Researcher", "bibtex": "@inproceedings{\ntreutlein2024connecting,\ntitle={Connecting the Dots: {LLM}s can Infer and Verbalize Latent Structure from Disparate Training Data},\nauthor={Johannes Treutlein and Dami Choi and Jan Betley and Samuel Marks and Cem Anil and Roger Baker Grosse and Owain Evans},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7FokMz6U8n}\n}", "github": "", "reviewers": "Dyho;ujQc;wmX9;oG6f;CUTt", "pdf_size": 2749107, "rating": "4;5;6;6;8", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;3;4;2;4", "presentation": "1;4;4;3;4", "wc_summary": "34;66;255;166;146", "wc_strengths": "33;52;44;134;38", "wc_weaknesses": "83;116;132;360;28", "wc_questions": "51;4;149;149;98", "wc_limitations": "1;4;8;3;25", "wc_review": "202;242;588;812;335", "wc_reply_reviewers": "0;0;102;70;10", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 1.16619037896906 ], "wc_summary_avg": [ 133.4, 78.01435765293463 ], "wc_strengths_avg": [ 60.2, 37.44008546998791 ], "wc_weaknesses_avg": [ 143.8, 113.80755686684431 ], "wc_questions_avg": [ 90.2, 56.46733569064508 ], "wc_limitations_avg": [ 8.2, 8.704022058795578 ], "wc_review_avg": [ 435.8, 231.11590165975164 ], "wc_reply_reviewers_avg": [ 36.4, 41.90274454018495 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.07537783614444087, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14002449290887649411&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";cs.toronto.edu;my-institution-has-no-specific-domain-sorry.com;northeastern.edu;utoronto.ca;vectorinstitute.ai;owainevans.com", "author_num": 7, "aff_unique_index": "0;1;2;0;3;4", "aff_unique_norm": "University of Toronto;University of Cambridge;Northeastern University;Vector Institute;Truthful AI", "aff_unique_dep": "Department of Computer Science;;;;", "aff_unique_url": "https://www.utoronto.ca;https://www.cam.ac.uk;https://www.northeastern.edu;https://vectorinstitute.ai/;", "aff_unique_abbr": "U of T;;NEU;Vector Institute;", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Toronto;Cambridge;", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Canada;United Kingdom;United States;" }, { "title": "Harnessing Multiple Correlated Networks for Exact Community Recovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96468", "id": "7Fzx3Akdt5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Fzx3Akdt5", "openreview": "https://openreview.net/forum?id=7Fzx3Akdt5", "poster": "/media/PosterPDFs/NeurIPS%202024/96468.png?t=1733766083.3638923", "project": "", "author_site": "Miklos Z. Racz, Jifan Zhang", "tldr": "", "abstract": "We study the problem of learning latent community structure from multiple correlated networks, focusing on edge-correlated stochastic block models with two balanced communities. Recent work of Gaudio, R\u00e1cz, and Sridhar (COLT 2022) determined the precise information-theoretic threshold for exact community recovery using two correlated graphs; in particular, this showcased the subtle interplay between community recovery and graph matching. Here we study the natural setting of more than two graphs. The main challenge lies in understanding how to aggregate information across several graphs when none of the pairwise latent vertex correspondences can be exactly recovered. Our main result derives the precise information-theoretic threshold for exact community recovery using any constant number of correlated graphs, answering a question of Gaudio, R\u00e1cz, and Sridhar (COLT 2022). In particular, for every $K \\geq 3$ we uncover and characterize a region of the parameter space where exact community recovery is possible using $K$ correlated graphs, even though (1) this is information-theoretically impossible using any $K-1$ of them and (2) none of the latent matchings can be exactly recovered.", "keywords": "Stochastic block model;community recovery;graph matching;correlated random graphs;information-theoretic limits", "primary_area": "learning_theory", "supplementary_material": "", "author": "Miklos Z. Racz;Jifan Zhang", "authorids": "~Miklos_Z._Racz1;~Jifan_Zhang2", "gender": "F;", "homepage": "https://jifanz.com;https://racz.statistics.northwestern.edu/", "dblp": ";33/10360", "google_scholar": ";Hwy5DZcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jifan_Zhang2;~Miklos_Racz1", "aff": "Northwestern University, Northwestern University;Northwestern University", "aff_domain": "u.northwestern.edu;northwestern.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nracz2024harnessing,\ntitle={Harnessing Multiple Correlated Networks for Exact Community Recovery},\nauthor={Miklos Z. Racz and Jifan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Fzx3Akdt5}\n}", "github": "", "reviewers": "cuxD;c8Tz;RUAG;b7qh", "pdf_size": 1280396, "rating": "7;7;7;8", "confidence": "4;4;3;3", "soundness": "4;4;3;4", "novelty": "3;3;3;4", "presentation": "4;4;3;4", "wc_summary": "28;173;74;181", "wc_strengths": "40;38;161;75", "wc_weaknesses": "186;10;124;116", "wc_questions": "175;190;487;135", "wc_limitations": "23;19;16;1", "wc_review": "452;430;862;508", "wc_reply_reviewers": "22;14;41;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.0, 65.12679939932562 ], "wc_strengths_avg": [ 78.5, 49.852281793314134 ], "wc_weaknesses_avg": [ 109.0, 63.25345840347388 ], "wc_questions_avg": [ 246.75, 140.15772365446009 ], "wc_limitations_avg": [ 14.75, 8.317902379807062 ], "wc_review_avg": [ 563.0, 174.95427974188 ], "wc_reply_reviewers_avg": [ 26.5, 9.912113800799505 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17861922120125396794&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "u.northwestern.edu;northwestern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Factorized Diffusion Architectures for Unsupervised Image Generation and Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96467", "id": "7G362fgJFd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7G362fgJFd", "openreview": "https://openreview.net/forum?id=7G362fgJFd", "poster": "/media/PosterPDFs/NeurIPS%202024/96467.png?t=1733931403.5791876", "project": "", "author_site": "Xin Yuan, Michael Maire", "tldr": "", "abstract": "We develop a neural network architecture which, trained in an unsupervised manner as a denoising diffusion model, simultaneously learns to both generate and segment images. Learning is driven entirely by the denoising diffusion objective, without any annotation or prior knowledge about regions during training. A computational bottleneck, built into the neural architecture, encourages the denoising network to partition an input into regions, denoise them in parallel, and combine the results. Our trained model generates both synthetic images and, by simple examination of its internal predicted partitions, semantic segmentations of those images. Without fine-tuning, we directly apply our unsupervised model to the downstream task of segmenting real images via noising and subsequently denoising them. Experiments demonstrate that our model achieves accurate unsupervised image segmentation and high-quality synthetic image generation across multiple datasets.", "keywords": "diffusion models;unsupervised learning;image segmentation;neural network architecture", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xin Yuan;Michael Maire", "authorids": "~Xin_Yuan5;~Michael_Maire1", "gender": "M;M", "homepage": ";http://people.cs.uchicago.edu/~mmaire/", "dblp": "78/713-6;73/1498.html", "google_scholar": "EiD_2e0AAAAJ;HXowq5YAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xin_Yuan5;~Michael_Maire1", "aff": "University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nyuan2024factorized,\ntitle={Factorized Diffusion Architectures for Unsupervised Image Generation and Segmentation},\nauthor={Xin Yuan and Michael Maire},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7G362fgJFd}\n}", "github": "", "reviewers": "vzUs;FkB4;FyWe;9WST", "pdf_size": 11597579, "rating": "4;4;5;6", "confidence": "4;4;5;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "39;89;106;47", "wc_strengths": "26;13;48;34", "wc_weaknesses": "510;58;229;15", "wc_questions": "11;32;221;86", "wc_limitations": "1;14;73;13", "wc_review": "587;206;677;195", "wc_reply_reviewers": "0;0;90;98", "wc_reply_authors": "0;0;348;214", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 28.047950014216724 ], "wc_strengths_avg": [ 30.25, 12.695963925594622 ], "wc_weaknesses_avg": [ 203.0, 194.48264704080927 ], "wc_questions_avg": [ 87.5, 81.78783528129351 ], "wc_limitations_avg": [ 25.25, 28.039035290109393 ], "wc_review_avg": [ 416.25, 218.1185170956377 ], "wc_reply_reviewers_avg": [ 47.0, 47.085029467974216 ], "wc_reply_authors_avg": [ 140.5, 148.27255309058384 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15273693310817099879&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "uchicago.edu;uchicago.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Neural Characteristic Activation Analysis and Geometric Parameterization for ReLU Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96466", "id": "7HFQfRjdcn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7HFQfRjdcn", "openreview": "https://openreview.net/forum?id=7HFQfRjdcn", "poster": "", "project": "", "author_site": "Wenlin Chen, Hong Ge", "tldr": "", "abstract": "We introduce a novel approach for analyzing the training dynamics of ReLU networks by examining the characteristic activation boundaries of individual ReLU neurons. Our proposed analysis reveals a critical instability in common neural network parameterizations and normalizations during stochastic optimization, which impedes fast convergence and hurts generalization performance. Addressing this, we propose Geometric Parameterization (GmP), a novel neural network parameterization technique that effectively separates the radial and angular components of weights in the hyperspherical coordinate system. We show theoretically that GmP resolves the aforementioned instability issue. We report empirical results on various models and benchmarks to verify GmP's advantages of optimization stability, convergence speed and generalization performance.", "keywords": "ReLU;deep learning;optimization;parameterization;normalization;neural network;training dynamics", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Wenlin Chen;Hong Ge", "authorids": "~Wenlin_Chen2;~Hong_Ge1", "gender": ";M", "homepage": "https://wenlin-chen.github.io/;", "dblp": ";31/835", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": ";", "or_profile": "~Wenlin_Chen2;~Hong_Ge1", "aff": "Microsoft Research;University of Cambridge", "aff_domain": "microsoft.com;cam.ac.uk", "position": "Research Intern;Senior Research Fellow", "bibtex": "@inproceedings{\nchen2024neural,\ntitle={Neural Characteristic Activation Analysis and Geometric Parameterization for Re{LU} Networks},\nauthor={Wenlin Chen and Hong Ge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7HFQfRjdcn}\n}", "github": "", "reviewers": "ywDS;D8e2;FLhQ;x1Bf", "pdf_size": 1501539, "rating": "3;6;7;8", "confidence": "3;3;4;2", "soundness": "1;4;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "72;96;154;72", "wc_strengths": "101;222;110;44", "wc_weaknesses": "259;140;61;6", "wc_questions": "139;79;122;58", "wc_limitations": "10;21;1;1", "wc_review": "581;558;448;181", "wc_reply_reviewers": "187;15;17;0", "wc_reply_authors": "898;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "5;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 1.224744871391589 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.5, 33.507461855533016 ], "wc_strengths_avg": [ 119.25, 64.49563938748108 ], "wc_weaknesses_avg": [ 116.5, 95.06445182085677 ], "wc_questions_avg": [ 99.5, 32.438403166617185 ], "wc_limitations_avg": [ 8.25, 8.227241335952167 ], "wc_review_avg": [ 442.0, 158.85055870219657 ], "wc_reply_reviewers_avg": [ 54.75, 76.63672422539993 ], "wc_reply_authors_avg": [ 224.5, 388.8454062992129 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1AXUeSNvKKwJ:scholar.google.com/&scioq=Neural+Characteristic+Activation+Analysis+and+Geometric+Parameterization+for+ReLU+Networks&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "microsoft.com;cam.ac.uk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;University of Cambridge", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cam.ac.uk", "aff_unique_abbr": "MSR;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Slot-VLM: Object-Event Slots for Video-Language Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96465", "id": "7Hb03vGcJk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Hb03vGcJk", "openreview": "https://openreview.net/forum?id=7Hb03vGcJk", "poster": "/media/PosterPDFs/NeurIPS%202024/96465.png?t=1733108987.0844655", "project": "", "author_site": "Jiaqi Xu, Cuiling Lan, Wenxuan Xie, Xuejin Chen, Yan Lu", "tldr": "", "abstract": "Video-Language Models (VLMs), powered by the advancements in Large Language Models (LLMs), are charting new frontiers in video understanding. A pivotal challenge is the development of an effective method to encapsulate video content into a set of representative tokens to align with LLMs. In this work, we introduce Slot-VLM, a new framework designed to generate semantically decomposed video tokens, in terms of object-wise and event-wise visual representations, to facilitate LLM inference. Particularly, we design an Object-Event Slots module, i.e., OE-Slots, that adaptively aggregates the dense video tokens from the vision encoder to a set of representative slots. In order to take into account both the spatial object details and the varied temporal dynamics, we build OE-Slots with two branches: the Object-Slots branch and the Event-Slots branch. The Object-Slots branch focuses on extracting object-centric slots from features of high spatial resolution but low frame sample rate, emphasizing detailed object information. The Event-Slots branch is engineered to learn event-centric slots from high temporal sample rate but low spatial resolution features. These complementary slots are combined to form the vision context, serving as the input to the LLM for effective video reasoning. Our experimental results demonstrate the effectiveness of our Slot-VLM, which achieves the state-of-the-art performance on video question-answering.", "keywords": "Video Language Modeling;Object-Centric Representation;Event-Centric Representation;Video Understanding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiaqi Xu;Cuiling Lan;Wenxuan Xie;Xuejin Chen;Yan Lu", "authorids": "~Jiaqi_Xu3;~Cuiling_Lan1;~Wenxuan_Xie1;~Xuejin_Chen1;~Yan_Lu7", "gender": "M;F;M;;M", "homepage": "https://abcd.com;https://www.microsoft.com/en-us/research/people/culan/;https://www.microsoft.com/en-us/research/people/wenxie/;;https://www.microsoft.com/en-us/research/people/yanlu/", "dblp": ";95/8115;142/0064;;15/4830-1", "google_scholar": ";XZugqiwAAAAJ;7vjHnasAAAAJ;;djk5l-4AAAAJ", "orcid": "0009-0005-4081-736X;0000-0001-9145-9957;;;0000-0001-5383-6424", "linkedin": ";;;;", "or_profile": "~Jiaqi_Xu3;~Cuiling_Lan1;~Wenxuan_Xie1;~Xuejin_Chen1;~Yan_Lu7", "aff": "University of Science and Technology of China;Microsoft;Microsoft Research Asia;;Microsoft Research Asia", "aff_domain": "ustc.edu.cn;microsoft.com;microsoft.com;;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;;Partner Research Manager", "bibtex": "@inproceedings{\nxu2024slotvlm,\ntitle={Slot-{VLM}: Object-Event Slots for Video-Language Modeling},\nauthor={Jiaqi Xu and Cuiling Lan and Wenxuan Xie and Xuejin Chen and Yan Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Hb03vGcJk}\n}", "github": "", "reviewers": "wQQx;KJVB;gzPw;jNDE", "pdf_size": 3841989, "rating": "4;6;6;6", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "49;93;135;68", "wc_strengths": "68;38;181;73", "wc_weaknesses": "84;138;267;247", "wc_questions": "11;83;4;3", "wc_limitations": "65;67;1;7", "wc_review": "277;419;588;398", "wc_reply_reviewers": "177;239;65;168", "wc_reply_authors": "70;525;82;46", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.25, 32.18209906143476 ], "wc_strengths_avg": [ 90.0, 54.217155956394464 ], "wc_weaknesses_avg": [ 184.0, 75.78588259036111 ], "wc_questions_avg": [ 25.25, 33.48413803579241 ], "wc_limitations_avg": [ 35.0, 31.080540535840107 ], "wc_review_avg": [ 420.5, 110.8568897272515 ], "wc_reply_reviewers_avg": [ 162.25, 62.44747793145853 ], "wc_reply_authors_avg": [ 180.75, 199.17501725869135 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w637P9OolZ0J:scholar.google.com/&scioq=Slot-VLM:+Object-Event+Slots+for+Video-Language+Modeling&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "ustc.edu.cn;microsoft.com;microsoft.com;;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Science and Technology of China;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "USTC;Microsoft", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Richelieu: Self-Evolving LLM-Based Agents for AI Diplomacy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96464", "id": "7Jb4NJS8Yk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Jb4NJS8Yk", "openreview": "https://openreview.net/forum?id=7Jb4NJS8Yk", "poster": "/media/PosterPDFs/NeurIPS%202024/96464.png?t=1733495261.0084898", "project": "", "author_site": "Zhenyu Guan, Xiangyu Kong, Fangwei Zhong, Yizhou Wang", "tldr": "", "abstract": "Diplomacy is one of the most sophisticated activities in human society, involving complex interactions among multiple parties that require skills in social reasoning, negotiation, and long-term strategic planning. Previous AI agents have demonstrated their ability to handle multi-step games and large action spaces in multi-agent tasks. However, diplomacy involves a staggering magnitude of decision spaces, especially considering the negotiation stage required. While recent agents based on large language models (LLMs) have shown potential in various applications, they still struggle with extended planning periods in complex multi-agent settings. Leveraging recent technologies for LLM-based agents, we aim to explore AI's potential to create a human-like agent capable of executing comprehensive multi-agent missions by integrating three fundamental capabilities: 1) strategic planning with memory and reflection; 2) goal-oriented negotiation with social reasoning; and 3) augmenting memory through self-play games for self-evolution without human in the loop. Project page: https://sites.google.com/view/richelieu-diplomacy.", "keywords": "AI Diplomacy;LLM-based Agent;Self-play", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Zhenyu Guan;Xiangyu Kong;Fangwei Zhong;Yizhou Wang", "authorids": "~Zhenyu_Guan3;~Xiangyu_Kong1;~Fangwei_Zhong3;~Yizhou_Wang1", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/site/pkuxykong/;https://cfcs.pku.edu.cn/wangyizhou/;https://fangweizhong.xyz/", "dblp": ";12/8442;71/3387-1;207/1900", "google_scholar": "XuI3O5wAAAAJ;https://scholar.google.com/citations?hl=zh-CN;831z_VcAAAAJ;ejDz1bYAAAAJ", "orcid": ";0000-0002-8737-0180;;0000-0002-0428-4552", "linkedin": ";;;", "or_profile": "~Zhenyu_Guan3;~Xiangyu_Kong1;~Yizhou_Wang1;~fangwei_zhong1", "aff": "Peking University;Beijing Institute for General Artificial Intelligence;Peking University;Peking University", "aff_domain": "pku.edu;bigai.ai;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nguan2024richelieu,\ntitle={Richelieu: Self-Evolving {LLM}-Based Agents for {AI} Diplomacy},\nauthor={Zhenyu Guan and Xiangyu Kong and Fangwei Zhong and Yizhou Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Jb4NJS8Yk}\n}", "github": "", "reviewers": "raEc;EuFx;HUgn;36qq", "pdf_size": 2331306, "rating": "4;4;6;7", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "49;39;46;90", "wc_strengths": "33;37;48;72", "wc_weaknesses": "143;102;108;156", "wc_questions": "27;2;82;4", "wc_limitations": "1;24;4;11", "wc_review": "253;204;288;333", "wc_reply_reviewers": "0;0;15;50", "wc_reply_authors": "123;0;20;141", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 56.0, 19.96246477767713 ], "wc_strengths_avg": [ 47.5, 15.173990905493518 ], "wc_weaknesses_avg": [ 127.25, 22.818577957445115 ], "wc_questions_avg": [ 28.75, 32.275183965393595 ], "wc_limitations_avg": [ 10.0, 8.860022573334675 ], "wc_review_avg": [ 269.5, 47.26785376976619 ], "wc_reply_reviewers_avg": [ 16.25, 20.42516829796024 ], "wc_reply_authors_avg": [ 71.0, 61.73734688177003 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9837327785830339517&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu;bigai.ai;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Peking University;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.bigaiai.org/", "aff_unique_abbr": "Peking U;BIGAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "CALVIN: Improved Contextual Video Captioning via Instruction Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96463", "id": "7Kz7icCZ6H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Kz7icCZ6H", "openreview": "https://openreview.net/forum?id=7Kz7icCZ6H", "poster": "", "project": "", "author_site": "Gowthami Somepalli, Arkabandhu Chowdhury, Jonas Geiping, Ronen Basri, Tom Goldstein, David Jacobs", "tldr": "", "abstract": "The recent emergence of powerful Vision-Language models (VLMs) has significantly improved image captioning. Some of these models are extended to caption videos as well. However, their capabilities to understand complex scenes are limited, and the descriptions they provide for scenes tend to be overly verbose and focused on the superficial appearance of objects. Scene descriptions, especially in movies, require a deeper contextual understanding, unlike general-purpose video captioning. To address this challenge, we propose a model, CALVIN, a specialized video LLM that leverages previous movie context to generate fully \"contextual\" scene descriptions. To achieve this, we train our model on a suite of tasks that integrate both image-based question-answering and video captioning within a unified framework, before applying instruction tuning to refine the model's ability to provide scene captions. Lastly, we observe that our model responds well to prompt engineering and few-shot in-context learning techniques, enabling the user to adapt it to any new movie with very little additional annotation.", "keywords": "contextual captioning;video understanding;ai for inclusivity", "primary_area": "machine_vision", "supplementary_material": "", "author": "Gowthami Somepalli;Arkabandhu Chowdhury;Jonas Geiping;Ronen Basri;Tom Goldstein;David W. Jacobs", "authorids": "~Gowthami_Somepalli1;~Arkabandhu_Chowdhury1;~Jonas_Geiping1;~Ronen_Basri1;~Tom_Goldstein1;~David_W._Jacobs1", "gender": "F;M;M;M;M;M", "homepage": "https://somepago.github.io/;;https://jonasgeiping.github.io/;https://www.weizmann.ac.il/math/ronen/;https://www.cs.umd.edu/~tomg/;http://www.cs.umd.edu/~djacobs", "dblp": "286/5012;95/10603;190/7229;b/RonenBasri.html;25/8184;j/DavidWJacobs.html", "google_scholar": "T2ezBDsAAAAJ;42v1i_YAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;d6vuvHIAAAAJ;KmSuVtgAAAAJ;WH2KmRgAAAAJ", "orcid": ";;;;;", "linkedin": ";arkabandhu-chowdhury/;;;;", "or_profile": "~Gowthami_Somepalli1;~Arkabandhu_Chowdhury1;~Jonas_Geiping1;~Ronen_Basri1;~Tom_Goldstein1;~David_W._Jacobs1", "aff": "University of Maryland, College Park;Meta (Facebook);Max Planck Institute for Intelligent Systems, Max-Planck Institute;Meta Platforms Inc.;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;meta.com;tuebingen.mpg.de;meta.com;umd.edu;umd.edu", "position": "PhD student;Research Scientist;Principal Researcher;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nsomepalli2024calvin,\ntitle={{CALVIN}: Improved Contextual Video Captioning via Instruction Tuning},\nauthor={Gowthami Somepalli and Arkabandhu Chowdhury and Jonas Geiping and Ronen Basri and Tom Goldstein and David W. Jacobs},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Kz7icCZ6H}\n}", "github": "", "reviewers": "GsDL;Y2oh;suoE;jqA2", "pdf_size": 5995666, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "90;61;128;88", "wc_strengths": "71;45;127;37", "wc_weaknesses": "65;123;323;67", "wc_questions": "32;2;78;85", "wc_limitations": "17;9;80;27", "wc_review": "275;240;736;304", "wc_reply_reviewers": "237;110;249;115", "wc_reply_authors": "54;33;396;56", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.75, 23.85765076448224 ], "wc_strengths_avg": [ 70.0, 35.22782990761707 ], "wc_weaknesses_avg": [ 144.5, 105.65391616026355 ], "wc_questions_avg": [ 49.25, 34.03949911499874 ], "wc_limitations_avg": [ 33.25, 27.73422975314079 ], "wc_review_avg": [ 388.75, 201.76146188011228 ], "wc_reply_reviewers_avg": [ 177.75, 65.41167709209114 ], "wc_reply_authors_avg": [ 134.75, 151.1015800711561 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SJ_6PtvUNEwJ:scholar.google.com/&scioq=CALVIN:+Improved+Contextual+Video+Captioning+via+Instruction+Tuning&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "umd.edu;meta.com;tuebingen.mpg.de;meta.com;umd.edu;umd.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;0;0", "aff_unique_norm": "University of Maryland;Meta;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Meta Platforms, Inc.;Intelligent Systems", "aff_unique_url": "https://www/umd.edu;https://meta.com;https://www.mpi-is.mpg.de", "aff_unique_abbr": "UMD;Meta;MPI-IS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Error Analysis of Spherically Constrained Least Squares Reformulation in Solving the Stackelberg Prediction Game", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96462", "id": "7L2tCirpwB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7L2tCirpwB", "openreview": "https://openreview.net/forum?id=7L2tCirpwB", "poster": "/media/PosterPDFs/NeurIPS%202024/96462.png?t=1731671069.0470395", "project": "", "author_site": "Xiyuan Li, Weiwei Liu", "tldr": "", "abstract": "The Stackelberg prediction game (SPG) is a popular model for characterizing strategic interactions between a learner and an adversarial data provider. Although optimization problems in SPGs are often NP-hard, a notable special case involving the least squares loss (SPG-LS) has gained significant research attention recently, (Bishop et al. 2020; Wang et al. 2021; Wang et al. 2022). The latest state-of-the-art method for solving the SPG-LS problem is the spherically constrained least squares reformulation (SCLS) method proposed in the work of Wang et al. (2022). However, the lack of theoretical analysis on the error of the SCLS method limits its large-scale applications. In this paper, we investigate the estimation error between the learner obtained by the SCLS method and the actual learner. Specifically, we reframe the estimation error of the SCLS method as a Primary Optimization ($\\textbf{PO}$) problem and utilize the Convex Gaussian min-max theorem (CGMT) to transform the $\\textbf{PO}$ problem into an Auxiliary Optimization ($\\textbf{AO}$) problem. Subsequently, we provide a theoretical error analysis for the SCLS method based on this simplified $\\textbf{AO}$ problem. This analysis not only strengthens the theoretical framework of the SCLS method but also confirms the reliability of the learner produced by it. We further conduct experiments to validate our theorems, and the results are in excellent agreement with our theoretical predictions.", "keywords": "Stackelberg Prediction Game", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xiyuan Li;Weiwei Liu", "authorids": "~Xiyuan_Li3;~Weiwei_Liu1", "gender": "M;M", "homepage": "https://sites.google.com/site/weiweiliuhomepage/;https://github.com/LXYambition", "dblp": "54/6677-3.html;205/4037", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";", "linkedin": "weiwei-liu-4a7849134/;", "or_profile": "~Weiwei_Liu1;~Xiyuan_Li1", "aff": "Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nli2024error,\ntitle={Error Analysis of Spherically Constrained Least Squares Reformulation in Solving the Stackelberg Prediction Game},\nauthor={Xiyuan Li and Weiwei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7L2tCirpwB}\n}", "github": "", "reviewers": "PFXk;btrD;XvKF;rMtE", "pdf_size": 429250, "rating": "5;7;8;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;4;4", "presentation": "2;3;3;3", "wc_summary": "89;62;80;139", "wc_strengths": "21;109;110;116", "wc_weaknesses": "129;86;91;98", "wc_questions": "255;25;3;2", "wc_limitations": "1;1;7;9", "wc_review": "495;283;291;364", "wc_reply_reviewers": "50;17;20;17", "wc_reply_authors": "65;9;9;8", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.5, 28.552583070538468 ], "wc_strengths_avg": [ 89.0, 39.350984739901996 ], "wc_weaknesses_avg": [ 101.0, 16.718253497300488 ], "wc_questions_avg": [ 71.25, 106.48562109505677 ], "wc_limitations_avg": [ 4.5, 3.570714214271425 ], "wc_review_avg": [ 358.25, 85.02756905851184 ], "wc_reply_reviewers_avg": [ 26.0, 13.910427743243556 ], "wc_reply_authors_avg": [ 22.75, 24.39646490785089 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rGESiUtq5e8J:scholar.google.com/&scioq=Error+Analysis+of+Spherically+Constrained+Least+Squares+Reformulation+in+Solving+the+Stackelberg+Prediction+Game&hl=en&as_sdt=0,19", "gs_version_total": 2, "email": "whu.edu.cn;whu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Error Correction Output Codes for Robust Neural Networks against Weight-errors: A Neural Tangent Kernel Point of View", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96461", "id": "7LIm53Jiic", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7LIm53Jiic", "openreview": "https://openreview.net/forum?id=7LIm53Jiic", "poster": "/media/PosterPDFs/NeurIPS%202024/96461.png?t=1730692433.942166", "project": "", "author_site": "Anlan Yu, Shusen Jing, Ning Lyu, Wujie Wen, Zhiyuan Yan", "tldr": "", "abstract": "Error correcting output code (ECOC) is a classic method that encodes binary classifiers to tackle the multi-class classification problem in decision trees and neural networks.\nAmong ECOCs, the one-hot code has become the default choice in modern deep neural networks (DNNs) due to its simplicity in decision making. However, it suffers from a significant limitation in its ability to achieve high robust accuracy, particularly in the presence of weight errors. While recent studies have experimentally demonstrated that the non-one-hot ECOCs with multi-bits error correction ability, could be a better solution, there is a notable absence of theoretical foundations that can elucidate the relationship between codeword design, weight-error magnitude, and network characteristics, so as to provide robustness guarantees. This work is positioned to bridge this gap through the lens of neural tangent kernel (NTK). We have two important theoretical findings: 1) In clean models (without weight errors), utilizing one-hot code and non-one-hot ECOC is akin to altering decoding metrics from $l_2$ distance to Mahalanobis distance. 2) In non-clean models (with weight errors), if the normalized distance exceeds a threshold, then non-clean DNNs can reach the clean model's accuracy as long as the code length approaches infinity. This threshold is determined by DNN architecture (e.g. layer number, activation), weight error magnitude, and the distance between the output and the nearest codeword. Based on these findings, we further demonstrate how to practically use them to identify optimal ECOCs for simple tasks (short-code ECOCs) and complex tasks (long-code ECOCs), by balancing the code orthogonality (as per finding 1) and code distance (as per finding 2). Extensive experimental results across four datasets and four DNN models validate the superior performance of constructed codes, guided by our findings, compared to existing ECOCs. To our best knowledge, this is the first work that provides theoretical explanations for the effectiveness of ECOCS and offers associated design guidance for optimal ECOCs specifically tailored to DNNs.", "keywords": "error correction output codes;ECOC;DNN hardware accelerator;DNN robustness", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/d572c5a04d8f55061895da7825272e00d6607a39.zip", "author": "Anlan Yu;Shusen Jing;Ning Lyu;Wujie Wen;Zhiyuan Yan", "authorids": "~Anlan_Yu1;~Shusen_Jing1;~Ning_Lyu1;~Wujie_Wen2;~Zhiyuan_Yan2", "gender": "F;;F;M;M", "homepage": ";;;https://www.lehigh.edu/~wuw219/;https://engineering.lehigh.edu/faculty/zhiyuan-yan", "dblp": ";;;70/11466.html;", "google_scholar": "hvdTK7wAAAAJ;ujhCaecAAAAJ;;QKQrD1wAAAAJ;4-FgZGAAAAAJ", "orcid": "0000-0003-4899-5118;;0000-0001-9102-6879;;", "linkedin": "anlan-yu-2a39ab139/;;;;", "or_profile": "~Anlan_Yu1;~Shusen_Jing1;~Ning_Lyu1;~Wujie_Wen2;~Zhiyuan_Yan1", "aff": "Lehigh University;Lehigh University;Lehigh University;North Carolina State University;", "aff_domain": "lehigh.edu;lehigh.edu;lehigh.edu;ncsu.edu;", "position": "PhD student;PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nyu2024error,\ntitle={Error Correction Output Codes for Robust Neural Networks against Weight-errors: A Neural Tangent Kernel Point of View},\nauthor={Anlan Yu and Shusen Jing and Ning Lyu and Wujie Wen and Zhiyuan Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7LIm53Jiic}\n}", "github": "", "reviewers": "Vn9W;vbnL;Nfve", "pdf_size": 590164, "rating": "4;7;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "2;3;4", "presentation": "2;4;3", "wc_summary": "85;87;44", "wc_strengths": "99;64;61", "wc_weaknesses": "336;1;144", "wc_questions": "2;81;86", "wc_limitations": "20;1;39", "wc_review": "542;234;374", "wc_reply_reviewers": "0;7;11", "wc_reply_authors": "195;0;0", "reply_reviewers": "0;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 72.0, 19.8158185969358 ], "wc_strengths_avg": [ 74.66666666666667, 17.249798710580816 ], "wc_weaknesses_avg": [ 160.33333333333334, 137.2499747014751 ], "wc_questions_avg": [ 56.333333333333336, 38.47365621074013 ], "wc_limitations_avg": [ 20.0, 15.513435037626794 ], "wc_review_avg": [ 383.3333333333333, 125.91355059016571 ], "wc_reply_reviewers_avg": [ 6.0, 4.546060565661952 ], "wc_reply_authors_avg": [ 65.0, 91.92388155425118 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1645773059423825475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "lehigh.edu;lehigh.edu;lehigh.edu;ncsu.edu;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Lehigh University;North Carolina State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.lehigh.edu;https://www.ncsu.edu", "aff_unique_abbr": "Lehigh;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Boosting-Type Convergence Result for AdaBoost.MH with Factorized Multi-Class Classifiers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96460", "id": "7Lv8zHQWwS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Lv8zHQWwS", "openreview": "https://openreview.net/forum?id=7Lv8zHQWwS", "poster": "", "project": "", "author_site": "Xin Zou, Zhengyu Zhou, Jingyuan Xu, Weiwei Liu", "tldr": "", "abstract": "AdaBoost is a well-known algorithm in boosting. Schapire and Singer propose, an extension of AdaBoost, named AdaBoost.MH, for multi-class classification problems. K\u00e9gl shows empirically that AdaBoost.MH works better when the classical one-against-all base classifiers are replaced by factorized base classifiers containing a binary classifier and a vote (or code) vector. However, the factorization makes it much more difficult to provide a convergence result for the factorized version of AdaBoost.MH. Then, K\u00e9gl raises an open problem in COLT 2014 to look for a convergence result for the factorized AdaBoost.MH. In this work, we resolve this open problem by presenting a convergence result for AdaBoost.MH with factorized multi-class classifiers.", "keywords": "AdaBoost", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xin Zou;Zhengyu Zhou;Jingyuan Xu;Weiwei Liu", "authorids": "~Xin_Zou3;~Zhengyu_Zhou1;~Jingyuan_Xu2;~Weiwei_Liu1", "gender": "M;;;M", "homepage": "https://zouxinn.github.io/;;https://github.com/fzJing;https://sites.google.com/site/weiweiliuhomepage/", "dblp": "18/6081-2;;;54/6677-3.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;", "linkedin": ";zhengyu-zhou-650534212/;;weiwei-liu-4a7849134/", "or_profile": "~Xin_Zou3;~Zhengyu_Zhou1;~Jingyuan_Xu2;~Weiwei_Liu1", "aff": "Wuhan University;, Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;cs.whu.edu.cn;whut.edu.cn;whu.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzou2024a,\ntitle={A Boosting-Type Convergence Result for AdaBoost.{MH} with Factorized Multi-Class Classifiers},\nauthor={Xin Zou and Zhengyu Zhou and Jingyuan Xu and Weiwei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Lv8zHQWwS}\n}", "github": "", "reviewers": "g6ZT;iRY2;HyZV;jA8z;BMi6", "pdf_size": 335131, "rating": "4;6;8;8;9", "confidence": "4;3;4;4;4", "soundness": "3;3;4;3;4", "novelty": "2;3;4;4;4", "presentation": "1;3;3;3;4", "wc_summary": "100;112;58;78;40", "wc_strengths": "94;10;53;106;120", "wc_weaknesses": "253;19;45;51;85", "wc_questions": "44;45;69;6;2", "wc_limitations": "1;35;1;12;7", "wc_review": "492;221;226;253;254", "wc_reply_reviewers": "106;374;18;6;17", "wc_reply_authors": "44;469;15;12;15", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 7.0, 1.7888543819998317 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 77.6, 26.393938698117793 ], "wc_strengths_avg": [ 76.6, 40.1078545923364 ], "wc_weaknesses_avg": [ 90.6, 83.87991416304621 ], "wc_questions_avg": [ 33.2, 25.498235233050934 ], "wc_limitations_avg": [ 11.2, 12.592060990957755 ], "wc_review_avg": [ 289.2, 102.2964320003391 ], "wc_reply_reviewers_avg": [ 104.2, 139.62292075443776 ], "wc_reply_authors_avg": [ 111.0, 179.3800434831032 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2795084971874738, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OPkGK_Crk8kJ:scholar.google.com/&scioq=A+Boosting-Type+Convergence+Result+for+AdaBoost.MH+with+Factorized+Multi-Class+Classifiers&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "whu.edu.cn;cs.whu.edu.cn;whut.edu.cn;whu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "COLD: Causal reasOning in cLosed Daily activities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96459", "id": "7Mo1NOosNT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Mo1NOosNT", "openreview": "https://openreview.net/forum?id=7Mo1NOosNT", "poster": "/media/PosterPDFs/NeurIPS%202024/96459.png?t=1733544665.5335903", "project": "", "author_site": "Abhinav Joshi, areeb ahmad, Ashutosh Modi", "tldr": "", "abstract": "Large Language Models (LLMs) have shown state-of-the-art performance in a variety of tasks, including arithmetic and reasoning; however, to gauge the intellectual capabilities of LLMs, causal reasoning has become a reliable proxy for validating a general understanding of the mechanics and intricacies of the world similar to humans. Previous works in natural language processing (NLP) have either focused on open-ended causal reasoning via causal commonsense reasoning (CCR) or framed a symbolic representation-based question answering for theoretically backed-up analysis via a causal inference engine. The former adds an advantage of real-world grounding but lacks theoretically backed-up analysis/validation, whereas the latter is far from real-world grounding. In this work, we bridge this gap by proposing the COLD (Causal reasOning in cLosed Daily activities) framework, which is built upon human understanding of daily real-world activities to reason about the causal nature of events. We show that the proposed framework facilitates the creation of enormous causal queries (\u223c 9 million) and comes close to the mini-turing test, simulating causal reasoning to evaluate the understanding of a daily real-world task. We evaluate multiple LLMs on the created causal queries and find that causal reasoning is challenging even for activities trivial to humans. We further explore (the causal reasoning abilities of LLMs) using the backdoor criterion to determine the causal strength between events.", "keywords": "Causal Common Sense;Causal NLP;LLMs;Commonsense Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Abhinav Joshi;Areeb Ahmad;Ashutosh Modi", "authorids": "~Abhinav_Joshi1;~Areeb_Ahmad2;~Ashutosh_Modi1", "gender": "M;M;M", "homepage": "https://www.cse.iitk.ac.in/users/ajoshi/;https://ashutosh-modi.github.io/;", "dblp": "308/0603;139/0873;", "google_scholar": ";AWu6f60AAAAJ;", "orcid": "0000-0001-6756-1126;;", "linkedin": ";dr-ashutosh-modi-3907835/;https://www.linkedin.com/mwlite/in/areeb-ahmad-987b56143", "or_profile": "~Abhinav_Joshi1;~Ashutosh_Modi1;~areeb_ahmad1", "aff": "Indian Institute of Technology, Kanpur;IIT Kanpur;", "aff_domain": "iitk.ac.in;iitk.ac.in;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\njoshi2024cold,\ntitle={{COLD}: Causal reasOning in cLosed Daily activities},\nauthor={Abhinav Joshi and Areeb Ahmad and Ashutosh Modi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Mo1NOosNT}\n}", "github": "", "reviewers": "SJyu;G7bs;iBzV;LejL", "pdf_size": 957640, "rating": "5;5;5;6", "confidence": "4;3;2;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;2;3", "wc_summary": "75;79;69;143", "wc_strengths": "33;46;49;104", "wc_weaknesses": "61;251;163;329", "wc_questions": "135;126;64;9", "wc_limitations": "1;26;6;31", "wc_review": "305;528;351;616", "wc_reply_reviewers": "549;0;40;0", "wc_reply_authors": "1432;0;0;0", "reply_reviewers": "3;0;1;0", "reply_authors": "4;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 29.94578434437809 ], "wc_strengths_avg": [ 58.0, 27.230497608380205 ], "wc_weaknesses_avg": [ 201.0, 99.90995946350894 ], "wc_questions_avg": [ 83.5, 50.96322203314857 ], "wc_limitations_avg": [ 16.0, 12.747548783981962 ], "wc_review_avg": [ 450.0, 126.95077786291819 ], "wc_reply_reviewers_avg": [ 147.25, 232.5245954732531 ], "wc_reply_authors_avg": [ 358.0, 620.074189109658 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eCJEn-ZrA6EJ:scholar.google.com/&scioq=COLD:+Causal+reasOning+in+cLosed+Daily+activities&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "iitk.ac.in;iitk.ac.in;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Kanpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitk.ac.in", "aff_unique_abbr": "IIT Kanpur", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kanpur", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Boosting Semi-Supervised Scene Text Recognition via Viewing and Summarizing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96458", "id": "7NrYnCN2be", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7NrYnCN2be", "openreview": "https://openreview.net/forum?id=7NrYnCN2be", "poster": "/media/PosterPDFs/NeurIPS%202024/96458.png?t=1731138516.9162433", "project": "", "author_site": "Yadong Qu, Yuxin Wang, Bangbang Zhou, Zixiao Wang, Hongtao Xie, Yongdong Zhang", "tldr": "", "abstract": "Existing scene text recognition (STR) methods struggle to recognize challenging texts, especially for artistic and severely distorted characters. The limitation lies in the insufficient exploration of character morphologies, including the monotonousness of widely used synthetic training data and the sensitivity of the model to character morphologies. To address these issues, inspired by the human learning process of viewing and summarizing, we facilitate the contrastive learning-based STR framework in a self-motivated manner by leveraging synthetic and real unlabeled data without any human cost. In the viewing process, to compensate for the simplicity of synthetic data and enrich character morphology diversity, we propose an Online Generation Strategy to generate background-free samples with diverse character styles. By excluding background noise distractions, the model is encouraged to focus on character morphology and generalize the ability to recognize complex samples when trained with only simple synthetic data. To boost the summarizing process, we theoretically demonstrate the derivation error in the previous character contrastive loss, which mistakenly causes the sparsity in the intra-class distribution and exacerbates ambiguity on challenging samples. Therefore, a new Character Unidirectional Alignment Loss is proposed to correct this error and unify the representation of the same characters in all samples by aligning the character features in the student model with the reference features in the teacher model. Extensive experiment results show that our method achieves SOTA performance (94.7\\% and 70.9\\% average accuracy on common benchmarks and Union14M-Benchmark). Code will be available.", "keywords": "Scene text recognition;Semi-supervised learning;Contrastive Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yadong Qu;Yuxin Wang;Bangbang Zhou;Zixiao Wang;Hongtao Xie;Yongdong Zhang", "authorids": "~Yadong_Qu1;~Yuxin_Wang1;~Bangbang_Zhou1;~Zixiao_Wang3;~Hongtao_Xie2;~Yongdong_Zhang2", "gender": ";M;M;M;M;M", "homepage": ";https://wangyuxin87.github.io/;https://github.com/bang123-box;http://imcc.ustc.edu.cn/main.htm;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;https://github.com/wzx99", "dblp": ";68/1041-2;;;z/YongdongZhang;", "google_scholar": "_OJlYC8AAAAJ;aBe1jXgAAAAJ;;;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;", "orcid": "0000-0003-0265-5011;0000-0002-0228-6220;;0000-0002-0163-9434;0000-0003-0066-3448;0000-0002-0009-5033", "linkedin": ";;;;;", "or_profile": "~Yadong_Qu1;~Yuxin_Wang1;~Bangbang_Zhou1;~Hongtao_Xie2;~Yongdong_Zhang2;~Zx_W1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Postdoc;MS student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nqu2024boosting,\ntitle={Boosting Semi-Supervised Scene Text Recognition via Viewing and Summarizing},\nauthor={Yadong Qu and Yuxin Wang and Bangbang Zhou and Zixiao Wang and Hongtao Xie and Yongdong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7NrYnCN2be}\n}", "github": "", "reviewers": "Smtd;9AqC;qi8n;S6t1", "pdf_size": 821824, "rating": "6;6;7;7", "confidence": "4;3;5;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "100;64;72;74", "wc_strengths": "92;40;78;74", "wc_weaknesses": "76;25;42;76", "wc_questions": "6;43;43;2", "wc_limitations": "9;1;4;11", "wc_review": "283;173;239;237", "wc_reply_reviewers": "30;0;53;0", "wc_reply_authors": "23;0;15;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 13.518505834595775 ], "wc_strengths_avg": [ 71.0, 19.1049731745428 ], "wc_weaknesses_avg": [ 54.75, 22.083647796503186 ], "wc_questions_avg": [ 23.5, 19.551214796017153 ], "wc_limitations_avg": [ 6.25, 3.960744879438715 ], "wc_review_avg": [ 233.0, 39.21734310225516 ], "wc_reply_reviewers_avg": [ 20.75, 22.286486937155438 ], "wc_reply_authors_avg": [ 9.5, 9.912113800799505 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:agu8JwD7fmQJ:scholar.google.com/&scioq=Boosting+Semi-Supervised+Scene+Text+Recognition+via+Viewing+and+Summarizing&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Uncovering the Redundancy in Graph Self-supervised Learning Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96457", "id": "7Ntft3U7jj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Ntft3U7jj", "openreview": "https://openreview.net/forum?id=7Ntft3U7jj", "poster": "/media/PosterPDFs/NeurIPS%202024/96457.png?t=1731656430.4788637", "project": "", "author_site": "Zhibiao Wang, Xiao Wang, Haoyue Deng, Nian Liu, Shirui Pan, Chunming Hu", "tldr": "", "abstract": "Graph self-supervised learning, as a powerful pre-training paradigm for Graph Neural Networks (GNNs) without labels, has received considerable attention. We have witnessed the success of graph self-supervised learning on pre-training the parameters of GNNs, leading many not to doubt that whether the learned GNNs parameters are all useful. In this paper, by presenting the experimental evidence and analysis, we surprisingly discover that the graph self-supervised learning models are highly redundant at both of neuron and layer levels, e.g., even randomly removing 51.6\\% of parameters, the performance of graph self-supervised learning models still retains at least 96.2\\%. This discovery implies that the parameters of graph self-supervised models can be largely reduced, making simultaneously fine-tuning both graph self-supervised learning models and prediction layers more feasible. Therefore, we further design a novel graph pre-training and fine-tuning paradigm called SLImming DE-correlation Fine-tuning (SLIDE). The effectiveness of SLIDE is verified through extensive experiments on various benchmarks, and the performance can be even improved with fewer parameters of models in most cases. For example, in comparison with full fine-tuning GraphMAE on Amazon-Computers dataset, even randomly reducing 40\\% of parameters, we can still achieve the improvement of 0.24\\% and 0.27\\% for Micro-F1 and Macro-F1 scores respectively.", "keywords": "Graph self-supervised learning;Model redundancy;Pre-training and fine-tuning paradigm", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/ae363f26c7894f8910bd97f22adc0e64df0d38ac.zip", "author": "Zhibiao Wang;Xiao Wang;Haoyue Deng;Nian Liu;Shirui Pan;Chunming Hu", "authorids": "~Zhibiao_Wang1;~Xiao_Wang2;~Haoyue_Deng1;~Nian_Liu3;~Shirui_Pan1;~Chunming_Hu1", "gender": "M;M;F;M;;M", "homepage": "https://github.com/zhlgg/buaaLurker.github.io;https://wangxiaocs.github.io/;https://github.com/Gatlin1111;https://liun-online.github.io/;;", "dblp": ";49/67-17;;;91/8171;03/405.html", "google_scholar": ";MnzarAQAAAAJ;;Tx8vRjUAAAAJ;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ;", "orcid": ";0000-0002-4444-7811;;0009-0000-8378-1129;0000-0003-0794-527X;", "linkedin": ";;;;;", "or_profile": "~Zhibiao_Wang1;~Xiao_Wang2;~Haoyue_Deng1;~Nian_Liu3;~Shirui_Pan1;~Chunming_Hu1", "aff": "Beihang University;Beihang University;Beihang University;National University of Singapore;Griffith University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;u.nus.edu;griffith.edu.au;buaa.edu.cn", "position": "MS student;Full Professor;Undergrad student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024uncovering,\ntitle={Uncovering the Redundancy in Graph Self-supervised Learning Models},\nauthor={Zhibiao Wang and Xiao Wang and Haoyue Deng and Nian Liu and Shirui Pan and Chunming Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Ntft3U7jj}\n}", "github": "", "reviewers": "WSVe;oLUe;1uus;nfbW", "pdf_size": 2778984, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;4;4", "presentation": "3;3;3;3", "wc_summary": "111;60;69;60", "wc_strengths": "80;72;89;75", "wc_weaknesses": "199;159;59;63", "wc_questions": "82;6;2;3", "wc_limitations": "1;1;1;1", "wc_review": "473;298;220;202", "wc_reply_reviewers": "0;0;0;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.0, 21.106870919205434 ], "wc_strengths_avg": [ 79.0, 6.442049363362563 ], "wc_weaknesses_avg": [ 120.0, 60.687725282795036 ], "wc_questions_avg": [ 23.25, 33.9512518178638 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 298.25, 107.15030331268316 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8bTNcLmtyMgJ:scholar.google.com/&scioq=Uncovering+the+Redundancy+in+Graph+Self-supervised+Learning+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;u.nus.edu;griffith.edu.au;buaa.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Beihang University;National University of Singapore;Griffith University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.nus.edu.sg;https://www.griffith.edu.au", "aff_unique_abbr": "BUAA;NUS;Griffith", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "China;Singapore;Australia" }, { "title": "Learning Social Welfare Functions", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96456", "id": "7O6KtaAr8n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7O6KtaAr8n", "openreview": "https://openreview.net/forum?id=7O6KtaAr8n", "poster": "/media/PosterPDFs/NeurIPS%202024/96456.png?t=1732474127.972038", "project": "", "author_site": "Kanad Pardeshi, Itai Shapira, Ariel Procaccia, Aarti Singh", "tldr": "", "abstract": "Is it possible to understand or imitate a policy maker's rationale by looking at past decisions they made? We formalize this question as the problem of learning social welfare functions belonging to the well-studied family of power mean functions. We focus on two learning tasks; in the first, the input is vectors of utilities of an action (decision or policy) for individuals in a group and their associated social welfare as judged by a policy maker, whereas in the second, the input is pairwise comparisons between the welfares associated with a given pair of utility vectors. We show that power mean functions are learnable with polynomial sample complexity in both cases, even if the social welfare information is noisy. Finally, we design practical algorithms for these tasks and evaluate their performance.", "keywords": "Social Choice Learning;Power Mean Functions;PAC Learning;Preference Learning;Statistical Learning Theory;Social Welfare Functions", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Kanad Shrikar Pardeshi;Itai Shapira;Ariel D. Procaccia;Aarti Singh", "authorids": "~Kanad_Shrikar_Pardeshi1;~Itai_Shapira1;~Ariel_D._Procaccia1;~Aarti_Singh1", "gender": "M;M;F;M", "homepage": "https://kanpard005.github.io/;https://ishapira1.github.io/;https://www.cs.cmu.edu/~aarti;http://procaccia.info/", "dblp": ";342/2947;64/5328;p/ArielDProcaccia", "google_scholar": ";JOQXRbIAAAAJ;vGBcNVAAAAAJ;https://scholar.google.com.tw/citations?user=8ZpV-lkAAAAJ", "orcid": ";;;", "linkedin": ";itai-shapira-968362171;;", "or_profile": "~Kanad_Shrikar_Pardeshi1;~Itai_Shapira1;~Aarti_Singh1;~Ariel_Procaccia1", "aff": "Carnegie Mellon University;Harvard University, Harvard University;University of Wisconsin - Madison;Harvard University", "aff_domain": "andrew.cmu.edu;g.harvard.edu;wisc.edu;harvard.edu", "position": "MS student;PhD student;PhD student;Gordon McKay Professor of Computer Science", "bibtex": "@inproceedings{\npardeshi2024learning,\ntitle={Learning Social Welfare Functions},\nauthor={Kanad Shrikar Pardeshi and Itai Shapira and Ariel D. Procaccia and Aarti Singh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7O6KtaAr8n}\n}", "github": "", "reviewers": "W5KA;7Nn5;1QjM;5Qbq", "pdf_size": 768651, "rating": "4;5;7;7", "confidence": "1;4;4;2", "soundness": "3;3;4;3", "novelty": "2;2;4;3", "presentation": "2;3;4;3", "wc_summary": "142;123;90;78", "wc_strengths": "97;23;22;61", "wc_weaknesses": "171;157;125;37", "wc_questions": "1;2;1;56", "wc_limitations": "13;1;1;2", "wc_review": "424;306;239;234", "wc_reply_reviewers": "417;32;5;16", "wc_reply_authors": "225;11;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 108.25, 25.518375732009275 ], "wc_strengths_avg": [ 50.75, 30.986892390170397 ], "wc_weaknesses_avg": [ 122.5, 52.10326285368316 ], "wc_questions_avg": [ 15.0, 23.67488120350343 ], "wc_limitations_avg": [ 4.25, 5.0682837331783235 ], "wc_review_avg": [ 300.75, 76.62693716964029 ], "wc_reply_reviewers_avg": [ 117.5, 173.18270698889077 ], "wc_reply_authors_avg": [ 59.0, 95.94529691443974 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4074074074074074, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16868570959172632721&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "andrew.cmu.edu;g.harvard.edu;wisc.edu;harvard.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Carnegie Mellon University;Harvard University;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.harvard.edu;https://www.wisc.edu", "aff_unique_abbr": "CMU;Harvard;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Great Minds Think Alike: The Universal Convergence Trend of Input Salience", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96455", "id": "7PORYhql4V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7PORYhql4V", "openreview": "https://openreview.net/forum?id=7PORYhql4V", "poster": "", "project": "", "author_site": "Yipei Wang, Jeffrey Siskind, Xiaoqian Wang", "tldr": "", "abstract": "Uncertainty is introduced in optimized DNNs through stochastic algorithms, forming specific distributions. Training models can be seen as random sampling from this distribution of optimized models. In this work, we study the distribution of optimized DNNs as a family of functions by leveraging a pointwise approach. We focus on the input saliency maps, as the input gradient field is decisive to the models' mathematical essence. Our investigation of saliency maps reveals a counter-intuitive trend: two stochastically optimized models tend to resemble each other more as either of their capacities increases. Therefore, we hypothesize several properties of these distributions, suggesting that (1) Within the same model architecture (e.g., CNNs, ResNets), different family variants (e.g., varying capacities) tend to align in terms of their population mean directions of the input salience. And (2) the distributions of optimized models follow a convergence trend to their shared population mean as the capacity increases. Furthermore, we also propose semi-parametric distributions based on the Saw distribution to model the convergence trend, satisfying all the counter-intuitive observations. Our experiments shed light on the significant implications of our hypotheses in various application domains, including black-box attacks, deep ensembles, etc. These findings not only enhance our understanding of DNN behaviors but also offer valuable insights for their practical application in diverse areas of deep learning.", "keywords": "explainable artificial intelligence;saliency maps;model distributions", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/b38d8f34e26c397da966bd3e4d1e8eecd853fd78.zip", "author": "Yipei Wang;Jeffrey Mark Siskind;Xiaoqian Wang", "authorids": "~Yipei_Wang1;~Jeffrey_Mark_Siskind1;~Xiaoqian_Wang1", "gender": "M;M;F", "homepage": "https://yipei-wang.github.io;http://engineering.purdue.edu/~qobi;https://engineering.purdue.edu/~joywang/", "dblp": "140/2763;49/5331;151/3215-1", "google_scholar": "NXENco8AAAAJ;CgSBtPYAAAAJ;I3tc214AAAAJ", "orcid": ";0000-0002-0105-6503;", "linkedin": ";;", "or_profile": "~Yipei_Wang1;~Jeffrey_Mark_Siskind1;~Xiaoqian_Wang1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024great,\ntitle={Great Minds Think Alike: The Universal Convergence Trend of Input Salience},\nauthor={Yipei Wang and Jeffrey Mark Siskind and Xiaoqian Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7PORYhql4V}\n}", "github": "", "reviewers": "We4a;YpLK;fWRt;RiRS", "pdf_size": 7725666, "rating": "4;5;6;7", "confidence": "4;4;2;3", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "2;3;2;2", "wc_summary": "155;101;57;82", "wc_strengths": "76;24;34;56", "wc_weaknesses": "526;175;95;297", "wc_questions": "72;288;55;74", "wc_limitations": "45;11;10;1", "wc_review": "874;599;251;510", "wc_reply_reviewers": "25;253;338;13", "wc_reply_authors": "59;1014;774;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;3;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.75, 36.030369134939484 ], "wc_strengths_avg": [ 47.5, 20.11839953873071 ], "wc_weaknesses_avg": [ 273.25, 162.6904653014429 ], "wc_questions_avg": [ 122.25, 95.98014117514101 ], "wc_limitations_avg": [ 16.75, 16.768646337734005 ], "wc_review_avg": [ 558.5, 222.53595215155684 ], "wc_reply_reviewers_avg": [ 157.25, 141.5421756933247 ], "wc_reply_authors_avg": [ 461.75, 440.9934098147046 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PioAEcXrxRoJ:scholar.google.com/&scioq=Great+Minds+Think+Alike:+The+Universal+Convergence+Trend+of+Input+Salience&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Doubly Mild Generalization for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96454", "id": "7QG9R8urVy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7QG9R8urVy", "openreview": "https://openreview.net/forum?id=7QG9R8urVy", "poster": "/media/PosterPDFs/NeurIPS%202024/96454.png?t=1732870445.660273", "project": "", "author_site": "Yixiu Mao, Qi Wang, Yun Qu, Yuhang Jiang, Xiangyang Ji", "tldr": "", "abstract": "Offline Reinforcement Learning (RL) suffers from the extrapolation error and value overestimation. From a generalization perspective, this issue can be attributed to the over-generalization of value functions or policies towards out-of-distribution (OOD) actions. Significant efforts have been devoted to mitigating such generalization, and recent in-sample learning approaches have further succeeded in entirely eschewing it. Nevertheless, we show that mild generalization beyond the dataset can be trusted and leveraged to improve performance under certain conditions. To appropriately exploit generalization in offline RL, we propose Doubly Mild Generalization (DMG), comprising (i) mild action generalization and (ii) mild generalization propagation. The former refers to selecting actions in a close neighborhood of the dataset to maximize the Q values. Even so, the potential erroneous generalization can still be propagated, accumulated, and exacerbated by bootstrapping. In light of this, the latter concept is introduced to mitigate the generalization propagation without impeding the propagation of RL learning signals. Theoretically, DMG guarantees better performance than the in-sample optimal policy in the oracle generalization scenario. Even under worst-case generalization, DMG can still control value overestimation at a certain level and lower bound the performance. Empirically, DMG achieves state-of-the-art performance across Gym-MuJoCo locomotion tasks and challenging AntMaze tasks. Moreover, benefiting from its flexibility in both generalization aspects, DMG enjoys a seamless transition from offline to online learning and attains strong online fine-tuning performance.", "keywords": "offline reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yixiu Mao;Cheems Wang;Yun Qu;Yuhang Jiang;Xiangyang Ji", "authorids": "~Yixiu_Mao2;~Cheems_Wang1;~Yun_Qu2;~Yuhang_Jiang3;~Xiangyang_Ji1", "gender": "M;;M;;", "homepage": ";;https://github.com/cloud-qu;;", "dblp": "280/1045;;80/10774-2;239/4567;", "google_scholar": ";;l9Ky9goAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0009-0000-7302-5039;;0009-0000-1803-8435;;", "linkedin": ";;;;", "or_profile": "~Yixiu_Mao2;~Cheems_Wang1;~Yun_Qu2;~Yuhang_Jiang3;~Xiangyang_Ji1", "aff": "Tsinghua University;;Tsinghua University;Tsinghua University;", "aff_domain": "mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "PhD student;;PhD student;PhD student;", "bibtex": "@inproceedings{\nmao2024doubly,\ntitle={Doubly Mild Generalization for Offline Reinforcement Learning},\nauthor={Yixiu Mao and Cheems Wang and Yun Qu and Yuhang Jiang and Xiangyang Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7QG9R8urVy}\n}", "github": "", "reviewers": "iL9e;RKt1;kJjy;Kscy", "pdf_size": 561816, "rating": "6;6;6;7", "confidence": "4;4;5;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "44;75;31;153", "wc_strengths": "77;29;42;136", "wc_weaknesses": "119;108;59;85", "wc_questions": "160;6;38;103", "wc_limitations": "8;14;6;9", "wc_review": "408;232;176;486", "wc_reply_reviewers": "29;20;18;17", "wc_reply_authors": "30;32;17;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.75, 47.378133141777546 ], "wc_strengths_avg": [ 71.0, 41.430664971733194 ], "wc_weaknesses_avg": [ 92.75, 23.025800746119558 ], "wc_questions_avg": [ 76.75, 59.428002658679354 ], "wc_limitations_avg": [ 9.25, 2.947456530637899 ], "wc_review_avg": [ 325.5, 126.15367612558899 ], "wc_reply_reviewers_avg": [ 21.0, 4.743416490252569 ], "wc_reply_authors_avg": [ 24.5, 6.576473218982953 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3829601964527840403&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "RouterDC: Query-Based Router by Dual Contrastive Learning for Assembling Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96453", "id": "7RQvjayHrM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7RQvjayHrM", "openreview": "https://openreview.net/forum?id=7RQvjayHrM", "poster": "/media/PosterPDFs/NeurIPS%202024/96453.png?t=1732013831.677801", "project": "", "author_site": "Shuhao Chen, Weisen Jiang, Baijiong Lin, James Kwok, Yu Zhang", "tldr": "", "abstract": "Recent works show that assembling multiple off-the-shelf large language models (LLMs) can harness their complementary abilities. To achieve this, routing is a promising method, which learns a router to select the most suitable LLM for each query. However, existing routing models are ineffective when multiple LLMs perform well for a query. To address this problem, in this paper, we propose a method called query-based Router by Dual Contrastive learning (RouterDC). The RouterDC model, which consists of an encoder and LLM embeddings, is trained by two proposed contrastive losses (sample-LLM and sample-sample losses). Experimental results show that RouterDC is effective in assembling LLMs and largely outperforms individual top-performing LLMs as well as existing routing methods on both in-distribution (+2.76\\%) and out-of-distribution (+1.90\\%) tasks. The source code is available at https://github.com/shuhao02/RouterDC.", "keywords": "LLM Routing; Large Language Models;", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Shuhao Chen;Weisen Jiang;Baijiong Lin;James Kwok;Yu Zhang", "authorids": "~Shuhao_Chen1;~Weisen_Jiang1;~Baijiong_Lin1;~James_Kwok1;~Yu_Zhang3", "gender": "M;M;M;;M", "homepage": "https://scholar.google.com/citations?user=YqX_IbAAAAAJ&hl=zh-CN;https://wayson-ust.github.io/;https://baijiong-lin.github.io/;;http://cse.sustech.edu.cn/faculty/~zhangy/", "dblp": "43/2127;302/7625;279/2950;;50/671-6", "google_scholar": "YqX_IbAAAAAJ;https://scholar.google.com/citations?hl=en;KVdbYTYAAAAJ;;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ", "orcid": "0009-0002-0410-5961;;0000-0002-4257-0226;;", "linkedin": ";;;;", "or_profile": "~Shuhao_Chen1;~Weisen_Jiang1;~Baijiong_Lin1;~James_Kwok1;~Yu_Zhang3", "aff": "Southern University of Science and Technology;Hong Kong University of Science and Technology;The Hong Kong University of Science and Technology (Guangzhou);;Southern University of Science and Technology", "aff_domain": "sustech.edu;ust.hk;connect.hkust-gz.edu.cn;;sustc.edu.cn", "position": "MS student;PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nchen2024routerdc,\ntitle={Router{DC}: Query-Based Router by Dual Contrastive Learning for Assembling Large Language Models},\nauthor={Shuhao Chen and Weisen Jiang and Baijiong Lin and James Kwok and Yu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7RQvjayHrM}\n}", "github": "", "reviewers": "ytgM;GGL7;4ok1;cnpT", "pdf_size": 1932192, "rating": "5;6;7;7", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "55;61;99;85", "wc_strengths": "38;37;65;102", "wc_weaknesses": "129;129;165;72", "wc_questions": "4;146;264;44", "wc_limitations": "1;32;21;11", "wc_review": "227;405;614;314", "wc_reply_reviewers": "0;197;165;0", "wc_reply_authors": "58;733;359;0", "reply_reviewers": "0;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 17.832554500127006 ], "wc_strengths_avg": [ 60.5, 26.462237244798484 ], "wc_weaknesses_avg": [ 123.75, 33.29695932063467 ], "wc_questions_avg": [ 114.5, 100.65162691183883 ], "wc_limitations_avg": [ 16.25, 11.519006033508273 ], "wc_review_avg": [ 390.0, 143.82802230441743 ], "wc_reply_reviewers_avg": [ 90.5, 91.20444068136156 ], "wc_reply_authors_avg": [ 287.5, 291.07945650629483 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10951766659955776549&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "sustech.edu;ust.hk;connect.hkust-gz.edu.cn;;sustc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Southern University of Science and Technology;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.ust.hk", "aff_unique_abbr": "SUSTech;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Guangzhou", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "You Don\u2019t Need Domain-Specific Data Augmentations When Scaling Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96452", "id": "7RwKMRMNrc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7RwKMRMNrc", "openreview": "https://openreview.net/forum?id=7RwKMRMNrc", "poster": "/media/PosterPDFs/NeurIPS%202024/96452.png?t=1733765778.8241425", "project": "", "author_site": "Th\u00e9o Moutakanni, Maxime Oquab, Marc Szafraniec, Maria Vakalopoulou, Piotr Bojanowski", "tldr": "", "abstract": "Self-Supervised learning (SSL) with Joint-Embedding Architectures (JEA) has led to outstanding performances. All instantiations of this paradigm were trained using strong and well-established hand-crafted data augmentations, leading to the general belief that they are required for the proper training and performance of such models. On the other hand, generative reconstruction-based models such as BEIT and MAE or Joint-Embedding Predictive Architectures such as I-JEPA have shown strong performance without using data augmentations except masking. In this work, we challenge the importance of invariance and data-augmentation in JEAs at scale. By running a case-study on a recent SSL foundation model -- DINOv2 -- we show that strong image representations can be obtained with JEAs and only cropping without resizing provided the training data is large enough, reaching state-of-the-art results and using the least amount of augmentation in the literature. Through this study, we also discuss the impact of compute constraints on the outcomes of experimental deep learning research, showing that they can lead to very different conclusions.", "keywords": "dinov2;self-supervised learning;data-augmentations;scaling law", "primary_area": "machine_vision", "supplementary_material": "", "author": "Th\u00e9o Moutakanni;Maxime Oquab;Marc Szafraniec;Maria Vakalopoulou;Piotr Bojanowski", "authorids": "~Th\u00e9o_Moutakanni1;~Maxime_Oquab1;~Marc_Szafraniec1;~Maria_Vakalopoulou1;~Piotr_Bojanowski1", "gender": "M;;M;F;M", "homepage": ";;;;", "dblp": "344/5453;151/8880;205/3119;169/9108.html;142/2542", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=5vteYV8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.gr/citations?hl=el;https://scholar.google.fr/citations?user=lJ_oh2EAAAAJ", "orcid": ";;;0000-0003-0791-1264;", "linkedin": "theo-moutakanni/;;marc-szafraniec-ab429995/;maria-vakalopoulou-8a709395/;piotr-bojanowski-9a94402a", "or_profile": "~Th\u00e9o_Moutakanni1;~Maxime_Oquab1;~Marc_Szafraniec1;~Maria_Vakalopoulou1;~Piotr_Bojanowski1", "aff": "Meta;Meta;Meta Facebook;CentraleSupelec;Meta", "aff_domain": "meta.com;meta.com;fb.com;centralesupelec.fr;meta.com", "position": "PhD student;Research Scientist;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nmoutakanni2024you,\ntitle={You Don{\\textquoteright}t Need Domain-Specific Data Augmentations When Scaling Self-Supervised Learning},\nauthor={Th{\\'e}o Moutakanni and Maxime Oquab and Marc Szafraniec and Maria Vakalopoulou and Piotr Bojanowski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7RwKMRMNrc}\n}", "github": "", "reviewers": "Jk8X;hWoD;PAfm", "pdf_size": 9825169, "rating": "5;5;7", "confidence": "4;5;4", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "195;85;128", "wc_strengths": "179;60;80", "wc_weaknesses": "296;315;87", "wc_questions": "35;3;75", "wc_limitations": "5;8;1", "wc_review": "710;471;371", "wc_reply_reviewers": "74;0;6", "wc_reply_authors": "31;0;17", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 136.0, 45.2621990922521 ], "wc_strengths_avg": [ 106.33333333333333, 52.027770362460174 ], "wc_weaknesses_avg": [ 232.66666666666666, 103.2935407252339 ], "wc_questions_avg": [ 37.666666666666664, 29.4542960458327 ], "wc_limitations_avg": [ 4.666666666666667, 2.8674417556808756 ], "wc_review_avg": [ 517.3333333333334, 142.2212673579057 ], "wc_reply_reviewers_avg": [ 26.666666666666668, 33.559234529741914 ], "wc_reply_authors_avg": [ 16.0, 12.675435561221029 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6040736199921026591&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "meta.com;meta.com;fb.com;centralesupelec.fr;meta.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Meta;CentraleSup\u00e9lec", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.centralesupelec.fr", "aff_unique_abbr": "Meta;CS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Overfitting Behaviour of Gaussian Kernel Ridgeless Regression: Varying Bandwidth or Dimensionality", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96451", "id": "7Sh0XkN1KS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Sh0XkN1KS", "openreview": "https://openreview.net/forum?id=7Sh0XkN1KS", "poster": "/media/PosterPDFs/NeurIPS%202024/96451.png?t=1731727281.866425", "project": "", "author_site": "Marko Medvedev, Gal Vardi, Nati Srebro", "tldr": "", "abstract": "We consider the overfitting behavior of minimum norm interpolating solutions of Gaussian kernel ridge regression (i.e. kernel ridgeless regression), when the bandwidth or input dimension varies with the sample size. For fixed dimensions, we show that even with varying or tuned bandwidth, the ridgeless solution is never consistent and, at least with large enough noise, always worse than the null predictor. For increasing dimension, we give a generic characterization of the overfitting behavior for any scaling of the dimension with sample size. We use this to provide the first example of benign overfitting using the Gaussian kernel with sub-polynomial scaling dimension. All our results are under the Gaussian universality ansatz and the (non-rigorous) risk predictions in terms of the kernel eigenstructure.", "keywords": "Kernel ridge regression;benign overfitting;tempered overfitting;Gaussian kernel", "primary_area": "learning_theory", "supplementary_material": "", "author": "Marko Medvedev;Gal Vardi;Nathan Srebro", "authorids": "~Marko_Medvedev1;~Gal_Vardi1;~Nathan_Srebro1", "gender": "M;M;M", "homepage": "https://math.uchicago.edu/~medvedev/;https://sites.google.com/view/galvardi/home;http://ttic.uchicago.edu/~nati/", "dblp": ";https://dblp.uni-trier.de/pid/167/9638.html;50/3633", "google_scholar": "ftjH16EAAAAJ;https://scholar.google.co.il/citations?hl=en;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Marko_Medvedev1;~Gal_Vardi1;~Nathan_Srebro1", "aff": "University of Chicago;Toyota Technological Institute at Chicago;University of Chicago", "aff_domain": "uchicago.edu;ttic.edu;uchicago.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmedvedev2024overfitting,\ntitle={Overfitting Behaviour of Gaussian Kernel Ridgeless Regression: Varying Bandwidth or Dimensionality},\nauthor={Marko Medvedev and Gal Vardi and Nathan Srebro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Sh0XkN1KS}\n}", "github": "", "reviewers": "zGqe;dZGD;FXXZ;cvRM;Ymgo", "pdf_size": 916807, "rating": "3;5;6;7;7", "confidence": "4;4;4;4;3", "soundness": "2;2;2;3;3", "novelty": "1;2;2;3;3", "presentation": "3;2;2;4;3", "wc_summary": "116;86;192;183;240", "wc_strengths": "5;24;126;140;53", "wc_weaknesses": "169;60;599;133;56", "wc_questions": "18;25;3;54;38", "wc_limitations": "23;1;5;1;3", "wc_review": "331;196;925;511;390", "wc_reply_reviewers": "83;118;78;13;90", "wc_reply_authors": "38;203;318;0;18", "reply_reviewers": "1;2;2;1;1", "reply_authors": "2;2;3;1;2", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 163.4, 55.33028104031282 ], "wc_strengths_avg": [ 69.6, 54.15754795040115 ], "wc_weaknesses_avg": [ 203.4, 202.4495986659396 ], "wc_questions_avg": [ 27.6, 17.3735431043872 ], "wc_limitations_avg": [ 6.6, 8.333066662399863 ], "wc_review_avg": [ 470.6, 248.79758841274966 ], "wc_reply_reviewers_avg": [ 76.4, 34.5867026471157 ], "wc_reply_authors_avg": [ 115.4, 124.51120431511374 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4677071733467426, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15521520239660994914&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uchicago.edu;ttic.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UChicago;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7Su7gAei1l", "title": "SymmetricDiffusers: Learning Discrete Diffusion on Finite Symmetric Groups", "track": "main", "status": "Reject", "tldr": "", "abstract": "Finite symmetric groups $S_n$ are essential in fields such as combinatorics, physics, and chemistry. However, learning a probability distribution over $S_n$ poses significant challenges due to its intractable size and discrete nature. In this paper, we introduce SymmetricDiffusers, a novel discrete diffusion model that simplifies the task of learning a complicated distribution over $S_n$ by decomposing it into learning simpler transitions of the reverse diffusion using deep neural networks. We identify the riffle shuffle as an effective forward transition and provide empirical guidelines for selecting the diffusion length based on the theory of random walks on finite groups. Additionally, we propose a generalized Plackett-Luce (PL) distribution for the reverse transition, which is provably more expressive than the PL distribution. We further introduce a theoretically grounded \"denoising schedule\" to improve sampling and learning efficiency. Extensive experiments show that our model achieves state-of-the-art or comparable performances on solving tasks including sorting 4-digit MNIST images, jigsaw puzzles, and traveling salesman problems.", "keywords": "Finite Symmetric Groups;Discrete Diffusion;Permutation;Riffle Shuffles;the Plackett-Luce Distribution;Sorting;Jigsaw Puzzle", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4f96603d441054dff53d3a4eb001fb1bf0c0f9f9.zip", "author": "Yongxing Zhang;Donglin Yang;Renjie Liao", "authorids": "~Yongxing_Zhang1;~Donglin_Yang1;~Renjie_Liao1", "gender": "M;M;M", "homepage": ";https://linydthu.github.io/;https://lrjconan.github.io/", "dblp": ";;08/8180", "google_scholar": ";jg2atr8AAAAJ;2wrS35MAAAAJ", "orcid": ";;", "linkedin": "yongxing-zhang-a980a8195/;donglin-yang-473635212/;", "or_profile": "~Yongxing_Zhang1;~Donglin_Yang1;~Renjie_Liao1", "aff": "University of Waterloo;The University of British Columbia;Department of Electrical and Computer Engineering, The University of British Columbia", "aff_domain": "uwaterloo.ca;ubc.ca;ece.ubc.ca", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024symmetricdiffusers,\ntitle={SymmetricDiffusers: Learning Discrete Diffusion on Finite Symmetric Groups},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7Su7gAei1l}\n}", "github": "", "project": "", "reviewers": "UKH2;1hYZ;fFZa;1BDf", "site": "https://openreview.net/forum?id=7Su7gAei1l", "pdf_size": 1242831, "rating": "2;7;7;7", "confidence": "5;1;4;4", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;4", "wc_summary": "104;15;69;47", "wc_strengths": "23;15;119;95", "wc_weaknesses": "122;15;275;312", "wc_questions": "122;15;72;6", "wc_limitations": "122;15;16;1", "wc_review": "493;75;551;461", "wc_reply_reviewers": "889;0;223;27", "wc_reply_authors": "607;0;49;17", "reply_reviewers": "6;0;1;1", "reply_authors": "4;0;2;2", "rating_avg": [ 5.75, 2.165063509461097 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.75, 32.42202183701689 ], "wc_strengths_avg": [ 63.0, 44.8998886412873 ], "wc_weaknesses_avg": [ 181.0, 119.40896113776387 ], "wc_questions_avg": [ 53.75, 46.83147979724749 ], "wc_limitations_avg": [ 38.5, 48.57211133973898 ], "wc_review_avg": [ 395.0, 187.54732736032258 ], "wc_reply_reviewers_avg": [ 284.75, 359.321843894857 ], "wc_reply_authors_avg": [ 168.25, 253.9226013965673 ], "reply_reviewers_avg": [ 2.0, 2.345207879911715 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u8R9lL4WkI4J:scholar.google.com/&scioq=SymmetricDiffusers:+Learning+Discrete+Diffusion+on+Finite+Symmetric+Groups&hl=en&as_sdt=0,34", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Waterloo;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.ubc.ca", "aff_unique_abbr": "UW;UBC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Vancouver", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Stable Minima Cannot Overfit in Univariate ReLU Networks: Generalization by Large Step Sizes", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96450", "id": "7Swrtm9Qsp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Swrtm9Qsp", "openreview": "https://openreview.net/forum?id=7Swrtm9Qsp", "poster": "", "project": "", "author_site": "Dan Qiao, Kaiqi Zhang, Esha Singh, Daniel Soudry, Yu-Xiang Wang", "tldr": "", "abstract": "We study the generalization of two-layer ReLU neural networks in a univariate nonparametric regression problem with noisy labels. This is a problem where kernels (\\emph{e.g.} NTK) are provably sub-optimal and benign overfitting does not happen, thus disqualifying existing theory for interpolating (0-loss, global optimal) solutions. We present a new theory of generalization for local minima that gradient descent with a constant learning rate can \\emph{stably} converge to. We show that gradient descent with a fixed learning rate $\\eta$ can only find local minima that represent smooth functions with a certain weighted \\emph{first order total variation} bounded by $1/\\eta - 1/2 + \\widetilde{O}(\\sigma + \\sqrt{\\mathrm{MSE}})$ where $\\sigma$ is the label noise level, $\\mathrm{MSE}$ is short for mean squared error against the ground truth, and $\\widetilde{O}(\\cdot)$ hides a logarithmic factor. Under mild assumptions, we also prove a nearly-optimal MSE bound of $\\widetilde{O}(n^{-4/5})$ within the strict interior of the support of the $n$ data points. Our theoretical results are validated by extensive simulation that demonstrates large learning rate training induces sparse linear spline fits. To the best of our knowledge, we are the first to obtain generalization bound via minima stability in the non-interpolation case and the first to show ReLU NNs without regularization can achieve near-optimal rates in nonparametric regression.", "keywords": "Generalization bound;minima stability;gradient descent;large learning rate;ReLU neural network", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/f904a3c8806d87f942853224e6b00fdd2208880d.zip", "author": "Dan Qiao;Kaiqi Zhang;Esha Singh;Daniel Soudry;Yu-Xiang Wang", "authorids": "~Dan_Qiao1;~Kaiqi_Zhang2;~Esha_Singh1;~Daniel_Soudry1;~Yu-Xiang_Wang1", "gender": "M;M;F;M;", "homepage": ";;https://esha-singh.github.io/;https://soudry.github.io/;http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";;284/9252.html;126/1779;62/1637-3.html", "google_scholar": "EyfAUuUAAAAJ;XA7np8gAAAAJ;8w_EDpgAAAAJ;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ;HGNZ1fkAAAAJ", "orcid": ";;;0000-0001-9368-6352;", "linkedin": ";;esha-singh-582a17116/;daniel-soudry-2aa3a88/;", "or_profile": "~Dan_Qiao1;~Kaiqi_Zhang2;~Esha_Singh1;~Daniel_Soudry1;~Yu-Xiang_Wang1", "aff": ", University of California, Santa Barbara;UC Santa Barbara;University of California, Santa Barbara;Technion - Israel Institute of Technology, Technion;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;ucsb.edu;ucsb.edu;technion.ac.il;ucsb.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nqiao2024stable,\ntitle={Stable Minima Cannot Overfit in Univariate Re{LU} Networks: Generalization by Large Step Sizes},\nauthor={Dan Qiao and Kaiqi Zhang and Esha Singh and Daniel Soudry and Yu-Xiang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Swrtm9Qsp}\n}", "github": "", "reviewers": "qFzY;2Lhx;3THJ;4Yyh", "pdf_size": 1398513, "rating": "6;7;8;8", "confidence": "3;4;4;3", "soundness": "4;3;4;3", "novelty": "3;3;4;3", "presentation": "4;3;4;4", "wc_summary": "154;44;68;98", "wc_strengths": "85;18;102;54", "wc_weaknesses": "335;158;23;70", "wc_questions": "60;76;115;12", "wc_limitations": "4;7;4;24", "wc_review": "638;303;312;258", "wc_reply_reviewers": "205;13;11;13", "wc_reply_authors": "380;59;10;64", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 41.09744517606904 ], "wc_strengths_avg": [ 64.75, 32.01074038506451 ], "wc_weaknesses_avg": [ 146.5, 119.13123016237178 ], "wc_questions_avg": [ 65.75, 36.92136915121107 ], "wc_limitations_avg": [ 9.75, 8.317902379807062 ], "wc_review_avg": [ 377.75, 151.64164170833814 ], "wc_reply_reviewers_avg": [ 60.5, 83.43110930582189 ], "wc_reply_authors_avg": [ 128.25, 146.871329741376 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10679247062742553843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.ucsb.edu;ucsb.edu;ucsb.edu;technion.ac.il;ucsb.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Santa Barbara;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.technion.ac.il", "aff_unique_abbr": "UCSB;Technion", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "IaC-Eval: A Code Generation Benchmark for Cloud Infrastructure-as-Code Programs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97835", "id": "7TCK0aBL1C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7TCK0aBL1C", "openreview": "https://openreview.net/forum?id=7TCK0aBL1C", "poster": "/media/PosterPDFs/NeurIPS%202024/97835.png?t=1734070843.3177443", "project": "", "author_site": "Patrick Tser Jern Kon, Jiachen Liu, Yiming Qiu, Weijun Fan, Ting He, Lei Lin, Haoran Zhang, Owen Park, George Elengikal, Yuxin Kang, Ang Chen, Mosharaf Chowdhury, Myungjin Lee, Xinyu Wang", "tldr": "", "abstract": "Infrastructure-as-Code (IaC), an important component of cloud computing, allows the definition of cloud infrastructure in high-level programs. However, developing IaC programs is challenging, complicated by factors that include the burgeoning complexity of the cloud ecosystem (e.g., diversity of cloud services and workloads), and the relative scarcity of IaC-specific code examples and public repositories. \nWhile large language models (LLMs) have shown promise in general code generation and could potentially aid in IaC development, no benchmarks currently exist for evaluating their ability to generate IaC code. We present IaC-Eval, a first step in this research direction. IaC-Eval's dataset includes 458 human-curated scenarios covering a wide range of popular AWS services, at varying difficulty levels. Each scenario mainly comprises a natural language IaC problem description and an infrastructure intent specification. The former is fed as user input to the LLM, while the latter is a general notion used to verify if the generated IaC program conforms to the user's intent; by making explicit the problem's requirements that can encompass various cloud services, resources and internal infrastructure details. Our in-depth evaluation shows that contemporary LLMs perform poorly on IaC-Eval, with the top-performing model, GPT-4, obtaining a pass@1 accuracy of 19.36%. In contrast, it scores 86.6% on EvalPlus, a popular Python code generation benchmark, highlighting a need for advancements in this domain. We open-source the IaC-Eval dataset and evaluation framework at https://github.com/autoiac-project/iac-eval to enable future research on LLM-based IaC code generation.", "keywords": "Infrastructure-as-Code;Cloud Computing;Code-generation;dataset;benchmarks;LLM", "primary_area": "", "supplementary_material": "/attachment/bd56a37a0f1f9e00713a7c68b07b4347fa4b7b5b.pdf", "author": "Patrick Tser Jern Kon;Jiachen Liu;Yiming Qiu;Weijun Fan;Ting He;Lei Lin;Haoran Zhang;Owen M. Park;George Sajan Elengikal;Yuxin Kang;Ang Chen;Mosharaf Chowdhury;Myungjin Lee;Xinyu Wang", "authorids": "~Patrick_Tser_Jern_Kon1;~Jiachen_Liu3;~Yiming_Qiu2;~Weijun_Fan1;~Ting_He3;~Lei_Lin5;~Haoran_Zhang18;~Owen_M._Park1;~George_Sajan_Elengikal1;~Yuxin_Kang2;~Ang_Chen1;~Mosharaf_Chowdhury1;~Myungjin_Lee1;~Xinyu_Wang18", "gender": ";;M;F;F;M;M;M;M;F;M;M;M;M", "homepage": "https://www.cs-pk.com/;https://www-personal.umich.edu/~amberljc/;https://yimingqiu.me/;;https://tinghe-c.github.io/;;https://mcommunity.umich.edu/person/haoranwh;https://owenpark.info;;;https://web.eecs.umich.edu/~chenang/;;https://web.eecs.umich.edu/~xwangsd/;http://www.mosharaf.com/", "dblp": "332/2997;;25/9687;;;;;;;;59/146-1.html;;;42/1518", "google_scholar": ";-kQZFScAAAAJ;duUEb5IAAAAJ;;;;;;;;8Y4dDxkAAAAJ;XjWpxJUAAAAJ;mvasb5AAAAAJ;https://scholar.google.com.tw/citations?user=Dzh5C9EAAAAJ", "orcid": ";0000-0003-2317-1956;0009-0003-9328-3205;;;;0009-0001-3931-0634;;;;;0000-0003-2360-7019;0000-0002-1836-0202;0000-0003-0884-6740", "linkedin": ";jiachen-amber-liu-872506169/;;weijun-fan;ting-he-c/;lei-lin-ll;haoran-zhang-6556b1251/;owen-park/;george-elengikal/;yuxin-kang-438b171b6;;;;mosharaf", "or_profile": "~Patrick_Tser_Jern_Kon1;~Jiachen_Liu3;~Yiming_Qiu2;~Weijun_Fan1;~Ting_He3;~Lei_Lin5;~Haoran_Zhang18;~Owen_M._Park1;~George_Sajan_Elengikal1;~Yuxin_Kang2;~Ang_Chen1;~Myungjin_Lee1;~Xinyu_Wang18;~N._Chowdhury1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;Rice University;Cisco;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;rice.edu;cisco.com;umich.edu;umich.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Assistant Professor;Principal Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkon2024iaceval,\ntitle={IaC-Eval: A Code Generation Benchmark for Cloud Infrastructure-as-Code Programs},\nauthor={Patrick Tser Jern Kon and Jiachen Liu and Yiming Qiu and Weijun Fan and Ting He and Lei Lin and Haoran Zhang and Owen M. Park and George Sajan Elengikal and Yuxin Kang and Ang Chen and Mosharaf Chowdhury and Myungjin Lee and Xinyu Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=7TCK0aBL1C}\n}", "github": "", "reviewers": "nLxB;5a8w;QEzy;VvUx", "pdf_size": 1716903, "rating": "3;6;7;8", "confidence": "3;2;4;5", "wc_summary_and_contributions": "70;111;84;94", "wc_strengths": "50;69;87;2", "wc_improvement": "99;76;37;2", "wc_limitations": "45;7;1;2", "wc_correctness": "9;7;1;24", "wc_clarity": "8;8;1;1", "wc_relation_to_prior_work": "12;12;1;29", "wc_documentation": "1;57;1;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "295;348;214;163", "wc_reply_reviewers": "0;0;0;53", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 89.75, 14.939461168328663 ], "wc_strengths_avg": [ 52.0, 31.693847983480957 ], "wc_improvement_avg": [ 53.5, 37.08436328157732 ], "wc_limitations_avg": [ 13.75, 18.18481509391833 ], "wc_correctness_avg": [ 10.25, 8.46684711093805 ], "wc_clarity_avg": [ 4.5, 3.5 ], "wc_relation_to_prior_work_avg": [ 13.5, 10.012492197250394 ], "wc_documentation_avg": [ 16.75, 23.41340428045439 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 255.0, 71.40378141247143 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.5976143046671969, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11969344086227194196&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;rice.edu;cisco.com;umich.edu;umich.edu", "author_num": 14, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;1;2;0;0", "aff_unique_norm": "University of Michigan;Rice University;Cisco Systems", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://www.rice.edu;https://www.cisco.com", "aff_unique_abbr": "UM;Rice;Cisco", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Randomized Exploration in Cooperative Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96449", "id": "7Tir0u0ukg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Tir0u0ukg", "openreview": "https://openreview.net/forum?id=7Tir0u0ukg", "poster": "/media/PosterPDFs/NeurIPS%202024/96449.png?t=1731534382.6465163", "project": "", "author_site": "Hao-Lun Hsu, Weixin Wang, Miroslav Pajic, Pan Xu", "tldr": "", "abstract": "We present the first study on provably efficient randomized exploration in cooperative multi-agent reinforcement learning (MARL). We propose a unified algorithm framework for randomized exploration in parallel Markov Decision Processes (MDPs), and two Thompson Sampling (TS)-type algorithms, CoopTS-PHE and CoopTS-LMC, incorporating the perturbed-history exploration (PHE) strategy and the Langevin Monte Carlo exploration (LMC) strategy respectively, which are flexible in design and easy to implement in practice. For a special class of parallel MDPs where the transition is (approximately) linear, we theoretically prove that both CoopTS-PHE and CoopTS-LMC achieve a $\\widetilde{\\mathcal{O}}(d^{3/2}H^2\\sqrt{MK})$ regret bound with communication complexity $\\widetilde{\\mathcal{O}}(dHM^2)$, where $d$ is the feature dimension, $H$ is the horizon length, $M$ is the number of agents, and $K$ is the number of episodes. This is the first theoretical result for randomized exploration in cooperative MARL. We evaluate our proposed method on multiple parallel RL environments, including a deep exploration problem (i.e., $N$-chain), a video game, and a real-world problem in energy systems. Our experimental results support that our framework can achieve better performance, even under conditions of misspecified transition models. Additionally, we establish a connection between our unified framework and the practical application of federated learning.", "keywords": "Multi-Agent Reinforcement Learning;Randomized Exploration;Deep Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/73290999d61d20bdfe079c4bbfb9512982dde8ed.zip", "author": "Hao-Lun Hsu;Weixin Wang;Miroslav Pajic;Pan Xu", "authorids": "~Hao-Lun_Hsu1;~Weixin_Wang2;~Miroslav_Pajic2;~Pan_Xu1", "gender": "M;M;M;M", "homepage": "https://hlhsu.github.io/;https://weixinwilliamwang.github.io/weixinwang.github.io/;http://people.duke.edu/~mp275/;https://panxulab.github.io/", "dblp": "303/0321;;74/7446.html;11/9718-2", "google_scholar": "h9qf9vUAAAAJ;WluAK5cAAAAJ;Fbn21-8AAAAJ;UkYBx6YAAAAJ", "orcid": ";;;0000-0002-2559-8622", "linkedin": "hlhsu/;;;pan-xu-0931a2a6/", "or_profile": "~Hao-Lun_Hsu1;~Weixin_Wang2;~Miroslav_Pajic2;~Pan_Xu1", "aff": "Duke University;Duke University;Duke University;Duke University", "aff_domain": "duke.edu;duke.edu;duke.edu;duke.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhsu2024randomized,\ntitle={Randomized Exploration in Cooperative Multi-Agent Reinforcement Learning},\nauthor={Hao-Lun Hsu and Weixin Wang and Miroslav Pajic and Pan Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Tir0u0ukg}\n}", "github": "", "reviewers": "kVyU;kfpt;CHJv;WAEL", "pdf_size": 10231752, "rating": "5;5;5;7", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "40;36;52;139", "wc_strengths": "35;37;78;233", "wc_weaknesses": "313;17;216;32", "wc_questions": "53;15;43;86", "wc_limitations": "1;10;11;1", "wc_review": "442;115;400;491", "wc_reply_reviewers": "49;0;0;68", "wc_reply_authors": "20;0;0;21", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.75, 42.12704000995085 ], "wc_strengths_avg": [ 95.75, 81.07828007549247 ], "wc_weaknesses_avg": [ 144.5, 124.91697242568762 ], "wc_questions_avg": [ 49.25, 25.380849079571785 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 362.0, 146.19678519037276 ], "wc_reply_reviewers_avg": [ 29.25, 30.011456145945335 ], "wc_reply_authors_avg": [ 10.25, 10.256095748383007 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15335237409782148024&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "duke.edu;duke.edu;duke.edu;duke.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Harmless Rawlsian Fairness Regardless of Demographic Prior", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96448", "id": "7U5MwUS3Rw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7U5MwUS3Rw", "openreview": "https://openreview.net/forum?id=7U5MwUS3Rw", "poster": "/media/PosterPDFs/NeurIPS%202024/96448.png?t=1731395129.6177623", "project": "", "author_site": "Xuanqian Wang, Jing Li, Ivor Tsang, Yew Soon Ong", "tldr": "", "abstract": "Due to privacy and security concerns, recent advancements in group fairness advocate for model training regardless of demographic information. However, most methods still require prior knowledge of demographics. In this study, we explore the potential for achieving fairness without compromising its utility when no prior demographics are provided to the training set, namely _harmless Rawlsian fairness_. We ascertain that such a fairness requirement with no prior demographic information essential promotes training losses to exhibit a Dirac delta distribution. To this end, we propose a simple but effective method named VFair to minimize the variance of training losses inside the optimal set of empirical losses. This problem is then optimized by a tailored dynamic update approach that operates in both loss and gradient dimensions, directing the model towards relatively fairer solutions while preserving its intact utility. Our experimental findings indicate that regression tasks, which are relatively unexplored from literature, can achieve significant fairness improvement through VFair regardless of any prior, whereas classification tasks usually do not because of their quantized utility measurements. The implementation of our method is publicly available at https://github.com/wxqpxw/VFair.", "keywords": "Harmless fairness;demographics-free;reducing variance of losses", "primary_area": "fairness", "supplementary_material": "", "author": "Xuanqian Wang;Jing Li;Ivor Tsang;Yew-Soon Ong", "authorids": "~Xuanqian_Wang1;~Jing_Li17;~Ivor_Tsang1;~Yew-Soon_Ong1", "gender": "F;;;", "homepage": ";;;", "dblp": "336/0539.html;;;", "google_scholar": ";;;", "orcid": "0009-0004-4496-6939;;;", "linkedin": ";;;", "or_profile": "~Xuanqian_Wang1;~Jing_Li17;~Ivor_Tsang1;~Yew-Soon_Ong1", "aff": "Beihang University;;;", "aff_domain": "buaa.edu.cn;;;", "position": "MS student;;;", "bibtex": "@inproceedings{\nwang2024towards,\ntitle={Towards Harmless Rawlsian Fairness Regardless of Demographic Prior},\nauthor={Xuanqian Wang and Jing Li and Ivor Tsang and Yew-Soon Ong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7U5MwUS3Rw}\n}", "github": "", "reviewers": "UBKm;Xh7a;kaxC;cPfu", "pdf_size": 2786252, "rating": "4;5;7;8", "confidence": "4;3;4;5", "soundness": "1;2;2;3", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "58;54;75;73", "wc_strengths": "11;213;116;134", "wc_weaknesses": "38;484;347;110", "wc_questions": "210;105;55;5", "wc_limitations": "6;31;44;1", "wc_review": "323;887;637;323", "wc_reply_reviewers": "19;0;28;10", "wc_reply_authors": "321;0;36;36", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 65.0, 9.137833441248533 ], "wc_strengths_avg": [ 118.5, 71.99479147827293 ], "wc_weaknesses_avg": [ 244.75, 179.30333934425204 ], "wc_questions_avg": [ 93.75, 75.8596566034938 ], "wc_limitations_avg": [ 20.5, 17.698870020427858 ], "wc_review_avg": [ 542.5, 236.6278724072885 ], "wc_reply_reviewers_avg": [ 14.25, 10.40132203135736 ], "wc_reply_authors_avg": [ 98.25, 129.44183056492983 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.670820393249937, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xGXVB-Hj6ysJ:scholar.google.com/&scioq=Towards+Harmless+Rawlsian+Fairness+Regardless+of+Demographic+Prior&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "buaa.edu.cn;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "SMART: Towards Pre-trained Missing-Aware Model for Patient Health Status Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96447", "id": "7UenF4kx4j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7UenF4kx4j", "openreview": "https://openreview.net/forum?id=7UenF4kx4j", "poster": "/media/PosterPDFs/NeurIPS%202024/96447.png?t=1731222522.5911357", "project": "", "author_site": "Zhihao Yu, Chu Xu, Yujie Jin, Yasha Wang, Junfeng Zhao", "tldr": "", "abstract": "Electronic health record (EHR) data has emerged as a valuable resource for analyzing patient health status. However, the prevalence of missing data in EHR poses significant challenges to existing methods, leading to spurious correlations and suboptimal predictions. While various imputation techniques have been developed to address this issue, they often obsess difficult-to-interpolate details and may introduce additional noise when making clinical predictions. To tackle this problem, we propose SMART, a Self-Supervised Missing-Aware RepresenTation Learning approach for patient health status prediction, which encodes missing information via missing-aware temporal and variable attentions and learns to impute missing values through a novel self-supervised pre-training approach which reconstructs missing data representations in the latent space rather than in input space as usual. By adopting elaborated attentions and focusing on learning higher-order representations, SMART promotes better generalization and robustness to missing data. We validate the effectiveness of SMART through extensive experiments on six EHR tasks, demonstrating its superiority over state-of-the-art methods.", "keywords": "Multivariate Time Series;Healthcare Prediction;Electronic Medical Records", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/7c16d07e8cde30ab6a2475e0e8f21c51699fa539.zip", "author": "Zhihao Yu;Xu Chu;Yujie Jin;Yasha Wang;Junfeng Zhao", "authorids": "~Zhihao_Yu1;~Xu_Chu1;~Yujie_Jin2;~Yasha_Wang3;~Junfeng_Zhao1", "gender": ";;M;M;F", "homepage": "https://github.com/yzhHoward;;https://github.com/JinYujie99;;https://cs.pku.edu.cn/info/1084/1224.htm", "dblp": "201/5271;;;70/2725.html;72/3918-1", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zhihao_Yu1;~Xu_Chu1;~Yujie_Jin2;~Yasha_Wang3;~Junfeng_Zhao1", "aff": "Peking University;;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024smart,\ntitle={{SMART}: Towards Pre-trained Missing-Aware Model for Patient Health Status Prediction},\nauthor={Zhihao Yu and Xu Chu and Yujie Jin and Yasha Wang and Junfeng Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7UenF4kx4j}\n}", "github": "", "reviewers": "Hfjo;MRHA;PYgK;CSeo", "pdf_size": 515076, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "2;2;4;3", "novelty": "2;3;4;3", "presentation": "2;3;4;4", "wc_summary": "107;93;102;93", "wc_strengths": "88;46;24;99", "wc_weaknesses": "102;590;81;80", "wc_questions": "146;83;41;118", "wc_limitations": "59;23;5;1", "wc_review": "502;835;253;391", "wc_reply_reviewers": "35;402;24;211", "wc_reply_authors": "38;344;38;316", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 98.75, 6.015604707757983 ], "wc_strengths_avg": [ 64.25, 30.515364982251153 ], "wc_weaknesses_avg": [ 213.25, 217.6940226556531 ], "wc_questions_avg": [ 97.0, 39.287402561126385 ], "wc_limitations_avg": [ 22.0, 22.9128784747792 ], "wc_review_avg": [ 495.25, 215.0748416249562 ], "wc_reply_reviewers_avg": [ 168.0, 154.13468136665415 ], "wc_reply_authors_avg": [ 184.0, 146.33523157462798 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4675627243762156251&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Interpreting CLIP with Sparse Linear Concept Embeddings (SpLiCE)", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96446", "id": "7UyBKTFrtd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7UyBKTFrtd", "openreview": "https://openreview.net/forum?id=7UyBKTFrtd", "poster": "", "project": "", "author_site": "Usha Bhalla, Alex Oesterling, Suraj Srinivas, Flavio Calmon, Himabindu Lakkaraju", "tldr": "", "abstract": "CLIP embeddings have demonstrated remarkable performance across a wide range of multimodal applications. However, these high-dimensional, dense vector representations are not easily interpretable, limiting our understanding of the rich structure of CLIP and its use in downstream applications that require transparency. \nIn this work, we show that the semantic structure of CLIP's latent space can be leveraged to provide interpretability, allowing for the decomposition of representations into semantic concepts. \nWe formulate this problem as one of sparse recovery and propose a novel method, Sparse Linear Concept Embeddings (SpLiCE), for transforming CLIP representations into sparse linear combinations of human-interpretable concepts. Distinct from previous work, \\method is task-agnostic and can be used, without training, \nto explain and even replace traditional dense CLIP representations, maintaining high downstream performance while significantly improving their interpretability. We also demonstrate significant use cases of \\method representations including detecting spurious correlations and model editing. Code is provided at https://github.com/AI4LIFE-GROUP/SpLiCE.", "keywords": "Interpretable Machine Learning;Dictionary Learning;Representation Learning;Multimodal Models;Interpretability;CLIP", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Usha Bhalla;Alex Oesterling;Suraj Srinivas;Flavio Calmon;Himabindu Lakkaraju", "authorids": "~Usha_Bhalla1;~Alex_Oesterling2;~Suraj_Srinivas1;~Flavio_Calmon1;~Himabindu_Lakkaraju1", "gender": ";M;M;;", "homepage": ";https://www.alexoesterling.com/;https://suraj-srinivas.github.io/;http://people.seas.harvard.edu/~flavio/;", "dblp": ";295/9299;144/0584;89/4611;", "google_scholar": ";dlRlXT4AAAAJ;https://scholar.google.co.in/citations?user=J2JWgKgAAAAJ;P8N_YH4AAAAJ;", "orcid": ";0000-0001-8546-0089;;;", "linkedin": ";axo/;;;", "or_profile": "~Usha_Bhalla1;~Alex_Oesterling2;~Suraj_Srinivas1;~Flavio_Calmon1;~Himabindu_Lakkaraju1", "aff": ";School of Engineering and Applied Sciences, Harvard University;School of Engineering and Applied Sciences, Harvard University;Harvard University;", "aff_domain": ";seas.harvard.edu;seas.harvard.edu;harvard.edu;", "position": ";PhD student;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nbhalla2024interpreting,\ntitle={Interpreting {CLIP} with Sparse Linear Concept Embeddings (SpLi{CE})},\nauthor={Usha Bhalla and Alex Oesterling and Suraj Srinivas and Flavio Calmon and Himabindu Lakkaraju},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7UyBKTFrtd}\n}", "github": "", "reviewers": "nCHL;jbkR;acbD;djnP;Vxod", "pdf_size": 2523576, "rating": "6;6;6;7;7", "confidence": "3;4;3;4;3", "soundness": "2;3;2;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;4;3", "wc_summary": "92;50;49;133;162", "wc_strengths": "117;60;82;93;95", "wc_weaknesses": "299;2;108;806;624", "wc_questions": "61;75;61;170;75", "wc_limitations": "86;4;8;24;10", "wc_review": "655;191;308;1226;966", "wc_reply_reviewers": "169;0;36;399;205", "wc_reply_authors": "51;0;55;903;550", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;2;4;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 97.2, 44.85264763645508 ], "wc_strengths_avg": [ 89.4, 18.57525235360209 ], "wc_weaknesses_avg": [ 367.8, 304.5313776936623 ], "wc_questions_avg": [ 88.4, 41.277596829272895 ], "wc_limitations_avg": [ 26.4, 30.552250326285296 ], "wc_review_avg": [ 669.2, 389.2194239757312 ], "wc_reply_reviewers_avg": [ 161.8, 141.55338215669735 ], "wc_reply_authors_avg": [ 311.8, 357.05428158754796 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.16666666666666669, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8725377257841814578&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": ";seas.harvard.edu;seas.harvard.edu;harvard.edu;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "School of Engineering and Applied Sciences", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Prediction-Powered Ranking of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96445", "id": "7V62sQ5Jra", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7V62sQ5Jra", "openreview": "https://openreview.net/forum?id=7V62sQ5Jra", "poster": "/media/PosterPDFs/NeurIPS%202024/96445.png?t=1733144417.1238525", "project": "", "author_site": "Ivi Chatzi, Eleni Straitouri, Suhas Thejaswi, Manuel Rodriguez", "tldr": "", "abstract": "Large language models are often ranked according to their level of alignment with human preferences---a model is better than other models if its outputs are more frequently preferred by humans. One of the popular ways to elicit human preferences utilizes pairwise comparisons between the outputs provided by different models to the same inputs. However, since gathering pairwise comparisons by humans is costly and time-consuming, it has become a common practice to gather pairwise comparisons by a strong large language model---a model strongly aligned with human preferences. Surprisingly, practitioners cannot currently measure the uncertainty that any mismatch between human and model preferences may introduce in the constructed rankings. In this work, we develop a statistical framework to bridge this gap. Given a (small) set of pairwise comparisons by humans and a large set of pairwise comparisons by a model, our framework provides a rank-set---a set of possible ranking positions---for each of the models under comparison. Moreover, it guarantees that, with a probability greater than or equal to a user-specified value, the rank-sets cover the true ranking consistent with the distribution of human pairwise preferences asymptotically. Using pairwise comparisons made by humans in the LMSYS Chatbot Arena platform and pairwise comparisons made by three strong large language models, we empirically demonstrate the effectivity of our framework and show that the rank-sets constructed using only pairwise comparisons by the strong large language models are often inconsistent with (the distribution of) human pairwise preferences.", "keywords": "Large language models;LLM evaluation;LLM ranking", "primary_area": "evaluation", "supplementary_material": "", "author": "Ivi Chatzi;Eleni Straitouri;Suhas Thejaswi;Manuel Gomez Rodriguez", "authorids": "~Ivi_Chatzi1;~Eleni_Straitouri1;~Suhas_Thejaswi1;~Manuel_Gomez_Rodriguez1", "gender": "F;;;M", "homepage": "https://qvapil.github.io/;https://people.mpi-sws.org/~estraitouri/;;https://www.mpi-sws.org/~manuelgr/", "dblp": ";302/4619;;73/8260", "google_scholar": "_WMz0f8AAAAJ;kphSqwwAAAAJ;;https://scholar.google.com.tw/citations?user=UcuXmuwAAAAJ", "orcid": ";;;", "linkedin": "ivi-chatzi-40a66624b/;eleni-straitouri-919419205;;", "or_profile": "~Ivi_Chatzi1;~Eleni_Straitouri1;~Suhas_Thejaswi1;~Manuel_Gomez_Rodriguez1", "aff": "MPI-SWS;MPI-SWS;;MPI-SWS", "aff_domain": "mpi-sws.org;mpi-sws.org;;mpi-sws.org", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nchatzi2024predictionpowered,\ntitle={Prediction-Powered Ranking of Large Language Models},\nauthor={Ivi Chatzi and Eleni Straitouri and Suhas Thejaswi and Manuel Gomez Rodriguez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7V62sQ5Jra}\n}", "github": "", "reviewers": "Fevo;EPH6;dge6;G9Nw", "pdf_size": 1160782, "rating": "6;6;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "107;114;104;117", "wc_strengths": "53;42;110;107", "wc_weaknesses": "412;144;288;231", "wc_questions": "2;2;59;1", "wc_limitations": "12;4;29;1", "wc_review": "586;306;590;457", "wc_reply_reviewers": "0;21;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.5, 5.220153254455275 ], "wc_strengths_avg": [ 78.0, 30.76524012583032 ], "wc_weaknesses_avg": [ 268.75, 97.31231936399419 ], "wc_questions_avg": [ 16.0, 24.829418035870273 ], "wc_limitations_avg": [ 11.5, 10.874281585465774 ], "wc_review_avg": [ 484.75, 116.244085871067 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1226754804743879693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "mpi-sws.org;mpi-sws.org;;mpi-sws.org", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Max Planck Institute for Software Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "7VNvM9SnRE", "title": "Optimal, Efficient and Practical Algorithms for Assortment Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "We address the problem of active online assortment optimization problem with preference feedback, which is a framework for modeling user choices and subsetwise utility maximization. The framework is useful in various real-world applications including ad placement, online retail, recommender systems, and fine-tuning language models, amongst many others. The problem, although has been studied in the past, lacks an intuitive and practical solution approach with simultaneously efficient algorithm and optimal regret guarantee. E.g., popularly used assortment selection algorithms often require the presence of a ``strong reference\" which is always included in the choice sets, further they are also designed to offer the same assortments repeatedly until the reference item gets selected---all such requirements are quite unrealistic for practical applications. In this paper, we designed efficient algorithms for the problem of regret minimization in assortment selection with \\emph{Plackett Luce} (PL) based user choices. We designed a novel concentration guarantee for estimating the score parameters of the PL model using `\\emph{Pairwise Rank-Breaking}', which builds the foundation of our proposed algorithms. Moreover, our methods are practical, provably optimal, and devoid of the aforementioned limitations of the existing methods.", "keywords": "Active online assortment optimization;Preference feedback;Subsetwise utility maximization;Assortment selection algorithms;Plackett Luce model;Regret minimization;Pairwise Rank-Breaking;Concentration guarantee;Practical algorithms;Empirical evaluations", "primary_area": "bandits", "supplementary_material": "", "author": "Aadirupa Saha;Pierre Gaillard", "authorids": "~Aadirupa_Saha1;~Pierre_Gaillard1", "gender": ";M", "homepage": "http://aadirupa.github.io/;http://pierre.gaillard.me", "dblp": ";25/2131", "google_scholar": "https://scholar.google.co.in/citations?user=7a49tQYAAAAJ;https://scholar.google.fr/citations?user=-CPaGaEAAAAJ", "orcid": "0000-0003-4965-6417;0000-0001-6777-6127", "linkedin": "aadirupa-saha;", "or_profile": "~Aadirupa_Saha1;~Pierre_Gaillard1", "aff": "Apple;INRIA", "aff_domain": "apple.com;inria.fr", "position": "Researcher;Researcher", "bibtex": "@misc{\nanonymous2024optimal,\ntitle={Optimal, Efficient and Practical Algorithms for Assortment Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7VNvM9SnRE}\n}", "github": "", "project": "", "reviewers": "1RHQ;dQv5;L6d3;mX9J", "site": "https://openreview.net/forum?id=7VNvM9SnRE", "pdf_size": 1694349, "rating": "5;5;6;7", "confidence": "3;2;2;3", "soundness": "2;2;2;3", "novelty": "2;3;2;3", "presentation": "2;1;3;3", "wc_summary": "76;546;159;133", "wc_strengths": "34;30;142;47", "wc_weaknesses": "242;79;424;21", "wc_questions": "48;91;85;185", "wc_limitations": "3;14;52;10", "wc_review": "403;760;862;396", "wc_reply_reviewers": "5;30;86;8", "wc_reply_authors": "845;885;2680;365", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;7;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 228.5, 185.75050470994688 ], "wc_strengths_avg": [ 63.25, 45.89866555794406 ], "wc_weaknesses_avg": [ 191.5, 156.79046527132957 ], "wc_questions_avg": [ 102.25, 50.53402319230085 ], "wc_limitations_avg": [ 19.75, 19.031224343168255 ], "wc_review_avg": [ 605.25, 208.9011428882092 ], "wc_reply_reviewers_avg": [ 32.25, 32.4990384473141 ], "wc_reply_authors_avg": [ 1193.75, 882.1449356540002 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8u_CxzWbBj0J:scholar.google.com/&scioq=Optimal,+Efficient+and+Practical+Algorithms+for+Assortment+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Apple;INRIA", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.inria.fr", "aff_unique_abbr": "Apple;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;France" }, { "title": "Human-3Diffusion: Realistic Avatar Creation via Explicit 3D Consistent Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96444", "id": "7W0f7lifDk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7W0f7lifDk", "openreview": "https://openreview.net/forum?id=7W0f7lifDk", "poster": "/media/PosterPDFs/NeurIPS%202024/96444.png?t=1733315926.858062", "project": "", "author_site": "Yuxuan Xue, Xianghui Xie, Riccardo Marin, Gerard Pons-Moll", "tldr": "", "abstract": "Creating realistic avatars from a single RGB image is an attractive yet challenging problem. To deal with challenging loose clothing or occlusion by interaction objects, we leverage powerful shape prior from 2D diffusion models pretrained on large datasets. Although 2D diffusion models demonstrate strong generalization capability, they cannot provide multi-view shape priors with guaranteed 3D consistency. We propose Human-3Diffusion: Realistic Avatar Creation via Explicit 3D Consistent Diffusion. Our key insight is that 2D multi-view diffusion and 3D reconstruction models provide complementary information for each other. By coupling them in a tight manner, we can fully leverage the potential of both models. We introduce a novel image-conditioned generative 3D Gaussian Splats reconstruction model that leverages the prior from 2D multi-view diffusion models, and provides an explicit 3D representation, which further guides the 2D reverse sampling process\nto have better 3D consistency. Experiments show that our proposed framework outperforms state-of-the-art methods and enables the creation of realistic avatars from a single RGB image, achieving high-fidelity in both geometry and appearance. Extensive ablations also validate the efficacy of our design, (1) multi-view 2D priors conditioning in generative 3D reconstruction and (2) consistency refinement of sampling trajectory via the explicit 3D representation. Our code and models are released at https://yuxuan-xue.com/human-3diffusion/.", "keywords": "3D Reconstruction;3D Human Reconstruction;Diffusion Models;3D Generative Models;2D Foundation Models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuxuan Xue;Xianghui Xie;Riccardo Marin;Gerard Pons-Moll", "authorids": "~Yuxuan_Xue1;~Xianghui_Xie1;~Riccardo_Marin1;~Gerard_Pons-Moll2", "gender": ";;;", "homepage": "http://yuxuan-xue.com;https://people.mpi-inf.mpg.de/~xxie/;;", "dblp": "254/6994;27/6971;;", "google_scholar": "5SKNmhcAAAAJ;J3TVNXEAAAAJ;;", "orcid": ";;;", "linkedin": ";xianghui-xie-3a8817198/;;", "or_profile": "~Yuxuan_Xue1;~Xianghui_Xie1;~Riccardo_Marin1;~Gerard_Pons-Moll2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Saarland Informatics Campus, Max-Planck Institute;;", "aff_domain": "uni-tuebingen.de;mpi-inf.mpg.de;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nxue2024humandiffusion,\ntitle={Human-3Diffusion: Realistic Avatar Creation via Explicit 3D Consistent Diffusion Models},\nauthor={Yuxuan Xue and Xianghui Xie and Riccardo Marin and Gerard Pons-Moll},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7W0f7lifDk}\n}", "github": "", "reviewers": "RJbF;uByW;KYnm;iJQG", "pdf_size": 35661348, "rating": "4;5;5;6", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "98;46;81;286", "wc_strengths": "52;29;47;107", "wc_weaknesses": "133;74;203;91", "wc_questions": "61;39;109;130", "wc_limitations": "11;16;11;66", "wc_review": "355;204;451;680", "wc_reply_reviewers": "212;262;76;55", "wc_reply_authors": "909;747;244;0", "reply_reviewers": "2;2;2;1", "reply_authors": "4;3;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 127.75, 93.2694349720207 ], "wc_strengths_avg": [ 58.75, 29.140821882712917 ], "wc_weaknesses_avg": [ 125.25, 49.761305248154414 ], "wc_questions_avg": [ 84.75, 36.375644324190326 ], "wc_limitations_avg": [ 26.0, 23.18404623873926 ], "wc_review_avg": [ 422.5, 172.7838244743992 ], "wc_reply_reviewers_avg": [ 151.25, 87.86744277603623 ], "wc_reply_authors_avg": [ 475.0, 367.87429918383805 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4152648346544311755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni-tuebingen.de;mpi-inf.mpg.de;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Max-Planck Institute", "aff_unique_dep": ";Informatics", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-sws.org", "aff_unique_abbr": "Uni T\u00fcbingen;MPI-SWS", "aff_campus_unique_index": "0;1", "aff_campus_unique": "T\u00fcbingen;Saarland", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Derivatives of Stochastic Gradient Descent in parametric optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96443", "id": "7WoOphIZ8u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7WoOphIZ8u", "openreview": "https://openreview.net/forum?id=7WoOphIZ8u", "poster": "/media/PosterPDFs/NeurIPS%202024/96443.png?t=1732032081.7571323", "project": "", "author_site": "Franck Iutzeler, Edouard Pauwels, Samuel Vaiter", "tldr": "", "abstract": "We consider stochastic optimization problems where the objective depends on some parameter, as commonly found in hyperparameter optimization for instance. We investigate the behavior of the derivatives of the iterates of Stochastic Gradient Descent (SGD) with respect to that parameter and show that they are driven by an inexact SGD recursion on a different objective function, perturbed by the convergence of the original SGD. This enables us to establish that the derivatives of SGD converge to the derivative of the solution mapping in terms of mean squared error whenever the objective is strongly convex. Specifically, we demonstrate that with constant step-sizes, these derivatives stabilize within a noise ball centered at the solution derivative, and that with vanishing step-sizes they exhibit $O(\\log(k)^2 / k)$ convergence rates. Additionally, we prove exponential convergence in the interpolation regime. Our theoretical findings are illustrated by numerical experiments on synthetic tasks.", "keywords": "automatic differentiation;stochastic gradient descent;optimization", "primary_area": "optimization", "supplementary_material": "/attachment/312eccdb3c0f260a7de2de82883f0b8eef67905a.zip", "author": "Franck Iutzeler;Edouard Pauwels;Samuel Vaiter", "authorids": "~Franck_Iutzeler1;~Edouard_Pauwels1;~Samuel_Vaiter1", "gender": "M;M;M", "homepage": "http://www.iutzeler.org;https://edouardpauwels.fr/;https://samuelvaiter.com", "dblp": "119/4896;52/9593;51/10261.html", "google_scholar": "https://scholar.google.fr/citations?user=De82J1EAAAAJ;E9lzDYQAAAAJ;HkXkm7IAAAAJ", "orcid": "0000-0003-2537-380X;;0000-0002-4077-708X", "linkedin": ";;", "or_profile": "~Franck_Iutzeler1;~Edouard_Pauwels1;~Samuel_Vaiter1", "aff": "Universit\u00e9 de Toulouse;TSE;CNRS", "aff_domain": "univ-toulouse.fr;tse-fr.eu;cnrs.fr", "position": "Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\niutzeler2024derivatives,\ntitle={Derivatives of Stochastic Gradient Descent in parametric optimization},\nauthor={Franck Iutzeler and Edouard Pauwels and Samuel Vaiter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7WoOphIZ8u}\n}", "github": "", "reviewers": "d938;cZZE;mZD2;bPEV", "pdf_size": 2057022, "rating": "4;6;6;7", "confidence": "3;1;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "73;63;49;107", "wc_strengths": "20;21;34;18", "wc_weaknesses": "128;34;38;135", "wc_questions": "186;47;24;145", "wc_limitations": "7;3;2;1", "wc_review": "414;168;147;406", "wc_reply_reviewers": "0;44;45;86", "wc_reply_authors": "83;75;75;67", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.0, 21.400934559032695 ], "wc_strengths_avg": [ 23.25, 6.299801584177076 ], "wc_weaknesses_avg": [ 83.75, 47.835002874464216 ], "wc_questions_avg": [ 100.5, 67.09135562797938 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 283.75, 126.49975296418566 ], "wc_reply_reviewers_avg": [ 43.75, 30.416894976312097 ], "wc_reply_authors_avg": [ 75.0, 5.656854249492381 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.15789473684210528, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V1YtHZMCUisJ:scholar.google.com/&scioq=Derivatives+of+Stochastic+Gradient+Descent+in+parametric+optimization&hl=en&as_sdt=0,10", "gs_version_total": 4, "email": "univ-toulouse.fr;tse-fr.eu;cnrs.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Toulouse;TSE;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.univ-toulouse.fr;;https://www.cnrs.fr", "aff_unique_abbr": "UT;;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France;" }, { "title": "Progressive Entropic Optimal Transport Solvers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96442", "id": "7WvwzuYkUq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7WvwzuYkUq", "openreview": "https://openreview.net/forum?id=7WvwzuYkUq", "poster": "", "project": "", "author_site": "Parnian Kassraie, Aram-Alexandre Pooladian, Michal Klein, James Thornton, Jonathan Niles-Weed, Marco Cuturi", "tldr": "", "abstract": "Optimal transport (OT) has profoundly impacted machine learning by providing theoretical and computational tools to realign datasets.\nIn this context, given two large point clouds of sizes $n$ and $m$ in $\\mathbb{R}^d$, entropic OT (EOT) solvers have emerged as the most reliable tool to either solve the Kantorovich problem and output a $n\\times m$ coupling matrix, or to solve the Monge problem and learn a vector-valued push-forward map. \nWhile the robustness of EOT couplings/maps makes them a go-to choice in practical applications, EOT solvers remain difficult to tune because of a small but influential set of hyperparameters, notably the omnipresent entropic regularization strength $\\varepsilon$. Setting $\\varepsilon$ can be difficult, as it simultaneously impacts various performance metrics, such as compute speed, statistical performance, generalization, and bias. In this work, we propose a new class of EOT solvers (ProgOT), that can estimate both plans and transport maps.\nWe take advantage of several opportunities to optimize the computation of EOT solutions by *dividing* mass displacement using a time discretization, borrowing inspiration from dynamic OT formulations, and *conquering* each of these steps using EOT with properly scheduled parameters. We provide experimental evidence demonstrating that ProgOT is a faster and more robust alternative to *standard solvers* when computing couplings at large scales, even outperforming neural network-based approaches. We also prove statistical consistency of our approach for estimating OT maps.", "keywords": "Optimal Transport;Entropy Regularization", "primary_area": "other", "supplementary_material": "/attachment/14c9e7cec1b60d51d6de935ddfa9b462d8a280b3.zip", "author": "Parnian Kassraie;Aram-Alexandre Pooladian;Michal Klein;James Thornton;Jonathan Niles-Weed;marco cuturi", "authorids": "~Parnian_Kassraie1;~Aram-Alexandre_Pooladian2;~Michal_Klein1;~James_Thornton1;~Jonathan_Niles-Weed1;~marco_cuturi2", "gender": "F;M;M;;M;M", "homepage": "https://pkassraie.github.io;http://www.arampooladian.com;https://github.com/michalk8;https://jtt94.github.io/;http://marcocuturi.net;http://jonathannilesweed.com", "dblp": "216/8534.html;238/0532;332/4607;;85/5102;160/8992", "google_scholar": "GFDOkb0AAAAJ;6CNhsjoAAAAJ;zByzdzcAAAAJ;oFZHOwgAAAAJ;https://scholar.google.fr/citations?user=kQEydDMAAAAJ;", "orcid": ";;0000-0002-2433-6380;;;", "linkedin": "parnian-kassraie/;;michal-klein-148697165/;;;", "or_profile": "~Parnian_Kassraie1;~Aram-Alexandre_Pooladian2;~Michal_Klein1;~James_Thornton1;~marco_cuturi2;~Jonathan_Weed1", "aff": "Google;New York University;Apple;Apple;Ensae ParisTech;New York University", "aff_domain": "deepmind.com;nyu.edu;apple.com;apple.com;ensae.fr;nyu.edu", "position": "Intern;PhD student;Researcher;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkassraie2024progressive,\ntitle={Progressive Entropic Optimal Transport Solvers},\nauthor={Parnian Kassraie and Aram-Alexandre Pooladian and Michal Klein and James Thornton and Jonathan Niles-Weed and marco cuturi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7WvwzuYkUq}\n}", "github": "", "reviewers": "Agip;9mT1;Z1nb;mHCn", "pdf_size": 1252401, "rating": "4;5;5;7", "confidence": "4;4;4;3", "soundness": "2;3;2;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "36;86;137;116", "wc_strengths": "10;55;125;89", "wc_weaknesses": "145;229;289;174", "wc_questions": "137;56;242;97", "wc_limitations": "2;9;29;1", "wc_review": "330;435;822;477", "wc_reply_reviewers": "35;46;79;39", "wc_reply_authors": "77;115;511;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;4;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.75, 37.94980237102691 ], "wc_strengths_avg": [ 69.75, 42.45806754905362 ], "wc_weaknesses_avg": [ 209.25, 55.04713889022753 ], "wc_questions_avg": [ 133.0, 69.14115995555758 ], "wc_limitations_avg": [ 10.25, 11.255554184490428 ], "wc_review_avg": [ 516.0, 184.60362943344316 ], "wc_reply_reviewers_avg": [ 49.75, 17.340343133859836 ], "wc_reply_authors_avg": [ 184.0, 191.01047091717248 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2009888159250887300&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "deepmind.com;nyu.edu;apple.com;apple.com;ensae.fr;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;3;1", "aff_unique_norm": "Google;New York University;Apple;ENSAE ParisTech", "aff_unique_dep": "Google;;Apple Inc.;", "aff_unique_url": "https://www.google.com;https://www.nyu.edu;https://www.apple.com;https://www.ensae.fr", "aff_unique_abbr": "Google;NYU;Apple;Ensae", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Do's and Don'ts: Learning Desirable Skills with Instruction Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96441", "id": "7X5zu6GIuW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7X5zu6GIuW", "openreview": "https://openreview.net/forum?id=7X5zu6GIuW", "poster": "", "project": "", "author_site": "HYUNSEUNG KIM, BYUNG KUN LEE, Hojoon Lee, Dongyoon Hwang, Donghu Kim, Jaegul Choo", "tldr": "", "abstract": "Unsupervised skill discovery is a learning paradigm that aims to acquire diverse behaviors without explicit rewards. However, it faces challenges in learning complex behaviors and often leads to learning unsafe or undesirable behaviors. For instance, in various continuous control tasks, current unsupervised skill discovery methods succeed in learning basic locomotions like standing but struggle with learning more complex movements such as walking and running. Moreover, they may acquire unsafe behaviors like tripping and rolling or navigate to undesirable locations such as pitfalls or hazardous areas. In response, we present **DoDont** (Do\u2019s and Dont\u2019s), an instruction-based skill discovery algorithm composed of two stages. First, in instruction learning stage, DoDont leverages action-free instruction videos to train an instruction network to distinguish desirable transitions from undesirable ones. Then, in the skill learning stage, the instruction network adjusts the reward function of the skill discovery algorithm to weight the desired behaviors. \nSpecifically, we integrate the instruction network into a distance-maximizing skill discovery algorithm, where the instruction network serves as the distance function. Empirically, with less than 8 instruction videos, DoDont effectively learns desirable behaviors and avoids undesirable ones across complex continuous control tasks. Code and videos are available at https://mynsng.github.io/dodont/", "keywords": "Reinforcement Learning;Skill Discovery", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Hyunseung Kim;Byungkun Lee;Hojoon Lee;Dongyoon Hwang;Donghu Kim;Jaegul Choo", "authorids": "~Hyunseung_Kim1;~Byungkun_Lee1;~Hojoon_Lee1;~Dongyoon_Hwang1;~Donghu_Kim1;~Jaegul_Choo1", "gender": "M;M;M;M;M;M", "homepage": ";;https://joonleesky.github.io/;;https://i-am-proto.github.io;https://sites.google.com/site/jaegulchoo/", "dblp": "244/0949;;;;379/3468;07/2074", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;RFjZjzkAAAAJ;;https://scholar.google.com/citations?view_op=list_works;LcYjQYcAAAAJ;GHJYsLEAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;donghu-kim-3b57972b6/;", "or_profile": "~Hyunseung_Kim1;~Byungkun_Lee1;~Hojoon_Lee1;~Dongyoon_Hwang1;~Donghu_Kim1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Sony AI;Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;sony.com;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Intern;PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nkim2024dos,\ntitle={Do's and Don'ts: Learning Desirable Skills with Instruction Videos},\nauthor={Hyunseung Kim and Byungkun Lee and Hojoon Lee and Dongyoon Hwang and Donghu Kim and Jaegul Choo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7X5zu6GIuW}\n}", "github": "", "reviewers": "AtB3;DB1p;oJKL", "pdf_size": 1994699, "rating": "5;7;7", "confidence": "3;3;4", "soundness": "3;4;3", "novelty": "2;2;3", "presentation": "3;4;4", "wc_summary": "63;81;94", "wc_strengths": "21;88;45", "wc_weaknesses": "130;27;119", "wc_questions": "143;143;35", "wc_limitations": "2;7;19", "wc_review": "359;346;312", "wc_reply_reviewers": "213;11;26", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 79.33333333333333, 12.710450643291745 ], "wc_strengths_avg": [ 51.333333333333336, 27.716822007983207 ], "wc_weaknesses_avg": [ 92.0, 46.180804092898455 ], "wc_questions_avg": [ 107.0, 50.91168824543142 ], "wc_limitations_avg": [ 9.333333333333334, 7.133644853010899 ], "wc_review_avg": [ 339.0, 19.8158185969358 ], "wc_reply_reviewers_avg": [ 83.33333333333333, 91.89244921222975 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4333200646088035943&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;sony.com;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Sony;Korea University", "aff_unique_dep": ";Sony AI;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.sony.com;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;Sony AI;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "South Korea;Japan" }, { "title": "Utilizing Human Behavior Modeling to Manipulate Explanations in AI-Assisted Decision Making: The Good, the Bad, and the Scary", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96440", "id": "7XkwzaPMvX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7XkwzaPMvX", "openreview": "https://openreview.net/forum?id=7XkwzaPMvX", "poster": "", "project": "", "author_site": "Zhuoyan Li, Ming Yin", "tldr": "", "abstract": "Recent advances in AI models have increased the integration of AI-based decision aids into the human decision making process. To fully unlock the potential of AI-assisted decision making, researchers have computationally modeled how humans incorporate AI recommendations into their final decisions, and utilized these models to improve human-AI team performance. Meanwhile, due to the ``black-box'' nature of AI models, providing AI explanations to human decision makers to help them rely on AI recommendations more appropriately has become a common practice. In this paper, we explore whether we can quantitatively model how humans integrate both AI recommendations and explanations into their decision process, and whether this quantitative understanding of human behavior from the learned model can be utilized to manipulate AI explanations, thereby nudging individuals towards making targeted decisions. Our extensive human experiments across various tasks demonstrate that human behavior can be easily influenced by these manipulated explanations towards targeted outcomes, regardless of the intent being adversarial or benign. Furthermore, individuals often fail to detect any anomalies in these explanations, despite their decisions being affected by them.", "keywords": "human-AI interaction;human-centered evaluation of explanation", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Zhuoyan Li;Ming Yin", "authorids": "~Zhuoyan_Li2;~Ming_Yin2", "gender": ";", "homepage": "https://xfleezy.github.io/zhuoyanli/;http://mingyin.org/", "dblp": ";", "google_scholar": ";J8ei9I0AAAAJ", "orcid": ";0000-0002-7364-139X", "linkedin": ";", "or_profile": "~Zhuoyan_Li2;~Ming_Yin2", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2024utilizing,\ntitle={Utilizing Human Behavior Modeling to Manipulate Explanations in {AI}-Assisted Decision Making: The Good, the Bad, and the Scary},\nauthor={Zhuoyan Li and Ming Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7XkwzaPMvX}\n}", "github": "", "reviewers": "1Xcr;L1hd;f7Sf;9s1B", "pdf_size": 19479928, "rating": "3;6;6;7", "confidence": "3;3;3;2", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "3;4;2;3", "wc_summary": "82;114;27;51", "wc_strengths": "38;109;99;78", "wc_weaknesses": "286;66;194;102", "wc_questions": "25;127;93;96", "wc_limitations": "1;49;31;46", "wc_review": "432;465;444;373", "wc_reply_reviewers": "0;74;35;30", "wc_reply_authors": "347;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.5, 32.71467560591118 ], "wc_strengths_avg": [ 81.0, 27.230497608380205 ], "wc_weaknesses_avg": [ 162.0, 85.46344247688599 ], "wc_questions_avg": [ 85.25, 37.24496610281717 ], "wc_limitations_avg": [ 31.75, 19.01808349965895 ], "wc_review_avg": [ 428.5, 34.15040263305837 ], "wc_reply_reviewers_avg": [ 34.75, 26.31895704620531 ], "wc_reply_authors_avg": [ 86.75, 150.25540755660012 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13990591115584831051&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "purdue.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Provably and Practically Efficient Adversarial Imitation Learning with General Function Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96439", "id": "7YdafFbhxL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7YdafFbhxL", "openreview": "https://openreview.net/forum?id=7YdafFbhxL", "poster": "/media/PosterPDFs/NeurIPS%202024/96439.png?t=1733725876.5861735", "project": "", "author_site": "Tian Xu, Zhilong Zhang, Ruishuo Chen, Yihao Sun, Yang Yu", "tldr": "", "abstract": "As a prominent category of imitation learning methods, adversarial imitation learning (AIL) has garnered significant practical success powered by neural network approximation. However, existing theoretical studies on AIL are primarily limited to simplified scenarios such as tabular and linear function approximation and involve complex algorithmic designs that hinder practical implementation, highlighting a gap between theory and practice. In this paper, we explore the theoretical underpinnings of online AIL with general function approximation. We introduce a new method called optimization-based AIL (OPT-AIL), which centers on performing online optimization for reward functions and optimism-regularized Bellman error minimization for Q-value functions. Theoretically, we prove that OPT-AIL achieves polynomial expert sample complexity and interaction complexity for learning near-expert policies. To our best knowledge, OPT-AIL is the first provably efficient AIL method with general function approximation. Practically, OPT-AIL only requires the approximate optimization of two objectives, thereby facilitating practical implementation. Empirical studies demonstrate that OPT-AIL outperforms previous state-of-the-art deep AIL methods in several challenging tasks.", "keywords": "imitation learning;adversarial imitation learning;sample efficiency", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/2c50e49e1010ab3eb5a5903382bd97c224e113df.zip", "author": "Tian Xu;Zhilong Zhang;Ruishuo Chen;Yihao Sun;Yang Yu", "authorids": "~Tian_Xu2;~Zhilong_Zhang2;~Ruishuo_Chen1;~Yihao_Sun1;~Yang_Yu5", "gender": "M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/xut/;http://www.lamda.nju.edu.cn/zhangzl/;https://rayshore.cn/;http://www.lamda.nju.edu.cn/sunyh/;http://www.lamda.nju.edu.cn/yuy", "dblp": "07/2985-3;;392/4303;;46/2181-1", "google_scholar": "e5mnk1wAAAAJ;;y3QxJFYAAAAJ;pFNG8fMAAAAJ;PG2lDSwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tian_Xu2;~Zhilong_Zhang2;~Ruishuo_Chen1;~Yihao_Sun1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;smail.nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Undergrad student;MS student;Professor", "bibtex": "@inproceedings{\nxu2024provably,\ntitle={Provably and Practically Efficient Adversarial Imitation Learning with General Function Approximation},\nauthor={Tian Xu and Zhilong Zhang and Ruishuo Chen and Yihao Sun and Yang Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7YdafFbhxL}\n}", "github": "", "reviewers": "ntyv;HVwx;bZiM;t2br", "pdf_size": 793470, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "37;76;56;76", "wc_strengths": "30;52;89;90", "wc_weaknesses": "145;65;118;127", "wc_questions": "172;93;47;68", "wc_limitations": "15;23;10;8", "wc_review": "399;309;320;369", "wc_reply_reviewers": "22;72;42;158", "wc_reply_authors": "38;43;44;45", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.25, 16.20763708873073 ], "wc_strengths_avg": [ 65.25, 25.469344318219107 ], "wc_weaknesses_avg": [ 113.75, 29.77729839995563 ], "wc_questions_avg": [ 95.0, 47.34448225506326 ], "wc_limitations_avg": [ 14.0, 5.787918451395113 ], "wc_review_avg": [ 349.25, 36.540217569138804 ], "wc_reply_reviewers_avg": [ 73.5, 51.9302416709185 ], "wc_reply_authors_avg": [ 42.5, 2.692582403567252 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18072398718086885498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;nju.edu.cn;smail.nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Asynchronous Perception Machine for Efficient Test Time Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96438", "id": "7Ye12RLZ4P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7Ye12RLZ4P", "openreview": "https://openreview.net/forum?id=7Ye12RLZ4P", "poster": "/media/PosterPDFs/NeurIPS%202024/96438.png?t=1733611189.7462907", "project": "", "author_site": "Rajat Modi, Yogesh Rawat", "tldr": "", "abstract": "In this work, we propose Asynchronous Perception Machine (APM), a computationally-efficient architecture for test-time-training (TTT). APM can process patches of an image one at a time in any order asymmetrically and still encode semantic-awareness in the net. We demonstrate APM's ability to recognize out-of-distribution images without dataset-specific pre-training, augmentation or any-pretext task. APM offers competitive performance over existing TTT approaches. To perform TTT, APM just distills test sample's representation once. APM possesses a unique property: it can learn using just this single representation and starts predicting semantically-aware features.\nAPM demostrates potential applications beyond test-time-training: APM can scale up to a dataset of 2D images and yield semantic-clusterings in a single forward pass. APM also provides first empirical evidence towards validating GLOM's insight, i.e. input percept is a field. Therefore, APM helps us converge towards an implementation which can do both interpolation and perception on a shared-connectionist hardware. Our code is publicly available at https://rajatmodi62.github.io/apm_project_page/\n\n--------\n\n**It now appears that some of the ideas in GLOM could be made to work.**\n\nhttps://www.technologyreview.com/2021/04/16/1021871/geoffrey-hinton-glom-godfather-ai-neural-networks/\n\nGLOM = Geoff's Latest Original Model.\n```\n .-\"\"\"\"\"\"-.\n .' '.\n/ O O \\\n| O |\n \\ '------' /\n '. .'\n '-....-'\nSilent men in deep-contemplation.\nSilent men emerges only sometimes.\nSilent men love all.\nSilent men practice slow science.\n```", "keywords": "MORTAL COMPUTATION;GLOM;test time training;neural fields;implicit representation;distillation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Rajat Modi;Yogesh S Rawat", "authorids": "~Rajat_Modi1;~Yogesh_S_Rawat1", "gender": "male;M", "homepage": "https://www.linkedin.com/in/rajat-modi-54377877/;https://www.crcv.ucf.edu/person/rawat/", "dblp": ";148/2258", "google_scholar": "https://scholar.google.ca/citations?user=Ypzln1UAAAAJ;D_JvEcwAAAAJ", "orcid": ";", "linkedin": "rajat-modi-54377877/;", "or_profile": "~Rajat_Modi1;~Yogesh_S_Rawat1", "aff": "University of Central Florida;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmodi2024asynchronous,\ntitle={Asynchronous Perception Machine for Efficient Test Time Training},\nauthor={Rajat Modi and Yogesh S Rawat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7Ye12RLZ4P}\n}", "github": "", "reviewers": "X1eH;PHih;KZWi;doB6", "pdf_size": 6002547, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "2;2;2;3", "novelty": "2;2;2;4", "presentation": "1;2;3;3", "wc_summary": "41;50;170;165", "wc_strengths": "13;14;301;119", "wc_weaknesses": "161;246;579;101", "wc_questions": "2;17;205;10", "wc_limitations": "2;5;79;5", "wc_review": "219;332;1334;400", "wc_reply_reviewers": "16;421;93;105", "wc_reply_authors": "31;983;126;35", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;3;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 106.5, 61.10851004565567 ], "wc_strengths_avg": [ 111.75, 117.44653038723622 ], "wc_weaknesses_avg": [ 271.75, 184.72056599090422 ], "wc_questions_avg": [ 58.5, 84.74815632212892 ], "wc_limitations_avg": [ 22.75, 32.4990384473141 ], "wc_review_avg": [ 571.25, 445.09402096635716 ], "wc_reply_reviewers_avg": [ 158.75, 155.2133612161015 ], "wc_reply_authors_avg": [ 293.75, 399.7482801714099 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5305845745796582836&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucf.edu;ucf.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Central Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ucf.edu", "aff_unique_abbr": "UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Untrained Neural Nets for Snapshot Compressive Imaging: Theory and Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96437", "id": "7aFEqIb1dp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7aFEqIb1dp", "openreview": "https://openreview.net/forum?id=7aFEqIb1dp", "poster": "/media/PosterPDFs/NeurIPS%202024/96437.png?t=1733333364.2324429", "project": "", "author_site": "Mengyu Zhao, Xi Chen, Xin Yuan, Shirin Jalali", "tldr": "", "abstract": "Snapshot compressive imaging (SCI) recovers high-dimensional (3D) data cubes from a single 2D measurement, enabling diverse applications like video and hyperspectral imaging to go beyond standard techniques in terms of acquisition speed and efficiency. In this paper, we focus on SCI recovery algorithms that employ untrained neural networks (UNNs), such as deep image prior (DIP), to model source structure. Such UNN-based methods are appealing as they have the potential of avoiding the computationally intensive retraining required for different source models and different measurement scenarios. We first develop a theoretical framework for characterizing the performance of such UNN-based methods. The theoretical framework, on the one hand, enables us to optimize the parameters of data-modulating masks, and on the other hand, provides a fundamental connection between the number of data frames that can be recovered from a single measurement to the parameters of the untrained NN. We also employ the recently proposed bagged-deep-image-prior (bagged-DIP) idea to develop SCI Bagged Deep Video Prior (SCI-BDVP) algorithms that address the common challenges faced by standard UNN solutions. Our experimental results show that in video SCI our proposed solution achieves state-of-the-art among UNN methods, and in the case of noisy measurements, it even outperforms supervised solutions. Code is publicly available at [https://github.com/Computational-Imaging-RU/SCI-BDVP](https://github.com/Computational-Imaging-RU/SCI-BDVP).", "keywords": "Snapshot Compressive Imaging;Deep Image Prior;Inverse Problem", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/d3e054bad8f82be040ee9e3a797a70b64d25f3e9.zip", "author": "Mengyu Zhao;Xi Chen;Xin Yuan;Shirin Jalali", "authorids": "~Mengyu_Zhao2;~Xi_Chen33;~Xin_Yuan4;~Shirin_Jalali1", "gender": ";;M;F", "homepage": "https://mengyu02.github.io/;https://xichen-97.github.io/;https://en.westlake.edu.cn/faculty/xin-yuan.html;https://sites.google.com/site/shirinjalali/", "dblp": ";;78/713-2;99/5024", "google_scholar": "https://scholar.google.com/citations?hl=en;VEvx_30AAAAJ;cS9CbWkAAAAJ;", "orcid": "0009-0009-9149-1653;0000-0002-5116-7500;0000-0002-8311-7524;", "linkedin": "mengyu-zhao-7ab19274/;;xin-yuan-0024bb31/;", "or_profile": "~Mengyu_Zhao2;~Xi_Chen33;~Xin_Yuan4;~Shirin_Jalali1", "aff": "Rutgers University;Rutgers University, New Brunswick;Westlake University;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;westlake.edu.cn;rutgers.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024untrained,\ntitle={Untrained Neural Nets for Snapshot Compressive Imaging: Theory and Algorithms},\nauthor={Mengyu Zhao and Xi Chen and Xin Yuan and Shirin Jalali},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7aFEqIb1dp}\n}", "github": "", "reviewers": "E4xU;ytyT;pMi8;tX5M", "pdf_size": 4467778, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "2;3;4;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "27;5;147;78", "wc_strengths": "36;12;101;39", "wc_weaknesses": "362;16;153;101", "wc_questions": "18;1;33;40", "wc_limitations": "10;1;15;46", "wc_review": "453;35;449;304", "wc_reply_reviewers": "187;10;5;14", "wc_reply_authors": "246;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.25, 54.62314070062248 ], "wc_strengths_avg": [ 47.0, 32.8861673048107 ], "wc_weaknesses_avg": [ 158.0, 127.52842820328337 ], "wc_questions_avg": [ 23.0, 14.983324063771697 ], "wc_limitations_avg": [ 18.0, 16.926310879810757 ], "wc_review_avg": [ 310.25, 169.87550588592813 ], "wc_reply_reviewers_avg": [ 54.0, 76.85375722760729 ], "wc_reply_authors_avg": [ 61.5, 106.52112466548596 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11200925668033877092&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "rutgers.edu;rutgers.edu;westlake.edu.cn;rutgers.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Rutgers University;Westlake University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.westlake.edu.cn", "aff_unique_abbr": "Rutgers;WU", "aff_campus_unique_index": "1", "aff_campus_unique": ";New Brunswick", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Optimal Multiclass U-Calibration Error and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96436", "id": "7aFRgCC8Q7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7aFRgCC8Q7", "openreview": "https://openreview.net/forum?id=7aFRgCC8Q7", "poster": "/media/PosterPDFs/NeurIPS%202024/96436.png?t=1733804725.7739296", "project": "", "author_site": "Haipeng Luo, Spandan Senapati, Vatsal Sharan", "tldr": "", "abstract": "We consider the problem of online multiclass U-calibration, where a forecaster aims to make sequential distributional predictions over $K$ classes with low U-calibration error, that is, low regret with respect to all bounded proper losses simultaneously.\n Kleinberg et al. (2023) developed an algorithm with U-calibration error $\\mathcal{O}(K\\sqrt{T})$ after $T$ rounds and raised the open question of what the optimal bound is.\n We resolve this question by showing that the optimal U-calibration error is $\\Theta(\\sqrt{KT})$ --- we start with a simple observation that the Follow-the-Perturbed-Leader algorithm of Daskalakis and Syrgkanis (2016) achieves this upper bound, followed by a matching lower bound constructed with a specific proper loss (which, as a side result, also proves the optimality of the algorithm of Daskalakis and Syrgkanis (2016) in the context of online learning against an adversary with finite choices).\n We also strengthen our results under natural assumptions on the loss functions, including $\\Theta(\\log T)$ U-calibration error for Lipschitz proper losses, $\\mathcal{O}(\\log T)$ U-calibration error for a certain class of decomposable proper losses, U-calibration error bounds for proper losses with a low covering number, and others.", "keywords": "Multiclass U-calibration;Pseudo Multiclass U-calibration;Minimax Regret;Follow-the-Perturbed-Leader", "primary_area": "online_learning", "supplementary_material": "", "author": "Haipeng Luo;Spandan Senapati;Vatsal Sharan", "authorids": "~Haipeng_Luo1;~Spandan_Senapati1;~Vatsal_Sharan1", "gender": "M;;M", "homepage": "https://haipeng-luo.net/;;https://vatsalsharan.github.io/", "dblp": "62/2576;;126/2543", "google_scholar": "ct2hw4UAAAAJ;;Ize17HEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Haipeng_Luo1;~Spandan_Senapati1;~Vatsal_Sharan1", "aff": "University of Southern California;;University of Southern California", "aff_domain": "usc.edu;;usc.edu", "position": "Associate Professor;;Assistant Professor", "bibtex": "@inproceedings{\nluo2024optimal,\ntitle={Optimal Multiclass U-Calibration Error and Beyond},\nauthor={Haipeng Luo and Spandan Senapati and Vatsal Sharan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7aFRgCC8Q7}\n}", "github": "", "reviewers": "Uz2D;jtgy;aUK6;PchL", "pdf_size": 514551, "rating": "6;7;7;8", "confidence": "3;5;3;3", "soundness": "3;4;4;4", "novelty": "2;3;4;4", "presentation": "3;4;4;4", "wc_summary": "25;380;76;93", "wc_strengths": "167;255;59;84", "wc_weaknesses": "64;3;10;133", "wc_questions": "66;12;206;22", "wc_limitations": "7;5;7;9", "wc_review": "329;655;358;341", "wc_reply_reviewers": "28;18;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 143.5, 138.8173260079591 ], "wc_strengths_avg": [ 141.25, 76.88424741128705 ], "wc_weaknesses_avg": [ 52.5, 52.12724815295739 ], "wc_questions_avg": [ 76.5, 77.4774160642958 ], "wc_limitations_avg": [ 7.0, 1.4142135623730951 ], "wc_review_avg": [ 420.75, 135.63623225377503 ], "wc_reply_reviewers_avg": [ 11.5, 12.031209415515965 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11060530355825871762&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "usc.edu;;usc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Ensemble Learning for Heterogeneous Large Language Models with Deep Parallel Collaboration", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96435", "id": "7arAADUK6D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7arAADUK6D", "openreview": "https://openreview.net/forum?id=7arAADUK6D", "poster": "/media/PosterPDFs/NeurIPS%202024/96435.png?t=1731489516.0423002", "project": "", "author_site": "Yichong Huang, Xiaocheng Feng, Baohang Li, Yang Xiang, Hui Wang, Ting Liu, Bing Qin", "tldr": "", "abstract": "Large language models (LLMs) exhibit complementary strengths in various tasks, motivating the research of LLM ensembling.\nHowever, existing work focuses on training an extra reward model or fusion model to select or combine all candidate answers, posing a great challenge to the generalization on unseen data distributions.\nBesides, prior methods use textual responses as communication media, ignoring the valuable information in the internal representations.\nIn this work, we propose a training-free ensemble framework \\textsc{DeePEn}, fusing the informative probability distributions yielded by different LLMs at each decoding step.\nUnfortunately, the vocabulary discrepancy between heterogeneous LLMs directly makes averaging the distributions unfeasible due to the token misalignment.\nTo address this challenge, \\textsc{DeePEn} maps the probability distribution of each model from its own probability space to a universal \\textit{relative space} based on the relative representation theory, and performs aggregation.\nNext, we devise a search-based inverse transformation to transform the aggregated result back to the probability space of one of the ensembling LLMs (main model), in order to determine the next token.\nWe conduct extensive experiments on ensembles of different number of LLMs, ensembles of LLMs with different architectures, and ensembles between the LLM and the specialist model.\nExperimental results show that (i) \\textsc{DeePEn} achieves consistent improvements across six benchmarks covering subject examination, reasoning, and knowledge, (ii) a well-performing specialist model can benefit from a less effective LLM through distribution fusion, and (iii) \\textsc{DeePEn} has complementary strengths with other ensemble methods such as voting.", "keywords": "Ensemble Learning;Large Language Model;Relative Representation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/0ed79889a1042258b5b502e00b09bba87d8e88a6.zip", "author": "Yichong Huang;Xiaocheng Feng;Baohang Li;Yang Xiang;Hui Wang;Ting Liu;Bing Qin", "authorids": "~Yichong_Huang1;~Xiaocheng_Feng1;~Baohang_Li1;~Yang_Xiang4;~Hui_Wang13;~Ting_Liu2;~Bing_Qin2", "gender": "M;M;;M;M;M;", "homepage": "https://ychuang.netlify.app/;http://ir.hit.edu.cn/~xcfeng/;;;https://openi.pcl.ac.cn;;http://ir.hit.edu.cn/~qinb", "dblp": "291/4211;;348/5894.html;50/2192-3;39/721-73;52/5150-1;86/5934.html", "google_scholar": "e0H1eqEAAAAJ;Xu8NbhYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;zDyL-NoAAAAJ;;zyMJ1V0AAAAJ;LKnCub0AAAAJ", "orcid": "0009-0005-4004-8564;;;0000-0003-1395-6805;;;0000-0002-2543-5604", "linkedin": ";;;yang-xiang-7554b6195/;;;", "or_profile": "~Yichong_Huang1;~Xiaocheng_Feng1;~Baohang_Li1;~Yang_Xiang4;~Hui_Wang13;~Ting_Liu2;~Bing_Qin2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Peng Cheng Laboratory;Cloud Computing;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;stu.hit.edu.cn;pcl.ac;pcl.ac.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;Full Professor;MS student;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024ensemble,\ntitle={Ensemble Learning for Heterogeneous Large Language Models with Deep Parallel Collaboration},\nauthor={Yichong Huang and Xiaocheng Feng and Baohang Li and Yang Xiang and Hui Wang and Ting Liu and Bing Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7arAADUK6D}\n}", "github": "", "reviewers": "Q9GV;c5c3;JJTA", "pdf_size": 945245, "rating": "6;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "108;30;50", "wc_strengths": "108;41;31", "wc_weaknesses": "88;130;58", "wc_questions": "2;79;10", "wc_limitations": "2;6;3", "wc_review": "308;286;152", "wc_reply_reviewers": "0;14;14", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.666666666666664, 33.079029946814074 ], "wc_strengths_avg": [ 60.0, 34.18576701884378 ], "wc_weaknesses_avg": [ 92.0, 29.5296461204668 ], "wc_questions_avg": [ 30.333333333333332, 34.56716489515576 ], "wc_limitations_avg": [ 3.6666666666666665, 1.699673171197595 ], "wc_review_avg": [ 248.66666666666666, 68.9411987775734 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 6.599663291074444 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8034518513398180505&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hit.edu.cn;hit.edu.cn;stu.hit.edu.cn;pcl.ac;pcl.ac.cn;hit.edu.cn;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Harbin Institute of Technology;Pengcheng Laboratory;Cloud Computing", "aff_unique_dep": ";Peng Cheng Laboratory;", "aff_unique_url": "http://www.hit.edu.cn/;http://www.pcl.ac.cn;", "aff_unique_abbr": "HIT;PCL;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Exploring the Role of Large Language Models in Prompt Encoding for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96434", "id": "7b2DrIBGZz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7b2DrIBGZz", "openreview": "https://openreview.net/forum?id=7b2DrIBGZz", "poster": "", "project": "", "author_site": "Bingqi Ma, ZHUOFAN ZONG, Guanglu Song, Hongsheng Li, Yu Liu", "tldr": "", "abstract": "Large language models based on decoder-only transformers have demonstrated superior text understanding capabilities compared to CLIP and T5-series models.\nHowever, the paradigm for utilizing current advanced LLMs in text-to-image diffusion models remains to be explored.\nWe observed an unusual phenomenon: directly using a large language model as the prompt encoder significantly degrades the prompt-following ability in image generation.\nWe identified two main obstacles behind this issue.\nOne is the misalignment between the next token prediction training in LLM and the requirement for discriminative prompt features in diffusion models.\nThe other is the intrinsic positional bias introduced by the decoder-only architecture.\nTo deal with this issue, we propose a novel framework to fully harness the capabilities of LLMs.\nThrough the carefully designed usage guidance, we effectively enhance the text representation capability of the LLM for prompt encoding and eliminate its inherent positional bias.\nThis allows us to flexibly integrate state-of-the-art LLMs into the text-to-image generation model.\nFurthermore, we also provide an effective manner to fuse multiple LLMs into our framework.\nConsidering the excellent performance and scaling capabilities demonstrated by the transformer architecture, we further design an LLM-Infused Diffusion Transformer (LI-DIT)based on the framework.\nWe conduct extensive experiments to validate LI-DIT across model size and data size.\nBenefiting from the inherent ability of the LLMs and our innovative designs, the prompt understanding performance of LI-DIT easily surpasses state-of-the-art open-source models as well as mainstream closed-source commercial models including Stable Diffusion 3, DALL-E 3, and Midjourney V6.", "keywords": "large language model; text-to-image generation; diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Bingqi Ma;Zhuofan Zong;Guanglu Song;Hongsheng Li;Yu Liu", "authorids": "~Bingqi_Ma1;~Zhuofan_Zong1;~Guanglu_Song2;~Hongsheng_Li3;~Yu_Liu2", "gender": "M;M;M;M;M", "homepage": "https://mabingqi.github.io/;https://zongzhuofan.github.io/;;http://www.ee.cuhk.edu.hk/~hsli;http://liuyu.us", "dblp": "309/1989;266/4989;207/4745;27/7402-1;97/2274-15", "google_scholar": "rcWQWCoAAAAJ;vls0YhoAAAAJ;Bd3v08QAAAAJ;BN2Ze-QAAAAJ;", "orcid": ";;;;", "linkedin": "https://www.linkedin.cn/injobs/in/%E5%86%B0%E5%A5%87-%E9%A9%AC-106637154;;;;", "or_profile": "~Bingqi_Ma1;~Zhuofan_Zong1;~Guanglu_Song2;~Hongsheng_Li3;~Yu_Liu2", "aff": "Sensetime Group Limmited;The Chinese University of Hong Kong;Sensetime;The Chinese University of Hong Kong;SenseTime", "aff_domain": "sensetime.com;link.cuhk.edu.hk;sensetime.com;cuhk.edu.hk;sensetime.com", "position": "Researcher;PhD student;Computer Vision Researcher;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nma2024exploring,\ntitle={Exploring the Role of Large Language Models in Prompt Encoding for Diffusion Models},\nauthor={Bingqi Ma and Zhuofan Zong and Guanglu Song and Hongsheng Li and Yu Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7b2DrIBGZz}\n}", "github": "", "reviewers": "NTY6;Nr61;nE1N;yHpu", "pdf_size": 33593898, "rating": "4;5;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "91;75;97;99", "wc_strengths": "41;45;36;110", "wc_weaknesses": "152;205;56;307", "wc_questions": "2;67;96;20", "wc_limitations": "1;19;1;1", "wc_review": "287;411;286;537", "wc_reply_reviewers": "0;37;11;27", "wc_reply_authors": "0;21;11;21", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.5, 9.420721840708387 ], "wc_strengths_avg": [ 58.0, 30.191058278901057 ], "wc_weaknesses_avg": [ 180.0, 90.7110798083674 ], "wc_questions_avg": [ 46.25, 37.25838831726354 ], "wc_limitations_avg": [ 5.5, 7.794228634059948 ], "wc_review_avg": [ 380.25, 103.79637517755617 ], "wc_reply_reviewers_avg": [ 18.75, 14.254385290148432 ], "wc_reply_authors_avg": [ 13.25, 8.671072598012312 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1039684135846498298&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sensetime.com;link.cuhk.edu.hk;sensetime.com;cuhk.edu.hk;sensetime.com", "author_num": 5, "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "SenseTime Group Limited;Chinese University of Hong Kong;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sensetime.com;https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "SenseTime;CUHK;SenseTime", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Dissecting the Failure of Invariant Learning on Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96433", "id": "7eFS8aZHAM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7eFS8aZHAM", "openreview": "https://openreview.net/forum?id=7eFS8aZHAM", "poster": "/media/PosterPDFs/NeurIPS%202024/96433.png?t=1731486981.4051583", "project": "", "author_site": "Qixun Wang, Yifei Wang, Yisen Wang, Xianghua Ying", "tldr": "", "abstract": "Enhancing node-level Out-Of-Distribution (OOD) generalization on graphs remains a crucial area. In this paper, we develop a Structural Causal Model (SCM) to theoretically dissect the performance of two prominent invariant learning methods--Invariant Risk Minimization (IRM) and Variance-Risk Extrapolation (VREx)--in node-level OOD settings. Our analysis reveals a critical limitation: these methods may struggle to identify invariant features due to the complexities introduced by the message-passing mechanism, which can obscure causal features within a range of neighboring samples. To address this, we propose Cross-environment Intra-class Alignment (CIA), which explicitly eliminates spurious features by aligning representations within the same class, bypassing the need for explicit knowledge of underlying causal patterns. To adapt CIA to node-level OOD scenarios where environment labels are hard to obtain, we further propose CIA-LRA (Localized Reweighting Alignment) that leverages the distribution of neighboring labels to selectively align node representations, effectively distinguishing and preserving invariant features while removing spurious ones, all without relying on environment labels. We theoretically prove CIA-LRA's effectiveness by deriving an OOD generalization error bound based on PAC-Bayesian analysis. Experiments on graph OOD benchmarks validate the superiority of CIA and CIA-LRA, marking a significant advancement in node-level OOD generalization.", "keywords": "Machine Learning;Out-of-distribution Generalization;Invariant Learning;Graph Machine Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Qixun Wang;Yifei Wang;Yisen Wang;Xianghua Ying", "authorids": "~Qixun_Wang1;~Yifei_Wang1;~Yisen_Wang1;~Xianghua_Ying3", "gender": ";M;M;M", "homepage": "https://novaglow646.github.io/QixunWang-Homepage.github.io/;https://yifeiwang77.com;https://yisenwang.github.io/;http://www.cis.pku.edu.cn/info/1084/1268.htm", "dblp": "256/6758;00/555-1;172/1346-1;00/131", "google_scholar": ";-CLy6YsAAAAJ;uMWPDboAAAAJ;27o9L1wAAAAJ", "orcid": ";;;0000-0002-9785-0727", "linkedin": ";;;", "or_profile": "~Qixun_Wang1;~Yifei_Wang1;~Yisen_Wang1;~Xianghua_Ying3", "aff": "Peking University;Massachusetts Institute of Technology;Peking University;Peking University", "aff_domain": "pku.edu.cn;mit.edu;pku.edu.cn;pku.edu.cn", "position": "PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024dissecting,\ntitle={Dissecting the Failure of Invariant Learning on Graphs},\nauthor={Qixun Wang and Yifei Wang and Yisen Wang and Xianghua Ying},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7eFS8aZHAM}\n}", "github": "", "reviewers": "yNy3;LzeD;empj;nx4e", "pdf_size": 4385102, "rating": "5;5;6;7", "confidence": "2;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "4;3;2;3", "wc_summary": "41;83;67;74", "wc_strengths": "27;60;64;45", "wc_weaknesses": "181;95;204;475", "wc_questions": "54;1;94;17", "wc_limitations": "1;1;1;41", "wc_review": "304;240;430;652", "wc_reply_reviewers": "612;0;98;45", "wc_reply_authors": "1832;71;26;23", "reply_reviewers": "3;0;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.25, 15.642490210960657 ], "wc_strengths_avg": [ 49.0, 14.543039572248986 ], "wc_weaknesses_avg": [ 238.75, 142.32072055747892 ], "wc_questions_avg": [ 41.5, 35.892199709686224 ], "wc_limitations_avg": [ 11.0, 17.320508075688775 ], "wc_review_avg": [ 406.5, 157.36184416814643 ], "wc_reply_reviewers_avg": [ 188.75, 246.81306185046205 ], "wc_reply_authors_avg": [ 488.0, 776.1916644746966 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Iy6355B2CXsJ:scholar.google.com/&scioq=Dissecting+the+Failure+of+Invariant+Learning+on+Graphs&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "pku.edu.cn;mit.edu;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Peking University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://web.mit.edu", "aff_unique_abbr": "Peking U;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Vidu4D: Single Generated Video to High-Fidelity 4D Reconstruction with Dynamic Gaussian Surfels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96432", "id": "7eIaqYrpcs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7eIaqYrpcs", "openreview": "https://openreview.net/forum?id=7eIaqYrpcs", "poster": "", "project": "", "author_site": "Yikai Wang, Xinzhou Wang, Zilong Chen, Zhengyi Wang, Fuchun Sun, Jun Zhu", "tldr": "", "abstract": "Video generative models are receiving particular attention given their ability to generate realistic and imaginative frames. Besides, these models are also observed to exhibit strong 3D consistency, significantly enhancing their potential to act as world simulators. In this work, we present Vidu4D, a novel reconstruction model that excels in accurately reconstructing 4D (i.e., sequential 3D) representations from single generated videos, addressing challenges associated with non-rigidity and frame distortion. This capability is pivotal for creating high-fidelity virtual contents that maintain both spatial and temporal coherence. At the core of Vidu4D is our proposed Dynamic Gaussian Surfels (DGS) technique. DGS optimizes time-varying warping functions to transform Gaussian surfels (surface elements) from a static state to a dynamically warped state. This transformation enables a precise depiction of motion and deformation over time. To preserve the structural integrity of surface-aligned Gaussian surfels, we design the warped-state geometric regularization based on continuous warping fields for estimating normals. Additionally, we learn refinements on rotation and scaling parameters of Gaussian surfels, which greatly alleviates texture flickering during the warping process and enhances the capture of fine-grained appearance details. Vidu4D also contains a novel initialization state that provides a proper start for the warping fields in DGS. Equipping Vidu4D with an existing video generative model, the overall framework demonstrates high-fidelity text-to-4D generation in both appearance and geometry.", "keywords": "generative models;4D reconstruction;Gaussian splatting", "primary_area": "generative_models", "supplementary_material": "", "author": "Yikai Wang;Xinzhou Wang;Zilong Chen;Zhengyi Wang;Fuchun Sun;Jun Zhu", "authorids": "~Yikai_Wang2;~Xinzhou_Wang1;~Zilong_Chen1;~Zhengyi_Wang1;~Fuchun_Sun3;~Jun_Zhu2", "gender": "M;M;M;M;M;M", "homepage": "https://yikaiw.github.io/;https://zz7379.github.io/;https://heheyas.github.io/;https://thuwzy.github.io;https://github.com/fuchuns;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "85/9555-1;76/3738;;;02/2737-1;50/2644-1", "google_scholar": "MnW5aegAAAAJ;;2pbka1gAAAAJ;dtuPuRQAAAAJ;;axsP38wAAAAJ", "orcid": ";0009-0009-5332-0853;;;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/zilong-chen-99671523b;;;", "or_profile": "~Yikai_Wang2;~Xinzhou_Wang1;~Zilong_Chen1;~Zhengyi_Wang1;~Fuchun_Sun3;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "Postdoc;PhD student;PhD student;PhD student;Instructor;Professor", "bibtex": "@inproceedings{\nwang2024vidud,\ntitle={Vidu4D: Single Generated Video to High-Fidelity 4D Reconstruction with Dynamic Gaussian Surfels},\nauthor={Yikai Wang and Xinzhou Wang and Zilong Chen and Zhengyi Wang and Fuchun Sun and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7eIaqYrpcs}\n}", "github": "", "reviewers": "7btN;yVYk;crn6;1Eme", "pdf_size": 14528714, "rating": "4;5;5;6", "confidence": "5;4;4;4", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "1;3;3;2", "wc_summary": "127;57;59;64", "wc_strengths": "31;46;42;94", "wc_weaknesses": "136;111;154;141", "wc_questions": "75;25;14;66", "wc_limitations": "150;30;1;7", "wc_review": "519;269;270;372", "wc_reply_reviewers": "0;19;397;31", "wc_reply_authors": "42;42;549;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 76.75, 29.123658767400773 ], "wc_strengths_avg": [ 53.25, 24.159625411003375 ], "wc_weaknesses_avg": [ 135.5, 15.596473960482221 ], "wc_questions_avg": [ 45.0, 25.990382836734053 ], "wc_limitations_avg": [ 47.0, 60.44418913344772 ], "wc_review_avg": [ 357.5, 102.20200585115734 ], "wc_reply_reviewers_avg": [ 111.75, 165.0596483093309 ], "wc_reply_authors_avg": [ 158.25, 226.25027624292528 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2714591728051566970&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CleanDiffuser: An Easy-to-use Modularized Library for Diffusion Models in Decision Making", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97834", "id": "7ey2ugXs36", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7ey2ugXs36", "openreview": "https://openreview.net/forum?id=7ey2ugXs36", "poster": "", "project": "", "author_site": "Zibin Dong, Yifu Yuan, Jianye Hao, Fei Ni, Yi Ma, Pengyi Li, YAN ZHENG", "tldr": "", "abstract": "Leveraging the powerful generative capability of diffusion models (DMs) to build decision-making agents has achieved extensive success. However, there is still a demand for an easy-to-use and modularized open-source library that offers customized and efficient development for DM-based decision-making algorithms. In this work, we introduce **CleanDiffuser**, the first DM library specifically designed for decision-making algorithms. By revisiting the roles of DMs in the decision-making domain, we identify a set of essential sub-modules that constitute the core of CleanDiffuser, allowing for the implementation of various DM algorithms with simple and flexible building blocks. To demonstrate the reliability and flexibility of CleanDiffuser, we conduct comprehensive evaluations of various DM algorithms implemented with CleanDiffuser across an extensive range of tasks. The analytical experiments provide a wealth of valuable design choices and insights, reveal opportunities and challenges, and lay a solid groundwork for future research. CleanDiffuser will provide long-term support to the decision-making community, enhancing reproducibility and fostering the development of more robust solutions.", "keywords": "Diffusion Models;Decision Making;Library;Reinforcement Learning;Imitation Learning", "primary_area": "", "supplementary_material": "/attachment/638989b3977128ebd2350a70695b641c324d97f7.zip", "author": "Zibin Dong;Yifu Yuan;Jianye HAO;Fei Ni;Yi Ma;Pengyi Li;YAN ZHENG", "authorids": "~Zibin_Dong1;~Yifu_Yuan1;~Jianye_HAO1;~Fei_Ni1;~Yi_Ma5;~Pengyi_Li1;~YAN_ZHENG1", "gender": "M;M;M;M;;M;M", "homepage": "https://github.com/GrandpaDZB;https://yifu-yuan.github.io/;http://www.icdai.org/jianye.html;https://fei-ni.github.io/;https://mayi1996.top/;https://yeshenpy.github.io/;https://yanzzzzz.github.io", "dblp": "358/5885;261/3688;21/7664.html;11/579-1;69/1112-5.html;195/6948;10/2381-2", "google_scholar": "JQ6881QAAAAJ;83JhosMAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;TdVWzqgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ", "orcid": "0000-0002-2986-6022;0009-0009-2194-942X;0000-0002-0422-8235;0009-0007-5623-2782;0000-0001-9375-6605;0009-0009-8546-2346;", "linkedin": ";;;;;;", "or_profile": "~Zibin_Dong1;~Yifu_Yuan1;~Jianye_HAO1;~Fei_Ni1;~Yi_Ma5;~Pengyi_Li1;~YAN_ZHENG1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin Unibersity, China", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "MS student;PhD student;Associate Professor;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndong2024cleandiffuser,\ntitle={CleanDiffuser: An Easy-to-use Modularized Library for Diffusion Models in Decision Making},\nauthor={Zibin Dong and Yifu Yuan and Jianye HAO and Fei Ni and Yi Ma and Pengyi Li and YAN ZHENG},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=7ey2ugXs36}\n}", "github": "", "reviewers": "WBYx;QWaA;PkLJ;eaGW", "pdf_size": 5812125, "rating": "6;7;7;7", "confidence": "4;3;4;3", "wc_summary_and_contributions": "50;35;71;75", "wc_strengths": "55;104;15;33", "wc_improvement": "258;166;2;9", "wc_limitations": "12;54;1;7", "wc_correctness": "47;13;1;5", "wc_clarity": "8;10;2;5", "wc_relation_to_prior_work": "23;30;1;7", "wc_documentation": "95;26;1;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "549;439;95;150", "wc_reply_reviewers": "29;74;0;0", "wc_reply_authors": "115;145;94;94", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 57.75, 16.20763708873073 ], "wc_strengths_avg": [ 51.75, 33.32697856091968 ], "wc_improvement_avg": [ 108.75, 108.28059613799695 ], "wc_limitations_avg": [ 18.5, 20.862646045025066 ], "wc_correctness_avg": [ 16.5, 18.131464364468744 ], "wc_clarity_avg": [ 6.25, 3.031088913245535 ], "wc_relation_to_prior_work_avg": [ 15.25, 11.712706775122479 ], "wc_documentation_avg": [ 32.5, 37.218946787892854 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 308.25, 190.77129632101366 ], "wc_reply_reviewers_avg": [ 25.75, 30.26858932953434 ], "wc_reply_authors_avg": [ 112.0, 20.89258241577618 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12027441721237039527&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DistillNeRF: Perceiving 3D Scenes from Single-Glance Images by Distilling Neural Fields and Foundation Model Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96431", "id": "7fScrgJ3An", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7fScrgJ3An", "openreview": "https://openreview.net/forum?id=7fScrgJ3An", "poster": "/media/PosterPDFs/NeurIPS%202024/96431.png?t=1730847288.9227169", "project": "", "author_site": "Letian Wang, Seung Wook Kim, Jiawei Yang, Cunjun Yu, Boris Ivanovic, Steven Waslander, Yue Wang, Sanja Fidler, Marco Pavone, Peter Karkus", "tldr": "", "abstract": "We propose DistillNeRF, a self-supervised learning framework addressing the challenge of understanding 3D environments from limited 2D observations in outdoor autonomous driving scenes. Our method is a generalizable feedforward model that predicts a rich neural scene representation from sparse, single-frame multi-view camera inputs with limited view overlap, and is trained self-supervised with differentiable rendering to reconstruct RGB, depth, or feature images. Our first insight is to exploit per-scene optimized Neural Radiance Fields (NeRFs) by generating dense depth and virtual camera targets from them, which helps our model to learn enhanced 3D geometry from sparse non-overlapping image inputs. Second, to learn a semantically rich 3D representation, we propose distilling features from pre-trained 2D foundation models, such as CLIP or DINOv2, thereby enabling various downstream tasks without the need for costly 3D human annotations. To leverage these two insights, we introduce a novel model architecture with a two-stage lift-splat-shoot encoder and a parameterized sparse hierarchical voxel representation. Experimental results on the NuScenes and Waymo NOTR datasets demonstrate that DistillNeRF significantly outperforms existing comparable state-of-the-art self-supervised methods for scene reconstruction, novel view synthesis, and depth estimation; and it allows for competitive zero-shot 3D semantic occupancy prediction, as well as open-world scene understanding through distilled foundation model features. Demos and code will be available at https://distillnerf.github.io/.", "keywords": "Autonomous Driving; Generalizable NeRF; Scene Representation Learning; Distillation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Letian Wang;Seung Wook Kim;Jiawei Yang;Cunjun Yu;Boris Ivanovic;Steven L. Waslander;Yue Wang;Sanja Fidler;Marco Pavone;Peter Karkus", "authorids": "~Letian_Wang1;~Seung_Wook_Kim1;~Jiawei_Yang1;~Cunjun_Yu1;~Boris_Ivanovic1;~Steven_L._Waslander1;~Yue_Wang2;~Sanja_Fidler1;~Marco_Pavone1;~Peter_Karkus1", "gender": "M;M;M;Unspecified;;M;F;M;M;M", "homepage": ";http://www.cs.toronto.edu/~seung/;https://jiawei-yang.github.io/;;http://www.borisivanovic.com/;https://yuewang.xyz;http://www.cs.toronto.edu/~fidler/;https://web.stanford.edu/~pavone/;https://peterkarkus.com/;https://trailab.utias.utoronto.ca", "dblp": "17/8467;;96/2976;232/3014;203/8356;33/4822-41;08/6607;91/3382-1.html;154/9692;18/7142", "google_scholar": "https://scholar.google.com.hk/citations?user=HEzCWisAAAAJ;https://scholar.google.co.kr/citations?hl=en;OYrpIa8AAAAJ;4xwyGM8AAAAJ;ey9AQcEAAAAJ;v-AEFIEAAAAJ;CUlqK5EAAAAJ;RhOpyXcAAAAJ;cjUid0YAAAAJ;jY_Bcd8AAAAJ", "orcid": ";;;;0000-0002-8698-202X;;;;;0000-0003-4217-4415", "linkedin": ";;;;boris-ivanovic-a3103064;;sanja-fidler-2846a1a?trk=hp-identity-name;;;", "or_profile": "~Letian_Wang1;~Seung_Wook_Kim1;~Jiawei_Yang1;~Cunjun_Yu1;~Boris_Ivanovic1;~Yue_Wang2;~Sanja_Fidler1;~Marco_Pavone1;~Peter_Karkus1;~Steven_Lake_Waslander1", "aff": "University of Toronto;Vector Institute;University of Southern California;Toyota Research Institute;NVIDIA;NVIDIA;Department of Computer Science, University of Toronto;Stanford University;NVIDIA;University of Toronto", "aff_domain": "utoronto.ca;vectorinstitute.ai;usc.edu;tri.global;nvidia.com;nvidia.com;cs.toronto.edu;stanford.edu;nvidia.com;utoronto.ca", "position": "PhD student;Grad student;PhD student;Intern;Researcher;Researcher;Associate Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2024distillnerf,\ntitle={DistillNe{RF}: Perceiving 3D Scenes from Single-Glance Images by Distilling Neural Fields and Foundation Model Features},\nauthor={Letian Wang and Seung Wook Kim and Jiawei Yang and Cunjun Yu and Boris Ivanovic and Steven L. Waslander and Yue Wang and Sanja Fidler and Marco Pavone and Peter Karkus},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7fScrgJ3An}\n}", "github": "", "reviewers": "WxYD;aV9Y;T1bA;aj67", "pdf_size": 14057195, "rating": "3;5;5;6", "confidence": "3;3;3;4", "soundness": "1;3;3;3", "novelty": "1;2;3;3", "presentation": "1;3;3;3", "wc_summary": "84;87;77;63", "wc_strengths": "28;59;38;85", "wc_weaknesses": "192;86;257;202", "wc_questions": "272;24;21;3", "wc_limitations": "38;39;10;4", "wc_review": "614;295;403;357", "wc_reply_reviewers": "569;37;21;18", "wc_reply_authors": "1325;195;589;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;4;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 77.75, 9.256754290786809 ], "wc_strengths_avg": [ 52.5, 21.84605227495348 ], "wc_weaknesses_avg": [ 184.25, 61.8885086264001 ], "wc_questions_avg": [ 80.0, 111.14180131705622 ], "wc_limitations_avg": [ 22.75, 15.896147331979533 ], "wc_review_avg": [ 417.25, 119.88405857327321 ], "wc_reply_reviewers_avg": [ 161.25, 235.5253436469205 ], "wc_reply_authors_avg": [ 527.25, 507.0997806152158 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10178520033255351216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "utoronto.ca;vectorinstitute.ai;usc.edu;tri.global;nvidia.com;nvidia.com;cs.toronto.edu;stanford.edu;nvidia.com;utoronto.ca", "author_num": 10, "aff_unique_index": "0;1;2;3;4;4;0;5;4;0", "aff_unique_norm": "University of Toronto;Vector Institute;University of Southern California;Toyota Research Institute;NVIDIA;Stanford University", "aff_unique_dep": ";;;;NVIDIA Corporation;", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/;https://www.usc.edu;https://www.tri.global;https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "U of T;Vector Institute;USC;TRI;NVIDIA;Stanford", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Los Angeles;Toronto;Stanford", "aff_country_unique_index": "0;0;1;1;1;1;0;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Navigable Graphs for High-Dimensional Nearest Neighbor Search: Constructions and Limits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96430", "id": "7flSQgZ4RT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7flSQgZ4RT", "openreview": "https://openreview.net/forum?id=7flSQgZ4RT", "poster": "/media/PosterPDFs/NeurIPS%202024/96430.png?t=1731717659.0489986", "project": "", "author_site": "Haya Diwan, Jinrui Gou, Cameron Musco, Christopher Musco, Torsten Suel", "tldr": "", "abstract": "There has been significant recent interest in graph-based nearest neighbor search methods, many of which are centered on the construction of (approximately) \"navigable\" graphs over high-dimensional point sets. A graph is navigable if we can successfully move from any starting node to any target node using a greedy routing strategy where we always move to the neighbor that is closest to the destination according to the given distance function. The complete graph is obviously navigable for any point set, but the important question for applications is if sparser graphs can be constructed. While this question is fairly well understood in low-dimensions, we establish some of the first upper and lower bounds for high-dimensional point sets. First, we give a simple and efficient way to construct a navigable graph with average degree $O(\\sqrt{n \\log n })$ for any set of $n$ points, in any dimension, for any distance function. We compliment this result with a nearly matching lower bound: even under the Euclidean metric in $O(\\log n)$ dimensions, a random point set has no navigable graph with average degree $O(n^{\\alpha})$ for any $\\alpha < 1/2$. Our lower bound relies on sharp anti-concentration bounds for binomial random variables, which we use to show that the {near-neighborhoods} of a set of random points do not overlap significantly, forcing any navigable graph to have many edges.", "keywords": "vector databases;navigable graphs;near-neighbor search", "primary_area": "other", "supplementary_material": "", "author": "Haya Diwan;Jinrui Gou;Cameron N Musco;Christopher Musco;Torsten Suel", "authorids": "~Haya_Diwan1;~Jinrui_Gou1;~Cameron_N_Musco1;~Christopher_Musco1;~Torsten_Suel1", "gender": "F;M;M;M;M", "homepage": ";https://j9rrygou.github.io/;https://people.cs.umass.edu/~cmusco/;http://engineering.nyu.edu/~suel/;https://www.chrismusco.com/", "dblp": "378/4974;291/2796.html;149/2327;s/TorstenSuel;149/2243", "google_scholar": ";xSJwdCcAAAAJ;EeYGZCwAAAAJ;https://scholar.google.com.tw/citations?user=eQUn8ugAAAAJ;HXXSrNMAAAAJ", "orcid": ";0009-0004-7544-3292;;;", "linkedin": "haya-diwan-965150172;jinrui-jerry-gou-a13861201/;;;", "or_profile": "~Haya_Diwan1;~Jinrui_Gou1;~Cameron_N_Musco1;~Torsten_Suel1;~Christopher_P_Musco1", "aff": "New York University;New York University;University of Massachusetts, Amherst;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;umass.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndiwan2024navigable,\ntitle={Navigable Graphs for High-Dimensional Nearest Neighbor Search: Constructions and Limits},\nauthor={Haya Diwan and Jinrui Gou and Cameron N Musco and Christopher Musco and Torsten Suel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7flSQgZ4RT}\n}", "github": "", "reviewers": "Jtod;MezS;Z5U8;damy", "pdf_size": 396013, "rating": "3;5;5;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;2;4", "wc_summary": "53;95;33;71", "wc_strengths": "22;125;77;42", "wc_weaknesses": "29;196;51;65", "wc_questions": "389;16;107;59", "wc_limitations": "7;6;1;9", "wc_review": "500;438;269;246", "wc_reply_reviewers": "27;50;15;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 63.0, 22.847319317591726 ], "wc_strengths_avg": [ 66.5, 39.092838218783754 ], "wc_weaknesses_avg": [ 85.25, 65.21646647894993 ], "wc_questions_avg": [ 142.75, 145.7710104924844 ], "wc_limitations_avg": [ 5.75, 2.947456530637899 ], "wc_review_avg": [ 363.25, 108.30368183953858 ], "wc_reply_reviewers_avg": [ 27.25, 13.899190623917638 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13925856803535804808&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nyu.edu;nyu.edu;umass.edu;nyu.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "New York University;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.umass.edu", "aff_unique_abbr": "NYU;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7g8WSOHJtP", "title": "Revisiting the Message Passing in Heterophilous Graph Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have demonstrated strong performance in graph mining tasks due to their message-passing mechanism, which is aligned with the homophily assumption that adjacent nodes exhibit similar behaviors. However, in many real-world graphs, connected nodes may display contrasting behaviors, termed as heterophilous patterns, which has attracted increased interest in heterophilous GNNs (HTGNNs).\nAlthough the message-passing mechanism seems unsuitable for heterophilous graphs due to the propagation of class-irrelevant information, it is still widely used in many existing HTGNNs and consistently achieves notable success. \nThis raises the question: why does message passing remain effective on heterophilous graphs?\nTo answer this question, in this paper, we revisit the message-passing mechanisms in heterophilous graph neural networks and reformulate them into a unified heterophilious message-passing (HTMP) mechanism.\nBased on HTMP and empirical analysis, we reveal that the success of message passing in existing HTGNNs is attributed to implicitly enhancing the compatibility matrix among classes.\nMoreover, we argue that the full potential of the compatibility matrix is not completely achieved due to the existence of incomplete and noisy semantic neighborhoods in real-world heterophilous graphs.\nTo bridge this gap, we introduce a new approach named CMGNN, which operates within the HTMP mechanism to explicitly leverage and improve the compatibility matrix.\nA thorough evaluation involving 10 benchmark datasets and comparative analysis against 13 well-established baselines highlights the superior performance of the HTMP mechanism and CMGNN method.", "keywords": "GNNs;Message Passing;Heterophily", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/9bd302c75c2d7783a81ca94160e99a28fcc25136.zip", "author": "Zhuonan Zheng;Yuanchen Bei;Sheng Zhou;Yao Ma;Ming Gu;HONGJIA XU;Chengyu Lai;Jiawei Chen;Jiajun Bu", "authorids": "~Zhuonan_Zheng1;~Yuanchen_Bei1;~Sheng_Zhou1;~Yao_Ma3;~Ming_Gu6;~HONGJIA_XU1;~Chengyu_Lai1;~Jiawei_Chen6;~Jiajun_Bu1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/zfx233;https://github.com/YuanchenBei;https://zhoushengisnoob.github.io/;https://yaoma24.github.io/;;https://github.com/Frostland12138;https://github.com/cycl2018;https://jiawei-chen.github.io/;https://person.zju.edu.cn/bjj", "dblp": "322/4802;331/2167;34/4858-4.html;212/7871.html;76/2502-14;322/5125;;03/1390-7;50/3147", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;7XFbwrcAAAAJ;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;wf9TTOIAAAAJ;dw1BYBAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;OgZP2okAAAAJ", "orcid": "0009-0003-7326-7945;0000-0003-2834-2873;0000-0003-3645-1041;;;0009-0003-0138-3250;;0000-0002-4752-2629;0000-0002-1097-2044", "linkedin": ";;;;;;;;", "or_profile": "~Zhuonan_Zheng1;~Yuanchen_Bei1;~Sheng_Zhou1;~Yao_Ma3;~Ming_Gu6;~HONGJIA_XU1;~Chengyu_Lai1;~Jiawei_Chen6;~Jiajun_Bu1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Rensselaer Polytechnic Institute;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;rpi.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Associate Professor;Assistant Professor;PhD student;PhD student;MS student;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024revisiting,\ntitle={Revisiting the Message Passing in Heterophilous Graph Neural Networks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7g8WSOHJtP}\n}", "github": "", "project": "", "reviewers": "Fk2z;GM4Z;cjsi", "site": "https://openreview.net/forum?id=7g8WSOHJtP", "pdf_size": 764650, "rating": "5;5;6", "confidence": "3;3;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;2;3", "wc_summary": "82;46;126", "wc_strengths": "31;58;93", "wc_weaknesses": "254;111;373", "wc_questions": "120;47;75", "wc_limitations": "30;16;35", "wc_review": "517;278;702", "wc_reply_reviewers": "102;0;22", "wc_reply_authors": "26;0;17", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.66666666666667, 32.71425105702746 ], "wc_strengths_avg": [ 60.666666666666664, 25.381533094401966 ], "wc_weaknesses_avg": [ 246.0, 107.1105348071172 ], "wc_questions_avg": [ 80.66666666666667, 30.07028803025043 ], "wc_limitations_avg": [ 27.0, 8.04155872120988 ], "wc_review_avg": [ 499.0, 173.5645893224383 ], "wc_reply_reviewers_avg": [ 41.333333333333336, 43.827946437049604 ], "wc_reply_authors_avg": [ 14.333333333333334, 10.780641085864152 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15448902487735193144&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0;0;0;0;0", "aff_unique_norm": "Zhejiang University;Rensselaer Polytechnic Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.rpi.edu", "aff_unique_abbr": "ZJU;RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Retrieval-Retro: Retrieval-based Inorganic Retrosynthesis with Expert Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96429", "id": "7gf6oGdKPU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7gf6oGdKPU", "openreview": "https://openreview.net/forum?id=7gf6oGdKPU", "poster": "/media/PosterPDFs/NeurIPS%202024/96429.png?t=1731371659.324926", "project": "", "author_site": "Heewoong Noh, Namkyeong Lee, Gyoung S. Na, Chanyoung Park", "tldr": "", "abstract": "While inorganic retrosynthesis planning is essential in the field of chemical science, the application of machine learning in this area has been notably less explored compared to organic retrosynthesis planning. In this paper, we propose Retrieval-Retro for inorganic retrosynthesis planning, which implicitly extracts the precursor information of reference materials that are retrieved from the knowledge base regarding domain expertise in the field. Specifically, instead of directly employing the precursor information of reference materials, we propose implicitly extracting it with various attention layers, which enables the model to learn novel synthesis recipes more effectively.\nMoreover, during retrieval, we consider the thermodynamic relationship between target material and precursors, which is essential domain expertise in identifying the most probable precursor set among various options. Extensive experiments demonstrate the superiority of Retrieval-Retro in retrosynthesis planning, especially in discovering novel synthesis recipes, which is crucial for materials discovery.\nThe source code for Retrieval-Retro is available at https://github.com/HeewoongNoh/Retrieval-Retro.", "keywords": "Inogranic Synthesis;Inorganic Retrosynthesis;Material Science", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Heewoong Noh;Namkyeong Lee;Gyoung S. Na;Chanyoung Park", "authorids": "~Heewoong_Noh2;~Namkyeong_Lee1;~Gyoung_S._Na2;~Chanyoung_Park1", "gender": "M;M;;M", "homepage": "https://github.com/HeewoongNoh;https://namkyeong.github.io/;;https://dsail.kaist.ac.kr/", "dblp": "342/4417;308/0443;;170/5430.html", "google_scholar": "VIAHsCMAAAAJ;88ZqjpwAAAAJ;;lWk2LtQAAAAJ", "orcid": ";;;0000-0002-5957-5816", "linkedin": ";;;", "or_profile": "~Heewoong_Noh2;~Namkyeong_Lee1;~Gyoung_S._Na2;~Chanyoung_Park1", "aff": "Korea Advanced Institute of Science & Technology;University of Illinois, Urbana Champaign;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;uiuc.edu;;kaist.ac.kr", "position": "MS student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nnoh2024retrievalretro,\ntitle={Retrieval-Retro: Retrieval-based Inorganic Retrosynthesis with Expert Knowledge},\nauthor={Heewoong Noh and Namkyeong Lee and Gyoung S. Na and Chanyoung Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7gf6oGdKPU}\n}", "github": "", "reviewers": "URJP;wQcK;X1nP;N6yS;moYU", "pdf_size": 1114620, "rating": "5;5;6;6;7", "confidence": "1;3;2;5;5", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "36;86;41;76;63", "wc_strengths": "11;60;51;117;96", "wc_weaknesses": "28;53;26;19;35", "wc_questions": "21;94;1;254;34", "wc_limitations": "10;6;1;109;22", "wc_review": "106;299;120;575;250", "wc_reply_reviewers": "0;12;12;224;34", "wc_reply_authors": "37;37;37;414;56", "reply_reviewers": "0;1;1;2;1", "reply_authors": "2;2;2;4;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 1.6 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.4, 19.37627415165258 ], "wc_strengths_avg": [ 67.0, 36.82933613303395 ], "wc_weaknesses_avg": [ 32.2, 11.582745788456208 ], "wc_questions_avg": [ 80.8, 91.98565105493356 ], "wc_limitations_avg": [ 29.6, 40.30186099921442 ], "wc_review_avg": [ 270.0, 169.50634206424255 ], "wc_reply_reviewers_avg": [ 56.4, 84.5165072633743 ], "wc_reply_authors_avg": [ 116.2, 149.08172255511406 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.701560760020114, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4033525328893748738&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "kaist.ac.kr;uiuc.edu;;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://illinois.edu", "aff_unique_abbr": "KAIST;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Invisible Image Watermarks Are Provably Removable Using Generative AI", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96428", "id": "7hy5fy2OC6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7hy5fy2OC6", "openreview": "https://openreview.net/forum?id=7hy5fy2OC6", "poster": "", "project": "", "author_site": "Xuandong Zhao, Kexun Zhang, Zihao Su, Saastha Vasan, Ilya Grishchenko, Christopher Kruegel, Giovanni Vigna, Yu-Xiang Wang, Lei Li", "tldr": "", "abstract": "Invisible watermarks safeguard images' copyrights by embedding hidden messages only detectable by owners. They also prevent people from misusing images, especially those generated by AI models.\nWe propose a family of regeneration attacks to remove these invisible watermarks. \nThe proposed attack method first adds random noise to an image to destroy the watermark and then reconstructs the image. \nThis approach is flexible and can be instantiated with many existing image-denoising algorithms and pre-trained generative models such as diffusion models. Through formal proofs and extensive empirical evaluations, we demonstrate that pixel-level invisible watermarks are vulnerable to this regeneration attack.\nOur results reveal that, across four different pixel-level watermarking schemes, the proposed method consistently achieves superior performance compared to existing attack techniques, with lower detection rates and higher image quality.\nHowever, watermarks that keep the image semantically similar can be an alternative defense against our attacks.\nOur finding underscores the need for a shift in research/industry emphasis from invisible watermarks to semantic-preserving watermarks. Code is available at https://github.com/XuandongZhao/WatermarkAttacker", "keywords": "Image Watermark;AI Safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/4ceed5db04aff3b033c79daa2d443d290c1a1231.zip", "author": "Xuandong Zhao;Kexun Zhang;Zihao Su;Saastha Vasan;Ilya Grishchenko;Christopher Kruegel;Giovanni Vigna;Yu-Xiang Wang;Lei Li", "authorids": "~Xuandong_Zhao1;~Kexun_Zhang1;~Zihao_Su1;~Saastha_Vasan1;~Ilya_Grishchenko1;~Christopher_Kruegel1;~Giovanni_Vigna1;~Yu-Xiang_Wang1;~Lei_Li11", "gender": "M;M;;M;;;M;;M", "homepage": "https://xuandongzhao.github.io/;https://zkx06111.github.io;https://github.com/JerrySu11;;;;https://www.cs.ucsb.edu/~vigna/;http://www.cs.ucsb.edu/~yuxiangw/publications.html;https://www.cs.cmu.edu/~leili", "dblp": "244/8033;295/8815;;;;;v/GiovanniVigna;62/1637-3.html;13/7007-5.html", "google_scholar": "CxeH4uoAAAAJ;;iBAr8iQAAAAJ;;;;gG5PTlAAAAAJ;HGNZ1fkAAAAJ;BYXqAlwAAAAJ", "orcid": ";;;;;;;;0000-0003-3095-9776", "linkedin": "xuandong-zhao-a3270610b/;;;saastha-vasan-30b5b7169;;;;;", "or_profile": "~Xuandong_Zhao1;~Kexun_Zhang1;~Zihao_Su1;~Saastha_Vasan1;~Ilya_Grishchenko1;~Christopher_Kruegel1;~Giovanni_Vigna1;~Yu-Xiang_Wang1;~Lei_Li11", "aff": "UC Santa Barbara;Carnegie Mellon University;University of California, Santa Barbara;University of California Santa Barbara;;;, University of California, Santa Barbara;UC Santa Barbara;School of Computer Science, Carnegie Mellon University", "aff_domain": "ucsb.edu;cmu.edu;ucsb.edu;cs.ucsb;;;cs.ucsb.edu;ucsb.edu;cs.cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;;;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024invisible,\ntitle={Invisible Image Watermarks Are Provably Removable Using Generative {AI}},\nauthor={Xuandong Zhao and Kexun Zhang and Zihao Su and Saastha Vasan and Ilya Grishchenko and Christopher Kruegel and Giovanni Vigna and Yu-Xiang Wang and Lei Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7hy5fy2OC6}\n}", "github": "", "reviewers": "ii78;V7G3;n9vc", "pdf_size": 8839992, "rating": "7;7;7", "confidence": "3;5;3", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "60;89;96", "wc_strengths": "47;29;56", "wc_weaknesses": "15;575;76", "wc_questions": "1;187;241", "wc_limitations": "1;15;21", "wc_review": "124;895;490", "wc_reply_reviewers": "11;332;0", "wc_reply_authors": "0;606;0", "reply_reviewers": "1;2;0", "reply_authors": "1;3;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.66666666666667, 15.58489297008128 ], "wc_strengths_avg": [ 44.0, 11.224972160321824 ], "wc_weaknesses_avg": [ 222.0, 250.84789547984386 ], "wc_questions_avg": [ 143.0, 102.80077820717118 ], "wc_limitations_avg": [ 12.333333333333334, 8.379870059984357 ], "wc_review_avg": [ 503.0, 314.8936328349622 ], "wc_reply_reviewers_avg": [ 114.33333333333333, 153.97907505747511 ], "wc_reply_authors_avg": [ 202.0, 285.6711395993652 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4840687485457399339&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "ucsb.edu;cmu.edu;ucsb.edu;cs.ucsb;;;cs.ucsb.edu;ucsb.edu;cs.cmu.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;0;1", "aff_unique_norm": "University of California, Santa Barbara;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsb.edu;https://www.cmu.edu", "aff_unique_abbr": "UCSB;CMU", "aff_campus_unique_index": "0;0;0;0;0;2", "aff_campus_unique": "Santa Barbara;;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Initializing Variable-sized Vision Transformers from Learngene with Learnable Transformation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96427", "id": "7j6xgGj5lF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7j6xgGj5lF", "openreview": "https://openreview.net/forum?id=7j6xgGj5lF", "poster": "/media/PosterPDFs/NeurIPS%202024/96427.png?t=1731141757.2390487", "project": "", "author_site": "Shiyu Xia, Yuankun Zu, Xu Yang, Xin Geng", "tldr": "", "abstract": "In practical scenarios, it is necessary to build variable-sized models to accommodate diverse resource constraints, where weight initialization serves as a crucial step preceding training. The recently introduced Learngene framework firstly learns one compact module, termed learngene, from a large well-trained model, and then transforms learngene to initialize variable-sized models. However, the existing Learngene methods provide limited guidance on transforming learngene, where transformation mechanisms are manually designed and generally lack a learnable component. Moreover, these methods only consider transforming learngene along depth dimension, thus constraining the flexibility of learngene. Motivated by these concerns, we propose a novel and effective Learngene approach termed LeTs (Learnable Transformation), where we transform the learngene module along both width and depth dimension with a set of learnable matrices for flexible variablesized model initialization. Specifically, we construct an auxiliary model comprising the compact learngene module and learnable transformation matrices, enabling both components to be trained. To meet the varying size requirements of target models, we select specific parameters from well-trained transformation matrices to adaptively transform the learngene, guided by strategies such as continuous selection and magnitude-wise selection. Extensive experiments on ImageNet-1K demonstrate that Des-Nets initialized via LeTs outperform those with 100-epoch from scratch training after only 1 epoch tuning. When transferring to downstream image classification tasks, LeTs achieves better results while outperforming from scratch training after about 10 epochs within a 300-epoch training schedule.", "keywords": "Parameter initialization;Learngene", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Shiyu Xia;Yuankun Zu;Xu Yang;Xin Geng", "authorids": "~Shiyu_Xia1;~Yuankun_Zu1;~Xu_Yang5;~Xin_Geng1", "gender": "M;M;M;M", "homepage": "http://palm.seu.edu.cn/homepage/xiashiyu/demo/index.html;https://github.com/zykrous;;http://palm.seu.edu.cn/xgeng/index.htm", "dblp": "294/8485;396/6774;63/1534-21.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;;SqdxMH0AAAAJ;ZOCxkIcAAAAJ", "orcid": ";0009-0006-6984-6399;0000-0002-8276-2679;", "linkedin": ";;;", "or_profile": "~Shiyu_Xia1;~Yuankun_Zu1;~Xu_Yang5;~Xin_Geng1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University, China", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn", "position": "PhD student;MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\nxia2024initializing,\ntitle={Initializing Variable-sized Vision Transformers from Learngene with Learnable Transformation},\nauthor={Shiyu Xia and Yuankun Zu and Xu Yang and Xin Geng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7j6xgGj5lF}\n}", "github": "", "reviewers": "i78Y;p972;8CMe;JzzD", "pdf_size": 1185279, "rating": "4;5;5;7", "confidence": "2;4;4;3", "soundness": "3;2;1;3", "novelty": "2;2;1;3", "presentation": "3;2;2;4", "wc_summary": "65;119;166;181", "wc_strengths": "37;53;40;37", "wc_weaknesses": "77;205;480;36", "wc_questions": "40;47;25;76", "wc_limitations": "2;11;5;35", "wc_review": "221;435;716;365", "wc_reply_reviewers": "0;29;36;0", "wc_reply_authors": "32;78;43;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 132.75, 45.312112067304916 ], "wc_strengths_avg": [ 41.75, 6.609652033201143 ], "wc_weaknesses_avg": [ 199.5, 173.52881605082194 ], "wc_questions_avg": [ 47.0, 18.533752992850637 ], "wc_limitations_avg": [ 13.25, 12.968712349342937 ], "wc_review_avg": [ 434.25, 180.03801681867083 ], "wc_reply_reviewers_avg": [ 16.25, 16.43738117827776 ], "wc_reply_authors_avg": [ 38.25, 27.860141779969464 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.20751433915982243, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15021160715123529424&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Improving Visual Prompt Tuning by Gaussian Neighborhood Minimization for Long-Tailed Visual Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96426", "id": "7lMN6xoBjb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7lMN6xoBjb", "openreview": "https://openreview.net/forum?id=7lMN6xoBjb", "poster": "/media/PosterPDFs/NeurIPS%202024/96426.png?t=1731374155.2863836", "project": "", "author_site": "Mengke Li, Ye Liu, Yang Lu, Yiqun Zhang, Yiu-ming Cheung, Hui Huang", "tldr": "", "abstract": "Long-tailed visual recognition has received increasing attention recently. Despite fine-tuning techniques represented by visual prompt tuning (VPT) achieving substantial performance improvement by leveraging pre-trained knowledge, models still exhibit unsatisfactory generalization performance on tail classes. To address this issue, we propose a novel optimization strategy called Gaussian neighborhood minimization prompt tuning (GNM-PT), for VPT to address the long-tail learning problem. We introduce a novel Gaussian neighborhood loss, which provides a tight upper bound on the loss function of data distribution, facilitating a flattened loss landscape correlated to improved model generalization. Specifically, GNM-PT seeks the gradient descent direction within a random parameter neighborhood, independent of input samples, during each gradient update. Ultimately, GNM-PT enhances generalization across all classes while simultaneously reducing computational overhead. The proposed GNM-PT achieves state-of-the-art classification accuracies of 90.3%, 76.5%, and 50.1% on benchmark datasets CIFAR100-LT (IR 100), iNaturalist 2018, and Places-LT, respectively. The source code is available at https://github.com/Keke921/GNM-PT.", "keywords": "Long-tailed learning;Sharpness-aware minimization;Gaussian neighborhood minimization;Optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Mengke Li;Ye Liu;Yang Lu;Yiqun Zhang;Yiu-ming Cheung;Hui Huang", "authorids": "~Mengke_Li3;~Ye_Liu13;~Yang_Lu5;~Yiqun_Zhang3;~Yiu-ming_Cheung1;~Hui_Huang3", "gender": ";M;M;M;;", "homepage": ";https://github.com/liuye-13;https://jasonyanglu.github.io/;https://yqzhang.notion.site/Zhang-Yiqun-cd968d8e4cc04b3c9a13db85fa4f991e;;https://vcc.tech/~huihuang", "dblp": ";;16/6317-9;125/5587-6.html;;33/5763-4", "google_scholar": ";;r7r4FGwAAAAJ;;;https://scholar.google.com.hk/citations?user=wjzkl3YAAAAJ", "orcid": ";;0000-0002-3497-9611;0000-0002-0328-987X;;0000-0003-3212-0544", "linkedin": ";;;;;", "or_profile": "~Mengke_Li3;~Ye_Liu13;~Yang_Lu5;~Yiqun_Zhang3;~Yiu-ming_Cheung1;~Hui_Huang3", "aff": ";Shenzhen University;Xiamen University;Guangdong University of Technology;;Shenzhen University", "aff_domain": ";szu.edu.cn;xmu.edu.cn;gdut.edu.cn;;szu.edu", "position": ";MS student;Assistant Professor;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nli2024improving,\ntitle={Improving Visual Prompt Tuning by Gaussian Neighborhood Minimization for Long-Tailed Visual Recognition},\nauthor={Mengke Li and Ye Liu and Yang Lu and Yiqun Zhang and Yiu-ming Cheung and Hui Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7lMN6xoBjb}\n}", "github": "", "reviewers": "XQK2;ZADq;X9cb;veiW", "pdf_size": 4601686, "rating": "5;5;5;6", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "63;82;52;32", "wc_strengths": "34;41;26;47", "wc_weaknesses": "150;83;193;237", "wc_questions": "2;3;2;2", "wc_limitations": "1;9;15;6", "wc_review": "250;218;288;324", "wc_reply_reviewers": "79;0;20;97", "wc_reply_authors": "813;0;92;118", "reply_reviewers": "1;0;1;2", "reply_authors": "3;1;2;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 18.102140757380052 ], "wc_strengths_avg": [ 37.0, 7.842193570679061 ], "wc_weaknesses_avg": [ 165.75, 56.82154080980205 ], "wc_questions_avg": [ 2.25, 0.4330127018922193 ], "wc_limitations_avg": [ 7.75, 5.0682837331783235 ], "wc_review_avg": [ 270.0, 39.824615503479755 ], "wc_reply_reviewers_avg": [ 49.0, 40.1434926233381 ], "wc_reply_authors_avg": [ 255.75, 324.7016900171602 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5545807166389450834&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";szu.edu.cn;xmu.edu.cn;gdut.edu.cn;;szu.edu", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Shenzhen University;Xiamen University;Guangdong University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.szu.edu.cn;https://www.xmu.edu.cn;http://www.gdut.edu.cn", "aff_unique_abbr": "SZU;XMU;GDUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7nbAots3f8", "title": "Learning Unsigned Distance Fields from Local Shape Functions for 3D Surface Reconstruction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unsigned distance fields (UDFs) provide a versatile framework for representing a diverse array of 3D shapes, encompassing both watertight and non-watertight geometries. Traditional UDF learning methods typically require extensive training on large datasets of 3D shapes, which is costly and often necessitates hyperparameter adjustments for new datasets. This paper presents a novel neural framework, LoSF-UDF, for reconstructing surfaces from 3D point clouds by leveraging local shape functions to learn UDFs. We observe that 3D shapes manifest simple patterns within localized areas, prompting us to create a training dataset of point cloud patches characterized by mathematical functions that represent a continuum from smooth surfaces to sharp edges and corners. Our approach learns features within a specific radius around each query point and utilizes an attention mechanism to focus on the crucial features for UDF estimation. This method enables efficient and robust surface reconstruction from point clouds without the need for shape-specific training. Additionally, our method exhibits enhanced resilience to noise and outliers in point clouds compared to existing methods. We present comprehensive experiments and comparisons across various datasets, including synthetic and real-scanned point clouds, to validate our method's efficacy.", "keywords": "Surface Reconstruction;Implicit Fields;Unsigned Distance Fields", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiangbei Hu;Yanggeng Li;Fei Hou;Junhui Hou;Zhebin Zhang;Shengfa Wang;Na Lei;Ying He", "authorids": "~Jiangbei_Hu1;~Yanggeng_Li1;~Fei_Hou1;~Junhui_Hou2;~Zhebin_Zhang2;~Shengfa_Wang1;~Na_Lei1;~Ying_He1", "gender": "M;M;M;M;;M;F;M", "homepage": ";;https://lcs.ios.ac.cn/~houf/;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;;;https://faculty.dlut.edu.cn/leina/en;https://personal.ntu.edu.sg/yhe/", "dblp": "210/0076;;24/3702;122/2673.html;;34/1653;90/2981;h/YingHe1", "google_scholar": "https://scholar.google.com/citations?hl=en;;NWoYRf8AAAAJ;j6eefhwAAAAJ;;;;ISNmBxwAAAAJ", "orcid": "0000-0002-6774-6267;;;0000-0003-3431-2021;;0000-0001-9030-833X;;0000-0002-6749-4485", "linkedin": ";yanggeng-li-b0a0b4171/;;;;;;", "or_profile": "~Jiangbei_Hu1;~Yanggeng_Li1;~Fei_Hou1;~Junhui_Hou2;~Zhebin_Zhang2;~Shengfa_Wang1;~Na_Lei1;~Ying_He1", "aff": "Dalian University of Technology;Nanyang Technological University;Institute of Software, Chinese Academy of Sciences;City University of Hong Kong;;Dalian University of Technology;Dalian University of Technology;Nanyang Technological University", "aff_domain": "dlut.edu.cn;ntu.edu.sg;ios.ac.cn;cityu.edu.hk;;dlut.edu.cn;dlut.edu.cn;ntu.edu.sg", "position": "Postdoc;MS student;Associate Professor;Assistant Professor;;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024learning,\ntitle={Learning Unsigned Distance Fields from Local Shape Functions for 3D Surface Reconstruction},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7nbAots3f8}\n}", "github": "", "project": "", "reviewers": "15wp;AbEM;1t3W;YXLQ", "site": "https://openreview.net/forum?id=7nbAots3f8", "pdf_size": 12653985, "rating": "4;4;5;6", "confidence": "3;4;5;4", "soundness": "1;3;3;3", "novelty": "3;2;2;3", "presentation": "2;2;3;3", "wc_summary": "179;59;46;110", "wc_strengths": "91;87;82;60", "wc_weaknesses": "332;408;165;66", "wc_questions": "119;36;48;30", "wc_limitations": "72;25;8;9", "wc_review": "793;615;349;275", "wc_reply_reviewers": "112;120;13;0", "wc_reply_authors": "490;547;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.5, 52.27092882281699 ], "wc_strengths_avg": [ 80.0, 11.979148550710939 ], "wc_weaknesses_avg": [ 242.75, 134.68365713775373 ], "wc_questions_avg": [ 58.25, 35.66773752286512 ], "wc_limitations_avg": [ 28.5, 26.004807247891687 ], "wc_review_avg": [ 508.0, 207.51144546747295 ], "wc_reply_reviewers_avg": [ 61.25, 55.01533877020117 ], "wc_reply_authors_avg": [ 259.25, 260.03208936590886 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1fJ28KjS2JUJ:scholar.google.com/&scioq=Learning+Unsigned+Distance+Fields+from+Local+Shape+Functions+for+3D+Surface+Reconstruction&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;0;0;1", "aff_unique_norm": "Dalian University of Technology;Nanyang Technological University;Chinese Academy of Sciences;City University of Hong Kong", "aff_unique_dep": ";;Institute of Software;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.ntu.edu.sg;http://www.ios.ac.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "DUT;NTU;CAS;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "DeltaDEQ: Exploiting Heterogeneous Convergence for Accelerating Deep Equilibrium Iterations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96425", "id": "7qBkADV4zD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7qBkADV4zD", "openreview": "https://openreview.net/forum?id=7qBkADV4zD", "poster": "/media/PosterPDFs/NeurIPS%202024/96425.png?t=1729631402.6043031", "project": "", "author_site": "Zuowen Wang, Longbiao Cheng, Pehuen Moure, Niklas Hahn, Shih-Chii Liu", "tldr": "", "abstract": "Implicit neural networks including deep equilibrium models have achieved superior task performance with better parameter efficiency in various applications. However, it is often at the expense of higher computation costs during inference. In this work, we identify a phenomenon named $\\textbf{heterogeneous convergence}$ that exists in deep equilibrium models and other iterative methods. We observe much faster convergence of state activations in certain dimensions therefore indicating the dimensionality of the underlying dynamics of the forward pass is much lower than the defined dimension of the states. We thereby propose to exploit heterogeneous convergence by storing past linear operation results (e.g., fully connected and convolutional layers) and only propagating the state activation when its change exceeds a threshold. Thus, for the already converged dimensions, the computations can be skipped. We verified our findings and reached 84\\% FLOPs reduction on the implicit neural representation task, 73\\% on the Sintel and 76\\% on the KITTI datasets for the optical flow estimation task while keeping comparable task accuracy with the models that perform the full update.", "keywords": "implicit layers;deep learning acceleration;deep equilibrium model", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zuowen Wang;Longbiao Cheng;Pehuen Moure;Niklas Hahn;Shih-Chii Liu", "authorids": "~Zuowen_Wang2;~Longbiao_Cheng1;~Pehuen_Moure1;~Niklas_Hahn1;~Shih-Chii_Liu1", "gender": "M;M;M;;", "homepage": "https://services.ini.uzh.ch/people/zuowen;;;;", "dblp": "243/5726.html;284/2790;;;10/3688", "google_scholar": "pdZLukIAAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;pehuenmoure/;niklas-hahn-086316286/;", "or_profile": "~Zuowen_Wang2;~Longbiao_Cheng1;~Pehuen_Moure1;~Niklas_Hahn1;~Shih-Chii_Liu1", "aff": "Institute of Neuroinformatics, University of Zurich and ETH Zurich;Insititute of Neuroinformatics, University of Zurich and ETH Zurich;ETHZ - ETH Zurich;, University of Zurich;University of Zurich and ETH Zurich", "aff_domain": "ini.uzh.ch;ini.uzh.ch;ethz.ch;ini.uzh.ch;ini.uzh.ch", "position": "PhD student;Postdoc;PhD student;PhD student;Adjunct professor", "bibtex": "@inproceedings{\nwang2024deltadeq,\ntitle={Delta{DEQ}: Exploiting Heterogeneous Convergence for Accelerating Deep Equilibrium Iterations},\nauthor={Zuowen Wang and Longbiao Cheng and Pehuen Moure and Niklas Hahn and Shih-Chii Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7qBkADV4zD}\n}", "github": "", "reviewers": "zWM7;WcEE;71es;dpig", "pdf_size": 3064106, "rating": "4;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "123;85;129;89", "wc_strengths": "63;93;190;13", "wc_weaknesses": "318;108;356;127", "wc_questions": "223;96;234;4", "wc_limitations": "10;126;1;1", "wc_review": "737;508;910;234", "wc_reply_reviewers": "18;15;210;11", "wc_reply_authors": "71;0;187;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.5, 19.665960439297137 ], "wc_strengths_avg": [ 89.75, 64.54988381089466 ], "wc_weaknesses_avg": [ 227.25, 110.77313528107797 ], "wc_questions_avg": [ 139.25, 95.07201218024156 ], "wc_limitations_avg": [ 34.5, 52.95516971930125 ], "wc_review_avg": [ 597.25, 253.60340593138727 ], "wc_reply_reviewers_avg": [ 63.5, 84.61826044064011 ], "wc_reply_authors_avg": [ 64.5, 76.43461257833391 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9764791302028331343&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ini.uzh.ch;ini.uzh.ch;ethz.ch;ini.uzh.ch;ini.uzh.ch", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Zurich;ETH Zurich", "aff_unique_dep": "Institute of Neuroinformatics;", "aff_unique_url": "https://www.neuro.ethz.ch/;https://www.ethz.ch", "aff_unique_abbr": "UZH;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Personalized Steering of Large Language Models: Versatile Steering Vectors Through Bi-directional Preference Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96424", "id": "7qJFkuZdYo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7qJFkuZdYo", "openreview": "https://openreview.net/forum?id=7qJFkuZdYo", "poster": "/media/PosterPDFs/NeurIPS%202024/96424.png?t=1731737049.3874245", "project": "", "author_site": "Yuanpu Cao, Tianrong Zhang, Bochuan Cao, Ziyi Yin, Lu Lin, Fenglong Ma, Jinghui Chen", "tldr": "", "abstract": "Researchers have been studying approaches to steer the behavior of Large Language Models (LLMs) and build personalized LLMs tailored for various applications. While fine-tuning seems to be a direct solution, it requires substantial computational resources and may significantly affect the utility of the original LLM. \nRecent endeavors have introduced more lightweight strategies, focusing on extracting ``steering vectors'' to guide the model's output toward desired behaviors by adjusting activations within specific layers of the LLM's transformer architecture. However, such steering vectors are directly extracted from the activations of human preference data and thus often lead to suboptimal results and occasional failures, especially in alignment-related scenarios.\nIn this work, we propose an innovative approach that could produce more effective steering vectors through bi-directional preference optimization. \nOur method is designed to allow steering vectors to directly influence the generation probability of contrastive human preference data pairs, thereby offering a more precise representation of the target behavior. By carefully adjusting the direction and magnitude of the steering vector, we enabled personalized control over the desired behavior across a spectrum of intensities.\nExtensive experimentation across various open-ended generation tasks, particularly focusing on steering AI personas, has validated the efficacy of our approach. \nMoreover, we comprehensively investigate critical alignment-concerning scenarios, such as managing truthfulness, mitigating hallucination, and addressing jailbreaking attacks alongside their respective defenses. Remarkably, our method can still demonstrate outstanding steering effectiveness across these scenarios. Furthermore, we showcase the transferability of our steering vectors across different models/LoRAs and highlight the synergistic benefits of applying multiple vectors simultaneously. These findings significantly broaden the practicality and versatility of our proposed method.", "keywords": "personalized steering of LLMs;steering vector;LLM alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuanpu Cao;Tianrong Zhang;Bochuan Cao;Ziyi Yin;Lu Lin;Fenglong Ma;Jinghui Chen", "authorids": "~Yuanpu_Cao1;~Tianrong_Zhang1;~Bochuan_Cao1;~Ziyi_Yin1;~Lu_Lin2;~Fenglong_Ma1;~Jinghui_Chen1", "gender": "M;M;;M;F;M;M", "homepage": ";https://zhangtianrong.github.io/profile/#en;https://aaaaaasuka.github.io/;https://ericyinyzy.github.io/;https://louise-lulin.github.io;https://fenglong-ma.github.io/;https://jinghuichen.github.io/", "dblp": "243/0230;;334/3881;358/6428;86/2209-1;85/10856;67/5633", "google_scholar": "F5S2bO8AAAAJ;;eOZCg2IAAAAJ;wvbK37AAAAAJ;8N04pBgAAAAJ;DLJIxNMAAAAJ;mKia7Y4AAAAJ", "orcid": ";;;0009-0002-3502-3205;0000-0002-2539-3352;0000-0002-4999-0303;", "linkedin": "yuanpu-cao-a392751b2/;;;%E6%A2%93%E8%AF%91-%E6%AE%B7-ab816a249/?locale=en_US&trk=eml-email_network_conversations_01-header-0-profile_glimmer;lulin92/;fenglong-ma-69805832/;", "or_profile": "~Yuanpu_Cao1;~Tianrong_Zhang1;~Bochuan_Cao1;~Ziyi_Yin1;~Lu_Lin2;~Fenglong_Ma1;~Jinghui_Chen1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2024personalized,\ntitle={Personalized Steering of Large Language Models: Versatile Steering Vectors Through Bi-directional Preference Optimization},\nauthor={Yuanpu Cao and Tianrong Zhang and Bochuan Cao and Ziyi Yin and Lu Lin and Fenglong Ma and Jinghui Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7qJFkuZdYo}\n}", "github": "", "reviewers": "EQVZ;XMbM;sXZi", "pdf_size": 1059998, "rating": "5;6;8", "confidence": "4;4;4", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "42;130;128", "wc_strengths": "57;73;91", "wc_weaknesses": "79;104;101", "wc_questions": "46;53;91", "wc_limitations": "26;26;9", "wc_review": "250;386;420", "wc_reply_reviewers": "44;18;0", "wc_reply_authors": "23;28;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.0, 41.02032016777376 ], "wc_strengths_avg": [ 73.66666666666667, 13.888444437333106 ], "wc_weaknesses_avg": [ 94.66666666666667, 11.14550233153366 ], "wc_questions_avg": [ 63.333333333333336, 19.770910168449223 ], "wc_limitations_avg": [ 20.333333333333332, 8.013876853447538 ], "wc_review_avg": [ 352.0, 73.44839458195575 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 18.06162291219209 ], "wc_reply_authors_avg": [ 17.0, 12.192894105447921 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15074889482056731233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu;psu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Performative Control for Linear Dynamical Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96423", "id": "7qT72IGkr4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7qT72IGkr4", "openreview": "https://openreview.net/forum?id=7qT72IGkr4", "poster": "/media/PosterPDFs/NeurIPS%202024/96423.png?t=1731677016.0646513", "project": "", "author_site": "Songfu Cai, Fei Han, Xuanyu Cao", "tldr": "", "abstract": "We introduce the framework of performative control, where the policy chosen by the controller affects the underlying dynamics of the control system. This results in a sequence of policy-dependent system state data with policy-dependent temporal correlations. Following the recent literature on performative prediction \\cite{perdomo2020performative}, we introduce the concept of a performatively stable control (PSC) solution. We first propose a sufficient condition for the performative control problem to admit a unique PSC solution with a problem-specific structure of distributional sensitivity propagation and aggregation. We further analyze the impacts of system stability on the existence of the PSC solution. Specifically, for {almost surely strongly stable} policy-dependent dynamics, the PSC solution exists if the sum of the distributional sensitivities is small enough. However, for almost surely unstable policy-dependent dynamics, the existence of the PSC solution will necessitate a temporally backward decaying of the distributional sensitivities. We finally provide a repeated stochastic gradient descent scheme that converges to the PSC solution and analyze its non-asymptotic convergence rate. Numerical results validate our theoretical analysis.", "keywords": "Performative prediction; Policy-dependent Dynamics; Disturbance-action control policy; Stochastic control", "primary_area": "optimization", "supplementary_material": "", "author": "Songfu Cai;Fei Han;Xuanyu Cao", "authorids": "~Songfu_Cai1;~Fei_Han3;~Xuanyu_Cao1", "gender": ";F;M", "homepage": ";;https://labs.wsu.edu/xuanyu/", "dblp": ";;117/3366", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;IJ08PvUAAAAJ;jvrZYmAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Songfu_Cai1;~Fei_Han3;~Xuanyu_Cao1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;connect.ust.hk;ust.hk", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncai2024performative,\ntitle={Performative Control for Linear Dynamical Systems},\nauthor={Songfu Cai and Fei Han and Xuanyu Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7qT72IGkr4}\n}", "github": "", "reviewers": "e5pZ;wfwc;JGbs;nkz4", "pdf_size": 3466855, "rating": "3;4;7;8", "confidence": "4;2;4;4", "soundness": "2;3;3;3", "novelty": "1;2;2;4", "presentation": "2;3;3;2", "wc_summary": "45;161;67;105", "wc_strengths": "22;27;59;95", "wc_weaknesses": "59;185;34;216", "wc_questions": "109;22;212;225", "wc_limitations": "25;10;75;9", "wc_review": "260;405;447;650", "wc_reply_reviewers": "73;180;72;480", "wc_reply_authors": "539;1014;36;1414", "reply_reviewers": "1;1;1;3", "reply_authors": "4;5;2;7", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.5, 43.98579316097415 ], "wc_strengths_avg": [ 50.75, 29.22648627529488 ], "wc_weaknesses_avg": [ 123.5, 78.27675261531996 ], "wc_questions_avg": [ 142.0, 82.58026398601545 ], "wc_limitations_avg": [ 29.75, 26.88284769141841 ], "wc_review_avg": [ 440.5, 139.4390547873873 ], "wc_reply_reviewers_avg": [ 201.25, 166.81333130178774 ], "wc_reply_authors_avg": [ 750.75, 515.9715956329379 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.5, 1.8027756377319946 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42008402520840293, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6004380649070042264&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ust.hk;connect.ust.hk;ust.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Revisiting Ensembling in One-Shot Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96422", "id": "7rWTS2wuYX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7rWTS2wuYX", "openreview": "https://openreview.net/forum?id=7rWTS2wuYX", "poster": "/media/PosterPDFs/NeurIPS%202024/96422.png?t=1731442303.11776", "project": "", "author_site": "Youssef Allouah, Akash Dhasade, Rachid Guerraoui, Nirupam Gupta, Anne-marie Kermarrec, Rafael Pinot, Rafael Pires, Rishi Sharma", "tldr": "", "abstract": "Federated Learning (FL) is an appealing approach to training machine learning models without sharing raw data. However, standard FL algorithms are iterative and thus induce a significant communication cost. One-Shot FL (OFL) trades the iterative exchange of models between clients and the server with a single round of communication, thereby saving substantially on communication costs. Not surprisingly, OFL exhibits a performance gap in terms of accuracy with respect to FL, especially under high data heterogeneity. We introduce Fens, a novel federated ensembling scheme that approaches the accuracy of FL with the communication efficiency of OFL. Learning in Fens proceeds in two phases: first, clients train models locally and send them to the server, similar to OFL; second, clients collaboratively train a lightweight prediction aggregator model using FL. We showcase the effectiveness of Fens through exhaustive experiments spanning several datasets and heterogeneity levels. In the particular case of heterogeneously distributed CIFAR-10 dataset, Fens achieves up to a $26.9$% higher accuracy over SOTA OFL, being only $3.1$% lower than FL. At the same time, Fens incurs at most $4.3\\times$ more communication than OFL, whereas FL is at least $10.9\\times$ more communication-intensive than Fens.", "keywords": "One-shot Federated Learning;Communication Efficiency;Federated Ensembles", "primary_area": "other", "supplementary_material": "", "author": "Youssef Allouah;Akash Dhasade;Rachid Guerraoui;Nirupam Gupta;Anne-Marie Kermarrec;Rafael Pinot;Rafael Pires;Rishi Sharma", "authorids": "~Youssef_Allouah1;~Akash_Dhasade1;~Rachid_Guerraoui1;~Nirupam_Gupta1;~Anne-Marie_Kermarrec3;~Rafael_Pinot1;~Rafael_Pires1;~Rishi_Sharma2", "gender": "M;;M;;;;M;M", "homepage": "https://youssefallouah.com/;;https://lpdwww.epfl.ch/rachid/;;;;https://pires.tech/;https://rishisharma.netlify.app/", "dblp": "312/3936;;g/RachidGuerraoui;;;;189/6914;158/4544-1", "google_scholar": "kVZu88cAAAAJ;;;;;;https://scholar.google.ch/citations?user=EegvylkAAAAJ;jUfDXOsAAAAJ", "orcid": "0000-0003-1048-7548;;;;;;0000-0002-7826-1599;0000-0002-1928-1549", "linkedin": ";;;;;;rafaelppires/?locale=en_US;rishi-s8/", "or_profile": "~Youssef_Allouah1;~Akash_Dhasade1;~Rachid_Guerraoui1;~Nirupam_Gupta1;~Anne-Marie_Kermarrec3;~Rafael_Pinot1;~Rafael_Pires1;~Rishi_Sharma2", "aff": "Stanford University;;;;;;EPFL - EPF Lausanne;Massachusetts Institute of Technology", "aff_domain": "stanford.edu;;;;;;epfl.ch;mit.edu", "position": "Visiting student researcher;;;;;;Postdoc;Intern", "bibtex": "@inproceedings{\nallouah2024revisiting,\ntitle={Revisiting Ensembling in One-Shot Federated Learning},\nauthor={Youssef Allouah and Akash Dhasade and Rachid Guerraoui and Nirupam Gupta and Anne-Marie Kermarrec and Rafael Pinot and Rafael Pires and Rishi Sharma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7rWTS2wuYX}\n}", "github": "", "reviewers": "3Dng;y6N3;B5VB;zQJt", "pdf_size": 631253, "rating": "3;5;6;7", "confidence": "4;4;5;4", "soundness": "3;3;4;3", "novelty": "2;2;2;3", "presentation": "2;3;4;3", "wc_summary": "55;103;49;111", "wc_strengths": "51;114;28;251", "wc_weaknesses": "255;273;33;671", "wc_questions": "44;57;33;69", "wc_limitations": "6;17;6;4", "wc_review": "411;564;149;1106", "wc_reply_reviewers": "127;41;0;0", "wc_reply_authors": "353;29;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.5, 27.726341266023542 ], "wc_strengths_avg": [ 111.0, 86.7438758645243 ], "wc_weaknesses_avg": [ 308.0, 229.9065027353511 ], "wc_questions_avg": [ 50.75, 13.534677683639163 ], "wc_limitations_avg": [ 8.25, 5.11737237261468 ], "wc_review_avg": [ 557.5, 349.72453445533387 ], "wc_reply_reviewers_avg": [ 42.0, 51.85074734273364 ], "wc_reply_authors_avg": [ 95.5, 149.13835858021235 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13533255327932602125&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "stanford.edu;;;;;;epfl.ch;mit.edu", "author_num": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "Stanford University;EPFL;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.epfl.ch;https://web.mit.edu", "aff_unique_abbr": "Stanford;EPFL;MIT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "AlphaTablets: A Generic Plane Representation for 3D Planar Reconstruction from Monocular Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96421", "id": "7rrJQ9iWoX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7rrJQ9iWoX", "openreview": "https://openreview.net/forum?id=7rrJQ9iWoX", "poster": "/media/PosterPDFs/NeurIPS%202024/96421.png?t=1731594975.380808", "project": "", "author_site": "Yuze He, Wang Zhao, Shaohui Liu, Yubin Hu, Yushi Bai, Yu-Hui Wen, Yong-jin Liu", "tldr": "", "abstract": "We introduce AlphaTablets, a novel and generic representation of 3D planes that features continuous 3D surface and precise boundary delineation. By representing 3D planes as rectangles with alpha channels, AlphaTablets combine the advantages of current 2D and 3D plane representations, enabling accurate, consistent and flexible modeling of 3D planes. We derive differentiable rasterization on top of AlphaTablets to efficiently render 3D planes into images, and propose a novel bottom-up pipeline for 3D planar reconstruction from monocular videos. Starting with 2D superpixels and geometric cues from pre-trained models, we initialize 3D planes as AlphaTablets and optimize them via differentiable rendering. An effective merging scheme is introduced to facilitate the growth and refinement of AlphaTablets. Through iterative optimization and merging, we reconstruct complete and accurate 3D planes with solid surfaces and clear boundaries. Extensive experiments on the ScanNet dataset demonstrate state-of-the-art performance in 3D planar reconstruction, underscoring the great potential of AlphaTablets as a generic 3D plane representation for various applications.", "keywords": "3D Planar Reconstruction;3D Plane Representation", "primary_area": "machine_vision", "supplementary_material": "/attachment/8bbf3c3e6f0b5be5dc15047b17737aa1ee2e4362.zip", "author": "Yuze He;Wang Zhao;Shaohui Liu;Yubin Hu;Yushi Bai;Yu-Hui Wen;Yong-jin Liu", "authorids": "~Yuze_He1;~Wang_Zhao1;~Shaohui_Liu1;~Yubin_Hu1;~Yushi_Bai1;~Yu-Hui_Wen2;~Yong-jin_Liu1", "gender": "M;M;M;M;M;;M", "homepage": ";https://thuzhaowang.github.io;http://b1ueber2y.me;;https://bys0318.github.io/;;https://cg.cs.tsinghua.edu.cn/people/~Yongjin/Yongjin.htm", "dblp": ";;;266/8226-1;302/4421;;27/2098", "google_scholar": "bYeKwD8AAAAJ;oKqr-ZQAAAAJ;I5svuawAAAAJ;swN2J1QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.tw/citations?user=GNDtwWQAAAAJ", "orcid": ";0000-0001-8925-8574;;0000-0001-6107-2858;;;0000-0001-5774-1916", "linkedin": ";;;;;;", "or_profile": "~Yuze_He1;~Wang_Zhao1;~Shaohui_Liu1;~Yubin_Hu1;~Yushi_Bai1;~Yu-Hui_Wen2;~Yong-jin_Liu1", "aff": "Tsinghua University;Tsinghua University;Google;Tsinghua University;Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;google.com;tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;PhD student;Intern;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nhe2024alphatablets,\ntitle={AlphaTablets: A Generic Plane Representation for 3D Planar Reconstruction from Monocular Videos},\nauthor={Yuze He and Wang Zhao and Shaohui Liu and Yubin Hu and Yushi Bai and Yu-Hui Wen and Yong-jin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7rrJQ9iWoX}\n}", "github": "", "reviewers": "ELQv;Ku7J;VGFx", "pdf_size": 36375457, "rating": "5;6;7", "confidence": "3;5;5", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "183;93;80", "wc_strengths": "69;116;169", "wc_weaknesses": "226;271;134", "wc_questions": "159;2;7", "wc_limitations": "24;6;9", "wc_review": "661;488;399", "wc_reply_reviewers": "0;135;10", "wc_reply_authors": "0;424;0", "reply_reviewers": "0;3;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 118.66666666666667, 45.79907810814051 ], "wc_strengths_avg": [ 118.0, 40.849316599750686 ], "wc_weaknesses_avg": [ 210.33333333333334, 57.01656679324781 ], "wc_questions_avg": [ 56.0, 72.86059749045891 ], "wc_limitations_avg": [ 13.0, 7.874007874011811 ], "wc_review_avg": [ 516.0, 108.77806151364652 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 61.41841924229426 ], "wc_reply_authors_avg": [ 141.33333333333334, 199.87551681539742 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BGjZf3b9IZgJ:scholar.google.com/&scioq=AlphaTablets:+A+Generic+Plane+Representation+for+3D+Planar+Reconstruction+from+Monocular+Videos&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;google.com;tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Tsinghua University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.google.com", "aff_unique_abbr": "THU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "LAM3D: Large Image-Point Clouds Alignment Model for 3D Reconstruction from Single Image", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96420", "id": "7s53dAJlwz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7s53dAJlwz", "openreview": "https://openreview.net/forum?id=7s53dAJlwz", "poster": "/media/PosterPDFs/NeurIPS%202024/96420.png?t=1731204609.737736", "project": "", "author_site": "Ruikai Cui, Xibin Song, Weixuan Sun, Senbo Wang, Weizhe Liu, Shenzhou Chen, Taizhang Shang, YANG LI, Nick Barnes, Hongdong Li, Pan Ji", "tldr": "", "abstract": "Large Reconstruction Models have made significant strides in the realm of automated 3D content generation from single or multiple input images. Despite their success, these models often produce 3D meshes with geometric inaccuracies, stemming from the inherent challenges of deducing 3D shapes solely from image data. In this work, we introduce a novel framework, the Large Image and Point Cloud Alignment Model (LAM3D), which utilizes 3D point cloud data to enhance the fidelity of generated 3D meshes. Our methodology begins with the development of a point-cloud-based network that effectively generates precise and meaningful latent tri-planes, laying the groundwork for accurate 3D mesh reconstruction. Building upon this, our Image-Point-Cloud Feature Alignment technique processes a single input image, aligning to the latent tri-planes to imbue image features with robust 3D information. This process not only enriches the image features but also facilitates the production of high-fidelity 3D meshes without the need for multi-view input, significantly reducing geometric distortions. Our approach achieves state-of-the-art high-fidelity 3D mesh reconstruction from a single image in just 6 seconds, and experiments on various datasets demonstrate its effectiveness.", "keywords": "3D Representation;3D Reconstruction;Feature Alignment", "primary_area": "generative_models", "supplementary_material": "", "author": "Ruikai Cui;Xibin Song;Weixuan Sun;Senbo Wang;Weizhe Liu;Shenzhou Chen;Taizhang Shang;YANG LI;Nick Barnes;Hongdong Li;Pan Ji", "authorids": "~Ruikai_Cui1;~Xibin_Song2;~Weixuan_Sun1;~Senbo_Wang1;~Weizhe_Liu2;~Shenzhou_Chen1;~Taizhang_Shang1;~YANG_LI49;~Nick_Barnes3;~Hongdong_Li1;~Pan_Ji3", "gender": "M;M;M;M;M;M;M;M;M;M;M", "homepage": "https://www.ruikai.ink;https://xbsong.github.io;https://weixuansun.github.io/weixuansun-github.io/;;https://weizheliu.github.io;;https://yang-l1.github.io/;http://users.cecs.anu.edu.au/~nmb/;http://users.cecs.anu.edu.au/~hongdong/;https://sites.google.com/view/panji530;https://github.com/AbnerCSZ", "dblp": "279/0571;;186/6724;;25/7664;264/5772.html;37/4190-193;41/2904;59/4859.html;133/6525;268/8089", "google_scholar": ";2gudyEQAAAAJ;vIS56AoAAAAJ;J6ylgW0AAAAJ;https://scholar.google.com/citations?hl=en;rv9ymNYAAAAJ;ECzmAC8AAAAJ;https://scholar.google.com.au/citations?user=yMXs1WcAAAAJ;https://scholar.google.com.tw/citations?hl=en;https://scholar.google.com/citations?hl=en;uCOzg9EAAAAJ", "orcid": ";;;0000-0001-6007-7593;;0000-0002-9402-6919;;;;;", "linkedin": ";;;;weizhe-liu-68873589/;;;;;;", "or_profile": "~Ruikai_Cui1;~Xibin_Song2;~Weixuan_Sun1;~Senbo_Wang1;~Weizhe_Liu2;~Taizhang_Shang1;~YANG_LI49;~Nick_Barnes3;~Hongdong_Li1;~Pan_Ji2;~Shenzhou1", "aff": "Australian National University;Tencent XR Vision Lab;Tencent;Tencent ;Tencent;Tencent;Tencent XR Vision Lab;Australian National University;Australian National University;Tencent XR Vision Labs;Tencent XR Vision Labs", "aff_domain": "anu.edu.au;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;anu.edu.au;anu.edu.au;tencent.com;tencent.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Full Professor;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\ncui2024lamd,\ntitle={{LAM}3D: Large Image-Point Clouds Alignment Model for 3D Reconstruction from Single Image},\nauthor={Ruikai Cui and Xibin Song and Weixuan Sun and Senbo Wang and Weizhe Liu and Shenzhou Chen and Taizhang Shang and YANG LI and Nick Barnes and Hongdong Li and Pan Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7s53dAJlwz}\n}", "github": "", "reviewers": "XUip;tycK;1gWL;vD6Z", "pdf_size": 7742883, "rating": "4;4;5;7", "confidence": "5;2;4;5", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "88;90;94;145", "wc_strengths": "44;48;89;116", "wc_weaknesses": "214;197;86;175", "wc_questions": "39;102;164;31", "wc_limitations": "52;62;1;18", "wc_review": "437;499;434;485", "wc_reply_reviewers": "0;0;48;51", "wc_reply_authors": "0;0;85;31", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 104.25, 23.62599204266352 ], "wc_strengths_avg": [ 74.25, 29.852763691155968 ], "wc_weaknesses_avg": [ 168.0, 49.320381182630776 ], "wc_questions_avg": [ 84.0, 53.754069613379045 ], "wc_limitations_avg": [ 33.25, 24.752525123712125 ], "wc_review_avg": [ 463.75, 28.699956445959984 ], "wc_reply_reviewers_avg": [ 24.75, 24.772716847370617 ], "wc_reply_authors_avg": [ 29.0, 34.72031105851444 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15627479616942552047&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 6, "email": "anu.edu.au;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;anu.edu.au;anu.edu.au;tencent.com;tencent.com", "author_num": 11, "aff_unique_index": "0;1;1;1;1;1;1;0;0;1;1", "aff_unique_norm": "Australian National University;Tencent", "aff_unique_dep": ";XR Vision Lab", "aff_unique_url": "https://www.anu.edu.au;https://www.tencent.com", "aff_unique_abbr": "ANU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;0;0;1;1", "aff_country_unique": "Australia;China" }, { "title": "The Power of Resets in Online Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96419", "id": "7sACcaOmGi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7sACcaOmGi", "openreview": "https://openreview.net/forum?id=7sACcaOmGi", "poster": "", "project": "", "author_site": "Zak Mhammedi, Dylan J Foster, Alexander Rakhlin", "tldr": "", "abstract": "Simulators are a pervasive tool in reinforcement learning, but most existing algorithms cannot efficiently exploit simulator access -- particularly in high-dimensional domains that require general function approximation. We explore the power of simulators through online reinforcement learning with local simulator access (or, local planning), an RL protocol where the agent is allowed to reset to previously observed states and follow their dynamics during training. We use local simulator access to unlock new statistical guarantees that were previously out of reach:\n- We show that MDPs with low coverability (Xie et al. 2023) -- a general structural condition that subsumes Block MDPs and Low-Rank MDPs -- can be learned in a sample-efficient fashion with only Q\u22c6-realizability (realizability of the optimal state-value function); existing online RL algorithms require significantly stronger representation conditions.\n- As a consequence, we show that the notorious Exogenous Block MDP problem (Efroni et al. 2022) is tractable under local simulator access.\nThe results above are achieved through a computationally inefficient algorithm. We complement them with a more computationally efficient algorithm, RVFS (Recursive Value Function Search), which achieves provable sample complexity guarantees under a strengthened statistical assumption known as pushforward coverability. RVFS can be viewed as a principled, provable counterpart to a successful empirical paradigm that combines recursive search (e.g., MCTS) with value function approximation.", "keywords": "Reinforcement learning;learning theory;generative model;simulator;coverability", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zakaria Mhammedi;Dylan J Foster;Alexander Rakhlin", "authorids": "~Zakaria_Mhammedi1;~Dylan_J_Foster1;~Alexander_Rakhlin1", "gender": "M;;M", "homepage": ";http://dylanfoster.net;http://www.mit.edu/~rakhlin/", "dblp": "192/1360;167/4271;59/407", "google_scholar": ";RqwU8xsAAAAJ;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zakaria_Mhammedi1;~Dylan_J_Foster1;~Alexander_Rakhlin1", "aff": "Research, Google;Microsoft Research;Massachusetts Institute of Technology", "aff_domain": "research.google.com;microsoft.com;mit.edu", "position": "Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nmhammedi2024the,\ntitle={The Power of Resets in Online Reinforcement Learning},\nauthor={Zakaria Mhammedi and Dylan J Foster and Alexander Rakhlin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7sACcaOmGi}\n}", "github": "", "reviewers": "DqGS;XHMe;bJ3L;1avF", "pdf_size": 880629, "rating": "4;6;7;7", "confidence": "1;2;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "19;84;65;132", "wc_strengths": "6;78;36;41", "wc_weaknesses": "13;212;78;308", "wc_questions": "15;384;66;174", "wc_limitations": "8;41;1;15", "wc_review": "61;799;246;670", "wc_reply_reviewers": "0;111;8;170", "wc_reply_authors": "0;0;0;201", "reply_reviewers": "0;1;1;2", "reply_authors": "0;1;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 40.51542915976579 ], "wc_strengths_avg": [ 40.25, 25.577089357469898 ], "wc_weaknesses_avg": [ 152.75, 114.81588522499837 ], "wc_questions_avg": [ 159.75, 141.626930701756 ], "wc_limitations_avg": [ 16.25, 15.122417134836613 ], "wc_review_avg": [ 444.0, 301.2449169695648 ], "wc_reply_reviewers_avg": [ 72.25, 71.4225979645098 ], "wc_reply_authors_avg": [ 50.25, 87.03555308033609 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9847319278346618, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2345732942335526383&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "research.google.com;microsoft.com;mit.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Microsoft;Massachusetts Institute of Technology", "aff_unique_dep": "Google Research;Microsoft Research;", "aff_unique_url": "https://research.google;https://www.microsoft.com/en-us/research;https://web.mit.edu", "aff_unique_abbr": "Google;MSR;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "QTIP: Quantization with Trellises and Incoherence Processing", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96418", "id": "7sdkLVuYCU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7sdkLVuYCU", "openreview": "https://openreview.net/forum?id=7sdkLVuYCU", "poster": "", "project": "", "author_site": "Albert Tseng, Qingyao Sun, David Hou, Christopher De Sa", "tldr": "", "abstract": "Post-training quantization (PTQ) reduces the memory footprint of LLMs by quantizing weights to low-precision datatypes.\nSince LLM inference is usually memory-bound, PTQ methods can improve inference throughput.\nRecent state-of-the-art PTQ approaches use vector quantization (VQ) to quantize multiple weights at once, which improves information utilization through better shaping.\nHowever, VQ requires a codebook with size exponential in the dimension.\nThis limits current VQ-based PTQ works to low VQ dimensions ($\\le 8$) that in turn limit quantization quality.\nHere, we introduce QTIP, which instead uses trellis coded quantization (TCQ) to achieve ultra-high-dimensional quantization. \nTCQ uses a stateful decoder that separates the codebook size from the bitrate and effective dimension. \nQTIP introduces a spectrum of lookup-only to computed lookup-free trellis codes designed for a hardware-efficient \"bitshift\" trellis structure; these codes achieve state-of-the-art results in both quantization quality and inference speed.", "keywords": "quantization;llms;trellises;fast inference;post training quantization;trellis coded quantization;model compression;computed codes", "primary_area": "other", "supplementary_material": "", "author": "Albert Tseng;Qingyao Sun;David Hou;Christopher De Sa", "authorids": "~Albert_Tseng1;~Qingyao_Sun1;~David_Hou2;~Christopher_De_Sa2", "gender": ";;;M", "homepage": "https://tsengalb99.github.io/;https://nalzok.github.io/;https://github.com/chaosagent;http://cs.cornell.edu/~cdesa", "dblp": "249/9439;271/4259;;154/6336", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Albert_Tseng1;~Qingyao_Sun1;~David_Hou2;~Christopher_De_Sa1", "aff": "Cornell University;Cornell University;California Institute of Technology;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;caltech.edu;cornell.edu", "position": "PhD student;PhD student;Open Source Contributor;Assistant Professor", "bibtex": "@inproceedings{\ntseng2024qtip,\ntitle={{QTIP}: Quantization with Trellises and Incoherence Processing},\nauthor={Albert Tseng and Qingyao Sun and David Hou and Christopher De Sa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7sdkLVuYCU}\n}", "github": "", "reviewers": "F569;ZTfk;3PHK;1CRX", "pdf_size": 1289045, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "150;165;49;47", "wc_strengths": "152;107;57;57", "wc_weaknesses": "249;327;173;124", "wc_questions": "210;350;248;6", "wc_limitations": "10;4;1;13", "wc_review": "771;953;528;247", "wc_reply_reviewers": "147;1241;122;13", "wc_reply_authors": "934;1611;17;709", "reply_reviewers": "1;3;1;1", "reply_authors": "3;5;2;4", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.75, 55.0107943952821 ], "wc_strengths_avg": [ 93.25, 39.58771905528279 ], "wc_weaknesses_avg": [ 218.25, 76.97848725455704 ], "wc_questions_avg": [ 203.5, 124.99099967597667 ], "wc_limitations_avg": [ 7.0, 4.743416490252569 ], "wc_review_avg": [ 624.75, 265.13805366261556 ], "wc_reply_reviewers_avg": [ 380.75, 499.21457060065865 ], "wc_reply_authors_avg": [ 817.75, 569.163146646021 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2152171083385389375&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.cornell.edu;cornell.edu;caltech.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Cornell University;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.caltech.edu", "aff_unique_abbr": "Cornell;Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ContactField: Implicit Field Representation for Multi-Person Interaction Geometry", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96417", "id": "7su2GfqvmN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7su2GfqvmN", "openreview": "https://openreview.net/forum?id=7su2GfqvmN", "poster": "/media/PosterPDFs/NeurIPS%202024/96417.png?t=1732260082.3500805", "project": "", "author_site": "Hansol Lee, Tackgeun You, Hansoo Park, Woohyeon Shim, Sanghyeon Kim, Hwasup Lim", "tldr": "", "abstract": "We introduce a novel implicit field representation tailored for multi-person interaction geometry in 3D spaces, capable of simultaneously reconstructing occupancy, instance identification (ID) tags, and contact fields. Volumetric representation of interacting human bodies presents significant challenges, including inaccurately captured geometries, varying degrees of occlusion, and data scarcity. Existing multi-view methods, which either reconstruct each subject in isolation or merge nearby 3D surfaces into a single unified mesh, often fail to capture the intricate geometry between interacting bodies and exploit on datasets with many views and a small group of people for training. Our approach utilizes an implicit representation for interaction geometry contextualized by a multi-view local-global feature module. This module adeptly aggregates both local and global information from individual views and interacting groups, enabling precise modeling of close physical interactions through dense point retrieval in small areas, supported by the implicit fields. Furthermore, we develop a synthetic dataset encompassing diverse multi-person interaction scenarios to enhance the robustness of our geometry estimation. The experimental results demonstrate the superiority of our method to accurately reconstruct human geometries and ID tags within three-dimensional spaces, outperforming conventional multi-view techniques. Notably, our method facilitates unsupervised estimation of contact points without the need for specific training data on contact supervision.", "keywords": "3D recontruction;multi-view;3D human estimation", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/5800bd29cc266a559eaf44f5c2b364fb23da8e16.zip", "author": "Hansol Lee;Tackgeun You;Hansoo Park;Woohyeon Shim;Sanghyeon Kim;Hwasup Lim", "authorids": "~Hansol_Lee1;~Tackgeun_You1;~Hansoo_Park1;~Woohyeon_Shim2;~Sanghyeon_Kim2;~Hwasup_Lim1", "gender": "F;M;M;M;M;F", "homepage": "https://2hansol.github.io/;http://tackgeun.github.io/;;;https://www.hwasup.net/;https://sim0609.tistory.com/", "dblp": ";159/2150;;;99/510;", "google_scholar": ";VDOtnIEAAAAJ;;;jA1KmF0AAAAJ;", "orcid": ";;;;0000-0003-2957-668X;", "linkedin": ";;hansoopark/;%EC%83%81%ED%98%84-%EA%B9%80-243b15159/;;", "or_profile": "~Hansol_Lee1;~Tackgeun_You1;~Hansoo_Park1;~Sanghyeon_Kim2;~Hwasup_Lim1;~Shimwoohyeon1", "aff": "Ulsan National Institute of Science and Technology;POSTECH;;Sogang University;Korea Institute of Science and Technology;Sungshin Women's University", "aff_domain": "unist.ac.kr;postech.edu;;sogang.ac.kr;kist.re.kr;sungshin.ac.kr", "position": "MS student;PhD student;;Undergrad student;Principal Researcher;Undergrad student", "bibtex": "@inproceedings{\nlee2024contactfield,\ntitle={ContactField: Implicit Field Representation for Multi-Person Interaction Geometry},\nauthor={Hansol Lee and Tackgeun You and Hansoo Park and Woohyeon Shim and Sanghyeon Kim and Hwasup Lim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7su2GfqvmN}\n}", "github": "", "reviewers": "Vhig;N9qi;A26V;Ar19", "pdf_size": 16025538, "rating": "4;5;6;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "71;52;47;267", "wc_strengths": "20;30;62;84", "wc_weaknesses": "129;39;152;629", "wc_questions": "50;48;36;9", "wc_limitations": "10;8;13;9", "wc_review": "280;177;310;998", "wc_reply_reviewers": "56;9;9;45", "wc_reply_authors": "832;47;50;342", "reply_reviewers": "2;1;1;2", "reply_authors": "4;2;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 109.25, 91.51605050481582 ], "wc_strengths_avg": [ 49.0, 25.475478405713993 ], "wc_weaknesses_avg": [ 237.25, 230.08517444633412 ], "wc_questions_avg": [ 35.75, 16.345871038277526 ], "wc_limitations_avg": [ 10.0, 1.8708286933869707 ], "wc_review_avg": [ 441.25, 325.20253304671536 ], "wc_reply_reviewers_avg": [ 29.75, 21.111312133545844 ], "wc_reply_authors_avg": [ 317.75, 320.17056001450226 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:07xCIWO1O_UJ:scholar.google.com/&scioq=ContactField:+Implicit+Field+Representation+for+Multi-Person+Interaction+Geometry&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "unist.ac.kr;postech.edu;;sogang.ac.kr;kist.re.kr;sungshin.ac.kr", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Ulsan National Institute of Science and Technology;Pohang University of Science and Technology;Sogang University;Korea Institute of Science and Technology;Sungshin Women's University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.unist.ac.kr;https://www.postech.ac.kr;https://www.sogang.ac.kr;https://www.kist.re.kr;http://www.sungshin.ac.kr", "aff_unique_abbr": "UNIST;POSTECH;Sogang;KIST;SWU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pohang", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "FASTopic: Pretrained Transformer is a Fast, Adaptive, Stable, and Transferable Topic Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96416", "id": "7t6aq0Fa9D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7t6aq0Fa9D", "openreview": "https://openreview.net/forum?id=7t6aq0Fa9D", "poster": "/media/PosterPDFs/NeurIPS%202024/96416.png?t=1729780464.1249776", "project": "", "author_site": "Xiaobao Wu, Thong Nguyen, Delvin Zhang, William Yang Wang, Anh Tuan Luu", "tldr": "", "abstract": "Topic models have been evolving rapidly over the years, from conventional to recent neural models. However, existing topic models generally struggle with either effectiveness, efficiency, or stability, highly impeding their practical applications. In this paper, we propose FASTopic, a fast, adaptive, stable, and transferable topic model. FASTopic follows a new paradigm: Dual Semantic-relation Reconstruction (DSR). Instead of previous conventional, VAE-based, or clustering-based methods, DSR directly models the semantic relations among document embeddings from a pretrained Transformer and learnable topic and word embeddings. By reconstructing through these semantic relations, DSR discovers latent topics. This brings about a neat and efficient topic modeling framework. We further propose a novel Embedding Transport Plan (ETP) method. Rather than early straightforward approaches, ETP explicitly regularizes the semantic relations as optimal transport plans. This addresses the relation bias issue and thus leads to effective topic modeling. Extensive experiments on benchmark datasets demonstrate that our FASTopic shows superior effectiveness, efficiency, adaptivity, stability, and transferability, compared to state-of-the-art baselines across various scenarios.", "keywords": "topic model;topic modeling;document embedding", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/e762f67beab73193cb5902c93f1fda3871763c4f.zip", "author": "Xiaobao Wu;Thong Thanh Nguyen;Delvin Ce Zhang;William Yang Wang;Anh Tuan Luu", "authorids": "~Xiaobao_Wu1;~Thong_Thanh_Nguyen1;~Delvin_Ce_Zhang1;~William_Yang_Wang2;~Anh_Tuan_Luu2", "gender": ";M;M;M;M", "homepage": "https://bobxwu.github.io/;https://nguyentthong.github.io/;https://tuanluu.github.io/;http://delvincezhang.com;https://www.cs.ucsb.edu/~william/", "dblp": "249/8429;29/5255.html;81/8329.html;97/919-4;08/9282", "google_scholar": "Y1oag4sAAAAJ;C2zb0lkAAAAJ;https://scholar.google.com.sg/citations?hl=en;0PcgNCsAAAAJ;gf8Ms_8AAAAJ", "orcid": ";;;0000-0001-5571-9766;", "linkedin": "xiaobao-wu/;;;;", "or_profile": "~Xiaobao_Wu1;~Thong_Thanh_Nguyen1;~Anh_Tuan_Luu2;~Ce_Zhang3;~William_Wang1", "aff": "Nanyang Technological University;National University of Singapore;Nanyang Technological University;Yale University;UC Santa Barbara", "aff_domain": "ntu.edu.sg;nus.edu;ntu.edu.sg;yale.edu;ucsb.edu", "position": "PhD student;PhD student;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwu2024fastopic,\ntitle={{FAST}opic: Pretrained Transformer is a Fast, Adaptive, Stable, and Transferable Topic Model},\nauthor={Xiaobao Wu and Thong Thanh Nguyen and Delvin Ce Zhang and William Yang Wang and Anh Tuan Luu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7t6aq0Fa9D}\n}", "github": "", "reviewers": "FTng;Py7Z;YWcC;ceW2", "pdf_size": 1363867, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "114;71;46;48", "wc_strengths": "84;34;24;59", "wc_weaknesses": "143;125;488;63", "wc_questions": "89;2;63;96", "wc_limitations": "28;1;2;5", "wc_review": "458;233;623;271", "wc_reply_reviewers": "0;0;41;0", "wc_reply_authors": "56;0;19;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 27.371289702898547 ], "wc_strengths_avg": [ 50.25, 23.284920012746447 ], "wc_weaknesses_avg": [ 204.75, 166.20525713707133 ], "wc_questions_avg": [ 62.5, 37.030392922571046 ], "wc_limitations_avg": [ 9.0, 11.067971810589327 ], "wc_review_avg": [ 396.25, 156.17838358748628 ], "wc_reply_reviewers_avg": [ 10.25, 17.75352077758099 ], "wc_reply_authors_avg": [ 18.75, 22.862359895688808 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17787717198854157507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;nus.edu;ntu.edu.sg;yale.edu;ucsb.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Nanyang Technological University;National University of Singapore;Yale University;University of California, Santa Barbara", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.nus.edu.sg;https://www.yale.edu;https://www.ucsb.edu", "aff_unique_abbr": "NTU;NUS;Yale;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "Singapore;United States" }, { "title": "Flipping-based Policy for Chance-Constrained Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96415", "id": "7t9eDEY2GT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7t9eDEY2GT", "openreview": "https://openreview.net/forum?id=7t9eDEY2GT", "poster": "/media/PosterPDFs/NeurIPS%202024/96415.png?t=1731140852.1807058", "project": "", "author_site": "Xun Shen, Shuo Jiang, Akifumi Wachi, Kazumune Hashimoto, Sebastien Gros", "tldr": "", "abstract": "Safe reinforcement learning (RL) is a promising approach for many real-world decision-making problems where ensuring safety is a critical necessity. In safe RL research, while expected cumulative safety constraints (ECSCs) are typically the first choices, chance constraints are often more pragmatic for incorporating safety under uncertainties. This paper proposes a \\textit{flipping-based policy} for Chance-Constrained Markov Decision Processes (CCMDPs). The flipping-based policy selects the next action by tossing a potentially distorted coin between two action candidates. The probability of the flip and the two action candidates vary depending on the state. We establish a Bellman equation for CCMDPs and further prove the existence of a flipping-based policy within the optimal solution sets. Since solving the problem with joint chance constraints is challenging in practice, we then prove that joint chance constraints can be approximated into Expected Cumulative Safety Constraints (ECSCs) and that there exists a flipping-based policy in the optimal solution sets for constrained MDPs with ECSCs. As a specific instance of practical implementations, we present a framework for adapting constrained policy optimization to train a flipping-based policy. This framework can be applied to other safe RL algorithms. We demonstrate that the flipping-based policy can improve the performance of the existing safe RL algorithms under the same limits of safety constraints on Safety Gym benchmarks.", "keywords": "Reinforcement learning;Chance constraints;Stochastic policy", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1c0d528bd9ce4a1553b55858a832933e4dc0af61.zip", "author": "Xun Shen;Shuo Jiang;Akifumi Wachi;Kazumune Hashimoto;Sebastien Gros", "authorids": "~Xun_Shen1;~Shuo_Jiang2;~Akifumi_Wachi2;~Kazumune_Hashimoto1;~Sebastien_Gros1", "gender": "M;M;M;M;", "homepage": "https://sites.google.com/view/xunshen/home;https://github.com/Shuoj1238/Shuo-Jiang.git;https://akifumi-wachi-4.github.io/website/;https://sites.google.com/view/kazumunehashimotoupdate/;https://www.researchgate.net/profile/Sebastien-Gros-2", "dblp": "193/0660;;218/7526;166/3737;125/5655.html", "google_scholar": "TPQUTVsAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.jp/citations?user=iC2b9GUAAAAJ;https://scholar.google.co.jp/citations?user=uk_8zNcAAAAJ;https://scholar.google.no/citations?user=38fYqeYAAAAJ", "orcid": ";;;;", "linkedin": "%E8%BF%85-%E6%B2%88-8a3387224/;;akifumi-wachi-008654123/?originalSubdomain=jp;;", "or_profile": "~Xun_Shen1;~Shuo_Jiang2;~Akifumi_Wachi2;~Kazumune_Hashimoto1;~Sebastien_Gros1", "aff": "Osaka University;Osaka University, Tokyo Institute of Technology;LY Corporation;Osaka University;Norwegian Institute of Technology", "aff_domain": "eei.eng.osaka-u.ac.jp;osaka-u.ac.jp;lycorp.co.jp;osaka-u.ac.jp;ntnu.no", "position": "Assistant Professor;MS student;Chief Research Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nshen2024flippingbased,\ntitle={Flipping-based Policy for Chance-Constrained Markov Decision Processes},\nauthor={Xun Shen and Shuo Jiang and Akifumi Wachi and Kazumune Hashimoto and Sebastien Gros},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7t9eDEY2GT}\n}", "github": "", "reviewers": "GDWH;g2rG;7QvG;7qnc", "pdf_size": 4403956, "rating": "5;5;7;7", "confidence": "3;3;3;3", "soundness": "3;2;3;4", "novelty": "2;2;2;3", "presentation": "2;3;2;4", "wc_summary": "59;110;104;82", "wc_strengths": "29;85;80;61", "wc_weaknesses": "60;274;93;65", "wc_questions": "25;133;149;331", "wc_limitations": "1;14;3;174", "wc_review": "174;616;429;713", "wc_reply_reviewers": "0;0;148;155", "wc_reply_authors": "0;0;170;142", "reply_reviewers": "0;0;2;1", "reply_authors": "1;1;3;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 88.75, 20.09197601033806 ], "wc_strengths_avg": [ 63.75, 21.970150204311302 ], "wc_weaknesses_avg": [ 123.0, 88.08234783428516 ], "wc_questions_avg": [ 159.5, 109.90336664543084 ], "wc_limitations_avg": [ 48.0, 72.9143332959988 ], "wc_review_avg": [ 483.0, 205.53953391014585 ], "wc_reply_reviewers_avg": [ 75.75, 75.79041825983018 ], "wc_reply_authors_avg": [ 78.0, 78.62569554541314 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18262956834957156076&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "eei.eng.osaka-u.ac.jp;osaka-u.ac.jp;lycorp.co.jp;osaka-u.ac.jp;ntnu.no", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Osaka University;LY Corporation;Norwegian Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.osaka-u.ac.jp;;https://www.ntnu.no", "aff_unique_abbr": "Osaka U;;NTNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;2", "aff_country_unique": "Japan;;Norway" }, { "title": "Randomized Exploration for Reinforcement Learning with Multinomial Logistic Function Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96414", "id": "7tRtH0AoBl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7tRtH0AoBl", "openreview": "https://openreview.net/forum?id=7tRtH0AoBl", "poster": "/media/PosterPDFs/NeurIPS%202024/96414.png?t=1733707046.3327348", "project": "", "author_site": "Wooseong Cho, Taehyun Hwang, Joongkyu Lee, Min-hwan Oh", "tldr": "", "abstract": "We study reinforcement learning with _multinomial logistic_ (MNL) function approximation where the underlying transition probability kernel of the _Markov decision processes_ (MDPs) is parametrized by an unknown transition core with features of state and action. For the finite horizon episodic setting with inhomogeneous state transitions, we propose provably efficient algorithms with randomized exploration having frequentist regret guarantees. For our first algorithm, $\\texttt{RRL-MNL}$, we adapt optimistic sampling to ensure the optimism of the estimated value function with sufficient frequency and establish that $\\texttt{RRL-MNL}$ is both _statistically_ and _computationally_ efficient, achieving a $\\tilde{\\mathcal{O}}(\\kappa^{-1} d^{\\frac{3}{2}} H^{\\frac{3}{2}} \\sqrt{T})$ frequentist regret bound with constant-time computational cost per episode. Here, $d$ is the dimension of the transition core, $H$ is the horizon length, $T$ is the total number of steps, and $\\kappa$ is a problem-dependent constant. Despite the simplicity and practicality of $\\texttt{RRL-MNL}$, its regret bound scales with $\\kappa^{-1}$, which is potentially large in the worst case. To improve the dependence on $\\kappa^{-1}$, we propose $\\texttt{ORRL-MNL}$, which estimates the value function using local gradient information of the MNL transition model. We show that its frequentist regret bound is $\\tilde{\\mathcal{O}}(d^{\\frac{3}{2}} H^{\\frac{3}{2}} \\sqrt{T} + \\kappa^{-1} d^2 H^2)$. To the best of our knowledge, these are the first randomized RL algorithms for the MNL transition model that achieve both computational and statistical efficiency. Numerical experiments demonstrate the superior performance of the proposed algorithms.", "keywords": "Reinforcement learning;Function approximation;Multinomial logistic regression;Regret analysis", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/5d337cf875821150d8c3b1481ca271ac0564311a.zip", "author": "Wooseong Cho;Taehyun Hwang;Joongkyu Lee;Min-hwan Oh", "authorids": "~Wooseong_Cho1;~Taehyun_Hwang1;~Joongkyu_Lee1;~Min-hwan_Oh1", "gender": "M;;M;", "homepage": ";;https://joongkyulee.com/;https://minoh.io", "dblp": "142/9218;;368/2751;172/0531", "google_scholar": "https://scholar.google.com/citations?hl=ko;;YWZakwkAAAAJ;KzVALFwAAAAJ", "orcid": ";;;", "linkedin": "wooseong-cho-ba7a14333;;joongkyu-lee-939aa91a7;", "or_profile": "~Wooseong_Cho1;~Taehyun_Hwang1;~Joongkyu_Lee1;~Min-hwan_Oh1", "aff": "Seoul National University;;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr;snu.ac.kr", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncho2024randomized,\ntitle={Randomized Exploration for Reinforcement Learning with Multinomial Logistic Function Approximation},\nauthor={Wooseong Cho and Taehyun Hwang and Joongkyu Lee and Min-hwan Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7tRtH0AoBl}\n}", "github": "", "reviewers": "rYza;u5d2;QBih;Ftt2", "pdf_size": 2986213, "rating": "4;6;6;7", "confidence": "4;2;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "97;45;48;114", "wc_strengths": "52;30;26;119", "wc_weaknesses": "346;47;38;150", "wc_questions": "2;386;123;8", "wc_limitations": "1;30;22;1", "wc_review": "498;538;257;392", "wc_reply_reviewers": "415;36;47;21", "wc_reply_authors": "1947;787;752;160", "reply_reviewers": "1;1;1;1", "reply_authors": "5;4;4;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 30.124740662784138 ], "wc_strengths_avg": [ 56.75, 37.278512577623054 ], "wc_weaknesses_avg": [ 145.25, 123.97454375798283 ], "wc_questions_avg": [ 129.75, 155.60587231849576 ], "wc_limitations_avg": [ 13.5, 12.816005617976296 ], "wc_review_avg": [ 421.25, 108.80573284528715 ], "wc_reply_reviewers_avg": [ 129.75, 164.9475295359103 ], "wc_reply_authors_avg": [ 911.5, 647.6791258022756 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4DDTQPUw3PsJ:scholar.google.com/&scioq=Randomized+Exploration+for+Reinforcement+Learning+with+Multinomial+Logistic+Function+Approximation&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "snu.ac.kr;;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Identifying Functionally Important Features with End-to-End Sparse Dictionary Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96413", "id": "7txPaUpUnc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7txPaUpUnc", "openreview": "https://openreview.net/forum?id=7txPaUpUnc", "poster": "/media/PosterPDFs/NeurIPS%202024/96413.png?t=1733305472.8805327", "project": "", "author_site": "Dan Braun, Jordan Taylor, Nicholas Goldowsky-Dill, Lee Sharkey", "tldr": "", "abstract": "Identifying the features learned by neural networks is a core challenge in mechanistic interpretability. Sparse autoencoders (SAEs), which learn a sparse, overcomplete dictionary that reconstructs a network's internal activations, have been used to identify these features. However, SAEs may learn more about the structure of the datatset than the computational structure of the network. There is therefore only indirect reason to believe that the directions found in these dictionaries are functionally important to the network. We propose end-to-end (e2e) sparse dictionary learning, a method for training SAEs that ensures the features learned are functionally important by minimizing the KL divergence between the output distributions of the original model and the model with SAE activations inserted. Compared to standard SAEs, e2e SAEs offer a Pareto improvement: They explain more network performance, require fewer total features, and require fewer simultaneously active features per datapoint, all with no cost to interpretability. We explore geometric and qualitative differences between e2e SAE features and standard SAE features. E2e dictionary learning brings us closer to methods that can explain network behavior concisely and accurately. We release our library for training e2e SAEs and reproducing our analysis at\nhttps://github.com/ApolloResearch/e2e_sae.", "keywords": "Mechnastic Interpretability;Interpretability;Explainability;Transparency;Sparse Coding;Causal mediation analysis;High dimensional data analysis", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/17717229ed5310e7f405f082eac100458bdb3ff4.zip", "author": "Dan Braun;Jordan Taylor;Nicholas Goldowsky-Dill;Lee Sharkey", "authorids": "~Dan_Braun1;~Jordan_Taylor3;~Nicholas_Goldowsky-Dill1;~Lee_Sharkey1", "gender": "Not Specified;M;;M", "homepage": "https://danbraunai.github.io/;https://sites.google.com/view/jordantensor/;;https://leesharkey.github.io", "dblp": "264/4978.html;367/7107.html;344/6035;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;ZzXikBsAAAAJ", "orcid": ";0000-0002-5799-0557;;", "linkedin": ";jordan-tensor/;;", "or_profile": "~Dan_Braun1;~Jordan_Taylor3;~Nicholas_Goldowsky-Dill1;~Lee_Sharkey1", "aff": "Apollo Research;NTT Research;Apollo Research;Apollo Research", "aff_domain": "apolloresearch.ai;ntt-research.com;apolloresearch.ai;apolloresearch.ai", "position": "Researcher;Intern;Researcher;Researcher", "bibtex": "@inproceedings{\nbraun2024identifying,\ntitle={Identifying Functionally Important Features with End-to-End Sparse Dictionary Learning},\nauthor={Dan Braun and Jordan Taylor and Nicholas Goldowsky-Dill and Lee Sharkey},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7txPaUpUnc}\n}", "github": "", "reviewers": "Hd7R;SeEL;ErCK;TS92;byuk", "pdf_size": 18068597, "rating": "5;5;6;6;6", "confidence": "2;3;4;4;3", "soundness": "3;3;4;4;3", "novelty": "2;2;3;3;3", "presentation": "3;2;4;4;3", "wc_summary": "64;101;121;262;68", "wc_strengths": "60;60;116;78;16", "wc_weaknesses": "248;165;134;357;122", "wc_questions": "199;111;286;36;15", "wc_limitations": "46;2;33;61;11", "wc_review": "617;439;690;794;232", "wc_reply_reviewers": "138;61;103;101;53", "wc_reply_authors": "0;431;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 123.2, 72.54626110283012 ], "wc_strengths_avg": [ 66.0, 32.29860678109816 ], "wc_weaknesses_avg": [ 205.2, 87.73004046505393 ], "wc_questions_avg": [ 129.4, 101.50586189969522 ], "wc_limitations_avg": [ 30.6, 21.767866225241278 ], "wc_review_avg": [ 554.4, 198.480830308622 ], "wc_reply_reviewers_avg": [ 91.2, 30.9735370921695 ], "wc_reply_authors_avg": [ 86.2, 172.4 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15125479253218269921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "apolloresearch.ai;ntt-research.com;apolloresearch.ai;apolloresearch.ai", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Apollo Research;NTT Research", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ntt-research.com/", "aff_unique_abbr": ";NTT Research", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Japan" }, { "title": "HENASY: Learning to Assemble Scene-Entities for Interpretable Egocentric Video-Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96412", "id": "7uWzoGn4kv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7uWzoGn4kv", "openreview": "https://openreview.net/forum?id=7uWzoGn4kv", "poster": "", "project": "", "author_site": "Khoa Vo, Thinh Phan, Kashu Yamazaki, Minh Tran, Ngan Le", "tldr": "", "abstract": "Current video-language models (VLMs) rely extensively on instance-level alignment between video and language modalities, which presents two major limitations: (1) visual reasoning disobeys the natural perception that humans do in first-person perspective, leading to a lack of reasoning interpretation; and (2) learning is limited in capturing inherent fine-grained relationships between two modalities.\n\nIn this paper, we take an inspiration from human perception and explore a compositional approach for egocentric video representation. We introduce HENASY (Hierarchical ENtities ASsemblY), which includes a spatiotemporal token grouping mechanism to explicitly assemble dynamically evolving scene entities through time and model their relationship for video representation. By leveraging compositional structure understanding, HENASY possesses strong interpretability via visual grounding with free-form text queries. We further explore a suite of multi-grained contrastive losses to facilitate entity-centric understandings. This comprises three alignment types: video-narration, noun-entity, verb-entities alignments.\n\nOur method demonstrates strong interpretability in both quantitative and qualitative experiments; while maintaining competitive performances on five downstream tasks via zero-shot transfer or as video/text representation, including video/text retrieval, action recognition, multi-choice query, natural language query, and moments query.\n\nProject page: https://uark-aicv.github.io/HENASY", "keywords": "egocentric;video understanding;vision langauge models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Khoa Vo;Thinh Phan;Kashu Yamazaki;Minh Tran;Ngan Hoang Le", "authorids": "~Khoa_Vo1;~Thinh_Phan1;~Kashu_Yamazaki1;~Minh_Tran2;~Ngan_Hoang_Le1", "gender": "M;M;M;M;F", "homepage": "https://vhvkhoa.github.io;https://sciprofiles.com/profile/1628355;;https://trqminh.github.io/;https://computer-science-and-computer-engineering.uark.edu/directory/index/uid/thile/name/Thi+Hoang+Ngan+Le/", "dblp": "224/1953;253/9015.html;280/0133;281/8723;37/245", "google_scholar": "Iyvx8vcAAAAJ;;https://scholar.google.com/citations?hl=en;AmQwXDUAAAAJ;8ck0k_UAAAAJ", "orcid": ";;;;0000-0003-2571-0511", "linkedin": ";;;;", "or_profile": "~Khoa_Vo1;~Thinh_Phan1;~Kashu_Yamazaki1;~Minh_Tran2;~Ngan_Hoang_Le1", "aff": "University of Arkansas - Fayetteville;University of Arkansas - Fayetteville;Carnegie Mellon University;University of Arkansas - Fayetteville;University of Arkansas, Fayetteville", "aff_domain": "uark.edu;uark.edu;andrew.cmu.edu;uark.edu;uark.edu", "position": "PhD student;PhD student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nvo2024henasy,\ntitle={{HENASY}: Learning to Assemble Scene-Entities for Interpretable Egocentric Video-Language Model},\nauthor={Khoa Vo and Thinh Phan and Kashu Yamazaki and Minh Tran and Ngan Hoang Le},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7uWzoGn4kv}\n}", "github": "", "reviewers": "xaMC;cNiH;EqYJ;4RbK", "pdf_size": 1949587, "rating": "4;5;5;6", "confidence": "4;3;5;3", "soundness": "3;3;3;3", "novelty": "2;3;2;2", "presentation": "2;3;2;3", "wc_summary": "50;92;91;102", "wc_strengths": "66;105;96;72", "wc_weaknesses": "260;51;116;160", "wc_questions": "10;52;3;6", "wc_limitations": "16;53;1;23", "wc_review": "402;353;307;363", "wc_reply_reviewers": "0;0;24;134", "wc_reply_authors": "187;188;203;493", "reply_reviewers": "0;0;1;2", "reply_authors": "4;4;4;4", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.75, 19.954636052807377 ], "wc_strengths_avg": [ 84.75, 16.20763708873073 ], "wc_weaknesses_avg": [ 146.75, 76.01767886485354 ], "wc_questions_avg": [ 17.75, 19.929563467371782 ], "wc_limitations_avg": [ 23.25, 18.925842121290138 ], "wc_review_avg": [ 356.25, 33.818449106959356 ], "wc_reply_reviewers_avg": [ 39.5, 55.43239125276845 ], "wc_reply_authors_avg": [ 267.75, 130.20248653539608 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10710364443896742050&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "uark.edu;uark.edu;andrew.cmu.edu;uark.edu;uark.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Arkansas;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uark.edu;https://www.cmu.edu", "aff_unique_abbr": "UARK;CMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Fayetteville;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7ug4oSmN7l", "title": "A Learning-based Capacitated Arc Routing Problem Solver Comparable to Metaheuristics While with Far Less Runtimes", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, neural networks (NN) have made great strides in combinatorial optimization problems (COPs). However, they face challenges in solving the capacitated arc routing problem (CARP) which is to find the minimum-cost tour that covers all required edges on a graph, while within capacity constraints. Actually, NN-based approaches tend to lag behind advanced metaheuristics due to complexities caused by non-Euclidean graph, traversal direction and capacity constraints. In this paper, we introduce an NN-based solver tailored for these complexities, which significantly narrows the gap with advanced metaheuristics while with far less runtimes. First, we propose the direction-aware attention model (DaAM) to in corporate directionality into the embedding process, facilitating more effective one-stage decision-making. Second, we design a supervised reinforcement learning scheme that involves supervised pre-training to establish a robust initial policy for subsequent reinforcement fine-tuning. It proves particularly valuable for solving CARP that has a higher complexity than the node routing problems (NRPs). Finally, a path optimization method is introduced to adjust the depot return positions within the path generated by DaAM. Experiments show that DaAM surpasses heuristics and achieves decision quality comparable to state-of-the-art metaheuristics for the first time while maintaining superior efficiency, even in large-scale CARP instances. The code and datasets are provided in the Appendix.", "keywords": "Capacitated Arc Routing;Metaheuristics;Reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Runze Guo;Feng Xue;Nicu Sebe;Anlong Ming", "authorids": "~Runze_Guo1;~Feng_Xue3;~Nicu_Sebe1;~Anlong_Ming1", "gender": ";M;M;M", "homepage": ";https://xuefeng-cvr.github.io/;http://disi.unitn.it/~sebe/;https://teacher.bupt.edu.cn/mal/en/index.htm", "dblp": ";;20/3519;52/3276", "google_scholar": ";66SeiQsAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;y5kFLCwAAAAJ", "orcid": "0009-0002-8074-4001;0000-0002-4101-3401;0000-0002-6597-7248;0000-0003-2952-7757", "linkedin": ";;;", "or_profile": "~Runze_Guo1;~Feng_Xue3;~Nicu_Sebe1;~Anlong_Ming1", "aff": "Beijing University of Posts and Telecommunications;University of Trento;University of Trento;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;unitn.it;unitn.it;bupt.edu.cn", "position": "Undergrad student;Postdoc;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Learning-based Capacitated Arc Routing Problem Solver Comparable to Metaheuristics While with Far Less Runtimes},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7ug4oSmN7l}\n}", "github": "", "project": "", "reviewers": "UWHC;DrmY;NUZV;h6eF", "site": "https://openreview.net/forum?id=7ug4oSmN7l", "pdf_size": 3002565, "rating": "3;4;4;6", "confidence": "5;4;3;3", "soundness": "3;2;2;2", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "86;57;31;156", "wc_strengths": "37;70;19;191", "wc_weaknesses": "48;75;36;410", "wc_questions": "106;145;41;32", "wc_limitations": "23;30;12;63", "wc_review": "300;377;139;852", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 46.682437811236895 ], "wc_strengths_avg": [ 79.25, 67.06107291119044 ], "wc_weaknesses_avg": [ 142.25, 155.22946724124256 ], "wc_questions_avg": [ 81.0, 46.69582422444217 ], "wc_limitations_avg": [ 32.0, 19.013153341831543 ], "wc_review_avg": [ 417.0, 265.423246909535 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7Vup2riPozwJ:scholar.google.com/&scioq=A+Learning-based+Capacitated+Arc+Routing+Problem+Solver+Comparable+to+Metaheuristics+While+with+Far+Less+Runtimes&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;University of Trento", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.unitn.it", "aff_unique_abbr": "BUPT;UniTN", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;Italy" }, { "title": "Not All Diffusion Model Activations Have Been Evaluated as Discriminative Features", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96411", "id": "7uqVfZW6Mo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7uqVfZW6Mo", "openreview": "https://openreview.net/forum?id=7uqVfZW6Mo", "poster": "/media/PosterPDFs/NeurIPS%202024/96411.png?t=1729469135.3742208", "project": "", "author_site": "Benyuan Meng, Qianqian Xu, Zitai Wang, Xiaochun Cao, Qingming Huang", "tldr": "", "abstract": "Diffusion models are initially designed for image generation. Recent research shows that the internal signals within their backbones, named activations, can also serve as dense features for various discriminative tasks such as semantic segmentation. Given numerous activations, selecting a small yet effective subset poses a fundamental problem. To this end, the early study of this field performs a large-scale quantitative comparison of the discriminative ability of the activations. However, we find that many potential activations have not been evaluated, such as the queries and keys used to compute attention scores. Moreover, recent advancements in diffusion architectures bring many new activations, such as those within embedded ViT modules. Both combined, activation selection remains unresolved but overlooked. To tackle this issue, this paper takes a further step with a much broader range of activations evaluated. Considering the significant increase in activations, a full-scale quantitative comparison is no longer operational. Instead, we seek to understand the properties of these activations, such that the activations that are clearly inferior can be filtered out in advance via simple qualitative evaluation. After careful analysis, we discover three properties universal among diffusion models, enabling this study to go beyond specific models. On top of this, we present effective feature selection solutions for several popular diffusion models. Finally, the experiments across multiple discriminative tasks validate the superiority of our method over the SOTA competitors. Our code is available at https://github.com/Darkbblue/generic-diffusion-feature.", "keywords": "Diffusion Models;Representation Learning;Model Property Study", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Benyuan Meng;Qianqian Xu;Zitai Wang;Xiaochun Cao;Qingming Huang", "authorids": "~Benyuan_Meng1;~Qianqian_Xu2;~Zitai_Wang1;~Xiaochun_Cao3;~Qingming_Huang2", "gender": "M;F;M;M;", "homepage": "https://github.com/Darkbblue;http://vipl.ict.ac.cn/people/~qianqianxu;https://wang22ti.github.io;https://scst.sysu.edu.cn/members/caoxiaochun.htm;https://qmhuang-ucas.github.io/", "dblp": "359/4161;07/7627;251/3361;39/3695;68/4388", "google_scholar": ";https://scholar.google.com.hk/citations?user=MjifS2MAAAAJ;45qZ_LcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": "0009-0006-5739-6781;;0000-0003-4156-6417;0000-0001-7141-708X;", "linkedin": ";;;;", "or_profile": "~Benyuan_Meng1;~Qianqian_Xu2;~Zitai_Wang1;~Xiaochun_Cao3;~Qingming_Huang2", "aff": "Institute of Information Engineering, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;SUN YAT-SEN UNIVERSITY;University of Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;ict.ac.cn;ucas.ac.cn;sysu.edu.cn;ucas.ac.cn", "position": "MS student;Full Professor;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmeng2024not,\ntitle={Not All Diffusion Model Activations Have Been Evaluated as Discriminative Features},\nauthor={Benyuan Meng and Qianqian Xu and Zitai Wang and Xiaochun Cao and Qingming Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7uqVfZW6Mo}\n}", "github": "", "reviewers": "xf1j;8YQr;cCcb;AMWL", "pdf_size": 12665440, "rating": "7;7;7;7", "confidence": "5;5;5;4", "soundness": "4;4;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "107;89;75;90", "wc_strengths": "182;107;96;108", "wc_weaknesses": "63;87;96;136", "wc_questions": "3;5;30;5", "wc_limitations": "1;1;5;10", "wc_review": "356;289;302;349", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.25, 11.344051304538427 ], "wc_strengths_avg": [ 123.25, 34.24452510986245 ], "wc_weaknesses_avg": [ 95.5, 26.31064423384574 ], "wc_questions_avg": [ 10.75, 11.143944544011335 ], "wc_limitations_avg": [ 4.25, 3.6996621467371855 ], "wc_review_avg": [ 324.0, 28.97412638890084 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=124178940934429593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "iie.ac.cn;ict.ac.cn;ucas.ac.cn;sysu.edu.cn;ucas.ac.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Sun Yat-sen University", "aff_unique_dep": "Institute of Information Engineering;;", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "CAS;UCAS;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Online Posterior Sampling with a Diffusion Prior", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96410", "id": "7v0UyO0B6q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7v0UyO0B6q", "openreview": "https://openreview.net/forum?id=7v0UyO0B6q", "poster": "/media/PosterPDFs/NeurIPS%202024/96410.png?t=1733726602.9322689", "project": "", "author_site": "Branislav Kveton, Boris Oreshkin, Youngsuk Park, Aniket Anand Deshmukh, Rui Song", "tldr": "", "abstract": "Posterior sampling in contextual bandits with a Gaussian prior can be implemented exactly or approximately using the Laplace approximation. The Gaussian prior is computationally efficient but it cannot describe complex distributions. In this work, we propose approximate posterior sampling algorithms for contextual bandits with a diffusion model prior. The key idea is to sample from a chain of approximate conditional posteriors, one for each stage of the reverse diffusion process, which are obtained by the Laplace approximation. Our approximations are motivated by posterior sampling with a Gaussian prior, and inherit its simplicity and efficiency. They are asymptotically consistent and perform well empirically on a variety of contextual bandit problems.", "keywords": "posterior sampling;diffusion models;online learning;contextual bandits", "primary_area": "bandits", "supplementary_material": "/attachment/bade5b8d56b510eeb729cc39b3fe7ada85350bd5.zip", "author": "Branislav Kveton;Boris N. Oreshkin;Youngsuk Park;Aniket Anand Deshmukh;Rui Song", "authorids": "~Branislav_Kveton1;~Boris_N._Oreshkin1;~Youngsuk_Park1;~Aniket_Anand_Deshmukh1;~Rui_Song2", "gender": "M;M;M;M;", "homepage": "http://www.bkveton.com;;https://youngsuk0723.github.io/;http://www-personal.umich.edu/~aniketde/;https://song-ray.github.io/", "dblp": "92/5526;33/1017;88/11095;;01/2743-6.html", "google_scholar": "CZaDvPgAAAAJ;https://scholar.google.ca/citations?user=48MBCeIAAAAJ;jWROvQ0AAAAJ;a4cD32QAAAAJ;", "orcid": ";;0000-0002-0970-9214;;0000-0003-1875-2115", "linkedin": ";boris-oreshkin-1710061a/;y-park;aniket2305/;", "or_profile": "~Branislav_Kveton1;~Boris_N._Oreshkin1;~Youngsuk_Park1;~Aniket_Anand_Deshmukh1;~Rui_Song2", "aff": "Amazon;Amazon;Amazon, AWS AI Labs;Amazon;North Carolina State University", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;ncsu.edu", "position": "Principal Scientist;Principal Researcher;Research;Applied Scientist;Full Professor", "bibtex": "@inproceedings{\nkveton2024online,\ntitle={Online Posterior Sampling with a Diffusion Prior},\nauthor={Branislav Kveton and Boris N. Oreshkin and Youngsuk Park and Aniket Anand Deshmukh and Rui Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7v0UyO0B6q}\n}", "github": "", "reviewers": "1tRW;YMJY;r4r4;Vag9;AZGY", "pdf_size": 575697, "rating": "5;6;6;6;7", "confidence": "3;3;2;2;3", "soundness": "3;3;3;4;3", "novelty": "2;3;2;3;3", "presentation": "4;3;2;3;3", "wc_summary": "88;104;62;47;89", "wc_strengths": "67;78;25;76;92", "wc_weaknesses": "69;141;105;107;264", "wc_questions": "44;31;87;57;36", "wc_limitations": "13;35;18;16;18", "wc_review": "281;389;297;303;499", "wc_reply_reviewers": "9;17;12;27;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.0, 20.5621010599598 ], "wc_strengths_avg": [ 67.6, 22.756098083810414 ], "wc_weaknesses_avg": [ 137.2, 67.36883552504081 ], "wc_questions_avg": [ 51.0, 20.02997753368685 ], "wc_limitations_avg": [ 20.0, 7.720103626247513 ], "wc_review_avg": [ 353.8, 81.76649680645491 ], "wc_reply_reviewers_avg": [ 16.8, 6.20966987850401 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lViKoWGAZ54J:scholar.google.com/&scioq=Online+Posterior+Sampling+with+a+Diffusion+Prior&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "amazon.com;amazon.com;amazon.com;amazon.com;ncsu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Amazon;North Carolina State University", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.ncsu.edu", "aff_unique_abbr": "Amazon;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Diffusion Priors from Observations by Expectation Maximization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96409", "id": "7v88Fh6iSM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7v88Fh6iSM", "openreview": "https://openreview.net/forum?id=7v88Fh6iSM", "poster": "/media/PosterPDFs/NeurIPS%202024/96409.png?t=1733180736.5290194", "project": "", "author_site": "Fran\u00e7ois Rozet, Gerome Andry, Francois Lanusse, Gilles Louppe", "tldr": "", "abstract": "Diffusion models recently proved to be remarkable priors for Bayesian inverse problems. However, training these models typically requires access to large amounts of clean data, which could prove difficult in some settings. In this work, we present a novel method based on the expectation-maximization algorithm for training diffusion models from incomplete and noisy observations only. Unlike previous works, our method leads to proper diffusion models, which is crucial for downstream tasks. As part of our method, we propose and motivate an improved posterior sampling scheme for unconditional diffusion models. We present empirical evidence supporting the effectiveness of our method.", "keywords": "diffusion;score-based;generative model;corrupted data;inverse problems;expectation-maximization;empirical bayes", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Fran\u00e7ois Rozet;G\u00e9r\u00f4me Andry;Francois Lanusse;Gilles Louppe", "authorids": "~Fran\u00e7ois_Rozet1;~G\u00e9r\u00f4me_Andry1;~Francois_Lanusse2;~Gilles_Louppe1", "gender": "M;M;M;M", "homepage": "https://francois-rozet.github.io/;https://gerome-andry.github.io/;https://flanusse.net;http://glouppe.github.io", "dblp": ";378/1690;;05/9382", "google_scholar": "C-WS1pwAAAAJ;YZxxzigAAAAJ;fRDCooIAAAAJ;F_77d4QAAAAJ", "orcid": "0000-0002-8846-8761;;;0000-0002-2082-3106", "linkedin": ";;;", "or_profile": "~Fran\u00e7ois_Rozet1;~G\u00e9r\u00f4me_Andry1;~Francois_Lanusse2;~Gilles_Louppe1", "aff": "Flatiron Institute;Universit\u00e9 de Li\u00e8ge;CNRS;University of Li\u00e8ge", "aff_domain": "flatironinstitute.org;ulg.ac.be;cnrs.fr;uliege.be", "position": "Intern;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nrozet2024learning,\ntitle={Learning Diffusion Priors from Observations by Expectation Maximization},\nauthor={Fran{\\c{c}}ois Rozet and G{\\'e}r{\\^o}me Andry and Francois Lanusse and Gilles Louppe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7v88Fh6iSM}\n}", "github": "", "reviewers": "s8XN;mKff;XeV5;tnYw;3yec", "pdf_size": 19262019, "rating": "4;5;5;7;8", "confidence": "5;4;4;5;5", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;4", "wc_summary": "44;81;63;230;81", "wc_strengths": "17;65;46;79;153", "wc_weaknesses": "21;250;55;296;392", "wc_questions": "340;6;103;1;18", "wc_limitations": "1;8;7;2;10", "wc_review": "423;410;274;608;654", "wc_reply_reviewers": "0;0;0;377;250", "wc_reply_authors": "0;0;0;871;39", "reply_reviewers": "0;0;0;2;1", "reply_authors": "1;1;1;4;2", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 99.8, 66.52337934891762 ], "wc_strengths_avg": [ 72.0, 45.51922670696417 ], "wc_weaknesses_avg": [ 202.8, 142.55300768486086 ], "wc_questions_avg": [ 93.6, 128.6586180556903 ], "wc_limitations_avg": [ 5.6, 3.4985711369071804 ], "wc_review_avg": [ 473.8, 139.32178580537933 ], "wc_reply_reviewers_avg": [ 125.4, 158.74709446159952 ], "wc_reply_authors_avg": [ 182.0, 344.8309730868154 ], "reply_reviewers_avg": [ 0.6, 0.8 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.44444444444444436, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6514057711800987501&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "flatironinstitute.org;ulg.ac.be;cnrs.fr;uliege.be", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Flatiron Institute;Universit\u00e9 de Li\u00e8ge;Centre National de la Recherche Scientifique;University of Li\u00e8ge", "aff_unique_dep": ";;;", "aff_unique_url": "https://flatironinstitute.org;https://www.ulg.ac.be;https://www.cnrs.fr;https://www.ulg.ac.be", "aff_unique_abbr": "Flatiron;ULi\u00e8ge;CNRS;ULi\u00e8ge", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United States;Belgium;France" }, { "title": "Self-Guided Masked Autoencoder", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96408", "id": "7vXufiEzSy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7vXufiEzSy", "openreview": "https://openreview.net/forum?id=7vXufiEzSy", "poster": "", "project": "", "author_site": "Jeongwoo Shin, Inseo Lee, Junho Lee, Joonseok Lee", "tldr": "", "abstract": "Masked Autoencoder (MAE) is a self-supervised approach for representation learning, widely applicable to a variety of downstream tasks in computer vision. In spite of its success, it is still not fully uncovered what and how MAE exactly learns. In this paper, with an in-depth analysis, we discover that MAE intrinsically learns pattern-based patch-level clustering from surprisingly early stages of pre-training. Upon this understanding, we propose self-guided masked autoencoder, which internally generates informed mask by utilizing its progress in patch clustering, substituting the naive random masking of the vanilla MAE. Our approach significantly boosts its learning process without relying on any external models or supplementary information, keeping the benefit of self-supervised nature of MAE intact. Comprehensive experiments on various downstream tasks verify the effectiveness of the proposed method.", "keywords": "self-supervised learning;self-supervision;SSL;representation learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/ea168b9c0640ce77a9a5e8bdc317f165157fe302.zip", "author": "Jeongwoo Shin;Inseo Lee;Junho Lee;Joonseok Lee", "authorids": "~Jeongwoo_Shin1;~Inseo_Lee1;~Junho_Lee2;~Joonseok_Lee1", "gender": "M;M;M;M", "homepage": "https://github.com/swswss;https://github.com/Devlee247;https://sites.google.com/view/junopage;http://www.joonseok.net", "dblp": ";;;77/1319.html", "google_scholar": ";;s_orZYMAAAAJ;https://scholar.google.co.kr/citations?user=M-MfqpMAAAAJ", "orcid": ";;;", "linkedin": ";inseo-lee-586661215/;junho-lee-457748229/;joonseoklee", "or_profile": "~Jeongwoo_Shin1;~Inseo_Lee1;~Junho_Lee2;~Joonseok_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Google Research", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;google.com", "position": "MS student;MS student;PhD student;Research Scientist", "bibtex": "@inproceedings{\nshin2024selfguided,\ntitle={Self-Guided Masked Autoencoder},\nauthor={Jeongwoo Shin and Inseo Lee and Junho Lee and Joonseok Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7vXufiEzSy}\n}", "github": "", "reviewers": "3PrK;BsNN;LZWD;Pvc4;ZFhW", "pdf_size": 5355634, "rating": "5;5;5;6;6", "confidence": "3;4;3;4;5", "soundness": "3;2;2;3;3", "novelty": "3;3;2;2;3", "presentation": "2;2;2;4;3", "wc_summary": "155;65;97;81;87", "wc_strengths": "85;111;15;53;161", "wc_weaknesses": "115;648;274;268;296", "wc_questions": "43;6;7;71;11", "wc_limitations": "9;12;24;4;8", "wc_review": "407;842;417;477;563", "wc_reply_reviewers": "63;121;426;119;398", "wc_reply_authors": "204;1080;992;235;1145", "reply_reviewers": "1;1;2;2;2", "reply_authors": "2;3;3;3;4", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 97.0, 30.80259729308553 ], "wc_strengths_avg": [ 85.0, 49.75138189035557 ], "wc_weaknesses_avg": [ 320.2, 176.0708948122886 ], "wc_questions_avg": [ 27.6, 25.640592816859755 ], "wc_limitations_avg": [ 11.4, 6.8 ], "wc_review_avg": [ 541.2, 160.3201796406179 ], "wc_reply_reviewers_avg": [ 225.4, 154.02934785293354 ], "wc_reply_authors_avg": [ 731.2, 420.7286061108752 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 3.0, 0.6324555320336759 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7637626158259732, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8331824863279324058&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Seoul National University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.snu.ac.kr;https://research.google", "aff_unique_abbr": "SNU;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "All-in-One Image Coding for Joint Human-Machine Vision with Multi-Path Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96407", "id": "7vsx6PxAOH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7vsx6PxAOH", "openreview": "https://openreview.net/forum?id=7vsx6PxAOH", "poster": "/media/PosterPDFs/NeurIPS%202024/96407.png?t=1732300512.6729271", "project": "", "author_site": "Xu Zhang, Peiyao Guo, Ming Lu, Zhan Ma", "tldr": "", "abstract": "Image coding for multi-task applications, catering to both human perception and machine vision, has been extensively investigated. Existing methods often rely on multiple task-specific encoder-decoder pairs, leading to high overhead of parameter and bitrate usage, or face challenges in multi-objective optimization under a unified representation, failing to achieve both performance and efficiency. To this end, we propose Multi-Path Aggregation (MPA) integrated into existing coding models for joint human-machine vision, unifying the feature representation with an all-in-one architecture. MPA employs a predictor to allocate latent features among task-specific paths based on feature importance varied across tasks, maximizing the utility of shared features while preserving task-specific features for subsequent refinement. Leveraging feature correlations, we develop a two-stage optimization strategy to alleviate multi-task performance degradation. Upon the reuse of shared features, as low as 1.89\\% parameters are further augmented and fine-tuned for a specific task, which completely avoids extensive optimization of the entire model. Experimental results show that MPA achieves performance comparable to state-of-the-art methods in both task-specific and multi-objective optimization across human viewing and machine analysis tasks. Moreover, our all-in-one design supports seamless transitions between human- and machine-oriented reconstruction, enabling task-controllable interpretation without altering the unified model. Code is available at https://github.com/NJUVISION/MPA.", "keywords": "learned image coding;joint human-machine vision;multi-path aggregation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xu Zhang;Peiyao Guo;Ming Lu;Zhan Ma", "authorids": "~Xu_Zhang24;~Peiyao_Guo1;~Ming_Lu3;~Zhan_Ma1", "gender": "M;F;;M", "homepage": "https://vision.nju.edu.cn/d4/ed/c29471a644333/page.htm;;;http://vision.nju.edu.cn", "dblp": "98/5660-27;;;", "google_scholar": "-ZhpHg0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;qDtMMVgAAAAJ;78KxtRMAAAAJ", "orcid": "0009-0009-0906-1532;;;", "linkedin": ";;;", "or_profile": "~Xu_Zhang24;~Peiyao_Guo1;~Ming_Lu3;~Zhan_Ma1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;smail.nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2024allinone,\ntitle={All-in-One Image Coding for Joint Human-Machine Vision with Multi-Path Aggregation},\nauthor={Xu Zhang and Peiyao Guo and Ming Lu and Zhan Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7vsx6PxAOH}\n}", "github": "", "reviewers": "TGUq;sciG;N4Rb", "pdf_size": 17837602, "rating": "5;6;7", "confidence": "4;3;2", "soundness": "2;3;4", "novelty": "2;2;4", "presentation": "3;3;4", "wc_summary": "57;55;92", "wc_strengths": "50;99;71", "wc_weaknesses": "67;90;68", "wc_questions": "36;140;4", "wc_limitations": "53;42;15", "wc_review": "263;426;250", "wc_reply_reviewers": "0;0;36", "wc_reply_authors": "0;0;12", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 68.0, 16.990193249832878 ], "wc_strengths_avg": [ 73.33333333333333, 20.07209228976613 ], "wc_weaknesses_avg": [ 75.0, 10.614455552060438 ], "wc_questions_avg": [ 60.0, 58.057442818872644 ], "wc_limitations_avg": [ 36.666666666666664, 15.965240019770729 ], "wc_review_avg": [ 313.0, 80.07912753437482 ], "wc_reply_reviewers_avg": [ 12.0, 16.97056274847714 ], "wc_reply_authors_avg": [ 4.0, 5.656854249492381 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8206080766112441939&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nju.edu.cn;smail.nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "QUEEN: QUantized Efficient ENcoding of Dynamic Gaussians for Streaming Free-viewpoint Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96406", "id": "7xhwE7VH4S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7xhwE7VH4S", "openreview": "https://openreview.net/forum?id=7xhwE7VH4S", "poster": "/media/PosterPDFs/NeurIPS%202024/96406.png?t=1733867349.624203", "project": "", "author_site": "Sharath Girish, Tianye Li, Amrita Mazumdar, Abhinav Shrivastava, david luebke, Shalini De Mello", "tldr": "", "abstract": "Online free-viewpoint video (FVV) streaming is a challenging problem, which is relatively under-explored. It requires incremental on-the-fly updates to a volumetric representation, fast training and rendering to satisfy realtime constraints and a small memory footprint for efficient transmission. If acheived, it can enhance user experience by enabling novel applications, e.g., 3D video conferencing and live volumetric video broadcast, among others. In this work, we propose a novel framework for QUantized and Efficient ENcoding (QUEEN) for streaming FVV using 3D Gaussian Splatting (3D-GS). QUEEN directly learns Gaussian attribute residuals between consecutive frames at each time-step without imposing any structural constraints on them, allowing for high quality reconstruction and generalizability. To efficiently store the residuals, we further propose a quantization-sparsity framework, which contains a learned latent-decoder for effectively quantizing attribute residuals other than Gaussian positions and a learned gating module to sparsify position residuals. We propose to use the Gaussian viewspace gradient difference vector as a signal to separate the static and dynamic content of the scene. It acts as a guide for effective sparsity learning and speeds up training. On diverse FVV benchmarks, QUEEN outperforms the state-of-the-art online FVV methods on all metrics. Notably, for several highly dynamic scenes, it reduces the model size to just 0.7 MB per frame while training in under 5 sec and rendering at ~350 FPS.", "keywords": "Free-viewpoint video;4D reconstruction;Streaming;Gaussian splatting;Compression;Quantization", "primary_area": "machine_vision", "supplementary_material": "/attachment/1acff36fff40bc8973e3df3060d86ce5d15d145e.zip", "author": "Sharath Girish;Tianye Li;Amrita Mazumdar;Abhinav Shrivastava;david luebke;Shalini De Mello", "authorids": "~Sharath_Girish1;~Tianye_Li2;~Amrita_Mazumdar1;~Abhinav_Shrivastava2;~david_luebke1;~Shalini_De_Mello1", "gender": ";M;;M;M;Not Specified", "homepage": "https://sharath-girish.github.io/;https://tianyeli.github.io/;https://www.amritamaz.net;http://abhinavsh.info;http://luebke.us;https://research.nvidia.com/person/shalini-de-mello", "dblp": "232/3030;179/2336;202/2236;65/10572;;206/7364", "google_scholar": "KRB9iksAAAAJ;ztIh4rgAAAAJ;umX575MAAAAJ;mIF9BowAAAAJ;https://scholar.google.com/citations?hl=en;xQM4BlMAAAAJ", "orcid": "0000-0003-4364-0262;0000-0002-9422-5782;;0000-0001-8928-8554;0000-0002-8206-5785;", "linkedin": ";;;;david-p-luebke;shalini-de-mello-02b8251/", "or_profile": "~Sharath_Girish1;~Tianye_Li2;~Amrita_Mazumdar1;~Abhinav_Shrivastava2;~david_luebke1;~Shalini_De_Mello1", "aff": "University of Maryland, College Park;NVIDIA;NVIDIA;Department of Computer Science, University of Maryland, College Park;NVIDIA Research;NVIDIA", "aff_domain": "umd.edu;nvidia.com;nvidia.com;cs.umd.edu;research.nvidia.com;nvidia.com", "position": "PhD student;Researcher;Researcher;Assistant Professor;Vice President of Research;Principal Researcher", "bibtex": "@inproceedings{\ngirish2024queen,\ntitle={{QUEEN}: {QU}antized Efficient {EN}coding for Streaming Free-viewpoint Videos},\nauthor={Sharath Girish and Tianye Li and Amrita Mazumdar and Abhinav Shrivastava and david luebke and Shalini De Mello},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7xhwE7VH4S}\n}", "github": "", "reviewers": "dVjq;L5E8;jnXV;9x86", "pdf_size": 16842755, "rating": "5;6;6;7", "confidence": "4;2;2;4", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "83;91;152;159", "wc_strengths": "54;55;43;110", "wc_weaknesses": "88;106;170;161", "wc_questions": "50;1;125;108", "wc_limitations": "28;18;66;18", "wc_review": "303;271;556;556", "wc_reply_reviewers": "0;0;0;96", "wc_reply_authors": "0;0;0;695", "reply_reviewers": "0;0;0;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 121.25, 34.455587355318734 ], "wc_strengths_avg": [ 65.5, 26.119915773217954 ], "wc_weaknesses_avg": [ 131.25, 34.98124497498624 ], "wc_questions_avg": [ 71.0, 49.05609034564414 ], "wc_limitations_avg": [ 32.5, 19.767397400770797 ], "wc_review_avg": [ 421.5, 134.9749976847564 ], "wc_reply_reviewers_avg": [ 24.0, 41.569219381653056 ], "wc_reply_authors_avg": [ 173.75, 300.94382781509245 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Do_lYOA58PcJ:scholar.google.com/&scioq=QUEEN:+QUantized+Efficient+ENcoding+of+Dynamic+Gaussians+for+Streaming+Free-viewpoint+Videos&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "umd.edu;nvidia.com;nvidia.com;cs.umd.edu;research.nvidia.com;nvidia.com", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "University of Maryland;NVIDIA;University of Maryland, College Park", "aff_unique_dep": ";NVIDIA Corporation;Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www.nvidia.com;https://www/umd.edu", "aff_unique_abbr": "UMD;NVIDIA;UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7yqjVgWWxx", "title": "Your Absorbing Discrete Diffusion Secretly Models the Conditional Distributions of Clean Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Discrete diffusion models with absorbing processes have shown promise in language modeling. The key quantities to be estimated are the ratios between the marginal probabilities of two transitive states at all timesteps, called the concrete score. In this paper, we reveal that the concrete score in absorbing diffusion can be expressed as conditional probabilities of clean data, multiplied by a time-dependent scalar in an analytic form. Motivated by the finding, we propose reparameterized absorbing discrete diffusion (RADD), a dedicated diffusion model that characterizes the time-independent conditional probabilities. Besides its simplicity, RADD can reduce the number of function evaluations (NFEs) by caching the output of the time-independent network when the noisy sample remains unchanged in a sampling interval. Empirically, RADD is up to 3.5 times faster while consistently achieving a better performance than the strongest baseline.\nBuilt upon the new factorization of the concrete score, we further prove a surprising result that the exact likelihood of absorbing diffusion can be rewritten to a simple form (named denoise cross-entropy) and then estimated efficiently by the Monte Carlo method. The resulting approach also applies to the original parameterization of the concrete score. It significantly advances the state-of-the-art discrete diffusion on 5 zero-shot language modeling benchmarks (measured by perplexity) at the GPT-2 scale.", "keywords": "Discrete Diffusion Models;Diffusion Models;Language Modeling;Concrete Score;Score Entropy", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/5d8d36968f5d4da747e772dd95d3cdd0ae25bd67.zip", "author": "Jingyang Ou;Shen Nie;Kaiwen Xue;Fengqi Zhu;Jiacheng Sun;Zhenguo Li;Chongxuan Li", "authorids": "~Jingyang_Ou1;~Shen_Nie2;~Kaiwen_Xue1;~Fengqi_Zhu1;~Jiacheng_Sun1;~Zhenguo_Li1;~Chongxuan_Li1", "gender": "M;M;;;M;M;M", "homepage": "https://jingyangou.github.io/;https://github.com/NieShenRuc;;;;http://www.ee.columbia.edu/~zgli/;http://ml.cs.tsinghua.edu.cn/~chongxuan", "dblp": ";342/3413;;;165/5350;23/6479;161/9965", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;;XboZC1AAAAAJ;UKMcQn4AAAAJ", "orcid": ";;;;;;0000-0002-0912-9076", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/jiacheng-sun-ab622b131;;", "or_profile": "~Jingyang_Ou1;~Shen_Nie2;~Kaiwen_Xue1;~Fengqi_Zhu1;~Jiacheng_Sun1;~Zhenguo_Li1;~Chongxuan_Li1", "aff": "Renmin University of China;Renmin University of China;;;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;;;huawei.com;huawei.com;ruc.edu.cn", "position": "Undergrad student;PhD student;;;Senior Researcher;Principal Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024your,\ntitle={Your Absorbing Discrete Diffusion Secretly Models the Conditional Distributions of Clean Data},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=7yqjVgWWxx}\n}", "github": "", "project": "", "reviewers": "vbN2;9MAq;7sHz", "site": "https://openreview.net/forum?id=7yqjVgWWxx", "pdf_size": 472089, "rating": "6;6;6", "confidence": "4;4;2", "soundness": "3;3;2", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "68;15;82", "wc_strengths": "166;53;46", "wc_weaknesses": "350;193;102", "wc_questions": "95;1;102", "wc_limitations": "9;1;19", "wc_review": "688;263;351", "wc_reply_reviewers": "123;0;16", "wc_reply_authors": "384;0;18", "reply_reviewers": "2;0;1", "reply_authors": "3;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.0, 28.855964143772194 ], "wc_strengths_avg": [ 88.33333333333333, 54.99292883837178 ], "wc_weaknesses_avg": [ 215.0, 102.4337184069126 ], "wc_questions_avg": [ 66.0, 46.05069670120819 ], "wc_limitations_avg": [ 9.666666666666666, 7.363574011458175 ], "wc_review_avg": [ 434.0, 183.16295113004338 ], "wc_reply_reviewers_avg": [ 46.333333333333336, 54.60362218347383 ], "wc_reply_authors_avg": [ 134.0, 176.92936443677178 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10234403750394996924&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Renmin University of China;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "http://www.ruc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "RUC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Sub-optimal Experts mitigate Ambiguity in Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96405", "id": "7zzOcyT0hd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=7zzOcyT0hd", "openreview": "https://openreview.net/forum?id=7zzOcyT0hd", "poster": "", "project": "", "author_site": "Riccardo Poiani, Curti Gabriele, Alberto Maria Metelli, Marcello Restelli", "tldr": "", "abstract": "Inverse Reinforcement Learning (IRL) deals with the problem of deducing a reward function that explains the behavior of an expert agent who is assumed to act *optimally* in an underlying unknown task. Recent works have studied the IRL problem from the perspective of recovering the *feasible reward set*, i.e., the class of reward functions that are compatible with a unique optimal expert. However, in several problems of interest it is possible to observe the behavior of multiple experts with different degree of optimality (e.g., racing drivers whose skills ranges from amateurs to professionals). For this reason, in this work, we focus on the reconstruction of the feasible reward set when, in addition to demonstrations from the optimal expert, we observe the behavior of multiple *sub-optimal experts*. Given this problem, we first study the theoretical properties showing that the presence of multiple sub-optimal experts, in addition to the optimal one, can significantly shrink the set of compatible rewards, ultimately mitigating the inherent ambiguity of IRL.\nFurthermore, we study the statistical complexity of estimating the feasible reward set with a generative model and analyze a uniform sampling algorithm that turns out to be minimax optimal whenever the sub-optimal experts' performance level is sufficiently close to that of the optimal expert.", "keywords": "Inverse Reinforcement Learning;Sub-optimal Experts;Sample Complexity", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Riccardo Poiani;Curti Gabriele;Alberto Maria Metelli;Marcello Restelli", "authorids": "~Riccardo_Poiani3;~Curti_Gabriele1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "gender": "M;M;M;M", "homepage": ";;https://albertometelli.github.io/;http://home.deib.polimi.it/restelli/", "dblp": "268/8198;;209/4941;64/1011", "google_scholar": "WQWOAkkAAAAJ;;R31IsPwAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";;0000-0002-3424-5212;0000-0002-6322-1076", "linkedin": ";gabrielecurti/;;", "or_profile": "~Riccardo_Poiani3;~Curti_Gabriele1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;;Politecnico di Milano;Politecnico di Milano", "aff_domain": "polimi.it;;polimi.it;polimi.it", "position": "PhD student;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\npoiani2024suboptimal,\ntitle={Sub-optimal Experts mitigate Ambiguity in Inverse Reinforcement Learning},\nauthor={Riccardo Poiani and Curti Gabriele and Alberto Maria Metelli and Marcello Restelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=7zzOcyT0hd}\n}", "github": "", "reviewers": "KsdM;6BJu;zNGu;Euvz", "pdf_size": 603546, "rating": "6;6;7;7", "confidence": "3;3;4;2", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "78;93;112;101", "wc_strengths": "89;102;48;202", "wc_weaknesses": "184;82;108;53", "wc_questions": "58;103;87;1", "wc_limitations": "25;10;25;1", "wc_review": "434;390;380;358", "wc_reply_reviewers": "11;21;22;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 12.389511693363866 ], "wc_strengths_avg": [ 110.25, 56.596709268295804 ], "wc_weaknesses_avg": [ 106.75, 48.65888921872344 ], "wc_questions_avg": [ 62.25, 38.86756359742658 ], "wc_limitations_avg": [ 15.25, 10.256095748383007 ], "wc_review_avg": [ 390.5, 27.654113618049667 ], "wc_reply_reviewers_avg": [ 16.5, 5.024937810560445 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BB-mf6o9lMgJ:scholar.google.com/&scioq=Sub-optimal+Experts+mitigate+Ambiguity+in+Inverse+Reinforcement+Learning&hl=en&as_sdt=0,3", "gs_version_total": 0, "email": "polimi.it;;polimi.it;polimi.it", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "SwitchHead: Accelerating Transformers with Mixture-of-Experts Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96404", "id": "80SSl69GAz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=80SSl69GAz", "openreview": "https://openreview.net/forum?id=80SSl69GAz", "poster": "", "project": "", "author_site": "R\u00f3bert Csord\u00e1s, Piotr Pi\u0119kos, Kazuki Irie, J\u00fcrgen Schmidhuber", "tldr": "", "abstract": "Despite many recent works on Mixture of Experts (MoEs) for resource-efficient Transformer language models, existing methods mostly focus on MoEs for feedforward layers. Previous attempts at extending MoE to the self-attention layer fail to match the performance of the parameter-matched baseline. Our novel SwitchHead is an effective MoE method for the attention layer that successfully reduces both the compute and memory requirements, achieving wall-clock speedup, while matching the language modeling performance of the baseline Transformer. Our novel MoE mechanism allows SwitchHead to compute up to 8 times fewer attention matrices than the standard Transformer. SwitchHead can also be combined with MoE feedforward layers, resulting in fully-MoE \"SwitchAll\" Transformers. For our 262M parameter model trained on C4, SwitchHead matches the perplexity of standard models with only 44% compute and 27% memory usage. Zero-shot experiments on downstream tasks confirm the performance of SwitchHead, e.g., achieving more than 3.5% absolute improvements on BliMP compared to the baseline with an equal compute resource.", "keywords": "MoE;mixture of experts;attention;transformers", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/5f5f3cba25e6de64b48c9cb16679c6978eb07883.zip", "author": "R\u00f3bert Csord\u00e1s;Piotr Pi\u0119kos;Kazuki Irie;J\u00fcrgen Schmidhuber", "authorids": "~R\u00f3bert_Csord\u00e1s1;~Piotr_Pi\u0119kos2;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1", "gender": "M;M;;M", "homepage": "https://robertcsordas.github.io/;https://piotrpiekos.github.io;https://sites.harvard.edu/kazuki-irie/;http://people.idsia.ch/~juergen/", "dblp": "166/4773.html;;148/9667;s/JurgenSchmidhuber", "google_scholar": "av1lplwAAAAJ;;https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "orcid": ";;0000-0003-0923-691X;", "linkedin": "robertcsordas/;;;", "or_profile": "~R\u00f3bert_Csord\u00e1s1;~Piotr_Pi\u0119kos2;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1", "aff": "IDSIA;King Abdullah University of Science and Technology;Harvard University;IDSIA", "aff_domain": "idsia.ch;kaust.edu.sa;fas.harvard.edu;idsia.ch", "position": "Postdoc;PhD student;Postpostdoc;Scientific Director", "bibtex": "@inproceedings{\ncsord{\\'a}s2024switchhead,\ntitle={SwitchHead: Accelerating Transformers with Mixture-of-Experts Attention},\nauthor={R{\\'o}bert Csord{\\'a}s and Piotr Pi{\\k{e}}kos and Kazuki Irie and J{\\\"u}rgen Schmidhuber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=80SSl69GAz}\n}", "github": "", "reviewers": "HTZS;Q7cF;YarL;58yB", "pdf_size": 948331, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "80;155;76;119", "wc_strengths": "98;100;37;67", "wc_weaknesses": "185;27;55;83", "wc_questions": "353;423;151;2", "wc_limitations": "185;6;9;5", "wc_review": "901;711;328;276", "wc_reply_reviewers": "261;44;38;7", "wc_reply_authors": "224;39;27;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.5, 32.159757461772 ], "wc_strengths_avg": [ 75.5, 25.79244075305786 ], "wc_weaknesses_avg": [ 87.5, 59.6720202439971 ], "wc_questions_avg": [ 232.25, 166.2699236181938 ], "wc_limitations_avg": [ 51.25, 77.23462630193792 ], "wc_review_avg": [ 554.0, 261.446935342528 ], "wc_reply_reviewers_avg": [ 87.5, 101.14964162071955 ], "wc_reply_authors_avg": [ 72.5, 88.60163655373415 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17263232493020117137&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "idsia.ch;kaust.edu.sa;fas.harvard.edu;idsia.ch", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Institute of Digital Technologies;King Abdullah University of Science and Technology;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.idsia.ch;https://www.kast.kau.edu.sa;https://www.harvard.edu", "aff_unique_abbr": "IDSIA;KAUST;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Switzerland;Saudi Arabia;United States" }, { "title": "DMPlug: A Plug-in Method for Solving Inverse Problems with Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96403", "id": "81IFFsfQUj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=81IFFsfQUj", "openreview": "https://openreview.net/forum?id=81IFFsfQUj", "poster": "", "project": "", "author_site": "Hengkang Wang, Xu Zhang, Taihui Li, Yuxiang Wan, Tiancong Chen, Ju Sun", "tldr": "", "abstract": "Pretrained diffusion models (DMs) have recently been popularly used in solving inverse problems (IPs). The existing methods mostly interleave iterative steps in the reverse diffusion process and iterative steps to bring the iterates closer to satisfying the measurement constraint. However, such interleaving methods struggle to produce final results that look like natural objects of interest (i.e., manifold feasibility) and fit the measurement (i.e., measurement feasibility), especially for nonlinear IPs. Moreover, their capabilities to deal with noisy IPs with unknown types and levels of measurement noise are unknown. In this paper, we advocate viewing the reverse process in DMs as a function and propose a novel plug-in method for solving IPs using pretrained DMs, dubbed DMPlug. DMPlug addresses the issues of manifold feasibility and measurement feasibility in a principled manner, and also shows great potential for being robust to unknown types and levels of noise. Through extensive experiments across various IP tasks, including two linear and three nonlinear IPs, we demonstrate that DMPlug consistently outperforms state-of-the-art methods, often by large margins especially for nonlinear IPs.", "keywords": "Inverse problems;diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/c794cfb39365a00cc94bad920e0afe92ba84473b.zip", "author": "Hengkang Wang;Xu Zhang;Taihui Li;Yuxiang Wan;Tiancong Chen;Ju Sun", "authorids": "~Hengkang_Wang1;~Xu_Zhang1;~Taihui_Li1;~Yuxiang_Wan1;~Tiancong_Chen1;~Ju_Sun2", "gender": "M;M;M;M;M;", "homepage": ";https://xu-zhang-1987.github.io;https://taihui.github.io/;;https://sites.google.com/view/tiancong-chen;http://www.sunju.org", "dblp": "175/7774;98/5660-22;174/3814.html;;242/8507;31/6843.html", "google_scholar": "APqDZvUAAAAJ;efE70pEAAAAJ;1zrHEeYAAAAJ;;Y97x5I8AAAAJ;V6FaD-UAAAAJ", "orcid": ";;0000-0002-3758-8923;;;0000-0002-2017-5903", "linkedin": "hk-wang/;xu-zhang-22008054;;yuxiang-wan-31518921a/;;", "or_profile": "~Hengkang_Wang1;~Xu_Zhang1;~Taihui_Li1;~Yuxiang_Wan1;~Tiancong_Chen1;~Ju_Sun1", "aff": "University of Minnesota, Twin Cities;Amazon;University of Minnesota, Minneapolis;University of Minnesota - Twin Cities;University of Minnesota, Minneapolis;University of Minnesota, Twin Cities", "aff_domain": "umn.edu;amazon.com;umn.edu;umn.edu;umn.edu;umn.edu", "position": "PhD student;Applied Scientist;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024dmplug,\ntitle={{DMP}lug: A Plug-in Method for Solving Inverse Problems with Diffusion Models},\nauthor={Hengkang Wang and Xu Zhang and Taihui Li and Yuxiang Wan and Tiancong Chen and Ju Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=81IFFsfQUj}\n}", "github": "", "reviewers": "uJfQ;y52g;wUnV;yL1C", "pdf_size": 9571428, "rating": "4;5;6;6", "confidence": "4;3;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "63;57;47;154", "wc_strengths": "85;61;64;82", "wc_weaknesses": "160;173;181;212", "wc_questions": "152;68;31;160", "wc_limitations": "10;13;5;9", "wc_review": "470;372;328;617", "wc_reply_reviewers": "0;170;213;279", "wc_reply_authors": "96;150;194;424", "reply_reviewers": "0;1;3;2", "reply_authors": "3;4;3;5", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 42.96146529158427 ], "wc_strengths_avg": [ 73.0, 10.606601717798213 ], "wc_weaknesses_avg": [ 181.5, 19.13765920900464 ], "wc_questions_avg": [ 102.75, 54.906169962946784 ], "wc_limitations_avg": [ 9.25, 2.8613807855648994 ], "wc_review_avg": [ 446.75, 110.9219883521748 ], "wc_reply_reviewers_avg": [ 165.5, 103.13704475114652 ], "wc_reply_authors_avg": [ 216.0, 125.00399993600205 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16628627236132922059&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umn.edu;amazon.com;umn.edu;umn.edu;umn.edu;umn.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Minnesota;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.minnesota.edu;https://www.amazon.com", "aff_unique_abbr": "UMN;Amazon", "aff_campus_unique_index": "0;2;0;2;0", "aff_campus_unique": "Twin Cities;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Twin-Merging: Dynamic Integration of Modular Expertise in Model Merging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96402", "id": "81YIt63TTn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=81YIt63TTn", "openreview": "https://openreview.net/forum?id=81YIt63TTn", "poster": "", "project": "", "author_site": "Zhenyi Lu, Chenghao Fan, Wei Wei, Xiaoye Qu, Dangyang Chen, Yu Cheng", "tldr": "", "abstract": "In the era of large language models, model merging is a promising way to combine multiple task-specific models into a single multitask model without extra training. \nHowever, two challenges remain: (a) interference between different models and (b) heterogeneous data during testing. Traditional model merging methods often show significant performance gaps compared to fine-tuned models due to these issues. \nAdditionally, a one-size-fits-all model lacks flexibility for diverse test data, leading to performance degradation. \nWe show that both shared and exclusive task-specific knowledge are crucial for merging performance, but directly merging exclusive knowledge hinders overall performance. \nIn view of this, we propose Twin-Merging, a method that encompasses two principal stages: \n(1) modularizing knowledge into shared and exclusive components, with compression to reduce redundancy and enhance efficiency; \n(2) dynamically merging shared and task-specific knowledge based on the input. \nThis approach narrows the performance gap between merged and fine-tuned models and improves adaptability to heterogeneous data. \nExtensive experiments on $20$ datasets for both language and vision tasks demonstrate the effectiveness of our method, showing an average improvement of $28.34\\%$ in absolute normalized score for discriminative tasks and even surpassing the fine-tuned upper bound on the generative tasks.", "keywords": "Model Merging;Fusion of Experts;Efficiency;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/09729a0976ca4e15f437af9701afc4a3528186b4.zip", "author": "Zhenyi Lu;Chenghao Fan;Wei Wei;Xiaoye Qu;Dangyang Chen;Yu Cheng", "authorids": "~Zhenyi_Lu2;~Chenghao_Fan1;~Wei_Wei14;~Xiaoye_Qu1;~Dangyang_Chen1;~Yu_Cheng1", "gender": "M;M;M;M;M;M", "homepage": "https://orcid.org/0009-0002-8381-3236;;https://www.eric-weiwei.com;;;https://ych133.github.io", "dblp": "307/6180;313/1709.html;24/4105-2;229/8206;327/3353;96/3060-1.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?hl=en;rT3hqdcAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4488-0102;;;", "linkedin": ";;;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;chengyu05/", "or_profile": "~Zhenyi_Lu2;~Chenghao_Fan1;~Wei_Wei14;~Xiaoye_Qu1;~Dangyang_Chen1;~Yu_Cheng1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;Pingan Technology;The Chinese University of Hong Kong", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;pjlab.org.cn;pingan.com.cn;cuhk.edu.hk", "position": "MS student;MS student;Full Professor;Researcher;CTO;Associate Professor", "bibtex": "@inproceedings{\nlu2024twinmerging,\ntitle={Twin-Merging: Dynamic Integration of Modular Expertise in Model Merging},\nauthor={Zhenyi Lu and Chenghao Fan and Wei Wei and Xiaoye Qu and Dangyang Chen and Yu Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=81YIt63TTn}\n}", "github": "", "reviewers": "vkK5;LqLU;2gMR;Q3uE", "pdf_size": 920198, "rating": "5;6;6;6", "confidence": "5;4;4;3", "soundness": "3;3;3;2", "novelty": "3;3;3;2", "presentation": "3;2;4;3", "wc_summary": "62;82;48;74", "wc_strengths": "39;56;50;30", "wc_weaknesses": "105;277;236;289", "wc_questions": "4;46;166;194", "wc_limitations": "1;15;10;18", "wc_review": "211;476;510;605", "wc_reply_reviewers": "0;301;153;178", "wc_reply_authors": "125;1210;358;567", "reply_reviewers": "0;4;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.5, 12.835497652993435 ], "wc_strengths_avg": [ 43.75, 10.0093706095838 ], "wc_weaknesses_avg": [ 226.75, 72.98758456066346 ], "wc_questions_avg": [ 102.5, 79.52829685086938 ], "wc_limitations_avg": [ 11.0, 6.442049363362563 ], "wc_review_avg": [ 450.5, 146.13435598790588 ], "wc_reply_reviewers_avg": [ 158.0, 107.04905417611124 ], "wc_reply_authors_avg": [ 565.0, 403.8805516486279 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5883076762358660219&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;pjlab.org.cn;pingan.com.cn;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;PingAn Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hust.edu.cn;http://www.shailab.org/;https://www.pingan.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "HUST;Shanghai AI Lab;;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Identifiability Analysis of Linear ODE Systems with Hidden Confounders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96401", "id": "8271eFxojN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8271eFxojN", "openreview": "https://openreview.net/forum?id=8271eFxojN", "poster": "/media/PosterPDFs/NeurIPS%202024/96401.png?t=1733462288.7331042", "project": "", "author_site": "Yuanyuan Wang, Biwei Huang, Wei Huang, Xi Geng, Mingming Gong", "tldr": "", "abstract": "The identifiability analysis of linear Ordinary Differential Equation (ODE) systems is a necessary prerequisite for making reliable causal inferences about these systems. While identifiability has been well studied in scenarios where the system is fully observable, the conditions for identifiability remain unexplored when latent variables interact with the system. This paper aims to address this gap by presenting a systematic analysis of identifiability in linear ODE systems incorporating hidden confounders. Specifically, we investigate two cases of such systems. In the first case, latent confounders exhibit no causal relationships, yet their evolution adheres to specific functional forms, such as polynomial functions of time $t$. Subsequently, we extend this analysis to encompass scenarios where hidden confounders exhibit causal dependencies, with the causal structure of latent variables described by a Directed Acyclic Graph (DAG). The second case represents a more intricate variation of the first case, prompting a more comprehensive identifiability analysis. Accordingly, we conduct detailed identifiability analyses of the second system under various observation conditions, including both continuous and discrete observations from single or multiple trajectories. To validate our theoretical results, we perform a series of simulations, which support and substantiate our findings.", "keywords": "Linear ODEs;Identifiability analysis;Hidden confounders;Causality", "primary_area": "causal_inference", "supplementary_material": "/attachment/2d0dfc4da566f56f537954f5b4c34c80fde6dcc7.zip", "author": "Yuanyuan Wang;Biwei Huang;Wei Huang;Xi Geng;Mingming Gong", "authorids": "~Yuanyuan_Wang5;~Biwei_Huang1;~Wei_Huang8;~Xi_Geng1;~Mingming_Gong1", "gender": "F;F;F;;M", "homepage": ";;https://sites.google.com/view/w-huang/home;;https://mingming-gong.github.io/", "dblp": "95/494;165/3288;;;98/8479", "google_scholar": "https://scholar.google.com.au/citations?user=gT6kOFQAAAAJ;;https://scholar.google.com.au/citations?user=qOd4sB0AAAAJ;;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ", "orcid": ";;0000-0002-5691-7411;;0000-0001-7147-5589", "linkedin": ";;;;", "or_profile": "~Yuanyuan_Wang5;~Biwei_Huang1;~Wei_Huang8;~Xi_Geng1;~Mingming_Gong1", "aff": "University of Melbourne;University of California, San Diego;University of Melbourne;;University of Melbourne", "aff_domain": "unimelb.edu.au;ucsd.edu;unimelb.edu.au;;unimelb.edu.au", "position": "PhD student;Assistant Professor;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nwang2024identifiability,\ntitle={Identifiability Analysis of Linear {ODE} Systems with Hidden Confounders},\nauthor={Yuanyuan Wang and Biwei Huang and Wei Huang and Xi Geng and Mingming Gong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8271eFxojN}\n}", "github": "", "reviewers": "1wF2;LnTR;Te42", "pdf_size": 765899, "rating": "5;6;6", "confidence": "2;2;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "129;90;91", "wc_strengths": "37;54;96", "wc_weaknesses": "134;81;66", "wc_questions": "152;25;20", "wc_limitations": "21;4;10", "wc_review": "473;254;283", "wc_reply_reviewers": "420;9;38", "wc_reply_authors": "1810;26;480", "reply_reviewers": "3;1;1", "reply_authors": "6;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 103.33333333333333, 18.153665072253467 ], "wc_strengths_avg": [ 62.333333333333336, 24.796953217863056 ], "wc_weaknesses_avg": [ 93.66666666666667, 29.169999809545573 ], "wc_questions_avg": [ 65.66666666666667, 61.081002684049714 ], "wc_limitations_avg": [ 11.666666666666666, 7.039570693980959 ], "wc_review_avg": [ 336.6666666666667, 97.1264925525243 ], "wc_reply_reviewers_avg": [ 155.66666666666666, 187.28647100691023 ], "wc_reply_authors_avg": [ 772.0, 757.0169526943678 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:i-7GTIFhWo8J:scholar.google.com/&scioq=Identifiability+Analysis+of+Linear+ODE+Systems+with+Hidden+Confounders&hl=en&as_sdt=0,10", "gs_version_total": 3, "email": "unimelb.edu.au;ucsd.edu;unimelb.edu.au;;unimelb.edu.au", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Melbourne;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.ucsd.edu", "aff_unique_abbr": "UniMelb;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Australia;United States" }, { "title": "Adversarially Trained Weighted Actor-Critic for Safe Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96400", "id": "82Ndsr4OS6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=82Ndsr4OS6", "openreview": "https://openreview.net/forum?id=82Ndsr4OS6", "poster": "/media/PosterPDFs/NeurIPS%202024/96400.png?t=1731129651.4379447", "project": "", "author_site": "Honghao Wei, Xiyue Peng, Arnob Ghosh, Xin Liu", "tldr": "", "abstract": "We propose WSAC (Weighted Safe Actor-Critic), a novel algorithm for Safe Offline Reinforcement Learning (RL) under functional approximation, which can robustly optimize policies to improve upon an arbitrary reference policy with limited data coverage. WSAC is designed as a two-player Stackelberg game to optimize a refined objective function. The actor optimizes the policy against two adversarially trained value critics with small importance-weighted Bellman errors, which focus on scenarios where the actor's performance is inferior to the reference policy. In theory, we demonstrate that when the actor employs a no-regret optimization oracle, WSAC achieves a number of guarantees: $(i)$ For the first time in the safe offline RL setting, we establish that WSAC can produce a policy that outperforms {\\bf any} reference policy while maintaining the same level of safety, which is critical to designing a safe algorithm for offline RL. $(ii)$ WSAC achieves the optimal statistical convergence rate of $1/\\sqrt{N}$ to the reference policy, where $N$ is the size of the offline dataset. $(iii)$ We theoretically show that WSAC guarantees a safe policy improvement across a broad range of hyperparameters that control the degree of pessimism, indicating its practical robustness. Additionally, we offer a practical version of WSAC and compare it with existing state-of-the-art safe offline RL algorithms in several continuous control environments. WSAC outperforms all baselines across a range of tasks, supporting the theoretical results.", "keywords": "RL;safe-RL;Offline RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/f2db834f0125160d64c81a081fd6b10247aeea7a.zip", "author": "Honghao Wei;Xiyue Peng;Arnob Ghosh;Xin Liu", "authorids": "~Honghao_Wei2;~Xiyue_Peng1;~Arnob_Ghosh3;~Xin_Liu14", "gender": "M;;M;", "homepage": "https://honghaow.me;https://github.com/pxyWaterMoon;https://sites.google.com/site/arnob008/;", "dblp": ";347/6307;34/8285;76/1820-49", "google_scholar": ";;aw2d6pQAAAAJ;y0U4EF4AAAAJ", "orcid": "0000-0002-1131-326X;;;", "linkedin": "honghao-wei-19565b155/;;;", "or_profile": "~Honghao_Wei2;~Xiyue_Peng1;~Arnob_Ghosh3;~Xin_Liu14", "aff": "Washington State University ;ShanghaiTech University;New Jersey Institute of Technology;ShanghaiTech University", "aff_domain": "wsu.edu;shanghaitech.edu.cn;njit.edu;shanghaitech.edu.cm", "position": "Assistant Professor;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwei2024adversarially,\ntitle={Adversarially Trained Weighted Actor-Critic for Safe Offline Reinforcement Learning},\nauthor={Honghao Wei and Xiyue Peng and Arnob Ghosh and Xin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=82Ndsr4OS6}\n}", "github": "", "reviewers": "HDvF;U4TL;1bfE;Tiai", "pdf_size": 3186124, "rating": "5;6;6;7", "confidence": "2;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "49;44;171;241", "wc_strengths": "29;73;128;55", "wc_weaknesses": "168;7;171;54", "wc_questions": "24;327;93;78", "wc_limitations": "1;14;20;53", "wc_review": "271;465;583;481", "wc_reply_reviewers": "352;244;57;22", "wc_reply_authors": "1532;416;203;14", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.25, 83.52058129586982 ], "wc_strengths_avg": [ 71.25, 36.306851970392586 ], "wc_weaknesses_avg": [ 100.0, 71.46677549742957 ], "wc_questions_avg": [ 130.5, 116.31530423809242 ], "wc_limitations_avg": [ 22.0, 19.170289512680814 ], "wc_review_avg": [ 450.0, 112.8228700219951 ], "wc_reply_reviewers_avg": [ 168.75, 135.33915730489827 ], "wc_reply_authors_avg": [ 541.25, 589.4231820178097 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HwmWVXYg3zIJ:scholar.google.com/&scioq=Adversarially+Trained+Weighted+Actor-Critic+for+Safe+Offline+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "wsu.edu;shanghaitech.edu.cn;njit.edu;shanghaitech.edu.cm", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Washington State University;ShanghaiTech University;New Jersey Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://wsu.edu;https://www.shanghaitech.edu.cn;https://www.njit.edu", "aff_unique_abbr": "WSU;ShanghaiTech;NJIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Rethinking The Training And Evaluation of Rich-Context Layout-to-Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96399", "id": "83e3DPVrFC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=83e3DPVrFC", "openreview": "https://openreview.net/forum?id=83e3DPVrFC", "poster": "/media/PosterPDFs/NeurIPS%202024/96399.png?t=1731669831.8222725", "project": "", "author_site": "Jiaxin Cheng, ZIXU ZHAO, Tong He, Tianjun Xiao, Yicong Zhou, Zheng Zhang", "tldr": "", "abstract": "Recent advancements in generative models have significantly enhanced their capacity for image generation, enabling a wide range of applications such as image editing, completion and video editing. A specialized area within generative modeling is layout-to-image (L2I) generation, where predefined layouts of objects guide the generative process. In this study, we introduce a novel regional cross-attention module tailored to enrich layout-to-image generation. This module notably improves the representation of layout regions, particularly in scenarios where existing methods struggle with highly complex and detailed textual descriptions. Moreover, while current open-vocabulary L2I methods are trained in an open-set setting, their evaluations often occur in closed-set environments. To bridge this gap, we propose two metrics to assess L2I performance in open-vocabulary scenarios. Additionally, we conduct a comprehensive user study to validate the consistency of these metrics with human preferences.", "keywords": "Layout-to-Image; Diffusion Model; Computer Vision", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jiaxin Cheng;Zixu Zhao;Tong He;Tianjun Xiao;Zheng Zhang;Yicong Zhou", "authorids": "~Jiaxin_Cheng1;~Zixu_Zhao1;~Tong_He5;~Tianjun_Xiao1;~Zheng_Zhang1;~Yicong_Zhou1", "gender": "M;M;M;M;M;M", "homepage": ";;https://hetong007.github.io/;http://tianjunxiao.com/;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;https://www.fst.um.edu.mo/personal/yicongzhou/", "dblp": "230/4175;;02/1554-2;;;51/8204.html", "google_scholar": "cPeV9YIAAAAJ;https://scholar.google.com.hk/citations?user=GSQY0CEAAAAJ;hV5D8GYAAAAJ;DaKJ9pAAAAAJ;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;Fe5Ru58AAAAJ", "orcid": ";0000-0001-9399-3475;;;;0000-0002-4487-6384", "linkedin": ";;;;;", "or_profile": "~Jiaxin_Cheng1;~Zixu_Zhao1;~Tong_He5;~Tianjun_Xiao1;~Zheng_Zhang1;~Yicong_Zhou1", "aff": "USC/ISI;Amazon;Amazon;Amazon;Amazon;University of Macau", "aff_domain": "isi.edu;amazon.com;amazon.com;amazon.com;amazon.com;um.edu.mo", "position": "PhD student;Researcher;Researcher;Senior Applied Scientist;Senior Principal Scientist;Full Professor", "bibtex": "@inproceedings{\ncheng2024rethinking,\ntitle={Rethinking The Training And Evaluation of Rich-Context Layout-to-Image Generation},\nauthor={Jiaxin Cheng and Zixu Zhao and Tong He and Tianjun Xiao and Yicong Zhou and Zheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=83e3DPVrFC}\n}", "github": "", "reviewers": "SX1k;6Uy3;ZVVH;2Dp7", "pdf_size": 2995738, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;1;3;3", "novelty": "3;1;3;3", "presentation": "2;2;3;2", "wc_summary": "65;19;38;44", "wc_strengths": "37;29;40;49", "wc_weaknesses": "172;128;224;86", "wc_questions": "4;48;12;212", "wc_limitations": "8;3;17;1", "wc_review": "286;227;331;392", "wc_reply_reviewers": "31;48;16;64", "wc_reply_authors": "54;67;0;66", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 41.5, 16.408839081421938 ], "wc_strengths_avg": [ 38.75, 7.1545440106270926 ], "wc_weaknesses_avg": [ 152.5, 51.27133702177075 ], "wc_questions_avg": [ 69.0, 84.20807562223472 ], "wc_limitations_avg": [ 7.25, 6.179603547154137 ], "wc_review_avg": [ 309.0, 60.46900032247929 ], "wc_reply_reviewers_avg": [ 39.75, 18.005207580030838 ], "wc_reply_authors_avg": [ 46.75, 27.471576219794887 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13665696897489600718&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "isi.edu;amazon.com;amazon.com;amazon.com;amazon.com;um.edu.mo", "author_num": 6, "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "University of Southern California;Amazon;University of Macau", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://isi.usc.edu;https://www.amazon.com;https://www.um.edu.mo", "aff_unique_abbr": "USC;Amazon;UM", "aff_campus_unique_index": "0;2", "aff_campus_unique": "ISI;;Macau SAR", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Learning from Pattern Completion: Self-supervised Controllable Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96398", "id": "83pV20DD2s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=83pV20DD2s", "openreview": "https://openreview.net/forum?id=83pV20DD2s", "poster": "/media/PosterPDFs/NeurIPS%202024/96398.png?t=1729659492.2824526", "project": "", "author_site": "Zhiqiang Chen, Guofan Fan, Jinying Gao, Lei Ma, Bo Lei, Tiejun Huang, Shan Yu", "tldr": "", "abstract": "The human brain exhibits a strong ability to spontaneously associate different visual attributes of the same or similar visual scene, such as associating sketches and graffiti with real-world visual objects, usually without supervising information. In contrast, in the field of artificial intelligence, controllable generation methods like ControlNet heavily rely on annotated training datasets such as depth maps, semantic segmentation maps, and poses, which limits the method\u2019s scalability. Inspired by the neural mechanisms that may contribute to the brain\u2019s associative power, specifically the cortical modularization and hippocampal pattern completion, here we propose a self-supervised controllable generation (SCG) framework. Firstly, we introduce an equivariance constraint to promote inter-module independence and intra-module correlation in a modular autoencoder network, thereby achieving functional specialization. Subsequently, based on these specialized modules, we employ a self-supervised pattern completion approach for controllable generation training. Experimental results demonstrate that the proposed modular autoencoder effectively achieves functional specialization, including the modular processing of color, brightness, and edge detection, and exhibits brain-like features including orientation selectivity, color antagonism, and center-surround receptive fields. Through self-supervised training, associative generation capabilities spontaneously emerge in SCG, demonstrating excellent zero-shot generalization ability to various tasks such as superresolution, dehaze and associative or conditional generation on painting, sketches, and ancient graffiti. Compared to the previous representative method ControlNet, our proposed approach not only demonstrates superior robustness in more challenging high-noise scenarios but also possesses more promising scalability potential due to its self-supervised manner. Codes are released on Github and Gitee.", "keywords": "Self-supervised Controllable Generation\uff0cModularization\uff0cPattern Completion\uff0cAssociative Generation", "primary_area": "generative_models", "supplementary_material": "/attachment/53b2bfda2e161349d013514daecb444823fbdadd.zip", "author": "Zhiqiang Chen;Guofan Fan;Jinying Gao;Lei Ma;Bo Lei;Tiejun Huang;Shan Yu", "authorids": "~Zhiqiang_Chen5;~Guofan_Fan1;~Jinying_Gao1;~Lei_Ma3;~Bo_Lei1;~Tiejun_Huang1;~Shan_Yu1", "gender": "M;M;F;Not Specified;M;M;M", "homepage": "https://gitee.com/chenzq;https://github.com/Asterisci;;https://nbic.pku.edu.cn/rcdw/kyry/02c5f5ce8e254b1e82a48bebd0a24c33.htm;;https://idm.pku.edu.cn/~tjhuang/;https://people.ucas.ac.cn/~yushan?language=en", "dblp": ";334/3972;;20/6534-8;;h/TiejunHuang;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;91CwQH4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;YdaRHiIAAAAJ", "orcid": ";0009-0008-3094-7621;;0000-0001-6024-3854;0000-0001-7139-3655;0000-0002-4234-6099;", "linkedin": ";;;maleiwhat/;;;", "or_profile": "~Zhiqiang_Chen5;~Guofan_Fan1;~Jinying_Gao1;~Lei_Ma3;~Bo_Lei1;~Tiejun_Huang1;~Shan_Yu1", "aff": "Beijing Academy of Artificial Intelligence;Xi'an Jiaotong University;Institute of Automation, Chinese Academy of Sciences;Beijing Academy of Artifical Intelligence;Tsinghua University;Peking University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "baai.ac.cn;xjtu.edu.cn;ia.ac.cn;baai.ac.cn;tsinghua.edu.cn;pku.edu.cn;ia.ac.cn", "position": "Researcher;MS student;PhD student;Principal Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024learning,\ntitle={Learning from Pattern Completion: Self-supervised Controllable Generation},\nauthor={Zhiqiang Chen and Guofan Fan and Jinying Gao and Lei Ma and Bo Lei and Tiejun Huang and Shan Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=83pV20DD2s}\n}", "github": "", "reviewers": "h4wx;gXoh;eu8g;G9Tm", "pdf_size": 40860142, "rating": "5;5;7;7", "confidence": "4;3;4;3", "soundness": "2;3;4;3", "novelty": "2;2;4;3", "presentation": "1;3;4;4", "wc_summary": "169;59;144;46", "wc_strengths": "84;52;114;223", "wc_weaknesses": "676;111;149;87", "wc_questions": "105;342;5;55", "wc_limitations": "80;1;1;43", "wc_review": "1114;565;413;454", "wc_reply_reviewers": "170;13;39;19", "wc_reply_authors": "246;26;31;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 104.5, 52.945726928620026 ], "wc_strengths_avg": [ 118.25, 64.32874551862487 ], "wc_weaknesses_avg": [ 255.75, 243.63638377713622 ], "wc_questions_avg": [ 126.75, 129.20598863829804 ], "wc_limitations_avg": [ 31.25, 32.95735881407975 ], "wc_review_avg": [ 636.5, 281.2369996995417 ], "wc_reply_reviewers_avg": [ 60.25, 64.09124355167404 ], "wc_reply_authors_avg": [ 81.5, 95.0171037234876 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7v2FaF72EewJ:scholar.google.com/&scioq=Learning+from+Pattern+Completion:+Self-supervised+Controllable+Generation&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "baai.ac.cn;xjtu.edu.cn;ia.ac.cn;baai.ac.cn;tsinghua.edu.cn;pku.edu.cn;ia.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;3;4;2", "aff_unique_norm": "Beijing Academy of Artificial Intelligence;Xi'an Jiao Tong University;Chinese Academy of Sciences;Tsinghua University;Peking University", "aff_unique_dep": ";;Institute of Automation;;", "aff_unique_url": "https://www.baaic.cn;https://www.xjtu.edu.cn;http://www.ia.cas.cn;https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "BAAI;XJTU;CAS;THU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FSP-Laplace: Function-Space Priors for the Laplace Approximation in Bayesian Deep Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96397", "id": "83vxe8alV4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=83vxe8alV4", "openreview": "https://openreview.net/forum?id=83vxe8alV4", "poster": "/media/PosterPDFs/NeurIPS%202024/96397.png?t=1733482281.3604636", "project": "", "author_site": "Tristan Cinquin, Marvin Pf\u00f6rtner, Vincent Fortuin, Philipp Hennig, Robert Bamler", "tldr": "", "abstract": "Laplace approximations are popular techniques for endowing deep networks with epistemic uncertainty estimates as they can be applied without altering the predictions of the trained network, and they scale to large models and datasets. While the choice of prior strongly affects the resulting posterior distribution, computational tractability and lack of interpretability of the weight space typically limit the Laplace approximation to isotropic Gaussian priors, which are known to cause pathological behavior as depth increases. As a remedy, we directly place a prior on function space. More precisely, since Lebesgue densities do not exist on infinite-dimensional function spaces, we recast training as finding the so-called weak mode of the posterior measure under a Gaussian process (GP) prior restricted to the space of functions representable by the neural network. Through the GP prior, one can express structured and interpretable inductive biases, such as regularity or periodicity, directly in function space, while still exploiting the implicit inductive biases that allow deep networks to generalize. After model linearization, the training objective induces a negative log-posterior density to which we apply a Laplace approximation, leveraging highly scalable methods from matrix-free linear algebra. Our method provides improved results where prior knowledge is abundant (as is the case in many scientific inference tasks). At the same time, it stays competitive for black-box supervised learning problems, where neural networks typically excel.", "keywords": "Bayesian neural networks;Laplace approximation;uncertainty quantification;Gaussian processes", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Tristan Cinquin;Marvin Pf\u00f6rtner;Vincent Fortuin;Philipp Hennig;Robert Bamler", "authorids": "~Tristan_Cinquin1;~Marvin_Pf\u00f6rtner1;~Vincent_Fortuin1;~Philipp_Hennig1;~Robert_Bamler1", "gender": ";;M;M;M", "homepage": ";;https://fortuin.github.io/;http://mml.inf.uni-tuebingen.de;https://robamler.github.io/", "dblp": ";;218/7489;08/9077;195/6208.html", "google_scholar": ";;https://scholar.google.ch/citations?user=XBlrYTIAAAAJ;https://scholar.google.de/citations?user=UeG5w08AAAAJ;LwvdNAgAAAAJ", "orcid": ";;0000-0002-0640-2671;0000-0001-7293-6092;", "linkedin": "tristan-cinquin-60104a144;;vincent-fortuin-42426b134/;;", "or_profile": "~Tristan_Cinquin1;~Marvin_Pf\u00f6rtner1;~Vincent_Fortuin1;~Philipp_Hennig1;~Robert_Bamler1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;;Helmholtz AI;University of T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;;helmholtz.ai;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;;Principal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncinquin2024fsplaplace,\ntitle={{FSP}-Laplace: Function-Space Priors for the Laplace Approximation in Bayesian Deep Learning},\nauthor={Tristan Cinquin and Marvin Pf{\\\"o}rtner and Vincent Fortuin and Philipp Hennig and Robert Bamler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=83vxe8alV4}\n}", "github": "", "reviewers": "KV8g;Lja8;ospn;wqcS", "pdf_size": 2256705, "rating": "2;5;8;8", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "58;94;89;116", "wc_strengths": "47;59;33;237", "wc_weaknesses": "373;233;90;232", "wc_questions": "7;52;59;130", "wc_limitations": "20;1;1;1", "wc_review": "505;439;272;716", "wc_reply_reviewers": "414;0;5;0", "wc_reply_authors": "1134;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 2.48746859276655 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 20.7047699818182 ], "wc_strengths_avg": [ 94.0, 83.07225770376053 ], "wc_weaknesses_avg": [ 232.0, 100.05748347824864 ], "wc_questions_avg": [ 62.0, 44.039754767709596 ], "wc_limitations_avg": [ 5.75, 8.227241335952167 ], "wc_review_avg": [ 483.0, 159.08331150689565 ], "wc_reply_reviewers_avg": [ 104.75, 178.55723872192917 ], "wc_reply_authors_avg": [ 283.5, 491.0364039457767 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5965890454778086490&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uni-tuebingen.de;;helmholtz.ai;uni-tuebingen.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Helmholtz Association of German Research Centres;University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";Helmholtz AI;;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.helmholtz-ai.de;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Helmholtz AI;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Offline Oracle-Efficient Learning for Contextual MDPs via Layerwise Exploration-Exploitation Tradeoff", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96396", "id": "848vuK2cKp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=848vuK2cKp", "openreview": "https://openreview.net/forum?id=848vuK2cKp", "poster": "", "project": "", "author_site": "Jian Qian, Haichen Hu, David Simchi-Levi", "tldr": "", "abstract": "Motivated by the recent discovery of a statistical and computational reduction from contextual bandits to offline regression \\citep{simchi2020bypassing}, we address the general (stochastic) Contextual Markov Decision Process (CMDP) problem with horizon $H$ (as known as CMDP with $H$ layers). In this paper, we introduce a reduction from CMDPs to offline density estimation under the realizability assumption, i.e., a model class $\\mathcal{M}$ containing the true underlying CMDP is provided in advance. We develop an efficient, statistically near-optimal algorithm requiring only $O(H \\log T)$ calls to an offline density estimation algorithm (or oracle) across all $T$ rounds. This number can be further reduced to $O(H \\log \\log T)$ if $T$ is known in advance. Our results mark the first efficient and near-optimal reduction from CMDPs to offline density estimation without imposing any structural assumptions on the model class. A notable feature of our algorithm is the design of a layerwise exploration-exploitation tradeoff tailored to address the layerwise structure of CMDPs. Additionally, our algorithm is versatile and applicable to pure exploration tasks in reward-free reinforcement learning.", "keywords": "reinforcement learning;contextual MDP;offline density estimation;computational efficiency", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jian Qian;Haichen Hu;David Simchi-Levi", "authorids": "~Jian_Qian2;~Haichen_Hu1;~David_Simchi-Levi2", "gender": ";M;M", "homepage": "https://sites.google.com/view/jianqian/about;;http://slevi1.mit.edu/", "dblp": ";;", "google_scholar": ";-Ta8SkUAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": ";;", "linkedin": "jianQ/;%E6%B5%B7%E8%BE%B0-%E8%83%A1-4996a2292/;", "or_profile": "~Jian_Qian2;~Haichen_Hu1;~David_Simchi-Levi2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nqian2024offline,\ntitle={Offline Oracle-Efficient Learning for Contextual {MDP}s via Layerwise Exploration-Exploitation Tradeoff},\nauthor={Jian Qian and Haichen Hu and David Simchi-Levi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=848vuK2cKp}\n}", "github": "", "reviewers": "kcAb;Nj2f;Bij6;eNC6", "pdf_size": 498097, "rating": "6;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;2;3;2", "wc_summary": "206;72;67;168", "wc_strengths": "67;175;63;39", "wc_weaknesses": "462;201;32;228", "wc_questions": "280;168;62;2", "wc_limitations": "15;9;1;1", "wc_review": "1030;625;225;438", "wc_reply_reviewers": "627;31;0;14", "wc_reply_authors": "2575;0;0;0", "reply_reviewers": "4;1;0;1", "reply_authors": "8;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 128.25, 60.29251611933275 ], "wc_strengths_avg": [ 86.0, 52.48809388804284 ], "wc_weaknesses_avg": [ 230.75, 153.1916691599122 ], "wc_questions_avg": [ 128.0, 105.99056561788883 ], "wc_limitations_avg": [ 6.5, 5.894913061275798 ], "wc_review_avg": [ 579.5, 296.1051333563807 ], "wc_reply_reviewers_avg": [ 168.0, 265.23103136699524 ], "wc_reply_authors_avg": [ 643.75, 1115.0077073724647 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.75, 3.031088913245535 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11672630801991904471&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Pin-Tuning: Parameter-Efficient In-Context Tuning for Few-Shot Molecular Property Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96395", "id": "859DtlwnAD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=859DtlwnAD", "openreview": "https://openreview.net/forum?id=859DtlwnAD", "poster": "/media/PosterPDFs/NeurIPS%202024/96395.png?t=1731595192.5565639", "project": "", "author_site": "Liang Wang, Qiang Liu, Shaozhen Liu, Xin Sun, Shu Wu, Liang Wang", "tldr": "", "abstract": "Molecular property prediction (MPP) is integral to drug discovery and material science, but often faces the challenge of data scarcity in real-world scenarios. Addressing this, few-shot molecular property prediction (FSMPP) has been developed. Unlike other few-shot tasks, FSMPP typically employs a pre-trained molecular encoder and a context-aware classifier, benefiting from molecular pre-training and molecular context information. Despite these advancements, existing methods struggle with the ineffective fine-tuning of pre-trained encoders. We attribute this issue to the imbalance between the abundance of tunable parameters and the scarcity of labeled molecules, and the lack of contextual perceptiveness in the encoders. To overcome this hurdle, we propose a parameter-efficient in-context tuning method, named Pin-Tuning. Specifically, we propose a lightweight adapter for pre-trained message passing layers (MP-Adapter) and Bayesian weight consolidation for pre-trained atom/bond embedding layers (Emb-BWC), to achieve parameter-efficient tuning while preventing over-fitting and catastrophic forgetting. Additionally, we enhance the MP-Adapters with contextual perceptiveness. This innovation allows for in-context tuning of the pre-trained encoder, thereby improving its adaptability for specific FSMPP tasks. When evaluated on public datasets, our method demonstrates superior tuning with fewer trainable parameters, improving few-shot predictive performance.", "keywords": "few-shot molecular property prediction;molecular property prediction;parameter-efficient tuning;molecular representation learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/189f75a2cc1b4a0a81db2a9b3f8d5bdc8f221e36.zip", "author": "Liang Wang;Qiang Liu;Shaozhen Liu;Xin Sun;Shu Wu;Liang Wang", "authorids": "~Liang_Wang16;~Qiang_Liu8;~Shaozhen_Liu1;~Xin_Sun9;~Shu_Wu1;~Liang_Wang3", "gender": "M;M;;M;M;M", "homepage": "https://azureleon1.github.io/;https://john-qiangliu.tech/;https://shaozhenliu.github.io/;https://sunxin000.github.io/;http://www.shuwu.name;", "dblp": "56/4499-56;61/3234-6;;;06/3577;56/4499-1", "google_scholar": "PdzDZdgAAAAJ;https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;;;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-4714-7582;0000-0002-9233-3827;;;0000-0003-2164-3577;", "linkedin": ";;;;;", "or_profile": "~Liang_Wang16;~Qiang_Liu8;~Shaozhen_Liu1;~Xin_Sun9;~Shu_Wu1;~Liang_Wang3", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Beijing Institute of Technology;University of Science and Technology of China;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0c CAS\uff0cChina", "aff_domain": "ia.ac.cn;nlpr.ia.ac.cn;bit.edu.cn;ustc.edu.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;Undergrad student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024pintuning,\ntitle={Pin-Tuning: Parameter-Efficient In-Context Tuning for Few-Shot Molecular Property Prediction},\nauthor={Liang Wang and Qiang Liu and Shaozhen Liu and Xin Sun and Shu Wu and Liang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=859DtlwnAD}\n}", "github": "", "reviewers": "nYSV;W6tN;1DKF;u7hY", "pdf_size": 4754643, "rating": "4;5;5;7", "confidence": "3;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "35;108;112;107", "wc_strengths": "19;206;90;87", "wc_weaknesses": "14;380;127;81", "wc_questions": "69;1;49;67", "wc_limitations": "1;7;1;1", "wc_review": "138;702;379;343", "wc_reply_reviewers": "0;899;17;80", "wc_reply_authors": "151;2297;70;80", "reply_reviewers": "0;2;1;1", "reply_authors": "4;7;3;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 32.09750769140807 ], "wc_strengths_avg": [ 100.5, 67.20305052599919 ], "wc_weaknesses_avg": [ 150.5, 138.4602831139674 ], "wc_questions_avg": [ 46.5, 27.399817517640514 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 390.5, 201.9758648947938 ], "wc_reply_reviewers_avg": [ 249.0, 376.4591611317222 ], "wc_reply_authors_avg": [ 649.5, 951.6970368767574 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.25, 1.6393596310755 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17225053603867792164&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ia.ac.cn;nlpr.ia.ac.cn;bit.edu.cn;ustc.edu.cn;ia.ac.cn;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Beijing Institute of Technology;University of Science and Technology of China", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.bit.edu.cn/;http://www.ustc.edu.cn", "aff_unique_abbr": "CAS;BIT;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Looks Too Good To Be True: An Information-Theoretic Analysis of Hallucinations in Generative Restoration Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96394", "id": "85tu7K06i3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=85tu7K06i3", "openreview": "https://openreview.net/forum?id=85tu7K06i3", "poster": "/media/PosterPDFs/NeurIPS%202024/96394.png?t=1733174188.5939436", "project": "", "author_site": "Regev Cohen, Idan Kligvasser, Ehud Rivlin, Daniel Freedman", "tldr": "", "abstract": "The pursuit of high perceptual quality in image restoration has driven the development of revolutionary generative models, capable of producing results often visually indistinguishable from real data.\nHowever, as their perceptual quality continues to improve, these models also exhibit a growing tendency to generate hallucinations \u2013 realistic-looking details that do not exist in the ground truth images.\nHallucinations in these models create uncertainty about their reliability, raising major concerns about their practical application.\nThis paper investigates this phenomenon through the lens of information theory, revealing a fundamental tradeoff between uncertainty and perception. We rigorously analyze the relationship between these two factors, proving that the global minimal uncertainty in generative models grows in tandem with perception. \nIn particular, we define the inherent uncertainty of the restoration problem and show that attaining perfect perceptual quality entails at least twice this uncertainty. Additionally, we establish a relation between distortion, uncertainty and perception, through which we prove the aforementioned uncertainly-perception tradeoff induces the well-known perception-distortion tradeoff.\nWe demonstrate our theoretical findings through experiments with super-resolution and inpainting algorithms.\nThis work uncovers fundamental limitations of generative models in achieving both high perceptual quality and reliable predictions for image restoration. \nThus, we aim to raise awareness among practitioners about this inherent tradeoff, empowering them to make informed decisions and potentially prioritize safety over perceptual performance.", "keywords": "uncertainty;hallucinations;perception;tradeoff;distortion;restoration tasks;inverse problems.", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Regev Cohen;Idan Kligvasser;Ehud Rivlin;Daniel Freedman", "authorids": "~Regev_Cohen1;~Idan_Kligvasser1;~Ehud_Rivlin2;~Daniel_Freedman2", "gender": "M;M;M;", "homepage": ";;http://www.cs.technion.ac.il/~ehudr/;", "dblp": ";210/0854;;59/1865", "google_scholar": "naMCufgAAAAJ;a-r4CdYAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "regev-cohen/;;;", "or_profile": "~Regev_Cohen1;~Idan_Kligvasser1;~Ehud_Rivlin2;~Daniel_Freedman2", "aff": "Google;Verily (Google Life Sciences);Technion, Technion;Verily", "aff_domain": "google.com;google.com;technion.ac.il;google.com", "position": "Researcher;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\ncohen2024looks,\ntitle={Looks Too Good To Be True: An Information-Theoretic Analysis of Hallucinations in Generative Restoration Models},\nauthor={Regev Cohen and Idan Kligvasser and Ehud Rivlin and Daniel Freedman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=85tu7K06i3}\n}", "github": "", "reviewers": "WuZt;xyrt;W9yk;DvxH", "pdf_size": 3082903, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "34;45;106;89", "wc_strengths": "48;14;119;47", "wc_weaknesses": "81;69;93;77", "wc_questions": "2;101;109;19", "wc_limitations": "5;2;27;1", "wc_review": "170;231;454;233", "wc_reply_reviewers": "30;10;24;10", "wc_reply_authors": "12;23;13;13", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 29.87055406248769 ], "wc_strengths_avg": [ 57.0, 38.32101251271944 ], "wc_weaknesses_avg": [ 80.0, 8.660254037844387 ], "wc_questions_avg": [ 57.75, 47.714646598293065 ], "wc_limitations_avg": [ 8.75, 10.638961415476606 ], "wc_review_avg": [ 272.0, 108.0856142139184 ], "wc_reply_reviewers_avg": [ 18.5, 8.760707733967616 ], "wc_reply_authors_avg": [ 15.25, 4.493050188902857 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=79983831521380080&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;google.com;technion.ac.il;google.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Google;Technion - Israel Institute of Technology;Verily", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.technion.ac.il/en/;https://www.verily.com", "aff_unique_abbr": "Google;Technion;Verily", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Self-supervised Transformation Learning for Equivariant Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96393", "id": "87AXdbkRyd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=87AXdbkRyd", "openreview": "https://openreview.net/forum?id=87AXdbkRyd", "poster": "/media/PosterPDFs/NeurIPS%202024/96393.png?t=1733376893.9971318", "project": "", "author_site": "Jaemyung Yu, Jaehyun Choi, DongJae Lee, HyeongGwon Hong, Junmo Kim", "tldr": "", "abstract": "Unsupervised representation learning has significantly advanced various machine learning tasks. In the computer vision domain, state-of-the-art approaches utilize transformations like random crop and color jitter to achieve invariant representations, embedding semantically the same inputs despite transformations. However, this can degrade performance in tasks requiring precise features, such as localization or flower classification. To address this, recent research incorporates equivariant representation learning, which captures transformation-sensitive information. However, current methods depend on transformation labels and thus struggle with interdependency and complex transformations. We propose Self-supervised Transformation Learning (STL), replacing transformation labels with transformation representations derived from image pairs. The proposed method ensures transformation representation is image-invariant and learns corresponding equivariant transformations, enhancing performance without increased batch complexity. We demonstrate the approach\u2019s effectiveness across diverse classification and detection tasks, outperforming existing methods in 7 out of 11 benchmarks and excelling in detection. By integrating complex transformations like AugMix, unusable by prior equivariant methods, this approach enhances performance across tasks, underscoring its adaptability and resilience. Additionally, its compatibility with various base models highlights its flexibility and broad applicability. The code is available at https://github.com/jaemyung-u/stl.", "keywords": "Equivariant Learning;Transformation Representation;Self-supervised Transformation Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jaemyung Yu;Jaehyun Choi;Dong-Jae Lee;HyeongGwon Hong;Junmo Kim", "authorids": "~Jaemyung_Yu1;~Jaehyun_Choi1;~Dong-Jae_Lee1;~HyeongGwon_Hong1;~Junmo_Kim1", "gender": ";M;M;M;M", "homepage": ";;;https://siit.kaist.ac.kr;https://siit.kaist.ac.kr/Faculty", "dblp": ";62/6980;121/1605;255/5248;40/240-2.html", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=GdQtWNQAAAAJ", "orcid": ";0000-0002-9183-761X;;;", "linkedin": ";;;;", "or_profile": "~Jaemyung_Yu1;~Jaehyun_Choi1;~Dong-Jae_Lee1;~HyeongGwon_Hong1;~Junmo_Kim1", "aff": ";Korea Advanced Institute of Science & Technology;KAIST, Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": ";kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": ";PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyu2024selfsupervised,\ntitle={Self-supervised Transformation Learning for Equivariant Representations},\nauthor={Jaemyung Yu and Jaehyun Choi and Dong-Jae Lee and HyeongGwon Hong and Junmo Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=87AXdbkRyd}\n}", "github": "", "reviewers": "gZBh;KJt3;18TY;r3iR", "pdf_size": 2366660, "rating": "5;6;6;6", "confidence": "3;4;4;4", "soundness": "2;3;2;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "47;22;77;101", "wc_strengths": "87;111;156;85", "wc_weaknesses": "170;187;403;95", "wc_questions": "126;42;256;1", "wc_limitations": "17;44;35;1", "wc_review": "447;406;927;283", "wc_reply_reviewers": "0;40;233;38", "wc_reply_authors": "53;57;543;64", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 29.877876430563134 ], "wc_strengths_avg": [ 109.75, 28.595235617144336 ], "wc_weaknesses_avg": [ 213.75, 114.6153894553432 ], "wc_questions_avg": [ 106.25, 97.49455112979392 ], "wc_limitations_avg": [ 24.25, 16.57369904396722 ], "wc_review_avg": [ 515.75, 244.98507607607448 ], "wc_reply_reviewers_avg": [ 77.75, 91.03948319273347 ], "wc_reply_authors_avg": [ 179.25, 210.04805997675865 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ecfIQxj-ycYJ:scholar.google.com/&scioq=Self-supervised+Transformation+Learning+for+Equivariant+Representations&hl=en&as_sdt=0,11", "gs_version_total": 5, "email": ";kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Benign overfitting in leaky ReLU networks with moderate input dimension", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96392", "id": "88TzdGyPT6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=88TzdGyPT6", "openreview": "https://openreview.net/forum?id=88TzdGyPT6", "poster": "/media/PosterPDFs/NeurIPS%202024/96392.png?t=1733816144.8381467", "project": "", "author_site": "Kedar Karhadkar, Erin George, Michael Murray, Guido Montufar, Deanna Needell", "tldr": "", "abstract": "The problem of benign overfitting asks whether it is possible for a model to perfectly fit noisy training data and still generalize well. We study benign overfitting in two-layer leaky ReLU networks trained with the hinge loss on a binary classification task. We consider input data which can be decomposed into the sum of a common signal and a random noise component, which lie on subspaces orthogonal to one another. We characterize conditions on the signal to noise ratio (SNR) of the model parameters giving rise to benign versus non-benign, or harmful, overfitting: in particular, if the SNR is high then benign overfitting occurs, conversely if the SNR is low then harmful overfitting occurs. We attribute both benign and non-benign overfitting to an approximate margin maximization property and show that leaky ReLU networks trained on hinge loss with gradient descent (GD) satisfy this property. In contrast to prior work we do not require the training data to be nearly orthogonal. Notably, for input dimension $d$ and training sample size $n$, while results in prior work require $d = \\Omega(n^2 \\log n)$, here we require only $d = \\Omega(n)$.", "keywords": "benign overfitting;leaky relu;optimization;generalization;hinge loss;margin;overparameterization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Kedar Karhadkar;Erin George;Michael Murray;Guido Montufar;Deanna Needell", "authorids": "~Kedar_Karhadkar1;~Erin_George1;~Michael_Murray3;~Guido_Montufar1;~Deanna_Needell2", "gender": "M;Non-Binary;M;M;Not Specified", "homepage": "https://www.math.ucla.edu/~kedar/;http://egeo.cc;https://www.math.ucla.edu/people/visiting/mmurray;http://www.math.ucla.edu/~montufar/;https://www.math.ucla.edu/~deanna/index.html", "dblp": "278/8407;310/1223;;;03/2691", "google_scholar": "WVdm3mQAAAAJ;;wplO7UoAAAAJ;https://scholar.google.de/citations?user=pDIuuVwAAAAJ;", "orcid": ";0000-0001-6792-9058;;0000-0002-0131-2669;0000-0002-8058-8638", "linkedin": ";;;;", "or_profile": "~Kedar_Karhadkar1;~Erin_George1;~Michael_Murray3;~Guido_Montufar1;~Deanna_Needell2", "aff": "Susquehanna International Group;University of California, Los Angeles;University of California, Los Angeles;UCLA ;University of California, Los Angeles", "aff_domain": "sig.com;ucla.edu;ucla.edu;math.ucla.edu;ucla.edu", "position": "Intern;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkarhadkar2024benign,\ntitle={Benign overfitting in leaky Re{LU} networks with moderate input dimension},\nauthor={Kedar Karhadkar and Erin George and Michael Murray and Guido Montufar and Deanna Needell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=88TzdGyPT6}\n}", "github": "", "reviewers": "1LbK;xTBG;7S96;8RfS", "pdf_size": 550584, "rating": "4;5;5;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;2;4", "wc_summary": "68;140;66;94", "wc_strengths": "25;29;48;281", "wc_weaknesses": "41;198;43;5", "wc_questions": "26;108;80;43", "wc_limitations": "2;1;13;1", "wc_review": "162;476;250;424", "wc_reply_reviewers": "130;61;0;20", "wc_reply_authors": "466;221;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.0, 29.832867780352597 ], "wc_strengths_avg": [ 95.75, 107.3065119179633 ], "wc_weaknesses_avg": [ 71.75, 74.4425113762291 ], "wc_questions_avg": [ 64.25, 31.92471613029629 ], "wc_limitations_avg": [ 4.25, 5.0682837331783235 ], "wc_review_avg": [ 328.0, 127.23993083933989 ], "wc_reply_reviewers_avg": [ 52.75, 49.72612492442981 ], "wc_reply_authors_avg": [ 171.75, 192.35692735121341 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12665635959918824830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sig.com;ucla.edu;ucla.edu;math.ucla.edu;ucla.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Susquehanna International Group;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.susquehannainternational.com;https://www.ucla.edu", "aff_unique_abbr": "SIG;UCLA", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Make-it-Real: Unleashing Large Multimodal Model for Painting 3D Objects with Realistic Materials", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96391", "id": "88rbNOtAez", "proceeding": "", "pdf": "https://openreview.net/pdf?id=88rbNOtAez", "openreview": "https://openreview.net/forum?id=88rbNOtAez", "poster": "/media/PosterPDFs/NeurIPS%202024/96391.png?t=1731602105.1177976", "project": "", "author_site": "Ye Fang, Zeyi Sun, Tong Wu, Jiaqi Wang, Ziwei Liu, Gordon Wetzstein, Dahua Lin", "tldr": "", "abstract": "Physically realistic materials are pivotal in augmenting the realism of 3D assets across various applications and lighting conditions. However, existing 3D assets and generative models often lack authentic material properties. Manual assignment of materials using graphic software is a tedious and time-consuming task. In this paper, we exploit advancements in Multimodal Large Language Models (MLLMs), particularly GPT-4V, to present a novel approach, Make-it-Real: 1) We demonstrate that GPT-4V can effectively recognize and describe materials, allowing the construction of a detailed material library. 2) Utilizing a combination of visual cues and hierarchical text prompts, GPT-4V precisely identifies and aligns materials with the corresponding components of 3D objects. 3) The correctly matched materials are then meticulously applied as reference for the new SVBRDF material generation according to the original albedo map, significantly enhancing their visual authenticity. Make-it-Real offers a streamlined integration into the 3D content creation workflow, showcasing its utility as an essential tool for developers of 3D assets.", "keywords": "Texture Synthesis;MLLMs;Material Refinement;3D Content Creation", "primary_area": "machine_vision", "supplementary_material": "/attachment/56c096e657b5a44133422e321306adb9761bfd60.zip", "author": "Ye Fang;Zeyi Sun;Tong Wu;Jiaqi Wang;Ziwei Liu;Gordon Wetzstein;Dahua Lin", "authorids": "~Ye_Fang1;~Zeyi_Sun1;~Tong_Wu2;~Jiaqi_Wang1;~Ziwei_Liu1;~Gordon_Wetzstein3;~Dahua_Lin1", "gender": "F;;F;M;M;M;M", "homepage": "https://github.com/Aleafy;https://github.com/SunzeY;https://wutong16.github.io/;https://myownskyw7.github.io/;https://liuziwei7.github.io/;http://web.stanford.edu/~gordonwz/;http://dahua.site", "dblp": ";123/5522-2;75/5056-2;44/740-3;05/6300-2;13/4660;53/6088", "google_scholar": ";RvGxDLUAAAAJ;https://scholar.google.com.hk/citations?user=cLUgV4YAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;VOf45S0AAAAJ;GMzzRRUAAAAJ", "orcid": ";;;;;0000-0002-9243-6885;", "linkedin": ";;;;;gordon-wetzstein-2406723/;", "or_profile": "~Ye_Fang1;~Zeyi_Sun1;~Tong_Wu2;~Jiaqi_Wang1;~Ziwei_Liu1;~Gordon_Wetzstein3;~Dahua_Lin1", "aff": "Fudan University;Shanghai Jiaotong University;The Chinese University of Hong Kong;Shanghai AI Laboratory;Nanyang Technological University;Stanford University;The Chinese University of Hong Kong", "aff_domain": "fudan.edu.cn;sjtu.edu.cn;cuhk.edu.hk;pjlab.org.cn;ntu.edu.sg;stanford.edu;cuhk.edu.hk", "position": "PhD student;PhD student;PhD student;Research Scientist;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nfang2024makeitreal,\ntitle={Make-it-Real: Unleashing Large Multimodal Model for Painting 3D Objects with Realistic Materials},\nauthor={Ye Fang and Zeyi Sun and Tong Wu and Jiaqi Wang and Ziwei Liu and Gordon Wetzstein and Dahua Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=88rbNOtAez}\n}", "github": "", "reviewers": "jx7i;dDLL;LVe6;mcgT;s3Ca", "pdf_size": 0, "rating": "3;5;5;7;7", "confidence": "5;3;4;4;4", "soundness": "2;3;3;3;4", "novelty": "1;3;2;3;4", "presentation": "1;3;4;3;4", "wc_summary": "92;84;89;56;65", "wc_strengths": "37;49;44;37;68", "wc_weaknesses": "392;106;83;83;196", "wc_questions": "118;43;42;3;24", "wc_limitations": "19;9;65;10;14", "wc_review": "658;291;323;189;367", "wc_reply_reviewers": "139;0;0;0;0", "wc_reply_authors": "302;0;0;0;0", "reply_reviewers": "1;0;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.4, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 77.2, 14.161920773680382 ], "wc_strengths_avg": [ 47.0, 11.436782764396638 ], "wc_weaknesses_avg": [ 172.0, 117.62142661947269 ], "wc_questions_avg": [ 46.0, 38.83812559843742 ], "wc_limitations_avg": [ 23.4, 21.095971179350812 ], "wc_review_avg": [ 365.6, 157.52282374310082 ], "wc_reply_reviewers_avg": [ 27.8, 55.6 ], "wc_reply_authors_avg": [ 60.4, 120.80000000000001 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4965180538996705156&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "fudan.edu.cn;sjtu.edu.cn;cuhk.edu.hk;pjlab.org.cn;ntu.edu.sg;stanford.edu;cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;2", "aff_unique_norm": "Fudan University;Shanghai Jiao Tong University;Chinese University of Hong Kong;Shanghai AI Laboratory;Nanyang Technological University;Stanford University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.sjtu.edu.cn;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com;https://www.ntu.edu.sg;https://www.stanford.edu", "aff_unique_abbr": "Fudan;SJTU;CUHK;SAIL;NTU;Stanford", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Hong Kong SAR;Stanford", "aff_country_unique_index": "0;0;0;0;1;2;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "SOFTS: Efficient Multivariate Time Series Forecasting with Series-Core Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96390", "id": "89AUi5L1uA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=89AUi5L1uA", "openreview": "https://openreview.net/forum?id=89AUi5L1uA", "poster": "/media/PosterPDFs/NeurIPS%202024/96390.png?t=1731583425.579173", "project": "", "author_site": "Han Lu, Xu-Yang Chen, Han-Jia Ye, De-Chuan Zhan", "tldr": "", "abstract": "Multivariate time series forecasting plays a crucial role in various fields such as finance, traffic management, energy, and healthcare. Recent studies have highlighted the advantages of channel independence to resist distribution drift but neglect channel correlations, limiting further enhancements. Several methods utilize mechanisms like attention or mixer to address this by capturing channel correlations, but they either introduce excessive complexity or rely too heavily on the correlation to achieve satisfactory results under distribution drifts, particularly with a large number of channels. Addressing this gap, this paper presents an efficient MLP-based model, the Series-cOre Fused Time Series forecaster (SOFTS), which incorporates a novel STar Aggregate-Redistribute (STAR) module. Unlike traditional approaches that manage channel interactions through distributed structures, \\textit{e.g.}, attention, STAR employs a centralized strategy to improve efficiency and reduce reliance on the quality of each channel. It aggregates all series to form a global core representation, which is then dispatched and fused with individual series representations to facilitate channel interactions effectively. SOFTS achieves superior performance over existing state-of-the-art methods with only linear complexity. The broad applicability of the STAR module across different forecasting models is also demonstrated empirically. We have made our code publicly available at https://github.com/Secilia-Cxy/SOFTS.", "keywords": "multivariate time series forecasting;channel interaction;centralized structure;efficiency", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Lu Han;Xu-Yang Chen;Han-Jia Ye;De-Chuan Zhan", "authorids": "~Lu_Han2;~Xu-Yang_Chen1;~Han-Jia_Ye1;~De-Chuan_Zhan1", "gender": "M;;M;M", "homepage": "http://www.lamda.nju.edu.cn/hanlu/;http://www.lamda.nju.edu.cn/chenxy/;http://www.lamda.nju.edu.cn/yehj;http://www.lamda.nju.edu.cn/zhandc/", "dblp": ";251/0901;165/3014;74/498", "google_scholar": "https://scholar.google.com.hk/citations?user=m-WYn7gAAAAJ;;mgOYhtoAAAAJ;mYJf4TcAAAAJ", "orcid": ";;;0000-0002-3533-2078", "linkedin": ";;;", "or_profile": "~Lu_Han2;~Xu-Yang_Chen1;~Han-Jia_Ye1;~De-Chuan_Zhan1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhan2024softs,\ntitle={{SOFTS}: Efficient Multivariate Time Series Forecasting with Series-Core Fusion},\nauthor={Lu Han and Xu-Yang Chen and Han-Jia Ye and De-Chuan Zhan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=89AUi5L1uA}\n}", "github": "", "reviewers": "NURe;mAnn;hzF8;gtby", "pdf_size": 4000053, "rating": "5;5;7;7", "confidence": "4;5;4;3", "soundness": "3;3;4;3", "novelty": "3;2;4;3", "presentation": "3;3;4;3", "wc_summary": "61;66;99;56", "wc_strengths": "48;40;90;58", "wc_weaknesses": "80;170;21;145", "wc_questions": "66;5;28;98", "wc_limitations": "1;1;15;80", "wc_review": "256;282;253;437", "wc_reply_reviewers": "56;16;45;45", "wc_reply_authors": "63;20;16;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.5, 16.830032679706836 ], "wc_strengths_avg": [ 59.0, 19.0 ], "wc_weaknesses_avg": [ 104.0, 58.099053348570145 ], "wc_questions_avg": [ 49.25, 35.59055352196703 ], "wc_limitations_avg": [ 24.25, 32.69078616368839 ], "wc_review_avg": [ 307.0, 75.89795781178832 ], "wc_reply_reviewers_avg": [ 40.5, 14.84082207965583 ], "wc_reply_authors_avg": [ 30.0, 19.144189719076646 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1719933077150328135&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Offline Behavior Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96389", "id": "89fSR2gpxp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=89fSR2gpxp", "openreview": "https://openreview.net/forum?id=89fSR2gpxp", "poster": "/media/PosterPDFs/NeurIPS%202024/96389.png?t=1731219777.9654586", "project": "", "author_site": "Shiye Lei, Sen Zhang, Dacheng Tao", "tldr": "", "abstract": "Massive reinforcement learning (RL) data are typically collected to train policies offline without the need for interactions, but the large data volume can cause training inefficiencies. To tackle this issue, we formulate offline behavior distillation (OBD), which synthesizes limited expert behavioral data from sub-optimal RL data, enabling rapid policy learning. We propose two naive OBD objectives, DBC and PBC, which measure distillation performance via the decision difference between policies trained on distilled data and either offline data or a near-expert policy. Due to intractable bi-level optimization, the OBD objective is difficult to minimize to small values, which deteriorates PBC by its distillation performance guarantee with quadratic discount complexity $\\mathcal{O}(1/(1-\\gamma)^2)$. We theoretically establish the equivalence between the policy performance and action-value weighted decision difference, and introduce action-value weighted PBC (Av-PBC) as a more effective OBD objective. By optimizing the weighted decision difference, Av-PBC achieves a superior distillation guarantee with linear discount complexity $\\mathcal{O}(1/(1-\\gamma))$. Extensive experiments on multiple D4RL datasets reveal that Av-PBC offers significant improvements in OBD performance, fast distillation convergence speed, and robust cross-architecture/optimizer generalization.", "keywords": "Efficient Deep Learning;Dataset Distillation;Offline Reinforcement Learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Shiye Lei;Sen Zhang;Dacheng Tao", "authorids": "~Shiye_Lei1;~Sen_Zhang3;~Dacheng_Tao1", "gender": ";M;", "homepage": ";https://github.com/SenZHANG-GitHub;", "dblp": ";57/6221-6;", "google_scholar": ";-bJJNV0AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shiye_Lei1;~Sen_Zhang3;~Dacheng_Tao1", "aff": ";University of Sydney, University of Sydney;", "aff_domain": ";sydney.edu.au;", "position": ";Postdoc;", "bibtex": "@inproceedings{\nlei2024offline,\ntitle={Offline Behavior Distillation},\nauthor={Shiye Lei and Sen Zhang and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=89fSR2gpxp}\n}", "github": "", "reviewers": "2J9f;rzRx;VAdy;kzVy", "pdf_size": 3040760, "rating": "5;5;6;6", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "40;35;103;109", "wc_strengths": "21;48;15;133", "wc_weaknesses": "139;15;56;327", "wc_questions": "4;26;56;255", "wc_limitations": "1;9;6;170", "wc_review": "205;133;236;994", "wc_reply_reviewers": "11;19;14;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 34.361133566865924 ], "wc_strengths_avg": [ 54.25, 47.134780152240026 ], "wc_weaknesses_avg": [ 134.25, 119.91533471579021 ], "wc_questions_avg": [ 85.25, 99.72806776429593 ], "wc_limitations_avg": [ 46.5, 71.36000280269053 ], "wc_review_avg": [ 392.0, 349.5675900308837 ], "wc_reply_reviewers_avg": [ 16.75, 4.602988159880492 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w-3OB-k5K3QJ:scholar.google.com/&scioq=Offline+Behavior+Distillation&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": ";sydney.edu.au;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "On the Expressivity and Sample Complexity of Node-Individualized Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96388", "id": "8APPypS0yN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8APPypS0yN", "openreview": "https://openreview.net/forum?id=8APPypS0yN", "poster": "", "project": "", "author_site": "Paolo Pellizzoni, Till Hendrik Schulz, Dexiong Chen, Karsten Borgwardt", "tldr": "", "abstract": "Graph neural networks (GNNs) employing message passing for graph classification are inherently limited by the expressive power of the Weisfeiler-Leman (WL) test for graph isomorphism. Node individualization schemes, which assign unique identifiers to nodes (e.g., by adding random noise to features), are a common approach for achieving universal expressiveness. However, the ability of GNNs endowed with individualization schemes to generalize beyond the training data is still an open question. To address this question, this paper presents a theoretical analysis of the sample complexity of such GNNs from a statistical learning perspective, employing Vapnik\u2013Chervonenkis (VC) dimension and covering number bounds. We demonstrate that node individualization schemes that are permutation-equivariant result in lower sample complexity, and design novel individualization schemes that exploit these results. As an application of this analysis, we also develop a novel architecture that can perform substructure identification (i.e., subgraph isomorphism) while having a lower VC dimension compared to competing methods. Finally, our theoretical findings are validated experimentally on both synthetic and real-world datasets.", "keywords": "Graph neural networks;graph learning;Weisfeiler-Leman;VC dimension", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Paolo Pellizzoni;Till Hendrik Schulz;Dexiong Chen;Karsten Borgwardt", "authorids": "~Paolo_Pellizzoni1;~Till_Hendrik_Schulz1;~Dexiong_Chen1;~Karsten_Borgwardt2", "gender": ";M;;M", "homepage": ";https://dexiong.me;https://www.biochem.mpg.de/borgwardt;", "dblp": "279/2647;240/6347;11/3733.html;233/9975", "google_scholar": "https://scholar.google.it/citations?user=SRXEcPUAAAAJ;goM0yAIAAAAJ;v3JsjMYAAAAJ;https://scholar.google.com/citations?hl=de", "orcid": ";;0000-0001-7221-2393;", "linkedin": ";;;", "or_profile": "~Paolo_Pellizzoni1;~Dexiong_Chen1;~Karsten_Borgwardt2;~Till_Schulz1", "aff": "Max-Planck Institute;Max-Planck Institute;Max Planck Institute of Biochemistry;Max Planck Institute of Biochemistry", "aff_domain": "mpg.de;mpg.de;biochem.mpg.de;biochem.mpg.de", "position": "PhD student;Project leader;Max Planck Director;Postdoc", "bibtex": "@inproceedings{\npellizzoni2024on,\ntitle={On the Expressivity and Sample Complexity of Node-Individualized Graph Neural Networks},\nauthor={Paolo Pellizzoni and Till Hendrik Schulz and Dexiong Chen and Karsten Borgwardt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8APPypS0yN}\n}", "github": "", "reviewers": "xW3P;QJ5E;Tve2", "pdf_size": 753936, "rating": "4;6;7", "confidence": "3;4;3", "soundness": "2;2;3", "novelty": "2;3;4", "presentation": "1;2;3", "wc_summary": "52;113;46", "wc_strengths": "38;67;52", "wc_weaknesses": "275;495;177", "wc_questions": "5;307;5", "wc_limitations": "2;34;29", "wc_review": "372;1016;309", "wc_reply_reviewers": "84;127;33", "wc_reply_authors": "28;56;26", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 70.33333333333333, 30.26916289265731 ], "wc_strengths_avg": [ 52.333333333333336, 11.841546445554407 ], "wc_weaknesses_avg": [ 315.6666666666667, 132.9695036047322 ], "wc_questions_avg": [ 105.66666666666667, 142.36416527889156 ], "wc_limitations_avg": [ 21.666666666666668, 14.055445761538676 ], "wc_review_avg": [ 565.6666666666666, 319.47074287883635 ], "wc_reply_reviewers_avg": [ 81.33333333333333, 38.42163742245016 ], "wc_reply_authors_avg": [ 36.666666666666664, 13.695092389449425 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=388598193820415117&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "mpg.de;mpg.de;biochem.mpg.de;biochem.mpg.de", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;Max Planck Institute of Biochemistry", "aff_unique_dep": ";Biochemistry", "aff_unique_url": "https://www.mpg.de;https://www.biochem.mpg.de", "aff_unique_abbr": "MPG;MPIB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Unified Insights: Harnessing Multi-modal Data for Phenotype Imputation via View Decoupling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96387", "id": "8B3sAX889P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8B3sAX889P", "openreview": "https://openreview.net/forum?id=8B3sAX889P", "poster": "", "project": "", "author_site": "Qiannan Zhang, Weishen Pan, Zilong Bai, Chang Su, Fei Wang", "tldr": "", "abstract": "Phenotype imputation plays a crucial role in improving comprehensive and accurate medical evaluation, which in turn can optimize patient treatment and bolster the reliability of clinical research. Despite the adoption of various techniques, multi-modal biological data, which can provide crucial insights into a patient's overall health, is often overlooked. With multi-modal biological data, patient characterization can be enriched from two distinct views: the biological view and the phenotype view. However, the heterogeneity and imprecise nature of the multimodal data still pose challenges in developing an effective method to model from two views. In this paper, we propose a novel framework to incorporate multi-modal biological data via view decoupling. Specifically, we segregate the modeling of biological data from phenotype data in a graph-based learning framework. From the biological view, the latent factors in biological data are discovered to model patient correlation. From the phenotype view, phenotype co-occurrence can be modeled to reveal patterns across patients. Then patients are encoded from these two distinct views. To mitigate the influence of noise and irrelevant information in biological data, we devise a cross-view contrastive knowledge distillation aimed at distilling insights from the biological view to enhance phenotype imputation. We show that phenotype imputation with the proposed model significantly outperforms the state-of-the-art models on the real-world biomedical database.", "keywords": "Phenotype Imputation;Graph Neural Networks;Biological Multi-modal data", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Qiannan Zhang;Weishen Pan;Zilong Bai;Chang Su;Fei Wang", "authorids": "~Qiannan_Zhang1;~Weishen_Pan1;~Zilong_Bai1;~Chang_Su9;~Fei_Wang3", "gender": "F;M;M;M;", "homepage": ";https://scholar.google.com/citations?user=PtTBMhUAAAAJ;;http://chang-su.net;https://wcm-wanglab.github.io/index.html", "dblp": "117/7963;161/2032;137/0122;;52/3194-9.html", "google_scholar": ";PtTBMhUAAAAJ;vBpronQAAAAJ;ef_JSlYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0006-0431-5642;0000-0002-3891-8015;;", "linkedin": ";;zilong-bai-72a227133/;;fei-wang-50682425/", "or_profile": "~Qiannan_Zhang1;~Weishen_Pan1;~Zilong_Bai1;~Chang_Su9;~Fei_Wang3", "aff": "Weill Cornell Medicine, Cornell University;Weill Cornell Medicine, Cornell University;Weill Cornell Medicine, Cornell University;Weill Cornell Medicine, Cornell University;Cornell University", "aff_domain": "med.cornell.edu;med.cornell.edu;med.cornell.edu;med.cornell.edu;cornell.edu", "position": "Postdoc;Postdoc;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024unified,\ntitle={Unified Insights: Harnessing Multi-modal Data for Phenotype Imputation via View Decoupling},\nauthor={Qiannan Zhang and Weishen Pan and Zilong Bai and Chang Su and Fei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8B3sAX889P}\n}", "github": "", "reviewers": "8HRK;wLvH;QunD;dJb6;W1JA", "pdf_size": 829308, "rating": "4;4;5;7;7", "confidence": "5;3;4;5;5", "soundness": "3;2;3;4;4", "novelty": "2;2;2;4;3", "presentation": "3;3;3;3;3", "wc_summary": "112;80;66;92;84", "wc_strengths": "36;50;28;164;227", "wc_weaknesses": "248;90;31;131;75", "wc_questions": "4;188;39;4;26", "wc_limitations": "21;17;13;4;7", "wc_review": "421;425;177;395;419", "wc_reply_reviewers": "0;0;0;21;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.8, 15.157836257197133 ], "wc_strengths_avg": [ 101.0, 80.0 ], "wc_weaknesses_avg": [ 115.0, 73.79159843776254 ], "wc_questions_avg": [ 52.2, 69.20809201242295 ], "wc_limitations_avg": [ 12.4, 6.248199740725323 ], "wc_review_avg": [ 367.4, 95.77807682345683 ], "wc_reply_reviewers_avg": [ 4.2, 8.4 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5897678246195887, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YBRgG146F28J:scholar.google.com/&scioq=Unified+Insights:+Harnessing+Multi-modal+Data+for+Phenotype+Imputation+via+View+Decoupling&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "med.cornell.edu;med.cornell.edu;med.cornell.edu;med.cornell.edu;cornell.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.weill.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Weill Cornell Medicine;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stratified Prediction-Powered Inference for Effective Hybrid Evaluation of Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96386", "id": "8CBcdDQFDQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8CBcdDQFDQ", "openreview": "https://openreview.net/forum?id=8CBcdDQFDQ", "poster": "", "project": "", "author_site": "Adam Fisch, Joshua Maynez, R. Hofer, Bhuwan Dhingra, Amir Globerson, William Cohen", "tldr": "", "abstract": "Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. PPI achieves this by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate---but potentially biased---automatic system, in a way that results in tighter confidence intervals for certain parameters of interest (e.g., the mean performance of a language model). In this paper, we propose a method called Stratified Prediction-Powered Inference (StratPPI), in which we show that the basic PPI estimates can be considerably improved by employing simple data stratification strategies. Without making any assumptions on the underlying automatic labeling system or data distribution, we derive an algorithm for computing provably valid confidence intervals for parameters of any dimensionality that is based on stratified sampling. In particular, we show both theoretically and empirically that, with appropriate choices of stratification and sample allocation, our approach can provide substantially tighter confidence intervals than unstratified approaches. Specifically, StratPPI is expected to improve in cases where the performance of the autorater varies across different conditional distributions of the target data.", "keywords": "prediction powered inference;auto-raters", "primary_area": "learning_theory", "supplementary_material": "", "author": "Adam Fisch;Joshua Maynez;R. Alex Hofer;Bhuwan Dhingra;Amir Globerson;William W. Cohen", "authorids": "~Adam_Fisch2;~Joshua_Maynez1;~R._Alex_Hofer1;~Bhuwan_Dhingra1;~Amir_Globerson1;~William_W._Cohen2", "gender": "M;M;M;M;M;", "homepage": ";;https://users.cs.duke.edu/~bdhingra/;http://www.cs.tau.ac.il/~gamir/;https://wwcohen.github.io/;https://people.csail.mit.edu/fisch/", "dblp": "220/3863;;180/5692;08/4162.html;c/WWCohen.html;https://dblp.org/pers/f/Fisch:Adam.html", "google_scholar": "ZOYd-0oAAAAJ;HOyYjW4AAAAJ;2W2ttrQAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;8ys-38kAAAAJ;https://scholar.google.com/citations?authorid=LYRkQhMAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Joshua_Maynez1;~R._Alex_Hofer1;~Bhuwan_Dhingra1;~Amir_Globerson1;~William_W._Cohen2;~Adam_Fisch1", "aff": "Google;Google DeepMind;Duke University;Tel Aviv University;Google DeepMind;Massachusetts Institute of Technology", "aff_domain": "google.com;google.com;duke.edu;tau.ac.il;google.com;mit.edu", "position": "Researcher;Researcher;Assistant Professor;Associate Professor;Principle Scientist;PhD student", "bibtex": "@inproceedings{\nfisch2024stratified,\ntitle={Stratified Prediction-Powered Inference for Effective Hybrid Evaluation of Language Models},\nauthor={Adam Fisch and Joshua Maynez and R. Alex Hofer and Bhuwan Dhingra and Amir Globerson and William W. Cohen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8CBcdDQFDQ}\n}", "github": "", "reviewers": "deoY;GJBL;BR9i;5dNx", "pdf_size": 2209546, "rating": "6;6;7;7", "confidence": "3;3;1;3", "soundness": "3;3;4;3", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "46;65;77;212", "wc_strengths": "28;40;46;64", "wc_weaknesses": "21;140;52;103", "wc_questions": "18;3;2;29", "wc_limitations": "7;1;7;1", "wc_review": "120;249;184;409", "wc_reply_reviewers": "12;139;0;49", "wc_reply_authors": "0;493;0;33", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 100.0, 65.60106706449217 ], "wc_strengths_avg": [ 44.5, 12.99038105676658 ], "wc_weaknesses_avg": [ 79.0, 45.79847159021794 ], "wc_questions_avg": [ 13.0, 11.20267825120404 ], "wc_limitations_avg": [ 4.0, 3.0 ], "wc_review_avg": [ 240.5, 107.44417154969366 ], "wc_reply_reviewers_avg": [ 50.0, 54.46558546458488 ], "wc_reply_authors_avg": [ 131.5, 209.14647976956246 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2891916553588672925&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "google.com;google.com;duke.edu;tau.ac.il;google.com;mit.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Google;Duke University;Tel Aviv University;Massachusetts Institute of Technology", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.duke.edu;https://www.tau.ac.il;https://web.mit.edu", "aff_unique_abbr": "Google;Duke;TAU;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;2;1;0", "aff_country_unique": "United States;United Kingdom;Israel" }, { "title": "Bayesian Nonparametrics Meets Data-Driven Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96385", "id": "8CguPoe3TP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8CguPoe3TP", "openreview": "https://openreview.net/forum?id=8CguPoe3TP", "poster": "/media/PosterPDFs/NeurIPS%202024/96385.png?t=1730569581.234229", "project": "", "author_site": "Nicola Bariletto, Nhat Ho", "tldr": "", "abstract": "Training machine learning and statistical models often involves optimizing a data-driven risk criterion. The risk is usually computed with respect to the empirical data distribution, but this may result in poor and unstable out-of-sample performance due to distributional uncertainty. In the spirit of distributionally robust optimization, we propose a novel robust criterion by combining insights from Bayesian nonparametric (i.e., Dirichlet process) theory and a recent decision-theoretic model of smooth ambiguity-averse preferences. First, we highlight novel connections with standard regularized empirical risk minimization techniques, among which Ridge and LASSO regressions. Then, we theoretically demonstrate the existence of favorable finite-sample and asymptotic statistical guarantees on the performance of the robust optimization procedure. For practical implementation, we propose and study tractable approximations of the criterion based on well-known Dirichlet process representations. We also show that the smoothness of the criterion naturally leads to standard gradient-based numerical optimization. Finally, we provide insights into the workings of our method by applying it to a variety of tasks based on simulated and real datasets.", "keywords": "Bayesian Nonparametrics;Distributionally Robust Optimization;Dirichlet Process;Decision Theory;Ambiguity Aversion;Machine Learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/bfba3fa7d11178dd6778225ff46fb7d375f507f2.zip", "author": "Nicola Bariletto;Nhat Ho", "authorids": "~Nicola_Bariletto1;~Nhat_Ho1", "gender": "M;M", "homepage": "https://nbariletto.github.io/;https://nhatptnk8912.github.io/", "dblp": ";203/4479", "google_scholar": "https://scholar.google.it/citations?user=sqZB8ZEAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";", "linkedin": ";nhat-pham-minh-ho-267b8164/", "or_profile": "~Nicola_Bariletto1;~Nhat_Ho1", "aff": "University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbariletto2024bayesian,\ntitle={Bayesian Nonparametrics Meets Data-Driven Distributionally Robust Optimization},\nauthor={Nicola Bariletto and Nhat Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8CguPoe3TP}\n}", "github": "", "reviewers": "uUEe;DqoX;p57R;Vivx;7wnT", "pdf_size": 579126, "rating": "5;5;6;7;7", "confidence": "1;2;3;4;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;4;3", "presentation": "3;2;2;3;4", "wc_summary": "53;84;92;53;64", "wc_strengths": "40;28;76;47;34", "wc_weaknesses": "40;65;215;359;1", "wc_questions": "1;18;287;260;40", "wc_limitations": "4;1;89;1;9", "wc_review": "138;196;759;720;148", "wc_reply_reviewers": "0;0;49;7;11", "wc_reply_authors": "0;0;0;29;29", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 69.2, 16.067358214715945 ], "wc_strengths_avg": [ 45.0, 16.73320053068151 ], "wc_weaknesses_avg": [ 136.0, 133.0052630537604 ], "wc_questions_avg": [ 121.2, 125.25717544316574 ], "wc_limitations_avg": [ 20.8, 34.225136961011565 ], "wc_review_avg": [ 392.2, 284.513901242101 ], "wc_reply_reviewers_avg": [ 13.4, 18.2931681236466 ], "wc_reply_authors_avg": [ 11.6, 14.207040508142432 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8770580193070292, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14039051458726128743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Adjust Pearson's $r$ to Measure Arbitrary Monotone Dependence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96384", "id": "8Dkz60yGfj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8Dkz60yGfj", "openreview": "https://openreview.net/forum?id=8Dkz60yGfj", "poster": "/media/PosterPDFs/NeurIPS%202024/96384.png?t=1730944878.0045826", "project": "", "tldr": "", "abstract": "Pearson's $r$, the most widely-used correlation coefficient, is traditionally regarded as exclusively capturing linear dependence, leading to its discouragement in contexts involving nonlinear relationships. However, recent research challenges this notion, suggesting that Pearson's $r$ should not be ruled out a priori for measuring nonlinear monotone relationships. Pearson's $r$ is essentially a scaled covariance, rooted in the renowned Cauchy-Schwarz Inequality. Our findings reveal that different scaling bounds yield coefficients with different capture ranges, and interestingly, tighter bounds actually expand these ranges. We derive a tighter inequality than Cauchy-Schwarz Inequality, leverage it to refine Pearson's $r$, and propose a new correlation coefficient, i.e., rearrangement correlation. This coefficient is able to capture arbitrary monotone relationships, both linear and nonlinear ones. It reverts to Pearson's $r$ in linear scenarios. Simulation experiments and real-life investigations show that the rearrangement correlation is more accurate in measuring nonlinear monotone dependence than the three classical correlation coefficients, and other recently proposed dependence measures.", "keywords": "Pearson's r;correlation coefficient;rearrangement inequality;nonlinear monotone dependence", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ada2ba94e5b59faacbaf47d8a58f581b54143426.zip", "author": "Xinbo Ai", "authorids": "~Xinbo_Ai1", "gender": "M", "homepage": "https://teacher.bupt.edu.cn/aixinbo/zh_CN/index.htm", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Xinbo_Ai1", "aff": "Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn", "position": "Full Professor", "bibtex": "@inproceedings{\nai2024adjust,\ntitle={Adjust Pearson's \\$r\\$ to Measure Arbitrary Monotone Dependence},\nauthor={Xinbo Ai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8Dkz60yGfj}\n}", "github": "", "reviewers": "61Js;Z7Nv;Uy7T;4BVE", "pdf_size": 1468181, "rating": "3;5;6;7", "confidence": "3;4;5;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "60;58;38;114", "wc_strengths": "274;33;23;110", "wc_weaknesses": "573;85;37;1", "wc_questions": "159;29;37;30", "wc_limitations": "1;1;37;1", "wc_review": "1067;206;172;256", "wc_reply_reviewers": "639;94;19;14", "wc_reply_authors": "1301;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "5;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 28.191310718020897 ], "wc_strengths_avg": [ 110.0, 100.49129315517837 ], "wc_weaknesses_avg": [ 174.0, 232.28215600859227 ], "wc_questions_avg": [ 63.75, 55.07892065028145 ], "wc_limitations_avg": [ 10.0, 15.588457268119896 ], "wc_review_avg": [ 425.25, 371.7172144251595 ], "wc_reply_reviewers_avg": [ 191.5, 260.30030733750584 ], "wc_reply_authors_avg": [ 325.25, 563.3495251617774 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.25482359571881275, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=712995592406358643&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "bupt.edu.cn", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Large Language Model Unlearning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96383", "id": "8Dy42ThoNe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8Dy42ThoNe", "openreview": "https://openreview.net/forum?id=8Dy42ThoNe", "poster": "/media/PosterPDFs/NeurIPS%202024/96383.png?t=1731658544.2183383", "project": "", "author_site": "Yuanshun Yao, Xiaojun Xu, Yang Liu", "tldr": "", "abstract": "We study how to perform unlearning, i.e. forgetting undesirable (mis)behaviors, on large language models (LLMs). We show at least three scenarios of aligning LLMs with human preferences can benefit from unlearning: (1) removing harmful responses, (2) erasing copyright-protected content as requested, and (3) reducing hallucinations. Unlearning, as an alignment technique, has three advantages. (1) It only requires negative (e.g. harmful) examples, which are much easier and cheaper to collect (e.g. via red teaming or user reporting) than positive (e.g. helpful and often human-written) examples required in the standard alignment process. (2) It is computationally efficient. (3) It is especially effective when we know which training samples cause the misbehavior. To the best of our knowledge, our work is among the first to explore LLM unlearning. We are also among the first to formulate the settings, goals, and evaluations in LLM unlearning. Despite only having negative samples, our ablation study shows that unlearning can still achieve better alignment performance than RLHF with just 2% of its computational time.", "keywords": "Large Language Model; LLM Alignment; Machine Unlearning; AI Privacy; AI Security", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/ca6121cfbb5bd79da8a56c48885e8e9616359fd5.zip", "author": "Yuanshun Yao;Xiaojun Xu;Yang Liu", "authorids": "~Yuanshun_Yao2;~Xiaojun_Xu1;~Yang_Liu3", "gender": "M;M;M", "homepage": ";http://www.yliuu.com;https://kevyao.com", "dblp": ";51/3710-18;186/1486", "google_scholar": "rdMZZQwAAAAJ;jKrIVCIAAAAJ;AG51Bv4AAAAJ", "orcid": ";0000-0001-8420-6011;", "linkedin": ";;", "or_profile": "~Xiaojun_Xu1;~Yang_Liu3;~Kevin_Yao1", "aff": "ByteDance Inc.;University of California, Santa Cruz;ByteDance Research", "aff_domain": "bytedance.com;ucsc.edu;bytedance.com", "position": "Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nyao2024large,\ntitle={Large Language Model Unlearning},\nauthor={Yuanshun Yao and Xiaojun Xu and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8Dy42ThoNe}\n}", "github": "", "reviewers": "EdQC;4XoD;acG2", "pdf_size": 479024, "rating": "5;5;7", "confidence": "5;4;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;4;4", "wc_summary": "124;104;73", "wc_strengths": "107;70;67", "wc_weaknesses": "236;165;140", "wc_questions": "10;31;30", "wc_limitations": "15;5;13", "wc_review": "492;375;323", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 100.33333333333333, 20.98147330914162 ], "wc_strengths_avg": [ 81.33333333333333, 18.190351532856337 ], "wc_weaknesses_avg": [ 180.33333333333334, 40.66393433443886 ], "wc_questions_avg": [ 23.666666666666668, 9.672412085697939 ], "wc_limitations_avg": [ 11.0, 4.320493798938574 ], "wc_review_avg": [ 396.6666666666667, 70.67452786463373 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 190, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2719231857981180679&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "bytedance.com;ucsc.edu;bytedance.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "ByteDance;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://www.bytedance.com;https://www.ucsc.edu", "aff_unique_abbr": "ByteDance;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Scale Equivariant Graph Metanetworks", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96382", "id": "8Fxqn1tZM1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8Fxqn1tZM1", "openreview": "https://openreview.net/forum?id=8Fxqn1tZM1", "poster": "", "project": "", "author_site": "Ioannis Kalogeropoulos, Giorgos Bouritsas, Yannis Panagakis", "tldr": "", "abstract": "This paper pertains to an emerging machine learning paradigm: learning higher- order functions, i.e. functions whose inputs are functions themselves, particularly when these inputs are Neural Networks (NNs). With the growing interest in architectures that process NNs, a recurring design principle has permeated the field: adhering to the permutation symmetries arising from the connectionist structure of\nNNs. However, are these the sole symmetries present in NN parameterizations? Zooming into most practical activation functions (e.g. sine, ReLU, tanh) answers this question negatively and gives rise to intriguing new symmetries, which we collectively refer to as scaling symmetries, that is, non-zero scalar multiplications and divisions of weights and biases. In this work, we propose Scale Equivariant Graph MetaNetworks - ScaleGMNs, a framework that adapts the Graph Metanetwork (message-passing) paradigm by incorporating scaling symmetries and thus rendering neuron and edge representations equivariant to valid scalings. We introduce novel building blocks, of independent technical interest, that allow for equivariance or invariance with respect to individual scalar multipliers or their product and use them in all components of ScaleGMN. Furthermore, we prove that, under certain expressivity conditions, ScaleGMN can simulate the forward and backward pass of any input feedforward neural network. Experimental results demonstrate that our method advances the state-of-the-art performance for several datasets and activation functions, highlighting the power of scaling symmetries as an inductive bias for NN processing. The source code is publicly available at https://github.com/jkalogero/scalegmn.", "keywords": "graph neural networks;weight space networks;implicit neural representations;symmetries", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Ioannis Kalogeropoulos;Giorgos Bouritsas;Yannis Panagakis", "authorids": "~Ioannis_Kalogeropoulos1;~Giorgos_Bouritsas1;~Yannis_Panagakis1", "gender": ";;", "homepage": "https://jkalogero.github.io;http://users.uoa.gr/~gbouritsas/;", "dblp": "344/5562;190/1675;", "google_scholar": "mNF0c-EAAAAJ;eNUJDXUAAAAJ;", "orcid": ";0000-0002-8476-4918;", "linkedin": ";giorgos-bouritsas;", "or_profile": "~Ioannis_Kalogeropoulos1;~Giorgos_Bouritsas1;~Yannis_Panagakis1", "aff": "National and Kapodistrian University of Athens;University of Athens;", "aff_domain": "di.uoa.gr;uoa.gr;", "position": "PhD student;Postdoc;", "bibtex": "@inproceedings{\nkalogeropoulos2024scale,\ntitle={Scale Equivariant Graph Metanetworks},\nauthor={Ioannis Kalogeropoulos and Giorgos Bouritsas and Yannis Panagakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8Fxqn1tZM1}\n}", "github": "", "reviewers": "x2pG;ugM4;AXwX", "pdf_size": 1279650, "rating": "6;7;8", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "123;100;76", "wc_strengths": "114;52;185", "wc_weaknesses": "159;218;163", "wc_questions": "133;112;12", "wc_limitations": "2;1;6", "wc_review": "531;483;442", "wc_reply_reviewers": "42;53;28", "wc_reply_authors": "0;241;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 99.66666666666667, 19.189117286165672 ], "wc_strengths_avg": [ 117.0, 54.33844556726542 ], "wc_weaknesses_avg": [ 180.0, 26.919633479426622 ], "wc_questions_avg": [ 85.66666666666667, 52.79099249766847 ], "wc_limitations_avg": [ 3.0, 2.160246899469287 ], "wc_review_avg": [ 485.3333333333333, 36.37153954521157 ], "wc_reply_reviewers_avg": [ 41.0, 10.23067283548187 ], "wc_reply_authors_avg": [ 80.33333333333333, 113.60848951063865 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5622557106113325646&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "di.uoa.gr;uoa.gr;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National and Kapodistrian University of Athens;University of Athens", "aff_unique_dep": ";", "aff_unique_url": "https://www.uoa.gr;https://www.uoa.gr", "aff_unique_abbr": "NKUA;UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Greece" }, { "title": "WeiPer: OOD Detection using Weight Perturbations of Class Projections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96381", "id": "8HeUvbImKT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8HeUvbImKT", "openreview": "https://openreview.net/forum?id=8HeUvbImKT", "poster": "/media/PosterPDFs/NeurIPS%202024/96381.png?t=1733878921.352586", "project": "", "author_site": "Maximilian Granz, Manuel Heurich, Tim Landgraf", "tldr": "", "abstract": "Recent advances in out-of-distribution (OOD) detection on image data show that pre-trained neural network classifiers can separate in-distribution (ID) from OOD data well, leveraging the class-discriminative ability of the model itself. Methods have been proposed that either use logit information directly or that process the model's penultimate layer activations. With \"WeiPer\", we introduce perturbations of the class projections in the final fully connected layer which creates a richer representation of the input. We show that this simple trick can improve the OOD detection performance of a variety of methods and additionally propose a distance-based method that leverages the properties of the augmented WeiPer space. We achieve state-of-the-art OOD detection results across multiple benchmarks of the OpenOOD framework, especially pronounced in difficult settings in which OOD samples are positioned close to the training set distribution. We support our findings with theoretical motivations and empirical observations, and run extensive ablations to provide insights into why WeiPer works. Our code is available at: https://github.com/mgranz/weiper.", "keywords": "OOD detection;weight perturbations;KL divergence;image data", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/4c9bdf4de4f4131e77681403078fc1b700fe5b1a.zip", "author": "Maximilian Granz;Manuel Heurich;Tim Landgraf", "authorids": "~Maximilian_Granz1;~Manuel_Heurich1;~Tim_Landgraf1", "gender": "M;;", "homepage": ";;", "dblp": "255/6095;;04/10008", "google_scholar": ";;https://scholar.google.de/citations?user=ChX0opIAAAAJ", "orcid": ";0000-0003-4233-7130;0000-0003-4951-5235", "linkedin": "maximilian-granz-0ab301196/;;", "or_profile": "~Maximilian_Granz1;~Manuel_Heurich1;~Tim_Landgraf1", "aff": "Freie Universit\u00e4t Berlin;Freie Universit\u00e4t Berlin;Freie Universit\u00e4t Berlin", "aff_domain": "fu-berlin.de;fu-berlin.de;fu-berlin.de", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngranz2024weiper,\ntitle={WeiPer: {OOD} Detection using Weight Perturbations of Class Projections},\nauthor={Maximilian Granz and Manuel Heurich and Tim Landgraf},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8HeUvbImKT}\n}", "github": "", "reviewers": "QiAi;ak1i;bFW6;T1ji", "pdf_size": 1410398, "rating": "5;6;7;7", "confidence": "4;3;5;2", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "87;94;83;19", "wc_strengths": "51;21;87;40", "wc_weaknesses": "67;210;27;31", "wc_questions": "21;41;31;15", "wc_limitations": "6;1;6;7", "wc_review": "232;367;234;112", "wc_reply_reviewers": "43;0;14;68", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.75, 30.13614938906429 ], "wc_strengths_avg": [ 49.75, 24.0351305384431 ], "wc_weaknesses_avg": [ 83.75, 74.53648435497881 ], "wc_questions_avg": [ 27.0, 9.899494936611665 ], "wc_limitations_avg": [ 5.0, 2.345207879911715 ], "wc_review_avg": [ 236.25, 90.21744565215754 ], "wc_reply_reviewers_avg": [ 31.25, 26.280934153869037 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15195664233979424284&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "fu-berlin.de;fu-berlin.de;fu-berlin.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Freie Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.fu-berlin.de", "aff_unique_abbr": "FU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "ReplaceAnything3D: Text-Guided Object Replacement in 3D Scenes with Compositional Scene Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96380", "id": "8HwI6UavYc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8HwI6UavYc", "openreview": "https://openreview.net/forum?id=8HwI6UavYc", "poster": "/media/PosterPDFs/NeurIPS%202024/96380.png?t=1733404570.284235", "project": "", "author_site": "Edward Bartrum, Thu Nguyen-Phuoc, Christopher Xie, Zhengqin Li, Numair Khan, Armen Avetisyan, Douglas Lanman, Lei Xiao", "tldr": "", "abstract": "We introduce ReplaceAnything3D model RAM3D, a novel method for 3D object replacement in 3D scenes based on users' text description. Given multi-view images of a scene, a text prompt describing the object to replace, and another describing the new object, our Erase-and-Replace approach can effectively swap objects in 3D scenes with newly generated content while maintaining 3D consistency across multiple viewpoints. We demonstrate the versatility of RAM3D by applying it to various realistic 3D scene types, showcasing results of modified objects that blend in seamlessly with the scene without impacting its overall integrity.", "keywords": "3D inpainting;Text-to-3D;Diffusion;Score-based Distillation;3D scenes", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/f1df65c530b25ed4625e98fccf9b4f2042c29ca4.zip", "author": "Edward Bartrum;Thu Nguyen-Phuoc;Chris Xie;Zhengqin Li;Numair Khan;Armen Avetisyan;Douglas Lanman;Lei Xiao", "authorids": "~Edward_Bartrum1;~Thu_Nguyen-Phuoc1;~Chris_Xie1;~Zhengqin_Li1;~Numair_Khan1;~Armen_Avetisyan1;~Douglas_Lanman1;~Lei_Xiao2", "gender": "M;;;M;;;M;", "homepage": ";;;https://sites.google.com/view/zhengqinli;;;https://twitter.com/douglaslanman;https://leixiao-ubc.github.io/", "dblp": ";;;169/4895;;;46/4485;", "google_scholar": ";;;Nxc2RbQAAAAJ;;;-qncsGYAAAAJ;Qx17Zl8AAAAJ", "orcid": ";;;0000-0003-0868-2141;;;0000-0003-2447-5652;", "linkedin": "edbartrum/;;;;;;dlanman;", "or_profile": "~Edward_Bartrum1;~Thu_Nguyen-Phuoc1;~Chris_Xie1;~Zhengqin_Li1;~Numair_Khan1;~Armen_Avetisyan1;~Douglas_Lanman1;~Lei_Xiao2", "aff": "Korea Advanced Institute of Science & Technology;;;Meta Facebook;;;Meta, Reality Labs Research;Meta", "aff_domain": "kaist.ac.kr;;;fb.com;;;meta.com;meta.com", "position": "Intern;;;Researcher;;;Senior Director;Researcher", "bibtex": "@inproceedings{\nbartrum2024replaceanythingd,\ntitle={ReplaceAnything3D: Text-Guided Object Replacement in 3D Scenes with Compositional Scene Representations},\nauthor={Edward Bartrum and Thu Nguyen-Phuoc and Chris Xie and Zhengqin Li and Numair Khan and Armen Avetisyan and Douglas Lanman and Lei Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8HwI6UavYc}\n}", "github": "", "reviewers": "ZFSz;NhGj;MnXs;MzLe", "pdf_size": 47271118, "rating": "3;4;5;6", "confidence": "4;5;3;3", "soundness": "4;2;3;3", "novelty": "1;2;3;3", "presentation": "3;1;3;3", "wc_summary": "93;59;134;35", "wc_strengths": "25;17;70;75", "wc_weaknesses": "353;322;40;232", "wc_questions": "82;3;117;71", "wc_limitations": "6;11;8;1", "wc_review": "559;412;369;414", "wc_reply_reviewers": "151;2095;16;33", "wc_reply_authors": "596;1257;15;168", "reply_reviewers": "1;5;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 80.25, 37.25167781456293 ], "wc_strengths_avg": [ 46.75, 25.96512083545925 ], "wc_weaknesses_avg": [ 236.75, 121.9782255158682 ], "wc_questions_avg": [ 68.25, 41.324175732856425 ], "wc_limitations_avg": [ 6.5, 3.640054944640259 ], "wc_review_avg": [ 438.5, 71.85575829396 ], "wc_reply_reviewers_avg": [ 573.75, 879.8316245168731 ], "wc_reply_authors_avg": [ 509.0, 481.50545168253285 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9387296814582600783&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "email": "kaist.ac.kr;;;fb.com;;;meta.com;meta.com", "author_num": 8, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.kaist.ac.kr;https://meta.com", "aff_unique_abbr": "KAIST;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "8Hy3KMZTL5", "title": "Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Open-vocabulary semantic segmentation seeks to label each pixel in an image with arbitrary text descriptions. Vision-language foundation models, especially CLIP, have recently emerged as powerful tools for acquiring open-vocabulary capabilities. \nHowever, fine-tuning CLIP to equip it with pixel-level prediction ability often suffers three issues: 1) high computational cost, 2) misalignment between the two inherent modalities of CLIP, and 3) degraded generalization ability on unseen categories. To address these issues, we propose \\alg, a symmetrical parameter-efficient fine-tuning (PEFT) strategy conducted in hyperspherical space for both of the two CLIP modalities. Specifically, the PEFT strategy is achieved by a series of efficient block-diagonal learnable transformation matrices and a dual cross-relation communication module among all learnable matrices. Since the PEFT strategy is conducted symmetrically to the two CLIP modalities, the misalignment between them is mitigated. Furthermore, we apply an additional constraint to PEFT on the CLIP text encoder according to the hyperspherical energy principle, i.e., minimizing hyperspherical energy during fine-tuning preserves the intrinsic structure of the original parameter space, to prevent the destruction of the generalization ability offered by the CLIP text encoder. Extensive evaluations across various benchmarks show that H-CLIP achieves new SOTA open-vocabulary semantic segmentation results while only requiring updating approximately 4\\% of the total parameters of CLIP.", "keywords": "Open-vocabulary Semantic Segmentation;Hyperspherical Energy;Partial Orthogonal Fine-tuning;Dual Cross Relation Communication", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zelin Peng;Zhengqin Xu;Zhilin Zeng;Yaoming Wang;Lingxi Xie;Qi Tian;Wei Shen", "authorids": "~Zelin_Peng1;~Zhengqin_Xu1;~Zhilin_Zeng1;~Yaoming_Wang1;~Lingxi_Xie1;~Qi_Tian3;~Wei_Shen2", "gender": "M;M;;;M;M;M", "homepage": ";;;;http://lingxixie.com/;https://www.qitian1987.com/index.html;https://shenwei1231.github.io/", "dblp": ";240/7110;;;123/2869;78/1467-1.html;71/3692-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?view_op=list_works;;;EEMm7hwAAAAJ;https://scholar.google.com/citations?hl=en;Ae2kRCEAAAAJ", "orcid": ";;;;;0000-0002-7252-5047;", "linkedin": ";;;;;;", "or_profile": "~Zelin_Peng1;~Zhengqin_Xu1;~Zhilin_Zeng1;~Yaoming_Wang1;~Lingxi_Xie1;~Qi_Tian3;~Wei_Shen2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;;huawei.com;huawei.com;sjtu.edu.cn", "position": "PhD student;Postdoc;;;Researcher;Principal Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024parameterefficient,\ntitle={Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=8Hy3KMZTL5}\n}", "github": "", "project": "", "reviewers": "MnaP;zhUR;WXoe;YGpA", "site": "https://openreview.net/forum?id=8Hy3KMZTL5", "pdf_size": 0, "rating": "5;5;5;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "102;50;72;88", "wc_strengths": "69;48;13;45", "wc_weaknesses": "110;61;430;18", "wc_questions": "63;46;2;110", "wc_limitations": "69;26;9;7", "wc_review": "413;231;526;268", "wc_reply_reviewers": "0;0;13;35", "wc_reply_authors": "0;0;23;21", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.0, 19.339079605813716 ], "wc_strengths_avg": [ 43.75, 20.017180121085985 ], "wc_weaknesses_avg": [ 154.75, 162.21494228337906 ], "wc_questions_avg": [ 55.25, 38.661188548724155 ], "wc_limitations_avg": [ 27.75, 24.933661985356263 ], "wc_review_avg": [ 359.5, 117.75928838100204 ], "wc_reply_reviewers_avg": [ 12.0, 14.300349646075091 ], "wc_reply_authors_avg": [ 11.0, 11.022703842524301 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1336634334348230362&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SJTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Distributional Successor Features Enable Zero-Shot Policy Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96379", "id": "8IysmgZte4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8IysmgZte4", "openreview": "https://openreview.net/forum?id=8IysmgZte4", "poster": "", "project": "", "author_site": "Chuning Zhu, Xinqi Wang, Tyler Han, Simon Du, Abhishek Gupta", "tldr": "", "abstract": "Intelligent agents must be generalists, capable of quickly adapting to various tasks. In reinforcement learning (RL), model-based RL learns a dynamics model of the world, in principle enabling transfer to arbitrary reward functions through planning. However, autoregressive model rollouts suffer from compounding error, making model-based RL ineffective for long-horizon problems. Successor features offer an alternative by modeling a policy's long-term state occupancy, reducing policy evaluation under new rewards to linear regression. Yet, policy optimization with successor features can be challenging. This work proposes a novel class of models, i.e., Distributional Successor Features for Zero-Shot Policy Optimization (DiSPOs), that learn a distribution of successor features of a stationary dataset's behavior policy, along with a policy that acts to realize different successor features within the dataset. By directly modeling long-term outcomes in the dataset, DiSPOs avoid compounding error while enabling a simple scheme for zero-shot policy optimization across reward functions. We present a practical instantiation of DiSPOs using diffusion models and show their efficacy as a new class of transferable models, both theoretically and empirically across various simulated robotics problems. Videos and code are available at https://weirdlabuw.github.io/dispo/.", "keywords": "Deep Reinforcement Learning;Multitask Transfer;Diffusion Models", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Chuning Zhu;Xinqi Wang;Tyler Han;Simon Shaolei Du;Abhishek Gupta", "authorids": "~Chuning_Zhu1;~Xinqi_Wang1;~Tyler_Han1;~Simon_Shaolei_Du1;~Abhishek_Gupta1", "gender": "M;M;M;M;M", "homepage": "https://homes.cs.washington.edu/~zchuning/;;https://thanandnow.github.io/;http://simonshaoleidu.com;https://homes.cs.washington.edu/~abhgupta/", "dblp": "295/9468;;;176/5602;18/6404-4", "google_scholar": ";;W-wk8X0AAAAJ;OttawxUAAAAJ;1wLVDP4AAAAJ", "orcid": ";;;;", "linkedin": "chuning-zhu-39b086167/;xinqi-wang-6a37b023a/;;;", "or_profile": "~Chuning_Zhu1;~Xinqi_Wang1;~Tyler_Han1;~Simon_Shaolei_Du1;~Abhishek_Gupta1", "aff": "University of Washington;University of Washington;University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;cs.washington.edu;washington.edu;uw.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2024distributional,\ntitle={Distributional Successor Features Enable Zero-Shot Policy Optimization},\nauthor={Chuning Zhu and Xinqi Wang and Tyler Han and Simon Shaolei Du and Abhishek Gupta},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8IysmgZte4}\n}", "github": "", "reviewers": "7fQM;EMA6;tiLN;AWqu", "pdf_size": 8360667, "rating": "4;5;7;8", "confidence": "3;2;4;4", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "53;73;59;54", "wc_strengths": "80;52;86;125", "wc_weaknesses": "397;516;372;467", "wc_questions": "66;34;3;193", "wc_limitations": "115;9;1;71", "wc_review": "711;684;521;910", "wc_reply_reviewers": "571;45;177;519", "wc_reply_authors": "578;167;167;108", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.75, 7.980444849756184 ], "wc_strengths_avg": [ 85.75, 26.042033330752037 ], "wc_weaknesses_avg": [ 438.0, 56.925389765903226 ], "wc_questions_avg": [ 74.0, 72.22534181296756 ], "wc_limitations_avg": [ 49.0, 46.75467891024384 ], "wc_review_avg": [ 706.5, 138.1566140291517 ], "wc_reply_reviewers_avg": [ 328.0, 222.72179956169535 ], "wc_reply_authors_avg": [ 255.0, 188.03324174198562 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7627700713964739, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11730769545411932210&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "email": "cs.washington.edu;uw.edu;cs.washington.edu;washington.edu;uw.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SUGARCREPE++ Dataset: Vision-Language Model Sensitivity to Semantic and Lexical Alterations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97833", "id": "8J8w43S9kr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8J8w43S9kr", "openreview": "https://openreview.net/forum?id=8J8w43S9kr", "poster": "", "project": "", "author_site": "Sri Harsha Dumpala, Aman Jaiswal, Chandramouli Shama Sastry, Evangelos Milios, Sageev Oore, Hassan Sajjad", "tldr": "", "abstract": "Despite their remarkable successes, state-of-the-art large language models (LLMs), including vision-and-language models (VLMs) and unimodal language models (ULMs), fail to understand precise semantics. For example, semantically equivalent sentences expressed using different lexical compositions elicit diverging representations. The degree of this divergence and its impact on encoded semantics is not very well understood. In this paper, we introduce the SUGARCREPE++ dataset to analyze the sensitivity of VLMs and ULMs to lexical and semantic alterations. Each sample in SUGARCREPE++ dataset consists of an image and a corresponding triplet of captions: a pair of semantically equivalent but lexically different positive captions and one hard negative caption. This poses a 3-way semantic (in)equivalence problem to the language models. We comprehensively evaluate VLMs and ULMs that differ in architecture, pre-training objectives and datasets to benchmark the performance of SUGARCREPE++ dataset. Experimental results highlight the difficulties of VLMs in distinguishing between lexical and semantic variations, particularly to object attributes and spatial relations. Although VLMs with larger pre-training datasets, model sizes, and multiple pre-training objectives achieve better performance on SUGARCREPE++, there is a significant opportunity for improvement. We demonstrate that models excelling on compositionality datasets may not perform equally well on SUGARCREPE++. This indicates that compositionality alone might not be sufficient to fully understand semantic and lexical alterations. Given the importance of the property that the SUGARCREPE++ dataset targets, it serves as a new challenge to the vision-and-language community. Data and code is available at https://github.com/Sri-Harsha/scpp.", "keywords": "Large Language Models (LLMs);Vision-and-Language Models (VLMs);Semantic;Lexical;compositionality", "primary_area": "", "supplementary_material": "/attachment/86655687beeb4e53443d18d574ca811349e8fb01.zip", "author": "Sri Harsha Dumpala;Aman Jaiswal;Chandramouli Shama Sastry;Evangelos Milios;Sageev Oore;Hassan Sajjad", "authorids": "~Sri_Harsha_Dumpala3;~Aman_Jaiswal1;~Chandramouli_Shama_Sastry1;~Evangelos_Milios1;~Sageev_Oore1;~Hassan_Sajjad1", "gender": "M;;M;M;M;M", "homepage": ";;https://scholar.google.com/citations?user=yR5pPqAAAAAJ&hl=en;https://www.cs.dal.ca/~eem/;;https://hsajjad.github.io/", "dblp": "148/9851;;223/6317;m/EvangelosEMilios.html;67/4980;73/5938", "google_scholar": "https://scholar.google.ca/citations?user=D4KhVXoAAAAJ;;;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=cI0dYX4AAAAJ;https://scholar.google.de/citations?user=t3BH6NkAAAAJ", "orcid": ";;;0000-0001-5549-4675;;", "linkedin": ";;;evangelos-milios-2973685/;;hassan-sajjad-154b043a/", "or_profile": "~Sri_Harsha_Dumpala3;~Aman_Jaiswal1;~Chandramouli_Shama_Sastry1;~Evangelos_Milios1;~Sageev_Oore1;~Hassan_Sajjad1", "aff": "Dalhousie University;;Vector Institute/Dalhousie University;Dalhousie University;Vector Institute;Dalhousie University", "aff_domain": "dal.ca;;dal.ca;dal.ca;vectorinstitute.ai;dal.ca", "position": "PhD student;;PhD student;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\ndumpala2024sugarcrepe,\ntitle={{SUGARCREPE}++ Dataset: Vision-Language Model Sensitivity to Semantic and Lexical Alterations},\nauthor={Sri Harsha Dumpala and Aman Jaiswal and Chandramouli Shama Sastry and Evangelos Milios and Sageev Oore and Hassan Sajjad},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=8J8w43S9kr}\n}", "github": "", "reviewers": "XGk7;a1nX;DXNt;hjnT", "pdf_size": 28697856, "rating": "6;7;7;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "70;79;46;61", "wc_strengths": "68;86;5;76", "wc_improvement": "227;59;5;63", "wc_limitations": "150;10;17;5", "wc_correctness": "9;6;3;34", "wc_clarity": "16;1;3;27", "wc_relation_to_prior_work": "1;1;3;48", "wc_documentation": "8;1;9;78", "wc_additional_feedback": "1;1;1;1", "wc_review": "550;244;92;393", "wc_reply_reviewers": "367;38;58;0", "wc_reply_authors": "2002;43;89;0", "reply_reviewers": "3;1;1;0", "reply_authors": "6;3;3;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 64.0, 12.186057606953941 ], "wc_strengths_avg": [ 58.75, 31.68102744546016 ], "wc_improvement_avg": [ 88.5, 83.17902379807063 ], "wc_limitations_avg": [ 45.5, 60.48346881586737 ], "wc_correctness_avg": [ 13.0, 12.30853362509117 ], "wc_clarity_avg": [ 11.75, 10.520812706250407 ], "wc_relation_to_prior_work_avg": [ 13.25, 20.07952937695503 ], "wc_documentation_avg": [ 24.0, 31.32890039564108 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 319.75, 170.2856056747017 ], "wc_reply_reviewers_avg": [ 115.75, 146.54756053923245 ], "wc_reply_authors_avg": [ 533.5, 848.4228014380566 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3379454159698182205&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "dal.ca;;dal.ca;dal.ca;vectorinstitute.ai;dal.ca", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Dalhousie University;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.dal.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "Dal;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Near-Optimal Streaming Heavy-Tailed Statistical Estimation with Clipped SGD", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96378", "id": "8JauriwDeH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8JauriwDeH", "openreview": "https://openreview.net/forum?id=8JauriwDeH", "poster": "", "project": "", "author_site": "Aniket Das, Dheeraj Nagaraj, Soumyabrata Pal, Arun Suggala, Prateek Varshney", "tldr": "", "abstract": "$\\newcommand{\\Tr}{\\mathsf{Tr}}$\nWe consider the problem of high-dimensional heavy-tailed statistical estimation in the streaming setting, which is much harder than the traditional batch setting due to memory constraints. We cast this problem as stochastic convex optimization with heavy tailed stochastic gradients, and prove that the widely used Clipped-SGD algorithm attains near-optimal sub-Gaussian statistical rates whenever the second moment of the stochastic gradient noise is finite. More precisely, with $T$ samples, we show that Clipped-SGD, for smooth and strongly convex objectives, achieves an error of $\\sqrt{\\frac{\\Tr(\\Sigma)+\\sqrt{\\Tr(\\Sigma)\\\\|\\Sigma\\\\|_2}\\ln(\\tfrac{\\ln(T)}{\\delta})}{T}}$ with probability $1-\\delta$, where $\\Sigma$ is the covariance of the clipped gradient. Note that the fluctuations (depending on $\\tfrac{1}{\\delta}$) are of lower order than the term $\\Tr(\\Sigma)$.\nThis improves upon the current best rate of\n$\\sqrt{\\frac{\\Tr(\\Sigma)\\ln(\\tfrac{1}{\\delta})}{T}}$ for Clipped-SGD, known \\emph{only} for smooth and strongly convex objectives. Our results also extend to smooth convex and lipschitz convex objectives. Key to our result is a novel iterative refinement strategy for martingale concentration, improving upon the PAC-Bayes approach of \\citet{catoni2018dimension}.", "keywords": "Streaming estimation;heavy tailed estimation;clipped SGD", "primary_area": "optimization", "supplementary_material": "", "author": "Aniket Das;Dheeraj Mysore Nagaraj;Soumyabrata Pal;Arun Suggala;Prateek Varshney", "authorids": "~Aniket_Das1;~Dheeraj_Mysore_Nagaraj1;~Soumyabrata_Pal1;~Arun_Suggala1;~Prateek_Varshney1", "gender": "M;M;M;M;M", "homepage": "https://aniket1998.github.io;https://dheerajmn.mit.edu;https://soumyabratap.github.io/;;https://pvarshney1729.github.io/", "dblp": "248/8281;215/5097;206/6371;164/7327;", "google_scholar": "o8Dyas0AAAAJ;0g80b7sAAAAJ;J4UxoTEAAAAJ;CKgmfDMAAAAJ;GUKPKh0AAAAJ", "orcid": ";;;;", "linkedin": ";dheeraj-m-nagaraj-01739792/;;;pvarshney1729/", "or_profile": "~Aniket_Das1;~Dheeraj_Mysore_Nagaraj1;~Soumyabrata_Pal1;~Arun_Suggala1;~Prateek_Varshney1", "aff": "Google;Google;Adobe Systems;Google;Stanford University", "aff_domain": "google.com;google.com;adobe.com;google.com;stanford.edu", "position": "Predoctoral Researcher;Research Scientist;Researcher;Researcher;MS student", "bibtex": "@inproceedings{\ndas2024nearoptimal,\ntitle={Near-Optimal Streaming Heavy-Tailed Statistical Estimation with Clipped {SGD}},\nauthor={Aniket Das and Dheeraj Mysore Nagaraj and Soumyabrata Pal and Arun Suggala and Prateek Varshney},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8JauriwDeH}\n}", "github": "", "reviewers": "rk1f;86zv;Wj6A;cbSj", "pdf_size": 763858, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;4;3;3", "presentation": "3;2;3;3", "wc_summary": "102;130;29;79", "wc_strengths": "85;261;15;67", "wc_weaknesses": "60;342;77;263", "wc_questions": "282;4;2;53", "wc_limitations": "3;15;2;10", "wc_review": "532;752;125;472", "wc_reply_reviewers": "0;45;487;23", "wc_reply_authors": "0;0;333;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 37.03376837428241 ], "wc_strengths_avg": [ 107.0, 92.5526876973327 ], "wc_weaknesses_avg": [ 185.5, 120.43774325351667 ], "wc_questions_avg": [ 85.25, 115.41528278352048 ], "wc_limitations_avg": [ 7.5, 5.315072906367325 ], "wc_review_avg": [ 470.25, 224.942631575253 ], "wc_reply_reviewers_avg": [ 138.75, 201.69082155616303 ], "wc_reply_authors_avg": [ 83.25, 144.19322973010904 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6918680722770541671&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;google.com;adobe.com;google.com;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Google;Adobe;Stanford University", "aff_unique_dep": "Google;Adobe Systems Incorporated;", "aff_unique_url": "https://www.google.com;https://www.adobe.com;https://www.stanford.edu", "aff_unique_abbr": "Google;Adobe;Stanford", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Mountain View;;Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8JmUmTgKiY", "title": "Kolmogorov\u2013Smirnov GAN", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a novel deep generative model, the Kolmogorov-Smirnov Generative Adversarial Network (KSGAN). Unlike existing approaches, KSGAN formulates the learning process as a minimization of the Kolmogorov-Smirnov (KS) distance, generalized to handle multivariate distributions. This distance is calculated using the quantile function, which acts as the critic in the adversarial training process. We formally demonstrate that minimizing the KS distance leads to the trained approximate distribution aligning with the target distribution. We propose an efficient implementation and evaluate its effectiveness through experiments. The results show that KSGAN performs on par with existing adversarial methods, exhibiting stability during training, resistance to mode dropping and collapse, and tolerance to variations in hyperparameter settings. Additionally, we review the literature on the Generalized KS test and discuss the connections between KSGAN and existing adversarial generative models.", "keywords": "generative models;generative adversarial networks;adversarial training", "primary_area": "generative_models", "supplementary_material": "", "author": "Maciej Falkiewicz;Naoya Takeishi;Alexandros Kalousis", "authorids": "~Maciej_Falkiewicz1;~Naoya_Takeishi1;~Alexandros_Kalousis1", "gender": ";;M", "homepage": ";https://ntake.jp/;http://dmml.ch/alexandros-kalousis/", "dblp": "225/0679;143/0393;68/6004", "google_scholar": "https://scholar.google.ch/citations?user=08jtE7MAAAAJ;https://scholar.google.co.jp/citations?user=rqF9bAsAAAAJ;uVkn9UEAAAAJ", "orcid": ";0000-0003-0111-2269;", "linkedin": ";;", "or_profile": "~Maciej_Falkiewicz1;~Naoya_Takeishi1;~Alexandros_Kalousis1", "aff": "University of Geneva;The University of Tokyo;University of Applied Sciences Western Switzerland", "aff_domain": "unige.ch;u-tokyo.ac.jp;hesge.ch", "position": "PhD student;Lecturer;Full Professor", "bibtex": "@misc{\nanonymous2024kolmogorovsmirnov,\ntitle={Kolmogorov{\\textendash}Smirnov {GAN}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=8JmUmTgKiY}\n}", "github": "", "project": "", "reviewers": "L1Da;Yfup;BXzn;dLnS", "site": "https://openreview.net/forum?id=8JmUmTgKiY", "pdf_size": 4967223, "rating": "3;4;4;6", "confidence": "4;3;4;3", "soundness": "2;3;2;3", "novelty": "1;2;1;2", "presentation": "3;3;3;3", "wc_summary": "123;62;67;67", "wc_strengths": "41;39;21;40", "wc_weaknesses": "150;115;125;158", "wc_questions": "112;98;101;5", "wc_limitations": "9;6;5;6", "wc_review": "435;320;319;276", "wc_reply_reviewers": "0;22;114;32", "wc_reply_authors": "0;0;34;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.75, 25.053692342646823 ], "wc_strengths_avg": [ 35.25, 8.257572282456872 ], "wc_weaknesses_avg": [ 137.0, 17.592612085759182 ], "wc_questions_avg": [ 79.0, 43.04067843331469 ], "wc_limitations_avg": [ 6.5, 1.5 ], "wc_review_avg": [ 337.5, 59.02753594721704 ], "wc_reply_reviewers_avg": [ 42.0, 43.15089802078283 ], "wc_reply_authors_avg": [ 8.5, 14.722431864335457 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ptRgSTgK4fgJ:scholar.google.com/&scioq=Kolmogorov%E2%80%93Smirnov+GAN&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Geneva;University of Tokyo;University of Applied Sciences Western Switzerland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unige.ch;https://www.u-tokyo.ac.jp;https://www.hes-so.ch/en", "aff_unique_abbr": "UNIGE;UTokyo;HES-SO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;Japan" }, { "title": "How does PDE order affect the convergence of PINNs?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96377", "id": "8K6ul0hgtC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8K6ul0hgtC", "openreview": "https://openreview.net/forum?id=8K6ul0hgtC", "poster": "", "project": "", "author_site": "Chang hoon Song, Yesom Park, Myungjoo Kang", "tldr": "", "abstract": "This paper analyzes the inverse relationship between the order of partial differential equations (PDEs) and the convergence of gradient descent in physics-informed neural networks (PINNs) with the power of ReLU activation. The integration of the PDE into a loss function endows PINNs with a distinctive feature to require computing derivatives of model up to the PDE order. Although it has been empirically observed that PINNs encounter difficulties in convergence when dealing with high-order or high-dimensional PDEs, a comprehensive theoretical understanding of this issue remains elusive. This paper offers theoretical support for this pathological behavior by demonstrating that the gradient flow converges in a lower probability when the PDE order is higher. In addition, we show that PINNs struggle to address high-dimensional problems because the influence of dimensionality on convergence is exacerbated with increasing PDE order. To address the pathology, we use the insights garnered to consider variable splitting that decomposes the high-order PDE into a system of lower-order PDEs. We prove that by reducing the differential order, the gradient flow of variable splitting is more likely to converge to the global optimum. Furthermore, we present numerical experiments in support of our theoretical claims.", "keywords": "Physics-Informed Neural Networks;Variable Splitting;Higher-order PDEs;Gradient Flow;Convergence", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/b3ca324709276e22d98249b09eafca5a8fc6f6e7.zip", "author": "Chang hoon Song;Yesom Park;Myungjoo Kang", "authorids": "~Chang_hoon_Song1;~Yesom_Park1;~Myungjoo_Kang1", "gender": "M;F;", "homepage": ";;http://ncia.snu.ac.kr/", "dblp": ";213/0699;64/5657.html", "google_scholar": "https://scholar.google.co.kr/citations?user=DAaFII4AAAAJ;https://scholar.google.com/citations?hl=ko;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chang_hoon_Song1;~Yesom_Park1;~Myungjoo_Kang1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2024how,\ntitle={How does {PDE} order affect the convergence of {PINN}s?},\nauthor={Chang hoon Song and Yesom Park and Myungjoo Kang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8K6ul0hgtC}\n}", "github": "", "reviewers": "BvZt;inw1;AeaP;M6m4", "pdf_size": 9211660, "rating": "5;5;6;6", "confidence": "2;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "158;40;250;61", "wc_strengths": "223;81;35;20", "wc_weaknesses": "417;182;639;93", "wc_questions": "229;2;3;45", "wc_limitations": "5;2;3;54", "wc_review": "1032;307;930;273", "wc_reply_reviewers": "49;16;12;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 127.25, 83.68803677945851 ], "wc_strengths_avg": [ 89.75, 80.14791014118833 ], "wc_weaknesses_avg": [ 332.75, 212.77496915755856 ], "wc_questions_avg": [ 69.75, 93.56648705599672 ], "wc_limitations_avg": [ 16.0, 21.965882636488796 ], "wc_review_avg": [ 635.5, 347.58488171955923 ], "wc_reply_reviewers_avg": [ 26.0, 14.370107863199914 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8lv-lbpK9uQJ:scholar.google.com/&scioq=How+does+PDE+order+affect+the+convergence+of+PINNs%3F&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Is Behavior Cloning All You Need? Understanding Horizon in Imitation Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96376", "id": "8KPyJm4gt5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8KPyJm4gt5", "openreview": "https://openreview.net/forum?id=8KPyJm4gt5", "poster": "", "project": "", "author_site": "Dylan J Foster, Adam Block, Dipendra Misra", "tldr": "", "abstract": "Imitation learning (IL) aims to mimic the behavior of an expert in a sequential decision making task by learning from demonstrations, and has been widely applied to robotics, autonomous driving, and autoregressive text generation. The simplest approach to IL, behavior cloning (BC) is thought to incur sample complexity with unfavorable quadratic dependence on the problem horizon, motivating a variety of different online algorithms that attain improved linear horizon dependence under stronger assumptions on the data and the learner\u2019s access to the expert. \n\nWe revisit the apparent gap between offline and online IL from a learning-theoretic perspective, with a focus on general policy classes up to and including deep neural networks. Through a new analysis of BC with the logarithmic loss, we show that it is possible to achieve horizon-independent sample complexity in offline IL whenever (i) the range of the cumulative payoffs is controlled, and (ii) an appropriate notion of supervised learning complexity for the policy class is controlled. Specializing our results to deterministic, stationary policies, we show that the gap between offline and online IL is not fundamental: (i) it is possible to achieve linear dependence on horizon in offline IL under dense rewards (matching what was previously only known to be achievable in online IL); and (ii) without further assumptions on the policy class, online IL cannot improve over offline IL with the logarithmic loss, even in benign MDPs. We complement our theoretical results with experiments on standard RL tasks and autoregressive language generation to validate the practical relevance of our findings.", "keywords": "Imitation learning;statistical learning theory;reinforcement learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Dylan J Foster;Adam Block;Dipendra Misra", "authorids": "~Dylan_J_Foster1;~Adam_Block1;~Dipendra_Misra1", "gender": ";;M", "homepage": "http://dylanfoster.net;https://abblock.github.io/index.html;https://dipendramisra.com/", "dblp": "167/4271;258/1018;218/6569", "google_scholar": "RqwU8xsAAAAJ;;rIoPIFsAAAAJ", "orcid": ";0000-0003-1677-2665;", "linkedin": ";;", "or_profile": "~Dylan_J_Foster1;~Adam_Block1;~Dipendra_Misra1", "aff": "Microsoft Research;Massachusetts Institute of Technology;Microsoft Research", "aff_domain": "microsoft.com;mit.edu;microsoft.com", "position": "Principal Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\nfoster2024is,\ntitle={Is Behavior Cloning All You Need? Understanding Horizon in Imitation Learning},\nauthor={Dylan J Foster and Adam Block and Dipendra Misra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8KPyJm4gt5}\n}", "github": "", "reviewers": "DwTf;meEq;FtQL;2nRQ", "pdf_size": 939161, "rating": "4;6;7;9", "confidence": "3;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "38;158;46;54", "wc_strengths": "21;145;60;73", "wc_weaknesses": "96;211;53;28", "wc_questions": "1;44;195;33", "wc_limitations": "2;12;46;3", "wc_review": "158;570;400;191", "wc_reply_reviewers": "0;35;306;41", "wc_reply_authors": "33;0;412;97", "reply_reviewers": "0;1;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 48.82622246293481 ], "wc_strengths_avg": [ 74.75, 44.846265173367556 ], "wc_weaknesses_avg": [ 97.0, 70.16765636673352 ], "wc_questions_avg": [ 68.25, 74.86446086094523 ], "wc_limitations_avg": [ 15.75, 17.893783836852393 ], "wc_review_avg": [ 329.75, 166.88675052262238 ], "wc_reply_reviewers_avg": [ 95.5, 122.5367291876195 ], "wc_reply_authors_avg": [ 135.5, 163.40211136946792 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1255014313556608436&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "microsoft.com;mit.edu;microsoft.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Massachusetts Institute of Technology", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://web.mit.edu", "aff_unique_abbr": "MSR;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Saliency-driven Experience Replay for Continual Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96375", "id": "8KkBxzn0km", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8KkBxzn0km", "openreview": "https://openreview.net/forum?id=8KkBxzn0km", "poster": "/media/PosterPDFs/NeurIPS%202024/96375.png?t=1733420858.1650703", "project": "", "author_site": "Giovanni Bellitto, Federica Proietto Salanitri, Matteo Pennisi, Matteo Boschini, Lorenzo Bonicelli, Angelo Porrello, SIMONE CALDERARA, Simone Palazzo, Concetto Spampinato", "tldr": "", "abstract": "We present Saliency-driven Experience Replay - SER - a biologically-plausible approach based on replicating human visual saliency to enhance classification models in continual learning settings. Inspired by neurophysiological evidence that the primary visual cortex does not contribute to object manifold untangling for categorization and that primordial saliency biases are still embedded in the modern brain, we propose to employ auxiliary saliency prediction features as a modulation signal to drive and stabilize the learning of a sequence of non-i.i.d. classification tasks. Experimental results confirm that SER effectively enhances the performance (in some cases up to about twenty percent points) of state-of-the-art continual learning methods, both in class-incremental and task-incremental settings. Moreover, we show that saliency-based modulation successfully encourages the learning of features that are more robust to the presence of spurious features and to adversarial attacks than baseline methods. Code is available at: https://github.com/perceivelab/SER", "keywords": "Continual Learning;Transfer Learning;Saliency Prediction", "primary_area": "online_learning", "supplementary_material": "", "author": "Giovanni Bellitto;Federica Proietto Salanitri;Matteo Pennisi;Matteo Boschini;Lorenzo Bonicelli;Angelo Porrello;Simone Calderara;Simone Palazzo;Concetto Spampinato", "authorids": "~Giovanni_Bellitto1;~Federica_Proietto_Salanitri1;~Matteo_Pennisi1;~Matteo_Boschini1;~Lorenzo_Bonicelli1;~Angelo_Porrello1;~Simone_Calderara1;~Simone_Palazzo2;~Concetto_Spampinato1", "gender": "M;F;;M;M;M;M;M;M", "homepage": ";;;https://mbosc.github.io/;https://lorenzobonicelli.net/;;;;http://www.perceivelab.com", "dblp": "276/0018;276/0328;;193/6399;299/8442;223/4466;13/422;46/11431;", "google_scholar": "5X6QO-kAAAAJ;G5rufDwAAAAJ;;https://scholar.google.it/citations?user=4GTV0XoAAAAJ;ovXU58MAAAAJ;b3-5Ys4AAAAJ;https://scholar.google.it/citations?user=CZd-WXkAAAAJ;https://scholar.google.it/citations?user=yJr6TqAAAAAJ;https://scholar.google.it/citations?user=Xc2rx8j4O7UC", "orcid": "0000-0002-1333-8348;0000-0002-6122-4249;;0000-0002-2809-813X;0000-0002-9717-5602;0000-0002-9022-8484;0000-0001-9056-1538;;", "linkedin": ";federica-proietto-salanitri-2a0a68226/;;matteo-boschini-5a0021141;;;;;", "or_profile": "~Giovanni_Bellitto1;~Federica_Proietto_Salanitri1;~Matteo_Pennisi1;~Matteo_Boschini1;~Lorenzo_Bonicelli1;~Angelo_Porrello1;~Simone_Calderara1;~Simone_Palazzo2;~Concetto_Spampinato1", "aff": "University of Catania;University of Catania;;;University of Modena and Reggio Emilia;University of Modena and Reggio Emilia, AimageLab;University of Modena and Reggio Emilia;University of Catania;University of Catania", "aff_domain": "unict.it;unict.it;;;unimore.it;unimore.it;unimore.it;unict.it;unict.it", "position": "Assistant Professor;Assistant Professor;;;PhD student;Postdoc;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbellitto2024saliencydriven,\ntitle={Saliency-driven Experience Replay for Continual Learning},\nauthor={Giovanni Bellitto and Federica Proietto Salanitri and Matteo Pennisi and Matteo Boschini and Lorenzo Bonicelli and Angelo Porrello and Simone Calderara and Simone Palazzo and Concetto Spampinato},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8KkBxzn0km}\n}", "github": "", "reviewers": "L6ph;U9b5;oLZW;H211", "pdf_size": 5626465, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "86;63;111;118", "wc_strengths": "96;58;32;46", "wc_weaknesses": "148;174;211;10", "wc_questions": "117;2;49;188", "wc_limitations": "1;3;29;37", "wc_review": "448;300;432;399", "wc_reply_reviewers": "0;43;56;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.5, 21.73131381210073 ], "wc_strengths_avg": [ 58.0, 23.790754506740637 ], "wc_weaknesses_avg": [ 135.75, 75.97491362285317 ], "wc_questions_avg": [ 89.0, 70.27446193319447 ], "wc_limitations_avg": [ 17.5, 15.771810295587505 ], "wc_review_avg": [ 394.75, 57.48641143783459 ], "wc_reply_reviewers_avg": [ 27.5, 22.808989455914087 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=232848894360151050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "unict.it;unict.it;;;unimore.it;unimore.it;unimore.it;unict.it;unict.it", "author_num": 9, "aff_unique_index": "0;0;1;1;1;0;0", "aff_unique_norm": "University of Catania;University of Modena and Reggio Emilia", "aff_unique_dep": ";", "aff_unique_url": "https://www.unict.it;https://www.unimore.it", "aff_unique_abbr": "UNICT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Pricing and Competition for Generative AI", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96374", "id": "8LbJfEjIrT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8LbJfEjIrT", "openreview": "https://openreview.net/forum?id=8LbJfEjIrT", "poster": "", "project": "", "tldr": "", "abstract": "Compared to classical machine learning (ML) models, generative models offer a new usage paradigm where (i) a single model can be used for many different tasks out-of-the-box; (ii) users interact with this model over a series of natural language prompts; and (iii) the model is ideally evaluated on binary user satisfaction with respect to model outputs. Given these characteristics, we explore the problem of how developers of new generative AI software can release and price their technology. We first develop a comparison of two different models for a specific task with respect to user cost-effectiveness. We then model the pricing problem of generative AI software as a game between two different companies who sequentially release their models before users choose their preferred model for each task. Here, the price optimization problem becomes piecewise continuous where the companies must choose a subset of the tasks on which to be cost-effective and forgo revenue for the remaining tasks. In particular, we reveal the value of market information by showing that a company who deploys later after knowing their competitor\u2019s price can always secure cost-effectiveness on at least one task, whereas the company who is the first-to-market must price their model in a way that incentivizes higher prices from the latecomer in order to gain revenue. Most importantly, we find that if the different tasks are sufficiently similar, the first-to-market model may become cost-ineffective on all tasks regardless of how this technology is priced.", "keywords": "pricing;revenue management;model evaluation;foundation models", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Rafid Mahmood", "authorids": "~Rafid_Mahmood1", "gender": "", "homepage": "http://rafidrm.github.io", "dblp": "164/5832", "google_scholar": "https://scholar.google.ca/citations?user=NoPweUQAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Rafid_Mahmood1", "aff": "NVIDIA", "aff_domain": "nvidia.com", "position": "Research Scientist", "bibtex": "@inproceedings{\nmahmood2024pricing,\ntitle={Pricing and Competition for Generative {AI}},\nauthor={Rafid Mahmood},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8LbJfEjIrT}\n}", "github": "", "reviewers": "CY6H;6DrW;U4W8;aKR2;USe1", "pdf_size": 510383, "rating": "5;5;5;6;8", "confidence": "3;4;4;3;3", "soundness": "3;2;3;3;3", "novelty": "2;2;2;2;4", "presentation": "3;3;3;3;4", "wc_summary": "67;85;137;171;134", "wc_strengths": "74;12;104;44;79", "wc_weaknesses": "125;81;214;99;24", "wc_questions": "3;167;2;118;74", "wc_limitations": "3;1;1;24;15", "wc_review": "272;346;458;456;326", "wc_reply_reviewers": "13;31;311;16;49", "wc_reply_authors": "13;35;614;13;24", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 118.8, 37.71683974036001 ], "wc_strengths_avg": [ 62.6, 31.68343415730056 ], "wc_weaknesses_avg": [ 108.6, 62.272305240772965 ], "wc_questions_avg": [ 72.8, 64.50240305601024 ], "wc_limitations_avg": [ 8.8, 9.21737489744233 ], "wc_review_avg": [ 371.6, 73.81490364418286 ], "wc_reply_reviewers_avg": [ 84.0, 114.21733668756245 ], "wc_reply_authors_avg": [ 139.8, 237.2403001178341 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.560112033611204, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13194291958608901409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nvidia.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Faster Neighborhood Attention: Reducing the O(n^2) Cost of Self Attention at the Threadblock Level", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96373", "id": "8Ofbg2KYMu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8Ofbg2KYMu", "openreview": "https://openreview.net/forum?id=8Ofbg2KYMu", "poster": "", "project": "", "author_site": "Ali Hassani, Wen-Mei Hwu, Humphrey Shi", "tldr": "", "abstract": "Neighborhood attention reduces the cost of self attention by restricting each token\u2019s attention span to its nearest neighbors. This restriction, parameterized by a window size and dilation factor, draws a spectrum of possible attention patterns between linear projection and self attention. Neighborhood attention, and more generally sliding window attention patterns, have long been bounded by infrastructure, particularly in higher-rank spaces (2-D and 3-D), calling for the development of custom kernels, which have been limited in either functionality, or performance, if not both. In this work, we aim to massively improve upon existing infrastructure by providing two new methods for implementing neighborhood attention. We first show that neighborhood attention can be represented as a batched GEMM problem, similar to standard attention, and implement it for 1-D and 2-D neighborhood attention. These kernels on average provide 895% and 272% improvement in full precision runtime compared to existing naive CUDA kernels for 1-D and 2-D neighborhood attention respectively. We find that aside from being heavily bound by memory bandwidth, certain inherent inefficiencies exist in all unfused implementations of neighborhood attention, which in most cases undo their theoretical efficiency gain. Motivated by the progress made into fused dot-product attention kernels, we developed fused neighborhood attention; an adaptation of fused dot-product attention kernels that allow fine-grained control over attention across different spatial axes. Known for reducing the quadratic time complexity of self attention to a linear complexity, neighborhood attention can now enjoy a reduced and constant memory footprint, and record-breaking half precision runtime. We observe that our fused implementation successfully circumvents some of the unavoidable inefficiencies in unfused implementations. While our unfused GEMM-based kernels only improve half precision performance compared to naive kernels by an average of 548% and 193% in 1-D and 2-D problems respectively, our fused kernels improve naive kernels by an average of 1759% and 958% in 1-D and 2-D problems respectively. These improvements translate into up to 104% improvement in inference and 39% improvement in training existing models based on neighborhood attention, and additionally extend its applicability to image and video perception, as well as other modalities. Our work is open-sourced at https://github.com/SHI-Labs/NATTEN/.", "keywords": "fused attention kernel;neighborhood attention;sliding window attention", "primary_area": "infrastructure", "supplementary_material": "", "author": "Ali Hassani;Wen-mei Hwu;Humphrey Shi", "authorids": "~Ali_Hassani1;~Wen-mei_Hwu1;~Humphrey_Shi1", "gender": "M;M;M", "homepage": "https://alihassanijr.com;https://www.ece.illinois.edu/directory/profile/w-hwu/;https://www.humphreyshi.com", "dblp": "250/9292-1;03/4630;176/5516", "google_scholar": "Ndu0dUcAAAAJ;https://scholar.google.com.tw/citations?user=ohjQPx8AAAAJ;WBvt5A8AAAAJ", "orcid": "0000-0001-6151-162X;0000-0003-2532-5349;0000-0002-2922-5663", "linkedin": "alihassanijr/;wen-mei-hwu-a510795/;humphreyshi", "or_profile": "~Ali_Hassani1;~Wen-mei_Hwu1;~Honghui_Shi1", "aff": "NVIDIA;NVIDIA;University of Illinois, Urbana Champaign", "aff_domain": "nvidia.com;nvidia.com;illinois.edu", "position": "Intern;Researcher;Adjunct Assistant Professor", "bibtex": "@inproceedings{\nhassani2024faster,\ntitle={Faster Neighborhood Attention: Reducing the O(n{\\textasciicircum}2) Cost of Self Attention at the Threadblock Level},\nauthor={Ali Hassani and Wen-mei Hwu and Humphrey Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8Ofbg2KYMu}\n}", "github": "", "reviewers": "rBem;9m2o;RCPh", "pdf_size": 2553326, "rating": "5;7;7", "confidence": "4;4;3", "soundness": "3;4;4", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "113;104;63", "wc_strengths": "40;31;60", "wc_weaknesses": "40;50;40", "wc_questions": "122;44;55", "wc_limitations": "20;42;5", "wc_review": "335;271;223", "wc_reply_reviewers": "42;71;21", "wc_reply_authors": "897;71;10", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 93.33333333333333, 21.761331658599286 ], "wc_strengths_avg": [ 43.666666666666664, 12.119772641798562 ], "wc_weaknesses_avg": [ 43.333333333333336, 4.714045207910317 ], "wc_questions_avg": [ 73.66666666666667, 34.4705993887867 ], "wc_limitations_avg": [ 22.333333333333332, 15.195028426721976 ], "wc_review_avg": [ 276.3333333333333, 45.87906809089401 ], "wc_reply_reviewers_avg": [ 44.666666666666664, 20.49932248202906 ], "wc_reply_authors_avg": [ 326.0, 404.52523613078415 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16261507021394214258&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "email": "nvidia.com;nvidia.com;illinois.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "NVIDIA;University of Illinois Urbana-Champaign", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://illinois.edu", "aff_unique_abbr": "NVIDIA;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Contrasting with Symile: Simple Model-Agnostic Representation Learning for Unlimited Modalities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96372", "id": "8PWvdaRQAu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8PWvdaRQAu", "openreview": "https://openreview.net/forum?id=8PWvdaRQAu", "poster": "/media/PosterPDFs/NeurIPS%202024/96372.png?t=1733591500.6863198", "project": "", "author_site": "Adriel Saporta, Aahlad Manas Puli, Mark Goldstein, Rajesh Ranganath", "tldr": "", "abstract": "Contrastive learning methods, such as CLIP, leverage naturally paired data\u2014for example, images and their corresponding text captions\u2014to learn general representations that transfer efficiently to downstream tasks. While such approaches are generally applied to two modalities, domains such as robotics, healthcare, and video need to support many types of data at once. We show that the pairwise application of CLIP fails to capture joint information between modalities, thereby limiting the quality of the learned representations. To address this issue, we present Symile, a simple contrastive learning approach that captures higher-order information between any number of modalities. Symile provides a flexible, architecture-agnostic objective for learning modality-specific representations. To develop Symile's objective, we derive a lower bound on total correlation, and show that Symile representations for any set of modalities form a sufficient statistic for predicting the remaining modalities. Symile outperforms pairwise CLIP, even with modalities missing in the data, on cross-modal classification and retrieval across several experiments including on an original multilingual dataset of 33M image, text and audio samples and a clinical dataset of chest X-rays, electrocardiograms, and laboratory measurements. All datasets and code used in this work are publicly available at https://github.com/rajesh-lab/symile.", "keywords": "multimodal;contrastive learning;representation learning;total correlation", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Adriel Saporta;Aahlad Manas Puli;Mark Goldstein;Rajesh Ranganath", "authorids": "~Adriel_Saporta1;~Aahlad_Manas_Puli1;~Mark_Goldstein1;~Rajesh_Ranganath2", "gender": "F;M;M;", "homepage": ";http://aahladmanas.github.io;https://cims.nyu.edu/~mg3479/;", "dblp": "295/9771;228/9272;;97/7057", "google_scholar": "cbBgjV8AAAAJ;xWmCmBQAAAAJ;https://scholar.google.fr/citations?hl=en;", "orcid": "0000-0002-8726-2278;;;", "linkedin": "adrielsaporta/;;;", "or_profile": "~Adriel_Saporta1;~Aahlad_Manas_Puli1;~Mark_Goldstein1;~Rajesh_Ranganath2", "aff": "New York University;New York University;Google;New York University", "aff_domain": "nyu.edu;nyu.edu;google.com;nyu.edu", "position": "PhD student;PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nsaporta2024contrasting,\ntitle={Contrasting with Symile: Simple Model-Agnostic Representation Learning for Unlimited Modalities},\nauthor={Adriel Saporta and Aahlad Manas Puli and Mark Goldstein and Rajesh Ranganath},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8PWvdaRQAu}\n}", "github": "", "reviewers": "QTPJ;hrNA;HxmV", "pdf_size": 3650306, "rating": "6;7;7", "confidence": "4;3;2", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "126;98;95", "wc_strengths": "41;136;80", "wc_weaknesses": "97;133;127", "wc_questions": "235;59;5", "wc_limitations": "26;7;9", "wc_review": "525;433;316", "wc_reply_reviewers": "0;34;0", "wc_reply_authors": "0;29;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 106.33333333333333, 13.960261060914616 ], "wc_strengths_avg": [ 85.66666666666667, 38.990027214945904 ], "wc_weaknesses_avg": [ 119.0, 15.748015748023622 ], "wc_questions_avg": [ 99.66666666666667, 98.20160668520427 ], "wc_limitations_avg": [ 14.0, 8.524474568362947 ], "wc_review_avg": [ 424.6666666666667, 85.52712370288673 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 9.666666666666666, 13.67073110293992 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11598899371124820045&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;google.com;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "New York University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": "NYU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LexEval: A Comprehensive Chinese Legal Benchmark for Evaluating Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97832", "id": "8RaxRs5VDf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8RaxRs5VDf", "openreview": "https://openreview.net/forum?id=8RaxRs5VDf", "poster": "", "project": "", "author_site": "Haitao Li, You Chen, Qingyao Ai, Yueyue WU, Ruizhe Zhang, Yiqun LIU", "tldr": "", "abstract": "Large language models (LLMs) have made significant progress in natural language processing tasks and demonstrate considerable potential in the legal domain. However, legal applications demand high standards of accuracy, reliability, and fairness. Applying existing LLMs to legal systems without careful evaluation of their potential and limitations could pose significant risks in legal practice.\nTo this end, we introduce a standardized comprehensive Chinese legal benchmark LexEval.\nThis benchmark is notable in the following three aspects: (1) Ability Modeling: We propose a new taxonomy of legal cognitive abilities to organize different tasks. (2) Scale: To our knowledge, LexEval is currently the largest Chinese legal evaluation dataset, comprising 23 tasks and 14,150 questions. (3) Data: we utilize formatted existing datasets, exam datasets and newly annotated datasets by legal experts to comprehensively evaluate the various capabilities of LLMs. LexEval not only focuses on the ability of LLMs to apply fundamental legal knowledge but also dedicates efforts to examining the ethical issues involved in their application.\nWe evaluated 38 open-source and commercial LLMs and obtained some interesting findings. The experiments and findings offer valuable insights into the challenges and potential solutions for developing Chinese legal systems and LLM evaluation pipelines. The LexEval dataset and leaderboard are publicly available at https://github.com/CSHaitao/LexEval and will be continuously updated.", "keywords": "Legal Domain;Evaluation;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Haitao Li;You Chen;Qingyao Ai;Yueyue WU;Ruizhe Zhang;Yiqun LIU", "authorids": "~Haitao_Li3;~You_Chen2;~Qingyao_Ai1;~Yueyue_WU1;~Ruizhe_Zhang5;~Yiqun_LIU1", "gender": "M;;Not Specified;;;M", "homepage": "https://cshaitao.github.io;https://github.com/Cruise-pp;https://qingyaoai.github.io;http://www.thuir.cn/members/1_post_wuyueyue.html;http://www.thuir.cn/members/2_stu_2019_zhangruizhe.html;http://www.thuir.cn/group/~YQLiu/", "dblp": "29/5847-6.html;;169/1808;;;49/1579", "google_scholar": "https://scholar.google.com.hk/citations?user=jj5HjKUAAAAJ;3-48zCwAAAAJ;UKqaI5IAAAAJ;;cCeo-7QAAAAJ;NJOnxh4AAAAJ", "orcid": ";;0000-0002-5030-709X;;;", "linkedin": ";;qingyao-ai-4ab8306a;;;", "or_profile": "~Haitao_Li3;~You_Chen2;~Qingyao_Ai1;~Yueyue_WU1;~Ruizhe_Zhang5;~Yiqun_LIU1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Undergrad student;Assistant Professor;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024lexeval,\ntitle={LexEval: A Comprehensive Chinese Legal Benchmark for Evaluating Large Language Models},\nauthor={Haitao Li and You Chen and Qingyao Ai and Yueyue WU and Ruizhe Zhang and Yiqun LIU},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=8RaxRs5VDf}\n}", "github": "", "reviewers": "Houv;AqtW;NYnk;tqv1", "pdf_size": 444122, "rating": "6;6;6;7", "confidence": "4;4;4;5", "wc_summary_and_contributions": "69;92;109;120", "wc_strengths": "34;5;52;60", "wc_improvement": "6;8;4;322", "wc_limitations": "1;1;1;10", "wc_correctness": "19;1;12;7", "wc_clarity": "1;1;7;15", "wc_relation_to_prior_work": "1;1;1;83", "wc_documentation": "1;1;11;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "133;111;198;623", "wc_reply_reviewers": "33;0;0;15", "wc_reply_authors": "1664;739;739;1921", "reply_reviewers": "2;0;0;1", "reply_authors": "5;4;4;5", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 97.5, 19.241881404893856 ], "wc_strengths_avg": [ 37.75, 21.123150806638673 ], "wc_improvement_avg": [ 85.0, 136.83932183404008 ], "wc_limitations_avg": [ 3.25, 3.897114317029974 ], "wc_correctness_avg": [ 9.75, 6.609652033201143 ], "wc_clarity_avg": [ 6.0, 5.744562646538029 ], "wc_relation_to_prior_work_avg": [ 21.5, 35.50704155516198 ], "wc_documentation_avg": [ 4.5, 4.092676385936225 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 266.25, 208.43869002658792 ], "wc_reply_reviewers_avg": [ 12.0, 13.583077707206124 ], "wc_reply_authors_avg": [ 1265.75, 534.5294075165556 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.5, 0.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7968372114126733363&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "An Autoencoder-Like Nonnegative Matrix Co-Factorization for Improved Student Cognitive Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96371", "id": "8UqyWNsnyA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8UqyWNsnyA", "openreview": "https://openreview.net/forum?id=8UqyWNsnyA", "poster": "/media/PosterPDFs/NeurIPS%202024/96371.png?t=1731552513.6106381", "project": "", "author_site": "Shenbao Yu, Yinghui Pan, Yifeng Zeng, Prashant Doshi, Guoquan Liu, Kim-Leng Poh, Mingwei Lin", "tldr": "", "abstract": "Student cognitive modeling (SCM) is a fundamental task in intelligent education, with applications ranging from personalized learning to educational resource allocation. By exploiting students' response logs, SCM aims to predict their exercise performance as well as estimate knowledge proficiency in a subject. Data mining approaches such as matrix factorization can obtain high accuracy in predicting student performance on exercises, but the knowledge proficiency is unknown or poorly estimated. The situation is further exacerbated if only sparse interactions exist between exercises and students (or knowledge concepts). To solve this dilemma, we root monotonicity (a fundamental psychometric theory on educational assessments) in a co-factorization framework and present an autoencoder-like nonnegative matrix co-factorization (AE-NMCF), which improves the accuracy of estimating the student's knowledge proficiency via an encoder-decoder learning pipeline. The resulting estimation problem is nonconvex with nonnegative constraints. We introduce a projected gradient method based on block coordinate descent with Lipschitz constants and guarantee the method's theoretical convergence. Experiments on several real-world data sets demonstrate the efficacy of our approach in terms of both performance prediction accuracy and knowledge estimation ability, when compared with existing student cognitive models.", "keywords": "Student Cognitive Modeling;Matrix Co-Factorization;Autoencoder", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/5f070d7b9b7780d54eb69342942cd407a615c76b.zip", "author": "Shenbao Yu;Yinghui Pan;Yifeng Zeng;Prashant Doshi;Guoquan Liu;Kim-Leng Poh;Mingwei Lin", "authorids": "~Shenbao_Yu1;~Yinghui_Pan2;~Yifeng_Zeng2;~Prashant_Doshi1;~Guoquan_Liu1;~Kim-Leng_Poh1;~Mingwei_Lin1", "gender": "M;F;;M;;M;M", "homepage": ";https://csse.szu.edu.cn/staff/panyh/;https://www.northumbria.ac.uk/about-us/our-staff/z/yifeng-zeng/;http://thinc.cs.uga.edu;;http://www.isem.nus.edu.sg/staff/poh;https://www.webofscience.com/wos/author/record/AAD-2775-2019", "dblp": "198/6469;02/9049.html;50/1401.html;d/PrashantDoshi;;;", "google_scholar": "EdM7kV0AAAAJ;;https://scholar.google.com.vn/citations?user=pa7wzD4AAAAJ;3PkyzawAAAAJ;GM5yFBkAAAAJ;;https://scholar.google.com.hk/citations?user=-270sPUAAAAJ", "orcid": "0000-0002-6824-4293;0000-0002-0320-0337;;;;;", "linkedin": ";;;;;;", "or_profile": "~Shenbao_Yu1;~Yinghui_Pan2;~Yifeng_Zeng2;~Prashant_Doshi1;~Guoquan_Liu1;~Kim-Leng_Poh1;~Mingwei_Lin1", "aff": "Fujian Normal University;Shenzhen University;Northumbria University;University of Georgia;Fudan University;;Fujian Normal University", "aff_domain": "fjnu.edu.cn;szu.edu.cn;northumbria.ac.uk;cs.uga.edu;fudan.edu.cn;;fjnu.edu.cn", "position": "Lecturer;Associate Professor;Full Professor;Full Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nyu2024an,\ntitle={An Autoencoder-Like Nonnegative Matrix Co-Factorization for Improved Student Cognitive Modeling},\nauthor={Shenbao Yu and Yinghui Pan and Yifeng Zeng and Prashant Doshi and Guoquan Liu and Kim-Leng Poh and Mingwei Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8UqyWNsnyA}\n}", "github": "", "reviewers": "Px4s;RqUb;LdEU;Qt1t", "pdf_size": 1574230, "rating": "5;6;6;7", "confidence": "3;3;3;2", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;2;3", "wc_summary": "60;148;70;50", "wc_strengths": "32;13;1142;29", "wc_weaknesses": "260;10;2;13", "wc_questions": "137;1;2;20", "wc_limitations": "1;9;1;4", "wc_review": "490;181;1217;116", "wc_reply_reviewers": "154;14;229;0", "wc_reply_authors": "257;30;41;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 38.7556447501522 ], "wc_strengths_avg": [ 304.0, 483.87343386468325 ], "wc_weaknesses_avg": [ 71.25, 109.04901420920778 ], "wc_questions_avg": [ 40.0, 56.51106086422374 ], "wc_limitations_avg": [ 3.75, 3.2691742076555053 ], "wc_review_avg": [ 501.0, 436.8643954363871 ], "wc_reply_reviewers_avg": [ 99.25, 96.11288935413397 ], "wc_reply_authors_avg": [ 82.0, 102.1445054812054 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kblnQcdmp44J:scholar.google.com/&scioq=An+Autoencoder-Like+Nonnegative+Matrix+Co-Factorization+for+Improved+Student+Cognitive+Modeling&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "fjnu.edu.cn;szu.edu.cn;northumbria.ac.uk;cs.uga.edu;fudan.edu.cn;;fjnu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Fujian Normal University;Shenzhen University;Northumbria University;University of Georgia;Fudan University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.fjnu.edu.cn;https://www.szu.edu.cn;https://www.northumbria.ac.uk;https://www.uga.edu;https://www.fudan.edu.cn", "aff_unique_abbr": "FJNU;SZU;Northumbria;UGA;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "Robust Reinforcement Learning with General Utility", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96370", "id": "8Uyfr5TcNR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8Uyfr5TcNR", "openreview": "https://openreview.net/forum?id=8Uyfr5TcNR", "poster": "/media/PosterPDFs/NeurIPS%202024/96370.png?t=1731644030.438821", "project": "", "author_site": "Ziyi Chen, Yan Wen, Zhengmian Hu, Heng Huang", "tldr": "", "abstract": "Reinforcement Learning (RL) problem with general utility is a powerful decision making framework that covers standard RL with cumulative cost, exploration problems, and demonstration learning. Existing works on RL with general utility do not consider the robustness under environmental perturbation, which is important to adapt RL system in the real-world environment that differs from the training environment. To train a robust policy, we propose a robust RL framework with general utility, which subsumes many existing RL frameworks including RL, robust RL, RL with general utility, constrained RL, robust constrained RL, pure exploration, robust entropy regularized RL, etc. Then we focus on popular convex utility functions, with which our proposed learning framework is a challenging nonconvex-nonconcave minimax optimization problem, and design a two-phase stochastic policy gradient type algorithm and obtain its sample complexity result for gradient convergence. Furthermore, for convex utility on a widely used polyhedral ambiguity set, we design an algorithm and obtain its convergence rate to a global optimal solution.", "keywords": "robust reinforcement Learning;general utility;minimax optimization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/7cbe91af50b67685627d7396680f6728f7a9b3b4.zip", "author": "Ziyi Chen;Yan Wen;Zhengmian Hu;Heng Huang", "authorids": "~Ziyi_Chen2;~Yan_Wen2;~Zhengmian_Hu1;~Heng_Huang1", "gender": "M;M;M;M", "homepage": ";;https://www.umd.edu/;https://www.cs.umd.edu/~heng/", "dblp": "37/1439-2;;285/4945;03/281", "google_scholar": "zjSBVOIAAAAJ;;4eXiWWgAAAAJ;4OqLaDwAAAAJ", "orcid": ";0009-0002-6425-5056;0000-0003-0316-146X;", "linkedin": "ziyi-chen-84616184/;overwenyan/;;", "or_profile": "~Ziyi_Chen2;~Yan_Wen2;~Zhengmian_Hu1;~Heng_Huang1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;cs.umd.edu", "position": "Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024robust,\ntitle={Robust Reinforcement Learning with General Utility},\nauthor={Ziyi Chen and Yan Wen and Zhengmian Hu and Heng Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8Uyfr5TcNR}\n}", "github": "", "reviewers": "joNT;iNNQ;7fYr;nGSn", "pdf_size": 629892, "rating": "5;6;7;7", "confidence": "3;4;2;3", "soundness": "3;3;1;3", "novelty": "3;2;2;4", "presentation": "2;2;1;3", "wc_summary": "52;48;58;111", "wc_strengths": "31;61;37;68", "wc_weaknesses": "169;20;515;92", "wc_questions": "66;126;170;50", "wc_limitations": "9;1;93;33", "wc_review": "327;256;873;354", "wc_reply_reviewers": "209;9;493;20", "wc_reply_authors": "324;621;1112;9", "reply_reviewers": "2;1;3;1", "reply_authors": "4;3;6;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.25, 25.508576988926684 ], "wc_strengths_avg": [ 49.25, 15.594470173750693 ], "wc_weaknesses_avg": [ 199.0, 189.89865718324603 ], "wc_questions_avg": [ 103.0, 47.94788837894741 ], "wc_limitations_avg": [ 34.0, 36.0416425818802 ], "wc_review_avg": [ 452.5, 245.40018337401463 ], "wc_reply_reviewers_avg": [ 182.75, 195.9724151507043 ], "wc_reply_authors_avg": [ 516.5, 406.24899999876925 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W5jHFnZiKNYJ:scholar.google.com/&scioq=Robust+Reinforcement+Learning+with+General+Utility&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "umd.edu;umd.edu;umd.edu;cs.umd.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Maryland;University of Maryland, College Park", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MambaAD: Exploring State Space Models for Multi-class Unsupervised Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96369", "id": "8VKxTlnejE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8VKxTlnejE", "openreview": "https://openreview.net/forum?id=8VKxTlnejE", "poster": "/media/PosterPDFs/NeurIPS%202024/96369.png?t=1729084177.8173506", "project": "", "author_site": "Haoyang He, Yuhu Bai, Jiangning Zhang, Qingdong He, Hongxu Chen, Zhenye Gan, Chengjie Wang, Xiangtai Li, Guanzhong Tian, Lei Xie", "tldr": "", "abstract": "Recent advancements in anomaly detection have seen the efficacy of CNN- and transformer-based approaches. However, CNNs struggle with long-range dependencies, while transformers are burdened by quadratic computational complexity. Mamba-based models, with their superior long-range modeling and linear efficiency, have garnered substantial attention. This study pioneers the application of Mamba to multi-class unsupervised anomaly detection, presenting MambaAD, which consists of a pre-trained encoder and a Mamba decoder featuring (Locality-Enhanced State Space) LSS modules at multi-scales. The proposed LSS module, integrating parallel cascaded (Hybrid State Space) HSS blocks and multi-kernel convolutions operations, effectively captures both long-range and local information. The HSS block, utilizing (Hybrid Scanning) HS encoders, encodes feature maps into five scanning methods and eight directions, thereby strengthening global connections through the (State Space Model) SSM. The use of Hilbert scanning and eight directions significantly improves feature sequence modeling. Comprehensive experiments on six diverse anomaly detection datasets and seven metrics demonstrate state-of-the-art performance, substantiating the method's effectiveness. The code and models are available at https://lewandofskee.github.io/projects/MambaAD.", "keywords": "Mamba;State Space Models;Anomaly Detection;Unsupervised Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haoyang He;Yuhu Bai;Jiangning Zhang;Qingdong He;Hongxu Chen;Zhenye Gan;Chengjie Wang;Xiangtai Li;Guanzhong Tian;Lei Xie", "authorids": "~Haoyang_He1;~Yuhu_Bai1;~Jiangning_Zhang1;~Qingdong_He1;~Hongxu_Chen7;~Zhenye_Gan1;~Chengjie_Wang1;~Xiangtai_Li1;~Guanzhong_Tian1;~Lei_Xie6", "gender": "M;M;M;M;;;M;;M;M", "homepage": "https://github.com/lewandofskee;https://github.com/yuhbai;https://www.researchgate.net/profile/Jiangning_Zhang2;;https://scholar.google.com/citations?user=uFT3YfMAAAAJ&hl=zh-CN;;;;https://person.zju.edu.cn/NB21052;https://person.zju.edu.cn/leighxie", "dblp": ";;241/9593;267/1653;;;;;240/6842;", "google_scholar": "8NfQv1sAAAAJ;;https://scholar.google.com.hk/citations?user=2hA4X9wAAAAJ;gUJWww0AAAAJ;;;fqte5H4AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;;;;0000-0003-4216-8090;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Haoyang_He1;~Yuhu_Bai1;~Jiangning_Zhang1;~Qingdong_He1;~Hongxu_Chen7;~Zhenye_Gan1;~Chengjie_Wang1;~Xiangtai_Li1;~Guanzhong_Tian1;~Lei_Xie6", "aff": "Zhejiang University;Zhejiang University;Tencent Youtu Lab;Tencent Youtu Lab;Zhejiang University;;Tencent YouTu Lab;; Ningbo Innovation Center, Zhejiang University.;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;tencent.com;tencent.com;zju.edu.cn;;tencent.com;;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Principal Researcher;Researcher;MS student;;Researcher;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhe2024mambaad,\ntitle={Mamba{AD}: Exploring State Space Models for Multi-class Unsupervised Anomaly Detection},\nauthor={Haoyang He and Yuhu Bai and Jiangning Zhang and Qingdong He and Hongxu Chen and Zhenye Gan and Chengjie Wang and Xiangtai Li and Guanzhong Tian and Lei Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8VKxTlnejE}\n}", "github": "", "reviewers": "gwhg;FXx2;aMDx;wfnx", "pdf_size": 3926671, "rating": "5;5;7;8", "confidence": "4;5;3;5", "soundness": "3;2;3;3", "novelty": "3;1;3;3", "presentation": "3;2;3;4", "wc_summary": "33;50;87;94", "wc_strengths": "35;41;67;47", "wc_weaknesses": "80;106;64;171", "wc_questions": "28;56;84;3", "wc_limitations": "12;23;36;3", "wc_review": "188;276;338;318", "wc_reply_reviewers": "51;38;0;175", "wc_reply_authors": "504;526;38;794", "reply_reviewers": "1;1;0;1", "reply_authors": "2;4;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.0, 25.347583711273153 ], "wc_strengths_avg": [ 47.5, 12.031209415515965 ], "wc_weaknesses_avg": [ 105.25, 40.81283499096822 ], "wc_questions_avg": [ 42.75, 30.309858132297485 ], "wc_limitations_avg": [ 18.5, 12.338962679253067 ], "wc_review_avg": [ 280.0, 57.63679380395825 ], "wc_reply_reviewers_avg": [ 66.0, 65.66201337150727 ], "wc_reply_authors_avg": [ 465.5, 271.94254907976426 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.058025885318565944, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15481876724486702892&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;tencent.com;tencent.com;zju.edu.cn;;tencent.com;;zju.edu.cn;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;1;0;1;0;0", "aff_unique_norm": "Zhejiang University;Tencent", "aff_unique_dep": ";Youtu Lab", "aff_unique_url": "https://www.zju.edu.cn;https://www.tencent.com", "aff_unique_abbr": "ZJU;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ningbo", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neuc-MDS: Non-Euclidean Multidimensional Scaling Through Bilinear Forms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96368", "id": "8W5ADJOKcv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8W5ADJOKcv", "openreview": "https://openreview.net/forum?id=8W5ADJOKcv", "poster": "", "project": "", "author_site": "Chengyuan Deng, Jie Gao, Kevin Lu, Feng Luo, Hongbin Sun, Cheng Xin", "tldr": "", "abstract": "We introduce \\textbf{N}on-\\textbf{Euc}lidean-\\textbf{MDS} (Neuc-MDS), which extends Multidimensional Scaling (MDS) to generate outputs that can be non-Euclidean and non-metric. The main idea is to generalize the inner product to other symmetric bilinear forms to utilize the negative eigenvalues of dissimiliarity Gram matrices. Neuc-MDS efficiently optimizes the choice of (both positive and negative) eigenvalues of the dissimilarity Gram matrix to reduce STRESS, the sum of squared pairwise error. We provide an in-depth error analysis and proofs of the optimality in minimizing lower bounds of STRESS. We demonstrate Neuc-MDS's ability to address limitations of classical MDS raised by prior research, and test it on various synthetic and real-world datasets in comparison with both linear and non-linear dimension reduction methods.", "keywords": "multidimensional scaling;dimension reduction;non-Euclidean geometry", "primary_area": "learning_theory", "supplementary_material": "", "author": "Chengyuan Deng;Jie Gao;Kevin Lu;Feng Luo;Hongbin Sun;Cheng Xin", "authorids": "~Chengyuan_Deng1;~Jie_Gao6;~Kevin_Lu4;~Feng_Luo2;~Hongbin_Sun1;~Cheng_Xin2", "gender": ";;M;M;;Not Specified", "homepage": ";https://sites.rutgers.edu/jie-gao/;https://math.rutgers.edu/people/department-directory/detail/344-department-directory/1983-lu-kevin;https://sites.math.rutgers.edu/~fluo/;https://sites.math.rutgers.edu/~hs735/;https://jackal092927.github.io/", "dblp": "246/4646;g/JieGao;;;;168/8972", "google_scholar": "QPaVr9QAAAAJ;P1CMmgEAAAAJ;;LB5YgiIAAAAJ;UsEYIHcAAAAJ;lQk90B0AAAAJ", "orcid": ";0000-0001-5083-6082;;;0000-0003-0368-7592;", "linkedin": ";;;;;", "or_profile": "~Chengyuan_Deng1;~Jie_Gao6;~Kevin_Lu4;~Feng_Luo2;~Hongbin_Sun1;~Cheng_Xin2", "aff": "Rutgers University;Rutgers University;Rutgers University;Rutgers University;Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;cs.rutgers.edu", "position": "PhD student;Full Professor;PhD student;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ndeng2024neucmds,\ntitle={Neuc-{MDS}: Non-Euclidean Multidimensional Scaling Through Bilinear Forms},\nauthor={Chengyuan Deng and Jie Gao and Kevin Lu and Feng Luo and Hongbin Sun and Cheng Xin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8W5ADJOKcv}\n}", "github": "", "reviewers": "3cJU;EWdH;RtpY;wzYh", "pdf_size": 2120836, "rating": "4;6;6;7", "confidence": "4;2;4;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "47;79;38;241", "wc_strengths": "22;45;149;22", "wc_weaknesses": "123;78;160;294", "wc_questions": "19;176;142;2", "wc_limitations": "1;16;2;2", "wc_review": "212;394;491;561", "wc_reply_reviewers": "0;13;8;45", "wc_reply_authors": "138;10;14;10", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.25, 82.11082449957496 ], "wc_strengths_avg": [ 59.5, 52.5190441649503 ], "wc_weaknesses_avg": [ 163.75, 80.61133605145122 ], "wc_questions_avg": [ 84.75, 75.45652721932014 ], "wc_limitations_avg": [ 5.25, 6.219927652312364 ], "wc_review_avg": [ 414.5, 131.0925245771093 ], "wc_reply_reviewers_avg": [ 16.5, 17.09532099727876 ], "wc_reply_authors_avg": [ 43.0, 54.872579673275794 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.15789473684210528, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2473153741489416237&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;rutgers.edu;cs.rutgers.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learnability of high-dimensional targets by two-parameter models and gradient flow", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96367", "id": "8XoWofmZkI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8XoWofmZkI", "openreview": "https://openreview.net/forum?id=8XoWofmZkI", "poster": "/media/PosterPDFs/NeurIPS%202024/96367.png?t=1733920008.6243532", "project": "", "tldr": "", "abstract": "We explore the theoretical possibility of learning $d$-dimensional targets with $W$-parameter models by gradient flow (GF) when $W30% absolute increase in attack success rates compared with GCG when generating both query-specific (38% ->68%) and universal adversarial prompts (26.68% -> 60.32%) for attacking the Llama-2-7B-Chat model on AdvBench.\nCode at: https://github.com/qizhangli/Gradient-based-Jailbreak-Attacks.", "keywords": "large language model;adversarial attack;jailbreak attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Qizhang Li;Yiwen Guo;Wangmeng Zuo;Hao Chen", "authorids": "~Qizhang_Li1;~Yiwen_Guo1;~Wangmeng_Zuo2;~Hao_Chen5", "gender": "M;;;", "homepage": ";;;https://www.cs.ucdavis.edu/~hchen/", "dblp": "272/9084;;;86/475-3", "google_scholar": "W5JLehEAAAAJ;;;1Aa3qxIAAAAJ", "orcid": ";;;0000-0002-4072-0710", "linkedin": ";;;", "or_profile": "~Qizhang_Li1;~Yiwen_Guo1;~Wangmeng_Zuo2;~Hao_Chen5", "aff": "Harbin Institute of Technology;;;University of California, Davis", "aff_domain": "hit.edu;;;ucdavis.edu", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nli2024improved,\ntitle={Improved Generation of Adversarial Examples Against Safety-aligned {LLM}s},\nauthor={Qizhang Li and Yiwen Guo and Wangmeng Zuo and Hao Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8hBc843g1p}\n}", "github": "", "reviewers": "heVA;qAjA;BhKe;s3ui", "pdf_size": 2013290, "rating": "4;5;5;7", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "3;4;3;4", "presentation": "2;1;3;3", "wc_summary": "84;52;59;67", "wc_strengths": "67;89;69;110", "wc_weaknesses": "336;136;53;157", "wc_questions": "35;18;165;113", "wc_limitations": "61;2;31;1", "wc_review": "583;297;377;448", "wc_reply_reviewers": "0;88;238;186", "wc_reply_authors": "442;541;325;80", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 65.5, 11.926860441876563 ], "wc_strengths_avg": [ 83.75, 17.426631917843448 ], "wc_weaknesses_avg": [ 170.5, 103.16128149649946 ], "wc_questions_avg": [ 82.75, 59.48266554215606 ], "wc_limitations_avg": [ 23.75, 24.651318423159438 ], "wc_review_avg": [ 426.25, 105.08895041820524 ], "wc_reply_reviewers_avg": [ 128.0, 91.44397191723465 ], "wc_reply_authors_avg": [ 347.0, 172.0712061909255 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17231361463860594161&as_sdt=2005&sciodt=0,5&hl=en&oe=ASCII", "gs_version_total": 4, "email": "hit.edu;;;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Harbin Institute of Technology;University of California, Davis", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.ucdavis.edu", "aff_unique_abbr": "HIT;UC Davis", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Harbin;Davis", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "StreamBench: Towards Benchmarking Continuous Improvement of Language Agents", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97831", "id": "8hUUy3hoS8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8hUUy3hoS8", "openreview": "https://openreview.net/forum?id=8hUUy3hoS8", "poster": "/media/PosterPDFs/NeurIPS%202024/97831.png?t=1731123456.5306334", "project": "", "author_site": "Cheng-Kuang Wu, Zhi Rui Tam, Chieh-Yen Lin, Yun-Nung (Vivian) Chen, Hung-yi Lee", "tldr": "", "abstract": "Recent works have shown that large language model (LLM) agents are able to improve themselves from experience, which is an important ability for continuous enhancement post-deployment. However, existing benchmarks primarily evaluate their innate capabilities and do not assess their ability to improve over time. To address this gap, we introduce StreamBench, a pioneering benchmark designed to evaluate the continuous improvement of LLM agents over an input-feedback sequence. StreamBench simulates an online learning environment where LLMs receive a continuous flow of feedback stream and iteratively enhance their performance. In addition, we propose several simple yet effective baselines for improving LLMs on StreamBench, and provide a comprehensive analysis to identify critical components that contribute to successful streaming strategies. Our work serves as a stepping stone towards developing effective online learning strategies for LLMs, paving the way for more adaptive AI systems in streaming scenarios.", "keywords": "large language models;benchmark;streaming;continuous improvement", "primary_area": "", "supplementary_material": "", "author": "Cheng-Kuang Wu;Zhi Rui Tam;Chieh-Yen Lin;Yun-Nung Chen;Hung-yi Lee", "authorids": "~Cheng-Kuang_Wu1;~Zhi_Rui_Tam1;~Chieh-Yen_Lin1;~Yun-Nung_Chen1;~Hung-yi_Lee2", "gender": "M;M;M;Non-Binary;F", "homepage": "https://brian-ckwu.github.io/;http://theblackcat102.github.io/;;https://speech.ee.ntu.edu.tw/~hylee/index.html;http://vivianchen.idv.tw", "dblp": "88/415;279/1685.html;123/5095;81/8056;04/9878", "google_scholar": "hc_e7rsAAAAJ;https://scholar.google.com.tw/citations?user=WVv1_h0AAAAJ;;DxLO11IAAAAJ;https://scholar.google.com.tw/citations?user=jQLg-_UAAAAJ", "orcid": "0000-0002-0740-0846;0000-0001-9968-2416;;;", "linkedin": "cheng-kuang-wu-062214219/;;;;", "or_profile": "~Cheng-Kuang_Wu1;~Zhi_Rui_Tam1;~Chieh-Yen_Lin1;~Hung-yi_Lee2;~Vivian_Chen1", "aff": "National Taiwan University;Appier;Appier Inc.;National Taiwan University;Department of Computer Science and Informational Engineering, National Taiwan University", "aff_domain": "csie.ntu.edu.tw;appier.com;appier.com;ntu.edu.tw;csie.ntu.edu.tw", "position": "MS student;Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024streambench,\ntitle={StreamBench: Towards Benchmarking Continuous Improvement of Language Agents},\nauthor={Cheng-Kuang Wu and Zhi Rui Tam and Chieh-Yen Lin and Yun-Nung Chen and Hung-yi Lee},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=8hUUy3hoS8}\n}", "github": "", "reviewers": "XjJz;GLtH;x115", "pdf_size": 886625, "rating": "6;7;7", "confidence": "2;3;3", "wc_summary_and_contributions": "63;161;71", "wc_strengths": "5;48;70", "wc_improvement": "5;134;109", "wc_limitations": "8;10;57", "wc_correctness": "1;6;59", "wc_clarity": "1;10;30", "wc_relation_to_prior_work": "1;40;4", "wc_documentation": "1;31;50", "wc_additional_feedback": "1;1;1", "wc_review": "86;441;451", "wc_reply_reviewers": "0;26;19", "wc_reply_authors": "83;51;54", "reply_reviewers": "0;1;1", "reply_authors": "3;3;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 98.33333333333333, 44.43222054120435 ], "wc_strengths_avg": [ 41.0, 26.993826454703797 ], "wc_improvement_avg": [ 82.66666666666667, 55.8589493476401 ], "wc_limitations_avg": [ 25.0, 22.642143596988927 ], "wc_correctness_avg": [ 22.0, 26.242459234352765 ], "wc_clarity_avg": [ 13.666666666666666, 12.119772641798562 ], "wc_relation_to_prior_work_avg": [ 15.0, 17.72004514666935 ], "wc_documentation_avg": [ 27.333333333333332, 20.17148702720969 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 326.0, 169.75472502014978 ], "wc_reply_reviewers_avg": [ 15.0, 10.98483803552272 ], "wc_reply_authors_avg": [ 62.666666666666664, 14.429907214608907 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15360548041823589208&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "csie.ntu.edu.tw;appier.com;appier.com;ntu.edu.tw;csie.ntu.edu.tw", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "National Taiwan University;Appier;Appier Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.tw;https://www.appier.com;https://www.appier.com", "aff_unique_abbr": "NTU;Appier;Appier", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Evaluating alignment between humans and neural network representations in image-based learning tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96361", "id": "8i6px5W1Rf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8i6px5W1Rf", "openreview": "https://openreview.net/forum?id=8i6px5W1Rf", "poster": "", "project": "", "author_site": "Can Demircan, Tankred Saanum, Leonardo Pettini, Marcel Binz, Blazej Baczkowski, Christian Doeller, Mona Garvert, Eric Schulz", "tldr": "", "abstract": "Humans represent scenes and objects in rich feature spaces, carrying information that allows us to generalise about category memberships and abstract functions with few examples. What determines whether a neural network model generalises like a human? We tested how well the representations of $86$ pretrained neural network models mapped to human learning trajectories across two tasks where humans had to learn continuous relationships and categories of natural images. In these tasks, both human participants and neural networks successfully identified the relevant stimulus features within a few trials, demonstrating effective generalisation. We found that while training dataset size was a core determinant of alignment with human choices, contrastive training with multi-modal data (text and imagery) was a common feature of currently publicly available models that predicted human generalisation. Intrinsic dimensionality of representations had different effects on alignment for different model types. Lastly, we tested three sets of human-aligned representations and found no consistent improvements in predictive accuracy compared to the baselines. In conclusion, pretrained neural networks can serve to extract representations for cognitive models, as they appear to capture some fundamental aspects of cognition that are transferable across tasks. Both our paradigms and modelling approach offer a novel way to quantify alignment between neural networks and humans and extend cognitive science into more naturalistic domains.", "keywords": "human alignment;neural network representations;generalization;function learning;decision-making", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Can Demircan;Tankred Saanum;Leonardo Pettini;Marcel Binz;Blazej M Baczkowski;Christian F. Doeller;Mona M. Garvert;Eric Schulz", "authorids": "~Can_Demircan1;~Tankred_Saanum1;~Leonardo_Pettini2;~Marcel_Binz1;~Blazej_M_Baczkowski1;~Christian_F._Doeller1;~Mona_M._Garvert1;~Eric_Schulz1", "gender": "M;M;M;M;M;M;F;M", "homepage": ";;;;;https://www.cbs.mpg.de/employees/doeller.html;;https://cpilab.org", "dblp": ";;;212/5102;;;;124/0016", "google_scholar": "Hu7VKscAAAAJ;https://scholar.google.com/citations?hl=no;HVWwTX4AAAAJ;https://scholar.google.de/citations?user=Lvm9Q8QAAAAJ;;;https://scholar.google.com.au/citations?user=BIv6LNwAAAAJ;", "orcid": "0000-0001-6069-1761;;0009-0006-4855-5947;;0000-0001-9827-797X;0000-0003-4120-4600;0000-0002-8678-5536;", "linkedin": ";;;;;;;", "or_profile": "~Can_Demircan1;~Tankred_Saanum1;~Leonardo_Pettini2;~Marcel_Binz1;~Blazej_M_Baczkowski1;~Christian_F._Doeller1;~Mona_M._Garvert1;~Eric_Schulz1", "aff": "Helmholtz Zentrum M\u00fcnchen;Max Planck Institute for Biological Cybernetics, Max-Planck Institute;Max Planck School of Cognition;Helmholtz Zentrum M\u00fcnchen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Max Planck Institute for Human Cognitive and Brain Sciences;Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;Max Planck Institute for Biological Cybernetics", "aff_domain": "helmholtz-munich.de;tuebingen.mpg.de;mpg.de;helmholtz-munich.de;uni-tuebingen.de;cbs.mpg.de;uni-wuerzburg.de;tuebingen.mpg.de", "position": "PhD student;PhD student;PhD student;Postdoc;Postdoc;Director;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ndemircan2024evaluating,\ntitle={Evaluating alignment between humans and neural network representations in image-based learning tasks},\nauthor={Can Demircan and Tankred Saanum and Leonardo Pettini and Marcel Binz and Blazej M Baczkowski and Christian F. Doeller and Mona M. Garvert and Eric Schulz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8i6px5W1Rf}\n}", "github": "", "reviewers": "PLVt;MCn8;NCLd;pzQG", "pdf_size": 2199904, "rating": "5;6;6;7", "confidence": "3;3;4;4", "soundness": "2;3;2;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "112;77;31;122", "wc_strengths": "41;80;67;39", "wc_weaknesses": "150;230;196;63", "wc_questions": "636;4;38;166", "wc_limitations": "9;32;13;4", "wc_review": "948;423;345;394", "wc_reply_reviewers": "50;54;16;0", "wc_reply_authors": "430;20;8;25", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 35.62653505464712 ], "wc_strengths_avg": [ 56.75, 17.383541066192468 ], "wc_weaknesses_avg": [ 159.75, 62.65929699573719 ], "wc_questions_avg": [ 211.0, 252.69942619642015 ], "wc_limitations_avg": [ 14.5, 10.594810050208546 ], "wc_review_avg": [ 527.5, 244.37113168293837 ], "wc_reply_reviewers_avg": [ 30.0, 22.759613353482084 ], "wc_reply_authors_avg": [ 120.75, 178.65242091838553 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j6pfR-zjY5QJ:scholar.google.com/&scioq=Evaluating+alignment+between+humans+and+neural+network+representations+in+image-based+learning+tasks&hl=en&as_sdt=0,6", "gs_version_total": 5, "email": "helmholtz-munich.de;tuebingen.mpg.de;mpg.de;helmholtz-munich.de;uni-tuebingen.de;cbs.mpg.de;uni-wuerzburg.de;tuebingen.mpg.de", "author_num": 8, "aff_unique_index": "0;1;2;0;3;4;5;1", "aff_unique_norm": "Helmholtz Zentrum M\u00fcnchen;Max Planck Institute for Biological Cybernetics;Max Planck School of Cognition;Eberhard Karls University of T\u00fcbingen;Max Planck Institute for Human Cognitive and Brain Sciences;University of W\u00fcrzburg", "aff_unique_dep": ";Biological Cybernetics;;;;", "aff_unique_url": "https://www.helmholtz-muenchen.de;https://www.biological-cybernetics.de;https://www.mps-cognition.de;https://www.uni-tuebingen.de/;https://www.mpi-cbs.de;https://www.uni-wuerzburg.de", "aff_unique_abbr": ";MPIBC;MPS Cognition;Uni T\u00fcbingen;MPI CBS;JMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";T\u00fcbingen;W\u00fcrzburg", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Efficient Minimum Bayes Risk Decoding using Low-Rank Matrix Completion Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96360", "id": "8iPobEKUUA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8iPobEKUUA", "openreview": "https://openreview.net/forum?id=8iPobEKUUA", "poster": "/media/PosterPDFs/NeurIPS%202024/96360.png?t=1731716859.059697", "project": "", "author_site": "Firas Trabelsi, David Vilar, Mara Finkelstein, Markus Freitag", "tldr": "", "abstract": "Minimum Bayes Risk (MBR) decoding is a powerful decoding strategy widely used for text generation tasks but its quadratic computational complexity limits its practical application. This paper presents a novel approach for approximating MBR decoding using matrix completion techniques, focusing on a machine translation task. We formulate MBR decoding as a matrix completion problem, where the utility metric scores between candidate hypotheses and reference translations form a low-rank matrix. First we empirically show that the scores matrices indeed have a low-rank structure. Then we exploit this by only computing a random subset of the scores and efficiently recover the missing entries in the matrix by applying the Alternating Least Squares (ALS) algorithm, thereby enabling fast approximation of the MBR decoding process. Our experimental results on machine translation tasks demonstrate that the proposed method requires 1/16 utility metric computations compared to the vanilla MBR decoding while achieving equal translation quality measured by COMET on the WMT22 dataset (en<>de, en<>ru). We also benchmark our method against other approximation methods and we show significant gains in quality.", "keywords": "Machine Translation;Minimum Bayes Risk;Natural Language Processing;Low-Rank Matrix Completion", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/6091897d1263242e845221601fdcb789213bffc1.zip", "author": "Firas Trabelsi;David Vilar;Mara Finkelstein;Markus Freitag", "authorids": "~Firas_Trabelsi1;~David_Vilar1;~Mara_Finkelstein1;~Markus_Freitag2", "gender": "M;M;F;M", "homepage": ";https://research.google/people/david-vilar/;;", "dblp": "379/6048;06/6883;354/9098;57/8503", "google_scholar": ";2cP6vV4AAAAJ;2fwViA0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": "firas-t-13a15a114/;david-vilar-torres-a6144417/;mara-finkelstein-696776104;markus-freitag-7b17b4101/", "or_profile": "~Firas_Trabelsi1;~David_Vilar1;~Mara_Finkelstein1;~Markus_Freitag2", "aff": "Google;Google;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ntrabelsi2024efficient,\ntitle={Efficient Minimum Bayes Risk Decoding using Low-Rank Matrix Completion Algorithms},\nauthor={Firas Trabelsi and David Vilar and Mara Finkelstein and Markus Freitag},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8iPobEKUUA}\n}", "github": "", "reviewers": "PUED;D2EM;Xkoo;GHFT", "pdf_size": 1379359, "rating": "6;6;7;7", "confidence": "3;3;5;4", "soundness": "3;3;3;3", "novelty": "3;4;3;3", "presentation": "3;2;3;3", "wc_summary": "93;61;126;107", "wc_strengths": "43;59;49;114", "wc_weaknesses": "67;215;158;131", "wc_questions": "27;353;167;125", "wc_limitations": "15;14;1;24", "wc_review": "245;702;501;501", "wc_reply_reviewers": "40;77;39;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.75, 23.731571797923543 ], "wc_strengths_avg": [ 66.25, 28.154706533721853 ], "wc_weaknesses_avg": [ 142.75, 53.218300423820374 ], "wc_questions_avg": [ 168.0, 118.27510304370908 ], "wc_limitations_avg": [ 13.5, 8.200609733428363 ], "wc_review_avg": [ 487.25, 162.15790915030942 ], "wc_reply_reviewers_avg": [ 42.75, 22.16275028059469 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16215608470883966250&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Autoformalize Mathematical Statements by Symbolic Equivalence and Semantic Consistency", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96359", "id": "8ihVBYpMV4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8ihVBYpMV4", "openreview": "https://openreview.net/forum?id=8ihVBYpMV4", "poster": "", "project": "", "author_site": "Zenan Li, Yifan Wu, Zhaoyu Li, Xinming Wei, Xian Zhang, Fan Yang, Xiaoxing Ma", "tldr": "", "abstract": "Autoformalization, the task of automatically translating natural language descriptions into a formal language, poses a significant challenge across various domains, especially in mathematics. Recent advancements in large language models (LLMs) have unveiled their promising capabilities to formalize even competition-level math problems. However, we observe a considerable discrepancy between pass@1 and pass@k accuracies in LLM-generated formalizations. To address this gap, we introduce a novel framework that scores and selects the best result from k autoformalization candidates based on two complementary self-consistency methods: symbolic equivalence and semantic consistency. Elaborately, symbolic equivalence identifies the logical homogeneity among autoformalization candidates using automated theorem provers, and semantic consistency evaluates the preservation of the original meaning by informalizing the candidates and computing the similarity between the embeddings of the original and informalized texts. \nOur extensive experiments on the MATH and miniF2F datasets demonstrate that our approach significantly enhances autoformalization accuracy, achieving up to 0.22-1.35x relative improvements across various LLMs and baseline methods.", "keywords": "Automated theorem proving;Autoformalization;Large language model;Self-consistency", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/57a12047ba2694605b6dbfeb9d7c5df6ddafb1a8.zip", "author": "Zenan Li;Yifan Wu;Zhaoyu Li;Xinming Wei;Xian Zhang;Fan Yang;Xiaoxing Ma", "authorids": "~Zenan_Li3;~Yifan_Wu11;~Zhaoyu_Li3;~Xinming_Wei1;~Xian_Zhang4;~Fan_Yang28;~Xiaoxing_Ma1", "gender": "M;M;M;M;M;M;", "homepage": "https://lizn-zn.github.io/;;https://www.zhaoyu-li.com/;;https://www.microsoft.com/en-us/research/people/zhxian/;https://fanyangcs.github.io/;", "dblp": "242/2285;;;;;29/3081-24.html;", "google_scholar": "eu4eqTcAAAAJ;;;n1JyTKsAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;0000-0002-0378-060X;", "linkedin": ";%E4%B8%80%E5%87%A1-%E5%90%B4-898450250/;zhaoyu-li-9171892a5/;;;;", "or_profile": "~Zenan_Li3;~Yifan_Wu11;~Zhaoyu_Li3;~Xinming_Wei1;~Xian_Zhang4;~Fan_Yang28;~Xiaoxing_Ma1", "aff": "Microsoft Research;Peking University;University of Toronto;Peking University;Microsoft;Microsoft Research;", "aff_domain": "research.microsoft.com;stu.pku.edu.cn;cs.toronto.edu;pku.edu.cn;microsoft.com;research.microsoft.com;", "position": "Intern;MS student;PhD student;PhD student;Researcher;Senior Principal Researcher;", "bibtex": "@inproceedings{\nli2024autoformalize,\ntitle={Autoformalize Mathematical Statements by Symbolic Equivalence and Semantic Consistency},\nauthor={Zenan Li and Yifan Wu and Zhaoyu Li and Xinming Wei and Xian Zhang and Fan Yang and Xiaoxing Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8ihVBYpMV4}\n}", "github": "", "reviewers": "GLbW;LZ2a;37Eh;zk1U", "pdf_size": 712182, "rating": "5;6;7;8", "confidence": "4;4;2;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "4;3;3;4", "wc_summary": "97;138;125;38", "wc_strengths": "58;111;122;46", "wc_weaknesses": "50;101;55;61", "wc_questions": "130;220;137;18", "wc_limitations": "11;20;6;49", "wc_review": "346;590;445;212", "wc_reply_reviewers": "21;164;22;14", "wc_reply_authors": "0;652;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 99.5, 38.47401720642127 ], "wc_strengths_avg": [ 84.25, 32.7595405950694 ], "wc_weaknesses_avg": [ 66.75, 20.154093876927337 ], "wc_questions_avg": [ 126.25, 71.82748429396646 ], "wc_limitations_avg": [ 21.5, 16.650825805346713 ], "wc_review_avg": [ 398.25, 138.17810065274455 ], "wc_reply_reviewers_avg": [ 55.25, 62.86244904551524 ], "wc_reply_authors_avg": [ 163.0, 282.324281633727 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3115338742117883899&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "research.microsoft.com;stu.pku.edu.cn;cs.toronto.edu;pku.edu.cn;microsoft.com;research.microsoft.com;", "author_num": 7, "aff_unique_index": "0;1;2;1;0;0", "aff_unique_norm": "Microsoft;Peking University;University of Toronto", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;http://www.pku.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "MSR;Peking U;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;0;0", "aff_country_unique": "United States;China;Canada" }, { "title": "BricksRL: A Platform for Democratizing Robotics and Reinforcement Learning Research and Education with LEGO", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96358", "id": "8iytZCnXIu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8iytZCnXIu", "openreview": "https://openreview.net/forum?id=8iytZCnXIu", "poster": "/media/PosterPDFs/NeurIPS%202024/96358.png?t=1731077899.563504", "project": "", "author_site": "Sebastian Dittert, Vincent Moens, Gianni De Fabritiis", "tldr": "", "abstract": "We present BricksRL, a platform designed to democratize access to robotics for reinforcement learning research and education. BricksRL facilitates the creation, design, and training of custom LEGO robots in the real world by interfacing them with the TorchRL library for reinforcement learning agents. The integration of TorchRL with the LEGO hubs, via Bluetooth bidirectional communication, enables state-of-the-art reinforcement learning training on GPUs for a wide variety of LEGO builds. This offers a flexible and cost-efficient approach for scaling and also provides a robust infrastructure for robot-environment-algorithm communication. We present various experiments across tasks and robot configurations, providing built plans and training results. Furthermore, we demonstrate that inexpensive LEGO robots can be trained end-to-end in the real world to achieve simple tasks, with training times typically under 120 minutes on a normal laptop. Moreover, we show how users can extend the capabilities, exemplified by the successful integration of non-LEGO sensors. By enhancing accessibility to both robotics and reinforcement learning, BricksRL establishes a strong foundation for democratized robotic learning in research and educational settings.", "keywords": "Robotics;Reinforcement learning", "primary_area": "robotics", "supplementary_material": "", "author": "Sebastian Dittert;Vincent Moens;Gianni De Fabritiis", "authorids": "~Sebastian_Dittert1;~Vincent_Moens3;~Gianni_De_Fabritiis1", "gender": "M;M;M", "homepage": ";https://github.com/vmoens;https://www.compscience.org", "dblp": ";220/5625;29/605", "google_scholar": ";8l-tvFoAAAAJ;-_kX4kMAAAAJ", "orcid": ";;", "linkedin": "sebastian-dittert/;vincent-moens-9bb91972/;gdefabritiis/", "or_profile": "~Sebastian_Dittert1;~Vincent_Moens3;~Gianni_De_Fabritiis1", "aff": "Universitat Pompeu Fabra;Meta;Universitat Pompeu Fabra", "aff_domain": "upf.es;fb.com;upf.edu", "position": "PhD student;Applied ML Scientist;Full Professor", "bibtex": "@inproceedings{\ndittert2024bricksrl,\ntitle={Bricks{RL}: A Platform for Democratizing Robotics and Reinforcement Learning Research and Education with {LEGO}},\nauthor={Sebastian Dittert and Vincent Moens and Gianni De Fabritiis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8iytZCnXIu}\n}", "github": "", "reviewers": "R3sL;tHkr;6ArU;Tpu8", "pdf_size": 1549329, "rating": "5;6;6;7", "confidence": "3;5;4;4", "soundness": "3;3;3;3", "novelty": "3;4;3;3", "presentation": "2;2;3;2", "wc_summary": "60;60;64;110", "wc_strengths": "39;230;86;117", "wc_weaknesses": "55;211;89;96", "wc_questions": "37;63;61;42", "wc_limitations": "12;40;40;7", "wc_review": "203;604;340;372", "wc_reply_reviewers": "80;34;0;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 21.13646138784825 ], "wc_strengths_avg": [ 118.0, 70.37400088100719 ], "wc_weaknesses_avg": [ 112.75, 58.80635594899585 ], "wc_questions_avg": [ 50.75, 11.409973707244026 ], "wc_limitations_avg": [ 24.75, 15.35211711784404 ], "wc_review_avg": [ 379.75, 144.19496350427778 ], "wc_reply_reviewers_avg": [ 36.0, 28.600699292150182 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2735960950902492797&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "upf.es;fb.com;upf.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Universitat Pompeu Fabra;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.upf.edu/;https://meta.com", "aff_unique_abbr": "UPF;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Spain;United States" }, { "title": "Efficient Adversarial Training in LLMs with Continuous Attacks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96357", "id": "8jB6sGqvgQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8jB6sGqvgQ", "openreview": "https://openreview.net/forum?id=8jB6sGqvgQ", "poster": "", "project": "", "author_site": "Sophie Xhonneux, Alessandro Sordoni, Stephan G\u00fcnnemann, Gauthier Gidel, Leo Schwinn", "tldr": "", "abstract": "Large language models (LLMs) are vulnerable to adversarial attacks that can bypass their safety guardrails. In many domains, adversarial training has proven to be one of the most promising methods to reliably improve robustness against such attacks. Yet, in the context of LLMs, current methods for adversarial training are hindered by the high computational costs required to perform discrete adversarial attacks at each training iteration. We address this problem by instead calculating adversarial attacks in the continuous embedding space of the LLM, which is orders of magnitudes more efficient. We propose a fast adversarial training algorithm (C-AdvUL) composed of two losses: the first makes the model robust on continuous embedding attacks computed on an adversarial behaviour dataset; the second ensures the usefulness of the final model by fine-tuning on utility data. Moreover, we introduce C-AdvIPO, an adversarial variant of IPO that does not require utility data for adversarially robust alignment. Our empirical evaluation on five models from different families (Gemma, Phi3, Mistral, Zephyr, Llama2) and at different scales (2B, 3.8B, 7B) shows that both algorithms substantially enhance LLM robustness against discrete attacks (GCG, AutoDAN, PAIR), while maintaining utility. Our results demonstrate that robustness to continuous perturbations can extrapolate to discrete threat models. Thereby, we present a path toward scalable adversarial training algorithms for robustly aligning LLMs.", "keywords": "Large Language Models;Adversarial Training;Robustness", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/afe7b91e1ef2a982345d82e5006b8d6089b5fbd3.zip", "author": "Sophie Xhonneux;Alessandro Sordoni;Stephan G\u00fcnnemann;Gauthier Gidel;Leo Schwinn", "authorids": "~Sophie_Xhonneux1;~Alessandro_Sordoni2;~Stephan_G\u00fcnnemann1;~Gauthier_Gidel1;~Leo_Schwinn1", "gender": "M;M;M;M;", "homepage": "http://www.daml.in.tum.de;https://gauthiergidel.github.io/;;;", "dblp": "43/3011;188/6326;259/2852;57/7642;255/5495", "google_scholar": ";https://scholar.google.fr/citations?user=bDrXQPUAAAAJ;PXT4k9cAAAAJ;;", "orcid": ";;0000-0003-3967-2202;;0000-0003-1990-4475", "linkedin": ";;leo-schwinn-34a61623b/;;", "or_profile": "~Stephan_G\u00fcnnemann1;~Gauthier_Gidel1;~Leo_Schwinn1;~Alessandro_Sordoni1;~Louis-Pascal_A._C._Xhonneux1", "aff": "Technical University Munich;Mila - Quebec Artificial Intelligence Institute;Technical University of Munich;Microsoft;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "tum.de;mila.quebec;tum.de;microsoft.com;mila.umontreal.ca", "position": "Professor;Assistant Professor;Postdoc;Researcher;PhD student", "bibtex": "@inproceedings{\nxhonneux2024efficient,\ntitle={Efficient Adversarial Training in {LLM}s with Continuous Attacks},\nauthor={Sophie Xhonneux and Alessandro Sordoni and Stephan G{\\\"u}nnemann and Gauthier Gidel and Leo Schwinn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8jB6sGqvgQ}\n}", "github": "", "reviewers": "Z2aE;TJPv;Jk9f", "pdf_size": 1826903, "rating": "5;5;7", "confidence": "3;3;4", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "64;46;108", "wc_strengths": "22;46;69", "wc_weaknesses": "258;171;245", "wc_questions": "2;142;20", "wc_limitations": "8;14;3", "wc_review": "354;419;445", "wc_reply_reviewers": "101;57;89", "wc_reply_authors": "262;368;48", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.66666666666667, 26.042699979499478 ], "wc_strengths_avg": [ 45.666666666666664, 19.189117286165672 ], "wc_weaknesses_avg": [ 224.66666666666666, 38.31738798799081 ], "wc_questions_avg": [ 54.666666666666664, 62.18967402676714 ], "wc_limitations_avg": [ 8.333333333333334, 4.4969125210773475 ], "wc_review_avg": [ 406.0, 38.27096375408734 ], "wc_reply_reviewers_avg": [ 82.33333333333333, 18.571184369578827 ], "wc_reply_authors_avg": [ 226.0, 133.09645625134678 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6237809154394444153&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "tum.de;mila.quebec;tum.de;microsoft.com;mila.umontreal.ca", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Technical University of Munich;Quebec Artificial Intelligence Institute;Microsoft;University of Montreal", "aff_unique_dep": ";Artificial Intelligence;Microsoft Corporation;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.tum.de;https://mila.quebec;https://www.microsoft.com;https://www.umontreal.ca", "aff_unique_abbr": "TUM;Mila;Microsoft;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;0;2;1", "aff_country_unique": "Germany;Canada;United States" }, { "title": "Fast Channel Simulation via Error-Correcting Codes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96356", "id": "8jpSenKvoS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8jpSenKvoS", "openreview": "https://openreview.net/forum?id=8jpSenKvoS", "poster": "/media/PosterPDFs/NeurIPS%202024/96356.png?t=1731731566.859089", "project": "", "author_site": "Sharang Sriramu, Rochelle Barsz, Elizabeth Polito, Aaron Wagner", "tldr": "", "abstract": "We consider the design of practically-implementable schemes for the task of channel simulation. Existing methods do not scale with the \nnumber of simultaneous uses of the channel and are therefore unable to harness the amortization gains associated with simulating many uses of the channel at once. We show how techniques from the theory of error-correcting codes can be applied to achieve scalability and hence improved performance. As an exemplar, we focus on how polar codes can be used to efficiently simulate i.i.d. copies of a class of binary-output channels.", "keywords": "channel simulation;polar codes;linear codes;Error-correcting codes;quantization;data compression;DNN-based data compression", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/cbd5c1b5b662a5dc9acc0375da67ded564115f62.zip", "author": "Sharang M. Sriramu;Rochelle Barsz;Elizabeth Polito;Aaron B. Wagner", "authorids": "~Sharang_M._Sriramu1;~Rochelle_Barsz1;~Elizabeth_Polito1;~Aaron_B._Wagner1", "gender": "M;F;F;", "homepage": ";;;http://people.ece.cornell.edu/wagner", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0002-2700-5421;;;0000-0001-9127-0089", "linkedin": ";rochelle-barsz/;elizabeth-polito/;", "or_profile": "~Sharang_M._Sriramu1;~Rochelle_Barsz1;~Elizabeth_Polito1;~Aaron_Wagner1", "aff": "Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nsriramu2024fast,\ntitle={Fast Channel Simulation via Error-Correcting Codes},\nauthor={Sharang M. Sriramu and Rochelle Barsz and Elizabeth Polito and Aaron B. Wagner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8jpSenKvoS}\n}", "github": "", "reviewers": "Y8UR;6bFv;w9ei;y5AJ", "pdf_size": 3194095, "rating": "4;4;7;8", "confidence": "4;2;4;1", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "1;2;2;4", "wc_summary": "113;33;72;70", "wc_strengths": "20;14;110;53", "wc_weaknesses": "449;70;571;156", "wc_questions": "102;14;169;23", "wc_limitations": "21;1;48;10", "wc_review": "705;132;970;312", "wc_reply_reviewers": "855;42;19;6", "wc_reply_authors": "992;327;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 72.0, 28.310775333784132 ], "wc_strengths_avg": [ 49.25, 38.08789177678386 ], "wc_weaknesses_avg": [ 311.5, 205.39535048291623 ], "wc_questions_avg": [ 77.0, 63.194145298437256 ], "wc_limitations_avg": [ 20.0, 17.649362594722792 ], "wc_review_avg": [ 529.75, 327.9301564357874 ], "wc_reply_reviewers_avg": [ 230.5, 360.78560115392634 ], "wc_reply_authors_avg": [ 329.75, 404.9854164040972 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3503292361635921, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4061581385437912675&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "cornell.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Sketches for Training Data Attribution and Studying the Loss Landscape", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96355", "id": "8jyCRGXOr5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8jyCRGXOr5", "openreview": "https://openreview.net/forum?id=8jyCRGXOr5", "poster": "/media/PosterPDFs/NeurIPS%202024/96355.png?t=1731598227.252734", "project": "", "tldr": "", "abstract": "The study of modern machine learning models often necessitates storing vast quantities of gradients or Hessian vector products (HVPs). Traditional sketching methods struggle to scale under these memory constraints. We present a novel framework for scalable gradient and HVP sketching, tailored for modern hardware. We provide theoretical guarantees and demonstrate the power of our methods in applications like training data attribution, Hessian spectrum analysis, and intrinsic dimension computation for pre-trained language models. Our work sheds new light on the behavior of pre-trained language models, challenging assumptions about their intrinsic dimensionality and Hessian properties.", "keywords": "Deep Learning;Language Models;Interpretability;Training Data Attribution;Loss Landscape", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Andrea Schioppa", "authorids": "~Andrea_Schioppa1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nschioppa2024efficient,\ntitle={Efficient Sketches for Training Data Attribution and Studying the Loss Landscape},\nauthor={Andrea Schioppa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8jyCRGXOr5}\n}", "github": "", "reviewers": "9kYE;xaAC;NhaV;kKVg", "pdf_size": 1034828, "rating": "5;6;6;6", "confidence": "2;4;3;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "51;79;49;97", "wc_strengths": "104;87;34;180", "wc_weaknesses": "248;143;80;225", "wc_questions": "74;410;65;95", "wc_limitations": "31;11;1;6", "wc_review": "508;730;229;603", "wc_reply_reviewers": "35;209;0;315", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 20.049937655763422 ], "wc_strengths_avg": [ 101.25, 52.284677487768825 ], "wc_weaknesses_avg": [ 174.0, 66.84683986547158 ], "wc_questions_avg": [ 161.0, 144.1717725492754 ], "wc_limitations_avg": [ 12.25, 11.388041973930374 ], "wc_review_avg": [ 517.5, 184.24779510213955 ], "wc_reply_reviewers_avg": [ 139.75, 128.46278644027615 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-q3ZDrI5HNEJ:scholar.google.com/&scioq=Efficient+Sketches+for+Training+Data+Attribution+and+Studying+the+Loss+Landscape&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "Proving Olympiad Algebraic Inequalities without Human Demonstrations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97830", "id": "8kFctyli9H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8kFctyli9H", "openreview": "https://openreview.net/forum?id=8kFctyli9H", "poster": "/media/PosterPDFs/NeurIPS%202024/97830.png?t=1731676192.9137022", "project": "", "author_site": "Chenrui Wei, Mengzhou Sun, Wei Wang", "tldr": "", "abstract": "Solving Olympiad-level mathematical problems represents a significant advancement in machine intelligence and automated reasoning. Current machine learning methods, however, struggle to solve Olympiad-level problems beyond Euclidean plane geometry due to a lack of large-scale, high-quality datasets. The challenge is even greater in algebraic systems, which involve infinite reasoning spaces within finite conditions. To address these issues, we propose *AIPS*, an *Algebraic Inequality Proving System* capable of autonomously generating complex inequality theorems and effectively solving Olympiad-level inequality problems without requiring human demonstrations. During proof search in a mixed reasoning manner, a value curriculum learning strategy on generated datasets is implemented to improve proving performance, demonstrating strong mathematical intuitions. \nOn a test set of 20 International Mathematical Olympiad-level inequality problems, AIPS successfully solved 10, outperforming state-of-the-art methods. Furthermore, AIPS automatically generated a vast array of non-trivial theorems without human intervention, some of which have been evaluated by professional contestants and deemed to reach the level of the International Mathematical Olympiad. Notably, one theorem was selected as a competition problem in a major city's 2024 Mathematical Olympiad.\nAll the materials are available at [sites.google.com/view/aips2](https://sites.google.com/view/aips2)", "keywords": "automated theorem proving;value network;algebraic inequality;theorem proving;efficient search", "primary_area": "", "supplementary_material": "/attachment/1a349e76fad0f5efa9988a068b5949604b894895.pdf", "author": "Chenrui Wei;Mengzhou Sun;Wei Wang", "authorids": "~Chenrui_Wei1;~Mengzhou_Sun1;~Wei_Wang4", "gender": ";M;M", "homepage": ";;http://cognn.com/", "dblp": ";;", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0001-2201-1910;", "linkedin": ";;", "or_profile": "~Chenrui_Wei1;~Mengzhou_Sun1;~Wei_Wang4", "aff": ";National University of Singapore;Beijing Institute for General Artificial Intelligence", "aff_domain": ";u.nus.edu;bigai.ai", "position": ";PhD student;Research Scientist", "bibtex": "@inproceedings{\nwei2024proving,\ntitle={Proving Olympiad Algebraic Inequalities without Human Demonstrations},\nauthor={Chenrui Wei and Mengzhou Sun and Wei Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=8kFctyli9H}\n}", "github": "", "reviewers": "5Jfo;gDey;V585", "pdf_size": 734501, "rating": "3;7;8", "confidence": "3;3;4", "wc_summary_and_contributions": "51;55;64", "wc_strengths": "2;2;102", "wc_improvement": "2;4;121", "wc_limitations": "2;27;30", "wc_correctness": "2;1;1", "wc_clarity": "2;1;1", "wc_relation_to_prior_work": "2;1;1", "wc_documentation": "2;1;24", "wc_additional_feedback": "1;1;1", "wc_review": "66;93;345", "wc_reply_reviewers": "0;73;15", "wc_reply_authors": "0;43;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 2.160246899469287 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 56.666666666666664, 5.436502143433364 ], "wc_strengths_avg": [ 35.333333333333336, 47.14045207910317 ], "wc_improvement_avg": [ 42.333333333333336, 55.63172556095027 ], "wc_limitations_avg": [ 19.666666666666668, 12.552113589175153 ], "wc_correctness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_clarity_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_relation_to_prior_work_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_documentation_avg": [ 9.0, 10.614455552060438 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 168.0, 125.64234954823155 ], "wc_reply_reviewers_avg": [ 29.333333333333332, 31.47838764754143 ], "wc_reply_authors_avg": [ 14.333333333333334, 20.27039439401436 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.654653670707977, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2998247305570941809&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";u.nus.edu;bigai.ai", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;http://www.bigaiai.org/", "aff_unique_abbr": "NUS;BIGAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;China" }, { "title": "Improving Neural Network Surface Processing with Principal Curvatures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96354", "id": "8koaqRdRYH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8koaqRdRYH", "openreview": "https://openreview.net/forum?id=8koaqRdRYH", "poster": "", "project": "", "author_site": "Josquin Harrison, James Benn, Maxime Sermesant", "tldr": "", "abstract": "The modern study and use of surfaces is a research topic grounded in centuries of mathematical and empirical inquiry. From a mathematical point of view, curvature is an invariant that characterises the intrinsic geometry and the extrinsic shape of a surface. Yet, in modern applications the focus has shifted away from finding expressive representations of surfaces, and towards the design of efficient neural network architectures to process them. The literature suggests a tendency to either overlook the representation of the processed surface, or use overcomplicated representations whose ability to capture the essential features of a surface is opaque. We propose using curvature as the input of neural network architectures for surface processing, and explore this proposition through experiments making use of the shape operator. Our results show that using curvature as input leads to significant a increase in performance on segmentation and classification tasks, while allowing far less computational overhead than current methods.", "keywords": "geometric deep learning;geometry processing;shape analysis;discrete differential geometry", "primary_area": "machine_vision", "supplementary_material": "", "author": "Josquin Harrison;James Benn;Maxime Sermesant", "authorids": "~Josquin_Harrison1;~James_Benn1;~Maxime_Sermesant1", "gender": ";;M", "homepage": ";https://iksiri.github.io/jamesbenn.github.io/index.html;https://team.inria.fr/epione/en/team/maxime-sermesant/", "dblp": ";;92/5704", "google_scholar": ";;LTDUiAkAAAAJ", "orcid": ";;0000-0002-6256-8350", "linkedin": ";;", "or_profile": "~Josquin_Harrison1;~James_Benn1;~Maxime_Sermesant1", "aff": "INRIA;INRIA;INRIA", "aff_domain": "inria.fr;inria.fr;inria.fr", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nharrison2024improving,\ntitle={Improving Neural Network Surface Processing with Principal Curvatures},\nauthor={Josquin Harrison and James Benn and Maxime Sermesant},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8koaqRdRYH}\n}", "github": "", "reviewers": "ifJ1;LGgy;qKsN", "pdf_size": 5867865, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "2;2;3", "novelty": "3;2;4", "presentation": "2;3;3", "wc_summary": "61;81;60", "wc_strengths": "57;30;120", "wc_weaknesses": "119;252;186", "wc_questions": "4;24;1", "wc_limitations": "4;28;62", "wc_review": "245;415;429", "wc_reply_reviewers": "811;15;271", "wc_reply_authors": "719;17;539", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 67.33333333333333, 9.672412085697939 ], "wc_strengths_avg": [ 69.0, 37.70941526992961 ], "wc_weaknesses_avg": [ 185.66666666666666, 54.297534218620115 ], "wc_questions_avg": [ 9.666666666666666, 10.208928554075703 ], "wc_limitations_avg": [ 31.333333333333332, 23.79542439676633 ], "wc_review_avg": [ 363.0, 83.63412381717565 ], "wc_reply_reviewers_avg": [ 365.6666666666667, 331.7884198635563 ], "wc_reply_authors_avg": [ 425.0, 297.71126952132664 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eBE2GYFi19UJ:scholar.google.com/&scioq=Improving+Neural+Network+Surface+Processing+with+Principal+Curvatures&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "inria.fr;inria.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Any2Policy: Learning Visuomotor Policy with Any-Modality", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96353", "id": "8lcW9ltJx9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8lcW9ltJx9", "openreview": "https://openreview.net/forum?id=8lcW9ltJx9", "poster": "", "project": "", "author_site": "Yichen Zhu, Zhicai Ou, Feifei Feng, Jian Tang", "tldr": "", "abstract": "Humans can communicate and observe media with different modalities, such as texts, sounds, and images. For robots to be more generalizable embodied agents, they should be capable of following instructions and perceiving the world with adaptation to diverse modalities. Current robotic learning methodologies often focus on single-modal task specification and observation, thereby limiting their ability to process rich multi-modal information. Addressing this limitation, we present an end-to-end general-purpose multi-modal system named Any-to-Policy Embodied Agents. This system empowers robots to handle tasks using various modalities, whether in combinations like text-image, audio-image, text-point cloud, or in isolation. Our innovative approach involves training a versatile modality network that adapts to various inputs and connects with policy networks for effective control. Because of the lack of existing multi-modal robotics datasets for evaluation, we assembled a comprehensive real-world dataset encompassing 30 robotic tasks. Each task in this dataset is richly annotated across multiple modalities, providing a robust foundation for assessment. We conducted extensive validation of our proposed unified modality embodied agent using several simulation benchmarks, including Franka Kitchen, Meta-World, and Maniskill2, as well as in our real-world settings. Our experiments showcase the promising capability of building embodied agents that can adapt to diverse multi-modal in a unified framework.", "keywords": "multi-modal;robot learning", "primary_area": "robotics", "supplementary_material": "/attachment/42734737f3aa1930054544a5200f4920030e202b.zip", "author": "Yichen Zhu;Zhicai Ou;Feifei Feng;Jian Tang", "authorids": "~Yichen_Zhu1;~Zhicai_Ou1;~Feifei_Feng1;~Jian_Tang5", "gender": "M;;M;M", "homepage": ";;;https://ecs.syr.edu/faculty/tang", "dblp": ";;27/4916.html;181/2667-8", "google_scholar": "eyKyrbsAAAAJ;;;", "orcid": "0000-0001-5126-838X;;;", "linkedin": ";;fengff/;", "or_profile": "~Yichen_Zhu1;~Zhicai_Ou1;~Feifei_Feng1;~Jian_Tang5", "aff": "Midea Group;;Midea Group;x-humanoid", "aff_domain": "midea.com;;midea.com;x-humanoid.com", "position": "Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nzhu2024anypolicy,\ntitle={Any2Policy: Learning Visuomotor Policy with Any-Modality},\nauthor={Yichen Zhu and Zhicai Ou and Feifei Feng and Jian Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8lcW9ltJx9}\n}", "github": "", "reviewers": "RRf1;5TLy;QY6x;h8z5", "pdf_size": 3694349, "rating": "5;6;6;6", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "67;106;125;107", "wc_strengths": "112;78;63;205", "wc_weaknesses": "195;190;153;230", "wc_questions": "43;26;385;4", "wc_limitations": "12;13;61;1", "wc_review": "429;413;787;547", "wc_reply_reviewers": "46;24;253;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.25, 21.1704392963396 ], "wc_strengths_avg": [ 114.5, 55.1837838499681 ], "wc_weaknesses_avg": [ 192.0, 27.285527299284507 ], "wc_questions_avg": [ 114.5, 156.78408720275155 ], "wc_limitations_avg": [ 21.75, 23.14492384951828 ], "wc_review_avg": [ 544.0, 149.53594885511643 ], "wc_reply_reviewers_avg": [ 80.75, 100.77046938463668 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7_06IoKMY2gJ:scholar.google.com/&scioq=Any2Policy:+Learning+Visuomotor+Policy+with+Any-Modality&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "midea.com;;midea.com;x-humanoid.com", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Midea Group;x-humanoid", "aff_unique_dep": ";", "aff_unique_url": "https://www.mideaglobal.com;", "aff_unique_abbr": "Midea;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "id": "8m6zw8Jur0", "title": "Image2Struct: Benchmarking Structure Extraction for Vision-Language Models", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "We introduce Image2Struct, a benchmark to evaluate vision-language models (VLMs) on extracting structure from images.\nOur benchmark 1) captures real-world use cases, 2) is fully automatic and does not require human judgment, and 3) is based on a renewable stream of fresh data.\nIn Image2Struct, VLMs are prompted to generate the underlying structure (e.g., LaTeX code or HTML) from an input image (e.g., webpage screenshot).\nThe structure is then rendered to produce an output image (e.g., rendered webpage), which is compared against the input image to produce a similarity score.\nThis round-trip evaluation allows us to quantitatively evaluate VLMs on tasks with multiple valid structures.\nWe create a pipeline that downloads fresh data from active online communities upon execution and evaluates the VLMs without human intervention.\nWe introduce three domains (Webpages, LaTeX, and Musical Scores) and use five image metrics (pixel similarity, cosine similarity between the Inception vectors, learned perceptual image patch similarity, structural similarity index measure, and earth mover similarity) that allow efficient and automatic comparison between pairs of images. \nWe evaluate Image2Struct on 14 prominent VLMs and find that scores vary widely, indicating that Image2Struct can differentiate between the performances of different VLMs.\nAdditionally, the best score varies considerably across domains (e.g., 0.402 on sheet music vs. 0.830 on LaTeX equations), indicating that Image2Struct contains tasks of varying difficulty.\nFor transparency, we release the full results at https://crfm.stanford.edu/helm/image2struct/v1.0.1/.", "keywords": "Vision-language models;benchmark;automatic;structured information", "primary_area": "", "supplementary_material": "/attachment/3f101db940da0c726dda18a5d99b55507542b64c.pdf", "author": "Josselin Somerville Roberts;Tony Lee;Chi Heem Wong;Michihiro Yasunaga;Yifan Mai;Percy Liang", "authorids": "~Josselin_Somerville_Roberts1;~Tony_Lee1;~Chi_Heem_Wong1;~Michihiro_Yasunaga1;~Yifan_Mai1;~Percy_Liang1", "gender": "M;M;;;Non-Binary;", "homepage": "https://josselinsomervilleroberts.github.io/;;;;https://yifanmai.com/;https://cs.stanford.edu/~pliang/", "dblp": ";46/4265;;202/1809;156/8369;04/1701", "google_scholar": "eLfRuNAAAAAJ;OYNdx48AAAAJ;;SieJYoEAAAAJ;QLbLGIMAAAAJ;pouyVyUAAAAJ", "orcid": "0009-0009-7878-5067;;;;0009-0004-7270-2607;", "linkedin": "josselin-somerville-roberts/;tonyhlee/;;;yifan-mai;", "or_profile": "~Josselin_Somerville_Roberts1;~Tony_Lee1;~Chi_Heem_Wong1;~Michihiro_Yasunaga1;~Yifan_Mai1;~Percy_Liang1", "aff": "Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu", "position": "MS student;Researcher;;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nroberts2024imagestruct,\ntitle={Image2Struct: Benchmarking Structure Extraction for Vision-Language Models},\nauthor={Josselin Somerville Roberts and Tony Lee and Chi Heem Wong and Michihiro Yasunaga and Yifan Mai and Percy Liang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=8m6zw8Jur0}\n}", "github": "", "project": "", "reviewers": "ksMf;khfM;qbWL", "site": "https://openreview.net/forum?id=8m6zw8Jur0", "pdf_size": 5575009, "rating": "6;7;7", "confidence": "4;3;4", "wc_summary_and_contributions": "124;134;68", "wc_strengths": "267;88;4", "wc_improvement": "275;274;27", "wc_limitations": "1;7;4", "wc_correctness": "17;33;22", "wc_clarity": "1;50;21", "wc_relation_to_prior_work": "8;41;1", "wc_documentation": "1;67;39", "wc_additional_feedback": "1;1;1", "wc_review": "695;695;187", "wc_reply_reviewers": "45;69;0", "wc_reply_authors": "12;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 108.66666666666667, 29.044027881055953 ], "wc_strengths_avg": [ 119.66666666666667, 109.67933057579971 ], "wc_improvement_avg": [ 192.0, 116.67333314286802 ], "wc_limitations_avg": [ 4.0, 2.449489742783178 ], "wc_correctness_avg": [ 24.0, 6.683312551921141 ], "wc_clarity_avg": [ 24.0, 20.11632835948615 ], "wc_relation_to_prior_work_avg": [ 16.666666666666668, 17.441967269268172 ], "wc_documentation_avg": [ 35.666666666666664, 27.047283700134393 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 525.6666666666666, 239.4734965618441 ], "wc_reply_reviewers_avg": [ 38.0, 28.600699292150182 ], "wc_reply_authors_avg": [ 4.0, 5.656854249492381 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17890601626602141835&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Cut Generating Functions for Integer Programming", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96352", "id": "8mZc259r8X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8mZc259r8X", "openreview": "https://openreview.net/forum?id=8mZc259r8X", "poster": "/media/PosterPDFs/NeurIPS%202024/96352.png?t=1733464546.2145112", "project": "", "author_site": "Hongyu Cheng, Amitabh Basu", "tldr": "", "abstract": "The branch-and-cut algorithm is the method of choice to solve large scale integer programming problems in practice. A key ingredient of branch-and-cut is the use of *cutting planes* which are derived constraints that reduce the search space for an optimal solution. Selecting effective cutting planes to produce small branch-and-cut trees is a critical challenge in the branch-and-cut algorithm. Recent advances have employed a data-driven approach to select good cutting planes from a parameterized family, aimed at reducing the branch-and-bound tree size (in expectation) for a given distribution of integer programming instances. We extend this idea to the selection of the best cut generating function (CGF), which is a tool in the integer programming literature for generating a wide variety of cutting planes that generalize the well-known Gomory Mixed-Integer (GMI) cutting planes. We provide rigorous sample complexity bounds for the selection of an effective CGF from certain parameterized families that provably performs well for any specified distribution on the problem instances. Our empirical results show that the selected CGF can outperform the GMI cuts for certain distributions. Additionally, we explore the sample complexity of using neural networks for instance-dependent CGF selection.", "keywords": "Integer programming;branch-and-cut;cut generating functions;cutting planes;branch-and-bound;sample complexity;learning theory", "primary_area": "optimization", "supplementary_material": "", "author": "Hongyu Cheng;Amitabh Basu", "authorids": "~Hongyu_Cheng1;~Amitabh_Basu1", "gender": "M;M", "homepage": "https://hongyucheng.net/;", "dblp": ";", "google_scholar": "vx1h9sUAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Hongyu_Cheng1;~Amitabh_Basu1", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncheng2024learning,\ntitle={Learning Cut Generating Functions for Integer Programming},\nauthor={Hongyu Cheng and Amitabh Basu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8mZc259r8X}\n}", "github": "", "reviewers": "6RAE;TAJ6;yCxh", "pdf_size": 751293, "rating": "5;7;7", "confidence": "2;2;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "352;56;134", "wc_strengths": "49;47;128", "wc_weaknesses": "340;107;56", "wc_questions": "257;1;301", "wc_limitations": "18;1;36", "wc_review": "1016;212;655", "wc_reply_reviewers": "0;0;86", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 180.66666666666666, 125.26593932731896 ], "wc_strengths_avg": [ 74.66666666666667, 37.72119946248911 ], "wc_weaknesses_avg": [ 167.66666666666666, 123.62398185177862 ], "wc_questions_avg": [ 186.33333333333334, 132.27580613584968 ], "wc_limitations_avg": [ 18.333333333333332, 14.29063407348401 ], "wc_review_avg": [ 627.6666666666666, 328.80017572312147 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 40.54078878802872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10762819834305652153&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "jhu.edu;jhu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Temporal-Difference Learning Using Distributed Error Signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96351", "id": "8moTQjfqAV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8moTQjfqAV", "openreview": "https://openreview.net/forum?id=8moTQjfqAV", "poster": "", "project": "", "author_site": "Jonas Guan, Shon Verch, Claas Voelcker, Ethan Jackson, Nicolas Papernot, William Cunningham", "tldr": "", "abstract": "A computational problem in biological reward-based learning is how credit assignment is performed in the nucleus accumbens (NAc). Much research suggests that NAc dopamine encodes temporal-difference (TD) errors for learning value predictions. However, dopamine is synchronously distributed in regionally homogeneous concentrations, which does not support explicit credit assignment (like used by backpropagation). It is unclear whether distributed errors alone are sufficient for synapses to make coordinated updates to learn complex, nonlinear reward-based learning tasks. We design a new deep Q-learning algorithm, Artificial Dopamine, to computationally demonstrate that synchronously distributed, per-layer TD errors may be sufficient to learn surprisingly complex RL tasks. We empirically evaluate our algorithm on MinAtar, the DeepMind Control Suite, and classic control tasks, and show it often achieves comparable performance to deep RL algorithms that use backpropagation.", "keywords": "neuroscience;reinforcement learning;temporal-difference learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Jonas Guan;Shon Eduard Verch;Claas A Voelcker;Ethan C Jackson;Nicolas Papernot;William A Cunningham", "authorids": "~Jonas_Guan1;~Shon_Eduard_Verch1;~Claas_A_Voelcker1;~Ethan_C_Jackson1;~Nicolas_Papernot1;~William_A_Cunningham1", "gender": ";M;M;M;M;", "homepage": ";https://github.com/galacticglum/;;;https://www.papernot.fr;http://social-ai.ca", "dblp": "275/9969;;250/2358;;162/1405;00/8258", "google_scholar": "OoUzutoAAAAJ;;UZq8qZ8AAAAJ;https://scholar.google.ca/citations?hl=en;cGxq0cMAAAAJ;U2KY2F8AAAAJ", "orcid": ";;;;;", "linkedin": ";https://linkedin.com/in/shon-verch;;;nicolaspapernot;", "or_profile": "~Jonas_Guan1;~Shon_Eduard_Verch1;~Claas_A_Voelcker1;~Ethan_C_Jackson1;~Nicolas_Papernot1;~William_A_Cunningham1", "aff": "Department of Computer Science, University of Toronto;University of Toronto;Toronto University;;Google;University of Toronto", "aff_domain": "cs.toronto.edu;utoronto.ca;utoronto.ca;;google.com;utoronto.ca", "position": "PhD student;Undergrad student;PhD student;;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nguan2024temporaldifference,\ntitle={Temporal-Difference Learning Using Distributed Error Signals},\nauthor={Jonas Guan and Shon Eduard Verch and Claas A Voelcker and Ethan C Jackson and Nicolas Papernot and William A Cunningham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8moTQjfqAV}\n}", "github": "", "reviewers": "Sdew;8Waq;K1xv;dpw2", "pdf_size": 9325092, "rating": "4;7;7;8", "confidence": "5;4;4;4", "soundness": "2;4;3;4", "novelty": "2;3;2;3", "presentation": "4;3;4;4", "wc_summary": "84;55;75;138", "wc_strengths": "121;53;60;154", "wc_weaknesses": "432;180;41;208", "wc_questions": "1;97;36;108", "wc_limitations": "29;15;29;65", "wc_review": "667;400;241;673", "wc_reply_reviewers": "851;32;75;125", "wc_reply_authors": "259;324;139;286", "reply_reviewers": "3;2;1;2", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 30.71644510681534 ], "wc_strengths_avg": [ 97.0, 42.2196636651691 ], "wc_weaknesses_avg": [ 215.25, 140.21300759915252 ], "wc_questions_avg": [ 60.5, 43.957365708149524 ], "wc_limitations_avg": [ 34.5, 18.513508581573618 ], "wc_review_avg": [ 495.25, 183.58155544607416 ], "wc_reply_reviewers_avg": [ 270.75, 336.6202422612164 ], "wc_reply_authors_avg": [ 252.0, 69.20621359386742 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pwdy2ZQe-owJ:scholar.google.com/&scioq=Temporal-Difference+Learning+Using+Distributed+Error+Signals&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cs.toronto.edu;utoronto.ca;utoronto.ca;;google.com;utoronto.ca", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Toronto;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "U of T;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Toronto;;Mountain View", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Finding Transformer Circuits With Edge Pruning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96350", "id": "8oSY3rA9jY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8oSY3rA9jY", "openreview": "https://openreview.net/forum?id=8oSY3rA9jY", "poster": "", "project": "", "author_site": "Adithya Bhaskar, Alexander Wettig, Dan Friedman, Danqi Chen", "tldr": "", "abstract": "The path to interpreting a language model often proceeds via analysis of circuits---sparse computational subgraphs of the model that capture specific aspects of its behavior. Recent work has automated the task of discovering circuits. Yet, these methods have practical limitations, as they either rely on inefficient search algorithms or inaccurate approximations. In this paper, we frame circuit discovery as an optimization problem and propose _Edge Pruning_ as an effective and scalable solution. Edge Pruning leverages gradient-based pruning techniques, but instead of removing neurons or components, prunes the _edges_ between components. Our method finds circuits in GPT-2 that use less than half the number of edges than circuits found by previous methods while being equally faithful to the full model predictions on standard circuit-finding tasks. Edge Pruning is efficient on tasks involving up to 100,000 examples, outperforming previous methods in speed and producing substantially better circuits. It also perfectly recovers the ground-truth circuits in two models compiled with Tracr. Thanks to its efficiency, we scale Edge Pruning to CodeLlama-13B, a model over 100x the size of GPT-2.\nWe use this setting for a case study, where we compare the mechanisms behind instruction prompting and in-context learning.\nWe find two circuits with more than 99.96% sparsity that match the performance of the full model. Further analysis reveals that the mechanisms in the two settings overlap substantially. This shows that Edge Pruning is a practical and scalable tool for interpretability, \nwhich can shed light on behaviors that only emerge in large models.", "keywords": "interpretability;circuits;pruning;optimization", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Adithya Bhaskar;Alexander Wettig;Dan Friedman;Danqi Chen", "authorids": "~Adithya_Bhaskar2;~Alexander_Wettig1;~Dan_Friedman2;~Danqi_Chen1", "gender": "M;;;F", "homepage": "https://adithyabh.github.io;https://www.cs.princeton.edu/~awettig/;http://danfriedman0.github.io/;https://www.cs.princeton.edu/~danqic/", "dblp": "334/7656;302/0235;205/9386;87/7949", "google_scholar": ";N_jSE08AAAAJ;1UMQ_KwAAAAJ;sVR8ktkAAAAJ", "orcid": ";;;", "linkedin": ";alexander-wettig/;;", "or_profile": "~Adithya_Bhaskar2;~Alexander_Wettig1;~Dan_Friedman2;~Danqi_Chen1", "aff": "Princeton University;Allen Institute for Artificial Intelligence;Princeton University;Princeton University", "aff_domain": "princeton.edu;allenai.org;princeton.edu;cs.princeton.edu", "position": "PhD student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbhaskar2024finding,\ntitle={Finding Transformer Circuits With Edge Pruning},\nauthor={Adithya Bhaskar and Alexander Wettig and Dan Friedman and Danqi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8oSY3rA9jY}\n}", "github": "", "reviewers": "qgos;TqQ2;HMkg;xxo6", "pdf_size": 1726714, "rating": "3;7;8;8", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "1;4;4;4", "presentation": "2;3;3;3", "wc_summary": "81;60;62;320", "wc_strengths": "39;138;86;35", "wc_weaknesses": "329;238;33;154", "wc_questions": "32;67;62;105", "wc_limitations": "4;52;22;14", "wc_review": "485;555;265;628", "wc_reply_reviewers": "175;111;16;41", "wc_reply_authors": "197;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 1.299038105676658 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 130.75, 109.57046819284838 ], "wc_strengths_avg": [ 74.5, 41.78815621680382 ], "wc_weaknesses_avg": [ 188.5, 109.0424229371303 ], "wc_questions_avg": [ 66.5, 25.947061490658243 ], "wc_limitations_avg": [ 23.0, 17.916472867168917 ], "wc_review_avg": [ 483.25, 135.7725579784074 ], "wc_reply_reviewers_avg": [ 85.75, 62.1907348404889 ], "wc_reply_authors_avg": [ 49.25, 85.3035022727672 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8574929257125441, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9152909514072420745&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "princeton.edu;allenai.org;princeton.edu;cs.princeton.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Princeton University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://allenai.org", "aff_unique_abbr": "Princeton;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Graph Diffusion Policy Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96349", "id": "8ohsbxw7q8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8ohsbxw7q8", "openreview": "https://openreview.net/forum?id=8ohsbxw7q8", "poster": "/media/PosterPDFs/NeurIPS%202024/96349.png?t=1733907496.2087677", "project": "", "author_site": "Yijing Liu, Chao Du, Tianyu Pang, Chongxuan LI, Min Lin, Wei Chen", "tldr": "", "abstract": "Recent research has made significant progress in optimizing diffusion models for downstream objectives, which is an important pursuit in fields such as graph generation for drug design. However, directly applying these models to graph presents challenges, resulting in suboptimal performance. This paper introduces graph diffusion policy optimization (GDPO), a novel approach to optimize graph diffusion models for arbitrary (e.g., non-differentiable) objectives using reinforcement learning. GDPO is based on an eager policy gradient tailored for graph diffusion models, developed through meticulous analysis and promising improved performance. Experimental results show that GDPO achieves state-of-the-art performance in various graph generation tasks with complex and diverse objectives. Code is available at https://github.com/sail-sg/GDPO.", "keywords": "Graph Generation;Diffusion Models;Reinforcement Learning", "primary_area": "generative_models", "supplementary_material": "/attachment/c29c33965c9cf214f505e5b3e585d7120fdad557.zip", "author": "Yijing Liu;Chao Du;Tianyu Pang;Chongxuan Li;Min Lin;Wei Chen", "authorids": "~Yijing_Liu1;~Chao_Du1;~Tianyu_Pang1;~Chongxuan_Li1;~Min_Lin1;~Wei_Chen34", "gender": "M;M;M;M;M;M", "homepage": "https://diphda.net;https://duchao0726.github.io/;https://p2333.github.io/;http://ml.cs.tsinghua.edu.cn/~chongxuan;https://linmin.me;http://www.cad.zju.edu.cn/home/chenwei/", "dblp": ";75/7523;202/2550;161/9965;;c/WeiChen1", "google_scholar": "LQj6EzIAAAAJ;QOp7xW0AAAAJ;wYDbtFsAAAAJ;UKMcQn4AAAAJ;BGONmkIAAAAJ;EgQyYGUAAAAJ", "orcid": "0000-0001-8420-2213;0000-0003-1244-6336;0000-0003-0639-6176;0000-0002-0912-9076;;0000-0002-8365-4741", "linkedin": ";duchao/;%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;;min-lin-08a3a422/;", "or_profile": "~Yijing_Liu1;~Chao_Du1;~Tianyu_Pang1;~Chongxuan_Li1;~Min_Lin1;~Wei_Chen34", "aff": "Zhejiang University;Sea AI Lab;Sea AI Lab;Renmin University of China;Sea AI Lab;State key laboratory of CAD&CG, Zhejiang University", "aff_domain": "zju.edu.cn;sea.com;sea.com;ruc.edu.cn;sea.com;zju.edu.cn", "position": "PhD student;Senior Research Scientist;Senior Research Scientist;Associate Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2024graph,\ntitle={Graph Diffusion Policy Optimization},\nauthor={Yijing Liu and Chao Du and Tianyu Pang and Chongxuan Li and Min Lin and Wei Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8ohsbxw7q8}\n}", "github": "", "reviewers": "4R3n;hRfp;gTVc", "pdf_size": 1780941, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "70;131;110", "wc_strengths": "70;28;307", "wc_weaknesses": "176;82;111", "wc_questions": "77;82;470", "wc_limitations": "35;22;9", "wc_review": "428;345;1007", "wc_reply_reviewers": "21;28;95", "wc_reply_authors": "22;27;48", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.66666666666667, 25.30261295246446 ], "wc_strengths_avg": [ 135.0, 122.82507887235408 ], "wc_weaknesses_avg": [ 123.0, 39.30224760324359 ], "wc_questions_avg": [ 209.66666666666666, 184.0947823510729 ], "wc_limitations_avg": [ 22.0, 10.614455552060438 ], "wc_review_avg": [ 593.3333333333334, 294.46259902103395 ], "wc_reply_reviewers_avg": [ 48.0, 33.35665850571167 ], "wc_reply_authors_avg": [ 32.333333333333336, 11.2644968324772 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17822250656320840072&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;sea.com;sea.com;ruc.edu.cn;sea.com;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "Zhejiang University;Sea AI Lab;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;;http://www.ruc.edu.cn", "aff_unique_abbr": "ZJU;;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Provable Benefit of Cutout and CutMix for Feature Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96348", "id": "8on9dIUh5v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8on9dIUh5v", "openreview": "https://openreview.net/forum?id=8on9dIUh5v", "poster": "", "project": "", "author_site": "Junsoo Oh, Chulhee Yun", "tldr": "", "abstract": "Patch-level data augmentation techniques such as Cutout and CutMix have demonstrated significant efficacy in enhancing the performance of vision tasks. However, a comprehensive theoretical understanding of these methods remains elusive. In this paper, we study two-layer neural networks trained using three distinct methods: vanilla training without augmentation, Cutout training, and CutMix training. Our analysis focuses on a feature-noise data model, which consists of several label-dependent features of varying rarity and label-independent noises of differing strengths. Our theorems demonstrate that Cutout training can learn low-frequency features that vanilla training cannot, while CutMix training can learn even rarer features that Cutout cannot capture. From this, we establish that CutMix yields the highest test accuracy among the three. Our novel analysis reveals that CutMix training makes the network learn all features and noise vectors \"evenly\" regardless of the rarity and strength, which provides an interesting insight into understanding patch-level augmentation.", "keywords": "Cutout;CutMix;feature learning;theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Junsoo Oh;Chulhee Yun", "authorids": "~Junsoo_Oh1;~Chulhee_Yun1", "gender": "M;M", "homepage": "https://junsoo424.github.io/;https://chulheeyun.github.io/", "dblp": ";138/0148.html", "google_scholar": ";Ukl64ggAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Junsoo_Oh1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\noh2024provable,\ntitle={Provable Benefit of Cutout and CutMix for Feature Learning},\nauthor={Junsoo Oh and Chulhee Yun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8on9dIUh5v}\n}", "github": "", "reviewers": "pSne;ZwXm;59hr;mYpt;hATA", "pdf_size": 1583797, "rating": "5;6;6;6;7", "confidence": "3;3;3;4;2", "soundness": "4;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "4;3;3;2;3", "wc_summary": "85;52;71;236;168", "wc_strengths": "38;18;78;47;56", "wc_weaknesses": "117;24;53;48;209", "wc_questions": "79;98;50;343;258", "wc_limitations": "27;3;23;95;1", "wc_review": "346;195;275;769;692", "wc_reply_reviewers": "28;11;13;23;24", "wc_reply_authors": "39;19;36;33;41", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 122.4, 69.25489152399273 ], "wc_strengths_avg": [ 47.4, 19.815145722401336 ], "wc_weaknesses_avg": [ 90.2, 66.90112106684013 ], "wc_questions_avg": [ 165.6, 114.40384608919405 ], "wc_limitations_avg": [ 29.8, 34.21344764854896 ], "wc_review_avg": [ 455.4, 230.93081214944013 ], "wc_reply_reviewers_avg": [ 19.8, 6.615134163416491 ], "wc_reply_authors_avg": [ 33.6, 7.787168933572715 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15175984151879065001&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "kaist.edu;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Visual Prompt Tuning in Null Space for Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96347", "id": "8pRemr5kEi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8pRemr5kEi", "openreview": "https://openreview.net/forum?id=8pRemr5kEi", "poster": "/media/PosterPDFs/NeurIPS%202024/96347.png?t=1731407070.929215", "project": "", "author_site": "Yue Lu, Shizhou Zhang, De Cheng, Yinghui Xing, Nannan Wang, PENG WANG, Yanning Zhang", "tldr": "", "abstract": "Existing prompt-tuning methods have demonstrated impressive performances in continual learning (CL), by selecting and updating relevant prompts in the vision-transformer models. On the contrary, this paper aims to learn each task by tuning the prompts in the direction orthogonal to the subspace spanned by previous tasks' features, so as to ensure no interference on tasks that have been learned to overcome catastrophic forgetting in CL. However, different from the orthogonal projection in the traditional CNN architecture, the prompt gradient orthogonal projection in the ViT architecture shows completely different and greater challenges, i.e., 1) the high-order and non-linear self-attention operation; 2) the drift of prompt distribution brought by the LayerNorm in the transformer block. Theoretically, we have finally deduced two consistency conditions to achieve the prompt gradient orthogonal projection, which provide a theoretical guarantee of eliminating interference on previously learned knowledge via the self-attention mechanism in visual prompt tuning. In practice, an effective null-space-based approximation solution has been proposed to implement the prompt gradient orthogonal projection. Extensive experimental results demonstrate the effectiveness of anti-forgetting on four class-incremental benchmarks with diverse pre-trained baseline models, and our approach achieves superior performances to state-of-the-art methods. Our code is available at https://github.com/zugexiaodui/VPTinNSforCL", "keywords": "Continual Learning;Visual prompt tuning;Gradient orthogonal projection;Null space", "primary_area": "machine_vision", "supplementary_material": "/attachment/b4ca47ecbc1514f8f4fadccf44bc5223e687772b.zip", "author": "Yue Lu;Shizhou Zhang;De Cheng;Yinghui Xing;Nannan Wang;PENG WANG;Yanning Zhang", "authorids": "~Yue_Lu4;~Shizhou_Zhang3;~De_Cheng3;~Yinghui_Xing1;~Nannan_Wang1;~PENG_WANG15;~Yanning_Zhang1", "gender": "M;M;M;F;M;M;F", "homepage": ";https://teacher.nwpu.edu.cn/szzhang;https://web.xidian.edu.cn/dcheng/index.html;https://teacher.nwpu.edu.cn/yinghuixing.html;;https://wangpengnorman.github.io/;http://teacher.nwpu.edu.cn/ynzhang", "dblp": "74/6493-8;151/0743;154/1991/;218/2673;10/8359-1;95/4442-15.html;14/6655", "google_scholar": ";https://scholar.google.fi/citations?user=Hlpe0osAAAAJ;180lASkAAAAJ;5bSDQUQAAAAJ;SRBn7oUAAAAJ;https://scholar.google.com.au/citations?user=aPLp7pAAAAAJ;", "orcid": "0000-0002-9805-1732;;;0000-0001-6021-8261;;0000-0001-7689-3405;", "linkedin": ";;;;;;", "or_profile": "~Yue_Lu4;~Shizhou_Zhang3;~De_Cheng3;~Yinghui_Xing1;~Nannan_Wang1;~PENG_WANG15;~Yanning_Zhang1", "aff": "Northwestern Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Xidian University;Northwest Polytechnical University;Xidian University;Northwestern Polytechnical University;Northwestern Polytechnical University", "aff_domain": "nwpu.edu.cn;nwpu.edu.cn;xidian.edu.cn;nwpu.edu.cn;xidian.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024visual,\ntitle={Visual Prompt Tuning in Null Space for Continual Learning},\nauthor={Yue Lu and Shizhou Zhang and De Cheng and Yinghui Xing and Nannan Wang and PENG WANG and Yanning Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8pRemr5kEi}\n}", "github": "", "reviewers": "4B73;W3C1;rV5K;2TYW", "pdf_size": 1018917, "rating": "6;7;7;8", "confidence": "5;5;5;5", "soundness": "3;4;3;4", "novelty": "3;4;3;4", "presentation": "3;4;2;4", "wc_summary": "155;76;48;94", "wc_strengths": "154;62;53;44", "wc_weaknesses": "178;123;154;85", "wc_questions": "37;4;37;3", "wc_limitations": "9;1;14;1", "wc_review": "533;266;306;227", "wc_reply_reviewers": "26;28;311;12", "wc_reply_authors": "0;0;942;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;4;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 93.25, 39.23885192000398 ], "wc_strengths_avg": [ 78.25, 44.19488092528364 ], "wc_weaknesses_avg": [ 135.0, 34.835326896700714 ], "wc_questions_avg": [ 20.25, 16.753730927766508 ], "wc_limitations_avg": [ 6.25, 5.539629951540085 ], "wc_review_avg": [ 333.0, 118.80025252498413 ], "wc_reply_reviewers_avg": [ 94.25, 125.29240799026891 ], "wc_reply_authors_avg": [ 235.5, 407.8979651824706 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12426790229755875096&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nwpu.edu.cn;nwpu.edu.cn;xidian.edu.cn;nwpu.edu.cn;xidian.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;1;2;0;0", "aff_unique_norm": "Northwestern Polytechnical University;Northwest Polytechnical University;Xidian University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nwpu.edu.cn;http://www.nwpu.edu.cn;http://www.xidian.edu.cn/", "aff_unique_abbr": "NWPU;NWPU;Xidian", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Beyond the Doors of Perception: Vision Transformers Represent Relations Between Objects", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96346", "id": "8puv3c9CPg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8puv3c9CPg", "openreview": "https://openreview.net/forum?id=8puv3c9CPg", "poster": "/media/PosterPDFs/NeurIPS%202024/96346.png?t=1733252981.9331584", "project": "", "author_site": "Michael Lepori, Alexa Tartaglini, Wai Keen Vong, Thomas Serre, Brenden Lake, Ellie Pavlick", "tldr": "", "abstract": "Though vision transformers (ViTs) have achieved state-of-the-art performance in a variety of settings, they exhibit surprising failures when performing tasks involving visual relations. This begs the question: how do ViTs attempt to perform tasks that require computing visual relations between objects? Prior efforts to interpret ViTs tend to focus on characterizing relevant low-level visual features. In contrast, we adopt methods from mechanistic interpretability to study the higher-level visual algorithms that ViTs use to perform abstract visual reasoning. We present a case study of a fundamental, yet surprisingly difficult, relational reasoning task: judging whether two visual entities are the same or different. We find that pretrained ViTs fine-tuned on this task often exhibit two qualitatively different stages of processing despite having no obvious inductive biases to do so: 1) a perceptual stage wherein local object features are extracted and stored in a disentangled representation, and 2) a relational stage wherein object representations are compared. In the second stage, we find evidence that ViTs can learn to represent somewhat abstract visual relations, a capability that has long been considered out of reach for artificial neural networks. Finally, we demonstrate that failures at either stage can prevent a model from learning a generalizable solution to our fairly simple tasks. By understanding ViTs in terms of discrete processing stages, one can more precisely diagnose and rectify shortcomings of existing and future models.", "keywords": "visual reasoning;mechanistic interpretability;transformers;cognitive science", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Michael A. Lepori;Alexa R. Tartaglini;Wai Keen Vong;Thomas Serre;Brenden Lake;Ellie Pavlick", "authorids": "~Michael_A._Lepori1;~Alexa_R._Tartaglini1;~Wai_Keen_Vong1;~Thomas_Serre1;~Brenden_M._Lake1;~Ellie_Pavlick1", "gender": "M;M;M;M;F;F", "homepage": "https://lepori.xyz/;https://www.waikeenvong.com;https://serre-lab.clps.brown.edu/;https://cims.nyu.edu/~brenden/;http://cs.brown.edu/people/epavlick/;https://www.alexatartaglini.com/", "dblp": "262/0162;;;47/9567;141/4059;", "google_scholar": "G1fepc8AAAAJ;SwVlB20AAAAJ;kZlPW4wAAAAJ;vspmOX8AAAAJ;sFyrSa8AAAAJ;Z-uWbOQAAAAJ", "orcid": ";;;;;", "linkedin": "michael-lepori-925426124/;;;;;", "or_profile": "~Michael_A._Lepori1;~Wai_Keen_Vong1;~Thomas_Serre1;~Brenden_M._Lake1;~Ellie_Pavlick1;~Alexa_Rae_Tartaglini1", "aff": "Brown University;New York University;Universit\u00e9 de Toulouse;New York University;Brown University;New York University", "aff_domain": "brown.edu;nyu.edu;univ-toulouse.fr;nyu.edu;brown.edu;nyu.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nlepori2024beyond,\ntitle={Beyond the Doors of Perception: Vision Transformers Represent Relations Between Objects},\nauthor={Michael A. Lepori and Alexa R. Tartaglini and Wai Keen Vong and Thomas Serre and Brenden Lake and Ellie Pavlick},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8puv3c9CPg}\n}", "github": "", "reviewers": "NSpf;UKTX;c3Nf;DaaT", "pdf_size": 14388583, "rating": "5;5;6;6", "confidence": "3;3;5;3", "soundness": "3;2;3;2", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "109;57;69;115", "wc_strengths": "57;28;63;125", "wc_weaknesses": "127;96;121;61", "wc_questions": "75;170;100;104", "wc_limitations": "17;1;33;10", "wc_review": "385;352;386;415", "wc_reply_reviewers": "65;18;60;20", "wc_reply_authors": "203;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.5, 24.95495942693556 ], "wc_strengths_avg": [ 68.25, 35.336772631353874 ], "wc_weaknesses_avg": [ 101.25, 25.984370302164336 ], "wc_questions_avg": [ 112.25, 35.1452343853331 ], "wc_limitations_avg": [ 15.25, 11.712706775122479 ], "wc_review_avg": [ 384.5, 22.299103120977758 ], "wc_reply_reviewers_avg": [ 40.75, 21.833174299675253 ], "wc_reply_authors_avg": [ 50.75, 87.90157848412052 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18058223166391028136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "brown.edu;nyu.edu;univ-toulouse.fr;nyu.edu;brown.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "Brown University;New York University;Universit\u00e9 de Toulouse", "aff_unique_dep": ";;", "aff_unique_url": "https://www.brown.edu;https://www.nyu.edu;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;NYU;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;France" }, { "title": "Off-policy estimation with adaptively collected data: the power of online learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96345", "id": "8qEkjSEdls", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8qEkjSEdls", "openreview": "https://openreview.net/forum?id=8qEkjSEdls", "poster": "/media/PosterPDFs/NeurIPS%202024/96345.png?t=1733860894.8771164", "project": "", "author_site": "Jeonghwan Lee, Cong Ma", "tldr": "", "abstract": "We consider estimation of a linear functional of the treatment effect from adaptively collected data. This problem finds a variety of applications including off-policy evaluation in contextual bandits, and estimation of the average treatment effect in causal inference. While a certain class of augmented inverse propensity weighting (AIPW) estimators enjoys desirable asymptotic properties including the semi-parametric efficiency, much less is known about their non-asymptotic theory with adaptively collected data. To fill in the gap, we first present generic upper bounds on the mean-squared error of the class of AIPW estimators that crucially depends on a sequentially weighted error between the treatment effect and its estimates. Motivated by this, we propose a general reduction scheme that allows one to produce a sequence of estimates for the treatment effect via online learning to minimize the sequentially weighted estimation error. To illustrate this, we provide three concrete instantiations in (1) the tabular case; (2) the case of linear function approximation; and (3) the case of general function approximation for the outcome model. We then provide a local minimax lower bound to show the instance-dependent optimality of the AIPW estimator using no-regret online learning algorithms.", "keywords": "Causal inference;function approximation;learning theory;off-policy estimation;online learning;reinforcement learning", "primary_area": "causal_inference", "supplementary_material": "", "author": "Jeonghwan Lee;Cong Ma", "authorids": "~Jeonghwan_Lee2;~Cong_Ma1", "gender": "M;M", "homepage": "https://jhbrianlee.github.io/;https://congma1028.github.io/", "dblp": ";42/10808", "google_scholar": "s9LzirIAAAAJ;", "orcid": "0000-0002-3223-2573;", "linkedin": "jeonghwan-lee-5bb7491b1/;", "or_profile": "~Jeonghwan_Lee2;~Cong_Ma1", "aff": "The University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlee2024offpolicy,\ntitle={Off-policy estimation with adaptively collected data: the power of online learning},\nauthor={Jeonghwan Lee and Cong Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8qEkjSEdls}\n}", "github": "", "reviewers": "kxx3;KnWJ;AMTN", "pdf_size": 497327, "rating": "5;5;5", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;2;3", "wc_summary": "132;78;61", "wc_strengths": "24;51;37", "wc_weaknesses": "358;155;87", "wc_questions": "84;2;68", "wc_limitations": "113;7;13", "wc_review": "711;293;266", "wc_reply_reviewers": "78;97;48", "wc_reply_authors": "0;0;268", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 90.33333333333333, 30.26916289265731 ], "wc_strengths_avg": [ 37.333333333333336, 11.025223605694151 ], "wc_weaknesses_avg": [ 200.0, 115.1202270092735 ], "wc_questions_avg": [ 51.333333333333336, 35.490217744549774 ], "wc_limitations_avg": [ 44.333333333333336, 48.61641240934542 ], "wc_review_avg": [ 423.3333333333333, 203.70948813336003 ], "wc_reply_reviewers_avg": [ 74.33333333333333, 20.17148702720969 ], "wc_reply_authors_avg": [ 89.33333333333333, 126.33641157199649 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xdmBdSrtQewJ:scholar.google.com/&scioq=Off-policy+estimation+with+adaptively+collected+data:+the+power+of+online+learning&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "uchicago.edu;uchicago.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "NeuroClips: Towards High-fidelity and Smooth fMRI-to-Video Reconstruction", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96344", "id": "8qu52Fl1Dt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8qu52Fl1Dt", "openreview": "https://openreview.net/forum?id=8qu52Fl1Dt", "poster": "", "project": "", "author_site": "Zixuan Gong, Guangyin Bao, Qi Zhang, Zhongwei Wan, Duoqian Miao, Shoujin Wang, Lei Zhu, Changwei Wang, Rongtao Xu, Liang Hu, Ke Liu, Yu Zhang", "tldr": "", "abstract": "Reconstruction of static visual stimuli from non-invasion brain activity fMRI achieves great success, owning to advanced deep learning models such as CLIP and Stable Diffusion. However, the research on fMRI-to-video reconstruction remains limited since decoding the spatiotemporal perception of continuous visual experiences is formidably challenging. We contend that the key to addressing these challenges lies in accurately decoding both high-level semantics and low-level perception flows, as perceived by the brain in response to video stimuli. To the end, we propose NeuroClips, an innovative framework to decode high-fidelity and smooth video from fMRI. NeuroClips utilizes a semantics reconstructor to reconstruct video keyframes, guiding semantic accuracy and consistency, and employs a perception reconstructor to capture low-level perceptual details, ensuring video smoothness. During inference, it adopts a pre-trained T2V diffusion model injected with both keyframes and low-level perception flows for video reconstruction. Evaluated on a publicly available fMRI-video dataset, NeuroClips achieves smooth high-fidelity video reconstruction of up to 6s at 8FPS, gaining significant improvements over state-of-the-art models in various metrics, e.g., a 128% improvement in SSIM and an 81% improvement in spatiotemporal metrics. Our project is available at https://github.com/gongzix/NeuroClips.", "keywords": "fMRI visual decoding; fMRI-to-video Reconstruction", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Zixuan Gong;Guangyin Bao;Qi Zhang;Zhongwei Wan;Duoqian Miao;Shoujin Wang;Lei Zhu;Changwei Wang;Rongtao Xu;Liang Hu;Ke Liu;Yu Zhang", "authorids": "~Zixuan_Gong2;~Guangyin_Bao1;~Qi_Zhang25;~Zhongwei_Wan1;~Duoqian_Miao1;~Shoujin_Wang1;~Lei_Zhu8;~Changwei_Wang2;~Rongtao_Xu1;~Liang_Hu1;~Ke_Liu11;~Yu_Zhang60", "gender": "M;M;M;M;M;M;M;;;M;;", "homepage": "https://github.com/gongzix;;https://sites.google.com/view/qizhang-bit-uts/home;https://people.engineering.osu.edu/people/wan.512;https://iip.tongji.edu.cn;https://shoujinwang1.github.io/;https://sites.google.com/site/homepageleizhu;;http://www.nlpr.ia.ac.cn/ivc/;https://sites.google.com/view/lianghu/home;;", "dblp": "363/7481;363/7435;52/323-20;260/6958.html;90/1041-1;16/8492;99/549-2.html;;93/4025;48/5388-4;;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=0AiYk9sAAAAJ;8UAk1p4AAAAJ;https://scholar.google.com/citations?hl=en;;BQ0mBRIAAAAJ;https://scholar.google.com.sg/citations?user=gw4ISc4AAAAJ;;;https://scholar.google.com.au/citations?user=cj6wAgYAAAAJ;;", "orcid": ";0009-0009-0072-6323;0000-0002-1037-1361;;0000-0001-6588-1468;0000-0003-1133-9379;0000-0002-2993-7142;;;;;", "linkedin": ";;;;;;;;;;;", "or_profile": "~Zixuan_Gong2;~Guangyin_Bao1;~Qi_Zhang25;~Zhongwei_Wan1;~Duoqian_Miao1;~Shoujin_Wang1;~Lei_Zhu8;~Changwei_Wang2;~Rongtao_Xu1;~Liang_Hu1;~Ke_Liu11;~Yu_Zhang60", "aff": ";Tongji University;Tongji University;Ohio State University, Columbus;Tongji University;University of Technology Sydney;Shandong Normal University;;Institute of Automation, Chinese Academy of Sciences;Tongji University;;", "aff_domain": ";tongji.edu.cn;tongji.edu.cn;osu.edu;tongji.edu.cn;uts.edu.au;sdnu.edu.cn;;ia.ac.cn;tongji.edu.cn;;", "position": ";MS student;Researcher;PhD student;Full Professor;Lecturer;Full Professor;;PhD student;Full Professor;;", "bibtex": "@inproceedings{\ngong2024neuroclips,\ntitle={NeuroClips: Towards High-fidelity and Smooth f{MRI}-to-Video Reconstruction},\nauthor={Zixuan Gong and Guangyin Bao and Qi Zhang and Zhongwei Wan and Duoqian Miao and Shoujin Wang and Lei Zhu and Changwei Wang and Rongtao Xu and Liang Hu and Ke Liu and Yu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8qu52Fl1Dt}\n}", "github": "", "reviewers": "oPVf;85K8;mcBd", "pdf_size": 0, "rating": "7;7;8", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "97;40;51", "wc_strengths": "150;48;123", "wc_weaknesses": "180;16;81", "wc_questions": "915;21;195", "wc_limitations": "36;9;1", "wc_review": "1378;134;451", "wc_reply_reviewers": "74;12;34", "wc_reply_authors": "2191;54;54", "reply_reviewers": "1;1;1", "reply_authors": "6;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.666666666666664, 24.689178916188272 ], "wc_strengths_avg": [ 107.0, 43.15089802078283 ], "wc_weaknesses_avg": [ 92.33333333333333, 67.43062278289361 ], "wc_questions_avg": [ 377.0, 386.9987080081793 ], "wc_limitations_avg": [ 15.333333333333334, 14.974051630144134 ], "wc_review_avg": [ 654.3333333333334, 527.8208871282842 ], "wc_reply_reviewers_avg": [ 40.0, 25.664502073226878 ], "wc_reply_authors_avg": [ 766.3333333333334, 1007.3914609304347 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4508162164155678019&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";tongji.edu.cn;tongji.edu.cn;osu.edu;tongji.edu.cn;uts.edu.au;sdnu.edu.cn;;ia.ac.cn;tongji.edu.cn;;", "author_num": 12, "aff_unique_index": "0;0;1;0;2;3;4;0", "aff_unique_norm": "Tongji University;Ohio State University;University of Technology Sydney;Shandong Normal University;Chinese Academy of Sciences", "aff_unique_dep": ";;;;Institute of Automation", "aff_unique_url": "https://www.tongji.edu.cn;https://www.osu.edu;https://www.uts.edu.au;http://www.sdu.edu.cn/;http://www.ia.cas.cn", "aff_unique_abbr": "Tongji;OSU;UTS;SDU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;1;0;2;0;0;0", "aff_country_unique": "China;United States;Australia" }, { "title": "ReST-MCTS*: LLM Self-Training via Process Reward Guided Tree Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96343", "id": "8rcFOqEud5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8rcFOqEud5", "openreview": "https://openreview.net/forum?id=8rcFOqEud5", "poster": "/media/PosterPDFs/NeurIPS%202024/96343.png?t=1731141909.9129388", "project": "", "author_site": "Dan Zhang, Sining Zhoubian, Ziniu Hu, Yisong Yue, Yuxiao Dong, Jie Tang", "tldr": "", "abstract": "Recent methodologies in LLM self-training mostly rely on LLM generating responses and filtering those with correct output answers as training data. This approach often yields a low-quality fine-tuning training set (e.g., incorrect plans or intermediate reasoning). In this paper, we develop a reinforced self-training approach, called ReST-MCTS*, based on integrating process reward guidance with tree search MCTS* for collecting higher-quality reasoning traces as well as per-step value to train policy and reward models. ReST-MCTS* circumvents the per-step manual annotation typically used to train process rewards by tree-search-based reinforcement learning: Given oracle final correct answers, ReST-MCTS* is able to infer the correct process rewards by estimating the probability this step can help lead to the correct answer. These inferred rewards serve dual purposes: they act as value targets for further refining the process reward model and also facilitate the selection of high-quality traces for policy model self-training. We first show that the tree-search policy in ReST-MCTS* achieves higher accuracy compared with prior LLM reasoning baselines such as Best-of-N and Tree-of-Thought, within the same search budget. We then show that by using traces searched by this tree-search policy as training data, we can continuously enhance the three language models for multiple iterations, and outperform other self-training algorithms such as ReST$^\\text{EM}$ and Self-Rewarding LM.", "keywords": "LLM self-training;process reward model;tree search", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/505572f7ae5d80821ed195c9db25d9237eeee21e.zip", "author": "Dan Zhang;Sining Zhoubian;Ziniu Hu;Yisong Yue;Yuxiao Dong;Jie Tang", "authorids": "~Dan_Zhang7;~Sining_Zhoubian1;~Ziniu_Hu1;~Yisong_Yue1;~Yuxiao_Dong1;~Jie_Tang1", "gender": "F;M;M;M;M;", "homepage": "https://zhangdan0602.github.io/;https://github.com/zhoubiansining/zhou;http://acbull.github.io;http://www.yisongyue.com;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": ";367/1870;180/5436;28/1244;17/9267;", "google_scholar": "https://scholar.google.ca/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;x6ct1CsAAAAJ;tEk4qo8AAAAJ;https://scholar.google.com.hk/citations?hl=en;", "orcid": "0000-0003-1115-3945;;;0000-0001-9127-1989;0000-0002-6092-2002;", "linkedin": ";;;yisongyue/;;", "or_profile": "~Dan_Zhang7;~Sining_Zhoubian1;~Ziniu_Hu1;~Yisong_Yue1;~Yuxiao_Dong1;~Jie_Tang1", "aff": "California Institute of Technology;Tsinghua University;Deepmind;California Institute of Technology;Tsinghua University;", "aff_domain": "caltech.edu;mails.tsinghua.edu.cn;deepmind.com;caltech.edu;tsinghua.edu.cn;", "position": "Researcher;Undergrad student;Visiting Researcher;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nzhang2024restmcts,\ntitle={Re{ST}-{MCTS}*: {LLM} Self-Training via Process Reward Guided Tree Search},\nauthor={Dan Zhang and Sining Zhoubian and Ziniu Hu and Yisong Yue and Yuxiao Dong and Jie Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8rcFOqEud5}\n}", "github": "", "reviewers": "S8hx;r9nL;7P64;rDpc", "pdf_size": 1072518, "rating": "5;5;5;7", "confidence": "5;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "70;63;134;69", "wc_strengths": "55;72;66;51", "wc_weaknesses": "285;125;132;114", "wc_questions": "118;59;33;102", "wc_limitations": "8;55;3;22", "wc_review": "536;374;368;358", "wc_reply_reviewers": "87;35;0;71", "wc_reply_authors": "1119;230;84;311", "reply_reviewers": "1;1;0;1", "reply_authors": "5;3;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.0, 28.991378028648448 ], "wc_strengths_avg": [ 61.0, 8.396427811873332 ], "wc_weaknesses_avg": [ 164.0, 70.15340333868343 ], "wc_questions_avg": [ 78.0, 33.771289581536564 ], "wc_limitations_avg": [ 22.0, 20.285462775100793 ], "wc_review_avg": [ 409.0, 73.54590403278758 ], "wc_reply_reviewers_avg": [ 48.25, 33.625697018797986 ], "wc_reply_authors_avg": [ 436.0, 402.63320777104315 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11767881371453366778&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "caltech.edu;mails.tsinghua.edu.cn;deepmind.com;caltech.edu;tsinghua.edu.cn;", "author_num": 6, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "California Institute of Technology;Tsinghua University;DeepMind", "aff_unique_dep": ";;", "aff_unique_url": "https://www.caltech.edu;https://www.tsinghua.edu.cn;https://deepmind.com", "aff_unique_abbr": "Caltech;THU;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "BoostAdapter: Improving Vision-Language Test-Time Adaptation via Regional Bootstrapping", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96342", "id": "8tOYl6WsGY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8tOYl6WsGY", "openreview": "https://openreview.net/forum?id=8tOYl6WsGY", "poster": "/media/PosterPDFs/NeurIPS%202024/96342.png?t=1731555586.4624076", "project": "", "author_site": "Taolin Zhang, Jinpeng Wang, Hang Guo, Tao Dai, Bin Chen, Shu-Tao Xia", "tldr": "", "abstract": "Adaptation of \npretrained vision-language models such as CLIP to various downstream tasks have raised great interest in recent researches. \nPrevious works have proposed a variety of test-time adaptation (TTA) methods to achieve strong generalization without any knowledge of the target domain. \nHowever, existing training-required TTA approaches like TPT necessitate entropy minimization that involves large computational overhead, while training-free methods like TDA overlook the potential for information mining from the test samples themselves.\nIn this paper, we break down the design of existing popular training-required and training-free TTA methods and bridge the gap between them within our framework.\nSpecifically, we maintain a light-weight key-value memory for feature retrieval from instance-agnostic historical samples and instance-aware boosting samples. \nThe historical samples are filtered from the testing data stream and serve to extract useful information from the target distribution, while the boosting samples are drawn from regional bootstrapping and capture the knowledge of the test sample itself.\nWe theoretically justify the rationality behind our method and empirically verify its effectiveness on both the out-of-distribution and the cross-domain datasets, showcasing its applicability in real-world situations.", "keywords": "Test-time adaptation;Vision-Language models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Taolin Zhang;Jinpeng Wang;Hang Guo;Tao Dai;Bin Chen;Shu-Tao Xia", "authorids": "~Taolin_Zhang3;~Jinpeng_Wang3;~Hang_Guo3;~Tao_Dai3;~Bin_Chen4;~Shu-Tao_Xia1", "gender": ";;M;M;M;M", "homepage": "https://github.com/taolinzhang;;https://github.com/csguoh;https://csse.szu.edu.cn/pages/user/index?id=1204;https://binchen17tsinghua.wixsite.com/website;https://www.sigs.tsinghua.edu.cn/xst/list.htm", "dblp": "270/2482-3;;;54/875-1;22/5523-11;03/6195", "google_scholar": "DWnu_G0AAAAJ;;https://scholar.google.com.hk/citations?user=fRwhfpoAAAAJ;MqJNdaAAAAAJ;Yl0wv7AAAAAJ;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ", "orcid": "0009-0006-2441-2861;;0000-0003-1746-2693;0000-0003-0594-6404;0000-0002-4798-230X;0000-0002-8639-982X", "linkedin": ";;;;;", "or_profile": "~Taolin_Zhang3;~Jinpeng_Wang3;~Hang_Guo3;~Tao_Dai3;~Bin_Chen4;~Shu-Tao_Xia1", "aff": "Tsinghua University;;Tsinghua University;Department of Software Engineering, Shenzhen University;Harbin Institute of Technology, Shenzhen;Shenzhen International Graduate School, Tsinghua University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;szu.edu;hit.edu.cn;sz.tsinghua.edu.cn", "position": "MS student;;MS student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024boostadapter,\ntitle={BoostAdapter: Improving Vision-Language Test-Time Adaptation via Regional Bootstrapping},\nauthor={Taolin Zhang and Jinpeng Wang and Hang Guo and Tao Dai and Bin Chen and Shu-Tao Xia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8tOYl6WsGY}\n}", "github": "", "reviewers": "a236;YJpm;HZ6e", "pdf_size": 1584946, "rating": "4;5;6", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;2;2", "presentation": "3;3;3", "wc_summary": "55;86;37", "wc_strengths": "63;47;75", "wc_weaknesses": "137;128;202", "wc_questions": "171;5;62", "wc_limitations": "29;6;46", "wc_review": "455;272;422", "wc_reply_reviewers": "174;71;343", "wc_reply_authors": "798;51;627", "reply_reviewers": "2;1;2", "reply_authors": "4;2;3", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.333333333333336, 20.237478982214054 ], "wc_strengths_avg": [ 61.666666666666664, 11.469767022723502 ], "wc_weaknesses_avg": [ 155.66666666666666, 32.96799795087486 ], "wc_questions_avg": [ 79.33333333333333, 68.86863501543274 ], "wc_limitations_avg": [ 27.0, 16.391054470858997 ], "wc_review_avg": [ 383.0, 79.63667496825819 ], "wc_reply_reviewers_avg": [ 196.0, 112.12790315825346 ], "wc_reply_authors_avg": [ 492.0, 319.5528125365195 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mkPFVwBDP3AJ:scholar.google.com/&scioq=BoostAdapter:+Improving+Vision-Language+Test-Time+Adaptation+via+Regional+Bootstrapping&hl=en&as_sdt=0,10", "gs_version_total": 2, "email": "tsinghua.edu.cn;;tsinghua.edu.cn;szu.edu;hit.edu.cn;sz.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tsinghua University;Shenzhen University;Harbin Institute of Technology", "aff_unique_dep": ";Department of Software Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.szu.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "THU;SZU;HIT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Private Algorithms for Stochastic Saddle Points and Variational Inequalities: Beyond Euclidean Geometry", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96341", "id": "8ugOlbjJpp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8ugOlbjJpp", "openreview": "https://openreview.net/forum?id=8ugOlbjJpp", "poster": "", "project": "", "author_site": "Raef Bassily, Crist\u00f3bal Guzm\u00e1n, Michael Menart", "tldr": "", "abstract": "In this work, we conduct a systematic study of stochastic saddle point problems (SSP) and stochastic variational inequalities (SVI) under the constraint of $(\\epsilon,\\delta)$-differential privacy (DP) in both Euclidean and non-Euclidean setups. We first consider Lipschitz convex-concave SSPs in the $\\ell_p/\\ell_q$ setup, $p,q\\in[1,2]$. That is, we consider the case where the primal problem has an $\\ell_p$-setup (i.e., the primal parameter is constrained to an $\\ell_p$ bounded domain and the loss is $\\ell_p$-Lipschitz with respect to the primal parameter) and the dual problem has an $\\ell_q$ setup. Here, we obtain a bound of $\\tilde{O}\\big(\\frac{1}{\\sqrt{n}} + \\frac{\\sqrt{d}}{n\\epsilon}\\big)$ on the strong SP-gap, where $n$ is the number of samples and $d$ is the dimension. This rate is nearly optimal for any $p,q\\in[1,2]$. Without additional assumptions, such as smoothness or linearity requirements, prior work under DP has only obtained this rate when $p=q=2$ (i.e., only in the Euclidean setup). Further, existing algorithms have each only been shown to work for specific settings of $p$ and $q$ and under certain assumptions on the loss and the feasible set, whereas we provide a general algorithm for DP SSPs whenever $p,q\\in[1,2]$. Our result is obtained via a novel analysis of the recursive regularization algorithm. In particular, we develop new tools for analyzing generalization, which may be of independent interest. Next, we turn our attention towards SVIs with a monotone, bounded and Lipschitz operator and consider $\\ell_p$-setups, $p\\in[1,2]$. Here, we provide the first analysis which obtains a bound on the strong VI-gap of $\\tilde{O}\\big(\\frac{1}{\\sqrt{n}} + \\frac{\\sqrt{d}}{n\\epsilon}\\big)$. For $p-1=\\Omega(1)$, this rate is near optimal due to existing lower bounds. To obtain this result, we develop a modified version of recursive regularization. Our analysis builds on the techniques we develop for SSPs as well as employing additional novel components which handle difficulties arising from adapting the recursive regularization framework to SVIs.", "keywords": "Differential Privacy;Stochastic Saddle Point Problem;Stochastic Variational Inequality;Strong Gap;Stochastic Minimax Optimization;Algorithmic Stability", "primary_area": "optimization", "supplementary_material": "", "author": "Raef Bassily;Crist\u00f3bal A Guzm\u00e1n;Michael Menart", "authorids": "~Raef_Bassily2;~Crist\u00f3bal_A_Guzm\u00e1n1;~Michael_Menart1", "gender": ";M;M", "homepage": "https://sites.google.com/view/cguzman/;https://mikemenart.github.io/;https://sites.google.com/view/rbassily", "dblp": "21/9888;297/3184.html;88/8656", "google_scholar": "Rt9fMGEAAAAJ;https://scholar.google.com/citations?hl=en;C8qMVQUAAAAJ", "orcid": "0000-0002-1498-2055;;", "linkedin": ";;", "or_profile": "~Crist\u00f3bal_A_Guzm\u00e1n1;~Michael_Menart1;~RAEF_BASSILY1", "aff": "Pontificia Universidad Catolica de Chile;The Ohio State University;Google", "aff_domain": "uc.cl;osu.edu;google.com", "position": "Associate Professor;Graduate Student;Researcher", "bibtex": "@inproceedings{\nbassily2024private,\ntitle={Private Algorithms for Stochastic Saddle Points and Variational Inequalities: Beyond Euclidean Geometry},\nauthor={Raef Bassily and Crist{\\'o}bal A Guzm{\\'a}n and Michael Menart},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8ugOlbjJpp}\n}", "github": "", "reviewers": "nH8p;sj99;9vNb;mPfo", "pdf_size": 538565, "rating": "5;5;5;6", "confidence": "2;2;4;2", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "28;79;57;50", "wc_strengths": "19;60;18;27", "wc_weaknesses": "86;20;137;41", "wc_questions": "2;1;1;178", "wc_limitations": "2;1;1;29", "wc_review": "137;161;214;325", "wc_reply_reviewers": "18;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.5, 18.200274723201296 ], "wc_strengths_avg": [ 31.0, 17.10263137648707 ], "wc_weaknesses_avg": [ 71.0, 44.94997219131509 ], "wc_questions_avg": [ 45.5, 76.5 ], "wc_limitations_avg": [ 8.25, 11.986972094736853 ], "wc_review_avg": [ 209.25, 72.4029522878729 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3P6uOfBYGD4J:scholar.google.com/&scioq=Private+Algorithms+for+Stochastic+Saddle+Points+and+Variational+Inequalities:+Beyond+Euclidean+Geometry&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "uc.cl;osu.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Pontificia Universidad Catolica de Chile;Ohio State University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.puc.cl;https://www.osu.edu;https://www.google.com", "aff_unique_abbr": "PUC;OSU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Chile;United States" }, { "id": "8v0RSyfj6l", "title": "Rule-based outlier detection of AI-generated anatomy segmentations", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "There is a dire need for medical imaging datasets with accompanying annotations to perform downstream patient analysis. However, it is difficult to manually generate these annotations, due to the time-consuming nature, and the variability in clinical conventions. Artificial intelligence has been adopted in the field as a potential method to annotate these large datasets, however, a lack of expert annotations or ground truth can inhibit the adoption of these annotations. We recently made a dataset publicly available including annotations and extracted features of up to 104 organs for the National Lung Screening Trial using the TotalSegmentator method. However, the released dataset does not include expert-derived annotations or an assessment of the accuracy of the segmentations, limiting its usefulness. We propose the development of heuristics to assess the quality of the segmentations, providing methods to measure the consistency of the annotations and a comparison of results to the literature. We make our code and related materials publicly available at https://github.com/ImagingDataCommons/CloudSegmentatorResults and interactive tools at https://huggingface.co/spaces/ImagingDataCommons/CloudSegmentatorResults.", "keywords": "segmentation;artificial intelligence;computed tomography;benchmarking", "primary_area": "", "supplementary_material": "/attachment/ea1e87797594c484197fd2a0cd7c98f64ad24202.zip", "author": "Deepa Krishnaswamy;Vamsi Krishna Thiriveedhi;David A Clunie;Steve Pieper;Ron Kikinis;Andriy Fedorov", "authorids": "~Deepa_Krishnaswamy1;~Vamsi_Krishna_Thiriveedhi1;~David_A_Clunie1;~Steve_Pieper1;~Ron_Kikinis2;~Andriy_Fedorov1", "gender": "F;M;M;M;;M", "homepage": ";;http://www.dclunie.com/;https://isomics.com;https://spl.harvard.edu/people/ron-kikinis;", "dblp": "121/1194;;;;k/RonKikinis;", "google_scholar": "X8jB1n0AAAAJ;;;;_FF4PIYAAAAJ;", "orcid": "my-orcid?orcid=0000-0002-3235-1222;0000-0002-7901-4645;0000-0002-2406-1145;;0000-0001-7227-7058;0000-0003-4806-9413", "linkedin": "deepa-krishnaswamy-32163122;;dclunie;;;", "or_profile": "~Deepa_Krishnaswamy1;~Vamsi_Krishna_Thiriveedhi1;~David_A_Clunie1;~Steve_Pieper1;~Ron_Kikinis2;~Andriy_Fedorov1", "aff": ";Massachusetts General Hospital, Harvard University;PixelMed;Brigham and Women's Hospital, Harvard University;Brigham and Women's Hospital, Harvard University;Brigham and Women's Hospital, Harvard University", "aff_domain": ";mgh.harvard.edu;pixelmed.com;bwh.harvard.edu;bwh.harvard.edu;bwh.harvard.edu", "position": ";Researcher;Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024rulebased,\ntitle={Rule-based outlier detection of {AI}-generated anatomy segmentations},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=8v0RSyfj6l}\n}", "github": "", "project": "", "reviewers": "oSdE;ow35;VQu4", "site": "https://openreview.net/forum?id=8v0RSyfj6l", "pdf_size": 3781791, "rating": "4;4;6", "confidence": "4;5;5", "wc_summary_and_contributions": "120;115;54", "wc_strengths": "50;83;2", "wc_improvement": "4;81;66", "wc_limitations": "87;6;2", "wc_correctness": "1;54;16", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "12;68;1", "wc_documentation": "1;8;1", "wc_additional_feedback": "1;1;1", "wc_review": "277;417;144", "wc_reply_reviewers": "0;53;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "0;0;0", "rating_avg": [ 4.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 96.33333333333333, 30.00370347510824 ], "wc_strengths_avg": [ 45.0, 33.25657829663178 ], "wc_improvement_avg": [ 50.333333333333336, 33.32999983331666 ], "wc_limitations_avg": [ 31.666666666666668, 39.16063783387032 ], "wc_correctness_avg": [ 23.666666666666668, 22.305953365762146 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 27.0, 29.337120967584166 ], "wc_documentation_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 279.3333333333333, 111.46399518329775 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 24.984439601924677 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tcZGC_KDwuIJ:scholar.google.com/&scioq=Rule-based+outlier+detection+of+AI-generated+anatomy+segmentations&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Harvard University;PixelMed", "aff_unique_dep": "Massachusetts General Hospital;", "aff_unique_url": "https://www.harvard.edu;", "aff_unique_abbr": "Harvard;", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "GO4Align: Group Optimization for Multi-Task Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96340", "id": "8vCs5U9Hbt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8vCs5U9Hbt", "openreview": "https://openreview.net/forum?id=8vCs5U9Hbt", "poster": "/media/PosterPDFs/NeurIPS%202024/96340.png?t=1731414275.247771", "project": "", "author_site": "Jiayi Shen, Qi Wang, Zehao Xiao, Nanne van Noord, Marcel Worring", "tldr": "", "abstract": "This paper proposes **GO4Align**, a multi-task optimization approach that tackles task imbalance by explicitly aligning the optimization across tasks. To achieve this, we design an adaptive group risk minimization strategy, comprising two techniques in implementation: (i) dynamical group assignment, which clusters similar tasks based on task interactions; (ii) risk-guided group indicators, which exploit consistent task correlations with risk information from previous iterations. Comprehensive experimental results on diverse benchmarks demonstrate our method's performance superiority with even lower computational costs.", "keywords": "multi-task learning;multi-task optimization;task grouping;dense prediction tasks", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Jiayi Shen;Cheems Wang;Zehao Xiao;Nanne Van Noord;Marcel Worring", "authorids": "~Jiayi_Shen3;~Cheems_Wang1;~Zehao_Xiao1;~Nanne_Van_Noord1;~Marcel_Worring2", "gender": "F;M;M;M;M", "homepage": "https://autumn9999.github.io/;https://zzzx1224.github.io/;https://nanne.github.io/;https://staff.fnwi.uva.nl/m.worring/;https://sites.google.com/view/albert-q-wang-at-ai-community/home", "dblp": ";225/5426;123/5104.html;35/4613;375/3186", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;wFDJzDkAAAAJ;pdu8f3sAAAAJ;Mvbvv3IAAAAJ", "orcid": ";;0000-0002-5145-3603;;0000-0001-6135-6965", "linkedin": ";;;;qi-cheems-wang-518a421a1/", "or_profile": "~Jiayi_Shen3;~Zehao_Xiao1;~Nanne_Van_Noord1;~Marcel_Worring2;~Qi_Wang11", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;Tsinghua University", "aff_domain": "uva.nl;uva.nl;uva.nl;uva.nl;cs.tsinghua.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nshen2024goalign,\ntitle={{GO}4Align: Group Optimization for Multi-Task Alignment},\nauthor={Jiayi Shen and Cheems Wang and Zehao Xiao and Nanne Van Noord and Marcel Worring},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8vCs5U9Hbt}\n}", "github": "", "reviewers": "FqZ5;9PzS;sLBS;AXsM", "pdf_size": 7881753, "rating": "5;6;7;7", "confidence": "4;3;4;3", "soundness": "3;2;3;3", "novelty": "3;3;3;2", "presentation": "3;3;3;4", "wc_summary": "85;81;196;69", "wc_strengths": "47;39;72;53", "wc_weaknesses": "281;84;153;52", "wc_questions": "3;2;17;60", "wc_limitations": "1;1;6;1", "wc_review": "417;207;444;235", "wc_reply_reviewers": "42;0;0;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.75, 51.290228114134955 ], "wc_strengths_avg": [ 52.75, 12.173228823939851 ], "wc_weaknesses_avg": [ 142.5, 87.89908987014599 ], "wc_questions_avg": [ 20.5, 23.56374333589636 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 325.75, 105.64888783134444 ], "wc_reply_reviewers_avg": [ 14.75, 17.195566289017645 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17109311613887945749&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "uva.nl;uva.nl;uva.nl;uva.nl;cs.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Amsterdam;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UvA;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Netherlands;China" }, { "title": "Conformalized Multiple Testing after Data-dependent Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96339", "id": "8wvH0RZPsG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8wvH0RZPsG", "openreview": "https://openreview.net/forum?id=8wvH0RZPsG", "poster": "/media/PosterPDFs/NeurIPS%202024/96339.png?t=1731385573.9661608", "project": "", "author_site": "Xiaoning Wang, Yuyang Huo, Liuhua Peng, Changliang Zou", "tldr": "", "abstract": "The task of distinguishing individuals of interest from a vast pool of candidates using predictive models has garnered significant attention in recent years. This task can be framed as a *conformalized multiple testing* procedure, which aims at quantifying prediction uncertainty by controlling the false discovery rate (FDR) via conformal inference. In this paper, we tackle the challenge of conformalized multiple testing after data-dependent selection procedures. To guarantee the construction of valid test statistics that accurately capture the distorted distribution resulting from the selection process, we leverage a holdout labeled set to closely emulate the selective distribution. Our approach involves adaptively picking labeled data to create a calibration set based on the stability of the selection rule. This strategy ensures that the calibration data and the selected test unit are exchangeable, allowing us to develop valid conformal p-values. Implementing with the famous Benjamini-Hochberg (BH) procedure, it effectively controls the FDR over the selected subset. To handle the randomness of the selected subset and the dependence among the constructed p-values, we establish a unified theoretical framework. This framework extends the application of conformalized multiple testing to complex selective settings. Furthermore, we conduct numerical studies to showcase the effectiveness and validity of our procedures across various scenarios.", "keywords": "multiple testing;conformal p-value;conformal inference;selective inference;distribution-free", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/aa098ac8efd0d6be09d32e31f3666eb0759f52da.zip", "author": "Xiaoning Wang;Yuyang Huo;Liuhua Peng;Changliang Zou", "authorids": "~Xiaoning_Wang2;~Yuyang_Huo1;~Liuhua_Peng1;~Changliang_Zou2", "gender": "M;M;M;M", "homepage": ";;https://sites.google.com/view/liuhua-peng;http://web.stat.nankai.edu.cn/chlzou/", "dblp": ";;180/7474.html;", "google_scholar": ";;https://scholar.google.com.au/citations?user=SstmL7wAAAAJ;LPwSdmwAAAAJ", "orcid": "0009-0004-9846-0140;0000-0002-7521-1043;0000-0002-5431-8079;", "linkedin": ";;;", "or_profile": "~Xiaoning_Wang2;~Yuyang_Huo1;~Liuhua_Peng1;~Changliang_Zou2", "aff": "Nankai University;Nankai University;University of Melbourne;Nankai University", "aff_domain": "nankai.edu.cn;nku.nankai.edu.cn;unimelb.edu.au;nankai.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024conformalized,\ntitle={Conformalized Multiple Testing after Data-dependent Selection},\nauthor={Xiaoning Wang and Yuyang Huo and Liuhua Peng and Changliang Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8wvH0RZPsG}\n}", "github": "", "reviewers": "SifE;C3MM;9oVU;cTUv", "pdf_size": 478692, "rating": "6;6;6;7", "confidence": "3;5;4;4", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "2;4;3;3", "wc_summary": "75;137;65;46", "wc_strengths": "52;42;22;53", "wc_weaknesses": "115;344;137;73", "wc_questions": "20;242;4;19", "wc_limitations": "24;5;7;12", "wc_review": "286;770;235;203", "wc_reply_reviewers": "13;97;13;39", "wc_reply_authors": "40;251;38;39", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.75, 34.10553474144629 ], "wc_strengths_avg": [ 42.25, 12.457427503300993 ], "wc_weaknesses_avg": [ 167.25, 104.60491145256995 ], "wc_questions_avg": [ 71.25, 98.786069362031 ], "wc_limitations_avg": [ 12.0, 7.3824115301167 ], "wc_review_avg": [ 373.5, 230.82515027613434 ], "wc_reply_reviewers_avg": [ 40.5, 34.30378987808781 ], "wc_reply_authors_avg": [ 92.0, 91.80141611108186 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1390412849503542309&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nankai.edu.cn;nku.nankai.edu.cn;unimelb.edu.au;nankai.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nankai University;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;https://www.unimelb.edu.au", "aff_unique_abbr": "NKU;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Globally Convergent Variational Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96338", "id": "8x48XFLvyd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8x48XFLvyd", "openreview": "https://openreview.net/forum?id=8x48XFLvyd", "poster": "/media/PosterPDFs/NeurIPS%202024/96338.png?t=1731689737.240764", "project": "", "author_site": "Declan McNamara, Jackson Loper, Jeffrey Regier", "tldr": "", "abstract": "In variational inference (VI), an approximation of the posterior distribution is selected from a family of distributions through numerical optimization. With the most common variational objective function, known as the evidence lower bound (ELBO), only convergence to a *local* optimum can be guaranteed. In this work, we instead establish the *global* convergence of a particular VI method. This VI method, which may be considered an instance of neural posterior estimation (NPE), minimizes an expectation of the inclusive (forward) KL divergence to fit a variational distribution that is parameterized by a neural network. Our convergence result relies on the neural tangent kernel (NTK) to characterize the gradient dynamics that arise from considering the variational objective in function space. In the asymptotic regime of a fixed, positive-definite neural tangent kernel, we establish conditions under which the variational objective admits a unique solution in a reproducing kernel Hilbert space (RKHS). Then, we show that the gradient descent dynamics in function space converge to this unique function. In ablation studies and practical problems, we demonstrate that our results explain the behavior of NPE in non-asymptotic finite-neuron settings, and show that NPE outperforms ELBO-based optimization, which often converges to shallow local optima.", "keywords": "forward KL divergence; neural posterior estimation; neural tangent kernel; convex optimization", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Declan McNamara;Jackson Loper;Jeffrey Regier", "authorids": "~Declan_McNamara1;~Jackson_Loper1;~Jeffrey_Regier1", "gender": "M;;M", "homepage": ";;https://regier.stat.lsa.umich.edu", "dblp": "347/8126;https://dblp.uni-trier.de/pers/hd/l/Loper:Jackson;164/7281", "google_scholar": "ploel6YAAAAJ;4JmKgfkAAAAJ;q-J0TmgAAAAJ", "orcid": ";;0000-0002-1472-5235", "linkedin": "dmmcnamara/;;", "or_profile": "~Declan_McNamara1;~Jackson_Loper1;~Jeffrey_Regier1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nmcnamara2024globally,\ntitle={Globally Convergent Variational Inference},\nauthor={Declan McNamara and Jackson Loper and Jeffrey Regier},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8x48XFLvyd}\n}", "github": "", "reviewers": "KvcV;grF1;gbq5;Dszp", "pdf_size": 4582534, "rating": "5;6;7;7", "confidence": "4;2;1;3", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;2;3", "wc_summary": "48;121;62;179", "wc_strengths": "48;80;18;28", "wc_weaknesses": "92;106;15;179", "wc_questions": "54;17;73;1", "wc_limitations": "8;1;18;1", "wc_review": "250;325;186;388", "wc_reply_reviewers": "0;10;13;137", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.5, 51.97355096585185 ], "wc_strengths_avg": [ 43.5, 23.680160472429236 ], "wc_weaknesses_avg": [ 98.0, 58.20223363411408 ], "wc_questions_avg": [ 36.25, 28.630185119904482 ], "wc_limitations_avg": [ 7.0, 6.96419413859206 ], "wc_review_avg": [ 287.25, 76.18193683544676 ], "wc_reply_reviewers_avg": [ 40.0, 56.20942981386664 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZGGy5KUDuv0J:scholar.google.com/&scioq=Globally+Convergent+Variational+Inference&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "umich.edu;umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CV-VAE: A Compatible Video VAE for Latent Generative Video Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96337", "id": "8z4isrqbcf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8z4isrqbcf", "openreview": "https://openreview.net/forum?id=8z4isrqbcf", "poster": "/media/PosterPDFs/NeurIPS%202024/96337.png?t=1730255097.2495766", "project": "", "author_site": "Sijie Zhao, Yong Zhang, Xiaodong Cun, Shaoshu Yang, Muyao Niu, Xiaoyu Li, Wenbo HU, Ying Shan", "tldr": "", "abstract": "Spatio-temporal compression of videos, utilizing networks such as Variational Autoencoders (VAE), plays a crucial role in OpenAI's SORA and numerous other video generative models. For instance, many LLM-like video models learn the distribution of discrete tokens derived from 3D VAEs within the VQVAE framework, while most diffusion-based video models capture the distribution of continuous latent extracted by 2D VAEs without quantization. The temporal compression is simply realized by uniform frame sampling which results in unsmooth motion between consecutive frames. Currently, there lacks of a commonly used continuous video (3D) VAE for latent diffusion-based video models in the research community. Moreover, since current diffusion-based approaches are often implemented using pre-trained text-to-image (T2I) models, directly training a video VAE without considering the compatibility with existing T2I models will result in a latent space gap between them, which will take huge computational resources for training to bridge the gap even with the T2I models as initialization. To address this issue, we propose a method for training a video VAE of latent video models, namely CV-VAE, whose latent space is compatible with that of a given image VAE, e.g., image VAE of Stable Diffusion (SD). The compatibility is achieved by the proposed novel latent space regularization, which involves formulating a regularization loss using the image VAE. Benefiting from the latent space compatibility, video models can be trained seamlessly from pre-trained T2I or video models in a truly spatio-temporally compressed latent space, rather than simply sampling video frames at equal intervals. To improve the training efficiency, we also design a novel architecture for the video VAE. With our CV-VAE, existing video models can generate four times more frames with minimal finetuning. Extensive experiments are conducted to demonstrate the effectiveness of the proposed video VAE.", "keywords": "Variational Autoencoder;Video Generation;Image Generation", "primary_area": "generative_models", "supplementary_material": "/attachment/b6108a10c922130d90f5abeedf5caa76f20a140e.zip", "author": "Sijie Zhao;Yong Zhang;Xiaodong Cun;Shaoshu Yang;Muyao Niu;Xiaoyu Li;Wenbo Hu;Ying Shan", "authorids": "~Sijie_Zhao2;~Yong_Zhang6;~Xiaodong_Cun1;~Shaoshu_Yang1;~Muyao_Niu2;~Xiaoyu_Li2;~Wenbo_Hu2;~Ying_Shan2", "gender": "M;M;M;M;;M;M;M", "homepage": "https://sijeh.github.io/;https://yzhang2016.github.io/yongnorriszhang.github.io/;https://vinthony.github.io;https://github.com/ssyang1999/ssyang1999.github.io;;https://xiaoyu258.github.io;https://wbhu.github.io/;", "dblp": "300/5422;66/4615-34.html;210/0897;358/9392;;18/6855-2;95/7076-2;68/5910", "google_scholar": "tZ3dS3MAAAAJ;a_zSeVEAAAAJ;p42qwXcAAAAJ;;;https://scholar.google.com.hk/citations?user=Dt0PcAYAAAAJ;https://scholar.google.com/citations?hl=en;4oXBp9UAAAAJ", "orcid": ";;0000-0003-3607-2236;;;;0000-0001-6082-4966;0000-0001-7673-8325", "linkedin": ";;;;;;huwenbo/;YingShanProfile/", "or_profile": "~Sijie_Zhao2;~Yong_Zhang6;~Xiaodong_Cun1;~Shaoshu_Yang1;~Muyao_Niu2;~Xiaoyu_Li2;~Wenbo_Hu2;~Ying_Shan2", "aff": "Tencent AI Lab;Tencent AI Lab;Great Bay University;Institute of Automation, Chinese Academy of Sciences;;Tencent;Tencent AI Lab;Tencent PCG ARC Lab", "aff_domain": "tencent.com;tencent.com;gbu.edu.cn;ia.ac.cn;;tencent.com;tencent.com;arc.tencent.com", "position": "Researcher;Researcher;Assistant Professor;PhD student;;Researcher;Researcher;Director", "bibtex": "@inproceedings{\nzhao2024cvvae,\ntitle={{CV}-{VAE}: A Compatible Video {VAE} for Latent Generative Video Models},\nauthor={Sijie Zhao and Yong Zhang and Xiaodong Cun and Shaoshu Yang and Muyao Niu and Xiaoyu Li and Wenbo Hu and Ying Shan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8z4isrqbcf}\n}", "github": "", "reviewers": "tAbR;LvuQ;xmxT;U7Gm", "pdf_size": 43969134, "rating": "5;6;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "2;3;3;4", "wc_summary": "160;67;92;53", "wc_strengths": "68;45;81;110", "wc_weaknesses": "181;117;122;148", "wc_questions": "48;100;48;93", "wc_limitations": "6;1;10;18", "wc_review": "463;330;353;422", "wc_reply_reviewers": "163;19;6;27", "wc_reply_authors": "460;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 93.0, 41.12784944535758 ], "wc_strengths_avg": [ 76.0, 23.484037131634757 ], "wc_weaknesses_avg": [ 142.0, 25.406692031825003 ], "wc_questions_avg": [ 72.25, 24.375961519497032 ], "wc_limitations_avg": [ 8.75, 6.219927652312364 ], "wc_review_avg": [ 392.0, 53.16483800407935 ], "wc_reply_reviewers_avg": [ 53.75, 63.519190013727346 ], "wc_reply_authors_avg": [ 115.0, 199.18584287042088 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9248695370062885486&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tencent.com;tencent.com;gbu.edu.cn;ia.ac.cn;;tencent.com;tencent.com;arc.tencent.com", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;0", "aff_unique_norm": "Tencent;Great Bay University;Chinese Academy of Sciences", "aff_unique_dep": "Tencent AI Lab;;Institute of Automation", "aff_unique_url": "https://ai.tencent.com;https://www.greatbay.edu;http://www.ia.cas.cn", "aff_unique_abbr": "Tencent AI Lab;;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Evidence of Learned Look-Ahead in a Chess-Playing Neural Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96336", "id": "8zg9sO4ttV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=8zg9sO4ttV", "openreview": "https://openreview.net/forum?id=8zg9sO4ttV", "poster": "", "project": "", "author_site": "Erik Jenner, Shreyas Kapur, Vasil Georgiev, Cameron Allen, Scott Emmons, Stuart J Russell", "tldr": "", "abstract": "Do neural networks learn to implement algorithms such as look-ahead or search \"in the wild\"? Or do they rely purely on collections of simple heuristics? We present evidence of *learned look-ahead* in the policy and value network of Leela Chess Zero, the currently strongest deep neural chess engine. We find that Leela internally represents future optimal moves and that these representations are crucial for its final output in certain board states. Concretely, we exploit the fact that Leela is a transformer that treats every chessboard square like a token in language models, and give three lines of evidence: (1) activations on certain squares of future moves are unusually important causally; (2) we find attention heads that move important information \"forward and backward in time,\" e.g., from squares of future moves to squares of earlier ones; and (3) we train a simple probe that can predict the optimal move 2 turns ahead with 92% accuracy (in board states where Leela finds a single best line). These findings are clear evidence of learned look-ahead in neural networks and might be a step towards a better understanding of their capabilities.", "keywords": "Mechanistic interpretability;Learned search;Chess", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Erik Jenner;Shreyas Kapur;Vasil Georgiev;Cameron Allen;Scott Emmons;Stuart Russell", "authorids": "~Erik_Jenner1;~Shreyas_Kapur1;~Vasil_Georgiev1;~Cameron_Allen1;~Scott_Emmons1;~Stuart_Russell1", "gender": "M;M;M;;M;M", "homepage": "https://ejenner.com;https://shreyaskapur.com;;;http://scottemmons.com/;https://people.eecs.berkeley.edu/~russell/", "dblp": "295/8670;;;;180/5699;", "google_scholar": "https://scholar.google.com/citations?hl=en;SJJ5O5wAAAAJ;;;LoT0z6oAAAAJ;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ", "orcid": "0000-0002-6037-5715;;;;0000-0002-7946-7046;", "linkedin": "erik-jenner/;;georgievvasil/;;scott-emmons-5258005b/;", "or_profile": "~Erik_Jenner1;~Shreyas_Kapur1;~Vasil_Georgiev1;~Cameron_Allen1;~Scott_Emmons1;~Stuart_Russell1", "aff": "University of California, Berkeley;University of California, Berkeley;Independent;;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;uni-sofia.bg;;berkeley.edu;berkeley.edu", "position": "PhD student;PhD student;Researcher;;PhD student;Full Professor", "bibtex": "@inproceedings{\njenner2024evidence,\ntitle={Evidence of Learned Look-Ahead in a Chess-Playing Neural Network},\nauthor={Erik Jenner and Shreyas Kapur and Vasil Georgiev and Cameron Allen and Scott Emmons and Stuart Russell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=8zg9sO4ttV}\n}", "github": "", "reviewers": "p7Jw;abVe;SpQ5;ScDd;mWPe", "pdf_size": 1842972, "rating": "4;4;6;7;8", "confidence": "4;4;5;4;4", "soundness": "2;3;4;4;3", "novelty": "3;2;3;3;4", "presentation": "3;2;4;4;4", "wc_summary": "34;49;21;242;69", "wc_strengths": "81;52;85;168;61", "wc_weaknesses": "164;462;164;579;128", "wc_questions": "101;182;50;234;223", "wc_limitations": "2;2;1;7;7", "wc_review": "382;747;321;1230;488", "wc_reply_reviewers": "539;88;5;354;83", "wc_reply_authors": "1041;124;0;32;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;2;1;2;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 83.0, 81.09007337522885 ], "wc_strengths_avg": [ 89.4, 41.166005392799526 ], "wc_weaknesses_avg": [ 299.4, 184.74804464459157 ], "wc_questions_avg": [ 158.0, 71.40028011149536 ], "wc_limitations_avg": [ 3.8, 2.6381811916545836 ], "wc_review_avg": [ 633.6, 331.895525730613 ], "wc_reply_reviewers_avg": [ 213.8, 200.96905234388703 ], "wc_reply_authors_avg": [ 239.4, 403.36564057936323 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.06250000000000001, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4266917582823910352&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;uni-sofia.bg;;berkeley.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Independent", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Bandit-Feedback Online Multiclass Classification: Variants and Tradeoffs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96335", "id": "90IpKvVdXd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=90IpKvVdXd", "openreview": "https://openreview.net/forum?id=90IpKvVdXd", "poster": "/media/PosterPDFs/NeurIPS%202024/96335.png?t=1732188137.4801223", "project": "", "author_site": "Yuval Filmus, Steve Hanneke, Idan Mehalel, Shay Moran", "tldr": "", "abstract": "Consider the domain of multiclass classification within the adversarial online setting. What is the price of relying on bandit feedback as opposed to full information? To what extent can an adaptive adversary amplify the loss compared to an oblivious one? To what extent can a randomized learner reduce the loss compared to a deterministic one? We study these questions in the mistake bound model and provide nearly tight answers.\nWe demonstrate that the optimal mistake bound under bandit feedback is at most $O(k)$ times higher than the optimal mistake bound in the full information case, where $k$ represents the number of labels. This bound is tight and provides an answer to an open question previously posed and studied by Daniely and Helbertal ['13] and by Long ['17, '20], who focused on deterministic learners.\nMoreover, we present nearly optimal bounds of $\\tilde{\\Theta}(k)$ on the gap between randomized and deterministic learners, as well as between adaptive and oblivious adversaries in the bandit feedback setting. This stands in contrast to the full information scenario, where adaptive and oblivious adversaries are equivalent, and the gap in mistake bounds between randomized and deterministic learners is a constant multiplicative factor of $2$.\nIn addition, our results imply that in some cases the optimal randomized mistake bound is approximately the square-root of its deterministic parallel. Previous results show that this is essentially the smallest it can get.\nSome of our results are proved via a reduction to prediction with expert advice under bandit feedback, a problem interesting on its own right. For this problem, we provide a randomized algorithm which is nearly optimal in some scenarios.", "keywords": "Online learning;multiclass classification;bandit feedback", "primary_area": "online_learning", "supplementary_material": "", "author": "Yuval Filmus;Steve Hanneke;Idan Mehalel;Shay Moran", "authorids": "~Yuval_Filmus1;~Steve_Hanneke1;~Idan_Mehalel1;~Shay_Moran1", "gender": "M;M;M;M", "homepage": "http://www.cs.technion.ac.il/people/yuvalfi/;http://www.stevehanneke.com;;http://www.cs.technion.ac.il/~shaymrn/", "dblp": ";40/154;294/5021;119/5111", "google_scholar": "https://scholar.google.com.tw/citations?user=TFvs8NgAAAAJ;fEhNO7YAAAAJ;;kALYnggAAAAJ", "orcid": ";;;", "linkedin": ";;idan-mehalel-a51123162/;", "or_profile": "~Yuval_Filmus1;~Steve_Hanneke1;~Idan_Mehalel1;~Shay_Moran1", "aff": "Technion;Purdue University;Computer Science Departmen, Technion-Israel Institute of Technology;Google", "aff_domain": ";purdue.edu;cs.technion.ac.il;google.com", "position": "Assistant Professor;Assistant Professor;PhD student;Visiting Faculty", "bibtex": "@inproceedings{\nfilmus2024banditfeedback,\ntitle={Bandit-Feedback Online Multiclass Classification: Variants and Tradeoffs},\nauthor={Yuval Filmus and Steve Hanneke and Idan Mehalel and Shay Moran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=90IpKvVdXd}\n}", "github": "", "reviewers": "WVbQ;x7iL;qHYZ;qJc1", "pdf_size": 453814, "rating": "5;6;7;7", "confidence": "2;2;4;2", "soundness": "2;4;3;3", "novelty": "2;4;3;3", "presentation": "2;2;3;3", "wc_summary": "48;175;131;87", "wc_strengths": "30;73;83;136", "wc_weaknesses": "32;75;65;90", "wc_questions": "1;153;59;29", "wc_limitations": "5;1;26;5", "wc_review": "116;477;364;347", "wc_reply_reviewers": "10;71;67;29", "wc_reply_authors": "0;0;243;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.25, 47.536170438940495 ], "wc_strengths_avg": [ 80.5, 37.72598573927526 ], "wc_weaknesses_avg": [ 65.5, 21.289668856043768 ], "wc_questions_avg": [ 60.5, 57.207953992430106 ], "wc_limitations_avg": [ 9.25, 9.807522622966516 ], "wc_review_avg": [ 326.0, 131.13542618224872 ], "wc_reply_reviewers_avg": [ 44.25, 25.68438241422207 ], "wc_reply_authors_avg": [ 60.75, 105.2220865598093 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3220861142472424502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";purdue.edu;cs.technion.ac.il;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technion - Israel Institute of Technology;Purdue University;Technion-Israel Institute of Technology;Google", "aff_unique_dep": ";;Computer Science Department;Google", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.purdue.edu;https://www.technion.ac.il;https://www.google.com", "aff_unique_abbr": "Technion;Purdue;Technion;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Israel;United States" }, { "title": "Cluster-Learngene: Inheriting Adaptive Clusters for Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96334", "id": "92vVuJVLVW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=92vVuJVLVW", "openreview": "https://openreview.net/forum?id=92vVuJVLVW", "poster": "/media/PosterPDFs/NeurIPS%202024/96334.png?t=1732108780.7083108", "project": "", "author_site": "Qiufeng Wang, Xu Yang, Fu Feng, Jingq Wang, Xin Geng", "tldr": "", "abstract": "In recent years, the merging of vast datasets with powerful computational resources has led to the emergence of large pre-trained models in the field of deep learning. However, the common practices often overgeneralize the applicability of these models, overlooking the task-specific resource constraints. To mitigate this issue, we propose \\textbf{Cluster-Learngene}, which effectively clusters critical internal modules from a large ancestry model and then inherits them to initialize descendant models of elastic scales. Specifically, based on the density characteristics of attention heads, our method adaptively clusters attention heads of each layer and position-wise feed-forward networks (FFNs) in the ancestry model as the learngene. Moreover, we introduce priority weight-sharing and learnable parameter transformations that expand the learngene to initialize descendant models of elastic scales. Through extensive experimentation, we demonstrate that Cluster-Learngene not only is more efficient compared to other initialization methods but also customizes models of elastic scales according to downstream task resources.", "keywords": "Vision Transformer;Model Initialization;Learngene", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Qiufeng Wang;Xu Yang;Fu Feng;Jing wang;Xin Geng", "authorids": "~Qiufeng_Wang3;~Xu_Yang5;~Fu_Feng1;~Jing_wang2;~Xin_Geng1", "gender": "M;M;M;M;M", "homepage": "http://palm.seu.edu.cn/homepage/wangqiufeng/demo/index.html;;https://github.com/fu-feng;https://wangjing4research.github.io/;http://palm.seu.edu.cn/xgeng/index.htm", "dblp": ";63/1534-21.html;;02/736-113;", "google_scholar": "HQYQkTwAAAAJ;SqdxMH0AAAAJ;https://scholar.google.com.hk/citations?user=iQHFcXEAAAAJ;gL4a4cgAAAAJ;ZOCxkIcAAAAJ", "orcid": "0000-0001-7680-6607;0000-0002-8276-2679;0009-0005-7139-7791;0000-0003-2734-7138;", "linkedin": ";;;;", "or_profile": "~Qiufeng_Wang3;~Xu_Yang5;~Fu_Feng1;~Jing_wang2;~Xin_Geng1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University;Southeast University, China", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn", "position": "PhD student;Associate Professor;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nwang2024clusterlearngene,\ntitle={Cluster-Learngene: Inheriting Adaptive Clusters for Vision Transformers},\nauthor={Qiufeng Wang and Xu Yang and Fu Feng and Jing wang and Xin Geng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=92vVuJVLVW}\n}", "github": "", "reviewers": "GgZj;ScsP;2s46;SzCi", "pdf_size": 1884169, "rating": "4;5;5;8", "confidence": "3;3;2;5", "soundness": "3;2;2;3", "novelty": "2;2;2;4", "presentation": "2;2;2;3", "wc_summary": "57;203;81;111", "wc_strengths": "41;126;68;113", "wc_weaknesses": "81;81;31;61", "wc_questions": "108;122;89;2", "wc_limitations": "5;92;6;14", "wc_review": "292;624;275;301", "wc_reply_reviewers": "0;927;0;0", "wc_reply_authors": "0;818;0;0", "reply_reviewers": "0;3;0;0", "reply_authors": "1;4;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.0, 55.37147279962851 ], "wc_strengths_avg": [ 87.0, 34.18332927027442 ], "wc_weaknesses_avg": [ 63.5, 20.463381929681123 ], "wc_questions_avg": [ 80.25, 46.67105634116288 ], "wc_limitations_avg": [ 29.25, 36.396256675652786 ], "wc_review_avg": [ 373.0, 145.21535731457607 ], "wc_reply_reviewers_avg": [ 231.75, 401.4027746540873 ], "wc_reply_authors_avg": [ 204.5, 354.2043901478354 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8411910241920598, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9590531893496602037&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 2, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ReMI: A Dataset for Reasoning with Multiple Images", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97828", "id": "930e8v5ctj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=930e8v5ctj", "openreview": "https://openreview.net/forum?id=930e8v5ctj", "poster": "/media/PosterPDFs/NeurIPS%202024/97828.png?t=1733175328.1015224", "project": "", "author_site": "Mehran Kazemi, Nishanth Dikkala, Ankit Anand, Petar Devic, Ishita Dasgupta, Fangyu Liu, Bahare Fatemi, Pranjal Awasthi, Sreenivas Gollapudi, Dee Guo, Ahmed Qureshi", "tldr": "", "abstract": "With the continuous advancement of large language models (LLMs), it is essential to create new benchmarks to evaluate their expanding capabilities and identify areas for improvement. This work focuses on multi-image reasoning, an emerging capability in state-of-the-art LLMs. We introduce ReMI, a dataset designed to assess LLMs' ability to reason with multiple images. This dataset encompasses a diverse range of tasks, spanning various reasoning domains such as math, physics, logic, code, table/chart understanding, and spatial and temporal reasoning. It also covers a broad spectrum of characteristics found in multi-image reasoning scenarios. We have benchmarked several cutting-edge LLMs using ReMI and found a substantial gap between their performance and human-level proficiency. This highlights the challenges in multi-image reasoning and the need for further research. Our analysis also reveals the strengths and weaknesses of different models, shedding light on the types of reasoning that are currently attainable and areas where future models require improvement. We anticipate that ReMI will be a valuable resource for developing and evaluating more sophisticated LLMs capable of handling real-world multi-image understanding tasks.", "keywords": "Large Language Models;Vision Language Models;LLM Reasoning;Multi Image Reasoning;LLM Evaluation", "primary_area": "", "supplementary_material": "/attachment/f3dfb48f9d05d6d5e55bae68683d08ab5d9dd374.pdf", "author": "Mehran Kazemi;Nishanth Dikkala;Ankit Anand;Petar Devic;Ishita Dasgupta;Fangyu Liu;Bahare Fatemi;Pranjal Awasthi;Sreenivas Gollapudi;Dee Guo;Ahmed Qureshi", "authorids": "~Mehran_Kazemi1;~Nishanth_Dikkala1;~Ankit_Anand4;~Petar_Devic1;~Ishita_Dasgupta1;~Fangyu_Liu1;~Bahare_Fatemi1;~Pranjal_Awasthi3;~Sreenivas_Gollapudi2;~Dee_Guo1;~Ahmed_Qureshi2", "gender": ";M;;M;;M;F;;M;;", "homepage": ";http://people.csail.mit.edu/nishanthd/;;https://linkedin.com/in/pdevic;;http://fangyuliu.me/about;;https://www.cs.rutgers.edu/~pa336/;https://www.sreenivasgollapudi.com;;", "dblp": ";138/8092;;;169/6218;84/11483-1;;57/679;https://dblp.uni-trier.de/pers/g/Gollapudi:Sreenivas.html;;", "google_scholar": ";CMZoOTIAAAAJ;;;;https://scholar.google.ch/citations?user=d19PiS0AAAAJ;;;Ysd-WJgAAAAJ;;", "orcid": ";;;;;0000-0001-7038-3623;;;;;", "linkedin": ";;;https://linkedin.com/in/pdevic;idasgupta6/;fangyu-liu-48a003b0/;bahare-fatemi-b0049179/;;;https://linkedin.com/in/dee-guo;", "or_profile": "~Mehran_Kazemi1;~Nishanth_Dikkala1;~Ankit_Anand4;~Petar_Devic1;~Ishita_Dasgupta1;~Fangyu_Liu1;~Bahare_Fatemi1;~Pranjal_Awasthi3;~Sreenivas_Gollapudi2;~Dee_Guo1;~Ahmed_Qureshi2", "aff": ";Google;;Google;Google DeepMind;Google DeepMind;Google;Rutgers University;Google;Google;", "aff_domain": ";google.com;;google.com;deepmind.com;google.com;google.com;rutgers.edu;google.com;google.com;", "position": ";Google Research;;Intern;Researcher;Research Scientist;Researcher;Assistant Professor;Researcher;Researcher;", "bibtex": "@inproceedings{\nkazemi2024remi,\ntitle={Re{MI}: A Dataset for Reasoning with Multiple Images},\nauthor={Mehran Kazemi and Nishanth Dikkala and Ankit Anand and Petar Devic and Ishita Dasgupta and Fangyu Liu and Bahare Fatemi and Pranjal Awasthi and Sreenivas Gollapudi and Dee Guo and Ahmed Qureshi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=930e8v5ctj}\n}", "github": "", "reviewers": "ZMgP;FcqE;Lwd4;rtaP", "pdf_size": 3668936, "rating": "6;7;7;7", "confidence": "3;4;3;3", "wc_summary_and_contributions": "46;161;132;46", "wc_strengths": "5;38;3;22", "wc_improvement": "21;21;3;62", "wc_limitations": "5;1;4;1", "wc_correctness": "2;1;1;1", "wc_clarity": "13;1;1;1", "wc_relation_to_prior_work": "3;1;1;1", "wc_documentation": "1;1;15;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "97;226;161;136", "wc_reply_reviewers": "0;9;19;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 96.25, 51.28535365969508 ], "wc_strengths_avg": [ 17.0, 14.19506956657839 ], "wc_improvement_avg": [ 26.75, 21.637640814099857 ], "wc_limitations_avg": [ 2.75, 1.7853571071357126 ], "wc_correctness_avg": [ 1.25, 0.4330127018922193 ], "wc_clarity_avg": [ 4.0, 5.196152422706632 ], "wc_relation_to_prior_work_avg": [ 1.5, 0.8660254037844386 ], "wc_documentation_avg": [ 4.5, 6.06217782649107 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 155.0, 46.90948731333567 ], "wc_reply_reviewers_avg": [ 7.0, 7.842193570679061 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18420026327814410139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";google.com;;google.com;deepmind.com;google.com;google.com;rutgers.edu;google.com;google.com;", "author_num": 11, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Google;Rutgers University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.rutgers.edu", "aff_unique_abbr": "Google;Rutgers", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Semi-Discrete Optimal Transport: Nearly Minimax Estimation With Stochastic Gradient Descent and Adaptive Entropic Regularization", "author": "Ferdinand Genans, Antoine Godichon-Baggioni, Francois-Xavier Vialard, Olivier Wintenberger", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93687", "id": "93687", "proceeding": "", "pdf": "", "openreview": "https://openreview.net/forum?id=nXEzW3gVZ6", "poster": "", "project": "" }, { "title": "ELSA: Exploiting Layer-wise N:M Sparsity for Vision Transformer Acceleration", "author": "Ning-Chi Huang, Chi-Chih Chang, Wei-Cheng Lin, Endri Taka, Diana Marculescu, Kai-Chiang Wu", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93770", "id": "93770", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Global Distortions from Local Rewards: Neural Coding Strategies in Path-Integrating Neural Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96333", "id": "938EYYewtq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=938EYYewtq", "openreview": "https://openreview.net/forum?id=938EYYewtq", "poster": "", "project": "", "author_site": "Francisco Acosta, Fatih Dinc, William Redman, Manu Madhav, David Klindt, Nina Miolane", "tldr": "", "abstract": "Grid cells in the mammalian brain are fundamental to spatial navigation, and therefore crucial to how animals perceive and interact with their environment. Traditionally, grid cells are thought support path integration through highly symmetric hexagonal lattice firing patterns. However, recent findings show that their firing patterns become distorted in the presence of significant spatial landmarks such as rewarded locations. This introduces a novel perspective of dynamic, subjective, and action-relevant interactions between spatial representations and environmental cues. Here, we propose a practical and theoretical framework to quantify and explain these interactions. To this end, we train path-integrating recurrent neural networks (piRNNs) on a spatial navigation task, whose goal is to predict the agent's position with a special focus on rewarded locations. Grid-like neurons naturally emerge from the training of piRNNs, which allows us to investigate how the two aspects of the task, space and reward, are integrated in their firing patterns. We find that geometry, but not topology, of the grid cell population code becomes distorted. Surprisingly, these distortions are global in the firing patterns of the grid cells despite local changes in the reward. Our results indicate that after training with location-specific reward information, the preserved representational topology supports successful path integration, whereas the emergent heterogeneity in individual responses due to global distortions may encode dynamically changing environmental cues. By bridging the gap between computational models and the biological reality of spatial navigation under reward information, we offer new insights into how neural systems prioritize environmental landmarks in their spatial navigation code.", "keywords": "neuroscience;recurrent neural networks;grid cells;geometry", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Francisco Acosta;Fatih Dinc;William T Redman;Manu Madhav;David Klindt;Nina Miolane", "authorids": "~Francisco_Acosta1;~Fatih_Dinc1;~William_T_Redman1;~Manu_Madhav1;~David_Klindt1;~Nina_Miolane2", "gender": "M;M;M;M;;", "homepage": "https://web.physics.ucsb.edu/~facosta/;https://sites.google.com/view/fatihdinc/;https://wredman4.wixsite.com/wtredman;http://nc4.sbme.ubc.ca;;https://www.ece.ucsb.edu/people/faculty/nina-miolane", "dblp": ";218/5297;266/7985;;;", "google_scholar": ";https://scholar.google.com.tr/citations?user=jFHyg0oAAAAJ;-SOfw0AAAAAJ;OzV0TFoAAAAJ;;", "orcid": ";0000-0003-0921-0162;;;;", "linkedin": ";;;;;", "or_profile": "~Francisco_Acosta1;~Fatih_Dinc1;~William_T_Redman1;~Manu_Madhav1;~David_Klindt1;~Nina_Miolane2", "aff": "University of California, Santa Barbara;Stanford University;AIMdyn Inc.;University of British Columbia;;University of California, Santa Barbara", "aff_domain": "ucsb.edu;stanford.edu;aimdyn.com;ubc.ca;;ucsb.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nacosta2024global,\ntitle={Global Distortions from Local Rewards: Neural Coding Strategies in Path-Integrating Neural Systems},\nauthor={Francisco Acosta and Fatih Dinc and William T Redman and Manu Madhav and David Klindt and Nina Miolane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=938EYYewtq}\n}", "github": "", "reviewers": "BFFn;STz6;yMbD;MTEF", "pdf_size": 18025488, "rating": "5;6;7;8", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "1;2;3;3", "presentation": "4;4;3;4", "wc_summary": "76;176;91;21", "wc_strengths": "68;92;109;33", "wc_weaknesses": "152;139;101;72", "wc_questions": "185;139;145;52", "wc_limitations": "1;82;11;1", "wc_review": "482;628;457;179", "wc_reply_reviewers": "115;70;37;27", "wc_reply_authors": "34;35;27;28", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 55.56527692723217 ], "wc_strengths_avg": [ 75.5, 28.53506614676055 ], "wc_weaknesses_avg": [ 116.0, 31.567388235329194 ], "wc_questions_avg": [ 130.25, 48.51481732419489 ], "wc_limitations_avg": [ 23.75, 33.87753680538182 ], "wc_review_avg": [ 436.5, 162.37995565955794 ], "wc_reply_reviewers_avg": [ 62.25, 34.361133566865924 ], "wc_reply_authors_avg": [ 31.0, 3.5355339059327378 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YvZ37YIH8-AJ:scholar.google.com/&scioq=Global+Distortions+from+Local+Rewards:+Neural+Coding+Strategies+in+Path-Integrating+Neural+Systems&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ucsb.edu;stanford.edu;aimdyn.com;ubc.ca;;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of California, Santa Barbara;Stanford University;AIMdyn Inc.;University of British Columbia", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsb.edu;https://www.stanford.edu;;https://www.ubc.ca", "aff_unique_abbr": "UCSB;Stanford;;UBC", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Santa Barbara;Stanford;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Transformers need glasses! Information over-squashing in language tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96332", "id": "93HCE8vTye", "proceeding": "", "pdf": "https://openreview.net/pdf?id=93HCE8vTye", "openreview": "https://openreview.net/forum?id=93HCE8vTye", "poster": "", "project": "", "author_site": "Federico Barbero, Andrea Banino, Steven Kapturowski, Dharshan Kumaran, Jo\u00e3o Madeira Ara\u00fajo, Oleksandr Vitvitskyi, Razvan Pascanu, Petar Veli\u010dkovi\u0107", "tldr": "", "abstract": "We study how information propagates in decoder-only Transformers, which are the architectural foundation of most existing frontier large language models (LLMs). We rely on a theoretical signal propagation analysis---specifically, we analyse the representations of the last token in the final layer of the Transformer, as this is the representation used for next-token prediction. Our analysis reveals a representational collapse phenomenon: we prove that certain distinct pairs of inputs to the Transformer can yield arbitrarily close representations in the final token. This effect is exacerbated by the low-precision floating-point formats frequently used in modern LLMs. As a result, the model is provably unable to respond to these sequences in different ways---leading to errors in, e.g., tasks involving counting or copying. Further, we show that decoder-only Transformer language models can lose sensitivity to specific tokens in the input, which relates to the well-known phenomenon of over-squashing in graph neural networks. We provide empirical evidence supporting our claims on contemporary LLMs. Our theory points to simple solutions towards ameliorating these issues.", "keywords": "Transformers;Over-squashing;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Federico Barbero;Andrea Banino;Steven Kapturowski;Dharshan Kumaran;Jo\u00e3o Guilherme Madeira Ara\u00fajo;Alex Vitvitskyi;Razvan Pascanu;Petar Veli\u010dkovi\u0107", "authorids": "~Federico_Barbero1;~Andrea_Banino1;~Steven_Kapturowski1;~Dharshan_Kumaran1;~Jo\u00e3o_Guilherme_Madeira_Ara\u00fajo1;~Alex_Vitvitskyi1;~Razvan_Pascanu1;~Petar_Veli\u010dkovi\u01071", "gender": ";;;M;M;;M;M", "homepage": "https://federicobarbero.com;;;https://scholar.google.co.uk/citations?user=_jkvGEUAAAAJ&hl=en;https://www.joaogui1.netlify.app;https://www.linkedin.com/in/avlife/;https://razp.info;https://petar-v.com", "dblp": ";;;174/5441.html;;;65/8368.html;184/4786.html", "google_scholar": "jpYtKMEAAAAJ;;;;;uebYofUAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.co.uk/citations?user=kcTK_FAAAAAJ", "orcid": ";;;;;;;0000-0002-2820-4692", "linkedin": ";;stevenkapturowski/;;;;;petarvelickovic", "or_profile": "~Federico_Barbero1;~Andrea_Banino1;~Steven_Kapturowski1;~Dharshan_Kumaran1;~Jo\u00e3o_Guilherme_Madeira_Ara\u00fajo1;~Alex_Vitvitskyi1;~Razvan_Pascanu1;~Petar_Veli\u010dkovi\u01071", "aff": "University of Oxford;;Google DeepMind;Google;Google;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "ox.ac.uk;;deepmind.com;deepmind.com;google.com;deepmind.com;google.com;google.com", "position": "PhD student;;Staff Research Engineer;Researcher;Researcher;Research Engineer;Research Scientist;Senior Staff Research Scientist", "bibtex": "@inproceedings{\nbarbero2024transformers,\ntitle={Transformers need glasses! Information over-squashing in language tasks},\nauthor={Federico Barbero and Andrea Banino and Steven Kapturowski and Dharshan Kumaran and Jo{\\~a}o Guilherme Madeira Ara{\\'u}jo and Alex Vitvitskyi and Razvan Pascanu and Petar Veli{\\v{c}}kovi{\\'c}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=93HCE8vTye}\n}", "github": "", "reviewers": "GKd9;SAa1;vhDf", "pdf_size": 2696529, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;4;4", "wc_summary": "20;62;68", "wc_strengths": "64;37;95", "wc_weaknesses": "71;44;92", "wc_questions": "53;131;83", "wc_limitations": "4;1;31", "wc_review": "212;275;369", "wc_reply_reviewers": "72;16;76", "wc_reply_authors": "48;27;33", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 50.0, 21.354156504062622 ], "wc_strengths_avg": [ 65.33333333333333, 23.697163449568293 ], "wc_weaknesses_avg": [ 69.0, 19.6468827043885 ], "wc_questions_avg": [ 89.0, 32.12475680841802 ], "wc_limitations_avg": [ 12.0, 13.490737563232042 ], "wc_review_avg": [ 285.3333333333333, 64.51011979182456 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 27.39018477889885 ], "wc_reply_authors_avg": [ 36.0, 8.831760866327848 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12591359419641528659&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;;deepmind.com;deepmind.com;google.com;deepmind.com;google.com;google.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "UNION: Unsupervised 3D Object Detection using Object Appearance-based Pseudo-Classes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96331", "id": "93gz2lmFtm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=93gz2lmFtm", "openreview": "https://openreview.net/forum?id=93gz2lmFtm", "poster": "/media/PosterPDFs/NeurIPS%202024/96331.png?t=1733435294.8839896", "project": "", "author_site": "Ted Lentsch, Holger Caesar, Dariu Gavrila", "tldr": "", "abstract": "Unsupervised 3D object detection methods have emerged to leverage vast amounts of data without requiring manual labels for training. Recent approaches rely on dynamic objects for learning to detect mobile objects but penalize the detections of static instances during training. Multiple rounds of (self) training are used to add detected static instances to the set of training targets; this procedure to improve performance is computationally expensive. To address this, we propose the method UNION. We use spatial clustering and self-supervised scene flow to obtain a set of static and dynamic object proposals from LiDAR. Subsequently, object proposals' visual appearances are encoded to distinguish static objects in the foreground and background by selecting static instances that are visually similar to dynamic objects. As a result, static and dynamic mobile objects are obtained together, and existing detectors can be trained with a single training. In addition, we extend 3D object discovery to detection by using object appearance-based cluster labels as pseudo-class labels for training object classification. We conduct extensive experiments on the nuScenes dataset and increase the state-of-the-art performance for unsupervised 3D object discovery, i.e. UNION more than doubles the average precision to 38.4. The code is available at github.com/TedLentsch/UNION.", "keywords": "3D Object Detection;Multi-Modal Data;Self-Supervision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ted Lentsch;Holger Caesar;Dariu Gavrila", "authorids": "~Ted_Lentsch1;~Holger_Caesar2;~Dariu_Gavrila4", "gender": "M;M;", "homepage": "https://github.com/TedLentsch;http://it-caesar.com;", "dblp": "334/3783;125/7460;", "google_scholar": "54NWkMoAAAAJ;373LKEYAAAAJ;wQU1dJAAAAAJ", "orcid": "0000-0001-8567-5047;;0000-0002-1810-4196", "linkedin": "teddevrieslentsch/;holger-caesar-18600638/?originalSubdomain=sg;dariu-gavrila", "or_profile": "~Ted_Lentsch1;~Holger_Caesar2;~Dariu_Gavrila4", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology", "aff_domain": "tudelft.nl;tudelft.nl;tudelft.nl", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlentsch2024union,\ntitle={{UNION}: Unsupervised 3D Object Detection using Object Appearance-based Pseudo-Classes},\nauthor={Ted Lentsch and Holger Caesar and Dariu Gavrila},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=93gz2lmFtm}\n}", "github": "", "reviewers": "Gw7r;ZwY5;JvDj;LFQD", "pdf_size": 2176844, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;2", "wc_summary": "60;82;52;91", "wc_strengths": "51;33;25;117", "wc_weaknesses": "208;95;69;261", "wc_questions": "2;2;79;2", "wc_limitations": "9;16;6;26", "wc_review": "330;228;231;497", "wc_reply_reviewers": "11;29;10;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.25, 15.833114033569013 ], "wc_strengths_avg": [ 56.5, 36.17664992781946 ], "wc_weaknesses_avg": [ 158.25, 79.05496505596597 ], "wc_questions_avg": [ 21.25, 33.34197804570089 ], "wc_limitations_avg": [ 14.25, 7.693341276714559 ], "wc_review_avg": [ 321.5, 109.32177276279414 ], "wc_reply_reviewers_avg": [ 18.25, 8.042853971072706 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4039085719252943378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tudelft.nl;tudelft.nl;tudelft.nl", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Boosting Alignment for Post-Unlearning Text-to-Image Generative Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96330", "id": "93ktalFvnJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=93ktalFvnJ", "openreview": "https://openreview.net/forum?id=93ktalFvnJ", "poster": "", "project": "", "author_site": "Myeongseob Ko, Henry Li, Zhun Wang, Jonathan Patsenker, Jiachen (Tianhao) Wang, Qinbin Li, Ming Jin, Dawn Song, Ruoxi Jia", "tldr": "", "abstract": "Large-scale generative models have shown impressive image-generation capabilities, propelled by massive data. However, this often inadvertently leads to the generation of harmful or inappropriate content and raises copyright concerns. Driven by these concerns, machine unlearning has become crucial to effectively purge undesirable knowledge from models. While existing literature has studied various unlearning techniques, these often suffer from either poor unlearning quality or degradation in text-image alignment after unlearning, due to the competitive nature of these objectives. To address these challenges, we propose a framework that seeks an optimal model update at each unlearning iteration, ensuring monotonic improvement on both objectives. We further derive the characterization of such an update.\n In addition, we design procedures to strategically diversify the unlearning and remaining datasets to boost performance improvement. Our evaluation demonstrates that our method effectively removes target classes from recent diffusion-based generative models and concepts from stable diffusion models while maintaining close alignment with the models' original trained states, thus outperforming state-of-the-art baselines.", "keywords": "unlearning;diffusion models;stable diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Myeongseob Ko;Henry Li;Zhun Wang;Jonathan Patsenker;Jiachen T. Wang;Qinbin Li;Ming Jin;Dawn Song;Ruoxi Jia", "authorids": "~Myeongseob_Ko1;~Henry_Li2;~Zhun_Wang1;~Jonathan_Patsenker1;~Jiachen_T._Wang1;~Qinbin_Li1;~Ming_Jin2;~Dawn_Song1;~Ruoxi_Jia1", "gender": "M;;;;;M;M;F;", "homepage": ";https://hnry.li;;;;https://qinbinli.com/;http://www.jinming.tech/;;https://ruoxijia.info/", "dblp": "234/1494;31/6498;;;;225/9769;;s/DXSong;147/5355-1", "google_scholar": "https://scholar.google.com/citations?hl=en;o7-TIlcAAAAJ;;;;https://scholar.google.com.sg/citations?user=1EMOEqQAAAAJ;YdxdTtkAAAAJ;;JCrug-YAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;jonathan-patsenker/;;;;;", "or_profile": "~Myeongseob_Ko1;~Henry_Li2;~Zhun_Wang1;~Jonathan_Patsenker1;~Jiachen_T._Wang1;~Qinbin_Li1;~Ming_Jin2;~Dawn_Song1;~Ruoxi_Jia1", "aff": "Virginia Polytechnic Institute and State University;Yale University;;Yale University;;University of California, Berkeley;Virginia Tech;University of California, Berkeley;Virginia Tech", "aff_domain": "vt.edu;yale.edu;;yale.edu;;berkeley.edu;vt.edu;berkeley.edu;vt.edu", "position": "PhD student;PhD student;;PhD student;;Postdoc;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nko2024boosting,\ntitle={Boosting Alignment for Post-Unlearning Text-to-Image Generative Models},\nauthor={Myeongseob Ko and Henry Li and Zhun Wang and Jonathan Patsenker and Jiachen T. Wang and Qinbin Li and Ming Jin and Dawn Song and Ruoxi Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=93ktalFvnJ}\n}", "github": "", "reviewers": "68ka;h3BB;DUTP;RWKV", "pdf_size": 14450813, "rating": "5;5;6;6", "confidence": "3;3;4;4", "soundness": "3;2;3;2", "novelty": "3;3;2;2", "presentation": "3;3;3;3", "wc_summary": "63;96;85;435", "wc_strengths": "26;71;82;103", "wc_weaknesses": "169;103;104;774", "wc_questions": "133;13;1;298", "wc_limitations": "4;10;17;13", "wc_review": "395;293;289;1623", "wc_reply_reviewers": "22;19;24;153", "wc_reply_authors": "543;0;0;2336", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;7", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 169.75, 153.6023681458069 ], "wc_strengths_avg": [ 70.5, 28.146935890075138 ], "wc_weaknesses_avg": [ 287.5, 282.15111199497335 ], "wc_questions_avg": [ 111.25, 119.53738954820788 ], "wc_limitations_avg": [ 11.0, 4.743416490252569 ], "wc_review_avg": [ 650.0, 563.3657781583827 ], "wc_reply_reviewers_avg": [ 54.5, 56.89683646741706 ], "wc_reply_authors_avg": [ 719.75, 959.1121871293263 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16782800496615332054&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "vt.edu;yale.edu;;yale.edu;;berkeley.edu;vt.edu;berkeley.edu;vt.edu", "author_num": 9, "aff_unique_index": "0;1;1;2;0;2;0", "aff_unique_norm": "Virginia Tech;Yale University;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vt.edu;https://www.yale.edu;https://www.berkeley.edu", "aff_unique_abbr": "VT;Yale;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "93qSRpucpN", "title": "Robust Guided Diffusion for Offline Black-box Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Offline black-box optimization aims to maximize a black-box function using an offline dataset of designs and their measured properties. Two main approaches have emerged: the forward approach, which learns a mapping from input to its value, thereby acting as a proxy to guide optimization, and the inverse approach, which learns a mapping from value to input for conditional generation. (a) Although proxy-free (classifier-free) diffusion shows promise in robustly modeling the inverse mapping, it lacks explicit guidance from proxies, essential for generating high-performance samples beyond the training distribution. Therefore, we propose proxy-enhanced sampling which utilizes the explicit guidance from a trained proxy to bolster proxy-free diffusion with enhanced sampling control. (b) Yet, the trained proxy is susceptible to out-of-distribution issues. To address this, we devise the module diffusion-based proxy refinement, which seamlessly integrates insights from proxy-free diffusion back into the proxy for refinement. To sum up, we propose Robust Guided Diffusion for Offline Black-box Optimization (RGD), combining proxy and proxy-free diffusion for effective conditional generation. Empirical evaluations on design-bench underscore the efficacy of RGD. Our code is here.", "keywords": "offline model-based optimization;black-box optimization;diffusion models;score-based SDE;guided diffusion;classifier diffusion guidance;classifier-free diffusion guidance", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Can Chen;Christopher Beckham;Zixuan Liu;Xue Liu;Christopher Pal", "authorids": "~Can_Chen3;~Christopher_Beckham1;~Zixuan_Liu1;~Xue_Liu1;~Christopher_Pal1", "gender": "Not Specified;M;M;M;", "homepage": ";;;http://www.cs.mcgill.ca/~xueliu/;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao", "dblp": "370/4546.html;192/1312;;l/XueLiu;45/1217", "google_scholar": ";;yjKOHbEAAAAJ;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ", "orcid": ";;;;", "linkedin": "can-chen-018851202/;;;;", "or_profile": "~Can_Chen3;~Christopher_Beckham1;~Zixuan_Liu1;~Xue_Liu1;~Christopher_Pal1", "aff": "Mila - Quebec AI Institute;;University of Washington;McGill University;Polytechnique Montreal", "aff_domain": "mila.quebec;;uw.edu;mcgill.ca;polymtl.ca", "position": "PhD student;;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024robust,\ntitle={Robust Guided Diffusion for Offline Black-box Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=93qSRpucpN}\n}", "github": "", "project": "", "reviewers": "AXdG;2CyP;a4Cg;hUhz;1Uun", "site": "https://openreview.net/forum?id=93qSRpucpN", "pdf_size": 956817, "rating": "3;5;5;6;6", "confidence": "4;4;4;3;4", "soundness": "2;2;3;4;2", "novelty": "2;2;2;3;2", "presentation": "3;3;3;3;2", "wc_summary": "48;40;68;39;64", "wc_strengths": "57;28;64;47;29", "wc_weaknesses": "127;291;232;35;296", "wc_questions": "85;36;5;66;71", "wc_limitations": "12;14;7;48;4", "wc_review": "329;409;376;235;464", "wc_reply_reviewers": "170;378;0;69;287", "wc_reply_authors": "411;1027;0;95;285", "reply_reviewers": "2;4;0;1;2", "reply_authors": "4;6;1;2;3", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 51.8, 12.073110618229256 ], "wc_strengths_avg": [ 45.0, 14.51895313030523 ], "wc_weaknesses_avg": [ 196.2, 100.99782175868943 ], "wc_questions_avg": [ 52.6, 28.66775191744201 ], "wc_limitations_avg": [ 17.0, 15.899685531481433 ], "wc_review_avg": [ 362.6, 77.49219315518177 ], "wc_reply_reviewers_avg": [ 180.8, 138.18161961708222 ], "wc_reply_authors_avg": [ 363.6, 361.35168465083984 ], "reply_reviewers_avg": [ 1.8, 1.32664991614216 ], "reply_authors_avg": [ 3.2, 1.7204650534085255 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4564354645876385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bT5UdLOcIAkJ:scholar.google.com/&scioq=Robust+Guided+Diffusion+for+Offline+Black-box+Optimization&hl=en&as_sdt=0,48", "gs_version_total": 2, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Quebec AI Institute;University of Washington;McGill University;Polytechnique Montreal", "aff_unique_dep": "AI Institute;;;", "aff_unique_url": "https://mila.quebec;https://www.washington.edu;https://www.mcgill.ca;https://www.polymtl.ca", "aff_unique_abbr": "Mila;UW;McGill;PolyMTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "Bayesian Identification of the Hamiltonian Inductive Bias in Dynamical Systems", "author": "Stefano Cortinovis, Mark van der Wilk", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94322", "id": "94322", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Autonomous Driving with Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96329", "id": "95VyH4VxN9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=95VyH4VxN9", "openreview": "https://openreview.net/forum?id=95VyH4VxN9", "poster": "", "project": "", "author_site": "Rui-Jie Zhu, Ziqing Wang, Leilani Gilpin, Jason Eshraghian", "tldr": "", "abstract": "Autonomous driving demands an integrated approach that encompasses perception, prediction, and planning, all while operating under strict energy constraints to enhance scalability and environmental sustainability. We present Spiking Autonomous Driving (SAD), the first unified Spiking Neural Network (SNN) to address the energy challenges faced by autonomous driving systems through its event-driven and energy-efficient nature. SAD is trained end-to-end and consists of three main modules: perception, which processes inputs from multi-view cameras to construct a spatiotemporal bird's eye view; prediction, which utilizes a novel dual-pathway with spiking neurons to forecast future states; and planning, which generates safe trajectories considering predicted occupancy, traffic rules, and ride comfort. Evaluated on the nuScenes dataset, SAD achieves competitive performance in perception, prediction, and planning tasks, while drawing upon the energy efficiency of SNNs. This work highlights the potential of neuromorphic computing to be applied to energy-efficient autonomous driving, a critical step toward sustainable and safety-critical automotive technology. Our code is available at [https://github.com/ridgerchu/SAD](https://github.com/ridgerchu/SAD).", "keywords": "Spiking Neural Networks;Neuromorphic Computing;Brain-inspired Computing", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/9af8659ef1426b22d769835eaa6654bfede88f42.zip", "author": "Rui-Jie Zhu;Ziqing Wang;Leilani H. Gilpin;Jason Eshraghian", "authorids": "~Rui-Jie_Zhu2;~Ziqing_Wang2;~Leilani_H._Gilpin1;~Jason_Eshraghian1", "gender": "M;M;F;M", "homepage": "https://ruijie-zhu.github.io;https://alexandrewang915.github.io/;http://lgilpin.com;https://www.ncg.ucsc.edu", "dblp": "317/4836;58/1382;215/8848;184/4409", "google_scholar": "08ITzJsAAAAJ;FSo0Zg4AAAAJ;UFT_ijYAAAAJ;https://scholar.google.com.au/citations?user=C16f0r3hL-kC", "orcid": ";;0000-0002-9741-2014;0000-0002-5832-4054", "linkedin": ";;leilanigilpin/;", "or_profile": "~Rui-Jie_Zhu2;~Ziqing_Wang2;~Leilani_H._Gilpin1;~Jason_Eshraghian1", "aff": "University of California, Santa Cruz;Northwestern University;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "ucsc.edu;northwestern.edu;ucsc.edu;ucsc.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2024autonomous,\ntitle={Autonomous Driving with Spiking Neural Networks},\nauthor={Rui-Jie Zhu and Ziqing Wang and Leilani H. Gilpin and Jason Eshraghian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=95VyH4VxN9}\n}", "github": "", "reviewers": "j4mM;EVdw;GVJq;x6bz;XRtX;S2mt", "pdf_size": 2354270, "rating": "4;4;5;5;6;7", "confidence": "4;5;2;4;5;4", "soundness": "2;3;3;3;3;3", "novelty": "3;2;3;3;3;3", "presentation": "3;3;3;2;3;3", "wc_summary": "46;35;48;101;74;70", "wc_strengths": "37;23;71;63;123;107", "wc_weaknesses": "77;37;13;306;22;100", "wc_questions": "154;2;35;29;129;67", "wc_limitations": "4;1;31;1;15;29", "wc_review": "318;98;198;500;363;373", "wc_reply_reviewers": "0;43;80;89;0;58", "wc_reply_authors": "608;657;30;185;129;50", "reply_reviewers": "0;1;1;1;0;1", "reply_authors": "4;2;2;3;3;2", "rating_avg": [ 5.166666666666667, 1.0671873729054748 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 62.333333333333336, 22.020192753218325 ], "wc_strengths_avg": [ 70.66666666666667, 35.410293544234975 ], "wc_weaknesses_avg": [ 92.5, 100.2077009682057 ], "wc_questions_avg": [ 69.33333333333333, 54.883715455699814 ], "wc_limitations_avg": [ 13.5, 12.592987466575726 ], "wc_review_avg": [ 308.3333333333333, 129.36597526225958 ], "wc_reply_reviewers_avg": [ 45.0, 35.08085897846099 ], "wc_reply_authors_avg": [ 276.5, 257.18524452230923 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.7453559924999298 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10364276741451244304&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "ucsc.edu;northwestern.edu;ucsc.edu;ucsc.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Santa Cruz;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsc.edu;https://www.northwestern.edu", "aff_unique_abbr": "UCSC;NU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Cruz;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Implicit Multimodal Alignment: On the Generalization of Frozen LLMs to Multimodal Inputs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96328", "id": "9622QfVSAb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9622QfVSAb", "openreview": "https://openreview.net/forum?id=9622QfVSAb", "poster": "/media/PosterPDFs/NeurIPS%202024/96328.png?t=1731008222.516086", "project": "", "author_site": "Mustafa Shukor, Matthieu Cord", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated impressive performance on multimodal tasks, without any multimodal finetuning. They are the de facto building block for Large Multimodal Models (LMMs), yet, we still lack a proper understanding of their success. In this work, we expose frozen LLMs to image, video, audio and text inputs and analyse their internal representation with the attempt to understand their generalization beyond textual inputs. Our work provides the following **findings.** Perceptual tokens (1) are easily distinguishable from textual ones inside LLMs, with significantly different representations (e.g. live in different narrow cones), and complete translation to textual tokens does not exists. Yet, (2) both perceptual and textual tokens activate similar LLM weights. Despite their differences, (3) perceptual tokens are implicitly aligned to textual tokens inside LLMs, we call this the implicit multimodal alignment effect (IMA), and argue that this is linked to architectural design, helping LLMs to generalize. This provide more evidence to believe that the generalization of LLMs to multimodal inputs is mainly due to their architecture. These findings lead to several **implications.** This work provides several implications. (1) We find a positive correlation between the implicit alignment score and the task performance, suggesting that this could act as a proxy metric for model evaluation and selection. (2) A negative correlation exists regarding hallucinations (e.g. describing non-existing objects in images), revealing that this problem is mainly due to misalignment between the internal perceptual and textual representations. (3) Perceptual tokens change slightly throughout the model, thus, we propose different approaches to skip computations (e.g. in FFN layers), and significantly reduce the inference cost. (4) Due to the slowly changing embeddings across layers, and the high overlap between textual and multimodal activated weights, we compress LLMs by keeping only 1 subnetwork (called alpha-SubNet) that works well across a wide range of multimodal tasks. The code is available here: https://github.com/mshukor/ima-lmms.", "keywords": "Large multimodal models;LLMs;cross-modal alignment;efficiency;safety;interpretability", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mustafa Shukor;Matthieu Cord", "authorids": "~Mustafa_Shukor1;~Matthieu_Cord1", "gender": "M;M", "homepage": "https://twitter.com/MustafaShukor1;https://cord.isir.upmc.fr/", "dblp": ";68/3117", "google_scholar": "lhp9mRgAAAAJ;SpAotDcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Mustafa_Shukor1;~Matthieu_Cord1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universit\u00e9", "aff_domain": "isir.upmc.fr;isir.upmc.fr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nshukor2024implicit,\ntitle={Implicit Multimodal Alignment: On the Generalization of Frozen {LLM}s to Multimodal Inputs},\nauthor={Mustafa Shukor and Matthieu Cord},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9622QfVSAb}\n}", "github": "", "reviewers": "dcgF;69kn;vaQc;hn2C", "pdf_size": 35838916, "rating": "5;6;7;9", "confidence": "2;4;4;4", "soundness": "2;3;4;4", "novelty": "2;4;4;4", "presentation": "2;1;4;3", "wc_summary": "76;119;39;82", "wc_strengths": "52;64;50;70", "wc_weaknesses": "84;554;106;45", "wc_questions": "47;277;105;34", "wc_limitations": "24;221;26;4", "wc_review": "283;1235;326;235", "wc_reply_reviewers": "0;63;19;0", "wc_reply_authors": "46;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 79.0, 28.36370920736567 ], "wc_strengths_avg": [ 59.0, 8.306623862918075 ], "wc_weaknesses_avg": [ 197.25, 207.12481140606985 ], "wc_questions_avg": [ 115.75, 96.85911160030325 ], "wc_limitations_avg": [ 68.75, 88.32150077982145 ], "wc_review_avg": [ 519.75, 414.20247162468746 ], "wc_reply_reviewers_avg": [ 20.5, 25.734218464915543 ], "wc_reply_authors_avg": [ 11.5, 19.91858428704209 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6831300510639732, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1454442702695552522&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "isir.upmc.fr;isir.upmc.fr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Sorbonne Universit\u00e9", "aff_unique_dep": "Facult\u00e9 des Sciences;", "aff_unique_url": "https://www.upmc.fr;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UPMC;Sorbonne U", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Pearls from Pebbles: Improved Confidence Functions for Auto-labeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96327", "id": "96gXvFYWSE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=96gXvFYWSE", "openreview": "https://openreview.net/forum?id=96gXvFYWSE", "poster": "", "project": "", "author_site": "Harit Vishwakarma, Yi Chen, Sui Jiet Tay, Satya Sai Srinath Namburi, Frederic Sala, Ramya Korlakai Vinayak", "tldr": "", "abstract": "Auto-labeling is an important family of techniques that produce labeled training sets with minimum manual annotation. A prominent variant, threshold-based auto-labeling (TBAL), works by finding thresholds on a model's confidence scores above which it can accurately automatically label unlabeled data. However, many models are known to produce overconfident scores, leading to poor TBAL performance. While a natural idea is to apply off-the-shelf calibration methods to alleviate the overconfidence issue, we show that such methods fall short. Rather than experimenting with ad-hoc choices of confidence functions, we propose a framework for studying the optimal TBAL confidence function. We develop a tractable version of the framework to obtain Colander (Confidence functions for Efficient and Reliable Auto-labeling), a new post-hoc method specifically designed to maximize performance in TBAL systems. We perform an extensive empirical evaluation of Colander and compare it against methods designed for calibration. Colander achieves up to 60% improvement on coverage over the baselines while maintaining error level below 5% and using the same amount of labeled data.", "keywords": "Auto-labeling;Confidence Calibration;Failure Prediction;Selective Classification", "primary_area": "active_learning", "supplementary_material": "/attachment/3b22108ccaf40c4f4679f5154594cbc0d5fcc25e.zip", "author": "Harit Vishwakarma;Yi Chen;Sui Jiet Tay;Satya Sai Srinath Namburi GNVV;Frederic Sala;Ramya Korlakai Vinayak", "authorids": "~Harit_Vishwakarma1;~Yi_Chen19;~Sui_Jiet_Tay1;~Satya_Sai_Srinath_Namburi_GNVV1;~Frederic_Sala1;~Ramya_Korlakai_Vinayak1", "gender": "M;M;M;M;M;", "homepage": "https://harit7.github.io;https://www.deepneural.network/;https://jiet.tech/;;https://pages.cs.wisc.edu/~fredsala/;https://ramyakv.github.io/", "dblp": "207/7622;;;362/5934;133/3602;148/9626", "google_scholar": "pJF_ZZUAAAAJ;QoO6pMEAAAAJ;;brolZJEAAAAJ;9KhIkNkAAAAJ;", "orcid": ";0000-0002-7936-1575;;;;", "linkedin": "harit7;reid-chen-1601a3185/;sui-jiet-tay/;namburi-gnvv-satya-sai-srinath/;;", "or_profile": "~Harit_Vishwakarma1;~Yi_Chen19;~Sui_Jiet_Tay1;~Satya_Sai_Srinath_Namburi_GNVV1;~Frederic_Sala1;~Ramya_Korlakai_Vinayak1", "aff": "University of Wisconsin, Madison;University of Wisconsin - Madison;New York University;University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu;nyu.edu;wisc.edu;wisc.edu;wisc.edu", "position": "PhD student;PhD student;MS student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nvishwakarma2024pearls,\ntitle={Pearls from Pebbles: Improved Confidence Functions for Auto-labeling},\nauthor={Harit Vishwakarma and Yi Chen and Sui Jiet Tay and Satya Sai Srinath Namburi GNVV and Frederic Sala and Ramya Korlakai Vinayak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=96gXvFYWSE}\n}", "github": "", "reviewers": "YvdP;ffq7;Nkxd;oXkX;7Dad", "pdf_size": 3665339, "rating": "4;6;6;7;8", "confidence": "3;2;4;3;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "1;3;4;3;4", "wc_summary": "69;73;106;92;33", "wc_strengths": "13;49;55;67;61", "wc_weaknesses": "247;135;131;144;77", "wc_questions": "498;84;1;32;20", "wc_limitations": "11;41;15;41;1", "wc_review": "838;382;308;376;192", "wc_reply_reviewers": "142;30;0;28;93", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 74.6, 24.71113109511582 ], "wc_strengths_avg": [ 49.0, 18.973665961010276 ], "wc_weaknesses_avg": [ 146.8, 55.333172690529864 ], "wc_questions_avg": [ 127.0, 187.53133071569667 ], "wc_limitations_avg": [ 21.8, 16.32666530556684 ], "wc_review_avg": [ 419.2, 220.28563275892506 ], "wc_reply_reviewers_avg": [ 58.6, 51.62789943431749 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.36262033381142106, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13229425339108655742&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "wisc.edu;wisc.edu;nyu.edu;wisc.edu;wisc.edu;wisc.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.nyu.edu", "aff_unique_abbr": "UW;UW-Madison;NYU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Human-level shape inferences: A benchmark for evaluating the 3D understanding of vision models", "author": "tyler bonnen, Stephanie Fu, Yutong Bai, Thomas O'Connell, Yoni Friedman, Josh Tenenbaum, Alexei Efros", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97465", "id": "97465", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Prejudice and Volatility: A Statistical Framework for Measuring Social Discrimination in Large Language Models", "author": "Yiran Liu, Ke Yang, Zehan Qi, Xiao Liu, Yang Yu, Cheng Xiang Zhai", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97475", "id": "97475", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97475.png?t=1731220456.115147", "project": "https://github.com/EmpathYang/Prejudice-Caprice-Framework" }, { "title": "Do Multimodal Foundation Models Understand Enterprise Workflows? A Benchmark for Business Process Management Tasks", "author": "Michael Wornow, Avanika Narayan, Ben Viggiano, Ishan Khare, Tathagat Verma, Tibor Thompson, Miguel Hernandez, Sudharsan Sundar, Chloe Trujillo, Krrish Chawla, Rongfei Lu, Justin Shen, Divya Nagaraj, Joshua Martinez, Vardhan Agrawal, Althea Hudson, Nigam Shah, Christopher R\u00e9", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97485", "id": "97485", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://wonderbread.stanford.edu/" }, { "title": "Evaluate calibration of language models with folktexts", "author": "Andr\u00e9 F. Cruz, Celestine Mendler-D\u00fcnner, Moritz Hardt", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97490", "id": "97490", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97490.png?t=1733315832.7335024", "project": "https://github.com/socialfoundations/folktexts" }, { "title": "EEVR: A Virtual Reality-Based Emotion Dataset Featuring Paired Physiological Signals and Textual Descriptions", "author": "Pragya Singh, Ritvik Budhiraja, Ankush Gupta, Anshul Goswami, Mohan Kumar, Pushpendra Singh", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97493", "id": "97493", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97493.png?t=1731326224.0202897", "project": "https://melangelabiiitd.github.io/EEVR/" }, { "title": "RandNet-Parareal: a time-parallel PDE solver using Random Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96326", "id": "974ojuN0jU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=974ojuN0jU", "openreview": "https://openreview.net/forum?id=974ojuN0jU", "poster": "", "project": "", "author_site": "Guglielmo Gattiglio, Lyudmila Grigoryeva, Massimiliano Tamborrino", "tldr": "", "abstract": "Parallel-in-time (PinT) techniques have been proposed to solve systems of time-dependent differential equations by parallelizing the temporal domain. Among them, Parareal computes the solution sequentially using an inaccurate (fast) solver, and then ``corrects'' it using an accurate (slow) integrator that runs in parallel across temporal subintervals. This work introduces RandNet-Parareal, a novel method to learn the discrepancy between the coarse and fine solutions using random neural networks (RandNets). RandNet-Parareal achieves speed gains up to x125 and x22 compared to the fine solver run serially and Parareal, respectively. Beyond theoretical guarantees of RandNets as universal approximators, these models are quick to train, allowing the PinT solution of partial differential equations on a spatial mesh of up to $10^5$ points with minimal overhead, dramatically increasing the scalability of existing PinT approaches. RandNet-Parareal's numerical performance is illustrated on systems of real-world significance, such as the viscous Burgers' equation, the Diffusion-Reaction equation, the two- and three-dimensional Brusselator, and the shallow water equation.", "keywords": "Random Neural Networks;Extreme Learning Machines;Parareal;Parallel-in-time algorithms;Scalability;Ordinary and Partial Differential Equations", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/e53514122bf5674de3d7e88bae708de26f4522c5.zip", "author": "Guglielmo Gattiglio;Lyudmila Grigoryeva;Massimiliano Tamborrino", "authorids": "~Guglielmo_Gattiglio1;~Lyudmila_Grigoryeva1;~Massimiliano_Tamborrino1", "gender": "M;;M", "homepage": "https://guglielmogattiglio.github.io/;;https://www.warwick.ac.uk/tamborrino", "dblp": ";;173/3074", "google_scholar": ";svYRWEMAAAAJ;https://scholar.google.dk/citations?user=Zrr-TugAAAAJ", "orcid": "0009-0008-0040-7253;0000-0002-4857-7779;0000-0002-4661-8071", "linkedin": ";;", "or_profile": "~Guglielmo_Gattiglio1;~Lyudmila_Grigoryeva1;~Massimiliano_Tamborrino1", "aff": "University of Warwick;Universit\u00e4t St. Gallen;University of Warwick", "aff_domain": "warwick.ac.uk;unisg.ch;warwick.ac.uk", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ngattiglio2024randnetparareal,\ntitle={RandNet-Parareal: a time-parallel {PDE} solver using Random Neural Networks},\nauthor={Guglielmo Gattiglio and Lyudmila Grigoryeva and Massimiliano Tamborrino},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=974ojuN0jU}\n}", "github": "", "reviewers": "FREq;awZW;nFhC;Eu5i", "pdf_size": 834708, "rating": "5;6;6;7", "confidence": "2;2;3;4", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "70;53;74;144", "wc_strengths": "41;27;32;112", "wc_weaknesses": "122;85;110;407", "wc_questions": "152;19;113;211", "wc_limitations": "8;1;27;54", "wc_review": "393;185;356;928", "wc_reply_reviewers": "326;26;34;54", "wc_reply_authors": "1228;588;0;41", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.25, 34.823662931977736 ], "wc_strengths_avg": [ 53.0, 34.43109060137364 ], "wc_weaknesses_avg": [ 181.0, 131.16211343219504 ], "wc_questions_avg": [ 123.75, 69.81896232399906 ], "wc_limitations_avg": [ 22.5, 20.524375751773793 ], "wc_review_avg": [ 465.5, 278.31322282636876 ], "wc_reply_reviewers_avg": [ 110.0, 125.12393855693642 ], "wc_reply_authors_avg": [ 464.25, 498.321369700317 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oL8Vu3gxV9wJ:scholar.google.com/&scioq=RandNet-Parareal:+a+time-parallel+PDE+solver+using+Random+Neural+Networks&hl=en&as_sdt=0,21", "gs_version_total": 5, "email": "warwick.ac.uk;unisg.ch;warwick.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Warwick;University of St. Gallen", "aff_unique_dep": ";", "aff_unique_url": "https://www.warwick.ac.uk;https://www.unisg.ch", "aff_unique_abbr": "Warwick;HSG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Switzerland" }, { "title": "RedCode: Multi-dimensional Safety Benchmark for Code Agents", "author": "Chengquan Guo, Xun Liu, Chulin Xie, Andy Zhou, Yi Zeng, Zinan Lin, Dawn Song, Bo Li", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97521", "id": "97521", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://redcode-agent.github.io" }, { "title": "CryoBench: Datasets and Benchmarks for Heterogeneous Cryo-EM Reconstruction", "author": "Minkyu Jeon, Rishwanth Raghu, Miro Astore, Geoffrey Woollard, J. Feathers, Alkin Kaz, Sonya Hanson, Pilar Cossio, Ellen Zhong", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97535", "id": "97535", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97535.png?t=1731631772.9541116", "project": "https://cryobench.cs.princeton.edu" }, { "title": "OpenCDA-Loop: A Closed-loop Benchmarking Platform for End-to-end Evaluation of Cooperative Perception", "author": "Chia-Ju Chen, Runsheng Xu, Wei Shao, Junshan Zhang, Zhengzhong Tu", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97548", "id": "97548", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97548.png?t=1731733964.2986073", "project": "" }, { "title": "Norms for Managing Datasets: A Systematic Review of NeurIPS Datasets", "author": "Yiwei Wu, Leah Ajmani, Shayne Longpre, Hanlin Li", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97568", "id": "97568", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97568.png?t=1733539910.3844864", "project": "" }, { "title": "SubjECTive-QA: A dataset for the subjective evaluation of answers in Earnings Call Transcripts (ECTs)", "author": "Huzaifa Pardawala, Siddhant Sukhani, Veer Kejriwal, Rohan Bhasin, Abhishek Pillai, Dhruv Adha, Tarun Mandapati, Andrew DiBiasio, Agam Shah, Sudheer Chava", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97586", "id": "97586", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97586.png?t=1731266846.5631933", "project": "https://github.com/gtfintechlab/SubjECTive-QA" }, { "title": "There is No Silver Bullet: Benchmarking Methods in Predictive Combinatorial Optimization", "author": "Haoyu Geng, Hang Ruan, Runzhong Wang, Yang Li, YANG WANG, Lei Chen, Junchi Yan", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97602", "id": "97602", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97602.png?t=1731476426.282061", "project": "" }, { "title": "Quantifying the Bitter Lesson: How Safety Benchmarks Measure Capabilities Instead of Safety", "author": "Richard Ren, Steven Basart, Adam Khoja, Alexander Pan, Alice Gatti, Long Phan, Xuwang Yin, Mantas Mazeika, Gabriel Mukobi, Ryan Kim, Stephen Fitz, Dan Hendrycks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97637", "id": "97637", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://www.safetywashing.ai" }, { "title": "SurgicAI: A Fine-grained Platform for Data Collection and Benchmarking in Surgical Policy Learning", "author": "Jin Wu, Haoying Zhou, Peter Kazanzides, Adnan Munawar, Anqi Liu", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97680", "id": "97680", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97680.png?t=1731715165.6204348", "project": "https://github.com/surgical-robotics-ai/SurgicAI" }, { "title": "A Novel Benchmark for Decision-Making in Uncertain and Competitive Games", "author": "Kefan Su, Yusen Huo, ZHILIN ZHANG, Shuai Dou, Chuan Yu, Jian Xu, Zongqing Lu, Bo Zheng", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97717", "id": "97717", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97717.png?t=1733831522.5319178", "project": "https://github.com/alimama-tech/AuctionNet" }, { "title": "Comprehensive Framework for Curating Speech Datasets and Evaluating ASR Systems: A Case Study for the Polish Language", "author": "Micha\u0142 Junczyk", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97720", "id": "97720", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97720.png?t=1733091907.523992", "project": "" }, { "title": "SETBENCH: Assessing the Analytical and Semantic Robustness of Language Models", "author": "Nicholas Dronen, Bardiya Akhbari, Manish Digambar Gawali", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97730", "id": "97730", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://github.com/amazon-science/SetLexSem-Challenge" }, { "title": "WikiDO: Evaluating Out-of-Distribution Generalization of Vision-Language Models in Cross-Modal Retrieval", "author": "Pavan Kalyan Tankala, Piyush Pasi, Sahil Dharod, Azeem Motiwala, Preethi Jyothi, Aditi Chaudhary, Krishna Srinivasan", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97785", "id": "97785", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://kaggle.com/competitions/wikido24" }, { "title": "Image2Struct: A Benchmark for Evaluating Vision-Language Models in Extracting Structured Information from Images", "author": "Josselin Roberts, Tony Lee, Chi Heem Wong, Michihiro Yasunaga, Yifan Mai, Percy Liang", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97829", "id": "97829", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Through the Looking-Glass: Tracing Shifts in AI Data Consent across the Web", "author": "Shayne Longpre, Robert Mahari, Ariel Lee, Campbell Lund, Hamidah Oderinwale, William Brannon, Nayan Saxena, Naana Obeng-Marnu, Tobin South, Cole Hunter, Kevin Klyman, Christopher Klamm, Hailey Schoelkopf, Nikhil Singh, Manuel Cherep, Ahmad Anis, An Dinh, Caroline Shamiso Chitongo, Da Yin, Damien Sileo, Deividas Mataciunas, Diganta Misra, Emad Alghamdi, Enrico Shippole, Jianguo Zhang, Joanna Materzynska, Kun Qian, Kushagra Tiwary, Lester James V. Miranda, Manan Dey, Minnie Liang, Mohammed Hamdy, Niklas Muennighoff, Seonghyeon Ye, Seungone Kim, Shrestha Mohanty, Vipul Gupta, Vivek Sharma, Minh Chien Vu, Xuhui Zhou, Yizhi Li, Caiming Xiong, Luis Villa, Stella Biderman, Hanlin Li, Daphne Ippolito, Sara Hooker, Jad Kabbara, Alex Pentland", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97842", "id": "97842", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97842.png?t=1731013748.8279972", "project": "" }, { "title": "Benchmarking Trustworthiness of Multimodal Large Language Models: A Comprehensive Study", "author": "Yichi Zhang, Yao Huang, Yitong Sun, Chang Liu, Zhe Zhao, Zhengwei Fang, Yifan Wang, Huanran Chen, Xiao Yang, Xingxing Wei, Hang Su, Yinpeng Dong, Jun Zhu", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97845", "id": "97845", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97845.png?t=1733241018.2098536", "project": "multi-trust.github.io" }, { "title": "A Benchmark Suite for Systematically Evaluating Reasoning Shortcuts", "author": "Samuele Bortolotti, Emanuele Marconato, Tommaso Carraro, Paolo Morettin, Emile van Krieken, Antonio Vergari, Stefano Teso, Andrea Passerini", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97846", "id": "97846", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/97846.png?t=1731487978.9033308", "project": "https://unitn-sml.github.io/rsbench/" }, { "title": "Enhancing Chess Reinforcement Learning with Graph Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96325", "id": "97OvPgmjRN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=97OvPgmjRN", "openreview": "https://openreview.net/forum?id=97OvPgmjRN", "poster": "", "project": "", "author_site": "Tomas Rigaux, Hisashi Kashima", "tldr": "", "abstract": "Mastering games is a hard task, as games can be extremely complex, and still fundamentally different in structure from one another. While the AlphaZero algorithm has demonstrated an impressive ability to learn the rules and strategy of a large variety of games, ranging from Go and Chess, to Atari games, its reliance on extensive computational resources and rigid Convolutional Neural Network (CNN) architecture limits its adaptability and scalability. A model trained to play on a $19\\times 19$ Go board cannot be used to play on a smaller $13\\times 13$ board, despite the similarity between the two Go variants.\nIn this paper, we focus on Chess, and explore using a more generic Graph-based Representation of a game state, rather than a grid-based one, to introduce a more general architecture based on Graph Neural Networks (GNN). We also expand the classical Graph Attention Network (GAT) layer to incorporate edge-features, to naturally provide a generic policy output format.\nOur experiments, performed on smaller networks than the initial AlphaZero paper, show that this new architecture outperforms previous architectures with a similar number of parameters, being able to increase playing strength an order of magnitude faster. We also show that the model, when trained on a smaller $5\\times 5$ variant of chess, is able to be quickly fine-tuned to play on regular $8\\times 8$ chess, suggesting that this approach yields promising generalization abilities.\nOur code is available at https://github.com/akulen/AlphaGateau.", "keywords": "deep learning;GNN;Chess;RL", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/362a2756a7b9eea83c407fc298a84e2c3cb8d4a8.zip", "author": "Tomas Rigaux;Hisashi Kashima", "authorids": "~Tomas_Rigaux1;~Hisashi_Kashima2", "gender": "M;M", "homepage": "https://tomas.rigaux.com;https://hkashima.github.io/index_e.html", "dblp": "223/4813.html;27/4448", "google_scholar": "ZHOsepEAAAAJ;bkTB0t8AAAAJ", "orcid": ";0000-0002-2770-0184", "linkedin": ";", "or_profile": "~Tomas_Rigaux1;~Hisashi_Kashima2", "aff": "Kyoto University;Kyoto University", "aff_domain": "st.kyoto-u.ac.jp;kyoto-u.ac.jp", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nrigaux2024enhancing,\ntitle={Enhancing Chess Reinforcement Learning with Graph Representation},\nauthor={Tomas Rigaux and Hisashi Kashima},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=97OvPgmjRN}\n}", "github": "", "reviewers": "abiC;R1p6;NRN3", "pdf_size": 636383, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "1;3;3", "novelty": "3;2;3", "presentation": "3;3;4", "wc_summary": "37;147;14", "wc_strengths": "114;116;131", "wc_weaknesses": "457;151;183", "wc_questions": "139;90;210", "wc_limitations": "9;96;2", "wc_review": "756;600;540", "wc_reply_reviewers": "702;10;196", "wc_reply_authors": "525;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 58.04021594262608 ], "wc_strengths_avg": [ 120.33333333333333, 7.586537784494029 ], "wc_weaknesses_avg": [ 263.6666666666667, 137.33009704924686 ], "wc_questions_avg": [ 146.33333333333334, 49.2634640366356 ], "wc_limitations_avg": [ 35.666666666666664, 42.7577153531643 ], "wc_review_avg": [ 632.0, 91.03845341392834 ], "wc_reply_reviewers_avg": [ 302.6666666666667, 292.40307035931676 ], "wc_reply_authors_avg": [ 175.0, 247.48737341529164 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iOcNssO0MMYJ:scholar.google.com/&scioq=Enhancing+Chess+Reinforcement+Learning+with+Graph+Representation&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "st.kyoto-u.ac.jp;kyoto-u.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Distributed Sparse Regression via Penalization", "author": "Yao Ji, Gesualdo Scutari, Ying Sun, Harsha Honnappa", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98302", "id": "98302", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Inference on the Change Point under a High Dimensional Covariance Shift", "author": "Abhishek Kaul, Hongjin Zhang, Konstantinos Tsampourakis, George Michailidis", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98303", "id": "98303", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98303.png?t=1731362733.0211112", "project": "" }, { "title": "Towards Explainable Evaluation Metrics for Machine Translation", "author": "Christoph Leiter, Piyawat Lertvittayakumjorn, Marina Fomicheva, Wei Zhao, Yang Gao, Steffen Eger", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98304", "id": "98304", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98304.png?t=1733963746.9785333", "project": "" }, { "title": "Unsupervised Anomaly Detection Algorithms on Real-world Data: How Many Do We Need?", "author": "Roel Bouman, Zaharah Bukhsh, Tom Heskes", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98305", "id": "98305", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://github.com/RoelBouman/outlierdetection" }, { "title": "Nonparametric Regression for 3D Point Cloud Learning", "author": "Xinyi Li, Shan Yu, Yueying Wang, Guannan Wang, Li Wang, Ming-Jun Lai", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98307", "id": "98307", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98307.png?t=1731562967.5218356", "project": "" }, { "title": "A Unified Recipe for Deriving (Time-Uniform) PAC-Bayes Bounds", "author": "Ben Chugg, Hongjian Wang, Aaditya Ramdas", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98308", "id": "98308", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "A Continuous-time Stochastic Gradient Descent Method for Continuous Data", "author": "Kexin Jin, Jonas Latz, Chenguang Liu, Carola-Bibiane Sch\u00f6nlieb", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98309", "id": "98309", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98309.png?t=1733424525.3275309", "project": "" }, { "title": "Exploration, Exploitation, and Engagement in Multi-Armed Bandits with Abandonment", "author": "Zixian Yang, Xin Liu, Lei Ying", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98312", "id": "98312", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98312.png?t=1731405861.583966", "project": "https://github.com/Multi-wavelength/Exploration-Exploitation-and-Engagement-in-Multi-Armed-Bandits-with-Abandonment" }, { "title": "Optimization-based Causal Estimation from Heterogeneous Environments", "author": "Mingzhang Yin, Yixin Wang, David Blei", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98313", "id": "98313", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98313.png?t=1732304078.3768868", "project": "https://github.com/mingzhang-yin/CoCo" }, { "title": "Critically Assessing the State of the Art in Neural Network Verification", "author": "Matthias K\u00f6nig, Annelot W. Bosman, Holger H. Hoos, Jan N. Van Rijn", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98315", "id": "98315", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://github.com/ADA-research/nn-verification-assessment" }, { "title": "Causal-learn: Causal Discovery in Python", "author": "Yujia Zheng, Biwei Huang, Wei Chen, Joseph D Ramsey, Mingming Gong, Ruichu Cai, Shohei Shimizu, Peter Spirtes, Kun Zhang", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98316", "id": "98316", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://causal-learn.readthedocs.io/en/latest/" }, { "title": "Causal Bandits for Linear Structural Equation Models", "author": "Burak Varici, Karthikeyan Shanmugam, Prasanna Sattigeri, Ali Tajer", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98317", "id": "98317", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98317.png?t=1732771636.716528", "project": "" }, { "title": "BenchMARL: Benchmarking Multi-Agent Reinforcement Learning", "author": "Matteo Bettini, Amanda Prorok, Vincent MOENS", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98318", "id": "98318", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98318.png?t=1730146450.003954", "project": "https://github.com/facebookresearch/BenchMARL" }, { "title": "Pre-trained Gaussian Processes for Bayesian Optimization", "author": "Zi Wang, George Dahl, Kevin Swersky, Chansoo Lee, Zachary Nado, Justin Gilmer, Jasper Snoek, Zoubin Ghahramani", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98319", "id": "98319", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://github.com/google-research/hyperbo/" }, { "title": "Topological Hidden Markov Models", "author": "Adam B Kashlak, Prachi Loliencar, Giseon Heo", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98320", "id": "98320", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98320.png?t=1729961685.669894", "project": "https://github.com/cachelack/Topological-Hidden-Markov-Model" }, { "title": "An Analysis of Robustness of Non-Lipschitz Networks", "author": "Maria-Florina Balcan, Avrim Blum, Dravyansh Sharma, Hongyang Zhang", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98321", "id": "98321", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98321.png?t=1731749530.5080278", "project": "" }, { "title": "Efficient Convex Algorithms for Universal Kernel Learning", "author": "Aleksandr Talitckii, Brendon Colbert, Matthew Peet", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98322", "id": "98322", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Numerically Stable Sparse Gaussian Processes via Minimum Separation using Cover Trees", "author": "Alexander Terenin, David Burt, Artem Artemev, Seth Flaxman, Mark van der Wilk, Carl Edward Rasmussen, Hong Ge", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98323", "id": "98323", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Label Alignment Regularization for Distribution Shift", "author": "Ehsan Imani, Guojun Zhang, Runjia Li, Jun Luo, Pascal Poupart, Philip Torr, Yangchen Pan", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98324", "id": "98324", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98324.png?t=1731737021.1328824", "project": "https://github.com/EhsanEI/lar/" }, { "title": "Generic Unsupervised Optimization for a Latent Variable Model With Exponential Family Observables", "author": "Hamid Mousavi, Jakob Drefs, Florian Hirschberger, J\u00f6rg L\u00fccke", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98325", "id": "98325", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98325.png?t=1733842116.019589", "project": "https://www.jmlr.org/papers/v24/22-0359.html" }, { "title": "TorchOpt: An Efficient Library for Differentiable Optimization", "author": "Jie Ren, Xidong Feng, Bo Liu, Xuehai Pan, Yao Fu, Luo Mai, Yaodong Yang", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98326", "id": "98326", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98326.png?t=1732592449.0400834", "project": "https://github.com/metaopt/torchopt" }, { "title": "Fourier Neural Operator with Learned Deformations for PDEs on General Geometries", "author": "Zongyi Li, Daniel Zhengyu Huang, Burigede Liu, Anima Anandkumar", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98327", "id": "98327", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98327.png?t=1733727276.605571", "project": "" }, { "title": "Variation Spaces for Multi-Output Neural Networks: Insights on Multi-Task Learning and Network Compression", "author": "Joseph Shenouda, Rahul Parhi, Kangwook Lee, Robert Nowak", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98328", "id": "98328", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Metrizing Weak Convergence with Maximum Mean Discrepancies", "author": "Carl-Johann Simon-Gabriel, Alessandro Barp, Bernhard Sch\u00f6lkopf, Lester Mackey", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98329", "id": "98329", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "Statistical Inference for Fairness Auditing", "author": "John Cherian, Emmanuel Candes", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98330", "id": "98330", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98330.png?t=1733434526.950093", "project": "github.com/jjcherian/fairaudit" }, { "title": "Fixed points of nonnegative neural networks", "author": "Tomasz J. Piotrowski, Renato L. G. Cavalcante, Mateusz Gabor", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98331", "id": "98331", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/98331.png?t=1733412114.041526", "project": "" }, { "title": "Optimal Clustering with Bandit Feedback", "author": "Junwen Yang, Zixin Zhong, Vincent Tan", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/98332", "id": "98332", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "" }, { "title": "[Re] GNNInterpreter: A probabilistic generative model-level explanation for Graph Neural Networks", "author": "Batu Helvacioglu, Ana Vasilcoiu, Thijs Stessen, Thies Kersten", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99329", "id": "99329", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99329.png?t=1731353705.4394703", "project": "https://openreview.net/forum?id=8cYcR23WUo" }, { "title": "Transfer Learning with Informative Priors: Simple Baselines Better than Previously Reported", "author": "Ethan Harvey, Mikhail Petrov, Michael Hughes", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99332", "id": "99332", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99332.png?t=1729690611.5414934", "project": "https://openreview.net/forum?id=BbvSU02jLg" }, { "title": "Reproducibility study of FairAC", "author": "Gijs de Jong, Macha Meijer, Derck Prinzhorn, Harold Ruiter", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99333", "id": "99333", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99333.png?t=1733942625.57383", "project": "https://openreview.net/forum?id=ccDi5jtSF7" }, { "title": "[Re] On the Reproducibility of Post-Hoc Concept Bottleneck Models", "author": "Gregory Hok Tjoan Go, Diego Canez, Nesta Midavaine, Satchit Chatterji, Ioana Simion", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99334", "id": "99334", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://openreview.net/forum?id=8UfhCZjOV7" }, { "title": "Explaining RL Decisions with Trajectories': A Reproducibility Study", "author": "Karim Abdel Sadek, Matteo Nulli, Joan Velja, Jort Vincenti", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99336", "id": "99336", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99336.png?t=1731853042.6384137", "project": "https://openreview.net/forum?id=QdeBbK5CSh" }, { "title": "Reproducibility Study of \"ITI-GEN: Inclusive Text-to-Image Generation\"", "author": "Daniel Gallo Fern\u00e1ndez, R\u0103zvan-Andrei Mati\u0219an, Alejandro Monroy, Janusz Partyka", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99338", "id": "99338", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99338.png?t=1732269391.2259", "project": "https://openreview.net/forum?id=d3Vj360Wi2" }, { "title": "Reproducibility study of \"Robust Fair Clustering: A Novel Fairness Attack and Defense Framework\"", "author": "Lucas Ponticelli, Vincent Loos, Eren Kocadag, Kacper Bartosik", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99339", "id": "99339", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://openreview.net/forum?id=Xu1sEPhjqH" }, { "title": "[Re] CUDA: Curriculum of Data Augmentation for Long\u2010tailed Recognition", "author": "Barath Chandran C", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99340", "id": "99340", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99340.png?t=1733996009.2110121", "project": "https://openreview.net/forum?id=Wm6d44I8St" }, { "title": "[Re] Reproducibility Study of \u201cExplaining Temporal Graph Models Through an Explorer-Navigator Framework\"", "author": "Helia Ghasemi, Christina Isaicu, Jesse Wonnink, Andreas Berentzen", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99341", "id": "99341", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99341.png?t=1733324926.4328003", "project": "https://openreview.net/forum?id=9M2XqvH2SB" }, { "title": "Reproducibility Study of \"Robust Fair Clustering: A Novel Fairness Attack and Defense Framework\"", "author": "Iason Skylitsis, Zheng Feng, Idries Nasim, Camille Niessink", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99342", "id": "99342", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://openreview.net/forum?id=H1hLNjwrGy" }, { "title": "Reproducibility Study on Adversarial Attacks Against Robust Transformer Trackers", "author": "Fatemeh Nourilenjan Nokabadi, Christian Gagn\u00e9, Jean-Francois Lalonde", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99343", "id": "99343", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99343.png?t=1730334894.4428053", "project": "https://lvsn.github.io/ReproStudy/" }, { "title": "Reproducibility study of \u201cLICO: Explainable Models with Language-Image Consistency\"", "author": "Luan Fletcher, Robert van der Klis, Martin Sedlacek, Stefan Vasilev, Christos Athanasiadis", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99344", "id": "99344", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://openreview.net/forum?id=Mf1H8X5DVb" }, { "title": "On the Reproducibility of: \"Learning Perturbations to Explain Time Series Predictions\"", "author": "Jasper Eppink, Floris Six Dijkstra, Wouter Bant, \u00c1d\u00e1m Div\u00e1k", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99345", "id": "99345", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://openreview.net/forum?id=nPZgtpfgIx" }, { "title": "Reproducibility Study: Equal Improvability: A New Fairness Notion Considering the Long-Term Impact", "author": "Berkay Chakar, Amina Izbassar, Mina Jani\u0107ijevi\u0107, Jakub Tomaszewski", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99346", "id": "99346", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99346.png?t=1731847312.2406797", "project": "https://openreview.net/forum?id=Yj8fUQGXXL" }, { "title": "Chain-of-Thought Unfaithfulness as Disguised Accuracy", "author": "Ana Marasovic, Nathan Stringham, Oliver Bentham", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99347", "id": "99347", "proceeding": "", "pdf": "", "openreview": "", "poster": "", "project": "https://openreview.net/forum?id=ydcrP55u2e" }, { "title": "Studying How to Efficiently and Effectively Guide Models with Explanations - A Reproducibility Study", "author": "Adrian Sauter, Milan Mileti\u0107, Ryan Ott, Rohith Prabakaran", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99349", "id": "99349", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99349.png?t=1732058834.531543", "project": "https://openreview.net/forum?id=9ZzASCVhDF" }, { "title": "Reproducibility Study Of Learning Fair Graph Representations Via Automated Data Augmentations", "author": "Thijmen Nijdam, Juell Sprott, Taiki Papandreou-Lazos, Jurgen de Heus", "status": "Journal", "track": "Journal", "site": "https://neurips.cc/virtual/2024/poster/99350", "id": "99350", "proceeding": "", "pdf": "", "openreview": "", "poster": "/media/PosterPDFs/NeurIPS%202024/99350.png?t=1732374957.6227055", "project": "https://openreview.net/forum?id=4WiqHopXQX" }, { "title": "Noise-Aware Differentially Private Regression via Meta-Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96324", "id": "99rOAM7Jfm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=99rOAM7Jfm", "openreview": "https://openreview.net/forum?id=99rOAM7Jfm", "poster": "/media/PosterPDFs/NeurIPS%202024/96324.png?t=1733328140.5450792", "project": "", "author_site": "Ossi R\u00e4is\u00e4, Stratis Markou, Matthew Ashman, Wessel Bruinsma, Marlon Tobaben, Antti Honkela, Richard Turner", "tldr": "", "abstract": "Many high-stakes applications require machine learning models that protect user privacy and provide well-calibrated, accurate predictions. While Differential Privacy (DP) is the gold standard for protecting user privacy, standard DP mechanisms typically significantly impair performance. One approach to mitigating this issue is pre-training models on simulated data before DP learning on the private data. In this work we go a step further, using simulated data to train a meta-learning model that combines the Convolutional Conditional Neural Process (ConvCNP) with an improved functional DP mechanism of Hall et al. (2013), yielding the DPConvCNP. DPConvCNP learns from simulated data how to map private data to a DP predictive model in one forward pass, and then provides accurate, well-calibrated predictions. We compare DPConvCNP with a DP Gaussian Process (GP) baseline with carefully tuned hyperparameters. The DPConvCNP outperforms the GP baseline, especially on non-Gaussian data, yet is much faster at test time and requires less tuning.", "keywords": "differential privacy;meta-learning;neural processes;Gaussian processes;sim-to-real;probabilistic regression", "primary_area": "privacy", "supplementary_material": "/attachment/96e9f4dc1d94651a95b814a8a121ec6024a60e5c.zip", "author": "Ossi R\u00e4is\u00e4;Stratis Markou;Matthew Ashman;Wessel P Bruinsma;Marlon Tobaben;Antti Honkela;Richard E. Turner", "authorids": "~Ossi_R\u00e4is\u00e41;~Stratis_Markou1;~Matthew_Ashman1;~Wessel_P_Bruinsma1;~Marlon_Tobaben1;~Antti_Honkela1;~Richard_E_Turner1", "gender": "M;M;M;;M;M;", "homepage": ";;https://mattashman.github.io/;https://www.helsinki.fi/en/about-us/people/people-finder/marlon-tobaben-9428638;https://www.cs.helsinki.fi/u/ahonkela/;https://rich-turner-group.github.io/;https://wessel.ai", "dblp": "296/0031;300/3941;;330/3364;h/AnttiHonkela;40/5352;242/3348.html", "google_scholar": "https://scholar.google.fi/citations?user=FpmQ-jcAAAAJ;;j1YiUKUAAAAJ;pgyBA6YAAAAJ;XsyLs6AAAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;QRQwz3cAAAAJ", "orcid": ";;;0000-0002-9778-0853;0000-0001-9193-8093;;", "linkedin": "ossi-r%C3%A4is%C3%A4-749502139/;stratos-m-85884b94/;matthew-ashman-a69017150/;marlon-tobaben/;;;", "or_profile": "~Ossi_R\u00e4is\u00e41;~Stratis_Markou1;~Matthew_Ashman1;~Marlon_Tobaben1;~Antti_Honkela1;~Richard_E_Turner1;~Wessel_Bruinsma1", "aff": "University of Helsinki;;University of Cambridge;University of Helsinki;University of Helsinki;Microsoft Research;", "aff_domain": "helsinki.fi;;cam.ac.uk;helsinki.fi;helsinki.fi;research.microsoft.com;", "position": "PhD student;;PhD student;PhD student;Full Professor;Researcher;", "bibtex": "@inproceedings{\nr{\\\"a}is{\\\"a}2024noiseaware,\ntitle={Noise-Aware Differentially Private Regression via Meta-Learning},\nauthor={Ossi R{\\\"a}is{\\\"a} and Stratis Markou and Matthew Ashman and Wessel P Bruinsma and Marlon Tobaben and Antti Honkela and Richard E. Turner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=99rOAM7Jfm}\n}", "github": "", "reviewers": "aFHD;AqSn;Eg3Y", "pdf_size": 2360483, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "3;4;3", "novelty": "3;4;2", "presentation": "2;3;4", "wc_summary": "97;109;83", "wc_strengths": "67;56;262", "wc_weaknesses": "421;200;103", "wc_questions": "55;119;298", "wc_limitations": "4;1;21", "wc_review": "644;485;767", "wc_reply_reviewers": "20;9;341", "wc_reply_authors": "0;438;292", "reply_reviewers": "1;1;2", "reply_authors": "1;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 96.33333333333333, 10.624918300339486 ], "wc_strengths_avg": [ 128.33333333333334, 94.62322947117983 ], "wc_weaknesses_avg": [ 241.33333333333334, 133.07224437207867 ], "wc_questions_avg": [ 157.33333333333334, 102.84076148211965 ], "wc_limitations_avg": [ 8.666666666666666, 8.806563209081938 ], "wc_review_avg": [ 632.0, 115.43829520570719 ], "wc_reply_reviewers_avg": [ 123.33333333333333, 153.97907505747511 ], "wc_reply_authors_avg": [ 243.33333333333334, 182.09399282299847 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7527833452652390398&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 9, "email": "helsinki.fi;;cam.ac.uk;helsinki.fi;helsinki.fi;research.microsoft.com;", "author_num": 7, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Helsinki;University of Cambridge;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.helsinki.fi;https://www.cam.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UH;Cambridge;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "Finland;United Kingdom;United States" }, { "title": "Separate and Reconstruct: Asymmetric Encoder-Decoder for Speech Separation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96323", "id": "99y2EfLe3B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=99y2EfLe3B", "openreview": "https://openreview.net/forum?id=99y2EfLe3B", "poster": "/media/PosterPDFs/NeurIPS%202024/96323.png?t=1732431728.5684874", "project": "", "author_site": "Ui-Hyeop Shin, Sangyoun Lee, Taehan Kim, Hyung-Min Park", "tldr": "", "abstract": "In speech separation, time-domain approaches have successfully replaced the time-frequency domain with latent sequence feature from a learnable encoder. Conventionally, the feature is separated into speaker-specific ones at the final stage of the network. Instead, we propose a more intuitive strategy that separates features earlier by expanding the feature sequence to the number of speakers as an extra dimension. To achieve this, an asymmetric strategy is presented in which the encoder and decoder are partitioned to perform distinct processing in separation tasks. The encoder analyzes features, and the output of the encoder is split into the number of speakers to be separated. The separated sequences are then reconstructed by the weight-shared decoder, which also performs cross-speaker processing. \nWithout relying on speaker information, the weight-shared network in the decoder directly learns to discriminate features using a separation objective. In addition, to improve performance, traditional methods have extended the sequence length, leading to the adoption of dual-path models, which handle the much longer sequence effectively by segmenting it into chunks. To address this, we introduce global and local Transformer blocks that can directly handle long sequences more efficiently without chunking and dual-path processing. The experimental results demonstrated that this asymmetric structure is effective and that the combination of proposed global and local Transformer can sufficiently replace the role of inter- and intra-chunk processing in dual-path structure. Finally, the presented model combining both of these achieved state-of-the-art performance with much less computation in various benchmark datasets.", "keywords": "Speech Separation;Time-Domain Audio Separation;Asymmetric Encoder-Decoder;Discriminative Learning;Transformer", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/7a1f204d14ae2c7adda1a691d944e74422fe35b9.zip", "author": "Ui-Hyeop Shin;Sangyoun Lee;Taehan Kim;Hyung-Min Park", "authorids": "~Ui-Hyeop_Shin1;~Sangyoun_Lee2;~Taehan_Kim2;~Hyung-Min_Park1", "gender": "M;M;M;M", "homepage": ";;http://iip.sogang.ac.kr;", "dblp": ";;;", "google_scholar": "6MzlaHwAAAAJ;;jXnUwf4AAAAJ;", "orcid": "0000-0002-6145-7157;;;0000-0002-7105-5493", "linkedin": ";leesy236/;taehan-kim-4a2785317/;", "or_profile": "~Ui-Hyeop_Shin1;~Sangyoun_Lee2;~Taehan_Kim2;~Hyung-Min_Park1", "aff": "Sogang University;Sogang University;Sogang University;Sogang University", "aff_domain": "sogang.ac.kr;sogang.ac.kr;sogang.ac.kr;sogang.ac.kr", "position": "PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nshin2024separate,\ntitle={Separate and Reconstruct: Asymmetric Encoder-Decoder for Speech Separation},\nauthor={Ui-Hyeop Shin and Sangyoun Lee and Taehan Kim and Hyung-Min Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=99y2EfLe3B}\n}", "github": "", "reviewers": "y9eH;UNNK;aFMn;Gfro", "pdf_size": 1826982, "rating": "5;6;6;7", "confidence": "3;5;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "1;3;3;2", "wc_summary": "28;77;70;100", "wc_strengths": "88;90;26;140", "wc_weaknesses": "183;136;29;91", "wc_questions": "17;1;81;132", "wc_limitations": "1;1;14;12", "wc_review": "317;305;220;475", "wc_reply_reviewers": "0;0;13;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 68.75, 26.013217794036937 ], "wc_strengths_avg": [ 86.0, 40.422765862815474 ], "wc_weaknesses_avg": [ 109.75, 56.847933119859334 ], "wc_questions_avg": [ 57.75, 52.284677487768825 ], "wc_limitations_avg": [ 7.0, 6.041522986797286 ], "wc_review_avg": [ 329.25, 92.08250376700234 ], "wc_reply_reviewers_avg": [ 7.75, 7.949056547792323 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8672190850383971187&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sogang.ac.kr;sogang.ac.kr;sogang.ac.kr;sogang.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Sogang University", "aff_unique_dep": "", "aff_unique_url": "https://www.sogang.ac.kr", "aff_unique_abbr": "Sogang", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Computational Aspects of Bayesian Persuasion under Approximate Best Response", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96322", "id": "9B0iOkn3UP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9B0iOkn3UP", "openreview": "https://openreview.net/forum?id=9B0iOkn3UP", "poster": "/media/PosterPDFs/NeurIPS%202024/96322.png?t=1733783854.3940432", "project": "", "author_site": "Kunhe Yang, Hanrui Zhang", "tldr": "", "abstract": "We study Bayesian persuasion under approximate best response, where the receiver may choose any action that is not too much suboptimal, given their posterior belief upon receiving the signal. We focus on the computational aspects of the problem, aiming to design algorithms that efficiently compute (almost) optimal strategies for the sender. Despite the absence of the revelation principle --- which has been one of the most powerful tools in Bayesian persuasion --- we design polynomial-time exact algorithms for the problem when either the state space or the action space is small, as well as a quasi-polynomial-time approximation scheme (QPTAS) for the general problem. On the negative side, we show there is no polynomial-time exact algorithm for the general problem unless $\\mathsf{P} = \\mathsf{NP}$. Our results build on several new algorithmic ideas, which might be useful in other principal-agent problems where robustness is desired.", "keywords": "Bayesian persuasion;computational complexity;robustness;approximate best response.", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Kunhe Yang;Hanrui Zhang", "authorids": "~Kunhe_Yang1;~Hanrui_Zhang1", "gender": "F;", "homepage": "https://kunheyang.com/;", "dblp": "267/5467;168/8847", "google_scholar": "-j0q9B4AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Kunhe_Yang1;~Hanrui_Zhang1", "aff": "University of California, Berkeley;Google Research", "aff_domain": "berkeley.edu;google.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nyang2024computational,\ntitle={Computational Aspects of Bayesian Persuasion under Approximate Best Response},\nauthor={Kunhe Yang and Hanrui Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9B0iOkn3UP}\n}", "github": "", "reviewers": "FR8F;zPak;BTN6;R1Q8", "pdf_size": 452138, "rating": "3;5;5;7", "confidence": "5;3;3;4", "soundness": "3;3;3;3", "novelty": "1;2;2;3", "presentation": "2;3;2;4", "wc_summary": "34;62;82;223", "wc_strengths": "42;31;21;170", "wc_weaknesses": "161;120;203;52", "wc_questions": "8;2;22;17", "wc_limitations": "1;1;1;63", "wc_review": "246;216;329;525", "wc_reply_reviewers": "178;93;63;394", "wc_reply_authors": "242;110;0;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.25, 72.89161474408425 ], "wc_strengths_avg": [ 66.0, 60.502066080424065 ], "wc_weaknesses_avg": [ 134.0, 55.70008976653449 ], "wc_questions_avg": [ 12.25, 7.75806032459145 ], "wc_limitations_avg": [ 16.5, 26.846787517317598 ], "wc_review_avg": [ 329.0, 120.4927383704097 ], "wc_reply_reviewers_avg": [ 182.0, 129.46234973921955 ], "wc_reply_authors_avg": [ 96.0, 93.30594836343501 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9817189046093322680&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "berkeley.edu;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.berkeley.edu;https://research.google", "aff_unique_abbr": "UC Berkeley;Google Research", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Articulate your NeRF: Unsupervised articulated object modeling via conditional view synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96321", "id": "9B6J64eTp4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9B6J64eTp4", "openreview": "https://openreview.net/forum?id=9B6J64eTp4", "poster": "/media/PosterPDFs/NeurIPS%202024/96321.png?t=1732879331.3851666", "project": "", "author_site": "Jianning Deng, Kartic Subr, Hakan Bilen", "tldr": "", "abstract": "We propose a novel unsupervised method to learn pose and part-segmentation of articulated objects with rigid parts.\n Given two observations of an object in different articulation states, our method learns the geometry and appearance of object parts by using an implicit model from the first observation, distills the part segmentation and articulation from the second observation while rendering the latter observation.\n Additionally, to tackle the complexities in the joint optimization of part segmentation and articulation, we propose a voxel grid based initialization strategy and a decoupled optimization procedure.\n Compared to the prior unsupervised work, our model obtains significantly better performance, generalizes to objects with multiple parts while it can be efficiently from few views for the latter observation.", "keywords": "articulated object;neural radiance fields;unsupervised learning;analysis by synthesis", "primary_area": "machine_vision", "supplementary_material": "/attachment/a5ef385b2bb410ad613f950d2e20a82600f40d19.zip", "author": "Jianning Deng;Kartic Subr;Hakan Bilen", "authorids": "~Jianning_Deng1;~Kartic_Subr2;~Hakan_Bilen1", "gender": "M;M;M", "homepage": ";http://homepages.inf.ed.ac.uk/ksubr/index.html;http://homepages.inf.ed.ac.uk/hbilen/", "dblp": ";40/2604;97/2993", "google_scholar": ";;PtBtfawAAAAJ", "orcid": "0000-0003-2096-4176;;0000-0002-6947-6918", "linkedin": ";;", "or_profile": "~Jianning_Deng1;~Kartic_Subr2;~Hakan_Bilen1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ndeng2024articulate,\ntitle={Articulate your Ne{RF}: Unsupervised articulated object modeling via conditional view synthesis},\nauthor={Jianning Deng and Kartic Subr and Hakan Bilen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9B6J64eTp4}\n}", "github": "", "reviewers": "AUv9;1D4A;DWX2;JqAE", "pdf_size": 45741653, "rating": "2;6;6;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "1;3;3;2", "wc_summary": "40;62;109;68", "wc_strengths": "9;20;87;105", "wc_weaknesses": "153;36;138;330", "wc_questions": "5;55;90;35", "wc_limitations": "29;10;30;22", "wc_review": "236;183;454;560", "wc_reply_reviewers": "200;19;98;31", "wc_reply_authors": "364;6;18;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.0, 1.7320508075688772 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 69.75, 24.94368657596547 ], "wc_strengths_avg": [ 55.25, 41.42689343892443 ], "wc_weaknesses_avg": [ 164.25, 105.75531901516821 ], "wc_questions_avg": [ 46.25, 30.898017735770686 ], "wc_limitations_avg": [ 22.75, 7.980444849756184 ], "wc_review_avg": [ 358.25, 154.5386278572448 ], "wc_reply_reviewers_avg": [ 87.0, 71.85053931599957 ], "wc_reply_authors_avg": [ 97.0, 154.2886904474855 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16288270240461290805&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ed.ac.uk;ed.ac.uk;ed.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Evaluating the design space of diffusion-based generative models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96320", "id": "9CMOrofB75", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9CMOrofB75", "openreview": "https://openreview.net/forum?id=9CMOrofB75", "poster": "/media/PosterPDFs/NeurIPS%202024/96320.png?t=1733790474.3889062", "project": "", "author_site": "Yuqing Wang, Ye He, Molei Tao", "tldr": "", "abstract": "Most existing theoretical investigations of the accuracy of diffusion models, albeit significant, assume the score function has been approximated to a certain accuracy, and then use this a priori bound to control the error of generation. This article instead provides a first quantitative understanding of the whole generation process, i.e., both training and sampling. More precisely, it conducts a non-asymptotic convergence analysis of denoising score matching under gradient descent. In addition, a refined sampling error analysis for variance exploding models is also provided. The combination of these two results yields a full error analysis, which elucidates (again, but this time theoretically) how to design the training and sampling processes for effective generation. For instance, our theory implies a preference toward noise distribution and loss weighting in training that qualitatively agree with the ones used in [Karras et al., 2022]. It also provides perspectives on the choices of time and variance schedules in sampling: when the score is well trained, the design in [Song et al., 2021] is more preferable, but when it is less trained, the design in [Karras et al., 2022] becomes more preferable.", "keywords": "diffusion-based models;denoising score matching;convergence of neural network training;generation error analysis;noising distribution;loss weighting;time schedule;variance schedule", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yuqing Wang;Ye He;Molei Tao", "authorids": "~Yuqing_Wang3;~Ye_He1;~Molei_Tao1", "gender": ";M;", "homepage": "https://yzwangyuqing.github.io;https://yeleohe.github.io/;http://people.math.gatech.edu/~mtao8/", "dblp": ";72/7636-3;56/9263", "google_scholar": "c7Bi9RUAAAAJ;PC25rDIAAAAJ;", "orcid": ";0000-0003-4686-8449;", "linkedin": ";;", "or_profile": "~Yuqing_Wang3;~Ye_He1;~Molei_Tao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2024evaluating,\ntitle={Evaluating the design space of diffusion-based generative models},\nauthor={Yuqing Wang and Ye He and Molei Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9CMOrofB75}\n}", "github": "", "reviewers": "1csf;Ti47;RRb9;5uDw", "pdf_size": 1040447, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "1;2;2;4", "wc_summary": "34;70;56;36", "wc_strengths": "105;53;61;21", "wc_weaknesses": "243;216;162;54", "wc_questions": "111;106;198;46", "wc_limitations": "261;14;25;1", "wc_review": "754;459;502;158", "wc_reply_reviewers": "438;92;149;11", "wc_reply_authors": "1557;407;1012;0", "reply_reviewers": "2;2;1;1", "reply_authors": "3;2;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 49.0, 14.866068747318506 ], "wc_strengths_avg": [ 60.0, 29.9833287011299 ], "wc_weaknesses_avg": [ 168.75, 72.38568573965436 ], "wc_questions_avg": [ 115.25, 54.19121238724965 ], "wc_limitations_avg": [ 75.25, 107.57875022512577 ], "wc_review_avg": [ 468.25, 211.62038536020106 ], "wc_reply_reviewers_avg": [ 172.5, 160.9386529084918 ], "wc_reply_authors_avg": [ 744.0, 591.5864264839078 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10130128767907377258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SpikeReveal: Unlocking Temporal Sequences from Real Blurry Inputs with Spike Streams", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96319", "id": "9FYat8HPpv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9FYat8HPpv", "openreview": "https://openreview.net/forum?id=9FYat8HPpv", "poster": "/media/PosterPDFs/NeurIPS%202024/96319.png?t=1734010436.3356745", "project": "", "author_site": "Kang Chen, Shiyan Chen, Jiyuan Zhang, Baoyue Zhang, Yajing Zheng, Tiejun Huang, Zhaofei Yu", "tldr": "", "abstract": "Reconstructing a sequence of sharp images from the blurry input is crucial for enhancing our insights into the captured scene and poses a significant challenge due to the limited temporal features embedded in the image. Spike cameras, sampling at rates up to 40,000 Hz, have proven effective in capturing motion features and beneficial for solving this ill-posed problem. Nonetheless, existing methods fall into the supervised learning paradigm, which suffers from notable performance degradation when applied to real-world scenarios that diverge from the synthetic training data domain. To address these challenges, we propose the first self-supervised framework for the task of spike-guided motion deblurring. Our approach begins with the formulation of a spike-guided deblurring model that explores the theoretical relationships among spike streams, blurry images, and their corresponding sharp sequences. We subsequently develop a self-supervised cascaded framework to alleviate the issues of spike noise and spatial-resolution mismatching encountered in the deblurring model. With knowledge distillation and re-blurring loss, we further design a lightweight deblur network to generate high-quality sequences with brightness and texture consistency with the original input. Quantitative and qualitative experiments conducted on our real-world and synthetic datasets with spikes validate the superior generalization of the proposed framework. Our code, data and trained models are available at \\url{https://github.com/chenkang455/S-SDM}.", "keywords": "Motion Deblur;Spike Camera;Self-supervised", "primary_area": "machine_vision", "supplementary_material": "/attachment/efe6ef4c6cd32b07336f3f1f6bb1201c64ca63c1.zip", "author": "Kang Chen;Shiyan Chen;Jiyuan Zhang;Baoyue Zhang;Yajing Zheng;Tiejun Huang;Zhaofei Yu", "authorids": "~Kang_Chen9;~Shiyan_Chen1;~Jiyuan_Zhang3;~Baoyue_Zhang1;~Yajing_Zheng1;~Tiejun_Huang1;~Zhaofei_Yu1", "gender": "M;;M;F;F;M;M", "homepage": "https://chenkang455.github.io/;;;;https://zyj061.github.io;https://idm.pku.edu.cn/~tjhuang/;https://yuzhaofei.github.io", "dblp": ";;;97/10613.html;230/4398;h/TiejunHuang;166/0573", "google_scholar": "F5feBP4AAAAJ;;ukHrw0IAAAAJ;;_bUM0NcAAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;qaUgD50AAAAJ", "orcid": ";;;;;0000-0002-4234-6099;", "linkedin": ";;jiyuanzhang-leo;;;;", "or_profile": "~Kang_Chen9;~Shiyan_Chen1;~Jiyuan_Zhang3;~Baoyue_Zhang1;~Yajing_Zheng1;~Tiejun_Huang1;~Zhaofei_Yu1", "aff": "Peking University;;Peking University;Dalian University of Technology;Peking University;Peking University;Peking University", "aff_domain": "stu.pku.edu.cn;;pku.edu.cn;dlut.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;;PhD student;Undergrad student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024spikereveal,\ntitle={SpikeReveal: Unlocking Temporal Sequences from Real Blurry Inputs with Spike Streams},\nauthor={Kang Chen and Shiyan Chen and Jiyuan Zhang and Baoyue Zhang and Yajing Zheng and Tiejun Huang and Zhaofei Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9FYat8HPpv}\n}", "github": "", "reviewers": "RXBh;1hhy;G1PY;VbQb", "pdf_size": 13852129, "rating": "5;6;6;7", "confidence": "4;4;5;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "52;56;28;87", "wc_strengths": "41;56;22;71", "wc_weaknesses": "34;28;94;61", "wc_questions": "3;126;15;36", "wc_limitations": "1;57;3;6", "wc_review": "131;323;162;261", "wc_reply_reviewers": "0;0;10;59", "wc_reply_authors": "0;0;27;34", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 55.75, 20.980645843252777 ], "wc_strengths_avg": [ 47.5, 18.145247311624054 ], "wc_weaknesses_avg": [ 54.25, 26.099568961957974 ], "wc_questions_avg": [ 45.0, 48.23380557244058 ], "wc_limitations_avg": [ 16.75, 23.306383245797704 ], "wc_review_avg": [ 219.25, 76.76709907245422 ], "wc_reply_reviewers_avg": [ 17.25, 24.44764814864612 ], "wc_reply_authors_avg": [ 15.25, 15.449514555480375 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14599303082559697418&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stu.pku.edu.cn;;pku.edu.cn;dlut.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Peking University;Dalian University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.dlut.edu.cn/", "aff_unique_abbr": "Peking U;DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Leveraging Hallucinations to Reduce Manual Prompt Dependency in Promptable Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96318", "id": "9GhSOp1LYH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9GhSOp1LYH", "openreview": "https://openreview.net/forum?id=9GhSOp1LYH", "poster": "/media/PosterPDFs/NeurIPS%202024/96318.png?t=1731495041.3755798", "project": "", "author_site": "Jian Hu, Jiayi Lin, Junchi Yan, Shaogang Gong", "tldr": "", "abstract": "Promptable segmentation typically requires instance-specific manual prompts to guide the segmentation of each desired object. To minimize such a need, task-generic promptable segmentation has been introduced, which employs a single task-generic prompt to segment various images of different objects in the same task. Current methods use Multimodal Large Language Models (MLLMs) to reason detailed instance-specific prompts from a task-generic prompt for improving segmentation accuracy. The effectiveness of this segmentation heavily depends on the precision of these derived prompts. However, MLLMs often suffer hallucinations during reasoning, resulting in inaccurate prompting. While existing methods focus on eliminating hallucinations to improve a model, we argue that MLLM hallucinations can reveal valuable contextual insights when leveraged correctly, as they represent pre-trained large-scale knowledge beyond individual images. In this paper, we first utilize hallucinations to mine task-related information from images and verify its accuracy to enhance precision of the generated prompts. Specifically, we introduce an iterative \\textbf{Pro}mpt-\\textbf{Ma}sk \\textbf{C}ycle generation framework (ProMaC) with a prompt generator and a mask generator. The prompt generator uses a multi-scale chain of thought prompting, initially leveraging hallucinations to extract extended contextual prompts on a test image. These hallucinations are then minimized to formulate precise instance-specific prompts, directing the mask generator to produce masks that are consistent with task semantics by mask semantic alignment. Iteratively the generated masks induce the prompt generator to focus more on task-relevant image areas and reduce irrelevant hallucinations, resulting jointly in better prompts and masks. Experiments on 5 benchmarks demonstrate the effectiveness of ProMaC. Code is in https://lwpyh.github.io/ProMaC/.", "keywords": "Camouflaged Object Detection; Transfer Learning; Test-time Domain Adaptation; Manual-free Promptable Segmentation; Unsupervised Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/5ad917c5ea3173b5ad09c378f1cea19f4aa8e70a.zip", "author": "Jian Hu;Jiayi Lin;Junchi Yan;Shaogang Gong", "authorids": "~Jian_Hu4;~Jiayi_Lin2;~Junchi_Yan2;~Shaogang_Gong2", "gender": "M;F;;", "homepage": "https://lwpyh.github.io/;https://jylin8100.github.io/;;", "dblp": "61/5788-2;238/1997-2;;", "google_scholar": "unJmXtoAAAAJ;l4Fps4EAAAAJ;;", "orcid": "0000-0001-9918-672X;;;", "linkedin": ";jiayi-lin-922947263/;;", "or_profile": "~Jian_Hu4;~Jiayi_Lin2;~Junchi_Yan2;~Shaogang_Gong2", "aff": "Queen Mary, University of London;Queen Mary University of London;;", "aff_domain": "qmul.ac.uk;qmul.ac.uk;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nhu2024leveraging,\ntitle={Leveraging Hallucinations to Reduce Manual Prompt Dependency in Promptable Segmentation},\nauthor={Jian Hu and Jiayi Lin and Junchi Yan and Shaogang Gong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9GhSOp1LYH}\n}", "github": "", "reviewers": "E6it;u8FA;E5Cz;fu4N", "pdf_size": 31575546, "rating": "6;6;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;3", "wc_summary": "44;89;104;53", "wc_strengths": "191;72;46;113", "wc_weaknesses": "199;255;195;83", "wc_questions": "3;64;4;126", "wc_limitations": "3;9;26;84", "wc_review": "440;489;375;459", "wc_reply_reviewers": "24;22;19;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 24.78406746278746 ], "wc_strengths_avg": [ 105.5, 54.83839895547644 ], "wc_weaknesses_avg": [ 183.0, 62.41794613730894 ], "wc_questions_avg": [ 49.25, 50.73152373031979 ], "wc_limitations_avg": [ 30.5, 32.01952529317073 ], "wc_review_avg": [ 440.75, 41.78740839056665 ], "wc_reply_reviewers_avg": [ 19.25, 4.548351349665063 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=467195812460304069&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "qmul.ac.uk;qmul.ac.uk;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Queen Mary, University of London;Queen Mary University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.qmul.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL;QMUL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Spectral-Risk Safe Reinforcement Learning with Convergence Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96317", "id": "9JFSJitKC0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9JFSJitKC0", "openreview": "https://openreview.net/forum?id=9JFSJitKC0", "poster": "/media/PosterPDFs/NeurIPS%202024/96317.png?t=1731751293.8260179", "project": "", "author_site": "Dohyeong Kim, Taehyun Cho, Seungyub Han, Hojun Chung, Kyungjae Lee, Songhwai Oh", "tldr": "", "abstract": "The field of risk-constrained reinforcement learning (RCRL) has been developed to effectively reduce the likelihood of worst-case scenarios by explicitly handling risk-measure-based constraints.\nHowever, the nonlinearity of risk measures makes it challenging to achieve convergence and optimality.\nTo overcome the difficulties posed by the nonlinearity, we propose a spectral risk measure-constrained RL algorithm, spectral-risk-constrained policy optimization (SRCPO), a bilevel optimization approach that utilizes the duality of spectral risk measures.\nIn the bilevel optimization structure, the outer problem involves optimizing dual variables derived from the risk measures, while the inner problem involves finding an optimal policy given these dual variables.\nThe proposed method, to the best of our knowledge, is the first to guarantee convergence to an optimum in the tabular setting.\nFurthermore, the proposed method has been evaluated on continuous control tasks and showed the best performance among other RCRL algorithms satisfying the constraints.\nOur code is available at https://github.com/rllab-snu/Spectral-Risk-Constrained-RL.", "keywords": "Safe Reinforcement Learning;Risk Constraint;Spectral Risk Measure", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/eba42c5f32032d385167a7aeb19da89175d30573.zip", "author": "Dohyeong Kim;Taehyun Cho;Seungyub Han;Hojun Chung;Kyungjae Lee;Songhwai Oh", "authorids": "~Dohyeong_Kim1;~Taehyun_Cho1;~Seungyub_Han1;~Hojun_Chung1;~Kyungjae_Lee1;~Songhwai_Oh1", "gender": "M;M;M;M;M;", "homepage": "https://dobro12.github.io/;;;https://rllab.snu.ac.kr/people/hojun-chung/profile;https://sites.google.com/view/kyungjaelee;https://rllab.snu.ac.kr/", "dblp": "126/4248;274/0287;347/8731;;13/7265-1;17/3173", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;ot1-XNAAAAAJ;;https://scholar.google.co.kr/citations?user=OZZJagIAAAAJ;VEzNY_oAAAAJ", "orcid": "0000-0003-0788-6089;0000-0003-1047-9847;0009-0001-8704-8968;;0000-0003-0147-2715;0000-0002-9781-2018", "linkedin": ";;;;;", "or_profile": "~Dohyeong_Kim1;~Taehyun_Cho1;~Seungyub_Han1;~Hojun_Chung1;~Kyungjae_Lee1;~Songhwai_Oh1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;ChungAng University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;cau.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkim2024spectralrisk,\ntitle={Spectral-Risk Safe Reinforcement Learning with Convergence Guarantees},\nauthor={Dohyeong Kim and Taehyun Cho and Seungyub Han and Hojun Chung and Kyungjae Lee and Songhwai Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9JFSJitKC0}\n}", "github": "", "reviewers": "DPN9;tq31;WtS2;s1fZ", "pdf_size": 2600582, "rating": "7;7;7;7", "confidence": "3;3;4;2", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "88;105;107;59", "wc_strengths": "37;70;20;71", "wc_weaknesses": "37;185;19;126", "wc_questions": "134;5;141;8", "wc_limitations": "3;5;1;11", "wc_review": "299;370;288;275", "wc_reply_reviewers": "121;49;76;14", "wc_reply_authors": "114;143;57;0", "reply_reviewers": "3;2;2;1", "reply_authors": "3;2;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.75, 19.22725929507375 ], "wc_strengths_avg": [ 49.5, 21.84605227495348 ], "wc_weaknesses_avg": [ 91.75, 67.37720311796862 ], "wc_questions_avg": [ 72.0, 65.55532015023647 ], "wc_limitations_avg": [ 5.0, 3.7416573867739413 ], "wc_review_avg": [ 308.0, 36.78994427829431 ], "wc_reply_reviewers_avg": [ 65.0, 39.096035604649224 ], "wc_reply_authors_avg": [ 78.5, 54.874857630794814 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rBta8LDnvZAJ:scholar.google.com/&scioq=Spectral-Risk+Safe+Reinforcement+Learning+with+Convergence+Guarantees&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;cau.ac.kr;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Seoul National University;Chungang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "SNU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Optimization Algorithm Design via Electric Circuits", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96316", "id": "9Jmt1eER9P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9Jmt1eER9P", "openreview": "https://openreview.net/forum?id=9Jmt1eER9P", "poster": "", "project": "", "author_site": "Stephen Boyd, Tetiana Parshakova, Ernest Ryu, Jaewook J. Suh", "tldr": "", "abstract": "We present a novel methodology for convex optimization algorithm design using ideas from electric RLC circuits. Given an optimization problem, the first stage of the methodology is to design an appropriate electric circuit whose continuous-time dynamics converge to the solution of the optimization problem at hand. Then, the second stage is an automated, computer-assisted discretization of the continuous-time dynamics, yielding a provably convergent discrete-time algorithm. Our methodology recovers many classical (distributed) optimization algorithms and enables users to quickly design and explore a wide range of new algorithms with convergence guarantees.", "keywords": "Convex optimization;Distributed optimization;Decentralized optimization;ADMM;Alternating direction method of multipliers;PG-EXTRA;Performance estimation problem;Continuous-time analysis;first-order optimization;proximal methods", "primary_area": "optimization", "supplementary_material": "", "author": "Stephen P. Boyd;Tetiana Parshakova;Ernest K. Ryu;Jaewook J. Suh", "authorids": "~Stephen_P._Boyd1;~Tetiana_Parshakova1;~Ernest_K._Ryu1;~Jaewook_J._Suh1", "gender": ";;M;M", "homepage": "https://web.stanford.edu/~boyd/;https://parshakova.github.io/;http://www.math.snu.ac.kr/~ernestryu/;https://jaewookjsuh.github.io/", "dblp": "b/SPBoyd;;165/5192;323/9242", "google_scholar": "GExyiRkAAAAJ;;CNOqUZoAAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": "0000-0001-8353-6000;;0000-0001-6820-9095;", "linkedin": ";;;", "or_profile": "~Stephen_P._Boyd1;~Tetiana_Parshakova1;~Ernest_K._Ryu1;~Jaewook_J._Suh1", "aff": ";Stanford University;Seoul National University;Seoul National University", "aff_domain": ";stanford.edu;snu.ac.kr;snu.ac.kr", "position": ";PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nboyd2024optimization,\ntitle={Optimization Algorithm Design via Electric Circuits},\nauthor={Stephen P. Boyd and Tetiana Parshakova and Ernest K. Ryu and Jaewook J. Suh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9Jmt1eER9P}\n}", "github": "", "reviewers": "UMkv;iKJ3;bgKe;a4Xt", "pdf_size": 999103, "rating": "6;6;6;7", "confidence": "2;4;2;2", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "3;3;2;3", "wc_summary": "57;39;64;56", "wc_strengths": "86;37;60;70", "wc_weaknesses": "107;184;31;64", "wc_questions": "133;88;160;69", "wc_limitations": "11;21;1;18", "wc_review": "394;369;316;277", "wc_reply_reviewers": "82;18;51;17", "wc_reply_authors": "0;0;662;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.0, 9.192388155425117 ], "wc_strengths_avg": [ 63.25, 17.76759691123141 ], "wc_weaknesses_avg": [ 96.5, 57.256004051976944 ], "wc_questions_avg": [ 112.5, 35.947878936037384 ], "wc_limitations_avg": [ 12.75, 7.693341276714559 ], "wc_review_avg": [ 339.0, 45.54667935206693 ], "wc_reply_reviewers_avg": [ 42.0, 26.842131062939096 ], "wc_reply_authors_avg": [ 165.5, 286.6544086526492 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1922612850876108298&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";stanford.edu;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.snu.ac.kr", "aff_unique_abbr": "Stanford;SNU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "Stability and Generalization of Adversarial Training for Shallow Neural Networks with Smooth Activation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96315", "id": "9Nsa4lVZeD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9Nsa4lVZeD", "openreview": "https://openreview.net/forum?id=9Nsa4lVZeD", "poster": "/media/PosterPDFs/NeurIPS%202024/96315.png?t=1733680018.019378", "project": "", "author_site": "Kaibo Zhang, Yunjuan Wang, Raman Arora", "tldr": "", "abstract": "Adversarial training has emerged as a popular approach for training models that are robust to inference-time adversarial attacks. However, our theoretical understanding of why and when it works remains limited. Prior work has offered generalization analysis of adversarial training, but they are either restricted to the Neural Tangent Kernel (NTK) regime or they make restrictive assumptions about data such as (noisy) linear separability or robust realizability. In this work, we study the stability and generalization of adversarial training for two-layer networks **without any data distribution assumptions** and **beyond the NTK regime**. Our findings suggest that for networks with *any given initialization* and *sufficiently large width*, the generalization bound can be effectively controlled via early stopping. We further improve the generalization bound by leveraging smoothing using Moreau\u2019s envelope.", "keywords": "Stability;adversarial training;neural networks;optimization and generalization guarantees;Moreau envelope;convexity;smoothness", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Kaibo Zhang;Yunjuan Wang;Raman Arora", "authorids": "~Kaibo_Zhang3;~Yunjuan_Wang1;~Raman_Arora1", "gender": "M;F;M", "homepage": ";https://yunjuanwang.github.io/;http://www.cs.jhu.edu/~raman/Home.html", "dblp": "72/9686;31/560;", "google_scholar": ";t_VSEEwAAAAJ;Spe0xdkAAAAJ", "orcid": ";;", "linkedin": "kaibo-zhang-97b911230/;yunjuan-wang-12ab85169/;", "or_profile": "~Kaibo_Zhang3;~Yunjuan_Wang1;~Raman_Arora1", "aff": "Department of Computer Science, Whiting School of Engineering;Johns Hopkins University;Johns Hopkins University", "aff_domain": "cs.jhu.edu;jhu.edu;jhu.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2024stability,\ntitle={Stability and Generalization of Adversarial Training for Shallow Neural Networks with Smooth Activation},\nauthor={Kaibo Zhang and Yunjuan Wang and Raman Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9Nsa4lVZeD}\n}", "github": "", "reviewers": "381R;GasY;fRZq;RLPb", "pdf_size": 424921, "rating": "6;6;6;7", "confidence": "3;4;3;3", "soundness": "3;4;3;2", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "60;95;53;40", "wc_strengths": "44;90;45;95", "wc_weaknesses": "202;109;59;335", "wc_questions": "24;83;36;60", "wc_limitations": "21;8;1;8", "wc_review": "351;385;194;538", "wc_reply_reviewers": "42;39;47;102", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.0, 20.359273071502333 ], "wc_strengths_avg": [ 68.5, 24.06761309311748 ], "wc_weaknesses_avg": [ 176.25, 105.04136090131354 ], "wc_questions_avg": [ 50.75, 22.68672519338126 ], "wc_limitations_avg": [ 9.5, 7.22841614740048 ], "wc_review_avg": [ 367.0, 122.21906561580316 ], "wc_reply_reviewers_avg": [ 57.5, 25.85053190942113 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BihlzhQUT2QJ:scholar.google.com/&scioq=Stability+and+Generalization+of+Adversarial+Training+for+Shallow+Neural+Networks+with+Smooth+Activation&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cs.jhu.edu;jhu.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "0", "aff_campus_unique": "Baltimore;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Weisfeiler and Leman Go Loopy: A New Hierarchy for Graph Representational Learning", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96314", "id": "9O2sVnEHor", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9O2sVnEHor", "openreview": "https://openreview.net/forum?id=9O2sVnEHor", "poster": "", "project": "", "author_site": "Raffaele Paolino, Sohir Maskey, Pascal Welke, Gitta Kutyniok", "tldr": "", "abstract": "We introduce $r$-loopy Weisfeiler-Leman ($r$-$\\ell$WL), a novel hierarchy of graph isomorphism tests and a corresponding GNN framework, $r$-$\\ell$MPNN, that can count cycles up to length $r{+}2$. Most notably, we show that $r$-$\\ell$WL can count homomorphisms of cactus graphs. This extends 1-WL, which can only count homomorphisms of trees and, in fact, is incomparable to $k$-WL for any fixed $k$. We empirically validate the expressive and counting power of $r$-$\\ell$MPNN on several synthetic datasets and demonstrate the scalability and strong performance on various real-world datasets, particularly on sparse graphs.", "keywords": "Graph Neural Networks;Weisfeiler-Leman (WL) Test;Homomorphism Counting;Theory and Expressivity in GNNs;Cactus Graphs", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Raffaele Paolino;Sohir Maskey;Pascal Welke;Gitta Kutyniok", "authorids": "~Raffaele_Paolino1;~Sohir_Maskey1;~Pascal_Welke1;~Gitta_Kutyniok2", "gender": "M;;M;F", "homepage": ";https://www.sohirmaskey.com/;https://pwelke.de;https://www.ai.math.lmu.de/kutyniok", "dblp": "331/2660;302/4278;174/0119;13/2736", "google_scholar": "uF27REUAAAAJ;3KpzqLMAAAAJ;https://scholar.google.de/citations?user=hgwvC6gAAAAJ;https://scholar.google.de/citations?user=JHs9LssAAAAJ", "orcid": ";0000-0002-9691-6712;0000-0002-2123-3781;0000-0001-9738-2487", "linkedin": "raffaele-paolino-717653202;;;gitta-kutyniok-2606b215/?originalSubdomain=de", "or_profile": "~Raffaele_Paolino1;~Sohir_Maskey1;~Pascal_Welke1;~Gitta_Kutyniok2", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Munich;TU Wien;LMU Munich", "aff_domain": "lmu.de;math.lmu;tuwien.ac.at;uni-muenchen.de", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\npaolino2024weisfeiler,\ntitle={Weisfeiler and Leman Go Loopy: A New Hierarchy for Graph Representational Learning},\nauthor={Raffaele Paolino and Sohir Maskey and Pascal Welke and Gitta Kutyniok},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9O2sVnEHor}\n}", "github": "", "reviewers": "rfpS;zkXv;pJ5G;1SHr", "pdf_size": 712194, "rating": "7;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "25;84;40;61", "wc_strengths": "44;84;15;117", "wc_weaknesses": "31;239;53;59", "wc_questions": "62;7;30;99", "wc_limitations": "7;11;1;44", "wc_review": "169;425;139;380", "wc_reply_reviewers": "23;89;0;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 52.5, 22.23173407541571 ], "wc_strengths_avg": [ 65.0, 38.749193539995126 ], "wc_weaknesses_avg": [ 95.5, 83.50299395830068 ], "wc_questions_avg": [ 49.5, 34.61574786134195 ], "wc_limitations_avg": [ 15.75, 16.69393602479655 ], "wc_review_avg": [ 278.25, 125.71271813146035 ], "wc_reply_reviewers_avg": [ 36.25, 32.72136152423979 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4232155808498782917&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "email": "lmu.de;math.lmu;tuwien.ac.at;uni-muenchen.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Munich;Technische Universit\u00e4t Wien;Ludwig Maximilian University of Munich", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.lmu.de;https://www.uni-muenchen.de;https://www.tuwien.ac.at;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU;TU Wien;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;Austria" }, { "title": "Aligning Model Properties via Conformal Risk Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96313", "id": "9OHXQybMZB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9OHXQybMZB", "openreview": "https://openreview.net/forum?id=9OHXQybMZB", "poster": "/media/PosterPDFs/NeurIPS%202024/96313.png?t=1731652968.3660765", "project": "", "author_site": "William Overman, Jacqueline Vallon, Mohsen Bayati", "tldr": "", "abstract": "AI model alignment is crucial due to inadvertent biases in training data and the underspecified machine learning pipeline, where models with excellent test metrics may not meet end-user requirements. While post-training alignment via human feedback shows promise, these methods are often limited to generative AI settings where humans can interpret and provide feedback on model outputs. In traditional non-generative settings with numerical or categorical outputs, detecting misalignment through single-sample outputs remains challenging, and enforcing alignment during training requires repeating costly training processes.\nIn this paper we consider an alternative strategy. We propose interpreting model alignment through property testing, defining an aligned model $f$ as one belonging to a subset $\\mathcal{P}$ of functions that exhibit specific desired behaviors. We focus on post-processing a pre-trained model $f$ to better align with $\\mathcal{P}$ using conformal risk control. Specifically, we develop a general procedure for converting queries for testing a given property $\\mathcal{P}$ to a collection of loss functions suitable for use in a conformal risk control algorithm. We prove a probabilistic guarantee that the resulting conformal interval around $f$ contains a function approximately satisfying $\\mathcal{P}$. We exhibit applications of our methodology on a collection of supervised learning datasets for (shape-constrained) properties such as monotonicity and concavity. The general procedure is flexible and can be applied to a wide range of desired properties. Finally, we prove that pre-trained models will always require alignment techniques even as model sizes or training data increase, as long as the training data contains even small biases.", "keywords": "Alignment;Conformal Prediction;Conformal Risk Control;Property Testing", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "William Overman;Jacqueline Jil Vallon;Mohsen Bayati", "authorids": "~William_Overman1;~Jacqueline_Jil_Vallon1;~Mohsen_Bayati1", "gender": ";;M", "homepage": ";https://web.stanford.edu/~bayati/;https://willoverman.github.io/", "dblp": ";73/6405;294/4924", "google_scholar": ";PS-TM94AAAAJ;B2XPxEkAAAAJ", "orcid": "0000-0001-5756-8527;;", "linkedin": ";;", "or_profile": "~Jacqueline_Jil_Vallon1;~Mohsen_Bayati1;~Will_Overman1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\noverman2024aligning,\ntitle={Aligning Model Properties via Conformal Risk Control},\nauthor={William Overman and Jacqueline Jil Vallon and Mohsen Bayati},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9OHXQybMZB}\n}", "github": "", "reviewers": "9EgD;8oYs;Hsyg;7tC7", "pdf_size": 492494, "rating": "4;5;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "65;62;96;75", "wc_strengths": "30;52;116;46", "wc_weaknesses": "101;103;97;47", "wc_questions": "1;77;169;4", "wc_limitations": "1;57;62;58", "wc_review": "198;351;540;230", "wc_reply_reviewers": "0;22;0;98", "wc_reply_authors": "62;28;0;46", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 13.313526955694348 ], "wc_strengths_avg": [ 61.0, 32.7566787083184 ], "wc_weaknesses_avg": [ 87.0, 23.194827009486403 ], "wc_questions_avg": [ 62.75, 68.47764233675105 ], "wc_limitations_avg": [ 44.5, 25.184320518926057 ], "wc_review_avg": [ 329.75, 134.13123238082918 ], "wc_reply_reviewers_avg": [ 30.0, 40.27406113120453 ], "wc_reply_authors_avg": [ 34.0, 23.021728866442675 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17799455135971237757&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "UMB: Understanding Model Behavior for Open-World Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96312", "id": "9Pa6cCB3gL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9Pa6cCB3gL", "openreview": "https://openreview.net/forum?id=9Pa6cCB3gL", "poster": "/media/PosterPDFs/NeurIPS%202024/96312.png?t=1731238312.388307", "project": "", "author_site": "Xing Xi, Yangyang Huang, Zhijie Zhong, Ronghua Luo", "tldr": "", "abstract": "Open-World Object Detection (OWOD) is a challenging task that requires the detector to identify unlabeled objects and continuously demands the detector to learn new knowledge based on existing ones. Existing methods primarily focus on recalling unknown objects, neglecting to explore the reasons behind them. This paper aims to understand the model's behavior in predicting the unknown category. First, we model the text attribute and the positive sample probability, obtaining their empirical probability, which can be seen as the detector's estimation of the likelihood of the target with certain known attributes being predicted as the foreground. Then, we jointly decide whether the current object should be categorized in the unknown category based on the empirical, the in-distribution, and the out-of-distribution probability. Finally, based on the decision-making process, we can infer the similarity of an unknown object to known classes and identify the attribute with the most significant impact on the decision-making process. This additional information can help us understand the behavior of the model's prediction in the unknown class. The evaluation results on the Real-World Object Detection (RWD) benchmark, which consists of five real-world application datasets, show that we surpassed the previous state-of-the-art (SOTA) with an absolute gain of 5.3 mAP for unknown classes, reaching 20.5 mAP. Our code is available at https://github.com/xxyzll/UMB.", "keywords": "Computer Vision;Object Detection;Open-World Object Detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xing Xi;Yangyang Huang;Zhijie Zhong;Ronghua Luo", "authorids": "~Xing_Xi1;~Yangyang_Huang1;~Zhijie_Zhong2;~Ronghua_Luo1", "gender": "M;M;M;M", "homepage": ";;https://github.com/EmorZz1G;", "dblp": ";192/3151;;", "google_scholar": "5PGppCcAAAAJ;;rn_Xd0kAAAAJ;", "orcid": "0000-0001-9069-2626;0009-0001-8694-5755;0000-0003-0203-8419;0000-0001-8629-3323", "linkedin": ";;;", "or_profile": "~Xing_Xi1;~Yangyang_Huang1;~Zhijie_Zhong2;~Ronghua_Luo1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "PhD student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nxi2024umb,\ntitle={{UMB}: Understanding Model Behavior for Open-World Object Detection},\nauthor={Xing Xi and Yangyang Huang and Zhijie Zhong and Ronghua Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9Pa6cCB3gL}\n}", "github": "", "reviewers": "jKzy;sXkA;N6Np;bGxY", "pdf_size": 18403578, "rating": "5;6;6;8", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;2;3", "wc_summary": "110;100;100;49", "wc_strengths": "25;41;44;16", "wc_weaknesses": "43;32;109;10", "wc_questions": "2;248;75;41", "wc_limitations": "1;5;29;1", "wc_review": "181;426;357;117", "wc_reply_reviewers": "0;49;20;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 89.75, 23.878599205146017 ], "wc_strengths_avg": [ 31.5, 11.5 ], "wc_weaknesses_avg": [ 48.5, 36.89512162874653 ], "wc_questions_avg": [ 91.5, 93.97473064606251 ], "wc_limitations_avg": [ 9.0, 11.661903789690601 ], "wc_review_avg": [ 270.25, 125.73260317037901 ], "wc_reply_reviewers_avg": [ 22.25, 17.469616481193857 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3pDhIv07ucIJ:scholar.google.com/&scioq=UMB:+Understanding+Model+Behavior+for+Open-World+Object+Detection&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "MSPE: Multi-Scale Patch Embedding Prompts Vision Transformers to Any Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96311", "id": "9Q9UiAyV40", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9Q9UiAyV40", "openreview": "https://openreview.net/forum?id=9Q9UiAyV40", "poster": "", "project": "", "author_site": "Wenzhuo Liu, Fei Zhu, Shijie Ma, Cheng-lin Liu", "tldr": "", "abstract": "Although Vision Transformers (ViTs) have recently advanced computer vision tasks significantly, an important real-world problem was overlooked: adapting to variable input resolutions. Typically, images are resized to a fixed resolution, such as 224x224, for efficiency during training and inference. However, uniform input size conflicts with real-world scenarios where images naturally vary in resolution. Modifying the preset resolution of a model may severely degrade the performance. In this work, we propose to enhance the model adaptability to resolution variation by optimizing the patch embedding. The proposed method, called Multi-Scale Patch Embedding (MSPE), substitutes the standard patch embedding with multiple variable-sized patch kernels and selects the best parameters for different resolutions, eliminating the need to resize the original image. Our method does not require high-cost training or modifications to other parts, making it easy to apply to most ViT models. Experiments in image classification, segmentation, and detection tasks demonstrate the effectiveness of MSPE, yielding superior performance on low-resolution inputs and performing comparably on high-resolution inputs with existing methods.", "keywords": "Multi-Resolution Vision Model;Vision Transformers;Multi-Scale Patch Embedding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wenzhuo Liu;Fei Zhu;Shijie Ma;Cheng-Lin Liu", "authorids": "~Wenzhuo_Liu5;~Fei_Zhu1;~Shijie_Ma1;~Cheng-Lin_Liu2", "gender": "M;M;M;M", "homepage": "http://www.nlpr.ia.ac.cn/pal/People/ZhuFei.html;https://mashijie1028.github.io/;http://www.nlpr.ia.ac.cn/liucl/;http://www.nlpr.ia.ac.cn/pal/People/LiuWenZhuo.html", "dblp": ";191/4553;24/3006-1.html;", "google_scholar": "fjZ1CBwAAAAJ;https://scholar.google.com/citations?hl=en;8r3y8IMAAAAJ;", "orcid": ";0009-0005-1131-5686;0000-0002-6743-4175;", "linkedin": ";;;", "or_profile": "~Fei_Zhu1;~Shijie_Ma1;~Cheng-lin_Liu1;~WenZhuo_Liu4", "aff": "Centre for Artificial Intelligence and Robotics Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "cair-cas.org.hk;ia.ac.cn;ia.ac.cn;ucas.ac.cn", "position": "Postdoc;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nliu2024mspe,\ntitle={{MSPE}: Multi-Scale Patch Embedding Prompts Vision Transformers to Any Resolution},\nauthor={Wenzhuo Liu and Fei Zhu and Shijie Ma and Cheng-Lin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9Q9UiAyV40}\n}", "github": "", "reviewers": "gLBy;tsgU;9L8r;RdsV", "pdf_size": 1818093, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "55;66;78;32", "wc_strengths": "44;125;101;19", "wc_weaknesses": "164;115;82;1", "wc_questions": "74;2;26;29", "wc_limitations": "1;8;1;1", "wc_review": "338;316;288;82", "wc_reply_reviewers": "12;12;30;28", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.75, 16.946607330082326 ], "wc_strengths_avg": [ 72.25, 42.55217385751285 ], "wc_weaknesses_avg": [ 90.5, 59.340121334557445 ], "wc_questions_avg": [ 32.75, 26.013217794036937 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 256.0, 102.00980345045274 ], "wc_reply_reviewers_avg": [ 20.5, 8.52936105461599 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13291306014389362424&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cair-cas.org.hk;ia.ac.cn;ia.ac.cn;ucas.ac.cn", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Hong Kong Institute of Science & Innovation;Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Centre for Artificial Intelligence and Robotics;Institute of Automation;", "aff_unique_url": "http://www.hkisi.org;http://www.ia.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "HKISI;CAS;UCAS", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Minimax Optimal and Computationally Efficient Algorithms for Distributionally Robust Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96310", "id": "9SghPrjYU1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9SghPrjYU1", "openreview": "https://openreview.net/forum?id=9SghPrjYU1", "poster": "/media/PosterPDFs/NeurIPS%202024/96310.png?t=1731469174.8546586", "project": "", "author_site": "Zhishuai Liu, Pan Xu", "tldr": "", "abstract": "Distributionally robust offline reinforcement learning (RL), which seeks robust policy training against environment perturbation by modeling dynamics uncertainty, calls for function approximations when facing large state-action spaces. However, the consideration of dynamics uncertainty introduces essential nonlinearity and computational burden, posing unique challenges for analyzing and practically employing function approximation. Focusing on a basic setting where the nominal model and perturbed models are linearly parameterized, we propose minimax optimal and computationally efficient algorithms realizing function approximation and initiate the study on instance-dependent suboptimality analysis in the context of robust offline RL. Our results uncover that function approximation in robust offline RL is essentially distinct from and probably harder than that in standard offline RL. Our algorithms and theoretical results crucially depend on a novel function approximation mechanism incorporating variance information, a new procedure of suboptimality and estimation uncertainty decomposition, a quantification of the robust value function shrinkage, and a meticulously designed family of hard instances, which might be of independent interest.", "keywords": "offline reinforcement learning;distributionally robust Markov decision processes;function approximation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zhishuai Liu;Pan Xu", "authorids": "~Zhishuai_Liu1;~Pan_Xu1", "gender": "M;M", "homepage": ";https://panxulab.github.io/", "dblp": "276/4462;11/9718-2", "google_scholar": "bb_OeB4AAAAJ;UkYBx6YAAAAJ", "orcid": ";0000-0002-2559-8622", "linkedin": "zhishuai-liu-bab585195/;pan-xu-0931a2a6/", "or_profile": "~Zhishuai_Liu1;~Pan_Xu1", "aff": "Duke University;Duke University", "aff_domain": "duke.edu;duke.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024minimax,\ntitle={Minimax Optimal and Computationally Efficient Algorithms for Distributionally Robust Offline Reinforcement Learning},\nauthor={Zhishuai Liu and Pan Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9SghPrjYU1}\n}", "github": "", "reviewers": "nZV5;d55R;Qkxw;mtm1", "pdf_size": 785588, "rating": "5;6;6;6", "confidence": "4;3;3;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "47;62;58;40", "wc_strengths": "17;38;25;43", "wc_weaknesses": "207;41;10;33", "wc_questions": "6;162;135;97", "wc_limitations": "10;7;8;9", "wc_review": "287;310;236;222", "wc_reply_reviewers": "141;75;23;15", "wc_reply_authors": "705;335;20;0", "reply_reviewers": "2;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 51.75, 8.728545125048045 ], "wc_strengths_avg": [ 30.75, 10.304731922762475 ], "wc_weaknesses_avg": [ 72.75, 78.34020359942907 ], "wc_questions_avg": [ 100.0, 58.9788097540125 ], "wc_limitations_avg": [ 8.5, 1.118033988749895 ], "wc_review_avg": [ 263.75, 36.030369134939484 ], "wc_reply_reviewers_avg": [ 63.5, 50.32643440578718 ], "wc_reply_authors_avg": [ 265.0, 286.6836235294929 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2026187253064729924&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "duke.edu;duke.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generating Code World Models with Large Language Models Guided by Monte Carlo Tree Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96309", "id": "9SpWvX9ykp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9SpWvX9ykp", "openreview": "https://openreview.net/forum?id=9SpWvX9ykp", "poster": "/media/PosterPDFs/NeurIPS%202024/96309.png?t=1733956842.8623226", "project": "", "author_site": "Nicola Dainese, Matteo Merler, Minttu Alakuijala, Pekka Marttinen", "tldr": "", "abstract": "In this work we consider Code World Models, world models generated by a Large Language Model (LLM) in the form of Python code for model-based Reinforcement Learning (RL). Calling code instead of LLMs for planning has potential to be more precise, reliable, interpretable, and extremely efficient.\nHowever, writing appropriate Code World Models requires the ability to understand complex instructions, to generate exact code with non-trivial logic and to self-debug a long program with feedback from unit tests and environment trajectories. To address these challenges, we propose Generate, Improve and Fix with Monte Carlo Tree Search (GIF-MCTS), a new code generation strategy for LLMs. To test our approach in an offline RL setting, we introduce the Code World Models Benchmark (CWMB), a suite of program synthesis and planning tasks comprised of 18 diverse RL environments paired with corresponding textual descriptions and curated trajectories. GIF-MCTS surpasses all baselines on the CWMB and two other benchmarks, and we show that the Code World Models synthesized with it can be successfully used for planning, resulting in model-based RL agents with greatly improved sample efficiency and inference speed.", "keywords": "Large Language Models;code generation;MCTS;model-based reinforcement learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/64cd03a96bd4d080a869159353cc25dc069a27d3.zip", "author": "Nicola Dainese;Matteo Merler;Minttu Alakuijala;Pekka Marttinen", "authorids": "~Nicola_Dainese2;~Matteo_Merler1;~Minttu_Alakuijala1;~Pekka_Marttinen1", "gender": "M;M;;M", "homepage": ";;https://minttualakuijala.com;https://users.ics.aalto.fi/~pemartti/", "dblp": "362/8284;;295/8527;32/894", "google_scholar": "https://scholar.google.it/citations?hl=en;;WmrdmjIAAAAJ;id47-5cAAAAJ", "orcid": ";;;0000-0001-7078-7927", "linkedin": "nicola-dainese-b72a3015b/;matteo-merler-09962a234/;minttualakuijala/;", "or_profile": "~Nicola_Dainese2;~Matteo_Merler1;~Minttu_Alakuijala1;~Pekka_Marttinen1", "aff": "Aalto University;Aalto University;Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi;aalto.fi;aalto.fi", "position": "PhD student;MS student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ndainese2024generating,\ntitle={Generating Code World Models with Large Language Models Guided by Monte Carlo Tree Search},\nauthor={Nicola Dainese and Matteo Merler and Minttu Alakuijala and Pekka Marttinen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9SpWvX9ykp}\n}", "github": "", "reviewers": "49z7;3vW3;WBDc;u7M2", "pdf_size": 695410, "rating": "5;6;6;8", "confidence": "5;3;4;3", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "3;1;3;4", "wc_summary": "182;82;79;195", "wc_strengths": "69;68;59;231", "wc_weaknesses": "408;377;163;94", "wc_questions": "2;99;70;60", "wc_limitations": "1;12;8;18", "wc_review": "662;638;379;598", "wc_reply_reviewers": "53;19;214;0", "wc_reply_authors": "52;0;180;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 134.5, 54.20562701417631 ], "wc_strengths_avg": [ 106.75, 71.84140519227057 ], "wc_weaknesses_avg": [ 260.5, 134.6820329516896 ], "wc_questions_avg": [ 57.75, 35.23049105533444 ], "wc_limitations_avg": [ 9.75, 6.179603547154137 ], "wc_review_avg": [ 569.25, 112.19486396444358 ], "wc_reply_reviewers_avg": [ 71.5, 84.43488615495374 ], "wc_reply_authors_avg": [ 58.0, 73.56629663099808 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7767736364064723630&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "aalto.fi;aalto.fi;aalto.fi;aalto.fi", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Finland" }, { "title": "Compact Language Models via Pruning and Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96308", "id": "9U0nLnNMJ7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9U0nLnNMJ7", "openreview": "https://openreview.net/forum?id=9U0nLnNMJ7", "poster": "/media/PosterPDFs/NeurIPS%202024/96308.png?t=1733180954.5448334", "project": "", "author_site": "Saurav Muralidharan, Sharath Turuvekere Sreenivas, Raviraj Joshi, Marcin Chochowski, Mostofa Patwary, Mohammad Shoeybi, Bryan Catanzaro, Jan Kautz, Pavlo Molchanov", "tldr": "", "abstract": "Large language models (LLMs) targeting different deployment scales and sizes are currently produced by training each variant from scratch; this is extremely compute-intensive. In this paper, we investigate if pruning an existing LLM and then re-training it with a fraction <3% of the original training data can be a suitable alternative to repeated, full retraining. To this end, we develop a set of practical and effective **compression best practices** for LLMs that combine depth, width, attention and MLP pruning with knowledge distillation-based retraining; we arrive at these best practices through a detailed empirical exploration of pruning strategies for each axis, methods to combine axes, distillation strategies, and search techniques for arriving at optimal compressed architectures. We use this guide to compress the Nemotron-4 family of LLMs by a factor of 2-4x, and compare their performance to similarly-sized models on a variety of language modeling tasks. On these tasks, we perform better than Nemotron-3 8B and LLaMa2 7B using **up to 40x** fewer training tokens}, on par with Mistral 7B and Gemma 7B using **up to 85x fewer tokens** and slightly worse than LLaMa3 8B using **up to 159x fewer tokens**. Our models also compare favorably to state-of-the-art compression techniques from the literature.", "keywords": "llm;pruning;distillation;compression", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Saurav Muralidharan;Sharath Turuvekere Sreenivas;Raviraj Bhuminand Joshi;Marcin Chochowski;Mostofa Patwary;Mohammad Shoeybi;Bryan Catanzaro;Jan Kautz;Pavlo Molchanov", "authorids": "~Saurav_Muralidharan1;~Sharath_Turuvekere_Sreenivas1;~Raviraj_Bhuminand_Joshi1;~Marcin_Chochowski1;~Mostofa_Patwary1;~Mohammad_Shoeybi1;~Bryan_Catanzaro1;~Jan_Kautz1;~Pavlo_Molchanov1", "gender": "M;M;M;M;M;M;M;;M", "homepage": "https://sauravm.com;;;https://www.linkedin.com/in/marcinchochowski/;https://sites.google.com/view/mostofa-patwary;;https://ctnzr.io;http://jankautz.com;", "dblp": "31/8395;;;;249/2672;53/9742;14/4826;48/6214;165/8169.html", "google_scholar": "GXlChWcAAAAJ;;6Zf_5SAAAAAJ;tYpWp-4AAAAJ;0rt4tbMAAAAJ;62ElavIAAAAJ;UZ6kI2AAAAAJ;P9FclNEAAAAJ;J9PoyoIAAAAJ", "orcid": ";;0000-0003-1892-1812;;;;0000-0003-0034-7728;;", "linkedin": ";sharath-ts-33a9b276/;;;;shoeybi/;bryancatanzaro/;;", "or_profile": "~Saurav_Muralidharan1;~Sharath_Turuvekere_Sreenivas1;~Raviraj_Bhuminand_Joshi1;~Marcin_Chochowski1;~Mostofa_Patwary1;~Mohammad_Shoeybi1;~Bryan_Catanzaro1;~Jan_Kautz1;~Pavlo_Molchanov1", "aff": "NVIDIA;NVIDIA;;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA Research", "aff_domain": "nvidia.com;nvidia.com;;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "Researcher;Deep Learning Engineer;;Researcher;Principal Researcher;Director of Applied Resesrch;Vice President;VP Research;Research Scientist", "bibtex": "@inproceedings{\nmuralidharan2024compact,\ntitle={Compact Language Models via Pruning and Knowledge Distillation},\nauthor={Saurav Muralidharan and Sharath Turuvekere Sreenivas and Raviraj Bhuminand Joshi and Marcin Chochowski and Mostofa Patwary and Mohammad Shoeybi and Bryan Catanzaro and Jan Kautz and Pavlo Molchanov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9U0nLnNMJ7}\n}", "github": "", "reviewers": "W3pm;enpQ;qFD8", "pdf_size": 1298176, "rating": "7;7;7", "confidence": "4;4;4", "soundness": "3;4;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "29;88;32", "wc_strengths": "32;92;51", "wc_weaknesses": "130;94;45", "wc_questions": "1;146;2", "wc_limitations": "15;5;1", "wc_review": "207;425;131", "wc_reply_reviewers": "37;38;0", "wc_reply_authors": "21;28;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 49.666666666666664, 27.13341523329163 ], "wc_strengths_avg": [ 58.333333333333336, 25.037749277618563 ], "wc_weaknesses_avg": [ 89.66666666666667, 34.83612429010374 ], "wc_questions_avg": [ 49.666666666666664, 68.11917661145225 ], "wc_limitations_avg": [ 7.0, 5.887840577551898 ], "wc_review_avg": [ 254.33333333333334, 124.60426245607421 ], "wc_reply_reviewers_avg": [ 25.0, 17.682382946499793 ], "wc_reply_authors_avg": [ 16.333333333333332, 11.897712198383164 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=325435180673808084&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nvidia.com;nvidia.com;;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "No \"Zero-Shot\" Without Exponential Data: Pretraining Concept Frequency Determines Multimodal Model Performance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96307", "id": "9VbGjXLzig", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9VbGjXLzig", "openreview": "https://openreview.net/forum?id=9VbGjXLzig", "poster": "", "project": "", "author_site": "Vishaal Udandarao, Ameya Prabhu, Adhiraj Ghosh, Yash Sharma, Philip Torr, Adel Bibi, Samuel Albanie, Matthias Bethge", "tldr": "", "abstract": "Web-crawled pretraining datasets underlie the impressive \"zero-shot\" evaluation performance of multimodal models, such as CLIP for classification and Stable-Diffusion for image generation. However, it is unclear how meaningful the notion of \"zero-shot\" generalization is for such multimodal models, as it is not known to what extent their pretraining datasets encompass the downstream concepts targeted for during \"zero-shot\" evaluation. In this work, we ask: How is the performance of multimodal models on downstream concepts influenced by the frequency of these concepts in their pretraining datasets?\n\nWe comprehensively investigate this question across 34 models and 5 standard pretraining datasets (CC-3M, CC-12M, YFCC-15M, LAION-400M, LAION-Aesthetics), generating over 300GB of data artifacts. We consistently find that, far from exhibiting \"zero-shot\" generalization, multimodal models require exponentially more data to achieve linear improvements in downstream \"zero-shot\" performance, following a sample inefficient log-linear scaling trend. This trend persists even when controlling for sample-level similarity between pretraining and downstream datasets, and testing on purely synthetic data distributions. Furthermore, upon benchmarking models on long-tailed data sampled based on our analysis, we demonstrate that multimodal models across the board perform poorly. We contribute this long-tail test set as the Let it Wag! benchmark to further research in this direction. Taken together, our study reveals an exponential need for training data which implies that the key to \"zero-shot\" generalization capabilities under large-scale training data and compute paradigms remains to be found.", "keywords": "Multimodal Datasets;Long-tailed Concept Distribution;CLIP Models;Diffusion Models;Data-Centric ML", "primary_area": "machine_vision", "supplementary_material": "/attachment/f53c035886476b062f239719ed157fbffa6d9e29.zip", "author": "Vishaal Udandarao;Ameya Prabhu;Adhiraj Ghosh;Yash Sharma;Philip Torr;Adel Bibi;Samuel Albanie;Matthias Bethge", "authorids": "~Vishaal_Udandarao1;~Ameya_Prabhu1;~Adhiraj_Ghosh2;~Yash_Sharma1;~Philip_Torr1;~Adel_Bibi1;~Samuel_Albanie2;~Matthias_Bethge1", "gender": "M;M;M;;;M;M;Not Specified", "homepage": "https://vishaal27.github.io/;https://drimpossible.github.io/;http://adhirajghosh.github.io/;http://www.yash-sharma.com;http://www.robots.ox.ac.uk/~tvg/;http://adelbibi.com;https://bethgelab.org;https://samuelalbanie.com/", "dblp": "247/4693;181/4512;304/2904;121/9967-1;;176/0964;77/3005;188/5765", "google_scholar": "jUOcawkAAAAJ;0kK7sSAAAAAJ;https://scholar.google.com/citations?hl=en;AlGCn8wAAAAJ;;Q4j2laYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=QjufASUAAAAJ", "orcid": ";;;;;0000-0002-6169-3918;;", "linkedin": "vishaal-udandarao/?originalSubdomain=de;;adhiraj-ghosh/;yashjsharma/;;adel-bibi-ba3671ab/;;", "or_profile": "~Vishaal_Udandarao1;~Ameya_Prabhu1;~Adhiraj_Ghosh2;~Yash_Sharma1;~Philip_Torr1;~Adel_Bibi1;~Matthias_Bethge1;~samuel_Albanie1", "aff": "University of Cambridge;University of Oxford;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen;University of Oxford;University of Oxford;University of Tuebingen;University of Cambridge", "aff_domain": "cam.ac.uk;ox.ac.uk;uni-tuebingen.de;uni-tuebingen.de;ox.ac.uk;ox.ac.uk;uni-tuebingen.de;cam.ac.uk", "position": "PhD student;PhD student;MS student;PhD student;Full Professor;Senior Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nudandarao2024no,\ntitle={No ''Zero-Shot'' Without Exponential Data: Pretraining Concept Frequency Determines Multimodal Model Performance},\nauthor={Vishaal Udandarao and Ameya Prabhu and Adhiraj Ghosh and Yash Sharma and Philip Torr and Adel Bibi and Samuel Albanie and Matthias Bethge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9VbGjXLzig}\n}", "github": "", "reviewers": "KUaM;oRi7;WCHb;s2XF", "pdf_size": 6536950, "rating": "6;6;7;8", "confidence": "3;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;4;4", "presentation": "2;3;4;4", "wc_summary": "59;65;159;82", "wc_strengths": "47;39;62;93", "wc_weaknesses": "54;206;218;5", "wc_questions": "62;2;90;115", "wc_limitations": "5;21;160;12", "wc_review": "227;333;689;307", "wc_reply_reviewers": "0;0;334;41", "wc_reply_authors": "0;0;1589;23", "reply_reviewers": "0;0;3;1", "reply_authors": "1;1;6;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 91.25, 40.01484099681017 ], "wc_strengths_avg": [ 60.25, 20.632195714465293 ], "wc_weaknesses_avg": [ 120.75, 92.97681162526493 ], "wc_questions_avg": [ 67.25, 42.07953778263255 ], "wc_limitations_avg": [ 49.5, 64.04880951274582 ], "wc_review_avg": [ 389.0, 177.5556250869006 ], "wc_reply_reviewers_avg": [ 93.75, 139.7146645846455 ], "wc_reply_authors_avg": [ 403.0, 684.8017961425043 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=952612694430275234&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;ox.ac.uk;uni-tuebingen.de;uni-tuebingen.de;ox.ac.uk;ox.ac.uk;uni-tuebingen.de;cam.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;1;1;3;0", "aff_unique_norm": "University of Cambridge;University of Oxford;Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.ox.ac.uk;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Cambridge;Oxford;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Cambridge;;T\u00fcbingen", "aff_country_unique_index": "0;0;1;1;0;0;1;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Vision Mamba Mender", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96306", "id": "9VnevS2YoR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9VnevS2YoR", "openreview": "https://openreview.net/forum?id=9VnevS2YoR", "poster": "/media/PosterPDFs/NeurIPS%202024/96306.png?t=1732105349.157194", "project": "", "author_site": "Jiacong Hu, Anda Cao, Zunlei Feng, Shengxuming Zhang, Yi Wang, Lingxiang Jia, Mingli Song", "tldr": "", "abstract": "Mamba, a state-space model with selective mechanisms and hardware-aware architecture, has demonstrated outstanding performance in long sequence modeling tasks, particularly garnering widespread exploration and application in the field of computer vision. While existing works have mixed opinions of its application in visual tasks, the exploration of its internal workings and the optimization of its performance remain urgent and worthy research questions given its status as a novel model. Existing optimizations of the Mamba model, especially when applied in the visual domain, have primarily relied on predefined methods such as improving scanning mechanisms or integrating other architectures, often requiring strong priors and extensive trial and error. In contrast to these approaches, this paper proposes the Vision Mamba Mender, a systematic approach for understanding the workings of Mamba, identifying flaws within, and subsequently optimizing model performance. Specifically, we present methods for predictive correlation analysis of Mamba's hidden states from both internal and external perspectives, along with corresponding definitions of correlation scores, aimed at understanding the workings of Mamba in visual recognition tasks and identifying flaws therein. Additionally, tailored repair methods are proposed for identified external and internal state flaws to eliminate them and optimize model performance. Extensive experiments validate the efficacy of the proposed methods on prevalent Mamba architectures, significantly enhancing Mamba's performance.", "keywords": "Mamba;State Space Models;Computer Vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/231f16a7aedf14ace21ad17af99f0f84a5a06ee6.zip", "author": "Jiacong Hu;Anda Cao;Zunlei Feng;Shengxuming Zhang;Yi Wang;Lingxiang Jia;Mingli Song", "authorids": "~Jiacong_Hu1;~Anda_Cao1;~Zunlei_Feng1;~Shengxuming_Zhang1;~Yi_Wang56;~Lingxiang_Jia1;~Mingli_Song1", "gender": "M;M;M;M;M;;M", "homepage": "https://jiaconghu.com;https://www.vipazoo.cn/people/caoanda;https://person.zju.edu.cn/en/zunleifeng;https://www.vipazoo.cn/people/zhangshengxuming;https://marsha1147.github.io/;;https://person.zju.edu.cn/msong", "dblp": "136/3061;374/8708;191/2455;355/1122;;;71/5333", "google_scholar": ";;wMtjcGwAAAAJ;https://scholar.google.com.hk/citations?user=9fajYpsAAAAJ;;;7oLbhAwAAAAJ", "orcid": ";;;0000-0002-8827-9012;;;0000-0003-2621-6048", "linkedin": ";;;;;;", "or_profile": "~Jiacong_Hu1;~Anda_Cao1;~Zunlei_Feng1;~Shengxuming_Zhang1;~Yi_Wang56;~Lingxiang_Jia1;~Mingli_Song1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn", "position": "PhD student;PhD student;Associate Professor;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nhu2024vision,\ntitle={Vision Mamba Mender},\nauthor={Jiacong Hu and Anda Cao and Zunlei Feng and Shengxuming Zhang and Yi Wang and Lingxiang Jia and Mingli Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9VnevS2YoR}\n}", "github": "", "reviewers": "3tmX;JDBm;nvv2;4vzy", "pdf_size": 6975349, "rating": "4;6;6;7", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "52;93;50;159", "wc_strengths": "53;156;58;231", "wc_weaknesses": "87;188;93;109", "wc_questions": "43;94;2;6", "wc_limitations": "36;23;2;6", "wc_review": "271;554;205;511", "wc_reply_reviewers": "80;31;36;15", "wc_reply_authors": "909;394;79;63", "reply_reviewers": "1;1;1;1", "reply_authors": "5;3;3;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.5, 44.17295552710957 ], "wc_strengths_avg": [ 124.5, 73.94085474215186 ], "wc_weaknesses_avg": [ 119.25, 40.49922838771129 ], "wc_questions_avg": [ 36.25, 36.9754986443726 ], "wc_limitations_avg": [ 16.75, 13.626720074911644 ], "wc_review_avg": [ 385.25, 149.86056018846318 ], "wc_reply_reviewers_avg": [ 40.5, 24.088378940891808 ], "wc_reply_authors_avg": [ 361.25, 342.6808828925244 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eAn77KhuAXoJ:scholar.google.com/&scioq=Vision+Mamba+Mender&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Coded Computing for Resilient Distributed Computing: A Learning-Theoretic Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96305", "id": "9XDYEEBRV6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9XDYEEBRV6", "openreview": "https://openreview.net/forum?id=9XDYEEBRV6", "poster": "/media/PosterPDFs/NeurIPS%202024/96305.png?t=1731479377.1210284", "project": "", "author_site": "Parsa Moradi, Behrooz Tahmasebi, Mohammad Maddah-Ali", "tldr": "", "abstract": "Coded computing has emerged as a promising framework for tackling significant challenges in large-scale distributed computing, including the presence of slow, faulty, or compromised servers. In this approach, each worker node processes a combination of the data, rather than the raw data itself. The final result then is decoded from the collective outputs of the worker nodes. However, there is a significant gap between current coded computing approaches and the broader landscape of general distributed computing, particularly when it comes to machine learning workloads. To bridge this gap, we propose a novel foundation for coded computing, integrating the principles of learning theory, and developing a framework that seamlessly adapts with machine learning applications. \nIn this framework, the objective is to find the encoder and decoder functions that minimize the loss function, defined as the mean squared error between the estimated and true values. Facilitating the search for the optimum decoding and functions, we show that the loss function can be upper-bounded by the summation of two terms: the generalization error of the decoding function and the training error of the encoding function. \nFocusing on \nthe second-order Sobolev space, we then derive the optimal encoder and decoder. We show that in the proposed solution, the mean squared error of the estimation decays with the rate of $\\mathcal{O}(S^3 N^{-3})$ and $\\mathcal{O}(S^{\\frac{8}{5}}N^{\\frac{-3}{5}})$ in noiseless and noisy computation settings, respectively, where $N$ is the number of worker nodes with at most $S$ slow servers (stragglers). Finally, we evaluate the proposed scheme on inference tasks for various machine learning models and demonstrate that the proposed framework outperforms the state-of-the-art in terms of accuracy and rate of convergence.", "keywords": "Coded Computing;Distributed Computing;Non-Parametric Regression;Smoothing Spline;Kernel Ridge Regression", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Parsa Moradi;Behrooz Tahmasebi;Mohammad Ali Maddah-Ali", "authorids": "~Parsa_Moradi2;~Behrooz_Tahmasebi1;~Mohammad_Ali_Maddah-Ali2", "gender": "M;M;M", "homepage": ";https://people.csail.mit.edu/bzt/;https://maddah.umn.edu/", "dblp": ";223/0884;", "google_scholar": "eAfZOoMAAAAJ;ZXCO3DMAAAAJ;CFIJZwoAAAAJ", "orcid": ";;", "linkedin": "parsamoradi/;;", "or_profile": "~Parsa_Moradi2;~Behrooz_Tahmasebi1;~Mohammad_Ali_Maddah-Ali2", "aff": "University of Minnesota - Twin Cities;Microsoft Research ;University of Minnesota - Twin Cities", "aff_domain": "umn.edu;microsoft.com;umn.edu", "position": "PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\nmoradi2024coded,\ntitle={Coded Computing for Resilient Distributed Computing: A Learning-Theoretic Framework},\nauthor={Parsa Moradi and Behrooz Tahmasebi and Mohammad Ali Maddah-Ali},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9XDYEEBRV6}\n}", "github": "", "reviewers": "a2Af;JpiK;Uhit;SdcZ", "pdf_size": 1834648, "rating": "4;6;6;7", "confidence": "3;3;3;2", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;4", "wc_summary": "16;134;137;184", "wc_strengths": "5;28;81;228", "wc_weaknesses": "5;110;259;396", "wc_questions": "279;59;2;55", "wc_limitations": "1;19;4;59", "wc_review": "306;350;483;922", "wc_reply_reviewers": "93;27;0;57", "wc_reply_authors": "133;6;0;19", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 117.75, 62.00151207833564 ], "wc_strengths_avg": [ 85.5, 86.76548853086693 ], "wc_weaknesses_avg": [ 192.5, 148.15279275126744 ], "wc_questions_avg": [ 98.75, 106.47153375433267 ], "wc_limitations_avg": [ 20.75, 23.112496619794236 ], "wc_review_avg": [ 515.25, 243.71025316961945 ], "wc_reply_reviewers_avg": [ 44.25, 34.62206666275137 ], "wc_reply_authors_avg": [ 39.5, 54.41736855085883 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6677675432799214914&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "umn.edu;microsoft.com;umn.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Minnesota;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.minnesota.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UMN;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Twin Cities;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SWT-Bench: Testing and Validating Real-World Bug-Fixes with Code Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96304", "id": "9Y8zUO11EQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9Y8zUO11EQ", "openreview": "https://openreview.net/forum?id=9Y8zUO11EQ", "poster": "", "project": "", "author_site": "Niels M\u00fcndler, Mark M\u00fcller, Jingxuan He, Martin Vechev", "tldr": "", "abstract": "Rigorous software testing is crucial for developing and maintaining high-quality code, making automated test generation a promising avenue for both improving software quality and boosting the effectiveness of code generation methods. However, while code generation with Large Language Models (LLMs) is an extraordinarily active research area, test generation remains relatively unexplored. We address this gap and investigate the capability of LLM-based Code Agents to formalize user issues into test cases. To this end, we propose a novel benchmark based on popular GitHub repositories, containing real-world issues, ground-truth bug-fixes, and golden tests. We find that LLMs generally perform surprisingly well at generating relevant test cases, with Code Agents designed for code repair exceeding the performance of systems designed specifically for test generation. Further, as test generation is a similar but more structured task than code generation, it allows for a more fine-grained analysis using issue reproduction rate and coverage changes, providing a dual metric for analyzing systems designed for code repair. Finally, we find that generated tests are an effective filter for proposed code fixes, doubling the precision of SWE-Agent. We release all data and code at https://github.com/logic-star-ai/SWT-Bench.", "keywords": "language model;test generation;code agent", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/5d6a5302141ade20a4a8491a9afd86e5e9c2d93f.zip", "author": "Niels M\u00fcndler;Mark Niklas Mueller;Jingxuan He;Martin Vechev", "authorids": "~Niels_M\u00fcndler1;~Mark_Niklas_Mueller2;~Jingxuan_He1;~Martin_Vechev1", "gender": "M;M;M;M", "homepage": "https://blog.nielstron.de;https://www.sri.inf.ethz.ch/people/mark;https://www.sri.inf.ethz.ch/people/jingxuan;https://www.sri.inf.ethz.ch/people/martin", "dblp": "245/7560;287/4254;;93/2189.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;RBpmcCAAAAAJ;ylHZY58AAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": "0000-0003-3851-2557;0000-0002-2496-6542;;", "linkedin": "niels-m%C3%BCndler-04115b231/;mark-m%C3%BCller-8bb4b1140/;;", "or_profile": "~Niels_M\u00fcndler1;~Mark_Niklas_Mueller2;~Jingxuan_He1;~Martin_Vechev1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nm{\\\"u}ndler2024swtbench,\ntitle={{SWT}-Bench: Testing and Validating Real-World Bug-Fixes with Code Agents},\nauthor={Niels M{\\\"u}ndler and Mark Niklas Mueller and Jingxuan He and Martin Vechev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9Y8zUO11EQ}\n}", "github": "", "reviewers": "ueFW;i5fN;pxq3;PhkB;Ky1F", "pdf_size": 523724, "rating": "3;4;5;7;8", "confidence": "4;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;2;2;3;4", "presentation": "1;3;3;4;4", "wc_summary": "63;88;152;61;94", "wc_strengths": "28;94;176;140;99", "wc_weaknesses": "96;148;208;60;74", "wc_questions": "97;9;145;69;96", "wc_limitations": "20;1;183;12;87", "wc_review": "304;340;864;342;450", "wc_reply_reviewers": "158;90;0;0;0", "wc_reply_authors": "773;84;0;0;0", "reply_reviewers": "2;1;0;0;0", "reply_authors": "3;2;1;1;1", "rating_avg": [ 5.4, 1.8547236990991407 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 1.0954451150103321 ], "wc_summary_avg": [ 91.6, 32.927799805027966 ], "wc_strengths_avg": [ 107.4, 49.64514074912066 ], "wc_weaknesses_avg": [ 117.2, 54.37058028014783 ], "wc_questions_avg": [ 83.2, 44.454021190439 ], "wc_limitations_avg": [ 60.6, 68.17800231746307 ], "wc_review_avg": [ 460.0, 207.8345495821135 ], "wc_reply_reviewers_avg": [ 49.6, 64.4409807498303 ], "wc_reply_authors_avg": [ 171.4, 302.55419349267004 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12560302090979175237&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "UltraEdit: Instruction-based Fine-Grained Image Editing at Scale", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97827", "id": "9ZDdlgH6O8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9ZDdlgH6O8", "openreview": "https://openreview.net/forum?id=9ZDdlgH6O8", "poster": "/media/PosterPDFs/NeurIPS%202024/97827.png?t=1731914959.1426606", "project": "", "author_site": "Haozhe Zhao, Xiaojian (Shawn) Ma, Liang Chen, Shuzheng Si, Rujie Wu, Kaikai An, Peiyu Yu, Minjia Zhang, Qing Li, Baobao Chang", "tldr": "", "abstract": "This paper presents UltraEdit, a large-scale (~ 4M editing samples), automatically generated dataset for instruction-based image editing. Our key idea is to address the drawbacks in existing image editing datasets like InstructPix2Pix and MagicBrush, and provide a *systematic* approach to producing massive and high-quality image editing samples: 1) UltraEdit includes more diverse editing instructions by combining LLM creativity and in-context editing examples by human raters; 2) UltraEdit is anchored on real images (photographs or artworks), which offers more diversity and less biases than those purely synthesized by text-to-image models; 3) UltraEdit supports region-based editing with high-quality, automatically produced region annotations. Our experiments show that canonical diffusion-based editing baselines trained on UltraEdit set new records on challenging MagicBrush and Emu-Edit benchmarks, respectively. Our analysis further confirms the crucial role of real image anchors and region-based editing data. The dataset, code, and models will be made public.", "keywords": "image editing;text-to-image;instruction-based image editing;region-based image editing", "primary_area": "", "supplementary_material": "/attachment/ef10a07f71633e3d89d7ee6fe0c3ddfe7153fc34.pdf", "author": "Haozhe Zhao;Xiaojian Ma;Liang Chen;Shuzheng Si;Rujie Wu;Kaikai An;Peiyu Yu;Minjia Zhang;Qing Li;Baobao Chang", "authorids": "~Haozhe_Zhao1;~Xiaojian_Ma1;~Liang_Chen10;~Shuzheng_Si1;~Rujie_Wu2;~Kaikai_An1;~Peiyu_Yu2;~Minjia_Zhang1;~Qing_Li1;~Baobao_Chang1", "gender": "M;;M;M;M;M;;M;M;M", "homepage": ";;https://chenllliang.github.io;;https://rujiewu.github.io;https://github.com/kkk-an;;https://minjiazhang.github.io/;http://liqing-ustc.github.io/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6027", "dblp": "299/7199;;01/5394-24;324/3680;324/8753;;249/9449;58/9033;181/2689-3;91/6051", "google_scholar": "skIXywUAAAAJ;;lMKPaTYAAAAJ;https://scholar.google.com.hk/citations?user=zO2XyZUAAAAJ;https://scholar.google.com/citations?;6TrBRiEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;iwdFZBEAAAAJ;LaKNyhQAAAAJ", "orcid": "0000-0003-0502-4426;;;;0009-0001-6426-1248;;;0000-0002-8165-166X;;0000-0003-2824-6750", "linkedin": ";;;;;;;minjia-zhang-05857226/;;", "or_profile": "~Haozhe_Zhao1;~Xiaojian_Ma1;~Liang_Chen10;~Shuzheng_Si1;~Rujie_Wu2;~Kaikai_An1;~Peiyu_Yu2;~Minjia_Zhang1;~Qing_Li1;~Baobao_Chang1", "aff": "Peking University;;Peking University;Peking University;Peking University;Peking University;UCLA Department of Statistics;Microsoft ;Beijing Institute for General Artificial Intelligence (BIGAI);Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;g.ucla.edu;microsoft.com;bigai.ai;pku.edu.cn", "position": "MS student;;PhD student;MS student;PhD student;MS student;PhD student;Principle Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhao2024ultraedit,\ntitle={UltraEdit: Instruction-based Fine-Grained Image Editing at Scale},\nauthor={Haozhe Zhao and Xiaojian Ma and Liang Chen and Shuzheng Si and Rujie Wu and Kaikai An and Peiyu Yu and Minjia Zhang and Qing Li and Baobao Chang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=9ZDdlgH6O8}\n}", "github": "", "reviewers": "fz2t;3R87;j7cR;xmAL;RFxS", "pdf_size": 18585724, "rating": "6;6;6;6;7", "confidence": "4;4;2;4;4", "wc_summary_and_contributions": "51;71;43;123;82", "wc_strengths": "97;27;25;47;59", "wc_improvement": "204;42;100;448;151", "wc_limitations": "16;1;34;11;42", "wc_correctness": "2;1;10;1;57", "wc_clarity": "5;1;4;1;59", "wc_relation_to_prior_work": "2;1;26;10;18", "wc_documentation": "9;1;34;1;40", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "387;146;277;643;509", "wc_reply_reviewers": "0;0;115;0;0", "wc_reply_authors": "201;204;249;240;177", "reply_reviewers": "0;0;3;0;0", "reply_authors": "5;6;7;7;3", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.8 ], "wc_summary_and_contributions_avg": [ 74.0, 28.156704352604905 ], "wc_strengths_avg": [ 51.0, 26.260236099471765 ], "wc_improvement_avg": [ 189.0, 140.19985734657507 ], "wc_limitations_avg": [ 20.8, 15.065191668213187 ], "wc_correctness_avg": [ 14.2, 21.664717861075413 ], "wc_clarity_avg": [ 14.0, 22.556595487794695 ], "wc_relation_to_prior_work_avg": [ 11.4, 9.54148835350125 ], "wc_documentation_avg": [ 17.0, 16.69730517179344 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 392.4, 173.46769151631665 ], "wc_reply_reviewers_avg": [ 23.0, 46.0 ], "wc_reply_authors_avg": [ 214.2, 26.603759132874437 ], "reply_reviewers_avg": [ 0.6, 1.2 ], "reply_authors_avg": [ 5.6, 1.4966629547095764 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.25, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5989491465999097288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;g.ucla.edu;microsoft.com;bigai.ai;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;2;3;0", "aff_unique_norm": "Peking University;University of California, Los Angeles;Microsoft;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";Department of Statistics;Microsoft Corporation;", "aff_unique_url": "http://www.pku.edu.cn;https://www.ucla.edu;https://www.microsoft.com;http://www.bigmodel.cn/", "aff_unique_abbr": "Peking U;UCLA;Microsoft;BIGAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "ZSC-Eval: An Evaluation Toolkit and Benchmark for Multi-agent Zero-shot Coordination", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97826", "id": "9aXjIBLwKc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9aXjIBLwKc", "openreview": "https://openreview.net/forum?id=9aXjIBLwKc", "poster": "/media/PosterPDFs/NeurIPS%202024/97826.png?t=1731592674.0715878", "project": "", "author_site": "Xihuai Wang, Shao Zhang, Wenhao Zhang, Wentao Dong, Jingxiao Chen, Ying Wen, Weinan Zhang", "tldr": "", "abstract": "Zero-shot coordination (ZSC) is a new cooperative multi-agent reinforcement learning (MARL) challenge that aims to train an ego agent to work with diverse, unseen partners during deployment. The significant difference between the deployment-time partners' distribution and the training partners' distribution determined by the training algorithm makes ZSC a unique out-of-distribution (OOD) generalization challenge. The potential distribution gap between evaluation and deployment-time partners leads to inadequate evaluation, which is exacerbated by the lack of appropriate evaluation metrics. In this paper, we present **ZSC-Eval**, the first evaluation toolkit and benchmark for ZSC algorithms. ZSC-Eval consists of: 1) Generation of evaluation partner candidates through behavior-preferring rewards to approximate deployment-time partners' distribution; 2) Selection of evaluation partners by Best-Response Diversity (BR-Div); 3) Measurement of generalization performance with various evaluation partners via the Best-Response Proximity (BR-Prox) metric. We use ZSC-Eval to benchmark ZSC algorithms in Overcooked and Google Research Football environments and get novel empirical findings. We also conduct a human experiment of current ZSC algorithms to verify the ZSC-Eval's consistency with human evaluation. ZSC-Eval is now available at https://github.com/sjtu-marl/ZSC-Eval.", "keywords": "Multi-agent Reinforcement Learning;Zero-shot Coordination;Multi-agent Generalization", "primary_area": "", "supplementary_material": "", "author": "Xihuai Wang;Shao Zhang;Wenhao Zhang;Wentao Dong;Jingxiao Chen;Ying Wen;Weinan Zhang", "authorids": "~Xihuai_Wang1;~Shao_Zhang1;~Wenhao_Zhang8;~Wentao_Dong1;~Jingxiao_Chen1;~Ying_Wen1;~Weinan_Zhang1", "gender": "M;F;;M;M;M;M", "homepage": "https://xihuai18.github.io/;https://shaozhang.info;https://github.com/bluixe;https://github.com/WentDong;https://github.com/TimerChen;https://yingwen.io;http://wnzhang.net", "dblp": "79/6482;57/1330;;;239/4404;41/4203-1;28/10261-1", "google_scholar": "hy6v3qUAAAAJ;UG36L2YAAAAJ;;;-zs1V28AAAAJ;_A1CxG8AAAAJ;Qzss0GEAAAAJ", "orcid": ";0000-0002-0111-0776;;;;0000-0003-1247-2382;0000-0002-0127-2425", "linkedin": ";;;;;wenying45;", "or_profile": "~Xihuai_Wang1;~Shao_Zhang1;~Wenhao_Zhang8;~Wentao_Dong1;~Jingxiao_Chen1;~Ying_Wen1;~Weinan_Zhang1", "aff": "Shanghai Jiaotong University;Northeastern University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;northeastern.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024zsceval,\ntitle={{ZSC}-Eval: An Evaluation Toolkit and Benchmark for Multi-agent Zero-shot Coordination},\nauthor={Xihuai Wang and Shao Zhang and Wenhao Zhang and Wentao Dong and Jingxiao Chen and Ying Wen and Weinan Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=9aXjIBLwKc}\n}", "github": "", "reviewers": "9xeX;jqjf;yyn1", "pdf_size": 10366582, "rating": "6;7;7", "confidence": "2;3;3", "wc_summary_and_contributions": "60;135;110", "wc_strengths": "2;126;76", "wc_improvement": "2;313;37", "wc_limitations": "23;7;14", "wc_correctness": "1;1;2", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "12;25;5", "wc_documentation": "1;31;13", "wc_additional_feedback": "1;1;1", "wc_review": "103;640;259", "wc_reply_reviewers": "16;145;0", "wc_reply_authors": "0;0;62", "reply_reviewers": "1;1;0", "reply_authors": "2;4;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 101.66666666666667, 31.18047822311618 ], "wc_strengths_avg": [ 68.0, 50.93787065304817 ], "wc_improvement_avg": [ 117.33333333333333, 139.09309432494805 ], "wc_limitations_avg": [ 14.666666666666666, 6.548960901462833 ], "wc_correctness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 14.0, 8.286535263104035 ], "wc_documentation_avg": [ 15.0, 12.328828005937952 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 334.0, 225.55265460641337 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 64.91190611556216 ], "wc_reply_authors_avg": [ 20.666666666666668, 29.227080289043965 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13896171420506086057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;northeastern.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.northeastern.edu", "aff_unique_abbr": "SJTU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Context and Geometry Aware Voxel Transformer for Semantic Scene Completion", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96303", "id": "9bu627mTfs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9bu627mTfs", "openreview": "https://openreview.net/forum?id=9bu627mTfs", "poster": "/media/PosterPDFs/NeurIPS%202024/96303.png?t=1729478148.3068125", "project": "", "author_site": "Zhu Yu, Runmin Zhang, Jiacheng Ying, Junchen Yu, Xiaohai Hu, Lun Luo, Si-Yuan Cao, Hui-liang Shen", "tldr": "", "abstract": "Vision-based Semantic Scene Completion (SSC) has gained much attention due to its widespread applications in various 3D perception tasks. Existing sparse-to-dense approaches typically employ shared context-independent queries across various input images, which fails to capture distinctions among them as the focal regions of different inputs vary and may result in undirected feature aggregation of cross-attention. Additionally, the absence of depth information may lead to points projected onto the image plane sharing the same 2D position or similar sampling points in the feature map, resulting in depth ambiguity. In this paper, we present a novel context and geometry aware voxel transformer. It utilizes a context aware query generator to initialize context-dependent queries tailored to individual input images, effectively capturing their unique characteristics and aggregating information within the region of interest. Furthermore, it extend deformable cross-attention from 2D to 3D pixel space, enabling the differentiation of points with similar image coordinates based on their depth coordinates. Building upon this module, we introduce a neural network named CGFormer to achieve semantic scene completion. Simultaneously, CGFormer leverages multiple 3D representations (i.e., voxel and TPV) to boost the semantic and geometric representation abilities of the transformed 3D volume from both local and global perspectives. Experimental results demonstrate that CGFormer achieves state-of-the-art performance on the SemanticKITTI and SSCBench-KITTI-360 benchmarks, attaining a mIoU of 16.87 and 20.05, as well as an IoU of 45.99 and 48.07, respectively. Remarkably, CGFormer even outperforms approaches employing temporal images as inputs or much larger image backbone networks.", "keywords": "Transformer;Semantic Scene Completion;Depth Estimation;Occupancy Prediction", "primary_area": "machine_vision", "supplementary_material": "/attachment/fc82f7c46ef0de1d3d3c8b12330ba3dfae8609e0.zip", "author": "Zhu Yu;Runmin Zhang;Jiacheng Ying;Junchen Yu;Xiaohai Hu;Lun Luo;Si-Yuan Cao;Hui-liang Shen", "authorids": "~Zhu_Yu2;~Runmin_Zhang1;~Jiacheng_Ying1;~Junchen_Yu1;~Xiaohai_Hu1;~Lun_Luo1;~Si-Yuan_Cao1;~Hui-liang_Shen1", "gender": ";M;M;M;;M;;M", "homepage": ";;;;;https://github.com/zjuluolun;;http://ivlab.org/", "dblp": ";342/8502;;;;189/3267;;97/6127.html", "google_scholar": ";l5pK7NwAAAAJ;3VzsfZEAAAAJ;;;G-yZREkAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-8545-9966;0000-0002-8721-003X;0009-0004-1276-2114;;0000-0002-0531-9171;;0000-0001-8469-019X", "linkedin": ";;;;;;;", "or_profile": "~Zhu_Yu2;~Runmin_Zhang1;~Jiacheng_Ying1;~Junchen_Yu1;~Xiaohai_Hu1;~Lun_Luo1;~Si-Yuan_Cao1;~Hui-liang_Shen1", "aff": ";Zhejiang University;Zhejiang University;Zhejiang University;;HAOMO.AI;;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;;haomo.ai;;zju.edu.cn", "position": ";PhD student;PhD student;MS student;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nyu2024context,\ntitle={Context and Geometry Aware Voxel Transformer for Semantic Scene Completion},\nauthor={Zhu Yu and Runmin Zhang and Jiacheng Ying and Junchen Yu and Xiaohai Hu and Lun Luo and Si-Yuan Cao and Hui-liang Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9bu627mTfs}\n}", "github": "", "reviewers": "ryRH;jg8M;wDiN;Lm1y", "pdf_size": 10493932, "rating": "5;5;6;7", "confidence": "4;3;5;4", "soundness": "2;2;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "57;68;74;75", "wc_strengths": "40;68;104;50", "wc_weaknesses": "237;87;115;54", "wc_questions": "80;4;8;5", "wc_limitations": "9;36;45;47", "wc_review": "423;263;346;231", "wc_reply_reviewers": "158;0;84;40", "wc_reply_authors": "177;91;26;33", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.5, 7.158910531638177 ], "wc_strengths_avg": [ 65.5, 24.387496796514398 ], "wc_weaknesses_avg": [ 123.25, 69.13166785200542 ], "wc_questions_avg": [ 24.25, 32.220917119163445 ], "wc_limitations_avg": [ 34.25, 15.155444566227676 ], "wc_review_avg": [ 315.75, 74.80432808334021 ], "wc_reply_reviewers_avg": [ 70.5, 58.606740226700886 ], "wc_reply_authors_avg": [ 81.75, 60.50361559444196 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4828097647973376906&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;;haomo.ai;;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Zhejiang University;HAOMO.AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.haomo.ai", "aff_unique_abbr": "ZJU;HAOMO.AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "IPM-LSTM: A Learning-Based Interior Point Method for Solving Nonlinear Programs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96302", "id": "9c3IiAWeiN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9c3IiAWeiN", "openreview": "https://openreview.net/forum?id=9c3IiAWeiN", "poster": "/media/PosterPDFs/NeurIPS%202024/96302.png?t=1731298802.7117746", "project": "", "author_site": "Xi Gao, Jinxin Xiong, Akang Wang, qihong duan, Jiang Xue, Qingjiang Shi", "tldr": "", "abstract": "Solving constrained nonlinear programs (NLPs) is of great importance in various domains such as power systems, robotics, and wireless communication networks. One widely used approach for addressing NLPs is the interior point method (IPM). The most computationally expensive procedure in IPMs is to solve systems of linear equations via matrix factorization. Recently, machine learning techniques have been adopted to expedite classic optimization algorithms. In this work, we propose using Long Short-Term Memory (LSTM) neural networks to approximate the solution of linear systems and integrate this approximating step into an IPM. The resulting approximate NLP solution is then utilized to warm-start an interior point solver. Experiments on various types of NLPs, including Quadratic Programs and Quadratically Constrained Quadratic Programs, show that our approach can significantly accelerate NLP solving, reducing iterations by up to 60% and solution time by up to 70% compared to the default solver.", "keywords": "learning to optimize;interior point method;nonlinear constrained programs", "primary_area": "optimization", "supplementary_material": "", "author": "Xi Gao;Jinxin Xiong;Akang Wang;Qihong Duan;Jiang Xue;Qingjiang Shi", "authorids": "~Xi_Gao1;~Jinxin_Xiong1;~Akang_Wang1;~Qihong_Duan2;~Jiang_Xue3;~Qingjiang_Shi1", "gender": "M;;Not Specified;M;M;M", "homepage": ";;https://akangw.github.io/;https://math.xjtu.edu.cn/info/1338/7590.htm;http://gr.xjtu.edu.cn/web/x.jiang;https://sse.tongji.edu.cn/info/1210/4497.htm", "dblp": ";;222/3290;;;63/1006", "google_scholar": "xFQkY1YAAAAJ;;TyYzzmoAAAAJ;;;8xoKeR0AAAAJ", "orcid": ";0009-0009-7724-1017;0000-0002-3325-8441;0000-0001-7549-0970;;", "linkedin": ";;wangakang/;;;", "or_profile": "~Xi_Gao1;~Jinxin_Xiong1;~Akang_Wang1;~Qihong_Duan2;~Jiang_Xue3;~Qingjiang_Shi1", "aff": "Xi'an Jiaotong University;The Chinese University of Hong Kong, Shenzhen;Shenzhen Research Institute of Big Data;Xi'an Jiaotong University;Xi'an Jiaotong University;Tongji University", "aff_domain": "xjtu.edu.cn;link.cuhk.edu.cn;sribd.cn;xjtu.edu.cn;xjtu.edu.cn;tongji.edu.cn", "position": "PhD student;PhD student;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngao2024ipmlstm,\ntitle={{IPM}-{LSTM}: A Learning-Based Interior Point Method for Solving Nonlinear Programs},\nauthor={Xi Gao and Jinxin Xiong and Akang Wang and Qihong Duan and Jiang Xue and Qingjiang Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9c3IiAWeiN}\n}", "github": "", "reviewers": "dSF8;Qdyo;9Qph", "pdf_size": 7451533, "rating": "5;5;7", "confidence": "4;4;5", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "109;128;47", "wc_strengths": "76;53;99", "wc_weaknesses": "321;163;223", "wc_questions": "99;4;3", "wc_limitations": "9;4;11", "wc_review": "614;352;383", "wc_reply_reviewers": "198;0;35", "wc_reply_authors": "712;0;22", "reply_reviewers": "2;0;1", "reply_authors": "3;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 94.66666666666667, 34.58644564308715 ], "wc_strengths_avg": [ 76.0, 18.7794213613377 ], "wc_weaknesses_avg": [ 235.66666666666666, 65.1221075280038 ], "wc_questions_avg": [ 35.333333333333336, 45.02098276236192 ], "wc_limitations_avg": [ 8.0, 2.943920288775949 ], "wc_review_avg": [ 449.6666666666667, 116.88836079306138 ], "wc_reply_reviewers_avg": [ 77.66666666666667, 86.27990624833932 ], "wc_reply_authors_avg": [ 244.66666666666666, 330.57660063726365 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17062751114064925872&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "xjtu.edu.cn;link.cuhk.edu.cn;sribd.cn;xjtu.edu.cn;xjtu.edu.cn;tongji.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "Xi'an Jiao Tong University;Chinese University of Hong Kong;Shenzhen Research Institute of Big Data;Tongji University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.cuhk.edu.cn;http://www.sribd.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "XJTU;CUHK;;Tongji", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Flexible, Equivariant Framework for Subgraph GNNs via Graph Products and Graph Coarsening", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96301", "id": "9cFyqhjEHC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9cFyqhjEHC", "openreview": "https://openreview.net/forum?id=9cFyqhjEHC", "poster": "/media/PosterPDFs/NeurIPS%202024/96301.png?t=1733591584.397949", "project": "", "author_site": "Guy Bar-Shalom, Yam Eitan, Fabrizio Frasca, Haggai Maron", "tldr": "", "abstract": "Subgraph GNNs enhance message-passing GNNs expressivity by representing graphs as sets of subgraphs, demonstrating impressive performance across various tasks. However, their scalability is hindered by the need to process large numbers of subgraphs. While previous approaches attempted to generate smaller subsets of subgraphs through random or learnable sampling, these methods often yielded suboptimal selections or were limited to small subset sizes, ultimately compromising their effectiveness. This paper introduces a new Subgraph GNN framework to address these issues. \nOur approach diverges from most previous methods by associating subgraphs with node clusters rather than with individual nodes. We show that the resulting collection of subgraphs can be viewed as the product of coarsened and original graphs, unveiling a new connectivity structure on which we perform generalized message passing.\n\nCrucially, controlling the coarsening function enables meaningful selection of any number of subgraphs. In addition, we reveal novel permutation symmetries in the resulting node feature tensor, characterize associated linear equivariant layers, and integrate them into our Subgraph GNN. We also introduce novel node marking strategies and provide a theoretical analysis of their expressive power and other key aspects of our approach. Extensive experiments on multiple graph learning benchmarks demonstrate that our method is significantly more flexible than previous approaches, as it can seamlessly handle any number of subgraphs, while consistently outperforming baseline approaches. \nOur code is available at https://github.com/BarSGuy/Efficient-Subgraph-GNNs.", "keywords": "Equivariance;Subgraph GNNs;Graph Products", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/167e6987300a2a9a489f9852e6bcc638e8895691.zip", "author": "Guy Bar-Shalom;Yam Eitan;Fabrizio Frasca;Haggai Maron", "authorids": "~Guy_Bar-Shalom1;~Yam_Eitan1;~Fabrizio_Frasca1;~Haggai_Maron1", "gender": "M;M;M;M", "homepage": "https://barsguy.github.io/;;https://noired.github.io;https://haggaim.github.io/", "dblp": "321/1651;379/9962;228/1840;181/6629", "google_scholar": "9Zvzm5MAAAAJ;INF2QpcAAAAJ;PT2CDA4AAAAJ;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ", "orcid": ";;0000-0002-5165-1394;", "linkedin": ";yam-eitan-907414204/;;", "or_profile": "~Guy_Bar-Shalom1;~Yam_Eitan1;~Fabrizio_Frasca1;~Haggai_Maron1", "aff": "Technion, Technion;Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;NVIDIA", "aff_domain": "technion.ac.il;technion.ac.il;campus.technion.ac.il;nvidia.com", "position": "PhD student;PhD student;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nbar-shalom2024a,\ntitle={A Flexible, Equivariant Framework for Subgraph {GNN}s via Graph Products and Graph Coarsening},\nauthor={Guy Bar-Shalom and Yam Eitan and Fabrizio Frasca and Haggai Maron},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9cFyqhjEHC}\n}", "github": "", "reviewers": "hoHT;h2GK;DujM", "pdf_size": 2472040, "rating": "2;6;7", "confidence": "3;4;2", "soundness": "1;3;3", "novelty": "2;2;3", "presentation": "4;1;3", "wc_summary": "41;95;58", "wc_strengths": "46;97;41", "wc_weaknesses": "155;517;44", "wc_questions": "67;35;76", "wc_limitations": "20;11;18", "wc_review": "329;755;237", "wc_reply_reviewers": "274;22;9", "wc_reply_authors": "362;0;0", "reply_reviewers": "1;1;1", "reply_authors": "3;1;1", "rating_avg": [ 5.0, 2.160246899469287 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 64.66666666666667, 22.543784558547888 ], "wc_strengths_avg": [ 61.333333333333336, 25.302612952464457 ], "wc_weaknesses_avg": [ 238.66666666666666, 201.96094231861323 ], "wc_questions_avg": [ 59.333333333333336, 17.594190960528863 ], "wc_limitations_avg": [ 16.333333333333332, 3.858612300930075 ], "wc_review_avg": [ 440.3333333333333, 225.65066412980536 ], "wc_reply_reviewers_avg": [ 101.66666666666667, 121.97358548290508 ], "wc_reply_authors_avg": [ 120.66666666666667, 170.64843652635346 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1889822365046136, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14753925299689448824&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "technion.ac.il;technion.ac.il;campus.technion.ac.il;nvidia.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.nvidia.com", "aff_unique_abbr": "Technion;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "A Bayesian Approach to Data Point Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96300", "id": "9f5tOXKoMC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9f5tOXKoMC", "openreview": "https://openreview.net/forum?id=9f5tOXKoMC", "poster": "", "project": "", "author_site": "XINNUO XU, Minyoung Kim, Royson Lee, Brais Martinez, Timothy Hospedales", "tldr": "", "abstract": "Data point selection (DPS) is becoming a critical topic in deep learning due to the ease of acquiring uncurated training data compared to the difficulty of obtaining curated or processed data. \nExisting approaches to DPS are predominantly based on a bi-level optimisation (BLO) formulation, which is demanding in terms of memory and computation, and exhibits some theoretical defects regarding minibatches.\nThus, we propose a novel Bayesian approach to DPS. We view the DPS problem as posterior inference in a novel Bayesian model where the posterior distributions of the instance-wise weights and the main neural network parameters are inferred under a reasonable prior and likelihood model.\nWe employ stochastic gradient Langevin MCMC sampling to learn the main network and instance-wise weights jointly, ensuring convergence even with minibatches. Our update equation is comparable to the widely used SGD and much more efficient than existing BLO-based methods. Through controlled experiments in both the vision and language domains, we present the proof-of-concept. Additionally, we demonstrate that our method scales effectively to large language models and facilitates automated per-task optimization for instruction fine-tuning datasets.", "keywords": "Bayesian;LLM;Data selection;Data unbalancing;Data denoising;Domain adaptation", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Xinnuo Xu;Minyoung Kim;Royson Lee;Brais Martinez;Timothy Hospedales", "authorids": "~Xinnuo_Xu1;~Minyoung_Kim2;~Royson_Lee1;~Brais_Martinez3;~Timothy_Hospedales1", "gender": "F;M;;M;M", "homepage": ";https://sites.google.com/site/mikim21/;;http://www.braismartinez.org/;http://homepages.inf.ed.ac.uk/thospeda/", "dblp": "211/7908;;247/5940;14/111;32/3545", "google_scholar": "osgiI-AAAAAJ;;qNu3yNoAAAAJ;https://scholar.google.co.uk/citations?user=-62MApgAAAAJ;https://scholar.google.fr/citations?user=nHhtvqkAAAAJ", "orcid": ";;;;0000-0003-4867-7486", "linkedin": ";;royson-lee-025a09169/;;timothyhospedales/", "or_profile": "~Xinnuo_Xu1;~Minyoung_Kim2;~Royson_Lee1;~Brais_Martinez3;~Timothy_Hospedales1", "aff": "Samsung;;Samsung AI Center, Cambridge;Samsung;Samsung AI Research Centre", "aff_domain": "samsung.com;;samsung.com;samsung.com;samsung.com", "position": "Researcher;;Research Engineer;Samsung AI Center;Principal Researcher", "bibtex": "@inproceedings{\nxu2024a,\ntitle={A Bayesian Approach to Data Point Selection},\nauthor={Xinnuo Xu and Minyoung Kim and Royson Lee and Brais Martinez and Timothy Hospedales},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9f5tOXKoMC}\n}", "github": "", "reviewers": "oLJs;5MKv;JeEz", "pdf_size": 28470353, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "3;2;3", "novelty": "3;4;2", "presentation": "3;3;3", "wc_summary": "69;310;101", "wc_strengths": "37;72;84", "wc_weaknesses": "67;315;199", "wc_questions": "4;155;45", "wc_limitations": "5;9;11", "wc_review": "182;861;440", "wc_reply_reviewers": "0;255;41", "wc_reply_authors": "46;455;27", "reply_reviewers": "0;2;1", "reply_authors": "2;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 160.0, 106.86751923136733 ], "wc_strengths_avg": [ 64.33333333333333, 19.93879523831757 ], "wc_weaknesses_avg": [ 193.66666666666666, 101.31578795473531 ], "wc_questions_avg": [ 68.0, 63.75473838599502 ], "wc_limitations_avg": [ 8.333333333333334, 2.494438257849294 ], "wc_review_avg": [ 494.3333333333333, 279.85035683776107 ], "wc_reply_reviewers_avg": [ 98.66666666666667, 111.80439267856259 ], "wc_reply_authors_avg": [ 176.0, 197.43522144406418 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_QAR8GGYkSYJ:scholar.google.com/&scioq=A+Bayesian+Approach+to+Data+Point+Selection&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "samsung.com;;samsung.com;samsung.com;samsung.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United Kingdom" }, { "title": "Knowledge-Empowered Dynamic Graph Network for Irregularly Sampled Medical Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96299", "id": "9hCn01VAdC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9hCn01VAdC", "openreview": "https://openreview.net/forum?id=9hCn01VAdC", "poster": "/media/PosterPDFs/NeurIPS%202024/96299.png?t=1730302105.6781976", "project": "", "author_site": "Yicheng Luo, Zhen Liu, Linghao Wang, Binquan Wu, Junhao Zheng, Qianli Ma", "tldr": "", "abstract": "Irregularly Sampled Medical Time Series (ISMTS) are commonly found in the healthcare domain, where different variables exhibit unique temporal patterns while interrelated. However, many existing methods fail to efficiently consider the differences and correlations among medical variables together, leading to inadequate capture of fine-grained features at the variable level in ISMTS. We propose Knowledge-Empowered Dynamic Graph Network (KEDGN), a graph neural network empowered by variables' textual medical knowledge, aiming to model variable-specific temporal dependencies and inter-variable dependencies in ISMTS. Specifically, we leverage a pre-trained language model to extract semantic representations for each variable from their textual descriptions of medical properties, forming an overall semantic view among variables from a medical perspective. Based on this, we allocate variable-specific parameter spaces to capture variable-specific temporal patterns and generate a complete variable graph to measure medical correlations among variables. Additionally, we employ a density-aware mechanism to dynamically adjust the variable graph at different timestamps, adapting to the time-varying correlations among variables in ISMTS. The variable-specific parameter spaces and dynamic graphs are injected into the graph convolutional recurrent network to capture intra-variable and inter-variable dependencies in ISMTS together. Experiment results on four healthcare datasets demonstrate that KEDGN significantly outperforms existing methods.", "keywords": "healthcare;irregularly sampled medical time series;graph neural network;recurrent neural network", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/5df5d13eab528ce490d650c57855cc6c515cda91.zip", "author": "Yicheng Luo;Zhen Liu;Linghao Wang;Binquan Wu;Junhao Zheng;Qianli Ma", "authorids": "~Yicheng_Luo2;~Zhen_Liu5;~Linghao_Wang1;~Binquan_Wu1;~Junhao_Zheng3;~Qianli_Ma3", "gender": ";M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=zKB1QXQAAAAJ&hl=zh-CN;https://zliu21.github.io/;https://waterking.top/;https://github.com/zzz47zzz;http://www2.scut.edu.cn/qianlima;https://github.com/KNwbq", "dblp": ";77/35-23;;37/3126;57/8221-1;", "google_scholar": "zKB1QXQAAAAJ;https://scholar.google.com.hk/citations?user=M5qB8dsAAAAJ;;eyh-5tkAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-8107-0929;;0000-0001-9124-2467;0000-0002-9356-2883;", "linkedin": ";;;;;", "or_profile": "~Yicheng_Luo2;~Zhen_Liu5;~Linghao_Wang1;~Junhao_Zheng3;~Qianli_Ma3;~Wu_Binquan2", "aff": "South China University of Technology;South China University of Technology;;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "MS student;PhD student;;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nluo2024knowledgeempowered,\ntitle={Knowledge-Empowered Dynamic Graph Network for Irregularly Sampled Medical Time Series},\nauthor={Yicheng Luo and Zhen Liu and Linghao Wang and Binquan Wu and Junhao Zheng and Qianli Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9hCn01VAdC}\n}", "github": "", "reviewers": "vHK1;n8KK;68iN;yFgU", "pdf_size": 1373805, "rating": "5;6;6;6", "confidence": "5;4;5;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "72;65;22;169", "wc_strengths": "76;86;22;47", "wc_weaknesses": "278;308;124;22", "wc_questions": "262;57;2;102", "wc_limitations": "18;1;1;10", "wc_review": "706;517;171;350", "wc_reply_reviewers": "0;20;84;0", "wc_reply_authors": "262;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 53.754069613379045 ], "wc_strengths_avg": [ 57.75, 25.1234452255259 ], "wc_weaknesses_avg": [ 183.0, 116.24543001770004 ], "wc_questions_avg": [ 105.75, 96.91329888101014 ], "wc_limitations_avg": [ 7.5, 7.088723439378913 ], "wc_review_avg": [ 436.0, 198.1678581405168 ], "wc_reply_reviewers_avg": [ 26.0, 34.46737587922817 ], "wc_reply_authors_avg": [ 65.5, 113.44932789576146 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11169995892007849725&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "scut.edu.cn;scut.edu.cn;;scut.edu.cn;scut.edu.cn;scut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exploring the Edges of Latent State Clusters for Goal-Conditioned Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96298", "id": "9hKN99RNdR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9hKN99RNdR", "openreview": "https://openreview.net/forum?id=9hKN99RNdR", "poster": "/media/PosterPDFs/NeurIPS%202024/96298.png?t=1731695342.3288157", "project": "", "author_site": "Yuanlin Duan, Guofeng Cui, He Zhu", "tldr": "", "abstract": "Exploring unknown environments efficiently is a fundamental challenge in unsupervised goal-conditioned reinforcement learning. While selecting exploratory goals at the frontier of previously explored states is an effective strategy, the policy during training may still have limited capability of reaching rare goals on the frontier, resulting in reduced exploratory behavior. We propose \"Cluster Edge Exploration\" (CE$^2$), a new goal-directed exploration algorithm that when choosing goals in sparsely explored areas of the state space gives priority to goal states that remain accessible to the agent. The key idea is clustering to group states that are easily reachable from one another by the current policy under training in a latent space, and traversing to states holding significant exploration potential on the boundary of these clusters before doing exploratory behavior. In challenging robotics environments including navigating a maze with a multi-legged ant robot, manipulating objects with a robot arm on a cluttered tabletop, and rotating objects in the palm of an anthropomorphic robotic hand, CE$^2$ demonstrates superior efficiency in exploration compared to baseline methods and ablations.", "keywords": "Reinforcement Learning;Goal-Conditioned Reinforcement Learning;Model-Based Reinforcement Learning;Exploration Strategies;World Models", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yuanlin Duan;Guofeng Cui;He Zhu", "authorids": "~Yuanlin_Duan2;~Guofeng_Cui1;~He_Zhu4", "gender": "M;M;M", "homepage": "https://achlin512.github.io/;;https://herowanzhu.github.io", "dblp": ";218/1163.html;59/2802-1", "google_scholar": ";;3X9GC2gAAAAJ", "orcid": ";;", "linkedin": ";guofeng-cui-56b729197/;", "or_profile": "~Yuanlin_Duan2;~Guofeng_Cui1;~He_Zhu4", "aff": "Rutgers University;Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nduan2024exploring,\ntitle={Exploring the Edges of Latent State Clusters for Goal-Conditioned Reinforcement Learning},\nauthor={Yuanlin Duan and Guofeng Cui and He Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9hKN99RNdR}\n}", "github": "", "reviewers": "7wGc;ZPaX;NgsZ;WfSq;5zDz", "pdf_size": 4536973, "rating": "5;6;6;7;7", "confidence": "4;3;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "80;88;208;63;65", "wc_strengths": "135;40;82;26;31", "wc_weaknesses": "171;28;66;59;43", "wc_questions": "102;101;49;85;15", "wc_limitations": "14;9;8;32;13", "wc_review": "502;266;413;265;167", "wc_reply_reviewers": "177;12;44;78;12", "wc_reply_authors": "366;0;0;50;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 100.8, 54.403676346364676 ], "wc_strengths_avg": [ 62.8, 41.150455647538095 ], "wc_weaknesses_avg": [ 73.4, 50.543446657306625 ], "wc_questions_avg": [ 70.4, 33.6903547027929 ], "wc_limitations_avg": [ 15.2, 8.704022058795577 ], "wc_review_avg": [ 322.6, 119.23858435925848 ], "wc_reply_reviewers_avg": [ 64.6, 61.27185324437315 ], "wc_reply_authors_avg": [ 83.2, 142.71986547078865 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:E5wVHb-wGFQJ:scholar.google.com/&scioq=Exploring+the+Edges+of+Latent+State+Clusters+for+Goal-Conditioned+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "rutgers.edu;rutgers.edu;rutgers.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Connectivity Shapes Implicit Regularization in Matrix Factorization Models for Matrix Completion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96297", "id": "9jgODkdH0F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9jgODkdH0F", "openreview": "https://openreview.net/forum?id=9jgODkdH0F", "poster": "/media/PosterPDFs/NeurIPS%202024/96297.png?t=1729053729.1274934", "project": "", "author_site": "Zhiwei Bai, Jiajie Zhao, Yaoyu Zhang", "tldr": "", "abstract": "Matrix factorization models have been extensively studied as a valuable test-bed for understanding the implicit biases of overparameterized models. Although both low nuclear norm and low rank regularization have been studied for these models, a unified understanding of when, how, and why they achieve different implicit regularization effects remains elusive. In this work, we systematically investigate the implicit regularization of matrix factorization for solving matrix completion problems. We empirically discover that the connectivity of observed data plays a key role in the implicit bias, with a transition from low nuclear norm to low rank as data shifts from disconnected to connected with increased observations. We identify a hierarchy of intrinsic invariant manifolds in the loss landscape that guide the training trajectory to evolve from low-rank to higher-rank solutions. Based on this finding, we theoretically characterize the training trajectory as following the hierarchical invariant manifold traversal process, generalizing the characterization of Li et al.(2020) to include the disconnected case. Furthermore, we establish conditions that guarantee minimum nuclear norm, closely aligning with our experimental findings, and we provide a dynamics characterization condition for ensuring minimum rank. Our work reveals the intricate interplay between data connectivity, training dynamics, and implicit regularization in matrix factorization models.", "keywords": "matrix completion; implicit regularization; training dynamics; low nuclear norm; low rank;", "primary_area": "learning_theory", "supplementary_material": "/attachment/d03138a24db448c6e6bf31313a31bbcb104e670c.zip", "author": "Zhiwei Bai;Jiajie Zhao;Yaoyu Zhang", "authorids": "~Zhiwei_Bai1;~Jiajie_Zhao1;~Yaoyu_Zhang1", "gender": "M;M;", "homepage": "https://zhiweibai.github.io/;https://zjjsjtu.github.io/Zhao-Jiajie.github.io/;https://ins.sjtu.edu.cn/peoples/zhangyaoyu", "dblp": "286/1647;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhiwei_Bai1;~Jiajie_Zhao1;~Yaoyu_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nbai2024connectivity,\ntitle={Connectivity Shapes Implicit Regularization in Matrix Factorization Models for Matrix Completion},\nauthor={Zhiwei Bai and Jiajie Zhao and Yaoyu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9jgODkdH0F}\n}", "github": "", "reviewers": "1kCy;PxmR;FMau;CGEg;AjkV", "pdf_size": 6213974, "rating": "6;6;6;6;9", "confidence": "3;2;3;3;5", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "4;3;3;3;4", "wc_summary": "125;42;98;145;321", "wc_strengths": "53;32;59;87;185", "wc_weaknesses": "35;5;75;72;39", "wc_questions": "134;112;49;1;293", "wc_limitations": "12;1;10;1;20", "wc_review": "359;192;291;306;858", "wc_reply_reviewers": "63;16;36;34;15", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 146.2, 94.00723376421625 ], "wc_strengths_avg": [ 83.2, 53.84570549263887 ], "wc_weaknesses_avg": [ 45.2, 25.941472587345537 ], "wc_questions_avg": [ 117.8, 99.3647824935978 ], "wc_limitations_avg": [ 8.8, 7.19444229944198 ], "wc_review_avg": [ 401.2, 234.69929697380857 ], "wc_reply_reviewers_avg": [ 32.8, 17.451647486698786 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9185586535436918, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iKZVUvbjuXcJ:scholar.google.com/&scioq=Connectivity+Shapes+Implicit+Regularization+in+Matrix+Factorization+Models+for+Matrix+Completion&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Score-based 3D molecule generation with neural fields", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96296", "id": "9lGJrkqJUw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9lGJrkqJUw", "openreview": "https://openreview.net/forum?id=9lGJrkqJUw", "poster": "/media/PosterPDFs/NeurIPS%202024/96296.png?t=1733429256.890376", "project": "", "author_site": "Matthieu Kirchmeyer, Pedro O. Pinheiro, Saeed Saremi", "tldr": "", "abstract": "We introduce a new representation for 3D molecules based on their continuous atomic density fields. Using this representation, we propose a new model based on walk-jump sampling for unconditional 3D molecule generation in the continuous space using neural fields. Our model, FuncMol, encodes molecular fields into latent codes using a conditional neural field, samples noisy codes from a Gaussian-smoothed distribution with Langevin MCMC (walk), denoises these samples in a single step (jump), and finally decodes them into molecular fields. FuncMol performs all-atom generation of 3D molecules without assumptions on the molecular structure and scales well with the size of molecules, unlike most approaches. Our method achieves competitive results on drug-like molecules and easily scales to macro-cyclic peptides, with at least one order of magnitude faster sampling. The code is available at https://github.com/prescient-design/funcmol.", "keywords": "score-based generative model;neural fields;3D molecule", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Matthieu Kirchmeyer;Pedro O. Pinheiro;Saeed Saremi", "authorids": "~Matthieu_Kirchmeyer1;~Pedro_O._Pinheiro1;~Saeed_Saremi1", "gender": ";M;M", "homepage": "https://mkirchmeyer.github.io;;https://saeedsaremi.github.io/", "dblp": "241/9725;223/9937;128/2619", "google_scholar": "oJkKtrkAAAAJ;https://scholar.google.ca/citations?user=BU6f7L4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Matthieu_Kirchmeyer1;~Pedro_O._Pinheiro1;~Saeed_Saremi1", "aff": "Genentech;Prescient Design, Genentech;Genentech", "aff_domain": "gene.com;gene.com;gene.com", "position": "Researcher;Researcher;Senior Principal Research Scientist", "bibtex": "@inproceedings{\nkirchmeyer2024scorebased,\ntitle={Score-based 3D molecule generation with neural fields},\nauthor={Matthieu Kirchmeyer and Pedro O. Pinheiro and Saeed Saremi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9lGJrkqJUw}\n}", "github": "", "reviewers": "NVMn;eB4P;R2Qo;yg8b", "pdf_size": 35373161, "rating": "6;7;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "99;53;80;56", "wc_strengths": "53;85;88;53", "wc_weaknesses": "52;125;241;12", "wc_questions": "364;73;245;145", "wc_limitations": "25;2;17;7", "wc_review": "593;338;671;273", "wc_reply_reviewers": "64;4;36;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 18.774983355518586 ], "wc_strengths_avg": [ 69.75, 16.78354849249705 ], "wc_weaknesses_avg": [ 107.5, 87.07611612836209 ], "wc_questions_avg": [ 206.75, 109.42206130392536 ], "wc_limitations_avg": [ 12.75, 8.898735865278843 ], "wc_review_avg": [ 468.75, 167.15019443602213 ], "wc_reply_reviewers_avg": [ 28.5, 23.76446927663229 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4995202582500880659&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "gene.com;gene.com;gene.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Genentech", "aff_unique_dep": "", "aff_unique_url": "https://www.genentech.com", "aff_unique_abbr": "Genentech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RL on Incorrect Synthetic Data Scales the Efficiency of LLM Math Reasoning by Eight-Fold", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96295", "id": "9m87e9Keq1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9m87e9Keq1", "openreview": "https://openreview.net/forum?id=9m87e9Keq1", "poster": "", "project": "", "author_site": "Amrith Setlur, Saurabh Garg, Xinyang Geng, Naman Garg, Virginia Smith, Aviral Kumar", "tldr": "", "abstract": "Training on model-generated synthetic data is a promising approach for finetuning LLMs, but it remains unclear when it helps or hurts. In this paper, we investigate this question for math reasoning via an empirical study, followed by building a conceptual understanding of our observations. First, we find that while the typical approach of finetuning a model on synthetic correct or positive problem-solution pairs generated by capable models offers modest performance gains, sampling more correct solutions from the finetuned learner itself followed by subsequent fine-tuning on this self-generated data doubles the efficiency of the same synthetic problems. At the same time, training on model-generated positives can amplify various spurious correlations, resulting in flat or even inverse scaling trends as the amount of data increases. Surprisingly, we find that several of these issues can be addressed if we also utilize negative responses, i.e., model-generated responses that are deemed incorrect by a final answer verifier. Crucially, these negatives must be constructed such that the training can appropriately recover the utility or advantage of each intermediate step in the negative response. With this per-step scheme, we are able to attain consistent gains over only positive data, attaining performance similar to amplifying the amount of synthetic data by $\\mathbf{8 \\times}$. We show that training on per-step negatives can help to unlearn spurious correlations in the positive data, and is equivalent to advantage-weighted reinforcement learning (RL), implying that it inherits robustness benefits of RL over imitating positive data alone.", "keywords": "Synthetic data;math reasoning;reinforcement learning;large language models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/34ee10614bedaefe931b15a30892f17fa3989384.zip", "author": "Amrith Setlur;Saurabh Garg;Xinyang Geng;Naman Garg;Virginia Smith;Aviral Kumar", "authorids": "~Amrith_Setlur1;~Saurabh_Garg3;~Xinyang_Geng1;~Naman_Garg1;~Virginia_Smith1;~Aviral_Kumar2", "gender": "M;M;M;M;F;M", "homepage": "http://ars22.github.io;http://saurabhgarg1996.github.io/;http://young-geng.xyz/;https://naman-garg.com;;https://aviralkumar2907.github.io/", "dblp": "https://dblp.uni-trier.de/pers/hd/s/Setlur:Amrith;80/208;186/8221;;120/0921;202/7961", "google_scholar": "https://scholar.google.ru/citations?user=i7V1kJgAAAAJ;SAnJ1hIAAAAJ;vYougn0AAAAJ;;;", "orcid": "0000-0002-7061-3094;;;;;", "linkedin": ";saurabh-garg-b680b5b8/;;namangarg20/;;", "or_profile": "~Amrith_Setlur1;~Saurabh_Garg3;~Xinyang_Geng1;~Naman_Garg1;~Virginia_Smith1;~Aviral_Kumar2", "aff": "Carnegie Mellon University;Carnegie Mellon University;Google;Multion, Inc;Carnegie Mellon University;Google DeepMind", "aff_domain": "cmu.edu;cmu.edu;google.com;multion.ai;cmu.edu;google.com", "position": "PhD student;PhD student;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nsetlur2024rl,\ntitle={{RL} on Incorrect Synthetic Data Scales the Efficiency of {LLM} Math Reasoning by Eight-Fold},\nauthor={Amrith Setlur and Saurabh Garg and Xinyang Geng and Naman Garg and Virginia Smith and Aviral Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9m87e9Keq1}\n}", "github": "", "reviewers": "e455;jvk2;8A1v;iuJa", "pdf_size": 1014821, "rating": "4;5;6;8", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;4", "presentation": "2;2;3;3", "wc_summary": "165;81;61;54", "wc_strengths": "68;37;26;155", "wc_weaknesses": "133;693;374;243", "wc_questions": "1;4;188;74", "wc_limitations": "1;6;3;40", "wc_review": "368;821;652;566", "wc_reply_reviewers": "0;36;96;68", "wc_reply_authors": "46;70;839;32", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.25, 44.27965108263614 ], "wc_strengths_avg": [ 71.5, 50.608793702280636 ], "wc_weaknesses_avg": [ 360.75, 209.9409143068592 ], "wc_questions_avg": [ 66.75, 75.85306519844798 ], "wc_limitations_avg": [ 12.5, 15.976545308670458 ], "wc_review_avg": [ 601.75, 163.1814557478882 ], "wc_reply_reviewers_avg": [ 50.0, 35.832945734337834 ], "wc_reply_authors_avg": [ 246.75, 342.20562166627246 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.16903085094570333, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9694282267840550669&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;cmu.edu;google.com;multion.ai;cmu.edu;google.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0;1", "aff_unique_norm": "Carnegie Mellon University;Google;Multion, Inc", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.cmu.edu;https://www.google.com;", "aff_unique_abbr": "CMU;Google;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Disentangling the Roles of Distinct Cell Classes with Cell-Type Dynamical Systems", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96294", "id": "9sP4oejtjB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9sP4oejtjB", "openreview": "https://openreview.net/forum?id=9sP4oejtjB", "poster": "", "project": "", "author_site": "Aditi Jha, Diksha Gupta, Carlos Brody, Jonathan Pillow", "tldr": "", "abstract": "Latent dynamical systems have been widely used to characterize the dynamics of neural population activity in the brain. However, these models typically ignore the fact that the brain contains multiple cell types. This limits their ability to capture the functional roles of distinct cell classes, and to predict the effects of cell-specific perturbations on neural activity or behavior. To overcome these limitations, we introduce the `\"cell-type dynamical systems\" (CTDS) model. This model extends latent linear dynamical systems to contain distinct latent variables for each cell class, with biologically inspired constraints on both dynamics and emissions. To illustrate our approach, we consider neural recordings with distinct excitatory (E) and inhibitory (I) populations. \n\nThe CTDS model defines separate latents for both cell types, and constrains the dynamics so that E (I) latents have a strictly positive (negative) effects on other latents. We applied CTDS to recordings from rat frontal orienting fields (FOF) and anterior dorsal striatum (ADS) during an auditory decision-making task. The model achieved higher accuracy than a standard linear dynamical system (LDS), and revealed that the animal's choice can be decoded from both E and I latents and thus is not restricted to a single cell-class. We also performed in-silico optogenetic perturbation experiments in the FOF and ADS, and found that CTDS was able to replicate the experimentally observed effects of different perturbations on behavior, whereas a standard LDS model---which does not differentiate between cell types---did not. Crucially, our model allowed us to understand the effects of these perturbations by revealing the dynamics of different cell-specific latents. Finally, CTDS can also be used to identify cell types for neurons whose class labels are unknown in electrophysiological recordings. These results illustrate the power of the CTDS model to provide more accurate and more biologically interpretable descriptions of neural population dynamics and their relationship to behavior.", "keywords": "neuroscience;neural dynamics;animal decision making", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Aditi Jha;Diksha Gupta;Carlos D Brody;Jonathan W. Pillow", "authorids": "~Aditi_Jha1;~Diksha_Gupta1;~Carlos_D_Brody1;~Jonathan_W._Pillow1", "gender": "F;F;M;Not Specified", "homepage": "https://aditijha7.com;;http://brodylab.org;http://pillowlab.princeton.edu/", "dblp": "249/7171;;;06/3460", "google_scholar": ";https://scholar.google.co.in/citations?user=IT-v1rUAAAAJ;;https://scholar.google.com.tw/citations?user=-ElvJ9wAAAAJ", "orcid": ";;0000-0002-4201-561X;0000-0002-3638-8831", "linkedin": ";;;", "or_profile": "~Aditi_Jha1;~Diksha_Gupta1;~Carlos_D_Brody1;~Jonathan_W._Pillow1", "aff": "Princeton University;University College London, University of London;Princeton University;Princeton University", "aff_domain": "princeton.edu;ucl.ac.uk;princeton.edu;princeton.edu", "position": "PhD student;Postdoc;Full Professor;Professor", "bibtex": "@inproceedings{\njha2024disentangling,\ntitle={Disentangling the Roles of Distinct Cell Classes with Cell-Type Dynamical Systems},\nauthor={Aditi Jha and Diksha Gupta and Carlos D Brody and Jonathan W. Pillow},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9sP4oejtjB}\n}", "github": "", "reviewers": "upoR;KBww;yzj8", "pdf_size": 3375257, "rating": "6;6;7", "confidence": "2;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "37;85;152", "wc_strengths": "72;101;154", "wc_weaknesses": "56;125;242", "wc_questions": "137;30;2", "wc_limitations": "2;5;13", "wc_review": "304;346;563", "wc_reply_reviewers": "15;22;193", "wc_reply_authors": "11;6;73", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.33333333333333, 47.16166051171462 ], "wc_strengths_avg": [ 109.0, 33.95094500402996 ], "wc_weaknesses_avg": [ 141.0, 76.77239087067694 ], "wc_questions_avg": [ 56.333333333333336, 58.17406829698454 ], "wc_limitations_avg": [ 6.666666666666667, 4.642796092394707 ], "wc_review_avg": [ 404.3333333333333, 113.49694073214289 ], "wc_reply_reviewers_avg": [ 76.66666666666667, 82.3097132110718 ], "wc_reply_authors_avg": [ 30.0, 30.474032661705056 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12521566698394494038&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "princeton.edu;ucl.ac.uk;princeton.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Princeton University;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.ucl.ac.uk", "aff_unique_abbr": "Princeton;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "HEMM: Holistic Evaluation of Multimodal Foundation Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97825", "id": "9tVn4f8aJO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9tVn4f8aJO", "openreview": "https://openreview.net/forum?id=9tVn4f8aJO", "poster": "", "project": "", "author_site": "Paul Pu Liang, Akshay Goindani, Talha Chafekar, Leena Mathur, Haofei Yu, Ruslan Salakhutdinov, Louis-Philippe Morency", "tldr": "", "abstract": "Multimodal foundation models that can holistically process text alongside images, video, audio, and other sensory modalities are increasingly used in a variety of real-world applications. However, it is challenging to characterize and study progress in multimodal foundation models, given the range of possible modeling decisions, tasks, and domains. In this paper, we introduce Holistic Evaluation of Multimodal Models (HEMM) to systematically evaluate the capabilities of multimodal foundation models across a set of 3 dimensions: basic skills, information flow, and real-world use cases. Basic multimodal skills are internal abilities required to solve problems, such as learning interactions across modalities, fine-grained alignment, multi-step reasoning, and the ability to handle external knowledge. Information flow studies how multimodal content changes during a task through querying, translation, editing, and fusion. Use cases span domain-specific challenges introduced in real-world multimedia, affective computing, natural sciences, healthcare, and human-computer interaction applications. Through comprehensive experiments across the 30 tasks in HEMM, we (1) identify key dataset dimensions (e.g., basic skills, information flows, and use cases) that pose challenges to today\u2019s models, and (2) distill performance trends regarding how different modeling dimensions (e.g., scale, pre-training data, multimodal alignment, pre-training, and instruction tuning objectives) influence performance. Our conclusions regarding challenging multimodal interactions, use cases, and tasks requiring reasoning and external knowledge, the benefits of data and model scale, and the impacts of instruction-tuning yield actionable insights for future work in multimodal foundation models.", "keywords": "multimodal learning;multimodal foundation models;evaluation;multimodal interactions;multimodal applications", "primary_area": "", "supplementary_material": "", "author": "Paul Pu Liang;Akshay Goindani;Talha Chafekar;Leena Mathur;Haofei Yu;Russ Salakhutdinov;Louis-Philippe Morency", "authorids": "~Paul_Pu_Liang1;~Akshay_Goindani2;~Talha_Chafekar1;~Leena_Mathur1;~Haofei_Yu1;~Russ_Salakhutdinov1;~Louis-Philippe_Morency1", "gender": "M;M;M;;M;M;M", "homepage": "https://pliang279.github.io/;https://akshayg08.github.io/;;https://l-mathur.github.io;https://www.haofeiyu.me;https://www.cs.cmu.edu/~rsalakhu/;https://www.cs.cmu.edu/~morency/", "dblp": "207/9749;;;263/4173;156/1412;;31/739", "google_scholar": "https://scholar.google.com/citations?hl=en;lvTPGo0AAAAJ;kEEyynoAAAAJ;loh93ZkAAAAJ;EL-QbZ4AAAAJ;;https://scholar.google.com.tw/citations?user=APgaFK0AAAAJ", "orcid": ";;;;;;0000-0001-6376-7696", "linkedin": ";akshay-goindani/;talhachafekar/;leena-mathur/;%E6%98%8A%E9%A3%9E-%E4%BA%8E-a04247188/;;morency?challengeId=AQELGK_OvMa0vwAAAY72L-VV4X9hW8juuY80VHVeeSGHZ1PJHeeEa5LTFoeTmDGU0t1OL07MXJTYC9EAi6qgPDd2z9ztnbdFYA&submissionId=09a0ff34-04ac-c717-bef7-8c9c8811b463&challengeSource=AgFhxWkU3q7v4wAAAY72L-1xRE0eG-BnZUNE9e3eAG95pgOCZ9u1nxEg-1dK2Dw&challegeType=AgHMzV0lqKgEFwAAAY72L-11X6DHMd3V_A3Iur8XZeyYF2-oBzoufs8&memberId=AgH4yz7pZ_riCgAAAY72L-146jmR2pdr3dmhy2icxBtEQzQ&recognizeDevice=AgFDCNyrhKiFSAAAAY72L-16m7z2EH2t0ueWmMKjyk1_ZJAkfFVe", "or_profile": "~Paul_Pu_Liang1;~Akshay_Goindani2;~Talha_Chafekar1;~Leena_Mathur1;~Haofei_Yu1;~Russ_Salakhutdinov1;~Louis-Philippe_Morency1", "aff": "Massachusetts Institute of Technology;Carnegie Mellon University;InfAI;Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "mit.edu;cmu.edu;infai.org;cmu.edu;cmu.edu;cs.cmu.edu;cmu.edu", "position": "Assistant Professor;MS student;Intern;PhD student;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliang2024hemm,\ntitle={{HEMM}: Holistic Evaluation of Multimodal Foundation Models},\nauthor={Paul Pu Liang and Akshay Goindani and Talha Chafekar and Leena Mathur and Haofei Yu and Russ Salakhutdinov and Louis-Philippe Morency},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=9tVn4f8aJO}\n}", "github": "", "reviewers": "PdCU;Ddcs;VBn8;J6Fw", "pdf_size": 2026125, "rating": "6;6;7;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "45;53;188;41", "wc_strengths": "29;26;85;87", "wc_improvement": "66;100;101;54", "wc_limitations": "1;5;14;1", "wc_correctness": "1;3;11;6", "wc_clarity": "1;6;11;9", "wc_relation_to_prior_work": "1;1;21;1", "wc_documentation": "6;1;59;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "151;196;491;201", "wc_reply_reviewers": "139;0;0;17", "wc_reply_authors": "177;26;0;0", "reply_reviewers": "3;0;0;1", "reply_authors": "4;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 81.75, 61.495426659224016 ], "wc_strengths_avg": [ 56.75, 29.277764600460877 ], "wc_improvement_avg": [ 80.25, 20.69269194667528 ], "wc_limitations_avg": [ 5.25, 5.3091901453988255 ], "wc_correctness_avg": [ 5.25, 3.766629793329841 ], "wc_clarity_avg": [ 6.75, 3.766629793329841 ], "wc_relation_to_prior_work_avg": [ 6.0, 8.660254037844387 ], "wc_documentation_avg": [ 16.75, 24.47830672248389 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 259.75, 134.92474754469617 ], "wc_reply_reviewers_avg": [ 39.0, 58.15066637623339 ], "wc_reply_authors_avg": [ 50.75, 73.65926621953275 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13202244486463869716&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;cmu.edu;infai.org;cmu.edu;cmu.edu;cs.cmu.edu;cmu.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Carnegie Mellon University;Institute of Computer Science, University of Innsbruck", "aff_unique_dep": ";;Institute of Computer Science", "aff_unique_url": "https://web.mit.edu;https://www.cmu.edu;https://www.uibk.ac.at/compsci/", "aff_unique_abbr": "MIT;CMU;InfAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;Austria" }, { "title": "Online Learning with Sublinear Best-Action Queries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96293", "id": "9uKeqtIoGZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9uKeqtIoGZ", "openreview": "https://openreview.net/forum?id=9uKeqtIoGZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96293.png?t=1730738345.5309958", "project": "", "author_site": "Matteo Russo, Andrea Celli, Riccardo Colini Baldeschi, Federico Fusco, Daniel Haimovich, Dima Karamshuk, Stefano Leonardi, Niek Tax", "tldr": "", "abstract": "In online learning, a decision maker repeatedly selects one of a set of actions, with the goal of minimizing the overall loss incurred. Following the recent line of research on algorithms endowed with additional predictive features, we revisit this problem by allowing the decision maker to acquire additional information on the actions to be selected. In particular, we study the power of \\emph{best-action queries}, which reveal beforehand the identity of the best action at a given time step. In practice, predictive features may be expensive, so we allow the decision maker to issue at most $k$ such queries.\n\nWe establish tight bounds on the performance any algorithm can achieve when given access to $k$ best-action queries for different types of feedback models. In particular, we prove that in the full feedback model, $k$ queries are enough to achieve an optimal regret of $\\Theta(\\min\\{\\sqrt T, \\frac{T}{k}\\})$. This finding highlights the significant multiplicative advantage in the regret rate achievable with even a modest (sublinear) number $k \\in \\Omega(\\sqrt{T})$ of queries.\n \nAdditionally, we study the challenging setting in which the only available feedback is obtained during the time steps corresponding to the $k$ best-action queries. There, we provide a tight regret rate of $\\Theta(\\min\\{\\frac{T}{\\sqrt k},\\frac{T^2}{k^2}\\})$, which improves over the standard $\\Theta(\\frac{T}{\\sqrt k})$ regret rate for label efficient prediction for $k \\in \\Omega(T^{2/3})$.", "keywords": "Online learning;Learning Theory;Low-regret algorithms", "primary_area": "online_learning", "supplementary_material": "", "author": "Matteo Russo;Andrea Celli;Riccardo Colini Baldeschi;Federico Fusco;Daniel Haimovich;Dima Karamshuk;Stefano Leonardi;Niek Tax", "authorids": "~Matteo_Russo1;~Andrea_Celli1;~Riccardo_Colini_Baldeschi1;~Federico_Fusco1;~Daniel_Haimovich1;~Dima_Karamshuk1;~Stefano_Leonardi2;~Niek_Tax1", "gender": "M;M;M;M;;M;M;M", "homepage": "https://sites.google.com/diag.uniroma1.it/matteo-russo/home?authuser=0&pli=1;https://andcelli.github.io/;https://research.fb.com/people/colini-baldeschi-riccardo/;https://sites.google.com/uniroma1.it/federicofusco/home;;https://karamsh.uk/;https://sites.google.com/a/uniroma1.it/stefanoleonardi-eng/;", "dblp": "190/5146-2;190/7301.html;;243/5755;274/1714.html;;l/StefanoLeonardi;143/7361", "google_scholar": "https://scholar.google.com/citations?hl=en;9wQscqEAAAAJ;;https://scholar.google.co.il/citations?user=oaS8iAQAAAAJ;;;https://scholar.google.it/citations?user=p5LCHHEAAAAJ;XkRvCC4AAAAJ", "orcid": ";;;0000-0001-6250-945X;;;0000-0002-9809-7191;0000-0001-7239-5206", "linkedin": ";;;;daniel-haimovich-b9a49bb5/;;;niektax/", "or_profile": "~Matteo_Russo1;~Andrea_Celli1;~Riccardo_Colini_Baldeschi1;~Federico_Fusco1;~Daniel_Haimovich1;~Dima_Karamshuk1;~Stefano_Leonardi2;~Niek_Tax1", "aff": "University of Roma \"La Sapienza\";Bocconi University;Meta Facebook;University of Roma \"La Sapienza\";;Meta;Sapienza University of Rome;Meta Facebook", "aff_domain": "uniroma1.it;unibocconi.it;facebook.com;uniroma1.it;;meta.com;uniroma1.it;facebook.com", "position": "PhD student;Assistant Professor;Researcher;Lecturer;;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nrusso2024online,\ntitle={Online Learning with Sublinear Best-Action Queries},\nauthor={Matteo Russo and Andrea Celli and Riccardo Colini Baldeschi and Federico Fusco and Daniel Haimovich and Dima Karamshuk and Stefano Leonardi and Niek Tax},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9uKeqtIoGZ}\n}", "github": "", "reviewers": "tNck;aUFF;VoCp;Sy4T", "pdf_size": 389529, "rating": "5;5;6;7", "confidence": "2;3;3;4", "soundness": "3;4;3;4", "novelty": "3;2;3;3", "presentation": "2;3;2;4", "wc_summary": "339;412;132;259", "wc_strengths": "92;80;119;79", "wc_weaknesses": "175;106;142;100", "wc_questions": "420;18;151;119", "wc_limitations": "22;60;100;3", "wc_review": "1048;676;644;560", "wc_reply_reviewers": "77;0;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 285.5, 103.8376136089423 ], "wc_strengths_avg": [ 92.5, 16.132265804901678 ], "wc_weaknesses_avg": [ 130.75, 30.177599308096063 ], "wc_questions_avg": [ 177.0, 148.63546010289738 ], "wc_limitations_avg": [ 46.25, 37.204670405743414 ], "wc_review_avg": [ 732.0, 187.29655629509048 ], "wc_reply_reviewers_avg": [ 22.5, 31.9100297712177 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4535779593888005617&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "uniroma1.it;unibocconi.it;facebook.com;uniroma1.it;;meta.com;uniroma1.it;facebook.com", "author_num": 8, "aff_unique_index": "0;1;2;0;2;3;2", "aff_unique_norm": "University of Rome La Sapienza;Bocconi University;Meta;Sapienza University of Rome", "aff_unique_dep": ";;Meta Platforms, Inc.;", "aff_unique_url": "https://www.uniroma1.it;https://www.bocconi.edu;https://meta.com;https://www.uniroma1.it", "aff_unique_abbr": "La Sapienza;Bocconi;Meta;Sapienza", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;0;1;0;1;0;1", "aff_country_unique": "Italy;United States" }, { "title": "Ask, Attend, Attack: An Effective Decision-Based Black-Box Targeted Attack for Image-to-Text Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96292", "id": "9uMJeCUeKk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9uMJeCUeKk", "openreview": "https://openreview.net/forum?id=9uMJeCUeKk", "poster": "/media/PosterPDFs/NeurIPS%202024/96292.png?t=1731678584.3514993", "project": "", "author_site": "Qingyuan Zeng, Zhenzhong Wang, Yiu-ming Cheung, Min", "tldr": "", "abstract": "While image-to-text models have demonstrated significant advancements in various vision-language tasks, they remain susceptible to adversarial attacks. Existing white-box attacks on image-to-text models require access to the architecture, gradients, and parameters of the target model, resulting in low practicality. Although the recently proposed gray-box attacks have improved practicality, they suffer from semantic loss during the training process, which limits their targeted attack performance. To advance adversarial attacks of image-to-text models, this paper focuses on a challenging scenario: decision-based black-box targeted attacks where the attackers only have access to the final output text and aim to perform targeted attacks. Specifically, we formulate the decision-based black-box targeted attack as a large-scale optimization problem. To efficiently solve the optimization problem, a three-stage process \\textit{Ask, Attend, Attack}, called \\textit{AAA}, is proposed to coordinate with the solver. \\textit{Ask} guides attackers to create target texts that satisfy the specific semantics. \\textit{Attend} identifies the crucial regions of the image for attacking, thus reducing the search space for the subsequent \\textit{Attack}. \\textit{Attack} uses an evolutionary algorithm to attack the crucial regions, where the attacks are semantically related to the target texts of \\textit{Ask}, thus achieving targeted attacks without semantic loss. Experimental results on transformer-based and CNN+RNN-based image-to-text models confirmed the effectiveness of our proposed \\textit{AAA}.", "keywords": "Black-box adversarial attack;Image-to-text model;Attention;Differential evolution", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/cd950e7ff810e0797fd72f4f750e8b8b11e06d02.zip", "author": "Qingyuan Zeng;Zhenzhong Wang;Yiu-ming Cheung;Min Jiang", "authorids": "~Qingyuan_Zeng2;~Zhenzhong_Wang2;~Yiu-ming_Cheung1;~Min_Jiang1", "gender": "M;;;M", "homepage": "https://github.com/1057939502;;;https://minjiang.xmu.edu.cn/", "dblp": "311/0118;;;35/994-5.html", "google_scholar": "https://scholar.google.com.hk/citations?user=kf0TSbAAAAAJ;;;22w5ZwcAAAAJ", "orcid": "0009-0002-2805-0803;;;0000-0003-2946-6974", "linkedin": "qingyuan-zeng-3239b3275/;;;", "or_profile": "~Qingyuan_Zeng2;~Zhenzhong_Wang2;~Yiu-ming_Cheung1;~Min_Jiang1", "aff": "Xiamen University;;;Xiamen University", "aff_domain": "xiamen.edu;;;xmu.edu.cn", "position": "MS student;;;Full Professor", "bibtex": "@inproceedings{\nzeng2024ask,\ntitle={Ask, Attend, Attack: An Effective Decision-Based Black-Box Targeted Attack for Image-to-Text Models},\nauthor={Qingyuan Zeng and Zhenzhong Wang and Yiu-ming Cheung and Min Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9uMJeCUeKk}\n}", "github": "", "reviewers": "xQiz;83Kr;8Kf2", "pdf_size": 15026435, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "85;99;146", "wc_strengths": "40;20;126", "wc_weaknesses": "94;91;163", "wc_questions": "54;118;63", "wc_limitations": "4;15;1", "wc_review": "277;343;499", "wc_reply_reviewers": "0;39;39", "wc_reply_authors": "106;702;106", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 110.0, 26.08958923913266 ], "wc_strengths_avg": [ 62.0, 45.985504962614755 ], "wc_weaknesses_avg": [ 116.0, 33.25657829663178 ], "wc_questions_avg": [ 78.33333333333333, 28.288199345702832 ], "wc_limitations_avg": [ 6.666666666666667, 6.018490028422596 ], "wc_review_avg": [ 373.0, 93.08061022576076 ], "wc_reply_reviewers_avg": [ 26.0, 18.384776310850235 ], "wc_reply_authors_avg": [ 304.6666666666667, 280.95709439145486 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1650683084150465446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xiamen.edu;;;xmu.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Model Reconstruction Using Counterfactual Explanations: A Perspective From Polytope Theory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96291", "id": "9uolDxbYLm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9uolDxbYLm", "openreview": "https://openreview.net/forum?id=9uolDxbYLm", "poster": "/media/PosterPDFs/NeurIPS%202024/96291.png?t=1731480092.4257677", "project": "", "author_site": "Pasan Dissanayake, Sanghamitra Dutta", "tldr": "", "abstract": "Counterfactual explanations provide ways of achieving a favorable model outcome with minimum input perturbation. However, counterfactual explanations can also be leveraged to reconstruct the model by strategically training a surrogate model to give similar predictions as the original (target) model. In this work, we analyze how model reconstruction using counterfactuals can be improved by\nfurther leveraging the fact that the counterfactuals also lie quite close to the decision boundary. Our main contribution is to derive novel theoretical relationships between the error in model reconstruction and the number of counterfactual queries required using polytope theory. Our theoretical analysis leads us to propose a strategy for model reconstruction that we call Counterfactual Clamping Attack (CCA) which trains a surrogate model using a unique loss function that treats counterfactuals differently than ordinary instances. Our approach also alleviates the related problem of decision boundary shift that arises in existing model reconstruction approaches when counterfactuals are treated as ordinary instances. Experimental results demonstrate that our strategy improves fidelity between the target and surrogate model predictions on several datasets.", "keywords": "model extraction;counterfactual explanations;decision boundary shift;query complexity", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Pasan Dissanayake;Sanghamitra Dutta", "authorids": "~Pasan_Dissanayake1;~Sanghamitra_Dutta2", "gender": "M;F", "homepage": ";https://sites.google.com/site/sanghamitraweb/", "dblp": "292/8397;154/6653", "google_scholar": "isO0bMwAAAAJ;BgaqaXwAAAAJ", "orcid": "0000-0003-0997-332X;0000-0002-6500-2627", "linkedin": "pasandissanayake;", "or_profile": "~Pasan_Dissanayake1;~Sanghamitra_Dutta2", "aff": "University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndissanayake2024model,\ntitle={Model Reconstruction Using Counterfactual Explanations: A Perspective From Polytope Theory},\nauthor={Pasan Dissanayake and Sanghamitra Dutta},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9uolDxbYLm}\n}", "github": "", "reviewers": "z1A7;J5Uq;Bnbn;VLao", "pdf_size": 2602585, "rating": "5;6;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "164;131;99;181", "wc_strengths": "28;53;159;61", "wc_weaknesses": "190;105;176;185", "wc_questions": "129;84;6;54", "wc_limitations": "13;104;9;10", "wc_review": "524;477;449;491", "wc_reply_reviewers": "355;6;29;42", "wc_reply_authors": "648;25;22;31", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 143.75, 31.47518864121389 ], "wc_strengths_avg": [ 75.25, 49.861683685972736 ], "wc_weaknesses_avg": [ 164.0, 34.43109060137364 ], "wc_questions_avg": [ 68.25, 44.76815274277017 ], "wc_limitations_avg": [ 34.0, 40.441315507782384 ], "wc_review_avg": [ 485.25, 27.003471998985614 ], "wc_reply_reviewers_avg": [ 108.0, 143.1869407453068 ], "wc_reply_authors_avg": [ 181.5, 269.35339240484797 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2860519850375725732&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "umd.edu;umd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "UDPM: Upsampling Diffusion Probabilistic Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96290", "id": "9utMGIbHBt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9utMGIbHBt", "openreview": "https://openreview.net/forum?id=9utMGIbHBt", "poster": "/media/PosterPDFs/NeurIPS%202024/96290.png?t=1729576027.590181", "project": "", "author_site": "Shady Abu-Hussein, Raja Giryes", "tldr": "", "abstract": "Denoising Diffusion Probabilistic Models (DDPM) have recently gained significant attention. DDPMs compose a Markovian process that begins in the data domain and gradually adds noise until reaching pure white noise. DDPMs generate high-quality samples from complex data distributions by defining an inverse process and training a deep neural network to learn this mapping. However, these models are inefficient because they require many diffusion steps to produce aesthetically pleasing samples. Additionally, unlike generative adversarial networks (GANs), the latent space of diffusion models is less interpretable. In this work, we propose to generalize the denoising diffusion process into an Upsampling Diffusion Probabilistic Model (UDPM). In the forward process, we reduce the latent variable dimension through downsampling, followed by the traditional noise perturbation. As a result, the reverse process gradually denoises and upsamples the latent variable to produce a sample from the data distribution. We formalize the Markovian diffusion processes of UDPM and demonstrate its generation capabilities on the popular FFHQ, AFHQv2, and CIFAR10 datasets. UDPM generates images with as few as three network evaluations, whose overall computational cost is less than a single DDPM or EDM step while achieving an FID score of 6.86. This surpasses current state-of-the-art efficient diffusion models that use a single denoising step for sampling. Additionally, UDPM offers an interpretable and interpolable latent space, which gives it an advantage over traditional DDPMs. Our code is available online: \\url{https://github.com/shadyabh/UDPM/}", "keywords": "diffusion models;generative models", "primary_area": "generative_models", "supplementary_material": "", "author": "Shady Abu-Hussein;Raja Giryes", "authorids": "~Shady_Abu-Hussein1;~Raja_Giryes1", "gender": "M;M", "homepage": "https://www.giryes.sites.tau.ac.il/;https://shadyabh.github.io/", "dblp": "50/7998;243/2778", "google_scholar": "https://scholar.google.co.il/citations?user=9aQUYVQAAAAJ;FZYAWe4AAAAJ", "orcid": "0000-0002-2830-0297;", "linkedin": "raja-giryes-0818935/;shady-abu-hussein-919a69104/", "or_profile": "~Raja_Giryes1;~Shady_Abu_Hussein1", "aff": "Tel Aviv University;Tel Aviv University", "aff_domain": "tauex.tau.ac.il;tau.ac.il", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nabu-hussein2024udpm,\ntitle={{UDPM}: Upsampling Diffusion Probabilistic Models},\nauthor={Shady Abu-Hussein and Raja Giryes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9utMGIbHBt}\n}", "github": "", "reviewers": "9qFt;ZKuR;AGqF", "pdf_size": 4291799, "rating": "4;5;6", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;2;2", "wc_summary": "75;38;93", "wc_strengths": "65;16;63", "wc_weaknesses": "65;53;262", "wc_questions": "78;106;4", "wc_limitations": "15;14;14", "wc_review": "298;227;436", "wc_reply_reviewers": "0;25;39", "wc_reply_authors": "80;102;102", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 68.66666666666667, 22.895899681432528 ], "wc_strengths_avg": [ 48.0, 22.642143596988927 ], "wc_weaknesses_avg": [ 126.66666666666667, 95.82043391446082 ], "wc_questions_avg": [ 62.666666666666664, 43.02970550161313 ], "wc_limitations_avg": [ 14.333333333333334, 0.4714045207910317 ], "wc_review_avg": [ 320.3333333333333, 86.77301167733869 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 16.131404843417148 ], "wc_reply_authors_avg": [ 94.66666666666667, 10.370899457402697 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1566884974123011228&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tauex.tau.ac.il;tau.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "FAST: A Dual-tier Few-Shot Learning Paradigm for Whole Slide Image Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96289", "id": "9vcqleAHPl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9vcqleAHPl", "openreview": "https://openreview.net/forum?id=9vcqleAHPl", "poster": "/media/PosterPDFs/NeurIPS%202024/96289.png?t=1730101177.613325", "project": "", "author_site": "Kexue Fu, xiaoyuan luo, Linhao Qu, Shuo Wang, Ying Xiong, Ilias Maglogiannis, Longxiang Gao, Manning Wang", "tldr": "", "abstract": "The expensive fine-grained annotation and data scarcity \n have become the primary obstacles for the widespread adoption of deep learning-based Whole Slide Images (WSI) classification algorithms in clinical practice. Unlike few-shot learning methods in natural images that can leverage the labels of each image, existing few-shot WSI classification methods only utilize a small number of fine-grained labels or weakly supervised slide labels for training in order to avoid expensive fine-grained annotation. They lack sufficient mining of available WSIs, severely limiting WSI classification performance. To address the above issues, we propose a novel and efficient dual-tier few-shot learning paradigm for WSI classification, named FAST. FAST consists of a dual-level annotation strategy and a dual-branch classification framework. Firstly, to avoid expensive fine-grained annotation, we collect a very small number of WSIs at the slide level, and annotate an extremely small number of patches. Then, to fully mining the available WSIs, we use all the patches and available patch labels to build a cache branch, which utilizes the labeled patches to learn the labels of unlabeled patches and through knowledge retrieval for patch classification. In addition to the cache branch, we also construct a prior branch that includes learnable prompt vectors, using the text encoder of visual-language models for patch classification. Finally, we integrate the results from both branches to achieve WSI classification. Extensive experiments on binary and multi-class datasets demonstrate that our proposed method significantly surpasses existing few-shot classification methods and approaches the accuracy of fully supervised methods with only 0.22% annotation costs. All codes and models will be publicly available on https://github.com/fukexue/FAST.", "keywords": "Whole Slide Image Classification;Few-shot Learning;Vision-Language Model Adaption;Multimodal Large Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kexue Fu;xiaoyuan Luo;Linhao Qu;Shuo Wang;Ying Xiong;Ilias Maglogiannis;Longxiang Gao;Manning Wang", "authorids": "~Kexue_Fu1;~xiaoyuan_Luo1;~Linhao_Qu1;~Shuo_Wang8;~Ying_Xiong3;~Ilias_Maglogiannis1;~Longxiang_Gao1;~Manning_Wang1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://kexuefu.me/;;https://linhao-qu.com/;;https://www.zs-hospital.sh.cn/;https://www.ds.unipi.gr/en/faculty/imaglo-en/;https://www.deakin.edu.au/about-deakin/people/longxiang-gao;http://www.fudanmiccai.org/nd.jsp?id=58#_np=117_394", "dblp": ";;308/1001.html;;;;44/7500;23/5931", "google_scholar": "wRs-_DwAAAAJ;;C8gTFhUAAAAJ;;;;https://scholar.google.com.au/citations?user=dYG_FfMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-1204-0942;0000-0002-8456-5847;0000-0001-8815-7050;;;0000-0003-2860-399X;0000-0002-3026-7537;0000-0002-9255-3897", "linkedin": ";;;;;;;", "or_profile": "~Kexue_Fu1;~xiaoyuan_Luo1;~Linhao_Qu1;~Shuo_Wang8;~Ying_Xiong3;~Ilias_Maglogiannis1;~Longxiang_Gao1;~Manning_Wang1", "aff": "Qilu University of Technology (Shandong Academy of Sciences);Fudan University;Fudan University;;Fudan University;;Qilu University of Technology;Fudan University", "aff_domain": "sdas.org;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;;qlu.edu.au;fudan.edu.cn", "position": "Associate Professor;PhD student;PhD student;;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfu2024fast,\ntitle={{FAST}: A Dual-tier Few-Shot Learning Paradigm for Whole Slide Image Classification},\nauthor={Kexue Fu and xiaoyuan Luo and Linhao Qu and Shuo Wang and Ying Xiong and Ilias Maglogiannis and Longxiang Gao and Manning Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9vcqleAHPl}\n}", "github": "", "reviewers": "x5Ho;HxPT;Bx9c;hypM", "pdf_size": 3449971, "rating": "5;6;7;8", "confidence": "5;3;5;5", "soundness": "2;3;4;3", "novelty": "2;3;3;4", "presentation": "1;3;4;3", "wc_summary": "76;75;42;144", "wc_strengths": "96;50;24;197", "wc_weaknesses": "415;176;196;63", "wc_questions": "174;35;2;23", "wc_limitations": "12;38;5;25", "wc_review": "773;374;269;452", "wc_reply_reviewers": "534;18;18;83", "wc_reply_authors": "1362;348;45;45", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 84.25, 37.11047695732298 ], "wc_strengths_avg": [ 91.75, 66.00899559908483 ], "wc_weaknesses_avg": [ 212.5, 127.43723945534916 ], "wc_questions_avg": [ 58.5, 67.72185762366534 ], "wc_limitations_avg": [ 20.0, 12.62933094031509 ], "wc_review_avg": [ 467.0, 188.22459988003695 ], "wc_reply_reviewers_avg": [ 163.25, 215.69118549444713 ], "wc_reply_authors_avg": [ 450.0, 540.8784521498337 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10532262723350426910&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "sdas.org;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;;qlu.edu.au;fudan.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Qilu University of Technology;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.qilu.edu.cn/;https://www.fudan.edu.cn", "aff_unique_abbr": "QUT;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Global Convergence in Training Large-Scale Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96288", "id": "9wtlfRKwZS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9wtlfRKwZS", "openreview": "https://openreview.net/forum?id=9wtlfRKwZS", "poster": "", "project": "", "author_site": "Cheng Gao, Yuan Cao, Zihao Li, Yihan He, Mengdi Wang, Han Liu, Jason Klusowski, Jianqing Fan", "tldr": "", "abstract": "Despite the widespread success of Transformers across various domains, their optimization guarantees in large-scale model settings are not well-understood. This paper rigorously analyzes the convergence properties of gradient flow in training Transformers with weight decay regularization. First, we construct the mean-field limit of large-scale Transformers, showing that as the model width and depth go to infinity, gradient flow converges to the Wasserstein gradient flow, which is represented by a partial differential equation. Then, we demonstrate that the gradient flow reaches a global minimum consistent with the PDE solution when the weight decay regularization parameter is sufficiently small. Our analysis is based on a series of novel mean-field techniques that adapt to Transformers. Compared with existing tools for deep networks (Lu et al., 2020) that demand homogeneity and global Lipschitz smoothness, we utilize a refined analysis assuming only $\\textit{partial homogeneity}$ and $\\textit{local Lipschitz smoothness}$. These new techniques may be of independent interest.", "keywords": "Transformer;gradient flow;mean-field analysis;global convergence", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Cheng Gao;Yuan Cao;Zihao Li;Yihan He;Mengdi Wang;Han Liu;Jason Matthew Klusowski;Jianqing Fan", "authorids": "~Cheng_Gao3;~Yuan_Cao1;~Zihao_Li3;~Yihan_He1;~Mengdi_Wang1;~Han_Liu4;~Jason_Matthew_Klusowski1;~Jianqing_Fan1", "gender": "M;M;M;M;F;;M;M", "homepage": ";https://yuancaohku.github.io/;;;http://mwang.princeton.edu;;https://klusowski.princeton.edu/;https://fan.princeton.edu", "dblp": ";;;;;;;33/2768", "google_scholar": ";-VGnHI4AAAAJ;;;;;4HkhCjsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0001-6484-8682;0000-0003-3250-7677", "linkedin": "chenggao1999;;zihao-li-48b313235/;\u9038\u6db5-\u8d3a-187106180/;;;jklusowski/;", "or_profile": "~Cheng_Gao3;~Yuan_Cao1;~Zihao_Li3;~Yihan_He1;~Mengdi_Wang1;~Han_Liu4;~Jason_Matthew_Klusowski1;~Jianqing_Fan1", "aff": "Princeton University;University of Hong Kong;Princeton University;Princeton University;Princeton University;Northwestern University;Princeton University;Princeton University", "aff_domain": "princeton.edu;hku.hk;princeton.edu;princeton.edu;princeton.edu;u.northwestern.edu;princeton.edu;princeton.edu", "position": "PhD student;Assistant Professor;PhD student;PhD student;Full Professor;Associate Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\ngao2024global,\ntitle={Global Convergence in Training Large-Scale Transformers},\nauthor={Cheng Gao and Yuan Cao and Zihao Li and Yihan He and Mengdi Wang and Han Liu and Jason Matthew Klusowski and Jianqing Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9wtlfRKwZS}\n}", "github": "", "reviewers": "mtEE;oct4;iBME;eZmg;accV", "pdf_size": 889613, "rating": "5;5;5;6;7", "confidence": "3;3;4;1;4", "soundness": "4;2;2;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;3;2", "wc_summary": "118;105;104;30;81", "wc_strengths": "96;74;208;46;29", "wc_weaknesses": "310;205;848;1;150", "wc_questions": "108;97;206;1;314", "wc_limitations": "2;2;10;1;14", "wc_review": "634;483;1376;79;588", "wc_reply_reviewers": "40;87;1534;0;38", "wc_reply_authors": "0;32;1958;0;0", "reply_reviewers": "1;1;4;0;1", "reply_authors": "1;2;6;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 87.6, 31.167932238119356 ], "wc_strengths_avg": [ 90.6, 63.04157358442126 ], "wc_weaknesses_avg": [ 302.8, 290.27256156929474 ], "wc_questions_avg": [ 145.2, 106.48079639071075 ], "wc_limitations_avg": [ 5.8, 5.230678732248808 ], "wc_review_avg": [ 632.0, 420.3536606240036 ], "wc_reply_reviewers_avg": [ 339.8, 597.737199779301 ], "wc_reply_authors_avg": [ 398.0, 780.0984553247109 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250536 ], "reply_authors_avg": [ 2.2, 1.9390719429665317 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1678040105848445743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "princeton.edu;hku.hk;princeton.edu;princeton.edu;princeton.edu;u.northwestern.edu;princeton.edu;princeton.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "Princeton University;University of Hong Kong;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.hku.hk;https://www.northwestern.edu", "aff_unique_abbr": "Princeton;HKU;NU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Attention boosted Individualized Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96287", "id": "9xoFciqYIU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9xoFciqYIU", "openreview": "https://openreview.net/forum?id=9xoFciqYIU", "poster": "/media/PosterPDFs/NeurIPS%202024/96287.png?t=1731565240.9045794", "project": "", "author_site": "Guang Yang, Yuan Cao, Long Feng", "tldr": "", "abstract": "Different from classical one-model-fits-all strategy, individualized models allow parameters to vary across samples and are gaining popularity in various fields, particularly in personalized medicine. Motivated by medical imaging analysis, this paper introduces a novel individualized modeling framework for matrix-valued data that does not require additional information on sample similarity for the individualized coefficients. Under our framework, the model individualization stems from an optimal internal relation map within the samples themselves. We refer to the proposed method as Attention boosted Individualized Regression, due to its close connections with the self-attention mechanism. Therefore, our approach provides a new interpretation for attention from the perspective of individualized modeling. Comprehensive numerical experiments and real brain MRI analysis using an ADNI dataset demonstrated the superior performance of our model.", "keywords": "Individualized regression;Vector correlation;Brain imaging data;Self-attention mechanism", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/312c12886ccaa50c06cd4343dc0b94f21429d2d0.zip", "author": "Guang Yang;Yuan Cao;Long Feng", "authorids": "~Guang_Yang12;~Yuan_Cao1;~Long_Feng2", "gender": "M;M;M", "homepage": "https://ylknight.github.io/;https://yuancaohku.github.io/;https://lfengstat.github.io/", "dblp": ";;", "google_scholar": "qLFk50MAAAAJ;-VGnHI4AAAAJ;", "orcid": "0000-0003-1566-3758;;", "linkedin": ";;", "or_profile": "~Guang_Yang12;~Yuan_Cao1;~Long_Feng2", "aff": "City University of Hong Kong;University of Hong Kong;University of Hong Kong", "aff_domain": "cityu.edu.hk;hku.hk;hku.hk", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024attention,\ntitle={Attention boosted Individualized Regression},\nauthor={Guang Yang and Yuan Cao and Long Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9xoFciqYIU}\n}", "github": "", "reviewers": "2raH;hyyZ;EsRE;3WVs;gg3f", "pdf_size": 615377, "rating": "5;6;6;6;8", "confidence": "4;2;4;2;4", "soundness": "3;3;3;2;3", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "89;30;55;117;36", "wc_strengths": "57;15;20;58;35", "wc_weaknesses": "65;16;44;137;213", "wc_questions": "29;26;66;49;213", "wc_limitations": "11;2;16;24;95", "wc_review": "251;89;201;385;592", "wc_reply_reviewers": "11;5;36;98;18", "wc_reply_authors": "0;0;75;50;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 65.4, 33.00060605504087 ], "wc_strengths_avg": [ 37.0, 17.988885457415087 ], "wc_weaknesses_avg": [ 95.0, 71.31619731870173 ], "wc_questions_avg": [ 76.6, 69.72115891176796 ], "wc_limitations_avg": [ 29.6, 33.469986555121295 ], "wc_review_avg": [ 303.6, 172.7120146370831 ], "wc_reply_reviewers_avg": [ 33.6, 33.838439680339874 ], "wc_reply_authors_avg": [ 25.0, 31.622776601683793 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16666666666666663, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:g3BRuiZBiegJ:scholar.google.com/&scioq=Attention+boosted+Individualized+Regression&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "cityu.edu.hk;hku.hk;hku.hk", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "City University of Hong Kong;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.hku.hk", "aff_unique_abbr": "CityU;HKU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Mixed Dynamics In Linear Networks: Unifying the Lazy and Active Regimes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96286", "id": "9zQl27mqWE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=9zQl27mqWE", "openreview": "https://openreview.net/forum?id=9zQl27mqWE", "poster": "", "project": "", "author_site": "Zhenfeng Tu, Santiago Tomas Aranguri Diaz, Arthur Jacot", "tldr": "", "abstract": "The training dynamics of linear networks are well studied in two distinct\nsetups: the lazy regime and balanced/active regime, depending on the\ninitialization and width of the network. We provide a surprisingly\nsimple unifying formula for the evolution of the learned matrix that\ncontains as special cases both lazy and balanced regimes but also\na mixed regime in between the two. In the mixed regime, a part of\nthe network is lazy while the other is balanced. More precisely the\nnetwork is lazy along singular values that are below a certain threshold\nand balanced along those that are above the same threshold. At initialization,\nall singular values are lazy, allowing for the network to align itself\nwith the task, so that later in time, when some of the singular value\ncross the threshold and become active they will converge rapidly (convergence\nin the balanced regime is notoriously difficult in the absence of\nalignment). The mixed regime is the `best of both worlds': it converges\nfrom any random initialization (in contrast to balanced dynamics which\nrequire special initialization), and has a low rank bias (absent in\nthe lazy dynamics). This allows us to prove an almost complete phase\ndiagram of training behavior as a function of the variance at initialization\nand the width, for a MSE training task.", "keywords": "Linear Networks;Lazy Regime;Active Regime;Training Dynamics;Phase Diagram", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zhenfeng Tu;Santiago Aranguri;Arthur Jacot", "authorids": "~Zhenfeng_Tu1;~Santiago_Aranguri1;~Arthur_Jacot1", "gender": "M;M;M", "homepage": ";https://cims.nyu.edu/~sa7270/;", "dblp": ";;222/2747", "google_scholar": ";;https://scholar.google.ch/citations?user=G6OhFawAAAAJ", "orcid": ";;", "linkedin": "zhenfeng-tu-b50495153/;;", "or_profile": "~Zhenfeng_Tu1;~Santiago_Aranguri1;~Arthur_Jacot1", "aff": "New York University;New York University;NYU, New York University", "aff_domain": "nyu.edu;nyu.edu;cims.nyu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntu2024mixed,\ntitle={Mixed Dynamics In Linear Networks: Unifying the Lazy and Active Regimes},\nauthor={Zhenfeng Tu and Santiago Aranguri and Arthur Jacot},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=9zQl27mqWE}\n}", "github": "", "reviewers": "mU2H;oPFX;r8Jt;pxfL", "pdf_size": 818045, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "2;2;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "24;43;75;164", "wc_strengths": "66;30;74;157", "wc_weaknesses": "34;261;92;126", "wc_questions": "98;48;312;33", "wc_limitations": "13;5;37;2", "wc_review": "235;387;590;482", "wc_reply_reviewers": "21;34;0;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.5, 53.70521389958334 ], "wc_strengths_avg": [ 81.75, 46.499327952132816 ], "wc_weaknesses_avg": [ 128.25, 83.40376190556395 ], "wc_questions_avg": [ 122.75, 111.88247181752824 ], "wc_limitations_avg": [ 14.25, 13.736356867816154 ], "wc_review_avg": [ 423.5, 130.39267617469932 ], "wc_reply_reviewers_avg": [ 16.25, 12.65652005884714 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13504244336422128878&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "nyu.edu;nyu.edu;cims.nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Testing Semantic Importance via Betting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96285", "id": "A0HSmrwtLH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A0HSmrwtLH", "openreview": "https://openreview.net/forum?id=A0HSmrwtLH", "poster": "/media/PosterPDFs/NeurIPS%202024/96285.png?t=1730306971.4578974", "project": "", "author_site": "Jacopo Teneggi, Jeremias Sulam", "tldr": "", "abstract": "Recent works have extended notions of feature importance to semantic concepts that are inherently interpretable to the users interacting with a black-box predictive model. Yet, precise statistical guarantees such as false positive rate and false discovery rate control are needed to communicate findings transparently, and to avoid unintended consequences in real-world scenarios. In this paper, we formalize the global (i.e., over a population) and local (i.e., for a sample) statistical importance of semantic concepts for the predictions of opaque models by means of conditional independence, which allows for rigorous testing. We use recent ideas of sequential kernelized independence testing to induce a rank of importance across concepts, and we showcase the effectiveness and flexibility of our framework on synthetic datasets as well as on image classification using several vision-language models.", "keywords": "Explainability;Semantic Concepts;Sequential Testing;Conditional Independence Testing", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Jacopo Teneggi;Jeremias Sulam", "authorids": "~Jacopo_Teneggi1;~Jeremias_Sulam1", "gender": "M;M", "homepage": "https://github.com/JacopoTeneggi;", "dblp": "289/7481;156/3028", "google_scholar": "4qzIl0EAAAAJ;1awx1aIAAAAJ", "orcid": "0000-0003-2189-5192;", "linkedin": "jacopo-teneggi/;", "or_profile": "~Jacopo_Teneggi1;~Jeremias_Sulam1", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "johnshopkins.edu;jhu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nteneggi2024testing,\ntitle={Testing Semantic Importance via Betting},\nauthor={Jacopo Teneggi and Jeremias Sulam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A0HSmrwtLH}\n}", "github": "", "reviewers": "FuBo;nqVJ;mYwv;go3R", "pdf_size": 12550726, "rating": "6;6;6;6", "confidence": "3;3;2;3", "soundness": "2;3;3;3", "novelty": "4;3;3;3", "presentation": "3;3;3;3", "wc_summary": "97;91;117;102", "wc_strengths": "31;128;74;30", "wc_weaknesses": "104;174;65;12", "wc_questions": "16;3;73;46", "wc_limitations": "1;23;25;27", "wc_review": "249;419;354;217", "wc_reply_reviewers": "18;14;41;0", "wc_reply_authors": "30;38;35;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.75, 9.627434756984853 ], "wc_strengths_avg": [ 65.75, 40.08974307725107 ], "wc_weaknesses_avg": [ 88.75, 59.06511237608881 ], "wc_questions_avg": [ 34.5, 27.15234796477093 ], "wc_limitations_avg": [ 19.0, 10.488088481701515 ], "wc_review_avg": [ 309.75, 80.91160299981703 ], "wc_reply_reviewers_avg": [ 18.25, 14.737282653189496 ], "wc_reply_authors_avg": [ 25.75, 15.138939857202683 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cMFZu7PwNnIJ:scholar.google.com/&scioq=Testing+Semantic+Importance+via+Betting&hl=en&as_sdt=0,4", "gs_version_total": 2, "email": "johnshopkins.edu;jhu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning to Embed Distributions via Maximum Kernel Entropy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96284", "id": "A0cok1GK9c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A0cok1GK9c", "openreview": "https://openreview.net/forum?id=A0cok1GK9c", "poster": "", "project": "", "author_site": "Oleksii Kachaiev, Stefano Recanatesi", "tldr": "", "abstract": "Empirical data can often be considered as samples from a set of probability distributions. Kernel methods have emerged as a natural approach for learning to classify these distributions. Although numerous kernels between distributions have been proposed, applying kernel methods to distribution regression tasks remains challenging, primarily because selecting a suitable kernel is not straightforward. Surprisingly, the question of learning a data-dependent distribution kernel has received little attention. In this paper, we propose a novel objective for the unsupervised learning of data-dependent distribution kernel, based on the principle of entropy maximization in the space of probability measure embeddings. We examine the theoretical properties of the latent embedding space induced by our objective, demonstrating that its geometric structure is well-suited for solving downstream discriminative tasks. Finally, we demonstrate the performance of the learned kernel across different modalities.", "keywords": "distribution regression;kernel methods;Reproducing Kernel Hilbert Spaces (RKHS);kernel mean embeddings;data-dependent kernel;unsupervised learning", "primary_area": "other", "supplementary_material": "", "author": "Oleksii Kachaiev;Stefano Recanatesi", "authorids": "~Oleksii_Kachaiev1;~Stefano_Recanatesi1", "gender": "M;M", "homepage": "https://kachayev.github.io/talks/;", "dblp": ";", "google_scholar": ";", "orcid": ";0000-0002-3576-9261", "linkedin": "kachayev/;", "or_profile": "~Oleksii_Kachaiev1;~Stefano_Recanatesi1", "aff": ";University of Washington", "aff_domain": ";uw.edu", "position": ";Postdoc", "bibtex": "@inproceedings{\nkachaiev2024learning,\ntitle={Learning to Embed Distributions via Maximum Kernel Entropy},\nauthor={Oleksii Kachaiev and Stefano Recanatesi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A0cok1GK9c}\n}", "github": "", "reviewers": "bRTz;Z862;pXGE;m5U2;1Qk4", "pdf_size": 7788456, "rating": "4;6;6;6;6", "confidence": "4;4;4;3;3", "soundness": "4;3;3;2;3", "novelty": "2;3;3;2;2", "presentation": "3;3;3;3;4", "wc_summary": "59;106;70;82;47", "wc_strengths": "74;40;77;156;56", "wc_weaknesses": "136;94;96;349;126", "wc_questions": "56;80;427;2;2", "wc_limitations": "1;1;9;3;1", "wc_review": "326;321;679;592;232", "wc_reply_reviewers": "139;25;200;35;15", "wc_reply_authors": "271;0;474;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 72.8, 20.252407264322926 ], "wc_strengths_avg": [ 80.6, 39.987998199459796 ], "wc_weaknesses_avg": [ 160.2, 95.81732619938839 ], "wc_questions_avg": [ 113.4, 159.7342793516783 ], "wc_limitations_avg": [ 3.0, 3.0983866769659336 ], "wc_review_avg": [ 430.0, 173.28935339483496 ], "wc_reply_reviewers_avg": [ 82.8, 73.64346542633638 ], "wc_reply_authors_avg": [ 149.0, 193.44870121042428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J31zNSq0-NsJ:scholar.google.com/&scioq=Learning+to+Embed+Distributions+via+Maximum+Kernel+Entropy&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": ";uw.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "BIOSCAN-5M: A Multimodal Dataset for Insect Biodiversity", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97824", "id": "A33u66KmYf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A33u66KmYf", "openreview": "https://openreview.net/forum?id=A33u66KmYf", "poster": "/media/PosterPDFs/NeurIPS%202024/97824.png?t=1733432818.902617", "project": "", "author_site": "Zahra Gharaee, Scott C. Lowe, ZeMing Gong, Pablo Millan Arias, Nicholas Pellegrino, Austin T. Wang, Joakim Bruslund Haurum, Iuliia Eyriay, Lila Kari, Dirk Steinke, Graham Taylor, Paul Fieguth, Angel Chang", "tldr": "", "abstract": "As part of an ongoing worldwide effort to comprehend and monitor insect biodiversity, this paper presents the BIOSCAN-5M Insect dataset to the machine learning community and establish several benchmark tasks. BIOSCAN-5M is a comprehensive dataset containing multi-modal information for over 5 million insect specimens, and it significantly expands existing image-based biological datasets by including taxonomic labels, raw nucleotide barcode sequences, assigned barcode index numbers, geographical, and size information. We propose three benchmark experiments to demonstrate the impact of the multi-modal data types on the classification and clustering accuracy. First, we pretrain a masked language model on the DNA barcode sequences of the BIOSCAN-5M dataset, and demonstrate the impact of using this large reference library on species- and genus-level classification performance. Second, we propose a zero-shot transfer learning task applied to images and DNA barcodes to cluster feature embeddings obtained from self-supervised learning, to investigate whether meaningful clusters can be derived from these representation embeddings. Third, we benchmark multi-modality by performing contrastive learning on DNA barcodes, image data, and taxonomic information. This yields a general shared embedding space enabling taxonomic classification using multiple types of information and modalities. The code repository of the BIOSCAN-5M Insect dataset is available at https://github.com/bioscan-ml/BIOSCAN-5M.", "keywords": "Biodiversity;Multi-modal datasets;Taxonomic classification;Nucleotide barcode sequences;zero-shot clustering;self-supervised learning;open-world;fine-grained", "primary_area": "", "supplementary_material": "/attachment/4a289a84ee8a81227bdd6d9c827bfbafaf179f87.pdf", "author": "Zahra Gharaee;Scott C Lowe;ZeMing Gong;Pablo Andres Millan Arias;Nicholas Pellegrino;Austin Wang;Joakim Bruslund Haurum;Iuliia Zarubiieva;Lila Kari;Dirk Steinke;Graham W. Taylor;Paul W. Fieguth;Angel X Chang", "authorids": "~Zahra_Gharaee1;~Scott_C_Lowe1;~ZeMing_Gong1;~Pablo_Andres_Millan_Arias1;~Nicholas_Pellegrino1;~Austin_Wang2;~Joakim_Bruslund_Haurum1;~Iuliia_Zarubiieva1;~Lila_Kari1;~Dirk_Steinke1;~Graham_W._Taylor1;~Paul_W._Fieguth1;~Angel_X_Chang1", "gender": "F;;M;M;Not Specified;M;M;F;F;M;;F;", "homepage": "https://zahrag.github.io/;https://scottclowe.com/;https://github.com/zmgong;;https://uwaterloo.ca/scholar/npellegr;https://atwang16.github.io;https://vbn.aau.dk/en/persons/139317;;https://cs.uwaterloo.ca/~lila/;https://biodiversitygenomics.net;;https://angelxuanchang.github.io;https://www.gwtaylor.ca", "dblp": "174/4584;245/0038;352/5753;;;359/9540.html;201/0296;;;;f/PWFieguth;46/10489;17/1633", "google_scholar": "https://scholar.google.pl/citations?user=nWe8d1MAAAAJ;https://scholar.google.ca/citations?user=ZFPhxuAAAAAJ;;64oaiaEAAAAJ;https://scholar.google.ca/citations?user=eiIC09EAAAAJ;5bZFMK8AAAAJ;GAEtgr4AAAAJ;;B4myWaQAAAAJ;;TObmBfYAAAAJ;8gfs8XIAAAAJ;https://scholar.google.ca/citations?user=PUeKU8kAAAAJ", "orcid": "0000-0003-0140-0025;0000-0002-5237-3867;;;0000-0002-2102-2431;;0000-0002-0544-0422;0009-0007-1597-8684;;;0000-0001-7260-2260;0009-0003-5055-6437;", "linkedin": "zahragh/;scottclowe/;zeming-gong-86a12b1ab/;;nicholas-nick-pellegrino/;austin-wang-234b3b162/;;ieyriay/;;;paul-fieguth-1071461;;", "or_profile": "~Zahra_Gharaee1;~Scott_C_Lowe1;~ZeMing_Gong1;~Pablo_Andres_Millan_Arias1;~Nicholas_Pellegrino1;~Austin_Wang2;~Joakim_Bruslund_Haurum1;~Iuliia_Zarubiieva1;~Lila_Kari1;~Dirk_Steinke1;~Paul_W._Fieguth1;~Angel_X_Chang1;~Graham_W_Taylor1", "aff": "University of Waterloo;Vector Institute;Simon Fraser University;University of Waterloo;University of Waterloo;Simon Fraser University;University of Edinburgh;University of Guelph;;;University of Waterloo;Simon Fraser University;University of Guelph", "aff_domain": "uwaterloo.ca;vectorinstitute.ai;sfu.ca;cs.uwaterloo.ca;uwaterloo.ca;sfu.ca;ed.ac.uk;uoguelph.ca;;;uwaterloo.ca;sfu.ca;uoguelph.ca", "position": "Postdoc;Postdoc;MS student;PhD student;PhD student;PhD student;Visiting Researcher;Postdoc;;;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngharaee2024bioscanm,\ntitle={{BIOSCAN}-5M: A Multimodal Dataset for Insect Biodiversity},\nauthor={Zahra Gharaee and Scott C Lowe and ZeMing Gong and Pablo Andres Millan Arias and Nicholas Pellegrino and Austin Wang and Joakim Bruslund Haurum and Iuliia Zarubiieva and Lila Kari and Dirk Steinke and Graham W. Taylor and Paul W. Fieguth and Angel X Chang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=A33u66KmYf}\n}", "github": "", "reviewers": "eL4D;PFXf;CH9G;aHng", "pdf_size": 5971651, "rating": "4;7;8;8", "confidence": "3;3;4;5", "wc_summary_and_contributions": "31;67;48;79", "wc_strengths": "16;93;93;167", "wc_improvement": "139;99;25;64", "wc_limitations": "102;56;9;81", "wc_correctness": "24;33;4;23", "wc_clarity": "128;25;7;5", "wc_relation_to_prior_work": "20;43;20;21", "wc_documentation": "103;27;29;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "564;444;236;446", "wc_reply_reviewers": "315;34;12;41", "wc_reply_authors": "562;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 56.25, 18.2944663764757 ], "wc_strengths_avg": [ 92.25, 53.39182989933947 ], "wc_improvement_avg": [ 81.75, 42.16263155923738 ], "wc_limitations_avg": [ 62.0, 34.66266002487403 ], "wc_correctness_avg": [ 21.0, 10.559356040971437 ], "wc_clarity_avg": [ 41.25, 50.68715320473226 ], "wc_relation_to_prior_work_avg": [ 26.0, 9.82344135219425 ], "wc_documentation_avg": [ 41.0, 37.013511046643494 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 422.5, 118.13022475217763 ], "wc_reply_reviewers_avg": [ 100.5, 124.30305708227775 ], "wc_reply_authors_avg": [ 140.5, 243.35313846342726 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.6897007348075542, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17226346429652002282&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uwaterloo.ca;vectorinstitute.ai;sfu.ca;cs.uwaterloo.ca;uwaterloo.ca;sfu.ca;ed.ac.uk;uoguelph.ca;;;uwaterloo.ca;sfu.ca;uoguelph.ca", "author_num": 13, "aff_unique_index": "0;1;2;0;0;2;3;4;0;2;4", "aff_unique_norm": "University of Waterloo;Vector Institute;Simon Fraser University;University of Edinburgh;University of Guelph", "aff_unique_dep": ";;;;", "aff_unique_url": "https://uwaterloo.ca;https://vectorinstitute.ai/;https://www.sfu.ca;https://www.ed.ac.uk;https://www.uoguelph.ca", "aff_unique_abbr": "UW;Vector Institute;SFU;Edinburgh;U of G", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Optimal Transport-based Labor-free Text Prompt Modeling for Sketch Re-identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96283", "id": "A34sBX4R5N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A34sBX4R5N", "openreview": "https://openreview.net/forum?id=A34sBX4R5N", "poster": "/media/PosterPDFs/NeurIPS%202024/96283.png?t=1731502273.3085072", "project": "", "author_site": "Rui Li, Tingting Ren, Jie Wen, Jinxing Li", "tldr": "", "abstract": "Sketch Re-identification (Sketch Re-ID), \nwhich aims to retrieve target person from an image gallery based on a sketch query, is crucial for criminal investigation, law enforcement, and missing person searches. \nExisting methods aim to alleviate the modality gap by employing semantic metrics constraints or auxiliary modal guidance. However, they incur expensive labor costs and inevitably omit fine-grained modality-consistent information due to the abstraction of sketches.\nTo address this issue, this paper proposes a novel $\\textit{Optimal Transport-based Labor-free Text Prompt Modeling}$ (OLTM) network, which hierarchically extracts coarse- and fine-grained similarity representations guided by textual semantic information without any additional annotations. \nSpecifically, multiple target attributes are flexibly obtained by a pre-trained visual question answering (VQA) model. Subsequently, a text prompt reasoning module employs learnable prompt strategy and optimal transport algorithm to extract discriminative global and local text representations, which serve as a bridge for hierarchical and multi-granularity modal alignment between sketch and image modalities.\nAdditionally, instead of measuring the similarity of two samples by only computing their distance, a novel triplet assignment loss is further proposed, in which the whole data distribution also contributes to optimizing the inter/intra-class distances. Extensive experiments conducted on two public benchmarks consistently demonstrate the robustness and superiority of our OLTM over state-of-the-art methods.", "keywords": "Sketch Re-identification; Optimal Transport; Prompt Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Rui Li;Tingting Ren;Jie Wen;Jinxing Li", "authorids": "~Rui_Li37;~Tingting_Ren1;~Jie_Wen1;~Jinxing_Li2", "gender": "M;F;;M", "homepage": "https://github.com/LrHITer;https://github.com/HITRTT;;", "dblp": ";;;119/7206", "google_scholar": ";;;i4I4hIEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Rui_Li37;~Tingting_Ren1;~Jie_Wen1;~Jinxing_Li2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;;Harbin Institute of Technology", "aff_domain": "stu.hit.edu.cn;stu.hit.edu.cn;;hit.edu.cn", "position": "PhD student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nli2024optimal,\ntitle={Optimal Transport-based Labor-free Text Prompt Modeling for Sketch Re-identification},\nauthor={Rui Li and Tingting Ren and Jie Wen and Jinxing Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A34sBX4R5N}\n}", "github": "", "reviewers": "nGMK;beT2;edWJ;Bynq", "pdf_size": 1671462, "rating": "4;6;6;7", "confidence": "4;5;5;5", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "156;62;44;44", "wc_strengths": "186;51;46;41", "wc_weaknesses": "149;59;242;53", "wc_questions": "106;107;43;77", "wc_limitations": "79;1;61;6", "wc_review": "676;280;436;221", "wc_reply_reviewers": "0;20;14;21", "wc_reply_authors": "30;26;20;18", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.5, 46.48386816950586 ], "wc_strengths_avg": [ 81.0, 60.7247890074556 ], "wc_weaknesses_avg": [ 125.75, 77.14069937458436 ], "wc_questions_avg": [ 83.25, 26.176086414894034 ], "wc_limitations_avg": [ 36.75, 33.89966813996857 ], "wc_review_avg": [ 403.25, 175.97638335867686 ], "wc_reply_reviewers_avg": [ 13.75, 8.37779804005802 ], "wc_reply_authors_avg": [ 23.5, 4.769696007084728 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:djweXOGfPU4J:scholar.google.com/&scioq=Optimal+Transport-based+Labor-free+Text+Prompt+Modeling+for+Sketch+Re-identification&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "stu.hit.edu.cn;stu.hit.edu.cn;;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Generative Modelling of Structurally Constrained Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96282", "id": "A3hxp0EeNW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A3hxp0EeNW", "openreview": "https://openreview.net/forum?id=A3hxp0EeNW", "poster": "/media/PosterPDFs/NeurIPS%202024/96282.png?t=1733469530.8000646", "project": "", "author_site": "Manuel Madeira, Clement Vignac, Dorina Thanou, Pascal Frossard", "tldr": "", "abstract": "Graph diffusion models have emerged as state-of-the-art techniques in graph generation; yet, integrating domain knowledge into these models remains challenging. \nDomain knowledge is particularly important in real-world scenarios, where invalid generated graphs hinder deployment in practical applications.\nUnconstrained and conditioned graph diffusion models fail to guarantee such domain-specific structural properties. \nWe present ConStruct, a novel framework that enables graph diffusion models to incorporate hard constraints on specific properties, such as planarity or acyclicity.\nOur approach ensures that the sampled graphs remain within the domain of graphs that satisfy the specified property throughout the entire trajectory in both the forward and reverse processes. This is achieved by introducing an edge-absorbing noise model and a new projector operator.\nConStruct demonstrates versatility across several structural and edge-deletion invariant constraints and achieves state-of-the-art performance for both synthetic benchmarks and attributed real-world datasets. \nFor example, by incorporating planarity constraints in digital pathology graph datasets, the proposed method outperforms existing baselines, improving data validity by up to 71.1 percentage points.", "keywords": "Graph Generative Models;Constrained Diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Manuel Madeira;Clement Vignac;Dorina Thanou;Pascal Frossard", "authorids": "~Manuel_Madeira1;~Clement_Vignac1;~Dorina_Thanou1;~Pascal_Frossard1", "gender": "M;;F;", "homepage": "https://manuelmlmadeira.github.io/;https://cvignac.github.io/;https://people.epfl.ch/dorina.thanou/?lang=en;", "dblp": "301/8148;254/1004.html;;", "google_scholar": "OhijpAwAAAAJ;eKJLfHQAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-8205-404X;;;", "linkedin": "manuel-madeira/;;;", "or_profile": "~Manuel_Madeira1;~Clement_Vignac1;~Dorina_Thanou1;~Pascal_Frossard1", "aff": "EPFL - EPF Lausanne;Isomorphic Labs;Swiss Federal Institute of Technology Lausanne;", "aff_domain": "epfl.ch;isomorphiclabs.com;epfl.ch;", "position": "PhD student;Researcher;Researcher;", "bibtex": "@inproceedings{\nmadeira2024generative,\ntitle={Generative Modelling of Structurally Constrained Graphs},\nauthor={Manuel Madeira and Clement Vignac and Dorina Thanou and Pascal Frossard},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A3hxp0EeNW}\n}", "github": "", "reviewers": "ZwhZ;FDH7;wnSX", "pdf_size": 4443695, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "3;4;3", "wc_summary": "94;50;96", "wc_strengths": "104;52;92", "wc_weaknesses": "392;274;66", "wc_questions": "3;119;6", "wc_limitations": "87;57;1", "wc_review": "680;552;261", "wc_reply_reviewers": "571;78;0", "wc_reply_authors": "1297;18;0", "reply_reviewers": "2;1;0", "reply_authors": "6;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 80.0, 21.228911104120876 ], "wc_strengths_avg": [ 82.66666666666667, 22.23110933404409 ], "wc_weaknesses_avg": [ 244.0, 134.76893806314075 ], "wc_questions_avg": [ 42.666666666666664, 53.989710953929816 ], "wc_limitations_avg": [ 48.333333333333336, 35.64017707899643 ], "wc_review_avg": [ 497.6666666666667, 175.3174897784651 ], "wc_reply_reviewers_avg": [ 216.33333333333334, 252.80075597636613 ], "wc_reply_authors_avg": [ 438.3333333333333, 607.2134898223377 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 2.160246899469287 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16504141058467960838&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "epfl.ch;isomorphiclabs.com;epfl.ch;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;Isomorphic Labs;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;;https://www.epfl.ch", "aff_unique_abbr": "EPFL;;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Semi-Open 3D Object Retrieval via Hierarchical Equilibrium on Hypergraph", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96281", "id": "A3jHvChR8K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A3jHvChR8K", "openreview": "https://openreview.net/forum?id=A3jHvChR8K", "poster": "/media/PosterPDFs/NeurIPS%202024/96281.png?t=1730860105.7254262", "project": "", "author_site": "Yang Xu, Yifan Feng, Jun Zhang, Jun-Hai Yong, Yue Gao", "tldr": "", "abstract": "Existing open-set learning methods consider only the single-layer labels of objects and strictly assume no overlap between the training and testing sets, leading to contradictory optimization for superposed categories. In this paper, we introduce a more practical Semi-Open Environment setting for open-set 3D object retrieval with hierarchical labels, in which the training and testing set share a partial label space for coarse categories but are completely disjoint from fine categories. We propose the Hypergraph-Based Hierarchical Equilibrium Representation (HERT) framework for this task. Specifically, we propose the Hierarchical Retrace Embedding (HRE) module to overcome the global disequilibrium of unseen categories by fully leveraging the multi-level category information. Besides, tackling the feature overlap and class confusion problem, we perform the Structured Equilibrium Tuning (SET) module to utilize more equilibrial correlations among objects and generalize to unseen categories, by constructing a superposed hypergraph based on the local coherent and global entangled correlations. Furthermore, we generate four semi-open 3DOR datasets with multi-level labels for benchmarking. Results demonstrate that the proposed method can effectively generate the hierarchical embeddings of 3D objects and generalize them towards semi-open environments.", "keywords": "Hypergraph;3D Object Retrieval;Open-Set Learning;Multi-Modal Fusion;Multi-Label Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yang Xu;Yifan Feng;Jun Zhang;Jun-Hai Yong;Yue Gao", "authorids": "~Yang_Xu10;~Yifan_Feng1;~Jun_Zhang17;~Jun-Hai_Yong3;~Yue_Gao4", "gender": "M;M;M;M;M", "homepage": ";;https://junzhang.org;https://www.thss.tsinghua.edu.cn/person/yongjunhai;http://www.gaoyue.org", "dblp": ";225/5463;29/4190-18.html;;33/3099-2", "google_scholar": "ecK5NOIAAAAJ;https://scholar.google.com.hk/citations?user=WntYF-sAAAAJ;;;UTDfWocAAAAJ", "orcid": "0000-0002-8691-2726;0000-0003-0878-2986;0000-0001-5579-7094;;", "linkedin": ";;;;", "or_profile": "~Yang_Xu10;~Yifan_Feng1;~Jun_Zhang17;~Jun-Hai_Yong3;~Yue_Gao4", "aff": "School of Software, Tsinghua University;Tsinghua University;Tencent AI Lab;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2024semiopen,\ntitle={Semi-Open 3D Object Retrieval via Hierarchical Equilibrium on Hypergraph},\nauthor={Yang Xu and Yifan Feng and Jun Zhang and Jun-Hai Yong and Yue Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A3jHvChR8K}\n}", "github": "", "reviewers": "nF63;oLzP;YDem;vaz9", "pdf_size": 0, "rating": "4;6;6;8", "confidence": "3;3;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "116;83;97;73", "wc_strengths": "75;97;22;56", "wc_weaknesses": "309;67;63;208", "wc_questions": "28;59;5;134", "wc_limitations": "8;8;5;7", "wc_review": "536;314;192;478", "wc_reply_reviewers": "229;28;0;28", "wc_reply_authors": "1316;46;0;46", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 16.145819892467525 ], "wc_strengths_avg": [ 62.5, 27.518175811634027 ], "wc_weaknesses_avg": [ 161.75, 103.13916569373635 ], "wc_questions_avg": [ 56.5, 48.67494221876385 ], "wc_limitations_avg": [ 7.0, 1.224744871391589 ], "wc_review_avg": [ 380.0, 135.68345514468592 ], "wc_reply_reviewers_avg": [ 71.25, 91.79154372816703 ], "wc_reply_authors_avg": [ 352.0, 556.882393329148 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yycrtPJH4usJ:scholar.google.com/&scioq=Semi-Open+3D+Object+Retrieval+via+Hierarchical+Equilibrium+on+Hypergraph&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": "School of Software;Tencent AI Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "THU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MultiOOD: Scaling Out-of-Distribution Detection for Multiple Modalities", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96280", "id": "A5pabdZp2F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A5pabdZp2F", "openreview": "https://openreview.net/forum?id=A5pabdZp2F", "poster": "/media/PosterPDFs/NeurIPS%202024/96280.png?t=1732816225.7235944", "project": "", "author_site": "Hao Dong, Yue Zhao, Eleni Chatzi, Olga Fink", "tldr": "", "abstract": "Detecting out-of-distribution (OOD) samples is important for deploying machine learning models in safety-critical applications such as autonomous driving and robot-assisted surgery. Existing research has mainly focused on unimodal scenarios on image data. However, real-world applications are inherently multimodal, which makes it essential to leverage information from multiple modalities to enhance the efficacy of OOD detection. To establish a foundation for more realistic Multimodal OOD Detection, we introduce the first-of-its-kind benchmark, MultiOOD, characterized by diverse dataset sizes and varying modality combinations. We first evaluate existing unimodal OOD detection algorithms on MultiOOD, observing that the mere inclusion of additional modalities yields substantial improvements. This underscores the importance of utilizing multiple modalities for OOD detection. Based on the observation of Modality Prediction Discrepancy between in-distribution (ID) and OOD data, and its strong correlation with OOD performance, we propose the Agree-to-Disagree (A2D) algorithm to encourage such discrepancy during training. Moreover, we introduce a novel outlier synthesis method, NP-Mix, which explores broader feature spaces by leveraging the information from nearest neighbor classes and complements A2D to strengthen OOD detection performance. Extensive experiments on MultiOOD demonstrate that training with A2D and NP-Mix improves existing OOD detection algorithms by a large margin. To support accessibility and reproducibility, our source code and MultiOOD benchmark are available at https://github.com/donghao51/MultiOOD.", "keywords": "Out-of-Distribution Detection;Multimodal Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/b73cc5a2ca7401a9eb504022832ed44962c21438.zip", "author": "Hao Dong;Yue Zhao;Eleni Chatzi;Olga Fink", "authorids": "~Hao_Dong4;~Yue_Zhao13;~Eleni_Chatzi1;~Olga_Fink1", "gender": "M;M;F;F", "homepage": "https://sites.google.com/view/dong-hao/;https://viterbi-web.usc.edu/~yzhao010/;https://chatzi.ibk.ethz.ch/;", "dblp": ";48/76-16;281/5425;", "google_scholar": "5jcoGEIAAAAJ;https://scholar.google.ca/citations?user=zoGDYsoAAAAJ;2n9Mwt8AAAAJ;eAcIoUgAAAAJ", "orcid": ";0000-0003-3401-4921;0000-0002-6870-240X;0000-0002-9546-1488", "linkedin": "hao-dong-276317100/;yzhao062/;eleni-chatzi-88065010/;", "or_profile": "~Hao_Dong4;~Yue_Zhao13;~Eleni_Chatzi1;~Olga_Fink1", "aff": "ETH Zurich;University of Southern California;Swiss Federal Institute of Technology;EPFL - EPF Lausanne", "aff_domain": "ethz.ch;usc.edu;ethz.ch;epfl.ch", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndong2024multiood,\ntitle={Multi{OOD}: Scaling Out-of-Distribution Detection for Multiple Modalities},\nauthor={Hao Dong and Yue Zhao and Eleni Chatzi and Olga Fink},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A5pabdZp2F}\n}", "github": "", "reviewers": "z2YR;gP94;Lt2v", "pdf_size": 7746389, "rating": "6;6;8", "confidence": "3;4;4", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "28;52;117", "wc_strengths": "21;46;97", "wc_weaknesses": "6;126;9", "wc_questions": "321;40;89", "wc_limitations": "7;41;39", "wc_review": "383;305;351", "wc_reply_reviewers": "22;32;11", "wc_reply_authors": "63;64;40", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 65.66666666666667, 37.59728122558273 ], "wc_strengths_avg": [ 54.666666666666664, 31.626290048347787 ], "wc_weaknesses_avg": [ 47.0, 55.87486017879597 ], "wc_questions_avg": [ 150.0, 122.55882941129401 ], "wc_limitations_avg": [ 29.0, 15.57776192739723 ], "wc_review_avg": [ 346.3333333333333, 32.013885876114585 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 8.576453553512405 ], "wc_reply_authors_avg": [ 55.666666666666664, 11.08552609887726 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17815943937250731929&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": "ethz.ch;usc.edu;ethz.ch;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "ETH Zurich;University of Southern California;Swiss Federal Institute of Technology;EPFL", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ethz.ch;https://www.usc.edu;https://www.ethz.ch;https://www.epfl.ch", "aff_unique_abbr": "ETHZ;USC;ETH Zurich;EPFL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Lausanne", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Efficient Lifelong Model Evaluation in an Era of Rapid Progress", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96279", "id": "A7wC1CTkYl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A7wC1CTkYl", "openreview": "https://openreview.net/forum?id=A7wC1CTkYl", "poster": "", "project": "", "author_site": "Ameya Prabhu, Vishaal Udandarao, Philip Torr, Matthias Bethge, Adel Bibi, Samuel Albanie", "tldr": "", "abstract": "Standardized benchmarks drive progress in machine learning. However, with repeated testing, the risk of overfitting grows as algorithms over-exploit benchmark idiosyncrasies. In our work, we seek to mitigate this challenge by compiling \\textit{ever-expanding} large-scale benchmarks called \\textit{Lifelong Benchmarks}. As exemplars of our approach, we create \\textit{Lifelong-CIFAR10} and \\textit{Lifelong-ImageNet}, containing (for now) 1.69M and 1.98M test samples, respectively. While reducing overfitting, lifelong benchmarks introduce a key challenge: the high cost of evaluating a growing number of models across an ever-expanding sample set. To address this challenge, we also introduce an efficient evaluation framework: \\textit{Sort \\& Search (S\\&S)}, which reuses previously evaluated models by leveraging dynamic programming algorithms to selectively rank and sub-select test samples, enabling cost-effective lifelong benchmarking. Extensive empirical evaluations across $\\sim$31,000 models demonstrate that \\textit{S\\&S} achieves highly-efficient approximate accuracy measurement, reducing compute cost from 180 GPU days to 5 GPU hours ($\\sim$1000x reduction) on a single A100 GPU, with low approximation error. As such, lifelong benchmarks offer a robust, practical solution to the ``benchmark exhaustion'' problem.", "keywords": "benchmarking;efficient model evaluation", "primary_area": "evaluation", "supplementary_material": "/attachment/6ac8a55fa4695acac8c4f11677c654ea56a6956a.zip", "author": "Ameya Prabhu;Vishaal Udandarao;Philip Torr;Matthias Bethge;Adel Bibi;Samuel Albanie", "authorids": "~Ameya_Prabhu1;~Vishaal_Udandarao1;~Philip_Torr1;~Matthias_Bethge1;~Adel_Bibi1;~Samuel_Albanie2", "gender": "M;M;;M;M;Not Specified", "homepage": "https://drimpossible.github.io/;https://vishaal27.github.io/;http://www.robots.ox.ac.uk/~tvg/;https://bethgelab.org;http://adelbibi.com;https://samuelalbanie.com/", "dblp": "181/4512;247/4693;;77/3005;176/0964;188/5765", "google_scholar": "0kK7sSAAAAAJ;jUOcawkAAAAJ;;https://scholar.google.com/citations?hl=en;Q4j2laYAAAAJ;https://scholar.google.co.uk/citations?user=QjufASUAAAAJ", "orcid": ";;;;0000-0002-6169-3918;", "linkedin": ";vishaal-udandarao/?originalSubdomain=de;;;adel-bibi-ba3671ab/;", "or_profile": "~Ameya_Prabhu1;~Vishaal_Udandarao1;~Philip_Torr1;~Matthias_Bethge1;~Adel_Bibi1;~samuel_Albanie1", "aff": "University of Oxford;University of Cambridge;University of Oxford;University of Tuebingen;University of Oxford;University of Cambridge", "aff_domain": "ox.ac.uk;cam.ac.uk;ox.ac.uk;uni-tuebingen.de;ox.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;Full Professor;Full Professor;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nprabhu2024efficient,\ntitle={Efficient Lifelong Model Evaluation in an Era of Rapid Progress},\nauthor={Ameya Prabhu and Vishaal Udandarao and Philip Torr and Matthias Bethge and Adel Bibi and Samuel Albanie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A7wC1CTkYl}\n}", "github": "", "reviewers": "jvk8;tnhy;bEaJ;T9R1;oB2P", "pdf_size": 4496121, "rating": "5;5;5;7;7", "confidence": "3;3;2;3;3", "soundness": "3;3;3;3;4", "novelty": "3;3;2;3;3", "presentation": "2;2;3;4;3", "wc_summary": "164;109;96;107;266", "wc_strengths": "357;37;27;55;9", "wc_weaknesses": "6;109;205;37;28", "wc_questions": "184;71;64;109;1", "wc_limitations": "27;9;1;7;4", "wc_review": "738;335;393;315;308", "wc_reply_reviewers": "60;180;0;20;75", "wc_reply_authors": "392;0;29;0;0", "reply_reviewers": "2;1;0;1;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 148.4, 63.38012306709415 ], "wc_strengths_avg": [ 97.0, 130.8495319059262 ], "wc_weaknesses_avg": [ 77.0, 72.73238618387272 ], "wc_questions_avg": [ 85.8, 60.11123023196248 ], "wc_limitations_avg": [ 9.6, 9.112628599915613 ], "wc_review_avg": [ 417.8, 162.86362393118972 ], "wc_reply_reviewers_avg": [ 67.0, 62.57795138864806 ], "wc_reply_authors_avg": [ 84.2, 154.3092997845561 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6371863059629263095&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "email": "ox.ac.uk;cam.ac.uk;ox.ac.uk;uni-tuebingen.de;ox.ac.uk;cam.ac.uk", "author_num": 6, "aff_unique_index": "0;1;0;2;0;1", "aff_unique_norm": "University of Oxford;University of Cambridge;University of Tuebingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.cam.ac.uk;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Oxford;Cambridge;Uni T\u00fcbingen", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "DiffLight: A Partial Rewards Conditioned Diffusion Model for Traffic Signal Control with Missing Data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96278", "id": "A969ouPqEs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=A969ouPqEs", "openreview": "https://openreview.net/forum?id=A969ouPqEs", "poster": "/media/PosterPDFs/NeurIPS%202024/96278.png?t=1731221627.8017428", "project": "", "author_site": "Hanyang Chen, Yang Jiang, Shengnan Guo, Xiaowei Mao, Youfang Lin, Huaiyu Wan", "tldr": "", "abstract": "The application of reinforcement learning in traffic signal control (TSC) has been extensively researched and yielded notable achievements. However, most existing works for TSC assume that traffic data from all surrounding intersections is fully and continuously available through sensors. In real-world applications, this assumption often fails due to sensor malfunctions or data loss, making TSC with missing data a critical challenge. To meet the needs of practical applications, we introduce DiffLight, a novel conditional diffusion model for TSC under data-missing scenarios in the offline setting. Specifically, we integrate two essential sub-tasks, i.e., traffic data imputation and decision-making, by leveraging a Partial Rewards Conditioned Diffusion (PRCD) model to prevent missing rewards from interfering with the learning process. Meanwhile, to effectively capture the spatial-temporal dependencies among intersections, we design a Spatial-Temporal transFormer (STFormer) architecture. In addition, we propose a Diffusion Communication Mechanism (DCM) to promote better communication and control performance under data-missing scenarios. Extensive experiments on five datasets with various data-missing scenarios demonstrate that DiffLight is an effective controller to address TSC with missing data. The code of DiffLight is released at https://github.com/lokol5579/DiffLight-release.", "keywords": "Traffic signal control;Reinforcement learning;Diffusion model;Spatial-temporal data", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Hanyang Chen;Yang Jiang;Shengnan Guo;Xiaowei Mao;Youfang Lin;Huaiyu Wan", "authorids": "~Hanyang_Chen1;~Yang_Jiang2;~Shengnan_Guo1;~Xiaowei_Mao1;~Youfang_Lin1;~Huaiyu_Wan1", "gender": ";;F;M;M;M", "homepage": "https://github.com/lokol5579;https://github.com/YangJiangDev;http://faculty.bjtu.edu.cn/9685/;https://github.com/maoxiaowei97;https://faculty.bjtu.edu.cn/7443/;https://faculty.bjtu.edu.cn/8793/", "dblp": "136/9110;;163/0779-1;;12/4988;07/9988", "google_scholar": "_xGaoJ8AAAAJ;;3JsSBYsAAAAJ;;e8xT-e0AAAAJ;T5wVWIUAAAAJ", "orcid": ";;0000-0002-3008-4511;;0000-0002-5143-3645;0000-0002-0501-9363", "linkedin": ";;;;youfang-lin-a1625091/;", "or_profile": "~Hanyang_Chen1;~Yang_Jiang2;~Shengnan_Guo1;~Xiaowei_Mao1;~Youfang_Lin1;~Huaiyu_Wan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "position": "MS student;PhD student;Associate Professor;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024difflight,\ntitle={DiffLight: A Partial Rewards Conditioned Diffusion Model for Traffic Signal Control with Missing Data},\nauthor={Hanyang Chen and Yang Jiang and Shengnan Guo and Xiaowei Mao and Youfang Lin and Huaiyu Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=A969ouPqEs}\n}", "github": "", "reviewers": "PkZo;3ABW;V6Ax;qAN4", "pdf_size": 643200, "rating": "5;6;7;7", "confidence": "4;4;5;3", "soundness": "2;3;4;4", "novelty": "2;3;4;3", "presentation": "3;2;3;3", "wc_summary": "64;113;99;86", "wc_strengths": "65;42;214;75", "wc_weaknesses": "56;82;170;57", "wc_questions": "36;35;14;76", "wc_limitations": "4;26;2;8", "wc_review": "225;298;499;302", "wc_reply_reviewers": "88;36;17;13", "wc_reply_authors": "156;341;24;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.5, 18.034688796871432 ], "wc_strengths_avg": [ 99.0, 67.4648056396815 ], "wc_weaknesses_avg": [ 91.25, 46.64426545675256 ], "wc_questions_avg": [ 40.25, 22.431841208425134 ], "wc_limitations_avg": [ 10.0, 9.486832980505138 ], "wc_review_avg": [ 331.0, 101.72266217515151 ], "wc_reply_reviewers_avg": [ 38.5, 29.87055406248769 ], "wc_reply_authors_avg": [ 136.25, 129.91607868158582 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4341732395964447924&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AAN46kUPXM", "title": "Neural expressiveness for beyond importance model compression", "track": "main", "status": "Reject", "tldr": "", "abstract": "Neural Network Pruning has been established as driving force in the exploration of memory and energy efficient solutions with high throughput both during training and at test time. In this paper, we introduce a novel criterion for model compression, named \"Expressiveness\". Unlike existing pruning methods that rely on the inherent \"Importance\" of neurons' and filters' weights, \"Expressiveness\" emphasizes a neuron's or group of neurons ability to redistribute informational resources effectively, based on the overlap of activations. This characteristic is strongly correlated to a network's initialization state, establishing criterion autonomy from the learning state ($\\textit{stateless}$) and thus setting a new fundamental basis for the expansion of compression strategies in regards to the \"When to Prune\" question. We show that expressiveness is effectively approximated with arbitrary data or limited dataset's representative samples, making ground for the exploration of $\\textit{Data-Agnostic strategies}$. Our work also facilitates a \"hybrid\" formulation of expressiveness and importance-based pruning strategies, illustrating their complementary benefits and delivering up to 10$\\times$ extra gains w.r.t. weight-based approaches in parameter compression ratios, with an average of 1\\% in performance degradation. We also show that employing expressiveness (independently) for pruning leads to an improvement over top-performing and foundational methods in terms of compression efficiency. Finally, on YOLOv8, we achieve a 46.1\\% MACs reduction by removing 55.4\\% of the parameters, with an increase of 3\\% in the mean Absolute Precision ($mAP_{50-95}$) for object detection on COCO dataset.", "keywords": "model compression;efficient deep learning;pruning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Angelos-Christos Maroudis;Sotirios Xydis", "authorids": "~Angelos-Christos_Maroudis1;~Sotirios_Xydis1", "gender": "M;M", "homepage": "https://www.ece.ntua.gr/en/staff/501;", "dblp": "19/1327.html;304/6496", "google_scholar": "_vlFOPgAAAAJ;", "orcid": ";0000-0002-9447-5837", "linkedin": ";", "or_profile": "~Sotirios_Xydis1;~Maroudis_Angelos-Christos1", "aff": "National Technical University of Athens;Harokopio University", "aff_domain": "ntua.gr;hua.gr", "position": "Assistant Professor;MS student", "bibtex": "@misc{\nanonymous2024neural,\ntitle={Neural expressiveness for beyond importance model compression},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=AAN46kUPXM}\n}", "github": "", "project": "", "reviewers": "ha2w;4Yxj;pBA2;9m28", "site": "https://openreview.net/forum?id=AAN46kUPXM", "pdf_size": 17133852, "rating": "4;4;5;5", "confidence": "4;4;4;4", "soundness": "2;3;2;2", "novelty": "2;2;2;2", "presentation": "2;3;2;3", "wc_summary": "76;53;48;43", "wc_strengths": "52;40;60;58", "wc_weaknesses": "171;55;222;198", "wc_questions": "56;7;29;114", "wc_limitations": "27;1;8;1", "wc_review": "382;156;367;414", "wc_reply_reviewers": "131;0;117;38", "wc_reply_authors": "541;0;1158;84", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;3;2", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 55.0, 12.62933094031509 ], "wc_strengths_avg": [ 52.5, 7.794228634059948 ], "wc_weaknesses_avg": [ 161.5, 64.08002808988148 ], "wc_questions_avg": [ 51.5, 40.04060439104285 ], "wc_limitations_avg": [ 9.25, 10.638961415476606 ], "wc_review_avg": [ 329.75, 101.74078582358207 ], "wc_reply_reviewers_avg": [ 71.5, 54.41736855085883 ], "wc_reply_authors_avg": [ 445.75, 459.87192510524056 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dF7ifgfCf_kJ:scholar.google.com/&scioq=Neural+expressiveness+for+beyond+importance+model+compression&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "National Technical University of Athens;Harokopio University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntua.gr;https://www.harokopio.gr", "aff_unique_abbr": "NTUA;HU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Greece" }, { "title": "MassSpecGym: A benchmark for the discovery and identification of molecules", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97823", "id": "AAo8zAShX3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AAo8zAShX3", "openreview": "https://openreview.net/forum?id=AAo8zAShX3", "poster": "", "project": "", "author_site": "Roman Bushuiev, Anton Bushuiev, Niek de Jonge, Adamo Young, Fleming Kretschmer, Raman Samusevich, Janne Heirman, Fei Wang, Luke Zhang, Kai D\u00fchrkop, Marcus Ludwig, Nils Haupt, Apurva Kalia, Corinna Brungs, Robin Schmid, Russell Greiner, Bo Wang, David Wishart, Liping Liu, Juho Rousu, Wout Bittremieux, Hannes Rost, Tytus Mak, Soha Hassoun, Florian Huber, Justin J.J. van der Hooft, Michael Stravs, Sebastian B\u00f6cker, Josef Sivic, Tom\u00e1\u0161 Pluskal", "tldr": "", "abstract": "The discovery and identification of molecules in biological and environmental samples is crucial for advancing biomedical and chemical sciences. Tandem mass spectrometry (MS/MS) is the leading technique for high-throughput elucidation of molecular structures. However, decoding a molecular structure from its mass spectrum is exceptionally challenging, even when performed by human experts. As a result, the vast majority of acquired MS/MS spectra remain uninterpreted, thereby limiting our understanding of the underlying (bio)chemical processes. Despite decades of progress in machine learning applications for predicting molecular structures from MS/MS spectra, the development of new methods is severely hindered by the lack of standard datasets and evaluation protocols. To address this problem, we propose MassSpecGym -- the first comprehensive benchmark for the discovery and identification of molecules from MS/MS data. Our benchmark comprises the largest publicly available collection of high-quality MS/MS spectra and defines three MS/MS annotation challenges: \\textit{de novo} molecular structure generation, molecule retrieval, and spectrum simulation. It includes new evaluation metrics and a generalization-demanding data split, therefore standardizing the MS/MS annotation tasks and rendering the problem accessible to the broad machine learning community. MassSpecGym is publicly available at \\url{https://github.com/pluskal-lab/MassSpecGym}.", "keywords": "Molecule discovery;mass spectrometry;small molecules;metabolomics;generative modeling;benchmark;dataset", "primary_area": "", "supplementary_material": "/attachment/3ca44856ba7c297b242b5e9b37c0d5772b69acb1.pdf", "author": "Roman Bushuiev;Anton Bushuiev;Niek F. de Jonge;Adamo Young;Fleming Kretschmer;Raman Samusevich;Janne Heirman;Fei Wang;Luke Zhang;Kai D\u00fchrkop;Marcus Ludwig;Nils A. Haupt;Apurva Kalia;Corinna Brungs;Robin Schmid;Russell Greiner;BO WANG;David Wishart;Liping Liu;Juho Rousu;Wout Bittremieux;Hannes Rost;Tytus D. Mak;Soha Hassoun;Florian Huber;Justin J.J. van der Hooft;Michael A. Stravs;Sebastian B\u00f6cker;Josef Sivic;Tomas Pluskal", "authorids": "~Roman_Bushuiev1;~Anton_Bushuiev1;~Niek_F._de_Jonge1;~Adamo_Young1;~Fleming_Kretschmer1;~Raman_Samusevich1;~Janne_Heirman1;~Fei_Wang13;~Luke_Zhang2;~Kai_D\u00fchrkop1;~Marcus_Ludwig1;~Nils_A._Haupt1;~Apurva_Kalia1;~Corinna_Brungs1;~Robin_Schmid3;~Russell_Greiner2;~BO_WANG11;~David_Wishart1;~Liping_Liu1;~Juho_Rousu1;~Wout_Bittremieux1;~Hannes_Rost1;~Tytus_D._Mak1;~Soha_Hassoun1;~Florian_Huber1;~Justin_J.J._van_der_Hooft1;~Michael_A._Stravs1;~Sebastian_B\u00f6cker1;~Josef_Sivic1;~Tomas_Pluskal1", "gender": "M;;M;Not Specified;M;M;;;;M;;M;M;F;M;;M;M;;M;;M;;F;;M;;;M;M", "homepage": ";;;;;;;;;;;https://bio.informatik.uni-jena.de/;;;https://robinschmid.github.io;;https://wanglab.ai/;http://www.wishartlab.com/;https://www.eecs.tufts.edu/~liulp/;https://people.aalto.fi/juho_rousu;;http://roestlab.org/;;http://www.cs.tufts.edu/~soha/;https://www.hs-duesseldorf.de/;https://vdhooftcompmet.github.io;;https://bio.informatik.uni-jena.de/;http://people.ciirc.cvut.cz/~sivic;https://www.pluskal-lab.org", "dblp": ";;;;;190/5286;;;;56/11538;;;;;;;;;47/5615-1;r/JuhoRousu;;;;82/450;;;;65/1419.html;71/5006;", "google_scholar": "zs8gt4UAAAAJ;1KKXOA0AAAAJ;;;BKTmkAoAAAAJ;;;https://scholar.google.ca/citations?user=_aH5jgwAAAAJ;;;kInNrosAAAAJ;;;;;;37FDILIAAAAJ;https://scholar.google.com.tw/citations?user=-JSn-WgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=JYN5cL4AAAAJ;;;x_CnHNYAAAAJ;https://scholar.google.com.tw/citations?user=tR5MNfkAAAAJ;;https://scholar.google.com/citations?hl=en;;TWoYOQ4AAAAJ;https://scholar.google.fr/citations?user=NCtKHnQAAAAJ;https://scholar.google.cz/citations?user=qfYftX0AAAAJ", "orcid": "0000-0003-1769-1509;;0000-0002-3054-6210;;0000-0001-8523-6546;0009-0003-1684-3600;0009-0002-5435-9564;0000-0002-0191-9719;;0000-0002-9056-0540;0000-0001-9981-2153;0009-0001-2280-5286;;0000-0002-2571-5235;;;;;0000-0002-3690-3928;;;0000-0003-0990-7488;0000-0001-9501-5640;0000-0001-9477-2199;0000-0002-3535-9406;0000-0002-9340-5511;0000-0002-1426-8572;0000-0002-9304-8091;;", "linkedin": "roman-bushuiev;anton-bushuiev/;niek-de-jonge-6b1378195/;adamo-young-b83548102/;;ramansamusevich/;janne-heirman/;;;;;;apurva-kalia;;;;;;;;;;tytus-mak-01843722/;sohahassoun/;;jjjvanderhooft/;;;;plusik/", "or_profile": "~Roman_Bushuiev1;~Anton_Bushuiev1;~Niek_F._de_Jonge1;~Adamo_Young1;~Fleming_Kretschmer1;~Raman_Samusevich1;~Janne_Heirman1;~Fei_Wang13;~Luke_Zhang2;~Kai_D\u00fchrkop1;~Marcus_Ludwig1;~Nils_A._Haupt1;~Apurva_Kalia1;~Corinna_Brungs1;~Robin_Schmid3;~Russell_Greiner2;~BO_WANG11;~David_Wishart1;~Liping_Liu1;~Juho_Rousu1;~Wout_Bittremieux1;~Hannes_Rost1;~Tytus_D._Mak1;~Soha_Hassoun1;~Florian_Huber1;~Justin_J.J._van_der_Hooft1;~Michael_A._Stravs1;~Sebastian_B\u00f6cker1;~Josef_Sivic1;~Tomas_Pluskal1", "aff": "Institute of Organic Chemistry and Biochemistry of the CAS;Czech Technical University in Prague;Wageningen University & Research;University of Toronto;Friedrich-Schiller Universit\u00e4t Jena;Honeywell spol. s r.o.;Universiteit Antwerpen;University of Alberta;;Friedrich-Schiller Universit\u00e4t Jena;Bright Giant GmbH;Friedrich-Schiller Universit\u00e4t Jena;Tufts University;Institute of Organic Chemistry and Biochemistry of the Czech Academy of Sciences;IOCB of the CAS;;Vector Institute;University of Alberta;Tufts University;Aalto University;;University of Toronto;NIST;Tufts University;Hochschule D\u00fcsseldorf;Wageningen University;Eawag;Friedrich Schiller University Jena;Czech Technical University in Prague;IOCB Prague", "aff_domain": "cas.cz;cvut.cz;wur.nl;utoronto.ca;uni-jena.de;honeywell.com;uantwerpen.be;ualberta.ca;;uni-jena.de;bright-giant.com;uni-jena.de;tufts.edu;uochb.cas.cz;uochb.cas.cz;;vectorinstitute.ai;ualberta.ca;tufts.edu;aalto.fi;;utoronto.ca;nist.gov;tufts.edu;hs-duesseldorf.de;wageningenuniversity.nl;eawag.ch;fsu-jena.de;cvut.cz;iocb.cz", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Researcher;PhD student;PhD student;;Postdoc;Principal Researcher;PhD student;PhD student;Postdoc;Postdoc;;Assistant Professor;Full Professor;Assistant Professor;Full Professor;;Associate Professor;Researcher;Full Professor;Associate Professor;Assistant Professor;Researcher;Full Professor;Principal investigator;Principal Researcher", "bibtex": "@inproceedings{\nbushuiev2024massspecgym,\ntitle={MassSpecGym: A benchmark for the discovery and identification of molecules},\nauthor={Roman Bushuiev and Anton Bushuiev and Niek F. de Jonge and Adamo Young and Fleming Kretschmer and Raman Samusevich and Janne Heirman and Fei Wang and Luke Zhang and Kai D{\\\"u}hrkop and Marcus Ludwig and Nils A. Haupt and Apurva Kalia and Corinna Brungs and Robin Schmid and Russell Greiner and BO WANG and David Wishart and Liping Liu and Juho Rousu and Wout Bittremieux and Hannes Rost and Tytus D. Mak and Soha Hassoun and Florian Huber and Justin J.J. van der Hooft and Michael A. Stravs and Sebastian B{\\\"o}cker and Josef Sivic and Tomas Pluskal},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=AAo8zAShX3}\n}", "github": "", "reviewers": "FppL;TGFc;wDbm;ga1S", "pdf_size": 3704196, "rating": "6;7;7;8", "confidence": "4;3;3;5", "wc_summary_and_contributions": "82;50;161;35", "wc_strengths": "44;51;53;139", "wc_improvement": "28;134;8;41", "wc_limitations": "26;1;35;152", "wc_correctness": "1;1;25;163", "wc_clarity": "1;1;7;32", "wc_relation_to_prior_work": "1;1;11;71", "wc_documentation": "1;1;65;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "185;241;366;648", "wc_reply_reviewers": "9;25;0;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 82.0, 48.6672374395753 ], "wc_strengths_avg": [ 71.75, 38.970341286675946 ], "wc_improvement_avg": [ 52.75, 48.35997828783632 ], "wc_limitations_avg": [ 53.5, 58.21726548026796 ], "wc_correctness_avg": [ 47.5, 67.39992581598291 ], "wc_clarity_avg": [ 10.25, 12.794041581923986 ], "wc_relation_to_prior_work_avg": [ 21.0, 29.154759474226502 ], "wc_documentation_avg": [ 20.25, 26.37588861062315 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 360.0, 178.72185093043325 ], "wc_reply_reviewers_avg": [ 13.0, 9.40744386111339 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 30, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9356172642663171917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cas.cz;cvut.cz;wur.nl;utoronto.ca;uni-jena.de;honeywell.com;uantwerpen.be;ualberta.ca;;uni-jena.de;bright-giant.com;uni-jena.de;tufts.edu;uochb.cas.cz;uochb.cas.cz;;vectorinstitute.ai;ualberta.ca;tufts.edu;aalto.fi;;utoronto.ca;nist.gov;tufts.edu;hs-duesseldorf.de;wageningenuniversity.nl;eawag.ch;fsu-jena.de;cvut.cz;iocb.cz", "author_num": 30, "aff_unique_index": "0;1;2;3;4;5;6;7;4;8;4;9;0;10;11;7;9;12;3;13;9;14;15;16;17;1;18", "aff_unique_norm": "Czech Academy of Sciences;Czech Technical University;Wageningen University & Research;University of Toronto;Friedrich-Schiller-Universit\u00e4t Jena;Honeywell;University of Antwerp;University of Alberta;Bright Giant;Tufts University;Institute of Chemistry, Chinese Academy of Sciences;Vector Institute;Aalto University;National Institute of Standards and Technology;Hochschule D\u00fcsseldorf;Wageningen University;Eawag;Friedrich Schiller University;Institute of Organic Chemistry and Biochemistry", "aff_unique_dep": "Institute of Organic Chemistry and Biochemistry;;;;;;;;;;Institute of Chemistry;;;;;;;;", "aff_unique_url": "https://www.uochb.cas.cz;https://www.ctu.cz;https://www.wur.nl;https://www.utoronto.ca;https://www.uni-jena.de;https://www.honeywell.com;https://www.uantwerp.be;https://www.ualberta.ca;;https://www.tufts.edu;http://www.ict.cas.cn;https://vectorinstitute.ai/;https://www.aalto.fi;https://www.nist.gov;https://www.hs-duesseldorf.de;https://www.wageningenur.nl;https://www.eawag.ch;https://www.uni-jena.de;https://www.iocb.cz", "aff_unique_abbr": "CAS;CTU;WUR;U of T;FSU Jena;Honeywell;UA;UAlberta;Bright Giant;Tufts;IOCB;Vector Institute;Aalto;NIST;;WU;;FSU;IOCB", "aff_campus_unique_index": "1;2;2;2;2;1;1", "aff_campus_unique": ";Prague;Jena", "aff_country_unique_index": "0;0;1;2;3;0;4;2;3;3;3;5;0;6;2;2;5;7;2;5;5;3;1;8;3;0;0", "aff_country_unique": "Czech Republic;Netherlands;Canada;Germany;Belgium;United States;China;Finland;Switzerland" }, { "title": "Many-Shot In-Context Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96277", "id": "AB6XpMzvqH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AB6XpMzvqH", "openreview": "https://openreview.net/forum?id=AB6XpMzvqH", "poster": "/media/PosterPDFs/NeurIPS%202024/96277.png?t=1733151312.3941362", "project": "", "author_site": "Rishabh Agarwal, Avi Singh, Lei Zhang, Bernd Bohnet, Luis Rosias, Stephanie Chan, Biao Zhang, Ankesh Anand, Zaheer Abbas, Azade Nova, John Co-Reyes, Eric Chu, Feryal Behbahani, Aleksandra Faust, Hugo Larochelle", "tldr": "", "abstract": "Large language models (LLMs) excel at few-shot in-context learning (ICL) -- learning from a few examples provided in context at inference, without any weight updates. Newly expanded context windows allow us to investigate ICL with hundreds or thousands of examples \u2013 the many-shot regime. Going from few-shot to many-shot, we observe significant performance gains across a wide variety of generative and discriminative tasks. While promising, many-shot ICL can be bottlenecked by the available amount of human-generated outputs. To mitigate this limitation, we explore two new settings: (1) \"Reinforced ICL\" that uses model-generated chain-of-thought rationales in place of human rationales, and (2) \"Unsupervised ICL\" where we remove rationales from the prompt altogether, and prompts the model only with domain-specific inputs. We find that both Reinforced and Unsupervised ICL can be quite effective in the many-shot regime, particularly on complex reasoning tasks. We demonstrate that, unlike few-shot learning, many-shot learning is effective at overriding pretraining biases, can learn high-dimensional functions with numerical inputs, and performs comparably to supervised fine-tuning. Finally, we reveal the limitations of next-token prediction loss as an indicator of downstream ICL performance.", "keywords": "large language models;in-context learning;long-context models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Rishabh Agarwal;Avi Singh;Lei M Zhang;Bernd Bohnet;Luis Rosias;Stephanie C.Y. Chan;Biao Zhang;Ankesh Anand;Zaheer Abbas;Azade Nova;John D Co-Reyes;Eric Chu;Feryal Behbahani;Aleksandra Faust;Hugo Larochelle", "authorids": "~Rishabh_Agarwal2;~Avi_Singh1;~Lei_M_Zhang1;~Bernd_Bohnet1;~Luis_Rosias1;~Stephanie_C.Y._Chan1;~Biao_Zhang2;~Ankesh_Anand1;~Zaheer_Abbas1;~Azade_Nova1;~John_D_Co-Reyes1;~Eric_Chu1;~Feryal_Behbahani1;~Aleksandra_Faust1;~Hugo_Larochelle1", "gender": "M;;M;M;M;F;M;M;M;M;;F;F;M;F", "homepage": "https://agarwl.github.io;https://www.avisingh.org/;https://leiz86.github.io;;;https://scychan.github.io/;;https://ankeshanand.com;;;;https://feryal.github.io;http://www.afaust.info;https://mila.quebec/en/directory/hugo-larochelle;https://sites.google.com/site/azadenazi/", "dblp": ";https://dblp.org/pers/s/Singh:Avi.html;;59/4391;;255/7866;https://dblp.uni-trier.de/pers/hd/z/Zhang_0002:Biao;;;198/1129;83/3305;;135/8420;86/3862.html;99/7868.html", "google_scholar": "https://scholar.google.ca/citations?user=aH8AJu4AAAAJ;C2_ZXdcAAAAJ;-kdBDxYAAAAJ;https://scholar.google.co.uk/citations?user=IzqMoZMAAAAJ;;https://scholar.google.com/citations?hl=en;gqPKjaIAAAAJ;;https://scholar.google.com/citations?hl=en;;;;RK72t68AAAAJ;https://scholar.google.ca/citations?user=U89FHq4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;;;;;0000-0002-3268-8685;;", "linkedin": ";;;;lrosias/;scychan;;;;;;;aleksandrafaust;;", "or_profile": "~Rishabh_Agarwal2;~Avi_Singh1;~Lei_M_Zhang1;~Bernd_Bohnet1;~Luis_Rosias1;~Stephanie_C.Y._Chan1;~Biao_Zhang2;~Ankesh_Anand1;~Zaheer_Abbas1;~John_D_Co-Reyes1;~Eric_Chu1;~Feryal_Behbahani1;~Aleksandra_Faust1;~Hugo_Larochelle1;~Azade_Nazi1", "aff": "Google DeepMind;Google;Google DeepMind;Google;Google;Google DeepMind;Google DeepMind;Mila, University of Montreal;Google DeepMind;;;Google DeepMind;Google Brain;Google;", "aff_domain": "google.com;google.com;deepmind.com;google.com;google.com;deepmind.com;google.com;umontreal.ca;deepmind.com;;;google.com;google.com;google.com;", "position": "Research Scientist;Researcher;Research Scientist;Researcher;Researcher;Research Scientist;Researcher;PhD student;Researcher;;;Research Scientist;Principal Researcher;Research Scientist;", "bibtex": "@inproceedings{\nagarwal2024manyshot,\ntitle={Many-Shot In-Context Learning},\nauthor={Rishabh Agarwal and Avi Singh and Lei M Zhang and Bernd Bohnet and Luis Rosias and Stephanie C.Y. Chan and Biao Zhang and Ankesh Anand and Zaheer Abbas and Azade Nova and John D Co-Reyes and Eric Chu and Feryal Behbahani and Aleksandra Faust and Hugo Larochelle},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AB6XpMzvqH}\n}", "github": "", "reviewers": "2oDD;mj1R;XEKv;kkH9", "pdf_size": 918021, "rating": "7;7;7;9", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;3;4", "wc_summary": "151;153;109;93", "wc_strengths": "80;167;29;158", "wc_weaknesses": "136;289;130;177", "wc_questions": "1075;6;100;156", "wc_limitations": "33;14;17;32", "wc_review": "1475;629;385;616", "wc_reply_reviewers": "0;78;0;0", "wc_reply_authors": "802;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 126.5, 26.12948526090784 ], "wc_strengths_avg": [ 108.5, 57.019733426244635 ], "wc_weaknesses_avg": [ 183.0, 63.81614215854794 ], "wc_questions_avg": [ 334.25, 431.0176185494045 ], "wc_limitations_avg": [ 24.0, 8.573214099741124 ], "wc_review_avg": [ 776.25, 414.9369681047954 ], "wc_reply_reviewers_avg": [ 19.5, 33.77499074759311 ], "wc_reply_authors_avg": [ 200.5, 347.27618691755987 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18401894884898660219&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "google.com;google.com;deepmind.com;google.com;google.com;deepmind.com;google.com;umontreal.ca;deepmind.com;;;google.com;google.com;google.com;", "author_num": 15, "aff_unique_index": "0;0;0;0;0;0;0;1;0;0;0;0", "aff_unique_norm": "Google;University of Montreal", "aff_unique_dep": "Google DeepMind;Mila", "aff_unique_url": "https://deepmind.com;https://www.mila.quebec", "aff_unique_abbr": "DeepMind;Mila", "aff_campus_unique_index": "1;1;1;2;1;1", "aff_campus_unique": ";Mountain View;Montreal", "aff_country_unique_index": "0;1;0;1;1;0;0;2;0;0;1;1", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "Learning Transferable Features for Implicit Neural Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96276", "id": "ABYdKpDb8p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ABYdKpDb8p", "openreview": "https://openreview.net/forum?id=ABYdKpDb8p", "poster": "/media/PosterPDFs/NeurIPS%202024/96276.png?t=1733340818.7990873", "project": "", "author_site": "Kushal Kardam Vyas, Imtiaz Humayun, Aniket Dashpute, Richard Baraniuk, Ashok Veeraraghavan, Guha Balakrishnan", "tldr": "", "abstract": "Implicit neural representations (INRs) have demonstrated success in a variety of applications, including inverse problems and neural rendering. An INR is typically trained to capture one signal of interest, resulting in learned neural features that are highly attuned to that signal. Assumed to be less generalizable, we explore the aspect of transferability of such learned neural features for fitting similar signals. We introduce a new INR training framework, STRAINER that learns transferable features for fitting INRs to new signals from a given distribution, faster and with better reconstruction quality. Owing to the sequential layer-wise affine operations in an INR, we propose to learn transferable representations by sharing initial encoder layers across multiple INRs with independent decoder layers. At test time, the learned encoder representations are transferred as initialization for an otherwise randomly initialized INR. We find STRAINER to yield extremely powerful initialization for fitting images from the same domain and allow for a \u2248 +10dB gain in signal quality early on compared to an untrained INR itself. STRAINER also provides a simple way to encode data-driven priors in INRs. We evaluate STRAINER on multiple in-domain and out-of-domain signal fitting tasks and inverse problems and further provide detailed analysis and discussion on the transferability of STRAINER\u2019s features.", "keywords": "implicit neural representations;inverse problems;transfer learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/8e97e5c19bd5d649819053a18837407ec8838777.zip", "author": "Kushal Vyas;Ahmed Imtiaz Humayun;Aniket Dashpute;Richard Baraniuk;Ashok Veeraraghavan;Guha Balakrishnan", "authorids": "~Kushal_Vyas1;~Ahmed_Imtiaz_Humayun1;~Aniket_Dashpute1;~Richard_Baraniuk1;~Ashok_Veeraraghavan1;~Guha_Balakrishnan1", "gender": "M;M;;;M;M", "homepage": "https://kushalvyas.github.io/;https://imtiazhumayun.github.io;http://richb.rice.edu/;https://computationalimaging.rice.edu/;http://www.guhabalakrishnan.com;https://aniketdashpute.github.io/", "dblp": "387/3555;222/1771;32/2804;84/858;72/8177;231/6430.html", "google_scholar": "0SxLnLcAAAAJ;wJ2HUn4AAAAJ;https://scholar.google.com.tw/citations?user=N-BBA20AAAAJ;tI-oUmsAAAAJ;8rZyuc8AAAAJ;6jHlWz0AAAAJ", "orcid": ";;;;;0000-0001-8201-1405", "linkedin": ";;richard-baraniuk;;;aniketdashpute/", "or_profile": "~Kushal_Vyas1;~Ahmed_Imtiaz_Humayun1;~Richard_Baraniuk1;~Ashok_Veeraraghavan1;~Guha_Balakrishnan1;~Aniket_Sanjay_Dashpute1", "aff": "Rice University;Google;William Marsh Rice University;William Marsh Rice University;Rice University;Rice University", "aff_domain": "rice.edu;google.com;rice.edu;rice.edu;rice.edu;rice.edu", "position": "PhD student;Student Researcher;C. Sidney Burrus Professor;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nvyas2024learning,\ntitle={Learning Transferable Features for Implicit Neural Representations},\nauthor={Kushal Vyas and Ahmed Imtiaz Humayun and Aniket Dashpute and Richard Baraniuk and Ashok Veeraraghavan and Guha Balakrishnan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ABYdKpDb8p}\n}", "github": "", "reviewers": "FYQ8;HDgr;GtRf", "pdf_size": 47617168, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "1;3;3", "novelty": "1;3;2", "presentation": "3;3;2", "wc_summary": "49;170;114", "wc_strengths": "59;75;154", "wc_weaknesses": "290;92;104", "wc_questions": "59;54;90", "wc_limitations": "19;6;29", "wc_review": "476;397;491", "wc_reply_reviewers": "71;81;10", "wc_reply_authors": "41;374;39", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 111.0, 49.44357052910587 ], "wc_strengths_avg": [ 96.0, 41.52910625894406 ], "wc_weaknesses_avg": [ 162.0, 90.64215354899729 ], "wc_questions_avg": [ 67.66666666666667, 15.92342788332825 ], "wc_limitations_avg": [ 18.0, 9.41629792788369 ], "wc_review_avg": [ 454.6666666666667, 41.23375100839387 ], "wc_reply_reviewers_avg": [ 54.0, 31.379398762032817 ], "wc_reply_authors_avg": [ 151.33333333333334, 157.45122701614264 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3772864226096300031&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rice.edu;google.com;rice.edu;rice.edu;rice.edu;rice.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Rice University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.rice.edu;https://www.google.com", "aff_unique_abbr": "Rice;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Vector Quantization Prompting for Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96275", "id": "ACCqGLviig", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ACCqGLviig", "openreview": "https://openreview.net/forum?id=ACCqGLviig", "poster": "/media/PosterPDFs/NeurIPS%202024/96275.png?t=1730032668.827729", "project": "", "author_site": "Li Jiao, Qiuxia LAI, YU LI, Qiang Xu", "tldr": "", "abstract": "Continual learning requires to overcome catastrophic forgetting when training a single model on a sequence of tasks. Recent top-performing approaches are prompt-based methods that utilize a set of learnable parameters (i.e., prompts) to encode task knowledge, from which appropriate ones are selected to guide the fixed pre-trained model in generating features tailored to a certain task. However, existing methods rely on predicting prompt identities for prompt selection, where the identity prediction process cannot be optimized with task loss. This limitation leads to sub-optimal prompt selection and inadequate adaptation of pre-trained features for a specific task. Previous efforts have tried to address this by directly generating prompts from input queries instead of selecting from a set of candidates. However, these prompts are continuous, which lack sufficient abstraction for task knowledge representation, making them less effective for continual learning. To address these challenges, we propose VQ-Prompt, a prompt-based continual learning method that incorporates Vector Quantization (VQ) into end-to-end training of a set of discrete prompts. In this way, VQ-Prompt can optimize the prompt selection process with task loss and meanwhile achieve effective abstraction of task knowledge for continual learning. Extensive experiments show that VQ-Prompt outperforms state-of-the-art continual learning methods across a variety of benchmarks under the challenging class-incremental setting.", "keywords": "continual learning;incremental learning;life-long learning;image classification;deep learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/d22be31c9d4c0a49984233b7e03e787dc981d593.zip", "author": "Li Jiao;Qiuxia Lai;YU LI;Qiang Xu", "authorids": "~Li_Jiao2;~Qiuxia_Lai3;~YU_LI10;~Qiang_Xu1", "gender": "F;Not Specified;M;F", "homepage": "https://www.researchgate.net/profile/Li-Jiao-28?ev=hdr_xprf&_tp=eyJjb250ZXh0Ijp7ImZpcnN0UGFnZSI6ImxvZ2luIiwicGFnZSI6ImhvbWUiLCJwb3NpdGlvbiI6Imdsb2JhbEhlYWRlciJ9fQ;http://liyu.one;https://github.com/cure-lab;https://ashleylqx.github.io/", "dblp": ";34/2997-7;43/1230-1;210/4586.html", "google_scholar": ";M0zhrM8AAAAJ;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ;LwIItp4AAAAJ", "orcid": ";;;0000-0001-6872-5540", "linkedin": ";;;%E7%A7%8B%E9%9C%9E-%E8%B5%96-11813b169/", "or_profile": "~Li_Jiao2;~YU_LI10;~Qiang_Xu1;~Qiuxia_LAI1", "aff": "Communication University of China;Harbin Institute of Technology (Shen Zhen);The Chinese University of Hong Kong;Communication University of China", "aff_domain": "cuc.edu.cn;hit.edu.cn;cuhk.edu.hk;cuc.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njiao2024vector,\ntitle={Vector Quantization Prompting for Continual Learning},\nauthor={Li Jiao and Qiuxia Lai and YU LI and Qiang Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ACCqGLviig}\n}", "github": "", "reviewers": "EHaW;7MCa;xyoq;GsNc", "pdf_size": 1098948, "rating": "5;5;5;7", "confidence": "5;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "3;3;3;4", "wc_summary": "61;178;147;200", "wc_strengths": "34;61;57;85", "wc_weaknesses": "78;226;201;136", "wc_questions": "18;12;1;189", "wc_limitations": "1;56;8;40", "wc_review": "192;533;414;650", "wc_reply_reviewers": "41;40;0;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 146.5, 52.83228179815821 ], "wc_strengths_avg": [ 59.25, 18.08832496390973 ], "wc_weaknesses_avg": [ 160.25, 57.74242374545772 ], "wc_questions_avg": [ 55.0, 77.60476789476276 ], "wc_limitations_avg": [ 26.25, 22.609455986378798 ], "wc_review_avg": [ 447.25, 169.3507824014994 ], "wc_reply_reviewers_avg": [ 25.0, 16.896745248715803 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11752726288101523605&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "cuc.edu.cn;hit.edu.cn;cuhk.edu.hk;cuc.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Communication University of China;Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.cuc.edu.cn/;http://www.hit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "CUC;HIT;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Feint Behaviors and Strategies: Formalization, Implementation and Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96274", "id": "ACIDDnTbSJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ACIDDnTbSJ", "openreview": "https://openreview.net/forum?id=ACIDDnTbSJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96274.png?t=1731294208.48951", "project": "", "author_site": "Junyu Liu, Xiangjun Peng", "tldr": "", "abstract": "Feint behaviors refer to a set of deceptive behaviors in a nuanced manner, which enable players to obtain temporal and spatial advantages over opponents in competitive games. Such behaviors are crucial tactics in most competitive multi-player games (e.g., boxing, fencing, basketball, motor racing, etc.). However, existing literature does not provide a comprehensive (and/or concrete) formalization for Feint behaviors, and their implications on game strategies. In this work, we introduce the first comprehensive formalization of Feint behaviors at both action-level and strategy-level, and provide concrete implementation and quantitative evaluation of them in multi-player games. The key idea of our work is to (1) allow automatic generation of Feint behaviors via Palindrome-directed templates, combine them into meaningful behavior sequences via a Dual-Behavior Model; (2) concertize the implications from our formalization of Feint on game strategies, in terms of temporal, spatial, and their collective impacts respectively; and (3) provide a unified implementation scheme of Feint behaviors in existing MARL frameworks. The experimental results show that our design of Feint behaviors can (1) greatly improve the game reward gains; (2) significantly improve the diversity of Multi-Player Games; and (3) only incur negligible overheads in terms of time consumption.", "keywords": "Feint Behaviors;Multi-Player Games;Multi-Agent Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Junyu Liu;Xiangjun Peng", "authorids": "~Junyu_Liu5;~Xiangjun_Peng1", "gender": "M;M", "homepage": "https://junyu-liu-nate.github.io/;https://shiangjun.com", "dblp": ";", "google_scholar": "EiSrD1UAAAAJ;", "orcid": "0009-0006-9309-5871;", "linkedin": ";", "or_profile": "~Junyu_Liu5;~Xiangjun_Peng1", "aff": "Brown University;The Chinese University of Hong Kong", "aff_domain": "brown.edu;cuhk.edu", "position": "MS student;PhD student", "bibtex": "@inproceedings{\nliu2024feint,\ntitle={Feint Behaviors and Strategies: Formalization, Implementation and Evaluation},\nauthor={Junyu Liu and Xiangjun Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ACIDDnTbSJ}\n}", "github": "", "reviewers": "R6PG;8F3g;CkiB;Wu2K", "pdf_size": 17161193, "rating": "5;6;6;6", "confidence": "3;4;3;3", "soundness": "2;2;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "71;126;82;178", "wc_strengths": "81;58;95;214", "wc_weaknesses": "138;204;164;214", "wc_questions": "552;103;70;214", "wc_limitations": "87;5;32;1", "wc_review": "929;496;443;821", "wc_reply_reviewers": "266;152;27;0", "wc_reply_authors": "438;732;0;271", "reply_reviewers": "1;3;1;0", "reply_authors": "3;3;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.25, 42.16856056352884 ], "wc_strengths_avg": [ 112.0, 60.35312750802563 ], "wc_weaknesses_avg": [ 180.0, 30.62678566222711 ], "wc_questions_avg": [ 234.75, 190.77391724237356 ], "wc_limitations_avg": [ 31.25, 34.3247359785913 ], "wc_review_avg": [ 672.25, 207.16343185996897 ], "wc_reply_reviewers_avg": [ 111.25, 106.16349419645154 ], "wc_reply_authors_avg": [ 360.25, 265.5130646503106 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J-b6L149_9YJ:scholar.google.com/&scioq=Feint+Behaviors+and+Strategies:+Formalization,+Implementation+and+Evaluation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "brown.edu;cuhk.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Brown University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "Brown;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "2DQuant: Low-bit Post-Training Quantization for Image Super-Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96273", "id": "ADJASE9uQ2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ADJASE9uQ2", "openreview": "https://openreview.net/forum?id=ADJASE9uQ2", "poster": "/media/PosterPDFs/NeurIPS%202024/96273.png?t=1731395126.3134634", "project": "", "author_site": "Kai Liu, Haotong Qin, Yong Guo, Xin Yuan, Linghe Kong, Guihai Chen, Yulun Zhang", "tldr": "", "abstract": "Low-bit quantization has become widespread for compressing image super-resolution (SR) models for edge deployment, which allows advanced SR models to enjoy compact low-bit parameters and efficient integer/bitwise constructions for storage compression and inference acceleration, respectively. However, it is notorious that low-bit quantization degrades the accuracy of SR models compared to their full-precision (FP) counterparts. Despite several efforts to alleviate the degradation, the transformer-based SR model still suffers severe degradation due to its distinctive activation distribution. In this work, we present a dual-stage low-bit post-training quantization (PTQ) method for image super-resolution, namely 2DQuant, which achieves efficient and accurate SR under low-bit quantization. The proposed method first investigates the weight and activation and finds that the distribution is characterized by coexisting symmetry and asymmetry, long tails. Specifically, we propose Distribution-Oriented Bound Initialization (DOBI), using different searching strategies to search a coarse bound for quantizers. To obtain refined quantizer parameters, we further propose Distillation Quantization Calibration (DQC), which employs a distillation approach to make the quantized model learn from its FP counterpart. Through extensive experiments on different bits and scaling factors, the performance of DOBI can reach the state-of-the-art (SOTA) while after stage two, our method surpasses existing PTQ in both metrics and visual effects. 2DQuant gains an increase in PSNR as high as 4.52dB on Set5 (x2) compared with SOTA when quantized to 2-bit and enjoys a 3.60x compression ratio and 5.08x speedup ratio. The code and models are available at https://github.com/Kai-Liu001/2DQuant.", "keywords": "Quantization;Image super resolution;Low bit;Post-training quantization.", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/fde665f755f52c35d3b74735ec1c41886ada480f.zip", "author": "Kai Liu;Haotong Qin;Yong Guo;Xin Yuan;Linghe Kong;Guihai Chen;Yulun Zhang", "authorids": "~Kai_Liu21;~Haotong_Qin1;~Yong_Guo1;~Xin_Yuan4;~Linghe_Kong1;~Guihai_Chen3;~Yulun_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://kai-liu001.github.io/;https://htqin.github.io/;http://www.guoyongcs.com/;https://en.westlake.edu.cn/faculty/xin-yuan.html;https://www.cs.sjtu.edu.cn/~linghe.kong/;https://cs.nju.edu.cn/gchen/index.htm;http://yulunzhang.com/", "dblp": ";262/3626.html;;78/713-2;23/7909;51/1742.html;166/2763-1.html", "google_scholar": ";mK6n-KgAAAAJ;https://scholar.google.com/citations?hl=en;cS9CbWkAAAAJ;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ;;ORmLjWoAAAAJ", "orcid": "0000-0003-4298-0580;;0000-0002-3444-4588;0000-0002-8311-7524;0000-0001-9266-3044;;0000-0002-2288-5079", "linkedin": ";;;xin-yuan-0024bb31/;;;yulun-zhang-1116b5b9/", "or_profile": "~Kai_Liu21;~Haotong_Qin1;~Yong_Guo1;~Xin_Yuan4;~Linghe_Kong1;~Guihai_Chen3;~Yulun_Zhang1", "aff": "Shanghai Jiaotong University;ETHZ - ETH Zurich;Saarland Informatics Campus, Max-Planck Institute;Westlake University;Shanghai Jiaotong University;Shanghai Jiaotong University;Swiss Federal Institute of Technology", "aff_domain": "sjtu.edu.cn;ethz.ch;mpi-inf.mpg.de;westlake.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ethz.ch", "position": "Undergrad student;Postdoc;Postdoc;Associate Professor;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nliu2024dquant,\ntitle={2{DQ}uant: Low-bit Post-Training Quantization for Image Super-Resolution},\nauthor={Kai Liu and Haotong Qin and Yong Guo and Xin Yuan and Linghe Kong and Guihai Chen and Yulun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ADJASE9uQ2}\n}", "github": "", "reviewers": "DoGX;EXmU;6qcS;Xp5V", "pdf_size": 2273774, "rating": "3;6;7;8", "confidence": "5;5;5;4", "soundness": "2;3;3;3", "novelty": "1;3;4;4", "presentation": "2;3;3;3", "wc_summary": "59;68;94;78", "wc_strengths": "35;38;193;177", "wc_weaknesses": "250;221;138;125", "wc_questions": "2;6;110;9", "wc_limitations": "1;9;17;9", "wc_review": "347;342;552;398", "wc_reply_reviewers": "246;159;52;21", "wc_reply_authors": "1199;445;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "4;3;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.75, 12.987975207860538 ], "wc_strengths_avg": [ 110.75, 74.47272990833625 ], "wc_weaknesses_avg": [ 183.5, 53.20009398487939 ], "wc_questions_avg": [ 31.75, 45.245856163852174 ], "wc_limitations_avg": [ 9.0, 5.656854249492381 ], "wc_review_avg": [ 409.75, 85.0011029340208 ], "wc_reply_reviewers_avg": [ 119.5, 89.19220817986289 ], "wc_reply_authors_avg": [ 411.0, 489.883149332573 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6172133998483676, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15526455091582829636&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;ethz.ch;mpi-inf.mpg.de;westlake.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ethz.ch", "author_num": 7, "aff_unique_index": "0;1;2;3;0;0;4", "aff_unique_norm": "Shanghai Jiao Tong University;ETH Zurich;Max-Planck Institute;Westlake University;Swiss Federal Institute of Technology", "aff_unique_dep": ";;Informatics;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ethz.ch;https://www.mpi-sws.org;https://www.westlake.edu.cn;https://www.ethz.ch", "aff_unique_abbr": "SJTU;ETHZ;MPI-SWS;WU;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0;1;2;0;0;0;1", "aff_country_unique": "China;Switzerland;Germany" }, { "title": "SciCode: A Research Coding Benchmark Curated by Scientists", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97822", "id": "ADLaALtdoG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ADLaALtdoG", "openreview": "https://openreview.net/forum?id=ADLaALtdoG", "poster": "", "project": "", "author_site": "Minyang Tian, Luyu Gao, Shizhuo Zhang, Xinan Chen, Cunwei Fan, Xuefei Guo, Roland Haas, Pan Ji, Kittithat Krongchon, Yao Li, Shengyan Liu, Di Luo, Yutao Ma, HAO TONG, Kha Trinh, Chenyu Tian, Zihan Wang, Bohao Wu, Shengzhu Yin, Minhui Zhu, Kilian Lieret, Yanxin Lu, Genglin Liu, Yufeng Du, Tianhua Tao, Ofir Press, Jamie Callan, Eliu Huerta, Hao Peng", "tldr": "", "abstract": "Since language models (LMs) now outperform average humans on many challenging tasks, it is becoming increasingly difficult to develop challenging, high-quality, and realistic evaluations. We address this by examining LM capabilities to generate code for solving real scientific research problems. Incorporating input from scientists and AI researchers in 16 diverse natural science sub-fields, including mathematics, physics, chemistry, biology, and materials science, we create a scientist-curated coding benchmark, SciCode. The problems naturally factorize into multiple subproblems, each involving knowledge recall, reasoning, and code synthesis. In total, SciCode contains 338 subproblems decomposed from 80 challenging main problems, and it offers optional descriptions specifying useful scientific background information and scientist-annotated gold-standard solutions and test cases for evaluation. OpenAI o1-preview, the best-performing model among those tested, can solve only 7.7\\% of the problems in the most realistic setting. We believe that SciCode demonstrates both contemporary LMs' progress towards realizing helpful scientific assistants and sheds light on the building and evaluation of scientific AI in the future.", "keywords": "Large Language Models;code generation;benchmark;AI4Science", "primary_area": "", "supplementary_material": "", "author": "Minyang Tian;Luyu Gao;Dylan Zhang;Xinan Chen;Cunwei Fan;Xuefei Guo;Roland Haas;Pan Ji;Kittithat Krongchon;Yao Li;Shengyan Liu;Di Luo;Yutao Ma;HAO TONG;Kha Trinh;Chenyu Tian;Zihan Wang;Bohao Wu;Shengzhu Yin;Minhui Zhu;Kilian Lieret;Yanxin Lu;Genglin Liu;Yufeng Du;Tianhua Tao;Ofir Press;Jamie Callan;Eliu A Huerta;Hao Peng", "authorids": "~Minyang_Tian1;~Luyu_Gao1;~Dylan_Zhang2;~Xinan_Chen2;~Cunwei_Fan1;~Xuefei_Guo1;~Roland_Haas1;~Pan_Ji4;~Kittithat_Krongchon1;~Yao_Li15;~Shengyan_Liu1;~Di_Luo1;~Yutao_Ma1;~HAO_TONG3;~Kha_Trinh1;~Chenyu_Tian1;~Zihan_Wang2;~Bohao_Wu1;~Shengzhu_Yin1;~Minhui_Zhu1;~Kilian_Lieret1;~Yanxin_Lu1;~Genglin_Liu1;~Yufeng_Du2;~Tianhua_Tao1;~Ofir_Press1;~Jamie_Callan1;~Eliu_A_Huerta1;~Hao_Peng4", "gender": ";M;;;M;;;M;M;M;M;M;M;M;M;M;M;M;M;F;M;F;M;;M;M;;M;", "homepage": ";https://luyug.github.io/;;;https://fancunwei95.github.io/about/;;;https://physics.unc.edu/people-pages/graduate-students/;;;;;;;;;;;;;https://lieret.net;;https://genglinliu.github.io/;https://github.com/Bznkxs;http://www.taotianhua.com/;https://ofir.io/about;http://www.cs.cmu.edu/~callan/;https://www.anl.gov/profile/eliu-a-huerta;", "dblp": ";;;;;;;;;;;;;;;;;183/2903;;;276/7672;97/10577;347/9436;;296/1990.html;185/0577;c/JamesPCallan;;", "google_scholar": "Bzl5qN8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;rBfwuygAAAAJ;;JPzMgNsAAAAJ;;;6Ei4yuUAAAAJ;;Za8-L-sAAAAJ;OxZytTQAAAAJ;;y3mahjoAAAAJ;;;;https://scholar.google.com/citations?hl=en;;QPcB69UAAAAJ;byA9yI0AAAAJ;;xTX3r0IAAAAJ;;;LeHa8psAAAAJ;https://scholar.google.com/citations?hl=en;CZQuCS0AAAAJ;", "orcid": ";;;;;0000-0003-1088-6039;0000-0003-1424-6178;;0000-0002-5794-1608;0000-0002-9728-5035;;;;;0009-0004-8184-6338;;0000-0002-8964-9046;0000-0003-0128-8459;;0000-0001-9927-5212;0000-0003-2792-7511;;;;;;;0000-0002-9682-3604;", "linkedin": ";;;;;xfguo-physics/;;;kittithat-krongchon/;;;;yutao-ma-7b41627b/;;;chenyu-tian-63a0a1107/;zihan-wang-594061195/;bohao-wu-a8595821a/;shengzhu-yin-7a30a612b;;kilian-lieret-ph-d-0b0667104/;;genglin-liu-085101190/;;;;;eliu-huerta-72a84165/;", "or_profile": "~Minyang_Tian1;~Luyu_Gao1;~Dylan_Zhang2;~Xinan_Chen2;~Cunwei_Fan1;~Xuefei_Guo1;~Roland_Haas1;~Pan_Ji4;~Kittithat_Krongchon1;~Yao_Li15;~Shengyan_Liu1;~Di_Luo1;~Yutao_Ma1;~HAO_TONG3;~Kha_Trinh1;~Chenyu_Tian1;~Zihan_Wang2;~Bohao_Wu1;~Shengzhu_Yin1;~Minhui_Zhu1;~Kilian_Lieret1;~Yanxin_Lu1;~Genglin_Liu1;~Yufeng_Du2;~Tianhua_Tao1;~Ofir_Press1;~Jamie_Callan1;~Eliu_A_Huerta1;~Hao_Peng4", "aff": "University of Illinois, Urbana Champaign;Carnegie Mellon University;;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of North Carolina at Chapel Hill;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Massachusetts Institute of Technology;University of Chicago;University of Illinois, Urbana Champaign;University of Chicago;University of Texas at Austin;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Princeton University;Zhejiang University;UCLA Computer Science Department, University of California, Los Angeles;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Princeton University;Carnegie Mellon University;Argonne National Laboratory;", "aff_domain": "illinois.edu;cmu.edu;;illinois.edu;illinois.edu;uiuc.edu;illinois.edu;unc.edu;illinois.edu;illinois.edu;illinois.edu;mit.edu;uchicago.edu;illinois.edu;uchicago.edu;utexas.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu;princeton.edu;intl.zju.edu.cn;cs.ucla.edu;cs.illinois.edu;illinois.edu;princeton.edu;cmu.edu;anl.gov;", "position": "PhD student;PhD student;;PhD student;PhD student;PhD student;Researcher;PhD student;PhD student;Postdoc;PhD student;Postdoc;PhD student;PhD student;PhD student;PhD student;PhD student;MS student;PhD student;PhD student;Postdoc;Undergrad student;PhD student;PhD student;MS student;Postdoc;Full Professor;Principal Researcher;", "bibtex": "@inproceedings{\ntian2024scicode,\ntitle={SciCode: A Research Coding Benchmark Curated by Scientists},\nauthor={Minyang Tian and Luyu Gao and Dylan Zhang and Xinan Chen and Cunwei Fan and Xuefei Guo and Roland Haas and Pan Ji and Kittithat Krongchon and Yao Li and Shengyan Liu and Di Luo and Yutao Ma and HAO TONG and Kha Trinh and Chenyu Tian and Zihan Wang and Bohao Wu and Shengzhu Yin and Minhui Zhu and Kilian Lieret and Yanxin Lu and Genglin Liu and Yufeng Du and Tianhua Tao and Ofir Press and Jamie Callan and Eliu A Huerta and Hao Peng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ADLaALtdoG}\n}", "github": "", "reviewers": "fuak;SHks;qWyP", "pdf_size": 1710913, "rating": "5;8;8", "confidence": "4;4;4", "wc_summary_and_contributions": "60;71;92", "wc_strengths": "2;226;3", "wc_improvement": "5;169;3", "wc_limitations": "27;17;3", "wc_correctness": "1;169;3", "wc_clarity": "1;12;3", "wc_relation_to_prior_work": "1;52;3", "wc_documentation": "8;56;27", "wc_additional_feedback": "1;1;1", "wc_review": "106;773;138", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "3;2;3", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 74.33333333333333, 13.27487183449325 ], "wc_strengths_avg": [ 77.0, 105.35970134100924 ], "wc_improvement_avg": [ 59.0, 77.78603130811256 ], "wc_limitations_avg": [ 15.666666666666666, 9.843215373488933 ], "wc_correctness_avg": [ 57.666666666666664, 78.72878903058412 ], "wc_clarity_avg": [ 5.333333333333333, 4.784233364802441 ], "wc_relation_to_prior_work_avg": [ 18.666666666666668, 23.584363935078304 ], "wc_documentation_avg": [ 30.333333333333332, 19.737161790783283 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 339.0, 307.16228067044085 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 29, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6718856148962756729&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "illinois.edu;cmu.edu;;illinois.edu;illinois.edu;uiuc.edu;illinois.edu;unc.edu;illinois.edu;illinois.edu;illinois.edu;mit.edu;uchicago.edu;illinois.edu;uchicago.edu;utexas.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu;princeton.edu;intl.zju.edu.cn;cs.ucla.edu;cs.illinois.edu;illinois.edu;princeton.edu;cmu.edu;anl.gov;", "author_num": 29, "aff_unique_index": "0;1;0;0;0;0;2;0;0;0;3;4;0;4;5;0;0;0;0;6;7;8;0;0;6;1;9", "aff_unique_norm": "University of Illinois Urbana-Champaign;Carnegie Mellon University;University of North Carolina;Massachusetts Institute of Technology;University of Chicago;University of Texas at Austin;Princeton University;Zhejiang University;University of California, Los Angeles;Argonne National Laboratory", "aff_unique_dep": ";;;;;;;;Computer Science Department;", "aff_unique_url": "https://illinois.edu;https://www.cmu.edu;https://www.unc.edu;https://web.mit.edu;https://www.uchicago.edu;https://www.utexas.edu;https://www.princeton.edu;https://www.zju.edu.cn;https://www.ucla.edu;https://www.anl.gov", "aff_unique_abbr": "UIUC;CMU;UNC;MIT;UChicago;UT Austin;Princeton;ZJU;UCLA;ANL", "aff_campus_unique_index": "0;0;0;0;0;2;0;0;0;0;3;0;0;0;0;4;0;0", "aff_campus_unique": "Urbana-Champaign;;Chapel Hill;Austin;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Beyond Accuracy: Ensuring Correct Predictions With Correct Rationales", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96272", "id": "ADV0Pzi3Ol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ADV0Pzi3Ol", "openreview": "https://openreview.net/forum?id=ADV0Pzi3Ol", "poster": "/media/PosterPDFs/NeurIPS%202024/96272.png?t=1733529681.8252516", "project": "", "author_site": "Tang Li, Mengmeng Ma, Xi Peng", "tldr": "", "abstract": "Large pretrained foundation models demonstrate exceptional performance and, in some high-stakes applications, even surpass human experts. However, most of these models are currently evaluated primarily on prediction accuracy, overlooking the validity of the rationales behind their accurate predictions. For the safe deployment of foundation models, there is a pressing need to ensure *double-correct predictions*, *i.e.*, correct prediction backed by correct rationales. To achieve this, we propose a two-phase scheme: First, we curate a new dataset that offers structured rationales for visual recognition tasks. Second, we propose a rationale-informed optimization method to guide the model in disentangling and localizing visual evidence for each rationale, without requiring manual annotations. Extensive experiments and ablation studies demonstrate that our model outperforms state-of-the-art models by up to 10.1\\% in prediction accuracy across a wide range of tasks. Furthermore, our method significantly improves the model's rationale correctness, improving localization by 7.5\\% and disentanglement by 36.5\\%. Our dataset, source code, and pretrained weights: https://github.com/deep-real/DCP", "keywords": "Explainable Machine Learning;Vision-Language Models", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Tang Li;Mengmeng Ma;Xi Peng", "authorids": "~Tang_Li1;~Mengmeng_Ma1;~Xi_Peng1", "gender": "M;M;Not Specified", "homepage": "https://tangli0305.github.io/;https://mengmenm.top/;https://deep-real.github.io/dr_xipeng.html", "dblp": "01/1190-5;150/6565-2;149/7762-5", "google_scholar": "mQFL3DYAAAAJ;ycXTxwoAAAAJ;DWw4v0kAAAAJ", "orcid": "0000-0002-3134-4151;0000-0002-2804-2718;0000-0002-7772-001X", "linkedin": "tang-li-613132180/;;xi-peng-74b540b6/", "or_profile": "~Tang_Li1;~Mengmeng_Ma1;~Xi_Peng1", "aff": "University of Delaware;University of Delaware;University of Delaware", "aff_domain": "udel.edu;udel.edu;udel.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2024beyond,\ntitle={Beyond Accuracy: Ensuring Correct Predictions With Correct Rationales},\nauthor={Tang Li and Mengmeng Ma and Xi Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ADV0Pzi3Ol}\n}", "github": "", "reviewers": "WT2A;L5Ee;VWuj;dtoB", "pdf_size": 2450103, "rating": "4;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;2;3", "novelty": "3;2;1;3", "presentation": "2;2;1;3", "wc_summary": "142;84;137;48", "wc_strengths": "134;190;44;31", "wc_weaknesses": "297;200;414;46", "wc_questions": "16;29;165;75", "wc_limitations": "1;8;23;4", "wc_review": "590;511;783;204", "wc_reply_reviewers": "63;92;0;28", "wc_reply_authors": "838;630;883;142", "reply_reviewers": "1;2;0;1", "reply_authors": "4;3;4;4", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 102.75, 38.931831449342326 ], "wc_strengths_avg": [ 99.75, 65.48425383250542 ], "wc_weaknesses_avg": [ 239.25, 134.86914954873853 ], "wc_questions_avg": [ 71.25, 58.396810700585355 ], "wc_limitations_avg": [ 9.0, 8.455767262643882 ], "wc_review_avg": [ 522.0, 208.56054276876054 ], "wc_reply_reviewers_avg": [ 45.75, 34.802119188348286 ], "wc_reply_authors_avg": [ 623.25, 293.7834023562257 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7180162550889207599&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "udel.edu;udel.edu;udel.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Delaware", "aff_unique_dep": "", "aff_unique_url": "https://www.udel.edu", "aff_unique_abbr": "UD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Graph Sharpness-Aware Minimization for Enhancing and Accelerating Few-Shot Node Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96271", "id": "AF32GbuupC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AF32GbuupC", "openreview": "https://openreview.net/forum?id=AF32GbuupC", "poster": "", "project": "", "author_site": "Yihong Luo, Yuhan Chen, Siya Qiu, Yiwei Wang, Chen Zhang, Yan Zhou, Xiaochun Cao, Jing Tang", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have shown superior performance in node classification. However, GNNs perform poorly in the Few-Shot Node Classification (FSNC) task that requires robust generalization to make accurate predictions for unseen classes with limited labels. To tackle the challenge, we propose the integration of Sharpness-Aware Minimization (SAM)--a technique designed to enhance model generalization by finding a flat minimum of the loss landscape--into GNN training. The standard SAM approach, however, consists of two forward-backward steps in each training iteration, doubling the computational cost compared to the base optimizer (e.g., Adam). To mitigate this drawback, we introduce a novel algorithm, Fast Graph Sharpness-Aware Minimization (FGSAM), that integrates the rapid training of Multi-Layer Perceptrons (MLPs) with the superior performance of GNNs. Specifically, we utilize GNNs for parameter perturbation while employing MLPs to minimize the perturbed loss so that we can find a flat minimum with good generalization more efficiently. Moreover, our method reutilizes the gradient from the perturbation phase to incorporate graph topology into the minimization process at almost zero additional cost. To further enhance training efficiency, we develop FGSAM+ that executes exact perturbations periodically. Extensive experiments demonstrate that our proposed algorithm outperforms the standard SAM with lower computational costs in FSNC tasks. In particular, our FGSAM+ as a SAM variant offers a faster optimization than the base optimizer in most cases. In addition to FSNC, our proposed methods also demonstrate competitive performance in the standard node classification task for heterophilic graphs, highlighting the broad applicability.", "keywords": "Sharpness-Aware Minimization; Graph Neural Networks; Few-Shot Node Classification", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/d31b227049f09c803f9e110560ff154eda682eb3.zip", "author": "Yihong Luo;Yuhan Chen;Siya Qiu;Yiwei Wang;Chen Zhang;Yan Zhou;Xiaochun Cao;Jing Tang", "authorids": "~Yihong_Luo1;~Yuhan_Chen5;~Siya_Qiu1;~Yiwei_Wang2;~Chen_Zhang24;~Yan_Zhou10;~Xiaochun_Cao3;~Jing_Tang5", "gender": ";M;F;M;M;M;M;M", "homepage": "https://luo-yihong.github.io/;https://github.com/draym28;;;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/ZHANG-Chen/chenzhang;http://www.chuanglintech.com;https://scst.sysu.edu.cn/members/caoxiaochun.htm;https://sites.google.com/view/jtang", "dblp": "291/6620;155/2863-7;346/6829;50/5889-1;94/4084-10;;39/3695;83/663-4", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;B6ZWYcQAAAAJ;sQhRHHIAAAAJ;https://scholar.google.com.hk/citations?user=Sh9QvBkAAAAJ;https://scholar.google.ca/citations?user=AHEItGQAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6573-2331;0000-0002-2788-3275;0000-0003-2862-6681;;0000-0001-8706-1372;;0000-0001-7141-708X;0000-0002-0785-707X", "linkedin": ";;https://linkedin.com/in/siya-qiu-8b1008302;;;;;", "or_profile": "~Yihong_Luo1;~Yuhan_Chen5;~Siya_Qiu1;~Yiwei_Wang2;~Chen_Zhang24;~Yan_Zhou10;~Xiaochun_Cao3;~Jing_Tang5", "aff": "Hong Kong University of Science and Technology;SUN YAT-SEN UNIVERSITY;Hong Kong University of Science and Technology;UCLA Computer Science Department, University of California, Los Angeles;Hong Kong University of Science and Technology;;SUN YAT-SEN UNIVERSITY;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;sysu.edu.cn;hkust.edu;cs.ucla.edu;hkust-gz.edu.cn;;sysu.edu.cn;ust.hk", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nluo2024fast,\ntitle={Fast Graph Sharpness-Aware Minimization for Enhancing and Accelerating Few-Shot Node Classification},\nauthor={Yihong Luo and Yuhan Chen and Siya Qiu and Yiwei Wang and Chen Zhang and Yan Zhou and Xiaochun Cao and Jing Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AF32GbuupC}\n}", "github": "", "reviewers": "phcW;4EHj;nij7", "pdf_size": 2796152, "rating": "5;5;8", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "80;74;163", "wc_strengths": "8;99;80", "wc_weaknesses": "13;76;156", "wc_questions": "49;84;170", "wc_limitations": "6;13;2", "wc_review": "156;346;571", "wc_reply_reviewers": "19;26;12", "wc_reply_authors": "59;18;19", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.66666666666667, 40.61472092179824 ], "wc_strengths_avg": [ 62.333333333333336, 39.19467083956979 ], "wc_weaknesses_avg": [ 81.66666666666667, 58.516854172299986 ], "wc_questions_avg": [ 101.0, 50.839617097954886 ], "wc_limitations_avg": [ 7.0, 4.546060565661952 ], "wc_review_avg": [ 357.6666666666667, 169.62376667855898 ], "wc_reply_reviewers_avg": [ 19.0, 5.715476066494082 ], "wc_reply_authors_avg": [ 32.0, 19.096247449870006 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w628GoMreJoJ:scholar.google.com/&scioq=Fast+Graph+Sharpness-Aware+Minimization+for+Enhancing+and+Accelerating+Few-Shot+Node+Classification&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ust.hk;sysu.edu.cn;hkust.edu;cs.ucla.edu;hkust-gz.edu.cn;;sysu.edu.cn;ust.hk", "author_num": 8, "aff_unique_index": "0;1;0;2;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Sun Yat-sen University;University of California, Los Angeles", "aff_unique_dep": ";;Computer Science Department", "aff_unique_url": "https://www.ust.hk;http://www.sysu.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "HKUST;SYSU;UCLA", "aff_campus_unique_index": "0;0;2;0;0", "aff_campus_unique": "Hong Kong SAR;;Los Angeles", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Disentangling Interpretable Factors with Supervised Independent Subspace Principal Component Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96270", "id": "AFnSMlye5K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AFnSMlye5K", "openreview": "https://openreview.net/forum?id=AFnSMlye5K", "poster": "/media/PosterPDFs/NeurIPS%202024/96270.png?t=1733719001.6087914", "project": "", "author_site": "Jiayu Su, David A Knowles, Ra\u00fal Rabad\u00e1n", "tldr": "", "abstract": "The success of machine learning models relies heavily on effectively representing high-dimensional data. However, ensuring data representations capture human-understandable concepts remains difficult, often requiring the incorporation of prior knowledge and decomposition of data into multiple subspaces. Traditional linear methods fall short in modeling more than one space, while more expressive deep learning approaches lack interpretability. Here, we introduce Supervised Independent Subspace Principal Component Analysis ($\\texttt{sisPCA}$), a PCA extension designed for multi-subspace learning. Leveraging the Hilbert-Schmidt Independence Criterion (HSIC), $\\texttt{sisPCA}$ incorporates supervision and simultaneously ensures subspace disentanglement. We demonstrate $\\texttt{sisPCA}$'s connections with autoencoders and regularized linear regression and showcase its ability to identify and separate hidden data structures through extensive applications, including breast cancer diagnosis from image features, learning aging-associated DNA methylation changes, and single-cell analysis of malaria infection. Our results reveal distinct functional pathways associated with malaria colonization, underscoring the essentiality of explainable representation in high-dimensional data analysis.", "keywords": "Principal Component Analysis (PCA);Disentanglement Representation Learning;Computational Biology", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Jiayu Su;David A. Knowles;Raul Rabadan", "authorids": "~Jiayu_Su1;~David_A._Knowles1;~Raul_Rabadan1", "gender": "M;;M", "homepage": ";;https://daklab.github.io/", "dblp": ";;03/11107", "google_scholar": ";;N9QGV6YAAAAJ", "orcid": "0000-0002-8997-9272;;0000-0002-7408-146X", "linkedin": ";;david-a-knowles-4744928/", "or_profile": "~Jiayu_Su1;~Raul_Rabadan1;~David_Knowles1", "aff": "Columbia University;Columbia University;New York Genome Center", "aff_domain": "columbia.edu;ee.columbia.edu;nygenome.org", "position": "PhD student;;Core Faculty Member", "bibtex": "@inproceedings{\nsu2024disentangling,\ntitle={Disentangling Interpretable Factors with Supervised Independent Subspace Principal Component Analysis},\nauthor={Jiayu Su and David A. Knowles and Raul Rabadan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AFnSMlye5K}\n}", "github": "", "reviewers": "QSK9;UeQu;PWKM;debZ", "pdf_size": 10528039, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "67;86;209;36", "wc_strengths": "78;25;78;178", "wc_weaknesses": "149;170;52;416", "wc_questions": "97;39;2;16", "wc_limitations": "14;25;20;43", "wc_review": "405;345;361;689", "wc_reply_reviewers": "33;82;0;177", "wc_reply_authors": "36;746;0;60", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.5, 65.69056248807739 ], "wc_strengths_avg": [ 89.75, 55.355103649076476 ], "wc_weaknesses_avg": [ 196.75, 134.18154679388667 ], "wc_questions_avg": [ 38.5, 36.26637561157718 ], "wc_limitations_avg": [ 25.5, 10.828203913853857 ], "wc_review_avg": [ 450.0, 139.7247293788755 ], "wc_reply_reviewers_avg": [ 73.0, 66.75702210254738 ], "wc_reply_authors_avg": [ 210.5, 309.90764753390647 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n7DEWjUx68AJ:scholar.google.com/&scioq=Disentangling+Interpretable+Factors+with+Supervised+Independent+Subspace+Principal+Component+Analysis&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "columbia.edu;ee.columbia.edu;nygenome.org", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Columbia University;New York Genome Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.nygenome.org", "aff_unique_abbr": "Columbia;NYGC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "InterControl: Zero-shot Human Interaction Generation by Controlling Every Joint", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96269", "id": "AH1mFs3c7o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AH1mFs3c7o", "openreview": "https://openreview.net/forum?id=AH1mFs3c7o", "poster": "/media/PosterPDFs/NeurIPS%202024/96269.png?t=1731758329.8961618", "project": "", "author_site": "Zhenzhi Wang, Jingbo Wang, Yixuan Li, Dahua Lin, Bo Dai", "tldr": "", "abstract": "Text-conditioned motion synthesis has made remarkable progress with the emergence of diffusion models. However, the majority of these motion diffusion models are primarily designed for a single character and overlook multi-human interactions. In our approach, we strive to explore this problem by synthesizing human motion with interactions for a group of characters of any size in a zero-shot manner. The key aspect of our approach is the adaptation of human-wise interactions as pairs of human joints that can be either in contact or separated by a desired distance. In contrast to existing methods that necessitate training motion generation models on multi-human motion datasets with a fixed number of characters, our approach inherently possesses the flexibility to model human interactions involving an arbitrary number of individuals, thereby transcending the limitations imposed by the training data. We introduce a novel controllable motion generation method, InterControl, to encourage the synthesized motions maintaining the desired distance between joint pairs. It consists of a motion controller and an inverse kinematics guidance module that realistically and accurately aligns the joints of synthesized characters to the desired location. Furthermore, we demonstrate that the distance between joint pairs for human-wise interactions can be generated using an off-the-shelf Large Language Model (LLM). Experimental results highlight the capability of our framework to generate interactions with multiple human characters and its potential to work with off-the-shelf physics-based character simulators. Code is available at https://github.com/zhenzhiwang/intercontrol.", "keywords": "human motion generation;human interaction generation;diffusion model;controllable generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9caed53ad7861e34aaff840525658de3764856ab.zip", "author": "Zhenzhi Wang;Jingbo Wang;Yixuan Li;Dahua Lin;Bo Dai", "authorids": "~Zhenzhi_Wang1;~Jingbo_Wang3;~Yixuan_Li3;~Dahua_Lin1;~Bo_Dai2", "gender": "M;M;F;M;M", "homepage": "https://zhenzhiwang.github.io/;https://scholar.google.com/citations?user=GStTsxAAAAAJ&hl=en;https://yixuanli98.github.io;http://dahua.site;http://daibo.info/", "dblp": "59/9543-1;10/1491-3.html;144/6087-2;53/6088;64/2903-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;GStTsxAAAAAJ;dC3bpFcAAAAJ;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ", "orcid": ";;;;0000-0003-0777-9232", "linkedin": ";;liyixuan1998/;;", "or_profile": "~Zhenzhi_Wang1;~Jingbo_Wang3;~Yixuan_Li3;~Dahua_Lin1;~Bo_Dai2", "aff": "The Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "cuhk.edu.hk;pjlab.org.cn;cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn", "position": "PhD student;Researcher;PhD student;Associate Professor;Scientist", "bibtex": "@inproceedings{\nwang2024intercontrol,\ntitle={InterControl: Zero-shot Human Interaction Generation by Controlling Every Joint},\nauthor={Zhenzhi Wang and Jingbo Wang and Yixuan Li and Dahua Lin and Bo Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AH1mFs3c7o}\n}", "github": "", "reviewers": "VAgJ;7GL6;HwRy;BPk3", "pdf_size": 3739953, "rating": "4;5;5;6", "confidence": "5;4;4;5", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "244;85;109;249", "wc_strengths": "171;67;59;53", "wc_weaknesses": "285;153;228;358", "wc_questions": "246;108;20;334", "wc_limitations": "4;7;52;5", "wc_review": "950;420;468;999", "wc_reply_reviewers": "601;139;121;443", "wc_reply_authors": "1264;14;19;503", "reply_reviewers": "2;1;1;2", "reply_authors": "4;2;2;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 171.75, 75.25083056020047 ], "wc_strengths_avg": [ 87.5, 48.46390409366542 ], "wc_weaknesses_avg": [ 256.0, 75.22964841071636 ], "wc_questions_avg": [ 177.0, 121.26417442921878 ], "wc_limitations_avg": [ 17.0, 20.23610634484806 ], "wc_review_avg": [ 709.25, 266.35631680138545 ], "wc_reply_reviewers_avg": [ 326.0, 203.90438935932693 ], "wc_reply_authors_avg": [ 450.0, 510.2112307662386 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10404830558079033046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cuhk.edu.hk;pjlab.org.cn;cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.shailab.org/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "CUHK;Shanghai AI Lab;SAIL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Credal Learning Theory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96268", "id": "AH5KwUSsln", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AH5KwUSsln", "openreview": "https://openreview.net/forum?id=AH5KwUSsln", "poster": "/media/PosterPDFs/NeurIPS%202024/96268.png?t=1731321993.5393522", "project": "", "author_site": "Michele Caprio, Maryam Sultana, Eleni Elia, Fabio Cuzzolin", "tldr": "", "abstract": "Statistical learning theory is the foundation of machine learning, providing theoretical bounds for the risk of models learned from a (single) training set, assumed to issue from an unknown probability distribution. In actual deployment, however, the data distribution may (and often does) vary, causing domain adaptation/generalization issues. In this paper we lay the foundations for a `credal' theory of learning, using convex sets of probabilities (credal sets) to model the variability in the data-generating distribution. Such credal sets, we argue, may be inferred from a finite sample of training sets. Bounds are derived for the case of finite hypotheses spaces (both assuming realizability or not), as well as infinite model spaces, which directly generalize classical results.", "keywords": "Statistical learning;imprecise probabilities;credal sets;epistemic and aleatory uncertainties", "primary_area": "learning_theory", "supplementary_material": "", "author": "Michele Caprio;Maryam Sultana;Eleni Elia;Fabio Cuzzolin", "authorids": "~Michele_Caprio1;~Maryam_Sultana1;~Eleni_Elia1;~Fabio_Cuzzolin1", "gender": "M;F;;M", "homepage": "https://mc6034.wixsite.com/caprio;;;https://www.brookes.ac.uk/profiles/staff/fabio-cuzzolin", "dblp": "322/9067;210/1661;;60/2919", "google_scholar": "6rngqVgAAAAJ;dKsfEyIAAAAJ;;https://scholar.google.co.uk/citations?user=T8LkBTYAAAAJ", "orcid": "0000-0002-7569-097X;0000-0002-8831-843X;;0000-0002-9271-2130", "linkedin": "michele-caprio-5866b162/;dr-maryam-sultana-9166bb189/;;fabio-cuzzolin-b481a928/", "or_profile": "~Michele_Caprio1;~Maryam_Sultana1;~Eleni_Elia1;~Fabio_Cuzzolin1", "aff": "University of Pennsylvania;Oxford Brookes University;;Oxford Brookes University", "aff_domain": "seas.upenn.edu;brookes.ac.uk;;brookes.ac.uk", "position": "Postdoc;Postdoc;;Full Professor", "bibtex": "@inproceedings{\ncaprio2024credal,\ntitle={Credal Learning Theory},\nauthor={Michele Caprio and Maryam Sultana and Eleni Elia and Fabio Cuzzolin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AH5KwUSsln}\n}", "github": "", "reviewers": "sJhN;SbDN;2ZTx;hWdx", "pdf_size": 1345660, "rating": "5;6;6;6", "confidence": "3;2;3;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;2;2", "wc_summary": "110;46;74;84", "wc_strengths": "98;41;60;32", "wc_weaknesses": "276;141;83;68", "wc_questions": "46;29;108;62", "wc_limitations": "12;10;19;12", "wc_review": "542;267;344;258", "wc_reply_reviewers": "4;202;25;4", "wc_reply_authors": "370;72;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.5, 22.907422377910613 ], "wc_strengths_avg": [ 57.75, 25.341418665891617 ], "wc_weaknesses_avg": [ 142.0, 82.02743443507178 ], "wc_questions_avg": [ 61.25, 29.40556920040828 ], "wc_limitations_avg": [ 13.25, 3.418698582794336 ], "wc_review_avg": [ 352.75, 114.26148738748327 ], "wc_reply_reviewers_avg": [ 58.75, 83.14858687915267 ], "wc_reply_authors_avg": [ 110.5, 152.67858395989924 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8072285899294170642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "seas.upenn.edu;brookes.ac.uk;;brookes.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Pennsylvania;Oxford Brookes University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.oxfordbrookes.ac.uk", "aff_unique_abbr": "UPenn;OBU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "DEFT: Efficient Fine-tuning of Diffusion Models by Learning the Generalised $h$-transform", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96267", "id": "AKBTFQhCjm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AKBTFQhCjm", "openreview": "https://openreview.net/forum?id=AKBTFQhCjm", "poster": "/media/PosterPDFs/NeurIPS%202024/96267.png?t=1730231248.2683616", "project": "", "author_site": "Alexander Denker, Francisco Vargas, Shreyas Padhy, Kieran Didi, Simon Mathis, Riccardo Barbano, Vincent Dutordoir, Emile Mathieu, Urszula Julia Komorowska, Pietro Li\u00f3", "tldr": "", "abstract": "Generative modelling paradigms based on denoising diffusion processes have emerged as a leading candidate for conditional sampling in inverse problems. \nIn many real-world applications, we often have access to large, expensively trained unconditional diffusion models, which we aim to exploit for improving conditional sampling.\nMost recent approaches are motivated heuristically and lack a unifying framework, obscuring connections between them. Further, they often suffer from issues such as being very sensitive to hyperparameters, being expensive to train or needing access to weights hidden behind a closed API. In this work, we unify conditional training and sampling using the mathematically well-understood Doob's h-transform. This new perspective allows us to unify many existing methods under a common umbrella. Under this framework, we propose DEFT (Doob's h-transform Efficient FineTuning), a new approach for conditional generation that simply fine-tunes a very small network to quickly learn the conditional $h$-transform, while keeping the larger unconditional network unchanged. DEFT is much faster than existing baselines while achieving state-of-the-art performance across a variety of linear and non-linear benchmarks. On image reconstruction tasks, we achieve speedups of up to 1.6$\\times$, while having the best perceptual quality on natural images and reconstruction performance on medical images. Further, we also provide initial experiments on protein motif scaffolding and outperform reconstruction guidance methods.", "keywords": "Inverse problems;Generative Modelling;Diffusion Models;Conditional Generative Modelling;Diffusion model guidance", "primary_area": "generative_models", "supplementary_material": "", "author": "Alexander Denker;Francisco Vargas;Shreyas Padhy;Kieran Didi;Simon V Mathis;Riccardo Barbano;Vincent Dutordoir;Emile Mathieu;Urszula Julia Komorowska;Pietro Lio", "authorids": "~Alexander_Denker1;~Francisco_Vargas1;~Shreyas_Padhy1;~Kieran_Didi1;~Simon_V_Mathis1;~Riccardo_Barbano1;~Vincent_Dutordoir1;~Emile_Mathieu1;~Urszula_Julia_Komorowska1;~Pietro_Lio1", "gender": "M;M;M;M;M;M;M;M;;M", "homepage": "https://alexdenker.github.io/;;http://shreyaspadhy.github.io;https://kdidi.netlify.app/;https://simonmathis.org;http://www0.cs.ucl.ac.uk/people/R.Barbano.html;;http://emilemathieu.fr;;https://www.cst.cam.ac.uk/people/pl219", "dblp": "182/0952;79/7431-1;267/9851;336/6909;338/5638;;212/5487;223/6084.html;;l/PietroLio.html", "google_scholar": "j5NT9kAAAAAJ;;JxbV2R0AAAAJ;KxQAWKQAAAAJ;https://scholar.google.ch/citations?user=N6I6fT0AAAAJ;;;g9BjTqgAAAAJ;2ldyD9MAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ", "orcid": "0000-0002-7265-261X;;;0000-0001-6839-3320;0000-0002-5246-6481;;;;;0000-0002-0540-5053", "linkedin": ";;;kieran-didi/;simonmathis/;riccardo-barbano-5b733a88/;;;urszula-julia-komorowska-b080b9230/;", "or_profile": "~Alexander_Denker1;~Francisco_Vargas1;~Shreyas_Padhy1;~Kieran_Didi1;~Simon_V_Mathis1;~Riccardo_Barbano1;~Vincent_Dutordoir1;~Emile_Mathieu1;~Urszula_Julia_Komorowska1;~Pietro_Lio1", "aff": "Universit\u00e4t Bremen;University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;;;University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "uni-bremen.de;cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;;;cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;PhD student;MS student;PhD student;;;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\ndenker2024deft,\ntitle={{DEFT}: Efficient Fine-tuning of Diffusion Models by Learning the Generalised \\$h\\$-transform},\nauthor={Alexander Denker and Francisco Vargas and Shreyas Padhy and Kieran Didi and Simon V Mathis and Riccardo Barbano and Vincent Dutordoir and Emile Mathieu and Urszula Julia Komorowska and Pietro Lio},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AKBTFQhCjm}\n}", "github": "", "reviewers": "ZJuc;ESdx;qBFS;no8y", "pdf_size": 13716927, "rating": "5;5;5;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "44;85;79;40", "wc_strengths": "81;107;95;113", "wc_weaknesses": "68;112;98;114", "wc_questions": "83;4;39;36", "wc_limitations": "7;3;12;5", "wc_review": "283;311;323;308", "wc_reply_reviewers": "25;48;56;30", "wc_reply_authors": "293;981;566;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.0, 20.161845153655953 ], "wc_strengths_avg": [ 99.0, 12.24744871391589 ], "wc_weaknesses_avg": [ 98.0, 18.384776310850235 ], "wc_questions_avg": [ 40.5, 28.111385593741193 ], "wc_limitations_avg": [ 6.75, 3.344772040064913 ], "wc_review_avg": [ 306.25, 14.549484526951462 ], "wc_reply_reviewers_avg": [ 39.75, 12.695963925594622 ], "wc_reply_authors_avg": [ 460.0, 361.3052725881536 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11279479112107871981&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uni-bremen.de;cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;;;cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "University of Bremen;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-bremen.de;https://www.cam.ac.uk", "aff_unique_abbr": "Uni Bremen;Cambridge", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "ConStat: Performance-Based Contamination Detection in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96266", "id": "ALISPmDPCq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ALISPmDPCq", "openreview": "https://openreview.net/forum?id=ALISPmDPCq", "poster": "/media/PosterPDFs/NeurIPS%202024/96266.png?t=1730826565.7188766", "project": "", "author_site": "Jasper Dekoninck, Mark M\u00fcller, Martin Vechev", "tldr": "", "abstract": "Public benchmarks play an essential role in the evaluation of large language models. However, data contamination can lead to inflated performance, rendering them unreliable for model comparison. It is therefore crucial to detect contamination and estimate its impact on measured performance. Unfortunately, existing detection methods can be easily evaded and fail to quantify contamination. To overcome these limitations, we propose a novel definition of *contamination as artificially inflated and non-generalizing benchmark performance* instead of the inclusion of benchmark samples in the training data. This perspective enables us to detect *any* model with inflated performance, i.e., performance that does not generalize to rephrased samples, synthetic samples from the same distribution, or different benchmarks for the same task. Based on this insight, we develop ConStat, a statistical method that reliably detects and quantifies contamination by comparing performance between a primary and reference benchmark relative to a set of reference models. We demonstrate the effectiveness of ConStat in an extensive evaluation of diverse model architectures, benchmarks, and contamination scenarios and find high levels of contamination in multiple popular models including Mistral, Llama, Yi, and the top-3 Open LLM Leaderboard models.", "keywords": "large language models;model evaluation;contamination detection", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/7a8be72c45ef9a41da35118d8016f57937f0d05b.zip", "author": "Jasper Dekoninck;Mark Niklas Mueller;Martin Vechev", "authorids": "~Jasper_Dekoninck1;~Mark_Niklas_Mueller2;~Martin_Vechev1", "gender": "M;M;M", "homepage": ";https://www.sri.inf.ethz.ch/people/mark;https://www.sri.inf.ethz.ch/people/martin", "dblp": "361/7298;287/4254;93/2189.html", "google_scholar": "https://scholar.google.com/citations?hl=en;RBpmcCAAAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";0000-0002-2496-6542;", "linkedin": "jasper-dekoninck-8a1bb41a1/;mark-m%C3%BCller-8bb4b1140/;", "or_profile": "~Jasper_Dekoninck1;~Mark_Niklas_Mueller2;~Martin_Vechev1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndekoninck2024constat,\ntitle={ConStat: Performance-Based Contamination Detection in Large Language Models},\nauthor={Jasper Dekoninck and Mark Niklas Mueller and Martin Vechev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ALISPmDPCq}\n}", "github": "", "reviewers": "hKkW;xz9Q;UkYs;E2e6;tnxc", "pdf_size": 1896097, "rating": "5;5;6;6;8", "confidence": "4;4;3;3;3", "soundness": "2;3;2;3;3", "novelty": "2;3;2;2;4", "presentation": "4;2;2;3;4", "wc_summary": "108;74;79;72;46", "wc_strengths": "26;95;27;61;54", "wc_weaknesses": "302;124;63;97;108", "wc_questions": "29;4;97;46;82", "wc_limitations": "83;7;23;9;5", "wc_review": "548;304;289;285;295", "wc_reply_reviewers": "40;0;0;0;135", "wc_reply_authors": "97;0;0;0;48", "reply_reviewers": "1;0;0;0;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 75.8, 19.762590923257 ], "wc_strengths_avg": [ 52.6, 25.429117169103613 ], "wc_weaknesses_avg": [ 138.8, 84.01761720020392 ], "wc_questions_avg": [ 51.6, 34.038801389003105 ], "wc_limitations_avg": [ 25.4, 29.486267990371385 ], "wc_review_avg": [ 344.2, 102.10073457130463 ], "wc_reply_reviewers_avg": [ 35.0, 52.3450093132096 ], "wc_reply_authors_avg": [ 29.0, 38.750483867946734 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.74535599249993, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15583214378204931566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "inf.ethz.ch;ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "MTGS: A Novel Framework for Multi-Person Temporal Gaze Following and Social Gaze Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96265", "id": "ALU676zGFE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ALU676zGFE", "openreview": "https://openreview.net/forum?id=ALU676zGFE", "poster": "/media/PosterPDFs/NeurIPS%202024/96265.png?t=1733473237.4212997", "project": "", "author_site": "Anshul Gupta, Samy Tafasca, Arya Farkhondeh, Pierre Vuillecard, Jean-marc Odobez", "tldr": "", "abstract": "Gaze following and social gaze prediction are fundamental tasks providing insights into human communication behaviors, intent, and social interactions. Most previous approaches addressed these tasks separately, either by designing highly specialized social gaze models that do not generalize to other social gaze tasks or by considering social gaze inference as an ad-hoc post-processing of the gaze following task. Furthermore, the vast majority of gaze following approaches have proposed models that can handle only one person at a time and are static, therefore failing to take advantage of social interactions and temporal dynamics. In this paper, we address these limitations and introduce a novel framework to jointly predict the gaze target and social gaze label for all people in the scene. It comprises (i) a temporal, transformer-based architecture that, in addition to frame tokens, handles person-specific tokens capturing the gaze information related to each individual; (ii) a new dataset, VSGaze, built from multiple gaze following and social gaze datasets by extending and validating head detections and tracks, and unifying annotation types. We demonstrate that our model can address and benefit from training on all tasks jointly, achieving state-of-the-art results for multi-person gaze following and social gaze prediction. Our annotations and code will be made publicly available.", "keywords": "gaze following;social gaze prediction;multi-task learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Anshul Gupta;Samy Tafasca;Arya Farkhondeh;Pierre Vuillecard;Jean-marc Odobez", "authorids": "~Anshul_Gupta2;~Samy_Tafasca1;~Arya_Farkhondeh1;~Pierre_Vuillecard1;~Jean-marc_Odobez1", "gender": ";M;;M;", "homepage": "https://anshul-gupta24.github.io/;;https://aryafarkhondeh.github.io;;", "dblp": ";327/3661;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;N6SqiAIAAAAJ;ZgMfa7gAAAAJ;;", "orcid": ";0009-0008-2427-7406;;;", "linkedin": ";samy-tafasca/;;pierre-vuillecard-b5326b195/;", "or_profile": "~Anshul_Gupta2;~Samy_Tafasca1;~Arya_Farkhondeh1;~Pierre_Vuillecard1;~Jean-marc_Odobez1", "aff": "Meta Facebook;EPFL;Idiap Research Institute;EPFL - EPF Lausanne;", "aff_domain": "meta.com;epfl.ch;idiap.ch;epfl.ch;", "position": "Intern;PhD student;Research Assistant;PhD student;", "bibtex": "@inproceedings{\ngupta2024mtgs,\ntitle={{MTGS}: A Novel Framework for Multi-Person Temporal Gaze Following and Social Gaze Prediction},\nauthor={Anshul Gupta and Samy Tafasca and Arya Farkhondeh and Pierre Vuillecard and Jean-marc Odobez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ALU676zGFE}\n}", "github": "", "reviewers": "FWeu;poG5;5XsF;7cdN", "pdf_size": 32218943, "rating": "6;6;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "1;4;3;3", "wc_summary": "94;55;102;60", "wc_strengths": "57;54;44;44", "wc_weaknesses": "131;55;49;223", "wc_questions": "77;78;78;70", "wc_limitations": "1;1;10;23", "wc_review": "360;243;283;420", "wc_reply_reviewers": "9;0;0;53", "wc_reply_authors": "15;0;0;20", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 77.75, 20.522853115490545 ], "wc_strengths_avg": [ 49.75, 5.84700778176325 ], "wc_weaknesses_avg": [ 114.5, 70.48936089935842 ], "wc_questions_avg": [ 75.75, 3.344772040064913 ], "wc_limitations_avg": [ 8.75, 9.01041064547005 ], "wc_review_avg": [ 326.5, 68.42696836774226 ], "wc_reply_reviewers_avg": [ 15.5, 21.96019125599775 ], "wc_reply_authors_avg": [ 8.75, 8.926785535678562 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7030471280910282658&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "meta.com;epfl.ch;idiap.ch;epfl.ch;", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Meta;EPFL;Idiap Research Institute", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.epfl.ch;https://www.idiap.ch", "aff_unique_abbr": "Meta;EPFL;Idiap", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "AM1znjeo9l", "title": "Noise Balance and Stationary Distribution of Stochastic Gradient Descent", "track": "main", "status": "Reject", "tldr": "", "abstract": "How the stochastic gradient descent (SGD) navigates the loss landscape of a neural network remains poorly understood. This work shows that the minibatch noise of SGD regularizes the solution towards a noise-balanced solution whenever the loss function contains a rescaling symmetry. We prove that when the rescaling symmetry exists, the SGD dynamics is limited to only a low-dimensional subspace and prefers a special set of solutions in an infinitely large degenerate manifold, which offers a partial explanation of the effectiveness of SGD in training neural networks. We then apply this result to derive the stationary distribution of stochastic gradient flow for a diagonal linear network with arbitrary depth and width, which is the first analytical expression of the stationary distribution of SGD in a high-dimensional non-quadratic potential. The stationary distribution exhibits complicated nonlinear phenomena such as phase transitions, loss of ergodicity, memory effects, and fluctuation inversion. These phenomena are shown to exist uniquely in deep networks, highlighting a fundamental difference between deep and shallow models. Lastly, we discuss the implication of the proposed theory for the practical problem of variational Bayesian inference.", "keywords": "stochastic gradient descent;stationary distribution;stochastic differential equation;phase transition", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Liu Ziyin;Hongchao Li;Masahito Ueda", "authorids": "~Liu_Ziyin1;~Hongchao_Li2;~Masahito_Ueda1", "gender": ";M;M", "homepage": "https://www.mit.edu/~ziyinl/;https://sites.google.com/view/condmat-hongchaoli;http://cat.phys.s.u-tokyo.ac.jp/index-e.html", "dblp": ";;", "google_scholar": "NpN9oRMAAAAJ;;https://scholar.google.co.jp/citations?user=Xpjx9CwAAAAJ", "orcid": ";;0000-0002-5367-1436", "linkedin": ";;", "or_profile": "~Liu_Ziyin1;~Hongchao_Li2;~Masahito_Ueda1", "aff": "Massachusetts Institute of Technology;;The University of Tokyo", "aff_domain": "mit.edu;;u-tokyo.ac.jp", "position": "Postdoc;;Full Professor", "bibtex": "@misc{\nanonymous2024noise,\ntitle={Noise Balance and Stationary Distribution of Stochastic Gradient Descent},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=AM1znjeo9l}\n}", "github": "", "project": "", "reviewers": "mL9n;56YC;HVit", "site": "https://openreview.net/forum?id=AM1znjeo9l", "pdf_size": 1351310, "rating": "5;5;5", "confidence": "4;4;4", "soundness": "2;2;2", "novelty": "3;3;3", "presentation": "3;2;2", "wc_summary": "58;56;78", "wc_strengths": "29;14;66", "wc_weaknesses": "389;209;239", "wc_questions": "22;467;99", "wc_limitations": "25;1;15", "wc_review": "523;747;497", "wc_reply_reviewers": "205;526;39", "wc_reply_authors": "563;830;45", "reply_reviewers": "2;3;1", "reply_authors": "4;4;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 64.0, 9.93310961716756 ], "wc_strengths_avg": [ 36.333333333333336, 21.853044537445015 ], "wc_weaknesses_avg": [ 279.0, 78.74007874011811 ], "wc_questions_avg": [ 196.0, 194.18719490910482 ], "wc_limitations_avg": [ 13.666666666666666, 9.843215373488935 ], "wc_review_avg": [ 589.0, 112.22596253392824 ], "wc_reply_reviewers_avg": [ 256.6666666666667, 202.14571202201864 ], "wc_reply_authors_avg": [ 479.3333333333333, 325.8898927893012 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WnIBKvVv48MJ:scholar.google.com/&scioq=Noise+Balance+and+Stationary+Distribution+of+Stochastic+Gradient+Descent&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "MIT;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "Buffer of Thoughts: Thought-Augmented Reasoning with Large Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96264", "id": "ANO1i9JPtb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ANO1i9JPtb", "openreview": "https://openreview.net/forum?id=ANO1i9JPtb", "poster": "/media/PosterPDFs/NeurIPS%202024/96264.png?t=1733059245.2183244", "project": "", "author_site": "Ling Yang, Zhaochen Yu, Tianjun Zhang, Shiyi Cao, Minkai Xu, Wentao Zhang, Joseph Gonzalez, Bin CUI", "tldr": "", "abstract": "We introduce Buffer of Thoughts (BoT), a novel and versatile thought-augmented reasoning approach for enhancing accuracy, efficiency and robustness of large language models (LLMs). Specifically, we propose meta-buffer to store a series of informative high-level thoughts, namely thought-template, distilled from the problem-solving processes across various tasks. Then for each problem, we retrieve a relevant thought-template and adaptively instantiate it with specific reasoning structures to conduct efficient reasoning. To guarantee the scalability and stability, we further propose buffer-manager to dynamically update the meta-buffer, thus enhancing the capacity of meta-buffer as more tasks are solved. We conduct extensive experiments on 10 challenging reasoning-intensive tasks, and achieve significant performance improvements over previous SOTA methods: 11\\% on Game of 24, 20\\% on Geometric Shapes and 51\\% on Checkmate-in-One. Further analysis demonstrate the superior generalization ability and model robustness of our BoT, while requiring only 12\\% of the cost of multi-query prompting methods (e.g., tree/graph of thoughts) on average. Code is available at: https://github.com/YangLing0818/buffer-of-thought-llm", "keywords": "Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ling Yang;Zhaochen Yu;Tianjun Zhang;Shiyi Cao;Minkai Xu;Wentao Zhang;Joseph E. Gonzalez;Bin CUI", "authorids": "~Ling_Yang1;~Zhaochen_Yu2;~Tianjun_Zhang1;~Shiyi_Cao1;~Minkai_Xu1;~Wentao_Zhang1;~Joseph_E._Gonzalez1;~Bin_CUI2", "gender": "M;M;;F;M;M;M;M", "homepage": "https://yangling0818.github.io/;https://zhaochenyu0201.github.io;https://tianjunz.github.io;https://shiyicao.com/;https://minkaixu.com;http://eecs.berkeley.edu/~jegonzal;https://cuibinpku.github.io/index.html;https://zwt233.github.io/", "dblp": "01/24-6.html;;;;257/3355;61/8262;55/5031.html;41/3249-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;9RNgZOIAAAAJ;UE9jz_MAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;IJAU8KoAAAAJ;JE4VON0AAAAJ", "orcid": "0000-0003-1905-8053;;;;;0000-0003-2921-956X;0000-0003-1681-4677;0000-0002-7532-5550", "linkedin": ";;;;;;;", "or_profile": "~Ling_Yang1;~Zhaochen_Yu2;~Tianjun_Zhang1;~Shiyi_Cao1;~Minkai_Xu1;~Joseph_E._Gonzalez1;~Bin_CUI2;~Zhang_wen_tao1", "aff": "Peking University;Peking University;University of California, Berkeley;University of California, Berkeley;Stanford University;University of California, Berkeley;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;berkeley.edu;berkeley.edu;stanford.edu;berkeley.edu;pku.edu.cn;pku.edu.cn", "position": "PhD student;Intern;PhD student;PhD student;PhD student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024buffer,\ntitle={Buffer of Thoughts: Thought-Augmented Reasoning with Large Language Models},\nauthor={Ling Yang and Zhaochen Yu and Tianjun Zhang and Shiyi Cao and Minkai Xu and Wentao Zhang and Joseph E. Gonzalez and Bin CUI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ANO1i9JPtb}\n}", "github": "", "reviewers": "upCD;cwbi;yiEf;hZUQ", "pdf_size": 1595074, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "104;86;97;195", "wc_strengths": "26;72;40;117", "wc_weaknesses": "36;288;55;147", "wc_questions": "244;86;2;71", "wc_limitations": "10;5;7;66", "wc_review": "420;537;201;596", "wc_reply_reviewers": "19;85;15;55", "wc_reply_authors": "77;184;0;41", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 120.5, 43.48850422812907 ], "wc_strengths_avg": [ 63.75, 34.97409755805001 ], "wc_weaknesses_avg": [ 131.5, 99.63056759850362 ], "wc_questions_avg": [ 100.75, 88.56459507049078 ], "wc_limitations_avg": [ 22.0, 25.465663156493687 ], "wc_review_avg": [ 438.5, 151.04386780005336 ], "wc_reply_reviewers_avg": [ 43.5, 28.578838324886476 ], "wc_reply_authors_avg": [ 75.5, 68.30995535059293 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2657077137471556160&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;berkeley.edu;berkeley.edu;stanford.edu;berkeley.edu;pku.edu.cn;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;1;2;1;0;0", "aff_unique_norm": "Peking University;University of California, Berkeley;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu;https://www.stanford.edu", "aff_unique_abbr": "Peking U;UC Berkeley;Stanford", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Berkeley;Stanford", "aff_country_unique_index": "0;0;1;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "AO5MjDuHpr", "title": "Tree of Attributes Prompt Learning for Vision-Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Prompt learning has proven effective in adapting vision language models for downstream tasks. However, existing methods usually append learnable prompt tokens solely with the category names to obtain textual features, which fails to fully leverage the rich context indicated in the textual category name. To address this issue, we propose the Tree of Attributes Prompt learning (TAP), which first instructs LLMs to generate a tree of attributes with a ``concept - attribute - description'' structure for each associated category name, and then learn the hierarchy with vision and text prompt tokens. Unlike existing methods that merely augment category names with a set of unstructured descriptions, our approach essentially distills structured knowledge graphs associated with class names from LLMs. Furthermore, our approach introduces text and vision prompts designed to explicitly learn the corresponding visual attributes, effectively serving as domain experts. Additionally, the general and diverse descriptions generated based on the class names may be wrong or absent in the specific given images. To address this misalignment, we further introduce a vision-conditional pooling module to extract instance-specific text features. Extensive experimental results demonstrate that our approach outperforms state-of-the-art methods on the zero-shot base-to-novel generalization as well as few-shot classification across 11 diverse datasets.", "keywords": "Few-shot learning;Prompt tuning;Vision-Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Tong Ding;Wanhua Li;Zhongqi Miao;Hanspeter Pfister", "authorids": "~Tong_Ding1;~Wanhua_Li1;~Zhongqi_Miao1;~Hanspeter_Pfister1", "gender": "M;M;;M", "homepage": "https://hhenryd.github.io/;https://li-wanhua.github.io/;;https://vcg.seas.harvard.edu", "dblp": ";189/8563-1;239/5123;p/HanspeterPfister", "google_scholar": "BEuV0lEAAAAJ;I03QnrsAAAAJ;;tvBEoaMAAAAJ", "orcid": ";;0000-0002-0439-8592;0000-0002-3620-2582", "linkedin": "tong-ding-/;;;hpfister/", "or_profile": "~Tong_Ding1;~Wanhua_Li1;~Zhongqi_Miao1;~Hanspeter_Pfister1", "aff": "Harvard University, Harvard University;Harvard University;Microsoft;Harvard University", "aff_domain": "g.harvard.edu;harvard.edu;microsoft.com;harvard.edu", "position": "PhD student;Postdoc;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024tree,\ntitle={Tree of Attributes Prompt Learning for Vision-Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=AO5MjDuHpr}\n}", "github": "", "project": "", "reviewers": "grEg;ZE3g;bYva;Vt6k;1xvn", "site": "https://openreview.net/forum?id=AO5MjDuHpr", "pdf_size": 1087650, "rating": "4;4;5;6;8", "confidence": "4;4;4;5;5", "soundness": "2;2;3;4;4", "novelty": "3;2;2;3;4", "presentation": "3;3;2;3;3", "wc_summary": "105;94;53;50;116", "wc_strengths": "57;33;30;101;87", "wc_weaknesses": "98;135;204;75;150", "wc_questions": "3;56;4;172;64", "wc_limitations": "1;1;1;47;2", "wc_review": "264;319;292;445;419", "wc_reply_reviewers": "20;120;7;16;489", "wc_reply_authors": "35;378;26;26;943", "reply_reviewers": "1;1;1;1;3", "reply_authors": "2;2;2;2;4", "rating_avg": [ 5.4, 1.4966629547095764 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.6, 27.13374283065276 ], "wc_strengths_avg": [ 61.6, 28.408449447303525 ], "wc_weaknesses_avg": [ 132.4, 44.54480890070133 ], "wc_questions_avg": [ 59.8, 61.580516399263814 ], "wc_limitations_avg": [ 10.4, 18.304097901836084 ], "wc_review_avg": [ 347.8, 71.39019540525155 ], "wc_reply_reviewers_avg": [ 130.4, 183.9593433343357 ], "wc_reply_authors_avg": [ 281.6, 357.27222114236645 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8728715609439694, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17976253356895398646&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Harvard University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.harvard.edu;https://www.microsoft.com", "aff_unique_abbr": "Harvard;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A New Neural Kernel Regime: The Inductive Bias of Multi-Task Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96263", "id": "APBq3KAmFa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=APBq3KAmFa", "openreview": "https://openreview.net/forum?id=APBq3KAmFa", "poster": "", "project": "", "author_site": "Julia Nakhleh, Joseph Shenouda, Robert Nowak", "tldr": "", "abstract": "This paper studies the properties of solutions to multi-task shallow ReLU neural network learning problems, wherein the network is trained to fit a dataset with minimal sum of squared weights. Remarkably, the solutions learned for each individual task resemble those obtained by solving a kernel regression problem, revealing a novel connection between neural networks and kernel methods. It is known that single-task neural network learning problems are equivalent to a minimum norm interpolation problem in a non-Hilbertian Banach space, and that the solutions of such problems are generally non-unique. In contrast, we prove that the solutions to univariate-input, multi-task neural network interpolation problems are almost always unique, and coincide with the solution to a minimum-norm interpolation problem in a Sobolev (Reproducing Kernel) Hilbert Space. We also demonstrate a similar phenomenon in the multivariate-input case; specifically, we show that neural network learning problems with large numbers of tasks are approximately equivalent to an $\\ell^2$ (Hilbert space) minimization problem over a fixed kernel determined by the optimal neurons.", "keywords": "neural networks;weight decay;multitask learning;regularization;kernels", "primary_area": "learning_theory", "supplementary_material": "", "author": "Julia B Nakhleh;Joseph Shenouda;Robert D Nowak", "authorids": "~Julia_B_Nakhleh1;~Joseph_Shenouda1;~Robert_D_Nowak1", "gender": ";M;M", "homepage": ";https://joeshenouda.github.io/;http://nowak.ece.wisc.edu", "dblp": "276/0600;300/9012;n/RobertDNowak", "google_scholar": "https://scholar.google.com/citations?hl=en;GslaaDUAAAAJ;fn13u8IAAAAJ", "orcid": "0000-0002-0105-2924;;", "linkedin": ";joseph-shenouda-723231141;", "or_profile": "~Julia_B_Nakhleh1;~Joseph_Shenouda1;~Robert_D_Nowak1", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu;", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nnakhleh2024a,\ntitle={A New Neural Kernel Regime: The Inductive Bias of Multi-Task Learning},\nauthor={Julia B Nakhleh and Joseph Shenouda and Robert D Nowak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=APBq3KAmFa}\n}", "github": "", "reviewers": "Snxf;XF9Q;aewJ;WQUb", "pdf_size": 3294890, "rating": "3;5;6;6", "confidence": "5;3;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "66;86;175;94", "wc_strengths": "55;75;119;24", "wc_weaknesses": "163;291;446;158", "wc_questions": "26;213;1;1", "wc_limitations": "23;27;6;1", "wc_review": "333;692;747;278", "wc_reply_reviewers": "115;244;91;31", "wc_reply_authors": "667;822;33;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.25, 41.541395017500314 ], "wc_strengths_avg": [ 68.25, 34.47734763580284 ], "wc_weaknesses_avg": [ 264.5, 117.5680653919252 ], "wc_questions_avg": [ 60.25, 88.77886854426565 ], "wc_limitations_avg": [ 14.25, 10.985786271359915 ], "wc_review_avg": [ 512.5, 208.8187012697857 ], "wc_reply_reviewers_avg": [ 120.25, 77.7218598593729 ], "wc_reply_authors_avg": [ 380.5, 368.2869126102637 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6YfUIvYAJXwJ:scholar.google.com/&scioq=A+New+Neural+Kernel+Regime:+The+Inductive+Bias+of+Multi-Task+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "wisc.edu;wisc.edu;", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Optimized Feature Generation for Tabular Data via LLMs with Decision Tree Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96262", "id": "APSBwuMopO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=APSBwuMopO", "openreview": "https://openreview.net/forum?id=APSBwuMopO", "poster": "/media/PosterPDFs/NeurIPS%202024/96262.png?t=1730187417.053583", "project": "", "author_site": "Jaehyun Nam, Kyuyoung Kim, Seunghyuk Oh, Jihoon Tack, Jaehyung Kim, Jinwoo Shin", "tldr": "", "abstract": "In tabular prediction tasks, tree-based models combined with automated feature engineering methods often outperform deep learning approaches that rely on learned representations. While these feature engineering techniques are effective, they typically depend on a pre-defined search space and primarily use validation scores for feature selection, thereby missing valuable insights from previous experiments.\nTo address these limitations, we propose a novel tabular learning framework that utilizes large language models (LLMs), termed Optimizing Column feature generator with decision Tree reasoning (OCTree). Our key idea is to leverage the reasoning capabilities of LLMs to identify effective feature generation rules without manually specifying the search space and provide language-based reasoning information highlighting past experiments as feedback for iterative rule improvements. We use decision trees to convey this reasoning information, as they can be easily represented in natural language, effectively providing knowledge from prior experiments (i.e., the impact of the generated features on performance) to the LLMs. Our empirical results demonstrate that OCTree consistently enhances the performance of various prediction models across diverse benchmarks, outperforming competing automated feature engineering methods. Code is available at https://github.com/jaehyun513/OCTree.", "keywords": "Tabular Learning;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jaehyun Nam;Kyuyoung Kim;Seunghyuk Oh;Jihoon Tack;Jaehyung Kim;Jinwoo Shin", "authorids": "~Jaehyun_Nam2;~Kyuyoung_Kim1;~Seunghyuk_Oh1;~Jihoon_Tack1;~Jaehyung_Kim1;~Jinwoo_Shin1", "gender": ";;M;M;M;", "homepage": "https://jaehyun513.github.io/;https://www.seunghyukoh.com;https://jihoontack.github.io;https://sites.google.com/view/jaehyungkim;https://sites.google.com/site/mijirim/;https://kykim0.github.io", "dblp": "162/5227;;267/5487;02/7206-1;31/7062;", "google_scholar": "https://scholar.google.com/citations?hl=en;IXJcR1gAAAAJ;eW8-OT4AAAAJ;https://scholar.google.co.kr/citations?user=6OYOsGsAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;9iLSqKAAAAAJ", "orcid": ";;;;;", "linkedin": ";seunghyuk-oh-570347197/;;;;", "or_profile": "~Jaehyun_Nam2;~Seunghyuk_Oh1;~Jihoon_Tack1;~Jaehyung_Kim1;~Jinwoo_Shin1;~Kyu-Young_Kim1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Meta FAIR;Carnegie Mellon University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;meta.com;andrew.cmu.edu;kaist.ac.kr;kaist.edu", "position": "PhD student;MS student;Intern;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nnam2024optimized,\ntitle={Optimized Feature Generation for Tabular Data via {LLM}s with Decision Tree Reasoning},\nauthor={Jaehyun Nam and Kyuyoung Kim and Seunghyuk Oh and Jihoon Tack and Jaehyung Kim and Jinwoo Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=APSBwuMopO}\n}", "github": "", "reviewers": "245r;8DXj;Pmbp;6JM8", "pdf_size": 4498692, "rating": "3;4;6;6", "confidence": "3;4;5;4", "soundness": "2;2;2;3", "novelty": "1;2;3;3", "presentation": "1;3;3;3", "wc_summary": "84;78;105;27", "wc_strengths": "21;46;68;38", "wc_weaknesses": "123;92;457;45", "wc_questions": "115;55;2;274", "wc_limitations": "75;1;2;23", "wc_review": "418;272;634;407", "wc_reply_reviewers": "254;161;27;32", "wc_reply_authors": "847;661;52;44", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 73.5, 28.6574597618142 ], "wc_strengths_avg": [ 43.25, 16.90229274388537 ], "wc_weaknesses_avg": [ 179.25, 162.7457756748236 ], "wc_questions_avg": [ 111.5, 101.98161599033426 ], "wc_limitations_avg": [ 25.25, 30.036436206714004 ], "wc_review_avg": [ 432.75, 129.6367521191425 ], "wc_reply_reviewers_avg": [ 118.5, 94.8959957005563 ], "wc_reply_authors_avg": [ 401.0, 359.08425195210106 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16859781806532629718&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr;meta.com;andrew.cmu.edu;kaist.ac.kr;kaist.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Meta;Carnegie Mellon University", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.kaist.ac.kr;https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "KAIST;Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Model Decides How to Tokenize: Adaptive DNA Sequence Tokenization with MxDNA", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96261", "id": "AQ1umQL7dZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AQ1umQL7dZ", "openreview": "https://openreview.net/forum?id=AQ1umQL7dZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96261.png?t=1732074703.4246619", "project": "", "author_site": "Lifeng Qiao, Peng Ye, Yuchen Ren, Weiqiang Bai, Chaoqi Liang, Xinzhu Ma, Nanqing Dong, Wanli Ouyang", "tldr": "", "abstract": "Foundation models have made significant strides in understanding the genomic language of DNA sequences. However, previous models typically adopt the tokenization methods designed for natural language, which are unsuitable for DNA sequences due to their unique characteristics. In addition, the optimal approach to tokenize DNA remains largely under-explored, and may not be intuitively understood by humans even if discovered. To address these challenges, we introduce MxDNA, a novel framework where the model autonomously learns an effective DNA tokenization strategy through gradient decent. MxDNA employs a sparse Mixture of Convolution Experts coupled with a deformable convolution to model the tokenization process, with the discontinuous, overlapping, and ambiguous nature of meaningful genomic segments explicitly considered. On Nucleotide Transformer Benchmarks and Genomic Benchmarks, MxDNA demonstrates superior performance to existing methods with less pretraining data and time, highlighting its effectiveness. Finally, we show that MxDNA learns unique tokenization strategy distinct to those of previous methods and captures genomic functionalities at a token level during self-supervised pretraining. Our MxDNA aims to provide a new perspective on DNA tokenization, potentially offering broad applications in various domains and yielding profound insights. Code is available at https://github.com/qiaoqiaoLF/MxDNA.", "keywords": "genomics;tokenization;foundation models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Lifeng Qiao;Peng Ye;Yuchen Ren;Weiqiang Bai;chaoqi liang;Xinzhu Ma;Nanqing Dong;Wanli Ouyang", "authorids": "~Lifeng_Qiao1;~Peng_Ye4;~Yuchen_Ren1;~Weiqiang_Bai1;~chaoqi_liang1;~Xinzhu_Ma1;~Nanqing_Dong1;~Wanli_Ouyang1", "gender": "M;M;;;M;M;;", "homepage": "https://github.com/qiaoqiaoLF;;;;https://github.com/ChaoqiLiang;https://github.com/xinzhuma;;", "dblp": "55/10318;53/930-6;;;320/0293;191/3902;198/1455;", "google_scholar": ";UEZZP5QAAAAJ;;;r1yke4EAAAAJ;8PuKa_8AAAAJ;0DX2YsQAAAAJ;", "orcid": ";0000-0002-8486-7562;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Lifeng_Qiao1;~Peng_Ye4;~Yuchen_Ren1;~Weiqiang_Bai1;~chaoqi_liang1;~Xinzhu_Ma1;~Nanqing_Dong1;~Wanli_Ouyang1", "aff": "Shanghai Jiaotong University;Fudan University;;;Harbin Institute of Technology;The Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;", "aff_domain": "sjtu.edu.cn;fudan.edu.cn;;;hit.edu.cn;cuhk.edu.hk;pjlab.org.cn;", "position": "Undergrad student;PhD student;;;PhD student;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nqiao2024model,\ntitle={Model Decides How to Tokenize: Adaptive {DNA} Sequence Tokenization with Mx{DNA}},\nauthor={Lifeng Qiao and Peng Ye and Yuchen Ren and Weiqiang Bai and chaoqi liang and Xinzhu Ma and Nanqing Dong and Wanli Ouyang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AQ1umQL7dZ}\n}", "github": "", "reviewers": "eT4M;RCJz;nT57;JPfb", "pdf_size": 3038600, "rating": "6;6;6;7", "confidence": "4;3;4;4", "soundness": "4;3;3;3", "novelty": "3;2;3;1", "presentation": "2;3;3;3", "wc_summary": "186;104;76;70", "wc_strengths": "125;152;45;144", "wc_weaknesses": "254;308;57;169", "wc_questions": "298;167;47;16", "wc_limitations": "63;18;53;94", "wc_review": "926;749;278;493", "wc_reply_reviewers": "141;20;26;10", "wc_reply_authors": "1228;424;184;194", "reply_reviewers": "2;1;1;1", "reply_authors": "4;4;3;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 46.2709412050371 ], "wc_strengths_avg": [ 116.5, 42.42935304715357 ], "wc_weaknesses_avg": [ 197.0, 94.80770010922109 ], "wc_questions_avg": [ 132.0, 111.2002697838454 ], "wc_limitations_avg": [ 57.0, 27.120103244641236 ], "wc_review_avg": [ 611.5, 246.51622664644208 ], "wc_reply_reviewers_avg": [ 49.25, 53.27933464299268 ], "wc_reply_authors_avg": [ 507.5, 426.9153897436821 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2510214521938316977&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;fudan.edu.cn;;;hit.edu.cn;cuhk.edu.hk;pjlab.org.cn;", "author_num": 8, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Shanghai Jiao Tong University;Fudan University;Harbin Institute of Technology;Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.fudan.edu.cn;http://www.hit.edu.cn/;https://www.cuhk.edu.hk;http://www.shailab.org/", "aff_unique_abbr": "SJTU;Fudan;HIT;CUHK;Shanghai AI Lab", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Harbin;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "xLSTM: Extended Long Short-Term Memory", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96260", "id": "ARAxPPIAhq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ARAxPPIAhq", "openreview": "https://openreview.net/forum?id=ARAxPPIAhq", "poster": "/media/PosterPDFs/NeurIPS%202024/96260.png?t=1733330725.300208", "project": "", "author_site": "Maximilian Beck, Korbinian P\u00f6ppel, Markus Spanring, Andreas Auer, Oleksandra Prudnikova, Michael Kopp, G\u00fcnter Klambauer, Johannes Brandstetter, Sepp Hochreiter", "tldr": "", "abstract": "In the 1990s, the constant error carousel and gating were introduced as the central ideas of the Long Short-Term Memory (LSTM). Since then, LSTMs have stood the test of time and contributed to numerous deep learning success stories, in particular they constituted the first Large Language Models (LLMs). However, the advent of the Transformer technology with parallelizable self-attention at its core marked the dawn of a new era, outpacing LSTMs at scale. We now raise a simple question: How far do we get in language modeling when scaling LSTMs to billions of parameters, leveraging the latest techniques from modern LLMs, but mitigating known limitations of LSTMs? Firstly, we introduce exponential gating with appropriate normalization and stabilization techniques. Secondly, we modify the LSTM memory structure, obtaining: (i) sLSTM with a scalar memory, a scalar update, and new memory mixing, (ii) mLSTM that is fully parallelizable with a matrix memory and a covariance update rule. Integrating these LSTM extensions into residual block backbones yields xLSTM blocks that are then residually stacked into xLSTM architectures. Exponential gating and modified memory structures boost xLSTM capabilities to perform favorably when compared to state-of-the-art Transformers and State Space Models, both in performance and scaling.", "keywords": "LSTM;LLM;Language modeling;NLP;Memory", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/18ca2452f8248a0a7a0f83255e2a0d37a4576dab.zip", "author": "Maximilian Beck;Korbinian P\u00f6ppel;Markus Spanring;Andreas Auer;Oleksandra Prudnikova;Michael K Kopp;G\u00fcnter Klambauer;Johannes Brandstetter;Sepp Hochreiter", "authorids": "~Maximilian_Beck1;~Korbinian_P\u00f6ppel1;~Markus_Spanring1;~Andreas_Auer2;~Oleksandra_Prudnikova1;~Michael_K_Kopp1;~G\u00fcnter_Klambauer1;~Johannes_Brandstetter1;~Sepp_Hochreiter1", "gender": "M;M;M;M;F;M;M;M;M", "homepage": "http://maxbeck.ai;https://korbi.ai;;https://apointa.github.io/;;;http://www.bioinf.jku.at/people/klambauer/;;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/", "dblp": ";;;;;;119/4499;251/8691;h/SeppHochreiter.html", "google_scholar": "_YcZWcYAAAAJ;sBrtrxwAAAAJ;IcPmZIoAAAAJ;Rg_Ooc8AAAAJ;;DCwTo40AAAAJ;https://scholar.google.at/citations?user=rb2AvxIAAAAJ;KiRvOHcAAAAJ;https://scholar.google.at/citations?user=tvUH3WMAAAAJ", "orcid": ";;;;;0000-0002-1385-1109;0000-0003-2861-5552;;0000-0001-7449-2528", "linkedin": "maximilianmbeck/;;https://at.linkedin.com/in/markus-spanring-a2a1041a0;andreas-auer-cs/;prudnikova-oleksandra/;michael-kopp-95931490;;;https://linkedin.com/in/sepp-hochreiter-41514846", "or_profile": "~Maximilian_Beck1;~Korbinian_P\u00f6ppel1;~Markus_Spanring1;~Andreas_Auer2;~Oleksandra_Prudnikova1;~Michael_K_Kopp1;~G\u00fcnter_Klambauer1;~Johannes_Brandstetter1;~Sepp_Hochreiter1", "aff": "Johannes Kepler University Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;Amazon;Johannes Kepler Universit\u00e4t Linz;Nirmata Research LLP;Johannes Kepler Universit\u00e4t Linz;Microsoft;Johannes Kepler University Linz", "aff_domain": "jku.at;jku.at;jku.at;amazon.com;jku.at;nirmata.tech;jku.at;microsoft.com;jku.at", "position": "PhD student;PhD student;Postdoc;Intern;PhD student;Principal Researcher;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nbeck2024xlstm,\ntitle={x{LSTM}: Extended Long Short-Term Memory},\nauthor={Maximilian Beck and Korbinian P{\\\"o}ppel and Markus Spanring and Andreas Auer and Oleksandra Prudnikova and Michael K Kopp and G{\\\"u}nter Klambauer and Johannes Brandstetter and Sepp Hochreiter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ARAxPPIAhq}\n}", "github": "", "reviewers": "WcbD;TAMc;ZvGL", "pdf_size": 1583874, "rating": "7;7;10", "confidence": "5;4;4", "soundness": "3;3;4", "novelty": "4;3;4", "presentation": "3;3;4", "wc_summary": "90;203;71", "wc_strengths": "27;88;191", "wc_weaknesses": "90;151;25", "wc_questions": "920;13;350", "wc_limitations": "1;24;16", "wc_review": "1128;479;653", "wc_reply_reviewers": "240;117;10", "wc_reply_authors": "354;145;0", "reply_reviewers": "2;2;1", "reply_authors": "3;2;1", "rating_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 121.33333333333333, 58.265675048770255 ], "wc_strengths_avg": [ 102.0, 67.68062253456795 ], "wc_weaknesses_avg": [ 88.66666666666667, 51.44792404838984 ], "wc_questions_avg": [ 427.6666666666667, 374.33170079786487 ], "wc_limitations_avg": [ 13.666666666666666, 9.533566430716728 ], "wc_review_avg": [ 753.3333333333334, 274.2873594527381 ], "wc_reply_reviewers_avg": [ 122.33333333333333, 93.97280930614392 ], "wc_reply_authors_avg": [ 166.33333333333334, 145.30504311810913 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8245879391939683926&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "jku.at;jku.at;jku.at;amazon.com;jku.at;nirmata.tech;jku.at;microsoft.com;jku.at", "author_num": 9, "aff_unique_index": "0;1;1;2;1;3;1;4;0", "aff_unique_norm": "Johannes Kepler University;Johannes Kepler University Linz;Amazon;Nirmata Research LLP;Microsoft", "aff_unique_dep": ";;Amazon.com, Inc.;;Microsoft Corporation", "aff_unique_url": "https://www.jku.at;https://www.jku.at;https://www.amazon.com;;https://www.microsoft.com", "aff_unique_abbr": "JKU;JKU;Amazon;;Microsoft", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;0;0;1;0;1;0;1;0", "aff_country_unique": "Austria;United States" }, { "title": "Improving Neural ODE Training with Temporal Adaptive Batch Normalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96259", "id": "ARLEUVVfTL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ARLEUVVfTL", "openreview": "https://openreview.net/forum?id=ARLEUVVfTL", "poster": "/media/PosterPDFs/NeurIPS%202024/96259.png?t=1730170275.780775", "project": "", "author_site": "Su Zheng, Zhengqi Gao, Fan-Keng Sun, Duane Boning, Bei Yu, Martin D. Wong", "tldr": "", "abstract": "Neural ordinary differential equations (Neural ODEs) is a family of continuous-depth neural networks where the evolution of hidden states is governed by learnable temporal derivatives. We identify a significant limitation in applying traditional Batch Normalization (BN) to Neural ODEs, due to a fundamental mismatch --- BN was initially designed for discrete neural networks with no temporal dimension, whereas Neural ODEs operate continuously over time. To bridge this gap, we introduce temporal adaptive Batch Normalization (TA-BN), a novel technique that acts as the continuous-time analog to traditional BN. Our empirical findings reveal that TA-BN enables the stacking of more layers within Neural ODEs, enhancing their performance. Moreover, when confined to a model architecture consisting of a single Neural ODE followed by a linear layer, TA-BN achieves 91.1\\% test accuracy on CIFAR-10 with 2.2 million parameters, making it the first \\texttt{unmixed} Neural ODE architecture to approach MobileNetV2-level parameter efficiency. Extensive numerical experiments on image classification and physical system modeling substantiate the superiority of TA-BN compared to baseline methods.", "keywords": "Neural ODE;Batch Normalization", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/13a50fceeb4b0b290859290010438704ab24ce67.zip", "author": "Su Zheng;Zhengqi Gao;Fan-Keng Sun;Duane S Boning;Bei Yu;Martin D. Wong", "authorids": "~Su_Zheng2;~Zhengqi_Gao1;~Fan-Keng_Sun1;~Duane_S_Boning1;~Bei_Yu2;~Martin_D._Wong1", "gender": "Non-Binary;M;;M;M;M", "homepage": "https://shelljane.github.io/;http://zhengqigao.github.io/;https://daikon-sun.github.io/;https://boning.mit.edu/;http://www.cse.cuhk.edu.hk/~byu/index.html;https://www.ece.illinois.edu/directory/profile/mdfwong/", "dblp": ";256/9403;https://dblp.org/pers/hd/s/Sun:Fan=Keng;26/1132;28/4556-1.html;", "google_scholar": "imuy2mcAAAAJ;igvvVY4AAAAJ;sfEwE4gAAAAJ;https://scholar.google.com.tw/citations?user=oIdI_PcAAAAJ;tGneTm4AAAAJ;https://scholar.google.com.tw/citations?user=WPhoQiUAAAAJ", "orcid": ";;;0000-0002-0417-445X;0000-0001-6406-4810;", "linkedin": ";zhengqi-gao-729b51146/;fan-keng-sun/;;yubei/;", "or_profile": "~Su_Zheng2;~Zhengqi_Gao1;~Fan-Keng_Sun1;~Duane_S_Boning1;~Bei_Yu2;~Martin_D._Wong1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Illinois, Urbana Champaign", "aff_domain": "cse.cuhk.edu.hk;mit.edu;mit.edu;mit.edu;cse.cuhk.edu.hk;", "position": "PhD student;PhD student;Ph.D.;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nzheng2024improving,\ntitle={Improving Neural {ODE} Training with Temporal Adaptive Batch Normalization},\nauthor={Su Zheng and Zhengqi Gao and Fan-Keng Sun and Duane S Boning and Bei Yu and Martin D. Wong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ARLEUVVfTL}\n}", "github": "", "reviewers": "97fY;EgcE;6xoP;XMSL", "pdf_size": 661034, "rating": "4;6;6;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "88;30;76;151", "wc_strengths": "33;69;48;34", "wc_weaknesses": "63;184;265;215", "wc_questions": "34;217;5;151", "wc_limitations": "15;39;6;28", "wc_review": "233;539;400;579", "wc_reply_reviewers": "74;401;21;83", "wc_reply_authors": "163;471;13;61", "reply_reviewers": "1;4;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 43.19939235683761 ], "wc_strengths_avg": [ 46.0, 14.543039572248986 ], "wc_weaknesses_avg": [ 181.75, 74.40220090830647 ], "wc_questions_avg": [ 101.75, 86.10857971189631 ], "wc_limitations_avg": [ 22.0, 12.549900398011133 ], "wc_review_avg": [ 437.75, 135.6012075904931 ], "wc_reply_reviewers_avg": [ 144.75, 149.83052926556724 ], "wc_reply_authors_avg": [ 177.0, 178.1740721878467 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11530128384139872976&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cse.cuhk.edu.hk;mit.edu;mit.edu;mit.edu;cse.cuhk.edu.hk;", "author_num": 6, "aff_unique_index": "0;1;1;1;0;2", "aff_unique_norm": "Chinese University of Hong Kong;Massachusetts Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science and Engineering;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://web.mit.edu;https://illinois.edu", "aff_unique_abbr": "CUHK;MIT;UIUC", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Hong Kong SAR;;Urbana-Champaign", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Persistent Homology for High-dimensional Data Based on Spectral Methods", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96258", "id": "ARV1gJSOzV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ARV1gJSOzV", "openreview": "https://openreview.net/forum?id=ARV1gJSOzV", "poster": "/media/PosterPDFs/NeurIPS%202024/96258.png?t=1733422446.0059497", "project": "", "author_site": "Sebastian Damrich, Philipp Berens, Dmitry Kobak", "tldr": "", "abstract": "Persistent homology is a popular computational tool for analyzing the topology of point clouds, such as the presence of loops or voids. However, many real-world datasets with low intrinsic dimensionality reside in an ambient space of much higher dimensionality. We show that in this case traditional persistent homology becomes very sensitive to noise and fails to detect the correct topology. The same holds true for existing refinements of persistent homology. As a remedy, we find that spectral distances on the k-nearest-neighbor graph of the data, such as diffusion distance and effective resistance, allow to detect the correct topology even in the presence of high-dimensional noise. Moreover, we derive a novel closed-form formula for effective resistance, and describe its relation to diffusion distances. Finally, we apply these methods to high-dimensional single-cell RNA-sequencing data and show that spectral distances allow robust detection of cell cycle loops.", "keywords": "persistent homology;spectral methods-sequencing;topology;topological data analysis;curse of dimensionality;effective resistance;diffusion distance;single-cell RNA", "primary_area": "other", "supplementary_material": "", "author": "Sebastian Damrich;Philipp Berens;Dmitry Kobak", "authorids": "~Sebastian_Damrich1;~Philipp_Berens1;~Dmitry_Kobak2", "gender": ";M;", "homepage": ";http://www.berenslab.org;https://dkobak.github.io/", "dblp": "252/5237;78/3560;236/5191", "google_scholar": "-ClpooYAAAAJ;https://scholar.google.de/citations?user=lPQLk3QAAAAJ;BUQbD5kAAAAJ", "orcid": "0000-0003-1394-6236;;", "linkedin": "sebastian-damrich-a44a8131;;", "or_profile": "~Sebastian_Damrich1;~Philipp_Berens1;~Dmitry_Kobak2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\ndamrich2024persistent,\ntitle={Persistent Homology for High-dimensional Data Based on Spectral Methods},\nauthor={Sebastian Damrich and Philipp Berens and Dmitry Kobak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ARV1gJSOzV}\n}", "github": "", "reviewers": "aHMb;C1nu;QsUc", "pdf_size": 4621870, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;2;4", "presentation": "3;3;3", "wc_summary": "61;65;59", "wc_strengths": "20;28;61", "wc_weaknesses": "232;27;131", "wc_questions": "158;138;73", "wc_limitations": "115;12;49", "wc_review": "586;270;373", "wc_reply_reviewers": "365;23;11", "wc_reply_authors": "1295;0;0", "reply_reviewers": "2;1;1", "reply_authors": "5;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.666666666666664, 2.494438257849294 ], "wc_strengths_avg": [ 36.333333333333336, 17.745108872274887 ], "wc_weaknesses_avg": [ 130.0, 83.69388667439615 ], "wc_questions_avg": [ 123.0, 36.2859017617954 ], "wc_limitations_avg": [ 58.666666666666664, 42.60151275352659 ], "wc_review_avg": [ 409.6666666666667, 131.58605127021968 ], "wc_reply_reviewers_avg": [ 133.0, 164.12190591142914 ], "wc_reply_authors_avg": [ 431.6666666666667, 610.468854424386 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14355986058280832432&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "ASA2jdKtf3", "title": "A Causal Model of Theory-of-Mind in AI Agents", "track": "main", "status": "Reject", "tldr": "", "abstract": "Agency is a vital concept for understanding and predicting the behaviour of future AI systems. There has been much focus on the goal-directed nature of agency, i.e., the fact that AI agents may capably pursue goals. However, the dynamics of agency become significantly more complex when autonomous agents interact with other agents and humans, necessitating engagement in theory-of-mind, the ability to reason about the beliefs and intentions of others. In this paper, we extend the framework of multi-agent influence diagrams (MAIDs) to explicitly capture this complex form of reasoning. We also show that our extended framework, MAIDs with incomplete information (II-MAIDs), has a strong theoretical connection to dynamic games with incomplete information with no common prior over types. We prove the existence of important equilibria concepts in these frameworks, and illustrate the applicability of II-MAIDs using an example from the AI safety literature.", "keywords": "causality;games with incomplete information;causal inference;multi-agent influence diagrams;game theory", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Jack Foxabbott;Rohan Subramani;James Fox;Francis Rhys Ward", "authorids": "~Jack_Foxabbott1;~Rohan_Subramani1;~James_Fox2;~Francis_Rhys_Ward1", "gender": "M;M;M;M", "homepage": ";;;https://francisrhysward.wordpress.com/", "dblp": ";;;273/0874", "google_scholar": ";;hMZs5tsAAAAJ;", "orcid": ";;0000-0003-1485-2995;", "linkedin": "foxabbott/;rohan-subramani-70a919225/;;", "or_profile": "~Jack_Foxabbott1;~Rohan_Subramani1;~James_Fox2;~Francis_Rhys_Ward1", "aff": "University of Oxford;Columbia University;Department of Computer Science;Imperial College London", "aff_domain": "stats.ox.ac.uk;columbia.edu;cs.ox.ac.uk;ic.ac.uk", "position": "PhD student;Undergrad student;PhD student;PhD student", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Causal Model of Theory-of-Mind in {AI} Agents},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=ASA2jdKtf3}\n}", "github": "", "project": "", "reviewers": "vD8C;VEtx;wW2F", "site": "https://openreview.net/forum?id=ASA2jdKtf3", "pdf_size": 464854, "rating": "3;4;4", "confidence": "2;3;3", "soundness": "2;3;3", "novelty": "2;1;2", "presentation": "2;3;2", "wc_summary": "57;86;47", "wc_strengths": "31;136;73", "wc_weaknesses": "104;324;297", "wc_questions": "43;40;86", "wc_limitations": "3;75;1", "wc_review": "238;661;504", "wc_reply_reviewers": "42;247;187", "wc_reply_authors": "67;268;595", "reply_reviewers": "1;1;2", "reply_authors": "2;2;3", "rating_avg": [ 3.6666666666666665, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.333333333333336, 16.539514973407037 ], "wc_strengths_avg": [ 80.0, 43.15089802078283 ], "wc_weaknesses_avg": [ 241.66666666666666, 97.96711466382766 ], "wc_questions_avg": [ 56.333333333333336, 21.01322334996598 ], "wc_limitations_avg": [ 26.333333333333332, 34.4222150491349 ], "wc_review_avg": [ 467.6666666666667, 174.58967768901906 ], "wc_reply_reviewers_avg": [ 158.66666666666666, 86.05553762283724 ], "wc_reply_authors_avg": [ 310.0, 217.5913601225931 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cUJ9xja5JhoJ:scholar.google.com/&scioq=A+Causal+Model+of+Theory-of-Mind+in+AI+Agents&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Oxford;Columbia University;Unknown Institution;Imperial College London", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://www.ox.ac.uk;https://www.columbia.edu;;https://www.imperial.ac.uk", "aff_unique_abbr": "Oxford;Columbia;;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States;" }, { "title": "4-bit Shampoo for Memory-Efficient Network Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96257", "id": "ASqdVeifn7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ASqdVeifn7", "openreview": "https://openreview.net/forum?id=ASqdVeifn7", "poster": "/media/PosterPDFs/NeurIPS%202024/96257.png?t=1731677216.3148854", "project": "", "author_site": "Sike Wang, Pan Zhou, Jia Li, Hua Huang", "tldr": "", "abstract": "Second-order optimizers, maintaining a matrix termed a preconditioner, are superior to first-order optimizers in both theory and practice.\nThe states forming the preconditioner and its inverse root restrict the maximum size of models trained by second-order optimizers. To address this, compressing 32-bit optimizer states to lower bitwidths has shown promise in reducing memory usage. However, current approaches only pertain to first-order optimizers. In this paper, we propose the first 4-bit second-order optimizers, exemplified by 4-bit Shampoo, maintaining performance similar to that of 32-bit ones. We show that quantizing the eigenvector matrix of the preconditioner in 4-bit Shampoo is remarkably better than quantizing the preconditioner itself both theoretically and experimentally. By rectifying the orthogonality of the quantized eigenvector matrix, we enhance the approximation of the preconditioner's eigenvector matrix, which also benefits the computation of its inverse 4-th root. Besides, we find that linear square quantization slightly outperforms dynamic tree quantization when quantizing second-order optimizer states. Evaluation on various networks for image classification and natural language modeling demonstrates that our 4-bit Shampoo achieves comparable performance to its 32-bit counterpart while being more memory-efficient.", "keywords": "memory efficient;second-order optimizer;Shampoo;quantization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Sike Wang;Pan Zhou;Jia Li;Hua Huang", "authorids": "~Sike_Wang2;~Pan_Zhou3;~Jia_Li7;~Hua_Huang1", "gender": "M;;M;M", "homepage": "https://vmcl.bnu.edu.cn/group/student/f73d94ea9e104f73adde9fe5207b0c8c.htm;;https://li-jia.github.io/;https://vmcl.bnu.edu.cn/group/teacher/teacher01.html", "dblp": ";;23/6950-2;70/5618-1", "google_scholar": ";;;", "orcid": ";;;0000-0003-2587-1702", "linkedin": ";;;", "or_profile": "~Sike_Wang2;~Pan_Zhou3;~Jia_Li7;~Hua_Huang1", "aff": "Beijing Normal University;;Beijing Normal University;Beijing Normal University", "aff_domain": "bnu.edu.cn;;bnu.edu.cn;bnu.edu.cn", "position": "MS student;;Lecturer;Full Professor", "bibtex": "@inproceedings{\nwang2024bit,\ntitle={4-bit Shampoo for Memory-Efficient Network Training},\nauthor={Sike Wang and Pan Zhou and Jia Li and Hua Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ASqdVeifn7}\n}", "github": "", "reviewers": "e8nk;b2wM;RMP9;LFix", "pdf_size": 791078, "rating": "5;6;7;7", "confidence": "4;3;4;5", "soundness": "2;4;3;4", "novelty": "2;3;3;4", "presentation": "2;4;3;3", "wc_summary": "51;84;23;143", "wc_strengths": "45;56;74;61", "wc_weaknesses": "292;43;201;25", "wc_questions": "7;5;121;118", "wc_limitations": "26;34;43;1", "wc_review": "421;222;462;348", "wc_reply_reviewers": "32;0;491;12", "wc_reply_authors": "23;0;1001;0", "reply_reviewers": "1;0;4;1", "reply_authors": "2;1;5;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 44.6787141712919 ], "wc_strengths_avg": [ 59.0, 10.41633332799983 ], "wc_weaknesses_avg": [ 140.25, 111.19661640535651 ], "wc_questions_avg": [ 62.75, 56.7643153750664 ], "wc_limitations_avg": [ 26.0, 15.636495771111889 ], "wc_review_avg": [ 363.25, 91.2013568978006 ], "wc_reply_reviewers_avg": [ 133.75, 206.5748956189982 ], "wc_reply_authors_avg": [ 256.0, 430.228427698589 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17955272453759961441&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bnu.edu.cn;;bnu.edu.cn;bnu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beijing Normal University", "aff_unique_dep": "", "aff_unique_url": "https://www.bnu.edu.cn", "aff_unique_abbr": "BNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "GoMatching: A Simple Baseline for Video Text Spotting via Long and Short Term Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96256", "id": "ASv9lQcHCc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ASv9lQcHCc", "openreview": "https://openreview.net/forum?id=ASv9lQcHCc", "poster": "/media/PosterPDFs/NeurIPS%202024/96256.png?t=1730104584.977147", "project": "", "author_site": "Haibin He, Maoyuan Ye, Jing Zhang, Juhua Liu, Bo Du, Dacheng Tao", "tldr": "", "abstract": "Beyond the text detection and recognition tasks in image text spotting, video text spotting presents an augmented challenge with the inclusion of tracking. While advanced end-to-end trainable methods have shown commendable performance, the pursuit of multi-task optimization may pose the risk of producing sub-optimal outcomes for individual tasks. In this paper, we identify a main bottleneck in the state-of-the-art video text spotter: the limited recognition capability. In response to this issue, we propose to efficiently turn an off-the-shelf query-based image text spotter into a specialist on video and present a simple baseline termed GoMatching, which focuses the training efforts on tracking while maintaining strong recognition performance. To adapt the image text spotter to video datasets, we add a rescoring head to rescore each detected instance's confidence via efficient tuning, leading to a better tracking candidate pool. \nAdditionally, we design a long-short term matching module, termed LST-Matcher, to enhance the spotter's tracking capability by integrating both long- and short-term matching results via Transformer. Based on the above simple designs, GoMatching delivers new records on ICDAR15-video, DSText, BOVText, and our proposed novel test set with arbitrary-shaped text termed ArTVideo, which demonstates GoMatching's capability to accommodate general, dense, small, arbitrary-shaped, Chinese and English text scenarios while saving considerable training budgets. The code will be released.", "keywords": "video text spotting;efficient tuning;long and short term matching", "primary_area": "machine_vision", "supplementary_material": "/attachment/60e3cc32cf3555ed9fe70230139d0b0af3344253.zip", "author": "Haibin He;Maoyuan Ye;Jing Zhang;Juhua Liu;Bo Du;Dacheng Tao", "authorids": "~Haibin_He1;~Maoyuan_Ye1;~Jing_Zhang17;~Juhua_Liu2;~Bo_Du3;~Dacheng_Tao1", "gender": "M;M;M;M;;", "homepage": "https://github.com/loptr777;;;http://jszy.whu.edu.cn/liujuhua1/zh_CN/index.htm;;", "dblp": "287/7299-1.html;324/2398;05/3499-37.html;122/1682;;", "google_scholar": "https://scholar.google.com.hk/citations?user=A37-0EwAAAAJ;Xy8cr_4AAAAJ;https://scholar.google.com/citations?hl=en;wN-rIgIAAAAJ;;", "orcid": "0009-0001-2004-150X;;0000-0001-6595-7661;0000-0002-3907-8820;;", "linkedin": ";;;;;", "or_profile": "~Haibin_He1;~Maoyuan_Ye1;~Jing_Zhang17;~Juhua_Liu2;~Bo_Du3;~Dacheng_Tao1", "aff": "Wuhan University;Wuhan University;The University of Sydney;Wuhan University;;", "aff_domain": "whu.edu.cn;whu.edu.cn;sydney.edu.au;whu.edu.cn;;", "position": "PhD student;MS student;Research Fellow;Full Professor;;", "bibtex": "@inproceedings{\nhe2024gomatching,\ntitle={GoMatching: A Simple Baseline for Video Text Spotting via Long and Short Term Matching},\nauthor={Haibin He and Maoyuan Ye and Jing Zhang and Juhua Liu and Bo Du and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ASv9lQcHCc}\n}", "github": "", "reviewers": "xYhx;1HqK;BgWx;Lmjj", "pdf_size": 0, "rating": "4;4;6;6", "confidence": "4;2;5;3", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "44;78;43;65", "wc_strengths": "43;87;82;21", "wc_weaknesses": "223;123;83;204", "wc_questions": "23;38;2;10", "wc_limitations": "9;11;13;10", "wc_review": "342;337;223;310", "wc_reply_reviewers": "257;38;18;66", "wc_reply_authors": "564;66;0;192", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.5, 14.739402972983676 ], "wc_strengths_avg": [ 58.25, 27.43515081059333 ], "wc_weaknesses_avg": [ 158.25, 57.42549520900973 ], "wc_questions_avg": [ 18.25, 13.645054048995188 ], "wc_limitations_avg": [ 10.75, 1.479019945774904 ], "wc_review_avg": [ 303.0, 47.765049984271975 ], "wc_reply_reviewers_avg": [ 94.75, 95.21390392164372 ], "wc_reply_authors_avg": [ 205.5, 218.17137759110383 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14199134696654472881&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;whu.edu.cn;sydney.edu.au;whu.edu.cn;;", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Wuhan University;University of Sydney", "aff_unique_dep": ";", "aff_unique_url": "http://www.whu.edu.cn/;https://www.sydney.edu.au", "aff_unique_abbr": "WHU;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Optimal Batched Best Arm Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96255", "id": "ATSPPGEmAA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ATSPPGEmAA", "openreview": "https://openreview.net/forum?id=ATSPPGEmAA", "poster": "", "project": "", "author_site": "Tianyuan Jin, Yu Yang, Jing Tang, Xiaokui Xiao, Pan Xu", "tldr": "", "abstract": "We study the batched best arm identification (BBAI) problem, where the learner's goal is to identify the best arm while switching the policy as less as possible. In particular, we aim to find the best arm with probability $1-\\delta$ for some small constant $\\delta>0$ while minimizing both the sample complexity (total number of arm pulls) and the batch complexity (total number of batches). We propose the three-batch best arm identification (Tri-BBAI) algorithm, which is the first batched algorithm that achieves the optimal sample complexity in the asymptotic setting (i.e., $\\delta\\rightarrow 0$) and runs in $3$ batches in expectation. Based on Tri-BBAI, we further propose the almost optimal batched best arm identification (Opt-BBAI) algorithm, which is the first algorithm that achieves the near-optimal sample and batch complexity in the non-asymptotic setting (i.e., $1/\\delta$ is finite), while enjoying the same batch and sample complexity as Tri-BBAI when $\\delta$ tends to zero. Moreover, in the non-asymptotic setting, the complexity of previous batch algorithms is usually conditioned on the event that the best arm is returned (with a probability of at least $1-\\delta$), which is potentially unbounded in cases where a sub-optimal arm is returned. In contrast, the complexity of Opt-BBAI does not rely on such an event. This is achieved through a novel procedure that we design for checking whether the best arm is eliminated, which is of independent interest.", "keywords": "Best arm identification;Batched bandits", "primary_area": "bandits", "supplementary_material": "/attachment/a4fbe9c7764ca46ca191faab8bace1e51ffca9fd.zip", "author": "Tianyuan Jin;Yu Yang;Jing Tang;Xiaokui Xiao;Pan Xu", "authorids": "~Tianyuan_Jin1;~Yu_Yang15;~Jing_Tang5;~Xiaokui_Xiao2;~Pan_Xu1", "gender": "M;M;M;;M", "homepage": "https://tianyuanjin.github.io/;;https://sites.google.com/view/jtang;;https://panxulab.github.io/", "dblp": "208/2335;;83/663-4;;11/9718-2", "google_scholar": "3e5kmjsAAAAJ;https://scholar.google.com.hk/citations?user=sC2yg2gAAAAJ;https://scholar.google.com/citations?hl=en;;UkYBx6YAAAAJ", "orcid": ";;0000-0002-0785-707X;;0000-0002-2559-8622", "linkedin": ";;;;pan-xu-0931a2a6/", "or_profile": "~Tianyuan_Jin1;~Yu_Yang15;~Jing_Tang5;~Xiaokui_Xiao2;~Pan_Xu1", "aff": "National University of Singapore;Duke University;Hong Kong University of Science and Technology;;Duke University", "aff_domain": "nus.edu.sg;duke.edu;ust.hk;;duke.edu", "position": "PhD student;PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\njin2024optimal,\ntitle={Optimal Batched Best Arm Identification},\nauthor={Tianyuan Jin and Yu Yang and Jing Tang and Xiaokui Xiao and Pan Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ATSPPGEmAA}\n}", "github": "", "reviewers": "vkzZ;YCPj;Uhz8;3W3Z", "pdf_size": 561048, "rating": "3;5;6;7", "confidence": "4;2;4;3", "soundness": "2;3;3;3", "novelty": "1;3;2;3", "presentation": "2;3;3;3", "wc_summary": "54;76;149;137", "wc_strengths": "10;60;89;33", "wc_weaknesses": "124;194;427;25", "wc_questions": "1;201;8;19", "wc_limitations": "1;1;5;2", "wc_review": "190;532;678;216", "wc_reply_reviewers": "13;10;0;10", "wc_reply_authors": "306;36;0;36", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 39.99374951164244 ], "wc_strengths_avg": [ 48.0, 29.5550334122633 ], "wc_weaknesses_avg": [ 192.5, 148.10553669596555 ], "wc_questions_avg": [ 57.25, 83.24174133209853 ], "wc_limitations_avg": [ 2.25, 1.6393596310755 ], "wc_review_avg": [ 404.0, 207.72578077840987 ], "wc_reply_reviewers_avg": [ 8.25, 4.9180788932265 ], "wc_reply_authors_avg": [ 94.5, 122.99085331844803 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.25482359571881275, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18112671561549788670&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "nus.edu.sg;duke.edu;ust.hk;;duke.edu", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "National University of Singapore;Duke University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.duke.edu;https://www.ust.hk", "aff_unique_abbr": "NUS;Duke;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Singapore;United States;China" }, { "title": "Freya PAGE: First Optimal Time Complexity for Large-Scale Nonconvex Finite-Sum Optimization with Heterogeneous Asynchronous Computations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96254", "id": "AUeTkSymOq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AUeTkSymOq", "openreview": "https://openreview.net/forum?id=AUeTkSymOq", "poster": "/media/PosterPDFs/NeurIPS%202024/96254.png?t=1731347787.468695", "project": "", "author_site": "Alexander Tyurin, Kaja Gruntkowska, Peter Richtarik", "tldr": "", "abstract": "In practical distributed systems, workers are typically not homogeneous, and due to differences in hardware configurations and network conditions, can have highly varying processing times. We consider smooth nonconvex finite-sum (empirical risk minimization) problems in this setup and introduce a new parallel method, Freya PAGE, designed to handle arbitrarily heterogeneous and asynchronous computations. By being robust to \"stragglers\" and adaptively ignoring slow computations, Freya PAGE offers significantly improved time complexity guarantees compared to all previous methods, including Asynchronous SGD, Rennala SGD, SPIDER, and PAGE, while requiring weaker assumptions. The algorithm relies on novel generic stochastic gradient collection strategies with theoretical guarantees that can be of interest on their own, and may be used in the design of future optimization methods. Furthermore, we establish a lower bound for smooth nonconvex finite-sum problems in the asynchronous setup, providing a fundamental time complexity limit. This lower bound is tight and demonstrates the optimality of Freya PAGE in the large-scale regime, i.e., when $\\sqrt{m} \\geq n,$ where $n$ is \\# of workers, and $m$ is \\# of data samples.", "keywords": "Nonconvex Optimization;Asynchronous Methods;Distributed Optimization;Lower Bounds;Variance Reduction", "primary_area": "optimization", "supplementary_material": "/attachment/f4cb3bcc867dc41e8e421efd2a2a82106cc039c0.zip", "author": "Alexander Tyurin;Kaja Gruntkowska;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Kaja_Gruntkowska1;~Peter_Richt\u00e1rik1", "gender": "M;F;M", "homepage": "https://k3nfalt.github.io/;;https://richtarik.org", "dblp": "203/8919;;62/8001", "google_scholar": ";H0W8ADAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4380-5848", "linkedin": ";kaja-gruntkowska-a240a924b;richtarik/", "or_profile": "~Alexander_Tyurin1;~Kaja_Gruntkowska1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\ntyurin2024freya,\ntitle={Freya {PAGE}: First Optimal Time Complexity for Large-Scale Nonconvex Finite-Sum Optimization with Heterogeneous Asynchronous Computations},\nauthor={Alexander Tyurin and Kaja Gruntkowska and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AUeTkSymOq}\n}", "github": "", "reviewers": "6ZMd;wr2A;MLsi;QBDK", "pdf_size": 709961, "rating": "5;6;6;8", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "wc_summary": "122;42;125;73", "wc_strengths": "83;69;37;131", "wc_weaknesses": "67;36;51;64", "wc_questions": "19;301;74;44", "wc_limitations": "7;7;7;45", "wc_review": "298;455;294;357", "wc_reply_reviewers": "119;0;10;27", "wc_reply_authors": "688;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 34.78864757359791 ], "wc_strengths_avg": [ 80.0, 33.83784863137726 ], "wc_weaknesses_avg": [ 54.5, 12.257650672131263 ], "wc_questions_avg": [ 109.5, 112.26419732042802 ], "wc_limitations_avg": [ 16.5, 16.454482671904334 ], "wc_review_avg": [ 351.0, 65.01922792528376 ], "wc_reply_reviewers_avg": [ 39.0, 47.185802949616104 ], "wc_reply_authors_avg": [ 172.0, 297.9127389018469 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3644775842135005781&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "One Sample Fits All: Approximating All Probabilistic Values Simultaneously and Efficiently", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96253", "id": "AUg9D2VjcF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AUg9D2VjcF", "openreview": "https://openreview.net/forum?id=AUg9D2VjcF", "poster": "/media/PosterPDFs/NeurIPS%202024/96253.png?t=1733833984.4557028", "project": "", "author_site": "Weida Li, Yaoliang Yu", "tldr": "", "abstract": "The concept of probabilistic values, such as Beta Shapley values and weighted Banzhaf values, has gained recent attention in applications like feature attribution and data valuation. However, exact computation of these values is often exponentially expensive, necessitating approximation techniques. Prior research has shown that the choice of probabilistic values significantly impacts downstream performance, with no universally superior option. Consequently, one may have to approximate multiple candidates and select the best-performing one. Although there have been many efforts to develop efficient estimators, none are intended to approximate all probabilistic values both simultaneously and efficiently. In this work, we embark on the first exploration of achieving this goal. Adhering to the principle of maximum sample reuse and avoiding amplifying factors, we propose a one-sample-fits-all framework parameterized by a sampling vector to approximate intermediate terms that can be converted to any probabilistic value. Leveraging the concept of $ (\\epsilon, \\delta) $-approximation, we theoretically identify a key formula that effectively determines the convergence rate of our framework. By optimizing the sampling vector using this formula, we obtain i) a one-for-all estimator that achieves the currently best time complexity for all probabilistic values on average, and ii) a faster generic estimator with the sampling vector optimally tuned for each probabilistic value. Particularly, our one-for-all estimator achieves the fastest convergence rate on Beta Shapley values, including the well-known Shapley value, both theoretically and empirically. Finally, we establish a connection between probabilistic values and the least square regression used in (regularized) datamodels, showing that our one-for-all estimator can solve a family of datamodels simultaneously. Our code is available at https://github.com/watml/one-for-all.", "keywords": "Beta Shapley values;weighted Banzhaf values;approximation;datamodels", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Weida Li;Yaoliang Yu", "authorids": "~Weida_Li1;~Yaoliang_Yu1", "gender": ";M", "homepage": ";https://cs.uwaterloo.ca/~y328yu/", "dblp": "121/8659;90/4989", "google_scholar": "claK_XkAAAAJ;https://scholar.google.ca/citations?user=zbXIQMsAAAAJ", "orcid": ";0000-0002-3823-0720", "linkedin": ";", "or_profile": "~Weida_Li1;~Yaoliang_Yu1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nli2024one,\ntitle={One Sample Fits All: Approximating All Probabilistic Values Simultaneously and Efficiently},\nauthor={Weida Li and Yaoliang Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AUg9D2VjcF}\n}", "github": "", "reviewers": "YTCS;k6r4;DfL8;2hZa", "pdf_size": 1029983, "rating": "6;6;6;7", "confidence": "2;1;1;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "33;87;120;119", "wc_strengths": "20;28;64;108", "wc_weaknesses": "66;104;52;127", "wc_questions": "2;4;56;4", "wc_limitations": "2;10;11;36", "wc_review": "123;233;303;394", "wc_reply_reviewers": "10;13;6;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 1.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.75, 35.35091936569684 ], "wc_strengths_avg": [ 55.0, 34.79942528261063 ], "wc_weaknesses_avg": [ 87.25, 29.810862114336782 ], "wc_questions_avg": [ 16.5, 22.819947414488052 ], "wc_limitations_avg": [ 14.75, 12.754901018824098 ], "wc_review_avg": [ 263.25, 99.07162812833955 ], "wc_reply_reviewers_avg": [ 10.0, 2.5495097567963922 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3drMmZHBZxoJ:scholar.google.com/&scioq=One+Sample+Fits+All:+Approximating+All+Probabilistic+Values+Simultaneously+and+Efficiently&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "uwaterloo.ca;uwaterloo.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "QKFormer: Hierarchical Spiking Transformer using Q-K Attention", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96252", "id": "AVd7DpiooC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AVd7DpiooC", "openreview": "https://openreview.net/forum?id=AVd7DpiooC", "poster": "/media/PosterPDFs/NeurIPS%202024/96252.png?t=1730798173.03879", "project": "", "author_site": "chenlin zhou, Han Zhang, Zhaokun Zhou, Liutao Yu, Liwei Huang, Xiaopeng Fan, Li Yuan, Zhengyu Ma, Huihui Zhou, Yonghong Tian", "tldr": "", "abstract": "Spiking Transformers, which integrate Spiking Neural Networks (SNNs) with Transformer architectures, have attracted significant attention due to their potential for low energy consumption and high performance. However, there remains a substantial gap in performance between SNNs and Artificial Neural Networks (ANNs). To narrow this gap, we have developed QKFormer, a direct training spiking transformer with the following features: i) _Linear complexity and high energy efficiency_, the novel spike-form Q-K attention module efficiently models the token or channel attention through binary vectors and enables the construction of larger models. ii) _Multi-scale spiking representation_, achieved by a hierarchical structure with the different numbers of tokens across blocks. iii) _Spiking Patch Embedding with Deformed Shortcut (SPEDS)_, enhances spiking information transmission and integration, thus improving overall performance. It is shown that QKFormer achieves significantly superior performance over existing state-of-the-art SNN models on various mainstream datasets. Notably, with comparable size to Spikformer (66.34 M, 74.81\\%), QKFormer (64.96 M) achieves a groundbreaking top-1 accuracy of **85.65\\%** on ImageNet-1k, substantially outperforming Spikformer by **10.84\\%**. To our best knowledge, this is the first time that directly training SNNs have exceeded 85\\% accuracy on ImageNet-1K.", "keywords": "Spiking Neural Network; Neuromorphic Computing; Event-driven; Transformer; Spatio-temporal", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Chenlin Zhou;Han Zhang;Zhaokun Zhou;Liutao Yu;Liwei Huang;Xiaopeng Fan;Li Yuan;Zhengyu Ma;Huihui Zhou;Yonghong Tian", "authorids": "~Chenlin_Zhou1;~Han_Zhang23;~Zhaokun_Zhou1;~Liutao_Yu1;~Liwei_Huang1;~Xiaopeng_Fan1;~Li_Yuan2;~Zhengyu_Ma1;~Huihui_Zhou1;~Yonghong_Tian1", "gender": "M;M;M;;M;M;;;M;M", "homepage": ";;;;https://grasshlw.github.io/;http://homepage.hit.edu.cn/xiaopengfan;;;https://www.researchgate.net/profile/Huihui_Zhou4/research;http://www.pkuml.org", "dblp": "298/2895;;;;27/10780;76/1458;;;;86/5857", "google_scholar": ";;;;;;;;c2mrU24AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6510-1417;0000-0003-1109-4738;0000-0003-4454-6630;;;;;;;0000-0002-2978-5935", "linkedin": ";;;;;;;;;", "or_profile": "~Chenlin_Zhou1;~Han_Zhang23;~Zhaokun_Zhou1;~Liutao_Yu1;~Liwei_Huang1;~Xiaopeng_Fan1;~Li_Yuan2;~Zhengyu_Ma1;~Huihui_Zhou1;~Yonghong_Tian1", "aff": "Peng Cheng Laboratory;Harbin Institute of Technology;Peking University;;Peking University;Harbin Institute of Technology;;;Pengcheng Lab;Peking University", "aff_domain": "pcl.ac.cn;hit.edu.cn;pku.edu.cn;;pku.edu.cn;hit.edu.cn;;;pcl.ac.cn;pku.edu.cn", "position": "Engineer;PhD student;PhD student;;PhD student;Full Professor;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024qkformer,\ntitle={{QKF}ormer: Hierarchical Spiking Transformer using Q-K Attention},\nauthor={Chenlin Zhou and Han Zhang and Zhaokun Zhou and Liutao Yu and Liwei Huang and Xiaopeng Fan and Li Yuan and Zhengyu Ma and Huihui Zhou and Yonghong Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AVd7DpiooC}\n}", "github": "", "reviewers": "Etfg;ukE8;7jT8;m2NM", "pdf_size": 1678198, "rating": "5;6;7;8", "confidence": "5;4;5;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "46;112;83;22", "wc_strengths": "35;57;182;101", "wc_weaknesses": "244;88;151;20", "wc_questions": "97;104;44;87", "wc_limitations": "1;4;33;4", "wc_review": "423;365;493;234", "wc_reply_reviewers": "19;8;13;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.75, 34.4265522525855 ], "wc_strengths_avg": [ 93.75, 56.21999199573049 ], "wc_weaknesses_avg": [ 125.75, 82.5056816225404 ], "wc_questions_avg": [ 83.0, 23.313086453749534 ], "wc_limitations_avg": [ 10.5, 13.047988350699889 ], "wc_review_avg": [ 378.75, 95.06938255821376 ], "wc_reply_reviewers_avg": [ 12.5, 4.153311931459037 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7824783191476871745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pcl.ac.cn;hit.edu.cn;pku.edu.cn;;pku.edu.cn;hit.edu.cn;;;pcl.ac.cn;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;2;1;3;2", "aff_unique_norm": "Pengcheng Laboratory;Harbin Institute of Technology;Peking University;Pengcheng Lab", "aff_unique_dep": "Peng Cheng Laboratory;;;", "aff_unique_url": "http://www.pcl.ac.cn;http://www.hit.edu.cn/;http://www.pku.edu.cn;", "aff_unique_abbr": "PCL;HIT;Peking U;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Probabilistic Conformal Distillation for Enhancing Missing Modality Robustness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96251", "id": "AVrGtVrx10", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AVrGtVrx10", "openreview": "https://openreview.net/forum?id=AVrGtVrx10", "poster": "/media/PosterPDFs/NeurIPS%202024/96251.png?t=1731565957.3057244", "project": "", "author_site": "Mengxi Chen, Fei Zhang, Zihua Zhao, Jiangchao Yao, Ya Zhang, Yanfeng Wang", "tldr": "", "abstract": "Multimodal models trained on modality-complete data are plagued with severe performance degradation when encountering modality-missing data. Prevalent cross-modal knowledge distillation-based methods precisely align the representation of modality-missing data and that of its modality-complete counterpart to enhance robustness. However, due to the irreparable information asymmetry, this determinate alignment is too stringent, easily inducing modality-missing features to capture spurious factors erroneously. In this paper, a novel multimodal Probabilistic Conformal Distillation (PCD) method is proposed, which considers the inherent indeterminacy in this alignment. Given a modality-missing input, our goal is to learn the unknown Probability Density Function (PDF) of the mapped variables in the modality-complete space, rather than relying on the brute-force point alignment. Specifically, PCD models the modality-missing feature as a probabilistic distribution, enabling it to satisfy two characteristics of the PDF. One is the extremes of probabilities of modality-complete feature points on the PDF, and the other is the geometric consistency between the modeled distributions and the peak points of different PDFs. Extensive experiments on a range of benchmark datasets demonstrate the superiority of PCD over state-of-the-art methods. Code is available at: https://github.com/mxchen-mc/PCD.", "keywords": "Robust Learning;Multimodal Learning;Missing Modality", "primary_area": "machine_vision", "supplementary_material": "", "author": "mengxi Chen;Fei Zhang;Zihua Zhao;Jiangchao Yao;Ya Zhang;Yanfeng Wang", "authorids": "~mengxi_Chen1;~Fei_Zhang3;~Zihua_Zhao3;~Jiangchao_Yao1;~Ya_Zhang1;~Yanfeng_Wang1", "gender": "F;M;M;M;F;M", "homepage": "https://mediabrain.sjtu.edu.cn/members/;;https://github.com/ZihuaZhao;https://sunarker.github.io/;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": ";;;166/5900;85/3714-2;55/5407-1.html", "google_scholar": "X1RcPb4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;1jVWGqIAAAAJ;w8oDh9QAAAAJ;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;0000-0002-5390-9053;0000-0002-3196-2347", "linkedin": ";ferenas97/;;;;", "or_profile": "~mengxi_Chen1;~Fei_Zhang3;~Zihua_Zhao3;~Jiangchao_Yao1;~Ya_Zhang1;~Yanfeng_Wang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024probabilistic,\ntitle={Probabilistic Conformal Distillation for Enhancing Missing Modality Robustness},\nauthor={mengxi Chen and Fei Zhang and Zihua Zhao and Jiangchao Yao and Ya Zhang and Yanfeng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AVrGtVrx10}\n}", "github": "", "reviewers": "UGMA;upEV;EqMG", "pdf_size": 1566273, "rating": "4;5;7", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "1;3;3", "presentation": "2;2;3", "wc_summary": "62;143;65", "wc_strengths": "35;116;125", "wc_weaknesses": "331;268;243", "wc_questions": "2;5;2", "wc_limitations": "1;9;7", "wc_review": "431;541;442", "wc_reply_reviewers": "288;25;17", "wc_reply_authors": "1077;78;59", "reply_reviewers": "1;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 90.0, 37.49666651850535 ], "wc_strengths_avg": [ 92.0, 40.47221268969612 ], "wc_weaknesses_avg": [ 280.6666666666667, 37.025516726831626 ], "wc_questions_avg": [ 3.0, 1.4142135623730951 ], "wc_limitations_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_review_avg": [ 471.3333333333333, 49.46603773185082 ], "wc_reply_reviewers_avg": [ 110.0, 125.90737336100165 ], "wc_reply_authors_avg": [ 404.6666666666667, 475.4747335266325 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v_QqhMgfzlwJ:scholar.google.com/&scioq=Probabilistic+Conformal+Distillation+for+Enhancing+Missing+Modality+Robustness&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "SJTU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Lambda: Learning Matchable Prior For Entity Alignment with Unlabeled Dangling Cases", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96250", "id": "AWFryOJaGi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AWFryOJaGi", "openreview": "https://openreview.net/forum?id=AWFryOJaGi", "poster": "/media/PosterPDFs/NeurIPS%202024/96250.png?t=1731221488.1570399", "project": "", "author_site": "Hang Yin, Liyao Xiang, Dong Ding, Yuheng He, Yihan Wu, Pengzhi Chu, Xinbing Wang, Chenghu Zhou", "tldr": "", "abstract": "We investigate the entity alignment (EA) problem with unlabeled dangling cases, meaning that partial entities have no counterparts in the other knowledge graph (KG), yet these entities are unlabeled. The problem arises when the source and target graphs are of different scales, and it is much cheaper to label the matchable pairs than the dangling entities. To address this challenge, we propose the framework \\textit{Lambda} for dangling detection and entity alignment. Lambda features a GNN-based encoder called KEESA with a spectral contrastive learning loss for EA and a positive-unlabeled learning algorithm called iPULE for dangling detection. Our dangling detection module offers theoretical guarantees of unbiasedness, uniform deviation bounds, and convergence. Experimental results demonstrate that each component contributes to overall performances that are superior to baselines, even when baselines additionally exploit 30\\% of dangling entities labeled for training.", "keywords": "Knowledge Graph;Entity Alignment;Positive-Unlabeled Learning;Dangling Cases", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/cf30598ce05b3d1995e77c2f347e1a88c5340c34.zip", "author": "Hang Yin;Liyao Xiang;Dong Ding;Yuheng He;Yihan Wu;Pengzhi Chu;Xinbing Wang;Chenghu Zhou", "authorids": "~Hang_Yin7;~Liyao_Xiang1;~Dong_Ding1;~Yuheng_He2;~Yihan_Wu4;~Pengzhi_Chu1;~Xinbing_Wang1;~Chenghu_Zhou3", "gender": "M;F;M;M;;;M;M", "homepage": "https://blog.csdn.net/weixin_47741017?spm=1000.2115.3001.5343;http://xiangliyao.cn;https://github.com/BabilaBUBU;https://oc.sjtu.edu.cn/profile;https://github.com/caracallium;https://www.si.sjtu.edu.cn/update;http://www.cs.sjtu.edu.cn/~wang-xb/;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/", "dblp": ";115/6308;;;;229/1433;96/1149.html;85/1324.html", "google_scholar": "fmbkSu8AAAAJ;;;;;;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;", "orcid": "0009-0002-2013-7373;;;;;;0000-0002-0357-8356;", "linkedin": ";;;;;;;", "or_profile": "~Hang_Yin7;~Liyao_Xiang1;~Dong_Ding1;~Yuheng_He2;~Yihan_Wu4;~Pengzhi_Chu1;~Xinbing_Wang1;~Chenghu_Zhou3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University;IGSNRR, Chinese Academy of Sciences, Beijing, China", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;cs.sjtu.edu.cn;lreis.ac.cn", "position": "PhD student;Associate Professor;Undergrad student;Undergrad student;Undergrad student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyin2024lambda,\ntitle={Lambda: Learning Matchable Prior For Entity Alignment with Unlabeled Dangling Cases},\nauthor={Hang Yin and Liyao Xiang and Dong Ding and Yuheng He and Yihan Wu and Pengzhi Chu and Xinbing Wang and Chenghu Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AWFryOJaGi}\n}", "github": "", "reviewers": "jgr6;RNuV;HTtm;cR2t", "pdf_size": 1745507, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;1;3;3", "wc_summary": "26;57;75;86", "wc_strengths": "26;32;48;81", "wc_weaknesses": "88;124;123;97", "wc_questions": "2;9;73;47", "wc_limitations": "5;1;1;1", "wc_review": "147;223;320;312", "wc_reply_reviewers": "243;0;17;14", "wc_reply_authors": "506;0;23;12", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 61.0, 22.704625079485456 ], "wc_strengths_avg": [ 46.75, 21.34683817336891 ], "wc_weaknesses_avg": [ 108.0, 15.827191791344413 ], "wc_questions_avg": [ 32.75, 28.86498744153546 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 250.5, 70.85372255569922 ], "wc_reply_reviewers_avg": [ 68.5, 100.95172113441157 ], "wc_reply_authors_avg": [ 135.25, 214.20711356068455 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6632804460802652245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;;cs.sjtu.edu.cn;lreis.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese Academy of Sciences", "aff_unique_dep": ";IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.cas.cn", "aff_unique_abbr": "SJTU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "EAGLE: Efficient Adaptive Geometry-based Learning in Cross-view Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96249", "id": "AXcYtHQnxt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AXcYtHQnxt", "openreview": "https://openreview.net/forum?id=AXcYtHQnxt", "poster": "/media/PosterPDFs/NeurIPS%202024/96249.png?t=1731349522.5627062", "project": "", "author_site": "Thanh-Dat Truong, Utsav Prabhu, Dongyi Wang, Bhiksha Raj, Susan Gauch, Jeyamkondan Subbiah, Khoa Luu", "tldr": "", "abstract": "Unsupervised Domain Adaptation has been an efficient approach to transferring the semantic segmentation model across data distributions. Meanwhile, the recent Open-vocabulary Semantic Scene understanding based on large-scale vision language models is effective in open-set settings because it can learn diverse concepts and categories. However, these prior methods fail to generalize across different camera views due to the lack of cross-view geometric modeling. At present, there are limited studies analyzing cross-view learning. To address this problem, we introduce a novel Unsupervised Cross-view Adaptation Learning approach to modeling the geometric structural change across views in Semantic Scene Understanding. First, we introduce a novel Cross-view Geometric Constraint on Unpaired Data to model structural changes in images and segmentation masks across cameras. Second, we present a new Geodesic Flow-based Correlation Metric to efficiently measure the geometric structural changes across camera views. Third, we introduce a novel view-condition prompting mechanism to enhance the view-information modeling of the open-vocabulary segmentation network in cross-view adaptation learning. The experiments on different cross-view adaptation benchmarks have shown the effectiveness of our approach in cross-view modeling, demonstrating that we achieve State-of-the-Art (SOTA) performance compared to prior unsupervised domain adaptation and open-vocabulary semantic segmentation methods.", "keywords": "Unsupervised Cross-view Adaptation;Cross-view Geometric Constraint;Semantic Segmentation;Geodesic Flow-based Correlation Metric", "primary_area": "machine_vision", "supplementary_material": "/attachment/a048c7c310dbeaa639944d9887fcbea8d3331fe2.zip", "author": "Thanh-Dat Truong;Utsav Prabhu;Dongyi Wang;Bhiksha Raj;Susan Gauch;Jeyamkondan Subbiah;Khoa Luu", "authorids": "~Thanh-Dat_Truong1;~Utsav_Prabhu1;~Dongyi_Wang1;~Bhiksha_Raj1;~Susan_Gauch1;~Jeyamkondan_Subbiah1;~Khoa_Luu2", "gender": "M;;;M;F;M;M", "homepage": "https://truongthanhdat.github.io/;;;https://www.cs.cmu.edu/directory/bhikshar/;http://www.csce.uark.edu/~sgauch/;https://food-science.uark.edu/people/faculty/uid/jsubbiah/name/Jeyam+Subbiah/;https://uark-cviu.github.io", "dblp": "213/5771;;;60/3996;g/SusanGauch;;43/8092", "google_scholar": "qrmxykkAAAAJ;;;;WWUqQQMAAAAJ;fRIFkS0AAAAJ;JPAl8-gAAAAJ", "orcid": ";;;;0000-0001-5538-7343;0000-0002-8512-0735;0000-0003-2104-0901", "linkedin": "%08truongthanhdat/;;;;;jeyam-subbiah-11714517/;khoa-luu-90900215/", "or_profile": "~Thanh-Dat_Truong1;~Utsav_Prabhu1;~Dongyi_Wang1;~Bhiksha_Raj1;~Susan_Gauch1;~Jeyamkondan_Subbiah1;~Khoa_Luu2", "aff": "University of Arkansas, Fayetteville;;;Mohamed bin Zayed University of Artificial Intelligence;University of Arkansas - Fayetteville;University of Arkansas - Fayetteville;University of Arkansas, Fayetteville", "aff_domain": "uark.edu;;;mbzuai.ac.ae;uark.edu;uark.edu;uark.edu", "position": "PhD student;;;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntruong2024eagle,\ntitle={{EAGLE}: Efficient Adaptive Geometry-based Learning in Cross-view Understanding},\nauthor={Thanh-Dat Truong and Utsav Prabhu and Dongyi Wang and Bhiksha Raj and Susan Gauch and Jeyamkondan Subbiah and Khoa Luu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AXcYtHQnxt}\n}", "github": "", "reviewers": "29y1;8Etw;upJj;JhPV", "pdf_size": 25163463, "rating": "3;6;6;7", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "55;53;75;64", "wc_strengths": "45;60;99;57", "wc_weaknesses": "155;105;54;93", "wc_questions": "39;26;454;81", "wc_limitations": "4;29;34;10", "wc_review": "298;273;716;305", "wc_reply_reviewers": "215;23;0;26", "wc_reply_authors": "443;49;57;42", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.75, 8.699856320652657 ], "wc_strengths_avg": [ 65.25, 20.27775875189366 ], "wc_weaknesses_avg": [ 101.75, 36.06504540410285 ], "wc_questions_avg": [ 150.0, 176.6875773788299 ], "wc_limitations_avg": [ 19.25, 12.55736835487436 ], "wc_review_avg": [ 398.0, 183.98233610866018 ], "wc_reply_reviewers_avg": [ 66.0, 86.6112001995123 ], "wc_reply_authors_avg": [ 147.75, 170.54526525236636 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14354784101651597233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uark.edu;;;mbzuai.ac.ae;uark.edu;uark.edu;uark.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Arkansas;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.uark.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "UARK;MBZUAI", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Fayetteville;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "Linking In-context Learning in Transformers to Human Episodic Memory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96248", "id": "AYDBFxNon4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AYDBFxNon4", "openreview": "https://openreview.net/forum?id=AYDBFxNon4", "poster": "/media/PosterPDFs/NeurIPS%202024/96248.png?t=1733427585.4822266", "project": "", "author_site": "Ji-An Li, Corey Zhou, Marcus Benna, Marcelo G Mattar", "tldr": "", "abstract": "Understanding connections between artificial and biological intelligent systems can reveal fundamental principles of general intelligence. While many artificial intelligence models have a neuroscience counterpart, such connections are largely missing in Transformer models and the self-attention mechanism. Here, we examine the relationship between interacting attention heads and human episodic memory. We focus on induction heads, which contribute to in-context learning in Transformer-based large language models (LLMs). We demonstrate that induction heads are behaviorally, functionally, and mechanistically similar to the contextual maintenance and retrieval (CMR) model of human episodic memory. Our analyses of LLMs pre-trained on extensive text data show that CMR-like heads often emerge in the intermediate and late layers, qualitatively mirroring human memory biases. The ablation of CMR-like heads suggests their causal role in in-context learning. Our findings uncover a parallel between the computational mechanisms of LLMs and human memory, offering valuable insights into both research fields.", "keywords": "in-context learning;Transformer;induction head;episodic memory;mechanistic interpretability", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Li Ji-An;Corey Yishan Zhou;Marcus K. Benna;Marcelo G Mattar", "authorids": "~Li_Ji-An1;~Corey_Yishan_Zhou1;~Marcus_K._Benna1;~Marcelo_G_Mattar1", "gender": "M;Non-Binary;;M", "homepage": ";https://www.mattarlab.com/people#h.uxj4dan7mcan;;https://mattarlab.com", "dblp": "367/4504;;;378/2931.html", "google_scholar": "lge1u8kAAAAJ;;;tcCGGDsZJUsC", "orcid": "0000-0003-2419-2281;;;0000-0003-3303-2490", "linkedin": ";;;", "or_profile": "~Li_Ji-An1;~Corey_Yishan_Zhou1;~Marcus_K._Benna1;~Marcelo_G_Mattar1", "aff": "University of California, San Diego;University of California, San Diego;;New York University", "aff_domain": "ucsd.edu;ucsd.edu;;nyu.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nji-an2024linking,\ntitle={Linking In-context Learning in Transformers to Human Episodic Memory},\nauthor={Li Ji-An and Corey Yishan Zhou and Marcus K. Benna and Marcelo G Mattar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AYDBFxNon4}\n}", "github": "", "reviewers": "uMwU;rPH1;18Dm;xjvL", "pdf_size": 2265502, "rating": "4;4;6;7", "confidence": "2;2;4;4", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "1;2;4;4", "wc_summary": "63;27;73;101", "wc_strengths": "48;45;56;160", "wc_weaknesses": "168;24;119;267", "wc_questions": "45;9;122;223", "wc_limitations": "4;399;4;95", "wc_review": "328;504;374;846", "wc_reply_reviewers": "43;89;184;151", "wc_reply_authors": "47;60;197;39", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 1.299038105676658 ], "wc_summary_avg": [ 66.0, 26.476404589747453 ], "wc_strengths_avg": [ 77.25, 47.94462952198087 ], "wc_weaknesses_avg": [ 144.5, 87.64844550817773 ], "wc_questions_avg": [ 99.75, 82.03467254764902 ], "wc_limitations_avg": [ 125.5, 162.21667608479714 ], "wc_review_avg": [ 513.0, 202.80285994038644 ], "wc_reply_reviewers_avg": [ 116.75, 54.554445281754994 ], "wc_reply_authors_avg": [ 85.75, 64.66596863884435 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9622504486493761, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5793516904996402412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ucsd.edu;ucsd.edu;;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, San Diego;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.nyu.edu", "aff_unique_abbr": "UCSD;NYU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Causal Context Adjustment Loss for Learned Image Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96247", "id": "AYntCZvoLI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AYntCZvoLI", "openreview": "https://openreview.net/forum?id=AYntCZvoLI", "poster": "/media/PosterPDFs/NeurIPS%202024/96247.png?t=1731672842.6365776", "project": "", "author_site": "Minghao Han, Shiyin Jiang, Shengxi Li, Xin Deng, Mai Xu, Ce Zhu, Shuhang Gu", "tldr": "", "abstract": "In recent years, learned image compression (LIC) technologies have surpassed conventional methods notably in terms of rate-distortion (RD) performance. Most present learned techniques are VAE-based with an autoregressive entropy model, which obviously promotes the RD performance by utilizing the decoded causal context. However, extant methods are highly dependent on the fixed hand-crafted causal context. The question of how to guide the auto-encoder to generate a more effective causal context benefit for the autoregressive entropy models is worth exploring. In this paper, we make the first attempt in investigating the way to explicitly adjust the causal context with our proposed Causal Context Adjustment loss (CCA-loss). By imposing the CCA-loss, we enable the neural network to spontaneously adjust important information into the early stage of the autoregressive entropy model. Furthermore, as transformer technology develops remarkably, variants of which have been adopted by many state-of-the-art (SOTA) LIC techniques. The existing computing devices have not adapted the calculation of the attention mechanism well, which leads to a burden on computation quantity and inference latency. To overcome it, we establish a convolutional neural network (CNN) image compression model and adopt the unevenly channel-wise grouped strategy for high efficiency. Ultimately, the proposed CNN-based LIC network trained with our Causal Context Adjustment loss attains a great trade-off between inference latency and rate-distortion performance.", "keywords": "image compression;entropy model;causal context", "primary_area": "machine_vision", "supplementary_material": "", "author": "Minghao Han;Shiyin Jiang;Shengxi Li;Xin Deng;Mai Xu;Ce Zhu;Shuhang Gu", "authorids": "~Minghao_Han4;~Shiyin_Jiang1;~Shengxi_Li1;~Xin_Deng3;~Mai_Xu1;~Ce_Zhu1;~Shuhang_Gu3", "gender": ";M;M;F;M;M;M", "homepage": ";https://github.com/IrisNNN;https://scholar.google.co.uk/citations?user=GNxRL-gAAAAJ&hl=zh-CN;;http://buaamc2.net/;http://www.avc2-lab.net/~eczhu/;", "dblp": ";364/9075;147/5453;24/4856-2;20/5353;69/2369;126/1028", "google_scholar": "IqrXj74AAAAJ;;https://scholar.google.co.uk/citations?user=GNxRL-gAAAAJ;5GyRpScAAAAJ;JdhDuXAAAAAJ;C7iZbYMAAAAJ;-kSTt40AAAAJ", "orcid": ";;0000-0003-4979-9290;;;;", "linkedin": ";;;;;;", "or_profile": "~Minghao_Han4;~Shiyin_Jiang1;~Shengxi_Li1;~Xin_Deng3;~Mai_Xu1;~Ce_Zhu1;~Shuhang_Gu3", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Beihang University;Beihang University;Beihang University;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu;uestc.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "Undergrad student;Undergrad student;Full Professor;Associate Professor;Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhan2024causal,\ntitle={Causal Context Adjustment Loss for Learned Image Compression},\nauthor={Minghao Han and Shiyin Jiang and Shengxi Li and Xin Deng and Mai Xu and Ce Zhu and Shuhang Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AYntCZvoLI}\n}", "github": "", "reviewers": "cfcU;r1X1;NpdM;YBcc", "pdf_size": 1601585, "rating": "5;5;7;8", "confidence": "3;5;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "58;65;60;104", "wc_strengths": "44;67;44;123", "wc_weaknesses": "62;105;6;65", "wc_questions": "6;14;9;163", "wc_limitations": "6;8;4;60", "wc_review": "176;259;123;515", "wc_reply_reviewers": "71;53;0;29", "wc_reply_authors": "26;241;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.75, 18.793283374652763 ], "wc_strengths_avg": [ 69.5, 32.283896914715854 ], "wc_weaknesses_avg": [ 59.5, 35.245567097154215 ], "wc_questions_avg": [ 48.0, 66.45675285477014 ], "wc_limitations_avg": [ 19.5, 23.425413550244958 ], "wc_review_avg": [ 268.25, 150.48151879882127 ], "wc_reply_reviewers_avg": [ 38.25, 26.639960585556427 ], "wc_reply_authors_avg": [ 66.75, 101.16168988307777 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4061811972299616, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11103463376361849230&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uestc.edu;uestc.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;1;1;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "UESTC;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Transferable Boltzmann Generators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96246", "id": "AYq6GxxrrY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AYq6GxxrrY", "openreview": "https://openreview.net/forum?id=AYq6GxxrrY", "poster": "/media/PosterPDFs/NeurIPS%202024/96246.png?t=1733430898.0757596", "project": "", "author_site": "Leon Klein, Frank Noe", "tldr": "", "abstract": "The generation of equilibrium samples of molecular systems has been a long-standing problem in statistical physics. Boltzmann Generators are a generative machine learning method that addresses this issue by learning a transformation via a normalizing flow from a simple prior distribution to the target Boltzmann distribution of interest. Recently, flow matching has been employed to train Boltzmann Generators for small molecular systems in Cartesian coordinates. We extend this work and propose a first framework for Boltzmann Generators that are transferable across chemical space, such that they predict zero-shot Boltzmann distributions for test molecules without being retraining for these systems. These transferable Boltzmann Generators allow approximate sampling from the target distribution of unseen systems, as well as efficient reweighting to the target Boltzmann distribution. The transferability of the proposed framework is evaluated on dipeptides, where we show that it generalizes efficiently to unseen systems.\nFurthermore, we demonstrate that our proposed architecture enhances the efficiency of Boltzmann Generators trained on single molecular systems.", "keywords": "Boltzmann Generators;Normalizing Flows;Sampling Problem;Flow Matching;Molecular Dynamics", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/0d1fdc866ce7da7a927280363a66099e219f327a.zip", "author": "Leon Klein;Frank Noe", "authorids": "~Leon_Klein1;~Frank_Noe1", "gender": ";M", "homepage": ";", "dblp": "249/9262;", "google_scholar": "P1vYX2AAAAAJ;QGiLc_cAAAAJ", "orcid": "0000-0003-1095-1902;", "linkedin": ";", "or_profile": "~Leon_Klein1;~Frank_Noe1", "aff": "Freie Universit\u00e4t Berlin;Freie Universit\u00e4t Berlin", "aff_domain": "fu-berlin.de;fu-berlin.de", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nklein2024transferable,\ntitle={Transferable Boltzmann Generators},\nauthor={Leon Klein and Frank Noe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AYq6GxxrrY}\n}", "github": "", "reviewers": "84N3;GrZN;AbVU;C3Gj", "pdf_size": 6471632, "rating": "5;6;6;7", "confidence": "4;2;2;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "74;81;47;138", "wc_strengths": "90;79;46;135", "wc_weaknesses": "229;19;308;57", "wc_questions": "11;12;3;266", "wc_limitations": "40;8;30;37", "wc_review": "444;199;434;633", "wc_reply_reviewers": "61;21;29;24", "wc_reply_authors": "34;11;25;11", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.0, 33.12853754695489 ], "wc_strengths_avg": [ 87.5, 31.847291878588358 ], "wc_weaknesses_avg": [ 153.25, 119.34482602945131 ], "wc_questions_avg": [ 73.0, 111.48318258822718 ], "wc_limitations_avg": [ 28.75, 12.517487767120047 ], "wc_review_avg": [ 427.5, 153.91312484645357 ], "wc_reply_reviewers_avg": [ 33.75, 15.990231392947383 ], "wc_reply_authors_avg": [ 20.25, 9.781998773256925 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6007090939514299620&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "fu-berlin.de;fu-berlin.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Freie Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.fu-berlin.de", "aff_unique_abbr": "FU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "AZuONuYzKl", "title": "What Do You See in Common? Learning Hierarchical Prototypes over Tree-of-Life to Discover Evolutionary Traits", "track": "main", "status": "Reject", "tldr": "", "abstract": "A grand challenge in biology is to discover evolutionary traits---features of organisms common to a group of species with a shared ancestor in the tree of life (also referred to as phylogenetic tree). With the growing availability of image repositories in biology, there is a tremendous opportunity to discover evolutionary traits directly from images in the form of a hierarchy of prototypes. However, current prototype-based methods are mostly designed to operate over a flat structure of classes and face several challenges in discovering hierarchical prototypes, including the issue of learning over-specific features at internal nodes. To overcome these challenges, we introduce the framework of Hierarchy aligned Commonality through Prototypical Networks (HComP-Net). We empirically show that HComP-Net learns prototypes that are accurate, semantically consistent, and generalizable to unseen species in comparison to baselines on birds, butterflies, and fishes datasets.", "keywords": "deep learning;interpretability;prototype-based neural network;phylogeny", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/00620576623cf6f593ee19a83a2acf1b11095afd.zip", "author": "Harish Babu Manogaran;M. Maruf;Arka Daw;Kazi Sajeed Mehrab;Caleb Patrick Charpentier;Josef Uyeda;Wasila M Dahdul;Matthew J Thompson;Elizabeth G Campolongo;Kaiya L Provost;Paula Mabee;Hilmar Lapp;Anuj Karpatne", "authorids": "~Harish_Babu_Manogaran1;~M._Maruf1;~Arka_Daw1;~Kazi_Sajeed_Mehrab1;~Caleb_Patrick_Charpentier1;~Josef_Uyeda1;~Wasila_M_Dahdul1;~Matthew_J_Thompson1;~Elizabeth_G_Campolongo1;~Kaiya_L_Provost1;~Paula_Mabee1;~Hilmar_Lapp1;~Anuj_Karpatne1", "gender": "M;;M;M;M;;;M;;F;F;M;", "homepage": ";https://people.cs.vt.edu/marufm/;https://people.cs.vt.edu/darka/;;;https://uyedalab.com;;;;http://kaiyaprovost.com;https://www.neonscience.org/person/paula-mabee;http://lappland.io;http://people.cs.vt.edu/karpatne/", "dblp": "348/9579.html;268/8054.html;252/5645;290/2015;348/9849;;;;;;;25/1647;09/9720", "google_scholar": "tkA8j9QAAAAJ;SiY3Sz4AAAAJ;pz2Nm8AAAAAJ;rdh3gVMAAAAJ;;lo-oG3EAAAAJ;;;;4juxrs8AAAAJ;ZKnlvcoAAAAJ;CK6Qg7gAAAAJ;", "orcid": "0000-0003-3709-4656;;;;0000-0002-9787-7081;;;0000-0003-0583-8585;;;;0000-0001-9107-0714;", "linkedin": "harish-babu-m/;;arka-daw-1207a41a3/;;;;;thompson-m-j/;;;;hlapp/;", "or_profile": "~Harish_Babu_Manogaran1;~M._Maruf1;~Arka_Daw1;~Kazi_Sajeed_Mehrab1;~Caleb_Patrick_Charpentier1;~Josef_Uyeda1;~Wasila_M_Dahdul1;~Matthew_J_Thompson1;~Elizabeth_G_Campolongo1;~Kaiya_L_Provost1;~Paula_Mabee1;~Hilmar_Lapp1;~Anuj_Karpatne1", "aff": "Virginia Polytechnic Institute and State University;Virginia Tech;Oak Ridge National Laboratory;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;;Ohio State University, Columbus;;Adelphi University;National Ecological Observatory Network, Battelle;Duke University;Virginia Polytechnic Institute and State University", "aff_domain": "vt.edu;vt.edu;ornl.gov;vt.edu;vt.edu;vt.edu;;osu.edu;;adelphi.edu;battelle.org;duke.edu;vt.edu", "position": "MS student;PhD student;Researcher;PhD student;PhD student;Assistant Professor;;Research Software Engineer;;Assistant Professor;Principal Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024what,\ntitle={What Do You See in Common? Learning Hierarchical Prototypes over Tree-of-Life to Discover Evolutionary Traits},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=AZuONuYzKl}\n}", "github": "", "project": "", "reviewers": "Ghgm;hQyP;2ruh;yoRo", "site": "https://openreview.net/forum?id=AZuONuYzKl", "pdf_size": 43735783, "rating": "5;6;7;7", "confidence": "3;2;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "104;74;47;30", "wc_strengths": "28;108;89;37", "wc_weaknesses": "111;105;28;139", "wc_questions": "18;5;51;5", "wc_limitations": "42;16;8;10", "wc_review": "303;308;223;221", "wc_reply_reviewers": "0;20;12;0", "wc_reply_authors": "0;0;9;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.75, 28.039035290109393 ], "wc_strengths_avg": [ 65.5, 33.826764551165695 ], "wc_weaknesses_avg": [ 95.75, 41.166582321101174 ], "wc_questions_avg": [ 19.75, 18.806581294855267 ], "wc_limitations_avg": [ 19.0, 13.601470508735444 ], "wc_review_avg": [ 263.75, 41.79339062579154 ], "wc_reply_reviewers_avg": [ 8.0, 8.48528137423857 ], "wc_reply_authors_avg": [ 2.25, 3.897114317029974 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12643484683216965860&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0;0;2;3;4;5;0", "aff_unique_norm": "Virginia Tech;Oak Ridge National Laboratory;Ohio State University;Adelphi University;Battelle;Duke University", "aff_unique_dep": ";;;;National Ecological Observatory Network;", "aff_unique_url": "https://www.vt.edu;https://www.ornl.gov;https://www.osu.edu;https://www.adelphi.edu;https://www.battelle.org;https://www.duke.edu", "aff_unique_abbr": "VT;ORNL;OSU;Adelphi;;Duke", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Flexible task abstractions emerge in linear networks with fast and bounded units", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96245", "id": "AbTpJl7vN6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AbTpJl7vN6", "openreview": "https://openreview.net/forum?id=AbTpJl7vN6", "poster": "/media/PosterPDFs/NeurIPS%202024/96245.png?t=1733936099.8516247", "project": "", "author_site": "Kai Sandbrink, Jan Bauer, Alexandra Proca, Andrew Saxe, Christopher Summerfield, Ali Hummos", "tldr": "", "abstract": "Animals survive in dynamic environments changing at arbitrary timescales, but such data distribution shifts are a challenge to neural networks. To adapt to change, neural systems may change a large number of parameters, which is a slow process involving forgetting past information. In contrast, animals leverage distribution changes to segment their stream of experience into tasks and associate them with internal task abstracts. Animals can then respond flexibly by selecting the appropriate task abstraction. However, how such flexible task abstractions may arise in neural systems remains unknown. Here, we analyze a linear gated network where the weights and gates are jointly optimized via gradient descent, but with neuron-like constraints on the gates including a faster timescale, non-negativity, and bounded activity. We observe that the weights self-organize into modules specialized for tasks or sub-tasks encountered, while the gates layer forms unique representations that switch the appropriate weight modules (task abstractions). We analytically reduce the learning dynamics to an effective eigenspace, revealing a virtuous cycle: fast adapting gates drive weight specialization by protecting previous knowledge, while weight specialization in turn increases the update rate of the gating layer. Task switching in the gating layer accelerates as a function of curriculum block size and task training, mirroring key findings in cognitive neuroscience. We show that the discovered task abstractions support generalization through both task and subtask composition, and we extend our findings to a non-linear network switching between two tasks. Overall, our work offers a theory of cognitive flexibility in animals as arising from joint gradient descent on synaptic and neural gating in a neural network architecture.", "keywords": "Deep linear networks;Learning dynamics;Cognitive Science;Cognitive control;Task representations", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Kai Jappe Sandbrink;Jan Philipp Bauer;Alexandra Maria Proca;Andrew M Saxe;Christopher Summerfield;Ali Hummos", "authorids": "~Kai_Jappe_Sandbrink1;~Jan_Philipp_Bauer1;~Alexandra_Maria_Proca1;~Andrew_M_Saxe1;~Christopher_Summerfield2;~Ali_Hummos1", "gender": "M;M;F;M;M;M", "homepage": "https://kjsandbrink.github.io/;https://japhba.github.io;https://aproca.github.io/;https://www.saxelab.org;https://humaninformationprocessing.com;", "dblp": "374/3095;;;39/6894;;", "google_scholar": "1wVliBkAAAAJ;t3i5WEMAAAAJ;ONLlzEUAAAAJ;h0Al1fcAAAAJ;;YFDOLsUAAAAJ", "orcid": "0000-0003-0360-075X;0009-0006-0934-6835;0000-0003-1516-0114;0000-0002-9831-8812;; 0000-0003-4831-305X", "linkedin": "kaisandbrink;;;;;ali-hummos-b77a1422/", "or_profile": "~Kai_Jappe_Sandbrink1;~Jan_Philipp_Bauer1;~Alexandra_Maria_Proca1;~Andrew_M_Saxe1;~Christopher_Summerfield2;~Ali_Hummos1", "aff": "EPFL - EPF Lausanne;Hebrew University of Jerusalem;Imperial College London;University College London, University of London;;Massachusetts Institute of Technology", "aff_domain": "epfl.ch;huji.ac.il;ic.ac.uk;ucl.ac.uk;;mit.edu", "position": "PhD student;PhD student;PhD student;Full Professor;;Postdoc", "bibtex": "@inproceedings{\nsandbrink2024flexible,\ntitle={Flexible task abstractions emerge in linear networks with fast and bounded units},\nauthor={Kai Jappe Sandbrink and Jan Philipp Bauer and Alexandra Maria Proca and Andrew M Saxe and Christopher Summerfield and Ali Hummos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AbTpJl7vN6}\n}", "github": "", "reviewers": "9Ed7;SwAN;pBhB", "pdf_size": 19808498, "rating": "6;8;8", "confidence": "3;3;3", "soundness": "3;4;4", "novelty": "3;4;4", "presentation": "4;4;4", "wc_summary": "100;87;162", "wc_strengths": "84;70;77", "wc_weaknesses": "188;28;198", "wc_questions": "1;115;96", "wc_limitations": "2;4;23", "wc_review": "375;304;556", "wc_reply_reviewers": "138;15;24", "wc_reply_authors": "0;32;29", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 116.33333333333333, 32.72443871006635 ], "wc_strengths_avg": [ 77.0, 5.715476066494082 ], "wc_weaknesses_avg": [ 138.0, 77.88880963698615 ], "wc_questions_avg": [ 70.66666666666667, 49.86871653540814 ], "wc_limitations_avg": [ 9.666666666666666, 9.46337971105226 ], "wc_review_avg": [ 411.6666666666667, 106.09534496019239 ], "wc_reply_reviewers_avg": [ 59.0, 55.98214000911362 ], "wc_reply_authors_avg": [ 20.333333333333332, 14.429907214608907 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18220095191457597272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "epfl.ch;huji.ac.il;ic.ac.uk;ucl.ac.uk;;mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "EPFL;Hebrew University of Jerusalem;Imperial College London;University College London;Massachusetts Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.epfl.ch;https://www.huji.ac.il;https://www.imperial.ac.uk;https://www.ucl.ac.uk;https://web.mit.edu", "aff_unique_abbr": "EPFL;HUJI;ICL;UCL;MIT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Lausanne;Jerusalem;", "aff_country_unique_index": "0;1;2;2;3", "aff_country_unique": "Switzerland;Israel;United Kingdom;United States" }, { "title": "Expanding Sparse Tuning for Low Memory Usage", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96244", "id": "AbZyNGWfpN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AbZyNGWfpN", "openreview": "https://openreview.net/forum?id=AbZyNGWfpN", "poster": "/media/PosterPDFs/NeurIPS%202024/96244.png?t=1731240819.3844101", "project": "", "author_site": "Shufan Shen, Junshu Sun, Xiangyang Ji, Qingming Huang, Shuhui Wang", "tldr": "", "abstract": "Parameter-efficient fine-tuning (PEFT) is an effective method for adapting pre-trained vision models to downstream tasks by tuning a small subset of parameters. Among PEFT methods, sparse tuning achieves superior performance by only adjusting the weights most relevant to downstream tasks, rather than densely tuning the whole weight matrix. However, this performance improvement has been accompanied by increases in memory usage, which stems from two factors, i.e., the storage of the whole weight matrix as learnable parameters in the optimizer and the additional storage of tunable weight indexes. In this paper, we propose a method named SNELL (Sparse tuning with kerNELized LoRA) for sparse tuning with low memory usage. To achieve low memory usage, SNELL decomposes the tunable matrix for sparsification into two learnable low-rank matrices, saving from the costly storage of the whole original matrix. A competition-based sparsification mechanism is further proposed to avoid the storage of tunable weight indexes. To maintain the effectiveness of sparse tuning with low-rank matrices, we extend the low-rank decomposition by applying nonlinear kernel functions to the whole-matrix merging. Consequently, we gain an increase in the rank of the merged matrix, enhancing the ability of SNELL in adapting the pre-trained models to downstream tasks. Extensive experiments on multiple downstream tasks show that SNELL achieves state-of-the-art performance with low memory usage, endowing PEFT with sparse tuning to large-scale models. Codes are available at https://github.com/ssfgunner/SNELL.", "keywords": "sparse tuning;parameter-efficient fine-tuning;vision model fine-tuning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shufan Shen;Junshu Sun;Xiangyang Ji;Qingming Huang;Shuhui Wang", "authorids": "~Shufan_Shen1;~Junshu_Sun1;~Xiangyang_Ji1;~Qingming_Huang2;~Shuhui_Wang1", "gender": "M;;;;M", "homepage": ";http://vipl.ict.ac.cn/edu/student/master/202205/t20220518_123547.html;;https://qmhuang-ucas.github.io/;https://vipl.ict.ac.cn/people/shwang/", "dblp": "277/0707;354/4214;;68/4388;37/2537", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ;h-JxBSYAAAAJ", "orcid": ";;;;0000-0002-5931-0527", "linkedin": ";;;;", "or_profile": "~Shufan_Shen1;~Junshu_Sun1;~Xiangyang_Ji1;~Qingming_Huang2;~Shuhui_Wang1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;;ucas.ac.cn;ict.ac.cn", "position": "PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshen2024expanding,\ntitle={Expanding Sparse Tuning for Low Memory Usage},\nauthor={Shufan Shen and Junshu Sun and Xiangyang Ji and Qingming Huang and Shuhui Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AbZyNGWfpN}\n}", "github": "", "reviewers": "hyWE;QyQ5;CYCL;7e64", "pdf_size": 4114552, "rating": "5;5;5;7", "confidence": "5;5;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "4;3;4;3", "wc_summary": "111;81;76;47", "wc_strengths": "83;71;73;135", "wc_weaknesses": "150;310;94;141", "wc_questions": "65;65;2;8", "wc_limitations": "52;69;2;1", "wc_review": "461;596;247;332", "wc_reply_reviewers": "13;98;86;42", "wc_reply_authors": "0;80;221;20", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 78.75, 22.69774217846348 ], "wc_strengths_avg": [ 90.5, 26.091186251299497 ], "wc_weaknesses_avg": [ 173.75, 81.48734564335741 ], "wc_questions_avg": [ 35.0, 30.074906483645133 ], "wc_limitations_avg": [ 31.0, 30.108138434649195 ], "wc_review_avg": [ 409.0, 132.1419691089852 ], "wc_reply_reviewers_avg": [ 59.75, 34.10553474144629 ], "wc_reply_authors_avg": [ 80.25, 86.43024644185623 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=341705317073368502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;ict.ac.cn;;ucas.ac.cn;ict.ac.cn", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Jailbreaking Large Language Models Against Moderation Guardrails via Cipher Characters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96243", "id": "AcBLtTKK5q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AcBLtTKK5q", "openreview": "https://openreview.net/forum?id=AcBLtTKK5q", "poster": "/media/PosterPDFs/NeurIPS%202024/96243.png?t=1731378201.0216532", "project": "", "author_site": "Haibo Jin, Andy Zhou, Joe Menke, Haohan Wang", "tldr": "", "abstract": "Large Language Models (LLMs) are typically harmless but remain vulnerable to carefully crafted prompts known as ``jailbreaks'', which can bypass protective measures and induce harmful behavior. Recent advancements in LLMs have incorporated moderation guardrails that can filter outputs, which trigger processing errors for certain malicious questions. Existing red-teaming benchmarks often neglect to include questions that trigger moderation guardrails, making it difficult to evaluate jailbreak effectiveness. To address this issue, we introduce JAMBench, a harmful behavior benchmark designed to trigger and evaluate moderation guardrails. JAMBench involves 160 manually crafted instructions covering four major risk categories at multiple severity levels. Furthermore, we propose a jailbreak method, JAM (Jailbreak Against Moderation), designed to attack moderation guardrails using jailbreak prefixes to bypass input-level filters and a fine-tuned shadow model functionally equivalent to the guardrail model to generate cipher characters to bypass output-level filters. Our extensive experiments on four LLMs demonstrate that JAM achieves higher jailbreak success ($\\sim$ $\\times$ 19.88) and lower filtered-out rates ($\\sim$ $\\times$ 1/6) than baselines.", "keywords": "Large Language Models;Jailbreak;Red-teaming;Safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2a024eb0964a9f8891439847018fa98c58a588f0.zip", "author": "Haibo Jin;Andy Zhou;Joe D. Menke;Haohan Wang", "authorids": "~Haibo_Jin2;~Andy_Zhou2;~Joe_D._Menke1;~Haohan_Wang1", "gender": "M;M;;M", "homepage": ";https://www.andyzhou.ai;;http://cs.cmu.edu/~haohanw", "dblp": ";;;132/4066", "google_scholar": "https://scholar.google.com.hk/citations?user=tj0eV-sAAAAJ;https://scholar.google.com/citations?hl=en;;nZxJGeUAAAAJ", "orcid": ";;0000-0002-7374-7549;", "linkedin": ";andy-zhou-679376206/;;haohanwang/", "or_profile": "~Haibo_Jin2;~Andy_Zhou2;~Joe_D._Menke1;~Haohan_Wang1", "aff": "Zhejiang University of Technology;Department of Computer Science;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "zjut.edu.cn;cs.illinois.edu;uiuc.edu;illinois.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njin2024jailbreaking,\ntitle={Jailbreaking Large Language Models Against Moderation Guardrails via Cipher Characters},\nauthor={Haibo Jin and Andy Zhou and Joe D. Menke and Haohan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AcBLtTKK5q}\n}", "github": "", "reviewers": "uapy;53yx;BWd5", "pdf_size": 1649845, "rating": "5;5;8", "confidence": "4;5;4", "soundness": "2;3;4", "novelty": "3;2;4", "presentation": "2;2;4", "wc_summary": "71;50;87", "wc_strengths": "109;43;66", "wc_weaknesses": "239;29;50", "wc_questions": "16;27;1", "wc_limitations": "1;50;1", "wc_review": "436;199;205", "wc_reply_reviewers": "325;30;0", "wc_reply_authors": "790;36;36", "reply_reviewers": "3;1;0", "reply_authors": "4;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 69.33333333333333, 15.15109090315135 ], "wc_strengths_avg": [ 72.66666666666667, 27.35365098523819 ], "wc_weaknesses_avg": [ 106.0, 94.43516294262429 ], "wc_questions_avg": [ 14.666666666666666, 10.656244908763853 ], "wc_limitations_avg": [ 17.333333333333332, 23.098821518760555 ], "wc_review_avg": [ 280.0, 110.33585092797354 ], "wc_reply_reviewers_avg": [ 118.33333333333333, 146.64772604972626 ], "wc_reply_authors_avg": [ 287.3333333333333, 355.4390086764379 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1142009137099406849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zjut.edu.cn;cs.illinois.edu;uiuc.edu;illinois.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Zhejiang University of Technology;Unknown Institution;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.zjut.edu.cn;;https://illinois.edu", "aff_unique_abbr": "ZJUT;;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;2;2", "aff_country_unique": "China;;United States" }, { "title": "Proportional Fairness in Non-Centroid Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96242", "id": "Actjv6Wect", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Actjv6Wect", "openreview": "https://openreview.net/forum?id=Actjv6Wect", "poster": "/media/PosterPDFs/NeurIPS%202024/96242.png?t=1731700674.2331285", "project": "", "author_site": "Ioannis Caragiannis, Evi Micha, Nisarg Shah", "tldr": "", "abstract": "We revisit the recently developed framework of proportionally fair clustering, where the goal is to provide group fairness guarantees that become stronger for groups of data points that are large and cohesive. Prior work applies this framework to centroid-based clustering, where points are partitioned into clusters, and the cost to each data point is measured by its distance to a centroid assigned to its cluster. However, real-life applications often do not require such centroids. We extend the theory of proportionally fair clustering to non-centroid clustering by considering a variety of cost functions, both metric and non-metric, for a data point to be placed in a cluster with other data points. Our results indicate that Greedy Capture, a clustering algorithm developed for centroid clustering, continues to provide strong proportional fairness guarantees for non-centroid clustering, although the guarantees are significantly different and establishing them requires novel proof ideas. We also design algorithms for auditing proportional fairness of a given clustering solution. We conduct experiments on real data which suggest that traditional clustering algorithms are highly unfair, while our algorithms achieve strong fairness guarantees with a moderate loss in common clustering objectives.", "keywords": "clustering;proportional fairness;core;fully justified representation;auditing", "primary_area": "fairness", "supplementary_material": "/attachment/2027b21d2fa5690f3bf16226a65a34b97d0132c8.zip", "author": "Ioannis Caragiannis;Evi Micha;Nisarg Shah", "authorids": "~Ioannis_Caragiannis1;~Evi_Micha1;~Nisarg_Shah1", "gender": "M;F;M", "homepage": "https://cs.au.dk/~iannis/;https://evi-micha.github.io;https://www.cs.toronto.edu/~nisarg/", "dblp": "c/IoannisCaragiannis;204/3011;95/9508-1", "google_scholar": "https://scholar.google.gr/citations?hl=en;;https://scholar.google.ca/citations?user=klcw_tAAAAAJ", "orcid": "0000-0002-4918-7131;;0000-0002-0946-3402", "linkedin": "ioannis-caragiannis-293979104/;;", "or_profile": "~Ioannis_Caragiannis1;~Evi_Micha1;~Nisarg_Shah1", "aff": "Aarhus University;University of Southern California;University of Toronto", "aff_domain": "au.dk;usc.edu;utoronto.ca", "position": "Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ncaragiannis2024proportional,\ntitle={Proportional Fairness in Non-Centroid Clustering},\nauthor={Ioannis Caragiannis and Evi Micha and Nisarg Shah},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Actjv6Wect}\n}", "github": "", "reviewers": "YYLK;pibV;UU1a;X5mR", "pdf_size": 819878, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "224;425;228;146", "wc_strengths": "59;89;52;41", "wc_weaknesses": "77;81;7;14", "wc_questions": "215;58;1;37", "wc_limitations": "25;15;5;1", "wc_review": "600;668;293;239", "wc_reply_reviewers": "17;17;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 255.75, 103.03973748025564 ], "wc_strengths_avg": [ 60.25, 17.795715776557007 ], "wc_weaknesses_avg": [ 44.75, 34.368408458932166 ], "wc_questions_avg": [ 77.75, 81.8210700247803 ], "wc_limitations_avg": [ 11.5, 9.313968005098578 ], "wc_review_avg": [ 450.0, 186.5435605964462 ], "wc_reply_reviewers_avg": [ 8.5, 8.5 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3102161503516617646&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "au.dk;usc.edu;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Aarhus University;University of Southern California;University of Toronto", "aff_unique_dep": ";;", "aff_unique_url": "https://au.dk;https://www.usc.edu;https://www.utoronto.ca", "aff_unique_abbr": "AU;USC;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Denmark;United States;Canada" }, { "title": "SpelsNet: Surface Primitive Elements Segmentation by B-Rep Graph Structure Supervision", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96241", "id": "Ad3PzTuqIq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ad3PzTuqIq", "openreview": "https://openreview.net/forum?id=Ad3PzTuqIq", "poster": "/media/PosterPDFs/NeurIPS%202024/96241.png?t=1730906762.7859156", "project": "", "author_site": "Kseniya Cherenkova, Elona Dupont, Anis Kacem, Gleb Gusev, Djamila Aouada", "tldr": "", "abstract": "Within the realm of Computer-Aided Design (CAD), Boundary-Representation (B-Rep) is the standard option for modeling shapes. We present SpelsNet, a neural architecture for the segmentation of 3D point clouds into surface primitive elements under topological supervision of its B-Rep graph structure. We also propose a point-to-BRep adjacency representation that allows for adapting conventional Linear Algebraic Representation of B-Rep graph structure to the point cloud domain. Thanks to this representation, SpelsNet learns from both spatial and topological domains to enable accurate and topologically consistent surface primitive element segmentation. In particular, SpelsNet is composed of two main components; (1) a supervised 3D spatial segmentation head that outputs B-Rep element types and memberships; (2) a graph-based head that leverages the proposed topological supervision. To enable the learning of SpelsNet with the proposed point-to-BRep adjacency supervision, we extend two existing CAD datasets with the required annotations, and conduct a thorough experimental validation on them. The obtained results showcase the efficacy of SpelsNet and its topological supervision compared to a set of baselines and state-of-the-art approaches.", "keywords": "Surface Primitives Segmentation;Scan2Brep;B-Rep Topology Supervision;", "primary_area": "machine_vision", "supplementary_material": "/attachment/c1dcec49bc86a3a73d81f4dfa7cebdacfdad6d37.zip", "author": "Kseniya Cherenkova;Elona Dupont;Anis Kacem;Gleb A Gusev;Djamila Aouada", "authorids": "~Kseniya_Cherenkova1;~Elona_Dupont1;~Anis_Kacem1;~Gleb_A_Gusev1;~Djamila_Aouada1", "gender": ";F;M;M;F", "homepage": ";;;https://www.linkedin.com/in/gleb-gusev-8099542/;https://cvi2.uni.lu/profile-djamila-aouada/", "dblp": ";326/8559;116/8798;;20/7872", "google_scholar": ";i9J6YFMAAAAJ;https://scholar.google.fr/citations?user=K3EWusMAAAAJ;;WBmJVSkAAAAJ", "orcid": ";0000-0003-4045-5651;;;0000-0002-7576-2064", "linkedin": ";elona-dupont-225125a2/;;;djamilaaouada/", "or_profile": "~Kseniya_Cherenkova1;~Elona_Dupont1;~Anis_Kacem1;~Gleb_A_Gusev1;~Djamila_Aouada1", "aff": ";University of Luxemburg;University of Luxemburg;Artec 3D;University of Luxemburg", "aff_domain": ";uni.lu;uni.lu;artec3d.com;uni.lu", "position": ";PhD student;Researcher;CTO;Senior Research Scientist", "bibtex": "@inproceedings{\ncherenkova2024spelsnet,\ntitle={SpelsNet: Surface Primitive Elements Segmentation by B-Rep Graph Structure Supervision},\nauthor={Kseniya Cherenkova and Elona Dupont and Anis Kacem and Gleb A Gusev and Djamila Aouada},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ad3PzTuqIq}\n}", "github": "", "reviewers": "PwTs;9eMm;js6S;LGxC", "pdf_size": 13770955, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "135;95;63;48", "wc_strengths": "113;76;47;28", "wc_weaknesses": "193;185;84;89", "wc_questions": "50;47;45;15", "wc_limitations": "6;13;7;21", "wc_review": "497;416;246;201", "wc_reply_reviewers": "21;10;63;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.25, 33.36446462930284 ], "wc_strengths_avg": [ 66.0, 32.07023542164915 ], "wc_weaknesses_avg": [ 137.75, 51.35842189943145 ], "wc_questions_avg": [ 39.25, 14.113380176272443 ], "wc_limitations_avg": [ 11.75, 5.973901572674261 ], "wc_review_avg": [ 340.0, 121.01859361271721 ], "wc_reply_reviewers_avg": [ 23.5, 23.984369910422913 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4296292175170386522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";uni.lu;uni.lu;artec3d.com;uni.lu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Luxembourg;Artec 3D", "aff_unique_dep": ";", "aff_unique_url": "https://wwwen.uniluxembourg.lu;https://www.artec3d.com", "aff_unique_abbr": "Uni Lu;Artec 3D", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Luxembourg;Russian Federation" }, { "title": "What does guidance do? A fine-grained analysis in a simple setting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96240", "id": "AdS3H8SaPi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AdS3H8SaPi", "openreview": "https://openreview.net/forum?id=AdS3H8SaPi", "poster": "", "project": "", "author_site": "Muthu Chidambaram, Khashayar Gatmiry, Sitan Chen, Holden Lee, Jianfeng Lu", "tldr": "", "abstract": "The use of guidance in diffusion models was originally motivated by the premise that the guidance-modified score is that of the data distribution tilted by a conditional likelihood raised to some power. In this work we clarify this misconception by rigorously proving that guidance fails to sample from the intended tilted distribution. Our main result is to give a fine-grained characterization of the dynamics of guidance in two cases, (1) mixtures of compactly supported distributions and (2) mixtures of Gaussians, which reflect salient properties of guidance that manifest on real-world data. In both cases, we prove that as the guidance parameter increases, the guided model samples more heavily from the boundary of the support of the conditional distribution. We also prove that for any nonzero level of score estimation error, sufficiently large guidance will result in sampling away from the support, theoretically justifying the empirical finding that large guidance results in distorted generations. In addition to verifying these results empirically in synthetic settings, we also show how our theoretical insights can offer useful prescriptions for practical deployment.", "keywords": "diffusion;guidance;sampling;probability flow ode", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/8261c3369cf8dfafe286d985ba9e96fdd5ee4c02.zip", "author": "Muthu Chidambaram;Khashayar Gatmiry;Sitan Chen;Holden Lee;Jianfeng Lu", "authorids": "~Muthu_Chidambaram1;~Khashayar_Gatmiry1;~Sitan_Chen1;~Holden_Lee1;~Jianfeng_Lu1", "gender": "M;M;M;M;M", "homepage": "https://2014mchidamb.github.io/;http://ce.sharif.edu/~kgatmiry/;https://sitanchen.com;http://holdenlee.github.io;https://services.math.duke.edu/~jianfeng/", "dblp": "304/3319;;141/7670;150/3407;82/6187-1.html", "google_scholar": "R43EbqAAAAAJ;;YnJVsp4AAAAJ;hR9rFHgAAAAJ;ej9SRrAAAAAJ", "orcid": ";;;;0000-0001-6255-5165", "linkedin": "muthu-chidambaram-b8803919a/;;;;", "or_profile": "~Muthu_Chidambaram1;~Khashayar_Gatmiry1;~Sitan_Chen1;~Holden_Lee1;~Jianfeng_Lu1", "aff": "Duke University;Massachusetts Institute of Technology;Harvard University;Johns Hopkins University;Duke University", "aff_domain": "duke.edu;mit.edu;seas.harvard.edu;jh.edu;duke.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nchidambaram2024what,\ntitle={What does guidance do? A fine-grained analysis in a simple setting},\nauthor={Muthu Chidambaram and Khashayar Gatmiry and Sitan Chen and Holden Lee and Jianfeng Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AdS3H8SaPi}\n}", "github": "", "reviewers": "4NtD;gYu8;5E9W;CopY;xts9", "pdf_size": 3252954, "rating": "4;5;5;7;9", "confidence": "3;3;3;3;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;2;2;3;4", "wc_summary": "36;103;59;33;199", "wc_strengths": "33;38;40;39;63", "wc_weaknesses": "137;120;391;57;29", "wc_questions": "3;106;77;53;19", "wc_limitations": "18;8;33;6;2", "wc_review": "227;375;600;188;312", "wc_reply_reviewers": "0;41;162;11;0", "wc_reply_authors": "0;0;162;0;0", "reply_reviewers": "0;1;2;1;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.0, 1.7888543819998317 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 86.0, 61.79967637455717 ], "wc_strengths_avg": [ 42.6, 10.480458005259122 ], "wc_weaknesses_avg": [ 146.8, 128.36572751322683 ], "wc_questions_avg": [ 51.6, 37.49986666642963 ], "wc_limitations_avg": [ 13.4, 11.128342194594845 ], "wc_review_avg": [ 340.4, 145.24544743295743 ], "wc_reply_reviewers_avg": [ 42.8, 61.46023104414756 ], "wc_reply_authors_avg": [ 32.4, 64.8 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8385254915624212, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13503442856530833002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "duke.edu;mit.edu;seas.harvard.edu;jh.edu;duke.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Duke University;Massachusetts Institute of Technology;Harvard University;Johns Hopkins University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.duke.edu;https://web.mit.edu;https://www.harvard.edu;https://www.jhu.edu", "aff_unique_abbr": "Duke;MIT;Harvard;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Infusing Synthetic Data with Real-World Patterns for Zero-Shot Material State Segmentation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97821", "id": "AdpSHMOujG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AdpSHMOujG", "openreview": "https://openreview.net/forum?id=AdpSHMOujG", "poster": "/media/PosterPDFs/NeurIPS%202024/97821.png?t=1729472612.8225625", "project": "", "author_site": "sagi eppel, Jolina Li, Manuel Drehwald, Alan Aspuru-Guzik", "tldr": "", "abstract": "Visual recognition of materials and their states is essential for understanding the physical world, from identifying wet regions on surfaces or stains on fabrics to detecting infected areas or minerals in rocks. Collecting data that captures this vast variability is complex due to the scattered and gradual nature of material states. Manually annotating real-world images is constrained by cost and precision, while synthetic data, although accurate and inexpensive, lacks real-world diversity. This work aims to bridge this gap by infusing patterns automatically extracted from real-world images into synthetic data. Hence, patterns collected from natural images are used to generate and map materials into synthetic scenes. This unsupervised approach captures the complexity of the real world while maintaining the precision and scalability of synthetic data. We also present the first comprehensive benchmark for zero-shot material state segmentation, utilizing real-world images across a diverse range of domains, including food, soils, construction, plants, liquids, and more, each appears in various states such as wet, dry, infected, cooked, burned, and many others. The annotation includes partial similarity between regions with similar but not identical materials and hard segmentation of only identical material states. This benchmark eluded top foundation models, exposing the limitations of existing data collection methods. Meanwhile, nets trained on the infused data performed significantly better on this and related tasks. The dataset, code, and trained model are publicly available. We also share 300,000 extracted textures and SVBRDF/PBR materials to facilitate future datasets generation.", "keywords": "Synthetic Data;Texture Extraction;Material Recognition;Zero-shot Segmentation;Zero-shot learning;pattern transfer", "primary_area": "", "supplementary_material": "/attachment/d57a12e224fb3cce112ff563be08cb94e1e6e1f0.zip", "author": "Sagi Eppel;Jolina Yining Li;Manuel S. Drehwald;Alan Aspuru-Guzik", "authorids": "~Sagi_Eppel1;~Jolina_Yining_Li1;~Manuel_S._Drehwald1;~Alan_Aspuru-Guzik2", "gender": "M;F;M;M", "homepage": ";;;http://matter.toronto.edu", "dblp": ";;;", "google_scholar": "https://scholar.google.co.il/citations?user=Ifl1aB0AAAAJ;;;Ag_6KEgAAAAJ", "orcid": "0000-0001-5873-8305;;;0000-0002-8277-4434", "linkedin": ";jolinayli/;manuel-drehwald/;", "or_profile": "~Sagi_Eppel1;~Jolina_Yining_Li1;~Manuel_S._Drehwald1;~Alan_Aspuru-Guzik2", "aff": ";University of Toronto;University of Toronto;University of Toronto", "aff_domain": ";utoronto.ca;utoronto.ca;utoronto.ca", "position": ";Undergrad student;MS student;Full Professor", "bibtex": "@inproceedings{\neppel2024infusing,\ntitle={Infusing Synthetic Data with Real-World Patterns for Zero-Shot Material State Segmentation},\nauthor={Sagi Eppel and Jolina Yining Li and Manuel S. Drehwald and Alan Aspuru-Guzik},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=AdpSHMOujG}\n}", "github": "", "reviewers": "LGQh;RRue;vW41;WJgU;sTfb", "pdf_size": 13681838, "rating": "5;6;6;7;7", "confidence": "3;3;3;4;4", "wc_summary_and_contributions": "63;63;76;80;51", "wc_strengths": "41;45;54;88;45", "wc_improvement": "112;38;24;2;70", "wc_limitations": "5;83;14;116;1", "wc_correctness": "11;15;15;8;1", "wc_clarity": "9;8;17;5;1", "wc_relation_to_prior_work": "9;11;11;11;1", "wc_documentation": "15;1;10;17;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "266;265;222;328;172", "wc_reply_reviewers": "0;16;0;17;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 66.6, 10.36532681587995 ], "wc_strengths_avg": [ 54.6, 17.23484841824842 ], "wc_improvement_avg": [ 49.2, 38.37916101219514 ], "wc_limitations_avg": [ 43.8, 46.850400211737785 ], "wc_correctness_avg": [ 10.0, 5.215361924162119 ], "wc_clarity_avg": [ 8.0, 5.291502622129181 ], "wc_relation_to_prior_work_avg": [ 8.6, 3.8781438859330635 ], "wc_documentation_avg": [ 8.8, 6.764613810115105 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 250.6, 51.82894943947832 ], "wc_reply_reviewers_avg": [ 11.2, 9.453041838477178 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1699047892392913713&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": ";utoronto.ca;utoronto.ca;utoronto.ca", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Understanding Transformer Reasoning Capabilities via Graph Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96239", "id": "AfzbDw6DSp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AfzbDw6DSp", "openreview": "https://openreview.net/forum?id=AfzbDw6DSp", "poster": "/media/PosterPDFs/NeurIPS%202024/96239.png?t=1731691365.6215372", "project": "", "author_site": "Clayton Sanford, Bahare Fatemi, Ethan Hall, Anton Tsitsulin, Mehran Kazemi, Jonathan Halcrow, Bryan Perozzi, Vahab Mirrokni", "tldr": "", "abstract": "Which transformer scaling regimes are able to perfectly solve different classes of algorithmic problems? While tremendous empirical advances have been attained by transformer-based neural networks, a theoretical understanding of their algorithmic reasoning capabilities in realistic parameter regimes is lacking. We investigate this question in terms of the network\u2019s depth, width, and number of extra tokens for algorithm execution. Our novel representational hierarchy separates 9 algorithmic reasoning problems into classes solvable by transformers in different realistic parameter scaling regimes. We prove that logarithmic depth is necessary and sufficient for tasks like graph connectivity, while single-layer transformers with small embedding dimensions can solve contextual retrieval tasks. We also support our theoretical analysis with ample empirical evidence using the GraphQA benchmark. These results show that transformers excel at many graph reasoning tasks, even outperforming specialized graph neural networks.", "keywords": "transformers;graph neural networks;representation;theory", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Clayton Sanford;Bahare Fatemi;Ethan Hall;Anton Tsitsulin;Mehran Kazemi;Jonathan Halcrow;Bryan Perozzi;Vahab Mirrokni", "authorids": "~Clayton_Sanford1;~Bahare_Fatemi1;~Ethan_Hall1;~Anton_Tsitsulin1;~Mehran_Kazemi1;~Jonathan_Halcrow1;~Bryan_Perozzi1;~Vahab_Mirrokni2", "gender": "F;M;M;M;;M;M;M", "homepage": ";;http://tsitsul.in;;http://www.perozzi.net/;https://people.csail.mit.edu/mirrokni/Welcome.html;https://claytonsanford.com/;https://mehran-k.github.io/", "dblp": ";;217/1668;;91/10813;m/VahabSMirrokni;232/1797;149/1283", "google_scholar": ";;https://scholar.google.com/citations?hl=en;2zZucy4AAAAJ;rZgbMs4AAAAJ;opbZfw0AAAAJ;Qo18yHAAAAAJ;https://scholar.google.ca/citations?user=L79ecZkAAAAJ", "orcid": ";;;;;;;", "linkedin": "bahare-fatemi-b0049179/;ethan-hall-397391b0/;atsitsulin/;;;;claytonsanford/;mehran-kazemi-64675071/", "or_profile": "~Bahare_Fatemi1;~Ethan_Hall1;~Anton_Tsitsulin1;~Jonathan_Halcrow1;~Bryan_Perozzi1;~Vahab_Mirrokni2;~Clayton_Hendrick_Sanford1;~Seyed_Mehran_Kazemi1", "aff": "Google;Google;Google;Google;Google;Google Research;Columbia University;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;columbia.edu;google.com", "position": "Researcher;Software Engineer;Research Scientist;Researcher;Researcher;VP, Google Fellow;PhD student;Researcher", "bibtex": "@inproceedings{\nsanford2024understanding,\ntitle={Understanding Transformer Reasoning Capabilities via Graph Algorithms},\nauthor={Clayton Sanford and Bahare Fatemi and Ethan Hall and Anton Tsitsulin and Mehran Kazemi and Jonathan Halcrow and Bryan Perozzi and Vahab Mirrokni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AfzbDw6DSp}\n}", "github": "", "reviewers": "43vz;RdpW;CKNh", "pdf_size": 676312, "rating": "6;7;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "42;76;117", "wc_strengths": "46;119;93", "wc_weaknesses": "107;256;128", "wc_questions": "4;70;145", "wc_limitations": "5;5;71", "wc_review": "204;526;554", "wc_reply_reviewers": "0;35;40", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 78.33333333333333, 30.663043264200347 ], "wc_strengths_avg": [ 86.0, 30.21037349432586 ], "wc_weaknesses_avg": [ 163.66666666666666, 65.8499978908293 ], "wc_questions_avg": [ 73.0, 57.60208329565867 ], "wc_limitations_avg": [ 27.0, 31.11269837220809 ], "wc_review_avg": [ 428.0, 158.80386225361985 ], "wc_reply_reviewers_avg": [ 25.0, 17.795130420052185 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=841863360941643873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com;google.com;google.com;columbia.edu;google.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Google;Columbia University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.columbia.edu", "aff_unique_abbr": "Google;Columbia", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "First-Explore, then Exploit: Meta-Learning to Solve Hard Exploration-Exploitation Trade-Offs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96238", "id": "AhjTu2aiiW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AhjTu2aiiW", "openreview": "https://openreview.net/forum?id=AhjTu2aiiW", "poster": "/media/PosterPDFs/NeurIPS%202024/96238.png?t=1733538071.8890798", "project": "", "author_site": "Ben Norman, Jeff Clune", "tldr": "", "abstract": "Standard reinforcement learning (RL) agents never intelligently explore like a human (i.e. taking into account complex domain priors and adapting quickly based on previous exploration). Across episodes, RL agents struggle to perform even simple exploration strategies, for example systematic search that avoids exploring the same location multiple times. This poor exploration limits performance on challenging domains. Meta-RL is a potential solution, as unlike standard RL, meta-RL can *learn* to explore, and potentially learn highly complex strategies far beyond those of standard RL, strategies such as experimenting in early episodes to learn new skills, or conducting experiments to learn about the current environment.\nTraditional meta-RL focuses on the problem of learning to optimally balance exploration and exploitation to maximize the *cumulative reward* of the episode sequence (e.g., aiming to maximize the total wins in a tournament -- while also improving as a player).\nWe identify a new challenge with state-of-the-art cumulative-reward meta-RL methods.\nWhen optimal behavior requires exploration that sacrifices immediate reward to enable higher subsequent reward, existing state-of-the-art cumulative-reward meta-RL methods become stuck on the local optimum of failing to explore.\nOur method, First-Explore, overcomes this limitation by learning two policies: one to solely explore, and one to solely exploit. When exploring requires forgoing early-episode reward, First-Explore significantly outperforms existing cumulative meta-RL methods. By identifying and solving the previously unrecognized problem of forgoing reward in early episodes, First-Explore represents a significant step towards developing meta-RL algorithms capable of human-like exploration on a broader range of domains.", "keywords": "Meta Reinforcement Learning;Meta-RL;Reinforcement Learning;RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/dc5e966c829399c7418867ea4831dac3d0ca61c7.zip", "author": "Ben Norman;Jeff Clune", "authorids": "~Ben_Norman1;~Jeff_Clune3", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "ben-t-norman/;", "or_profile": "~Ben_Norman1;~Jeff_Clune3", "aff": "University of British Columbia;", "aff_domain": "cs.ubc.ca;", "position": "PhD student;", "bibtex": "@inproceedings{\nnorman2024firstexplore,\ntitle={First-Explore, then Exploit: Meta-Learning to Solve Hard Exploration-Exploitation Trade-Offs},\nauthor={Ben Norman and Jeff Clune},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AhjTu2aiiW}\n}", "github": "", "reviewers": "Vncg;zDt1;Erp8", "pdf_size": 1425157, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "2;3;2", "wc_summary": "94;128;53", "wc_strengths": "62;81;36", "wc_weaknesses": "141;128;121", "wc_questions": "65;119;44", "wc_limitations": "7;12;6", "wc_review": "369;468;260", "wc_reply_reviewers": "52;106;410", "wc_reply_authors": "0;60;1337", "reply_reviewers": "1;1;3", "reply_authors": "1;2;5", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.66666666666667, 30.663043264200347 ], "wc_strengths_avg": [ 59.666666666666664, 18.445113776342563 ], "wc_weaknesses_avg": [ 130.0, 8.286535263104035 ], "wc_questions_avg": [ 76.0, 31.591137997862628 ], "wc_limitations_avg": [ 8.333333333333334, 2.6246692913372702 ], "wc_review_avg": [ 365.6666666666667, 84.94835032078153 ], "wc_reply_reviewers_avg": [ 189.33333333333334, 157.5845452095125 ], "wc_reply_authors_avg": [ 465.6666666666667, 616.6124300473425 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4174885087623819555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.ubc.ca;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Learning Identifiable Factorized Causal Representations of Cellular Responses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96237", "id": "AhlaBDHMQh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AhlaBDHMQh", "openreview": "https://openreview.net/forum?id=AhlaBDHMQh", "poster": "/media/PosterPDFs/NeurIPS%202024/96237.png?t=1731697298.1272147", "project": "", "author_site": "Haiyi Mao, Romain Lopez, Kai Liu, Jan-Christian Huetter, David Richmond, Panayiotis Benos, Lin Qiu", "tldr": "", "abstract": "The study of cells and their responses to genetic or chemical perturbations promises to accelerate the discovery of therapeutics targets. However, designing adequate and insightful models for such data is difficult because the response of a cell to perturbations essentially depends on contextual covariates (e.g., genetic background or type of the cell). There is therefore a need for models that can identify interactions between drugs and contextual covariates. This is crucial for discovering therapeutics targets, as such interactions may reveal drugs that affect certain cell types but not others.\nWe tackle this problem with a novel Factorized Causal Representation (FCR) learning method, an identifiable deep generative model that reveals causal structure in single-cell perturbation data from several cell lines. FCR learns multiple cellular representations that are disentangled, comprised of covariate-specific (Z_x), treatment-specific (Z_t) and interaction-specific (Z_tx) representations. Based on recent advances of non-linear ICA theory, we prove the component-wise identifiability of Z_tx and block-wise identifiability of Z_t and Z_x. Then, we present our implementation of FCR, and empirically demonstrate that FCR outperforms state-of-the-art baselines in various tasks across four single-cell datasets.", "keywords": "Single Cell;Computational Biology;Causality", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Haiyi Mao;Romain Lopez;Kai Liu;Jan-Christian Huetter;David Richmond;Panayiotis V. Benos;Lin Qiu", "authorids": "~Haiyi_Mao1;~Romain_Lopez1;~Kai_Liu7;~Jan-Christian_Huetter1;~D.L._Richmond1;~Panayiotis_V._Benos1;~Lin_Qiu1", "gender": "M;;M;M;;;M", "homepage": "https://harrymao1011.github.io/;https://romain-lopez.github.io/;;https://jchuetter.com;;;", "dblp": "188/1160;132/4587;;;;;82/4490", "google_scholar": ";https://scholar.google.fr/citations?user=Z8RR17oAAAAJ;TGWgOqQAAAAJ;UE7lJeUAAAAJ;;;iKNJLTUAAAAJ", "orcid": ";0000-0003-0495-738X;;0000-0002-1219-4821;;;", "linkedin": ";;kai-liu-5aa28914/;jan-christian-huetter;;;", "or_profile": "~Haiyi_Mao1;~Romain_Lopez1;~Kai_Liu7;~Jan-Christian_Huetter1;~D.L._Richmond1;~Panayiotis_V._Benos1;~Lin_Qiu1", "aff": "University of Pittsburgh;Stanford University;Genentech Inc. ;Genentech;;;Genentech Inc.", "aff_domain": "pitt.edu;stanford.edu;gene.com;gene.com;;;gene.com", "position": "PhD student;Postdoc;Principal Researcher;Researcher;;;Researcher", "bibtex": "@inproceedings{\nmao2024learning,\ntitle={Learning Identifiable Factorized Causal Representations of Cellular Responses},\nauthor={Haiyi Mao and Romain Lopez and Kai Liu and Jan-Christian Huetter and David Richmond and Panayiotis V. Benos and Lin Qiu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AhlaBDHMQh}\n}", "github": "", "reviewers": "WECd;q35E;yZVk;xvG5;vnnM", "pdf_size": 5521276, "rating": "5;6;6;6;6", "confidence": "2;3;4;3;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "23;92;72;88;71", "wc_strengths": "19;57;34;63;23", "wc_weaknesses": "52;134;64;109;78", "wc_questions": "12;61;21;12;152", "wc_limitations": "28;11;1;109;52", "wc_review": "134;355;192;381;376", "wc_reply_reviewers": "17;23;29;0;81", "wc_reply_authors": "35;30;88;0;114", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 69.2, 24.57152823900052 ], "wc_strengths_avg": [ 39.2, 17.780888616714297 ], "wc_weaknesses_avg": [ 87.4, 30.090530071768427 ], "wc_questions_avg": [ 51.6, 53.36890480420223 ], "wc_limitations_avg": [ 40.2, 38.49883115108821 ], "wc_review_avg": [ 287.6, 103.74314435180764 ], "wc_reply_reviewers_avg": [ 30.0, 27.27636339397171 ], "wc_reply_authors_avg": [ 53.4, 41.49023981613025 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8017837257372734, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2527189478442956703&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "pitt.edu;stanford.edu;gene.com;gene.com;;;gene.com", "author_num": 7, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Pittsburgh;Stanford University;Genentech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.pitt.edu;https://www.stanford.edu;https://www.gene.com", "aff_unique_abbr": "Pitt;Stanford;Genentech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Auditing Privacy Mechanisms via Label Inference Attacks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96236", "id": "Ai76ATrb2y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ai76ATrb2y", "openreview": "https://openreview.net/forum?id=Ai76ATrb2y", "poster": "", "project": "", "author_site": "R\u00f3bert Busa-Fekete, Travis Dick, Claudio Gentile, Andres Munoz Medina, Adam Smith, Marika Swanberg", "tldr": "", "abstract": "We propose reconstruction advantage measures to audit label privatization mechanisms. A reconstruction advantage measure quantifies the increase in an attacker's ability to infer the true label of an unlabeled example when provided with a private version of the labels in a dataset (e.g., aggregate of labels from different users or noisy labels output by randomized response), compared to an attacker that only observes the feature vectors, but may have prior knowledge of the correlation between features and labels. We consider two such auditing measures: one additive, and on multiplicative. These cover previous approaches taken in the literature on empirical auditing and differential privacy. These measures allow us to place a variety of proposed privatization schemes---some differentially private, some not---on the same footing. We analyze these measures theoretically under a distributional model which, we claim, encapsulates reasonable adversarial settings. We also quantify their behavior empirically on real and simulated prediction tasks. Across a range of experimental settings, we find that differentially private schemes dominate or match the privacy-utility tradeoff of more heuristic approaches.", "keywords": "label inference;label reconstruction;differential privacy;learning from label proportions", "primary_area": "privacy", "supplementary_material": "", "author": "Robert Istvan Busa-Fekete;Travis Dick;Claudio Gentile;Andres Munoz medina;Adam Smith;Marika Swanberg", "authorids": "~Robert_Istvan_Busa-Fekete1;~Travis_Dick1;~Claudio_Gentile1;~Andres_Munoz_medina1;~Adam_Smith1;~Marika_Swanberg1", "gender": "M;M;M;;M;F", "homepage": ";https://www.cis.upenn.edu/~tbd/;https://sites.google.com/corp/view/cgentile;https://ammedina-ml.com;http://cs-people.bu.edu/ads22;https://cs-people.bu.edu/marikas/", "dblp": "69/4876;135/8679;56/5759;10/11472;04/5072;237/9667.html", "google_scholar": "UNtKl1MAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=0SiZNDEAAAAJ;;fkGi-JMAAAAJ;", "orcid": ";;0000-0003-1551-2167;;;", "linkedin": ";;;;;marika-swanberg-21b21211b", "or_profile": "~Robert_Istvan_Busa-Fekete1;~Travis_Dick1;~Claudio_Gentile1;~Andres_Munoz_medina1;~Adam_Smith1;~Marika_Swanberg1", "aff": "Google Research;Google;Google;Google;Google;Google Research", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist;Researcher;Researcher;Intern", "bibtex": "@inproceedings{\nbusa-fekete2024auditing,\ntitle={Auditing Privacy Mechanisms via Label Inference Attacks},\nauthor={Robert Istvan Busa-Fekete and Travis Dick and Claudio Gentile and Andres Munoz medina and Adam Smith and Marika Swanberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ai76ATrb2y}\n}", "github": "", "reviewers": "Us4Y;zE7q;mygV;enWp;kz5F", "pdf_size": 2336265, "rating": "3;7;8;8;9", "confidence": "4;4;3;4;4", "soundness": "2;3;4;4;4", "novelty": "2;3;4;4;3", "presentation": "2;3;3;3;4", "wc_summary": "26;90;148;449;94", "wc_strengths": "25;62;162;142;259", "wc_weaknesses": "573;92;317;71;70", "wc_questions": "1;4;50;409;13", "wc_limitations": "5;7;36;8;16", "wc_review": "630;255;713;1079;452", "wc_reply_reviewers": "2491;18;28;78;0", "wc_reply_authors": "1255;0;0;28;0", "reply_reviewers": "4;1;1;1;0", "reply_authors": "3;1;1;2;1", "rating_avg": [ 7.0, 2.0976176963403033 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 161.4, 148.9074880588616 ], "wc_strengths_avg": [ 130.0, 81.80220045940085 ], "wc_weaknesses_avg": [ 224.6, 197.48275874111135 ], "wc_questions_avg": [ 95.4, 157.7727479636455 ], "wc_limitations_avg": [ 14.4, 11.42978564978364 ], "wc_review_avg": [ 625.8, 275.9386888422861 ], "wc_reply_reviewers_avg": [ 523.0, 984.3401850986273 ], "wc_reply_authors_avg": [ 256.6, 499.31777456846055 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.23836564731139806, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9582271198558803126&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Contrastive-Equivariant Self-Supervised Learning Improves Alignment with Primate Visual Area IT", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96235", "id": "AiMs8GPP5q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AiMs8GPP5q", "openreview": "https://openreview.net/forum?id=AiMs8GPP5q", "poster": "", "project": "", "author_site": "Thomas Yerxa, Jenelle Feather, Eero Simoncelli, SueYeon Chung", "tldr": "", "abstract": "Models trained with self-supervised learning objectives have recently matched or surpassed models trained with traditional supervised object recognition in their ability to predict neural responses of object-selective neurons in the primate visual system. A self-supervised learning objective is arguably a more biologically plausible organizing principle, as the optimization does not require a large number of labeled examples. However, typical self-supervised objectives may result in network representations that are overly invariant to changes in the input. Here, we show that a representation with structured variability to the input transformations is better aligned with known features of visual perception and neural computation. We introduce a novel framework for converting standard invariant SSL losses into \"contrastive-equivariant\" versions that encourage preserving aspects of the input transformation without supervised access to the transformation parameters. We further demonstrate that our proposed method systematically increases models' ability to predict responses in macaque inferior temporal cortex. Our results demonstrate the promise of incorporating known features of neural computation into task-optimization for building better models of visual cortex.", "keywords": "Neural Predictivity;Equivariance;Factorization;Self-Supervised Learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Thomas Edward Yerxa;Jenelle Feather;Eero P Simoncelli;SueYeon Chung", "authorids": "~Thomas_Edward_Yerxa1;~Jenelle_Feather1;~Eero_P_Simoncelli1;~SueYeon_Chung1", "gender": "M;;F;M", "homepage": ";https://jenellefeather.com;https://sites.google.com/site/sueyeonchung/;https://www.cns.nyu.edu/~eero/", "dblp": "311/8930;243/9963.html;173/5418;30/5604", "google_scholar": "n4Uu99gAAAAJ;TtTfnKIAAAAJ;h7yVv0QAAAAJ;MplR7_cAAAAJ", "orcid": ";0000-0001-9753-2393;;0000-0002-1206-527X", "linkedin": ";;;eero-simoncelli-445782123", "or_profile": "~Thomas_Edward_Yerxa1;~Jenelle_Feather1;~SueYeon_Chung1;~Eero_Peter_Simoncelli1", "aff": "New York University;Flatiron Institute;Flatiron Institute / Simons Foundation;New York University", "aff_domain": "nyu.edu;flatironinstitute.org;simonsfoundation.org;nyu.edu", "position": "PhD student;Postdoc;Principal Investigator;Full Professor", "bibtex": "@inproceedings{\nyerxa2024contrastiveequivariant,\ntitle={Contrastive-Equivariant Self-Supervised Learning Improves Alignment with Primate Visual Area {IT}},\nauthor={Thomas Edward Yerxa and Jenelle Feather and Eero P Simoncelli and SueYeon Chung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AiMs8GPP5q}\n}", "github": "", "reviewers": "AoJ5;Fzg3;d1rT;aTov", "pdf_size": 8390291, "rating": "6;7;8;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "2;4;3;3", "wc_summary": "91;93;122;121", "wc_strengths": "141;74;67;143", "wc_weaknesses": "227;103;365;118", "wc_questions": "6;164;109;83", "wc_limitations": "7;18;10;66", "wc_review": "472;452;673;531", "wc_reply_reviewers": "0;36;7;0", "wc_reply_authors": "0;31;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.75, 14.771171246722448 ], "wc_strengths_avg": [ 106.25, 35.84253757757673 ], "wc_weaknesses_avg": [ 203.25, 104.9342055766374 ], "wc_questions_avg": [ 90.5, 56.879258082362504 ], "wc_limitations_avg": [ 25.25, 23.8681272830526 ], "wc_review_avg": [ 532.0, 86.43205423915366 ], "wc_reply_reviewers_avg": [ 10.75, 14.85555451674558 ], "wc_reply_authors_avg": [ 7.75, 13.423393758658799 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16396315218923634872&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "nyu.edu;flatironinstitute.org;simonsfoundation.org;nyu.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "New York University;Flatiron Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://flatironinstitute.org", "aff_unique_abbr": "NYU;Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Equivariant Blurring Diffusion for Hierarchical Molecular Conformer Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96234", "id": "Aj0Zf28l6o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Aj0Zf28l6o", "openreview": "https://openreview.net/forum?id=Aj0Zf28l6o", "poster": "/media/PosterPDFs/NeurIPS%202024/96234.png?t=1733751852.3460913", "project": "", "author_site": "Jiwoong Park, Yang Shen", "tldr": "", "abstract": "How can diffusion models process 3D geometries in a coarse-to-fine manner, akin to our multiscale view of the world?\nIn this paper, we address the question by focusing on a fundamental biochemical problem of generating 3D molecular conformers conditioned on molecular graphs in a multiscale manner. \nOur approach consists of two hierarchical stages: i) generation of coarse-grained fragment-level 3D structure from the molecular graph, and ii) generation of fine atomic details from the coarse-grained approximated structure while allowing the latter to be adjusted simultaneously.\nFor the challenging second stage, which demands preserving coarse-grained information while ensuring SE(3) equivariance, we introduce a novel generative model termed Equivariant Blurring Diffusion (EBD), which defines a forward process that moves towards the fragment-level coarse-grained structure by blurring the fine atomic details of conformers, and a reverse process that performs the opposite operation using equivariant networks.\nWe demonstrate the effectiveness of EBD by geometric and chemical comparison to state-of-the-art denoising diffusion models on a benchmark of drug-like molecules.\nAblation studies draw insights on the design of EBD by thoroughly analyzing its architecture, which includes the design of the loss function and the data corruption process.\nCodes are released at https://github.com/Shen-Lab/EBD.", "keywords": "molecular conformer generation;diffusion models;hierarchical models", "primary_area": "generative_models", "supplementary_material": "", "author": "Jiwoong Park;Yang Shen", "authorids": "~Jiwoong_Park1;~Yang_Shen4", "gender": ";", "homepage": "https://jiwoongpark92.github.io;https://shen-lab.github.io/", "dblp": ";95/5308-1.html", "google_scholar": "https://scholar.google.co.kr/citations?user=B8XAbUwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-1703-7796", "linkedin": ";", "or_profile": "~Jiwoong_Park1;~Yang_Shen4", "aff": "Texas A&M University - College Station;Texas A&M University - College Station", "aff_domain": "tamu.edu;tamu.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\npark2024equivariant,\ntitle={Equivariant Blurring Diffusion for Hierarchical Molecular Conformer Generation},\nauthor={Jiwoong Park and Yang Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Aj0Zf28l6o}\n}", "github": "", "reviewers": "pba2;sBnm;8MLu;78eu;UJjj", "pdf_size": 14405019, "rating": "4;5;5;7;7", "confidence": "3;4;1;4;4", "soundness": "3;2;3;3;2", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "73;78;57;145;75", "wc_strengths": "78;29;65;161;129", "wc_weaknesses": "75;72;1;88;140", "wc_questions": "29;251;1;281;128", "wc_limitations": "21;9;1;11;1", "wc_review": "276;439;125;686;473", "wc_reply_reviewers": "107;25;0;177;1", "wc_reply_authors": "294;18;0;451;17", "reply_reviewers": "1;1;0;2;1", "reply_authors": "2;2;1;4;2", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 85.6, 30.578423765786226 ], "wc_strengths_avg": [ 92.4, 46.953594111633244 ], "wc_weaknesses_avg": [ 75.2, 44.4495219321873 ], "wc_questions_avg": [ 138.0, 113.10879718218207 ], "wc_limitations_avg": [ 8.6, 7.418894796396563 ], "wc_review_avg": [ 399.8, 189.55041545720758 ], "wc_reply_reviewers_avg": [ 62.0, 69.5471063380785 ], "wc_reply_authors_avg": [ 156.0, 183.72261700726995 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.485912657903775, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AcWPs4VkV-cJ:scholar.google.com/&scioq=Equivariant+Blurring+Diffusion+for+Hierarchical+Molecular+Conformer+Generation&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "tamu.edu;tamu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AROMA: Preserving Spatial Structure for Latent PDE Modeling with Local Neural Fields", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96233", "id": "Aj8RKCGwjE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Aj8RKCGwjE", "openreview": "https://openreview.net/forum?id=Aj8RKCGwjE", "poster": "", "project": "", "author_site": "Louis Serrano, Thomas X Wang, Etienne Le Naour, Jean-No\u00ebl Vittaut, Patrick Gallinari", "tldr": "", "abstract": "We present AROMA (Attentive Reduced Order Model with Attention), a framework designed to enhance the modeling of partial differential equations (PDEs) using local neural fields. Our flexible encoder-decoder architecture can obtain smooth latent representations of spatial physical fields from a variety of data types, including irregular-grid inputs and point clouds. This versatility eliminates the need for patching and allows efficient processing of diverse geometries. The sequential nature of our latent representation can be interpreted spatially and permits the use of a conditional transformer for modeling the temporal dynamics of PDEs. By employing a diffusion-based formulation, we achieve greater stability and enable longer rollouts compared to conventional MSE training. AROMA's superior performance in simulating 1D and 2D equations underscores the efficacy of our approach in capturing complex dynamical behaviors.", "keywords": "PDE;Neural Operator;Neural Fields;Transformer;Diffusion", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/de8b0a1101360698ff59fcdca02d62b798462c13.zip", "author": "Louis Serrano;Thomas X Wang;Etienne Le Naour;Jean-No\u00ebl Vittaut;Patrick Gallinari", "authorids": "~Louis_Serrano1;~Thomas_X_Wang1;~Etienne_Le_Naour1;~Jean-No\u00ebl_Vittaut1;~Patrick_Gallinari2", "gender": "M;Not Specified;M;M;M", "homepage": "https://www.isir.upmc.fr/personnel/serrano/;https://thomasxwang.github.io/;;https://webia.lip6.fr/~vittaut/;", "dblp": "349/0965;;;12/3351;g/PatrickGallinari", "google_scholar": ";;;https://scholar.google.fr/citations?hl=fr;rFaxB20AAAAJ", "orcid": ";;;0000-0001-6654-4199;", "linkedin": "louis-serrano-a0596578/;;etienne-le-naour-913248156/;vittaut/;", "or_profile": "~Louis_Serrano1;~Thomas_X_Wang1;~Etienne_Le_Naour1;~Jean-No\u00ebl_Vittaut1;~patrick_gallinari1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universite;ISIR, UMR 7222;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universite", "aff_domain": "isir.upmc.fr;sorbonne-universite.fr;sorbonne-universite.fr;sorbonne-universite.fr;sorbonne-universite.fr", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nserrano2024aroma,\ntitle={{AROMA}: Preserving Spatial Structure for Latent {PDE} Modeling with Local Neural Fields},\nauthor={Louis Serrano and Thomas X Wang and Etienne Le Naour and Jean-No{\\\"e}l Vittaut and Patrick Gallinari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Aj8RKCGwjE}\n}", "github": "", "reviewers": "gPBs;qbYC;xV3P;rnRB", "pdf_size": 8319382, "rating": "5;6;7;9", "confidence": "5;4;4;5", "soundness": "3;2;3;4", "novelty": "3;2;2;4", "presentation": "3;3;3;4", "wc_summary": "71;63;54;72", "wc_strengths": "86;47;74;1", "wc_weaknesses": "56;164;143;1", "wc_questions": "46;101;157;78", "wc_limitations": "39;8;10;1", "wc_review": "298;383;438;153", "wc_reply_reviewers": "6;109;25;0", "wc_reply_authors": "0;364;20;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;3;2;1", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.0, 7.245688373094719 ], "wc_strengths_avg": [ 52.0, 32.657311585615865 ], "wc_weaknesses_avg": [ 91.0, 65.87488140406782 ], "wc_questions_avg": [ 95.5, 40.52468383590426 ], "wc_limitations_avg": [ 14.5, 14.534441853748634 ], "wc_review_avg": [ 318.0, 107.52906583803284 ], "wc_reply_reviewers_avg": [ 35.0, 43.70926675202868 ], "wc_reply_authors_avg": [ 96.0, 154.94515158597252 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.16903085094570333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15786360764008434686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "isir.upmc.fr;sorbonne-universite.fr;sorbonne-universite.fr;sorbonne-universite.fr;sorbonne-universite.fr", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Sorbonne University;Institut des Sciences de l'Ing\u00e9nierie de Robotique;Sorbonne Universit\u00e9", "aff_unique_dep": "Facult\u00e9 des Sciences;;UMR 7222;Facult\u00e9 des Sciences", "aff_unique_url": "https://www.upmc.fr;https://www.sorbonne-universite.fr;https://www.isir.upmc.fr;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UPMC;Sorbonne;ISIR;Sorbonne U", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Paris;;Paris VI", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "title": "On the Minimax Regret for Contextual Linear Bandits and Multi-Armed Bandits with Expert Advice", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96232", "id": "AkiPax5SXu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AkiPax5SXu", "openreview": "https://openreview.net/forum?id=AkiPax5SXu", "poster": "", "project": "", "tldr": "", "abstract": "This paper examines two extensions of multi-armed bandit problems: multi-armed bandits with expert advice and contextual linear bandits.\n For the former problem, multi-armed bandits with expert advice, the previously known best upper and lower bounds have been $O(\\sqrt{KT \\log \\frac{N}{K} })$ and $\\Omega( \\sqrt{KT \\frac{ \\log N }{\\log K }} )$, respectively.\n Here, $K$, $N$, and $T$ represent the numbers of arms, experts, and rounds, respectively.\n We provide a lower bound of $\\Omega( \\sqrt{KT \\log \\frac{N}{K}} )$ for the setup in which the player chooses an expert before observing the advices in each round.\n For the latter problem, contextual linear bandits, we provide an algorithm that achieves $O ( \\sqrt{d T \\log ( K \\min\\{ 1, \\frac{S}{d} \\} )} )$ together with a matching lower bound, where $d$ and $S$ represent the dimensionality of feature vectors and the size of the context space, respectively.", "keywords": "contextual linear bandit;multi-armed bandit with expert advice", "primary_area": "bandits", "supplementary_material": "", "author": "Shinji Ito", "authorids": "~Shinji_Ito1", "gender": "M", "homepage": "https://researchmap.jp/shinji_ito?lang=en", "dblp": "49/852", "google_scholar": "https://scholar.google.co.jp/citations?user=GX0V06wAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Shinji_Ito1", "aff": "NEC", "aff_domain": "nec.com", "position": "Principal Researcher", "bibtex": "@inproceedings{\nito2024on,\ntitle={On the Minimax Regret for Contextual Linear Bandits and Multi-Armed Bandits with Expert Advice},\nauthor={Shinji Ito},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AkiPax5SXu}\n}", "github": "", "reviewers": "nf4R;sKYc;FSFk;aY6j;PXkJ", "pdf_size": 315152, "rating": "6;6;6;6;7", "confidence": "3;2;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;4", "presentation": "3;2;2;2;3", "wc_summary": "91;39;167;69;171", "wc_strengths": "100;19;217;50;120", "wc_weaknesses": "104;96;404;113;102", "wc_questions": "28;8;237;50;2", "wc_limitations": "4;1;1;11;14", "wc_review": "327;163;1026;293;409", "wc_reply_reviewers": "0;200;38;75;0", "wc_reply_authors": "0;685;0;62;0", "reply_reviewers": "0;4;1;1;0", "reply_authors": "1;3;1;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 107.4, 52.95129837879332 ], "wc_strengths_avg": [ 101.2, 68.03352114950394 ], "wc_weaknesses_avg": [ 163.8, 120.22379132268289 ], "wc_questions_avg": [ 65.0, 87.6310447273111 ], "wc_limitations_avg": [ 6.2, 5.344155686354955 ], "wc_review_avg": [ 443.6, 301.7943670779824 ], "wc_reply_reviewers_avg": [ 62.6, 74.1285370151064 ], "wc_reply_authors_avg": [ 149.4, 268.8743944670076 ], "reply_reviewers_avg": [ 1.2, 1.4696938456699067 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8939373413640445667&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "nec.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "NEC Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.nec.com", "aff_unique_abbr": "NEC", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Simple and Fast Distillation of Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96231", "id": "Ao0FiZqrXa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ao0FiZqrXa", "openreview": "https://openreview.net/forum?id=Ao0FiZqrXa", "poster": "/media/PosterPDFs/NeurIPS%202024/96231.png?t=1731477841.0540156", "project": "", "author_site": "Zhenyu Zhou, Defang Chen, Can Wang, Chun Chen, Siwei Lyu", "tldr": "", "abstract": "Diffusion-based generative models have demonstrated their powerful performance across various tasks, but this comes at a cost of the slow sampling speed. To achieve both efficient and high-quality synthesis, various distillation-based accelerated sampling methods have been developed recently. However, they generally require time-consuming fine tuning with elaborate designs to achieve satisfactory performance in a specific number of function evaluation (NFE), making them difficult to employ in practice. To address this issue, we propose **S**imple and **F**ast **D**istillation (SFD) of diffusion models, which simplifies the paradigm used in existing methods and largely shortens their fine-tuning time up to $1000\\times$. We begin with a vanilla distillation-based sampling method and boost its performance to state of the art by identifying and addressing several small yet vital factors affecting the synthesis efficiency and quality. Our method can also achieve sampling with variable NFEs using a single distilled model. Extensive experiments demonstrate that SFD strikes a good balance between the sample quality and fine-tuning costs in few-step image generation task. For example, SFD achieves 4.53 FID (NFE=2) on CIFAR-10 with only **0.64 hours** of fine-tuning on a single NVIDIA A100 GPU.", "keywords": "Diffusion models;fast distillation;fast sampling", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/59e4f9380d9a7f2bc3a08f4b6ddde02059dc5832.zip", "author": "Zhenyu Zhou;Defang Chen;Can Wang;Chun Chen;Siwei Lyu", "authorids": "~Zhenyu_Zhou6;~Defang_Chen1;~Can_Wang5;~Chun_Chen1;~Siwei_Lyu1", "gender": ";M;M;M;M", "homepage": ";https://www.researchgate.net/profile/Defang-Chen-3;https://person.zju.edu.cn/en/wangcan;https://person.zju.edu.cn/en/0082004;https://www.cse.buffalo.edu/~siweilyu", "dblp": ";236/4507-1;71/4716-1;07/4182-0001.html;51/4482", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.fr/citations?user=C63q3HoAAAAJ;;wefAEM4AAAAJ", "orcid": ";0000-0003-0833-7401;0000-0002-5890-4307;0000-0002-6198-7481;0000-0002-0992-685X", "linkedin": ";defang-chen-805b34165/;;;siwei-lyu-0806022/", "or_profile": "~Zhenyu_Zhou6;~Defang_Chen1;~Can_Wang5;~Chun_Chen1;~Siwei_Lyu1", "aff": ";Zhejiang University;Zhejiang University;Zhejiang University;State University of New York, Buffalo", "aff_domain": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;buffalo.edu", "position": ";PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024simple,\ntitle={Simple and Fast Distillation of Diffusion Models},\nauthor={Zhenyu Zhou and Defang Chen and Can Wang and Chun Chen and Siwei Lyu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ao0FiZqrXa}\n}", "github": "", "reviewers": "RDCV;bYyR;4y5W", "pdf_size": 17528354, "rating": "6;6;8", "confidence": "3;4;4", "soundness": "3;2;4", "novelty": "3;2;4", "presentation": "3;3;4", "wc_summary": "52;34;68", "wc_strengths": "45;39;91", "wc_weaknesses": "81;175;70", "wc_questions": "37;71;76", "wc_limitations": "7;39;1", "wc_review": "222;358;306", "wc_reply_reviewers": "61;240;225", "wc_reply_authors": "43;434;105", "reply_reviewers": "1;1;2", "reply_authors": "2;4;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 51.333333333333336, 13.888444437333106 ], "wc_strengths_avg": [ 58.333333333333336, 23.22833518691246 ], "wc_weaknesses_avg": [ 108.66666666666667, 47.11923410054775 ], "wc_questions_avg": [ 61.333333333333336, 17.326921891156037 ], "wc_limitations_avg": [ 15.666666666666666, 16.679994670929073 ], "wc_review_avg": [ 295.3333333333333, 56.031737038535184 ], "wc_reply_reviewers_avg": [ 175.33333333333334, 81.07746638589266 ], "wc_reply_authors_avg": [ 194.0, 171.5828274235702 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1960550915367437078&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;buffalo.edu", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Zhejiang University;State University of New York at Buffalo", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.buffalo.edu", "aff_unique_abbr": "ZJU;SUNY Buffalo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Unsupervised Anomaly Detection in The Presence of Missing Values", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96230", "id": "AoEeBqP8AD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AoEeBqP8AD", "openreview": "https://openreview.net/forum?id=AoEeBqP8AD", "poster": "/media/PosterPDFs/NeurIPS%202024/96230.png?t=1731295667.0782187", "project": "", "author_site": "Feng Xiao, Jicong Fan", "tldr": "", "abstract": "Anomaly detection methods typically require fully observed data for model training and inference and cannot handle incomplete data, while the missing data problem is pervasive in science and engineering, leading to challenges in many important applications such as abnormal user detection in recommendation systems and novel or anomalous cell detection in bioinformatics, where the missing rates can be higher than 30\\% or even 80\\%. In this work, first, we construct and evaluate a straightforward strategy, ''impute-then-detect'', via combining state-of-the-art imputation methods with unsupervised anomaly detection methods, where the training data are composed of normal samples only. We observe that such two-stage methods frequently yield imputation bias from normal data, namely, the imputation methods are inclined to make incomplete samples ''normal\", where the fundamental reason is that the imputation models learned only on normal data and cannot generalize well to abnormal data in the inference stage. To address this challenge, we propose an end-to-end method that integrates data imputation with anomaly detection into a unified optimization problem. The proposed model learns to generate well-designed pseudo-abnormal samples to mitigate the imputation bias and ensure the discrimination ability of both the imputation and detection processes. Furthermore, we provide theoretical guarantees for the effectiveness of the proposed method, proving that the proposed method can correctly detect anomalies with high probability. Experimental results on datasets with manually constructed missing values and inherent missing values demonstrate that our proposed method effectively mitigates the imputation bias and surpasses the baseline methods significantly. The source code of our method is available at https://github.com/jicongfan/ImAD-Anomaly-Detection-With-Missing-Data.", "keywords": "missing data;anomaly detection;deep learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Feng Xiao;Jicong Fan", "authorids": "~Feng_Xiao3;~Jicong_Fan2", "gender": "M;M", "homepage": ";https://jicongfan.github.io/", "dblp": ";139/1570", "google_scholar": ";vdJsnhIAAAAJ", "orcid": "0000-0002-1410-4295;0000-0001-9665-0355", "linkedin": ";", "or_profile": "~Feng_Xiao3;~Jicong_Fan2", "aff": "The Chinese University of Hong Kong,Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxiao2024unsupervised,\ntitle={Unsupervised Anomaly Detection in The Presence of Missing Values},\nauthor={Feng Xiao and Jicong Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AoEeBqP8AD}\n}", "github": "", "reviewers": "aFbK;4yRa;US97;tw7F", "pdf_size": 0, "rating": "6;6;6;8", "confidence": "4;3;3;5", "soundness": "3;3;2;4", "novelty": "3;2;3;4", "presentation": "2;2;3;3", "wc_summary": "78;90;64;163", "wc_strengths": "48;21;56;50", "wc_weaknesses": "32;8;178;31", "wc_questions": "254;1;129;40", "wc_limitations": "1;1;6;24", "wc_review": "413;121;433;308", "wc_reply_reviewers": "27;19;67;9", "wc_reply_authors": "17;18;40;7", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.75, 38.21894163893082 ], "wc_strengths_avg": [ 43.75, 13.460590625971804 ], "wc_weaknesses_avg": [ 62.25, 67.5143503264306 ], "wc_questions_avg": [ 106.0, 97.22911086706492 ], "wc_limitations_avg": [ 8.0, 9.460443964212251 ], "wc_review_avg": [ 318.75, 123.6494541031217 ], "wc_reply_reviewers_avg": [ 30.5, 22.017038856303998 ], "wc_reply_authors_avg": [ 20.5, 12.05197079319395 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9C7ELOyiZekJ:scholar.google.com/&scioq=Unsupervised+Anomaly+Detection+in+The+Presence+of+Missing+Values&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cuhk.edu.cn;cuhk.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK;CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Instance-Optimal Private Density Estimation in the Wasserstein Distance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96229", "id": "Apq6corvfZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Apq6corvfZ", "openreview": "https://openreview.net/forum?id=Apq6corvfZ", "poster": "", "project": "", "author_site": "Vitaly Feldman, Audra McMillan, Satchit Sivakumar, Kunal Talwar", "tldr": "", "abstract": "Estimating the density of a distribution from samples is a fundamental problem in statistics. In many practical settings, the Wasserstein distance is an appropriate error metric for density estimation. For example, when estimating population densities in a geographic region, a small Wasserstein distance means that the estimate is able to capture roughly where the population mass is. In this work we study differentially private density estimation in the Wasserstein distance. We design and analyze instance-optimal algorithms for this problem that can adapt to easy instances.\n\nFor distributions $P$ over $\\mathbb{R}$, we consider a strong notion of instance-optimality: an algorithm that uniformly achieves the instance-optimal estimation rate is competitive with an algorithm that is told that the distribution is either $P$ or $Q_P$ for some distribution $Q_P$ whose probability density function (pdf) is within a factor of 2 of the pdf of $P$. For distributions over $\\mathbb{R}^2$, we use a slightly different notion of instance optimality. We say that an algorithm is instance-optimal if it is competitive with an algorithm that is given a constant multiplicative approximation of the density of the distribution. We characterize the instance-optimal estimation rates in both these settings and show that they are uniformly achievable (up to polylogarithmic factors). Our approach for $\\mathbb{R}^2$ extends to arbitrary metric spaces as it goes via hierarchically separated trees. As a special case our results lead to instance-optimal learning in TV distance for discrete distributions.", "keywords": "Differential Privacy;Density Estimation;Instance Optimality;Wasserstein Distance", "primary_area": "privacy", "supplementary_material": "", "author": "Vitaly Feldman;Audra McMillan;Satchit Sivakumar;Kunal Talwar", "authorids": "~Vitaly_Feldman1;~Audra_McMillan1;~Satchit_Sivakumar1;~Kunal_Talwar1", "gender": "M;F;M;M", "homepage": "https://vtaly.net;https://audramarymcmillan.wixsite.com/mysite;;http://www.kunaltalwar.org", "dblp": "67/1162;179/2626;;06/3696", "google_scholar": "GqZBmfgAAAAJ;;;XD_01h8AAAAJ", "orcid": ";;;", "linkedin": ";;satchit-s-a85344114/;kunal-talwar-128a6159", "or_profile": "~Vitaly_Feldman1;~Audra_McMillan1;~Satchit_Sivakumar1;~Kunal_Talwar1", "aff": "Apple AI Research;Apple;Boston University;Apple", "aff_domain": "apple.com;apple.com;bu.edu;apple.com", "position": "Research Scientist;Researcher;PhD student;Research Scientist", "bibtex": "@inproceedings{\nfeldman2024instanceoptimal,\ntitle={Instance-Optimal Private Density Estimation in the Wasserstein Distance},\nauthor={Vitaly Feldman and Audra McMillan and Satchit Sivakumar and Kunal Talwar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Apq6corvfZ}\n}", "github": "", "reviewers": "FtyV;WXt4;e9RQ;8M6K", "pdf_size": 736129, "rating": "3;5;7;8", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;2;4;4", "wc_summary": "265;71;126;243", "wc_strengths": "13;83;94;1", "wc_weaknesses": "59;160;11;56", "wc_questions": "1;15;54;1", "wc_limitations": "1;21;3;1", "wc_review": "339;350;288;302", "wc_reply_reviewers": "35;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 176.25, 80.521348100985 ], "wc_strengths_avg": [ 47.75, 41.154434754956846 ], "wc_weaknesses_avg": [ 71.5, 54.51834553615874 ], "wc_questions_avg": [ 17.75, 21.695333599647643 ], "wc_limitations_avg": [ 6.5, 8.411301920630361 ], "wc_review_avg": [ 319.75, 25.537961939042827 ], "wc_reply_reviewers_avg": [ 8.75, 15.155444566227676 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.676481425202546, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3795220196602090360&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "apple.com;apple.com;bu.edu;apple.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Apple;Boston University", "aff_unique_dep": "Apple AI Research;", "aff_unique_url": "https://www.apple.com/research/;https://www.bu.edu", "aff_unique_abbr": "Apple AI;BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MVGamba: Unify 3D Content Generation as State Space Sequence Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96228", "id": "AprsVxrwXT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AprsVxrwXT", "openreview": "https://openreview.net/forum?id=AprsVxrwXT", "poster": "/media/PosterPDFs/NeurIPS%202024/96228.png?t=1731790336.6858177", "project": "", "author_site": "Xuanyu Yi, Zike Wu, Qiuhong Shen, Qingshan Xu, Pan Zhou, Joo-Hwee Lim, Shuicheng Yan, Xinchao Wang, Hanwang Zhang", "tldr": "", "abstract": "Recent 3D large reconstruction models (LRMs) can generate high-quality 3D content in sub-seconds by integrating multi-view diffusion models with scalable multi-view reconstructors. Current works further leverage 3D Gaussian Splatting as 3D representation for improved visual quality and rendering efficiency. However, we observe that existing Gaussian reconstruction models often suffer from multi-view inconsistency and blurred textures. We attribute this to the compromise of multi-view information propagation in favor of adopting powerful yet computationally intensive architectures (\\eg, Transformers). \nTo address this issue, we introduce MVGamba, a general and lightweight Gaussian reconstruction model featuring a multi-view Gaussian reconstructor based on the RNN-like State Space Model (SSM). Our Gaussian reconstructor propagates causal context containing multi-view information for cross-view self-refinement while generating a long sequence of Gaussians for fine-detail modeling with linear complexity.\nWith off-the-shelf multi-view diffusion models integrated, MVGamba unifies 3D generation tasks from a single image, sparse images, or text prompts. Extensive experiments demonstrate that MVGamba outperforms state-of-the-art baselines in all 3D content generation scenarios with approximately only $0.1\\times$ of the model size. The codes are available at \\url{https://github.com/SkyworkAI/MVGamba}.", "keywords": "3D Generation and Reconstruction;State Space Model", "primary_area": "generative_models", "supplementary_material": "/attachment/836f2031251d5559938f7200420a18aa0a39f08c.zip", "author": "Xuanyu Yi;Zike Wu;Qiuhong Shen;Qingshan Xu;Pan Zhou;Joo Hwee Lim;Shuicheng YAN;Xinchao Wang;Hanwang Zhang", "authorids": "~Xuanyu_Yi1;~Zike_Wu1;~Qiuhong_Shen1;~Qingshan_Xu1;~Pan_Zhou3;~Joo_Hwee_Lim1;~Shuicheng_YAN3;~Xinchao_Wang1;~Hanwang_Zhang3", "gender": ";M;;;;M;M;;M", "homepage": ";;;;;;https://yanshuicheng.ai/;;https://mreallab.github.io/index.html", "dblp": ";331/1483;;32/9530-1;;236/4727;y/ShuichengYan;;79/8116.html", "google_scholar": ";dQ4wdnoAAAAJ;;k96kDhsAAAAJ;;;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;;YG0DFyYAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Xuanyu_Yi1;~Zike_Wu1;~Qiuhong_Shen1;~Qingshan_Xu1;~Pan_Zhou3;~Joo_Hwee_Lim1;~Shuicheng_YAN3;~Xinchao_Wang1;~Hanwang_Zhang3", "aff": ";Nanyang Technological University;;Nanyang Technological University;;I2R, ASTAR;sea Group;;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;;ntu.edu.sg;;i2r.a-star.edu.sg;sea.com;;ntu.edu.sg", "position": ";MS student;;Research Fellow;;Principal Researcher;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nyi2024mvgamba,\ntitle={{MVG}amba: Unify 3D Content Generation as State Space Sequence Modeling},\nauthor={Xuanyu Yi and Zike Wu and Qiuhong Shen and Qingshan Xu and Pan Zhou and Joo Hwee Lim and Shuicheng YAN and Xinchao Wang and Hanwang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AprsVxrwXT}\n}", "github": "", "reviewers": "XU7f;5iTN;eshR", "pdf_size": 21074124, "rating": "5;6;6", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "90;91;68", "wc_strengths": "67;58;71", "wc_weaknesses": "221;78;115", "wc_questions": "144;3;77", "wc_limitations": "6;3;23", "wc_review": "528;233;354", "wc_reply_reviewers": "67;30;8", "wc_reply_authors": "343;148;112", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 10.614455552060438 ], "wc_strengths_avg": [ 65.33333333333333, 5.436502143433363 ], "wc_weaknesses_avg": [ 138.0, 60.602530200204235 ], "wc_questions_avg": [ 74.66666666666667, 57.586649687425144 ], "wc_limitations_avg": [ 10.666666666666666, 8.806563209081938 ], "wc_review_avg": [ 371.6666666666667, 121.07940461623612 ], "wc_reply_reviewers_avg": [ 35.0, 24.34474618201362 ], "wc_reply_authors_avg": [ 201.0, 101.47906187977893 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15419689316949085885&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";ntu.edu.sg;;ntu.edu.sg;;i2r.a-star.edu.sg;sea.com;;ntu.edu.sg", "author_num": 9, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Nanyang Technological University;Agency for Science, Technology and Research;Sea Group", "aff_unique_dep": ";Institute for Infocomm Research;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.a-star.edu.sg;", "aff_unique_abbr": "NTU;A*STAR;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore;" }, { "title": "Semi-supervised Multi-label Learning with Balanced Binary Angular Margin Loss", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96227", "id": "AqcPvWwktK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AqcPvWwktK", "openreview": "https://openreview.net/forum?id=AqcPvWwktK", "poster": "/media/PosterPDFs/NeurIPS%202024/96227.png?t=1731323281.8765635", "project": "", "author_site": "Ximing Li, Silong Liang, Changchun Li, pengfei wang, Fangming Gu", "tldr": "", "abstract": "Semi-supervised multi-label learning (SSMLL) refers to inducing classifiers using a small number of samples with multiple labels and many unlabeled samples. The prevalent solution of SSMLL involves forming pseudo-labels for unlabeled samples and inducing classifiers using both labeled and pseudo-labeled samples in a self-training manner. Unfortunately, with the commonly used binary type of loss and negative sampling, we have empirically found that learning with labeled and pseudo-labeled samples can result in the variance bias problem between the feature distributions of positive and negative samples for each label. To alleviate this problem, we aim to balance the variance bias between positive and negative samples from the perspective of the feature angle distribution for each label. Specifically, we extend the traditional binary angular margin loss to a balanced extension with feature angle distribution transformations under the Gaussian assumption, where the distributions are iteratively updated during classifier training. We also suggest an efficient prototype-based negative sampling method to maintain high-quality negative samples for each label. With this insight, we propose a novel SSMLL method, namely Semi-Supervised Multi-Label Learning with Balanced Binary Angular Margin loss (S$^2$ML$^2$-BBAM). To evaluate the effectiveness of S$^2$ML$^2$-BBAM, we compare it with existing competitors on benchmark datasets. The experimental results validate that S$^2$ML$^2$-BBAM can achieve very competitive performance.", "keywords": "Semi-supervised Multi-label Learning;Variance Bias;Balanced Binary Angular Margin Loss;Self-training", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/4a79c77d2b7dc0898cd05d11669d96184bd8103c.zip", "author": "Ximing Li;Silong Liang;Changchun Li;pengfei wang;Fangming Gu", "authorids": "~Ximing_Li1;~Silong_Liang2;~Changchun_Li1;~pengfei_wang6;~Fangming_Gu1", "gender": "M;M;M;;M", "homepage": "https://ccst.jlu.edu.cn/info/1367/19282.htm;https://ccst.jlu.edu.cn;;;https://ccst.jlu.edu.cn/info/1211/19219.htm", "dblp": "130/1013-2;;73/7819;;06/6808", "google_scholar": "2WQ--c4AAAAJ;;https://scholar.google.com.hk/citations?user=tO6IqzAAAAAJ;;", "orcid": "0000-0001-8190-5087;;;;0000-0001-8944-3139", "linkedin": ";;;;", "or_profile": "~Ximing_Li1;~Silong_Liang2;~Changchun_Li1;~pengfei_wang6;~Fangming_Gu1", "aff": "Jilin University;Jilin University;Jilin University;;Jilin University", "aff_domain": "jlu.edu.cn;mails.jlu.edu.cn;jlu.edu.cn;;jlu.edu.cn", "position": "Full Professor;MS student;Postdoc;;Lecturer", "bibtex": "@inproceedings{\nli2024semisupervised,\ntitle={Semi-supervised Multi-label Learning with Balanced Binary Angular Margin Loss},\nauthor={Ximing Li and Silong Liang and Changchun Li and pengfei wang and Fangming Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AqcPvWwktK}\n}", "github": "", "reviewers": "nsG4;ZohP;MzwP;zC26", "pdf_size": 1085336, "rating": "6;7;7;7", "confidence": "3;4;5;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;2", "wc_summary": "81;145;114;127", "wc_strengths": "29;152;158;98", "wc_weaknesses": "42;172;70;88", "wc_questions": "170;5;103;5", "wc_limitations": "40;7;7;7", "wc_review": "362;481;452;325", "wc_reply_reviewers": "22;24;15;24", "wc_reply_authors": "59;13;13;13", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.75, 23.39203924415313 ], "wc_strengths_avg": [ 109.25, 51.89111195570972 ], "wc_weaknesses_avg": [ 93.0, 48.46648326421054 ], "wc_questions_avg": [ 70.75, 69.88696230342252 ], "wc_limitations_avg": [ 15.25, 14.289419162443238 ], "wc_review_avg": [ 405.0, 63.70635761052424 ], "wc_reply_reviewers_avg": [ 21.25, 3.6996621467371855 ], "wc_reply_authors_avg": [ 24.5, 19.91858428704209 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DzxrI_BdS4IJ:scholar.google.com/&scioq=Semi-supervised+Multi-label+Learning+with+Balanced+Binary+Angular+Margin+Loss&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "jlu.edu.cn;mails.jlu.edu.cn;jlu.edu.cn;;jlu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Jilin University", "aff_unique_dep": "", "aff_unique_url": "http://www.jlu.edu.cn", "aff_unique_abbr": "JLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "End-to-end Learnable Clustering for Intent Learning in Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96226", "id": "As91fJvY9E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=As91fJvY9E", "openreview": "https://openreview.net/forum?id=As91fJvY9E", "poster": "", "project": "", "author_site": "Yue Liu, Shihao Zhu, Jun Xia, YINGWEI MA, Jian Ma, Xinwang Liu, Shengju Yu, Kejun Zhang, Wenliang Zhong", "tldr": "", "abstract": "Intent learning, which aims to learn users' intents for user understanding and item recommendation, has become a hot research spot in recent years. However, existing methods suffer from complex and cumbersome alternating optimization, limiting performance and scalability. To this end, we propose a novel intent learning method termed \\underline{ELCRec}, by unifying behavior representation learning into an \\underline{E}nd-to-end \\underline{L}earnable \\underline{C}lustering framework, for effective and efficient \\underline{Rec}ommendation. Concretely, we encode user behavior sequences and initialize the cluster centers (latent intents) as learnable neurons. Then, we design a novel learnable clustering module to separate different cluster centers, thus decoupling users' complex intents. Meanwhile, it guides the network to learn intents from behaviors by forcing behavior embeddings close to cluster centers. This allows simultaneous optimization of recommendation and clustering via mini-batch data. Moreover, we propose intent-assisted contrastive learning by using cluster centers as self-supervision signals, further enhancing mutual promotion. Both experimental results and theoretical analyses demonstrate the superiority of ELCRec from six perspectives. Compared to the runner-up, ELCRec improves NDCG@5 by 8.9\\% and reduces computational costs by 22.5\\% on the Beauty dataset. Furthermore, due to the scalability and universal applicability, we deploy this method on the industrial recommendation system with 130 million page views and achieve promising results. The codes are available on GitHub\\footnote{https://github.com/yueliu1999/ELCRec}. A collection (papers, codes, datasets) of deep group recommendation/intent learning methods is available on GitHub\\footnote{https://github.com/yueliu1999/Awesome-Deep-Group-Recommendation}.", "keywords": "Clustering Algorithm;Intent Learning;Recommendation", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/f9cf435e76ad18baad152e027d84c998de8d3a5d.zip", "author": "Yue Liu;Shihao Zhu;Jun Xia;YINGWEI MA;Jian Ma;Xinwang Liu;Shengju Yu;Kejun Zhang;Wenliang Zhong", "authorids": "~Yue_Liu10;~Shihao_Zhu1;~Jun_Xia1;~YINGWEI_MA2;~Jian_Ma9;~Xinwang_Liu1;~Shengju_Yu1;~Kejun_Zhang1;~Wenliang_Zhong1", "gender": "M;M;M;M;M;M;;M;M", "homepage": "https://yueliu1999.github.io/;http://alexzsh.github.com;http://junxia97.github.io/;https://yingweima2022.github.io/;https://www.lamda.nju.edu.cn/maj/;https://xinwangliu.github.io/;;https://person.zju.edu.cn/zhangkejun;https://scholar.google.com/citations?user=M2n8XvQAAAAJ&hl=en", "dblp": "74/1932-8;;;337/0877;;45/6569-2.html;;18/584;81/8863.html", "google_scholar": "5tfpu3MAAAAJ;;aPKKpSYAAAAJ;https://scholar.google.com.hk/citations?user=DYtPD6oAAAAJ;;A56vWC4AAAAJ;;;M2n8XvQAAAAJ", "orcid": ";;;;;;;;0009-0006-8861-9503", "linkedin": ";;;;;;;;", "or_profile": "~Yue_Liu10;~Shihao_Zhu1;~Jun_Xia1;~YINGWEI_MA2;~Jian_Ma9;~Xinwang_Liu1;~Shengju_Yu1;~Kejun_Zhang1;~Wenliang_Zhong1", "aff": "University of Illinois, Urbana Champaign;;Westlake University, China;National University of Defense Technology;Ant Group;National University of Defense Technology;;Zhejiang University;Ant Group", "aff_domain": "uiuc.edu;;westlake.edu.cn;nudt.edu.cn;antgroup.com;nudt.edu.cn;;zju.edu.cn;antgroup.com", "position": "Intern;;PhD student;MS student;Researcher;Full Professor;;Full Professor;Researcher", "bibtex": "@inproceedings{\nliu2024endtoend,\ntitle={End-to-end Learnable Clustering for Intent Learning in Recommendation},\nauthor={Yue Liu and Shihao Zhu and Jun Xia and YINGWEI MA and Jian Ma and Xinwang Liu and Shengju Yu and Kejun Zhang and Wenliang Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=As91fJvY9E}\n}", "github": "", "reviewers": "py4J;WQpX;bYUb", "pdf_size": 2965037, "rating": "4;6;7", "confidence": "3;5;5", "soundness": "3;4;4", "novelty": "3;4;4", "presentation": "3;3;3", "wc_summary": "117;52;46", "wc_strengths": "109;52;68", "wc_weaknesses": "254;59;166", "wc_questions": "90;44;79", "wc_limitations": "1;1;1", "wc_review": "571;208;360", "wc_reply_reviewers": "0;0;27", "wc_reply_authors": "1400;503;569", "reply_reviewers": "0;0;1", "reply_authors": "7;4;5", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.66666666666667, 32.14895885647863 ], "wc_strengths_avg": [ 76.33333333333333, 24.00462918318511 ], "wc_weaknesses_avg": [ 159.66666666666666, 79.73428093082394 ], "wc_questions_avg": [ 71.0, 19.61292091114087 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 379.6666666666667, 148.84518429861575 ], "wc_reply_reviewers_avg": [ 9.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 824.0, 408.18378213740925 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.333333333333333, 1.247219128924647 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2842792563800465051&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uiuc.edu;;westlake.edu.cn;nudt.edu.cn;antgroup.com;nudt.edu.cn;;zju.edu.cn;antgroup.com", "author_num": 9, "aff_unique_index": "0;1;2;3;2;4;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Westlake University;National University of Defense Technology;Ant Group;Zhejiang University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://illinois.edu;https://www.westlake.edu.cn;http://www.nudt.edu.cn/;https://www.antgroup.com;https://www.zju.edu.cn", "aff_unique_abbr": "UIUC;WU;NUDT;Ant Group;ZJU", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "id": "AthZ2g6VE2", "title": "LoCoDL: Communication-Efficient Distributed Learning with Local Training and Compression", "track": "main", "status": "Reject", "tldr": "", "abstract": "In $D$istributed optimization and $L$earning, and even more in the modern framework of federated learning, communication, which is slow and costly, is critical. We introduce LoCoDL, a communication-efficient algorithm that leverages the two popular and effective techniques of $Lo$cal training, which reduces the communication frequency, and $Co$mpression, in which short bitstreams are sent instead of full-dimensional vectors of floats. LoCoDL works with a large class of unbiased compressors that includes widely-used sparsification and quantization methods. LoCoDL provably benefits from local training and compression and enjoys a doubly-accelerated communication complexity, with respect to the condition number of the functions and the model dimension, in the general heterogenous regime with strongly convex functions. This is confirmed in practice, with LoCoDL outperforming existing algorithms.", "keywords": "distributed optimization;local training;compression;communication-efficient algorithm;federated learning", "primary_area": "optimization", "supplementary_material": "/attachment/85667bedc9f6a5b9149c983ccb2dbf664b876198.zip", "author": "Laurent Condat;Arto Maranjyan;Peter Richt\u00e1rik", "authorids": "~Laurent_Condat1;~Arto_Maranjyan1;~Peter_Richt\u00e1rik1", "gender": "M;M;M", "homepage": "https://lcondat.github.io/;https://artomaranjyan.github.io/;https://richtarik.org", "dblp": "88/1335;332/0784;62/8001", "google_scholar": "PixYHyEAAAAJ;93WEFj8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7087-1002;0000-0001-8409-817X;0000-0003-4380-5848", "linkedin": "laurent-condat-40291720b/;arto-maranjyan/;richtarik/", "or_profile": "~Laurent_Condat1;~Arto_Maranjyan1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "research scientist;PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024locodl,\ntitle={LoCo{DL}: Communication-Efficient Distributed Learning with Local Training and Compression},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=AthZ2g6VE2}\n}", "github": "", "project": "", "reviewers": "Q4WD;89ZX;eZWQ", "site": "https://openreview.net/forum?id=AthZ2g6VE2", "pdf_size": 1446548, "rating": "3;7;7", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "1;3;3", "presentation": "2;3;3", "wc_summary": "22;41;86", "wc_strengths": "9;53;79", "wc_weaknesses": "87;102;347", "wc_questions": "8;132;2", "wc_limitations": "2;17;31", "wc_review": "128;345;545", "wc_reply_reviewers": "119;20;19", "wc_reply_authors": "295;28;48", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 49.666666666666664, 26.83695627716046 ], "wc_strengths_avg": [ 47.0, 28.890598240027266 ], "wc_weaknesses_avg": [ 178.66666666666666, 119.18706119187415 ], "wc_questions_avg": [ 47.333333333333336, 59.91846311632352 ], "wc_limitations_avg": [ 16.666666666666668, 11.841546445554407 ], "wc_review_avg": [ 339.3333333333333, 170.286686371999 ], "wc_reply_reviewers_avg": [ 52.666666666666664, 46.906526435265086 ], "wc_reply_authors_avg": [ 123.66666666666667, 121.42578895037998 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10755700086628123598&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "$\\textit{NeuroPath}$: A Neural Pathway Transformer for Joining the Dots of Human Connectomes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96225", "id": "AvBuK8Ezrg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AvBuK8Ezrg", "openreview": "https://openreview.net/forum?id=AvBuK8Ezrg", "poster": "/media/PosterPDFs/NeurIPS%202024/96225.png?t=1731641204.4459186", "project": "", "author_site": "Ziquan Wei, Tingting Dan, Jiaqi Ding, Guorong Wu", "tldr": "", "abstract": "Although modern imaging technologies allow us to study connectivity between two distinct brain regions $\\textit{in-vivo}$, an in-depth understanding of how anatomical structure supports brain function and how spontaneous functional fluctuations emerge remarkable cognition is still elusive. Meanwhile, tremendous efforts have been made in the realm of machine learning to establish the nonlinear mapping between neuroimaging data and phenotypic traits. However, the absence of neuroscience insight in the current approaches poses significant challenges in understanding cognitive behavior from transient neural activities. \nTo address this challenge, we put the spotlight on the coupling mechanism of structural connectivity (SC) and functional connectivity (FC) by formulating such network neuroscience question into an expressive graph representation learning problem for high-order topology. Specifically, we introduce the concept of $\\textit{topological detour}$ to characterize how a ubiquitous instance of FC (direct link) is supported by neural pathways (detour) physically wired by SC, which forms a cyclic loop interacted by brain structure and function. In the clich\\'e of machine learning, the multi-hop detour pathway underlying SC-FC coupling allows us to devise a novel multi-head self-attention mechanism within Transformer to capture multi-modal feature representation from paired graphs of SC and FC. Taken together, we propose a biological-inspired deep model, coined as $\\textit{NeuroPath}$, to find putative connectomic feature representations from the unprecedented amount of neuroimages, which can be plugged into various downstream applications such as task recognition and disease diagnosis. \nWe have evaluated $\\textit{NeuroPath}$ on large-scale public datasets including Human Connectome Project (HCP) and UK Biobank (UKB) under different experiment settings of supervised and zero-shot learning, where the state-of-the-art performance by our $\\textit{NeuroPath}$ indicates great potential in network neuroscience.", "keywords": "Graph transformer;Brain newtork;Neural pathway;Topological detour;structure-function coupling;fMRI;DWI;Cognition classification;Alzheimer's disease diagnosis", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Ziquan Wei;Tingting Dan;Jiaqi Ding;Guorong Wu", "authorids": "~Ziquan_Wei1;~Tingting_Dan1;~Jiaqi_Ding1;~Guorong_Wu1", "gender": "M;F;F;M", "homepage": "https://ziquanw.com/;https://www.researchgate.net/profile/Tingting_Dan;;https://www.acmlab.org/", "dblp": "206/5669;223/8556;253/0206;03/5225-1.html", "google_scholar": "z1IYb2oAAAAJ;FMcmg0gAAAAJ;https://scholar.google.com/citations?hl=en;XVsMB2kAAAAJ", "orcid": "0000-0001-6553-4482;;0009-0005-0131-4348;0000-0002-0550-6145", "linkedin": "weiziquan142857/;;;", "or_profile": "~Ziquan_Wei1;~Tingting_Dan1;~Jiaqi_Ding1;~Guorong_Wu1", "aff": "University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina, Chapel Hill", "aff_domain": "unc.edu;unc.edu;cs.unc.edu;unc.edu", "position": "PhD student;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nwei2024textitneuropath,\ntitle={\\${\\textbackslash}textit\\{NeuroPath\\}\\$: A Neural Pathway Transformer for Joining the Dots of Human Connectomes},\nauthor={Ziquan Wei and Tingting Dan and Jiaqi Ding and Guorong Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AvBuK8Ezrg}\n}", "github": "", "reviewers": "4uWQ;g82y;6veV;Qyot", "pdf_size": 6052373, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "82;111;58;57", "wc_strengths": "112;99;56;45", "wc_weaknesses": "78;126;33;36", "wc_questions": "55;173;15;13", "wc_limitations": "34;18;38;2", "wc_review": "361;527;200;153", "wc_reply_reviewers": "33;21;0;19", "wc_reply_authors": "0;11;0;11", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.0, 22.034064536530703 ], "wc_strengths_avg": [ 78.0, 28.151376520518493 ], "wc_weaknesses_avg": [ 68.25, 37.79136806203237 ], "wc_questions_avg": [ 64.0, 65.12296062065974 ], "wc_limitations_avg": [ 23.0, 14.247806848775006 ], "wc_review_avg": [ 310.25, 147.00233841677485 ], "wc_reply_reviewers_avg": [ 18.25, 11.818946653572814 ], "wc_reply_authors_avg": [ 5.5, 5.5 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "unc.edu;unc.edu;cs.unc.edu;unc.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of North Carolina;University of North Carolina at Chapel Hill", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC;UNC Chapel Hill", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "NeuMA: Neural Material Adaptor for Visual Grounding of Intrinsic Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96224", "id": "AvWB40qXZh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AvWB40qXZh", "openreview": "https://openreview.net/forum?id=AvWB40qXZh", "poster": "", "project": "", "author_site": "Junyi Cao, Shanyan Guan, Yanhao Ge, Wei Li, Xiaokang Yang, Chao Ma", "tldr": "", "abstract": "While humans effortlessly discern intrinsic dynamics and adapt to new scenarios, modern AI systems often struggle. Current methods for visual grounding of dynamics either use pure neural-network-based simulators (black box), which may violate physical laws, or traditional physical simulators (white box), which rely on expert-defined equations that may not fully capture actual dynamics. We propose the Neural Material Adaptor (NeuMA), which integrates existing physical laws with learned corrections, facilitating accurate learning of actual dynamics while maintaining the generalizability and interpretability of physical priors. Additionally, we propose Particle-GS, a particle-driven 3D Gaussian Splatting variant that bridges simulation and observed images, allowing back-propagate image gradients to optimize the simulator. Comprehensive experiments on various dynamics in terms of grounded particle accuracy, dynamic rendering quality, and generalization ability demonstrate that NeuMA can accurately capture intrinsic dynamics. Project Page: https://xjay18.github.io/projects/neuma.html.", "keywords": "Intuitive Physics;Differentiable Renderer;Neural Simulation", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Junyi Cao;Shanyan Guan;Yanhao Ge;Wei Li;Xiaokang Yang;Chao Ma", "authorids": "~Junyi_Cao1;~Shanyan_Guan1;~Yanhao_Ge1;~Wei_Li81;~Xiaokang_Yang1;~Chao_Ma3", "gender": "M;M;M;;M;M", "homepage": "https://xjay18.github.io/;https://github.com/syguan96;https://scholar.google.com/citations?user=h6tuBAcAAAAJ&hl=zh-CN;;https://icne.sjtu.edu.cn/info/1064/1078.htm;https://vision.sjtu.edu.cn/", "dblp": "49/1101;226/6490;238/8561;;06/3071-1.html;79/1552-4", "google_scholar": "Qer0nEoAAAAJ;m8kEVxgAAAAJ;h6tuBAcAAAAJ;;yDEavdMAAAAJ;syoPhv8AAAAJ", "orcid": ";0000-0003-0875-167X;;;0000-0003-4029-3322;", "linkedin": ";;;;;", "or_profile": "~Junyi_Cao1;~Shanyan_Guan1;~Yanhao_Ge1;~Wei_Li81;~Xiaokang_Yang1;~Chao_Ma3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Future Imaging Area;;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;vivo.com;;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncao2024neuma,\ntitle={Neu{MA}: Neural Material Adaptor for Visual Grounding of Intrinsic Dynamics},\nauthor={Junyi Cao and Shanyan Guan and Yanhao Ge and Wei Li and Xiaokang Yang and Chao Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=AvWB40qXZh}\n}", "github": "", "reviewers": "Rwta;se8S;zoZv;TxrR", "pdf_size": 4224941, "rating": "4;6;7;7", "confidence": "5;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "46;79;87;87", "wc_strengths": "102;44;28;82", "wc_weaknesses": "289;139;23;72", "wc_questions": "2;88;18;126", "wc_limitations": "1;3;23;111", "wc_review": "440;353;179;478", "wc_reply_reviewers": "0;17;0;14", "wc_reply_authors": "110;28;82;108", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 16.917077170717167 ], "wc_strengths_avg": [ 64.0, 29.427877939124322 ], "wc_weaknesses_avg": [ 130.75, 100.21570485707318 ], "wc_questions_avg": [ 58.5, 50.64336086793609 ], "wc_limitations_avg": [ 34.5, 44.997222136483046 ], "wc_review_avg": [ 362.5, 115.2269499726518 ], "wc_reply_reviewers_avg": [ 7.75, 7.8222439235810075 ], "wc_reply_authors_avg": [ 82.0, 33.075670817082454 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7302967433402214, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7689575450249691364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;vivo.com;;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Future Imaging Area", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;", "aff_unique_abbr": "SJTU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "GS-Blur: A 3D Scene-Based Dataset for Realistic Image Deblurring", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97820", "id": "Awu8YlEofZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Awu8YlEofZ", "openreview": "https://openreview.net/forum?id=Awu8YlEofZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97820.png?t=1732176366.0538983", "project": "", "author_site": "Dongwoo Lee, JoonKyu Park, Kyoung Mu Lee", "tldr": "", "abstract": "To train a deblurring network, an appropriate dataset with paired blurry and sharp images is essential.\nExisting datasets collect blurry images either synthetically by aggregating consecutive sharp frames or using sophisticated camera systems to capture real blur.\nHowever, these methods offer limited diversity in blur types (blur trajectories) or require extensive human effort to reconstruct large-scale datasets, failing to fully reflect real-world blur scenarios.\nTo address this, we propose GS-Blur, a dataset of synthesized realistic blurry images created using a novel approach.\nTo this end, we first reconstruct 3D scenes from multi-view images using 3D Gaussian Splatting~(3DGS), then render blurry images by moving the camera view along the randomly generated motion trajectories.\nBy adopting various camera trajectories in reconstructing our GS-Blur, our dataset contains realistic and diverse types of blur, offering a large-scale dataset that generalizes well to real-world blur.\nUsing GS-Blur with various deblurring methods, we demonstrate its ability to generalize effectively compared to previous synthetic or real blur datasets, showing significant improvements in deblurring performance.\nWe will publicly release our dataset.", "keywords": "Image Deblurring;3D Gaussian Splatting;Synthetic Dataset", "primary_area": "", "supplementary_material": "", "author": "Dongwoo Lee;JoonKyu Park;Kyoung Mu Lee", "authorids": "~Dongwoo_Lee1;~JoonKyu_Park1;~Kyoung_Mu_Lee2", "gender": "M;M;M", "homepage": ";https://jkpark0825.github.io/;https://cv.snu.ac.kr/kmlee/", "dblp": ";290/1681;17/4029", "google_scholar": ";anUxIqcAAAAJ;Hofj9kAAAAAJ", "orcid": "0000-0002-5626-8175;0000-0003-3247-6831;", "linkedin": ";;", "or_profile": "~Dongwoo_Lee1;~JoonKyu_Park1;~Kyoung_Mu_Lee1", "aff": "Seoul National University;Meta Reality Labs Research;Seoul National University", "aff_domain": "snu.ac.kr;meta.com;snu.ac.kr", "position": "PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\nlee2024gsblur,\ntitle={{GS}-Blur: A 3D Scene-Based Dataset for Realistic Image Deblurring},\nauthor={Dongwoo Lee and JoonKyu Park and Kyoung Mu Lee},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Awu8YlEofZ}\n}", "github": "", "reviewers": "h9V9;EMvE;zVqH", "pdf_size": 44455255, "rating": "6;7;10", "confidence": "4;3;4", "wc_summary_and_contributions": "73;28;73", "wc_strengths": "29;26;33", "wc_improvement": "25;25;24", "wc_limitations": "1;11;21", "wc_correctness": "1;1;1", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "133;109;156", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 58.0, 21.213203435596427 ], "wc_strengths_avg": [ 29.333333333333332, 2.8674417556808756 ], "wc_improvement_avg": [ 24.666666666666668, 0.4714045207910317 ], "wc_limitations_avg": [ 11.0, 8.16496580927726 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 132.66666666666666, 19.189117286165672 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2773500981126145, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12742697808974626202&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;meta.com;snu.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;Meta", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.snu.ac.kr;https://www.meta.com", "aff_unique_abbr": "SNU;MRL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Cross-Care: Assessing the Healthcare Implications of Pre-training Data on Language Model Bias", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97819", "id": "AxToUp4FMU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=AxToUp4FMU", "openreview": "https://openreview.net/forum?id=AxToUp4FMU", "poster": "/media/PosterPDFs/NeurIPS%202024/97819.png?t=1731420255.0545533", "project": "", "author_site": "Shan Chen, Jack Gallifant, Mingye Gao, Pedro Moreira, Nikolaj Munch, Ajay Muthukkumar, Arvind Rajan, Jaya Kolluri, Amelia Fiske, Janna Hastings, Hugo Aerts, Brian Anthony, Leo Anthony Celi, William La Cava, Danielle Bitterman", "tldr": "", "abstract": "Large language models (LLMs) are increasingly essential in processing natural languages, yet their application is frequently compromised by biases and inaccuracies originating in their training data.\nIn this study, we introduce \\textbf{Cross-Care}, the first benchmark framework dedicated to assessing biases and real world knowledge in LLMs, specifically focusing on the representation of disease prevalence across diverse demographic groups.\nWe systematically evaluate how demographic biases embedded in pre-training corpora like $ThePile$ influence the outputs of LLMs.\nWe expose and quantify discrepancies by juxtaposing these biases against actual disease prevalences in various U.S. demographic groups.\nOur results highlight substantial misalignment between LLM representation of disease prevalence and real disease prevalence rates across demographic subgroups, indicating a pronounced risk of bias propagation and a lack of real-world grounding for medical applications of LLMs.\nFurthermore, we observe that various alignment methods minimally resolve inconsistencies in the models' representation of disease prevalence across different languages.\nFor further exploration and analysis, we make all data and a data visualization tool available at: \\url{www.crosscare.net}.", "keywords": "llm;healthcare;bias", "primary_area": "", "supplementary_material": "", "author": "Shan Chen;Jack Gallifant;Mingye Gao;Pedro Jos\u00e9 Ferreira Moreira;Nikolaj Munch;Ajay Muthukkumar;Arvind Rajan;Jaya Kolluri;Amelia Fiske;Janna Hastings;Hugo Aerts;Brian W. Anthony;Leo Anthony Celi;William La Cava;Danielle Bitterman", "authorids": "~Shan_Chen1;~Jack_Gallifant1;~Mingye_Gao1;~Pedro_Jos\u00e9_Ferreira_Moreira1;~Nikolaj_Munch1;~Ajay_Muthukkumar1;~Arvind_Rajan1;~Jaya_Kolluri1;~Amelia_Fiske2;~Janna_Hastings1;~Hugo_Aerts2;~Brian_W._Anthony1;~Leo_Anthony_Celi1;~William_La_Cava1;~Danielle_Bitterman1", "gender": "M;M;F;M;M;M;M;F;F;;Not Specified;M;M;M;F", "homepage": "https://shanchen.dev;https://www.jackgallifant.com/;https://onelab.mit.edu/people;;;;;;http://ameliamfiske.com;;https://aim.hms.harvard.edu/team/hugo-aerts;https://devicerealization.mit.edu;;https://cavalab.org/;https://aim.hms.harvard.edu/", "dblp": ";366/7591;;;;;;;;;;;;148/0921;281/1619", "google_scholar": ";SlLz8KoAAAAJ;;;;;;;;https://scholar.google.com/citations?hl=en;v7G4QvIAAAAJ;;kssA7YwAAAAJ;iZB7inEAAAAJ;aCFYAEsAAAAJ", "orcid": ";0000-0003-1306-2334;;;;;0000-0003-4833-845X;;;0000-0002-3469-4923;0000-0002-2122-2003;;0000-0001-6712-6626;0000-0002-1332-2960;", "linkedin": ";jackgallifant/;;pjfmoreira/;nikolajmunch;ajay-muthukkumar-2a3094202/;;jayakolluri/;;;;;leo-anthony-celi-b25131/;williamlacava/;", "or_profile": "~Shan_Chen1;~Jack_Gallifant1;~Mingye_Gao1;~Pedro_Jos\u00e9_Ferreira_Moreira1;~Nikolaj_Munch1;~Ajay_Muthukkumar1;~Arvind_Rajan1;~Jaya_Kolluri1;~Amelia_Fiske2;~Janna_Hastings1;~Hugo_Aerts2;~Brian_W._Anthony1;~Leo_Anthony_Celi1;~William_La_Cava1;~Danielle_Bitterman1", "aff": "Maastricht University ;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Universitat Pompeu Fabra;Aarhus University;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;Mass General Brigham;Technische Universit\u00e4t M\u00fcnchen;University of Zurich;;Massachusetts Institute of Technology;Beth Israel Deaconess Medical Center;Boston Children's Hospital;Harvard University", "aff_domain": "maastrichtuniversity.nl;mit.edu;mit.edu;upf.edu;au.dk;unc.edu;unc.edu;massgeneralbrigham.org;tum.de;uzh.ch;;mit.edu;bidmc.harvard.edu;childrens.harvard.edu;harvard.edu", "position": "PhD student;Postdoc;PhD student;MS student;MS student;MS student;Researcher;Intern;Postdoc;Assistant Professor;;Principal Researcher;Physician;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024crosscare,\ntitle={Cross-Care: Assessing the Healthcare Implications of Pre-training Data on Language Model Bias},\nauthor={Shan Chen and Jack Gallifant and Mingye Gao and Pedro Jos{\\'e} Ferreira Moreira and Nikolaj Munch and Ajay Muthukkumar and Arvind Rajan and Jaya Kolluri and Amelia Fiske and Janna Hastings and Hugo Aerts and Brian W. Anthony and Leo Anthony Celi and William La Cava and Danielle Bitterman},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=AxToUp4FMU}\n}", "github": "", "reviewers": "eTqX;dKg5;3CK8", "pdf_size": 4330798, "rating": "6;7;8", "confidence": "4;3;4", "wc_summary_and_contributions": "106;99;78", "wc_strengths": "93;85;73", "wc_improvement": "118;6;42", "wc_limitations": "74;1;8", "wc_correctness": "4;1;17", "wc_clarity": "2;16;117", "wc_relation_to_prior_work": "4;1;2", "wc_documentation": "2;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "404;211;339", "wc_reply_reviewers": "87;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 94.33333333333333, 11.897712198383164 ], "wc_strengths_avg": [ 83.66666666666667, 8.219218670625303 ], "wc_improvement_avg": [ 55.333333333333336, 46.68571040003092 ], "wc_limitations_avg": [ 27.666666666666668, 32.8870119584549 ], "wc_correctness_avg": [ 7.333333333333333, 6.944222218666553 ], "wc_clarity_avg": [ 45.0, 51.23150072627842 ], "wc_relation_to_prior_work_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_documentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 318.0, 80.17896648539858 ], "wc_reply_reviewers_avg": [ 29.0, 41.012193308819754 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17414268944667691100&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "maastrichtuniversity.nl;mit.edu;mit.edu;upf.edu;au.dk;unc.edu;unc.edu;massgeneralbrigham.org;tum.de;uzh.ch;;mit.edu;bidmc.harvard.edu;childrens.harvard.edu;harvard.edu", "author_num": 15, "aff_unique_index": "0;1;1;2;3;4;4;5;6;7;1;8;9;10", "aff_unique_norm": "Maastricht University;Massachusetts Institute of Technology;Universitat Pompeu Fabra;Aarhus University;University of North Carolina;Massachusetts General Hospital;Technische Universit\u00e4t M\u00fcnchen;University of Zurich;Beth Israel Deaconess Medical Center;Boston Children's Hospital;Harvard University", "aff_unique_dep": ";;;;;;;;;;", "aff_unique_url": "https://www.maastrichtuniversity.nl;https://web.mit.edu;https://www.upf.edu/;https://au.dk;https://www.unc.edu;https://www.massgeneralbrigham.org;https://www.tum.de;https://www.unizh.ch;https://www.bethisraeldeaconess.org;https://www.childrenshospital.org;https://www.harvard.edu", "aff_unique_abbr": "MU;MIT;UPF;AU;UNC;MGH;TUM;UZH;BIDMC;BCH;Harvard", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;1;1;2;3;1;1;1;4;5;1;1;1;1", "aff_country_unique": "Netherlands;United States;Spain;Denmark;Germany;Switzerland" }, { "title": "MVSplat360: Feed-Forward 360 Scene Synthesis from Sparse Views", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96223", "id": "B0OWOkMwhz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B0OWOkMwhz", "openreview": "https://openreview.net/forum?id=B0OWOkMwhz", "poster": "/media/PosterPDFs/NeurIPS%202024/96223.png?t=1731739883.9387507", "project": "", "author_site": "Yuedong Chen, Chuanxia Zheng, Haofei Xu, Bohan Zhuang, Andrea Vedaldi, Tat-Jen Cham, Jianfei Cai", "tldr": "", "abstract": "We introduce MVSplat360, a feed-forward approach for 360\u00b0 novel view synthesis (NVS) of diverse real-world scenes, using only sparse observations. This setting is inherently ill-posed due to minimal overlap among input views and insufficient visual information provided, making it challenging for conventional methods to achieve high-quality results. Our MVSplat360 addresses this by effectively combining geometry-aware 3D reconstruction with temporally consistent video generation. Specifically, it refactors a feed-forward 3D Gaussian Splatting (3DGS) model to render features directly into the latent space of a pre-trained Stable Video Diffusion (SVD) model, where these features then act as pose and visual cues to guide the denoising process and produce photorealistic 3D-consistent views. Our model is end-to-end trainable and supports rendering arbitrary views with as few as 5 sparse input views. To evaluate MVSplat360's performance, we introduce a new benchmark using the challenging DL3DV-10K dataset, where MVSplat360 achieves superior visual quality compared to state-of-the-art methods on wide-sweeping or even 360\u00b0 NVS tasks. Experiments on the existing benchmark RealEstate10K also confirm the effectiveness of our model. Readers are highly recommended to view the video results at [donydchen.github.io/mvsplat360](https://donydchen.github.io/mvsplat360).", "keywords": "novel view synthesis;feed-forward 3DGS;3D gaussians splatting;latent video diffusion model", "primary_area": "generative_models", "supplementary_material": "", "author": "Yuedong Chen;Chuanxia Zheng;Haofei Xu;Bohan Zhuang;Andrea Vedaldi;Tat-Jen Cham;Jianfei Cai", "authorids": "~Yuedong_Chen1;~Chuanxia_Zheng1;~Haofei_Xu1;~Bohan_Zhuang1;~Andrea_Vedaldi1;~Tat-Jen_Cham1;~Jianfei_Cai1", "gender": "M;M;M;M;M;M;M", "homepage": "https://donydchen.github.io;http://www.chuanxiaz.com/;https://haofeixu.github.io/;https://bohanzhuang.github.io/;https://www.robots.ox.ac.uk/~vedaldi/;https://personal.ntu.edu.sg/astjcham/;https://jianfei-cai.github.io/", "dblp": "236/6258.html;195/8988;236/6248;145/1096;99/2825;29/3808;83/6096", "google_scholar": "https://scholar.google.com.sg/citations?user=GqgGZlQAAAAJ;mvpE6bIAAAAJ;https://scholar.google.com/citations?;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ;bRT7t28AAAAJ;Lx3X7W0AAAAJ;https://scholar.google.com.tw/citations?user=N6czCoUAAAAJ", "orcid": "0000-0003-0943-1512;;;;0000-0003-1374-2858;0000-0001-5264-2572;", "linkedin": "donydchen;chuanxia-zheng-80a3b8110/;;bohan-zhuang/;;tatjencham/;", "or_profile": "~Yuedong_Chen1;~Chuanxia_Zheng1;~Haofei_Xu1;~Bohan_Zhuang1;~Andrea_Vedaldi1;~Tat-Jen_Cham1;~Jianfei_Cai1", "aff": "Monash University;University of Oxford;Department of Computer Science, ETHZ - ETH Zurich;Monash University;Meta;Nanyang Technological University;Monash University", "aff_domain": "monash.edu;ox.ac.uk;inf.ethz.ch;monash.edu;meta.com;ntu.edu.sg;monash.edu", "position": "PhD student;Postdoc;PhD student;Assistant Professor;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024mvsplat,\ntitle={{MVS}plat360: Feed-Forward 360 Scene Synthesis from Sparse Views},\nauthor={Yuedong Chen and Chuanxia Zheng and Haofei Xu and Bohan Zhuang and Andrea Vedaldi and Tat-Jen Cham and Jianfei Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B0OWOkMwhz}\n}", "github": "", "reviewers": "3TXW;pb1U;RhQM;4RiL", "pdf_size": 10078810, "rating": "5;5;5;5", "confidence": "4;4;3;5", "soundness": "2;2;3;3", "novelty": "2;3;2;2", "presentation": "3;3;3;2", "wc_summary": "100;79;27;101", "wc_strengths": "35;117;22;280", "wc_weaknesses": "232;65;19;655", "wc_questions": "231;161;15;124", "wc_limitations": "6;18;6;10", "wc_review": "604;440;89;1170", "wc_reply_reviewers": "71;0;0;666", "wc_reply_authors": "348;30;30;994", "reply_reviewers": "1;0;0;2", "reply_authors": "3;2;2;5", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 30.036436206714004 ], "wc_strengths_avg": [ 113.5, 102.79713031014047 ], "wc_weaknesses_avg": [ 242.75, 250.86089272742373 ], "wc_questions_avg": [ 132.75, 78.09089255476594 ], "wc_limitations_avg": [ 10.0, 4.898979485566356 ], "wc_review_avg": [ 575.75, 390.28347069790186 ], "wc_reply_reviewers_avg": [ 184.25, 279.6447523197959 ], "wc_reply_authors_avg": [ 350.5, 393.5539988362461 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17181784113436480838&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "monash.edu;ox.ac.uk;inf.ethz.ch;monash.edu;meta.com;ntu.edu.sg;monash.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3;4;0", "aff_unique_norm": "Monash University;University of Oxford;ETH Zurich;Meta;Nanyang Technological University", "aff_unique_dep": ";;Department of Computer Science;Meta Platforms, Inc.;", "aff_unique_url": "https://www.monash.edu;https://www.ox.ac.uk;https://www.ethz.ch;https://meta.com;https://www.ntu.edu.sg", "aff_unique_abbr": "Monash;Oxford;ETHZ;Meta;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;2;0;3;4;0", "aff_country_unique": "Australia;United Kingdom;Switzerland;United States;Singapore" }, { "title": "Learning from Teaching Regularization: Generalizable Correlations Should be Easy to Imitate", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96222", "id": "B1FOes6cyq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B1FOes6cyq", "openreview": "https://openreview.net/forum?id=B1FOes6cyq", "poster": "/media/PosterPDFs/NeurIPS%202024/96222.png?t=1731485364.8108964", "project": "", "author_site": "Can Jin, Tong Che, Hongwu Peng, Yiyuan Li, Dimitris Metaxas, Marco Pavone", "tldr": "", "abstract": "Generalization remains a central challenge in machine learning. In this work, we propose *Learning from Teaching* (**LoT**), a novel regularization technique for deep neural networks to enhance generalization. Inspired by the human ability to capture concise and abstract patterns, we hypothesize that generalizable correlations are expected to be easier to imitate. LoT operationalizes this concept to improve the generalization of the main model with auxiliary student learners. The student learners are trained by the main model and, in turn, provide feedback to help the main model capture more generalizable and imitable correlations. Our experimental results across several domains, including Computer Vision, Natural Language Processing, and methodologies like Reinforcement Learning, demonstrate that the introduction of LoT brings significant benefits compared to training models on the original dataset. The results suggest the effectiveness and efficiency of LoT in identifying generalizable information at the right scales while discarding spurious data correlations, thus making LoT a valuable addition to current machine learning. Code is available at https://github.com/jincan333/LoT.", "keywords": "Generalization;Regularization;Learning from Teaching", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/087ee19a9e5e261e289b194dce91143250431eeb.zip", "author": "Can Jin;Tong Che;Hongwu Peng;Yiyuan Li;Dimitris N. Metaxas;Marco Pavone", "authorids": "~Can_Jin1;~Tong_Che1;~Hongwu_Peng1;~Yiyuan_Li1;~Dimitris_N._Metaxas1;~Marco_Pavone1", "gender": "M;M;M;;M;M", "homepage": "https://jincan333.github.io/;;https://harveyp123.github.io/;https://nativeatom.github.io/;https://web.stanford.edu/~pavone/;https://www.cs.rutgers.edu/~dnm/", "dblp": ";125/0738;292/5365;14/5062;91/3382-1.html;m/DNMetaxas", "google_scholar": "RK-8dz0AAAAJ;7b5tlJkAAAAJ;9P2qtQoAAAAJ;XdQcrwUAAAAJ;RhOpyXcAAAAJ;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ", "orcid": "0009-0007-3407-1658;;;;;", "linkedin": ";;hongwu-peng-374893119/;;;dimitris-metaxas-1bb74914/", "or_profile": "~Can_Jin1;~Tong_Che1;~Hongwu_Peng1;~Yiyuan_Li1;~Marco_Pavone1;~Dimitris_Metaxas1", "aff": "Rutgers University;NVIDIA;University of Connecticut;Carnegie Mellon University;Stanford University;Rutgers University", "aff_domain": "rutgers.edu;nvidia.com;uconn.edu;cmu.edu;stanford.edu;cs.rutgers.edu", "position": "PhD student;Researcher;PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\njin2024learning,\ntitle={Learning from Teaching Regularization: Generalizable Correlations Should be Easy to Imitate},\nauthor={Can Jin and Tong Che and Hongwu Peng and Yiyuan Li and Dimitris N. Metaxas and Marco Pavone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B1FOes6cyq}\n}", "github": "", "reviewers": "NPF4;DocJ;JrP1", "pdf_size": 1137423, "rating": "5;6;6", "confidence": "3;3;5", "soundness": "2;3;3", "novelty": "3;2;2", "presentation": "2;3;4", "wc_summary": "68;48;76", "wc_strengths": "63;37;163", "wc_weaknesses": "289;12;177", "wc_questions": "4;180;106", "wc_limitations": "1;59;8", "wc_review": "425;336;530", "wc_reply_reviewers": "67;30;17", "wc_reply_authors": "286;45;50", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 64.0, 11.775681155103795 ], "wc_strengths_avg": [ 87.66666666666667, 54.31594813884981 ], "wc_weaknesses_avg": [ 159.33333333333334, 113.77267783708979 ], "wc_questions_avg": [ 96.66666666666667, 72.15415595946101 ], "wc_limitations_avg": [ 22.666666666666668, 25.849994627121728 ], "wc_review_avg": [ 430.3333333333333, 79.28990407920095 ], "wc_reply_reviewers_avg": [ 38.0, 21.18175315375634 ], "wc_reply_authors_avg": [ 127.0, 112.44850673382314 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8287598123145315889&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "rutgers.edu;nvidia.com;uconn.edu;cmu.edu;stanford.edu;cs.rutgers.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Rutgers University;NVIDIA;University of Connecticut;Carnegie Mellon University;Stanford University", "aff_unique_dep": ";NVIDIA Corporation;;;", "aff_unique_url": "https://www.rutgers.edu;https://www.nvidia.com;https://www.uconn.edu;https://www.cmu.edu;https://www.stanford.edu", "aff_unique_abbr": "Rutgers;NVIDIA;UConn;CMU;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DeformableTST: Transformer for Time Series Forecasting without Over-reliance on Patching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96221", "id": "B1Iq1EOiVU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B1Iq1EOiVU", "openreview": "https://openreview.net/forum?id=B1Iq1EOiVU", "poster": "/media/PosterPDFs/NeurIPS%202024/96221.png?t=1732638651.3609042", "project": "", "author_site": "Donghao Luo, Xue Wang", "tldr": "", "abstract": "With the proposal of patching technique in time series forecasting, Transformerbased models have achieved compelling performance and gained great interest from\nthe time series community. But at the same time, we observe a new problem that\nthe recent Transformer-based models are overly reliant on patching to achieve ideal\nperformance, which limits their applicability to some forecasting tasks unsuitable\nfor patching. In this paper, we intent to handle this emerging issue. Through diving\ninto the relationship between patching and full attention (the core mechanism\nin Transformer-based models), we further find out the reason behind this issue\nis that full attention relies overly on the guidance of patching to focus on the\nimportant time points and learn non-trivial temporal representation. Based on this\nfinding, we propose DeformableTST as an effective solution to this emerging\nissue. Specifically, we propose deformable attention, a sparse attention mechanism\nthat can better focus on the important time points by itself, to get rid of the need of\npatching. And we also adopt a hierarchical structure to alleviate the efficiency issue\ncaused by the removal of patching. Experimentally, our DeformableTST achieves\nthe consistent state-of-the-art performance in a broader range of time series tasks,\nespecially achieving promising performance in forecasting tasks unsuitable for\npatching, therefore successfully reducing the reliance on patching and broadening\nthe applicability of Transformer-based models. Code is available at this repository:\nhttps://github.com/luodhhh/DeformableTST.", "keywords": "Time series forecasting;Transformer;Deep learning", "primary_area": "other", "supplementary_material": "", "author": "Donghao Luo;Xue Wang", "authorids": "~Donghao_Luo2;~Xue_Wang10", "gender": "M;", "homepage": "https://scholar.google.com.hk/citations?hl=zh-CN&user=N9buLPcAAAAJ&view_op=list_works&gmla=AJsN-F5ymBP7m0S1jkq8tAOUf3_7FncTQ1vRGOiRrSsiiD0S1sGUud1O0YBQl8nx_uRy_AH2Nxn5eZB-fNwniib0RNHhOlB6Z-eEwpdIhUJPXP8Xy_QcJDpqw6jDUm4u2kI4XiN3lMcsppA17bCST12QfhdpAL19OQ;http://faculty.dpi.tsinghua.edu.cn/wangxue.html", "dblp": ";39/2811-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;", "orcid": ";0000-0003-4842-3160", "linkedin": ";", "or_profile": "~Luo_donghao2;~wang_xue3", "aff": "Department of Precision Instrument, Tsinghua University, Tsinghua University;Department of Precision Instrument, Tsinghua University, Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nluo2024deformabletst,\ntitle={Deformable{TST}: Transformer for Time Series Forecasting without Over-reliance on Patching},\nauthor={Donghao Luo and Xue Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B1Iq1EOiVU}\n}", "github": "", "reviewers": "fiTw;eY9h;Qoam;reZp", "pdf_size": 36536607, "rating": "5;6;7;7", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "56;44;79;59", "wc_strengths": "138;47;90;69", "wc_weaknesses": "281;109;22;28", "wc_questions": "211;19;49;401", "wc_limitations": "57;2;49;6", "wc_review": "743;221;289;563", "wc_reply_reviewers": "23;16;9;22", "wc_reply_authors": "70;48;44;827", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.5, 12.579745625409124 ], "wc_strengths_avg": [ 86.0, 33.65263734092768 ], "wc_weaknesses_avg": [ 110.0, 104.53468323958322 ], "wc_questions_avg": [ 170.0, 152.05591076969023 ], "wc_limitations_avg": [ 28.5, 24.70323865407125 ], "wc_review_avg": [ 454.0, 210.3069185737835 ], "wc_reply_reviewers_avg": [ 17.5, 5.5901699437494745 ], "wc_reply_authors_avg": [ 247.25, 334.86517809411 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8015764256664023431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Department of Precision Instrument", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "B1tCaKP5nB", "title": "A Conditional Independence Test in the Presence of Discretization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Testing conditional independence has many important applications, such as Bayesian network learning and causal discovery. Although several approaches have been developed for learning conditional independence structures for observed variables, those existing methods generally fail to work when the variables of interest can not be directly observed and only discretized values of those variables are available. For example, if $X_1$, $\\tilde{X}_2$ and $X_3$ are the observed variables, where $\\tilde{X}_2$ is a discretization of the latent variable $X_2$, applying the existing methods to the observations of $X_1$, $\\tilde{X}_2$ and $X_3$ would lead to a false conclusion about the underlying conditional independence of variables $X_1$, $X_2$ and $X_3$.\nMotivated by this, we propose a conditional independence test specifically designed to accommodate the presence of discretization. \n% To achieve this, we design the bridge equations to estimate the underlying conditional independence. \nTo achieve this, a bridge function and nodewise regression are used to recover the precision coefficients reflecting the conditional dependence of the latent continuous variables under the nonparanormal model.\nAn appropriate test statistic has been proposed and its asymptotic distribution under the null hypothesis of conditional independence has been derived. Both theoretical results and empirical validation have been provided, demonstrating the effectiveness of our testing methods.", "keywords": "conditional independence test;causal discovery;discretization", "primary_area": "causal_inference", "supplementary_material": "/attachment/96467a2ba5e2e9d479497c16455d8e208d1f7fda.zip", "author": "Boyang Sun;Yu Yao;Guang-Yuan Hao;Yumou Qiu;Kun Zhang", "authorids": "~Boyang_Sun1;~Yu_Yao3;~Guang-Yuan_Hao1;~Yumou_Qiu1;~Kun_Zhang1", "gender": "M;M;;;M", "homepage": ";https://a5507203.github.io/;https://github.com/GuangyuanHao;https://yumou.org;http://www.andrew.cmu.edu/user/kunz1/", "dblp": ";230/9625;;;96/3115-1", "google_scholar": ";OkcaMKAAAAAJ;;-dudT7IAAAAJ;RGoypN4AAAAJ", "orcid": "0000-0002-0118-4425;;;;", "linkedin": ";yu-yao-150377134/;;;", "or_profile": "~Boyang_Sun1;~Yu_Yao3;~Guang-Yuan_Hao1;~Yumou_Qiu1;~Kun_Zhang1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;University of Sydney;Hong Kong University of Science and Technology;Peking University;Carnegie Mellon University", "aff_domain": "mbzuai.ac.ae;sydney.edu.au;ust.hk;pku.edu.cn;cmu.edu", "position": "PhD student;Lecturer;MS student;Associate Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Conditional Independence Test in the Presence of Discretization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=B1tCaKP5nB}\n}", "github": "", "project": "", "reviewers": "dVXS;otB5;szaU;Q2XK", "site": "https://openreview.net/forum?id=B1tCaKP5nB", "pdf_size": 3454725, "rating": "3;5;7;9", "confidence": "3;3;4;5", "soundness": "2;3;4;4", "novelty": "1;2;3;4", "presentation": "2;3;3;3", "wc_summary": "107;85;62;85", "wc_strengths": "45;49;64;93", "wc_weaknesses": "234;156;15;10", "wc_questions": "89;3;98;7", "wc_limitations": "5;3;15;27", "wc_review": "480;296;254;222", "wc_reply_reviewers": "91;0;18;17", "wc_reply_authors": "430;109;18;27", "reply_reviewers": "1;0;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 2.23606797749979 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.75, 15.911866640969563 ], "wc_strengths_avg": [ 62.75, 18.846418757949746 ], "wc_weaknesses_avg": [ 103.75, 95.34247479481535 ], "wc_questions_avg": [ 49.25, 44.38679420728647 ], "wc_limitations_avg": [ 12.5, 9.526279441628825 ], "wc_review_avg": [ 313.0, 99.92497185388645 ], "wc_reply_reviewers_avg": [ 31.5, 35.0891721190455 ], "wc_reply_authors_avg": [ 146.0, 167.75726511838465 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17348284049908641003&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of Sydney;Hong Kong University of Science and Technology;Peking University;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.sydney.edu.au;https://www.ust.hk;http://www.pku.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "MBZUAI;USYD;HKUST;Peking U;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;2;3", "aff_country_unique": "United Arab Emirates;Australia;China;United States" }, { "title": "Matryoshka Query Transformer for Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96220", "id": "B1vGiSgELw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B1vGiSgELw", "openreview": "https://openreview.net/forum?id=B1vGiSgELw", "poster": "", "project": "", "author_site": "Wenbo Hu, Zi-Yi Dou, Liunian Li, Amita Kamath, Nanyun Peng, Kai-Wei Chang", "tldr": "", "abstract": "Large Vision-Language Models (LVLMs) typically encode an image into a fixed number of visual tokens (e.g., 576) and process these tokens with a language model. Despite their strong performance, LVLMs face challenges in adapting to varying computational constraints. This raises the question: can we achieve flexibility in the number of visual tokens to suit different tasks and computational resources? We answer this with an emphatic yes. Inspired by Matryoshka Representation Learning, we introduce the Matryoshka Query Transformer (MQT), capable of encoding an image into $m$ visual tokens during inference, where $m$ can be any number up to a predefined maximum. This is achieved by employing a query transformer with $M$ latent query tokens to compress the visual embeddings. During each training step, we randomly select $m \\leq M$ latent query tokens and train the model using only these first $m$ tokens, discarding the rest.\nCombining MQT with LLaVA, we train a single model once, and flexibly and drastically reduce the number of inference-time visual tokens while maintaining similar or better performance compared to training independent models for each number of tokens. \nOur model, MQT-LLaVA, matches LLaVA-1.5 performance across 11 benchmarks using a maximum of 256 tokens instead of LLaVA\u2019s fixed 576. Reducing to 16 tokens (8x less TFLOPs) only sacrifices the performance by 2.4 points on MMBench. On certain tasks such as ScienceQA and MMMU, we can even go down to only 2 visual tokens with performance drops of just 3\\% and 6\\% each.\nOur exploration of the trade-off between the accuracy and computational cost brought about by the number of visual tokens facilitates future research to achieve the best of both worlds.", "keywords": "multimodal;vision-language;visual tokens;efficiency;llm", "primary_area": "machine_vision", "supplementary_material": "/attachment/59991263bbde955a963f717376d0d033f42caee7.zip", "author": "Wenbo Hu;Zi-Yi Dou;Liunian Harold Li;Amita Kamath;Nanyun Peng;Kai-Wei Chang", "authorids": "~Wenbo_Hu4;~Zi-Yi_Dou1;~Liunian_Harold_Li1;~Amita_Kamath1;~Nanyun_Peng1;~Kai-Wei_Chang1", "gender": "M;;M;F;F;M", "homepage": "https://gordonhu608.github.io/;https://zdou0830.github.io/;;https://amitakamath.github.io/;https://violetpeng.github.io/;http://kwchang.net", "dblp": "95/7076-6;205/8985;236/6323;267/9823;117/4036;18/2428", "google_scholar": "OjCQ61IAAAAJ;RWogNsEAAAAJ;ntbhn9UAAAAJ;B_ek5IIAAAAJ;XxRXvX0AAAAJ;fqDBtzYAAAAJ", "orcid": ";;;;;0000-0001-5365-0072", "linkedin": ";;;;;kai-wei-chang-41239040", "or_profile": "~Wenbo_Hu4;~Zi-Yi_Dou1;~Liunian_Harold_Li1;~Amita_Kamath1;~Nanyun_Peng1;~Kai-Wei_Chang1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;UCLA Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;Amazon", "aff_domain": "cs.ucla.edu;ucla.edu;cs.ucla.edu;cs.ucla.edu;ucla.edu;amazon.com", "position": "MS student;PhD student;PhD student;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nhu2024matryoshka,\ntitle={Matryoshka Query Transformer for Large Vision-Language Models},\nauthor={Wenbo Hu and Zi-Yi Dou and Liunian Harold Li and Amita Kamath and Nanyun Peng and Kai-Wei Chang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B1vGiSgELw}\n}", "github": "", "reviewers": "gY5g;k14Y;jmsV;L7Jp", "pdf_size": 13009564, "rating": "5;6;6;7", "confidence": "5;4;3;5", "soundness": "3;2;3;4", "novelty": "3;2;2;3", "presentation": "3;2;3;4", "wc_summary": "64;29;103;125", "wc_strengths": "57;16;52;146", "wc_weaknesses": "150;64;164;177", "wc_questions": "29;43;137;2", "wc_limitations": "6;3;30;1", "wc_review": "306;155;486;451", "wc_reply_reviewers": "0;64;208;107", "wc_reply_authors": "0;155;72;88", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.25, 36.77890020106637 ], "wc_strengths_avg": [ 67.75, 47.8663503935698 ], "wc_weaknesses_avg": [ 138.75, 44.200537327050675 ], "wc_questions_avg": [ 52.75, 50.82506763399336 ], "wc_limitations_avg": [ 10.0, 11.683321445547923 ], "wc_review_avg": [ 349.5, 131.01240399290444 ], "wc_reply_reviewers_avg": [ 94.75, 75.66166466580022 ], "wc_reply_authors_avg": [ 78.75, 55.106147569939964 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14177291864252939115&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.ucla.edu;ucla.edu;cs.ucla.edu;cs.ucla.edu;ucla.edu;amazon.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SLowcalSGD : Slow Query Points Improve Local-SGD for Stochastic Convex Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96219", "id": "B29BlRe26Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B29BlRe26Z", "openreview": "https://openreview.net/forum?id=B29BlRe26Z", "poster": "", "project": "", "author_site": "Tehila Dahan, Kfir Y. Levy", "tldr": "", "abstract": "We consider distributed learning scenarios where $M$ machines interact with a parameter server along several communication rounds in order to minimize a joint objective function. \nFocusing on the heterogeneous case, where different machines may draw samples from different data-distributions, we design the first local update method that provably benefits over the two most prominent distributed baselines: namely Minibatch-SGD and Local-SGD. \nKey to our approach is a slow querying technique that we customize to the distributed setting, which in turn enables a better mitigation of the bias caused by local updates.", "keywords": "Stochastic Convex Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Tehila Dahan;Kfir Yehuda Levy", "authorids": "~Tehila_Dahan1;~Kfir_Yehuda_Levy1", "gender": "F;M", "homepage": ";http://kfiryehud.wixsite.com/kfir-y-levy", "dblp": "378/2189;83/11388", "google_scholar": ";", "orcid": ";", "linkedin": "tehila-dahan-b86481178/;", "or_profile": "~Tehila_Dahan1;~Kfir_Yehuda_Levy1", "aff": "Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion", "aff_domain": "technion.ac.il;technion.ac.il", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\ndahan2024slowcalsgd,\ntitle={{SL}owcal{SGD} : Slow Query Points Improve Local-{SGD} for Stochastic Convex Optimization},\nauthor={Tehila Dahan and Kfir Yehuda Levy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B29BlRe26Z}\n}", "github": "", "reviewers": "3Xai;5jSD;mxJX", "pdf_size": 2277711, "rating": "4;6;7", "confidence": "4;4;4", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;4;4", "wc_summary": "47;54;202", "wc_strengths": "42;23;121", "wc_weaknesses": "102;48;193", "wc_questions": "6;3;67", "wc_limitations": "11;1;1", "wc_review": "208;129;584", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 101.0, 71.47493733237313 ], "wc_strengths_avg": [ 62.0, 42.43426288586461 ], "wc_weaknesses_avg": [ 114.33333333333333, 59.83495819520745 ], "wc_questions_avg": [ 25.333333333333332, 29.48822740612863 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 307.0, 198.50608722824262 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13188985713875155302&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "email": "technion.ac.il;technion.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Differentiable Structure Learning with Partial Orders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96218", "id": "B2cTLakrhV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B2cTLakrhV", "openreview": "https://openreview.net/forum?id=B2cTLakrhV", "poster": "/media/PosterPDFs/NeurIPS%202024/96218.png?t=1729066282.0020058", "project": "", "author_site": "Taiyu Ban, Lyuzhou Chen, Xiangyu Wang, Xin Wang, Derui Lyu, Huanhuan Chen", "tldr": "", "abstract": "Differentiable structure learning is a novel line of causal discovery research that transforms the combinatorial optimization of structural models into a continuous optimization problem. However, the field has lacked feasible methods to integrate partial order constraints, a critical prior information typically used in real-world scenarios, into the differentiable structure learning framework. The main difficulty lies in adapting these constraints, typically suited for the space of total orderings, to the continuous optimization context of structure learning in the graph space. To bridge this gap, this paper formalizes a set of equivalent constraints that map partial orders onto graph spaces and introduces a plug-and-play module for their efficient application. This module preserves the equivalent effect of partial order constraints in the graph space, backed by theoretical validations of correctness and completeness. It significantly enhances the quality of recovered structures while maintaining good efficiency, which learns better structures using 90\\% fewer samples than the data-based method on a real-world dataset. This result, together with a comprehensive evaluation on synthetic cases, demonstrates our method's ability to effectively improve differentiable structure learning with partial orders.", "keywords": "Causal discovery;Continuous optimization;Differentiable Structure Learning;Partial Orders", "primary_area": "causal_inference", "supplementary_material": "/attachment/f5b8ea412919ed6322681b5a6bd2476f6fc70cbd.zip", "author": "Taiyu Ban;Lyuzhou Chen;Xiangyu Wang;Xin Wang;Derui Lyu;Huanhuan Chen", "authorids": "~Taiyu_Ban1;~Lyuzhou_Chen1;~Xiangyu_Wang7;~Xin_Wang46;~Derui_Lyu1;~Huanhuan_Chen1", "gender": "M;M;M;M;M;", "homepage": ";https://scholar.google.com/citations?hl=zh-CN&user=K0i72_4AAAAJ;;https://github.com/wangxin0126;;", "dblp": ";336/0856;02/6128-16.html?q=Xiangyu%20Wang%200016;;340/0074;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;;", "orcid": "0000-0002-1379-7528;;0000-0001-9843-5982;;0000-0002-6416-6013;", "linkedin": ";;;;;", "or_profile": "~Taiyu_Ban1;~Lyuzhou_Chen1;~Xiangyu_Wang7;~Xin_Wang46;~Derui_Lyu1;~Huanhuan_Chen1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;", "position": "PhD student;PhD student;Assistant Professor;PhD student;PhD student;", "bibtex": "@inproceedings{\nban2024differentiable,\ntitle={Differentiable Structure Learning with Partial Orders},\nauthor={Taiyu Ban and Lyuzhou Chen and Xiangyu Wang and Xin Wang and Derui Lyu and Huanhuan Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B2cTLakrhV}\n}", "github": "", "reviewers": "99Ub;FFcG;SXfo;jpLj", "pdf_size": 10719527, "rating": "6;7;7;7", "confidence": "3;4;3;4", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "3;3;3;2", "wc_summary": "126;100;218;47", "wc_strengths": "97;64;321;57", "wc_weaknesses": "103;222;475;81", "wc_questions": "248;5;516;115", "wc_limitations": "6;6;39;1", "wc_review": "580;397;1569;301", "wc_reply_reviewers": "16;17;121;21", "wc_reply_authors": "0;0;18;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.75, 61.924853653440316 ], "wc_strengths_avg": [ 134.75, 108.58723451676997 ], "wc_weaknesses_avg": [ 220.25, 156.555700950173 ], "wc_questions_avg": [ 221.0, 190.8179760924007 ], "wc_limitations_avg": [ 13.0, 15.149257407543116 ], "wc_review_avg": [ 711.75, 504.9798882133822 ], "wc_reply_reviewers_avg": [ 43.75, 44.63952844733018 ], "wc_reply_authors_avg": [ 4.5, 7.794228634059948 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13939839973570143005&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "B3jt0Ran2t", "title": "Probabilistic Analysis of Stable Matching in Large Markets with Siblings", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study a practical matching problem that involves assigning children to daycare centers. The collective preferences of siblings from the same family introduce complementarities, which can lead to the non-existence of stable matchings, as observed in the well-studied hospital-doctor matching problems involving couples. Intriguingly, stable matchings have been observed in real-world daycare markets, even with a substantial number of sibling applicants.\n\nOur research systematically explores the presence of stable matchings in these markets. We conduct a probabilistic analysis of large random matching markets that incorporate sibling preferences. Specifically, we examine scenarios where daycares have similar priorities over children, a common characteristic in practical markets. Our analysis reveals that as the market size approaches infinity, the likelihood of stable matchings existing converges to 1.\n\nTo facilitate our investigation, we introduce significant modifications to the Sorted Deferred Acceptance algorithm proposed by ed by Ashlagi et al. [2014]. These adaptations are essential to accommodate a more stringent stability concept, as the original algorithm may yield matchings that fail to meet this criterion. By leveraging our revised algorithm, we successfully identify stable matchings in all real-life datasets examined. Additionally, we conduct comprehensive empirical investigations using synthetic datasets to validate the efficacy of our algorithm in identifying stable matchings.", "keywords": "Stable Matching;Stability;Siblings;Deferred Acceptance", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/62faf363baf1f713b37b2ef46561d99b763cb1ef.zip", "author": "Zhaohong Sun;Tomohiko Yokoyama;Makoto Yokoo", "authorids": "~Zhaohong_Sun3;~Tomohiko_Yokoyama1;~Makoto_Yokoo2", "gender": "M;M;M", "homepage": "https://sites.google.com/view/zhaohong-sun/home;https://www.linkedin.com/in/tomohiko-yokoyama-aa172b205/?originalSubdomain=jp;https://sites.google.com/view/makoto-yokoo/", "dblp": "159/6552-1;;32/2416.html", "google_scholar": "b5FGWKgAAAAJ;;https://scholar.google.co.jp/citations?user=dHXCX-sAAAAJ", "orcid": ";;0000-0003-4929-396X", "linkedin": ";;", "or_profile": "~Zhaohong_Sun3;~Tomohiko_Yokoyama1;~Makoto_Yokoo2", "aff": "CyberAgent Inc.;The University of Tokyo;Kyushu University", "aff_domain": "cyberagent.co.jp;u-tokyo.ac.jp;kyushu-u.ac.jp", "position": "Researcher;PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024probabilistic,\ntitle={Probabilistic Analysis of Stable Matching in Large Markets with Siblings},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=B3jt0Ran2t}\n}", "github": "", "project": "", "reviewers": "52vJ;VWTA;1G7X;3tiK", "site": "https://openreview.net/forum?id=B3jt0Ran2t", "pdf_size": 611232, "rating": "6;6;6;7", "confidence": "3;4;3;3", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "194;385;63;291", "wc_strengths": "272;102;173;71", "wc_weaknesses": "259;191;28;129", "wc_questions": "122;188;82;254", "wc_limitations": "10;3;6;14", "wc_review": "857;869;352;759", "wc_reply_reviewers": "759;69;0;11", "wc_reply_authors": "437;0;0;0", "reply_reviewers": "3;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 233.25, 119.25681322255764 ], "wc_strengths_avg": [ 154.5, 77.26092155805547 ], "wc_weaknesses_avg": [ 151.75, 84.96285953285707 ], "wc_questions_avg": [ 161.5, 65.45800180268262 ], "wc_limitations_avg": [ 8.25, 4.14578098794425 ], "wc_review_avg": [ 709.25, 210.62570474659546 ], "wc_reply_reviewers_avg": [ 209.75, 318.1912750218019 ], "wc_reply_authors_avg": [ 109.25, 189.22655072689983 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_rvh3xYRCgAJ:scholar.google.com/&scioq=Probabilistic+Analysis+of+Stable+Matching+in+Large+Markets+with+Siblings&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "CyberAgent Inc.;University of Tokyo;Kyushu University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cyberagent.co.jp;https://www.u-tokyo.ac.jp;https://www.kyushu-u.ac.jp", "aff_unique_abbr": "CyberAgent;UTokyo;Kyushu U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "On improved Conditioning Mechanisms and Pre-training Strategies for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96217", "id": "B3rZZRALhk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B3rZZRALhk", "openreview": "https://openreview.net/forum?id=B3rZZRALhk", "poster": "/media/PosterPDFs/NeurIPS%202024/96217.png?t=1731751396.9434814", "project": "", "author_site": "Tariq Berrada Ifriqi, Pietro Astolfi, Melissa Hall, Reyhane Askari Hemmat, Yohann Benchetrit, Marton Havasi, Matthew Muckley, Karteek Alahari, Adriana Romero-Soriano, Jakob Verbeek, Michal Drozdzal", "tldr": "", "abstract": "Large-scale training of latent diffusion models (LDMs) has enabled unprecedented quality in image generation. \nHowever, large-scale end-to-end training of these models is computationally costly, and hence most research focuses either on finetuning pretrained models or experiments at smaller scales.\nIn this work we aim to improve the training efficiency and performance of LDMs with the goal of scaling to larger datasets and higher resolutions.\nWe focus our study on two points that are critical for good performance and efficient training: \n(i) the mechanisms used for semantic level (\\eg a text prompt, or class name) and low-level (crop size, random flip, \\etc) conditioning of the model, and \n(ii) pre-training strategies to transfer representations learned on smaller and lower-resolution datasets to larger ones.\nThe main contributions of our work are the following: \nwe present systematic experimental study of these points, \nwe propose a novel conditioning mechanism that disentangles semantic and low-level conditioning, \nwe obtain state-of-the-art performance on CC12M for text-to-image at 512 resolution.", "keywords": "Generative Models;Generative Modeling;Diffusion;Latent diffusion;Computer vision;text-to-image diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Tariq Berrada;Pietro Astolfi;Melissa Hall;Reyhane Askari Hemmat;Yohann Benchetrit;Marton Havasi;Matthew J. Muckley;Karteek Alahari;Adriana Romero-Soriano;Jakob Verbeek;Michal Drozdzal", "authorids": "~Tariq_Berrada1;~Pietro_Astolfi1;~Melissa_Hall1;~Reyhane_Askari_Hemmat1;~Yohann_Benchetrit1;~Marton_Havasi1;~Matthew_J._Muckley1;~Karteek_Alahari1;~Adriana_Romero-Soriano1;~Jakob_Verbeek1;~Michal_Drozdzal1", "gender": "M;M;F;;M;M;M;M;;Not Specified;M", "homepage": ";;;;;https://mhavasi.github.io/;https://mmuckley.github.io/;http://thoth.inrialpes.fr/people/alahari;;http://lear.inrialpes.fr/~verbeek;", "dblp": ";208/4543;287/5067;;42/9992.html;222/3332;158/8226;a/KarteekAlahari;;v/JakobJVerbeek;24/9794", "google_scholar": "StQIGq8AAAAJ;https://scholar.google.it/citations?user=4zR244YAAAAJ;DcfHZoUAAAAJ;;;EaYZfmoAAAAJ;Iz9v6dcAAAAJ;https://scholar.google.fr/citations?user=qcyG7rwAAAAJ;;oZGA-rAAAAAJ;https://scholar.google.ca/citations?user=XK_ktwQAAAAJ", "orcid": ";0000-0002-5192-9608;;;;;0000-0002-6525-8817;;;0000-0003-1419-1816;", "linkedin": ";pietroastolfi;;;;marton-havasi/;matthew-muckley-33a9b558/;;;jakob-verbeek-3b11aa14a/;", "or_profile": "~Tariq_Berrada1;~Pietro_Astolfi1;~Melissa_Hall1;~Reyhane_Askari_Hemmat1;~Yohann_Benchetrit1;~Marton_Havasi1;~Matthew_J._Muckley1;~Karteek_Alahari1;~Adriana_Romero-Soriano1;~Jakob_Verbeek1;~Michal_Drozdzal1", "aff": "INRIA;Meta;Research, Facebook;;Meta AI;Meta Facebook;Meta;Inria;;Meta;Meta", "aff_domain": "inria.fr;meta.com;research.facebook.com;;ai.meta.com;meta.com;fb.com;inria.fr;;meta.com;fb.com", "position": "PhD student;Postdoc;Researcher;;Researcher;Researcher;Research Engineer;Research director (eq. Full professor);;Research Scientist;Research Scientst", "bibtex": "@inproceedings{\nberrada2024on,\ntitle={On improved Conditioning Mechanisms and Pre-training Strategies for Diffusion Models},\nauthor={Tariq Berrada and Pietro Astolfi and Melissa Hall and Reyhane Askari Hemmat and Yohann Benchetrit and Marton Havasi and Matthew J. Muckley and Karteek Alahari and Adriana Romero-Soriano and Jakob Verbeek and Michal Drozdzal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B3rZZRALhk}\n}", "github": "", "reviewers": "cMfV;JPTZ;rR4N", "pdf_size": 8463259, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "3;3;2", "novelty": "3;2;2", "presentation": "3;3;2", "wc_summary": "90;46;166", "wc_strengths": "81;74;36", "wc_weaknesses": "134;74;220", "wc_questions": "82;36;36", "wc_limitations": "9;20;10", "wc_review": "396;250;468", "wc_reply_reviewers": "99;297;342", "wc_reply_authors": "410;0;424", "reply_reviewers": "1;1;2", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.66666666666667, 49.56701412117628 ], "wc_strengths_avg": [ 63.666666666666664, 19.770910168449223 ], "wc_weaknesses_avg": [ 142.66666666666666, 59.91846311632352 ], "wc_questions_avg": [ 51.333333333333336, 21.684607956387456 ], "wc_limitations_avg": [ 13.0, 4.96655480858378 ], "wc_review_avg": [ 371.3333333333333, 90.69117315863153 ], "wc_reply_reviewers_avg": [ 246.0, 105.55567251455508 ], "wc_reply_authors_avg": [ 278.0, 196.65875690308496 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10803325817568740355&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "inria.fr;meta.com;research.facebook.com;;ai.meta.com;meta.com;fb.com;inria.fr;;meta.com;fb.com", "author_num": 11, "aff_unique_index": "0;1;1;1;1;1;0;1;1", "aff_unique_norm": "INRIA;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.inria.fr;https://meta.com", "aff_unique_abbr": "INRIA;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0;1;1", "aff_country_unique": "France;United States" }, { "title": "Towards Accurate and Fair Cognitive Diagnosis via Monotonic Data Augmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96216", "id": "B4k2TecKT2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B4k2TecKT2", "openreview": "https://openreview.net/forum?id=B4k2TecKT2", "poster": "", "project": "", "author_site": "zheng zhang, Wei Song, Qi Liu, Qingyang Mao, Yiyan Wang, Weibo Gao, Zhenya Huang, Shijin Wang, Enhong Chen", "tldr": "", "abstract": "Intelligent education stands as a prominent application of machine learning. Within this domain, cognitive diagnosis (CD) is a key research focus that aims to diagnose students' proficiency levels in specific knowledge concepts. As a crucial task within the field of education, cognitive diagnosis encompasses two fundamental requirements: accuracy and fairness. Existing studies have achieved significant success by primarily utilizing observed historical logs of student-exercise interactions. However, real-world scenarios often present a challenge, where a substantial number of students engage with a limited number of exercises. This data sparsity issue can lead to both inaccurate and unfair diagnoses. To this end, we introduce a monotonic data augmentation framework, CMCD, to tackle the data sparsity issue and thereby achieve accurate and fair CD results. Specifically, CMCD integrates the monotonicity assumption, a fundamental educational principle in CD, to establish two constraints for data augmentation. These constraints are general and can be applied to the majority of CD backbones. Furthermore, we provide theoretical analysis to guarantee the accuracy and convergence speed of CMCD. Finally, extensive experiments on real-world datasets showcase the efficacy of our framework in addressing the data sparsity issue with accurate and fair CD results.", "keywords": "Cognitive Diagnosis;Intelligent Education;Monotonicity Assumption", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zheng Zhang;Wei Song;Qi Liu;Qingyang Mao;Yiyan Wang;Weibo Gao;Zhenya Huang;Shijin Wang;Enhong Chen", "authorids": "~Zheng_Zhang20;~Wei_Song13;~Qi_Liu3;~Qingyang_Mao1;~Yiyan_Wang3;~Weibo_Gao1;~Zhenya_Huang2;~Shijin_Wang1;~Enhong_Chen1", "gender": ";F;M;M;M;;M;M;M", "homepage": ";https://mercidaiha.github.io;http://staff.ustc.edu.cn/~qiliuql/;https://github.com/Double680;https://kns.cnki.net/kcms2/author/detail?v=oslmGXurZ-oPztxRRG3IHPU3_duWlgrEi7SM3z42AOxi_P2XpTP0LvDKwm1dvN1BbFEOhyGKIY5k_DH1RWkqPNF-XSA8hcXMIMNHoPGlVVN5GeSqAtKkHQ==&uniplatform=NZKPT&language=gb;;http://staff.ustc.edu.cn/~huangzhy/;;http://staff.ustc.edu.cn/~cheneh", "dblp": ";;95/2446-3;349/7702;;;178/8690;74/5750-1.html;07/258", "google_scholar": ";;5EoHAFwAAAAJ;H8UI6vMAAAAJ;;;dVZuU90AAAAJ;;Q9h02J0AAAAJ", "orcid": ";0009-0007-9386-7357;0000-0001-6956-5550;;;;0000-0003-1661-0420;0000-0002-9202-7678;0000-0002-4835-4102", "linkedin": ";;;;;;;;", "or_profile": "~Zheng_Zhang20;~Wei_Song13;~Qi_Liu3;~Qingyang_Mao1;~Yiyan_Wang3;~Weibo_Gao1;~Zhenya_Huang2;~Shijin_Wang1;~Enhong_Chen1", "aff": ";University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Beijing Normal University;;University of Science and Technology of China;State Key Laboratory of Cognitive Intelligence;University of Science and Technology of China", "aff_domain": ";mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;bnu.edu.cn;;ustc.edu.cn;iflytek.com;ustc.edu.cn", "position": ";Undergrad student;Full Professor;PhD student;PhD student;;Associate Professor;Vice Dean;Full Professor", "bibtex": "@inproceedings{\nzhang2024towards,\ntitle={Towards Accurate and Fair Cognitive Diagnosis via Monotonic Data Augmentation},\nauthor={Zheng Zhang and Wei Song and Qi Liu and Qingyang Mao and Yiyan Wang and Weibo Gao and Zhenya Huang and Shijin Wang and Enhong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B4k2TecKT2}\n}", "github": "", "reviewers": "xrHo;ujU9;A4mA", "pdf_size": 0, "rating": "6;7;7", "confidence": "2;4;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "87;62;99", "wc_strengths": "68;87;104", "wc_weaknesses": "81;115;111", "wc_questions": "88;31;3", "wc_limitations": "18;40;10", "wc_review": "342;335;327", "wc_reply_reviewers": "155;15;23", "wc_reply_authors": "70;32;28", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 82.66666666666667, 15.412837362262522 ], "wc_strengths_avg": [ 86.33333333333333, 14.70449666674185 ], "wc_weaknesses_avg": [ 102.33333333333333, 15.173075568988056 ], "wc_questions_avg": [ 40.666666666666664, 35.3679076125361 ], "wc_limitations_avg": [ 22.666666666666668, 12.684198393626966 ], "wc_review_avg": [ 334.6666666666667, 6.128258770283412 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 64.19414995845719 ], "wc_reply_authors_avg": [ 43.333333333333336, 18.926759422104517 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5025438759276723557&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;bnu.edu.cn;;ustc.edu.cn;iflytek.com;ustc.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "University of Science and Technology of China;Beijing Normal University;State Key Laboratory of Cognitive Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.bnu.edu.cn;", "aff_unique_abbr": "USTC;BNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Genetic-guided GFlowNets for Sample Efficient Molecular Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96215", "id": "B4q98aAZwt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B4q98aAZwt", "openreview": "https://openreview.net/forum?id=B4q98aAZwt", "poster": "/media/PosterPDFs/NeurIPS%202024/96215.png?t=1733309274.5675147", "project": "", "author_site": "Hyeonah Kim, Minsu Kim, Sanghyeok Choi, Jinkyoo Park", "tldr": "", "abstract": "The challenge of discovering new molecules with desired properties is crucial in domains like drug discovery and material design. Recent advances in deep learning-based generative methods have shown promise but face the issue of sample efficiency due to the computational expense of evaluating the reward function. This paper proposes a novel algorithm for sample-efficient molecular optimization by distilling a powerful genetic algorithm into deep generative policy using GFlowNets training, the off-policy method for amortized inference. This approach enables the deep generative policy to learn from domain knowledge, which has been explicitly integrated into the genetic algorithm. Our method achieves state-of-the-art performance in the official molecular optimization benchmark, significantly outperforming previous methods. It also demonstrates effectiveness in designing inhibitors against SARS-CoV-2 with substantially fewer reward calls.", "keywords": "Molecular Optimization;Sample Efficiency;Generative Flow Networks;Genetic Algorithm", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Hyeonah Kim;Minsu Kim;Sanghyeok Choi;Jinkyoo Park", "authorids": "~Hyeonah_Kim1;~Minsu_Kim2;~Sanghyeok_Choi1;~Jinkyoo_Park1", "gender": "F;M;M;M", "homepage": ";https://minsuukim.github.io/;https://hyeok9855.github.io/;http://silab.kaist.ac.kr/", "dblp": ";;338/9899;156/7535", "google_scholar": ";https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;wQqjHlIAAAAJ;sH2a0nkAAAAJ", "orcid": "0000-0002-0629-1879;;;0000-0003-2620-1479", "linkedin": "hyeonahkimm/;;hyeok9855/;", "or_profile": "~Hyeonah_Kim1;~Minsu_Kim2;~Sanghyeok_Choi1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nkim2024geneticguided,\ntitle={Genetic-guided {GF}lowNets for Sample Efficient Molecular Optimization},\nauthor={Hyeonah Kim and Minsu Kim and Sanghyeok Choi and Jinkyoo Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B4q98aAZwt}\n}", "github": "", "reviewers": "gNyQ;FCer;4tbn;kTKB", "pdf_size": 2983223, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;4;3", "wc_summary": "30;61;80;130", "wc_strengths": "76;28;131;153", "wc_weaknesses": "100;101;65;294", "wc_questions": "111;2;166;14", "wc_limitations": "2;1;73;2", "wc_review": "319;193;515;593", "wc_reply_reviewers": "9;21;33;26", "wc_reply_authors": "0;0;98;82", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 36.29996556472196 ], "wc_strengths_avg": [ 97.0, 48.71857961804716 ], "wc_weaknesses_avg": [ 140.0, 90.08606995534882 ], "wc_questions_avg": [ 73.25, 68.21794118851726 ], "wc_limitations_avg": [ 19.5, 30.890937182286976 ], "wc_review_avg": [ 405.0, 157.94302770302968 ], "wc_reply_reviewers_avg": [ 22.25, 8.757139944068497 ], "wc_reply_authors_avg": [ 45.0, 45.35416188179427 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18020502117154717749&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Model Sensitivity Aware Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96214", "id": "B5vQ7IQW7d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B5vQ7IQW7d", "openreview": "https://openreview.net/forum?id=B5vQ7IQW7d", "poster": "", "project": "", "author_site": "Zhenyi Wang, Heng Huang", "tldr": "", "abstract": "Continual learning (CL) aims to adapt to non-stationary data distributions while retaining previously acquired knowledge. However, CL models typically face a trade-off between preserving old task knowledge and excelling in new task performance. Existing approaches often sacrifice one for the other. To overcome this limitation, orthogonal to existing approaches, we propose a novel perspective that views the CL model ability in preserving old knowledge and performing well in new task as a matter of model sensitivity to parameter updates. \\textit{Excessive} parameter sensitivity can lead to two drawbacks: (1) significant forgetting of previous knowledge; and (2) overfitting to new tasks. To reduce parameter sensitivity, we optimize the model's performance based on the parameter distribution, which achieves the worst-case CL performance within a distribution neighborhood. This innovative learning paradigm offers dual benefits: (1) reduced forgetting of old knowledge by mitigating drastic changes in model predictions under small parameter updates; and (2) enhanced new task performance by preventing overfitting to new tasks. Consequently, our method achieves superior ability in retaining old knowledge and achieving excellent new task performance simultaneously.\nImportantly, our approach is compatible with existing CL methodologies, allowing seamless integration while delivering significant improvements in effectiveness, efficiency, and versatility with both theoretical and empirical supports.", "keywords": "Continual Learning", "primary_area": "other", "supplementary_material": "", "author": "Zhenyi Wang;Heng Huang", "authorids": "~Zhenyi_Wang1;~Heng_Huang1", "gender": "M;M", "homepage": "https://www.cs.umd.edu/~heng/;https://joey-wang123.github.io/", "dblp": "03/281;10/10222-1", "google_scholar": "4OqLaDwAAAAJ;F4uLsroAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Heng_Huang1;~Zhenyi_Wang8", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu", "position": "Full Professor;Postdoc", "bibtex": "@inproceedings{\nwang2024model,\ntitle={Model Sensitivity Aware Continual Learning},\nauthor={Zhenyi Wang and Heng Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B5vQ7IQW7d}\n}", "github": "", "reviewers": "YRBc;g3Gj;kb2F;1NyD", "pdf_size": 820680, "rating": "5;6;6;7", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;2;3", "wc_summary": "78;110;73;277", "wc_strengths": "47;70;58;118", "wc_weaknesses": "243;81;92;208", "wc_questions": "77;67;60;67", "wc_limitations": "9;52;1;17", "wc_review": "454;380;284;687", "wc_reply_reviewers": "818;41;16;36", "wc_reply_authors": "2364;10;10;10", "reply_reviewers": "3;1;1;1", "reply_authors": "6;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 134.5, 83.48802309313594 ], "wc_strengths_avg": [ 73.25, 27.086666461563706 ], "wc_weaknesses_avg": [ 156.0, 70.70007072132248 ], "wc_questions_avg": [ 67.75, 6.057020719792859 ], "wc_limitations_avg": [ 19.75, 19.45989465541887 ], "wc_review_avg": [ 451.25, 148.85794402718318 ], "wc_reply_reviewers_avg": [ 227.75, 340.90935378777743 ], "wc_reply_authors_avg": [ 598.5, 1019.3119002542843 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5884635123247221387&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "cs.umd.edu;umd.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimizing the coalition gain in Online Auctions with Greedy Structured Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96213", "id": "B74mb0tEY6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B74mb0tEY6", "openreview": "https://openreview.net/forum?id=B74mb0tEY6", "poster": "", "project": "", "author_site": "Dorian Baudry, Hugo Richard, Maria Cherifa, Vianney Perchet, Cl\u00e9ment Calauz\u00e8nes", "tldr": "", "abstract": "Motivated by online display advertising, this work considers repeated second-price auctions, where agents sample their value from an unknown distribution with cumulative distribution function $F$. In each auction $t$, a decision-maker bound by limited observations selects $n_t$ agents from a coalition of $N$ to compete for a prize with $p$ other agents, aiming to maximize the cumulative reward of the coalition across all auctions.\nThe problem is framed as an $N$-armed structured bandit, each number of player sent being an arm $n$, with expected reward $r(n)$ fully characterized by $F$ and $p+n$. \nWe present two algorithms, Local-Greedy (LG) and Greedy-Grid (GG), both achieving *constant* problem-dependent regret. This relies on three key ingredients: **1.** an estimator of $r(n)$ from feedback collected from any arm $k$, **2.** concentration bounds of these estimates for $k$ within an estimation neighborhood of $n$ and **3.** the unimodality property of $r$ under standard assumptions on $F$. Additionally, GG exhibits problem-independent guarantees on top of best problem-dependent guarantees. However, by avoiding to rely on confidence intervals, LG practically outperforms GG, as well as standard unimodal bandit algorithms such as OSUB or multi-armed bandit algorithms.", "keywords": "unimodal bandits;multi-arm bandit;auctions", "primary_area": "bandits", "supplementary_material": "/attachment/98f2e92983c2309558424e0cf3e335f2a0e62921.zip", "author": "Dorian Baudry;Hugo Richard;Maria Cherifa;Vianney Perchet;Cl\u00e9ment Calauz\u00e8nes", "authorids": "~Dorian_Baudry1;~Hugo_Richard1;~Maria_Cherifa1;~Vianney_Perchet3;~Cl\u00e9ment_Calauz\u00e8nes1", "gender": "M;M;F;M;M", "homepage": "https://dbaudry.github.io/;https://hugorichard.github.io/;;;", "dblp": "277/6362;227/3044;;125/1895;83/7398", "google_scholar": "https://scholar.google.fr/citations?user=RRW-kfYAAAAJ;5KaKAOgAAAAJ;;lFsKnyUAAAAJ;", "orcid": ";;;;", "linkedin": ";;maria-cherifa-257257143/;;", "or_profile": "~Dorian_Baudry1;~Hugo_Richard1;~Maria_Cherifa1;~Cl\u00e9ment_Calauz\u00e8nes1;~Vianney_Perchet1", "aff": ";Criteo;Ecole Nationale de la Statistique et de l'Administration Economique;Criteo;", "aff_domain": ";criteo.com;ensae.fr;criteo.com;", "position": ";Researcher;PhD student;Researcher;", "bibtex": "@inproceedings{\nbaudry2024optimizing,\ntitle={Optimizing the coalition gain in Online Auctions with Greedy Structured Bandits},\nauthor={Dorian Baudry and Hugo Richard and Maria Cherifa and Vianney Perchet and Cl{\\'e}ment Calauz{\\`e}nes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B74mb0tEY6}\n}", "github": "", "reviewers": "Ja3N;RP8o;TZVE", "pdf_size": 874012, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "158;123;145", "wc_strengths": "83;262;212", "wc_weaknesses": "100;350;92", "wc_questions": "44;243;89", "wc_limitations": "9;114;12", "wc_review": "394;1092;550", "wc_reply_reviewers": "10;22;88", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 142.0, 14.445299120013633 ], "wc_strengths_avg": [ 185.66666666666666, 75.41146390893687 ], "wc_weaknesses_avg": [ 180.66666666666666, 119.78128215858918 ], "wc_questions_avg": [ 125.33333333333333, 85.20693764138119 ], "wc_limitations_avg": [ 45.0, 48.80573736764972 ], "wc_review_avg": [ 678.6666666666666, 299.1291062772432 ], "wc_reply_reviewers_avg": [ 40.0, 34.292856398964496 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p3bSP0cvq1IJ:scholar.google.com/&scioq=Optimizing+the+coalition+gain+in+Online+Auctions+with+Greedy+Structured+Bandits&hl=en&as_sdt=0,5", "gs_version_total": 9, "email": ";criteo.com;ensae.fr;criteo.com;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Criteo;Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": ";", "aff_unique_url": "https://www.criteo.com;https://ensae.fr", "aff_unique_abbr": "Criteo;ENSAE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Symbolic Regression with a Learned Concept Library", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96212", "id": "B7S4jJGlvl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B7S4jJGlvl", "openreview": "https://openreview.net/forum?id=B7S4jJGlvl", "poster": "/media/PosterPDFs/NeurIPS%202024/96212.png?t=1733777966.710544", "project": "", "author_site": "Arya Grayeli, Atharva Sehgal, Omar Costilla Reyes, Miles Cranmer, Swarat Chaudhuri", "tldr": "", "abstract": "We present a novel method for symbolic regression (SR), the task of searching for compact programmatic hypotheses that best explain a dataset. The problem is commonly solved using genetic algorithms; we show that we can enhance such methods by inducing a library of abstract textual concepts. Our algorithm, called LaSR, \nuses zero-shot queries to a large language model (LLM) to discover and evolve concepts occurring in known high-performing hypotheses. We discover new hypotheses using a mix of standard evolutionary steps and LLM-guided steps (obtained through zero-shot LLM queries) conditioned on discovered concepts. Once discovered, hypotheses are used in a new round of concept abstraction and evolution. We validate LaSR on the Feynman equations, a popular SR benchmark, \nas well as a set of synthetic tasks. On these benchmarks, LaSR substantially outperforms a variety of state-of-the-art SR approaches based on deep learning and evolutionary algorithms. Moreover, we show that LASR can be used to discover a new and powerful scaling law for LLMs.", "keywords": "symbolic regression;genetic programming;program synthesis;generative modelling;foundation models", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Arya Grayeli;Atharva Sehgal;Omar Costilla Reyes;Miles Cranmer;Swarat Chaudhuri", "authorids": "~Arya_Grayeli1;~Atharva_Sehgal1;~Omar_Costilla_Reyes1;~Miles_Cranmer2;~Swarat_Chaudhuri1", "gender": "M;M;;;M", "homepage": ";https://www.atharvas.net;http://omarcostilla.mit.edu/;https://astroautomata.com/;http://www.cs.utexas.edu/~swarat", "dblp": ";;;205/2493;37/6100", "google_scholar": ";;;10WfwCQAAAAJ;9j6RBYQAAAAJ", "orcid": ";;;0000-0002-6458-3423;0000-0002-6859-1391", "linkedin": "aryagrayeli/;atharvas/;;milescranmer/;swarat-chaudhuri-609b3092/", "or_profile": "~Arya_Grayeli1;~Atharva_Sehgal1;~Omar_Costilla_Reyes1;~Miles_Cranmer2;~Swarat_Chaudhuri1", "aff": "University of Texas at Austin;University of Texas at Austin;Massachusetts Institute of Technology;University of Cambridge;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;mit.edu;cam.ac.uk;utexas.edu", "position": "Undergrad student;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngrayeli2024symbolic,\ntitle={Symbolic Regression with a Learned Concept Library},\nauthor={Arya Grayeli and Atharva Sehgal and Omar Costilla Reyes and Miles Cranmer and Swarat Chaudhuri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B7S4jJGlvl}\n}", "github": "", "reviewers": "1Xi9;2Egm;mCEz;CCz9", "pdf_size": 921203, "rating": "4;6;7;8", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;4;3;4", "wc_summary": "57;80;73;141", "wc_strengths": "32;78;283;195", "wc_weaknesses": "88;81;356;223", "wc_questions": "368;142;193;11", "wc_limitations": "2;5;19;1", "wc_review": "547;386;924;571", "wc_reply_reviewers": "458;19;13;30", "wc_reply_authors": "745;0;284;14", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.75, 31.854159853934306 ], "wc_strengths_avg": [ 147.0, 98.47080785694814 ], "wc_weaknesses_avg": [ 187.0, 112.79849289773334 ], "wc_questions_avg": [ 178.5, 127.9736300962038 ], "wc_limitations_avg": [ 6.75, 7.224091638399945 ], "wc_review_avg": [ 607.0, 196.35809125167214 ], "wc_reply_reviewers_avg": [ 130.0, 189.46899482501087 ], "wc_reply_authors_avg": [ 260.75, 301.6267353866364 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8783100656536799, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12466652893828314161&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "utexas.edu;utexas.edu;mit.edu;cam.ac.uk;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Texas at Austin;Massachusetts Institute of Technology;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://web.mit.edu;https://www.cam.ac.uk", "aff_unique_abbr": "UT Austin;MIT;Cambridge", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Austin;;Cambridge", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "The Best of Both Worlds: On the Dilemma of Out-of-distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96211", "id": "B9FPPdNmyk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B9FPPdNmyk", "openreview": "https://openreview.net/forum?id=B9FPPdNmyk", "poster": "", "project": "", "author_site": "Qingyang Zhang, Qiuxuan Feng, Joey Tianyi Zhou, Yatao Bian, Qinghua Hu, Changqing Zhang", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is essential for model trustworthiness which aims to sensitively identity semantic OOD samples and robustly generalize for covariate-shifted OOD samples. However, we discover that the superior OOD detection performance of state-of-the-art methods is achieved by secretly sacrificing the OOD generalization ability. The classification accuracy frequently collapses catastrophically when even slight noise is encountered. Such a phenomenon violates the motivation of trustworthiness and significantly limits the model's deployment in the real world. What is the hidden reason behind such a limitation? In this work, we theoretically demystify the \"\\textit{sensitive-robust}\" dilemma that lies in previous OOD detection methods. Consequently, a theory-inspired algorithm is induced to overcome such a dilemma. By decoupling the uncertainty learning objective from a Bayesian perspective, the conflict between OOD detection and OOD generalization is naturally harmonized and a dual-optimized performance could be expected. Empirical studies show that our method achieves superior performance on commonly used benchmarks. To our best knowledge, this work is the first principled OOD detection method that achieves state-of-the-art OOD detection performance without sacrificing OOD generalization ability. Our code is available at https://github.com/QingyangZhang/DUL.", "keywords": "Out-of-Distribution Detection; Uncertainty estimation;", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2ffe0b1f201bf5ac168a70b0b1d365d1f0f93674.zip", "author": "Qingyang Zhang;Qiuxuan Feng;Joey Tianyi Zhou;Yatao Bian;Qinghua Hu;Changqing Zhang", "authorids": "~Qingyang_Zhang1;~Qiuxuan_Feng1;~Joey_Tianyi_Zhou1;~Yatao_Bian1;~Qinghua_Hu1;~Changqing_Zhang1", "gender": "M;F;M;M;M;M", "homepage": "https://qingyangzhang.github.io;https://github.com/xuanxuanzzzii;https://joeyzhouty.github.io/;http://cic.tju.edu.cn/faculty/huqinghua/index.html;http://cic.tju.edu.cn/faculty/zhangchangqing/index.html;https://yataobian.com", "dblp": ";;123/5110;;78/2668;222/2694", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ;TVSNq_wAAAAJ;yJGhdykAAAAJ;oZBTlBkAAAAJ", "orcid": ";;0000-0002-4675-7055;0000-0001-7765-8095;;0000-0002-2368-4084", "linkedin": ";;;;;", "or_profile": "~Qingyang_Zhang1;~Qiuxuan_Feng1;~Joey_Tianyi_Zhou1;~Qinghua_Hu1;~Changqing_Zhang1;~An_Bian1", "aff": "Tianjin University;Tianjin University;A*STAR Centre for Frontier AI Research;Tianjin University;Tianjin University;Tencent AI Lab", "aff_domain": "tju.edu.cn;tju.edu;cfar.a-star.edu.sg;tju.edu.cn;tju.edu.cn;tencent.com", "position": "MS student;Undergrad student;Principal Researcher;Professor;Associate Professor;Senior researcher ", "bibtex": "@inproceedings{\nzhang2024the,\ntitle={The Best of Both Worlds: On the Dilemma of Out-of-distribution Detection},\nauthor={Qingyang Zhang and Qiuxuan Feng and Joey Tianyi Zhou and Yatao Bian and Qinghua Hu and Changqing Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B9FPPdNmyk}\n}", "github": "", "reviewers": "gJee;DUjY;31hb;Tcyq", "pdf_size": 14947186, "rating": "5;6;6;8", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;4;3;4", "presentation": "3;4;3;4", "wc_summary": "71;48;91;66", "wc_strengths": "60;73;48;116", "wc_weaknesses": "133;116;294;60", "wc_questions": "105;48;68;85", "wc_limitations": "6;20;3;2", "wc_review": "375;305;504;329", "wc_reply_reviewers": "16;47;24;21", "wc_reply_authors": "66;123;16;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 69.0, 15.313392831113555 ], "wc_strengths_avg": [ 74.25, 25.674647027758727 ], "wc_weaknesses_avg": [ 150.75, 87.00395105970763 ], "wc_questions_avg": [ 76.5, 21.02974084481309 ], "wc_limitations_avg": [ 7.75, 7.224091638399945 ], "wc_review_avg": [ 378.25, 76.83545730976032 ], "wc_reply_reviewers_avg": [ 27.0, 11.895377253370318 ], "wc_reply_authors_avg": [ 51.25, 48.04880331496301 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5874898446365687988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tju.edu.cn;tju.edu;cfar.a-star.edu.sg;tju.edu.cn;tju.edu.cn;tencent.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Tianjin University;A*STAR;Tencent", "aff_unique_dep": ";Centre for Frontier AI Research;Tencent AI Lab", "aff_unique_url": "http://www.tju.edu.cn;https://www.a-star.edu.sg;https://ai.tencent.com", "aff_unique_abbr": "TJU;A*STAR;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Generative Fractional Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96210", "id": "B9qg3wo75g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=B9qg3wo75g", "openreview": "https://openreview.net/forum?id=B9qg3wo75g", "poster": "/media/PosterPDFs/NeurIPS%202024/96210.png?t=1733531281.51479", "project": "", "author_site": "Gabriel Nobis, Maximilian Springenberg, Marco Aversa, Michael Detzel, Rembert Daems, Roderick Murray-Smith, Shinichi Nakajima, Sebastian Lapuschkin, Stefano Ermon, Tolga Birdal, Manfred Opper, Christoph Knochenhauer, Luis Oala, Wojciech Samek", "tldr": "", "abstract": "We introduce the first continuous-time score-based generative model that leverages fractional diffusion processes for its underlying dynamics. Although diffusion models have excelled at capturing data distributions, they still suffer from various limitations such as slow convergence, mode-collapse on imbalanced data, and lack of diversity. These issues are partially linked to the use of light-tailed Brownian motion (BM) with independent increments. In this paper, we replace BM with an approximation of its non-Markovian counterpart, fractional Brownian motion (fBM), characterized by correlated increments and Hurst index $H \\in (0,1)$, where $H=0.5$ recovers the classical BM. To ensure tractable inference and learning, we employ a recently popularized Markov approximation of fBM (MA-fBM) and derive its reverse-time model, resulting in *generative fractional diffusion models* (GFDM). We characterize the forward dynamics using a continuous reparameterization trick and propose *augmented score matching* to efficiently learn the score function, which is partly known in closed form, at minimal added cost. The ability to drive our diffusion model via MA-fBM offers flexibility and control. $H \\leq 0.5$ enters the regime of *rough paths* whereas $H>0.5$ regularizes diffusion paths and invokes long-term memory. The Markov approximation allows added control by varying the number of Markov processes linearly combined to approximate fBM. Our evaluations on real image datasets demonstrate that GFDM achieves greater pixel-wise diversity and enhanced image quality, as indicated by a lower FID, offering a promising alternative to traditional diffusion models", "keywords": "diffusion models;fractional brownian motion;fractional noise;generative modeling", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Gabriel Nobis;Maximilian Springenberg;Marco Aversa;Michael Detzel;Rembert Daems;Roderick Murray-Smith;Shinichi Nakajima;Sebastian Lapuschkin;Stefano Ermon;Tolga Birdal;Manfred Opper;Christoph Knochenhauer;Luis Oala;Wojciech Samek", "authorids": "~Gabriel_Nobis1;~Maximilian_Springenberg1;~Marco_Aversa1;~Michael_Detzel1;~Rembert_Daems1;~Roderick_Murray-Smith1;~Shinichi_Nakajima2;~Sebastian_Lapuschkin1;~Stefano_Ermon1;~Tolga_Birdal3;~Manfred_Opper1;~Christoph_Knochenhauer1;~Luis_Oala1;~Wojciech_Samek1", "gender": "M;M;M;M;;M;M;M;M;M;;;Non-Binary;M", "homepage": ";;https://marcoaversa.github.io;;https://rdaems.github.io/;http://www.dcs.gla.ac.uk/~rod/;https://web.ml.tu-berlin.de/author/dr.-shinichi-nakajima/;http://iphome.hhi.de/lapuschkin/;http://cs.stanford.edu/~ermon/;http://tolgabirdal.github.io;;https://stochasticcontrol.org/;https://luisoala.net/;http://iphome.hhi.de/samek/", "dblp": ";351/0324;325/5090;;322/9167;78/604;97/6115.html;184/7883;47/8135;143/7056;;;https://dblp.uni-trier.de/pid/261/9215;79/9736", "google_scholar": "6adniB0AAAAJ;Xp-QvpsAAAAJ;XSd_7RgAAAAJ;https://scholar.google.com/citations?hl=en;-X_5BNcAAAAJ;https://scholar.google.co.uk/citations?user=laX7LzQAAAAJ;hXSvID4AAAAJ;https://scholar.google.de/citations?user=wpLQuroAAAAJ;;_Bxd5ggAAAAJ;;;v3ybnf0AAAAJ;7aQwO08AAAAJ", "orcid": ";0000-0002-6444-0254;0000-0002-7724-7488;;0000-0002-5225-4884;;0000-0003-3970-4569;0000-0002-0762-7258;;0000-0001-7915-7964;;;;", "linkedin": ";;marco-aversa-5bb15b169/;;rembert-daems/;rodms/;;sebastian-lapuschkin/;;https://linkedin.com/in/tbirdal;;;;", "or_profile": "~Gabriel_Nobis1;~Maximilian_Springenberg1;~Marco_Aversa1;~Michael_Detzel1;~Rembert_Daems1;~Roderick_Murray-Smith1;~Shinichi_Nakajima2;~Sebastian_Lapuschkin1;~Stefano_Ermon1;~Tolga_Birdal3;~Manfred_Opper1;~Christoph_Knochenhauer1;~Luis_Oala1;~Wojciech_Samek1", "aff": "Fraunhofer HHI;Fraunhofer HHI, Fraunhofer IAIS;Dotphoton;Fraunhofer HHI;Ghent University;University of Glasgow;BIFOLD, TU Berlin;Fraunhofer HHI;Stanford University;Imperial College London;;Technische Universit\u00e4t M\u00fcnchen;Dotphoton;Fraunhofer HHI", "aff_domain": "hhi.fraunhofer.de;hhi.fraunhofer.de;dotphoton.com;hhi.fraunhofer.de;ugent.be;gla.ac.uk;tu-berlin.de;hhi.fraunhofer.de;stanford.edu;imperial.ac.uk;;tum.de;dotphoton.com;hhi.fraunhofer.de", "position": "PhD student;PhD student;Researcher;PhD student;PhD student;Professor;Postdoc;Head of Explainable Artificial Intelligence;Associate Professor;Assistant Professor;;Assistant Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnobis2024generative,\ntitle={Generative Fractional Diffusion Models},\nauthor={Gabriel Nobis and Maximilian Springenberg and Marco Aversa and Michael Detzel and Rembert Daems and Roderick Murray-Smith and Shinichi Nakajima and Sebastian Lapuschkin and Stefano Ermon and Tolga Birdal and Manfred Opper and Christoph Knochenhauer and Luis Oala and Wojciech Samek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=B9qg3wo75g}\n}", "github": "", "reviewers": "kgmF;kZkj;HxVk;jifC", "pdf_size": 3425773, "rating": "2;6;6;7", "confidence": "5;3;4;4", "soundness": "1;2;2;3", "novelty": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "12;163;50;78", "wc_strengths": "9;87;19;36", "wc_weaknesses": "71;208;46;69", "wc_questions": "37;616;56;77", "wc_limitations": "1;2;1;3", "wc_review": "130;1076;172;263", "wc_reply_reviewers": "487;55;8;19", "wc_reply_authors": "589;1606;78;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;5;2;1", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 55.55346523845295 ], "wc_strengths_avg": [ 37.75, 30.028111828751403 ], "wc_weaknesses_avg": [ 98.5, 63.97851201770794 ], "wc_questions_avg": [ 196.5, 242.61131465782876 ], "wc_limitations_avg": [ 1.75, 0.82915619758885 ], "wc_review_avg": [ 410.25, 387.36570253443966 ], "wc_reply_reviewers_avg": [ 142.25, 199.79911786592052 ], "wc_reply_authors_avg": [ 568.25, 640.431251189384 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.7364596943186587, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=573487113767288288&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 7, "email": "hhi.fraunhofer.de;hhi.fraunhofer.de;dotphoton.com;hhi.fraunhofer.de;ugent.be;gla.ac.uk;tu-berlin.de;hhi.fraunhofer.de;stanford.edu;imperial.ac.uk;;tum.de;dotphoton.com;hhi.fraunhofer.de", "author_num": 14, "aff_unique_index": "0;1;2;0;3;4;5;0;6;7;8;2;0", "aff_unique_norm": "Fraunhofer Heinrich Hertz Institute;Fraunhofer HHI;Dotphoton;Ghent University;University of Glasgow;Technische Universit\u00e4t Berlin;Stanford University;Imperial College London;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";;;;;Berlin Institute for Foundations of Learning and Data (BIFOLD);;;", "aff_unique_url": "https://www.hhi.fraunhofer.de/;https://www.fraunhofer.de/en/institutes/hhi.html;;https://www.ugent.be/en;https://www.gla.ac.uk;https://www.tu-berlin.de;https://www.stanford.edu;https://www.imperial.ac.uk;https://www.tum.de", "aff_unique_abbr": "HHI;HHI;;UGent;Glasgow;TU Berlin;Stanford;ICL;TUM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;2;3;0;0;4;3;0;0", "aff_country_unique": "Germany;;Belgium;United Kingdom;United States" }, { "title": "Rethinking Fourier Transform from A Basis Functions Perspective for Long-term Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96209", "id": "BAfKBkr8IP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BAfKBkr8IP", "openreview": "https://openreview.net/forum?id=BAfKBkr8IP", "poster": "/media/PosterPDFs/NeurIPS%202024/96209.png?t=1731653579.6212962", "project": "", "author_site": "Runze Yang, Longbing Cao, JIE YANG, li jianxun", "tldr": "", "abstract": "The interaction between Fourier transform and deep learning opens new avenues for long-term time series forecasting (LTSF). We propose a new perspective to reconsider the Fourier transform from a basis functions perspective. Specifically, the real and imaginary parts of the frequency components can be viewed as the coefficients of cosine and sine basis functions at tiered frequency levels, respectively. We argue existing Fourier-based methods do not involve basis functions thus fail to interpret frequency coefficients precisely and consider the time-frequency relationship sufficiently, leading to inconsistent starting cycles and inconsistent series length issues. Accordingly, a novel Fourier basis mapping (FBM) method addresses these issues by mixing time and frequency domain features through Fourier basis expansion. Differing from existing approaches, FBM (i) embeds the discrete Fourier transform with basis functions, and then (ii) can enable plug-and-play in various types of neural networks for better performance. FBM extracts explicit frequency features while preserving temporal characteristics, enabling the mapping network to capture the time-frequency relationships. By incorporating our unique time-frequency features, the FBM variants can enhance any type of networks like linear, multilayer-perceptron-based, transformer-based, and Fourier-based networks, achieving state-of-the-art LTSF results on diverse real-world datasets with just one or three fully connected layers. The code is available at: https://github.com/runze1223/Fourier-Basis-Mapping.", "keywords": "Time series forecasting;Deep learning;Fourier transform;Frequency domain", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Runze Yang;Longbing Cao;JIE YANG;li jianxun", "authorids": "~Runze_Yang2;~Longbing_Cao1;~JIE_YANG18;~li_jianxun1", "gender": "M;M;M;M", "homepage": ";https://www.datasciences.org;http://www.pami.sjtu.edu.cn;https://automation.sjtu.edu.cn/Jian-Xun", "dblp": ";14/2589;;", "google_scholar": ";cDs3DM8AAAAJ;;", "orcid": "0000-0003-4891-7478;0000-0003-1562-9429;;", "linkedin": ";;;", "or_profile": "~Runze_Yang2;~Longbing_Cao1;~JIE_YANG18;~li_jianxun1", "aff": "Shanghai Jiaotong University;Macquarie University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;mq.edu.au;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024rethinking,\ntitle={Rethinking Fourier Transform from A Basis Functions Perspective for Long-term Time Series Forecasting},\nauthor={Runze Yang and Longbing Cao and JIE YANG and li jianxun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BAfKBkr8IP}\n}", "github": "", "reviewers": "8ZRG;akVH;Z6Ax", "pdf_size": 2488615, "rating": "2;6;6", "confidence": "4;4;5", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;1", "wc_summary": "58;88;156", "wc_strengths": "19;63;91", "wc_weaknesses": "293;105;265", "wc_questions": "2;195;224", "wc_limitations": "12;25;12", "wc_review": "384;476;748", "wc_reply_reviewers": "482;1020;327", "wc_reply_authors": "2278;4258;1171", "reply_reviewers": "2;9;4", "reply_authors": "5;12;5", "rating_avg": [ 4.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 100.66666666666667, 40.99864496405813 ], "wc_strengths_avg": [ 57.666666666666664, 29.634814361190493 ], "wc_weaknesses_avg": [ 221.0, 82.81706748410419 ], "wc_questions_avg": [ 140.33333333333334, 98.53031118504711 ], "wc_limitations_avg": [ 16.333333333333332, 6.128258770283412 ], "wc_review_avg": [ 536.0, 154.54017816304815 ], "wc_reply_reviewers_avg": [ 609.6666666666666, 296.96950834873417 ], "wc_reply_authors_avg": [ 2569.0, 1276.9502731116822 ], "reply_reviewers_avg": [ 5.0, 2.943920288775949 ], "reply_authors_avg": [ 7.333333333333333, 3.299831645537222 ], "replies_avg": [ 45, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Xa5ADe7V02gJ:scholar.google.com/&scioq=Rethinking+Fourier+Transform+from+A+Basis+Functions+Perspective+for+Long-term+Time+Series+Forecasting&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "sjtu.edu.cn;mq.edu.au;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Macquarie University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.mq.edu.au", "aff_unique_abbr": "SJTU;MQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "Oracle-Efficient Differentially Private Learning with Public Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96208", "id": "BAjjINf0Oh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BAjjINf0Oh", "openreview": "https://openreview.net/forum?id=BAjjINf0Oh", "poster": "", "project": "", "author_site": "Adam Block, Mark Bun, Rathin Desai, Abhishek Shetty, Steven Wu", "tldr": "", "abstract": "Due to statistical lower bounds on the learnability of many function classes under privacy constraints, there has been recent interest in leveraging public data to improve the performance of private learning algorithms. In this model, algorithms must always guarantee differential privacy with respect to the private samples while also ensuring learning guarantees when the private data distribution is sufficiently close to that of the public data. Previous work has demonstrated that when sufficient public, unlabelled data is available, private learning can be made statistically tractable, but the resulting algorithms have all been computationally inefficient. In this work, we present the first computationally efficient, algorithms to provably leverage public data to learn privately whenever a function class is learnable non-privately, where our notion of computational efficiency is with respect to the number of calls to an optimization oracle for the function class. In addition to this general result, we provide specialized algorithms with improved sample complexities in the special cases when the function class is convex or when the task is binary classification.", "keywords": "Oracle Efficiency;Differential Privacy;PAC learning", "primary_area": "privacy", "supplementary_material": "", "author": "Adam Block;Mark Bun;Rathin Desai;Abhishek Shetty;Steven Wu", "authorids": "~Adam_Block1;~Mark_Bun1;~Rathin_Desai1;~Abhishek_Shetty1;~Steven_Wu1", "gender": ";;;M;M", "homepage": "https://abblock.github.io/index.html;https://cs-people.bu.edu/mbun/;https://cs-people.bu.edu/rathin/;https://ashettyv.github.io/;https://zstevenwu.com/", "dblp": "258/1018;126/4933;;223/4770;137/8350", "google_scholar": ";oDwLyYUAAAAJ;;https://scholar.google.co.in/citations?user=M-y2aLUAAAAJ;MbF6rTEAAAAJ", "orcid": "0000-0003-1677-2665;;;;", "linkedin": ";;;;zstevenwu/", "or_profile": "~Adam_Block1;~Mark_Bun1;~Rathin_Desai1;~Abhishek_Shetty1;~Zhiwei_Steven_Wu1", "aff": "Massachusetts Institute of Technology;Boston University;Boston University, Boston University;University of California, Berkeley;Carnegie Mellon University", "aff_domain": "mit.edu;bu.edu;bu.edu;berkeley.edu;cmu.edu", "position": "PhD student;Assistant Professor;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nblock2024oracleefficient,\ntitle={Oracle-Efficient Differentially Private Learning with Public Data},\nauthor={Adam Block and Mark Bun and Rathin Desai and Abhishek Shetty and Steven Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BAjjINf0Oh}\n}", "github": "", "reviewers": "b8gT;Fe9c;3FxP;CX9N;21sV", "pdf_size": 551613, "rating": "5;6;7;8;8", "confidence": "2;3;3;3;4", "soundness": "2;3;4;3;4", "novelty": "3;2;3;4;3", "presentation": "2;3;4;4;3", "wc_summary": "107;115;121;175;255", "wc_strengths": "31;50;99;70;212", "wc_weaknesses": "144;69;63;7;70", "wc_questions": "6;2;88;150;214", "wc_limitations": "1;5;1;4;29", "wc_review": "289;241;372;406;780", "wc_reply_reviewers": "0;0;17;9;14", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 154.6, 55.60431637921646 ], "wc_strengths_avg": [ 92.4, 63.90179966166837 ], "wc_weaknesses_avg": [ 70.6, 43.57338637287673 ], "wc_questions_avg": [ 92.0, 82.17055433669655 ], "wc_limitations_avg": [ 8.0, 10.620734437881403 ], "wc_review_avg": [ 417.6, 190.40651249366445 ], "wc_reply_reviewers_avg": [ 8.0, 7.014271166700073 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8134892168199606, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11358193021657578523&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mit.edu;bu.edu;bu.edu;berkeley.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;Boston University;University of California, Berkeley;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://web.mit.edu;https://www.bu.edu;https://www.berkeley.edu;https://www.cmu.edu", "aff_unique_abbr": "MIT;BU;UC Berkeley;CMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Boston;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Toward Semantic Gaze Target Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96207", "id": "BAmAFraxvf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BAmAFraxvf", "openreview": "https://openreview.net/forum?id=BAmAFraxvf", "poster": "/media/PosterPDFs/NeurIPS%202024/96207.png?t=1731674687.8646579", "project": "", "author_site": "Samy Tafasca, Anshul Gupta, Victor Bros, Jean-marc Odobez", "tldr": "", "abstract": "From the onset of infanthood, humans naturally develop the ability to closely observe and interpret the visual gaze of others. This skill, known as gaze following, holds significance in developmental theory as it enables us to grasp another person\u2019s mental state, emotions, intentions, and more. In computer vision, gaze following is defined as the prediction of the pixel coordinates where a person in the image is focusing their attention. Existing methods in this research area have predominantly centered on pinpointing the gaze target by predicting a gaze heatmap or gaze point. However, a notable drawback of this approach is its limited practical value in gaze applications, as mere localization may not fully capture our primary interest \u2014 understanding the underlying semantics, such as the nature of the gaze target, rather than just its 2D pixel location. To address this gap, we extend the gaze following task, and introduce a novel architecture that simultaneously predicts the localization and semantic label of the gaze target. We devise a pseudo-annotation pipeline for the GazeFollow dataset, propose a new benchmark, develop an experimental protocol and design a suitable baseline for comparison. Our method sets a new state-of-the-art on the main GazeFollow benchmark for localization and achieves competitive results in the recognition task on both datasets compared to the baseline, with 40% fewer parameters", "keywords": "gaze following;dataset;deep learning;computer vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Samy Tafasca;Anshul Gupta;Victor Bros;Jean-marc Odobez", "authorids": "~Samy_Tafasca1;~Anshul_Gupta2;~Victor_Bros1;~Jean-marc_Odobez1", "gender": "M;;M;", "homepage": ";https://anshul-gupta24.github.io/;;", "dblp": "327/3661;;;", "google_scholar": "N6SqiAIAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": "0009-0008-2427-7406;;0009-0005-5681-8332;", "linkedin": "samy-tafasca/;;;", "or_profile": "~Samy_Tafasca1;~Anshul_Gupta2;~Victor_Bros1;~Jean-marc_Odobez1", "aff": "EPFL;Meta Facebook;Idiap Research Institute;", "aff_domain": "epfl.ch;meta.com;idiap.ch;", "position": "PhD student;Intern;PhD student;", "bibtex": "@inproceedings{\ntafasca2024toward,\ntitle={Toward Semantic Gaze Target Detection},\nauthor={Samy Tafasca and Anshul Gupta and Victor Bros and Jean-marc Odobez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BAmAFraxvf}\n}", "github": "", "reviewers": "kgqK;9bSU;roY1;DJCe", "pdf_size": 32611176, "rating": "4;5;5;7", "confidence": "5;4;4;3", "soundness": "3;3;2;3", "novelty": "3;2;2;3", "presentation": "3;3;3;4", "wc_summary": "107;121;68;128", "wc_strengths": "118;30;28;164", "wc_weaknesses": "157;81;163;133", "wc_questions": "5;39;21;58", "wc_limitations": "7;11;16;24", "wc_review": "394;282;296;507", "wc_reply_reviewers": "0;0;28;18", "wc_reply_authors": "0;0;58;26", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.0, 23.205602771744587 ], "wc_strengths_avg": [ 85.0, 58.31809324729333 ], "wc_weaknesses_avg": [ 133.5, 32.32259271778797 ], "wc_questions_avg": [ 30.75, 19.803724397193573 ], "wc_limitations_avg": [ 14.5, 6.34428877022476 ], "wc_review_avg": [ 369.75, 90.22852930198962 ], "wc_reply_reviewers_avg": [ 11.5, 12.031209415515965 ], "wc_reply_authors_avg": [ 21.0, 23.853720883753127 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10360874072744442449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "epfl.ch;meta.com;idiap.ch;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;Meta;Idiap Research Institute", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.epfl.ch;https://meta.com;https://www.idiap.ch", "aff_unique_abbr": "EPFL;Meta;Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "BERTs are Generative In-Context Learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96206", "id": "BCA9NMZkLS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BCA9NMZkLS", "openreview": "https://openreview.net/forum?id=BCA9NMZkLS", "poster": "", "project": "", "tldr": "", "abstract": "While in-context learning is commonly associated with causal language models, such as GPT, we demonstrate that this capability also 'emerges' in masked language models. Through an embarrassingly simple inference technique, we enable an existing masked model, DeBERTa, to perform generative tasks without additional training or architectural changes. Our evaluation reveals that the masked and causal language models behave very differently, as they clearly outperform each other on different categories of tasks. These complementary strengths suggest that the field's focus on causal models for in-context learning may be limiting \u2013 both architectures can develop these capabilities, but with distinct advantages; pointing toward promising hybrid approaches that combine the strengths of both objectives.", "keywords": "in-context learning;masked language modeling;bert;language modeling;evaluation;inference", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "David Samuel", "authorids": "~David_Samuel1", "gender": "M", "homepage": "https://www.mn.uio.no/ifi/english/people/aca/davisamu/index.html", "dblp": "160/7562", "google_scholar": "eh0roKUAAAAJ", "orcid": "0000-0003-2866-1022", "linkedin": "", "or_profile": "~David_Samuel1", "aff": "University of Oslo", "aff_domain": "uio.no", "position": "PhD student", "bibtex": "@inproceedings{\nsamuel2024berts,\ntitle={{BERT}s are Generative In-Context Learners},\nauthor={David Samuel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BCA9NMZkLS}\n}", "github": "", "reviewers": "iPtc;gXpi;7Ges;ayxu", "pdf_size": 504844, "rating": "3;7;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "67;81;86;94", "wc_strengths": "53;39;89;44", "wc_weaknesses": "253;219;403;66", "wc_questions": "6;96;143;103", "wc_limitations": "40;17;32;32", "wc_review": "419;452;753;339", "wc_reply_reviewers": "65;21;275;14", "wc_reply_authors": "0;0;130;121", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 9.82344135219425 ], "wc_strengths_avg": [ 56.25, 19.562400159489634 ], "wc_weaknesses_avg": [ 235.25, 119.7546971938888 ], "wc_questions_avg": [ 87.0, 50.08492787256461 ], "wc_limitations_avg": [ 30.25, 8.317902379807062 ], "wc_review_avg": [ 490.75, 156.88590599540802 ], "wc_reply_reviewers_avg": [ 93.75, 106.45509616735124 ], "wc_reply_authors_avg": [ 62.75, 62.83062549426036 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6687878713954434550&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uio.no", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Oslo", "aff_unique_dep": "", "aff_unique_url": "https://www.uio.no", "aff_unique_abbr": "UiO", "aff_country_unique_index": "0", "aff_country_unique": "Norway" }, { "title": "BAM! Just Like That: Simple and Efficient Parameter Upcycling for Mixture of Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96205", "id": "BDrWQTrfyI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BDrWQTrfyI", "openreview": "https://openreview.net/forum?id=BDrWQTrfyI", "poster": "", "project": "", "author_site": "Qizhen (Irene) Zhang, Nikolas Gritsch, Dwaraknath Gnaneshwar Talupuru, Simon Guo, David Cairuz, Bharat Venkitesh, Jakob Foerster, Phil Blunsom, Sebastian Ruder, Ahmet \u00dcst\u00fcn, Acyr Locatelli", "tldr": "", "abstract": "Mixture of Experts (MoE) framework has become a popular architecture for large language models due to its superior performance compared to dense models. However, training MoEs from scratch in a large-scale regime is prohibitively expensive. Previous work addresses this challenge by independently training multiple dense expert models and using them to initialize an MoE. In particular, state-of-the-art approaches initialize MoE layers using experts' feed-forward parameters while merging all other parameters, limiting the advantages of the specialized dense models when upcycling them as MoEs. We propose BAM (Branch-Attend-Mix), a simple yet effective improvement to MoE training. BAM makes full use of specialized dense models by not only using their feed-forward network (FFN) to initialize the MoE layers but also leveraging experts' attention weights fully by leveraging them as mixture-of-attention (MoA) layers. We explore two methods for upcycling MoA layers: 1) initializing separate attention experts from dense models including key, value, and query matrices; and 2) initializing only Q projections while sharing key-value pairs across all experts to facilitate efficient inference. Our experiments using seed models ranging from 590 million to 2 billion parameters show that our approach outperforms state-of-the-art approaches under the same data and compute budget in both perplexity and downstream tasks evaluations, confirming the effectiveness of BAM.", "keywords": "Mixture of Experts;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Qizhen Zhang;Nikolas Gritsch;Dwaraknath Gnaneshwar;Simon Guo;David Cairuz;Bharat Venkitesh;Jakob Nicolaus Foerster;Phil Blunsom;Sebastian Ruder;Ahmet \u00dcst\u00fcn;Acyr Locatelli", "authorids": "~Qizhen_Zhang1;~Nikolas_Gritsch1;~Dwaraknath_Gnaneshwar1;~Simon_Guo1;~David_Cairuz1;~Bharat_Venkitesh1;~Jakob_Nicolaus_Foerster1;~Phil_Blunsom1;~Sebastian_Ruder2;~Ahmet_\u00dcst\u00fcn1;~Acyr_Locatelli1", "gender": "F;;M;M;M;M;M;;;M;M", "homepage": "https://irenezhang30.github.io/;;https://www.dwaraknathgnaneshwar.com/;https://simonguo.tech/;https://davidcairuz.com/;;https://www.jakobfoerster.com;;;https://ahmetustun.github.io/;https://acyrl.github.io/", "dblp": ";;;400/0445;;194/3553;176/5095;96/4705;;186/0896;330/2243.html", "google_scholar": "https://scholar.google.ca/citations?hl=en;;https://scholar.google.com/citations?hl=en;;;r6fDYb0AAAAJ;6z4lQzMAAAAJ;https://scholar.google.co.uk/citations?user=eJwbbXEAAAAJ;;fvotcRIAAAAJ;pv4OI2EAAAAJ", "orcid": ";;0000-0002-3771-8433;;;;;;;;", "linkedin": ";;https://www.linkedin.com/feed/;https://linkedin.com/in/simonguozirui;davidcairuz/;bharat-venkitesh-92350671/;;;;ahmet-%C3%BCst%C3%BCn/;", "or_profile": "~Qizhen_Zhang1;~Nikolas_Gritsch1;~Dwaraknath_Gnaneshwar1;~Simon_Guo1;~David_Cairuz1;~Bharat_Venkitesh1;~Jakob_Nicolaus_Foerster1;~Phil_Blunsom1;~Sebastian_Ruder2;~Ahmet_\u00dcst\u00fcn1;~Acyr_Locatelli1", "aff": "Cohere;;;Cohere;Cohere;Cohere;University of Oxford, University of Oxford;Department of Computer Science, University of Oxford;;Cohere For AI;Cohere", "aff_domain": "cohere.ai;;;cohere.com;cohere.com;cohere.ai;eng.ox.ac.uk;cs.ox.ac.uk;;cohere.com;cohere.com", "position": "Researcher;;;Research Intern;Researcher;Member of Technical Staff;Associate Professor;Associate Professor;;Researcher;Researcher", "bibtex": "@inproceedings{\nzhang2024bam,\ntitle={{BAM}! Just Like That: Simple and Efficient Parameter Upcycling for Mixture of Experts},\nauthor={Qizhen Zhang and Nikolas Gritsch and Dwaraknath Gnaneshwar and Simon Guo and David Cairuz and Bharat Venkitesh and Jakob Nicolaus Foerster and Phil Blunsom and Sebastian Ruder and Ahmet {\\\"U}st{\\\"u}n and Acyr Locatelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BDrWQTrfyI}\n}", "github": "", "reviewers": "MjaL;ScVn;4yUX", "pdf_size": 1110510, "rating": "5;6;6", "confidence": "4;5;4", "soundness": "3;1;4", "novelty": "1;2;2", "presentation": "3;2;3", "wc_summary": "83;42;135", "wc_strengths": "40;26;55", "wc_weaknesses": "49;150;55", "wc_questions": "8;2;116", "wc_limitations": "13;1;1", "wc_review": "193;221;362", "wc_reply_reviewers": "74;47;79", "wc_reply_authors": "711;0;301", "reply_reviewers": "2;1;2", "reply_authors": "5;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 1.247219128924647 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.66666666666667, 38.055515004033545 ], "wc_strengths_avg": [ 40.333333333333336, 11.841546445554407 ], "wc_weaknesses_avg": [ 84.66666666666667, 46.26253583864834 ], "wc_questions_avg": [ 42.0, 52.38320341483518 ], "wc_limitations_avg": [ 5.0, 5.656854249492381 ], "wc_review_avg": [ 258.6666666666667, 73.9564436378302 ], "wc_reply_reviewers_avg": [ 66.66666666666667, 14.055445761538676 ], "wc_reply_authors_avg": [ 337.3333333333333, 291.3993060313555 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6338348921717017712&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cohere.ai;;;cohere.com;cohere.com;cohere.ai;eng.ox.ac.uk;cs.ox.ac.uk;;cohere.com;cohere.com", "author_num": 11, "aff_unique_index": "0;0;0;0;1;1;0;0", "aff_unique_norm": "Cohere;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://cohere.ai;https://www.ox.ac.uk", "aff_unique_abbr": ";Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Efficiently Learning Significant Fourier Feature Pairs for Statistical Independence Testing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96204", "id": "BEiqNQZIky", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BEiqNQZIky", "openreview": "https://openreview.net/forum?id=BEiqNQZIky", "poster": "/media/PosterPDFs/NeurIPS%202024/96204.png?t=1730266628.1843188", "project": "", "author_site": "Yixin Ren, Yewei Xia, Hao Zhang, Jihong Guan, Shuigeng Zhou", "tldr": "", "abstract": "We propose a novel method to efficiently learn significant Fourier feature pairs for maximizing the power of Hilbert-Schmidt Independence Criterion~(HSIC) based independence tests. We first reinterpret HSIC in the frequency domain, which reveals its limited discriminative power due to the inability to adapt to specific frequency-domain features under the current inflexible configuration. To remedy this shortcoming, we introduce a module of learnable Fourier features, thereby developing a new criterion. We then derive a finite sample estimate of the test power by modeling the behavior of the criterion, thus formulating an optimization objective for significant Fourier feature pairs learning. We show that this optimization objective can be computed in linear time (with respect to the sample size $n$), which ensures fast independence tests. We also prove the convergence property of the optimization objective and establish the consistency of the independence tests. Extensive empirical evaluation on both synthetic and real datasets validates our method's superiority in effectiveness and efficiency, particularly in handling high-dimensional data and dealing with large-scale scenarios.", "keywords": "independence test;learnable Fourier feature", "primary_area": "causal_inference", "supplementary_material": "/attachment/906388d95d7a84c83924a7912464780644547c3d.zip", "author": "Yixin Ren;Yewei Xia;Hao Zhang;Jihong Guan;Shuigeng Zhou", "authorids": "~Yixin_Ren1;~Yewei_Xia1;~Hao_Zhang61;~Jihong_Guan1;~Shuigeng_Zhou1", "gender": "M;;;F;M", "homepage": ";https://xyw5vplus1.github.io/;;https://admis.tongji.edu.cn/82/08/c25113a229896/page.htm;http://admis.fudan.edu.cn/sgzhou", "dblp": "303/5538.html;339/8598;;89/2685.html;52/6744.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;meWQHcgAAAAJ;;;yAE-Av4AAAAJ", "orcid": "0000-0002-0084-4903;0000-0001-5515-5913;;0000-0003-2313-7635;0000-0002-1949-2768", "linkedin": ";;;;", "or_profile": "~Yixin_Ren1;~Yewei_Xia1;~Hao_Zhang61;~Jihong_Guan1;~Shuigeng_Zhou1", "aff": "Fudan University;Fudan University;;Tongji University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;;tongji.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nren2024efficiently,\ntitle={Efficiently Learning Significant Fourier Feature Pairs for Statistical Independence Testing},\nauthor={Yixin Ren and Yewei Xia and Hao Zhang and Jihong Guan and Shuigeng Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BEiqNQZIky}\n}", "github": "", "reviewers": "gka3;X3Av;uRf4", "pdf_size": 1161540, "rating": "4;5;8", "confidence": "3;3;3", "soundness": "2;3;4", "novelty": "2;2;1", "presentation": "2;2;3", "wc_summary": "64;204;78", "wc_strengths": "192;27;108", "wc_weaknesses": "173;155;131", "wc_questions": "34;32;32", "wc_limitations": "14;1;39", "wc_review": "477;419;388", "wc_reply_reviewers": "0;64;0", "wc_reply_authors": "40;250;0", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 115.33333333333333, 62.956775295082856 ], "wc_strengths_avg": [ 109.0, 67.36467917239716 ], "wc_weaknesses_avg": [ 153.0, 17.204650534085253 ], "wc_questions_avg": [ 32.666666666666664, 0.9428090415820634 ], "wc_limitations_avg": [ 18.0, 15.769168230019828 ], "wc_review_avg": [ 428.0, 36.88721549082645 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 30.169889330626027 ], "wc_reply_authors_avg": [ 96.66666666666667, 109.64589468932351 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Rctx7bc8LN0J:scholar.google.com/&scioq=Efficiently+Learning+Significant+Fourier+Feature+Pairs+for+Statistical+Independence+Testing&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "fudan.edu.cn;fudan.edu.cn;;tongji.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Fudan University;Tongji University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "Fudan;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Phase Transition between Positional and Semantic Learning in a Solvable Model of Dot-Product Attention", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96203", "id": "BFWdIPPLgZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BFWdIPPLgZ", "openreview": "https://openreview.net/forum?id=BFWdIPPLgZ", "poster": "", "project": "", "author_site": "Hugo Cui, Freya Behrens, Florent Krzakala, Lenka Zdeborov\u00e1", "tldr": "", "abstract": "Many empirical studies have provided evidence for the emergence of algorithmic mechanisms (abilities) in the learning of language models, that lead to qualitative improvements of the model capabilities. Yet, a theoretical characterization of how such mechanisms emerge remains elusive. In this paper, we take a step in this direction by providing a tight theoretical analysis of the emergence of semantic attention in a solvable model of dot-product attention. More precisely, we consider a non-linear self-attention layer with trainable tied and low-rank query and key matrices. In the asymptotic limit of high-dimensional data and a comparably large number of training samples we provide a tight closed-form characterization of the global minimum of the non-convex empirical loss landscape. We show that this minimum corresponds to either a positional attention mechanism (with tokens attending to each other based on their respective positions) or a semantic attention mechanism (with tokens attending to each other based on their meaning), and evidence an emergent phase transition from the former to the latter with increasing sample complexity. Finally, we compare the dot-product attention layer to a linear positional baseline, and show that it outperforms the latter using the semantic mechanism provided it has access to sufficient data.", "keywords": "replica method;statistical physics;phase transition;high-dimensional limit;attention layer", "primary_area": "learning_theory", "supplementary_material": "/attachment/de007cdb4cd3bcb0c7b24afb2bbdc28db8c8461d.zip", "author": "Hugo Cui;Freya Behrens;Florent Krzakala;Lenka Zdeborova", "authorids": "~Hugo_Cui1;~Freya_Behrens1;~Florent_Krzakala1;~Lenka_Zdeborova1", "gender": ";;;F", "homepage": ";;http://Krzakala.org;http://artax.karlin.mff.cuni.cz/~zdebl9am/", "dblp": ";;25/1282;27/6064.html", "google_scholar": ";;https://scholar.google.fr/citations?user=3jDeUlMAAAAJ;https://scholar.google.fr/citations?user=gkCjy_UAAAAJ", "orcid": ";;0000-0003-2313-2578;", "linkedin": ";;;", "or_profile": "~Hugo_Cui1;~Freya_Behrens1;~Florent_Krzakala1;~Lenka_Zdeborova1", "aff": ";;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": ";;epfl.ch;epfl.ch", "position": ";;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ncui2024a,\ntitle={A Phase Transition between Positional and Semantic Learning in a Solvable Model of Dot-Product Attention},\nauthor={Hugo Cui and Freya Behrens and Florent Krzakala and Lenka Zdeborova},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BFWdIPPLgZ}\n}", "github": "", "reviewers": "wGcv;QM4T;uCdk", "pdf_size": 1179922, "rating": "6;7;7", "confidence": "2;2;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "110;100;136", "wc_strengths": "161;49;76", "wc_weaknesses": "74;57;76", "wc_questions": "39;63;69", "wc_limitations": "1;1;8", "wc_review": "385;270;365", "wc_reply_reviewers": "95;19;41", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 115.33333333333333, 15.173075568988057 ], "wc_strengths_avg": [ 95.33333333333333, 47.72374205314956 ], "wc_weaknesses_avg": [ 69.0, 8.524474568362947 ], "wc_questions_avg": [ 57.0, 12.96148139681572 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_review_avg": [ 340.0, 50.16638981097471 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 31.930480039541457 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11603844549847582846&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": ";;epfl.ch;epfl.ch", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Self-Guiding Exploration for Combinatorial Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96202", "id": "BGOGknwHbi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BGOGknwHbi", "openreview": "https://openreview.net/forum?id=BGOGknwHbi", "poster": "/media/PosterPDFs/NeurIPS%202024/96202.png?t=1731667237.8135982", "project": "", "author_site": "Zangir Iklassov, Yali Du, Farkhad Akimov, Martin Takac", "tldr": "", "abstract": "Large Language Models (LLMs) have become pivotal in addressing reasoning tasks across diverse domains, including arithmetic, commonsense, and symbolic reasoning. They utilize prompting techniques such as Exploration-of-Thought, Decomposition, and Refinement to effectively navigate and solve intricate tasks. Despite these advancements, the application of LLMs to Combinatorial Problems (CPs), known for their NP-hardness and critical roles in logistics and resource management remains underexplored. To address this gap, we introduce a novel prompting strategy: Self-Guiding Exploration (SGE), designed to enhance the performance of solving CPs. SGE operates autonomously, generating multiple thought trajectories for each CP task. It then breaks these trajectories down into actionable subtasks, executes them sequentially, and refines the results to ensure optimal outcomes. We present our research as the first to apply LLMs to a broad range of CPs and demonstrate that SGE outperforms existing prompting strategies by over 27.84% in CP optimization performance. Additionally, SGE achieves a 2.46% higher accuracy over the best existing results in other reasoning tasks (arithmetic, commonsense, and symbolic).", "keywords": "combinatorial problems;combinatorial optimization;LLM prompting strategies;LLM thought exploration", "primary_area": "other", "supplementary_material": "", "author": "Zangir Iklassov;Yali Du;Farkhad Akimov;Martin Tak\u00e1\u010d", "authorids": "~Zangir_Iklassov1;~Yali_Du1;~Farkhad_Akimov1;~Martin_Tak\u00e1\u010d1", "gender": "M;;M;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "zangir-iklassov-24b31622b/;;farkhad-akimov;", "or_profile": "~Zangir_Iklassov1;~Yali_Du1;~Farkhad_Akimov1;~Martin_Tak\u00e1\u010d1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;;Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": "mbzuai.ac.ae;;mbzuai.ac.ae;", "position": "PhD student;;MS student;", "bibtex": "@inproceedings{\niklassov2024selfguiding,\ntitle={Self-Guiding Exploration for Combinatorial Problems},\nauthor={Zangir Iklassov and Yali Du and Farkhad Akimov and Martin Tak{\\'a}{\\v{c}}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BGOGknwHbi}\n}", "github": "", "reviewers": "8LKr;auHA;zLRa", "pdf_size": 706570, "rating": "6;6;8", "confidence": "3;2;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "56;97;134", "wc_strengths": "38;41;140", "wc_weaknesses": "214;80;150", "wc_questions": "1;39;134", "wc_limitations": "18;2;157", "wc_review": "327;259;715", "wc_reply_reviewers": "48;27;19", "wc_reply_authors": "190;18;192", "reply_reviewers": "2;1;1", "reply_authors": "3;2;3", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.66666666666667, 31.857320805254307 ], "wc_strengths_avg": [ 73.0, 47.391982444291145 ], "wc_weaknesses_avg": [ 148.0, 54.72354764328302 ], "wc_questions_avg": [ 58.0, 55.93448548674303 ], "wc_limitations_avg": [ 59.0, 69.60363975157239 ], "wc_review_avg": [ 433.6666666666667, 200.86037162389422 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 12.229290885229428 ], "wc_reply_authors_avg": [ 133.33333333333334, 81.55706931686275 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1451353007645644565&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "mbzuai.ac.ae;;mbzuai.ac.ae;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Arab Emirates" }, { "title": "Self-Calibrating Conformal Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96201", "id": "BJ6HkT7qIk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BJ6HkT7qIk", "openreview": "https://openreview.net/forum?id=BJ6HkT7qIk", "poster": "/media/PosterPDFs/NeurIPS%202024/96201.png?t=1733961325.1491308", "project": "", "author_site": "Lars van der Laan, Ahmed Alaa", "tldr": "", "abstract": "In machine learning, model calibration and predictive inference are essential for producing reliable predictions and quantifying uncertainty to support decision-making. Recognizing the complementary roles of point and interval predictions, we introduce Self-Calibrating Conformal Prediction, a method that combines Venn-Abers calibration and conformal prediction to deliver calibrated point predictions alongside prediction intervals with finite-sample validity conditional on these predictions. To achieve this, we extend the original Venn-Abers procedure from binary classification to regression. Our theoretical framework supports analyzing conformal prediction methods that involve calibrating model predictions and subsequently constructing conditionally valid prediction intervals on the same data, where the conditioning set or conformity scores may depend on the calibrated predictions. Real-data experiments show that our method improves interval efficiency through model calibration and offers a practical alternative to feature-conditional validity.", "keywords": "trustworthy machine learning;calibration;isotonic calibration;Venn-Abers predictors;conformal prediction;predictive inference;prediction intervals", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/ca7cfd411ed45ecda41dd9eda78bbcee9c4e537f.zip", "author": "Lars van der Laan;Ahmed Alaa", "authorids": "~Lars_van_der_Laan1;~Ahmed_Alaa1", "gender": "M;M", "homepage": "https://scholar.google.com/citations?hl=en&user=0bwP0i4AAAAJ;https://alaalab.berkeley.edu/", "dblp": ";140/7324", "google_scholar": ";https://scholar.google.com.eg/citations?user=_pv1sEcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Lars_van_der_Laan1;~Ahmed_Alaa1", "aff": "University of Washington;University of California, Berkeley", "aff_domain": "uw.edu;berkeley.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlaan2024selfcalibrating,\ntitle={Self-Calibrating Conformal Prediction},\nauthor={Lars van der Laan and Ahmed Alaa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BJ6HkT7qIk}\n}", "github": "", "reviewers": "qyB7;QroB;bFVX", "pdf_size": 4309597, "rating": "5;6;7", "confidence": "3;2;4", "soundness": "2;3;4", "novelty": "3;2;4", "presentation": "2;3;4", "wc_summary": "38;71;34", "wc_strengths": "42;55;127", "wc_weaknesses": "174;54;110", "wc_questions": "129;156;65", "wc_limitations": "60;47;68", "wc_review": "443;383;404", "wc_reply_reviewers": "14;21;87", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 47.666666666666664, 16.579773487261182 ], "wc_strengths_avg": [ 74.66666666666667, 37.38389433373087 ], "wc_weaknesses_avg": [ 112.66666666666667, 49.026070162267295 ], "wc_questions_avg": [ 116.66666666666667, 38.16047984790315 ], "wc_limitations_avg": [ 58.333333333333336, 8.65383665716478 ], "wc_review_avg": [ 410.0, 24.859605789312106 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 32.8870119584549 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10762551439747235709&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "uw.edu;berkeley.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Washington;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://www.berkeley.edu", "aff_unique_abbr": "UW;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Model-based Diffusion for Trajectory Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96200", "id": "BJndYScO6o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BJndYScO6o", "openreview": "https://openreview.net/forum?id=BJndYScO6o", "poster": "/media/PosterPDFs/NeurIPS%202024/96200.png?t=1731716902.111646", "project": "", "author_site": "Chaoyi Pan, Zeji Yi, Guanya Shi, Guannan Qu", "tldr": "", "abstract": "Recent advances in diffusion models have demonstrated their strong capabilities in generating high-fidelity samples from complex distributions through an iterative refinement process. Despite the empirical success of diffusion models in motion planning and control, the model-free nature of these methods does not leverage readily available model information and limits their generalization to new scenarios beyond the training data (e.g., new robots with different dynamics). In this work, we introduce Model-Based Diffusion (MBD), an optimization approach using the diffusion process to solve trajectory optimization (TO) problems without data. The key idea is to explicitly compute the score function by leveraging the model information in TO problems, which is why we refer to our approach as model-based diffusion. Moreover, although MBD does not require external data, it can be naturally integrated with data of diverse qualities to steer the diffusion process. We also reveal that MBD has interesting connections to sampling-based optimization. Empirical evaluations show that MBD outperforms state-of-the-art reinforcement learning and sampling-based TO methods in challenging contact-rich tasks. Additionally, MBD\u2019s ability to integrate with data enhances its versatility and practical applicability, even with imperfect and infeasible data (e.g., partial-state demonstrations for high-dimensional humanoids), beyond the scope of standard diffusion models. Videos and codes are available in the supplementary materials.", "keywords": "Diffusion;Trajectory Optimization;Motion Planning;Robotics;Sampling-based Control", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/ebff0ade19155b743239159f21802220d1199fd2.zip", "author": "Chaoyi Pan;Zeji Yi;Guanya Shi;Guannan Qu", "authorids": "~Chaoyi_Pan1;~Zeji_Yi1;~Guanya_Shi1;~Guannan_Qu1", "gender": "M;M;M;", "homepage": "https://www.panchaoyi.com;https://neuralmachine.cc/members.html;http://guanyashi.github.io;https://www.guannanqu.com/", "dblp": "331/7271;;230/4386;", "google_scholar": "lJNKzEMAAAAJ;;joR1Z4UAAAAJ;oFIXoy8AAAAJ", "orcid": ";;0000-0002-9075-3705;", "linkedin": ";;guanya-shi-b07b43126/;", "or_profile": "~Chaoyi_Pan1;~Zeji_Yi1;~Guanya_Shi1;~Guannan_Qu1", "aff": "Carnegie Mellon University;Tsinghua University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;tsinghua.edu.cn;andrew.cmu.edu;cmu.edu", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npan2024modelbased,\ntitle={Model-based Diffusion for Trajectory Optimization},\nauthor={Chaoyi Pan and Zeji Yi and Guanya Shi and Guannan Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BJndYScO6o}\n}", "github": "", "reviewers": "p88D;YGTC;X3LL;sfi9", "pdf_size": 3061113, "rating": "5;5;5;7", "confidence": "2;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "33;56;183;112", "wc_strengths": "73;24;81;75", "wc_weaknesses": "117;176;79;118", "wc_questions": "161;5;173;52", "wc_limitations": "8;7;10;15", "wc_review": "392;268;526;372", "wc_reply_reviewers": "42;138;150;124", "wc_reply_authors": "128;260;360;436", "reply_reviewers": "1;2;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.0, 57.86622503671723 ], "wc_strengths_avg": [ 63.25, 22.851422275210794 ], "wc_weaknesses_avg": [ 122.5, 34.659053651246744 ], "wc_questions_avg": [ 97.75, 71.34204580750401 ], "wc_limitations_avg": [ 10.0, 3.082207001484488 ], "wc_review_avg": [ 389.5, 91.79733111588811 ], "wc_reply_reviewers_avg": [ 113.5, 42.29361653961505 ], "wc_reply_authors_avg": [ 296.0, 115.34296684236972 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=383385531394188592&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;tsinghua.edu.cn;andrew.cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "A robust inlier identification algorithm for point cloud registration via $\\mathbf{\\ell_0}$-minimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96199", "id": "BJrBaLoDRJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BJrBaLoDRJ", "openreview": "https://openreview.net/forum?id=BJrBaLoDRJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96199.png?t=1730084290.4167995", "project": "", "author_site": "Yinuo Jiang, Xiuchuan Tang, Cheng Cheng, Ye Yuan", "tldr": "", "abstract": "Correspondences in point cloud registration are prone to outliers, significantly reducing registration accuracy and highlighting the need for precise inlier identification. In this paper, we propose a robust inlier identification algorithm for point cloud registration by reformulating the conventional registration problem as an alignment error $\\ell_0$-minimization problem. The $\\ell_0$-minimization problem is formulated for each local set, where those local sets are built on a compatibility graph of input correspondences. To resolve the $\\ell_0$-minimization, we develop a novel two-stage decoupling strategy, which first decouples the alignment error into a rotation fitting error and a translation fitting error. Second, null-space matrices are employed to decouple inlier identification from the estimation of rotation and translation respectively, thereby applying Bayesian theory to $\\ell_0$-minimization problems and solving for fitting errors. Correspondences with the smallest errors are identified as inliers to generate a transformation hypothesis for each local set. The best hypothesis is selected to perform registration. We demonstrate that the proposed inlier identification algorithm is robust under high outlier ratios and noise through experiments. Extensive results on the KITTI, 3DMatch, and 3DLoMatch datasets demonstrate that our method achieves state-of-the-art performance compared to both traditional and learning-based methods in various indoor and outdoor scenes.", "keywords": "Point cloud registration;Inlier identification;Optimization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yinuo Jiang;Tang Xiuchuan;Cheng Cheng;Ye Yuan", "authorids": "~Yinuo_Jiang2;~Tang_Xiuchuan1;~Cheng_Cheng6;~Ye_Yuan20", "gender": "F;M;F;M", "homepage": ";;http://faculty.hust.edu.cn/chengcheng/zh_CN/index.htm;https://yy311.github.io", "dblp": ";;66/332;", "google_scholar": ";D8zwcp0AAAAJ;rBYsC9AAAAAJ;", "orcid": "0009-0003-2104-3624;;;", "linkedin": ";;;", "or_profile": "~Yinuo_Jiang2;~Tang_Xiuchuan1;~Cheng_Cheng6;~ye_yuan4", "aff": "Huazhong University of Science and Technology;Department of Automation, Tsinghua University;;Huazhong University of Science and Technology, Tsinghua University", "aff_domain": "hust.edu.cn;mail.tsinghua.edu.cn;;hust.edu.cn", "position": "PhD student;Postdoc;;Full Professor", "bibtex": "@inproceedings{\njiang2024a,\ntitle={A robust inlier identification algorithm for point cloud registration via \\${\\textbackslash}mathbf\\{{\\textbackslash}ell\\_0\\}\\$-minimization},\nauthor={Yinuo Jiang and Tang Xiuchuan and Cheng Cheng and Ye Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BJrBaLoDRJ}\n}", "github": "", "reviewers": "Z5SY;9KUM;TzsD", "pdf_size": 10717950, "rating": "5;5;6", "confidence": "5;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "30;125;111", "wc_strengths": "291;89;115", "wc_weaknesses": "92;174;272", "wc_questions": "37;18;35", "wc_limitations": "129;62;6", "wc_review": "579;468;539", "wc_reply_reviewers": "0;88;57", "wc_reply_authors": "0;394;45", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.66666666666667, 41.87547678003864 ], "wc_strengths_avg": [ 165.0, 89.72550733579982 ], "wc_weaknesses_avg": [ 179.33333333333334, 73.58139861556195 ], "wc_questions_avg": [ 30.0, 8.524474568362947 ], "wc_limitations_avg": [ 65.66666666666667, 50.28143019268865 ], "wc_review_avg": [ 528.6666666666666, 45.90085934804368 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 36.444783196257625 ], "wc_reply_authors_avg": [ 146.33333333333334, 176.08773065971658 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5863323299011065192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "hust.edu.cn;mail.tsinghua.edu.cn;;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Tsinghua University", "aff_unique_dep": ";Department of Automation", "aff_unique_url": "http://www.hust.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HUST;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Slot State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96198", "id": "BJv1t4XNJW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BJv1t4XNJW", "openreview": "https://openreview.net/forum?id=BJv1t4XNJW", "poster": "/media/PosterPDFs/NeurIPS%202024/96198.png?t=1734071804.249371", "project": "", "author_site": "Jindong Jiang, Fei Deng, Gautam Singh, Minseung Lee, Sungjin Ahn", "tldr": "", "abstract": "Recent State Space Models (SSMs) such as S4, S5, and Mamba have shown remarkable computational benefits in long-range temporal dependency modeling. However, in many sequence modeling problems, the underlying process is inherently modular and it is of interest to have inductive biases that mimic this modular structure. In this paper, we introduce SlotSSMs, a novel framework for incorporating independent mechanisms into SSMs to preserve or encourage separation of information. Unlike conventional SSMs that maintain a monolithic state vector, SlotSSMs maintains the state as a collection of multiple vectors called slots. Crucially, the state transitions are performed independently per slot with sparse interactions across slots implemented via the bottleneck of self-attention. In experiments, we evaluate our model in object-centric learning, 3D visual reasoning, and long-context video understanding tasks, which involve modeling multiple objects and their long-range temporal dependencies. We find that our proposed design offers substantial performance gains over existing sequence modeling methods. Project page is available at \\url{https://slotssms.github.io/}", "keywords": "State-Space Models;Object-Centric Learning;Video Understanding Models;Spatial-Temporal Reasoning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Jindong Jiang;Fei Deng;Gautam Singh;Minseung Lee;Sungjin Ahn", "authorids": "~Jindong_Jiang1;~Fei_Deng1;~Gautam_Singh3;~Minseung_Lee2;~Sungjin_Ahn1", "gender": "M;M;M;M;", "homepage": "https://www.jindongjiang.me;;https://singhgautam.github.io;https://www.notion.so/agentlearning/Minseung-Lee-22-S-ff25798bb92a460daf083000c344d66c;", "dblp": "200/8116;46/10037-1;35/2642;;", "google_scholar": "6oo8xOQAAAAJ;https://scholar.google.com/citations?hl=en;lXpFxDwAAAAJ;;", "orcid": ";;;;", "linkedin": ";;gautam-singh-61302463/;;", "or_profile": "~Jindong_Jiang1;~Fei_Deng1;~Gautam_Singh3;~Minseung_Lee2;~Sungjin_Ahn1", "aff": "Rutgers University;Rutgers University;Rutgers University;KAIST;", "aff_domain": "rutgers.edu;rutgers.edu;rutgers.edu;kaist.ac.kr;", "position": "PhD student;PhD student;PhD student;MS student;", "bibtex": "@inproceedings{\njiang2024slot,\ntitle={Slot State Space Models},\nauthor={Jindong Jiang and Fei Deng and Gautam Singh and Minseung Lee and Sungjin Ahn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BJv1t4XNJW}\n}", "github": "", "reviewers": "YJW6;Ae5c;ERbD", "pdf_size": 8626507, "rating": "7;7;7", "confidence": "4;4;5", "soundness": "3;4;4", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "96;45;154", "wc_strengths": "46;79;125", "wc_weaknesses": "148;326;75", "wc_questions": "108;268;108", "wc_limitations": "5;103;5", "wc_review": "403;821;467", "wc_reply_reviewers": "36;61;70", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 98.33333333333333, 44.52964056545807 ], "wc_strengths_avg": [ 83.33333333333333, 32.396844839514166 ], "wc_weaknesses_avg": [ 183.0, 105.4166337285851 ], "wc_questions_avg": [ 161.33333333333334, 75.42472332656506 ], "wc_limitations_avg": [ 37.666666666666664, 46.19764303752111 ], "wc_review_avg": [ 563.6666666666666, 183.82842241853922 ], "wc_reply_reviewers_avg": [ 55.666666666666664, 14.383632673594278 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13161668480333306553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "rutgers.edu;rutgers.edu;rutgers.edu;kaist.ac.kr;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Rutgers University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "Rutgers;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;South Korea" }, { "title": "PUZZLES: A Benchmark for Neural Algorithmic Reasoning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97818", "id": "BKu8JPQdQD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BKu8JPQdQD", "openreview": "https://openreview.net/forum?id=BKu8JPQdQD", "poster": "/media/PosterPDFs/NeurIPS%202024/97818.png?t=1732789913.3362463", "project": "", "author_site": "Benjamin Estermann, Luca Lanzend\u00f6rfer, Yannick Niedermayr, Roger Wattenhofer", "tldr": "", "abstract": "Algorithmic reasoning is a fundamental cognitive ability that plays a pivotal role in problem-solving and decision-making processes. Reinforcement Learning (RL) has demonstrated remarkable proficiency in tasks such as motor control, handling perceptual input, and managing stochastic environments. These advancements have been enabled in part by the availability of benchmarks. In this work we introduce PUZZLES, a benchmark based on Simon Tatham's Portable Puzzle Collection, aimed at fostering progress in algorithmic and logical reasoning in RL. PUZZLES contains 40 diverse logic puzzles of adjustable sizes and varying levels of complexity, providing detailed information on the strengths and generalization capabilities of RL agents. Furthermore, we evaluate various RL algorithms on PUZZLES, providing baseline comparisons and demonstrating the potential for future research. All the software, including the environment, is available at this https url.", "keywords": "Neural Algorithmic Reasoning;Benchmark;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Benjamin Estermann;Luca A Lanzend\u00f6rfer;Yannick Niedermayr;Roger Wattenhofer", "authorids": "~Benjamin_Estermann1;~Luca_A_Lanzend\u00f6rfer1;~Yannick_Niedermayr1;~Roger_Wattenhofer1", "gender": "Not Specified;M;;Not Specified", "homepage": "https://disco.ethz.ch/members/besterma;;;https://disco.ethz.ch/members/wroger", "dblp": "277/5034;;;w/RogerWattenhofer", "google_scholar": "zawztfkAAAAJ;;;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";;;", "linkedin": ";luca-lanzendoerfer/;yannick-niedermayr;roger-wattenhofer-4466731/", "or_profile": "~Benjamin_Estermann1;~Luca_A_Lanzend\u00f6rfer1;~Yannick_Niedermayr1;~Roger_Wattenhofer1", "aff": "Google;ETHZ - ETH Zurich;;Swiss Federal Institute of Technology", "aff_domain": "google.com;ethz.ch;;ethz.ch", "position": "Intern;PhD student;;Full Professor", "bibtex": "@inproceedings{\nestermann2024puzzles,\ntitle={{PUZZLES}: A Benchmark for Neural Algorithmic Reasoning},\nauthor={Benjamin Estermann and Luca A Lanzend{\\\"o}rfer and Yannick Niedermayr and Roger Wattenhofer},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=BKu8JPQdQD}\n}", "github": "", "reviewers": "3eo1;oMdw;EJo1", "pdf_size": 3356882, "rating": "5;6;7", "confidence": "5;3;4", "wc_summary_and_contributions": "200;162;77", "wc_strengths": "106;116;79", "wc_improvement": "40;132;389", "wc_limitations": "203;9;292", "wc_correctness": "52;7;19", "wc_clarity": "54;4;12", "wc_relation_to_prior_work": "109;1;39", "wc_documentation": "47;1;28", "wc_additional_feedback": "1;1;1", "wc_review": "812;433;936", "wc_reply_reviewers": "0;11;132", "wc_reply_authors": "408;413;556", "reply_reviewers": "0;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 146.33333333333334, 51.42200134399888 ], "wc_strengths_avg": [ 100.33333333333333, 15.627610892974724 ], "wc_improvement_avg": [ 187.0, 147.69111911914902 ], "wc_limitations_avg": [ 168.0, 118.15526508229189 ], "wc_correctness_avg": [ 26.0, 19.026297590440446 ], "wc_clarity_avg": [ 23.333333333333332, 21.9291789378647 ], "wc_relation_to_prior_work_avg": [ 49.666666666666664, 44.731296525910004 ], "wc_documentation_avg": [ 25.333333333333332, 18.873850222522755 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 727.0, 213.96417145556558 ], "wc_reply_reviewers_avg": [ 47.666666666666664, 59.801523577767 ], "wc_reply_authors_avg": [ 459.0, 68.61972505531239 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2676379414144546634&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "google.com;ethz.ch;;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "Google;ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Provably Faster Algorithms for Bilevel Optimization via Without-Replacement Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96197", "id": "BNnZwbZGpm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BNnZwbZGpm", "openreview": "https://openreview.net/forum?id=BNnZwbZGpm", "poster": "/media/PosterPDFs/NeurIPS%202024/96197.png?t=1731536759.407003", "project": "", "author_site": "Junyi Li, Heng Huang", "tldr": "", "abstract": "Bilevel Optimization has experienced significant advancements recently with the introduction of new efficient algorithms. Mirroring the success in single-level optimization, stochastic gradient-based algorithms are widely used in bilevel optimization. However, a common limitation in these algorithms is the presumption of independent sampling, which can lead to increased computational costs due to the unique hyper-gradient structure in bilevel problems. To address this challenge, we study the example-selection strategy for bilevel optimization in this work. More specifically, we introduce a without-replacement sampling based algorithm which achieves a faster convergence rate compared to its counterparts that rely on independent sampling. Beyond the standard bilevel optimization formulation, we extend our discussion to conditional bilevel optimization and also two special cases: minimax and compositional optimization. Finally, we validate our algorithms over both synthetic and real-world applications. Numerical results clearly showcase the superiority of our algorithms.", "keywords": "sampling;bilevel optimization", "primary_area": "optimization", "supplementary_material": "/attachment/86e263bea98690a3584406c52ffd478619ddbe17.zip", "author": "Junyi Li;Heng Huang", "authorids": "~Junyi_Li1;~Heng_Huang1", "gender": "M;M", "homepage": ";https://www.cs.umd.edu/~heng/", "dblp": ";03/281", "google_scholar": "MzvZSs0AAAAJ;4OqLaDwAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Junyi_Li1;~Heng_Huang1", "aff": "University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;cs.umd.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024provably,\ntitle={Provably Faster Algorithms for Bilevel Optimization via Without-Replacement Sampling},\nauthor={Junyi Li and Heng Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BNnZwbZGpm}\n}", "github": "", "reviewers": "H8tq;1Xqw;Qw9k;TL81;sRYA", "pdf_size": 7391240, "rating": "4;4;6;7;7", "confidence": "3;4;3;2;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "35;86;125;62;31", "wc_strengths": "23;110;43;79;32", "wc_weaknesses": "199;151;69;14;182", "wc_questions": "126;134;70;30;4", "wc_limitations": "6;36;1;1;1", "wc_review": "389;517;308;186;250", "wc_reply_reviewers": "267;20;0;15;11", "wc_reply_authors": "277;24;260;4;4", "reply_reviewers": "2;1;0;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 67.8, 34.83331738436637 ], "wc_strengths_avg": [ 57.4, 32.462901903557544 ], "wc_weaknesses_avg": [ 123.0, 70.48120316793691 ], "wc_questions_avg": [ 72.8, 51.2811856337195 ], "wc_limitations_avg": [ 9.0, 13.638181696985855 ], "wc_review_avg": [ 330.0, 114.95216396397242 ], "wc_reply_reviewers_avg": [ 62.6, 102.41210865908387 ], "wc_reply_authors_avg": [ 113.8, 126.63711936079405 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3152441624956402, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5838412461795981300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umd.edu;cs.umd.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Maryland;University of Maryland, College Park", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Locally Private and Robust Multi-Armed Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96196", "id": "BOhnXyIPWW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BOhnXyIPWW", "openreview": "https://openreview.net/forum?id=BOhnXyIPWW", "poster": "", "project": "", "author_site": "Xingyu Zhou, Komo(Wei) ZHANG", "tldr": "", "abstract": "We study the interplay between local differential privacy (LDP) and robustness to Huber corruption and possibly heavy-tailed rewards in the context of multi-armed bandits (MABs). We consider two different practical settings: LDP-then-Corruption (LTC) where each user's locally private response might be further corrupted during the data collection process, and Corruption-then-LDP (CTL) where each user's raw data may be corrupted such that the LDP mechanism will only be applied to the corrupted data. To start with, we present the first tight characterization of the mean estimation error in high probability under both LTC and CTL settings. Leveraging this new result, we then present an almost tight characterization (up to log factor) of the minimax regret in online MABs and sub-optimality in offline MABs under both LTC and CTL settings, respectively. Our theoretical results in both settings are also corroborated by a set of systematic simulations. One key message in this paper is that LTC is a more difficult setting that leads to a worse performance guarantee compared to the CTL setting (in the minimax sense). Our sharp understanding of LTC and CTL also naturally allows us to give the first tight performance bounds for the most practical setting where corruption could happen both before and after the LDP mechanism. \nAs an important by-product, we also give the first correct and tight regret bound for locally private and heavy-tailed online MABs, i.e., without Huber corruption, by identifying a fundamental flaw in the state-of-the-art.", "keywords": "Local Differential Privacy;Robustness;Huber Corruption;Multi-Armed Bandits", "primary_area": "bandits", "supplementary_material": "/attachment/937900133584810d3c3956ba6dff8bca0e2512c9.zip", "author": "Xingyu Zhou;WEI ZHANG", "authorids": "~Xingyu_Zhou2;~WEI_ZHANG104", "gender": "M;Not Specified", "homepage": "http://xingyuzhou.org;https://github.com/komoshark", "dblp": "07/10352-1;396/6200", "google_scholar": "AsTyRmwAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Xingyu_Zhou2;~WEI_ZHANG104", "aff": "Wayne State University;Texas A&M University - College Station", "aff_domain": "wayne.edu;tamu.edu", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\nzhou2024locally,\ntitle={Locally Private and Robust Multi-Armed Bandits},\nauthor={Xingyu Zhou and WEI ZHANG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BOhnXyIPWW}\n}", "github": "", "reviewers": "5Gx9;1jNj;fKk6", "pdf_size": 3688298, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "64;70;90", "wc_strengths": "49;68;43", "wc_weaknesses": "290;149;8", "wc_questions": "4;115;36", "wc_limitations": "4;1;24", "wc_review": "411;403;201", "wc_reply_reviewers": "22;11;10", "wc_reply_authors": "0;11;11", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.66666666666667, 11.115554667022044 ], "wc_strengths_avg": [ 53.333333333333336, 10.656244908763853 ], "wc_weaknesses_avg": [ 149.0, 115.12601791080937 ], "wc_questions_avg": [ 51.666666666666664, 46.64999702274612 ], "wc_limitations_avg": [ 9.666666666666666, 10.208928554075703 ], "wc_review_avg": [ 338.3333333333333, 97.16423667630436 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 5.436502143433364 ], "wc_reply_authors_avg": [ 7.333333333333333, 5.185449728701348 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KplI6sDZyKkJ:scholar.google.com/&scioq=Locally+Private+and+Robust+Multi-Armed+Bandits&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "wayne.edu;tamu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Wayne State University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://wayne.edu;https://www.tamu.edu", "aff_unique_abbr": "WSU;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Divide-and-Conquer Posterior Sampling for Denoising Diffusion priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96195", "id": "BOrut7M2X7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BOrut7M2X7", "openreview": "https://openreview.net/forum?id=BOrut7M2X7", "poster": "/media/PosterPDFs/NeurIPS%202024/96195.png?t=1733570608.2582762", "project": "", "author_site": "Yazid Janati, Badr MOUFAD, Alain Durmus, Eric Moulines, Jimmy Olsson", "tldr": "", "abstract": "Recent advancements in solving Bayesian inverse problems have spotlighted denoising diffusion models (DDMs) as effective priors.\nAlthough these have great potential, DDM priors yield complex posterior distributions that are challenging to sample from.\nExisting approaches to posterior sampling in this context address this problem either by retraining model-specific components, leading to stiff and cumbersome methods, or by introducing approximations with uncontrolled errors that affect the accuracy of the produced samples.\nWe present an innovative framework, divide-and-conquer posterior sampling, which leverages the inherent structure of DDMs to construct a sequence of intermediate posteriors that guide the produced samples to the target posterior.\nOur method significantly reduces the approximation error associated with current techniques without the need for retraining.\nWe demonstrate the versatility and effectiveness of our approach for a wide range of Bayesian inverse problems.\nThe code is available at \\url{https://github.com/Badr-MOUFAD/dcps}", "keywords": "Denoising Diffusion models;posterior sampling;Monte Carlo methods", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/172200ca6fa875c8f96e32cfb70270f6877ad263.zip", "author": "Yazid Janati;Badr MOUFAD;Alain Oliviero Durmus;Eric Moulines;Jimmy Olsson", "authorids": "~Yazid_Janati1;~Badr_MOUFAD1;~Alain_Oliviero_Durmus1;~Eric_Moulines1;~Jimmy_Olsson1", "gender": "M;M;M;M;M", "homepage": ";;https://www.kth.se/profile/jimmyol;;http://yazidjanati.github.io", "dblp": ";54/2358;;01/11275;319/4479", "google_scholar": "0MkYv20AAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ;xBHS7MAAAAAJ;;JGor6XwAAAAJ", "orcid": ";0000-0002-2058-0693;;;", "linkedin": "badr-moufad/;;;;", "or_profile": "~Badr_MOUFAD1;~Eric_Moulines1;~Jimmy_Olsson1;~Alain_Durmus1;~Yazid_Janati_el_idrissi3", "aff": "\u00c9cole Polytechnique;Ecole polytechnique;KTH Royal Institute of Technology;\u00c9cole Polytechnique;\u00c9cole Polytechnique", "aff_domain": "polytechnique.edu;polytechnique.edu;kth.se;polytechnique.fr;polytechnique.edu", "position": "PhD student;Full Professor;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\njanati2024divideandconquer,\ntitle={Divide-and-Conquer Posterior Sampling for Denoising Diffusion priors},\nauthor={Yazid Janati and Badr MOUFAD and Alain Oliviero Durmus and Eric Moulines and Jimmy Olsson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BOrut7M2X7}\n}", "github": "", "reviewers": "xni9;GK7d;af3x;vN6C", "pdf_size": 15102546, "rating": "4;7;7;7", "confidence": "5;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "1;3;3;2", "wc_summary": "54;87;69;72", "wc_strengths": "13;45;73;39", "wc_weaknesses": "341;143;493;112", "wc_questions": "101;54;2;87", "wc_limitations": "42;17;2;13", "wc_review": "551;346;639;323", "wc_reply_reviewers": "204;75;435;63", "wc_reply_authors": "230;42;666;20", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 70.5, 11.715374513859981 ], "wc_strengths_avg": [ 42.5, 21.324868112136123 ], "wc_weaknesses_avg": [ 272.25, 154.7924012992886 ], "wc_questions_avg": [ 61.0, 38.09855640309748 ], "wc_limitations_avg": [ 18.5, 14.637281168304447 ], "wc_review_avg": [ 464.75, 134.16105060709685 ], "wc_reply_reviewers_avg": [ 194.25, 149.5850510579182 ], "wc_reply_authors_avg": [ 239.5, 259.41231659271693 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2312059805615058681&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "polytechnique.edu;polytechnique.edu;kth.se;polytechnique.fr;polytechnique.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Ecole Polytechnique;KTH Royal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.kth.se", "aff_unique_abbr": "X;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "France;Sweden" }, { "title": "Efficient Adaptation of Pre-trained Vision Transformer via Householder Transformation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96194", "id": "BOtjMacACI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BOtjMacACI", "openreview": "https://openreview.net/forum?id=BOtjMacACI", "poster": "/media/PosterPDFs/NeurIPS%202024/96194.png?t=1731244264.8817608", "project": "", "author_site": "Wei Dong, Yuan Sun, Yiting Yang, Xing Zhang, Zhijun Lin, Qingsen Yan, Haokui Zhang, Peng Wang, Yang Yang, Hengtao Shen", "tldr": "", "abstract": "A common strategy for Parameter-Efficient Fine-Tuning (PEFT) of pre-trained Vision Transformers (ViTs) involves adapting the model to downstream tasks by learning a low-rank adaptation matrix. This matrix is decomposed into a product of down-projection and up-projection matrices, with the bottleneck dimensionality being crucial for reducing the number of learnable parameters, as exemplified by prevalent methods like LoRA and Adapter. However, these low-rank strategies typically employ a fixed bottleneck dimensionality, which limits their flexibility in handling layer-wise variations. To address this limitation, we propose a novel PEFT approach inspired by Singular Value Decomposition (SVD) for representing the adaptation matrix. SVD decomposes a matrix into the product of a left unitary matrix, a diagonal matrix of scaling values, and a right unitary matrix. We utilize Householder transformations to construct orthogonal matrices that efficiently mimic the unitary matrices, requiring only a vector. The diagonal values are learned in a layer-wise manner, allowing them to flexibly capture the unique properties of each layer. This approach enables the generation of adaptation matrices with varying ranks across different layers, providing greater flexibility in adapting pre-trained models. Experiments on standard downstream vision tasks demonstrate that our method achieves promising fine-tuning performance.", "keywords": "Parameter-efficient fine-tuning;vision transformer;householder matrix;singular value decomposition", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/3425046cdffb8059383582a913339d012484bb4c.zip", "author": "Wei Dong;Yuan Sun;Yiting Yang;Xing Zhang;Zhijun Lin;Qingsen Yan;Haokui Zhang;Peng Wang;Yang Yang;Heng Tao Shen", "authorids": "~Wei_Dong4;~Yuan_Sun4;~Yiting_Yang2;~Xing_Zhang10;~Zhijun_Lin2;~Qingsen_Yan1;~Haokui_Zhang1;~Peng_Wang19;~Yang_Yang37;~Heng_Tao_Shen3", "gender": "M;M;F;M;;M;M;M;M;M", "homepage": ";https://github.com/html4396;;;;https://qingsenyangit.github.io/;https://teacher.nwpu.edu.cn/2023050022.html;https://wp8619.github.io/;http://cfm.uestc.edu.cn/~yangyang/;https://cfm.uestc.edu.cn/~shenht/", "dblp": ";;;;;206/9166;197/5431;95/4442-23.html;;s/HTShen", "google_scholar": "tkTl3BMAAAAJ;;;;;BSGy3foAAAAJ;m3gPwCoAAAAJ;vIr3ICQAAAAJ;;https://scholar.google.com.au/citations?user=krryaDkAAAAJ", "orcid": "0000-0003-0263-3584;;0009-0006-2935-3960;0000-0002-9112-4070;0000-0002-6646-4866;;;;;", "linkedin": ";;;;;;%E5%8F%B7%E9%80%B5-%E5%BC%A0-1636a7110/;;;", "or_profile": "~Wei_Dong4;~Yuan_Sun4;~Yiting_Yang2;~Xing_Zhang10;~Zhijun_Lin2;~Qingsen_Yan1;~Haokui_Zhang1;~Peng_Wang19;~Yang_Yang37;~Hengtao_Shen1", "aff": "Xi'an University of Architecture and Technology;Xi'an University of Architecture and Technology;Xi'an University of Architecture and Technology;Xi'an University of Architecture and Technology;Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Northwest Polytechnical University;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Tongji University", "aff_domain": "xauat.edu.cn;xauat.edu.cn;xauat.edu.cn;xauat.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;uestc.edu.cn;uestc.edu.cn;tongji.edu.cn", "position": "Associate Professor;MS student;MS student;MS student;PhD student;Full Professor;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndong2024efficient,\ntitle={Efficient Adaptation of Pre-trained Vision Transformer via Householder Transformation},\nauthor={Wei Dong and Yuan Sun and Yiting Yang and Xing Zhang and Zhijun Lin and Qingsen Yan and Haokui Zhang and Peng Wang and Yang Yang and Heng Tao Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BOtjMacACI}\n}", "github": "", "reviewers": "YBq9;PRgH;HQtR;6gjh", "pdf_size": 479804, "rating": "5;6;7;7", "confidence": "2;3;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "33;114;69;78", "wc_strengths": "33;42;88;80", "wc_weaknesses": "137;30;98;13", "wc_questions": "14;71;108;13", "wc_limitations": "13;8;44;1", "wc_review": "230;265;407;185", "wc_reply_reviewers": "26;172;13;5", "wc_reply_authors": "0;286;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 73.5, 28.81405906844782 ], "wc_strengths_avg": [ 60.75, 23.636571240347024 ], "wc_weaknesses_avg": [ 69.5, 50.30159043211258 ], "wc_questions_avg": [ 51.5, 40.190172928217166 ], "wc_limitations_avg": [ 16.5, 16.439282222773596 ], "wc_review_avg": [ 271.75, 83.07639556456455 ], "wc_reply_reviewers_avg": [ 54.0, 68.5383104548106 ], "wc_reply_authors_avg": [ 71.5, 123.84163274117472 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2570904296195114296&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "xauat.edu.cn;xauat.edu.cn;xauat.edu.cn;xauat.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;uestc.edu.cn;uestc.edu.cn;tongji.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;1;1;1;2;2;3", "aff_unique_norm": "Xi'an University of Architecture and Technology;Northwest Polytechnical University;University of Electronic Science and Technology of China;Tongji University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.xauat.edu.cn;http://www.nwpu.edu.cn;https://www.uestc.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "XAUAT;NWPU;UESTC;Tongji", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "BQEOJZ0aSD", "title": "Scalable Ensemble Diversification for OOD Generalization and Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Training a diverse ensemble of models has several practical application scenarios, such as model selection for out-of-distribution (OOD) generalization and the detection of OOD samples via Bayesian principles. Previous approaches to diverse ensemble training have relied on the framework of letting the models make the correct predictions for the given in-distribution (ID) data while letting them come up with different hypotheses for the OOD data. As such, they require well-separated ID and OOD datasets to ensure a performant and diverse ensemble and have only been verified in smaller-scale lab environments where such a separation is readily available. In this work, we propose a framework, Scalable Ensemble Diversification (SED), for scaling up existing diversification methods to large-scale datasets and tasks (e.g. ImageNet), where the ID-OOD separation may not be available. SED automatically identifies OOD samples within the large-scale ID dataset on the fly and encourages the ensemble to make diverse hypotheses on them. To make SED more suitable for large-scale applications, we propose an algorithm to speed up the expensive pairwise disagreement computation. We verify the resulting diversification of the ensemble on ImageNet and demonstrate the benefit of diversification on the OOD generalization and OOD detection tasks. In particular, for OOD detection, we propose a novel uncertainty score estimator based on the diversity of ensemble hypotheses, which lets SED surpass all the considered baselines in OOD detection task. Code will be available soon.", "keywords": "diverse;ensemble;scalable;robustness;uncertainty;OOD detection;OOD generalization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Alexander Rubinstein;Luca Scimeca;Damien Teney;Seong Joon Oh", "authorids": "~Alexander_Rubinstein1;~Luca_Scimeca1;~Damien_Teney1;~Seong_Joon_Oh1", "gender": ";M;M;M", "homepage": ";https://lucascimeca.com;https://www.damienteney.info;https://seongjoonoh.com", "dblp": "129/1368-2;223/6396;62/10068;168/8835", "google_scholar": ";fKJvAvMAAAAJ;https://scholar.google.com.au/citations?user=iS_jP_3dpD8J;https://scholar.google.de/citations?user=kmXOOdsAAAAJ", "orcid": ";0000-0002-2821-0072;;0000-0002-8985-7689", "linkedin": "alexander-rubinstein-043564116/;luca-scimeca/;;seong-joon-oh-32113479/", "or_profile": "~Alexander_Rubinstein1;~Luca_Scimeca1;~Damien_Teney1;~Seong_Joon_Oh1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Idiap Research Institute;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "uni-tuebingen.de;mila.umontreal.ca;idiap.ch;uni-tuebingen.de", "position": "PhD student;Postdoc;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024scalable,\ntitle={Scalable Ensemble Diversification for {OOD} Generalization and Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=BQEOJZ0aSD}\n}", "github": "", "project": "", "reviewers": "UGvs;nXDD;SDvr;CkrP;mQsd", "site": "https://openreview.net/forum?id=BQEOJZ0aSD", "pdf_size": 16086935, "rating": "5;5;5;5;5", "confidence": "3;3;4;3;3", "soundness": "2;3;2;2;2", "novelty": "2;2;2;2;3", "presentation": "2;3;2;3;3", "wc_summary": "49;54;80;123;49", "wc_strengths": "33;64;34;44;37", "wc_weaknesses": "269;170;427;405;54", "wc_questions": "11;100;76;4;19", "wc_limitations": "34;22;6;18;6", "wc_review": "396;410;623;594;165", "wc_reply_reviewers": "205;29;20;131;13", "wc_reply_authors": "876;12;69;387;41", "reply_reviewers": "3;1;1;2;1", "reply_authors": "5;2;3;3;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 71.0, 28.432375911977527 ], "wc_strengths_avg": [ 42.4, 11.46472851837321 ], "wc_weaknesses_avg": [ 265.0, 141.00070921807452 ], "wc_questions_avg": [ 42.0, 38.61087929586686 ], "wc_limitations_avg": [ 17.2, 10.552724766618335 ], "wc_review_avg": [ 437.6, 164.70409830966562 ], "wc_reply_reviewers_avg": [ 79.6, 76.04630168522331 ], "wc_reply_authors_avg": [ 277.0, 328.65970242790644 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3605606425821675906&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Montreal;Idiap Research Institute", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mila.quebec;https://www.idiap.ch", "aff_unique_abbr": "Uni T\u00fcbingen;MILA;Idiap", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "T\u00fcbingen;Montreal;", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Germany;Canada;Switzerland" }, { "title": "AdanCA: Neural Cellular Automata As Adaptors For More Robust Vision Transformer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96193", "id": "BQh1SGvROG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BQh1SGvROG", "openreview": "https://openreview.net/forum?id=BQh1SGvROG", "poster": "/media/PosterPDFs/NeurIPS%202024/96193.png?t=1731500386.0953035", "project": "", "author_site": "Yitao Xu, Tong Zhang, Sabine S\u00fcsstrunk", "tldr": "", "abstract": "Vision Transformers (ViTs) demonstrate remarkable performance in image classification through visual-token interaction learning, particularly when equipped with local information via region attention or convolutions. Although such architectures improve the feature aggregation from different granularities, they often fail to contribute to the robustness of the networks. Neural Cellular Automata (NCA) enables the modeling of global visual-token representations through local interactions, with its training strategies and architecture design conferring strong generalization ability and robustness against noisy input. In this paper, we propose Adaptor Neural Cellular Automata (AdaNCA) for Vision Transformers that uses NCA as plug-and-play adaptors between ViT layers, thus enhancing ViT's performance and robustness against adversarial samples as well as out-of-distribution inputs. To overcome the large computational overhead of standard NCAs, we propose Dynamic Interaction for more efficient interaction learning. Using our analysis of AdaNCA placement and robustness improvement, we also develop an algorithm for identifying the most effective insertion points for AdaNCA. With less than a 3% increase in parameters, AdaNCA contributes to more than 10% absolute improvement in accuracy under adversarial attacks on the ImageNet1K benchmark. Moreover, we demonstrate with extensive evaluations across eight robustness benchmarks and four ViT architectures that AdaNCA, as a plug-and-play module, consistently improves the robustness of ViTs.", "keywords": "Neural Cellular Automata;Vision Transformer;Adversarial Robustness;Out-of-distribution generalization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yitao Xu;Tong Zhang;Sabine Susstrunk", "authorids": "~Yitao_Xu1;~Tong_Zhang9;~Sabine_S\u00fcsstrunk1", "gender": "M;M;", "homepage": "https://xyt0098.github.io/;https://sites.google.com/view/tong-zhang;https://www.epfl.ch/labs/ivrl/", "dblp": "123/0905;07/4227-23;s/SSusstrunk", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.au/citations?user=kCy8JG8AAAAJ;https://scholar.google.com/citations?hl=de", "orcid": ";0000-0001-5818-4285;", "linkedin": ";;", "or_profile": "~Yitao_Xu1;~Tong_Zhang9;~Sabine_S\u00fcsstrunk1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nxu2024adanca,\ntitle={Adan{CA}: Neural Cellular Automata As Adaptors For More Robust Vision Transformer},\nauthor={Yitao Xu and Tong Zhang and Sabine Susstrunk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BQh1SGvROG}\n}", "github": "", "reviewers": "oE4b;h6zC;ZhPF;3x5z", "pdf_size": 3032631, "rating": "5;6;7;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "98;663;56;79", "wc_strengths": "89;864;50;61", "wc_weaknesses": "203;439;142;50", "wc_questions": "171;2480;149;43", "wc_limitations": "7;69;5;1", "wc_review": "568;4515;402;234", "wc_reply_reviewers": "228;505;66;5", "wc_reply_authors": "112;64;63;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 224.0, 253.89269386888628 ], "wc_strengths_avg": [ 266.0, 345.5481153182578 ], "wc_weaknesses_avg": [ 208.5, 143.7923850556767 ], "wc_questions_avg": [ 710.75, 1022.6227004619054 ], "wc_limitations_avg": [ 20.5, 28.084693339967234 ], "wc_review_avg": [ 1429.75, 1785.1798753907126 ], "wc_reply_reviewers_avg": [ 201.0, 193.51098160052828 ], "wc_reply_authors_avg": [ 64.0, 33.59315406448165 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SEiiiSCjpXYJ:scholar.google.com/&scioq=AdanCA:+Neural+Cellular+Automata+As+Adaptors+For+More+Robust+Vision+Transformer&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "SELF-DISCOVER: Large Language Models Self-Compose Reasoning Structures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96192", "id": "BROvXhmzYK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BROvXhmzYK", "openreview": "https://openreview.net/forum?id=BROvXhmzYK", "poster": "", "project": "", "author_site": "Pei Zhou, Jay Pujara, Xiang Ren, Xinyun Chen, Heng-Tze Cheng, Quoc V Le, Ed Chi, Denny Zhou, Swaroop Mishra, Huaixiu (Steven) Zheng", "tldr": "", "abstract": "We introduce SELF-DISCOVER, a general framework for LLMs to self-discover the task-intrinsic reasoning structures to tackle complex reasoning problems that are challenging for typical prompting methods. Core to the framework is a self-discovery process where LLMs select multiple atomic reasoning modules such as critical thinking and step-by-step thinking, and compose them into an explicit reasoning structure for LLMs to follow during decoding. SELF-DISCOVER substantially improves GPT-4 and PaLM 2\u2019s performance on challenging reasoning benchmarks such as BigBench-Hard, grounded agent reasoning, and MATH, by as much as 32% compared to Chain of Thought (CoT). Furthermore, SELF-DISCOVER outperforms inference-intensive methods such as CoT-Self-Consistency by more than 20%, while requiring 10-40x fewer inference compute. Finally, we show that the self-discovered reasoning structures are universally applicable across model families: from PaLM 2-L to GPT-4, and from GPT-4 to Llama2, and share commonalities with human reasoning patterns.", "keywords": "Large Language Models;Reasoning;Prompting;Self-Improve;Self-Discover", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Pei Zhou;Jay Pujara;Xiang Ren;Xinyun Chen;Heng-Tze Cheng;Quoc V Le;Ed H. Chi;Denny Zhou;Swaroop Mishra;Steven Zheng", "authorids": "~Pei_Zhou1;~Jay_Pujara1;~Xiang_Ren1;~Xinyun_Chen1;~Heng-Tze_Cheng1;~Quoc_V_Le1;~Ed_H._Chi1;~Denny_Zhou1;~Swaroop_Mishra1;~Steven_Zheng1", "gender": "M;;M;;M;M;;M;M;F", "homepage": "https://shaoxia57.github.io/;https://www.jaypujara.org;https://shanzhenren.github.io/;https://www.linkedin.com/in/hengtze;;https://swarooprm.github.io/;https://dennyzhou.github.io/;http://edchi.net;;https://jungyhuk.github.io/", "dblp": ";65/10103;36/360-1;30/8739;29/6166;249/2784;178/3277;13/310;307/3201;", "google_scholar": "13PGDZsAAAAJ;yvdSr4AAAAAJ;_moJlrIAAAAJ;;;-7LK2SwAAAAJ;UwLsYw8AAAAJ;VuWl-KUAAAAJ;PyK4x4wAAAAJ;d4W1UT0AAAAJ", "orcid": ";0000-0001-6921-1744;;;;;;0000-0003-3230-5338;;", "linkedin": "pei-zhou-169051119/;pujara;xren7;;;;;edchi/;;", "or_profile": "~Pei_Zhou1;~Jay_Pujara1;~Xiang_Ren1;~Heng-Tze_Cheng1;~Quoc_V_Le1;~Swaroop_Mishra1;~Dengyong_Zhou2;~Ed_Chi1;~Huaixiu_Steven_Zheng1;~Xinyun_Chen2", "aff": "University of Southern California;University of Southern California;University of Southern California;;Google;Google;Google DeepMind;Google;Google;Google", "aff_domain": "usc.edu;usc.edu;usc.edu;;google.com;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Assistant Professor;Associate Professor;;Scientist;Researcher;Research Scientist;Researcher;Software Engineer;Researcher", "bibtex": "@inproceedings{\nzhou2024selfdiscover,\ntitle={{SELF}-{DISCOVER}: Large Language Models Self-Compose Reasoning Structures},\nauthor={Pei Zhou and Jay Pujara and Xiang Ren and Xinyun Chen and Heng-Tze Cheng and Quoc V Le and Ed H. Chi and Denny Zhou and Swaroop Mishra and Steven Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BROvXhmzYK}\n}", "github": "", "reviewers": "nKMJ;i8mj;Kd8z;NVyo;5mv1", "pdf_size": 1160286, "rating": "4;4;5;6;6", "confidence": "4;4;5;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;2", "presentation": "3;3;3;3;3", "wc_summary": "38;58;44;243;83", "wc_strengths": "27;89;70;97;46", "wc_weaknesses": "141;171;178;192;90", "wc_questions": "1;2;165;145;29", "wc_limitations": "1;2;75;57;1", "wc_review": "208;322;532;734;249", "wc_reply_reviewers": "0;0;75;48;0", "wc_reply_authors": "18;18;21;32;0", "reply_reviewers": "0;0;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.2, 76.4863386494608 ], "wc_strengths_avg": [ 65.8, 26.179381199715166 ], "wc_weaknesses_avg": [ 154.4, 36.2579646422686 ], "wc_questions_avg": [ 68.4, 71.69825660363018 ], "wc_limitations_avg": [ 27.2, 32.189439261969135 ], "wc_review_avg": [ 409.0, 197.14157349478572 ], "wc_reply_reviewers_avg": [ 24.6, 31.315172041679734 ], "wc_reply_authors_avg": [ 17.8, 10.283968105745952 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1686505560961998375&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": "usc.edu;usc.edu;usc.edu;;google.com;google.com;google.com;google.com;google.com;google.com", "author_num": 10, "aff_unique_index": "0;0;0;1;1;1;1;1;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.usc.edu;https://www.google.com", "aff_unique_abbr": "USC;Google", "aff_campus_unique_index": "0;0;0;1;1;1;1;1", "aff_campus_unique": "Los Angeles;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Action Gaps and Advantages in Continuous-Time Distributional Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96191", "id": "BRW0MKJ7Rr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BRW0MKJ7Rr", "openreview": "https://openreview.net/forum?id=BRW0MKJ7Rr", "poster": "/media/PosterPDFs/NeurIPS%202024/96191.png?t=1733755240.465544", "project": "", "author_site": "Harley Wiltzer, Marc Bellemare, David Meger, Patrick Shafto, Yash Jhaveri", "tldr": "", "abstract": "When decisions are made at high frequency, traditional reinforcement learning (RL) methods struggle to accurately estimate action values. In turn, their performance is inconsistent and often poor. Whether the performance of distributional RL (DRL) agents suffers similarly, however, is unknown. In this work, we establish that DRL agents *are* sensitive to the decision frequency. We prove that action-conditioned return distributions collapse to their underlying policy's return distribution as the decision frequency increases. We quantify the rate of collapse of these return distributions and exhibit that their statistics collapse at different rates. Moreover, we define distributional perspectives on action gaps and advantages. In particular, we introduce the *superiority* as a probabilistic generalization of the advantage---the core object of approaches to mitigating performance issues in high-frequency value-based RL. In addition, we build a superiority-based DRL algorithm. Through simulations in an option-trading domain, we validate that proper modeling of the superiority distribution produces improved controllers at high decision frequencies.", "keywords": "distributional reinforcement learning;reinforcement learning;continuous time;advantage updating;stochastic differential equations", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/51d95fb00973e7f7c554c4d48a0048aec8d41be9.zip", "author": "Harley Wiltzer;Marc G Bellemare;David Meger;Patrick Shafto;Yash Jhaveri", "authorids": "~Harley_Wiltzer1;~Marc_G_Bellemare1;~David_Meger2;~Patrick_Shafto2;~Yash_Jhaveri1", "gender": "M;M;M;;", "homepage": "https://harwiltz.github.io/about;http://www.marcgbellemare.info;http://www.cim.mcgill.ca/~dmeger/;https://sasn.rutgers.edu/about-us/faculty-staff/yash-jhaveri;http://www.shaftolab.com", "dblp": "321/0992;38/4525;51/3415.html;;03/5979", "google_scholar": ";https://scholar.google.co.uk/citations?user=uyYPun0AAAAJ;https://scholar.google.com.tw/citations?user=gFwEytkAAAAJ;;HUi6F7wAAAAJ", "orcid": ";;;;", "linkedin": "harley-wiltzer-4998547a;;;;", "or_profile": "~Harley_Wiltzer1;~Marc_G_Bellemare1;~David_Meger2;~Yash_Jhaveri1;~Patrick_Shafto1", "aff": "Mila;Google;McGill University;Rutgers University;Rutgers University", "aff_domain": "mila.quebec;google.com;mcgill.ca;rutgers.edu;rutgers.edu", "position": "PhD student;Research Scientist;Associate Professor;Postdoc;Professor", "bibtex": "@inproceedings{\nwiltzer2024action,\ntitle={Action Gaps and Advantages in Continuous-Time Distributional Reinforcement Learning},\nauthor={Harley Wiltzer and Marc G Bellemare and David Meger and Patrick Shafto and Yash Jhaveri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BRW0MKJ7Rr}\n}", "github": "", "reviewers": "dYRz;ZgER;TDv5", "pdf_size": 1134112, "rating": "6;6;7", "confidence": "2;3;2", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "84;89;153", "wc_strengths": "88;31;45", "wc_weaknesses": "110;98;98", "wc_questions": "85;154;52", "wc_limitations": "13;1;1", "wc_review": "380;373;349", "wc_reply_reviewers": "117;46;115", "wc_reply_authors": "272;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.66666666666667, 31.414787742222437 ], "wc_strengths_avg": [ 54.666666666666664, 24.253293018108327 ], "wc_weaknesses_avg": [ 102.0, 5.656854249492381 ], "wc_questions_avg": [ 97.0, 42.49705872175156 ], "wc_limitations_avg": [ 5.0, 5.656854249492381 ], "wc_review_avg": [ 367.3333333333333, 13.27487183449325 ], "wc_reply_reviewers_avg": [ 92.66666666666667, 33.00841643513901 ], "wc_reply_authors_avg": [ 90.66666666666667, 128.2220296551606 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5131494814991798454&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mila.quebec;google.com;mcgill.ca;rutgers.edu;rutgers.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Mila;Google;McGill University;Rutgers University", "aff_unique_dep": "Quebec Artificial Intelligence Institute;Google;;", "aff_unique_url": "https://mila.quebec;https://www.google.com;https://www.mcgill.ca;https://www.rutgers.edu", "aff_unique_abbr": "Mila;Google;McGill;Rutgers", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Multi-Group Proportional Representation in Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96190", "id": "BRZYhVHvSg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BRZYhVHvSg", "openreview": "https://openreview.net/forum?id=BRZYhVHvSg", "poster": "", "project": "", "author_site": "Alex Oesterling, Claudio Mayrink Verdun, Alexander Glynn, Carol Long, Lucas Monteiro Paes, Sajani Vithana, Martina Cardone, Flavio Calmon", "tldr": "", "abstract": "Image search and retrieval tasks can perpetuate harmful stereotypes, erase cultural identities, and amplify social disparities. Current approaches to mitigate these representational harms balance the number of retrieved items across population groups defined by a small number of (often binary) attributes. However, most existing methods overlook intersectional groups determined by combinations of\ngroup attributes, such as gender, race, and ethnicity. We introduce Multi-Group Proportional Representation (MPR), a novel metric that measures representation across intersectional groups. We develop practical methods for estimating MPR, provide theoretical guarantees, and propose optimization algorithms to ensure MPR in retrieval. We demonstrate that existing methods optimizing for equal and proportional representation metrics may fail to promote MPR. Crucially, our work shows that optimizing MPR yields more proportional representation across multiple intersectional groups specified by a rich function class, often with minimal compromise in retrieval accuracy. Code is provided at https://github.com/alex-oesterling/multigroup-proportional-representation.", "keywords": "Fairness;Proportional Representation;Multi-Group Fairness", "primary_area": "fairness", "supplementary_material": "", "author": "Alex Oesterling;Claudio Mayrink Verdun;Alexander Glynn;Carol Xuan Long;Lucas Monteiro Paes;Sajani Vithana;Martina Cardone;Flavio Calmon", "authorids": "~Alex_Oesterling2;~Claudio_Mayrink_Verdun1;~Alexander_Glynn1;~Carol_Xuan_Long1;~Lucas_Monteiro_Paes1;~Sajani_Vithana1;~Martina_Cardone1;~Flavio_Calmon1", "gender": "M;M;M;F;M;F;F;", "homepage": "https://www.alexoesterling.com/;;;;;;https://mcardone.umn.edu/;http://people.seas.harvard.edu/~flavio/", "dblp": "295/9299;;;;;;;89/4611", "google_scholar": "dlRlXT4AAAAJ;lsOne4AAAAAJ;;DGQASc8AAAAJ;ruB-9hwAAAAJ;l5oE7xwAAAAJ;;P8N_YH4AAAAJ", "orcid": "0000-0001-8546-0089;;;;0000-0003-0129-1420;;;", "linkedin": "axo/;;alexander-glynn-794406206/;carol-xuan-long;lucas-monteiro-paes-201125141;sajanivithana/;;", "or_profile": "~Alex_Oesterling2;~Claudio_Mayrink_Verdun1;~Alexander_Glynn1;~Carol_Xuan_Long1;~Lucas_Monteiro_Paes1;~Sajani_Vithana1;~Martina_Cardone1;~Flavio_Calmon1", "aff": "School of Engineering and Applied Sciences, Harvard University;Harvard University;Harvard University;Harvard University, Harvard University;Harvard University;Harvard University;University of Minnesota, Minneapolis;Harvard University", "aff_domain": "seas.harvard.edu;harvard.edu;harvard.edu;g.harvard.edu;g.harvard.edu;harvard.edu;umn.edu;harvard.edu", "position": "PhD student;Postdoc;Undergrad student;PhD student;PhD student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\noesterling2024multigroup,\ntitle={Multi-Group Proportional Representation in Retrieval},\nauthor={Alex Oesterling and Claudio Mayrink Verdun and Alexander Glynn and Carol Xuan Long and Lucas Monteiro Paes and Sajani Vithana and Martina Cardone and Flavio Calmon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BRZYhVHvSg}\n}", "github": "", "reviewers": "XWMb;4ELS;XQou", "pdf_size": 3562367, "rating": "6;7;9", "confidence": "3;4;3", "soundness": "3;4;4", "novelty": "2;3;4", "presentation": "2;3;3", "wc_summary": "37;171;271", "wc_strengths": "32;56;363", "wc_weaknesses": "88;287;403", "wc_questions": "64;73;322", "wc_limitations": "11;21;369", "wc_review": "232;608;1728", "wc_reply_reviewers": "118;720;371", "wc_reply_authors": "655;1806;79", "reply_reviewers": "2;3;1", "reply_authors": "2;5;2", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 159.66666666666666, 95.86564672614598 ], "wc_strengths_avg": [ 150.33333333333334, 150.69689962157668 ], "wc_weaknesses_avg": [ 259.3333333333333, 130.0777545248311 ], "wc_questions_avg": [ 153.0, 119.55751753863075 ], "wc_limitations_avg": [ 133.66666666666666, 166.45586669010964 ], "wc_review_avg": [ 856.0, 635.4169234972159 ], "wc_reply_reviewers_avg": [ 403.0, 246.80491621251522 ], "wc_reply_authors_avg": [ 846.6666666666666, 717.952799438948 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9umQA4JWQ88J:scholar.google.com/&scioq=Multi-Group+Proportional+Representation+in+Retrieval&hl=en&as_sdt=0,39", "gs_version_total": 2, "email": "seas.harvard.edu;harvard.edu;harvard.edu;g.harvard.edu;g.harvard.edu;harvard.edu;umn.edu;harvard.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Harvard University;University of Minnesota", "aff_unique_dep": "School of Engineering and Applied Sciences;", "aff_unique_url": "https://www.harvard.edu;https://www.minnesota.edu", "aff_unique_abbr": "Harvard;UMN", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Unifying Normative Framework of Decision Confidence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96189", "id": "BRvGfN3Xfm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BRvGfN3Xfm", "openreview": "https://openreview.net/forum?id=BRvGfN3Xfm", "poster": "", "project": "", "author_site": "Amelia Johnson, Michael Buice, Koosha Khalvati", "tldr": "", "abstract": "Self-assessment of one\u2019s choices, i.e., confidence, is the topic of many decision neuroscience studies. Computational models of confidence, however, are limited to specific scenarios such as between choices with the same value. Here we present a normative framework for modeling decision confidence that is generalizable to various tasks and experimental setups. We further drive the implications of our model from both theoretical and experimental points of view. Specifically, we show that our model maps to the planning as an inference framework where the objective function is maximizing the gained reward and information entropy of the policy. Moreover, we validate our model on two different psychophysics experiments and show its superiority over other approaches in explaining subjects' confidence reports.", "keywords": "Decision making;cognitive science;computational neuroscience;planning as inference;confidence", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/350f888c5a9f64a85c010a5d352d8dff0855ca8d.zip", "author": "Amelia Johnson;Michael A Buice;Koosha Khalvati", "authorids": "~Amelia_Johnson1;~Michael_A_Buice1;~Koosha_Khalvati1", "gender": "F;;M", "homepage": ";;", "dblp": "396/8736;;89/11240", "google_scholar": ";;T3v_IkcAAAAJ", "orcid": ";;", "linkedin": "amelia-johnson-1676671bb/;;", "or_profile": "~Amelia_Johnson1;~Michael_A_Buice1;~Koosha_Khalvati1", "aff": "University of Washington;Allen Institute;Allen Institute", "aff_domain": "uw.edu;alleninstitute.org;alleninstitute.org", "position": "Undergrad student;Associate Investigator;Scientist", "bibtex": "@inproceedings{\njohnson2024a,\ntitle={A Unifying Normative Framework of Decision Confidence},\nauthor={Amelia Johnson and Michael A Buice and Koosha Khalvati},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BRvGfN3Xfm}\n}", "github": "", "reviewers": "Q2rg;R2CF;rZps;GT57", "pdf_size": 919882, "rating": "3;6;6;6", "confidence": "3;4;4;4", "soundness": "1;4;3;3", "novelty": "3;2;2;3", "presentation": "2;3;4;4", "wc_summary": "57;53;52;35", "wc_strengths": "49;46;62;54", "wc_weaknesses": "308;246;175;290", "wc_questions": "169;232;109;77", "wc_limitations": "3;3;11;2", "wc_review": "586;580;409;458", "wc_reply_reviewers": "31;107;82;116", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 49.25, 8.437268515343103 ], "wc_strengths_avg": [ 52.75, 6.057020719792859 ], "wc_weaknesses_avg": [ 254.75, 51.27072751580574 ], "wc_questions_avg": [ 146.75, 59.27214775929754 ], "wc_limitations_avg": [ 4.75, 3.6314597615834874 ], "wc_review_avg": [ 508.25, 76.7605855892202 ], "wc_reply_reviewers_avg": [ 84.0, 33.03785707336358 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nV5W5pLKBtwJ:scholar.google.com/&scioq=A+Unifying+Normative+Framework+of+Decision+Confidence&hl=en&as_sdt=0,47", "gs_version_total": 0, "email": "uw.edu;alleninstitute.org;alleninstitute.org", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bias Amplification in Language Model Evolution: An Iterated Learning Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96188", "id": "BSYn7ah4KX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BSYn7ah4KX", "openreview": "https://openreview.net/forum?id=BSYn7ah4KX", "poster": "/media/PosterPDFs/NeurIPS%202024/96188.png?t=1733339039.5291476", "project": "", "author_site": "Yi Ren, Shangmin Guo, Linlu Qiu, Bailin Wang, Danica J. Sutherland", "tldr": "", "abstract": "With the widespread adoption of Large Language Models (LLMs), the prevalence of iterative interactions among these models is anticipated to increase. Notably, recent advancements in multi-round on-policy self-improving methods allow LLMs to generate new examples for training subsequent models. At the same time, multi-agent LLM systems, involving automated interactions among agents, are also increasing in prominence. Thus, in both short and long terms, LLMs may actively engage in an evolutionary process. We draw parallels between the behavior of LLMs and the evolution of human culture, as the latter has been extensively studied by cognitive scientists for decades. Our approach involves leveraging Iterated Learning (IL), a Bayesian framework that elucidates how subtle biases are magnified during human cultural evolution, to explain some behaviors of LLMs. This paper outlines key characteristics of agents' behavior in the Bayesian-IL framework, including predictions that are supported by experimental verification with various LLMs. This theoretical framework could help to more effectively predict and guide the evolution of LLMs in desired directions.", "keywords": "Large Language Model;LLM Agent;Self-improvement;Cognitive Science;Bayesian;Iterated Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yi Ren;Shangmin Guo;Linlu Qiu;Bailin Wang;Danica J. Sutherland", "authorids": "~Yi_Ren6;~Shangmin_Guo1;~Linlu_Qiu1;~Bailin_Wang3;~Danica_J._Sutherland1", "gender": "M;M;F;M;F", "homepage": "https://joshua-ren.github.io/;;https://linlu-qiu.github.io/;https://berlino.github.io/;http://www.djsutherland.ml", "dblp": ";183/0949;267/2348;218/7334;92/10966", "google_scholar": "5QNce38AAAAJ;cpOrbSoAAAAJ;D1uOAWcAAAAJ;;https://scholar.google.co.uk/citations?user=uO_NqicAAAAJ", "orcid": ";0000-0003-1716-0994;;;0000-0002-1525-3532", "linkedin": ";;;;", "or_profile": "~Yi_Ren6;~Shangmin_Guo1;~Linlu_Qiu1;~bailin_wang1;~Danica_J._Sutherland2", "aff": "University of British Columbia;University of Edinburgh;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of British Columbia", "aff_domain": "ubc.ca;ed.ac.uk;mit.edu;mit.edu;cs.ubc.ca", "position": "PhD student;PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nren2024bias,\ntitle={Bias Amplification in Language Model Evolution: An Iterated Learning Perspective},\nauthor={Yi Ren and Shangmin Guo and Linlu Qiu and Bailin Wang and Danica J. Sutherland},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BSYn7ah4KX}\n}", "github": "", "reviewers": "1qvU;YSn2;JuEo;Zowd", "pdf_size": 9864849, "rating": "3;6;6;6", "confidence": "2;3;3;2", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "1;3;2;2", "wc_summary": "86;69;96;90", "wc_strengths": "41;63;101;44", "wc_weaknesses": "251;66;459;35", "wc_questions": "103;23;7;59", "wc_limitations": "199;3;6;1", "wc_review": "680;224;669;229", "wc_reply_reviewers": "82;0;26;16", "wc_reply_authors": "238;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.25, 10.034316120194738 ], "wc_strengths_avg": [ 62.25, 23.909987452945266 ], "wc_weaknesses_avg": [ 202.75, 169.43490638000188 ], "wc_questions_avg": [ 48.0, 36.91882988394946 ], "wc_limitations_avg": [ 52.25, 84.7448376008828 ], "wc_review_avg": [ 450.5, 224.04073290363965 ], "wc_reply_reviewers_avg": [ 31.0, 30.870698080866262 ], "wc_reply_authors_avg": [ 59.5, 103.05702305034819 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7973927278752154333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ubc.ca;ed.ac.uk;mit.edu;mit.edu;cs.ubc.ca", "author_num": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of British Columbia;University of Edinburgh;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;https://www.ed.ac.uk;https://web.mit.edu", "aff_unique_abbr": "UBC;Edinburgh;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "InterDreamer: Zero-Shot Text to 3D Dynamic Human-Object Interaction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96187", "id": "BUpxPo80QP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BUpxPo80QP", "openreview": "https://openreview.net/forum?id=BUpxPo80QP", "poster": "", "project": "", "author_site": "Sirui Xu, ziyin wang, Yu-Xiong Wang, Liangyan Gui", "tldr": "", "abstract": "Text-conditioned human motion generation has experienced significant advancements with diffusion models trained on extensive motion capture data and corresponding textual annotations. However, extending such success to 3D dynamic human-object interaction (HOI) generation faces notable challenges, primarily due to the lack of large-scale interaction data and comprehensive descriptions that align with these interactions. This paper takes the initiative and showcases the potential of generating human-object interactions without direct training on text-interaction pair data. Our key insight in achieving this is that interaction semantics and dynamics can be decoupled. Being unable to learn interaction semantics through supervised training, we instead leverage pre-trained large models, synergizing knowledge from a large language model and a text-to-motion model. While such knowledge offers high-level control over interaction semantics, it cannot grasp the intricacies of low-level interaction dynamics. To overcome this issue, we introduce a world model designed to comprehend simple physics, modeling how human actions influence object motion. By integrating these components, our novel framework, InterDreamer, is able to generate text-aligned 3D HOI sequences without relying on paired text-interaction data. We apply InterDreamer to the BEHAVE, OMOMO, and CHAIRS datasets, and our comprehensive experimental analysis demonstrates its capability to generate realistic and coherent interaction sequences that seamlessly align with the text directives.", "keywords": "human object interaction;human motion generation", "primary_area": "generative_models", "supplementary_material": "/attachment/1846d09eb24b78cc7025f9c26be3570681746bea.zip", "author": "Sirui Xu;Ziyin Wang;Yu-Xiong Wang;Liangyan Gui", "authorids": "~Sirui_Xu1;~Ziyin_Wang2;~Yu-Xiong_Wang1;~Liangyan_Gui1", "gender": "M;M;;F", "homepage": "https://sirui-xu.github.io;https://github.com/wzyabcas;https://yxw.cs.illinois.edu/;", "dblp": "194/1216-2;130/5386;35/10700;155/5055", "google_scholar": "sBAgRsIAAAAJ;;T_Q-xDkAAAAJ;3aE0r9QAAAAJ", "orcid": "0000-0001-5372-6321;;;", "linkedin": ";;;", "or_profile": "~Sirui_Xu1;~Ziyin_Wang2;~Yu-Xiong_Wang1;~Liangyan_Gui1", "aff": "NVIDIA;Fudan University;Department of Computer Science, University of Illinois Urbana-Champaign;UIUC", "aff_domain": "nvidia.com;fudan.edu.cn;cs.illinois.edu;cs.illinois.edu", "position": "Intern;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024interdreamer,\ntitle={InterDreamer: Zero-Shot Text to 3D Dynamic Human-Object Interaction},\nauthor={Sirui Xu and Ziyin Wang and Yu-Xiong Wang and Liangyan Gui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BUpxPo80QP}\n}", "github": "", "reviewers": "aGg2;R7nv;Jz5F;Kjsy", "pdf_size": 23369036, "rating": "4;4;5;7", "confidence": "5;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "1;2;3;4", "wc_summary": "73;91;106;109", "wc_strengths": "50;69;101;114", "wc_weaknesses": "255;164;203;47", "wc_questions": "4;64;21;37", "wc_limitations": "9;8;7;50", "wc_review": "391;396;438;357", "wc_reply_reviewers": "447;110;160;77", "wc_reply_authors": "1772;510;360;12", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 94.75, 14.289419162443238 ], "wc_strengths_avg": [ 83.5, 25.342651794948374 ], "wc_weaknesses_avg": [ 167.25, 76.56492343103335 ], "wc_questions_avg": [ 31.5, 22.096379793984354 ], "wc_limitations_avg": [ 18.5, 18.200274723201296 ], "wc_review_avg": [ 395.5, 28.76195403653931 ], "wc_reply_reviewers_avg": [ 198.5, 146.4829341595805 ], "wc_reply_authors_avg": [ 663.5, 664.9998120300486 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13143596872347893864&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "nvidia.com;fudan.edu.cn;cs.illinois.edu;cs.illinois.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "NVIDIA;Fudan University;University of Illinois Urbana-Champaign", "aff_unique_dep": "NVIDIA Corporation;;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;https://www.fudan.edu.cn;https://illinois.edu", "aff_unique_abbr": "NVIDIA;Fudan;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "CausalDiff: Causality-Inspired Disentanglement via Diffusion Model for Adversarial Defense", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96186", "id": "BZLdXBjB8O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BZLdXBjB8O", "openreview": "https://openreview.net/forum?id=BZLdXBjB8O", "poster": "/media/PosterPDFs/NeurIPS%202024/96186.png?t=1732115454.7857726", "project": "", "author_site": "Mingkun Zhang, Keping Bi, Wei Chen, Quanrun Chen, Jiafeng Guo, Xueqi Cheng", "tldr": "", "abstract": "Despite ongoing efforts to defend neural classifiers from adversarial attacks, they remain vulnerable, especially to unseen attacks. In contrast, humans are difficult to be cheated by subtle manipulations, since we make judgments only based on essential factors. Inspired by this observation, we attempt to model label generation with essential label-causative factors and incorporate label-non-causative factors to assist data generation. For an adversarial example, we aim to discriminate the perturbations as non-causative factors and make predictions only based on the label-causative factors. Concretely, we propose a casual diffusion model (CausalDiff) that adapts diffusion models for conditional data generation and disentangles the two types of casual factors by learning towards a novel casual information bottleneck objective. Empirically, CausalDiff has significantly outperformed state-of-the-art defense methods on various unseen attacks, achieving an average robustness of 86.39\\% (+4.01\\%) on CIFAR-10, 56.25\\% (+3.13\\%) on CIFAR-100, and 82.62\\% (+4.93\\%) on GTSRB (German Traffic Sign Recognition Benchmark). The code is available athttps://github.com/CAS-AISafetyBasicResearchGroup/CausalDiff.", "keywords": "Adversarial Defense;Diffusion Model;Causal", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/8b473d3f2b729dbeea660d8bbc8776cf1cb40cf7.zip", "author": "Mingkun Zhang;Keping Bi;Wei Chen;Quanrun Chen;Jiafeng Guo;Xueqi Cheng", "authorids": "~Mingkun_Zhang2;~Keping_Bi1;~Wei_Chen1;~Quanrun_Chen1;~Jiafeng_Guo1;~Xueqi_Cheng1", "gender": "F;F;F;Not Specified;M;M", "homepage": "https://github.com/ZhangMingKun1;https://kepingbi.github.io/;https://weichen-cas.github.io/;https://www.uibe.edu.cn/;http://www.bigdatalab.ac.cn/gjf/;https://people.ucas.ac.cn/~cxq?language=en", "dblp": ";56/10886;;;02/146;44/912", "google_scholar": ";kJQYiFIAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?view_op=list_works;hY8aLqAAAAAJ", "orcid": ";0000-0001-5123-4999;;;;", "linkedin": ";;;;;", "or_profile": "~Mingkun_Zhang2;~Keping_Bi1;~Wei_Chen1;~Quanrun_Chen1;~Jiafeng_Guo1;~Xueqi_Cheng1", "aff": ", Chinese Academy of Sciences;Chinese Academy of Sciences; Chinese Academy of Sciences;University of International Business and Economics;Institute of Computing Technolgy, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;uibe.edu.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2024causaldiff,\ntitle={CausalDiff: Causality-Inspired Disentanglement via Diffusion Model for Adversarial Defense},\nauthor={Mingkun Zhang and Keping Bi and Wei Chen and Quanrun Chen and Jiafeng Guo and Xueqi Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BZLdXBjB8O}\n}", "github": "", "reviewers": "t8de;pU16;dVc3;DuQ4", "pdf_size": 13754535, "rating": "5;6;6;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "25;47;41;39", "wc_strengths": "47;71;126;54", "wc_weaknesses": "51;141;314;59", "wc_questions": "5;179;4;59", "wc_limitations": "6;104;1;24", "wc_review": "134;542;486;235", "wc_reply_reviewers": "22;21;18;22", "wc_reply_authors": "14;14;16;14", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 38.0, 8.06225774829855 ], "wc_strengths_avg": [ 74.5, 30.987900864692335 ], "wc_weaknesses_avg": [ 141.25, 105.77422890288541 ], "wc_questions_avg": [ 61.75, 71.25789429950902 ], "wc_limitations_avg": [ 33.75, 41.45102531904368 ], "wc_review_avg": [ 349.25, 169.7341671555848 ], "wc_reply_reviewers_avg": [ 20.75, 1.6393596310755 ], "wc_reply_authors_avg": [ 14.5, 0.8660254037844386 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10451969105756517240&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;uibe.edu.cn;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of International Business and Economics", "aff_unique_dep": ";", "aff_unique_url": "http://www.cas.cn;http://www.uibe.edu.cn", "aff_unique_abbr": "CAS;UIBE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GAIA: Rethinking Action Quality Assessment for AI-Generated Videos", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97817", "id": "BZe6dmDk5K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BZe6dmDk5K", "openreview": "https://openreview.net/forum?id=BZe6dmDk5K", "poster": "/media/PosterPDFs/NeurIPS%202024/97817.png?t=1729738265.0412693", "project": "", "author_site": "Zijian Chen, Wei Sun, Yuan Tian, Jun Jia, Zicheng Zhang, Wang Jiarui, Ru Huang, Xiongkuo Min, Guangtao Zhai, Wenjun Zhang", "tldr": "", "abstract": "Assessing action quality is both imperative and challenging due to its significant impact on the quality of AI-generated videos, further complicated by the inherently ambiguous nature of actions within AI-generated video (AIGV). Current action quality assessment (AQA) algorithms predominantly focus on actions from real specific scenarios and are pre-trained with normative action features, thus rendering them inapplicable in AIGVs. To address these problems, we construct GAIA, a Generic AI-generated Action dataset, by conducting a large-scale subjective evaluation from a novel causal reasoning-based perspective, resulting in 971,244 ratings among 9,180 video-action pairs. Based on GAIA, we evaluate a suite of popular text-to-video (T2V) models on their ability to generate visually rational actions, revealing their pros and cons on different categories of actions. We also extend GAIA as a testbed to benchmark the AQA capacity of existing automatic evaluation methods. Results show that traditional AQA methods, action-related metrics in recent T2V benchmarks, and mainstream video quality methods perform poorly with an average SRCC of 0.454, 0.191, and 0.519, respectively, indicating a sizable gap between current models and human action perception patterns in AIGVs. Our findings underscore the significance of action quality as a unique perspective for studying AIGVs and can catalyze progress towards methods with enhanced capacities for AQA in AIGVs.", "keywords": "Action quality assessment;AI-generated videos;Action decomposition;Dataset", "primary_area": "", "supplementary_material": "/attachment/701583f09a0253971c8475a944db09af6153e87a.pdf", "author": "Zijian Chen;Wei Sun;Yuan Tian;Jun Jia;Zicheng Zhang;Wang Jiarui;Ru Huang;Xiongkuo Min;Guangtao Zhai;Wenjun Zhang", "authorids": "~Zijian_Chen1;~Wei_Sun12;~Yuan_Tian5;~Jun_Jia2;~Zicheng_Zhang7;~Wang_Jiarui1;~Ru_Huang3;~Xiongkuo_Min1;~Guangtao_Zhai1;~Wenjun_Zhang3", "gender": "M;M;M;;F;;M;M;M;M", "homepage": ";;https://scholar.google.com.hk/citations?user=Kzd0qtsAAAAJ&hl=zh-CN;https://multimedia.sjtu.edu.cn/;https://github.com/wangjiarui153/AIGCIQA2023;https://cise.ecust.edu.cn/2011/0615/c7766a55146/page.htm;;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;https://ee.sjtu.edu.cn/FacultyDetail.aspx?id=14&infoid=66&flag=66;", "dblp": "205/6822-1;;39/5423-17.html;75/7384;;;139/6983;19/3230;;", "google_scholar": "NSR4UkMAAAAJ;;https://scholar.google.com.hk/citations?user=Kzd0qtsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;91sjuWIAAAAJ;E6zbSYgAAAAJ;;QICTEckAAAAJ", "orcid": "0000-0002-8502-4110;0000-0001-8162-1949;0000-0001-6073-8582;0000-0002-5424-4284;;0000-0001-7545-0987;0000-0001-5693-0416;;;", "linkedin": ";;%E5%85%83-%E7%94%B0-2b6017187/;;;;;;;", "or_profile": "~Zijian_Chen1;~Wei_Sun12;~Yuan_Tian5;~Jun_Jia2;~Wang_Jiarui1;~Ru_Huang3;~Xiongkuo_Min1;~Guangtao_Zhai1;~Wenjun_Zhang3;~zicheng_zhang6", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai AI Lab;Shanghai Jiaotong University;Shanghai Jiaotong University;East China University of Science and Technology;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;ecust.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Postdoc;Postdoc;Postdoc;Undergrad student;Associate Professor;Associate Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024gaia,\ntitle={{GAIA}: Rethinking Action Quality Assessment for {AI}-Generated Videos},\nauthor={Zijian Chen and Wei Sun and Yuan Tian and Jun Jia and Zicheng Zhang and Wang Jiarui and Ru Huang and Xiongkuo Min and Guangtao Zhai and Wenjun Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=BZe6dmDk5K}\n}", "github": "", "reviewers": "jQF6;5JDo;sWxM;T8oF", "pdf_size": 22352137, "rating": "8;8;8;9", "confidence": "5;3;4;5", "wc_summary_and_contributions": "70;56;199;21", "wc_strengths": "23;89;88;35", "wc_improvement": "75;159;224;253", "wc_limitations": "1;70;2;19", "wc_correctness": "4;41;1;8", "wc_clarity": "12;162;1;6", "wc_relation_to_prior_work": "8;21;9;8", "wc_documentation": "1;4;1;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "195;603;526;360", "wc_reply_reviewers": "15;160;36;0", "wc_reply_authors": "24;0;35;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 8.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 86.5, 67.35911222692889 ], "wc_strengths_avg": [ 58.75, 30.053078045351693 ], "wc_improvement_avg": [ 177.75, 68.39362177864248 ], "wc_limitations_avg": [ 23.0, 28.062430400804562 ], "wc_correctness_avg": [ 13.5, 16.070158679988197 ], "wc_clarity_avg": [ 45.25, 67.51805314136361 ], "wc_relation_to_prior_work_avg": [ 11.5, 5.5 ], "wc_documentation_avg": [ 3.75, 3.2691742076555053 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 421.0, 157.2784155566173 ], "wc_reply_reviewers_avg": [ 52.75, 63.22726864257225 ], "wc_reply_authors_avg": [ 14.75, 15.2540978100968 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14488099635006092658&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;ecust.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;0;0;2;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai AI Lab;East China University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.shanghaiailab.com;http://www.ecust.edu.cn", "aff_unique_abbr": "SJTU;SAIL;ECUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DPIC: Decoupling Prompt and Intrinsic Characteristics for LLM Generated Text Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96185", "id": "BZh05P2EoN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BZh05P2EoN", "openreview": "https://openreview.net/forum?id=BZh05P2EoN", "poster": "/media/PosterPDFs/NeurIPS%202024/96185.png?t=1731667792.7161999", "project": "", "author_site": "XIAO YU, Yuang Qi, Kejiang Chen, Guoqiang Chen, Xi Yang, Pengyuan Zhu, Xiuwei Shang, Weiming Zhang, Nenghai Yu", "tldr": "", "abstract": "Large language models (LLMs) have the potential to generate texts that pose risks of misuse, such as plagiarism, planting fake reviews on e-commerce platforms, or creating inflammatory false tweets. Consequently, detecting whether a text is generated by LLMs has become increasingly important. Existing high-quality detection methods usually require access to the interior of the model to extract the intrinsic characteristics. However, since we do not have access to the interior of the black-box model, we must resort to surrogate models, which impacts detection quality. In order to achieve high-quality detection of black-box models, we would like to extract deep intrinsic characteristics of the black-box model generated texts. We view the generation process as a coupled process of prompt and intrinsic characteristics of the generative model. Based on this insight, we propose to decouple prompt and intrinsic characteristics (DPIC) for LLM-generated text detection method. Specifically, given a candidate text, DPIC employs an auxiliary LLM to reconstruct the prompt corresponding to the candidate text, then uses the prompt to regenerate text by the auxiliary LLM, which makes the candidate text and the regenerated text align with their prompts, respectively. Then, the similarity between the candidate text and the regenerated text is used as a detection feature, thus eliminating the prompt in the detection process, which allows the detector to focus on the intrinsic characteristics of the generative model. Compared to the baselines, DPIC has achieved an average improvement of 6.76\\% and 2.91\\% in detecting texts from different domains generated by GPT4 and Claude3, respectively.", "keywords": "Large Language Models;Intrinsic Characteristics;Detection", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/702a3b9d26d5e0fe7b384c0679caba1c2248f2f3.zip", "author": "Xiao Yu;Yuang Qi;Kejiang Chen;Guoqiang Chen;Xi Yang;PENGYUAN ZHU;Xiuwei Shang;Weiming Zhang;Nenghai Yu", "authorids": "~Xiao_Yu5;~Yuang_Qi1;~Kejiang_Chen1;~Guoqiang_Chen1;~Xi_Yang13;~PENGYUAN_ZHU1;~Xiuwei_Shang1;~Weiming_Zhang2;~Nenghai_Yu1", "gender": "F;;M;M;M;;M;M;M", "homepage": "https://github.com/happy-Moer;http://home.ustc.edu.cn/~qya7ya/;http://home.ustc.edu.cn/~chenkj;;;;https://sxxxw.github.io/;http://staff.ustc.edu.cn/~zhangwm/;", "dblp": ";347/8619;193/7669;;;;338/7052.html;;96/5144", "google_scholar": ";5DPb-wcAAAAJ;bGKyyB8AAAAJ;;ztJiAk8AAAAJ;;;eTCfl6cAAAAJ;https://scholar.google.com.hk/citations?user=7620QAMAAAAJ", "orcid": ";0009-0008-0326-1616;0000-0002-9868-3414;0000-0003-0651-6617;;;;0000-0001-5576-6108;", "linkedin": ";;;;;;;;", "or_profile": "~Xiao_Yu5;~Yuang_Qi1;~Kejiang_Chen1;~Guoqiang_Chen1;~Xi_Yang13;~PENGYUAN_ZHU1;~Xiuwei_Shang1;~Weiming_Zhang2;~Nenghai_Yu1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;PhD student;Research Associate;MS student;PhD student;;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024dpic,\ntitle={{DPIC}: Decoupling Prompt and Intrinsic Characteristics for {LLM} Generated Text Detection},\nauthor={Xiao Yu and Yuang Qi and Kejiang Chen and Guoqiang Chen and Xi Yang and PENGYUAN ZHU and Xiuwei Shang and Weiming Zhang and Nenghai Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BZh05P2EoN}\n}", "github": "", "reviewers": "XAPE;qTwx;eWrN;Av5h", "pdf_size": 1800071, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "128;53;67;135", "wc_strengths": "50;36;29;139", "wc_weaknesses": "129;183;24;111", "wc_questions": "46;9;329;102", "wc_limitations": "11;1;105;1", "wc_review": "364;282;554;488", "wc_reply_reviewers": "123;167;105;9", "wc_reply_authors": "820;727;75;24", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.75, 36.17578610065025 ], "wc_strengths_avg": [ 63.5, 44.24081825644729 ], "wc_weaknesses_avg": [ 111.75, 57.17243654069678 ], "wc_questions_avg": [ 121.5, 124.29098921482603 ], "wc_limitations_avg": [ 29.5, 43.780703511935485 ], "wc_review_avg": [ 422.0, 105.76388797694608 ], "wc_reply_reviewers_avg": [ 101.0, 57.706152185014034 ], "wc_reply_authors_avg": [ 411.5, 363.93715116761575 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12331935915480107877&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GeSS: Benchmarking Geometric Deep Learning under Scientific Applications with Distribution Shifts", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97816", "id": "BZxtiElo0c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BZxtiElo0c", "openreview": "https://openreview.net/forum?id=BZxtiElo0c", "poster": "", "project": "", "author_site": "Deyu Zou, Shikun Liu, Siqi Miao, Victor Fung, Shiyu Chang, Pan Li", "tldr": "", "abstract": "Geometric deep learning (GDL) has gained significant attention in scientific fields, for its proficiency in modeling data with intricate geometric structures. \nYet, very few works have delved into its capability of tackling the distribution shift problem, a prevalent challenge in many applications.\nTo bridge this gap, we propose GeSS, a comprehensive benchmark designed for evaluating the performance of GDL models in scientific scenarios with distribution shifts.\nOur evaluation datasets cover diverse scientific domains from particle physics, materials science to biochemistry, and encapsulate a broad spectrum of distribution shifts including conditional, covariate, and concept shifts. \nFurthermore, we study three levels of information access from the out-of-distribution (OOD) test data, including no OOD information, only unlabeled OOD data, and OOD data with a few labels. \nOverall, our benchmark results in 30 different experiment settings, and evaluates 3 GDL backbones and 11 learning algorithms in each setting. A thorough analysis of the evaluation results is provided, poised to illuminate insights for GDL researchers and domain practitioners who are to use GDL in their applications.", "keywords": "geometric deep learning;distribution shift", "primary_area": "", "supplementary_material": "/attachment/fa4a63a705e254266e18091363a6e46270f28581.zip", "author": "Deyu Zou;Shikun Liu;Siqi Miao;Victor Fung;Shiyu Chang;Pan Li", "authorids": "~Deyu_Zou1;~Shikun_Liu3;~Siqi_Miao1;~Victor_Fung1;~Shiyu_Chang2;~Pan_Li2", "gender": "M;;;;Unspecified;", "homepage": "https://github.com/unimpor;https://shikun-liu.com/;https://siqi.plus/;;http://people.csail.mit.edu/chang87/;", "dblp": ";;312/7014-1;;28/9988;https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": ";BLafTygAAAAJ;bVF_CzUAAAAJ;2QsddMIAAAAJ;r21asW4AAAAJ;IroP0EwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;pan-li-b951105a/", "or_profile": "~Deyu_Zou1;~Shikun_Liu3;~Siqi_Miao1;~Victor_Fung1;~Shiyu_Chang2;~Pan_Li2", "aff": "University of Science and Technology of China;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;University of California, Santa Barbara;Purdue University", "aff_domain": "ustc.edu.cn;gatech.edu;gatech.edu;gatech.edu;ucsb.edu;purdue.edu", "position": "Undergrad student;PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzou2024gess,\ntitle={Ge{SS}: Benchmarking Geometric Deep Learning under Scientific Applications with Distribution Shifts},\nauthor={Deyu Zou and Shikun Liu and Siqi Miao and Victor Fung and Shiyu Chang and Pan Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=BZxtiElo0c}\n}", "github": "", "reviewers": "UETZ;Ft87;yPRy;xiTX", "pdf_size": 1545210, "rating": "6;6;6;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "48;93;44;71", "wc_strengths": "38;22;21;18", "wc_improvement": "56;23;39;10", "wc_limitations": "1;34;1;4", "wc_correctness": "1;7;1;4", "wc_clarity": "1;4;1;5", "wc_relation_to_prior_work": "1;11;1;6", "wc_documentation": "5;4;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "152;199;110;125", "wc_reply_reviewers": "14;11;22;35", "wc_reply_authors": "0;0;15;13", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 64.0, 19.6596032513375 ], "wc_strengths_avg": [ 24.75, 7.790218225441442 ], "wc_improvement_avg": [ 32.0, 17.24818831066034 ], "wc_limitations_avg": [ 10.0, 13.910427743243556 ], "wc_correctness_avg": [ 3.25, 2.48746859276655 ], "wc_clarity_avg": [ 2.75, 1.7853571071357126 ], "wc_relation_to_prior_work_avg": [ 4.75, 4.14578098794425 ], "wc_documentation_avg": [ 4.0, 1.8708286933869707 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 146.5, 33.84154251803543 ], "wc_reply_reviewers_avg": [ 20.5, 9.287087810503355 ], "wc_reply_authors_avg": [ 7.0, 7.035623639735144 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CH-8dlhGjFcJ:scholar.google.com/&scioq=GeSS:+Benchmarking+Geometric+Deep+Learning+under+Scientific+Applications+with+Distribution+Shifts&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ustc.edu.cn;gatech.edu;gatech.edu;gatech.edu;ucsb.edu;purdue.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "University of Science and Technology of China;Georgia Institute of Technology;University of California, Santa Barbara;Purdue University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.gatech.edu;https://www.ucsb.edu;https://www.purdue.edu", "aff_unique_abbr": "USTC;Georgia Tech;UCSB;Purdue", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Addressing Bias in Online Selection with Limited Budget of Comparisons", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96184", "id": "BdGFgKrlHl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BdGFgKrlHl", "openreview": "https://openreview.net/forum?id=BdGFgKrlHl", "poster": "", "project": "", "author_site": "Ziyad Benomar, Evgenii Chzhen, Nicolas Schreuder, Vianney Perchet", "tldr": "", "abstract": "Consider a hiring process with candidates coming from different universities. It is easy to order candidates with the same background, yet it can be challenging to compare them otherwise. The latter case requires additional costly assessments, leading to a potentially high total cost for the hiring organization. Given an assigned budget, what would be an optimal strategy to select the most qualified candidate?\nWe model the above problem as a multicolor secretary problem, allowing comparisons between candidates from distinct groups at a fixed cost. Our study explores how the allocated budget enhances the success probability of online selection algorithms.", "keywords": "the secretary problem;online selection;online algorithms;decision-making under uncertainty", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Ziyad Benomar;Evgenii Chzhen;Nicolas Schreuder;Vianney Perchet", "authorids": "~Ziyad_Benomar1;~Evgenii_Chzhen1;~Nicolas_Schreuder1;~Vianney_Perchet3", "gender": ";;M;", "homepage": ";;https://nicolasschreuder.github.io/;", "dblp": ";;238/1524;", "google_scholar": ";;https://scholar.google.ch/citations?user=JVKPO3YAAAAJ;", "orcid": ";;0000-0001-7363-8679;", "linkedin": ";;;", "or_profile": "~Ziyad_Benomar1;~Evgenii_Chzhen1;~Nicolas_Schreuder1;~Vianney_Perchet3", "aff": ";;CNRS;", "aff_domain": ";;cnrs.fr;", "position": ";;Researcher;", "bibtex": "@inproceedings{\nbenomar2024addressing,\ntitle={Addressing Bias in Online Selection with Limited Budget of Comparisons},\nauthor={Ziyad Benomar and Evgenii Chzhen and Nicolas Schreuder and Vianney Perchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BdGFgKrlHl}\n}", "github": "", "reviewers": "U5mj;5bhX;io3s", "pdf_size": 1609637, "rating": "5;6;7", "confidence": "2;4;3", "soundness": "3;4;3", "novelty": "3;4;3", "presentation": "3;3;4", "wc_summary": "34;70;215", "wc_strengths": "29;68;64", "wc_weaknesses": "26;76;141", "wc_questions": "3;28;69", "wc_limitations": "3;9;2", "wc_review": "95;251;491", "wc_reply_reviewers": "0;28;63", "wc_reply_authors": "0;0;82", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 106.33333333333333, 78.2318491550738 ], "wc_strengths_avg": [ 53.666666666666664, 17.518244457961217 ], "wc_weaknesses_avg": [ 81.0, 47.08148963941844 ], "wc_questions_avg": [ 33.333333333333336, 27.207025236549146 ], "wc_limitations_avg": [ 4.666666666666667, 3.0912061651652345 ], "wc_review_avg": [ 279.0, 162.87418457201866 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 25.77250904010361 ], "wc_reply_authors_avg": [ 27.333333333333332, 38.6551707048646 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14686156794548805447&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 13, "email": ";;cnrs.fr;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Centre National de la Recherche Scientifique", "aff_unique_dep": "", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "Becrgm5xAq", "title": "RL4CO: an Extensive Reinforcement Learning for Combinatorial Optimization Benchmark", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Deep reinforcement learning (RL) has recently shown significant benefits in solving combinatorial optimization (CO) problems, reducing reliance on domain expertise, and improving computational efficiency. However, the field lacks a unified benchmark for easy development and standardized comparison of algorithms across diverse CO problems. To fill this gap, we introduce RL4CO, a unified and extensive benchmark with in-depth library coverage of 23 state-of-the-art methods and 20+ CO problems. Built on efficient software libraries and best practices in implementation, RL4CO features modularized implementation and flexible configuration of diverse RL algorithms, neural network architectures, inference techniques, and environments. RL4COallows researchers to seamlessly navigate existing successes and develop their unique designs, facilitating the entire research process by decoupling science from heavy engineering. We also provide extensive benchmark studies to inspire new insights and future work. RL4CO has attracted numerous researchers in the community and is open-sourced at https://github.com/ai4co/rl4co.", "keywords": "Reinforcement Learning;Combinatorial Optimization;PyTorch;Research;Benchmark", "primary_area": "", "supplementary_material": "/attachment/6050a605bfde4ef9b8efc21fa87af633d4858ca1.pdf", "author": "Federico Berto;Chuanbo Hua;Junyoung Park;Laurin Luttmann;Yining Ma;Fanchen Bu;Jiarui Wang;Haoran Ye;Minsu Kim;Sanghyeok Choi;Nayeli Gast Zepeda;Andr\u00e9 Hottung;Jianan Zhou;Jieyi Bi;Yu Hu;Fei Liu;Hyeonah Kim;Jiwoo Son;Haeyeon Kim;Davide Angioni;Wouter Kool;Zhiguang Cao;Qingfu Zhang;Joungho Kim;Jie Zhang;Kijung Shin;Cathy Wu;Sungsoo Ahn;Guojie Song;Changhyun Kwon;Kevin Tierney;Lin Xie;Jinkyoo Park", "authorids": "~Federico_Berto1;~Chuanbo_Hua1;~Junyoung_Park1;~Laurin_Luttmann1;~Yining_Ma1;~Fanchen_Bu1;~Jiarui_Wang3;~Haoran_Ye1;~Minsu_Kim2;~Sanghyeok_Choi1;~Nayeli_Gast_Zepeda1;~Andr\u00e9_Hottung1;~Jianan_Zhou1;~Jieyi_Bi1;~Yu_Hu11;~Fei_Liu14;~Hyeonah_Kim1;~Jiwoo_Son2;~Haeyeon_Kim1;~Davide_Angioni1;~Wouter_Kool1;~Zhiguang_Cao1;~Qingfu_Zhang1;~Joungho_Kim1;~Jie_Zhang9;~Kijung_Shin2;~Cathy_Wu1;~Sungsoo_Ahn1;~Guojie_Song1;~Changhyun_Kwon1;~Kevin_Tierney1;~Lin_Xie1;~Jinkyoo_Park1", "gender": "M;M;;M;M;M;M;M;M;M;F;;M;F;;;F;;F;M;M;M;M;M;M;M;F;M;M;M;M;F;M", "homepage": "https://fedebotu.github.io/;https://github.com/cbhua;;;https://yining043.github.io/;https://github.com/bokveizen;;https://yehaoran.info;https://minsuukim.github.io/;https://hyeok9855.github.io/;;;https://royalskye.github.io/;https://jieyibi.github.io/;;;;;http://sites.google.com/view/haeyeon-rachel-kim;;https://wouterkool.github.io/;https://zhiguangcaosg.github.io/;https://www.cs.cityu.edu.hk/~qzhan7/index.html;;https://personal.ntu.edu.sg/zhangj/;https://kijungs.github.io/;http://wucathy.com;https://sungsooahn.super.site/;http://sai.pku.edu.cn/info/1022/2212.htm;https://www.chkwon.net;http://www.tierney.de;https://people.utwente.nl/lin.xie?tab=research;http://silab.kaist.ac.kr/", "dblp": "317/1711;326/5321;;;160/6245-1;270/0123;178/5014-2;237/9631;;338/9899;;;296/2326-2;331/2378;;;;348/9675;;;130/2463-1;178/8621;98/1240.html;;84/6889-2;153/2052;155/3740;90/5164;37/2900;49/34-1;13/7407;;156/7535", "google_scholar": "https://scholar.google.com/citations?hl=en;fjKA5gYAAAAJ;;https://scholar.google.de/citations?user=wLkqh5IAAAAJ;4_VyBTsAAAAJ;XjNu7-AAAAAJ;;https://scholar.google.com.hk/citations?view_op=list_works;https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;wQqjHlIAAAAJ;;zzqATFsAAAAJ;9T58m-EAAAAJ;https://scholar.google.com/citations?hl=en;;;;zHyj8zAAAAAJ;rP_9IY8AAAAJ;;https://scholar.google.nl/citations?user=DLCKZqUAAAAJ;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;;IFV_RdMAAAAJ;https://scholar.google.co.kr/citations?user=Yp3Cz5AAAAAJ;https://scholar.google.com/citations?hl=en;XTenHs0AAAAJ;https://scholar.google.com.tw/citations?user=a832IIMAAAAJ;HFiBSkgAAAAJ;https://scholar.google.de/citations?user=G-EGfLEAAAAJ;bf2Ebj4AAAAJ;sH2a0nkAAAAJ", "orcid": "0000-0002-7438-8365;0000-0001-7700-792X;;0000-0003-3242-7263;0000-0002-6639-8547;0000-0003-0497-3902;0000-0002-2138-6016;0000-0002-8510-3716;;;;0000-0002-7251-9093;0000-0002-4896-148X;0000-0001-9480-3434;;;0000-0002-0629-1879;0009-0008-1032-6318;;0000-0002-8825-7817;0000-0002-1837-1454;0000-0002-4499-759X;;;;0000-0002-2872-1526;0000-0001-8594-303X;;0000-0001-8295-2520;0000-0001-8455-6396;0000-0002-5931-4907;0000-0002-3168-4922;0000-0003-2620-1479", "linkedin": "federicoberto/;;;;yiningma/;fanchen-bu-1268a1255/;;;;hyeok9855/;nayeligastzepeda/;;;jieyi-bi-9003b5292/;;;hyeonahkimm/;jiwoo-son-303b31284/;;;wouterkool/;;;joungho-kim-3280b1a4/;;kijungshin/;cathywu/;;;chkwon/;kevinbtierney/;lin-xie-b9a157b0/;", "or_profile": "~Federico_Berto1;~Chuanbo_Hua1;~Junyoung_Park1;~Laurin_Luttmann1;~Yining_Ma1;~Fanchen_Bu1;~Jiarui_Wang3;~Haoran_Ye1;~Minsu_Kim2;~Sanghyeok_Choi1;~Nayeli_Gast_Zepeda1;~Andr\u00e9_Hottung1;~Jianan_Zhou1;~Jieyi_Bi1;~Yu_Hu11;~Fei_Liu14;~Hyeonah_Kim1;~Jiwoo_Son2;~Haeyeon_Kim1;~Davide_Angioni1;~Wouter_Kool1;~Zhiguang_Cao1;~Qingfu_Zhang1;~Joungho_Kim1;~Jie_Zhang9;~Kijung_Shin2;~Cathy_Wu1;~Sungsoo_Ahn1;~Guojie_Song1;~Changhyun_Kwon1;~Kevin_Tierney1;~Lin_Xie1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;;Leuphana Universit\u00e4t L\u00fcneburg;Nanyang Technological University;Korea Advanced Institute of Science & Technology;Soochow University;Suzhou University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Universit\u00e4t Bielefeld;Bielefeld University;Nanyang Technological University;Nanyang Technological University;;;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;University of Brescia;ORTEC;Singapore Management University;City University of Hong Kong;Korea Advanced Institute of Science & Technology;Nanyang Technological University;Korea Advanced Institute of Science & Technology;Massachusetts Institute of Technology;Pohang University of Science and Technology;Peking University;Omelet, Inc.;Bielefeld University;University of Twente;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;;leuphana.de;ntu.edu.sg;kaist.ac.kr;suda.edu.cn;suda.edu.cn;kaist.ac.kr;kaist.ac.kr;uni-bielefeld.de;uni-bielefeld.de;ntu.edu.sg;ntu.edu.sg;;;kaist.edu;kaist.ac.kr;kaist.ac.kr;unibs.it;ortec.com;smu.edu.sg;cityu.edu.hk;kaist.ac.kr;ntu.edu.sg;kaist.ac.kr;mit.edu;postech.ac.kr;pku.edu.cn;omelet.ai;uni-bielefeld.de;utwente.nl;kaist.ac.kr", "position": "PhD student;PhD student;;PhD student;Research Fellow;PhD student;Undergrad student;Undergrad student;PhD student;MS student;PhD student;Postdoc;PhD student;PhD student;;;PhD student;MS student;PhD student;PhD student;OR Engineer;Assistant Professor;Full Professor;Full Professor;Full Professor;Associate Professor;Assistant Professor;Assistant Professor;Associate Professor;Researcher;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024rlco,\ntitle={{RL}4{CO}: an Extensive Reinforcement Learning for Combinatorial Optimization Benchmark},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Becrgm5xAq}\n}", "github": "", "project": "", "reviewers": "su7y;hVj8;Vcfw;7ic3", "site": "https://openreview.net/forum?id=Becrgm5xAq", "pdf_size": 2083678, "rating": "4;6;7;7", "confidence": "3;4;4;3", "wc_summary_and_contributions": "81;27;142;76", "wc_strengths": "8;30;105;46", "wc_improvement": "1;13;75;114", "wc_limitations": "232;7;77;1", "wc_correctness": "1;78;32;1", "wc_clarity": "1;15;87;5", "wc_relation_to_prior_work": "1;7;6;1", "wc_documentation": "1;10;87;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "327;188;612;248", "wc_reply_reviewers": "319;316;0;9", "wc_reply_authors": "1642;1246;328;24", "reply_reviewers": "4;3;0;1", "reply_authors": "5;4;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 81.5, 40.80747480548141 ], "wc_strengths_avg": [ 47.25, 35.967867604293694 ], "wc_improvement_avg": [ 50.75, 46.06720634030243 ], "wc_limitations_avg": [ 79.25, 93.11384161337132 ], "wc_correctness_avg": [ 28.0, 31.51983502494897 ], "wc_clarity_avg": [ 27.0, 35.014282800023196 ], "wc_relation_to_prior_work_avg": [ 3.75, 2.7726341266023544 ], "wc_documentation_avg": [ 25.25, 35.80764583158184 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.75, 162.53057404685433 ], "wc_reply_reviewers_avg": [ 161.0, 156.53593836560344 ], "wc_reply_authors_avg": [ 810.0, 658.1109328980943 ], "reply_reviewers_avg": [ 2.0, 1.5811388300841898 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 33, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12686896239119847413&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;1;2;0;3;4;0;0;5;6;2;2;0;0;0;7;8;9;10;0;2;0;11;12;13;14;6;15;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Leuphana University L\u00fcneburg;Nanyang Technological University;Soochow University;Suzhou University;Universit\u00e4t Bielefeld;Bielefeld University;University of Brescia;ORTEC;Singapore Management University;City University of Hong Kong;Massachusetts Institute of Technology;Pohang University of Science and Technology;Peking University;Omelet, Inc.;University of Twente", "aff_unique_dep": ";;;;;;;;;;;;;;;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.leuphana.de;https://www.ntu.edu.sg;https://www.soochow.edu.cn;https://www.suda.edu.cn;https://www.uni-bielefeld.de/;https://www.uni-bielefeld.de/;https://www.unibs.it;https://www.ortec.com;https://www.smu.edu.sg;https://www.cityu.edu.hk;https://web.mit.edu;https://www.postech.ac.kr;http://www.pku.edu.cn;;https://www.utwente.nl", "aff_unique_abbr": "KAIST;Leuphana;NTU;Soochow U;Suda;Uni Bielefeld;Uni Bielefeld;UNIBS;;SMU;CityU;MIT;POSTECH;Peking U;;UT", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";L\u00fcneburg;Hong Kong SAR;Pohang", "aff_country_unique_index": "0;0;1;2;0;3;3;0;0;1;1;2;2;0;0;0;4;5;2;3;0;2;0;6;0;3;6;1;5;0", "aff_country_unique": "South Korea;Germany;Singapore;China;Italy;Netherlands;United States" }, { "title": "Causal Inference in the Closed-Loop: Marginal Structural Models for Sequential Excursion Effects", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96183", "id": "BgZcuEsYU8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BgZcuEsYU8", "openreview": "https://openreview.net/forum?id=BgZcuEsYU8", "poster": "", "project": "", "author_site": "Alexander Levis, Gabriel Loewinger, Francisco Pereira", "tldr": "", "abstract": "Optogenetics is widely used to study the effects of neural circuit manipulation on behavior. However, the paucity of causal inference methodological work on this topic has resulted in analysis conventions that discard information, and constrain the scientific questions that can be posed. To fill this gap, we introduce a nonparametric causal inference framework for analyzing \"closed-loop\" designs, which use dynamic policies that assign treatment based on covariates. In this setting, standard methods can introduce bias and occlude causal effects. Building on the sequentially randomized experiments literature in causal inference, our approach extends history-restricted marginal structural models for dynamic regimes. In practice, our framework can identify a wide range of causal effects of optogenetics on trial-by-trial behavior, such as, fast/slow-acting, dose-response, additive/antagonistic, and floor/ceiling. Importantly, it does so without requiring negative controls, and can estimate how causal effect magnitudes evolve across time points. From another view, our work extends \"excursion effect\" methods---popular in the mobile health literature---to enable estimation of causal contrasts for treatment sequences greater than length one, in the presence of positivity violations. We derive rigorous statistical guarantees, enabling hypothesis testing of these causal effects. We demonstrate our approach on data from a recent study of dopaminergic activity on learning, and show how our method reveals relevant effects obscured in standard analyses.", "keywords": "marginal structural models;optogenetics;excursion effects;neuroscience;dynamic treatment regimes;micro-randomized trials;sequentially randomized experiments", "primary_area": "causal_inference", "supplementary_material": "", "author": "Alexander W. Levis;Gabriel Loewinger;Francisco Pereira", "authorids": "~Alexander_W._Levis1;~Gabriel_Loewinger1;~Francisco_Pereira1", "gender": "M;M;M", "homepage": "https://www.awlevis.com;https://www.gabeloewinger.com/;http://www.franciscopereira.org", "dblp": ";;73/5236", "google_scholar": "T-FpnSYAAAAJ;ZGBjoDsAAAAJ;HpbSzssAAAAJ", "orcid": ";0000-0002-0755-8520;", "linkedin": ";gabrielloewinger/;francisco-pereira-35735a7/", "or_profile": "~Alexander_W._Levis1;~Gabriel_Loewinger1;~Francisco_Pereira1", "aff": "Carnegie Mellon University;National Institutes of Health;National Institute of Mental Health", "aff_domain": "cmu.edu;nih.gov;nih.gov", "position": "Postdoc;Researcher;Staff Scientist", "bibtex": "@inproceedings{\nlevis2024causal,\ntitle={Causal Inference in the Closed-Loop: Marginal Structural Models for Sequential Excursion Effects},\nauthor={Alexander W. Levis and Gabriel Loewinger and Francisco Pereira},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BgZcuEsYU8}\n}", "github": "", "reviewers": "pQ5n;z4yk;manL", "pdf_size": 1260730, "rating": "4;5;7", "confidence": "2;2;2", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "61;116;68", "wc_strengths": "13;77;60", "wc_weaknesses": "28;318;54", "wc_questions": "99;28;24", "wc_limitations": "1;9;1", "wc_review": "202;548;207", "wc_reply_reviewers": "0;19;6", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 81.66666666666667, 24.44494948973214 ], "wc_strengths_avg": [ 50.0, 27.067816067549053 ], "wc_weaknesses_avg": [ 133.33333333333334, 131.00975366573115 ], "wc_questions_avg": [ 50.333333333333336, 34.451253807211266 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 319.0, 161.94031822454428 ], "wc_reply_reviewers_avg": [ 8.333333333333334, 7.93025150224688 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5134915768144345225&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cmu.edu;nih.gov;nih.gov", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;National Institutes of Health;National Institute of Mental Health", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.nih.gov;https://www.nimh.nih.gov", "aff_unique_abbr": "CMU;NIH;NIMH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Contracting with a Learning Agent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96182", "id": "Bh0LLUp8OA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Bh0LLUp8OA", "openreview": "https://openreview.net/forum?id=Bh0LLUp8OA", "poster": "/media/PosterPDFs/NeurIPS%202024/96182.png?t=1731265193.4168618", "project": "", "author_site": "Guru Guruganesh, Yoav Kolumbus, Jon Schneider, Inbal Talgam-Cohen, Emmanouil-Vasileios Vlatakis-Gkaragkounis, Joshua Wang, S. Weinberg", "tldr": "", "abstract": "Real-life contractual relations typically involve repeated interactions between the principal and agent, where, despite theoretical appeal, players rarely use complex dynamic strategies and instead manage uncertainty through learning algorithms.\n\nIn this paper, we initiate the study of repeated contracts with learning agents, focusing on those achieving no-regret outcomes. For the canonical setting where the agent\u2019s actions result in success or failure, we present a simple, optimal solution for the principal: Initially provide a linear contract with scalar $\\alpha > 0$, then switch to a zero-scalar contract. This shift causes the agent to \u201cfree-fall\u201d through their action space, yielding non-zero rewards for the principal at zero cost. Interestingly, despite the apparent exploitation, there are instances where our dynamic contract can make \\emph{both} players better off compared to the best static contract. \n\nWe then broaden the scope of our results to general linearly-scaled contracts, and, finally, to the best of our knowledge, we provide the first analysis of optimization against learning agents with uncertainty about the time horizon.", "keywords": "Contract Theory;Learning;No-Regret Learning;Mean-Based Learners", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/901141f29189ce584d6a2f2f3bef138444de42c4.zip", "author": "Guru Guruganesh;Yoav Kolumbus;Jon Schneider;Inbal Talgam-Cohen;Emmanouil-Vasileios Vlatakis-Gkaragkounis;Joshua Ruizhi Wang;S. Matthew Weinberg", "authorids": "~Guru_Guruganesh1;~Yoav_Kolumbus1;~Jon_Schneider1;~Inbal_Talgam-Cohen2;~Emmanouil-Vasileios_Vlatakis-Gkaragkounis1;~Joshua_Ruizhi_Wang1;~S._Matthew_Weinberg1", "gender": ";;M;F;M;;M", "homepage": ";;https://jschnei.github.io;http://www.inbaltalgam.com/;http://www.cs.columbia.edu/~emvlatakis/;https://sites.google.com/site/joshw0/;https://www.cs.princeton.edu/~smattw/", "dblp": "153/2209;;146/0503;07/8319;251/8372;;52/2474", "google_scholar": ";;Jc97EyAAAAAJ;R1YK5BsAAAAJ;MKutDKcAAAAJ;u7-FFAsAAAAJ;https://scholar.google.com.tw/citations?user=CBUpEcQAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Guru_Guruganesh1;~Yoav_Kolumbus1;~Jon_Schneider1;~Inbal_Talgam-Cohen2;~Emmanouil-Vasileios_Vlatakis-Gkaragkounis1;~Joshua_Ruizhi_Wang1;~S._Matthew_Weinberg1", "aff": ";;Google;Tel Aviv University;University of California, Berkeley;Google Research;Princeton University", "aff_domain": ";;google.com;tau.ac.il;berkeley.edu;google.com;princeton.edu", "position": ";;Researcher;Assistant Professor;Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nguruganesh2024contracting,\ntitle={Contracting with a Learning Agent},\nauthor={Guru Guruganesh and Yoav Kolumbus and Jon Schneider and Inbal Talgam-Cohen and Emmanouil-Vasileios Vlatakis-Gkaragkounis and Joshua Ruizhi Wang and S. Matthew Weinberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Bh0LLUp8OA}\n}", "github": "", "reviewers": "8ua3;x3aE;caMQ;HskF", "pdf_size": 1049647, "rating": "6;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;4;4;4", "wc_summary": "104;84;139;81", "wc_strengths": "49;63;21;50", "wc_weaknesses": "14;273;44;22", "wc_questions": "164;10;1;118", "wc_limitations": "1;1;1;1", "wc_review": "332;431;206;272", "wc_reply_reviewers": "13;10;10;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.0, 23.119256043393783 ], "wc_strengths_avg": [ 45.75, 15.31951369985353 ], "wc_weaknesses_avg": [ 88.25, 107.22960178980429 ], "wc_questions_avg": [ 73.25, 69.74731177615378 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 310.25, 82.74169142578607 ], "wc_reply_reviewers_avg": [ 10.75, 1.299038105676658 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11238448413930215494&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": ";;google.com;tau.ac.il;berkeley.edu;google.com;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Google;Tel Aviv University;University of California, Berkeley;Princeton University", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.tau.ac.il;https://www.berkeley.edu;https://www.princeton.edu", "aff_unique_abbr": "Google;TAU;UC Berkeley;Princeton", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Mountain View;;Berkeley", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Truncated Variance Reduced Value Iteration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96181", "id": "BiikUm6pLu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BiikUm6pLu", "openreview": "https://openreview.net/forum?id=BiikUm6pLu", "poster": "", "project": "", "author_site": "Yujia Jin, Ishani Karmarkar, Aaron Sidford, Jiayi Wang", "tldr": "", "abstract": "We provide faster randomized algorithms for computing an $\\epsilon$-optimal policy in a discounted Markov decision process with $A_{\\text{tot}}$-state-action pairs, bounded rewards, and discount factor $\\gamma$. We provide an $\\tilde{O}(A_{\\text{tot}}[(1 - \\gamma)^{-3}\\epsilon^{-2} + (1 - \\gamma)^{-2}])$-time algorithm in the sampling setting, where the probability transition matrix is unknown but accessible through a generative model which can be queried in $\\tilde{O}(1)$-time, and an $\\tilde{O}(s + (1-\\gamma)^{-2})$-time algorithm in the offline setting where the probability transition matrix is known and $s$-sparse. These results improve upon the prior state-of-the-art which either ran in $\\tilde{O}(A_{\\text{tot}}[(1 - \\gamma)^{-3}\\epsilon^{-2} + (1 - \\gamma)^{-3}])$ time [Sidford, Wang, Wu, Ye 2018] in the sampling setting, $\\tilde{O}(s + A_{\\text{tot}} (1-\\gamma)^{-3})$ time [Sidford, Wang, Wu, Yang, Ye 2018] in the offline setting, or time at least quadratic in the number of states using interior point methods for linear programming. We achieve our results by building upon prior stochastic variance-reduced value iteration methods [Sidford, Wang, Wu, Yang, Ye 2018]. We provide a variant that carefully truncates the progress of its iterates to improve the variance of new variance-reduced sampling procedures that we introduce to implement the steps. Our method is essentially model-free and can be implemented in $\\tilde{O}(A_{\\text{tot}})$-space when given generative model access. Consequently, our results take a step in closing the sample-complexity gap between model-free and model-based methods.", "keywords": "Markov Decision Processes (MDP);discounted MDP;value iteration;variance reduction", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yujia Jin;Ishani Karmarkar;Aaron Sidford;Jiayi Wang", "authorids": "~Yujia_Jin1;~Ishani_Karmarkar1;~Aaron_Sidford1;~Jiayi_Wang17", "gender": "F;F;;", "homepage": "https://web.stanford.edu/~yujiajin/;https://ishanikarmarkar.github.io/;;", "dblp": "https://dblp.uni-trier.de/pers/hd/j/Jin:Yujia;350/0555;;", "google_scholar": "XTncVoQAAAAJ;https://scholar.google.ch/citations?user=yg8x6wsAAAAJ;;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;", "linkedin": ";;;jiayi-joyee-wang-42622b236/", "or_profile": "~Yujia_Jin1;~Ishani_Karmarkar1;~Aaron_Sidford1;~Jiayi_Wang17", "aff": "Stanford University;Stanford University;;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;stanford.edu", "position": "PhD student;PhD student;;PhD student", "bibtex": "@inproceedings{\njin2024truncated,\ntitle={Truncated Variance Reduced Value Iteration},\nauthor={Yujia Jin and Ishani Karmarkar and Aaron Sidford and Jiayi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BiikUm6pLu}\n}", "github": "", "reviewers": "fJN4;r2NT;ENzu;FfgG", "pdf_size": 454382, "rating": "5;7;7;8", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;2;3;3", "wc_summary": "126;88;113;91", "wc_strengths": "77;27;78;126", "wc_weaknesses": "40;51;33;72", "wc_questions": "24;165;230;2", "wc_limitations": "2;1;5;1", "wc_review": "269;332;459;292", "wc_reply_reviewers": "0;16;14;36", "wc_reply_authors": "0;0;0;38", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.5, 15.724185193516387 ], "wc_strengths_avg": [ 77.0, 35.00714212842859 ], "wc_weaknesses_avg": [ 49.0, 14.747881203752625 ], "wc_questions_avg": [ 105.25, 95.3870405243815 ], "wc_limitations_avg": [ 2.25, 1.6393596310755 ], "wc_review_avg": [ 338.0, 73.40640299047489 ], "wc_reply_reviewers_avg": [ 16.5, 12.835497652993435 ], "wc_reply_authors_avg": [ 9.5, 16.454482671904334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5835195931998388979&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Tangent Space Causal Inference: Leveraging Vector Fields for Causal Discovery in Dynamical Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96180", "id": "Bj2CpB9Dey", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Bj2CpB9Dey", "openreview": "https://openreview.net/forum?id=Bj2CpB9Dey", "poster": "", "project": "", "author_site": "Kurt Butler, Daniel Waxman, Petar Djuric", "tldr": "", "abstract": "Causal discovery with time series data remains a challenging yet increasingly important task across many scientific domains. Convergent cross mapping (CCM) and related methods have been proposed to study time series that are generated by dynamical systems, where traditional approaches like Granger causality are unreliable. However, CCM often yields inaccurate results depending upon the quality of the data. We propose the Tangent Space Causal Inference (TSCI) method for detecting causalities in dynamical systems. TSCI works by considering vector fields as explicit representations of the systems' dynamics and checks for the degree of synchronization between the learned vector fields. The TSCI approach is model-agnostic and can be used as a drop-in replacement for CCM and its generalizations. We first present a basic version of the TSCI algorithm, which is shown to be more effective than the basic CCM algorithm with very little additional computation. We additionally present augmented versions of TSCI that leverage the expressive power of latent variable models and deep learning. We validate our theory on standard systems, and we demonstrate improved causal inference performance across a number of benchmark tasks.", "keywords": "causal discovery;convergent cross mapping;manifolds;dynamical systems;differential geometry", "primary_area": "causal_inference", "supplementary_material": "", "author": "Kurt Butler;Daniel Waxman;Petar Djuric", "authorids": "~Kurt_Butler1;~Daniel_Waxman1;~Petar_Djuric1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/kurt-butler/home;https://danwaxman.github.io;https://sites.google.com/stonybrook.edu/petardjuric/", "dblp": ";259/8452-2;", "google_scholar": "XefBY5QAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-1520-4909;0009-0004-0168-5547;", "linkedin": ";;", "or_profile": "~Kurt_Butler1;~Daniel_Waxman1;~Petar_Djuric1", "aff": "State University of New York at Stony Brook;State University of New York at Stony Brook;State University of New York at Stony Brook", "aff_domain": "stonybrook.edu;stonybrook.edu;stonybrook.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbutler2024tangent,\ntitle={Tangent Space Causal Inference: Leveraging Vector Fields for Causal Discovery in Dynamical Systems},\nauthor={Kurt Butler and Daniel Waxman and Petar Djuric},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Bj2CpB9Dey}\n}", "github": "", "reviewers": "VHEe;kcd7;rcKb;htpT", "pdf_size": 1290503, "rating": "4;5;6;7", "confidence": "3;3;4;3", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;4;3", "wc_summary": "91;75;96;154", "wc_strengths": "39;80;71;24", "wc_weaknesses": "253;232;369;125", "wc_questions": "20;96;311;66", "wc_limitations": "15;45;23;17", "wc_review": "418;528;870;386", "wc_reply_reviewers": "244;120;30;189", "wc_reply_authors": "260;594;30;220", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 104.0, 29.891470355270247 ], "wc_strengths_avg": [ 53.5, 22.85278976405288 ], "wc_weaknesses_avg": [ 244.75, 86.61516899481292 ], "wc_questions_avg": [ 123.25, 111.72594819467857 ], "wc_limitations_avg": [ 25.0, 11.916375287812984 ], "wc_review_avg": [ 550.5, 191.83521574518065 ], "wc_reply_reviewers_avg": [ 145.75, 79.97616832532051 ], "wc_reply_authors_avg": [ 276.0, 203.1206537996567 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lKARhsvJ-ZwJ:scholar.google.com/&scioq=Tangent+Space+Causal+Inference:+Leveraging+Vector+Fields+for+Causal+Discovery+in+Dynamical+Systems&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "stonybrook.edu;stonybrook.edu;stonybrook.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "State University of New York at Stony Brook", "aff_unique_dep": "", "aff_unique_url": "https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Effective Exploration Based on the Structural Information Principles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96179", "id": "Bjh4mcYs20", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Bjh4mcYs20", "openreview": "https://openreview.net/forum?id=Bjh4mcYs20", "poster": "/media/PosterPDFs/NeurIPS%202024/96179.png?t=1729406739.2918522", "project": "", "author_site": "Xianghua Zeng, Hao Peng, Angsheng Li", "tldr": "", "abstract": "Traditional information theory provides a valuable foundation for Reinforcement Learning (RL), particularly through representation learning and entropy maximiza tion for agent exploration. However, existing methods primarily concentrate on modeling the uncertainty associated with RL\u2019s random variables, neglecting the in herent structure within the state and action spaces. In this paper, we propose a novel Structural Information principles-based Effective Exploration framework, namely SI2E. Structural mutual information between two variables is defined to address the single-variable limitation in structural information, and an innovative embedding principle is presented to capture dynamics-relevant state-action representations. The SI2E analyzes value differences in the agent\u2019s policy between state-action pairs and minimizes structural entropy to derive the hierarchical state-action struc ture, referred to as the encoding tree. Under this tree structure, value-conditional structural entropy is defined and maximized to design an intrinsic reward mechanism that avoids redundant transitions and promotes enhanced coverage in the state-action space. Theoretical connections are established between SI2E and classical information-theoretic methodologies, highlighting our framework\u2019s rationality and advantage. Comprehensive evaluations in the MiniGrid, MetaWorld, and DeepMind Control Suite benchmarks demonstrate that SI2E significantly outperforms state-of-the-art exploration baselines regarding final performance and sample efficiency, with maximum improvements of 37.63% and 60.25%, respectively.", "keywords": "Deep Reinforcement Learning;Structural Information;Representation Learning;Entropy Maximization Exploration", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b6161453d9c47d4aade45c053c0b1d1d3e30018b.zip", "author": "Xianghua Zeng;Hao Peng;Angsheng Li", "authorids": "~Xianghua_Zeng1;~Hao_Peng7;~Angsheng_Li1", "gender": "M;M;M", "homepage": ";https://penghao-bdsc.github.io/;https://scse.buaa.edu.cn/info/1078/5403.htm", "dblp": "165/5653;69/7742-1;66/4917.html", "google_scholar": "PZ3jhoMAAAAJ;R25rbyQAAAAJ;", "orcid": "0000-0003-0717-5623;0000-0003-0458-5977;0000-0001-7242-0803", "linkedin": ";;", "or_profile": "~Xianghua_Zeng1;~Hao_Peng7;~Angsheng_Li1", "aff": "Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzeng2024effective,\ntitle={Effective Exploration Based on the Structural Information Principles},\nauthor={Xianghua Zeng and Hao Peng and Angsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Bjh4mcYs20}\n}", "github": "", "reviewers": "6Ten;sR8b;XMx1;i1kZ", "pdf_size": 7849659, "rating": "4;5;6;7", "confidence": "3;3;2;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "3;1;3;3", "wc_summary": "115;32;49;131", "wc_strengths": "51;32;38;173", "wc_weaknesses": "99;384;85;74", "wc_questions": "32;397;69;5", "wc_limitations": "13;25;30;1", "wc_review": "310;870;271;384", "wc_reply_reviewers": "0;135;505;0", "wc_reply_authors": "340;2170;1729;0", "reply_reviewers": "0;1;3;0", "reply_authors": "3;6;6;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 81.75, 42.06765384472968 ], "wc_strengths_avg": [ 73.5, 57.85542325486868 ], "wc_weaknesses_avg": [ 160.5, 129.34160196935863 ], "wc_questions_avg": [ 125.75, 158.24565554858054 ], "wc_limitations_avg": [ 17.25, 11.233320969330485 ], "wc_review_avg": [ 458.75, 240.87898932866685 ], "wc_reply_reviewers_avg": [ 160.0, 206.67002685440383 ], "wc_reply_authors_avg": [ 1059.75, 911.2711931691905 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 4.0, 2.1213203435596424 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8286989140670550546&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "FERERO: A Flexible Framework for Preference-Guided Multi-Objective Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96178", "id": "BmG3NgH5xu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BmG3NgH5xu", "openreview": "https://openreview.net/forum?id=BmG3NgH5xu", "poster": "/media/PosterPDFs/NeurIPS%202024/96178.png?t=1731533668.9534373", "project": "", "author_site": "Lisha Chen, A Saif, Yanning Shen, Tianyi Chen", "tldr": "", "abstract": "Finding specific preference-guided Pareto solutions that represent different trade-offs among multiple objectives is critical yet challenging in multi-objective problems. \nExisting methods are restrictive in preference definitions and/or their theoretical guarantees.\nIn this work, we introduce a Flexible framEwork for pREfeRence-guided multi-Objective learning (**FERERO**) by casting it as a constrained vector optimization problem.\nSpecifically, two types of preferences are incorporated into this formulation -- the *relative preference* defined by the partial ordering induced by a polyhedral cone, and the *absolute preference* defined by constraints that are linear functions of the objectives. \nTo solve this problem, convergent algorithms are developed with both single-loop and stochastic variants. \nNotably, this is the *first single-loop primal algorithm* for constrained optimization to our knowledge. \nThe proposed algorithms adaptively adjust to both constraint and objective values, eliminating the need to solve different subproblems at different stages of constraint satisfaction. \nExperiments on multiple benchmarks demonstrate the proposed method is very competitive in finding preference-guided optimal solutions.\nCode is available at https://github.com/lisha-chen/FERERO/.", "keywords": "Multi-objective Learning; Preference-Guided Learning; Constrained Vector Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Lisha Chen;A F M Saif;Yanning Shen;Tianyi Chen", "authorids": "~Lisha_Chen1;~A_F_M_Saif2;~Yanning_Shen1;~Tianyi_Chen5", "gender": "F;M;F;M", "homepage": "https://lisha-chen.github.io/;https://afmsaif.github.io;https://sites.google.com/uci.edu/yanning-shen/home;https://chentianyi1991.github.io/", "dblp": "123/6690;;120/7392.html;", "google_scholar": "fh73S6gAAAAJ;nuh1mNsAAAAJ;MfzntAIAAAAJ;kFwvv38AAAAJ", "orcid": "0000-0003-3858-5537;;;", "linkedin": ";;;", "or_profile": "~Lisha_Chen1;~A_F_M_Saif2;~Yanning_Shen1;~Tianyi_Chen5", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;University of California, Irvine;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;rpi.edu;uci.edu;rpi.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024ferero,\ntitle={{FERERO}: A Flexible Framework for Preference-Guided Multi-Objective Learning},\nauthor={Lisha Chen and A F M Saif and Yanning Shen and Tianyi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BmG3NgH5xu}\n}", "github": "", "reviewers": "PTWJ;rRCm;dD8J;aduv", "pdf_size": 1209621, "rating": "5;6;6;7", "confidence": "3;2;3;5", "soundness": "2;3;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "68;134;98;71", "wc_strengths": "51;60;119;61", "wc_weaknesses": "348;107;117;138", "wc_questions": "133;48;280;5", "wc_limitations": "4;1;1;7", "wc_review": "604;350;615;282", "wc_reply_reviewers": "175;17;101;74", "wc_reply_authors": "459;24;40;33", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.75, 26.52710877574109 ], "wc_strengths_avg": [ 72.75, 26.984949508939238 ], "wc_weaknesses_avg": [ 177.5, 99.07194355618547 ], "wc_questions_avg": [ 116.5, 105.03451813570622 ], "wc_limitations_avg": [ 3.25, 2.48746859276655 ], "wc_review_avg": [ 462.75, 148.75714268565392 ], "wc_reply_reviewers_avg": [ 91.75, 56.83033960834653 ], "wc_reply_authors_avg": [ 139.0, 184.8391192361617 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9700430472878012734&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "rpi.edu;rpi.edu;uci.edu;rpi.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;University of California, Irvine", "aff_unique_dep": ";", "aff_unique_url": "https://www.rpi.edu;https://www.uci.edu", "aff_unique_abbr": "RPI;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are nuclear masks all you need for improved out-of-domain generalisation? A closer look at cancer classification in histopathology", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96177", "id": "BmwcbNYkuH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BmwcbNYkuH", "openreview": "https://openreview.net/forum?id=BmwcbNYkuH", "poster": "/media/PosterPDFs/NeurIPS%202024/96177.png?t=1733498235.8109105", "project": "", "author_site": "Dhananjay Tomar, Alexander Binder, Andreas Kleppe", "tldr": "", "abstract": "Domain generalisation in computational histopathology is challenging because the images are substantially affected by differences among hospitals due to factors like fixation and staining of tissue and imaging equipment. We hypothesise that focusing on nuclei can improve the out-of-domain (OOD) generalisation in cancer detection. We propose a simple approach to improve OOD generalisation for cancer detection by focusing on nuclear morphology and organisation, as these are domain-invariant features critical in cancer detection. Our approach integrates original images with nuclear segmentation masks during training, encouraging the model to prioritise nuclei and their spatial arrangement. Going beyond mere data augmentation, we introduce a regularisation technique that aligns the representations of masks and original images. We show, using multiple datasets, that our method improves OOD generalisation and also leads to increased robustness to image corruptions and adversarial attacks. The source code is available at https://github.com/undercutspiky/SFL/", "keywords": "Deep learning;domain generalization;histopathology;computational pathology;digital pathology;computer vision;single domain generalization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dhananjay Tomar;Alexander Binder;Andreas Kleppe", "authorids": "~Dhananjay_Tomar1;~Alexander_Binder2;~Andreas_Kleppe1", "gender": "M;M;M", "homepage": ";;https://www.mn.uio.no/ifi/english/people/aca/andrekle/index.html", "dblp": ";54/7546;", "google_scholar": ";https://scholar.google.de/citations?hl=de;", "orcid": ";0000-0001-9605-6209;0000-0002-8370-5289", "linkedin": "djay-tomar/;;", "or_profile": "~Dhananjay_Tomar1;~Alexander_Binder2;~Andreas_Kleppe1", "aff": "University of Oslo;Singapore Institute of Technology;University of Oslo", "aff_domain": "uio.no;singaporetech.edu.sg;uio.no", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ntomar2024are,\ntitle={Are nuclear masks all you need for improved out-of-domain generalisation? A closer look at cancer classification in histopathology},\nauthor={Dhananjay Tomar and Alexander Binder and Andreas Kleppe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BmwcbNYkuH}\n}", "github": "", "reviewers": "sg4y;fXPp;UomK", "pdf_size": 2574681, "rating": "5;5;5", "confidence": "5;4;3", "soundness": "2;2;1", "novelty": "2;3;2", "presentation": "2;2;2", "wc_summary": "93;81;73", "wc_strengths": "45;49;51", "wc_weaknesses": "198;211;317", "wc_questions": "41;68;42", "wc_limitations": "19;1;35", "wc_review": "396;410;518", "wc_reply_reviewers": "42;198;47", "wc_reply_authors": "0;743;0", "reply_reviewers": "1;2;1", "reply_authors": "1;3;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 82.33333333333333, 8.219218670625303 ], "wc_strengths_avg": [ 48.333333333333336, 2.494438257849294 ], "wc_weaknesses_avg": [ 242.0, 53.29790489941107 ], "wc_questions_avg": [ 50.333333333333336, 12.498888839501783 ], "wc_limitations_avg": [ 18.333333333333332, 13.888444437333106 ], "wc_review_avg": [ 441.3333333333333, 54.51197625802568 ], "wc_reply_reviewers_avg": [ 95.66666666666667, 72.38937920870866 ], "wc_reply_authors_avg": [ 247.66666666666666, 350.25355894773656 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kZFmwkVNdh0J:scholar.google.com/&scioq=Are+nuclear+masks+all+you+need+for+improved+out-of-domain+generalisation%3F+A+closer+look+at+cancer+classification+in+histopathology&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "uio.no;singaporetech.edu.sg;uio.no", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oslo;Singapore Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uio.no;https://www.singaporetech.edu.sg", "aff_unique_abbr": "UiO;SIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Norway;Singapore" }, { "title": "Clustering then Propagation: Select Better Anchors for Knowledge Graph Embedding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96176", "id": "BpJ6OTfWw3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BpJ6OTfWw3", "openreview": "https://openreview.net/forum?id=BpJ6OTfWw3", "poster": "", "project": "", "author_site": "KE LIANG, Yue Liu, Hao Li, Lingyuan Meng, Suyuan Liu, Siwei Wang, sihang zhou, Xinwang Liu", "tldr": "", "abstract": "Traditional knowledge graph embedding (KGE) models map entities and relations to unique embedding vectors in a shallow lookup manner. As the scale of data becomes larger, this manner will raise unaffordable computational costs. Anchor-based strategies have been treated as effective ways to alleviate such efficiency problems by propagation on representative entities instead of the whole graph. However, most existing anchor-based KGE models select the anchors in a primitive manner, which limits their performance. To this end, we propose a novel anchor-based strategy for KGE, i.e., a relational clustering-based anchor selection strategy (RecPiece), where two characteristics are leveraged, i.e., (1) representative ability of the cluster centroids and (2) descriptive ability of relation types in KGs. Specifically, we first perform clustering over features of factual triplets instead of entities, where cluster number is naturally set as number of relation types since each fact can be characterized by its relation in KGs. Then, representative triplets are selected around the clustering centroids, further mapped into corresponding anchor entities. Extensive experiments on six datasets show that RecPiece achieves higher performances but comparable or even fewer parameters compared to previous anchor-based KGE models, indicating that our model can select better anchors in a more scalable way.", "keywords": "Anchor-based Method;Clustering;Large Scale;Knowledge Graph Embedding", "primary_area": "learning_theory", "supplementary_material": "", "author": "KE LIANG;Yue Liu;Hao Li;Lingyuan Meng;Suyuan Liu;Siwei Wang;sihang zhou;Xinwang Liu", "authorids": "~KE_LIANG1;~Yue_Liu10;~Hao_Li44;~Lingyuan_Meng1;~Suyuan_Liu1;~Siwei_Wang4;~sihang_zhou1;~Xinwang_Liu1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://liangke23.github.io/;https://yueliu1999.github.io/;https://github.com/LIANGKE23/Awesome-Knowledge-Graph-Reasoning;;https://tracesource.github.io//;https://wangsiwei2010.github.io/;;https://xinwangliu.github.io/", "dblp": "48/73-6;74/1932-8;;273/8753;227/5361;51/8279-1;;45/6569-2.html", "google_scholar": "gwea2McAAAAJ;5tfpu3MAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;5o9hK3EAAAAJ;_cbTMjIAAAAJ;A56vWC4AAAAJ", "orcid": "0000-0002-4837-455X;;;0000-0002-2489-573X;0000-0003-1481-5393;0000-0001-9517-262X;0000-0003-1491-4594;", "linkedin": ";;;;;;;", "or_profile": "~KE_LIANG1;~Yue_Liu10;~Hao_Li44;~Lingyuan_Meng1;~Suyuan_Liu1;~Siwei_Wang4;~sihang_zhou1;~Xinwang_Liu1", "aff": "National University of Defense Technology;University of Illinois, Urbana Champaign;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Intelligent Game and Decision Lab;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;uiuc.edu;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;Intern;MS student;MS student;PhD student;Assistant Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nliang2024clustering,\ntitle={Clustering then Propagation: Select Better Anchors for Knowledge Graph Embedding},\nauthor={KE LIANG and Yue Liu and Hao Li and Lingyuan Meng and Suyuan Liu and Siwei Wang and sihang zhou and Xinwang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BpJ6OTfWw3}\n}", "github": "", "reviewers": "vJzB;z3Fe;89jh;GCgr", "pdf_size": 1143333, "rating": "6;6;7;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "59;100;62;56", "wc_strengths": "161;155;106;131", "wc_weaknesses": "25;48;54;80", "wc_questions": "30;51;43;60", "wc_limitations": "1;1;1;1", "wc_review": "276;355;266;328", "wc_reply_reviewers": "0;35;10;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.25, 17.879807045938723 ], "wc_strengths_avg": [ 138.25, 21.741377601246892 ], "wc_weaknesses_avg": [ 51.75, 19.57517560585345 ], "wc_questions_avg": [ 46.0, 11.022703842524301 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 306.25, 36.69042790701684 ], "wc_reply_reviewers_avg": [ 16.0, 12.864680330268607 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IYZUdv7kHDwJ:scholar.google.com/&scioq=Clustering+then+Propagation:+Select+Better+Anchors+for+Knowledge+Graph+Embedding&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "nudt.edu.cn;uiuc.edu;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "National University of Defense Technology;University of Illinois Urbana-Champaign;Intelligent Game and Decision Lab", "aff_unique_dep": ";;Intelligent Game and Decision Lab", "aff_unique_url": "http://www.nudt.edu.cn/;https://illinois.edu;", "aff_unique_abbr": "NUDT;UIUC;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United States;" }, { "title": "QWO: Speeding Up Permutation-Based Causal Discovery in LiGAMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96175", "id": "BptJGaPn9C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BptJGaPn9C", "openreview": "https://openreview.net/forum?id=BptJGaPn9C", "poster": "", "project": "", "author_site": "Mohammad Shahverdikondori, Ehsan Mokhtarian, Negar Kiyavash", "tldr": "", "abstract": "Causal discovery is essential for understanding relationships among variables of interest in many scientific domains. In this paper, we focus on permutation-based methods for learning causal graphs in Linear Gaussian Acyclic Models (LiGAMs), where the permutation encodes a causal ordering of the variables. Existing methods in this setting are not scalable due to their high computational complexity. These methods are comprised of two main components: (i) constructing a specific DAG, $\\mathcal{G}^\\pi$, for a given permutation $\\pi$, which represents the best structure that can be learned from the available data while adhering to $\\pi$, and (ii) searching over the space of permutations (i.e., causal orders) to minimize the number of edges in $\\mathcal{G}^\\pi$. We introduce QWO, a novel approach that significantly enhances the efficiency of computing $\\mathcal{G}^\\pi$ for a given permutation $\\pi$. QWO has a speed-up of $O(n^2)$ ($n$ is the number of variables) compared to the state-of-the-art BIC-based method, making it highly scalable. We show that our method is theoretically sound and can be integrated into existing search strategies such as GRASP and hill-climbing-based methods to improve their performance.", "keywords": "causal discovery;permutation-based;linear gaussian acyclic model;DAG learning", "primary_area": "causal_inference", "supplementary_material": "/attachment/fc72906fb0b579ce728152243d8a18565b1798a0.zip", "author": "Mohammad Shahverdikondori;Ehsan Mokhtarian;Negar Kiyavash", "authorids": "~Mohammad_Shahverdikondori2;~Ehsan_Mokhtarian1;~Negar_Kiyavash1", "gender": "M;F;M", "homepage": ";https://people.epfl.ch/negar.kiyavash?lang=en;https://mohammadshahverdi.github.io/", "dblp": "276/5445;85/4976;329/4714", "google_scholar": "https://scholar.google.com/scholar?hl=en;7tBDvOwAAAAJ;https://scholar.google.ch/citations?hl=en", "orcid": ";0000-0002-8545-7709;", "linkedin": "https://linkedin.com/in/ehsanmokhtarian/;;mohammad-shahverdi-kondori/", "or_profile": "~Ehsan_Mokhtarian1;~Negar_Kiyavash1;~Mohammad_ShahverdiKondori1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nshahverdikondori2024qwo,\ntitle={{QWO}: Speeding Up Permutation-Based Causal Discovery in Li{GAM}s},\nauthor={Mohammad Shahverdikondori and Ehsan Mokhtarian and Negar Kiyavash},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BptJGaPn9C}\n}", "github": "", "reviewers": "Znq2;9HgY;rQrH;D3Yo", "pdf_size": 430289, "rating": "5;5;7;7", "confidence": "3;4;3;4", "soundness": "2;3;4;3", "novelty": "3;2;3;3", "presentation": "2;3;4;4", "wc_summary": "41;111;127;64", "wc_strengths": "63;68;33;80", "wc_weaknesses": "100;318;116;45", "wc_questions": "2;3;54;11", "wc_limitations": "14;1;6;34", "wc_review": "220;501;336;234", "wc_reply_reviewers": "10;0;11;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 85.75, 34.694199803425356 ], "wc_strengths_avg": [ 61.0, 17.30606829987678 ], "wc_weaknesses_avg": [ 144.75, 103.43445992511393 ], "wc_questions_avg": [ 17.5, 21.360009363293827 ], "wc_limitations_avg": [ 13.75, 12.577261228105266 ], "wc_review_avg": [ 322.75, 112.23051055751283 ], "wc_reply_reviewers_avg": [ 8.5, 5.024937810560445 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jLP8zkz7x20J:scholar.google.com/&scioq=QWO:+Speeding+Up+Permutation-Based+Causal+Discovery+in+LiGAMs&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "SequentialAttention++ for Block Sparsification: Differentiable Pruning Meets Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96174", "id": "BrPZMOQiSN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BrPZMOQiSN", "openreview": "https://openreview.net/forum?id=BrPZMOQiSN", "poster": "/media/PosterPDFs/NeurIPS%202024/96174.png?t=1733868144.016617", "project": "", "author_site": "Taisuke Yasuda, Kyriakos Axiotis, Gang Fu, Mohammadhossein Bateni, Vahab Mirrokni", "tldr": "", "abstract": "Neural network pruning is a key technique towards engineering large yet scalable, interpretable, and generalizable models. Prior work on the subject has developed largely along two orthogonal directions: (1) differentiable pruning for efficiently and accurately scoring the importance of parameters, and (2) combinatorial optimization for efficiently searching over the space of sparse models. We unite the two approaches, both theoretically and empirically, to produce a coherent framework for structured neural network pruning in which differentiable pruning guides combinatorial optimization algorithms to select the most important sparse set of parameters. Theoretically, we show how many existing differentiable pruning techniques can be understood as nonconvex regularization for group sparse optimization, and prove that for a wide class of nonconvex regularizers, the global optimum is unique, group-sparse, and provably yields an approximate solution to a sparse convex optimization problem. The resulting algorithm that we propose, SequentialAttention++, advances the state of the art in large-scale neural network block-wise pruning tasks on the ImageNet and Criteo datasets.", "keywords": "pruning;sparsification;sparse optimization;neural network", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Taisuke Yasuda;Kyriakos Axiotis;Gang Fu;Mohammadhossein Bateni;Vahab Mirrokni", "authorids": "~Taisuke_Yasuda1;~Kyriakos_Axiotis1;~Gang_Fu3;~Mohammadhossein_Bateni1;~Vahab_Mirrokni2", "gender": "M;;;M;M", "homepage": "https://taisukeyasuda.github.io/;;http://mhbateni.com/academic;https://people.csail.mit.edu/mirrokni/Welcome.html;", "dblp": "177/9741-2;176/5139;22/4739;m/VahabSMirrokni;239/8404", "google_scholar": "c62WqiEAAAAJ;Xhv2tkcAAAAJ;n4eReqMAAAAJ;opbZfw0AAAAJ;", "orcid": ";;;;", "linkedin": "taisukeyasuda/;;;;", "or_profile": "~Taisuke_Yasuda1;~Kyriakos_Axiotis1;~Mohammadhossein_Bateni1;~Vahab_Mirrokni2;~Thomas_Fu1", "aff": "School of Computer Science, Carnegie Mellon University;Google;Google;Google Research;Google Research", "aff_domain": "cs.cmu.edu;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Research scientist;VP, Google Fellow;Researcher", "bibtex": "@inproceedings{\nyasuda2024sequentialattention,\ntitle={SequentialAttention++ for Block Sparsification: Differentiable Pruning Meets Combinatorial Optimization},\nauthor={Taisuke Yasuda and Kyriakos Axiotis and Gang Fu and Mohammadhossein Bateni and Vahab Mirrokni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BrPZMOQiSN}\n}", "github": "", "reviewers": "rDbF;5Nz3;2z8b", "pdf_size": 669398, "rating": "6;6;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "3;3;3", "wc_summary": "87;60;20", "wc_strengths": "44;28;5", "wc_weaknesses": "266;44;5", "wc_questions": "2;41;118", "wc_limitations": "33;9;1", "wc_review": "432;182;149", "wc_reply_reviewers": "18;6;5", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.666666666666664, 27.523727137790686 ], "wc_strengths_avg": [ 25.666666666666668, 16.006942938057293 ], "wc_weaknesses_avg": [ 105.0, 114.95216396397242 ], "wc_questions_avg": [ 53.666666666666664, 48.19635763093399 ], "wc_limitations_avg": [ 14.333333333333334, 13.59738536958076 ], "wc_review_avg": [ 254.33333333333334, 126.34960317398003 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 5.90668171555645 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13964581321402766306&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.cmu.edu;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": "School of Computer Science;Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Pittsburgh;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Equilibria in Adversarial Team Markov Games: A Nonconvex-Hidden-Concave Min-Max Optimization Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96173", "id": "BrvLTxEx08", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BrvLTxEx08", "openreview": "https://openreview.net/forum?id=BrvLTxEx08", "poster": "", "project": "", "author_site": "Fivos Kalogiannis, Jingming Yan, Ioannis Panageas", "tldr": "", "abstract": "We study the problem of learning a Nash equilibrium (NE) in Markov games which is a cornerstone in multi-agent reinforcement learning (MARL). In particular, we focus on infinite-horizon adversarial team Markov games (ATMGs) in which agents that share a common reward function compete against a single opponent, *the adversary*. These games unify two-player zero-sum Markov games and Markov potential games, resulting in a setting that encompasses both collaboration and competition. Kalogiannis et al. (2023) provided an efficient equilibrium computation algorithm for ATMGs which presumes knowledge of the reward and transition functions and has no sample complexity guarantees. We contribute a learning algorithm that utilizes MARL policy gradient methods with iteration and sample complexity that is polynomial in the approximation error $\\epsilon$ and the natural parameters of the ATMG, resolving the main caveats of the solution by (Kalogiannis et al., 2023). It is worth noting that previously, the existence of learning algorithms for NE was known for Markov two-player zero-sum and potential games but not for ATMGs.\n \nSeen through the lens of min-max optimization, computing a NE in these games consists a nonconvex--nonconcave saddle-point problem. Min-max optimization has received an extensive study. Nevertheless, the case of nonconvex--nonconcave landscapes remains elusive: in full generality, finding saddle-points is computationally intractable (Daskalakis et al., 2021). We circumvent the aforementioned intractability by developing techniques that exploit the hidden structure of the objective function via a nonconvex--concave reformulation. However, this introduces a challenge of a feasibility set with coupled constraints. We tackle these challenges by establishing novel techniques for optimizing weakly-smooth nonconvex functions, extending the framework of (Devolder et al., 2014).", "keywords": "MARL;Convex RL;Nash Equilibrium;Non-smooth Optimization;Minimax Optimization;Hidden Convexity;Nonconvex-nonconcave;Markov Games;Stochastic Games;Learning in Games", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Fivos Kalogiannis;Jingming Yan;Ioannis Panageas", "authorids": "~Fivos_Kalogiannis1;~Jingming_Yan1;~Ioannis_Panageas1", "gender": "M;M;M", "homepage": "https://fivoskal.github.io/;https://jingming-yan.github.io/;https://panageas.github.io", "dblp": "305/7347;;139/3829", "google_scholar": "FVEj9MIAAAAJ;;5NiFWuwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Fivos_Kalogiannis1;~Jingming_Yan1;~Ioannis_Panageas1", "aff": "University of California, Irvine;University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine", "aff_domain": "uci.edu;uci.edu;ics.uci.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkalogiannis2024learning,\ntitle={Learning Equilibria in Adversarial Team Markov Games: A Nonconvex-Hidden-Concave Min-Max Optimization Problem},\nauthor={Fivos Kalogiannis and Jingming Yan and Ioannis Panageas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BrvLTxEx08}\n}", "github": "", "reviewers": "hE5h;LYwL;mZxv;LXfR;YssW", "pdf_size": 599837, "rating": "4;4;5;5;7", "confidence": "4;4;3;2;4", "soundness": "2;3;2;2;3", "novelty": "2;2;3;3;3", "presentation": "1;3;3;3;3", "wc_summary": "97;61;23;23;125", "wc_strengths": "54;32;28;11;193", "wc_weaknesses": "305;229;287;162;210", "wc_questions": "65;270;1;2;3", "wc_limitations": "1;117;1;2;27", "wc_review": "522;709;340;200;558", "wc_reply_reviewers": "13;19;18;419;0", "wc_reply_authors": "659;170;66;462;0", "reply_reviewers": "1;1;1;3;0", "reply_authors": "3;2;2;3;0", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 65.8, 40.409899777158564 ], "wc_strengths_avg": [ 63.6, 66.13501341951931 ], "wc_weaknesses_avg": [ 238.6, 52.01768929892984 ], "wc_questions_avg": [ 68.2, 103.81021144376886 ], "wc_limitations_avg": [ 29.6, 44.81785358537377 ], "wc_review_avg": [ 465.8, 177.35884528266413 ], "wc_reply_reviewers_avg": [ 93.8, 162.74077546822738 ], "wc_reply_authors_avg": [ 271.4, 250.0764683051966 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9943284906277524678&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uci.edu;uci.edu;ics.uci.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fully Unconstrained Online Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96172", "id": "BtCrHwiBHP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BtCrHwiBHP", "openreview": "https://openreview.net/forum?id=BtCrHwiBHP", "poster": "", "project": "", "author_site": "Ashok Cutkosky, Zak Mhammedi", "tldr": "", "abstract": "We provide a technique for OLO that obtains regret $G\\|w_\\star\\|\\sqrt{T\\log(\\|w_\\star\\|G\\sqrt{T})} + \\|w_\\star\\|^2 + G^2$ on $G$-Lipschitz losses for any comparison point $w_\\star$ without knowing either $G$ or $\\|w_\\star\\|$. Importantly, this matches the optimal bound $G\\|w_\\star\\|\\sqrt{T}$ available with such knowledge (up to logarithmic factors), unless either $\\|w_\\star\\|$ or $G$ is so large that even $G\\|w_\\star\\|\\sqrt{T}$ is roughly linear in $T$. Thus, at a high level it matches the optimal bound in all cases in which one can achieve sublinear regret.", "keywords": "online learning;online convex optimization;parameter-free", "primary_area": "online_learning", "supplementary_material": "", "author": "Ashok Cutkosky;Zakaria Mhammedi", "authorids": "~Ashok_Cutkosky1;~Zakaria_Mhammedi1", "gender": ";M", "homepage": "http://www.cs.stanford.edu/~ashokc;", "dblp": "191/6725;192/1360", "google_scholar": "h4AbGp0AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Ashok_Cutkosky1;~Zakaria_Mhammedi1", "aff": "Boston University;Research, Google", "aff_domain": "bu.edu;research.google.com", "position": "Assistant Professor;Researcher", "bibtex": "@inproceedings{\ncutkosky2024fully,\ntitle={Fully Unconstrained Online Learning},\nauthor={Ashok Cutkosky and Zakaria Mhammedi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BtCrHwiBHP}\n}", "github": "", "reviewers": "1CHH;Wuav;itTv;q7Dc;xTkL", "pdf_size": 626915, "rating": "6;6;7;7;7", "confidence": "2;3;4;3;2", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "1;3;4;3;2", "wc_summary": "63;171;83;164;105", "wc_strengths": "68;122;61;89;53", "wc_weaknesses": "807;137;20;39;159", "wc_questions": "103;15;225;225;32", "wc_limitations": "1;1;11;1;1", "wc_review": "1042;446;400;518;350", "wc_reply_reviewers": "46;26;21;65;16", "wc_reply_authors": "252;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 117.2, 43.22221650956831 ], "wc_strengths_avg": [ 78.6, 24.77579463912308 ], "wc_weaknesses_avg": [ 232.4, 292.29136148712985 ], "wc_questions_avg": [ 120.0, 90.67303899175322 ], "wc_limitations_avg": [ 3.0, 4.0 ], "wc_review_avg": [ 551.2, 251.55389084647447 ], "wc_reply_reviewers_avg": [ 34.8, 18.214280112043955 ], "wc_reply_authors_avg": [ 50.4, 100.80000000000001 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.32732683535398854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10875763978749637983&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "bu.edu;research.google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Boston University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.bu.edu;https://research.google", "aff_unique_abbr": "BU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Interaction-aware 3D Gaussian Splatting for One-shot Hand Avatars", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96171", "id": "BxPa7Sn5Zq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=BxPa7Sn5Zq", "openreview": "https://openreview.net/forum?id=BxPa7Sn5Zq", "poster": "/media/PosterPDFs/NeurIPS%202024/96171.png?t=1729086781.3169749", "project": "", "author_site": "Xuan Huang, Hanhui Li, Wanquan Liu, Xiaodan Liang, Yiqiang Yan, Yuhao Cheng, CHENQIANG GAO", "tldr": "", "abstract": "In this paper, we propose to create animatable avatars for interacting hands with 3D Gaussian Splatting (GS) and single-image inputs. Existing GS-based methods designed for single subjects often yield unsatisfactory results due to limited input views, various hand poses, and occlusions. To address these challenges, we introduce a novel two-stage interaction-aware GS framework that exploits cross-subject hand priors and refines 3D Gaussians in interacting areas. Particularly, to handle hand variations, we disentangle the 3D presentation of hands into optimization-based identity maps and learning-based latent geometric features and neural texture maps. Learning-based features are captured by trained networks to provide reliable priors for poses, shapes, and textures, while optimization-based identity maps enable efficient one-shot fitting of out-of-distribution hands. Furthermore, we devise an interaction-aware attention module and a self-adaptive Gaussian refinement module. These modules enhance image rendering quality in areas with intra- and inter-hand interactions, overcoming the limitations of existing GS-based methods. Our proposed method is validated via extensive experiments on the large-scale InterHand2.6M dataset, and it significantly improves the state-of-the-art performance in image quality. Code and models will be released upon acceptance.", "keywords": "Gaussian splatting;interacting-hand avatar;one-shot learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xuan Huang;Hanhui Li;Wanquan Liu;Xiaodan Liang;Yiqiang Yan;Yuhao Cheng;CHENQIANG GAO", "authorids": "~Xuan_Huang1;~Hanhui_Li1;~Wanquan_Liu1;~Xiaodan_Liang2;~Yiqiang_Yan1;~Yuhao_Cheng2;~CHENQIANG_GAO3", "gender": "F;M;M;F;M;M;M", "homepage": "https://github.com/XuanHuang0;;http://ise.sysu.edu.cn/teacher/teacher01/1389393.htm;https://www.sysu-hcp.net/;https://research.lenovo.com/;https://yuhaocheng.github.io/;https://ise.sysu.edu.cn/teacher/teacher01/1411154.htm", "dblp": ";137/6248;53/4712;;;;82/9201", "google_scholar": ";aKLA6owAAAAJ;https://scholar.google.com.au/scholar?hl=zh-CN;voxznZAAAAAJ;;https://scholar.google.com.hk/citations?user=Uki0KqEAAAAJ;YiVDoL0AAAAJ", "orcid": ";;0000-0003-4910-353X;;;;", "linkedin": ";;;;;;", "or_profile": "~Xuan_Huang1;~Hanhui_Li1;~Wanquan_Liu1;~Xiaodan_Liang2;~Yiqiang_Yan1;~Yuhao_Cheng2;~CHENQIANG_GAO3", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Lenovo Research;Lenovo;SUN YAT-SEN UNIVERSITY", "aff_domain": "mail.sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;lenovo.com;lenovo.com;sysu.edu.cn", "position": "MS student;Associate Research Professor;Full Professor;Associate Professor;Principal Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nhuang2024learning,\ntitle={Learning Interaction-aware 3D Gaussian Splatting for One-shot Hand Avatars},\nauthor={Xuan Huang and Hanhui Li and Wanquan Liu and Xiaodan Liang and Yiqiang Yan and Yuhao Cheng and CHENQIANG GAO},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=BxPa7Sn5Zq}\n}", "github": "", "reviewers": "eWJx;NUjU;mjNr;gWjw", "pdf_size": 13084929, "rating": "5;5;6;6", "confidence": "4;5;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "88;90;88;64", "wc_strengths": "32;44;80;94", "wc_weaknesses": "91;117;105;75", "wc_questions": "48;122;96;62", "wc_limitations": "9;10;10;1", "wc_review": "268;383;379;296", "wc_reply_reviewers": "163;0;21;34", "wc_reply_authors": "347;658;67;67", "reply_reviewers": "2;0;1;1", "reply_authors": "5;4;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.5, 10.712142642814275 ], "wc_strengths_avg": [ 62.5, 25.352514668174436 ], "wc_weaknesses_avg": [ 97.0, 15.684387141358123 ], "wc_questions_avg": [ 82.0, 28.948229652260256 ], "wc_limitations_avg": [ 7.5, 3.774917217635375 ], "wc_review_avg": [ 331.5, 50.5 ], "wc_reply_reviewers_avg": [ 54.5, 63.80634764660958 ], "wc_reply_authors_avg": [ 284.75, 243.93685145955294 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2647813903311596445&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mail.sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;lenovo.com;lenovo.com;sysu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Sun Yat-sen University;Lenovo;Lenovo Group Limited", "aff_unique_dep": ";Research;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.lenovo.com;https://www.lenovo.com", "aff_unique_abbr": "SYSU;Lenovo;Lenovo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ABCFair: an Adaptable Benchmark approach for Comparing Fairness Methods", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97815", "id": "ByknnPI5Km", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ByknnPI5Km", "openreview": "https://openreview.net/forum?id=ByknnPI5Km", "poster": "", "project": "", "author_site": "MaryBeth Defrance, Maarten Buyl, Tijl De Bie", "tldr": "", "abstract": "Numerous methods have been implemented that pursue fairness with respect to sensitive features by mitigating biases in machine learning. Yet, the problem settings that each method tackles vary significantly, including the stage of intervention, the composition of sensitive features, the fairness notion, and the distribution of the output. Even in binary classification, the greatest common denominator of problem settings is small, significantly complicating benchmarking.\n\nHence, we introduce ABCFair, a benchmark approach which allows adapting to the desiderata of the real-world problem setting, enabling proper comparability between methods for any use case. We apply this benchmark to a range of pre-, in-, and postprocessing methods on both large-scale, traditional datasets and on a dual label (biased and unbiased) dataset to sidestep the fairness-accuracy trade-off.", "keywords": "fairness;benchmark;machine learning;desiderata;pipeline", "primary_area": "", "supplementary_material": "/attachment/e96d49f33500390324bef827b323902d2508afe8.pdf", "author": "MaryBeth Defrance;Maarten Buyl;Tijl De Bie", "authorids": "~MaryBeth_Defrance1;~Maarten_Buyl1;~Tijl_De_Bie1", "gender": "F;M;M", "homepage": ";;http://www.tijldebie.net", "dblp": ";259/2365;49/2018", "google_scholar": ";A5bU3BUAAAAJ;https://scholar.google.be/citations?user=eH_c4R4AAAAJ", "orcid": "0000-0002-6570-8857;0000-0002-5434-2386;0000-0002-2692-7504", "linkedin": "marybeth-defrance-bb514964/;maarten-buyl-44a54715a/;tijldebie/", "or_profile": "~MaryBeth_Defrance1;~Maarten_Buyl1;~Tijl_De_Bie1", "aff": "Universiteit Gent;School of Engineering and Applied Sciences, Harvard University;Ghent University", "aff_domain": "ugent.be;seas.harvard.edu;ugent.be", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ndefrance2024abcfair,\ntitle={{ABCF}air: an Adaptable Benchmark approach for Comparing Fairness Methods},\nauthor={MaryBeth Defrance and Maarten Buyl and Tijl De Bie},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ByknnPI5Km}\n}", "github": "", "reviewers": "BMCx;bsKd;ZRyy", "pdf_size": 520163, "rating": "5;6;8", "confidence": "4;4;3", "wc_summary_and_contributions": "54;37;159", "wc_strengths": "11;95;55", "wc_improvement": "4;246;84", "wc_limitations": "4;6;77", "wc_correctness": "4;4;12", "wc_clarity": "5;19;29", "wc_relation_to_prior_work": "4;10;246", "wc_documentation": "8;30;13", "wc_additional_feedback": "1;1;1", "wc_review": "95;448;676", "wc_reply_reviewers": "21;292;84", "wc_reply_authors": "20;489;57", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 83.33333333333333, 53.952654141282885 ], "wc_strengths_avg": [ 53.666666666666664, 34.30581421404962 ], "wc_improvement_avg": [ 111.33333333333333, 100.66887414798192 ], "wc_limitations_avg": [ 29.0, 33.95094500402996 ], "wc_correctness_avg": [ 6.666666666666667, 3.7712361663282534 ], "wc_clarity_avg": [ 17.666666666666668, 9.843215373488935 ], "wc_relation_to_prior_work_avg": [ 86.66666666666667, 112.69230477523989 ], "wc_documentation_avg": [ 17.0, 9.41629792788369 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 406.3333333333333, 239.01510877394807 ], "wc_reply_reviewers_avg": [ 132.33333333333334, 115.79387817247603 ], "wc_reply_authors_avg": [ 188.66666666666666, 212.90425599837647 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8422501600252877394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ugent.be;seas.harvard.edu;ugent.be", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Ghent;Harvard University;Ghent University", "aff_unique_dep": ";School of Engineering and Applied Sciences;", "aff_unique_url": "https://www.ugent.be/en;https://www.harvard.edu;https://www.ugent.be/en", "aff_unique_abbr": "UGent;Harvard;UGent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Belgium;United States" }, { "title": "Inferring stochastic low-rank recurrent neural networks from neural data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96170", "id": "C0EhyoPpTN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C0EhyoPpTN", "openreview": "https://openreview.net/forum?id=C0EhyoPpTN", "poster": "/media/PosterPDFs/NeurIPS%202024/96170.png?t=1733306347.664026", "project": "", "author_site": "Matthijs Pals, A Erdem Sa\u011ftekin, Felix Pei, Manuel Gloeckler, Jakob H Macke", "tldr": "", "abstract": "A central aim in computational neuroscience is to relate the activity of large populations of neurons to an underlying dynamical system. Models of these neural dynamics should ideally be both interpretable and fit the observed data well. Low-rank recurrent neural networks (RNNs) exhibit such interpretability by having tractable dynamics. However, it is unclear how to best fit low-rank RNNs to data consisting of noisy observations of an underlying stochastic system. Here, we propose to fit stochastic low-rank RNNs with variational sequential Monte Carlo methods. We validate our method on several datasets consisting of both continuous and spiking neural data, where we obtain lower dimensional latent dynamics than current state of the art methods. Additionally, for low-rank models with piecewise linear nonlinearities, we show how to efficiently identify all fixed points in polynomial rather than exponential cost in the number of units, making analysis of the inferred dynamics tractable for large RNNs. Our method both elucidates the dynamical systems underlying experimental recordings and provides a generative model whose trajectories match observed variability.", "keywords": "Low-rank RNNs;dynamical systems;variational inference;sequential monte carlo;neural data", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/8ca98520591c0237d55d7f775d7e88dd1fae53b1.zip", "author": "Matthijs Pals;A Erdem Sa\u011ftekin;Felix C Pei;Manuel Gloeckler;Jakob H. Macke", "authorids": "~Matthijs_Pals1;~A_Erdem_Sa\u011ftekin1;~Felix_C_Pei1;~Manuel_Gloeckler1;~Jakob_H._Macke1", "gender": "M;M;M;M;M", "homepage": ";https://aesagtekin.github.io;;http://www.mackelab.org;https://manuelgloeckler.github.io/", "dblp": "269/0085;372/3366;301/8911;97/11106;", "google_scholar": "SrQZ1IUAAAAJ;wUwGim8AAAAJ;bhHQOjgAAAAJ;FKOqtF8AAAAJ;0Vdv0H0AAAAJ", "orcid": "0000-0002-3051-1325;;;0000-0001-5154-8912;", "linkedin": "matthijs-pals-02a3b9164/?originalSubdomain=de;;felix-pei-b41742196;;", "or_profile": "~Matthijs_Pals1;~A_Erdem_Sa\u011ftekin1;~Felix_C_Pei1;~Jakob_H_Macke1;~Manuel_Gl\u00f6ckler1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;MS student;Intern;Full Professor;PhD student", "bibtex": "@inproceedings{\npals2024inferring,\ntitle={Inferring stochastic low-rank recurrent neural networks from neural data},\nauthor={Matthijs Pals and A Erdem Sa{\\u{g}}tekin and Felix C Pei and Manuel Gloeckler and Jakob H. Macke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C0EhyoPpTN}\n}", "github": "", "reviewers": "nuem;Strk;bWek;drM9", "pdf_size": 2965802, "rating": "5;6;6;8", "confidence": "4;2;4;5", "soundness": "3;4;4;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "92;76;87;58", "wc_strengths": "68;70;53;94", "wc_weaknesses": "93;127;106;64", "wc_questions": "26;45;139;1", "wc_limitations": "5;106;1;5", "wc_review": "284;424;386;222", "wc_reply_reviewers": "79;11;48;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 13.045593125649749 ], "wc_strengths_avg": [ 71.25, 14.686303142724515 ], "wc_weaknesses_avg": [ 97.5, 22.830900113661748 ], "wc_questions_avg": [ 52.75, 52.184169055375406 ], "wc_limitations_avg": [ 29.25, 44.34171286723146 ], "wc_review_avg": [ 329.0, 80.23091673413684 ], "wc_reply_reviewers_avg": [ 37.0, 28.679260799399973 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4736842105263159, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2498707711785479578&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Unchosen Experts Can Contribute Too: Unleashing MoE Models\u2019 Power by Self-Contrast", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96169", "id": "C1d3VVfdVG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C1d3VVfdVG", "openreview": "https://openreview.net/forum?id=C1d3VVfdVG", "poster": "/media/PosterPDFs/NeurIPS%202024/96169.png?t=1731479370.9364688", "project": "", "author_site": "Chufan Shi, Cheng Yang, Xinyu Zhu, Jiahao Wang, Taiqiang Wu, Siheng Li, Deng Cai, Yujiu Yang, Yu Meng", "tldr": "", "abstract": "Mixture-of-Experts (MoE) has emerged as a prominent architecture for scaling model size while maintaining computational efficiency. In MoE, each token in the input sequence activates a different subset of experts determined by a routing mechanism. However, the unchosen experts in MoE models do not contribute to the output, potentially leading to underutilization of the model's capacity.\nIn this work, we first conduct exploratory studies to demonstrate that increasing the number of activated experts does not necessarily improve and can even degrade the output quality. Then, we show that output distributions from an MoE model using different routing strategies substantially differ, indicating that different experts do not always act synergistically. \nMotivated by these findings, we propose **S**elf-**C**ontrast **M**ixture-**o**f-**E**xperts (SCMoE), a training-free strategy that utilizes unchosen experts in a self-contrast manner during inference. \nIn SCMoE, the next-token probabilities are determined by contrasting the outputs from strong and weak activation using the same MoE model.\nOur method is conceptually simple and computationally lightweight, as it incurs minimal latency compared to greedy decoding. \nExperiments on several benchmarks (GSM8K, StrategyQA, MBPP and HumanEval) demonstrate that SCMoE can consistently enhance Mixtral 8x7B\u2019s reasoning capability across various domains. For example, it improves the accuracy on GSM8K from 61.79 to 66.94. \nMoreover, combining SCMoE with self-consistency yields additional gains, increasing major@20 accuracy from 75.59 to 78.31.", "keywords": "Mixture-of-Experts;Self-Contrast;Text Generation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/02a29432686a6284b0f9bb73666eaeccada0f143.zip", "author": "Chufan Shi;Cheng Yang;Xinyu Zhu;Jiahao Wang;Taiqiang Wu;Siheng Li;Deng Cai;Yujiu Yang;Yu Meng", "authorids": "~Chufan_Shi1;~Cheng_Yang7;~Xinyu_Zhu2;~Jiahao_Wang1;~Taiqiang_Wu1;~Siheng_Li1;~Deng_Cai1;~Yujiu_Yang2;~Yu_Meng1", "gender": "M;;;M;M;M;M;M;M", "homepage": ";;;https://www.zhihu.com/people/wang-jia-hao-hku;https://wutaiqiang.github.io;;https://jcyk.github.io/;https://sites.google.com/view/iigroup-thu;https://yumeng5.github.io/", "dblp": "342/5731;;;;303/5950;312/9450;c/DCai-2;30/3847;30/4233-1", "google_scholar": "BYWnPHYAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;mCtvn50AAAAJ;;KpbRLYcAAAAJ;4gH3sxsAAAAJ;S2-yZKcAAAAJ", "orcid": "0009-0005-7889-5187;;;;0000-0002-3664-3513;;;0000-0002-6427-1024;0000-0003-2554-2888", "linkedin": ";;;;;;;;", "or_profile": "~Chufan_Shi1;~Cheng_Yang7;~Xinyu_Zhu2;~Jiahao_Wang1;~Taiqiang_Wu1;~Siheng_Li1;~Deng_Cai1;~Yujiu_Yang2;~Yu_Meng1", "aff": "Tsinghua University;;;University of Hong Kong;The University of Hong Kong;Tsinghua University;Tencent AI Lab;Tsinghua University;University of Virginia", "aff_domain": "tsinghua.edu.cn;;;hku.hk;hku.hk;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;virginia.edu", "position": "MS student;;;PhD student;PhD student;MS student;Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nshi2024unchosen,\ntitle={Unchosen Experts Can Contribute Too: Unleashing MoE Models{\\textquoteright} Power by Self-Contrast},\nauthor={Chufan Shi and Cheng Yang and Xinyu Zhu and Jiahao Wang and Taiqiang Wu and Siheng Li and Deng Cai and Yujiu Yang and Yu Meng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C1d3VVfdVG}\n}", "github": "", "reviewers": "bydP;KVjj;Vqkj;cUsK", "pdf_size": 617655, "rating": "6;6;7;8", "confidence": "3;4;4;4", "soundness": "3;2;2;4", "novelty": "3;2;3;4", "presentation": "4;3;3;4", "wc_summary": "50;135;48;129", "wc_strengths": "90;118;48;131", "wc_weaknesses": "135;204;49;133", "wc_questions": "2;2;36;37", "wc_limitations": "25;2;1;1", "wc_review": "302;461;182;431", "wc_reply_reviewers": "14;19;31;0", "wc_reply_authors": "0;98;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 90.5, 41.56019730463271 ], "wc_strengths_avg": [ 96.75, 31.80703538527286 ], "wc_weaknesses_avg": [ 130.25, 54.933482503842775 ], "wc_questions_avg": [ 19.25, 17.25362280797862 ], "wc_limitations_avg": [ 7.25, 10.256095748383007 ], "wc_review_avg": [ 344.0, 110.97972787856348 ], "wc_reply_reviewers_avg": [ 16.0, 11.113055385446435 ], "wc_reply_authors_avg": [ 24.5, 42.4352447854375 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10054689777481832049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;;;hku.hk;hku.hk;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;virginia.edu", "author_num": 9, "aff_unique_index": "0;1;1;0;2;0;3", "aff_unique_norm": "Tsinghua University;University of Hong Kong;Tencent;University of Virginia", "aff_unique_dep": ";;Tencent AI Lab;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk;https://ai.tencent.com;https://www.virginia.edu", "aff_unique_abbr": "THU;HKU;Tencent AI Lab;UVA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Out-Of-Distribution Detection with Diversification (Provably)", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96168", "id": "C1hiRbzEH9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C1hiRbzEH9", "openreview": "https://openreview.net/forum?id=C1hiRbzEH9", "poster": "", "project": "", "author_site": "Haiyun Yao, Zongbo Han, Huazhu Fu, Xi Peng, Qinghua Hu, Changqing Zhang", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is crucial for ensuring reliable deployment of machine learning models. Recent advancements focus on utilizing easily accessible auxiliary outliers (e.g., data from the web or other datasets) in training. However, we experimentally reveal that these methods still struggle to generalize their detection capabilities to unknown OOD data, due to the limited diversity of the auxiliary outliers collected. Therefore, we thoroughly examine this problem from the generalization perspective and demonstrate that a more diverse set of auxiliary outliers is essential for enhancing the detection capabilities. However, in practice, it is difficult and costly to collect sufficiently diverse auxiliary outlier data. Therefore, we propose a simple yet practical approach with a theoretical guarantee, termed Diversity-induced Mixup for OOD detection (diverseMix), which enhances the diversity of auxiliary outlier set for training in an efficient way. Extensive experiments show that diverseMix achieves superior performance on commonly used and recent challenging large-scale benchmarks, which further confirm the importance of the diversity of auxiliary outliers.", "keywords": "OOD detection", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Haiyun Yao;Zongbo Han;Huazhu Fu;Xi Peng;Qinghua Hu;Changqing Zhang", "authorids": "~Haiyun_Yao2;~Zongbo_Han1;~Huazhu_Fu4;~Xi_Peng3;~Qinghua_Hu1;~Changqing_Zhang1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/HaiyunYao;https://zongbo-han.github.io/;https://hzfu.github.io;http://cic.tju.edu.cn/faculty/huqinghua/index.html;http://cic.tju.edu.cn/faculty/zhangchangqing/index.html;http://www.pengxi.me", "dblp": "280/1096;255/6965;63/7767;;78/2668;18/931-1", "google_scholar": ";F2BBkQEAAAAJ;https://scholar.google.com/citations?hl=en;TVSNq_wAAAAJ;yJGhdykAAAAJ;bw9FOHAAAAAJ", "orcid": ";;0000-0002-9702-5524;0000-0001-7765-8095;;", "linkedin": ";;;;;", "or_profile": "~Haiyun_Yao2;~Zongbo_Han1;~Huazhu_Fu4;~Qinghua_Hu1;~Changqing_Zhang1;~Xi_Peng2", "aff": "Tianjin University;Tianjin University;Institute of High Performance Computing, Singapore, A*STAR;Tianjin University;Tianjin University;Sichuan University", "aff_domain": "tju.edu.cn;tju.edu.cn;ihpc.a-star.edu.sg;tju.edu.cn;tju.edu.cn;scu.edu.cn", "position": "MS student;PhD student;Principal Scientist;Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyao2024outofdistribution,\ntitle={Out-Of-Distribution Detection with Diversification (Provably)},\nauthor={Haiyun Yao and Zongbo Han and Huazhu Fu and Xi Peng and Qinghua Hu and Changqing Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C1hiRbzEH9}\n}", "github": "", "reviewers": "hmUc;E172;1vuo", "pdf_size": 710558, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "23;64;45", "wc_strengths": "25;71;39", "wc_weaknesses": "66;37;352", "wc_questions": "2;25;72", "wc_limitations": "1;23;7", "wc_review": "117;220;515", "wc_reply_reviewers": "24;0;493", "wc_reply_authors": "13;0;1675", "reply_reviewers": "1;0;2", "reply_authors": "2;1;7", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 44.0, 16.753109164172084 ], "wc_strengths_avg": [ 45.0, 19.252705437591537 ], "wc_weaknesses_avg": [ 151.66666666666666, 142.15093699616926 ], "wc_questions_avg": [ 33.0, 29.13188402192118 ], "wc_limitations_avg": [ 10.333333333333334, 9.285592184789412 ], "wc_review_avg": [ 284.0, 168.66732542690855 ], "wc_reply_reviewers_avg": [ 172.33333333333334, 226.95716678606018 ], "wc_reply_authors_avg": [ 562.6666666666666, 786.5563481969292 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 2.6246692913372702 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1420754277029737405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tju.edu.cn;tju.edu.cn;ihpc.a-star.edu.sg;tju.edu.cn;tju.edu.cn;scu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Tianjin University;Institute of High Performance Computing;Sichuan University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;https://www.ihpc.a-star.edu.sg;https://www.scu.edu.cn", "aff_unique_abbr": "TJU;IHPC;SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Reverse Transition Kernel: A Flexible Framework to Accelerate Diffusion Inference", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96167", "id": "C2xCLze1kS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C2xCLze1kS", "openreview": "https://openreview.net/forum?id=C2xCLze1kS", "poster": "", "project": "", "author_site": "Xunpeng Huang, Difan Zou, Hanze Dong, Zhang, Yian Ma, Tong Zhang", "tldr": "", "abstract": "To generate data from trained diffusion models, most inference algorithms, such as DDPM, DDIM, and other variants, rely on discretizing the reverse SDEs or their equivalent ODEs. In this paper, we view such approaches as decomposing the entire denoising diffusion process into several segments, each corresponding to a reverse transition kernel (RTK) sampling subproblem. Specifically, DDPM uses a Gaussian approximation for the RTK, resulting in low per-subproblem complexity but requiring a large number of segments (i.e., subproblems), which is conjectured to be inefficient. To address this, we develop a general RTK framework that enables a more balanced subproblem decomposition, resulting in $\\tilde O(1)$ subproblems, each with strongly log-concave targets. We then propose leveraging two fast sampling algorithms, the Metropolis-Adjusted Langevin Algorithm (MALA) and Underdamped Langevin Dynamics (ULD), for solving these strongly log-concave subproblems. This gives rise to the RTK-MALA and RTK-ULD algorithms for diffusion inference. In theory, we further develop the convergence guarantees for RTK-MALA and RTK-ULD in total variation (TV) distance: RTK-ULD can achieve $\\epsilon$ target error within $\\tilde{\\mathcal O}(d^{1/2}\\epsilon^{-1})$ under mild conditions, and RTK-MALA enjoys a $\\mathcal{O}(d^{2}\\log(d/\\epsilon))$ convergence rate under slightly stricter conditions. These theoretical results surpass the state-of-the-art convergence rates for diffusion inference and are well supported by numerical experiments.", "keywords": "diffusion inference;reverse transition kernel", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Xunpeng Huang;Difan Zou;Hanze Dong;Yi Zhang;Yian Ma;Tong Zhang", "authorids": "~Xunpeng_Huang2;~Difan_Zou1;~Hanze_Dong1;~Yi_Zhang94;~Yian_Ma1;~Tong_Zhang2", "gender": "M;M;M;;M;M", "homepage": "https://xunpeng746.github.io;https://difanzou.github.io/;https://hendrydong.github.io/;;https://sites.google.com/view/yianma;http://tongzhang-ml.org", "dblp": ";161/8923;228/7798;;;07/4227-1", "google_scholar": "T2L6rKcAAAAJ;Cp4fcTQAAAAJ;g9WLzWoAAAAJ;;A0TFlacAAAAJ;LurWtuYAAAAJ", "orcid": ";;;;;0000-0002-5511-2558", "linkedin": ";;hanze-dong/;;;", "or_profile": "~Xunpeng_Huang2;~Difan_Zou1;~Hanze_Dong1;~Yi_Zhang94;~Yian_Ma1;~Tong_Zhang2", "aff": "Hong Kong University of Science and Technology;University of Hong Kong;SalesForce;;University of California, San Diego;UIUC", "aff_domain": "ust.hk;hku.hk;salesforce.com;;ucsd.edu;illinois.edu", "position": "PhD student;Assistant Professor;Researcher;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024reverse,\ntitle={Reverse Transition Kernel: A Flexible Framework to Accelerate Diffusion Inference},\nauthor={Xunpeng Huang and Difan Zou and Hanze Dong and Yi Zhang and Yian Ma and Tong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C2xCLze1kS}\n}", "github": "", "reviewers": "kQ8M;PyUH;81an;sbHs;Y93M", "pdf_size": 10471304, "rating": "6;6;6;7;7", "confidence": "2;3;3;3;1", "soundness": "2;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "68;62;52;97;28", "wc_strengths": "43;73;48;77;84", "wc_weaknesses": "189;45;101;134;73", "wc_questions": "33;92;141;257;2", "wc_limitations": "11;17;8;21;1", "wc_review": "344;289;350;586;188", "wc_reply_reviewers": "17;7;12;18;11", "wc_reply_authors": "37;33;33;0;33", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 2.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 61.4, 22.428553230201896 ], "wc_strengths_avg": [ 65.0, 16.382917933017914 ], "wc_weaknesses_avg": [ 108.4, 49.95838267998675 ], "wc_questions_avg": [ 105.0, 89.84653582637452 ], "wc_limitations_avg": [ 11.6, 6.974238309665078 ], "wc_review_avg": [ 351.4, 130.91004545106537 ], "wc_reply_reviewers_avg": [ 13.0, 4.049691346263317 ], "wc_reply_authors_avg": [ 27.2, 13.687950905814938 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2138217464989754552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ust.hk;hku.hk;salesforce.com;;ucsd.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Hong Kong;Salesforce;University of California, San Diego;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ust.hk;https://www.hku.hk;https://www.salesforce.com;https://www.ucsd.edu;https://www illinois.edu", "aff_unique_abbr": "HKUST;HKU;Salesforce;UCSD;UIUC", "aff_campus_unique_index": "0;0;2;3", "aff_campus_unique": "Hong Kong SAR;;San Diego;Urbana-Champaign", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Advancing Open-Set Domain Generalization Using Evidential Bi-Level Hardest Domain Scheduler", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96166", "id": "C3JCwbMXbU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C3JCwbMXbU", "openreview": "https://openreview.net/forum?id=C3JCwbMXbU", "poster": "/media/PosterPDFs/NeurIPS%202024/96166.png?t=1731405110.8561447", "project": "", "author_site": "Kunyu Peng, Di Wen, Kailun Yang, Ao Luo, Yufan Chen, Jia Fu, M. Saquib Sarfraz, Alina Roitberg, Rainer Stiefelhagen", "tldr": "", "abstract": "In Open-Set Domain Generalization (OSDG), the model is exposed to both new variations of data appearance (domains) and open-set conditions, where both known and novel categories are present at test time. The challenges of this task arise from the dual need to generalize across diverse domains and accurately quantify category novelty, which is critical for applications in dynamic environments. Recently, meta-learning techniques have demonstrated superior results in OSDG, effectively orchestrating the meta-train and -test tasks by employing varied random categories and predefined domain partition strategies. These approaches prioritize a well-designed training schedule over traditional methods that focus primarily on data augmentation and the enhancement of discriminative feature learning. \nThe prevailing meta-learning models in OSDG typically utilize a predefined sequential domain scheduler to structure data partitions. However, a crucial aspect that remains inadequately explored is the influence brought by strategies of domain schedulers during training. \nIn this paper, we observe that an adaptive domain scheduler benefits more in OSDG compared with prefixed sequential and random domain schedulers. We propose the Evidential Bi-Level Hardest Domain Scheduler (EBiL-HaDS) to achieve an adaptive domain scheduler. This method strategically sequences domains by assessing their reliabilities in utilizing a follower network, trained with confidence scores learned in an evidential manner, regularized by max rebiasing discrepancy, and optimized in a bilevel manner. We verify our approach on three OSDG benchmarks, i.e., PACS, DigitsDG, and OfficeHome. The results show that our method substantially improves OSDG performance and achieves more discriminative embeddings for both the seen and unseen categories, underscoring the advantage of a judicious domain scheduler for the generalizability to unseen domains and unseen categories. The source code is publicly available at https://github.com/KPeng9510/EBiL-HaDS.", "keywords": "open set domain generalization;image classification", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kunyu Peng;Di Wen;Kailun Yang;Ao Luo;Yufan Chen;Jia Fu;M. Saquib Sarfraz;Alina Roitberg;Rainer Stiefelhagen", "authorids": "~Kunyu_Peng1;~Di_Wen2;~Kailun_Yang1;~Ao_Luo4;~Yufan_Chen2;~Jia_Fu3;~M._Saquib_Sarfraz1;~Alina_Roitberg1;~Rainer_Stiefelhagen1", "gender": "F;M;M;Not Specified;M;M;M;;M", "homepage": ";;https://yangkailun.com/;;;;https://ssarfraz.github.io/;;https://cvhci.anthropomatik.kit.edu/people_596.php", "dblp": "292/4197;78/3909-6;190/9526;;94/8231-1;33/4686-1;12/1561;159/9887;31/4699", "google_scholar": "pA9c0YsAAAAJ;aqGMqEcAAAAJ;pKFqWhgAAAAJ;;nG2ebe8AAAAJ;vK6jKMoAAAAJ;https://scholar.google.de/citations?user=4YLsmYIAAAAJ;https://scholar.google.de/citations?user=UuEFRDoAAAAJ;SFCOJxMAAAAJ", "orcid": "0000-0002-5419-9292;0009-0000-1693-7912;0000-0002-1090-667X;;0009-0008-3670-4567;0009-0004-3798-8603;0000-0002-1271-0005;;0000-0001-8046-4945", "linkedin": ";diwen96/;yangkailun/;;;;saquib-sarfraz-6395783a/;;", "or_profile": "~Kunyu_Peng1;~Di_Wen2;~Kailun_Yang1;~Ao_Luo4;~Yufan_Chen2;~Jia_Fu3;~M._Saquib_Sarfraz1;~Alina_Roitberg1;~Rainer_Stiefelhagen1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Hunan University;Waseda University;Karlsruhe Institute for Technology;RISE Research Institutes of Sweden AB;Karlsruher Institut f\u00fcr Technologie;Universit\u00e4t Stuttgart;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;kit.edu;hnu.edu.cn;waseda.jp;kit.edu;ri.se;kit.edu;uni-stuttgart.de;kit.edu", "position": "Postdoc;PhD student;Full Professor;PhD student;PhD student;Researcher;Lecturer;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npeng2024advancing,\ntitle={Advancing Open-Set Domain Generalization Using Evidential Bi-Level Hardest Domain Scheduler},\nauthor={Kunyu Peng and Di Wen and Kailun Yang and Ao Luo and Yufan Chen and Jia Fu and M. Saquib Sarfraz and Alina Roitberg and Rainer Stiefelhagen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C3JCwbMXbU}\n}", "github": "", "reviewers": "tsHR;euSj;QWTs;DFZJ", "pdf_size": 4797014, "rating": "5;5;5;7", "confidence": "3;4;3;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;2;3", "wc_summary": "68;49;74;44", "wc_strengths": "38;50;69;158", "wc_weaknesses": "202;69;45;48", "wc_questions": "4;7;27;33", "wc_limitations": "1;17;7;58", "wc_review": "313;192;222;341", "wc_reply_reviewers": "16;21;0;43", "wc_reply_authors": "102;120;65;106", "reply_reviewers": "1;1;0;2", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 58.75, 12.55736835487436 ], "wc_strengths_avg": [ 78.75, 47.07108985353961 ], "wc_weaknesses_avg": [ 91.0, 64.7495173727187 ], "wc_questions_avg": [ 17.75, 12.47747971346778 ], "wc_limitations_avg": [ 20.75, 22.252808811473667 ], "wc_review_avg": [ 267.0, 61.72924752497798 ], "wc_reply_reviewers_avg": [ 20.0, 15.378556499229699 ], "wc_reply_authors_avg": [ 98.25, 20.327014045353536 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=952383556804253651&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kit.edu;kit.edu;hnu.edu.cn;waseda.jp;kit.edu;ri.se;kit.edu;uni-stuttgart.de;kit.edu", "author_num": 9, "aff_unique_index": "0;0;1;2;3;4;0;5;3", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Hunan University;Waseda University;Karlsruhe Institute of Technology;RISE Research Institutes of Sweden;University of Stuttgart", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.kit.edu;http://www.hunu.edu.cn/;https://www.waseda.jp/top;https://www.kit.edu;https://www.rise.se;https://www.uni-stuttgart.de", "aff_unique_abbr": "KIT;HNU;Waseda;KIT;RISE;Uni Stuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;3;0;0;0", "aff_country_unique": "Germany;China;Japan;Sweden" }, { "title": "VLMimic: Vision Language Models are Visual Imitation Learner for Fine-grained Actions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96165", "id": "C3ZHiij9QE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C3ZHiij9QE", "openreview": "https://openreview.net/forum?id=C3ZHiij9QE", "poster": "/media/PosterPDFs/NeurIPS%202024/96165.png?t=1730117819.3845174", "project": "", "author_site": "Guangyan Chen, Meiling Wang, Te Cui, Yao Mu, Haoyang Lu, Tianxing Zhou, Zicai Peng, Mengxiao Hu, Haizhou Li, Li Yuan, Yi Yang, Yufeng Yue", "tldr": "", "abstract": "Visual imitation learning (VIL) provides an efficient and intuitive strategy for robotic systems to acquire novel skills. Recent advancements in Vision Language Models (VLMs) have demonstrated remarkable performance in vision and language reasoning capabilities for VIL tasks. Despite the progress, current VIL methods naively employ VLMs to learn high-level plans from human videos, relying on pre-defined motion primitives for executing physical interactions, which remains a major bottleneck. In this work, we present VLMimic, a novel paradigm that harnesses VLMs to directly learn even fine-grained action levels, only given a limited number of human videos. Specifically, VLMimic first grounds object-centric movements from human videos, and learns skills using hierarchical constraint representations, facilitating the derivation of skills with fine-grained action levels from limited human videos. These skills are refined and updated through an iterative comparison strategy, enabling efficient adaptation to unseen environments. Our extensive experiments exhibit that our VLMimic, using only 5 human videos, yields significant improvements of over 27% and 21% in RLBench and real-world manipulation tasks, and surpasses baselines by more than 37% in long-horizon tasks. Code and videos are available on our anonymous homepage.", "keywords": "Multimodal language models;Vision language models;Robotic manipulation;Code generation;Visual imitation learning", "primary_area": "robotics", "supplementary_material": "/attachment/e9f7a3d906392c24f5b22eebdcb688cca4fc31fd.zip", "author": "Guangyan Chen;Meiling Wang;Te Cui;Yao Mu;Haoyang Lu;Tianxing Zhou;Zicai Peng;Mengxiao Hu;Haizhou Li;Li Yuan;Yi Yang;Yufeng Yue", "authorids": "~Guangyan_Chen1;~Meiling_Wang2;~Te_Cui1;~Yao_Mu1;~Haoyang_Lu1;~Tianxing_Zhou1;~Zicai_Peng1;~Mengxiao_Hu1;~Haizhou_Li4;~Li_Yuan2;~Yi_Yang34;~Yufeng_Yue1", "gender": "M;F;M;M;M;Not Specified;M;F;M;M;M;M", "homepage": ";http://www.bit.edu.cn;;https://yaomarkmu.github.io/;;;;;;;https://yfyue-bit.github.io/;https://yuanli2333.github.io/", "dblp": "309/5775;17/1320-2.html;;260/0674;;353/3006;362/9291.html;239/2696;;33/4854-9;194/9143;98/4583-7", "google_scholar": "Ic8DObsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?view_op=list_works;sc9GTbcAAAAJ;https://scholar.google.com/citations?hl=en;iuVtO2kAAAAJ;https://scholar.google.com.hk/citations?user=ca2_UcsAAAAJ;;https://scholar.google.com.sg/citations?user=7M_xficAAAAJ;-5juAR0AAAAJ", "orcid": "0000-0002-2903-1957;0000-0002-3618-7423;0009-0002-3265-3148;;0009-0009-6718-0919;0009-0007-8706-8655;0009-0004-7633-1516;0009-0008-3967-8894;0009-0007-2992-1478;0000-0003-3964-2433;;0000-0002-2120-5588", "linkedin": ";;;;;;;;;;;", "or_profile": "~Guangyan_Chen1;~Meiling_Wang2;~Te_Cui1;~Yao_Mu1;~Haoyang_Lu1;~Tianxing_Zhou1;~Zicai_Peng1;~Mengxiao_Hu1;~Haizhou_Li4;~Yi_Yang34;~Yufeng_Yue1;~Yuan_LI2", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;The University of Hong Kong;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Peking University", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;hku.hk;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;pku.edu.cn", "position": "PhD student;Full Professor;PhD student;PhD student;MS student;Undergrad student;Undergrad student;PhD student;Undergrad student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024vlmimic,\ntitle={{VLM}imic: Vision Language Models are Visual Imitation Learner for Fine-grained Actions},\nauthor={Guangyan Chen and Meiling Wang and Te Cui and Yao Mu and Haoyang Lu and Tianxing Zhou and Zicai Peng and Mengxiao Hu and Haizhou Li and Li Yuan and Yi Yang and Yufeng Yue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C3ZHiij9QE}\n}", "github": "", "reviewers": "aTuD;xpmL;Y6C5;ovGy", "pdf_size": 12562961, "rating": "6;6;6;7", "confidence": "5;4;3;4", "soundness": "3;3;3;4", "novelty": "2;2;2;3", "presentation": "4;3;3;2", "wc_summary": "55;53;82;68", "wc_strengths": "25;52;67;55", "wc_weaknesses": "83;110;273;74", "wc_questions": "82;74;171;28", "wc_limitations": "1;7;23;4", "wc_review": "246;296;616;229", "wc_reply_reviewers": "23;33;0;14", "wc_reply_authors": "107;106;63;106", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.5, 11.629703349613008 ], "wc_strengths_avg": [ 49.75, 15.35211711784404 ], "wc_weaknesses_avg": [ 135.0, 80.76818680644007 ], "wc_questions_avg": [ 88.75, 51.76569810212164 ], "wc_limitations_avg": [ 8.75, 8.496322733983215 ], "wc_review_avg": [ 346.75, 157.39023953218955 ], "wc_reply_reviewers_avg": [ 17.5, 12.134661099511597 ], "wc_reply_authors_avg": [ 95.5, 18.76832437912346 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3396534388024424068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;hku.hk;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;pku.edu.cn", "author_num": 12, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0;0;2", "aff_unique_norm": "Beijing Institute of Technology;University of Hong Kong;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.hku.hk;http://www.pku.edu.cn", "aff_unique_abbr": "BIT;HKU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "C3t6GMPnC5", "title": "Mamba State-Space Models Can Be Strong Downstream Learners", "track": "main", "status": "Reject", "tldr": "", "abstract": "Mamba state-space models (SSMs) have recently outperformed state-of-the-art (SOTA) Transformer large language models (LLMs) in various tasks and been widely adapted. However, Mamba\u2019s downstream learning capabilities remain either unexplored\u2013e.g., mixed-precision (MPFT) and parameter-efficient fine-tuning (PEFT)\u2013or under-evaluated\u2013e.g., in-context learning (ICL). For the latter, recent works reported Mamba\u2019s ICL rivals SOTA Transformer LLMs using non-standard benchmarks. In contrast, we show that on standard benchmarks, pretrained Mamba models achieve only 38% of the ICL performance improvements (over zero-shot) of comparable Transformers.\n\nEnabling MPFT and PEFT in Mamba architectures is challenging due to recurrent dynamics and highly customized CUDA kernels, respectively. However, we prove that Mamba\u2019s recurrent dynamics are robust to small input changes using dynamical systems theory. Empirically, we show that performance changes in Mamba\u2019s inference and fine-tuning due to mixed-precision align with Transformer LLMs. Furthermore, we prove that targeting key memory buffers in Mamba\u2019s customized CUDA kernels for low-rank adaptation regularizes SSM parameters, thus achieving parameter efficiency while retaining speedups. We show that combining MPFT and PEFT enables up to 2.15 times more tokens-per-second and 65.5% reduced memory-per-token compared to full Mamba fine-tuning, while achieving up to 81.5% of the ICL performance improvements (over zero-shot) of comparably fine-tuned Transformers.", "keywords": "Mamba;state-space models;in-context learning;large language models;LLMs;SSMs", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "John Timothy Halloran;Manbir S Gulati;Paul F Roysdon", "authorids": "~John_Timothy_Halloran1;~Manbir_S_Gulati1;~Paul_F_Roysdon1", "gender": ";M;", "homepage": ";;https://github.com/pfroysdon/publications/tree/main", "dblp": ";324/7962;", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~John_Timothy_Halloran1;~Manbir_S_Gulati1;~Paul_F_Roysdon1", "aff": ";Leidos Inc;Leidos", "aff_domain": ";leidos.com;leidos.com", "position": ";Researcher;Principal Researcher", "bibtex": "@misc{\nanonymous2024mamba,\ntitle={Mamba State-Space Models Can Be Strong Downstream Learners},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=C3t6GMPnC5}\n}", "github": "", "project": "", "reviewers": "32SR;TRbW;Uruo", "site": "https://openreview.net/forum?id=C3t6GMPnC5", "pdf_size": 549237, "rating": "4;6;7", "confidence": "3;2;4", "soundness": "3;4;2", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "86;31;89", "wc_strengths": "31;35;79", "wc_weaknesses": "234;31;188", "wc_questions": "55;2;3", "wc_limitations": "9;21;26", "wc_review": "415;120;385", "wc_reply_reviewers": "0;14;177", "wc_reply_authors": "126;70;495", "reply_reviewers": "0;1;2", "reply_authors": "3;2;4", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.66666666666667, 26.662499674428293 ], "wc_strengths_avg": [ 48.333333333333336, 21.746008573733455 ], "wc_weaknesses_avg": [ 151.0, 86.90607957252857 ], "wc_questions_avg": [ 20.0, 24.752104287649296 ], "wc_limitations_avg": [ 18.666666666666668, 7.1336448530109 ], "wc_review_avg": [ 306.6666666666667, 132.56025883432116 ], "wc_reply_reviewers_avg": [ 63.666666666666664, 80.34232314844331 ], "wc_reply_authors_avg": [ 230.33333333333334, 188.53882594545053 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7651224840802071788&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Leidos Inc;Leidos", "aff_unique_dep": ";", "aff_unique_url": "https://www.leidos.com;https://www.leidos.com", "aff_unique_abbr": "Leidos;Leidos", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Spectral Representation for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96164", "id": "C3tEX45hJX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C3tEX45hJX", "openreview": "https://openreview.net/forum?id=C3tEX45hJX", "poster": "/media/PosterPDFs/NeurIPS%202024/96164.png?t=1730500339.6444404", "project": "", "author_site": "Dmitry Shribak, Chen-Xiao Gao, Yitong Li, Chenjun Xiao, Bo Dai", "tldr": "", "abstract": "Diffusion-based models have achieved notable empirical successes in reinforcement learning (RL) due to their expressiveness in modeling complex distributions. Despite existing methods being promising, the key challenge of extending existing methods for broader real-world applications lies in the computational cost at inference time, i.e., sampling from a diffusion model is considerably slow as it often requires tens to hundreds of iterations to generate even one sample. To circumvent this issue, we propose to leverage the flexibility of diffusion models for RL from a representation learning perspective. In particular, by exploiting the connection between diffusion models and energy-based models, we develop Diffusion Spectral Representation (Diff-SR), a coherent algorithm framework that enables extracting sufficient representations for value functions in Markov decision processes (MDP) and partially observable Markov decision processes (POMDP). We further demonstrate how Diff-SR facilitates efficient policy optimization and practical algorithms while explicitly bypassing the difficulty and inference cost of sampling from the diffusion model. Finally, we provide comprehensive empirical studies to verify the benefits of Diff-SR in delivering robust and advantageous performance across various benchmarks with both fully and partially observable settings.", "keywords": "Diffusion Models;Reinforcement Learning;Representation Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Dmitry Shribak;Chen-Xiao Gao;Yitong Li;Chenjun Xiao;Bo Dai", "authorids": "~Dmitry_Shribak1;~Chen-Xiao_Gao1;~Yitong_Li3;~Chenjun_Xiao1;~Bo_Dai1", "gender": "M;;;;", "homepage": ";;;https://chenjun-x.github.io/;https://bo-dai.github.io/", "dblp": ";;;178/8641;64/2903", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;TIKl_foAAAAJ", "orcid": ";;0009-0003-5153-3202;0000-0002-5493-1500;0009-0002-8070-574X", "linkedin": ";;yitong-li-25122818b;;", "or_profile": "~Dmitry_Shribak1;~Chen-Xiao_Gao1;~Yitong_Li3;~Chenjun_Xiao1;~Bo_Dai1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology;Huawei Technologies Ltd.;Google Brain", "aff_domain": "gatech.edu;;gatech.edu;huawei.com;google.com", "position": "PhD student;;MS student;Researcher;Research Scientist", "bibtex": "@inproceedings{\nshribak2024diffusion,\ntitle={Diffusion Spectral Representation for Reinforcement Learning},\nauthor={Dmitry Shribak and Chen-Xiao Gao and Yitong Li and Chenjun Xiao and Bo Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C3tEX45hJX}\n}", "github": "", "reviewers": "iUqH;sdAM;2EpY;TNcM", "pdf_size": 829741, "rating": "6;6;6;7", "confidence": "3;3;3;2", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "87;329;31;50", "wc_strengths": "73;206;78;33", "wc_weaknesses": "108;495;92;37", "wc_questions": "43;99;171;41", "wc_limitations": "1;68;5;1", "wc_review": "312;1197;377;162", "wc_reply_reviewers": "11;617;44;0", "wc_reply_authors": "0;373;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.25, 119.91533471579021 ], "wc_strengths_avg": [ 97.5, 65.02499519415592 ], "wc_weaknesses_avg": [ 183.0, 182.04807057477979 ], "wc_questions_avg": [ 88.5, 53.016506863428866 ], "wc_limitations_avg": [ 18.75, 28.481353549296074 ], "wc_review_avg": [ 512.0, 403.09738277493193 ], "wc_reply_reviewers_avg": [ 168.0, 259.73544232545544 ], "wc_reply_authors_avg": [ 93.25, 161.5137378057978 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9116356016483441751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gatech.edu;;gatech.edu;huawei.com;google.com", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Georgia Institute of Technology;Huawei;Google", "aff_unique_dep": ";Huawei Technologies;Google Brain", "aff_unique_url": "https://www.gatech.edu;https://www.huawei.com;https://brain.google.com", "aff_unique_abbr": "Georgia Tech;Huawei;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Flipped Classroom: Aligning Teacher Attention with Student in Generalized Category Discovery", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96163", "id": "C4NbtYnyQg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C4NbtYnyQg", "openreview": "https://openreview.net/forum?id=C4NbtYnyQg", "poster": "/media/PosterPDFs/NeurIPS%202024/96163.png?t=1733732402.3360467", "project": "", "author_site": "Haonan Lin, Wenbin An, Jiahao Wang, Yan Chen, Feng Tian, Mengmeng Wang, QianYing Wang, Guang Dai, Jingdong Wang", "tldr": "", "abstract": "Recent advancements have shown promise in applying traditional Semi-Supervised Learning strategies to the task of Generalized Category Discovery (GCD). Typically, this involves a teacher-student framework in which the teacher imparts knowledge to the student to classify categories, even in the absence of explicit labels. Nevertheless, GCD presents unique challenges, particularly the absence of priors for new classes, which can lead to the teacher's misguidance and unsynchronized learning with the student, culminating in suboptimal outcomes. In our work, we delve into why traditional teacher-student designs falter in generalized category discovery as compared to their success in closed-world semi-supervised learning. We identify inconsistent pattern learning as the crux of this issue and introduce FlipClass\u2014a method that dynamically updates the teacher to align with the student's attention, instead of maintaining a static teacher reference. Our teacher-attention-update strategy refines the teacher's focus based on student feedback, promoting consistent pattern recognition and synchronized learning across old and new classes. Extensive experiments on a spectrum of benchmarks affirm that FlipClass significantly surpasses contemporary GCD methods, establishing new standards for the field.", "keywords": "generalized category discovery;semi-supervised learning;open world learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/5c65a95858bdcbf45bb02f517d1bf71ee62e7469.zip", "author": "Haonan Lin;Wenbin An;Jiahao Wang;Yan Chen;Feng Tian;Mengmeng Wang;QianYing Wang;Guang Dai;Jingdong Wang", "authorids": "~Haonan_Lin1;~Wenbin_An1;~Jiahao_Wang14;~Yan_Chen16;~Feng_Tian4;~Mengmeng_Wang1;~QianYing_Wang1;~Guang_Dai1;~Jingdong_Wang1", "gender": "M;M;;;;F;F;M;M", "homepage": ";;;;;https://sallymmx.github.io/;https://research.lenovo.com/webapp/view/home.html;;https://jingdongwang2017.github.io/", "dblp": "269/0292;331/2394;;;;;86/11012;;49/3441", "google_scholar": "GBnV3HIAAAAJ;https://scholar.google.com.hk/citations?user=BpkQZGgAAAAJ;;;;VSRnUiUAAAAJ;gXgWhfEAAAAJ;;z5SPCmgAAAAJ", "orcid": ";;;0000-0003-4838-3779;;;;0000-0002-3529-9087;0000-0002-4888-4445", "linkedin": "haonan-lin-035276207/;;;;;;qianying-jane-wang-0255231/;;", "or_profile": "~Haonan_Lin1;~Wenbin_An1;~Jiahao_Wang14;~Yan_Chen16;~Feng_Tian4;~Mengmeng_Wang1;~QianYing_Wang1;~Guang_Dai1;~Jingdong_Wang1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi'an Jiaotong University;;Zhejiang University;lenovo group;SGIT AI;Baidu", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;;zju.edu.cn;lenovo.com;sgcc.com.cn;baidu.com", "position": "MS student;PhD student;;Associate Professor;;PhD student;Principal Researcher;Principal Researcher;Chief Scientist for Computer Vision", "bibtex": "@inproceedings{\nlin2024flipped,\ntitle={Flipped Classroom: Aligning Teacher Attention with Student in Generalized Category Discovery},\nauthor={Haonan Lin and Wenbin An and Jiahao Wang and Yan Chen and Feng Tian and Mengmeng Wang and QianYing Wang and Guang Dai and Jingdong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C4NbtYnyQg}\n}", "github": "", "reviewers": "Btrz;MRfK;P1mR;MqmB", "pdf_size": 8482557, "rating": "6;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;4;2;4", "wc_summary": "107;131;73;67", "wc_strengths": "56;54;44;116", "wc_weaknesses": "65;59;95;73", "wc_questions": "3;81;91;68", "wc_limitations": "5;11;1;14", "wc_review": "236;336;304;338", "wc_reply_reviewers": "0;46;33;0", "wc_reply_authors": "68;163;57;73", "reply_reviewers": "0;1;2;0", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 94.5, 26.014419078657127 ], "wc_strengths_avg": [ 67.5, 28.368115905008565 ], "wc_weaknesses_avg": [ 73.0, 13.638181696985855 ], "wc_questions_avg": [ 60.75, 34.3247359785913 ], "wc_limitations_avg": [ 7.75, 5.0682837331783235 ], "wc_review_avg": [ 303.5, 41.24015033920221 ], "wc_reply_reviewers_avg": [ 19.75, 20.27775875189366 ], "wc_reply_authors_avg": [ 90.25, 42.39914503855001 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11742443343063439680&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;;zju.edu.cn;lenovo.com;sgcc.com.cn;baidu.com", "author_num": 9, "aff_unique_index": "0;0;0;1;2;3;4", "aff_unique_norm": "Xi'an Jiao Tong University;Zhejiang University;Lenovo Group;SGIT AI;Baidu", "aff_unique_dep": ";;;;Baidu, Inc.", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.zju.edu.cn;https://www.lenovo.com;;https://www.baidu.com", "aff_unique_abbr": "XJTU;ZJU;Lenovo;;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Reshuffling Resampling Splits Can Improve Generalization of Hyperparameter Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96162", "id": "C4SInFLvuB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C4SInFLvuB", "openreview": "https://openreview.net/forum?id=C4SInFLvuB", "poster": "", "project": "", "author_site": "Thomas Nagler, Lennart Schneider, Bernd Bischl, Matthias Feurer", "tldr": "", "abstract": "Hyperparameter optimization is crucial for obtaining peak performance of machine learning models. The standard protocol evaluates various hyperparameter configurations using a resampling estimate of the generalization error to guide optimization and select a final hyperparameter configuration. Without much evidence, paired resampling splits, i.e., either a fixed train-validation split or a fixed cross-validation scheme, are often recommended. We show that, surprisingly, reshuffling the splits for every configuration often improves the final model's generalization performance on unseen data. Our theoretical analysis explains how reshuffling affects the asymptotic behavior of the validation loss surface and provides a bound on the expected regret in the limiting regime. This bound connects the potential benefits of reshuffling to the signal and noise characteristics of the underlying optimization problem. We confirm our theoretical results in a controlled simulation study and demonstrate the practical usefulness of reshuffling in a large-scale, realistic hyperparameter optimization experiment. While reshuffling leads to test performances that are competitive with using fixed splits, it drastically improves results for a single train-validation holdout protocol and can often make holdout become competitive with standard CV while being computationally cheaper.", "keywords": "Hyperparameter Optimization;Generalization Performance;Cross-Validation;Resampling;Validation Splits;Model Selection;Automated Machine Learning", "primary_area": "other", "supplementary_material": "", "author": "Thomas Nagler;Lennart Schneider;Bernd Bischl;Matthias Feurer", "authorids": "~Thomas_Nagler1;~Lennart_Schneider1;~Bernd_Bischl1;~Matthias_Feurer2", "gender": "M;;M;", "homepage": "http://www.tnagler.com;;https://www.slds.stat.uni-muenchen.de/;", "dblp": "22/8947;;48/5326;", "google_scholar": "tZR1rZoAAAAJ;;https://scholar.google.de/citations?user=s34UckkAAAAJ;", "orcid": "0000-0003-1855-0046;;0000-0001-6002-6980;", "linkedin": ";;;", "or_profile": "~Thomas_Nagler1;~Lennart_Schneider1;~Bernd_Bischl1;~Matthias_Feurer2", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;;LMU;", "aff_domain": "lmu.de;;uni-muenchen.de;", "position": "Associate Professor;;Full Professor;", "bibtex": "@inproceedings{\nnagler2024reshuffling,\ntitle={Reshuffling Resampling Splits Can Improve Generalization of Hyperparameter Optimization},\nauthor={Thomas Nagler and Lennart Schneider and Bernd Bischl and Matthias Feurer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C4SInFLvuB}\n}", "github": "", "reviewers": "sYf9;DL64;QU3x;8FSs;rcFG", "pdf_size": 7569127, "rating": "4;4;6;6;7", "confidence": "3;4;4;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "121;70;125;53;57", "wc_strengths": "78;97;101;347;86", "wc_weaknesses": "103;258;23;4;45", "wc_questions": "93;8;35;141;27", "wc_limitations": "2;11;33;9;5", "wc_review": "397;444;317;554;220", "wc_reply_reviewers": "0;0;12;224;0", "wc_reply_authors": "0;0;0;141;0", "reply_reviewers": "0;0;1;2;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 85.2, 31.396815125104645 ], "wc_strengths_avg": [ 141.8, 102.92016323345003 ], "wc_weaknesses_avg": [ 86.6, 91.91865969431888 ], "wc_questions_avg": [ 60.8, 49.121889214483595 ], "wc_limitations_avg": [ 12.0, 10.954451150103322 ], "wc_review_avg": [ 386.4, 113.15935666130308 ], "wc_reply_reviewers_avg": [ 47.2, 88.52208763918755 ], "wc_reply_authors_avg": [ 28.2, 56.4 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1146740691481151823&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "lmu.de;;uni-muenchen.de;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Stabilizing Zero-Shot Prediction: A Novel Antidote to Forgetting in Continual Vision-Language Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96161", "id": "C4zmR2kyP8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C4zmR2kyP8", "openreview": "https://openreview.net/forum?id=C4zmR2kyP8", "poster": "/media/PosterPDFs/NeurIPS%202024/96161.png?t=1731380302.3666134", "project": "", "author_site": "Zijian Gao, Xingxing Zhang, Kele Xu, Xinjun Mao, Huaimin Wang", "tldr": "", "abstract": "Continual learning (CL) empowers pre-trained vision-language (VL) models to efficiently adapt to a sequence of downstream tasks. However, these models often encounter challenges in retaining previously acquired skills due to parameter shifts and limited access to historical data. In response, recent efforts focus on devising specific frameworks and various replay strategies, striving for a typical learning-forgetting trade-off. Surprisingly, both our empirical research and theoretical analysis demonstrate that the stability of the model in consecutive zero-shot predictions serves as a reliable indicator of its anti-forgetting capabilities for previously learned tasks. \nMotivated by these insights, we develop a novel replay-free CL method named ZAF (Zero-shot Antidote to Forgetting), which preserves acquired knowledge through a zero-shot stability regularization applied to wild data in a plug-and-play manner. To enhance efficiency in adapting to new tasks and seamlessly access historical models, we introduce a parameter-efficient EMA-LoRA neural architecture based on the Exponential Moving Average (EMA). ZAF utilizes new data for low-rank adaptation (LoRA), complemented by a zero-shot antidote on wild data, effectively decoupling learning from forgetting. Our extensive experiments demonstrate ZAF's superior performance and robustness in pre-trained models across various continual VL concept learning tasks, achieving leads of up to 3.70\\%, 4.82\\%, and 4.38\\%, along with at least a 10x acceleration in training speed on three benchmarks, respectively. Additionally, our zero-shot antidote significantly reduces forgetting in existing models by at least 6.37\\%. Our code is available at https://github.com/Zi-Jian-Gao/Stabilizing-Zero-Shot-Prediction-ZAF.", "keywords": "Vision-language Learning; Continual Learning;", "primary_area": "online_learning", "supplementary_material": "/attachment/7436fbeca7264290b95fcf78872259bda81dd99f.zip", "author": "Zijian Gao;Xingxing Zhang;Kele Xu;Xinjun Mao;Huaimin Wang", "authorids": "~Zijian_Gao3;~Xingxing_Zhang3;~Kele_Xu2;~Xinjun_Mao1;~Huaimin_Wang1", "gender": "M;F;;M;M", "homepage": ";https://indussky8.github.io/;;http://www.nudt.edu.cn/xjmao;", "dblp": "250/3021;;;74/4827;02/661", "google_scholar": ";https://scholar.google.com.hk/citations?user=RKjiLyAAAAAJ;;;", "orcid": "0000-0001-5151-3381;0000-0002-2909-1589;;;", "linkedin": ";;;;", "or_profile": "~Zijian_Gao3;~Xingxing_Zhang3;~Kele_Xu2;~Xinjun_Mao1;~Huaimin_Wang1", "aff": "National University of Defense Technology;Tsinghua University;;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;mail.tsinghua.edu.cn;;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngao2024stabilizing,\ntitle={Stabilizing Zero-Shot Prediction: A Novel Antidote to Forgetting in Continual Vision-Language Tasks},\nauthor={Zijian Gao and Xingxing Zhang and Kele Xu and Xinjun Mao and Huaimin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C4zmR2kyP8}\n}", "github": "", "reviewers": "DvN8;CbcE;wYS7", "pdf_size": 7533192, "rating": "5;5;6", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "90;81;55", "wc_strengths": "36;94;52", "wc_weaknesses": "426;251;63", "wc_questions": "80;3;9", "wc_limitations": "8;8;19", "wc_review": "640;437;198", "wc_reply_reviewers": "504;298;15", "wc_reply_authors": "669;767;20", "reply_reviewers": "2;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 75.33333333333333, 14.83988619303471 ], "wc_strengths_avg": [ 60.666666666666664, 24.458581770458856 ], "wc_weaknesses_avg": [ 246.66666666666666, 148.22580372151432 ], "wc_questions_avg": [ 30.666666666666668, 34.96982826507572 ], "wc_limitations_avg": [ 11.666666666666666, 5.185449728701348 ], "wc_review_avg": [ 425.0, 180.6451401689696 ], "wc_reply_reviewers_avg": [ 272.3333333333333, 200.45670078320876 ], "wc_reply_authors_avg": [ 485.3333333333333, 331.4637570266502 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8368654493257983000&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "nudt.edu.cn;mail.tsinghua.edu.cn;;nudt.edu.cn;nudt.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "National University of Defense Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUDT;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Multistep Distillation of Diffusion Models via Moment Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96160", "id": "C62d2nS3KO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=C62d2nS3KO", "openreview": "https://openreview.net/forum?id=C62d2nS3KO", "poster": "", "project": "", "author_site": "Tim Salimans, Thomas Mensink, Jonathan Heek, Emiel Hoogeboom", "tldr": "", "abstract": "We present a new method for making diffusion models faster to sample. The method distills many-step diffusion models into few-step models by matching conditional expectations of the clean data given noisy data along the sampling trajectory. Our approach extends recently proposed one-step methods to the multi-step case, and provides a new perspective by interpreting these approaches in terms of moment matching. By using up to 8 sampling steps, we obtain distilled models that outperform not only their one-step versions but also their original many-step teacher models, obtaining new state-of-the-art results on the Imagenet dataset. We also show promising results on a large text-to-image model where we achieve fast generation of high resolution images directly in image space, without needing autoencoders or upsamplers.", "keywords": "generative modeling;diffusion;distillation", "primary_area": "generative_models", "supplementary_material": "", "author": "Tim Salimans;Thomas Mensink;Jonathan Heek;Emiel Hoogeboom", "authorids": "~Tim_Salimans1;~Thomas_Mensink1;~Jonathan_Heek1;~Emiel_Hoogeboom1", "gender": "M;M;;", "homepage": ";http://www.mensink.nu;;", "dblp": "116/2791;95/2677;247/1004;217/1488", "google_scholar": ";https://scholar.google.nl/citations?user=ADII6_IAAAAJ;;https://scholar.google.nl/citations?user=nkTd_BIAAAAJ", "orcid": ";0000-0002-5730-713X;;", "linkedin": ";;;", "or_profile": "~Tim_Salimans1;~Thomas_Mensink1;~Jonathan_Heek1;~Emiel_Hoogeboom1", "aff": "Google;Google Research;Google;Google", "aff_domain": "google.com;google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;Software Engineer;Researcher", "bibtex": "@inproceedings{\nsalimans2024multistep,\ntitle={Multistep Distillation of Diffusion Models via Moment Matching},\nauthor={Tim Salimans and Thomas Mensink and Jonathan Heek and Emiel Hoogeboom},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=C62d2nS3KO}\n}", "github": "", "reviewers": "V8xX;cU8D;Xksr;14eo;5zBL", "pdf_size": 7792610, "rating": "6;6;6;6;7", "confidence": "3;5;3;3;3", "soundness": "3;2;3;3;3", "novelty": "3;3;4;2;3", "presentation": "3;3;2;3;3", "wc_summary": "67;118;31;95;36", "wc_strengths": "79;75;42;81;55", "wc_weaknesses": "109;274;71;216;43", "wc_questions": "60;11;138;3;75", "wc_limitations": "17;1;16;1;20", "wc_review": "332;479;298;396;229", "wc_reply_reviewers": "13;13;43;56;27", "wc_reply_authors": "0;0;9;22;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.8000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.4, 33.505820389896435 ], "wc_strengths_avg": [ 66.4, 15.304901175767192 ], "wc_weaknesses_avg": [ 142.6, 88.12400354046564 ], "wc_questions_avg": [ 57.4, 48.836871316659916 ], "wc_limitations_avg": [ 11.0, 8.270429251254134 ], "wc_review_avg": [ 346.8, 85.2933760616849 ], "wc_reply_reviewers_avg": [ 30.4, 16.918628786045282 ], "wc_reply_authors_avg": [ 6.2, 8.634813257969162 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.24999999999999992, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3200185100056485641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "An In-depth Investigation of Sparse Rate Reduction in Transformer-like Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96159", "id": "CAC74VuMWX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CAC74VuMWX", "openreview": "https://openreview.net/forum?id=CAC74VuMWX", "poster": "/media/PosterPDFs/NeurIPS%202024/96159.png?t=1731756025.3486686", "project": "", "author_site": "Yunzhe Hu, Difan Zou, Dong Xu", "tldr": "", "abstract": "Deep neural networks have long been criticized for being black-box. To unveil the inner workings of modern neural architectures, a recent work proposed an information-theoretic objective function called Sparse Rate Reduction (SRR) and interpreted its unrolled optimization as a Transformer-like model called Coding Rate Reduction Transformer (CRATE). However, the focus of the study was primarily on the basic implementation, and whether this objective is optimized in practice and its causal relationship to generalization remain elusive. Going beyond this study, we derive different implementations by analyzing layer-wise behaviors of CRATE, both theoretically and empirically. To reveal the predictive power of SRR on generalization, we collect a set of model variants induced by varied implementations and hyperparameters and evaluate SRR as a complexity measure based on its correlation with generalization. Surprisingly, we find out that SRR has a positive correlation coefficient and outperforms other baseline measures, such as path-norm and sharpness-based ones. Furthermore, we show that generalization can be improved using SRR as regularization on benchmark image classification datasets. We hope this paper can shed light on leveraging SRR to design principled models and study their generalization ability.", "keywords": "Sparse Rate Reduction;Transformer;Complexity Measure;Generalization", "primary_area": "evaluation", "supplementary_material": "", "author": "Yunzhe Hu;Difan Zou;Dong Xu", "authorids": "~Yunzhe_Hu1;~Difan_Zou1;~Dong_Xu2", "gender": "M;M;Unspecified", "homepage": ";https://difanzou.github.io/;https://www.cs.hku.hk/people/academic-staff/dongxu", "dblp": "301/9545;161/8923;09/3493-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Cp4fcTQAAAAJ;7Hdu5k4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yunzhe_Hu1;~Difan_Zou1;~Dong_Xu2", "aff": "University of Hong Kong;University of Hong Kong;University of Hong Kong", "aff_domain": "hku.hk;hku.hk;hku.hk", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024an,\ntitle={An In-depth Investigation of Sparse Rate Reduction in Transformer-like Models},\nauthor={Yunzhe Hu and Difan Zou and Dong Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CAC74VuMWX}\n}", "github": "", "reviewers": "uGk9;SqrQ;7qne;RBPW", "pdf_size": 614940, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "2;3;3;2", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "197;74;67;153", "wc_strengths": "88;104;59;88", "wc_weaknesses": "518;441;60;300", "wc_questions": "26;2;1;168", "wc_limitations": "21;1;1;33", "wc_review": "850;622;188;742", "wc_reply_reviewers": "100;249;33;80", "wc_reply_authors": "175;109;0;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 122.75, 54.57277251523877 ], "wc_strengths_avg": [ 84.75, 16.23845743905498 ], "wc_weaknesses_avg": [ 329.75, 174.25896677072316 ], "wc_questions_avg": [ 49.25, 69.28699372898207 ], "wc_limitations_avg": [ 14.0, 13.674794331177344 ], "wc_review_avg": [ 600.5, 251.44134504890002 ], "wc_reply_reviewers_avg": [ 115.5, 80.82233602167163 ], "wc_reply_authors_avg": [ 77.75, 69.01947189018473 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Bn7eR_I2o-EJ:scholar.google.com/&scioq=An+In-depth+Investigation+of+Sparse+Rate+Reduction+in+Transformer-like+Models&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "hku.hk;hku.hk;hku.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Improving Linear System Solvers for Hyperparameter Optimisation in Iterative Gaussian Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96158", "id": "CAdBTYBlOv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CAdBTYBlOv", "openreview": "https://openreview.net/forum?id=CAdBTYBlOv", "poster": "", "project": "", "author_site": "Jihao Andreas Lin, Shreyas Padhy, Bruno Mlodozeniec, Javier Antor\u00e1n, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "Scaling hyperparameter optimisation to very large datasets remains an open problem in the Gaussian process community. This paper focuses on iterative methods, which use linear system solvers, like conjugate gradients, alternating projections or stochastic gradient descent, to construct an estimate of the marginal likelihood gradient. We discuss three key improvements which are applicable across solvers: (i) a pathwise gradient estimator, which reduces the required number of solver iterations and amortises the computational cost of making predictions, (ii) warm starting linear system solvers with the solution from the previous step, which leads to faster solver convergence at the cost of negligible bias, (iii) early stopping linear system solvers after a limited computational budget, which synergises with warm starting, allowing solver progress to accumulate over multiple marginal likelihood steps. These techniques provide speed-ups of up to $72\\times$ when solving to tolerance, and decrease the average residual norm by up to $7\\times$ when stopping early.", "keywords": "Gaussian process;marginal likelihood;iterative linear system solver;pathwise;warm start;conjugate gradient;alternating projection;stochastic gradient descent", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Jihao Andreas Lin;Shreyas Padhy;Bruno Kacper Mlodozeniec;Javier Antoran;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Jihao_Andreas_Lin1;~Shreyas_Padhy1;~Bruno_Kacper_Mlodozeniec2;~Javier_Antoran1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;M;Not Specified;Unspecified;", "homepage": "https://jandylin.github.io;http://shreyaspadhy.github.io;https://brunokm.github.io;https://javierantoran.github.io/about/;http://jmhl.org", "dblp": "279/2864;267/9851;241/6874;234/8818.html;40/6058", "google_scholar": "Bn1GyeEAAAAJ;JxbV2R0AAAAJ;kGPBRy8AAAAJ;_b-Cs2cAAAAJ;BEBccCQAAAAJ", "orcid": ";;;0000-0003-2877-2689;0000-0001-7610-949X", "linkedin": "jihao-andreas-lin/;;bkmlodozeniec/;javier-antoran/;", "or_profile": "~Jihao_Andreas_Lin1;~Shreyas_Padhy1;~Bruno_Kacper_Mlodozeniec2;~Javier_Antoran1;~Jose_Miguel_Hernandez_Lobato1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlin2024improving,\ntitle={Improving Linear System Solvers for Hyperparameter Optimisation in Iterative Gaussian Processes},\nauthor={Jihao Andreas Lin and Shreyas Padhy and Bruno Kacper Mlodozeniec and Javier Antoran and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CAdBTYBlOv}\n}", "github": "", "reviewers": "NcMH;89S9;upE1;Xgts", "pdf_size": 4090649, "rating": "3;5;6;7", "confidence": "4;2;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;4;3", "wc_summary": "61;120;119;77", "wc_strengths": "22;36;78;80", "wc_weaknesses": "152;232;277;86", "wc_questions": "25;18;44;99", "wc_limitations": "13;1;1;11", "wc_review": "273;407;519;353", "wc_reply_reviewers": "575;31;97;89", "wc_reply_authors": "1704;90;30;415", "reply_reviewers": "2;1;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.25, 25.878321042911576 ], "wc_strengths_avg": [ 54.0, 25.495097567963924 ], "wc_weaknesses_avg": [ 186.75, 73.40086852347184 ], "wc_questions_avg": [ 46.5, 31.768695283250146 ], "wc_limitations_avg": [ 6.5, 5.545268253204709 ], "wc_review_avg": [ 388.0, 89.40357934669059 ], "wc_reply_reviewers_avg": [ 198.0, 219.14607000811125 ], "wc_reply_authors_avg": [ 559.75, 676.6758363500207 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7612172009361161582&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "DropEdge not Foolproof: Effective Augmentation Method for Signed Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96157", "id": "CDe2zBPioj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CDe2zBPioj", "openreview": "https://openreview.net/forum?id=CDe2zBPioj", "poster": "", "project": "", "author_site": "ZEYU ZHANG, Lu Li, Shuyan Wan, Wang, Zhiyi Wang, Zhiyuan Lu, Dong Hao, Wanli Li", "tldr": "", "abstract": "Signed graphs can model friendly or antagonistic relations where edges are annotated with a positive or negative sign. The main downstream task in signed graph analysis is $\\textit{link sign prediction}$. Signed Graph Neural Networks (SGNNs) have been widely used for signed graph representation learning. While significant progress has been made in SGNNs research, two issues (i.e., graph sparsity and unbalanced triangles) persist in the current SGNN models. We aim to alleviate these issues through data augmentation ($\\textit{DA}$) techniques which have demonstrated effectiveness in improving the performance of graph neural networks. However, most graph augmentation methods are primarily aimed at graph-level and node-level tasks (e.g., graph classification and node classification) and cannot be directly applied to signed graphs due to the lack of side information (e.g., node features and label information) in available real-world signed graph datasets. Random $\\textit{DropEdge} $is one of the few $\\textit{DA}$ methods that can be directly used for signed graph data augmentation, but its effectiveness is still unknown. In this paper, we first provide the generalization bound for the SGNN model and demonstrate from both experimental and theoretical perspectives that the random $\\textit{DropEdge}$ cannot improve the performance of link sign prediction. Therefore, we propose a novel signed graph augmentation method, $\\underline{S}$igned $\\underline{G}$raph $\\underline{A}$ugmentation framework (SGA). Specifically, SGA first integrates a structure augmentation module to detect candidate edges solely based on network information. Furthermore, SGA incorporates a novel strategy to select beneficial candidates. Finally, SGA introduces a novel data augmentation perspective to enhance the training process of SGNNs. Experiment results on six real-world datasets demonstrate that SGA effectively boosts the performance of diverse SGNN models, achieving improvements of up to 32.3\\% in F1-micro for SGCN on the Slashdot dataset in the link sign prediction task.", "keywords": "Graph Neural Networks;Signed Graph Neural Networks;Graph Data Augmentation;Stability;Generalization Guarantees", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Zeyu Zhang;Lu Li;Shuyan Wan;Sijie Wang;Zhiyi Wang;Zhiyuan Lu;Dong Hao;Wanli.Li", "authorids": "~Zeyu_Zhang3;~Lu_Li9;~Shuyan_Wan1;~Sijie_Wang2;~Zhiyi_Wang1;~Zhiyuan_Lu3;~Dong_Hao1;~Wanli.Li1", "gender": ";M;M;F;M;M;M;M", "homepage": "https://orcid.org/0009-0000-6800-8373;https://github.com/Ladybirdzzz;;https://github.com/Kristen-77;https://github.com/Litchi3914;https://sites.google.com/view/hao-dong/;https://leeworry.github.io/;https://alex-zeyu.github.io/", "dblp": ";;;;;https://dblp.org/pers/hd/h/Hao:Dong;19/7699-2;44/8352-4", "google_scholar": ";;;;;aUBGG_sAAAAJ;G_h0_j8AAAAJ;https://scholar.google.co.nz/citations?user=P5aygEcAAAAJ", "orcid": ";;0009-0007-7202-6153;;;0000-0002-1476-2861;;0000-0002-2376-6151", "linkedin": ";;;;;;;alexzhangzeyu/", "or_profile": "~Lu_Li9;~Shuyan_Wan1;~Sijie_Wang2;~Zhiyi_Wang1;~Zhiyuan_Lu3;~Dong_Hao1;~Wanli.Li1;~ZEYU_ZHANG2", "aff": "Huazhong Agricultural University;University of Electronic Science and Technology of China;;Huazhong Agricultural University;Huazhong Agricultural University;University of Electronic Science and Technology of China;Huazhong Agricultural University;Huazhong Agricultural University", "aff_domain": "hzau.edu.cn;uestc.edu.cn;;hzau.edu;hzau.edu.cn;edu.cn;hzau.edu.cn;hzau.edu.cn", "position": "MS student;MS student;;Undergrad student;Undergrad student;Associate Professor;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nzhang2024dropedge,\ntitle={DropEdge not Foolproof: Effective Augmentation Method for Signed Graph Neural Networks},\nauthor={Zeyu Zhang and Lu Li and Shuyan Wan and Sijie Wang and Zhiyi Wang and Zhiyuan Lu and Dong Hao and Wanli.Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CDe2zBPioj}\n}", "github": "", "reviewers": "ShtJ;QdQs;B6jA;wfoh", "pdf_size": 5076317, "rating": "5;6;7;7", "confidence": "4;3;5;5", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "86;76;117;199", "wc_strengths": "79;29;94;98", "wc_weaknesses": "59;182;128;133", "wc_questions": "58;71;4;2", "wc_limitations": "80;9;60;2", "wc_review": "362;367;403;434", "wc_reply_reviewers": "32;32;30;29", "wc_reply_authors": "21;26;23;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 119.5, 48.32442446630896 ], "wc_strengths_avg": [ 75.0, 27.486360253769504 ], "wc_weaknesses_avg": [ 125.5, 43.8092455995307 ], "wc_questions_avg": [ 33.75, 31.09963826156182 ], "wc_limitations_avg": [ 37.75, 33.10872241570188 ], "wc_review_avg": [ 391.5, 29.19332115399 ], "wc_reply_reviewers_avg": [ 30.75, 1.299038105676658 ], "wc_reply_authors_avg": [ 17.5, 10.259142264341596 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=559507404103264138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "hzau.edu.cn;uestc.edu.cn;;hzau.edu;hzau.edu.cn;edu.cn;hzau.edu.cn;hzau.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;1;0;0", "aff_unique_norm": "Huazhong Agricultural University;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "http://www.hzau.edu.cn/;https://www.uestc.edu.cn", "aff_unique_abbr": "HAU;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "CECVgSZbLW", "title": "Distributional Monte-Carlo Planning with Thompson Sampling in Stochastic Environments", "track": "main", "status": "Reject", "tldr": "", "abstract": "We focus on a class of reinforcement learning algorithms, Monte-Carlo Tree Search (MCTS), in stochastic settings. While recent advancements combining MCTS with deep learning have excelled in deterministic environments, they face challenges in highly stochastic settings, leading to suboptimal action choices and decreased performance. Distributional Reinforcement Learning (RL) addresses these challenges by extending the traditional Bellman equation to consider value distributions instead of a single mean value, showing promising results in Deep Q Learning. In this paper, we bring the concept of Distributional RL to MCTS, focusing on modeling value functions as categorical and particle distributions. Consequently, we propose two novel algorithms: Categorical Thompson Sampling for MCTS (CATS), which uses categorical distributions for Q values, and Particle Thompson Sampling for MCTS (PATS), which models Q values with particle-based distributions. Both algorithms employ Thompson Sampling to handle action selection randomness. Our contributions are threefold: We introduce a distributional framework for Monte-Carlo Planning to model uncertainty in return estimation. We prove the effectiveness of our algorithms by achieving a non-asymptotic problem-dependent upper bound on simple regret of order $O(n^{-1})$, where $n$ is the number of trajectories. We provide empirical evidence demonstrating the efficacy of our approach compared to baselines in both stochastic and deterministic environments.", "keywords": "Monte-Carlo Tree Search;Planning;Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1802729172faf26c210eb18aea4ebcdf4b0341ff.zip", "author": "Tuan Quang Dam;Brahim Driss;Odalric-Ambrym Maillard", "authorids": "~Tuan_Quang_Dam1;~Brahim_Driss1;~Odalric-Ambrym_Maillard3", "gender": "M;M;", "homepage": "https://tuanquangdam.com/;;http://odalricambrymmaillard.neowordpress.fr/", "dblp": "252/5881.html;;83/7401", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.fr/citations?hl=fr", "orcid": ";;", "linkedin": ";brahimdriss/;", "or_profile": "~Tuan_Quang_Dam1;~Brahim_Driss1;~odalric-ambrym_maillard1", "aff": "Scool, INRIA;INRIA;inria", "aff_domain": "inria.fr;inria.fr;inria.fr", "position": "Postdoc;Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024distributional,\ntitle={Distributional Monte-Carlo Planning with Thompson Sampling in Stochastic Environments},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=CECVgSZbLW}\n}", "github": "", "project": "", "reviewers": "Fztp;1ykv;Wm16;TwqS", "site": "https://openreview.net/forum?id=CECVgSZbLW", "pdf_size": 1331765, "rating": "3;3;4;5", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "novelty": "1;3;2;3", "presentation": "1;1;2;2", "wc_summary": "64;65;50;85", "wc_strengths": "21;67;44;25", "wc_weaknesses": "152;335;97;90", "wc_questions": "44;1;192;17", "wc_limitations": "132;38;372;16", "wc_review": "413;506;755;233", "wc_reply_reviewers": "95;109;197;222", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 66.0, 12.469963913339926 ], "wc_strengths_avg": [ 39.25, 18.226011631731172 ], "wc_weaknesses_avg": [ 168.5, 99.08203671705583 ], "wc_questions_avg": [ 63.5, 75.76443756803056 ], "wc_limitations_avg": [ 139.5, 141.12671611002645 ], "wc_review_avg": [ 476.75, 188.25298802409486 ], "wc_reply_reviewers_avg": [ 155.75, 54.69632071721095 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LcW3-TKVUsgJ:scholar.google.com/&scioq=Distributional+Monte-Carlo+Planning+with+Thompson+Sampling+in+Stochastic+Environments&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "Scool", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Mind's Eye of LLMs: Visualization-of-Thought Elicits Spatial Reasoning in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96156", "id": "CEJ1mYPgWw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CEJ1mYPgWw", "openreview": "https://openreview.net/forum?id=CEJ1mYPgWw", "poster": "/media/PosterPDFs/NeurIPS%202024/96156.png?t=1733378967.049707", "project": "", "author_site": "Wenshan Wu, Shaoguang Mao, Yadong Zhang, Yan Xia, Li Dong, Lei Cui, Furu Wei", "tldr": "", "abstract": "Large language models (LLMs) have exhibited impressive performance in language comprehension and various reasoning tasks. However, their abilities in spatial reasoning, a crucial aspect of human cognition, remain relatively unexplored. Human possess a remarkable ability to create mental images of unseen objects and actions through a process known as the Mind's Eye, enabling the imagination of the unseen world. Inspired by this cognitive capacity, we propose Visualization-of-Thought (VoT) prompting. VoT aims to elicit spatial reasoning of LLMs by visualizing their reasoning traces, thereby guiding subsequent reasoning steps. We employed VoT for multi-hop spatial reasoning tasks, including natural language navigation, visual navigation, and visual tiling in 2D grid worlds. Experimental results demonstrated that VoT significantly enhances the spatial reasoning abilities of LLMs. Notably, VoT outperformed existing multimodal large language models (MLLMs) in these tasks. While VoT works surprisingly well on LLMs, the ability to generate mental images to facilitate spatial reasoning resembles the mind's eye process, suggesting its potential viability in MLLMs. Please find the dataset and codes in our [project page](https://microsoft.github.io/visualization-of-thought).", "keywords": "Large Language Model;Spatial Reasoning;LLM Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Wenshan Wu;Shaoguang Mao;Yadong Zhang;Yan Xia;Li Dong;Lei Cui;Furu Wei", "authorids": "~Wenshan_Wu2;~Shaoguang_Mao1;~Yadong_Zhang1;~Yan_Xia7;~Li_Dong1;~Lei_Cui2;~Furu_Wei1", "gender": ";M;M;M;M;M;M", "homepage": ";https://www.linkedin.com/in/shaoguang-mao-929733120/;https://github.com/enria;https://www.microsoft.com/en-us/research/people/yanxia/;http://dong.li;https://www.microsoft.com/en-us/research/people/lecu/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": ";214/0365;;;85/5090-4;47/5523-1.html;72/5870", "google_scholar": ";S6XnZsQAAAAJ;;;wEfQgPgAAAAJ;ajJQoUcAAAAJ;G-V1VpwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Wenshan_Wu2;~Shaoguang_Mao1;~Yadong_Zhang1;~Yan_Xia7;~Li_Dong1;~Lei_Cui2;~Furu_Wei1", "aff": ";Microsoft;East China Normal University;Microsoft Research;Microsoft Research;Microsoft Research Asia;Microsoft Research", "aff_domain": ";microsoft.com;ecnu.edu.cn;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": ";Researcher;PhD student;Researcher;Principal Researcher;Principal Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nwu2024minds,\ntitle={Mind's Eye of {LLM}s: Visualization-of-Thought Elicits Spatial Reasoning in Large Language Models},\nauthor={Wenshan Wu and Shaoguang Mao and Yadong Zhang and Yan Xia and Li Dong and Lei Cui and Furu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CEJ1mYPgWw}\n}", "github": "", "reviewers": "yVwi;us5D;MbkM;Af54", "pdf_size": 13303177, "rating": "5;5;6;8", "confidence": "3;4;5;4", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "2;3;4;4", "wc_summary": "84;58;84;78", "wc_strengths": "57;51;87;111", "wc_weaknesses": "630;200;159;52", "wc_questions": "76;4;4;18", "wc_limitations": "7;5;4;10", "wc_review": "854;318;338;269", "wc_reply_reviewers": "35;18;0;26", "wc_reply_authors": "88;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 76.0, 10.677078252031311 ], "wc_strengths_avg": [ 76.5, 24.14021540914662 ], "wc_weaknesses_avg": [ 260.25, 220.20714679592032 ], "wc_questions_avg": [ 25.5, 29.71110903349116 ], "wc_limitations_avg": [ 6.5, 2.29128784747792 ], "wc_review_avg": [ 444.75, 237.61036909192325 ], "wc_reply_reviewers_avg": [ 19.75, 12.891373084353738 ], "wc_reply_authors_avg": [ 22.0, 38.1051177665153 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.28867513459481287, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13333300311900323302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";microsoft.com;ecnu.edu.cn;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Microsoft;East China Normal University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;http://www.ecnu.edu.cn", "aff_unique_abbr": "Microsoft;ECNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Convergence of No-Swap-Regret Dynamics in Self-Play", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96155", "id": "CEnoUjEqNx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CEnoUjEqNx", "openreview": "https://openreview.net/forum?id=CEnoUjEqNx", "poster": "", "project": "", "author_site": "Renato Leme, Georgios Piliouras, Jon Schneider", "tldr": "", "abstract": "In this paper, we investigate the question of whether no-swap-regret dynamics have stronger convergence properties in repeated games than regular no-external-regret dynamics. We prove that in almost all symmetric zero-sum games under symmetric initializations of the agents, no-swap-regret dynamics in self-play are guaranteed to converge in a strong ``frequent-iterate'' sense to the Nash equilibrium: in all but a vanishing fraction of the rounds, the players must play a strategy profile close to a symmetric Nash equilibrium. Remarkably, relaxing any of these three constraints, i.e. by allowing either i) asymmetric initial conditions, or ii) an asymmetric game or iii) no-external regret dynamics suffices to destroy this result and lead to complex non-equilibrating or even chaotic behavior. \n\nIn a dual type of result, we show that the power of no-swap-regret dynamics comes at a cost of imposing a time-asymmetry on its inputs. While no-external-regret dynamics can be completely determined by the cumulative reward vector received by each player, we show there does not exist any general no-swap-regret dynamics defined on the same state space. In fact, we prove that any no-swap-regret learning algorithm must play a time-asymmetric function over the set of previously observed rewards, ruling out any dynamics based on a symmetric function of the current set of rewards.", "keywords": "online learning;game theory;dynamics", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Renato Paes Leme;Georgios Piliouras;Jon Schneider", "authorids": "~Renato_Paes_Leme1;~Georgios_Piliouras1;~Jon_Schneider1", "gender": ";;M", "homepage": ";;https://jschnei.github.io", "dblp": "https://dblp.org/pers/hd/l/Leme:Renato_Paes;62/1236;146/0503", "google_scholar": ";;Jc97EyAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Renato_Paes_Leme1;~Georgios_Piliouras1;~Jon_Schneider1", "aff": "Google;Singapore University of Technology and Design;Google", "aff_domain": "google.com;sutd.edu.sg;google.com", "position": "Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nleme2024convergence,\ntitle={Convergence of No-Swap-Regret Dynamics in Self-Play},\nauthor={Renato Paes Leme and Georgios Piliouras and Jon Schneider},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CEnoUjEqNx}\n}", "github": "", "reviewers": "NEnG;r1mG;b4Uc;xwUS", "pdf_size": 1061980, "rating": "4;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "133;157;92;117", "wc_strengths": "41;72;161;196", "wc_weaknesses": "185;161;168;58", "wc_questions": "44;89;2;94", "wc_limitations": "2;2;7;10", "wc_review": "405;481;430;475", "wc_reply_reviewers": "99;201;18;63", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 124.75, 23.668280461410795 ], "wc_strengths_avg": [ 117.5, 63.20007911387453 ], "wc_weaknesses_avg": [ 143.0, 49.84475900232641 ], "wc_questions_avg": [ 57.25, 37.37228250990298 ], "wc_limitations_avg": [ 5.25, 3.418698582794336 ], "wc_review_avg": [ 447.75, 31.586191603293994 ], "wc_reply_reviewers_avg": [ 95.25, 67.46248957754227 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9o0pfJZAI84J:scholar.google.com/&scioq=Convergence+of+No-Swap-Regret+Dynamics+in+Self-Play&hl=en&as_sdt=0,47", "gs_version_total": 0, "email": "google.com;sutd.edu.sg;google.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;Singapore University of Technology and Design", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.sutd.edu.sg", "aff_unique_abbr": "Google;SUTD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Singapore" }, { "title": "Cross-Scale Self-Supervised Blind Image Deblurring via Implicit Neural Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96154", "id": "CFez7MFUFd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CFez7MFUFd", "openreview": "https://openreview.net/forum?id=CFez7MFUFd", "poster": "/media/PosterPDFs/NeurIPS%202024/96154.png?t=1730309527.2389495", "project": "", "author_site": "Tianjing Zhang, Yuhui Quan, Hui Ji", "tldr": "", "abstract": "Blind image deblurring (BID) is an important yet challenging image recovery problem. Most existing deep learning methods require supervised training with ground truth (GT) images. This paper introduces a self-supervised method for BID that does not require GT images. The key challenge is to regularize the training to prevent over-fitting due to the absence of GT images. By leveraging an exact relationship among the blurred image, latent image, and blur kernel across consecutive scales, we propose an effective cross-scale consistency loss. This is implemented by representing the image and kernel with implicit neural representations (INRs), whose resolution-free property enables consistent yet efficient computation for network training across multiple scales. Combined with a progressively coarse-to-fine training scheme, the proposed method significantly outperforms existing self-supervised methods in extensive experiments.", "keywords": "Blind Image Deblurring;Self-Supervised Learning;Implicit Neural Representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Tianjing Zhang;Yuhui Quan;Hui Ji", "authorids": "~Tianjing_Zhang2;~Yuhui_Quan5;~Hui_Ji1", "gender": ";;M", "homepage": ";;https://blog.nus.edu.sg/matjh/", "dblp": ";;", "google_scholar": ";;AsKY0XoAAAAJ", "orcid": ";;0000-0002-1674-6056", "linkedin": ";;", "or_profile": "~Tianjing_Zhang2;~Yuhui_Quan5;~Hui_Ji1", "aff": ";;National University of Singapore", "aff_domain": ";;nus.edu.sg", "position": ";;Full Professor", "bibtex": "@inproceedings{\nzhang2024crossscale,\ntitle={Cross-Scale Self-Supervised Blind Image Deblurring via Implicit Neural Representation},\nauthor={Tianjing Zhang and Yuhui Quan and Hui Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CFez7MFUFd}\n}", "github": "", "reviewers": "UEmh;PNcB;ihYN;QZrB", "pdf_size": 45777920, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;2", "wc_summary": "43;81;78;67", "wc_strengths": "17;165;29;76", "wc_weaknesses": "212;155;139;102", "wc_questions": "60;388;86;36", "wc_limitations": "1;113;11;56", "wc_review": "333;902;343;337", "wc_reply_reviewers": "37;67;0;51", "wc_reply_authors": "98;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.25, 14.939461168328663 ], "wc_strengths_avg": [ 71.75, 58.17806717311946 ], "wc_weaknesses_avg": [ 152.0, 39.61691557908061 ], "wc_questions_avg": [ 142.5, 142.83819517201974 ], "wc_limitations_avg": [ 45.25, 44.2627100390385 ], "wc_review_avg": [ 478.75, 244.3894177332562 ], "wc_reply_reviewers_avg": [ 38.75, 24.762623043611516 ], "wc_reply_authors_avg": [ 24.5, 42.4352447854375 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Gyma32Ih8tUJ:scholar.google.com/&scioq=Cross-Scale+Self-Supervised+Blind+Image+Deblurring+via+Implicit+Neural+Representation&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";;nus.edu.sg", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "id": "CGZGY3X8NH", "title": "ELSA: Evaluating Localization of Social Activities in Urban Streets using Open-Vocabulary Detection", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Existing Open Vocabulary Detection (OVD) models exhibit a number of challenges. They often struggle with semantic consistency across diverse inputs, and are often sensitive to slight variations in input phrasing, leading to inconsistent performance. The calibration of their predictive confidence, especially in complex multi-label scenarios, remains suboptimal, frequently resulting in overconfident predictions that do not accurately reflect their context understanding. The Understanding of those limitations requires multi-label detection benchmarks. Among those, one challenging domain is social activity interaction. Due to the lack of multi-label benchmarks for social interactions, in this work we present ELSA: Evaluating Localization of Social Activities. ELSA draws on theoretical frameworks in urban sociology and design and uses in-the-wild street-level imagery, where the size of social groups and the types of activities can vary significantly. ELSA includes more than 900 manually annotated images with more than 4,000 multi-labeled bounding boxes for individual and group activities. We introduce a novel re-ranking method for predictive confidence and new evaluation techniques for OVD models. We report our results on the widely-used, SOTA model Grounding DINO. Our evaluation protocol considers semantic stability and localization accuracy and sheds more light on the limitations of the existing approaches.", "keywords": "computer vision;grounded vision-language models;open-vocabulary object detection;social interaction recognition;benchmark", "primary_area": "", "supplementary_material": "/attachment/b242797aa6982fc9c4e7388935559093e8cf3cb7.zip", "author": "Maryam Hosseini;Marco Cipriano;Daniel Hodczak;Sedigheh Eslami;Liu Liu;Andres Sevtsuk;Gerard de Melo", "authorids": "~Maryam_Hosseini2;~Marco_Cipriano1;~Daniel_Hodczak1;~Sedigheh_Eslami1;~Liu_Liu17;~Andres_Sevtsuk1;~Gerard_de_Melo3", "gender": "F;M;Not Specified;F;M;M;M", "homepage": "https://www.maryamhosseini.me/;;;https://www.sarah.eslami.me;http://lyons66.github.io;http://cityform.mit.edu/;http://gerard.demelo.org/", "dblp": ";;;;74/7037-18;22/4875;86/1747", "google_scholar": "d2VXcm8AAAAJ;I9-JCwYAAAAJ;;U1BkZnsAAAAJ;D3PSmFsAAAAJ;2tjH9nEAAAAJ;https://scholar.google.com.tw/citations?user=WCQXaGkAAAAJ", "orcid": "0000-0002-4088-810X;0000-0003-2087-0831;0009-0002-4478-8752;;0000-0003-0100-9763;0000-0001-5098-9636;0000-0002-2930-2059", "linkedin": ";;;sedigheh-sarah-eslami-2779a552/;liu-liu-mit/;;gdemelo/", "or_profile": "~Maryam_Hosseini2;~Marco_Cipriano1;~Daniel_Hodczak1;~Sedigheh_Eslami1;~Liu_Liu17;~Andres_Sevtsuk1;~Gerard_Melo1", "aff": "Massachusetts Institute of Technology;Hasso Plattner Institute;University of Illinois at Chicago;Hasso Plattner Institute;Amazon;Massachusetts Institute of Technology;University of Potsdam", "aff_domain": "mit.edu;hpi.de;uic.edu;hpi.de;amazon.com;mit.edu;uni-potsdam.de", "position": "Postdoc;PhD student;Undergrad student;PhD student;Applied Scientist Intern;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2024elsa,\ntitle={{ELSA}: Evaluating Localization of Social Activities in Urban Streets using Open-Vocabulary Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=CGZGY3X8NH}\n}", "github": "", "project": "", "reviewers": "jcjB;nrdp;Tsa4", "site": "https://openreview.net/forum?id=CGZGY3X8NH", "pdf_size": 2821658, "rating": "5;6;7", "confidence": "3;4;3", "wc_summary_and_contributions": "53;43;86", "wc_strengths": "76;9;50", "wc_improvement": "1;82;17", "wc_limitations": "180;11;11", "wc_correctness": "13;1;39", "wc_clarity": "13;1;50", "wc_relation_to_prior_work": "18;1;23", "wc_documentation": "21;1;37", "wc_additional_feedback": "1;1;1", "wc_review": "376;150;314", "wc_reply_reviewers": "0;0;53", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 60.666666666666664, 18.372685039360892 ], "wc_strengths_avg": [ 45.0, 27.58018612458347 ], "wc_improvement_avg": [ 33.333333333333336, 35.02697373295171 ], "wc_limitations_avg": [ 67.33333333333333, 79.66736401368436 ], "wc_correctness_avg": [ 17.666666666666668, 15.86050300449376 ], "wc_clarity_avg": [ 21.333333333333332, 20.8539897594894 ], "wc_relation_to_prior_work_avg": [ 14.0, 9.41629792788369 ], "wc_documentation_avg": [ 19.666666666666668, 14.72714802291635 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 280.0, 95.34498763263157 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 24.984439601924677 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EebPE-ZBz6cJ:scholar.google.com/&scioq=ELSA:+Evaluating+Localization+of+Social+Activities+in+Urban+Streets+using+Open-Vocabulary+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;3;0;4", "aff_unique_norm": "Massachusetts Institute of Technology;Hasso Plattner Institute;University of Illinois at Chicago;Amazon;University of Potsdam", "aff_unique_dep": ";;;Amazon.com, Inc.;", "aff_unique_url": "https://web.mit.edu;https://www.hpi.de;https://www.uic.edu;https://www.amazon.com;https://www.uni-potsdam.de", "aff_unique_abbr": "MIT;HPI;UIC;Amazon;UP", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;0;1;0;0;1", "aff_country_unique": "United States;Germany" }, { "title": "Self-Supervised Adversarial Training via Diverse Augmented Queries and Self-Supervised Double Perturbation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96153", "id": "CIHdlhfrOo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CIHdlhfrOo", "openreview": "https://openreview.net/forum?id=CIHdlhfrOo", "poster": "/media/PosterPDFs/NeurIPS%202024/96153.png?t=1733077496.471682", "project": "", "author_site": "Ruize Zhang, Sheng Tang, Juan Cao", "tldr": "", "abstract": "Recently, there have been some works studying self-supervised adversarial training, a learning paradigm that learns robust features without labels. While those works have narrowed the performance gap between self-supervised adversarial training (SAT) and supervised adversarial training (supervised AT), a well-established formulation of SAT and its connections with supervised AT are under-explored. Based on a simple SAT benchmark, we find that SAT still faces the problem of large robust generalization gap and degradation on natural samples. We hypothesize this is due to the lack of data complexity and model regularization and propose a method named as DAQ-SDP (Diverse Augmented Queries Self-supervised Double Perturbation). We first challenge the previous conclusion that complex data augmentations degrade robustness in SAT by using diversely augmented samples as queries to guide adversarial training. Inspired by previous works in supervised AT, we then incorporate a self-supervised double perturbation scheme to self-supervised learning (SSL), which promotes robustness transferable to downstream classification. Our work can be seamlessly combined with models pretrained by different SSL frameworks without revising the learning objectives and helps to bridge the gap between SAT and AT. Our method also improves both robust and natural accuracies across different SSL frameworks. Our code is available at https://github.com/rzzhang222/DAQ-SDP.", "keywords": "Self-supervised Learning;Adversarial Training", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ruize Zhang;Sheng Tang;Juan Cao", "authorids": "~Ruize_Zhang2;~Sheng_Tang1;~Juan_Cao1", "gender": "M;M;F", "homepage": ";http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/200909/t20090917_2496726.html;https://www.ict.ac.cn/sourcedb/cn/jssrck/201011/t20101123_3028158.html", "dblp": ";https://dblp.uni-trier.de/pid/62/1647;75/2820-1.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;fSBdNg0AAAAJ", "orcid": "0000-0001-5999-2866;;0000-0002-7857-1546", "linkedin": ";;", "or_profile": "~Ruize_Zhang2;~Sheng_Tang1;~Juan_Cao1", "aff": ", Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024selfsupervised,\ntitle={Self-Supervised Adversarial Training via Diverse Augmented Queries and Self-Supervised Double Perturbation},\nauthor={Ruize Zhang and Sheng Tang and Juan Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CIHdlhfrOo}\n}", "github": "", "reviewers": "iqHN;6QGn;dErS;mcfF", "pdf_size": 750360, "rating": "5;5;5;7", "confidence": "4;3;5;3", "soundness": "3;2;2;3", "novelty": "3;2;2;3", "presentation": "3;1;1;3", "wc_summary": "53;39;137;96", "wc_strengths": "54;36;23;192", "wc_weaknesses": "184;127;128;72", "wc_questions": "8;49;26;21", "wc_limitations": "2;15;19;27", "wc_review": "301;266;333;408", "wc_reply_reviewers": "569;13;27;0", "wc_reply_authors": "1486;144;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "6;4;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 81.25, 38.43419701255641 ], "wc_strengths_avg": [ 76.25, 67.72877896433687 ], "wc_weaknesses_avg": [ 127.75, 39.60034722069997 ], "wc_questions_avg": [ 26.0, 14.815532390029054 ], "wc_limitations_avg": [ 15.75, 9.03811374126261 ], "wc_review_avg": [ 327.0, 52.426138518872435 ], "wc_reply_reviewers_avg": [ 152.25, 240.8000986295479 ], "wc_reply_authors_avg": [ 407.5, 625.4412442428145 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 2.1213203435596424 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fL7peIvyNpcJ:scholar.google.com/&scioq=Self-Supervised+Adversarial+Training+via+Diverse+Augmented+Queries+and+Self-Supervised+Double+Perturbation&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Mitigating Object Hallucination via Concentric Causal Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96152", "id": "CIRPE1bSmV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CIRPE1bSmV", "openreview": "https://openreview.net/forum?id=CIRPE1bSmV", "poster": "/media/PosterPDFs/NeurIPS%202024/96152.png?t=1731663198.38972", "project": "", "author_site": "Yun Xing, Yiheng Li, Ivan Laptev, Shijian Lu", "tldr": "", "abstract": "Recent Large Vision Language Models (LVLMs) present remarkable zero-shot conversational and reasoning capabilities given multimodal queries. Nevertheless, they suffer from object hallucination, a phenomenon where LVLMs are prone to generate textual responses not factually aligned with image inputs. Our pilot study reveals that object hallucination is closely tied with Rotary Position Encoding (RoPE), a widely adopted positional dependency modeling design in existing LVLMs. Due to the long-term decay in RoPE, LVLMs tend to hallucinate more when relevant visual cues are distant from instruction tokens in the multimodal input sequence, Additionally, we observe a similar effect when reversing the sequential order of visual tokens during multimodal alignment. Our tests indicate that long-term decay in RoPE poses challenges to LVLMs while capturing visual-instruction interactions across long distances. We propose Concentric Causal Attention (CCA), a simple yet effective positional alignment strategy that mitigates the impact of RoPE long-term decay in LVLMs by naturally reducing relative distance between visual and instruction tokens. With CCA, visual tokens can better interact with instruction tokens, thereby enhancing model's perception capability and alleviating object hallucination. Without bells and whistles, our positional alignment method surpasses existing hallucination mitigation strategies by large margins on multiple object hallucination benchmarks.", "keywords": "Object Hallucination;Multimodal Learning;Visual Hallucination", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yun Xing;Yiheng Li;Ivan Laptev;Shijian Lu", "authorids": "~Yun_Xing2;~Yiheng_Li3;~Ivan_Laptev1;~Shijian_Lu1", "gender": "M;;M;M", "homepage": "https://xing0047.github.io;;https://www.di.ens.fr/~laptev/;https://personal.ntu.edu.sg/shijian.lu/", "dblp": "09/9613-1;;41/1854;42/2718", "google_scholar": "uOAYTXoAAAAJ;;https://scholar.google.com.tw/citations?user=-9ifK0cAAAAJ;https://scholar.google.com.sg/scholar?hl=en", "orcid": "0000-0001-9839-0120;;;", "linkedin": "yun-xing-2bbb22239/;;;", "or_profile": "~Yun_Xing2;~Yiheng_Li3;~Ivan_Laptev1;~Shijian_Lu1", "aff": "Nanyang Technological University;;Mohamed bin Zayed University of Artificial Intelligence;Nanyang Technological University", "aff_domain": "ntu.edu.sg;;mbzuai.ac.ae;ntu.edu.sg", "position": "PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxing2024mitigating,\ntitle={Mitigating Object Hallucination via Concentric Causal Attention},\nauthor={Yun Xing and Yiheng Li and Ivan Laptev and Shijian Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CIRPE1bSmV}\n}", "github": "", "reviewers": "5R3r;5Msc;1NX2;9xbc", "pdf_size": 1869660, "rating": "5;5;6;6", "confidence": "5;4;4;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;2;2;3", "wc_summary": "97;67;84;106", "wc_strengths": "70;17;32;27", "wc_weaknesses": "178;109;78;174", "wc_questions": "174;109;75;3", "wc_limitations": "22;15;5;1", "wc_review": "541;317;274;311", "wc_reply_reviewers": "115;118;62;22", "wc_reply_authors": "49;45;277;59", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 14.67140075112121 ], "wc_strengths_avg": [ 36.5, 20.081085628023203 ], "wc_weaknesses_avg": [ 134.75, 42.70465431308396 ], "wc_questions_avg": [ 90.25, 61.66593468034033 ], "wc_limitations_avg": [ 10.75, 8.257572282456872 ], "wc_review_avg": [ 360.75, 105.36217300340763 ], "wc_reply_reviewers_avg": [ 79.25, 39.85834291588149 ], "wc_reply_authors_avg": [ 107.5, 97.99362224144998 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6392310638689806303&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;;mbzuai.ac.ae;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://mbzuai.ac.ae", "aff_unique_abbr": "NTU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United Arab Emirates" }, { "title": "Neuro-Symbolic Data Generation for Math Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96151", "id": "CIcMZGLyZW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CIcMZGLyZW", "openreview": "https://openreview.net/forum?id=CIcMZGLyZW", "poster": "", "project": "", "author_site": "Zenan Li, Zhi Zhou, Yuan Yao, Xian Zhang, Yu-Feng Li, Chun Cao, Fan Yang, Xiaoxing Ma", "tldr": "", "abstract": "A critical question about Large Language Models (LLMs) is whether their apparent deficiency in mathematical reasoning is inherent, or merely a result of insufficient exposure to high-quality mathematical data. To explore this, we developed an automated method for generating high-quality, supervised mathematical datasets. The method carefully mutates existing math problems, ensuring both diversity and validity of the newly generated problems. This is achieved by a neuro-symbolic data generation framework combining the intuitive informalization strengths of LLMs, and the precise symbolic reasoning of math solvers along with projected Markov chain Monte Carlo sampling in the highly-irregular symbolic space.\nEmpirical experiments demonstrate the high quality of data generated by the proposed method, and that the LLMs, specifically LLaMA-2 and Mistral, when realigned with the generated data, surpass their state-of-the-art counterparts.", "keywords": "Neuro-symbolic AI;Large language models;Mathemtical reasoning;Data generation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a8de59ffca4ffc13846bcbe3b6ea641e8db17f8d.zip", "author": "Zenan Li;Zhi Zhou;Yuan Yao;Xian Zhang;Yu-Feng Li;Chun Cao;Fan Yang;Xiaoxing Ma", "authorids": "~Zenan_Li3;~Zhi_Zhou2;~Yuan_Yao7;~Xian_Zhang4;~Yu-Feng_Li1;~Chun_Cao1;~Fan_Yang28;~Xiaoxing_Ma1", "gender": "M;M;M;M;;M;M;", "homepage": "https://lizn-zn.github.io/;http://www.lamda.nju.edu.cn/zhouz/;;https://www.microsoft.com/en-us/research/people/zhxian/;;https://ccao.cc;https://fanyangcs.github.io/;", "dblp": "242/2285;04/2090-7;25/4120-1;;;;29/3081-24.html;", "google_scholar": "eu4eqTcAAAAJ;VzvP5a8AAAAJ;;;;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;0000-0002-0378-060X;", "linkedin": ";;;;;;;", "or_profile": "~Zenan_Li3;~Zhi_Zhou2;~Yuan_Yao7;~Xian_Zhang4;~Yu-Feng_Li1;~Chun_Cao1;~Fan_Yang28;~Xiaoxing_Ma1", "aff": "Microsoft Research;Nanjing University;Nanjing University;Microsoft;;Nanjing University;Microsoft Research;", "aff_domain": "research.microsoft.com;nju.edu.cn;nju.edu.cn;microsoft.com;;nju.edu.cn;research.microsoft.com;", "position": "Intern;PhD student;Associate Professor;Researcher;;Full Professor;Senior Principal Researcher;", "bibtex": "@inproceedings{\nli2024neurosymbolic,\ntitle={Neuro-Symbolic Data Generation for Math Reasoning},\nauthor={Zenan Li and Zhi Zhou and Yuan Yao and Xian Zhang and Yu-Feng Li and Chun Cao and Fan Yang and Xiaoxing Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CIcMZGLyZW}\n}", "github": "", "reviewers": "bEBk;UMpg;upsU;BS8K", "pdf_size": 685709, "rating": "5;7;7;7", "confidence": "4;3;5;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "115;117;64;67", "wc_strengths": "32;96;74;70", "wc_weaknesses": "151;283;42;160", "wc_questions": "5;160;25;29", "wc_limitations": "1;11;1;1", "wc_review": "304;667;206;327", "wc_reply_reviewers": "14;164;25;17", "wc_reply_authors": "0;166;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 25.282157740192982 ], "wc_strengths_avg": [ 68.0, 23.021728866442675 ], "wc_weaknesses_avg": [ 159.0, 85.33756499924286 ], "wc_questions_avg": [ 54.75, 61.44255447163635 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 376.0, 174.04453453067694 ], "wc_reply_reviewers_avg": [ 55.0, 63.059495716347115 ], "wc_reply_authors_avg": [ 41.5, 71.88010851410841 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14075808859394958959&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "research.microsoft.com;nju.edu.cn;nju.edu.cn;microsoft.com;;nju.edu.cn;research.microsoft.com;", "author_num": 8, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Microsoft;Nanjing University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.nju.edu.cn", "aff_unique_abbr": "MSR;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "United States;China" }, { "title": "HYDRA: Model Factorization Framework for Black-Box LLM Personalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96150", "id": "CKgNgKmHYp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CKgNgKmHYp", "openreview": "https://openreview.net/forum?id=CKgNgKmHYp", "poster": "/media/PosterPDFs/NeurIPS%202024/96150.png?t=1733295802.206791", "project": "", "author_site": "Yuchen Zhuang, Haotian Sun, Yue Yu, Rushi Qiang, Qifan Wang, Chao Zhang, Bo Dai", "tldr": "", "abstract": "Personalization has emerged as a critical research area in modern intelligent systems, focusing on mining users' behavioral history and adapting to their preferences for delivering tailored experiences. Despite the remarkable few-shot capabilities exhibited by black-box large language models (LLMs), the inherent opacity of their model parameters presents significant challenges in aligning the generated output with individual expectations. Existing solutions have primarily focused on prompt design to incorporate user-specific profiles and behaviors; however, such approaches often struggle to generalize effectively due to their inability to capture shared knowledge among all users. To address these challenges, we propose HYDRA, a model factorization framework that captures both user-specific behavior patterns from historical data and shared general knowledge among all users to deliver personalized generation. In order to capture user-specific behavior patterns, we first train a reranker to prioritize the most useful information from top-retrieved relevant historical records.\nBy combining the prioritized history with the corresponding query, we train an adapter to align the output with individual user-specific preferences, eliminating the reliance on access to inherent model parameters of black-box LLMs. Both the reranker and the adapter can be decomposed into a base model with multiple user-specific heads, resembling a hydra. The base model maintains shared knowledge across users, while the multiple personal heads capture user-specific preferences. Experimental results demonstrate that \\method outperforms existing state-of-the-art prompt-based methods by an average relative improvement of 9.01% across five diverse personalization tasks in the LaMP benchmark.", "keywords": "LLM;LLM Personalization;Black-Box LLMs", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/4d419ea4c5f18df346d7e75fa886447eda7dcfab.zip", "author": "Yuchen Zhuang;Haotian Sun;Yue Yu;Rushi Qiang;Qifan Wang;Chao Zhang;Bo Dai", "authorids": "~Yuchen_Zhuang1;~Haotian_Sun1;~Yue_Yu2;~Rushi_Qiang1;~Qifan_Wang2;~Chao_Zhang15;~Bo_Dai1", "gender": "M;M;M;M;M;;", "homepage": "https://night-chen.github.io/;https://haotiansun.tech/;https://yueyu1030.github.io;https://github.com/jerrycool2002;https://wqfcr.github.io/;http://chaozhang.org/;https://bo-dai.github.io/", "dblp": "191/5231.html;12/8162;;372/2703;33/8610;94/3019-14;64/2903", "google_scholar": "T-f6XlEAAAAJ;lcWkVCQAAAAJ;zQ3Jh6UAAAAJ;https://scholar.google.com/citations?view_op=list_works;LrSyLosAAAAJ;https://scholar.google.com/citations?hl=en;TIKl_foAAAAJ", "orcid": ";0000-0001-9013-7016;0000-0002-3683-5208;;0000-0002-7570-5756;0000-0003-3009-598X;0009-0002-8070-574X", "linkedin": ";haotian-sun-159597218/;;;;;", "or_profile": "~Yuchen_Zhuang1;~Haotian_Sun1;~Yue_Yu2;~Rushi_Qiang1;~Qifan_Wang2;~Chao_Zhang15;~Bo_Dai1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Meta AI;Georgia Institute of Technology;Google Brain", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;fb.com;gatech.edu;google.com", "position": "PhD student;PhD student;PhD student;PhD student;Principal Researcher;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nzhuang2024hydra,\ntitle={{HYDRA}: Model Factorization Framework for Black-Box {LLM} Personalization},\nauthor={Yuchen Zhuang and Haotian Sun and Yue Yu and Rushi Qiang and Qifan Wang and Chao Zhang and Bo Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CKgNgKmHYp}\n}", "github": "", "reviewers": "AVA9;KhW9;xKC3", "pdf_size": 1029323, "rating": "6;6;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;3;3", "wc_summary": "94;21;114", "wc_strengths": "71;51;121", "wc_weaknesses": "139;184;219", "wc_questions": "86;6;219", "wc_limitations": "90;8;1", "wc_review": "480;270;674", "wc_reply_reviewers": "16;40;0", "wc_reply_authors": "471;169;111", "reply_reviewers": "1;1;0", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.33333333333333, 39.96943276499625 ], "wc_strengths_avg": [ 81.0, 29.43920288775949 ], "wc_weaknesses_avg": [ 180.66666666666666, 32.74480450731417 ], "wc_questions_avg": [ 103.66666666666667, 87.84961898355368 ], "wc_limitations_avg": [ 33.0, 40.40627014049511 ], "wc_review_avg": [ 474.6666666666667, 164.97541904444094 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 16.438437341250605 ], "wc_reply_authors_avg": [ 250.33333333333334, 157.8212772586623 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10107752627903704595&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;fb.com;gatech.edu;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;2", "aff_unique_norm": "Georgia Institute of Technology;Meta;Google", "aff_unique_dep": ";Meta AI;Google Brain", "aff_unique_url": "https://www.gatech.edu;https://meta.com;https://brain.google.com", "aff_unique_abbr": "Georgia Tech;Meta;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Surprising Effectiveness of SP Voting with Partial Preferences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96149", "id": "CL9k2PaUQb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CL9k2PaUQb", "openreview": "https://openreview.net/forum?id=CL9k2PaUQb", "poster": "/media/PosterPDFs/NeurIPS%202024/96149.png?t=1731470493.7859378", "project": "", "author_site": "Hadi Hosseini, Debmalya Mandal, Amrit Puhan", "tldr": "", "abstract": "We consider the problem of recovering the ground truth ordering (ranking, top-$k$, or others) over a large number of alternatives. \nThe wisdom of crowd is a heuristic approach based on Condorcet's Jury theorem to address this problem through collective opinions.\nThis approach fails to recover the ground truth when the majority of the crowd is misinformed. The \\emph{surprisingly popular} (SP) algorithm~\\citep{prelec2017solution} is an alternative approach that is able to recover the ground truth even when experts are in minority. The SP algorithm requires the voters to predict other voters' report in the form of a full probability distribution over all rankings of alternatives. However, when the number of alternatives, $m$, is large, eliciting the prediction report or even the vote over $m$ alternatives might be too costly. \nIn this paper, we design a scalable alternative of the SP algorithm which only requires eliciting partial preferences from the voters, and propose new variants of the SP algorithm. In particular, we propose two versions---\\emph{Aggregated-SP} and \\emph{Partial-SP}---that ask voters to report vote and prediction on a subset of size $k$ ($\\ll m$) in terms of top alternative, partial rank, or an approval set. Through a large-scale crowdsourcing experiment on MTurk, we show that both of our approaches outperform conventional preference aggregation algorithms for the recovery of ground truth rankings, when measured in terms of Kendall-Tau distance and Spearman's $\\rho$. We further analyze the collected data and demonstrate that voters' behavior in the experiment, including the minority of the experts, and the SP phenomenon, can be correctly simulated by a concentric mixtures of Mallows model. Finally, we provide theoretical bounds on the sample complexity of SP algorithms with partial rankings to demonstrate the theoretical guarantees of the proposed methods.", "keywords": "Surprisingly Popular Algorithm;Preference Aggregation;Partial Rankings", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/f6154d79ab9f514fdd1f8d0755cf8386eb195f9f.zip", "author": "Hadi Hosseini;Debmalya Mandal;Amrit Puhan", "authorids": "~Hadi_Hosseini4;~Debmalya_Mandal2;~Amrit_Puhan1", "gender": ";M;M", "homepage": ";https://debmandal.github.io;https://www.amritpuhan.com", "dblp": ";151/3685;325/6917", "google_scholar": ";OquWQpEAAAAJ;G1U8jiIAAAAJ", "orcid": ";;", "linkedin": ";;amritpuhan/", "or_profile": "~Hadi_Hosseini4;~Debmalya_Mandal2;~Amrit_Puhan1", "aff": ";University of Warwick;Pennsylvania State University", "aff_domain": ";warwick.ac.uk;psu.edu", "position": ";Assistant Professor;MS student", "bibtex": "@inproceedings{\nhosseini2024the,\ntitle={The Surprising Effectiveness of {SP} Voting with Partial Preferences},\nauthor={Hadi Hosseini and Debmalya Mandal and Amrit Puhan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CL9k2PaUQb}\n}", "github": "", "reviewers": "227u;TQiC;GckY;1pvg", "pdf_size": 8035067, "rating": "5;6;7;7", "confidence": "3;2;3;4", "soundness": "2;3;4;4", "novelty": "3;2;2;4", "presentation": "1;3;3;4", "wc_summary": "72;130;78;122", "wc_strengths": "60;64;83;117", "wc_weaknesses": "277;12;265;32", "wc_questions": "196;68;2;1", "wc_limitations": "121;24;4;1", "wc_review": "726;298;432;273", "wc_reply_reviewers": "95;12;10;0", "wc_reply_authors": "69;0;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 100.5, 25.743931323712 ], "wc_strengths_avg": [ 81.0, 22.52776065213762 ], "wc_weaknesses_avg": [ 146.5, 124.7727935088415 ], "wc_questions_avg": [ 66.75, 79.40835913176899 ], "wc_limitations_avg": [ 37.5, 49.012753442343964 ], "wc_review_avg": [ 432.25, 180.05051374544868 ], "wc_reply_reviewers_avg": [ 29.25, 38.23202191880518 ], "wc_reply_authors_avg": [ 17.25, 29.877876430563134 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17761469147199213445&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";warwick.ac.uk;psu.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Warwick;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.warwick.ac.uk;https://www.psu.edu", "aff_unique_abbr": "Warwick;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Soft Prompt Threats: Attacking Safety Alignment and Unlearning in Open-Source LLMs through the Embedding Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96148", "id": "CLxcLPfARc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CLxcLPfARc", "openreview": "https://openreview.net/forum?id=CLxcLPfARc", "poster": "/media/PosterPDFs/NeurIPS%202024/96148.png?t=1730815254.1572886", "project": "", "author_site": "Leo Schwinn, David Dobre, Sophie Xhonneux, Gauthier Gidel, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Current research in adversarial robustness of LLMs focuses on \\textit{discrete} input manipulations in the natural language space, which can be directly transferred to \\textit{closed-source} models. However, this approach neglects the steady progression of \\textit{open-source} models. As open-source models advance in capability, ensuring their safety becomes increasingly imperative. Yet, attacks tailored to open-source LLMs that exploit full model access remain largely unexplored. We address this research gap and propose the \\textit{embedding space attack}, which directly attacks the \\textit{continuous} embedding representation of input tokens.\nWe find that embedding space attacks circumvent model alignments and trigger harmful behaviors more efficiently than discrete attacks or model fine-tuning. Additionally, we demonstrate that models compromised by embedding attacks can be used to create discrete jailbreaks in natural language. Lastly, we present a novel threat model in the context of unlearning and show that embedding space attacks can extract supposedly deleted information from unlearned LLMs across multiple datasets and models. Our findings highlight embedding space attacks as an important threat model in open-source LLMs.", "keywords": "Adversarial Attacks;Large Language Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/1484b1907cddbb05d4fd926413e65b448c3ef02b.zip", "author": "Leo Schwinn;David Dobre;Sophie Xhonneux;Gauthier Gidel;Stephan G\u00fcnnemann", "authorids": "~Leo_Schwinn1;~David_Dobre1;~Sophie_Xhonneux1;~Gauthier_Gidel1;~Stephan_G\u00fcnnemann1", "gender": "M;M;M;M;", "homepage": ";;https://gauthiergidel.github.io/;http://www.daml.in.tum.de;", "dblp": "259/2852;;188/6326;43/3011;255/5495", "google_scholar": "PXT4k9cAAAAJ;;https://scholar.google.fr/citations?user=bDrXQPUAAAAJ;;", "orcid": "0000-0003-3967-2202;;;;0000-0003-1990-4475", "linkedin": "leo-schwinn-34a61623b/;daviddobre/;;;", "or_profile": "~Leo_Schwinn1;~David_Dobre1;~Gauthier_Gidel1;~Stephan_G\u00fcnnemann1;~Louis-Pascal_A._C._Xhonneux1", "aff": "Technical University of Munich;Mila - Quebec Artificial Intelligence Institute;Mila - Quebec Artificial Intelligence Institute;Technical University Munich;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "tum.de;mila.quebec;mila.quebec;tum.de;mila.umontreal.ca", "position": "Postdoc;PhD student;Assistant Professor;Professor;PhD student", "bibtex": "@inproceedings{\nschwinn2024soft,\ntitle={Soft Prompt Threats: Attacking Safety Alignment and Unlearning in Open-Source {LLM}s through the Embedding Space},\nauthor={Leo Schwinn and David Dobre and Sophie Xhonneux and Gauthier Gidel and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CLxcLPfARc}\n}", "github": "", "reviewers": "HnT1;A4UR;eHxU;oPEA", "pdf_size": 1516054, "rating": "4;4;5;7", "confidence": "4;3;4;5", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "26;102;70;107", "wc_strengths": "103;71;43;86", "wc_weaknesses": "766;202;176;187", "wc_questions": "123;15;5;41", "wc_limitations": "5;5;1;56", "wc_review": "1023;395;295;477", "wc_reply_reviewers": "0;0;0;345", "wc_reply_authors": "0;0;0;505", "reply_reviewers": "0;0;0;2", "reply_authors": "1;1;1;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.25, 32.29841327371981 ], "wc_strengths_avg": [ 75.75, 22.038318901404434 ], "wc_weaknesses_avg": [ 332.75, 250.30718627318714 ], "wc_questions_avg": [ 46.0, 46.357307945997036 ], "wc_limitations_avg": [ 16.75, 22.71976012197312 ], "wc_review_avg": [ 547.5, 281.99423752977646 ], "wc_reply_reviewers_avg": [ 86.25, 149.38938215281567 ], "wc_reply_authors_avg": [ 126.25, 218.67141445557075 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11135672592736199524&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tum.de;mila.quebec;mila.quebec;tum.de;mila.umontreal.ca", "author_num": 5, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Technical University of Munich;Quebec Artificial Intelligence Institute;University of Montreal", "aff_unique_dep": ";Artificial Intelligence;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.tum.de;https://mila.quebec;https://www.umontreal.ca", "aff_unique_abbr": "TUM;Mila;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Germany;Canada" }, { "title": "Optimal Private and Communication Constraint Distributed Goodness-of-Fit Testing for Discrete Distributions in the Large Sample Regime", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96147", "id": "CMc0jMY0Wr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CMc0jMY0Wr", "openreview": "https://openreview.net/forum?id=CMc0jMY0Wr", "poster": "/media/PosterPDFs/NeurIPS%202024/96147.png?t=1733812884.8802776", "project": "", "tldr": "", "abstract": "We study distributed goodness-of-fit testing for discrete distribution under bandwidth and differential privacy constraints. Information constraint distributed goodness-of-fit testing is a problem that has received considerable attention recently. The important case of discrete distributions is theoretically well understood in the classical case where all data is available in one \"central\" location. In a federated setting, however, data is distributed across multiple \"locations\" (e.g. servers) and cannot readily be shared due to e.g. bandwidth or privacy constraints that each server needs to satisfy. We show how recently derived results for goodness-of-fit testing for the mean of a multivariate Gaussian model extend to the discrete distributions, by leveraging Le Cam's theory of statistical equivalence. In doing so, we derive matching minimax upper- and lower-bounds for the goodness-of-fit testing for discrete distributions under bandwidth or privacy constraints in the regime where number of samples held locally are large.", "keywords": "hypothesis testing;federated learning;distributed inference;goodness-of-fit;differential privacy;communication constraint", "primary_area": "learning_theory", "supplementary_material": "", "author": "Lasse Vuursteen", "authorids": "~Lasse_Vuursteen1", "gender": "M", "homepage": "https://lassev.github.io/", "dblp": "281/7082", "google_scholar": "JtvExucAAAAJ", "orcid": "0000-0002-3255-8549", "linkedin": "", "or_profile": "~Lasse_Vuursteen1", "aff": "The Wharton School, University of Pennsylvania", "aff_domain": "wharton.upenn.edu", "position": "Postdoc", "bibtex": "@inproceedings{\nvuursteen2024optimal,\ntitle={Optimal Private and Communication Constraint Distributed Goodness-of-Fit Testing for Discrete Distributions in the Large Sample Regime},\nauthor={Lasse Vuursteen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CMc0jMY0Wr}\n}", "github": "", "reviewers": "Nun8;3Xr9;4qfs;aGfz", "pdf_size": 481993, "rating": "4;6;6;7", "confidence": "4;3;2;3", "soundness": "3;3;3;4", "novelty": "2;2;2;2", "presentation": "2;3;3;4", "wc_summary": "235;41;52;79", "wc_strengths": "34;50;63;42", "wc_weaknesses": "265;202;78;34", "wc_questions": "1;2;86;44", "wc_limitations": "1;6;6;17", "wc_review": "536;301;285;216", "wc_reply_reviewers": "200;13;0;10", "wc_reply_authors": "782;0;0;27", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.75, 78.16449001944554 ], "wc_strengths_avg": [ 47.25, 10.709224995301948 ], "wc_weaknesses_avg": [ 144.75, 92.81534086561338 ], "wc_questions_avg": [ 33.25, 35.0526389876711 ], "wc_limitations_avg": [ 7.5, 5.852349955359813 ], "wc_review_avg": [ 334.5, 120.6409963486708 ], "wc_reply_reviewers_avg": [ 55.75, 83.42174476717686 ], "wc_reply_authors_avg": [ 202.25, 334.9002650043741 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15086189307008145239&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "wharton.upenn.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "The Wharton School", "aff_unique_url": "https://www.wharton.upenn.edu", "aff_unique_abbr": "UPenn Wharton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Accelerating Greedy Coordinate Gradient and General Prompt Optimization via Probe Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96146", "id": "CMgxAaRqZh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CMgxAaRqZh", "openreview": "https://openreview.net/forum?id=CMgxAaRqZh", "poster": "", "project": "", "author_site": "Yiran Zhao, Wenyue Zheng, Tianle Cai, Do Xuan Long, Kenji Kawaguchi, Anirudh Goyal, Michael Qizhe Shieh", "tldr": "", "abstract": "Safety of Large Language Models (LLMs) has become a central issue given their rapid progress and wide applications. Greedy Coordinate Gradient (GCG) is shown to be effective in constructing prompts containing adversarial suffixes to break the presumingly safe LLMs, but the optimization of GCG is time-consuming and limits its practicality. To reduce the time cost of GCG and enable more comprehensive studies of LLM safety, in this work, we study a new algorithm called $\\texttt{Probe sampling}$ to accelerate the GCG algorithm. At the core of the algorithm is a mechanism that dynamically determines how similar a smaller draft model's predictions are to the target model's predictions for prompt candidates. When the target model is similar to the draft model, we rely heavily on the draft model to filter out a large number of potential prompt candidates to reduce the computation time. Probe sampling achieves up to $5.6$ times speedup using Llama2-7b-chat and leads to equal or improved attack success rate (ASR) on the AdvBench. Furthermore, probe sampling is also able to accelerate other prompt optimization techniques and adversarial attack methods, leading to acceleration of $1.8\\times$ for AutoPrompt, $2.4\\times$ for APE and $2.4\\times$ for AutoDAN.", "keywords": "Large Language Model;Prompt Optimization;Alignment;Jailbreak;Acceleration", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/b7576ede62336e11eaad64df75c1fe3f7b58607a.zip", "author": "Yiran Zhao;Wenyue Zheng;Tianle Cai;Do Xuan Long;Kenji Kawaguchi;Anirudh Goyal;Michael Shieh", "authorids": "~Yiran_Zhao2;~Wenyue_Zheng1;~Tianle_Cai1;~Do_Xuan_Long1;~Kenji_Kawaguchi1;~Anirudh_Goyal1;~Michael_Shieh1", "gender": "M;F;M;M;;M;", "homepage": "https://zhaoyiran924.github.io/;https://github.com/luna5999;https://tianle.website;https://dxlong2000.github.io/;https://ml.comp.nus.edu.sg/#members;https://anirudh9119.github.io/;", "dblp": ";;241/9458;317/0657.html;;172/1039;", "google_scholar": "D_HwSlEAAAAJ;;CvwLRSMAAAAJ;uZyF8wwAAAAJ;aLl3rYoAAAAJ;krrh6OUAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Yiran_Zhao2;~Wenyue_Zheng1;~Tianle_Cai1;~Do_Xuan_Long1;~Kenji_Kawaguchi1;~Anirudh_Goyal1;~Michael_Shieh1", "aff": "National University of Singapore;National University of Singapore;Princeton University;National University of Singapore;National University of Singapore;Google DeepMind;", "aff_domain": "u.nus.edu;nus.edu.sg;princeton.edu;nus.edu.sg;nus.edu;google.com;", "position": "PhD student;MS student;PhD student;PhD student;Presidential Young Professor;Researcher;", "bibtex": "@inproceedings{\nzhao2024accelerating,\ntitle={Accelerating Greedy Coordinate Gradient and General Prompt Optimization via Probe Sampling},\nauthor={Yiran Zhao and Wenyue Zheng and Tianle Cai and Do Xuan Long and Kenji Kawaguchi and Anirudh Goyal and Michael Shieh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CMgxAaRqZh}\n}", "github": "", "reviewers": "Jngk;bSGi;hJdQ", "pdf_size": 1826235, "rating": "4;7;8", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;3;3", "wc_summary": "36;109;91", "wc_strengths": "34;123;60", "wc_weaknesses": "218;67;90", "wc_questions": "49;1;91", "wc_limitations": "1;42;13", "wc_review": "338;342;345", "wc_reply_reviewers": "0;18;61", "wc_reply_authors": "110;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.66666666666667, 31.051927834229907 ], "wc_strengths_avg": [ 72.33333333333333, 37.366057086910075 ], "wc_weaknesses_avg": [ 125.0, 66.42790578263526 ], "wc_questions_avg": [ 47.0, 36.76955262170047 ], "wc_limitations_avg": [ 18.666666666666668, 17.211107524567446 ], "wc_review_avg": [ 341.6666666666667, 2.8674417556808756 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 25.590796956892316 ], "wc_reply_authors_avg": [ 36.666666666666664, 51.85449728701349 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18058040664246885687&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "u.nus.edu;nus.edu.sg;princeton.edu;nus.edu.sg;nus.edu;google.com;", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "National University of Singapore;Princeton University;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.nus.edu.sg;https://www.princeton.edu;https://deepmind.com", "aff_unique_abbr": "NUS;Princeton;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;2", "aff_country_unique": "Singapore;United States;United Kingdom" }, { "title": "DataComp-LM: In search of the next generation of training sets for language models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97814", "id": "CNWdWn47IE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CNWdWn47IE", "openreview": "https://openreview.net/forum?id=CNWdWn47IE", "poster": "", "project": "", "author_site": "Amro Abbas, Alon Albalak, Kushal Arora, Hritik Bansal, Yonatan Bitton, Yair Carmon, Khyathi Chandu, Mayee Chen, Giannis Daras, Achal Dave, Alex Dimakis, Alaaeldin El-Nouby, Fartash Faghri, Alex Fang, Samir Yitzhak Gadre, Josh Gardner, Saurabh Garg, Dhruba Ghosh, Aaron Gokaslan, Dirk Groeneveld, Etash Guha, Suchin Gururangan, Reinhard Heckel, Cheng-Yu Hsieh, Gabriel Ilharco, Maor Ivgi, Jenia Jitsev, Matt Jordan, Sham Kakade, Sedrick Scott Keh, Maciej Kilian, Pang Wei Koh, Thomas Kollar, Jeffrey Li, Kyle Lo, Kalyani Marathe, Jean Mercat, Niklas Muennighoff, Marianna Nezhurina, Thao Nguyen, Sewoong Oh, Hadi Pouransari, Sarah Pratt, Sunny Sanyal, Ludwig Schmidt, Vaishaal Shankar, Rulin Shao, Georgios Smyrnis, Luca Soldaini, Shuran Song, Alexander Toshev, Igor Vasiljevic, Stephanie Wang, Mitchell Wortsman, Rui Xin, Luke Zettlemoyer, Hanlin Zhang, Jieyu Zhang", "tldr": "", "abstract": "We introduce DataComp for Language Models, a testbed for controlled dataset experiments with the goal of improving language models.\nAs part of DCLM, we provide a standardized corpus of 240T tokens extracted from Common Crawl, effective pretraining recipes based on the OpenLM framework, and a broad suite of 53 downstream evaluations.\nParticipants in the DCLM benchmark can experiment with data curation strategies such as deduplication, filtering, and data mixing at\nmodel scales ranging from 412M to 7B parameters.\nAs a baseline for DCLM, we conduct extensive experiments and find that model-based filtering is key to assembling a high-quality training set.\nThe resulting dataset, DCLM-Baseline, enables training a 7B parameter language model from scratch to 63% 5-shot accuracy on MMLU with 2T training tokens.\nCompared to MAP-Neo, the previous state-of-the-art in open-data language models, DCLM-Baseline represents a 6 percentage point improvement on MMLU while being trained with half the compute.\nOur results highlight the importance of dataset design for training language models and offer a starting point for further research on data curation. We release the \\dclm benchmark, framework, models, and datasets at https://www.datacomp.ai/dclm/", "keywords": "language models;data comp;datacomp;dataset design;data-centric AI;data-centric benchmarks;benchmarks;data curation", "primary_area": "", "supplementary_material": "", "author": "Jeffrey Li;Alex Fang;Georgios Smyrnis;Maor Ivgi;Matt Jordan;Samir Yitzhak Gadre;Hritik Bansal;Etash Kumar Guha;Sedrick Keh;Kushal Arora;Saurabh Garg;Rui Xin;Niklas Muennighoff;Reinhard Heckel;Jean Mercat;Mayee F Chen;Suchin Gururangan;Mitchell Wortsman;Alon Albalak;Yonatan Bitton;Marianna Nezhurina;Amro Kamal Mohamed Abbas;Cheng-Yu Hsieh;Dhruba Ghosh;Joshua P Gardner;Maciej Kilian;Hanlin Zhang;Rulin Shao;Sarah M Pratt;Sunny Sanyal;Gabriel Ilharco;Giannis Daras;Kalyani Marathe;Aaron Gokaslan;Jieyu Zhang;Khyathi Chandu;Thao Nguyen;Igor Vasiljevic;Sham M. Kakade;Shuran Song;Sujay Sanghavi;Fartash Faghri;Sewoong Oh;Luke Zettlemoyer;Kyle Lo;Alaaeldin El-Nouby;Hadi Pouransari;Alexander T Toshev;Stephanie Wang;Dirk Groeneveld;Luca Soldaini;Pang Wei Koh;Jenia Jitsev;Thomas Kollar;Alex Dimakis;Yair Carmon;Achal Dave;Ludwig Schmidt;Vaishaal Shankar", "authorids": "~Jeffrey_Li1;~Alex_Fang1;~Georgios_Smyrnis1;~Maor_Ivgi2;~Matt_Jordan1;~Samir_Yitzhak_Gadre1;~Hritik_Bansal2;~Etash_Kumar_Guha1;~Sedrick_Keh1;~Kushal_Arora1;~Saurabh_Garg3;~Rui_Xin1;~Niklas_Muennighoff1;~Reinhard_Heckel1;~Jean_Mercat1;~Mayee_F_Chen1;~Suchin_Gururangan1;~Mitchell_Wortsman1;~Alon_Albalak1;~Yonatan_Bitton1;~Marianna_Nezhurina1;~Amro_Kamal_Mohamed_Abbas1;~Cheng-Yu_Hsieh1;~Dhruba_Ghosh1;~Joshua_P_Gardner1;~Maciej_Kilian1;~Hanlin_Zhang1;~Rulin_Shao1;~Sarah_M_Pratt1;~Sunny_Sanyal1;~Gabriel_Ilharco1;~Giannis_Daras1;~Kalyani_Marathe1;~Aaron_Gokaslan1;~Jieyu_Zhang1;~Khyathi_Chandu1;~Thao_Nguyen3;~Igor_Vasiljevic1;~Sham_M._Kakade1;~Shuran_Song3;~Sujay_Sanghavi2;~Fartash_Faghri1;~Sewoong_Oh3;~Luke_Zettlemoyer1;~Kyle_Lo1;~Alaaeldin_El-Nouby1;~Hadi_Pouransari1;~Alexander_T_Toshev1;~Stephanie_Wang4;~Dirk_Groeneveld1;~Luca_Soldaini1;~Pang_Wei_Koh1;~Jenia_Jitsev1;~Thomas_Kollar1;~Alex_Dimakis1;~Yair_Carmon1;~Achal_Dave1;~Ludwig_Schmidt1;~Vaishaal_Shankar1", "gender": "M;;M;M;M;M;M;M;;M;M;;M;M;M;;M;M;;M;;;M;;;;M;;F;M;M;M;F;M;M;;F;M;M;F;;M;;M;;;M;;F;;Non-Binary;M;;M;M;M;M;M;", "homepage": ";;;https://mivg.github.io/;https://www.cs.utexas.edu/~mjordan/;https://sagadre.github.io/;https://sites.google.com/view/hbansal;https://etash.me/;;http://kushalarora.github.io;http://saurabhgarg1996.github.io/;;https://muennighoff.github.io/;;http://jean-mercat.netlify.app;;https://suchin.io;https://mitchellnw.github.io/;https://alon-albalak.github.io/;https://yonatanbitton.github.io/;;;https://chengyuhsieh.github.io/;https://djghosh13.github.io/;;https://github.com/iejMac;https://hanlin-zhang.com/;https://rulinshao.github.io/;;https://sites.google.com/view/sunnysanyal/home;http://gabrielilharco.com/;https://giannisdaras.github.io/;https://kalyani7195.github.io/;https://skylion007.github.io/;https://jieyuz2.github.io/;;https://thaonguyen19.github.io/;https://scholar.google.com/citations?user=Sl_2kHcAAAAJ&hl=en;https://shamulent.github.io;https://shurans.github.io/;;;;https://www.cs.washington.edu/people/faculty/lsz/;https://kyleclo.github.io/;;;;https://stephanie-wang.github.io;;https://soldaini.net;http://cs.stanford.edu/~pangwei;;http://tkollar.github.io;https://people.eecs.berkeley.edu/~alexdimakis/;https://www.cs.tau.ac.il/~ycarmon/;http://www.achaldave.com/;http://people.csail.mit.edu/ludwigs/;", "dblp": ";260/0449;255/9114;275/3578;236/5728;246/7901;239/5922;331/5590;;;80/208;;281/6745;81/9668;248/2886;;217/1570;232/2273;283/4427;277/7042;323/5785.html;;40/4421;292/8318;;;;;;198/3765;249/2616;254/2703;;220/6816;;;77/2922;;s/SMKakade;;;115/7922;;21/6793;220/2020;;162/5187;;;185/7781;160/1741;10/10453;53/5156;10/6653;19/5000.html;13/558;156/1161;141/2720;", "google_scholar": "JDS2BnIAAAAJ;;;https://scholar.google.com/citations?hl=en;Zj7R8p0AAAAJ;oAhlg9gAAAAJ;gAKTYtoAAAAJ;https://scholar.google.com/citations?hl=en;;;SAnJ1hIAAAAJ;;Me0IoRMAAAAJ;ZWV0I7cAAAAJ;https://scholar.google.com/citations?hl=fr;;CJIKhNIAAAAJ;fzRnjFgAAAAJ;F6J_7d8AAAAJ;P9Fpf4sAAAAJ;2KPv4VYAAAAJ;;WXX6ZwwAAAAJ;lHuZ55oAAAAJ;;;h5IXxToAAAAJ;Vdwh6bcAAAAJ;;https://scholar.google.co.uk/citations?user=xx9rrGMAAAAJ;https://scholar.google.com/citations?hl=en;LaScvbQAAAAJ;gCxlvdcAAAAJ;Mt2wyL4AAAAJ;T_INUHUAAAAJ;;DvJG-_8AAAAJ;;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=KUG_tG0AAAAJ;;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;VJS12uMAAAAJ;;besz69AAAAAJ;;7w24ptsAAAAJ;KEhvGNMAAAAJ;3KPvwcgAAAAJ;Nn990CkAAAAJ;https://scholar.google.com/citations?hl=en;AEKT17QAAAAJ;JSFmVQEAAAAJ;kTKmpT0AAAAJ;oQyYH9kAAAAJ;SWMKy70AAAAJ;", "orcid": ";;;;;;;;;;;;;;0000-0002-4012-9082;;;;0000-0003-0809-1704;;0009-0000-9541-5150;;;0000-0002-8518-2696;;;0000-0002-9292-1645;;;0000-0002-2357-5152;;;;0000-0002-3575-2961;0000-0002-1846-2436;;;;;;;;;;;;;;;0000-0002-8274-768X;0000-0001-6998-9863;;0000-0002-1221-7851;0000-0003-2598-8118;;;;;", "linkedin": "jeffrey-li-a78684111/;alex-fang-8a11a8115/;;maor-ivgi-a3314111b;;;hritik-bansal/;etash-guha-00097116a/;;;saurabh-garg-b680b5b8/;;niklasmuennighoff/;;;;;;alonalbalak;yonatanbitton/;https://www.linkedin.com/mwlite/in/marianna-nezhurina-957848145;;;dhruba-ghosh-b82467170/;;;hanlin-zhang-931b46143/;;sarahpratt;sunny-sanyal/;;;;aarongokaslan/;jieyu-zhang-3baaa8154/;;;;;;;fartash-faghri;;luke-zettlemoyer-a0109b226/;kylelo/;;;;;mechanicaldirk/;soldni/;;;;alex-dimakis-b1b20320/;;;ludwig-schmidt-87ba3612/;", "or_profile": "~Jeffrey_Li1;~Alex_Fang1;~Georgios_Smyrnis1;~Maor_Ivgi2;~Matt_Jordan1;~Samir_Yitzhak_Gadre1;~Hritik_Bansal2;~Etash_Kumar_Guha1;~Sedrick_Keh1;~Kushal_Arora1;~Saurabh_Garg3;~Rui_Xin1;~Niklas_Muennighoff1;~Reinhard_Heckel1;~Jean_Mercat1;~Mayee_F_Chen1;~Suchin_Gururangan1;~Mitchell_Wortsman1;~Alon_Albalak1;~Yonatan_Bitton1;~Marianna_Nezhurina1;~Amro_Kamal_Mohamed_Abbas1;~Cheng-Yu_Hsieh1;~Dhruba_Ghosh1;~Joshua_P_Gardner1;~Maciej_Kilian1;~Hanlin_Zhang1;~Rulin_Shao1;~Sarah_M_Pratt1;~Sunny_Sanyal1;~Gabriel_Ilharco1;~Giannis_Daras1;~Kalyani_Marathe1;~Aaron_Gokaslan1;~Jieyu_Zhang1;~Khyathi_Chandu1;~Thao_Nguyen3;~Igor_Vasiljevic1;~Sham_M._Kakade1;~Shuran_Song3;~Sujay_Sanghavi2;~Fartash_Faghri1;~Sewoong_Oh3;~Luke_Zettlemoyer1;~Kyle_Lo1;~Alaaeldin_El-Nouby1;~Hadi_Pouransari1;~Alexander_T_Toshev1;~Stephanie_Wang4;~Dirk_Groeneveld1;~Luca_Soldaini1;~Pang_Wei_Koh1;~Jenia_Jitsev1;~Thomas_Kollar1;~Alex_Dimakis1;~Yair_Carmon1;~Achal_Dave1;~Ludwig_Schmidt1;~Vaishaal_Shankar1", "aff": "Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;Toyota Research Institute;Tel Aviv University;University of Texas, Austin;Columbia University;University of California, Los Angeles;;;McGill University;Carnegie Mellon University;;Allen Institute for Artificial Intelligence;Rice University;Toyota Research Institute;;University of Washington, Seattle;University of Washington, Seattle;University of California, Santa Barbara;Google;Forschungszentrum Juelich GmbH;;Google;University of Washington;;University of Southern California;Harvard University;University of Washington;University of Washington;University of Texas at Austin;Department of Computer Science, University of Washington;University of Texas, Austin;University of Washington, Seattle;Cornell University;University of Washington;;Meta;Toyota Research Institute;Harvard University;Stanford University;;Apple;;Meta;Allen Institute for Artificial Intelligence;;Apple;;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;Juelich Supercomputing Center, Research Center Juelich;Toyota Research Institute;University of Texas at Austin;Tel Aviv University;Toyota Research Institute;University of Washington;", "aff_domain": "cs.washington.edu;cs.washington.edu;tri.global;tau.ac.il;utexas.edu;columbia.edu;ucla.edu;;;mcgill.ca;cmu.edu;;allenai.org;rice.edu;tri.global;;uw.edu;uw.edu;ucsb.edu;google.com;fz-juelich.de;;google.com;uw.edu;;usc.edu;harvard.edu;uw.edu;uw.edu;utexas.edu;cs.washington.edu;utexas.edu;uw.edu;cornell.edu;cs.washington.edu;;meta.com;tri.global;harvard.edu;stanford.edu;;apple.com;;meta.com;allenai.org;;apple.com;;cs.washington.edu;allenai.org;allenai.org;cs.washington.edu;fz-juelich.de;tri.global;utexas.edu;tau.ac.il;tri.global;washington.edu;", "position": "PhD student;PhD student;Intern;PhD student;PhD student;PhD student;PhD student;;;PhD student;PhD student;;Researcher;Assistant Professor;Researcher;;PhD student;PhD student;PhD student;Research Scientist;Researcher;;Intern;PhD student;;Undergrad student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;;Visiting Researcher;Research Scientist;Full Professor;Assistant Professor;;Researcher;;Researcher;Researcher;;Principal Researcher;;Assistant Professor;Principal Researcher;Researcher;Assistant Professor;Senior Scientist;Principal Researcher;Full Professor;Assistant Professor;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nli2024datacomplm,\ntitle={DataComp-{LM}: In search of the next generation of training sets for language models},\nauthor={Jeffrey Li and Alex Fang and Georgios Smyrnis and Maor Ivgi and Matt Jordan and Samir Yitzhak Gadre and Hritik Bansal and Etash Kumar Guha and Sedrick Keh and Kushal Arora and Saurabh Garg and Rui Xin and Niklas Muennighoff and Reinhard Heckel and Jean Mercat and Mayee F Chen and Suchin Gururangan and Mitchell Wortsman and Alon Albalak and Yonatan Bitton and Marianna Nezhurina and Amro Kamal Mohamed Abbas and Cheng-Yu Hsieh and Dhruba Ghosh and Joshua P Gardner and Maciej Kilian and Hanlin Zhang and Rulin Shao and Sarah M Pratt and Sunny Sanyal and Gabriel Ilharco and Giannis Daras and Kalyani Marathe and Aaron Gokaslan and Jieyu Zhang and Khyathi Chandu and Thao Nguyen and Igor Vasiljevic and Sham M. Kakade and Shuran Song and Sujay Sanghavi and Fartash Faghri and Sewoong Oh and Luke Zettlemoyer and Kyle Lo and Alaaeldin El-Nouby and Hadi Pouransari and Alexander T Toshev and Stephanie Wang and Dirk Groeneveld and Luca Soldaini and Pang Wei Koh and Jenia Jitsev and Thomas Kollar and Alex Dimakis and Yair Carmon and Achal Dave and Ludwig Schmidt and Vaishaal Shankar},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=CNWdWn47IE}\n}", "github": "", "reviewers": "vx9q;5qZv;gmJb;JUUa", "pdf_size": 1272517, "rating": "7;7;7;8", "confidence": "4;4;5;4", "wc_summary_and_contributions": "58;100;86;83", "wc_strengths": "38;66;4;36", "wc_improvement": "50;152;4;25", "wc_limitations": "114;12;4;2", "wc_correctness": "37;6;18;17", "wc_clarity": "9;1;1;5", "wc_relation_to_prior_work": "8;1;7;9", "wc_documentation": "14;33;1;99", "wc_additional_feedback": "1;1;1;1", "wc_review": "329;372;126;277", "wc_reply_reviewers": "13;10;0;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 81.75, 15.138939857202683 ], "wc_strengths_avg": [ 36.0, 21.95449840010015 ], "wc_improvement_avg": [ 57.75, 56.79953785023255 ], "wc_limitations_avg": [ 33.0, 46.9148164229596 ], "wc_correctness_avg": [ 19.5, 11.146748404803978 ], "wc_clarity_avg": [ 4.0, 3.3166247903554 ], "wc_relation_to_prior_work_avg": [ 6.25, 3.112474899497183 ], "wc_documentation_avg": [ 36.75, 37.69864055904404 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 276.0, 92.90586633792293 ], "wc_reply_reviewers_avg": [ 15.5, 14.396180048887969 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 59, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14691729232576505865&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;cs.washington.edu;tri.global;tau.ac.il;utexas.edu;columbia.edu;ucla.edu;;;mcgill.ca;cmu.edu;;allenai.org;rice.edu;tri.global;;uw.edu;uw.edu;ucsb.edu;google.com;fz-juelich.de;;google.com;uw.edu;;usc.edu;harvard.edu;uw.edu;uw.edu;utexas.edu;cs.washington.edu;utexas.edu;uw.edu;cornell.edu;cs.washington.edu;;meta.com;tri.global;harvard.edu;stanford.edu;;apple.com;;meta.com;allenai.org;;apple.com;;cs.washington.edu;allenai.org;allenai.org;cs.washington.edu;fz-juelich.de;tri.global;utexas.edu;tau.ac.il;tri.global;washington.edu;", "author_num": 59, "aff_unique_index": "0;0;1;2;3;4;5;6;7;8;9;1;0;0;10;11;12;11;0;13;14;0;0;3;0;3;0;15;0;16;1;14;17;18;16;8;18;0;8;8;0;19;1;3;2;1;0", "aff_unique_norm": "University of Washington;Toyota Research Institute;Tel Aviv University;University of Texas at Austin;Columbia University;University of California, Los Angeles;McGill University;Carnegie Mellon University;Allen Institute for Artificial Intelligence;Rice University;University of California, Santa Barbara;Google;Forschungszentrum Juelich;University of Southern California;Harvard University;Cornell University;Meta;Stanford University;Apple;Research Center Juelich", "aff_unique_dep": "Department of Computer Science;;;;;;;;;;;Google;;;;;Meta Platforms, Inc.;;Apple Inc.;Juelich Supercomputing Center", "aff_unique_url": "https://www.washington.edu;https://www.tri.global;https://www.tau.ac.il;https://www.utexas.edu;https://www.columbia.edu;https://www.ucla.edu;https://www.mcgill.ca;https://www.cmu.edu;https://allenai.org;https://www.rice.edu;https://www.ucsb.edu;https://www.google.com;https://www.fz-juelich.de;https://www.usc.edu;https://www.harvard.edu;https://www.cornell.edu;https://meta.com;https://www.stanford.edu;https://www.apple.com;https://www.fz-juelich.de/", "aff_unique_abbr": "UW;TRI;TAU;UT Austin;Columbia;UCLA;McGill;CMU;AI2;Rice;UCSB;Google;FZJ;USC;Harvard;Cornell;Meta;Stanford;Apple;FZ J\u00fclich", "aff_campus_unique_index": "0;0;2;3;0;0;4;5;5;3;2;0;2;0;6;0;2", "aff_campus_unique": "Seattle;;Austin;Los Angeles;Santa Barbara;Mountain View;Stanford", "aff_country_unique_index": "0;0;0;1;0;0;0;2;0;0;0;0;0;0;0;0;3;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;3;0;0;1;0;0", "aff_country_unique": "United States;Israel;Canada;Germany" }, { "title": "Bisimulation Metrics are Optimal Transport Distances, and Can be Computed Efficiently", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96145", "id": "CSjVSnvTbG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CSjVSnvTbG", "openreview": "https://openreview.net/forum?id=CSjVSnvTbG", "poster": "/media/PosterPDFs/NeurIPS%202024/96145.png?t=1731667274.276917", "project": "", "author_site": "Sergio Calo, Anders Jonsson, Gergely Neu, Ludovic Schwartz, Javier Segovia-Aguas", "tldr": "", "abstract": "We propose a new framework for formulating optimal transport distances between Markov chains. Previously known formulations studied couplings between the entire joint distribution induced by the chains, and derived solutions via a reduction to dynamic programming (DP) in an appropriately defined Markov decision process. This formulation has, however, not led to particularly efficient algorithms so far, since computing the associated DP operators requires fully solving a static optimal transport problem, and these operators need to be applied numerous times during the overall optimization process. In this work, we develop an alternative perspective by considering couplings between a ``flattened'' version of the joint distributions that we call discounted occupancy couplings, and show that calculating optimal transport distances in the full space of joint distributions can be equivalently formulated as solving a linear program (LP) in this reduced space. This LP formulation formulation allows us to port several algorithmic ideas from other areas of optimal transport theory. In particular, our formulation makes it possible to introduce an appropriate notion of entropy regularization into the optimization problem, which in turn enables us to directly calculate optimal transport distances via a Sinkhorn-like method we call Sinkhorn Value Iteration (SVI). We show both theoretically and empirically that this method converges quickly to an optimal coupling, essentially at the same computational cost of running vanilla Sinkhorn in each pair of states. Along the way, we point out that our optimal transport distance exactly matches the common notion of bisimulation metrics between Markov chains, and thus our results also apply to computing such metrics, and in fact our algorithm turns out to be significantly more efficient than the best known methods developed so far for this purpose.", "keywords": "Optimal transport;Markov chains;bisimulation metrics;Markov decision processes", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Sergio Calo;Anders Jonsson;Gergely Neu;Ludovic Schwartz;Javier Segovia-Aguas", "authorids": "~Sergio_Calo1;~Anders_Jonsson1;~Gergely_Neu1;~Ludovic_Schwartz1;~Javier_Segovia-Aguas1", "gender": "M;M;M;M;", "homepage": ";https://www.upf.edu/web/anders-jonsson;http://cs.bme.hu/~gergo;;", "dblp": ";05/3488;83/7606;;", "google_scholar": "JYO5MNAAAAAJ;https://scholar.google.es/citations?user=SI_uHCIAAAAJ;https://scholar.google.ch/citations?user=uz27G84AAAAJ;;", "orcid": ";;;;", "linkedin": ";;;ludovic-schwartz-2128ba154/;", "or_profile": "~Sergio_Calo1;~Anders_Jonsson1;~Gergely_Neu1;~Ludovic_Schwartz1;~Javier_Segovia-Aguas1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra;", "aff_domain": "upf.edu;upf.edu;upf.edu;upf.edu;", "position": "PhD student;Full Professor;Assistant Professor;PhD student;", "bibtex": "@inproceedings{\noliveira2024bisimulation,\ntitle={Bisimulation Metrics are Optimal Transport Distances, and Can be Computed Efficiently},\nauthor={Sergio Calo and Anders Jonsson and Gergely Neu and Ludovic Schwartz and Javier Segovia-Aguas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CSjVSnvTbG}\n}", "github": "", "reviewers": "NKJ1;aiTP;VaVz", "pdf_size": 1256013, "rating": "6;6;7", "confidence": "2;2;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "85;120;247", "wc_strengths": "27;92;56", "wc_weaknesses": "42;36;145", "wc_questions": "306;57;33", "wc_limitations": "1;1;11", "wc_review": "461;306;492", "wc_reply_reviewers": "65;14;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 150.66666666666666, 69.60044699728746 ], "wc_strengths_avg": [ 58.333333333333336, 26.587382136812355 ], "wc_weaknesses_avg": [ 74.33333333333333, 50.02888054802834 ], "wc_questions_avg": [ 132.0, 123.42609124492276 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 419.6666666666667, 81.36474803145538 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 24.526629518862872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aSXG580NszoJ:scholar.google.com/&scioq=Bisimulation+Metrics+are+Optimal+Transport+Distances,+and+Can+be+Computed+Efficiently&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "upf.edu;upf.edu;upf.edu;upf.edu;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Spain" }, { "title": "Bidirectional Recurrence for Cardiac Motion Tracking with Gaussian Process Latent Coding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96144", "id": "CTIFk7b9jU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CTIFk7b9jU", "openreview": "https://openreview.net/forum?id=CTIFk7b9jU", "poster": "/media/PosterPDFs/NeurIPS%202024/96144.png?t=1731328693.7891517", "project": "", "author_site": "Jiewen Yang, Yiqun Lin, Bin Pu, Xiaomeng Li", "tldr": "", "abstract": "Quantitative analysis of cardiac motion is crucial for assessing cardiac function. This analysis typically uses imaging modalities such as MRI and Echocardiograms that capture detailed image sequences throughout the heartbeat cycle. Previous methods predominantly focused on the analysis of image pairs lacking consideration of the motion dynamics and spatial variability. Consequently, these methods often overlook the long-term relationships and regional motion characteristic of cardiac. To overcome these limitations, we introduce the GPTrack, a novel unsupervised framework crafted to fully explore the temporal and spatial dynamics of cardiac motion. The GPTrack enhances motion tracking by employing the sequential Gaussian Process in the latent space and encoding statistics by spatial information at each time stamp, which robustly promotes temporal consistency and spatial variability of cardiac dynamics. Also, we innovatively aggregate sequential information in a bidirectional recursive manner, mimicking the behavior of diffeomorphic registration to better capture consistent long-term relationships of motions across cardiac regions such as the ventricles and atria. Our GPTrack significantly improves the precision of motion tracking in both 3D and 4D medical images while maintaining computational efficiency. The code is available at: https://github.com/xmed-lab/GPTrack.", "keywords": "Medical Image Analysis;Cardiac Motion Tracking", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Jiewen Yang;Yiqun Lin;Bin Pu;Xiaomeng Li", "authorids": "~Jiewen_Yang1;~Yiqun_Lin1;~Bin_Pu2;~Xiaomeng_Li1", "gender": "M;M;M;F", "homepage": "https://gitlab.com/Jiewen_Yang;;https://scholar.google.com/citations?user=JzXzqGgAAAAJ&hl=zh-CN;https://xmengli.github.io/", "dblp": "302/4089.html;26/2888;;02/9850-1", "google_scholar": "Y0MYdh8AAAAJ;dnG10ZwAAAAJ;JzXzqGgAAAAJ;uVTzPpoAAAAJ", "orcid": ";0000-0002-7697-0842;0009-0007-8771-6501;", "linkedin": ";;;", "or_profile": "~Jiewen_Yang1;~Yiqun_Lin1;~Bin_Pu2;~Xiaomeng_Li1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;hkust.edu;ust.hk", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nyang2024bidirectional,\ntitle={Bidirectional Recurrence for Cardiac Motion Tracking with Gaussian Process Latent Coding},\nauthor={Jiewen Yang and Yiqun Lin and Bin Pu and Xiaomeng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CTIFk7b9jU}\n}", "github": "", "reviewers": "Tej6;bQtH;ssxr", "pdf_size": 2190351, "rating": "4;6;7", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "66;104;51", "wc_strengths": "26;84;35", "wc_weaknesses": "69;346;90", "wc_questions": "131;6;43", "wc_limitations": "15;6;6", "wc_review": "307;546;225", "wc_reply_reviewers": "0;5;24", "wc_reply_authors": "519;125;225", "reply_reviewers": "0;1;1", "reply_authors": "6;3;4", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.66666666666667, 22.305953365762143 ], "wc_strengths_avg": [ 48.333333333333336, 25.48637980482037 ], "wc_weaknesses_avg": [ 168.33333333333334, 125.92149229667754 ], "wc_questions_avg": [ 60.0, 52.42772803266099 ], "wc_limitations_avg": [ 9.0, 4.242640687119285 ], "wc_review_avg": [ 359.3333333333333, 136.172276506229 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 10.338708279513881 ], "wc_reply_authors_avg": [ 289.6666666666667, 167.22307124184616 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.333333333333333, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184544, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BWhd00YVIkIJ:scholar.google.com/&scioq=Bidirectional+Recurrence+for+Cardiac+Motion+Tracking+with+Gaussian+Process+Latent+Coding&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ust.hk;ust.hk;hkust.edu;ust.hk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "SceneCraft: Layout-Guided 3D Scene Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96143", "id": "CTvxvAcSJN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CTvxvAcSJN", "openreview": "https://openreview.net/forum?id=CTvxvAcSJN", "poster": "/media/PosterPDFs/NeurIPS%202024/96143.png?t=1731208586.6514366", "project": "", "author_site": "Xiuyu Yang, Yunze Man, Junkun Chen, Yu-Xiong Wang", "tldr": "", "abstract": "The creation of complex 3D scenes tailored to user specifications has been a tedious and challenging task with traditional 3D modeling tools. Although some pioneering methods have achieved automatic text-to-3D generation, they are generally limited to small-scale scenes with restricted control over the shape and texture. We introduce SceneCraft, a novel method for generating detailed indoor scenes that adhere to textual descriptions and spatial layout preferences provided by users. Central to our method is a rendering-based technique, which converts 3D semantic layouts into multi-view 2D proxy maps. Furthermore, we design a semantic and depth conditioned diffusion model to generate multi-view images, which are used to learn a neural radiance field (NeRF) as the final scene representation. Without the constraints of panorama image generation, we surpass previous methods in supporting complicated indoor space generation beyond a single room, even as complicated as a whole multi-bedroom apartment with irregular shapes and layouts. Through experimental analysis, we demonstrate that our method significantly outperforms existing approaches in complex indoor scene generation with diverse textures, consistent geometry, and realistic visual quality.", "keywords": "3D Scene Generation; 3D Content Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xiuyu Yang;Yunze Man;Jun-Kun Chen;Yu-Xiong Wang", "authorids": "~Xiuyu_Yang1;~Yunze_Man2;~Jun-Kun_Chen1;~Yu-Xiong_Wang1", "gender": "M;M;;M", "homepage": "https://chatgpt.com;https://yunzeman.github.io/;https://yxw.cs.illinois.edu/;https://scholar.google.com/citations?user=_m5__wUAAAAJ", "dblp": ";230/4287.html;35/10700;333/0859", "google_scholar": ";xvQIEKAAAAAJ;T_Q-xDkAAAAJ;_m5__wUAAAAJ", "orcid": ";;;0000-0002-3431-0870", "linkedin": ";;;junkun-chen-a24520167/", "or_profile": "~Xiuyu_Yang1;~Yunze_Man2;~Yu-Xiong_Wang1;~Junkun_Chen2", "aff": "Shanghai Jiaotong University;Department of Computer Science, University of Illinois at Urbana-Champaign;Department of Computer Science, University of Illinois Urbana-Champaign;SpreeAI", "aff_domain": "sjtu.edu.cn;cs.illinois.edu;cs.illinois.edu;spreeai.com", "position": "Undergrad student;PhD student;Assistant Professor;Intern", "bibtex": "@inproceedings{\nyang2024scenecraft,\ntitle={SceneCraft: Layout-Guided 3D Scene Generation},\nauthor={Xiuyu Yang and Yunze Man and Jun-Kun Chen and Yu-Xiong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CTvxvAcSJN}\n}", "github": "", "reviewers": "DHDm;Gzhh;Nerf;DRMD", "pdf_size": 24330289, "rating": "5;6;6;6", "confidence": "4;3;3;4", "soundness": "3;3;2;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "75;86;176;109", "wc_strengths": "53;83;69;36", "wc_weaknesses": "265;210;120;98", "wc_questions": "68;69;118;3", "wc_limitations": "4;30;30;2", "wc_review": "465;478;513;248", "wc_reply_reviewers": "40;144;53;24", "wc_reply_authors": "435;172;43;50", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.5, 39.207779840230685 ], "wc_strengths_avg": [ 60.25, 17.5695048308141 ], "wc_weaknesses_avg": [ 173.25, 67.57727058708423 ], "wc_questions_avg": [ 64.5, 40.85645603818325 ], "wc_limitations_avg": [ 16.5, 13.518505834595775 ], "wc_review_avg": [ 426.0, 104.25689425644714 ], "wc_reply_reviewers_avg": [ 65.25, 46.61209606958263 ], "wc_reply_authors_avg": [ 175.0, 158.63322476707077 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16373611283956337583&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;cs.illinois.edu;cs.illinois.edu;spreeai.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Shanghai Jiao Tong University;University of Illinois Urbana-Champaign;SpreeAI", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.sjtu.edu.cn;https://illinois.edu;", "aff_unique_abbr": "SJTU;UIUC;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States;" }, { "title": "Uncertainty of Thoughts: Uncertainty-Aware Planning Enhances Information Seeking in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96142", "id": "CVpuVe1N22", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CVpuVe1N22", "openreview": "https://openreview.net/forum?id=CVpuVe1N22", "poster": "", "project": "", "author_site": "Zhiyuan Hu, Chumin Liu, Xidong Feng, Yilun Zhao, See-Kiong Ng, Anh Tuan Luu, Junxian He, Pang Wei Koh, Bryan Hooi", "tldr": "", "abstract": "In the face of uncertainty, the ability to *seek information* is of fundamental importance. In many practical applications, such as medical diagnosis and troubleshooting, the information needed to solve the task is not initially given, and has to be actively sought by asking follow-up questions (for example, a doctor asking a patient for more details about their symptoms). In this work, we introduce **Uncertainty of Thoughts (UoT)**, an algorithm to augment large language models with the ability to actively seek information by asking effective questions. UoT combines:\n\n1. An *uncertainty-aware simulation approach* which enables the model to simulate possible future scenarios and how likely they are to occur,\n2. *Uncertainty-based rewards* motivated by information gain which incentivizes the model to seek information, and\n3. A *reward propagation scheme* to select the optimal question to ask in a way that maximizes the expected reward.\n\nIn experiments on medical diagnosis, troubleshooting and the `20 Questions' game, UoT achieves an average performance improvement of 38.1% in the rate of successful task completion across multiple LLMs compared with direct prompting, and also improves efficiency (i.e., the number of questions needed to complete the task).", "keywords": "Large Language Model;Planning;Uncertainty;Reseasoning;Information Seeking", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/02e49e34f90295c60d56e6c183741bec35e20832.zip", "author": "Zhiyuan Hu;Chumin Liu;Xidong Feng;Yilun Zhao;See-Kiong Ng;Anh Tuan Luu;Junxian He;Pang Wei Koh;Bryan Hooi", "authorids": "~Zhiyuan_Hu4;~Chumin_Liu2;~Xidong_Feng1;~Yilun_Zhao1;~See-Kiong_Ng1;~Anh_Tuan_Luu2;~Junxian_He1;~Pang_Wei_Koh1;~Bryan_Hooi1", "gender": "M;;;M;M;M;M;;F", "homepage": "https://zhiyuanhubj.github.io/zhiyuan.github.io/;https://waterhorse1.github.io/;https://yilunzhao.github.io/;https://www.comp.nus.edu.sg/~ngsk/;https://tuanluu.github.io/;https://jxhe.github.io;http://cs.stanford.edu/~pangwei;http://bhooi.github.io;https://ntu-nail.github.io/author/liu-chumin/", "dblp": ";;271/8391;00/5480;81/8329.html;188/6127.html;10/10453;169/9975;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;JfOLNu8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ;https://scholar.google.com.sg/citations?hl=en;BIFGeoUAAAAJ;Nn990CkAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-6565-7511;;;;0000-0002-5645-1754;0000-0002-2993-0725", "linkedin": ";;;seekiong/?originalSubdomain=sg;;;;;", "or_profile": "~Zhiyuan_Hu4;~Xidong_Feng1;~Yilun_Zhao1;~See-Kiong_Ng1;~Anh_Tuan_Luu2;~Junxian_He1;~Pang_Wei_Koh1;~Bryan_Hooi1;~CHUMIN_LIU1", "aff": "National University of Singapore;University College London;Yale University;National University of Singapore;Nanyang Technological University;Hong Kong University of Science and Technology;University of Washington;National University of Singapore;Nanyang Technological University", "aff_domain": "u.nus.edu;ucl.ac.uk;yale.edu;nus.edu.sg;ntu.edu.sg;ust.hk;cs.washington.edu;nus.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Assistant Professor;Assistant Professor;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nhu2024uncertainty,\ntitle={Uncertainty of Thoughts: Uncertainty-Aware Planning Enhances Information Seeking in {LLM}s},\nauthor={Zhiyuan Hu and Chumin Liu and Xidong Feng and Yilun Zhao and See-Kiong Ng and Anh Tuan Luu and Junxian He and Pang Wei Koh and Bryan Hooi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CVpuVe1N22}\n}", "github": "", "reviewers": "moAn;Ho3e;c9Ar;W6dt;YbAg", "pdf_size": 1141452, "rating": "6;6;7;7;10", "confidence": "3;3;3;4;2", "soundness": "3;2;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;2;3;4;4", "wc_summary": "96;134;99;47;162", "wc_strengths": "61;61;84;41;83", "wc_weaknesses": "225;114;106;108;51", "wc_questions": "35;175;21;17;180", "wc_limitations": "4;7;11;18;11", "wc_review": "421;491;321;231;487", "wc_reply_reviewers": "24;16;5;22;163", "wc_reply_authors": "862;778;217;389;458", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;5;2;3;2", "rating_avg": [ 7.2, 1.469693845669907 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 107.6, 38.82576464153668 ], "wc_strengths_avg": [ 66.0, 16.049922118191105 ], "wc_weaknesses_avg": [ 120.8, 56.848570782386425 ], "wc_questions_avg": [ 85.6, 75.290371230324 ], "wc_limitations_avg": [ 10.2, 4.707440918375928 ], "wc_review_avg": [ 390.2, 100.61093379946337 ], "wc_reply_reviewers_avg": [ 46.0, 58.872744118140105 ], "wc_reply_authors_avg": [ 540.8, 242.56083772942407 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.2, 1.16619037896906 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7186869163863663244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "u.nus.edu;ucl.ac.uk;yale.edu;nus.edu.sg;ntu.edu.sg;ust.hk;cs.washington.edu;nus.edu.sg;ntu.edu.sg", "author_num": 9, "aff_unique_index": "0;1;2;0;3;4;5;0;3", "aff_unique_norm": "National University of Singapore;University College London;Yale University;Nanyang Technological University;Hong Kong University of Science and Technology;University of Washington", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ucl.ac.uk;https://www.yale.edu;https://www.ntu.edu.sg;https://www.ust.hk;https://www.washington.edu", "aff_unique_abbr": "NUS;UCL;Yale;NTU;HKUST;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;0;0;3;2;0;0", "aff_country_unique": "Singapore;United Kingdom;United States;China" }, { "title": "Exploring and Exploiting the Asymmetric Valley of Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96141", "id": "CW0OVWEKKu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CW0OVWEKKu", "openreview": "https://openreview.net/forum?id=CW0OVWEKKu", "poster": "/media/PosterPDFs/NeurIPS%202024/96141.png?t=1729863937.8221571", "project": "", "author_site": "Xin-Chun Li, Jin-Lin Tang, Bo Zhang, Lan Li, De-Chuan Zhan", "tldr": "", "abstract": "Exploring the loss landscape offers insights into the inherent principles of deep neural networks (DNNs). Recent work suggests an additional asymmetry of the valley beyond the flat and sharp ones, yet without thoroughly examining its causes or implications. Our study methodically explores the factors affecting the symmetry of DNN valleys, encompassing (1) the dataset, network architecture, initialization, and hyperparameters that influence the convergence point; and (2) the magnitude and direction of the noise for 1D visualization. Our major observation shows that the {\\it degree of sign consistency} between the noise and the convergence point is a critical indicator of valley symmetry. Theoretical insights from the aspects of ReLU activation and softmax function could explain the interesting phenomenon. Our discovery propels novel understanding and applications in the scenario of Model Fusion: (1) the efficacy of interpolating separate models significantly correlates with their sign consistency ratio, and (2) imposing sign alignment during federated learning emerges as an innovative approach for model parameter alignment.", "keywords": "Loss Landscape;Asymmetric Valley;Model Fusion;Perturbation Analysis", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Xin-Chun Li;Jin-Lin Tang;Bo Zhang;Lan Li;De-Chuan Zhan", "authorids": "~Xin-Chun_Li1;~Jin-Lin_Tang1;~Bo_Zhang34;~Lan_Li2;~De-Chuan_Zhan1", "gender": "M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/tangjl/;http://www.lamda.nju.edu.cn/zhangb/;http://www.lamda.nju.edu.cn/lil/;http://www.lamda.nju.edu.cn/zhandc/;http://www.lamda.nju.edu.cn/lixc/", "dblp": ";;21/820-1;74/498;https://dblp.uni-trier.de/pid/246/2947", "google_scholar": ";;https://scholar.google.com.hk/citations?user=D2vVpPUAAAAJ;mYJf4TcAAAAJ;7WOxRe0AAAAJ", "orcid": ";;;0000-0002-3533-2078;", "linkedin": ";;;;", "or_profile": "~Jin-Lin_Tang1;~Bo_Zhang34;~Lan_Li2;~De-Chuan_Zhan1;~Li_Xin-Chun1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;MS student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2024exploring,\ntitle={Exploring and Exploiting the Asymmetric Valley of Deep Neural Networks},\nauthor={Xin-Chun Li and Jin-Lin Tang and Bo Zhang and Lan Li and De-Chuan Zhan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CW0OVWEKKu}\n}", "github": "", "reviewers": "Jp1U;MUA2;QEpC", "pdf_size": 1614862, "rating": "4;5;7", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "95;100;105", "wc_strengths": "37;68;210", "wc_weaknesses": "108;196;96", "wc_questions": "14;2;369", "wc_limitations": "19;6;8", "wc_review": "273;372;788", "wc_reply_reviewers": "131;0;27", "wc_reply_authors": "351;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.0, 4.08248290463863 ], "wc_strengths_avg": [ 105.0, 75.31710739710246 ], "wc_weaknesses_avg": [ 133.33333333333334, 44.58200932613464 ], "wc_questions_avg": [ 128.33333333333334, 170.24753220596833 ], "wc_limitations_avg": [ 11.0, 5.715476066494082 ], "wc_review_avg": [ 477.6666666666667, 223.12975796358697 ], "wc_reply_reviewers_avg": [ 52.666666666666664, 56.47615032520857 ], "wc_reply_authors_avg": [ 117.0, 165.46298679765212 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13204197861468576979&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GV-Rep: A Large-Scale Dataset for Genetic Variant Representation Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97813", "id": "CW9SJyhpVt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CW9SJyhpVt", "openreview": "https://openreview.net/forum?id=CW9SJyhpVt", "poster": "/media/PosterPDFs/NeurIPS%202024/97813.png?t=1733515061.7060049", "project": "", "author_site": "Zehui Li, Vallijah Subasri, Guy-Bart Stan, Yiren Zhao, Bo Wang", "tldr": "", "abstract": "Genetic variants (GVs) are defined as differences in the DNA sequences among individuals and play a crucial role in diagnosing and treating genetic diseases. The rapid decrease in next generation sequencing cost, analogous to Moore\u2019s Law, has led to an exponential increase in the availability of patient-level GV data. This growth poses a challenge for clinicians who must efficiently prioritize patient-specific GVs and integrate them with existing genomic databases to inform patient management. To addressing the interpretation of GVs, genomic foundation models (GFMs) have emerged. However, these models lack standardized performance assessments, leading to considerable variability in model evaluations. This poses the question: *How effectively do deep learning methods classify unknown GVs and align them with clinically-verified GVs?* We argue that representation learning, which transforms raw data into meaningful feature spaces, is an effective approach for addressing both indexing and classification challenges. We introduce a large-scale Genetic Variant dataset, named $\\textsf{GV-Rep}$, featuring variable-length contexts and detailed annotations, designed for deep learning models to learn GV representations across various traits, diseases, tissue types, and experimental contexts. Our contributions are three-fold: (i) $\\textbf{Construction}$ of a comprehensive dataset with 7 million records, each labeled with characteristics of the corresponding variants, alongside additional data from 17,548 gene knockout tests across 1,107 cell types, 1,808 variant combinations, and 156 unique clinically-verified GVs from real-world patients. (ii) $\\textbf{Analysis}$ of the structure and properties of the dataset. (iii) $\\textbf{Experimentation}$ of the dataset with pre-trained genomic foundation models (GFMs). The results highlight a significant disparity between the current capabilities of GFMs and the accurate representation of GVs. We hope this dataset will advance genomic deep learning to bridge this gap.", "keywords": "Computational Biology;Genomics;Clinical;AI for Science", "primary_area": "", "supplementary_material": "", "author": "Zehui Li;Vallijah Subasri;Guy-Bart Stan;Yiren Zhao;BO WANG", "authorids": "~Zehui_Li2;~Vallijah_Subasri1;~Guy-Bart_Stan1;~Yiren_Zhao2;~BO_WANG11", "gender": ";F;M;M;M", "homepage": "https://zehui127.github.io/;;https://gstan.bg-research.cc.ic.ac.uk/welcome.html;https://aaronzhao.me;https://wanglab.ai/", "dblp": ";;https://dblp.uni-trier.de/pid/52/7139.html;https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren;", "google_scholar": ";029JbjsAAAAJ;s2xYpAYAAAAJ;lOOmgEgAAAAJ;37FDILIAAAAJ", "orcid": ";0000-0002-6584-877X;0000-0002-5560-902X;;", "linkedin": ";;guystan/;yiren-aaron-zhao-baa8b5116/;", "or_profile": "~Zehui_Li2;~Vallijah_Subasri1;~Guy-Bart_Stan1;~Yiren_Zhao2;~BO_WANG11", "aff": "Imperial College London;University of Toronto;Imperial College London;Imperial College London;Vector Institute", "aff_domain": "ic.ac.uk;utoronto.ca;imperial.ac.uk;ic.ac.uk;vectorinstitute.ai", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024gvrep,\ntitle={{GV}-Rep: A Large-Scale Dataset for Genetic Variant Representation Learning},\nauthor={Zehui Li and Vallijah Subasri and Guy-Bart Stan and Yiren Zhao and BO WANG},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=CW9SJyhpVt}\n}", "github": "", "reviewers": "3LTZ;TKj5;gPE3;fiQq", "pdf_size": 2244797, "rating": "5;6;6;9", "confidence": "4;3;3;4", "wc_summary_and_contributions": "31;36;62;74", "wc_strengths": "22;45;103;28", "wc_improvement": "263;16;58;6", "wc_limitations": "3;1;47;1", "wc_correctness": "9;1;31;1", "wc_clarity": "4;1;26;1", "wc_relation_to_prior_work": "9;1;26;1", "wc_documentation": "33;1;28;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "375;103;382;114", "wc_reply_reviewers": "362;0;0;0", "wc_reply_authors": "1409;0;0;0", "reply_reviewers": "4;0;0;0", "reply_authors": "7;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 50.75, 17.851820635442202 ], "wc_strengths_avg": [ 49.5, 32.01952529317073 ], "wc_improvement_avg": [ 85.75, 104.17863264604695 ], "wc_limitations_avg": [ 13.0, 19.6468827043885 ], "wc_correctness_avg": [ 10.5, 12.278029157808675 ], "wc_clarity_avg": [ 8.0, 10.464224768228174 ], "wc_relation_to_prior_work_avg": [ 9.25, 10.207227831296802 ], "wc_documentation_avg": [ 15.75, 14.85555451674558 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 243.5, 135.07868077531703 ], "wc_reply_reviewers_avg": [ 90.5, 156.7505980849834 ], "wc_reply_authors_avg": [ 352.25, 610.114896966137 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.5, 2.598076211353316 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.33333333333333337, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11844241194396518412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ic.ac.uk;utoronto.ca;imperial.ac.uk;ic.ac.uk;vectorinstitute.ai", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Imperial College London;University of Toronto;Vector Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "ICL;U of T;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Quantum Deep Equilibrium Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96140", "id": "CWhwKb0Q4k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CWhwKb0Q4k", "openreview": "https://openreview.net/forum?id=CWhwKb0Q4k", "poster": "/media/PosterPDFs/NeurIPS%202024/96140.png?t=1733431041.8168423", "project": "", "author_site": "Philipp Schleich, Marta Skreta, Lasse Kristensen, Rodrigo Vargas-Hernandez, Alan Aspuru-Guzik", "tldr": "", "abstract": "The feasibility of variational quantum algorithms, the most popular correspondent of neural networks on noisy, near-term quantum hardware, is highly impacted by the circuit depth of the involved parametrized quantum circuits (PQCs). Higher depth increases expressivity, but also results in a detrimental accumulation of errors. Furthermore, the number of parameters involved in the PQC significantly influences the performance through the necessary number of measurements to evaluate gradients, which scales linearly with the number of parameters.\n Motivated by this, we look at deep equilibrium models (DEQs), which mimic an infinite-depth, weight-tied network using a fraction of the memory by employing a root solver to find the fixed points of the network. In this work, we present Quantum Deep Equilibrium Models (QDEQs): a training paradigm that learns parameters of a quantum machine learning model given by a PQC using DEQs. To our knowledge, no work has yet explored the application of DEQs to QML models. We apply QDEQs to find the parameters of a quantum circuit in two settings: the first involves classifying MNIST-4 digits with 4 qubits; the second extends it to 10 classes of MNIST, FashionMNIST and CIFAR. We find that QDEQ is not only competitive with comparable existing baseline models, but also achieves higher performance than a network with 5 times more layers. This demonstrates that the QDEQ paradigm can be used to develop significantly more shallow quantum circuits for a given task, something which is essential for the utility of near-term quantum computers. \n Our code is available at \\url{https://github.com/martaskrt/qdeq}.", "keywords": "quantum;deep equilibrium models;quantum machine learning", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Philipp Schleich;Marta Skreta;Lasse Bj\u00f8rn Kristensen;Rodrigo Vargas-Hernandez;Alan Aspuru-Guzik", "authorids": "~Philipp_Schleich1;~Marta_Skreta1;~Lasse_Bj\u00f8rn_Kristensen1;~Rodrigo_Vargas-Hernandez2;~Alan_Aspuru-Guzik2", "gender": "Not Specified;F;M;M;M", "homepage": ";;;https://chemai-lab.com/;http://matter.toronto.edu", "dblp": ";255/5167;;;", "google_scholar": "SWUHnvQAAAAJ;https://scholar.google.ca/citations?user=OYd3hjYAAAAJ;https://scholar.google.ca/citations?user=MMdBMTwAAAAJ;https://scholar.google.com/citations?view_op=list_works;Ag_6KEgAAAAJ", "orcid": "0000-0002-4336-4555;;;0000-0002-5559-6521;0000-0002-8277-4434", "linkedin": ";martaskreta/;;rodrigo-a-vargas-hernandez-591368141/;", "or_profile": "~Philipp_Schleich1;~Marta_Skreta1;~Lasse_Bj\u00f8rn_Kristensen1;~Rodrigo_Vargas-Hernandez2;~Alan_Aspuru-Guzik2", "aff": "University of Toronto;Department of Computer Science, University of Toronto;University of Toronto ;McMaster University;University of Toronto", "aff_domain": "cs.toronto;cs.toronto.edu;mail.utoronto.ca;mcmaster.ca;utoronto.ca", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nschleich2024quantum,\ntitle={Quantum Deep Equilibrium Models},\nauthor={Philipp Schleich and Marta Skreta and Lasse Bj{\\o}rn Kristensen and Rodrigo Vargas-Hernandez and Alan Aspuru-Guzik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CWhwKb0Q4k}\n}", "github": "", "reviewers": "1Wfk;fCtD;BzP4", "pdf_size": 1065987, "rating": "4;6;6", "confidence": "4;3;2", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "58;59;95", "wc_strengths": "27;53;131", "wc_weaknesses": "133;26;120", "wc_questions": "20;29;64", "wc_limitations": "3;8;11", "wc_review": "241;175;421", "wc_reply_reviewers": "0;14;23", "wc_reply_authors": "0;10;24", "reply_reviewers": "0;1;1", "reply_authors": "1;2;3", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 70.66666666666667, 17.21110752456745 ], "wc_strengths_avg": [ 70.33333333333333, 44.19150245113747 ], "wc_weaknesses_avg": [ 93.0, 47.672493816315786 ], "wc_questions_avg": [ 37.666666666666664, 18.979521127315678 ], "wc_limitations_avg": [ 7.333333333333333, 3.299831645537222 ], "wc_review_avg": [ 279.0, 103.96153134693621 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 9.463379711052259 ], "wc_reply_authors_avg": [ 11.333333333333334, 9.843215373488933 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KTqxNVw7SQwJ:scholar.google.com/&scioq=Quantum+Deep+Equilibrium+Models&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "cs.toronto;cs.toronto.edu;mail.utoronto.ca;mcmaster.ca;utoronto.ca", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Toronto;McMaster University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.mcmaster.ca", "aff_unique_abbr": "U of T;McMaster", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "OccFusion: Rendering Occluded Humans with Generative Diffusion Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96139", "id": "CZwphz5vgz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CZwphz5vgz", "openreview": "https://openreview.net/forum?id=CZwphz5vgz", "poster": "", "project": "", "author_site": "Adam Sun, Tiange Xiang, Scott Delp, Fei-Fei Li, Ehsan Adeli", "tldr": "", "abstract": "Existing human rendering methods require every part of the human to be fully visible throughout the input video. However, this assumption does not hold in real-life settings where obstructions are common, resulting in only partial visibility of the human. Considering this, we present OccFusion, an approach that utilizes efficient 3D Gaussian splatting supervised by pretrained 2D diffusion models for efficient and high-fidelity human rendering. We propose a pipeline consisting of three stages. In the Initialization stage, complete human masks are generated from partial visibility masks. In the Optimization stage, 3D human Gaussians are optimized with additional supervisions by Score-Distillation Sampling (SDS) to create a complete geometry of the human. Finally, in the Refinement stage, in-context inpainting is designed to further improve rendering quality on the less observed human body parts. We evaluate OccFusion on ZJU-MoCap and challenging OcMotion sequences and found that it achieves state-of-the-art performance in the rendering of occluded humans.", "keywords": "vision;gaussian splatting;human;rendering;diffusion", "primary_area": "machine_vision", "supplementary_material": "/attachment/9d3aca26de50a5906f5eb089fcdbbe3697992c09.zip", "author": "Adam Sun;Tiange Xiang;Scott Delp;Li Fei-Fei;Ehsan Adeli", "authorids": "~Adam_Sun1;~Tiange_Xiang1;~Scott_Delp1;~Li_Fei-Fei1;~Ehsan_Adeli1", "gender": "M;M;M;F;M", "homepage": "https://adamsunn.github.io;https://tiangexiang.github.io/;https://nmbl.stanford.edu/people/scott-delp/;https://profiles.stanford.edu/fei-fei-li;http://stanford.edu/~eadeli/", "dblp": "354/7211;245/7663;;79/2528;93/2941", "google_scholar": ";;OEivUAQAAAAJ;rDfyQnIAAAAJ;7NX_J_cAAAAJ", "orcid": ";;;;0000-0002-0579-7763", "linkedin": "adam-sun/;;;fei-fei-li-4541247/;eadeli", "or_profile": "~Adam_Sun1;~Tiange_Xiang1;~Scott_Delp1;~Li_Fei-Fei1;~Ehsan_Adeli1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Undergrad student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsun2024occfusion,\ntitle={OccFusion: Rendering Occluded Humans with Generative Diffusion Priors},\nauthor={Adam Sun and Tiange Xiang and Scott Delp and Li Fei-Fei and Ehsan Adeli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CZwphz5vgz}\n}", "github": "", "reviewers": "BvJA;gVDG;9Cmr;WHSE;Vqeb", "pdf_size": 6296767, "rating": "5;5;6;7;7", "confidence": "5;5;4;3;3", "soundness": "3;3;4;3;3", "novelty": "2;2;4;3;3", "presentation": "2;3;3;3;3", "wc_summary": "80;61;68;64;146", "wc_strengths": "33;48;75;77;61", "wc_weaknesses": "85;68;411;16;29", "wc_questions": "158;39;23;13;37", "wc_limitations": "15;4;1;6;1", "wc_review": "371;220;578;176;274", "wc_reply_reviewers": "50;75;19;9;0", "wc_reply_authors": "217;137;74;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 83.8, 31.764130713746912 ], "wc_strengths_avg": [ 58.8, 16.61806246227279 ], "wc_weaknesses_avg": [ 121.8, 146.75884981833292 ], "wc_questions_avg": [ 54.0, 52.86208471106678 ], "wc_limitations_avg": [ 5.4, 5.1613951602255765 ], "wc_review_avg": [ 323.8, 142.78991561031194 ], "wc_reply_reviewers_avg": [ 30.6, 27.875437216302096 ], "wc_reply_authors_avg": [ 85.6, 83.30330125511233 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11544208338334078479&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dispelling the Mirage of Progress in Offline MARL through Standardised Baselines and Evaluation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97812", "id": "CaAJeNkceP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CaAJeNkceP", "openreview": "https://openreview.net/forum?id=CaAJeNkceP", "poster": "", "project": "", "author_site": "Juan Formanek, Callum R. Tilbury, Louise Beyers, Jonathan Shock, Arnu Pretorius", "tldr": "", "abstract": "Offline multi-agent reinforcement learning (MARL) is an emerging field with great promise for real-world applications. Unfortunately, the current state of research in offline MARL is plagued by inconsistencies in baselines and evaluation protocols, which ultimately makes it difficult to accurately assess progress, trust newly proposed innovations, and allow researchers to easily build upon prior work. In this paper, we firstly identify significant shortcomings in existing methodologies for measuring the performance of novel algorithms through a representative study of published offline MARL work. Secondly, by directly comparing to this prior work, we demonstrate that simple, well-implemented baselines can achieve state-of-the-art (SOTA) results across a wide range of tasks. Specifically, we show that on 35 out of 47 datasets used in prior work (almost 75\\% of cases), we match or surpass the performance of the current purported SOTA. Strikingly, our baselines often substantially outperform these more sophisticated algorithms. Finally, we correct for the shortcomings highlighted from this prior work by introducing a straightforward standardised methodology for evaluation and by providing our baseline implementations with statistically robust results across several scenarios, useful for comparisons in future work. Our proposal includes simple and sensible steps that are easy to adopt, which in combination with solid baselines and comparative results, could substantially improve the overall rigour of empirical science in offline MARL moving forward.", "keywords": "Offline Multi-Agent Reinforcement Learning;Multi-Agent Reinforcement Learning;Offline Reinforcement Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Juan Claude Formanek;Callum Rhys Tilbury;Louise Beyers;Jonathan Phillip Shock;Arnu Pretorius", "authorids": "~Juan_Claude_Formanek1;~Callum_Rhys_Tilbury1;~Louise_Beyers1;~Jonathan_Phillip_Shock1;~Arnu_Pretorius1", "gender": "M;M;F;M;M", "homepage": ";https://callum.tilbury.co.za;https://www.linkedin.com/in/louisebeyers/?originalSubdomain=za;http://www.shocklab.net;", "dblp": ";341/1568;;161/9917;188/4368", "google_scholar": "YGkp5PYAAAAJ;leaLiLIAAAAJ;;lR_UV54AAAAJ;zZ6ydrAAAAAJ", "orcid": ";;;0000-0003-3757-0376;", "linkedin": "claude-formanek/;https://linkedin.com/in/callumtilbury;;;arnupretorius/", "or_profile": "~Juan_Claude_Formanek1;~Callum_Rhys_Tilbury1;~Louise_Beyers1;~Jonathan_Phillip_Shock1;~Arnu_Pretorius1", "aff": "University of Cape Town;InstaDeep;InstaDeep;University of Cape Town;InstaDeep", "aff_domain": "uct.ac.za;instadeep.com;instadeep.com;uct.ac.za;instadeep.com", "position": "PhD student;Researcher;Researcher;Lecturer;Researcher", "bibtex": "@inproceedings{\nformanek2024dispelling,\ntitle={Dispelling the Mirage of Progress in Offline {MARL} through Standardised Baselines and Evaluation},\nauthor={Juan Claude Formanek and Callum Rhys Tilbury and Louise Beyers and Jonathan Phillip Shock and Arnu Pretorius},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=CaAJeNkceP}\n}", "github": "", "reviewers": "2EYb;qAJW;CVYJ", "pdf_size": 912014, "rating": "5;6;7", "confidence": "3;4;4", "wc_summary_and_contributions": "66;78;81", "wc_strengths": "92;156;47", "wc_improvement": "28;173;272", "wc_limitations": "3;11;11", "wc_correctness": "2;5;1", "wc_clarity": "1;5;1", "wc_relation_to_prior_work": "5;7;1", "wc_documentation": "12;4;1", "wc_additional_feedback": "1;1;1", "wc_review": "210;440;416", "wc_reply_reviewers": "0;0;34", "wc_reply_authors": "77;77;77", "reply_reviewers": "0;0;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 75.0, 6.48074069840786 ], "wc_strengths_avg": [ 98.33333333333333, 44.72384400096018 ], "wc_improvement_avg": [ 157.66666666666666, 100.20090928840028 ], "wc_limitations_avg": [ 8.333333333333334, 3.7712361663282534 ], "wc_correctness_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_clarity_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 4.333333333333333, 2.494438257849294 ], "wc_documentation_avg": [ 5.666666666666667, 4.642796092394707 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 355.3333333333333, 103.23220858282986 ], "wc_reply_reviewers_avg": [ 11.333333333333334, 16.027753706895076 ], "wc_reply_authors_avg": [ 77.0, 0.0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4702501562615571678&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "uct.ac.za;instadeep.com;instadeep.com;uct.ac.za;instadeep.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Cape Town;InstaDeep", "aff_unique_dep": ";", "aff_unique_url": "https://www.uct.ac.za;https://www.instadeep.com", "aff_unique_abbr": "UCT;InstaDeep", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "South Africa;United Kingdom" }, { "title": "UV-free Texture Generation with Denoising and Geodesic Heat Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96138", "id": "Cb1Md0RvqF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cb1Md0RvqF", "openreview": "https://openreview.net/forum?id=Cb1Md0RvqF", "poster": "/media/PosterPDFs/NeurIPS%202024/96138.png?t=1731641067.6414807", "project": "", "author_site": "Simone Foti, Stefanos Zafeiriou, Tolga Birdal", "tldr": "", "abstract": "Seams, distortions, wasted UV space, vertex-duplication, and varying resolution over the surface are the most prominent issues of the standard UV-based texturing of meshes. These issues are particularly acute when automatic UV-unwrapping techniques are used. For this reason, instead of generating textures in automatically generated UV-planes like most state-of-the-art methods, we propose to represent textures as coloured point-clouds whose colours are generated by a denoising diffusion probabilistic model constrained to operate on the surface of 3D objects. Our sampling and resolution agnostic generative model heavily relies on heat diffusion over the surface of the meshes for spatial communication between points. To enable processing of arbitrarily sampled point-cloud textures and ensure long-distance texture consistency we introduce a fast re-sampling of the mesh spectral properties used during the heat diffusion and introduce a novel heat-diffusion-based self-attention mechanism. Our code and pre-trained models are available at github.com/simofoti/UV3-TeD.", "keywords": "Texture Generation;Heat diffusion;DDPM", "primary_area": "machine_vision", "supplementary_material": "/attachment/769b93b558340f35ec4c54b137972bb0a116ccdb.zip", "author": "Simone Foti;Stefanos Zafeiriou;Tolga Birdal", "authorids": "~Simone_Foti1;~Stefanos_Zafeiriou1;~Tolga_Birdal3", "gender": "M;M;M", "homepage": "https://www.simofoti.com/;http://www.imperial.ac.uk/people/s.zafeiriou/;http://tolgabirdal.github.io", "dblp": "246/7113;25/1885.html;143/7056", "google_scholar": "BuWPfNsAAAAJ;QKOH5iYAAAAJ;_Bxd5ggAAAAJ", "orcid": ";;0000-0001-7915-7964", "linkedin": "simone-foti/;;https://linkedin.com/in/tbirdal", "or_profile": "~Simone_Foti1;~Stefanos_Zafeiriou1;~Tolga_Birdal3", "aff": "Imperial College London;Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;ic.ac.uk;imperial.ac.uk", "position": "Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nfoti2024uvfree,\ntitle={{UV}-free Texture Generation with Denoising and Geodesic Heat Diffusion},\nauthor={Simone Foti and Stefanos Zafeiriou and Tolga Birdal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cb1Md0RvqF}\n}", "github": "", "reviewers": "CcU9;Yhxo;cPWU", "pdf_size": 36643113, "rating": "5;5;6", "confidence": "4;3;3", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "31;113;149", "wc_strengths": "12;68;101", "wc_weaknesses": "23;140;28", "wc_questions": "35;23;152", "wc_limitations": "39;27;10", "wc_review": "140;371;440", "wc_reply_reviewers": "14;0;0", "wc_reply_authors": "28;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.66666666666667, 49.378357832376544 ], "wc_strengths_avg": [ 60.333333333333336, 36.736297521056144 ], "wc_weaknesses_avg": [ 63.666666666666664, 54.01440137181524 ], "wc_questions_avg": [ 70.0, 58.18934610390462 ], "wc_limitations_avg": [ 25.333333333333332, 11.897712198383164 ], "wc_review_avg": [ 317.0, 128.28873683998918 ], "wc_reply_reviewers_avg": [ 4.666666666666667, 6.599663291074443 ], "wc_reply_authors_avg": [ 9.333333333333334, 13.199326582148887 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13997501932181002061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "imperial.ac.uk;ic.ac.uk;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Spatio-Spectral Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96137", "id": "Cb3kcwYBgw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cb3kcwYBgw", "openreview": "https://openreview.net/forum?id=Cb3kcwYBgw", "poster": "", "project": "", "author_site": "Simon Geisler, Arthur Kosmala, Daniel Herbst, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Spatial Message Passing Graph Neural Networks (MPGNNs) are widely used for learning on graph-structured data. However, key limitations of *\u2113*-step MPGNNs are that their \"receptive field\" is typically limited to the *\u2113*-hop neighborhood of a node and that information exchange between distant nodes is limited by over-squashing. Motivated by these limitations, we propose *Spatio-Spectral Graph Neural Networks (S\u00b2GNNs)* \u2013 a new modeling paradigm for Graph Neural Networks (GNNs) that synergistically combines spatially and spectrally parametrized graph filters. Parameterizing filters partially in the frequency domain enables global yet efficient information propagation. We show that S\u00b2GNNs vanquish over-squashing and yield strictly tighter approximation-theoretic error bounds than MPGNNs. Further, rethinking graph convolutions at a fundamental level unlocks new design spaces. For example, S\u00b2GNNs allow for free positional encodings that make them strictly more expressive than the 1-Weisfeiler-Leman (WL) test. Moreover, to obtain general-purpose S\u00b2GNNs, we propose spectrally parametrized filters for directed graphs. S\u00b2GNNs outperform spatial MPGNNs, graph transformers, and graph rewirings, e.g., on the peptide long-range benchmark tasks, and are competitive with state-of-the-art sequence modeling. On a 40 GB GPU, S\u00b2GNNs scale to millions of nodes.", "keywords": "Graph Neural Networks;long-range interactions", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Simon Geisler;Arthur Kosmala;Daniel Herbst;Stephan G\u00fcnnemann", "authorids": "~Simon_Geisler1;~Arthur_Kosmala1;~Daniel_Herbst1;~Stephan_G\u00fcnnemann1", "gender": ";M;M;M", "homepage": ";https://danielherbst.com;http://www.daml.in.tum.de;https://www.in.tum.de/en/daml/team/simon-geisler/", "dblp": ";;43/3011;237/0253", "google_scholar": ";https://scholar.google.com/citations?hl=de;;00x9jJwAAAAJ", "orcid": ";;;0000-0003-0867-1856", "linkedin": "arthur-kosmala-9219371b2/;daniel-herbst-869547178/;;simon-geisler-ai/", "or_profile": "~Arthur_Kosmala1;~Daniel_Herbst1;~Stephan_G\u00fcnnemann1;~Simon_Markus_Geisler1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich;Technical University Munich;Technical University Munich", "aff_domain": "tum.de;tum.de;tum.de;tum.de", "position": "PhD student;MS student;Professor;PhD student", "bibtex": "@inproceedings{\ngeisler2024spatiospectral,\ntitle={Spatio-Spectral Graph Neural Networks},\nauthor={Simon Geisler and Arthur Kosmala and Daniel Herbst and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cb3kcwYBgw}\n}", "github": "", "reviewers": "ohZY;H69b;EZD5;rAa9", "pdf_size": 2167434, "rating": "5;6;6;8", "confidence": "3;4;5;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "62;67;60;92", "wc_strengths": "34;75;64;276", "wc_weaknesses": "107;59;190;660", "wc_questions": "54;64;2;111", "wc_limitations": "1;7;1;8", "wc_review": "258;272;317;1147", "wc_reply_reviewers": "25;28;18;80", "wc_reply_authors": "167;39;47;42", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 70.25, 12.813567028739499 ], "wc_strengths_avg": [ 112.25, 95.72453969594213 ], "wc_weaknesses_avg": [ 254.0, 239.04288318207676 ], "wc_questions_avg": [ 57.75, 38.71934271136327 ], "wc_limitations_avg": [ 4.25, 3.2691742076555053 ], "wc_review_avg": [ 498.5, 375.0456638864126 ], "wc_reply_reviewers_avg": [ 37.75, 24.661457783350926 ], "wc_reply_authors_avg": [ 73.75, 53.91370419475924 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1779299061368160344&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tum.de;tum.de;tum.de;tum.de", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Taming \"data-hungry\" reinforcement learning? Stability in continuous state-action spaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96136", "id": "CbHz30KeA4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CbHz30KeA4", "openreview": "https://openreview.net/forum?id=CbHz30KeA4", "poster": "/media/PosterPDFs/NeurIPS%202024/96136.png?t=1731745432.298417", "project": "", "author_site": "Yaqi Duan, Martin Wainwright", "tldr": "", "abstract": "We introduce a novel framework for analyzing reinforcement learning (RL) in continuous state-action spaces, and use it to prove fast rates of convergence in both off-line and on-line settings. Our analysis highlights two key stability properties, relating to how changes in value functions and/or policies affect the Bellman operator and occupation measures. We argue that these properties are satisfied in many continuous state-action Markov decision processes. Our analysis also offers fresh perspectives on the roles of pessimism and optimism in off-line and on-line RL.", "keywords": "reinforcement learning;continuous control;stability analysis", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/a8d0a6b4d5fefeb19dc412f882ed347b5a155cae.zip", "author": "Yaqi Duan;Martin J Wainwright", "authorids": "~Yaqi_Duan1;~Martin_J_Wainwright1", "gender": "F;", "homepage": "https://duanyq22.github.io/;", "dblp": ";", "google_scholar": "T99vQCsAAAAJ;p1DZVX8AAAAJ", "orcid": ";", "linkedin": "yaqi-duan-542062188/;", "or_profile": "~Yaqi_Duan1;~Martin_J_Wainwright1", "aff": "New York University;University of California, Berkeley", "aff_domain": "stern.nyu.edu;berkeley.edu", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nduan2024taming,\ntitle={Taming ''data-hungry'' reinforcement learning? Stability in continuous state-action spaces},\nauthor={Yaqi Duan and Martin J Wainwright},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CbHz30KeA4}\n}", "github": "", "reviewers": "bfUp;PVTM;gsVp", "pdf_size": 1275023, "rating": "7;7;7", "confidence": "3;2;3", "soundness": "3;3;4", "novelty": "4;3;3", "presentation": "4;2;3", "wc_summary": "28;89;257", "wc_strengths": "214;110;142", "wc_weaknesses": "19;289;80", "wc_questions": "1;48;210", "wc_limitations": "11;9;11", "wc_review": "273;545;700", "wc_reply_reviewers": "14;13;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 124.66666666666667, 96.83089497790579 ], "wc_strengths_avg": [ 155.33333333333334, 43.49201714746691 ], "wc_weaknesses_avg": [ 129.33333333333334, 115.61526235272267 ], "wc_questions_avg": [ 86.33333333333333, 89.5259118294189 ], "wc_limitations_avg": [ 10.333333333333334, 0.9428090415820634 ], "wc_review_avg": [ 506.0, 176.4898486221422 ], "wc_reply_reviewers_avg": [ 13.0, 0.816496580927726 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17201690716923824126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stern.nyu.edu;berkeley.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.berkeley.edu", "aff_unique_abbr": "NYU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Ex Uno Pluria: Insights on Ensembling in Low Precision Number Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96135", "id": "CbtkDWZzDq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CbtkDWZzDq", "openreview": "https://openreview.net/forum?id=CbtkDWZzDq", "poster": "", "project": "", "author_site": "Giung Nam, Juho Lee", "tldr": "", "abstract": "While ensembling deep neural networks has shown promise in improving generalization performance, scaling current ensemble methods for large models remains challenging. Given that recent progress in deep learning is largely driven by the scale, exemplified by the widespread adoption of large-scale neural network architectures, scalability emerges an increasingly critical issue for machine learning algorithms in the era of large-scale models. In this work, we first showcase the potential of low precision ensembling, where ensemble members are derived from a single model within low precision number systems in a training-free manner. Our empirical analysis demonstrates the effectiveness of our proposed low precision ensembling method compared to existing ensemble approaches.", "keywords": "ensemble methods;low precision number systems", "primary_area": "other", "supplementary_material": "/attachment/e5d307cf80395e50c4c89422c7dbcfe0ade95cc2.zip", "author": "Giung Nam;Juho Lee", "authorids": "~Giung_Nam1;~Juho_Lee2", "gender": ";M", "homepage": "https://cs-giung.github.io/;https://juho.lee.github.io", "dblp": "304/9008;55/3410-1", "google_scholar": "https://scholar.google.co.kr/citations?user=HO-fMd8AAAAJ;Py4URJUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Giung_Nam1;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nnam2024ex,\ntitle={Ex Uno Pluria: Insights on Ensembling in Low Precision Number Systems},\nauthor={Giung Nam and Juho Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CbtkDWZzDq}\n}", "github": "", "reviewers": "svQu;yx79;bKfp;9d5B", "pdf_size": 1944687, "rating": "4;4;5;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "57;79;116;60", "wc_strengths": "23;44;40;87", "wc_weaknesses": "165;97;250;143", "wc_questions": "55;71;29;41", "wc_limitations": "1;9;74;23", "wc_review": "301;300;509;354", "wc_reply_reviewers": "153;132;0;47", "wc_reply_authors": "357;93;0;41", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 23.50531854708632 ], "wc_strengths_avg": [ 48.5, 23.58495283014151 ], "wc_weaknesses_avg": [ 163.75, 55.51294893986447 ], "wc_questions_avg": [ 49.0, 15.684387141358123 ], "wc_limitations_avg": [ 26.75, 28.39344114403888 ], "wc_review_avg": [ 366.0, 85.40199060911871 ], "wc_reply_reviewers_avg": [ 83.0, 62.22137896253988 ], "wc_reply_authors_avg": [ 122.75, 139.20196658093593 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qenNi-l8jnMJ:scholar.google.com/&scioq=Ex+Uno+Pluria:+Insights+on+Ensembling+in+Low+Precision+Number+Systems&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Reward Machines for Deep RL in Noisy and Uncertain Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96134", "id": "Cc0ckJlJF2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cc0ckJlJF2", "openreview": "https://openreview.net/forum?id=Cc0ckJlJF2", "poster": "/media/PosterPDFs/NeurIPS%202024/96134.png?t=1733777084.6140733", "project": "", "author_site": "Andrew Li, Zizhao Chen, Toryn Klassen, Pashootan Vaezipoor, Rodrigo Toro Icarte, Sheila McIlraith", "tldr": "", "abstract": "Reward Machines provide an automaton-inspired structure for specifying instructions, safety constraints, and other temporally extended reward-worthy behaviour. By exposing the underlying structure of a reward function, they enable the decomposition of an RL task, leading to impressive gains in sample efficiency. Although Reward Machines and similar formal specifications have a rich history of application towards sequential decision-making problems, prior frameworks have traditionally ignored ambiguity and uncertainty when interpreting the domain-specific vocabulary forming the building blocks of the reward function. Such uncertainty critically arises in many real-world settings due to factors like partial observability or noisy sensors. In this work, we explore the use of Reward Machines for Deep RL in noisy and uncertain environments. We characterize this problem as a POMDP and propose a suite of RL algorithms that exploit task structure under uncertain interpretation of the domain-specific vocabulary. Through theory and experiments, we expose pitfalls in naive approaches to this problem while simultaneously demonstrating how task structure can be successfully leveraged under noisy interpretations of the vocabulary.", "keywords": "Reward Machines;LTL;Linear Temporal Logic;Automata;RL;Reinforcement Learning;Formal Language", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3e0f985b1cee489f1de1baf0a7e6432ff2e06a66.zip", "author": "Andrew C Li;Zizhao Chen;Toryn Q. Klassen;Pashootan Vaezipoor;Rodrigo Toro Icarte;Sheila A. McIlraith", "authorids": "~Andrew_C_Li1;~Zizhao_Chen1;~Toryn_Q._Klassen1;~Pashootan_Vaezipoor1;~Rodrigo_Toro_Icarte1;~Sheila_A._McIlraith1", "gender": "M;F;;M;M;F", "homepage": ";https://chenzizhao.github.io/;http://tqk.ca;http://www.cs.toronto.edu/~pashootan/;http://www.cs.toronto.edu/~rntoro/;http://www.cs.toronto.edu/~sheila/", "dblp": "244/1976;321/0149;213/4964;50/10263;200/8660;66/3221", "google_scholar": "I3NDYlcAAAAJ;;https://scholar.google.ca/citations?user=uNl1QHMAAAAJ;tUc11rUAAAAJ;https://scholar.google.ca/citations?user=W9DykFMAAAAJ;https://scholar.google.com.tw/citations?user=ny2zuvMAAAAJ", "orcid": ";0000-0002-5354-0292;;;0000-0002-7734-099X;0000-0003-4953-0945", "linkedin": ";;;;;sheila-mcilraith-a76aa513/?originalSubdomain=ca", "or_profile": "~Andrew_C_Li1;~Zizhao_Chen1;~Toryn_Q._Klassen1;~Pashootan_Vaezipoor1;~Rodrigo_Toro_Icarte1;~Sheila_A._McIlraith1", "aff": "Department of Computer Science, University of Toronto;Department of Computer Science, Cornell University;University of Toronto;Georgian;Pontificia Universidad Catolica de Chile;Department of Computer Science, University of Toronto", "aff_domain": "cs.toronto.edu;cs.cornell.edu;toronto.edu;georgian.io;uc.cl;cs.toronto.edu", "position": "PhD student;PhD student;Postdoc;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2024reward,\ntitle={Reward Machines for Deep {RL} in Noisy and Uncertain Environments},\nauthor={Andrew C Li and Zizhao Chen and Toryn Q. Klassen and Pashootan Vaezipoor and Rodrigo Toro Icarte and Sheila A. McIlraith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cc0ckJlJF2}\n}", "github": "", "reviewers": "qw5f;sM3w;ed91;pete", "pdf_size": 1063134, "rating": "5;5;7;8", "confidence": "2;3;3;4", "soundness": "2;2;2;4", "novelty": "3;2;3;3", "presentation": "2;2;4;4", "wc_summary": "93;89;78;77", "wc_strengths": "63;50;126;54", "wc_weaknesses": "176;234;74;107", "wc_questions": "99;92;125;24", "wc_limitations": "2;37;33;16", "wc_review": "433;502;436;278", "wc_reply_reviewers": "17;347;110;29", "wc_reply_authors": "33;1334;42;29", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 84.25, 6.905613658466566 ], "wc_strengths_avg": [ 73.25, 30.81700017847292 ], "wc_weaknesses_avg": [ 147.75, 61.92081637058736 ], "wc_questions_avg": [ 85.0, 37.302814907188974 ], "wc_limitations_avg": [ 22.0, 13.982131454109563 ], "wc_review_avg": [ 412.25, 82.26899476716608 ], "wc_reply_reviewers_avg": [ 125.75, 132.6525065726238 ], "wc_reply_authors_avg": [ 359.5, 562.6475362071711 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16550938042738923419&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.toronto.edu;cs.cornell.edu;toronto.edu;georgian.io;uc.cl;cs.toronto.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Toronto;Cornell University;Georgian Technical University;Pontificia Universidad Catolica de Chile", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;;", "aff_unique_url": "https://www.utoronto.ca;https://www.cornell.edu;https://gtu.ge;https://www.puc.cl", "aff_unique_abbr": "U of T;Cornell;GTU;PUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;1;0;2;3;0", "aff_country_unique": "Canada;United States;Georgia;Chile" }, { "title": "Spiking Neural Network as Adaptive Event Stream Slicer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96133", "id": "CcNw4mVIxo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CcNw4mVIxo", "openreview": "https://openreview.net/forum?id=CcNw4mVIxo", "poster": "/media/PosterPDFs/NeurIPS%202024/96133.png?t=1730963609.3104622", "project": "", "author_site": "Jiahang Cao, Mingyuan Sun, Ziqing Wang, Hao Cheng, Qiang Zhang, shibo zhou, Renjing Xu", "tldr": "", "abstract": "Event-based cameras are attracting significant interest as they provide rich edge information, high dynamic range, and high temporal resolution. Many state-of-the-art event-based algorithms rely on splitting the events into fixed groups, resulting in the omission of crucial temporal information, particularly when dealing with diverse motion scenarios (e.g., high/low speed). In this work, we propose SpikeSlicer, a novel-designed event processing framework capable of splitting events stream adaptively. SpikeSlicer utilizes a low-energy spiking neural network (SNN) to trigger event slicing. To guide the SNN to fire spikes at optimal time steps, we propose the Spiking Position-aware Loss (SPA-Loss) to modulate the neuron's state. Additionally, we develop a Feedback-Update training strategy that refines the slicing decisions using feedback from the downstream artificial neural network (ANN). Extensive experiments demonstrate that our method yields significant performance improvements in event-based object tracking and recognition. Notably, SpikeSlicer provides a brand-new SNN-ANN cooperation paradigm, where the SNN acts as an efficient, low-energy data processor to assist the ANN in improving downstream performance, injecting new perspectives and potential avenues of exploration.", "keywords": "Event-based Camera;Spiking Neural Network;Object Tracking;Image Recognition", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Jiahang Cao;Mingyuan Sun;Ziqing Wang;Hao Cheng;Qiang Zhang;shibo zhou;Renjing Xu", "authorids": "~Jiahang_Cao1;~Mingyuan_Sun1;~Ziqing_Wang2;~Hao_Cheng15;~Qiang_Zhang10;~shibo_zhou2;~Renjing_Xu1", "gender": "M;M;M;M;;M;", "homepage": "https://github.com/AndyCao1125;https://myuansun.github.io;https://alexandrewang915.github.io/;https://github.com/ChaduCheng;;;", "dblp": ";;58/1382;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;FSo0Zg4AAAAJ;;;;", "orcid": ";;;0000-0002-3246-6636;;;", "linkedin": ";;;;;bob-zhou;", "or_profile": "~Jiahang_Cao1;~Mingyuan_Sun1;~Ziqing_Wang2;~Hao_Cheng15;~Qiang_Zhang10;~shibo_zhou2;~Renjing_Xu1", "aff": "Hong Kong University of Science and Technology;Northeastern University;Northwestern University;Hong Kong University of Science and Technology(Guangzhou);;Huinao Zhixin;", "aff_domain": "ust.hk;neu.edu.cn;northwestern.edu;connect.hkust-gz.edu.cn;;brain-mind.com.cn;", "position": "MS student;MS student;PhD student;PhD student;;Researcher;", "bibtex": "@inproceedings{\ncao2024spiking,\ntitle={Spiking Neural Network as Adaptive Event Stream Slicer},\nauthor={Jiahang Cao and Mingyuan Sun and Ziqing Wang and Hao Cheng and Qiang Zhang and shibo zhou and Renjing Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CcNw4mVIxo}\n}", "github": "", "reviewers": "ij7X;cS27;HdTR", "pdf_size": 2976951, "rating": "5;6;6", "confidence": "4;5;3", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "49;135;37", "wc_strengths": "82;168;15", "wc_weaknesses": "56;146;23", "wc_questions": "27;42;2", "wc_limitations": "8;16;47", "wc_review": "222;507;124", "wc_reply_reviewers": "183;48;18", "wc_reply_authors": "603;52;0", "reply_reviewers": "2;1;1", "reply_authors": "3;3;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.66666666666667, 43.645032808887755 ], "wc_strengths_avg": [ 88.33333333333333, 62.622324737712795 ], "wc_weaknesses_avg": [ 75.0, 51.980765673468106 ], "wc_questions_avg": [ 23.666666666666668, 16.49915822768611 ], "wc_limitations_avg": [ 23.666666666666668, 16.81930108205715 ], "wc_review_avg": [ 284.3333333333333, 162.45272816696212 ], "wc_reply_reviewers_avg": [ 83.0, 71.76350047203663 ], "wc_reply_authors_avg": [ 218.33333333333334, 272.8275808801025 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wxBb4LYX4YkJ:scholar.google.com/&scioq=Spiking+Neural+Network+as+Adaptive+Event+Stream+Slicer&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ust.hk;neu.edu.cn;northwestern.edu;connect.hkust-gz.edu.cn;;brain-mind.com.cn;", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Hong Kong University of Science and Technology;Northeastern University;Northwestern University;Huinao Zhixin", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ust.hk;https://www.northeastern.edu;https://www.northwestern.edu;", "aff_unique_abbr": "HKUST;NEU;NU;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "LuSh-NeRF: Lighting up and Sharpening NeRFs for Low-light Scenes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96132", "id": "CcmHlE6N6u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CcmHlE6N6u", "openreview": "https://openreview.net/forum?id=CcmHlE6N6u", "poster": "/media/PosterPDFs/NeurIPS%202024/96132.png?t=1731497085.6336129", "project": "", "author_site": "Zefan Qu, Ke Xu, Gerhard Hancke, Rynson Lau", "tldr": "", "abstract": "Neural Radiance Fields (NeRFs) have shown remarkable performances in producing novel-view images from high-quality scene images. However, hand-held low-light photography challenges NeRFs as the captured images may simultaneously suffer from low visibility, noise, and camera shakes.\nWhile existing NeRF methods may handle either low light or motion, directly combining them or incorporating additional image-based enhancement methods does not work as these degradation factors are highly coupled.\nWe observe that noise in low-light images is always sharp regardless of camera shakes, which implies an implicit order of these degradation factors within the image formation process.\nThis inspires us to explore such an order to decouple and remove these degradation factors while training the NeRF.\nTo this end, we propose in this paper a novel model, named LuSh-NeRF, which can reconstruct a clean and sharp NeRF from a group of hand-held low-light images.\nThe key idea of LuSh-NeRF is to sequentially model noise and blur in the images via multi-view feature consistency and frequency information of NeRF, respectively.\nSpecifically, LuSh-NeRF includes a novel Scene-Noise Decomposition (SND) module for decoupling the noise from the scene representation and a novel Camera Trajectory Prediction (CTP) module for the estimation of camera motions based on low-frequency scene information.\nTo facilitate training and evaluations, we construct a new dataset containing both synthetic and real images.\nExperiments show that LuSh-NeRF outperforms existing approaches. Our code and dataset can be found here: https://github.com/quzefan/LuSh-NeRF.", "keywords": "Deep learning;Neural Radiance Field;Low light", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zefan Qu;Ke Xu;Gerhard Petrus Hancke;Rynson W. H. Lau", "authorids": "~Zefan_Qu1;~Ke_Xu5;~Gerhard_Petrus_Hancke1;~Rynson_W._H._Lau1", "gender": "M;M;M;M", "homepage": "https://quzefan.github.io/;https://kkbless.github.io/;https://scholars.cityu.edu.hk/en/persons/gerhard-petrus-hancke(9e59c8eb-ba32-4075-97f7-e44e82367742).html;http://www.cs.cityu.edu.hk/~rynson/", "dblp": "331/0123;181/2626-10;44/2703-2;l/RynsonWHLau", "google_scholar": "VieVpRUAAAAJ;https://scholar.google.com/citations?hl=en;C2iR3xUAAAAJ;KilQqKYAAAAJ", "orcid": ";0000-0001-5855-3810;0000-0002-2388-3542;", "linkedin": ";;gerhard-hancke-0522772/;", "or_profile": "~Zefan_Qu1;~Ke_Xu5;~Gerhard_Petrus_Hancke1;~Rynson_Lau1", "aff": "Tongji University;City University of Hong Kong;University of Pretoria;City University of Hong Kong", "aff_domain": "tongji.edu.cn;cityu.edu.hk;up.ac.za;cityu.edu.hk", "position": "MS student;Researcher;Visiting/Extraordinary Faculty;Researcher", "bibtex": "@inproceedings{\nqu2024lushnerf,\ntitle={LuSh-Ne{RF}: Lighting up and Sharpening Ne{RF}s for Low-light Scenes},\nauthor={Zefan Qu and Ke Xu and Gerhard Petrus Hancke and Rynson W. H. Lau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CcmHlE6N6u}\n}", "github": "", "reviewers": "4eod;m3Lt;54cf;4gW7", "pdf_size": 45178798, "rating": "4;6;6;6", "confidence": "3;4;5;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "1;2;2;3", "wc_summary": "90;75;66;128", "wc_strengths": "91;61;57;97", "wc_weaknesses": "292;599;77;89", "wc_questions": "102;3;50;42", "wc_limitations": "1;16;6;35", "wc_review": "576;754;256;391", "wc_reply_reviewers": "430;80;24;43", "wc_reply_authors": "708;73;25;20", "reply_reviewers": "2;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.75, 23.689396362085716 ], "wc_strengths_avg": [ 76.5, 17.684739183827393 ], "wc_weaknesses_avg": [ 264.25, 211.3070928766945 ], "wc_questions_avg": [ 49.25, 35.26595383652624 ], "wc_limitations_avg": [ 14.5, 13.009611831257688 ], "wc_review_avg": [ 494.25, 188.13343004367937 ], "wc_reply_reviewers_avg": [ 144.25, 166.20224878141693 ], "wc_reply_authors_avg": [ 206.5, 290.27960658647726 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13976374270488297763&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tongji.edu.cn;cityu.edu.hk;up.ac.za;cityu.edu.hk", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Tongji University;City University of Hong Kong;University of Pretoria", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tongji.edu.cn;https://www.cityu.edu.hk;https://www.up.ac.za", "aff_unique_abbr": "Tongji;CityU;UP", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;South Africa" }, { "id": "Cdc90HKs1I", "title": "The Genomics Long-Range Benchmark: Advancing DNA Language Models", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "The advent of language models (LMs) in genomics necessitates benchmarks that can assess models\u2019 capabilities and limitations. In contrast to protein models, DNA LMs can be used to study non-coding regions of the genome and must account for unique challenges, especially interactions across long sequence lengths. However, existing benchmarks for DNA LMs are defined over short sequence datasets and can involve tasks that are often not considered to be biologically meaningful. Here, we present the Genomics Long-Range Benchmark (LRB), which focuses on biologically meaningful tasks and supports long-range contexts. We complement our benchmark with fine-tuning recipes that meaningfully improve performance and affect model evaluation. We evaluate DNA LMs across nine compiled tasks and observe that DNA LMs achieve competitive performance relative to supervised baselines on several tasks (e.g., genome annotation), but there remains a significant gap in domains, such as variant effect and gene expression prediction. Additionally, we introduce a visualization tool to examine model performance split by various genomic properties.\nLastly, we present methods for context-length extrapolation of transformer-based models that enable studying the effect of context length on DNA LM performance. The Genomics LRB is publicly available on Hugging Face: https://hf.co/datasets/InstaDeepAI/genomics-long-range-benchmark.", "keywords": "DNA;Language Models;Genomics;Benchmark", "primary_area": "", "supplementary_material": "/attachment/fe9e4821d979f53edc752cdfdf03a77a012e2b68.pdf", "author": "Evan Trop;Yair Schiff;Edgar Mariano Marroquin;Chia Hsiang Kao;Aaron Gokaslan;McKinley Polen;Mingyi Shao;Bernardo P de Almeida;Thomas PIERROT;Yang I Li;Volodymyr Kuleshov", "authorids": "~Evan_Trop1;~Yair_Schiff1;~Edgar_Mariano_Marroquin1;~Chia_Hsiang_Kao1;~Aaron_Gokaslan1;~McKinley_Polen1;~Mingyi_Shao1;~Bernardo_P_de_Almeida1;~Thomas_PIERROT1;~Yang_I_Li1;~Volodymyr_Kuleshov1", "gender": ";M;M;M;M;M;;;M;;", "homepage": "https://www.linkedin.com/in/evan-trop/;https://github.com/yair-schiff;https://www.cs.cornell.edu/~emarro/;https://iandrover.github.io;https://skylion007.github.io/;;;https://bernardo-de-almeida.github.io/;;https://thelilab.com/;https://www.cs.cornell.edu/~kuleshov/", "dblp": ";;;241/3791;220/6816;;;;228/7739;;81/8612", "google_scholar": ";GhFrOdQAAAAJ;;https://scholar.google.com.tw/citations?user=W_i9B0sAAAAJ;Mt2wyL4AAAAJ;;;;https://scholar.google.fr/citations?user=0zBiyNUAAAAJ;https://scholar.google.com/citations?hl=en;RY_t8XAAAAAJ", "orcid": ";;;;0000-0002-3575-2961;;;;0000-0002-5227-6194;;", "linkedin": ";yair-schiff;;;aarongokaslan/;mckinley-polen;mingyi-shao;;thomas-pierrot-120a43128/;;", "or_profile": "~Evan_Trop1;~Yair_Schiff1;~Edgar_Mariano_Marroquin1;~Chia_Hsiang_Kao1;~Aaron_Gokaslan1;~McKinley_Polen1;~Mingyi_Shao1;~Bernardo_P_de_Almeida1;~Thomas_PIERROT1;~Yang_I_Li1;~Volodymyr_Kuleshov1", "aff": "InstaDeep;Department of Computer Science, Cornell University;Department of Computer Science, Cornell University;Cornell University;Cornell University;Massachusetts Institute of Technology;Cornell University;InstaDeep;Universit\u00e9 Pierre et Marie Curie - Paris 6, Computer Science Lab - Pierre and Marie Curie University, Paris, France;University of Chicago;Cornell University", "aff_domain": "instadeep.com;cs.cornell.edu;cs.cornell.edu;cornell.edu;cornell.edu;mit.edu;cornell.edu;instadeep.com;isir.upmc.fr;uchicago.edu;cornell.edu", "position": "Researcher;PhD student;PhD student;PhD student;PhD student;MS student;MS student;Researcher;PhD student;Associate Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024the,\ntitle={The Genomics Long-Range Benchmark: Advancing {DNA} Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Cdc90HKs1I}\n}", "github": "", "project": "", "reviewers": "zqqq;V6kt;mcHo;QE3m", "site": "https://openreview.net/forum?id=Cdc90HKs1I", "pdf_size": 1948087, "rating": "3;4;5;6", "confidence": "5;4;5;4", "wc_summary_and_contributions": "80;75;87;55", "wc_strengths": "157;40;39;30", "wc_improvement": "419;44;29;17", "wc_limitations": "15;34;20;21", "wc_correctness": "8;32;13;4", "wc_clarity": "7;12;41;5", "wc_relation_to_prior_work": "11;26;221;11", "wc_documentation": "17;21;42;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "715;285;493;151", "wc_reply_reviewers": "0;0;199;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;3;1", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 74.25, 11.903255857117413 ], "wc_strengths_avg": [ 66.5, 52.3951333617923 ], "wc_improvement_avg": [ 127.25, 168.71332934892845 ], "wc_limitations_avg": [ 22.5, 7.0178344238090995 ], "wc_correctness_avg": [ 14.25, 10.732543966832841 ], "wc_clarity_avg": [ 16.25, 14.515078366994786 ], "wc_relation_to_prior_work_avg": [ 67.25, 88.97857888278504 ], "wc_documentation_avg": [ 21.75, 12.754901018824098 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 411.0, 213.66796671471369 ], "wc_reply_reviewers_avg": [ 49.75, 86.16952767655164 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FIRAQJJ1sR0J:scholar.google.com/&scioq=The+Genomics+Long-Range+Benchmark:+Advancing+DNA+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;2;1;0;3;4;1", "aff_unique_norm": "InstaDeep;Cornell University;Massachusetts Institute of Technology;Universit\u00e9 Pierre et Marie Curie - Paris 6;University of Chicago", "aff_unique_dep": ";Department of Computer Science;;Computer Science Lab;", "aff_unique_url": "https://www.instadeep.com;https://www.cornell.edu;https://web.mit.edu;https://www.upmc.fr;https://www.uchicago.edu", "aff_unique_abbr": "InstaDeep;Cornell;MIT;UPMC;UChicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;1;1;1;1;1;0;2;1;1", "aff_country_unique": "United Kingdom;United States;France" }, { "title": "Can Large Language Model Agents Simulate Human Trust Behavior?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96131", "id": "CeOwahuQic", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CeOwahuQic", "openreview": "https://openreview.net/forum?id=CeOwahuQic", "poster": "/media/PosterPDFs/NeurIPS%202024/96131.png?t=1731220583.4267278", "project": "", "author_site": "Chengxing Xie, Canyu Chen, Feiran Jia, Ziyu Ye, Shiyang Lai, Kai Shu, Jindong Gu, Adel Bibi, Ziniu Hu, David Jurgens, James Evans, Philip Torr, Bernard Ghanem, Guohao Li", "tldr": "", "abstract": "Large Language Model (LLM) agents have been increasingly adopted as simulation tools to model humans in social science and role-playing applications. However, one fundamental question remains: can LLM agents really simulate human behavior? In this paper, we focus on one critical and elemental behavior in human interactions, trust, and investigate whether LLM agents can simulate human trust behavior. We first find that LLM agents generally exhibit trust behavior, referred to as agent trust, under the framework of Trust Games, which are widely recognized in behavioral economics. Then, we discover that GPT-4 agents manifest high behavioral alignment with humans in terms of trust behavior, indicating the feasibility of simulating human trust behavior with LLM agents. In addition, we probe the biases of agent trust and differences in agent trust towards other LLM agents and humans. We also explore the intrinsic properties of agent trust under conditions including external manipulations and advanced reasoning strategies. Our study provides new insights into the behaviors of LLM agents and the fundamental analogy between LLMs and humans beyond value alignment. We further illustrate broader implications of our discoveries for applications where trust is paramount.", "keywords": "LLM Agent;Human Simulation;Behavioral Alignment;Trust Games", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Chengxing Xie;Canyu Chen;Feiran Jia;Ziyu Ye;Shiyang Lai;Kai Shu;Jindong Gu;Adel Bibi;Ziniu Hu;David Jurgens;James Evans;Philip Torr;Bernard Ghanem;Guohao Li", "authorids": "~Chengxing_Xie2;~Canyu_Chen1;~Feiran_Jia1;~Ziyu_Ye1;~Shiyang_Lai1;~Kai_Shu1;~Jindong_Gu1;~Adel_Bibi1;~Ziniu_Hu1;~David_Jurgens1;~James_Evans1;~Philip_Torr1;~Bernard_Ghanem1;~Guohao_Li1", "gender": "M;;F;;M;;;M;M;M;M;;M;M", "homepage": "https://yitianlian.github.io/;https://canyuchen.com;https://feiran.io;https://hazelye-bot.github.io/;;https://www.cs.emory.edu/~kshu5/;;http://adelbibi.com;http://acbull.github.io;http://jurgens.people.si.umich.edu;https://macss.uchicago.edu/directory/James-Evans;http://www.robots.ox.ac.uk/~tvg/;https://ivul.kaust.edu.sa;https://ghli.org/", "dblp": ";319/2330;277/9284;;;153/5265;;176/0964;180/5436;48/4613.html;;;37/2516;211/7175-1", "google_scholar": "emzHNrIAAAAJ;https://scholar.google.com/citations?hl=en;haBpKDQAAAAJ;S2da4LUAAAAJ;qALDmfcAAAAJ;-6bAV2cAAAAJ;;Q4j2laYAAAAJ;x6ct1CsAAAAJ;https://scholar.google.com/citations?hl=en;kV4N4zoAAAAJ;;rVsGTeEAAAAJ;J9K-D0sAAAAJ", "orcid": ";0000-0003-0937-1046;;0000-0002-0078-6758;0000-0001-8393-6151;;;0000-0002-6169-3918;;0000-0002-2135-9878;;;0000-0002-5534-587X;0000-0003-0260-5129", "linkedin": ";canyu-chen-1b2415100/;jiafeiran/;;;;;adel-bibi-ba3671ab/;;;;;bernardghanem/;", "or_profile": "~Chengxing_Xie2;~Canyu_Chen1;~Feiran_Jia1;~Ziyu_Ye1;~Shiyang_Lai1;~Kai_Shu1;~Jindong_Gu1;~Adel_Bibi1;~Ziniu_Hu1;~David_Jurgens1;~James_Evans1;~Philip_Torr1;~Bernard_Ghanem1;~Guohao_Li1", "aff": "King Abdullah University of Science and Technology;Illinois Institute of Technology;Meta Facebook;Google DeepMind;University of Chicago;Emory University;;University of Oxford;Deepmind;University of Michigan - Ann Arbor;University of Chicago;University of Oxford;King Abdullah University of Science and Technology;University of Oxford", "aff_domain": "kaust.edu.sa;hawk.iit.edu;meta.com;deepmind.google;uchicago.edu;emory.edu;;ox.ac.uk;deepmind.com;umich.edu;uchicago.edu;ox.ac.uk;kaust.edu.sa;robots.ox.ac.uk", "position": "Intern;MS student;Intern;Research Intern;Researcher;Assistant Professor;;Senior Researcher;Visiting Researcher;Associate Professor;Full Professor;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nxie2024can,\ntitle={Can Large Language Model Agents Simulate Human Trust Behavior?},\nauthor={Chengxing Xie and Canyu Chen and Feiran Jia and Ziyu Ye and Shiyang Lai and Kai Shu and Jindong Gu and Adel Bibi and Ziniu Hu and David Jurgens and James Evans and Philip Torr and Bernard Ghanem and Guohao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CeOwahuQic}\n}", "github": "", "reviewers": "aywP;6WVz;TM2T;QQ9h", "pdf_size": 5121261, "rating": "4;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;2", "wc_summary": "68;72;120;106", "wc_strengths": "62;105;84;67", "wc_weaknesses": "222;72;291;133", "wc_questions": "131;16;126;189", "wc_limitations": "7;9;61;47", "wc_review": "490;274;682;542", "wc_reply_reviewers": "0;16;295;92", "wc_reply_authors": "0;54;747;34", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.5, 22.107690969434145 ], "wc_strengths_avg": [ 79.5, 16.830032679706836 ], "wc_weaknesses_avg": [ 179.5, 83.60173443176882 ], "wc_questions_avg": [ 115.5, 62.5559749344537 ], "wc_limitations_avg": [ 31.0, 23.53720459187964 ], "wc_review_avg": [ 497.0, 146.65265084545862 ], "wc_reply_reviewers_avg": [ 100.75, 117.41246739592862 ], "wc_reply_authors_avg": [ 208.75, 311.3578126529026 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12307970010199530186&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "kaust.edu.sa;hawk.iit.edu;meta.com;deepmind.google;uchicago.edu;emory.edu;;ox.ac.uk;deepmind.com;umich.edu;uchicago.edu;ox.ac.uk;kaust.edu.sa;robots.ox.ac.uk", "author_num": 14, "aff_unique_index": "0;1;2;3;4;5;6;7;8;4;6;0;6", "aff_unique_norm": "King Abdullah University of Science and Technology;Illinois Institute of Technology;Meta;Google;University of Chicago;Emory University;University of Oxford;DeepMind;University of Michigan", "aff_unique_dep": ";;Meta Platforms, Inc.;Google DeepMind;;;;;", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.iit.edu;https://meta.com;https://deepmind.com;https://www.uchicago.edu;https://www.emory.edu;https://www.ox.ac.uk;https://deepmind.com;https://www.umich.edu", "aff_unique_abbr": "KAUST;IIT;Meta;DeepMind;UChicago;Emory;Oxford;DeepMind;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;1;1;2;1;1;2;2;1;1;2;0;2", "aff_country_unique": "Saudi Arabia;United States;United Kingdom" }, { "title": "Fair Kernel K-Means: from Single Kernel to Multiple Kernel", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96130", "id": "CehOqpvOxG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CehOqpvOxG", "openreview": "https://openreview.net/forum?id=CehOqpvOxG", "poster": "/media/PosterPDFs/NeurIPS%202024/96130.png?t=1731233365.2299542", "project": "", "author_site": "Peng Zhou, Rongwen Li, Liang Du", "tldr": "", "abstract": "Kernel k-means has been widely studied in machine learning. However, existing kernel k-means methods often ignore the \\textit{fairness} issue, which may cause discrimination. To address this issue, in this paper, we propose a novel Fair Kernel K-Means (FKKM) framework. In this framework, we first propose a new fairness regularization term that can lead to a fair partition of data. The carefully designed fairness regularization term has a similar form to the kernel k-means which can be seamlessly integrated into the kernel k-means framework. Then, we extend this method to the multiple kernel setting, leading to a Fair Multiple Kernel K-Means (FMKKM) method. We also provide some theoretical analysis of the generalization error bound, and based on this bound we give a strategy to set the hyper-parameter, which makes the proposed methods easy to use. At last, we conduct extensive experiments on both the single kernel and multiple kernel settings to compare the proposed methods with state-of-the-art methods to demonstrate their effectiveness.", "keywords": "kernel k-means;multiple kernel k-means;fair clustering", "primary_area": "fairness", "supplementary_material": "/attachment/4be25aed9efefafb67aa6e31e703d27d9accabdd.zip", "author": "Peng Zhou;Rongwen Li;Liang Du", "authorids": "~Peng_Zhou3;~Rongwen_Li1;~Liang_Du2", "gender": "M;M;M", "homepage": "https://doctor-nobody.github.io/;https://github.com/Lirongwen259306;http://cs.sxu.edu.cn/faculty/associate_professor/4314/index.htm", "dblp": "23/5823-6;;40/5548-3", "google_scholar": "https://scholar.google.com.hk/citations?user=gMsIFuEAAAAJ;;https://scholar.google.com.hk/citations?user=YisD9wsAAAAJ", "orcid": ";;0000-0002-3294-5071", "linkedin": ";;", "or_profile": "~Peng_Zhou3;~Rongwen_Li1;~Liang_Du2", "aff": "Anhui University;Anhui University;Shanxi University", "aff_domain": "ahu.edu.cn;ahu.edu.cn;sxu.edu.cn", "position": "Associate Professor;MS student;Associate Professor", "bibtex": "@inproceedings{\nzhou2024fair,\ntitle={Fair Kernel K-Means: from Single Kernel to Multiple Kernel},\nauthor={Peng Zhou and Rongwen Li and Liang Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CehOqpvOxG}\n}", "github": "", "reviewers": "GAQw;dRCy;px1V;t6xq", "pdf_size": 820318, "rating": "5;5;7;7", "confidence": "4;5;5;5", "soundness": "3;3;4;3", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "95;78;64;92", "wc_strengths": "109;79;98;129", "wc_weaknesses": "81;106;116;97", "wc_questions": "66;331;4;87", "wc_limitations": "85;48;6;1", "wc_review": "436;642;288;406", "wc_reply_reviewers": "0;307;19;0", "wc_reply_authors": "35;631;0;0", "reply_reviewers": "0;3;1;0", "reply_authors": "2;4;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 12.336429791475327 ], "wc_strengths_avg": [ 103.75, 18.102140757380052 ], "wc_weaknesses_avg": [ 100.0, 12.864680330268607 ], "wc_questions_avg": [ 122.0, 124.46485447707718 ], "wc_limitations_avg": [ 35.0, 34.15406271587613 ], "wc_review_avg": [ 443.0, 127.5186260904657 ], "wc_reply_reviewers_avg": [ 81.5, 130.4233491365714 ], "wc_reply_authors_avg": [ 166.5, 268.55958370536695 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NLP9E78uKQQJ:scholar.google.com/&scioq=Fair+Kernel+K-Means:+from+Single+Kernel+to+Multiple+Kernel&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ahu.edu.cn;ahu.edu.cn;sxu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Anhui University;Shanxi University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ahu.edu.cn/;http://www.sxu.edu.cn", "aff_unique_abbr": "AHU;SXU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Universal Exact Compression of Differentially Private Mechanisms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96129", "id": "CgGjT8EG8A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CgGjT8EG8A", "openreview": "https://openreview.net/forum?id=CgGjT8EG8A", "poster": "/media/PosterPDFs/NeurIPS%202024/96129.png?t=1730374926.4448647", "project": "", "author_site": "Yanxiao Liu, Wei-Ning Chen, Ayfer Ozgur, Cheuk Ting Li", "tldr": "", "abstract": "To reduce the communication cost of differential privacy mechanisms, we introduce a novel construction, called Poisson private representation (PPR), designed to compress and simulate any local randomizer while ensuring local differential privacy. Unlike previous simulation-based local differential privacy mechanisms, PPR exactly preserves the joint distribution of the data and the output of the original local randomizer. Hence, the PPR-compressed privacy mechanism retains all desirable statistical properties of the original privacy mechanism such as unbiasedness and Gaussianity. Moreover, PPR achieves a compression size within a logarithmic gap from the theoretical lower bound. Using the PPR, we give a new order-wise trade-off between communication, accuracy, central and local differential privacy for distributed mean estimation. Experiment results on distributed mean estimation show that PPR consistently gives a better trade-off between communication, accuracy and central differential privacy compared to the coordinate subsampled Gaussian mechanism, while also providing local differential privacy.", "keywords": "Differential Privacy;Channel Simulation;Federated Learning;Communication;Poisson Process", "primary_area": "privacy", "supplementary_material": "/attachment/22cd1056267d08b4b1f81fc096f7215f0958ae17.zip", "author": "Yanxiao Liu;Wei-Ning Chen;Ayfer Ozgur;Cheuk Ting Li", "authorids": "~Yanxiao_Liu1;~Wei-Ning_Chen1;~Ayfer_Ozgur1;~Cheuk_Ting_Li1", "gender": "M;;;M", "homepage": "https://yanxiaoliu-mike.github.io;https://web.stanford.edu/~wnchen/index.html;;https://www.ie.cuhk.edu.hk/people/ctli.shtml", "dblp": "297/3726.html;51/2118;12/4534;120/7097", "google_scholar": "M6u1tgUAAAAJ;-TqCZLIAAAAJ;;", "orcid": "0009-0008-2844-3272;0000-0001-7355-9487;;", "linkedin": ";;;", "or_profile": "~Yanxiao_Liu1;~Wei-Ning_Chen1;~Ayfer_Ozgur1;~Cheuk_Ting_Li1", "aff": "The Chinese University of Hong Kong;Stanford University;Stanford University;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;stanford.edu;stanford.edu;cuhk.edu.hk", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024universal,\ntitle={Universal Exact Compression of Differentially Private Mechanisms},\nauthor={Yanxiao Liu and Wei-Ning Chen and Ayfer Ozgur and Cheuk Ting Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CgGjT8EG8A}\n}", "github": "", "reviewers": "VD1w;GP8R;U6Tv;NbQx", "pdf_size": 1585114, "rating": "6;7;7;7", "confidence": "3;2;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "3;2;2;3", "wc_summary": "204;134;136;79", "wc_strengths": "81;74;170;57", "wc_weaknesses": "2;148;234;44", "wc_questions": "79;77;175;12", "wc_limitations": "132;6;12;7", "wc_review": "498;439;727;199", "wc_reply_reviewers": "0;5;279;15", "wc_reply_authors": "0;12;762;10", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 138.25, 44.31915500096995 ], "wc_strengths_avg": [ 95.5, 43.88906469725688 ], "wc_weaknesses_avg": [ 107.0, 90.55937278934744 ], "wc_questions_avg": [ 85.75, 58.15227854521265 ], "wc_limitations_avg": [ 39.25, 53.597457962108614 ], "wc_review_avg": [ 465.75, 187.85815792773013 ], "wc_reply_reviewers_avg": [ 74.75, 118.04739514279846 ], "wc_reply_authors_avg": [ 196.0, 326.8118724893574 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14397078308978307430&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cuhk.edu.hk;stanford.edu;stanford.edu;cuhk.edu.hk", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.stanford.edu", "aff_unique_abbr": "CUHK;Stanford", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Hong Kong SAR;Stanford", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "PutnamBench: Evaluating Neural Theorem-Provers on the Putnam Mathematical Competition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97811", "id": "ChKCF75Ocd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ChKCF75Ocd", "openreview": "https://openreview.net/forum?id=ChKCF75Ocd", "poster": "", "project": "", "author_site": "George Tsoukalas, Jasper Lee, John Jennings, Jimmy Xin, Michelle Ding, Michael Jennings, Amitayush Thakur, Swarat Chaudhuri", "tldr": "", "abstract": "We present PutnamBench, a new multi-language benchmark for evaluating the ability of neural theorem-provers to solve competition mathematics problems. PutnamBench consists of 1692 hand-constructed formalizations of 640 theorems sourced from the William Lowell Putnam Mathematical Competition, the premier undergraduate-level mathematics competition in North America. \nAll the problems have formalizations in Lean 4 and Isabelle; a substantial subset also has Coq formalizations. PutnamBench requires significant problem-solving ability and proficiency in a broad range of topics taught in undergraduate mathematics courses. We use PutnamBench to evaluate several established neural and symbolic theorem-provers. \nThese approaches can only solve a handful of the PutnamBench problems, establishing the benchmark as a difficult open challenge for research on neural theorem-proving. PutnamBench is available at https://github.com/trishullab/PutnamBench.", "keywords": "neural theorem proving;formal methods;large language models;mathematical reasoning;theorem proving;Lean;Isabelle;Coq;AI for Math;formal reasoning", "primary_area": "", "supplementary_material": "", "author": "George Tsoukalas;Jasper Lee;John Jennings;Jimmy Xin;Michelle Ding;Michael Jennings;Amitayush Thakur;Swarat Chaudhuri", "authorids": "~George_Tsoukalas1;~Jasper_Lee2;~John_Jennings1;~Jimmy_Xin1;~Michelle_Ding1;~Michael_Jennings1;~Amitayush_Thakur1;~Swarat_Chaudhuri1", "gender": "Not Specified;;Not Specified;M;F;Not Specified;M;M", "homepage": "https://georgetsoukalas.github.io/;;;;https://www.cs.utexas.edu/~mding01/;;https://amit9oct.github.io/aboutme/;http://www.cs.utexas.edu/~swarat", "dblp": ";;;;;;299/3365;37/6100", "google_scholar": ";;;;https://scholar.google.com/citations?hl=en;;d3XU8EAAAAAJ;9j6RBYQAAAAJ", "orcid": ";;;;;;;0000-0002-6859-1391", "linkedin": ";jasper-w-lee/;john-jennings-2624b1291/;jimmyxin31415/;michelleding01/;michael-jennings-a315a6291/;;swarat-chaudhuri-609b3092/", "or_profile": "~George_Tsoukalas1;~Jasper_Lee2;~John_Jennings1;~Jimmy_Xin1;~Michelle_Ding1;~Michael_Jennings1;~Amitayush_Thakur1;~Swarat_Chaudhuri1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntsoukalas2024putnambench,\ntitle={PutnamBench: Evaluating Neural Theorem-Provers on the Putnam Mathematical Competition},\nauthor={George Tsoukalas and Jasper Lee and John Jennings and Jimmy Xin and Michelle Ding and Michael Jennings and Amitayush Thakur and Swarat Chaudhuri},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ChKCF75Ocd}\n}", "github": "", "reviewers": "QCd7;DZY3;pTSB", "pdf_size": 406659, "rating": "7;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "49;68;117", "wc_strengths": "35;60;90", "wc_improvement": "54;66;130", "wc_limitations": "6;1;47", "wc_correctness": "6;1;8", "wc_clarity": "8;1;9", "wc_relation_to_prior_work": "7;11;10", "wc_documentation": "5;1;5", "wc_additional_feedback": "1;1;1", "wc_review": "171;210;417", "wc_reply_reviewers": "39;17;38", "wc_reply_authors": "0;0;12", "reply_reviewers": "1;1;1", "reply_authors": "1;1;3", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 78.0, 28.647280266487194 ], "wc_strengths_avg": [ 61.666666666666664, 22.484562605386735 ], "wc_improvement_avg": [ 83.33333333333333, 33.359989341858146 ], "wc_limitations_avg": [ 18.0, 20.607442021431645 ], "wc_correctness_avg": [ 5.0, 2.943920288775949 ], "wc_clarity_avg": [ 6.0, 3.559026084010437 ], "wc_relation_to_prior_work_avg": [ 9.333333333333334, 1.699673171197595 ], "wc_documentation_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 266.0, 107.95369377654477 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 10.143416036468626 ], "wc_reply_authors_avg": [ 4.0, 5.656854249492381 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15366958463763850491&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Robustness of Last Layer Two-Stage Fair Model Corrections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96128", "id": "ChnJ3W4HFG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ChnJ3W4HFG", "openreview": "https://openreview.net/forum?id=ChnJ3W4HFG", "poster": "", "project": "", "author_site": "Nathan Stromberg, Rohan Ayyagari, Sanmi Koyejo, Richard Nock, Lalitha Sankar", "tldr": "", "abstract": "Last-layer retraining methods have emerged as an efficient framework for correcting existing base models. Within this framework, several methods have been proposed to deal with correcting models for subgroup fairness with and without group membership information. Importantly, prior work has demonstrated that many methods are susceptible to noisy labels. To this end, we propose a drop-in correction for label noise in last-layer retraining, and demonstrate that it achieves state-of-the-art worst-group accuracy for a broad range of symmetric label noise and across a wide variety of datasets exhibiting spurious correlations. Our proposed approach uses label spreading on a latent nearest neighbors graph and has minimal computational overhead compared to existing methods.", "keywords": "fairness;robustness;worst-group accuracy;subgroup;subpopulation", "primary_area": "fairness", "supplementary_material": "/attachment/5685975de0dcb4efebb0c9ebf90e3005fce7e143.zip", "author": "Nathan Stromberg;Rohan Ayyagari;Sanmi Koyejo;Richard Nock;Lalitha Sankar", "authorids": "~Nathan_Stromberg1;~Rohan_Ayyagari1;~Sanmi_Koyejo1;~Richard_Nock1;~Lalitha_Sankar2", "gender": "M;M;;F;M", "homepage": "https://nstromberg.github.io;;http://users.cecs.anu.edu.au/~rnock/;https://sankar.engineering.asu.edu/;https://cs.stanford.edu/~sanmi/", "dblp": "340/7480;;n/RichardNock;https://dblp.uni-trier.de/pers/s/Sankar:Lalitha.html;14/8885", "google_scholar": ";;https://scholar.google.fr/citations?user=0J2s3YQAAAAJ;VQq0aIwAAAAJ;EaaOeJwAAAAJ", "orcid": "0000-0003-2701-4683;;;;0000-0002-4023-419X", "linkedin": ";rohan-ayyagari/;;lalitha-sankar-045b3a7/;sanmi-koyejo-984754/", "or_profile": "~Nathan_Stromberg1;~Rohan_Ayyagari1;~Richard_Nock1;~Lalitha_Sankar2;~Oluwasanmi_O_Koyejo1", "aff": "Arizona State University;Arizona State University;Google Research;Arizona State University;Google", "aff_domain": "asu.edu;asu.edu;google.com;asu.edu;google.com", "position": "PhD student;MS student;Researcher;Professor;Research Scientist", "bibtex": "@inproceedings{\nstromberg2024enhancing,\ntitle={Enhancing Robustness of Last Layer Two-Stage Fair Model Corrections},\nauthor={Nathan Stromberg and Rohan Ayyagari and Sanmi Koyejo and Richard Nock and Lalitha Sankar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ChnJ3W4HFG}\n}", "github": "", "reviewers": "phDr;e2kD;9x3j", "pdf_size": 1036623, "rating": "5;6;6", "confidence": "4;4;5", "soundness": "2;4;3", "novelty": "2;2;2", "presentation": "3;4;4", "wc_summary": "61;266;75", "wc_strengths": "97;352;23", "wc_weaknesses": "323;309;44", "wc_questions": "68;92;113", "wc_limitations": "27;6;10", "wc_review": "576;1025;265", "wc_reply_reviewers": "378;17;103", "wc_reply_authors": "0;34;66", "reply_reviewers": "1;1;1", "reply_authors": "1;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 134.0, 93.5129224581644 ], "wc_strengths_avg": [ 157.33333333333334, 140.92630067599953 ], "wc_weaknesses_avg": [ 225.33333333333334, 128.34934964991274 ], "wc_questions_avg": [ 91.0, 18.384776310850235 ], "wc_limitations_avg": [ 14.333333333333334, 9.104333522498441 ], "wc_review_avg": [ 622.0, 311.9690155554982 ], "wc_reply_reviewers_avg": [ 166.0, 153.9631990660972 ], "wc_reply_authors_avg": [ 33.333333333333336, 26.948510575210314 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1765419367378944274&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 0, "email": "asu.edu;asu.edu;google.com;asu.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Arizona State University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.asu.edu;https://research.google", "aff_unique_abbr": "ASU;Google Research", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Proxy Experiment Design for Causal Effect Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96127", "id": "Ci7II4CPwm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ci7II4CPwm", "openreview": "https://openreview.net/forum?id=Ci7II4CPwm", "poster": "/media/PosterPDFs/NeurIPS%202024/96127.png?t=1733322717.5838425", "project": "", "author_site": "Sepehr Elahi, Sina Akbari, Jalal Etesami, Negar Kiyavash, Patrick Thiran", "tldr": "", "abstract": "Identifying causal effects is a key problem of interest across many disciplines. The two long-standing approaches to estimate causal effects are observational and experimental (randomized) studies. Observational studies can suffer from unmeasured confounding, which may render the causal effects unidentifiable. On the other hand, direct experiments on the target variable may be too costly or even infeasible to conduct. A middle ground between these two approaches is to estimate the causal effect of interest through proxy experiments, which are conducted on variables with a lower cost to intervene on compared to the main target. In an earlier work, we studied this setting and demonstrated that the problem of designing the optimal (minimum-cost) experiment for causal effect identification is NP-complete and provided a naive algorithm that may require solving exponentially many NP-hard problems as a sub-routine in the worst case. In this work, we provide a few reformulations of the problem that allow for designing significantly more efficient algorithms to solve it as witnessed by our extensive simulations. Additionally, we study the closely-related problem of designing experiments that enable us to identify a given effect through valid adjustments sets.", "keywords": "Causal Inference;Identifiability;Experiment design", "primary_area": "causal_inference", "supplementary_material": "", "author": "Sepehr Elahi;Sina Akbari;Jalal Etesami;Negar Kiyavash;Patrick Thiran", "authorids": "~Sepehr_Elahi1;~Sina_Akbari1;~Jalal_Etesami2;~Negar_Kiyavash1;~Patrick_Thiran1", "gender": "M;M;M;F;", "homepage": "https://sepehrelahi.com;https://sinaakbarii.github.io;https://www.cs.cit.tum.de/en/dss/members/prof-jalal-etesami/;https://people.epfl.ch/negar.kiyavash?lang=en;https://people.epfl.ch/patrick.thiran", "dblp": "268/2614;;76/10800;85/4976;t/PThiran", "google_scholar": "https://scholar.google.com/citations?hl=en;-kNnS1AAAAAJ;3Usg1G0AAAAJ;7tBDvOwAAAAJ;https://scholar.google.ch/citations?user=7Ek7pqgAAAAJ", "orcid": ";;;0000-0002-8545-7709;", "linkedin": ";sina-akbari/;;;", "or_profile": "~Sepehr_Elahi1;~Sina_Akbari1;~Jalal_Etesami2;~Negar_Kiyavash1;~Patrick_Thiran1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Technische Universit\u00e4t M\u00fcnchen;EPFL - EPF Lausanne;EPFL", "aff_domain": "epfl.ch;epfl.ch;tum.de;epfl.ch;epfl.ch", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nelahi2024fast,\ntitle={Fast Proxy Experiment Design for Causal Effect Identification},\nauthor={Sepehr Elahi and Sina Akbari and Jalal Etesami and Negar Kiyavash and Patrick Thiran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ci7II4CPwm}\n}", "github": "", "reviewers": "Zgff;tZFY;ARx2;Jqzf", "pdf_size": 2430394, "rating": "2;4;6;6", "confidence": "3;5;2;3", "soundness": "3;1;3;3", "novelty": "2;2;3;3", "presentation": "2;1;3;2", "wc_summary": "71;33;104;78", "wc_strengths": "61;22;59;44", "wc_weaknesses": "202;673;6;84", "wc_questions": "2;12;19;248", "wc_limitations": "87;22;1;60", "wc_review": "423;762;189;514", "wc_reply_reviewers": "118;433;0;44", "wc_reply_authors": "858;1161;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;4;1;1", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.5, 25.401771591761076 ], "wc_strengths_avg": [ 46.5, 15.596473960482221 ], "wc_weaknesses_avg": [ 241.25, 258.8526366487311 ], "wc_questions_avg": [ 70.25, 102.80169016120308 ], "wc_limitations_avg": [ 42.5, 33.27536626394967 ], "wc_review_avg": [ 472.0, 205.15481958754953 ], "wc_reply_reviewers_avg": [ 148.75, 169.442283683855 ], "wc_reply_authors_avg": [ 504.75, 515.9929141955342 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uwCBlO26ZuYJ:scholar.google.com/&scioq=Fast+Proxy+Experiment+Design+for+Causal+Effect+Identification&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;tum.de;epfl.ch;epfl.ch", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.tum.de", "aff_unique_abbr": "EPFL;EPFL;TUM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Distributional Reinforcement Learning with Regularized Wasserstein Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96126", "id": "CiEynTpF28", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CiEynTpF28", "openreview": "https://openreview.net/forum?id=CiEynTpF28", "poster": "/media/PosterPDFs/NeurIPS%202024/96126.png?t=1729385830.804", "project": "", "author_site": "Ke Sun, Yingnan Zhao, Wulong Liu, Bei Jiang, Linglong Kong", "tldr": "", "abstract": "The empirical success of distributional reinforcement learning (RL) highly relies on the choice of distribution divergence equipped with an appropriate distribution representation. In this paper, we propose \\textit{Sinkhorn distributional RL (SinkhornDRL)}, which leverages Sinkhorn divergence\u2014a regularized Wasserstein loss\u2014to minimize the difference between current and target Bellman return distributions. Theoretically, we prove the contraction properties of SinkhornDRL, aligning with the interpolation nature of Sinkhorn divergence between Wasserstein distance and Maximum Mean Discrepancy (MMD). The introduced SinkhornDRL enriches the family of distributional RL algorithms, contributing to interpreting the algorithm behaviors compared with existing approaches by our investigation into their relationships. Empirically, we show that SinkhornDRL consistently outperforms or matches existing algorithms on the Atari games suite and particularly stands out in the multi-dimensional reward setting. \\thanks{Code is available in \\url{https://github.com/datake/SinkhornDistRL}.}.", "keywords": "distributional reinforcement learning;Sinkhorn divergence", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/f39a2c44020055db4a3d167f825fab61a5c15fad.zip", "author": "Ke Sun;Yingnan Zhao;Wulong Liu;Bei Jiang;Linglong Kong", "authorids": "~Ke_Sun6;~Yingnan_Zhao1;~Wulong_Liu1;~Bei_Jiang1;~Linglong_Kong2", "gender": "M;M;M;F;M", "homepage": "https://sites.google.com/view/kesun;;;https://www.ualberta.ca/~bei1;https://www.ualberta.ca/~lkong", "dblp": "69/476-13;;36/9257.html;190/4697;35/8525", "google_scholar": "lYdNhFQAAAAJ;NMgYY5cAAAAJ;https://scholar.google.ca/citations?user=od00FfIAAAAJ;https://scholar.google.ca/citations?user=MfOZ8G0AAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;;0000-0002-0033-839X;0000-0003-3011-9216", "linkedin": ";;wulong-liu-28006155/;;", "or_profile": "~Ke_Sun6;~Yingnan_Zhao1;~Wulong_Liu1;~Bei_Jiang1;~Linglong_Kong2", "aff": "University of Alberta;Harbin Institute of Technology;Huawei Noah's Ark Lab;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;hit.edu.cn;huawei.com;ualberta.ca;ualberta.ca", "position": "PhD student;Assistant Professor;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsun2024distributional,\ntitle={Distributional Reinforcement Learning with Regularized Wasserstein Loss},\nauthor={Ke Sun and Yingnan Zhao and Wulong Liu and Bei Jiang and Linglong Kong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CiEynTpF28}\n}", "github": "", "reviewers": "uu9w;Fdqh;nXJK", "pdf_size": 21595323, "rating": "5;5;7", "confidence": "3;5;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;4", "wc_summary": "50;64;124", "wc_strengths": "48;88;74", "wc_weaknesses": "331;229;76", "wc_questions": "32;72;44", "wc_limitations": "8;23;1", "wc_review": "469;476;319", "wc_reply_reviewers": "18;0;60", "wc_reply_authors": "0;0;183", "reply_reviewers": "1;0;2", "reply_authors": "1;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 79.33333333333333, 32.097074979228594 ], "wc_strengths_avg": [ 70.0, 16.57307052620807 ], "wc_weaknesses_avg": [ 212.0, 104.7950380504726 ], "wc_questions_avg": [ 49.333333333333336, 16.75974011996871 ], "wc_limitations_avg": [ 10.666666666666666, 9.177266598624136 ], "wc_review_avg": [ 421.3333333333333, 72.41700230071818 ], "wc_reply_reviewers_avg": [ 26.0, 25.13961017995307 ], "wc_reply_authors_avg": [ 61.0, 86.2670273047588 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4791239477751627826&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ualberta.ca;hit.edu.cn;huawei.com;ualberta.ca;ualberta.ca", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Alberta;Harbin Institute of Technology;Huawei", "aff_unique_dep": ";;Noah's Ark Lab", "aff_unique_url": "https://www.ualberta.ca;http://www.hit.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "UAlberta;HIT;Huawei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Canada;China" }, { "title": "Coherence-free Entrywise Estimation of Eigenvectors in Low-rank Signal-plus-noise Matrix Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96125", "id": "CiuH7zOBCQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CiuH7zOBCQ", "openreview": "https://openreview.net/forum?id=CiuH7zOBCQ", "poster": "", "project": "", "author_site": "Hao Yan, Keith Levin", "tldr": "", "abstract": "Spectral methods are widely used to estimate eigenvectors of a low-rank signal matrix subject to noise. These methods use the leading eigenspace of an observed matrix to estimate this low-rank signal. Typically, the entrywise estimation error of these methods depends on the coherence of the low-rank signal matrix with respect to the standard basis. In this work, we present a novel method for eigenvector estimation that avoids this dependence on coherence. Assuming a rank-one signal matrix, under mild technical conditions, the entrywise estimation error of our method provably has no dependence on the coherence under Gaussian noise (i.e., in the spiked Wigner model), and achieves the optimal estimation rate up to logarithmic factors. Simulations demonstrate that our method performs well under non-Gaussian noise and that an extension of our method to the case of a rank-$r$ signal matrix has little to no dependence on the coherence. In addition, we derive new metric entropy bounds for rank-$r$ singular subspaces under $\\ell_{2,\\infty}$ distance, which may be of independent interest. We use these new bounds to improve the best known lower bound for rank-$r$ eigenspace estimation under $\\ell_{2,\\infty}$ distance.", "keywords": "eigenvector estimation;spiked Wigner models;low-rank signal-plus-noise matrix models;subspace estimation;minimax theory", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/49ec4ccde3a93982ea5ba0b3a5ad2a6acdd495f9.zip", "author": "Hao Yan;Keith Levin", "authorids": "~Hao_Yan12;~Keith_Levin2", "gender": "M;", "homepage": "https://ezyhdxm.github.io/academic-site/;https://pages.stat.wisc.edu/~kdlevin/", "dblp": ";", "google_scholar": "fzYfS8sAAAAJ;qumjO10AAAAJ", "orcid": ";", "linkedin": "hao-yan-79aa0b169/;", "or_profile": "~Hao_Yan12;~Keith_Levin2", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyan2024coherencefree,\ntitle={Coherence-free Entrywise Estimation of Eigenvectors in Low-rank Signal-plus-noise Matrix Models},\nauthor={Hao Yan and Keith Levin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CiuH7zOBCQ}\n}", "github": "", "reviewers": "nQFd;dN7g;pryd;85jE;qfDx", "pdf_size": 7111407, "rating": "4;6;7;7;7", "confidence": "4;4;3;2;3", "soundness": "2;2;4;3;3", "novelty": "2;2;3;3;4", "presentation": "2;3;3;4;4", "wc_summary": "122;58;98;89;83", "wc_strengths": "34;92;141;154;44", "wc_weaknesses": "103;221;69;134;33", "wc_questions": "234;53;162;190;10", "wc_limitations": "11;10;6;10;1", "wc_review": "504;434;476;577;171", "wc_reply_reviewers": "10;16;16;17;35", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 90.0, 20.79422996891205 ], "wc_strengths_avg": [ 93.0, 48.80163931672787 ], "wc_weaknesses_avg": [ 112.0, 64.08744026718496 ], "wc_questions_avg": [ 129.8, 84.57990305031095 ], "wc_limitations_avg": [ 7.6, 3.7202150475476548 ], "wc_review_avg": [ 432.4, 138.75820696448915 ], "wc_reply_reviewers_avg": [ 18.8, 8.471127433818948 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7333587976225691, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2291401438901423235&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "wisc.edu;wisc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "SAFE: Slow and Fast Parameter-Efficient Tuning for Continual Learning with Pre-Trained Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96124", "id": "Cjnirz5pan", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cjnirz5pan", "openreview": "https://openreview.net/forum?id=Cjnirz5pan", "poster": "/media/PosterPDFs/NeurIPS%202024/96124.png?t=1731743276.031661", "project": "", "author_site": "Linglan Zhao, Xuerui Zhang, Ke Yan, Shouhong Ding, Weiran Huang", "tldr": "", "abstract": "Continual learning aims to incrementally acquire new concepts in data streams while resisting forgetting previous knowledge.\nWith the rise of powerful pre-trained models (PTMs), there is a growing interest in training incremental learning systems using these foundation models, rather than learning from scratch. \nExisting works often view PTMs as a strong initial point and directly apply parameter-efficient tuning (PET) in the first session for adapting to downstream tasks.\nIn the following sessions, most methods freeze model parameters for tackling forgetting issues. \nHowever, applying PET directly to downstream data cannot fully explore the inherent knowledge in PTMs.\nAdditionally, freezing the parameters in incremental sessions hinders models' plasticity to novel concepts not covered in the first session. \nTo solve the above issues, we propose a Slow And Fast parameter-Efficient tuning (SAFE) framework.\nIn particular, to inherit general knowledge from foundation models, we include a transfer loss function by measuring the correlation between the PTM and the PET-applied model.\nAfter calibrating in the first session, the slow efficient tuning parameters can capture more informative features, improving generalization to incoming classes.\nMoreover, to further incorporate novel concepts, we strike a balance between stability and plasticity by fixing slow efficient tuning parameters and continuously updating the fast ones.\nSpecifically, a cross-classification loss with feature alignment is proposed to circumvent catastrophic forgetting.\nDuring inference, we introduce an entropy-based aggregation strategy to dynamically utilize the complementarity in the slow and fast learners.\nExtensive experiments on seven benchmark datasets verify the effectiveness of our method by significantly surpassing the state-of-the-art.", "keywords": "continual learning;class incremental learning;pre-trained models;parameter-efficient tuning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Linglan Zhao;Xuerui Zhang;Ke Yan;Shouhong Ding;Weiran Huang", "authorids": "~Linglan_Zhao1;~Xuerui_Zhang1;~Ke_Yan2;~Shouhong_Ding3;~Weiran_Huang1", "gender": "M;;;M;M", "homepage": "https://scholar.google.com/citations?user=cUB4ypcAAAAJ&hl=zh-CN;;;;https://www.weiranhuang.com", "dblp": "266/9556.html;;;119/6735;170/0073-1", "google_scholar": "cUB4ypcAAAAJ;;;OGf40fkAAAAJ;AjJ2rf8AAAAJ", "orcid": "0000-0002-2241-6977;0009-0004-4749-1326;;0000-0002-3175-3553;", "linkedin": ";;;;", "or_profile": "~Linglan_Zhao1;~Xuerui_Zhang1;~Ke_Yan2;~Shouhong_Ding3;~Weiran_Huang1", "aff": "Tencent Youtu Lab;Zhejiang University;;Tencent Youtu Lab;Shanghai AI Laboratory", "aff_domain": "tencent.com;zju.edu.cn;;tencent.com;pjlab.org.cn", "position": "Researcher;MS student;;researcher;Consultant", "bibtex": "@inproceedings{\nzhao2024safe,\ntitle={{SAFE}: Slow and Fast Parameter-Efficient Tuning for Continual Learning with Pre-Trained Models},\nauthor={Linglan Zhao and Xuerui Zhang and Ke Yan and Shouhong Ding and Weiran Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cjnirz5pan}\n}", "github": "", "reviewers": "RFpE;Psm6;JgdG;bXaj", "pdf_size": 5485141, "rating": "4;4;4;5", "confidence": "5;5;5;5", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "46;64;77;66", "wc_strengths": "42;76;70;39", "wc_weaknesses": "77;83;156;242", "wc_questions": "20;128;88;7", "wc_limitations": "7;21;27;57", "wc_review": "192;372;418;411", "wc_reply_reviewers": "21;23;34;243", "wc_reply_authors": "1644;395;456;1223", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;3", "rating_avg": [ 4.25, 0.4330127018922193 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.25, 11.121488209767612 ], "wc_strengths_avg": [ 56.75, 16.422164899914993 ], "wc_weaknesses_avg": [ 139.5, 66.85244946896113 ], "wc_questions_avg": [ 60.75, 49.53471005264894 ], "wc_limitations_avg": [ 28.0, 18.24828759089466 ], "wc_review_avg": [ 348.25, 91.8977012770178 ], "wc_reply_reviewers_avg": [ 80.25, 94.09403541139045 ], "wc_reply_authors_avg": [ 929.5, 525.9622134716524 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10517239154160950520&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "email": "tencent.com;zju.edu.cn;;tencent.com;pjlab.org.cn", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tencent;Zhejiang University;Shanghai AI Laboratory", "aff_unique_dep": "Youtu Lab;;", "aff_unique_url": "https://www.tencent.com;https://www.zju.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Tencent;ZJU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "From Instance Training to Instruction Learning: Task Adapters Generation from Instructions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96123", "id": "CluvZBfrjj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CluvZBfrjj", "openreview": "https://openreview.net/forum?id=CluvZBfrjj", "poster": "/media/PosterPDFs/NeurIPS%202024/96123.png?t=1729521496.7535684", "project": "", "author_site": "Huanxuan Liao, Shizhu He, Yao Xu, Yuanzhe Zhang, Yanchao Hao, Shengping Liu, Kang Liu, Jun Zhao", "tldr": "", "abstract": "Large language models (LLMs) have acquired the ability to solve general tasks by utilizing instruction finetuning (IFT). However, IFT still relies heavily on instance training of extensive task data, which greatly limits the adaptability of LLMs to real-world scenarios where labeled task instances are scarce and broader task generalization becomes paramount. Contrary to LLMs, humans acquire skills and complete tasks not merely through repeated practice but also by understanding and following instructional guidelines. This paper is dedicated to simulating human learning to address the shortcomings of instance training, focusing on instruction learning to enhance cross-task generalization. Within this context, we introduce Task Adapters Generation from Instructions (TAGI), which automatically constructs the task-specific model in a parameter generation manner based on the given task instructions without retraining for unseen tasks. Specifically, we utilize knowledge distillation to enhance the consistency between TAGI developed through Learning with Instruction and task-specific models developed through Training with Instance, by aligning the labels, output logits, and adapter parameters between them. TAGI is endowed with cross-task generalization capabilities through a two-stage training process that includes hypernetwork pretraining and finetuning. We evaluate TAGI on the Super-Natural Instructions and P3 datasets. The experimental results demonstrate that TAGI can match or even outperform traditional meta-trained models and other hypernetwork models, while significantly reducing computational requirements. Our code will be available at https://github.com/Xnhyacinth/TAGI.", "keywords": "Hypernetwork;Generalization;Instruction Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Huanxuan Liao;Shizhu He;Yao Xu;Yuanzhe Zhang;Yanchao Hao;Shengping Liu;Kang Liu;Jun Zhao", "authorids": "~Huanxuan_Liao1;~Shizhu_He2;~Yao_Xu3;~Yuanzhe_Zhang1;~Yanchao_Hao1;~Shengping_Liu1;~Kang_Liu1;~Jun_Zhao4", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://xnhyacinth.github.io/;https://heshizhu.github.io/;https://github.com/YaooXu/;https://yuanzhe-zhang.github.io/;;;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "355/1072.html;136/8650;;141/4448;190/1825;21/5679;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=sRcWOKUAAAAJ;zBPIt3QAAAAJ;;H4GYRx8AAAAJ;;;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "orcid": ";;;;;;;", "linkedin": "huanxuan-liao-09ab8b341/;;;;;;;", "or_profile": "~Huanxuan_Liao1;~Shizhu_He2;~Yao_Xu3;~Yuanzhe_Zhang1;~Yanchao_Hao1;~Shengping_Liu1;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences (CASIA);Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Tencent PCG;;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;tencent.com;;ia.ac.cn;nlpr.ia.ac.cn", "position": "MS student;Associate Researcher;PhD student;Associate Professor;Principal Researcher;;Professor;Full Professor", "bibtex": "@inproceedings{\nliao2024from,\ntitle={From Instance Training to Instruction Learning: Task Adapters Generation from Instructions},\nauthor={Huanxuan Liao and Shizhu He and Yao Xu and Yuanzhe Zhang and Yanchao Hao and Shengping Liu and Kang Liu and Jun Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CluvZBfrjj}\n}", "github": "", "reviewers": "2ySK;Wqw9;UQYV;z6rG", "pdf_size": 1083228, "rating": "4;7;7;7", "confidence": "4;3;4;3", "soundness": "1;3;3;3", "novelty": "2;4;3;3", "presentation": "1;2;3;3", "wc_summary": "57;206;109;65", "wc_strengths": "38;193;54;46", "wc_weaknesses": "278;212;175;29", "wc_questions": "2;6;17;35", "wc_limitations": "26;6;14;6", "wc_review": "401;623;369;181", "wc_reply_reviewers": "323;0;11;40", "wc_reply_authors": "861;0;48;26", "reply_reviewers": "2;0;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 109.25, 59.26371149362821 ], "wc_strengths_avg": [ 82.75, 63.90373619750257 ], "wc_weaknesses_avg": [ 173.5, 91.22088576636384 ], "wc_questions_avg": [ 15.0, 12.786711852544421 ], "wc_limitations_avg": [ 13.0, 8.18535277187245 ], "wc_review_avg": [ 393.5, 156.910006054426 ], "wc_reply_reviewers_avg": [ 93.5, 133.30510117771186 ], "wc_reply_authors_avg": [ 233.75, 362.54129075182595 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10695894115733844627&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;tencent.com;;ia.ac.cn;nlpr.ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Tencent", "aff_unique_dep": "Institute of Automation;PCG (Platform and Content Group)", "aff_unique_url": "http://www.ia.cas.cn;https://www.tencent.com", "aff_unique_abbr": "CASIA;Tencent PCG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ODGS: 3D Scene Reconstruction from Omnidirectional Images with 3D Gaussian Splattings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96122", "id": "CovjSQmNOD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CovjSQmNOD", "openreview": "https://openreview.net/forum?id=CovjSQmNOD", "poster": "/media/PosterPDFs/NeurIPS%202024/96122.png?t=1733222609.8994484", "project": "", "author_site": "Suyoung Lee, Jaeyoung Chung, Jaeyoo Huh, Kyoung Mu Lee", "tldr": "", "abstract": "Omnidirectional (or 360-degree) images are increasingly being used for 3D applications since they allow the rendering of an entire scene with a single image. Existing works based on neural radiance fields demonstrate successful 3D reconstruction quality on egocentric videos, yet they suffer from long training and rendering times. Recently, 3D Gaussian splatting has gained attention for its fast optimization and real-time rendering. However, directly using a perspective rasterizer to omnidirectional images results in severe distortion due to the different optical properties between the two image domains. In this work, we present ODGS, a novel rasterization pipeline for omnidirectional images with geometric interpretation. For each Gaussian, we define a tangent plane that touches the unit sphere and is perpendicular to the ray headed toward the Gaussian center. We then leverage a perspective camera rasterizer to project the Gaussian onto the corresponding tangent plane. The projected Gaussians are transformed and combined into the omnidirectional image, finalizing the omnidirectional rasterization process. This interpretation reveals the implicit assumptions within the proposed pipeline, which we verify through mathematical proofs. The entire rasterization process is parallelized using CUDA, achieving optimization and rendering speeds 100 times faster than NeRF-based methods. Our comprehensive experiments highlight the superiority of ODGS by delivering the best reconstruction and perceptual quality across various datasets. Additionally, results on roaming datasets demonstrate that ODGS effectively restores fine details, even when reconstructing large 3D scenes. The source code is available on our project page (https://github.com/esw0116/ODGS).", "keywords": "3D scene reconstruction;3D Gaussian splatting;Omnidirectional images", "primary_area": "machine_vision", "supplementary_material": "", "author": "Suyoung Lee;Jaeyoung Chung;Jaeyoo Huh;Kyoung Mu Lee", "authorids": "~Suyoung_Lee5;~Jaeyoung_Chung1;~Jaeyoo_Huh1;~Kyoung_Mu_Lee2", "gender": "M;M;M;M", "homepage": "https://robot0321.github.io/;https://cv.snu.ac.kr/;https://cv.snu.ac.kr/kmlee/;https://esw0116.github.io/", "dblp": "51/3834;;17/4029;256/5167", "google_scholar": "https://scholar.google.co.kr/citations?user=htBMfSkAAAAJ;;Hofj9kAAAAAJ;s0uhy1gAAAAJ", "orcid": "0000-0002-5251-9926;;;", "linkedin": "jaeyoung-chung-13781a172/;;;", "or_profile": "~Jaeyoung_Chung1;~Jaeyoo_Huh1;~Kyoung_Mu_Lee1;~SuYoung_Lee2", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;cv.snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nlee2024odgs,\ntitle={{ODGS}: 3D Scene Reconstruction from Omnidirectional Images with 3D Gaussian Splattings},\nauthor={Suyoung Lee and Jaeyoung Chung and Jaeyoo Huh and Kyoung Mu Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CovjSQmNOD}\n}", "github": "", "reviewers": "iEY4;oCBJ;cMFb;sZSB", "pdf_size": 8483586, "rating": "3;6;6;7", "confidence": "5;3;3;4", "soundness": "3;4;3;3", "novelty": "2;2;3;3", "presentation": "1;3;2;3", "wc_summary": "71;63;156;59", "wc_strengths": "86;128;172;210", "wc_weaknesses": "382;78;279;111", "wc_questions": "15;33;63;100", "wc_limitations": "11;8;1;50", "wc_review": "565;310;671;530", "wc_reply_reviewers": "224;39;201;67", "wc_reply_authors": "313;25;366;57", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 87.25, 39.927277643235335 ], "wc_strengths_avg": [ 149.0, 46.52956049652737 ], "wc_weaknesses_avg": [ 212.5, 124.04132375946332 ], "wc_questions_avg": [ 52.75, 32.220917119163445 ], "wc_limitations_avg": [ 17.5, 19.11151485361639 ], "wc_review_avg": [ 519.0, 131.3601918390804 ], "wc_reply_reviewers_avg": [ 132.75, 80.77244270170365 ], "wc_reply_authors_avg": [ 190.25, 150.84656940083192 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7035264706814485, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2118928989099757783&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;cv.snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "A Metalearned Neural Circuit for Nonparametric Bayesian Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96121", "id": "Cp7HD618bd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cp7HD618bd", "openreview": "https://openreview.net/forum?id=Cp7HD618bd", "poster": "/media/PosterPDFs/NeurIPS%202024/96121.png?t=1733760562.7933815", "project": "", "author_site": "Jake Snell, Gianluca Bencomo, Tom Griffiths", "tldr": "", "abstract": "Most applications of machine learning to classification assume a closed set of balanced classes. This is at odds with the real world, where class occurrence statistics often follow a long-tailed power-law distribution and it is unlikely that all classes are seen in a single sample. Nonparametric Bayesian models naturally capture this phenomenon, but have significant practical barriers to widespread adoption, namely implementation complexity and computational inefficiency. To address this, we present a method for extracting the inductive bias from a nonparametric Bayesian model and transferring it to an artificial neural network. By simulating data with a nonparametric Bayesian prior, we can metalearn a sequence model that performs inference over an unlimited set of classes. After training, this \"neural circuit\" has distilled the corresponding inductive bias and can successfully perform sequential inference over an open set of classes. Our experimental results show that the metalearned neural circuit achieves comparable or better performance than particle filter-based methods for inference in these models while being faster and simpler to use than methods that explicitly incorporate Bayesian nonparametric inference.", "keywords": "Nonparametric Bayes;metalearning;amortized inference", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/5e99ed271e3ef4e8bd08327fd2fc20ef9c47d6d1.zip", "author": "Jake Snell;Gianluca Bencomo;Thomas L. Griffiths", "authorids": "~Jake_Snell1;~Gianluca_Bencomo1;~Thomas_L._Griffiths1", "gender": "M;M;", "homepage": "https://www.jakesnell.com;https://gianlucabencomo.github.io;http://cocosci.princeton.edu/tom/", "dblp": "172/1406;;34/4472", "google_scholar": "MbXKAK8AAAAJ;xSS55BgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";gianluca-bencomo-326469133/;", "or_profile": "~Jake_Snell1;~Gianluca_Bencomo1;~Thomas_L._Griffiths1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "Postdoc;PhD student;Professor", "bibtex": "@inproceedings{\nsnell2024a,\ntitle={A Metalearned Neural Circuit for Nonparametric Bayesian Inference},\nauthor={Jake Snell and Gianluca Bencomo and Thomas L. Griffiths},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cp7HD618bd}\n}", "github": "", "reviewers": "SHmK;aqs6;QfVx;grnU", "pdf_size": 5918505, "rating": "6;6;6;6", "confidence": "3;4;3;4", "soundness": "3;2;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "39;80;197;94", "wc_strengths": "76;92;187;34", "wc_weaknesses": "120;215;262;100", "wc_questions": "92;398;5;137", "wc_limitations": "11;62;29;4", "wc_review": "338;847;680;369", "wc_reply_reviewers": "9;44;15;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.5, 58.18290126832797 ], "wc_strengths_avg": [ 97.25, 55.97934887081128 ], "wc_weaknesses_avg": [ 174.25, 66.73969957978534 ], "wc_questions_avg": [ 158.0, 146.46330598480972 ], "wc_limitations_avg": [ 26.5, 22.433234274174556 ], "wc_review_avg": [ 558.5, 213.6147232753398 ], "wc_reply_reviewers_avg": [ 23.25, 13.273563952458284 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15398114556244440366&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Limits of Differential Privacy in Online Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96120", "id": "Cqr6E81iB7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cqr6E81iB7", "openreview": "https://openreview.net/forum?id=Cqr6E81iB7", "poster": "/media/PosterPDFs/NeurIPS%202024/96120.png?t=1732774800.5416415", "project": "", "author_site": "Bo Li, Wei Wang, Peng Ye", "tldr": "", "abstract": "Differential privacy (DP) is a formal notion that restricts the privacy leakage of an algorithm when running on sensitive data, in which privacy-utility trade-off is one of the central problems in private data analysis. In this work, we investigate the fundamental limits of differential privacy in online learning algorithms and present evidence that separates three types of constraints: no DP, pure DP, and approximate DP. We first describe a hypothesis class that is online learnable under approximate DP but not online learnable under pure DP under the adaptive adversarial setting. This indicates that approximate DP must be adopted when dealing with adaptive adversaries. We then prove that any private online learner must make an infinite number of mistakes for almost all hypothesis classes. This essentially generalizes previous results and shows a strong separation between private and non-private settings since a finite mistake bound is always attainable (as long as the class is online learnable) when there is no privacy requirement.", "keywords": "online learning;differential privacy", "primary_area": "privacy", "supplementary_material": "", "author": "Bo Li;Wei Wang;Peng Ye", "authorids": "~Bo_Li33;~Wei_Wang50;~Peng_Ye5", "gender": ";M;", "homepage": ";https://www.cse.ust.hk/~weiwa/;", "dblp": ";35/7092-30;53/930-5", "google_scholar": ";https://scholar.google.ca/citations?user=FeJrzPMAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Bo_Li33;~Wei_Wang50;~Peng_Ye5", "aff": ";HKUST;Hong Kong University of Science and Technology", "aff_domain": ";cse.ust.hk;hkust.edu", "position": ";Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2024the,\ntitle={The Limits of Differential Privacy in Online Learning},\nauthor={Bo Li and Wei Wang and Peng Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cqr6E81iB7}\n}", "github": "", "reviewers": "kW4d;YzYS;sPMY;ud1R", "pdf_size": 442085, "rating": "6;6;7;8", "confidence": "3;3;3;1", "soundness": "3;3;3;4", "novelty": "3;4;2;3", "presentation": "2;2;3;4", "wc_summary": "93;21;133;19", "wc_strengths": "44;38;66;52", "wc_weaknesses": "98;50;8;53", "wc_questions": "241;1;47;2", "wc_limitations": "1;19;3;5", "wc_review": "477;129;257;131", "wc_reply_reviewers": "10;4;57;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 66.5, 48.60812689252693 ], "wc_strengths_avg": [ 50.0, 10.488088481701515 ], "wc_weaknesses_avg": [ 52.25, 31.846310618343217 ], "wc_questions_avg": [ 72.75, 98.89988624867068 ], "wc_limitations_avg": [ 7.0, 7.0710678118654755 ], "wc_review_avg": [ 248.5, 141.74889770294513 ], "wc_reply_reviewers_avg": [ 17.75, 22.93877721239735 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:58FdPJu6JZAJ:scholar.google.com/&scioq=The+Limits+of+Differential+Privacy+in+Online+Learning&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": ";cse.ust.hk;hkust.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Scaling Law for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96119", "id": "Cr2jEHJB9q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cr2jEHJB9q", "openreview": "https://openreview.net/forum?id=Cr2jEHJB9q", "poster": "/media/PosterPDFs/NeurIPS%202024/96119.png?t=1733657570.8712847", "project": "", "author_site": "Jingzhe Shi, Qinwei Ma, Huan Ma, Lei Li", "tldr": "", "abstract": "Scaling law that rewards large datasets, complex models and enhanced data granularity has been observed in various fields of deep learning. Yet, studies on time series forecasting have cast doubt on scaling behaviors of deep learning methods for time series forecasting: while more training data improves performance, more capable models do not always outperform less capable models, and longer input horizon may hurt performance for some models. We propose a theory for scaling law for time series forecasting that can explain these seemingly abnormal behaviors. We take into account the impact of dataset size and model complexity, as well as time series data granularity, particularly focusing on the look-back horizon, an aspect that has been unexplored in previous theories. Furthermore, we empirically evaluate various models using a diverse set of time series forecasting datasets, which (1) verifies the validity of scaling law on dataset size and model complexity within the realm of time series forecasting, and (2) validates our theoretical framework, particularly regarding the influence of look back horizon. We hope our findings may inspire new models targeting time series forecasting datasets of limited size, as well as large foundational datasets and models for time series forecasting in future works.", "keywords": "Time series forecasting;Scaling law;Theory", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/69bc5264a0fbb14b8bbd4a1ed2a0f1e3bf34834e.zip", "author": "Jingzhe Shi;Qinwei Ma;Huan Ma;Lei Li", "authorids": "~Jingzhe_Shi1;~Qinwei_Ma1;~Huan_Ma2;~Lei_Li29", "gender": "M;M;M;Not Specified", "homepage": "https://jingzheshi.github.io/;https://github.com/Aquahorse;https://github.com/cjxh21;https://llei66.github.io/li-lei.github.io/", "dblp": "359/6451;;;13/7007-20", "google_scholar": "x_4IcIoAAAAJ;LwUaV3kAAAAJ;;DOyVxx0AAAAJ", "orcid": ";;;0000-0002-2929-0828", "linkedin": ";;;", "or_profile": "~Jingzhe_Shi1;~Qinwei_Ma1;~Huan_Ma2;~Lei_Li29", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;University of Copenhagen", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;diku.dk", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nshi2024scaling,\ntitle={Scaling Law for Time Series Forecasting},\nauthor={Jingzhe Shi and Qinwei Ma and Huan Ma and Lei Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cr2jEHJB9q}\n}", "github": "", "reviewers": "SJaN;Vwbb;7rY6;oD7J;urDc", "pdf_size": 2349752, "rating": "4;5;7;7;7", "confidence": "3;4;3;2;4", "soundness": "2;2;3;3;3", "novelty": "1;2;3;4;3", "presentation": "2;2;1;4;4", "wc_summary": "63;63;38;62;215", "wc_strengths": "17;123;13;108;135", "wc_weaknesses": "85;195;817;58;108", "wc_questions": "2;72;32;43;96", "wc_limitations": "2;57;15;104;135", "wc_review": "169;510;915;375;689", "wc_reply_reviewers": "0;90;434;19;0", "wc_reply_authors": "0;132;1921;44;0", "reply_reviewers": "0;1;3;1;0", "reply_authors": "1;2;5;2;1", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.6, 1.2 ], "wc_summary_avg": [ 88.2, 64.11676847752076 ], "wc_strengths_avg": [ 79.2, 53.127770515992864 ], "wc_weaknesses_avg": [ 252.6, 285.91369327123874 ], "wc_questions_avg": [ 49.0, 32.47152598816385 ], "wc_limitations_avg": [ 62.6, 50.84328864265174 ], "wc_review_avg": [ 531.6, 256.2183443861895 ], "wc_reply_reviewers_avg": [ 108.6, 166.0404769928104 ], "wc_reply_authors_avg": [ 419.4, 752.3455589022906 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.2, 1.469693845669907 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21128856368212912, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17828876733956922409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;diku.dk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Tsinghua University;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ku.dk", "aff_unique_abbr": "THU;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;Denmark" }, { "title": "DAGER: Exact Gradient Inversion for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96118", "id": "CrADAX7h23", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CrADAX7h23", "openreview": "https://openreview.net/forum?id=CrADAX7h23", "poster": "/media/PosterPDFs/NeurIPS%202024/96118.png?t=1733407502.718394", "project": "", "author_site": "Ivo Petrov, Dimitar I. Dimitrov, Maximilian Baader, Mark M\u00fcller, Martin Vechev", "tldr": "", "abstract": "Federated learning works by aggregating locally computed gradients from multiple clients, thus enabling collaborative training without sharing private client data. However, prior work has shown that the data can actually be recovered by the server using so-called gradient inversion attacks. While these attacks perform well when applied on images, they are limited in the text domain and only permit approximate reconstruction of small batches and short input sequences. In this work, we propose DAGER, the first algorithm to recover whole batches of input text exactly. DAGER leverages the low-rank structure of self-attention layer gradients and the discrete nature of token embeddings to efficiently check if a given token sequence is part of the client data. We use this check to exactly recover full batches in the honest-but-curious setting without any prior on the data for both encoder and decoder-based architectures using exhaustive heuristic search and a greedy approach, respectively. We provide an efficient GPU implementation of DAGER and show experimentally that it recovers full batches of size up to 128 on large language models (LLMs), beating prior attacks in speed (20x at same batch size), scalability (10x larger batches), and reconstruction quality (ROUGE-1/2 > 0.99).", "keywords": "Federated Learning;Exact Gradient Inversion;Gradient Leakage;Privacy;Language Model;LLM;Attack", "primary_area": "privacy", "supplementary_material": "/attachment/82cb303b9bf5f5d151044143f51ddb25439aebc4.zip", "author": "Ivo Petrov;Dimitar Iliev Dimitrov;Maximilian Baader;Mark Niklas Mueller;Martin Vechev", "authorids": "~Ivo_Petrov1;~Dimitar_Iliev_Dimitrov2;~Maximilian_Baader1;~Mark_Niklas_Mueller2;~Martin_Vechev1", "gender": "M;M;;M;M", "homepage": ";https://www.sri.inf.ethz.ch/people/dimitadi;https://www.sri.inf.ethz.ch/people/max;https://www.sri.inf.ethz.ch/people/mark;https://www.sri.inf.ethz.ch/people/martin", "dblp": "225/9294;271/0915;249/8060;287/4254;93/2189.html", "google_scholar": "g_ueVOsAAAAJ;https://scholar.google.com/citations?hl=en;LKqCkWoAAAAJ;RBpmcCAAAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";0000-0001-9813-0900;0000-0002-9271-6422;0000-0002-2496-6542;", "linkedin": "ivo-petrov-b318501ba/;;;mark-m%C3%BCller-8bb4b1140/;", "or_profile": "~Ivo_Petrov1;~Dimitar_Iliev_Dimitrov2;~Maximilian_Baader1;~Mark_Niklas_Mueller2;~Martin_Vechev1", "aff": "University of Cambridge;Swiss Federal Institute of Technology;ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "cam.ac.uk;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npetrov2024dager,\ntitle={{DAGER}: Exact Gradient Inversion for Large Language Models},\nauthor={Ivo Petrov and Dimitar Iliev Dimitrov and Maximilian Baader and Mark Niklas Mueller and Martin Vechev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CrADAX7h23}\n}", "github": "", "reviewers": "ju5t;Ujeg;L4TG", "pdf_size": 831638, "rating": "6;7;7", "confidence": "4;3;2", "soundness": "3;4;4", "novelty": "3;4;3", "presentation": "2;3;3", "wc_summary": "111;38;63", "wc_strengths": "18;65;79", "wc_weaknesses": "178;91;66", "wc_questions": "56;3;164", "wc_limitations": "12;3;1", "wc_review": "375;200;373", "wc_reply_reviewers": "24;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 30.291179500896884 ], "wc_strengths_avg": [ 54.0, 26.08958923913266 ], "wc_weaknesses_avg": [ 111.66666666666667, 48.00231475900117 ], "wc_questions_avg": [ 74.33333333333333, 66.99419543678559 ], "wc_limitations_avg": [ 5.333333333333333, 4.784233364802441 ], "wc_review_avg": [ 316.0, 82.02845034905064 ], "wc_reply_reviewers_avg": [ 8.0, 11.313708498984761 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16697623463705007552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "University of Cambridge;Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "Cambridge;ETH Zurich;ETHZ", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;Switzerland" }, { "title": "Fourier-enhanced Implicit Neural Fusion Network for Multispectral and Hyperspectral Image Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96117", "id": "CscowTrOP9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CscowTrOP9", "openreview": "https://openreview.net/forum?id=CscowTrOP9", "poster": "/media/PosterPDFs/NeurIPS%202024/96117.png?t=1731225762.9499254", "project": "", "author_site": "YuJie Liang, ZiHan Cao, Shangqi Deng, Hong-Xia Dou, Liang-Jian Deng", "tldr": "", "abstract": "Recently, implicit neural representations (INR) have made significant strides in various vision-related domains, providing a novel solution for Multispectral and Hyperspectral Image Fusion (MHIF) tasks. However, INR is prone to losing high-frequency information and is confined to the lack of global perceptual capabilities. To address these issues, this paper introduces a Fourier-enhanced Implicit Neural Fusion Network (FeINFN) specifically designed for MHIF task, targeting the following phenomena: The Fourier amplitudes of the HR-HSI latent code and LR-HSI are remarkably similar; however, their phases exhibit different patterns. In FeINFN, we innovatively propose a spatial and frequency implicit fusion function (Spa-Fre IFF), helping INR capture high-frequency information and expanding the receptive field. Besides, a new decoder employing a complex Gabor wavelet activation function, called Spatial-Frequency Interactive Decoder (SFID), is invented to enhance the interaction of INR features. Especially, we further theoretically prove that the Gabor wavelet activation possesses a time-frequency tightness property that favors learning the optimal bandwidths in the decoder. Experiments on two benchmark MHIF datasets verify the state-of-the-art (SOTA) performance of the proposed method, both visually and quantitatively. Also, ablation studies demonstrate the mentioned contributions. The code can be available at https://github.com/294coder/Efficient-MIF.", "keywords": "Image fusion;Implicit neural representation;Fourier representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yujie Liang;Zihan Cao;Shangqi Deng;Hong-Xia Dou;Liang-Jian Deng", "authorids": "~Yujie_Liang1;~Zihan_Cao1;~Shangqi_Deng3;~Hong-Xia_Dou1;~Liang-Jian_Deng2", "gender": ";M;M;F;M", "homepage": ";https://294coder.github.io/;https://liangjiandeng.github.io/;;https://liangjiandeng.github.io/", "dblp": ";235/8988;;210/0039.html;136/7368", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-7532-2122;;;", "linkedin": ";;;;", "or_profile": "~Yujie_Liang1;~Zihan_Cao1;~Shangqi_Deng3;~Hong-Xia_Dou1;~Liang-Jian_Deng2", "aff": ";University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Xihua University;University of Electronic Science and Technology of China", "aff_domain": ";uestc.edu.cn;uestc.edu.cn;xhu.edu.cn;uestc.edu.cn", "position": ";MS student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliang2024fourierenhanced,\ntitle={Fourier-enhanced Implicit Neural Fusion Network for Multispectral and Hyperspectral Image Fusion},\nauthor={Yujie Liang and Zihan Cao and Shangqi Deng and Hong-Xia Dou and Liang-Jian Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CscowTrOP9}\n}", "github": "", "reviewers": "2Qbn;EUje;EFSC;8xGR", "pdf_size": 29737538, "rating": "6;6;7;7", "confidence": "4;5;5;4", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "3;3;3;3", "wc_summary": "75;29;66;75", "wc_strengths": "68;71;78;76", "wc_weaknesses": "92;76;97;175", "wc_questions": "145;16;136;2", "wc_limitations": "23;5;9;1", "wc_review": "403;197;386;329", "wc_reply_reviewers": "0;14;14;15", "wc_reply_authors": "0;0;85;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.25, 18.978606376654742 ], "wc_strengths_avg": [ 73.25, 3.960744879438715 ], "wc_weaknesses_avg": [ 110.0, 38.32101251271944 ], "wc_questions_avg": [ 74.75, 66.01278285302021 ], "wc_limitations_avg": [ 9.5, 8.2915619758885 ], "wc_review_avg": [ 328.75, 80.85287564459287 ], "wc_reply_reviewers_avg": [ 10.75, 6.219927652312364 ], "wc_reply_authors_avg": [ 21.25, 36.80607966083864 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8659118976271460921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";uestc.edu.cn;uestc.edu.cn;xhu.edu.cn;uestc.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Electronic Science and Technology of China;Xihua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;http://www.xihua.edu.cn", "aff_unique_abbr": "UESTC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Cth1PyCwZt", "title": "Beyond accuracy: understanding the performance of LLMs on exams designed for humans", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many recent studies of LLM performance have focused on the ability of LLMs to achieve outcomes comparable to humans on academic and professional exams. However, it is not clear whether such studies shed light on the extent to which models show reasoning ability, and there is controversy about the significance and implications of such results. We seek to look more deeply into the question of how and whether the performance of LLMs on exams designed for humans reflects true aptitude inherent in LLMs. We do so by making use of the tools of psychometrics which are designed to perform meaningful measurement in test taking. We leverage a unique dataset that captures the detailed performance of over 5M students across 8 college-entrance exams given over a span of two years in Brazil. With respect to the evaluation of LLM abilities, we show that the tools of Item Response Theory (IRT) provide a more informative evaluation of model performance than the usual accuracy metrics employed in previous studies. Digging deeper, we show that the modeling framework of IRT, by explicitly modeling the difficulty levels of questions, allows us to quantitatively distinguish between LLMs that answer questions in \u201chuman-like\u201d patterns versus LLMs that do not. We also show how to quantitatively identify cases in which exam results are not reliable measurements of an LLM's ability. Using the tools of IRT we can also identify specific questions that appear to be either much easier, or much harder, for machines than for humans, and we give some reasons for those differences. Overall, our study shows that the conventional focus on accuracy as the primary performance metric for LLM studies does not allow us to deeply understand the true capabilities of LLMs and compare them to that of humans. Thus, we claim that psychometric modeling should play a larger role in the evaluation of LLM capabilities on exams designed for humans.", "keywords": "large language models;model evaluation;psychometrics", "primary_area": "evaluation", "supplementary_material": "/attachment/920e5198a1db88769a5b32f989799cbd7f778265.zip", "author": "Pedro Calais;Gabriel Franco;Themistoklis Nikas;Zilu Tang;Mark Crovella;Wagner Meira Jr.;Evimaria Terzi", "authorids": "~Pedro_Calais1;~Gabriel_Franco1;~Themistoklis_Nikas1;~Zilu_Tang1;~Mark_Crovella1;~Wagner_Meira_Jr.1;~Evimaria_Terzi1", "gender": "M;M;M;M;M;;M", "homepage": "https://cs-people.bu.edu/gvfranco/;;https://pootiet.github.io/;https://www.cs.bu.edu/faculty/crovella;http://www.dcc.ufmg.br/~meira/;https://cs-people.bu.edu/evimaria/;http://www.dcc.ufmg.br/~pcalais", "dblp": "255/9604;;266/2889;c/MarkCrovella;m/WagnerMeiraJr;t/EvimariaTerzi;21/9989", "google_scholar": "Ls46A88AAAAJ;;E9g28XEAAAAJ;vrHhOlUAAAAJ;https://scholar.google.com.br/citations?user=oad6_oEAAAAJ;j6KFCRAAAAAJ;_ORnu0sAAAAJ", "orcid": ";;;0000-0002-5005-7019;0000-0002-2614-2723;;", "linkedin": ";tnikas/;peter-tang-83802495/;;wagner-meira-jr-3b83a0/;evimaria-terzi-4a621a2/;pedrohcalais/", "or_profile": "~Gabriel_Franco1;~Themistoklis_Nikas1;~Zilu_Tang1;~Mark_Crovella1;~Wagner_Meira_Jr.1;~Evimaria_Terzi1;~Pedro_Henrique_Calais_Guerra1", "aff": "Boston University;;Boston University, Boston University;Boston University;Universidade Federal de Minas Gerais, Universidade Federal de Minas Gerais;Boston University, Boston University;IBMEC", "aff_domain": "bu.edu;;bu.edu;bu.edu;dcc.ufmg.br;bu.edu;ibmec.edu.br", "position": "PhD student;;PhD student;Full Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024beyond,\ntitle={Beyond accuracy: understanding the performance of {LLM}s on exams designed for humans},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Cth1PyCwZt}\n}", "github": "", "project": "", "reviewers": "KhP9;Wmva;rYgY;9Juf", "site": "https://openreview.net/forum?id=Cth1PyCwZt", "pdf_size": 8172034, "rating": "6;6;7;8", "confidence": "4;4;2;3", "soundness": "3;4;3;4", "novelty": "3;4;3;4", "presentation": "3;3;4;4", "wc_summary": "93;22;38;138", "wc_strengths": "52;244;69;96", "wc_weaknesses": "124;329;70;84", "wc_questions": "81;71;42;46", "wc_limitations": "24;1;77;31", "wc_review": "374;667;296;395", "wc_reply_reviewers": "84;0;27;6", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 72.75, 45.963980462966866 ], "wc_strengths_avg": [ 115.25, 75.97162299174607 ], "wc_weaknesses_avg": [ 151.75, 104.23621011913278 ], "wc_questions_avg": [ 60.0, 16.446884203398525 ], "wc_limitations_avg": [ 33.25, 27.589626673806226 ], "wc_review_avg": [ 433.0, 140.04463574160917 ], "wc_reply_reviewers_avg": [ 29.25, 33.16153645415122 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13206261957804907689&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Boston University;Universidade Federal de Minas Gerais;Instituto de Bioci\u00eancias, Medicina e Epidemiologia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bu.edu;https://www.ufmg.br;", "aff_unique_abbr": "BU;UFMG;IBMEC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "United States;Brazil" }, { "title": "Large Language Models-guided Dynamic Adaptation for Temporal Knowledge Graph Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96116", "id": "Cw7Agrr8GJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cw7Agrr8GJ", "openreview": "https://openreview.net/forum?id=Cw7Agrr8GJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96116.png?t=1731414225.7513013", "project": "", "author_site": "Jiapu Wang, Sun Kai, LINHAO LUO, Wei Wei, Yongli Hu, Alan Wee-Chung Liew, Shirui Pan, Baocai Yin", "tldr": "", "abstract": "Temporal Knowledge Graph Reasoning (TKGR) is the process of utilizing temporal information to capture complex relations within a Temporal Knowledge Graph (TKG) to infer new knowledge. Conventional methods in TKGR typically depend on deep learning algorithms or temporal logical rules. However, deep learning-based TKGRs often lack interpretability, whereas rule-based TKGRs struggle to effectively learn temporal rules that capture temporal patterns. Recently, Large Language Models (LLMs) have demonstrated extensive knowledge and remarkable proficiency in temporal reasoning. Consequently, the employment of LLMs for Temporal Knowledge Graph Reasoning (TKGR) has sparked increasing interest among researchers. Nonetheless, LLMs are known to function as black boxes, making it challenging to comprehend their reasoning process. Additionally, due to the resource-intensive nature of fine-tuning, promptly updating LLMs to integrate evolving knowledge within TKGs for reasoning is impractical. To address these challenges, in this paper, we propose a Large Language Models-guided Dynamic Adaptation (LLM-DA) method for reasoning on TKGs. Specifically, LLM-DA harnesses the capabilities of LLMs to analyze historical data and extract temporal logical rules. These rules unveil temporal patterns and facilitate interpretable reasoning. To account for the evolving nature of TKGs, a dynamic adaptation strategy is proposed to update the LLM-generated rules with the latest events. This ensures that the extracted rules always incorporate the most recent knowledge and better generalize to the predictions on future events. Experimental results show that without the need of fine-tuning, LLM-DA significantly improves the accuracy of reasoning over several common datasets, providing a robust framework for TKGR tasks.", "keywords": "Large Language Model;Temporal Knowledge Graph;Knowledge Graph Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jiapu Wang;Kai Sun;LINHAO LUO;Wei Wei;Yongli Hu;Alan Wee-Chung Liew;Shirui Pan;Baocai Yin", "authorids": "~Jiapu_Wang1;~Kai_Sun13;~LINHAO_LUO1;~Wei_Wei20;~Yongli_Hu1;~Alan_Wee-Chung_Liew1;~Shirui_Pan1;~Baocai_Yin1", "gender": "M;;M;;M;M;;M", "homepage": ";;https://rmanluo.github.io/;;;https://experts.griffith.edu.au/7401-alan-weechung-liew;;https://www.bjut.edu.cn/info/1059/1568.htm", "dblp": "343/7016;;251/5530;;72/4503;76/2013.html;91/8171;", "google_scholar": "k8YPCRgAAAAJ;;https://scholar.google.com.hk/citations?user=RO46HpcAAAAJ;;qeG9e2AAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ;", "orcid": "0000-0001-7639-5289;;0000-0003-0027-942X;;;0000-0001-6718-7584;0000-0003-0794-527X;0000-0003-3121-1823", "linkedin": ";;linhao-luo-36b489134/;;;alan-liew-0214a138/;;", "or_profile": "~Jiapu_Wang1;~Kai_Sun13;~LINHAO_LUO1;~Wei_Wei20;~Yongli_Hu1;~Alan_Wee-Chung_Liew1;~Shirui_Pan1;~Baocai_Yin1", "aff": "Beijing University of Technology;;Monash University;;Beijing University of Technology;Griffith University;Griffith University;Beijing University of Technology", "aff_domain": "bjut.edu;;monash.edu;;bjut.edu.cn;griffith.edu.au;griffith.edu.au;bjut.edu.cn", "position": "PhD student;;PhD student;;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024large,\ntitle={Large Language Models-guided Dynamic Adaptation for Temporal Knowledge Graph Reasoning},\nauthor={Jiapu Wang and Kai Sun and LINHAO LUO and Wei Wei and Yongli Hu and Alan Wee-Chung Liew and Shirui Pan and Baocai Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cw7Agrr8GJ}\n}", "github": "", "reviewers": "CXWD;9wEQ;thzY", "pdf_size": 2907240, "rating": "5;6;7", "confidence": "4;3;5", "soundness": "2;3;4", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "34;82;59", "wc_strengths": "42;115;57", "wc_weaknesses": "41;95;52", "wc_questions": "3;9;50", "wc_limitations": "1;18;90", "wc_review": "121;319;308", "wc_reply_reviewers": "0;27;21", "wc_reply_authors": "23;56;14", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 58.333333333333336, 19.601587237318874 ], "wc_strengths_avg": [ 71.33333333333333, 31.47838764754143 ], "wc_weaknesses_avg": [ 62.666666666666664, 23.299976156401726 ], "wc_questions_avg": [ 20.666666666666668, 20.885933597094056 ], "wc_limitations_avg": [ 36.333333333333336, 38.57748335781148 ], "wc_review_avg": [ 249.33333333333334, 90.85641908466836 ], "wc_reply_reviewers_avg": [ 16.0, 11.575836902790225 ], "wc_reply_authors_avg": [ 31.0, 18.05547008526779 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4013740519549578841&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bjut.edu;;monash.edu;;bjut.edu.cn;griffith.edu.au;griffith.edu.au;bjut.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;2;0", "aff_unique_norm": "Beijing University of Technology;Monash University;Griffith University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bjut.edu.cn;https://www.monash.edu;https://www.griffith.edu.au", "aff_unique_abbr": "BJUT;Monash;Griffith", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "China;Australia" }, { "title": "Plan-on-Graph: Self-Correcting Adaptive Planning of Large Language Model on Knowledge Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96115", "id": "CwCUEr6wO5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CwCUEr6wO5", "openreview": "https://openreview.net/forum?id=CwCUEr6wO5", "poster": "/media/PosterPDFs/NeurIPS%202024/96115.png?t=1733391213.4131234", "project": "", "author_site": "Liyi Chen, Panrong Tong, Zhongming Jin, Ying Sun, Jieping Ye, Hui Xiong", "tldr": "", "abstract": "Large Language Models (LLMs) have shown remarkable reasoning capabilities on complex tasks, but they still suffer from out-of-date knowledge, hallucinations, and opaque decision-making. In contrast, Knowledge Graphs (KGs) can provide explicit and editable knowledge for LLMs to alleviate these issues. Existing paradigm of KG-augmented LLM manually predefines the breadth of exploration space and requires flawless navigation in KGs. However, this paradigm cannot adaptively explore reasoning paths in KGs based on the question semantics and self-correct erroneous reasoning paths, resulting in a bottleneck in efficiency and effect. To address these limitations, we propose a novel self-correcting adaptive planning paradigm for KG-augmented LLM named Plan-on-Graph (PoG), which first decomposes the question into several sub-objectives and then repeats the process of adaptively exploring reasoning paths, updating memory, and reflecting on the need to self-correct erroneous reasoning paths until arriving at the answer. Specifically, three important mechanisms of Guidance, Memory, and Reflection are designed to work together, to guarantee the adaptive breadth of self-correcting planning for graph reasoning. Finally, extensive experiments on three real-world datasets demonstrate the effectiveness and efficiency of PoG.", "keywords": "Large Language Model;KG-Augmented LLM;LLM Explainability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Liyi Chen;Panrong Tong;Zhongming Jin;Ying Sun;Jieping Ye;Hui Xiong", "authorids": "~Liyi_Chen3;~Panrong_Tong1;~Zhongming_Jin1;~Ying_Sun4;~Jieping_Ye4;~Hui_Xiong1", "gender": "F;M;M;F;M;M", "homepage": "http://home.ustc.edu.cn/~liyichen/;https://www.linkedin.com/in/panrong-tong-3695b4116/;https://sites.google.com/site/zjuzhongmingjin/;https://sunyinggilly.github.io;http://yelabs.net/;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": ";223/6918;;10/5415-6.html;03/5454;262/1686-1.html", "google_scholar": "J7JWDwkAAAAJ;;fOC90nQAAAAJ;;T9AzhwcAAAAJ;cVDF1tkAAAAJ", "orcid": "0000-0003-2166-4386;0000-0003-3046-5143;;0000-0002-4763-6060;0000-0001-8662-5818;0000-0001-6016-6465", "linkedin": ";;;;;", "or_profile": "~Liyi_Chen3;~Panrong_Tong1;~Zhongming_Jin1;~Ying_Sun4;~Jieping_Ye4;~Hui_Xiong1", "aff": "University of Science and Technology of China;Alibaba Cloud;Alibaba Cloud Computing;Hong Kong University of Science and Technology (Guangzhou);Alibaba Group;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "ustc.edu.cn;alibaba-inc.com;alibaba-inc.com;hkust-gz.edu.cn;alibaba-inc.com;hkust.edu", "position": "PhD student;Researcher;Researcher;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024planongraph,\ntitle={Plan-on-Graph: Self-Correcting Adaptive Planning of Large Language Model on Knowledge Graphs},\nauthor={Liyi Chen and Panrong Tong and Zhongming Jin and Ying Sun and Jieping Ye and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CwCUEr6wO5}\n}", "github": "", "reviewers": "vd15;QT9p;kA7J;Gwnc;uH9F", "pdf_size": 1090989, "rating": "5;5;5;6;6", "confidence": "4;4;4;4;4", "soundness": "3;3;2;3;3", "novelty": "2;2;3;2;3", "presentation": "4;3;2;2;3", "wc_summary": "35;38;82;72;117", "wc_strengths": "53;37;77;72;41", "wc_weaknesses": "75;192;261;78;122", "wc_questions": "36;42;9;22;106", "wc_limitations": "7;14;16;3;1", "wc_review": "206;323;445;247;387", "wc_reply_reviewers": "26;25;105;15;26", "wc_reply_authors": "22;22;125;21;22", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 68.8, 30.327545235313725 ], "wc_strengths_avg": [ 56.0, 16.074825037928093 ], "wc_weaknesses_avg": [ 145.6, 71.5278966557804 ], "wc_questions_avg": [ 43.0, 33.51417610504546 ], "wc_limitations_avg": [ 8.2, 5.912698199637793 ], "wc_review_avg": [ 321.6, 87.67576632114486 ], "wc_reply_reviewers_avg": [ 39.4, 33.06115545470242 ], "wc_reply_authors_avg": [ 42.4, 41.301815940706526 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2667826903821217588&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;alibaba-inc.com;alibaba-inc.com;hkust-gz.edu.cn;alibaba-inc.com;hkust.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;2;3", "aff_unique_norm": "University of Science and Technology of China;Alibaba Cloud;Alibaba Group;Hong Kong University of Science and Technology", "aff_unique_dep": ";;Cloud Computing;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibabacloud.com;https://www.alibabacloud.com;https://www.ust.hk", "aff_unique_abbr": "USTC;Alibaba Cloud;Alibaba Cloud;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Simplifying Latent Dynamics with Softly State-Invariant World Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96114", "id": "CwNevJONgq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CwNevJONgq", "openreview": "https://openreview.net/forum?id=CwNevJONgq", "poster": "", "project": "", "author_site": "Tankred Saanum, Peter Dayan, Eric Schulz", "tldr": "", "abstract": "To solve control problems via model-based reasoning or planning, an agent needs to know how its actions affect the state of the world. The actions an agent has at its disposal often change the state of the environment in systematic ways. However, existing techniques for world modelling do not guarantee that the effect of actions are represented in such systematic ways. We introduce the Parsimonious Latent Space Model (PLSM), a world model that regularizes the latent dynamics to make the effect of the agent's actions more predictable. Our approach minimizes the mutual information between latent states and the change that an action produces in the agent's latent state, in turn minimizing the dependence the state has on the dynamics. This makes the world model softly state-invariant. We combine PLSM with different model classes used for i) future latent state prediction, ii) planning, and iii) model-free reinforcement learning. We find that our regularization improves accuracy, generalization, and performance in downstream tasks, highlighting the importance of systematic treatment of actions in world models.", "keywords": "World model;latent dynamics;reinforcement learning;compression", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Tankred Saanum;Peter Dayan;Eric Schulz", "authorids": "~Tankred_Saanum1;~Peter_Dayan1;~Eric_Schulz1", "gender": "M;;M", "homepage": ";https://www.mpg.de/12309370/biological-cybernetics-dayan;https://cpilab.org", "dblp": ";22/522;124/0016", "google_scholar": "https://scholar.google.com/citations?hl=no;;", "orcid": ";0000-0003-3476-1839;", "linkedin": ";;", "or_profile": "~Tankred_Saanum1;~Peter_Dayan1;~Eric_Schulz1", "aff": "Max Planck Institute for Biological Cybernetics, Max-Planck Institute;Max-Planck Institute;Max Planck Institute for Biological Cybernetics", "aff_domain": "tuebingen.mpg.de;mpg.de;tuebingen.mpg.de", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nsaanum2024simplifying,\ntitle={Simplifying Latent Dynamics with Softly State-Invariant World Models},\nauthor={Tankred Saanum and Peter Dayan and Eric Schulz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CwNevJONgq}\n}", "github": "", "reviewers": "hbwZ;gEcM;VZsj;itzz;PXJB", "pdf_size": 2276588, "rating": "5;5;6;6;7", "confidence": "4;4;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "66;97;84;93;91", "wc_strengths": "26;46;63;30;75", "wc_weaknesses": "65;342;307;41;84", "wc_questions": "25;119;6;83;22", "wc_limitations": "1;12;9;8;1", "wc_review": "183;616;469;255;273", "wc_reply_reviewers": "21;170;407;193;32", "wc_reply_authors": "34;37;571;181;29", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 86.2, 10.943491216243563 ], "wc_strengths_avg": [ 48.0, 18.793615937333612 ], "wc_weaknesses_avg": [ 167.8, 129.14395069069244 ], "wc_questions_avg": [ 51.0, 42.871902220452036 ], "wc_limitations_avg": [ 6.2, 4.445222154178574 ], "wc_review_avg": [ 359.2, 159.58496169752337 ], "wc_reply_reviewers_avg": [ 164.6, 139.8550678381016 ], "wc_reply_authors_avg": [ 170.4, 208.3205222727708 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8017837257372731, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ikCQMaQHupYJ:scholar.google.com/&scioq=Simplifying+Latent+Dynamics+with+Softly+State-Invariant+World+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "tuebingen.mpg.de;mpg.de;tuebingen.mpg.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Max Planck Institute for Biological Cybernetics;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": "Biological Cybernetics;", "aff_unique_url": "https://www.biological-cybernetics.de;https://www.mpg.de", "aff_unique_abbr": "MPIBC;MPG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "TAPTRv2: Attention-based Position Update Improves Tracking Any Point", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96113", "id": "Cx2O6Xz03H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Cx2O6Xz03H", "openreview": "https://openreview.net/forum?id=Cx2O6Xz03H", "poster": "/media/PosterPDFs/NeurIPS%202024/96113.png?t=1731616612.51025", "project": "", "author_site": "Hongyang Li, Hao Zhang, Shilong Liu, Zhaoyang Zeng, Feng Li, Bohan Li, Tianhe Ren, Lei Zhang", "tldr": "", "abstract": "In this paper, we present TAPTRv2, a Transformer-based approach built upon TAPTR for solving the Tracking Any Point (TAP) task. TAPTR borrows designs from DEtection TRansformer (DETR) and formulates each tracking point as a point query, making it possible to leverage well-studied operations in DETR-like algorithms. TAPTRv2 improves TAPTR by addressing a critical issue regarding its reliance on cost-volume, which contaminates the point query\u2019s content feature and negatively impacts both visibility prediction and cost-volume computation. In TAPTRv2, we propose a novel attention-based position update (APU) operation and use key-aware deformable attention to realize. For each query, this operation uses key-aware attention weights to combine their corresponding deformable sampling positions to predict a new query position. This design is based on the observation that local attention is essentially the same as cost-volume, both of which are computed by dot-production between a query and its surrounding features. By introducing this new operation, TAPTRv2 not only removes the extra burden of cost-volume computation, but also leads to a substantial performance improvement. TAPTRv2 surpasses TAPTR and achieves state-of-the-art performance on many challenging datasets, demonstrating the effectiveness of our approach.", "keywords": "Tracking Any Point; 2D Detection; Attention", "primary_area": "machine_vision", "supplementary_material": "/attachment/f93ca11ea33aa7163e12af1255e936f64b54b9a3.zip", "author": "Hongyang Li;Hao Zhang;Shilong Liu;Zhaoyang Zeng;Feng Li;Bohan Li;Tianhe Ren;Lei Zhang", "authorids": "~Hongyang_Li6;~Hao_Zhang39;~Shilong_Liu1;~Zhaoyang_Zeng1;~Feng_Li9;~Bohan_Li6;~Tianhe_Ren1;~Lei_Zhang23", "gender": "M;M;M;M;M;;M;M", "homepage": "https://haozhang534.github.io/;https://www.lsl.zone;;https://fengli-ust.github.io/;https://scholar.google.com/citations?user=V-YdQiAAAAAJ&hl=zh-CN;https://rentainhe.github.io/;https://lhy-hongyangli.github.io;https://www.leizhang.org/", "dblp": "55/2270-97;;;92/2954-40.html;;;;z/LeiZhang", "google_scholar": "B8hPxMQAAAAJ;nkSVY3MAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;V-YdQiAAAAAJ;cW4ILs0AAAAJ;https://scholar.google.com.hk/citations?user=zdgHNmkAAAAJ;fIlGZToAAAAJ", "orcid": ";;;;0000-0002-6959-7517;;0000-0002-2295-322X;", "linkedin": "hao-zhang-3b09b8196/;;%E5%85%86%E9%98%B3-%E6%9B%BE-1a505291/;;;;;", "or_profile": "~Hao_Zhang39;~Shilong_Liu1;~Zhaoyang_Zeng1;~Feng_Li9;~Bohan_Li6;~Tianhe_Ren1;~Wangyeung_Lei2;~Lei_Zhang1", "aff": "Hong Kong University of Science and Technology;NVIDIA;International Digital Economy Academy, International Digital Economy Academy;Hong Kong University of Science and Technology;Shanghai Jiaotong University;The International Digital Economy Academy;South China University of Technology;International Digital Economy Academy", "aff_domain": "ust.hk;nvidia.com;idea.edu.cn;ust.hk;sjtu.edu.cn;idea.edu.cn;scut.edu.cn;idea.edu.cn", "position": "PhD student;Research Intern;Researcher;PhD student;PhD student;Researcher;PhD student;Chief Scientist", "bibtex": "@inproceedings{\nli2024taptrv,\ntitle={{TAPTR}v2: Attention-based Position Update Improves Tracking Any Point},\nauthor={Hongyang Li and Hao Zhang and Shilong Liu and Zhaoyang Zeng and Feng Li and Bohan Li and Tianhe Ren and Lei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Cx2O6Xz03H}\n}", "github": "", "reviewers": "zD8r;zGVx;wowB;yaYP", "pdf_size": 8717002, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "60;101;56;84", "wc_strengths": "88;59;93;50", "wc_weaknesses": "88;80;366;127", "wc_questions": "2;33;26;6", "wc_limitations": "35;11;1;1", "wc_review": "273;284;542;268", "wc_reply_reviewers": "28;99;132;0", "wc_reply_authors": "0;633;308;52", "reply_reviewers": "1;2;2;0", "reply_authors": "1;3;4;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 18.32177666057525 ], "wc_strengths_avg": [ 72.5, 18.364367672206956 ], "wc_weaknesses_avg": [ 165.25, 117.25906148353738 ], "wc_questions_avg": [ 16.75, 13.06474263045392 ], "wc_limitations_avg": [ 12.0, 13.892443989449804 ], "wc_review_avg": [ 341.75, 115.7591789017182 ], "wc_reply_reviewers_avg": [ 64.75, 53.00648545225386 ], "wc_reply_authors_avg": [ 248.25, 250.87085821194935 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5873617400168452062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ust.hk;nvidia.com;idea.edu.cn;ust.hk;sjtu.edu.cn;idea.edu.cn;scut.edu.cn;idea.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;3;2;4;2", "aff_unique_norm": "Hong Kong University of Science and Technology;NVIDIA;International Digital Economy Academy;Shanghai Jiao Tong University;South China University of Technology", "aff_unique_dep": ";NVIDIA Corporation;;;", "aff_unique_url": "https://www.ust.hk;https://www.nvidia.com;;https://www.sjtu.edu.cn;https://www.scut.edu.cn", "aff_unique_abbr": "HKUST;NVIDIA;;SJTU;SCUT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;3;0", "aff_country_unique": "China;United States;;Unknown" }, { "title": "PrivacyLens: Evaluating Privacy Norm Awareness of Language Models in Action", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97810", "id": "CxNXoMnCKc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CxNXoMnCKc", "openreview": "https://openreview.net/forum?id=CxNXoMnCKc", "poster": "/media/PosterPDFs/NeurIPS%202024/97810.png?t=1731661863.9042578", "project": "", "author_site": "Yijia Shao, Tianshi Li, Weiyan Shi, Yanchen Liu, Diyi Yang", "tldr": "", "abstract": "As language models (LMs) are widely utilized in personalized communication scenarios (e.g., sending emails, writing social media posts) and endowed with a certain level of agency, ensuring they act in accordance with the contextual privacy norms becomes increasingly critical. However, quantifying the privacy norm awareness of LMs and the emerging privacy risk in LM-mediated communication is challenging due to (1) the contextual and long-tailed nature of privacy-sensitive cases, and (2) the lack of evaluation approaches that capture realistic application scenarios. To address these challenges, we propose PrivacyLens, a novel framework designed to extend privacy-sensitive seeds into expressive vignettes and further into agent trajectories, enabling multi-level evaluation of privacy leakage in LM agents' actions. We instantiate PrivacyLens with a collection of privacy norms grounded in privacy literature and crowdsourced seeds. Using this dataset, we reveal a discrepancy between LM performance in answering probing questions and their actual behavior when executing user instructions in an agent setup. State-of-the-art LMs, like GPT-4 and Llama-3-70B, leak sensitive information in 25.68% and 38.69% of cases, even when prompted with privacy-enhancing instructions. We also demonstrate the dynamic nature of PrivacyLens by extending each seed into multiple trajectories to red-team LM privacy leakage risk. Dataset and code are available at https://github.com/SALT-NLP/PrivacyLens.", "keywords": "Privacy;Contextual Integrity;Language Model;Language Model Agent", "primary_area": "", "supplementary_material": "/attachment/0ee969067827566656711bff4f1ae9c21d0ee5ab.zip", "author": "Yijia Shao;Tianshi Li;Weiyan Shi;Yanchen Liu;Diyi Yang", "authorids": "~Yijia_Shao1;~Tianshi_Li2;~Weiyan_Shi2;~Yanchen_Liu2;~Diyi_Yang2", "gender": "F;;F;M;F", "homepage": "https://cs.stanford.edu/~shaoyj/;https://tianshili.me;https://sites.google.com/ucdavis.edu/wyshi/;https://liuyanchen1015.github.io/;https://cs.stanford.edu/~diyiy/", "dblp": "329/4063;;218/5722;;70/11145", "google_scholar": "H0zcQh4AAAAJ;FzBd1YYAAAAJ;xj666rUAAAAJ;https://scholar.google.com/citations?hl=en;j9jhYqQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yijia_Shao1;~Tianshi_Li2;~Weiyan_Shi2;~Yanchen_Liu2;~Diyi_Yang2", "aff": "Computer Science Department, Stanford University;Northeastern University;Stanford University;Harvard University;Stanford University", "aff_domain": "cs.stanford.edu;northeastern.edu;stanford.edu;harvard.edu;stanford.edu", "position": "PhD student;Assistant Professor;Postdoc;MS student;Assistant Professor", "bibtex": "@inproceedings{\nshao2024privacylens,\ntitle={PrivacyLens: Evaluating Privacy Norm Awareness of Language Models in Action},\nauthor={Yijia Shao and Tianshi Li and Weiyan Shi and Yanchen Liu and Diyi Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=CxNXoMnCKc}\n}", "github": "", "reviewers": "8LHB;xcbX;fpGk;QdSz", "pdf_size": 3718772, "rating": "7;7;7;8", "confidence": "5;4;4;5", "wc_summary_and_contributions": "137;90;101;130", "wc_strengths": "160;3;34;30", "wc_improvement": "445;134;1;153", "wc_limitations": "86;52;1;9", "wc_correctness": "60;15;16;23", "wc_clarity": "8;17;5;6", "wc_relation_to_prior_work": "19;3;1;11", "wc_documentation": "29;51;1;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "945;366;161;377", "wc_reply_reviewers": "152;31;0;28", "wc_reply_authors": "106;78;0;52", "reply_reviewers": "1;1;0;1", "reply_authors": "3;4;1;4", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 114.5, 19.551214796017153 ], "wc_strengths_avg": [ 56.75, 60.79216643614537 ], "wc_improvement_avg": [ 183.25, 162.0715505571536 ], "wc_limitations_avg": [ 37.0, 34.30014577228499 ], "wc_correctness_avg": [ 28.5, 18.445866745696716 ], "wc_clarity_avg": [ 9.0, 4.743416490252569 ], "wc_relation_to_prior_work_avg": [ 8.5, 7.123903424387503 ], "wc_documentation_avg": [ 23.75, 18.59267328815305 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 462.25, 291.6893681641482 ], "wc_reply_reviewers_avg": [ 52.75, 58.56353387561239 ], "wc_reply_authors_avg": [ 59.0, 39.05124837953327 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3647051004482615199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.stanford.edu;northeastern.edu;stanford.edu;harvard.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Stanford University;Northeastern University;Harvard University", "aff_unique_dep": "Computer Science Department;;", "aff_unique_url": "https://www.stanford.edu;https://www.northeastern.edu;https://www.harvard.edu", "aff_unique_abbr": "Stanford;NEU;Harvard", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Spike-based Neuromorphic Model for Sound Source Localization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96112", "id": "CyCDqnrymT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CyCDqnrymT", "openreview": "https://openreview.net/forum?id=CyCDqnrymT", "poster": "/media/PosterPDFs/NeurIPS%202024/96112.png?t=1731575965.1126058", "project": "", "author_site": "Dehao Zhang, Shuai Wang, Ammar Belatreche, Wenjie Wei, Yichen Xiao, Haorui Zheng, Zijian Zhou, Malu Zhang, Yang Yang", "tldr": "", "abstract": "Biological systems possess remarkable sound source localization (SSL) capabilities that are critical for survival in complex environments. This ability arises from the collaboration between the auditory periphery, which encodes sound as precisely timed spikes, and the auditory cortex, which performs spike-based computations. Inspired by these biological mechanisms, we propose a novel neuromorphic SSL framework that integrates spike-based neural encoding and computation. The framework employs Resonate-and-Fire (RF) neurons with a phase-locking coding (RF-PLC) method to achieve energy-efficient audio processing. The RF-PLC method leverages the resonance properties of RF neurons to efficiently convert audio signals to time-frequency representation and encode interaural time difference (ITD) cues into discriminative spike patterns. In addition, biological adaptations like frequency band selectivity and short-term memory effectively filter out many environmental noises, enhancing SSL capabilities in real-world settings. Inspired by these adaptations, we propose a spike-driven multi-auditory attention (MAA) module that significantly improves both the accuracy and robustness of the proposed SSL framework. Extensive experimentation demonstrates that our SSL framework achieves state-of-the-art accuracy in SSL tasks. Furthermore, it shows exceptional noise robustness and maintains high accuracy even at very low signal-to-noise ratios. By mimicking biological hearing, this neuromorphic approach contributes to the development of high-performance and explainable artificial intelligence systems capable of superior performance in real-world environments.", "keywords": "Spiking Neural Networks;Resonate-and-Fire Neurons;Bio-inspired Neuromorphic System;Sound Source Localization", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/93c0c62a99a8c660ebcb32ab19af9eddd699b967.zip", "author": "Dehao Zhang;Shuai Wang;Ammar Belatreche;Wenjie Wei;Yichen Xiao;Haorui Zheng;Zijian Zhou;Malu Zhang;Yang Yang", "authorids": "~Dehao_Zhang2;~Shuai_Wang23;~Ammar_Belatreche1;~Wenjie_Wei1;~Yichen_Xiao1;~Haorui_Zheng1;~Zijian_Zhou5;~Malu_Zhang1;~Yang_Yang37", "gender": "M;M;M;;;;M;M;M", "homepage": "https://github.com/i-spoon;https://www.uestc.edu.cn/;https://www.northumbria.ac.uk/about-us/our-staff/b/ammar-belatreche/;;https://github.com/xiaoooyc;https://github.com/getsum-zero;https://github.com/Kailai1104;;http://cfm.uestc.edu.cn/~yangyang/", "dblp": ";;80/4997;;;;73/1606-5;156/7882;", "google_scholar": "https://scholar.google.com.hk/citations?user=JNSktnMylNQC;89j9Uu4AAAAJ;https://scholar.google.co.uk/citations?user=DsJo3T8AAAAJ;;;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=FfMQsgMAAAAJ;", "orcid": ";0009-0000-6028-9748;0000-0003-1927-9366;;;;0009-0006-8050-4912;;", "linkedin": ";;ammar-belatreche-87b65816/;;;;;;", "or_profile": "~Dehao_Zhang2;~Shuai_Wang23;~Ammar_Belatreche1;~Wenjie_Wei1;~Yichen_Xiao1;~Haorui_Zheng1;~Zijian_Zhou5;~Malu_Zhang1;~Yang_Yang37", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Northumbria University;;University of Electronic Science and Technology of China;Peking University;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;northumbria.ac.uk;;uestc.edu.cn;pku.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "MS student;PhD student;Associate Professor;;MS student;MS student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024spikebased,\ntitle={Spike-based Neuromorphic Model for Sound Source Localization},\nauthor={Dehao Zhang and Shuai Wang and Ammar Belatreche and Wenjie Wei and Yichen Xiao and Haorui Zheng and Zijian Zhou and Malu Zhang and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CyCDqnrymT}\n}", "github": "", "reviewers": "PWNz;2syp;ajui;uJwn;zZvi;ym4t", "pdf_size": 6368074, "rating": "3;4;5;7;7;8", "confidence": "5;4;2;5;5;5", "soundness": "2;2;2;3;3;3", "novelty": "2;1;2;3;3;3", "presentation": "1;2;2;3;3;3", "wc_summary": "99;30;95;74;52;66", "wc_strengths": "81;30;67;147;66;96", "wc_weaknesses": "111;70;107;104;162;96", "wc_questions": "110;70;118;53;6;97", "wc_limitations": "2;3;4;120;6;1", "wc_review": "403;203;391;498;292;356", "wc_reply_reviewers": "20;0;0;18;10;115", "wc_reply_authors": "296;0;0;28;28;202", "reply_reviewers": "1;0;0;1;1;1", "reply_authors": "4;1;1;2;2;3", "rating_avg": [ 5.666666666666667, 1.7950549357115013 ], "confidence_avg": [ 4.333333333333333, 1.1055415967851332 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 69.33333333333333, 23.872345693058502 ], "wc_strengths_avg": [ 81.16666666666667, 35.596894371403934 ], "wc_weaknesses_avg": [ 108.33333333333333, 27.47524137999317 ], "wc_questions_avg": [ 75.66666666666667, 38.35216928530756 ], "wc_limitations_avg": [ 22.666666666666668, 43.55711448457327 ], "wc_review_avg": [ 357.1666666666667, 92.28112242249524 ], "wc_reply_reviewers_avg": [ 27.166666666666668, 40.043379255779875 ], "wc_reply_authors_avg": [ 92.33333333333333, 114.62644643459129 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.1666666666666665, 1.0671873729054748 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3079408810257199, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6121446430708464904&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "uestc.edu.cn;uestc.edu.cn;northumbria.ac.uk;;uestc.edu.cn;pku.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;2;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China;Northumbria University;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uestc.edu.cn;https://www.northumbria.ac.uk;http://www.pku.edu.cn", "aff_unique_abbr": "UESTC;Northumbria;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "FairMedFM: Fairness Benchmarking for Medical Imaging Foundation Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97809", "id": "CyrKKKN3fs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CyrKKKN3fs", "openreview": "https://openreview.net/forum?id=CyrKKKN3fs", "poster": "/media/PosterPDFs/NeurIPS%202024/97809.png?t=1733637978.0961688", "project": "", "author_site": "Ruinan Jin, Zikang Xu, Yuan Zhong, Qingsong Yao, DOU QI, S. Kevin Zhou, Xiaoxiao Li", "tldr": "", "abstract": "The advent of foundation models (FMs) in healthcare offers unprecedented opportunities to enhance medical diagnostics through automated classification and segmentation tasks. However, these models also raise significant concerns about their fairness, especially when applied to diverse and underrepresented populations in healthcare applications. Currently, there is a lack of comprehensive benchmarks, standardized pipelines, and easily adaptable libraries to evaluate and understand the fairness performance of FMs in medical imaging, leading to considerable challenges in formulating and implementing solutions that ensure equitable outcomes across diverse patient populations. To fill this gap, we introduce FairMedFM, a fairness benchmark for FM research in medical imaging. FairMedFM integrates with 17 popular medical imaging datasets, encompassing different modalities, dimensionalities, and sensitive attributes. It explores 20 widely used FMs, with various usages such as zero-shot learning, linear probing, parameter-efficient fine-tuning, and prompting in various downstream tasks -- classification and segmentation. Our exhaustive analysis evaluates the fairness performance over different evaluation metrics from multiple perspectives, revealing the existence of bias, varied utility-fairness trade-offs on different FMs, consistent disparities on the same datasets regardless FMs, and limited effectiveness of existing unfairness mitigation methods. Furthermore, FairMedFM provides an open-sourced codebase at https://github.com/FairMedFM/FairMedFM, supporting extendible functionalities and applications and inclusive for studies on FMs in medical imaging over the long term.", "keywords": "Foundation Models;Medical Image Analysis;Fairness", "primary_area": "", "supplementary_material": "", "author": "Ruinan Jin;Zikang Xu;Yuan Zhong;Qingsong Yao;Qi Dou;S Kevin Zhou;Xiaoxiao Li", "authorids": "~Ruinan_Jin4;~Zikang_Xu1;~Yuan_Zhong5;~Qingsong_Yao1;~Qi_Dou2;~S_Kevin_Zhou1;~Xiaoxiao_Li1", "gender": ";M;M;;F;M;Unspecified", "homepage": "https://nanboy-ronan.github.io/Personal-Web/;https://Xuzikang.github.io;;;https://www.cse.cuhk.edu.hk/~qdou;;https://xxlya.github.io/", "dblp": "280/2578;;;;165/7846;57/98;71/8042", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;fwfhpsAAAAAJ;;https://scholar.google.com.hk/citations?user=iHh7IJQAAAAJ;8eNm2GMAAAAJ;sdENOQ4AAAAJ", "orcid": ";;;;0000-0002-3416-9950;0000-0002-6881-4444;", "linkedin": ";;;;;s-kevin-zhou-231a094b/;", "or_profile": "~Ruinan_Jin4;~Zikang_Xu1;~Yuan_Zhong5;~Qingsong_Yao1;~Qi_Dou2;~S_Kevin_Zhou1;~Xiaoxiao_Li1", "aff": "University of British Columbia;University of Science and Technology of China;The Chinese University of Hong Kong;;The Chinese University of Hong Kong;University of Science and Technology of China;University of British Columbia", "aff_domain": "ubc.ca;ustc.edu.cn;cuhk.edu.hk;;cuhk.edu.hk;ustc.edu.cn;ece.ubc.ca", "position": "Undergrad student;PhD student;PhD student;;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njin2024fairmedfm,\ntitle={FairMed{FM}: Fairness Benchmarking for Medical Imaging Foundation Models},\nauthor={Ruinan Jin and Zikang Xu and Yuan Zhong and Qingsong Yao and Qi Dou and S Kevin Zhou and Xiaoxiao Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=CyrKKKN3fs}\n}", "github": "", "reviewers": "ppic;EbvA;XNR2;Lo9r", "pdf_size": 9183348, "rating": "4;7;8;10", "confidence": "5;3;2;4", "wc_summary_and_contributions": "54;119;27;80", "wc_strengths": "58;141;4;70", "wc_improvement": "77;10;6;11", "wc_limitations": "5;17;4;3", "wc_correctness": "1;10;1;1", "wc_clarity": "1;4;1;27", "wc_relation_to_prior_work": "2;11;1;1", "wc_documentation": "17;19;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "216;332;46;195", "wc_reply_reviewers": "23;0;16;0", "wc_reply_authors": "0;0;16;0", "reply_reviewers": "1;0;1;0", "reply_authors": "6;4;4;1", "rating_avg": [ 7.25, 2.165063509461097 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 70.0, 33.933759001914304 ], "wc_strengths_avg": [ 68.25, 48.80765821057183 ], "wc_improvement_avg": [ 26.0, 29.50423698386386 ], "wc_limitations_avg": [ 7.25, 5.673402858955108 ], "wc_correctness_avg": [ 3.25, 3.897114317029974 ], "wc_clarity_avg": [ 8.25, 10.894379284750462 ], "wc_relation_to_prior_work_avg": [ 3.75, 4.205650960315181 ], "wc_documentation_avg": [ 9.5, 8.52936105461599 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 197.25, 101.7235837945164 ], "wc_reply_reviewers_avg": [ 9.75, 10.059199769365355 ], "wc_reply_authors_avg": [ 4.0, 6.928203230275509 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.46475800154489, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18123470773123772487&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ubc.ca;ustc.edu.cn;cuhk.edu.hk;;cuhk.edu.hk;ustc.edu.cn;ece.ubc.ca", "author_num": 7, "aff_unique_index": "0;1;2;2;1;0", "aff_unique_norm": "University of British Columbia;University of Science and Technology of China;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;http://www.ustc.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "UBC;USTC;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;0", "aff_country_unique": "Canada;China" }, { "title": "PAC-Bayes-Chernoff bounds for unbounded losses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96111", "id": "CyzZeND3LB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CyzZeND3LB", "openreview": "https://openreview.net/forum?id=CyzZeND3LB", "poster": "/media/PosterPDFs/NeurIPS%202024/96111.png?t=1732705536.1806543", "project": "", "author_site": "Ioar Casado Telletxea, Luis Antonio Ortega Andr\u00e9s, Aritz P\u00e9rez, Andres Masegosa", "tldr": "", "abstract": "We introduce a new PAC-Bayes oracle bound for unbounded losses that extends Cram\u00e9r-Chernoff bounds to the PAC-Bayesian setting. The proof technique relies on controlling the tails of certain random variables involving the Cram\u00e9r transform of the loss. Our approach naturally leverages properties of Cram\u00e9r-Chernoff bounds, such as exact optimization of the free parameter in many PAC-Bayes bounds. We highlight several applications of the main theorem. Firstly, we show that our bound recovers and generalizes previous results. Additionally, our approach allows working with richer assumptions that result in more informative and potentially tighter bounds. In this direction, we provide a general bound under a new *model-dependent* assumption from which we obtain bounds based on parameter norms and log-Sobolev inequalities. Notably, many of these bounds can be minimized to obtain distributions beyond the Gibbs posterior and provide novel theoretical coverage to existing regularization techniques.", "keywords": "Statistical learning theory;PAC-Bayes;Chernoff bounds;regularization", "primary_area": "learning_theory", "supplementary_material": "/attachment/462a156d5c4c88e1ce9e92ae5f7a9ccec621e311.zip", "author": "Ioar Casado;Luis A. Ortega;Aritz P\u00e9rez;Andres R Masegosa", "authorids": "~Ioar_Casado1;~Luis_A._Ortega1;~Aritz_P\u00e9rez1;~Andres_R_Masegosa1", "gender": "M;;M;M", "homepage": ";;https://andresmasegosa.github.io/;", "dblp": "331/3776;;70/2802;304/8839", "google_scholar": "https://scholar.google.es/citations?user=NBTdoV8AAAAJ;https://scholar.google.es/citations?user=F6BMpa0AAAAJ;J1zoY7AAAAAJ;1Ly8qeoAAAAJ", "orcid": ";0000-0002-8128-1099;;", "linkedin": ";;;ludvins", "or_profile": "~Ioar_Casado1;~Aritz_P\u00e9rez1;~Andres_R_Masegosa1;~Luis_Antonio_Ortega_Andr\u00e9s1", "aff": "Basque Center for Applied Mathematics;Basque Center for Applied Mathematics;Aalborg University;Universidad Aut\u00f3noma de Madrid", "aff_domain": "bcamath.org;bcamath.org;cs.aau.dk;uam.es", "position": "PhD student;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\ncasado2024pacbayeschernoff,\ntitle={{PAC}-Bayes-Chernoff bounds for unbounded losses},\nauthor={Ioar Casado and Luis A. Ortega and Aritz P{\\'e}rez and Andres R Masegosa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CyzZeND3LB}\n}", "github": "", "reviewers": "XnKo;Kbx9;JzAv;MaqK", "pdf_size": 561939, "rating": "6;7;7;7", "confidence": "2;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "123;110;103;50", "wc_strengths": "70;103;28;21", "wc_weaknesses": "59;171;80;93", "wc_questions": "56;341;22;2", "wc_limitations": "1;17;1;2", "wc_review": "309;742;234;168", "wc_reply_reviewers": "10;207;8;0", "wc_reply_authors": "0;219;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.5, 27.789386463180506 ], "wc_strengths_avg": [ 55.5, 33.21520736048475 ], "wc_weaknesses_avg": [ 100.75, 42.334235554690245 ], "wc_questions_avg": [ 105.25, 137.47249724944987 ], "wc_limitations_avg": [ 5.25, 6.796138609534093 ], "wc_review_avg": [ 363.25, 224.28929421619748 ], "wc_reply_reviewers_avg": [ 56.25, 87.11594285778006 ], "wc_reply_authors_avg": [ 54.75, 94.82978171439603 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17247956704723792331&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "bcamath.org;bcamath.org;cs.aau.dk;uam.es", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Basque Center for Applied Mathematics;Aalborg University;Universidad Aut\u00f3noma de Madrid", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bcamath.org/;https://www.aau.dk;https://www.uam.es", "aff_unique_abbr": "BCAM;AAU;UAM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Spain;Denmark" }, { "title": "Don't Compress Gradients in Random Reshuffling: Compress Gradient Differences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96110", "id": "CzPtBzgfae", "proceeding": "", "pdf": "https://openreview.net/pdf?id=CzPtBzgfae", "openreview": "https://openreview.net/forum?id=CzPtBzgfae", "poster": "", "project": "", "author_site": "Abdurakhmon Sadiev, Grigory Malinovsky, Eduard Gorbunov, Igor Sokolov, Ahmed Khaled, Konstantin Burlachenko, Peter Richtarik", "tldr": "", "abstract": "Gradient compression is a popular technique for improving communication complexity of stochastic first-order methods in distributed training of machine learning models. However, the existing works consider only with-replacement sampling of stochastic gradients. In contrast, it is well-known in practice and recently confirmed in theory that stochastic methods based on without-replacement sampling, e.g., Random Reshuffling (RR) method, perform better than ones that sample the gradients with-replacement. In this work, we close this gap in the literature and provide the first analysis of methods with gradient compression and without-replacement sampling. We first develop a distributed variant of random reshuffling with gradient compression (Q-RR), and show how to reduce the variance coming from gradient quantization through the use of control iterates. Next, to have a better fit to Federated Learning applications, we incorporate local computation and propose a variant of Q-RR called Q-NASTYA. Q-NASTYA uses local gradient steps and different local and global stepsizes. Next, we show how to reduce compression variance in this setting as well. Finally, we prove the convergence results for the proposed methods and outline several settings in which they improve upon existing algorithms.", "keywords": "Random reshuffling;communication compression;distributed optimization;Federated Learning", "primary_area": "optimization", "supplementary_material": "/attachment/da1a03ddbd171a9a679b5a523fceb355189f6458.zip", "author": "Abdurakhmon Sadiev;Grigory Malinovsky;Eduard Gorbunov;Igor Sokolov;Ahmed Khaled;Konstantin Pavlovich Burlachenko;Peter Richt\u00e1rik", "authorids": "~Abdurakhmon_Sadiev1;~Grigory_Malinovsky1;~Eduard_Gorbunov1;~Igor_Sokolov3;~Ahmed_Khaled1;~Konstantin_Pavlovich_Burlachenko1;~Peter_Richt\u00e1rik1", "gender": "M;M;M;M;M;M;M", "homepage": "https://sadiev.netlify.app;https://grigory-malinovsky.github.io;https://eduardgorbunov.github.io;https://cemse.kaust.edu.sa/people/person/igor-sokolov;https://www.akhaled.net;https://burlachenkok.github.io/;https://richtarik.org", "dblp": "264/9455;262/3277.html;215/5512.html;202/5678-1;154/3591-1;;62/8001", "google_scholar": "R-xZRIAAAAAJ;4w2W9KQAAAAJ;https://scholar.google.ru/citations?user=85j2RqQAAAAJ;https://scholar.google.ru/citations?user=OBbPecwAAAAJ;Bc3wOdsAAAAJ;3pA-LoQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-2338-0187;;;0000-0003-4380-5848", "linkedin": ";;;igor-sokolov-7a6b47147/;;burlachenkok/;richtarik/", "or_profile": "~Abdurakhmon_Sadiev1;~Grigory_Malinovsky1;~Eduard_Gorbunov1;~Igor_Sokolov3;~Ahmed_Khaled1;~Konstantin_Pavlovich_Konstantin_Burlachenko1;~Peter_Richtarik1", "aff": "King Abdullah University of Science and Technology;Samsung;Mohamed bin Zayed University of Artificial Intelligence;King Abdullah University of Science and Technology;Princeton University;;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;samsung.com;mbzuai.ac.ae;kaust.edu.sa;princeton.edu;;kaust.edu.sa", "position": "PhD student;Intern;Postdoc;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nsadiev2024dont,\ntitle={Don't Compress Gradients in Random Reshuffling: Compress Gradient Differences},\nauthor={Abdurakhmon Sadiev and Grigory Malinovsky and Eduard Gorbunov and Igor Sokolov and Ahmed Khaled and Konstantin Pavlovich Burlachenko and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=CzPtBzgfae}\n}", "github": "", "reviewers": "zJsy;X78L;BTLT;r1qn", "pdf_size": 2197459, "rating": "4;5;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "61;84;296;70", "wc_strengths": "64;88;67;87", "wc_weaknesses": "120;325;523;72", "wc_questions": "36;106;1;83", "wc_limitations": "16;10;29;73", "wc_review": "297;613;916;385", "wc_reply_reviewers": "216;96;446;22", "wc_reply_authors": "639;650;2783;0", "reply_reviewers": "1;1;3;1", "reply_authors": "4;3;9;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 127.75, 97.48429360671389 ], "wc_strengths_avg": [ 76.5, 11.05667219374799 ], "wc_weaknesses_avg": [ 260.0, 179.12146716683625 ], "wc_questions_avg": [ 56.5, 40.7829621288106 ], "wc_limitations_avg": [ 32.0, 24.647515087732476 ], "wc_review_avg": [ 552.75, 239.33697478659664 ], "wc_reply_reviewers_avg": [ 195.0, 160.60199251565965 ], "wc_reply_authors_avg": [ 1018.0, 1052.4511865165055 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.25, 2.947456530637899 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JfqJYhb5UcYJ:scholar.google.com/&scioq=Don%27t+Compress+Gradients+in+Random+Reshuffling:+Compress+Gradient+Differences&hl=en&as_sdt=0,23", "gs_version_total": 0, "email": "kaust.edu.sa;samsung.com;mbzuai.ac.ae;kaust.edu.sa;princeton.edu;;kaust.edu.sa", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "King Abdullah University of Science and Technology;Samsung;Mohamed bin Zayed University of Artificial Intelligence;Princeton University", "aff_unique_dep": ";Samsung;;", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.samsung.com;https://mbzuai.ac.ae;https://www.princeton.edu", "aff_unique_abbr": "KAUST;Samsung;MBZUAI;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;3;0", "aff_country_unique": "Saudi Arabia;South Korea;United Arab Emirates;United States" }, { "title": "MSA Generation with Seqs2Seqs Pretraining: Advancing Protein Structure Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96109", "id": "D0DLlMOufv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D0DLlMOufv", "openreview": "https://openreview.net/forum?id=D0DLlMOufv", "poster": "/media/PosterPDFs/NeurIPS%202024/96109.png?t=1733805763.7127721", "project": "", "author_site": "LE ZHANG, Jiayang Chen, Tao Shen, Yu Li, Siqi Sun", "tldr": "", "abstract": "Deep learning models like AlphaFold2 have revolutionized protein structure prediction, achieving unprecedented accuracy. However, the dependence on robust multiple sequence alignments (MSAs) continues to pose a challenge, especially for proteins that lack a wealth of homologous sequences. To overcome this limitation, we introduce MSA-Generator, a self-supervised generative protein language model. Trained on a sequence-to-sequence task using an automatically constructed dataset, MSA-Generator employs protein-specific attention mechanisms to harness large-scale protein databases, generating virtual MSAs that enrich existing ones and boost prediction accuracy. Our experiments on CASP14 and CASP15 benchmarks reveal significant improvements in LDDT scores, particularly for complex and challenging sequences, enhancing the performance of both AlphaFold2 and RoseTTAFold. The code is released at \\url{https://github.com/lezhang7/MSAGen}.", "keywords": "Protein Language Model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Le Zhang;Jiayang Chen;Tao Shen;Yu Li;Siqi Sun", "authorids": "~Le_Zhang6;~Jiayang_Chen2;~Tao_Shen2;~Yu_Li1;~Siqi_Sun2", "gender": "M;;M;M;M", "homepage": "https://lezhang7.github.io/;;;https://sites.google.com/view/liyu1995;https://intersun.github.io/", "dblp": "03/4043-12;;95/4097;;120/1735", "google_scholar": "NqbBXAsAAAAJ;W592S5YAAAAJ;8cprenoAAAAJ;8YHZx-AAAAAJ;2dyg3WgAAAAJ", "orcid": ";;;0000-0002-3664-6722;", "linkedin": ";;;yuli1995/;", "or_profile": "~Le_Zhang6;~Jiayang_Chen2;~Tao_Shen2;~Yu_Li1;~Siqi_Sun2", "aff": "Mila - Quebec AI Institute & Universit\u00e9 de Montr\u00e9al;;Tencent AI Lab;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Fudan University", "aff_domain": "mila.umontreal.ca;;tencent.com;cse.cuhk.edu.hk;fudan.edu.cn", "position": "PhD student;;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024msa,\ntitle={{MSA} Generation with Seqs2Seqs Pretraining: Advancing Protein Structure Predictions},\nauthor={Le Zhang and Jiayang Chen and Tao Shen and Yu Li and Siqi Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D0DLlMOufv}\n}", "github": "", "reviewers": "xVQk;Gq4n;6doy;3nEJ", "pdf_size": 5608828, "rating": "4;5;5;6", "confidence": "4;5;3;3", "soundness": "2;2;2;2", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "17;68;66;44", "wc_strengths": "27;68;87;82", "wc_weaknesses": "76;152;172;105", "wc_questions": "2;2;2;3", "wc_limitations": "1;1;1;24", "wc_review": "123;291;328;258", "wc_reply_reviewers": "253;57;32;0", "wc_reply_authors": "471;95;59;59", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 48.75, 20.60794749605113 ], "wc_strengths_avg": [ 66.0, 23.569047498785352 ], "wc_weaknesses_avg": [ 126.25, 37.85746293665227 ], "wc_questions_avg": [ 2.25, 0.4330127018922193 ], "wc_limitations_avg": [ 6.75, 9.959292143521045 ], "wc_review_avg": [ 250.0, 77.39186003708659 ], "wc_reply_reviewers_avg": [ 85.5, 98.79397754924133 ], "wc_reply_authors_avg": [ 171.0, 173.82750070112613 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vYin7XwL7J4J:scholar.google.com/&scioq=MSA+Generation+with+Seqs2Seqs+Pretraining:+Advancing+Protein+Structure+Predictions&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "mila.umontreal.ca;;tencent.com;cse.cuhk.edu.hk;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Tencent;Chinese University of Hong Kong;Fudan University", "aff_unique_dep": "Quebec AI Institute;Tencent AI Lab;Department of Computer Science and Engineering;", "aff_unique_url": "https://www.umontreal.ca;https://ai.tencent.com;https://www.cuhk.edu.hk;https://www.fudan.edu.cn", "aff_unique_abbr": "UdeM;Tencent AI Lab;CUHK;Fudan", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Montreal;;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Canada;China" }, { "title": "CosAE: Learnable Fourier Series for Image Restoration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96108", "id": "D0s29c5GvL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D0s29c5GvL", "openreview": "https://openreview.net/forum?id=D0s29c5GvL", "poster": "/media/PosterPDFs/NeurIPS%202024/96108.png?t=1733795792.8740733", "project": "", "author_site": "Sifei Liu, Shalini De Mello, Jan Kautz", "tldr": "", "abstract": "In this paper, we introduce Cosine Autoencoder (CosAE), a novel, generic Autoencoder that seamlessly leverages the classic Fourier series with a feed-forward neural network. CosAE represents an input image as a series of 2D Cosine time series, each defined by a tuple of learnable frequency and Fourier coefficients. This method stands in contrast to a conventional Autoencoder that often sacrifices detail in their reduced-resolution bottleneck latent spaces. CosAE, however, encodes frequency coefficients, i.e., the amplitudes and phases, in its bottleneck. This encoding enables extreme spatial compression, e.g., $64\\times$ downsampled feature maps in the bottleneck, without losing detail upon decoding. We showcase the advantage of CosAE via extensive experiments on flexible-resolution super-resolution and blind image restoration, two highly challenging tasks that demand the restoration network to effectively generalize to complex and even unknown image degradations. Our method surpasses state-of-the-art approaches, highlighting its capability to learn a generalizable representation for image restoration. The project page is maintained at [https://sifeiliu.net/CosAE-page/](https://sifeiliu.net/CosAE-page/).", "keywords": "Fourier Series;Image Restoration;Autoencoder", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Sifei Liu;Shalini De Mello;Jan Kautz", "authorids": "~Sifei_Liu2;~Shalini_De_Mello1;~Jan_Kautz1", "gender": "F;Not Specified;", "homepage": "https://www.sifeiliu.net;https://research.nvidia.com/person/shalini-de-mello;http://jankautz.com", "dblp": "118/1301;206/7364;48/6214", "google_scholar": "j4pcHV4AAAAJ;xQM4BlMAAAAJ;P9FclNEAAAAJ", "orcid": ";;", "linkedin": ";shalini-de-mello-02b8251/;", "or_profile": "~Sifei_Liu2;~Shalini_De_Mello1;~Jan_Kautz1", "aff": "NVIDIA;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;nvidia.com", "position": "Researcher;Principal Researcher;VP Research", "bibtex": "@inproceedings{\nliu2024cosae,\ntitle={Cos{AE}: Learnable Fourier Series for Image Restoration},\nauthor={Sifei Liu and Shalini De Mello and Jan Kautz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D0s29c5GvL}\n}", "github": "", "reviewers": "Fx9w;Yyg3;gTQ7;Gqnq", "pdf_size": 50688038, "rating": "4;6;7;7", "confidence": "5;4;3;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "49;62;34;104", "wc_strengths": "25;13;57;103", "wc_weaknesses": "189;8;47;219", "wc_questions": "5;208;19;3", "wc_limitations": "13;6;7;20", "wc_review": "281;297;164;449", "wc_reply_reviewers": "43;74;18;48", "wc_reply_authors": "139;264;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 26.06122598804592 ], "wc_strengths_avg": [ 49.5, 34.8245602987317 ], "wc_weaknesses_avg": [ 115.75, 89.94824901019474 ], "wc_questions_avg": [ 58.75, 86.38974186788614 ], "wc_limitations_avg": [ 11.5, 5.5901699437494745 ], "wc_review_avg": [ 297.75, 101.29998766041386 ], "wc_reply_reviewers_avg": [ 45.75, 19.879323429131084 ], "wc_reply_authors_avg": [ 100.75, 110.01676008681586 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15058214160523261857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nvidia.com;nvidia.com;nvidia.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Metacognitive Capabilities of LLMs: An Exploration in Mathematical Problem Solving", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96107", "id": "D19UyP4HYk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D19UyP4HYk", "openreview": "https://openreview.net/forum?id=D19UyP4HYk", "poster": "", "project": "", "author_site": "Aniket Didolkar, Anirudh Goyal, Nan Rosemary Ke, Siyuan Guo, Michal Valko, Timothy Lillicrap, Danilo Jimenez Rezende, Yoshua Bengio, Michael Mozer, Sanjeev Arora", "tldr": "", "abstract": "\\emph{Metacognitive knowledge} refers to humans' intuitive knowledge of their own thinking and reasoning processes. Today's best LLMs clearly possess some reasoning processes. The paper gives evidence that they also have metacognitive knowledge, including ability to name skills and procedures to apply given a task. We explore this primarily in context of math reasoning, developing a prompt-guided interaction procedure to get a powerful LLM to assign sensible skill labels to math questions, followed by having it perform semantic clustering to obtain coarser families of skill labels. These coarse skill labels look interpretable to humans.\n\nTo validate that these skill labels are meaningful and relevant to the LLM's reasoning processes we perform the following experiments. (a) We ask GPT-4 to assign skill labels to training questions in math datasets GSM8K and MATH. (b) When using an LLM to solve the test questions, we present it with the full list of skill labels and ask it to identify the skill needed. Then it is presented with randomly selected exemplar solved questions associated with that skill label. This improves accuracy on GSM8k and MATH for several strong LLMs, including code-assisted models. The methodology presented is domain-agnostic, even though this article applies it to math problems.", "keywords": "Metacognitive abilities of LLM;Mathematical Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Aniket Rajiv Didolkar;Anirudh Goyal;Nan Rosemary Ke;Siyuan Guo;Michal Valko;Timothy P Lillicrap;Danilo Jimenez Rezende;Yoshua Bengio;Michael Curtis Mozer;Sanjeev Arora", "authorids": "~Aniket_Rajiv_Didolkar1;~Anirudh_Goyal1;~Nan_Rosemary_Ke1;~Siyuan_Guo1;~Michal_Valko1;~Timothy_P_Lillicrap1;~Danilo_Jimenez_Rezende2;~Yoshua_Bengio1;~Michael_Curtis_Mozer1;~Sanjeev_Arora1", "gender": "M;M;F;F;M;M;M;M;;M", "homepage": "https://github.com/dido1998/;https://anirudh9119.github.io/;https://nke001.github.io/;https://siyuanguo.com/;https://misovalko.github.io/research.html;http://contrastiveconvergence.net/~timothylillicrap/index.php;http://yoshuabengio.org;https://www.cs.colorado.edu/~mozer;http://www.cs.princeton.edu/~arora/;", "dblp": "245/8589;172/1039;120/5291;;03/5455;37/10849;56/953;m/MichaelCMozer;a/SArora;31/11107", "google_scholar": "https://scholar.google.ca/citations?user=ekvl5o0AAAAJ;krrh6OUAAAAJ;https://scholar.google.ca/citations?user=dxwPYhQAAAAJ;;jrazNCQAAAAJ;https://scholar.google.co.uk/citations?user=htPVdRMAAAAJ;kukA0LcAAAAJ;lmjR_qMAAAAJ;RUP4S68AAAAJ;https://scholar.google.co.uk/citations?user=UGlyhFMAAAAJ", "orcid": ";;;;;;;;;", "linkedin": "aniket-didolkar-7a9b8912a;;;;michalvalko/;;yoshuabengio/?originalSubdomain=ca;;;", "or_profile": "~Aniket_Rajiv_Didolkar1;~Anirudh_Goyal1;~Nan_Rosemary_Ke1;~Siyuan_Guo1;~Michal_Valko1;~Timothy_P_Lillicrap1;~Yoshua_Bengio1;~Michael_Curtis_Mozer1;~Sanjeev_Arora1;~Danilo_Jimenez_Rezende1", "aff": "Manipal Institute of Technology;Google DeepMind;Google DeepMind;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Meta;Google DeepMind;University of Montreal;Google DeepMind;Princeton University;Google DeepMind", "aff_domain": "manipal.edu;google.com;deepmind.com;tuebingen.mpg.de;meta.com;deepmind.com;umontreal.ca;google.com;princeton.edu;google.com", "position": "Undergrad student;Researcher;Researcher;PhD student;Principal Researcher;Research Scientist;Full Professor;Research Scientist;Full Professor;Director", "bibtex": "@inproceedings{\ndidolkar2024metacognitive,\ntitle={Metacognitive Capabilities of {LLM}s: An Exploration in Mathematical Problem Solving},\nauthor={Aniket Rajiv Didolkar and Anirudh Goyal and Nan Rosemary Ke and Siyuan Guo and Michal Valko and Timothy P Lillicrap and Danilo Jimenez Rezende and Yoshua Bengio and Michael Curtis Mozer and Sanjeev Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D19UyP4HYk}\n}", "github": "", "reviewers": "ADDk;rRAL;xadG;C2BC", "pdf_size": 478373, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "256;178;113;56", "wc_strengths": "433;60;99;60", "wc_weaknesses": "334;95;138;122", "wc_questions": "113;66;80;3", "wc_limitations": "329;39;6;1", "wc_review": "1465;438;436;242", "wc_reply_reviewers": "30;128;23;0", "wc_reply_authors": "166;336;106;106", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 150.75, 74.53648435497881 ], "wc_strengths_avg": [ 163.0, 156.6955647106835 ], "wc_weaknesses_avg": [ 172.25, 94.64241913645276 ], "wc_questions_avg": [ 65.5, 39.91553582253406 ], "wc_limitations_avg": [ 93.75, 136.60412695083556 ], "wc_review_avg": [ 645.25, 479.931961323686 ], "wc_reply_reviewers_avg": [ 45.25, 49.04780830985214 ], "wc_reply_authors_avg": [ 178.5, 94.17404100918681 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10089551812977618420&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "manipal.edu;google.com;deepmind.com;tuebingen.mpg.de;meta.com;deepmind.com;umontreal.ca;google.com;princeton.edu;google.com", "author_num": 10, "aff_unique_index": "0;1;1;2;3;1;4;1;5;1", "aff_unique_norm": "Manipal Institute of Technology;Google;Max Planck Institute for Intelligent Systems;Meta;University of Montreal;Princeton University", "aff_unique_dep": ";Google DeepMind;Intelligent Systems;Meta Platforms, Inc.;;", "aff_unique_url": "https://mit manipal.edu;https://deepmind.com;https://www.mpi-is.mpg.de;https://meta.com;https://wwwumontreal.ca;https://www.princeton.edu", "aff_unique_abbr": "MIT Manipal;DeepMind;MPI-IS;Meta;UM;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;3;1;4;1;3;1", "aff_country_unique": "India;United Kingdom;Germany;United States;Canada" }, { "title": "ACFun: Abstract-Concrete Fusion Facial Stylization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96106", "id": "D2VK206HaJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D2VK206HaJ", "openreview": "https://openreview.net/forum?id=D2VK206HaJ", "poster": "/media/PosterPDFs/NeurIPS%202024/96106.png?t=1730880204.742955", "project": "", "author_site": "Jiapeng Ji, Kun Wei, Ziqi Zhang, Cheng Deng", "tldr": "", "abstract": "Owing to advancements in image synthesis techniques, stylization methodologies for large models have garnered remarkable outcomes. However, when it comes to processing facial images, the outcomes frequently fall short of expectations. Facial stylization is predominantly challenged by two significant hurdles. Firstly, obtaining a large dataset of high-quality stylized images is difficult. The scarcity and diversity of artistic styles make it impractical to compile comprehensive datasets for each style. Secondly, while many methods can transfer colors and strokes from style images, these elements alone cannot fully capture a specific style, which encompasses both concrete and abstract visual elements. Additionally, facial stylization often alters the visual features of the face, making it challenging to balance these changes with the need to retain facial information. To address these issues, we propose a novel method called ACFun, which uses only one style image and one facial image for facial stylization. ACFun comprises an Abstract Fusion Module (AFun) and a Concrete Fusion Module (CFun), which separately learn the abstract and concrete features of the style and face. We also design a Face and Style Imagery Alignment Loss to align the style image with the face image in the latent space. Finally, we generate styled facial images from noise directly to complete the facial stylization task. Experiments show that our method outperforms others in facial stylization, producing highly artistic and visually pleasing results.", "keywords": "Large-Model Stylized;One-Shot Learning;Facial Stylization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jiapeng Ji;Kun Wei;Ziqi Zhang;Cheng Deng", "authorids": "~Jiapeng_Ji1;~Kun_Wei1;~Ziqi_Zhang4;~Cheng_Deng2", "gender": "M;M;;M", "homepage": "https://github.com/neverenough7;https://scholar.google.com.hk/citations?user=fju0aWQAAAAJ&hl=zh-CN;https://github.com/ZZZ9116;http://see.xidian.edu.cn/faculty/chdeng/", "dblp": ";;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=fju0aWQAAAAJ;https://scholar.google.com.hk/citations?user=JITPPf8AAAAJ;OROjmc8AAAAJ", "orcid": "0009-0008-1315-6094;;0000-0001-7000-3539;0000-0003-2620-3247", "linkedin": ";;;", "or_profile": "~Jiapeng_Ji1;~Kun_Wei1;~Ziqi_Zhang4;~Cheng_Deng1", "aff": "Xidian University;Xidian University;Xidian University;Xidian University", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn", "position": "PhD student;Lecturer;PhD student;Full Professor", "bibtex": "@inproceedings{\nji2024acfun,\ntitle={{ACF}un: Abstract-Concrete Fusion Facial Stylization},\nauthor={Jiapeng Ji and Kun Wei and Ziqi Zhang and Cheng Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D2VK206HaJ}\n}", "github": "", "reviewers": "5hdY;bo1j;uyVB;6WJE", "pdf_size": 10871817, "rating": "4;4;6;7", "confidence": "4;5;3;4", "soundness": "3;1;2;3", "novelty": "2;2;3;3", "presentation": "3;1;3;3", "wc_summary": "68;73;130;86", "wc_strengths": "43;47;100;89", "wc_weaknesses": "124;149;276;101", "wc_questions": "32;43;176;3", "wc_limitations": "9;3;47;3", "wc_review": "276;315;729;282", "wc_reply_reviewers": "0;145;211;11", "wc_reply_authors": "137;424;97;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 89.25, 24.427187721880717 ], "wc_strengths_avg": [ 69.75, 25.093574874855914 ], "wc_weaknesses_avg": [ 162.5, 67.69231861888024 ], "wc_questions_avg": [ 63.5, 66.57514551242078 ], "wc_limitations_avg": [ 15.5, 18.350749303502567 ], "wc_review_avg": [ 400.5, 190.23998002523024 ], "wc_reply_reviewers_avg": [ 91.75, 89.43538170098006 ], "wc_reply_authors_avg": [ 164.5, 157.88682655623933 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3193239231871758857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Xidian University", "aff_unique_dep": "", "aff_unique_url": "http://www.xidian.edu.cn/", "aff_unique_abbr": "Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Shopping MMLU: A Massive Multi-Task Online Shopping Benchmark for Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97808", "id": "D3jyWDBZTk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D3jyWDBZTk", "openreview": "https://openreview.net/forum?id=D3jyWDBZTk", "poster": "/media/PosterPDFs/NeurIPS%202024/97808.png?t=1733143799.5116966", "project": "", "author_site": "Yilun Jin, Zheng Li, Chenwei Zhang, Tianyu Cao, Yifan Gao, Pratik Jayarao, Mao Li, Xin Liu, Ritesh Sarkhel, Xianfeng Tang, Haodong Wang, Zhengyang Wang, Wenju Xu, Jingfeng Yang, Qingyu Yin, Xian Li, Priyanka Nigam, Yi Xu, Kai Chen, Qiang Yang, Meng Jiang, Bing Yin", "tldr": "", "abstract": "Online shopping is a complex multi-task, few-shot learning problem with a wide and evolving range of entities, relations, and tasks. However, existing models and benchmarks are commonly tailored to specific tasks, falling short of capturing the full complexity of online shopping. Large Language Models (LLMs), with their multi-task and few-shot learning abilities, have the potential to profoundly transform online shopping by alleviating task-specific engineering efforts and by providing users with interactive conversations. Despite the potential, LLMs face unique challenges in online shopping, such as domain-specific concepts, implicit knowledge, and heterogeneous user behaviors. Motivated by the potential and challenges, we propose Shopping MMLU, a diverse multi-task online shopping benchmark derived from real-world Amazon data. Shopping MMLU consists of 57 tasks covering 4 major shopping skills: concept understanding, knowledge reasoning, user behavior alignment, and multi-linguality, and can thus comprehensively evaluate the abilities of LLMs as general shop assistants. With Shoppping MMLU, we benchmark over 20 existing LLMs and uncover valuable insights about practices and prospects of building versatile LLM-based shop assistants. Shopping MMLU can be publicly accessed at https://github.com/KL4805/ShoppingMMLU. In addition, with Shopping MMLU, we are hosting a competition in KDD Cup 2024 with over 500 participating teams. The winning solutions and the associated workshop can be accessed at our website https://amazon-kddcup24.github.io/.", "keywords": "Online shopping;large language models;domain-specific LLMs;multi-task learning", "primary_area": "", "supplementary_material": "", "author": "Yilun Jin;Zheng Li;Chenwei Zhang;Tianyu Cao;Yifan Gao;Pratik Sridatt Jayarao;Mao Li;Xin Liu;Ritesh Sarkhel;Xianfeng Tang;Haodong Wang;Zhengyang Wang;Wenju Xu;Jingfeng Yang;Qingyu Yin;Xian Li;Priyanka Nigam;Yi Xu;Kai Chen;Qiang Yang;Meng Jiang;Bing Yin", "authorids": "~Yilun_Jin1;~Zheng_Li9;~Chenwei_Zhang1;~Tianyu_Cao1;~Yifan_Gao1;~Pratik_Sridatt_Jayarao1;~Mao_Li1;~Xin_Liu9;~Ritesh_Sarkhel1;~Xianfeng_Tang1;~Haodong_Wang4;~Zhengyang_Wang1;~Wenju_Xu1;~Jingfeng_Yang2;~Qingyu_Yin2;~Xian_Li3;~Priyanka_Nigam1;~Yi_Xu10;~Kai_Chen7;~Qiang_Yang1;~Meng_Jiang3;~Bing_Yin1", "gender": "M;;M;M;Not Specified;M;M;M;M;M;;M;;M;M;F;;M;M;;M;M", "homepage": "https://kl4805.github.io;;https://www.cwzhang.com;;http://yifan-gao.github.io;;;https://www.cse.ust.hk/~xliucr/;https://sarkhelritesh.github.io;https://xta.ng/;;;;https://jingfengyang.github.io/;;https://scholar.google.com/citations?user=6-Xx0IoAAAAJ&hl=en;;;http://www.cse.ust.hk/~kaichen/;;http://www.meng-jiang.com/;", "dblp": "213/7371;;133/3207;65/7211;79/3190-1;;https://dblp.org/rec/conf/nips/LiMZ20;76/1820-39.html;167/6748;33/7694;;;;;179/2542;;217/9234;;c/KaiChen5.html;;69/339-1;", "google_scholar": "y3yJm98AAAAJ;;u_bIiBQAAAAJ;kX0CcGUAAAAJ;https://scholar.google.com.hk/citations?user=erdMFJwAAAAJ;;;https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;6FE__csAAAAJ;u1PEv-QAAAAJ;;A4fNBtEAAAAJ;;hysBvrwAAAAJ;P-mBKNYAAAAJ;6-Xx0IoAAAAJ;;y7BhrpQAAAAJ;;;LZIPfCkAAAAJ;qSOxydEAAAAJ", "orcid": "0000-0002-9502-7622;;;;;;;0000-0001-9610-9526;;;;0000-0002-5146-2884;;;;;;;0000-0003-2587-6028;;0000-0002-3009-519X;0000-0002-5890-0031", "linkedin": ";;;;yi-fan-gao/;https://www.linkedin.com/feed/;;xin-liu-179830143;;xianfengtang/;wang-haodong-95a05813b;;;jingfeng-yang-797864172/;;xianl/;;yeahgoyixu;;;meng-jiang-94b10916/;bingyin", "or_profile": "~Yilun_Jin1;~Zheng_Li9;~Chenwei_Zhang1;~Tianyu_Cao1;~Yifan_Gao1;~Pratik_Sridatt_Jayarao1;~Mao_Li1;~Xin_Liu9;~Ritesh_Sarkhel1;~Xianfeng_Tang1;~Haodong_Wang4;~Zhengyang_Wang1;~Wenju_Xu1;~Jingfeng_Yang2;~Qingyu_Yin2;~Xian_Li3;~Priyanka_Nigam1;~Yi_Xu10;~Kai_Chen7;~Qiang_Yang1;~Meng_Jiang3;~Bing_Yin1", "aff": "Hong Kong University of Science and Technology;;Amazon;;Amazon;;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon;;Amazon;Amazon;Amazon;;Amazon;Hong Kong University of Science and Technology;;University of Notre Dame;Amazon", "aff_domain": "ust.hk;;amazon.com;;amazon.com;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;amazon.com;;amazon.com;ust.hk;;nd.edu;amazon.com", "position": "PhD student;;Researcher;;Researcher;;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;;Researcher;Researcher;Applied Scientist;;Senior Applied Science Manager;Full Professor;;Associate Professor;Senior Science Manager", "bibtex": "@inproceedings{\njin2024shopping,\ntitle={Shopping {MMLU}: A Massive Multi-Task Online Shopping Benchmark for Large Language Models},\nauthor={Yilun Jin and Zheng Li and Chenwei Zhang and Tianyu Cao and Yifan Gao and Pratik Sridatt Jayarao and Mao Li and Xin Liu and Ritesh Sarkhel and Xianfeng Tang and Haodong Wang and Zhengyang Wang and Wenju Xu and Jingfeng Yang and Qingyu Yin and Xian Li and Priyanka Nigam and Yi Xu and Kai Chen and Qiang Yang and Meng Jiang and Bing Yin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=D3jyWDBZTk}\n}", "github": "", "reviewers": "a2Ap;qCzS;ePES;VJmm", "pdf_size": 852587, "rating": "6;6;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "109;146;76;69", "wc_strengths": "6;91;57;63", "wc_improvement": "67;190;44;194", "wc_limitations": "5;25;58;10", "wc_correctness": "1;3;47;68", "wc_clarity": "18;54;32;11", "wc_relation_to_prior_work": "21;164;99;45", "wc_documentation": "1;78;23;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "229;752;437;466", "wc_reply_reviewers": "16;64;32;66", "wc_reply_authors": "0;38;44;41", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 100.0, 30.553232234904378 ], "wc_strengths_avg": [ 54.25, 30.670629272970583 ], "wc_improvement_avg": [ 123.75, 68.74727267317591 ], "wc_limitations_avg": [ 24.5, 20.694202086574876 ], "wc_correctness_avg": [ 29.75, 28.734778579275673 ], "wc_clarity_avg": [ 28.75, 16.422164899914993 ], "wc_relation_to_prior_work_avg": [ 82.25, 55.00624964492671 ], "wc_documentation_avg": [ 26.75, 30.727634142575962 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 471.0, 186.21627211390523 ], "wc_reply_reviewers_avg": [ 44.5, 21.277922830953212 ], "wc_reply_authors_avg": [ 30.75, 17.879807045938723 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 22, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14269913992537267902&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ust.hk;;amazon.com;;amazon.com;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;amazon.com;;amazon.com;ust.hk;;nd.edu;amazon.com", "author_num": 22, "aff_unique_index": "0;1;1;1;1;1;1;1;1;1;1;1;1;0;2;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Amazon;University of Notre Dame", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.ust.hk;https://www.amazon.com;https://www.nd.edu", "aff_unique_abbr": "HKUST;Amazon;Notre Dame", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1;1;1;1;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Grokking of Implicit Reasoning in Transformers: A Mechanistic Journey to the Edge of Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96105", "id": "D4QgSWxiOb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D4QgSWxiOb", "openreview": "https://openreview.net/forum?id=D4QgSWxiOb", "poster": "", "project": "", "author_site": "Boshi Wang, Xiang Yue, Yu Su, Huan Sun", "tldr": "", "abstract": "We study whether transformers can learn to *implicitly* reason over parametric knowledge, a skill that even the most capable language models struggle with. Focusing on two representative reasoning types, composition and comparison, we consistently find that transformers *can* learn implicit reasoning, but only through *grokking*, i.e., extended training far beyond overfitting. The levels of generalization also vary across reasoning types: when faced with out-of-distribution examples, transformers fail to systematically generalize for composition but succeed for comparison. We delve into the model's internals throughout training, conducting analytical experiments that reveal: 1) the mechanism behind grokking, such as the formation of the generalizing circuit and its relation to the relative efficiency of generalizing and memorizing circuits, and 2) the connection between systematicity and the configuration of the generalizing circuit. Our findings guide data and training setup to better induce implicit reasoning and suggest potential improvements to the transformer architecture, such as encouraging cross-layer knowledge sharing. Furthermore, we demonstrate that for a challenging reasoning task with a large search space, GPT-4-Turbo and Gemini-1.5-Pro based on non-parametric memory fail badly regardless of prompting styles or retrieval augmentation, while a fully grokked transformer can achieve near-perfect accuracy, showcasing the power of parametric memory for complex reasoning.", "keywords": "Reasoning;Grokking;Systematic Generalization;Mechanistic Interpretability;Transformer", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Boshi Wang;Xiang Yue;Yu Su;Huan Sun", "authorids": "~Boshi_Wang2;~Xiang_Yue1;~Yu_Su2;~Huan_Sun1", "gender": "M;;M;F", "homepage": "https://boshi-wang.github.io/;;http://ysu1989.github.io;https://u.osu.edu/ihudas/people/", "dblp": "216/7905;;38/1070-1;33/2952-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;rIh5OqoAAAAJ;wIFkulcAAAAJ", "orcid": ";;;", "linkedin": ";;;huan-sun-81527924/?originalSubdomain=cn", "or_profile": "~Boshi_Wang2;~Xiang_Yue1;~Yu_Su2;~Huan_Sun1", "aff": "Ohio State University;;Microsoft;The Ohio State University, Columbus", "aff_domain": "osu.edu;;microsoft.com;osu.edu", "position": "PhD student;;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2024grokking,\ntitle={Grokking of Implicit Reasoning in Transformers: A Mechanistic Journey to the Edge of Generalization},\nauthor={Boshi Wang and Xiang Yue and Yu Su and Huan Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D4QgSWxiOb}\n}", "github": "", "reviewers": "f5kN;jEH8;Xiqz;smMo;EZN5", "pdf_size": 985006, "rating": "5;5;6;7;8", "confidence": "3;4;4;4;4", "soundness": "3;1;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "44;215;46;162;349", "wc_strengths": "53;160;115;222;146", "wc_weaknesses": "48;229;260;164;259", "wc_questions": "31;1857;103;313;269", "wc_limitations": "3;14;2;57;4", "wc_review": "179;2475;526;918;1027", "wc_reply_reviewers": "0;0;65;23;29", "wc_reply_authors": "0;1168;97;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;3;2;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 163.2, 114.14972623707864 ], "wc_strengths_avg": [ 139.2, 55.40902453571981 ], "wc_weaknesses_avg": [ 192.0, 80.00249996093872 ], "wc_questions_avg": [ 514.6, 679.1587737782676 ], "wc_limitations_avg": [ 16.0, 20.94755355644186 ], "wc_review_avg": [ 1025.0, 784.6871988251114 ], "wc_reply_reviewers_avg": [ 23.4, 23.904811231214524 ], "wc_reply_authors_avg": [ 253.0, 459.0398675496498 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5144957554275267, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17162479383262899728&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "osu.edu;;microsoft.com;osu.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ohio State University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DeSparsify: Adversarial Attack Against Token Sparsification Mechanisms", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96104", "id": "D4yRz3s7UL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D4yRz3s7UL", "openreview": "https://openreview.net/forum?id=D4yRz3s7UL", "poster": "", "project": "", "author_site": "Oryan Yehezkel, Alon Zolfi, Amit Baras, Yuval Elovici, Asaf Shabtai", "tldr": "", "abstract": "Vision transformers have shown remarkable advancements in the computer vision domain, demonstrating state-of-the-art performance in diverse tasks (e.g., image classification, object detection). However, their high computational requirements grow quadratically with the number of tokens used. Token sparsification mechanisms have been proposed to address this issue. These mechanisms employ an input-dependent strategy, in which uninformative tokens are discarded from the computation pipeline, improving the model\u2019s efficiency. However, their dynamism and average-case assumption makes them vulnerable to a new threat vector \u2013 carefully crafted adversarial examples capable of fooling the sparsification mechanism, resulting in worst-case performance. In this paper, we present DeSparsify, an attack targeting the availability of vision transformers that use token sparsification mechanisms. The attack aims to exhaust the operating system\u2019s resources, while maintaining its stealthiness. Our evaluation demonstrates the attack\u2019s effectiveness on three token sparsification mechanisms and examines the attack\u2019s transferability between them and its effect on the GPU resources. To mitigate the impact of the attack, we propose various countermeasures.", "keywords": "Adversarial Attack;Vision Transformers;Token Sparsification", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Oryan Yehezkel;Alon Zolfi;Amit Baras;Yuval Elovici;Asaf Shabtai", "authorids": "~Oryan_Yehezkel1;~Alon_Zolfi1;~Amit_Baras2;~Yuval_Elovici1;~Asaf_Shabtai1", "gender": "M;M;M;M;", "homepage": ";;https://www.linkedin.com/in/amit-baras-230655221;https://cyber.bgu.ac.il/yuval/;", "dblp": ";282/0500;335/2198;38/4086;", "google_scholar": ";https://scholar.google.com/citations?hl=iw;;https://scholar.google.co.il/citations?user=ruZDm9QAAAAJ;", "orcid": ";0000-0003-0270-1743;;0000-0002-9641-128X;", "linkedin": "oryan-data-scientist/;;;yuval-elovici-0baa4a4/?originalSubdomain=il;", "or_profile": "~Oryan_Yehezkel1;~Alon_Zolfi1;~Amit_Baras2;~Yuval_Elovici1;~Asaf_Shabtai1", "aff": "Ben Gurion University of the Negev;Ben Gurion University of the Negev;Ben-Gurion University of the Negev;Ben Gurion University of the Negev, Technion;", "aff_domain": "post.bgu.ac.il;post.bgu.ac.il;bgu.ac.il;bgu.ac.il;", "position": "MS student;PhD student;MS student;Full Professor;", "bibtex": "@inproceedings{\nyehezkel2024desparsify,\ntitle={DeSparsify: Adversarial Attack Against Token Sparsification Mechanisms},\nauthor={Oryan Yehezkel and Alon Zolfi and Amit Baras and Yuval Elovici and Asaf Shabtai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D4yRz3s7UL}\n}", "github": "", "reviewers": "vvP9;TQdZ;KzdF;ZMXm;yY7m;Ux8f", "pdf_size": 8743520, "rating": "6;6;6;6;7;7", "confidence": "3;4;4;2;4;4", "soundness": "3;3;2;3;4;4", "novelty": "2;3;2;3;3;3", "presentation": "3;2;3;3;3;4", "wc_summary": "92;150;87;37;61;65", "wc_strengths": "70;191;92;54;89;91", "wc_weaknesses": "360;332;178;43;36;54", "wc_questions": "100;33;67;146;8;24", "wc_limitations": "74;19;22;4;6;1", "wc_review": "696;725;446;284;200;235", "wc_reply_reviewers": "70;38;21;0;0;69", "wc_reply_authors": "55;30;30;0;0;30", "reply_reviewers": "1;1;1;0;0;1", "reply_authors": "2;2;2;1;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.5, 0.7637626158259734 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 82.0, 35.364765892999586 ], "wc_strengths_avg": [ 97.83333333333333, 43.84600577272943 ], "wc_weaknesses_avg": [ 167.16666666666666, 135.33959345127187 ], "wc_questions_avg": [ 63.0, 47.81910357447813 ], "wc_limitations_avg": [ 21.0, 24.926558794453225 ], "wc_review_avg": [ 431.0, 212.23885915009376 ], "wc_reply_reviewers_avg": [ 33.0, 28.902133715927157 ], "wc_reply_authors_avg": [ 24.166666666666668, 19.238993967691993 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.46291004988627565, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QS4zH2dZvvgJ:scholar.google.com/&scioq=DeSparsify:+Adversarial+Attack+Against+Token+Sparsification+Mechanisms&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "post.bgu.ac.il;post.bgu.ac.il;bgu.ac.il;bgu.ac.il;", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Ben Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_unique_dep": ";", "aff_unique_url": "https://www.bgu.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "FOOGD: Federated Collaboration for Both Out-of-distribution Generalization and Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96103", "id": "D6MQrw9HFu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D6MQrw9HFu", "openreview": "https://openreview.net/forum?id=D6MQrw9HFu", "poster": "/media/PosterPDFs/NeurIPS%202024/96103.png?t=1729760457.9207485", "project": "", "author_site": "Xinting Liao, Weiming Liu, Pengyang Zhou, Fengyuan Yu, Jiahe Xu, Jun Wang, Wenjie Wang, Chaochao Chen, Xiaolin Zheng", "tldr": "", "abstract": "Federated learning (FL) is a promising machine learning paradigm that collaborates with client models to capture global knowledge. However, deploying FL models in real-world scenarios remains unreliable due to the coexistence of in-distribution data and unexpected out-of-distribution (OOD) data, such as covariate-shift and semantic-shift data. Current FL researches typically address either covariate-shift data through OOD generalization or semantic-shift data via OOD detection, overlooking the simultaneous occurrence of various OOD shifts. In this work, we propose FOOGD, a method that estimates the probability density of each client and obtains reliable global distribution as guidance for the subsequent FL process. Firstly, SM3D in FOOGD estimates score model for arbitrary distributions without prior constraints, and detects semantic-shift data powerfully. Then SAG in FOOGD provides invariant yet diverse knowledge for both local covariate-shift generalization and client performance generalization. In empirical validations, FOOGD significantly enjoys three main advantages: (1) reliably estimating non-normalized decentralized distributions, (2) detecting semantic shift data via score values, and (3) generalizing to covariate-shift data by regularizing feature extractor. The project is open in https://github.com/XeniaLLL/FOOGD-main.git.", "keywords": "Federated learning; Out-of-distribution;", "primary_area": "privacy", "supplementary_material": "/attachment/f2ca06343414eef65bec0d81cb29e910aaf32f98.zip", "author": "Xinting Liao;Weiming Liu;Pengyang Zhou;Fengyuan Yu;Jiahe Xu;Jun Wang;Wenjie Wang;Chaochao Chen;Xiaolin Zheng", "authorids": "~Xinting_Liao1;~Weiming_Liu2;~Pengyang_Zhou1;~Fengyuan_Yu1;~Jiahe_Xu1;~Jun_Wang38;~Wenjie_Wang1;~Chaochao_Chen3;~Xiaolin_Zheng1", "gender": "F;M;M;F;M;M;;M;M", "homepage": "https://xenialll.github.io/;https://github.com/PengyangZhou;https://anonymifish.github.io;https://github.com/Che-Xu;https://dblp.org/pid/w/JunWang20.html;https://wenjiewwj.github.io/;https://sites.google.com/site/ccchomepage/;https://person.zju.edu.cn/xlzheng;https://www.github.com/459548764", "dblp": "331/1544;300/4317;;72/7143-3.html;w/JunWang20;38/1956-7;26/1492-1;09/5763;00/105-5", "google_scholar": "FoMerO8AAAAJ;3LnDqE4AAAAJ;Lyh7nRQAAAAJ;;8alC56MAAAAJ;Ma5DtmoAAAAJ;qZTMyzwAAAAJ;MY23M60AAAAJ;", "orcid": "0000-0002-8257-2381;0000-0002-7219-0937;0009-0005-0491-1869;0009-0009-0680-1806;0000-0002-0481-5341;0000-0002-5199-1428;0000-0003-1419-964X;0000-0001-5483-0366;0000-0002-4115-7667", "linkedin": ";;;;;;ccchomepage/;;", "or_profile": "~Xinting_Liao1;~Pengyang_Zhou1;~Fengyuan_Yu1;~Jiahe_Xu1;~Jun_Wang38;~Wenjie_Wang1;~Chaochao_Chen3;~Xiaolin_Zheng1;~Liu_Weiming2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;OPPO Research Institute ;National University of Singapore;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;oppo.com;nus.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Principal Researcher;Postdoc;Distinguished Research Fellow;Full Professor;PhD student", "bibtex": "@inproceedings{\nliao2024foogd,\ntitle={{FOOGD}: Federated Collaboration for Both Out-of-distribution Generalization and Detection},\nauthor={Xinting Liao and Weiming Liu and Pengyang Zhou and Fengyuan Yu and Jiahe Xu and Jun Wang and Wenjie Wang and Chaochao Chen and Xiaolin Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D6MQrw9HFu}\n}", "github": "", "reviewers": "dEK5;cFix;MUVw;RpTd", "pdf_size": 4862199, "rating": "5;5;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "43;56;74;82", "wc_strengths": "79;58;46;91", "wc_weaknesses": "19;91;189;38", "wc_questions": "185;25;2;10", "wc_limitations": "18;4;2;1", "wc_review": "344;234;313;222", "wc_reply_reviewers": "9;85;11;26", "wc_reply_authors": "42;1859;31;24", "reply_reviewers": "1;2;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 63.75, 15.237699957670777 ], "wc_strengths_avg": [ 68.5, 17.55704986607944 ], "wc_weaknesses_avg": [ 84.25, 65.9824787348884 ], "wc_questions_avg": [ 55.5, 75.22134005719387 ], "wc_limitations_avg": [ 6.25, 6.869315832017043 ], "wc_review_avg": [ 278.25, 51.60608006814701 ], "wc_reply_reviewers_avg": [ 32.75, 30.873734791890662 ], "wc_reply_authors_avg": [ 489.0, 790.9958912661937 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2206020920350883963&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;oppo.com;nus.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;0;0;0", "aff_unique_norm": "Zhejiang University;OPPO Research Institute;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.oppo.com/en;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;OPPO RI;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Learning Distinguishable Trajectory Representation with Contrastive Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96102", "id": "D6nlm2AYHi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D6nlm2AYHi", "openreview": "https://openreview.net/forum?id=D6nlm2AYHi", "poster": "/media/PosterPDFs/NeurIPS%202024/96102.png?t=1730275075.1920345", "project": "", "author_site": "Tianxu Li, Kun Zhu, Juan Li, Yang Zhang", "tldr": "", "abstract": "Policy network parameter sharing is a commonly used technique in advanced deep multi-agent reinforcement learning (MARL) algorithms to improve learning efficiency by reducing the number of policy parameters and sharing experiences among agents. Nevertheless, agents that share the policy parameters tend to learn similar behaviors. To encourage multi-agent diversity, prior works typically maximize the mutual information between trajectories and agent identities using variational inference. However, this category of methods easily leads to inefficient exploration due to limited trajectory visitations. To resolve this limitation, inspired by the learning of pre-trained models, in this paper, we propose a novel Contrastive Trajectory Representation (CTR) method based on learning distinguishable trajectory representations to encourage multi-agent diversity. Specifically, CTR maps the trajectory of an agent into a latent trajectory representation space by an encoder and an autoregressive model. To achieve the distinguishability among trajectory representations of different agents, we introduce contrastive learning to maximize the mutual information between the trajectory representations and learnable identity representations of different agents. We implement CTR on top of QMIX and evaluate its performance in various cooperative multi-agent tasks. The empirical results demonstrate that our proposed CTR yields significant performance improvement over the state-of-the-art methods.", "keywords": "Multi-Agent Reinforcement Learning;multi-agent exploration;trajectory representation;contrastive learning;multi-agent diversity", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b07a55437a38be887b533951b441093fba6862ea.zip", "author": "Tianxu Li;Kun Zhu;Juan Li;Yang Zhang", "authorids": "~Tianxu_Li1;~Kun_Zhu1;~Juan_Li4;~Yang_Zhang29", "gender": "M;M;F;Not Specified", "homepage": ";http://inet-nuaa.cn/kunzhu/;https://scholar.google.com/citations?user=a_COJH8AAAAJ&hl=zh-CN;", "dblp": "225/8647;95/1161-1;59/2144-11.html;https://dblp.uni-trier.de/pid/06/6785-25", "google_scholar": ";;a_COJH8AAAAJ;", "orcid": ";;0000-0003-0115-0783;", "linkedin": "%E5%A4%A9%E6%97%AD-%E6%9D%8E-316898275/;;;", "or_profile": "~Tianxu_Li1;~Kun_Zhu1;~Juan_Li4;~Yang_Zhang29", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "position": "PhD student;Full Professor;Instructor;Associate Professor", "bibtex": "@inproceedings{\nli2024learning,\ntitle={Learning Distinguishable Trajectory Representation with Contrastive Loss},\nauthor={Tianxu Li and Kun Zhu and Juan Li and Yang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D6nlm2AYHi}\n}", "github": "", "reviewers": "u9xg;mufx;GQbC;ncsG;Urvi", "pdf_size": 8634691, "rating": "4;4;6;7;7", "confidence": "3;4;3;2;3", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "46;40;35;189;70", "wc_strengths": "30;50;35;217;55", "wc_weaknesses": "184;89;174;202;45", "wc_questions": "55;36;30;94;1", "wc_limitations": "1;15;10;4;1", "wc_review": "316;230;284;706;172", "wc_reply_reviewers": "281;17;84;16;7", "wc_reply_authors": "552;0;123;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 76.0, 57.76157892578769 ], "wc_strengths_avg": [ 77.4, 70.40624972259211 ], "wc_weaknesses_avg": [ 138.8, 60.91764933087947 ], "wc_questions_avg": [ 43.2, 30.746707140765498 ], "wc_limitations_avg": [ 6.2, 5.491812087098393 ], "wc_review_avg": [ 341.6, 188.66859834111239 ], "wc_reply_reviewers_avg": [ 81.0, 103.73620390201292 ], "wc_reply_authors_avg": [ 135.0, 213.87285942821262 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6993786061802353, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aSz9Ve8XUC8J:scholar.google.com/&scioq=Learning+Distinguishable+Trajectory+Representation+with+Contrastive+Loss&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Segment Any Change", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96101", "id": "D7X9Grmd7L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=D7X9Grmd7L", "openreview": "https://openreview.net/forum?id=D7X9Grmd7L", "poster": "/media/PosterPDFs/NeurIPS%202024/96101.png?t=1731621923.3761117", "project": "", "author_site": "Zhuo Zheng, Yanfei Zhong, Liangpei Zhang, Stefano Ermon", "tldr": "", "abstract": "Visual foundation models have achieved remarkable results in zero-shot image classification and segmentation, but zero-shot change detection remains an open problem. \nIn this paper, we propose the segment any change models (AnyChange), a new type of change detection model that supports zero-shot prediction and generalization on unseen change types and data distributions.\nAnyChange is built on the segment anything model (SAM) via our training-free adaptation method, bitemporal latent matching.\nBy revealing and exploiting intra-image and inter-image semantic similarities in SAM's latent space, bitemporal latent matching endows SAM with zero-shot change detection capabilities in a training-free way. \nWe also propose a point query mechanism to enable AnyChange's zero-shot object-centric change detection capability.\nWe perform extensive experiments to confirm the effectiveness of AnyChange for zero-shot change detection.\nAnyChange sets a new record on the SECOND benchmark for unsupervised change detection, exceeding the previous SOTA by up to 4.4\\% F$_1$ score, and achieving comparable accuracy with negligible manual annotations (1 pixel per image) for supervised change detection. Code is available at https://github.com/Z-Zheng/pytorch-change-models.", "keywords": "change detection;remote sensing;zero-shot adaptation;visual foundation models;satellite imagery", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhuo Zheng;Yanfei Zhong;Liangpei Zhang;Stefano Ermon", "authorids": "~Zhuo_Zheng1;~Yanfei_Zhong1;~Liangpei_Zhang1;~Stefano_Ermon1", "gender": "M;;M;M", "homepage": "http://zhuozheng.top/;;http://www.lmars.whu.edu.cn/prof_web/zhangliangpei/rs/xueshu.htm;http://cs.stanford.edu/~ermon/", "dblp": "217/4050;;12/4846.html;47/8135", "google_scholar": "CREpn_AAAAAJ;;vzj2hcYAAAAJ;", "orcid": "0000-0003-1811-6725;;;", "linkedin": ";;;", "or_profile": "~Zhuo_Zheng1;~Yanfei_Zhong1;~Liangpei_Zhang1;~Stefano_Ermon1", "aff": "Stanford University;;Wuhan University;Stanford University", "aff_domain": "cs.stanford.edu;;whu.edu.cn;stanford.edu", "position": "Postdoc;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzheng2024segment,\ntitle={Segment Any Change},\nauthor={Zhuo Zheng and Yanfei Zhong and Liangpei Zhang and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=D7X9Grmd7L}\n}", "github": "", "reviewers": "MDhn;n4V1;SmiM;eUYB", "pdf_size": 9743891, "rating": "3;4;5;6", "confidence": "4;3;2;3", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "3;3;3;2", "wc_summary": "69;86;149;104", "wc_strengths": "76;65;80;113", "wc_weaknesses": "143;399;125;158", "wc_questions": "42;47;25;26", "wc_limitations": "13;71;12;7", "wc_review": "343;668;391;408", "wc_reply_reviewers": "728;564;278;27", "wc_reply_authors": "994;1169;358;25", "reply_reviewers": "3;3;2;1", "reply_authors": "4;4;2;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.0, 29.824486584013478 ], "wc_strengths_avg": [ 83.5, 17.89553016817328 ], "wc_weaknesses_avg": [ 206.25, 111.89587794016364 ], "wc_questions_avg": [ 35.0, 9.669539802906858 ], "wc_limitations_avg": [ 25.75, 26.223796445213647 ], "wc_review_avg": [ 452.5, 126.6816877058401 ], "wc_reply_reviewers_avg": [ 399.25, 268.556674651739 ], "wc_reply_authors_avg": [ 636.5, 464.45048175236076 ], "reply_reviewers_avg": [ 2.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=814220519764753861&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "cs.stanford.edu;;whu.edu.cn;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;http://www.whu.edu.cn/", "aff_unique_abbr": "Stanford;WHU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "D80tRH9CXF", "title": "Prediction Risk and Estimation Risk of the Ridgeless Least Squares Estimator under General Assumptions on Regression Errors", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, there has been a significant growth in research focusing on minimum $\\ell_2$ norm (ridgeless) interpolation least squares estimators. However, the majority of these analyses have been limited to an unrealistic regression error structure, assuming independent and identically distributed errors with zero mean and common variance. In this paper, we explore prediction risk as well as estimation risk under more general regression error assumptions, highlighting the benefits of overparameterization in a more realistic setting that allows for clustered or serial dependence. Notably, we establish that the estimation difficulties associated with the variance components of both risks can be summarized through the trace of the variance-covariance matrix of the regression errors. Our findings suggest that the benefits of overparameterization can extend to time series, panel and grouped data.", "keywords": "minimum norm solution;ridgeless estimator;benign overfitting;double descent;overparameterization", "primary_area": "learning_theory", "supplementary_material": "/attachment/22c695c03fc1415332fee7df756112c3fb740c00.zip", "author": "Sungyoon Lee;Sokbae Lee", "authorids": "~Sungyoon_Lee1;~Sokbae_Lee1", "gender": "M;M", "homepage": "https://sites.google.com/view/sungyoon-lee/home;https://sites.google.com/site/sokbae/", "dblp": ";270/3314", "google_scholar": "https://scholar.google.co.kr/citations?user=PAoFkGEAAAAJ;nlNC3hQAAAAJ", "orcid": ";0000-0003-4080-7733", "linkedin": ";", "or_profile": "~Sungyoon_Lee1;~Sokbae_Lee1", "aff": "Hanyang University;Columbia University", "aff_domain": "hanyang.ac.kr;columbia.edu", "position": "Assistant Professor;Professor", "bibtex": "@misc{\nanonymous2024prediction,\ntitle={Prediction Risk and Estimation Risk of the Ridgeless Least Squares Estimator under General Assumptions on Regression Errors},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=D80tRH9CXF}\n}", "github": "", "project": "", "reviewers": "AHim;JpQB;pxrE;fTDr", "site": "https://openreview.net/forum?id=D80tRH9CXF", "pdf_size": 332896, "rating": "5;5;6;7", "confidence": "3;2;5;2", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "129;50;93;74", "wc_strengths": "89;73;94;48", "wc_weaknesses": "9;98;106;28", "wc_questions": "3;1;185;36", "wc_limitations": "1;1;10;6", "wc_review": "231;223;488;192", "wc_reply_reviewers": "9;71;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 86.5, 28.883386228072357 ], "wc_strengths_avg": [ 76.0, 17.930421077041107 ], "wc_weaknesses_avg": [ 60.25, 42.381452311123084 ], "wc_questions_avg": [ 56.25, 75.62200407288873 ], "wc_limitations_avg": [ 4.5, 3.774917217635375 ], "wc_review_avg": [ 283.5, 118.96322961318762 ], "wc_reply_reviewers_avg": [ 23.0, 28.062430400804562 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DoXQ8vjLUiAJ:scholar.google.com/&scioq=Prediction+Risk+and+Estimation+Risk+of+the+Ridgeless+Least+Squares+Estimator+under+General+Assumptions+on+Regression+Errors&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Hanyang University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hanyang.ac.kr;https://www.columbia.edu", "aff_unique_abbr": "HYU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Interpreting the Weight Space of Customized Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96100", "id": "DAO2BFzMfy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DAO2BFzMfy", "openreview": "https://openreview.net/forum?id=DAO2BFzMfy", "poster": "/media/PosterPDFs/NeurIPS%202024/96100.png?t=1731732864.5996459", "project": "", "author_site": "Amil Dravid, Yossi Gandelsman, Kuan-Chieh Wang, Rameen Abdal, Gordon Wetzstein, Alexei Efros, Kfir Aberman", "tldr": "", "abstract": "We investigate the space of weights spanned by a large collection of customized diffusion models. We populate this space by creating a dataset of over 60,000 models, each of which is a base model fine-tuned to insert a different person's visual identity. We model the underlying manifold of these weights as a subspace, which we term $\\textit{weights2weights}$. We demonstrate three immediate applications of this space that result in new diffusion models -- sampling, editing, and inversion. First, sampling a set of weights from this space results in a new model encoding a novel identity. Next, we find linear directions in this space corresponding to semantic edits of the identity (e.g., adding a beard), resulting in a new model with the original identity edited. Finally, we show that inverting a single image into this space encodes a realistic identity into a model, even if the input image is out of distribution (e.g., a painting). We further find that these linear properties of the diffusion model weight space extend to other visual concepts. Our results indicate that the weight space of fine-tuned diffusion models can behave as an interpretable $\\textit{meta}$-latent space producing new models.", "keywords": "Weight Space;Model Editing;Diffusion Models;Latent Space;Personalization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Amil Dravid;Yossi Gandelsman;Kuan-Chieh Wang;Rameen Abdal;Gordon Wetzstein;Alexei A Efros;Kfir Aberman", "authorids": "~Amil_Dravid1;~Yossi_Gandelsman2;~Kuan-Chieh_Wang1;~Rameen_Abdal1;~Gordon_Wetzstein3;~Alexei_A_Efros1;~Kfir_Aberman1", "gender": ";M;M;M;M;;M", "homepage": "https://wangkua1.github.io;https://rameenabdal.github.io/;http://web.stanford.edu/~gordonwz/;https://kfiraberman.github.io/;http://www.eecs.berkeley.edu/~efros/;https://avdravid.github.io/;https://yossi.gandelsman.com", "dblp": "13/7562;239/4322;13/4660;;40/6158;272/9123;232/1765", "google_scholar": "https://scholar.google.ca/citations?user=LgMuT6IAAAAJ;https://scholar.google.co.in/citations?user=kEQimk0AAAAJ;VOf45S0AAAAJ;https://scholar.google.co.il/citations?user=jdbZDakAAAAJ;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ;YZ8Y-sUAAAAJ;https://scholar.google.co.il/citations?user=71L4yYMAAAAJ", "orcid": ";;0000-0002-9243-6885;;0000-0001-5720-8070;0000-0001-6007-0690;0000-0003-1259-3387", "linkedin": ";;gordon-wetzstein-2406723/;;alexei-efros-890736a3/;amil-dravid/;yossi-gandelsman-26582981/", "or_profile": "~Kuan-Chieh_Wang1;~Rameen_Abdal1;~Gordon_Wetzstein3;~Kfir_Aberman1;~Alyosha_Efros1;~Amil_V_Dravid1;~Yossi_Gandelsman1", "aff": "Snap Inc.;Stanford University;Stanford University;Snap Inc.;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "snapchat.com;stanford.edu;stanford.edu;snap.com;berkeley.edu;berkeley.edu;berkeley.edu", "position": "Researcher;Postdoc;Associate Professor;Researcher;Professor;PhD student;PhD student", "bibtex": "@inproceedings{\ndravid2024interpreting,\ntitle={Interpreting the Weight Space of Customized Diffusion Models},\nauthor={Amil Dravid and Yossi Gandelsman and Kuan-Chieh Wang and Rameen Abdal and Gordon Wetzstein and Alexei A Efros and Kfir Aberman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DAO2BFzMfy}\n}", "github": "", "reviewers": "wMSF;aBHC;215A;rzPz", "pdf_size": 50609027, "rating": "5;5;6;7", "confidence": "3;4;5;5", "soundness": "4;3;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "81;114;59;49", "wc_strengths": "73;49;62;60", "wc_weaknesses": "109;167;296;60", "wc_questions": "6;40;88;7", "wc_limitations": "9;8;12;7", "wc_review": "278;378;517;183", "wc_reply_reviewers": "0;242;96;0", "wc_reply_authors": "0;745;49;0", "reply_reviewers": "0;2;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 24.933661985356263 ], "wc_strengths_avg": [ 61.0, 8.514693182963201 ], "wc_weaknesses_avg": [ 158.0, 88.21847878987712 ], "wc_questions_avg": [ 35.25, 33.38693606786942 ], "wc_limitations_avg": [ 9.0, 1.8708286933869707 ], "wc_review_avg": [ 339.0, 123.75580794451629 ], "wc_reply_reviewers_avg": [ 84.5, 99.01893758266648 ], "wc_reply_authors_avg": [ 198.5, 316.1554206399125 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16873888504390518406&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snapchat.com;stanford.edu;stanford.edu;snap.com;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 7, "aff_unique_index": "0;1;1;0;2;2;2", "aff_unique_norm": "Snap Inc.;Stanford University;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snapinc.com;https://www.stanford.edu;https://www.berkeley.edu", "aff_unique_abbr": "Snap;Stanford;UC Berkeley", "aff_campus_unique_index": "1;1;2;2;2", "aff_campus_unique": ";Stanford;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Delayed Policy Optimization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96099", "id": "DAtNDZHbqj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DAtNDZHbqj", "openreview": "https://openreview.net/forum?id=DAtNDZHbqj", "poster": "", "project": "", "author_site": "Qingyuan Wu, Simon Zhan, Yixuan Wang, Yuhui Wang, Chung-Wei Lin, Chen Lv, Qi Zhu, Chao Huang", "tldr": "", "abstract": "In environments with delayed observation, state augmentation by including actions within the delay window is adopted to retrieve Markovian property to enable reinforcement learning (RL). Whereas, state-of-the-art (SOTA) RL techniques with Temporal-Difference (TD) learning frameworks commonly suffer from learning inefficiency, due to the significant expansion of the augmented state space with the delay. To improve the learning efficiency without sacrificing performance, this work novelly introduces Variational Delayed Policy Optimization (VDPO), reforming delayed RL as a variational inference problem. This problem is further modelled as a two-step iterative optimization problem, where the first step is TD learning in the delay-free environment with a small state space, and the second step is behaviour cloning which can be addressed much more efficiently than TD learning. We not only provide a theoretical analysis of VDPO in terms of sample complexity and performance, but also empirically demonstrate that VDPO can achieve consistent performance with SOTA methods, with a significant enhancement of sample efficiency (approximately 50\\% less amount of samples) in the MuJoCo benchmark.", "keywords": "reinforcement learning;time-delay system", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/0e7b769e29a52b9c7a715303ad7ce04d4725464a.zip", "author": "Qingyuan Wu;Simon Sinong Zhan;Yixuan Wang;Yuhui Wang;Chung-Wei Lin;Chen Lv;Qi Zhu;Chao Huang", "authorids": "~Qingyuan_Wu1;~Simon_Sinong_Zhan1;~Yixuan_Wang1;~Yuhui_Wang1;~Chung-Wei_Lin1;~Chen_Lv1;~Qi_Zhu2;~Chao_Huang5", "gender": "M;M;M;M;M;M;;M", "homepage": ";https://simonzhan.github.io/;https://wangyixu14.github.io/;https://wangyuhuix.github.io/;https://www.csie.ntu.edu.tw/~cwlin/;https://lvchen.wixsite.com/automan;http://zhulab.ece.northwestern.edu/;https://chaohuang2018.github.io/main/", "dblp": ";330/3557;44/4317;;;;66/5923-2.html;18/4087-15", "google_scholar": "CYfMzb8AAAAJ;uO4dG0wAAAAJ;7qP5C-kAAAAJ;https://scholar.google.com.tw/citations?hl=zh-CN;fh0S7TAAAAAJ;UKVs2CEAAAAJ;TN09YMcAAAAJ;GbY72eIAAAAJ", "orcid": ";;;;;0000-0001-6897-4512;;0000-0002-9300-1787", "linkedin": ";;;;;chen-lv-7964b590/;;", "or_profile": "~Qingyuan_Wu1;~Simon_Sinong_Zhan1;~Yixuan_Wang1;~Yuhui_Wang1;~Chung-Wei_Lin1;~Chen_Lv1;~Qi_Zhu2;~Chao_Huang5", "aff": "University of Liverpool;Northwestern University;Northwestern University, Northwestern University;King Abdullah University of Science and Technology;National Taiwan University;Nanyang Technological University;Northwestern University;University of Liverpool", "aff_domain": "liverpool.ac.uk;u.northwestern.edu;u.northwestern.edu;kaust.edu.sa;ntu.edu.tw;ntu.edu.sg;northwestern.edu;liverpool.ac.uk", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024variational,\ntitle={Variational Delayed Policy Optimization},\nauthor={Qingyuan Wu and Simon Sinong Zhan and Yixuan Wang and Yuhui Wang and Chung-Wei Lin and Chen Lv and Qi Zhu and Chao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DAtNDZHbqj}\n}", "github": "", "reviewers": "FFJG;io6Z;wuNr", "pdf_size": 1043367, "rating": "6;8;8", "confidence": "4;4;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "48;122;61", "wc_strengths": "44;122;103", "wc_weaknesses": "117;20;15", "wc_questions": "59;7;1", "wc_limitations": "10;11;3", "wc_review": "278;282;183", "wc_reply_reviewers": "158;27;21", "wc_reply_authors": "721;13;15", "reply_reviewers": "3;1;1", "reply_authors": "4;2;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 32.25936556516056 ], "wc_strengths_avg": [ 89.66666666666667, 33.209770985191824 ], "wc_weaknesses_avg": [ 50.666666666666664, 46.94914506380518 ], "wc_questions_avg": [ 22.333333333333332, 26.04269997949948 ], "wc_limitations_avg": [ 8.0, 3.559026084010437 ], "wc_review_avg": [ 247.66666666666666, 45.75538826800135 ], "wc_reply_reviewers_avg": [ 68.66666666666667, 63.21568019267231 ], "wc_reply_authors_avg": [ 249.66666666666666, 333.2839963487929 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6370168694294425292&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "liverpool.ac.uk;u.northwestern.edu;u.northwestern.edu;kaust.edu.sa;ntu.edu.tw;ntu.edu.sg;northwestern.edu;liverpool.ac.uk", "author_num": 8, "aff_unique_index": "0;1;1;2;3;4;1;0", "aff_unique_norm": "University of Liverpool;Northwestern University;King Abdullah University of Science and Technology;National Taiwan University;Nanyang Technological University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.liverpool.ac.uk;https://www.northwestern.edu;https://www.kast.kau.edu.sa;https://www.ntu.edu.tw;https://www.ntu.edu.sg", "aff_unique_abbr": "Liv Uni;NU;KAUST;NTU;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;1;2;3;4;1;0", "aff_country_unique": "United Kingdom;United States;Saudi Arabia;China;Singapore" }, { "title": "Intervention and Conditioning in Causal Bayesian Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96098", "id": "DC28Fpk76s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DC28Fpk76s", "openreview": "https://openreview.net/forum?id=DC28Fpk76s", "poster": "", "project": "", "author_site": "Sainyam Galhotra, Joseph Halpern", "tldr": "", "abstract": "Causal models are crucial for understanding complex systems and\nidentifying causal relationships among variables. Even though causal\nmodels are extremely popular, conditional probability calculation of\nformulas involving interventions pose significant challenges.\nIn case of Causal Bayesian Networks (CBNs), Pearl assumes autonomy \nof mechanisms that determine interventions to calculate a range of\nprobabilities. We show that by making simple yet\noften realistic independence assumptions, it is possible \nto uniquely estimate the probability of an interventional formula (including\nthe well-studied notions of probability of sufficiency and necessity). \nWe discuss when these assumptions are appropriate.\nImportantly, in many cases of interest, when the assumptions are appropriate,\nthese probability estimates can be evaluated using\nobservational data, which carries immense significance in scenarios\nwhere conducting experiments is impractical or unfeasible.", "keywords": "causal bayesian networks;interventions;counterfactuals", "primary_area": "causal_inference", "supplementary_material": "", "author": "sainyam galhotra;Joseph Halpern", "authorids": "~sainyam_galhotra1;~Joseph_Halpern1", "gender": ";M", "homepage": "https://sainyamgalhotra.com;https://www.cs.cornell.edu/home/halpern", "dblp": "136/7969;", "google_scholar": "0_9V8PgAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~sainyam_galhotra1;~Joseph_Halpern1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;Cornell University", "aff_domain": "cs.umass.edu;cornell.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ngalhotra2024intervention,\ntitle={Intervention and Conditioning in Causal Bayesian Networks},\nauthor={sainyam galhotra and Joseph Halpern},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DC28Fpk76s}\n}", "github": "", "reviewers": "nbxs;3ny6;Yecn;6Tw9", "pdf_size": 321268, "rating": "4;5;6;8", "confidence": "3;3;3;3", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "1;2;2;3", "wc_summary": "39;54;28;69", "wc_strengths": "22;31;15;92", "wc_weaknesses": "315;1525;30;30", "wc_questions": "64;138;348;15", "wc_limitations": "16;85;1;1", "wc_review": "456;1833;422;207", "wc_reply_reviewers": "11;521;0;0", "wc_reply_authors": "0;996;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "1;4;1;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 47.5, 15.46770829825802 ], "wc_strengths_avg": [ 40.0, 30.553232234904378 ], "wc_weaknesses_avg": [ 475.0, 617.2823503065675 ], "wc_questions_avg": [ 141.25, 127.14435693336924 ], "wc_limitations_avg": [ 25.75, 34.751798514609284 ], "wc_review_avg": [ 729.5, 644.2198770606198 ], "wc_reply_reviewers_avg": [ 133.0, 224.05691241289566 ], "wc_reply_authors_avg": [ 249.0, 431.28065108465046 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ViDxLj5d8usJ:scholar.google.com/&scioq=Intervention+and+Conditioning+in+Causal+Bayesian+Networks&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cs.umass.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Massachusetts Amherst;Cornell University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.umass.edu;https://www.cornell.edu", "aff_unique_abbr": "UMass Amherst;Cornell", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "TorchSpatial: A Location Encoding Framework and Benchmark for Spatial Representation Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97807", "id": "DERtzUdhkk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DERtzUdhkk", "openreview": "https://openreview.net/forum?id=DERtzUdhkk", "poster": "/media/PosterPDFs/NeurIPS%202024/97807.png?t=1733257713.3837748", "project": "", "author_site": "Nemin Wu, Qian Cao, Zhangyu Wang, Zeping Liu, Yanlin Qi, Jielu Zhang, Joshua Ni, X. Yao, Hongxu Ma, Lan Mu, Stefano Ermon, Tanuja Ganu, Akshay Nambi, Ni Lao, Gengchen Mai", "tldr": "", "abstract": "Spatial representation learning (SRL) aims at learning general-purpose neural network representations from various types of spatial data (e.g., points, polylines, polygons, networks, images, etc.) in their native formats. Learning good spatial representations is a fundamental problem for various downstream applications such as species distribution modeling, weather forecasting, trajectory generation, geographic question answering, etc. Even though SRL has become the foundation of almost all geospatial artificial intelligence (GeoAI) research, we have not yet seen significant efforts to develop an extensive deep learning framework and benchmark to support SRL model development and evaluation. To fill this gap, we propose TorchSpatial, a learning framework and benchmark for location (point) encoding,\nwhich is one of the most fundamental data types of spatial representation learning. TorchSpatial contains three key components: 1) a unified location encoding framework that consolidates 15 commonly recognized location encoders, ensuring scalability and reproducibility of the implementations; 2) the LocBench benchmark tasks encompassing 7 geo-aware image classification and 10 geo-aware image\nregression datasets; 3) a comprehensive suite of evaluation metrics to quantify geo-aware models\u2019 overall performance as well as their geographic bias, with a novel Geo-Bias Score metric. Finally, we provide a detailed analysis and insights into the model performance and geographic bias of different location encoders. We believe TorchSpatial will foster future advancement of spatial representation\nlearning and spatial fairness in GeoAI research. The TorchSpatial model framework and LocBench benchmark are available at https://github.com/seai-lab/TorchSpatial, and the Geo-Bias Score evaluation framework is available at https://github.com/seai-lab/PyGBS.", "keywords": "Spatial Representation Learning;Spatially Explicit Artificial Intelligence;Geo-Aware Image Classification;Fine-grained Species Recognition;Remote Sensing Image Classification;Image Regression", "primary_area": "", "supplementary_material": "/attachment/ce7dd0b584bedbecd6482f05ef22ba59825a3920.pdf", "author": "Nemin Wu;Qian Cao;Zhangyu Wang;Zeping Liu;Yanlin Qi;Jielu Zhang;Joshua Ni;X. Angela Yao;Hongxu Ma;Lan Mu;Stefano Ermon;Tanuja Ganu;Akshay Nambi;Ni Lao;Gengchen Mai", "authorids": "~Nemin_Wu1;~Qian_Cao4;~Zhangyu_Wang1;~Zeping_Liu1;~Yanlin_Qi2;~Jielu_Zhang1;~Joshua_Ni3;~X._Angela_Yao1;~Hongxu_Ma1;~Lan_Mu1;~Stefano_Ermon1;~Tanuja_Ganu1;~Akshay_Nambi1;~Ni_Lao1;~Gengchen_Mai1", "gender": "F;F;;M;Not Specified;F;M;F;M;F;M;;;M;M", "homepage": "https://geography.uga.edu/directory/people/nemin-wu;;;https://zpl99.github.io/;;https://geography.uga.edu/directory/people/jielu-zhang;;https://geography.uga.edu/directory/people/angela-yao;https://www.linkedin.com/in/hongxu-ma-7458a735;https://geography.uga.edu/directory/people/lan-mu;http://cs.stanford.edu/~ermon/;https://www.microsoft.com/en-us/research/people/taganu/;;http://www.cs.cmu.edu/~nlao;https://gengchenmai.github.io/", "dblp": ";;;304/5181;;;;;;;47/8135;31/11538;;82/283;151/5583", "google_scholar": ";;8vNk5Z8AAAAJ;https://scholar.google.com.hk/citations?user=eSeCaz4AAAAJ;xCLtW8IAAAAJ;w3kpKuwAAAAJ;;-eRzjogAAAAJ;;;;https://scholar.google.co.in/citations?user=uU9COWkAAAAJ;;iUgWR3MAAAAJ;X2Wfl1UAAAAJ", "orcid": ";;;0000-0003-2898-0023;;;;0000-0003-2719-2017;;;;;;0000-0002-4034-7784;0000-0002-7818-7309", "linkedin": ";qian-cao-44794929b/;zhangyu-wang-26aab0170/;zeping-liu-55b336322/;;;joshua-ni-45129130a/;x-angela-yao-96b89046/;;;;;;ni-lao;gengchen-mai-144439121/", "or_profile": "~Nemin_Wu1;~Qian_Cao4;~Zhangyu_Wang1;~Zeping_Liu1;~Yanlin_Qi2;~Jielu_Zhang1;~Joshua_Ni3;~X._Angela_Yao1;~Hongxu_Ma1;~Lan_Mu1;~Stefano_Ermon1;~Tanuja_Ganu1;~Akshay_Nambi1;~Ni_Lao1;~Gengchen_Mai1", "aff": "University of Georgia;University of Georgia;University of California, Santa Barbara;University of Texas at Austin;University of California, Davis;University of Georgia;Basis Independent Fremont Upper School;University of Georgia;Google;University of Georgia;Stanford University;Microsoft;;Google;University of Georgia", "aff_domain": "uga.edu;uga.edu;ucsb.edu;utexas.edu;ucdavis.edu;uga.edu;basisindependent.com;uga.edu;google.com;uga.edu;stanford.edu;microsoft.com;;google.com;uga.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;High School Student;Full Professor;Researcher;Full Professor;Associate Professor;Researcher;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwu2024torchspatial,\ntitle={TorchSpatial: A Location Encoding Framework and Benchmark for Spatial Representation Learning},\nauthor={Nemin Wu and Qian Cao and Zhangyu Wang and Zeping Liu and Yanlin Qi and Jielu Zhang and Joshua Ni and X. Angela Yao and Hongxu Ma and Lan Mu and Stefano Ermon and Tanuja Ganu and Akshay Nambi and Ni Lao and Gengchen Mai},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DERtzUdhkk}\n}", "github": "", "reviewers": "ePbQ;xzSd;rFTf", "pdf_size": 4562225, "rating": "6;7;7", "confidence": "4;4;5", "wc_summary_and_contributions": "89;124;49", "wc_strengths": "28;70;11", "wc_improvement": "83;40;31", "wc_limitations": "78;7;20", "wc_correctness": "1;14;22", "wc_clarity": "1;5;3", "wc_relation_to_prior_work": "1;8;13", "wc_documentation": "1;6;18", "wc_additional_feedback": "1;1;1", "wc_review": "283;275;168", "wc_reply_reviewers": "0;26;20", "wc_reply_authors": "165;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 87.33333333333333, 30.64129385141706 ], "wc_strengths_avg": [ 36.333333333333336, 24.796953217863056 ], "wc_improvement_avg": [ 51.333333333333336, 22.691163233490013 ], "wc_limitations_avg": [ 35.0, 30.865298745786774 ], "wc_correctness_avg": [ 12.333333333333334, 8.65383665716478 ], "wc_clarity_avg": [ 3.0, 1.632993161855452 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 4.9216076867444665 ], "wc_documentation_avg": [ 8.333333333333334, 7.133644853010899 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 242.0, 52.42772803266099 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 11.115554667022044 ], "wc_reply_authors_avg": [ 55.0, 77.78174593052023 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12275012096099234109&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "uga.edu;uga.edu;ucsb.edu;utexas.edu;ucdavis.edu;uga.edu;basisindependent.com;uga.edu;google.com;uga.edu;stanford.edu;microsoft.com;;google.com;uga.edu", "author_num": 15, "aff_unique_index": "0;0;1;2;3;0;4;0;5;0;6;7;5;0", "aff_unique_norm": "University of Georgia;University of California, Santa Barbara;University of Texas at Austin;University of California, Davis;Basis Independent Fremont;Google;Stanford University;Microsoft", "aff_unique_dep": ";;;;Upper School;Google;;Microsoft Corporation", "aff_unique_url": "https://www.uga.edu;https://www.ucsb.edu;https://www.utexas.edu;https://www.ucdavis.edu;;https://www.google.com;https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "UGA;UCSB;UT Austin;UC Davis;;Google;Stanford;Microsoft", "aff_campus_unique_index": "1;2;3;4;5;6;5", "aff_campus_unique": ";Santa Barbara;Austin;Davis;Fremont;Mountain View;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "BioTrove: A Large Curated Image Dataset Enabling AI for Biodiversity", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97806", "id": "DFDCtGQs7S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DFDCtGQs7S", "openreview": "https://openreview.net/forum?id=DFDCtGQs7S", "poster": "", "project": "", "author_site": "Chih-Hsuan Yang, Benjamin Feuer, Talukder \"Zaki\" Jubery, Zi Deng, Andre Nakkab, Md Zahid Hasan, Shivani Chiranjeevi, Kelly Marshall, Nirmal Baishnab, Asheesh Singh, ARTI SINGH, Soumik Sarkar, Nirav Merchant, Chinmay Hegde, Baskar Ganapathysubramanian", "tldr": "", "abstract": "We introduce BioTrove, the largest publicly accessible dataset designed to advance AI applications in biodiversity. Curated from the iNaturalist platform and vetted to include only research-grade data, BioTrove contains 161.9 million images, offering unprecedented scale and diversity from three primary kingdoms: Animalia (\"animals\"), Fungi (\"fungi\"), and Plantae (\"plants\"), spanning approximately 366.6K species. Each image is annotated with scientific names, taxonomic hierarchies, and common names, providing rich metadata to support accurate AI model development across diverse species and ecosystems.\n\nWe demonstrate the value of BioTrove by releasing a suite of CLIP models trained using a subset of 40 million captioned images, known as BioTrove-Train. This subset focuses on seven categories within the dataset that are underrepresented in standard image recognition models, selected for their critical role in biodiversity and agriculture: Aves (\"birds\"), Arachnida} (\"spiders/ticks/mites\"), Insecta (\"insects\"), Plantae (\"plants\"), Fungi (\"fungi\"), Mollusca (\"snails\"), and Reptilia (\"snakes/lizards\"). To support rigorous assessment, we introduce several new benchmarks and report model accuracy for zero-shot learning across life stages, rare species, confounding species, and multiple taxonomic levels.\n\nWe anticipate that BioTrove will spur the development of AI models capable of supporting digital tools for pest control, crop monitoring, biodiversity assessment, and environmental conservation. These advancements are crucial for ensuring food security, preserving ecosystems, and mitigating the impacts of climate change. BioTrove is publicly available, easily accessible, and ready for immediate use.", "keywords": "AI for biodiversity; AI for agriculture; AI for ecology; vision-language models; fine-grained image recognition.", "primary_area": "", "supplementary_material": "/attachment/67b1a597b0cbe07a9a880f1ecb2bacf78249fa00.pdf", "author": "Chih-Hsuan Yang;Benjamin Feuer;Talukder Zaki Jubery;Zi K. Deng;Andre Nakkab;Md Zahid Hasan;Shivani Chiranjeevi;Kelly O. Marshall;Nirmal Baishnab;Asheesh K Singh;ARTI SINGH;Soumik Sarkar;Nirav Merchant;Chinmay Hegde;Baskar Ganapathysubramanian", "authorids": "~Chih-Hsuan_Yang1;~Benjamin_Feuer1;~Talukder_Zaki_Jubery1;~Zi_K._Deng1;~Andre_Nakkab1;~Md_Zahid_Hasan1;~Shivani_Chiranjeevi1;~Kelly_O._Marshall1;~Nirmal_Baishnab1;~Asheesh_K_Singh1;~ARTI_SINGH2;~Soumik_Sarkar1;~Nirav_Merchant1;~Chinmay_Hegde1;~Baskar_Ganapathysubramanian1", "gender": "F;M;M;M;;M;F;M;M;;F;M;M;M;M", "homepage": ";https://penfever.github.io/;;;;https://zahid-isu.github.io/;;;;;https://www.agron.iastate.edu/people/singh-arti-2/;http://web.me.iastate.edu/soumiks/index.html;https://datascience.arizona.edu/;https://chinmayhegde.github.io/;", "dblp": "306/8924;322/5063.html;;;;;;;;;;33/7053;;39/2056;", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;VPXu100AAAAJ;P3h1SM0AAAAJ;;;m9QYJ_cAAAAJ;;;TV6QvbEAAAAJ;lzTBffEAAAAJ;P-GSJbwAAAAJ;-rmRjqIAAAAJ;;eJAV17IAAAAJ;R1JIs4cAAAAJ", "orcid": "0000-0003-4841-1135;0000-0002-7938-542X;;0000-0001-5402-2107;;0009-0000-3213-2719;;;0000-0002-3903-1897;;;;0000-0002-1988-8180;;", "linkedin": "chih-hsuan-yang-isu/;benjaminfeuer/;;;andre-nakkab-2bb42b170/;zahid-isu/;shivani-chiranjeevi-10b06717a;kelly-marshall-031947151/;nirmal-baishnab/;;;;;;baskar-ganapathysubramanian-5b22a51a6/?original_referer=", "or_profile": "~Chih-Hsuan_Yang1;~Benjamin_Feuer1;~Talukder_Zaki_Jubery1;~Zi_K._Deng1;~Andre_Nakkab1;~Md_Zahid_Hasan1;~Shivani_Chiranjeevi1;~Kelly_O._Marshall1;~Nirmal_Baishnab1;~Asheesh_K_Singh1;~ARTI_SINGH2;~Soumik_Sarkar1;~Nirav_Merchant1;~Chinmay_Hegde1;~Baskar_Ganapathysubramanian1", "aff": "Iowa State University;Arthur AI;Iowa State University;University of Arizona;New York University;Iowa State University;Iowa State University;New York University;Iowa State University;Iowa State University;Iowa State University;Iowa State University;University of Arizona;New York University;Iowa State University", "aff_domain": "iastate.edu;arthur.ai;iastate.edu;arizona.edu;nyu.edu;iastate.edu;iastate.edu;nyu.edu;iastate.edu;iastate.edu;isu.edu;iastate.edu;arizona.edu;nyu.edu;iastate.edu", "position": "PhD student;Intern;Researcher;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Professor;Associate Professor;Full Professor;Principal Researcher;Associate Professor;Professor", "bibtex": "@inproceedings{\nyang2024biotrove,\ntitle={BioTrove: A Large Curated Image Dataset Enabling {AI} for Biodiversity},\nauthor={Chih-Hsuan Yang and Benjamin Feuer and Talukder Zaki Jubery and Zi K. Deng and Andre Nakkab and Md Zahid Hasan and Shivani Chiranjeevi and Kelly O. Marshall and Nirmal Baishnab and Asheesh K Singh and ARTI SINGH and Soumik Sarkar and Nirav Merchant and Chinmay Hegde and Baskar Ganapathysubramanian},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DFDCtGQs7S}\n}", "github": "", "reviewers": "KAB1;cn6Q;7g25;pSBJ", "pdf_size": 15116831, "rating": "3;6;7;8", "confidence": "5;3;3;4", "wc_summary_and_contributions": "222;45;137;123", "wc_strengths": "102;25;48;86", "wc_improvement": "115;392;50;31", "wc_limitations": "176;6;34;19", "wc_correctness": "1;27;57;16", "wc_clarity": "1;2;32;18", "wc_relation_to_prior_work": "20;1;36;9", "wc_documentation": "37;6;214;28", "wc_additional_feedback": "1;1;1;1", "wc_review": "675;505;609;331", "wc_reply_reviewers": "0;245;24;119", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 131.75, 62.79878581628788 ], "wc_strengths_avg": [ 65.25, 30.408674749156695 ], "wc_improvement_avg": [ 147.0, 144.83956641746758 ], "wc_limitations_avg": [ 58.75, 68.41555013299242 ], "wc_correctness_avg": [ 25.25, 20.522853115490545 ], "wc_clarity_avg": [ 13.25, 12.754901018824098 ], "wc_relation_to_prior_work_avg": [ 16.5, 13.124404748406688 ], "wc_documentation_avg": [ 71.25, 83.18465904239795 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 530.0, 129.8961123359741 ], "wc_reply_reviewers_avg": [ 97.0, 96.3405418294915 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": -0.6446583712203042, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1191353760481332260&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "iastate.edu;arthur.ai;iastate.edu;arizona.edu;nyu.edu;iastate.edu;iastate.edu;nyu.edu;iastate.edu;iastate.edu;isu.edu;iastate.edu;arizona.edu;nyu.edu;iastate.edu", "author_num": 15, "aff_unique_index": "0;1;0;2;3;0;0;3;0;0;0;0;2;3;0", "aff_unique_norm": "Iowa State University;Arthur AI;University of Arizona;New York University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.iastate.edu;https://www.arthur.ai;https://www.arizona.edu;https://www.nyu.edu", "aff_unique_abbr": "ISU;Arthur AI;UA;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FIRE: A Dataset for Feedback Integration and Refinement Evaluation of Multimodal Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97805", "id": "DFb1gwnhQS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DFb1gwnhQS", "openreview": "https://openreview.net/forum?id=DFb1gwnhQS", "poster": "/media/PosterPDFs/NeurIPS%202024/97805.png?t=1730967992.8618507", "project": "", "author_site": "Pengxiang Li, Zhi Gao, Bofei Zhang, Tao Yuan, Yuwei Wu, Mehrtash Harandi, Yunde Jia, Song-Chun Zhu, Qing Li", "tldr": "", "abstract": "Vision language models (VLMs) have achieved impressive progress in diverse applications, becoming a prevalent research direction. In this paper, we build FIRE, a feedback-refinement dataset, consisting of 1.1M multi-turn conversations that are derived from 27 source datasets, empowering VLMs to spontaneously refine their responses based on user feedback across diverse tasks. To scale up the data collection, FIRE is collected in two components: FIRE-100K and FIRE-1M, where FIRE-100K is generated by GPT-4V, and FIRE-1M is freely generated via models trained on FIRE-100K. Then, we build FIRE-Bench, a benchmark to comprehensively evaluate the feedback-refining capability of VLMs, which contains 11K feedback-refinement conversations as the test data, two evaluation settings, and a model to provide feedback for VLMs. We develop the FIRE-LLaVA model by fine-tuning LLaVA on FIRE-100K and FIRE-1M, which shows remarkable feedback-refining capability on FIRE-Bench and outperforms untrained VLMs by 50%, making more efficient user-agent interactions and underscoring the significance of the FIRE dataset.", "keywords": "vision and language", "primary_area": "", "supplementary_material": "/attachment/3ff7cb847716b16b963aca7e8580175d8fd7c5b8.pdf", "author": "Pengxiang Li;Zhi Gao;Bofei Zhang;Tao Yuan;Yuwei Wu;Mehrtash Harandi;Yunde Jia;Song-Chun Zhu;Qing Li", "authorids": "~Pengxiang_Li1;~Zhi_Gao5;~Bofei_Zhang1;~Tao_Yuan6;~Yuwei_Wu1;~Mehrtash_Harandi2;~Yunde_Jia1;~Song-Chun_Zhu1;~Qing_Li1", "gender": ";M;M;M;M;M;M;M;M", "homepage": ";https://zhigao2017.github.io/;https://bofei5675.github.io/;;https://wu-yuwei-bit.github.io/;https://cs.bit.edu.cn/szdw/jsml/js/jyd/index.htm;https://zhusongchun.net/;http://liqing-ustc.github.io/;https://sites.google.com/site/mehrtashharandi/", "dblp": "17/9973-2;;;;63/5298-1.html;71/2334;10/10313;181/2689-3;92/5921", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;X4FB-QYAAAAJ;H8o9QtUAAAAJ;ZqfUSEkAAAAJ;MUF2MeAAAAAJ;Sl6TV7gAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;iwdFZBEAAAAJ;--M1XEkAAAAJ", "orcid": ";;;;0000-0002-0263-925X;;;;0000-0002-6937-6300", "linkedin": ";;;;;;;;mehrtash-harandi-b99358155/", "or_profile": "~Pengxiang_Li1;~Zhi_Gao5;~Bofei_Zhang1;~Tao_Yuan6;~Yuwei_Wu1;~Yunde_Jia1;~Song-Chun_Zhu1;~Qing_Li1;~Mehrtash_T._Harandi1", "aff": "Beijing Institute of Technology;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence;Beijing Institute of Technology;Shenzhen MSU-BIT University;Peking University;Beijing Institute for General Artificial Intelligence (BIGAI);Australian National University", "aff_domain": "bit.edu.cn;bigai.ai;eng.bigai.ai;bigai.ai;bit.edu.cn;smbu.edu.cn;pku.edu.cn;bigai.ai;anu.edu.au", "position": "PhD student;Researcher;Researcher;Researcher;Associate Professor;Full Professor;Full Professor;Researcher;Adjunct", "bibtex": "@inproceedings{\nli2024fire,\ntitle={{FIRE}: A Dataset for Feedback Integration and Refinement Evaluation of Multimodal Models},\nauthor={Pengxiang Li and Zhi Gao and Bofei Zhang and Tao Yuan and Yuwei Wu and Mehrtash Harandi and Yunde Jia and Song-Chun Zhu and Qing Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DFb1gwnhQS}\n}", "github": "", "reviewers": "q6Ko;2z6w", "pdf_size": 13276754, "rating": "7;8", "confidence": "3;3", "wc_summary_and_contributions": "72;43", "wc_strengths": "48;29", "wc_improvement": "169;35", "wc_limitations": "13;13", "wc_correctness": "13;11", "wc_clarity": "73;5", "wc_relation_to_prior_work": "9;12", "wc_documentation": "6;25", "wc_additional_feedback": "1;1", "wc_review": "404;174", "wc_reply_reviewers": "49;0", "wc_reply_authors": "42;0", "reply_reviewers": "1;0", "reply_authors": "3;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 57.5, 14.5 ], "wc_strengths_avg": [ 38.5, 9.5 ], "wc_improvement_avg": [ 102.0, 67.0 ], "wc_limitations_avg": [ 13.0, 0.0 ], "wc_correctness_avg": [ 12.0, 1.0 ], "wc_clarity_avg": [ 39.0, 34.0 ], "wc_relation_to_prior_work_avg": [ 10.5, 1.5 ], "wc_documentation_avg": [ 15.5, 9.5 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 289.0, 115.0 ], "wc_reply_reviewers_avg": [ 24.5, 24.5 ], "wc_reply_authors_avg": [ 21.0, 21.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8009184108816960986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bit.edu.cn;bigai.ai;eng.bigai.ai;bigai.ai;bit.edu.cn;smbu.edu.cn;pku.edu.cn;bigai.ai;anu.edu.au", "author_num": 9, "aff_unique_index": "0;1;1;1;0;2;3;1;4", "aff_unique_norm": "Beijing Institute of Technology;Beijing Institute for General Artificial Intelligence;Shenzhen MSU-BIT University;Peking University;Australian National University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.bit.edu.cn/;http://www.bigaiai.org/;http://www.msubit.edu.cn;http://www.pku.edu.cn;https://www.anu.edu.au", "aff_unique_abbr": "BIT;BIGAI;;Peking U;ANU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "title": "The PRISM Alignment Dataset: What Participatory, Representative and Individualised Human Feedback Reveals About the Subjective and Multicultural Alignment of Large Language Models", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97804", "id": "DFr5hteojx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DFr5hteojx", "openreview": "https://openreview.net/forum?id=DFr5hteojx", "poster": "", "project": "", "author_site": "Hannah Rose Kirk, Alexander Whitefield, Paul Rottger, Andrew M. Bean, Katerina Margatina, Rafael Mosquera-Gomez, Juan Ciro, Max Bartolo, Adina Williams, He He, Bertie Vidgen, Scott Hale", "tldr": "", "abstract": "Human feedback is central to the alignment of Large Language Models (LLMs). However, open questions remain about the methods (how), domains (where), people (who) and objectives (to what end) of feedback processes. To navigate these questions, we introduce PRISM, a new dataset which maps the sociodemographics and stated preferences of 1,500 diverse participants from 75 countries, to their contextual preferences and fine-grained feedback in 8,011 live conversations with 21 LLMs. With PRISM, we contribute (i) wider geographic and demographic participation in feedback; (ii) census-representative samples for two countries (UK, US); and (iii) individualised ratings that link to detailed participant profiles, permitting personalisation and attribution of sample artefacts. We target subjective and multicultural perspectives on value-laden and controversial issues, where we expect interpersonal and cross-cultural disagreement. We use PRISM in three case studies to demonstrate the need for careful consideration of which humans provide alignment data.", "keywords": "Dataset;Alignment;Human Feedback;RLHF;Participation;Conversational AI;Preferences", "primary_area": "", "supplementary_material": "", "author": "Hannah Rose Kirk;Alexander Whitefield;Paul R\u00f6ttger;Andrew Michael Bean;Katerina Margatina;Rafael Mosquera;Juan Manuel Ciro;Max Bartolo;Adina Williams;He He;Bertie Vidgen;Scott A. Hale", "authorids": "~Hannah_Rose_Kirk1;~Alexander_Whitefield1;~Paul_R\u00f6ttger2;~Andrew_Michael_Bean1;~Katerina_Margatina1;~Rafael_Mosquera1;~Juan_Manuel_Ciro1;~Max_Bartolo1;~Adina_Williams1;~He_He2;~Bertie_Vidgen1;~Scott_A._Hale1", "gender": "F;;F;M;M;;F;M;Not Specified;M;F;M", "homepage": "https://www.hannahrosekirk.com/;https://sites.google.com/view/alexander-whitefield/home?authuser=0;https://katerinamargatina.github.io/;;;https://maxbartolo.com;http://www.adinawilliams.com;https://www.turing.ac.uk/people/researchers/bertie-vidgen;http://scott.hale.us;https://www.am-bean.github.io;http://hhexiy.github.io;https://paulrottger.com/", "dblp": "284/9434;;227/2313;;;227/3290;199/2104;;32/10840;244/9323;08/8618-1;282/4243", "google_scholar": "Fha8ldEAAAAJ;tJtxyJAAAAAJ;517t5gEAAAAJ;XC9DJhUAAAAJ;;jPSWYn4AAAAJ;MUtbKt0AAAAJ;https://scholar.google.co.uk/citations?user=yRhnVoIAAAAJ;PBJL9ZEAAAAJ;https://scholar.google.com.mx/citations?hl=en;https://scholar.google.com/citations?hl=en;7rpmd9cAAAAJ", "orcid": "0000-0002-7419-5993;;;0009-0009-0812-6330;;0009-0007-3301-7895;0000-0001-5281-3343;;0000-0002-6894-4951;0000-0001-8439-5975;;0009-0008-7115-6893", "linkedin": "hannah-rose-kirk;;katerina-margatina/;rafael-mosquera/;https://co.linkedin.com/in/juan-manuel-ciro-torres-471015aa;maxbartolo/;;bertie-vidgen-001/;https://linkedin.com/in/computermacgyver;;;paul-rottger/", "or_profile": "~Hannah_Rose_Kirk1;~Alexander_Whitefield1;~Katerina_Margatina1;~Rafael_Mosquera1;~Juan_Manuel_Ciro1;~Max_Bartolo1;~Adina_Williams1;~Bertie_Vidgen1;~Scott_A._Hale1;~Andrew_Bean1;~He_He1;~Paul_Rottger1", "aff": "University of Oxford;The Wharton School, University of Pennsylvania;University of Sheffield;Universidad de Los Andes;;University College London;FAIR (Meta Platforms Inc.);MLCommons;Alan Turing Institute;University of Oxford;New York University;Bocconi University", "aff_domain": "ox.ac.uk;wharton.upenn.edu;sheffield.ac.uk;uniandes.edu.co;;ucl.ac.uk;facebook.com;mlcommons.org;turing.ac.uk;ox.ac.uk;nyu.edu;unibocconi.it", "position": "PhD student;PhD student;PhD student;MS student;;PhD student;Research Scientist;Evaluation lead;Fellow;PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nkirk2024the,\ntitle={The {PRISM} Alignment Dataset: What Participatory, Representative and Individualised Human Feedback Reveals About the Subjective and Multicultural Alignment of Large Language Models},\nauthor={Hannah Rose Kirk and Alexander Whitefield and Paul R{\\\"o}ttger and Andrew Michael Bean and Katerina Margatina and Rafael Mosquera and Juan Manuel Ciro and Max Bartolo and Adina Williams and He He and Bertie Vidgen and Scott A. Hale},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DFr5hteojx}\n}", "github": "", "reviewers": "FEUL;zHUk;t26D", "pdf_size": 10930630, "rating": "9;9;10", "confidence": "4;3;3", "wc_summary_and_contributions": "89;46;121", "wc_strengths": "88;47;139", "wc_improvement": "107;85;87", "wc_limitations": "26;6;75", "wc_correctness": "2;10;59", "wc_clarity": "2;6;31", "wc_relation_to_prior_work": "13;46;31", "wc_documentation": "15;2;15", "wc_additional_feedback": "1;1;1", "wc_review": "343;249;559", "wc_reply_reviewers": "64;95;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 9.333333333333334, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 85.33333333333333, 30.728199137310703 ], "wc_strengths_avg": [ 91.33333333333333, 37.63272807307786 ], "wc_improvement_avg": [ 93.0, 9.93310961716756 ], "wc_limitations_avg": [ 35.666666666666664, 28.986586936412888 ], "wc_correctness_avg": [ 23.666666666666668, 25.197001585285676 ], "wc_clarity_avg": [ 13.0, 12.832251036613439 ], "wc_relation_to_prior_work_avg": [ 30.0, 13.490737563232042 ], "wc_documentation_avg": [ 10.666666666666666, 6.128258770283412 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 383.6666666666667, 129.78272441105386 ], "wc_reply_reviewers_avg": [ 53.0, 39.5558676641869 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3405767099451095040&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ox.ac.uk;wharton.upenn.edu;sheffield.ac.uk;uniandes.edu.co;;ucl.ac.uk;facebook.com;mlcommons.org;turing.ac.uk;ox.ac.uk;nyu.edu;unibocconi.it", "author_num": 12, "aff_unique_index": "0;1;2;3;4;5;6;7;0;8;9", "aff_unique_norm": "University of Oxford;University of Pennsylvania;University of Sheffield;Universidad de los Andes;University College London;Meta;MLCommons;Alan Turing Institute;New York University;Bocconi University", "aff_unique_dep": ";The Wharton School;;;;FAIR;;;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.wharton.upenn.edu;https://www.sheffield.ac.uk;https://www.udea.edu.co;https://www.ucl.ac.uk;https://www.meta.com;https://mlcommons.org;https://www.turing.ac.uk;https://www.nyu.edu;https://www.bocconi.edu", "aff_unique_abbr": "Oxford;UPenn Wharton;Sheffield;UdeA;UCL;Meta;MLCommons;ATI;NYU;Bocconi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;1;1;0;0;1;3", "aff_country_unique": "United Kingdom;United States;Colombia;Italy" }, { "title": "GaussianCube: A Structured and Explicit Radiance Representation for 3D Generative Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96097", "id": "DG2f1rVEM5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DG2f1rVEM5", "openreview": "https://openreview.net/forum?id=DG2f1rVEM5", "poster": "/media/PosterPDFs/NeurIPS%202024/96097.png?t=1733473838.1279438", "project": "", "author_site": "Bowen Zhang, Yiji Cheng, Jiaolong Yang, Chunyu Wang, Feng Zhao, Yansong Tang, Dong Chen, Baining Guo", "tldr": "", "abstract": "We introduce a radiance representation that is both structured and fully explicit and thus greatly facilitates 3D generative modeling. Existing radiance representations either require an implicit feature decoder, which significantly degrades the modeling power of the representation, or are spatially unstructured, making them difficult to integrate with mainstream 3D diffusion methods. We derive GaussianCube by first using a novel densification-constrained Gaussian fitting algorithm, which yields high-accuracy fitting using a fixed number of free Gaussians, and then rearranging these Gaussians into a predefined voxel grid via Optimal Transport. Since GaussianCube is a structured grid representation, it allows us to use standard 3D U-Net as our backbone in diffusion modeling without elaborate designs. More importantly, the high-accuracy fitting of the Gaussians allows us to achieve a high-quality representation with orders of magnitude fewer parameters than previous structured representations for comparable quality, ranging from one to two orders of magnitude. The compactness of GaussianCube greatly eases the difficulty of 3D generative modeling. Extensive experiments conducted on unconditional and class-conditioned object generation, digital avatar creation, and text-to-3D synthesis all show that our model achieves state-of-the-art generation results both qualitatively and quantitatively, underscoring the potential of GaussianCube as a highly accurate and versatile radiance representation for 3D generative modeling.", "keywords": "3D Generative Modeling; Gaussian Splatting; Optimal Transport", "primary_area": "generative_models", "supplementary_material": "/attachment/d60c98eaf94ce50074b31aa952208b7baa37fa1c.zip", "author": "Bowen Zhang;Yiji Cheng;Jiaolong Yang;Chunyu Wang;Feng Zhao;Yansong Tang;Dong Chen;Baining Guo", "authorids": "~Bowen_Zhang8;~Yiji_Cheng1;~Jiaolong_Yang3;~Chunyu_Wang1;~Feng_Zhao6;~Yansong_Tang1;~Dong_Chen1;~Baining_Guo1", "gender": "M;;Not Specified;M;M;M;M;M", "homepage": "http://home.ustc.edu.cn/~zhangbowen;;http://jlyang.org/;https://www.chunyuwang.org/;https://bivlab123.github.io/;https://andytang15.github.io/;https://www.microsoft.com/en-us/research/people/bainguo/;http://www.dongchen.pro/", "dblp": "85/7433-10;;121/6218;63/7235;181/2734-4;214/9568;;44/3371-3", "google_scholar": "BbvmxdIAAAAJ;Plo8ZSYAAAAJ;GuqoolgAAAAJ;https://scholar.google.co.jp/citations?user=VXQV5xwAAAAJ;https://scholar.google.co.uk/citations?hl=en;TIbistUAAAAJ;h4kYmRYAAAAJ;https://scholar.google.com.hk/citations?user=_fKSYOwAAAAJ", "orcid": ";;;;0000-0001-6767-8105;;;", "linkedin": ";yiji-cheng-a8b922213/;;;;;;", "or_profile": "~Bowen_Zhang8;~Yiji_Cheng1;~Jiaolong_Yang3;~Chunyu_Wang1;~Feng_Zhao6;~Yansong_Tang1;~Baining_Guo1;~Dong_Chen4", "aff": "University of Science and Technology of China;Tsinghua University;Microsoft;Microsoft Research Asia;University of Science and Technology of China;Tsinghua University;Microsoft Research;Microsoft", "aff_domain": "ustc.edu.cn;tsinghua.edu.cn;microsoft.com;microsoft.com;ustc.edu.cn;tsinghua.edu.cn;microsoft.com;microsoft.com", "position": "PhD student;MS student;Researcher;Researcher;Full Professor;Assistant Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2024gaussiancube,\ntitle={GaussianCube: A Structured and Explicit Radiance Representation for 3D Generative Modeling},\nauthor={Bowen Zhang and Yiji Cheng and Jiaolong Yang and Chunyu Wang and Feng Zhao and Yansong Tang and Dong Chen and Baining Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DG2f1rVEM5}\n}", "github": "", "reviewers": "E9Nw;hQzy;u6Hp;Z6fD", "pdf_size": 36438474, "rating": "6;6;7;7", "confidence": "4;4;5;4", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "180;106;111;55", "wc_strengths": "89;137;163;58", "wc_weaknesses": "186;92;210;60", "wc_questions": "41;43;52;25", "wc_limitations": "53;1;14;1", "wc_review": "549;379;550;199", "wc_reply_reviewers": "0;0;90;0", "wc_reply_authors": "49;49;82;49", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.0, 44.45784520194383 ], "wc_strengths_avg": [ 111.75, 40.837329736406616 ], "wc_weaknesses_avg": [ 137.0, 62.617888817813075 ], "wc_questions_avg": [ 40.25, 9.730750228014282 ], "wc_limitations_avg": [ 17.25, 21.3116752039815 ], "wc_review_avg": [ 419.25, 144.96615984428917 ], "wc_reply_reviewers_avg": [ 22.5, 38.97114317029974 ], "wc_reply_authors_avg": [ 57.25, 14.289419162443238 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6086860197627020799&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 2, "email": "ustc.edu.cn;tsinghua.edu.cn;microsoft.com;microsoft.com;ustc.edu.cn;tsinghua.edu.cn;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;1;2;2;0;1;2;2", "aff_unique_norm": "University of Science and Technology of China;Tsinghua University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "http://www.ustc.edu.cn;https://www.tsinghua.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "USTC;THU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0;1;1", "aff_country_unique": "China;United States" }, { "id": "DG4k8PJ8qv", "title": "POGEMA: A Benchmark Platform for Cooperative Multi-Agent Navigation", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Multi-agent reinforcement learning (MARL) has recently excelled in solving challenging cooperative and competitive multi-agent problems in various environments with, mostly, few agents and vector observations. Moreover, a range of crucial robotics-related tasks, such as multi-robot navigation and obstacle avoidance, that have been conventionally approached with the classical non-learnable methods (e.g., heuristic search) is currently suggested to be solved by the learning-based or hybrid methods. Still, in this domain, it is hard, not to say impossible, to conduct a fair comparison between classical, learning-based, and hybrid approaches due to the lack of a unified framework that supports both learning and evaluation. To this end, we introduce POGEMA, a set of comprehensive tools that includes a fast environment for learning, a generator of problem instances, the collection of pre-defined ones, a visualization toolkit, and a benchmarking tool that allows automated evaluation. We introduce and specify an evaluation protocol defining a range of domain-related metrics computed on the basics of the primary evaluation indicators (such as success rate and path length), allowing a fair multi-fold comparison. The results of such a comparison, which involves seven state-of-the-art MARL, search-based, and hybrid methods, are presented.", "keywords": "MAPF;MARL;RL;Heuristic search", "primary_area": "", "supplementary_material": "/attachment/ba353cbb1ddbbe51fbd9d894bb4db8679cab3002.pdf", "author": "Alexey Skrynnik;Anton Andreychuk;Anatolii Borzilov;Alexander Chernyavskiy;Konstantin Yakovlev;Aleksandr Panov", "authorids": "~Alexey_Skrynnik1;~Anton_Andreychuk1;~Anatolii_Borzilov1;~Alexander_Chernyavskiy2;~Konstantin_Yakovlev1;~Aleksandr_Panov1", "gender": "M;M;;M;M;M", "homepage": "https://tviskaron.github.io;;https://github.com/tolyan3212;;http://kyakovlev.me;http://grafft.github.io", "dblp": "222/2698;185/0555;;274/2450;150/4804;177/9975", "google_scholar": "ITgs9IMAAAAJ;xtcLdhcAAAAJ;;;Tw0A27kAAAAJ;https://scholar.google.ru/citations?hl=ru", "orcid": "0000-0001-9243-1622;;;0000-0003-4231-3059;0000-0002-4377-321X;0000-0002-9747-3837", "linkedin": "alexey-skrynnik-65a125168/;;;alexander-chernyavskiy-b1a1b7252/;;", "or_profile": "~Alexey_Skrynnik1;~Anton_Andreychuk1;~Anatolii_Borzilov1;~Alexander_Chernyavskiy2;~Konstantin_Yakovlev1;~Aleksandr_Panov1", "aff": "AIRI; Artificial Intelligence Research Institute;Moscow State Institute of Radiotechnics, Electronics and Automation (Technical University);Moscow Institute of Physics and Technology;Federal Research Center for Computer Science and Control of Russian Academy of Sciences;Federal Research Center \u00abComputer Science and Control\u00bb of Russian Academy of Sciences", "aff_domain": "airi.net;airi.net;mirea.ru;phystech.edu;isa.ru;frccsc.ru", "position": "Senior Research Scientist;Researcher;MS student;MS student;Principal Researcher;Principal Researcher", "bibtex": "@misc{\nanonymous2024pogema,\ntitle={{POGEMA}: A Benchmark Platform for Cooperative Multi-Agent Navigation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=DG4k8PJ8qv}\n}", "github": "", "project": "", "reviewers": "5xNj;BG3N;dFGp", "site": "https://openreview.net/forum?id=DG4k8PJ8qv", "pdf_size": 346603, "rating": "6;6;7", "confidence": "3;3;3", "wc_summary_and_contributions": "54;40;85", "wc_strengths": "13;12;104", "wc_improvement": "112;15;473", "wc_limitations": "1;1;46", "wc_correctness": "123;1;75", "wc_clarity": "17;1;1", "wc_relation_to_prior_work": "8;1;12", "wc_documentation": "4;1;20", "wc_additional_feedback": "1;1;1", "wc_review": "333;73;817", "wc_reply_reviewers": "93;0;0", "wc_reply_authors": "711;0;1288", "reply_reviewers": "1;0;0", "reply_authors": "3;1;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 59.666666666666664, 18.80307303489394 ], "wc_strengths_avg": [ 43.0, 43.135445594854666 ], "wc_improvement_avg": [ 200.0, 197.06005852700508 ], "wc_limitations_avg": [ 16.0, 21.213203435596427 ], "wc_correctness_avg": [ 66.33333333333333, 50.18189137749017 ], "wc_clarity_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_relation_to_prior_work_avg": [ 7.0, 4.546060565661952 ], "wc_documentation_avg": [ 8.333333333333334, 8.339997335464536 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 407.6666666666667, 308.2913484928754 ], "wc_reply_reviewers_avg": [ 31.0, 43.840620433565945 ], "wc_reply_authors_avg": [ 666.3333333333334, 526.7715085520688 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16070700149448129777&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;3;3", "aff_unique_norm": "Artificial Intelligence Research Institute;Moscow State Institute of Radiotechnics, Electronics and Automation;Moscow Institute of Physics and Technology;Russian Academy of Sciences", "aff_unique_dep": ";Technical University;;Federal Research Center for Computer Science and Control", "aff_unique_url": "https://www.airi.jp;http://www.msi\u0440\u0430.ru;https://www.mipt.ru/en;https://www.ras.ru", "aff_unique_abbr": "AIRI;MSI\u0420\u0410;MIPT;RAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2;2", "aff_country_unique": "Japan;United States;Russian Federation" }, { "id": "DHVqAQ9DHy", "title": "Posterior Label Smoothing for Node Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "Soft labels can improve the generalization of a neural network classifier in many domains, such as image classification. Despite its success, the current literature has overlooked the efficiency of label smoothing in node classification with graph-structured data. In this work, we propose a simple yet effective label smoothing for the transductive node classification task. We design the soft label to encapsulate the local context of the target node through the neighborhood label distribution. We apply the smoothing method for seven baseline models to show its effectiveness. The label smoothing methods improve the classification accuracy in 10 node classification datasets in most cases. In the following analysis, we find that incorporating global label statistics in posterior computation is the key to the success of label smoothing. Further investigation reveals that the soft labels mitigate overfitting during training, leading to better generalization performance.", "keywords": "node classification;label smoothing", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/ed813ea0c87f64bd8d1f38fa1a7e24c2e0fb9ff7.zip", "author": "Jaeseung Heo;MoonJeong Park;Dongwoo Kim", "authorids": "~Jaeseung_Heo1;~MoonJeong_Park1;~Dongwoo_Kim1", "gender": "M;;M", "homepage": "https://ml.postech.ac.kr/;https://jeong27.github.io/;http://dongwookim-ml.github.io/", "dblp": "348/9020;321/3773;15/398-2", "google_scholar": ";https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=RkspD6IAAAAJ", "orcid": ";;0000-0002-6515-5260", "linkedin": ";moonjeong-park-97ba85258/;", "or_profile": "~Jaeseung_Heo1;~MoonJeong_Park1;~Dongwoo_Kim1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;POSTECH", "aff_domain": "postech.edu;postech.ac.kr;postech.ac.kr", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024posterior,\ntitle={Posterior Label Smoothing for Node Classification},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=DHVqAQ9DHy}\n}", "github": "", "project": "", "reviewers": "k4jx;ESPd;yTqG;Qv2Y", "site": "https://openreview.net/forum?id=DHVqAQ9DHy", "pdf_size": 1446390, "rating": "2;5;6;7", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "1;2;3;2", "presentation": "2;3;3;3", "wc_summary": "12;60;126;40", "wc_strengths": "10;57;80;55", "wc_weaknesses": "186;176;345;93", "wc_questions": "2;148;234;80", "wc_limitations": "14;1;29;20", "wc_review": "224;442;814;288", "wc_reply_reviewers": "69;12;685;10", "wc_reply_authors": "213;0;1242;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;3;1", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 42.00892762258994 ], "wc_strengths_avg": [ 50.5, 25.362373705944798 ], "wc_weaknesses_avg": [ 200.0, 91.16742839413646 ], "wc_questions_avg": [ 116.0, 85.49853799919622 ], "wc_limitations_avg": [ 16.0, 10.173494974687902 ], "wc_review_avg": [ 442.0, 228.92356803090414 ], "wc_reply_reviewers_avg": [ 194.0, 284.4670455430646 ], "wc_reply_authors_avg": [ 363.75, 514.4600932045167 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6172133998483676, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EvsiEEVqSZkJ:scholar.google.com/&scioq=Posterior+Label+Smoothing+for+Node+Classification&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Pre-Trained Multi-Goal Transformers with Prompt Optimization for Efficient Online Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96096", "id": "DHucngOEe3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DHucngOEe3", "openreview": "https://openreview.net/forum?id=DHucngOEe3", "poster": "/media/PosterPDFs/NeurIPS%202024/96096.png?t=1731946873.0427732", "project": "", "author_site": "Haoqi Yuan, Yuhui Fu, Feiyang Xie, Zongqing Lu", "tldr": "", "abstract": "Efficiently solving unseen tasks remains a challenge in reinforcement learning (RL), especially for long-horizon tasks composed of multiple subtasks. \nPre-training policies from task-agnostic datasets has emerged as a promising approach, yet existing methods still necessitate substantial interactions via RL to learn new tasks.\nWe introduce MGPO, a method that leverages the power of Transformer-based policies to model sequences of goals, enabling efficient online adaptation through prompt optimization.\nIn its pre-training phase, MGPO utilizes hindsight multi-goal relabeling and behavior cloning. This combination equips the policy to model diverse long-horizon behaviors that align with varying goal sequences.\nDuring online adaptation, the goal sequence, conceptualized as a prompt, is optimized to improve task performance. We adopt a multi-armed bandit framework for this process, enhancing prompt selection based on the returns from online trajectories.\nOur experiments across various environments demonstrate that MGPO holds substantial advantages in sample efficiency, online adaptation performance, robustness, and interpretability compared with existing methods.", "keywords": "reinforcement learning;pre-training;Transformers", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Haoqi Yuan;Yuhui Fu;Feiyang Xie;Zongqing Lu", "authorids": "~Haoqi_Yuan1;~Yuhui_Fu1;~Feiyang_Xie1;~Zongqing_Lu2", "gender": "M;;;", "homepage": ";;https://github.com/Xiefeiy;", "dblp": "254/2084;;301/5891;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Haoqi_Yuan1;~Yuhui_Fu1;~Feiyang_Xie1;~Zongqing_Lu2", "aff": "Peking University;;Peking University;", "aff_domain": "pku.edu.cn;;pku.edu.cn;", "position": "PhD student;;Undergrad student;", "bibtex": "@inproceedings{\nyuan2024pretrained,\ntitle={Pre-Trained Multi-Goal Transformers with Prompt Optimization for Efficient Online Adaptation},\nauthor={Haoqi Yuan and Yuhui Fu and Feiyang Xie and Zongqing Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DHucngOEe3}\n}", "github": "", "reviewers": "YjJH;JzeR;V4S2", "pdf_size": 1692575, "rating": "6;6;7", "confidence": "3;5;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "38;40;48", "wc_strengths": "23;45;65", "wc_weaknesses": "197;56;2", "wc_questions": "31;182;249", "wc_limitations": "15;35;2", "wc_review": "304;358;366", "wc_reply_reviewers": "25;19;18", "wc_reply_authors": "10;13;13", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 42.0, 4.320493798938574 ], "wc_strengths_avg": [ 44.333333333333336, 17.15290710702481 ], "wc_weaknesses_avg": [ 85.0, 82.20705565825844 ], "wc_questions_avg": [ 154.0, 91.1738266536327 ], "wc_limitations_avg": [ 17.333333333333332, 13.572848714334889 ], "wc_review_avg": [ 342.6666666666667, 27.535835237417842 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 3.0912061651652345 ], "wc_reply_authors_avg": [ 12.0, 1.4142135623730951 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SFmFodbGJQMJ:scholar.google.com/&scioq=Pre-Trained+Multi-Goal+Transformers+with+Prompt+Optimization+for+Efficient+Online+Adaptation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "pku.edu.cn;;pku.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Human-Aware Vision-and-Language Navigation: Bridging Simulation to Reality with Dynamic Human Interactions", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97803", "id": "DJVyRhT8nP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DJVyRhT8nP", "openreview": "https://openreview.net/forum?id=DJVyRhT8nP", "poster": "/media/PosterPDFs/NeurIPS%202024/97803.png?t=1731644932.0128846", "project": "", "author_site": "Minghan Li, Heng Li, Zhi-Qi Cheng, Yifei Dong, Yuxuan Zhou, Jun-Yan He, Qi Dai, Teruko Mitamura, Alexander Hauptmann", "tldr": "", "abstract": "Vision-and-Language Navigation (VLN) aims to develop embodied agents that navigate based on human instructions. However, current VLN frameworks often rely on static environments and optimal expert supervision, limiting their real-world applicability. To address this, we introduce Human-Aware Vision-and-Language Navigation (HA-VLN), extending traditional VLN by incorporating dynamic human activities and relaxing key assumptions. We propose the Human-Aware 3D (HA3D) simulator, which combines dynamic human activities with the Matterport3D dataset, and the Human-Aware Room-to-Room (HA-R2R) dataset, extending R2R with human activity descriptions. To tackle HA-VLN challenges, we present the Expert-Supervised Cross-Modal (VLN-CM) and Non-Expert-Supervised Decision Transformer (VLN-DT) agents, utilizing cross-modal fusion and diverse training strategies for effective navigation in dynamic human environments. A comprehensive evaluation, including metrics considering human activities, and systematic analysis of HA-VLN's unique challenges, underscores the need for further research to enhance HA-VLN agents' real-world robustness and adaptability. Ultimately, this work provides benchmarks and insights for future research on embodied AI and Sim2Real transfer, paving the way for more realistic and applicable VLN systems in human-populated environments.", "keywords": "Visual-and-Language Navigation;Human-Aware Navigation", "primary_area": "", "supplementary_material": "", "author": "Heng Li;Minghan Li;Zhi-Qi Cheng;Yifei Dong;Yuxuan Zhou;Jun-Yan He;Qi Dai;Teruko Mitamura;Alexander G Hauptmann", "authorids": "~Heng_Li13;~Minghan_Li5;~Zhi-Qi_Cheng1;~Yifei_Dong1;~Yuxuan_Zhou2;~Jun-Yan_He2;~Qi_Dai4;~Teruko_Mitamura1;~Alexander_G_Hauptmann1", "gender": "M;Not Specified;M;M;M;F;M;M;M", "homepage": ";https://faculty.washington.edu/zhiqics/;;;;http://www.cs.cmu.edu/~teruko;;;https://github.com/lpercc", "dblp": ";188/1193;;172/9870-4.html;35/5587-1.html;90/785;h/AlexanderGHauptmann;173/3747;", "google_scholar": ";uB2He2UAAAAJ;;ooVdh_kAAAAJ;NSJY12IAAAAJ;gjsxBCkAAAAJ;https://scholar.google.co.uk/citations?user=Py54GcEAAAAJ;bjNZqGAAAAAJ;", "orcid": ";0000-0002-1720-2085;0000-0002-3481-0154;;;;;;", "linkedin": "hengdylanli;zhiqicheng/;yifeidong2616/;;;;;;", "or_profile": "~Heng_Li13;~Zhi-Qi_Cheng1;~Yifei_Dong1;~Yuxuan_Zhou2;~Qi_Dai4;~Teruko_Mitamura1;~Alexander_G_Hauptmann1;~HE_JUNYAN1;~MinghanLi1", "aff": "Carnegie Mellon University;Carnegie Mellon University;University of Washington;Universit\u00e4t Mannheim;Microsoft Research Asia;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Alibaba Group;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;uw.edu;uni-mannheim.de;microsoft.com;cmu.edu;cs.cmu.edu;alibaba-inc.com;cmu.edu", "position": "MS student;Project Scientist & Instructor;Intern;PhD student;Researcher;Research Professor;Full Professor;Researcher;Intern", "bibtex": "@inproceedings{\nli2024humanaware,\ntitle={Human-Aware Vision-and-Language Navigation: Bridging Simulation to Reality with Dynamic Human Interactions},\nauthor={Heng Li and Minghan Li and Zhi-Qi Cheng and Yifei Dong and Yuxuan Zhou and Jun-Yan He and Qi Dai and Teruko Mitamura and Alexander G Hauptmann},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DJVyRhT8nP}\n}", "github": "", "reviewers": "Np5K", "pdf_size": 15289346, "rating": "8", "confidence": "4", "wc_summary_and_contributions": "42", "wc_strengths": "61", "wc_improvement": "79", "wc_limitations": "2", "wc_correctness": "9", "wc_clarity": "5", "wc_relation_to_prior_work": "63", "wc_documentation": "18", "wc_additional_feedback": "1", "wc_review": "280", "wc_reply_reviewers": "0", "wc_reply_authors": "0", "reply_reviewers": "0", "reply_authors": "1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 42.0, 0.0 ], "wc_strengths_avg": [ 61.0, 0.0 ], "wc_improvement_avg": [ 79.0, 0.0 ], "wc_limitations_avg": [ 2.0, 0.0 ], "wc_correctness_avg": [ 9.0, 0.0 ], "wc_clarity_avg": [ 5.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 63.0, 0.0 ], "wc_documentation_avg": [ 18.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 280.0, 0.0 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14071137518898733880&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cmu.edu;cmu.edu;uw.edu;uni-mannheim.de;microsoft.com;cmu.edu;cs.cmu.edu;alibaba-inc.com;cmu.edu", "author_num": 9, "aff_unique_index": "0;0;1;2;3;0;0;4;0", "aff_unique_norm": "Carnegie Mellon University;University of Washington;University of Mannheim;Microsoft;Alibaba Group", "aff_unique_dep": ";;;Research;", "aff_unique_url": "https://www.cmu.edu;https://www.washington.edu;https://www.uni-mannheim.de;https://www.microsoft.com/en-us/research/group/asia;https://www.alibaba.com", "aff_unique_abbr": "CMU;UW;UM;MSR Asia;Alibaba", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Pittsburgh", "aff_country_unique_index": "0;0;0;1;2;0;0;2;0", "aff_country_unique": "United States;Germany;China" }, { "title": "Multiple Physics Pretraining for Spatiotemporal Surrogate Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96095", "id": "DKSI3bULiZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DKSI3bULiZ", "openreview": "https://openreview.net/forum?id=DKSI3bULiZ", "poster": "/media/PosterPDFs/NeurIPS%202024/96095.png?t=1733442886.1110713", "project": "", "author_site": "Michael McCabe, Bruno R\u00e9galdo-Saint Blancard, Liam Parker, Ruben Ohana, Miles Cranmer, Alberto Bietti, Michael Eickenberg, Siavash Golkar, Geraud Krawezik, Francois Lanusse, Mariel Pettee, Tiberiu Tesileanu, Kyunghyun Cho, Shirley Ho", "tldr": "", "abstract": "We introduce multiple physics pretraining (MPP), an autoregressive task-agnostic pretraining approach for physical surrogate modeling of spatiotemporal systems with transformers. In MPP, rather than training one model on a specific physical system, we train a backbone model to predict the dynamics of multiple heterogeneous physical systems simultaneously in order to learn features that are broadly useful across systems and facilitate transfer. In order to learn effectively in this setting, we introduce a shared embedding and normalization strategy that projects the fields of multiple systems into a shared embedding space. We validate the efficacy of our approach on both pretraining and downstream tasks over a broad fluid mechanics-oriented benchmark. We show that a single MPP-pretrained transformer is able to match or outperform task-specific baselines on all pretraining sub-tasks without the need for finetuning. For downstream tasks, we demonstrate that finetuning MPP-trained models results in more accurate predictions across multiple time-steps on systems with previously unseen physical components or higher dimensional systems compared to training from scratch or finetuning pretrained video foundation models. We open-source our code and model weights trained at multiple scales for reproducibility.", "keywords": "transfer learning;physics;pretraining;finetuning;surrogate models;spatiotemporal", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/3bda966ab5ebc7a1ef9f0d7091eec3a790198a1f.zip", "author": "Michael McCabe;Bruno R\u00e9galdo-Saint Blancard;Liam Holden Parker;Ruben Ohana;Miles Cranmer;Alberto Bietti;Michael Eickenberg;Siavash Golkar;Geraud Krawezik;Francois Lanusse;Mariel Pettee;Tiberiu Tesileanu;Kyunghyun Cho;Shirley Ho", "authorids": "~Michael_McCabe2;~Bruno_R\u00e9galdo-Saint_Blancard1;~Liam_Holden_Parker1;~Ruben_Ohana1;~Miles_Cranmer2;~Alberto_Bietti1;~Michael_Eickenberg5;~Siavash_Golkar1;~Geraud_Krawezik1;~Francois_Lanusse2;~Mariel_Pettee1;~Tiberiu_Tesileanu1;~Kyunghyun_Cho1;~Shirley_Ho2", "gender": "M;;M;;;M;M;;M;M;F;M;M;", "homepage": "https://mikemccabe210.github.io/;https://users.flatironinstitute.org/~bregaldosaintblancard/;;https://rubenohana.github.io/;https://astroautomata.com/;http://alberto.bietti.me;http://eickenberg.github.io;;https://www.simonsfoundation.org/people/geraud-krawezik/;https://flanusse.net;https://mariel-pettee.github.io/;http://www.ttesileanu.com;http://kyunghyuncho.me;https://www.shirleyho.space/", "dblp": "56/706;;;251/5608;205/2493;166/6461;117/7268;222/3276;89/1532.html;;;160/0174;41/9736;162/2218", "google_scholar": "SMXfsHYAAAAJ;TfcmfBQAAAAJ;CjYa0N4AAAAJ;https://scholar.google.fr/citations?user=F9qNg2wAAAAJ;10WfwCQAAAAJ;iT7Tp70AAAAJ;GW0werQAAAAJ;UzaZt7MAAAAJ;;fRDCooIAAAAJ;;8--imZAAAAAJ;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;fhOi--4AAAAJ", "orcid": "0009-0007-8117-6110;0000-0003-0055-0953;;0000-0002-8493-1210;0000-0002-6458-3423;;;;;;;0000-0003-3107-3088;;", "linkedin": "mmccabe-210/;;liam-parker-96ba14160/;rubenohana/;milescranmer/;;;;gkrawezik/;;;ttesileanu/;;", "or_profile": "~Michael_McCabe2;~Bruno_R\u00e9galdo-Saint_Blancard1;~Liam_Holden_Parker1;~Ruben_Ohana1;~Miles_Cranmer2;~Alberto_Bietti1;~Michael_Eickenberg5;~Siavash_Golkar1;~Geraud_Krawezik1;~Francois_Lanusse2;~Mariel_Pettee1;~Tiberiu_Tesileanu1;~Kyunghyun_Cho1;~Shirley_Ho2", "aff": "University of Colorado, Boulder;Flatiron Institute;Flatiron Institute;Flatiron Institute;University of Cambridge;Flatiron Institute;Flatiron Institute;New York University;Flatiron Institute;CNRS;Lawrence Berkeley National Lab;Meta;Genentech;Carnegie Mellon University", "aff_domain": "colorado.edu;flatironinstitute.org;simonsfoundation.org;flatironinstitute.org;cam.ac.uk;flatironinstitute.org;flatironinstitute.org;nyu.edu;flatironinstitute.org;cnrs.fr;lbnl.gov;meta.com;gene.com;cmu.edu", "position": "PhD student;Postdoc;Researcher;Postdoc;Assistant Professor;Researcher;Researcher;Research Scientist;Researcher;Researcher;Postdoc;Researcher;Senior Director of Frontier Research;Associate Professor", "bibtex": "@inproceedings{\nmccabe2024multiple,\ntitle={Multiple Physics Pretraining for Spatiotemporal Surrogate Models},\nauthor={Michael McCabe and Bruno R{\\'e}galdo-Saint Blancard and Liam Holden Parker and Ruben Ohana and Miles Cranmer and Alberto Bietti and Michael Eickenberg and Siavash Golkar and Geraud Krawezik and Francois Lanusse and Mariel Pettee and Tiberiu Tesileanu and Kyunghyun Cho and Shirley Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DKSI3bULiZ}\n}", "github": "", "reviewers": "1VF5;mqdr;cf8J;1X1Y", "pdf_size": 3145755, "rating": "4;5;7;10", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "77;52;99;110", "wc_strengths": "29;49;35;13", "wc_weaknesses": "153;52;44;8", "wc_questions": "69;117;203;121", "wc_limitations": "1;1;4;4", "wc_review": "329;271;385;256", "wc_reply_reviewers": "80;14;12;12", "wc_reply_authors": "51;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 2.29128784747792 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 22.20923231451281 ], "wc_strengths_avg": [ 31.5, 12.913171570144957 ], "wc_weaknesses_avg": [ 64.25, 53.85338893700191 ], "wc_questions_avg": [ 127.5, 48.153400710645556 ], "wc_limitations_avg": [ 2.5, 1.5 ], "wc_review_avg": [ 310.25, 51.0459351956647 ], "wc_reply_reviewers_avg": [ 29.5, 29.16761903207048 ], "wc_reply_authors_avg": [ 12.75, 22.083647796503186 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.8819171036881968, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6522335097956918823&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "colorado.edu;flatironinstitute.org;simonsfoundation.org;flatironinstitute.org;cam.ac.uk;flatironinstitute.org;flatironinstitute.org;nyu.edu;flatironinstitute.org;cnrs.fr;lbnl.gov;meta.com;gene.com;cmu.edu", "author_num": 14, "aff_unique_index": "0;1;1;1;2;1;1;3;1;4;5;6;7;8", "aff_unique_norm": "University of Colorado;Flatiron Institute;University of Cambridge;New York University;Centre National de la Recherche Scientifique;Lawrence Berkeley National Laboratory;Meta;Genentech;Carnegie Mellon University", "aff_unique_dep": ";;;;;;Meta Platforms, Inc.;;", "aff_unique_url": "https://www.colorado.edu;https://flatironinstitute.org;https://www.cam.ac.uk;https://www.nyu.edu;https://www.cnrs.fr;https://www.lbl.gov;https://meta.com;https://www.genentech.com;https://www.cmu.edu", "aff_unique_abbr": "CU;Flatiron;Cambridge;NYU;CNRS;LBNL;Meta;Genentech;CMU", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Boulder;;Cambridge;Berkeley", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;2;0;0;0;0", "aff_country_unique": "United States;United Kingdom;France" }, { "title": "Efficient Federated Learning against Heterogeneous and Non-stationary Client Unavailability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96094", "id": "DLNOBJa7TM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DLNOBJa7TM", "openreview": "https://openreview.net/forum?id=DLNOBJa7TM", "poster": "/media/PosterPDFs/NeurIPS%202024/96094.png?t=1732311330.6165123", "project": "", "author_site": "Ming Xiang, Stratis Ioannidis, Edmund Yeh, Carlee Joe-Wong, Lili Su", "tldr": "", "abstract": "Addressing intermittent client availability is critical for the real-world deployment of federated learning algorithms. Most prior work either overlooks the potential non-stationarity in the dynamics of client unavailability or requires substantial memory/computation overhead. We study federated learning in the presence of heterogeneous and non-stationary client availability, which may occur when the deployment environments are uncertain, or the clients are mobile. The impacts of heterogeneity and non-stationarity on client unavailability can be significant, as we illustrate using FedAvg, the most widely adopted federated learning algorithm. We propose FedAWE, which includes novel algorithmic structures that (i) compensate for missed computations due to unavailability with only $O(1)$ additional memory and computation with respect to standard FedAvg, and (ii) evenly diffuse local updates within the federated learning system through implicit gossiping, despite being agnostic to non-stationary dynamics. We show that FedAWE converges to a stationary point of even non-convex objectives while achieving the desired linear speedup property. We corroborate our analysis with numerical experiments over diversified client unavailability dynamics on real-world data sets.", "keywords": "distributed learning;non-convex optimization;federated learning;fault-tolerance", "primary_area": "optimization", "supplementary_material": "/attachment/d2f67a6dd46ac0c4157d893fad62f2c6cd046d02.zip", "author": "Ming Xiang;Stratis Ioannidis;Edmund Yeh;Carlee Joe-Wong;Lili Su", "authorids": "~Ming_Xiang2;~Stratis_Ioannidis1;~Edmund_Yeh1;~Carlee_Joe-Wong1;~Lili_Su1", "gender": "M;M;M;F;F", "homepage": "https://www1.coe.neu.edu/~ming;https://ece.northeastern.edu/fac-ece/ioannidis/;https://coe.northeastern.edu/people/yeh-edmund/;https://www.andrew.cmu.edu/user/cjoewong/;https://sites.google.com/site/lilisuece/", "dblp": "84/6225;42/6940;;40/9937.html;", "google_scholar": "gRiVqSkAAAAJ;GPIB5kUAAAAJ;;XEztdZgAAAAJ;wSHziZ4AAAAJ", "orcid": ";0000-0001-8355-4751;;;", "linkedin": "ming-xiang-06b544204/;stratis-ioannidis-87b826110;;;", "or_profile": "~Ming_Xiang2;~Stratis_Ioannidis1;~Edmund_Yeh1;~Carlee_Joe-Wong1;~Lili_Su1", "aff": "Northeastern University;Meta, Inc.;Northeastern University;Carnegie Mellon University;Northeastern University", "aff_domain": "neu.edu;meta.com;northeastern.edu;cmu.edu;northeastern.edu", "position": "PhD student;Researcher;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxiang2024efficient,\ntitle={Efficient Federated Learning against Heterogeneous and Non-stationary Client Unavailability},\nauthor={Ming Xiang and Stratis Ioannidis and Edmund Yeh and Carlee Joe-Wong and Lili Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DLNOBJa7TM}\n}", "github": "", "reviewers": "i6Qf;ZjRF;DLNT", "pdf_size": 1607596, "rating": "4;6;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;2;4", "wc_summary": "78;77;110", "wc_strengths": "35;117;58", "wc_weaknesses": "189;123;100", "wc_questions": "11;100;99", "wc_limitations": "8;11;42", "wc_review": "321;428;409", "wc_reply_reviewers": "235;73;45", "wc_reply_authors": "752;52;27", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 88.33333333333333, 15.326085243430198 ], "wc_strengths_avg": [ 70.0, 34.53500639447844 ], "wc_weaknesses_avg": [ 137.33333333333334, 37.72119946248911 ], "wc_questions_avg": [ 70.0, 41.72129751897305 ], "wc_limitations_avg": [ 20.333333333333332, 15.369522511198006 ], "wc_review_avg": [ 386.0, 46.611872593435535 ], "wc_reply_reviewers_avg": [ 117.66666666666667, 83.75095356007728 ], "wc_reply_authors_avg": [ 277.0, 336.03075256093257 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15460684861574023142&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "neu.edu;meta.com;northeastern.edu;cmu.edu;northeastern.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Northeastern University;Meta;Carnegie Mellon University", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.northeastern.edu;https://www.meta.com;https://www.cmu.edu", "aff_unique_abbr": "NEU;Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Pretraining with Random Noise for Fast and Robust Learning without Weight Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96093", "id": "DNGfCVBOnU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DNGfCVBOnU", "openreview": "https://openreview.net/forum?id=DNGfCVBOnU", "poster": "/media/PosterPDFs/NeurIPS%202024/96093.png?t=1733553141.8688745", "project": "", "author_site": "Jeonghwan Cheon, Sang Wan Lee, Se-Bum Paik", "tldr": "", "abstract": "The brain prepares for learning even before interacting with the environment, by refining and optimizing its structures through spontaneous neural activity that resembles random noise. However, the mechanism of such a process has yet to be understood, and it is unclear whether this process can benefit the algorithm of machine learning. Here, we study this issue using a neural network with a feedback alignment algorithm, demonstrating that pretraining neural networks with random noise increases the learning efficiency as well as generalization abilities without weight transport. First, we found that random noise training modifies forward weights to match backward synaptic feedback, which is necessary for teaching errors by feedback alignment. As a result, a network with pre-aligned weights learns notably faster and reaches higher accuracy than a network without random noise training, even comparable to the backpropagation algorithm. We also found that the effective dimensionality of weights decreases in a network pretrained with random noise. This pre-regularization allows the network to learn simple solutions of a low rank, reducing the generalization error during subsequent training. This also enables the network to robustly generalize a novel, out-of-distribution dataset. Lastly, we confirmed that random noise pretraining reduces the amount of meta-loss, enhancing the network ability to adapt to various tasks. Overall, our results suggest that random noise training with feedback alignment offers a straightforward yet effective method of pretraining that facilitates quick and reliable learning without weight transport.", "keywords": "Random noise training;Network pretraining;Pre-regularization;Feedback alignment;Error backpropagation;Weight transport problem;Biologically-Plausible Algorithm", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/569a93fc83be87a797e149634f7ba2b8384e7afc.zip", "author": "Jeonghwan Cheon;Sang Wan Lee;Se-Bum Paik", "authorids": "~Jeonghwan_Cheon1;~Sang_Wan_Lee1;~Se-Bum_Paik1", "gender": "M;M;M", "homepage": "https://cheon.io;https://aibrain.kaist.ac.kr/sang-wan-lee;https://cogi.kaist.ac.kr/", "dblp": "341/8000;77/6650;54/11006", "google_scholar": "JnjF6p0AAAAJ;0rMoHW4AAAAJ;VQK2PP0AAAAJ", "orcid": ";;0000-0002-4078-305X", "linkedin": "https://linkedin.com/in/cheon;;https://kr.linkedin.com/in/se-bum-paik-02621572", "or_profile": "~Jeonghwan_Cheon1;~Sang_Wan_Lee1;~Se-Bum_Paik1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncheon2024pretraining,\ntitle={Pretraining with Random Noise for Fast and Robust Learning without Weight Transport},\nauthor={Jeonghwan Cheon and Sang Wan Lee and Se-Bum Paik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DNGfCVBOnU}\n}", "github": "", "reviewers": "5zAn;kKCk;o5xh", "pdf_size": 2472764, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "84;104;51", "wc_strengths": "68;34;41", "wc_weaknesses": "179;58;741", "wc_questions": "225;183;67", "wc_limitations": "44;12;82", "wc_review": "600;391;982", "wc_reply_reviewers": "121;55;0", "wc_reply_authors": "61;29;0", "reply_reviewers": "1;2;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.66666666666667, 21.853044537445015 ], "wc_strengths_avg": [ 47.666666666666664, 14.65908895153068 ], "wc_weaknesses_avg": [ 326.0, 297.57800097901503 ], "wc_questions_avg": [ 158.33333333333334, 66.81982407107108 ], "wc_limitations_avg": [ 46.0, 28.61235164516658 ], "wc_review_avg": [ 657.6666666666666, 244.6961835056326 ], "wc_reply_reviewers_avg": [ 58.666666666666664, 49.46603773185082 ], "wc_reply_authors_avg": [ 30.0, 24.91318258807306 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10384646161085581238&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "BOLD: Boolean Logic Deep Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96092", "id": "DO9wPZOPjk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DO9wPZOPjk", "openreview": "https://openreview.net/forum?id=DO9wPZOPjk", "poster": "", "project": "", "author_site": "Van Minh NGUYEN, Cristian Ocampo-Blandon, Aymen Askri, Louis Leconte, Ba-Hien Tran", "tldr": "", "abstract": "Computational intensiveness of deep learning has motivated low-precision arithmetic designs. However, the current quantized/binarized training approaches are limited by: (1) significant performance loss due to arbitrary approximations of the latent weight gradient through its discretization/binarization function, and (2) training computational intensiveness due to the reliance on full-precision latent weights. \nThis paper proposes a novel mathematical principle by introducing the notion of Boolean variation such that neurons made of Boolean weights and/or activations can be trained ---for the first time--- natively in Boolean domain instead of latent-weight gradient descent and real arithmetic. We explore its convergence, conduct extensively experimental benchmarking, and provide consistent complexity evaluation by considering chip architecture, memory hierarchy, dataflow, and arithmetic precision. Our approach achieves baseline full-precision accuracy in ImageNet classification and surpasses state-of-the-art results in semantic segmentation, with notable performance in image super-resolution, and natural language understanding with transformer-based models. Moreover, it significantly reduces energy consumption during both training and inference.", "keywords": "Boolean neural networks;1-bit training;low precision;Boolean logic;Backpropagation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Van Minh Nguyen;Cristian Ocampo;Aymen Askri;Louis Leconte;Ba-Hien Tran", "authorids": "~Van_Minh_Nguyen1;~Cristian_Ocampo1;~Aymen_Askri1;~Louis_Leconte1;~Ba-Hien_Tran2", "gender": ";M;M;Not Specified;M", "homepage": ";;;;https://tranbahien.github.io/", "dblp": "69/7397;195/0858;;246/3352;279/6617.html", "google_scholar": ";;;;FW26AagAAAAJ", "orcid": "0000-0001-6475-6163;;0000-0002-5911-6724;;", "linkedin": "vanminh/;cfocampob;;;", "or_profile": "~Van_Minh_Nguyen1;~Cristian_Ocampo1;~Aymen_Askri1;~Louis_Leconte1;~Ba-Hien_TRAN1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Lisite;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;huawei.com;sorbonne-universite.fr;huawei.com", "position": "Expert;Researcher;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\nnguyen2024boplusld,\ntitle={B\\${\\textbackslash}oplus\\${LD}: Boolean Logic Deep Learning},\nauthor={Van Minh Nguyen and Cristian Ocampo and Aymen Askri and Louis Leconte and Ba-Hien Tran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DO9wPZOPjk}\n}", "github": "", "reviewers": "xoY6;VF3V;HRmB;faoj;ojfq", "pdf_size": 25422078, "rating": "5;6;7;7;9", "confidence": "4;3;3;1;3", "soundness": "2;3;3;3;4", "novelty": "4;3;3;3;4", "presentation": "2;2;3;3;4", "wc_summary": "22;58;203;45;204", "wc_strengths": "40;55;69;34;200", "wc_weaknesses": "974;141;28;20;13", "wc_questions": "3;404;61;21;94", "wc_limitations": "17;9;1;1;6", "wc_review": "1056;667;362;121;517", "wc_reply_reviewers": "729;0;26;0;18", "wc_reply_authors": "2396;56;27;56;23", "reply_reviewers": "3;0;1;0;1", "reply_authors": "6;2;2;2;2", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 106.4, 80.11641529674178 ], "wc_strengths_avg": [ 79.6, 61.41856396888485 ], "wc_weaknesses_avg": [ 235.2, 372.3747574688703 ], "wc_questions_avg": [ 116.6, 147.13612744666074 ], "wc_limitations_avg": [ 6.8, 5.9464274989274015 ], "wc_review_avg": [ 544.6, 313.0665105053557 ], "wc_reply_reviewers_avg": [ 154.6, 287.3796095759057 ], "wc_reply_authors_avg": [ 511.6, 942.302838794408 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.8, 1.6000000000000003 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.33850160019316505, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LF6PzbWwTWYJ:scholar.google.com/&scioq=BOLD:+Boolean+Logic+Deep+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "huawei.com;huawei.com;huawei.com;sorbonne-universite.fr;huawei.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Huawei;Lisite", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;", "aff_unique_abbr": "Huawei;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "HORSE: Hierarchical Representation for Large-Scale Neural Subset Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96091", "id": "DONsOc7rY1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DONsOc7rY1", "openreview": "https://openreview.net/forum?id=DONsOc7rY1", "poster": "/media/PosterPDFs/NeurIPS%202024/96091.png?t=1731653435.9123225", "project": "", "author_site": "Binghui Xie, Yixuan Wang, Yongqiang Chen, Kaiwen Zhou, Yu Li, Wei Meng, James Cheng", "tldr": "", "abstract": "Subset selection tasks, such as anomaly detection and compound selection in AI-assisted drug discovery, are crucial for a wide range of applications. Learning subset-valued functions with neural networks has achieved great success by incorporating permutation invariance symmetry into the architecture. However, existing neural set architectures often struggle to either capture comprehensive information from the superset or address complex interactions within the input. Additionally, they often fail to perform in scenarios where superset sizes surpass available memory capacity. To address these challenges, we introduce the novel concept of the Identity Property, which requires models to integrate information from the originating set, resulting in the development of neural networks that excel at performing effective subset selection from large supersets. Moreover, we present the Hierarchical Representation of Neural Subset Selection (HORSE), an attention-based method that learns complex interactions and retains information from both the input set and the optimal subset supervision signal. Specifically, HORSE enables the partitioning of the input ground set into manageable chunks that can be processed independently and then aggregated, ensuring consistent outcomes across different partitions. Through extensive experimentation, we demonstrate that HORSE significantly enhances neural subset selection performance by capturing more complex information and surpasses state-of-the-art methods in handling large-scale inputs by a margin of up to 20%.", "keywords": "Subset Selection;Set Encoding;Large-scale inputs", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Binghui Xie;Yixuan Wang;Yongqiang Chen;Kaiwen Zhou;Yu Li;Wei Meng;James Cheng", "authorids": "~Binghui_Xie1;~Yixuan_Wang12;~Yongqiang_Chen1;~Kaiwen_Zhou2;~Yu_Li1;~Wei_Meng1;~James_Cheng2", "gender": "M;F;;M;M;M;M", "homepage": "https://xiebinghui.github.io/;;https://lfhase.win;https://jnhujnhu.github.io/;https://sites.google.com/view/liyu1995;https://www.cse.cuhk.edu.hk/~wei/;https://www.cse.cuhk.edu.hk/~jcheng/", "dblp": "286/4313;;76/5774-2;215/4936;;https://dblp.org/pers/hd/m/Meng_0001:Wei;06/4171", "google_scholar": ";TWt5aUsAAAAJ;huQ_Ig8AAAAJ;nHmlZ5QAAAAJ;8YHZx-AAAAAJ;CBLnYLEAAAAJ;", "orcid": "0000-0001-6533-9281;0000-0002-8989-3129;;;0000-0002-3664-6722;0000-0001-8260-3304;", "linkedin": ";;;;yuli1995/;;", "or_profile": "~Binghui_Xie1;~Yixuan_Wang12;~Yongqiang_Chen1;~Kaiwen_Zhou2;~Yu_Li1;~Wei_Meng1;~James_Cheng2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Huawei Noah's Ark Lab;Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;huawei.com;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nxie2024horse,\ntitle={{HORSE}: Hierarchical Representation for Large-Scale Neural Subset Selection},\nauthor={Binghui Xie and Yixuan Wang and Yongqiang Chen and Kaiwen Zhou and Yu Li and Wei Meng and James Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DONsOc7rY1}\n}", "github": "", "reviewers": "6Cmw;aVVy;tohe;Unf9", "pdf_size": 1030018, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "4;1;3;3", "wc_summary": "77;70;50;58", "wc_strengths": "75;45;125;31", "wc_weaknesses": "161;481;164;72", "wc_questions": "56;67;126;34", "wc_limitations": "9;9;63;1", "wc_review": "378;672;528;196", "wc_reply_reviewers": "0;0;0;15", "wc_reply_authors": "0;75;430;17", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 63.75, 10.449282272003183 ], "wc_strengths_avg": [ 69.0, 36.02776706930364 ], "wc_weaknesses_avg": [ 219.5, 155.4356780150555 ], "wc_questions_avg": [ 70.75, 34.03949911499874 ], "wc_limitations_avg": [ 20.5, 24.753787588973125 ], "wc_review_avg": [ 443.5, 176.7052630795133 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 130.5, 175.13780288675542 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lmhAHdghKVYJ:scholar.google.com/&scioq=HORSE:+Hierarchical+Representation+for+Large-Scale+Neural+Subset+Selection&hl=en&as_sdt=0,47", "gs_version_total": 0, "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;huawei.com;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong;Huawei", "aff_unique_dep": "Department of Computer Science and Engineering;Noah's Ark Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.huawei.com", "aff_unique_abbr": "CUHK;Huawei", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GVKF: Gaussian Voxel Kernel Functions for Highly Efficient Surface Reconstruction in Open Scenes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96090", "id": "DQD0DNRjxk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DQD0DNRjxk", "openreview": "https://openreview.net/forum?id=DQD0DNRjxk", "poster": "/media/PosterPDFs/NeurIPS%202024/96090.png?t=1731432461.2480407", "project": "", "author_site": "Gaochao Song, Chong Cheng, Hao Wang", "tldr": "", "abstract": "In this paper we present a novel method for efficient and effective 3D surface reconstruction in open scenes. Existing Neural Radiance Fields (NeRF) based works typically require extensive training and rendering time due to the adopted implicit representations. \nIn contrast, 3D Gaussian splatting (3DGS) uses an explicit and discrete representation, hence the reconstructed surface is built by the huge number of Gaussian primitives, which leads to excessive memory consumption and rough surface details in sparse Gaussian areas.\nTo address these issues, we propose Gaussian Voxel Kernel Functions (GVKF), which establish a continuous scene representation based on discrete 3DGS through kernel regression. The GVKF integrates fast 3DGS rasterization and highly effective scene implicit representations, achieving high-fidelity open scene surface reconstruction. Experiments on challenging scene datasets demonstrate the efficiency and effectiveness of our proposed GVKF, featuring with high reconstruction quality, real-time rendering speed, significant savings in storage and training memory consumption.", "keywords": "3dgs;Mesh;Sdf;Nerf;Surface Reconstruction", "primary_area": "machine_vision", "supplementary_material": "", "author": "Gaochao Song;Chong Cheng;Hao Wang", "authorids": "~Gaochao_Song1;~Chong_Cheng2;~Hao_Wang20", "gender": "M;M;M", "homepage": ";https://github.com/ChengChong001;https://wanghao.tech/", "dblp": "341/5763;;181/2812-94.html", "google_scholar": "https://scholar.google.com.hk/citations?user=5LbyDkcAAAAJ;;856zi9EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gaochao_Song1;~Chong_Cheng2;~Hao_Wang20", "aff": "Tianjin University;The Hong Kong University of Science and Technology (Guangzhou));The Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "tju.edu.cn;connect.hkust-gz.edu.cn;hkust-gz.edu.cn", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsong2024gvkf,\ntitle={{GVKF}: Gaussian Voxel Kernel Functions for Highly Efficient Surface Reconstruction in Open Scenes},\nauthor={Gaochao Song and Chong Cheng and Hao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DQD0DNRjxk}\n}", "github": "", "reviewers": "zurJ;6M18;sv7m;epfR", "pdf_size": 23949227, "rating": "4;5;6;6", "confidence": "4;2;3;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "1;2;3;4", "wc_summary": "56;64;40;387", "wc_strengths": "19;53;31;100", "wc_weaknesses": "245;201;64;436", "wc_questions": "28;105;159;50", "wc_limitations": "34;8;43;31", "wc_review": "382;431;337;1004", "wc_reply_reviewers": "213;646;16;84", "wc_reply_authors": "715;948;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 136.75, 144.74006874393837 ], "wc_strengths_avg": [ 50.75, 30.93844695520446 ], "wc_weaknesses_avg": [ 236.5, 133.12494131454105 ], "wc_questions_avg": [ 85.5, 50.865017448144066 ], "wc_limitations_avg": [ 29.0, 12.90348790056394 ], "wc_review_avg": [ 538.5, 270.8048190117746 ], "wc_reply_reviewers_avg": [ 239.75, 244.98813746791905 ], "wc_reply_authors_avg": [ 415.75, 423.8327352859852 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.1348399724926484, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13701752665249427051&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "tju.edu.cn;connect.hkust-gz.edu.cn;hkust-gz.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tianjin University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.ust.hk", "aff_unique_abbr": "TJU;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Guangzhou", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Recursive Introspection: Teaching Language Model Agents How to Self-Improve", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96089", "id": "DRC9pZwBwR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DRC9pZwBwR", "openreview": "https://openreview.net/forum?id=DRC9pZwBwR", "poster": "/media/PosterPDFs/NeurIPS%202024/96089.png?t=1733524756.3571784", "project": "", "author_site": "Yuxiao Qu, Tianjun Zhang, Naman Garg, Aviral Kumar", "tldr": "", "abstract": "A central piece in enabling intelligent agentic behavior in foundation models is to make them capable of introspecting upon their behavior, reasoning, and correcting their mistakes as more computation or interaction is available. Even the strongest proprietary large language models (LLMs) do not quite exhibit the ability of continually improving their responses sequentially. In this paper, we develop $\\textbf{RISE:}$ $\\textbf{R}$ecursive $\\textbf{I}$ntro$\\textbf{S}$p$\\textbf{E}$ction, an approach for fine-tuning LLMs to introduce this capability, despite prior work hypothesizing that this capability may not be possible to attain. Our approach prescribes an iterative fine-tuning procedure, which attempts to teach the model how to alter its response after having executed previously unsuccessful attempts to solve a hard test-time problem, with optionally additional environment feedback. RISE poses fine-tuning for a single-turn prompt as solving a multi-turn Markov decision process (MDP), where the initial state is the prompt. Inspired by principles in online imitation and offline reinforcement learning, we propose strategies for multi-turn data collection and training so as to imbue an LLM with the capability to recursively detect and correct its previous mistakes in subsequent iterations. Our experiments show that RISE enables Llama2, Llama3, and Mistral models to improve themselves with more turns on reasoning tasks, outperforming several single-turn strategies given an equal amount of inference-time computation. We also find that RISE scales well, often attaining larger benefits with more capable models, without disrupting one-turn abilities as a result of expressing more complex distributions.", "keywords": "Large Language Model;Reinforcement Learning;Self-Improvement", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuxiao Qu;Tianjun Zhang;Naman Garg;Aviral Kumar", "authorids": "~Yuxiao_Qu1;~Tianjun_Zhang1;~Naman_Garg1;~Aviral_Kumar2", "gender": "M;;M;M", "homepage": "https://cohenqu.github.io/;https://tianjunz.github.io;https://naman-garg.com;https://aviralkumar2907.github.io/", "dblp": ";;;202/7961", "google_scholar": "X5SFuyYAAAAJ;UE9jz_MAAAAJ;;", "orcid": ";;;", "linkedin": ";;namangarg20/;", "or_profile": "~Yuxiao_Qu1;~Tianjun_Zhang1;~Naman_Garg1;~Aviral_Kumar2", "aff": "Carnegie Mellon University;University of California, Berkeley;Multion, Inc;Google DeepMind", "aff_domain": "cmu.edu;berkeley.edu;multion.ai;google.com", "position": "MS student;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nqu2024recursive,\ntitle={Recursive Introspection: Teaching Language Model Agents How to Self-Improve},\nauthor={Yuxiao Qu and Tianjun Zhang and Naman Garg and Aviral Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DRC9pZwBwR}\n}", "github": "", "reviewers": "yTdP;BtCg;spvE", "pdf_size": 4872779, "rating": "5;7;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "3;4;3", "wc_summary": "146;40;97", "wc_strengths": "102;78;59", "wc_weaknesses": "92;199;56", "wc_questions": "67;3;253", "wc_limitations": "44;8;20", "wc_review": "451;328;485", "wc_reply_reviewers": "21;7;20", "wc_reply_authors": "161;16;24", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 94.33333333333333, 43.31538089665404 ], "wc_strengths_avg": [ 79.66666666666667, 17.594190960528863 ], "wc_weaknesses_avg": [ 115.66666666666667, 60.73073539997867 ], "wc_questions_avg": [ 107.66666666666667, 106.03563342365412 ], "wc_limitations_avg": [ 24.0, 14.966629547095765 ], "wc_review_avg": [ 421.3333333333333, 67.44050876307371 ], "wc_reply_reviewers_avg": [ 16.0, 6.377042156569663 ], "wc_reply_authors_avg": [ 67.0, 66.54822812567339 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10607011117495016630&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;berkeley.edu;multion.ai;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley;Multion, Inc;Google", "aff_unique_dep": ";;;Google DeepMind", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu;;https://deepmind.com", "aff_unique_abbr": "CMU;UC Berkeley;;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "DSVGACQ3sO", "title": "Demystifying amortized causal discovery with transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Supervised learning approaches for causal discovery from observational data often achieve competitive performance despite seemingly avoiding explicit assumptions that traditional methods make for identifiability. In this work, we investigate CSIvA \\citep{ke2023learning}, a transformer-based model promising to train on synthetic data and transfer to real data. First, we bridge the gap with existing identifiability theory and show that constraints on the training data distribution implicitly define a prior on the test observations. Consistent with classical approaches, good performance is achieved when we have a good prior on the test data, and the underlying model is identifiable. At the same time, we find new trade-offs. Training on datasets generated from different classes of causal models, unambiguously identifiable in isolation, improves the test generalization. Performance is still guaranteed, as the ambiguous cases resulting from the mixture of identifiable causal models are unlikely to occur (which we formally prove). \nOverall, our study finds that amortized causal discovery still needs to obey identifiability theory, but it also differs from classical methods in how the assumptions are formulated, trading more reliance on assumptions on the noise type for fewer hypotheses on the mechanisms.", "keywords": "causal discovery;amortized inference;transformers;identifiability", "primary_area": "causal_inference", "supplementary_material": "/attachment/6db12b22732acb2657e5bbf9125a756b6f5c26e8.zip", "author": "Francesco Montagna;Max Cairney-Leeming;Dhanya Sridhar;Francesco Locatello", "authorids": "~Francesco_Montagna2;~Max_Cairney-Leeming1;~Dhanya_Sridhar2;~Francesco_Locatello1", "gender": ";;;M", "homepage": "https://www.francescomontagna.com/;;;https://twitter.com/FrancescoLocat8", "dblp": ";;;195/6074", "google_scholar": "StwghVgAAAAJ;;;", "orcid": ";;;", "linkedin": "francesco-montagna/;;;", "or_profile": "~Francesco_Montagna2;~Max_Cairney-Leeming1;~Dhanya_Sridhar2;~Francesco_Locatello1", "aff": "University of Genoa;;;Institute of Science and Technology", "aff_domain": "unige.it;;;ist.ac.at", "position": "PhD student;;;Assistant Professor", "bibtex": "@misc{\nanonymous2024demystifying,\ntitle={Demystifying amortized causal discovery with transformers},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=DSVGACQ3sO}\n}", "github": "", "project": "", "reviewers": "7Mr6;romJ;8T6V;P3PB", "site": "https://openreview.net/forum?id=DSVGACQ3sO", "pdf_size": 615216, "rating": "3;4;4;6", "confidence": "5;3;4;2", "soundness": "2;2;2;3", "novelty": "1;2;3;3", "presentation": "2;2;3;3", "wc_summary": "100;46;106;137", "wc_strengths": "35;44;74;60", "wc_weaknesses": "468;78;308;31", "wc_questions": "27;141;7;101", "wc_limitations": "142;1;1;1", "wc_review": "772;310;496;330", "wc_reply_reviewers": "13;0;0;130", "wc_reply_authors": "0;0;0;278", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 97.25, 32.751908341347075 ], "wc_strengths_avg": [ 53.25, 14.956186011146023 ], "wc_weaknesses_avg": [ 221.25, 176.86629837252772 ], "wc_questions_avg": [ 69.0, 54.35071296680477 ], "wc_limitations_avg": [ 36.25, 61.054790966802926 ], "wc_review_avg": [ 477.0, 184.98918887329606 ], "wc_reply_reviewers_avg": [ 35.75, 54.67346248409735 ], "wc_reply_authors_avg": [ 69.5, 120.37753112603697 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9233805168766388, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9131954774794491888&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Genoa;Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.unige.it;", "aff_unique_abbr": "UniGe;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Italy;" }, { "title": "Tensor-Based Synchronization and the Low-Rankness of the Block Trifocal Tensor", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96088", "id": "DT7n4F2bbP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DT7n4F2bbP", "openreview": "https://openreview.net/forum?id=DT7n4F2bbP", "poster": "/media/PosterPDFs/NeurIPS%202024/96088.png?t=1733798046.3517995", "project": "", "author_site": "Daniel Miao, Gilad Lerman, Joe Kileel", "tldr": "", "abstract": "The block tensor of trifocal tensors provides crucial geometric information on the three-view geometry of a scene. The underlying synchronization problem seeks to recover camera poses (locations and orientations up to a global transformation) from the block trifocal tensor. We establish an explicit Tucker factorization of this tensor, revealing a low multilinear rank of $(6,4,4)$ independent of the number of cameras under appropriate scaling conditions. We prove that this rank constraint provides sufficient information for camera recovery in the noiseless case. The constraint motivates a synchronization algorithm based on the higher-order singular value decomposition of the block trifocal tensor. Experimental comparisons with state-of-the-art global synchronization methods on real datasets demonstrate the potential of this algorithm for significantly improving location estimation accuracy. Overall this work suggests that higher-order interactions in synchronization problems can be exploited to improve performance, beyond the usual pairwise-based approaches.", "keywords": "synchronization;tensor decomposition;structure from motion;multilinear rank;multiview geometry;trifocal tensor;higher-order scene information", "primary_area": "machine_vision", "supplementary_material": "/attachment/a3c4d3573ad52e885ff1feb3d65a0dd13ee8994c.zip", "author": "Daniel Miao;Gilad Lerman;Joe Kileel", "authorids": "~Daniel_Miao1;~Gilad_Lerman1;~Joe_Kileel1", "gender": "M;;M", "homepage": ";;https://web.ma.utexas.edu/users/jkileel/", "dblp": ";;168/8795", "google_scholar": ";;brZD504AAAAJ", "orcid": ";;", "linkedin": "daniel-miao-471977191/;;", "or_profile": "~Daniel_Miao1;~Gilad_Lerman1;~Joe_Kileel1", "aff": "University of Minnesota - Twin Cities;;University of Texas, Austin", "aff_domain": "umn.edu;;utexas.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nmiao2024tensorbased,\ntitle={Tensor-Based Synchronization and the Low-Rankness of the Block Trifocal Tensor},\nauthor={Daniel Miao and Gilad Lerman and Joe Kileel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DT7n4F2bbP}\n}", "github": "", "reviewers": "dwhb;8uUu;fgc4;pYrN", "pdf_size": 588943, "rating": "4;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "70;94;54;101", "wc_strengths": "99;81;12;35", "wc_weaknesses": "172;470;421;138", "wc_questions": "77;147;28;20", "wc_limitations": "27;86;1;10", "wc_review": "445;878;516;304", "wc_reply_reviewers": "0;0;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.75, 18.793283374652763 ], "wc_strengths_avg": [ 56.75, 34.816483165305485 ], "wc_weaknesses_avg": [ 300.25, 146.77257066632035 ], "wc_questions_avg": [ 68.0, 50.5618433208284 ], "wc_limitations_avg": [ 31.0, 33.09833832687073 ], "wc_review_avg": [ 535.75, 211.81876097267684 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9HMdK9IvBQEJ:scholar.google.com/&scioq=Tensor-Based+Synchronization+and+the+Low-Rankness+of+the+Block+Trifocal+Tensor&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "umn.edu;;utexas.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Minnesota;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.utexas.edu", "aff_unique_abbr": "UMN;UT Austin", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Twin Cities;Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Federated Natural Policy Gradient and Actor Critic Methods for Multi-task Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96087", "id": "DUFD6vsyF8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DUFD6vsyF8", "openreview": "https://openreview.net/forum?id=DUFD6vsyF8", "poster": "/media/PosterPDFs/NeurIPS%202024/96087.png?t=1731694360.0710921", "project": "", "author_site": "Tong Yang, Shicong Cen, Yuting Wei, Yuxin Chen, Yuejie Chi", "tldr": "", "abstract": "Federated reinforcement learning (RL) enables collaborative decision making of multiple distributed agents without sharing local data trajectories. In this work, we consider a multi-task setting, in which each agent has its own private reward function corresponding to different tasks, while sharing the same transition kernel of the environment. Focusing on infinite-horizon Markov decision processes, the goal is to learn a globally optimal policy that maximizes the sum of the discounted total rewards of all the agents in a decentralized manner, where each agent only communicates with its neighbors over some prescribed graph topology.\n\nWe develop federated vanilla and entropy-regularized natural policy gradient (NPG) methods in the tabular setting under softmax parameterization, where gradient tracking is applied to estimate the global Q-function to mitigate the impact of imperfect information sharing. We establish non-asymptotic global convergence guarantees under exact policy evaluation, where the rates are nearly independent of the size of the state-action space and illuminate the impacts of network size and connectivity. To the best of our knowledge, this is the first time that global convergence is established for federated multi-task RL using policy optimization. We further go beyond the tabular setting by proposing a federated natural actor critic (NAC) method for multi-task RL with function approximation, and establish its finite-time sample complexity taking the errors of function approximation into account.", "keywords": "federated RL;multi-task RL;natural policy gradient methods;entropy regularization;global convergence;sample complexity", "primary_area": "optimization", "supplementary_material": "", "author": "Tong Yang;Shicong Cen;Yuting Wei;Yuxin Chen;Yuejie Chi", "authorids": "~Tong_Yang4;~Shicong_Cen1;~Yuting_Wei1;~Yuxin_Chen5;~Yuejie_Chi1", "gender": "F;;F;M;", "homepage": "https://pptmiao.github.io;https://www.andrew.cmu.edu/user/shicongc/;https://yutingwei.github.io/;https://yuxinchen2020.github.io/;", "dblp": ";241/9590;184/3856;11/5123-2;", "google_scholar": ";QIRWZf8AAAAJ;fsbXdAYAAAAJ;RtNVud4AAAAJ;", "orcid": ";;;0000-0001-9256-5815;", "linkedin": ";;;;", "or_profile": "~Tong_Yang4;~Shicong_Cen1;~Yuting_Wei1;~Yuxin_Chen5;~Yuejie_Chi1", "aff": "Carnegie Mellon University;Carnegie Mellon University;The Wharton School, University of Pennsylvania;University of Pennsylvania;", "aff_domain": "cmu.edu;andrew.cmu.edu;wharton.upenn.edu;upenn.edu;", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nyang2024federated,\ntitle={Federated Natural Policy Gradient and Actor Critic Methods for Multi-task Reinforcement Learning},\nauthor={Tong Yang and Shicong Cen and Yuting Wei and Yuxin Chen and Yuejie Chi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DUFD6vsyF8}\n}", "github": "", "reviewers": "LgxC;tByy;bXwg;vHsi", "pdf_size": 1206542, "rating": "6;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "55;56;93;55", "wc_strengths": "29;52;79;20", "wc_weaknesses": "66;339;37;217", "wc_questions": "5;1;75;4", "wc_limitations": "6;1;17;1", "wc_review": "161;449;301;297", "wc_reply_reviewers": "20;18;47;55", "wc_reply_authors": "69;69;94;139", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 16.315253599009733 ], "wc_strengths_avg": [ 45.0, 22.83637449333847 ], "wc_weaknesses_avg": [ 164.75, 121.6190260608923 ], "wc_questions_avg": [ 21.25, 31.06746690671771 ], "wc_limitations_avg": [ 6.25, 6.53356717268599 ], "wc_review_avg": [ 302.0, 101.87737727287643 ], "wc_reply_reviewers_avg": [ 35.0, 16.263455967290593 ], "wc_reply_authors_avg": [ 92.75, 28.586491565073178 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1215901747661997791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;andrew.cmu.edu;wharton.upenn.edu;upenn.edu;", "author_num": 5, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Carnegie Mellon University;University of Pennsylvania", "aff_unique_dep": ";The Wharton School", "aff_unique_url": "https://www.cmu.edu;https://www.wharton.upenn.edu", "aff_unique_abbr": "CMU;UPenn Wharton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Language Grounded Multi-agent Reinforcement Learning with Human-interpretable Communication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96086", "id": "DUHX779C5q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DUHX779C5q", "openreview": "https://openreview.net/forum?id=DUHX779C5q", "poster": "/media/PosterPDFs/NeurIPS%202024/96086.png?t=1729732863.1467924", "project": "", "author_site": "Huao Li, Hossein Nourkhiz Mahjoub, Behdad Chalaki, Vaishnav Tadiparthi, Kwonjoon Lee, Ehsan Moradi Pari, Charles Lewis, Katia Sycara", "tldr": "", "abstract": "Multi-Agent Reinforcement Learning (MARL) methods have shown promise in enabling agents to learn a shared communication protocol from scratch and accomplish challenging team tasks. However, the learned language is usually not interpretable to humans or other agents not co-trained together, limiting its applicability in ad-hoc teamwork scenarios. In this work, we propose a novel computational pipeline that aligns the communication space between MARL agents with an embedding space of human natural language by grounding agent communications on synthetic data generated by embodied Large Language Models (LLMs) in interactive teamwork scenarios. Our results demonstrate that introducing language grounding not only maintains task performance but also accelerates the emergence of communication. Furthermore, the learned communication protocols exhibit zero-shot generalization capabilities in ad-hoc teamwork scenarios with unseen teammates and novel task states. This work presents a significant step toward enabling effective communication and collaboration between artificial agents and humans in real-world teamwork settings.", "keywords": "Multi-Agent Reinforcement Learning;Emergent Communication;Ad-hoc Teamwork;Large Language Models", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/649adc931719a6db5c7fa89e2221b511fd2cb1da.zip", "author": "Huao Li;Hossein Nourkhiz Mahjoub;Behdad Chalaki;Vaishnav Tadiparthi;Kwonjoon Lee;Ehsan Moradi Pari;Charles Michael Lewis;Katia P. Sycara", "authorids": "~Huao_Li1;~Hossein_Nourkhiz_Mahjoub1;~Behdad_Chalaki1;~Vaishnav_Tadiparthi1;~Kwonjoon_Lee1;~Ehsan_Moradi_Pari1;~Charles_Michael_Lewis1;~Katia_P._Sycara1", "gender": "M;;M;M;M;M;M;F", "homepage": "https://www.huao-li.com;;;;https://kjunelee.github.io;;http://www.pitt.edu/~cmlewis;", "dblp": "30/783;;;;127/7948;;;s/KatiaPSycara", "google_scholar": "7YPYztQAAAAJ;;i3jzLtYAAAAJ;JI5z86QAAAAJ;C6Wu8M0AAAAJ;2HQUXxsAAAAJ;BBS25qkAAAAJ;VWv6a9kAAAAJ", "orcid": "0000-0002-0027-615X;;0000-0002-3055-1693;;0000-0002-1433-551X;0000-0002-4822-3196;0000-0002-1013-9482;", "linkedin": ";;behdadchalaki/;;;;;", "or_profile": "~Huao_Li1;~Hossein_Nourkhiz_Mahjoub1;~Behdad_Chalaki1;~Vaishnav_Tadiparthi1;~Kwonjoon_Lee1;~Ehsan_Moradi_Pari1;~Charles_Michael_Lewis1;~Katia_P._Sycara1", "aff": "Honda Research Institution US;;Honda Research Institution US;Honda Research Institution US;Honda Research Institute USA;Honda Research Institution US;University of Pittsburgh;Carnegie Mellon University", "aff_domain": "honda-ri.com;;honda-ri.com;honda-ri.com;honda-ri.com;honda-ri.com;pitt.edu;cmu.edu", "position": "Intern;;Researcher;Researcher;Sr Research Scientist;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024language,\ntitle={Language Grounded Multi-agent Reinforcement Learning with Human-interpretable Communication},\nauthor={Huao Li and Hossein Nourkhiz Mahjoub and Behdad Chalaki and Vaishnav Tadiparthi and Kwonjoon Lee and Ehsan Moradi Pari and Charles Michael Lewis and Katia P. Sycara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DUHX779C5q}\n}", "github": "", "reviewers": "YGBb;hS12;oeK1;NCiL", "pdf_size": 1661672, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "60;47;49;103", "wc_strengths": "50;108;46;59", "wc_weaknesses": "273;103;248;89", "wc_questions": "79;2;8;44", "wc_limitations": "28;2;32;45", "wc_review": "490;262;383;340", "wc_reply_reviewers": "869;28;34;0", "wc_reply_authors": "1670;37;24;0", "reply_reviewers": "3;1;1;0", "reply_authors": "5;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 22.63155982251334 ], "wc_strengths_avg": [ 65.75, 24.843258642939738 ], "wc_weaknesses_avg": [ 178.25, 82.87151199296414 ], "wc_questions_avg": [ 33.25, 30.914195768287424 ], "wc_limitations_avg": [ 26.75, 15.610493265749165 ], "wc_review_avg": [ 368.75, 82.35100181539991 ], "wc_reply_reviewers_avg": [ 232.75, 367.5631748420943 ], "wc_reply_authors_avg": [ 432.75, 714.4499195185062 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11682458285346024838&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "honda-ri.com;;honda-ri.com;honda-ri.com;honda-ri.com;honda-ri.com;pitt.edu;cmu.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Honda Research Institute;University of Pittsburgh;Carnegie Mellon University", "aff_unique_dep": "Honda Research Institute;;", "aff_unique_url": "https://honda-ri.com;https://www.pitt.edu;https://www.cmu.edu", "aff_unique_abbr": "HRI;Pitt;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are Language Models Actually Useful for Time Series Forecasting?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96085", "id": "DV15UbHCY1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DV15UbHCY1", "openreview": "https://openreview.net/forum?id=DV15UbHCY1", "poster": "", "project": "", "author_site": "Mingtian Tan, Mike Merrill, Vinayak Gupta, Tim Althoff, Tom Hartvigsen", "tldr": "", "abstract": "Large language models (LLMs) are being applied to time series forecasting. But are language models actually useful for time series? In a series of ablation studies on three recent and popular LLM-based time series forecasting methods, we find that removing the LLM component or replacing it with a basic attention layer does not degrade forecasting performance---in most cases, the results even improve! We also find that despite their significant computational cost, pretrained LLMs do no better than models trained from scratch, do not represent the sequential dependencies in time series, and do not assist in few-shot settings. Additionally, we explore time series encoders and find that patching and attention structures perform similarly to LLM-based forecasters. All resources needed to reproduce our work are available: https://github.com/BennyTMT/LLMsForTimeSeries.", "keywords": "Time Series;Language Models;Time Series Forecasting", "primary_area": "evaluation", "supplementary_material": "/attachment/74b543aa2d4a69414d2ac141127165f1a363108a.zip", "author": "Mingtian Tan;Mike A Merrill;Vinayak Gupta;Tim Althoff;Thomas Hartvigsen", "authorids": "~Mingtian_Tan1;~Mike_A_Merrill1;~Vinayak_Gupta1;~Tim_Althoff2;~Thomas_Hartvigsen1", "gender": "M;;M;M;M", "homepage": "https://www.researchgate.net/profile/Mingtian-Tan;http://mikemerrill.io;https://gvinayak.github.io/;https://althoff.cs.uw.edu/;https://www.tomhartvigsen.com", "dblp": ";274/1060.html;;119/1352;211/5752", "google_scholar": ";;tQuRm1AAAAAJ;yc4nBNgAAAAJ;rIjeeRsAAAAJ", "orcid": ";;;0000-0003-4793-2289;", "linkedin": ";;guptavinayak51/;timalthoff/;", "or_profile": "~Mingtian_Tan1;~Mike_A_Merrill1;~Vinayak_Gupta1;~Tim_Althoff2;~Thomas_Hartvigsen1", "aff": "University of Virginia, Charlottesville;University of Washington;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Virginia, Charlottesville", "aff_domain": "virginia.edu;cs.washington.edu;cs.washington.edu;cs.washington.edu;virginia.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ntan2024are,\ntitle={Are Language Models Actually Useful for Time Series Forecasting?},\nauthor={Mingtian Tan and Mike A Merrill and Vinayak Gupta and Tim Althoff and Thomas Hartvigsen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DV15UbHCY1}\n}", "github": "", "reviewers": "UHrk;CG3z;wccd;jLi6", "pdf_size": 7403244, "rating": "6;7;8;8", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;4;4", "wc_summary": "47;81;54;66", "wc_strengths": "29;87;99;89", "wc_weaknesses": "67;85;75;48", "wc_questions": "49;31;2;2", "wc_limitations": "1;50;7;10", "wc_review": "193;334;237;215", "wc_reply_reviewers": "16;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 62.0, 12.90348790056394 ], "wc_strengths_avg": [ 76.0, 27.51363298439521 ], "wc_weaknesses_avg": [ 68.75, 13.571569548139964 ], "wc_questions_avg": [ 21.0, 20.03746490951388 ], "wc_limitations_avg": [ 17.0, 19.32614809008769 ], "wc_review_avg": [ 244.75, 53.825528329966254 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7838392632653285278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "virginia.edu;cs.washington.edu;cs.washington.edu;cs.washington.edu;virginia.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Virginia;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.washington.edu", "aff_unique_abbr": "UVA;UW", "aff_campus_unique_index": "0;2;2;0", "aff_campus_unique": "Charlottesville;;Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Deep Policy Gradient Methods Without Batch Updates, Target Networks, or Replay Buffers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96084", "id": "DX5GUwMFFb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DX5GUwMFFb", "openreview": "https://openreview.net/forum?id=DX5GUwMFFb", "poster": "", "project": "", "author_site": "Gautham Vasan, Mohamed Elsayed, Seyed Alireza Azimi, Jiamin He, Fahim Shahriar, Colin Bellinger, Martha White, Rupam Mahmood", "tldr": "", "abstract": "Modern deep policy gradient methods achieve effective performance on simulated robotic tasks, but they all require large replay buffers or expensive batch updates, or both, making them incompatible for real systems with resource-limited computers. We show that these methods fail catastrophically when limited to small replay buffers or during *incremental learning*, where updates only use the most recent sample without batch updates or a replay buffer. We propose a novel incremental deep policy gradient method --- *Action Value Gradient (AVG)* and a set of normalization and scaling techniques to address the challenges of instability in incremental learning. On robotic simulation benchmarks, we show that AVG is the only incremental method that learns effectively, often achieving final performance comparable to batch policy gradient methods. This advancement enabled us to show for the first time effective deep reinforcement learning with real robots using only incremental updates, employing a robotic manipulator and a mobile robot.", "keywords": "Reinforcement Learning;Robotics;Deep Learning;Incremental Learning;Real-time Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/00cd51923ef47d96808a3eba1877db0edcb96685.zip", "author": "Gautham Vasan;Mohamed Elsayed;Seyed Alireza Azimi;Jiamin He;Fahim Shahriar;Colin Bellinger;Martha White;A. Rupam Mahmood", "authorids": "~Gautham_Vasan1;~Mohamed_Elsayed2;~Seyed_Alireza_Azimi1;~Jiamin_He1;~Fahim_Shahriar1;~Colin_Bellinger1;~Martha_White1;~A._Rupam_Mahmood1", "gender": "M;M;M;M;M;M;F;", "homepage": "https://gauthamvasan.github.io/;https://mohmdelsayed.github.io;;;https://fahimfss.github.io/;;http://marthawhite.ca;", "dblp": "156/0016;224/8735-3;;307/2922.html;;42/9477;60/7057;120/6935", "google_scholar": "https://scholar.google.ca/citations?user=4qMs0zMAAAAJ;https://scholar.google.ca/citations?user=gShveMAAAAAJ;;;;;t5zdD_IAAAAJ;https://scholar.google.ca/citations?user=YwB8XM4AAAAJ", "orcid": ";;;;;;0000-0002-5356-2950;", "linkedin": ";mohamedelsayed95/;seyedalirezaazimi/;jiamin-he-0314a1180/;;;;", "or_profile": "~Gautham_Vasan1;~Mohamed_Elsayed2;~Seyed_Alireza_Azimi1;~Jiamin_He1;~Fahim_Shahriar1;~Colin_Bellinger1;~Martha_White1;~Rupam_Mahmood1", "aff": "University of Alberta;University of Alberta;University of Alberta;University of Alberta;, University of Alberta;National Research Council Canada;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca;cs.ualberta.ca;nrc-cnrc.gc.ca;ualberta.ca;ualberta.ca", "position": "PhD student;PhD student;MS student;PhD student;MS student;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nvasan2024deep,\ntitle={Deep Policy Gradient Methods Without Batch Updates, Target Networks, or Replay Buffers},\nauthor={Gautham Vasan and Mohamed Elsayed and Seyed Alireza Azimi and Jiamin He and Fahim Shahriar and Colin Bellinger and Martha White and A. Rupam Mahmood},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DX5GUwMFFb}\n}", "github": "", "reviewers": "2Tun;8wnP;bZo8;gtgH", "pdf_size": 4777284, "rating": "6;6;6;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;4", "wc_summary": "199;85;35;136", "wc_strengths": "61;49;48;183", "wc_weaknesses": "364;22;59;182", "wc_questions": "88;50;118;74", "wc_limitations": "8;13;42;37", "wc_review": "720;219;302;612", "wc_reply_reviewers": "302;31;172;62", "wc_reply_authors": "440;53;174;56", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 113.75, 60.808613699047605 ], "wc_strengths_avg": [ 85.25, 56.66734068226601 ], "wc_weaknesses_avg": [ 156.75, 133.51287391109517 ], "wc_questions_avg": [ 82.5, 24.591665254715874 ], "wc_limitations_avg": [ 25.0, 14.713938969562161 ], "wc_review_avg": [ 463.25, 208.39070876601 ], "wc_reply_reviewers_avg": [ 141.75, 106.32585527518695 ], "wc_reply_authors_avg": [ 180.75, 157.43153273725056 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4458843217728604191&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca;cs.ualberta.ca;nrc-cnrc.gc.ca;ualberta.ca;ualberta.ca", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "University of Alberta;National Research Council Canada", "aff_unique_dep": ";", "aff_unique_url": "https://www.ualberta.ca;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "UAlberta;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Identifying Spatio-Temporal Drivers of Extreme Events", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96083", "id": "DdKdr4kqxh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DdKdr4kqxh", "openreview": "https://openreview.net/forum?id=DdKdr4kqxh", "poster": "/media/PosterPDFs/NeurIPS%202024/96083.png?t=1732058673.9529023", "project": "", "author_site": "Mohamad Hakam Shams Eddin, J\u00fcrgen Gall", "tldr": "", "abstract": "The spatio-temporal relations of impacts of extreme events and their drivers in climate data are not fully understood and there is a need of machine learning approaches to identify such spatio-temporal relations from data. The task, however, is very challenging since there are time delays between extremes and their drivers, and the spatial response of such drivers is inhomogeneous. In this work, we propose a first approach and benchmarks to tackle this challenge. Our approach is trained end-to-end to predict spatio-temporally extremes and spatio-temporally drivers in the physical input variables jointly. By enforcing the network to predict extremes from spatio-temporal binary masks of identified drivers, the network successfully identifies drivers that are correlated with extremes. We evaluate our approach on three newly created synthetic benchmarks, where two of them are based on remote sensing or reanalysis climate data, and on two real-world reanalysis datasets. The source code and datasets are publicly available at the project page https://hakamshams.github.io/IDE.", "keywords": "anomaly detection;weakly supervised learning;Earth science;climate science;remote sensing;deep learning", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Mohamad Hakam Shams Eddin;Juergen Gall", "authorids": "~Mohamad_Hakam_Shams_Eddin1;~Juergen_Gall1", "gender": "M;", "homepage": "https://hakamshams.github.io/;https://pages.iai.uni-bonn.de/gall_juergen/", "dblp": "336/3504;13/6920", "google_scholar": "T6vkqEsAAAAJ;1CLaPMEAAAAJ", "orcid": "0000-0003-3136-4619;0000-0002-9447-3399", "linkedin": "mohamad-hakam-shams-eddin-03bb29195/;", "or_profile": "~Mohamad_Hakam_Shams_Eddin1;~Juergen_Gall1", "aff": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn", "aff_domain": "uni-bonn.de;uni-bonn.de", "position": "PhD student;Professor", "bibtex": "@inproceedings{\neddin2024identifying,\ntitle={Identifying Spatio-Temporal Drivers of Extreme Events},\nauthor={Mohamad Hakam Shams Eddin and Juergen Gall},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DdKdr4kqxh}\n}", "github": "", "reviewers": "fRrQ;wHsZ;TRJw;5qbR", "pdf_size": 33076472, "rating": "4;6;6;6", "confidence": "3;4;4;3", "soundness": "2;3;3;2", "novelty": "2;3;3;2", "presentation": "2;1;2;3", "wc_summary": "48;106;150;56", "wc_strengths": "34;176;81;14", "wc_weaknesses": "94;241;915;46", "wc_questions": "76;359;16;23", "wc_limitations": "6;9;37;11", "wc_review": "258;891;1199;150", "wc_reply_reviewers": "0;144;230;0", "wc_reply_authors": "28;468;367;0", "reply_reviewers": "0;3;2;0", "reply_authors": "2;4;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 90.0, 41.15823125451335 ], "wc_strengths_avg": [ 76.25, 62.51549807847651 ], "wc_weaknesses_avg": [ 324.0, 348.6954258375065 ], "wc_questions_avg": [ 118.5, 140.77730640980457 ], "wc_limitations_avg": [ 15.75, 12.397076268217438 ], "wc_review_avg": [ 624.5, 436.0461558138083 ], "wc_reply_reviewers_avg": [ 93.5, 98.31963181379393 ], "wc_reply_authors_avg": [ 215.75, 205.1248095672486 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UIC-tsGwaxEJ:scholar.google.com/&scioq=Identifying+Spatio-Temporal+Drivers+of+Extreme+Events&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "uni-bonn.de;uni-bonn.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-bonn.de/;https://www.uni-bonn.de/", "aff_unique_abbr": "Uni Bonn;UBonn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Policy Learning from Tutorial Books via Understanding, Rehearsing and Introspecting", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96082", "id": "Ddak3nSqQM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ddak3nSqQM", "openreview": "https://openreview.net/forum?id=Ddak3nSqQM", "poster": "", "project": "", "author_site": "Xiong-Hui Chen, Ziyan Wang, Yali Du, Shengyi Jiang, Meng Fang, Yang Yu, Jun Wang", "tldr": "", "abstract": "When humans need to learn a new skill, we can acquire knowledge through written books, including textbooks, tutorials, etc. However, current research for decision-making, like reinforcement learning (RL), has primarily required numerous real interactions with the target environment to learn a skill, while failing to utilize the existing knowledge already summarized in the text. The success of Large Language Models (LLMs) sheds light on utilizing such knowledge behind the books. In this paper, we discuss a new policy learning problem called Policy Learning from tutorial Books (PLfB) upon the shoulders of LLMs\u2019 systems, which aims to leverage rich resources such as tutorial books to derive a policy network. Inspired by how humans learn from books, we solve the problem via a three-stage framework: Understanding, Rehearsing, and Introspecting (URI). In particular, it first rehearses decision-making trajectories based on the derived knowledge after understanding the books, then introspects in the imaginary dataset to distill a policy network. \n We build two benchmarks for PLfB~based on Tic-Tac-Toe and Football games. In experiment, URI's policy achieves at least 44% net win rate against GPT-based agents without any real data; In Football game, which is a complex scenario, URI's policy beat the built-in AIs with a 37% while using GPT-based agent can only achieve a 6\\% winning rate. The project page: https://plfb-football.github.io.", "keywords": "Reinforcement Learning;Large Language Model;Agent;Retrieval Augmented Generation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Xiong-Hui Chen;Ziyan Wang;Yali Du;Shengyi Jiang;Meng Fang;Yang Yu;Jun Wang", "authorids": "~Xiong-Hui_Chen1;~Ziyan_Wang3;~Yali_Du1;~Shengyi_Jiang2;~Meng_Fang1;~Yang_Yu5;~Jun_Wang2", "gender": "M;M;;M;M;;M", "homepage": "http://www.lamda.nju.edu.cn/chenxh/;https://ziyan-wang98.github.io/;;http://www.lamda.nju.edu.cn/jiangsy;;;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": "241/7938;;;67/3929;67/463;;w/JunWang12", "google_scholar": "H5pguCYAAAAJ;1Yu8JFIAAAAJ;;;IcNYP1oAAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "orcid": ";;;0000-0002-4443-0753;;;", "linkedin": ";;;;;;", "or_profile": "~Xiong-Hui_Chen1;~Ziyan_Wang3;~Yali_Du1;~Shengyi_Jiang2;~Meng_Fang1;~Yang_Yu5;~Jun_Wang2", "aff": "Nanjing University;King's College London;;The University of Hong Kong;Eindhoven University of Technology;;University College London", "aff_domain": "nju.edu.cn;kcl.ac.uk;;hku.hk;tue.nl;;ucl.ac.uk", "position": "PhD student;PhD student;;PhD student;Assistant Professor;;Professor", "bibtex": "@inproceedings{\nchen2024policy,\ntitle={Policy Learning from Tutorial Books via Understanding, Rehearsing and Introspecting},\nauthor={Xiong-Hui Chen and Ziyan Wang and Yali Du and Shengyi Jiang and Meng Fang and Yang Yu and Jun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ddak3nSqQM}\n}", "github": "", "reviewers": "HEXR;b6EF;mSmU;uFxN", "pdf_size": 5258770, "rating": "6;7;7;8", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;2;3;3", "wc_summary": "278;140;170;116", "wc_strengths": "147;44;106;57", "wc_weaknesses": "117;128;80;90", "wc_questions": "108;48;121;27", "wc_limitations": "1;12;5;1", "wc_review": "651;372;482;291", "wc_reply_reviewers": "15;39;17;17", "wc_reply_authors": "36;48;32;51", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 176.0, 61.91930232165088 ], "wc_strengths_avg": [ 88.5, 40.929817981515626 ], "wc_weaknesses_avg": [ 103.75, 19.472737352514155 ], "wc_questions_avg": [ 76.0, 39.477841886303764 ], "wc_limitations_avg": [ 4.75, 4.493050188902857 ], "wc_review_avg": [ 449.0, 134.8944031455716 ], "wc_reply_reviewers_avg": [ 22.0, 9.848857801796104 ], "wc_reply_authors_avg": [ 41.75, 7.949056547792323 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10528066843190586053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nju.edu.cn;kcl.ac.uk;;hku.hk;tue.nl;;ucl.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Nanjing University;King's College London;University of Hong Kong;Eindhoven University of Technology;University College London", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.nju.edu.cn;https://www.kcl.ac.uk;https://www.hku.hk;https://www.tue.nl;https://www.ucl.ac.uk", "aff_unique_abbr": "Nanjing U;KCL;HKU;TU/e;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;2;1", "aff_country_unique": "China;United Kingdom;Netherlands" }, { "title": "HiCoM: Hierarchical Coherent Motion for Dynamic Streamable Scenes with 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96081", "id": "De4VWE4rbz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=De4VWE4rbz", "openreview": "https://openreview.net/forum?id=De4VWE4rbz", "poster": "/media/PosterPDFs/NeurIPS%202024/96081.png?t=1732697969.9978526", "project": "", "author_site": "Qiankun Gao, Jiarui Meng, Chengxiang Wen, Jie Chen, Jian Zhang", "tldr": "", "abstract": "The online reconstruction of dynamic scenes from multi-view streaming videos faces significant challenges in training, rendering and storage efficiency. Harnessing superior learning speed and real-time rendering capabilities, 3D Gaussian Splatting (3DGS) has recently demonstrated considerable potential in this field. However, 3DGS can be inefficient in terms of storage and prone to overfitting by excessively growing Gaussians, particularly with limited views. This paper proposes an efficient framework, dubbed HiCoM, with three key components. First, we construct a compact and robust initial 3DGS representation using a perturbation smoothing strategy. Next, we introduce a Hierarchical Coherent Motion mechanism that leverages the inherent non-uniform distribution and local consistency of 3D Gaussians to swiftly and accurately learn motions across frames. Finally, we continually refine the 3DGS with additional Gaussians, which are later merged into the initial 3DGS to maintain consistency with the evolving scene. To preserve a compact representation, an equivalent number of low-opacity Gaussians that minimally impact the representation are removed before processing subsequent frames. Extensive experiments conducted on two widely used datasets show that our framework improves learning efficiency of the state-of-the-art methods by about 20% and reduces the data storage by 85%, achieving competitive free-viewpoint video synthesis quality but with higher robustness and stability. Moreover, by parallel learning multiple frames simultaneously, our HiCoM decreases the average training wall time to <2 seconds per frame with negligible performance degradation, substantially boosting real-world applicability and responsiveness.", "keywords": "3D Gaussian Splatting;Streamable Dynamic Scene;Online 4D Resconstruction", "primary_area": "online_learning", "supplementary_material": "", "author": "Qiankun Gao;Jiarui Meng;Chengxiang Wen;Jie Chen;Jian Zhang", "authorids": "~Qiankun_Gao1;~Jiarui_Meng1;~Chengxiang_Wen1;~Jie_Chen15;~Jian_Zhang22", "gender": ";M;M;M;M", "homepage": ";https://github.com/JrMeng0312;https://villa.jianzhang.tech/people/chengxiang-wen-%E6%B8%A9%E7%A8%8B%E7%BF%94/;https://aimia-pku.github.io/;http://jianzhang.tech/", "dblp": ";374/6336;;92/6289-1;07/314-18", "google_scholar": ";;;https://scholar.google.fi/citations?user=ZAZFfwwAAAAJ;7brFI_4AAAAJ", "orcid": ";;;;0000-0001-5486-3125", "linkedin": ";;;;", "or_profile": "~Qiankun_Gao1;~Jiarui_Meng1;~Chengxiang_Wen1;~Jie_Chen15;~Jian_Zhang22", "aff": ";Peking University;Peking University;Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": ";MS student;Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2024hicom,\ntitle={HiCoM: Hierarchical Coherent Motion for Dynamic Streamable Scenes with 3D Gaussian Splatting},\nauthor={Qiankun Gao and Jiarui Meng and Chengxiang Wen and Jie Chen and Jian Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=De4VWE4rbz}\n}", "github": "", "reviewers": "oHau;QKxo;q4mz", "pdf_size": 12927613, "rating": "4;5;6", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "104;38;73", "wc_strengths": "41;35;51", "wc_weaknesses": "104;114;103", "wc_questions": "53;31;105", "wc_limitations": "7;58;7", "wc_review": "309;276;339", "wc_reply_reviewers": "74;270;449", "wc_reply_authors": "164;561;1412", "reply_reviewers": "1;2;5", "reply_authors": "2;4;6", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.66666666666667, 26.960877005188255 ], "wc_strengths_avg": [ 42.333333333333336, 6.599663291074444 ], "wc_weaknesses_avg": [ 107.0, 4.96655480858378 ], "wc_questions_avg": [ 63.0, 31.026870075253587 ], "wc_limitations_avg": [ 24.0, 24.041630560342615 ], "wc_review_avg": [ 308.0, 25.729360660537214 ], "wc_reply_reviewers_avg": [ 264.3333333333333, 153.145537171527 ], "wc_reply_authors_avg": [ 712.3333333333334, 520.6101121654178 ], "reply_reviewers_avg": [ 2.6666666666666665, 1.699673171197595 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14734703081618096272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Animal-Bench: Benchmarking Multimodal Video Models for Animal-centric Video Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96080", "id": "DexM7d1H6e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DexM7d1H6e", "openreview": "https://openreview.net/forum?id=DexM7d1H6e", "poster": "/media/PosterPDFs/NeurIPS%202024/96080.png?t=1731563246.6432207", "project": "", "author_site": "Yinuo Jing, Ruxu Zhang, Kongming Liang, Yongxiang Li, Zhongjiang He, Zhanyu Ma, Jun Guo", "tldr": "", "abstract": "With the emergence of large pre-trained multimodal video models, multiple benchmarks have been proposed to evaluate model capabilities. However, most of the benchmarks are human-centric, with evaluation data and tasks centered around human applications. Animals are an integral part of the natural world, and animal-centric video understanding is crucial for animal welfare and conservation efforts. Yet, existing benchmarks overlook evaluations focused on animals, limiting the application of the models. To address this limitation, our work established an animal-centric benchmark, namely Animal-Bench, to allow for a comprehensive evaluation of model capabilities in real-world contexts, overcoming agent-bias in previous benchmarks. Animal-Bench includes 13 tasks encompassing both common tasks shared with humans and special tasks relevant to animal conservation, spanning 7 major animal categories and 819 species, comprising a total of 41,839 data entries. To generate this benchmark, we defined a task system centered on animals and proposed an automated pipeline for animal-centric data processing. To further validate the robustness of models against real-world challenges, we utilized a video editing approach to simulate realistic scenarios like weather changes and shooting parameters due to animal movements. We evaluated 8 current multimodal video models on our benchmark and found considerable room for improvement. We hope our work provides insights for the community and opens up new avenues for research in multimodal video models. Our data and code will be released at https://github.com/PRIS-CV/Animal-Bench.", "keywords": "Multimodal video model;Evaluation benchmark;Robustness testing", "primary_area": "evaluation", "supplementary_material": "", "author": "Yinuo Jing;Ruxu Zhang;Kongming Liang;Yongxiang Li;Zhongjiang He;Zhanyu Ma;Jun Guo", "authorids": "~Yinuo_Jing1;~Ruxu_Zhang2;~Kongming_Liang2;~Yongxiang_Li2;~Zhongjiang_He1;~Zhanyu_Ma1;~Jun_Guo1", "gender": "F;F;M;;M;M;M", "homepage": ";https://mail.bupt.edu.cn;;;;https://zhanyuma.cn/;", "dblp": "303/9214;;161/1948;;348/6925;;204/6231", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;dmlkJR4AAAAJ;;;5GAAs7IAAAAJ;bZOvn6QAAAAJ", "orcid": ";;;;;0000-0003-2950-2488;", "linkedin": ";;;;%E5%BF%A0%E6%B1%9F-%E4%BD%95-7bb92391/;;", "or_profile": "~Yinuo_Jing1;~Ruxu_Zhang2;~Kongming_Liang2;~Yongxiang_Li2;~Zhongjiang_He1;~Zhanyu_Ma1;~Jun_Guo1", "aff": "Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications;;ChinaTelecom;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;;bupt.edu.cn;;chinatelecom.cn;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;;Associate Professor;;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\njing2024animalbench,\ntitle={Animal-Bench: Benchmarking Multimodal Video Models for Animal-centric Video Understanding},\nauthor={Yinuo Jing and Ruxu Zhang and Kongming Liang and Yongxiang Li and Zhongjiang He and Zhanyu Ma and Jun Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DexM7d1H6e}\n}", "github": "", "reviewers": "yXxA;xc4z;Mynx;xfS8", "pdf_size": 14365685, "rating": "5;6;7;7", "confidence": "3;3;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "94;86;68;112", "wc_strengths": "59;72;75;69", "wc_weaknesses": "62;207;258;59", "wc_questions": "98;2;13;309", "wc_limitations": "12;40;13;132", "wc_review": "325;407;427;681", "wc_reply_reviewers": "22;42;47;122", "wc_reply_authors": "0;0;21;30", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.0, 15.811388300841896 ], "wc_strengths_avg": [ 68.75, 6.015604707757983 ], "wc_weaknesses_avg": [ 146.5, 87.8763335602937 ], "wc_questions_avg": [ 105.5, 123.22438881974624 ], "wc_limitations_avg": [ 49.25, 49.078381187647174 ], "wc_review_avg": [ 460.0, 133.19534526401438 ], "wc_reply_reviewers_avg": [ 58.25, 37.97614382740828 ], "wc_reply_authors_avg": [ 12.75, 13.141061600951424 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10193937425744617726&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "bupt.edu.cn;;bupt.edu.cn;;chinatelecom.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;China Telecom", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.chinatelecom.com.cn", "aff_unique_abbr": "BUPT;China Telecom", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "cPAPERS: A Dataset of Situated and Multimodal Interactive Conversations in Scientific Papers", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97802", "id": "DfhcOelEnP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DfhcOelEnP", "openreview": "https://openreview.net/forum?id=DfhcOelEnP", "poster": "/media/PosterPDFs/NeurIPS%202024/97802.png?t=1733578866.4010928", "project": "", "author_site": "Anirudh Sundar, Jin Xu, William Gay, Christopher Richardson, Larry Heck", "tldr": "", "abstract": "An emerging area of research in situated and multimodal interactive conversations (SIMMC) includes interactions in scientific papers. Since scientific papers are primarily composed of text, equations, figures, and tables, SIMMC methods must be developed specifically for each component to support the depth of inquiry and interactions required by research scientists. This work introduces $Conversational Papers$ (cPAPERS), a dataset of conversational question-answer pairs from reviews of academic papers grounded in these paper components and their associated references from scientific documents available on arXiv. We present a data collection strategy to collect these question-answer pairs from OpenReview and associate them with contextual information from $LaTeX$ source files. Additionally, we present a series of baseline approaches utilizing Large Language Models (LLMs) in both zero-shot and fine-tuned configurations to address the cPAPERS dataset.", "keywords": "Conversational Papers", "primary_area": "", "supplementary_material": "/attachment/d0c2a1df370e16d53e4576b7a6da41c02f5c6b21.pdf", "author": "Anirudh Sundar;Jin Xu;William Gay;Christopher Gordon Richardson;Larry Heck", "authorids": "~Anirudh_Sundar1;~Jin_Xu9;~William_Gay1;~Christopher_Gordon_Richardson1;~Larry_Heck2", "gender": ";M;M;;M", "homepage": ";https://williamgay.me;;https://larryheck.github.io/;https://anirudhssundar.github.io/", "dblp": ";;;81/5380.html;335/2670", "google_scholar": "tNg5EqkAAAAJ;;https://scholar.google.com/citations?hl=en;33ZWJmEAAAAJ;zaosyNUAAAAJ", "orcid": ";;;;0000-0002-0177-6842", "linkedin": "jinxugt/;;christopher-richardson-b9690293/;larryheck/;anirudhssundar/", "or_profile": "~Jin_Xu9;~William_Gay1;~Christopher_Gordon_Richardson1;~Larry_Heck2;~Anirudh_S_Sundar1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;;gatech.edu;gatech.edu;gatech.edu", "position": "Researcher;;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nsundar2024cpapers,\ntitle={c{PAPERS}: A Dataset of Situated and Multimodal Interactive Conversations in Scientific Papers},\nauthor={Anirudh Sundar and Jin Xu and William Gay and Christopher Gordon Richardson and Larry Heck},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DfhcOelEnP}\n}", "github": "", "reviewers": "yM2s;TWp6;xcYG", "pdf_size": 5107195, "rating": "6;7;8", "confidence": "4;3;3", "wc_summary_and_contributions": "36;75;28", "wc_strengths": "44;80;91", "wc_improvement": "413;80;54", "wc_limitations": "9;194;165", "wc_correctness": "2;57;16", "wc_clarity": "5;22;24", "wc_relation_to_prior_work": "6;33;1", "wc_documentation": "48;35;1", "wc_additional_feedback": "1;1;1", "wc_review": "564;577;381", "wc_reply_reviewers": "318;0;83", "wc_reply_authors": "36;0;0", "reply_reviewers": "1;0;1", "reply_authors": "4;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 46.333333333333336, 20.531818125912658 ], "wc_strengths_avg": [ 71.66666666666667, 20.07209228976613 ], "wc_improvement_avg": [ 182.33333333333334, 163.45097803996836 ], "wc_limitations_avg": [ 122.66666666666667, 81.24175171808041 ], "wc_correctness_avg": [ 25.0, 23.338094752285727 ], "wc_clarity_avg": [ 17.0, 8.524474568362947 ], "wc_relation_to_prior_work_avg": [ 13.333333333333334, 14.055445761538675 ], "wc_documentation_avg": [ 28.0, 19.8158185969358 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 507.3333333333333, 89.48867091549758 ], "wc_reply_reviewers_avg": [ 133.66666666666666, 134.67574226844104 ], "wc_reply_authors_avg": [ 12.0, 16.97056274847714 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Cn73sL_K0KwJ:scholar.google.com/&scioq=cPAPERS:+A+Dataset+of+Situated+and+Multimodal+Interactive+Conversations+in+Scientific+Papers&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "gatech.edu;;gatech.edu;gatech.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Contextual Decision-Making with Knapsacks Beyond the Worst Case", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96079", "id": "Dgt6sh2ruQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dgt6sh2ruQ", "openreview": "https://openreview.net/forum?id=Dgt6sh2ruQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96079.png?t=1730366294.9079967", "project": "", "author_site": "Zhaohua Chen, Rui Ai, Mingwei Yang, Yuqi Pan, Chang Wang, Xiaotie Deng", "tldr": "", "abstract": "We study the framework of a dynamic decision-making scenario with resource constraints.\nIn this framework, an agent, whose target is to maximize the total reward under the initial inventory, selects an action in each round upon observing a random request, leading to a reward and resource consumptions that are further associated with an unknown random external factor.\nWhile previous research has already established an $\\widetilde{O}(\\sqrt{T})$ worst-case regret for this problem, this work offers two results that go beyond the worst-case perspective: one for the worst-case gap between benchmarks and another for logarithmic regret rates.\nWe first show that an $\\Omega(\\sqrt{T})$ distance between the commonly used fluid benchmark and the online optimum is unavoidable when the former has a degenerate optimal solution.\nOn the algorithmic side, we merge the re-solving heuristic with distribution estimation skills and propose an algorithm that achieves an $\\widetilde{O}(1)$ regret as long as the fluid LP has a unique and non-degenerate solution.\nFurthermore, we prove that our algorithm maintains a near-optimal $\\widetilde{O}(\\sqrt{T})$ regret even in the worst cases and extend these results to the setting where the request and external factor are continuous.\nRegarding information structure, our regret results are obtained under two feedback models, respectively, where the algorithm accesses the external factor at the end of each round and at the end of a round only when a non-null action is executed.", "keywords": "Contextual Decision-Making with Knapsacks;Re-Solving", "primary_area": "learning_theory", "supplementary_material": "/attachment/b6cb58c60fa4539410cff6f2ef68a517d5936675.zip", "author": "Zhaohua Chen;Rui Ai;Mingwei Yang;Yuqi Pan;Chang Wang;Xiaotie Deng", "authorids": "~Zhaohua_Chen1;~Rui_Ai1;~Mingwei_Yang1;~Yuqi_Pan1;~Chang_Wang4;~Xiaotie_Deng1", "gender": "M;M;M;F;;M", "homepage": "https://daleczh.github.io/;https://air-8.github.io/;https://mingwei-yang.netlify.app/;;;https://cfcs.pku.edu.cn/english/people/faculty/xiaotiedeng/index.htm", "dblp": "121/7325-1;184/2621-2;193/9236-2;52/4131.html;;d/XiaotieDeng", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;IoU4kj8AAAAJ;nVfIRLMAAAAJ;Kg3H9PsAAAAJ;;https://scholar.google.com.tw/citations?user=OBUwP_oAAAAJ", "orcid": "0000-0002-8895-5236;;;;;0000-0002-5282-6467", "linkedin": ";;;;;", "or_profile": "~Zhaohua_Chen1;~Rui_Ai1;~Mingwei_Yang1;~Yuqi_Pan1;~Chang_Wang4;~Xiaotie_Deng1", "aff": "Peking University;Massachusetts Institute of Technology;Stanford University;Peking University;;Peking University", "aff_domain": "pku.edu.cn;mit.edu;stanford.edu;pku.edu.cn;;pku.edu.cn", "position": "PhD student;PhD student;PhD student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nchen2024contextual,\ntitle={Contextual Decision-Making with Knapsacks Beyond the Worst Case},\nauthor={Zhaohua Chen and Rui Ai and Mingwei Yang and Yuqi Pan and Chang Wang and Xiaotie Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dgt6sh2ruQ}\n}", "github": "", "reviewers": "ZSDZ;TgME;m7z3;c8Hc", "pdf_size": 1458882, "rating": "5;5;6;6", "confidence": "3;2;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "69;60;60;77", "wc_strengths": "102;71;31;28", "wc_weaknesses": "209;46;103;81", "wc_questions": "13;205;18;142", "wc_limitations": "1;11;1;1", "wc_review": "394;393;213;329", "wc_reply_reviewers": "46;16;9;11", "wc_reply_authors": "27;3;3;15", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.5, 7.088723439378913 ], "wc_strengths_avg": [ 58.0, 30.553232234904378 ], "wc_weaknesses_avg": [ 109.75, 60.80039062374517 ], "wc_questions_avg": [ 94.5, 82.09902557278009 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 332.25, 73.71355031471487 ], "wc_reply_reviewers_avg": [ 20.5, 14.941552797483935 ], "wc_reply_authors_avg": [ 12.0, 9.9498743710662 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ePv6RU37nEsJ:scholar.google.com/&scioq=Contextual+Decision-Making+with+Knapsacks+Beyond+the+Worst+Case&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "pku.edu.cn;mit.edu;stanford.edu;pku.edu.cn;;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Peking University;Massachusetts Institute of Technology;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://web.mit.edu;https://www.stanford.edu", "aff_unique_abbr": "Peking U;MIT;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Instruction Tuning Large Language Models to Understand Electronic Health Records", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97801", "id": "Dgy5WVgPd2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dgy5WVgPd2", "openreview": "https://openreview.net/forum?id=Dgy5WVgPd2", "poster": "/media/PosterPDFs/NeurIPS%202024/97801.png?t=1733987087.0227022", "project": "", "author_site": "Zhenbang Wu, Anant Dadu, Michael Nalls, Faraz Faghri, Jimeng Sun", "tldr": "", "abstract": "Large language models (LLMs) have shown impressive capabilities in solving a wide range of tasks based on human instructions. However, developing a conversational AI assistant for electronic health record (EHR) data remains challenging due to (1) the lack of large-scale instruction-following datasets and (2) the limitations of existing model architectures in handling complex and heterogeneous EHR data.\nIn this paper, we introduce MIMIC-Instr, a dataset comprising over 400K open-ended instruction-following examples derived from the MIMIC-IV EHR database. This dataset covers various topics and is suitable for instruction-tuning general-purpose LLMs for diverse clinical use cases. Additionally, we propose Llemr, a general framework that enables LLMs to process and interpret EHRs with complex data structures. Llemr demonstrates competitive performance in answering a wide range of patient-related questions based on EHR data.\nFurthermore, our evaluations on clinical predictive modeling benchmarks reveal that the fine-tuned Llemr achieves performance comparable to state-of-the-art (SOTA) baselines using curated features. The dataset and code are available at \\url{https://github.com/zzachw/llemr}.", "keywords": "electronic health record;clinical predictive modeling;instruction following;large language models;foundation models", "primary_area": "", "supplementary_material": "/attachment/18a4f67c98fcca1182e37c54286181341ab91981.pdf", "author": "Zhenbang Wu;Anant Dadu;Michael Nalls;Faraz Faghri;Jimeng Sun", "authorids": "~Zhenbang_Wu1;~Anant_Dadu1;~Michael_Nalls1;~Faraz_Faghri1;~Jimeng_Sun3", "gender": "M;M;M;M;", "homepage": ";;https://www.datatecnica.com;;http://sunlab.org", "dblp": "315/0212;;;;", "google_scholar": "N8p-spIAAAAJ;https://scholar.google.com/citations?hl=en;ZjfgPLMAAAAJ;Rr643xYAAAAJ;9jmmp5sAAAAJ", "orcid": ";;;;0000-0003-1512-6426", "linkedin": ";;;;jimengsun/", "or_profile": "~Zhenbang_Wu1;~Anant_Dadu1;~Michael_Nalls1;~Faraz_Faghri1;~Jimeng_Sun3", "aff": "University of Illinois Urbana Champaign;Department of Computer Science;;National Institutes of Health ;Georgia Institute of Technology", "aff_domain": "illinois.edu;cs.illinois.edu;;nih.gov;gatech.edu", "position": "PhD student;PhD student;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nwu2024instruction,\ntitle={Instruction Tuning Large Language Models to Understand Electronic Health Records},\nauthor={Zhenbang Wu and Anant Dadu and Michael Nalls and Faraz Faghri and Jimeng Sun},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Dgy5WVgPd2}\n}", "github": "", "reviewers": "AKvM;AY1w;Tq86;vJma", "pdf_size": 598764, "rating": "7;7;9;9", "confidence": "4;4;3;5", "wc_summary_and_contributions": "70;138;49;65", "wc_strengths": "7;354;16;17", "wc_improvement": "7;99;346;117", "wc_limitations": "20;15;38;51", "wc_correctness": "14;599;5;42", "wc_clarity": "5;620;7;1", "wc_relation_to_prior_work": "7;92;7;44", "wc_documentation": "17;138;4;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "148;2056;473;346", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 8.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 80.5, 34.09178786746157 ], "wc_strengths_avg": [ 98.5, 147.56439272399015 ], "wc_improvement_avg": [ 142.25, 124.81461252593785 ], "wc_limitations_avg": [ 31.0, 14.370107863199914 ], "wc_correctness_avg": [ 165.0, 250.94122817903 ], "wc_clarity_avg": [ 158.25, 266.6002391221733 ], "wc_relation_to_prior_work_avg": [ 37.5, 34.90343822605446 ], "wc_documentation_avg": [ 41.75, 55.76905503951094 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 755.75, 759.5809288680173 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=378983972097289828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "illinois.edu;cs.illinois.edu;;nih.gov;gatech.edu", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Unknown Institution;National Institutes of Health;Georgia Institute of Technology", "aff_unique_dep": ";Department of Computer Science;;", "aff_unique_url": "https://illinois.edu;;https://www.nih.gov;https://www.gatech.edu", "aff_unique_abbr": "UIUC;;NIH;Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "Sim2Real-Fire: A Multi-modal Simulation Dataset for Forecast and Backtracking of Real-world Forest Fire", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97800", "id": "DjCSjizgsH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DjCSjizgsH", "openreview": "https://openreview.net/forum?id=DjCSjizgsH", "poster": "/media/PosterPDFs/NeurIPS%202024/97800.png?t=1731163908.718497", "project": "", "author_site": "Yanzhi Li, Keqiu Li, LI GUOHUI, zumin wang, Chanqing Ji, Lubo Wang, Die Zuo, Qing Guo, Feng Zhang, Manyu Wang, Di Lin", "tldr": "", "abstract": "The latest research on wildfire forecast and backtracking has adopted AI models, which require a large amount of data from wildfire scenarios to capture fire spread patterns. This paper explores using cost-effective simulated wildfire scenarios to train AI models and apply them to the analysis of real-world wildfire. This solution requires AI models to minimize the Sim2Real gap, a brand-new topic in the fire spread analysis research community. To investigate the possibility of minimizing the Sim2Real gap, we collect the Sim2Real-Fire dataset that contains 1M simulated scenarios with multi-modal environmental information for training AI models. We prepare 1K real-world wildfire scenarios for testing the AI models. We also propose a deep transformer, S2R-FireTr, which excels in considering the multi-modal environmental information for forecasting and backtracking the wildfire. S2R-FireTr surpasses state-of-the-art methods in real-world wildfire scenarios.", "keywords": "Simulation and Real-world Dataset;Wildfire Forecast and Backtracking;Deep Transformer Network", "primary_area": "", "supplementary_material": "/attachment/6f392a2f434e63f1afc58bb236e31c35c9d46314.pdf", "author": "Yanzhi Li;Keqiu Li;LI GUOHUI;zumin wang;Chanqing Ji;Lubo Wang;Die Zuo;Qing Guo;Feng Zhang;Manyu Wang;Di Lin", "authorids": "~Yanzhi_Li2;~Keqiu_Li1;~LI_GUOHUI2;~zumin_wang1;~Chanqing_Ji1;~Lubo_Wang1;~Die_Zuo1;~Qing_Guo3;~Feng_Zhang17;~Manyu_Wang1;~Di_Lin3", "gender": "F;M;M;;M;M;M;F;;M;F", "homepage": ";https://cic.tju.edu.cn/faculty/likeqiu/index.html;https://xueshu.baidu.com/scholarID/CN-BG8FL3DJ;;http://www.jichangqing.net;https://github.com/glimmer-shining;https://tsingqguo.github.io;http://mi.hbu.cn/archives/6201;https://github.com/kaka473;https://dilincv.github.io/;", "dblp": ";;;;;352/4271;25/3038-5;48/1294-21;;20/3191-2.html;363/7516", "google_scholar": ";https://scholar.google.com.hk/citations?user=j7ejDkkAAAAJ;;;;0pkbc9IAAAAJ;Rj2x4QUAAAAJ;;;rW0r-hMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0008-3543-0373;;;0000-0003-1194-8203;;;0000-0003-0974-9299;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Yanzhi_Li2;~Keqiu_Li1;~LI_GUOHUI2;~zumin_wang1;~Chanqing_Ji1;~Lubo_Wang1;~Qing_Guo3;~Feng_Zhang17;~Manyu_Wang1;~Di_Lin3;~ZuoDie1", "aff": "Tianjin Fire Science and Technology Research Institute of MEM;Tianjin University;University of Science and Technology of China;Dalian University;Dalian University;Tianjin University; Agency for Science, Technology and Research (A*STAR));Hebei University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tfri.com.cn;tju.edu.cn;mail.ustc.edu.cn;dlu.edu.cn;dlu.edu.cn;tju.edu.cn;cfar.a-star.edu.sg;hbu.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "Lecturer;Full Professor;Associate Professor;Full Professor;Associate Professor;MS student;Researcher;Associate Professor;MS student;Associate Professor;MS student", "bibtex": "@inproceedings{\nli2024simrealfire,\ntitle={Sim2Real-Fire: A Multi-modal Simulation Dataset for Forecast and Backtracking of Real-world Forest Fire},\nauthor={Yanzhi Li and Keqiu Li and LI GUOHUI and zumin wang and Chanqing Ji and Lubo Wang and Die Zuo and Qing Guo and Feng Zhang and Manyu Wang and Di Lin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=DjCSjizgsH}\n}", "github": "", "reviewers": "JYQM;T53a;Zjdq", "pdf_size": 29239994, "rating": "6;6;8", "confidence": "3;5;4", "wc_summary_and_contributions": "49;21;61", "wc_strengths": "52;3;72", "wc_improvement": "130;29;56", "wc_limitations": "45;3;10", "wc_correctness": "33;1;1", "wc_clarity": "31;4;1", "wc_relation_to_prior_work": "31;9;32", "wc_documentation": "27;1;114", "wc_additional_feedback": "1;1;1", "wc_review": "399;72;348", "wc_reply_reviewers": "206;115;0", "wc_reply_authors": "83;83;115", "reply_reviewers": "2;2;0", "reply_authors": "7;4;4", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 43.666666666666664, 16.759740119968715 ], "wc_strengths_avg": [ 42.333333333333336, 28.986586936412884 ], "wc_improvement_avg": [ 71.66666666666667, 42.695302890234764 ], "wc_limitations_avg": [ 19.333333333333332, 18.372685039360892 ], "wc_correctness_avg": [ 11.666666666666666, 15.084944665313014 ], "wc_clarity_avg": [ 12.0, 13.490737563232042 ], "wc_relation_to_prior_work_avg": [ 24.0, 10.614455552060438 ], "wc_documentation_avg": [ 47.333333333333336, 48.32068800098865 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 273.0, 143.6453967240162 ], "wc_reply_reviewers_avg": [ 107.0, 84.28918475502458 ], "wc_reply_authors_avg": [ 93.66666666666667, 15.084944665313014 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 5.0, 1.4142135623730951 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11575955592029755851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tfri.com.cn;tju.edu.cn;mail.ustc.edu.cn;dlu.edu.cn;dlu.edu.cn;tju.edu.cn;cfar.a-star.edu.sg;hbu.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;3;3;1;4;5;1;1;1", "aff_unique_norm": "Tianjin Fire Science and Technology Research Institute;Tianjin University;University of Science and Technology of China;Dalian University;Agency for Science, Technology and Research;Hebei University", "aff_unique_dep": "MEM;;;;;", "aff_unique_url": ";http://www.tju.edu.cn;http://www.ustc.edu.cn;http://www.dlu.edu.cn;https://www.a-star.edu.sg;http://www.hbu.edu.cn/", "aff_unique_abbr": ";TJU;USTC;DLU;A*STAR;HBU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Aligning LLM Agents by Learning Latent Preference from User Edits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96078", "id": "DlYNGpCuwa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DlYNGpCuwa", "openreview": "https://openreview.net/forum?id=DlYNGpCuwa", "poster": "", "project": "", "author_site": "Ge Gao, Alexey Taymanov, Eduardo Salinas, Paul Mineiro, Dipendra Misra", "tldr": "", "abstract": "We study interactive learning of language agents based on user edits made to the agent's output. In a typical setting such as writing assistants, the user interacts with a language agent to generate a response given a context, and may optionally edit the agent response to personalize it based on their latent preference, in addition to improving the correctness. The edit feedback is naturally generated, making it a suitable candidate for improving the agent's alignment with the user's preference, and for reducing the cost of user edits over time. We propose a learning framework, PRELUDE that infers a description of the user's latent preference based on historic edit data and using it to define a prompt policy that drives future response generation. This avoids fine-tuning the agent, which is costly, challenging to scale with the number of users, and may even degrade its performance on other tasks. Furthermore, learning descriptive preference improves interpretability, allowing the user to view and modify the learned preference. However, user preference can be complex and vary based on context, making it challenging to learn. To address this, we propose a simple yet effective algorithm named CIPHER that leverages a large language model (LLM) to infer the user preference for a given context based on user edits. In the future, CIPHER retrieves inferred preferences from the k-closest contexts in the history, and forms an aggregate preference for response generation. We introduce two interactive environments -- summarization and email writing, for evaluation using a GPT-4 simulated user. We compare with algorithms that directly retrieve user edits but do not learn descriptive preference, and algorithms that learn context-agnostic preference. On both tasks, CIPHER outperforms baselines by achieving the lowest edit distance cost. Meanwhile, CIPHER has a lower computational expense, as using learned preference results in a shorter prompt than directly using user edits. Our further analysis reports that the user preference learned by CIPHER shows significant similarity to the ground truth latent preference.", "keywords": "NLP;LLM;preference learning;user feedback;user edits", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ge Gao;Alexey Taymanov;Eduardo Salinas;Paul Mineiro;Dipendra Misra", "authorids": "~Ge_Gao1;~Alexey_Taymanov1;~Eduardo_Salinas1;~Paul_Mineiro1;~Dipendra_Misra1", "gender": "F;M;;;M", "homepage": "https://gao-g.github.io/;https://www.microsoft.com/en-us/research/people/ataymano/;https://www.eduardosalinas.com;;https://dipendramisra.com/", "dblp": ";376/8544;;35/5613;218/6569", "google_scholar": "https://scholar.google.com/citations?hl=en;xEkpgjgAAAAJ;;;rIoPIFsAAAAJ", "orcid": ";;;;", "linkedin": ";;eduardo-salinas-6034b9147;;", "or_profile": "~Ge_Gao1;~Alexey_Taymanov1;~Eduardo_Salinas1;~Paul_Mineiro1;~Dipendra_Misra1", "aff": "Cornell University;Microsoft Research;Microsoft Research;;Microsoft Research", "aff_domain": "cornell.edu;research.microsoft.com;research.microsoft.com;;microsoft.com", "position": "PhD student;Researcher;Researcher;;Researcher", "bibtex": "@inproceedings{\ngao2024aligning,\ntitle={Aligning {LLM} Agents by Learning Latent Preference from User Edits},\nauthor={Ge Gao and Alexey Taymanov and Eduardo Salinas and Paul Mineiro and Dipendra Misra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DlYNGpCuwa}\n}", "github": "", "reviewers": "YQaz;EkvH;U3Kc;Kfzn", "pdf_size": 2871438, "rating": "4;7;7;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "81;98;61;103", "wc_strengths": "40;25;41;101", "wc_weaknesses": "249;20;43;139", "wc_questions": "16;113;12;292", "wc_limitations": "13;10;7;1", "wc_review": "399;266;164;636", "wc_reply_reviewers": "28;21;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.75, 16.452583383772897 ], "wc_strengths_avg": [ 51.75, 29.13224158900238 ], "wc_weaknesses_avg": [ 112.75, 90.444388991247 ], "wc_questions_avg": [ 108.25, 113.53496157571905 ], "wc_limitations_avg": [ 7.75, 4.437059837324712 ], "wc_review_avg": [ 366.25, 176.63008662173044 ], "wc_reply_reviewers_avg": [ 16.0, 10.319883720275147 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5949195912173705577&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cornell.edu;research.microsoft.com;research.microsoft.com;;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Cornell University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.cornell.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Cornell;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is Knowledge Power? On the (Im)possibility of Learning from Strategic Interactions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96077", "id": "Dlm6Z1RrjV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dlm6Z1RrjV", "openreview": "https://openreview.net/forum?id=Dlm6Z1RrjV", "poster": "", "project": "", "author_site": "Nivasini Ananthakrishnan, Nika Haghtalab, Chara Podimata, Kunhe Yang", "tldr": "", "abstract": "When learning in strategic environments, a key question is whether agents can overcome uncertainty about their preferences to achieve outcomes they could have achieved absent any uncertainty. Can they do this solely through interactions with each other? We focus this question on the ability of agents to attain the value of their Stackelberg optimal strategy and study the impact of information asymmetry. We study repeated interactions in fully strategic environments where players' actions are decided based on learning algorithms that take into account their observed histories and knowledge of the game. We study the pure Nash equilibria (PNE) of a meta-game where players choose these algorithms as their actions. We demonstrate that if one player has perfect knowledge about the game, then any initial informational gap persists. That is, while there is always a PNE in which the informed agent achieves her Stackelberg value, there is a game where no PNE of the meta-game allows the partially informed player to achieve her Stackelberg value. On the other hand, if both players start with some uncertainty about the game, the quality of information alone does not determine which agent can achieve her Stackelberg value. In this case, the concept of information asymmetry becomes nuanced and depends on the game's structure. Overall, our findings suggest that repeated strategic interactions alone cannot facilitate learning effectively enough to earn an uninformed player her Stackelberg value.", "keywords": "Information asymmetry;Repeated games;Stackelberg games", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Nivasini Ananthakrishnan;Nika Haghtalab;Chara Podimata;Kunhe Yang", "authorids": "~Nivasini_Ananthakrishnan1;~Nika_Haghtalab2;~Chara_Podimata1;~Kunhe_Yang1", "gender": ";F;F;F", "homepage": ";https://people.eecs.berkeley.edu/~nika/;https://www.charapodimata.com/;https://kunheyang.com/", "dblp": ";;209/9752;267/5467", "google_scholar": "https://scholar.google.ca/citations?user=xVFrEDwAAAAJ;;XY9hKvIAAAAJ;-j0q9B4AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Nivasini_Ananthakrishnan1;~Nika_Haghtalab2;~Chara_Podimata1;~Kunhe_Yang1", "aff": "University of California, Berkeley;University of California, Berkeley;Massachusetts Institute of Technology;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;mit.edu;berkeley.edu", "position": "PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nananthakrishnan2024is,\ntitle={Is Knowledge Power? On the (Im)possibility of Learning from Strategic Interactions},\nauthor={Nivasini Ananthakrishnan and Nika Haghtalab and Chara Podimata and Kunhe Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dlm6Z1RrjV}\n}", "github": "", "reviewers": "LLSw;bNuD;CgaS;r7Rg", "pdf_size": 482335, "rating": "6;6;6;8", "confidence": "3;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "116;105;85;220", "wc_strengths": "31;42;51;72", "wc_weaknesses": "192;112;101;58", "wc_questions": "4;30;13;69", "wc_limitations": "4;91;9;8", "wc_review": "347;380;259;427", "wc_reply_reviewers": "11;208;10;18", "wc_reply_authors": "0;443;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.5, 52.29005641611032 ], "wc_strengths_avg": [ 49.0, 15.049916943292411 ], "wc_weaknesses_avg": [ 115.75, 48.427135987997474 ], "wc_questions_avg": [ 29.0, 24.9098374141623 ], "wc_limitations_avg": [ 28.0, 36.42114770294862 ], "wc_review_avg": [ 353.25, 61.39370896109796 ], "wc_reply_reviewers_avg": [ 61.75, 84.49371278385156 ], "wc_reply_authors_avg": [ 110.75, 191.82462693825315 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13879717815014588797&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;mit.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu", "aff_unique_abbr": "UC Berkeley;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Almost Surely Asymptotically Constant Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96076", "id": "Dn68qdfTry", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dn68qdfTry", "openreview": "https://openreview.net/forum?id=Dn68qdfTry", "poster": "", "project": "", "author_site": "Sam Adam-Day, Michael Benedikt, Ismail Ceylan, Ben Finkelshtein", "tldr": "", "abstract": "We present a new angle on the expressive power of graph neural networks (GNNs) by studying how the predictions of real-valued GNN classifiers, such as those classifying graphs probabilistically, evolve as we apply them on larger graphs drawn from some random graph model. We show that the output converges to a constant function, which upper-bounds what these classifiers can uniformly express. This strong convergence phenomenon applies to a very wide class of GNNs, including state of the art models, with aggregates including mean and the attention-based mechanism of graph transformers. Our results apply to a broad class of random graph models, including sparse and dense variants of the Erd\u0151s-R\u00e9nyi model, the stochastic block model, and the Barab\u00e1si-Albert model. We empirically validate these findings, observing that the convergence phenomenon appears not only on random graphs but also on some real-world graphs.", "keywords": "Graph Neural Networks; convergence laws", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Sam Adam-Day;Michael Benedikt;Ismail Ilkan Ceylan;Ben Finkelshtein", "authorids": "~Sam_Adam-Day1;~Michael_Benedikt1;~Ismail_Ilkan_Ceylan2;~Ben_Finkelshtein1", "gender": ";;;M", "homepage": "https://samadamday.com;;https://www.cs.ox.ac.uk/people/ismaililkan.ceylan/;https://benfinkelshtein.github.io/", "dblp": "338/9995;b/MichaelBenedikt;147/6111;278/2449.html", "google_scholar": "wCHY5IsAAAAJ;;avJ5kQcAAAAJ;goWM7VwAAAAJ", "orcid": "0000-0003-3316-1954;;0000-0003-4118-4689;", "linkedin": "sam-adam-day/;;;ben-finkelshtein/", "or_profile": "~Sam_Adam-Day1;~Michael_Benedikt1;~Ismail_Ilkan_Ceylan2;~Ben_Finkelshtein1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;oxford.ac.uk;oxford.ac.uk;cs.ox.ac.uk", "position": "PhD student;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nadam-day2024almost,\ntitle={Almost Surely Asymptotically Constant Graph Neural Networks},\nauthor={Sam Adam-Day and Michael Benedikt and Ismail Ilkan Ceylan and Ben Finkelshtein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dn68qdfTry}\n}", "github": "", "reviewers": "JXJp;ETj1;xvBN;6Ly5", "pdf_size": 1426816, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "67;65;35;88", "wc_strengths": "63;40;17;70", "wc_weaknesses": "297;216;69;157", "wc_questions": "5;76;147;95", "wc_limitations": "36;1;1;59", "wc_review": "468;398;269;469", "wc_reply_reviewers": "15;48;82;12", "wc_reply_authors": "12;0;51;13", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.75, 18.886172190256023 ], "wc_strengths_avg": [ 47.5, 20.81465829649865 ], "wc_weaknesses_avg": [ 184.75, 83.28377693164498 ], "wc_questions_avg": [ 80.75, 50.87423218093812 ], "wc_limitations_avg": [ 24.25, 24.631027181179434 ], "wc_review_avg": [ 401.0, 81.46471628870992 ], "wc_reply_reviewers_avg": [ 39.25, 28.43743131859838 ], "wc_reply_authors_avg": [ 19.0, 19.170289512680814 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13980985196703789412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ox.ac.uk;oxford.ac.uk;oxford.ac.uk;cs.ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Neural Combinatorial Optimization for Robust Routing Problem with Uncertain Travel Times", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96075", "id": "DoewNm2uT3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DoewNm2uT3", "openreview": "https://openreview.net/forum?id=DoewNm2uT3", "poster": "/media/PosterPDFs/NeurIPS%202024/96075.png?t=1730448477.9258246", "project": "", "author_site": "Pei Xiao, Zizhen Zhang, Jinbiao Chen, Jiahai Wang, Zhenzhen Zhang", "tldr": "", "abstract": "We consider the robust routing problem with uncertain travel times under the min-max regret criterion, which represents an extended and robust version of the classic traveling salesman problem (TSP) and vehicle routing problem (VRP). The general budget uncertainty set is employed to capture the uncertainty, which provides the capability to control the conservatism of obtained solutions and covers the commonly used interval uncertainty set as a special case. The goal is to obtain a robust solution that minimizes the maximum deviation from the optimal routing time in the worst-case scenario. Given the significant advancements and broad applications of neural combinatorial optimization methods in recent years, we present our initial attempt to combine neural approaches for solving this problem. We propose a dual multi-head cross attention mechanism to extract problem features represented by the inputted uncertainty sets. To tackle the built-in maximization problem, we derive the regret value by invoking a pre-trained model, subsequently utilizing it as the reward during the model training. Our experimental results on the robust TSP and VRP demonstrate the efficacy of our neural combinatorial optimization method, showcasing its ability to efficiently handle the robust routing problem of various sizes within a shorter time compared with alternative heuristic approaches.", "keywords": "Robust Routing Problem;Min-max Regret Criterion;Deep Reinforcement Learning;Attention Mechanism", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Pei Xiao;Zizhen Zhang;Jinbiao Chen;Jiahai Wang;Zhenzhen Zhang", "authorids": "~Pei_Xiao1;~Zizhen_Zhang1;~Jinbiao_Chen1;~Jiahai_Wang1;~Zhenzhen_Zhang1", "gender": ";;M;M;M", "homepage": ";;;;https://sem.tongji.edu.cn/semen/22003.html", "dblp": ";45/9055;;00/2989;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;", "orcid": ";;0000-0001-7417-0430;;0000-0001-9776-8355", "linkedin": ";;;;", "or_profile": "~Pei_Xiao1;~Zizhen_Zhang1;~Jinbiao_Chen1;~Jiahai_Wang1;~Zhenzhen_Zhang1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Tongji University", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;tongji.edu.cn", "position": "MS student;Associate Professor;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxiao2024neural,\ntitle={Neural Combinatorial Optimization for Robust Routing Problem with Uncertain Travel Times},\nauthor={Pei Xiao and Zizhen Zhang and Jinbiao Chen and Jiahai Wang and Zhenzhen Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DoewNm2uT3}\n}", "github": "", "reviewers": "3PAJ;Xutw;88Rd;tvNa;g5Qe", "pdf_size": 1003271, "rating": "5;5;7;7;7", "confidence": "5;4;3;4;4", "soundness": "3;3;3;4;4", "novelty": "3;3;3;3;3", "presentation": "3;3;3;4;4", "wc_summary": "62;85;59;47;94", "wc_strengths": "77;48;66;68;104", "wc_weaknesses": "62;173;85;84;89", "wc_questions": "3;92;7;287;92", "wc_limitations": "1;30;11;14;29", "wc_review": "205;428;228;500;408", "wc_reply_reviewers": "17;292;22;63;183", "wc_reply_authors": "22;245;30;26;28", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 69.4, 17.396551382386107 ], "wc_strengths_avg": [ 72.6, 18.30409790183608 ], "wc_weaknesses_avg": [ 98.6, 38.380203230311324 ], "wc_questions_avg": [ 96.2, 103.03669249349961 ], "wc_limitations_avg": [ 17.0, 11.081516141756055 ], "wc_review_avg": [ 353.8, 116.43435919006038 ], "wc_reply_reviewers_avg": [ 115.4, 106.68570663401916 ], "wc_reply_authors_avg": [ 70.2, 87.44003659651567 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16860301353984268001&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;tongji.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Sun Yat-sen University;Tongji University", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "SYSU;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Make Continual Learning Stronger via C-Flat", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96074", "id": "Dokew2u49m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dokew2u49m", "openreview": "https://openreview.net/forum?id=Dokew2u49m", "poster": "/media/PosterPDFs/NeurIPS%202024/96074.png?t=1731381871.0759473", "project": "", "author_site": "Ang Bian, Wei Li, Hangjie Yuan, yu chengrong, Mang Wang, Zixiang Zhao, Aojun Lu, Pengliang Ji, Tao Feng", "tldr": "", "abstract": "How to balance the learning \u2019sensitivity-stability\u2019 upon new task training and memory preserving is critical in CL to resolve catastrophic forgetting. Improving model generalization ability within each learning phase is one solution to help CL learning overcome the gap in the joint knowledge space. Zeroth-order loss landscape sharpness-aware minimization is a strong training regime improving model generalization in transfer learning compared with optimizer like SGD. It has also been introduced into CL to improve memory representation or learning efficiency. However, zeroth-order sharpness alone could favors sharper over flatter minima in certain scenarios, leading to a rather sensitive minima rather than a global optima. To further enhance learning stability, we propose a Continual Flatness (C-Flat) method featuring a flatter loss landscape tailored for CL. C-Flat could be easily called with only one line of code and is plug-and-play to any CL methods. A general framework of C-Flat applied to all CL categories and a thorough comparison with loss minima optimizer and flat minima based CL approaches is presented in this paper, showing that our method can boost CL performance in almost all cases. Code is available at https://github.com/WanNaa/C-Flat.", "keywords": "Continual Learning;Incremental Learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Ang Bian;Wei Li;Hangjie Yuan;yu chengrong;Mang Wang;Zixiang Zhao;Aojun Lu;Pengliang Ji;Tao Feng", "authorids": "~Ang_Bian1;~Wei_Li84;~Hangjie_Yuan1;~yu_chengrong1;~Mang_Wang2;~Zixiang_Zhao1;~Aojun_Lu1;~Pengliang_Ji1;~Tao_Feng3", "gender": "F;M;M;M;M;M;M;;", "homepage": ";;https://jacobyuan7.github.io/;https://ychengrong.github.io/;;https://zhaozixiang1228.github.io/;;;", "dblp": "187/8125.html;;293/9956;331/7403.html;133/2868;65/5420;;;12/4774-6", "google_scholar": ";;jQ3bFDMAAAAJ;;igKgaDwAAAAJ;tUv_X8cAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7667-9780;0009-0009-8074-8468;;;;;0009-0000-2870-1296;;", "linkedin": ";;;;;;;;", "or_profile": "~Ang_Bian1;~Wei_Li84;~Hangjie_Yuan1;~yu_chengrong1;~Mang_Wang2;~Zixiang_Zhao1;~Aojun_Lu1;~Pengliang_Ji1;~Tao_Feng3", "aff": "Sichuan University;Huazhong University of Science and Technology;Zhejiang University;Sichuan University;ByteDance Inc.;ETHZ - ETH Zurich;Sichuan University;;Tsinghua University", "aff_domain": "scu.edu.cn;hust.edu.cn;zju.edu.cn;scu.edu.cn;bytedance.com;ethz.ch;scu.edu.cn;;mail.tsinghua.edu.cn", "position": "Postdoc;Intern;PhD student;PhD student;Principal Researcher;Postdoc;PhD student;;Researcher", "bibtex": "@inproceedings{\nbian2024make,\ntitle={Make Continual Learning Stronger via C-Flat},\nauthor={Ang Bian and Wei Li and Hangjie Yuan and yu chengrong and Mang Wang and Zixiang Zhao and Aojun Lu and Pengliang Ji and Tao Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dokew2u49m}\n}", "github": "", "reviewers": "PuVp;Tb4q;p3Vh;475M", "pdf_size": 1780689, "rating": "5;6;6;8", "confidence": "4;4;4;4", "soundness": "2;4;3;3", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "27;87;35;70", "wc_strengths": "32;99;117;131", "wc_weaknesses": "290;183;251;137", "wc_questions": "32;70;174;48", "wc_limitations": "2;30;69;8", "wc_review": "383;469;646;394", "wc_reply_reviewers": "149;42;231;148", "wc_reply_authors": "246;29;363;32", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.75, 24.661457783350926 ], "wc_strengths_avg": [ 94.75, 37.962975383918476 ], "wc_weaknesses_avg": [ 215.25, 59.22151213874904 ], "wc_questions_avg": [ 81.0, 55.362442142665635 ], "wc_limitations_avg": [ 27.25, 26.261902063635834 ], "wc_review_avg": [ 473.0, 105.22119558339945 ], "wc_reply_reviewers_avg": [ 142.5, 67.09135562797938 ], "wc_reply_authors_avg": [ 167.5, 143.1127178136171 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8286167265229788038&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "scu.edu.cn;hust.edu.cn;zju.edu.cn;scu.edu.cn;bytedance.com;ethz.ch;scu.edu.cn;;mail.tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;0;3;4;0;5", "aff_unique_norm": "Sichuan University;Huazhong University of Science and Technology;Zhejiang University;ByteDance;ETH Zurich;Tsinghua University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.scu.edu.cn;http://www.hust.edu.cn;https://www.zju.edu.cn;https://www.bytedance.com;https://www.ethz.ch;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SCU;HUST;ZJU;ByteDance;ETHZ;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;Switzerland" }, { "title": "UniMTS: Unified Pre-training for Motion Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96073", "id": "DpByqSbdhI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DpByqSbdhI", "openreview": "https://openreview.net/forum?id=DpByqSbdhI", "poster": "/media/PosterPDFs/NeurIPS%202024/96073.png?t=1733469214.4832833", "project": "", "author_site": "Xiyuan Zhang, Diyan Teng, Ranak Roy Chowdhury, Shuheng Li, Dezhi Hong, Rajesh Gupta, Jingbo Shang", "tldr": "", "abstract": "Motion time series collected from low-power, always-on mobile and wearable devices such as smartphones and smartwatches offer significant insights into human behavioral patterns, with wide applications in healthcare, automation, IoT, and AR/XR. However, given security and privacy concerns, building large-scale motion time series datasets remains difficult, hindering the development of pre-trained models for human activity analysis. Typically, existing models are trained and tested on the same dataset, leading to poor generalizability across variations in device location, device mounting orientation, and human activity type. In this paper, we introduce UniMTS, the first unified pre-training procedure for motion time series that generalizes across diverse device latent factors and activities. Specifically, we employ a contrastive learning framework that aligns motion time series with text descriptions enriched by large language models. This helps the model learn the semantics of time series to generalize across activities. Given the absence of large-scale motion time series data, we derive and synthesize time series from existing motion skeleton data with all-joint coverage. We use spatio-temporal graph networks to capture the relationships across joints for generalization across different device locations. We further design rotation-invariant augmentation to make the model agnostic to changes in device mounting orientations. Our model shows exceptional generalizability across 18 motion time series classification benchmark datasets, outperforming the best baselines by 340% in the zero-shot setting, 16.3% in the few-shot setting, and 9.2% in the full-shot setting.", "keywords": "motion time series classification;pre-training;contrastive learning;physics-based simulation;human activity recognition", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/9f200e50f147c63313a65c2129e46e9ff65ea50b.zip", "author": "Xiyuan Zhang;Diyan Teng;Ranak Roy Chowdhury;Shuheng Li;Dezhi Hong;Rajesh K. Gupta;Jingbo Shang", "authorids": "~Xiyuan_Zhang2;~Diyan_Teng1;~Ranak_Roy_Chowdhury1;~Shuheng_Li1;~Dezhi_Hong1;~Rajesh_K._Gupta1;~Jingbo_Shang2", "gender": ";M;M;M;;M;M", "homepage": "https://xiyuanzh.github.io;;https://ranakroychowdhury.github.io/;https://shuheng-li.github.io/;https://cseweb.ucsd.edu/~dehong/;http://mesl.ucsd.edu/gupta/;https://shangjingbo1226.github.io/", "dblp": "137/9583-1;157/9106.html;242/5137.html;233/6025.html;60/11186.html;213/9138-1.html;151/3145.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;hTA5EgkAAAAJ;9UyRw1YAAAAJ;NsPO1GUAAAAJ;I1w51gUAAAAJ;0SkFI4MAAAAJ", "orcid": ";;0000-0002-8705-7485;0000-0002-7276-3715;;0000-0002-6489-7633;", "linkedin": "xiyuan-zhang-57210018a/;;ranakroychowdhury/;;;rajeshgupta4/;", "or_profile": "~Xiyuan_Zhang2;~Diyan_Teng1;~Ranak_Roy_Chowdhury1;~Shuheng_Li1;~Dezhi_Hong1;~Rajesh_K._Gupta1;~Jingbo_Shang2", "aff": "University of California, San Diego;Qualcomm Inc, QualComm;University of California, San Diego, University of California, San Diego;University of California, San Diego;Amazon;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;qti.qualcomm.com;eng.ucsd.edu;ucsd.edu;amazon.com;ucsd.edu;ucsd.edu", "position": "PhD student;Researcher;PhD student;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024unimts,\ntitle={Uni{MTS}: Unified Pre-training for Motion Time Series},\nauthor={Xiyuan Zhang and Diyan Teng and Ranak Roy Chowdhury and Shuheng Li and Dezhi Hong and Rajesh K. Gupta and Jingbo Shang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DpByqSbdhI}\n}", "github": "", "reviewers": "s9ZD;jvaP;6pyv;G59f;7idQ;4CUw", "pdf_size": 1607817, "rating": "6;6;7;7;7;7", "confidence": "4;3;4;3;5;4", "soundness": "3;3;3;4;3;3", "novelty": "3;3;4;3;3;3", "presentation": "3;3;4;4;3;3", "wc_summary": "55;81;133;61;69;73", "wc_strengths": "33;68;110;61;52;175", "wc_weaknesses": "31;117;445;54;55;82", "wc_questions": "20;4;20;22;31;11", "wc_limitations": "435;7;6;22;6;1", "wc_review": "574;277;714;220;213;342", "wc_reply_reviewers": "441;36;377;74;60;13", "wc_reply_authors": "668;49;533;25;194;15", "reply_reviewers": "3;1;2;1;2;1", "reply_authors": "4;2;3;2;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.1666666666666665, 0.3726779962499649 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 78.66666666666667, 25.675323215535617 ], "wc_strengths_avg": [ 83.16666666666667, 47.192572391096554 ], "wc_weaknesses_avg": [ 130.66666666666666, 143.11844356647012 ], "wc_questions_avg": [ 18.0, 8.54400374531753 ], "wc_limitations_avg": [ 79.5, 159.11709524749375 ], "wc_review_avg": [ 390.0, 188.91532494744834 ], "wc_reply_reviewers_avg": [ 166.83333333333334, 173.277635281905 ], "wc_reply_authors_avg": [ 247.33333333333334, 259.5371949365939 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.74535599249993 ], "reply_authors_avg": [ 2.6666666666666665, 0.7453559924999298 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3429971702850177, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10338651884051295324&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ucsd.edu;qti.qualcomm.com;eng.ucsd.edu;ucsd.edu;amazon.com;ucsd.edu;ucsd.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "University of California, San Diego;Qualcomm Incorporated;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://www.qualcomm.com;https://www.amazon.com", "aff_unique_abbr": "UCSD;Qualcomm;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Divergences between Language Models and Human Brains", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96072", "id": "DpP5F3UfKw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DpP5F3UfKw", "openreview": "https://openreview.net/forum?id=DpP5F3UfKw", "poster": "/media/PosterPDFs/NeurIPS%202024/96072.png?t=1733600084.327895", "project": "", "author_site": "Yuchen Zhou, Emmy Liu, Graham Neubig, Michael Tarr, Leila Wehbe", "tldr": "", "abstract": "Do machines and humans process language in similar ways? Recent research has hinted at the affirmative, showing that human neural activity can be effectively predicted using the internal representations of language models (LMs). Although such results are thought to reflect shared computational principles between LMs and human brains, there are also clear differences in how LMs and humans represent and use language. In this work, we systematically explore the divergences between human and machine language processing by examining the differences between LM representations and human brain responses to language as measured by Magnetoencephalography (MEG) across two datasets in which subjects read and listened to narrative stories. Using an LLM-based data-driven approach, we identify two domains that LMs do not capture well: social/emotional intelligence and physical commonsense. We validate these findings with human behavioral experiments and hypothesize that the gap is due to insufficient representations of social/emotional and physical knowledge in LMs. Our results show that fine-tuning LMs on these domains can improve their alignment with human brain responses.", "keywords": "Natural Language Processing;NLP;Brain Imaging;Neuroimaging;Magnetoencephalography;MEG;Neuroscience;Cognitive Science;Interpretability;Deep Learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yuchen Zhou;Emmy Liu;Graham Neubig;Michael J. Tarr;Leila Wehbe", "authorids": "~Yuchen_Zhou4;~Emmy_Liu1;~Graham_Neubig1;~Michael_J._Tarr1;~Leila_Wehbe1", "gender": "M;F;M;F;M", "homepage": "https://www.zhouyuchen.me/;https://nightingal3.github.io/;http://phontron.com;http://www.cs.cmu.edu/~lwehbe/;https://tarrlab.org", "dblp": ";249/6997;03/8155;125/4359;36/1880", "google_scholar": "cDhq1EYAAAAJ;;wlosgkoAAAAJ;YezyUawAAAAJ;O8ALPlkAAAAJ", "orcid": ";;;0000-0001-8545-2062;0000-0003-4724-1744", "linkedin": ";;;;michael-tarr-ab078046/", "or_profile": "~Yuchen_Zhou4;~Emmy_Liu1;~Graham_Neubig1;~Leila_Wehbe1;~Michael_Tarr1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024divergences,\ntitle={Divergences between Language Models and Human Brains},\nauthor={Yuchen Zhou and Emmy Liu and Graham Neubig and Michael J. Tarr and Leila Wehbe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DpP5F3UfKw}\n}", "github": "", "reviewers": "jRnr;fRry;b2dQ;hoiu", "pdf_size": 6343662, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "3;2;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "70;67;75;96", "wc_strengths": "55;102;42;77", "wc_weaknesses": "64;518;43;109", "wc_questions": "24;42;44;54", "wc_limitations": "4;10;2;1", "wc_review": "217;739;206;337", "wc_reply_reviewers": "21;101;11;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.0, 11.335784048754634 ], "wc_strengths_avg": [ 69.0, 22.792542640082964 ], "wc_weaknesses_avg": [ 183.5, 194.5899534919519 ], "wc_questions_avg": [ 41.0, 10.816653826391969 ], "wc_limitations_avg": [ 4.25, 3.491060010942235 ], "wc_review_avg": [ 374.75, 216.48599839250573 ], "wc_reply_reviewers_avg": [ 33.25, 39.814413219335535 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=544157752402283205&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "EASI: Evolutionary Adversarial Simulator Identification for Sim-to-Real Transfer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96071", "id": "DqiggGDOmA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DqiggGDOmA", "openreview": "https://openreview.net/forum?id=DqiggGDOmA", "poster": "/media/PosterPDFs/NeurIPS%202024/96071.png?t=1731500274.4369428", "project": "", "author_site": "Haoyu Dong, Huiqiao Fu, Wentao Xu, Zhehao Zhou, Chunlin Chen", "tldr": "", "abstract": "Reinforcement Learning (RL) controllers have demonstrated remarkable performance in complex robot control tasks. However, the presence of reality gap often leads to poor performance when deploying policies trained in simulation directly onto real robots. Previous sim-to-real algorithms like Domain Randomization (DR) requires domain-specific expertise and suffers from issues such as reduced control performance and high training costs. In this work, we introduce Evolutionary Adversarial Simulator Identification (EASI), a novel approach that combines Generative Adversarial Network (GAN) and Evolutionary Strategy (ES) to address sim-to-real challenges. Specifically, we consider the problem of sim-to-real as a search problem, where ES acts as a generator in adversarial competition with a neural network discriminator, aiming to find physical parameter distributions that make the state transitions between simulation and reality as similar as possible. The discriminator serves as the fitness function, guiding the evolution of the physical parameter distributions. EASI features simplicity, low cost, and high fidelity, enabling the construction of a more realistic simulator with minimal requirements for real-world data, thus aiding in transferring simulated-trained policies to the real world. We demonstrate the performance of EASI in both sim-to-sim and sim-to-real tasks, showing superior performance compared to existing sim-to-real algorithms.", "keywords": "Evolutionary adversarial simulator identification;reinforcement learning;sim-to-real transfer", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Haoyu Dong;Huiqiao Fu;Wentao Xu;Zhehao Zhou;Chunlin Chen", "authorids": "~Haoyu_Dong4;~Huiqiao_Fu1;~Wentao_Xu5;~Zhehao_Zhou1;~Chunlin_Chen1", "gender": "M;M;;M;M", "homepage": "https://github.com/BlackVegetab1e;;https://github.com/Simonforyou;https://lbwnbzzh.github.io/zhouzhehao.github.io/;https://sme.nju.edu.cn/ccl/list.htm", "dblp": ";243/7065;;;68/6992.html", "google_scholar": ";;;;", "orcid": "0009-0006-2197-2441;;0009-0000-4739-8595;;", "linkedin": ";;;;", "or_profile": "~Haoyu_Dong4;~Huiqiao_Fu1;~Wentao_Xu5;~Zhehao_Zhou1;~Chunlin_Chen1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;smail.nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\ndong2024easi,\ntitle={{EASI}: Evolutionary Adversarial Simulator Identification for Sim-to-Real Transfer},\nauthor={Haoyu Dong and Huiqiao Fu and Wentao Xu and Zhehao Zhou and Chunlin Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DqiggGDOmA}\n}", "github": "", "reviewers": "yoqm;ZLtE;rASM;zVKJ;2SzS", "pdf_size": 817875, "rating": "6;6;6;7;7", "confidence": "3;4;3;5;3", "soundness": "3;2;2;3;3", "novelty": "3;2;2;3;2", "presentation": "3;3;2;4;3", "wc_summary": "68;59;65;153;67", "wc_strengths": "114;24;60;144;170", "wc_weaknesses": "169;10;175;311;209", "wc_questions": "153;150;176;125;11", "wc_limitations": "8;1;42;32;15", "wc_review": "512;244;518;765;472", "wc_reply_reviewers": "16;53;15;325;14", "wc_reply_authors": "32;114;44;388;50", "reply_reviewers": "1;2;1;2;1", "reply_authors": "2;4;2;3;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 82.4, 35.437832890852675 ], "wc_strengths_avg": [ 102.4, 53.62685894213831 ], "wc_weaknesses_avg": [ 174.8, 96.86361546008905 ], "wc_questions_avg": [ 123.0, 58.28550420130206 ], "wc_limitations_avg": [ 19.6, 15.21315220458929 ], "wc_review_avg": [ 502.2, 165.5226872667309 ], "wc_reply_reviewers_avg": [ 84.6, 121.09929809870906 ], "wc_reply_authors_avg": [ 125.6, 134.2558751042203 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40824829046386285, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mGVlr3X2se8J:scholar.google.com/&scioq=EASI:+Evolutionary+Adversarial+Simulator+Identification+for+Sim-to-Real+Transfer&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "nju.edu.cn;nju.edu.cn;smail.nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exactly Minimax-Optimal Locally Differentially Private Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96070", "id": "Dr7UarlhVE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dr7UarlhVE", "openreview": "https://openreview.net/forum?id=Dr7UarlhVE", "poster": "/media/PosterPDFs/NeurIPS%202024/96070.png?t=1733407001.2488706", "project": "", "author_site": "Hyun-Young Park, Shahab Asoodeh, Si-Hyeon Lee", "tldr": "", "abstract": "The sampling problem under local differential privacy has recently been studied with potential applications to generative models, but a fundamental analysis of its privacy-utility trade-off (PUT) remains incomplete. In this work, we define the fundamental PUT of private sampling in the minimax sense, using the $f$-divergence between original and sampling distributions as the utility measure. We characterize the exact PUT for both finite and continuous data spaces under some mild conditions on the data distributions, and propose sampling mechanisms that are universally optimal for all $f$-divergences. Our numerical experiments demonstrate the superiority of our mechanisms over baselines, in terms of theoretical utilities for finite data space and of empirical utilities for continuous data space.", "keywords": "Local differential privacy;Sampling;Privacy-utility trade-off", "primary_area": "privacy", "supplementary_material": "/attachment/f0edd655e51bef30b93fd20525b4a4b7483c382f.zip", "author": "Hyun-Young Park;Shahab Asoodeh;Si-Hyeon Lee", "authorids": "~Hyun-Young_Park1;~Shahab_Asoodeh1;~Si-Hyeon_Lee1", "gender": "M;M;F", "homepage": ";https://www.cas.mcmaster.ca/~asoodehs/;https://sites.google.com/view/kaist-infolab/team/professor?authuser=0", "dblp": ";63/8658;", "google_scholar": "0qq-4IMAAAAJ;CSxeFMsAAAAJ;", "orcid": ";;", "linkedin": ";shahabasoodeh/;", "or_profile": "~Hyun-Young_Park1;~Shahab_Asoodeh1;~Si-Hyeon_Lee1", "aff": "Korea Advanced Institute of Science & Technology;McMaster University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;mcmaster.ca;kaist.ac.kr", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npark2024exactly,\ntitle={Exactly Minimax-Optimal Locally Differentially Private Sampling},\nauthor={Hyun-Young Park and Shahab Asoodeh and Si-Hyeon Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dr7UarlhVE}\n}", "github": "", "reviewers": "NxAu;9St7;KHbV;D3Yh", "pdf_size": 621149, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "4;4;3;4", "novelty": "2;3;3;4", "presentation": "4;2;3;3", "wc_summary": "274;234;77;99", "wc_strengths": "20;113;54;56", "wc_weaknesses": "145;189;30;42", "wc_questions": "2;109;46;44", "wc_limitations": "1;1;1;24", "wc_review": "442;646;208;265", "wc_reply_reviewers": "12;35;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 171.0, 84.55471601276892 ], "wc_strengths_avg": [ 60.75, 33.38693606786942 ], "wc_weaknesses_avg": [ 101.5, 67.45554091399757 ], "wc_questions_avg": [ 50.25, 38.19931282104431 ], "wc_limitations_avg": [ 6.75, 9.959292143521045 ], "wc_review_avg": [ 390.25, 171.0180911482759 ], "wc_reply_reviewers_avg": [ 15.0, 12.62933094031509 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5MTXFgamObEJ:scholar.google.com/&scioq=Exactly+Minimax-Optimal+Locally+Differentially+Private+Sampling&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "kaist.ac.kr;mcmaster.ca;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;McMaster University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.mcmaster.ca", "aff_unique_abbr": "KAIST;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;Canada" }, { "title": "Online Non-convex Learning in Dynamic Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96069", "id": "DrQXDKbGgy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DrQXDKbGgy", "openreview": "https://openreview.net/forum?id=DrQXDKbGgy", "poster": "/media/PosterPDFs/NeurIPS%202024/96069.png?t=1733809606.436196", "project": "", "author_site": "Zhipan Xu, Lijun Zhang", "tldr": "", "abstract": "This paper considers the problem of online learning with non-convex loss functions in dynamic environments. Recently, Suggala and Netrapalli [2020] demonstrated that follow the perturbed leader (FTPL) can achieve optimal regret for non-convex losses, but their results are limited to static environments. In this research, we examine dynamic environments and choose \\emph{dynamic regret} and \\emph{adaptive regret} to measure the performance. First, we propose an algorithm named FTPL-D by restarting FTPL periodically and establish $O(T^\\frac{2}{3}(V_T+1)^\\frac{1}{3})$ dynamic regret with the prior knowledge of $V_T$, which is the variation of loss functions. In the case that $V_T$ is unknown, we run multiple FTPL-D with different restarting parameters as experts and use a meta-algorithm to track the best one on the fly. To address the challenge of non-convexity, we utilize randomized sampling in the process of tracking experts. Next, we present a novel algorithm called FTPL-A that dynamically maintains a group of FTPL experts and combines them with an advanced meta-algorithm to obtain $O(\\sqrt{\\tau\\log{T}})$ adaptive regret for any interval of length $\\tau$. Moreover, we demonstrate that FTPL-A also attains an $\\tilde{O}(T^\\frac{2}{3}(V_T+1)^\\frac{1}{3})$ dynamic regret bound. Finally, we discuss the application to online constrained meta-learning and conduct experiments to verify the effectiveness of our methods.", "keywords": "Online Learning;Non-convexity;Dynamic Environments", "primary_area": "online_learning", "supplementary_material": "/attachment/a5d86fe497f4c140e01f8c3a1406dd93d601af02.zip", "author": "Zhipan Xu;Lijun Zhang", "authorids": "~Zhipan_Xu1;~Lijun_Zhang1", "gender": ";", "homepage": "https://xzptofu.github.io/publications/;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Zhipan_Xu1;~Lijun_Zhang1", "aff": "Nanjing University;", "aff_domain": "nju.edu.cn;", "position": "Undergrad student;", "bibtex": "@inproceedings{\nxu2024online,\ntitle={Online Non-convex Learning in Dynamic Environments},\nauthor={Zhipan Xu and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DrQXDKbGgy}\n}", "github": "", "reviewers": "njVm;fFaA;wXns;PVQP", "pdf_size": 5877920, "rating": "5;5;6;6", "confidence": "3;2;3;4", "soundness": "2;2;4;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "51;45;20;87", "wc_strengths": "50;36;28;100", "wc_weaknesses": "50;42;149;346", "wc_questions": "15;117;63;8", "wc_limitations": "23;15;18;2", "wc_review": "189;255;278;543", "wc_reply_reviewers": "22;215;104;232", "wc_reply_authors": "29;368;196;570", "reply_reviewers": "1;3;2;3", "reply_authors": "2;4;3;5", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 50.75, 23.94133454926855 ], "wc_strengths_avg": [ 53.5, 27.977669667075563 ], "wc_weaknesses_avg": [ 146.75, 122.51402980883455 ], "wc_questions_avg": [ 50.75, 43.71713050967549 ], "wc_limitations_avg": [ 14.5, 7.762087348130012 ], "wc_review_avg": [ 316.25, 134.92845326320167 ], "wc_reply_reviewers_avg": [ 143.25, 85.53763791454614 ], "wc_reply_authors_avg": [ 290.75, 200.89720630212855 ], "reply_reviewers_avg": [ 2.25, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9AtthG-aTB8J:scholar.google.com/&scioq=Online+Non-convex+Learning+in+Dynamic+Environments&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "nju.edu.cn;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "DsN7tHNo78", "title": "Manipulation Intention Understanding for Accurate Zero-Shot Composed Image Retrieval", "track": "main", "status": "Reject", "tldr": "", "abstract": "Composed Image Retrieval (CIR) facilitates retrieving an image matching a reference image while incorporating specified textual modifications, which is crucial for internet searches and e-commerce. Traditional supervised CIR methods rely on annotated triplets, which are labor-intensive and limit generalizability. Recent advances in Zero-Shot Composed Image Retrieval (ZS-CIR) address the challenge of performing this task without annotated triplets. A key challenge in ZS-CIR is training models on limited intention-relevant datasets to understand human intention implicitly expressed in textual modifications for accurately retrieving target images. In this paper, we introduce an image-text dataset incorporated with pseudo-manipulation intentions to enhance the training of ZS-CIR models in understanding human manipulation intents. Based on our dataset, we propose a novel framework, De-MINDS, for capturing the intent humans aim to modify, thereby enhancing the ZS-CIR model's ability to understand human manipulation descriptions. Specifically, a simple mapping network first maps image information into language space and forms a target description with a manipulation description. Subsequently, De-MINDS captures intention-relevant information from target descriptions and converts them into several pseudo-word tokens for accurate ZS-CIR. The De-MINDS model exhibits robust generalization and significant improvements in performance across four ZS-CIR tasks. It achieves performance improvements from 2.05% to 4.35% over the best methods and establishes new state-of-the-art results with comparable inference times. Our code is available at https://anonymous.4open.science/r/De-MINDS/.", "keywords": "Vision and Language;Zero-Shot Compose Image Retreival", "primary_area": "machine_vision", "supplementary_material": "/attachment/2f617e19f776bdedf808351ddd3db63b011f223a.gz", "author": "Yuanmin Tang;Jing Yu;Keke Gai;Gang Xiong;Gaopeng Gou;Qi Wu", "authorids": "~Yuanmin_Tang1;~Jing_Yu2;~Keke_Gai1;~Gang_Xiong1;~Gaopeng_Gou2;~Qi_Wu3", "gender": "M;M;M;M;M;F", "homepage": "https://mmlab-iie.github.io/author/yuanmin-tang/;https://www.cloud-conf.net/gaikeke/;https://people.ucas.edu.cn/~xionggang;https://people.ucas.ac.cn/~gougaopeng;http://qi-wu.me/;https://mmlab-iie.github.io/", "dblp": "256/1307;164/3309;;13/10808;96/3446-1;42/6466-7", "google_scholar": "https://scholar.google.com.hk/citations?user=gPohD_kAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?user=aKXe1FEAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-2849-1852;0000-0001-6784-0221;0000-0002-3190-6521;;;", "linkedin": ";;;;;", "or_profile": "~Yuanmin_Tang1;~Keke_Gai1;~Gang_Xiong1;~Gaopeng_Gou2;~Qi_Wu3;~Jing_Yuan2", "aff": " Institute of Information Engineering, CAS ;Beijing Institute of Technology;Institute of Information Engineering, Chinese Academy of Science;University of Chinese Academy of Sciences;The University of Adelaide;Institute of Information Engineering, CAS", "aff_domain": "iie.ac.cn;bit.edu.cn;iie.ac.cn;ucas.ac.cn;adelaide.edu.au;iie.ac.cn", "position": "PhD student;Full Professor;Full Professor;Full Professor;Associate Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024manipulation,\ntitle={Manipulation Intention Understanding for Accurate Zero-Shot Composed Image Retrieval},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=DsN7tHNo78}\n}", "github": "", "project": "", "reviewers": "6wVo;CKWd;KSmi", "site": "https://openreview.net/forum?id=DsN7tHNo78", "pdf_size": 16047358, "rating": "4;5;6", "confidence": "4;5;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "103;83;86", "wc_strengths": "21;50;74", "wc_weaknesses": "196;84;270", "wc_questions": "24;16;94", "wc_limitations": "5;6;42", "wc_review": "349;239;566", "wc_reply_reviewers": "0;105;84", "wc_reply_authors": "0;451;53", "reply_reviewers": "0;1;1", "reply_authors": "1;4;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 90.66666666666667, 8.806563209081938 ], "wc_strengths_avg": [ 48.333333333333336, 21.669230617526676 ], "wc_weaknesses_avg": [ 183.33333333333334, 76.46059260967196 ], "wc_questions_avg": [ 44.666666666666664, 35.03648891592243 ], "wc_limitations_avg": [ 17.666666666666668, 17.21110752456745 ], "wc_review_avg": [ 384.6666666666667, 135.85858660959033 ], "wc_reply_reviewers_avg": [ 63.0, 45.36518488885502 ], "wc_reply_authors_avg": [ 168.0, 201.27758610105266 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IUZVjEAc_vMJ:scholar.google.com/&scioq=Manipulation+Intention+Understanding+for+Accurate+Zero-Shot+Composed+Image+Retrieval&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Chinese Academy of Sciences;Beijing Institute of Technology;University of Chinese Academy of Sciences;University of Adelaide", "aff_unique_dep": "Institute of Information Engineering;;;", "aff_unique_url": "http://www.cas.cn;http://www.bit.edu.cn/;http://www.ucas.ac.cn;https://www.adelaide.edu.au", "aff_unique_abbr": "CAS;BIT;UCAS;Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Language Model as Visual Explainer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96068", "id": "Dsi8Ibxg9H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dsi8Ibxg9H", "openreview": "https://openreview.net/forum?id=Dsi8Ibxg9H", "poster": "/media/PosterPDFs/NeurIPS%202024/96068.png?t=1733508714.7443328", "project": "", "author_site": "Xingyi Yang, Xinchao Wang", "tldr": "", "abstract": "In this paper, we present Language Model as Visual Explainer (\\texttt{LVX}), a systematic approach for interpreting the internal workings of vision models using a tree-structured linguistic explanation, without the need for model training. Central to our strategy is the collaboration between vision models and LLM to craft explanations. On one hand, the LLM is harnessed to delineate hierarchical visual attributes, while concurrently, a text-to-image API retrieves images that are most aligned with these textual concepts. By mapping the collected texts and images to the vision model's embedding space, we construct a hierarchy-structured visual embedding tree. This tree is dynamically pruned and grown by querying the LLM using language templates, tailoring the explanation to the model. Such a scheme allows us \nto seamlessly incorporate new attributes while eliminating undesired concepts based on the model's representations. When applied to testing samples, our method provides human-understandable explanations in the form of attribute-laden trees. Beyond explanation, we retrained the vision model by calibrating it on the generated concept hierarchy, allowing the model to incorporate the refined knowledge of visual attributes. To access the effectiveness of our approach, we introduce new benchmarks and conduct rigorous evaluations, demonstrating its plausibility, faithfulness, and stability.", "keywords": "GPT; Large Language Model; Explainability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/a93b32c659686d83d08083f492f7f1123a96f3c9.zip", "author": "Xingyi Yang;Xinchao Wang", "authorids": "~Xingyi_Yang1;~Xinchao_Wang1", "gender": "M;M", "homepage": "https://adamdad.github.io/;https://sites.google.com/site/sitexinchaowang/", "dblp": ";", "google_scholar": "1n2OPtwAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xingyi_Yang1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2024language,\ntitle={Language Model as Visual Explainer},\nauthor={Xingyi Yang and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dsi8Ibxg9H}\n}", "github": "", "reviewers": "UiCA;FVKA;U6GY;kLjQ", "pdf_size": 18968973, "rating": "4;5;5;8", "confidence": "4;4;5;4", "soundness": "2;3;2;4", "novelty": "2;3;2;4", "presentation": "2;2;4;4", "wc_summary": "127;65;92;158", "wc_strengths": "65;50;72;68", "wc_weaknesses": "514;378;234;11", "wc_questions": "2;16;25;101", "wc_limitations": "6;4;1;39", "wc_review": "714;513;424;377", "wc_reply_reviewers": "85;118;352;11", "wc_reply_authors": "870;84;808;11", "reply_reviewers": "2;1;2;1", "reply_authors": "4;2;4;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 110.5, 35.14612354157995 ], "wc_strengths_avg": [ 63.75, 8.317902379807062 ], "wc_weaknesses_avg": [ 284.25, 186.25570461062395 ], "wc_questions_avg": [ 36.0, 38.41223763333763 ], "wc_limitations_avg": [ 12.5, 15.402921800749363 ], "wc_review_avg": [ 507.0, 129.10654514779642 ], "wc_reply_reviewers_avg": [ 141.5, 127.55880996622695 ], "wc_reply_authors_avg": [ 443.25, 397.19603157634896 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18191039759375165449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nus.edu;nus.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Spectral Learning of Shared Dynamics Between Generalized-Linear Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96067", "id": "DupvYqqlAG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DupvYqqlAG", "openreview": "https://openreview.net/forum?id=DupvYqqlAG", "poster": "/media/PosterPDFs/NeurIPS%202024/96067.png?t=1731688808.0956702", "project": "", "author_site": "Lucine L Oganesian, Omid G. Sani, Maryam Shanechi", "tldr": "", "abstract": "Generalized-linear dynamical models (GLDMs) remain a widely-used framework within neuroscience for modeling time-series data, such as neural spiking activity or categorical decision outcomes. Whereas the standard usage of GLDMs is to model a single data source, certain applications require jointly modeling two generalized-linear time-series sources while also dissociating their shared and private dynamics. Most existing GLDM variants and their associated learning algorithms do not support this capability. Here we address this challenge by developing a multi-step analytical subspace identification algorithm for learning a GLDM that explicitly models shared vs. private dynamics within two generalized-linear time-series. In simulations, we demonstrate our algorithm's ability to dissociate and model the dynamics within two time-series sources while being agnostic to their respective observation distributions. In neural data, we consider two specific applications of our algorithm for modeling discrete population spiking activity with respect to a secondary time-series. In both synthetic and real data, GLDMs learned with our algorithm more accurately decoded one time-series from the other using lower-dimensional latent states, as compared to models identified using existing GLDM learning algorithms.", "keywords": "state space models;subspace identification;dynamical systems;neural coding;generalized-linear models", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/09ca7713bda751cebfb3f78ded1547c1e17aa93a.zip", "author": "Lucine L Oganesian;Omid G. Sani;Maryam Shanechi", "authorids": "~Lucine_L_Oganesian1;~Omid_G._Sani1;~Maryam_Shanechi1", "gender": "F;;F", "homepage": "https://www.lucineoganesian.com/;;https://nseip.usc.edu/", "dblp": ";;50/6132", "google_scholar": "k4QyNUYAAAAJ;;csGAeKgAAAAJ", "orcid": "0000-0001-6773-6643;;0000-0002-0544-7720", "linkedin": "lucine-oganesian-218b9b86/;;", "or_profile": "~Lucine_L_Oganesian1;~Omid_G._Sani1;~Maryam_Shanechi1", "aff": "University of Southern California;;University of Southern California", "aff_domain": "usc.edu;;usc.edu", "position": "MS student;;Full Professor", "bibtex": "@inproceedings{\noganesian2024spectral,\ntitle={Spectral Learning of Shared Dynamics Between Generalized-Linear Processes},\nauthor={Lucine L Oganesian and Omid G. Sani and Maryam Shanechi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DupvYqqlAG}\n}", "github": "", "reviewers": "95d5;Hfka;m47S;3CeQ", "pdf_size": 2140452, "rating": "5;5;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "94;68;189;87", "wc_strengths": "129;36;135;21", "wc_weaknesses": "604;61;317;57", "wc_questions": "224;50;236;2", "wc_limitations": "122;20;70;2", "wc_review": "1173;235;947;169", "wc_reply_reviewers": "445;0;17;0", "wc_reply_authors": "827;0;0;0", "reply_reviewers": "2;0;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 109.5, 46.87483333303704 ], "wc_strengths_avg": [ 80.25, 52.06426317542581 ], "wc_weaknesses_avg": [ 259.75, 224.94152017802315 ], "wc_questions_avg": [ 128.0, 103.48912986396203 ], "wc_limitations_avg": [ 53.5, 46.74130935264865 ], "wc_review_avg": [ 631.0, 437.00114416326187 ], "wc_reply_reviewers_avg": [ 115.5, 190.36346813398836 ], "wc_reply_authors_avg": [ 206.75, 358.1015044648654 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7AgCqgHFEKIJ:scholar.google.com/&scioq=Spectral+Learning+of+Shared+Dynamics+Between+Generalized-Linear+Processes&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "usc.edu;;usc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "NaturalBench: Evaluating Vision-Language Models on Natural Adversarial Samples", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97799", "id": "Dx88A9Zgnv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dx88A9Zgnv", "openreview": "https://openreview.net/forum?id=Dx88A9Zgnv", "poster": "", "project": "", "author_site": "Baiqi Li, Zhiqiu Lin, WENXUAN PENG, Jean de Dieu Nyandwi, Daniel Jiang, Zixian Ma, Simran Khanuja, Ranjay Krishna, Graham Neubig, Deva Ramanan", "tldr": "", "abstract": "Vision-language models (VLMs) have made significant progress in recent visual-question-answering (VQA) benchmarks that evaluate complex visio-linguistic reasoning. However, are these models truly effective? In this work, we show that VLMs still struggle with natural images and questions that humans can easily answer, which we term $\\textbf{natural adversarial samples}$. We also find it surprisingly easy to generate these VQA samples from natural image-text corpora using off-the-shelf models like CLIP and ChatGPT. We propose a semi-automated approach to collect a new benchmark, ${\\bf NaturalBench}$, for reliably evaluating VLMs with 10,000 human-verified VQA samples. Crucially, we adopt a $\\textbf{vision-centric}$ design by pairing each question with two images that yield different answers, preventing ``blind'' solutions from answering without using the images. This makes NaturalBench more challenging than previous benchmarks that can largely be solved with language priors like commonsense knowledge. We evaluate ${\\bf 53}$ state-of-the-art VLMs on NaturalBench, showing that models like BLIP-3, LLaVA-OneVision, Cambrian-1, InternLM-XC2, Llama3.2-Vision, Molmo, Qwen2-VL, and even the (closed-source) GPT-4o lag 50%-70% behind human performance (which is above 90%). We analyze why NaturalBench is hard from two angles: (1) ${\\bf Compositionality:}$ Solving NaturalBench requires diverse visio-linguistic skills, including understanding attribute bindings, object relationships, and advanced reasoning like logic and counting. To this end, unlike prior work that uses a single tag per sample, we tag each NaturalBench sample with 1 to 8 skill tags for fine-grained evaluation. (2) ${\\bf Biases: }$ NaturalBench exposes severe biases in VLMs, as models often choose the same answer regardless of the image. We show that debiasing can be crucial for VLM performance. Lastly, we apply our benchmark curation method to diverse data sources, including long captions (over 100 words) and non-English languages like Chinese and Hindi, highlighting its potential for dynamic evaluations of VLMs.", "keywords": "vision-language models;visual-question-answering;compositionality;visual reasoning", "primary_area": "", "supplementary_material": "/attachment/13499546d7b41c8f410d868248bc5d4d40ff8aef.zip", "author": "Baiqi Li;Zhiqiu Lin;Wenxuan Peng;Jean de Dieu Nyandwi;Daniel Jiang;Zixian Ma;Simran Khanuja;Ranjay Krishna;Graham Neubig;Deva Ramanan", "authorids": "~Baiqi_Li2;~Zhiqiu_Lin1;~Wenxuan_Peng2;~Jean_de_Dieu_Nyandwi1;~Daniel_Jiang3;~Zixian_Ma1;~Simran_Khanuja1;~Ranjay_Krishna1;~Graham_Neubig1;~Deva_Ramanan1", "gender": ";M;M;M;F;F;M;M;M;F", "homepage": ";https://linzhiqiu.github.io;https://nyandwi.com;;https://zixianma.github.io/;https://simran-khanuja.github.io/;http://ranjaykrishna.com;http://phontron.com;https://www.cs.cmu.edu/~deva/;https://lilydaytoy.github.io/", "dblp": "219/2180.html;230/4394;390/9740;;311/3682;255/5469;167/3785;03/8155;49/488;331/2018", "google_scholar": "mwdpP7sAAAAJ;https://scholar.google.com/citations?hl=en;X1KlIsQAAAAJ;;0E-IY2IAAAAJ;yInhszwAAAAJ;IcqahyAAAAAJ;wlosgkoAAAAJ;9B8PoXUAAAAJ;", "orcid": ";;;;;;0000-0001-8784-2531;;;", "linkedin": ";zhiqiu-lin-b49ba7126/;nyandwi/;djiang04/;zixian-ma/;simran-khanuja-6b80b6144/;ranjay-krishna-1a344444/;;;wenxuan-peng-4858a5220/", "or_profile": "~Baiqi_Li2;~Zhiqiu_Lin1;~Jean_de_Dieu_Nyandwi1;~Daniel_Jiang3;~Zixian_Ma1;~Simran_Khanuja1;~Ranjay_Krishna1;~Graham_Neubig1;~Deva_Ramanan1;~WENXUAN_PENG1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Department of Computer Science, University of Washington;Carnegie Mellon University;University of Washington;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Nanyang Technological University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu;cs.washington.edu;andrew.cmu.edu;cs.washington.edu;cmu.edu;cs.cmu.edu;ntu.edu.sg", "position": "Research Assistant;PhD student;MS student;Undergrad student;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nli2024naturalbench,\ntitle={NaturalBench: Evaluating Vision-Language Models on Natural Adversarial Samples},\nauthor={Baiqi Li and Zhiqiu Lin and Wenxuan Peng and Jean de Dieu Nyandwi and Daniel Jiang and Zixian Ma and Simran Khanuja and Ranjay Krishna and Graham Neubig and Deva Ramanan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Dx88A9Zgnv}\n}", "github": "", "reviewers": "VBjr;E37r;4Nfw", "pdf_size": 6613979, "rating": "6;7;9", "confidence": "4;4;4", "wc_summary_and_contributions": "79;97;69", "wc_strengths": "63;2;63", "wc_improvement": "35;2;20", "wc_limitations": "37;29;1", "wc_correctness": "1;28;1", "wc_clarity": "1;9;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;14;1", "wc_additional_feedback": "1;1;1", "wc_review": "219;183;158", "wc_reply_reviewers": "0;14;5", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 81.66666666666667, 11.585431464655178 ], "wc_strengths_avg": [ 42.666666666666664, 28.75567576825293 ], "wc_improvement_avg": [ 19.0, 13.490737563232042 ], "wc_limitations_avg": [ 22.333333333333332, 15.434449203720302 ], "wc_correctness_avg": [ 10.0, 12.727922061357855 ], "wc_clarity_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 5.333333333333333, 6.128258770283412 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 186.66666666666666, 25.037749277618563 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 5.792715732327588 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5526023683616977271&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu;cs.washington.edu;andrew.cmu.edu;cs.washington.edu;cmu.edu;cs.cmu.edu;ntu.edu.sg", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;1;0;0;2", "aff_unique_norm": "Carnegie Mellon University;University of Washington;Nanyang Technological University", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.cmu.edu;https://www.washington.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "CMU;UW;NTU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Seattle;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;1", "aff_country_unique": "United States;Singapore" }, { "title": "Be like a Goldfish, Don't Memorize! Mitigating Memorization in Generative LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96066", "id": "DylSyAfmWs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DylSyAfmWs", "openreview": "https://openreview.net/forum?id=DylSyAfmWs", "poster": "", "project": "", "author_site": "Abhimanyu Hans, John Kirchenbauer, Yuxin Wen, Neel Jain, Hamid Kazemi, Prajwal Singhania, Siddharth Singh, Gowthami Somepalli, Jonas Geiping, Abhinav Bhatele, Tom Goldstein", "tldr": "", "abstract": "Large language models can memorize and repeat their training data, causing privacy and copyright risks. To mitigate memorization, we introduce a subtle modification to the next-token training objective that we call the goldfish loss. During training, a randomly sampled subsets of tokens are excluded from the loss computation. These dropped tokens are not memorized by the model, which prevents verbatim reproduction of a complete chain of tokens from the training set. We run extensive experiments training billion-scale LLaMA-2 models, both pre-trained and trained from scratch, and demonstrate significant reductions in extractable memorization with little to no impact on downstream benchmarks.\n\n_Code and checkpoints: https://github.com/ahans30/goldfish-loss_", "keywords": "Generative AI;Memorization;LLMs;Law;Privacy", "primary_area": "generative_models", "supplementary_material": "/attachment/671019e40c7bce626c733f9d130abbf0b862bfc2.zip", "author": "Abhimanyu Hans;John Kirchenbauer;Yuxin Wen;Neel Jain;Hamid Kazemi;Prajwal Singhania;Siddharth Singh;Gowthami Somepalli;Jonas Geiping;Abhinav Bhatele;Tom Goldstein", "authorids": "~Abhimanyu_Hans1;~John_Kirchenbauer1;~Yuxin_Wen2;~Neel_Jain1;~Hamid_Kazemi1;~Prajwal_Singhania1;~Siddharth_Singh7;~Gowthami_Somepalli1;~Jonas_Geiping1;~Abhinav_Bhatele1;~Tom_Goldstein1", "gender": "M;M;;;M;M;M;F;M;;M", "homepage": "https://ahans30.github.io/;https://jwkirchenbauer.notion.site/;https://yuxinwenrick.github.io/;;;https://prajwal1210.github.io;https://siddharth9820.github.io/;https://somepago.github.io/;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~bhatele;https://www.cs.umd.edu/~tomg/", "dblp": ";321/0678;;;;225/4526;;286/5012;190/7229;82/6441;25/8184", "google_scholar": "b77HAM8AAAAJ;48GJrbsAAAAJ;oUYfjg0AAAAJ;https://scholar.google.com/citations?hl=en;7hNdaGQAAAAJ;https://scholar.google.co.in/citations?user=zz6yeyYAAAAJ;jNyBgaEAAAAJ;T2ezBDsAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;3x65qtwAAAAJ;KmSuVtgAAAAJ", "orcid": ";;;;;0000-0003-4277-1287;0000-0002-2756-4290;;;;", "linkedin": "abhimanyu-hans-891a15122/;johnkirchenbauer/;;neel-jain-0a6a239/;hamid-kazemi-608a8085/;prajwal1210/;;;;;", "or_profile": "~Abhimanyu_Hans1;~John_Kirchenbauer1;~Yuxin_Wen2;~Neel_Jain1;~Hamid_Kazemi1;~Prajwal_Singhania1;~Siddharth_Singh7;~Gowthami_Somepalli1;~Jonas_Geiping1;~Abhinav_Bhatele1;~Tom_Goldstein1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;tuebingen.mpg.de;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhans2024be,\ntitle={Be like a Goldfish, Don't Memorize! Mitigating Memorization in Generative {LLM}s},\nauthor={Abhimanyu Hans and John Kirchenbauer and Yuxin Wen and Neel Jain and Hamid Kazemi and Prajwal Singhania and Siddharth Singh and Gowthami Somepalli and Jonas Geiping and Abhinav Bhatele and Tom Goldstein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DylSyAfmWs}\n}", "github": "", "reviewers": "YZ48;muP6;EFrP;wVka", "pdf_size": 573693, "rating": "4;5;6;7", "confidence": "4;4;2;4", "soundness": "2;2;3;4", "novelty": "2;2;2;4", "presentation": "3;4;3;4", "wc_summary": "57;246;90;95", "wc_strengths": "56;55;97;81", "wc_weaknesses": "178;179;80;108", "wc_questions": "94;82;41;41", "wc_limitations": "11;61;17;243", "wc_review": "396;623;325;568", "wc_reply_reviewers": "92;262;0;35", "wc_reply_authors": "644;470;0;22", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 122.0, 73.06503951959515 ], "wc_strengths_avg": [ 72.25, 17.68297203526602 ], "wc_weaknesses_avg": [ 136.25, 43.395708313150045 ], "wc_questions_avg": [ 64.5, 23.879907872519105 ], "wc_limitations_avg": [ 83.0, 94.37160589923221 ], "wc_review_avg": [ 478.0, 121.7148306493502 ], "wc_reply_reviewers_avg": [ 97.25, 100.62647514446682 ], "wc_reply_authors_avg": [ 284.0, 279.95356757862544 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9445291486345998739&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cs.umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;tuebingen.mpg.de;umd.edu;umd.edu", "author_num": 11, "aff_unique_index": "0;1;1;1;1;1;1;1;2;1;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Department of Computer Science;;Intelligent Systems", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.mpi-is.mpg.de", "aff_unique_abbr": "UMD;UMD;MPI-IS", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "RAGraph: A General Retrieval-Augmented Graph Learning Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96065", "id": "Dzk2cRUFMt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Dzk2cRUFMt", "openreview": "https://openreview.net/forum?id=Dzk2cRUFMt", "poster": "/media/PosterPDFs/NeurIPS%202024/96065.png?t=1731564236.6412532", "project": "", "author_site": "Xinke Jiang, Rihong Qiu, Yongxin Xu, WentaoZhang, Yichen Zhu, Ruizhe Zhang, Yuchen Fang, Chu Xu, Junfeng Zhao, Yasha Wang", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have become essential in interpreting relational data across various domains, yet, they often struggle to generalize to unseen graph data that differs markedly from training instances. In this paper, we introduce a novel framework called General Retrieval-Augmented Graph Learning (RAGraph), which brings external graph data into the general graph foundation model to improve model generalization on unseen scenarios. On the top of our framework is a toy graph vector library that we established, which captures key attributes, such as features and task-specific label information. During inference, the RAGraph adeptly retrieves similar toy graphs based on key similarities in downstream tasks, integrating the retrieved data to enrich the learning context via the message-passing prompting mechanism. Our extensive experimental evaluations demonstrate that RAGraph significantly outperforms state-of-the-art graph learning methods in multiple tasks such as node classification, link prediction, and graph classification across both dynamic and static datasets. Furthermore, extensive testing confirms that RAGraph consistently maintains high performance without the need for task-specific fine-tuning, highlighting its adaptability, robustness, and broad applicability.", "keywords": "Graph Neural Networks;Graph Prompt Tuning;Retrieval-Augmented Generation", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Xinke Jiang;Rihong Qiu;Yongxin Xu;WentaoZhang;Yichen Zhu;Ruizhe zhang;Yuchen Fang;Xu Chu;Junfeng Zhao;Yasha Wang", "authorids": "~Xinke_Jiang1;~Rihong_Qiu1;~Yongxin_Xu1;~WentaoZhang1;~Yichen_Zhu1;~Ruizhe_zhang6;~Yuchen_Fang1;~Xu_Chu1;~Junfeng_Zhao1;~Yasha_Wang3", "gender": ";;M;Not Specified;M;M;;;F;M", "homepage": "https://thinker-jiang.gitee.io;;;https://github.com/VentaoZzz;;;;;https://cs.pku.edu.cn/info/1084/1224.htm;", "dblp": "326/4687.html;365/4704;;;;365/4533;;;72/3918-1;70/2725.html", "google_scholar": ";Vhgqb_EAAAAJ;XcvJ6yAAAAAJ;;eyKyrbsAAAAJ;U7zL6F4AAAAJ;;;;", "orcid": ";0009-0009-1835-103X;0000-0002-7301-7984;;0000-0001-5126-838X;0009-0005-4795-0702;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Xinke_Jiang1;~Rihong_Qiu1;~Yongxin_Xu1;~WentaoZhang1;~Yichen_Zhu1;~Ruizhe_zhang6;~Yuchen_Fang1;~Xu_Chu1;~Junfeng_Zhao1;~Yasha_Wang3", "aff": "Peking University;Zhejiang University;Peking University;ShanghaiTech University;Midea Group;Peking University;;;Peking University;Peking University", "aff_domain": "pku.edu.cn;zju.edu.cn;pku.edu.cn;shanghaitech.edu.cn;midea.com;pku.edu.cn;;;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Undergrad student;PhD student;MS student;Researcher;MS student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiang2024ragraph,\ntitle={{RAG}raph: A General Retrieval-Augmented Graph Learning Framework},\nauthor={Xinke Jiang and Rihong Qiu and Yongxin Xu and WentaoZhang and Yichen Zhu and Ruizhe zhang and Yuchen Fang and Xu Chu and Junfeng Zhao and Yasha Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Dzk2cRUFMt}\n}", "github": "", "reviewers": "sivC;v5NH;3qQ8;va3W", "pdf_size": 0, "rating": "5;6;6;6", "confidence": "3;3;4;4", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "30;66;145;161", "wc_strengths": "31;21;60;89", "wc_weaknesses": "78;102;378;59", "wc_questions": "1;38;81;4", "wc_limitations": "1;1;11;1", "wc_review": "141;228;675;314", "wc_reply_reviewers": "87;0;24;154", "wc_reply_authors": "1236;95;128;951", "reply_reviewers": "1;0;1;3", "reply_authors": "4;2;3;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 54.31620384378864 ], "wc_strengths_avg": [ 50.25, 26.564779313971346 ], "wc_weaknesses_avg": [ 154.25, 130.07762105758238 ], "wc_questions_avg": [ 31.0, 32.31872522238462 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 339.5, 203.12865381329144 ], "wc_reply_reviewers_avg": [ 66.25, 59.801233264875066 ], "wc_reply_authors_avg": [ 602.5, 501.3683775429001 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9118276896512568256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;zju.edu.cn;pku.edu.cn;shanghaitech.edu.cn;midea.com;pku.edu.cn;;;pku.edu.cn;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;1;0;2;3;0;0;0", "aff_unique_norm": "Peking University;Zhejiang University;ShanghaiTech University;Midea Group", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.zju.edu.cn;https://www.shanghaitech.edu.cn;https://www.mideaglobal.com", "aff_unique_abbr": "Peking U;ZJU;ShanghaiTech;Midea", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Membership Inference on Text-to-Image Diffusion Models via Conditional Likelihood Discrepancy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96064", "id": "DztaBt4wP5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=DztaBt4wP5", "openreview": "https://openreview.net/forum?id=DztaBt4wP5", "poster": "/media/PosterPDFs/NeurIPS%202024/96064.png?t=1731627195.6896918", "project": "", "author_site": "Shengfang ZHAI, Huanran Chen, Yinpeng Dong, Jiajun Li, Qingni Shen, Yansong Gao, Hang Su, Yang Liu", "tldr": "", "abstract": "Text-to-image diffusion models have achieved tremendous success in the field of controllable image generation, while also coming along with issues of privacy leakage and data copyrights. Membership inference arises in these contexts as a potential auditing method for detecting unauthorized data usage. While some efforts have been made on diffusion models, they are not applicable to text-to-image diffusion models due to the high computation overhead and enhanced generalization capabilities. In this paper, we first identify a conditional overfitting phenomenon in text-to-image diffusion models, indicating that these models tend to overfit the conditional distribution of images given the corresponding text rather than the marginal distribution of images only. Based on this observation, we derive an analytical indicator, namely Conditional Likelihood Discrepancy (CLiD), to perform membership inference, which reduces the stochasticity in estimating memorization of individual samples. Experimental results demonstrate that our method significantly outperforms previous methods across various data distributions and dataset scales. Additionally, our method shows superior resistance to overfitting mitigation strategies, such as early stopping and data augmentation.", "keywords": "Diffusion models;Membership inference;Conditional Likelihood;Text-to-Image Synthesis", "primary_area": "privacy", "supplementary_material": "/attachment/82c379664a9fcd88a7fc942d8262e92b3340416f.zip", "author": "Shengfang Zhai;Huanran Chen;Yinpeng Dong;Jiajun Li;Qingni Shen;Yansong Gao;Hang Su;Yang Liu", "authorids": "~Shengfang_Zhai1;~Huanran_Chen1;~Yinpeng_Dong2;~Jiajun_Li8;~Qingni_Shen1;~Yansong_Gao2;~Hang_Su3;~Yang_Liu36", "gender": ";M;M;M;;;;M", "homepage": "https://zhaisf.github.io/;https://huanranchen.github.io/;https://dongyp13.github.io;https://github.com/lj595;https://www.ss.pku.edu.cn/teacherteam/teacherlist/1634-%E6%B2%88%E6%99%B4%E9%9C%93.html;;;https://personal.ntu.edu.sg/yangliu/", "dblp": "322/0756;329/6558;183/0980;;11/325;;;51/3710-3", "google_scholar": "bJYY-tIAAAAJ;https://scholar.google.co.jp/citations?user=QYsKXccAAAAJ;6_4ad84AAAAJ;;Nm1fclcAAAAJ;;;https://scholar.google.com.sg/citations?hl=en", "orcid": "0000-0001-6820-6361;;;;0000-0002-0605-6043;;;0000-0001-7300-9215", "linkedin": ";;;;;;;", "or_profile": "~Shengfang_Zhai1;~Huanran_Chen1;~Yinpeng_Dong2;~Jiajun_Li8;~Qingni_Shen1;~Yansong_Gao2;~Hang_Su3;~Yang_Liu36", "aff": "Peking University;;Tsinghua University;Peking University;Peking University;;;Nanyang Technological University", "aff_domain": "pku.edu.cn;;tsinghua.edu.cn;stu.pku.edu.cn;pku.edu.cn;;;ntu.edu.sg", "position": "PhD student;;Postdoc;MS student;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nzhai2024membership,\ntitle={Membership Inference on Text-to-Image Diffusion Models via Conditional Likelihood Discrepancy},\nauthor={Shengfang Zhai and Huanran Chen and Yinpeng Dong and Jiajun Li and Qingni Shen and Yansong Gao and Hang Su and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=DztaBt4wP5}\n}", "github": "", "reviewers": "gj8y;k8Sd;iMS8;meQ8", "pdf_size": 1168006, "rating": "6;7;7;7", "confidence": "5;5;3;3", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "47;69;52;124", "wc_strengths": "159;50;59;119", "wc_weaknesses": "44;412;56;64", "wc_questions": "6;42;47;82", "wc_limitations": "10;2;13;1", "wc_review": "266;575;227;390", "wc_reply_reviewers": "11;753;21;12", "wc_reply_authors": "28;3362;31;25", "reply_reviewers": "1;4;1;1", "reply_authors": "2;7;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.0, 30.553232234904378 ], "wc_strengths_avg": [ 96.75, 44.66752175798429 ], "wc_weaknesses_avg": [ 144.0, 154.89351180730586 ], "wc_questions_avg": [ 44.25, 26.929305598176867 ], "wc_limitations_avg": [ 6.5, 5.123475382979799 ], "wc_review_avg": [ 364.5, 135.6180297748054 ], "wc_reply_reviewers_avg": [ 199.25, 319.7314302660907 ], "wc_reply_authors_avg": [ 861.5, 1443.6659066418379 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11283512849860481455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;;tsinghua.edu.cn;stu.pku.edu.cn;pku.edu.cn;;;ntu.edu.sg", "author_num": 8, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Peking University;Tsinghua University;Nanyang Technological University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "Peking U;THU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "CVQA: Culturally-diverse Multilingual Visual Question Answering Benchmark", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97798", "id": "E18kRXTGmV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E18kRXTGmV", "openreview": "https://openreview.net/forum?id=E18kRXTGmV", "poster": "/media/PosterPDFs/NeurIPS%202024/97798.png?t=1731352718.7776973", "project": "", "author_site": "David Romero, Chenyang Lyu, Haryo Wibowo, Santiago G\u00f3ngora, Aishik Mandal, Sukannya Purkayastha, Jesus-German Ortiz-Barajas, Emilio Cueva, Jinheon Baek, Soyeong Jeong, Injy Hamed, Yong Zheng-Xin, Zheng Wei Lim, Paula Silva, Jocelyn Dunstan, M\u00e9lanie Jouitteau, David LE MEUR, Joan Nwatu, Ganzorig Batnasan, Munkh-Erdene Otgonbold, Munkhjargal Gochoo, Guido Ivetta, Luciana Benotti, Laura Alonso Alemany, Hern\u00e1n Maina, Jiahui Geng, Tiago Timponi Torrent, Frederico Belcavello, Marcelo Viridiano, Jan Christian Blaise Cruz, Dan John Velasco, Oana Ignat, Zara Burzo, Chenxi Whitehouse, Artem Abzaliev, Teresa Clifford, Gr\u00e1inne Caulfield, Teresa Lynn, Christian Salamea-Palacios, Vladimir Araujo, Yova Kementchedjhieva, Mihail Mihaylov, Israel Azime, Henok Ademtew, Bontu Balcha, Naome A. Etori, David Adelani, Rada Mihalcea, Atnafu Lambebo Tonja, Maria Cabrera, Gisela Vallejo, Holy Lovenia, Ruochen Zhang, Marcos Estecha-Garitagoitia, Mario Rodr\u00edguez-Cantelar, Toqeer Ehsan, Rendi Chevi, Muhammad Adilazuarda, Ryandito Diandaru, Samuel Cahyawijaya, Fajri Koto, Tatsuki Kuribayashi, Haiyue Song, Aditya Khandavally, Thanmay Jayakumar, Raj Dabre, Mohamed Imam, Kumaranage Nagasinghe, Alina Dragonetti, Luis Fernando D'Haro, Niyomugisha Olivier, Jay Gala, Pranjal Chitale, Fauzan Farooqui, Thamar Solorio, Alham Aji", "tldr": "", "abstract": "Visual Question Answering~(VQA) is an important task in multimodal AI, which requires models to understand and reason on knowledge present in visual and textual data. However, most of the current VQA datasets and models are primarily focused on English and a few major world languages, with images that are Western-centric. While recent efforts have tried to increase the number of languages covered on VQA datasets, they still lack diversity in low-resource languages. More importantly, some datasets extend the text to other languages, either via translation or some other approaches, but usually keep the same images, resulting in narrow cultural representation. To address these limitations, we create CVQA, a new Culturally-diverse Multilingual Visual Question Answering benchmark dataset, designed to cover a rich set of languages and regions, where we engage native speakers and cultural experts in the data collection process. CVQA includes culturally-driven images and questions from across 28 countries in four continents, covering 26 languages with 11 scripts, providing a total of 9k questions. We benchmark several Multimodal Large Language Models (MLLMs) on CVQA, and we show that the dataset is challenging for the current state-of-the-art models. This benchmark will serve as a probing evaluation suite for assessing the cultural bias of multimodal models and hopefully encourage more research efforts towards increasing cultural awareness and linguistic diversity in this field.", "keywords": "Multimodality;Multicultural;Multilingual;VQA;Dataset;Benchmark", "primary_area": "", "supplementary_material": "/attachment/c2bdcaefeeddcc0718bea21ca4aedd49e4ba8e95.pdf", "author": "David Orlando Romero Mogrovejo;Chenyang Lyu;Haryo Akbarianto Wibowo;Santiago G\u00f3ngora;Aishik Mandal;Sukannya Purkayastha;Jesus-German Ortiz-Barajas;Emilio Villa Cueva;Jinheon Baek;Soyeong Jeong;Injy Hamed;Zheng Xin Yong;Zheng Wei Lim;Paula M\u00f3nica Silva;Jocelyn Dunstan;M\u00e9lanie Jouitteau;David LE MEUR;Joan Nwatu;Ganzorig Batnasan;Munkh-Erdene Otgonbold;Munkhjargal Gochoo;Guido Ivetta;Luciana Benotti;Laura Alonso Alemany;Hern\u00e1n Maina;Jiahui Geng;Tiago Timponi Torrent;Frederico Belcavello;Marcelo Viridiano;Jan Christian Blaise Cruz;Dan John Velasco;Oana Ignat;Zara Burzo;Chenxi Whitehouse;Artem Abzaliev;Teresa Clifford;Gr\u00e1inne Caulfield;Teresa Lynn;Christian Salamea-Palacios;Vladimir Araujo;Yova Kementchedjhieva;Mihail Minkov Mihaylov;Israel Abebe Azime;Henok Biadglign Ademtew;Bontu Fufa Balcha;Naome A Etori;David Ifeoluwa Adelani;Rada Mihalcea;Atnafu Lambebo Tonja;Maria Camila Buitrago Cabrera;Gisela Vallejo;Holy Lovenia;Ruochen Zhang;Marcos Estecha-Garitagoitia;Mario Rodr\u00edguez-Cantelar;Toqeer Ehsan;Rendi Chevi;Muhammad Farid Adilazuarda;Ryandito Diandaru;Samuel Cahyawijaya;Fajri Koto;Tatsuki Kuribayashi;Haiyue Song;Aditya Nanda Kishore Khandavally;Thanmay Jayakumar;Raj Dabre;Mohamed Fazli Mohamed Imam;Kumaranage Ravindu Yasas Nagasinghe;Alina Dragonetti;Luis Fernando D'Haro;Olivier NIYOMUGISHA;Jay Gala;Pranjal A Chitale;Fauzan Farooqui;Thamar Solorio;Alham Fikri Aji", "authorids": "~David_Orlando_Romero_Mogrovejo1;~Chenyang_Lyu1;~Haryo_Akbarianto_Wibowo1;~Santiago_G\u00f3ngora1;~Aishik_Mandal1;~Sukannya_Purkayastha1;~Jesus-German_Ortiz-Barajas1;~Emilio_Villa_Cueva1;~Jinheon_Baek1;~Soyeong_Jeong1;~Injy_Hamed1;~Zheng_Xin_Yong1;~Zheng_Wei_Lim1;~Paula_M\u00f3nica_Silva1;~Jocelyn_Dunstan1;~M\u00e9lanie_Jouitteau1;~David_LE_MEUR1;~Joan_Nwatu1;~Ganzorig_Batnasan1;~Munkh-Erdene_Otgonbold1;~Munkhjargal_Gochoo1;~Guido_Ivetta1;~Luciana_Benotti1;~Laura_Alonso_Alemany2;~Hern\u00e1n_Maina1;~Jiahui_Geng3;~Tiago_Timponi_Torrent1;~Frederico_Belcavello1;~Marcelo_Viridiano1;~Jan_Christian_Blaise_Cruz1;~Dan_John_Velasco1;~Oana_Ignat1;~Zara_Burzo1;~Chenxi_Whitehouse1;~Artem_Abzaliev1;~Teresa_Clifford1;~Gr\u00e1inne_Caulfield1;~Teresa_Lynn1;~Christian_Salamea-Palacios1;~Vladimir_Araujo1;~Yova_Kementchedjhieva1;~Mihail_Minkov_Mihaylov1;~Israel_Abebe_Azime1;~Henok_Biadglign_Ademtew1;~Bontu_Fufa_Balcha1;~Naome_A_Etori1;~David_Ifeoluwa_Adelani1;~Rada_Mihalcea1;~Atnafu_Lambebo_Tonja1;~Maria_Camila_Buitrago_Cabrera1;~Gisela_Vallejo1;~Holy_Lovenia1;~Ruochen_Zhang1;~Marcos_Estecha-Garitagoitia1;~Mario_Rodr\u00edguez-Cantelar1;~Toqeer_Ehsan1;~Rendi_Chevi1;~Muhammad_Farid_Adilazuarda1;~Ryandito_Diandaru1;~Samuel_Cahyawijaya1;~Fajri_Koto1;~Tatsuki_Kuribayashi1;~Haiyue_Song1;~Aditya_Nanda_Kishore_Khandavally1;~Thanmay_Jayakumar1;~Raj_Dabre1;~Mohamed_Fazli_Mohamed_Imam1;~Kumaranage_Ravindu_Yasas_Nagasinghe1;~Alina_Dragonetti1;~Luis_Fernando_D'Haro2;~Olivier_NIYOMUGISHA1;~Jay_Gala1;~Pranjal_A_Chitale1;~Fauzan_Farooqui1;~Thamar_Solorio2;~Alham_Fikri_Aji1", "gender": "M;M;M;M;M;F;M;M;M;F;F;M;F;F;F;F;;F;M;M;M;M;F;F;M;M;M;M;M;M;M;F;F;;;;;F;M;M;F;M;M;M;F;F;M;F;M;F;F;F;;M;;;M;M;M;M;M;M;M;M;M;M;M;M;F;M;M;M;M;M;F;M", "homepage": "https://daromog.github.io/;https://lyuchenyang.github.io;;https://sgongora27.github.io/;https://jitaishik.github.io/;;https://jgermanob.github.io/;https://villacu.github.io/;https://jinheonbaek.github.io;https://starsuzi.github.io/;;https://yongzx.github.io;;;https://sites.google.com/view/jdunstan/home;;;https://anniejoan.github.io/;;;https://moyog.github.io/;;https://benotti.github.io/;https://www.cs.famaf.unc.edu.ar/~laura/;https://nanom.github.io/;https://sites.google.com/view/jiahuigeng/;https://tiagotorrent.com;https://www.researchgate.net/profile/Frederico-Belcavello;http://viridiano.com;https://blaisecruz.com;https://danjohnvelasco.github.io/;https://oanaignat.github.io/;https://web.eecs.umich.edu/~mihalcea/urls/Zara.MyCovidStoryBook.pdf;https://chenxwh.github.io/;https://cse.engin.umich.edu/people/phd-students/;;;https://www.computing.dcu.ie/~tlynn/;http://christiansalamea.info;https://vgaraujov.github.io/;;;http://israelabebe.github.io/;https://henokb.github.io/;;;https://dadelani.github.io/;https://web.eecs.umich.edu/~mihalcea/;http://atnafuatx.github.io/;;https://gvallejo.co/;https://holylovenia.github.io/;;;https://mario-rc.github.io/;;;https://faridlazuarda.github.io/;;https://samuelcahyawijaya.github.io/;https://fajrikoto.com/;https://kuribayashi4.github.io/;https://shyyhs.github.io/;;https://thanmayj.github.io;;;;https://www.fing.edu.uy/inco/grupos/pln/;https://blogs.upm.es/gthau/luis-fernando-dharo/;;https://jaygala24.github.io;;https://FauzanFarooqui.GitHub.io;http://solorio.uh.edu/;", "dblp": ";248/1663;234/6844;277/0516;303/0684;255/8545;;328/0447;262/6003;164/0452;158/4321;266/0855;351/5563;;268/0573;;;348/0259.html;;;149/9570;375/8426;22/1403;78/6966;305/6433;228/5625;151/8505;321/1098;;244/2362;276/5347;219/1948;;;;;;126/8610;;248/8695;225/7708;;;380/4130;;;230/6973;m/RadaMihalcea;312/3167;;https://dblp.uni-trier.de/pid/236/5964.html;243/6573;;;;;310/1487;336/5056;;235/2988.html;160/0019;228/5787;https://dblp.org/pers/s/Song:Haiyue.html;;361/0280.html;127/0168;;;;57/1419.html;;308/1490;282/7184;361/0535;79/3530;188/8762", "google_scholar": "pfYPQMIAAAAJ;;X5tK6xsAAAAJ;p1lKpmYAAAAJ;Td8hNHUAAAAJ;SAhTZJIAAAAJ;3FzuZasAAAAJ;uYz6zaIAAAAJ;U1FHaSUAAAAJ;0wnquCEAAAAJ;N_RhXusAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?user=geH4mS0AAAAJ;;;GGw1q64AAAAJ;42IK-k0AAAAJ;K_JCmqgAAAAJ;https://scholar.google.com.tw/citations?user=TJh4iVUAAAAJ;https://scholar.google.com/citations?hl=en;MHvcOG0AAAAJ;https://scholar.google.es/citations?user=ZhbejRkAAAAJ;wA3vqywAAAAJ;eMC-gQUAAAAJ;https://scholar.google.com.br/citations?user=kvnCwvAAAAAJ;https://scholar.google.com/citations?hl=en;LHGEJ68AAAAJ;iBuxBEUAAAAJ;hCTQJToAAAAJ;RzK4fWkAAAAJ;;MxJqtPIAAAAJ;;;;https://scholar.google.com.tw/citations?hl=en;;p4TbBLEAAAAJ;;;;7t9kIY0AAAAJ;;;https://scholar.google.ca/citations?user=W9sTkS0AAAAJ;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ;https://scholar.google.com.mx/citations?user=rubyApkAAAAJ;;65O00-4AAAAJ;bugb-lAAAAAJ;;60S379IAAAAJ;gdGRy7gAAAAJ;;gU1JEeYAAAAJ;https://scholar.google.com/citations?hl=en;;w5w_WZEAAAAJ;RA9l3s4AAAAJ;https://scholar.google.co.jp/citations?user=-bqmkaAAAAAJ;https://scholar.google.co.jp/citations?user=IP5UyqcAAAAJ;;Wttw55EAAAAJ;https://scholar.google.co.jp/citations?user=x91u618AAAAJ;ndzhAkkAAAAJ;OPwE87QAAAAJ;;https://scholar.google.com.sg/citations?user=SCFRL80AAAAJ;;https://scholar.google.com/citations?hl=en;F6WxL7YAAAAJ;yPXjUeQAAAAJ;Gmjwy-IAAAAJ;0Cyfqv4AAAAJ", "orcid": ";;;0000-0002-6256-7577;;;0000-0003-0655-5667;;0000-0002-9367-560X;;;;0000-0003-1307-2399;;0000-0001-6726-7242;0000-0002-9288-0126;;0009-0006-0380-7370;;0000-0003-1083-954X;0000-0002-6613-7435;0009-0006-3248-1461;0000-0001-7456-4333;0000-0001-6283-6266;;;0000-0001-5373-2297;0000-0001-5808-5201;0000-0002-9706-8663;;;0000-0003-0272-5147;;;;;;0000-0003-4273-913X;;0000-0001-5760-8410;0009-0000-7465-5702;0009-0006-5591-8005;0000-0001-9063-1137;;;;0000-0002-0193-2083;0000-0002-0767-6703;0000-0002-3501-5136;0000-0002-2576-562X;0000-0003-4467-7630;0000-0002-8995-5107;;0000-0001-8153-0182;0000-0001-9703-4458;;;0009-0005-8576-6500;;0000-0002-9891-1608;;;0000-0003-1159-0918;;;;0009-0009-5092-5160;0009-0001-7483-218X;;0000-0002-3411-7384;;;0000-0002-0892-4721;;;", "linkedin": "david-romero-038350160/;;;;aishik-m-a1758b97/;sukannya-purkayastha-5144a3118/;;emilio-villa-cueva-8b3362134/;jinheon-baek-8100a8144/;soyeong-jeong-900155141;injy-khairy-hamed-a41b09bb/;;;paulamonicasilva/;https://cl.linkedin.com/in/jocelyn-dunstan-bb92858b;m%C3%A9lanie-jouitteau-11679721/;;joan-nwatu-927050115/;;;mgochoo/?originalSubdomain=ae;guidoivetta/;luciana-benotti/;laura-alonso-alemany-1125235/?originalSubdomain=ar;;%E4%BD%B3%E8%BE%89-%E8%80%BF-730961100/;;;viridiano/;;https://ph.linkedin.com/in/danjohnvelasco;oana-ignat-ro;;chenxwh;artem-a-78118568/;teresa-clifford-589165178?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;gr%C3%A1inne-caulfield-1a7600179/;teresa-lynn-41430817/;;vgaraujov/;;mihailmm/;israel-abebe/;henok-b-ademtew-7729b2183/;bontu-fufa;naome22/;david-adelani-7557b337/;;atnafu-lambebo-6b21a5184;maria-camila-buitrago-cabrera-14171415/;;holylovenia;;;;;;faridlazuarda/;ryandito-diandaru;samuelcahyawijaya/;fajri-koto-02705860/;;haiyue-song-844a74186/;aditya-nanda-kishore-130185252?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;https://linkedin.com/in/thanmay;;fazliimaam/;ravindu-nagasinghe;alina-dragonetti-91287b210/;lfdharo/?originalSubdomain=es;niyomugisha-olivier-3783a6289?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;;pranjalchitale/;fauzan-farooqui/;;", "or_profile": "~David_Orlando_Romero_Mogrovejo1;~Chenyang_Lyu1;~Haryo_Akbarianto_Wibowo1;~Santiago_G\u00f3ngora1;~Aishik_Mandal1;~Sukannya_Purkayastha1;~Jesus-German_Ortiz-Barajas1;~Emilio_Villa_Cueva1;~Jinheon_Baek1;~Soyeong_Jeong1;~Injy_Hamed1;~Zheng_Xin_Yong1;~Zheng_Wei_Lim1;~Paula_M\u00f3nica_Silva1;~Jocelyn_Dunstan1;~M\u00e9lanie_Jouitteau1;~David_LE_MEUR1;~Joan_Nwatu1;~Ganzorig_Batnasan1;~Munkh-Erdene_Otgonbold1;~Munkhjargal_Gochoo1;~Guido_Ivetta1;~Luciana_Benotti1;~Laura_Alonso_Alemany2;~Hern\u00e1n_Maina1;~Jiahui_Geng3;~Tiago_Timponi_Torrent1;~Frederico_Belcavello1;~Marcelo_Viridiano1;~Jan_Christian_Blaise_Cruz1;~Dan_John_Velasco1;~Oana_Ignat1;~Zara_Burzo1;~Chenxi_Whitehouse1;~Artem_Abzaliev1;~Teresa_Clifford1;~Gr\u00e1inne_Caulfield1;~Teresa_Lynn1;~Christian_Salamea-Palacios1;~Vladimir_Araujo1;~Yova_Kementchedjhieva1;~Mihail_Minkov_Mihaylov1;~Israel_Abebe_Azime1;~Henok_Biadglign_Ademtew1;~Bontu_Fufa_Balcha1;~Naome_A_Etori1;~David_Ifeoluwa_Adelani1;~Rada_Mihalcea1;~Atnafu_Lambebo_Tonja1;~Maria_Camila_Buitrago_Cabrera1;~Gisela_Vallejo1;~Holy_Lovenia1;~Ruochen_Zhang1;~Marcos_Estecha-Garitagoitia1;~Mario_Rodr\u00edguez-Cantelar1;~Toqeer_Ehsan1;~Rendi_Chevi1;~Muhammad_Farid_Adilazuarda1;~Ryandito_Diandaru1;~Samuel_Cahyawijaya1;~Fajri_Koto1;~Tatsuki_Kuribayashi1;~Haiyue_Song1;~Aditya_Nanda_Kishore_Khandavally1;~Thanmay_Jayakumar1;~Raj_Dabre1;~Mohamed_Fazli_Mohamed_Imam1;~Kumaranage_Ravindu_Yasas_Nagasinghe1;~Alina_Dragonetti1;~Luis_Fernando_D'Haro2;~Olivier_NIYOMUGISHA1;~Jay_Gala1;~Pranjal_A_Chitale1;~Fauzan_Farooqui1;~Thamar_Solorio2;~Alham_Fikri_Aji1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;NICT, The University of Tokyo;Universidad de la Rep\u00fablica;Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;Mohamed bin Zayed University of Artificial Intelligence;CIMAT;Google;Korea Advanced Institute of Science & Technology;Mohamed bin Zayed University of Artificial Intelligence;Brown University;University of Melbourne;Universidad de Santiago de Chile;Pontificia Universidad Catolica de Chile;CNRS;;University of Michigan - Ann Arbor;United Arab Emirates University;;United Arab Emirates University;Universidad Nacional de C\u00f3rdoba;Universidad nacional de C\u00f3rdoba ;Universidad Nacional de C\u00f3rdoba;Universidad Nacional de C\u00f3rdoba, Argentina;Mohamed bin Zayed University of Artificial Intelligence;Federal University of Juiz de Fora;Case Western Reserve University;Universidade Federal de Juiz de Fora;Samsung;Samsung;University of Michigan - Ann Arbor;Skyline High School;University of Cambridge;University of Michigan - Ann Arbor;Dublin City University;Dublin City University;Mohamed bin Zayed University of Artificial Intelligence;Universidad Polit\u00e9cnica Salesiana;KU Leuven;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Universit\u00e4t des Saarlandes;Ethiopian Artificial Intelligence Institute;Addis Ababa Institute of Technology;University of Minnesota - Twin Cities;University College London, University of London;University of Michigan;Instituto Polit\u00e9cnico Nacional;Universit\u00e4t Stuttgart;University of Melbourne;AI Singapore;;Universidad Polit\u00e9cnica de Madrid;Universidad Polit\u00e9cnica de Madrid;;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Institut Teknologi Bandung;Hong Kong University of Science and Technology;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Kyoto University;Indian Institute of Technology, Madras;Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology;National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Universidad de la Rep\u00fablica;Universidad Polit\u00e9cnica de Madrid;University of Rwanda;AI4Bharat;Department of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras;Centre for Development of Telematics;University of Houston;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;nict.co.jp;fing.edu.uy;tu-darmstadt.de;tu-darmstadt.de;mbzuai.ac.ae;cimat.mx;google.com;kaist.ac.kr;mbzuai.ac.ae;brown.edu;unimelb.edu.au;usach.cl;uc.cl;cnrs.fr;;umich.edu;uaeu.ac.ae;;uaeu.ac.ae;unc.edu.ar;unc.edu.ar;unc.edu.ar;unc.edu.ar;mbzuai.ac.ae;ufjf.br;case.edu;ufjf.br;samsung.com;samsung.com;umich.edu;a2schools.org;cam.ac.uk;umich.edu;dcu.ie;dcu.ie;mbzuai.ac.ae;ups.edu.ec;kuleuven.be;mbzuai.ac.ae;mbzuai.ac.ae;uni-saarland.de;aii.et;aait.edu.et;umn.edu;ucl.ac.uk;umich.edu;ipn.mx;uni-stuttgart.de;unimelb.edu;aisingapore.org;;upm.es;upm.es;;mbzuai.ac.ae;mbzuai.ac.ae;itb.ac.id;ust.hk;mbzuai.ac.ae;mbzuai.ac.ae;kyoto-u.ac.jp;iitm.ac.in;iitm.ac.in;nict.go.jp;mbzuai.ac.ae;mbzuai.ac.ae;udelar.edu.uy;upm.es;ur.ac.rw;ai4bharat.org;cse.iitm.ac.in;cdot.in;uh.edu;mbzuai.ac.ae", "position": "PhD student;Postdoc;Intern;MS student;PhD student;PhD student;Intern;MS student;Intern;PhD student;Postdoc;PhD student;PhD student;Researcher;Assistant Professor;Researcher;;PhD student;MS student;;Assistant Professor;PhD student;Associate Professor;Associate Professor;PhD student;Postdoc;Full Professor;Postdoc;PhD student;Researcher;Researcher;Postdoc;Student;Postdoc;PhD student;PhD student;Researcher;Principal Researcher;Full Professor;Postdoc;Assistant Professor;PhD student;PhD student;Researcher;MS student;PhD student;Postdoc;Full Professor;PhD student;Researcher;PhD student;Researcher;;PhD student;PhD student;;Researcher;Researcher;Undergrad student;PhD student;Postdoc;Postdoc;PhD student;Undergrad student;Researcher;Postdoc;MS student;Researcher;Intern;Associate Professor;Undergrad student;Researcher;MS student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmogrovejo2024cvqa,\ntitle={{CVQA}: Culturally-diverse Multilingual Visual Question Answering Benchmark},\nauthor={David Orlando Romero Mogrovejo and Chenyang Lyu and Haryo Akbarianto Wibowo and Santiago G{\\'o}ngora and Aishik Mandal and Sukannya Purkayastha and Jesus-German Ortiz-Barajas and Emilio Villa Cueva and Jinheon Baek and Soyeong Jeong and Injy Hamed and Zheng Xin Yong and Zheng Wei Lim and Paula M{\\'o}nica Silva and Jocelyn Dunstan and M{\\'e}lanie Jouitteau and David LE MEUR and Joan Nwatu and Ganzorig Batnasan and Munkh-Erdene Otgonbold and Munkhjargal Gochoo and Guido Ivetta and Luciana Benotti and Laura Alonso Alemany and Hern{\\'a}n Maina and Jiahui Geng and Tiago Timponi Torrent and Frederico Belcavello and Marcelo Viridiano and Jan Christian Blaise Cruz and Dan John Velasco and Oana Ignat and Zara Burzo and Chenxi Whitehouse and Artem Abzaliev and Teresa Clifford and Gr{\\'a}inne Caulfield and Teresa Lynn and Christian Salamea-Palacios and Vladimir Araujo and Yova Kementchedjhieva and Mihail Minkov Mihaylov and Israel Abebe Azime and Henok Biadglign Ademtew and Bontu Fufa Balcha and Naome A Etori and David Ifeoluwa Adelani and Rada Mihalcea and Atnafu Lambebo Tonja and Maria Camila Buitrago Cabrera and Gisela Vallejo and Holy Lovenia and Ruochen Zhang and Marcos Estecha-Garitagoitia and Mario Rodr{\\'\\i}guez-Cantelar and Toqeer Ehsan and Rendi Chevi and Muhammad Farid Adilazuarda and Ryandito Diandaru and Samuel Cahyawijaya and Fajri Koto and Tatsuki Kuribayashi and Haiyue Song and Aditya Nanda Kishore Khandavally and Thanmay Jayakumar and Raj Dabre and Mohamed Fazli Mohamed Imam and Kumaranage Ravindu Yasas Nagasinghe and Alina Dragonetti and Luis Fernando D'Haro and Olivier NIYOMUGISHA and Jay Gala and Pranjal A Chitale and Fauzan Farooqui and Thamar Solorio and Alham Fikri Aji},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=E18kRXTGmV}\n}", "github": "", "reviewers": "mHUB;Hjkn;9Fd4;nsfT", "pdf_size": 46819255, "rating": "7;7;9;9", "confidence": "4;5;4;4", "wc_summary_and_contributions": "112;83;112;81", "wc_strengths": "135;82;109;114", "wc_improvement": "349;174;235;196", "wc_limitations": "44;51;1;52", "wc_correctness": "14;30;1;1", "wc_clarity": "113;18;1;2", "wc_relation_to_prior_work": "90;39;1;9", "wc_documentation": "24;30;1;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "882;508;462;473", "wc_reply_reviewers": "71;117;70;11", "wc_reply_authors": "0;0;39;0", "reply_reviewers": "2;2;2;1", "reply_authors": "2;3;2;1", "rating_avg": [ 8.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 97.0, 15.016657417681207 ], "wc_strengths_avg": [ 110.0, 18.881207588499205 ], "wc_improvement_avg": [ 238.5, 67.43330037896706 ], "wc_limitations_avg": [ 37.0, 21.011901389450696 ], "wc_correctness_avg": [ 11.5, 11.926860441876563 ], "wc_clarity_avg": [ 33.5, 46.39234850705448 ], "wc_relation_to_prior_work_avg": [ 34.75, 34.90254288730264 ], "wc_documentation_avg": [ 18.0, 10.8397416943394 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 581.25, 174.46686648186238 ], "wc_reply_reviewers_avg": [ 67.25, 37.6189779233833 ], "wc_reply_authors_avg": [ 9.75, 16.887495373796554 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 76, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10128460877349905732&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "mbzuai.ac.ae;mbzuai.ac.ae;nict.co.jp;fing.edu.uy;tu-darmstadt.de;tu-darmstadt.de;mbzuai.ac.ae;cimat.mx;google.com;kaist.ac.kr;mbzuai.ac.ae;brown.edu;unimelb.edu.au;usach.cl;uc.cl;cnrs.fr;;umich.edu;uaeu.ac.ae;;uaeu.ac.ae;unc.edu.ar;unc.edu.ar;unc.edu.ar;unc.edu.ar;mbzuai.ac.ae;ufjf.br;case.edu;ufjf.br;samsung.com;samsung.com;umich.edu;a2schools.org;cam.ac.uk;umich.edu;dcu.ie;dcu.ie;mbzuai.ac.ae;ups.edu.ec;kuleuven.be;mbzuai.ac.ae;mbzuai.ac.ae;uni-saarland.de;aii.et;aait.edu.et;umn.edu;ucl.ac.uk;umich.edu;ipn.mx;uni-stuttgart.de;unimelb.edu;aisingapore.org;;upm.es;upm.es;;mbzuai.ac.ae;mbzuai.ac.ae;itb.ac.id;ust.hk;mbzuai.ac.ae;mbzuai.ac.ae;kyoto-u.ac.jp;iitm.ac.in;iitm.ac.in;nict.go.jp;mbzuai.ac.ae;mbzuai.ac.ae;udelar.edu.uy;upm.es;ur.ac.rw;ai4bharat.org;cse.iitm.ac.in;cdot.in;uh.edu;mbzuai.ac.ae", "author_num": 76, "aff_unique_index": "0;0;1;2;3;3;0;4;5;6;0;7;8;9;10;11;12;13;13;14;14;14;14;0;15;16;17;18;18;12;19;20;12;21;21;0;22;23;0;0;24;25;26;27;28;12;29;30;8;31;32;32;0;0;33;34;0;0;35;36;37;38;0;0;2;32;39;40;37;41;42;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of Tokyo;Universidad de la Rep\u00fablica;Technische Universit\u00e4t Darmstadt;Centro de Investigaci\u00f3n en Matem\u00e1ticas;Google;Korea Advanced Institute of Science and Technology;Brown University;University of Melbourne;Universidad de Santiago de Chile;Pontificia Universidad Catolica de Chile;Centre National de la Recherche Scientifique;University of Michigan;United Arab Emirates University;Universidad Nacional de C\u00f3rdoba;Federal University of Juiz de Fora;Case Western Reserve University;Universidade Federal de Juiz de Fora;Samsung;Skyline High School;University of Cambridge;Dublin City University;Universidad Polit\u00e9cnica Salesiana;Katholieke Universiteit Leuven;Universit\u00e4t des Saarlandes;Ethiopian Artificial Intelligence Institute;Addis Ababa Institute of Technology;University of Minnesota;University College London;Instituto Polit\u00e9cnico Nacional;University of Stuttgart;AI Singapore;Universidad Polit\u00e9cnica de Madrid;Institut Teknologi Bandung;Hong Kong University of Science and Technology;Kyoto University;Indian Institute of Technology Madras;Indian Institute of Technology, Madras;National Institute of Information and Communications Technology;University of Rwanda;AI4Bharat;Centre for Development of Telematics;University of Houston", "aff_unique_dep": ";;;;;Google;;;;;;;;;;;;;Samsung;;;;;;;;;;;;;;;;;;;;;;;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.u-tokyo.ac.jp;https://www.unorte.edu.uy;https://www.tu-darmstadt.de;https://www.cimat.mx/;https://www.google.com;https://www.kaist.ac.kr;https://www.brown.edu;https://www.unimelb.edu.au;https://www.uds.cl;https://www.puc.cl;https://www.cnrs.fr;https://www.umich.edu;https://www.uaeu.ac.ae;https://www.unc.edu.ar;https://www.ufjf.edu.br;https://www.case.edu;http://www.ufjf.edu.br;https://www.samsung.com;;https://www.cam.ac.uk;https://www.dcu.ie;https://www.ups.edu.ec;https://www.kuleuven.be;https://www.uni-saarland.de;;https://www.aait.edu.et;https://www.minnesota.edu;https://www.ucl.ac.uk;https://www.ipn.mx;https://www.uni-stuttgart.de;https://www.aisingapore.gov.sg;https://www.upm.es;https://www.itb.ac.id;https://www.ust.hk;https://www.kyoto-u.ac.jp;https://www.iitm.ac.in;https://www.iitm.ac.in;https://www.nict.go.jp/;https://www.ur.ac.rw;;http://www.cdot.co.in;https://www.uh.edu", "aff_unique_abbr": "MBZUAI;UTokyo;Udelar;TUD;CIMAT;Google;KAIST;Brown;UniMelb;USACH;PUC;CNRS;UM;UAEU;UNC;UFJF;CWRU;UFJF;Samsung;;Cambridge;DCU;;KU Leuven;UDS;;AAIT;UMN;UCL;IPN;Uni Stuttgart;AI Singapore;UPM;ITB;HKUST;Kyoto U;IIT Madras;IIT Madras;NICT;UR;;CDOT;UH", "aff_campus_unique_index": "1;2;2;3;2;4;5;6;6;6", "aff_campus_unique": ";Mountain View;Ann Arbor;Cambridge;Twin Cities;Hong Kong SAR;Madras", "aff_country_unique_index": "0;0;1;2;3;3;0;4;5;6;0;5;7;8;8;9;5;0;0;10;10;10;10;0;11;5;11;6;6;5;13;5;14;14;0;15;16;0;0;3;17;17;5;13;5;4;3;7;18;19;19;0;0;20;21;0;0;1;22;22;1;0;0;2;19;23;22;22;22;5;0", "aff_country_unique": "United Arab Emirates;Japan;Uruguay;Germany;Mexico;United States;South Korea;Australia;Chile;France;Argentina;Brazil;;United Kingdom;Ireland;Ecuador;Belgium;Ethiopia;Singapore;Spain;Indonesia;China;India;Rwanda" }, { "title": "On the Benefits of Public Representations for Private Transfer Learning under Distribution Shift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96063", "id": "E1nBLrEaJo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E1nBLrEaJo", "openreview": "https://openreview.net/forum?id=E1nBLrEaJo", "poster": "", "project": "", "author_site": "Pratiksha Thaker, Amrith Setlur, Steven Wu, Virginia Smith", "tldr": "", "abstract": "Public pretraining is a promising approach to improve differentially private model training. However, recent work has noted that many positive research results studying this paradigm only consider in-distribution tasks, and may not apply to settings where there is distribution shift between the pretraining and finetuning data---a scenario that is likely when finetuning private tasks due to the sensitive nature of the data. In this work, we show empirically across three tasks that even in settings with large distribution shift, where both zero-shot performance from public data and training from scratch with private data give unusably weak results, public features can in fact improve private training accuracy by up to 67\\% over private training from scratch. We provide a theoretical explanation for this phenomenon, showing that if the public and private data share a low-dimensional representation, public representations can improve the sample complexity of private training even if it is \\emph{impossible} to learn the private task from the public data alone. Altogether, our results provide evidence that public data can indeed make private training practical in realistic settings of extreme distribution shift.", "keywords": "pretraining;representations;privacy;distribution shift", "primary_area": "privacy", "supplementary_material": "", "author": "Pratiksha Thaker;Amrith Setlur;Steven Wu;Virginia Smith", "authorids": "~Pratiksha_Thaker1;~Amrith_Setlur1;~Steven_Wu1;~Virginia_Smith1", "gender": ";M;;F", "homepage": ";http://ars22.github.io;;", "dblp": ";https://dblp.uni-trier.de/pers/hd/s/Setlur:Amrith;;120/0921", "google_scholar": ";https://scholar.google.ru/citations?user=i7V1kJgAAAAJ;;", "orcid": ";0000-0002-7061-3094;;", "linkedin": ";;;", "or_profile": "~Pratiksha_Thaker1;~Amrith_Setlur1;~Steven_Wu1;~Virginia_Smith1", "aff": ";Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": ";cmu.edu;;cmu.edu", "position": ";PhD student;;Associate Professor", "bibtex": "@inproceedings{\nthaker2024on,\ntitle={On the Benefits of Public Representations for Private Transfer Learning under Distribution Shift},\nauthor={Pratiksha Thaker and Amrith Setlur and Steven Wu and Virginia Smith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E1nBLrEaJo}\n}", "github": "", "reviewers": "t1VG;S2j2;iSuQ;PptC", "pdf_size": 660545, "rating": "6;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;4;3", "presentation": "3;3;3;3", "wc_summary": "46;80;62;240", "wc_strengths": "42;49;209;135", "wc_weaknesses": "71;44;18;115", "wc_questions": "30;3;70;117", "wc_limitations": "7;11;11;51", "wc_review": "196;187;370;658", "wc_reply_reviewers": "0;0;338;33", "wc_reply_authors": "62;60;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.0, 77.72387020729218 ], "wc_strengths_avg": [ 108.75, 68.49224408646573 ], "wc_weaknesses_avg": [ 62.0, 35.88175023601831 ], "wc_questions_avg": [ 55.0, 43.00581356049435 ], "wc_limitations_avg": [ 20.0, 17.97220075561143 ], "wc_review_avg": [ 352.75, 190.73459964044278 ], "wc_reply_reviewers_avg": [ 92.75, 142.2346213128154 ], "wc_reply_authors_avg": [ 30.5, 30.5081956201936 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8643373118217773204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";cmu.edu;;cmu.edu", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On Mesa-Optimization in Autoregressively Trained Transformers: Emergence and Capability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96062", "id": "E2BYPreuU8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E2BYPreuU8", "openreview": "https://openreview.net/forum?id=E2BYPreuU8", "poster": "/media/PosterPDFs/NeurIPS%202024/96062.png?t=1731416427.5378056", "project": "", "author_site": "Chenyu Zheng, Wei Huang, Rongzhen Wang, Guoqiang Wu, Jun Zhu, Chongxuan LI", "tldr": "", "abstract": "Autoregressively trained transformers have brought a profound revolution to the world, especially with their in-context learning (ICL) ability to address downstream tasks. \nRecently, several studies suggest that transformers learn a mesa-optimizer during autoregressive (AR) pretraining to implement ICL. Namely, the forward pass of the trained transformer is equivalent to optimizing an inner objective function in-context.\nHowever, whether the practical non-convex training dynamics will converge to the ideal mesa-optimizer is still unclear.\nTowards filling this gap, we investigate the non-convex dynamics of a one-layer linear causal self-attention model autoregressively trained by gradient flow, where the sequences are generated by an AR process $x_{t+1} = W x_t$. First, under a certain condition of data distribution, we prove that an autoregressively trained transformer learns $W$ by implementing one step of gradient descent to minimize an ordinary least squares (OLS) problem in-context. It then applies the learned $\\widehat{W}$ for next-token prediction, thereby verifying the mesa-optimization hypothesis. Next, under the same data conditions, we explore the capability limitations of the obtained mesa-optimizer. We show that a stronger assumption related to the moments of data is the sufficient and necessary condition that the learned mesa-optimizer recovers the distribution. Besides, we conduct exploratory analyses beyond the first data condition \nand prove that generally, the trained transformer will not perform vanilla gradient descent for the OLS problem. Finally, our simulation results verify the theoretical results.", "keywords": "Mesa-Optimization;In-context learning;Autoregressive pretraining;Non-convex optimization;Learning theory;Transformers", "primary_area": "learning_theory", "supplementary_material": "/attachment/a83d7f988fe60f484f0fc3cbe6686796cdfd60b7.zip", "author": "Chenyu Zheng;Wei Huang;Rongzhen Wang;Guoqiang Wu;Jun Zhu;Chongxuan Li", "authorids": "~Chenyu_Zheng1;~Wei_Huang6;~Rongzhen_Wang1;~Guoqiang_Wu2;~Jun_Zhu2;~Chongxuan_Li1", "gender": "M;M;F;M;M;M", "homepage": "https://chen-yu-zheng.github.io;https://weihuang05.github.io/;https://github.com/rongzhenwang;https://guoqiangwoodrowwu.github.io/;http://ml.cs.tsinghua.edu.cn/~jun;http://ml.cs.tsinghua.edu.cn/~chongxuan", "dblp": "133/5078;81/6685-34;245/1816;98/4857;50/2644-1;161/9965", "google_scholar": "QDfsVgYAAAAJ;RZfDh4MAAAAJ;;KCTX-_0AAAAJ;axsP38wAAAAJ;UKMcQn4AAAAJ", "orcid": ";0000-0001-5674-7021;;0000-0003-4486-7944;;0000-0002-0912-9076", "linkedin": ";;;;;", "or_profile": "~Chenyu_Zheng1;~Wei_Huang6;~Rongzhen_Wang1;~Guoqiang_Wu2;~Jun_Zhu2;~Chongxuan_Li1", "aff": "Renmin University of China;RIKEN AIP;Renmin University of China;Shandong University;Tsinghua University;Renmin University of China", "aff_domain": "ruc.edu.cn;riken.jp;ruc.edu.cn;sdu.edu.cn;mail.tsinghua.edu.cn;ruc.edu.cn", "position": "PhD student;Research Scientist;PhD student;Associate Professor;Professor;Associate Professor", "bibtex": "@inproceedings{\nzheng2024on,\ntitle={On Mesa-Optimization in Autoregressively Trained Transformers: Emergence and Capability},\nauthor={Chenyu Zheng and Wei Huang and Rongzhen Wang and Guoqiang Wu and Jun Zhu and Chongxuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E2BYPreuU8}\n}", "github": "", "reviewers": "c1Lc;dsNs;ykMD;WaqB", "pdf_size": 8493290, "rating": "5;5;5;7", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "124;169;113;100", "wc_strengths": "87;67;49;170", "wc_weaknesses": "884;285;34;88", "wc_questions": "14;290;39;23", "wc_limitations": "4;7;7;42", "wc_review": "1113;818;242;423", "wc_reply_reviewers": "153;109;12;0", "wc_reply_authors": "152;70;18;23", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 126.5, 25.96632434519757 ], "wc_strengths_avg": [ 93.25, 46.30537225851877 ], "wc_weaknesses_avg": [ 322.75, 337.2353592077794 ], "wc_questions_avg": [ 91.5, 114.95325136767555 ], "wc_limitations_avg": [ 15.0, 15.636495771111889 ], "wc_review_avg": [ 649.0, 339.3309593892075 ], "wc_reply_reviewers_avg": [ 68.5, 64.54649486997725 ], "wc_reply_authors_avg": [ 65.75, 53.769763808296574 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7603499574731404714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;riken.jp;ruc.edu.cn;sdu.edu.cn;mail.tsinghua.edu.cn;ruc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Renmin University of China;RIKEN;Shandong University;Tsinghua University", "aff_unique_dep": ";Advanced Institute for Computational Science;;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.aip.riken.jp;http://www.sdu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "RUC;RIKEN AIP;SDU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Multi-hypotheses Conditioned Point Cloud Diffusion for 3D Human Reconstruction from Occluded Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96061", "id": "E2JCQyYu0E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E2JCQyYu0E", "openreview": "https://openreview.net/forum?id=E2JCQyYu0E", "poster": "", "project": "", "author_site": "Donghwan Kim, Tae-Kyun Kim", "tldr": "", "abstract": "3D human shape reconstruction under severe occlusion due to human-object or human-human interaction is a challenging problem. While implicit function methods capture detailed clothed shapes, they require aligned shape priors and or are weak at inpainting occluded regions given an image input. Parametric models i.e. SMPL, instead offer whole body shapes, however, are often misaligned with images. In this work, we propose a novel pipeline composed of a probabilistic SMPL model and point cloud diffusion for pixel-aligned detailed 3D human reconstruction under occlusion. Multiple hypotheses generated by the probabilistic SMPL method are conditioned via continuous 3D shape representations. Point cloud diffusion refines the distribution of 3D points fitted to both the multi-hypothesis shape condition and pixel-aligned image features, offering detailed clothed shapes and inpainting occluded parts of human bodies. In the experiments using the CAPE, MultiHuman and Hi4D datasets, the proposed method outperforms various SOTA methods based on SMPL, implicit functions, point cloud diffusion, and their combined, under synthetic and real occlusions. Our code is publicly available at https://donghwankim0101.github.io/projects/mhcdiff.", "keywords": "human reconstruction;point cloud diffusion;multi-hypothesis", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/289c638080fd4975f9a72bcacf2eb09fabe0c63b.zip", "author": "Donghwan Kim;Tae-Kyun Kim", "authorids": "~Donghwan_Kim6;~Tae-Kyun_Kim2", "gender": "M;M", "homepage": "https://donghwankim0101.github.io/;https://sites.google.com/view/tkkim/", "dblp": ";28/787", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=j2WcLecAAAAJ", "orcid": "0009-0000-4390-725X;", "linkedin": "donghwan-kim-1060a81a5/;", "or_profile": "~Donghwan_Kim6;~Tae-kyun_Kim1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nkim2024multihypotheses,\ntitle={Multi-hypotheses Conditioned Point Cloud Diffusion for 3D Human Reconstruction from Occluded Images},\nauthor={Donghwan Kim and Tae-Kyun Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E2JCQyYu0E}\n}", "github": "", "reviewers": "w3VR;vFH7;V2cx;1vtm", "pdf_size": 7439700, "rating": "3;5;5;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "1;3;3;3", "wc_summary": "73;56;104;102", "wc_strengths": "26;30;43;85", "wc_weaknesses": "83;112;35;188", "wc_questions": "24;51;292;100", "wc_limitations": "1;9;18;12", "wc_review": "207;258;492;487", "wc_reply_reviewers": "0;44;0;0", "wc_reply_authors": "0;41;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 83.75, 20.17888748172208 ], "wc_strengths_avg": [ 46.0, 23.37733945512192 ], "wc_weaknesses_avg": [ 104.5, 55.5 ], "wc_questions_avg": [ 116.75, 104.78400402733234 ], "wc_limitations_avg": [ 10.0, 6.123724356957945 ], "wc_review_avg": [ 361.0, 129.7709520655528 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 10.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11722749734864714545&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Breaking the False Sense of Security in Backdoor Defense through Re-Activation Attack", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96060", "id": "E2odGznGim", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E2odGznGim", "openreview": "https://openreview.net/forum?id=E2odGznGim", "poster": "/media/PosterPDFs/NeurIPS%202024/96060.png?t=1731303190.0151896", "project": "", "author_site": "Mingli Zhu, Siyuan Liang, Baoyuan Wu", "tldr": "", "abstract": "Deep neural networks face persistent challenges in defending against backdoor attacks, leading to an ongoing battle between attacks and defenses. While existing backdoor defense strategies have shown promising performance on reducing attack success rates, can we confidently claim that the backdoor threat has truly been eliminated from the model? To address it, we re-investigate the characteristics of the backdoored models after defense (denoted as defense models). Surprisingly, we find that the original backdoors still exist in defense models derived from existing post-training defense strategies, and the backdoor existence is measured by a novel metric called backdoor existence coefficient. It implies that the backdoors just lie dormant rather than being eliminated. To further verify this finding, we empirically show that these dormant backdoors can be easily re-activated during inference stage, by manipulating the original trigger with well-designed tiny perturbation using universal adversarial attack. More practically, we extend our backdoor re-activation to black-box scenario, where the defense model can only be queried by the adversary during inference stage, and develop two effective methods, i.e., query-based and transfer-based backdoor re-activation attacks. The effectiveness of the proposed methods are verified on both image classification and multimodal contrastive learning (i.e., CLIP) tasks. In conclusion, this work uncovers a critical vulnerability that has never been explored in existing defense strategies, emphasizing the urgency of designing more robust and advanced backdoor defense mechanisms in the future.", "keywords": "Backdoor Learning; Backdoor Attack; Backdoor Defense; Adversarial Machine Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Mingli Zhu;Siyuan Liang;Baoyuan Wu", "authorids": "~Mingli_Zhu1;~Siyuan_Liang1;~Baoyuan_Wu1", "gender": "F;F;M", "homepage": ";https://www.github.com/;https://sites.google.com/site/baoyuanwu2015/", "dblp": "329/6304;205/8767.html;73/7781", "google_scholar": ";Hon4nf0AAAAJ;JNTG1KoAAAAJ", "orcid": ";0000-0002-6154-0233;0000-0003-2183-5990", "linkedin": "%E6%98%8E%E4%B8%BD-%E6%9C%B1-03b47423a;;", "or_profile": "~Mingli_Zhu1;~Siyuan_Liang1;~Baoyuan_Wu1", "aff": "The Chinese University of Hong Kong(Shen Zhen);National University of Singapore;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;nus.edu;cuhk.edu.cn", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhu2024breaking,\ntitle={Breaking the False Sense of Security in Backdoor Defense through Re-Activation Attack},\nauthor={Mingli Zhu and Siyuan Liang and Baoyuan Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E2odGznGim}\n}", "github": "", "reviewers": "G6gk;ULSi;ccvM;AhAB", "pdf_size": 9360144, "rating": "4;5;6;8", "confidence": "4;5;3;3", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "3;3;2;3", "wc_summary": "73;126;195;133", "wc_strengths": "69;35;207;55", "wc_weaknesses": "129;444;465;301", "wc_questions": "62;2;79;130", "wc_limitations": "8;6;62;49", "wc_review": "341;613;1008;668", "wc_reply_reviewers": "163;30;107;107", "wc_reply_authors": "1452;95;383;303", "reply_reviewers": "1;1;2;1", "reply_authors": "6;3;3;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 131.75, 43.26300382543958 ], "wc_strengths_avg": [ 91.5, 67.76983104597502 ], "wc_weaknesses_avg": [ 334.75, 134.51091963108422 ], "wc_questions_avg": [ 68.25, 45.70763065397286 ], "wc_limitations_avg": [ 31.25, 24.691850882426778 ], "wc_review_avg": [ 657.5, 237.23037326615662 ], "wc_reply_reviewers_avg": [ 101.75, 47.314770421085214 ], "wc_reply_authors_avg": [ 558.25, 526.6058179511502 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6625413488689132, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9867860491547140682&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cuhk.edu.cn;nus.edu;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "CUHK;NUS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "A Siamese Transformer with Hierarchical Refinement for Lane Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96059", "id": "E3HDagVPNG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E3HDagVPNG", "openreview": "https://openreview.net/forum?id=E3HDagVPNG", "poster": "/media/PosterPDFs/NeurIPS%202024/96059.png?t=1731390282.1354558", "project": "", "author_site": "Zinan Lv, Dong Han, Wenzhe Wang, Danny Z Chen", "tldr": "", "abstract": "Lane detection is an important yet challenging task in autonomous driving systems. Existing lane detection methods mainly rely on finer-scale information to identify key points of lane lines. Since local information in realistic road environments is frequently obscured by other vehicles or affected by poor outdoor lighting conditions, these methods struggle with the regression of such key points. In this paper, we propose a novel Siamese Transformer with hierarchical refinement for lane detection to improve the detection accuracy in complex road environments. Specifically, we propose a high-to-low hierarchical refinement Transformer structure, called LAne TRansformer (LATR), to refine the key points of lane lines, which integrates global semantics information and finer-scale features. Moreover, exploiting the thin and long characteristics of lane lines, we propose a novel Curve-IoU loss to supervise the fit of lane lines. Extensive experiments on three benchmark datasets of lane detection demonstrate that our proposed new method achieves state-of-the-art results with high accuracy and efficiency. Specifically, our method achieves improved F1 scores on the OpenLane dataset, surpassing the current best-performing method by 5.0 points.", "keywords": "Lane detection;Siamese Transformer;Hierarchical Refinement", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zinan Lv;Dong Han;Wenzhe Wang;Danny Chen", "authorids": "~Zinan_Lv1;~Dong_Han3;~Wenzhe_Wang1;~Danny_Chen1", "gender": "M;M;;Not Specified", "homepage": "https://github.com/lvzinan;https://github.com/phhandong;;https://engineering.nd.edu/faculty/danny-chen/", "dblp": ";;;c/DannyZChen.html", "google_scholar": ";Stg9K0AAAAAJ;;tRerdSIAAAAJ", "orcid": ";0009-0003-9546-0797;;0000-0001-6565-2884", "linkedin": ";;;", "or_profile": "~Zinan_Lv1;~Dong_Han3;~Wenzhe_Wang1;~Danny_Chen1", "aff": "Harbin Engineering University;Harbin Engineering University;;University of Notre Dame, USA", "aff_domain": "hrbeu.edu.cn;hrbeu.edu.cn;;nd.edu", "position": "Undergrad student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nlv2024a,\ntitle={A Siamese Transformer with Hierarchical Refinement for Lane Detection},\nauthor={Zinan Lv and Dong Han and Wenzhe Wang and Danny Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E3HDagVPNG}\n}", "github": "", "reviewers": "oCFK;UDCn;qpE6;UWBy", "pdf_size": 5811839, "rating": "3;4;7;8", "confidence": "4;5;5;5", "soundness": "2;2;3;3", "novelty": "1;1;3;3", "presentation": "1;2;3;4", "wc_summary": "57;55;55;72", "wc_strengths": "20;31;93;74", "wc_weaknesses": "163;135;2;78", "wc_questions": "27;24;84;51", "wc_limitations": "2;1;43;62", "wc_review": "269;246;277;337", "wc_reply_reviewers": "0;234;73;6", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 59.75, 7.119515432949071 ], "wc_strengths_avg": [ 54.5, 30.02082610455615 ], "wc_weaknesses_avg": [ 94.5, 61.56500629416032 ], "wc_questions_avg": [ 46.5, 24.046829312822098 ], "wc_limitations_avg": [ 27.0, 26.372333988481188 ], "wc_review_avg": [ 282.25, 33.5959446957516 ], "wc_reply_reviewers_avg": [ 78.25, 94.37789730651981 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7001400420140048, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RVHHkNWfVBIJ:scholar.google.com/&scioq=A+Siamese+Transformer+with+Hierarchical+Refinement+for+Lane+Detection&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "hrbeu.edu.cn;hrbeu.edu.cn;;nd.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harbin Engineering University;University of Notre Dame", "aff_unique_dep": ";", "aff_unique_url": "http://www.heu.edu.cn;https://www.nd.edu", "aff_unique_abbr": "HEU;Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "SemFlow: Binding Semantic Segmentation and Image Synthesis via Rectified Flow", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96058", "id": "E3P1X94Y51", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E3P1X94Y51", "openreview": "https://openreview.net/forum?id=E3P1X94Y51", "poster": "/media/PosterPDFs/NeurIPS%202024/96058.png?t=1731417030.2440958", "project": "", "author_site": "Chaoyang Wang, Xiangtai Li, Lu Qi, Henghui Ding, Yunhai Tong, Ming-Hsuan Yang", "tldr": "", "abstract": "Semantic segmentation and semantic image synthesis are two representative tasks in visual perception and generation. While existing methods consider them as two distinct tasks, we propose a unified framework (SemFlow) and model them as a pair of reverse problems. Specifically, motivated by rectified flow theory, we train an ordinary differential equation (ODE) model to transport between the distributions of real images and semantic masks. As the training object is symmetric, samples belonging to the two distributions, images and semantic masks, can be effortlessly transferred reversibly. For semantic segmentation, our approach solves the contradiction between the randomness of diffusion outputs and the uniqueness of segmentation results. For image synthesis, we propose a finite perturbation approach to enhance the diversity of generated results without changing the semantic categories. Experiments show that our SemFlow achieves competitive results on semantic segmentation and semantic image synthesis tasks. We hope this simple framework will motivate people to rethink the unification of low-level and high-level vision.", "keywords": "Segmentation;Diffusion Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Chaoyang Wang;Xiangtai Li;Lu Qi;Henghui Ding;Yunhai Tong;Ming-Hsuan Yang", "authorids": "~Chaoyang_Wang3;~Xiangtai_Li1;~Lu_Qi1;~Henghui_Ding2;~Yunhai_Tong1;~Ming-Hsuan_Yang1", "gender": ";;M;;M;M", "homepage": ";;https://www.luqi.info;;http://www.cis.pku.edu.cn/faculty/system/tongyunhai/tongyunhai.htm;https://faculty.ucmerced.edu/mhyang/", "dblp": ";;;;14/1705;79/3711.html", "google_scholar": ";;https://scholar.google.com.hk/citations?user=SSI90d4AAAAJ;;T4gqdPkAAAAJ;p9-ohHsAAAAJ", "orcid": ";;;;;0000-0003-4848-2304", "linkedin": ";;;;;minghsuanyang/", "or_profile": "~Chaoyang_Wang3;~Xiangtai_Li1;~Lu_Qi1;~Henghui_Ding2;~Yunhai_Tong1;~Ming-Hsuan_Yang1", "aff": ";;University of California, Merced;;Peking University;University of California at Merced", "aff_domain": ";;ucmerced.edu;;pku.edu.cn;umcerced.edu", "position": ";;Postdoc;;Full Professor;Professor", "bibtex": "@inproceedings{\nwang2024semflow,\ntitle={SemFlow: Binding Semantic Segmentation and Image Synthesis via Rectified Flow},\nauthor={Chaoyang Wang and Xiangtai Li and Lu Qi and Henghui Ding and Yunhai Tong and Ming-Hsuan Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E3P1X94Y51}\n}", "github": "", "reviewers": "3dM2;eo5D;5PqV", "pdf_size": 5068853, "rating": "4;5;5", "confidence": "5;3;3", "soundness": "2;4;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "103;74;87", "wc_strengths": "31;76;24", "wc_weaknesses": "195;44;149", "wc_questions": "31;2;2", "wc_limitations": "1;1;1", "wc_review": "361;197;263", "wc_reply_reviewers": "138;109;0", "wc_reply_authors": "343;82;54", "reply_reviewers": "1;1;0", "reply_authors": "3;3;2", "rating_avg": [ 4.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.0, 11.86029791643813 ], "wc_strengths_avg": [ 43.666666666666664, 23.041026211713937 ], "wc_weaknesses_avg": [ 129.33333333333334, 63.19458485942886 ], "wc_questions_avg": [ 11.666666666666666, 13.67073110293992 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 273.6666666666667, 67.37622396332074 ], "wc_reply_reviewers_avg": [ 82.33333333333333, 59.41006274660511 ], "wc_reply_authors_avg": [ 159.66666666666666, 130.13924166915305 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10492351658511222310&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";;ucmerced.edu;;pku.edu.cn;umcerced.edu", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Merced;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucmerced.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UC Merced;Peking U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Merced;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Zero-Shot Event-Intensity Asymmetric Stereo via Visual Prompting from Image Domain", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96057", "id": "E3ZMsqdO0D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E3ZMsqdO0D", "openreview": "https://openreview.net/forum?id=E3ZMsqdO0D", "poster": "/media/PosterPDFs/NeurIPS%202024/96057.png?t=1731843254.3737123", "project": "", "author_site": "Hanyue Lou, Jinxiu Liang, Minggui Teng, Bin Fan, Yong Xu, Boxin Shi", "tldr": "", "abstract": "Event-intensity asymmetric stereo systems have emerged as a promising approach for robust 3D perception in dynamic and challenging environments by integrating event cameras with frame-based sensors in different views. However, existing methods often suffer from overfitting and poor generalization due to limited dataset sizes and lack of scene diversity in the event domain. To address these issues, we propose a zero-shot framework that utilizes monocular depth estimation and stereo matching models pretrained on diverse image datasets. Our approach introduces a visual prompting technique to align the representations of frames and events, allowing the use of off-the-shelf stereo models without additional training. Furthermore, we introduce a monocular cue-guided disparity refinement module to improve robustness across static and dynamic regions by incorporating monocular depth information from foundation models. Extensive experiments on real-world datasets demonstrate the superior zero-shot evaluation performance and enhanced generalization ability of our method compared to existing approaches.", "keywords": "Event cameras;stereo matching;asymetric stereo;visual prompting;disparity filtering", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hanyue Lou;Jinxiu Liang;Minggui Teng;Bin Fan;Yong Xu;Boxin Shi", "authorids": "~Hanyue_Lou1;~Jinxiu_Liang1;~Minggui_Teng1;~Bin_Fan3;~Yong_Xu2;~Boxin_Shi3", "gender": "F;F;M;M;M;M", "homepage": "https://hylz-2019.github.io/;https://sherrycattt.github.io;https://tengminggui.cn/;;http://camera.pku.edu.cn;https://gitcvfb.github.io/", "dblp": "333/1244;254/2323;274/5255;07/4630-7;69/783;60/105-2", "google_scholar": ";wR2OMogAAAAJ;BukbqcwAAAAJ;;K1LjZxcAAAAJ;c6As1PcAAAAJ", "orcid": ";;0000-0002-9234-4243;;0000-0001-6749-0364;0000-0002-8028-0166", "linkedin": ";;;;;", "or_profile": "~Hanyue_Lou1;~Jinxiu_Liang1;~Minggui_Teng1;~Yong_Xu2;~Boxin_Shi3;~Fan_Bin1", "aff": "Peking University;Peking University;Peking University;South China University of Technology;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;scut.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Postdoc;PhD student;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nlou2024zeroshot,\ntitle={Zero-Shot Event-Intensity Asymmetric Stereo via Visual Prompting from Image Domain},\nauthor={Hanyue Lou and Jinxiu Liang and Minggui Teng and Bin Fan and Yong Xu and Boxin Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E3ZMsqdO0D}\n}", "github": "", "reviewers": "U212;PsoF;EJ3V;dhj9;ggyM", "pdf_size": 9623319, "rating": "5;6;6;6;8", "confidence": "4;4;4;3;5", "soundness": "3;2;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;2;4", "wc_summary": "92;119;140;68;112", "wc_strengths": "162;288;39;126;171", "wc_weaknesses": "131;295;85;119;47", "wc_questions": "19;135;63;79;217", "wc_limitations": "16;27;67;143;37", "wc_review": "420;864;394;535;584", "wc_reply_reviewers": "39;392;101;52;33", "wc_reply_authors": "7;379;376;12;18", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 106.2, 24.498163196452097 ], "wc_strengths_avg": [ 157.2, 80.33280774378548 ], "wc_weaknesses_avg": [ 135.4, 84.98846980620371 ], "wc_questions_avg": [ 102.6, 68.19266822760348 ], "wc_limitations_avg": [ 58.0, 45.76461515188345 ], "wc_review_avg": [ 559.4, 167.7922525029091 ], "wc_reply_reviewers_avg": [ 123.4, 136.41202293053206 ], "wc_reply_authors_avg": [ 158.4, 178.9308246222545 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16869794887764964949&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;scut.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Peking University;South China University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.scut.edu.cn", "aff_unique_abbr": "Peking U;SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Length Optimization in Conformal Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96056", "id": "E4ILjwzdEA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E4ILjwzdEA", "openreview": "https://openreview.net/forum?id=E4ILjwzdEA", "poster": "", "project": "", "author_site": "Shayan Kiyani, George J. Pappas, Hamed Hassani", "tldr": "", "abstract": "Conditional validity and length efficiency are two crucial aspects of conformal prediction (CP). Conditional validity ensures accurate uncertainty quantification for data subpopulations, while proper length efficiency ensures that the prediction sets remain informative. Despite significant efforts to address each of these issues individually, a principled framework that reconciles these two objectives has been missing in the CP literature. In this paper, we develop Conformal Prediction with Length-Optimization (CPL) - a novel and practical framework that constructs prediction sets with (near-) optimal length while ensuring conditional validity under various classes of covariate shifts, including the key cases of marginal and group-conditional coverage. In the infinite sample regime, we provide strong duality results which indicate that CPL achieves conditional validity and length optimality. In the finite sample regime, we show that CPL constructs conditionally valid prediction sets. Our extensive empirical evaluations demonstrate the superior prediction set size performance of CPL compared to state-of-the-art methods across diverse real-world and synthetic datasets in classification, regression, and large language model-based multiple choice question answering. An Implementation of our algorithm can be accessed at the following link: https://github.com/shayankiyani98/CP.", "keywords": "Conformal prediction;Conditional coverage;Prediction set size;Uncertainty quantification", "primary_area": "learning_theory", "supplementary_material": "", "author": "Shayan Kiyani;George J. Pappas;Hamed Hassani", "authorids": "~Shayan_Kiyani2;~George_J._Pappas1;~Hamed_Hassani2", "gender": "M;M;M", "homepage": ";https://www.seas.upenn.edu/~hassani/;http://www.georgejpappas.org/", "dblp": "303/1168;73/4984;p/GeorgeJPappas", "google_scholar": "R1oEJ0YAAAAJ;;https://scholar.google.com.tw/citations?user=Kia-4B0AAAAJ", "orcid": ";;0000-0001-9081-0637", "linkedin": "shayan-k-b97b6388;;", "or_profile": "~Shayan_Kiyani2;~Hamed_Hassani2;~George_Pappas1", "aff": "University of Pennsylvania;University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;seas.upenn.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nkiyani2024length,\ntitle={Length Optimization in Conformal Prediction},\nauthor={Shayan Kiyani and George J. Pappas and Hamed Hassani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E4ILjwzdEA}\n}", "github": "", "reviewers": "xcAK;vXbJ;5vGW;qXD6", "pdf_size": 1164708, "rating": "4;5;6;7", "confidence": "5;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "77;19;59;49", "wc_strengths": "99;70;126;64", "wc_weaknesses": "606;31;105;46", "wc_questions": "264;71;119;2", "wc_limitations": "14;2;1;2", "wc_review": "1060;193;410;163", "wc_reply_reviewers": "237;15;16;21", "wc_reply_authors": "2712;0;271;0", "reply_reviewers": "1;1;1;1", "reply_authors": "5;1;3;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 51.0, 21.02379604162864 ], "wc_strengths_avg": [ 89.75, 24.762623043611516 ], "wc_weaknesses_avg": [ 197.0, 237.75092008234165 ], "wc_questions_avg": [ 114.0, 96.07028676963549 ], "wc_limitations_avg": [ 4.75, 5.356071321407137 ], "wc_review_avg": [ 456.5, 361.2301897682418 ], "wc_reply_reviewers_avg": [ 72.25, 95.14561209010114 ], "wc_reply_authors_avg": [ 745.75, 1140.593348875926 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1542437394796004059&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "upenn.edu;upenn.edu;seas.upenn.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PuLID: Pure and Lightning ID Customization via Contrastive Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96055", "id": "E6ZodZu0HQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E6ZodZu0HQ", "openreview": "https://openreview.net/forum?id=E6ZodZu0HQ", "poster": "/media/PosterPDFs/NeurIPS%202024/96055.png?t=1731662059.7220783", "project": "", "author_site": "Zinan Guo, Yanze Wu, Chen Zhuowei, Lang chen, Peng Zhang, Qian HE", "tldr": "", "abstract": "We propose Pure and Lightning ID customization (PuLID), a novel tuning-free ID customization method for text-to-image generation. By incorporating a Lightning T2I branch with a standard diffusion one, PuLID introduces both contrastive alignment loss and accurate ID loss, minimizing disruption to the original model and ensuring high ID fidelity. Experiments show that PuLID achieves superior performance in both ID fidelity and editability. Another attractive property of PuLID is that the image elements (\\eg, background, lighting, composition, and style) before and after the ID insertion are kept as consistent as possible. Codes and models are available at https://github.com/ToTheBeginning/PuLID", "keywords": "diffusion;controllable image generation;image customization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zinan Guo;Yanze Wu;Zhuowei Chen;Lang chen;Peng Zhang;Qian HE", "authorids": "~Zinan_Guo1;~Yanze_Wu1;~Zhuowei_Chen1;~Lang_chen3;~Peng_Zhang34;~Qian_HE3", "gender": "F;M;M;M;M;M", "homepage": "https://github.com/guozinan126?tab=repositories;https://tothebeginning.github.io/;;https://scholar.google.com/citations?hl=zh-CN&user=h5xex20AAAAJ;https://scholar.google.com/citations?hl=en&user=Ulg0n4QAAAAJ;", "dblp": ";;331/1474;94/6317;https://dblp.org/rec/conf/mm/ZhangSLBCLH19;", "google_scholar": ";https://scholar.google.com.hk/citations?user=FdHiVvkAAAAJ;ow1jGJkAAAAJ;;Ulg0n4QAAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Zinan_Guo1;~Yanze_Wu1;~Zhuowei_Chen1;~Lang_chen3;~Peng_Zhang34;~Qian_HE3", "aff": "Zhejiang University;ByteDance Inc.;University of Science and Technology of China;ByteDance;ByteDance Inc.;ByteDance", "aff_domain": "zju.edu.cn;bytedance.com;ustc.edu.cn;bytedance.com;bytedance.com;bytedance.com", "position": "MS student;Researcher;PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nguo2024pulid,\ntitle={Pu{LID}: Pure and Lightning {ID} Customization via Contrastive Alignment},\nauthor={Zinan Guo and Yanze Wu and Zhuowei Chen and Lang chen and Peng Zhang and Qian HE},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E6ZodZu0HQ}\n}", "github": "", "reviewers": "pgk9;GW6B;bfEK;BtJK", "pdf_size": 21343431, "rating": "7;7;7;8", "confidence": "5;3;2;4", "soundness": "3;3;3;4", "novelty": "4;2;2;4", "presentation": "4;3;2;4", "wc_summary": "78;54;44;87", "wc_strengths": "19;32;35;101", "wc_weaknesses": "240;407;30;201", "wc_questions": "41;74;65;3", "wc_limitations": "52;1;5;3", "wc_review": "430;568;179;395", "wc_reply_reviewers": "14;0;0;35", "wc_reply_authors": "26;0;60;44", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 65.75, 17.41228014936585 ], "wc_strengths_avg": [ 46.75, 31.893377055432683 ], "wc_weaknesses_avg": [ 219.5, 134.00466409793356 ], "wc_questions_avg": [ 45.75, 27.471576219794887 ], "wc_limitations_avg": [ 15.25, 21.26470079733077 ], "wc_review_avg": [ 393.0, 139.45787894557984 ], "wc_reply_reviewers_avg": [ 12.25, 14.324367350776788 ], "wc_reply_authors_avg": [ 32.5, 22.28788908802267 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6004362258700056075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;bytedance.com;ustc.edu.cn;bytedance.com;bytedance.com;bytedance.com", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Zhejiang University;ByteDance;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.bytedance.com;http://www.ustc.edu.cn", "aff_unique_abbr": "ZJU;ByteDance;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Bayesian Online Natural Gradient (BONG)", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96054", "id": "E7en5DyO2G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E7en5DyO2G", "openreview": "https://openreview.net/forum?id=E7en5DyO2G", "poster": "/media/PosterPDFs/NeurIPS%202024/96054.png?t=1733793334.2137418", "project": "", "author_site": "Matt Jones, Peter Chang, Kevin Murphy", "tldr": "", "abstract": "We propose a novel approach to sequential Bayesian inference based on variational Bayes (VB).\n The key insight is that,\n in the online setting,\n we do not need to add the KL term to regularize to the prior (which comes from the posterior at the previous timestep);\n instead we can optimize just the expected log-likelihood,\n performing a single step of natural gradient descent\n starting at the prior predictive.\n We prove this method\n recovers exact Bayesian inference \n if the model is conjugate.\n We also show how to compute an\n efficient deterministic \n approximation to the VB objective,\n as well as our simplified objective,\n when the variational distribution is\n Gaussian or a sub-family, including the case of\n a diagonal plus low-rank\nprecision matrix.\nWe show empirically that our\nmethod outperforms other online VB methods\n in the non-conjugate setting,\n such as online learning for neural networks,\n especially when controlling for computational costs.", "keywords": "online learning;Bayesian neural networks;variational inference;natural gradient descent", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Matt Jones;Peter G. Chang;Kevin Patrick Murphy", "authorids": "~Matt_Jones1;~Peter_G._Chang1;~Kevin_Patrick_Murphy1", "gender": "M;M;", "homepage": "http://Matt.Colorado.edu;https://petergchang.github.io/;https://www.cs.ubc.ca/~murphyk/", "dblp": ";;26/2599", "google_scholar": "Q7FDrMIAAAAJ;uMf2VukAAAAJ;MxxZkEcAAAAJ", "orcid": ";;", "linkedin": ";https://linkedin.com/in/gyuyoungchang;", "or_profile": "~Matt_Jones1;~Peter_G._Chang1;~Kevin_Patrick_Murphy1", "aff": "University of Colorado Boulder;University of Chicago Booth School of Business;Google", "aff_domain": "colorado.edu;uchicago.edu;google.com", "position": "Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\njones2024bayesian,\ntitle={Bayesian Online Natural Gradient ({BONG})},\nauthor={Matt Jones and Peter G. Chang and Kevin Patrick Murphy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E7en5DyO2G}\n}", "github": "", "reviewers": "ARk4;uSab;zVLu;6NTx;ch8o", "pdf_size": 1374271, "rating": "4;6;7;7;7", "confidence": "3;3;3;4;4", "soundness": "2;4;3;3;4", "novelty": "1;3;3;4;4", "presentation": "3;3;3;4;3", "wc_summary": "66;45;122;81;62", "wc_strengths": "94;32;45;59;24", "wc_weaknesses": "170;150;68;90;36", "wc_questions": "27;1;25;27;34", "wc_limitations": "9;1;1;10;4", "wc_review": "366;229;261;267;160", "wc_reply_reviewers": "30;721;0;35;0", "wc_reply_authors": "134;433;0;0;0", "reply_reviewers": "1;2;0;1;0", "reply_authors": "2;3;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 1.0954451150103321 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 75.2, 26.056860900730157 ], "wc_strengths_avg": [ 50.8, 24.652788888886384 ], "wc_weaknesses_avg": [ 102.8, 50.16133969502808 ], "wc_questions_avg": [ 22.8, 11.320777358467925 ], "wc_limitations_avg": [ 5.0, 3.847076812334269 ], "wc_review_avg": [ 256.6, 66.61711491801488 ], "wc_reply_reviewers_avg": [ 157.2, 282.27886920561366 ], "wc_reply_authors_avg": [ 113.4, 168.01618969611232 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.560112033611204, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3373330617219154221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "colorado.edu;uchicago.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Colorado;University of Chicago;Google", "aff_unique_dep": ";Booth School of Business;Google", "aff_unique_url": "https://www.colorado.edu;https://\u5e03\u65af\u829d\u52a0\u54e5\u5927\u5b66.com;https://www.google.com", "aff_unique_abbr": "CU;UChicago;Google", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Boulder;Chicago;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FuseFL: One-Shot Federated Learning through the Lens of Causality with Progressive Model Fusion", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96053", "id": "E7fZOoiEKl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E7fZOoiEKl", "openreview": "https://openreview.net/forum?id=E7fZOoiEKl", "poster": "/media/PosterPDFs/NeurIPS%202024/96053.png?t=1731569410.2438264", "project": "", "author_site": "Zhenheng Tang, Yonggang Zhang, Peijie Dong, Yiu-ming Cheung, Amelie Zhou, Bo Han, Xiaowen Chu", "tldr": "", "abstract": "One-shot Federated Learning (OFL) significantly reduces communication costs in FL by aggregating trained models only once. However, the performance of advanced OFL methods is far behind the normal FL. In this work, we provide a causal view to find that this performance drop of OFL methods comes from the isolation problem, which means that local isolatedly trained models in OFL may easily fit to spurious correlations due to the data heterogeneity. From the causal perspective, we observe that the spurious fitting can be alleviated by augmenting intermediate features from other clients. Built upon our observation, we propose a novel learning approach to endow OFL with superb performance and low communication and storage costs, termed as FuseFL. Specifically, FuseFL decomposes neural networks into several blocks, and progressively trains and fuses each block following a bottom-up manner for feature augmentation, introducing no additional communication costs. Comprehensive experiments demonstrate that FuseFL outperforms existing OFL and ensemble FL by a significant margin. We conduct comprehensive experiments to show that FuseFL supports high scalability of clients, heterogeneous model training, and low memory costs. Our work is the first attempt using causality to analyze and alleviate data heterogeneity of OFL.", "keywords": "Federated Learning;communication efficiency;causality", "primary_area": "other", "supplementary_material": "/attachment/a2c806312d9084f33ccbee6e8dd823933357d87d.zip", "author": "Zhenheng Tang;Yonggang Zhang;Peijie Dong;Yiu-ming Cheung;Amelie Chi Zhou;Bo Han;Xiaowen Chu", "authorids": "~Zhenheng_Tang2;~Yonggang_Zhang1;~Peijie_Dong1;~Yiu-ming_Cheung1;~Amelie_Chi_Zhou1;~Bo_Han1;~Xiaowen_Chu2", "gender": ";M;M;;F;;M", "homepage": ";https://yonggangzhangben.github.io/index.html;https://pprp.github.io;;https://www.comp.hkbu.edu.hk/v1/?page=profile&id=amelieczhou;;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/CHU-Xiaowen/xwchu", "dblp": ";27/6859-3;315/4734;;;;24/2536", "google_scholar": ";XSbEr98AAAAJ;TqS6s4gAAAAJ;;https://scholar.google.com.sg/citations?hl=en;;https://scholar.google.com.hk/citations?user=v4rX24EAAAAJ", "orcid": ";0000-0002-4080-7592;0000-0003-1952-4544;;;;0000-0001-9745-4372", "linkedin": ";;;;;;", "or_profile": "~Zhenheng_Tang2;~Yonggang_Zhang1;~Peijie_Dong1;~Yiu-ming_Cheung1;~Amelie_Chi_Zhou1;~Bo_Han1;~Xiaowen_Chu2", "aff": ";Hong Kong Baptist University;The Hong Kong University of Science and Technology (Guang Zhou);;Hong Kong Baptist University;;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": ";hkbu.edu.hk;connect.hkust-gz.edu.cn;;hkbu.edu.hk;;ust.hk", "position": ";Postdoc;Phd student;;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ntang2024fusefl,\ntitle={Fuse{FL}: One-Shot Federated Learning through the Lens of Causality with Progressive Model Fusion},\nauthor={Zhenheng Tang and Yonggang Zhang and Peijie Dong and Yiu-ming Cheung and Amelie Chi Zhou and Bo Han and Xiaowen Chu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E7fZOoiEKl}\n}", "github": "", "reviewers": "FVeg;twUP;V6z4;mHC6", "pdf_size": 1798473, "rating": "5;6;7;7", "confidence": "2;4;5;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "46;50;81;117", "wc_strengths": "45;16;45;74", "wc_weaknesses": "98;147;315;348", "wc_questions": "122;26;72;70", "wc_limitations": "1;3;37;2", "wc_review": "312;242;550;611", "wc_reply_reviewers": "21;15;73;216", "wc_reply_authors": "50;54;192;200", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 28.53506614676055 ], "wc_strengths_avg": [ 45.0, 20.506096654409877 ], "wc_weaknesses_avg": [ 227.0, 106.56688040850216 ], "wc_questions_avg": [ 72.5, 33.98161267509239 ], "wc_limitations_avg": [ 10.75, 15.171931320698759 ], "wc_review_avg": [ 428.75, 155.26006408603598 ], "wc_reply_reviewers_avg": [ 81.25, 81.00115739913844 ], "wc_reply_authors_avg": [ 124.0, 72.069410986909 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4158449598745187862&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": ";hkbu.edu.hk;connect.hkust-gz.edu.cn;;hkbu.edu.hk;;ust.hk", "author_num": 7, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Hong Kong Baptist University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.ust.hk", "aff_unique_abbr": "HKBU;HKUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "InfiBench: Evaluating the Question-Answering Capabilities of Code Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97797", "id": "E8EAeyTxOy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E8EAeyTxOy", "openreview": "https://openreview.net/forum?id=E8EAeyTxOy", "poster": "/media/PosterPDFs/NeurIPS%202024/97797.png?t=1733392180.257225", "project": "", "author_site": "Linyi Li, Shijie Geng, Zhenwen Li, Yibo He, Hao Yu, Ziyue Hua, Guanghan Ning, Siwei Wang, Tao Xie, Hongxia Yang", "tldr": "", "abstract": "Large Language Models for code (code LLMs) have witnessed tremendous progress in recent years. With the rapid development of code LLMs, many popular evaluation benchmarks, such as HumanEval, DS-1000, and MBPP, have emerged to measure the performance of code LLMs with a particular focus on code generation tasks. However, they are insufficient to cover the full range of expected capabilities of code LLMs, which span beyond code generation to answering diverse coding-related questions. To fill this gap, we propose InfiBench, the first large-scale freeform question-answering (QA) benchmark for code to our knowledge, comprising 234 carefully selected high-quality Stack Overflow questions that span across 15 programming languages. InfiBench uses four types of model-free automatic metrics to evaluate response correctness where domain experts carefully concretize the criterion for each question. We conduct a systematic evaluation for over 100 latest code LLMs on InfiBench, leading to a series of novel and insightful findings. Our detailed analyses showcase potential directions for further advancement of code LLMs. InfiBench is fully open source at https://infi-coder.github.io/infibench and continuously expanding to foster more scientific and systematic practices for code LLM evaluation.", "keywords": "Benchmark;Code LLMs;Freeform QA", "primary_area": "", "supplementary_material": "/attachment/46d94c3e42db6475844886a777492a40dabfbe37.zip", "author": "Linyi Li;Shijie Geng;Zhenwen Li;Yibo He;Hao Yu;Ziyue Hua;Guanghan Ning;Siwei Wang;Tao Xie;Hongxia Yang", "authorids": "~Linyi_Li1;~Shijie_Geng1;~Zhenwen_Li1;~Yibo_He1;~Hao_Yu11;~Ziyue_Hua1;~Guanghan_Ning1;~Siwei_Wang8;~Tao_Xie4;~Hongxia_Yang2", "gender": "M;M;M;M;;M;M;M;M;F", "homepage": "http://linyil.com;;;;;;http://guanghan.info;;https://taoxiease.github.io/;https://www4.comp.polyu.edu.hk/~hongxyang/", "dblp": "99/4340-1.html;171/3642;254/2103;;;;172/9653;;x/TaoXie;", "google_scholar": "-b0sk-YAAAAJ;wujqvGYAAAAJ;https://scholar.google.com/citations?view_op=list_works;;Ns6QRcIAAAAJ;;CcwcL6oAAAAJ;https://scholar.google.com/citations?hl=en;DhhH9J4AAAAJ;iJlC5mMAAAAJ", "orcid": ";;0009-0005-0172-8890;0009-0002-1999-436X;;0009-0007-8348-5232;0000-0002-4356-7862;;0000-0002-6731-216X;", "linkedin": ";;;;;;;siwei-wang-5289a6a4/;;", "or_profile": "~Linyi_Li1;~Shijie_Geng1;~Zhenwen_Li1;~Yibo_He1;~Hao_Yu11;~Ziyue_Hua1;~Guanghan_Ning1;~Siwei_Wang8;~Tao_Xie4;~Hongxia_Yang2", "aff": "Simon Fraser University;ByteDance Inc.;Peking University;Peking University;Peking University;Peking University;ByteDance;ByteDance Inc.;Peking University;ByteDance Inc.", "aff_domain": "sfu.ca;bytedance.com;pku.edu.cn;pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;bytedance.com;bytedance.com;pku.edu.cn;bytedance.com", "position": "Assistant Professor;Researcher;PhD student;PhD student;PhD student;PhD student;Researcher;Researcher;Chair Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2024infibench,\ntitle={InfiBench: Evaluating the Question-Answering Capabilities of Code Large Language Models},\nauthor={Linyi Li and Shijie Geng and Zhenwen Li and Yibo He and Hao Yu and Ziyue Hua and Guanghan Ning and Siwei Wang and Tao Xie and Hongxia Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=E8EAeyTxOy}\n}", "github": "", "reviewers": "meDx;KYrR;bRXT;AyvS", "pdf_size": 3143878, "rating": "4;6;8;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "51;81;99;107", "wc_strengths": "85;2;97;123", "wc_improvement": "153;2;34;131", "wc_limitations": "101;6;1;131", "wc_correctness": "1;1;7;49", "wc_clarity": "8;6;9;21", "wc_relation_to_prior_work": "8;1;1;32", "wc_documentation": "1;1;1;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "409;101;250;620", "wc_reply_reviewers": "0;14;12;18", "wc_reply_authors": "99;0;86;86", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 84.5, 21.511624764298954 ], "wc_strengths_avg": [ 76.75, 45.29003753586433 ], "wc_improvement_avg": [ 80.0, 63.50196847342608 ], "wc_limitations_avg": [ 59.75, 57.26855594477654 ], "wc_correctness_avg": [ 14.5, 20.068632240389476 ], "wc_clarity_avg": [ 11.0, 5.873670062235365 ], "wc_relation_to_prior_work_avg": [ 10.5, 12.737739202856996 ], "wc_documentation_avg": [ 7.0, 10.392304845413264 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 345.0, 192.5370094293562 ], "wc_reply_reviewers_avg": [ 11.0, 6.708203932499369 ], "wc_reply_authors_avg": [ 67.75, 39.47388377142538 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10944967978330831135&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, "email": "sfu.ca;bytedance.com;pku.edu.cn;pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;bytedance.com;bytedance.com;pku.edu.cn;bytedance.com", "author_num": 10, "aff_unique_index": "0;1;2;2;2;2;1;1;2;1", "aff_unique_norm": "Simon Fraser University;ByteDance;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sfu.ca;https://www.bytedance.com;http://www.pku.edu.cn", "aff_unique_abbr": "SFU;ByteDance;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1", "aff_country_unique": "Canada;China" }, { "title": "Meta-Learning Universal Priors Using Non-Injective Change of Variables", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96052", "id": "E8b4yOLGZ5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E8b4yOLGZ5", "openreview": "https://openreview.net/forum?id=E8b4yOLGZ5", "poster": "/media/PosterPDFs/NeurIPS%202024/96052.png?t=1730840992.8374348", "project": "", "author_site": "Yilang Zhang, Alireza Sadeghi, Georgios Giannakis", "tldr": "", "abstract": "Meta-learning empowers data-hungry deep neural networks to rapidly learn from merely a few samples, which is especially appealing to tasks with small datasets. Critical in this context is the *prior knowledge* accumulated from related tasks. Existing meta-learning approaches typically rely on preselected priors, such as a Gaussian probability density function (pdf). The limited expressiveness of such priors however, hinders the enhanced performance of the trained model when dealing with tasks having exceedingly scarce data. Targeting improved expressiveness, this contribution introduces a *data-driven* prior that optimally fits the provided tasks using a novel non-injective change-of-variable (NCoV) model. Unlike preselected prior pdfs with fixed shapes, the advocated NCoV model can effectively approximate a considerably wide range of pdfs. Moreover, compared to conventional change-of-variable models, the introduced NCoV exhibits augmented expressiveness for pdf modeling, especially in high-dimensional spaces. Theoretical analysis underscores the appealing universal approximation capacity of the NCoV model. Numerical experiments conducted on three few-shot learning datasets validate the superiority of data-driven priors over the prespecified ones, showcasing its pronounced effectiveness when dealing with extremely limited data resources.", "keywords": "Meta-learning;prior expressiveness;change-of-variable formula", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/38f965f929b579225f006c0226e39c74969e73cc.zip", "author": "Yilang Zhang;Alireza Sadeghi;Georgios B. Giannakis", "authorids": "~Yilang_Zhang2;~Alireza_Sadeghi1;~Georgios_B._Giannakis1", "gender": "M;M;M", "homepage": "https://zhangyilang.github.io/;;http://spincom.umn.edu/", "dblp": "254/1439;;33/4080", "google_scholar": "cGhJeOcAAAAJ;https://scholar.google.com/;Nu_6R8sAAAAJ", "orcid": "0000-0001-7109-6677;0000-0003-1280-7592;", "linkedin": "yilang-zhang-529709323/;alireza-sadeghi-96605997/;georgios-b-giannakis-54023b18/", "or_profile": "~Yilang_Zhang2;~Alireza_Sadeghi1;~Georgios_B._Giannakis1", "aff": "University of Minnesota - Twin Cities;;", "aff_domain": "umn.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nzhang2024metalearning,\ntitle={Meta-Learning Universal Priors Using Non-Injective Change of Variables},\nauthor={Yilang Zhang and Alireza Sadeghi and Georgios B. Giannakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E8b4yOLGZ5}\n}", "github": "", "reviewers": "BS6h;VcV4;XQHx;4d6S", "pdf_size": 1044061, "rating": "5;5;6;6", "confidence": "3;3;3;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "92;55;80;122", "wc_strengths": "34;27;90;59", "wc_weaknesses": "137;67;81;172", "wc_questions": "4;43;22;4", "wc_limitations": "4;9;27;8", "wc_review": "271;201;300;365", "wc_reply_reviewers": "37;0;125;21", "wc_reply_authors": "19;0;275;16", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 24.097458372201828 ], "wc_strengths_avg": [ 52.5, 24.70323865407125 ], "wc_weaknesses_avg": [ 114.25, 42.39914503855001 ], "wc_questions_avg": [ 18.25, 16.068213964221414 ], "wc_limitations_avg": [ 12.0, 8.860022573334675 ], "wc_review_avg": [ 284.25, 58.89556434910867 ], "wc_reply_reviewers_avg": [ 45.75, 47.59923843928598 ], "wc_reply_authors_avg": [ 77.5, 114.25519681835046 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yvEo0kXnV1gJ:scholar.google.com/&scioq=Meta-Learning+Universal+Priors+Using+Non-Injective+Change+of+Variables&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "umn.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0", "aff_campus_unique": "Twin Cities", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Distributionally Robust Performative Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96051", "id": "E8wDxddIqU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=E8wDxddIqU", "openreview": "https://openreview.net/forum?id=E8wDxddIqU", "poster": "", "project": "", "author_site": "Songkai Xue, Yuekai Sun", "tldr": "", "abstract": "Performative prediction aims to model scenarios where predictive outcomes subsequently influence the very systems they target. The pursuit of a performative optimum (PO)\u2014minimizing performative risk\u2014is generally reliant on modeling of the distribution map, which characterizes how a deployed ML model alters the data distribution. Unfortunately, inevitable misspecification of the distribution map can lead to a poor approximation of the true PO. To address this issue, we introduce a novel framework of distributionally robust performative prediction and study a new solution concept termed as distributionally robust performative optimum (DRPO). We show provable guarantees for DRPO as a robust approximation to the true PO when the nominal distribution map is different from the actual one. Moreover, distributionally robust performative prediction can be reformulated as an augmented performative prediction problem, enabling efficient optimization. The experimental results demonstrate that DRPO offers potential advantages over traditional PO approach when the distribution map is misspecified at either micro- or macro-level.", "keywords": "performative prediction;distributionally robust learning;misspecification;distribution shift", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Songkai Xue;Yuekai Sun", "authorids": "~Songkai_Xue1;~Yuekai_Sun1", "gender": ";", "homepage": "http://www-personal.umich.edu/~sxue/;https://yuekai.github.io/", "dblp": "260/6635;", "google_scholar": "YZjCcnoAAAAJ;6T1XtW8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Songkai_Xue1;~Yuekai_Sun1", "aff": "University of Michigan;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Assistant \u2192 Associate Professor of Statistics", "bibtex": "@inproceedings{\nxue2024distributionally,\ntitle={Distributionally Robust Performative Prediction},\nauthor={Songkai Xue and Yuekai Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=E8wDxddIqU}\n}", "github": "", "reviewers": "Zy5f;UHwo;bCrg;67u7", "pdf_size": 664393, "rating": "4;6;6;8", "confidence": "2;4;4;5", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;2;4", "wc_summary": "79;208;131;161", "wc_strengths": "78;68;19;97", "wc_weaknesses": "165;83;619;60", "wc_questions": "74;59;32;104", "wc_limitations": "13;8;28;9", "wc_review": "409;426;829;431", "wc_reply_reviewers": "0;0;58;19", "wc_reply_authors": "39;39;33;16", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 144.75, 46.84215515964226 ], "wc_strengths_avg": [ 65.5, 28.79670119996386 ], "wc_weaknesses_avg": [ 231.75, 226.9596605126118 ], "wc_questions_avg": [ 67.25, 26.013217794036937 ], "wc_limitations_avg": [ 14.5, 8.0156097709407 ], "wc_review_avg": [ 523.75, 176.42473607745598 ], "wc_reply_reviewers_avg": [ 19.25, 23.678840765544244 ], "wc_reply_authors_avg": [ 31.75, 9.41740410091868 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9733285267845752, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dvVAAzEpMFYJ:scholar.google.com/&scioq=Distributionally+Robust+Performative+Prediction&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "umich.edu;umich.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "TGB 2.0: A Benchmark for Learning on Temporal Knowledge Graphs and Heterogeneous Graphs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97796", "id": "EADRzNJFn1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EADRzNJFn1", "openreview": "https://openreview.net/forum?id=EADRzNJFn1", "poster": "/media/PosterPDFs/NeurIPS%202024/97796.png?t=1731354701.0660458", "project": "", "author_site": "Julia Gastinger, Shenyang Huang, Michael Galkin, Erfan Loghmani, Ali Parviz, Farimah Poursafaei, Jacob Danovitch, Emanuele Rossi, Ioannis Koutis, Heiner Stuckenschmidt, Reihaneh Rabbany, Guillaume Rabusseau", "tldr": "", "abstract": "Multi-relational temporal graphs are powerful tools for modeling real-world data, capturing the evolving and interconnected nature of entities over time. Recently, many novel models are proposed for ML on such graphs intensifying the need for robust evaluation and standardized benchmark datasets. However, the availability of such resources remains scarce and evaluation faces added complexity due to reproducibility issues in experimental protocols. To address these challenges, we introduce Temporal Graph Benchmark 2.0 (TGB 2.0), a novel benchmarking framework tailored for evaluating methods for predicting future links on Temporal Knowledge Graphs and Temporal Heterogeneous Graphs with a focus on large-scale datasets, extending the Temporal Graph Benchmark. TGB 2.0 facilitates comprehensive evaluations by presenting eight novel datasets spanning five domains with up to 53 million edges. TGB 2.0 datasets are significantly larger\nthan existing datasets in terms of number of nodes, edges, or timestamps. In addition, TGB 2.0 provides a reproducible and realistic evaluation pipeline for multi-relational temporal graphs. Through extensive experimentation, we observe that 1) leveraging edge-type information is crucial to obtain high performance, 2) simple heuristic baselines are often competitive with more complex methods, 3) most methods fail to run on our largest datasets, highlighting the need for research on more scalable methods.", "keywords": "Temporal Knowledge Graph; Temporal Heterogeneous Graph; Multi-relational Learning; Graph Representation Learning; Temporal Graphs", "primary_area": "", "supplementary_material": "/attachment/b0793550db609f19eb15f98cbedbc5ab53a08d27.zip", "author": "Julia Gastinger;Shenyang Huang;Mikhail Galkin;Erfan Loghmani;Ali Parviz;Farimah Poursafaei;Jacob Danovitch;Emanuele Rossi;Ioannis Koutis;Heiner Stuckenschmidt;Reihaneh Rabbany;Guillaume Rabusseau", "authorids": "~Julia_Gastinger1;~Shenyang_Huang1;~Mikhail_Galkin1;~Erfan_Loghmani1;~Ali_Parviz1;~Farimah_Poursafaei1;~Jacob_Danovitch1;~Emanuele_Rossi1;~Ioannis_Koutis1;~Heiner_Stuckenschmidt2;~Reihaneh_Rabbany1;~Guillaume_Rabusseau1", "gender": "F;;M;;;F;;M;;M;F;M", "homepage": ";;https://migalkin.github.io/;https://erfanloghmani.github.io/;;;http://jacobdanovitch.me/;https://www.emanuelerossi.co.uk/;https://web.njit.edu/~ikoutis/;https://www.uni-mannheim.de/dws/people/professors/prof-dr-heiner-stuckenschmidt/;http://www.reirab.com/;https://www-labs.iro.umontreal.ca/~grabus/", "dblp": "239/5124;;160/8154;;;277/0215;251/8947;;;https://dblp.uni-trier.de/pers/s/Stuckenschmidt:Heiner;94/9024;143/7327", "google_scholar": "UgrQkB4AAAAJ;;yfYRbG4AAAAJ;1y_jXb8AAAAJ;;https://scholar.google.ca/citations?user=gZ7HEsMAAAAJ;;DHlkBOYAAAAJ;;oqpT1YUAAAAJ;https://scholar.google.ca/citations?user=Foh_c-QAAAAJ;https://scholar.google.fr/citations?user=t2i4V4EAAAAJ", "orcid": "0000-0003-1914-6723;;;;;;;;;0000-0002-0209-3859;;", "linkedin": "julia-gastinger-a6ab0b177;;;;;farimah-poursafaei-133195167/?originalSubdomain=ca;;;;;;", "or_profile": "~Julia_Gastinger1;~Shenyang_Huang1;~Mikhail_Galkin1;~Erfan_Loghmani1;~Ali_Parviz1;~Farimah_Poursafaei1;~Jacob_Danovitch1;~Emanuele_Rossi1;~Ioannis_Koutis1;~Heiner_Stuckenschmidt2;~Reihaneh_Rabbany1;~Guillaume_Rabusseau1", "aff": "NEC;;Intel;University of Washington;;Mila, Quebec AI Institute;McGill University;Imperial College London;New Jersey Institute of Technology;University of Mannheim;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;Universit\u00e9 de Montr\u00e9al", "aff_domain": "neclab.eu;;intel.com;uw.edu;;mila.quebec;mcgill.ca;ic.ac.uk;njit.edu;uni-mannheim.de;mila.umontreal.ca;umontreal.ca", "position": "Researcher;;Researcher;PhD student;;Postdoc;MS student;PhD student;Associate Professor;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ngastinger2024tgb,\ntitle={{TGB} 2.0: A Benchmark for Learning on Temporal Knowledge Graphs and Heterogeneous Graphs},\nauthor={Julia Gastinger and Shenyang Huang and Mikhail Galkin and Erfan Loghmani and Ali Parviz and Farimah Poursafaei and Jacob Danovitch and Emanuele Rossi and Ioannis Koutis and Heiner Stuckenschmidt and Reihaneh Rabbany and Guillaume Rabusseau},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EADRzNJFn1}\n}", "github": "", "reviewers": "ADfV;NhyZ;pUeu;AdXk", "pdf_size": 679879, "rating": "5;7;7;7", "confidence": "4;3;3;4", "wc_summary_and_contributions": "203;107;67;67", "wc_strengths": "220;61;56;22", "wc_improvement": "87;21;37;88", "wc_limitations": "5;59;43;1", "wc_correctness": "1;31;35;1", "wc_clarity": "1;28;33;1", "wc_relation_to_prior_work": "1;42;22;1", "wc_documentation": "1;31;36;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "520;381;330;183", "wc_reply_reviewers": "0;24;26;0", "wc_reply_authors": "32;19;41;31", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 111.0, 55.569775957799216 ], "wc_strengths_avg": [ 89.75, 76.68238063597138 ], "wc_improvement_avg": [ 58.25, 29.794084983432533 ], "wc_limitations_avg": [ 27.0, 24.698178070456937 ], "wc_correctness_avg": [ 17.0, 16.06237840420901 ], "wc_clarity_avg": [ 15.75, 14.85555451674558 ], "wc_relation_to_prior_work_avg": [ 16.5, 17.03672503740082 ], "wc_documentation_avg": [ 17.25, 16.345871038277526 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 353.5, 120.52074510224371 ], "wc_reply_reviewers_avg": [ 12.5, 12.519984025548915 ], "wc_reply_authors_avg": [ 30.75, 7.8222439235810075 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8326764636893174075&as_sdt=10005&sciodt=0,8&hl=en", "gs_version_total": 5, "email": "neclab.eu;;intel.com;uw.edu;;mila.quebec;mcgill.ca;ic.ac.uk;njit.edu;uni-mannheim.de;mila.umontreal.ca;umontreal.ca", "author_num": 12, "aff_unique_index": "0;1;2;3;4;5;6;7;8;9", "aff_unique_norm": "NEC Corporation;Intel;University of Washington;Quebec AI Institute;McGill University;Imperial College London;New Jersey Institute of Technology;University of Mannheim;University of Montreal;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";Intel Corporation;;AI Institute;;;;;Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.nec.com;https://www.intel.com;https://www.washington.edu;https://www.mila.quebec;https://www.mcgill.ca;https://www.imperial.ac.uk;https://www.njit.edu;https://www.uni-mannheim.de;https://www.umontreal.ca;https://www.umontreal.ca", "aff_unique_abbr": "NEC;Intel;UW;Mila;McGill;ICL;NJIT;UM;UM;UdeM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Quebec;Montreal", "aff_country_unique_index": "0;1;1;2;2;3;1;4;2;2", "aff_country_unique": "Japan;United States;Canada;United Kingdom;Germany" }, { "title": "A Theory of Optimistically Universal Online Learnability for General Concept Classes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96050", "id": "EAbNopo3os", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EAbNopo3os", "openreview": "https://openreview.net/forum?id=EAbNopo3os", "poster": "", "project": "", "author_site": "Steve Hanneke, Hongao Wang", "tldr": "", "abstract": "We provide a full characterization of the concept classes that are optimistically universally online learnable with {0, 1} labels. The notion of optimistically universal online learning was defined in [Hanneke, 2021] in order to understand learnability under minimal assumptions. In this paper, following the philosophy behind that work, we investigate two questions, namely, for every concept class: (1) What are the minimal assumptions on the data process admitting online learnability? (2) Is there a learning algorithm which succeeds under every data process satisfying the minimal assumptions? Such an algorithm is said to be optimistically universal for the given concept class. We resolve both of these questions for all concept classes, and moreover, as part of our solution we design general learning algorithms for each case. Finally, we extend these algorithms and results to the agnostic case, showing an equivalence between the minimal assumptions on the data process for learnability in the agnostic and realizable cases, for every concept class, as well as the equivalence of optimistically universal learnability.", "keywords": "Online Learning;Statistical Learning;Consistency", "primary_area": "learning_theory", "supplementary_material": "", "author": "Steve Hanneke;Hongao Wang", "authorids": "~Steve_Hanneke1;~Hongao_Wang1", "gender": "M;M", "homepage": "http://www.stevehanneke.com;https://phijack.github.io/", "dblp": "40/154;", "google_scholar": "fEhNO7YAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Steve_Hanneke1;~Hongao_Wang1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhanneke2024a,\ntitle={A Theory of Optimistically Universal Online Learnability for General Concept Classes},\nauthor={Steve Hanneke and Hongao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EAbNopo3os}\n}", "github": "", "reviewers": "jKRy;vKfY;MwJp", "pdf_size": 419923, "rating": "4;7;8", "confidence": "2;3;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;3;2", "wc_summary": "157;137;296", "wc_strengths": "31;38;63", "wc_weaknesses": "10;50;19", "wc_questions": "34;15;41", "wc_limitations": "17;13;2", "wc_review": "249;253;421", "wc_reply_reviewers": "74;15;19", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 196.66666666666666, 70.71224944959836 ], "wc_strengths_avg": [ 44.0, 13.73559851869101 ], "wc_weaknesses_avg": [ 26.333333333333332, 17.13346303452853 ], "wc_questions_avg": [ 30.0, 10.98483803552272 ], "wc_limitations_avg": [ 10.666666666666666, 6.342099196813483 ], "wc_review_avg": [ 307.6666666666667, 80.15540461434206 ], "wc_reply_reviewers_avg": [ 36.0, 26.919633479426622 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tftg86gsvSsJ:scholar.google.com/&scioq=A+Theory+of+Optimistically+Universal+Online+Learnability+for+General+Concept+Classes&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "purdue.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Efficient Streaming Algorithms for Graphlet Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96049", "id": "EC9Hfi9V3k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EC9Hfi9V3k", "openreview": "https://openreview.net/forum?id=EC9Hfi9V3k", "poster": "/media/PosterPDFs/NeurIPS%202024/96049.png?t=1731470841.3133492", "project": "", "author_site": "Yann Bourreau, Marco Bressan, T-H. Hubert Chan, Qipeng Kuang, Mauro Sozio", "tldr": "", "abstract": "Given a graph $G$ and a positive integer $k$, the Graphlet Sampling problem asks to sample a connected induced $k$-vertex subgraph of $G$ uniformly at random.\nGraphlet sampling enhances machine learning applications by transforming graph structures into feature vectors for tasks such as graph classification and subgraph identification, boosting neural network performance, and supporting clustered federated learning by capturing local structures and relationships.\nA recent work has shown that the problem admits an algorithm that preprocesses $G$ in time $O(nk^2 \\log k + m)$, and draws one sample in expected time $k^{O(k)} \\log n$, where $n=|V(G)|$ and $m=|E(G)|$. Such an algorithm relies on the assumption that the input graph fits into main memory and it does not seem to be straightforward to adapt it to very large graphs. We consider Graphlet Sampling in the semi-streaming setting, where we have a memory of $M = \\Omega(n \\log n)$ words, and $G$ can be only read through sequential passes over the edge list. We develop a semi-streaming algorithm that preprocesses $G$ in $p={O}(\\log n)$ passes and samples $\\Theta(M k^{-O(k)})$ independent uniform $k$-graphlets in $O(k)$ passes. For constant $k$, both phases run in time $O((n+m)\\log n)$. We also show that the tradeoff between memory and number of passes of our algorithms is near-optimal. Our extensive evaluation on very large graphs shows the effectiveness of our algorithms.", "keywords": "graphlet sampling;streaming;approximation algorithms", "primary_area": "other", "supplementary_material": "/attachment/3b59b68a9a8bd85205d204b2a37e66541236de3a.zip", "author": "Yann Bourreau;Marco Bressan;T-H. Hubert Chan;Qipeng Kuang;Mauro Sozio", "authorids": "~Yann_Bourreau1;~Marco_Bressan4;~T-H._Hubert_Chan1;~Qipeng_Kuang1;~Mauro_Sozio2", "gender": "M;M;M;M;", "homepage": "https://cispa.de/en/people/c01yabo;https://sites.google.com/view/marco-bressan/home;http://kqp.world/;https://sites.google.com/site/maurosozio/home;https://i.cs.hku.hk/~hubert/", "dblp": ";b/MarcoBressan2;383/8444;72/3698.html;c/THHubertChan", "google_scholar": ";https://scholar.google.it/citations?user=8Rh17n8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;xvH0eIsAAAAJ;", "orcid": ";0000-0001-5211-2264;0009-0007-2396-8312;;", "linkedin": ";;;;", "or_profile": "~Yann_Bourreau1;~Marco_Bressan4;~Qipeng_Kuang1;~Mauro_Sozio2;~Hubert_Chan1", "aff": "CISPA Helmholtz Center for Information Security;University of Milan;University of Hong Kong;;The University of Hong Kong", "aff_domain": "cispa.de;unimi.it;hku.hk;;hku.hk", "position": "PhD student;Assistant Professor;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nbourreau2024efficient,\ntitle={Efficient Streaming Algorithms for Graphlet Sampling},\nauthor={Yann Bourreau and Marco Bressan and T-H. Hubert Chan and Qipeng Kuang and Mauro Sozio},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EC9Hfi9V3k}\n}", "github": "", "reviewers": "5Ka2;T1eM;NpXf", "pdf_size": 549386, "rating": "5;5;7", "confidence": "4;3;3", "soundness": "4;3;3", "novelty": "2;2;3", "presentation": "4;3;2", "wc_summary": "91;314;61", "wc_strengths": "65;31;113", "wc_weaknesses": "177;43;55", "wc_questions": "17;26;46", "wc_limitations": "33;1;1", "wc_review": "383;415;276", "wc_reply_reviewers": "67;136;5", "wc_reply_authors": "0;50;0", "reply_reviewers": "1;2;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 155.33333333333334, 112.86077952750263 ], "wc_strengths_avg": [ 69.66666666666667, 33.6386021641143 ], "wc_weaknesses_avg": [ 91.66666666666667, 60.53832578531462 ], "wc_questions_avg": [ 29.666666666666668, 12.119772641798562 ], "wc_limitations_avg": [ 11.666666666666666, 15.084944665313014 ], "wc_review_avg": [ 358.0, 59.43624034767565 ], "wc_reply_reviewers_avg": [ 69.33333333333333, 53.50597059103674 ], "wc_reply_authors_avg": [ 16.666666666666668, 23.570226039551585 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Aqe-PI4By1UJ:scholar.google.com/&scioq=Efficient+Streaming+Algorithms+for+Graphlet+Sampling&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cispa.de;unimi.it;hku.hk;;hku.hk", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;University of Milan;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cispa.de/;https://www.unimi.it;https://www.hku.hk", "aff_unique_abbr": "CISPA;UniMi;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Germany;Italy;China" }, { "title": "Infer Induced Sentiment of Comment Response to Video: A New Task, Dataset and Baseline", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97795", "id": "EEwb201bnO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EEwb201bnO", "openreview": "https://openreview.net/forum?id=EEwb201bnO", "poster": "", "project": "", "author_site": "qi jia, baoyu, Cong Xu, Lu Liu, Liang Jin, Guoguang Du, Zhenhua Guo, Yaqian Zhao, Xuanjing Huang, Rengang Li", "tldr": "", "abstract": "Existing video multi-modal sentiment analysis mainly focuses on the sentiment expression of people within the video, yet often neglects the induced sentiment of viewers while watching the videos. Induced sentiment of viewers is essential for inferring the public response to videos and has broad application in analyzing public societal sentiment, effectiveness of advertising and other areas. The micro videos and the related comments provide a rich application scenario for viewers\u2019 induced sentiment analysis. In light of this, we introduces a novel research task, Multimodal Sentiment Analysis for Comment Response of Video Induced(MSA-CRVI), aims to infer opinions and emotions according to comments response to micro video. Meanwhile, we manually annotate a dataset named Comment Sentiment toward to Micro Video (CSMV) to support this research. It is the largest video multi-modal sentiment dataset in terms of scale and video duration to our knowledge, containing 107, 267 comments and 8, 210 micro videos with a video duration of 68.83 hours. To infer the induced sentiment of comment should leverage the video content, we propose the Video Content-aware Comment Sentiment Analysis (VC-CSA) method as a baseline to address the challenges inherent in this new task. Extensive experiments demonstrate that our method is showing significant improvements over other established baselines. We make the dataset and source code publicly available at https://github.com/IEIT-AGI/MSA-CRVI.", "keywords": "Multi-Modal; Micro Video; Sentiment Analysis; Emotion", "primary_area": "", "supplementary_material": "/attachment/233e1edba02b78bb40608fb7bdaa7482f53e10d3.pdf", "author": "Qi Jia;Baoyu Fan;Cong Xu;Lu Liu;Liang Jin;Guoguang Du;Zhenhua Guo;Yaqian Zhao;Xuanjing Huang;Rengang Li", "authorids": "~Qi_Jia4;~Baoyu_Fan1;~Cong_Xu3;~Lu_Liu13;~Liang_Jin1;~Guoguang_Du1;~Zhenhua_Guo5;~Yaqian_Zhao1;~Xuanjing_Huang1;~Rengang_Li1", "gender": "M;M;;F;;;;F;F;M", "homepage": ";;;;;https://georgedu.github.io/;;;https://xuanjing-huang.github.io/;", "dblp": "69/1921-4;218/2727;;31/2088-9;;156/3664;41/294-3;171/5841;05/6735-1;262/1327.html", "google_scholar": ";;;;;Yr_OJbYAAAAJ;vBluvlIAAAAJ;;RGsMgZA4H78C;", "orcid": "0000-0003-0481-4311;;;0000-0002-1303-9196;;;0000-0002-1303-6681;0000-0002-9170-0090;0000-0001-9197-9426;", "linkedin": ";;;;;;;;;", "or_profile": "~Qi_Jia4;~Baoyu_Fan1;~Cong_Xu3;~Lu_Liu13;~Liang_Jin1;~Guoguang_Du1;~Zhenhua_Guo5;~Yaqian_Zhao1;~Xuanjing_Huang1;~Rengang_Li1", "aff": "IEIT SYSTEMS Co., Ltd.;IEIT SYSTEMS CO.,LTD;;IEIT SYSTEMS CO.,LTD;IEIT SYSTEMS CO.,LTD;IEIT SYSTEMS CO.,LTD;IEIT SYSTEMS CO.,LTD;IEIT SYSTEMS Co., Ltd.;Fudan University;IEIT SYSTEMS CO.,LTD", "aff_domain": "ieisystem.com;ieisystem.com;;ieisystem.com;ieisystem.com;ieisystem.com;ieisystem.com;ieisystem.com;fudan.edu.cn;ieisystem.com", "position": "Researcher;Researcher;;Researcher;Researcher;Researcher;Principal Researcher;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\njia2024infer,\ntitle={Infer Induced Sentiment of Comment Response to Video: A New Task, Dataset and Baseline},\nauthor={Qi Jia and Baoyu Fan and Cong Xu and Lu Liu and Liang Jin and Guoguang Du and Zhenhua Guo and Yaqian Zhao and Xuanjing Huang and Rengang Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EEwb201bnO}\n}", "github": "", "reviewers": "6Nuc;scYW;6nAr;557t", "pdf_size": 990239, "rating": "6;6;6;8", "confidence": "4;3;2;5", "wc_summary_and_contributions": "106;102;94;182", "wc_strengths": "82;5;29;44", "wc_improvement": "374;26;121;87", "wc_limitations": "54;15;5;1", "wc_correctness": "10;1;12;1", "wc_clarity": "27;4;7;1", "wc_relation_to_prior_work": "51;1;9;1", "wc_documentation": "67;1;8;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "772;156;286;319", "wc_reply_reviewers": "0;0;0;199", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 121.0, 35.482389998420345 ], "wc_strengths_avg": [ 40.0, 27.955321496988727 ], "wc_improvement_avg": [ 152.0, 132.614101814249 ], "wc_limitations_avg": [ 18.75, 20.980645843252777 ], "wc_correctness_avg": [ 6.0, 5.049752469181039 ], "wc_clarity_avg": [ 9.75, 10.18270592720815 ], "wc_relation_to_prior_work_avg": [ 15.5, 20.75451758051726 ], "wc_documentation_avg": [ 19.25, 27.716195626384224 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 383.25, 232.5697476027353 ], "wc_reply_reviewers_avg": [ 49.75, 86.16952767655164 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_F-0pXc0M7AJ:scholar.google.com/&scioq=Infer+Induced+Sentiment+of+Comment+Response+to+Video:+A+New+Task,+Dataset+and+Baseline&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "ieisystem.com;ieisystem.com;;ieisystem.com;ieisystem.com;ieisystem.com;ieisystem.com;ieisystem.com;fudan.edu.cn;ieisystem.com", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;0;2;1", "aff_unique_norm": "IEIT SYSTEMS;IEIT SYSTEMS CO., LTD;Fudan University", "aff_unique_dep": ";;", "aff_unique_url": ";;https://www.fudan.edu.cn", "aff_unique_abbr": ";;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Muscles in Time: Learning to Understand Human Motion In-Depth by Simulating Muscle Activations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97794", "id": "EFV7fLZRWO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EFV7fLZRWO", "openreview": "https://openreview.net/forum?id=EFV7fLZRWO", "poster": "/media/PosterPDFs/NeurIPS%202024/97794.png?t=1733269881.7143395", "project": "", "author_site": "David Schneider, Simon Rei\u00df, Marco Kugler, Alexander Jaus, Kunyu Peng, Susanne Sutschet, M. Saquib Sarfraz, Sven Matthiesen, Rainer Stiefelhagen", "tldr": "", "abstract": "Exploring the intricate dynamics between muscular and skeletal structures is pivotal for understanding human motion. This domain presents substantial challenges, primarily attributed to the intensive resources required for acquiring ground truth muscle activation data, resulting in a scarcity of datasets.\nIn this work, we address this issue by establishing Muscles in Time (MinT), a large-scale synthetic muscle activation dataset.\nFor the creation of MinT, we enriched existing motion capture datasets by incorporating muscle activation simulations derived from biomechanical human body models using the OpenSim platform, a common framework used in biomechanics and human motion research.\nStarting from simple pose sequences, our pipeline enables us to extract detailed information about the timing of muscle activations within the human musculoskeletal system.\nMuscles in Time contains over nine hours of simulation data covering 227 subjects and 402 simulated muscle strands. \nWe demonstrate the utility of this dataset by presenting results on neural network-based muscle activation estimation from human pose sequences with two different sequence-to-sequence architectures.", "keywords": "Muscle Activation Estimation;Synthetic Data;Human Motion", "primary_area": "", "supplementary_material": "/attachment/9c04e9915976c20b5c4ece20982eef2857ba2a4b.pdf", "author": "David Schneider;Simon Rei\u00df;Marco Kugler;Alexander Jaus;Kunyu Peng;Susanne Sutschet;M. Saquib Sarfraz;Sven Matthiesen;Rainer Stiefelhagen", "authorids": "~David_Schneider4;~Simon_Rei\u00df1;~Marco_Kugler1;~Alexander_Jaus1;~Kunyu_Peng1;~Susanne_Sutschet1;~M._Saquib_Sarfraz1;~Sven_Matthiesen1;~Rainer_Stiefelhagen1", "gender": "M;M;M;M;F;F;M;M;M", "homepage": "https://simplexsigil.github.io/;;;;;https://www.ipek.kit.edu/21_10594.php;https://ssarfraz.github.io/;https://www.ipek.kit.edu/21_425.php;https://cvhci.anthropomatik.kit.edu/people_596.php", "dblp": ";259/5361;;;292/4197;;12/1561;;31/4699", "google_scholar": "https://scholar.google.com/citations?hl=de;pAiIxxkAAAAJ;;;pA9c0YsAAAAJ;;https://scholar.google.de/citations?user=4YLsmYIAAAAJ;https://scholar.google.de/citations?user=75P3ny0AAAAJ;SFCOJxMAAAAJ", "orcid": "0000-0002-3272-2337;0000-0003-1953-6211;;0000-0002-0669-0300;0000-0002-5419-9292;;0000-0002-1271-0005;0000-0001-5978-694X;0000-0001-8046-4945", "linkedin": "david-schneider-60a4941b4/;simon-rei%C3%9F-b534581b3?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3Bt3wofSkTQg%2BBvK8sDcGJbg%3D%3D;marco-kugler/;;;susanne-sutschet-681109120/?originalSubdomain=de;saquib-sarfraz-6395783a/;sven-matthiesen/?originalSubdomain=de;", "or_profile": "~David_Schneider4;~Simon_Rei\u00df1;~Marco_Kugler1;~Alexander_Jaus1;~Kunyu_Peng1;~Susanne_Sutschet1;~M._Saquib_Sarfraz1;~Sven_Matthiesen1;~Rainer_Stiefelhagen1", "aff": "Sony Europe Ltd.;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_domain": "sony.com;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "Intern;Postdoc;MS student;PhD student;Postdoc;PhD student;Lecturer;Full Professor;Full Professor", "bibtex": "@inproceedings{\nschneider2024muscles,\ntitle={Muscles in Time: Learning to Understand Human Motion In-Depth by Simulating Muscle Activations},\nauthor={David Schneider and Simon Rei{\\ss} and Marco Kugler and Alexander Jaus and Kunyu Peng and Susanne Sutschet and M. Saquib Sarfraz and Sven Matthiesen and Rainer Stiefelhagen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EFV7fLZRWO}\n}", "github": "", "reviewers": "6vHW;kyb6;Bfvp;pwPd", "pdf_size": 8773391, "rating": "5;6;8;9", "confidence": "4;4;4;4", "wc_summary_and_contributions": "94;38;52;77", "wc_strengths": "65;15;27;120", "wc_improvement": "48;14;219;78", "wc_limitations": "72;6;64;27", "wc_correctness": "23;4;37;18", "wc_clarity": "24;47;6;21", "wc_relation_to_prior_work": "23;137;26;27", "wc_documentation": "26;1;37;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "376;263;469;378", "wc_reply_reviewers": "0;15;33;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;0;1", "rating_avg": [ 7.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 65.25, 21.695333599647643 ], "wc_strengths_avg": [ 56.75, 40.91683638797115 ], "wc_improvement_avg": [ 89.75, 77.9819690697792 ], "wc_limitations_avg": [ 42.25, 26.947866334832522 ], "wc_correctness_avg": [ 20.5, 11.800423721205947 ], "wc_clarity_avg": [ 24.5, 14.67140075112121 ], "wc_relation_to_prior_work_avg": [ 53.25, 48.37548449369784 ], "wc_documentation_avg": [ 18.25, 14.095655359010449 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 371.5, 73.04279567486448 ], "wc_reply_reviewers_avg": [ 12.0, 13.583077707206124 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1470622464491742493&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "sony.com;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;1;2", "aff_unique_norm": "Sony Europe;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sony.eu;https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "Sony Europe;KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Training an Open-Vocabulary Monocular 3D Detection Model without 3D Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96048", "id": "EFkw0OgZOr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EFkw0OgZOr", "openreview": "https://openreview.net/forum?id=EFkw0OgZOr", "poster": "/media/PosterPDFs/NeurIPS%202024/96048.png?t=1731701880.320096", "project": "", "author_site": "Rui Huang, Henry Zheng, Yan Wang, Zhuofan Xia, Marco Pavone, Gao Huang", "tldr": "", "abstract": "Open-vocabulary 3D object detection has recently attracted considerable attention due to its broad applications in autonomous driving and robotics, which aims to effectively recognize novel classes in previously unseen domains. However, existing point cloud-based open-vocabulary 3D detection models are limited by their high deployment costs. In this work, we propose a novel open-vocabulary monocular 3D object detection framework, dubbed OVM3D-Det, which trains detectors using only RGB images, making it both cost-effective and scalable to publicly available data. Unlike traditional methods, OVM3D-Det does not require high-precision LiDAR or 3D sensor data for either input or generating 3D bounding boxes. Instead, it employs open-vocabulary 2D models and pseudo-LiDAR to automatically label 3D objects in RGB images, fostering the learning of open-vocabulary monocular 3D detectors. However, training 3D models with labels directly derived from pseudo-LiDAR is inadequate due to imprecise boxes estimated from noisy point clouds and severely occluded objects. To address these issues, we introduce two innovative designs: adaptive pseudo-LiDAR erosion and bounding box refinement with prior knowledge from large language models. These techniques effectively calibrate the 3D labels and enable RGB-only training for 3D detectors. Extensive experiments demonstrate the superiority of OVM3D-Det over baselines in both indoor and outdoor scenarios. The code will be released.", "keywords": "Monocular 3D Detection;Open-Vocabulary 3D Detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Rui Huang;Henry Zheng;Yan Wang;Zhuofan Xia;Marco Pavone;Gao Huang", "authorids": "~Rui_Huang9;~Henry_Zheng1;~Yan_Wang10;~Zhuofan_Xia2;~Marco_Pavone1;~Gao_Huang1", "gender": ";M;M;;M;M", "homepage": ";;https://www.cs.cornell.edu/~yanwang/;;https://web.stanford.edu/~pavone/;http://www.gaohuang.net", "dblp": ";303/2237;59/2227;;91/3382-1.html;", "google_scholar": ";gZCggycAAAAJ;nZsD8XwAAAAJ;;RhOpyXcAAAAJ;-P9LwcgAAAAJ", "orcid": ";0009-0002-8657-8235;;;;", "linkedin": ";henry-zheng-70ab2aa1/;;;;", "or_profile": "~Rui_Huang9;~Henry_Zheng1;~Yan_Wang10;~Zhuofan_Xia2;~Marco_Pavone1;~Gao_Huang1", "aff": ";Tsinghua University;NVIDIA;;Stanford University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;nvidia.com;;stanford.edu;tsinghua.edu.cn", "position": ";PhD student;Researcher;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024training,\ntitle={Training an Open-Vocabulary Monocular 3D Detection Model without 3D Data},\nauthor={Rui Huang and Henry Zheng and Yan Wang and Zhuofan Xia and Marco Pavone and Gao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EFkw0OgZOr}\n}", "github": "", "reviewers": "2uq9;Rjjg;UAne;DUxk", "pdf_size": 2437656, "rating": "5;5;6;6", "confidence": "3;5;5;5", "soundness": "3;3;3;2", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "60;40;79;47", "wc_strengths": "35;37;107;67", "wc_weaknesses": "62;181;135;236", "wc_questions": "92;6;171;272", "wc_limitations": "10;1;11;20", "wc_review": "259;265;503;642", "wc_reply_reviewers": "18;33;105;11", "wc_reply_authors": "140;64;186;60", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.5, 14.84082207965583 ], "wc_strengths_avg": [ 61.5, 29.16761903207048 ], "wc_weaknesses_avg": [ 153.5, 63.79067329947224 ], "wc_questions_avg": [ 135.25, 98.1768175283758 ], "wc_limitations_avg": [ 10.5, 6.726812023536855 ], "wc_review_avg": [ 417.25, 162.85634006694366 ], "wc_reply_reviewers_avg": [ 41.75, 37.37228250990298 ], "wc_reply_authors_avg": [ 112.5, 53.073062847361655 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13761322392805296131&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "email": ";tsinghua.edu.cn;nvidia.com;;stanford.edu;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Tsinghua University;NVIDIA;Stanford University", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "THU;NVIDIA;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Emergence of heavy tails in homogenized stochastic gradient descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96047", "id": "EFrgBP9au6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EFrgBP9au6", "openreview": "https://openreview.net/forum?id=EFrgBP9au6", "poster": "/media/PosterPDFs/NeurIPS%202024/96047.png?t=1730885408.760965", "project": "", "author_site": "Zhezhe Jiao, Martin Keller-Ressel", "tldr": "", "abstract": "It has repeatedly been observed that loss minimization by stochastic gradient descent (SGD) leads to heavy-tailed distributions of neural network parameters. Here, we analyze a continuous diffusion approximation of SGD, called homogenized stochastic gradient descent (hSGD), and show in a regularized linear regression framework that it leads to an asymptotically heavy-tailed parameter distribution, even though local gradient noise is Gaussian. We give explicit upper and lower bounds on the tail-index of the resulting parameter distribution and validate these bounds in numerical experiments. Moreover, the explicit form of these bounds enables us to quantify the interplay between optimization hyperparameters and the tail-index. Doing so, we contribute to the ongoing discussion on links between heavy tails and the generalization performance of neural networks as well as the ability of SGD to avoid suboptimal local minima.", "keywords": "stochastic gradient descent;heavy tails;gradient noise;Pearson diffusion;tail index", "primary_area": "optimization", "supplementary_material": "", "author": "Zhe Jiao;Martin Keller-Ressel", "authorids": "~Zhe_Jiao1;~Martin_Keller-Ressel1", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?hl=en;zjDFfloAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zhe_Jiao1;~Martin_Keller-Ressel1", "aff": "Technische Universit\u00e4t Dresden;TU Dresden", "aff_domain": "tu-dresden.de;tu-dresden.de", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\njiao2024emergence,\ntitle={Emergence of heavy tails in homogenized stochastic gradient descent},\nauthor={Zhe Jiao and Martin Keller-Ressel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EFrgBP9au6}\n}", "github": "", "reviewers": "BR2r;G3NM;ya3E;qGrH", "pdf_size": 1481655, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "124;121;256;160", "wc_strengths": "82;89;4;166", "wc_weaknesses": "201;179;4;860", "wc_questions": "280;196;4;177", "wc_limitations": "28;48;47;1", "wc_review": "715;633;315;1364", "wc_reply_reviewers": "0;153;136;208", "wc_reply_authors": "28;69;63;64", "reply_reviewers": "0;2;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 165.25, 54.59567290546019 ], "wc_strengths_avg": [ 85.25, 57.32963893135906 ], "wc_weaknesses_avg": [ 311.0, 326.0268393859622 ], "wc_questions_avg": [ 164.25, 100.31045558664361 ], "wc_limitations_avg": [ 31.0, 19.06567596493762 ], "wc_review_avg": [ 756.75, 381.1012824696343 ], "wc_reply_reviewers_avg": [ 124.25, 76.5126623507508 ], "wc_reply_authors_avg": [ 56.0, 16.32482771731451 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14166667284097768870&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tu-dresden.de;tu-dresden.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Dresden", "aff_unique_dep": "", "aff_unique_url": "https://tu-dresden.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Data Mixture Inference Attack: BPE Tokenizers Reveal Training Data Compositions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96046", "id": "EHXyeImux0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EHXyeImux0", "openreview": "https://openreview.net/forum?id=EHXyeImux0", "poster": "", "project": "", "author_site": "Jonathan Hayase, Alisa Liu, Yejin Choi, Sewoong Oh, Noah Smith", "tldr": "", "abstract": "The pretraining data of today's strongest language models remains opaque, even when their parameters are open-sourced.\nIn particular, little is known about the proportions of different domains, languages, or code represented in the data. While a long line of membership inference attacks aim to identify training examples on an instance level, they do not extend easily to *global* statistics about the corpus. In this work, we tackle a task which we call *data mixture inference*, which aims to uncover the distributional make-up of the pretraining data. We introduce a novel attack based on a previously overlooked source of information \u2014 byte-pair encoding (BPE) tokenizers, used by the vast majority of modern language models. Our key insight is that the ordered vocabulary learned by a BPE tokenizer naturally reveals information about the token frequencies in its training data: the first token is the most common byte pair, the second is the most common pair after merging the first token, and so on. Given a tokenizer's merge list along with data samples for each category of interest (e.g., different natural languages), we formulate a linear program that solves for the relative proportion of each category in the tokenizer's training set. Importantly, to the extent to which tokenizer training data is representative of the pretraining data, we indirectly learn about the pretraining data. In controlled experiments, we show that our attack can recover mixture ratios with high precision for tokenizers trained on known mixtures of natural languages, programming languages, and data sources. We then apply our approach to off-the-shelf tokenizers released alongside recent LMs. We confirm much publicly disclosed information about these models, and also make several new inferences: GPT-4o is much more multilingual than its predecessors, training on 10x more non-English data than GPT-3.5, Llama 3 and Claude are trained on predominantly code, and many recent models are trained on 7-16% books. We hope our work sheds light on current design practices for pretraining data, and inspires continued research into data mixture inference for LMs.", "keywords": "tokenizers;distribution inference;security", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/00fec0fd15e7b25774ab6e636f96c8c0f5b4a42e.zip", "author": "Jonathan Hayase;Alisa Liu;Yejin Choi;Sewoong Oh;Noah A. Smith", "authorids": "~Jonathan_Hayase2;~Alisa_Liu1;~Yejin_Choi1;~Sewoong_Oh3;~Noah_A._Smith2", "gender": "M;F;F;M;M", "homepage": "https://jhayase.github.io/;https://alisawuffles.github.io/;https://yejinc.github.io/;https://homes.cs.washington.edu/~nasmith/;https://homes.cs.washington.edu/~sewoong/", "dblp": "244/9599;;89/579-1;90/5204.html;80/4366", "google_scholar": "Zw-l1d8AAAAJ;3-lTFAwAAAAJ;vhP-tlcAAAAJ;https://scholar.google.com/citations?hl=en;55TAOdgAAAAJ", "orcid": "0000-0002-3757-6586;;;0000-0002-2310-6380;", "linkedin": "jonathan-hayase-5ab849128;;;;", "or_profile": "~Jonathan_Hayase2;~Alisa_Liu1;~Yejin_Choi1;~Noah_Smith1;~Sewoong_Oh1", "aff": "University of Washington;University of Washington;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;University of Washington", "aff_domain": "washington.edu;uw.edu;cs.washington.edu;allenai.org;uw.edu", "position": "PhD student;PhD student;Full Professor;Senior Director of NLP Research;Full Professor", "bibtex": "@inproceedings{\nhayase2024data,\ntitle={Data Mixture Inference Attack: {BPE} Tokenizers Reveal Training Data Compositions},\nauthor={Jonathan Hayase and Alisa Liu and Yejin Choi and Sewoong Oh and Noah A. Smith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EHXyeImux0}\n}", "github": "", "reviewers": "68Uz;rZKP;2xkV;vDkN", "pdf_size": 1399593, "rating": "5;5;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "58;88;92;119", "wc_strengths": "43;208;43;162", "wc_weaknesses": "52;95;127;63", "wc_questions": "43;212;109;231", "wc_limitations": "1;1;1;11", "wc_review": "197;604;372;586", "wc_reply_reviewers": "18;22;39;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.25, 21.62608378787061 ], "wc_strengths_avg": [ 114.0, 72.83886325307391 ], "wc_weaknesses_avg": [ 84.25, 29.303370113350443 ], "wc_questions_avg": [ 148.75, 76.69542033263785 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 439.75, 167.24588933662915 ], "wc_reply_reviewers_avg": [ 30.25, 10.40132203135736 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7594985997552595222&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "washington.edu;uw.edu;cs.washington.edu;allenai.org;uw.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion-DICE: In-Sample Diffusion Guidance for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96045", "id": "EIl9qmMmvy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EIl9qmMmvy", "openreview": "https://openreview.net/forum?id=EIl9qmMmvy", "poster": "/media/PosterPDFs/NeurIPS%202024/96045.png?t=1731731037.5395226", "project": "", "author_site": "Liyuan Mao, Haoran Xu, Xianyuan Zhan, Weinan Zhang, Amy Zhang", "tldr": "", "abstract": "One important property of DIstribution Correction Estimation (DICE) methods is that the solution is the optimal stationary distribution ratio between the optimized and data collection policy. In this work, we show that DICE-based methods can be viewed as a transformation from the behavior distribution to the optimal policy distribution. Based on this, we propose a novel approach, Diffusion-DICE, that directly performs this transformation using diffusion models. We find that the optimal policy's score function can be decomposed into two terms: the behavior policy's score function and the gradient of a guidance term which depends on the optimal distribution ratio. The first term can be obtained from a diffusion model trained on the dataset and we propose an in-sample learning objective to learn the second term. Due to the multi-modality contained in the optimal policy distribution, the transformation in Diffusion-DICE may guide towards those local-optimal modes. We thus generate a few candidate actions and carefully select from them to achieve global-optimum. Different from all other diffusion-based offline RL methods, the \\textit{guide-then-select} paradigm in Diffusion-DICE only uses in-sample actions for training and brings minimal error exploitation in the value function. We use a didatic toycase example to show how previous diffusion-based methods fail to generate optimal actions due to leveraging these errors and how Diffusion-DICE successfully avoid that. We then conduct extensive experiments on benchmark datasets to show the strong performance of Diffusion-DICE.", "keywords": "Offline Reinforcement Learning;Diffusion Models", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Liyuan Mao;Haoran Xu;Xianyuan Zhan;Weinan Zhang;Amy Zhang", "authorids": "~Liyuan_Mao2;~Haoran_Xu4;~Xianyuan_Zhan1;~Weinan_Zhang1;~Amy_Zhang1", "gender": "M;M;M;F;M", "homepage": "https://ryanxhr.github.io/;http://zhanxianyuan.xyz/;http://wnzhang.net;;https://github.com/maoliyuan", "dblp": ";181/5081;28/10261-1;43/2754;", "google_scholar": "iX8AJI0AAAAJ;pDMnGloAAAAJ;Qzss0GEAAAAJ;;", "orcid": ";0000-0002-3683-0554;0000-0002-0127-2425;;", "linkedin": ";;;;", "or_profile": "~Haoran_Xu4;~Xianyuan_Zhan1;~Weinan_Zhang1;~Amy_Zhang2;~Liyuan_Richard_Mao1", "aff": "University of Texas at Austin;Tsinghua University;Shanghai Jiaotong University;Meta Facebook;Shanghai Jiaotong University", "aff_domain": "utexas.edu;tsinghua.edu.cn;sjtu.edu.cn;facebook.com;sjtu.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;Research Scientist;Undergrad student", "bibtex": "@inproceedings{\nmao2024diffusiondice,\ntitle={Diffusion-{DICE}: In-Sample Diffusion Guidance for Offline Reinforcement Learning},\nauthor={Liyuan Mao and Haoran Xu and Xianyuan Zhan and Weinan Zhang and Amy Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EIl9qmMmvy}\n}", "github": "", "reviewers": "uMN3;QJqE;UuBV", "pdf_size": 6092659, "rating": "6;6;8", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "4;2;3", "wc_summary": "36;86;620", "wc_strengths": "28;59;114", "wc_weaknesses": "149;196;93", "wc_questions": "5;46;1", "wc_limitations": "6;15;1", "wc_review": "224;402;829", "wc_reply_reviewers": "10;45;0", "wc_reply_authors": "102;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 247.33333333333334, 264.30453815416956 ], "wc_strengths_avg": [ 67.0, 35.56215216584433 ], "wc_weaknesses_avg": [ 146.0, 42.10304818735416 ], "wc_questions_avg": [ 17.333333333333332, 20.33606539022619 ], "wc_limitations_avg": [ 7.333333333333333, 5.792715732327589 ], "wc_review_avg": [ 485.0, 253.86741946667095 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 19.293061504650375 ], "wc_reply_authors_avg": [ 34.0, 48.08326112068523 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5645369943093489887&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "utexas.edu;tsinghua.edu.cn;sjtu.edu.cn;facebook.com;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "University of Texas at Austin;Tsinghua University;Shanghai Jiao Tong University;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://meta.com", "aff_unique_abbr": "UT Austin;THU;SJTU;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Event-3DGS: Event-based 3D Reconstruction Using 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96044", "id": "EJZfcKXdiT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EJZfcKXdiT", "openreview": "https://openreview.net/forum?id=EJZfcKXdiT", "poster": "/media/PosterPDFs/NeurIPS%202024/96044.png?t=1729494940.1544309", "project": "", "author_site": "Haiqian Han, Jianing Li, Henglu Wei, Xiangyang Ji", "tldr": "", "abstract": "Event cameras, offering high temporal resolution and high dynamic range, have brought a new perspective to addressing 3D reconstruction challenges in fast-motion and low-light scenarios. Most methods use the Neural Radiance Field (NeRF) for event-based photorealistic 3D reconstruction. However, these NeRF methods suffer from time-consuming training and inference, as well as limited scene-editing capabilities of implicit representations. To address these problems, we propose Event-3DGS, the first event-based reconstruction using 3D Gaussian splatting (3DGS) for synthesizing novel views freely from event streams. Technically, we first propose an event-based 3DGS framework that directly processes event data and reconstructs 3D scenes by simultaneously optimizing scenario and sensor parameters. Then, we present a high-pass filter-based photovoltage estimation module, which effectively reduces noise in event data to improve the robustness of our method in real-world scenarios. Finally, we design an event-based 3D reconstruction loss to optimize the parameters of our method for better reconstruction quality. The results show that our method outperforms state-of-the-art methods in terms of reconstruction quality on both simulated and real-world datasets. We also verify that our method can perform robust 3D reconstruction even in real-world scenarios with extreme noise, fast motion, and low-light conditions. Our code is available in https://github.com/lanpokn/Event-3DGS.", "keywords": "Event Camera;Event-based vision;3D reconstruction;3D gaussian spallating", "primary_area": "machine_vision", "supplementary_material": "/attachment/ac4d6ff830f2ce93c5275f5f2824b8d53ab9fcd7.zip", "author": "Haiqian Han;Jianing Li;Henglu Wei;Xiangyang Ji", "authorids": "~Haiqian_Han1;~Jianing_Li4;~Henglu_Wei1;~Xiangyang_Ji1", "gender": "M;M;M;", "homepage": "https://lanpokn.github.io/;https://jianing-li.github.io/;https://faculty.dlut.edu.cn/weihenglu/zh_CN/index/1178003/list/index.htm;", "dblp": ";;170/3329;", "google_scholar": ";https://scholar.google.com.hk/citations?user=xrYnfwcAAAAJ;;", "orcid": ";0000-0002-7468-0622;0000-0002-2072-5020;", "linkedin": ";;;", "or_profile": "~Haiqian_Han1;~Jianing_Li4;~Henglu_Wei1;~Xiangyang_Ji1", "aff": "Tsinghua University;Peking University;Tsinghua University;", "aff_domain": "mail.tsinghua.edu.cn;pku.edu.cn;tsinghua.edu.cn;", "position": "MS student;Researcher;Postdoc;", "bibtex": "@inproceedings{\nhan2024eventdgs,\ntitle={Event-3{DGS}: Event-based 3D Reconstruction Using 3D Gaussian Splatting},\nauthor={Haiqian Han and Jianing Li and Henglu Wei and Xiangyang Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EJZfcKXdiT}\n}", "github": "", "reviewers": "nxFk;kCE4;bZpj;tU43", "pdf_size": 1812493, "rating": "5;5;6;7", "confidence": "5;4;5;3", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "1;2;3;3", "wc_summary": "70;142;25;71", "wc_strengths": "76;75;78;103", "wc_weaknesses": "335;804;222;85", "wc_questions": "1;319;87;4", "wc_limitations": "1;58;69;9", "wc_review": "483;1398;481;272", "wc_reply_reviewers": "38;60;16;35", "wc_reply_authors": "42;62;34;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 77.0, 41.87481343242021 ], "wc_strengths_avg": [ 83.0, 11.597413504743201 ], "wc_weaknesses_avg": [ 361.5, 270.3798254308187 ], "wc_questions_avg": [ 102.75, 129.53450312561515 ], "wc_limitations_avg": [ 34.25, 29.642663510555188 ], "wc_review_avg": [ 658.5, 435.47359276998645 ], "wc_reply_reviewers_avg": [ 37.25, 15.610493265749165 ], "wc_reply_authors_avg": [ 47.5, 10.523782589924593 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4557311588563410623&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mail.tsinghua.edu.cn;pku.edu.cn;tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "THU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Sample Complexity of Posted Pricing for a Single Item", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96043", "id": "EK1tyHcb3W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EK1tyHcb3W", "openreview": "https://openreview.net/forum?id=EK1tyHcb3W", "poster": "", "project": "", "author_site": "Billy Jin, Thomas Kesselheim, Will Ma, Sahil Singla", "tldr": "", "abstract": "Selling a single item to $n$ self-interested bidders is a fundamental problem in economics, where the two objectives typically considered are welfare maximization and revenue maximization. Since the optimal auctions are often impractical and do not work for sequential bidders, posted pricing auctions, where fixed prices are set for the item for different bidders, have emerged as a practical and effective alternative. This paper investigates how many samples are needed from bidders' value distributions to find near-optimal posted prices, considering both independent and correlated bidder distributions, and welfare versus revenue maximization. We obtain matching upper and lower bounds (up to logarithmic terms) on the sample complexity for all these settings.", "keywords": "sample complexity;revenue;welfare;pricing;online;prophet inequality", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Billy Jin;Thomas Kesselheim;Will Ma;Sahil Singla", "authorids": "~Billy_Jin1;~Thomas_Kesselheim1;~Will_Ma1;~Sahil_Singla3", "gender": ";;;M", "homepage": "https://billyzjin.github.io/;http://www.thomas-kesselheim.de;http://www.columbia.edu/~wm2428/;https://faculty.cc.gatech.edu/~ssingla7/", "dblp": "271/0586.html;;86/8650.html;55/8911", "google_scholar": "d7JQxqEAAAAJ;;;", "orcid": "0000-0002-6362-2048;;;", "linkedin": "billy-jin-891b17156/?originalSubdomain=ca;;;", "or_profile": "~Billy_Jin1;~Thomas_Kesselheim1;~Will_Ma1;~Sahil_Singla3", "aff": "Cornell University;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Columbia University;Georgia Institute of Technology", "aff_domain": "cornell.edu;uni-bonn.de;columbia.edu;gatech.edu", "position": "PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njin2024sample,\ntitle={Sample Complexity of Posted Pricing for a Single Item},\nauthor={Billy Jin and Thomas Kesselheim and Will Ma and Sahil Singla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EK1tyHcb3W}\n}", "github": "", "reviewers": "U8bp;gD9M;mFS4;L1MB", "pdf_size": 390701, "rating": "6;6;7;8", "confidence": "4;4;4;4", "soundness": "4;4;3;4", "novelty": "2;3;3;4", "presentation": "3;2;3;4", "wc_summary": "81;90;628;125", "wc_strengths": "34;7;3;48", "wc_weaknesses": "186;44;3;7", "wc_questions": "72;22;3;1", "wc_limitations": "1;6;3;8", "wc_review": "374;169;640;189", "wc_reply_reviewers": "29;0;29;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 231.0, 229.79664923579716 ], "wc_strengths_avg": [ 23.0, 18.721645226849056 ], "wc_weaknesses_avg": [ 60.0, 74.48154133743473 ], "wc_questions_avg": [ 24.5, 28.622543562723422 ], "wc_limitations_avg": [ 4.5, 2.692582403567252 ], "wc_review_avg": [ 343.0, 189.18377308849722 ], "wc_reply_reviewers_avg": [ 14.5, 14.5 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1153553619421208157&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cornell.edu;uni-bonn.de;columbia.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Cornell University;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Columbia University;Georgia Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cornell.edu;https://www.uni-bonn.de/;https://www.columbia.edu;https://www.gatech.edu", "aff_unique_abbr": "Cornell;Uni Bonn;Columbia;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Preference Alignment with Flow Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96042", "id": "EKN8AGS1wG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EKN8AGS1wG", "openreview": "https://openreview.net/forum?id=EKN8AGS1wG", "poster": "/media/PosterPDFs/NeurIPS%202024/96042.png?t=1733207216.2438333", "project": "", "author_site": "Minu Kim, Yongsik Lee, Sehyeok Kang, Jihwan Oh, Song Chong, Se-Young Yun", "tldr": "", "abstract": "We present Preference Flow Matching (PFM), a new framework for preference alignment that streamlines the integration of preferences into an arbitrary class of pre-trained models. Existing alignment methods require fine-tuning pre-trained models, which presents challenges such as scalability, inefficiency, and the need for model modifications, especially with black-box APIs like GPT-4. In contrast, PFM utilizes flow matching techniques to directly learn from preference data, thereby reducing the dependency on extensive fine-tuning of pre-trained models. By leveraging flow-based models, PFM transforms less preferred data into preferred outcomes, and effectively aligns model outputs with human preferences without relying on explicit or implicit reward function estimation, thus avoiding common issues like overfitting in reward models. We provide theoretical insights that support our method\u2019s alignment with standard preference alignment objectives. Experimental results indicate the practical effectiveness of our method, offering a new direction in aligning a pre-trained model to preference. Our code is available at https://github.com/jadehaus/preference-flow-matching.", "keywords": "Preference Alignment;Flow Matching", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/335c93c39f0606e50870e22f18589ce5b9a671c2.zip", "author": "Minu Kim;Yongsik Lee;Sehyeok Kang;Jihwan Oh;Song Chong;Se-Young Yun", "authorids": "~Minu_Kim1;~Yongsik_Lee1;~Sehyeok_Kang1;~Jihwan_Oh1;~Song_Chong2;~Se-Young_Yun1", "gender": "M;M;M;M;;M", "homepage": "https://jadehaus.github.io/jadehaus;https://sites.google.com/a/kaist.edu/song-chong/;https://github.com/cheesebro329;https://ericoh929.github.io;;https://fbsqkd.github.io", "dblp": ";210/8878;274/9636;;;23/8862", "google_scholar": ";;https://scholar.google.com/citations?hl=en;EqzucSIAAAAJ;;X_IAjb8AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;seyoung-yun-395130ab/", "or_profile": "~Minu_Kim1;~Yongsik_Lee1;~Sehyeok_Kang1;~Jihwan_Oh1;~Song_Chong2;~Se-Young_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nkim2024preference,\ntitle={Preference Alignment with Flow Matching},\nauthor={Minu Kim and Yongsik Lee and Sehyeok Kang and Jihwan Oh and Song Chong and Se-Young Yun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EKN8AGS1wG}\n}", "github": "", "reviewers": "XK5w;2tji;vT4A;XhGj;yRMJ", "pdf_size": 2141353, "rating": "3;4;5;6;9", "confidence": "4;4;5;3;4", "soundness": "3;2;3;3;4", "novelty": "1;2;2;2;4", "presentation": "3;3;2;3;4", "wc_summary": "137;48;155;48;78", "wc_strengths": "129;38;77;48;263", "wc_weaknesses": "202;228;325;63;18", "wc_questions": "93;3;160;78;220", "wc_limitations": "18;24;14;7;80", "wc_review": "579;341;731;244;659", "wc_reply_reviewers": "0;1521;65;20;0", "wc_reply_authors": "0;2011;0;19;0", "reply_reviewers": "0;3;1;1;0", "reply_authors": "1;5;1;2;1", "rating_avg": [ 5.4, 2.0591260281974 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.9797958971132712 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 93.2, 44.84372865853151 ], "wc_strengths_avg": [ 111.0, 82.31889212082486 ], "wc_weaknesses_avg": [ 167.2, 112.18449090672026 ], "wc_questions_avg": [ 110.8, 73.97134580362858 ], "wc_limitations_avg": [ 28.6, 26.28763968103641 ], "wc_review_avg": [ 510.8, 187.14529115101988 ], "wc_reply_reviewers_avg": [ 321.2, 600.3696861101499 ], "wc_reply_authors_avg": [ 406.0, 802.5337376085818 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.1535737792084878, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12124108443013219120&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "MDAgents: An Adaptive Collaboration of LLMs for Medical Decision-Making", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96041", "id": "EKdk4vxKO4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EKdk4vxKO4", "openreview": "https://openreview.net/forum?id=EKdk4vxKO4", "poster": "/media/PosterPDFs/NeurIPS%202024/96041.png?t=1731354619.3146102", "project": "", "author_site": "Yubin Kim, Chanwoo Park, Hyewon Jeong, Yik Siu Chan, Xuhai \"Orson\" Xu, Daniel McDuff, Hyeonhoon Lee, Marzyeh Ghassemi, Cynthia Breazeal, Hae Park", "tldr": "", "abstract": "Foundation models are becoming valuable tools in medicine. Yet despite their promise, the best way to leverage Large Language Models (LLMs) in complex medical tasks remains an open question. We introduce a novel multi-agent framework, named **M**edical **D**ecision-making **Agents** (**MDAgents**) that helps to address this gap by automatically assigning a collaboration structure to a team of LLMs. The assigned solo or group collaboration structure is tailored to the medical task at hand, a simple emulation inspired by the way real-world medical decision-making processes are adapted to tasks of different complexities. We evaluate our framework and baseline methods using state-of-the-art LLMs across a suite of real-world medical knowledge and clinical diagnosis benchmarks, including a comparison of\nLLMs\u2019 medical complexity classification against human physicians. MDAgents achieved the **best performance in seven out of ten** benchmarks on tasks requiring an understanding of medical knowledge and multi-modal reasoning, showing a significant **improvement of up to 4.2\\%** ($p$ < 0.05) compared to previous methods' best performances. Ablation studies reveal that MDAgents effectively determines medical complexity to optimize for efficiency and accuracy across diverse medical tasks. Notably, the combination of moderator review and external medical knowledge in group collaboration resulted in an average accuracy **improvement of 11.8\\%**. Our code can be found at https://github.com/mitmedialab/MDAgents.", "keywords": "Medical Decision Making;Multi-Agent Collaboration", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Yubin Kim;Chanwoo Park;Hyewon Jeong;Yik Siu Chan;Xuhai Xu;Daniel McDuff;Hyeonhoon Lee;Marzyeh Ghassemi;Cynthia Breazeal;Hae Won Park", "authorids": "~Yubin_Kim2;~Chanwoo_Park2;~Hyewon_Jeong1;~Yik_Siu_Chan1;~Xuhai_Xu1;~Daniel_McDuff1;~Hyeonhoon_Lee1;~Marzyeh_Ghassemi2;~Cynthia_Breazeal1;~Hae_Won_Park1", "gender": "M;M;F;;M;M;M;F;F;F", "homepage": "https://www.ybkim95.github.io;https://chanwoo-park-official.github.io/;https://sites.google.com/view/hyewon-jeong/;https://yiksiu-chan.github.io/;https://orsonxu.com;http://alumni.media.mit.edu/~djmcduff/;;https://www.healthyml.org/;https://www.media.mit.edu/people/cynthiab/overview/;", "dblp": "96/7992;;147/5446;;198/0980;63/9606;;145/6563;65/5630.html;307/5014-1", "google_scholar": ";https://scholar.google.com/citations?hl=ko;wkzH1QYAAAAJ;phvJzp0AAAAJ;MtX5Ij8AAAAJ;m7Jr-b4AAAAJ;Fu3Z-KcAAAAJ;;qb3jyP4AAAAJ;kJoNMc8AAAAJ", "orcid": "0000-0002-1902-3822;;0000-0002-6552-5276;;0000-0001-5930-3899;;0000-0002-9426-823X;;0000-0002-0587-2065;0000-0001-9638-1722", "linkedin": ";chanwoo-park-ab5096237/;hyewon-jeong-6bb884b1/;;;;hyeonhoon-lee-v8888/;;cynthia-breazeal-1792317/;", "or_profile": "~Yubin_Kim2;~Chanwoo_Park2;~Hyewon_Jeong1;~Yik_Siu_Chan1;~Xuhai_Xu1;~Daniel_McDuff1;~Hyeonhoon_Lee1;~Marzyeh_Ghassemi2;~Cynthia_Breazeal1;~Hae_Won_Park1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Apple;Wellesley College;Massachusetts Institute of Technology;Google;Seoul National University Hosipital;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;apple.com;wellesley.edu;mit.edu;google.com;seoul.ac.kr;mit.edu;mit.edu;mit.edu", "position": "Researcher;PhD student;Intern;Undergrad student;Postdoc;Principal Researcher;Assistant Professor;Assistant Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nkim2024mdagents,\ntitle={{MDA}gents: An Adaptive Collaboration of {LLM}s for Medical Decision-Making},\nauthor={Yubin Kim and Chanwoo Park and Hyewon Jeong and Yik Siu Chan and Xuhai Xu and Daniel McDuff and Hyeonhoon Lee and Marzyeh Ghassemi and Cynthia Breazeal and Hae Won Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EKdk4vxKO4}\n}", "github": "", "reviewers": "mREC;mJeV;QC4Q;g9qW", "pdf_size": 7320523, "rating": "5;6;7;7", "confidence": "3;3;5;3", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "82;73;112;168", "wc_strengths": "84;9;42;187", "wc_weaknesses": "216;55;173;232", "wc_questions": "123;168;57;225", "wc_limitations": "40;1;20;29", "wc_review": "545;306;404;841", "wc_reply_reviewers": "10;18;10;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.75, 37.13068138345969 ], "wc_strengths_avg": [ 80.5, 66.9869390254548 ], "wc_weaknesses_avg": [ 169.0, 69.26398775698668 ], "wc_questions_avg": [ 143.25, 61.532003867906006 ], "wc_limitations_avg": [ 22.5, 14.291605927956452 ], "wc_review_avg": [ 524.0, 201.7758657520765 ], "wc_reply_reviewers_avg": [ 14.0, 4.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6819732860842067829&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;apple.com;wellesley.edu;mit.edu;google.com;seoul.ac.kr;mit.edu;mit.edu;mit.edu", "author_num": 10, "aff_unique_index": "0;0;1;2;0;3;4;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Apple;Wellesley College;Google;Seoul National University", "aff_unique_dep": ";Apple Inc.;;Google;Hospital", "aff_unique_url": "https://web.mit.edu;https://www.apple.com;https://www.wellesley.edu;https://www.google.com;https://www.snuh.org", "aff_unique_abbr": "MIT;Apple;Wellesley;Google;SNUH", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Seoul", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Hierarchy-Agnostic Unsupervised Segmentation: Parsing Semantic Image Structure", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96040", "id": "ELnxXc8pik", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ELnxXc8pik", "openreview": "https://openreview.net/forum?id=ELnxXc8pik", "poster": "/media/PosterPDFs/NeurIPS%202024/96040.png?t=1731930300.906279", "project": "", "author_site": "Simone Rossetti, Fiora Pirri", "tldr": "", "abstract": "Unsupervised semantic segmentation aims to discover groupings within images, capturing objects' view-invariance without external supervision. Moreover, this task is inherently ambiguous due to the varying levels of semantic granularity. Existing methods often bypass this ambiguity using dataset-specific priors. In our research, we address this ambiguity head-on and provide a universal tool for pixel-level semantic parsing of images guided by the latent representations encoded in self-supervised models. \nWe introduce a novel algebraic approach that recursively decomposes an image into nested subgraphs, dynamically estimating their count and ensuring clear separation.\nThe innovative approach identifies scene-specific primitives and constructs a hierarchy-agnostic tree of semantic regions from the image pixels. \nThe model captures fine and coarse semantic details, producing a nuanced and unbiased segmentation. \nWe present a new metric for estimating the quality of the semantic segmentation of discovered elements on different levels of the hierarchy. The metric validates the intrinsic nature of the compositional relations among parts, objects, and scenes in a hierarchy-agnostic domain. Our results prove the power of this methodology, uncovering semantic regions without prior definitions and scaling effectively across various datasets. This robust framework for unsupervised image segmentation proves more accurate semantic hierarchical relationships between scene elements than traditional algorithms. The experiments underscore its potential for broad applicability in image analysis tasks, showcasing its ability to deliver a detailed and unbiased segmentation that surpasses existing unsupervised methods.", "keywords": "Unsupervised Hierarchical Segmentation;Spectral Clustering;Self-Supervised Feature Extraction;Semantic Region Tree", "primary_area": "machine_vision", "supplementary_material": "/attachment/e93fc3e7d247e725cd0f881936a22c196e12463c.zip", "author": "Simone Rossetti;fiora pirri", "authorids": "~Simone_Rossetti1;~fiora_pirri1", "gender": "M;F", "homepage": "https://www.diag.uniroma1.it/users/simone_rossetti;http://www.diag.uniroma1.it/alcor/index.php?q=node/5613", "dblp": "249/1241;p/FioraPirri", "google_scholar": "aynWg48AAAAJ;https://scholar.google.it/scholar?hl=en", "orcid": "0000-0002-5344-7872;0000-0001-8665-9807", "linkedin": "https://linkedin.com/in/rossettisimone;fiora-pirri-aa02245/", "or_profile": "~Simone_Rossetti1;~fiora_pirri1", "aff": "University of Roma \"La Sapienza\";DeepPlants srl", "aff_domain": "uniroma1.it;deepplants.com", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nrossetti2024unsupervised,\ntitle={Unsupervised Hierarchy-Agnostic Segmentation: Parsing Semantic Image Structure},\nauthor={Simone Rossetti and fiora pirri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ELnxXc8pik}\n}", "github": "", "reviewers": "Xo6d;47JN;oEkj;E2ZP", "pdf_size": 30673064, "rating": "5;6;6;7", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "116;117;74;124", "wc_strengths": "64;33;146;59", "wc_weaknesses": "34;83;89;51", "wc_questions": "1;90;119;5", "wc_limitations": "2;14;45;19", "wc_review": "217;337;473;258", "wc_reply_reviewers": "0;32;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 107.75, 19.727835664360143 ], "wc_strengths_avg": [ 75.5, 42.37039060476077 ], "wc_weaknesses_avg": [ 64.25, 22.664675157610354 ], "wc_questions_avg": [ 53.75, 51.794666713861574 ], "wc_limitations_avg": [ 20.0, 15.700318468107582 ], "wc_review_avg": [ 321.25, 97.65340495855739 ], "wc_reply_reviewers_avg": [ 8.0, 13.856406460551018 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2_hrBwGQ6qsJ:scholar.google.com/&scioq=Hierarchy-Agnostic+Unsupervised+Segmentation:+Parsing+Semantic+Image+Structure&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "uniroma1.it;deepplants.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Rome La Sapienza;DeepPlants srl", "aff_unique_dep": ";", "aff_unique_url": "https://www.uniroma1.it;", "aff_unique_abbr": "La Sapienza;", "aff_campus_unique_index": "0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;1", "aff_country_unique": "Italy;Unknown" }, { "title": "Addressing Spatial-Temporal Heterogeneity: General Mixed Time Series Analysis via Latent Continuity Recovery and Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96039", "id": "EMV8nIDZJn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EMV8nIDZJn", "openreview": "https://openreview.net/forum?id=EMV8nIDZJn", "poster": "/media/PosterPDFs/NeurIPS%202024/96039.png?t=1730881477.4748847", "project": "", "author_site": "Jiawei Chen, \u6625\u6656 \u8d75", "tldr": "", "abstract": "Mixed time series (MiTS) comprising both continuous variables (CVs) and discrete variables (DVs) are frequently encountered yet under-explored in time series analysis. Essentially, CVs and DVs exhibit different temporal patterns and distribution types. Overlooking these heterogeneities would lead to insufficient and imbalanced representation learning, bringing biased results. This paper addresses the problem with two insights: 1) DVs may originate from intrinsic latent continuous variables (LCVs), which lose fine-grained information due to extrinsic discretization; 2) LCVs and CVs share similar temporal patterns and interact spatially. Considering these similarities and interactions, we propose a general MiTS analysis framework MiTSformer, which recovers LCVs behind DVs for sufficient and balanced spatial-temporal modeling by designing two essential inductive biases: 1) hierarchically aggregating multi-scale temporal context information to enrich the information granularity of DVs; 2) adaptively learning the aggregation processes via the adversarial guidance from CVs. Subsequently, MiTSformer captures complete spatial-temporal dependencies within and across LCVs and CVs via cascaded self- and cross-attention blocks. Empirically, MiTSformer achieves consistent SOTA on five mixed time series analysis tasks, including classification, extrinsic regression, anomaly detection, imputation, and long-term forecasting. The code is available at https://github.com/chunhuiz/MiTSformer.", "keywords": "Mixed Time Series;General Time Series Modeling;Spatial-Temporal Heterogeneity;Latent Continuity Recovery;Adversarial Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Jiawei Chen;Chunhui Zhao", "authorids": "~Jiawei_Chen11;~Chunhui_Zhao1", "gender": "M;F", "homepage": "https://github.com/zzwzz1998;https://person.zju.edu.cn/chhzhao?fulltext=%E8%B5%B5%E6%98%A5%E6%99%96#0", "dblp": "https://dblp.uni-trier.de/pid/03/1390-7.html;", "google_scholar": ";qNVwr5AAAAAJ", "orcid": "0000-0001-7054-7974;", "linkedin": ";", "or_profile": "~Jiawei_Chen11;~Chunhui_Zhao1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024addressing,\ntitle={Addressing Spatial-Temporal Heterogeneity: General Mixed Time Series Analysis via Latent Continuity Recovery and Alignment},\nauthor={Jiawei Chen and Chunhui Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EMV8nIDZJn}\n}", "github": "", "reviewers": "GdPs;ZwAv;3NoE;nZaK", "pdf_size": 6213719, "rating": "4;5;5;7", "confidence": "4;4;4;4", "soundness": "2;4;3;3", "novelty": "3;4;3;3", "presentation": "3;3;3;3", "wc_summary": "73;64;87;53", "wc_strengths": "74;82;87;59", "wc_weaknesses": "113;82;90;481", "wc_questions": "112;40;50;312", "wc_limitations": "50;1;41;7", "wc_review": "422;269;355;912", "wc_reply_reviewers": "95;0;9;218", "wc_reply_authors": "1403;340;764;1659", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;3;4", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.25, 12.457427503300993 ], "wc_strengths_avg": [ 75.5, 10.594810050208546 ], "wc_weaknesses_avg": [ 191.5, 167.52984808684093 ], "wc_questions_avg": [ 128.5, 109.4748829640845 ], "wc_limitations_avg": [ 24.75, 21.09946681790798 ], "wc_review_avg": [ 489.5, 249.88647422379628 ], "wc_reply_reviewers_avg": [ 80.5, 87.6199178269416 ], "wc_reply_authors_avg": [ 1041.5, 519.8790724774368 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=159482909661467380&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Spectral Graph Pruning Against Over-Squashing and Over-Smoothing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96038", "id": "EMkrwJY2de", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EMkrwJY2de", "openreview": "https://openreview.net/forum?id=EMkrwJY2de", "poster": "/media/PosterPDFs/NeurIPS%202024/96038.png?t=1731425402.566632", "project": "", "author_site": "Adarsh Jamadandi, Celia Rubio-Madrigal, Rebekka Burkholz", "tldr": "", "abstract": "Message Passing Graph Neural Networks are known to suffer from two problems that are sometimes believed to be diametrically opposed: over-squashing and over-smoothing. The former results from topological bottlenecks that hamper the information flow from distant nodes and are mitigated by spectral gap maximization, primarily, by means of edge additions. However, such additions often promote over-smoothing that renders nodes of different classes less distinguishable. Inspired by the Braess phenomenon, we argue that deleting edges can address over-squashing and over-smoothing simultaneously. This insight explains how edge deletions can improve generalization, thus connecting spectral gap optimization to a seemingly disconnected objective of reducing computational resources by pruning graphs for lottery tickets. To this end, we propose a computationally effective spectral gap optimization framework to add or delete edges and demonstrate its effectiveness on the long range graph benchmark and on larger heterophilous datasets.", "keywords": "graph neural networks;rewiring;spectral gap optimization;over-smoothing;over-squashing;lottery tickets", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Adarsh Jamadandi;Celia Rubio-Madrigal;Rebekka Burkholz", "authorids": "~Adarsh_Jamadandi1;~Celia_Rubio-Madrigal1;~Rebekka_Burkholz1", "gender": "M;;F", "homepage": "https://adarshmj.github.io;;https://sites.google.com/view/rebekkaburkholz/startseite", "dblp": "250/5553;;194/3172", "google_scholar": "https://scholar.google.co.in/citations?user=dnA8qFkAAAAJ;;https://scholar.google.ch/citations?user=vkWBb2wAAAAJ", "orcid": ";;", "linkedin": "adarsh-jamadandi;;", "or_profile": "~Adarsh_Jamadandi1;~Celia_Rubio-Madrigal1;~Rebekka_Burkholz1", "aff": "Saarland University;;Helmholtz Center CISPA for Information Security", "aff_domain": "uni-saarland.de;;cispa.saarland", "position": "MS student;;Associate Professor", "bibtex": "@inproceedings{\njamadandi2024spectral,\ntitle={Spectral Graph Pruning Against Over-Squashing and Over-Smoothing},\nauthor={Adarsh Jamadandi and Celia Rubio-Madrigal and Rebekka Burkholz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EMkrwJY2de}\n}", "github": "", "reviewers": "p1uY;MxtA;1jBY;1tmm", "pdf_size": 3579771, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;2", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "54;115;54;51", "wc_strengths": "61;32;46;53", "wc_weaknesses": "345;145;79;195", "wc_questions": "162;76;79;2", "wc_limitations": "1;2;2;10", "wc_review": "623;370;260;311", "wc_reply_reviewers": "67;66;140;18", "wc_reply_authors": "0;0;97;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.5, 26.874709300753377 ], "wc_strengths_avg": [ 48.0, 10.653637876331258 ], "wc_weaknesses_avg": [ 191.0, 97.96938297243685 ], "wc_questions_avg": [ 79.75, 56.62320637335897 ], "wc_limitations_avg": [ 3.75, 3.6314597615834874 ], "wc_review_avg": [ 391.0, 139.48655849220742 ], "wc_reply_reviewers_avg": [ 72.75, 43.58540466715893 ], "wc_reply_authors_avg": [ 24.25, 42.00223208354527 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=667084104206298185&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uni-saarland.de;;cispa.saarland", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Saarland University;Helmholtz Center CISPA", "aff_unique_dep": ";Information Security", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/", "aff_unique_abbr": "UdS;CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "FM-Delta: Lossless Compression for Storing Massive Fine-tuned Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96037", "id": "EMstukR5J4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EMstukR5J4", "openreview": "https://openreview.net/forum?id=EMstukR5J4", "poster": "/media/PosterPDFs/NeurIPS%202024/96037.png?t=1730188475.749215", "project": "", "author_site": "Wanyi Ning, Jingyu Wang, Qi Qi, Mengde Zhu, Haifeng Sun, Daixuan Cheng, Jianxin Liao, Ce Zhang", "tldr": "", "abstract": "Pre-trained foundation models, particularly large language models, have achieved remarkable success and led to massive fine-tuned variants. These models are commonly fine-tuned locally and then uploaded by users to cloud platforms such as HuggingFace for secure storage. However, the huge model number and their billion-level parameters impose heavy storage overhead for cloud with limited resources. Our empirical and theoretical analysis reveals that most fine-tuned models in cloud have a small difference (delta) from their pre-trained models. To this end, we propose a novel lossless compression scheme FM-Delta specifically for storing massive fine-tuned models in cloud. FM-Delta maps fine-tuned and pre-trained model parameters into integers with the same bits, and entropy codes their integer delta. In this way, cloud only needs to store one uncompressed pre-trained model and other compressed fine-tuned models. \nExtensive experiments have demonstrated that FM-Delta efficiently reduces cloud storage consumption for massive fine-tuned models by an average of around 50% with only negligible additional time in most end-to-end cases. For example, on up to 10 fine-tuned models in the GPT-NeoX-20B family, FM-Delta reduces the original storage requirement from 423GB to 205GB, significantly saving cloud storage costs.", "keywords": "model compression;lossless compression;cloud storage", "primary_area": "infrastructure", "supplementary_material": "", "author": "Wanyi Ning;Jingyu Wang;Qi Qi;Mengde Zhu;Haifeng Sun;Daixuan Cheng;Jianxin Liao;Ce Zhang", "authorids": "~Wanyi_Ning1;~Jingyu_Wang1;~Qi_Qi1;~Mengde_Zhu1;~Haifeng_Sun2;~Daixuan_Cheng1;~Jianxin_Liao2;~Ce_Zhang1", "gender": ";M;;M;F;M;;M", "homepage": ";https://jericwang.github.io/;;http://www.allycare8.com;;https://kyy.bupt.edu.cn/info/1014/3341.htm;;https://hfsun.github.io", "dblp": "311/3670;37/2749-1;80/6406-1.html;;289/2865;;97/919;00/11044-1", "google_scholar": ";H441DjwAAAAJ;;;https://scholar.google.com/citations?hl=en;;;dwhbTsEAAAAJ", "orcid": "0000-0003-4506-1299;0000-0002-2182-2228;0000-0003-0829-4624;;;0000-0000-8780-1230;;", "linkedin": ";;;;;;;", "or_profile": "~Wanyi_Ning1;~Jingyu_Wang1;~Qi_Qi1;~Mengde_Zhu1;~Daixuan_Cheng1;~Jianxin_Liao2;~Ce_Zhang1;~haifeng_sun1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing Insititute for General Artifical Intelligence;Beijing University of Posts and Telecommunications;University of Chicago;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bigai.ai;bupt.edu.cn;uchicago.edu;bupt.edu.cn", "position": "PhD student;Full Professor;Full Professor;MS student;Researcher;Full Professor;Associate Professor;Lecturer", "bibtex": "@inproceedings{\nning2024fmdelta,\ntitle={{FM}-Delta: Lossless Compression for Storing Massive Fine-tuned Foundation Models},\nauthor={Wanyi Ning and Jingyu Wang and Qi Qi and Mengde Zhu and Haifeng Sun and Daixuan Cheng and Jianxin Liao and Ce Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EMstukR5J4}\n}", "github": "", "reviewers": "eKhr;yfTq;SDxT", "pdf_size": 3571882, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;4;4", "wc_summary": "37;92;82", "wc_strengths": "86;95;131", "wc_weaknesses": "297;67;113", "wc_questions": "2;15;6", "wc_limitations": "2;3;8", "wc_review": "424;272;340", "wc_reply_reviewers": "89;44;4", "wc_reply_authors": "245;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.33333333333333, 23.921166824012207 ], "wc_strengths_avg": [ 104.0, 19.44222209522358 ], "wc_weaknesses_avg": [ 159.0, 99.37135737558718 ], "wc_questions_avg": [ 7.666666666666667, 5.436502143433363 ], "wc_limitations_avg": [ 4.333333333333333, 2.6246692913372702 ], "wc_review_avg": [ 345.3333333333333, 62.168230543332086 ], "wc_reply_reviewers_avg": [ 45.666666666666664, 34.721111093332766 ], "wc_reply_authors_avg": [ 81.66666666666667, 115.49410759380277 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=939331536426807880&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bigai.ai;bupt.edu.cn;uchicago.edu;bupt.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;2;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Beijing Institute for General Artificial Intelligence;University of Chicago", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.bigaiai.org;https://www.uchicago.edu", "aff_unique_abbr": "BUPT;BIGAI;UChicago", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Novel Object Synthesis via Adaptive Text-Image Harmony", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96036", "id": "ENLsNDfys0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ENLsNDfys0", "openreview": "https://openreview.net/forum?id=ENLsNDfys0", "poster": "/media/PosterPDFs/NeurIPS%202024/96036.png?t=1731589544.5951087", "project": "", "author_site": "Zeren Xiong, Zedong Zhang, Zikun Chen, Shuo Chen, Xiang Li, Gan Sun, Jian Yang, Jun Li", "tldr": "", "abstract": "In this paper, we study an object synthesis task that combines an object text with an object image to create a new object image. However, most diffusion models struggle with this task, \\textit{i.e.}, often generating an object that predominantly reflects either the text or the image due to an imbalance between their inputs. To address this issue, we propose a simple yet effective method called Adaptive Text-Image Harmony (ATIH) to generate novel and surprising objects.\nFirst, we introduce a scale factor and an injection step to balance text and image features in cross-attention and to preserve image information in self-attention during the text-image inversion diffusion process, respectively. Second, to better integrate object text and image, we design a balanced loss function with a noise parameter, ensuring both optimal editability and fidelity of the object image. Third, to adaptively adjust these parameters, we present a novel similarity score function that not only maximizes the similarities between the generated object image and the input text/image but also balances these similarities to harmonize text and image integration. \nExtensive experiments demonstrate the effectiveness of our approach, showcasing remarkable object creations such as colobus-glass jar. https://xzr52.github.io/ATIH/", "keywords": "Text-to-image Generation; Diffusion Model; Object Editing; Combination", "primary_area": "machine_vision", "supplementary_material": "/attachment/8900b32e0860511a8e35c7abc9505013cb20dd7e.zip", "author": "Zeren Xiong;Ze-dong Zhang;Zikun Chen;Shuo Chen;Xiang Li;Gan Sun;Jian Yang;Jun Li", "authorids": "~Zeren_Xiong1;~Ze-dong_Zhang1;~Zikun_Chen5;~Shuo_Chen8;~Xiang_Li20;~Gan_Sun1;~Jian_Yang1;~Jun_Li16", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/xzr52;;https://dundunkirk.github.io/;https://shuochenya.github.io/;http://implus.github.io/;;;", "dblp": ";286/9537;;00/6472-3.html;40/1491-41;191/2425;y/JianYang3.html;", "google_scholar": ";POB4LxgAAAAJ;;vlu_3ksAAAAJ;oamjJdYAAAAJ;U4a4FLIAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ;iGPEwQsAAAAJ", "orcid": ";0000-0002-3328-1713;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Zeren_Xiong1;~Ze-dong_Zhang1;~Zikun_Chen5;~Shuo_Chen8;~Xiang_Li20;~Gan_Sun1;~Jian_Yang1;~Jun_Li16", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;RIKEN;Nankai University;Chinese Academy of Sciences;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;riken.jp;nankai.edu.cn;ac.cn;njust.edu.cn;njust.edu.cn", "position": "MS student;PhD student;MS student;Research Scientist;Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxiong2024novel,\ntitle={Novel Object Synthesis via Adaptive Text-Image Harmony},\nauthor={Zeren Xiong and Ze-dong Zhang and Zikun Chen and Shuo Chen and Xiang Li and Gan Sun and Jian Yang and Jun Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ENLsNDfys0}\n}", "github": "", "reviewers": "EP4q;yinU;xaqc;VGMU", "pdf_size": 15300307, "rating": "5;5;5;6", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "58;27;228;67", "wc_strengths": "111;31;322;48", "wc_weaknesses": "134;386;279;63", "wc_questions": "89;22;171;6", "wc_limitations": "36;1;6;1", "wc_review": "428;467;1006;185", "wc_reply_reviewers": "79;35;91;0", "wc_reply_authors": "110;93;708;77", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 78.20805585104388 ], "wc_strengths_avg": [ 128.0, 115.90297666583028 ], "wc_weaknesses_avg": [ 215.5, 125.5 ], "wc_questions_avg": [ 72.0, 65.08840142452416 ], "wc_limitations_avg": [ 11.0, 14.577379737113251 ], "wc_review_avg": [ 521.5, 299.86872127649457 ], "wc_reply_reviewers_avg": [ 51.25, 36.196512262923896 ], "wc_reply_authors_avg": [ 247.0, 266.4141512757909 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5476353457404175774&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "njust.edu.cn;njust.edu.cn;njust.edu.cn;riken.jp;nankai.edu.cn;ac.cn;njust.edu.cn;njust.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;2;3;0;0", "aff_unique_norm": "Nanjing University of Science and Technology;RIKEN;Nankai University;Chinese Academy of Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nust.edu.cn/;https://www.riken.jp;http://www.nankai.edu.cn;https://www.cas.cn", "aff_unique_abbr": "NUST;RIKEN;NKU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Learning Noisy Halfspaces with a Margin: Massart is No Harder than Random", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96035", "id": "ENlubvb262", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ENlubvb262", "openreview": "https://openreview.net/forum?id=ENlubvb262", "poster": "", "project": "", "author_site": "Gautam Chandrasekaran, Vasilis Kontonis, Konstantinos Stavropoulos, Kevin Tian", "tldr": "", "abstract": "We study the problem of PAC learning $\\gamma$-margin halfspaces with Massart noise. We propose a simple proper learning algorithm, the Perspectron, that has sample complexity $\\widetilde{O}((\\epsilon\\gamma)^{-2})$ and achieves classification error at most $\\eta+\\epsilon$ where $\\eta$ is the Massart noise rate. \n Prior works (DGT19, CKMY20) came with worse sample complexity\n guarantees (in both $\\epsilon$ and $\\gamma$) or could only\n handle random classification noise (DDKWZ23,KITBMV23)--- a much milder noise assumption. \nWe also show that our results extend to the more challenging setting of learning generalized linear models with a known link function under Massart noise, achieving a similar sample complexity to the halfspace case. This significantly improves upon the prior state-of-the-art in this setting due to CKMY20, who introduced this model.", "keywords": "pac learning;learning halfspaces;massart noise;sgd;robust learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Gautam Chandrasekaran;Vasilis Kontonis;Konstantinos Stavropoulos;Kevin Tian", "authorids": "~Gautam_Chandrasekaran1;~Vasilis_Kontonis1;~Konstantinos_Stavropoulos1;~Kevin_Tian4", "gender": "M;M;;", "homepage": ";http://vkonton.github.io/;;https://kjtian.github.io", "dblp": ";203/8777;;", "google_scholar": ";7_44KWAAAAAJ;;", "orcid": ";;;", "linkedin": "gautam-chandrasekaran-5139a1188/;;;", "or_profile": "~Gautam_Chandrasekaran1;~Vasilis_Kontonis1;~Konstantinos_Stavropoulos1;~Kevin_Tian4", "aff": " University of Texas at Austin;, University of Texas at Austin;;University of Texas at Austin", "aff_domain": "cs.utexas.edu;cs.utexas.edu;;utexas.edu", "position": "PhD student;Postdoc;;Assistant Professor", "bibtex": "@inproceedings{\nchandrasekaran2024learning,\ntitle={Learning Noisy Halfspaces with a Margin: Massart is No Harder than Random},\nauthor={Gautam Chandrasekaran and Vasilis Kontonis and Konstantinos Stavropoulos and Kevin Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ENlubvb262}\n}", "github": "", "reviewers": "LLF4;x3Ci;vx3q;kyaH;TiaP", "pdf_size": 443546, "rating": "6;7;7;7;7", "confidence": "3;2;4;3;3", "soundness": "3;3;3;3;4", "novelty": "3;3;4;3;4", "presentation": "4;3;4;2;3", "wc_summary": "138;144;109;93;122", "wc_strengths": "41;140;43;139;89", "wc_weaknesses": "27;31;20;423;33", "wc_questions": "1;325;47;6;80", "wc_limitations": "1;7;15;1;7", "wc_review": "208;647;234;662;331", "wc_reply_reviewers": "24;120;9;48;16", "wc_reply_authors": "0;393;0;0;0", "reply_reviewers": "1;3;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 121.2, 18.69117438792972 ], "wc_strengths_avg": [ 90.4, 43.61467642892699 ], "wc_weaknesses_avg": [ 106.8, 158.16244813482118 ], "wc_questions_avg": [ 91.8, 120.11228080425415 ], "wc_limitations_avg": [ 6.2, 5.1536394906900505 ], "wc_review_avg": [ 416.4, 198.74063499948872 ], "wc_reply_reviewers_avg": [ 43.4, 40.49493795525559 ], "wc_reply_authors_avg": [ 78.6, 157.19999999999996 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5071484769301879208&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.utexas.edu;cs.utexas.edu;;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Socially Fair Low-Rank Approximation and Column Subset Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96034", "id": "EO1Qev952p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EO1Qev952p", "openreview": "https://openreview.net/forum?id=EO1Qev952p", "poster": "", "project": "", "author_site": "Zhao Song, Ali Vakilian, David Woodruff, Samson Zhou", "tldr": "", "abstract": "Low-rank approximation and column subset selection are two fundamental and related problems that are applied across a wealth of machine learning applications. In this paper, we study the question of socially fair low-rank approximation and socially fair column subset selection, where the goal is to minimize the loss over all sub-populations of the data. We show that surprisingly, even constant-factor approximation to fair low-rank approximation requires exponential time under certain standard complexity hypotheses. On the positive side, we give an algorithm for fair low-rank approximation that, for a constant number of groups and constant-factor accuracy, runs in $2^{\\text{poly}(k)}$ rather than the naive $n^{\\text{poly}(k)}$, which is a substantial improvement when the dataset has a large number $n$ of observations. We then show that there exist bicriteria approximation algorithms for fair low-rank approximation and fair column subset selection that runs in polynomial time.", "keywords": "fairness;low-rank approximation;column subset selection", "primary_area": "fairness", "supplementary_material": "/attachment/4656632fcd81f5af946dc6148c11dca707e652f6.zip", "author": "Zhao Song;Ali Vakilian;David Woodruff;Samson Zhou", "authorids": "~Zhao_Song3;~Ali_Vakilian1;~David_Woodruff1;~Samson_Zhou1", "gender": "M;;M;", "homepage": "https://www.youtube.com/@zhaosong2031;http://www.mit.edu/~vakilian/;http://www.cs.cmu.edu/~dwoodruf/;https://samsonzhou.github.io/", "dblp": "76/4051-2;116/4679;w/DPWoodruff;179/2683", "google_scholar": "yDZct7UAAAAJ;uXZaVaAAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;NpjsgocAAAAJ", "orcid": ";0000-0001-5049-7594;;", "linkedin": ";;;", "or_profile": "~Zhao_Song3;~Ali_Vakilian1;~David_Woodruff1;~Samson_Zhou1", "aff": "Adobe;Toyota Technological Institute at Chicago;Carnegie Mellon University;Texas A&M University - College Station", "aff_domain": "adobe.com;ttic.edu;cmu.edu;tamu.edu", "position": "Researcher;Research Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsong2024on,\ntitle={On Socially Fair Low-Rank Approximation and Column Subset Selection},\nauthor={Zhao Song and Ali Vakilian and David Woodruff and Samson Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EO1Qev952p}\n}", "github": "", "reviewers": "YsQa;aAWQ;CrGT;icAD", "pdf_size": 550833, "rating": "4;7;7;7", "confidence": "3;4;3;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "211;162;108;53", "wc_strengths": "29;89;115;68", "wc_weaknesses": "172;65;61;12", "wc_questions": "332;11;321;28", "wc_limitations": "7;12;6;6", "wc_review": "751;339;611;167", "wc_reply_reviewers": "588;30;18;0", "wc_reply_authors": "938;0;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 133.5, 59.052942348370756 ], "wc_strengths_avg": [ 75.25, 31.467244874631145 ], "wc_weaknesses_avg": [ 77.5, 58.4144673860851 ], "wc_questions_avg": [ 173.0, 153.6668474330101 ], "wc_limitations_avg": [ 7.75, 2.48746859276655 ], "wc_review_avg": [ 467.0, 227.91226382097125 ], "wc_reply_reviewers_avg": [ 159.0, 247.91329129354884 ], "wc_reply_authors_avg": [ 234.5, 406.1659143749017 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6317529206486738468&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "adobe.com;ttic.edu;cmu.edu;tamu.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Adobe;Toyota Technological Institute at Chicago;Carnegie Mellon University;Texas A&M University", "aff_unique_dep": "Adobe Inc.;;;", "aff_unique_url": "https://www.adobe.com;https://www.tti-chicago.org;https://www.cmu.edu;https://www.tamu.edu", "aff_unique_abbr": "Adobe;TTI Chicago;CMU;TAMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "STONE: A Submodular Optimization Framework for Active 3D Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96033", "id": "EQHQzRJy75", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EQHQzRJy75", "openreview": "https://openreview.net/forum?id=EQHQzRJy75", "poster": "", "project": "", "author_site": "RUIYU MAO, Sarthak Kumar Maharana, Rishabh Iyer, Yunhui Guo", "tldr": "", "abstract": "3D object detection is fundamentally important for various emerging applications, including autonomous driving and robotics. A key requirement for training an accurate 3D object detector is the availability of a large amount of LiDAR-based point cloud data. Unfortunately, labeling point cloud data is extremely challenging, as accurate 3D bounding boxes and semantic labels are required for each potential object. This paper proposes a unified active 3D object detection framework, for greatly reducing the labeling cost of training 3D object detectors. Our framework is based on a novel formulation of submodular optimization, specifically tailored to the problem of active 3D object detection. In particular, we address two fundamental challenges associated with active 3D object detection: data imbalance and the need to cover the distribution of the data, including LiDAR-based point cloud data of varying difficulty levels. Extensive experiments demonstrate that our method achieves state-of-the-art performance with high computational efficiency compared to existing active learning methods. The code is available at [https://github.com/RuiyuM/STONE](https://github.com/RuiyuM/STONE)", "keywords": "Active learning;3D object detection", "primary_area": "active_learning", "supplementary_material": "/attachment/6c7e03ea70527e82031e3c6cb702d2e4f8ead05e.zip", "author": "RUIYU MAO;Sarthak Kumar Maharana;Rishabh K Iyer;Yunhui Guo", "authorids": "~RUIYU_MAO1;~Sarthak_Kumar_Maharana1;~Rishabh_K_Iyer2;~Yunhui_Guo2", "gender": "M;M;M;M", "homepage": ";https://sarthaxxxxx.github.io/;https://www.rishiyer.com;https://yunhuiguo.github.io/", "dblp": ";296/4566;37/10544.html;165/3105", "google_scholar": ";1sIJMUgAAAAJ;l_XxJ1kAAAAJ;BxIXuZYAAAAJ", "orcid": ";;;", "linkedin": "ruiyu-mao-83539a200/;sarthak9811/;rishabh-iyer-36893717/;", "or_profile": "~RUIYU_MAO1;~Sarthak_Kumar_Maharana1;~Rishabh_K_Iyer2;~Yunhui_Guo2", "aff": "University of Texas at Dallas;University of Texas at Dallas;Microsoft;University of Texas at Dallas", "aff_domain": "utdallas.edu;utdallas.edu;microsoft.com;utdallas.edu", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nmao2024stone,\ntitle={{STONE}: A Submodular Optimization Framework for Active 3D Object Detection},\nauthor={RUIYU MAO and Sarthak Kumar Maharana and Rishabh K Iyer and Yunhui Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EQHQzRJy75}\n}", "github": "", "reviewers": "qVYS;Lj2A;nHfr;4dDG;jaqa", "pdf_size": 4452575, "rating": "6;6;6;6;6", "confidence": "3;4;3;4;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "107;37;80;79;41", "wc_strengths": "93;19;51;32;40", "wc_weaknesses": "16;295;261;132;55", "wc_questions": "139;4;36;35;24", "wc_limitations": "2;1;2;8;6", "wc_review": "357;356;430;286;166", "wc_reply_reviewers": "47;85;8;16;10", "wc_reply_authors": "43;132;27;27;27", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 68.8, 26.354506256046612 ], "wc_strengths_avg": [ 47.0, 25.25866188063018 ], "wc_weaknesses_avg": [ 151.8, 110.1224772696292 ], "wc_questions_avg": [ 47.6, 47.12791105067145 ], "wc_limitations_avg": [ 3.8, 2.7129319932501073 ], "wc_review_avg": [ 319.0, 89.03033190997324 ], "wc_reply_reviewers_avg": [ 33.2, 29.471342012198903 ], "wc_reply_authors_avg": [ 51.2, 40.8724846320847 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zXRB8Erap1wJ:scholar.google.com/&scioq=STONE:+A+Submodular+Optimization+Framework+for+Active+3D+Object+Detection&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "utdallas.edu;utdallas.edu;microsoft.com;utdallas.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Dallas;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.utdallas.edu;https://www.microsoft.com", "aff_unique_abbr": "UT Dallas;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Dallas;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Parameter Identifiability of Partially Observed Linear Causal Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96032", "id": "EQZlEfjrkV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EQZlEfjrkV", "openreview": "https://openreview.net/forum?id=EQZlEfjrkV", "poster": "", "project": "", "author_site": "Xinshuai Dong, Ignavier Ng, Biwei Huang, Yuewen Sun, Songyao Jin, Roberto Legaspi, Peter Spirtes, Kun Zhang", "tldr": "", "abstract": "Linear causal models are important tools for modeling causal dependencies and yet in practice, only a subset of the variables can be observed. In this paper, we examine the parameter identifiability of these models by investigating whether the edge coefficients can be recovered given the causal structure and partially observed data. Our setting is more general than that of prior research\u2014we allow all variables, including both observed and latent ones, to be flexibly related, and we consider the coefficients of all edges, whereas most existing works focus only on the edges between observed variables. Theoretically, we identify three types of indeterminacy for the parameters in partially observed linear causal models. We then provide graphical conditions that are sufficient for all parameters to be identifiable and show that some of them are provably necessary. Methodologically, we propose a novel likelihood-based parameter estimation method that addresses the variance indeterminacy of latent variables in a specific way and can asymptotically recover the underlying parameters up to trivial indeterminacy. Empirical studies on both synthetic and real-world datasets validate our identifiability theory and the effectiveness of the proposed method in the finite-sample regime.", "keywords": "Linear Causal Model;Parameter Identification", "primary_area": "causal_inference", "supplementary_material": "", "author": "Xinshuai Dong;Ignavier Ng;Biwei Huang;Yuewen Sun;Songyao Jin;Roberto Legaspi;Peter Spirtes;Kun Zhang", "authorids": "~Xinshuai_Dong1;~Ignavier_Ng1;~Biwei_Huang1;~Yuewen_Sun1;~Songyao_Jin1;~Roberto_Legaspi1;~Peter_Spirtes1;~Kun_Zhang1", "gender": "M;M;F;F;M;M;M;M", "homepage": "https://dongxinshuai.github.io/;https://ignavierng.github.io/;;https://yuewen-sun.github.io/;https://github.com/Songyao-Jin;https://www.researchgate.net/profile/Roberto-Legaspi;https://www.cmu.edu/dietrich/philosophy/people/faculty/spirtes.html;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "279/6151.html;251/3037;165/3288;219/9893;365/4229;296/0450.html;87/3550;96/3115-1", "google_scholar": "A7JyL1sAAAAJ;;;https://scholar.google.com/citations?hl=en;IPNzHfgAAAAJ;zE7Zhk0AAAAJ;mar1eCwAAAAJ;RGoypN4AAAAJ", "orcid": ";;;;;0000-0001-8909-635X;;", "linkedin": ";;;;songyao-jin-b97466223/;roberto-legaspi-5a3a4361/;;", "or_profile": "~Xinshuai_Dong1;~Ignavier_Ng1;~Biwei_Huang1;~Yuewen_Sun1;~Songyao_Jin1;~Roberto_Legaspi1;~Peter_Spirtes1;~Kun_Zhang1", "aff": "Carnegie Mellon University;Carnegie Mellon University;University of California, San Diego;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;KDDI Research, Inc.;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;ucsd.edu;mbzuai.ac.ae;mbzuai.ac.ae;kddi-research.jp;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Postdoc;MS student;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndong2024on,\ntitle={On the Parameter Identifiability of Partially Observed Linear Causal Models},\nauthor={Xinshuai Dong and Ignavier Ng and Biwei Huang and Yuewen Sun and Songyao Jin and Roberto Legaspi and Peter Spirtes and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EQZlEfjrkV}\n}", "github": "", "reviewers": "PaWo;Ljpg;kYam;WKpu", "pdf_size": 2763911, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;4;3;3", "wc_summary": "95;109;56;61", "wc_strengths": "67;111;63;143", "wc_weaknesses": "200;72;222;908", "wc_questions": "30;220;82;109", "wc_limitations": "1;36;18;7", "wc_review": "393;548;441;1228", "wc_reply_reviewers": "21;42;30;94", "wc_reply_authors": "117;43;45;332", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 22.37604746151563 ], "wc_strengths_avg": [ 96.0, 33.03028912982749 ], "wc_weaknesses_avg": [ 350.5, 326.9292736969267 ], "wc_questions_avg": [ 110.25, 69.43477154855483 ], "wc_limitations_avg": [ 15.5, 13.313526955694348 ], "wc_review_avg": [ 652.5, 336.9692122434927 ], "wc_reply_reviewers_avg": [ 46.75, 28.27874643614883 ], "wc_reply_authors_avg": [ 134.25, 117.99867584002797 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xK6QlpEpexIJ:scholar.google.com/&scioq=On+the+Parameter+Identifiability+of+Partially+Observed+Linear+Causal+Models&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;ucsd.edu;mbzuai.ac.ae;mbzuai.ac.ae;kddi-research.jp;cmu.edu;cmu.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;2;3;0;0", "aff_unique_norm": "Carnegie Mellon University;University of California, San Diego;Mohamed bin Zayed University of Artificial Intelligence;KDDI Research", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;https://www.ucsd.edu;https://mbzuai.ac.ae;https://www.kddi-research.com", "aff_unique_abbr": "CMU;UCSD;MBZUAI;KDDI", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;1;1;2;0;0", "aff_country_unique": "United States;United Arab Emirates;Japan" }, { "title": "HourVideo: 1-Hour Video-Language Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97793", "id": "EQhLbuitns", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EQhLbuitns", "openreview": "https://openreview.net/forum?id=EQhLbuitns", "poster": "/media/PosterPDFs/NeurIPS%202024/97793.png?t=1733954873.3341289", "project": "", "author_site": "Keshigeyan Chandrasegaran, Agrim Gupta, Manling Li, Taran Kota, Lea M. Hadzic, Jimming He, Cristobal Eyzaguirre, Zane Durante, Jiajun Wu, Li Fei-Fei", "tldr": "", "abstract": "We present **HourVideo**, a benchmark dataset for hour-long video-language understanding. Our dataset consists of a novel task suite comprising summarization, perception (*recall*, *tracking*), visual reasoning (*spatial*, *temporal*, *predictive*, *causal*, *counterfactual*), and navigation (*room-to-room*, *object retrieval*) tasks. HourVideo includes 500 manually curated egocentric videos from the Ego4D dataset, spanning durations of 20 to 120 minutes, and features **12,976 high-quality, five-way multiple-choice questions**. Benchmarking results reveal that multimodal models, including GPT-4 and LLaVA-NeXT, achieve marginal improvements over random chance. In stark contrast, human experts significantly outperform the state-of-the-art long-context multimodal model, Gemini Pro 1.5 (85.0\\% vs. 37.3\\%), highlighting a substantial gap in multimodal capabilities. Our benchmark, evaluation toolkit, prompts, and documentation are available at https://hourvideo.stanford.edu.", "keywords": "multimodal understanding;long-form video-language understanding", "primary_area": "", "supplementary_material": "/attachment/1e6333f796291d8598be513c2f9cb34e95d89974.zip", "author": "Keshigeyan Chandrasegaran;Agrim Gupta;Lea M. Hadzic;Taran Kota;Jimming He;Cristobal Eyzaguirre;Zane Durante;Manling Li;Jiajun Wu;Li Fei-Fei", "authorids": "~Keshigeyan_Chandrasegaran1;~Agrim_Gupta1;~Lea_M._Hadzic1;~Taran_Kota1;~Jimming_He1;~Cristobal_Eyzaguirre1;~Zane_Durante1;~Manling_Li1;~Jiajun_Wu1;~Li_Fei-Fei1", "gender": "M;;F;M;M;;M;F;M;F", "homepage": "https://keshik6.github.io/;;;;https://www.linkedin.com/in/jimming-he/;;;https://limanling.github.io/;https://jiajunwu.com;https://profiles.stanford.edu/fei-fei-li", "dblp": "289/0842;200/8282;;;;;;178/3620;117/4768;79/2528", "google_scholar": "vh2Ywj8AAAAJ;AxzVaI8AAAAJ;Dz7W5nYAAAAJ;;;;qxH2dTsAAAAJ;6U4SXnUAAAAJ;2efgcS0AAAAJ;rDfyQnIAAAAJ", "orcid": ";;;;;;0000-0001-9038-8915;;0000-0002-4176-343X;", "linkedin": "keshigeyan-chandrasegaran/;;https://linkedin.com/in/lea-m-hadzic-bb4176265;taran-kota-695132191/;;;;;jiajunwu/;fei-fei-li-4541247/", "or_profile": "~Keshigeyan_Chandrasegaran1;~Agrim_Gupta1;~Lea_M._Hadzic1;~Taran_Kota1;~Jimming_He1;~Cristobal_Eyzaguirre1;~Zane_Durante1;~Manling_Li1;~Jiajun_Wu1;~Li_Fei-Fei1", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchandrasegaran2024hourvideo,\ntitle={HourVideo: 1-Hour Video-Language Understanding},\nauthor={Keshigeyan Chandrasegaran and Agrim Gupta and Lea M. Hadzic and Taran Kota and Jimming He and Cristobal Eyzaguirre and Zane Durante and Manling Li and Jiajun Wu and Li Fei-Fei},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EQhLbuitns}\n}", "github": "", "reviewers": "Qx1u;rfPC;Z2rz;QtgE", "pdf_size": 7023350, "rating": "5;6;8;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "69;40;91;98", "wc_strengths": "73;4;50;92", "wc_improvement": "107;4;40;150", "wc_limitations": "1;1;104;3", "wc_correctness": "1;1;40;13", "wc_clarity": "1;1;9;1", "wc_relation_to_prior_work": "1;1;1;10", "wc_documentation": "1;1;30;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "255;54;366;369", "wc_reply_reviewers": "0;22;13;0", "wc_reply_authors": "128;33;137;22", "reply_reviewers": "0;1;1;0", "reply_authors": "4;2;5;2", "rating_avg": [ 7.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 74.5, 22.610838109190027 ], "wc_strengths_avg": [ 54.75, 32.85859857023729 ], "wc_improvement_avg": [ 75.25, 56.82154080980205 ], "wc_limitations_avg": [ 27.25, 44.31915500096995 ], "wc_correctness_avg": [ 13.75, 15.927570436196476 ], "wc_clarity_avg": [ 3.0, 3.4641016151377544 ], "wc_relation_to_prior_work_avg": [ 3.25, 3.897114317029974 ], "wc_documentation_avg": [ 8.25, 12.55736835487436 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 261.0, 128.0371039972398 ], "wc_reply_reviewers_avg": [ 8.75, 9.310612224768036 ], "wc_reply_authors_avg": [ 80.0, 52.73992794837702 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.36514837167011077, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17446997022268653605&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Data subsampling for Poisson regression with pth-root-link", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96031", "id": "ES0Gj1KVUk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ES0Gj1KVUk", "openreview": "https://openreview.net/forum?id=ES0Gj1KVUk", "poster": "/media/PosterPDFs/NeurIPS%202024/96031.png?t=1731235632.8044467", "project": "", "author_site": "Han Cheng Lie, Alexander Munteanu", "tldr": "", "abstract": "We develop and analyze data subsampling techniques for Poisson regression, the standard model for count data $y\\in\\mathbb{N}$. In particular, we consider the Poisson generalized linear model with ID- and square root-link functions. We consider the method of \\emph{coresets}, which are small weighted subsets that approximate the loss function of Poisson regression up to a factor of $1\\pm\\varepsilon$. We show $\\Omega(n)$ lower bounds against coresets for Poisson regression that continue to hold against arbitrary data reduction techniques up to logarithmic factors. By introducing a novel complexity parameter and a domain shifting approach, we show that sublinear coresets with $1\\pm\\varepsilon$ approximation guarantee exist when the complexity parameter is small. In particular, the dependence on the number of input points can be reduced to polylogarithmic. We show that the dependence on other input parameters can also be bounded sublinearly, though not always logarithmically. In particular, we show that the square root-link admits an $O(\\log(y_{\\max}))$ dependence, where $y_{\\max}$ denotes the largest count presented in the data, while the ID-link requires a $\\Theta(\\sqrt{y_{\\max}/\\log(y_{\\max})})$ dependence. As an auxiliary result for proving the tightness of the bound with respect to $y_{\\max}$ in the case of the ID-link, we show an improved bound on the principal branch of the Lambert $W_0$ function, which may be of independent interest. We further show the limitations of our analysis when $p$th degree root-link functions for $p\\geq 3$ are considered, which indicate that other analytical or computational methods would be required if such a generalization is even possible.", "keywords": "Poisson regression;subsampling;coresets;Lambert function", "primary_area": "learning_theory", "supplementary_material": "", "author": "Han Cheng Lie;Alexander Munteanu", "authorids": "~Han_Cheng_Lie2;~Alexander_Munteanu1", "gender": ";M", "homepage": ";https://biometrie.statistik.tu-dortmund.de/lehrstuhl/team/alexander-munteanu/", "dblp": ";145/3380", "google_scholar": ";https://scholar.google.de/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Han_Cheng_Lie2;~Alexander_Munteanu1", "aff": ";Universit\u00e4t K\u00f6ln", "aff_domain": ";uni-koeln.de", "position": ";Full Professor", "bibtex": "@inproceedings{\nlie2024data,\ntitle={Data subsampling for Poisson regression with pth-root-link},\nauthor={Han Cheng Lie and Alexander Munteanu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ES0Gj1KVUk}\n}", "github": "", "reviewers": "L5wz;73Rm;HZmN;SQxM", "pdf_size": 668712, "rating": "6;6;7;7", "confidence": "1;3;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;2;3", "wc_summary": "33;63;125;117", "wc_strengths": "6;44;39;99", "wc_weaknesses": "61;57;188;106", "wc_questions": "1;40;17;35", "wc_limitations": "7;71;2;2", "wc_review": "108;275;371;359", "wc_reply_reviewers": "9;11;544;10", "wc_reply_authors": "0;0;927;0", "reply_reviewers": "1;1;3;1", "reply_authors": "1;1;4;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 38.11495769379785 ], "wc_strengths_avg": [ 47.0, 33.384127965247195 ], "wc_weaknesses_avg": [ 103.0, 52.7114788257738 ], "wc_questions_avg": [ 23.25, 15.433324334050653 ], "wc_limitations_avg": [ 20.5, 29.227555491350966 ], "wc_review_avg": [ 278.25, 105.02231905647484 ], "wc_reply_reviewers_avg": [ 143.5, 231.22986398819683 ], "wc_reply_authors_avg": [ 231.75, 401.4027746540873 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CaXXOy5XbasJ:scholar.google.com/&scioq=Data+subsampling+for+Poisson+regression+with+pth-root-link&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": ";uni-koeln.de", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Cologne", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-koeln.de/", "aff_unique_abbr": "UC", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "PersonalSum: A User-Subjective Guided Personalized Summarization Dataset for Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97792", "id": "ETZk7lqyaF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ETZk7lqyaF", "openreview": "https://openreview.net/forum?id=ETZk7lqyaF", "poster": "/media/PosterPDFs/NeurIPS%202024/97792.png?t=1730854382.6325576", "project": "", "author_site": "Lemei Zhang, Peng Liu, Marcus Henriksboe, Even Lauvrak, Jon Atle Gulla, Heri Ramampiaro", "tldr": "", "abstract": "With the rapid advancement of Natural Language Processing in recent years, numerous studies have shown that generic summaries generated by Large Language Models (LLMs) can sometimes surpass those annotated by experts, such as journalists, according to human evaluations. However, there is limited research on whether these generic summaries meet the individual needs of ordinary people. The biggest obstacle is the lack of human-annotated datasets from the general public. Existing work on personalized summarization often relies on pseudo datasets created from generic summarization datasets or controllable tasks that focus on specific named entities or other aspects, such as the length and specificity of generated summaries, collected from hypothetical tasks without the annotators' initiative. To bridge this gap, we propose a high-quality, personalized, manually annotated summarization dataset called PersonalSum. This dataset is the first to investigate whether the focus of public readers differs from the generic summaries generated by LLMs. It includes user profiles, personalized summaries accompanied by source sentences from given articles, and machine-generated generic summaries along with their sources. We investigate several personal signals \u2014 entities/topics, plot, and structure of articles\u2014that may affect the generation of personalized summaries using LLMs in a few-shot in-context learning scenario. Our preliminary results and analysis indicate that entities/topics are merely one of the key factors that impact the diverse preferences of users, and personalized summarization remains a significant challenge for existing LLMs.", "keywords": "Personalized Summarization;Pretrained Large Language Models;User Preferences;Summarization Dataset;Text Generation", "primary_area": "", "supplementary_material": "/attachment/830149942d26230ec8db5825764b326fa295d67f.pdf", "author": "Lemei Zhang;Peng Liu;Marcus Tiedemann Oekland Henriksboe;Even W. Lauvrak;Jon Atle Gulla;Heri Ramampiaro", "authorids": "~Lemei_Zhang1;~Peng_Liu8;~Marcus_Tiedemann_Oekland_Henriksboe1;~Even_W._Lauvrak1;~Jon_Atle_Gulla2;~Heri_Ramampiaro1", "gender": "F;M;M;M;M;M", "homepage": "https://www.ntnu.edu/employees/lemei.zhang;https://www.ntnu.edu/employees/penl;;;https://www.ntnu.edu/employees/jon.atle.gulla;https://folk.idi.ntnu.no/heri/", "dblp": ";21/6121-25;;;g/JonAtleGulla.html;r/HeriRamampiaro", "google_scholar": ";https://scholar.google.no/citations?user=CdygYjAAAAAJ;;;lq5InSEAAAAJ;https://scholar.google.no/citations?user=3H_HYnIAAAAJ", "orcid": "0000-0003-3037-4946;;;;;", "linkedin": ";;marcus-henriksbo/;even-lauvrak/;jon-atle-gulla-b8a884/;", "or_profile": "~Lemei_Zhang1;~Peng_Liu8;~Marcus_Tiedemann_Oekland_Henriksboe1;~Even_W._Lauvrak1;~Jon_Atle_Gulla2;~Heri_Ramampiaro1", "aff": "Norwegian University of Science and Technology;Norwegian University of Science and Technology;;Norwegian University of Science and Technology;Norwegian University of Science and Technology;Norwegian University of Science and Technology", "aff_domain": "ntnu.no;ntnu.no;;ntnu.no;ntnu.no;ntnu.no", "position": "Postdoc;Researcher;;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024personalsum,\ntitle={PersonalSum: A User-Subjective Guided Personalized Summarization Dataset for Large Language Models},\nauthor={Lemei Zhang and Peng Liu and Marcus Tiedemann Oekland Henriksboe and Even W. Lauvrak and Jon Atle Gulla and Heri Ramampiaro},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ETZk7lqyaF}\n}", "github": "", "reviewers": "W8Qm;YJfx;dFop", "pdf_size": 2900117, "rating": "4;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "123;101;46", "wc_strengths": "43;74;33", "wc_improvement": "116;66;66", "wc_limitations": "84;20;30", "wc_correctness": "11;28;1", "wc_clarity": "5;6;1", "wc_relation_to_prior_work": "13;18;1", "wc_documentation": "49;11;1", "wc_additional_feedback": "1;1;1", "wc_review": "445;325;180", "wc_reply_reviewers": "0;0;28", "wc_reply_authors": "220;200;96", "reply_reviewers": "0;0;1", "reply_authors": "6;6;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 90.0, 32.38312317653544 ], "wc_strengths_avg": [ 50.0, 17.45470328211473 ], "wc_improvement_avg": [ 82.66666666666667, 23.570226039551585 ], "wc_limitations_avg": [ 44.666666666666664, 28.110891523077353 ], "wc_correctness_avg": [ 13.333333333333334, 11.145502331533658 ], "wc_clarity_avg": [ 4.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 10.666666666666666, 7.133644853010899 ], "wc_documentation_avg": [ 20.333333333333332, 20.677416559027762 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 316.6666666666667, 108.3461530876334 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 13.199326582148887 ], "wc_reply_authors_avg": [ 172.0, 54.35684562837202 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.666666666666667, 1.8856180831641267 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=225644145720222250&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ntnu.no;ntnu.no;;ntnu.no;ntnu.no;ntnu.no", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Norwegian University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ntnu.no", "aff_unique_abbr": "NTNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Norway" }, { "title": "Dual Defense: Enhancing Privacy and Mitigating Poisoning Attacks in Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96030", "id": "EVw8Jh5Et9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EVw8Jh5Et9", "openreview": "https://openreview.net/forum?id=EVw8Jh5Et9", "poster": "", "project": "", "author_site": "Runhua Xu, Shiqi Gao, Chao Li, James Joshi, Jianxin Li", "tldr": "", "abstract": "Federated learning (FL) is inherently susceptible to privacy breaches and poisoning attacks. To tackle these challenges, researchers have separately devised secure aggregation mechanisms to protect data privacy and robust aggregation methods that withstand poisoning attacks. However, simultaneously addressing both concerns is challenging; secure aggregation facilitates poisoning attacks as most anomaly detection techniques require access to unencrypted local model updates, which are obscured by secure aggregation. Few recent efforts to simultaneously tackle both challenges offen depend on impractical assumption of non-colluding two-server setups that disrupt FL's topology, or three-party computation which introduces scalability issues, complicating deployment and application. To overcome this dilemma, this paper introduce a Dual Defense Federated learning (DDFed) framework. DDFed simultaneously boosts privacy protection and mitigates poisoning attacks, without introducing new participant roles or disrupting the existing FL topology. DDFed initially leverages cutting-edge fully homomorphic encryption (FHE) to securely aggregate model updates, without the impractical requirement for non-colluding two-server setups and ensures strong privacy protection. Additionally, we proposes a unique two-phase anomaly detection mechanism for encrypted model updates, featuring secure similarity computation and feedback-driven collaborative selection, with additional measures to prevent potential privacy breaches from Byzantine clients incorporated into the detection process. We conducted extensive experiments on various model poisoning attacks and FL scenarios, including both cross-device and cross-silo FL. Experiments on publicly available datasets demonstrate that DDFed successfully protects model privacy and effectively defends against model poisoning threats.", "keywords": "federated learning;privacy preservation;secure aggregation;model poisoning attack", "primary_area": "privacy", "supplementary_material": "", "author": "Runhua Xu;Shiqi Gao;Chao Li;James Joshi;Jianxin Li", "authorids": "~Runhua_Xu1;~Shiqi_Gao2;~Chao_Li40;~James_Joshi1;~Jianxin_Li3", "gender": "M;M;M;M;M", "homepage": "https://runhua.me/;https://orcid.org/0009-0007-1483-5550;http://www.lichao.work/;https://www.sis.pitt.edu/jjoshi/;http://myjianxin.github.io", "dblp": "136/6673;;66/190-23;j/JamesJoshi.html;l/JianxinLi-2.html", "google_scholar": "tst3wGMAAAAJ;;https://scholar.google.com.hk/citations?user=5FXndBwAAAAJ;veOn_3UAAAAJ;EY2lqD0AAAAJ", "orcid": "0000-0003-4541-9764;0009-0007-1483-5550;;0000-0003-4519-9802;0000-0001-5152-0055", "linkedin": ";;;james-joshi-8b4b75/;", "or_profile": "~Runhua_Xu1;~Shiqi_Gao2;~Chao_Li40;~James_Joshi1;~Jianxin_Li3", "aff": "Beihang University;Beihang University;Beijing Jiaotong University;University of Pittsburgh;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;bjtu.edu.cn;pitt.edu;buaa.edu.cn", "position": "Full Professor;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024dual,\ntitle={Dual Defense: Enhancing Privacy and Mitigating Poisoning Attacks in Federated Learning},\nauthor={Runhua Xu and Shiqi Gao and Chao Li and James Joshi and Jianxin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EVw8Jh5Et9}\n}", "github": "", "reviewers": "65Lv;VKjn;zJWB;LqQw", "pdf_size": 1178056, "rating": "3;5;6;6", "confidence": "4;3;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "44;50;68;67", "wc_strengths": "19;81;72;55", "wc_weaknesses": "180;24;282;78", "wc_questions": "3;134;108;39", "wc_limitations": "5;2;60;1", "wc_review": "251;291;590;240", "wc_reply_reviewers": "644;0;301;23", "wc_reply_authors": "1616;611;1336;724", "reply_reviewers": "2;0;3;1", "reply_authors": "3;1;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 10.473180032826706 ], "wc_strengths_avg": [ 56.75, 23.710493457539005 ], "wc_weaknesses_avg": [ 141.0, 98.81801455200362 ], "wc_questions_avg": [ 71.0, 52.407060593015515 ], "wc_limitations_avg": [ 17.0, 24.869660230891775 ], "wc_review_avg": [ 343.0, 143.86278184436725 ], "wc_reply_reviewers_avg": [ 242.0, 260.58108143148075 ], "wc_reply_authors_avg": [ 1071.75, 418.1078658671707 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.28867513459481287, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=691734235922692869&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;buaa.edu.cn;bjtu.edu.cn;pitt.edu;buaa.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Beihang University;Beijing Jiao Tong University;University of Pittsburgh", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.njtu.edu.cn/en;https://www.pitt.edu", "aff_unique_abbr": "BUAA;BJTU;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Aligning Target-Aware Molecule Diffusion Models with Exact Energy Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96029", "id": "EWcvxXtzNu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EWcvxXtzNu", "openreview": "https://openreview.net/forum?id=EWcvxXtzNu", "poster": "/media/PosterPDFs/NeurIPS%202024/96029.png?t=1731748346.3351376", "project": "", "author_site": "Siyi Gu, Minkai Xu, Alexander Powers, Weili Nie, Tomas Geffner, Karsten Kreis, Jure Leskovec, Arash Vahdat, Stefano Ermon", "tldr": "", "abstract": "Generating ligand molecules for specific protein targets, known as structure-based drug design, is a fundamental problem in therapeutics development and biological discovery. Recently, target-aware generative models, especially diffusion models, have shown great promise in modeling protein-ligand interactions and generating candidate drugs. However, existing models primarily focus on learning the chemical distribution of all drug candidates, which lacks effective steerability on the chemical quality of model generations. In this paper, we propose a novel and general alignment framework to align pretrained target diffusion models with preferred functional properties, named AliDiff. AliDiff shifts the target-conditioned chemical distribution towards regions with higher binding affinity and structural rationality, specified by user-defined reward functions, via the preference optimization approach. To avoid the overfitting problem in common preference optimization objectives, we further develop an improved Exact Energy Preference Optimization method to yield an exact and efficient alignment of the diffusion models, and provide the closed-form expression for the converged distribution. Empirical studies on the CrossDocked2020 benchmark show that AliDiff can generate molecules with state-of-the-art binding energies with up to -7.07 Avg. Vina Score, while maintaining strong molecular properties. Code is available at https://github.com/MinkaiXu/AliDiff.", "keywords": "Diffusion Models;Reinforcement Learning from Human Feedback;Computational Biology", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/679288f0a4b59f6bfe8468541a642c2130eb055f.zip", "author": "Siyi Gu;Minkai Xu;Alexander S Powers;Weili Nie;Tomas Geffner;Karsten Kreis;Jure Leskovec;Arash Vahdat;Stefano Ermon", "authorids": "~Siyi_Gu1;~Minkai_Xu1;~Alexander_S_Powers1;~Weili_Nie1;~Tomas_Geffner1;~Karsten_Kreis1;~Jure_Leskovec1;~Arash_Vahdat3;~Stefano_Ermon1", "gender": "F;M;M;M;M;;;M;M", "homepage": "https://carriegu0818.github.io/;https://minkaixu.com;;https://weilinie.github.io/;https://people.umass.edu/tgeffner/;https://karstenkreis.github.io/;http://cs.stanford.edu/~jure/;http://latentspace.cc/;http://cs.stanford.edu/~ermon/", "dblp": "323/5349;257/3355;;147/4786;201/5406;238/6834;l/JureLeskovec;92/8108;47/8135", "google_scholar": ";https://scholar.google.com/citations?hl=en;1xsooTYAAAAJ;zW7BH7oAAAAJ;KIIe2K8AAAAJ;https://scholar.google.de/citations?user=rFd-DiAAAAAJ;Q_kKkIUAAAAJ;https://scholar.google.ca/citations?user=p9-nlRIAAAAJ;", "orcid": ";;;;;;0000-0002-5411-923X;;", "linkedin": ";;;;tomasgeffner/;karstenkreis;leskovec/;;", "or_profile": "~Siyi_Gu1;~Minkai_Xu1;~Alexander_S_Powers1;~Weili_Nie1;~Tomas_Geffner1;~Karsten_Kreis1;~Jure_Leskovec1;~Arash_Vahdat3;~Stefano_Ermon1", "aff": "Stanford University;Stanford University;;NVIDIA;NVIDIA;NVIDIA;Kumo.AI;NVIDIA;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;nvidia.com;nvidia.com;nvidia.com;kumo.ai;nvidia.com;stanford.edu", "position": "MS student;PhD student;;Research Scientist;Researcher;Research Scientist;Chief Scientist;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\ngu2024aligning,\ntitle={Aligning Target-Aware Molecule Diffusion Models with Exact Energy Optimization},\nauthor={Siyi Gu and Minkai Xu and Alexander S Powers and Weili Nie and Tomas Geffner and Karsten Kreis and Jure Leskovec and Arash Vahdat and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EWcvxXtzNu}\n}", "github": "", "reviewers": "5bxb;bNcB;fWJ4;FKJR", "pdf_size": 6264208, "rating": "4;5;6;7", "confidence": "3;2;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;4;4", "wc_summary": "67;93;45;67", "wc_strengths": "62;59;73;74", "wc_weaknesses": "65;89;47;34", "wc_questions": "6;42;2;98", "wc_limitations": "3;1;1;23", "wc_review": "203;284;168;296", "wc_reply_reviewers": "0;45;21;35", "wc_reply_authors": "204;44;44;41", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 68.0, 17.0 ], "wc_strengths_avg": [ 67.0, 6.59545297913646 ], "wc_weaknesses_avg": [ 58.75, 20.64430914319973 ], "wc_questions_avg": [ 37.0, 38.50973902793941 ], "wc_limitations_avg": [ 7.0, 9.273618495495704 ], "wc_review_avg": [ 237.75, 53.862672603575845 ], "wc_reply_reviewers_avg": [ 25.25, 16.887495373796554 ], "wc_reply_authors_avg": [ 83.25, 69.7258022542588 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11857292261879287587&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;;nvidia.com;nvidia.com;nvidia.com;kumo.ai;nvidia.com;stanford.edu", "author_num": 9, "aff_unique_index": "0;0;1;1;1;2;1;0", "aff_unique_norm": "Stanford University;NVIDIA;Kumo.AI", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "https://www.stanford.edu;https://www.nvidia.com;https://www.kumo.ai", "aff_unique_abbr": "Stanford;NVIDIA;Kumo.AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Multimodal Universe: Enabling Large-Scale Machine Learning with 100 TB of Astronomical Scientific Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97791", "id": "EWm9zR5Qy1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EWm9zR5Qy1", "openreview": "https://openreview.net/forum?id=EWm9zR5Qy1", "poster": "/media/PosterPDFs/NeurIPS%202024/97791.png?t=1733789192.8761983", "project": "", "author_site": "Eirini Angeloudi, Jeroen Audenaert, Micah Bowles, Benjamin M. Boyd, David Chemaly, Brian Cherinka, Ioana Ciuc\u0103, Miles Cranmer, Aaron Do, Matthew Grayling, Erin E. Hayes, Tom Hehir, Shirley Ho, Marc Huertas-Company, Kartheik Iyer, Maja Jablonska, Francois Lanusse, Henry Leung, Kaisey Mandel, Rafael Mart\u00ednez-Galarza, Peter Melchior, Lucas Meyer, Liam Parker, Helen Qu, Jeff Shen, Michael Smith, Connor Stone, Mike Walmsley, John Wu", "tldr": "", "abstract": "We present the `Multimodal Universe`, a large-scale multimodal dataset of scientific astronomical data, compiled specifically to facilitate machine learning research. Overall, our dataset contains hundreds of millions of astronomical observations, constituting 100TB of multi-channel and hyper-spectral images, spectra, multivariate time series, as well as a wide variety of associated scientific measurements and metadata. In addition, we include a range of benchmark tasks representative of standard practices for machine learning methods in astrophysics. This massive dataset will enable the development of large multi-modal models specifically targeted towards scientific applications. All codes used to compile the dataset, and a description of how to access the data is available at https://github.com/MultimodalUniverse/MultimodalUniverse", "keywords": "Multimodal Dataset;Open Dataset;Scientific Applications;Astrophysics", "primary_area": "", "supplementary_material": "", "author": "Eirini Angeloudi;Jeroen Audenaert;Micah Bowles;Benjamin M. Boyd;David Chemaly;Brian Cherinka;Ioana Ciuca;Miles Cranmer;Aaron Do;Matthew Grayling;Erin Elizabeth Hayes;Tom Hehir;Shirley Ho;Marc Huertas-Company;Kartheik G. Iyer;Maja Jablonska;Francois Lanusse;Henry W. Leung;Kaisey Mandel;Juan Rafael Mart\u00ednez-Galarza;Peter Melchior;Lucas Thibaut Meyer;Liam Holden Parker;Helen Qu;Jeff Shen;Michael J. Smith;Connor Stone;Mike Walmsley;John F Wu", "authorids": "~Eirini_Angeloudi1;~Jeroen_Audenaert1;~Micah_Bowles1;~Benjamin_M._Boyd1;~David_Chemaly1;~Brian_Cherinka1;~Ioana_Ciuca1;~Miles_Cranmer2;~Aaron_Do1;~Matthew_Grayling1;~Erin_Elizabeth_Hayes1;~Tom_Hehir1;~Shirley_Ho2;~Marc_Huertas-Company1;~Kartheik_G._Iyer1;~Maja_Jablonska1;~Francois_Lanusse2;~Henry_W._Leung1;~Kaisey_Mandel1;~Juan_Rafael_Mart\u00ednez-Galarza1;~Peter_Melchior1;~Lucas_Thibaut_Meyer1;~Liam_Holden_Parker1;~Helen_Qu1;~Jeff_Shen1;~Michael_J._Smith1;~Connor_Stone1;~Mike_Walmsley1;~John_F_Wu1", "gender": "F;;M;;M;;F;;M;M;F;;;M;M;F;M;M;M;M;;M;M;;M;;M;M;M", "homepage": ";;;;https://david-chemaly.github.io/;;https://www.iciuca.com;https://astroautomata.com/;;;https://www.ast.cam.ac.uk/people/erin.hayes;https://www.ast.cam.ac.uk/people/thomas.hehir;https://www.shirleyho.space/;https://mhuertascompany.weebly.com/;https://kartheikiyer.com/;;https://flanusse.net;https://henrysky.github.io/;https://www.ast.cam.ac.uk/people/kaisey.mandel;https://lweb.cfa.harvard.edu/~jmartine/Welcome.html;https://pmelchior.net;https://ltmeyer.github.io/;;https://www.helenqu.com;https://jshen.net;https://mjjsmith.com;https://connorjstone.com/;https://www.walmsley.dev;https://jwuphysics.github.io", "dblp": ";;280/0063;;;;;205/2493;;;;;162/2218;;;;;;;;205/3171;354/3369;;317/0339;;;;241/7149.html;", "google_scholar": "w_nSCywAAAAJ;nlCcKhwAAAAJ;Q7ziv7YAAAAJ;;;;tvfcVIgAAAAJ;10WfwCQAAAAJ;;;;;fhOi--4AAAAJ;;YcYe9bcAAAAJ;2Uya0rgAAAAJ;fRDCooIAAAAJ;;;uL75PPwAAAAJ;https://scholar.google.com/citations?view_op=list_works;;CjYa0N4AAAAJ;;1A7poIcAAAAJ;Uod9ut4AAAAJ;;https://scholar.google.co.uk/citations?user=mfQ57x8AAAAJ;bhxxaPwAAAAJ", "orcid": ";0000-0002-4371-3460;;0000-0002-0622-1117;0009-0001-4503-3071;0000-0002-4289-7923;0000-0001-6823-5453;0000-0002-6458-3423;0000-0003-3429-7845;0000-0002-6741-983X;0000-0003-3847-0780;0009-0002-4149-9736;;0000-0002-1416-8483;0000-0001-9298-3523;0000-0001-6962-4979;;0000-0002-0036-2752;;0000-0002-5069-0324;0000-0002-8873-5065;;;;0000-0001-6662-7306;0000-0003-0220-5125;0000-0002-9086-6398;0000-0002-6408-4181;0000-0002-5077-881X", "linkedin": ";;;benboyd97/;davidchemaly/;;ioana-ciuc%C4%83-11787025/;milescranmer/;;;;;;;;;;henry-leung-2664b3259/;;;;lucas-meyer-a7983b103/;liam-parker-96ba14160/;;;;connor-stone-9b970959/;m1kewalmsley/;jwuphysics/", "or_profile": "~Eirini_Angeloudi1;~Jeroen_Audenaert1;~Micah_Bowles1;~Benjamin_M._Boyd1;~David_Chemaly1;~Brian_Cherinka1;~Ioana_Ciuca1;~Miles_Cranmer2;~Aaron_Do1;~Matthew_Grayling1;~Erin_Elizabeth_Hayes1;~Tom_Hehir1;~Shirley_Ho2;~Marc_Huertas-Company1;~Kartheik_G._Iyer1;~Maja_Jablonska1;~Francois_Lanusse2;~Henry_W._Leung1;~Kaisey_Mandel1;~Juan_Rafael_Mart\u00ednez-Galarza1;~Peter_Melchior1;~Lucas_Thibaut_Meyer1;~Liam_Holden_Parker1;~Helen_Qu1;~Jeff_Shen1;~Michael_J._Smith1;~Connor_Stone1;~Mike_Walmsley1;~John_F_Wu1", "aff": "Instituto de Astrof\u00edsica de Canarias;Massachusetts Institute of Technology;University of Manchester;University of Cambridge;University of Cambridge;Space Telescope Science Institute;Australian National University;University of Cambridge;University of Hawaii System;University of Cambridge;University of Cambridge;University of Cambridge;Carnegie Mellon University;Institute of Astrophysics of the Canary Islands;Columbia University;Australian National University;CNRS;University of Toronto;University of Cambridge;Harvard University;Princeton University;INRIA;Flatiron Institute;University of Pennsylvania;Princeton University;Aspia Space;Universit\u00e9 de Montr\u00e9al;University of Toronto;Space Telescope Science Institute", "aff_domain": "iac.es;mit.edu;manchester.ac.uk;cam.ac.uk;cam.ac.uk;stsci.edu;anu.edu.au;cam.ac.uk;hawaii.edu;cam.ac.uk;cam.ac.uk;cam.ac.uk;cmu.edu;iac.es;columbia.edu;anu.edu.au;cnrs.fr;utoronto.ca;cam.ac.uk;harvard.edu;princeton.edu;inria.fr;simonsfoundation.org;upenn.edu;princeton.edu;aspiaspace.com;umontreal.ca;utoronto.ca;stsci.edu", "position": "PhD student;Postdoc;PhD student;PhD student;PhD student;Researcher;Lecturer;Assistant Professor;PhD student;Postdoc;PhD student;PhD student;Associate Professor;Researcher;Postdoc;PhD student;Researcher;PhD student;Full Professor;Researcher;Assistant Professor;PhD student;Researcher;PhD student;PhD student;Researcher;Postdoc;Postdoc;Researcher", "bibtex": "@inproceedings{\nangeloudi2024the,\ntitle={The Multimodal Universe: Enabling Large-Scale Machine Learning with 100{TB} of Astronomical Scientific Data},\nauthor={Eirini Angeloudi and Jeroen Audenaert and Micah Bowles and Benjamin M. Boyd and David Chemaly and Brian Cherinka and Ioana Ciuca and Miles Cranmer and Aaron Do and Matthew Grayling and Erin Elizabeth Hayes and Tom Hehir and Shirley Ho and Marc Huertas-Company and Kartheik G. Iyer and Maja Jablonska and Francois Lanusse and Henry W. Leung and Kaisey Mandel and Juan Rafael Mart{\\'\\i}nez-Galarza and Peter Melchior and Lucas Thibaut Meyer and Liam Holden Parker and Helen Qu and Jeff Shen and Michael J. Smith and Connor Stone and Mike Walmsley and John F Wu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EWm9zR5Qy1}\n}", "github": "", "reviewers": "JFFL;iM3x;a5Ep", "pdf_size": 7148370, "rating": "6;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "42;83;222", "wc_strengths": "54;59;9", "wc_improvement": "19;48;29", "wc_limitations": "7;36;12", "wc_correctness": "5;30;1", "wc_clarity": "5;5;1", "wc_relation_to_prior_work": "8;25;1", "wc_documentation": "23;29;1", "wc_additional_feedback": "1;1;1", "wc_review": "164;316;277", "wc_reply_reviewers": "33;34;171", "wc_reply_authors": "0;31;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 115.66666666666667, 77.02957584951092 ], "wc_strengths_avg": [ 40.666666666666664, 22.484562605386735 ], "wc_improvement_avg": [ 32.0, 12.027745701779143 ], "wc_limitations_avg": [ 18.333333333333332, 12.657891697365017 ], "wc_correctness_avg": [ 12.0, 12.832251036613439 ], "wc_clarity_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 11.333333333333334, 10.077477638553983 ], "wc_documentation_avg": [ 17.666666666666668, 12.036980056845191 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 252.33333333333334, 64.45842760174102 ], "wc_reply_reviewers_avg": [ 79.33333333333333, 64.81940724470994 ], "wc_reply_authors_avg": [ 10.333333333333334, 14.613540144521982 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 29, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14817376917192111475&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "iac.es;mit.edu;manchester.ac.uk;cam.ac.uk;cam.ac.uk;stsci.edu;anu.edu.au;cam.ac.uk;hawaii.edu;cam.ac.uk;cam.ac.uk;cam.ac.uk;cmu.edu;iac.es;columbia.edu;anu.edu.au;cnrs.fr;utoronto.ca;cam.ac.uk;harvard.edu;princeton.edu;inria.fr;simonsfoundation.org;upenn.edu;princeton.edu;aspiaspace.com;umontreal.ca;utoronto.ca;stsci.edu", "author_num": 29, "aff_unique_index": "0;1;2;3;3;4;5;3;6;3;3;3;7;8;9;5;10;11;3;12;13;14;15;16;13;17;18;11;4", "aff_unique_norm": "Instituto de Astrof\u00edsica de Canarias;Massachusetts Institute of Technology;University of Manchester;University of Cambridge;Space Telescope Science Institute;Australian National University;University of Hawaii;Carnegie Mellon University;Institute of Astrophysics of the Canary Islands;Columbia University;Centre National de la Recherche Scientifique;University of Toronto;Harvard University;Princeton University;INRIA;Flatiron Institute;University of Pennsylvania;Aspia Space;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";;;;;;;;;;;;;;;;;;", "aff_unique_url": "https://www.iac.es;https://web.mit.edu;https://www.manchester.ac.uk;https://www.cam.ac.uk;https://www.stsci.edu;https://www.anu.edu.au;https://www.hawaii.edu;https://www.cmu.edu;http://www.iac.es;https://www.columbia.edu;https://www.cnrs.fr;https://www.utoronto.ca;https://www.harvard.edu;https://www.princeton.edu;https://www.inria.fr;https://flatironinstitute.org;https://www.upenn.edu;;https://www.umontreal.ca", "aff_unique_abbr": "IAC;MIT;UoM;Cambridge;STScI;ANU;UH;CMU;IAC;Columbia;CNRS;U of T;Harvard;Princeton;INRIA;Flatiron;UPenn;;UdeM", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;2;2;1;3;2;1;2;2;2;1;0;1;3;4;5;2;1;1;4;1;1;1;6;5;5;1", "aff_country_unique": "Spain;United States;United Kingdom;Australia;France;Canada;Unknown" }, { "title": "Rethinking Out-of-Distribution Detection on Imbalanced Data Distribution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96028", "id": "EWxNEnFjKR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EWxNEnFjKR", "openreview": "https://openreview.net/forum?id=EWxNEnFjKR", "poster": "/media/PosterPDFs/NeurIPS%202024/96028.png?t=1731678283.0720453", "project": "", "author_site": "Kai Liu, Zhihang Fu, Sheng Jin, Chao Chen, Ze Chen, Rongxin Jiang, Fan Zhou, Yaowu Chen, Jieping Ye", "tldr": "", "abstract": "Detecting and rejecting unknown out-of-distribution (OOD) samples is critical for deployed neural networks to void unreliable predictions. In real-world scenarios, however, the efficacy of existing OOD detection methods is often impeded by the inherent imbalance of in-distribution (ID) data, which causes significant performance decline. Through statistical observations, we have identified two common challenges faced by different OOD detectors: misidentifying tail class ID samples as OOD, while erroneously predicting OOD samples as head class from ID. To explain this phenomenon, we introduce a generalized statistical framework, termed ImOOD, to formulate the OOD detection problem on imbalanced data distribution. Consequently, the theoretical analysis reveals that there exists a class-aware bias item between balanced and imbalanced OOD detection, which contributes to the performance gap. Building upon this finding, we present a unified training-time regularization technique to mitigate the bias and boost imbalanced OOD detectors across architecture designs. Our theoretically grounded method translates into consistent improvements on the representative CIFAR10-LT, CIFAR100-LT, and ImageNet-LT benchmarks against several state-of-the-art OOD detection ap- proaches. Code is available at https://github.com/alibaba/imood.", "keywords": "Out-of-Distribution Detection;Imbalanced Recognition", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kai Liu;Zhihang Fu;Sheng Jin;Chao Chen;Ze Chen;Rongxin Jiang;Fan Zhou;Yaowu Chen;Jieping Ye", "authorids": "~Kai_Liu8;~Zhihang_Fu1;~Sheng_Jin3;~Chao_Chen19;~Ze_Chen3;~Rongxin_Jiang1;~Fan_Zhou13;~Yaowu_Chen2;~Jieping_Ye4", "gender": "M;M;M;M;M;M;;;M", "homepage": "https://kail8.github.io/;https://zhihangfu.top/;;https://chaochen.cc/;;https://person.zju.edu.cn/0008430;https://person.zju.edu.cn/fanzhou;https://person.zju.edu.cn/0088219;http://yelabs.net/", "dblp": ";207/1894;70/6780-2;66/3019-26.html;15/4184-1;10/2064-1;;;03/5454", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;e_e3Ur0AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=_xDUAtQAAAAJ;hVqKZq0AAAAJ;;;;T9AzhwcAAAAJ", "orcid": ";;0000-0001-7254-1664;;;;;;0000-0001-8662-5818", "linkedin": ";;;;;;;;", "or_profile": "~Kai_Liu8;~Zhihang_Fu1;~Sheng_Jin3;~Chao_Chen19;~Ze_Chen3;~Rongxin_Jiang1;~Fan_Zhou13;~Yaowu_Chen2;~Jieping_Ye4", "aff": "Alibaba Group;Alibaba Group;Nanyang Technological University;Alibaba Group;Alibaba Group;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;ntu.edu.sg;alibaba-inc.com;alibaba-inc.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com", "position": "Intern;Researcher;Postdoc;Researcher;Researcher;Researcher;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2024rethinking,\ntitle={Rethinking Out-of-Distribution Detection on Imbalanced Data Distribution},\nauthor={Kai Liu and Zhihang Fu and Sheng Jin and Chao Chen and Ze Chen and Rongxin Jiang and Fan Zhou and Yaowu Chen and Jieping Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EWxNEnFjKR}\n}", "github": "", "reviewers": "XZSm;QTK3;E388;5Lex", "pdf_size": 637664, "rating": "5;5;6;6", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "45;77;112;46", "wc_strengths": "37;64;45;72", "wc_weaknesses": "82;106;92;34", "wc_questions": "98;69;5;19", "wc_limitations": "1;5;5;2", "wc_review": "263;321;259;173", "wc_reply_reviewers": "276;9;0;14", "wc_reply_authors": "423;99;62;28", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 27.44995446262161 ], "wc_strengths_avg": [ 54.5, 14.080127840328723 ], "wc_weaknesses_avg": [ 78.5, 27.069355367278327 ], "wc_questions_avg": [ 47.75, 37.51916177102042 ], "wc_limitations_avg": [ 3.25, 1.7853571071357126 ], "wc_review_avg": [ 254.0, 52.81098370604357 ], "wc_reply_reviewers_avg": [ 74.75, 116.29998925193415 ], "wc_reply_authors_avg": [ 153.0, 157.89395175243413 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DPWkKruRee0J:scholar.google.com/&scioq=Rethinking+Out-of-Distribution+Detection+on+Imbalanced+Data+Distribution&hl=en&as_sdt=0,10", "gs_version_total": 4, "email": "alibaba-inc.com;alibaba-inc.com;ntu.edu.sg;alibaba-inc.com;alibaba-inc.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com", "author_num": 9, "aff_unique_index": "0;0;1;0;0;2;2;2;0", "aff_unique_norm": "Alibaba Group;Nanyang Technological University;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.ntu.edu.sg;https://www.zju.edu.cn", "aff_unique_abbr": "Alibaba;NTU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Enhancing Feature Diversity Boosts Channel-Adaptive Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96027", "id": "EXuv4tVNa3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EXuv4tVNa3", "openreview": "https://openreview.net/forum?id=EXuv4tVNa3", "poster": "/media/PosterPDFs/NeurIPS%202024/96027.png?t=1731443890.6093113", "project": "", "author_site": "Chau Pham, Bryan Plummer", "tldr": "", "abstract": "Multi-Channel Imaging (MCI) contains an array of challenges for encoding useful feature representations not present in traditional images. For example, images from two different satellites may both contain RGB channels, but the remaining channels can be different for each imaging source. Thus, MCI models must support a variety of channel configurations at test time. Recent work has extended traditional visual encoders for MCI, such as Vision Transformers (ViT), by supplementing pixel information with an encoding representing the channel configuration. However, these methods treat each channel equally, i.e., they do not consider the unique properties of each channel type, which can result in needless and potentially harmful redundancies in the learned features. For example, if RGB channels are always present, the other channels can focus on extracting information that cannot be captured by the RGB channels. To this end, we propose DiChaViT, which aims to enhance the diversity in the learned features of MCI-ViT models. This is achieved through a novel channel sampling strategy that encourages the selection of more distinct channel sets for training. Additionally, we employ regularization and initialization techniques to increase the likelihood that new information is learned from each channel. Many of our improvements are architecture agnostic and can be incorporated into new architectures as they are developed. Experiments on both satellite and cell microscopy datasets, CHAMMI, JUMP-CP, and So2Sat, report DiChaViT yields a 1.5 - 5.0% gain over the state-of-the-art. Our code is publicly available at https://github.com/chaudatascience/diverse_channel_vit.", "keywords": "vision transformer;representation learning;multi-channel imaging", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chau Pham;Bryan A. Plummer", "authorids": "~Chau_Pham1;~Bryan_A._Plummer1", "gender": "M;M", "homepage": "http://mchaupham.com;http://bryanplummer.com/", "dblp": "259/7023-1;163/2330", "google_scholar": "fu-qT-wAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": "chau-pham-bu;", "or_profile": "~Chau_Pham1;~Bryan_Allen_Plummer1", "aff": "Boston University;Boston University", "aff_domain": "bu.edu;bu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npham2024enhancing,\ntitle={Enhancing Feature Diversity Boosts Channel-Adaptive Vision Transformers},\nauthor={Chau Pham and Bryan A. Plummer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EXuv4tVNa3}\n}", "github": "", "reviewers": "K1xF;gjzo;4kDP;q1Xa", "pdf_size": 1912689, "rating": "5;6;7;7", "confidence": "4;5;4;3", "soundness": "2;3;4;3", "novelty": "3;2;3;2", "presentation": "3;3;4;4", "wc_summary": "260;175;41;154", "wc_strengths": "116;153;14;105", "wc_weaknesses": "417;249;228;321", "wc_questions": "227;102;4;4", "wc_limitations": "229;30;4;10", "wc_review": "1249;709;291;594", "wc_reply_reviewers": "0;47;44;55", "wc_reply_authors": "43;74;80;76", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 157.5, 78.0976952284765 ], "wc_strengths_avg": [ 97.0, 51.112620750652184 ], "wc_weaknesses_avg": [ 303.75, 73.92352467246134 ], "wc_questions_avg": [ 84.25, 91.61434112626691 ], "wc_limitations_avg": [ 68.25, 93.3069531171177 ], "wc_review_avg": [ 710.75, 346.242960217244 ], "wc_reply_reviewers_avg": [ 36.5, 21.453437952924933 ], "wc_reply_authors_avg": [ 68.25, 14.737282653189496 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8885874495874787777&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "bu.edu;bu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "IKEA Manuals at Work: 4D Grounding of Assembly Instructions on Internet Videos", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97790", "id": "EXwf5iE98P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EXwf5iE98P", "openreview": "https://openreview.net/forum?id=EXwf5iE98P", "poster": "", "project": "", "author_site": "Yunong Liu, Weiyu Liu, Shubh Khanna, Cristobal Eyzaguirre, Manling Li, Juan Carlos Niebles, Vineeth Ravi, Saumitra Mishra, Jiajun Wu", "tldr": "", "abstract": "Shape assembly is a ubiquitous task in daily life, integral for constructing complex 3D structures like IKEA furniture. While significant progress has been made in developing autonomous agents for shape assembly, existing datasets have not yet tackled the 4D grounding of assembly instructions in videos, essential for a holistic understanding of assembly in 3D space over time. We introduce IKEA Video Manuals, a dataset that features 3D models of furniture parts, instructional manuals, assembly videos from the Internet, and most importantly, annotations of dense spatio-temporal alignments between these data modalities. To demonstrate the utility of IKEA Video Manuals, we present five applications essential for shape assembly: assembly plan generation, part-conditioned segmentation, part-conditioned pose estimation, video object segmentation, and furniture assembly based on instructional video manuals. For each application, we provide evaluation metrics and baseline methods. Through experiments on our annotated data, we highlight many challenges in grounding assembly instructions in videos to improve shape assembly, including handling occlusions, varying viewpoints, and extended assembly sequences.", "keywords": "Shape Assembly;3D;Video;Procedural Knowledge", "primary_area": "", "supplementary_material": "/attachment/c82a8bc0a94e3db98e42aec31fd9c6511695bf27.pdf", "author": "Yunong Liu;Cristobal Eyzaguirre;Manling Li;Shubh Khanna;Juan Carlos Niebles;Vineeth Ravi;Saumitra Mishra;Weiyu Liu;Jiajun Wu", "authorids": "~Yunong_Liu1;~Cristobal_Eyzaguirre1;~Manling_Li1;~Shubh_Khanna1;~Juan_Carlos_Niebles1;~Vineeth_Ravi1;~Saumitra_Mishra1;~Weiyu_Liu1;~Jiajun_Wu1", "gender": "F;;F;M;M;M;M;M;M", "homepage": "http://yunongliu.com;;https://limanling.github.io/;;http://www.niebles.net/;;https://sites.google.com/site/saumitramishrac4dm/;http://weiyuliu.com/;https://jiajunwu.com", "dblp": "277/1853;;178/3620;;26/647;;208/1387;133/0311.html;117/4768", "google_scholar": "676UJYgAAAAJ;;6U4SXnUAAAAJ;HsShv4sAAAAJ;hqNhUCYAAAAJ;iNT6NOAAAAAJ;https://scholar.google.co.uk/citations?user=On6E6ogAAAAJ;PHi0YEQAAAAJ;2efgcS0AAAAJ", "orcid": "0000-0001-9169-8552;;;;;0009-0003-0424-5531;;;0000-0002-4176-343X", "linkedin": ";;;shubh-khanna/;;vineethravi/;;;jiajunwu/", "or_profile": "~Yunong_Liu1;~Cristobal_Eyzaguirre1;~Manling_Li1;~Shubh_Khanna1;~Juan_Carlos_Niebles1;~Vineeth_Ravi1;~Saumitra_Mishra1;~Weiyu_Liu1;~Jiajun_Wu1", "aff": "Computer Science Department, Stanford University;;Stanford University;Stanford University;Stanford University;J.P. Morgan Chase;J.P. Morgan Chase;Stanford University;Stanford University", "aff_domain": "cs.stanford.edu;;stanford.edu;stanford.edu;stanford.edu;jpmorgan.com;jpmorgan.com;stanford.edu;stanford.edu", "position": "MS student;;Postdoc;Undergrad student;Adjunct Professor;Principal Researcher;Researcher;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nliu2024ikea,\ntitle={{IKEA} Manuals at Work: 4D Grounding of Assembly Instructions on Internet Videos},\nauthor={Yunong Liu and Cristobal Eyzaguirre and Manling Li and Shubh Khanna and Juan Carlos Niebles and Vineeth Ravi and Saumitra Mishra and Weiyu Liu and Jiajun Wu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EXwf5iE98P}\n}", "github": "", "reviewers": "hZeh;pCx9;fTbB;U61q", "pdf_size": 35780009, "rating": "6;6;7;7", "confidence": "4;3;4;4", "wc_summary_and_contributions": "97;170;79;126", "wc_strengths": "115;44;60;148", "wc_improvement": "250;83;72;93", "wc_limitations": "9;165;1;25", "wc_correctness": "16;13;1;17", "wc_clarity": "11;1;1;5", "wc_relation_to_prior_work": "6;19;1;3", "wc_documentation": "7;1;1;41", "wc_additional_feedback": "1;1;1;1", "wc_review": "512;497;217;459", "wc_reply_reviewers": "0;0;11;0", "wc_reply_authors": "50;49;0;49", "reply_reviewers": "0;0;1;0", "reply_authors": "4;4;3;4", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 118.0, 34.3874977280988 ], "wc_strengths_avg": [ 91.75, 41.811332195948985 ], "wc_improvement_avg": [ 124.5, 72.83714711601492 ], "wc_limitations_avg": [ 50.0, 66.9552089086428 ], "wc_correctness_avg": [ 11.75, 6.378675411086537 ], "wc_clarity_avg": [ 4.5, 4.092676385936225 ], "wc_relation_to_prior_work_avg": [ 7.25, 7.013380069552769 ], "wc_documentation_avg": [ 12.5, 16.635804759614125 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 421.25, 119.49555431061023 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 37.0, 21.365860619221497 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=542164600988971892&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.stanford.edu;;stanford.edu;stanford.edu;stanford.edu;jpmorgan.com;jpmorgan.com;stanford.edu;stanford.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;1;1;0;0", "aff_unique_norm": "Stanford University;JPMorgan Chase & Co.", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.stanford.edu;https://www.jpmorganchase.com", "aff_unique_abbr": "Stanford;JPM", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Rethinking the Power of Timestamps for Robust Time Series Forecasting: A Global-Local Fusion Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96026", "id": "EY2agT920S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EY2agT920S", "openreview": "https://openreview.net/forum?id=EY2agT920S", "poster": "/media/PosterPDFs/NeurIPS%202024/96026.png?t=1727939783.0497313", "project": "", "author_site": "Chengsen Wang, Qi Qi, Jingyu Wang, Haifeng Sun, Zirui Zhuang, Jinming Wu, Jianxin Liao", "tldr": "", "abstract": "Time series forecasting has played a pivotal role across various industries, including finance, transportation, energy, healthcare, and climate. Due to the abundant seasonal information they contain, timestamps possess the potential to offer robust global guidance for forecasting techniques. However, existing works primarily focus on local observations, with timestamps being treated merely as an optional supplement that remains underutilized. When data gathered from the real world is polluted, the absence of global information will damage the robust prediction capability of these algorithms. To address these problems, we propose a novel framework named GLAFF. Within this framework, the timestamps are modeled individually to capture the global dependencies. Working as a plugin, GLAFF adaptively adjusts the combined weights for global and local information, enabling seamless collaboration with any time series forecasting backbone. Extensive experiments conducted on nine real-world datasets demonstrate that GLAFF significantly enhances the average performance of widely used mainstream forecasting models by 12.5\\%, surpassing the previous state-of-the-art method by 5.5\\%.", "keywords": "Time Series Forecasting", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/eb5cd8850bf45bd7402ee5cdc11ece4a79e88aa4.zip", "author": "Chengsen Wang;Qi Qi;Jingyu Wang;Haifeng Sun;Zirui Zhuang;Jinming Wu;Jianxin Liao", "authorids": "~Chengsen_Wang1;~Qi_Qi1;~Jingyu_Wang1;~Haifeng_Sun2;~Zirui_Zhuang1;~Jinming_Wu1;~Jianxin_Liao2", "gender": "M;;M;Not Specified;M;M;M", "homepage": "https://forestsking.github.io/;;https://jericwang.github.io/;https://ziruizhuang.github.io/;https://kimingng.notion.site/Jinming-Kimmy-Wu-b22c1682d48d47939dcd7c41bf6a6bab?pvs=4;https://kyy.bupt.edu.cn/info/1014/3341.htm;https://hfsun.github.io", "dblp": "223/1052;80/6406-1.html;37/2749-1;https://dblp.uni-trier.de/pid/235/7014;;;00/11044-1", "google_scholar": "6KQpKtgAAAAJ;;H441DjwAAAAJ;j74lPwkAAAAJ;eh-XJIoAAAAJ;;dwhbTsEAAAAJ", "orcid": "0000-0002-3826-1148;0000-0003-0829-4624;0000-0002-2182-2228;0000-0003-3345-1732;;0000-0000-8780-1230;", "linkedin": ";;;;;;", "or_profile": "~Chengsen_Wang1;~Qi_Qi1;~Jingyu_Wang1;~Zirui_Zhuang1;~Jinming_Wu1;~Jianxin_Liao2;~haifeng_sun1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Full Professor;Full Professor;Associate Professor;MS student;Full Professor;Lecturer", "bibtex": "@inproceedings{\nwang2024rethinking,\ntitle={Rethinking the Power of Timestamps for Robust Time Series Forecasting: A Global-Local Fusion Perspective},\nauthor={Chengsen Wang and Qi Qi and Jingyu Wang and Haifeng Sun and Zirui Zhuang and Jinming Wu and Jianxin Liao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EY2agT920S}\n}", "github": "", "reviewers": "XBcx;Y7JY;wbSq;N7JP;NzBw", "pdf_size": 726203, "rating": "4;6;6;7;7", "confidence": "4;3;4;5;4", "soundness": "2;4;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "42;91;146;43;70", "wc_strengths": "19;78;131;50;172", "wc_weaknesses": "205;79;322;101;67", "wc_questions": "3;5;203;6;55", "wc_limitations": "16;35;1;12;17", "wc_review": "285;288;803;212;381", "wc_reply_reviewers": "0;41;165;27;17", "wc_reply_authors": "95;373;116;28;28", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;4;3;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.4, 38.41145662429375 ], "wc_strengths_avg": [ 90.0, 55.11805511808268 ], "wc_weaknesses_avg": [ 154.8, 96.77272342969377 ], "wc_questions_avg": [ 54.4, 76.82083050839792 ], "wc_limitations_avg": [ 16.2, 10.979981785048645 ], "wc_review_avg": [ 393.8, 211.51397116975514 ], "wc_reply_reviewers_avg": [ 50.0, 59.03219460599445 ], "wc_reply_authors_avg": [ 128.0, 127.48176340167248 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.2886751345948129, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6939986434937314668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Kernel PCA for Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96025", "id": "EZpKBC1ohS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EZpKBC1ohS", "openreview": "https://openreview.net/forum?id=EZpKBC1ohS", "poster": "/media/PosterPDFs/NeurIPS%202024/96025.png?t=1730115420.5399783", "project": "", "author_site": "Kun Fang, Qinghua Tao, Kexin Lv, Mingzhen He, Xiaolin Huang, JIE YANG", "tldr": "", "abstract": "Out-of-Distribution (OoD) detection is vital for the reliability of Deep Neural Networks (DNNs).\nExisting works have shown the insufficiency of Principal Component Analysis (PCA) straightforwardly applied on the features of DNNs in detecting OoD data from In-Distribution (InD) data.\nThe failure of PCA suggests that the network features residing in OoD and InD are not well separated by simply proceeding in a linear subspace, which instead can be resolved through proper non-linear mappings.\nIn this work, we leverage the framework of Kernel PCA (KPCA) for OoD detection, and seek suitable non-linear kernels that advocate the separability between InD and OoD data in the subspace spanned by the principal components.\nBesides, explicit feature mappings induced from the devoted task-specific kernels are adopted so that the KPCA reconstruction error for new test samples can be efficiently obtained with large-scale data.\nExtensive theoretical and empirical results on multiple OoD data sets and network structures verify the superiority of our KPCA detector in efficiency and efficacy with state-of-the-art detection performance.", "keywords": "out-of-distribution detection;kernel principal component analysis;random Fourier features", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Kun Fang;Qinghua Tao;Kexin Lv;Mingzhen He;Xiaolin Huang;JIE YANG", "authorids": "~Kun_Fang1;~Qinghua_Tao1;~Kexin_Lv1;~Mingzhen_He1;~Xiaolin_Huang1;~JIE_YANG18", "gender": "M;F;F;M;M;M", "homepage": "https://fanghenshaometeor.github.io/;https://qinghua-tao.github.io/;;https://mingzhenhe.github.io/;http://www.pami.sjtu.edu.cn/en/xiaolin;http://www.pami.sjtu.edu.cn", "dblp": "51/5923-4;182/9643.html;263/6869;313/1706;61/2227;", "google_scholar": "yC2s2JIAAAAJ;_dZHZD8AAAAJ;;;DR-gBcEAAAAJ;", "orcid": "0000-0001-6351-201X;0000-0001-9705-7748;0000-0002-3462-241X;0000-0002-9214-4196;;", "linkedin": ";;;;;", "or_profile": "~Kun_Fang1;~Qinghua_Tao1;~Kexin_Lv1;~Mingzhen_He1;~Xiaolin_Huang1;~JIE_YANG18", "aff": "Shanghai Jiaotong University;(ESAT) Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;esat.kuleuven.be;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Postdoc;;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfang2024kernel,\ntitle={Kernel {PCA} for Out-of-Distribution Detection},\nauthor={Kun Fang and Qinghua Tao and Kexin Lv and Mingzhen He and Xiaolin Huang and JIE YANG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EZpKBC1ohS}\n}", "github": "", "reviewers": "w1ZW;LuQe;sSKR;6eWv", "pdf_size": 2807924, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "70;65;107;51", "wc_strengths": "25;57;155;52", "wc_weaknesses": "92;147;188;186", "wc_questions": "2;84;76;139", "wc_limitations": "1;8;168;44", "wc_review": "190;361;694;472", "wc_reply_reviewers": "0;339;125;90", "wc_reply_authors": "48;590;1009;125", "reply_reviewers": "0;2;1;2", "reply_authors": "2;3;4;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.25, 20.69269194667528 ], "wc_strengths_avg": [ 72.25, 49.30200300190653 ], "wc_weaknesses_avg": [ 153.25, 38.957508903932755 ], "wc_questions_avg": [ 75.25, 48.75128203442449 ], "wc_limitations_avg": [ 55.25, 67.10951869891484 ], "wc_review_avg": [ 429.25, 182.90622597385797 ], "wc_reply_reviewers_avg": [ 138.5, 124.41563406581987 ], "wc_reply_authors_avg": [ 443.0, 387.01227370717845 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=645912820328726052&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;esat.kuleuven.be;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;KU Leuven", "aff_unique_dep": ";Department of Electrical Engineering", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.kuleuven.be", "aff_unique_abbr": "SJTU;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Belgium" }, { "title": "Matching the Statistical Query Lower Bound for $k$-Sparse Parity Problems with Sign Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96024", "id": "EbSSBvwUWw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EbSSBvwUWw", "openreview": "https://openreview.net/forum?id=EbSSBvwUWw", "poster": "", "project": "", "author_site": "Yiwen Kou, Zixiang Chen, Quanquan Gu, Sham Kakade", "tldr": "", "abstract": "The $k$-sparse parity problem is a classical problem in computational complexity and algorithmic theory, serving as a key benchmark for understanding computational classes. In this paper, we solve the $k$-sparse parity problem with sign stochastic gradient descent, a variant of stochastic gradient descent (SGD) on two-layer fully-connected neural networks. We demonstrate that this approach can efficiently solve the $k$-sparse parity problem on a $d$-dimensional hypercube ($k\\le O(\\sqrt{d})$) with a sample complexity of $\\tilde{O}(d^{k-1})$ using $2^{\\Theta(k)}$ neurons, matching the established $\\Omega(d^{k})$ lower bounds of Statistical Query (SQ) models. Our theoretical analysis begins by constructing a good neural network capable of correctly solving the $k$-parity problem. We then demonstrate how a trained neural network with sign SGD can effectively approximate this good network, solving the $k$-parity problem with small statistical errors. To the best of our knowledge, this is the first result that matches the SQ lower bound for solving $k$-sparse parity problem using gradient-based methods.", "keywords": "k-Parity;Optimization;Generalization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Yiwen Kou;Zixiang Chen;Quanquan Gu;Sham M. Kakade", "authorids": "~Yiwen_Kou1;~Zixiang_Chen1;~Quanquan_Gu1;~Sham_M._Kakade1", "gender": "F;M;M;M", "homepage": "https://evankou.github.io/;https://sites.google.com/view/zxchen;http://web.cs.ucla.edu/~qgu/;https://shamulent.github.io", "dblp": "323/9058;137/3624;50/4597;s/SMKakade", "google_scholar": "https://scholar.google.com/citations?hl=en;6nrCHr0AAAAJ;GU9HgNAAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ", "orcid": ";;;", "linkedin": "yiwen-kou-5a444916b/;;;", "or_profile": "~Yiwen_Kou1;~Zixiang_Chen1;~Quanquan_Gu1;~Sham_M._Kakade1", "aff": "University of California, Los Angeles; University of California, Los Angeles;University of California, Los Angeles;Harvard University", "aff_domain": "ucla.edu;cs.ucla.edu;cs.ucla.edu;harvard.edu", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkou2024matching,\ntitle={Matching the Statistical Query Lower Bound for \\$k\\$-Sparse Parity Problems with Sign Stochastic Gradient Descent},\nauthor={Yiwen Kou and Zixiang Chen and Quanquan Gu and Sham M. Kakade},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EbSSBvwUWw}\n}", "github": "", "reviewers": "6YuP;Bgvr;f3yt;9WvP", "pdf_size": 603744, "rating": "6;6;6;6", "confidence": "4;4;3;4", "soundness": "3;4;3;3", "novelty": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "78;114;56;76", "wc_strengths": "91;113;57;34", "wc_weaknesses": "221;219;36;124", "wc_questions": "26;85;53;103", "wc_limitations": "1;1;1;13", "wc_review": "417;532;203;350", "wc_reply_reviewers": "86;106;0;24", "wc_reply_authors": "32;26;0;6", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 20.904544960366874 ], "wc_strengths_avg": [ 73.75, 30.408674749156695 ], "wc_weaknesses_avg": [ 150.0, 76.60613552451265 ], "wc_questions_avg": [ 66.75, 29.566661969184143 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 375.5, 118.97583788316012 ], "wc_reply_reviewers_avg": [ 54.0, 43.42810150121693 ], "wc_reply_authors_avg": [ 16.0, 13.341664064126334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7hN28gdy65oJ:scholar.google.com/&scioq=Matching+the+Statistical+Query+Lower+Bound+for+%24k%24-Sparse+Parity+Problems+with+Sign+Stochastic+Gradient+Descent&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "ucla.edu;cs.ucla.edu;cs.ucla.edu;harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Los Angeles;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.harvard.edu", "aff_unique_abbr": "UCLA;Harvard", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Remove that Square Root: A New Efficient Scale-Invariant Version of AdaGrad", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96023", "id": "EdG59dnOzN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EdG59dnOzN", "openreview": "https://openreview.net/forum?id=EdG59dnOzN", "poster": "", "project": "", "author_site": "Sayantan Choudhury, Nazarii Tupitsa, Nicolas Loizou, Samuel Horv\u00e1th, Martin Takac, Eduard Gorbunov", "tldr": "", "abstract": "Adaptive methods are extremely popular in machine learning as they make learning rate tuning less expensive. This paper introduces a novel optimization algorithm named KATE, which presents a scale-invariant adaptation of the well-known AdaGrad algorithm. We prove the scale-invariance of KATE for the case of Generalized Linear Models. Moreover, for general smooth non-convex problems, we establish a convergence rate of $O((\\log T)/\\sqrt{T})$ for KATE, matching the best-known ones for AdaGrad and Adam. We also compare KATE to other state-of-the-art adaptive algorithms Adam and AdaGrad in numerical experiments with different problems, including complex machine learning tasks like image classification and text classification on real data. The results indicate that KATE consistently outperforms AdaGrad and matches/surpasses the performance of Adam in all considered scenarios.", "keywords": "Scale-invariant methods;AdaGrad;non-convex optimization;adaptive methods", "primary_area": "optimization", "supplementary_material": "", "author": "Sayantan Choudhury;Nazarii Tupitsa;Nicolas Loizou;Samuel Horv\u00e1th;Martin Tak\u00e1\u010d;Eduard Gorbunov", "authorids": "~Sayantan_Choudhury1;~Nazarii_Tupitsa1;~Nicolas_Loizou1;~Samuel_Horv\u00e1th1;~Martin_Tak\u00e1\u010d1;~Eduard_Gorbunov1", "gender": "M;;M;M;M;M", "homepage": "https://sites.google.com/view/sayantan-homepage/home;;https://nicolasloizou.github.io/;https://sites.google.com/view/samuelhorvath;https://eduardgorbunov.github.io;http://mtakac.com", "dblp": ";234/8907;173/4958;234/8604;215/5512.html;42/3759-1.html", "google_scholar": ";5siCuX4AAAAJ;https://scholar.google.co.uk/citations?user=mvDmzAQAAAAJ;k252J7kAAAAJ;https://scholar.google.ru/citations?user=85j2RqQAAAAJ;qKQD-2cAAAAJ", "orcid": ";0000-0002-9005-0129;;0000-0003-0619-9260;;0000-0001-7455-2025", "linkedin": "sayantan-choudhury-21168941;;;samuel-horvath/;;martintakac/", "or_profile": "~Sayantan_Choudhury1;~Nazarii_Tupitsa1;~Nicolas_Loizou1;~Samuel_Horv\u00e1th1;~Eduard_Gorbunov1;~Martin_Takac3", "aff": "Johns Hopkins University;Mohamed bin Zayed University of Artificial Intelligence;Johns Hopkins University;MBZUAI;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "jhu.edu;mbzuai.ac.ae;jhu.edu;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae", "position": "PhD student;Researcher;Assistant Professor;Assistant Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchoudhury2024remove,\ntitle={Remove that Square Root: A New Efficient Scale-Invariant Version of AdaGrad},\nauthor={Sayantan Choudhury and Nazarii Tupitsa and Nicolas Loizou and Samuel Horv{\\'a}th and Martin Tak{\\'a}{\\v{c}} and Eduard Gorbunov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EdG59dnOzN}\n}", "github": "", "reviewers": "uk2H;eTbr;bngL;ZFDF", "pdf_size": 1189185, "rating": "4;6;7;7", "confidence": "2;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;4;3", "wc_summary": "16;81;21;55", "wc_strengths": "61;38;26;58", "wc_weaknesses": "74;211;126;26", "wc_questions": "69;83;180;58", "wc_limitations": "1;5;43;1", "wc_review": "221;418;396;198", "wc_reply_reviewers": "0;15;27;69", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 43.25, 26.461056290329758 ], "wc_strengths_avg": [ 45.75, 14.428704030508076 ], "wc_weaknesses_avg": [ 109.25, 68.56885225814999 ], "wc_questions_avg": [ 97.5, 48.44842618702903 ], "wc_limitations_avg": [ 12.5, 17.684739183827393 ], "wc_review_avg": [ 308.25, 99.38907133080578 ], "wc_reply_reviewers_avg": [ 27.75, 25.66490794840301 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=828106797036552911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jhu.edu;mbzuai.ac.ae;jhu.edu;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 6, "aff_unique_index": "0;1;0;1;1;1", "aff_unique_norm": "Johns Hopkins University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "JHU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "CRT-Fusion: Camera, Radar, Temporal Fusion Using Motion Information for 3D Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96022", "id": "EdXW71LvKE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EdXW71LvKE", "openreview": "https://openreview.net/forum?id=EdXW71LvKE", "poster": "/media/PosterPDFs/NeurIPS%202024/96022.png?t=1732521115.3802369", "project": "", "author_site": "Jisong Kim, Minjae Seong, Jun Won Choi", "tldr": "", "abstract": "Accurate and robust 3D object detection is a critical component in autonomous vehicles and robotics. While recent radar-camera fusion methods have made significant progress by fusing information in the bird's-eye view (BEV) representation, they often struggle to effectively capture the motion of dynamic objects, leading to limited performance in real-world scenarios. In this paper, we introduce CRT-Fusion, a novel framework that integrates temporal information into radar-camera fusion to address this challenge. Our approach comprises three key modules: Multi-View Fusion (MVF), Motion Feature Estimator (MFE), and Motion Guided Temporal Fusion (MGTF). The MVF module fuses radar and image features within both the camera view and bird's-eye view, thereby generating a more precise unified BEV representation. The MFE module conducts two simultaneous tasks: estimation of pixel-wise velocity information and BEV segmentation. Based on the velocity and the occupancy score map obtained from the MFE module, the MGTF module aligns and fuses feature maps across multiple timestamps in a recurrent manner. By considering the motion of dynamic objects, CRT-Fusion can produce robust BEV feature maps, thereby improving detection accuracy and robustness. Extensive evaluations on the challenging nuScenes dataset demonstrate that CRT-Fusion achieves state-of-the-art performance for radar-camera-based 3D object detection. Our approach outperforms the previous best method in terms of NDS by +1.7%, while also surpassing the leading approach in mAP by +1.4%. These significant improvements in both metrics showcase the effectiveness of our proposed fusion strategy in enhancing the reliability and accuracy of 3D object detection.", "keywords": "3D Object Detection;Sensor Fusion;Temporal Fusion;Radar;Camera", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Jisong Kim;Minjae Seong;Jun Won Choi", "authorids": "~Jisong_Kim2;~Minjae_Seong1;~Jun_Won_Choi1", "gender": "M;M;M", "homepage": "https://www.spa.hanyang.ac.kr/;https://www.spa.snu.ac.kr/;https://jskim808.github.io/", "dblp": ";;", "google_scholar": ";IHH2PyYAAAAJ;yD7Led0AAAAJ", "orcid": ";0000-0002-3733-0148;0009-0000-8333-5858", "linkedin": "minjae-seong-5312b0299/;;jisong-kim-4159a32b2/", "or_profile": "~Minjae_Seong1;~Jun_Won_Choi1;~Ji_Song_Kim1", "aff": "RideFlux;Hanyang University;Hanyang University", "aff_domain": "rideflux.com;hanyang.ac.kr;hanyang.ac.kr", "position": "Intern;Full Professor;PhD student", "bibtex": "@inproceedings{\nkim2024crtfusion,\ntitle={{CRT}-Fusion: Camera, Radar, Temporal Fusion Using Motion Information for 3D Object Detection},\nauthor={Jisong Kim and Minjae Seong and Jun Won Choi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EdXW71LvKE}\n}", "github": "", "reviewers": "dkEf;NhQh;EC1R;6YHD;mr4N", "pdf_size": 1738558, "rating": "4;4;5;5;6", "confidence": "4;3;4;4;5", "soundness": "3;2;3;3;3", "novelty": "3;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "48;114;105;89;63", "wc_strengths": "40;72;84;72;52", "wc_weaknesses": "8;151;187;125;95", "wc_questions": "105;262;20;41;85", "wc_limitations": "8;49;19;1;5", "wc_review": "209;648;415;328;300", "wc_reply_reviewers": "0;0;31;14;70", "wc_reply_authors": "0;0;0;85;210", "reply_reviewers": "0;0;1;1;2", "reply_authors": "1;1;1;2;2", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.8, 24.911041728518704 ], "wc_strengths_avg": [ 64.0, 15.798734126505199 ], "wc_weaknesses_avg": [ 113.2, 60.684100059241224 ], "wc_questions_avg": [ 102.6, 85.25397351443509 ], "wc_limitations_avg": [ 16.4, 17.36202753136856 ], "wc_review_avg": [ 380.0, 149.26084550209407 ], "wc_reply_reviewers_avg": [ 23.0, 26.122786987609114 ], "wc_reply_authors_avg": [ 59.0, 82.365041127896 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PSVjaJMbppMJ:scholar.google.com/&scioq=CRT-Fusion:+Camera,+Radar,+Temporal+Fusion+Using+Motion+Information+for+3D+Object+Detection&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "rideflux.com;hanyang.ac.kr;hanyang.ac.kr", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "RideFlux;Hanyang University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.hanyang.ac.kr", "aff_unique_abbr": ";HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";South Korea" }, { "title": "SHMT: Self-supervised Hierarchical Makeup Transfer via Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96021", "id": "EeXcOYf3Lg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EeXcOYf3Lg", "openreview": "https://openreview.net/forum?id=EeXcOYf3Lg", "poster": "/media/PosterPDFs/NeurIPS%202024/96021.png?t=1729493407.022685", "project": "", "author_site": "Zhaoyang Sun, Shengwu Xiong, Yaxiong Chen, Fei Du, Weihua Chen, Fan Wang, Yi Rong", "tldr": "", "abstract": "This paper studies the challenging task of makeup transfer, which aims to apply diverse makeup styles precisely and naturally to a given facial image. Due to the absence of paired data, current methods typically synthesize sub-optimal pseudo ground truths to guide the model training, resulting in low makeup fidelity. Additionally, different makeup styles generally have varying effects on the person face, but existing methods struggle to deal with this diversity. To address these issues, we propose a novel Self-supervised Hierarchical Makeup Transfer (SHMT) method via latent diffusion models. Following a \"decoupling-and-reconstruction\" paradigm, SHMT works in a self-supervised manner, freeing itself from the misguidance of imprecise pseudo-paired data. Furthermore, to accommodate a variety of makeup styles, hierarchical texture details are decomposed via a Laplacian pyramid and selectively introduced to the content representation. Finally, we design a novel Iterative Dual Alignment (IDA) module that dynamically adjusts the injection condition of the diffusion model, allowing the alignment errors caused by the domain gap between content and makeup representations to be corrected. Extensive quantitative and qualitative analyses demonstrate the effectiveness of our method. Our code is available at https://github.com/Snowfallingplum/SHMT.", "keywords": "Makeup transfer;self-supervised learning;diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zhaoyang Sun;Shengwu Xiong;Yaxiong Chen;Fei Du;Weihua Chen;Fan Wang;Yi Rong", "authorids": "~Zhaoyang_Sun1;~Shengwu_Xiong1;~Yaxiong_Chen2;~Fei_Du1;~Weihua_Chen1;~Fan_Wang6;~Yi_Rong3", "gender": "M;M;M;M;M;F;M", "homepage": ";;https://xueshu.dailyheadlines.cc/citations?user=l0zg1_MAAAAJ&hl=zh-CN&oi=ao;;https://cwhgn.github.io;;http://cst.whut.edu.cn/xygk/szdw/202402/t20240226_985498.shtml", "dblp": ";;10/9546;;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;r6ZfScIAAAAJ;;0iHvDYwAAAAJ;KWVlYaMAAAAJ;WCRGTHsAAAAJ;https://scholar.google.com.hk/citations?user=Sv5DluIAAAAJ", "orcid": "0000-0002-7795-6719;;;;0000-0003-4141-7833;0000-0001-7320-1119;0000-0003-4867-6811", "linkedin": ";;;;;;", "or_profile": "~Zhaoyang_Sun1;~Shengwu_Xiong1;~Yaxiong_Chen2;~Fei_Du1;~Weihua_Chen1;~Fan_Wang6;~Yi_Rong3", "aff": "Wuhan University of Technology;Wuhan University of Technology;Wuhan University of Technology;Alibaba Group;Alibaba Group;Alibaba Group;Wuhan University of Technology", "aff_domain": "whut.edu.cn;whut.edu.cn;whut.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;whut.edu.cn", "position": "PhD student;Full Professor;Associate Professor;Researcher;Algorithm Engineer;Senior Staff Algorithm Engineer;Associate Professor", "bibtex": "@inproceedings{\nsun2024shmt,\ntitle={{SHMT}: Self-supervised Hierarchical Makeup Transfer via Latent Diffusion Models},\nauthor={Zhaoyang Sun and Shengwu Xiong and Yaxiong Chen and Fei Du and Weihua Chen and Fan Wang and Yi Rong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EeXcOYf3Lg}\n}", "github": "", "reviewers": "JmHE;izVu;dGT5", "pdf_size": 21696086, "rating": "3;6;7", "confidence": "4;3;5", "soundness": "2;3;4", "novelty": "2;3;2", "presentation": "3;3;3", "wc_summary": "44;62;175", "wc_strengths": "46;57;37", "wc_weaknesses": "116;78;40", "wc_questions": "79;22;2", "wc_limitations": "299;8;67", "wc_review": "584;227;321", "wc_reply_reviewers": "265;180;0", "wc_reply_authors": "13;102;0", "reply_reviewers": "2;2;0", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.66666666666667, 57.97892337354632 ], "wc_strengths_avg": [ 46.666666666666664, 8.178562764256865 ], "wc_weaknesses_avg": [ 78.0, 31.026870075253587 ], "wc_questions_avg": [ 34.333333333333336, 32.62241901242491 ], "wc_limitations_avg": [ 124.66666666666667, 125.60343236640558 ], "wc_review_avg": [ 377.3333333333333, 151.09011292014517 ], "wc_reply_reviewers_avg": [ 148.33333333333334, 110.47875612784368 ], "wc_reply_authors_avg": [ 38.333333333333336, 45.330882286680556 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.2401922307076307, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2425407367541828193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "whut.edu.cn;whut.edu.cn;whut.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;whut.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;1;1;0", "aff_unique_norm": "Wuhan University of Technology;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.wut.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "WUT;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SE(3)-bi-equivariant Transformers for Point Cloud Assembly", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96020", "id": "EehS4erXWB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EehS4erXWB", "openreview": "https://openreview.net/forum?id=EehS4erXWB", "poster": "/media/PosterPDFs/NeurIPS%202024/96020.png?t=1731087222.1881857", "project": "", "author_site": "Ziming Wang, Rebecka J\u00f6rnsten", "tldr": "", "abstract": "Given a pair of point clouds, the goal of assembly is to recover a rigid transformation that aligns one point cloud to the other. This task is challenging because the point clouds may be non-overlapped, and they may have arbitrary initial positions. To address these difficulties, we propose a method, called $SE(3)$-bi-equivariant transformer (BITR), based on the $SE(3)$-bi-equivariance prior of the task:it guarantees that when the inputs are rigidly perturbed, the output will transform accordingly. Due to its equivariance property, BITR can not only handle non-overlapped PCs, but also guarantee robustness against initial positions. Specifically, BITR first extracts features of the inputs using a novel $SE(3) \\times SE(3)$-transformer, and then projects the learned feature to group $SE(3)$ as the output. Moreover, we theoretically show that swap and scale equivariances can be incorporated into BITR, thus it further guarantees stable performance under scaling and swapping the inputs. We experimentally show the effectiveness of BITR in practical tasks.", "keywords": "equivariant neural networks;SE(3)-bi-equivariant transformer;point cloud assembly", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ziming Wang;Rebecka J\u00f6rnsten", "authorids": "~Ziming_Wang1;~Rebecka_J\u00f6rnsten1", "gender": "M;F", "homepage": ";", "dblp": ";41/4910", "google_scholar": "Z93qEesAAAAJ;SO9llAMAAAAJ", "orcid": "0000-0002-1739-307X;", "linkedin": ";rebecka-j%C3%B6rnsten-533675220/", "or_profile": "~Ziming_Wang1;~Rebecka_J\u00f6rnsten1", "aff": "Chalmers University of Technology;G\u00f6teborg University", "aff_domain": "chalmers.se;gu.se", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nwang2024sebiequivariant,\ntitle={{SE}(3)-bi-equivariant Transformers for Point Cloud Assembly},\nauthor={Ziming Wang and Rebecka J{\\\"o}rnsten},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EehS4erXWB}\n}", "github": "", "reviewers": "P5ME;1793;1H6H;mtoz;fq5D", "pdf_size": 3020074, "rating": "4;5;5;5;9", "confidence": "5;4;3;3;4", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;2;2", "wc_summary": "133;57;77;77;173", "wc_strengths": "51;72;93;40;254", "wc_weaknesses": "421;244;330;70;73", "wc_questions": "207;21;169;15;74", "wc_limitations": "8;6;20;1;173", "wc_review": "820;400;689;203;747", "wc_reply_reviewers": "766;0;66;55;11", "wc_reply_authors": "56;0;100;178;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;1", "rating_avg": [ 5.6, 1.7435595774162693 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 103.4, 43.05159695063587 ], "wc_strengths_avg": [ 102.0, 78.14089838234521 ], "wc_weaknesses_avg": [ 227.6, 139.21005710795467 ], "wc_questions_avg": [ 97.2, 77.86244280781332 ], "wc_limitations_avg": [ 41.6, 65.99575743939909 ], "wc_review_avg": [ 571.8, 233.0505524558996 ], "wc_reply_reviewers_avg": [ 179.6, 294.27103153385656 ], "wc_reply_authors_avg": [ 66.8, 67.09515630803762 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.06131393394849662, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=674749317928446243&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "chalmers.se;gu.se", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Chalmers University of Technology;University of Gothenburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.chalmers.se;https://www.gu.se", "aff_unique_abbr": "Chalmers;GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "QuanTA: Efficient High-Rank Fine-Tuning of LLMs with Quantum-Informed Tensor Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96019", "id": "EfpZNpkrm2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EfpZNpkrm2", "openreview": "https://openreview.net/forum?id=EfpZNpkrm2", "poster": "/media/PosterPDFs/NeurIPS%202024/96019.png?t=1733501472.6372175", "project": "", "author_site": "Zhuo Chen, Rumen Dangovski, Charlotte Loh, Owen Dugan, Di Luo, Marin Soljacic", "tldr": "", "abstract": "We propose **Quan**tum-informed **T**ensor **A**daptation (**QuanTA**), a novel, easy-to-implement, fine-tuning method with no inference overhead for large-scale pre-trained language models. By leveraging quantum-inspired methods derived from quantum circuit structures, QuanTA enables efficient *high-rank* fine-tuning, surpassing the limitations of Low-Rank Adaptation (LoRA)---low-rank approximation may fail for complicated downstream tasks. Our approach is theoretically supported by the universality theorem and the rank representation theorem to achieve efficient high-rank adaptations. Experiments demonstrate that QuanTA significantly enhances commonsense reasoning, arithmetic reasoning, and scalability compared to traditional methods. Furthermore, QuanTA shows superior performance with fewer trainable parameters compared to other approaches and can be designed to integrate with existing fine-tuning algorithms for further improvement, providing a scalable and efficient solution for fine-tuning large language models and advancing state-of-the-art in natural language processing.", "keywords": "LLM;Language Model;PEFT;Finetuning;High Rank", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhuo Chen;Rumen Dangovski;Charlotte Loh;Owen M Dugan;Di Luo;Marin Soljacic", "authorids": "~Zhuo_Chen8;~Rumen_Dangovski1;~Charlotte_Loh1;~Owen_M_Dugan1;~Di_Luo1;~Marin_Soljacic1", "gender": ";M;F;M;M;", "homepage": ";http://super-ms.mit.edu/rumen.html;;;;https://www.rle.mit.edu/marin/", "dblp": ";207/8546;217/6481;;;131/2044", "google_scholar": ";;https://scholar.google.com/citations?hl=en;1VvL1cgAAAAJ;OxZytTQAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Zhuo_Chen8;~Rumen_Dangovski1;~Charlotte_Loh1;~Owen_M_Dugan1;~Di_Luo1;~Marin_Soljacic1", "aff": ";Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;", "aff_domain": ";mit.edu;mit.edu;mit.edu;mit.edu;", "position": ";PhD student;PhD student;Undergrad student;Postdoc;", "bibtex": "@inproceedings{\nchen2024quanta,\ntitle={Quan{TA}: Efficient High-Rank Fine-Tuning of {LLM}s with Quantum-Informed Tensor Adaptation},\nauthor={Zhuo Chen and Rumen Dangovski and Charlotte Loh and Owen M Dugan and Di Luo and Marin Soljacic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EfpZNpkrm2}\n}", "github": "", "reviewers": "ddRu;H46K;Qk3g;GYap", "pdf_size": 749546, "rating": "6;7;8;8", "confidence": "3;3;3;3", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "39;48;208;59", "wc_strengths": "15;36;66;47", "wc_weaknesses": "21;71;98;96", "wc_questions": "75;6;54;197", "wc_limitations": "7;1;10;1", "wc_review": "157;162;436;400", "wc_reply_reviewers": "19;18;13;18", "wc_reply_authors": "36;33;33;704", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.5, 69.35596585730747 ], "wc_strengths_avg": [ 41.0, 18.452642087245934 ], "wc_weaknesses_avg": [ 71.5, 31.03626910567699 ], "wc_questions_avg": [ 83.0, 70.40951640225914 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 288.75, 129.88721068681087 ], "wc_reply_reviewers_avg": [ 17.0, 2.345207879911715 ], "wc_reply_authors_avg": [ 201.5, 290.1210954067284 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=933158533697907929&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 3, "email": ";mit.edu;mit.edu;mit.edu;mit.edu;", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Revisiting K-mer Profile for Effective and Scalable Genome Representation Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96018", "id": "Ehsd856Ltb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ehsd856Ltb", "openreview": "https://openreview.net/forum?id=Ehsd856Ltb", "poster": "/media/PosterPDFs/NeurIPS%202024/96018.png?t=1731649148.7601745", "project": "", "author_site": "Abdulkadir Celikkanat, Andres Masegosa, Thomas Nielsen", "tldr": "", "abstract": "Obtaining effective representations of DNA sequences is crucial for genome analysis. Metagenomic binning, for instance, relies on genome representations to cluster complex mixtures of DNA fragments from biological samples with the aim of determining their microbial compositions. In this paper, we revisit k-mer-based representations of genomes and provide a theoretical analysis of their use in representation learning. Based on the analysis, we propose a lightweight and scalable model for performing metagenomic binning at the genome read level, relying only on the k-mer compositions of the DNA fragments. We compare the model to recent genome foundation models and demonstrate that while the models are comparable in performance, the proposed model is significantly more effective in terms of scalability, a crucial aspect for performing metagenomic binning of real-world data sets.", "keywords": "metagenomic binning;genome representation learning;dna sequences;genome analysis", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Abdulkadir Celikkanat;Andres R Masegosa;Thomas Dyhre Nielsen", "authorids": "~Abdulkadir_Celikkanat2;~Andres_R_Masegosa1;~Thomas_Dyhre_Nielsen1", "gender": "M;M;M", "homepage": "https://andresmasegosa.github.io/;http://people.cs.aau.dk/~tdn/;https://abdcelikkanat.github.io", "dblp": "70/2802;23/1643;228/7764", "google_scholar": "J1zoY7AAAAAJ;https://scholar.google.dk/citations?user=6fWF0CgAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Andres_R_Masegosa1;~Thomas_Dyhre_Nielsen1;~Abdulkadir_CELIKKANAT1", "aff": "Aalborg University;Aalborg University;Aalborg University, Aalborg University", "aff_domain": "cs.aau.dk;aau.dk;cs.aau.dk", "position": "Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ncelikkanat2024revisiting,\ntitle={Revisiting K-mer Profile for Effective and Scalable Genome Representation Learning},\nauthor={Abdulkadir Celikkanat and Andres R Masegosa and Thomas Dyhre Nielsen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ehsd856Ltb}\n}", "github": "", "reviewers": "tV95;8HNd;xd8g;e86P", "pdf_size": 1739866, "rating": "5;5;5;8", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "36;43;42;91", "wc_strengths": "24;128;19;52", "wc_weaknesses": "232;176;343;56", "wc_questions": "4;3;2;128", "wc_limitations": "10;1;3;7", "wc_review": "306;351;409;334", "wc_reply_reviewers": "127;35;12;25", "wc_reply_authors": "344;15;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 53.0, 22.102036105300343 ], "wc_strengths_avg": [ 55.75, 43.56819367382587 ], "wc_weaknesses_avg": [ 201.75, 103.4078696231578 ], "wc_questions_avg": [ 34.25, 54.131206341628854 ], "wc_limitations_avg": [ 5.25, 3.491060010942235 ], "wc_review_avg": [ 350.0, 37.662979170532964 ], "wc_reply_reviewers_avg": [ 49.75, 45.33969011804117 ], "wc_reply_authors_avg": [ 89.75, 146.91898277622263 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2912425214060443795&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.aau.dk;aau.dk;cs.aau.dk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Aalborg University", "aff_unique_dep": "", "aff_unique_url": "https://www.aau.dk", "aff_unique_abbr": "AAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Aalborg", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "ShareGPT4Video: Improving Video Understanding and Generation with Better Captions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97789", "id": "EiH6WWLzlu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EiH6WWLzlu", "openreview": "https://openreview.net/forum?id=EiH6WWLzlu", "poster": "/media/PosterPDFs/NeurIPS%202024/97789.png?t=1731647964.544691", "project": "", "author_site": "Lin Chen, Xilin Wei, Jinsong Li, Xiaoyi Dong, Pan Zhang, Yuhang Zang, Zehui Chen, Haodong Duan, lin bin, Zhenyu Tang, Li Yuan, Yu Qiao, Dahua Lin, Feng Zhao, Jiaqi Wang", "tldr": "", "abstract": "We present the ShareGPT4Video series, aiming to facilitate the video understanding of large video-language models (LVLMs) and the video generation of text-to-video models (T2VMs) via dense and precise captions. The series comprises: 1) ShareGPT4Video, 40K GPT4V annotated dense captions of videos with various lengths and sources, developed through carefully designed data filtering and annotating strategy. 2) ShareCaptioner-Video, an efficient and capable captioning model for arbitrary videos, with 4.8M high-quality aesthetic videos annotated by it. 3) ShareGPT4Video-8B, a simple yet superb LVLM that reached SOTA performance on three advancing video benchmarks. To achieve this, taking aside the non-scalable costly human annotators, we find using GPT4V to caption video with a naive multi-frame or frame-concatenation input strategy leads to less detailed and sometimes temporal-confused results. We argue the challenge of designing a high-quality video captioning strategy lies in three aspects: 1) Inter-frame precise temporal change understanding. 2) Intra-frame detailed content description. 3) Frame-number scalability for arbitrary-length videos. To this end, we meticulously designed a differential video captioning strategy, which is stable, scalable, and efficient for generating captions for videos with arbitrary resolution, aspect ratios, and length. Based on it, we construct ShareGPT4Video, which contains 40K high-quality videos spanning a wide range of categories, and the resulting captions encompass rich world knowledge, object attributes, camera movements, and crucially, detailed and precise temporal descriptions of events. Based on ShareGPT4Video, we further develop ShareCaptioner-Video, a superior captioner capable of efficiently generating high-quality captions for arbitrary videos. We annotated 4.8M aesthetically appealing videos by it and verified their effectiveness on a 10-second text2video generation task. For video understanding, we verified the effectiveness of ShareGPT4Video on several current LVLM architectures and presented our superb new LVLM ShareGPT4Video-8B. All the models, strategies, and annotations will be open-sourced and we hope this project can serve as a pivotal resource for advancing both the LVLMs and T2VMs community.", "keywords": "large video-language models;text-to-video models", "primary_area": "", "supplementary_material": "", "author": "Lin Chen;Xilin Wei;Jinsong Li;Xiaoyi Dong;Pan Zhang;Yuhang Zang;Zehui Chen;Haodong Duan;Bin Lin;Zhenyu Tang;Li Yuan;Yu Qiao;Dahua Lin;Feng Zhao;Jiaqi Wang", "authorids": "~Lin_Chen18;~Xilin_Wei1;~Jinsong_Li1;~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Zehui_Chen1;~Haodong_Duan1;~Bin_Lin1;~Zhenyu_Tang5;~Li_Yuan2;~Yu_Qiao1;~Dahua_Lin1;~Feng_Zhao6;~Jiaqi_Wang1", "gender": "M;M;M;M;M;M;M;M;;M;;;M;M;M", "homepage": "https://lin-chen.site;https://github.com/Wiselnn570;https://li-jinsong.github.io/;;https://panzhang0212.github.io/;https://yuhangzang.github.io;https://lovesnowbest.site;https://kennymckormick.github.io;;https://github.com/Tzy010822;;;http://dahua.site;https://bivlab123.github.io/;https://myownskyw7.github.io/", "dblp": "13/3479-19;11/1871.html;29/3923-1;230/3711;;230/4433;;211/7919;;72/4431-4;;;53/6088;181/2734-4;44/740-3", "google_scholar": "https://scholar.google.com/citations?hl=en;zxtbqQwAAAAJ;4yD2aTkAAAAJ;FscToE0AAAAJ;moHH480AAAAJ;hW23VKIAAAAJ;NfSsLncAAAAJ;vi3W-m8AAAAJ;;;;;GMzzRRUAAAAJ;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ", "orcid": "0000-0002-1546-791X;;;;;0000-0003-1110-5062;0000-0002-1843-4478;0000-0002-3052-4177;;;;;;0000-0001-6767-8105;", "linkedin": ";;;;;yuhang-zang/;;haodong-duan-bb9349166/;;;;;;;", "or_profile": "~Lin_Chen18;~Xilin_Wei1;~Jinsong_Li1;~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Zehui_Chen1;~Haodong_Duan1;~Bin_Lin1;~Zhenyu_Tang5;~Li_Yuan2;~Yu_Qiao1;~Dahua_Lin1;~Feng_Zhao6;~Jiaqi_Wang1", "aff": "University of Science and Technology of China;Fudan University;Xi'an Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;University of Science and Technology of China;Shanghai Artificial Intelligence Laboratory;;Peking University;;;The Chinese University of Hong Kong;University of Science and Technology of China;Shanghai AI Laboratory", "aff_domain": "ustc.edu.cn;fudan.edu.cn;xjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ustc.edu.cn;pjlab.org.cn;;pku.edu.cn;;;cuhk.edu.hk;ustc.edu.cn;pjlab.org.cn", "position": "MS student;PhD student;Undergrad student;Researcher;Researcher;Researcher;PhD student;Postdoc;;MS student;;;Associate Professor;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2024sharegptvideo,\ntitle={Share{GPT}4Video: Improving Video Understanding and Generation with Better Captions},\nauthor={Lin Chen and Xilin Wei and Jinsong Li and Xiaoyi Dong and Pan Zhang and Yuhang Zang and Zehui Chen and Haodong Duan and Bin Lin and Zhenyu Tang and Li Yuan and Yu Qiao and Dahua Lin and Feng Zhao and Jiaqi Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EiH6WWLzlu}\n}", "github": "", "reviewers": "6BYE;e5xH;R7XY;74Ty", "pdf_size": 9550783, "rating": "6;6;7;7", "confidence": "5;3;4;5", "wc_summary_and_contributions": "84;116;185;107", "wc_strengths": "88;2;135;4", "wc_improvement": "182;2;64;215", "wc_limitations": "1;1;95;1", "wc_correctness": "1;1;13;1", "wc_clarity": "1;1;151;1", "wc_relation_to_prior_work": "1;1;16;1", "wc_documentation": "1;1;9;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "360;126;669;332", "wc_reply_reviewers": "0;0;0;183", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 123.0, 37.6497011940334 ], "wc_strengths_avg": [ 57.25, 56.74229022519271 ], "wc_improvement_avg": [ 115.75, 86.39552939822755 ], "wc_limitations_avg": [ 24.5, 40.703193977868615 ], "wc_correctness_avg": [ 4.0, 5.196152422706632 ], "wc_clarity_avg": [ 38.5, 64.9519052838329 ], "wc_relation_to_prior_work_avg": [ 4.75, 6.49519052838329 ], "wc_documentation_avg": [ 3.0, 3.4641016151377544 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 371.75, 193.95150811478626 ], "wc_reply_reviewers_avg": [ 45.75, 79.24132444627614 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 156, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5437535431763980848&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;fudan.edu.cn;xjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ustc.edu.cn;pjlab.org.cn;;pku.edu.cn;;;cuhk.edu.hk;ustc.edu.cn;pjlab.org.cn", "author_num": 15, "aff_unique_index": "0;1;2;3;3;3;0;3;4;5;0;6", "aff_unique_norm": "University of Science and Technology of China;Fudan University;Xi'an Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Peking University;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.fudan.edu.cn;https://www.xjtu.edu.cn;http://www.shailab.org/;http://www.pku.edu.cn;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "USTC;Fudan;XJTU;Shanghai AI Lab;Peking U;CUHK;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Base of RoPE Bounds Context Length", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96017", "id": "EiIelh2t7S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EiIelh2t7S", "openreview": "https://openreview.net/forum?id=EiIelh2t7S", "poster": "", "project": "", "author_site": "Mingyu Xu, Xin Men, Bingning Wang, Qingyu Zhang, Hongyu Lin, Xianpei Han, weipeng chen", "tldr": "", "abstract": "Position embedding is a core component of current Large Language Models (LLMs). Rotary position embedding (RoPE), a technique that encodes the position information with a rotation matrix, has been the de facto choice for position embedding in many LLMs, such as the Llama series. RoPE has been further utilized to extend long context capability, which is roughly based on adjusting the \\textit{base} parameter of RoPE to mitigate out-of-distribution (OOD) problems in position embedding. However, in this paper, we find that LLMs may obtain a superficial long-context ability based on the OOD theory. We revisit the role of RoPE in LLMs and propose a novel property of long-term decay, we derive that the \\textit{base of RoPE bounds context length}: there is an absolute lower bound for the base value to obtain certain context length capability. Our work reveals the relationship between context length and RoPE base both theoretically and empirically, which may shed light on future long context training.", "keywords": "Large Language Model;Long context;Rotary position embedding", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Mingyu Xu;Xin Men;Bingning Wang;Qingyu Zhang;Hongyu Lin;Xianpei Han;weipeng chen", "authorids": "~Mingyu_Xu1;~Xin_Men1;~Bingning_Wang3;~Qingyu_Zhang4;~Hongyu_Lin1;~Xianpei_Han1;~weipeng_chen2", "gender": "M;M;;M;M;M;M", "homepage": ";;;http://linhongyu.top/;http://www.icip.org.cn/team/homepage/;;", "dblp": ";183/0924;;;57/2368;;", "google_scholar": ";wW00MTkAAAAJ;uuki79cAAAAJ;mu5lLakAAAAJ;pA88bm4AAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;;;;;0000-0001-9378-3806", "linkedin": ";;;;;;", "or_profile": "~Xin_Men1;~Bingning_Wang3;~Qingyu_Zhang4;~Hongyu_Lin1;~Xianpei_Han1;~weipeng_chen2;~\u540d\u5b87_\u5f901", "aff": ";Beijing Baichuan Intelligence Technology Co., Ltd.;Fuzhou University;Institute of Software, Chinese Academy of Sciences;Institute of Software, CAS;Beijing Baichuan Intelligence Technology Co., Ltd.;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";baichuan-ai.com;fzu.edu.cn;iscas.ac.cn;iscas.ac.cn;baichuan-ai.com;ia.ac.cn", "position": ";Researcher;Undergrad student;Associate Professor;Professor;Principal Researcher;MS student", "bibtex": "@inproceedings{\nxu2024base,\ntitle={Base of Ro{PE} Bounds Context Length},\nauthor={Mingyu Xu and Xin Men and Bingning Wang and Qingyu Zhang and Hongyu Lin and Xianpei Han and weipeng chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EiIelh2t7S}\n}", "github": "", "reviewers": "3a2j;mP48;aPSb;XTUy", "pdf_size": 3603396, "rating": "6;6;7;8", "confidence": "3;1;5;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "137;20;88;95", "wc_strengths": "105;1;112;63", "wc_weaknesses": "55;1;223;115", "wc_questions": "9;1;2;2", "wc_limitations": "1;1;2;2", "wc_review": "307;24;427;277", "wc_reply_reviewers": "88;0;0;13", "wc_reply_authors": "282;0;0;0", "reply_reviewers": "2;0;0;1", "reply_authors": "2;0;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 41.94639436232869 ], "wc_strengths_avg": [ 70.25, 44.15526582413472 ], "wc_weaknesses_avg": [ 98.5, 82.41814120689692 ], "wc_questions_avg": [ 3.5, 3.2015621187164243 ], "wc_limitations_avg": [ 1.5, 0.5 ], "wc_review_avg": [ 258.75, 146.69419722674786 ], "wc_reply_reviewers_avg": [ 25.25, 36.615399765672365 ], "wc_reply_authors_avg": [ 70.5, 122.10958193360585 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6625413488689132, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17818226093553643836&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";baichuan-ai.com;fzu.edu.cn;iscas.ac.cn;iscas.ac.cn;baichuan-ai.com;ia.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Beijing Baichuan Intelligence Technology Co., Ltd.;Fuzhou University;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Software", "aff_unique_url": ";https://www.fznu.edu.cn;http://www.ios.ac.cn", "aff_unique_abbr": ";FZU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Last-Iterate Convergence for Generalized Frank-Wolfe in Monotone Variational Inequalities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96016", "id": "EjKNSErSMJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EjKNSErSMJ", "openreview": "https://openreview.net/forum?id=EjKNSErSMJ", "poster": "", "project": "", "author_site": "Zaiwei Chen, Eric Mazumdar", "tldr": "", "abstract": "We study the convergence behavior of a generalized Frank-Wolfe algorithm in constrained (stochastic) monotone variational inequality (MVI) problems. In recent years, there have been numerous efforts to design algorithms for solving constrained MVI problems due to their connections with optimization, machine learning, and equilibrium computation in games. Most work in this domain has focused on extensions of simultaneous gradient play, with particular emphasis on understanding the convergence properties of extragradient and optimistic gradient methods. In contrast, we examine the performance of an algorithm from another well-known class of optimization algorithms: Frank-Wolfe. We show that a generalized variant of this algorithm achieves a fast $\\mathcal{O}(T^{-1/2})$ last-iterate convergence rate in constrained MVI problems. By drawing connections between our generalized Frank-Wolfe algorithm and the well-known smoothed fictitious play (FP) from game theory, we also derive a finite-sample convergence rate for smoothed FP in zero-sum matrix games. Furthermore, we demonstrate that a stochastic variant of the generalized Frank-Wolfe algorithm for MVI problems also converges in a last-iterate sense, albeit at a slower $\\mathcal{O}(T^{-1/6})$ convergence rate.", "keywords": "Monotone variational inequalities;generalized Frank-Wolfe method;last-iterate convergence;smoothed fictitious-play", "primary_area": "optimization", "supplementary_material": "", "author": "Zaiwei Chen;Eric Mazumdar", "authorids": "~Zaiwei_Chen1;~Eric_Mazumdar1", "gender": ";M", "homepage": ";http://people.eecs.berkeley.edu/~emazumdar/", "dblp": ";177/9322", "google_scholar": ";FZOxxvcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Zaiwei_Chen1;~Eric_Mazumdar1", "aff": ";Deparment of Computing + Mathematical Sciences, California Institute of Technology", "aff_domain": ";cms.caltech.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nchen2024lastiterate,\ntitle={Last-Iterate Convergence for Generalized Frank-Wolfe in Monotone Variational Inequalities},\nauthor={Zaiwei Chen and Eric Mazumdar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EjKNSErSMJ}\n}", "github": "", "reviewers": "3YVN;UPhr;BD4Y;EbDg", "pdf_size": 678039, "rating": "5;6;6;9", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "44;69;51;44", "wc_strengths": "33;56;73;102", "wc_weaknesses": "119;4;46;22", "wc_questions": "22;209;148;1", "wc_limitations": "1;45;6;1", "wc_review": "219;383;324;170", "wc_reply_reviewers": "48;42;0;0", "wc_reply_authors": "1014;1070;29;0", "reply_reviewers": "1;1;0;0", "reply_authors": "5;5;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 52.0, 10.222524150130436 ], "wc_strengths_avg": [ 66.0, 25.16942589730644 ], "wc_weaknesses_avg": [ 47.75, 43.751428548105714 ], "wc_questions_avg": [ 95.0, 86.55922827752106 ], "wc_limitations_avg": [ 13.25, 18.444172521422587 ], "wc_review_avg": [ 274.0, 83.99702375679747 ], "wc_reply_reviewers_avg": [ 22.5, 22.599778759979046 ], "wc_reply_authors_avg": [ 528.25, 514.2335923488469 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9Qs9jixON-cJ:scholar.google.com/&scioq=Last-Iterate+Convergence+for+Generalized+Frank-Wolfe+in+Monotone+Variational+Inequalities&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": ";cms.caltech.edu", "author_num": 2, "aff_unique_index": "1", "aff_unique_norm": ";California Institute of Technology", "aff_unique_dep": ";Mathematical Sciences", "aff_unique_url": ";https://www.caltech.edu", "aff_unique_abbr": ";Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "Elliptical Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96015", "id": "Ejg4d4FVrs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ejg4d4FVrs", "openreview": "https://openreview.net/forum?id=Ejg4d4FVrs", "poster": "", "project": "", "author_site": "Stefan Nielsen, Laziz Abdullaev, Rachel S.Y. Teo, Tan Nguyen", "tldr": "", "abstract": "Pairwise dot-product self-attention is key to the success of transformers that achieve state-of-the-art performance across a variety of applications in language and vision. This dot-product self-attention computes attention weights among the input tokens using Euclidean distance, which makes the model prone to representation collapse and vulnerable to contaminated samples. In this paper, we propose using a Mahalanobis distance metric for computing the attention weights to stretch the underlying feature space in directions of high contextual relevance. In particular, we define a hyper-ellipsoidal neighborhood around each query to increase the attention weights of the tokens lying in the contextually important directions. We term this novel class of attention Elliptical Attention. Our Elliptical Attention provides two benefits: 1) reducing representation collapse and 2) enhancing the model's robustness as the Elliptical Attention pays more attention to contextually relevant information, rather than focusing on some small subset of informative features. We empirically demonstrate the advantages of Elliptical Attention over the baseline dot-product attention and state-of-the-art attention methods on various practical tasks, including object classification, image\nsegmentation, and language modeling across different data modalities.", "keywords": "attention;non-parametric kernel regression;robustness;representation collapse", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/1af0c17937df41b48a17ef745260fccd103f3ce0.zip", "author": "Stefan Nielsen;Laziz Abdullaev;Rachel Teo;Tan Minh Nguyen", "authorids": "~Stefan_Nielsen1;~Laziz_Abdullaev1;~Rachel_Teo1;~Tan_Minh_Nguyen1", "gender": "M;M;F;M", "homepage": ";;https://github.com/rachtsy;https://tanmnguyen89.github.io/", "dblp": "380/3225;380/3830;380/2055.html;255/4725", "google_scholar": "https://scholar.google.com/citations?hl=en;IwZ0wCYAAAAJ;;OizOh88AAAAJ", "orcid": ";;;", "linkedin": "stefan-nielsen-850090b5/;laziz-abdullaev/;;", "or_profile": "~Stefan_Nielsen1;~Laziz_Abdullaev1;~Rachel_Teo1;~Tan_Minh_Nguyen1", "aff": "FPT AI;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "fpt.com;u.nus.edu;nus.edu.sg;nus.edu.sg", "position": "Researcher;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnielsen2024elliptical,\ntitle={Elliptical Attention},\nauthor={Stefan Nielsen and Laziz Abdullaev and Rachel Teo and Tan Minh Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ejg4d4FVrs}\n}", "github": "", "reviewers": "EhBA;5EPe;ySYF", "pdf_size": 2798218, "rating": "5;7;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "48;56;87", "wc_strengths": "59;58;98", "wc_weaknesses": "112;170;74", "wc_questions": "1;16;33", "wc_limitations": "1;15;1", "wc_review": "221;315;293", "wc_reply_reviewers": "45;21;16", "wc_reply_authors": "81;66;66", "reply_reviewers": "1;1;1", "reply_authors": "3;3;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 16.81930108205715 ], "wc_strengths_avg": [ 71.66666666666667, 18.624953392931992 ], "wc_weaknesses_avg": [ 118.66666666666667, 39.47432358156656 ], "wc_questions_avg": [ 16.666666666666668, 13.072447700751718 ], "wc_limitations_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_review_avg": [ 276.3333333333333, 40.14418457953226 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 12.657891697365017 ], "wc_reply_authors_avg": [ 71.0, 7.0710678118654755 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6152302390383227029&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fpt.com;u.nus.edu;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "FPT Corporation;National University of Singapore", "aff_unique_dep": "FPT AI;", "aff_unique_url": "https://www.fpt.com.vn;https://www.nus.edu.sg", "aff_unique_abbr": "FPT;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Vietnam;Singapore" }, { "title": "$\\nabla^2$DFT: A Universal Quantum Chemistry Dataset of Drug-Like Molecules and a Benchmark for Neural Network Potentials", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97788", "id": "ElUrNM9U8c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ElUrNM9U8c", "openreview": "https://openreview.net/forum?id=ElUrNM9U8c", "poster": "/media/PosterPDFs/NeurIPS%202024/97788.png?t=1733748315.4687138", "project": "", "author_site": "Kuzma Khrabrov, Anton Ber, Artem Tsypin, Konstantin Ushenin, Egor Rumiantsev, Alexander Telepov, Dmitry Protasov, Ilya Shenbin, Anton Alekseev, Mikhail Shirokikh, Sergey Nikolenko, Elena Tutubalina, Artur Kadurin", "tldr": "", "abstract": "Methods of computational quantum chemistry provide accurate approximations of molecular properties crucial for computer-aided drug discovery and other areas of chemical science. \nHowever, high computational complexity limits the scalability of their applications.\nNeural network potentials (NNPs) are a promising alternative to quantum chemistry methods, but they require large and diverse datasets for training.\nThis work presents a new dataset and benchmark called $\\nabla^2$DFT that is based on the nablaDFT.\nIt contains twice as much molecular structures, three times more conformations, new data types and tasks, and state-of-the-art models.\nThe dataset includes energies, forces, 17 molecular properties, Hamiltonian and overlap matrices, and a wavefunction object.\nAll calculations were performed at the DFT level ($\\omega$B97X-D/def2-SVP) for each conformation. \nMoreover, $\\nabla^2$DFT is the first dataset that contains relaxation trajectories for a substantial number of drug-like molecules. \nWe also introduce a novel benchmark for evaluating NNPs in molecular property prediction, Hamiltonian prediction, and conformational optimization tasks. \nFinally, we propose an extendable framework for training NNPs and implement 10 models within it.", "keywords": "Graph neural networks;computational physics;computational chemistry;quantum chemistry;neural network potentials", "primary_area": "", "supplementary_material": "/attachment/d726141e6474ebf0bae43e4404357ee2a4816264.pdf", "author": "Kuzma Khrabrov;Anton Ber;Artem Tsypin;Konstantin Ushenin;Egor Rumiantsev;Alexander Telepov;Dmitry Protasov;Ilya Shenbin;Anton M. Alekseev;Mikhail Shirokikh;Sergey Nikolenko;Elena Tutubalina;Artur Kadurin", "authorids": "~Kuzma_Khrabrov1;~Anton_Ber1;~Artem_Tsypin1;~Konstantin_Ushenin1;~Egor_Rumiantsev1;~Alexander_Telepov1;~Dmitry_Protasov1;~Ilya_Shenbin1;~Anton_M._Alekseev1;~Mikhail_Shirokikh1;~Sergey_Nikolenko1;~Elena_Tutubalina1;~Artur_Kadurin1", "gender": "M;M;M;M;M;M;M;Not Specified;;M;M;F;M", "homepage": ";;;;https://github.com/AztecAlive;;https://www.scopus.com/authid/detail.uri?authorId=57268229500;;;https://arabel1a.github.io/;http://logic.pdmi.ras.ru/~sergey/;;", "dblp": "360/7510;;304/7594;;;;;https://dblp.uni-trier.de/pid/234/8529;;;50/1870.html;153/5554;230/8564", "google_scholar": "https://scholar.google.co.uk/citations?user=DRmC-YUAAAAJ;0H-sMxAAAAAJ;ppR0eQYAAAAJ;NxdGxysAAAAJ;IxItI3UAAAAJ;;;SluT_kMAAAAJ;;;https://scholar.google.ru/citations?hl=ru;https://scholar.google.ru/citations?user=npM9yekAAAAJ;https://scholar.google.ru/citations?user=HFLFHzUAAAAJ", "orcid": "0000-0002-0446-6751;;0000-0002-0754-759X;0000-0003-0575-3506;0000-0001-8262-5685;0000-0002-7280-1531;;0000-0002-6778-225X;;0009-0008-1771-0769;0000-0001-7787-2251;0000-0001-7936-0284;0000-0003-1482-9365", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Kuzma_Khrabrov1;~Anton_Ber1;~Artem_Tsypin1;~Konstantin_Ushenin1;~Egor_Rumiantsev1;~Alexander_Telepov1;~Dmitry_Protasov1;~Ilya_Shenbin1;~Anton_M._Alekseev1;~Mikhail_Shirokikh1;~Sergey_Nikolenko1;~Elena_Tutubalina1;~Artur_Kadurin1", "aff": "Artificial Intelligence Research Institute (AIRI);Artificial Intelligence Research Institute;Artificial Intelligence Research Institute;Institute of Immunology and Physiology;EPFL - EPF Lausanne;AIRI;AIRI;St. Petersburg Department of Steklov Mathematical Institute;;St. Petersburg State University;Steklov Institute of Mathematics at St. Petersburg;Kazan Federal University;Kuban State University", "aff_domain": "airi.net;airi.net;airi.net;iip.uran.ru;epfl.ch;airi.net;airi.net;pdmi.ras.ru;;spbu.ru;pdmi.ras.ru;kpfu.ru;kubsu.ru", "position": "Researcher;Researcher;Researcher;Researcher;PhD student;Researcher;Researcher;Researcher;;MS student;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkhrabrov2024nabladft,\ntitle={\\${\\textbackslash}nabla{\\textasciicircum}2\\${DFT}: A Universal Quantum Chemistry Dataset of Drug-Like Molecules and a Benchmark for Neural Network Potentials},\nauthor={Kuzma Khrabrov and Anton Ber and Artem Tsypin and Konstantin Ushenin and Egor Rumiantsev and Alexander Telepov and Dmitry Protasov and Ilya Shenbin and Anton M. Alekseev and Mikhail Shirokikh and Sergey Nikolenko and Elena Tutubalina and Artur Kadurin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ElUrNM9U8c}\n}", "github": "", "reviewers": "taPs;qiHs;EvHy;VPVC", "pdf_size": 1637557, "rating": "6;7;7;8", "confidence": "3;3;3;4", "wc_summary_and_contributions": "67;76;50;108", "wc_strengths": "7;7;39;26", "wc_improvement": "7;14;83;37", "wc_limitations": "4;11;6;4", "wc_correctness": "14;3;20;37", "wc_clarity": "6;5;5;9", "wc_relation_to_prior_work": "7;12;11;22", "wc_documentation": "3;9;4;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "116;138;219;269", "wc_reply_reviewers": "0;0;23;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 75.25, 21.087614848531352 ], "wc_strengths_avg": [ 19.75, 13.5531361684298 ], "wc_improvement_avg": [ 35.25, 29.71847068743612 ], "wc_limitations_avg": [ 6.25, 2.8613807855648994 ], "wc_correctness_avg": [ 18.5, 12.298373876248844 ], "wc_clarity_avg": [ 6.25, 1.6393596310755 ], "wc_relation_to_prior_work_avg": [ 13.0, 5.522680508593631 ], "wc_documentation_avg": [ 10.25, 8.814051281902097 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 185.5, 61.605600394769304 ], "wc_reply_reviewers_avg": [ 5.75, 9.959292143521045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3410554910830517475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "airi.net;airi.net;airi.net;iip.uran.ru;epfl.ch;airi.net;airi.net;pdmi.ras.ru;;spbu.ru;pdmi.ras.ru;kpfu.ru;kubsu.ru", "author_num": 13, "aff_unique_index": "0;0;0;1;2;0;0;3;4;5;6;7", "aff_unique_norm": "Artificial Intelligence Research Institute;Institute of Immunology and Physiology;EPFL;Steklov Mathematical Institute;St. Petersburg State University;Steklov Institute of Mathematics;Kazan Federal University;Kuban State University", "aff_unique_dep": "AI Research;;;Department of Steklov Mathematical Institute;;Mathematics;;", "aff_unique_url": ";;https://www.epfl.ch;http://www.mi.ras.ru;https://www.spbu.ru;http://www.pdmi.ras.ru;http://kpfu.ru;http://www.kubsu.ru", "aff_unique_abbr": "AIRI;;EPFL;SMI;SPbU;PDMI;KFU;KubSU", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Lausanne;St. Petersburg", "aff_country_unique_index": "1;1;2;3;3;4;4;4;4;4", "aff_country_unique": ";United States;Switzerland;Japan;Russian Federation" }, { "title": "Biomedical Visual Instruction Tuning with Clinician Preference Alignment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97787", "id": "Eogs84mv7N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Eogs84mv7N", "openreview": "https://openreview.net/forum?id=Eogs84mv7N", "poster": "", "project": "", "author_site": "Hejie Cui, Lingjun Mao, Xin Liang, Jieyu Zhang, Hui Ren, Quanzheng Li, Xiang Li, Carl Yang", "tldr": "", "abstract": "Recent advancements in multimodal foundation models have showcased impressive capabilities in understanding and reasoning with visual and textual information. Adapting these foundation models trained for general usage to specialized domains like biomedicine requires large-scale domain-specific instruction datasets. While existing works have explored curating such datasets automatically, the resultant datasets are not explicitly aligned with domain expertise. In this work, we propose a data-centric framework, Biomedical Visual Instruction Tuning with Clinician Preference Alignment (BioMed-VITAL), that incorporates clinician preferences into both stages of generating and selecting instruction data for tuning biomedical multimodal foundation models. First, during the generation stage, we prompt the GPT-4V generator with a diverse set of clinician-selected demonstrations for preference-aligned data candidate generation. Then, during the selection phase, we train a separate selection model, which explicitly distills clinician and policy-guided model preferences into a rating function to select high-quality data for medical instruction tuning. Results show that the model tuned with the instruction-following data from our method demonstrates a significant improvement in open visual chat (18.5% relatively) and medical VQA (win rate up to 81.73%). Our instruction-following data and models are available at https://BioMed-VITAL.github.io.", "keywords": "resource-constrained human preference alignment;biomedical visual instruction tuning;instructioin data generation and selection", "primary_area": "", "supplementary_material": "/attachment/c3b3cf2e5f6d854b3a57edddb429194a90f67d9c.pdf", "author": "Hejie Cui;Lingjun Mao;Xin LIANG;Jieyu Zhang;Hui Ren;Quanzheng Li;Xiang Li;Carl Yang", "authorids": "~Hejie_Cui1;~Lingjun_Mao1;~Xin_LIANG2;~Jieyu_Zhang1;~Hui_Ren1;~Quanzheng_Li1;~Xiang_Li14;~Carl_Yang1", "gender": "F;;F;M;F;M;M;M", "homepage": "https://hejiecui.com/;https://mao1207.github.io/;https://liangxin1001.github.io/;https://jieyuz2.github.io/;;https://camca.mgh.harvard.edu/people/faculty/;https://xiangli-shaun.github.io/;https://cs.emory.edu/~jyang71/", "dblp": "221/7865;;;;;;;305/0254", "google_scholar": "r0Vh6GEAAAAJ;;;T_INUHUAAAAJ;-8N8iIYAAAAJ;MHq2z7oAAAAJ;MjkwwiQAAAAJ;mOINlwcAAAAJ", "orcid": "0000-0001-6388-2619;;;0000-0002-1846-2436;;;0000-0002-9851-6376;0000-0001-9145-4531", "linkedin": "hejie-cui-b1071b13b/;;;jieyu-zhang-3baaa8154/;;;xiang-shaun-li-11b2b99/;", "or_profile": "~Hejie_Cui1;~Lingjun_Mao1;~Xin_LIANG2;~Jieyu_Zhang1;~Hui_Ren1;~Quanzheng_Li1;~Xiang_Li14;~Carl_Yang1", "aff": "Emory University;University of California, Berkeley;Tongji University;University of Washington;Harvard University;Harvard University;Massachusetts General Hospital, Harvard University;Emory University", "aff_domain": "emory.edu;berkeley.edu;tongji.edu.cn;cs.washington.edu;harvard.edu;harvard.edu;mgh.harvard.edu;emory.edu", "position": "PhD student;Intern;Undergrad student;PhD student;Instructor;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncui2024biomedical,\ntitle={Biomedical Visual Instruction Tuning with Clinician Preference Alignment},\nauthor={Hejie Cui and Lingjun Mao and Xin LIANG and Jieyu Zhang and Hui Ren and Quanzheng Li and Xiang Li and Carl Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Eogs84mv7N}\n}", "github": "", "reviewers": "wFxu;s4zq;K4A9;c7Cq", "pdf_size": 3934406, "rating": "6;7;7;7", "confidence": "4;4;4;5", "wc_summary_and_contributions": "58;37;77;81", "wc_strengths": "46;4;129;90", "wc_improvement": "277;8;122;94", "wc_limitations": "9;60;36;74", "wc_correctness": "36;12;71;47", "wc_clarity": "5;7;12;4", "wc_relation_to_prior_work": "21;9;59;4", "wc_documentation": "26;66;55;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "479;204;562;398", "wc_reply_reviewers": "114;82;0;0", "wc_reply_authors": "69;37;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;5;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 63.25, 17.469616481193857 ], "wc_strengths_avg": [ 67.25, 46.85816364306224 ], "wc_improvement_avg": [ 125.25, 97.16320033839972 ], "wc_limitations_avg": [ 44.75, 24.71209218176397 ], "wc_correctness_avg": [ 41.5, 21.219095173922945 ], "wc_clarity_avg": [ 7.0, 3.082207001484488 ], "wc_relation_to_prior_work_avg": [ 23.25, 21.545011023436494 ], "wc_documentation_avg": [ 37.5, 24.70323865407125 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 410.75, 132.70526553230658 ], "wc_reply_reviewers_avg": [ 49.0, 50.28916384272063 ], "wc_reply_authors_avg": [ 26.5, 28.81405906844782 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9914322051049422309&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "emory.edu;berkeley.edu;tongji.edu.cn;cs.washington.edu;harvard.edu;harvard.edu;mgh.harvard.edu;emory.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;4;4;0", "aff_unique_norm": "Emory University;University of California, Berkeley;Tongji University;University of Washington;Harvard University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.emory.edu;https://www.berkeley.edu;https://www.tongji.edu.cn;https://www.washington.edu;https://www.harvard.edu", "aff_unique_abbr": "Emory;UC Berkeley;Tongji;UW;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Fast Tree-Field Integrators: From Low Displacement Rank to Topological Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96014", "id": "Eok6HbcSRI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Eok6HbcSRI", "openreview": "https://openreview.net/forum?id=Eok6HbcSRI", "poster": "/media/PosterPDFs/NeurIPS%202024/96014.png?t=1733278930.5660098", "project": "", "author_site": "Krzysztof M Choromanski, Arijit Sehanobish, Somnath Basu Roy Chowdhury, Han Lin, Kumar Avinava Dubey, Tamas Sarlos, Snigdha Chaturvedi", "tldr": "", "abstract": "We present a new class of fast polylog-linear algorithms based on the theory of structured matrices (in particular *low displacement rank*) for integrating tensor fields defined on weighted trees. Several applications of the resulting *fast tree-field integrators* (FTFIs) are presented, including: (a) approximation of graph metrics with tree metrics, (b) graph classification, (c) modeling on meshes, and finally (d) *Topological Transformers* (TTs) (Choromanski et al., 2022) for images. For Topological Transformers, we propose new relative position encoding (RPE) masking mechanisms with as few as **three** extra learnable parameters per Transformer layer, leading to **1.0-1.5\\%+** accuracy gains. Importantly, most of FTFIs are **exact** methods, thus numerically equivalent to their brute-force counterparts. When applied to graphs with thousands of nodes, those exact algorithms provide **5.7-13x** speedups. We also provide an extensive theoretical analysis of our methods.", "keywords": "Tree Metrics;Low Displacement Rank;Field Integrators;Topological Transformers;Graph Theory;Efficient Algorithms on Graphs", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Krzysztof Marcin Choromanski;Arijit Sehanobish;Somnath Basu Roy Chowdhury;Han Lin;Kumar Avinava Dubey;Tamas Sarlos;Snigdha Chaturvedi", "authorids": "~Krzysztof_Marcin_Choromanski1;~Arijit_Sehanobish1;~Somnath_Basu_Roy_Chowdhury3;~Han_Lin1;~Kumar_Avinava_Dubey1;~Tamas_Sarlos1;~Snigdha_Chaturvedi2", "gender": ";M;;M;M;F;M", "homepage": ";https://github.com/arijitthegame/;https://www.cs.unc.edu/~somnath/;https://hl-hanlin.github.io/;https://sites.google.com/site/stamas/;https://sites.google.com/site/snigdhac/;https://sites.google.com/site/kumaravinavadubey/", "dblp": "78/11411;249/5322;190/7535;;48/959;77/8700;10/7789", "google_scholar": ";MEby6-QAAAAJ;https://scholar.google.co.in/citations?user=xGbyrIUAAAAJ;https://scholar.google.com/citations?view_op=list_works;c4YtO-MAAAAJ;gZD3EesAAAAJ;tBbUAfsAAAAJ", "orcid": ";0000-0003-2769-2003;;;;;", "linkedin": ";arijit-sehanobish-b76627112/;;han-lin-9336981a3/;;;", "or_profile": "~Krzysztof_Marcin_Choromanski1;~Arijit_Sehanobish1;~Somnath_Basu_Roy_Chowdhury3;~Han_Lin1;~Tamas_Sarlos1;~Snigdha_Chaturvedi2;~Kumar_A_Dubey1", "aff": "Google Brain Robotics & Columbia University;Kensho Technologies;Department of Computer Science, University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina at Chapel Hill;Google Research;Department of Computer Science, University of North Carolina, Chapel Hill;Google Research", "aff_domain": "columbia.edu;kensho.com;cs.unc.edu;cs.unc.edu;google.com;cs.unc.edu;google.com", "position": "research scientist & adjunct assistant professor;Applied Scientist;PhD student;PhD student;Staff Research Scientist;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nchoromanski2024fast,\ntitle={Fast Tree-Field Integrators: From Low Displacement Rank to Topological Transformers},\nauthor={Krzysztof Marcin Choromanski and Arijit Sehanobish and Somnath Basu Roy Chowdhury and Han Lin and Kumar Avinava Dubey and Tamas Sarlos and Snigdha Chaturvedi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Eok6HbcSRI}\n}", "github": "", "reviewers": "Lag9;H9MT;o1jE;SX39", "pdf_size": 1841051, "rating": "5;7;7;7", "confidence": "1;2;2;4", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;4;2;4", "wc_summary": "135;71;64;434", "wc_strengths": "82;39;50;76", "wc_weaknesses": "117;97;110;30", "wc_questions": "34;61;120;278", "wc_limitations": "8;7;34;1", "wc_review": "376;275;378;819", "wc_reply_reviewers": "0;68;30;63", "wc_reply_authors": "521;121;305;121", "reply_reviewers": "0;1;1;1", "reply_authors": "4;3;4;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 176.0, 151.50412535637437 ], "wc_strengths_avg": [ 61.75, 17.80975856096876 ], "wc_weaknesses_avg": [ 88.5, 34.528973341239094 ], "wc_questions_avg": [ 123.25, 94.60278801388466 ], "wc_limitations_avg": [ 12.5, 12.698425099200294 ], "wc_review_avg": [ 462.0, 210.27957580326245 ], "wc_reply_reviewers_avg": [ 40.25, 27.444261695297982 ], "wc_reply_authors_avg": [ 267.0, 164.76650144977893 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1482990803261306061&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "columbia.edu;kensho.com;cs.unc.edu;cs.unc.edu;google.com;cs.unc.edu;google.com", "author_num": 7, "aff_unique_index": "0;1;2;3;0;2;0", "aff_unique_norm": "Google;Kensho Technologies;University of North Carolina;University of North Carolina at Chapel Hill", "aff_unique_dep": "Google Brain Robotics;;Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://ai.google;https://www.kensho.com;https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "Google;;UNC;UNC Chapel Hill", "aff_campus_unique_index": "0;2;2;0;2;0", "aff_campus_unique": "Mountain View;;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CoIN: A Benchmark of Continual Instruction Tuning for Multimodel Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97786", "id": "EpnsUQavJA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EpnsUQavJA", "openreview": "https://openreview.net/forum?id=EpnsUQavJA", "poster": "/media/PosterPDFs/NeurIPS%202024/97786.png?t=1730121101.4687476", "project": "", "author_site": "Cheng Chen, Junchen Zhu, Xu Luo, Hengtao Shen, Lianli Gao, Jingkuan Song", "tldr": "", "abstract": "Instruction tuning demonstrates impressive performance in adapting Multimodal Large Language Models (MLLMs) to follow task instructions and improve generalization ability. By extending tuning across diverse tasks, MLLMs can further enhance their understanding of world knowledge and instruction intent. However, continual instruction tuning has been largely overlooked and there are no public benchmarks available. In this paper, we present CoIN, a comprehensive benchmark tailored for assessing the behavior of existing MLLMs under continual instruction tuning. CoIN comprises 10 meticulously crafted datasets spanning 8 tasks, ensuring diversity and serving as a robust evaluation framework to assess crucial aspects of continual instruction tuning, such as task order, instruction diversity and volume. Additionally, apart from traditional evaluation, we design another LLM-based metric to assess the knowledge preserved within MLLMs for reasoning. Following an in-depth evaluation of several MLLMs, we demonstrate that they still suffer catastrophic forgetting, and the failure in instruction alignment assumes the main responsibility, instead of reasoning knowledge forgetting. To this end, we introduce MoELoRA which is effective in retaining the previous instruction alignment.", "keywords": "Continual Learning;Multimodel Large Language Model;Instruction Tuning", "primary_area": "", "supplementary_material": "/attachment/da446b7280936ed02780a6f21f423f8287b572a5.pdf", "author": "Cheng Chen;Junchen Zhu;Xu Luo;Heng Tao Shen;Jingkuan Song;Lianli Gao", "authorids": "~Cheng_Chen21;~Junchen_Zhu1;~Xu_Luo1;~Heng_Tao_Shen3;~Jingkuan_Song3;~Lianli_Gao1", "gender": "M;M;M;M;F;M", "homepage": "https://zackschen.github.io/;;https://frankluox.github.io/;https://cfm.uestc.edu.cn/~songjingkuan/;https://lianligao.github.io/;https://cfm.uestc.edu.cn/~shenht/", "dblp": ";276/3203;06/2622-3;70/10575;123/9849.html;s/HTShen", "google_scholar": "https://scholar.google.com.hk/citations?user=RS6YyYAAAAAJ;J0qJuYAAAAAJ;https://scholar.google.com/citations?hl=en;F5Zy9V4AAAAJ;https://scholar.google.com.au/citations?user=zsm2dpYAAAAJ;https://scholar.google.com.au/citations?user=krryaDkAAAAJ", "orcid": "0000-0003-3662-0263;0000-0002-3872-6689;0000-0001-9827-1244;;;", "linkedin": ";;;;;", "or_profile": "~Cheng_Chen21;~Junchen_Zhu1;~Xu_Luo1;~Jingkuan_Song3;~Lianli_Gao1;~Hengtao_Shen1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China,;University of Electronic Science and Technology of China;Tongji University", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;tongji.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024coin,\ntitle={Co{IN}: A Benchmark of Continual Instruction Tuning for Multimodel Large Language Models},\nauthor={Cheng Chen and Junchen Zhu and Xu Luo and Heng Tao Shen and Jingkuan Song and Lianli Gao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EpnsUQavJA}\n}", "github": "", "reviewers": "zYZs;XVFj;FxCm;9Qeu", "pdf_size": 1766747, "rating": "5;5;8;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "66;64;66;66", "wc_strengths": "61;4;212;94", "wc_improvement": "139;4;36;118", "wc_limitations": "1;36;1;9", "wc_correctness": "9;1;6;1", "wc_clarity": "7;1;4;1", "wc_relation_to_prior_work": "9;12;10;1", "wc_documentation": "8;7;13;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "301;130;349;292", "wc_reply_reviewers": "0;0;69;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;4;2;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 65.5, 0.8660254037844386 ], "wc_strengths_avg": [ 92.75, 76.00452289173323 ], "wc_improvement_avg": [ 74.25, 55.91231975155386 ], "wc_limitations_avg": [ 11.75, 14.376630342329875 ], "wc_correctness_avg": [ 4.25, 3.418698582794336 ], "wc_clarity_avg": [ 3.25, 2.48746859276655 ], "wc_relation_to_prior_work_avg": [ 8.0, 4.183300132670378 ], "wc_documentation_avg": [ 7.25, 4.264680527307995 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 268.0, 82.56815366713731 ], "wc_reply_reviewers_avg": [ 17.25, 29.877876430563134 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16873865632098026565&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;tongji.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;Tongji University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "UESTC;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "3D Structure Prediction of Atomic Systems with Flow-based Direct Preference Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96013", "id": "EpusiLXfNd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EpusiLXfNd", "openreview": "https://openreview.net/forum?id=EpusiLXfNd", "poster": "", "project": "", "author_site": "Rui Jiao, Xiangzhe Kong, Wenbing Huang, Yang Liu", "tldr": "", "abstract": "Predicting high-fidelity 3D structures of atomic systems is a fundamental yet challenging problem in scientific domains. While recent work demonstrates the advantage of generative models in this realm, the exploration of different probability paths are still insufficient, and hallucinations during sampling are persistently occurring. To address these pitfalls, we introduce FlowDPO, a novel framework that explores various probability paths with flow matching models and further suppresses hallucinations using Direct Preference Optimization (DPO) for structure generation. Our approach begins with a pre-trained flow matching model to generate multiple candidate structures for each training sample. These structures are then evaluated and ranked based on their distance to the ground truth, resulting in an automatic preference dataset. Using this dataset, we apply DPO to optimize the original model, improving its performance in generating structures closely aligned with the desired reference distribution. As confirmed by our theoretical analysis, such paradigm and objective function are compatible with arbitrary Gaussian paths, exhibiting favorable universality. Extensive experimental results on antibodies and crystals demonstrate substantial benefits of our FlowDPO, highlighting its potential to advance the field of 3D structure prediction with generative models.", "keywords": "Flow Matching;Direct Preference Optimization;Geometric Graph Neural Networks;Structure Prediction", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Rui Jiao;Xiangzhe Kong;Wenbing Huang;Yang Liu", "authorids": "~Rui_Jiao1;~Xiangzhe_Kong1;~Wenbing_Huang1;~Yang_Liu19", "gender": "M;M;M;M", "homepage": "https://jiaor17.github.io/;https://kxz18.github.io/;https://gsai.ruc.edu.cn/english/wenbing_huang;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "223/1073;293/7526;155/3181-1.html;51/3710-5", "google_scholar": "buW16-AAAAAJ;0oSFYmkAAAAJ;0yNkmO4AAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "orcid": ";;;0000-0002-3087-242X", "linkedin": ";;;", "or_profile": "~Rui_Jiao1;~Xiangzhe_Kong1;~Wenbing_Huang1;~Yang_Liu19", "aff": "Tsinghua University;Tsinghua University;Renmin University of China;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\njiao2024d,\ntitle={3D Structure Prediction of Atomic Systems with Flow-based Direct Preference Optimization},\nauthor={Rui Jiao and Xiangzhe Kong and Wenbing Huang and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EpusiLXfNd}\n}", "github": "", "reviewers": "SYsR;jaPH;WEst", "pdf_size": 3196312, "rating": "5;6;6", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "91;49;93", "wc_strengths": "87;78;155", "wc_weaknesses": "246;218;224", "wc_questions": "24;65;102", "wc_limitations": "1;33;18", "wc_review": "449;443;592", "wc_reply_reviewers": "160;357;322", "wc_reply_authors": "277;403;341", "reply_reviewers": "1;1;2", "reply_authors": "4;3;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 77.66666666666667, 20.28683207293725 ], "wc_strengths_avg": [ 106.66666666666667, 34.37376260399137 ], "wc_weaknesses_avg": [ 229.33333333333334, 12.036980056845193 ], "wc_questions_avg": [ 63.666666666666664, 31.857320805254307 ], "wc_limitations_avg": [ 17.333333333333332, 13.072447700751718 ], "wc_review_avg": [ 494.6666666666667, 68.86863501543274 ], "wc_reply_reviewers_avg": [ 279.6666666666667, 85.81504659570035 ], "wc_reply_authors_avg": [ 340.3333333333333, 51.44144459696114 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MnWj8XtzDnIJ:scholar.google.com/&scioq=3D+Structure+Prediction+of+Atomic+Systems+with+Flow-based+Direct+Preference+Optimization&hl=en&as_sdt=0,48", "gs_version_total": 2, "email": "tsinghua.edu.cn;tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "EqaSEbU4LP", "title": "WikiDO: A New Benchmark Evaluating Cross-Modal Retrieval for Vision-Language Models", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Cross-modal (image-to-text and text-to-image) retrieval is an established task used in evaluation benchmarks to test the performance of vision-language models (VLMs). Several state-of-the-art VLMs (e.g. CLIP, BLIP-2) have achieved near-perfect performance on widely-used image-text retrieval benchmarks such as MSCOCO-Test-5K and Flickr30K-Test-1K. As a measure of out-of-distribution (OOD) generalization, prior works rely on zero-shot performance evaluated on one dataset (Flickr) using a VLM finetuned on another one (MSCOCO). We argue that such comparisons are insufficient to assess the OOD generalization capability of models due to high visual and linguistic similarity between the evaluation and finetuning datasets. To address this gap, we introduce WikiDO (drawn from Wikipedia Diversity Observatory), a novel cross-modal retrieval benchmark to assess the OOD generalization capabilities of pretrained VLMs. This consists of newly scraped 380K image-text pairs from Wikipedia with domain labels, a carefully curated, human-verified a)in-distribution (ID) test set (3K) and b) OOD test set (3K). The image-text pairs are very diverse in topics and geographical locations. We evaluate different VLMs of varying capacity on the \\wikido benchmark; BLIP-2 achieves zero-shot performance of $R@1\\approx66\\%$ on the OOD test set, compared to $\\approx$ $81\\%$ on COCO and $\\approx95\\%$ on Flickr. When fine-tuned on WikiDO, the $R@1$ improvement is at most $\\approx5\\%$ on OOD instances compared to $\\approx12\\%$ on ID instances. We probe the VLMs with varying finetuning objectives and datasets of varying sizes to identify what aids OOD generalization the most. Our results confirm that WikiDO offers a strong cross-modal benchmark for current VLMs in specifically evaluating for OOD generalization. Our benchmark is hosted as a competition at https://kaggle.com/competitions/wikido24 with public access to dataset and code.", "keywords": "Out-of-Distribution;cross-modal retrieval;vision-language models;evaluation", "primary_area": "", "supplementary_material": "/attachment/6e8772ae181018e8392a9b37106c9ba7732d5a69.pdf", "author": "Pavan Kalyan Tankala;Piyush Singh Pasi;Sahil Dharod;Azeem Motiwala;Preethi Jyothi;Aditi Chaudhary;Krishna Srinivasan", "authorids": "~Pavan_Kalyan_Tankala1;~Piyush_Singh_Pasi1;~Sahil_Dharod1;~Azeem_Motiwala1;~Preethi_Jyothi2;~Aditi_Chaudhary1;~Krishna_Srinivasan1", "gender": "M;;M;M;F;;M", "homepage": ";;;;http://www.cse.iitb.ac.in/~pjyothi;;https://krishna2.com", "dblp": "371/5125;;;;01/9014;225/7684;50/145.html", "google_scholar": "11_GsJAAAAAJ;AKlQVZEAAAAJ;;;https://scholar.google.co.in/citations?user=QN_uhu8AAAAJ;iNuUxiwAAAAJ;aYn5qFUAAAAJ", "orcid": ";;;;;;0000-0003-1366-0895", "linkedin": "pavan-kalyan-1b88351a0/;https://in.linkedin.com/in/piyushsinghpasi;sahil-dharod-23153b226;azeem-motiwala-311b6b239/;;;krishna2/", "or_profile": "~Pavan_Kalyan_Tankala1;~Piyush_Singh_Pasi1;~Sahil_Dharod1;~Azeem_Motiwala1;~Preethi_Jyothi2;~Aditi_Chaudhary1;~Krishna_Srinivasan1", "aff": "Indian Institute of Technology, Bombay;Amazon;Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay;Google;Research, Google", "aff_domain": "iitb.ac.in;amazon.com;iitb.ac.in;iitb.ac.in;iitb.ac.in;google.com;research.google.com", "position": "Undergrad student;Researcher;Undergrad student;Undergrad student;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\ntankala2024wikido,\ntitle={Wiki{DO}: A New Benchmark Evaluating Cross-Modal Retrieval for Vision-Language Models},\nauthor={Pavan Kalyan Tankala and Piyush Singh Pasi and Sahil Dharod and Azeem Motiwala and Preethi Jyothi and Aditi Chaudhary and Krishna Srinivasan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EqaSEbU4LP}\n}", "github": "", "project": "", "reviewers": "UUyd;sRnE;GkTz", "site": "https://openreview.net/forum?id=EqaSEbU4LP", "pdf_size": 2127276, "rating": "6;7;7", "confidence": "5;3;4", "wc_summary_and_contributions": "49;68;75", "wc_strengths": "46;42;52", "wc_improvement": "67;43;98", "wc_limitations": "1;1;6", "wc_correctness": "1;1;21", "wc_clarity": "1;1;4", "wc_relation_to_prior_work": "1;1;8", "wc_documentation": "1;1;5", "wc_additional_feedback": "1;1;1", "wc_review": "168;159;270", "wc_reply_reviewers": "0;12;11", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 64.0, 10.98483803552272 ], "wc_strengths_avg": [ 46.666666666666664, 4.109609335312651 ], "wc_improvement_avg": [ 69.33333333333333, 22.51419305435771 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_correctness_avg": [ 7.666666666666667, 9.428090415820632 ], "wc_clarity_avg": [ 2.0, 1.4142135623730951 ], "wc_relation_to_prior_work_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_documentation_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 199.0, 50.33885179461288 ], "wc_reply_reviewers_avg": [ 7.666666666666667, 5.436502143433364 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dSZs6nQ3XjkJ:scholar.google.com/&scioq=WikiDO:+A+New+Benchmark+Evaluating+Cross-Modal+Retrieval+for+Vision-Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;2;2", "aff_unique_norm": "Indian Institute of Technology Bombay;Amazon;Google", "aff_unique_dep": ";Amazon.com, Inc.;Google", "aff_unique_url": "https://www.iitb.ac.in;https://www.amazon.com;https://www.google.com", "aff_unique_abbr": "IIT Bombay;Amazon;Google", "aff_campus_unique_index": "0;0;0;0;2;2", "aff_campus_unique": "Bombay;;Mountain View", "aff_country_unique_index": "0;1;0;0;0;1;1", "aff_country_unique": "India;United States" }, { "title": "Constrained Diffusion Models via Dual Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96012", "id": "Es2Ey2tGmM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Es2Ey2tGmM", "openreview": "https://openreview.net/forum?id=Es2Ey2tGmM", "poster": "/media/PosterPDFs/NeurIPS%202024/96012.png?t=1733931717.9438694", "project": "", "author_site": "Shervin Khalafi, Dongsheng Ding, Alejandro Ribeiro", "tldr": "", "abstract": "Diffusion models have attained prominence for their ability to synthesize a probability distribution for a given dataset via a diffusion process, enabling the generation of new data points with high fidelity. However, diffusion processes are prone to generating samples that reflect biases in a training dataset. To address this issue, we develop constrained diffusion models by imposing diffusion constraints based on desired distributions that are informed by requirements. Specifically, we cast the training of diffusion models under requirements as a constrained distribution optimization problem that aims to reduce the distribution difference between original and generated data while obeying constraints on the distribution of generated data. We show that our constrained diffusion models generate new data from a mixture data distribution that achieves the optimal trade-off among objective and constraints. To train constrained diffusion models, we develop a dual training algorithm and characterize the optimality of the trained constrained diffusion model. We empirically demonstrate the effectiveness of our constrained models in two constrained generation tasks: (i) we consider a dataset with one or more underrepresented classes where we train the model with constraints to ensure fairly sampling from all classes during inference; (ii) we fine-tune a pre-trained diffusion model to sample from a new dataset while avoiding overfitting.", "keywords": "Constrained diffusion model;constrained optimization;Lagrangian method;dual algorithm", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Shervin Khalafi;Dongsheng Ding;Alejandro Ribeiro", "authorids": "~Shervin_Khalafi1;~Dongsheng_Ding1;~Alejandro_Ribeiro1", "gender": "M;;M", "homepage": "https://shervinkhalafi.github.io/;https://dongshed.github.io;https://alelab.seas.upenn.edu", "dblp": ";120/4610;32/15", "google_scholar": "rdfxlq8AAAAJ;Ixa7PJoAAAAJ;7mrPM4kAAAAJ", "orcid": ";;0000-0003-4230-9906", "linkedin": "shervin-khalafi-316b03221/;;", "or_profile": "~Shervin_Khalafi1;~Dongsheng_Ding1;~Alejandro_Ribeiro1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkhalafi2024constrained,\ntitle={Constrained Diffusion Models via Dual Training},\nauthor={Shervin Khalafi and Dongsheng Ding and Alejandro Ribeiro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Es2Ey2tGmM}\n}", "github": "", "reviewers": "Zh95;c1z8;Rjho;EijC", "pdf_size": 1664538, "rating": "5;5;5;6", "confidence": "3;3;5;4", "soundness": "3;2;3;3", "novelty": "3;3;3;2", "presentation": "2;2;3;3", "wc_summary": "75;100;60;77", "wc_strengths": "72;82;33;150", "wc_weaknesses": "357;142;314;75", "wc_questions": "3;92;2;44", "wc_limitations": "107;54;1;38", "wc_review": "614;470;410;384", "wc_reply_reviewers": "57;508;14;24", "wc_reply_authors": "54;440;71;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.0, 14.300349646075091 ], "wc_strengths_avg": [ 84.25, 42.14483954175173 ], "wc_weaknesses_avg": [ 222.0, 116.93801777009905 ], "wc_questions_avg": [ 35.25, 36.88749788207381 ], "wc_limitations_avg": [ 50.0, 38.11167800031901 ], "wc_review_avg": [ 469.5, 89.06598677385212 ], "wc_reply_reviewers_avg": [ 150.75, 206.87118576544196 ], "wc_reply_authors_avg": [ 147.25, 169.85490131285584 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7363646996142525153&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "upenn.edu;upenn.edu;upenn.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Leveraging an ECG Beat Diffusion Model for Morphological Reconstruction from Indirect Signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96011", "id": "Eu0nYM4BPo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Eu0nYM4BPo", "openreview": "https://openreview.net/forum?id=Eu0nYM4BPo", "poster": "", "project": "", "author_site": "Lisa Bedin, Gabriel Cardoso, Josselin Duchateau, Remi Dubois, Eric Moulines", "tldr": "", "abstract": "Electrocardiogram (ECG) signals provide essential information about the heart's condition and are widely used for diagnosing cardiovascular diseases. The morphology of a single heartbeat over the available leads is a primary biosignal for monitoring cardiac conditions. However, analyzing heartbeat morphology can be challenging due to noise and artifacts, missing leads, and a lack of annotated data.\nGenerative models, such as denoising diffusion generative models (DDMs), have proven successful in generating complex data. We introduce $\\texttt{BeatDiff}$, a light-weight DDM tailored for the morphology of multiple leads heartbeats.\nWe then show that many important ECG downstream tasks can be formulated as conditional generation methods in a Bayesian inverse problem framework using $\\texttt{BeatDiff}$ as priors. We propose $\\texttt{EM-BeatDiff}$, an Expectation-Maximization algorithm, to solve this conditional generation tasks without fine-tuning. We illustrate our results with several tasks, such as removal of ECG noise and artifacts (baseline wander, electrode motion), reconstruction of a 12-lead ECG from a single lead (useful for ECG reconstruction of smartwatch experiments), and unsupervised explainable anomaly detection. Numerical experiments show that the combination of $\\texttt{BeatDiff}$ and $\\texttt{EM-BeatDiff}$ outperforms SOTA methods for the problems considered in this work.", "keywords": "Electrocardiogram;Denoising Diffusion Generative Models;Conditional Sampling;Healthcare;Bayesian Inverse Problem", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Lisa Bedin;Gabriel Cardoso;Josselin Duchateau;Remi Dubois;Eric Moulines", "authorids": "~Lisa_Bedin2;~Gabriel_Cardoso1;~Josselin_Duchateau1;~Remi_Dubois1;~Eric_Moulines1", "gender": "F;M;M;;M", "homepage": ";https://gabrielvc.github.io/;;https://scholar.google.fr/citations?hl=fr&user=bN8sMLoAAAAJ;", "dblp": ";320/0681;;;54/2358", "google_scholar": ";WJbgdMsAAAAJ;oV1OhF0AAAAJ;https://scholar.google.fr/citations?hl=fr;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ", "orcid": ";;0000-0002-4367-1117;;0000-0002-2058-0693", "linkedin": "lisa-bedin-82a90a243/;gabriel-victorino-cardoso-379b09127/;;;", "or_profile": "~Lisa_Bedin2;~Gabriel_Cardoso1;~Josselin_Duchateau1;~Remi_Dubois1;~Eric_Moulines1", "aff": "\u00c9cole Polytechnique;\u00c9cole Polytechnique;CHU Bordeaux ;;Ecole polytechnique", "aff_domain": "polytechnique.fr;polytechnique.edu;chu-bordeaux.fr;;polytechnique.edu", "position": "PhD student;PhD student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nbedin2024leveraging,\ntitle={Leveraging an {ECG} Beat Diffusion Model for Morphological Reconstruction from Indirect Signals},\nauthor={Lisa Bedin and Gabriel Cardoso and Josselin Duchateau and Remi Dubois and Eric Moulines},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Eu0nYM4BPo}\n}", "github": "", "reviewers": "TuvT;h18s;UDMo;sRY5", "pdf_size": 10634572, "rating": "4;4;7;8", "confidence": "4;4;3;5", "soundness": "2;2;4;3", "novelty": "3;2;4;3", "presentation": "2;1;3;4", "wc_summary": "83;52;55;91", "wc_strengths": "26;63;64;75", "wc_weaknesses": "183;104;119;74", "wc_questions": "167;106;80;2", "wc_limitations": "10;72;33;24", "wc_review": "469;397;351;266", "wc_reply_reviewers": "413;137;0;5", "wc_reply_authors": "899;154;85;85", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 70.25, 17.020208576865326 ], "wc_strengths_avg": [ 57.0, 18.506755523321747 ], "wc_weaknesses_avg": [ 120.0, 39.818337484129096 ], "wc_questions_avg": [ 88.75, 59.20884646739877 ], "wc_limitations_avg": [ 34.75, 23.01494079940246 ], "wc_review_avg": [ 370.75, 73.6626601474587 ], "wc_reply_reviewers_avg": [ 138.75, 167.59829205573666 ], "wc_reply_authors_avg": [ 305.75, 343.669445106777 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.19802950859533483, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hRo-GOVRTEUJ:scholar.google.com/&scioq=Leveraging+an+ECG+Beat+Diffusion+Model+for+Morphological+Reconstruction+from+Indirect+Signals&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "polytechnique.fr;polytechnique.edu;chu-bordeaux.fr;;polytechnique.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Ecole Polytechnique;CHU Bordeaux", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.chu-bordeaux.fr", "aff_unique_abbr": "X;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Understanding and Improving Training-free Loss-based Diffusion Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96010", "id": "Eu80DGuOcs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Eu80DGuOcs", "openreview": "https://openreview.net/forum?id=Eu80DGuOcs", "poster": "/media/PosterPDFs/NeurIPS%202024/96010.png?t=1731235596.9931848", "project": "", "author_site": "Yifei Shen, XINYANG JIANG, Yifan Yang, Yezhen Wang, Dongqi Han, Dongsheng Li", "tldr": "", "abstract": "Adding additional guidance to pretrained diffusion models has become an increasingly popular research area, with extensive applications in computer vision, reinforcement learning, and AI for science. Recently, several studies have proposed training-free loss-based guidance by using off-the-shelf networks pretrained on clean images. This approach enables zero-shot conditional generation for universal control formats, which appears to offer a free lunch in diffusion guidance. In this paper, we aim to develop a deeper understanding of training-free guidance, as well as overcome its limitations. We offer a theoretical analysis that supports training-free guidance from the perspective of optimization, distinguishing it from classifier-based (or classifier-free) guidance. To elucidate their drawbacks, we theoretically demonstrate that training-free guidance is more susceptible to misaligned gradients and exhibits slower convergence rates compared to classifier guidance. We then introduce a collection of techniques designed to overcome the limitations, accompanied by theoretical rationale and empirical evidence. Our experiments in image and motion generation confirm the efficacy of these techniques.", "keywords": "Training-free guidance;universal guidance;motion diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/e45ba082df496a3eebbb84ffe51a823fa15ecd56.zip", "author": "Yifei Shen;XINYANG JIANG;Yifan Yang;Yezhen Wang;Dongqi Han;Dongsheng Li", "authorids": "~Yifei_Shen1;~XINYANG_JIANG2;~Yifan_Yang9;~Yezhen_Wang1;~Dongqi_Han1;~Dongsheng_Li2", "gender": "M;M;M;M;M;M", "homepage": "https://openreview.net/profile?id=~Yifei_Shen1;;https://www.microsoft.com/en-us/research/people/yifanyang/;;https://frosthan.github.io/;http://recmind.cn", "dblp": "51/609.html;155/6316;83/89-4;;;254/0830-2.html", "google_scholar": ";JiTfWVMAAAAJ;;g-VEnLEAAAAJ;3V_9fRUAAAAJ;VNg5rA8AAAAJ", "orcid": ";;;;0000-0002-6872-7121;0000-0003-3103-8442", "linkedin": ";xinyang-jiang-ab5416b0/;yifyang/;;;", "or_profile": "~Yifei_Shen1;~XINYANG_JIANG2;~Yifan_Yang9;~Yezhen_Wang1;~Dongqi_Han1;~Dongsheng_Li2", "aff": "Microsoft Research Asia;Microsoft;Microsoft;National University of Singapore;Microsoft;Microsoft Research Asia", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;nus.edu;microsoft.com;microsoft.com", "position": "Research Cheerleader;Senior Researcher;Researcher;PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nshen2024understanding,\ntitle={Understanding and Improving Training-free Loss-based Diffusion Guidance},\nauthor={Yifei Shen and XINYANG JIANG and Yifan Yang and Yezhen Wang and Dongqi Han and Dongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Eu80DGuOcs}\n}", "github": "", "reviewers": "uPWj;DfoY;P7k4;wPsc", "pdf_size": 9233439, "rating": "5;5;5;7", "confidence": "5;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "56;137;30;135", "wc_strengths": "23;35;52;58", "wc_weaknesses": "385;268;96;37", "wc_questions": "71;89;30;837", "wc_limitations": "1;18;10;92", "wc_review": "536;547;218;1159", "wc_reply_reviewers": "192;12;42;48", "wc_reply_authors": "702;48;103;58", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.5, 47.40516849458506 ], "wc_strengths_avg": [ 42.0, 13.838352503098047 ], "wc_weaknesses_avg": [ 196.5, 138.00815193313764 ], "wc_questions_avg": [ 256.75, 335.68912329713635 ], "wc_limitations_avg": [ 30.25, 36.15504805694496 ], "wc_review_avg": [ 615.0, 340.7381692737108 ], "wc_reply_reviewers_avg": [ 73.5, 69.76209572540091 ], "wc_reply_authors_avg": [ 227.75, 274.5909457720702 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2121932679690245024&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;microsoft.com;nus.edu;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Microsoft;National University of Singapore", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;https://www.nus.edu.sg", "aff_unique_abbr": "MSR Asia;NUS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;1;1;2;1;0", "aff_country_unique": "China;United States;Singapore" }, { "title": "Automating Dataset Updates Towards Reliable and Timely Evaluation of Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97784", "id": "EvEqYlQv8T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EvEqYlQv8T", "openreview": "https://openreview.net/forum?id=EvEqYlQv8T", "poster": "/media/PosterPDFs/NeurIPS%202024/97784.png?t=1730977951.565747", "project": "", "author_site": "Jiahao Ying, Yixin Cao, Yushi Bai, QIANRU SUN, Bo Wang, Wei Tang, Zhaojun Ding, Yizhe Yang, Xuanjing Huang, Shuicheng Yan", "tldr": "", "abstract": "Large language models (LLMs) have achieved impressive performance across various natural language benchmarks, prompting a continual need to curate more difficult datasets for larger LLMs, which is costly and time-consuming. In this paper, we propose to automate dataset updating and provide systematical analysis regarding its effectiveness in dealing with benchmark leakage issue, difficulty control, and stability. Thus, once current benchmark has been mastered or leaked, we can update it for timely and reliable evaluation. There are two updating strategies: 1) mimicking strategy to generate similar samples based on original data, preserving stylistic and contextual essence, and 2) extending strategy that further expands existing samples at varying cognitive levels by adapting Bloom\u2019s taxonomy of educational objectives. Extensive experiments on updated MMLU and BIG-Bench demonstrate the stability of the proposed strategies and find that the mimicking strategy can effectively alleviate issues of overestimation from benchmark leakage. In cases where the efficient mimicking strategy fails, our extending strategy still shows promising results. Additionally, by controlling the difficulty, we can better discern the models\u2019 performance and enable fine-grained analysis \u2014 neither too difficult nor too easy an exam can fairly judge students\u2019 learning status. To the best of our knowledge, we are the first to automate updating benchmarks for reliable and timely evaluation. Our demo leaderboard can be found at https://yingjiahao14.github.io/Automating-DatasetUpdates/.", "keywords": "LLMs evaluation;data leakage;dataset updates;overestimation", "primary_area": "", "supplementary_material": "", "author": "Jiahao Ying;Yixin Cao;Yushi Bai;Qianru Sun;Bo Wang;Wei Tang;Zhaojun Ding;Yizhe Yang;Xuanjing Huang;Shuicheng YAN", "authorids": "~Jiahao_Ying1;~Yixin_Cao2;~Yushi_Bai1;~Qianru_Sun2;~Bo_Wang16;~Wei_Tang19;~Zhaojun_Ding1;~Yizhe_Yang1;~Xuanjing_Huang1;~Shuicheng_YAN3", "gender": "M;M;M;F;Not Specified;M;Not Specified;M;F;M", "homepage": ";https://sites.google.com/view/yixin-homepage;https://bys0318.github.io/;https://qianrusun.com/;;;;;https://xuanjing-huang.github.io/;https://yanshuicheng.ai/", "dblp": "303/6904;20/8038-2;302/4421;127/6132.html;72/6811-13.html;58/1874-15;;;05/6735-1;y/ShuichengYan", "google_scholar": "JOEJg9UAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.de/citations?user=fNfrGMIAAAAJ;CjV_87gAAAAJ;XTBDCOIAAAAJ;;VqTU-GYAAAAJ;RGsMgZA4H78C;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": ";;;0000-0003-2689-317X;0000-0001-8102-5346;0000-0001-6561-7026;;;0000-0001-9197-9426;", "linkedin": "jiahao-ying-89b456265/;;;;;;zhaojun-ding-383a0a241/;;;", "or_profile": "~Jiahao_Ying1;~Yixin_Cao2;~Yushi_Bai1;~Qianru_Sun2;~Bo_Wang16;~Wei_Tang19;~Zhaojun_Ding1;~Yizhe_Yang1;~Xuanjing_Huang1;~Shuicheng_YAN3", "aff": "Singapore Management University;Singapore Management University;Tsinghua University;Singapore Management University;School of Computer Science & Technology, Beijing Institute of Technology;University of Science and Technology of China;University of Georgia;Beijing Institute of Technology;Fudan University;sea Group", "aff_domain": "smu.edu.sg;smu.edu.sg;tsinghua.edu.cn;smu.edu.sg;bit.edu.cn;ustc.edu.cn;uga.edu;bit.edu.cn;fudan.edu.cn;sea.com", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor;PhD student;PhD student;PhD student;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nying2024automating,\ntitle={Automating Dataset Updates Towards Reliable and Timely Evaluation of Large Language Models},\nauthor={Jiahao Ying and Yixin Cao and Yushi Bai and Qianru Sun and Bo Wang and Wei Tang and Zhaojun Ding and Yizhe Yang and Xuanjing Huang and Shuicheng YAN},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EvEqYlQv8T}\n}", "github": "", "reviewers": "ajgi;La6o;Eypk;miBT", "pdf_size": 639682, "rating": "6;6;6;7", "confidence": "4;4;3;3", "wc_summary_and_contributions": "104;37;77;93", "wc_strengths": "6;25;2;5", "wc_improvement": "6;59;34;5", "wc_limitations": "15;118;23;99", "wc_correctness": "10;9;8;28", "wc_clarity": "5;9;8;36", "wc_relation_to_prior_work": "1;11;1;2", "wc_documentation": "12;21;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "160;290;155;270", "wc_reply_reviewers": "18;0;0;48", "wc_reply_authors": "55;55;55;80", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;5", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 77.75, 25.410381736605217 ], "wc_strengths_avg": [ 9.5, 9.069178573608527 ], "wc_improvement_avg": [ 26.0, 22.327113561766108 ], "wc_limitations_avg": [ 63.75, 45.33969011804117 ], "wc_correctness_avg": [ 13.75, 8.257572282456872 ], "wc_clarity_avg": [ 14.5, 12.5 ], "wc_relation_to_prior_work_avg": [ 3.75, 4.205650960315181 ], "wc_documentation_avg": [ 8.75, 8.37779804005802 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 218.75, 61.68214895737664 ], "wc_reply_reviewers_avg": [ 16.5, 19.615045245933032 ], "wc_reply_authors_avg": [ 61.25, 10.825317547305483 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11091993537728274237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "smu.edu.sg;smu.edu.sg;tsinghua.edu.cn;smu.edu.sg;bit.edu.cn;ustc.edu.cn;uga.edu;bit.edu.cn;fudan.edu.cn;sea.com", "author_num": 10, "aff_unique_index": "0;0;1;0;2;3;4;2;5;6", "aff_unique_norm": "Singapore Management University;Tsinghua University;Beijing Institute of Technology;University of Science and Technology of China;University of Georgia;Fudan University;Sea Group", "aff_unique_dep": ";;School of Computer Science & Technology;;;;", "aff_unique_url": "https://www.smu.edu.sg;https://www.tsinghua.edu.cn;http://www.bit.edu.cn/;http://www.ustc.edu.cn;https://www.uga.edu;https://www.fudan.edu.cn;", "aff_unique_abbr": "SMU;THU;BIT;USTC;UGA;Fudan;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;1;2;1;1", "aff_country_unique": "Singapore;China;United States;" }, { "title": "Stylebreeder: Exploring and Democratizing Artistic Styles through Text-to-Image Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97783", "id": "EvgyfFsv0w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EvgyfFsv0w", "openreview": "https://openreview.net/forum?id=EvgyfFsv0w", "poster": "", "project": "", "author_site": "Matthew Zheng, Enis Simsar, Hidir Yesiltepe, Federico Tombari, Joel Simon, Pinar Yanardag", "tldr": "", "abstract": "Text-to-image models are becoming increasingly popular, revolutionizing the landscape of digital art creation by enabling highly detailed and creative visual content generation. These models have been widely employed across various domains, particularly in art generation, where they facilitate a broad spectrum of creative expression and democratize access to artistic creation. In this paper, we introduce STYLEBREEDER, a comprehensive dataset of 6.8M images and 1.8M prompts generated by 95K users on Artbreeder, a platform that has emerged as a significant hub for creative exploration with over 13M users. We introduce a series of tasks with this dataset aimed at identifying diverse artistic styles, generating personalized content, and recommending styles based on user interests. By documenting unique, user-generated styles that transcend conventional categories like 'cyberpunk' or 'Picasso,' we explore the potential for unique, crowd-sourced styles that could provide deep insights into the collective creative psyche of users worldwide. We also evaluate different personalization methods to enhance artistic expression and introduce a style atlas, making these models available in LoRA format for public use. Our research demonstrates the potential of text-to-image diffusion models to uncover and promote unique artistic expressions, further democratizing AI in art and fostering a more diverse and inclusive artistic community. The dataset, code, and models are available at https://stylebreeder.github.io under a Public Domain (CC0) license.", "keywords": "diffusion;creativity;image generation;artistic styles", "primary_area": "", "supplementary_material": "", "author": "Matthew Zheng;Enis Simsar;Hidir Yesiltepe;Federico Tombari;Joel Simon;Pinar Yanardag", "authorids": "~Matthew_Zheng1;~Enis_Simsar1;~Hidir_Yesiltepe1;~Federico_Tombari1;~Joel_Simon1;~Pinar_Yanardag1", "gender": "M;M;M;M;M;F", "homepage": ";https://enis.dev;https://sites.google.com/view/hidir-yesiltepe;https://federicotombari.github.io/;https://joelsimon.net/;http://pinguar.org", "dblp": ";247/8740;;16/3539;;143/7485", "google_scholar": ";xDvFUb4AAAAJ;9RxXfaUAAAAJ;TFsE4BIAAAAJ;;qzczdd8AAAAJ", "orcid": ";0000-0002-6662-3249;;0000-0001-5598-5212;;0009-0003-3452-7417", "linkedin": "matthew-zheng-b93973220;enisimsar;hidiryesiltepe/;fedet/;;", "or_profile": "~Matthew_Zheng1;~Enis_Simsar1;~Hidir_Yesiltepe1;~Federico_Tombari1;~Joel_Simon1;~Pinar_Yanardag1", "aff": "Virginia Polytechnic Institute and State University;Department of Computer Science, ETHZ - ETH Zurich;Virginia Polytechnic Institute and State University;Technical University Munich (TUM);Rockefeller University;Virginia Polytechnic Institute and State University", "aff_domain": "vt.edu;inf.ethz.ch;vt.edu;in.tum.de;rockefeller.edu;vt.edu", "position": "MS student;PhD student;PhD student;Lecturer;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzheng2024stylebreeder,\ntitle={Stylebreeder: Exploring and Democratizing Artistic Styles through Text-to-Image Models},\nauthor={Matthew Zheng and Enis Simsar and Hidir Yesiltepe and Federico Tombari and Joel Simon and Pinar Yanardag},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=EvgyfFsv0w}\n}", "github": "", "reviewers": "yd7g;o4hr;WApe", "pdf_size": 11209261, "rating": "6;7;8", "confidence": "4;4;5", "wc_summary_and_contributions": "66;100;31", "wc_strengths": "3;90;56", "wc_improvement": "4;124;118", "wc_limitations": "12;9;1", "wc_correctness": "9;8;1", "wc_clarity": "4;12;1", "wc_relation_to_prior_work": "17;7;1", "wc_documentation": "1;20;1", "wc_additional_feedback": "1;1;1", "wc_review": "117;371;211", "wc_reply_reviewers": "10;144;13", "wc_reply_authors": "32;0;0", "reply_reviewers": "1;2;1", "reply_authors": "4;3;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 65.66666666666667, 28.1701181317288 ], "wc_strengths_avg": [ 49.666666666666664, 35.798820588890294 ], "wc_improvement_avg": [ 82.0, 55.20869496736904 ], "wc_limitations_avg": [ 7.333333333333333, 4.642796092394707 ], "wc_correctness_avg": [ 6.0, 3.559026084010437 ], "wc_clarity_avg": [ 5.666666666666667, 4.642796092394707 ], "wc_relation_to_prior_work_avg": [ 8.333333333333334, 6.599663291074444 ], "wc_documentation_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 233.0, 104.85545606532197 ], "wc_reply_reviewers_avg": [ 55.666666666666664, 62.473105324522564 ], "wc_reply_authors_avg": [ 10.666666666666666, 15.084944665313014 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13467018192492209715&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "vt.edu;inf.ethz.ch;vt.edu;in.tum.de;rockefeller.edu;vt.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Virginia Tech;ETH Zurich;Technical University Munich;Rockefeller University", "aff_unique_dep": ";Department of Computer Science;;", "aff_unique_url": "https://www.vt.edu;https://www.ethz.ch;https://www.tum.de;https://www.rockefeller.edu", "aff_unique_abbr": "VT;ETHZ;TUM;RU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "United States;Switzerland;Germany" }, { "title": "Effective Rank Analysis and Regularization for Enhanced 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96009", "id": "EwWpAPzcay", "proceeding": "", "pdf": "https://openreview.net/pdf?id=EwWpAPzcay", "openreview": "https://openreview.net/forum?id=EwWpAPzcay", "poster": "", "project": "", "author_site": "Junha Hyung, Susung Hong, Sungwon Hwang, Jaeseong Lee, Jaegul Choo, Jin-Hwa Kim", "tldr": "", "abstract": "3D reconstruction from multi-view images is one of the fundamental challenges in computer vision and graphics. Recently, 3D Gaussian Splatting (3DGS) has emerged as a promising technique capable of real-time rendering with high-quality 3D reconstruction. This method utilizes 3D Gaussian representation and tile-based splatting techniques, bypassing the expensive neural field querying. Despite its potential, 3DGS encounters challenges such as needle-like artifacts, suboptimal geometries, and inaccurate normals caused by the Gaussians converging into anisotropic shapes with one dominant variance. We propose using the effective rank analysis to examine the shape statistics of 3D Gaussian primitives, and identify the Gaussians indeed converge into needle-like shapes with the effective rank 1. To address this, we introduce the effective rank as a regularization, which constrains the structure of the Gaussians. Our new regularization method enhances normal and geometry reconstruction while reducing needle-like artifacts. The approach can be integrated as an add-on module to other 3DGS variants, improving their quality without compromising visual fidelity. The project page is available at https://junhahyung.github.io/erankgs.github.io/.", "keywords": "3D reconstruction;3D Gaussian Splatting;NeRF;Surface reconstruction;3DGS regularization", "primary_area": "machine_vision", "supplementary_material": "/attachment/c1cd2739af075720e85da2eaf4e9205330c94ab7.zip", "author": "Junha Hyung;Susung Hong;Sungwon Hwang;Jaeseong Lee;Jaegul Choo;Jin-Hwa Kim", "authorids": "~Junha_Hyung1;~Susung_Hong1;~Sungwon_Hwang1;~Jaeseong_Lee2;~Jaegul_Choo1;~Jin-Hwa_Kim1", "gender": "M;M;M;M;Unspecified;M", "homepage": "https://junhahyung.github.io/;https://susunghong.github.io/;https://deepshwang.github.io/;https://sites.google.com/site/jaegulchoo/;http://wityworks.com;https://leejesse.github.io/", "dblp": ";330/5127;287/4515;07/2074;48/258;", "google_scholar": "DsaV5aQAAAAJ;HigIHvUAAAAJ;whmeZAMAAAAJ;GHJYsLEAAAAJ;https://scholar.google.co.kr/citations?user=3f2wPekAAAAJ;vWkHKrMAAAAJ", "orcid": ";;;;0000-0002-0423-0415;", "linkedin": ";;;;;", "or_profile": "~Junha_Hyung1;~Susung_Hong1;~Sungwon_Hwang1;~Jaegul_Choo1;~Jin-Hwa_Kim1;~Jesse_Jaeseong_Lee1", "aff": "Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;NAVER;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.ac.kr;navercorp.com;kaist.ac.kr", "position": "PhD student;Undergrad student;PhD student;Associate Professor;Research Scientist;MS student", "bibtex": "@inproceedings{\nhyung2024effective,\ntitle={Effective Rank Analysis and Regularization for Enhanced 3D Gaussian Splatting},\nauthor={Junha Hyung and Susung Hong and Sungwon Hwang and Jaeseong Lee and Jaegul Choo and Jin-Hwa Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=EwWpAPzcay}\n}", "github": "", "reviewers": "n78p;w1Sn;biCn;7LDG", "pdf_size": 9593288, "rating": "5;5;6;7", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;1;2;3", "presentation": "3;3;3;4", "wc_summary": "133;58;118;57", "wc_strengths": "45;84;97;81", "wc_weaknesses": "213;475;152;110", "wc_questions": "2;52;27;48", "wc_limitations": "2;32;11;40", "wc_review": "395;701;405;336", "wc_reply_reviewers": "38;158;72;194", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 34.41293361513953 ], "wc_strengths_avg": [ 76.75, 19.292161620720474 ], "wc_weaknesses_avg": [ 237.5, 141.92691781335915 ], "wc_questions_avg": [ 32.25, 19.879323429131084 ], "wc_limitations_avg": [ 21.25, 15.35211711784404 ], "wc_review_avg": [ 459.25, 142.042907249887 ], "wc_reply_reviewers_avg": [ 115.5, 62.98214032565105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13125311668452185278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.ac.kr;navercorp.com;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University;NAVER Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr;https://www.naver.com", "aff_unique_abbr": "KAIST;KU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Towards a Scalable Reference-Free Evaluation of Generative Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96008", "id": "Ex3rPvEct8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ex3rPvEct8", "openreview": "https://openreview.net/forum?id=Ex3rPvEct8", "poster": "/media/PosterPDFs/NeurIPS%202024/96008.png?t=1731752271.2878017", "project": "", "author_site": "Azim Ospanov, Jingwei Zhang, Mohammad Jalali, Xuenan Cao, Andrej Bogdanov, Farzan Farnia", "tldr": "", "abstract": "While standard evaluation scores for generative models are mostly reference-based, a reference-dependent assessment of generative models could be generally difficult due to the unavailability of applicable reference datasets. Recently, the reference-free entropy scores, VENDI and RKE, have been proposed to evaluate the diversity of generated data. However, estimating these scores from data leads to significant computational costs for large-scale generative models. In this work, we leverage the random Fourier features framework to reduce the metrics' complexity and propose the *Fourier-based Kernel Entropy Approximation (FKEA)* method. We utilize FKEA's approximated eigenspectrum of the kernel matrix to efficiently estimate the mentioned entropy scores. Furthermore, we show the application of FKEA's proxy eigenvectors to reveal the method's identified modes in evaluating the diversity of produced samples. We provide a stochastic implementation of the FKEA assessment algorithm with a complexity $O(n)$ linearly growing with sample size $n$. We extensively evaluate FKEA's numerical performance in application to standard image, text, and video datasets. Our empirical results indicate the method's scalability and interpretability applied to large-scale generative models. The codebase is available at [https://github.com/aziksh-ospanov/FKEA](https://github.com/aziksh-ospanov/FKEA).", "keywords": "Evaluation of Generative Models;Scalable Algorithms;Kernel Methods;Random Fourier Features", "primary_area": "generative_models", "supplementary_material": "/attachment/a0d3621ac3f058cfeaa1ca3bc3be988c89fd6c46.zip", "author": "Azim Ospanov;Jingwei Zhang;Mohammad Jalali;Xuenan Cao;Andrej Bogdanov;Farzan Farnia", "authorids": "~Azim_Ospanov1;~Jingwei_Zhang9;~Mohammad_Jalali1;~Xuenan_Cao1;~Andrej_Bogdanov2;~Farzan_Farnia1", "gender": "M;M;M;;M;M", "homepage": ";;https://mjalali.github.io/;https://www2.crs.cuhk.edu.hk/faculty-staff/teaching-faculty/cao-xuenan;https://andrejb.net;https://www.cse.cuhk.edu.hk/~farnia/", "dblp": ";;;;;132/7757", "google_scholar": ";;NxaTDyUAAAAJ;RmJtqkUAAAAJ;;GYPCqcYAAAAJ", "orcid": "0009-0008-7174-3924;;0000-0003-1203-6812;;;0000-0002-6049-9232", "linkedin": "azim-ospanov/;anthonzhang/;mjalali/;;;farzan-farnia-00798335", "or_profile": "~Azim_Ospanov1;~Jingwei_Zhang9;~Mohammad_Jalali1;~Xuenan_Cao1;~Andrej_Bogdanov2;~Farzan_Farnia1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong;University of Ottawa;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cuhk.edu.hk;uottawa.ca;cuhk.edu.hk", "position": "PhD student;PhD student;Junior Research Assistant;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nospanov2024towards,\ntitle={Towards a Scalable Reference-Free Evaluation of Generative Models},\nauthor={Azim Ospanov and Jingwei Zhang and Mohammad Jalali and Xuenan Cao and Andrej Bogdanov and Farzan Farnia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ex3rPvEct8}\n}", "github": "", "reviewers": "ymdX;ohKy;NG2d", "pdf_size": 9354040, "rating": "5;5;5", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;3;3", "wc_summary": "91;108;71", "wc_strengths": "57;37;43", "wc_weaknesses": "162;58;25", "wc_questions": "125;3;54", "wc_limitations": "15;3;1", "wc_review": "450;209;194", "wc_reply_reviewers": "25;12;0", "wc_reply_authors": "79;76;76", "reply_reviewers": "1;1;0", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.0, 15.121728296285006 ], "wc_strengths_avg": [ 45.666666666666664, 8.379870059984356 ], "wc_weaknesses_avg": [ 81.66666666666667, 58.37998134825175 ], "wc_questions_avg": [ 60.666666666666664, 50.02888054802835 ], "wc_limitations_avg": [ 6.333333333333333, 6.18241233033047 ], "wc_review_avg": [ 284.3333333333333, 117.3039735994575 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 10.208928554075703 ], "wc_reply_authors_avg": [ 77.0, 1.4142135623730951 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=802383889960662163&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cuhk.edu.hk;uottawa.ca;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;University of Ottawa", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.uottawa.ca", "aff_unique_abbr": "CUHK;U Ottawa", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Canada" }, { "title": "LLaNA: Large Language and NeRF Assistant", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96007", "id": "ExeIyx6U0Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ExeIyx6U0Z", "openreview": "https://openreview.net/forum?id=ExeIyx6U0Z", "poster": "/media/PosterPDFs/NeurIPS%202024/96007.png?t=1732876445.4503653", "project": "", "author_site": "Andrea Amaduzzi, Pierluigi Zama Ramirez, Giuseppe Lisanti, Samuele Salti, Luigi Di Stefano", "tldr": "", "abstract": "Multimodal Large Language Models (MLLMs) have demonstrated an excellent understanding of images and 3D data. However, both modalities have shortcomings in holistically capturing the appearance and geometry of objects. Meanwhile, Neural Radiance Fields (NeRFs), which encode information within the weights of a simple Multi-Layer Perceptron (MLP), have emerged as an increasingly widespread modality that simultaneously encodes the geometry and photorealistic appearance of objects. This paper investigates the feasibility and effectiveness of ingesting NeRF into MLLM. We create LLaNA, the first general-purpose NeRF-language\nassistant capable of performing new tasks such as NeRF captioning and Q&A. Notably, our method directly processes the weights of the NeRF\u2019s MLP to extract information about the represented objects without the need to render images or materialize 3D data structures. Moreover, we build a dataset of NeRFs with text annotations for various NeRF-language tasks with no human intervention.\nBased on this dataset, we develop a benchmark to evaluate the NeRF understanding capability of our method. Results show that processing NeRF weights performs favourably against extracting 2D or 3D representations from NeRFs.", "keywords": "LLM;NeRF;VQA", "primary_area": "machine_vision", "supplementary_material": "", "author": "Andrea Amaduzzi;Pierluigi Zama Ramirez;Giuseppe Lisanti;Samuele Salti;Luigi di Stefano", "authorids": "~Andrea_Amaduzzi1;~Pierluigi_Zama_Ramirez1;~Giuseppe_Lisanti3;~Samuele_Salti1;~Luigi_di_Stefano1", "gender": "M;M;M;M;M", "homepage": "https://www.unibo.it/sitoweb/andrea.amaduzzi4;https://pierlui92.github.io/;https://www.unibo.it/sitoweb/giuseppe.lisanti/en;https://www.unibo.it/sitoweb/samuele.salti/en;https://www.unibo.it/sitoweb/luigi.distefano/en", "dblp": ";228/7804;14/8618;31/495.html;00/2029", "google_scholar": ";https://scholar.google.com/citations?hl=it;https://scholar.google.it/citations?user=OdDBed4AAAAJ;https://scholar.google.it/citations?user=1kcIJG0AAAAJ;https://scholar.google.it/citations?user=xZVTzyAAAAAJ", "orcid": ";0000-0001-7734-5064;0000-0002-0785-9972;;0000-0001-6014-6421", "linkedin": ";pierluigi-zama-ramirez-b02770171/;;;", "or_profile": "~Andrea_Amaduzzi1;~Pierluigi_Zama_Ramirez1;~Giuseppe_Lisanti3;~Samuele_Salti1;~Luigi_di_Stefano1", "aff": "University of Bologna;University of Bologna;University of Bologna;University of Bologna;University of Bologna", "aff_domain": "unibo.it;unibo.it;unibo.it;unibo.it;unibo.it", "position": "PhD student;Assistant Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\namaduzzi2024llana,\ntitle={{LL}a{NA}: Large Language and Ne{RF} Assistant},\nauthor={Andrea Amaduzzi and Pierluigi Zama Ramirez and Giuseppe Lisanti and Samuele Salti and Luigi di Stefano},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ExeIyx6U0Z}\n}", "github": "", "reviewers": "W4u1;aHSF;ZJvT", "pdf_size": 21101088, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "93;102;26", "wc_strengths": "52;80;57", "wc_weaknesses": "188;491;270", "wc_questions": "43;122;5", "wc_limitations": "12;40;9", "wc_review": "388;835;367", "wc_reply_reviewers": "111;68;65", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.66666666666667, 33.9050963065371 ], "wc_strengths_avg": [ 63.0, 12.192894105447921 ], "wc_weaknesses_avg": [ 316.3333333333333, 127.9644047729246 ], "wc_questions_avg": [ 56.666666666666664, 48.732831734764694 ], "wc_limitations_avg": [ 20.333333333333332, 13.960261060914616 ], "wc_review_avg": [ 530.0, 215.83790213954546 ], "wc_reply_reviewers_avg": [ 81.33333333333333, 21.013223349965983 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6774960623635828847&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unibo.it;unibo.it;unibo.it;unibo.it;unibo.it", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Bologna", "aff_unique_dep": "", "aff_unique_url": "https://www.unibo.it", "aff_unique_abbr": "Unibo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "ZeroMark: Towards Dataset Ownership Verification without Disclosing Watermark", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96006", "id": "Eyyt3ZmNV6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Eyyt3ZmNV6", "openreview": "https://openreview.net/forum?id=Eyyt3ZmNV6", "poster": "", "project": "", "author_site": "Junfeng Guo, Yiming Li, Ruibo Chen, Yihan Wu, chenxi liu, Heng Huang", "tldr": "", "abstract": "High-quality public datasets significantly prompt the prosperity of deep neural networks (DNNs). Currently, dataset ownership verification (DOV), which consists of dataset watermarking and ownership verification, is the only feasible solution to protect their copyright by preventing unauthorized use. In this paper, we revisit existing DOV methods and find that they all mainly focused on the first stage by designing different types of dataset watermarks and directly exploiting watermarked samples as the verification samples for ownership verification. As such, their success relies on an underlying assumption that verification is a \\emph{one-time} and \\emph{privacy-preserving} process, which does not necessarily hold in practice. To alleviate this problem, we propose \\emph{ZeroMark} to conduct ownership verification without disclosing dataset-specified watermarks. Our method is inspired by our empirical and theoretical findings of the intrinsic property of DNNs trained on the watermarked dataset. Specifically, ZeroMark first generates the closest boundary version of given benign samples and calculates their boundary gradients under the label-only black-box setting. After that, it examines whether the given suspicious method has been trained on the protected dataset by performing a hypothesis test, based on the cosine similarity measured on the boundary gradients and the watermark pattern. Extensive experiments on benchmark datasets verify the effectiveness of our ZeroMark and its resistance to potential adaptive attacks. The codes for reproducing our main experiments are publicly available at \\href{https://github.com/JunfengGo/ZeroMark.git}{GitHub}.", "keywords": "Dataset Ownership Verification;Ownership Verification;Copyright Protection;AI Security", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/e6750b3aaeb9d6fdb1846566c38de2168a51d20f.zip", "author": "Junfeng Guo;Yiming Li;Ruibo Chen;Yihan Wu;Chenxi Liu;Heng Huang", "authorids": "~Junfeng_Guo2;~Yiming_Li1;~Ruibo_Chen2;~Yihan_Wu1;~Chenxi_Liu2;~Heng_Huang1", "gender": "M;M;M;M;M;M", "homepage": "https://junfenggo.github.io/;http://liyiming.tech;https://rayruibochen.github.io/;https://yihwu.github.io/;https://dawnliu35.github.io/;https://www.cs.umd.edu/~heng/", "dblp": ";l/YimingLi-4;;;;03/281", "google_scholar": "TqblqYcAAAAJ;mSW7kU8AAAAJ;oXfEZL0AAAAJ;cajTg_wAAAAJ;;4OqLaDwAAAAJ", "orcid": ";0000-0002-2258-265X;;;;", "linkedin": ";yiming-li-thu/;;;chenxi-liu-8b9719211/;", "or_profile": "~Junfeng_Guo2;~Yiming_Li1;~Ruibo_Chen2;~Yihan_Wu1;~Chenxi_Liu2;~Heng_Huang1", "aff": "University of Maryland Institute for Advanced Computer Studies, University of Maryland, College Park;Zhejiang University;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umiacs.umd.edu;zju.edu.cn;umd.edu;umd.edu;umd.edu;cs.umd.edu", "position": "Postdoc;Research Professor;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nguo2024zeromark,\ntitle={ZeroMark: Towards Dataset Ownership Verification without Disclosing Watermark},\nauthor={Junfeng Guo and Yiming Li and Ruibo Chen and Yihan Wu and Chenxi Liu and Heng Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Eyyt3ZmNV6}\n}", "github": "", "reviewers": "s8Mh;RJcM;VV25;RHDz", "pdf_size": 10232646, "rating": "3;5;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "41;78;131;74", "wc_strengths": "38;23;97;151", "wc_weaknesses": "260;385;63;202", "wc_questions": "60;128;183;5", "wc_limitations": "6;10;32;1", "wc_review": "405;624;506;433", "wc_reply_reviewers": "441;141;348;156", "wc_reply_authors": "1618;1228;838;29", "reply_reviewers": "2;1;2;1", "reply_authors": "6;4;5;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 32.24127789030702 ], "wc_strengths_avg": [ 77.25, 50.77585548269965 ], "wc_weaknesses_avg": [ 227.5, 115.72920979597156 ], "wc_questions_avg": [ 94.0, 67.36839021380874 ], "wc_limitations_avg": [ 12.25, 11.840080236214618 ], "wc_review_avg": [ 492.0, 84.66108905512614 ], "wc_reply_reviewers_avg": [ 271.5, 127.42939221388447 ], "wc_reply_authors_avg": [ 928.25, 587.8776977399295 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.25, 1.479019945774904 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11996816830470509059&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "umiacs.umd.edu;zju.edu.cn;umd.edu;umd.edu;umd.edu;cs.umd.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "University of Maryland;Zhejiang University;University of Maryland, College Park", "aff_unique_dep": "Institute for Advanced Computer Studies;;Department of Computer Science", "aff_unique_url": "https://www.umd.edu;https://www.zju.edu.cn;https://www/umd.edu", "aff_unique_abbr": "UMD;ZJU;UMD", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Predicting Label Distribution from Ternary Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96005", "id": "F6L23TNlFW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F6L23TNlFW", "openreview": "https://openreview.net/forum?id=F6L23TNlFW", "poster": "/media/PosterPDFs/NeurIPS%202024/96005.png?t=1730709716.2638414", "project": "", "author_site": "Yunan Lu, Xiuyi Jia", "tldr": "", "abstract": "Label distribution learning is a powerful learning paradigm to deal with label polysemy and has been widely applied in many practical tasks. A significant obstacle to the effective utilization of label distribution is the substantial expenses of accurate quantifying the label distributions. To tackle this challenge, label enhancement methods automatically infer label distributions from more easily accessible multi-label data based on binary annotations. However, the binary annotation of multi-label data requires experts to accurately assess whether each label can describe the instance, which may diminish the annotating efficiency and heighten the risk of erroneous annotation since the relationship between the label and the instance is unclear in many practical scenarios. Therefore, we propose to predict label distribution from ternary labels, allowing experts to annotate labels in a three-way annotation scheme. They can annotate the label as \"$0$\" indicating \"uncertain relevant\" if it is difficult to definitively determine whether the label can describe the instance, in addition to the binary annotation of \"$1$\" indicating \"definitely relevant\" and \"$-1$\" indicating \"definitely irrelevant\". Both the theoretical and methodological studies are conducted for the proposed learning paradigm. In the theoretical part, we conduct a quantitative comparison of approximation error between ternary and binary labels to elucidate the superiority of ternary labels over binary labels. In the methodological part, we propose a Categorical distribution with monotonicity and orderliness to model the mapping from label description degrees to ternary labels, which can serve as a loss function or as a probability distribution, allowing most existing label enhancement methods to be adapted to our task. Finally, we experimentally demonstrate the effectiveness of our proposal.", "keywords": "label distribution;label polysemy;multi-label;ternary label", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yunan Lu;Xiuyi Jia", "authorids": "~Yunan_Lu1;~Xiuyi_Jia1", "gender": "M;", "homepage": ";", "dblp": "264/6413;23/5047", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-8861-7897;", "linkedin": ";", "or_profile": "~Yunan_Lu1;~Xiuyi_Jia1", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2024predicting,\ntitle={Predicting Label Distribution from Ternary Labels},\nauthor={Yunan Lu and Xiuyi Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F6L23TNlFW}\n}", "github": "", "reviewers": "3ktH;uVMp;Lfun;7vWN", "pdf_size": 1313439, "rating": "3;6;7;8", "confidence": "5;4;3;5", "soundness": "1;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "63;46;86;32", "wc_strengths": "37;31;219;32", "wc_weaknesses": "162;40;95;59", "wc_questions": "241;35;63;140", "wc_limitations": "47;8;28;6", "wc_review": "550;160;491;269", "wc_reply_reviewers": "0;38;0;0", "wc_reply_authors": "42;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 56.75, 20.141685629559408 ], "wc_strengths_avg": [ 79.75, 80.42815116611844 ], "wc_weaknesses_avg": [ 89.0, 46.54567649094812 ], "wc_questions_avg": [ 119.75, 79.86668579576843 ], "wc_limitations_avg": [ 22.25, 16.67895380412093 ], "wc_review_avg": [ 367.5, 159.15165723296758 ], "wc_reply_reviewers_avg": [ 9.5, 16.454482671904334 ], "wc_reply_authors_avg": [ 10.5, 18.186533479473212 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3223291856101521, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k49LcW7aAscJ:scholar.google.com/&scioq=Predicting+Label+Distribution+from+Ternary+Labels&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "njust.edu.cn;njust.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Deep linear networks for regression are implicitly regularized towards flat minima", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96004", "id": "F738WY1Xm4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F738WY1Xm4", "openreview": "https://openreview.net/forum?id=F738WY1Xm4", "poster": "/media/PosterPDFs/NeurIPS%202024/96004.png?t=1730743277.8004467", "project": "", "author_site": "Pierre Marion, L\u00e9na\u00efc Chizat", "tldr": "", "abstract": "The largest eigenvalue of the Hessian, or sharpness, of neural networks is a key quantity to understand their optimization dynamics. In this paper, we study the sharpness of deep linear networks for univariate regression. Minimizers can have arbitrarily large sharpness, but not an arbitrarily small one. Indeed, we show a lower bound on the sharpness of minimizers, which grows linearly with depth. We then study the properties of the minimizer found by gradient flow, which is the limit of gradient descent with vanishing learning rate. We show an implicit regularization towards flat minima: the sharpness of the minimizer is no more than a constant times the lower bound. The constant depends on the condition number of the data covariance matrix, but not on width or depth. This result is proven both for a small-scale initialization and a residual initialization. Results of independent interest are shown in both cases. For small-scale initialization, we show that the learned weight matrices are approximately rank-one and that their singular vectors align. For residual initialization, convergence of the gradient flow for a Gaussian initialization of the residual network is proven. Numerical experiments illustrate our results and connect them to gradient descent with non-vanishing learning rate.", "keywords": "deep learning theory;sharpness;non-convex optimization;implicit regularization;gradient flow", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/a432ec9ce3d7f2a75ef6fdf29aafe327118a320b.zip", "author": "Pierre Marion;L\u00e9na\u00efc Chizat", "authorids": "~Pierre_Marion1;~L\u00e9na\u00efc_Chizat1", "gender": "M;M", "homepage": "https://pierremarion23.github.io/;https://lchizat.github.io/", "dblp": "250/2318;192/1488", "google_scholar": "https://scholar.google.fr/citations?user=Q8H5LgIAAAAJ;https://scholar.google.fr/citations?user=jrJh9yIAAAAJ", "orcid": ";", "linkedin": "pierre-marion-816474130/;", "or_profile": "~Pierre_Marion1;~L\u00e9na\u00efc_Chizat1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nmarion2024deep,\ntitle={Deep linear networks for regression are implicitly regularized towards flat minima},\nauthor={Pierre Marion and L{\\'e}na{\\\"\\i}c Chizat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F738WY1Xm4}\n}", "github": "", "reviewers": "PPoT;hhnc;hdrG;WC3N", "pdf_size": 722605, "rating": "6;6;6;7", "confidence": "2;4;3;2", "soundness": "4;3;4;3", "novelty": "3;2;2;3", "presentation": "4;3;4;4", "wc_summary": "112;86;93;150", "wc_strengths": "128;36;60;106", "wc_weaknesses": "170;156;97;36", "wc_questions": "103;2;19;23", "wc_limitations": "57;2;1;1", "wc_review": "570;282;270;316", "wc_reply_reviewers": "38;25;13;16", "wc_reply_authors": "10;24;10;10", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 110.25, 24.843258642939738 ], "wc_strengths_avg": [ 82.5, 36.36963018783666 ], "wc_weaknesses_avg": [ 114.75, 53.081894276674035 ], "wc_questions_avg": [ 36.75, 39.0536489972448 ], "wc_limitations_avg": [ 15.25, 24.107830678018296 ], "wc_review_avg": [ 359.5, 122.69779949127042 ], "wc_reply_reviewers_avg": [ 23.0, 9.72111104761179 ], "wc_reply_authors_avg": [ 13.5, 6.06217782649107 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2222491222986815443&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "DiReCT: Diagnostic Reasoning for Clinical Notes via Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97782", "id": "F7rAX6yiS2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F7rAX6yiS2", "openreview": "https://openreview.net/forum?id=F7rAX6yiS2", "poster": "/media/PosterPDFs/NeurIPS%202024/97782.png?t=1730880821.0310936", "project": "", "author_site": "Bowen Wang, Jiuyang Chang, Yiming Qian, Guoxin Chen, Junhao Chen, Zhouqiang Jiang, Jiahao Zhang, Yuta Nakashima, Hajime Nagahara", "tldr": "", "abstract": "Large language models (LLMs) have recently showcased remarkable capabilities, spanning a wide range of tasks and applications, including those in the medical domain. Models like GPT-4 excel in medical question answering but may face challenges in the lack of interpretability when handling complex tasks in real clinical settings. We thus introduce the diagnostic reasoning dataset for clinical notes (DiReCT), aiming at evaluating the reasoning ability and interpretability of LLMs compared to human doctors. It contains 511 clinical notes, each meticulously annotated by physicians, detailing the diagnostic reasoning process from observations in a clinical note to the final diagnosis. Additionally, a diagnostic knowledge graph is provided to offer essential knowledge for reasoning, which may not be covered in the training data of existing LLMs. Evaluations of leading LLMs on DiReCT bring out a significant gap between their reasoning ability and that of human doctors, highlighting the critical need for models that can reason effectively in real-world clinical scenarios.", "keywords": "Clinical Notes;Diagnostic Reasoning;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/7f78b988f433fada180e921dab668c76a4a577e2.pdf", "author": "Bowen Wang;Jiuyang Chang;Yiming Qian;Guoxin Chen;Junhao Chen;Zhouqiang Jiang;Jiahao Zhang;Yuta Nakashima;Hajime Nagahara", "authorids": "~Bowen_Wang1;~Jiuyang_Chang1;~Yiming_Qian6;~Guoxin_Chen1;~Junhao_Chen3;~Zhouqiang_Jiang1;~Jiahao_Zhang12;~Yuta_Nakashima3;~Hajime_Nagahara1", "gender": "M;M;M;;M;M;M;M;M", "homepage": "https://www.bowen-wang.com/home;https://sciprofiles.com/home;;;https://github.com/3244we;https://github.com/minhoooo1;;https://www.n-yuta.jp/;https://www.is.ids.osaka-u.ac.jp/author/hajime-nagahara/", "dblp": "64/4732;;;;;;;03/2323;78/5843", "google_scholar": "hB4K5UMAAAAJ;;gmpm0a8AAAAJ;I6EjtN0AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;jRcFD0MAAAAJ;LNvd0VQAAAAJ;https://scholar.google.co.uk/citations?user=CZyXjREAAAAJ", "orcid": "0000-0002-2911-5595;;0000-0002-1795-2038;0000-0001-9000-4782;;0009-0002-1304-4179;0000-0001-5522-9759;0000-0001-8000-3567;0000-0003-1579-8767", "linkedin": ";;;;;;jiahao-zhang-80a123308/;;", "or_profile": "~Bowen_Wang1;~Jiuyang_Chang1;~Yiming_Qian6;~Guoxin_Chen1;~Junhao_Chen3;~Zhouqiang_Jiang1;~Jiahao_Zhang12;~Yuta_Nakashima3;~Hajime_Nagahara1", "aff": "Osaka University;Dalian Medical University;Institute of High Performance Computing, Singapore, A*STAR;Institute of Computing Technology, Chinese Academy of Sciences;Osaka University;Meetyou AI Lab;Osaka University;Osaka University;Osaka University", "aff_domain": "osaka-u.ac.jp;firsthosp-dmu.com;ihpc.a-star.edu.sg;ict.ac.cn;osaka-u.ac.jp;xiaoyouzi.com;osaka-u.ac.jp;osaka-u.ac.jp;osaka-u.ac.jp", "position": "Postdoc;Researcher;Researcher;MS student;Intern;Researcher;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024direct,\ntitle={DiRe{CT}: Diagnostic Reasoning for Clinical Notes via Large Language Models},\nauthor={Bowen Wang and Jiuyang Chang and Yiming Qian and Guoxin Chen and Junhao Chen and Zhouqiang Jiang and Jiahao Zhang and Yuta Nakashima and Hajime Nagahara},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=F7rAX6yiS2}\n}", "github": "", "reviewers": "DFEz;kZFT;54eB;B51N", "pdf_size": 778869, "rating": "7;7;7;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "104;76;152;97", "wc_strengths": "35;6;91;36", "wc_improvement": "62;6;946;176", "wc_limitations": "1;6;18;38", "wc_correctness": "1;1;35;1", "wc_clarity": "1;1;6;15", "wc_relation_to_prior_work": "1;1;24;1", "wc_documentation": "1;1;24;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "207;99;1297;369", "wc_reply_reviewers": "0;23;17;626", "wc_reply_authors": "0;11;18;48", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;4;4", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 107.25, 27.815238629211866 ], "wc_strengths_avg": [ 42.0, 30.748983723043597 ], "wc_improvement_avg": [ 297.5, 379.3899708742971 ], "wc_limitations_avg": [ 15.75, 14.254385290148432 ], "wc_correctness_avg": [ 9.5, 14.722431864335457 ], "wc_clarity_avg": [ 5.75, 5.717298313014636 ], "wc_relation_to_prior_work_avg": [ 6.75, 9.959292143521045 ], "wc_documentation_avg": [ 7.5, 9.604686356149273 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 493.0, 474.0316445133173 ], "wc_reply_reviewers_avg": [ 166.5, 265.4265435106293 ], "wc_reply_authors_avg": [ 19.25, 17.795715776557007 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13334526992633475079&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "osaka-u.ac.jp;firsthosp-dmu.com;ihpc.a-star.edu.sg;ict.ac.cn;osaka-u.ac.jp;xiaoyouzi.com;osaka-u.ac.jp;osaka-u.ac.jp;osaka-u.ac.jp", "author_num": 9, "aff_unique_index": "0;1;2;3;0;4;0;0;0", "aff_unique_norm": "Osaka University;Dalian Medical University;Institute of High Performance Computing;Chinese Academy of Sciences;Meetyou AI Lab", "aff_unique_dep": ";;;Institute of Computing Technology;AI Lab", "aff_unique_url": "https://www.osaka-u.ac.jp;http://www.dmu.edu.cn/;https://www.ihpc.a-star.edu.sg;http://www.ict.ac.cn;", "aff_unique_abbr": "Osaka U;DMU;IHPC;CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;0;0;0;0", "aff_country_unique": "Japan;China;Singapore;" }, { "title": "HonestLLM: Toward an Honest and Helpful Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96003", "id": "F7tGQ7b10q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F7tGQ7b10q", "openreview": "https://openreview.net/forum?id=F7tGQ7b10q", "poster": "/media/PosterPDFs/NeurIPS%202024/96003.png?t=1731651609.6272054", "project": "", "author_site": "Gao Chujie, Siyuan Wu, Yue Huang, Dongping Chen, Qihui Zhang, Zhengyan Fu, Yao Wan, Lichao Sun, Xiangliang Zhang", "tldr": "", "abstract": "Large Language Models (LLMs) have achieved remarkable success across various industries and applications, owing to their exceptional generative capabilities. Nevertheless, honesty and helpfulness, which ensure safe and useful real-world deployments, have been considered as the longstanding cornerstones in practice. In this paper, we first established comprehensive principles for honesty LLM and further created the HoneSet with 930 queries across six categories, which is designed to evaluate LLMs\u2019 ability to maintain honesty. Then, we improved the honesty and helpfulness of LLMs in both training-free and fine-tuning settings. Specifically, we propose a training-free method named Curiosity-Driven Prompting, which enables LLMs to express their internal confusion and uncertainty about the given query and then optimize their responses. Moreover, we also propose a two-stage fine-tuning approach, inspired by curriculum learning, to enhance the honesty and helpfulness of LLMs. The method first teaches LLMs to distinguish between honest and dishonest, and then LLMs are trained to learn to respond more helpfully. Experimental results demonstrated that both of the two proposed methods improve the helpfulness of LLMs while making them maintain honesty. Our research has paved the way for more reliable and trustworthy LLMs in real-world applications.", "keywords": "large language model;trustworthy;honesty;alignment", "primary_area": "evaluation", "supplementary_material": "/attachment/3d6c9ce5219cb3577dcba446515af7c786541b77.zip", "author": "Chujie Gao;Siyuan Wu;Yue Huang;Dongping Chen;Qihui Zhang;Zhengyan Fu;Yao Wan;Lichao Sun;Xiangliang Zhang", "authorids": "~Chujie_Gao1;~Siyuan_Wu6;~Yue_Huang9;~Dongping_Chen1;~Qihui_Zhang1;~Zhengyan_Fu1;~Yao_Wan2;~Lichao_Sun1;~Xiangliang_Zhang1", "gender": "F;;;M;M;M;M;M;F", "homepage": ";https://github.com/nauyisu022;;https://dongping-chen.github.io;https://github.com/Mask-Hui;https://github.com/Peter-Fu1;http://wanyao.me;https://lichao-sun.github.io/;https://sites.nd.edu/xiangliang-zhang/", "dblp": "366/6075;44/3983-1;;151/7051;160/4750;;167/0275.html;121/0780-1.html;74/1890-1", "google_scholar": "1AqAngQAAAAJ;v8qD1HsAAAAJ;;;;;c3MtqtMAAAAJ;WhGUE7AAAAAJ;BhRJe4wAAAAJ", "orcid": ";;;0009-0009-9848-2557;;;0000-0001-6937-4180;;0000-0002-3574-5665", "linkedin": ";;;;;;;lichao-sun-b273a290/;", "or_profile": "~Chujie_Gao1;~Siyuan_Wu6;~Yue_Huang9;~Dongping_Chen1;~Qihui_Zhang1;~Zhengyan_Fu1;~Yao_Wan2;~Lichao_Sun1;~Xiangliang_Zhang1", "aff": ";University of Waterloo;;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Guangzhou City University of Technology (former name \"Guangzhou College of South China University of Technology\");Huazhong University of Science and Technology;Lehigh University;University of Notre Dame", "aff_domain": ";uwaterloo.ca;;hust.edu.cn;hust.edu.cn;gcu.edu.cn;hust.edu.cn;lehigh.edu;nd.edu", "position": ";Intern;;Undergrad student;Intern;Undergrad student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ngao2024honestllm,\ntitle={Honest{LLM}: Toward an Honest and Helpful Large Language Model},\nauthor={Chujie Gao and Siyuan Wu and Yue Huang and Dongping Chen and Qihui Zhang and Zhengyan Fu and Yao Wan and Lichao Sun and Xiangliang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F7tGQ7b10q}\n}", "github": "", "reviewers": "aGvd;RUhQ;mhHM;A9r3", "pdf_size": 2339558, "rating": "4;5;6;6", "confidence": "5;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "3;4;3;3", "wc_summary": "45;77;54;32", "wc_strengths": "14;39;61;160", "wc_weaknesses": "84;61;150;452", "wc_questions": "69;7;40;21", "wc_limitations": "14;1;22;12", "wc_review": "226;185;327;677", "wc_reply_reviewers": "9;0;11;29", "wc_reply_authors": "114;142;59;106", "reply_reviewers": "1;0;1;2", "reply_authors": "3;3;3;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 52.0, 16.416455159382004 ], "wc_strengths_avg": [ 68.5, 55.38275905008706 ], "wc_weaknesses_avg": [ 186.75, 156.58763520789245 ], "wc_questions_avg": [ 34.25, 23.23117517475171 ], "wc_limitations_avg": [ 12.25, 7.495832175282475 ], "wc_review_avg": [ 353.75, 193.650942419602 ], "wc_reply_reviewers_avg": [ 12.25, 10.520812706250407 ], "wc_reply_authors_avg": [ 105.25, 29.86113695089321 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16453543721152734088&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";uwaterloo.ca;;hust.edu.cn;hust.edu.cn;gcu.edu.cn;hust.edu.cn;lehigh.edu;nd.edu", "author_num": 9, "aff_unique_index": "0;1;1;2;1;3;4", "aff_unique_norm": "University of Waterloo;Huazhong University of Science and Technology;Guangzhou City University of Technology;Lehigh University;University of Notre Dame", "aff_unique_dep": ";;;;", "aff_unique_url": "https://uwaterloo.ca;http://www.hust.edu.cn;;https://www.lehigh.edu;https://www.nd.edu", "aff_unique_abbr": "UW;HUST;;Lehigh;Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;2;2", "aff_country_unique": "Canada;China;United States" }, { "title": "Designing Cell-Type-Specific Promoter Sequences Using Conservative Model-Based Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96002", "id": "F8DWffLkYG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F8DWffLkYG", "openreview": "https://openreview.net/forum?id=F8DWffLkYG", "poster": "/media/PosterPDFs/NeurIPS%202024/96002.png?t=1733766441.5526016", "project": "", "author_site": "Aniketh Janardhan Reddy, Xinyang Geng, Michael Herschl, Sathvik Kolli, Aviral Kumar, Patrick Hsu, Sergey Levine, Nilah Ioannidis", "tldr": "", "abstract": "Gene therapies have the potential to treat disease by delivering therapeutic genetic cargo to disease-associated cells. One limitation to their widespread use is the lack of short regulatory sequences, or promoters, that differentially induce the expression of delivered genetic cargo in target cells, minimizing side effects in other cell types. Such cell-type-specific promoters are difficult to discover using existing methods, requiring either manual curation or access to large datasets of promoter-driven expression from both targeted and untargeted cells. Model-based optimization (MBO) has emerged as an effective method to design biological sequences in an automated manner, and has recently been used in promoter design methods. However, these methods have only been tested using large training datasets that are expensive to collect, and focus on designing promoters for markedly different cell types, overlooking the complexities associated with designing promoters for closely related cell types that share similar regulatory features. Therefore, we introduce a comprehensive framework for utilizing MBO to design promoters in a data-efficient manner, with an emphasis on discovering promoters for similar cell types. We use conservative objective models (COMs) for MBO and highlight practical considerations such as best practices for improving sequence diversity, getting estimates of model uncertainty, and choosing the optimal set of sequences for experimental validation. Using three leukemia cell lines (Jurkat, K562, and THP1), we show that our approach discovers many novel cell-type-specific promoters after experimentally validating the designed sequences. For K562 cells, in particular, we discover a promoter that has 75.85\\% higher cell-type-specificity than the best promoter from the initial dataset used to train our models. Our code and data will be available at https://github.com/young-geng/promoter_design.", "keywords": "ML applications;computational genomics;computational biology;model-based optimization", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Aniketh Janardhan Reddy;Xinyang Geng;Michael H Herschl;Sathvik Kolli;Aviral Kumar;Patrick D Hsu;Sergey Levine;Nilah M Ioannidis", "authorids": "~Aniketh_Janardhan_Reddy1;~Xinyang_Geng1;~Michael_H_Herschl1;~Sathvik_Kolli1;~Aviral_Kumar2;~Patrick_D_Hsu1;~Sergey_Levine1;~Nilah_M_Ioannidis1", "gender": "M;M;;;M;;M;", "homepage": "https://anikethjr.github.io/;http://young-geng.xyz/;;;https://aviralkumar2907.github.io/;http://hsu.berkeley.edu;https://people.eecs.berkeley.edu/~svlevine/;", "dblp": ";186/8221;;;202/7961;;80/7594;", "google_scholar": "3d9b_SYAAAAJ;vYougn0AAAAJ;;;;LhnU_joAAAAJ;8R35rCwAAAAJ;", "orcid": ";;;;;;;", "linkedin": ";;;sathvik-kolli-6b4a31192/;;;;", "or_profile": "~Aniketh_Janardhan_Reddy1;~Xinyang_Geng1;~Michael_H_Herschl1;~Sathvik_Kolli1;~Aviral_Kumar2;~Patrick_D_Hsu1;~Sergey_Levine1;~Nilah_M_Ioannidis1", "aff": "Dyno Therapeutics;Google;;;Google DeepMind;University of California, Berkeley;Google;", "aff_domain": "dynotx.com;google.com;;;google.com;berkeley.edu;google.com;", "position": "Intern;Researcher;;;Researcher;Assistant Professor;Research Scientist;", "bibtex": "@inproceedings{\nreddy2024designing,\ntitle={Designing Cell-Type-Specific Promoter Sequences Using Conservative Model-Based Optimization},\nauthor={Aniketh Janardhan Reddy and Xinyang Geng and Michael H Herschl and Sathvik Kolli and Aviral Kumar and Patrick D Hsu and Sergey Levine and Nilah M Ioannidis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F8DWffLkYG}\n}", "github": "", "reviewers": "ipDz;XucD;zTWW", "pdf_size": 2590569, "rating": "6;7;8", "confidence": "5;4;3", "soundness": "3;4;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "55;149;79", "wc_strengths": "27;184;50", "wc_weaknesses": "46;262;56", "wc_questions": "139;354;36", "wc_limitations": "1;92;6", "wc_review": "268;1041;227", "wc_reply_reviewers": "21;44;13", "wc_reply_authors": "97;972;11", "reply_reviewers": "1;1;1", "reply_authors": "3;4;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 94.33333333333333, 39.87759047663515 ], "wc_strengths_avg": [ 87.0, 69.2290882986817 ], "wc_weaknesses_avg": [ 121.33333333333333, 99.55009905681774 ], "wc_questions_avg": [ 176.33333333333334, 132.4797678473543 ], "wc_limitations_avg": [ 33.0, 41.769207158703246 ], "wc_review_avg": [ 512.0, 374.43379477107385 ], "wc_reply_reviewers_avg": [ 26.0, 13.140268896284683 ], "wc_reply_authors_avg": [ 360.0, 434.1712411787159 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4154063524295815435&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "dynotx.com;google.com;;;google.com;berkeley.edu;google.com;", "author_num": 8, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Dyno Therapeutics;Google;University of California, Berkeley", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.dynotherapeutics.com;https://www.google.com;https://www.berkeley.edu", "aff_unique_abbr": ";Google;UC Berkeley", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Mountain View;Berkeley", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "MECD: Unlocking Multi-Event Causal Discovery in Video Reasoning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96001", "id": "F8aSOovlEP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F8aSOovlEP", "openreview": "https://openreview.net/forum?id=F8aSOovlEP", "poster": "/media/PosterPDFs/NeurIPS%202024/96001.png?t=1730267523.6652215", "project": "", "author_site": "Tieyuan Chen, Huabin Liu, Tianyao He, Yihang Chen, chaofan gan, Xiao Ma, Cheng Zhong, Yang Zhang, Yingxue Wang, Hui Lin, Weiyao Lin", "tldr": "", "abstract": "Video causal reasoning aims to achieve a high-level understanding of video content from a causal perspective. However, current video reasoning tasks are limited in scope, primarily executed in a question-answering paradigm and focusing on short videos containing only a single event and simple causal relationships, lacking comprehensive and structured causality analysis for videos with multiple events. To fill this gap, we introduce a new task and dataset, Multi-Event Causal Discovery (MECD). It aims to uncover the causal relationships between events distributed chronologically across long videos. Given visual segments and textual descriptions of events, MECD requires identifying the causal associations between these events to derive a comprehensive, structured event-level video causal diagram explaining why and how the final result event occurred. To address MECD, we devise a novel framework inspired by the Granger Causality method, using an efficient mask-based event prediction model to perform an Event Granger Test, which estimates causality by comparing the predicted result event when premise events are masked versus unmasked. Furthermore, we integrate causal inference techniques such as front-door adjustment and counterfactual inference to address challenges in MECD like causality confounding and illusory causality. Experiments validate the effectiveness of our framework in providing causal relationships in multi-event videos, outperforming GPT-4o and VideoLLaVA by 5.7% and 4.1%, respectively.", "keywords": "Video understanding; Video reasoning; Causal discovery; Causal inference", "primary_area": "machine_vision", "supplementary_material": "", "author": "Tieyuan Chen;Huabin Liu;Tianyao He;Yihang Chen;Chaofan Gan;Xiao Ma;Cheng Zhong;Yang Zhang;Yingxue Wang;Hui Lin;Weiyao Lin", "authorids": "~Tieyuan_Chen1;~Huabin_Liu1;~Tianyao_He1;~Yihang_Chen2;~Chaofan_Gan1;~Xiao_Ma11;~Cheng_Zhong5;~Yang_Zhang23;~Yingxue_Wang2;~Hui_Lin7;~Weiyao_Lin1", "gender": ";M;M;M;M;M;M;M;F;M;M", "homepage": "https://tychen-sjtu.github.io//;http://r00kie-liu.github.io/;;https://yihangchen-ee.github.io;https://github.com/ganchaofan0000;https://github.com/SatMa34;;;;https://baike.baidu.com/item/%E6%9E%97%E6%99%96/54130577;https://weiyaolin.github.io/", "dblp": "347/1572;241/2160-1;;220/8712;;;;06/6785-2;;36/3545.html;42/6095", "google_scholar": "https://scholar.google.com.tw/citations?user=86n-wXsAAAAJ;hicoLSQAAAAJ;TTw8ZGcAAAAJ;;;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=zh-CN;;aLbznmQAAAAJ;S9g81n8AAAAJ", "orcid": ";;;0000-0003-1127-1570;;;;;0000-0003-2270-3519;0000-0003-0190-969X;", "linkedin": ";;;;;;;;;;", "or_profile": "~Tieyuan_Chen1;~Huabin_Liu1;~Tianyao_He1;~Yihang_Chen2;~Chaofan_Gan1;~Xiao_Ma11;~Cheng_Zhong5;~Yang_Zhang23;~Yingxue_Wang2;~Hui_Lin7;~Weiyao_Lin1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Monash University;Shanghai Jiaotong University;Lenovo Group Limited;Lenovo Research;Lenovo Research, AI Lab;;Electronic Science Research Institute of China Electronics Technology Group Corporation;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;monash.edu;sjtu.edu.cn;lenovo.com;lenovo.com;lenovo.com;;caeit.cetc.com.cn;sjtu.edu.cn", "position": "PhD student;PhD student;MS student;PhD student;PhD student;Researcher;Principal Researcher;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024mecd,\ntitle={{MECD}: Unlocking Multi-Event Causal Discovery in Video Reasoning},\nauthor={Tieyuan Chen and Huabin Liu and Tianyao He and Yihang Chen and Chaofan Gan and Xiao Ma and Cheng Zhong and Yang Zhang and Yingxue Wang and Hui Lin and Weiyao Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F8aSOovlEP}\n}", "github": "", "reviewers": "2Vef;eXFX;9my4;17Ce", "pdf_size": 3288501, "rating": "5;5;5;8", "confidence": "4;4;4;2", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "146;75;63;105", "wc_strengths": "155;60;94;70", "wc_weaknesses": "339;53;90;6", "wc_questions": "28;87;53;1", "wc_limitations": "1;20;1;1", "wc_review": "669;295;301;183", "wc_reply_reviewers": "45;25;124;0", "wc_reply_authors": "54;72;720;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.25, 32.03416145304884 ], "wc_strengths_avg": [ 94.75, 36.91459738369091 ], "wc_weaknesses_avg": [ 122.0, 128.77305618800852 ], "wc_questions_avg": [ 42.25, 31.712576369636068 ], "wc_limitations_avg": [ 5.75, 8.227241335952167 ], "wc_review_avg": [ 362.0, 183.3712082089225 ], "wc_reply_reviewers_avg": [ 48.5, 46.41389878042998 ], "wc_reply_authors_avg": [ 211.5, 294.77576223292175 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8722098015769766107&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;monash.edu;sjtu.edu.cn;lenovo.com;lenovo.com;lenovo.com;;caeit.cetc.com.cn;sjtu.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;1;0;2;3;4;5;0", "aff_unique_norm": "Shanghai Jiao Tong University;Monash University;Lenovo Group Limited;Lenovo;Lenovo Research;China Electronics Technology Group Corporation", "aff_unique_dep": ";;;Research;AI Lab;Electronic Science Research Institute", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.monash.edu;https://www.lenovo.com;https://www.lenovo.com;https://www.lenovo.com;http://www.cetc.com.cn", "aff_unique_abbr": "SJTU;Monash;Lenovo;Lenovo;Lenovo;CETC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Robust and Faster Zeroth-Order Minimax Optimization: Complexity and Applications", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/96000", "id": "F8wKoSFSaA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F8wKoSFSaA", "openreview": "https://openreview.net/forum?id=F8wKoSFSaA", "poster": "/media/PosterPDFs/NeurIPS%202024/96000.png?t=1732015885.7762473", "project": "", "author_site": "Weixin An, Yuanyuan Liu, Fanhua Shang, Hongying Liu", "tldr": "", "abstract": "Many zeroth-order (ZO) optimization algorithms have been developed to solve nonconvex minimax problems in machine learning and computer vision areas. However, existing ZO minimax algorithms have high complexity and rely on some strict restrictive conditions for ZO estimations. To address these issues, we design a new unified ZO gradient descent extragradient ascent (ZO-GDEGA) algorithm, which reduces the overall complexity to $\\mathcal{O}(d\\epsilon^{-6})$ to find an $\\epsilon$-stationary point of the function $\\psi$ for nonconvex-concave (NC-C) problems, where $d$ is the variable dimension. To the best of our knowledge, ZO-GDEGA is the first ZO algorithm with complexity guarantees to solve stochastic NC-C problems. Moreover, ZO-GDEGA requires weaker conditions on the ZO estimations and achieves more robust theoretical results. As a by-product, ZO-GDEGA has advantages on the condition number for the NC-strongly concave case. Experimentally, ZO-GDEGA can generate more effective poisoning attack data with an average accuracy reduction of 5\\%. The improved AUC performance also verifies the robustness of gradient estimations.", "keywords": "zeroth-order algorithms;minimax problems;robustness;complexity;regression", "primary_area": "optimization", "supplementary_material": "/attachment/96f3cba7bd3842975d2e3ae9b7887ae6748230c3.zip", "author": "Weixin An;Yuanyuan Liu;Fanhua Shang;Hongying Liu", "authorids": "~Weixin_An2;~Yuanyuan_Liu1;~Fanhua_Shang2;~Hongying_Liu2", "gender": "M;Not Specified;M;F", "homepage": ";https://dblp.uni-trier.de/pid/97/2119-1.html;https://sites.google.com/site/fanhua217/home;", "dblp": ";97/2119-1;66/9057;43/8776", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/scholar?q=Yuanyuan+Liu;rk_HZTkAAAAJ;S0pp67AAAAAJ", "orcid": ";0000-0001-8646-8533;0000-0002-1040-352X;0000-0001-5961-5569", "linkedin": ";;;", "or_profile": "~Weixin_An2;~Yuanyuan_Liu1;~Fanhua_Shang2;~Hongying_Liu2", "aff": "Xidian University;The Chinese University of Hong Kong;Tianjin University;Tianjin University", "aff_domain": "xidian.edu.cn;cuhk.edu.hk;tju.edu.cn;tju.edu.cn", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nan2024robust,\ntitle={Robust and Faster Zeroth-Order Minimax Optimization: Complexity and Applications},\nauthor={Weixin An and Yuanyuan Liu and Fanhua Shang and Hongying Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F8wKoSFSaA}\n}", "github": "", "reviewers": "JCfr;A7AZ;4438;hV36;Rz6B", "pdf_size": 1156112, "rating": "5;6;6;6;6", "confidence": "3;4;2;2;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;2", "presentation": "3;3;3;3;3", "wc_summary": "69;60;44;34;84", "wc_strengths": "37;48;8;76;91", "wc_weaknesses": "158;131;38;338;256", "wc_questions": "96;53;5;2;10", "wc_limitations": "14;8;1;2;1", "wc_review": "374;300;96;452;442", "wc_reply_reviewers": "18;25;0;31;162", "wc_reply_authors": "262;11;11;11;678", "reply_reviewers": "1;1;0;1;2", "reply_authors": "3;2;2;2;3", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.2, 17.735839421916292 ], "wc_strengths_avg": [ 52.0, 29.23696290656743 ], "wc_weaknesses_avg": [ 184.2, 103.63474320902233 ], "wc_questions_avg": [ 33.2, 36.449417004939875 ], "wc_limitations_avg": [ 5.2, 5.114684741017769 ], "wc_review_avg": [ 332.8, 130.36932154460266 ], "wc_reply_reviewers_avg": [ 47.2, 58.3348952171854 ], "wc_reply_authors_avg": [ 194.6, 260.516870854845 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.08574929257125444, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ewTtkJYIKKUJ:scholar.google.com/&scioq=Robust+and+Faster+Zeroth-Order+Minimax+Optimization:+Complexity+and+Applications&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "xidian.edu.cn;cuhk.edu.hk;tju.edu.cn;tju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Xidian University;Chinese University of Hong Kong;Tianjin University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.cuhk.edu.hk;http://www.tju.edu.cn", "aff_unique_abbr": "Xidian;CUHK;TJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Accelerating Diffusion Models with Parallel Sampling: Inference at Sub-Linear Time Complexity", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95999", "id": "F9NDzHQtOl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F9NDzHQtOl", "openreview": "https://openreview.net/forum?id=F9NDzHQtOl", "poster": "/media/PosterPDFs/NeurIPS%202024/95999.png?t=1731309643.2111065", "project": "", "author_site": "Haoxuan Chen, Yinuo Ren, Lexing Ying, Grant Rotskoff", "tldr": "", "abstract": "Diffusion models have become a leading method for generative modeling of both image and scientific data.\nAs these models are costly to train and \\emph{evaluate}, reducing the inference cost for diffusion models remains a major goal.\nInspired by the recent empirical success in accelerating diffusion models via the parallel sampling technique~\\cite{shih2024parallel}, we propose to divide the sampling process into $\\mathcal{O}(1)$ blocks with parallelizable Picard iterations within each block. Rigorous theoretical analysis reveals that our algorithm achieves $\\widetilde{\\mathcal{O}}(\\mathrm{poly} \\log d)$ overall time complexity, marking \\emph{the first implementation with provable sub-linear complexity w.r.t. the data dimension $d$}. Our analysis is based on a generalized version of Girsanov's theorem and is compatible with both the SDE and probability flow ODE implementations. Our results shed light on the potential of fast and efficient sampling of high-dimensional data on fast-evolving modern large-memory GPU clusters.", "keywords": "diffusion model;parallel sampling;stochastic differential equations;probability flow ode", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/24750ec840e8f949f92e9a052c88e83257a9c340.zip", "author": "Haoxuan Chen;Yinuo Ren;Lexing Ying;Grant M. Rotskoff", "authorids": "~Haoxuan_Chen1;~Yinuo_Ren1;~Lexing_Ying1;~Grant_M._Rotskoff1", "gender": "M;M;;M", "homepage": "https://haoxuanstevec00.github.io/;;http://web.stanford.edu/~lexing;https://statmech.stanford.edu", "dblp": "212/7201.html;281/6971;68/3945;220/5367", "google_scholar": "https://scholar.google.com/citations?hl=en;K9MUxuQAAAAJ;OwA3zyMAAAAJ;D6j2WboAAAAJ", "orcid": "0000-0002-8238-2764;;;", "linkedin": "haoxuan-steve-chen-748b0a171/;;;", "or_profile": "~Haoxuan_Chen1;~Yinuo_Ren1;~Lexing_Ying1;~Grant_M._Rotskoff1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024accelerating,\ntitle={Accelerating Diffusion Models with Parallel Sampling: Inference at Sub-Linear Time Complexity},\nauthor={Haoxuan Chen and Yinuo Ren and Lexing Ying and Grant M. Rotskoff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F9NDzHQtOl}\n}", "github": "", "reviewers": "FgeD;jWja;GP1D;sp2J", "pdf_size": 353731, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;4;4", "wc_summary": "55;97;175;133", "wc_strengths": "21;61;136;56", "wc_weaknesses": "170;115;397;72", "wc_questions": "5;2;206;43", "wc_limitations": "9;1;115;6", "wc_review": "260;276;1029;310", "wc_reply_reviewers": "41;0;86;0", "wc_reply_authors": "41;0;78;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 115.0, 44.294469180700204 ], "wc_strengths_avg": [ 68.5, 41.907636535600524 ], "wc_weaknesses_avg": [ 188.5, 125.28866668617717 ], "wc_questions_avg": [ 64.0, 83.56135470419325 ], "wc_limitations_avg": [ 32.75, 47.57297026673865 ], "wc_review_avg": [ 468.75, 323.9640219221881 ], "wc_reply_reviewers_avg": [ 31.75, 35.51320177060919 ], "wc_reply_authors_avg": [ 29.75, 32.4990384473141 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7965878340489143549&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SAM-Guided Masked Token Prediction for 3D Scene Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95998", "id": "F9i1avQTla", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F9i1avQTla", "openreview": "https://openreview.net/forum?id=F9i1avQTla", "poster": "/media/PosterPDFs/NeurIPS%202024/95998.png?t=1731288437.116134", "project": "", "author_site": "Zhimin Chen, Liang Yang, Yingwei Li, Longlong Jing, Bing Li", "tldr": "", "abstract": "Foundation models have significantly enhanced 2D task performance, and recent works like Bridge3D have successfully applied these models to improve 3D scene understanding through knowledge distillation, marking considerable advancements. Nonetheless, challenges such as the misalignment between 2D and 3D representations and the persistent long-tail distribution in 3D datasets still restrict the effectiveness of knowledge distillation from 2D to 3D using foundation models. To tackle these issues, we introduce a novel SAM-guided tokenization method that seamlessly aligns 3D transformer structures with region-level knowledge distillation, replacing the traditional KNN-based tokenization techniques. Additionally, we implement a group-balanced re-weighting strategy to effectively address the long-tail problem in knowledge distillation. Furthermore, inspired by the recent success of masked feature prediction, our framework incorporates a two-stage masked token prediction process in which the student model predicts both the global embeddings and token-wise local embeddings derived from the teacher models trained in the first stage. Our methodology has been validated across multiple datasets, including SUN RGB-D, ScanNet, and S3DIS, for tasks like 3D object detection and semantic segmentation. The results demonstrate significant improvements over current state-of-the-art self-supervised methods, establishing new benchmarks in this field.", "keywords": "3D self-supervised learning;Multi-modal Representation Learning;Masked autoencoders;Knowledge distillation;Foundation models", "primary_area": "other", "supplementary_material": "", "author": "Zhimin Chen;Liang Yang;Yingwei Li;Longlong Jing;Bing Li", "authorids": "~Zhimin_Chen1;~Liang_Yang4;~Yingwei_Li4;~Longlong_Jing1;~Bing_Li4", "gender": "M;M;M;M;M", "homepage": "https://zhiminc.website;https://ericlyang.github.io;http://yingwei.li/;https://longlong-jing.github.io/;http://cecas.clemson.edu/bingli", "dblp": ";;;214/9050;13/2692-8", "google_scholar": "OIYNwLkAAAAJ;jV_fdF0AAAAJ;phWmJeIAAAAJ;lhdhi5wAAAAJ;yysOczkAAAAJ", "orcid": ";0000-0002-3454-6242;;;0000-0003-4987-6129", "linkedin": ";;;;", "or_profile": "~Zhimin_Chen1;~Liang_Yang4;~Yingwei_Li4;~Longlong_Jing1;~Bing_Li4", "aff": "Clemson University;Apple;Waymo LLC;Waymo LLC;Clemson University", "aff_domain": "clemson.edu;apple.com;waymo.com;waymo.com;clemson.edu", "position": "PhD student;Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2024samguided,\ntitle={{SAM}-Guided Masked Token Prediction for 3D Scene Understanding},\nauthor={Zhimin Chen and Liang Yang and Yingwei Li and Longlong Jing and Bing Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F9i1avQTla}\n}", "github": "", "reviewers": "icGN;DTv4;KyqU;EQZo", "pdf_size": 911688, "rating": "5;5;5;7", "confidence": "5;4;5;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;3;2", "wc_summary": "50;66;55;40", "wc_strengths": "29;39;9;69", "wc_weaknesses": "109;42;155;86", "wc_questions": "83;44;4;38", "wc_limitations": "3;8;1;3", "wc_review": "274;199;224;236", "wc_reply_reviewers": "48;11;10;0", "wc_reply_authors": "160;22;22;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 52.75, 9.364160400164021 ], "wc_strengths_avg": [ 36.5, 21.650635094610966 ], "wc_weaknesses_avg": [ 98.0, 40.773766075750224 ], "wc_questions_avg": [ 42.25, 28.039035290109393 ], "wc_limitations_avg": [ 3.75, 2.5860201081971503 ], "wc_review_avg": [ 233.25, 27.049722734253674 ], "wc_reply_reviewers_avg": [ 17.25, 18.267115262131565 ], "wc_reply_authors_avg": [ 51.0, 63.56886030125127 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5014111402708559820&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "clemson.edu;apple.com;waymo.com;waymo.com;clemson.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Clemson University;Apple;Waymo", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.clemson.edu;https://www.apple.com;https://www.waymo.com", "aff_unique_abbr": "Clemson;Apple;Waymo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Newton Informed Neural Operator for Solving Nonlinear Partial Differential Equations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95997", "id": "F9mNL6vR27", "proceeding": "", "pdf": "https://openreview.net/pdf?id=F9mNL6vR27", "openreview": "https://openreview.net/forum?id=F9mNL6vR27", "poster": "/media/PosterPDFs/NeurIPS%202024/95997.png?t=1731611246.0292618", "project": "", "author_site": "Wenrui Hao, Xinliang Liu, Yahong Yang", "tldr": "", "abstract": "Solving nonlinear partial differential equations (PDEs) with multiple solutions is essential in various fields, including physics, biology, and engineering. However, traditional numerical methods, such as finite element and finite difference methods, often face challenges when dealing with nonlinear solvers, particularly in the presence of multiple solutions. These methods can become computationally expensive, especially when relying on solvers like Newton's method, which may struggle with ill-posedness near bifurcation points.\nIn this paper, we propose a novel approach, the Newton Informed Neural Operator, which learns the Newton solver for nonlinear PDEs. Our method integrates traditional numerical techniques with the Newton nonlinear solver, efficiently learning the nonlinear mapping at each iteration. This approach allows us to compute multiple solutions in a single learning process while requiring fewer supervised data points than existing neural network methods.", "keywords": "operator Learning;nonlinear partial differential equations;multiple solutions;Newton method", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/77c36616ad1875d6d4bf3005a6107fc01cfccfba.zip", "author": "Wenrui Hao;Xinliang Liu;Yahong Yang", "authorids": "~Wenrui_Hao1;~Xinliang_Liu1;~Yahong_Yang1", "gender": ";M;M", "homepage": "https://sites.psu.edu/whao/;https://cemse.kaust.edu.sa/scml/people/person/xinliang-liu;", "dblp": ";67/10364;", "google_scholar": "7x6CVcAAAAAJ;9AsSTc4AAAAJ;", "orcid": ";;0000-0002-9721-2362", "linkedin": ";;", "or_profile": "~Wenrui_Hao1;~Xinliang_Liu1;~Yahong_Yang1", "aff": "Pennsylvania State University;King Abdullah University of Science and Technology;Pennsylvania State University", "aff_domain": "psu.edu;kaust.edu.sa;psu.edu", "position": "Associate Professor;Postdoc;Postdoc", "bibtex": "@inproceedings{\nhao2024newton,\ntitle={Newton Informed Neural Operator for Computing Multiple Solutions of Nonlinear Partials Differential Equations},\nauthor={Wenrui Hao and Xinliang Liu and Yahong Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=F9mNL6vR27}\n}", "github": "", "reviewers": "3WGd;tGuq;6H5E;zTaB", "pdf_size": 1286268, "rating": "5;5;5;6", "confidence": "4;2;3;4", "soundness": "3;2;3;2", "novelty": "4;2;3;3", "presentation": "2;2;3;1", "wc_summary": "65;85;112;94", "wc_strengths": "51;21;78;113", "wc_weaknesses": "137;43;221;487", "wc_questions": "327;315;254;114", "wc_limitations": "18;1;1;26", "wc_review": "598;465;666;834", "wc_reply_reviewers": "25;524;104;101", "wc_reply_authors": "176;714;340;34", "reply_reviewers": "1;3;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.0, 16.926310879810757 ], "wc_strengths_avg": [ 65.75, 33.92178503557854 ], "wc_weaknesses_avg": [ 222.0, 165.44787698849447 ], "wc_questions_avg": [ 252.5, 84.61826044064011 ], "wc_limitations_avg": [ 11.5, 10.874281585465774 ], "wc_review_avg": [ 640.75, 132.94618272067837 ], "wc_reply_reviewers_avg": [ 188.5, 196.2708587641069 ], "wc_reply_authors_avg": [ 316.0, 254.01968427663238 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15503103576324999427&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "psu.edu;kaust.edu.sa;psu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Pennsylvania State University;King Abdullah University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.kast.kau.edu.sa", "aff_unique_abbr": "PSU;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Saudi Arabia" }, { "title": "Segmenting Watermarked Texts From Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95996", "id": "FAuFpGeLmx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FAuFpGeLmx", "openreview": "https://openreview.net/forum?id=FAuFpGeLmx", "poster": "/media/PosterPDFs/NeurIPS%202024/95996.png?t=1731461257.0551245", "project": "", "author_site": "Xingchi Li, Guanxun Li, Xianyang Zhang", "tldr": "", "abstract": "Watermarking is a technique that involves embedding nearly unnoticeable statistical signals within generated content to help trace its source. This work focuses on a scenario where an untrusted third-party user sends prompts to a trusted language model (LLM) provider, who then generates a text from their LLM with a watermark. This setup makes it possible for a detector to later identify the source of the text if the user publishes it. The user can modify the generated text by substitutions, insertions, or deletions. Our objective is to develop a statistical method to detect if a published text is LLM-generated from the perspective of a detector. We further propose a methodology to segment the published text into watermarked and non-watermarked sub-strings. The proposed approach is built upon randomization tests and change point detection techniques. We demonstrate that our method ensures Type I and Type II error control and can accurately identify watermarked sub-strings by finding the corresponding change point locations. To validate our technique, we apply it to texts generated by several language models with prompts extracted from Google's C4 dataset and obtain encouraging numerical results. We release all code publicly at https://github.com/doccstat/llm-watermark-cpd.", "keywords": "Large language models;Randomization test;Segmentation;Watermark", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/57c6deb273bf1516cc1a6ab474938b12c2664c0a.zip", "author": "Xingchi Li;Guanxun Li;Xianyang Zhang", "authorids": "~Xingchi_Li1;~Guanxun_Li1;~Xianyang_Zhang1", "gender": ";M;M", "homepage": ";http://www.guanxun.li;https://zhangxiany-tamu.github.io/", "dblp": ";295/6757;", "google_scholar": ";jxFkCp8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-2449-4469;", "linkedin": ";;", "or_profile": "~Xingchi_Li1;~Guanxun_Li1;~Xianyang_Zhang1", "aff": ";Texas A&M University - College Station;Texas A&M University - College Station", "aff_domain": ";tamu.edu;tamu.edu", "position": ";Postdoc;Full Professor", "bibtex": "@inproceedings{\nli2024segmenting,\ntitle={Segmenting Watermarked Texts From Language Models},\nauthor={Xingchi Li and Guanxun Li and Xianyang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FAuFpGeLmx}\n}", "github": "", "reviewers": "XFsq;LBBH;SyZV", "pdf_size": 1529809, "rating": "5;6;7", "confidence": "2;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "177;89;53", "wc_strengths": "58;63;72", "wc_weaknesses": "132;290;87", "wc_questions": "23;80;27", "wc_limitations": "10;22;4", "wc_review": "400;544;243", "wc_reply_reviewers": "239;68;32", "wc_reply_authors": "441;69;33", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 106.33333333333333, 52.085399958998956 ], "wc_strengths_avg": [ 64.33333333333333, 5.792715732327588 ], "wc_weaknesses_avg": [ 169.66666666666666, 87.04915597267762 ], "wc_questions_avg": [ 43.333333333333336, 25.978623691198287 ], "wc_limitations_avg": [ 12.0, 7.483314773547883 ], "wc_review_avg": [ 395.6666666666667, 122.92093212937964 ], "wc_reply_reviewers_avg": [ 113.0, 90.29950165975447 ], "wc_reply_authors_avg": [ 181.0, 184.43427013437605 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=175754569598122207&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";tamu.edu;tamu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dimension-free deterministic equivalents and scaling laws for random feature regression", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95995", "id": "FBLJIfW64D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FBLJIfW64D", "openreview": "https://openreview.net/forum?id=FBLJIfW64D", "poster": "/media/PosterPDFs/NeurIPS%202024/95995.png?t=1732110678.0163577", "project": "", "author_site": "Leonardo Defilippis, Bruno Loureiro, Theodor Misiakiewicz", "tldr": "", "abstract": "In this work we investigate the generalization performance of random feature ridge regression (RFRR). Our main contribution is a general deterministic equivalent for the test error of RFRR. Specifically, under a certain concentration property, we show that the test error is well approximated by a closed-form expression that only depends on the feature map eigenvalues. Notably, our approximation guarantee is non-asymptotic, multiplicative, and independent of the feature map dimension---allowing for infinite-dimensional features. We expect this deterministic equivalent to hold broadly beyond our theoretical analysis, and we empirically validate its predictions on various real and synthetic datasets. As an application, we derive sharp excess error rates under standard power-law assumptions of the spectrum and target decay. In particular, we provide a tight result for the smallest number of features achieving optimal minimax error rate.", "keywords": "random features;deterministic equivalents;error rates;random matrix theory;scaling laws", "primary_area": "learning_theory", "supplementary_material": "", "author": "Leonardo Defilippis;Bruno Loureiro;Theodor Misiakiewicz", "authorids": "~Leonardo_Defilippis1;~Bruno_Loureiro1;~Theodor_Misiakiewicz1", "gender": "M;M;", "homepage": ";https://brloureiro.github.io/;https://misiakie.github.io", "dblp": "358/3529;207/1834;168/8360", "google_scholar": "https://scholar.google.fr/citations?user=-df-QMIAAAAJ;DXl3ir8AAAAJ;E8Jst30AAAAJ", "orcid": ";0000-0002-6327-4688;", "linkedin": ";bruno-loureiro-43183b14a/;", "or_profile": "~Leonardo_Defilippis1;~Bruno_Loureiro1;~Theodor_Misiakiewicz1", "aff": "Ecole Normale Sup\u00e9rieure, Ecole Normale Sup\u00e9rieure de Paris;Ecole Normale Sup\u00e9rieure, Ecole Normale Sup\u00e9rieure de Paris;Toyota Technological Institute at Chicago", "aff_domain": "di.ens.fr;di.ens.fr;ttic.edu", "position": "PhD student;Researcher;Postdoc", "bibtex": "@inproceedings{\ndefilippis2024dimensionfree,\ntitle={Dimension-free deterministic equivalents and scaling laws for random feature regression},\nauthor={Leonardo Defilippis and Bruno Loureiro and Theodor Misiakiewicz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FBLJIfW64D}\n}", "github": "", "reviewers": "Gqjy;gFLF;b7dx;tRLP", "pdf_size": 1017740, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "3;4;4;4", "novelty": "3;2;3;4", "presentation": "2;3;3;4", "wc_summary": "47;89;112;74", "wc_strengths": "92;189;75;133", "wc_weaknesses": "686;71;157;20", "wc_questions": "7;158;316;12", "wc_limitations": "13;18;1;7", "wc_review": "845;525;661;246", "wc_reply_reviewers": "87;57;40;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.5, 23.606143268225754 ], "wc_strengths_avg": [ 122.25, 43.928208477013946 ], "wc_weaknesses_avg": [ 233.5, 265.79926636467604 ], "wc_questions_avg": [ 123.25, 126.73865826968502 ], "wc_limitations_avg": [ 9.75, 6.378675411086537 ], "wc_review_avg": [ 569.25, 218.4632406149831 ], "wc_reply_reviewers_avg": [ 46.0, 31.44041984452498 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10795494223363311850&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "di.ens.fr;di.ens.fr;ttic.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Paris;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.ens.psl.eu;https://www.tti-chicago.org", "aff_unique_abbr": "ENS Paris;TTI Chicago", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Paris;Chicago", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;United States" }, { "title": "Masked Hard-Attention Transformers Recognize Exactly the Star-Free Languages", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95994", "id": "FBMsBdH0yz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FBMsBdH0yz", "openreview": "https://openreview.net/forum?id=FBMsBdH0yz", "poster": "/media/PosterPDFs/NeurIPS%202024/95994.png?t=1733264595.4434388", "project": "", "author_site": "Andy Yang, David Chiang, Dana Angluin", "tldr": "", "abstract": "The expressive power of transformers over inputs of unbounded size can be studied through their ability to recognize classes of formal languages. In this paper, we establish exact characterizations of transformers with hard attention (in which all attention is focused on exactly one position) and attention masking (in which each position only attends to positions on one side). With strict masking (each position cannot attend to itself) and without position embeddings, these transformers are expressively equivalent to linear temporal logic (LTL), which defines exactly the star-free languages. A key technique is the use of Boolean RASP as a convenient intermediate language between transformers and LTL. We then take numerous results known for LTL and apply them to transformers, showing how position embeddings, strict masking, and depth all increase expressive power.", "keywords": "Transformers;Formal Language Theory;Logic;Automata;Expressivity", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Andy Yang;David Chiang;Dana Angluin", "authorids": "~Andy_Yang1;~David_Chiang1;~Dana_Angluin1", "gender": ";M;F", "homepage": ";https://nd.edu/~dchiang;http://cpsc.yale.edu/people/dana-angluin/", "dblp": ";https://dblp.org/pers/hd/c/Chiang_0001:David;", "google_scholar": ";dok0514AAAAJ;https://scholar.google.com.tw/citations?user=bxi6JXYAAAAJ", "orcid": ";0000-0002-0435-4864;", "linkedin": ";;", "or_profile": "~Andy_Yang1;~David_Chiang1;~Dana_Angluin1", "aff": ";University of Notre Dame;Yale University", "aff_domain": ";nd.edu;yale.edu", "position": ";Associate Professor;Emeritus", "bibtex": "@inproceedings{\nyang2024masked,\ntitle={Masked Hard-Attention Transformers Recognize Exactly the Star-Free Languages},\nauthor={Andy Yang and David Chiang and Dana Angluin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FBMsBdH0yz}\n}", "github": "", "reviewers": "rKKi;6BGB;6xK6;u2XQ", "pdf_size": 411195, "rating": "6;6;6;7", "confidence": "4;3;5;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "212;87;104;184", "wc_strengths": "56;48;72;93", "wc_weaknesses": "371;91;111;96", "wc_questions": "134;235;55;1", "wc_limitations": "39;21;7;13", "wc_review": "812;482;349;387", "wc_reply_reviewers": "70;116;48;15", "wc_reply_authors": "24;17;6;8", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 146.75, 52.54224490826405 ], "wc_strengths_avg": [ 67.25, 17.195566289017645 ], "wc_weaknesses_avg": [ 167.25, 117.86512418862503 ], "wc_questions_avg": [ 106.25, 88.10611499776846 ], "wc_limitations_avg": [ 20.0, 12.041594578792296 ], "wc_review_avg": [ 507.5, 182.35473670842774 ], "wc_reply_reviewers_avg": [ 62.25, 36.69042790701684 ], "wc_reply_authors_avg": [ 13.75, 7.224091638399945 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5761006184899010439&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": ";nd.edu;yale.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Notre Dame;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.yale.edu", "aff_unique_abbr": "Notre Dame;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95993", "id": "FCsEvaMorw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FCsEvaMorw", "openreview": "https://openreview.net/forum?id=FCsEvaMorw", "poster": "/media/PosterPDFs/NeurIPS%202024/95993.png?t=1733437645.7459922", "project": "", "author_site": "Mikayel Samvelyan, Sharath Chandra Raparthy, Andrei Lupu, Eric Hambro, Aram Markosyan, Manish Bhatt, Yuning Mao, Minqi Jiang, Jack Parker-Holder, Jakob Foerster, Tim Rockt\u00e4schel, Roberta Raileanu", "tldr": "", "abstract": "As large language models (LLMs) become increasingly prevalent across many real-world applications, understanding and enhancing their robustness to adversarial attacks is of paramount importance. Existing methods for identifying adversarial prompts tend to focus on specific domains, lack diversity, or require extensive human annotations. To address these limitations, we present Rainbow Teaming, a novel black-box approach for producing a diverse collection of adversarial prompts. Rainbow Teaming casts adversarial prompt generation as a quality-diversity problem and uses open-ended search to generate prompts that are both effective and diverse. Focusing on the safety domain, we use Rainbow Teaming to target various state-of-the-art LLMs, including the Llama 2 and Llama 3 models. Our approach reveals hundreds of effective adversarial prompts, with an attack success rate exceeding 90% across all tested models. Furthermore, we demonstrate that prompts generated by Rainbow Teaming are highly transferable and that fine-tuning models with synthetic data generated by our method significantly enhances their safety without sacrificing general performance or helpfulness. We additionally explore the versatility of Rainbow Teaming by applying it to question answering and cybersecurity, showcasing its potential to drive robust open-ended self-improvement in a wide range of applications.", "keywords": "open-endedness;adversarial robustness;safety", "primary_area": "generative_models", "supplementary_material": "", "author": "Mikayel Samvelyan;Sharath Chandra Raparthy;Andrei Lupu;Eric Hambro;Aram H. Markosyan;Manish Bhatt;Yuning Mao;Minqi Jiang;Jack Parker-Holder;Jakob Nicolaus Foerster;Tim Rockt\u00e4schel;Roberta Raileanu", "authorids": "~Mikayel_Samvelyan1;~Sharath_Chandra_Raparthy3;~Andrei_Lupu1;~Eric_Hambro1;~Aram_H._Markosyan1;~Manish_Bhatt1;~Yuning_Mao1;~Minqi_Jiang1;~Jack_Parker-Holder1;~Jakob_Nicolaus_Foerster1;~Tim_Rockt\u00e4schel1;~Roberta_Raileanu2", "gender": "M;M;M;M;M;;M;M;M;M;F;M", "homepage": "https://www.samvelyan.com/;https://sharathraparthy.github.io/;;https://erichambro.com/;https://scholar.google.com/citations?hl=en&user=Vo6IXHcAAAAJ&view_op=list_works&sortby=pubdate;https://morningmoni.github.io/;https://twitter.com/minqijiang;https://jparkerholder.github.io/;https://www.jakobfoerster.com;https://www.markosyanaram.com/;https://rraileanu.github.io/;http://rockt.ai", "dblp": "170/0101;302/4190;218/7027;290/1986;;178/3692;270/7949;237/9793.html;176/5095;321/0886;215/5579;43/11537", "google_scholar": "2Qs19WAAAAAJ;https://scholar.google.ca/citations?user=S1R0_UMAAAAJ;I6aB-YUAAAAJ;ehquBPIAAAAJ;https://scholar.google.com/citations?hl=en;steJe6IAAAAJ;;;6z4lQzMAAAAJ;NugC7bQAAAAJ;9hVXpJ0AAAAJ;https://scholar.google.co.uk/citations?user=mWBY8aIAAAAJ", "orcid": "0009-0001-6748-8755;;;;;;;;;;;", "linkedin": "samvelyan;;lupu-andrei;eric-hambro;;morningmoni/;minqi-jiang-585a6536/;;;markosyanaram/;roberta-raileanu-44b25660/;rockt/", "or_profile": "~Mikayel_Samvelyan1;~Sharath_Chandra_Raparthy3;~Andrei_Lupu1;~Eric_Hambro1;~Manish_Bhatt1;~Yuning_Mao1;~Minqi_Jiang1;~Jack_Parker-Holder1;~Jakob_Nicolaus_Foerster1;~Aram_Markosyan1;~Roberta_Raileanu1;~Tim_Rocktaeschel1", "aff": "Meta (FAIR);Meta Facebook;Meta AI;Anthropic;AI Security ;Meta;Google;Google DeepMind;University of Oxford, University of Oxford;Meta Facebook;Meta Facebook;Google DeepMind", "aff_domain": "fb.com;fb.com;meta.com;anthropic.com;gmail.com;meta.com;google.com;google.com;eng.ox.ac.uk;meta.com;fb.com;google.com", "position": "Research Assistant;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Associate Professor;Researcher;Researcher;Senior Staff Research Scientist", "bibtex": "@inproceedings{\nsamvelyan2024rainbow,\ntitle={Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts},\nauthor={Mikayel Samvelyan and Sharath Chandra Raparthy and Andrei Lupu and Eric Hambro and Aram H. Markosyan and Manish Bhatt and Yuning Mao and Minqi Jiang and Jack Parker-Holder and Jakob Nicolaus Foerster and Tim Rockt{\\\"a}schel and Roberta Raileanu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FCsEvaMorw}\n}", "github": "", "reviewers": "sYeN;DdX9;hknM;w9N7;E172", "pdf_size": 3540938, "rating": "3;4;4;7;8", "confidence": "4;4;3;5;4", "soundness": "2;2;3;3;3", "novelty": "2;2;3;4;3", "presentation": "4;3;3;4;4", "wc_summary": "127;51;49;58;255", "wc_strengths": "44;17;37;40;382", "wc_weaknesses": "324;118;10;329;253", "wc_questions": "70;3;52;28;152", "wc_limitations": "1;1;93;8;33", "wc_review": "566;190;241;463;1075", "wc_reply_reviewers": "627;0;131;298;0", "wc_reply_authors": "1716;152;266;290;970", "reply_reviewers": "3;0;2;3;0", "reply_authors": "5;2;4;4;2", "rating_avg": [ 5.2, 1.9390719429665317 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 108.0, 78.99367063252599 ], "wc_strengths_avg": [ 104.0, 139.3111625104033 ], "wc_weaknesses_avg": [ 206.8, 124.43375747762342 ], "wc_questions_avg": [ 61.0, 50.785824793932406 ], "wc_limitations_avg": [ 27.2, 34.94223805081752 ], "wc_review_avg": [ 507.0, 316.00189872847284 ], "wc_reply_reviewers_avg": [ 211.2, 234.97523273741 ], "wc_reply_authors_avg": [ 678.8, 593.2400525925402 ], "reply_reviewers_avg": [ 1.6, 1.3564659966250538 ], "reply_authors_avg": [ 3.4, 1.2000000000000002 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.48924605479008165, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18363054933945064477&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "fb.com;fb.com;meta.com;anthropic.com;gmail.com;meta.com;google.com;google.com;eng.ox.ac.uk;meta.com;fb.com;google.com", "author_num": 12, "aff_unique_index": "0;0;0;1;2;0;3;3;4;0;0;3", "aff_unique_norm": "Meta;Anthropic;AI Security;Google;University of Oxford", "aff_unique_dep": "FAIR;;;Google;", "aff_unique_url": "https://meta.org;https://www.anthropic.com;;https://www.google.com;https://www.ox.ac.uk", "aff_unique_abbr": "Meta;Anthropic;;Google;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;2;2;0;0;2", "aff_country_unique": "United States;;United Kingdom" }, { "title": "DoFIT: Domain-aware Federated Instruction Tuning with Alleviated Catastrophic Forgetting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95992", "id": "FDfrPugkGU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FDfrPugkGU", "openreview": "https://openreview.net/forum?id=FDfrPugkGU", "poster": "/media/PosterPDFs/NeurIPS%202024/95992.png?t=1730821247.302524", "project": "", "author_site": "Binqian Xu, Xiangbo Shu, Haiyang Mei, Zechen Bai, Basura Fernando, Mike Zheng Shou, Jinhui Tang", "tldr": "", "abstract": "Federated Instruction Tuning (FIT) advances collaborative training on decentralized data, crucially enhancing model's capability and safeguarding data privacy. However, existing FIT methods are dedicated to handling data heterogeneity across different clients (i.e., client-aware data heterogeneity), while ignoring the variation between data from different domains (i.e., domain-aware data heterogeneity). When scarce data needs supplementation from related fields, these methods lack the ability to handle domain heterogeneity in cross-domain training. This leads to domain-information catastrophic forgetting in collaborative training and therefore makes model perform sub-optimally on the individual domain. To address this issue, we introduce DoFIT, a new Domain-aware FIT framework that alleviates catastrophic forgetting through two new designs. First, to reduce interference information from the other domain, DoFIT finely aggregates overlapping weights across domains on the inter-domain server side. Second, to retain more domain information, DoFIT initializes intra-domain weights by incorporating inter-domain information into a less-conflicted parameter space. Experimental results on diverse datasets consistently demonstrate that DoFIT excels in cross-domain collaborative training and exhibits significant advantages over conventional FIT methods in alleviating catastrophic forgetting. Code is available at [this link](https://github.com/1xbq1/DoFIT).", "keywords": "Federated Instruction Tuning;LLM;LoRA", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Binqian Xu;Xiangbo Shu;Haiyang Mei;Zechen Bai;Basura Fernando;Mike Zheng Shou;Jinhui Tang", "authorids": "~Binqian_Xu1;~Xiangbo_Shu1;~Haiyang_Mei1;~Zechen_Bai1;~Basura_Fernando1;~Mike_Zheng_Shou1;~Jinhui_Tang1", "gender": "F;M;M;M;M;M;", "homepage": "https://1xbq1.github.io/;https://shuxb104.github.io/;https://mhaiyang.github.io/;https://www.baizechen.site/;https://basurafernando.github.io/;https://imag-njust.net/jinhui-tang/;http://www.columbia.edu/~zs2262/", "dblp": "321/8328;169/3410;234/9586;256/5272;01/9558;75/1030;284/0807", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;FQfcm5oAAAAJ;yfq6OSkAAAAJ;aIdQ8GwAAAAJ;https://scholar.google.com.au/citations?user=GyvseMkAAAAJ;;h1-3lSoAAAAJ", "orcid": "0000-0003-3904-1400;;0000-0003-3549-9684;;0000-0002-6920-9916;;", "linkedin": ";;haiyang-mei-9832b622a/;;;;", "or_profile": "~Binqian_Xu1;~Xiangbo_Shu1;~Haiyang_Mei1;~Zechen_Bai1;~Basura_Fernando1;~Jinhui_Tang1;~Zheng_Shou1", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;National University of Singapore;National University of Singapore;A*STAR;Nanjing University of Science and Technology;National University of Singapore", "aff_domain": "njust.edu.cn;njust.edu.cn;nus.edu.sg;u.nus.edu;astar.edu.sg;njust.edu.cn;nus.edu.sg", "position": "PhD student;Full Professor;Postdoc;PhD student;Principal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024dofit,\ntitle={Do{FIT}: Domain-aware Federated Instruction Tuning with Alleviated Catastrophic Forgetting},\nauthor={Binqian Xu and Xiangbo Shu and Haiyang Mei and Zechen Bai and Basura Fernando and Mike Zheng Shou and Jinhui Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FDfrPugkGU}\n}", "github": "", "reviewers": "zbxm;Vje9;DeWw;xCSC;ZeZJ", "pdf_size": 1665310, "rating": "4;6;7;7;8", "confidence": "4;4;5;5;4", "soundness": "2;3;3;4;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "72;87;100;94;120", "wc_strengths": "16;116;64;55;58", "wc_weaknesses": "270;54;125;63;73", "wc_questions": "65;19;8;80;69", "wc_limitations": "2;7;14;5;7", "wc_review": "425;283;311;297;327", "wc_reply_reviewers": "0;0;72;0;66", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.6, 15.768322675541619 ], "wc_strengths_avg": [ 61.8, 31.939943644283407 ], "wc_weaknesses_avg": [ 117.0, 80.36665975390541 ], "wc_questions_avg": [ 48.2, 28.964806231010762 ], "wc_limitations_avg": [ 7.0, 3.9496835316262997 ], "wc_review_avg": [ 328.6, 50.365067258964324 ], "wc_reply_reviewers_avg": [ 27.6, 33.85616635119813 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.36115755925730764, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vQzt5VEmYtEJ:scholar.google.com/&scioq=DoFIT:+Domain-aware+Federated+Instruction+Tuning+with+Alleviated+Catastrophic+Forgetting&hl=en&as_sdt=0,31", "gs_version_total": 2, "email": "njust.edu.cn;njust.edu.cn;nus.edu.sg;u.nus.edu;astar.edu.sg;njust.edu.cn;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;1;2;0;1", "aff_unique_norm": "Nanjing University of Science and Technology;National University of Singapore;Agency for Science, Technology and Research", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nust.edu.cn/;https://www.nus.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NUST;NUS;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Rethinking the Capacity of Graph Neural Networks for Branching Strategy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95991", "id": "FEmag0szWo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FEmag0szWo", "openreview": "https://openreview.net/forum?id=FEmag0szWo", "poster": "/media/PosterPDFs/NeurIPS%202024/95991.png?t=1733711619.9412918", "project": "", "author_site": "Ziang Chen, Jialin Liu, Xiaohan Chen, Wang, Wotao Yin", "tldr": "", "abstract": "Graph neural networks (GNNs) have been widely used to predict properties and heuristics of mixed-integer linear programs (MILPs) and hence accelerate MILP solvers. This paper investigates the capacity of GNNs to represent strong branching (SB), the most effective yet computationally expensive heuristic employed in the branch-and-bound algorithm. In the literature, message-passing GNN (MP-GNN), as the simplest GNN structure, is frequently used as a fast approximation of SB and we find that not all MILPs's SB can be represented with MP-GNN. We precisely define a class of \"MP-tractable\" MILPs for which MP-GNNs can accurately approximate SB scores. Particularly, we establish a universal approximation theorem: for any data distribution over the MP-tractable class, there always exists an MP-GNN that can approximate the SB score with arbitrarily high accuracy and arbitrarily high probability, which lays a theoretical foundation of the existing works on imitating SB with MP-GNN. For MILPs without the MP-tractability, unfortunately, a similar result is impossible, which can be illustrated by two MILP instances with different SB scores that cannot be distinguished by any MP-GNN, regardless of the number of parameters. Recognizing this, we explore another GNN structure called the second-order folklore GNN (2-FGNN) that overcomes this limitation, and the aforementioned universal approximation theorem can be extended to the entire MILP space using 2-FGNN, regardless of the MP-tractability. A small-scale numerical experiment is conducted to directly validate our theoretical findings.", "keywords": "Mixed-integer linear programs;strong branching;message-passing graph neural networks;second-order folklore graph neural networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/b5e43f4b898f446b1c0c199894cb064146d67df6.zip", "author": "Ziang Chen;Jialin Liu;Xiaohan Chen;Xinshang Wang;Wotao Yin", "authorids": "~Ziang_Chen1;~Jialin_Liu1;~Xiaohan_Chen1;~Xinshang_Wang1;~Wotao_Yin1", "gender": "M;M;M;;M", "homepage": "https://sites.duke.edu/ziangchen/;https://liujl11git.github.io/;http://xiaohanchen.com;;http://wotaoyin.com", "dblp": ";;94/3802;196/7073;76/2265", "google_scholar": "odvrFvIAAAAJ;QS6Lj5sAAAAJ;https://scholar.google.com/citations?authuser=1;;kpQGGFUAAAAJ", "orcid": "0000-0002-8298-5223;;0000-0002-0360-0402;;0000-0001-6697-9731", "linkedin": ";;xiaohan-chen-400b00147/;;", "or_profile": "~Ziang_Chen1;~Jialin_Liu1;~Xiaohan_Chen1;~Xinshang_Wang1;~Wotao_Yin1", "aff": "Massachusetts Institute of Technology;Alibaba Group US;Alibaba Group;;Alibaba Group US", "aff_domain": "mit.edu;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "position": "Instructor;Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nchen2024rethinking,\ntitle={Rethinking the Capacity of Graph Neural Networks for Branching Strategy},\nauthor={Ziang Chen and Jialin Liu and Xiaohan Chen and Xinshang Wang and Wotao Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FEmag0szWo}\n}", "github": "", "reviewers": "VcHk;xf7T;1NEU;uxqS;BDoF", "pdf_size": 499522, "rating": "5;5;7;8;8", "confidence": "4;4;3;2;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "78;139;101;42;146", "wc_strengths": "88;94;40;45;65", "wc_weaknesses": "250;261;41;37;77", "wc_questions": "169;156;84;16;20", "wc_limitations": "8;17;19;9;1", "wc_review": "593;667;285;149;309", "wc_reply_reviewers": "9;320;23;11;79", "wc_reply_authors": "0;681;0;0;238", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;3;1;1;2", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 101.2, 38.67505656104461 ], "wc_strengths_avg": [ 66.4, 21.84124538573751 ], "wc_weaknesses_avg": [ 133.2, 100.88488489362517 ], "wc_questions_avg": [ 89.0, 64.81357882419394 ], "wc_limitations_avg": [ 10.8, 6.523802572120037 ], "wc_review_avg": [ 400.6, 196.4908140346515 ], "wc_reply_reviewers_avg": [ 88.4, 118.57419618112534 ], "wc_reply_authors_avg": [ 183.8, 265.1387561259199 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2602417193848081, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8833493182829326672&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": "mit.edu;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.alibaba.com", "aff_unique_abbr": "MIT;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "AutoSurvey: Large Language Models Can Automatically Write Surveys", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95990", "id": "FExX8pMrdT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FExX8pMrdT", "openreview": "https://openreview.net/forum?id=FExX8pMrdT", "poster": "/media/PosterPDFs/NeurIPS%202024/95990.png?t=1731385815.4159586", "project": "", "author_site": "Yidong Wang, Qi Guo, Wenjin Yao, Hongbo Zhang, Xin Zhang, Zhen Wu, Meishan Zhang, Xinyu Dai, Min zhang, Qingsong Wen, Wei Ye, Shikun Zhang, Yue Zhang", "tldr": "", "abstract": "This paper introduces AutoSurvey, a speedy and well-organized methodology for automating the creation of comprehensive literature surveys in rapidly evolving fields like artificial intelligence. Traditional survey paper creation faces challenges due to the vast volume and complexity of information, prompting the need for efficient survey methods. While large language models (LLMs) offer promise in automating this process, challenges such as context window limitations, parametric knowledge constraints, and the lack of evaluation benchmarks remain. AutoSurvey addresses these challenges through a systematic approach that involves initial retrieval and outline generation, subsection drafting by specialized LLMs, integration and refinement, and rigorous evaluation and iteration. Our contributions include a comprehensive solution to the survey problem, a reliable evaluation method, and experimental validation demonstrating AutoSurvey's effectiveness.", "keywords": "large language model; AI applications", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/114b31fc0b1865cd0c42665ee47c9f7644beca11.zip", "author": "Yidong Wang;Qi Guo;Wenjin Yao;Hongbo Zhang;Xin Zhang;Zhen Wu;Meishan Zhang;Xinyu Dai;Min zhang;Qingsong Wen;Wei Ye;Shikun Zhang;Yue Zhang", "authorids": "~Yidong_Wang1;~Qi_Guo11;~Wenjin_Yao1;~Hongbo_Zhang5;~Xin_Zhang15;~Zhen_Wu2;~Meishan_Zhang1;~Xinyu_Dai1;~Min_zhang14;~Qingsong_Wen2;~Wei_Ye2;~Shikun_Zhang2;~Yue_Zhang7", "gender": "M;M;M;M;M;M;M;M;M;M;M;M;M", "homepage": "https://qianlanwyd.github.io/;;;https://hongbozhang.site/;https://izhx.github.io;https://wuzhen247.github.io/;https://zhangmeishan.github.io/;http://cs.nju.edu.cn/daixinyu;https://zhangmin-nlp-ai.github.io/;https://se.pku.edu.cn/kcl/weiye/;;http://frcchang.github.io;https://sites.google.com/site/qingsongwen8/", "dblp": "59/6759.html;;;;76/1584-97;16/4485-2;127/0273;39/5815;83/5342-?;09/5394-4;83/3715.html;47/722-4;27/561", "google_scholar": ";;FdntfpkAAAAJ;mv7nG38AAAAJ;Cn1zs9cAAAAJ;IoGlgtoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?;RgLGFMIAAAAJ;uiklLscAAAAJ;;vjPJvwYAAAAJ", "orcid": ";0009-0009-4681-6404;;0000-0003-0425-3673;0000-0002-2550-3056;0000-0002-7678-103X;;;0000-0002-3895-5510;;;0000-0002-5214-2268;0000-0003-4516-2524", "linkedin": ";;;;;;;;;;;;qingsong-wen-22814156/", "or_profile": "~Yidong_Wang1;~Qi_Guo11;~Wenjin_Yao1;~Hongbo_Zhang5;~Xin_Zhang15;~Zhen_Wu2;~Meishan_Zhang1;~Xinyu_Dai1;~Min_zhang14;~Wei_Ye2;~Shikun_Zhang2;~Yue_Zhang7;~Qingsong_Wen1", "aff": "Peking University;Nanjing University;Peking University;Westlake University;Harbin Institute of Technology, Shenzhen;Nanjing University;Harbin Institute of Technology (Shenzhen), China;Nanjing University;Harbin Institute of Technology;Peking University;Peking University;Westlake University;Squirrel Ai Learning", "aff_domain": "pku.edu.cn;nju.edu.cn;pku.edu.cn;westlake.edu.cn;hit.edu.cn;nju.edu.cn;hit.edu.cn;nju.edu.cn;hit.edu.cn;pku.edu.cn;pku.edu.cn;westlake.edu.cn;squirrelai.com", "position": "PhD student;Undergrad student;MS student;PhD student;PhD student;Researcher;Associate Professor;Full Professor;Full Professor;Associate Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nwang2024autosurvey,\ntitle={AutoSurvey: Large Language Models Can Automatically Write Surveys},\nauthor={Yidong Wang and Qi Guo and Wenjin Yao and Hongbo Zhang and Xin Zhang and Zhen Wu and Meishan Zhang and Xinyu Dai and Min zhang and Qingsong Wen and Wei Ye and Shikun Zhang and Yue Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FExX8pMrdT}\n}", "github": "", "reviewers": "jk6Z;LdXj;EgXr;VjJG", "pdf_size": 830395, "rating": "5;5;6;8", "confidence": "4;4;4;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "236;97;153;114", "wc_strengths": "71;51;23;45", "wc_weaknesses": "207;150;87;66", "wc_questions": "57;60;134;44", "wc_limitations": "64;5;14;6", "wc_review": "635;363;411;275", "wc_reply_reviewers": "29;0;14;73", "wc_reply_authors": "61;76;64;25", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 150.0, 53.6423340282654 ], "wc_strengths_avg": [ 47.5, 17.109938632268673 ], "wc_weaknesses_avg": [ 127.5, 55.33760023709015 ], "wc_questions_avg": [ 73.75, 35.301380992816696 ], "wc_limitations_avg": [ 22.25, 24.355440870573457 ], "wc_review_avg": [ 421.0, 132.83071933856263 ], "wc_reply_reviewers_avg": [ 29.0, 27.39525506360545 ], "wc_reply_authors_avg": [ 56.5, 19.03286631067428 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17283204867963466673&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;nju.edu.cn;pku.edu.cn;westlake.edu.cn;hit.edu.cn;nju.edu.cn;hit.edu.cn;nju.edu.cn;hit.edu.cn;pku.edu.cn;pku.edu.cn;westlake.edu.cn;squirrelai.com", "author_num": 13, "aff_unique_index": "0;1;0;2;3;1;3;1;3;0;0;2;4", "aff_unique_norm": "Peking University;Nanjing University;Westlake University;Harbin Institute of Technology;Squirrel Ai Learning", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.nju.edu.cn;https://www.westlake.edu.cn;http://en.hhit.edu.cn/;https://www.squirrelai.com/", "aff_unique_abbr": "Peking U;Nanjing U;WU;HIT;", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Shenzhen;Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Consistency Diffusion Bridge Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95989", "id": "FFJFGx78OK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FFJFGx78OK", "openreview": "https://openreview.net/forum?id=FFJFGx78OK", "poster": "/media/PosterPDFs/NeurIPS%202024/95989.png?t=1731704889.7614758", "project": "", "author_site": "Guande He, Kaiwen Zheng, Jianfei Chen, Fan Bao, Jun Zhu", "tldr": "", "abstract": "Diffusion models (DMs) have become the dominant paradigm of generative modeling in a variety of domains by learning stochastic processes from noise to data. Recently, diffusion denoising bridge models (DDBMs), a new formulation of generative modeling that builds stochastic processes between fixed data endpoints based on a reference diffusion process, have achieved empirical success across tasks with coupled data distribution, such as image-to-image translation. However, DDBM's sampling process typically requires hundreds of network evaluations to achieve decent performance, which may impede their practical deployment due to high computational demands. In this work, inspired by the recent advance of consistency models in DMs, we tackle this problem by learning the consistency function of the probability-flow ordinary differential equation (PF-ODE) of DDBMs, which directly predicts the solution at a starting step given any point on the ODE trajectory. Based on a dedicated general-form ODE solver, we propose two paradigms: consistency bridge distillation and consistency bridge training, which is flexible to apply on DDBMs with broad design choices. Experimental results show that our proposed method could sample $4\\times$ to $50\\times$ faster than the base DDBM and produce better visual quality given the same step in various tasks with pixel resolution ranging from $64 \\times 64$ to $256 \\times 256$, as well as supporting downstream tasks such as semantic interpolation in the data space.", "keywords": "Diffusion Bridges;Consistency Models;Image Translation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Guande He;Kaiwen Zheng;Jianfei Chen;Fan Bao;Jun Zhu", "authorids": "~Guande_He1;~Kaiwen_Zheng2;~Jianfei_Chen1;~Fan_Bao1;~Jun_Zhu2", "gender": ";M;M;M;M", "homepage": "https://guandehe.github.io/;https://zhengkw18.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;https://baofff.github.io/;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "348/7035.html;;48/6809-1;71/3877;50/2644-1", "google_scholar": "3rddMeMAAAAJ;0d80xSIAAAAJ;di5RZ1MAAAAJ;;axsP38wAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Guande_He1;~Kaiwen_Zheng2;~Jianfei_Chen1;~Fan_Bao1;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "MS student;MS student;Associate Professor;PhD student;Professor", "bibtex": "@inproceedings{\nhe2024consistency,\ntitle={Consistency Diffusion Bridge Models},\nauthor={Guande He and Kaiwen Zheng and Jianfei Chen and Fan Bao and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FFJFGx78OK}\n}", "github": "", "reviewers": "NqhJ;7nJA;nr1H;2JER", "pdf_size": 28939010, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;4;3;3", "wc_summary": "46;291;347;74", "wc_strengths": "65;123;79;50", "wc_weaknesses": "293;51;369;324", "wc_questions": "7;282;1;2", "wc_limitations": "13;25;1;2", "wc_review": "424;772;797;452", "wc_reply_reviewers": "153;17;0;542", "wc_reply_authors": "816;0;0;754", "reply_reviewers": "1;1;0;3", "reply_authors": "3;1;1;5", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 189.5, 131.3782706538642 ], "wc_strengths_avg": [ 79.25, 27.261465477849864 ], "wc_weaknesses_avg": [ 259.25, 123.23225024318918 ], "wc_questions_avg": [ 73.0, 120.68761328321975 ], "wc_limitations_avg": [ 10.25, 9.730750228014282 ], "wc_review_avg": [ 611.25, 173.75755379263373 ], "wc_reply_reviewers_avg": [ 178.0, 218.36093973052965 ], "wc_reply_authors_avg": [ 392.5, 393.1116253686731 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zMwljjEFnL4J:scholar.google.com/&scioq=Consistency+Diffusion+Bridge+Models&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Analysing Multi-Task Regression via Random Matrix Theory with Application to Time Series Forecasting", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95988", "id": "FFW6rPz48Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FFW6rPz48Z", "openreview": "https://openreview.net/forum?id=FFW6rPz48Z", "poster": "", "project": "", "author_site": "Romain Ilbert, Malik Tiomoko, Cosme Louart, Ambroise Odonnat, Vasilii Feofanov, Themis Palpanas, Ievgen Redko", "tldr": "", "abstract": "In this paper, we introduce a novel theoretical framework for multi-task regression, applying random matrix theory to provide precise performance estimations, under high-dimensional, non-Gaussian data distributions. We formulate a multi-task optimization problem as a regularization technique to enable single-task models to leverage multi-task learning information. We derive a closed-form solution for multi-task optimization in the context of linear models. Our analysis provides valuable insights by linking the multi-task learning performance to various model statistics such as raw data covariances, signal-generating hyperplanes, noise levels, as well as the size and number of datasets. We finally propose a consistent estimation of training and testing errors, thereby offering a robust foundation for hyperparameter optimization in multi-task regression scenarios. Experimental validations on both synthetic and real-world datasets in regression and multivariate time series forecasting demonstrate improvements on univariate models, incorporating our method into the training loss and thus leveraging multivariate information.", "keywords": "Random Matrix Theory ; Optimization ; Regularization ; Multi-task regression ; Multi-task learning ; Multivariate Time Series Forecasting", "primary_area": "learning_theory", "supplementary_material": "", "author": "Romain Ilbert;Malik Tiomoko;Cosme Louart;Ambroise Odonnat;Vasilii Feofanov;Themis Palpanas;Ievgen Redko", "authorids": "~Romain_Ilbert1;~Malik_Tiomoko1;~Cosme_Louart1;~Ambroise_Odonnat1;~Vasilii_Feofanov1;~Themis_Palpanas1;~Ievgen_Redko2", "gender": ";M;M;M;M;Not Specified;", "homepage": "https://romilbert.github.io;;https://sds.cuhk.edu.cn/en/teacher/947;https://ambroiseodt.github.io/;;https://helios2.mi.parisdescartes.fr/~themisp/;", "dblp": ";228/9231;195/6037.html;359/3799;245/3361;p/ThemisPalpanas;150/3980", "google_scholar": "65uE37cAAAAJ;;https://scholar.google.fr/citations?user=Yq5vG04AAAAJ;M_OS-3kAAAAJ;https://scholar.google.ru/citations?user=UIteS6oAAAAJ;qUBdmWgAAAAJ;https://scholar.google.fr/citations?user=qJ1-XewAAAAJ", "orcid": "0000-0001-8572-6510;;0000-0001-6652-1018;;0000-0002-5777-4205;0000-0002-8031-0265;", "linkedin": "romain-ilbert/;;https://fr.linkedin.com/in/cosme-louart;ambroise-odonnat/;;;", "or_profile": "~Romain_Ilbert1;~Malik_Tiomoko1;~Cosme_Louart1;~Ambroise_Odonnat1;~Vasilii_Feofanov1;~Themis_Palpanas1;~Ievgen_Redko2", "aff": "University Paris Descartes;Huawei Technologies Ltd.;The Chinese University of Hong Kong;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Universite Paris Cite;Huawei Technologies Ltd.", "aff_domain": "parisdescartes.fr;huawei.com;cuhk.edu.cn;huawei.com;huawei.com;u-paris.fr;huawei.com", "position": "PhD student;Researcher;Assistant Professor;Intern;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nilbert2024analysing,\ntitle={Analysing Multi-Task Regression via Random Matrix Theory with Application to Time Series Forecasting},\nauthor={Romain Ilbert and Malik Tiomoko and Cosme Louart and Ambroise Odonnat and Vasilii Feofanov and Themis Palpanas and Ievgen Redko},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FFW6rPz48Z}\n}", "github": "", "reviewers": "EGU1;8z1C;7acX;Zuy6", "pdf_size": 2265170, "rating": "4;5;6;8", "confidence": "3;4;2;4", "soundness": "3;3;2;4", "novelty": "3;3;4;4", "presentation": "3;2;1;4", "wc_summary": "62;38;21;67", "wc_strengths": "58;20;29;105", "wc_weaknesses": "60;193;24;41", "wc_questions": "91;63;44;96", "wc_limitations": "1;10;23;3", "wc_review": "272;324;141;312", "wc_reply_reviewers": "0;97;30;78", "wc_reply_authors": "101;76;14;28", "reply_reviewers": "0;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 47.0, 18.587630295441105 ], "wc_strengths_avg": [ 53.0, 33.1436268383531 ], "wc_weaknesses_avg": [ 79.5, 66.75514961409344 ], "wc_questions_avg": [ 73.5, 21.17191535974013 ], "wc_limitations_avg": [ 9.25, 8.613216588476108 ], "wc_review_avg": [ 262.25, 72.60294415517872 ], "wc_reply_reviewers_avg": [ 51.25, 38.362579423182694 ], "wc_reply_authors_avg": [ 54.75, 35.23758646672612 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.25482359571881275, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10739399569819472052&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "parisdescartes.fr;huawei.com;cuhk.edu.cn;huawei.com;huawei.com;u-paris.fr;huawei.com", "author_num": 7, "aff_unique_index": "0;1;2;1;1;3;1", "aff_unique_norm": "University Paris Descartes;Huawei;Chinese University of Hong Kong;Universit\u00e9 Paris Cit\u00e9", "aff_unique_dep": ";Huawei Technologies;;", "aff_unique_url": "https://www.univ-paris5.fr;https://www.huawei.com;https://www.cuhk.edu.hk;https://www.universite-paris.fr", "aff_unique_abbr": "UPD;Huawei;CUHK;UPC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;0;1", "aff_country_unique": "France;China" }, { "title": "Unveil Benign Overfitting for Transformer in Vision: Training Dynamics, Convergence, and Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95987", "id": "FGJb0peY4R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FGJb0peY4R", "openreview": "https://openreview.net/forum?id=FGJb0peY4R", "poster": "/media/PosterPDFs/NeurIPS%202024/95987.png?t=1730182521.8955338", "project": "", "author_site": "Jiarui Jiang, Wei Huang, Miao Zhang, Taiji Suzuki, Liqiang Nie", "tldr": "", "abstract": "Transformers have demonstrated great power in the recent development of large foundational models. In particular, the Vision Transformer (ViT) has brought revolutionary changes to the field of vision, achieving significant accomplishments on the experimental side. However, their theoretical capabilities, particularly in terms of generalization when trained to overfit training data, are still not fully understood. To address this gap, this work delves deeply into the \\textit{benign overfitting} perspective of transformers in vision. To this end, we study the optimization of a Transformer composed of a self-attention layer with softmax followed by a fully connected layer under gradient descent on a certain data distribution model. By developing techniques that address the challenges posed by softmax and the interdependent nature of multiple weights in transformer optimization, we successfully characterized the training dynamics and achieved generalization in post-training. Our results establish a sharp condition that can distinguish between the small test error phase and the large test error regime, based on the signal-to-noise ratio in the data model. The theoretical results are further verified by experimental simulation. To the best of our knowledge, this is the first work to characterize benign overfitting for Transformers.", "keywords": "Vision Transformers;Training Dynamics;Benign Overfitting;Self-Attention;Feature Learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/d63d2627836694199e110ccb5661da197a6fcfd1.zip", "author": "Jiarui Jiang;Wei Huang;Miao Zhang;Taiji Suzuki;Liqiang Nie", "authorids": "~Jiarui_Jiang1;~Wei_Huang6;~Miao_Zhang4;~Taiji_Suzuki1;~Liqiang_Nie2", "gender": ";M;M;M;M", "homepage": ";https://weihuang05.github.io/;https://sites.google.com/view/miaozhang;http://ibis.t.u-tokyo.ac.jp/suzuki/;https://liqiangnie.github.io/index.html", "dblp": ";81/6685-34;60/7041-1.html;08/312;92/8277", "google_scholar": ";RZfDh4MAAAAJ;6EUV_UMAAAAJ;x8osrBsAAAAJ;yywVMhUAAAAJ", "orcid": ";0000-0001-5674-7021;0000-0002-1262-4174;;0000-0003-1476-0273", "linkedin": ";;miao-zhang-71b13a177/;;", "or_profile": "~Jiarui_Jiang1;~Wei_Huang6;~Miao_Zhang4;~Taiji_Suzuki1;~Liqiang_Nie2", "aff": ";RIKEN AIP;Harbin Institute of Technology (Shenzhen);The University of Tokyo;Shandong University", "aff_domain": ";riken.jp;hit.edu.cn;tokyo.ac.jp;sdu.edu.cn", "position": ";Research Scientist;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\njiang2024unveil,\ntitle={Unveil Benign Overfitting for Transformer in Vision: Training Dynamics, Convergence, and Generalization},\nauthor={Jiarui Jiang and Wei Huang and Miao Zhang and Taiji Suzuki and Liqiang Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FGJb0peY4R}\n}", "github": "", "reviewers": "4uNF;ysmG;UpG9;tKtT;GNLt", "pdf_size": 2997320, "rating": "5;5;6;6;6", "confidence": "3;1;3;3;4", "soundness": "4;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "4;3;2;3;3", "wc_summary": "49;49;60;133;48", "wc_strengths": "64;40;97;40;55", "wc_weaknesses": "64;73;77;42;48", "wc_questions": "40;47;105;3;20", "wc_limitations": "4;26;1;1;6", "wc_review": "221;235;340;219;177", "wc_reply_reviewers": "0;0;38;0;0", "wc_reply_authors": "29;29;94;29;29", "reply_reviewers": "0;0;1;0;0", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 67.8, 32.89620038849471 ], "wc_strengths_avg": [ 59.2, 21.008569680013913 ], "wc_weaknesses_avg": [ 60.8, 13.70255450636851 ], "wc_questions_avg": [ 43.0, 34.63524216748022 ], "wc_limitations_avg": [ 7.6, 9.3936148526539 ], "wc_review_avg": [ 238.4, 54.374994252873265 ], "wc_reply_reviewers_avg": [ 7.6, 15.200000000000001 ], "wc_reply_authors_avg": [ 42.0, 26.0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10361497095133129972&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": ";riken.jp;hit.edu.cn;tokyo.ac.jp;sdu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "RIKEN;Harbin Institute of Technology;University of Tokyo;Shandong University", "aff_unique_dep": "Advanced Institute for Computational Science;;;", "aff_unique_url": "https://www.aip.riken.jp;http://en.hhit.edu.cn/;https://www.u-tokyo.ac.jp;http://www.sdu.edu.cn", "aff_unique_abbr": "RIKEN AIP;HIT;UTokyo;SDU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Japan;China" }, { "title": "Language Generation in the Limit", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95986", "id": "FGTDe6EA0B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FGTDe6EA0B", "openreview": "https://openreview.net/forum?id=FGTDe6EA0B", "poster": "/media/PosterPDFs/NeurIPS%202024/95986.png?t=1733114327.8340406", "project": "", "author_site": "Jon Kleinberg, Sendhil Mullainathan", "tldr": "", "abstract": "Although current large language models are complex, the most basic specifications of the underlying language generation problem itself are simple to state: given a finite set of training samples from an unknown language, produce valid new strings from the language that don't already appear in the training data. Here we ask what we can conclude about language generation using only this specification, without further assumptions. In particular, suppose that an adversary enumerates the strings of an unknown target language L that is known only to come from one of a possibly infinite list of candidates. A computational agent is trying to learn to generate from this language; we say that the agent generates from $L$ in the limit if after some finite point in the enumeration of $L$, the agent is able to produce new elements that come exclusively from $L$ and that have not yet been presented by the adversary. Our main result is that there is an agent that is able to generate in the limit for every countable list of candidate languages. This contrasts dramatically with negative results due to Gold and Angluin in a well-studied model of language learning where the goal is to identify an unknown language from samples; the difference between these results suggests that identifying a language is a fundamentally different problem than generating from it.", "keywords": "language generation;large language models;enumeration", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jon Kleinberg;Sendhil Mullainathan", "authorids": "~Jon_Kleinberg3;~Sendhil_Mullainathan2", "gender": "M;M", "homepage": "http://www.cs.cornell.edu/home/kleinber/;https://www.chicagobooth.edu/faculty/directory/m/sendhil-mullainathan", "dblp": "https://dblp.uni-trier.de/pid/k/JonMKleinberg.html;25/169", "google_scholar": "VX7d5EQAAAAJ;oExfyEkAAAAJ", "orcid": "0000-0002-1929-2512;", "linkedin": ";", "or_profile": "~Jon_Kleinberg3;~Sendhil_Mullainathan2", "aff": ";University of Chicago", "aff_domain": ";uchicago.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nkleinberg2024language,\ntitle={Language Generation in the Limit},\nauthor={Jon Kleinberg and Sendhil Mullainathan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FGTDe6EA0B}\n}", "github": "", "reviewers": "v4Eq;8y2i;4Kah", "pdf_size": 318166, "rating": "4;5;7", "confidence": "4;1;4", "soundness": "2;4;4", "novelty": "2;2;3", "presentation": "2;2;4", "wc_summary": "210;10;132", "wc_strengths": "19;3;100", "wc_weaknesses": "737;3;43", "wc_questions": "5;3;45", "wc_limitations": "1;3;9", "wc_review": "972;22;329", "wc_reply_reviewers": "1516;0;41", "wc_reply_authors": "900;0;40", "reply_reviewers": "3;0;1", "reply_authors": "3;1;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 117.33333333333333, 82.3056633666373 ], "wc_strengths_avg": [ 40.666666666666664, 42.460439103816256 ], "wc_weaknesses_avg": [ 261.0, 336.97873325577484 ], "wc_questions_avg": [ 17.666666666666668, 19.344824171395878 ], "wc_limitations_avg": [ 4.333333333333333, 3.39934634239519 ], "wc_review_avg": [ 441.0, 395.83919293908565 ], "wc_reply_reviewers_avg": [ 519.0, 705.1841367094602 ], "wc_reply_authors_avg": [ 313.3333333333333, 415.1572660517404 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2603006066335534072&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";uchicago.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Scalable Early Childhood Reading Performance Prediction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97781", "id": "FI89ORf7YH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FI89ORf7YH", "openreview": "https://openreview.net/forum?id=FI89ORf7YH", "poster": "", "project": "", "author_site": "Zhongkai Shangguan, Zanming Huang, Eshed Ohn-Bar, Ola Ozernov-Palchik, Derek Kosty, Michael Stoolmiller, Hank Fien", "tldr": "", "abstract": "Models for student reading performance can empower educators and institutions to proactively identify at-risk students, thereby enabling early and tailored instructional interventions. However, there are no suitable publicly available educational datasets for modeling and predicting future reading performance. In this work, we introduce the Enhanced Core Reading Instruction (ECRI) dataset, a novel large-scale longitudinal tabular dataset collected across 44 schools with 6,916 students and 172 teachers. We leverage the dataset to empirically evaluate the ability of state-of-the-art machine learning models to recognize early childhood educational patterns in multivariate and partial measurements. Specifically, we demonstrate a simple self-supervised strategy in which a Multi-Layer Perception (MLP) network is pre-trained over masked inputs to outperform several strong baselines while generalizing over diverse educational settings. To facilitate future developments in precise modeling and responsible use of models for individualized and early intervention strategies, our data and code are available at https://ecri-data.github.io/.", "keywords": "Machine Learning for Education;Early Childhood Reading;Missing Data;Self-supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Zhongkai Shangguan;Zanming Huang;Eshed Ohn-Bar;Ola Ozernov-Palchik;Derek Kosty;Michael Stoolmiller;Hank Fien", "authorids": "~Zhongkai_Shangguan2;~Zanming_Huang1;~Eshed_Ohn-Bar4;~Ola_Ozernov-Palchik1;~Derek_Kosty1;~Michael_Stoolmiller1;~Hank_Fien1", "gender": ";M;Not Specified;F;M;M;M", "homepage": ";https://tzmhuang.github.io/;https://eshed1.github.io/;;;;https://www.bu.edu/", "dblp": ";332/1254;121/0305;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;p9zVBV4AAAAJ;JG4SKDsAAAAJ;7mMpOv0AAAAJ;;", "orcid": ";;;0000-0003-0055-6642;;0000-0002-9781-9367;", "linkedin": ";;;;;;", "or_profile": "~Zhongkai_Shangguan2;~Zanming_Huang1;~Eshed_Ohn-Bar4;~Ola_Ozernov-Palchik1;~Derek_Kosty1;~Michael_Stoolmiller1;~Hank_Fien1", "aff": ";Boston University, Boston University;Boston University;Boston University, Boston University;Oregon Research Institute;Boston University, Boston University;Boston University, Boston University", "aff_domain": ";bu.edu;bu.edu;bu.edu;ori.org;bu.edu;bu.edu", "position": ";Researcher;Assistant Professor;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nshangguan2024scalable,\ntitle={Scalable Early Childhood Reading Performance Prediction},\nauthor={Zhongkai Shangguan and Zanming Huang and Eshed Ohn-Bar and Ola Ozernov-Palchik and Derek Kosty and Michael Stoolmiller and Hank Fien},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=FI89ORf7YH}\n}", "github": "", "reviewers": "pkea;4e3s;hdhn", "pdf_size": 413924, "rating": "7;7;7", "confidence": "4;4;2", "wc_summary_and_contributions": "106;105;55", "wc_strengths": "42;95;131", "wc_improvement": "47;102;140", "wc_limitations": "54;6;5", "wc_correctness": "262;6;1", "wc_clarity": "31;9;1", "wc_relation_to_prior_work": "56;5;1", "wc_documentation": "40;9;1", "wc_additional_feedback": "1;1;1", "wc_review": "639;338;336", "wc_reply_reviewers": "0;68;16", "wc_reply_authors": "268;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 88.66666666666667, 23.809428571238094 ], "wc_strengths_avg": [ 89.33333333333333, 36.55437350334734 ], "wc_improvement_avg": [ 96.33333333333333, 38.177945931591914 ], "wc_limitations_avg": [ 21.666666666666668, 22.866763848189994 ], "wc_correctness_avg": [ 89.66666666666667, 121.87516381755371 ], "wc_clarity_avg": [ 13.666666666666666, 12.684198393626966 ], "wc_relation_to_prior_work_avg": [ 20.666666666666668, 25.037749277618563 ], "wc_documentation_avg": [ 16.666666666666668, 16.81930108205715 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 437.6666666666667, 142.36650667282044 ], "wc_reply_reviewers_avg": [ 28.0, 29.028721409436322 ], "wc_reply_authors_avg": [ 89.33333333333333, 126.33641157199649 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11822061495133277214&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";bu.edu;bu.edu;bu.edu;ori.org;bu.edu;bu.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Boston University;Oregon Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.ori.org", "aff_unique_abbr": "BU;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Boston;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ProxyFusion: Face Feature Aggregation Through Sparse Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95985", "id": "FIs87Iro9j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FIs87Iro9j", "openreview": "https://openreview.net/forum?id=FIs87Iro9j", "poster": "/media/PosterPDFs/NeurIPS%202024/95985.png?t=1731651070.8304942", "project": "", "author_site": "Bhavin Jawade, Alexander Stone, Deen Dayal Mohan, Xiao Wang, Srirangaraj Setlur, Venu Govindaraju", "tldr": "", "abstract": "Face feature fusion is indispensable for robust face recognition, particularly in scenarios involving long-range, low-resolution media (unconstrained environments) where not all frames or features are equally informative. Existing methods often rely on large intermediate feature maps or face metadata information, making them incompatible with legacy biometric template databases that store pre-computed features. Additionally, real-time inference and generalization to large probe sets remains challenging. \nTo address these limitations, we introduce a linear time O(N) proxy based sparse expert selection and pooling approach for context driven feature-set attention. Our approach is order invariant on the feature-set, generalizes to large sets, is compatible with legacy template stores, and utilizes significantly less parameters making it suitable real-time inference and edge use-cases. Through qualitative experiments, we demonstrate that ProxyFusion learns discriminative information for importance weighting of face features without relying on intermediate features. Quantitative evaluations on challenging low-resolution face verification datasets such as IARPA BTS3.1 and DroneSURF show the superiority of ProxyFusion in unconstrained long-range face recognition setting. \nOur code and pretrained models are available at: https://github.com/bhavinjawade/ProxyFusion", "keywords": "Feature Fusion;Face Recognition;Pooling", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Bhavin Jawade;Alexander Stone;Deen Dayal Mohan;Xiao Wang;Srirangaraj Setlur;Venu Govindaraju", "authorids": "~Bhavin_Jawade1;~Alexander_Stone1;~Deen_Dayal_Mohan2;~Xiao_Wang22;~Srirangaraj_Setlur1;~Venu_Govindaraju3", "gender": "M;;M;M;M;", "homepage": "https://bhavinjawade.github.io;https://alexstone.dev;https://www.acsu.buffalo.edu/~dmohan/;https://wangxiaoshawn.github.io/;https://www.buffalo.edu/cubs/members/srirangaraj-setlur.html;https://www.buffalo.edu/cubs/members/venu-govindaraju.html", "dblp": "310/1267;;244/8194;;80/1388.html;g/VenuGovindaraju.html", "google_scholar": "H2HDUoUAAAAJ;;p4MdNqwAAAAJ;;BPEF3ZwAAAAJ;ruIgbscAAAAJ", "orcid": "0009-0008-6059-5364;;;0009-0000-0511-8495;0000-0002-7118-9280;", "linkedin": "https://linkedin.com/in/bhavinjawade;;;;setlur/;venu-govindaraju-1950725/", "or_profile": "~Bhavin_Jawade1;~Alexander_Stone1;~Deen_Dayal_Mohan2;~Xiao_Wang22;~Srirangaraj_Setlur1;~Venu_Govindaraju3", "aff": "NetFlix;State University of New York at Buffalo;Yahoo;State University of New York at Buffalo;State University of New York, Buffalo;State University of New York at Buffalo", "aff_domain": "netflix.com;buffalo.edu;yahoo-inc.com;buffalo.edu;buffalo.edu;cse.buffalo.edu", "position": "Research Intern;PhD student;Researcher;PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\njawade2024proxyfusion,\ntitle={ProxyFusion: Face Feature Aggregation Through Sparse Experts},\nauthor={Bhavin Jawade and Alexander Stone and Deen Dayal Mohan and Xiao Wang and Srirangaraj Setlur and Venu Govindaraju},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FIs87Iro9j}\n}", "github": "", "reviewers": "Cxak;BueQ;waA1;vG52", "pdf_size": 4942936, "rating": "4;5;5;7", "confidence": "5;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;2;4;4", "wc_summary": "117;126;251;89", "wc_strengths": "53;79;67;65", "wc_weaknesses": "114;348;209;571", "wc_questions": "5;22;46;41", "wc_limitations": "5;81;26;39", "wc_review": "294;656;599;805", "wc_reply_reviewers": "0;86;53;59", "wc_reply_authors": "0;68;192;12", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 145.75, 62.27910965966036 ], "wc_strengths_avg": [ 66.0, 9.219544457292887 ], "wc_weaknesses_avg": [ 310.5, 171.88731774043134 ], "wc_questions_avg": [ 28.5, 16.25576820700886 ], "wc_limitations_avg": [ 37.75, 27.761258977214993 ], "wc_review_avg": [ 588.5, 185.92269899073648 ], "wc_reply_reviewers_avg": [ 49.5, 31.164884084494844 ], "wc_reply_authors_avg": [ 68.0, 76.05261336732617 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:asJCaXZit5YJ:scholar.google.com/&scioq=ProxyFusion:+Face+Feature+Aggregation+Through+Sparse+Experts&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "netflix.com;buffalo.edu;yahoo-inc.com;buffalo.edu;buffalo.edu;cse.buffalo.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Netflix;State University of New York at Buffalo;Yahoo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.netflix.com;https://www.buffalo.edu;https://www.yahoo.com", "aff_unique_abbr": "Netflix;SUNY Buffalo;Yahoo", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FJIetdSItj", "title": "LV-Eval: A Balanced Long-Context Benchmark with 5 Length Levels Up to 256K", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "State-of-the-art large language models (LLMs) are now claiming remarkable supported context lengths of 256k or even more. In contrast, the average context lengths of mainstream benchmarks are insufficient (5k-21k), and they suffer from potential knowledge leakage and inaccurate metrics, resulting in biased evaluation. This paper introduces LV-Eval, a challenging long-context benchmark with five length levels (16k, 32k, 64k, 128k, and 256k) reaching up to 256k words. LV-Eval features two main tasks, single-hop QA and multi-hop QA, comprising 11 bilingual datasets. The design of LV-Eval has incorporated three key techniques, namely confusing facts insertion (CFI), keyword and phrase replacement (KPR), and keyword-recall-based metric design. The advantages of LV-Eval include controllable evaluation across context lengths, challenging test instances with confusing facts, mitigated knowledge leakage, and more objective evaluations. We evaluate 10 LLMs on LV-Eval and conduct ablation studies on the techniques used in LV-Eval construction. The results reveal that: (i) Commercial LLMs generally outperform open-source LLMs when evaluated within length levels shorter than their claimed context length. However, their overall performance is surpassed by open-source LLMs with longer context lengths. (ii) Extremely long-context LLMs, such as Yi-6B-200k and Llama3-8B-1M, exhibit a relatively gentle degradation of performance, but their absolute performances may not necessarily be higher than those of LLMs with shorter context lengths. (iii) LLMs' performances can significantly degrade in the presence of confusing information, especially in the pressure test of \"needle in a haystack\". (iv) Issues related to knowledge leakage and inaccurate metrics introduce bias in evaluation, and these concerns are alleviated in LV-Eval. All datasets and evaluation codes are released at: https://github.com/infinigence/LVEval.", "keywords": "Large Language Models;Long-Context LLM;Confusing Fact Insertion;Keyword and Phrase Replacement", "primary_area": "", "supplementary_material": "/attachment/2b837a1f7a471d25de4da4caf5fb39de7b9d220a.zip", "author": "Tao Yuan;Xuefei Ning;Dong Zhou;Zhijie Yang;Shiyao Li;Minghui Zhuang;Zheyue Tan;Zhuyu Yao;Dahua Lin;Boxun Li;Guohao Dai;Shengen Yan;Yu Wang", "authorids": "~Tao_Yuan7;~Xuefei_Ning1;~Dong_Zhou8;~Zhijie_Yang3;~Shiyao_Li2;~Minghui_Zhuang1;~Zheyue_Tan1;~Zhuyu_Yao1;~Dahua_Lin1;~Boxun_Li2;~Guohao_Dai4;~Shengen_Yan1;~Yu_Wang3", "gender": "Not Specified;;;M;M;M;M;M;M;M;M;M;", "homepage": "https://nics-effalg.com/ningxuefei/;https://email.whu.edu.cn;https://github.com/followall;http://nicsefc.ee.tsinghua.edu.cn/people/ShiyaoLi;https://dblp.org/pid/218/6609.html;https://github.com/tanzeyy;https://scholar.google.com/citations?hl=zh-CN&user=O8_HpXcAAAAJ;http://dahua.site;;https://nicsefc.ee.tsinghua.edu.cn/people/guohao-dai/;;https://nicsefc.ee.tsinghua.edu.cn;https://github.com/yuantao2108", "dblp": "202/9525;;;;218/6609.html;;;53/6088;135/8082.html;147/1470;117/6968;w/YuWang2.html;", "google_scholar": "oVslpJsAAAAJ;;;;gRMUhbsAAAAJ;;;GMzzRRUAAAAJ;;gz3Tkl0AAAAJ;SvE3bdUAAAAJ;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ;", "orcid": ";;;;;;;;;;;0000-0001-6108-5157;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Xuefei_Ning1;~Dong_Zhou8;~Zhijie_Yang3;~Shiyao_Li2;~Minghui_Zhuang1;~Zheyue_Tan1;~Zhuyu_Yao1;~Dahua_Lin1;~Boxun_Li2;~Guohao_Dai4;~Shengen_Yan1;~Yu_Wang3;~Yuan_tao1", "aff": "Tsinghua University;Wuhan University;Beijing University of Posts and Telecommunications;Tsinghua University;Peking University;Infinigence;Infinigence-AI;The Chinese University of Hong Kong;Infinigence-AI;Shanghai Jiaotong University;Tsinghua University;Tsinghua University;Infinigence-AI", "aff_domain": "tsinghua.edu.cn;whu.edu;bupt.edu.cn;tsinghua.edu.cn;pku.edu.cn;infini-ai.com;infini-ai.com;cuhk.edu.hk;infini-ai.com;sjtu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;cloud.infini-ai.com", "position": "Research Assistant Professor;Undergrad student;Undergrad student;PhD student;MS student;Researcher;Researcher;Associate Professor;Principal Researcher;Associate Professor;Associate Professor;Full Professor;Researcher", "bibtex": "@misc{\nanonymous2024lveval,\ntitle={{LV}-Eval: A Balanced Long-Context Benchmark with 5 Length Levels Up to 256K},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=FJIetdSItj}\n}", "github": "", "project": "", "reviewers": "zkbL;9vgN;s1zq", "site": "https://openreview.net/forum?id=FJIetdSItj", "pdf_size": 2579446, "rating": "5;7;7", "confidence": "4;3;3", "wc_summary_and_contributions": "117;80;37", "wc_strengths": "20;74;6", "wc_improvement": "35;113;1", "wc_limitations": "29;65;1", "wc_correctness": "1;33;2", "wc_clarity": "3;15;3", "wc_relation_to_prior_work": "1;18;20", "wc_documentation": "1;25;2", "wc_additional_feedback": "1;1;1", "wc_review": "208;424;73", "wc_reply_reviewers": "104;11;0", "wc_reply_authors": "0;16;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 78.0, 32.69046751985457 ], "wc_strengths_avg": [ 33.333333333333336, 29.31817790306136 ], "wc_improvement_avg": [ 49.666666666666664, 46.88520259337931 ], "wc_limitations_avg": [ 31.666666666666668, 26.195843605851334 ], "wc_correctness_avg": [ 12.0, 14.854853303438128 ], "wc_clarity_avg": [ 7.0, 5.656854249492381 ], "wc_relation_to_prior_work_avg": [ 13.0, 8.524474568362947 ], "wc_documentation_avg": [ 9.333333333333334, 11.08552609887726 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 235.0, 144.5614056378811 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 46.64999702274612 ], "wc_reply_authors_avg": [ 5.333333333333333, 7.542472332656507 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10368980344103555112&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0;3;4;5;6;5;7;0;0;5", "aff_unique_norm": "Tsinghua University;Wuhan University;Beijing University of Posts and Telecommunications;Peking University;Infinigence;Infinigence-AI;Chinese University of Hong Kong;Shanghai Jiao Tong University", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.whu.edu.cn/;http://www.bupt.edu.cn/;http://www.pku.edu.cn;;;https://www.cuhk.edu.hk;https://www.sjtu.edu.cn", "aff_unique_abbr": "THU;WHU;BUPT;Peking U;;;CUHK;SJTU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Transformers to SSMs: Distilling Quadratic Knowledge to Subquadratic Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95984", "id": "FJlrSZBMCD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FJlrSZBMCD", "openreview": "https://openreview.net/forum?id=FJlrSZBMCD", "poster": "/media/PosterPDFs/NeurIPS%202024/95984.png?t=1731714697.8855808", "project": "", "author_site": "Aviv Bick, Kevin Li, Eric Xing, J. Zico Kolter, Albert Gu", "tldr": "", "abstract": "Transformer architectures have become a dominant paradigm for domains like language modeling but suffer in many inference settings due to their quadratic-time self-attention. Recently proposed subquadratic architectures, such as Mamba, have shown promise, but have been pretrained with substantially less computational resources than the strongest Transformer models. In this work, we present a method that is able to distill a pretrained Transformer architecture into alternative architectures such as state space models (SSMs). The key idea to our approach is that we can view both Transformers and SSMs as applying different forms of mixing matrices over the token sequences. We can thus progressively distill the Transformer architecture by matching different degrees of granularity in the SSM: first matching the mixing matrices themselves, then the hidden units at each block, and finally the end-to-end predictions. Our method, called MOHAWK, is able to distill a Mamba-2 variant based on the Phi-1.5 architecture (Phi-Mamba) using only 3B tokens. Despite using less than 1% of the training data typically used to train models from scratch, Phi-Mamba boasts substantially stronger performance compared to all past open-source non-Transformer models. MOHAWK allows models like SSMs to leverage computational resources invested in training Transformer-based architectures, highlighting a new avenue for building such models.", "keywords": "distillation;mamba;sub-quadratic;ssm;state-space-models", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Aviv Bick;Kevin Li;Eric P. Xing;J Zico Kolter;Albert Gu", "authorids": "~Aviv_Bick1;~Kevin_Li3;~Eric_Xing1;~J_Zico_Kolter1;~Albert_Gu1", "gender": "M;;M;M;M", "homepage": "https://avivbick.github.io;https://www.kevinyli.com;http://www.cs.cmu.edu/~epxing/;;http://www.zicokolter.com", "dblp": "317/9948;;36/3855;130/0612;67/2526", "google_scholar": "8QCKpT0AAAAJ;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;DVCHv1kAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;0000-0002-4946-6042;", "linkedin": ";;;;", "or_profile": "~Aviv_Bick1;~Kevin_Li3;~Eric_Xing1;~Albert_Gu1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbick2024transformers,\ntitle={Transformers to {SSM}s: Distilling Quadratic Knowledge to Subquadratic Models},\nauthor={Aviv Bick and Kevin Li and Eric P. Xing and J Zico Kolter and Albert Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FJlrSZBMCD}\n}", "github": "", "reviewers": "Kdfj;2yXa;1qnq", "pdf_size": 534199, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;1;2", "wc_summary": "72;18;92", "wc_strengths": "86;79;90", "wc_weaknesses": "234;276;338", "wc_questions": "3;2;24", "wc_limitations": "10;57;40", "wc_review": "405;432;584", "wc_reply_reviewers": "20;88;23", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 60.666666666666664, 31.25522178594945 ], "wc_strengths_avg": [ 85.0, 4.546060565661952 ], "wc_weaknesses_avg": [ 282.6666666666667, 42.71871824960211 ], "wc_questions_avg": [ 9.666666666666666, 10.143416036468626 ], "wc_limitations_avg": [ 35.666666666666664, 19.430788855719562 ], "wc_review_avg": [ 473.6666666666667, 78.79227260475625 ], "wc_reply_reviewers_avg": [ 43.666666666666664, 31.372316175606514 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8296114507467345691&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Prompt Optimization Through the Lens of Best Arm Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95983", "id": "FLNnlfBGMo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FLNnlfBGMo", "openreview": "https://openreview.net/forum?id=FLNnlfBGMo", "poster": "", "project": "", "author_site": "Chengshuai Shi, Kun Yang, Zihan Chen, Jundong Li, Jing Yang, Cong Shen", "tldr": "", "abstract": "The remarkable instruction-following capability of large language models (LLMs) has sparked a growing interest in automatically finding good prompts, i.e., prompt optimization. Most existing works follow the scheme of selecting from a pre-generated pool of candidate prompts. However, these designs mainly focus on the generation strategy, while limited attention has been paid to the selection method. Especially, the cost incurred during the selection (e.g., accessing LLM and evaluating the responses) is rarely explicitly considered. To overcome this limitation, this work provides a principled framework, TRIPLE, to efficiently perform prompt selection under an explicit budget constraint. TRIPLE is built on a novel connection established between prompt optimization and fixed-budget best arm identification (BAI-FB) in multi-armed bandits (MAB); thus, it is capable of leveraging the rich toolbox from BAI-FB systematically and also incorporating unique characteristics of prompt optimization. Extensive experiments on multiple well-adopted tasks using various LLMs demonstrate the remarkable performance improvement of TRIPLE over baselines while satisfying the limited budget constraints. As an extension, variants of TRIPLE are proposed to efficiently select examples for few-shot prompts, also achieving superior empirical performance.", "keywords": "Prompt Optimization; Best-arm Identification; Limited Budget; Large Language Models", "primary_area": "bandits", "supplementary_material": "", "author": "Chengshuai Shi;Kun Yang;Zihan Chen;Jundong Li;Jing Yang;Cong Shen", "authorids": "~Chengshuai_Shi1;~Kun_Yang7;~Zihan_Chen5;~Jundong_Li2;~Jing_Yang3;~Cong_Shen1", "gender": "M;M;;M;;M", "homepage": "https://chengshuai-shi.github.io/;;;https://jundongli.github.io/;http://www.ee.psu.edu/yang;https://cshen317.github.io/", "dblp": "259/3938;;;144/7997.html;;79/6027-1.html", "google_scholar": "twvDiW8AAAAJ;-BzQrlgAAAAJ;;uY6ek7sAAAAJ;https://scholar.google.com/citations?hl=en;70LBhKcAAAAJ", "orcid": "0000-0002-2727-8251;;;;;0000-0002-3148-4453", "linkedin": ";;;;;cong-shen-3372404/", "or_profile": "~Chengshuai_Shi1;~Kun_Yang7;~Zihan_Chen5;~Jundong_Li2;~Jing_Yang3;~Cong_Shen1", "aff": "University of Virginia;University of Virginia, Charlottesville;;University of Virginia;Pennsylvania State University;University of Virginia", "aff_domain": "virginia.edu;virginia.edu;;virginia.edu;psu.edu;virginia.edu", "position": "PhD student;PhD student;;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nshi2024efficient,\ntitle={Efficient Prompt Optimization Through the Lens of Best Arm Identification},\nauthor={Chengshuai Shi and Kun Yang and Zihan Chen and Jundong Li and Jing Yang and Cong Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FLNnlfBGMo}\n}", "github": "", "reviewers": "7VZU;gchr;cpR6;55GB", "pdf_size": 2087108, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "1;3;3;4", "wc_summary": "50;108;50;95", "wc_strengths": "30;195;38;88", "wc_weaknesses": "264;241;91;176", "wc_questions": "35;33;25;8", "wc_limitations": "1;25;3;1", "wc_review": "380;602;207;368", "wc_reply_reviewers": "141;31;15;13", "wc_reply_authors": "118;19;34;21", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 75.75, 26.156978036462853 ], "wc_strengths_avg": [ 87.75, 65.78896184011418 ], "wc_weaknesses_avg": [ 193.0, 67.15281081235543 ], "wc_questions_avg": [ 25.25, 10.638961415476606 ], "wc_limitations_avg": [ 7.5, 10.136567466356647 ], "wc_review_avg": [ 389.25, 140.54781214946038 ], "wc_reply_reviewers_avg": [ 50.0, 53.0 ], "wc_reply_authors_avg": [ 48.0, 40.82278775390039 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11301010278078081243&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "virginia.edu;virginia.edu;;virginia.edu;psu.edu;virginia.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Virginia;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.psu.edu", "aff_unique_abbr": "UVA;PSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GuardT2I: Defending Text-to-Image Models from Adversarial Prompts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95982", "id": "FMrNus3d0n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FMrNus3d0n", "openreview": "https://openreview.net/forum?id=FMrNus3d0n", "poster": "/media/PosterPDFs/NeurIPS%202024/95982.png?t=1730275970.441058", "project": "", "author_site": "Yijun Yang, Ruiyuan Gao, Xiao Yang, Jianyuan Zhong, Qiang Xu", "tldr": "", "abstract": "Recent advancements in Text-to-Image models have raised significant safety concerns about their potential misuse for generating inappropriate or Not-Safe-For-Work contents, despite existing countermeasures such as Not-Safe-For-Work classifiers or model fine-tuning for inappropriate concept removal. Addressing this challenge, our study unveils GuardT2I a novel moderation framework that adopts a generative approach to enhance Text-to-Image models\u2019 robustness against adversarial prompts. Instead of making a binary classification, GuardT2I utilizes a large language model to conditionally transform text guidance embeddings within the Text-to-Image models into natural language for effective adversarial prompt detection, without compromising the models\u2019 inherent performance. Our extensive experiments reveal that GuardT2I outperforms leading commercial solutions like OpenAI-Moderation and Microsoft Azure Moderator by a significant margin across diverse adversarial scenarios. Our framework is available at https://github.com/cure-lab/GuardT2I.", "keywords": "Text-to-Image generation", "primary_area": "generative_models", "supplementary_material": "", "author": "Yijun Yang;Ruiyuan Gao;Xiao Yang;Jianyuan Zhong;Qiang Xu", "authorids": "~Yijun_Yang5;~Ruiyuan_Gao2;~Xiao_Yang4;~Jianyuan_Zhong1;~Qiang_Xu1", "gender": ";;M;M;M", "homepage": ";;https://ml.cs.tsinghua.edu.cn/~xiaoyang/;;https://github.com/cure-lab", "dblp": ";;57/33851;239/5133;43/1230-1", "google_scholar": ";;bwkwp0MAAAAJ;LbLMWaAAAAAJ;https://scholar.google.com.tw/citations?user=eSiKPqUAAAAJ", "orcid": ";;0000-0001-9502-9962;;", "linkedin": ";;;;", "or_profile": "~Yijun_Yang5;~Ruiyuan_Gao2;~Xiao_Yang4;~Jianyuan_Zhong1;~Qiang_Xu1", "aff": ";;Tsinghua University;Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": ";;mail.tsinghua.edu.cn;cse.cuhk.edu.hk;cuhk.edu.hk", "position": ";;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2024guardti,\ntitle={GuardT2I: Defending Text-to-Image Models from Adversarial Prompts},\nauthor={Yijun Yang and Ruiyuan Gao and Xiao Yang and Jianyuan Zhong and Qiang Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FMrNus3d0n}\n}", "github": "", "reviewers": "EP5J;7kN6;1Dsg;1mLf", "pdf_size": 3346416, "rating": "5;5;7;7", "confidence": "4;4;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "56;35;68;50", "wc_strengths": "37;30;44;72", "wc_weaknesses": "333;44;23;291", "wc_questions": "2;19;29;37", "wc_limitations": "1;1;2;5", "wc_review": "429;129;166;455", "wc_reply_reviewers": "0;9;45;45", "wc_reply_authors": "39;24;71;45", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.25, 11.882234638316145 ], "wc_strengths_avg": [ 45.75, 15.943258763502524 ], "wc_weaknesses_avg": [ 172.75, 140.23618470280772 ], "wc_questions_avg": [ 21.75, 13.06474263045392 ], "wc_limitations_avg": [ 2.25, 1.6393596310755 ], "wc_review_avg": [ 294.75, 148.1154532788527 ], "wc_reply_reviewers_avg": [ 24.75, 20.498475553074673 ], "wc_reply_authors_avg": [ 44.75, 16.97608612136496 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11480574566206138671&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;mail.tsinghua.edu.cn;cse.cuhk.edu.hk;cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;Chinese University of Hong Kong", "aff_unique_dep": ";Department of Computer Science and Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "THU;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Few-shot Algorithms for Consistent Neural Decoding (FALCON) Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97780", "id": "FN02v4nD8y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FN02v4nD8y", "openreview": "https://openreview.net/forum?id=FN02v4nD8y", "poster": "", "project": "", "author_site": "Brianna Karpowicz, Joel Ye, Chaofei Fan, Pablo Tostado-Marcos, Fabio Rizzoglio, Clayton Washington, Thiago Scodeler, Diogo de Lucena, Samuel Nason-Tomaszewski, Matthew Mender, Xuan Ma, Ezequiel Arneodo, Leigh Hochberg, Cynthia Chestek, Jaimie Henderson, Timothy Gentner, Vikash Gilja, Lee Miller, Adam Rouse, Robert Gaunt, Jennifer Collinger, Chethan Pandarinath", "tldr": "", "abstract": "Intracortical brain-computer interfaces (iBCIs) can restore movement and communication abilities to individuals with paralysis by decoding their intended behavior from neural activity recorded with an implanted device. While this activity yields high-performance decoding over short timescales, neural data is often nonstationary, which can lead to decoder failure if not accounted for. To maintain performance, users must frequently recalibrate decoders, which requires the arduous collection of new neural and behavioral data. Aiming to reduce this burden, several approaches have been developed that either limit recalibration data requirements (few-shot approaches) or eliminate explicit recalibration entirely (zero-shot approaches). However, progress is limited by a lack of standardized datasets and comparison metrics, causing methods to be compared in an ad hoc manner. Here we introduce the FALCON benchmark suite (Few-shot Algorithms for COnsistent Neural decoding) to standardize evaluation of iBCI robustness. FALCON curates five datasets of neural and behavioral data that span movement and communication tasks to focus on behaviors of interest to modern-day iBCIs. Each dataset includes calibration data, optional few-shot recalibration data, and private evaluation data. We implement a flexible evaluation platform which only requires user-submitted code to return behavioral predictions on unseen data. We also seed the benchmark by applying baseline methods spanning several classes of possible approaches. FALCON aims to provide rigorous selection criteria for robust iBCI decoders, easing their translation to real-world devices. https://snel-repo.github.io/falcon/", "keywords": "brain computer interfaces;neural stability;few shot learning;benchmark", "primary_area": "", "supplementary_material": "", "author": "Brianna M. Karpowicz;Joel Ye;Chaofei Fan;Pablo Tostado-Marcos;Fabio Rizzoglio;Clayton B Washington;Thiago Scodeler;Diogo S de Lucena;Samuel R. Nason-Tomaszewski;Matthew Mender;Xuan Ma;Ezequiel Matias Arneodo;Leigh Hochberg;Cynthia Chestek;Jaimie M. Henderson;Timothy Q Gentner;Vikash Gilja;Lee E. Miller;Adam G. Rouse;Robert Gaunt;Jennifer L Collinger;Chethan Pandarinath", "authorids": "~Brianna_M._Karpowicz1;~Joel_Ye1;~Chaofei_Fan1;~Pablo_Tostado-Marcos1;~Fabio_Rizzoglio1;~Clayton_B_Washington1;~Thiago_Scodeler1;~Diogo_S_de_Lucena1;~Samuel_R._Nason-Tomaszewski1;~Matthew_Mender1;~Xuan_Ma2;~Ezequiel_Matias_Arneodo1;~Leigh_Hochberg1;~Cynthia_Chestek1;~Jaimie_M._Henderson1;~Timothy_Q_Gentner1;~Vikash_Gilja1;~Lee_E._Miller1;~Adam_G._Rouse1;~Robert_Gaunt1;~Jennifer_L_Collinger1;~Chethan_Pandarinath1", "gender": "F;M;;M;M;M;M;M;M;M;M;M;M;F;M;M;M;M;M;M;F;M", "homepage": ";https://joel99.github.io;https://fan.chaofei.me;https://pablotostado.com/;;;;;;;;;https://www.braingate.org;http://chestekresearch.engin.umich.edu/;https://profiles.stanford.edu/jaimie-henderson;http://Gentnerlab.ucsd.edu;;https://miller-limblab.squarespace.com/;https://www.kumc.edu/arouse.html;https://www.rnel.pitt.edu;https://www.rnel.pitt.edu;http://snel.gatech.edu", "dblp": ";;267/9685;;;;;;;;;;118/9329;;;;81/163;;;;;", "google_scholar": "VwGDFEQAAAAJ;CUrST4oAAAAJ;YM4x068AAAAJ;Us6MpYoAAAAJ;;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;;;yrSjCGwAAAAJ;37DEyrUAAAAJ;;Qi2OyV0AAAAJ;;https://scholar.google.com/citations?hl=en;24i8cx8AAAAJ;6GD9SU4AAAAJ;lAoUqf8AAAAJ;;M3-z9G4AAAAJ", "orcid": ";;;0000-0002-9539-9995;0000-0002-6744-4605;;0009-0007-4599-3869;0000-0001-6786-6907;0000-0002-7127-0986;0000-0003-1562-3289;0000-0003-3352-1905;0000-0002-7125-4919;0000-0003-0261-2273;;0000-0002-3276-2267;;;;0000-0003-3785-1442;0000-0001-6202-5818;0000-0002-4517-5395;0000-0003-1241-1432", "linkedin": ";joelye/;;pablo-tostado-marcos/;;www.linkedin.com/in/clayton-washington;thiagoscodeler;diogo-de-lucena/;;;;ezequiel-m-arneodo/;;;;;vikash-gilja-5b57227/;;;;;", "or_profile": "~Brianna_M._Karpowicz1;~Joel_Ye1;~Chaofei_Fan1;~Pablo_Tostado-Marcos1;~Fabio_Rizzoglio1;~Clayton_B_Washington1;~Thiago_Scodeler1;~Diogo_S_de_Lucena1;~Samuel_R._Nason-Tomaszewski1;~Matthew_Mender1;~Xuan_Ma2;~Ezequiel_Matias_Arneodo1;~Leigh_Hochberg1;~Cynthia_Chestek1;~Jaimie_M._Henderson1;~Timothy_Q_Gentner1;~Vikash_Gilja1;~Lee_E._Miller1;~Adam_G._Rouse1;~Robert_Gaunt1;~Jennifer_L_Collinger1;~Chethan_Pandarinath1", "aff": "Emory University;Carnegie Mellon University;Stanford University;University of California, San Diego;Northwestern University;Ohio State University, Columbus;Agency Enterprise Studio;Agency Enterprise Studio;Emory University;University of Michigan - Ann Arbor;Northwestern University;University of California, San Diego;Brown University;University of Michigan - Ann Arbor;Stanford University;University of California, San Diego;University of California, San Diego;Northwestern University;University of Kansas School of Medicine;University of Pittsburgh;University of Pittsburgh;Georgia Institute of Technology", "aff_domain": "emory.edu;cmu.edu;stanford.edu;ucsd.edu;northwestern.edu;osu.edu;ae.studio;ae.studio;emory.edu;umich.edu;northwestern.edu;ucsd.edu;brown.edu;umich.edu;stanford.edu;ucsd.edu;ucsd.edu;northwestern.edu;kumc.edu;pitt.edu;pitt.edu;gatech.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Postdoc;Undergrad student;Researcher;Researcher;Postdoc;PhD student;Researcher;Researcher;Full Professor;Full Professor;Full Professor;Full Professor;Associate Professor;Full Professor;Assistant Professor;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkarpowicz2024fewshot,\ntitle={Few-shot Algorithms for Consistent Neural Decoding ({FALCON}) Benchmark},\nauthor={Brianna M. Karpowicz and Joel Ye and Chaofei Fan and Pablo Tostado-Marcos and Fabio Rizzoglio and Clayton B Washington and Thiago Scodeler and Diogo S de Lucena and Samuel R. Nason-Tomaszewski and Matthew Mender and Xuan Ma and Ezequiel Matias Arneodo and Leigh Hochberg and Cynthia Chestek and Jaimie M. Henderson and Timothy Q Gentner and Vikash Gilja and Lee E. Miller and Adam G. Rouse and Robert Gaunt and Jennifer L Collinger and Chethan Pandarinath},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=FN02v4nD8y}\n}", "github": "", "reviewers": "emfG;pDQb;N3aA;dJra;zXpq", "pdf_size": 3343370, "rating": "5;6;6;8;8", "confidence": "4;4;4;4;4", "wc_summary_and_contributions": "85;93;62;76;116", "wc_strengths": "110;78;2;19;30", "wc_improvement": "123;432;98;107;265", "wc_limitations": "143;10;30;31;1", "wc_correctness": "105;15;28;6;1", "wc_clarity": "90;118;22;2;2", "wc_relation_to_prior_work": "101;7;20;2;8", "wc_documentation": "38;23;64;10;28", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "796;777;327;254;452", "wc_reply_reviewers": "0;609;295;0;111", "wc_reply_authors": "0;1160;393;0;19", "reply_reviewers": "0;3;1;0;1", "reply_authors": "1;5;2;1;2", "rating_avg": [ 6.6, 1.2 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 86.4, 18.0288657435791 ], "wc_strengths_avg": [ 47.8, 40.061952024333515 ], "wc_improvement_avg": [ 205.0, 128.7680084493039 ], "wc_limitations_avg": [ 43.0, 51.31471523841869 ], "wc_correctness_avg": [ 31.0, 38.12086043100287 ], "wc_clarity_avg": [ 46.8, 48.093242768605236 ], "wc_relation_to_prior_work_avg": [ 27.6, 37.17310855981781 ], "wc_documentation_avg": [ 32.6, 18.10635247641004 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 521.2, 225.76394752041347 ], "wc_reply_reviewers_avg": [ 203.0, 229.8616975487652 ], "wc_reply_authors_avg": [ 314.4, 448.59184120980177 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 2.2, 1.4696938456699071 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 22, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6727197002309595306&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "emory.edu;cmu.edu;stanford.edu;ucsd.edu;northwestern.edu;osu.edu;ae.studio;ae.studio;emory.edu;umich.edu;northwestern.edu;ucsd.edu;brown.edu;umich.edu;stanford.edu;ucsd.edu;ucsd.edu;northwestern.edu;kumc.edu;pitt.edu;pitt.edu;gatech.edu", "author_num": 22, "aff_unique_index": "0;1;2;3;4;5;6;6;0;7;4;3;8;7;2;3;3;4;9;10;10;11", "aff_unique_norm": "Emory University;Carnegie Mellon University;Stanford University;University of California, San Diego;Northwestern University;Ohio State University;Agency Enterprise Studio;University of Michigan;Brown University;University of Kansas;University of Pittsburgh;Georgia Institute of Technology", "aff_unique_dep": ";;;;;;;;;School of Medicine;;", "aff_unique_url": "https://www.emory.edu;https://www.cmu.edu;https://www.stanford.edu;https://www.ucsd.edu;https://www.northwestern.edu;https://www.osu.edu;;https://www.umich.edu;https://www.brown.edu;https://www.kumc.edu;https://www.pitt.edu;https://www.gatech.edu", "aff_unique_abbr": "Emory;CMU;Stanford;UCSD;NU;OSU;;UM;Brown;KU School of Medicine;Pitt;Georgia Tech", "aff_campus_unique_index": "1;2;3;4;2;4;1;2;2;5", "aff_campus_unique": ";Stanford;San Diego;Columbus;Ann Arbor;Kansas City", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Stabilizing Linear Passive-Aggressive Online Learning with Weighted Reservoir Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95981", "id": "FNOBf6JM7r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FNOBf6JM7r", "openreview": "https://openreview.net/forum?id=FNOBf6JM7r", "poster": "", "project": "", "author_site": "Skyler Wu, Fred Lu, Edward Raff, James Holt", "tldr": "", "abstract": "Online learning methods, like the seminal Passive-Aggressive (PA) classifier, are still highly effective for high-dimensional streaming data, out-of-core processing, and other throughput-sensitive applications. Many such algorithms rely on fast adaptation to individual errors as a key to their convergence. While such algorithms enjoy low theoretical regret, in real-world deployment they can be sensitive to individual outliers that cause the algorithm to over-correct. When such outliers occur at the end of the data stream, this can cause the final solution to have unexpectedly low accuracy. We design a weighted reservoir sampling (WRS) approach to obtain a stable ensemble model from the sequence of solutions without requiring additional passes over the data, hold-out sets, or a growing amount of memory. Our key insight is that good solutions tend to be error-free for more iterations than bad solutions, and thus, the number of passive rounds provides an estimate of a solution's relative quality. Our reservoir thus contains $K$ previous intermediate weight vectors with high survival times. We demonstrate our WRS approach on the Passive-Aggressive Classifier (PAC) and First-Order Sparse Online Learning (FSOL), where our method consistently and significantly outperforms the unmodified approach. We show that the risk of the ensemble classifier is bounded with respect to the regret of the underlying online learning method.", "keywords": "online learning;passive aggressive;weighted reservoir sampling;stability", "primary_area": "online_learning", "supplementary_material": "", "author": "Skyler Wu;Fred Lu;Edward Raff;James Holt", "authorids": "~Skyler_Wu1;~Fred_Lu1;~Edward_Raff1;~James_Holt1", "gender": "M;;M;M", "homepage": ";;http://www.edwardraff.com/;", "dblp": ";;204/3369;93/1248", "google_scholar": ";8BjErXQAAAAJ;debM2bUAAAAJ;GtVgGjkAAAAJ", "orcid": ";0000-0003-1026-5734;0000-0002-9900-1972;0000-0002-6368-8696", "linkedin": "skylerwu/;fl16180;edward-raff-09992040/;jeholt/", "or_profile": "~Skyler_Wu1;~Fred_Lu1;~Edward_Raff1;~James_Holt1", "aff": "Harvard University;Booz Allen Hamilton;Booz Allen Hamilton;Laboratory for Physical Sciences", "aff_domain": "harvard.edu;bah.com;boozallen.com;umd.edu", "position": "Undergrad student;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwu2024stabilizing,\ntitle={Stabilizing Linear Passive-Aggressive Online Learning with Weighted Reservoir Sampling},\nauthor={Skyler Wu and Fred Lu and Edward Raff and James Holt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FNOBf6JM7r}\n}", "github": "", "reviewers": "v2Wj;NvJo;eWmu;m761;We6T", "pdf_size": 18047437, "rating": "3;5;6;6;7", "confidence": "4;4;3;3;3", "soundness": "1;3;3;3;3", "novelty": "2;2;3;3;4", "presentation": "2;2;3;3;3", "wc_summary": "124;269;71;119;33", "wc_strengths": "37;80;31;184;50", "wc_weaknesses": "538;202;23;82;41", "wc_questions": "2;578;31;48;46", "wc_limitations": "7;4;1;1;17", "wc_review": "708;1133;157;434;187", "wc_reply_reviewers": "739;34;16;0;33", "wc_reply_authors": "877;680;38;615;294", "reply_reviewers": "1;1;1;0;1", "reply_authors": "3;3;2;1;2", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 123.2, 80.17081763335085 ], "wc_strengths_avg": [ 76.4, 56.39361666004407 ], "wc_weaknesses_avg": [ 177.2, 190.88362947094234 ], "wc_questions_avg": [ 141.0, 219.1182329246017 ], "wc_limitations_avg": [ 6.0, 5.932958789676531 ], "wc_review_avg": [ 523.8, 363.6192514155432 ], "wc_reply_reviewers_avg": [ 164.4, 287.5709303806628 ], "wc_reply_authors_avg": [ 500.8, 297.8693673407858 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8427009716003845, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=536893051634864351&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "harvard.edu;bah.com;boozallen.com;umd.edu", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Harvard University;Booz Allen Hamilton;Laboratory for Physical Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.boozallen.com;", "aff_unique_abbr": "Harvard;BAH;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Pruning neural network models for gene regulatory dynamics using data and domain knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95980", "id": "FNtsZLwkGr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FNtsZLwkGr", "openreview": "https://openreview.net/forum?id=FNtsZLwkGr", "poster": "", "project": "", "author_site": "Intekhab Hossain, Jonas Fischer, Rebekka Burkholz, John Quackenbush", "tldr": "", "abstract": "The practical utility of machine learning models in the sciences often hinges on their interpretability. It is common to assess a model's merit for scientific discovery, and thus novel insights, by how well it aligns with already available domain knowledge - a dimension that is currently largely disregarded in the comparison of neural network models. While pruning can simplify deep neural network architectures and excels in identifying sparse models, as we show in the context of gene regulatory network inference, state-of-the-art techniques struggle with biologically meaningful structure learning. To address this issue, we propose DASH, a generalizable framework that guides network pruning by using domain-specific structural information in model fitting and leads to sparser, better interpretable models that are more robust to noise. Using both synthetic data with ground truth information, as well as real-world gene expression data, we show that DASH, using knowledge about gene interaction partners within the putative regulatory network, outperforms general pruning methods by a large margin and yields deeper insights into the biological systems being studied.", "keywords": "neural network pruning;sparsification;domain knowledge;gene regulation", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Intekhab Hossain;Jonas Fischer;Rebekka Burkholz;John Quackenbush", "authorids": "~Intekhab_Hossain1;~Jonas_Fischer1;~Rebekka_Burkholz1;~John_Quackenbush1", "gender": "M;;F;", "homepage": ";;https://sites.google.com/view/rebekkaburkholz/startseite;", "dblp": "371/9945;;194/3172;", "google_scholar": "Oz9gwRwAAAAJ;;https://scholar.google.ch/citations?user=vkWBb2wAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0895-8128;;;0000-0002-2702-5879", "linkedin": "intekhabhossain;;;john-quackenbush/", "or_profile": "~Intekhab_Hossain1;~Jonas_Fischer1;~Rebekka_Burkholz1;~John_Quackenbush1", "aff": "Harvard University, Harvard University;;Helmholtz Center CISPA for Information Security;Dana-Farber Cancer Institute", "aff_domain": "g.harvard.edu;;cispa.saarland;dfci.org", "position": "PhD student;;Associate Professor;Professor", "bibtex": "@inproceedings{\nhossain2024pruning,\ntitle={Pruning neural network models for gene regulatory dynamics using data and domain knowledge},\nauthor={Intekhab Hossain and Jonas Fischer and Rebekka Burkholz and John Quackenbush},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FNtsZLwkGr}\n}", "github": "", "reviewers": "QJVK;m1mG;pC8f;YHXc;GCM5", "pdf_size": 14656586, "rating": "3;4;5;5;5", "confidence": "4;4;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;2;2", "presentation": "2;3;3;4;2", "wc_summary": "130;37;25;41;165", "wc_strengths": "185;37;25;24;208", "wc_weaknesses": "864;128;25;22;164", "wc_questions": "209;89;108;94;237", "wc_limitations": "16;1;35;6;116", "wc_review": "1404;292;218;187;890", "wc_reply_reviewers": "153;204;83;0;0", "wc_reply_authors": "86;928;189;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "2;3;3;1;1", "rating_avg": [ 4.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 79.6, 56.77886930892513 ], "wc_strengths_avg": [ 95.8, 82.66897846229867 ], "wc_weaknesses_avg": [ 240.6, 316.68381707943337 ], "wc_questions_avg": [ 147.4, 62.669290725202885 ], "wc_limitations_avg": [ 34.8, 42.23458298598436 ], "wc_review_avg": [ 598.2, 477.88216120713275 ], "wc_reply_reviewers_avg": [ 88.0, 81.47883160674311 ], "wc_reply_authors_avg": [ 240.6, 350.6734093141366 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.37500000000000006, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J8ewfEcmKxIJ:scholar.google.com/&scioq=Pruning+neural+network+models+for+gene+regulatory+dynamics+using+data+and+domain+knowledge&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "g.harvard.edu;;cispa.saarland;dfci.org", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Harvard University;Helmholtz Center CISPA;Dana-Farber Cancer Institute", "aff_unique_dep": ";Information Security;", "aff_unique_url": "https://www.harvard.edu;https://www.cispa.de/;https://www.dana-farber.org", "aff_unique_abbr": "Harvard;CISPA;DFCI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "DiffusionFake: Enhancing Generalization in Deepfake Detection via Guided Stable Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95979", "id": "FNzpVTpNbN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FNzpVTpNbN", "openreview": "https://openreview.net/forum?id=FNzpVTpNbN", "poster": "/media/PosterPDFs/NeurIPS%202024/95979.png?t=1731306665.7917924", "project": "", "author_site": "ke sun, Shen Chen, Taiping Yao, Hong Liu, Xiaoshuai Sun, Shouhong Ding, Rongrong Ji", "tldr": "", "abstract": "The rapid progress of Deepfake technology has made face swapping highly realistic, raising concerns about the malicious use of fabricated facial content. Existing methods often struggle to generalize to unseen domains due to the diverse nature of facial manipulations. In this paper, we revisit the generation process and identify a universal principle: Deepfake images inherently contain information from both source and target identities, while genuine faces maintain a consistent identity. Building upon this insight, we introduce DiffusionFake, a novel plug-and-play framework that reverses the generative process of face forgeries to enhance the generalization of detection models. DiffusionFake achieves this by injecting the features extracted by the detection model into a frozen pre-trained Stable Diffusion model, compelling it to reconstruct the corresponding target and source images. This guided reconstruction process constrains the detection network to capture the source and target related features to facilitate the reconstruction, thereby learning rich and disentangled representations that are more resilient to unseen forgeries. Extensive experiments demonstrate that DiffusionFake significantly improves cross-domain generalization of various detector architectures without introducing additional parameters during inference. The code are available in https://github.com/skJack/DiffusionFake.git.", "keywords": "Deepfake Detection;Face forgery Detection;Stable Diffusion", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/df156f8197d7e2973e0afc803a3c4997f0a18f38.zip", "author": "Ke Sun;Shen Chen;Taiping Yao;Hong Liu;Xiaoshuai Sun;Shouhong Ding;Rongrong Ji", "authorids": "~Ke_Sun7;~Shen_Chen1;~Taiping_Yao2;~Hong_Liu9;~Xiaoshuai_Sun3;~Shouhong_Ding3;~Rongrong_Ji5", "gender": "M;M;M;Non-Binary;M;M;M", "homepage": ";https://chenshen.xyz/;https://sndler.github.io/;https://lynnhongliu.github.io/hliu/;https://sites.google.com/view/xssun;;http://mac.xmu.edu.cn/rrji-en.html", "dblp": ";;226/6518;29/5010-9;26/5787.html;119/6735;86/5681", "google_scholar": "r9_7F_EAAAAJ;xMpvoLMAAAAJ;qkpaPuAAAAAJ;BC7N2dYAAAAJ;KPMK3B4AAAAJ;OGf40fkAAAAJ;", "orcid": ";;;0000-0001-5318-6388;0000-0003-3912-9306;0000-0002-3175-3553;", "linkedin": ";;;;;;", "or_profile": "~Ke_Sun7;~Shen_Chen1;~Taiping_Yao2;~Hong_Liu9;~Xiaoshuai_Sun3;~Shouhong_Ding3;~Rongrong_Ji5", "aff": "Xiamen University;Tencent YouTu Lab;Tencent Youtu Lab;Osaka University, Tokyo Institute of Technology;Xiamen University;Tencent Youtu Lab;Xiamen University", "aff_domain": "xmu.edu.cn;tencent.com;tencent.com;osaka-u.ac.jp;xmu.edu.cn;tencent.com;xmu.edu.cn", "position": "PhD student;Researcher;researcher;Assistant Professor;Associate Professor;researcher;Full Professor", "bibtex": "@inproceedings{\nsun2024diffusionfake,\ntitle={DiffusionFake: Enhancing Generalization in Deepfake Detection via Guided Stable Diffusion},\nauthor={Ke Sun and Shen Chen and Taiping Yao and Hong Liu and Xiaoshuai Sun and Shouhong Ding and Rongrong Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FNzpVTpNbN}\n}", "github": "", "reviewers": "NDSD;b2gp;VbH6;tkET", "pdf_size": 10858194, "rating": "5;7;7;7", "confidence": "5;5;5;3", "soundness": "2;3;3;2", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "72;63;37;44", "wc_strengths": "12;96;71;56", "wc_weaknesses": "85;91;214;83", "wc_questions": "33;49;21;160", "wc_limitations": "30;7;15;121", "wc_review": "232;306;358;464", "wc_reply_reviewers": "14;48;0;21", "wc_reply_authors": "119;132;126;115", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;3;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 54.0, 14.089002803605371 ], "wc_strengths_avg": [ 58.75, 30.53993287484437 ], "wc_weaknesses_avg": [ 118.25, 55.35961976025486 ], "wc_questions_avg": [ 65.75, 55.31444205630208 ], "wc_limitations_avg": [ 43.25, 45.641948906680135 ], "wc_review_avg": [ 340.0, 84.4393273303382 ], "wc_reply_reviewers_avg": [ 20.75, 17.455300054711177 ], "wc_reply_authors_avg": [ 123.0, 6.519202405202649 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8672459791455160683&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "xmu.edu.cn;tencent.com;tencent.com;osaka-u.ac.jp;xmu.edu.cn;tencent.com;xmu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;2;0;1;0", "aff_unique_norm": "Xiamen University;Tencent;Osaka University", "aff_unique_dep": ";YouTu Lab;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.tencent.com;https://www.osaka-u.ac.jp", "aff_unique_abbr": "XMU;Tencent;Osaka U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Using Surrogates in Covariate-adjusted Response-adaptive Randomization Experiments with Delayed Outcomes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95978", "id": "FOTMgW8w5t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FOTMgW8w5t", "openreview": "https://openreview.net/forum?id=FOTMgW8w5t", "poster": "/media/PosterPDFs/NeurIPS%202024/95978.png?t=1731356116.7748384", "project": "", "author_site": "Lei Shi, Waverly Wei, Jingshen Wang", "tldr": "", "abstract": "Covariate-adjusted response-adaptive randomization (CARA) designs are gaining increasing attention. These designs combine the advantages of randomized experiments with the ability to adaptively revise treatment allocations based on data collected across multiple stages, enhancing estimation efficiency. Yet, CARA designs often assume that primary outcomes are immediately observable, which is not the case in many clinical scenarios where there is a delay in observing primary outcomes. This assumption can lead to significant missingness and inefficient estimation of treatment effects. To tackle this practical challenge, we propose a CARA experimental strategy integrating delayed primary outcomes with immediately observed surrogate outcomes. Surrogate outcomes are intermediate clinical outcomes that are predictive or correlated with the primary outcome of interest. Our design goal is to improve the estimation efficiency of the average treatment effect (ATE) of the primary outcome utilizing surrogate outcomes. From a methodological perspective, our approach offers two benefits: First, we accommodate arm and covariates-dependent delay mechanisms without imposing any parametric modeling assumptions on the distribution of outcomes. Second, when primary outcomes are not fully observed, surrogate outcomes can guide the adaptive treatment allocation rule. From a theoretical standpoint, we prove the semiparametric efficiency bound of estimating ATE under delayed primary outcomes while incorporating surrogate outcomes. We show that the ATE estimator under our proposed design strategy attains this semiparametric efficiency bound and achieves asymptotic normality. Through theoretical investigations and a synthetic HIV study, we show that our design is more efficient than the design without incorporating any surrogate information.", "keywords": "Covariate-adjusted response-adaptive randomization design;Response-adaptive randomization design;Surrogate biomarker;Causal inference", "primary_area": "causal_inference", "supplementary_material": "/attachment/379978acbf33be04bef2cbbfa5f81105314b4455.zip", "author": "Lei Shi;Waverly Wei;Jingshen Wang", "authorids": "~Lei_Shi12;~Waverly_Wei1;~Jingshen_Wang1", "gender": "M;;", "homepage": "https://leishi-rocks.github.io/index.html;;https://sites.google.com/berkeley.edu/jingshenwang/", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": "0000-0002-5239-932X;;", "linkedin": "lei-shi-51837a222/;;", "or_profile": "~Lei_Shi12;~Waverly_Wei1;~Jingshen_Wang1", "aff": "University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;;berkeley.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nshi2024using,\ntitle={Using Surrogates in Covariate-adjusted Response-adaptive Randomization Experiments with Delayed Outcomes},\nauthor={Lei Shi and Waverly Wei and Jingshen Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FOTMgW8w5t}\n}", "github": "", "reviewers": "32pT;oWF4;GAcb;AMr4;Xjru", "pdf_size": 1436117, "rating": "5;6;6;6;7", "confidence": "2;3;3;3;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;2;3", "presentation": "3;3;1;3;4", "wc_summary": "82;144;69;108;124", "wc_strengths": "118;56;104;102;124", "wc_weaknesses": "190;100;139;709;71", "wc_questions": "114;172;1;175;65", "wc_limitations": "6;1;1;1;5", "wc_review": "510;473;314;1095;389", "wc_reply_reviewers": "0;548;0;444;24", "wc_reply_authors": "0;530;0;521;0", "reply_reviewers": "0;3;0;2;1", "reply_authors": "1;3;1;3;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 105.4, 27.25876005984131 ], "wc_strengths_avg": [ 100.8, 23.886397802933786 ], "wc_weaknesses_avg": [ 241.8, 236.98388130841303 ], "wc_questions_avg": [ 105.4, 66.1592019298903 ], "wc_limitations_avg": [ 2.8, 2.2271057451320084 ], "wc_review_avg": [ 556.2, 277.8412496372704 ], "wc_reply_reviewers_avg": [ 203.2, 241.48076527955595 ], "wc_reply_authors_avg": [ 210.2, 257.4571032230418 ], "reply_reviewers_avg": [ 1.2, 1.16619037896906 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11042700393638385269&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "berkeley.edu;;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "TuneTables: Context Optimization for Scalable Prior-Data Fitted Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95977", "id": "FOfU3qhcIG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FOfU3qhcIG", "openreview": "https://openreview.net/forum?id=FOfU3qhcIG", "poster": "/media/PosterPDFs/NeurIPS%202024/95977.png?t=1732542030.6902378", "project": "", "author_site": "Benjamin Feuer, Robin Schirrmeister, Valeriia Cherepanova, Chinmay Hegde, Frank Hutter, Micah Goldblum, Niv Cohen, Colin White", "tldr": "", "abstract": "While tabular classification has traditionally relied on from-scratch training, a recent breakthrough called prior-data fitted networks (PFNs) challenges this approach. Similar to large language models, PFNs make use of pretraining and in-context learning to achieve strong performance on new tasks in a single forward pass. However, current PFNs have limitations that prohibit their widespread adoption. Notably, TabPFN achieves very strong performance on small tabular datasets but is not designed to make predictions for datasets of size larger than 1000. In this work, we overcome these limitations and substantially improve the performance of PFNs via context optimization. We introduce TuneTables, a parameter-efficient fine-tuning strategy for PFNs that compresses large datasets into a smaller learned context. We conduct extensive experiments on nineteen algorithms over 98 datasets and find that TuneTables achieves the best performance on average, outperforming boosted trees such as CatBoost, while optimizing fewer than 5\\% of TabPFN's parameters. Furthermore, we show that TuneTables can be used as an interpretability tool and can even be used to mitigate biases by optimizing a fairness objective.", "keywords": "tabular data;prior-data fitted networks", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Benjamin Feuer;Robin Tibor Schirrmeister;Valeriia Cherepanova;Chinmay Hegde;Frank Hutter;Micah Goldblum;Niv Cohen;Colin White", "authorids": "~Benjamin_Feuer1;~Robin_Tibor_Schirrmeister1;~Valeriia_Cherepanova1;~Chinmay_Hegde1;~Frank_Hutter1;~Micah_Goldblum1;~Niv_Cohen1;~Colin_White1", "gender": "M;M;F;M;M;;M;M", "homepage": "https://penfever.github.io/;;https://www.vcherepanova.com/;https://chinmayhegde.github.io/;http://ml.informatik.uni-freiburg.de/~hutter/;;https://www.cs.huji.ac.il/w~nivc/;https://crwhite.ml/", "dblp": "322/5063.html;198/1371;;39/2056;89/5383;241/7231;259/2291;136/9162", "google_scholar": "VPXu100AAAAJ;https://scholar.google.de/citations?user=lpuMlzsAAAAJ;PySUqqUAAAAJ;eJAV17IAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ;pGDKzuUAAAAJ;https://scholar.google.co.il/citations?user=ZMdC3OQAAAAJ;LS6HY-gAAAAJ", "orcid": "0000-0002-7938-542X;0000-0002-5518-7445;;;0000-0002-2037-3694;;;", "linkedin": "benjaminfeuer/;;;;frank-hutter-9190b24b/;;niv-cohen-39b49521/;", "or_profile": "~Benjamin_Feuer1;~Robin_Tibor_Schirrmeister1;~Valeriia_Cherepanova1;~Chinmay_Hegde1;~Frank_Hutter1;~Micah_Goldblum1;~Niv_Cohen1;~Colin_White1", "aff": "Arthur AI;University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;Amazon;New York University;Albert-Ludwigs-Universit\u00e4t Freiburg;New York University;Hebrew University of Jerusalem;Abacus.AI", "aff_domain": "arthur.ai;cs.uni-freiburg.de;amazon.com;nyu.edu;uni-freiburg.de;nyu.edu;huji.ac.il;abacus.ai", "position": "Intern;PhD student;Postdoc;Associate Professor;Full Professor;Postdoc;PhD student;Head of Research", "bibtex": "@inproceedings{\nfeuer2024tunetables,\ntitle={TuneTables: Context Optimization for Scalable Prior-Data Fitted Networks},\nauthor={Benjamin Feuer and Robin Tibor Schirrmeister and Valeriia Cherepanova and Chinmay Hegde and Frank Hutter and Micah Goldblum and Niv Cohen and Colin White},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FOfU3qhcIG}\n}", "github": "", "reviewers": "YTTX;cmgc;7Ntu;FY4B", "pdf_size": 783806, "rating": "5;6;6;7", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;1;3", "wc_summary": "93;77;84;216", "wc_strengths": "36;50;20;148", "wc_weaknesses": "220;111;626;263", "wc_questions": "2;59;206;203", "wc_limitations": "6;1;1;9", "wc_review": "357;298;937;839", "wc_reply_reviewers": "17;10;39;118", "wc_reply_authors": "15;11;15;82", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 117.5, 57.151115474678186 ], "wc_strengths_avg": [ 63.5, 49.92744736114595 ], "wc_weaknesses_avg": [ 305.0, 193.4334510884816 ], "wc_questions_avg": [ 117.5, 89.30985387962518 ], "wc_limitations_avg": [ 4.25, 3.418698582794336 ], "wc_review_avg": [ 607.75, 283.153116705432 ], "wc_reply_reviewers_avg": [ 46.0, 42.924352062669506 ], "wc_reply_authors_avg": [ 30.75, 29.634228520412 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9889560494464791720&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "arthur.ai;cs.uni-freiburg.de;amazon.com;nyu.edu;uni-freiburg.de;nyu.edu;huji.ac.il;abacus.ai", "author_num": 8, "aff_unique_index": "0;1;2;3;4;3;5;6", "aff_unique_norm": "Arthur AI;University of Freiburg;Amazon;New York University;Albert-Ludwigs-Universit\u00e4t Freiburg;Hebrew University of Jerusalem;Abacus.AI", "aff_unique_dep": ";;Amazon.com, Inc.;;;;", "aff_unique_url": "https://www.arthur.ai;https://www.uni-freiburg.de;https://www.amazon.com;https://www.nyu.edu;https://www.uni-freiburg.de;https://www.huji.ac.il;https://www.abacus.ai", "aff_unique_abbr": "Arthur AI;UoF;Amazon;NYU;Albert-Ludwigs-Universit\u00e4t;HUJI;Abacus.AI", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Freiburg;Jerusalem", "aff_country_unique_index": "0;1;0;0;1;0;2;0", "aff_country_unique": "United States;Germany;Israel" }, { "title": "SlowFocus: Enhancing Fine-grained Temporal Understanding in Video LLM", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95976", "id": "FOkKndty5B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FOkKndty5B", "openreview": "https://openreview.net/forum?id=FOkKndty5B", "poster": "/media/PosterPDFs/NeurIPS%202024/95976.png?t=1731258514.4533257", "project": "", "author_site": "Ming Nie, Dan Ding, Chunwei Wang, Yuanfan Guo, Jianhua Han, Hang Xu, Li Zhang", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated exceptional capabilities in text understanding, which has paved the way for their expansion into video LLMs (Vid-LLMs) to analyze video data. However, current Vid-LLMs struggle to simultaneously retain high-quality frame-level semantic information (i.e., a sufficient number of tokens per frame) and comprehensive video-level temporal information (i.e., an adequate number of sampled frames per video). This limitation hinders the advancement of Vid-LLMs towards fine-grained video understanding. To address this issue, we introduce the SlowFocus mechanism, which significantly enhances the equivalent sampling frequency without compromising the quality of frame-level visual tokens. SlowFocus begins by identifying the query-related temporal segment based on the posed question, then performs dense sampling on this segment to extract local high-frequency features. A multi-frequency mixing attention module is further leveraged to aggregate these local high-frequency details with global low-frequency contexts for enhanced temporal comprehension. Additionally, to tailor Vid-LLMs to this innovative mechanism, we introduce a set of training strategies aimed at bolstering both temporal grounding and detailed temporal reasoning capabilities. Furthermore, we establish FineAction-CGR, a benchmark specifically devised to assess the ability of Vid-LLMs to process fine-grained temporal understanding tasks. Comprehensive experiments demonstrate the superiority of our mechanism across both existing public video understanding benchmarks and our proposed FineAction-CGR.", "keywords": "large language model;video understanding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ming Nie;Dan Ding;Chunwei Wang;Yuanfan Guo;Jianhua Han;Hang Xu;Li Zhang", "authorids": "~Ming_Nie1;~Dan_Ding2;~Chunwei_Wang1;~Yuanfan_Guo1;~Jianhua_Han1;~Hang_Xu1;~Li_Zhang5", "gender": ";;F;M;M;M;M", "homepage": ";;https://github.com/chunweiwang0224;;;;http://www.robots.ox.ac.uk/~lz/", "dblp": ";;;;29/6207;;89/5992-40", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=FqNrU2QAAAAJ;OEPMQEMAAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;-wOTCE8AAAAJ", "orcid": ";;;;;0000-0003-3645-8972;", "linkedin": ";;;;;;", "or_profile": "~Ming_Nie1;~Dan_Ding2;~Chunwei_Wang1;~Yuanfan_Guo1;~Jianhua_Han1;~Hang_Xu1;~Li_Zhang5", "aff": ";;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah\u2018s Ark Lab;Fudan University", "aff_domain": ";;huawei.com;huawei.com;huawei.com;huawei.com;fudan.edu.cn", "position": ";;Researcher;Researcher;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nnie2024slowfocus,\ntitle={SlowFocus: Enhancing Fine-grained Temporal Understanding in Video {LLM}},\nauthor={Ming Nie and Dan Ding and Chunwei Wang and Yuanfan Guo and Jianhua Han and Hang Xu and Li Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FOkKndty5B}\n}", "github": "", "reviewers": "bGgU;6ATY;uGD4;et83", "pdf_size": 1193175, "rating": "4;6;6;7", "confidence": "5;5;4;5", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "3;3;2;4", "wc_summary": "43;91;69;84", "wc_strengths": "72;85;24;111", "wc_weaknesses": "220;213;180;94", "wc_questions": "3;9;1;53", "wc_limitations": "1;9;1;25", "wc_review": "339;407;275;367", "wc_reply_reviewers": "52;146;712;50", "wc_reply_authors": "302;361;1203;20", "reply_reviewers": "1;2;2;1", "reply_authors": "3;3;4;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.75, 18.403464347779742 ], "wc_strengths_avg": [ 73.0, 31.583223394707513 ], "wc_weaknesses_avg": [ 176.75, 50.10676102084428 ], "wc_questions_avg": [ 16.5, 21.277922830953212 ], "wc_limitations_avg": [ 9.0, 9.797958971132712 ], "wc_review_avg": [ 347.0, 48.08326112068523 ], "wc_reply_reviewers_avg": [ 240.0, 275.25624425251465 ], "wc_reply_authors_avg": [ 471.5, 441.55548915170334 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16701072669914912055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";;huawei.com;huawei.com;huawei.com;huawei.com;fudan.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Huawei;Fudan University", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.fudan.edu.cn", "aff_unique_abbr": "Huawei;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AutoTimes: Autoregressive Time Series Forecasters via Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95975", "id": "FOvZztnp1H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FOvZztnp1H", "openreview": "https://openreview.net/forum?id=FOvZztnp1H", "poster": "/media/PosterPDFs/NeurIPS%202024/95975.png?t=1730376787.8811932", "project": "", "author_site": "Yong Liu, Guo Qin, Xiangdong Huang, Jianmin Wang, Mingsheng Long", "tldr": "", "abstract": "Foundation models of time series have not been fully developed due to the limited availability of time series corpora and the underexploration of scalable pre-training. Based on the similar sequential formulation of time series and natural language, increasing research demonstrates the feasibility of leveraging large language models (LLM) for time series. Nevertheless, the inherent autoregressive property and decoder-only architecture of LLMs have not been fully considered, resulting in insufficient utilization of LLM abilities. To fully revitalize the general-purpose token transition and multi-step generation capability of large language models, we propose AutoTimes to repurpose LLMs as autoregressive time series forecasters, which projects time series into the embedding space of language tokens and autoregressively generates future predictions with arbitrary lengths. Compatible with any decoder-only LLMs, the consequent forecaster exhibits the flexibility of the lookback length and scalability with larger LLMs. Further, we formulate time series as prompts, extending the context for prediction beyond the lookback window, termed in-context forecasting. By introducing LLM-embedded textual timestamps, AutoTimes can utilize chronological information to align multivariate time series. Empirically, AutoTimes achieves state-of-the-art with 0.1% trainable parameters and over $5\\times$ training/inference speedup compared to advanced LLM-based forecasters. Code is available at this repository: https://github.com/thuml/AutoTimes.", "keywords": "Time Series Forecasting;Large Language Models", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/979c96b5e73afc01e1a8f6b5b708b8f348e2c639.zip", "author": "Yong Liu;Guo Qin;Xiangdong Huang;Jianmin Wang;Mingsheng Long", "authorids": "~Yong_Liu15;~Guo_Qin1;~Xiangdong_Huang1;~Jianmin_Wang1;~Mingsheng_Long5", "gender": ";M;M;M;", "homepage": ";;;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;", "dblp": ";;;06/3456-1.html;", "google_scholar": ";;2u7MRD8AAAAJ;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;", "orcid": ";0009-0003-7468-0475;;0000-0001-6841-7943;", "linkedin": ";;;;", "or_profile": "~Yong_Liu15;~Guo_Qin1;~Xiangdong_Huang1;~Jianmin_Wang1;~Mingsheng_Long5", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "position": ";Undergrad student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nliu2024autotimes,\ntitle={AutoTimes: Autoregressive Time Series Forecasters via Large Language Models},\nauthor={Yong Liu and Guo Qin and Xiangdong Huang and Jianmin Wang and Mingsheng Long},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FOvZztnp1H}\n}", "github": "", "reviewers": "acJd;k2uw;EAH7;1PWH", "pdf_size": 2423345, "rating": "4;7;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "87;52;130;88", "wc_strengths": "124;38;127;43", "wc_weaknesses": "265;70;116;147", "wc_questions": "62;4;384;89", "wc_limitations": "12;1;187;5", "wc_review": "550;165;944;372", "wc_reply_reviewers": "163;22;264;72", "wc_reply_authors": "1248;270;1114;97", "reply_reviewers": "2;1;2;1", "reply_authors": "4;4;4;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 27.63489641739227 ], "wc_strengths_avg": [ 83.0, 42.54997062278657 ], "wc_weaknesses_avg": [ 149.5, 72.091955168382 ], "wc_questions_avg": [ 134.75, 147.14512394231758 ], "wc_limitations_avg": [ 51.25, 78.47411993772214 ], "wc_review_avg": [ 507.75, 286.35849472296087 ], "wc_reply_reviewers_avg": [ 130.25, 92.29402743406531 ], "wc_reply_authors_avg": [ 682.25, 504.71495668347296 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13067500517692328763&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Reasoning Multi-Agent Behavioral Topology for Interactive Autonomous Driving", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95974", "id": "FSgwgQXTxo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FSgwgQXTxo", "openreview": "https://openreview.net/forum?id=FSgwgQXTxo", "poster": "/media/PosterPDFs/NeurIPS%202024/95974.png?t=1731577555.2413552", "project": "", "author_site": "Haochen Liu, Li Chen, Yu Qiao, Chen Lv, Hongyang Li", "tldr": "", "abstract": "Autonomous driving system aims for safe and social-consistent driving through the behavioral integration among interactive agents. However, challenges remain due to multi-agent scene uncertainty and heterogeneous interaction. Current dense and sparse behavioral representations struggle with inefficiency and inconsistency in multi-agent modeling, leading to instability of collective behavioral patterns when integrating prediction and planning (IPP). To address this, we initiate a topological formation that serves as a compliant behavioral foreground to guide downstream trajectory generations. Specifically, we introduce Behavioral Topology (BeTop), a pivotal topological formulation that explicitly represents the consensual behavioral pattern among multi-agent future. BeTop is derived from braid theory to distill compliant interactive topology from multi-agent future trajectories. A synergistic learning framework (BeTopNet) supervised by BeTop facilitates the consistency of behavior prediction and planning within the predicted topology priors. Through imitative contingency learning, BeTop also effectively manages behavioral uncertainty for prediction and planning. Extensive verification on large-scale real-world datasets, including nuPlan and WOMD, demonstrates that BeTop achieves state-of-the-art performance in both prediction and planning tasks. Further validations on the proposed interactive scenario benchmark showcase planning compliance in interactive cases. Code and model is available at https://github.com/OpenDriveLab/BeTop.", "keywords": "Trajectory Prediction;Autonomous Driving", "primary_area": "other", "supplementary_material": "", "author": "Haochen Liu;Li Chen;Yu Qiao;Chen Lv;Hongyang Li", "authorids": "~Haochen_Liu2;~Li_Chen15;~Yu_Qiao1;~Chen_Lv1;~Hongyang_Li1", "gender": "M;M;;M;M", "homepage": ";https://ilnehc.github.io/;;https://lvchen.wixsite.com/automan;https://datascience.hku.hk/people/hongyang-li/", "dblp": "200/1423;181/2847;;;95/8433-1", "google_scholar": "iizqKUsAAAAJ;ulZxvY0AAAAJ;;UKVs2CEAAAAJ;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ", "orcid": "0000-0002-3628-8777;;;0000-0001-6897-4512;0000-0001-9110-5534", "linkedin": ";;;chen-lv-7964b590/;hongyangli2020/", "or_profile": "~Haochen_Liu2;~Li_Chen15;~Yu_Qiao1;~Chen_Lv1;~Hongyang_Li1", "aff": "Nanyang Technological University;Shanghai AI Laboratory;;Nanyang Technological University;Shanghai AI Lab", "aff_domain": "ntu.edu.sg;pjlab.org.cn;;ntu.edu.sg;pjlab.org.cn", "position": "PhD student;Researcher;;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nliu2024reasoning,\ntitle={Reasoning Multi-Agent Behavioral Topology for Interactive Autonomous Driving},\nauthor={Haochen Liu and Li Chen and Yu Qiao and Chen Lv and Hongyang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FSgwgQXTxo}\n}", "github": "", "reviewers": "PWLC;M7XC;aQzs;PsGi", "pdf_size": 6841853, "rating": "5;6;7;7", "confidence": "2;3;3;4", "soundness": "3;2;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;3", "wc_summary": "87;66;98;117", "wc_strengths": "125;49;87;117", "wc_weaknesses": "91;185;51;77", "wc_questions": "76;2;51;58", "wc_limitations": "1;1;23;6", "wc_review": "380;303;310;375", "wc_reply_reviewers": "27;22;16;13", "wc_reply_authors": "47;18;20;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 18.452642087245934 ], "wc_strengths_avg": [ 94.5, 29.845435161846776 ], "wc_weaknesses_avg": [ 101.0, 50.57667446560717 ], "wc_questions_avg": [ 46.75, 27.39867697535777 ], "wc_limitations_avg": [ 7.75, 9.03811374126261 ], "wc_review_avg": [ 342.0, 35.63004350263974 ], "wc_reply_reviewers_avg": [ 19.5, 5.408326913195984 ], "wc_reply_authors_avg": [ 25.25, 12.636751956100111 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11365405236518866592&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;pjlab.org.cn;;ntu.edu.sg;pjlab.org.cn", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanyang Technological University;Shanghai AI Laboratory;Shanghai AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.shanghai-ai-lab.com;https://www.shanghaiailab.com", "aff_unique_abbr": "NTU;SAIL;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Singapore;China" }, { "title": "Generalized Linear Bandits with Limited Adaptivity", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95973", "id": "FTPDBQuT4G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FTPDBQuT4G", "openreview": "https://openreview.net/forum?id=FTPDBQuT4G", "poster": "", "project": "", "author_site": "Ayush Sawarni, Nirjhar Das, Siddharth Barman, Gaurav Sinha", "tldr": "", "abstract": "We study the generalized linear contextual bandit problem within the constraints of limited adaptivity. In this paper, we present two algorithms, B-GLinCB and RS-GLinCB, that address, respectively, two prevalent limited adaptivity settings. Given a budget $M$ on the number of policy updates, in the first setting, the algorithm needs to decide upfront $M$ rounds at which it will update its policy, while in the second setting it can adaptively perform $M$ policy updates during its course. For the first setting, we design an algorithm B-GLinCB, that incurs $\\tilde{O}(\\sqrt{T})$ regret when $M = \\Omega( \\log{\\log T} )$ and the arm feature vectors are generated stochastically. For the second setting, we design an algorithm RS-GLinCB that updates its policy $\\tilde{O}(\\log^2 T)$ times and achieves a regret of $\\tilde{O}(\\sqrt{T})$ even when the arm feature vectors are adversarially generated. Notably, in these bounds, we manage to eliminate the dependence on a key instance dependent parameter $\\kappa$, that captures non-linearity of the underlying reward model. Our novel approach for removing this dependence for generalized linear contextual bandits might be of independent interest.", "keywords": "Contextual Bandits;Generalized Linear Contextual Bandits;Logistic Bandits;Optimal Design;Limited Adaptivity;Batched Bandits", "primary_area": "bandits", "supplementary_material": "", "author": "Ayush Sawarni;Nirjhar Das;Siddharth Barman;Gaurav Sinha", "authorids": "~Ayush_Sawarni1;~Nirjhar_Das1;~Siddharth_Barman1;~Gaurav_Sinha2", "gender": "M;M;M;M", "homepage": "https://sawarniayush.github.io/;https://nirjhar-das.github.io;http://www.csa.iisc.ac.in/~barman/;https://sinhagaurav.github.io/", "dblp": ";323/5270;63/478.html;29/2979-1", "google_scholar": "U8TSPdAAAAAJ;Zf3YrgYAAAAJ;https://scholar.google.co.in/citations?user=HcGQSKIAAAAJ;3Tt6250AAAAJ", "orcid": ";0000-0001-7753-4391;;0000-0002-3590-9543", "linkedin": "ayush-sawarni;nirjhar-das-874596195/;;gaurav-sinha-370376128/", "or_profile": "~Ayush_Sawarni1;~Nirjhar_Das1;~Siddharth_Barman1;~Gaurav_Sinha2", "aff": ";Microsoft Research;Indian Institute of Science;Microsoft Research", "aff_domain": ";research.microsoft.com;iisc.ac.in;research.microsoft.com", "position": ";Intern;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nsawarni2024generalized,\ntitle={Generalized Linear Bandits with Limited Adaptivity},\nauthor={Ayush Sawarni and Nirjhar Das and Siddharth Barman and Gaurav Sinha},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FTPDBQuT4G}\n}", "github": "", "reviewers": "5bc4;wYhM;bRFJ", "pdf_size": 1741016, "rating": "5;7;8", "confidence": "2;3;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "167;130;76", "wc_strengths": "99;25;43", "wc_weaknesses": "106;147;283", "wc_questions": "180;60;225", "wc_limitations": "28;1;1", "wc_review": "580;363;628", "wc_reply_reviewers": "11;85;119", "wc_reply_authors": "22;8;193", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 124.33333333333333, 37.366057086910075 ], "wc_strengths_avg": [ 55.666666666666664, 31.510139461590597 ], "wc_weaknesses_avg": [ 178.66666666666666, 75.64977785088922 ], "wc_questions_avg": [ 155.0, 69.6419413859206 ], "wc_limitations_avg": [ 10.0, 12.727922061357855 ], "wc_review_avg": [ 523.6666666666666, 115.2861175028845 ], "wc_reply_reviewers_avg": [ 71.66666666666667, 45.087569117095775 ], "wc_reply_authors_avg": [ 74.33333333333333, 84.10443243703364 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4Lq5ja710cgJ:scholar.google.com/&scioq=Generalized+Linear+Bandits+with+Limited+Adaptivity&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": ";research.microsoft.com;iisc.ac.in;research.microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Indian Institute of Science", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.iisc.ac.in", "aff_unique_abbr": "MSR;IISc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;India" }, { "title": "Vision Foundation Model Enables Generalizable Object Pose Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95972", "id": "FTpKGuxEfy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FTpKGuxEfy", "openreview": "https://openreview.net/forum?id=FTpKGuxEfy", "poster": "/media/PosterPDFs/NeurIPS%202024/95972.png?t=1733235235.1556988", "project": "", "author_site": "Kai Chen, Yiyao Ma, Xingyu Lin, Stephen James, Jianshu Zhou, Yun-Hui Liu, Pieter Abbeel, DOU QI", "tldr": "", "abstract": "Object pose estimation plays a crucial role in robotic manipulation, however, its practical applicability still suffers from limited generalizability. This paper addresses the challenge of generalizable object pose estimation, particularly focusing on category-level object pose estimation for unseen object categories. Current methods either require impractical instance-level training or are confined to predefined categories, limiting their applicability. We propose VFM-6D, a novel framework that explores harnessing existing vision and language models, to elaborate object pose estimation into two stages: category-level object viewpoint estimation and object coordinate map estimation. Based on the two-stage framework, we introduce a 2D-to-3D feature lifting module and a shape-matching module, both of which leverage pre-trained vision foundation models to improve object representation and matching accuracy. VFM-6D is trained on cost-effective synthetic data and exhibits superior generalization capabilities. It can be applied to both instance-level unseen object pose estimation and category-level object pose estimation for novel categories. Evaluations on benchmark datasets demonstrate the effectiveness and versatility of VFM-6D in various real-world scenarios.", "keywords": "Object Pose Estimation;Vision Foundation Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kai Chen;Yiyao Ma;Xingyu Lin;Stephen James;Jianshu Zhou;Yun-Hui Liu;Pieter Abbeel;Qi Dou", "authorids": "~Kai_Chen9;~Yiyao_Ma1;~Xingyu_Lin1;~Stephen_James1;~Jianshu_Zhou1;~Yun-Hui_Liu1;~Pieter_Abbeel2;~Qi_Dou2", "gender": "M;;M;M;M;;M;F", "homepage": ";https://github.com/Yiyao-Ma;https://xingyu-lin.github.io;https://stepjam.github.io/;https://msc.berkeley.edu/people/staff.html;http://www.mae.cuhk.edu.hk/~yhliu;https://people.eecs.berkeley.edu/~pabbeel/;https://www.cse.cuhk.edu.hk/~qdou", "dblp": "181/2839-28;385/7355;;163/5669;203/5150;;;165/7846", "google_scholar": "https://scholar.google.com.hk/citations?user=Hx3iRaMAAAAJ;;;OXtG-isAAAAJ;HVofSgQAAAAJ;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;https://scholar.google.com.hk/citations?user=iHh7IJQAAAAJ", "orcid": ";;;;0000-0003-3900-3519;;;0000-0002-3416-9950", "linkedin": "kai-chen-2b537aaa/;;;;;;;", "or_profile": "~Kai_Chen9;~Yiyao_Ma1;~Xingyu_Lin1;~Stephen_James1;~Jianshu_Zhou1;~Yun-Hui_Liu1;~Pieter_Abbeel2;~Qi_Dou2", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;University of California, Berkeley;Dyson;Chinese University of Hong Kong;The Chinese University of Hong Kong;Covariant;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;cse.cuhk.edu.hk;berkeley.edu;dyson.com;cuhk.hk;cuhk.edu.hk;covariant.ai;cuhk.edu.hk", "position": "PhD student;PhD student;Postdoc;Principal Researcher;Lecturer;Full Professor;Founder;Assistant Professor", "bibtex": "@inproceedings{\nchen2024vision,\ntitle={Vision Foundation Model Enables Generalizable Object Pose Estimation},\nauthor={Kai Chen and Yiyao Ma and Xingyu Lin and Stephen James and Jianshu Zhou and Yun-Hui Liu and Pieter Abbeel and Qi Dou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FTpKGuxEfy}\n}", "github": "", "reviewers": "gYMD;P7Hz;heqf;yRF6", "pdf_size": 34999653, "rating": "4;6;7;7", "confidence": "4;2;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "101;218;79;66", "wc_strengths": "51;94;136;64", "wc_weaknesses": "257;105;264;179", "wc_questions": "140;51;7;70", "wc_limitations": "15;11;9;19", "wc_review": "564;479;495;398", "wc_reply_reviewers": "32;44;165;29", "wc_reply_authors": "47;49;805;34", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.0, 60.203820476777054 ], "wc_strengths_avg": [ 86.25, 32.683137854251385 ], "wc_weaknesses_avg": [ 201.25, 64.8165680979794 ], "wc_questions_avg": [ 67.0, 47.94267410147248 ], "wc_limitations_avg": [ 13.5, 3.840572873934304 ], "wc_review_avg": [ 484.0, 59.03812327640505 ], "wc_reply_reviewers_avg": [ 67.5, 56.570752160458326 ], "wc_reply_authors_avg": [ 233.75, 329.8616187130597 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.18731716231633877, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JfmrtioK8e4J:scholar.google.com/&scioq=Vision+Foundation+Model+Enables+Generalizable+Object+Pose+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cuhk.edu.hk;cse.cuhk.edu.hk;berkeley.edu;dyson.com;cuhk.hk;cuhk.edu.hk;covariant.ai;cuhk.edu.hk", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;3;0", "aff_unique_norm": "Chinese University of Hong Kong;University of California, Berkeley;Dyson;Covariant", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.berkeley.edu;https://www.dyson.com;", "aff_unique_abbr": "CUHK;UC Berkeley;;", "aff_campus_unique_index": "0;0;1;0;0;0", "aff_campus_unique": "Hong Kong SAR;Berkeley;", "aff_country_unique_index": "0;0;1;2;0;0;0", "aff_country_unique": "China;United States;United Kingdom;" }, { "title": "On Affine Homotopy between Language Encoders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95971", "id": "FTpOwIaWUz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FTpOwIaWUz", "openreview": "https://openreview.net/forum?id=FTpOwIaWUz", "poster": "", "project": "", "author_site": "Robin Chan, Reda Boumasmoud, Anej Svete, Yuxin Ren, Qipeng Guo, Zhijing Jin, Shauli Ravfogel, Mrinmaya Sachan, Bernhard Sch\u00f6lkopf, Mennatallah El-Assady, Ryan Cotterell", "tldr": "", "abstract": "Pre-trained language encoders---functions that represent text as vectors---are an integral component of many NLP tasks.\n We tackle a natural question in language encoder analysis: What does it mean for two encoders to be similar?\n We contend that a faithful measure of similarity needs to be \\emph{intrinsic}, that is, task-independent, yet still be informative of \\emph{extrinsic} similarity---the performance on downstream tasks.\n It is common to consider two encoders similar if they are \\emph{homotopic}, i.e., if they can be aligned through some transformation.\n In this spirit, we study the properties of \\emph{affine} alignment of language encoders and its implications on extrinsic similarity.\n We find that while affine alignment is fundamentally an asymmetric notion of similarity, it is still informative of extrinsic similarity.\n We confirm this on datasets of natural language representations.\n Beyond providing useful bounds on extrinsic similarity, affine intrinsic similarity also allows us to begin uncovering the structure of the space of pre-trained encoders by defining an order over them.", "keywords": "language encoders;neural network similarity;metric spaces;homotopy", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c8da933c5235ffbfd7ab276a364a87b415f14db4.zip", "author": "Robin Chan;Reda Boumasmoud;Anej Svete;Yuxin Ren;Qipeng Guo;Zhijing Jin;Shauli Ravfogel;Mrinmaya Sachan;Bernhard Sch\u00f6lkopf;Mennatallah El-Assady;Ryan Cotterell", "authorids": "~Robin_Chan2;~Reda_Boumasmoud1;~Anej_Svete1;~Yuxin_Ren1;~Qipeng_Guo1;~Zhijing_Jin1;~Shauli_Ravfogel1;~Mrinmaya_Sachan3;~Bernhard_Sch\u00f6lkopf1;~Mennatallah_El-Assady1;~Ryan_Cotterell1", "gender": ";;M;M;M;;M;;;;", "homepage": ";;https://anejsvete.github.io/;https://github.com/twinkle0331;;;https://github.com/Shaul1321;;;;", "dblp": ";;259/1164;;172/1046;;227/2231;;;183/8957;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;k3mPGKgAAAAJ;;;;;;", "orcid": ";;;;;;;;;0000-0001-8526-2613;", "linkedin": ";;anej-svete-95a68616a;;;;;;;;", "or_profile": "~Robin_Chan2;~Reda_Boumasmoud1;~Anej_Svete1;~Yuxin_Ren1;~Qipeng_Guo1;~Zhijing_Jin1;~Shauli_Ravfogel1;~Mrinmaya_Sachan3;~Bernhard_Sch\u00f6lkopf1;~Mennatallah_El-Assady1;~Ryan_Cotterell1", "aff": ";;Department of Computer Science, ETHZ - ETH Zurich;Tsinghua University;Shanghai AI Laboratory;;Bar-Ilan University;;;Department of Computer Science, ETHZ - ETH Zurich;", "aff_domain": ";;inf.ethz.ch;tsinghua.edu.cn;pjlab.org.cn;;biu.ac.il;;;inf.ethz.ch;", "position": ";;PhD student;MS student;Researcher;;PhD student;;;Assistant Professor;", "bibtex": "@inproceedings{\nchan2024on,\ntitle={On Affine Homotopy between Language Encoders},\nauthor={Robin Chan and Reda Boumasmoud and Anej Svete and Yuxin Ren and Qipeng Guo and Zhijing Jin and Shauli Ravfogel and Mrinmaya Sachan and Bernhard Sch{\\\"o}lkopf and Mennatallah El-Assady and Ryan Cotterell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FTpOwIaWUz}\n}", "github": "", "reviewers": "sF8t;6x9V;Jeko;nK2G", "pdf_size": 914242, "rating": "3;6;6;7", "confidence": "3;3;1;4", "soundness": "2;3;3;4", "novelty": "1;3;2;4", "presentation": "2;4;2;3", "wc_summary": "407;116;197;106", "wc_strengths": "21;98;67;61", "wc_weaknesses": "257;308;132;61", "wc_questions": "545;101;141;44", "wc_limitations": "30;30;5;3", "wc_review": "1260;653;542;275", "wc_reply_reviewers": "0;30;71;110", "wc_reply_authors": "0;0;0;260", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 206.5, 121.01756070917972 ], "wc_strengths_avg": [ 61.75, 27.39867697535777 ], "wc_weaknesses_avg": [ 189.5, 98.00127550190355 ], "wc_questions_avg": [ 207.75, 197.73893774368264 ], "wc_limitations_avg": [ 17.0, 13.019216566291536 ], "wc_review_avg": [ 682.5, 360.61509951747723 ], "wc_reply_reviewers_avg": [ 52.75, 41.565460420883106 ], "wc_reply_authors_avg": [ 65.0, 112.58330249197702 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.07647191129018727, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:K5_Ot7Q0UtIJ:scholar.google.com/&scioq=On+Affine+Homotopy+between+Language+Encoders&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": ";;inf.ethz.ch;tsinghua.edu.cn;pjlab.org.cn;;biu.ac.il;;;inf.ethz.ch;", "author_num": 11, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "ETH Zurich;Tsinghua University;Shanghai AI Laboratory;Bar-Ilan University", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": "https://www.ethz.ch;https://www.tsinghua.edu.cn;https://www.shanghai-ai-lab.com;https://www.biu.ac.il", "aff_unique_abbr": "ETHZ;THU;SAIL;BIU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;1;1;2;0", "aff_country_unique": "Switzerland;China;Israel" }, { "title": "Conditioning non-linear and infinite-dimensional diffusion processes", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95970", "id": "FV4an2OuFM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FV4an2OuFM", "openreview": "https://openreview.net/forum?id=FV4an2OuFM", "poster": "/media/PosterPDFs/NeurIPS%202024/95970.png?t=1733415313.7443202", "project": "", "author_site": "Elizabeth L. Baker, Gefan Yang, Michael Severinsen, Christy Hipsley, Stefan Sommer", "tldr": "", "abstract": "Generative diffusion models and many stochastic models in science and engineering naturally live in infinite dimensions before discretisation. To incorporate observed data for statistical and learning tasks, one needs to condition on observations. While recent work has treated conditioning linear processes in infinite dimensions, conditioning non-linear processes in infinite dimensions has not been explored. This paper conditions function valued stochastic processes without prior discretisation. To do so, we use an infinite-dimensional version of Girsanov's theorem to condition a function-valued stochastic process, leading to a stochastic differential equation (SDE) for the conditioned process involving the score. We apply this technique to do time series analysis for shapes of organisms in evolutionary biology, where we discretise via the Fourier basis and then learn the coefficients of the score function with score matching methods.", "keywords": "Doob's h-transform;bridges;infinite dimensions;conditioning;non-linear stochastic differential equations", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4467c0c94f6756ba14c9ac137f26c90a1a061e51.zip", "author": "Elizabeth Louise Baker;Gefan Yang;Michael Lind Severinsen;Christy Anna Hipsley;Stefan Sommer", "authorids": "~Elizabeth_Louise_Baker1;~Gefan_Yang1;~Michael_Lind_Severinsen1;~Christy_Anna_Hipsley1;~Stefan_Sommer1", "gender": ";M;M;F;", "homepage": ";;https://globe.ku.dk/staff-list?pure=en/persons/583597;https://www.evomorpholab.com/;", "dblp": ";;;;", "google_scholar": ";tIdJ-YgAAAAJ;;https://scholar.google.com.au/citations?user=Eyi8JRoAAAAJ;", "orcid": ";;0009-0009-3998-7117;;", "linkedin": ";gefanyang/;michael-severinsen/;;", "or_profile": "~Elizabeth_Louise_Baker1;~Gefan_Yang1;~Michael_Lind_Severinsen1;~Christy_Anna_Hipsley1;~Stefan_Sommer1", "aff": ";University of Copenhagen;University of Copenhagen;University of Copenhagen;", "aff_domain": ";diku.dk;ucph.dk;ku.edu;", "position": ";PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nbaker2024conditioning,\ntitle={Conditioning non-linear and infinite-dimensional diffusion processes},\nauthor={Elizabeth Louise Baker and Gefan Yang and Michael Lind Severinsen and Christy Anna Hipsley and Stefan Sommer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FV4an2OuFM}\n}", "github": "", "reviewers": "veYd;bXXx;haqz", "pdf_size": 1409332, "rating": "7;7;7", "confidence": "4;3;1", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;4;3", "wc_summary": "43;162;83", "wc_strengths": "75;271;82", "wc_weaknesses": "247;205;54", "wc_questions": "407;435;8", "wc_limitations": "31;58;1", "wc_review": "803;1131;228", "wc_reply_reviewers": "33;23;11", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 96.0, 49.44357052910587 ], "wc_strengths_avg": [ 142.66666666666666, 90.79035680560402 ], "wc_weaknesses_avg": [ 168.66666666666666, 82.8747381427068 ], "wc_questions_avg": [ 283.3333333333333, 195.02535447702405 ], "wc_limitations_avg": [ 30.0, 23.280893453645632 ], "wc_review_avg": [ 720.6666666666666, 373.2169461437796 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 8.993825042154695 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7219154304680461398&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";diku.dk;ucph.dk;ku.edu;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "Policy Improvement using Language Feedback Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95969", "id": "FVgCwcwpJw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FVgCwcwpJw", "openreview": "https://openreview.net/forum?id=FVgCwcwpJw", "poster": "", "project": "", "author_site": "Victor Zhong, Dipendra Misra, Xingdi Yuan, Marc-Alexandre C\u00f4t\u00e9", "tldr": "", "abstract": "We introduce Language Feedback Models (LFMs) that identify desirable behaviour --- actions that help achieve tasks specified in the instruction - for imitation learning in instruction following. To train LFMs, we obtain feedback from Large Language Models (LLMs) on visual trajectories verbalized to language descriptions. First, by using LFMs to identify desirable behaviour to imitate, we improve in task-completion rate over strong behavioural cloning baselines on three distinct language grounding environments (Touchdown, ScienceWorld, and ALFWorld). Second, LFMs outperform using LLMs as experts to directly predict actions, when controlling for the number of LLM output tokens. Third, LFMs generalize to unseen environments, improving task-completion rate by 3.5-12.0% through one round of adaptation. Finally, LFMs can be modified to provide human-interpretable feedback without performance loss, allowing human verification of desirable behaviour for imitation learning.", "keywords": "instruction following;language feedback;language grounding;learning feedback model;imitation learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Victor Zhong;Dipendra Misra;Xingdi Yuan;Marc-Alexandre C\u00f4t\u00e9", "authorids": "~Victor_Zhong1;~Dipendra_Misra1;~Xingdi_Yuan2;~Marc-Alexandre_C\u00f4t\u00e92", "gender": "M;M;M;M", "homepage": "http://www.victorzhong.com;https://dipendramisra.com/;https://www.microsoft.com/en-us/research/people/macote;https://xingdi-eric-yuan.github.io/", "dblp": "182/8931;218/6569;118/9636;40/10147", "google_scholar": "lT3YoNkAAAAJ;rIoPIFsAAAAJ;https://scholar.google.ca/citations?user=L83CE5gAAAAJ;hYfE-B8AAAAJ", "orcid": ";;;", "linkedin": "victorzhong;;;", "or_profile": "~Victor_Zhong1;~Dipendra_Misra1;~Marc-Alexandre_Cote1;~Eric_Yuan1", "aff": "Microsoft;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Postdoc;Researcher;Principal Researcher;Senior Researcher", "bibtex": "@inproceedings{\nzhong2024policy,\ntitle={Policy Improvement using Language Feedback Models},\nauthor={Victor Zhong and Dipendra Misra and Xingdi Yuan and Marc-Alexandre C{\\^o}t{\\'e}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FVgCwcwpJw}\n}", "github": "", "reviewers": "hZ53;8DkG;u2Y5;B45L", "pdf_size": 9027126, "rating": "4;5;6;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;2;3", "wc_summary": "167;50;98;252", "wc_strengths": "38;59;39;95", "wc_weaknesses": "124;137;149;233", "wc_questions": "55;226;281;301", "wc_limitations": "12;4;8;92", "wc_review": "396;476;575;973", "wc_reply_reviewers": "26;0;16;17", "wc_reply_authors": "142;117;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 141.75, 76.03412063014868 ], "wc_strengths_avg": [ 57.75, 23.080023830143677 ], "wc_weaknesses_avg": [ 160.75, 42.6402098962939 ], "wc_questions_avg": [ 215.75, 96.78681470117715 ], "wc_limitations_avg": [ 29.0, 36.4828726939094 ], "wc_review_avg": [ 605.0, 221.7239274413116 ], "wc_reply_reviewers_avg": [ 14.75, 9.364160400164021 ], "wc_reply_authors_avg": [ 64.75, 65.35049732021938 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14083124900450795439&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stacking Your Transformers: A Closer Look at Model Growth for Efficient LLM Pre-Training", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95968", "id": "FXJDcriMYH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FXJDcriMYH", "openreview": "https://openreview.net/forum?id=FXJDcriMYH", "poster": "/media/PosterPDFs/NeurIPS%202024/95968.png?t=1730260052.5979257", "project": "", "author_site": "Wenyu Du, Tongxu Luo, Zihan Qiu, Zeyu Huang, Yikang Shen, Reynold Cheng, Yike Guo, Jie Fu", "tldr": "", "abstract": "LLMs are computationally expensive to pre-train due to their large scale.\nModel growth emerges as a promising approach by leveraging smaller models to accelerate the training of larger ones. \nHowever, the viability of these model growth methods in efficient LLM pre-training remains underexplored.\nThis work identifies three critical $\\underline{\\textit{O}}$bstacles: ($\\textit{O}$1) lack of comprehensive evaluation, ($\\textit{O}$2) untested viability for scaling, and ($\\textit{O}$3) lack of empirical guidelines.\nTo tackle $\\textit{O}$1, we summarize existing approaches into four atomic growth operators and systematically evaluate them in a standardized LLM pre-training setting.\nOur findings reveal that a depthwise stacking operator, called $G_{\\text{stack}}$, exhibits remarkable acceleration in training, leading to decreased loss and improved overall performance on eight standard NLP benchmarks compared to strong baselines. \nMotivated by these promising results, we conduct extensive experiments to delve deeper into $G_{\\text{stack}}$ to address $\\textit{O}$2 and $\\textit{O}$3.\nFor $\\textit{O}$2 (untested scalability), our study shows that $G_{\\text{stack}}$ is scalable and consistently performs well, with experiments up to 7B LLMs after growth and pre-training LLMs with 750B tokens.\nFor example, compared to a conventionally trained 7B model using 300B tokens, our $G_{\\text{stack}}$ model converges to the same loss with 194B tokens, resulting in a 54.6\\% speedup. \nWe further address $\\textit{O}$3 (lack of empirical guidelines) by formalizing guidelines to determine growth timing and growth factor for $G_{\\text{stack}}$, making it practical in general LLM pre-training.\nWe also provide in-depth discussions and comprehensive ablation studies of $G_{\\text{stack}}$. \nOur code and pre-trained model are available at https://llm-stacking.github.io/.", "keywords": "Efficient LLM pre-training;Model growth", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/b331d28cc289e31a4e82bd486ddb1c85849c3a9a.zip", "author": "Wenyu Du;Tongxu Luo;Zihan Qiu;Zeyu Huang;Yikang Shen;Reynold Cheng;Yike Guo;Jie Fu", "authorids": "~Wenyu_Du1;~Tongxu_Luo1;~Zihan_Qiu1;~Zeyu_Huang1;~Yikang_Shen1;~Reynold_Cheng1;~Yike_Guo1;~Jie_Fu2", "gender": ";M;M;;M;M;M;M", "homepage": ";https://tongxuluo.github.io;;;;https://reynold.hku.hk;https://cse.hkust.edu.hk/admin/people/faculty/profile/yikeguo;https://bigaidream.github.io/", "dblp": "38/10657;;313/9471;;152/8226;89/2619;g/YikeGuo;", "google_scholar": ";aJEGhzkAAAAJ;24eVHiYAAAAJ;https://scholar.google.com/citations?hl=en;qff5rRYAAAAJ;7R7MSb4AAAAJ;https://scholar.google.com.tw/citations?user=-0q6cIYAAAAJ;66osleIAAAAJ", "orcid": ";;;;;0000-0002-9480-9809;0009-0005-8401-282X;0000-0002-4494-843X", "linkedin": ";;zihan-qiu-33a172249/;;;;;", "or_profile": "~Wenyu_Du1;~Tongxu_Luo1;~Zihan_Qiu1;~Zeyu_Huang1;~Yikang_Shen1;~Reynold_Cheng1;~Yike_Guo1;~Jie_Fu1", "aff": "the University of Hong Kong, University of Hong Kong;University of Science and Technology Beijing;Tsinghua University;University of Edinburgh, University of Edinburgh;International Business Machines;The University of Hong Kong;Imperial College London;Hong Kong University of Science and Technology", "aff_domain": "cs.hku.hk;ustb.edu.cn;tsinghua.edu.cn;ed.ac.uk;ibm.com;cs.hku.hk;imperial.ac.uk;ust.hk", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Researcher;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ndu2024stacking,\ntitle={Stacking Your Transformers: A Closer Look at Model Growth for Efficient {LLM} Pre-Training},\nauthor={Wenyu Du and Tongxu Luo and Zihan Qiu and Zeyu Huang and Yikang Shen and Reynold Cheng and Yike Guo and Jie Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FXJDcriMYH}\n}", "github": "", "reviewers": "uHvr;ox3X;FXhk;8DCi", "pdf_size": 5974023, "rating": "6;7;7;7", "confidence": "2;4;3;4", "soundness": "4;3;4;3", "novelty": "3;2;4;4", "presentation": "3;4;4;4", "wc_summary": "65;69;92;48", "wc_strengths": "57;147;62;36", "wc_weaknesses": "35;349;34;83", "wc_questions": "15;57;2;2", "wc_limitations": "34;2;20;4", "wc_review": "206;624;210;173", "wc_reply_reviewers": "15;55;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 15.692354826475215 ], "wc_strengths_avg": [ 75.5, 42.4175671155242 ], "wc_weaknesses_avg": [ 125.25, 130.69119136345802 ], "wc_questions_avg": [ 19.0, 22.572106680591425 ], "wc_limitations_avg": [ 15.0, 13.0 ], "wc_review_avg": [ 303.25, 185.74091498644017 ], "wc_reply_reviewers_avg": [ 21.25, 20.42516829796024 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14277135670718911267&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "cs.hku.hk;ustb.edu.cn;tsinghua.edu.cn;ed.ac.uk;ibm.com;cs.hku.hk;imperial.ac.uk;ust.hk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;5;6", "aff_unique_norm": "University of Hong Kong;University of Science and Technology Beijing;Tsinghua University;University of Edinburgh;International Business Machines Corporation;Imperial College London;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.hku.hk;http://www.ustb.edu.cn;https://www.tsinghua.edu.cn;https://www.ed.ac.uk;https://www.ibm.com;https://www.imperial.ac.uk;https://www.ust.hk", "aff_unique_abbr": "HKU;USTB;THU;Edinburgh;IBM;ICL;HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;2;0;1;0", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "NAVSIM: Data-Driven Non-Reactive Autonomous Vehicle Simulation and Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97779", "id": "FXTeJvHE0k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FXTeJvHE0k", "openreview": "https://openreview.net/forum?id=FXTeJvHE0k", "poster": "/media/PosterPDFs/NeurIPS%202024/97779.png?t=1733316146.061996", "project": "", "author_site": "Daniel Dauner, Marcel Hallgarten, Tianyu Li, Xinshuo Weng, Zhiyu Huang, Zetong Yang, Hongyang Li, Igor Gilitschenski, Boris Ivanovic, Marco Pavone, Andreas Geiger, Kashyap Chitta", "tldr": "", "abstract": "Benchmarking vision-based driving policies is challenging. On one hand, open-loop evaluation with real data is easy, but these results do not reflect closed-loop performance. On the other, closed-loop evaluation is possible in simulation, but is hard to scale due to its significant computational demands. Further, the simulators available today exhibit a large domain gap to real data. This has resulted in an inability to draw clear conclusions from the rapidly growing body of research on end-to-end autonomous driving. In this paper, we present NAVSIM, a middle ground between these evaluation paradigms, where we use large datasets in combination with a non-reactive simulator to enable large-scale real-world benchmarking. Specifically, we gather simulation-based metrics, such as progress and time to collision, by unrolling bird's eye view abstractions of the test scenes for a short simulation horizon. Our simulation is non-reactive, i.e., the evaluated policy and environment do not influence each other. As we demonstrate empirically, this decoupling allows open-loop metric computation while being better aligned with closed-loop evaluations than traditional displacement errors. NAVSIM enabled a new competition held at CVPR 2024, where 143 teams submitted 463 entries, resulting in several new insights. On a large set of challenging scenarios, we observe that simple methods with moderate compute requirements such as TransFuser can match recent large-scale end-to-end driving architectures such as UniAD. Our modular framework can potentially be extended with new datasets, data curation strategies, and metrics, and will be continually maintained to host future challenges. Our code is available at https://github.com/autonomousvision/navsim.", "keywords": "Autonomous Driving;Simulation;Motion Planning", "primary_area": "", "supplementary_material": "/attachment/e268342cdd44749325ab06aac758d06b30f705ea.pdf", "author": "Daniel Dauner;Marcel Hallgarten;Tianyu Li;Xinshuo Weng;Zhiyu Huang;Zetong Yang;Hongyang Li;Igor Gilitschenski;Boris Ivanovic;Marco Pavone;Andreas Geiger;Kashyap Chitta", "authorids": "~Daniel_Dauner1;~Marcel_Hallgarten1;~Tianyu_Li5;~Xinshuo_Weng3;~Zhiyu_Huang2;~Zetong_Yang1;~Hongyang_Li1;~Igor_Gilitschenski1;~Boris_Ivanovic1;~Marco_Pavone1;~Andreas_Geiger3;~Kashyap_Chitta1", "gender": ";M;M;F;M;M;M;M;;M;M;M", "homepage": "https://danieldauner.github.io/;https://uni-tuebingen.de/fr/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/kognitive-systeme/the-chair/staff/marcel-hallgarten/;https://github.com/sephyli;http://www.xinshuoweng.com;https://mczhi.github.io/;;https://datascience.hku.hk/people/hongyang-li/;https://www.gilitschenski.org/igor;http://www.borisivanovic.com/;https://web.stanford.edu/~pavone/;http://www.cvlibs.net;https://kashyap7x.github.io/", "dblp": "349/4864;;;192/1952.html;08/10083;232/2108;95/8433-1;129/1281;203/8356;91/3382-1.html;40/5825-1;220/3765", "google_scholar": "tZqIYDcAAAAJ;;X6vTmEMAAAAJ;dthSEsoAAAAJ;aLZEVCsAAAAJ;oPiZSVYAAAAJ;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ;Nuw1Y4oAAAAJ;ey9AQcEAAAAJ;RhOpyXcAAAAJ;https://scholar.google.ca/citations?hl=en;vX5i2CcAAAAJ", "orcid": ";;0009-0008-3838-160X;0000-0002-7894-4381;;;0000-0001-9110-5534;;0000-0002-8698-202X;;0000-0002-8151-3726;", "linkedin": ";;sephy-li/;xinshuoweng;zhiyu-huang-a611ab191/;;hongyangli2020/;igorgilitschenski/;boris-ivanovic-a3103064;;;", "or_profile": "~Daniel_Dauner1;~Marcel_Hallgarten1;~Tianyu_Li5;~Xinshuo_Weng3;~Zhiyu_Huang2;~Zetong_Yang1;~Hongyang_Li1;~Igor_Gilitschenski1;~Boris_Ivanovic1;~Marco_Pavone1;~Andreas_Geiger3;~Kashyap_Chitta1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Shanghai AI Laboratory;NVIDIA;Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Lab;University of Toronto;NVIDIA;Stanford University;University of Tuebingen;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;pjlab.org.cn;nvidia.com;ntu.edu.sg;pjlab.org.cn;pjlab.org.cn;toronto.edu;nvidia.com;stanford.edu;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;Intern;Researcher;PhD student;Researcher;Researcher;Assistant Professor;Researcher;Associate Professor;Professor;PhD student", "bibtex": "@inproceedings{\ndauner2024navsim,\ntitle={{NAVSIM}: Data-Driven Non-Reactive Autonomous Vehicle Simulation and Benchmarking},\nauthor={Daniel Dauner and Marcel Hallgarten and Tianyu Li and Xinshuo Weng and Zhiyu Huang and Zetong Yang and Hongyang Li and Igor Gilitschenski and Boris Ivanovic and Marco Pavone and Andreas Geiger and Kashyap Chitta},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=FXTeJvHE0k}\n}", "github": "", "reviewers": "Fqyh;WJMF;LVhg", "pdf_size": 3400291, "rating": "7;7;9", "confidence": "5;2;5", "wc_summary_and_contributions": "100;68;100", "wc_strengths": "111;92;78", "wc_improvement": "182;235;45", "wc_limitations": "30;37;14", "wc_correctness": "7;10;9", "wc_clarity": "6;11;7", "wc_relation_to_prior_work": "14;43;16", "wc_documentation": "10;4;22", "wc_additional_feedback": "1;1;1", "wc_review": "461;501;292", "wc_reply_reviewers": "12;21;28", "wc_reply_authors": "62;56;16", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "wc_summary_and_contributions_avg": [ 89.33333333333333, 15.084944665313014 ], "wc_strengths_avg": [ 93.66666666666667, 13.523641850067197 ], "wc_improvement_avg": [ 154.0, 80.05414834139869 ], "wc_limitations_avg": [ 27.0, 9.626352718795768 ], "wc_correctness_avg": [ 8.666666666666666, 1.247219128924647 ], "wc_clarity_avg": [ 8.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 24.333333333333332, 13.224556283251582 ], "wc_documentation_avg": [ 12.0, 7.483314773547883 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 418.0, 90.57961507241387 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 6.548960901462833 ], "wc_reply_authors_avg": [ 44.666666666666664, 20.417857108151406 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=967103058964002798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uni-tuebingen.de;uni-tuebingen.de;pjlab.org.cn;nvidia.com;ntu.edu.sg;pjlab.org.cn;pjlab.org.cn;toronto.edu;nvidia.com;stanford.edu;uni-tuebingen.de;uni-tuebingen.de", "author_num": 12, "aff_unique_index": "0;0;1;2;3;4;5;6;2;7;8;9", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Shanghai AI Laboratory;NVIDIA;Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Lab;University of Toronto;Stanford University;University of Tuebingen;University of T\u00fcbingen", "aff_unique_dep": ";;NVIDIA Corporation;;;;;;;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.shanghai-ai-lab.com;https://www.nvidia.com;https://www.ntu.edu.sg;http://www.shailab.org/;https://www.shanghaiailab.com;https://www.utoronto.ca;https://www.stanford.edu;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;SAIL;NVIDIA;NTU;Shanghai AI Lab;SAIL;U of T;Stanford;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "T\u00fcbingen;;Stanford", "aff_country_unique_index": "0;0;1;2;3;1;1;4;2;2;0;0", "aff_country_unique": "Germany;China;United States;Singapore;Canada" }, { "title": "Replay-and-Forget-Free Graph Class-Incremental Learning: A Task Profiling and Prompting Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95967", "id": "FXdMgfCDer", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FXdMgfCDer", "openreview": "https://openreview.net/forum?id=FXdMgfCDer", "poster": "/media/PosterPDFs/NeurIPS%202024/95967.png?t=1730602692.1729856", "project": "", "author_site": "Chaoxi Niu, Guansong Pang, Ling Chen, Bing Liu", "tldr": "", "abstract": "Class-incremental learning (CIL) aims to continually learn a sequence of tasks, with each task consisting of a set of unique classes. Graph CIL (GCIL) follows the same setting but needs to deal with graph tasks (e.g., node classification in a graph). The key characteristic of CIL lies in the absence of task identifiers (IDs) during inference, which causes a significant challenge in separating classes from different tasks (i.e., inter-task class separation). Being able to accurately predict the task IDs can help address this issue, but it is a challenging problem. In this paper, we show theoretically that accurate task ID prediction on graph data can be achieved by a Laplacian smoothing-based graph task profiling approach, in which each graph task is modeled by a task prototype based on Laplacian smoothing over the graph. It guarantees that the task prototypes of the same graph task are nearly the same with a large smoothing step, while those of different tasks are distinct due to differences in graph structure and node attributes. Further, to avoid the catastrophic forgetting of the knowledge learned in previous graph tasks, we propose a novel graph prompting approach for GCIL which learns a small discriminative graph prompt for each task, essentially resulting in a separate classification model for each task. The prompt learning requires the training of a single graph neural network (GNN) only once on the first task, and no data replay is required thereafter, thereby obtaining a GCIL model being both replay-free and forget-free. Extensive experiments on four GCIL benchmarks show that i) our task prototype-based method can achieve 100% task ID prediction accuracy on all four datasets, ii) our GCIL model significantly outperforms state-of-the-art competing methods by at least 18% in average CIL accuracy, and iii) our model is fully free of forgetting on the four datasets.", "keywords": "graph class-incremental learning;forget-free;graph prompting;task identification", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Chaoxi Niu;Guansong Pang;Ling Chen;Bing Liu", "authorids": "~Chaoxi_Niu1;~Guansong_Pang1;~Ling_Chen5;~Bing_Liu1", "gender": "M;;F;M", "homepage": "https://github.com/Niuchx;http://guansongpang.com/;https://profiles.uts.edu.au/Ling.Chen;https://www.cs.uic.edu/~liub/", "dblp": "243/2731.html;07/11150;17/1237-6;l/BingLiu1.html", "google_scholar": "ttzytYwAAAAJ;https://scholar.google.com.tw/citations?hl=en;https://scholar.google.com.au/citations?user=L5aYWQcAAAAJ;Kt1bjZoAAAAJ", "orcid": "0000-0003-4529-8560;0000-0002-9877-2716;0000-0002-6468-5729;", "linkedin": ";guansong-pang-5587b21b/;;", "or_profile": "~Chaoxi_Niu1;~Guansong_Pang1;~Ling_Chen5;~Bing_Liu1", "aff": "University of Technology Sydney;Singapore Management University;University of Technology Sydney;University of Illinois at Chicago", "aff_domain": "student.uts.edu.au;smu.edu.sg;uts.edu.au;uic.edu", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nniu2024replayandforgetfree,\ntitle={Replay-and-Forget-Free Graph Class-Incremental Learning: A Task Profiling and Prompting Approach},\nauthor={Chaoxi Niu and Guansong Pang and Ling Chen and Bing Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FXdMgfCDer}\n}", "github": "", "reviewers": "j8FV;odiT;4w4P;aHCQ", "pdf_size": 721979, "rating": "5;5;5;7", "confidence": "3;3;4;5", "soundness": "1;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "84;79;62;111", "wc_strengths": "54;32;28;94", "wc_weaknesses": "376;43;101;91", "wc_questions": "2;41;50;236", "wc_limitations": "5;25;1;3", "wc_review": "521;220;242;535", "wc_reply_reviewers": "412;0;0;0", "wc_reply_authors": "1121;0;0;0", "reply_reviewers": "2;0;0;0", "reply_authors": "5;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 17.592612085759182 ], "wc_strengths_avg": [ 52.0, 26.19160170741759 ], "wc_weaknesses_avg": [ 152.75, 130.74474176807266 ], "wc_questions_avg": [ 82.25, 90.58249002980654 ], "wc_limitations_avg": [ 8.5, 9.630680142129112 ], "wc_review_avg": [ 379.5, 148.78592003277728 ], "wc_reply_reviewers_avg": [ 103.0, 178.40123317959436 ], "wc_reply_authors_avg": [ 280.25, 485.4072388211779 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5040304815791641100&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "student.uts.edu.au;smu.edu.sg;uts.edu.au;uic.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Technology Sydney;Singapore Management University;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uts.edu.au;https://www.smu.edu.sg;https://www.uic.edu", "aff_unique_abbr": "UTS;SMU;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Australia;Singapore;United States" }, { "title": "The Challenges of the Nonlinear Regime for Physics-Informed Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95966", "id": "FY6vPtITtE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FY6vPtITtE", "openreview": "https://openreview.net/forum?id=FY6vPtITtE", "poster": "/media/PosterPDFs/NeurIPS%202024/95966.png?t=1731707182.0021377", "project": "", "author_site": "Andrea Bonfanti, Giuseppe Bruno, Cristina Cipriani", "tldr": "", "abstract": "The Neural Tangent Kernel (NTK) viewpoint is widely employed to analyze the training dynamics of overparameterized Physics-Informed Neural Networks (PINNs). However, unlike the case of linear Partial Differential Equations (PDEs), we show how the NTK perspective falls short in the nonlinear scenario. Specifically, we establish that the NTK yields a random matrix at initialization that is not constant during training, contrary to conventional belief. Another significant difference from the linear regime is that, even in the idealistic infinite-width limit, the Hessian does not vanish and hence it cannot be disregarded during training. This motivates the adoption of second-order optimization methods. We explore the convergence guarantees of such methods in both linear and nonlinear cases, addressing challenges such as spectral bias and slow convergence. Every theoretical result is supported by numerical examples with both linear and nonlinear PDEs, and we highlight the benefits of second-order methods in benchmark test cases.", "keywords": "Physics-Informed Neural Networks;Neural Tangent Kernel;Nonlinear PDEs;Second-order optimization", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Andrea Bonfanti;Giuseppe Bruno;Cristina Cipriani", "authorids": "~Andrea_Bonfanti1;~Giuseppe_Bruno1;~Cristina_Cipriani1", "gender": "M;M;F", "homepage": ";https://poisson.phc.dm.unipi.it/~bruno/;", "dblp": ";;", "google_scholar": ";;E5rRtokAAAAJ", "orcid": "0000-0002-0062-1187;;", "linkedin": ";;", "or_profile": "~Andrea_Bonfanti1;~Giuseppe_Bruno1;~Cristina_Cipriani1", "aff": "Basque Center for Applied Mathematics;Universit\u00e0 di Pisa, University of Pisa;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "bcamath.org;dm.unipi.it;tum.de", "position": "PhD student;MS student;PhD student", "bibtex": "@inproceedings{\nbonfanti2024the,\ntitle={The Challenges of the Nonlinear Regime for Physics-Informed Neural Networks},\nauthor={Andrea Bonfanti and Giuseppe Bruno and Cristina Cipriani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FY6vPtITtE}\n}", "github": "", "reviewers": "XYBU;ocfJ;4Qrf;vTvV", "pdf_size": 6698772, "rating": "6;7;7;7", "confidence": "3;5;3;4", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "4;4;3;3", "wc_summary": "73;100;49;82", "wc_strengths": "29;64;55;67", "wc_weaknesses": "71;94;92;29", "wc_questions": "89;256;134;30", "wc_limitations": "37;1;10;6", "wc_review": "299;515;340;214", "wc_reply_reviewers": "10;62;0;21", "wc_reply_authors": "0;32;0;12", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.0, 18.371173070873837 ], "wc_strengths_avg": [ 53.75, 14.956186011146023 ], "wc_weaknesses_avg": [ 71.5, 26.139051245215462 ], "wc_questions_avg": [ 127.25, 82.98004278138208 ], "wc_limitations_avg": [ 13.5, 13.937359864766353 ], "wc_review_avg": [ 342.0, 109.73376873141649 ], "wc_reply_reviewers_avg": [ 23.25, 23.573024837725004 ], "wc_reply_authors_avg": [ 11.0, 13.076696830622021 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5058501195557512746&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "bcamath.org;dm.unipi.it;tum.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Basque Center for Applied Mathematics;University of Pisa;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bcamath.org/;https://www.unipi.it;https://www.tum.de", "aff_unique_abbr": "BCAM;UniPi;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Spain;Italy;Germany" }, { "title": "StreamFlow: Streamlined Multi-Frame Optical Flow Estimation for Video Sequences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95965", "id": "FYLcH4HAZr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FYLcH4HAZr", "openreview": "https://openreview.net/forum?id=FYLcH4HAZr", "poster": "/media/PosterPDFs/NeurIPS%202024/95965.png?t=1731505232.7296193", "project": "", "author_site": "SHANGKUN SUN, Jiaming Liu, Huaxia Li, Guoqing Liu, Thomas Li, Wei Gao", "tldr": "", "abstract": "Prior multi-frame optical flow methods typically estimate flow repeatedly in a pair-wise manner, leading to significant computational redundancy. To mitigate this, we implement a Streamlined In-batch Multi-frame (SIM) pipeline, specifically tailored to video inputs to minimize redundant calculations. It enables the simultaneous prediction of successive unidirectional flows in a single forward pass, boosting processing speed by 44.43% and reaching efficiencies on par with two-frame networks. Moreover, we investigate various spatiotemporal modeling methods for optical flow estimation within this pipeline. Notably, we propose a simple yet highly effective parameter-efficient Integrative spatiotemporal Coherence (ISC) modeling method, alongside a lightweight Global Temporal Regressor (GTR) to harness temporal cues. The proposed ISC and GTR bring powerful spatiotemporal modeling capabilities and significantly enhance accuracy, including in occluded areas, while adding modest computations to the SIM pipeline. Compared to the baseline, our approach, StreamFlow, achieves performance enhancements of 15.45% and 11.37% on the Sintel clean and final test sets respectively, with gains of 15.53% and 10.77% on occluded regions and only a 1.11% rise in latency. Furthermore, StreamFlow exhibits state-of-the-art cross-dataset testing results on Sintel and KITTI, demonstrating its robust cross-domain generalization capabilities. The code is available [here](https://github.com/littlespray/StreamFlow).", "keywords": "optical flow;low-level vision;computer vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/42586f5d148c83098ae91f1cc5871c427accf785.zip", "author": "Shangkun Sun;Jiaming Liu;Huaxia Li;Guoqing Liu;Thomas H. Li;Wei Gao", "authorids": "~Shangkun_Sun3;~Jiaming_Liu7;~Huaxia_Li1;~Guoqing_Liu1;~Thomas_H._Li3;~Wei_Gao12", "gender": ";M;M;M;M;M", "homepage": ";https://jmliu88.github.io;;https://www.minieye.cc/;http://pku.edu.cn;https://gaowei262.github.io/", "dblp": ";71/10786;233/2259.html;;213/4037;28/2073-3", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;;KdXy-kgAAAAJ", "orcid": ";;;;;0000-0001-7429-5495", "linkedin": ";;;;;", "or_profile": "~Shangkun_Sun3;~Jiaming_Liu7;~Huaxia_Li1;~Guoqing_Liu1;~Thomas_H._Li3;~Wei_Gao12", "aff": ";Tiamat AI;Xiaohongshu;MINIEYE;AIIT, Peking University;Shenzhen Graduate School, Peking University ", "aff_domain": ";bksp.com;xiaohongshu.com;minieye.cc;aiit.org.cn;pku.edu.cn", "position": ";Researcher;Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nsun2024streamflow,\ntitle={StreamFlow: Streamlined Multi-Frame Optical Flow Estimation for Video Sequences},\nauthor={Shangkun Sun and Jiaming Liu and Huaxia Li and Guoqing Liu and Thomas H. Li and Wei Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FYLcH4HAZr}\n}", "github": "", "reviewers": "zwXh;PByw;ba6H", "pdf_size": 4118868, "rating": "4;5;6", "confidence": "3;4;5", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "51;80;273", "wc_strengths": "23;55;169", "wc_weaknesses": "261;190;234", "wc_questions": "26;5;229", "wc_limitations": "1;5;29", "wc_review": "362;335;934", "wc_reply_reviewers": "0;45;131", "wc_reply_authors": "82;82;622", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 134.66666666666666, 98.53031118504711 ], "wc_strengths_avg": [ 82.33333333333333, 62.659574066715635 ], "wc_weaknesses_avg": [ 228.33333333333334, 29.261275129806325 ], "wc_questions_avg": [ 86.66666666666667, 101.00935050226235 ], "wc_limitations_avg": [ 11.666666666666666, 12.36482466066094 ], "wc_review_avg": [ 543.6666666666666, 276.22736206892245 ], "wc_reply_reviewers_avg": [ 58.666666666666664, 54.346624141298385 ], "wc_reply_authors_avg": [ 262.0, 254.55844122715712 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5954169233801969934&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";bksp.com;xiaohongshu.com;minieye.cc;aiit.org.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Tiamat AI;Xiaohongshu;MINIEYE;Peking University", "aff_unique_dep": ";;;AIIT", "aff_unique_url": ";https://www.xiaohongshu.com;;http://www.pku.edu.cn", "aff_unique_abbr": ";XHS;;PKU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Shenzhen", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "title": "CLIP in Mirror: Disentangling text from visual images through reflection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95964", "id": "FYm8coxdiR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FYm8coxdiR", "openreview": "https://openreview.net/forum?id=FYm8coxdiR", "poster": "/media/PosterPDFs/NeurIPS%202024/95964.png?t=1731688363.4189427", "project": "", "author_site": "Tiancheng Wang, Yuguang Yang, Linlin Yang, Shaohui Lin, Juan Zhang, Guodong Guo, Baochang Zhang", "tldr": "", "abstract": "The CLIP network excels in various tasks, but struggles with text-visual images i.e., images that contain both text and visual objects; it risks confusing textual and visual representations. To address this issue, we propose MirrorCLIP, a zero-shot framework, which disentangles the image features of CLIP by exploiting the difference in the mirror effect between visual objects and text in the images. Specifically, MirrorCLIP takes both original and flipped images as inputs, comparing their features dimension-wise in the latent space to generate disentangling masks. With disentangling masks, we further design filters to separate textual and visual factors more precisely, and then get disentangled representations. Qualitative experiments using stable diffusion models and class activation mapping (CAM) validate the effectiveness of our disentanglement. Moreover, our proposed MirrorCLIP reduces confusion when encountering text-visual images and achieves a substantial improvement on typographic defense, further demonstrating its superior ability of disentanglement. Our code is available at https://github.com/tcwangbuaa/MirrorCLIP", "keywords": "Disentanglement of CLIP;Flip invariance;Typographic attack;Text recognition", "primary_area": "machine_vision", "supplementary_material": "/attachment/aa0e8f84337b738ceb38fafa089118eaf44e0514.zip", "author": "Tiancheng Wang;Yuguang Yang;Linlin Yang;Shaohui Lin;Juan Zhang;Guodong Guo;Baochang Zhang", "authorids": "~Tiancheng_Wang1;~Yuguang_Yang1;~Linlin_Yang1;~Shaohui_Lin1;~Juan_Zhang3;~Guodong_Guo1;~Baochang_Zhang1", "gender": "M;M;M;M;F;M;M", "homepage": "https://github.com/Blank10492?tab=repositories;;https://www.mu4yang.com;https://sites.google.com/site/shaohuilin007/home;https://iai.buaa.edu.cn/info/1013/1101.htm;http://pages.cs.wisc.edu/~gdguo/;https://dblp.uni-trier.de/pid/80/3887-1.html", "dblp": ";348/9433.html;;183/0917.html;;92/4520;https://dblp.uni-trier.de/pid/80/3887-1.html", "google_scholar": ";xk0tOqQAAAAJ;https://scholar.google.com.hk/citations?user=gI55gF0AAAAJ;k8AMa1kAAAAJ;;f2Y5nygAAAAJ;", "orcid": ";0000-0002-6394-4645;0000-0001-6752-0252;0000-0003-0284-9940;;;", "linkedin": ";;;;;;", "or_profile": "~Tiancheng_Wang1;~Yuguang_Yang1;~Linlin_Yang1;~Shaohui_Lin1;~Juan_Zhang3;~Guodong_Guo1;~Baochang_Zhang1", "aff": "Beihang University;Institue of Artificial Intelligence of Beihang University;Communication University of China;East China Normal University;Beihang University;West Virginia University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;cuc.edu.cn;ecnu.edu.cn;buaa.edu;wvu.edu;buaa.edu.cn", "position": "PhD student;MS student;Lecturer;Researcher;Assistant Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nwang2024clip,\ntitle={{CLIP} in Mirror: Disentangling text from visual images through reflection},\nauthor={Tiancheng Wang and Yuguang Yang and Linlin Yang and Shaohui Lin and Juan Zhang and Guodong Guo and Baochang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FYm8coxdiR}\n}", "github": "", "reviewers": "ueRM;J5xD;b2Kp;xWuF", "pdf_size": 8514556, "rating": "3;6;7;7", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "91;112;96;154", "wc_strengths": "35;34;18;108", "wc_weaknesses": "124;57;59;80", "wc_questions": "69;32;4;2", "wc_limitations": "3;8;4;1", "wc_review": "322;243;181;345", "wc_reply_reviewers": "548;0;19;12", "wc_reply_authors": "1905;59;37;50", "reply_reviewers": "2;0;1;1", "reply_authors": "6;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.25, 24.772716847370617 ], "wc_strengths_avg": [ 48.75, 34.86671048435743 ], "wc_weaknesses_avg": [ 80.0, 26.953663943887108 ], "wc_questions_avg": [ 26.75, 27.123559869604136 ], "wc_limitations_avg": [ 4.0, 2.5495097567963922 ], "wc_review_avg": [ 272.75, 65.09368248916326 ], "wc_reply_reviewers_avg": [ 144.75, 232.9156231342157 ], "wc_reply_authors_avg": [ 512.75, 803.8539590622167 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.44022545316281186, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t-XndrUU8zEJ:scholar.google.com/&scioq=CLIP+in+Mirror:+Disentangling+text+from+visual+images+through+reflection&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "buaa.edu.cn;buaa.edu.cn;cuc.edu.cn;ecnu.edu.cn;buaa.edu;wvu.edu;buaa.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;0;3;0", "aff_unique_norm": "Beihang University;Communication University of China;East China Normal University;West Virginia University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.cuc.edu.cn/;http://www.ecnu.edu.cn;https://www.wvu.edu", "aff_unique_abbr": "BUAA;CUC;ECNU;WVU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Edit Distance Robust Watermarks via Indexing Pseudorandom Codes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95963", "id": "FZ45kf5pIA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FZ45kf5pIA", "openreview": "https://openreview.net/forum?id=FZ45kf5pIA", "poster": "", "project": "", "author_site": "Noah Golowich, Ankur Moitra", "tldr": "", "abstract": "Motivated by the problem of detecting AI-generated text, we consider the problem of watermarking the output of language models with provable guarantees. We aim for watermarks which satisfy: (a) undetectability, a cryptographic notion introduced by Christ, Gunn, & Zamir (2023) which stipulates that it is computationally hard to distinguish watermarked language model outputs from the model's actual output distribution; and (b) robustness to channels which introduce a constant fraction of adversarial insertions, substitutions, and deletions to the watermarked text. Earlier schemes could only handle stochastic substitutions and deletions, and thus we are aiming for a more natural and appealing robustness guarantee that holds with respect to edit distance.\n Our main result is a watermarking scheme which achieves both (a) and (b) when the alphabet size for the language model is allowed to grow as a polynomial in the security parameter. To derive such a scheme, we follow an approach introduced by Christ & Gunn (2024), which proceeds via first constructing pseudorandom codes satisfying undetectability and robustness properties analogous to those above; our codes have the additional benefit of relying on weaker computational assumptions than used in previous work. Then we show that there is a generic transformation from such codes over large alphabets to watermarking schemes for arbitrary language models.", "keywords": "Watermarking;pseudorandom codes;language models", "primary_area": "learning_theory", "supplementary_material": "", "author": "Noah Golowich;Ankur Moitra", "authorids": "~Noah_Golowich1;~Ankur_Moitra1", "gender": ";M", "homepage": "https://noahgol.github.io;http://people.csail.mit.edu/moitra/", "dblp": "150/1861;04/952", "google_scholar": "roUlyWcAAAAJ;https://scholar.google.com.tw/citations?user=umFQktIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Noah_Golowich1;~Ankur_Moitra1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;", "bibtex": "@inproceedings{\ngolowich2024edit,\ntitle={Edit Distance Robust Watermarks via Indexing Pseudorandom Codes},\nauthor={Noah Golowich and Ankur Moitra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FZ45kf5pIA}\n}", "github": "", "reviewers": "zeWE;YwkB;Qm1S;ab11", "pdf_size": 669585, "rating": "5;6;8;8", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "3;3;4;4", "presentation": "2;3;2;3", "wc_summary": "112;92;626;95", "wc_strengths": "44;69;301;160", "wc_weaknesses": "81;33;586;326", "wc_questions": "121;105;131;20", "wc_limitations": "103;86;1;7", "wc_review": "461;385;1645;608", "wc_reply_reviewers": "35;13;90;56", "wc_reply_authors": "0;0;0;13", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 231.25, 228.03659245831577 ], "wc_strengths_avg": [ 143.5, 100.65907808041955 ], "wc_weaknesses_avg": [ 256.5, 220.31398049147947 ], "wc_questions_avg": [ 94.25, 43.859862060886606 ], "wc_limitations_avg": [ 49.25, 45.6966902521397 ], "wc_review_avg": [ 774.75, 508.79385560362266 ], "wc_reply_reviewers_avg": [ 48.5, 28.376927247325423 ], "wc_reply_authors_avg": [ 3.25, 5.629165124598851 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8408122487336199902&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Critical Evaluation of AI Feedback for Aligning Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95962", "id": "FZQYfmsmX9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FZQYfmsmX9", "openreview": "https://openreview.net/forum?id=FZQYfmsmX9", "poster": "", "project": "", "author_site": "Archit Sharma, Sedrick Scott Keh, Eric Mitchell, Chelsea Finn, Kushal Arora, Thomas Kollar", "tldr": "", "abstract": "Learning from AI feedback (LAIF) is a popular paradigm for improving the instruction-following abilities of powerful pre-trained language models. LAIF first performs supervised fine-tuning (SFT) using demonstrations from a teacher model and then further fine-tunes the model with reinforcement learning (RL) or direct preference optimization (DPO), using feedback from a critic model. While recent popular open-source models have demonstrated substantial improvements in performance from the RL step, in this paper we question whether the complexity of this RL step is truly warranted for AI feedback. We show that the improvements of the RL step are virtually entirely due to the widespread practice of using a weaker teacher model (e.g. GPT-3.5) for SFT data collection than the critic (e.g., GPT-4) used for AI feedback generation. Specifically, we show that simple supervised fine-tuning with GPT-4 as the teacher outperforms existing LAIF pipelines. More generally, we find that the gains from LAIF vary substantially across base model families, test-time evaluation protocols, and critic models. Finally, we provide a mechanistic explanation for when SFT may outperform the full two-step LAIF pipeline as well as suggestions for making LAIF maximally useful in practice.", "keywords": "reinforcement learning from human feedback;ai feedback;alignment;direct preference optimization", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Archit Sharma;Sedrick Keh;Eric Mitchell;Chelsea Finn;Kushal Arora;Thomas Kollar", "authorids": "~Archit_Sharma1;~Sedrick_Keh1;~Eric_Mitchell1;~Chelsea_Finn1;~Kushal_Arora1;~Thomas_Kollar1", "gender": "M;M;F;M;M;M", "homepage": ";https://ericmitchell.ai;https://ai.stanford.edu/~cbfinn/;http://kushalarora.github.io;http://tkollar.github.io;https://sedrickkeh.github.io", "dblp": "220/3163.html;238/0419;131/1783;;10/6653;244/9561", "google_scholar": "_0IIzxgAAAAJ;q77J4fgAAAAJ;vfPE6hgAAAAJ;;AEKT17QAAAAJ;IMYgXsYAAAAJ", "orcid": ";0000-0002-7487-1744;;;0000-0003-2598-8118;", "linkedin": ";;;;;", "or_profile": "~Archit_Sharma1;~Eric_Mitchell1;~Chelsea_Finn1;~Kushal_Arora1;~Thomas_Kollar1;~Sedrick_Scott_Keh1", "aff": "Stanford University;Stanford University;Google;McGill University;Toyota Research Institute;Toyota Research Institute", "aff_domain": "stanford.edu;stanford.edu;google.com;mcgill.ca;tri.global;tri.global", "position": "Graduate Student;PhD student;Research Scientist;PhD student;Principal Researcher;Research Engineer", "bibtex": "@inproceedings{\nsharma2024a,\ntitle={A Critical Evaluation of {AI} Feedback for Aligning Large Language Models},\nauthor={Archit Sharma and Sedrick Keh and Eric Mitchell and Chelsea Finn and Kushal Arora and Thomas Kollar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FZQYfmsmX9}\n}", "github": "", "reviewers": "6Bqz;pKmA;Sb9E", "pdf_size": 2868415, "rating": "6;6;6", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "122;64;148", "wc_strengths": "127;57;121", "wc_weaknesses": "336;168;131", "wc_questions": "50;109;294", "wc_limitations": "111;9;8", "wc_review": "746;407;702", "wc_reply_reviewers": "14;14;14", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 111.33333333333333, 35.11251755270318 ], "wc_strengths_avg": [ 101.66666666666667, 31.678944988044595 ], "wc_weaknesses_avg": [ 211.66666666666666, 89.20513189024248 ], "wc_questions_avg": [ 151.0, 103.94549853969947 ], "wc_limitations_avg": [ 42.666666666666664, 48.32068800098865 ], "wc_review_avg": [ 618.3333333333334, 150.51098151150154 ], "wc_reply_reviewers_avg": [ 14.0, 0.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9070039248943937150&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;google.com;mcgill.ca;tri.global;tri.global", "author_num": 6, "aff_unique_index": "0;0;1;2;3;3", "aff_unique_norm": "Stanford University;Google;McGill University;Toyota Research Institute", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.stanford.edu;https://www.google.com;https://www.mcgill.ca;https://www.tri.global", "aff_unique_abbr": "Stanford;Google;McGill;TRI", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Stanford;Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Enhancing Large Vision Language Models with Self-Training on Image Comprehension", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95961", "id": "FZW7Ctyjm3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FZW7Ctyjm3", "openreview": "https://openreview.net/forum?id=FZW7Ctyjm3", "poster": "", "project": "", "author_site": "Yihe Deng, Pan Lu, Fan Yin, Ziniu Hu, Sheng Shen, Quanquan Gu, James Zou, Kai-Wei Chang, Wei Wang", "tldr": "", "abstract": "Large vision language models (LVLMs) integrate large language models (LLMs) with pre-trained vision encoders, thereby activating the perception capability of the model to understand image inputs for different queries and conduct subsequent reasoning. Improving this capability requires high-quality vision-language data, which is costly and labor-intensive to acquire. Self-training approaches have been effective in single-modal settings to alleviate the need for labeled data by leveraging model's own generation. However, effective self-training remains a challenge regarding the unique visual perception and reasoning capability of LVLMs. To address this, we introduce **S**elf-**T**raining on **I**mage **C**omprehension (**STIC**), which emphasizes a self-training approach specifically for image comprehension. First, the model self-constructs a preference dataset for image descriptions using unlabeled images. Preferred responses are generated through a step-by-step prompt, while dis-preferred responses are generated from either corrupted images or misleading prompts. To further self-improve reasoning on the extracted visual information, we let the model reuse a small portion of existing instruction-tuning data and append its self-generated image descriptions to the prompts. We validate the effectiveness of STIC across seven different benchmarks, demonstrating substantial performance gains of 4.0% on average while using 70% less supervised fine-tuning data than the current method. Further studies dive into various components of STIC and highlight its potential to leverage vast quantities of unlabeled images for self-training.", "keywords": "Large Vision-Language Models;multimodal preference alignment;self-training;synthetic data", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/5e1acb34b78a6deb941fa9d198f41ad9f7c7c839.zip", "author": "Yihe Deng;Pan Lu;Fan Yin;Ziniu Hu;Sheng Shen;Quanquan Gu;James Zou;Kai-Wei Chang;Wei Wang", "authorids": "~Yihe_Deng1;~Pan_Lu2;~Fan_Yin1;~Ziniu_Hu1;~Sheng_Shen2;~Quanquan_Gu1;~James_Zou1;~Kai-Wei_Chang1;~Wei_Wang13", "gender": "F;M;M;M;M;;M;F;M", "homepage": ";;http://acbull.github.io;https://sincerass.github.io;http://web.cs.ucla.edu/~qgu/;;http://kwchang.net;http://www.cs.ucla.edu/~weiwang;https://lupantech.github.io/", "dblp": "230/8011;;180/5436;138/5764-1.html;50/4597;;18/2428;w/WeiWang.html;", "google_scholar": "7Lix1poAAAAJ;klShdV0AAAAJ;x6ct1CsAAAAJ;https://scholar.google.com/citations?hl=en;GU9HgNAAAAAJ;23ZXZvEAAAAJ;fqDBtzYAAAAJ;UedS9LQAAAAJ;IyucsdQAAAAJ", "orcid": ";;;;;;0000-0001-5365-0072;0000-0002-8180-2886;", "linkedin": ";fan-y-60b666180/;;sheng-s-ab198a174/;;;kai-wei-chang-41239040;wei-wang-8800845/;pan-lu-9308909a/", "or_profile": "~Yihe_Deng1;~Fan_Yin1;~Ziniu_Hu1;~Sheng_Shen2;~Quanquan_Gu1;~James_Zou1;~Kai-Wei_Chang1;~Wei_Wang13;~Pan_Lu1", "aff": "University of California, Los Angeles;University of California, Los Angeles;Deepmind;University of California, Berkeley;University of California, Los Angeles;Stanford University;Amazon;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;deepmind.com;berkeley.edu;cs.ucla.edu;stanford.edu;amazon.com;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Visiting Researcher;PhD student;Associate Professor;Assistant Professor;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\ndeng2024enhancing,\ntitle={Enhancing Large Vision Language Models with Self-Training on Image Comprehension},\nauthor={Yihe Deng and Pan Lu and Fan Yin and Ziniu Hu and Sheng Shen and Quanquan Gu and James Zou and Kai-Wei Chang and Wei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FZW7Ctyjm3}\n}", "github": "", "reviewers": "s8oQ;uhYN;B7oX;qFQt", "pdf_size": 12155954, "rating": "4;6;6;6", "confidence": "3;3;4;3", "soundness": "3;3;2;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "51;96;113;84", "wc_strengths": "35;30;69;63", "wc_weaknesses": "68;192;162;16", "wc_questions": "31;176;48;33", "wc_limitations": "1;5;2;6", "wc_review": "186;499;394;202", "wc_reply_reviewers": "0;43;17;11", "wc_reply_authors": "181;162;141;85", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.0, 22.68259244442751 ], "wc_strengths_avg": [ 49.25, 16.97608612136496 ], "wc_weaknesses_avg": [ 109.5, 70.75839172847274 ], "wc_questions_avg": [ 72.0, 60.40281450396165 ], "wc_limitations_avg": [ 3.5, 2.0615528128088303 ], "wc_review_avg": [ 320.25, 131.7163144792626 ], "wc_reply_reviewers_avg": [ 17.75, 15.801503093060482 ], "wc_reply_authors_avg": [ 142.25, 35.95396362016294 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8396534780905005752&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ucla.edu;cs.ucla.edu;deepmind.com;berkeley.edu;cs.ucla.edu;stanford.edu;amazon.com;ucla.edu;ucla.edu", "author_num": 9, "aff_unique_index": "0;0;1;2;0;3;4;0;0", "aff_unique_norm": "University of California, Los Angeles;DeepMind;University of California, Berkeley;Stanford University;Amazon", "aff_unique_dep": ";;;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://deepmind.com;https://www.berkeley.edu;https://www.stanford.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;DeepMind;UC Berkeley;Stanford;Amazon", "aff_campus_unique_index": "0;0;2;0;3;0;0", "aff_campus_unique": "Los Angeles;;Berkeley;Stanford", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Artemis: Towards Referential Understanding in Complex Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95960", "id": "FaNhyXY6Y1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FaNhyXY6Y1", "openreview": "https://openreview.net/forum?id=FaNhyXY6Y1", "poster": "", "project": "", "author_site": "Jihao Qiu, Yuan Zhang, Xi Tang, Lingxi Xie, Tianren Ma, Pengyu Yan, DAVID DOERMANN, Qixiang Ye, Yunjie Tian", "tldr": "", "abstract": "Videos carry rich visual information including object description, action, interaction, etc., but the existing multimodal large language models (MLLMs) fell short in referential understanding scenarios such as video-based referring. In this paper, we present Artemis, an MLLM that pushes video-based referential understanding to a finer level. Given a video, Artemis receives a natural-language question with a bounding box in any video frame and describes the referred target in the entire video. The key to achieving this goal lies in extracting compact, target-specific video features, where we set a solid baseline by tracking and selecting spatiotemporal features from the video. We train Artemis on the newly established ViderRef45K dataset with 45K video-QA pairs and design a computationally efficient, three-stage training procedure. Results are promising both quantitatively and qualitatively. Additionally, we show that Artemis can be integrated with video grounding and text summarization tools to understand more complex scenarios. Code and data are available at https://github.com/NeurIPS24Artemis/Artemis.", "keywords": "Video Referring;Multimodal;RoI Selection Mechanism", "primary_area": "machine_vision", "supplementary_material": "/attachment/2120e9d0c563ce94e32fb40dc3c9f329faaa2a59.zip", "author": "Jihao Qiu;Yuan Zhang;Xi Tang;Lingxi Xie;Tianren Ma;Pengyu Yan;David Doermann;Qixiang Ye;Yunjie Tian", "authorids": "~Jihao_Qiu1;~Yuan_Zhang31;~Xi_Tang2;~Lingxi_Xie1;~Tianren_Ma1;~Pengyu_Yan1;~David_Doermann2;~Qixiang_Ye1;~Yunjie_Tian1", "gender": "M;F;M;M;Not Specified;M;M;M;M", "homepage": "http://lamp.ucas.ac.cn/intro.html;https://github.com/alpacaduby;https://github.com/ncTimTang;http://lingxixie.com/;http://lamp.ucas.ac.cn/intro.html;;https://cse.buffalo.edu/~doermann/;http://people.ucas.ac.cn/~qxye?language=en;https://sunsmarterjie.github.io/", "dblp": ";;;123/2869;;;;06/4335;270/0554", "google_scholar": ";https://scholar.google.com/citations?hl=en;;EEMm7hwAAAAJ;;;RoGOW9AAAAAJ;https://scholar.google.com.hk/citations?user=tjEfgsEAAAAJ;https://scholar.google.com.hk/citations?user=DuetWVcAAAAJ", "orcid": ";;;;;0000-0003-1584-2350;0000-0003-1639-4561;;0000-0002-5103-3748", "linkedin": ";;;;;;david-doermann-bb7757/;;", "or_profile": "~Jihao_Qiu1;~Yuan_Zhang31;~Xi_Tang2;~Lingxi_Xie1;~Tianren_Ma1;~Pengyu_Yan1;~David_Doermann2;~Qixiang_Ye1;~Yunjie_Tian1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Huawei Technologies Ltd.;University of Chinese Academy of Sciences;State University of New York at Buffalo;State University of New York at Buffalo;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;huawei.com;ucas.ac.cn;buffalo.edu;buffalo.edu;ucas.ac.cn;ucas.ac.cn", "position": "MS student;MS student;MS student;Researcher;PhD student;PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nqiu2024artemis,\ntitle={Artemis: Towards Referential Understanding in Complex Videos},\nauthor={Jihao Qiu and Yuan Zhang and Xi Tang and Lingxi Xie and Tianren Ma and Pengyu Yan and David Doermann and Qixiang Ye and Yunjie Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FaNhyXY6Y1}\n}", "github": "", "reviewers": "xzsp;CDQu;U8HM", "pdf_size": 3304216, "rating": "7;7;7", "confidence": "5;4;4", "soundness": "2;3;3", "novelty": "4;3;4", "presentation": "4;3;4", "wc_summary": "72;169;76", "wc_strengths": "117;51;15", "wc_weaknesses": "227;208;501", "wc_questions": "16;51;2", "wc_limitations": "6;4;1", "wc_review": "438;483;595", "wc_reply_reviewers": "33;13;0", "wc_reply_authors": "16;16;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 105.66666666666667, 44.81319250201019 ], "wc_strengths_avg": [ 61.0, 42.23742416388575 ], "wc_weaknesses_avg": [ 312.0, 133.86809428189628 ], "wc_questions_avg": [ 23.0, 20.607442021431645 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 505.3333333333333, 66.01178345989112 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 13.572848714334887 ], "wc_reply_authors_avg": [ 10.666666666666666, 7.542472332656507 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6415670323638959075&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;huawei.com;ucas.ac.cn;buffalo.edu;buffalo.edu;ucas.ac.cn;ucas.ac.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;0;2;2;0;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Huawei;State University of New York at Buffalo", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.huawei.com;https://www.buffalo.edu", "aff_unique_abbr": "UCAS;Huawei;SUNY Buffalo", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;0;0;0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Euclidean distance compression via deep random features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95959", "id": "Fanbig8DR9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Fanbig8DR9", "openreview": "https://openreview.net/forum?id=Fanbig8DR9", "poster": "/media/PosterPDFs/NeurIPS%202024/95959.png?t=1733773713.1822104", "project": "", "author_site": "Brett Leroux, Luis Rademacher", "tldr": "", "abstract": "Motivated by the problem of compressing point sets into as few bits as possible while maintaining information about approximate distances between points, we construct random nonlinear maps $\\varphi_\\ell$ that compress point sets in the following way. For a point set $S$, the map $\\varphi_\\ell:\\mathbb{R}^d \\to N^{-1/2}\\{-1,1\\}^N$ has the property that storing $\\varphi_\\ell(S)$ (a sketch of $S$) allows one to report squared distances between points up to some multiplicative $(1\\pm \\epsilon)$ error with high probability. The maps $\\varphi_\\ell$ are the $\\ell$-fold composition of a certain type of random feature mapping. \n\nCompared to existing techniques, our maps offer several advantages. The standard method for compressing point sets by random mappings relies on the Johnson-Lindenstrauss lemma and involves compressing point sets with a random linear map. The main advantage of our maps $\\varphi_\\ell$ over random linear maps is that ours map point sets directly into the discrete cube $N^{-1/2}\\{-1,1\\}^N$ and so there is no additional step needed to convert the sketch to bits. For some range of parameters, our maps $\\varphi_\\ell$ produce sketches using fewer bits of storage space. We validate the method with experiments, including an application to nearest neighbor search.", "keywords": "dimensionality reduction;random projection;random features;metric embedding", "primary_area": "learning_theory", "supplementary_material": "/attachment/81eb6cc00504d1463b58fce5ef887adedb4770f0.zip", "author": "Brett Leroux;Luis Rademacher", "authorids": "~Brett_Leroux1;~Luis_Rademacher1", "gender": "M;", "homepage": ";https://www.math.ucdavis.edu/~lrademac/", "dblp": "255/7533;31/5438", "google_scholar": ";", "orcid": "0000-0002-4583-0126;", "linkedin": ";", "or_profile": "~Brett_Leroux1;~Luis_Rademacher1", "aff": "University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nleroux2024euclidean,\ntitle={Euclidean distance compression via deep random features},\nauthor={Brett Leroux and Luis Rademacher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Fanbig8DR9}\n}", "github": "", "reviewers": "mnzD;f7H3;4gJb", "pdf_size": 2177302, "rating": "4;6;6", "confidence": "4;4;3", "soundness": "4;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "94;151;101", "wc_strengths": "66;142;24", "wc_weaknesses": "75;383;61", "wc_questions": "1;439;43", "wc_limitations": "1;141;11", "wc_review": "237;1256;240", "wc_reply_reviewers": "136;85;6", "wc_reply_authors": "221;23;0", "reply_reviewers": "2;2;1", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 115.33333333333333, 25.381533094401966 ], "wc_strengths_avg": [ 77.33333333333333, 48.83532419149983 ], "wc_weaknesses_avg": [ 173.0, 148.60237772884614 ], "wc_questions_avg": [ 161.0, 197.3220717507294 ], "wc_limitations_avg": [ 51.0, 63.77042156569664 ], "wc_review_avg": [ 577.6666666666666, 479.65566352911503 ], "wc_reply_reviewers_avg": [ 75.66666666666667, 53.48104544810453 ], "wc_reply_authors_avg": [ 81.33333333333333, 99.20461458800975 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vwKNcbl3BoAJ:scholar.google.com/&scioq=Euclidean+distance+compression+via+deep+random+features&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "ucdavis.edu;ucdavis.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Streaming Detection of Queried Event Start", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97778", "id": "FbDgxp7LAa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FbDgxp7LAa", "openreview": "https://openreview.net/forum?id=FbDgxp7LAa", "poster": "/media/PosterPDFs/NeurIPS%202024/97778.png?t=1733956560.440513", "project": "", "author_site": "Cristobal Eyzaguirre, Eric Tang, Shyamal Buch, Adrien Gaidon, Jiajun Wu, Juan Carlos Niebles", "tldr": "", "abstract": "Robotics, autonomous driving, augmented reality, and many embodied computer vision applications must quickly react to user-defined events unfolding in real time. We address this setting by proposing a novel task for multimodal video understanding---Streaming Detection of Queried Event Start (SDQES).\nThe goal of SDQES is to identify the beginning of a complex event as described by a natural language query, with high accuracy and low latency. \nWe introduce a new benchmark based on the Ego4D dataset, as well as new task-specific metrics to study streaming multimodal detection of diverse events in an egocentric video setting.\nInspired by parameter-efficient fine-tuning methods in NLP and for video tasks, we propose adapter-based baselines that enable image-to-video transfer learning, allowing for efficient online video modeling.\nWe evaluate three vision-language backbones and three adapter architectures on both short-clip and untrimmed video settings.", "keywords": "streaming;multimodal;video language", "primary_area": "", "supplementary_material": "/attachment/704b3cab338bfa2bd3dd4f7bcce9e65dacd6ff67.zip", "author": "Cristobal Eyzaguirre;Eric Tang;Shyamal Buch;Adrien Gaidon;Jiajun Wu;Juan Carlos Niebles", "authorids": "~Cristobal_Eyzaguirre1;~Eric_Tang2;~Shyamal_Buch1;~Adrien_Gaidon1;~Jiajun_Wu1;~Juan_Carlos_Niebles1", "gender": ";M;Unspecified;;M;M", "homepage": ";;https://cs.stanford.edu/~shyamal;https://adriengaidon.com/;https://jiajunwu.com;http://www.niebles.net/", "dblp": ";;207/8458;06/7548.html;117/4768;26/647", "google_scholar": ";;https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=2StUgf4AAAAJ;2efgcS0AAAAJ;hqNhUCYAAAAJ", "orcid": ";;;;0000-0002-4176-343X;", "linkedin": ";erictang000/;;adrien-gaidon-63ab2358/;jiajunwu/;", "or_profile": "~Cristobal_Eyzaguirre1;~Eric_Tang2;~Shyamal_Buch1;~Adrien_Gaidon1;~Jiajun_Wu1;~Juan_Carlos_Niebles1", "aff": ";Stanford University;Google DeepMind;Stanford University;Stanford University;Stanford University", "aff_domain": ";stanford.edu;google.com;stanford.edu;stanford.edu;stanford.edu", "position": ";MS student;Researcher;Adjunct Professor;Assistant Professor;Adjunct Professor", "bibtex": "@inproceedings{\neyzaguirre2024streaming,\ntitle={Streaming Detection of Queried Event Start},\nauthor={Cristobal Eyzaguirre and Eric Tang and Shyamal Buch and Adrien Gaidon and Jiajun Wu and Juan Carlos Niebles},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=FbDgxp7LAa}\n}", "github": "", "reviewers": "E6vj;TeQt;h4vt", "pdf_size": 5854199, "rating": "6;7;8", "confidence": "3;4;5", "wc_summary_and_contributions": "42;183;53", "wc_strengths": "55;78;59", "wc_improvement": "1;51;72", "wc_limitations": "85;1;1", "wc_correctness": "13;1;1", "wc_clarity": "13;1;1", "wc_relation_to_prior_work": "18;1;1", "wc_documentation": "21;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "249;318;190", "wc_reply_reviewers": "0;0;186", "wc_reply_authors": "59;59;694", "reply_reviewers": "0;0;2", "reply_authors": "2;2;5", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 92.66666666666667, 64.03297761483705 ], "wc_strengths_avg": [ 64.0, 10.03327796219494 ], "wc_improvement_avg": [ 41.333333333333336, 29.78067979225607 ], "wc_limitations_avg": [ 29.0, 39.59797974644666 ], "wc_correctness_avg": [ 5.0, 5.656854249492381 ], "wc_clarity_avg": [ 5.0, 5.656854249492381 ], "wc_relation_to_prior_work_avg": [ 6.666666666666667, 8.013876853447538 ], "wc_documentation_avg": [ 7.666666666666667, 9.428090415820632 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 252.33333333333334, 52.30891149911478 ], "wc_reply_reviewers_avg": [ 62.0, 87.68124086713189 ], "wc_reply_authors_avg": [ 270.6666666666667, 299.34187070230513 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5188054494490205643&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": ";stanford.edu;google.com;stanford.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.stanford.edu;https://deepmind.com", "aff_unique_abbr": "Stanford;DeepMind", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Efficient Availability Attacks against Supervised and Contrastive Learning Simultaneously", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95958", "id": "FbUSCraXEB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FbUSCraXEB", "openreview": "https://openreview.net/forum?id=FbUSCraXEB", "poster": "/media/PosterPDFs/NeurIPS%202024/95958.png?t=1731169159.6604888", "project": "", "author_site": "Yihan Wang, Yifan Zhu, Xiao-Shan Gao", "tldr": "", "abstract": "Availability attacks provide a tool to prevent the unauthorized use of private data and commercial datasets by generating imperceptible noise and crafting unlearnable examples before release. \nIdeally, the obtained unlearnability can prevent algorithms from training usable models. \nWhen supervised learning (SL) algorithms have failed, a malicious data collector possibly resorts to contrastive learning (CL) algorithms to bypass the protection.\nThrough evaluation, we have found that most existing methods are unable to achieve both supervised and contrastive unlearnability, which poses risks to data protection by availability attacks.\nDifferent from recent methods based on contrastive learning, we employ contrastive-like data augmentations in supervised learning frameworks to obtain attacks effective for both SL and CL.\nOur proposed AUE and AAP attacks achieve state-of-the-art worst-case unlearnability across SL and CL algorithms with less computation consumption, showcasing prospects in real-world applications. \nThe code is available at https://github.com/EhanW/AUE-AAP.", "keywords": "availability Attacks;indiscriminate attack;unlearnable examles;contrastive learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yihan Wang;Yifan Zhu;Xiao-Shan Gao", "authorids": "~Yihan_Wang3;~Yifan_Zhu6;~Xiao-Shan_Gao2", "gender": "M;;M", "homepage": "https://ehanw.github.io/;https://github.com/hala64;http://www.mmrc.iss.ac.cn/~xgao/", "dblp": ";;13/3109", "google_scholar": ";https://scholar.google.com/citations?hl=en;_se7GmUAAAAJ", "orcid": ";;0000-0003-2021-9395", "linkedin": ";;", "or_profile": "~Yihan_Wang3;~Yifan_Zhu6;~Xiao-Shan_Gao2", "aff": "Academy of Mathematics and Systems Science, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences", "aff_domain": "amss.ac.cn;amss.ac.cn;amss.ac.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024efficient,\ntitle={Efficient Availability Attacks against Supervised and Contrastive Learning Simultaneously},\nauthor={Yihan Wang and Yifan Zhu and Xiao-Shan Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FbUSCraXEB}\n}", "github": "", "reviewers": "61jU;vjzY;J4Hn;5WZD", "pdf_size": 8272007, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "3;2;3;3", "novelty": "3;3;2;2", "presentation": "3;3;3;3", "wc_summary": "73;84;82;99", "wc_strengths": "35;63;74;70", "wc_weaknesses": "203;132;75;193", "wc_questions": "38;61;8;21", "wc_limitations": "1;9;1;7", "wc_review": "350;349;240;390", "wc_reply_reviewers": "103;15;26;112", "wc_reply_authors": "186;0;27;636", "reply_reviewers": "2;1;1;2", "reply_authors": "3;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.5, 9.340770846134703 ], "wc_strengths_avg": [ 60.5, 15.239750654128171 ], "wc_weaknesses_avg": [ 150.75, 51.489683432703295 ], "wc_questions_avg": [ 32.0, 19.836834424877374 ], "wc_limitations_avg": [ 4.5, 3.570714214271425 ], "wc_review_avg": [ 332.25, 55.76905503951094 ], "wc_reply_reviewers_avg": [ 64.0, 43.78926809162263 ], "wc_reply_authors_avg": [ 212.25, 254.7649652130371 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=332700849713029080&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "amss.ac.cn;amss.ac.cn;amss.ac.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Academy of Mathematics and Systems Science", "aff_unique_url": "http://www.amss.cas.cn", "aff_unique_abbr": "AMSS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Probing the Decision Boundaries of In-context Learning in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95957", "id": "FbXQrfkvtY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FbXQrfkvtY", "openreview": "https://openreview.net/forum?id=FbXQrfkvtY", "poster": "", "project": "", "author_site": "Siyan Zhao, Tung Nguyen, Aditya Grover", "tldr": "", "abstract": "In-context learning is an emergent paradigm in large language models (LLMs) that enables them to generalize to new tasks and domains by simply prompting these models with a few exemplars without explicit parameter updates. Many attempts have been made to understand in-context learning in LLMs as a function of model scale, pretraining data, and other factors. In this work, we propose a new mechanism to probe and understand in-context learning from the lens of decision boundaries for in-context binary classification. Decision boundaries are straightforward to visualize and provide important information about the qualitative behavior of the inductive biases of standard classifiers. To our surprise, we find that the decision boundaries learned by current LLMs in simple binary classification tasks are often irregularly non-smooth, regardless of task linearity. This paper investigates the factors influencing these decision boundaries and explores methods to enhance their generalizability. We assess various approaches, including training-free and fine-tuning methods for LLMs, the impact of model architecture, and the effectiveness of active prompting techniques for smoothing decision boundaries in a data-efficient manner. Our findings provide a deeper understanding of in-context learning dynamics and offer practical improvements for enhancing robustness and generalizability of in-context learning.", "keywords": "in-context learning; Large language models; LLM decision boundary", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Siyan Zhao;Tung Nguyen;Aditya Grover", "authorids": "~Siyan_Zhao1;~Tung_Nguyen2;~Aditya_Grover1", "gender": "F;M;M", "homepage": "https://siyan-zhao.github.io/;https://tung-nd.github.io/;https://aditya-grover.github.io", "dblp": "161/3857;;162/5052", "google_scholar": ";https://scholar.google.com.vn/citations?user=F9mgq3sAAAAJ;oOhnPUgAAAAJ", "orcid": ";;", "linkedin": ";tung-nguyen-40703616b/;", "or_profile": "~Siyan_Zhao1;~Tung_Nguyen2;~Aditya_Grover1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;cs.ucla.edu;ucla.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024probing,\ntitle={Probing the Decision Boundaries of In-context Learning in Large Language Models},\nauthor={Siyan Zhao and Tung Nguyen and Aditya Grover},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FbXQrfkvtY}\n}", "github": "", "reviewers": "PktE;PtXB;AvnH;WmaA", "pdf_size": 4652988, "rating": "3;5;6;10", "confidence": "3;3;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "69;39;72;53", "wc_strengths": "48;37;45;76", "wc_weaknesses": "75;153;107;5", "wc_questions": "3;28;75;1", "wc_limitations": "13;1;30;1", "wc_review": "208;258;329;136", "wc_reply_reviewers": "0;80;28;0", "wc_reply_authors": "141;156;743;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.0, 2.5495097567963922 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.25, 13.254716141811564 ], "wc_strengths_avg": [ 51.5, 14.705441169852742 ], "wc_weaknesses_avg": [ 85.0, 53.87021440462252 ], "wc_questions_avg": [ 26.75, 29.819247140060394 ], "wc_limitations_avg": [ 11.25, 11.882234638316145 ], "wc_review_avg": [ 232.75, 70.48891756865046 ], "wc_reply_reviewers_avg": [ 27.0, 32.66496594212215 ], "wc_reply_authors_avg": [ 260.0, 285.42336975097186 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7844645405527362, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7168312354359224237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.ucla.edu;cs.ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Can Language Models Perform Robust Reasoning in Chain-of-thought Prompting with Noisy Rationales?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95956", "id": "FbuODM02ra", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FbuODM02ra", "openreview": "https://openreview.net/forum?id=FbuODM02ra", "poster": "/media/PosterPDFs/NeurIPS%202024/95956.png?t=1731659893.5049627", "project": "", "author_site": "Zhanke Zhou, Rong Tao, Jianing Zhu, Yiwen Luo, Zengmao Wang, Bo Han", "tldr": "", "abstract": "This paper investigates an under-explored challenge in large language models (LLMs): chain-of-thought prompting with noisy rationales, which include irrelevant or inaccurate reasoning thoughts within examples used for in-context learning. We construct NoRa dataset that is tailored to evaluate the robustness of reasoning in the presence of noisy rationales. Our findings on NoRa dataset reveal a prevalent vulnerability to such noise among current LLMs, with existing robust methods like self-correction and self-consistency showing limited efficacy. Notably, compared to prompting with clean rationales, base LLM drops by 1.4%-19.8% in accuracy with irrelevant thoughts and more drastically by 2.2%-40.4% with inaccurate thoughts.\n\nAddressing this challenge necessitates external supervision that should be accessible in practice. Here, we propose the method of contrastive denoising with noisy chain-of-thought (CD-CoT). It enhances LLMs' denoising-reasoning capabilities by contrasting noisy rationales with only one clean rationale, which can be the minimal requirement for denoising-purpose prompting. This method follows a principle of exploration and exploitation: (1) rephrasing and selecting rationales in the input space to achieve explicit denoising and (2) exploring diverse reasoning paths and voting on answers in the output space. Empirically, CD-CoT demonstrates an average improvement of 17.8% in accuracy over the base model and shows significantly stronger denoising capabilities than baseline methods. The source code is publicly available at: https://github.com/tmlr-group/NoisyRationales.", "keywords": "Large language models;Chain of thoughts;Noisy rationales;Reasoning robustness", "primary_area": "other", "supplementary_material": "", "author": "Zhanke Zhou;Rong Tao;Jianing Zhu;Yiwen Luo;Zengmao Wang;Bo Han", "authorids": "~Zhanke_Zhou1;~Rong_Tao1;~Jianing_Zhu2;~Yiwen_Luo5;~Zengmao_Wang1;~Bo_Han1", "gender": "M;M;M;F;M;M", "homepage": "https://andrewzhou924.github.io/;https://github.com/taorong007;https://zfancy.github.io/;https://github.com/laaambs;http://jszy.whu.edu.cn/wangzengmao/zh_CN/more/1231604/jsjjgd/index.htm;https://bhanml.github.io/", "dblp": "285/5311;;129/6807;;168/4719;241/0472-3", "google_scholar": "GVXErr0AAAAJ;;82uNA3MAAAAJ;;https://scholar.google.com.hk/citations?user=tTqiJpQAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Zhanke_Zhou1;~Rong_Tao1;~Jianing_Zhu2;~Yiwen_Luo5;~Zengmao_Wang1;~bo_han2", "aff": "Hong Kong Baptist University;;RIKEN;Wuhan University;Wuhan University;MBZUAI", "aff_domain": "hkbu.edu.hk;;riken.jp;whu.edu.cn;whu.edu.cn;mbzuai.ac.ae", "position": "PhD student;;Research Intern;MS student;Associate Professor ;Researcher", "bibtex": "@inproceedings{\nzhou2024can,\ntitle={Can Language Models Perform Robust Reasoning in Chain-of-thought Prompting with Noisy Rationales?},\nauthor={Zhanke Zhou and Rong Tao and Jianing Zhu and Yiwen Luo and Zengmao Wang and Bo Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FbuODM02ra}\n}", "github": "", "reviewers": "qSQZ;HgRq;ajBs;bpvC", "pdf_size": 1038244, "rating": "5;6;7;8", "confidence": "5;3;3;2", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;4;2;4", "wc_summary": "106;67;118;46", "wc_strengths": "23;69;161;155", "wc_weaknesses": "105;70;5;152", "wc_questions": "109;30;165;57", "wc_limitations": "16;1;3;81", "wc_review": "359;237;452;491", "wc_reply_reviewers": "168;0;76;149", "wc_reply_authors": "2013;149;116;53", "reply_reviewers": "2;0;1;1", "reply_authors": "6;3;3;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 84.25, 29.03769102390891 ], "wc_strengths_avg": [ 102.0, 58.35237784358063 ], "wc_weaknesses_avg": [ 83.0, 53.61436374704078 ], "wc_questions_avg": [ 90.25, 51.65934087848973 ], "wc_limitations_avg": [ 25.25, 32.6984326841517 ], "wc_review_avg": [ 384.75, 97.85799660732893 ], "wc_reply_reviewers_avg": [ 98.25, 66.31129240182248 ], "wc_reply_authors_avg": [ 582.75, 826.4751584288545 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9233805168766388, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9305943135742001204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hkbu.edu.hk;;riken.jp;whu.edu.cn;whu.edu.cn;mbzuai.ac.ae", "author_num": 6, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Hong Kong Baptist University;RIKEN;Wuhan University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.riken.jp;http://www.whu.edu.cn/;https://www.mbzuai.ac.ae", "aff_unique_abbr": "HKBU;RIKEN;WHU;MBZUAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "China;Japan;United Arab Emirates" }, { "title": "Target-Guided Adversarial Point Cloud Transformer Towards Recognition Against Real-world Corruptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95955", "id": "FcUyz33OED", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FcUyz33OED", "openreview": "https://openreview.net/forum?id=FcUyz33OED", "poster": "/media/PosterPDFs/NeurIPS%202024/95955.png?t=1730858313.6462955", "project": "", "author_site": "Jie Wang, Tingfa Xu, Lihe Ding, Jianan Li", "tldr": "", "abstract": "Achieving robust 3D perception in the face of corrupted data presents an challenging hurdle within 3D vision research. Contemporary transformer-based point cloud recognition models, albeit advanced, tend to overfit to specific patterns, consequently undermining their robustness against corruption. In this work, we introduce the Target-Guided Adversarial Point Cloud Transformer, termed APCT, a novel architecture designed to augment global structure capture through an adversarial feature erasing mechanism predicated on patterns discerned at each step during training. Specifically, APCT integrates an Adversarial Significance Identifier and a Target-guided Promptor. The Adversarial Significance Identifier, is tasked with discerning token significance by integrating global contextual analysis, utilizing a structural salience index algorithm alongside an auxiliary supervisory mechanism. The Target-guided Promptor, is responsible for accentuating the propensity for token discard within the self-attention mechanism, utilizing the value derived above, consequently directing the model attention towards alternative segments in subsequent stages. By iteratively applying this strategy in multiple steps during training, the network progressively identifies and integrates an expanded array of object-associated patterns. Extensive experiments demonstrate that our method achieves state-of-the-art results on multiple corruption benchmarks.", "keywords": "Real-world point cloud;transformer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jie Wang;Tingfa Xu;Lihe Ding;Jianan Li", "authorids": "~Jie_Wang18;~Tingfa_Xu1;~Lihe_Ding1;~Jianan_Li1", "gender": ";M;M;", "homepage": ";;;", "dblp": ";93/1709;307/5395;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;vmDc8dwAAAAJ;6nJrd8oAAAAJ;", "orcid": ";0000-0001-5452-2662;0000-0003-1976-9496;", "linkedin": ";;;", "or_profile": "~Jie_Wang18;~Tingfa_Xu1;~Lihe_Ding1;~Jianan_Li1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology, Tsinghua University;The Chinese University of Hong Kong;", "aff_domain": "bit.edu.cn;bit.edu.cn;ie.cuhk.edu;", "position": "MS student;Full Professor;PhD student;", "bibtex": "@inproceedings{\nwang2024targetguided,\ntitle={Target-Guided Adversarial Point Cloud Transformer Towards Recognition Against Real-world Corruptions},\nauthor={Jie Wang and Tingfa Xu and Lihe Ding and Jianan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FcUyz33OED}\n}", "github": "", "reviewers": "Tyq5;Y3PY;K9Vn", "pdf_size": 16224394, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "45;65;95", "wc_strengths": "34;71;75", "wc_weaknesses": "14;47;63", "wc_questions": "278;8;11", "wc_limitations": "29;1;1", "wc_review": "400;192;245", "wc_reply_reviewers": "23;12;12", "wc_reply_authors": "63;63;63", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.33333333333333, 20.548046676563253 ], "wc_strengths_avg": [ 60.0, 18.457157599876172 ], "wc_weaknesses_avg": [ 41.333333333333336, 20.401524997465806 ], "wc_questions_avg": [ 99.0, 126.57803916951787 ], "wc_limitations_avg": [ 10.333333333333334, 13.199326582148887 ], "wc_review_avg": [ 279.0, 88.25342297422048 ], "wc_reply_reviewers_avg": [ 15.666666666666666, 5.185449728701348 ], "wc_reply_authors_avg": [ 63.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3023710664007709293&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "bit.edu.cn;bit.edu.cn;ie.cuhk.edu;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Beijing Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "BIT;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Hierarchical Programmatic Option Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95954", "id": "FeCWZviCeP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FeCWZviCeP", "openreview": "https://openreview.net/forum?id=FeCWZviCeP", "poster": "", "project": "", "author_site": "Yu-An Lin, Chen-Tao Lee, Chih-Han Yang, Guan-Ting Liu, Shao-Hua Sun", "tldr": "", "abstract": "Deep reinforcement learning aims to learn deep neural network policies to solve large-scale decision-making problems. However, approximating policies using deep neural networks makes it difficult to interpret the learned decision-making process. To address this issue, prior works (Trivedi et al., 2021; Liu et al., 2023; Carvalho et al., 2024) proposed to use human-readable programs as policies to increase the interpretability of the decision-making pipeline. Nevertheless, programmatic policies generated by these methods struggle to effectively solve long and repetitive RL tasks and cannot generalize to even longer horizons during testing. To solve these problems, we propose the Hierarchical Programmatic Option framework (HIPO), which aims to solve long and repetitive RL problems with human-readable programs as options (low-level policies). Specifically, we propose a method that retrieves a set of effective, diverse, and compatible programs as options. Then, we learn a high-level policy to effectively reuse these programmatic options to solve reoccurring subtasks. Our proposed framework outperforms programmatic RL and deep RL baselines on various tasks. Ablation studies justify the effectiveness of our proposed search algorithm for retrieving a set of programmatic options.", "keywords": "Reinforcement Learning;Programmatic Reinforcement Learning;Hierarchical Reinforcement Learning;Program Synthesis", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yu-An Lin;Chen-Tao Lee;Chih-Han Yang;Guan-Ting Liu;Shao-Hua Sun", "authorids": "~Yu-An_Lin2;~Chen-Tao_Lee1;~Chih-Han_Yang1;~Guan-Ting_Liu1;~Shao-Hua_Sun1", "gender": "M;M;M;M;M", "homepage": "https://github.com/AndyLinGitHub;https://github.com/boris-CTL;https://chyang25.github.io;http://shaohua0116.github.io;https://dannyliu15.github.io/", "dblp": ";;;158/9680;71/7317", "google_scholar": ";;;uXsfnaQAAAAJ;https://scholar.google.com/citations?hl=zh-TW", "orcid": ";;;0000-0001-7579-6734;0000-0002-7300-9036", "linkedin": ";;;shaohua0116/;", "or_profile": "~Yu-An_Lin2;~Chen-Tao_Lee1;~Chih-Han_Yang1;~Shao-Hua_Sun1;~Guan_Ting_Liu1", "aff": ";;National Taiwan University;National Taiwan University;Department of computer science and information engineering, National Taiwan University", "aff_domain": ";;ntu.edu.tw;ntu.edu.tw;csie.ntu.edu.tw", "position": ";;Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nlin2024hierarchical,\ntitle={Hierarchical Programmatic Option Framework},\nauthor={Yu-An Lin and Chen-Tao Lee and Chih-Han Yang and Guan-Ting Liu and Shao-Hua Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FeCWZviCeP}\n}", "github": "", "reviewers": "p5Z4;ukMP;x8GU;4qWN", "pdf_size": 5873267, "rating": "5;7;7;8", "confidence": "3;4;5;3", "soundness": "3;4;2;4", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "86;72;99;49", "wc_strengths": "44;111;213;59", "wc_weaknesses": "127;447;202;41", "wc_questions": "79;79;461;82", "wc_limitations": "9;47;46;1", "wc_review": "345;756;1021;232", "wc_reply_reviewers": "155;78;11;24", "wc_reply_authors": "516;31;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.5, 18.527007313648905 ], "wc_strengths_avg": [ 106.75, 66.19053935420077 ], "wc_weaknesses_avg": [ 204.25, 151.28677238939298 ], "wc_questions_avg": [ 175.25, 164.98238542341423 ], "wc_limitations_avg": [ 25.75, 20.94486810653149 ], "wc_review_avg": [ 588.5, 316.8189546097266 ], "wc_reply_reviewers_avg": [ 67.0, 56.67892024377317 ], "wc_reply_authors_avg": [ 136.75, 219.3255286098725 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.20751433915982243, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10090017400739757236&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": ";;ntu.edu.tw;ntu.edu.tw;csie.ntu.edu.tw", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "When LLM Meets DRL: Advancing Jailbreaking Efficiency via DRL-guided Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95953", "id": "FfFcDNDNol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FfFcDNDNol", "openreview": "https://openreview.net/forum?id=FfFcDNDNol", "poster": "/media/PosterPDFs/NeurIPS%202024/95953.png?t=1731354022.6243327", "project": "", "author_site": "Xuan Chen, Yuzhou Nie, Wenbo Guo, Xiangyu Zhang", "tldr": "", "abstract": "Recent studies developed jailbreaking attacks, which construct jailbreaking prompts to \"fool\" LLMs into responding to harmful questions.\nEarly-stage jailbreaking attacks require access to model internals or significant human efforts. \nMore advanced attacks utilize genetic algorithms for automatic and black-box attacks.\nHowever, the random nature of genetic algorithms significantly limits the effectiveness of these attacks.\nIn this paper, we propose RLbreaker, a black-box jailbreaking attack driven by deep reinforcement learning (DRL).\nWe model jailbreaking as a search problem and design an RL agent to guide the search, which is more effective and has less randomness than stochastic search, such as genetic algorithms.\nSpecifically, we design a customized DRL system for the jailbreaking problem, including a novel reward function and a customized proximal policy optimization (PPO) algorithm.\nThrough extensive experiments, we demonstrate that RLbreaker is much more effective than existing jailbreaking attacks against six state-of-the-art (SOTA) LLMs. \nWe also show that RLbreaker is robust against three SOTA defenses and its trained agents can transfer across different LLMs.\nWe further validate the key design choices of RLbreaker via a comprehensive ablation study.", "keywords": "jailbreaking attack; LLM security; deep reinforcement learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/75d6b26a741ed8448bfb62375b1b3efe9359189d.zip", "author": "Xuan Chen;Yuzhou Nie;Wenbo Guo;Xiangyu Zhang", "authorids": "~Xuan_Chen3;~Yuzhou_Nie1;~Wenbo_Guo1;~Xiangyu_Zhang3", "gender": ";M;M;M", "homepage": ";https://rucnyz.github.io/;https://henrygwb.github.io/;https://www.cs.purdue.edu/homes/xyzhang", "dblp": ";;144/1238-2.html;", "google_scholar": ";;KyPheRMAAAAJ;PXbu1wIAAAAJ", "orcid": ";0000-0002-8352-3303;;", "linkedin": ";;;", "or_profile": "~Xuan_Chen3;~Yuzhou_Nie1;~Wenbo_Guo1;~Xiangyu_Zhang3", "aff": ";Purdue University;University of California, Santa Barbara;Purdue University", "aff_domain": ";purdue.edu;ucsb.edu;cs.purdue.edu", "position": ";PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024when,\ntitle={When {LLM} Meets {DRL}: Advancing Jailbreaking Efficiency via {DRL}-guided Search},\nauthor={Xuan Chen and Yuzhou Nie and Wenbo Guo and Xiangyu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FfFcDNDNol}\n}", "github": "", "reviewers": "ipyX;5U1Z;5ZZd;pYsF", "pdf_size": 2017338, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "2;3;3;2", "wc_summary": "24;96;84;104", "wc_strengths": "50;88;56;40", "wc_weaknesses": "139;27;169;169", "wc_questions": "2;10;4;7", "wc_limitations": "1;1;3;1", "wc_review": "216;222;316;321", "wc_reply_reviewers": "114;15;90;15", "wc_reply_authors": "232;22;240;50", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.0, 31.416556144810016 ], "wc_strengths_avg": [ 58.5, 17.96524422322168 ], "wc_weaknesses_avg": [ 126.0, 58.45511098270193 ], "wc_questions_avg": [ 5.75, 3.031088913245535 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 268.75, 49.82657423504048 ], "wc_reply_reviewers_avg": [ 58.5, 44.31986010808247 ], "wc_reply_authors_avg": [ 136.0, 100.52860289489753 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13727628699870263626&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";purdue.edu;ucsb.edu;cs.purdue.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;University of California, Santa Barbara", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.ucsb.edu", "aff_unique_abbr": "Purdue;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Is One GPU Enough? Pushing Image Generation at Higher-Resolutions with Foundation Models.", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95952", "id": "Ffb30OVVCa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ffb30OVVCa", "openreview": "https://openreview.net/forum?id=Ffb30OVVCa", "poster": "/media/PosterPDFs/NeurIPS%202024/95952.png?t=1731590649.0675256", "project": "", "author_site": "Athanasios Tragakis, Marco Aversa, Chaitanya Kaul, Roderick Murray-Smith, Daniele Faccio", "tldr": "", "abstract": "In this work, we introduce Pixelsmith, a zero-shot text-to-image generative framework to sample images at higher resolutions with a single GPU. We are the first to show that it is possible to scale the output of a pre-trained diffusion model by a factor of 1000, opening the road to gigapixel image generation at no extra cost. Our cascading method uses the image generated at the lowest resolution as baseline to sample at higher resolutions. For the guidance, we introduce the Slider, a mechanism that fuses the overall structure contained in the first-generated image with enhanced fine details. At each inference step, we denoise patches rather than the entire latent space, minimizing memory demands so that a single GPU can handle the process, regardless of the image's resolution. Our experimental results show that this method not only achieves higher quality and diversity compared to existing techniques but also reduces sampling time and ablation artifacts.", "keywords": "diffusion model;generative AI;image generation;foundation models;higher resolution", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Athanasios Tragakis;Marco Aversa;Chaitanya Kaul;Roderick Murray-Smith;Daniele Faccio", "authorids": "~Athanasios_Tragakis1;~Marco_Aversa1;~Chaitanya_Kaul1;~Roderick_Murray-Smith1;~Daniele_Faccio1", "gender": "M;M;M;M;M", "homepage": "https://github.com/Thanos-DB;https://marcoaversa.github.io;https://www.gla.ac.uk/schools/computing/staff/chaitanyakaul/;http://www.dcs.gla.ac.uk/~rod/;", "dblp": "322/2205;325/5090;236/4424;78/604;198/1001", "google_scholar": "https://scholar.google.com/citations?hl=en;XSd_7RgAAAAJ;GAGMBAwAAAAJ;https://scholar.google.co.uk/citations?user=laX7LzQAAAAJ;", "orcid": ";0000-0002-7724-7488;0000-0003-4893-6222;;", "linkedin": ";marco-aversa-5bb15b169/;;rodms/;", "or_profile": "~Athanasios_Tragakis1;~Marco_Aversa1;~Chaitanya_Kaul1;~Roderick_Murray-Smith1;~Daniele_Faccio1", "aff": "University of Glasgow;Dotphoton;University of Glasgow;University of Glasgow;", "aff_domain": "glasgow.ac.uk;dotphoton.com;glasgow.ac.uk;gla.ac.uk;", "position": "PhD student;Researcher;Postdoc;Professor;", "bibtex": "@inproceedings{\ntragakis2024is,\ntitle={Is One {GPU} Enough? Pushing Image Generation at Higher-Resolutions with Foundation Models.},\nauthor={Athanasios Tragakis and Marco Aversa and Chaitanya Kaul and Roderick Murray-Smith and Daniele Faccio},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ffb30OVVCa}\n}", "github": "", "reviewers": "wkBK;SvAB;snt9;T7y4", "pdf_size": 49697996, "rating": "3;4;5;6", "confidence": "4;5;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "1;1;3;3", "wc_summary": "115;96;162;40", "wc_strengths": "55;38;76;42", "wc_weaknesses": "237;194;182;51", "wc_questions": "52;3;70;75", "wc_limitations": "32;21;3;10", "wc_review": "491;352;493;218", "wc_reply_reviewers": "0;63;10;57", "wc_reply_authors": "0;698;0;142", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 103.25, 43.71141155350625 ], "wc_strengths_avg": [ 52.75, 14.821858857781638 ], "wc_weaknesses_avg": [ 166.0, 69.47301634447723 ], "wc_questions_avg": [ 50.0, 28.45171348091359 ], "wc_limitations_avg": [ 16.5, 11.01135777277262 ], "wc_review_avg": [ 388.5, 113.82991698143331 ], "wc_reply_reviewers_avg": [ 32.5, 27.807373122968663 ], "wc_reply_authors_avg": [ 210.0, 287.64909177676884 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8003490898460223624&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "glasgow.ac.uk;dotphoton.com;glasgow.ac.uk;gla.ac.uk;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Glasgow;Dotphoton", "aff_unique_dep": ";", "aff_unique_url": "https://www.gla.ac.uk;", "aff_unique_abbr": "Glasgow;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom;" }, { "title": "MoGenTS: Motion Generation based on Spatial-Temporal Joint Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95951", "id": "FisyQfoJCm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FisyQfoJCm", "openreview": "https://openreview.net/forum?id=FisyQfoJCm", "poster": "/media/PosterPDFs/NeurIPS%202024/95951.png?t=1731291695.6648345", "project": "", "author_site": "Weihao Yuan, Yisheng HE, Weichao Shen, Yuan Dong, Xiaodong Gu, Zilong Dong, Liefeng Bo, Qixing Huang", "tldr": "", "abstract": "Motion generation from discrete quantization offers many advantages over continuous regression, but at the cost of inevitable approximation errors. Previous methods usually quantize the entire body pose into one code, which not only faces the difficulty in encoding all joints within one vector but also loses the spatial relationship between different joints. Differently, in this work we quantize each individual joint into one vector, which i) simplifies the quantization process as the complexity associated with a single joint is markedly lower than that of the entire pose; ii) maintains a spatial-temporal structure that preserves both the spatial relationships among joints and the temporal movement patterns; iii) yields a 2D token map, which enables the application of various 2D operations widely used in 2D images. Grounded in the 2D motion quantization, we build a spatial-temporal modeling framework, where 2D joint VQVAE, temporal-spatial 2D masking technique, and spatial-temporal 2D attention are proposed to take advantage of spatial-temporal signals among the 2D tokens. Extensive experiments demonstrate that our method significantly outperforms previous methods across different datasets, with a $26.6\\%$ decrease of FID on HumanML3D and a $29.9\\%$ decrease on KIT-ML.", "keywords": "Motion Generation;Spation-Temporal;Joint", "primary_area": "generative_models", "supplementary_material": "/attachment/643a706474e745f3c7a7f53d5cdd50caac43798a.zip", "author": "Weihao Yuan;Yisheng HE;Weichao Shen;Yuan Dong;Xiaodong Gu;Zilong Dong;Liefeng Bo;Qixing Huang", "authorids": "~Weihao_Yuan1;~Yisheng_HE1;~Weichao_Shen3;~Yuan_Dong3;~Xiaodong_Gu3;~Zilong_Dong2;~Liefeng_Bo1;~Qixing_Huang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://www.weihao-yuan.com;https://hyshkust.github.io;;https://fdyuandong.github.io/;;https://research.cs.washington.edu/istc/lfb/;https://www.cs.utexas.edu/~huangqx/;https://baike.baidu.com/item/%E8%91%A3%E5%AD%90%E9%BE%99/62931048", "dblp": "217/2047-1;254/0856;210/4808;66/8751;71/4467-4;17/6808;82/241;81/1423", "google_scholar": "m3tqxRQAAAAJ;UM4qFCsAAAAJ;7gTmYHkAAAAJ;https://scholar.google.com/citations?hl=en;aJPO514AAAAJ;FJwtMf0AAAAJ;https://scholar.google.com.tw/citations?user=pamL_rIAAAAJ;GHOQKCwAAAAJ", "orcid": ";;;0000-0002-8856-995X;0000-0003-2623-7973;;;0000-0002-6833-9102", "linkedin": ";;;;;;;", "or_profile": "~Weihao_Yuan1;~Yisheng_HE1;~Weichao_Shen3;~Yuan_Dong3;~Xiaodong_Gu3;~Liefeng_Bo1;~Qixing_Huang1;~Zlong_Dong1", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;University of Texas at Austin;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;utexas.edu;alibaba-inc.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nyuan2024mogents,\ntitle={MoGen{TS}: Motion Generation based on Spatial-Temporal Joint Modeling},\nauthor={Weihao Yuan and Yisheng HE and Weichao Shen and Yuan Dong and Xiaodong Gu and Zilong Dong and Liefeng Bo and Qixing Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FisyQfoJCm}\n}", "github": "", "reviewers": "g7ot;1rhT;SBXK", "pdf_size": 4990972, "rating": "5;6;6", "confidence": "5;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "105;99;153", "wc_strengths": "103;37;219", "wc_weaknesses": "103;236;339", "wc_questions": "39;47;109", "wc_limitations": "35;6;63", "wc_review": "385;425;883", "wc_reply_reviewers": "54;37;116", "wc_reply_authors": "31;48;84", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 119.0, 24.166091947189145 ], "wc_strengths_avg": [ 119.66666666666667, 75.23001764957625 ], "wc_weaknesses_avg": [ 226.0, 96.60572791851767 ], "wc_questions_avg": [ 65.0, 31.283648551066843 ], "wc_limitations_avg": [ 34.666666666666664, 23.27134623427608 ], "wc_review_avg": [ 564.3333333333334, 225.92230719627688 ], "wc_reply_reviewers_avg": [ 69.0, 33.95094500402996 ], "wc_reply_authors_avg": [ 54.333333333333336, 22.095751225568733 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3559849122976865470&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;utexas.edu;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Alibaba Group;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.utexas.edu", "aff_unique_abbr": "Alibaba;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "FjeJB0OUhN", "title": "Can Long-Context Language Models Subsume Retrieval, SQL, and More?", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Long-context language models (LCLMs) have the potential to revolutionize our approach to tasks traditionally reliant on external tools like retrieval systems or databases. Leveraging LCLMs\u2019 ability to natively ingest and process entire corpora of information offers numerous advantages. It enhances user-friendliness by eliminating the need for specialized knowledge of tools, provides robust end-to-end modeling that minimizes cascading errors in complex pipelines, and allows for the application of sophisticated prompting techniques across the entire system. To assess this paradigm shift, we introduce LOFT, a benchmark comprising of real-world tasks requiring context up to millions of tokens designed to evaluate LCLMs\u2019 performance on in-context retrieval and reasoning. Our findings reveal that LCLMs can already achieve textual and visual retrieval performance comparable to specialized systems such as Gecko and CLIP, while still facing challenges in areas like multi-hop compositional reasoning required in SQL-like tasks. Notably, prompting strategies significantly influence performance, emphasizing the need for continued research as context lengths grow. Overall, LOFT provides a rigorous testing ground for LCLMs, showcasing their potential to supplant existing paradigms and tackle novel tasks as model capabilities scale.", "keywords": "long context;long context language models;long context retrieval;long context reasoning", "primary_area": "", "supplementary_material": "/attachment/2832b40d32bfd85cd94beaeb81b6a8c003956dd0.pdf", "author": "Jinhyuk Lee;Anthony Chen;Zhuyun Dai;Dheeru Dua;Devendra Singh Sachan;Michael Boratko;Yi Luan;S\u00e9b Arnold;Vincent Perot;Siddharth Dalmia;Hexiang Hu;Xudong Lin;Panupong Pasupat;Aida Amini;Jeremy R. Cole;Sebastian Riedel;Iftekhar Naim;Ming-Wei Chang;Kelvin Guu", "authorids": "~Jinhyuk_Lee2;~Anthony_Chen1;~Zhuyun_Dai1;~Dheeru_Dua2;~Devendra_Singh_Sachan2;~Michael_Boratko1;~Yi_Luan1;~S\u00e9b_Arnold1;~Vincent_Perot1;~Siddharth_Dalmia1;~Hexiang_Hu1;~Xudong_Lin1;~Panupong_Pasupat1;~Aida_Amini1;~Jeremy_R._Cole1;~Sebastian_Riedel1;~Iftekhar_Naim1;~Ming-Wei_Chang3;~Kelvin_Guu1", "gender": "M;M;;F;M;M;F;;M;M;;M;M;F;M;M;M;;M", "homepage": "https://jhyuklee.github.io;https://anthonywchen.github.io/;;https://ddua.github.io/src/index.html;https://www.dsachan.com;https://people.cs.umass.edu/~mboratko/;;;;https://www.cs.cmu.edu/~sdalmia;;;https://ppasupat.github.io/;https://aidaamini.github.io/;https://jrc436.github.io;https://www.riedelcastro.org/;;;http://kelvinguu.com/", "dblp": "https://dblp.uni-trier.de/pers/hd/l/Lee:Jinhyuk;;148/4531;194/5251;167/3916;222/1939;125/7491;;227/2509;175/8966;;23/7723-3;124/9178;185/5527;189/4976;18/3348-1.html;11/8759;;164/5838", "google_scholar": "https://scholar.google.co.kr/citations?user=YWm_zVcAAAAJ;n3PSuQMAAAAJ;9bbHwJIAAAAJ;RDky42sAAAAJ;K6RYRTMAAAAJ;YKZGpnkAAAAJ;0i5Ys-4AAAAJ;;RrANep4AAAAJ;HSAe9OUAAAAJ;;https://scholar.google.com.hk/citations?hl=en;BqKXIA8AAAAJ;5pFhWN0AAAAJ;WCzWsG0AAAAJ;https://scholar.google.com.tw/citations?user=AcCtcrsAAAAJ;E8-dfNcAAAAJ;;", "orcid": "0000-0003-4972-239X;;;;;;;;;0000-0003-0437-5988;;;;;0000-0001-7147-5888;;;;", "linkedin": "jinhyuk-lee-73b27489/;anthony-chen-3526bb219/;;;devendra-singh-sachan-72985216/;michaelboratko/;;;vincentperot/;siddalmia/;;;;;jeremy-cole;;;;", "or_profile": "~Jinhyuk_Lee2;~Anthony_Chen1;~Zhuyun_Dai1;~Dheeru_Dua2;~Devendra_Singh_Sachan2;~Michael_Boratko1;~Yi_Luan1;~S\u00e9b_Arnold1;~Vincent_Perot1;~Siddharth_Dalmia1;~Hexiang_Hu1;~Xudong_Lin1;~Panupong_Pasupat1;~Aida_Amini1;~Jeremy_R._Cole1;~Sebastian_Riedel1;~Iftekhar_Naim1;~Ming-Wei_Chang3;~Kelvin_Guu1", "aff": "Google;Google DeepMind;Google;Google;Google DeepMind;Google;Google;;Google;Google Deepmind;;Columbia University;Google;University of Washington, Seattle;Google DeepMind;University College London;Google;;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;;google.com;google.com;;columbia.edu;google.com;uw.edu;google.com;ucl.ac.uk;google.com;;google.com", "position": "Research Scientist;Researcher;Researcher;Researcher;Researcher;Researcher;Research Scientist;;Software Engineer;Researcher;;PhD student;Employee;PhD student;Researcher;Full Professor;Researcher;;Senior Research Scientist/Manager", "bibtex": "@misc{\nanonymous2024can,\ntitle={Can Long-Context Language Models Subsume Retrieval, {SQL}, and More?},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=FjeJB0OUhN}\n}", "github": "", "project": "", "reviewers": "VBfb;GsgK;LA2k;3QHy", "site": "https://openreview.net/forum?id=FjeJB0OUhN", "pdf_size": 714703, "rating": "2;5;6;7", "confidence": "5;4;4;3", "wc_summary_and_contributions": "73;80;119;95", "wc_strengths": "28;34;121;35", "wc_improvement": "288;29;143;79", "wc_limitations": "32;5;29;122", "wc_correctness": "59;29;28;7", "wc_clarity": "69;1;12;10", "wc_relation_to_prior_work": "37;1;16;9", "wc_documentation": "20;15;32;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "607;195;501;359", "wc_reply_reviewers": "39;30;0;14", "wc_reply_authors": "27;27;27;27", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 91.75, 17.62632973707232 ], "wc_strengths_avg": [ 54.5, 38.48701079585163 ], "wc_improvement_avg": [ 134.75, 97.26863574657557 ], "wc_limitations_avg": [ 47.0, 44.54772721475249 ], "wc_correctness_avg": [ 30.75, 18.525320510047862 ], "wc_clarity_avg": [ 23.0, 26.879360111431225 ], "wc_relation_to_prior_work_avg": [ 15.75, 13.36740438529485 ], "wc_documentation_avg": [ 17.0, 11.113055385446435 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 415.5, 154.75383678603902 ], "wc_reply_reviewers_avg": [ 20.75, 14.956186011146023 ], "wc_reply_authors_avg": [ 27.0, 0.0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 19, 0 ], "corr_rating_confidence": -0.9449111825230682, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4375945618978638718&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0;0;1;2;0;3;0;4;0;0", "aff_unique_norm": "Google;DeepMind;Columbia University;University of Washington;University College London", "aff_unique_dep": "Google;DeepMind;;;", "aff_unique_url": "https://www.google.com;https://deepmind.com;https://www.columbia.edu;https://www.washington.edu;https://www.ucl.ac.uk", "aff_unique_abbr": "Google;DeepMind;Columbia;UW;UCL", "aff_campus_unique_index": "0;0;0;0;0;0;0;2;0;0", "aff_campus_unique": "Mountain View;;Seattle", "aff_country_unique_index": "0;1;0;0;1;0;0;0;1;0;0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "UniAR: A Unified model for predicting human Attention and Responses on visual content", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95950", "id": "FjssnGuHih", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FjssnGuHih", "openreview": "https://openreview.net/forum?id=FjssnGuHih", "poster": "/media/PosterPDFs/NeurIPS%202024/95950.png?t=1732616336.402533", "project": "", "author_site": "Peizhao Li, Junfeng He, Gang Li, Rachit Bhargava, Shaolei Shen, Nachiappan Valliappan, Youwei Liang, Hongxiang Gu, Venky Ramachandran, Golnaz farhadi, Yang Li, Kai Kohlhoff, Vidhya Navalpakkam", "tldr": "", "abstract": "Progress in human behavior modeling involves understanding both implicit, early-stage perceptual behavior, such as human attention, and explicit, later-stage behavior, such as subjective preferences or likes. Yet most prior research has focused on modeling implicit and explicit human behavior in isolation; and often limited to a specific type of visual content. We propose UniAR -- a unified model of human attention and preference behavior across diverse visual content. UniAR leverages a multimodal transformer to predict subjective feedback, such as satisfaction or aesthetic quality, along with the underlying human attention or interaction heatmaps and viewing order. We train UniAR on diverse public datasets spanning natural images, webpages, and graphic designs, and achieve SOTA performance on multiple benchmarks across various image domains and behavior modeling tasks. Potential applications include providing instant feedback on the effectiveness of UIs/visual content, and enabling designers and content-creation models to optimize their creation for human-centric improvements.", "keywords": "attention;saliency;scanpath;aesthetics", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Peizhao Li;Junfeng He;Gang Li;Rachit Bhargava;Shaolei Shen;NACHIAPPAN VALLIAPPAN;Youwei Liang;Hongxiang Gu;Venky Ramachandran;Golnaz farhadi;Yang Li;Kai J Kohlhoff;Vidhya Navalpakkam", "authorids": "~Peizhao_Li1;~Junfeng_He4;~Gang_Li13;~Rachit_Bhargava1;~Shaolei_Shen1;~NACHIAPPAN_VALLIAPPAN1;~Youwei_Liang1;~Hongxiang_Gu1;~Venky_Ramachandran1;~Golnaz_farhadi1;~Yang_Li2;~Kai_J_Kohlhoff1;~Vidhya_Navalpakkam1", "gender": "M;;;M;M;M;M;M;M;;M;;F", "homepage": "https://peizhaoli.com;;;;;https://sites.google.com/view/nachi/home;https://youweiliang.github.io/;https://hxgu.github.io//;;;http://yangl.org;https://research.google/people/KaiKohlhoff/;https://research.google/people/VidhyaNavalpakkam/", "dblp": "232/1771;;62/2655-21;;;;257/5626;169/2394;;;37/4190-58;181/3935;78/4730", "google_scholar": "h8UyqB4AAAAJ;;gmBt9v8AAAAJ;;;-gN2qN8AAAAJ;zMofZR4AAAAJ;;;;ZZdB48QAAAAJ;Du7j3mQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-9490-2990;;;0000-0001-7174-5729;;;0000-0002-9003-5287;;;0000-0003-2068-2531;", "linkedin": "peizhao-li-099037182/;;;rachitbhargava99/;shaoleishen/;nachiappan-valliappan/;;;;;yang-li-127a2a41/;kai-kohlhoff/;", "or_profile": "~Peizhao_Li1;~Junfeng_He4;~Gang_Li13;~Rachit_Bhargava1;~Shaolei_Shen1;~NACHIAPPAN_VALLIAPPAN1;~Youwei_Liang1;~Hongxiang_Gu1;~Venky_Ramachandran1;~Golnaz_farhadi1;~Yang_Li2;~Kai_J_Kohlhoff1;~Vidhya_Navalpakkam1", "aff": "Brandeis University;;Google;Research, Google;Google;Research, Google;University of California, San Diego;Google;Google;;Google;Google Research;Research, Google", "aff_domain": "brandeis.edu;;google.com;research.google.com;google.com;research.google.com;ucsd.edu;google.com;google.com;;google.com;google.com;research.google.com", "position": "PhD student;;Software Engineer;Researcher;Researcher;Researcher;PhD student;Researcher;Researcher;;Research Scientist;Research Scientist;Researcher", "bibtex": "@inproceedings{\nli2024uniar,\ntitle={Uni{AR}: A Unified model for predicting human Attention and Responses on visual content},\nauthor={Peizhao Li and Junfeng He and Gang Li and Rachit Bhargava and Shaolei Shen and NACHIAPPAN VALLIAPPAN and Youwei Liang and Hongxiang Gu and Venky Ramachandran and Golnaz farhadi and Yang Li and Kai J Kohlhoff and Vidhya Navalpakkam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FjssnGuHih}\n}", "github": "", "reviewers": "Fbj6;FgZM;RXVY;ew4J", "pdf_size": 3066851, "rating": "3;5;5;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "98;42;107;66", "wc_strengths": "34;114;57;37", "wc_weaknesses": "676;600;227;72", "wc_questions": "155;6;41;136", "wc_limitations": "36;6;1;1", "wc_review": "999;768;433;312", "wc_reply_reviewers": "298;232;0;0", "wc_reply_authors": "509;121;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 25.8879798362097 ], "wc_strengths_avg": [ 60.5, 32.12864765283469 ], "wc_weaknesses_avg": [ 393.75, 251.76017854299357 ], "wc_questions_avg": [ 84.5, 62.6039136156838 ], "wc_limitations_avg": [ 11.0, 14.577379737113251 ], "wc_review_avg": [ 628.0, 271.62566152703613 ], "wc_reply_reviewers_avg": [ 132.5, 134.53902779491162 ], "wc_reply_authors_avg": [ 157.5, 208.86419032471795 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7838603602107900693&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "brandeis.edu;;google.com;research.google.com;google.com;research.google.com;ucsd.edu;google.com;google.com;;google.com;google.com;research.google.com", "author_num": 13, "aff_unique_index": "0;1;1;1;1;2;1;1;1;1;1", "aff_unique_norm": "Brandeis University;Google;University of California, San Diego", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.brandeis.edu;https://www.google.com;https://www.ucsd.edu", "aff_unique_abbr": "Brandeis;Google;UCSD", "aff_campus_unique_index": "1;1;1;1;2;1;1;1;1;1", "aff_campus_unique": ";Mountain View;San Diego", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Approaching Human-Level Forecasting with Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95949", "id": "FlcdW7NPRY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FlcdW7NPRY", "openreview": "https://openreview.net/forum?id=FlcdW7NPRY", "poster": "/media/PosterPDFs/NeurIPS%202024/95949.png?t=1733733614.5583763", "project": "", "author_site": "Danny Halawi, Fred Zhang, Chen Yueh-Han, Jacob Steinhardt", "tldr": "", "abstract": "Forecasting future events is important for policy and decision making. In this work, we study whether language models (LMs) can forecast at the level of competitive human forecasters. Towards this goal, we develop a retrieval-augmented LM system designed to automatically search for relevant information, generate forecasts, and aggregate predictions. To facilitate our study, we collect a large dataset of questions from competitive forecasting platforms. Under a test set published after the knowledge cut-offs of our LMs, we evaluate the end-to-end performance of our system against the aggregates of human forecasts. On average, the system nears the crowd aggregate of competitive forecasters and, in a certain relaxed setting, surpasses it. Our work suggests that using LMs to forecasts the future could provide accurate predictions at scale and help to inform institutional decision making.", "keywords": "langauge models;forecasting;information retrieval;retrieval augmentation", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/fc8d17cfb15b6f810eae9f6bdae4ee59bd8409b3.zip", "author": "Danny Halawi;Fred Zhang;Chen Yueh-Han;Jacob Steinhardt", "authorids": "~Danny_Halawi1;~Fred_Zhang1;~Chen_Yueh-Han1;~Jacob_Steinhardt1", "gender": "M;M;M;", "homepage": "https://dannyhalawi.me;http://fredzhang.me/;http://www.john-chen.cc/;", "dblp": "321/4165;232/9071;371/5285;35/10625", "google_scholar": ";guJ_kBQAAAAJ;NnWPLsEAAAAJ;", "orcid": ";;;", "linkedin": ";fred-zhang-0/;yueh-han-chen/;", "or_profile": "~Danny_Halawi1;~Fred_Zhang1;~Chen_Yueh-Han1;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "Researcher;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhalawi2024approaching,\ntitle={Approaching Human-Level Forecasting with Language Models},\nauthor={Danny Halawi and Fred Zhang and Chen Yueh-Han and Jacob Steinhardt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FlcdW7NPRY}\n}", "github": "", "reviewers": "z9p5;L9xK;1xsc;F1EJ", "pdf_size": 1292871, "rating": "5;6;7;8", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;4;4;4", "wc_summary": "73;45;75;54", "wc_strengths": "68;58;78;90", "wc_weaknesses": "114;18;154;111", "wc_questions": "1;1;168;2", "wc_limitations": "1;1;1;2", "wc_review": "257;123;476;259", "wc_reply_reviewers": "0;30;0;91", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 12.676257334087218 ], "wc_strengths_avg": [ 73.5, 11.863810517704673 ], "wc_weaknesses_avg": [ 99.25, 49.886746736984165 ], "wc_questions_avg": [ 43.0, 72.16993834000414 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 278.75, 126.51951430510631 ], "wc_reply_reviewers_avg": [ 30.25, 37.15087482146282 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9555594930762264063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Fm4FkfGTLu", "title": "gRNAde: Geometric Deep Learning for 3D RNA inverse design", "track": "main", "status": "Reject", "tldr": "", "abstract": "Computational RNA design tasks are often posed as inverse problems, where sequences are designed based on adopting a single desired secondary structure without considering 3D geometry and conformational diversity. We introduce gRNAde, a geometric RNA design pipeline operating on 3D RNA backbones to design sequences that explicitly account for structure and dynamics. Under the hood, gRNAde is a multi-state Graph Neural Network that generates candidate RNA sequences conditioned on one or more 3D backbone structures where the identities of the bases are unknown. On a single-state fixed backbone re-design benchmark of 14 RNA structures from the PDB identified by Das et al. [2010], gRNAde obtains higher native sequence recovery rates (56% on average) compared to Rosetta (45% on average), taking under a second to produce designs compared to the reported hours for Rosetta. We further demonstrate the utility of gRNAde on a new benchmark of multi-state design for structurally flexible RNAs, as well as zero-shot ranking of mutational fitness landscapes in a retrospective analysis of a recent RNA polymerase ribozyme structure. Open source code and tutorials are available at: https://anonymous.4open.science/r/geometric-rna-design", "keywords": "RNA Structure;RNA Design;Inverse Folding;Geometric Deep Learning;Graph Neural Networks", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/432c5b32c120d88dff726daed6758aab263e0b72.zip", "author": "Chaitanya K. Joshi;Arian Rokkum Jamasb;Ramon Vi\u00f1as Torn\u00e9;Charles Harris;Simon V Mathis;Alex Morehead;Rishabh Anand;Pietro Lio", "authorids": "~Chaitanya_K._Joshi1;~Arian_Rokkum_Jamasb1;~Ramon_Vi\u00f1as_Torn\u00e91;~Charles_Harris2;~Simon_V_Mathis1;~Alex_Morehead1;~Rishabh_Anand1;~Pietro_Lio1", "gender": ";;;M;M;M;M;M", "homepage": "https://www.jamasb.io;;https://cch1999.github.io/;https://simonmathis.org;https://amorehead.github.io/;http://rish-16.github.io;https://www.cst.cam.ac.uk/people/pl219;http://www.chaitjo.com/", "dblp": "296/2021;217/4944;;338/5638;259/6116;;l/PietroLio.html;202/2132", "google_scholar": "https://scholar.google.co.uk/citations?user=hYm9a-UAAAAJ;5Em0-BAAAAAJ;;https://scholar.google.ch/citations?user=N6I6fT0AAAAJ;IYHJU5EAAAAJ;mn7sL5MAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-6727-7579;0000-0003-2411-4478;;0000-0002-5246-6481;0000-0002-0586-6191;;0000-0002-0540-5053;0000-0003-4722-1815", "linkedin": "jamasb/;ramon-vinas/;;simonmathis/;alexmorehead;https://linkedin.com/in/rishabhanand16/;;chaitjo", "or_profile": "~Arian_Rokkum_Jamasb1;~Ramon_Vi\u00f1as_Torn\u00e91;~Charles_Harris2;~Simon_V_Mathis1;~Alex_Morehead1;~Rishabh_Anand1;~Pietro_Lio1;~Chaitanya_Krishna_Joshi1", "aff": "Prescient Design / Roche / Genentech;EPFL - EPF Lausanne;University of Cambridge;University of Cambridge;University of Missouri, Columbia;National University of Singapore;University of Cambridge;Fundamental AI Research (FAIR), Meta", "aff_domain": "roche.com;epfl.ch;cam.ac.uk;cam.ac.uk;missouri.edu;nus.edu.sg;cam.ac.uk;meta.com", "position": "Researcher;Postdoc;PhD student;PhD student;PhD student;Undergrad student;Full Professor;Intern", "bibtex": "@misc{\nanonymous2024grnade,\ntitle={g{RNA}de: Geometric Deep Learning for 3D {RNA} inverse design},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Fm4FkfGTLu}\n}", "github": "", "project": "", "reviewers": "E2ka;Hi6M;h3v6;vioU;QQ46", "site": "https://openreview.net/forum?id=Fm4FkfGTLu", "pdf_size": 12026378, "rating": "4;5;5;6;8", "confidence": "4;3;3;4;4", "soundness": "2;3;3;2;4", "novelty": "2;2;2;3;3", "presentation": "3;3;4;3;4", "wc_summary": "53;34;80;49;136", "wc_strengths": "94;38;105;102;86", "wc_weaknesses": "240;45;138;270;84", "wc_questions": "52;111;77;109;73", "wc_limitations": "30;7;32;30;78", "wc_review": "469;235;432;560;457", "wc_reply_reviewers": "520;9;64;277;77", "wc_reply_authors": "1284;50;303;661;17", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;3;2;3;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 70.4, 36.00333317902663 ], "wc_strengths_avg": [ 85.0, 24.413111231467404 ], "wc_weaknesses_avg": [ 155.4, 87.03930146778522 ], "wc_questions_avg": [ 84.4, 22.570777567465413 ], "wc_limitations_avg": [ 35.4, 23.2 ], "wc_review_avg": [ 430.6, 106.93661674094612 ], "wc_reply_reviewers_avg": [ 189.4, 188.61240680294605 ], "wc_reply_authors_avg": [ 463.0, 470.81418840132676 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3611575592573077, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16790860332614182226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff_unique_index": "0;1;2;2;3;4;2;5", "aff_unique_norm": "Roche;EPFL;University of Cambridge;University of Missouri;National University of Singapore;Meta", "aff_unique_dep": ";;;;;Fundamental AI Research (FAIR)", "aff_unique_url": "https://www.roche.com;https://www.epfl.ch;https://www.cam.ac.uk;https://www.missouri.edu;https://www.nus.edu.sg;https://meta.com", "aff_unique_abbr": "Roche;EPFL;Cambridge;MU;NUS;Meta", "aff_campus_unique_index": "1;2;2;3;2", "aff_campus_unique": ";Lausanne;Cambridge;Columbia", "aff_country_unique_index": "0;0;1;1;2;3;1;2", "aff_country_unique": "Switzerland;United Kingdom;United States;Singapore" }, { "title": "TabEBM: A Tabular Data Augmentation Method with Distinct Class-Specific Energy-Based Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95948", "id": "FmNoFIImZG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FmNoFIImZG", "openreview": "https://openreview.net/forum?id=FmNoFIImZG", "poster": "/media/PosterPDFs/NeurIPS%202024/95948.png?t=1731278824.5527027", "project": "", "author_site": "Andrei Margeloiu, Xiangjian Jiang, Nikola Simidjievski, Mateja Jamnik", "tldr": "", "abstract": "Data collection is often difficult in critical fields such as medicine, physics, and chemistry, yielding typically only small tabular datasets. However, classification methods tend to struggle with these small datasets, leading to poor predictive performance. Increasing the training set with additional synthetic data, similar to data augmentation in images, is commonly believed to improve downstream tabular classification performance. However, current tabular generative methods that learn either the joint distribution $ p(\\mathbf{x}, y) $ or the class-conditional distribution $ p(\\mathbf{x} \\mid y) $ often overfit on small datasets, resulting in poor-quality synthetic data, usually worsening classification performance compared to using real data alone. To solve these challenges, we introduce TabEBM, a novel class-conditional generative method using Energy-Based Models (EBMs). Unlike existing tabular methods that use a shared model to approximate all class-conditional densities, our key innovation is to create distinct EBM generative models for each class, each modelling its class-specific data distribution individually. This approach creates robust energy landscapes, even in ambiguous class distributions. Our experiments show that TabEBM generates synthetic data with higher quality and better statistical fidelity than existing methods. When used for data augmentation, our synthetic data consistently leads to improved classification performance across diverse datasets of various sizes, especially small ones. Code is available at https://github.com/andreimargeloiu/TabEBM.", "keywords": "tabular data;data augmentation;synthetic data generation;energy based model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/c59df0d51254e57de4dc31499e608dec03952914.zip", "author": "Andrei Margeloiu;Xiangjian Jiang;Nikola Simidjievski;Mateja Jamnik", "authorids": "~Andrei_Margeloiu1;~Xiangjian_Jiang1;~Nikola_Simidjievski1;~Mateja_Jamnik1", "gender": "M;M;Unspecified;F", "homepage": ";https://silencex12138.github.io/;https://simidjievskin.github.io/;http://www.cl.cam.ac.uk/~mj201", "dblp": "280/0265;300/4620;;41/1392", "google_scholar": "35Ygi8wAAAAJ;1y8DKBYAAAAJ;;d5QiyJkAAAAJ", "orcid": ";;;0000-0003-2772-2532", "linkedin": "andreimargeloiu/;xiangjian-jiang-034b1a222/;;", "or_profile": "~Andrei_Margeloiu1;~Xiangjian_Jiang1;~Nikola_Simidjievski1;~Mateja_Jamnik1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;Principal Researcher;Professor in Artificial Intelligence", "bibtex": "@inproceedings{\nmargeloiu2024tabebm,\ntitle={Tab{EBM}: A Tabular Data Augmentation Method with Distinct Class-Specific Energy-Based Models},\nauthor={Andrei Margeloiu and Xiangjian Jiang and Nikola Simidjievski and Mateja Jamnik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FmNoFIImZG}\n}", "github": "", "reviewers": "iojh;tdoQ;XdB4;Cc3y", "pdf_size": 3497627, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;2;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "64;47;139;49", "wc_strengths": "41;30;104;101", "wc_weaknesses": "40;132;381;18", "wc_questions": "88;52;675;153", "wc_limitations": "30;1;23;5", "wc_review": "263;262;1322;326", "wc_reply_reviewers": "34;40;132;23", "wc_reply_authors": "750;73;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.75, 37.67210506462308 ], "wc_strengths_avg": [ 69.0, 33.741665637605976 ], "wc_weaknesses_avg": [ 142.75, 144.04751820146018 ], "wc_questions_avg": [ 242.0, 252.59948535181144 ], "wc_limitations_avg": [ 14.75, 12.090802289343747 ], "wc_review_avg": [ 543.25, 450.3583989446627 ], "wc_reply_reviewers_avg": [ 57.25, 43.58540466715893 ], "wc_reply_authors_avg": [ 205.75, 315.6329949482468 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6382614691990421287&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "How Far Can Transformers Reason? The Globality Barrier and Inductive Scratchpad", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95947", "id": "FoGwiFXzuN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FoGwiFXzuN", "openreview": "https://openreview.net/forum?id=FoGwiFXzuN", "poster": "/media/PosterPDFs/NeurIPS%202024/95947.png?t=1733699931.5093327", "project": "", "author_site": "Emmanuel Abbe, Samy Bengio, Aryo Lotfi, Colin Sandon, Omid Saremi", "tldr": "", "abstract": "Can Transformers predict new syllogisms by composing established ones? More generally, what type of targets can be learned by such models from scratch? Recent works show that Transformers can be Turing-complete in terms of expressivity, but this does not address the learnability objective. This paper puts forward the notion of 'globality degree' of a target distribution to capture when weak learning is efficiently achievable by regular Transformers. This measure shows a contrast with the expressivity results of Transformers captured by $TC^0/TC^1$ classes (further studied here), since the globality relates to correlations with the more limited $NC^0$ class. We show here experimentally and theoretically under additional assumptions that distributions with high globality cannot be learned efficiently. In particular, syllogisms cannot be composed on long chains. Further, we develop scratchpad techniques and show that: (i) agnostic scratchpads cannot break the globality barrier, (ii) educated scratchpads can break the globality with intermediate steps, although not all such scratchpads can generalize out-of-distribution (OOD), (iii) a notion of 'inductive scratchpad', that composes the prior information more efficiently, can both break the globality barrier and improve the OOD generalization. In particular, some of our inductive scratchpads can achieve length generalizations of up to $6\\times$ for some arithmetic tasks depending on the input formatting.", "keywords": "Transformer;reasoning;scratchpad;length generalization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Emmanuel Abbe;Samy Bengio;Aryo Lotfi;Colin Sandon;Omid Saremi", "authorids": "~Emmanuel_Abbe1;~Samy_Bengio1;~Aryo_Lotfi1;~Colin_Sandon1;~Omid_Saremi1", "gender": ";M;M;male;", "homepage": ";http://bengio.abracadoudou.com;https://aryol.github.io;https://math.mit.edu/directory/profile.php?pid=2079;", "dblp": "84/5016;b/SamyBengio;284/8194;120/3958;", "google_scholar": ";Vs-MdPcAAAAJ;M7_iyAgAAAAJ;;", "orcid": ";;;;", "linkedin": ";bengio;aryo/;;omidsaremi/", "or_profile": "~Emmanuel_Abbe1;~Samy_Bengio1;~Aryo_Lotfi1;~Colin_Sandon1;~Omid_Saremi1", "aff": "Swiss Federal Institute of Technology Lausanne;Apple;Apple;EPFL - EPF Lausanne;Apple", "aff_domain": "epfl.ch;apple.com;apple.com;epfl.ch;apple.com", "position": "Full Professor;Senior Director;Intern;Postdoc;ML", "bibtex": "@inproceedings{\nabbe2024how,\ntitle={How Far Can Transformers Reason? The Globality Barrier and Inductive Scratchpad},\nauthor={Emmanuel Abbe and Samy Bengio and Aryo Lotfi and Colin Sandon and Omid Saremi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FoGwiFXzuN}\n}", "github": "", "reviewers": "eJe3;cmbZ;GsuG;apUt", "pdf_size": 1525568, "rating": "5;6;7;7", "confidence": "4;3;4;2", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;2;2;4", "wc_summary": "94;149;341;76", "wc_strengths": "94;91;267;87", "wc_weaknesses": "237;496;317;35", "wc_questions": "278;10;119;52", "wc_limitations": "33;1;55;1", "wc_review": "736;747;1099;251", "wc_reply_reviewers": "0;0;39;6", "wc_reply_authors": "430;0;540;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 165.0, 105.11184519358416 ], "wc_strengths_avg": [ 134.75, 76.39494420444326 ], "wc_weaknesses_avg": [ 271.25, 165.52397862545476 ], "wc_questions_avg": [ 114.75, 101.95434027053483 ], "wc_limitations_avg": [ 22.5, 22.863726730347352 ], "wc_review_avg": [ 708.25, 301.6764616273534 ], "wc_reply_reviewers_avg": [ 11.25, 16.20763708873073 ], "wc_reply_authors_avg": [ 242.5, 245.59875814018278 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12752556670180222448&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "epfl.ch;apple.com;apple.com;epfl.ch;apple.com", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;Apple;EPFL", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.epfl.ch;https://www.apple.com;https://www.epfl.ch", "aff_unique_abbr": "EPFL;Apple;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Federated Black-Box Adaptation for Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95946", "id": "Fp3JVz5XE7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Fp3JVz5XE7", "openreview": "https://openreview.net/forum?id=Fp3JVz5XE7", "poster": "/media/PosterPDFs/NeurIPS%202024/95946.png?t=1730784043.623342", "project": "", "author_site": "Jay Paranjape, Shameema Sikder, S. Vedula, Vishal Patel", "tldr": "", "abstract": "Federated Learning (FL) is a form of distributed learning that allows multiple institutions or clients to collaboratively learn a global model to solve a task. This allows the model to utilize the information from every institute while preserving data privacy. However, recent studies show that the promise of protecting the privacy of data is not upheld by existing methods and that it is possible to recreate the training data from the different institutions. This is done by utilizing gradients transferred between the clients and the global server during training or by knowing the model architecture at the client end. In this paper, we propose a federated learning framework for semantic segmentation without knowing the model architecture nor transferring gradients between the client and the server, thus enabling better privacy preservation. We propose \\textit{BlackFed} - a black-box adaptation of neural networks that utilizes zero order optimization (ZOO) to update the client model weights and first order optimization (FOO) to update the server weights. We evaluate our approach on several computer vision and medical imaging datasets to demonstrate its effectiveness. To the best of our knowledge, this work is one of the first works in employing federated learning for segmentation, devoid of gradients or model information exchange. Code: https://github.com/JayParanjape/blackfed/tree/master", "keywords": "Federated Learning;Blackbox Learning;Split Networks;Segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jay Nitin Paranjape;Shameema Sikder;S. Swaroop Vedula;Vishal M. Patel", "authorids": "~Jay_Nitin_Paranjape1;~Shameema_Sikder1;~S._Swaroop_Vedula2;~Vishal_M._Patel1", "gender": "M;F;;M", "homepage": "https://jayparanjape.github.io/website/;;https://aiforsurgery.jhu.edu;https://engineering.jhu.edu/vpatel36/", "dblp": "266/8000.html;;134/9780.html;76/6100", "google_scholar": "BcBltw8AAAAJ;kqfKr38AAAAJ;PfS4RUYAAAAJ;AkEXTbIAAAAJ", "orcid": ";;0000-0002-6992-2957;", "linkedin": "jay-paranjape-2bb70a157/;;;", "or_profile": "~Jay_Nitin_Paranjape1;~Shameema_Sikder1;~S._Swaroop_Vedula2;~Vishal_Patel2", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins;Johns Hopkins University", "aff_domain": "jh.edu;jhu.edu;jhu.edu;jhu.edu", "position": "PhD student;Associate Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nparanjape2024federated,\ntitle={Federated Black-Box Adaptation for Semantic Segmentation},\nauthor={Jay Nitin Paranjape and Shameema Sikder and S. Swaroop Vedula and Vishal M. Patel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Fp3JVz5XE7}\n}", "github": "", "reviewers": "Ftdr;XV1b;ykrj;zJW7", "pdf_size": 1988453, "rating": "4;5;5;7", "confidence": "3;4;3;3", "soundness": "3;3;2;3", "novelty": "2;2;2;4", "presentation": "3;3;3;2", "wc_summary": "151;60;42;102", "wc_strengths": "208;41;17;50", "wc_weaknesses": "160;207;244;164", "wc_questions": "4;9;35;97", "wc_limitations": "4;11;47;91", "wc_review": "527;328;385;504", "wc_reply_reviewers": "0;20;89;103", "wc_reply_authors": "146;58;242;58", "reply_reviewers": "0;1;2;1", "reply_authors": "4;2;3;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 42.020084483494315 ], "wc_strengths_avg": [ 79.0, 75.44865803975576 ], "wc_weaknesses_avg": [ 193.75, 34.368408458932166 ], "wc_questions_avg": [ 36.25, 36.99577678600627 ], "wc_limitations_avg": [ 38.25, 34.54978292261762 ], "wc_review_avg": [ 436.0, 82.41662453655816 ], "wc_reply_reviewers_avg": [ 53.0, 43.85772451917678 ], "wc_reply_authors_avg": [ 126.0, 76.0 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gkyhIVcOE_gJ:scholar.google.com/&scioq=Federated+Black-Box+Adaptation+for+Semantic+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "jh.edu;jhu.edu;jhu.edu;jhu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Transformer: Multi-Party Vertical Federated Learning on Practical Fuzzily Linked Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95945", "id": "FqWyzyErVT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FqWyzyErVT", "openreview": "https://openreview.net/forum?id=FqWyzyErVT", "poster": "/media/PosterPDFs/NeurIPS%202024/95945.png?t=1729747180.189609", "project": "", "author_site": "Zhaomin Wu, Junyi Hou, Yiqun Diao, Bingsheng He", "tldr": "", "abstract": "Federated Learning (FL) is an evolving paradigm that enables multiple parties to collaboratively train models without sharing raw data. Among its variants, Vertical Federated Learning (VFL) is particularly relevant in real-world, cross-organizational collaborations, where distinct features of a shared instance group are contributed by different parties. In these scenarios, parties are often linked using fuzzy identifiers, leading to a common practice termed as _multi-party fuzzy VFL_. Existing models generally address either multi-party VFL or fuzzy VFL between two parties. Extending these models to practical multi-party fuzzy VFL typically results in significant performance degradation and increased costs for maintaining privacy. To overcome these limitations, we introduce the _Federated Transformer (FeT)_, a novel framework that supports multi-party VFL with fuzzy identifiers. FeT innovatively encodes these identifiers into data representations and employs a transformer architecture distributed across different parties, incorporating three new techniques to enhance performance. Furthermore, we have developed a multi-party privacy framework for VFL that integrates differential privacy with secure multi-party computation, effectively protecting local representations while minimizing associated utility costs. Our experiments demonstrate that the FeT surpasses the baseline models by up to 46\\% in terms of accuracy when scaled to 50 parties. Additionally, in two-party fuzzy VFL settings, FeT also shows improved performance and privacy over cutting-edge VFL models.", "keywords": "vertical federated learning;federated learning;transformer;record linkage;entity alignment;differential privacy;fuzzy alignment", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zhaomin Wu;Junyi Hou;Yiqun Diao;Bingsheng He", "authorids": "~Zhaomin_Wu1;~Junyi_Hou1;~Yiqun_Diao1;~Bingsheng_He1", "gender": "M;M;M;M", "homepage": "https://zhaominwu.com;https://www.junyi.dev;;http://www.comp.nus.edu.sg/~hebs/", "dblp": "254/0918;;;h/BingshengHe.html", "google_scholar": "QjehmgkAAAAJ;;rlL7Uc4AAAAJ;https://scholar.google.com.tw/citations?user=RogYLKYAAAAJ", "orcid": "0000-0002-6463-0031;0009-0003-0443-456X;;0000-0001-8618-4581", "linkedin": ";;;bingsheng-he-7734b131", "or_profile": "~Zhaomin_Wu1;~Junyi_Hou1;~Yiqun_Diao1;~Bingsheng_He1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;nus.edu.sg;nus.edu.sg", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2024federated,\ntitle={Federated Transformer: Multi-Party Vertical Federated Learning on Practical Fuzzily Linked Data},\nauthor={Zhaomin Wu and Junyi Hou and Yiqun Diao and Bingsheng He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FqWyzyErVT}\n}", "github": "", "reviewers": "P7Jg;so53;jPVu;sPbx", "pdf_size": 7152052, "rating": "4;5;6;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;4", "presentation": "2;2;3;3", "wc_summary": "59;41;120;56", "wc_strengths": "118;39;95;148", "wc_weaknesses": "143;151;438;212", "wc_questions": "5;44;4;88", "wc_limitations": "160;1;1;111", "wc_review": "485;276;658;615", "wc_reply_reviewers": "19;17;8;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.0, 30.224162519414826 ], "wc_strengths_avg": [ 100.0, 39.91866731242415 ], "wc_weaknesses_avg": [ 236.0, 119.63904045084949 ], "wc_questions_avg": [ 35.25, 34.46284230878237 ], "wc_limitations_avg": [ 68.25, 69.44557221306482 ], "wc_review_avg": [ 508.5, 148.57742089563945 ], "wc_reply_reviewers_avg": [ 11.0, 7.582875444051551 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12137591022360780806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "u.nus.edu;u.nus.edu;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "LLM Dataset Inference: Did you train on my dataset?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95944", "id": "Fr9d1UMc37", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Fr9d1UMc37", "openreview": "https://openreview.net/forum?id=Fr9d1UMc37", "poster": "", "project": "", "author_site": "Pratyush Maini, Hengrui Jia, Nicolas Papernot, Adam Dziedzic", "tldr": "", "abstract": "The proliferation of large language models (LLMs) in the real world has come with a rise in copyright cases against companies for training their models on unlicensed data from the internet. Recent works have presented methods to identify if individual text sequences were members of the model's training data, known as membership inference attacks (MIAs). \nWe demonstrate that the apparent success of these MIAs is confounded by selecting non-members (text sequences not used for training) belonging to a different distribution from the members (e.g., temporally shifted recent Wikipedia articles compared with ones used to train the model). This distribution shift makes membership inference appear successful. \nHowever, most MIA methods perform no better than random guessing when discriminating between members and non-members from the same distribution (e.g., in this case, the same period of time).\nEven when MIAs work, we find that different MIAs succeed at inferring membership of samples from different distributions.\nInstead, we propose a new dataset inference method to accurately identify the datasets used to train large language models. This paradigm sits realistically in the modern-day copyright landscape, where authors claim that an LLM is trained over multiple documents (such as a book) written by them, rather than one particular paragraph.\nWhile dataset inference shares many of the challenges of membership inference, we solve it by selectively combining the MIAs that provide positive signal for a given distribution, and aggregating them to perform a statistical test on a given dataset. Our approach successfully distinguishes the train and test sets of different subsets of the Pile with statistically significant p-values $< 0.1$, without any false positives.", "keywords": "LLM;dataset inference;membership inference;copyright", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/70813078b1d4c45b0c715821416123859f6fd274.zip", "author": "Pratyush Maini;Hengrui Jia;Nicolas Papernot;Adam Dziedzic", "authorids": "~Pratyush_Maini1;~Hengrui_Jia1;~Nicolas_Papernot1;~Adam_Dziedzic1", "gender": "M;M;M;", "homepage": "https://pratyushmaini.github.io/;https://nick-jia.github.io/;https://www.papernot.fr;", "dblp": "248/8071;255/4934;162/1405;", "google_scholar": ";g2vBgnoAAAAJ;cGxq0cMAAAAJ;", "orcid": ";;;", "linkedin": ";;nicolaspapernot;", "or_profile": "~Pratyush_Maini1;~Hengrui_Jia1;~Nicolas_Papernot1;~Adam_Dziedzic1", "aff": "Carnegie Mellon University;University of Toronto;Google;", "aff_domain": "cmu.edu;utoronto.ca;google.com;", "position": "PhD student;PhD student;Research Scientist;", "bibtex": "@inproceedings{\nmaini2024llm,\ntitle={{LLM} Dataset Inference: Did you train on my dataset?},\nauthor={Pratyush Maini and Hengrui Jia and Nicolas Papernot and Adam Dziedzic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Fr9d1UMc37}\n}", "github": "", "reviewers": "G1MR;pZrT;NHN3;Nmtj;bhaD;VFio", "pdf_size": 841447, "rating": "5;5;5;6;7;7", "confidence": "4;3;4;3;3;4", "soundness": "3;3;3;3;3;3", "novelty": "3;2;3;3;3;4", "presentation": "3;3;3;3;3;3", "wc_summary": "84;141;165;72;95;75", "wc_strengths": "26;63;143;46;115;79", "wc_weaknesses": "149;162;128;51;26;108", "wc_questions": "1;79;275;39;18;62", "wc_limitations": "6;1;57;11;6;9", "wc_review": "266;446;768;219;260;333", "wc_reply_reviewers": "19;25;0;19;26;0", "wc_reply_authors": "0;308;0;0;0;0", "reply_reviewers": "1;1;0;1;1;0", "reply_authors": "1;2;1;1;1;1", "rating_avg": [ 5.833333333333333, 0.8975274678557507 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.33333333333333, 35.178907822096406 ], "wc_strengths_avg": [ 78.66666666666667, 39.88594851434386 ], "wc_weaknesses_avg": [ 104.0, 49.78955713801841 ], "wc_questions_avg": [ 79.0, 91.3874535517139 ], "wc_limitations_avg": [ 15.0, 19.03505538035898 ], "wc_review_avg": [ 382.0, 187.26184875729493 ], "wc_reply_reviewers_avg": [ 14.833333333333334, 10.82307206336948 ], "wc_reply_authors_avg": [ 51.333333333333336, 114.78482284498921 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18569533817705186, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9660050465752268060&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "cmu.edu;utoronto.ca;google.com;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;University of Toronto;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.cmu.edu;https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "CMU;U of T;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Structured Learning of Compositional Sequential Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95943", "id": "FsA0OSsdzJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FsA0OSsdzJ", "openreview": "https://openreview.net/forum?id=FsA0OSsdzJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95943.png?t=1732796740.8794472", "project": "", "author_site": "Jialin Yu, Andreas Koukorinis, Nicolo Colombo, Yuchen Zhu, Ricardo Silva", "tldr": "", "abstract": "We consider sequential treatment regimes where each unit is exposed to combinations of interventions over time. When interventions are described by qualitative labels, such as \"close schools for a month due to a pandemic\" or \"promote this podcast to this user during this week\", it is unclear which appropriate structural assumptions allow us to generalize behavioral predictions to previously unseen combinations of interventions. Standard black-box approaches mapping sequences of categorical variables to outputs are applicable, but they rely on poorly understood assumptions on how reliable generalization can be obtained, and may underperform under sparse sequences, temporal variability, and large action spaces. To approach that, we pose an explicit model for composition, that is, how the effect of sequential interventions can be isolated into modules, clarifying which data conditions allow for the identification of their combined effect at different units and time steps. We show the identification properties of our compositional model, inspired by advances in causal matrix factorization methods. Our focus is on predictive models for novel compositions of interventions instead of matrix completion tasks and causal effect estimation. We compare our approach to flexible but generic black-box models to illustrate how structure aids prediction in sparse data conditions.", "keywords": "Causality;sequential data", "primary_area": "causal_inference", "supplementary_material": "/attachment/4e742f071daebbc1d829693354423a9aac4e78c0.zip", "author": "Jialin Yu;Andreas Koukorinis;Nicol\u00f2 Colombo;Yuchen Zhu;Ricardo Silva", "authorids": "~Jialin_Yu2;~Andreas_Koukorinis1;~Nicol\u00f2_Colombo1;~Yuchen_Zhu1;~Ricardo_Silva1", "gender": ";;;M;M", "homepage": "https://jialin-yu.github.io/;;https://yuchen-zhu.github.io;http://www.homepages.ucl.ac.uk/~ucgtrbd/;", "dblp": "167/1075;;;42/2642-1;149/2660", "google_scholar": "L8tFzjgAAAAJ;;7LoS1loAAAAJ;I-ANa0QAAAAJ;XE_zoYEAAAAJ", "orcid": ";;;;", "linkedin": ";andreaskoukorinis/;carolineyuchenzhu/;;", "or_profile": "~Jialin_Yu2;~Andreas_Koukorinis1;~Yuchen_Zhu1;~Ricardo_Silva1;~nicolo_colombo1", "aff": "University College London, University of London;;University College London;University College London;Royal Holloway, University of London", "aff_domain": "ucl.ac.uk;;ucl.ac.uk;ucl.ac.uk;rhul.ac.uk", "position": "Postdoc;;PhD student;Full Professor;Lecturer", "bibtex": "@inproceedings{\nyu2024structured,\ntitle={Structured Learning of Compositional Sequential Interventions},\nauthor={Jialin Yu and Andreas Koukorinis and Nicol{\\`o} Colombo and Yuchen Zhu and Ricardo Silva},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FsA0OSsdzJ}\n}", "github": "", "reviewers": "yF1i;2QJb;Ni2f;MJr7", "pdf_size": 1099520, "rating": "3;5;5;7", "confidence": "4;3;3;2", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "1;2;3;2", "wc_summary": "84;50;46;140", "wc_strengths": "75;18;70;71", "wc_weaknesses": "96;52;46;432", "wc_questions": "126;46;27;124", "wc_limitations": "145;9;2;32", "wc_review": "526;175;191;799", "wc_reply_reviewers": "272;13;0;431", "wc_reply_authors": "870;18;29;354", "reply_reviewers": "2;1;0;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.0, 37.656340767525464 ], "wc_strengths_avg": [ 58.5, 23.4574082114798 ], "wc_weaknesses_avg": [ 156.5, 160.2271824629017 ], "wc_questions_avg": [ 80.75, 44.762568067527134 ], "wc_limitations_avg": [ 47.0, 57.65847726050351 ], "wc_review_avg": [ 422.75, 258.5114842710087 ], "wc_reply_reviewers_avg": [ 179.0, 181.48691412881536 ], "wc_reply_authors_avg": [ 317.75, 346.23718387833503 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tzK8EBzCQacJ:scholar.google.com/&scioq=Structured+Learning+of+Compositional+Sequential+Interventions&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "ucl.ac.uk;;ucl.ac.uk;ucl.ac.uk;rhul.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University College London;University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.royalholloway.ac.uk", "aff_unique_abbr": "UCL;RHUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Royal Holloway", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Constrained Synthesis with Projected Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95942", "id": "FsdB3I9Y24", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FsdB3I9Y24", "openreview": "https://openreview.net/forum?id=FsdB3I9Y24", "poster": "/media/PosterPDFs/NeurIPS%202024/95942.png?t=1733286447.0996702", "project": "", "author_site": "Jacob K Christopher, Stephen Baek, Nando Fioretto", "tldr": "", "abstract": "This paper introduces an approach to endow generative diffusion processes the ability to satisfy and certify compliance with constraints and physical principles. The proposed method recast the traditional sampling process of generative diffusion models as a constrained optimization problem, steering the generated data distribution to remain within a specified region to ensure adherence to the given constraints.\nThese capabilities are validated on applications featuring both convex and challenging, non-convex, constraints as well as ordinary differential equations, in domains spanning from synthesizing new materials with precise morphometric properties, generating physics-informed motion, optimizing paths in planning scenarios, and human motion synthesis.", "keywords": "Constraint satisfaction;Generative diffusion models;physics-informed models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9d08fceab7fbc19942a2e76189673fef43cf13b1.zip", "author": "Jacob K Christopher;Stephen Baek;Ferdinando Fioretto", "authorids": "~Jacob_K_Christopher1;~Stephen_Baek1;~Ferdinando_Fioretto1", "gender": "M;;M", "homepage": "https://www.linkedin.com/in/jacob-christopher-834a02239;http://www.stephenbaek.com;http://nandofioretto.com", "dblp": ";;119/6404", "google_scholar": ";;ASf9Q04AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jacob_K_Christopher1;~Stephen_Baek1;~Ferdinando_Fioretto1", "aff": "University of Virginia, Charlottesville;University of Virginia, Charlottesville;University of Virginia, Charlottesville", "aff_domain": "virginia.edu;virginia.edu;virginia.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchristopher2024constrained,\ntitle={Constrained Synthesis with Projected Diffusion Models},\nauthor={Jacob K Christopher and Stephen Baek and Ferdinando Fioretto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FsdB3I9Y24}\n}", "github": "", "reviewers": "e4oh;yvsu;Jdpw;p9zr", "pdf_size": 11166285, "rating": "4;5;5;7", "confidence": "3;4;3;3", "soundness": "2;2;2;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "70;89;57;102", "wc_strengths": "22;73;41;122", "wc_weaknesses": "184;127;46;115", "wc_questions": "155;130;85;106", "wc_limitations": "16;50;6;26", "wc_review": "447;469;235;471", "wc_reply_reviewers": "591;285;344;43", "wc_reply_authors": "3090;2177;1506;260", "reply_reviewers": "2;1;2;1", "reply_authors": "9;8;5;4", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.5, 17.269916039170543 ], "wc_strengths_avg": [ 64.5, 37.8714932369982 ], "wc_weaknesses_avg": [ 118.0, 49.06628170138838 ], "wc_questions_avg": [ 119.0, 26.182054923172092 ], "wc_limitations_avg": [ 24.5, 16.332482971061076 ], "wc_review_avg": [ 405.5, 98.887562413076 ], "wc_reply_reviewers_avg": [ 315.75, 194.87095088801718 ], "wc_reply_authors_avg": [ 1758.25, 1031.660403185079 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 6.5, 2.0615528128088303 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17655877445749660700&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "virginia.edu;virginia.edu;virginia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Charlottesville", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Gloss-free Sign Language Translation by Reducing Representation Density", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95941", "id": "FtzLbGoHW2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FtzLbGoHW2", "openreview": "https://openreview.net/forum?id=FtzLbGoHW2", "poster": "", "project": "", "author_site": "Jinhui Ye, Xing Wang, Wenxiang Jiao, Junwei Liang, Hui Xiong", "tldr": "", "abstract": "Gloss-free sign language translation (SLT) aims to develop well-performing SLT systems with no requirement for the costly gloss annotations, but currently still lags behind gloss-based approaches significantly. In this paper, we identify **a representation density problem** that could be a bottleneck in restricting the performance of gloss-free SLT. Specifically, the representation density problem describes that the visual representations of semantically distinct sign gestures tend to be closely packed together in feature space, which makes gloss-free methods struggle with distinguishing different sign gestures and suffer from a sharp performance drop. To address the representation density problem, we introduce a simple but effective contrastive learning strategy, namely SignCL, which encourages gloss-free models to learn more discriminative feature representation in a self-supervised manner. Our experiments demonstrate that the proposed SignCL can significantly reduce the representation density and improve performance across various translation frameworks. Specifically, SignCLachieves a significant improvement in BLEU score for the Sign Language Transformer and GFSLT-VLP on the CSL-Daily dataset by 39\\% and 46\\%, respectively, without any increase of model parameters. Compared to Sign2GPT, a state-of-the-art method based on large-scale pre-trained vision and language models, SignCLachieves better performance with only 35\\% of its parameters. We will release our code and model to facilitate further research.", "keywords": "Gloss-free Sign Language Translation; Representation Density; Performance Drop; Contrastive Learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9793305142c77858d72a0d2e15496392029d34dc.zip", "author": "Jinhui Ye;Xing Wang;Wenxiang Jiao;Junwei Liang;Hui Xiong", "authorids": "~Jinhui_Ye1;~Xing_Wang1;~Wenxiang_Jiao1;~Junwei_Liang1;~Hui_Xiong1", "gender": "M;M;M;M;M", "homepage": "https://jhuiye.com;http://xingwang4nlp.com/;https://wxjiao.github.io/;https://junweiliang.me/;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "254/8172;02/3674-7;239/4883;62/10704-1;262/1686-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;6AqRKa0AAAAJ;CvtODukAAAAJ;bMedjfUAAAAJ;cVDF1tkAAAAJ", "orcid": ";0000-0002-0737-9653;;0000-0003-2219-5569;0000-0001-6016-6465", "linkedin": ";;;junweiliang/;", "or_profile": "~Jinhui_Ye1;~Xing_Wang1;~Wenxiang_Jiao1;~Junwei_Liang1;~Hui_Xiong1", "aff": "Hong Kong University of Science and Technology(Guangzhou));Tencent AI Lab;Tencent AI Lab;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;tencent.com;tencent.com;ust.hk;hkust.edu", "position": "MS student;Researcher;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nye2024improving,\ntitle={Improving Gloss-free Sign Language Translation by Reducing Representation Density},\nauthor={Jinhui Ye and Xing Wang and Wenxiang Jiao and Junwei Liang and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FtzLbGoHW2}\n}", "github": "", "reviewers": "rKGZ;9u2m;bAiC;UuUf;rTJg", "pdf_size": 1901534, "rating": "4;5;5;6;6", "confidence": "5;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "1;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "57;73;120;89;113", "wc_strengths": "36;114;42;97;88", "wc_weaknesses": "142;166;193;129;151", "wc_questions": "152;326;91;33;2", "wc_limitations": "10;6;4;8;1", "wc_review": "397;685;450;356;355", "wc_reply_reviewers": "604;139;0;0;0", "wc_reply_authors": "950;44;155;0;129", "reply_reviewers": "3;1;0;0;0", "reply_authors": "5;2;2;1;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 90.4, 23.69472515139182 ], "wc_strengths_avg": [ 75.4, 30.92959747555729 ], "wc_weaknesses_avg": [ 156.2, 21.994544778194435 ], "wc_questions_avg": [ 120.8, 114.70902318475211 ], "wc_limitations_avg": [ 5.8, 3.1240998703626617 ], "wc_review_avg": [ 448.6, 123.1951297738673 ], "wc_reply_reviewers_avg": [ 148.6, 233.9774348094277 ], "wc_reply_authors_avg": [ 255.6, 351.69452654256645 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690604 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8017837257372733, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15757329090917034024&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hkust.edu;tencent.com;tencent.com;ust.hk;hkust.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com", "aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Power of Extrapolation in Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95940", "id": "FuTfZK7PK3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FuTfZK7PK3", "openreview": "https://openreview.net/forum?id=FuTfZK7PK3", "poster": "/media/PosterPDFs/NeurIPS%202024/95940.png?t=1729068972.161373", "project": "", "author_site": "Hanmin Li, Kirill Acharya, Peter Richtarik", "tldr": "", "abstract": "We propose and study several server-extrapolation strategies for enhancing the theoretical and empirical convergence properties of the popular federated learning optimizer FedProx [Li et al., 2020]. While it has long been known that some form of extrapolation can help in the practice of FL, only a handful of works provide any theoretical guarantees. The phenomenon seems elusive, and our current theoretical understanding remains severely incomplete. In our work, we focus on smooth convex or strongly convex problems in the interpolation regime. In particular, we propose Extrapolated FedProx (FedExProx), and study three extrapolation strategies: a constant strategy (depending on various smoothness parameters and the number of participating devices), and two smoothness-adaptive strategies; one based on the notion of gradient diversity (FedExProx-GraDS), and the other one based on the stochastic Polyak stepsize (FedExProx-StoPS). Our theory is corroborated with carefully constructed numerical experiments.", "keywords": "Federated Learning;Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/4efe172c482caa125236bdecd38788082bda9f32.zip", "author": "Hanmin Li;Kirill Acharya;Peter Richt\u00e1rik", "authorids": "~Hanmin_Li1;~Kirill_Acharya1;~Peter_Richt\u00e1rik1", "gender": "M;;M", "homepage": "https://cemse.kaust.edu.sa/ai/people/person/hanmin-li;;https://richtarik.org", "dblp": "340/3604;;62/8001", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2587-640X;;0000-0003-4380-5848", "linkedin": "hanmin-li-034b8b245/;kirillacharya/;richtarik/", "or_profile": "~Hanmin_Li1;~Kirill_Acharya1;~Peter_Richtarik1", "aff": "King Abdullah University of Science and Technology;Moscow Institute of Physics and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;phystech.edu;kaust.edu.sa", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nli2024the,\ntitle={The Power of Extrapolation in Federated Learning},\nauthor={Hanmin Li and Kirill Acharya and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FuTfZK7PK3}\n}", "github": "", "reviewers": "xABN;HgaS;eQ2L", "pdf_size": 4854010, "rating": "5;6;6", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "68;83;88", "wc_strengths": "58;48;45", "wc_weaknesses": "154;299;80", "wc_questions": "98;1;93", "wc_limitations": "13;1;1", "wc_review": "391;432;307", "wc_reply_reviewers": "141;163;53", "wc_reply_authors": "228;218;2", "reply_reviewers": "2;1;1", "reply_authors": "5;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 79.66666666666667, 8.498365855987975 ], "wc_strengths_avg": [ 50.333333333333336, 5.557777333511022 ], "wc_weaknesses_avg": [ 177.66666666666666, 90.95908726210678 ], "wc_questions_avg": [ 64.0, 44.594469014292194 ], "wc_limitations_avg": [ 5.0, 5.656854249492381 ], "wc_review_avg": [ 376.6666666666667, 52.02777036246017 ], "wc_reply_reviewers_avg": [ 119.0, 47.525431788324305 ], "wc_reply_authors_avg": [ 149.33333333333334, 104.26035786540453 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8742987381810873578&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kaust.edu.sa;phystech.edu;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "King Abdullah University of Science and Technology;Moscow Institute of Physics and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.mipt.ru/en", "aff_unique_abbr": "KAUST;MIPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Saudi Arabia;Russian Federation" }, { "title": "Scalable Neural Network Verification with Branch-and-bound Inferred Cutting Planes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95939", "id": "FwhM1Zpyft", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FwhM1Zpyft", "openreview": "https://openreview.net/forum?id=FwhM1Zpyft", "poster": "", "project": "", "author_site": "Duo Zhou, Christopher Brix, Grani A. Hanasusanto, Huan Zhang", "tldr": "", "abstract": "Recently, cutting-plane methods such as GCP-CROWN have been explored to enhance neural network verifiers and made significant advancements. However, GCP-CROWN currently relies on ${\\it generic}$ cutting planes (\"cuts\") generated from external mixed integer programming (MIP) solvers. Due to the poor scalability of MIP solvers, large neural networks cannot benefit from these cutting planes. In this paper, we exploit the structure of the neural network verification problem to generate efficient and scalable cutting planes ${\\it specific}$ to this problem setting. We propose a novel approach, Branch-and-bound Inferred Cuts with COnstraint Strengthening (BICCOS), that leverages the logical relationships of neurons within verified subproblems in the branch-and-bound search tree, and we introduce cuts that preclude these relationships in other subproblems. We develop a mechanism that assigns influence scores to neurons in each path to allow the strengthening of these cuts. Furthermore, we design a multi-tree search technique to identify more cuts, effectively narrowing the search space and accelerating the BaB algorithm. Our results demonstrate that BICCOS can generate hundreds of useful cuts during the branch-and-bound process and consistently increase the number of verifiable instances compared to other state-of-the-art neural network verifiers on a wide range of benchmarks, including large networks that previous cutting plane methods could not scale to.", "keywords": "Neuron Network Verification; AI Safety; Robustness; Formal Methods", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/cda22ebc5d8d692288129461489bcd5bae2daa64.zip", "author": "Duo Zhou;Christopher Brix;Grani A. Hanasusanto;Huan Zhang", "authorids": "~Duo_Zhou1;~Christopher_Brix1;~Grani_A._Hanasusanto1;~Huan_Zhang1", "gender": "M;M;;M", "homepage": "https://www.duo-zhou.com;https://christopher-brix.de/;http://grani.hanasusanto.com/;http://huan-zhang.com", "dblp": ";228/5443;;23/1797-1.html", "google_scholar": "QnBzRsIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;LTa3GzEAAAAJ", "orcid": "0000-0003-2673-4451;;;", "linkedin": ";christopher-brix;;", "or_profile": "~Duo_Zhou1;~Christopher_Brix1;~Grani_A._Hanasusanto1;~Huan_Zhang1", "aff": "University of Illinois, Urbana Champaign;RWTH Aachen University, Rheinisch Westf\u00e4lische Technische Hochschule Aachen;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;cs.rwth-aachen.de;illinois.edu;uiuc.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024scalable,\ntitle={Scalable Neural Network Verification with Branch-and-bound Inferred Cutting Planes},\nauthor={Duo Zhou and Christopher Brix and Grani A. Hanasusanto and Huan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FwhM1Zpyft}\n}", "github": "", "reviewers": "vA2E;iWFh;1a6g;SS1K", "pdf_size": 2116870, "rating": "4;6;6;7", "confidence": "4;5;5;4", "soundness": "3;3;2;4", "novelty": "2;3;2;3", "presentation": "2;2;1;4", "wc_summary": "236;111;94;110", "wc_strengths": "50;183;62;145", "wc_weaknesses": "766;731;432;67", "wc_questions": "240;168;131;730", "wc_limitations": "4;10;7;14", "wc_review": "1296;1203;726;1066", "wc_reply_reviewers": "72;368;268;495", "wc_reply_authors": "83;829;1128;86", "reply_reviewers": "1;3;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 137.75, 57.12431618846741 ], "wc_strengths_avg": [ 110.0, 55.80770556114989 ], "wc_weaknesses_avg": [ 499.0, 281.16987747623324 ], "wc_questions_avg": [ 317.25, 241.50297617213747 ], "wc_limitations_avg": [ 8.75, 3.6996621467371855 ], "wc_review_avg": [ 1072.75, 216.26762933920554 ], "wc_reply_reviewers_avg": [ 300.75, 154.6405105397677 ], "wc_reply_authors_avg": [ 531.5, 459.33130744594365 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12908784612789743276&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "illinois.edu;cs.rwth-aachen.de;illinois.edu;uiuc.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;RWTH Aachen University", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.rwth-aachen.de", "aff_unique_abbr": "UIUC;RWTH", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Urbana-Champaign;Aachen", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Neural Network Reparametrization for Accelerated Optimization in Molecular Simulations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95938", "id": "FwxOHl0BEl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FwxOHl0BEl", "openreview": "https://openreview.net/forum?id=FwxOHl0BEl", "poster": "", "project": "", "author_site": "Nima Dehmamy, Csaba Both, Jeet Mohapatra, Subhro Das, Tommi Jaakkola", "tldr": "", "abstract": "We propose a novel approach to molecular simulations using neural network reparametrization, which offers a flexible alternative to traditional coarse-graining methods. \nUnlike conventional techniques that strictly reduce degrees of freedom, the complexity of the system can be adjusted in our model, sometimes increasing it to simplify the optimization process. \nOur approach also maintains continuous access to fine-grained modes and eliminates the need for force-matching, enhancing both the efficiency and accuracy of energy minimization.\nImportantly, our framework allows for the use of potentially arbitrary neural networks (e.g., Graph Neural Networks (GNN)) to perform the reparametrization, incorporating CG modes as needed. \nIn fact, our experiments using very weak molecular forces (Lennard-Jones potential) the GNN-based model is the sole model to find the correct configuration. \nSimilarly, in protein-folding scenarios, our GNN-based CG method consistently outperforms traditional optimization methods. \nIt not only recovers the target structures more accurately but also achieves faster convergence to the deepest energy states.\nThis work demonstrates significant advancements in molecular simulations by optimizing energy minimization and convergence speeds, offering a new, efficient framework for simulating complex molecular systems.", "keywords": "coarse-graining;molecular dynamics;protein-folding;repametrization;hessian;graph neural networks", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/d860befb1b03b58d0caff71b941ab41fc2635a78.zip", "author": "Nima Dehmamy;Csaba Both;Jeet Mohapatra;Subhro Das;Tommi Jaakkola", "authorids": "~Nima_Dehmamy1;~Csaba_Both1;~Jeet_Mohapatra1;~Subhro_Das1;~Tommi_S._Jaakkola1", "gender": "M;;M;;", "homepage": ";https://www.networkscienceinstitute.org/people/csaba-both;;;", "dblp": "198/1338;;210/2304;;", "google_scholar": "gvHpUtgAAAAJ;N1Ei6bcAAAAJ;;;", "orcid": "0000-0003-1617-5502;;;;", "linkedin": "nima-dehmamy-57770a4a/;;;;", "or_profile": "~Nima_Dehmamy1;~Csaba_Both1;~Jeet_Mohapatra1;~Subhro_Das1;~Tommi_S._Jaakkola1", "aff": "International Business Machines;Northeastern University;;;", "aff_domain": "ibm.com;northeastern.edu;;;", "position": "Researcher;PhD student;;;", "bibtex": "@inproceedings{\ndehmamy2024neural,\ntitle={Neural Network Reparametrization for Accelerated Optimization in Molecular Simulations},\nauthor={Nima Dehmamy and Csaba Both and Jeet Mohapatra and Subhro Das and Tommi Jaakkola},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FwxOHl0BEl}\n}", "github": "", "reviewers": "rxS5;qFP8;ti8n;fqNB", "pdf_size": 8741698, "rating": "5;6;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "1;2;2;2", "wc_summary": "65;105;65;163", "wc_strengths": "54;135;54;60", "wc_weaknesses": "131;364;100;263", "wc_questions": "58;169;64;134", "wc_limitations": "2;22;40;1", "wc_review": "310;795;323;621", "wc_reply_reviewers": "13;34;128;46", "wc_reply_authors": "215;98;182;59", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.5, 40.13415004706092 ], "wc_strengths_avg": [ 75.75, 34.29559009552103 ], "wc_weaknesses_avg": [ 214.5, 105.81233387464809 ], "wc_questions_avg": [ 106.25, 46.959423974320636 ], "wc_limitations_avg": [ 16.25, 16.068213964221414 ], "wc_review_avg": [ 512.25, 205.2405600752444 ], "wc_reply_reviewers_avg": [ 55.25, 43.631267458097064 ], "wc_reply_authors_avg": [ 138.5, 62.659795722616266 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J1Mql9AKvP0J:scholar.google.com/&scioq=Neural+Network+Reparametrization+for+Accelerated+Optimization+in+Molecular+Simulations&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ibm.com;northeastern.edu;;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "International Business Machines Corporation;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.northeastern.edu", "aff_unique_abbr": "IBM;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Plaintext-Ciphertext Cryptographic Problems via ANF-based SAT Instance Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95937", "id": "FzwAQJK4CG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=FzwAQJK4CG", "openreview": "https://openreview.net/forum?id=FzwAQJK4CG", "poster": "/media/PosterPDFs/NeurIPS%202024/95937.png?t=1731747153.5453365", "project": "", "author_site": "Xinhao Zheng, Yang Li, Cunxin Fan, Huaijin Wu, Xinhao Song, Junchi Yan", "tldr": "", "abstract": "Cryptographic problems, operating within binary variable spaces, can be routinely transformed into Boolean Satisfiability (SAT) problems regarding specific cryptographic conditions like plaintext-ciphertext matching. With the fast development of learning for discrete data, this SAT representation also facilitates the utilization of machine-learning approaches with the hope of automatically capturing patterns and strategies inherent in cryptographic structures in a data-driven manner. Existing neural SAT solvers consistently adopt conjunctive normal form (CNF) for instance representation, which in the cryptographic context can lead to scale explosion and a loss of high-level semantics. In particular, extensively used XOR operations in cryptographic problems can incur an exponential number of clauses. In this paper, we propose a graph structure based on Arithmetic Normal Form (ANF) to efficiently handle the XOR operation bottleneck. Additionally, we design an encoding method for AND operations in these ANF-based graphs, demonstrating improved efficiency over alternative general graph forms for SAT. We then propose CryptoANFNet, a graph learning approach that trains a classifier based on a message-passing scheme to predict plaintext-ciphertext satisfiability. \nUsing ANF-based SAT instances, CryptoANFNet demonstrates superior scalability and can naturally capture higher-order operational information. Empirically, CryptoANFNet achieves a 50x speedup over heuristic solvers and outperforms SOTA learning-based SAT solver NeuroSAT, with 96\\% vs. 91\\% accuracy on small-scale and 72\\% vs. 55\\% on large-scale datasets from real encryption algorithms. We also introduce a key-solving algorithm that simplifies ANF-based SAT instances from plaintext and ciphertext, enhancing key decryption accuracy from 76.5\\% to 82\\% and from 72\\% to 75\\% for datasets generated from two real encryption algorithms.", "keywords": "Boolean Satisfiability Problem;Cryptography", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xinhao Zheng;Yang Li;Cunxin Fan;Huaijin Wu;Xinhao Song;Junchi Yan", "authorids": "~Xinhao_Zheng2;~Yang_Li32;~Cunxin_Fan1;~Huaijin_Wu1;~Xinhao_Song1;~Junchi_Yan2", "gender": "M;M;M;F;M;M", "homepage": "https://github.com/void-zxh;https://yangco-le.github.io;https://alfayoung.github.io/;https://github.com/wuhuaijin;https://github.com/sxh4396;http://thinklab.sjtu.edu.cn/", "dblp": ";;;314/5447;;60/7949.html", "google_scholar": ";ecE0xDIAAAAJ;;;;ga230VoAAAAJ", "orcid": ";0000-0002-5249-3471;;0009-0005-2626-3460;;0000-0001-9639-7679", "linkedin": ";;;;;", "or_profile": "~Xinhao_Zheng2;~Yang_Li32;~Cunxin_Fan1;~Huaijin_Wu1;~Xinhao_Song1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;Undergrad student;PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nzheng2024learning,\ntitle={Learning Plaintext-Ciphertext Cryptographic Problems via {ANF}-based {SAT} Instance Representation},\nauthor={Xinhao Zheng and Yang Li and Cunxin Fan and Huaijin Wu and Xinhao Song and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=FzwAQJK4CG}\n}", "github": "", "reviewers": "ZcPU;6xjG;imRj;uZGu", "pdf_size": 789000, "rating": "5;5;5;6", "confidence": "4;2;1;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "81;101;49;83", "wc_strengths": "91;38;40;112", "wc_weaknesses": "121;51;113;29", "wc_questions": "63;110;1;46", "wc_limitations": "5;3;6;1", "wc_review": "361;303;209;271", "wc_reply_reviewers": "116;0;0;0", "wc_reply_authors": "1107;1254;1688;943", "reply_reviewers": "1;0;0;0", "reply_authors": "5;4;5;5", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.5, 18.728320800328042 ], "wc_strengths_avg": [ 70.25, 32.12767498590584 ], "wc_weaknesses_avg": [ 78.5, 39.37956322764386 ], "wc_questions_avg": [ 55.0, 39.00640972968417 ], "wc_limitations_avg": [ 3.75, 1.920286436967152 ], "wc_review_avg": [ 286.0, 54.92722457943783 ], "wc_reply_reviewers_avg": [ 29.0, 50.22947341949744 ], "wc_reply_authors_avg": [ 1248.0, 276.83117598998854 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 0.4330127018922193 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1750273519467086625&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Linguistic Collapse: Neural Collapse in (Large) Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95936", "id": "G0LfcMiRkc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G0LfcMiRkc", "openreview": "https://openreview.net/forum?id=G0LfcMiRkc", "poster": "", "project": "", "author_site": "Robert Wu, Vardan Papyan", "tldr": "", "abstract": "Neural collapse ($\\mathcal{NC}$) is a phenomenon observed in classification tasks where top-layer representations collapse into their class means, which become equinorm, equiangular and aligned with the classifiers.\nThese behaviors -- associated with generalization and robustness -- would manifest under specific conditions: models are trained towards zero loss, with noise-free labels belonging to balanced classes, which do not outnumber the model's hidden dimension.\nRecent studies have explored $\\mathcal{NC}$ in the absence of one or more of these conditions to extend and capitalize on the associated benefits of ideal geometries.\nLanguage modeling presents a curious frontier, as \\textit{training by token prediction} constitutes a classification task where none of the conditions exist: the vocabulary is imbalanced and exceeds the embedding dimension; different tokens might correspond to similar contextual embeddings; and large language models (LLMs) in particular are typically only trained for a few epochs.\nThis paper empirically investigates the impact of scaling the architectures and training of causal language models (CLMs) on their progression towards $\\mathcal{NC}$.\nWe find that $\\mathcal{NC}$ properties that develop with scale (and regularization) are linked to generalization.\nMoreover, there is evidence of some relationship between $\\mathcal{NC}$ and generalization independent of scale.\nOur work thereby underscores the generality of $\\mathcal{NC}$ as it extends to the novel and more challenging setting of language modeling.\nDownstream, we seek to inspire further research on the phenomenon to deepen our understanding of LLMs -- and neural networks at large -- and improve existing architectures based on $\\mathcal{NC}$-related properties.\nOur code is hosted on GitHub: [`https://github.com/rhubarbwu/linguistic-collapse`](https://github.com/rhubarbwu/linguistic-collapse).", "keywords": "neural collapse;uniformity;large language models;LLM;GPT;language modeling;geometry;unconstrained features;generative model;transformer;attention;causal;autoregressive", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Robert Wu;Vardan Papyan", "authorids": "~Robert_Wu1;~Vardan_Papyan1", "gender": "M;M", "homepage": "https://www.cs.toronto.edu/~rupert;https://sites.google.com/view/vardan-papyan", "dblp": ";173/9783", "google_scholar": "X0M3q_sAAAAJ;https://scholar.google.co.il/citations?user=VrE-Gd4AAAAJ", "orcid": "0009-0005-6465-0154;", "linkedin": "wu-robert/;", "or_profile": "~Robert_Wu1;~Vardan_Papyan1", "aff": "Department of Computer Science, University of Toronto;University of Toronto", "aff_domain": "cs.toronto.edu;toronto.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nwu2024linguistic,\ntitle={Linguistic Collapse: Neural Collapse in (Large) Language Models},\nauthor={Robert Wu and Vardan Papyan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G0LfcMiRkc}\n}", "github": "", "reviewers": "3JhA;XbAS;NZtr;Nr3s", "pdf_size": 1994196, "rating": "5;5;7;8", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "3;2;4;4", "wc_summary": "223;148;65;107", "wc_strengths": "45;79;64;76", "wc_weaknesses": "59;298;171;25", "wc_questions": "269;68;2;36", "wc_limitations": "1;1;1;7", "wc_review": "597;594;303;251", "wc_reply_reviewers": "135;81;9;46", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 135.75, 58.298263267442195 ], "wc_strengths_avg": [ 66.0, 13.360389215887388 ], "wc_weaknesses_avg": [ 138.25, 106.88632980882073 ], "wc_questions_avg": [ 93.75, 103.83731265783028 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 436.25, 160.31122075512994 ], "wc_reply_reviewers_avg": [ 67.75, 46.42938186105863 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8279278177994426934&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.toronto.edu;toronto.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Diffusion of Thought: Chain-of-Thought Reasoning in Diffusion Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95935", "id": "G0v0TxX01N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G0v0TxX01N", "openreview": "https://openreview.net/forum?id=G0v0TxX01N", "poster": "/media/PosterPDFs/NeurIPS%202024/95935.png?t=1730806584.4839995", "project": "", "author_site": "Jiacheng Ye, Shansan Gong, Liheng Chen, Lin Zheng, Jiahui Gao, Han Shi, Chuan Wu, Xin Jiang, Zhenguo Li, Wei Bi, Lingpeng Kong", "tldr": "", "abstract": "Recently, diffusion models have garnered significant interest in the field of text processing due to their many potential advantages compared to conventional autoregressive models.\nIn this work, we propose Diffusion-of-Thought (DoT), a novel approach that integrates diffusion models with Chain-of-Thought, a well-established technique for improving the reasoning ability of autoregressive language models. In contrast to autoregressive language models that make decisions in a left-to-right, token-by-token manner, DoT allows reasoning steps to diffuse over time through a diffusion language model and offers greater flexibility in trading-off computation for reasoning performance. Our experimental results demonstrate the effectiveness of DoT in multi-digit multiplication, boolean logic, and grade school math problems. In addition to that, DoT showcases promising self-correction abilities and benefits from existing reasoning-enhancing techniques like self-consistency decoding. Our findings contribute to the understanding and development of reasoning with diffusion language models.", "keywords": "text diffusion model;mathematical reasoning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/f2f9ebed21db41b02698eab658ae77a3875bd5e7.zip", "author": "Jiacheng Ye;Shansan Gong;Liheng Chen;Lin Zheng;Jiahui Gao;Han Shi;Chuan Wu;Xin Jiang;Zhenguo Li;Wei Bi;Lingpeng Kong", "authorids": "~Jiacheng_Ye2;~Shansan_Gong1;~Liheng_Chen2;~Lin_Zheng1;~Jiahui_Gao2;~Han_Shi1;~Chuan_Wu1;~Xin_Jiang1;~Zhenguo_Li1;~Wei_Bi1;~Lingpeng_Kong1", "gender": "M;F;M;M;;M;;M;M;F;M", "homepage": "https://jiacheng-ye.github.io/;https://summmeer.github.io/;;https://lzhengisme.github.io/;;https://han-shi.github.io/;https://i.cs.hku.hk/~cwu/;;http://www.ee.columbia.edu/~zgli/;https://scholar.google.com.hk/citations?hl=en&user=aSJcgQMAAAAJ&view_op=list_works&sortby=pubdate#d=gsc_md_iad&u=%2Fcitations%3Fview_op%3Dimport_lookup%26hl%3Den%26imq%3DWei%2BBi%26json%3D%26btnA%3D1;https://ikekonglp.github.io/", "dblp": ";320/4745;;;;;34/3772-1;42/4142-2;23/6479;38/1163;144/7656", "google_scholar": "gh0CyxgAAAAJ;F86VNoMAAAAJ;;3NXH0t8AAAAJ;;https://scholar.google.com.hk/citations?user=Johp_14AAAAJ;mY7MWXMAAAAJ;DUfcez0AAAAJ;XboZC1AAAAAJ;https://scholar.google.com.hk/citations?hl=en;f1hBi5wAAAAJ", "orcid": ";0000-0001-5028-2323;;;;;;0000-0002-9117-8247;;0000-0001-8457-0630;", "linkedin": ";;liheng-chen-b579b6243/;;;;;xin-jiang-9577b76/;;;", "or_profile": "~Jiacheng_Ye2;~Shansan_Gong1;~Liheng_Chen2;~Lin_Zheng1;~Jiahui_Gao2;~Han_Shi1;~Chuan_Wu1;~Xin_Jiang1;~Zhenguo_Li1;~Wei_Bi1;~Lingpeng_Kong1", "aff": "University of Hong Kong;University of Hong Kong;The Univerisity of Hong Kong;The University of Hong Kong;;Huawei Technologies Ltd.;The University of Hong Kong;Noah\u2019s Ark Lab, Huawei Technologies;Huawei Noah's Ark Lab;Hong Kong University of Science and Technology;Department of Computer Science, The University of Hong Kong", "aff_domain": "hku.hk;hku.hk;connect.hku.hk;hku.hk;;huawei.com;hku.hk;huawei.com;huawei.com;ust.hk;cs.hku.hk", "position": "PhD student;PhD student;Undergrad student;PhD student;;Principal Researcher;Full Professor;Principal Researcher;Principal Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nye2024diffusion,\ntitle={Diffusion of Thought: Chain-of-Thought Reasoning in Diffusion Language Models},\nauthor={Jiacheng Ye and Shansan Gong and Liheng Chen and Lin Zheng and Jiahui Gao and Han Shi and Chuan Wu and Xin Jiang and Zhenguo Li and Wei Bi and Lingpeng Kong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G0v0TxX01N}\n}", "github": "", "reviewers": "Y1KX;5GKr;E7FJ;abfr;q6Ny", "pdf_size": 1629420, "rating": "5;5;5;6;7", "confidence": "4;3;2;3;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;2;2;3;3", "wc_summary": "129;64;56;75;37", "wc_strengths": "45;28;38;128;46", "wc_weaknesses": "253;44;37;234;93", "wc_questions": "34;64;75;279;110", "wc_limitations": "33;5;14;29;7", "wc_review": "494;205;220;745;293", "wc_reply_reviewers": "40;0;45;0;219", "wc_reply_authors": "110;222;230;384;600", "reply_reviewers": "1;0;1;0;1", "reply_authors": "3;4;4;5;3", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 72.2, 30.992902413294566 ], "wc_strengths_avg": [ 57.0, 36.07769393960761 ], "wc_weaknesses_avg": [ 132.2, 93.09650906451864 ], "wc_questions_avg": [ 112.4, 86.7746506763352 ], "wc_limitations_avg": [ 17.6, 11.412274094149685 ], "wc_review_avg": [ 391.4, 204.62903019855224 ], "wc_reply_reviewers_avg": [ 60.8, 81.36682370597983 ], "wc_reply_authors_avg": [ 309.2, 169.53866815567474 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 3.8, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.4677071733467427, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17837604724712338751&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "hku.hk;hku.hk;connect.hku.hk;hku.hk;;huawei.com;hku.hk;huawei.com;huawei.com;ust.hk;cs.hku.hk", "author_num": 11, "aff_unique_index": "0;0;0;0;1;0;1;1;2;0", "aff_unique_norm": "University of Hong Kong;Huawei;Hong Kong University of Science and Technology", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com;https://www.ust.hk", "aff_unique_abbr": "HKU;Huawei;HKUST", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AmoebaLLM: Constructing Any-Shape Large Language Models for Efficient and Instant Deployment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95934", "id": "G0yxFmP87g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G0yxFmP87g", "openreview": "https://openreview.net/forum?id=G0yxFmP87g", "poster": "", "project": "", "author_site": "Yonggan Fu, Zhongzhi Yu, Junwei Li, Jiayi Qian, Yongan Zhang, Xiangchi Yuan, Dachuan Shi, Roman Yakunin, Yingyan (Celine) Lin", "tldr": "", "abstract": "Motivated by the transformative capabilities of large language models (LLMs) across various natural language tasks, there has been a growing demand to deploy these models effectively across diverse real-world applications and platforms. However, the challenge of efficiently deploying LLMs has become increasingly pronounced due to the varying application-specific performance requirements and the rapid evolution of computational platforms, which feature diverse resource constraints and deployment flows. These varying requirements necessitate LLMs that can adapt their structures (depth and width) for optimal efficiency across different platforms and application specifications. To address this critical gap, we propose AmoebaLLM, a novel framework designed to enable the instant derivation of LLM subnets of arbitrary shapes, which achieve the accuracy-efficiency frontier and can be extracted immediately after a one-time fine-tuning. In this way, AmoebaLLM significantly facilitates rapid deployment tailored to various platforms and applications. Specifically, AmoebaLLM integrates three innovative components: (1) a knowledge-preserving subnet selection strategy that features a dynamic-programming approach for depth shrinking and an importance-driven method for width shrinking; (2) a shape-aware mixture of LoRAs to mitigate gradient conflicts among subnets during fine-tuning; and (3) an in-place distillation scheme with loss-magnitude balancing as the fine-tuning objective. Extensive experiments validate that AmoebaLLM not only sets new standards in LLM adaptability but also successfully delivers subnets that achieve state-of-the-art trade-offs between accuracy and efficiency.", "keywords": "Efficient Large Language Models;Model Compression", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yonggan Fu;Zhongzhi Yu;Junwei Li;Jiayi Qian;Yongan Zhang;Xiangchi Yuan;Dachuan Shi;Roman Yakunin;Yingyan Celine Lin", "authorids": "~Yonggan_Fu1;~Zhongzhi_Yu1;~Junwei_Li4;~Jiayi_Qian1;~Yongan_Zhang1;~Xiangchi_Yuan1;~Dachuan_Shi2;~Roman_Yakunin1;~Yingyan_Celine_Lin1", "gender": "M;M;M;M;M;M;M;M;F", "homepage": "https://www.yongganfu.com/;;;https://jiayi-19.github.io/;;https://xiangchi-yuan.github.io/;https://www.dachuanshi.com;https://github.com/RomanAYakunin;https://eiclab.scs.gatech.edu/", "dblp": "244/8166;198/8338;;;137/8349;292/1211;283/0549;;120/6981", "google_scholar": "https://scholar.google.com/citations?hl=en;KjvcaBQAAAAJ;;-yGPXHkAAAAJ;s3Qbrl0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;AR9ONz4AAAAJ;dio8IesAAAAJ", "orcid": ";;;;;;;;", "linkedin": "yonggan-fu-b211831b0;zhongzhi-yu/;junwei-li-50b176197/;;yongan-zhang-141a71136/;;;ryakunin/;yingyan-celine-lin-a281211a/", "or_profile": "~Yonggan_Fu1;~Zhongzhi_Yu1;~Junwei_Li4;~Jiayi_Qian1;~Yongan_Zhang1;~Xiangchi_Yuan1;~Dachuan_Shi2;~Roman_Yakunin1;~Yingyan_Lin1", "aff": "Georgia Institute of Technology;Nvidia Research;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Brandeis University;Tsinghua University;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;nivida.com;gatech.edu;gatech.edu;gatech.edu;brandeis.edu;tsinghua.edu.cn;gatech.edu;gatech.edu", "position": "PhD student;Research Intern;MS student;MS student;PhD student;MS student;MS student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nfu2024amoeballm,\ntitle={Amoeba{LLM}: Constructing Any-Shape Large Language Models for Efficient and Instant Deployment},\nauthor={Yonggan Fu and Zhongzhi Yu and Junwei Li and Jiayi Qian and Yongan Zhang and Xiangchi Yuan and Dachuan Shi and Roman Yakunin and Yingyan Celine Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G0yxFmP87g}\n}", "github": "", "reviewers": "U1ac;LhJH;TKvU;wNCb", "pdf_size": 931011, "rating": "5;5;5;7", "confidence": "3;4;5;3", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "65;125;41;159", "wc_strengths": "77;70;22;70", "wc_weaknesses": "186;162;196;332", "wc_questions": "2;35;5;74", "wc_limitations": "1;6;1;10", "wc_review": "331;398;265;645", "wc_reply_reviewers": "0;110;10;38", "wc_reply_authors": "61;364;84;740", "reply_reviewers": "0;1;1;1", "reply_authors": "2;4;3;4", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.5, 46.869499677295465 ], "wc_strengths_avg": [ 59.75, 21.98152633462927 ], "wc_weaknesses_avg": [ 219.0, 66.40030120413611 ], "wc_questions_avg": [ 29.0, 29.008619408720573 ], "wc_limitations_avg": [ 4.5, 3.774917217635375 ], "wc_review_avg": [ 409.75, 143.73130313192044 ], "wc_reply_reviewers_avg": [ 39.5, 43.02034402465885 ], "wc_reply_authors_avg": [ 312.25, 274.2593435053763 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17787634566590874124&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "gatech.edu;nivida.com;gatech.edu;gatech.edu;gatech.edu;brandeis.edu;tsinghua.edu.cn;gatech.edu;gatech.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;2;3;0;0", "aff_unique_norm": "Georgia Institute of Technology;NVIDIA;Brandeis University;Tsinghua University", "aff_unique_dep": ";NVIDIA Research;;", "aff_unique_url": "https://www.gatech.edu;https://www.nvidia.com/research;https://www.brandeis.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Georgia Tech;NVIDIA;Brandeis;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Continuous Temporal Domain Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95933", "id": "G24fOpC3JE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G24fOpC3JE", "openreview": "https://openreview.net/forum?id=G24fOpC3JE", "poster": "/media/PosterPDFs/NeurIPS%202024/95933.png?t=1733221748.9491642", "project": "", "author_site": "Zekun CAI, Guangji Bai, Renhe Jiang, Xuan Song, Liang Zhao", "tldr": "", "abstract": "Temporal Domain Generalization (TDG) addresses the challenge of training predictive models under temporally varying data distributions. Traditional TDG approaches typically focus on domain data collected at fixed, discrete time intervals, which limits their capability to capture the inherent dynamics within continuous-evolving and irregularly-observed temporal domains. To overcome this, this work formalizes the concept of Continuous Temporal Domain Generalization (CTDG), where domain data are derived from continuous times and are collected at arbitrary times. CTDG tackles critical challenges including: 1) Characterizing the continuous dynamics of both data and models, 2) Learning complex high-dimensional nonlinear dynamics, and 3) Optimizing and controlling the generalization across continuous temporal domains. To address them, we propose a Koopman operator-driven continuous temporal domain generalization (Koodos) framework. We formulate the problem within a continuous dynamic system and leverage the Koopman theory to learn the underlying dynamics; the framework is further enhanced with a comprehensive optimization strategy equipped with analysis and control driven by prior knowledge of the dynamics patterns. Extensive experiments demonstrate the effectiveness and efficiency of our approach. The code can be found at: https://github.com/Zekun-Cai/Koodos.", "keywords": "Domain Generalization;Temporal Domain Generalization;Continuous Dynamics;Koopman Operator;Concept Drift;Neural ODEs", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Zekun Cai;Guangji Bai;Renhe Jiang;Xuan Song;Liang Zhao", "authorids": "~Zekun_Cai1;~Guangji_Bai1;~Renhe_Jiang1;~Xuan_Song2;~Liang_Zhao6", "gender": "M;M;M;;M", "homepage": "https://www.linkedin.com/in/zekun-cai;https://baithebest.github.io/;https://www.renhejiang.com/;;https://cs.emory.edu/~lzhao41/", "dblp": "199/2102;286/0892;213/1173;;63/5422-2", "google_scholar": "SkvqeScAAAAJ;gBMbU28AAAAJ;Yo2lwasAAAAJ;;qnvyqtwAAAAJ", "orcid": "0000-0002-5773-1395;0000-0003-3932-2472;0000-0003-2593-4638;;0000-0002-2648-9989", "linkedin": ";https://linkedin.com/in/guangji-bai/;renhejiang/;;", "or_profile": "~Zekun_Cai1;~Guangji_Bai1;~Renhe_Jiang1;~Xuan_Song2;~Liang_Zhao6", "aff": "The University of Tokyo;Emory University;The University of Tokyo;;Emory University", "aff_domain": "u-tokyo.ac.jp;emory.edu;u-tokyo.ac.jp;;emory.edu", "position": "PhD student;PhD student;Lecturer;;Associate Professor", "bibtex": "@inproceedings{\ncai2024continuous,\ntitle={Continuous Temporal Domain Generalization},\nauthor={Zekun Cai and Guangji Bai and Renhe Jiang and Xuan Song and Liang Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G24fOpC3JE}\n}", "github": "", "reviewers": "Drn2;nh98;xaRP;2Dt9", "pdf_size": 6281236, "rating": "5;6;6;6", "confidence": "5;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "86;72;82;73", "wc_strengths": "40;72;38;128", "wc_weaknesses": "209;130;48;111", "wc_questions": "194;23;137;69", "wc_limitations": "12;6;1;4", "wc_review": "541;303;306;385", "wc_reply_reviewers": "23;19;19;346", "wc_reply_authors": "129;25;20;746", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 5.931905258852336 ], "wc_strengths_avg": [ 69.5, 36.36963018783666 ], "wc_weaknesses_avg": [ 124.5, 57.45650528878345 ], "wc_questions_avg": [ 105.75, 65.12056126907999 ], "wc_limitations_avg": [ 5.75, 4.02336923485777 ], "wc_review_avg": [ 383.75, 96.55924347259563 ], "wc_reply_reviewers_avg": [ 101.75, 141.02725800355051 ], "wc_reply_authors_avg": [ 230.0, 301.0739111912555 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17217079121895460213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u-tokyo.ac.jp;emory.edu;u-tokyo.ac.jp;;emory.edu", "author_num": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Tokyo;Emory University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.emory.edu", "aff_unique_abbr": "UTokyo;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Japan;United States" }, { "title": "Achievable distributional robustness when the robust risk is only partially identified", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95932", "id": "G2dYZJO4BE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G2dYZJO4BE", "openreview": "https://openreview.net/forum?id=G2dYZJO4BE", "poster": "/media/PosterPDFs/NeurIPS%202024/95932.png?t=1733946616.7985845", "project": "", "author_site": "Julia Kostin, Nicola Gnecco, Fanny Yang", "tldr": "", "abstract": "In safety-critical applications, machine learning models should generalize well under worst-case distribution shifts, that is, have a small robust risk. Invariance-based algorithms can provably take advantage of structural assumptions on the shifts when the training distributions are heterogeneous enough to identify the robust risk. However, in practice, such identifiability conditions are rarely satisfied \u2013 a scenario so far underexplored in the theoretical literature. In this paper, we aim to fill the gap and propose to study the more general setting of partially identifiable robustness. In particular, we define a new risk measure, the identifiable robust risk, and its corresponding (population) minimax quantity that is an algorithm-independent measure for the best achievable robustness under partial identifiability. We introduce these concepts broadly, and then study them within the framework of linear structural causal models for concreteness of the presentation. We use the introduced minimax quantity to show how previous approaches provably achieve suboptimal robustness in the partially identifiable case. We confirm our findings through empirical simulations and real-world experiments and demonstrate how the test error of existing robustness methods grows increasingly suboptimal as the proportion of previously unseen test directions increases.", "keywords": "distributional robustness;domain generalization;causal inference;partial identification", "primary_area": "causal_inference", "supplementary_material": "", "author": "Julia Kostin;Nicola Gnecco;Fanny Yang", "authorids": "~Julia_Kostin1;~Nicola_Gnecco1;~Fanny_Yang1", "gender": "F;M;", "homepage": "https://juliakostin.github.io;https://www.ngnecco.com/;http://www.fanny-yang.de", "dblp": ";329/0472;126/4852", "google_scholar": "igqc1B4AAAAJ;44HESVMAAAAJ;BfDKicQAAAAJ", "orcid": ";0000-0002-0044-5208;", "linkedin": "julia-kostin-math/;nicola-gnecco/;", "or_profile": "~Julia_Kostin1;~Nicola_Gnecco1;~Fanny_Yang1", "aff": "ETHZ - ETH Zurich;University of California, Berkeley;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;berkeley.edu;ethz.ch", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nkostin2024achievable,\ntitle={Achievable distributional robustness when the robust risk is only partially identified},\nauthor={Julia Kostin and Nicola Gnecco and Fanny Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G2dYZJO4BE}\n}", "github": "", "reviewers": "PdW8;2y9v;faJC;iX3g", "pdf_size": 1107791, "rating": "6;7;7;7", "confidence": "4;1;3;4", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "328;70;101;90", "wc_strengths": "223;27;80;26", "wc_weaknesses": "805;40;98;82", "wc_questions": "196;16;36;194", "wc_limitations": "6;1;16;18", "wc_review": "1558;154;331;410", "wc_reply_reviewers": "114;8;65;100", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 147.25, 104.946117126838 ], "wc_strengths_avg": [ 89.0, 80.38967595406764 ], "wc_weaknesses_avg": [ 256.25, 317.52824677499166 ], "wc_questions_avg": [ 110.5, 84.79829007710002 ], "wc_limitations_avg": [ 10.25, 7.013380069552769 ], "wc_review_avg": [ 613.25, 553.2718025527779 ], "wc_reply_reviewers_avg": [ 71.75, 40.90461465409496 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RW4s8IUB3m0J:scholar.google.com/&scioq=Achievable+distributional+robustness+when+the+robust+risk+is+only+partially+identified&hl=en&as_sdt=0,44", "gs_version_total": 6, "email": "ethz.ch;berkeley.edu;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "ETH Zurich;University of California, Berkeley;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.berkeley.edu;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;UC Berkeley;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Hybrid Generative AI for De Novo Design of Co-Crystals with Enhanced Tabletability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95931", "id": "G4vFNmraxj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G4vFNmraxj", "openreview": "https://openreview.net/forum?id=G4vFNmraxj", "poster": "/media/PosterPDFs/NeurIPS%202024/95931.png?t=1730227620.9842486", "project": "", "author_site": "Nina Gubina, Andrei Dmitrenko, Gleb Solovev, Lyubov Yamshchikova, Oleg Petrov, Ivan Lebedev, Nikita Serov, Grigorii Kirgizov, Nikolay Nikitin, Vladimir Vinogradov", "tldr": "", "abstract": "Co-crystallization is an accessible way to control physicochemical characteristics of organic crystals, which finds many biomedical applications. In this work, we present Generative Method for Co-crystal Design (GEMCODE), a novel pipeline for automated co-crystal screening based on the hybridization of deep generative models and evolutionary optimization for broader exploration of the target chemical space. GEMCODE enables fast *de novo* co-crystal design with target tabletability profiles, which is crucial for the development of pharmaceuticals. With a series of experimental studies highlighting validation and discovery cases, we show that GEMCODE is effective even under realistic computational constraints. Furthermore, we explore the potential of language models in generating co-crystals. Finally, we present numerous previously unknown co-crystals predicted by GEMCODE and discuss its potential in accelerating drug development.", "keywords": "Co-crystals;Tabletability;Generative Design;Evolutionary Optimization", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Nina Gubina;Andrei Dmitrenko;Gleb Vitalevich Solovev;Lyubov Yamshchikova;Oleg Petrov;Ivan Lebedev;Nikita Serov;Grigorii Kirgizov;Nikolay Nikitin;Vladimir Vinogradov", "authorids": "~Nina_Gubina1;~Andrei_Dmitrenko1;~Gleb_Vitalevich_Solovev1;~Lyubov_Yamshchikova1;~Oleg_Petrov1;~Ivan_Lebedev1;~Nikita_Serov1;~Grigorii_Kirgizov1;~Nikolay_Nikitin1;~Vladimir_Vinogradov1", "gender": "F;M;M;F;;M;M;M;M;M", "homepage": ";;https://github.com/SoloWayG;;https://orcid.org/0009-0000-3818-064X;;;;;", "dblp": ";;;;;;379/6157;;220/9846;", "google_scholar": "https://scholar.google.ru/citations?user=avr_srsAAAAJ;;https://scholar.google.com/citations?view_op=list_works;;;DnxDAJkAAAAJ;bFOt1bsAAAAJ;;eQBTGccAAAAJ;JO7dXjkAAAAJ", "orcid": "0000-0003-3299-2924;;0009-0005-5479-2482;;;0000-0003-3017-4186;;0000-0001-9094-2023;0000-0002-6839-9957;", "linkedin": ";andrei-dmitrenko/;;lyubov-yamschikova-614155221/;;;;gkirgizov;nikolay-nikitin-59415920a/;", "or_profile": "~Nina_Gubina1;~Andrei_Dmitrenko1;~Gleb_Vitalevich_Solovev1;~Lyubov_Yamshchikova1;~Oleg_Petrov1;~Ivan_Lebedev1;~Nikita_Serov1;~Grigorii_Kirgizov1;~Nikolay_Nikitin1;~Vladimir_Vinogradov1", "aff": "ITMO University;ITMO University;ITMO;ITMO University;;Ivanovo State University of Chemistry and Technology;ITMO University;;ITMO University;ITMO University", "aff_domain": "itmo.ru;itmo.ru;itmo.ru;ifmo.ru;;isuct.ru;itmo.ru;;itmo.ru;itmo.ru", "position": "MS student;Researcher;PhD student;MS student;;PhD student;Principal Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\ngubina2024hybrid,\ntitle={Hybrid Generative {AI} for De Novo Design of Co-Crystals with Enhanced Tabletability},\nauthor={Nina Gubina and Andrei Dmitrenko and Gleb Vitalevich Solovev and Lyubov Yamshchikova and Oleg Petrov and Ivan Lebedev and Nikita Serov and Grigorii Kirgizov and Nikolay Nikitin and Vladimir Vinogradov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G4vFNmraxj}\n}", "github": "", "reviewers": "XDyo;qBrF;rNY2;3KPG", "pdf_size": 3997063, "rating": "4;5;5;8", "confidence": "5;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "87;59;155;146", "wc_strengths": "80;66;137;122", "wc_weaknesses": "455;280;167;398", "wc_questions": "65;158;161;191", "wc_limitations": "1;10;76;240", "wc_review": "688;573;696;1097", "wc_reply_reviewers": "317;21;125;228", "wc_reply_authors": "908;0;75;232", "reply_reviewers": "1;1;1;2", "reply_authors": "3;1;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 111.75, 40.12091100660602 ], "wc_strengths_avg": [ 101.25, 29.166547618804664 ], "wc_weaknesses_avg": [ 325.0, 110.92565077564342 ], "wc_questions_avg": [ 143.75, 47.261903262564445 ], "wc_limitations_avg": [ 81.75, 95.8446007868988 ], "wc_review_avg": [ 763.5, 198.60073010943339 ], "wc_reply_reviewers_avg": [ 172.75, 110.87013799937294 ], "wc_reply_authors_avg": [ 303.75, 358.76759538732034 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.10050378152592121, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13746056560622766690&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "itmo.ru;itmo.ru;itmo.ru;ifmo.ru;;isuct.ru;itmo.ru;;itmo.ru;itmo.ru", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;0;0", "aff_unique_norm": "ITMO University;Ivanovo State University of Chemistry and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.itmo.ru;http://www.isuct.ru", "aff_unique_abbr": "ITMO;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Transferability Bound Theory: Exploring Relationship between Adversarial Transferability and Flatness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95930", "id": "G522UpazH3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G522UpazH3", "openreview": "https://openreview.net/forum?id=G522UpazH3", "poster": "/media/PosterPDFs/NeurIPS%202024/95930.png?t=1730709096.2472272", "project": "", "author_site": "Mingyuan Fan, Xiaodan Li, Cen Chen, Wenmeng Zhou, Yaliang Li", "tldr": "", "abstract": "A prevailing belief in attack and defense community is that the higher flatness of adversarial examples enables their better cross-model transferability, leading to a growing interest in employing sharpness-aware minimization and its variants. However, the theoretical relationship between the transferability of adversarial examples and their flatness has not been well established, making the belief questionable. To bridge this gap, we embark on a theoretical investigation and, for the first time, derive a theoretical bound for the transferability of adversarial examples with few practical assumptions. Our analysis challenges this belief by demonstrating that the increased flatness of adversarial examples does not necessarily guarantee improved transferability. Moreover, building upon the theoretical analysis, we propose TPA, a Theoretically Provable Attack that optimizes a surrogate of the derived bound to craft adversarial examples. Extensive experiments across widely used benchmark datasets and various real-world applications show that TPA can craft more transferable adversarial examples compared to state-of-the-art baselines. We hope that these results can recalibrate preconceived impressions within the community and facilitate the development of stronger adversarial attack and defense mechanisms.", "keywords": "Adversarial examples; transferability; transfer-based attack; theoretical analysis", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Mingyuan Fan;Xiaodan Li;Cen Chen;Wenmeng Zhou;Yaliang Li", "authorids": "~Mingyuan_Fan1;~Xiaodan_Li1;~Cen_Chen1;~Wenmeng_Zhou1;~Yaliang_Li1", "gender": ";F;F;;M", "homepage": ";;https://sites.google.com/site/chencenpersonalwebsite/;;https://sites.google.com/site/yaliangli/", "dblp": ";126/7789;152/6215-1.html;;https://dblp.org/pers/hd/l/Li:Yaliang", "google_scholar": ";YximuHAAAAAJ;https://scholar.google.com.sg/citations?user=3Mn4S9UAAAAJ;;CCPBcdYAAAAJ", "orcid": ";;0000-0003-0325-1705;;0000-0002-4204-6096", "linkedin": ";;;;", "or_profile": "~Mingyuan_Fan1;~Xiaodan_Li1;~Cen_Chen1;~Wenmeng_Zhou1;~Yaliang_Li1", "aff": ";Alibaba Group;East China Normal University;;Alibaba Group", "aff_domain": ";alibaba-inc.com;dase.ecnu.edu.cn;;alibaba-inc.com", "position": ";Researcher;Associate Professor;;Staff Engineer", "bibtex": "@inproceedings{\nfan2024transferability,\ntitle={Transferability Bound Theory: Exploring Relationship between Adversarial Transferability and Flatness},\nauthor={Mingyuan Fan and Xiaodan Li and Cen Chen and Wenmeng Zhou and Yaliang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G522UpazH3}\n}", "github": "", "reviewers": "QVii;x5ig;dRSh", "pdf_size": 20364595, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "2;2;3", "wc_summary": "54;75;59", "wc_strengths": "14;55;36", "wc_weaknesses": "246;49;29", "wc_questions": "4;171;150", "wc_limitations": "7;3;42", "wc_review": "325;353;316", "wc_reply_reviewers": "246;272;91", "wc_reply_authors": "1354;1023;0", "reply_reviewers": "4;2;1", "reply_authors": "10;5;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.666666666666664, 8.9566858950296 ], "wc_strengths_avg": [ 35.0, 16.753109164172084 ], "wc_weaknesses_avg": [ 108.0, 97.92173745735248 ], "wc_questions_avg": [ 108.33333333333333, 74.27127454286901 ], "wc_limitations_avg": [ 17.333333333333332, 17.518244457961217 ], "wc_review_avg": [ 331.3333333333333, 15.755069730795297 ], "wc_reply_reviewers_avg": [ 203.0, 79.9041091976293 ], "wc_reply_authors_avg": [ 792.3333333333334, 576.329959504295 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.247219128924647 ], "reply_authors_avg": [ 5.333333333333333, 3.6817870057290873 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kgllJeV4RRgJ:scholar.google.com/&scioq=Transferability+Bound+Theory:+Exploring+Relationship+between+Adversarial+Transferability+and+Flatness&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": ";alibaba-inc.com;dase.ecnu.edu.cn;;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Alibaba Group;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;http://www.ecnu.edu.cn", "aff_unique_abbr": "Alibaba;ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Where Do Large Learning Rates Lead Us?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95929", "id": "G5lMFOtFHa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G5lMFOtFHa", "openreview": "https://openreview.net/forum?id=G5lMFOtFHa", "poster": "/media/PosterPDFs/NeurIPS%202024/95929.png?t=1733764262.5980139", "project": "", "author_site": "Ildus Sadrtdinov, Maxim Kodryan, Eduard Pokonechny, Ekaterina Lobacheva, Dmitry Vetrov", "tldr": "", "abstract": "It is generally accepted that starting neural networks training with large learning rates (LRs) improves generalization. Following a line of research devoted to understanding this effect, we conduct an empirical study in a controlled setting focusing on two questions: 1) how large an initial LR is required for obtaining optimal quality, and 2) what are the key differences between models trained with different LRs? We discover that only a narrow range of initial LRs slightly above the convergence threshold lead to optimal results after fine-tuning with a small LR or weight averaging. By studying the local geometry of reached minima, we observe that using LRs from this optimal range allows for the optimization to locate a basin that only contains high-quality minima. Additionally, we show that these initial LRs result in a sparse set of learned features, with a clear focus on those most relevant for the task. In contrast, starting training with too small LRs leads to unstable minima and attempts to learn all features simultaneously, resulting in poor generalization. Conversely, using initial LRs that are too large fails to detect a basin with good solutions and extract meaningful patterns from the data.", "keywords": "neural network training;large learning rate;generalization;loss landscape;feature learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/172ce085d7c778f62649e72cbfda7f5c2a673d2a.zip", "author": "Ildus Sadrtdinov;Maxim Kodryan;Eduard Pokonechny;Ekaterina Lobacheva;Dmitry Vetrov", "authorids": "~Ildus_Sadrtdinov1;~Maxim_Kodryan1;~Eduard_Pokonechny1;~Ekaterina_Lobacheva1;~Dmitry_P._Vetrov1", "gender": "Not Specified;M;M;;M", "homepage": ";https://maxbourdon.github.io/;https://www.linkedin.com/in/celidos;https://tipt0p.github.io/;https://constructor.university/faculty-member/dmitry-vetrov", "dblp": "298/1173;246/0265;;176/1464;89/3348", "google_scholar": "XhqNegUAAAAJ;BGVWciMAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.ru/citations?user=7HU0UoUAAAAJ", "orcid": "0009-0000-1295-6091;0000-0002-6554-5672;;;", "linkedin": ";;;ekaterina-lobacheva-164412a8/;", "or_profile": "~Ildus_Sadrtdinov1;~Maxim_Kodryan1;~Eduard_Pokonechny1;~Ekaterina_Lobacheva1;~Dmitry_P._Vetrov1", "aff": "Constructor University;Higher School of Economics;;Universit\u00e9 de Montr\u00e9al;National Research University Higher School of Economics", "aff_domain": "constuctor.university;hse.ru;;umontreal.ca;hse.ru", "position": "PhD student;Researcher;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nsadrtdinov2024where,\ntitle={Where Do Large Learning Rates Lead Us?},\nauthor={Ildus Sadrtdinov and Maxim Kodryan and Eduard Pokonechny and Ekaterina Lobacheva and Dmitry Vetrov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G5lMFOtFHa}\n}", "github": "", "reviewers": "NvNo;vwNi;Zzfe", "pdf_size": 13905317, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "102;77;97", "wc_strengths": "125;89;142", "wc_weaknesses": "93;178;102", "wc_questions": "90;52;121", "wc_limitations": "12;7;33", "wc_review": "422;403;495", "wc_reply_reviewers": "13;38;48", "wc_reply_authors": "31;17;18", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 92.0, 10.801234497346433 ], "wc_strengths_avg": [ 118.66666666666667, 22.095751225568733 ], "wc_weaknesses_avg": [ 124.33333333333333, 38.12552367582058 ], "wc_questions_avg": [ 87.66666666666667, 28.21740991342441 ], "wc_limitations_avg": [ 17.333333333333332, 11.264496832477201 ], "wc_review_avg": [ 440.0, 39.65686153324121 ], "wc_reply_reviewers_avg": [ 33.0, 14.719601443879744 ], "wc_reply_authors_avg": [ 22.0, 6.377042156569663 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17361134976693036523&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "constuctor.university;hse.ru;;umontreal.ca;hse.ru", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Constructor University;Higher School of Economics;Universit\u00e9 de Montr\u00e9al;National Research University Higher School of Economics", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.hse.ru;https://www.umontreal.ca;https://hse.ru", "aff_unique_abbr": ";HSE;UdeM;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2;1", "aff_country_unique": ";Russian Federation;Canada" }, { "title": "An effective framework for estimating individualized treatment rules", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95928", "id": "G7L65B2P0y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G7L65B2P0y", "openreview": "https://openreview.net/forum?id=G7L65B2P0y", "poster": "/media/PosterPDFs/NeurIPS%202024/95928.png?t=1731641271.279995", "project": "", "author_site": "Joowon Lee, Jared Huling, Guanhua Chen", "tldr": "", "abstract": "Estimating individualized treatment rules (ITRs) is fundamental in causal inference, particularly for precision medicine applications. Traditional ITR estimation methods rely on inverse probability weighting (IPW) to address confounding factors and $L_{1}$-penalization for simplicity and interpretability. However, IPW can introduce statistical bias without precise propensity score modeling, while $L_1$-penalization makes the objective non-smooth, leading to computational bias and requiring subgradient methods. In this paper, we propose a unified ITR estimation framework formulated as a constrained, weighted, and smooth convex optimization problem. The optimal ITR can be robustly and effectively computed by projected gradient descent. Our comprehensive theoretical analysis reveals that weights that balance the spectrum of a `weighted design matrix' improve both the optimization and likelihood landscapes, yielding improved computational and statistical estimation guarantees. In particular, this is achieved by distributional covariate balancing weights, which are model-free alternatives to IPW. Extensive simulations and applications demonstrate that our framework achieves significant gains in both robustness and effectiveness for ITR learning against existing methods.", "keywords": "Covariate Balancing;Multicategory Treatment;Precision Medicine;Projected Gradient Descent;Convergence Analysis;Constrained Optimization", "primary_area": "causal_inference", "supplementary_material": "", "author": "Joowon Lee;Jared Davis Huling;Guanhua Chen", "authorids": "~Joowon_Lee1;~Jared_Davis_Huling1;~Guanhua_Chen2", "gender": "F;;", "homepage": "https://ljw9510.github.io/joowonlee/;https://jaredhuling.org;https://biostat.wiscweb.wisc.edu/staff/chen-guanhua/", "dblp": ";292/3841.html;85/3682-2", "google_scholar": "uBtJWX8AAAAJ;2-MWWU4AAAAJ;jvFDKDwAAAAJ", "orcid": ";;0000-0002-9314-2037", "linkedin": ";;", "or_profile": "~Joowon_Lee1;~Jared_Davis_Huling1;~Guanhua_Chen2", "aff": "University of Wisconsin - Madison;University of Minnesota - Twin Cities;University of Wisconsin - Madison", "aff_domain": "wisc.edu;umn.edu;wisc.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlee2024an,\ntitle={An effective framework for estimating individualized treatment rules},\nauthor={Joowon Lee and Jared Davis Huling and Guanhua Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G7L65B2P0y}\n}", "github": "", "reviewers": "3MV8;1Znf;oq1w;kG5t", "pdf_size": 1395152, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "2;2;4;3", "novelty": "2;2;2;3", "presentation": "3;2;4;3", "wc_summary": "97;44;69;109", "wc_strengths": "45;17;65;98", "wc_weaknesses": "126;148;266;265", "wc_questions": "83;140;39;157", "wc_limitations": "10;4;10;9", "wc_review": "361;353;449;638", "wc_reply_reviewers": "372;10;81;107", "wc_reply_authors": "451;26;677;158", "reply_reviewers": "3;1;2;1", "reply_authors": "4;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.75, 25.232667318379164 ], "wc_strengths_avg": [ 56.25, 29.524354353651834 ], "wc_weaknesses_avg": [ 201.25, 64.72007030280483 ], "wc_questions_avg": [ 104.75, 46.82080200082011 ], "wc_limitations_avg": [ 8.25, 2.48746859276655 ], "wc_review_avg": [ 450.25, 114.7549018560863 ], "wc_reply_reviewers_avg": [ 142.5, 137.1759818627153 ], "wc_reply_authors_avg": [ 328.0, 253.49260344238843 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vwTwkajYbzYJ:scholar.google.com/&scioq=An+effective+framework+for+estimating+individualized+treatment+rules&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "wisc.edu;umn.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Wisconsin-Madison;University of Minnesota", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.minnesota.edu", "aff_unique_abbr": "UW-Madison;UMN", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Madison;Twin Cities", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "L-TTA: Lightweight Test-Time Adaptation Using a Versatile Stem Layer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95927", "id": "G7NZljVOol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G7NZljVOol", "openreview": "https://openreview.net/forum?id=G7NZljVOol", "poster": "/media/PosterPDFs/NeurIPS%202024/95927.png?t=1731397859.0486073", "project": "", "author_site": "Jin Shin, Hyun Kim", "tldr": "", "abstract": "Test-time adaptation (TTA) is the most realistic methodology for adapting deep learning models to the real world using only unlabeled data from the target domain. Numerous TTA studies in deep learning have aimed at minimizing entropy. However, this necessitates forward/backward processes across the entire model and is limited by the incapability to fully leverage data based solely on entropy. This study presents a groundbreaking TTA solution that involves a departure from the conventional focus on minimizing entropy. Our innovative approach uniquely remodels the stem layer (i.e., the first layer) to emphasize minimizing a new learning criterion, namely, uncertainty. This method requires minimal involvement of the model's backbone, with only the stem layer participating in the TTA process. This approach significantly reduces the memory required for training and enables rapid adaptation to the target domain with minimal parameter updates. Moreover, to maximize data leveraging, the stem layer applies a discrete wavelet transform to the input features. It extracts multi-frequency domains and focuses on minimizing their individual uncertainties. The proposed method integrated into ResNet-26 and ResNet-50 models demonstrates its robustness by achieving outstanding TTA performance while using the least amount of memory compared to existing studies on CIFAR-10-C, ImageNet-C, and Cityscapes-C benchmark datasets. The code is available at https://github.com/janus103/L_TTA.", "keywords": "Test-Time Adaptation;Domain Shift;Discrete Wavelet Transform;Gaussian Modeling", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/d1e56cf3e4abb082b57dcf4cda30aa782cff577e.zip", "author": "Jin Shin;Hyun Kim", "authorids": "~Jin_Shin1;~Hyun_Kim3", "gender": "M;M", "homepage": "https://idsl.seoultech.ac.kr/?p=people;https://idsl.seoultech.ac.kr/", "dblp": ";43/6179-1", "google_scholar": "PuGkS_wAAAAJ;https://scholar.google.co.kr/citations?user=mCuJP1UAAAAJ", "orcid": "0000-0002-5554-1619;0000-0002-7962-657X", "linkedin": ";", "or_profile": "~Jin_Shin1;~Hyun_Kim3", "aff": "Seoul National University of Science and Technology;Seoul National University of Science & Technology", "aff_domain": "seoultech.ac.kr;seoultech.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nshin2024ltta,\ntitle={L-{TTA}: Lightweight Test-Time Adaptation Using a Versatile Stem Layer},\nauthor={Jin Shin and Hyun Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G7NZljVOol}\n}", "github": "", "reviewers": "H2Kq;iLad;sFE2;ah9h", "pdf_size": 3814677, "rating": "4;5;5;8", "confidence": "4;4;3;4", "soundness": "3;3;2;3", "novelty": "2;4;2;3", "presentation": "2;2;1;2", "wc_summary": "68;69;81;94", "wc_strengths": "18;54;39;79", "wc_weaknesses": "293;223;171;452", "wc_questions": "4;159;152;216", "wc_limitations": "7;20;1;9", "wc_review": "390;525;444;850", "wc_reply_reviewers": "0;202;224;0", "wc_reply_authors": "0;679;1559;0", "reply_reviewers": "0;4;2;0", "reply_authors": "1;5;5;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 10.559356040971437 ], "wc_strengths_avg": [ 47.5, 22.23173407541571 ], "wc_weaknesses_avg": [ 284.75, 105.82148883851521 ], "wc_questions_avg": [ 132.75, 78.36891922184458 ], "wc_limitations_avg": [ 9.25, 6.869315832017043 ], "wc_review_avg": [ 552.25, 178.49422259557872 ], "wc_reply_reviewers_avg": [ 106.5, 106.78365979867894 ], "wc_reply_authors_avg": [ 559.5, 640.1876677974983 ], "reply_reviewers_avg": [ 1.5, 1.6583123951777 ], "reply_authors_avg": [ 3.0, 2.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MWgu8GkwVq4J:scholar.google.com/&scioq=L-TTA:+Lightweight+Test-Time+Adaptation+Using+a+Versatile+Stem+Layer&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "seoultech.ac.kr;seoultech.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.snust.ac.kr", "aff_unique_abbr": "SNUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Nimbus: Secure and Efficient Two-Party Inference for Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95926", "id": "G7QS68ICPJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G7QS68ICPJ", "openreview": "https://openreview.net/forum?id=G7QS68ICPJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95926.png?t=1731032660.0819383", "project": "", "author_site": "Zhengyi Li, Kang Yang, Jin Tan, Wen-jie Lu, Haoqi Wu, Xiao Wang, Yu Yu, Derun Zhao, Yancheng Zheng, Minyi Guo, Jingwen Leng", "tldr": "", "abstract": "Transformer models have gained significant attention due to their power in machine learning tasks. Their extensive deployment has raised concerns about the potential leakage of sensitive information during inference. However, when being applied to Transformers, existing approaches based on secure two-party computation (2PC) bring about efficiency limitations in two folds: (1) resource-intensive matrix multiplications in linear layers, and (2) complex non-linear activation functions like $\\mathsf{GELU}$ and $\\mathsf{Softmax}$. This work presents a new two-party inference framework $\\mathsf{Nimbus}$ for Transformer models. Specifically, we propose a new 2PC paradigm to securely compute matrix multiplications based on an outer-product insight, which achieves $2.9\\times \\sim 12.5\\times$ performance improvements compared to the state-of-the-art (SOTA) protocol. Furthermore, through a new observation of utilizing the input distribution, we propose an approach of low-degree polynomial approximation for $\\mathsf{GELU}$ and $\\mathsf{Softmax}$, which improves the performance of the SOTA polynomial approximation by $2.9\\times \\sim 4.0\\times$, where the average accuracy loss of our approach is 0.08\\% compared to the non-2PC inference without privacy. Compared with the SOTA two-party inference, $\\mathsf{Nimbus}$ improves the end-to-end performance of $BERT_{base}$ inference by $2.7\\times \\sim 4.7\\times$ across different network settings.", "keywords": "Secure inferece;Transformer;Multi-party computation;homomorphic encryption", "primary_area": "privacy", "supplementary_material": "", "author": "Zhengyi Li;Kang Yang;Jin Tan;Wen-jie Lu;Haoqi Wu;Xiao Wang;Yu Yu;Derun Zhao;Yancheng Zheng;Minyi Guo;Jingwen Leng", "authorids": "~Zhengyi_Li3;~Kang_Yang1;~Jin_Tan2;~Wen-jie_Lu1;~Haoqi_Wu1;~Xiao_Wang11;~Yu_Yu1;~Derun_Zhao1;~Yancheng_Zheng1;~Minyi_Guo1;~Jingwen_Leng1", "gender": "M;;M;M;M;M;M;M;M;M;M", "homepage": ";;https://github.com/rivertalk;https://fionser.github.io;;https://wangxiao1254.github.io/;http://yuyu.hk/;;https://www.linkedin.com/in/yancheng-zheng-bab7746a/;http://www.cs.sjtu.edu.cn/~guo-my/;http://cs.sjtu.edu.cn/~leng-jw/", "dblp": ";https://dblp.uni-trier.de/pid/86/8501-2.html;;231/4234;;150/9413;;305/0380.html;;;131/5131", "google_scholar": "https://scholar.google.com.hk/citations?user=wYky-vsAAAAJ;;;;gCfQJOEAAAAJ;QbWLR8QAAAAJ;https://scholar.google.com.hk/citations?user=lpRkCB4AAAAJ;;;https://scholar.google.com.tw/citations?user=8R8FO9IAAAAJ;L1y8y2MAAAAJ", "orcid": "0009-0006-1802-6614;;;;0000-0003-0650-5459;;;;;;", "linkedin": "%E6%AD%A3%E4%B8%80-%E6%9D%8E-6642951b1/;;;;;;;;;;", "or_profile": "~Zhengyi_Li3;~Kang_Yang1;~Jin_Tan2;~Wen-jie_Lu1;~Haoqi_Wu1;~Xiao_Wang11;~Yu_Yu1;~Derun_Zhao1;~Yancheng_Zheng1;~Minyi_Guo1;~Jingwen_Leng1", "aff": "Shanghai Jiaotong University;State Key Laboratory of Cryptology;Alibaba Group;Ant Group;Ant Group;Northwestern University;Shanghai Jiaotong University;;Ant Group;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sklc.org;antgroup.com;antgroup.com;antgroup.com;northwestern.edu;sjtu.edu.cn;;antgroup.com;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Associate Professor;Researcher;Researcher;Researcher;Assistant Professor;Full Professor;;Senior Software Engineer;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024nimbus,\ntitle={Nimbus: Secure and Efficient Two-Party Inference for Transformers},\nauthor={Zhengyi Li and Kang Yang and Jin Tan and Wen-jie Lu and Haoqi Wu and Xiao Wang and Yu Yu and Derun Zhao and Yancheng Zheng and Minyi Guo and Jingwen Leng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G7QS68ICPJ}\n}", "github": "", "reviewers": "7qmF;dGsE;zib5;5SDb", "pdf_size": 1136778, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "47;43;81;42", "wc_strengths": "30;99;114;22", "wc_weaknesses": "52;163;244;81", "wc_questions": "279;2;37;16", "wc_limitations": "7;2;29;10", "wc_review": "415;309;505;171", "wc_reply_reviewers": "99;0;0;0", "wc_reply_authors": "376;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 16.13032857693854 ], "wc_strengths_avg": [ 66.25, 40.69628361410904 ], "wc_weaknesses_avg": [ 135.0, 74.94998332221296 ], "wc_questions_avg": [ 83.5, 113.55725428170584 ], "wc_limitations_avg": [ 12.0, 10.222524150130436 ], "wc_review_avg": [ 350.0, 124.47088012864695 ], "wc_reply_reviewers_avg": [ 24.75, 42.868257487329714 ], "wc_reply_authors_avg": [ 94.0, 162.81277591147446 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3kr7xTjAVWgJ:scholar.google.com/&scioq=Nimbus:+Secure+and+Efficient+Two-Party+Inference+for+Transformers&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "sjtu.edu.cn;sklc.org;antgroup.com;antgroup.com;antgroup.com;northwestern.edu;sjtu.edu.cn;;antgroup.com;sjtu.edu.cn;sjtu.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;3;3;4;0;3;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;State Key Laboratory of Cryptology;Alibaba Group;Ant Group;Northwestern University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sjtu.edu.cn;;https://www.alibaba.com;https://www.antgroup.com;https://www.northwestern.edu", "aff_unique_abbr": "SJTU;;Alibaba;Ant Group;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Confusion-Resistant Federated Learning via Diffusion-Based Data Harmonization on Non-IID Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95925", "id": "G89r8Mgi5r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G89r8Mgi5r", "openreview": "https://openreview.net/forum?id=G89r8Mgi5r", "poster": "", "project": "", "author_site": "xiaohong chen, Canran Xiao, Yongmei liu", "tldr": "", "abstract": "Federated learning has become a pivotal distributed learning paradigm, involving collaborative model updates across multiple nodes with private data. However, handling non-i.i.d. (not identically and independently distributed) data and ensuring model consistency across heterogeneous environments present significant challenges. These challenges often lead to model performance degradation and increased difficulty in achieving effective communication among participant models. In this work, we propose Confusion-Resistant Federated Learning via Consistent Diffusion (CRFed), a novel framework designed to address these issues. Our approach introduces a new diffusion-based data harmonization mechanism that includes data augmentation, noise injection, and iterative denoising to ensure consistent model updates across non-i.i.d. data distributions. This mechanism aims to reduce data distribution disparities among participating nodes, enhancing the coordination and consistency of model updates. Moreover, we design a confusion-resistant strategy leveraging an indicator function and adaptive learning rate adjustment to mitigate the adverse effects of data heterogeneity and model inconsistency. Specifically, we calculate importance sampling weights based on the optimal sampling probability, which guides the selection of clients and the sampling of their data, ensuring that model updates are robust and aligned across different nodes. Extensive experiments on benchmark datasets, including MNIST, FashionMNIST, CIFAR-10, CIFAR-100, and NIPD, demonstrate the effectiveness of CRFed in improving accuracy, convergence speed, and overall robustness in federated learning scenarios with severe data heterogeneity.", "keywords": "Federated Learning;Non-IID Data;Importance Sampling;Diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "xiaohong chen;Canran Xiao;Yongmei liu", "authorids": "~xiaohong_chen2;~Canran_Xiao1;~Yongmei_liu2", "gender": "F;Not Specified;F", "homepage": "https://iecd.csu.edu.cn/info/1206/4985.htm;;https://bsoa.csu.edu.cn/blog/content2?name=%E5%88%98%E5%92%8F%E6%A2%85", "dblp": ";;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=2cNErowAAAAJ", "orcid": ";;", "linkedin": ";https://www.linkedin.cn/incareer/in/%E7%B2%B2%E7%84%B6-%E8%82%96-7b363a276;", "or_profile": "~xiaohong_chen2;~Canran_Xiao1;~Yongmei_liu2", "aff": "Central South University;Central South University;Central South University", "aff_domain": "csu.edu.cn;csu.edu.cn;csu.edu.cn", "position": "Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024confusionresistant,\ntitle={Confusion-Resistant Federated Learning via Diffusion-Based Data Harmonization on Non-{IID} Data},\nauthor={xiaohong chen and Canran Xiao and Yongmei liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G89r8Mgi5r}\n}", "github": "", "reviewers": "CpsC;nZWV;FLG7;cbew", "pdf_size": 2102214, "rating": "4;6;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "1;2;3;3", "wc_summary": "61;77;151;98", "wc_strengths": "53;43;137;143", "wc_weaknesses": "245;50;111;138", "wc_questions": "68;201;2;5", "wc_limitations": "6;1;18;17", "wc_review": "433;372;419;401", "wc_reply_reviewers": "88;66;0;50", "wc_reply_authors": "635;11;0;18", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 96.75, 33.95861451826325 ], "wc_strengths_avg": [ 94.0, 46.184412955021955 ], "wc_weaknesses_avg": [ 136.0, 70.5443123150265 ], "wc_questions_avg": [ 69.0, 80.63808033429368 ], "wc_limitations_avg": [ 10.5, 7.22841614740048 ], "wc_review_avg": [ 406.25, 22.796655456447994 ], "wc_reply_reviewers_avg": [ 51.0, 32.38826948140329 ], "wc_reply_authors_avg": [ 166.0, 270.85328131665676 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2973007343678587345&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 2, "email": "csu.edu.cn;csu.edu.cn;csu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Central South University", "aff_unique_dep": "", "aff_unique_url": "https://www.csu.edu.cn", "aff_unique_abbr": "CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Byzantine Robustness and Partial Participation Can Be Achieved at Once: Just Clip Gradient Differences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95924", "id": "G8aS48B9bm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G8aS48B9bm", "openreview": "https://openreview.net/forum?id=G8aS48B9bm", "poster": "/media/PosterPDFs/NeurIPS%202024/95924.png?t=1733348824.219423", "project": "", "author_site": "Grigory Malinovsky, Peter Richtarik, Samuel Horv\u00e1th, Eduard Gorbunov", "tldr": "", "abstract": "Distributed learning has emerged as a leading paradigm for training large machine learning models. However, in real-world scenarios, participants may be unreliable or malicious, posing a significant challenge to the integrity and accuracy of the trained models. Byzantine fault tolerance mechanisms have been proposed to address these issues, but they often assume full participation from all clients, which is not always practical due to the unavailability of some clients or communication constraints. In our work, we propose the first distributed method with client sampling and provable tolerance to Byzantine workers. The key idea behind the developed method is the use of gradient clipping to control stochastic gradient differences in recursive variance reduction. This allows us to bound the potential harm caused by Byzantine workers, even during iterations when all sampled clients are Byzantine. Furthermore, we incorporate communication compression into the method to enhance communication efficiency. Under general assumptions, we prove convergence rates for the proposed method that match the existing state-of-the-art (SOTA) theoretical results. We also propose a heuristic on how to adjust any Byzantine-robust method to a partial participation scenario via clipping.", "keywords": "Byzantine robustness;distributed optimization;communication compression;non-convex optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Grigory Malinovsky;Peter Richt\u00e1rik;Samuel Horv\u00e1th;Eduard Gorbunov", "authorids": "~Grigory_Malinovsky1;~Peter_Richt\u00e1rik1;~Samuel_Horv\u00e1th1;~Eduard_Gorbunov1", "gender": "M;M;M;M", "homepage": "https://grigory-malinovsky.github.io;https://sites.google.com/view/samuelhorvath;https://eduardgorbunov.github.io;https://richtarik.org", "dblp": "262/3277.html;234/8604;215/5512.html;62/8001", "google_scholar": "4w2W9KQAAAAJ;k252J7kAAAAJ;https://scholar.google.ru/citations?user=85j2RqQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0619-9260;;0000-0003-4380-5848", "linkedin": ";samuel-horvath/;;richtarik/", "or_profile": "~Grigory_Malinovsky1;~Samuel_Horv\u00e1th1;~Eduard_Gorbunov1;~Peter_Richtarik1", "aff": "Samsung;MBZUAI;Mohamed bin Zayed University of Artificial Intelligence;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "samsung.com;mbzuai.ac.ae;mbzuai.ac.ae;kaust.edu.sa", "position": "Intern;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmalinovsky2024byzantine,\ntitle={Byzantine Robustness and Partial Participation Can Be Achieved at Once: Just Clip Gradient Differences},\nauthor={Grigory Malinovsky and Peter Richt{\\'a}rik and Samuel Horv{\\'a}th and Eduard Gorbunov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G8aS48B9bm}\n}", "github": "", "reviewers": "LGpc;mV5C;oCpn;xeac;RwBg", "pdf_size": 1100599, "rating": "3;4;5;6;6", "confidence": "5;4;4;4;3", "soundness": "2;3;3;3;4", "novelty": "2;2;3;2;2", "presentation": "3;4;4;2;4", "wc_summary": "12;73;76;41;161", "wc_strengths": "14;33;69;19;109", "wc_weaknesses": "142;333;40;111;69", "wc_questions": "10;13;47;1;130", "wc_limitations": "2;10;18;1;9", "wc_review": "180;462;250;173;478", "wc_reply_reviewers": "366;702;0;597;51", "wc_reply_authors": "697;1432;0;1514;118", "reply_reviewers": "1;2;0;4;1", "reply_authors": "2;3;1;4;2", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 72.6, 49.99439968636487 ], "wc_strengths_avg": [ 48.8, 35.72338169882577 ], "wc_weaknesses_avg": [ 139.0, 103.0824912388132 ], "wc_questions_avg": [ 40.2, 47.53693301002916 ], "wc_limitations_avg": [ 8.0, 6.164414002968976 ], "wc_review_avg": [ 308.6, 134.60104011485203 ], "wc_reply_reviewers_avg": [ 343.2, 281.7228425243505 ], "wc_reply_authors_avg": [ 752.2, 634.5910179005057 ], "reply_reviewers_avg": [ 1.6, 1.3564659966250536 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8134892168199606, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13921492914601240805&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "samsung.com;mbzuai.ac.ae;mbzuai.ac.ae;kaust.edu.sa", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Samsung;Mohamed bin Zayed University of Artificial Intelligence;King Abdullah University of Science and Technology", "aff_unique_dep": "Samsung;;", "aff_unique_url": "https://www.samsung.com;https://www.mbzuai.ac.ae;https://www.kaust.edu.sa", "aff_unique_abbr": "Samsung;MBZUAI;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "South Korea;United Arab Emirates;Saudi Arabia" }, { "title": "Relational Concept Bottleneck Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95923", "id": "G99BSV9pt5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G99BSV9pt5", "openreview": "https://openreview.net/forum?id=G99BSV9pt5", "poster": "/media/PosterPDFs/NeurIPS%202024/95923.png?t=1733394308.459014", "project": "", "author_site": "Pietro Barbiero, Francesco Giannini, Gabriele Ciravegna, Michelangelo Diligenti, Giuseppe Marra", "tldr": "", "abstract": "The design of interpretable deep learning models working in relational domains poses an open challenge: interpretable deep learning methods, such as Concept Bottleneck Models (CBMs), are not designed to solve relational problems, while relational deep learning models, such as Graph Neural Networks (GNNs), are not as interpretable as CBMs. To overcome these limitations, we propose Relational Concept Bottleneck Models (R-CBMs), a family of relational deep learning methods providing interpretable task predictions. As special cases, we show that R-CBMs are capable of both representing standard CBMs and message passing GNNs. To evaluate the effectiveness and versatility of these models, we designed a class of experimental problems, ranging from image classification to link prediction in knowledge graphs. In particular we show that R-CBMs (i) match generalization performance of existing relational black-boxes, (ii) support the generation of quantified concept-based explanations, (iii) effectively respond to test-time interventions, and (iv) withstand demanding settings including out-of-distribution scenarios, limited training data regimes, and scarce concept supervisions.", "keywords": "Concept Bottleneck Models;Neuro-symbolic Models;Message Passing;Logic-based explanations", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/2e99a41ae95adc488ec594059168b206b0cf3e12.zip", "author": "Pietro Barbiero;Francesco Giannini;Gabriele Ciravegna;Michelangelo Diligenti;Giuseppe Marra", "authorids": "~Pietro_Barbiero1;~Francesco_Giannini1;~Gabriele_Ciravegna1;~Michelangelo_Diligenti2;~Giuseppe_Marra1", "gender": "M;M;M;M;Not Specified", "homepage": "http://www.pietrobarbiero.eu/;https://www.francescogiannini.eu/;https://dbdmg.polito.it/dbdmg_web/gabriele-ciravegna/;https://www.giuseppemarra.com;", "dblp": "238/7860;198/0854;228/1667;150/7452;11/5691", "google_scholar": "https://scholar.google.it/citations?user=4gbToQoAAAAJ;RO8aInMAAAAJ;https://scholar.google.it/citations?view_op=list_works;https://scholar.google.it/citations?user=BBcsk7MAAAAJ;https://scholar.google.it/citations?user=qI-LOjIAAAAJ", "orcid": "0000-0003-3155-2564;0000-0001-8492-8110;0000-0002-6799-1043;;", "linkedin": ";https://www.linkedin.com/search/results/all/?fetchDeterministicClustersOnly=true&heroEntityKey=urn%3Ali%3Afsd_profile%3AACoAAEZY56YBnC1EDCTXy7QNDbkYThgd6vpD6i8&keywords=francesco%20giannini&origin=RICH_QUERY_SUGGESTION&position=0&searchId=95dc79fd-e2ea-4d21-b3dc-7ad787ee929e&sid=JQw&spellCorrectionEnabled=false;gabriele-ciravegna-5a1130124/;;", "or_profile": "~Pietro_Barbiero1;~Francesco_Giannini1;~Gabriele_Ciravegna1;~Giuseppe_Marra1;~Michelangelo_Diligenti1", "aff": "Universita della Svizzera Italiana;CINI;Polytechnic Institute of Turin;KU Leuven;Google Inc.", "aff_domain": "usi.ch;consorzio-cini.it;polito.it;kuleuven.be;google.com", "position": "Postdoc;Researcher;Postdoc;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nbarbiero2024relational,\ntitle={Relational Concept Bottleneck Models},\nauthor={Pietro Barbiero and Francesco Giannini and Gabriele Ciravegna and Michelangelo Diligenti and Giuseppe Marra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G99BSV9pt5}\n}", "github": "", "reviewers": "raJm;CYvd;WYX8;XAga", "pdf_size": 626621, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;2;4", "wc_summary": "32;128;489;39", "wc_strengths": "40;74;38;25", "wc_weaknesses": "128;275;69;54", "wc_questions": "4;2;42;15", "wc_limitations": "1;2;38;2", "wc_review": "205;481;676;135", "wc_reply_reviewers": "0;235;6;13", "wc_reply_authors": "32;516;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 172.0, 186.89167985761165 ], "wc_strengths_avg": [ 44.25, 18.115946014492316 ], "wc_weaknesses_avg": [ 131.5, 87.34557802201552 ], "wc_questions_avg": [ 15.75, 15.943258763502524 ], "wc_limitations_avg": [ 10.75, 15.738090735537142 ], "wc_review_avg": [ 374.25, 216.98775887132436 ], "wc_reply_reviewers_avg": [ 63.5, 99.12239908315375 ], "wc_reply_authors_avg": [ 137.0, 219.2053831455788 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1521287300019023222&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "usi.ch;consorzio-cini.it;polito.it;kuleuven.be;google.com", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Universita della Svizzera Italiana;Consorzio Interuniversitario Nazionale per l'Informatica;Polytechnic Institute of Turin;Katholieke Universiteit Leuven;Google", "aff_unique_dep": ";;;;Google", "aff_unique_url": "https://www.usi.ch;https://www.cini.it;https://www.polito.it;https://www.kuleuven.be;https://www.google.com", "aff_unique_abbr": "USI;CINI;Polito;KU Leuven;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;2;3", "aff_country_unique": "Switzerland;Italy;Belgium;United States" }, { "title": "Knowledge Composition using Task Vectors with Learned Anisotropic Scaling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95922", "id": "G9OJUgKo4B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=G9OJUgKo4B", "openreview": "https://openreview.net/forum?id=G9OJUgKo4B", "poster": "/media/PosterPDFs/NeurIPS%202024/95922.png?t=1731624814.7966058", "project": "", "author_site": "Frederic Z. Zhang, Paul Albert, Cristian Rodriguez-Opazo, Anton van den Hengel, Ehsan Abbasnejad", "tldr": "", "abstract": "Pre-trained models produce strong generic representations that can be adapted via fine-tuning on specialised datasets. The learned weight difference relative to the pre-trained model, known as a task vector, characterises the direction and stride of fine-tuning that enables the model to capture these specialised representations. The significance of task vectors is such that simple arithmetic operations on them can be used to combine diverse representations from different domains. This paper builds on these properties of task vectors and aims to answer (1) whether components of task vectors, particularly parameter blocks, exhibit similar characteristics, and (2) how such blocks can be used to enhance knowledge composition and transfer. To this end, we introduce aTLAS, an algorithm that linearly combines parameter blocks with different learned coefficients, resulting in anisotropic scaling at the task vector level. We show that such linear combinations explicitly exploit the low intrinsic dimensionality of pre-trained models, with only a few coefficients being the learnable parameters. Furthermore, composition of parameter blocks enables modular learning that effectively leverages the already learned representations, thereby reducing the dependency on large amounts of data. We demonstrate the effectiveness of our method in task arithmetic, few-shot recognition and test-time adaptation, with supervised or unsupervised objectives. In particular, we show that (1) learned anisotropic scaling allows task vectors to be more disentangled, causing less interference in composition; (2) task vector composition excels with scarce or no labelled data and is less prone to domain shift, thus leading to better generalisability; (3) mixing the most informative parameter blocks across different task vectors prior to training can reduce the memory footprint and improve the flexibility of knowledge transfer. Moreover, we show the potential of aTLAS as a parameter-efficient fine-tuning method, particularly with less data, and demonstrate that it can be easily scaled up for higher performance.", "keywords": "task vectors;task arithmetic;transfer learning;few-shot learning;test-time adaptation;parameter-efficient fine-tuning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Frederic Z. Zhang;Paul Albert;Cristian Rodriguez-Opazo;Anton van den Hengel;Ehsan Abbasnejad", "authorids": "~Frederic_Z._Zhang1;~Paul_Albert2;~Cristian_Rodriguez-Opazo1;~Anton_van_den_Hengel1;~Ehsan_Abbasnejad3", "gender": "M;;M;M;M", "homepage": ";;;https://ehsanabb.github.io/;https://fredzzhang.com/", "dblp": ";v/AntonvandenHengel;247/5999;30/11191;281/7094", "google_scholar": "trUhFBEAAAAJ;https://scholar.google.com.au/citations?user=nMGZ2ZQAAAAJ;https://scholar.google.com/citations?hl=es;https://scholar.google.com/citations?hl=en;b1PQadgAAAAJ", "orcid": ";0000-0003-3027-8364;;;0000-0003-4484-9068", "linkedin": ";;;;", "or_profile": "~Paul_Albert2;~Anton_van_den_Hengel1;~Cristian_Rodriguez1;~Ehsan_M_Abbasnejad1;~Frederic_Zhang1", "aff": "University of Adelaide;University of Adelaide;University of Adelaide;University of Adelaide;Australian Institute for Machine Learning", "aff_domain": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au", "position": "Postdoc;Professor;Postdoc;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2024knowledge,\ntitle={Knowledge Composition using Task Vectors with Learned Anisotropic Scaling},\nauthor={Frederic Z. Zhang and Paul Albert and Cristian Rodriguez-Opazo and Anton van den Hengel and Ehsan Abbasnejad},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=G9OJUgKo4B}\n}", "github": "", "reviewers": "TrF6;GVMe;RiZ9", "pdf_size": 4846982, "rating": "5;7;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "81;69;74", "wc_strengths": "141;72;102", "wc_weaknesses": "242;22;280", "wc_questions": "3;21;75", "wc_limitations": "3;22;19", "wc_review": "470;206;550", "wc_reply_reviewers": "0;0;45", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.66666666666667, 4.921607686744467 ], "wc_strengths_avg": [ 105.0, 28.24889378365107 ], "wc_weaknesses_avg": [ 181.33333333333334, 113.72872206361163 ], "wc_questions_avg": [ 33.0, 30.59411708155671 ], "wc_limitations_avg": [ 14.666666666666666, 8.339997335464535 ], "wc_review_avg": [ 408.6666666666667, 146.98148031488714 ], "wc_reply_reviewers_avg": [ 15.0, 21.213203435596427 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3088810970482025098&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au;adelaide.edu.au", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Adelaide;Australian Institute for Machine Learning", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;https://www.aiml.com.au", "aff_unique_abbr": "Adelaide;AIML", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Metric from Human: Zero-shot Monocular Metric Depth Estimation via Test-time Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95921", "id": "GA8TVtxudf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GA8TVtxudf", "openreview": "https://openreview.net/forum?id=GA8TVtxudf", "poster": "/media/PosterPDFs/NeurIPS%202024/95921.png?t=1733782509.5717742", "project": "", "author_site": "Yizhou Zhao, Hengwei Bian, Kaihua Chen, Pengliang Ji, Liao Qu, Shao-yu Lin, Weichen Yu, Haoran Li, Hao Chen, Jun Shen, Bhiksha Raj, Min Xu", "tldr": "", "abstract": "Monocular depth estimation (MDE) is fundamental for deriving 3D scene structures from 2D images. While state-of-the-art monocular relative depth estimation (MRDE) excels in estimating relative depths for in-the-wild images, current monocular metric depth estimation (MMDE) approaches still face challenges in handling unseen scenes. Since MMDE can be viewed as the composition of MRDE and metric scale recovery, we attribute this difficulty to scene dependency, where MMDE models rely on scenes observed during supervised training for predicting scene scales during inference. To address this issue, we propose to use humans as landmarks for distilling scene-independent metric scale priors from generative painting models. Our approach, Metric from Human (MfH), bridges from generalizable MRDE to zero-shot MMDE in a generate-and-estimate manner. Specifically, MfH generates humans on the input image with generative painting and estimates human dimensions with an off-the-shelf human mesh recovery (HMR) model. Based on MRDE predictions, it propagates the metric information from painted humans to the contexts, resulting in metric depth estimations for the original input. Through this annotation-free test-time adaptation, MfH achieves superior zero-shot performance in MMDE, demonstrating its strong generalization ability.", "keywords": "Metric Depth Estimation;Human Mesh Recovery;Test-time Adaptation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yizhou Zhao;Hengwei Bian;Kaihua Chen;Pengliang Ji;Liao Qu;Shao-yu Lin;Weichen Yu;Haoran Li;Hao Chen;Jun Shen;Bhiksha Raj;Min Xu", "authorids": "~Yizhou_Zhao2;~Hengwei_Bian1;~Kaihua_Chen1;~Pengliang_Ji1;~Liao_Qu1;~Shao-yu_Lin1;~Weichen_Yu1;~Haoran_Li11;~Hao_Chen15;~Jun_Shen3;~Bhiksha_Raj1;~Min_Xu4", "gender": ";;M;;M;F;F;M;M;M;M;", "homepage": ";;;;https://github.com/QuLiao1117;;https://weichen-yu.github.io/;https://haoranli525.github.io/;https://hhhhhhao.github.io/;https://scholars.uow.edu.au/display/jun_shen;https://www.cs.cmu.edu/directory/bhikshar/;", "dblp": ";;;;299/1491;;325/1209;;;48/5700-1;60/3996;", "google_scholar": "nVKRaf4AAAAJ;;;;IDbqDdEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;tktqkhwAAAAJ;Bf6gvGkAAAAJ;;", "orcid": "0000-0002-2975-0783;;;;;;;0000-0003-0868-9554;;0000-0002-9403-7140;;", "linkedin": ";;kaihuac/;;;claireshao-yulin;;haoran-li-0525/;haochen97/;jun-shen-him-his-37b95337/;;", "or_profile": "~Yizhou_Zhao2;~Hengwei_Bian1;~Kaihua_Chen1;~Pengliang_Ji1;~Liao_Qu1;~Shao-yu_Lin1;~Weichen_Yu1;~Haoran_Li11;~Hao_Chen15;~Jun_Shen3;~Bhiksha_Raj1;~Min_Xu4", "aff": "Microsoft;;Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;University of Wollongong;Carnegie Mellon University;University of Wollongong;Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": "microsoft.com;;andrew.cmu.edu;;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;uow.edu.au;andrew.cmu.edu;uow.edu.au;mbzuai.ac.ae;", "position": "Intern;;MS student;;MS student;Researcher;PhD student;PhD student;PhD student;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nzhao2024metric,\ntitle={Metric from Human: Zero-shot Monocular Metric Depth Estimation via Test-time Adaptation},\nauthor={Yizhou Zhao and Hengwei Bian and Kaihua Chen and Pengliang Ji and Liao Qu and Shao-yu Lin and Weichen Yu and Haoran Li and Hao Chen and Jun Shen and Bhiksha Raj and Min Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GA8TVtxudf}\n}", "github": "", "reviewers": "xSFg;4QJ3;2dzs;bPK1", "pdf_size": 27274616, "rating": "3;4;6;6", "confidence": "4;5;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "169;104;93;107", "wc_strengths": "105;130;94;128", "wc_weaknesses": "375;135;115;57", "wc_questions": "28;158;6;105", "wc_limitations": "8;13;44;25", "wc_review": "685;540;352;422", "wc_reply_reviewers": "483;89;64;16", "wc_reply_authors": "1499;757;703;16", "reply_reviewers": "2;1;1;1", "reply_authors": "5;3;2;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 118.25, 29.76050234791073 ], "wc_strengths_avg": [ 114.25, 15.270478054075452 ], "wc_weaknesses_avg": [ 170.5, 121.49382700367948 ], "wc_questions_avg": [ 74.25, 60.73868207328835 ], "wc_limitations_avg": [ 22.5, 13.865424623862047 ], "wc_review_avg": [ 499.75, 126.30592820608223 ], "wc_reply_reviewers_avg": [ 163.0, 186.60519821269716 ], "wc_reply_authors_avg": [ 743.75, 524.8472992214021 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1921276049972106975&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 0, "email": "microsoft.com;;andrew.cmu.edu;;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;uow.edu.au;andrew.cmu.edu;uow.edu.au;mbzuai.ac.ae;", "author_num": 12, "aff_unique_index": "0;1;1;1;1;2;1;2;3", "aff_unique_norm": "Microsoft;Carnegie Mellon University;University of Wollongong;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;https://www.cmu.edu;https://www.uow.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": "Microsoft;CMU;UOW;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;1;2", "aff_country_unique": "United States;Australia;United Arab Emirates" }, { "title": "Construction and Application of Materials Knowledge Graph in Multidisciplinary Materials Science via Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95920", "id": "GB5a0RRYuv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GB5a0RRYuv", "openreview": "https://openreview.net/forum?id=GB5a0RRYuv", "poster": "", "project": "", "author_site": "Yanpeng Ye, Jie Ren, Shaozhou Wang, Yuwei Wan, Imran Razzak, Bram Hoex, Haofen Wang, Tong Xie, Wenjie Zhang", "tldr": "", "abstract": "Knowledge in materials science is widely dispersed across extensive scientific literature, posing significant challenges for efficient discovery and integration of new materials. Traditional methods, often reliant on costly and time-consuming experimental approaches, further complicate rapid innovation. Addressing these challenges, the integration of artificial intelligence with materials science has opened avenues for accelerating the discovery process, though it also demands precise annotation, data extraction, and traceability of information. To tackle these issues, this article introduces the Materials Knowledge Graph (MKG), which utilizes advanced natural language processing techniques, integrated with large language models to extract and systematically organize a decade's worth of high-quality research into structured triples, contains 162,605 nodes and 731,772 edges. MKG categorizes information into comprehensive labels such as Name, Formula, and Application, structured around a meticulously designed ontology, thus enhancing data usability and integration. By implementing network-based algorithms, MKG not only facilitates efficient link prediction but also significantly reduces reliance on traditional experimental methods. This structured approach not only streamlines materials research but also lays the groundwork for more sophisticated materials knowledge graphs.", "keywords": "Large Language Model;Knowledge Graph;AI4Science;Material Science", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yanpeng Ye;Jie Ren;Shaozhou Wang;Yuwei Wan;Imran Razzak;Bram Hoex;Haofen Wang;Tong Xie;Wenjie Zhang", "authorids": "~Yanpeng_Ye2;~Jie_Ren9;~Shaozhou_Wang1;~Yuwei_Wan1;~Imran_Razzak2;~Bram_Hoex1;~Haofen_Wang1;~Tong_Xie2;~Wenjie_Zhang3", "gender": "M;F;M;F;M;M;M;M;F", "homepage": ";;;https://yuweiwan.github.io/;https://imranrazzak.github.io/;https://www.unsw.edu.au/;https://tongji-kgllm.github.io/people/wang-haofen/;https://0xtong.github.io/;http://www.cse.unsw.edu.au/~zhangw/", "dblp": ";;;;59/8379.html;;63/4317.html;335/1539;98/5684-1", "google_scholar": "elN0n5cAAAAJ;;https://scholar.google.com.au/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=Dc5eNVQAAAAJ;1FhdXpsAAAAJ;https://scholar.google.com.au/citations?user=IWEN6kcAAAAJ;https://scholar.google.com.au/citations?user=yHTJo1kAAAAJ", "orcid": ";0000-0001-5226-3783;;0000-0001-6464-7741;0000-0002-3930-6600;0000-0002-2723-5286;0000-0003-3018-3824;0000-0002-1659-4865;0000-0001-6572-2600", "linkedin": ";;;;;;haofen-wang-99b78b32/;;", "or_profile": "~Yanpeng_Ye2;~Jie_Ren9;~Shaozhou_Wang1;~Yuwei_Wan1;~Imran_Razzak2;~Bram_Hoex1;~Haofen_Wang1;~Tong_Xie2;~Wenjie_Zhang3", "aff": "University of New South Wales;City University of Hong Kong;University of New South Wales;City University of Hong Kong;University of New South Wales;University of New South Wales;Tongji University;University of New South Wales;the university of new south wales", "aff_domain": "unsw.edu.au;my.cityu.edu.hk;unsw.edu.au;cityu.edu.hk;unsw.edu.au;unsw.edu.au;tongji.edu.cn;unsw.edu.au;cse.unsw.edu.au", "position": "MS student;PhD student;Postdoc;PhD student;Associate Professor;Full Professor;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nye2024construction,\ntitle={Construction and Application of Materials Knowledge Graph in Multidisciplinary Materials Science via Large Language Model},\nauthor={Yanpeng Ye and Jie Ren and Shaozhou Wang and Yuwei Wan and Imran Razzak and Bram Hoex and Haofen Wang and Tong Xie and Wenjie Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GB5a0RRYuv}\n}", "github": "", "reviewers": "jF92;5h8v;is27;xvtf", "pdf_size": 6858919, "rating": "4;7;7;7", "confidence": "4;4;4;5", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "3;4;2;3", "wc_summary": "69;74;130;79", "wc_strengths": "36;92;60;130", "wc_weaknesses": "154;25;257;41", "wc_questions": "6;35;38;41", "wc_limitations": "1;73;18;20", "wc_review": "266;299;503;311", "wc_reply_reviewers": "109;0;20;50", "wc_reply_authors": "207;0;19;19", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.0, 24.50510150968569 ], "wc_strengths_avg": [ 79.5, 35.28101472463625 ], "wc_weaknesses_avg": [ 119.25, 93.79332332314492 ], "wc_questions_avg": [ 30.0, 14.017845768876187 ], "wc_limitations_avg": [ 28.0, 27.00925767213901 ], "wc_review_avg": [ 344.75, 92.83957938293344 ], "wc_reply_reviewers_avg": [ 44.75, 41.14228360215315 ], "wc_reply_authors_avg": [ 61.25, 84.5055471552016 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15794027172909682806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "unsw.edu.au;my.cityu.edu.hk;unsw.edu.au;cityu.edu.hk;unsw.edu.au;unsw.edu.au;tongji.edu.cn;unsw.edu.au;cse.unsw.edu.au", "author_num": 9, "aff_unique_index": "0;1;0;1;0;0;2;0;0", "aff_unique_norm": "University of New South Wales;City University of Hong Kong;Tongji University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unsw.edu.au;https://www.cityu.edu.hk;https://www.tongji.edu.cn", "aff_unique_abbr": "UNSW;CityU;Tongji", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;0;0;1;0;0", "aff_country_unique": "Australia;China" }, { "title": "Lightweight Frequency Masker for Cross-Domain Few-Shot Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95919", "id": "GCmmy4At6i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GCmmy4At6i", "openreview": "https://openreview.net/forum?id=GCmmy4At6i", "poster": "/media/PosterPDFs/NeurIPS%202024/95919.png?t=1731659556.2182903", "project": "", "author_site": "Jintao Tong, Yixiong Zou, Yuhua Li, Ruixuan Li", "tldr": "", "abstract": "Cross-domain few-shot segmentation (CD-FSS) is proposed to first pre-train the model on a large-scale source-domain dataset, and then transfer the model to data-scarce target-domain datasets for pixel-level segmentation. The significant domain gap between the source and target datasets leads to a sharp decline in the performance of existing few-shot segmentation (FSS) methods in cross-domain scenarios. In this work, we discover an intriguing phenomenon: simply filtering different frequency components for target domains can lead to a significant performance improvement, sometimes even as high as 14% mIoU. Then, we delve into this phenomenon for an interpretation, and find such improvements stem from the reduced inter-channel correlation in feature maps, which benefits CD-FSS with enhanced robustness against domain gaps and larger activated regions for segmentation. Based on this, we propose a lightweight frequency masker, which further reduces channel correlations by an Amplitude-Phase Masker (APM) module and an Adaptive Channel Phase Attention (ACPA) module. Notably, APM introduces only 0.01% additional parameters but improves the average performance by over 10%, and ACPA imports only 2.5% parameters but further improves the performance by over 1.5%, which significantly surpasses the state-of-the-art CD-FSS methods.", "keywords": "cross-domain few-shot segmentation;frequency component;feature disentanglement\uff0cchannel attention", "primary_area": "machine_vision", "supplementary_material": "/attachment/877c2798c581f3291b65aa807fd7bd24a96e5f6a.zip", "author": "Jintao Tong;Yixiong Zou;Yuhua Li;Ruixuan Li", "authorids": "~Jintao_Tong1;~Yixiong_Zou1;~Yuhua_Li2;~Ruixuan_Li1", "gender": ";;F;M", "homepage": ";;;http://idc.hust.edu.cn/rxli/index.html", "dblp": ";;79/5796-3;60/4429.html", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/scholar?q=ruixuan+li", "orcid": ";;;0000-0002-7791-5511", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ruixuan-li-b367319", "or_profile": "~Jintao_Tong1;~Yixiong_Zou1;~Yuhua_Li2;~Ruixuan_Li1", "aff": ";;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": ";;hust.edu.cn;hust.edu.cn", "position": ";;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntong2024lightweight,\ntitle={Lightweight Frequency Masker for Cross-Domain Few-Shot Semantic Segmentation},\nauthor={Jintao Tong and Yixiong Zou and Yuhua Li and Ruixuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GCmmy4At6i}\n}", "github": "", "reviewers": "jSMF;upoB;H2xr;79M1", "pdf_size": 3923514, "rating": "5;6;6;8", "confidence": "4;3;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "104;57;69;63", "wc_strengths": "98;115;67;75", "wc_weaknesses": "115;103;81;66", "wc_questions": "175;1;32;9", "wc_limitations": "79;1;49;81", "wc_review": "571;277;298;294", "wc_reply_reviewers": "0;2;0;0", "wc_reply_authors": "0;26;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.25, 18.25342433627181 ], "wc_strengths_avg": [ 88.75, 18.952242611363964 ], "wc_weaknesses_avg": [ 91.25, 19.00493356999703 ], "wc_questions_avg": [ 54.25, 70.63772009344582 ], "wc_limitations_avg": [ 52.5, 32.32259271778797 ], "wc_review_avg": [ 360.0, 122.07579612683261 ], "wc_reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "wc_reply_authors_avg": [ 6.5, 11.258330249197702 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14737803022293385324&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "GL-NeRF: Gauss-Laguerre Quadrature Enables Training-Free NeRF Acceleration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95918", "id": "GDNZajKrML", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GDNZajKrML", "openreview": "https://openreview.net/forum?id=GDNZajKrML", "poster": "/media/PosterPDFs/NeurIPS%202024/95918.png?t=1733349219.055334", "project": "", "author_site": "Silong Yong, Yaqi Xie, Simon Stepputtis, Katia Sycara", "tldr": "", "abstract": "Volume rendering in neural radiance fields is inherently time-consuming due to the large number of MLP calls on the points sampled per ray. Previous works would address this issue by introducing new neural networks or data structures. In this work, we propose GL-NeRF, a new perspective of computing volume rendering with the Gauss-Laguerre quadrature. GL-NeRF significantly reduces the number of MLP calls needed for volume rendering, introducing no additional data structures or neural networks. The simple formulation makes adopting GL-NeRF in any NeRF model possible. In the paper, we first justify the use of the Gauss-Laguerre quadrature and then demonstrate this plug-and-play attribute by implementing it in two different NeRF models. We show that with a minimal drop in performance, GL-NeRF can significantly reduce the number of MLP calls, showing the potential to speed up any NeRF model. Code can be found in project page https://silongyong.github.io/GL-NeRF_project_page/.", "keywords": "Neural Radiance Fields;Real-Time NeRF;Gauss-Laguerre Quadrature;Neural Rendering", "primary_area": "machine_vision", "supplementary_material": "", "author": "Silong Yong;Yaqi Xie;Simon Stepputtis;Katia P. Sycara", "authorids": "~Silong_Yong1;~Yaqi_Xie1;~Simon_Stepputtis1;~Katia_P._Sycara1", "gender": "M;F;;F", "homepage": "https://github.com/SilongYong;https://yaqi-xie.me/;https://simonstepputtis.com/;", "dblp": ";237/8691;192/7092;s/KatiaPSycara", "google_scholar": "EitVAcwAAAAJ;lBCCo0EAAAAJ;WUQgzsAAAAAJ;VWv6a9kAAAAJ", "orcid": ";0009-0005-0458-9419;0009-0003-0519-3454;", "linkedin": ";yaqi-xie/;simon-stepputtis/;", "or_profile": "~Silong_Yong1;~Yaqi_Xie1;~Simon_Stepputtis1;~Katia_P._Sycara1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "MS student;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyong2024glnerf,\ntitle={{GL}-Ne{RF}: Gauss-Laguerre Quadrature Enables Training-Free Ne{RF} Acceleration},\nauthor={Silong Yong and Yaqi Xie and Simon Stepputtis and Katia P. Sycara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GDNZajKrML}\n}", "github": "", "reviewers": "PEjS;GERG;a3Mk;cdYt", "pdf_size": 7471084, "rating": "5;6;7;8", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "4;3;3;3", "wc_summary": "64;68;81;115", "wc_strengths": "28;65;23;106", "wc_weaknesses": "106;89;2;190", "wc_questions": "28;85;62;121", "wc_limitations": "25;74;1;34", "wc_review": "251;381;169;566", "wc_reply_reviewers": "56;49;29;94", "wc_reply_authors": "22;81;24;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 20.062402647738878 ], "wc_strengths_avg": [ 55.5, 33.36540124140574 ], "wc_weaknesses_avg": [ 96.75, 66.74344537106246 ], "wc_questions_avg": [ 74.0, 33.87476937190864 ], "wc_limitations_avg": [ 33.5, 26.31064423384574 ], "wc_review_avg": [ 341.75, 149.92227152761527 ], "wc_reply_reviewers_avg": [ 57.0, 23.547823678633232 ], "wc_reply_authors_avg": [ 37.5, 25.124689052802225 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14177618744348242678&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Erasing Undesirable Concepts in Diffusion Models with Adversarial Preservation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95917", "id": "GDz8rkfikp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GDz8rkfikp", "openreview": "https://openreview.net/forum?id=GDz8rkfikp", "poster": "/media/PosterPDFs/NeurIPS%202024/95917.png?t=1730318174.088878", "project": "", "author_site": "Anh Bui, Tung-Long Vuong, Khanh Doan, Trung Le, Paul Montague, Tamas Abraham, Dinh Phung", "tldr": "", "abstract": "Diffusion models excel at generating visually striking content from text but can inadvertently produce undesirable or harmful content when trained on unfiltered internet data. A practical solution is to selectively removing target concepts from the model, but this may impact the remaining concepts. Prior approaches have tried to balance this by introducing a loss term to preserve neutral content or a regularization term to minimize changes in the model parameters, yet resolving this trade-off remains challenging. In this work, we propose to identify and preserving concepts most affected by parameter changes, termed as *adversarial concepts*. This approach ensures stable erasure with minimal impact on the other concepts. We demonstrate the effectiveness of our method using the Stable Diffusion model, showing that it outperforms state-of-the-art erasure methods in eliminating unwanted content while maintaining the integrity of other unrelated elements. Our code is available at \\url{https://github.com/tuananhbui89/Erasing-Adversarial-Preservation}.", "keywords": "Diffusion models;Erasing Concepts; Trustworthy GenAI", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Anh Tuan Bui;Long Tung Vuong;Khanh Doan;Trung Le;Paul Montague;Tamas Abraham;Dinh Phung", "authorids": "~Anh_Tuan_Bui2;~Long_Tung_Vuong1;~Khanh_Doan1;~Trung_Le2;~Paul_Montague1;~Tamas_Abraham1;~Dinh_Phung2", "gender": "M;M;;M;M;;", "homepage": "https://tuananhbui89.github.io/;;;;;;", "dblp": "120/0106;329/6838;;;50/805;;", "google_scholar": "jEjMZ7oAAAAJ;DCC657sAAAAJ;;https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;;0000-0001-9461-7471;;", "linkedin": ";long-vuong-783477131/;;;;;", "or_profile": "~Anh_Tuan_Bui2;~Long_Tung_Vuong1;~Khanh_Doan1;~Trung_Le2;~Paul_Montague1;~Tamas_Abraham1;~Dinh_Phung2", "aff": "Monash University;Monash University;;Monash University;Defence Science and Technology Group;;", "aff_domain": "monash.edu;monash.edu;;monash.edu;dst.defence.gov.au;;", "position": "Postdoc;PhD student;;Assistant Professor;Researcher;;", "bibtex": "@inproceedings{\nbui2024erasing,\ntitle={Erasing Undesirable Concepts in Diffusion Models with Adversarial Preservation},\nauthor={Anh Tuan Bui and Long Tung Vuong and Khanh Doan and Trung Le and Paul Montague and Tamas Abraham and Dinh Phung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GDz8rkfikp}\n}", "github": "", "reviewers": "r1Xs;Jwe1;fKJG;Zhhp", "pdf_size": 46458647, "rating": "3;4;7;7", "confidence": "4;4;5;4", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "1;3;4;3", "wc_summary": "120;57;115;86", "wc_strengths": "49;17;34;87", "wc_weaknesses": "165;231;85;117", "wc_questions": "3;15;386;111", "wc_limitations": "1;1;7;12", "wc_review": "338;321;627;413", "wc_reply_reviewers": "0;0;412;81", "wc_reply_authors": "66;190;2002;397", "reply_reviewers": "0;0;2;2", "reply_authors": "2;3;6;4", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 94.5, 25.243811122728676 ], "wc_strengths_avg": [ 46.75, 25.849323008543184 ], "wc_weaknesses_avg": [ 149.5, 54.99772722576816 ], "wc_questions_avg": [ 128.75, 154.30874084121095 ], "wc_limitations_avg": [ 5.25, 4.602988159880492 ], "wc_review_avg": [ 424.75, 121.79157401068433 ], "wc_reply_reviewers_avg": [ 123.25, 169.95789919859564 ], "wc_reply_authors_avg": [ 663.75, 781.6349451630217 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5659164584181102, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12614326824225456307&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "monash.edu;monash.edu;;monash.edu;dst.defence.gov.au;;", "author_num": 7, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Monash University;Defence Science and Technology Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.dst.defence.gov.au/", "aff_unique_abbr": "Monash;DST Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Consistency of Neural Causal Partial Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95916", "id": "GEbnPxD9EF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GEbnPxD9EF", "openreview": "https://openreview.net/forum?id=GEbnPxD9EF", "poster": "/media/PosterPDFs/NeurIPS%202024/95916.png?t=1731192748.323203", "project": "", "author_site": "Jiyuan Tan, Jose Blanchet, Vasilis Syrgkanis", "tldr": "", "abstract": "Recent progress in Neural Causal Models (NCMs) showcased how identification and partial identification of causal effects can be automatically carried out via training of neural generative models that respect the constraints encoded in a given causal graph [Xia et al. 2022, Balazadeh et al. 2022]. However, formal consistency of these methods has only been proven for the case of discrete variables or only for linear causal models. In this work, we prove the consistency of partial identification via NCMs in a general setting with both continuous and categorical variables. Further, our results highlight the impact of the design of the underlying neural network architecture in terms of depth and connectivity as well as the importance of applying Lipschitz regularization in the training phase. In particular, we provide a counterexample showing that without Lipschitz regularization this method may not be asymptotically consistent. Our results are enabled by new results on the approximability of Structural Causal Models (SCMs) via neural generative models, together with an analysis of the sample complexity of the resulting architectures and how that translates into an error in the constrained optimization problem that defines the partial identification bounds.", "keywords": "Causal inference;Generative models;Partial identification", "primary_area": "causal_inference", "supplementary_material": "", "author": "Jiyuan Tan;Jose Blanchet;Vasilis Syrgkanis", "authorids": "~Jiyuan_Tan1;~Jose_Blanchet1;~Vasilis_Syrgkanis1", "gender": "M;M;", "homepage": "https://jiyuan-tan.github.io/;https://web.stanford.edu/~jblanche/;https://www.vsyrgkanis.com", "dblp": ";75/5093.html;", "google_scholar": ";https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;G1WMpcUAAAAJ", "orcid": ";;", "linkedin": ";jose-blanchet;", "or_profile": "~Jiyuan_Tan1;~Jose_Blanchet1;~Vasilis_Syrgkanis1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\ntan2024consistency,\ntitle={Consistency of Neural Causal Partial Identification},\nauthor={Jiyuan Tan and Jose Blanchet and Vasilis Syrgkanis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GEbnPxD9EF}\n}", "github": "", "reviewers": "Mk7v;VZrh;zYjd;8521", "pdf_size": 693591, "rating": "5;6;6;7", "confidence": "2;3;2;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "2;3;2;3", "wc_summary": "118;19;51;103", "wc_strengths": "75;12;39;88", "wc_weaknesses": "203;8;79;152", "wc_questions": "44;25;84;36", "wc_limitations": "21;2;7;10", "wc_review": "461;66;260;389", "wc_reply_reviewers": "379;176;21;132", "wc_reply_authors": "371;367;0;195", "reply_reviewers": "2;1;1;2", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.75, 39.76414842543469 ], "wc_strengths_avg": [ 53.5, 29.937434759845406 ], "wc_weaknesses_avg": [ 110.5, 73.78516110980581 ], "wc_questions_avg": [ 47.25, 22.26404051379713 ], "wc_limitations_avg": [ 10.0, 6.96419413859206 ], "wc_review_avg": [ 294.0, 150.04499325202423 ], "wc_reply_reviewers_avg": [ 177.0, 129.58202035776415 ], "wc_reply_authors_avg": [ 233.25, 152.26026237991317 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:owK4xLmKpHIJ:scholar.google.com/&scioq=Consistency+of+Neural+Causal+Partial+Identification&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GeoPlant: Spatial Plant Species Prediction Dataset", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97777", "id": "GHlJM45fWY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GHlJM45fWY", "openreview": "https://openreview.net/forum?id=GHlJM45fWY", "poster": "/media/PosterPDFs/NeurIPS%202024/97777.png?t=1733250154.5474842", "project": "", "author_site": "Lukas Picek, Christophe Botella, Maximilien Servajean, C\u00e9sar Leblanc, R\u00e9mi Palard, Theo Larcher, Benjamin Deneu, Diego Marcos, Pierre Bonnet, alexis joly", "tldr": "", "abstract": "The difficulty of monitoring biodiversity at fine scales and over large areas limits ecological knowledge and conservation efforts. To fill this gap, Species Distribution Models (SDMs) predict species across space from spatially explicit features. Yet, they face the challenge of integrating the rich but heterogeneous data made available over the past decade, notably millions of opportunistic species observations and standardized surveys, as well as multi-modal remote sensing data.\nIn light of that, we have designed and developed a new European-scale dataset for SDMs at high spatial resolution (10--50m), including more than 10k species (i.e., most of the European flora). The dataset comprises 5M heterogeneous Presence-Only records and 90k exhaustive Presence-Absence survey records, all accompanied by diverse environmental rasters (e.g., elevation, human footprint, and soil) traditionally used in SDMs. In addition, it provides Sentinel-2 RGB and NIR satellite images with 10 m resolution, a 20-year time series of climatic variables, and satellite time series from the Landsat program.\nIn addition to the data, we provide an openly accessible SDM benchmark (hosted on Kaggle), which has already attracted an active community and a set of strong baselines for single predictor/modality and multimodal approaches.\nAll resources, e.g., the dataset, pre-trained models, and baseline methods (in the form of notebooks), are available on Kaggle, allowing one to start with our dataset literally with two mouse clicks.", "keywords": "Species distribution models;Deep SDM;remote sensing;presence-only;presence-absence;species assemblages;benchmark dataset;biodiversity;prediction;Europe;plant species", "primary_area": "", "supplementary_material": "", "author": "Lukas Picek;Christophe Botella;Maximilien Servajean;C\u00e9sar Leblanc;R\u00e9mi Palard;Theo Larcher;Benjamin Deneu;Diego Marcos;Pierre Bonnet;Alexis Joly", "authorids": "~Lukas_Picek1;~Christophe_Botella1;~Maximilien_Servajean2;~C\u00e9sar_Leblanc1;~R\u00e9mi_Palard1;~Theo_Larcher1;~Benjamin_Deneu1;~Diego_Marcos1;~Pierre_Bonnet1;~Alexis_Joly2", "gender": "M;M;;M;M;;;;M;M", "homepage": "https://sites.google.com/view/picekl/;http://www.christophebotella.fr;http://maximiliense.github.io;https://github.com/cesar-leblanc/;;https://github.com/tlarcher;;;;http://www-sop.inria.fr/members/Alexis.Joly/wiki/pmwiki.php", "dblp": "224/1991;;;;;;;171/0518;;http://dblp.uni-trier.de/pers/hd/j/Joly:Alexis", "google_scholar": ";https://scholar.google.fr/citations?user=c7TVkqYAAAAJ;;rJFLqvQAAAAJ;;IU_y1C8AAAAJ;https://scholar.google.com/citations?hl=en;IUqydU0AAAAJ;https://scholar.google.fr/citations?user=HTF5OegAAAAJ;https://scholar.google.fr/citations?user=kbpkTGgAAAAJ", "orcid": "0000-0002-6041-9722;0000-0002-5249-911X;;0000-0002-5682-8179;;0009-0007-2726-4603;0000-0003-0640-5706;;0000-0002-2828-4389;0000-0002-2161-9940", "linkedin": ";;;;remi-palard/;;;;;", "or_profile": "~Lukas_Picek1;~Christophe_Botella1;~Maximilien_Servajean2;~C\u00e9sar_Leblanc1;~R\u00e9mi_Palard1;~Theo_Larcher1;~Benjamin_Deneu1;~Diego_Marcos1;~Pierre_Bonnet1;~alexis_joly1", "aff": "University of West Bohemia;INRIA;LIRMM;INRIA;Centre de coop\u00e9ration internationale en recherche agronomique pour le d\u00e9veloppement;INRIA;Swiss Federal Research Institute for Forest, Snow and Landscape Research (WSL);INRIA;Centre de coop\u00e9ration internationale en recherche agronomique pour le d\u00e9veloppement;", "aff_domain": "zcu.cz;inria.fr;lirmm.fr;inria.fr;cirad.fr;inria.fr;wsl.ch;inria.fr;cirad.fr;", "position": "Researcher;Researcher;Assistant Professor;PhD student;Research Engineer;Research engineer;Postdoc;Assistant Professor;Researcher;", "bibtex": "@inproceedings{\npicek2024geoplant,\ntitle={GeoPlant: Spatial Plant Species Prediction Dataset},\nauthor={Lukas Picek and Christophe Botella and Maximilien Servajean and C{\\'e}sar Leblanc and R{\\'e}mi Palard and Theo Larcher and Benjamin Deneu and Diego Marcos and Pierre Bonnet and Alexis Joly},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=GHlJM45fWY}\n}", "github": "", "reviewers": "QH2a;7yVJ;YHJc;faPk", "pdf_size": 13753075, "rating": "6;7;8;10", "confidence": "2;4;3;4", "wc_summary_and_contributions": "42;37;104;27", "wc_strengths": "54;36;58;2", "wc_improvement": "128;102;51;86", "wc_limitations": "15;24;43;32", "wc_correctness": "45;7;36;6", "wc_clarity": "24;47;15;5", "wc_relation_to_prior_work": "22;47;30;8", "wc_documentation": "25;1;12;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "356;302;350;178", "wc_reply_reviewers": "24;100;13;181", "wc_reply_authors": "0;157;20;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 7.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 52.5, 30.220026472523152 ], "wc_strengths_avg": [ 37.5, 22.107690969434145 ], "wc_improvement_avg": [ 91.75, 27.896012259819503 ], "wc_limitations_avg": [ 28.5, 10.307764064044152 ], "wc_correctness_avg": [ 23.5, 17.298843892006193 ], "wc_clarity_avg": [ 22.75, 15.530212490497354 ], "wc_relation_to_prior_work_avg": [ 26.75, 14.095655359010449 ], "wc_documentation_avg": [ 12.25, 8.525696452489967 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 296.5, 71.54544010627092 ], "wc_reply_reviewers_avg": [ 79.5, 67.5 ], "wc_reply_authors_avg": [ 44.25, 65.60630686145959 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6625413488689132, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3577865293678229098&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zcu.cz;inria.fr;lirmm.fr;inria.fr;cirad.fr;inria.fr;wsl.ch;inria.fr;cirad.fr;", "author_num": 10, "aff_unique_index": "0;1;2;1;3;1;4;1;3", "aff_unique_norm": "University of West Bohemia;INRIA;Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier;Centre de coop\u00e9ration internationale en recherche agronomique pour le d\u00e9veloppement;Swiss Federal Research Institute for Forest, Snow and Landscape Research", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zcu.cz;https://www.inria.fr;https://www.lirmm.fr;;https://www.wsl.ch", "aff_unique_abbr": "ZCU;INRIA;LIRMM;;WSL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montpellier", "aff_country_unique_index": "0;1;1;1;1;1;2;1;1", "aff_country_unique": "Czech Republic;France;Switzerland" }, { "title": "Differentiable Quantum Computing for Large-scale Linear Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95915", "id": "GHqw3xLAvd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GHqw3xLAvd", "openreview": "https://openreview.net/forum?id=GHqw3xLAvd", "poster": "", "project": "", "author_site": "Connor Clayton, Jiaqi Leng, Gengzhi Yang, Yi-Ling Qiao, Ming Lin, Xiaodi Wu", "tldr": "", "abstract": "As industrial models and designs grow increasingly complex, the demand for optimal control of large-scale dynamical systems has significantly increased. However, traditional methods for optimal control incur significant overhead as problem dimensions grow. In this paper, we introduce an end-to-end quantum algorithm for linear-quadratic control with provable speedups. Our algorithm, based on a policy gradient method, incorporates a novel quantum subroutine for solving the matrix Lyapunov equation. Specifically, we build a *quantum-assisted differentiable simulator* for efficient gradient estimation that is more accurate and robust than classical methods relying on stochastic approximation. Compared to the classical approaches, our method achieves a *super-quadratic* speedup. To the best of our knowledge, this is the first end-to-end quantum application to linear control problems with provable quantum advantage.", "keywords": "quantum computing;quantum algorithms;optimal control;differentiable physics;reinforcement learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/da5f69c62b0914a8108acffe0e2b1ad0e21747d5.zip", "author": "Connor Clayton;Jiaqi Leng;Gengzhi Yang;Yi-Ling Qiao;Ming Lin;Xiaodi Wu", "authorids": "~Connor_Clayton1;~Jiaqi_Leng1;~Gengzhi_Yang1;~Yi-Ling_Qiao1;~Ming_Lin2;~Xiaodi_Wu1", "gender": "M;M;;;F;M", "homepage": ";https://jiaqileng.github.io/;https://genz17.github.io/;;http://www.cs.umd.edu/~lin;https://www.cs.umd.edu/~xwu/index.html", "dblp": ";271/0195;;226/5117;l/MingCLin.html;66/8037", "google_scholar": ";Rr83RUsAAAAJ;;ghpLm2cAAAAJ;ugFNit4AAAAJ;", "orcid": ";;;;0000-0003-3736-6949;", "linkedin": "connor-clayton-cmu;;;;mlin2/;", "or_profile": "~Connor_Clayton1;~Jiaqi_Leng1;~Gengzhi_Yang1;~Yi-Ling_Qiao1;~Ming_Lin2;~Xiaodi_Wu1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Amazon;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu;amazon.com;cs.umd.edu", "position": "PhD student;PhD student;PhD student;PhD student;Amazon Scholar;Assistant Professor", "bibtex": "@inproceedings{\nclayton2024differentiable,\ntitle={Differentiable Quantum Computing for Large-scale Linear Control},\nauthor={Connor Clayton and Jiaqi Leng and Gengzhi Yang and Yi-Ling Qiao and Ming Lin and Xiaodi Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GHqw3xLAvd}\n}", "github": "", "reviewers": "dSDS;v7Mi;oizh;7tHw", "pdf_size": 929795, "rating": "5;5;5;6", "confidence": "2;3;2;2", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "71;33;62;48", "wc_strengths": "42;46;36;14", "wc_weaknesses": "31;107;127;7", "wc_questions": "64;67;64;24", "wc_limitations": "24;9;12;1", "wc_review": "232;262;301;94", "wc_reply_reviewers": "16;65;0;0", "wc_reply_authors": "24;38;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.5, 14.396180048887969 ], "wc_strengths_avg": [ 34.5, 12.359207094308275 ], "wc_weaknesses_avg": [ 68.0, 50.22947341949744 ], "wc_questions_avg": [ 54.75, 17.795715776557007 ], "wc_limitations_avg": [ 11.5, 8.261355820929152 ], "wc_review_avg": [ 222.25, 77.9819690697792 ], "wc_reply_reviewers_avg": [ 20.25, 26.649343331496933 ], "wc_reply_authors_avg": [ 15.5, 16.27114009527298 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YYJwvrdbk-MJ:scholar.google.com/&scioq=Differentiable+Quantum+Computing+for+Large-scale+Linear+Control&hl=en&as_sdt=0,10", "gs_version_total": 4, "email": "umd.edu;umd.edu;umd.edu;umd.edu;amazon.com;cs.umd.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "University of Maryland;Amazon;University of Maryland, College Park", "aff_unique_dep": ";Amazon.com, Inc.;Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www.amazon.com;https://www/umd.edu", "aff_unique_abbr": "UMD;Amazon;UMD", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond Efficiency: Molecular Data Pruning for Enhanced Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95914", "id": "GJ0qIevGjD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GJ0qIevGjD", "openreview": "https://openreview.net/forum?id=GJ0qIevGjD", "poster": "/media/PosterPDFs/NeurIPS%202024/95914.png?t=1731835258.9571745", "project": "", "author_site": "Dingshuo Chen, Zhixun Li, Yuyan Ni, Guibin Zhang, Ding Wang, Qiang Liu, Shu Wu, Jeffrey Yu, Liang Wang", "tldr": "", "abstract": "With the emergence of various molecular tasks and massive datasets, how to perform efficient training has become an urgent yet under-explored issue in the area. Data pruning (DP), as an oft-stated approach to saving training burdens, filters out less influential samples to form a coreset for training. However, the increasing reliance on pretrained models for molecular tasks renders traditional in-domain DP methods incompatible. Therefore, we propose a **Mol**ecular data **P**runing framework for **e**nhanced **G**eneralization (**MolPeg**), which focuses on the source-free data pruning scenario, where data pruning is applied with pretrained models. By maintaining two models with different updating paces during training, we introduce a novel scoring function to measure the informativeness of samples based on the loss discrepancy. As a plug-and-play framework, MolPeg realizes the perception of both source and target domain and consistently outperforms existing DP methods across four downstream tasks. Remarkably, it can surpass the performance obtained from full-dataset training, even when pruning up to 60-70% of the data on HIV and PCBA dataset. Our work suggests that the discovery of effective data-pruning metrics could provide a viable path to both enhanced efficiency and superior generalization in transfer learning.", "keywords": "Dynamic Data Pruning;Training Acceleration;Molecular Representation Learning", "primary_area": "other", "supplementary_material": "/attachment/9d4a8eca949a3484480fd742cab4a070333518d7.zip", "author": "Dingshuo Chen;Zhixun Li;Yuyan Ni;Guibin Zhang;Ding Wang;Qiang Liu;Shu Wu;Jeffrey Xu Yu;Liang Wang", "authorids": "~Dingshuo_Chen1;~Zhixun_Li1;~Yuyan_Ni1;~Guibin_Zhang1;~Ding_Wang5;~Qiang_Liu8;~Shu_Wu1;~Jeffrey_Xu_Yu1;~Liang_Wang3", "gender": "M;M;;;M;M;M;M;M", "homepage": ";;https://nyyxxx.github.io/;;https://www.wangding.site/;https://john-qiangliu.tech/;http://www.shuwu.name;http://www.se.cuhk.edu.hk/people/yu.html;", "dblp": "289/7535;;117/6286;;;61/3234-6;06/3577;y/JXuYu;56/4499-1", "google_scholar": "jvrhEfIAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;wI7u19YAAAAJ;https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=iHevumsAAAAJ;", "orcid": ";0000-0001-6750-9002;;;;0000-0002-9233-3827;0000-0003-2164-3577;;", "linkedin": ";;;;;;;;", "or_profile": "~Dingshuo_Chen1;~Zhixun_Li1;~Yuyan_Ni1;~Guibin_Zhang1;~Ding_Wang5;~Qiang_Liu8;~Shu_Wu1;~Jeffrey_Xu_Yu1;~Liang_Wang3", "aff": "Institute of automation, Chinese Academy of Sciences;The Chinese University of Hong Kong;University of Chinese Academy of Sciences;;Shandong University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;The Chinese University of Hong Kong;Institute of Automation\uff0c CAS\uff0cChina", "aff_domain": "ia.ac.cn;se.cuhk.edu.hk;ucas.ac.cn;;sdu.edu.cn;nlpr.ia.ac.cn;ia.ac.cn;cuhk.edu.hk;ia.ac.cn", "position": "PhD student;PhD student;PhD student;;Undergrad student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024beyond,\ntitle={Beyond Efficiency: Molecular Data Pruning for Enhanced Generalization},\nauthor={Dingshuo Chen and Zhixun Li and Yuyan Ni and Guibin Zhang and Ding Wang and Qiang Liu and Shu Wu and Jeffrey Xu Yu and Liang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GJ0qIevGjD}\n}", "github": "", "reviewers": "zkte;uU9Z;mAK4", "pdf_size": 641409, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "3;2;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "47;86;63", "wc_strengths": "68;58;41", "wc_weaknesses": "142;96;55", "wc_questions": "1;32;2", "wc_limitations": "14;1;2", "wc_review": "272;273;163", "wc_reply_reviewers": "0;50;31", "wc_reply_authors": "77;338;28", "reply_reviewers": "0;1;1", "reply_authors": "2;4;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 65.33333333333333, 16.006942938057293 ], "wc_strengths_avg": [ 55.666666666666664, 11.145502331533658 ], "wc_weaknesses_avg": [ 97.66666666666667, 35.537148012873644 ], "wc_questions_avg": [ 11.666666666666666, 14.38363267359428 ], "wc_limitations_avg": [ 5.666666666666667, 5.9066817155564495 ], "wc_review_avg": [ 236.0, 51.62040940041707 ], "wc_reply_reviewers_avg": [ 27.0, 20.607442021431645 ], "wc_reply_authors_avg": [ 147.66666666666666, 136.06452717573217 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3201881997457270361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;se.cuhk.edu.hk;ucas.ac.cn;;sdu.edu.cn;nlpr.ia.ac.cn;ia.ac.cn;cuhk.edu.hk;ia.ac.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Chinese University of Hong Kong;University of Chinese Academy of Sciences;Shandong University", "aff_unique_dep": "Institute of Automation;;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.cuhk.edu.hk;http://www.ucas.ac.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "CAS;CUHK;UCAS;SDU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Language Models as Hierarchy Encoders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95913", "id": "GJMYvWzjE1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GJMYvWzjE1", "openreview": "https://openreview.net/forum?id=GJMYvWzjE1", "poster": "/media/PosterPDFs/NeurIPS%202024/95913.png?t=1728673479.0182958", "project": "", "author_site": "Yuan He, Moy Yuan, Jiaoyan Chen, Ian Horrocks", "tldr": "", "abstract": "Interpreting hierarchical structures latent in language is a key limitation of current language models (LMs). While previous research has implicitly leveraged these hierarchies to enhance LMs, approaches for their explicit encoding are yet to be explored. To address this, we introduce a novel approach to re-train transformer encoder-based LMs as Hierarchy Transformer encoders (HiTs), harnessing the expansive nature of hyperbolic space. Our method situates the output embedding space of pre-trained LMs within a Poincar\u00e9 ball with a curvature that adapts to the embedding dimension, followed by re-training on hyperbolic clustering and centripetal losses. These losses are designed to effectively cluster related entities (input as texts) and organise them hierarchically. We evaluate HiTs against pre-trained LMs, standard fine-tuned LMs, and several hyperbolic embedding baselines, focusing on their capabilities in simulating transitive inference, predicting subsumptions, and transferring knowledge across hierarchies. The results demonstrate that HiTs consistently outperform all baselines in these tasks, underscoring the effectiveness and transferability of our re-trained hierarchy encoders.", "keywords": "Language Models;Transformer Encoders;Hierarchy Encoders;Hyperbolic Embedding", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c9cb768a0d996947343c50bc7271ae2843facd79.zip", "author": "Yuan He;Moy Yuan;Jiaoyan Chen;Ian Horrocks", "authorids": "~Yuan_He5;~Moy_Yuan1;~Jiaoyan_Chen1;~Ian_Horrocks1", "gender": "M;M;;", "homepage": "https://www.yuanhe.wiki/;https://chenjiaoyan.github.io/;http://www.cs.ox.ac.uk/ian.horrocks/;https://www.moyyuan.com", "dblp": "11/1735-8;56/8110-1;h/IanHorrocks;331/2259", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=5Cy4z8wAAAAJ;0ypdmcYAAAAJ;", "orcid": "0000-0002-4486-1262;0000-0003-4643-6750;;", "linkedin": "yuan-he-0557781aa/;;;", "or_profile": "~Yuan_He5;~Jiaoyan_Chen1;~Ian_Horrocks1;~Zhangdie_Yuan1", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Cambridge", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;cam.ac.uk", "position": "PhD student;Senior Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nhe2024language,\ntitle={Language Models as Hierarchy Encoders},\nauthor={Yuan He and Moy Yuan and Jiaoyan Chen and Ian Horrocks},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GJMYvWzjE1}\n}", "github": "", "reviewers": "c8m8;DpLC;fC9f;9j2x", "pdf_size": 1048574, "rating": "4;5;5;6", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "31;135;86;54", "wc_strengths": "66;81;36;38", "wc_weaknesses": "80;249;53;29", "wc_questions": "2;40;14;3", "wc_limitations": "1;1;1;9", "wc_review": "180;506;190;133", "wc_reply_reviewers": "0;92;9;5", "wc_reply_authors": "0;25;19;13", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.5, 39.01602234979881 ], "wc_strengths_avg": [ 55.25, 19.01808349965895 ], "wc_weaknesses_avg": [ 102.75, 86.34342765955033 ], "wc_questions_avg": [ 14.75, 15.31951369985353 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 252.25, 148.07493879789382 ], "wc_reply_reviewers_avg": [ 26.5, 37.95062581829185 ], "wc_reply_authors_avg": [ 14.25, 9.256754290786809 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10362281598636036660&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "cs.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Oxford;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "Oxford;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "On the Convergence of Loss and Uncertainty-based Active Learning Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95912", "id": "GLUIuli3Sm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GLUIuli3Sm", "openreview": "https://openreview.net/forum?id=GLUIuli3Sm", "poster": "/media/PosterPDFs/NeurIPS%202024/95912.png?t=1732281458.479405", "project": "", "author_site": "Daniel Haimovich, Dima Karamshuk, Fridolin Linder, Niek Tax, Milan Vojnovic", "tldr": "", "abstract": "We investigate the convergence rates and data sample sizes required for training a machine learning model using a stochastic gradient descent (SGD) algorithm, where data points are sampled based on either their loss value or uncertainty value. These training methods are particularly relevant for active learning and data subset selection problems. For SGD with a constant step size update, we present convergence results for linear classifiers and linearly separable datasets using squared hinge loss and similar training loss functions. Additionally, we extend our analysis to more general classifiers and datasets, considering a wide range of loss-based sampling strategies and smooth convex training loss functions. We propose a novel algorithm called Adaptive-Weight Sampling (AWS) that utilizes SGD with an adaptive step size that achieves stochastic Polyak's step size in expectation. We establish convergence rate results for AWS for smooth convex training loss functions. Our numerical experiments demonstrate the efficiency of AWS on various datasets by using either exact or estimated loss values.", "keywords": "active learning;uncertainty sampling;loss-based sampling", "primary_area": "active_learning", "supplementary_material": "", "author": "Daniel Haimovich;Dima Karamshuk;Fridolin Linder;Niek Tax;Milan Vojnovic", "authorids": "~Daniel_Haimovich1;~Dima_Karamshuk1;~Fridolin_Linder1;~Niek_Tax1;~Milan_Vojnovic1", "gender": ";M;M;M;M", "homepage": ";https://karamsh.uk/;;;https://personal.lse.ac.uk/vojnovic/", "dblp": "274/1714.html;;;143/7361;00/1815", "google_scholar": ";;aQUGTp0AAAAJ;XkRvCC4AAAAJ;https://scholar.google.co.uk/citations?user=z4JhSBwAAAAJ", "orcid": ";;;0000-0001-7239-5206;", "linkedin": "daniel-haimovich-b9a49bb5/;;fridolinlinder/;niektax/;", "or_profile": "~Daniel_Haimovich1;~Dima_Karamshuk1;~Fridolin_Linder1;~Niek_Tax1;~Milan_Vojnovic1", "aff": ";Meta;;Meta Facebook;London School of Economics", "aff_domain": ";meta.com;;facebook.com;lse.ac.uk", "position": ";Researcher;;Researcher;Professor", "bibtex": "@inproceedings{\nhaimovich2024on,\ntitle={On the Convergence of Loss and Uncertainty-based Active Learning Algorithms},\nauthor={Daniel Haimovich and Dima Karamshuk and Fridolin Linder and Niek Tax and Milan Vojnovic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GLUIuli3Sm}\n}", "github": "", "reviewers": "7aAT;UMCS;ie4P", "pdf_size": 1420393, "rating": "5;6;6", "confidence": "4;4;2", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "325;95;62", "wc_strengths": "103;24;86", "wc_weaknesses": "220;30;229", "wc_questions": "327;117;62", "wc_limitations": "2;1;13", "wc_review": "977;267;452", "wc_reply_reviewers": "31;266;31", "wc_reply_authors": "282;1723;0", "reply_reviewers": "1;2;1", "reply_authors": "3;6;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 160.66666666666666, 116.979580364362 ], "wc_strengths_avg": [ 71.0, 33.95094500402996 ], "wc_weaknesses_avg": [ 159.66666666666666, 91.76176884859088 ], "wc_questions_avg": [ 168.66666666666666, 114.18795421973759 ], "wc_limitations_avg": [ 5.333333333333333, 5.436502143433364 ], "wc_review_avg": [ 565.3333333333334, 300.7305918740042 ], "wc_reply_reviewers_avg": [ 109.33333333333333, 110.78006238589245 ], "wc_reply_authors_avg": [ 668.3333333333334, 754.5958447333836 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 2.0548046676563256 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MI-b-tb6-jsJ:scholar.google.com/&scioq=On+the+Convergence+of+Loss+and+Uncertainty-based+Active+Learning+Algorithms&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";meta.com;;facebook.com;lse.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Meta;London School of Economics", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.lse.ac.uk", "aff_unique_abbr": "Meta;LSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "DeepITE: Designing Variational Graph Autoencoders for Intervention Target Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95911", "id": "GMsi9966DR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GMsi9966DR", "openreview": "https://openreview.net/forum?id=GMsi9966DR", "poster": "/media/PosterPDFs/NeurIPS%202024/95911.png?t=1733802063.986854", "project": "", "author_site": "Hongyuan Tao, Hang Yu, Jianguo Li", "tldr": "", "abstract": "Intervention Target Estimation (ITE) is vital for both understanding and decision-making in complex systems, yet it remains underexplored. Current ITE methods are hampered by their inability to learn from distinct intervention instances collaboratively and to incorporate rich insights from labeled data, which leads to inefficiencies such as the need for re-estimation of intervention targets with minor data changes or alterations in causal graphs. In this paper, we propose DeepITE, an innovative deep learning framework designed around a variational graph autoencoder. DeepITE can concurrently learn from both unlabeled and labeled data with different intervention targets and causal graphs, harnessing correlated information in a self or semi-supervised manner. The model's inference capabilities allow for the immediate identification of intervention targets on unseen samples and novel causal graphs, circumventing the need for retraining. Our extensive testing confirms that DeepITE not only surpasses 13 baseline methods in the Recall@k metric but also demonstrates expeditious inference times, particularly on large graphs. Moreover, incorporating a modest fraction of labeled data (5-10\\%) substantially enhances DeepITE's performance, further solidifying its practical applicability. Our source code is available at https://github.com/alipay/DeepITE.", "keywords": "Intervention Target Estimation; Intervention Recognition; VGAE; semi-supervised; self-supervised", "primary_area": "causal_inference", "supplementary_material": "", "author": "Hongyuan Tao;Hang Yu;Jianguo Li", "authorids": "~Hongyuan_Tao1;~Hang_Yu1;~Jianguo_Li2", "gender": "M;M;M", "homepage": ";;https://sites.google.com/site/leeplus/", "dblp": "285/7135.html;74/2568-2;70/6237", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;n44GlFcAAAAJ", "orcid": ";;", "linkedin": ";hang-yu-7ba38844/;", "or_profile": "~Hongyuan_Tao1;~Hang_Yu1;~jianguo_Li1", "aff": "Tsinghua University;Ant Group;Ant Group", "aff_domain": "tsinghua.edu.cn;antgroup.com;antgroup.com", "position": "MS student;Senior Algorithm Expert;Director", "bibtex": "@inproceedings{\ntao2024deepite,\ntitle={Deep{ITE}: Designing Variational Graph Autoencoders for Intervention Target Estimation},\nauthor={Hongyuan Tao and Hang Yu and Jianguo Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GMsi9966DR}\n}", "github": "", "reviewers": "bhQC;HUbk;q8ww;GccQ", "pdf_size": 1141853, "rating": "4;5;5;6", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;4", "wc_summary": "83;93;48;113", "wc_strengths": "129;149;44;60", "wc_weaknesses": "211;395;144;68", "wc_questions": "72;158;206;142", "wc_limitations": "1;1;15;8", "wc_review": "496;796;457;391", "wc_reply_reviewers": "0;32;168;131", "wc_reply_authors": "69;85;356;522", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 84.25, 23.551804601770964 ], "wc_strengths_avg": [ 95.5, 44.432533126077786 ], "wc_weaknesses_avg": [ 204.5, 121.06300012803251 ], "wc_questions_avg": [ 144.5, 48.02863729068315 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 535.0, 155.29166107682667 ], "wc_reply_reviewers_avg": [ 82.75, 68.95424207400151 ], "wc_reply_authors_avg": [ 258.0, 190.36149820801475 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:duXoe-8G5cEJ:scholar.google.com/&scioq=DeepITE:+Designing+Variational+Graph+Autoencoders+for+Intervention+Target+Estimation&hl=en&as_sdt=0,47", "gs_version_total": 0, "email": "tsinghua.edu.cn;antgroup.com;antgroup.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "THU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Antigen-Specific Antibody Design via Direct Energy-based Preference Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95910", "id": "GN2GXjPyN8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GN2GXjPyN8", "openreview": "https://openreview.net/forum?id=GN2GXjPyN8", "poster": "/media/PosterPDFs/NeurIPS%202024/95910.png?t=1731581733.305357", "project": "", "author_site": "Xiangxin Zhou, Dongyu Xue, Ruizhe Chen, Zaixiang Zheng, Liang Wang, Quanquan Gu", "tldr": "", "abstract": "Antibody design, a crucial task with significant implications across various disciplines such as therapeutics and biology, presents considerable challenges due to its intricate nature. In this paper, we tackle antigen-specific antibody sequence-structure co-design as an optimization problem towards specific preferences, considering both rationality and functionality. Leveraging a pre-trained conditional diffusion model that jointly models sequences and structures of antibodies with equivariant neural networks, we propose direct energy-based preference optimization to guide the generation of antibodies with both rational structures and considerable binding affinities to given antigens. Our method involves fine-tuning the pre-trained diffusion model using a residue-level decomposed energy preference. Additionally, we employ gradient surgery to address conflicts between various types of energy, such as attraction and repulsion. Experiments on RAbD benchmark show that our approach effectively optimizes the energy of generated antibodies and achieves state-of-the-art performance in designing high-quality antibodies with low total energy and high binding affinity simultaneously, demonstrating the superiority of our approach.", "keywords": "direct preference optimization;diffusion model;antibody design", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xiangxin Zhou;Dongyu Xue;Ruizhe Chen;Zaixiang Zheng;Liang Wang;Quanquan Gu", "authorids": "~Xiangxin_Zhou1;~Dongyu_Xue1;~Ruizhe_Chen3;~Zaixiang_Zheng2;~Liang_Wang3;~Quanquan_Gu1", "gender": "Not Specified;M;;M;M;M", "homepage": ";;;https://zhengzx-nlp.github.io/;;http://web.cs.ucla.edu/~qgu/", "dblp": "247/9275;;;205/2769;56/4499-1;50/4597", "google_scholar": "eQgIWcQAAAAJ;;;JPSrehMAAAAJ;;GU9HgNAAAAAJ", "orcid": ";0000-0003-1896-4222;0000-0002-0370-1203;;;", "linkedin": ";;;;;", "or_profile": "~Xiangxin_Zhou1;~Dongyu_Xue1;~Ruizhe_Chen3;~Zaixiang_Zheng2;~Liang_Wang3;~Quanquan_Gu1", "aff": "Institute of Automation, Chinese Academy of Sciences;ByteDance AI Lab;Hunan University;ByteDance Research;Institute of Automation\uff0c CAS\uff0cChina;University of California, Los Angeles", "aff_domain": "ia.ac.cn;bytedance.com;hnu.edu.cn;bytedance.com;ia.ac.cn;cs.ucla.edu", "position": "PhD student;Researcher;MS student;Research Scientist;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024antigenspecific,\ntitle={Antigen-Specific Antibody Design via Direct Energy-based Preference Optimization},\nauthor={Xiangxin Zhou and Dongyu Xue and Ruizhe Chen and Zaixiang Zheng and Liang Wang and Quanquan Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GN2GXjPyN8}\n}", "github": "", "reviewers": "FkeK;i9FY;X6SE;d9wd", "pdf_size": 2327233, "rating": "5;5;6;7", "confidence": "3;3;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "66;30;161;61", "wc_strengths": "94;18;80;95", "wc_weaknesses": "75;188;170;134", "wc_questions": "76;29;102;22", "wc_limitations": "5;1;1;4", "wc_review": "316;266;514;316", "wc_reply_reviewers": "16;43;13;44", "wc_reply_authors": "0;99;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 49.03315205042401 ], "wc_strengths_avg": [ 71.75, 31.594105462886585 ], "wc_weaknesses_avg": [ 141.75, 43.16465568031326 ], "wc_questions_avg": [ 57.25, 33.14645531576491 ], "wc_limitations_avg": [ 2.75, 1.7853571071357126 ], "wc_review_avg": [ 353.0, 95.16827202382105 ], "wc_reply_reviewers_avg": [ 29.0, 14.543039572248986 ], "wc_reply_authors_avg": [ 24.75, 42.868257487329714 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5828664415805720621&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ia.ac.cn;bytedance.com;hnu.edu.cn;bytedance.com;ia.ac.cn;cs.ucla.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;0;3", "aff_unique_norm": "Chinese Academy of Sciences;ByteDance;Hunan University;University of California, Los Angeles", "aff_unique_dep": "Institute of Automation;AI Lab;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.bytedance.com;http://www.hunu.edu.cn/;https://www.ucla.edu", "aff_unique_abbr": "CAS;ByteDance;HNU;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "MR-Ben: A Meta-Reasoning Benchmark for Evaluating System-2 Thinking in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95909", "id": "GN2qbxZlni", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GN2qbxZlni", "openreview": "https://openreview.net/forum?id=GN2qbxZlni", "poster": "/media/PosterPDFs/NeurIPS%202024/95909.png?t=1733721484.6146278", "project": "", "author_site": "Zhongshen Zeng, Yinhong Liu, Yingjia Wan, Jingyao Li, Pengguang Chen, Jianbo Dai, Yuxuan Yao, Rongwu Xu, Zehan Qi, Wanru Zhao, Linling Shen, Jianqiao Lu, Haochen Tan, Yukang Chen, Hao Zhang, Zhan Shi, Bailin Wang, Zhijiang Guo, Jiaya Jia", "tldr": "", "abstract": "Large language models (LLMs) have shown increasing capability in problem-solving and decision-making, largely based on the step-by-step chain-of-thought reasoning processes. However, evaluating these reasoning abilities has become increasingly challenging. Existing outcome-based benchmarks are beginning to saturate, becoming less effective in tracking meaningful progress. To address this, we present a process-based benchmark MR-Ben that demands a meta-reasoning skill, where LMs are asked to locate and analyse potential errors in automatically generated reasoning steps. Our meta-reasoning paradigm is especially suited for system-2 slow thinking, mirroring the human cognitive process of carefully examining assumptions, conditions, calculations, and logic to identify mistakes. MR-Ben comprises 5,975 questions curated by human experts across a wide range of subjects, including physics, chemistry, logic, coding, and more. Through our designed metrics for assessing meta-reasoning on this benchmark, we identify interesting limitations and weaknesses of current LLMs (open-source and closed-source models). For example, with models like the o1 series from OpenAI demonstrating strong performance by effectively scrutinizing the solution space, many other state-of-the-art models fall significantly behind on MR-Ben, exposing potential shortcomings in their training strategies and inference methodologies.", "keywords": "Large Language Models;Reasoning;System-2;Slow Thinking;Resource and Evaluation;Analysis and Interpretability", "primary_area": "evaluation", "supplementary_material": "/attachment/fcad285f52468fc3421194c1c7d944b1659b33e3.zip", "author": "Zhongshen Zeng;Yinhong Liu;Yingjia Wan;Jingyao Li;Pengguang Chen;Jianbo Dai;Yuxuan Yao;Rongwu Xu;Zehan Qi;Wanru Zhao;Linling Shen;Jianqiao Lu;Haochen Tan;Yukang Chen;Hao Zhang;Zhan Shi;Bailin Wang;Zhijiang Guo;Jiaya Jia", "authorids": "~Zhongshen_Zeng1;~Yinhong_Liu1;~Yingjia_Wan2;~Jingyao_Li2;~Pengguang_Chen1;~Jianbo_Dai1;~Yuxuan_Yao2;~Rongwu_Xu1;~Zehan_Qi2;~Wanru_Zhao1;~Linling_Shen1;~Jianqiao_Lu1;~Haochen_Tan1;~Yukang_Chen1;~Hao_Zhang3;~Zhan_Shi3;~Bailin_Wang3;~Zhijiang_Guo2;~Jiaya_Jia1", "gender": "M;M;;F;M;M;;M;;;F;M;M;M;M;M;;M;M", "homepage": "https://www.linkedin.com/in/zhongshen-zeng;http://yinhongliu.com;;https://julietljy.github.io/;https://github.com/akuxcw;https://1e0ndavid.github.io/;;http://rongwuxu.com;;;;https://jianqiaolu.github.io/;https://scholars.cityu.edu.hk/en/persons/haochen-tan(6f087d1a-f724-44a4-83b4-9f3064fc52b7)/publications.html;https://yukangchen.com/;https://26hzhang.github.io/;https://aleczhanshi.github.io/;;https://cartus.github.io/;https://jiaya.me", "dblp": ";88/8538;;;189/7442.html;178/2976;;315/6942;;;;358/4791;269/9939;225/4601;55/2270-48;;;43/6147;31/5649", "google_scholar": ";fHQKF_AAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=lMnVrgIAAAAJ;jU1A5BYAAAAJ;;HyjNrDMAAAAJ;;;;uIW6d6AAAAAJ;;6p0ygKUAAAAJ;u7nrZOUAAAAJ;w2I-wNQAAAAJ;;8b-u3icAAAAJ;https://scholar.google.com.tw/citations?user=XPAkzTEAAAAJ", "orcid": ";;;;;0009-0005-3183-888X;;;;;;;;;0000-0002-2725-6458;;;;", "linkedin": "zhongshen-zeng;yinhong-liu-3a4aa6136/;;;;jianbo-dai-9085a2161/;;;;;https://www.linkedin.com/me?trk=p_mwlite_feed-secondary_nav;jianqiao-lu-308620201/;;;https://linkedin.com/in/hzhang26;;;;", "or_profile": "~Zhongshen_Zeng1;~Yinhong_Liu1;~Yingjia_Wan2;~Jingyao_Li2;~Pengguang_Chen1;~Jianbo_Dai1;~Yuxuan_Yao2;~Rongwu_Xu1;~Zehan_Qi2;~Wanru_Zhao1;~Linling_Shen1;~Jianqiao_Lu1;~Haochen_Tan1;~Yukang_Chen1;~Hao_Zhang3;~Zhan_Shi3;~Bailin_Wang3;~Zhijiang_Guo2;~Jiaya_Jia1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Cambridge;;Department of Computer Science and Engineering, The Chinese University of Hong Kong;SmartMore;Huawei Technologies Ltd.;;Tsinghua University;;;University of Texas at Austin;University of Hong Kong;City University of Hong Kong;NVIDIA;Huawei Technologies Ltd.;;;University of Cambridge;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "cse.cuhk.edu.hk;cam.ac.uk;;cse.cuhk.edu.hk;smartmore.com;huawei.com;;tsinghua.edu.cn;;;utexas.edu;hku.hk;cityu.edu.hk;nvidia.com;huawei.com;;;cam.ac.uk;cse.ust.hk", "position": "PhD student;PhD student;;PhD student;Researcher;Researcher;;MS student;;;PhD student;PhD student;PhD student;Researcher;Principal Algorithm Engineer;;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzeng2024mrben,\ntitle={{MR}-Ben: A Meta-Reasoning Benchmark for Evaluating System-2 Thinking in {LLM}s},\nauthor={Zhongshen Zeng and Yinhong Liu and Yingjia Wan and Jingyao Li and Pengguang Chen and Jianbo Dai and Yuxuan Yao and Rongwu Xu and Zehan Qi and Wanru Zhao and Linling Shen and Jianqiao Lu and Haochen Tan and Yukang Chen and Hao Zhang and Zhan Shi and Bailin Wang and Zhijiang Guo and Jiaya Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GN2qbxZlni}\n}", "github": "", "reviewers": "JijF;AKte;TVR6;SioJ", "pdf_size": 1987339, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "39;125;114;214", "wc_strengths": "66;121;104;76", "wc_weaknesses": "60;211;118;452", "wc_questions": "60;2;71;88", "wc_limitations": "1;2;1;23", "wc_review": "226;461;408;853", "wc_reply_reviewers": "17;0;16;137", "wc_reply_authors": "58;0;28;551", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.0, 62.092672675606416 ], "wc_strengths_avg": [ 91.75, 21.890351755967743 ], "wc_weaknesses_avg": [ 210.25, 149.60677625027552 ], "wc_questions_avg": [ 55.25, 32.32162588732194 ], "wc_limitations_avg": [ 6.75, 9.390819985496474 ], "wc_review_avg": [ 487.0, 228.57930789990593 ], "wc_reply_reviewers_avg": [ 42.5, 54.974994315597705 ], "wc_reply_authors_avg": [ 159.25, 227.10501425552013 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 19, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12463812151554143837&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cse.cuhk.edu.hk;cam.ac.uk;;cse.cuhk.edu.hk;smartmore.com;huawei.com;;tsinghua.edu.cn;;;utexas.edu;hku.hk;cityu.edu.hk;nvidia.com;huawei.com;;;cam.ac.uk;cse.ust.hk", "author_num": 19, "aff_unique_index": "0;1;0;2;3;4;5;6;7;8;3;1;9", "aff_unique_norm": "Chinese University of Hong Kong;University of Cambridge;SmartMore;Huawei;Tsinghua University;University of Texas at Austin;University of Hong Kong;City University of Hong Kong;NVIDIA;Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering;;;Huawei Technologies;;;;;NVIDIA Corporation;Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.cam.ac.uk;;https://www.huawei.com;https://www.tsinghua.edu.cn;https://www.utexas.edu;https://www.hku.hk;https://www.cityu.edu.hk;https://www.nvidia.com;https://www.ust.hk", "aff_unique_abbr": "CUHK;Cambridge;;Huawei;THU;UT Austin;HKU;CityU;NVIDIA;HKUST", "aff_campus_unique_index": "0;1;0;3;0;0;1;0", "aff_campus_unique": "Hong Kong SAR;Cambridge;;Austin", "aff_country_unique_index": "0;1;0;0;0;3;0;0;3;0;1;0", "aff_country_unique": "China;United Kingdom;;United States" }, { "title": "Visual Sketchpad: Sketching as a Visual Chain of Thought for Multimodal Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95908", "id": "GNSMl1P5VR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GNSMl1P5VR", "openreview": "https://openreview.net/forum?id=GNSMl1P5VR", "poster": "/media/PosterPDFs/NeurIPS%202024/95908.png?t=1733810514.7834277", "project": "", "author_site": "Yushi Hu, Weijia Shi, Xingyu Fu, Dan Roth, Mari Ostendorf, Luke Zettlemoyer, Noah Smith, Ranjay Krishna", "tldr": "", "abstract": "Humans draw to facilitate reasoning: we draw auxiliary lines when solving geometry problems; we mark and circle when reasoning on maps; we use sketches to amplify our ideas and relieve our limited-capacity working memory. However, such actions are missing in current multimodal language models (LMs). Current chain-of-thought and tool-use paradigms only use text as intermediate reasoning steps. In this work, we introduce Sketchpad, a framework that gives multimodal LMs a visual sketchpad and tools to draw on the sketchpad. The LM conducts planning and reasoning according to the visual artifacts it has drawn. Different from prior work, which uses text-to-image models to enable LMs to draw, Sketchpad enables LMs to draw with lines, boxes, marks, etc., which is closer to human sketching and better facilitates reasoning. \\name can also use specialist vision models during the sketching process (e.g., draw bounding boxes with object detection models, draw masks with segmentation models), to further enhance visual perception and reasoning. We experiment on a wide range of math tasks (including geometry, functions, graph, chess) and complex visual reasoning tasks. Sketchpad substantially improves performance on all tasks over strong base models with no sketching, yielding an average gain of 12.7% on math tasks, and 8.6% on vision tasks. GPT-4o with Sketchpad sets a new state of the art on all tasks, including V*Bench (80.3%), BLINK spatial reasoning (83.9%), and visual correspondence (80.8%). We will release all code and data.", "keywords": "multimodal;large language model;computer vision;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/5f74eb235980ce3748566e5c18f1f071f62f3cce.zip", "author": "Yushi Hu;Weijia Shi;Xingyu Fu;Dan Roth;Mari Ostendorf;Luke Zettlemoyer;Noah A. Smith;Ranjay Krishna", "authorids": "~Yushi_Hu1;~Weijia_Shi1;~Xingyu_Fu1;~Dan_Roth3;~Mari_Ostendorf1;~Luke_Zettlemoyer1;~Noah_A._Smith2;~Ranjay_Krishna1", "gender": "M;;F;M;F;M;M;M", "homepage": "https://yushi-hu.github.io;https://weijiashi.notion.site/;https://zeyofu.github.io/;https://www.cis.upenn.edu/~danroth/;https://people.ece.uw.edu/ostendorf/;https://www.cs.washington.edu/people/faculty/lsz/;http://ranjaykrishna.com;https://homes.cs.washington.edu/~nasmith/", "dblp": "268/5766;132/80601;118/4769;r/DanRoth;85/2189;21/6793;167/3785;90/5204.html", "google_scholar": "mXN51X0AAAAJ;https://scholar.google.com/citations?hl=en;5p_uBNQAAAAJ;E-bpPWgAAAAJ;exS-GecAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;IcqahyAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-3200-0000-0011;0009-0006-9533-8427;;0000-0001-9385-9655;;0000-0001-8784-2531;0000-0002-2310-6380", "linkedin": ";weijia-shi-773768112;xingyu-fu-cheers;dan-roth-8667361/;mari-ostendorf-66820a1/;luke-zettlemoyer-a0109b226/;ranjay-krishna-1a344444/;", "or_profile": "~Yushi_Hu1;~Weijia_Shi1;~Xingyu_Fu1;~Dan_Roth3;~Mari_Ostendorf1;~Luke_Zettlemoyer1;~Ranjay_Krishna1;~Noah_Smith1", "aff": "University of Washington;University of Washington, Seattle;University of Pennsylvania;Amazon;University of Washington;Meta;University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "uw.edu;uw.edu;seas.upenn.edu;amazon.com;u.washington.edu;meta.com;cs.washington.edu;allenai.org", "position": "PhD student;PhD student;PhD student;VP and Distinguished Scientist;Full Professor;Researcher;Assistant Professor;Senior Director of NLP Research", "bibtex": "@inproceedings{\nhu2024visual,\ntitle={Visual Sketchpad: Sketching as a Visual Chain of Thought for Multimodal Language Models},\nauthor={Yushi Hu and Weijia Shi and Xingyu Fu and Dan Roth and Mari Ostendorf and Luke Zettlemoyer and Noah A. Smith and Ranjay Krishna},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GNSMl1P5VR}\n}", "github": "", "reviewers": "jSat;2LyD;nzNU;BpTp", "pdf_size": 8523346, "rating": "5;5;5;7", "confidence": "4;3;4;4", "soundness": "3;4;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;4", "wc_summary": "98;143;53;85", "wc_strengths": "183;149;60;66", "wc_weaknesses": "197;180;76;38", "wc_questions": "7;42;110;31", "wc_limitations": "9;8;2;1", "wc_review": "494;522;301;221", "wc_reply_reviewers": "15;52;0;0", "wc_reply_authors": "46;46;46;46", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 32.31389020220252 ], "wc_strengths_avg": [ 114.5, 52.92683629313205 ], "wc_weaknesses_avg": [ 122.75, 67.37720311796862 ], "wc_questions_avg": [ 47.5, 38.23937760999779 ], "wc_limitations_avg": [ 5.0, 3.5355339059327378 ], "wc_review_avg": [ 384.5, 127.08363387942603 ], "wc_reply_reviewers_avg": [ 16.75, 21.25294097295713 ], "wc_reply_authors_avg": [ 46.0, 0.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4806526475848658211&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uw.edu;uw.edu;seas.upenn.edu;amazon.com;u.washington.edu;meta.com;cs.washington.edu;allenai.org", "author_num": 8, "aff_unique_index": "0;0;1;2;0;3;0;4", "aff_unique_norm": "University of Washington;University of Pennsylvania;Amazon;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;Amazon.com, Inc.;Meta Platforms, Inc.;", "aff_unique_url": "https://www.washington.edu;https://www.upenn.edu;https://www.amazon.com;https://meta.com;https://allenai.org", "aff_unique_abbr": "UW;UPenn;Amazon;Meta;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Trap-MID: Trapdoor-based Defense against Model Inversion Attacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95907", "id": "GNhrGRCerd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GNhrGRCerd", "openreview": "https://openreview.net/forum?id=GNhrGRCerd", "poster": "/media/PosterPDFs/NeurIPS%202024/95907.png?t=1733061541.4708605", "project": "", "author_site": "ZhenTing Liu, ShangTse Chen", "tldr": "", "abstract": "Model Inversion (MI) attacks pose a significant threat to the privacy of Deep Neural Networks by recovering training data distribution from well-trained models. While existing defenses often rely on regularization techniques to reduce information leakage, they remain vulnerable to recent attacks. In this paper, we propose the Trapdoor-based Model Inversion Defense (Trap-MID) to mislead MI attacks. A trapdoor is integrated into the model to predict a specific label when the input is injected with the corresponding trigger. Consequently, this trapdoor information serves as the \"shortcut\" for MI attacks, leading them to extract trapdoor triggers rather than private data. We provide theoretical insights into the impacts of trapdoor's effectiveness and naturalness on deceiving MI attacks. In addition, empirical experiments demonstrate the state-of-the-art defense performance of Trap-MID against various MI attacks without the requirements for extra data or large computational overhead. Our source code is publicly available at https://github.com/ntuaislab/Trap-MID.", "keywords": "model inversion attacks;privacy;defense;trapdoor;backdoor", "primary_area": "privacy", "supplementary_material": "/attachment/fbe8463cb74c3649041c1c1a09aa74e324b20428.zip", "author": "Zhen-Ting Liu;Shang-Tse Chen", "authorids": "~Zhen-Ting_Liu1;~Shang-Tse_Chen1", "gender": "M;M", "homepage": ";https://www.csie.ntu.edu.tw/~stchen", "dblp": "396/7578.html;24/9381", "google_scholar": "yXlHFbgAAAAJ;TLfsJRwAAAAJ", "orcid": ";", "linkedin": "zhen-ting-liu/;shang-tse-chen-5a908627/", "or_profile": "~Zhen-Ting_Liu1;~Shang-Tse_Chen1", "aff": "Department of computer science and informational engineering, National Taiwan University;National Taiwan University", "aff_domain": "csie.ntu.edu.tw;ntu.edu.tw", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024trapmid,\ntitle={Trap-{MID}: Trapdoor-based Defense against Model Inversion Attacks},\nauthor={Zhen-Ting Liu and Shang-Tse Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GNhrGRCerd}\n}", "github": "", "reviewers": "P6SC;BGpu;MuQ4;bcsy", "pdf_size": 16278268, "rating": "4;5;5;6", "confidence": "4;5;3;5", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "78;54;113;126", "wc_strengths": "26;22;121;150", "wc_weaknesses": "117;232;296;973", "wc_questions": "2;44;183;62", "wc_limitations": "1;1;39;7", "wc_review": "224;353;752;1318", "wc_reply_reviewers": "56;303;23;290", "wc_reply_authors": "22;768;75;594", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;3;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.75, 28.43743131859838 ], "wc_strengths_avg": [ 79.75, 56.70262339610046 ], "wc_weaknesses_avg": [ 404.5, 334.4312335892089 ], "wc_questions_avg": [ 72.75, 67.27323018853785 ], "wc_limitations_avg": [ 12.0, 15.7797338380595 ], "wc_review_avg": [ 661.75, 425.95796447536935 ], "wc_reply_reviewers_avg": [ 168.0, 129.1104178600627 ], "wc_reply_authors_avg": [ 364.75, 322.72230710008256 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=201340345595085171&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "csie.ntu.edu.tw;ntu.edu.tw", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "Department of Computer Science and Informational Engineering", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "IncomeSCM: From tabular data set to time-series simulator and causal estimation benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97776", "id": "GNhwwbZEZ7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GNhwwbZEZ7", "openreview": "https://openreview.net/forum?id=GNhwwbZEZ7", "poster": "/media/PosterPDFs/NeurIPS%202024/97776.png?t=1733849121.8138845", "project": "", "author_site": "Fredrik Johansson", "tldr": "", "abstract": "Evaluating observational estimators of causal effects demands information that is rarely available: unconfounded interventions and outcomes from the population of interest, created either by randomization or adjustment. As a result, it is customary to fall back on simulators when creating benchmark tasks. Simulators offer great control but are often too simplistic to make challenging tasks, either because they are hand-designed and lack the nuances of real-world data, or because they are fit to observational data without structural constraints. In this work, we propose a general, repeatable strategy for turning observational data into sequential structural causal models and challenging estimation tasks by following two simple principles: 1) fitting real-world data where possible, and 2) creating complexity by composing simple, hand-designed mechanisms. We implement these ideas in a highly configurable software package and apply it to the well-known Adult income data set to construct the IncomeSCM simulator. From this, we devise multiple estimation tasks and sample data sets to compare established estimators of causal effects. The tasks present a suitable challenge, with effect estimates varying greatly in quality between methods, despite similar performance in the modeling of factual outcomes, highlighting the need for dedicated causal estimators and model selection criteria.", "keywords": "Causal effects;benchmark;structural causal model;interventions", "primary_area": "", "supplementary_material": "/attachment/da2193348b933e52cd7a704f92f612cfd587a1b4.zip", "author": "Fredrik D. Johansson", "authorids": "~Fredrik_D._Johansson1", "gender": "M", "homepage": "http://www.fredjo.com", "dblp": "58/1342-2", "google_scholar": "ml-AyBQAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Fredrik_Daniel_Johansson1", "aff": "Chalmers University of Technology", "aff_domain": "chalmers.se", "position": "Associate Professor", "bibtex": "@inproceedings{\njohansson2024incomescm,\ntitle={Income{SCM}: From tabular data set to time-series simulator and causal estimation benchmark},\nauthor={Fredrik D. Johansson},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=GNhwwbZEZ7}\n}", "github": "", "reviewers": "6W4j;rn5w;dKd7", "pdf_size": 604204, "rating": "5;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "133;64;84", "wc_strengths": "51;41;10", "wc_improvement": "142;176;10", "wc_limitations": "105;30;3", "wc_correctness": "37;21;8", "wc_clarity": "63;12;8", "wc_relation_to_prior_work": "8;5;9", "wc_documentation": "29;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "569;365;134", "wc_reply_reviewers": "0;177;362", "wc_reply_authors": "0;128;185", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 93.66666666666667, 28.986586936412884 ], "wc_strengths_avg": [ 34.0, 17.45470328211473 ], "wc_improvement_avg": [ 109.33333333333333, 71.59764117778059 ], "wc_limitations_avg": [ 46.0, 43.15089802078283 ], "wc_correctness_avg": [ 22.0, 11.86029791643813 ], "wc_clarity_avg": [ 27.666666666666668, 25.037749277618563 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 1.699673171197595 ], "wc_documentation_avg": [ 15.0, 11.430952132988164 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 356.0, 177.70199773778572 ], "wc_reply_reviewers_avg": [ 179.66666666666666, 147.79791007393246 ], "wc_reply_authors_avg": [ 104.33333333333333, 77.3577547646144 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SNzA2-JV4tkJ:scholar.google.com/&scioq=IncomeSCM:+From+tabular+data+set+to+time-series+simulator+and+causal+estimation+benchmark&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "chalmers.se", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Chalmers University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.chalmers.se", "aff_unique_abbr": "Chalmers", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "title": "Simulation-Free Training of Neural ODEs on Paired Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95906", "id": "GOgKhunkfw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GOgKhunkfw", "openreview": "https://openreview.net/forum?id=GOgKhunkfw", "poster": "/media/PosterPDFs/NeurIPS%202024/95906.png?t=1733710266.2700424", "project": "", "author_site": "Semin Kim, Jaehoon Yoo, Jinwoo Kim, Yeonwoo Cha, Saehoon Kim, Seunghoon Hong", "tldr": "", "abstract": "In this work, we investigate a method for simulation-free training of Neural Ordinary Differential Equations (NODEs) for learning deterministic mappings between paired data. Despite the analogy of NODEs as continuous-depth residual networks, their application in typical supervised learning tasks has not been popular, mainly due to the large number of function evaluations required by ODE solvers and numerical instability in gradient estimation. To alleviate this problem, we employ the flow matching framework for simulation-free training of NODEs, which directly regresses the parameterized dynamics function to a predefined target velocity field. Contrary to generative tasks, however, we show that applying flow matching directly between paired data can often lead to an ill-defined flow that breaks the coupling of the data pairs (e.g., due to crossing trajectories). We propose a simple extension that applies flow matching in the embedding space of data pairs, where the embeddings are learned jointly with the dynamic function to ensure the validity of the flow which is also easier to learn. We demonstrate the effectiveness of our method on both regression and classification tasks, where our method outperforms existing NODEs with a significantly lower number of function evaluations. The code is available at https://github.com/seminkim/simulation-free-node.", "keywords": "Neural ODE;simulation-free training;flow matching", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/ccb64d4593a6a565b1c98634b63da22310a586ce.zip", "author": "Semin Kim;Jaehoon Yoo;Jinwoo Kim;Yeonwoo Cha;Saehoon Kim;Seunghoon Hong", "authorids": "~Semin_Kim1;~Jaehoon_Yoo1;~Jinwoo_Kim4;~Yeonwoo_Cha1;~Saehoon_Kim1;~Seunghoon_Hong2", "gender": "M;M;M;;M;M", "homepage": "https://seminkim.github.io;;https://jw9730.github.io/;https://github.com/Cha-Yeonwoo/;https://saehoonkim.github.io/;https://maga33.github.io/", "dblp": "22/7219;289/0340;;391/6246;43/10813;142/3014.html", "google_scholar": "https://scholar.google.co.kr/citations?user=ducPigcAAAAJ;L99pxh8AAAAJ;kSJAiE4AAAAJ;;https://scholar.google.com.sg/citations?user=_ZfueMIAAAAJ;hvr3ALkAAAAJ", "orcid": ";;;;;", "linkedin": ";;jw9730/;;saehoonkim/;seunghoon-hong-194489a4/", "or_profile": "~Semin_Kim1;~Jaehoon_Yoo1;~Jinwoo_Kim4;~Yeonwoo_Cha1;~Saehoon_Kim1;~Seunghoon_Hong1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Kakao Brain;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kakaobrain.com;kaist.ac.kr", "position": "MS student;PhD student;PhD student;Undergrad student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkim2024simulationfree,\ntitle={Simulation-Free Training of Neural {ODE}s on Paired Data},\nauthor={Semin Kim and Jaehoon Yoo and Jinwoo Kim and Yeonwoo Cha and Saehoon Kim and Seunghoon Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GOgKhunkfw}\n}", "github": "", "reviewers": "YTiC;bNYt;4GBu;RQFX", "pdf_size": 1698095, "rating": "4;5;5;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;2", "wc_summary": "65;175;143;90", "wc_strengths": "15;64;55;62", "wc_weaknesses": "195;389;180;78", "wc_questions": "56;116;43;113", "wc_limitations": "4;1;16;13", "wc_review": "335;745;437;356", "wc_reply_reviewers": "389;219;17;0", "wc_reply_authors": "1075;484;28;0", "reply_reviewers": "1;2;1;0", "reply_authors": "4;3;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 118.25, 43.205179087697346 ], "wc_strengths_avg": [ 49.0, 19.912307751739878 ], "wc_weaknesses_avg": [ 210.5, 112.45999288635937 ], "wc_questions_avg": [ 82.0, 32.840523747346055 ], "wc_limitations_avg": [ 8.5, 6.18465843842649 ], "wc_review_avg": [ 468.25, 164.25799067320895 ], "wc_reply_reviewers_avg": [ 156.25, 159.62044825146933 ], "wc_reply_authors_avg": [ 396.75, 436.18308942461306 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6778620522586437764&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kakaobrain.com;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Kakao Brain", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://brain.kakao.com", "aff_unique_abbr": "KAIST;Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "GP30inajOt", "title": "Retraction-free optimization over the Stiefel manifold with application to the LoRA fine-tuning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Optimization over the Stiefel manifold has played a significant role in various machine learning tasks. Many existing algorithms either use the retraction operator to keep each iterate staying on the manifold, or solve an unconstrained quadratic penalized problem. The retraction operator in the former corresponds to orthonormalization of matrices and can be computationally costly for large-scale matrices. The latter approach usually equips with an unknown large penalty parameter. To address the above issues, we propose a retraction-free and penalty parameter-free algorithm, which lands on the manifold. A key component of the analysis is the convex-like property of the quadratic penalty of the Stiefel manifold, which enables us to explicitly characterize the penalty parameter. As an application, we introduce a new algorithm, Manifold-LoRA, which employs the landing technique and a carefully designed step size strategy to accelerate low-rank adaptation (LoRA) in fine-tuning large language models. Numerical experiments on the benchmark datasets demonstrate the efficiency of our proposed method.", "keywords": "Manfold;Landing;LoRA;fine-tuning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/ea4f168f60ff45b9bae927c1c789ee0521bdeb3a.zip", "author": "Yuan Zhang;Jiang Hu;Jiaxi Cui;Lin Lin;Zaiwen Wen;Quanzheng Li", "authorids": "~Yuan_Zhang51;~Jiang_Hu2;~Jiaxi_Cui1;~Lin_Lin1;~Zaiwen_Wen1;~Quanzheng_Li1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/itongggg;https://hujiangpku.github.io/;https://github.com/JessyTsu1;https://math.berkeley.edu/~linlin/;http://bicmr.pku.edu.cn/~wenzw;https://camca.mgh.harvard.edu/people/faculty/", "dblp": ";;;;26/8184;", "google_scholar": ";WIlpQFwAAAAJ;QRV7CjgAAAAJ;;QfxrxDoAAAAJ;MHq2z7oAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yuan_Zhang51;~Jiang_Hu2;~Jiaxi_Cui1;~Lin_Lin1;~Zaiwen_Wen1;~Quanzheng_Li1", "aff": "Peking University;Harvard University;;University of California, Berkeley;Peking University;Harvard University", "aff_domain": "pku.edu;harvard.edu;;berkeley.edu;pku.edu.cn;harvard.edu", "position": "MS student;Postdoc;;Associate Professor;Full Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024retractionfree,\ntitle={Retraction-free optimization over the Stiefel manifold with application to the Lo{RA} fine-tuning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=GP30inajOt}\n}", "github": "", "project": "", "reviewers": "L45X;yeZ7;mQbj;ohUo", "site": "https://openreview.net/forum?id=GP30inajOt", "pdf_size": 603879, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "48;144;36;137", "wc_strengths": "52;37;42;74", "wc_weaknesses": "181;173;30;419", "wc_questions": "260;41;131;31", "wc_limitations": "20;11;7;3", "wc_review": "561;406;246;664", "wc_reply_reviewers": "245;274;165;12", "wc_reply_authors": "631;806;269;6", "reply_reviewers": "1;3;2;1", "reply_authors": "3;5;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 49.49431785568925 ], "wc_strengths_avg": [ 51.25, 14.201672436723781 ], "wc_weaknesses_avg": [ 200.75, 139.59651679035548 ], "wc_questions_avg": [ 115.75, 91.93849846500649 ], "wc_limitations_avg": [ 10.25, 6.299801584177076 ], "wc_review_avg": [ 469.25, 158.26145298208277 ], "wc_reply_reviewers_avg": [ 174.0, 101.69316594540658 ], "wc_reply_authors_avg": [ 428.0, 311.2306218867289 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OXz-10eho4MJ:scholar.google.com/&scioq=Retraction-free+optimization+over+the+Stiefel+manifold+with+application+to+the+LoRA+fine-tuning&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Peking University;Harvard University;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.harvard.edu;https://www.berkeley.edu", "aff_unique_abbr": "Peking U;Harvard;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Differentially Private Set Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95905", "id": "GQNvvQquO0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GQNvvQquO0", "openreview": "https://openreview.net/forum?id=GQNvvQquO0", "poster": "/media/PosterPDFs/NeurIPS%202024/95905.png?t=1733932471.0605724", "project": "", "author_site": "Sarvar Patel, Giuseppe Persiano, Joon Young Seo, Kevin Yeo", "tldr": "", "abstract": "We study the problem of differentially private (DP) mechanisms for representing\nsets of size $k$ from a large universe.\nOur first construction creates\n$(\\epsilon,\\delta)$-DP representations with error probability of \n$1/(e^\\epsilon + 1)$ using space at most $1.05 k \\epsilon \\cdot \\log(e)$ bits where\nthe time to construct a representation is $O(k \\log(1/\\delta))$ while decoding time is $O(\\log(1/\\delta))$.\nWe also present a second algorithm for pure $\\epsilon$-DP representations with the same error using space at most $k \\epsilon \\cdot \\log(e)$ bits, but requiring large decoding times.\nOur algorithms match the lower bounds on privacy-utility trade-offs (including constants but ignoring $\\delta$ factors) and we also present a new space lower bound\nmatching our constructions up to small constant factors.\nTo obtain our results, we design a new approach embedding sets into random linear systems\ndeviating from most prior approaches that inject noise into non-private solutions.", "keywords": "Differential Privacy;Data Structure", "primary_area": "privacy", "supplementary_material": "", "author": "Sarvar Patel;Giuseppe Persiano;Joon Young Seo;Kevin Yeo", "authorids": "~Sarvar_Patel1;~Giuseppe_Persiano1;~Joon_Young_Seo1;~Kevin_Yeo1", "gender": "M;M;;", "homepage": ";https://docenti.unisa.it/001364/home;;https://sites.google.com/corp/view/kevin-yeo", "dblp": ";p/GiuseppePersiano;175/1759.html;", "google_scholar": "WidPFpQAAAAJ;wbQQcfQAAAAJ;;_1VJr_gAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sarvar_Patel1;~Giuseppe_Persiano1;~Joon_Young_Seo1;~Kevin_Yeo1", "aff": "Google;University of Salerno;Google;Google", "aff_domain": "google.com;unisa.it;google.com;google.com", "position": "Researcher;Full Professor;Software Engineer;Researcher", "bibtex": "@inproceedings{\npatel2024differentially,\ntitle={Differentially Private Set Representations},\nauthor={Sarvar Patel and Giuseppe Persiano and Joon Young Seo and Kevin Yeo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GQNvvQquO0}\n}", "github": "", "reviewers": "VHxw;1hbz;Gmoe;ys34", "pdf_size": 395775, "rating": "5;5;6;7", "confidence": "3;4;3;4", "soundness": "2;1;3;3", "novelty": "3;3;2;4", "presentation": "3;3;4;3", "wc_summary": "114;208;54;139", "wc_strengths": "50;72;49;46", "wc_weaknesses": "170;185;208;36", "wc_questions": "176;64;43;271", "wc_limitations": "1;2;1;41", "wc_review": "511;531;355;533", "wc_reply_reviewers": "24;59;0;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 128.75, 55.20586472468301 ], "wc_strengths_avg": [ 54.25, 10.353139620424328 ], "wc_weaknesses_avg": [ 149.75, 67.05361660641431 ], "wc_questions_avg": [ 138.5, 91.69651029346755 ], "wc_limitations_avg": [ 11.25, 17.181021506301654 ], "wc_review_avg": [ 482.5, 74.11308926228888 ], "wc_reply_reviewers_avg": [ 22.0, 23.16246964380094 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q3KZZ6_pq2AJ:scholar.google.com/&scioq=Differentially+Private+Set+Representations&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "google.com;unisa.it;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Google;University of Salerno", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.unisalento.it", "aff_unique_abbr": "Google;Unisalento", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Italy" }, { "title": "Pre-training Differentially Private Models with Limited Public Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95904", "id": "GQrk0WGNiC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GQrk0WGNiC", "openreview": "https://openreview.net/forum?id=GQrk0WGNiC", "poster": "/media/PosterPDFs/NeurIPS%202024/95904.png?t=1731362655.3260415", "project": "", "author_site": "Zhiqi Bu, Xinwei Zhang, Sheng Zha, Mingyi Hong, George Karypis", "tldr": "", "abstract": "The superior performance of large foundation models can be attributed to the use of massive amounts of high-quality data. However, such datasets often contain sensitive, private and copyrighted material that requires formal protection. While differential privacy (DP) is a prominent method used to gauge the degree of security provided to large foundation models, its application in large foundation models has been met with limited success because there are often significant performance compromises when applying DP during the pre-training phase. Consequently, DP is more commonly implemented during the model fine-tuning stage, hence not capable of protecting a substantial portion of the data used during the initial pre-training process. In this work, we first provide a theoretical understanding of the efficacy of DP training by analyzing the per-iteration improvement of loss through the lens of the Hessian. We observe that DP optimizers' deceleration can be significantly mitigated by the use of limited public data, and thus propose the DP continual pre-training strategy. Our DP continual pre-training on vision models, using only 10% of public data, have achieved DP accuracy of 41.5% on ImageNet-21k (with epsilon=8) and non-DP accuracy of 55.7% on Places365 and 60.0% on iNaturalist-2021, which are on par with state-of-the-art standard pre-training and outperform existing DP pertained models. Our DP pre-trained models are released in *fastDP* library (https://github.com/awslabs/fast-differential-privacy/releases/tag/v2.1)", "keywords": "Differential Privacy;Foundation Models;Pre-training", "primary_area": "privacy", "supplementary_material": "", "author": "Zhiqi Bu;Xinwei Zhang;Sheng Zha;Mingyi Hong;George Karypis", "authorids": "~Zhiqi_Bu1;~Xinwei_Zhang1;~Sheng_Zha1;~Mingyi_Hong1;~George_Karypis1", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/zhiqi-bu;https://564612540.github.io/;https://github.com/szha;http://people.ece.umn.edu/~mhong/mingyi.html;", "dblp": "245/2573;55/9870-1.html;218/5471;57/8053;", "google_scholar": "MEvTLxIAAAAJ;uq46meMAAAAJ;;qRnP-p0AAAAJ;ElqwScwAAAAJ", "orcid": ";0000-0001-7967-7150;;;", "linkedin": ";;shengzha/;;", "or_profile": "~Zhiqi_Bu1;~Xinwei_Zhang1;~Sheng_Zha1;~Mingyi_Hong1;~George_Karypis1", "aff": "Amazon;University of Southern California;Amazon;University of Minnesota, Minneapolis;University of Minnesota, Minneapolis", "aff_domain": "amazon.com;usc.edu;amazon.com;umn.edu;umn.edu", "position": "Researcher;Postdoc;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbu2024pretraining,\ntitle={Pre-training Differentially Private Models with Limited Public Data},\nauthor={Zhiqi Bu and Xinwei Zhang and Sheng Zha and Mingyi Hong and George Karypis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GQrk0WGNiC}\n}", "github": "", "reviewers": "wr8p;9edf;JCrd;KeLK", "pdf_size": 5282803, "rating": "4;5;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;2", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "41;64;167;87", "wc_strengths": "56;30;117;45", "wc_weaknesses": "402;111;280;30", "wc_questions": "42;241;139;148", "wc_limitations": "38;8;40;44", "wc_review": "579;454;743;354", "wc_reply_reviewers": "151;11;153;17", "wc_reply_authors": "228;0;24;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 89.75, 47.47301865270419 ], "wc_strengths_avg": [ 62.0, 33.0681115275729 ], "wc_weaknesses_avg": [ 205.75, 144.82122599950603 ], "wc_questions_avg": [ 142.5, 70.43614129124337 ], "wc_limitations_avg": [ 32.5, 14.309088021254185 ], "wc_review_avg": [ 532.5, 145.34183843615025 ], "wc_reply_reviewers_avg": [ 83.0, 69.03622237637282 ], "wc_reply_authors_avg": [ 63.0, 95.7653381970742 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11499894420917517752&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "amazon.com;usc.edu;amazon.com;umn.edu;umn.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Amazon;University of Southern California;University of Minnesota", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.usc.edu;https://www.minnesota.edu", "aff_unique_abbr": "Amazon;USC;UMN", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Los Angeles;Minneapolis", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "BehaviorGPT: Smart Agent Simulation for Autonomous Driving with Next-Patch Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95903", "id": "GRmQjLzaPM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GRmQjLzaPM", "openreview": "https://openreview.net/forum?id=GRmQjLzaPM", "poster": "/media/PosterPDFs/NeurIPS%202024/95903.png?t=1731555365.3662765", "project": "", "author_site": "Zikang Zhou, HU Haibo, Xinhong Chen, Jianping Wang, Nan Guan, Kui Wu, Yung-Hui Li, Yu-Kai Huang, Chun Jason Xue", "tldr": "", "abstract": "Simulating realistic behaviors of traffic agents is pivotal for efficiently validating the safety of autonomous driving systems. Existing data-driven simulators primarily use an encoder-decoder architecture to encode the historical trajectories before decoding the future. However, the heterogeneity between encoders and decoders complicates the models, and the manual separation of historical and future trajectories leads to low data utilization. Given these limitations, we propose BehaviorGPT, a homogeneous and fully autoregressive Transformer designed to simulate the sequential behavior of multiple agents. Crucially, our approach discards the traditional separation between \"history\" and \"future\" by modeling each time step as the \"current\" one for motion generation, leading to a simpler, more parameter- and data-efficient agent simulator. We further introduce the Next-Patch Prediction Paradigm (NP3) to mitigate the negative effects of autoregressive modeling, in which models are trained to reason at the patch level of trajectories and capture long-range spatial-temporal interactions. Despite having merely 3M model parameters, BehaviorGPT won first place in the 2024 Waymo Open Sim Agents Challenge with a realism score of 0.7473 and a minADE score of 1.4147, demonstrating its exceptional performance in traffic agent simulation.", "keywords": "Multi-Agent Systems;Transformers;Generative Models;Autonomous Driving", "primary_area": "robotics", "supplementary_material": "", "author": "Zikang Zhou;Haibo HU;Xinhong Chen;Jianping Wang;Nan Guan;Kui Wu;Yung-Hui Li;Yu-Kai Huang;Chun Jason Xue", "authorids": "~Zikang_Zhou1;~Haibo_HU3;~Xinhong_Chen3;~Jianping_Wang1;~Nan_Guan1;~Kui_Wu1;~Yung-Hui_Li3;~Yu-Kai_Huang3;~Chun_Jason_Xue1", "gender": "M;M;F;M;M;M;M;M;M", "homepage": "https://zikangzhou.github.io;https://mark-xhchen.github.io/;http://www.cs.cityu.edu.hk/~jianwang;https://www.cs.cityu.edu.hk/~nanguan/;http://webhome.cs.uvic.ca/~wkui/index.html;;https://www.cs.cityu.edu.hk/~jasonxue/;https://scholars.cityu.edu.hk/en/persons/haibo-hu(9e88f4e7-64e3-4884-86d6-ae6e65350001).html;https://www.hh-ri.com/ai/", "dblp": "68/7727;06/8349-3;21/1550-1;45/5411.html;w/KuiWu;;x/ChunJasonXue.html;;", "google_scholar": "LZVx3I0AAAAJ;bnhTniMAAAAJ;bow_liAAAAAJ;3C7SPAgAAAAJ;https://scholar.google.com.tw/citations?user=YW2PefUAAAAJ;;C6oyGQkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=0yBU2_YAAAAJ", "orcid": "0000-0001-9472-4162;0000-0002-8563-148X;;;;;;;0000-0002-0475-3689", "linkedin": "zikang-zhou-03141822a/;;;;;yu-kai-huang-135a2691/;;;yung-hui-li-8a363120/", "or_profile": "~Zikang_Zhou1;~Xinhong_Chen3;~Jianping_Wang1;~Nan_Guan1;~Kui_Wu1;~Yu-Kai_Huang3;~Chun_Jason_Xue1;~HU_Haibo1;~Yung-hui_Li1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;University of Victoria;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;City University of Hong Kong;Hon Hai Research Institute", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;uvic.ca;andrew.cmu.edu;mbzuai.ac.ae;cityu.edu.hk;foxconn.com", "position": "PhD student;Postdoc;Full Professor;Associate Professor;Full Professor;PhD student;Full Professor;PhD student;Senior Director", "bibtex": "@inproceedings{\nzhou2024behaviorgpt,\ntitle={Behavior{GPT}: Smart Agent Simulation for Autonomous Driving with Next-Patch Prediction},\nauthor={Zikang Zhou and Haibo HU and Xinhong Chen and Jianping Wang and Nan Guan and Kui Wu and Yung-Hui Li and Yu-Kai Huang and Chun Jason Xue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GRmQjLzaPM}\n}", "github": "", "reviewers": "gNhc;HMYm;pxxg;QXEW;mLGo;b92q", "pdf_size": 2631327, "rating": "5;6;7;7;7;7", "confidence": "4;4;4;4;4;4", "soundness": "3;3;3;3;4;3", "novelty": "3;2;3;3;4;3", "presentation": "2;3;2;2;3;3", "wc_summary": "152;72;33;70;189;59", "wc_strengths": "106;96;58;107;79;52", "wc_weaknesses": "886;133;53;501;129;101", "wc_questions": "14;62;2;172;135;176", "wc_limitations": "2;42;2;32;1;7", "wc_review": "1160;405;148;882;533;395", "wc_reply_reviewers": "158;81;25;548;0;32", "wc_reply_authors": "162;17;6;196;0;7", "reply_reviewers": "2;1;1;1;0;1", "reply_authors": "3;2;2;2;1;2", "rating_avg": [ 6.5, 0.7637626158259734 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 95.83333333333333, 55.339608077960065 ], "wc_strengths_avg": [ 83.0, 21.8936824982307 ], "wc_weaknesses_avg": [ 300.5, 300.42622499819595 ], "wc_questions_avg": [ 93.5, 71.15183295835276 ], "wc_limitations_avg": [ 14.333333333333334, 16.397831834998456 ], "wc_review_avg": [ 587.1666666666666, 336.89237483538005 ], "wc_reply_reviewers_avg": [ 140.66666666666666, 189.189205705705 ], "wc_reply_authors_avg": [ 64.66666666666667, 81.5918024204774 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.0, 0.5773502691896257 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13044206185010959768&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;uvic.ca;andrew.cmu.edu;mbzuai.ac.ae;cityu.edu.hk;foxconn.com", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;3;0;4", "aff_unique_norm": "City University of Hong Kong;University of Victoria;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Hon Hai Research Institute", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.uvic.ca;https://www.cmu.edu;https://mbzuai.ac.ae;https://www.honhai.com/", "aff_unique_abbr": "CityU;UVic;CMU;MBZUAI;HRI", "aff_campus_unique_index": "0;0;0;0;0;2", "aff_campus_unique": "Hong Kong SAR;;Taiwan", "aff_country_unique_index": "0;0;0;0;1;2;3;0;0", "aff_country_unique": "China;Canada;United States;United Arab Emirates" }, { "title": "Discrete Flow Matching", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95902", "id": "GTDKo3Sv9p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GTDKo3Sv9p", "openreview": "https://openreview.net/forum?id=GTDKo3Sv9p", "poster": "", "project": "", "author_site": "Itai Gat, Tal Remez, Neta Shaul, Felix Kreuk, Ricky T. Q. Chen, Gabriel Synnaeve, Yossi Adi, Yaron Lipman", "tldr": "", "abstract": "Despite Flow Matching and diffusion models having emerged as powerful generative paradigms for continuous variables such as images and videos, their application to high-dimensional discrete data, such as language, is still limited. In this work, we present Discrete Flow Matching, a novel discrete flow paradigm designed specifically for generating discrete data. Discrete Flow Matching offers several key contributions: (i) it works with a general family of probability paths interpolating between source and target distributions; (ii) it allows for a generic formula for sampling from these probability paths using learned posteriors such as the probability denoiser ($x$-prediction) and noise-prediction ($\\epsilon$-prediction); (iii) practically, focusing on specific probability paths defined with different schedulers improves generative perplexity compared to previous discrete diffusion and flow models; and (iv) by scaling Discrete Flow Matching models up to 1.7B parameters, we reach 6.7% Pass@1 and 13.4% Pass@10 on HumanEval and 6.7% Pass@1 and 20.6% Pass@10 on 1-shot MBPP coding benchmarks. Our approach is capable of generating high-quality discrete data in a non-autoregressive fashion, significantly closing the gap between autoregressive models and discrete flow models.", "keywords": "Language modeling;Flow matching;Diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Itai Gat;Tal Remez;Neta Shaul;Felix Kreuk;Ricky T. Q. Chen;Gabriel Synnaeve;Yossi Adi;Yaron Lipman", "authorids": "~Itai_Gat1;~Tal_Remez2;~Neta_Shaul1;~Felix_Kreuk1;~Ricky_T._Q._Chen1;~Gabriel_Synnaeve1;~Yossi_Adi1;~Yaron_Lipman1", "gender": "M;M;M;M;;M;M;", "homepage": "https://www.linkedin.com/in/itaigat/;https://talremez.github.io/;;https://scholar.google.co.il/citations?user=UiERcYsAAAAJ&hl=en;;;http://adiyoss.github.io/;", "dblp": "221/4128;170/0030;;213/7459;;http://dblp.uni-trier.de/pers/hd/s/Synnaeve:Gabriel;171/0957.html;", "google_scholar": "TnJqhXIAAAAJ;https://scholar.google.co.il/citations?user=XqHYn7EAAAAJ;;;;wN9rBkcAAAAJ;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ;", "orcid": ";;;;;;0000-0003-2237-3898;", "linkedin": ";;neta-shaul-3364aa235/;;;;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic;", "or_profile": "~Itai_Gat1;~Tal_Remez2;~Neta_Shaul1;~Felix_Kreuk1;~Ricky_T._Q._Chen1;~Gabriel_Synnaeve1;~Yossi_Adi1;~Yaron_Lipman1", "aff": ";Meta;Weizmann Institute of Science;Meta Facebook;;Meta Facebook;Meta;", "aff_domain": ";meta.com;weizmann.ac.il;fb.com;;fb.com;meta.com;", "position": ";Researcher;PhD student;Researcher;;Research Scientist;Research Scientist;", "bibtex": "@inproceedings{\ngat2024discrete,\ntitle={Discrete Flow Matching},\nauthor={Itai Gat and Tal Remez and Neta Shaul and Felix Kreuk and Ricky T. Q. Chen and Gabriel Synnaeve and Yossi Adi and Yaron Lipman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GTDKo3Sv9p}\n}", "github": "", "reviewers": "oYBb;ppDj;vMd5;7VEG", "pdf_size": 2245174, "rating": "5;6;7;8", "confidence": "4;3;4;4", "soundness": "3;4;4;2", "novelty": "3;3;3;3", "presentation": "3;4;3;2", "wc_summary": "68;114;248;79", "wc_strengths": "59;79;41;12", "wc_weaknesses": "47;60;286;287", "wc_questions": "2;38;95;2", "wc_limitations": "25;18;34;2", "wc_review": "201;309;704;382", "wc_reply_reviewers": "0;16;17;40", "wc_reply_authors": "0;0;0;166", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;3", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 127.25, 71.75435526851314 ], "wc_strengths_avg": [ 47.75, 24.631027181179434 ], "wc_weaknesses_avg": [ 170.0, 116.59116604614606 ], "wc_questions_avg": [ 34.25, 38.028772002261654 ], "wc_limitations_avg": [ 19.75, 11.712706775122479 ], "wc_review_avg": [ 399.0, 187.49533327525782 ], "wc_reply_reviewers_avg": [ 18.25, 14.254385290148432 ], "wc_reply_authors_avg": [ 41.5, 71.88010851410841 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12174064458047406738&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";meta.com;weizmann.ac.il;fb.com;;fb.com;meta.com;", "author_num": 8, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Meta;Weizmann Institute of Science", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.weizmann.org.il", "aff_unique_abbr": "Meta;Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "CoLoR-Filter: Conditional Loss Reduction Filtering for Targeted Language Model Pre-training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95901", "id": "GUccmOMBv6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GUccmOMBv6", "openreview": "https://openreview.net/forum?id=GUccmOMBv6", "poster": "", "project": "", "author_site": "David Brandfonbrener, Hanlin Zhang, Andreas Kirsch, Jonathan Richard Schwarz, Sham Kakade", "tldr": "", "abstract": "Selecting high-quality data for pre-training is crucial in shaping the downstream task performance of language models. A major challenge lies in identifying this optimal subset, a problem generally considered intractable, thus necessitating scalable and effective heuristics. In this work, we propose a data selection method, CoLoR-Filter (Conditional Loss Reduction Filtering), which leverages an empirical Bayes-inspired approach to derive a simple and computationally efficient selection criterion based on the relative loss values of two auxiliary models.\n\nIn addition to the modeling rationale, we evaluate CoLoR-Filter empirically on two language modeling tasks: (1) selecting data from C4 for domain adaptation to evaluation on Books and (2) selecting data from C4 for a suite of downstream multiple-choice question answering tasks. We demonstrate favorable scaling both as we subselect more aggressively and using small auxiliary models to select data for large target models. As one headline result, CoLoR-Filter data selected using a pair of 150m parameter auxiliary models can train a 1.2b parameter target model to match a 1.2b parameter model trained on 25b randomly selected tokens with 25x less data for Books and 11x less data for the downstream tasks. \n\nCode: https://github.com/davidbrandfonbrener/color-filter-olmo\n\nFiltered data: https://huggingface.co/datasets/davidbrandfonbrener/color-filtered-c4", "keywords": "data selection;language models;LLMs", "primary_area": "active_learning", "supplementary_material": "/attachment/6ce858d45af6fa231a390f0d5458dfc393f4514c.zip", "author": "David Brandfonbrener;Hanlin Zhang;Andreas Kirsch;Jonathan Richard Schwarz;Sham M. Kakade", "authorids": "~David_Brandfonbrener1;~Hanlin_Zhang1;~Andreas_Kirsch1;~Jonathan_Richard_Schwarz1;~Sham_M._Kakade1", "gender": "M;M;;M;M", "homepage": "https://davidbrandfonbrener.github.io;https://hanlin-zhang.com/;https://www.blackhc.net;https://shamulent.github.io;https://jonathan-schwarz.github.io", "dblp": "214/9461;;56/2914-2;s/SMKakade;211/7673", "google_scholar": "https://scholar.google.com/citations?hl=en;h5IXxToAAAAJ;WYQVZpYAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;Efs3XxQAAAAJ", "orcid": ";0000-0002-9292-1645;0000-0001-8244-7700;;", "linkedin": ";hanlin-zhang-931b46143/;blackhc;;schwarzjonathan/", "or_profile": "~David_Brandfonbrener1;~Hanlin_Zhang1;~Andreas_Kirsch1;~Sham_M._Kakade1;~Jonathan_Schwarz1", "aff": "Harvard University;Harvard University;Midjourney ;Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu;blackhc.net;harvard.edu;harvard.edu", "position": "Postdoc;PhD student;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nbrandfonbrener2024colorfilter,\ntitle={CoLoR-Filter: Conditional Loss Reduction Filtering for Targeted Language Model Pre-training},\nauthor={David Brandfonbrener and Hanlin Zhang and Andreas Kirsch and Jonathan Richard Schwarz and Sham M. Kakade},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GUccmOMBv6}\n}", "github": "", "reviewers": "UH6A;yqth;ZLEF;q13t", "pdf_size": 1086840, "rating": "6;6;7;7", "confidence": "5;4;4;2", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;3;4;3", "wc_summary": "78;127;95;90", "wc_strengths": "49;123;103;52", "wc_weaknesses": "131;225;111;100", "wc_questions": "59;57;40;21", "wc_limitations": "18;12;3;1", "wc_review": "335;544;352;264", "wc_reply_reviewers": "215;0;140;131", "wc_reply_authors": "376;0;18;384", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.5, 18.117670931993437 ], "wc_strengths_avg": [ 81.75, 32.057565409743766 ], "wc_weaknesses_avg": [ 141.75, 49.33241834737073 ], "wc_questions_avg": [ 44.25, 15.31951369985353 ], "wc_limitations_avg": [ 8.5, 6.87386354243376 ], "wc_review_avg": [ 373.75, 103.6879332420123 ], "wc_reply_reviewers_avg": [ 121.5, 77.35793430540916 ], "wc_reply_authors_avg": [ 194.5, 185.63068173122676 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11101527068831580969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "harvard.edu;harvard.edu;blackhc.net;harvard.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Harvard University;Midjourney", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://midjourney.com/", "aff_unique_abbr": "Harvard;Midjourney", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Nonparametric Evaluation of Noisy ICA Solutions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95900", "id": "GVgRbz8MvG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GVgRbz8MvG", "openreview": "https://openreview.net/forum?id=GVgRbz8MvG", "poster": "", "project": "", "author_site": "Syamantak Kumar, Derek Bean, peter j bickel, Purnamrita Sarkar", "tldr": "", "abstract": "Independent Component Analysis (ICA) was introduced in the 1980's as a model for Blind Source Separation (BSS), which refers to the process of recovering the sources underlying a mixture of signals, with little knowledge about the source signals or the mixing process. While there are many sophisticated algorithms for estimation, different methods have different shortcomings. In this paper, we develop a nonparametric score to adaptively pick the right algorithm for ICA with arbitrary Gaussian noise. The novelty of this score stems from the fact that it just assumes a finite second moment of the data and uses the characteristic function to evaluate the quality of the estimated mixing matrix without any knowledge of the parameters of the noise distribution. In addition, we propose some new contrast functions and algorithms that enjoy the same fast computability as existing algorithms like FASTICA and JADE but work in domains where the former may fail. While these also may have weaknesses, our proposed diagnostic, as shown by our simulations, can remedy them. Finally, we propose a theoretical framework to analyze the local and global convergence properties of our algorithms.", "keywords": "ICA;Independent Component Analysis;Kurtosis;Characteristic Function;Cumulant Generating Function;Blind Signal Separation;Convergence", "primary_area": "other", "supplementary_material": "/attachment/0072d6ad62fb7bbafc78e4651621e184a7e30b53.zip", "author": "Syamantak Kumar;Derek Bean;Peter Bickel;Purnamrita Sarkar", "authorids": "~Syamantak_Kumar1;~Derek_Bean1;~Peter_Bickel1;~Purnamrita_Sarkar1", "gender": "M;M;M;F", "homepage": "https://syamantakk.github.io/;https://pages.stat.wisc.edu/~derekb/;https://bickel.stat.berkeley.edu/;https://psarkar.github.io/", "dblp": "297/4951;;57/1825.html;25/6929", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;t4w1jE4AAAAJ;KfT3_0AAAAAJ", "orcid": ";;;", "linkedin": "syamantak-kumar/;;;", "or_profile": "~Syamantak_Kumar1;~Derek_Bean1;~Peter_Bickel1;~Purnamrita_Sarkar1", "aff": "University of Texas at Austin;University of Wisconsin - Madison;University of California, Berkeley;University of Texas, Austin", "aff_domain": "cs.utexas.edu;wisc.edu;berkeley.edu;utexas.edu", "position": "PhD student;Lecturer;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkumar2024nonparametric,\ntitle={Nonparametric Evaluation of Noisy {ICA} Solutions},\nauthor={Syamantak Kumar and Derek Bean and Peter Bickel and Purnamrita Sarkar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GVgRbz8MvG}\n}", "github": "", "reviewers": "uqTT;c27m;N8km;Dz1S", "pdf_size": 16021927, "rating": "4;6;6;6", "confidence": "3;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "1;3;3;2", "wc_summary": "113;80;84;450", "wc_strengths": "47;43;30;144", "wc_weaknesses": "100;104;65;666", "wc_questions": "103;50;15;7", "wc_limitations": "1;16;53;9", "wc_review": "364;293;247;1276", "wc_reply_reviewers": "61;25;0;157", "wc_reply_authors": "173;89;0;245", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 181.75, 155.3968709466185 ], "wc_strengths_avg": [ 66.0, 45.46977017755863 ], "wc_weaknesses_avg": [ 233.75, 250.0203741697864 ], "wc_questions_avg": [ 43.75, 37.83764659700706 ], "wc_limitations_avg": [ 19.75, 19.917015338649513 ], "wc_review_avg": [ 545.0, 424.09609760053206 ], "wc_reply_reviewers_avg": [ 60.75, 59.65054484244046 ], "wc_reply_authors_avg": [ 126.75, 91.6689014879092 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8VmfdCDQAnUJ:scholar.google.com/&scioq=Nonparametric+Evaluation+of+Noisy+ICA+Solutions&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "cs.utexas.edu;wisc.edu;berkeley.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Texas at Austin;University of Wisconsin-Madison;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.wisc.edu;https://www.berkeley.edu", "aff_unique_abbr": "UT Austin;UW-Madison;UC Berkeley", "aff_campus_unique_index": "0;1;2;0", "aff_campus_unique": "Austin;Madison;Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bridging Gaps: Federated Multi-View Clustering in Heterogeneous Hybrid Views", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95899", "id": "GVlJVX3iiq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GVlJVX3iiq", "openreview": "https://openreview.net/forum?id=GVlJVX3iiq", "poster": "/media/PosterPDFs/NeurIPS%202024/95899.png?t=1731309564.5239382", "project": "", "author_site": "Xinyue Chen, Yazhou Ren, Jie Xu, Fangfei Lin, Xiaorong Pu, Yang Yang", "tldr": "", "abstract": "Recently, federated multi-view clustering (FedMVC) has emerged to explore cluster structures in multi-view data distributed on multiple clients. Many existing approaches tend to assume that clients are isomorphic and all of them belong to either single-view clients or multi-view clients. While these methods have succeeded, they may encounter challenges in practical FedMVC scenarios involving heterogeneous hybrid views, where a mixture of single-view and multi-view clients exhibit varying degrees of heterogeneity. In this paper, we propose a novel FedMVC framework, which concurrently addresses two challenges associated with heterogeneous hybrid views, i.e., client gap and view gap. To address the client gap, we design a local-synergistic contrastive learning approach that helps single-view clients and multi-view clients achieve consistency for mitigating heterogeneity among all clients. To address the view gap, we develop a global-specific weighting aggregation method, which encourages global models to learn complementary features from hybrid views. The interplay between local-synergistic contrastive learning and global-specific weighting aggregation mutually enhances the exploration of the data cluster structures distributed on multiple clients. Theoretical analysis and extensive experiments demonstrate that our method can handle the heterogeneous hybrid views in FedMVC and outperforms state-of-the-art methods.", "keywords": "Multi-view learning;Clustering;Federated learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/20d5cd3a909fee733ffb75cf469ceaa9a2c4eb36.zip", "author": "Xinyue Chen;Yazhou Ren;Jie Xu;Fangfei Lin;Xiaorong Pu;Yang Yang", "authorids": "~Xinyue_Chen4;~Yazhou_Ren1;~Jie_Xu8;~Fangfei_Lin1;~Xiaorong_Pu1;~Yang_Yang37", "gender": "F;M;M;;F;M", "homepage": ";https://yazhou-ren.github.io/;https://submissionsin.github.io;https://fffaby.github.io/;https://yjsjy.uestc.edu.cn/gmis/jcsjgl/dsfc/dsgrjj/10368?yxsh=08;http://cfm.uestc.edu.cn/~yangyang/", "dblp": ";157/2928;37/5126-44;;74/6232.html;", "google_scholar": "RBjE4q0AAAAJ;https://scholar.google.com/citations?hl=en;YT1_9swAAAAJ;;https://scholar.google.com.sg/citations?hl=zh-CN;", "orcid": "0000-0002-3105-8569;;0000-0003-1675-1821; 0000-0003-0878-5749;0000-0001-7387-7194;", "linkedin": ";;;;;", "or_profile": "~Xinyue_Chen4;~Yazhou_Ren1;~Jie_Xu8;~Fangfei_Lin1;~Xiaorong_Pu1;~Yang_Yang37", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "MS student;Associate Professor;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024bridging,\ntitle={Bridging Gaps: Federated Multi-View Clustering in Heterogeneous Hybrid Views},\nauthor={Xinyue Chen and Yazhou Ren and Jie Xu and Fangfei Lin and Xiaorong Pu and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GVlJVX3iiq}\n}", "github": "", "reviewers": "uC6c;jVcj;s4wh;14Lq", "pdf_size": 2870507, "rating": "6;7;7;7", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "43;59;60;45", "wc_strengths": "30;88;74;39", "wc_weaknesses": "61;27;46;69", "wc_questions": "58;18;40;3", "wc_limitations": "3;7;1;13", "wc_review": "195;199;221;169", "wc_reply_reviewers": "15;14;15;0", "wc_reply_authors": "97;87;105;42", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 51.75, 7.790218225441442 ], "wc_strengths_avg": [ 57.75, 23.98306694315804 ], "wc_weaknesses_avg": [ 50.75, 16.005858302509115 ], "wc_questions_avg": [ 29.75, 20.956800805466468 ], "wc_limitations_avg": [ 6.0, 4.58257569495584 ], "wc_review_avg": [ 196.0, 18.466185312619388 ], "wc_reply_reviewers_avg": [ 11.0, 6.363961030678928 ], "wc_reply_authors_avg": [ 82.75, 24.375961519497032 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1683586242223220769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Sample Selection via Contrastive Fragmentation for Noisy Label Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95898", "id": "GYd5AfZaor", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GYd5AfZaor", "openreview": "https://openreview.net/forum?id=GYd5AfZaor", "poster": "", "project": "", "author_site": "Chris Dongjoo Kim, Sangwoo Moon, Jihwan Moon, Dongyeon Woo, Gunhee Kim", "tldr": "", "abstract": "As with many other problems, real-world regression is plagued by the presence of noisy labels, an inevitable issue that demands our attention. \nFortunately, much real-world data often exhibits an intrinsic property of continuously ordered correlations between labels and features, where data points with similar labels are also represented with closely related features.\nIn response, we propose a novel approach named ConFrag, where we collectively model the regression data by transforming them into disjoint yet contrasting fragmentation pairs. \nThis enables the training of more distinctive representations, enhancing the ability to select clean samples.\nOur ConFrag framework leverages a mixture of neighboring fragments to discern noisy labels through neighborhood agreement among expert feature extractors.\nWe extensively perform experiments on four newly curated benchmark datasets of diverse domains, including age prediction, price prediction, and music production year estimation.\nWe also introduce a metric called Error Residual Ratio (ERR) to better account for varying degrees of label noise.\nOur approach consistently outperforms fourteen state-of-the-art baselines, being robust against symmetric and random Gaussian label noise.", "keywords": "Noisy Labels;Regression", "primary_area": "other", "supplementary_material": "", "author": "Chris Dongjoo Kim;Sangwoo Moon;Jihwan Moon;Dongyeon Woo;Gunhee Kim", "authorids": "~Chris_Dongjoo_Kim2;~Sangwoo_Moon3;~Jihwan_Moon2;~Dongyeon_Woo1;~Gunhee_Kim1", "gender": ";M;M;M;M", "homepage": ";;https://github.com/mnmjh1215;;http://vision.snu.ac.kr/gunhee/", "dblp": ";;;;45/115", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;;dKGgnbMAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "orcid": ";;;;0000-0002-9543-7453", "linkedin": ";;;;", "or_profile": "~Chris_Dongjoo_Kim2;~Sangwoo_Moon3;~Jihwan_Moon2;~Dongyeon_Woo1;~Gunhee_Kim1", "aff": ";Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": ";snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": ";PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2024sample,\ntitle={Sample Selection via Contrastive Fragmentation for Noisy Label Regression},\nauthor={Chris Dongjoo Kim and Sangwoo Moon and Jihwan Moon and Dongyeon Woo and Gunhee Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GYd5AfZaor}\n}", "github": "", "reviewers": "2i51;wYcT;1GFp;T5Bx", "pdf_size": 4011431, "rating": "5;5;5;5", "confidence": "4;3;3;4", "soundness": "3;3;3;2", "novelty": "2;3;2;2", "presentation": "3;2;3;2", "wc_summary": "85;75;59;61", "wc_strengths": "46;32;31;36", "wc_weaknesses": "52;137;179;45", "wc_questions": "192;28;78;40", "wc_limitations": "41;2;8;1", "wc_review": "416;274;355;183", "wc_reply_reviewers": "13;15;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 70.0, 10.63014581273465 ], "wc_strengths_avg": [ 36.25, 5.931905258852336 ], "wc_weaknesses_avg": [ 103.25, 56.78192934376217 ], "wc_questions_avg": [ 84.5, 64.75144786026024 ], "wc_limitations_avg": [ 13.0, 16.38596960817394 ], "wc_review_avg": [ 307.0, 87.53570700005798 ], "wc_reply_reviewers_avg": [ 10.0, 5.873670062235365 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gkb_42lxxr8J:scholar.google.com/&scioq=Sample+Selection+via+Contrastive+Fragmentation+for+Noisy+Label+Regression&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": ";snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "SpGesture: Source-Free Domain-adaptive sEMG-based Gesture Recognition with Jaccard Attentive Spiking Neural Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95897", "id": "GYqs5Z4joA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GYqs5Z4joA", "openreview": "https://openreview.net/forum?id=GYqs5Z4joA", "poster": "/media/PosterPDFs/NeurIPS%202024/95897.png?t=1731139259.1717782", "project": "", "author_site": "Weiyu Guo, Ying Sun, Yijie Xu, Ziyue Qiao, Yongkui Yang, Hui Xiong", "tldr": "", "abstract": "Surface electromyography (sEMG) based gesture recognition offers a natural and intuitive interaction modality for wearable devices. Despite significant advancements in sEMG-based gesture recognition models, existing methods often suffer from high computational latency and increased energy consumption. Additionally, the inherent instability of sEMG signals, combined with their sensitivity to distribution shifts in real-world settings, compromises model robustness. \nTo tackle these challenges, we propose a novel SpGesture framework based on Spiking Neural Networks, which possesses several unique merits compared with existing methods: (1) Robustness: By utilizing membrane potential as a memory list, we pioneer the introduction of Source-Free Domain Adaptation into SNN for the first time. This enables SpGesture to mitigate the accuracy degradation caused by distribution shifts. (2) High Accuracy: With a novel Spiking Jaccard Attention, SpGesture enhances the SNNs' ability to represent sEMG features, leading to a notable rise in system accuracy. To validate SpGesture's performance, we collected a new sEMG gesture dataset which has different forearm postures, where SpGesture achieved the highest accuracy among the baselines ($89.26\\%$). Moreover, the actual deployment on the CPU demonstrated a latency below 100ms, well within real-time requirements. This impressive performance showcases SpGesture's potential to enhance the applicability of sEMG in real-world scenarios. The code is available at https://github.com/guoweiyu/SpGesture/.", "keywords": "Surface Electromyography;Spiking Neural Network;Spiking Jaccard Attention;Source-Free Domain Adaptation;Human-computer Interaction;Gesture Recognition", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Weiyu Guo;Ying Sun;Yijie Xu;Ziyue Qiao;Yongkui Yang;Hui Xiong", "authorids": "~Weiyu_Guo1;~Ying_Sun4;~Yijie_Xu2;~Ziyue_Qiao1;~Yongkui_Yang1;~Hui_Xiong1", "gender": "M;F;M;M;;M", "homepage": ";https://sunyinggilly.github.io;https://yjx.me/;https://www.gbu.edu.cn/detail/article/729;;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": ";10/5415-6.html;250/9213;259/6341.html;;262/1686-1.html", "google_scholar": "ES-56HMAAAAJ;;hBZs76kAAAAJ;orHYf14AAAAJ;VcoaR-sAAAAJ;cVDF1tkAAAAJ", "orcid": ";0000-0002-4763-6060;;;;0000-0001-6016-6465", "linkedin": ";;yijiexucn/;;;", "or_profile": "~Weiyu_Guo1;~Ying_Sun4;~Yijie_Xu2;~Ziyue_Qiao1;~Yongkui_Yang1;~Hui_Xiong1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;hkust-gz.edu.cn;hkust-gz.edu.cn;ust.hk;siat.ac.cn;hkust.edu", "position": "PhD student;Assistant Professor;MS student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguo2024spgesture,\ntitle={SpGesture: Source-Free Domain-adaptive s{EMG}-based Gesture Recognition with Jaccard Attentive Spiking Neural Network},\nauthor={Weiyu Guo and Ying Sun and Yijie Xu and Ziyue Qiao and Yongkui Yang and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GYqs5Z4joA}\n}", "github": "", "reviewers": "4Y6C;7xEq;K5du", "pdf_size": 11583851, "rating": "5;7;7", "confidence": "3;4;4", "soundness": "2;4;3", "novelty": "3;4;3", "presentation": "2;3;4", "wc_summary": "113;132;74", "wc_strengths": "50;107;93", "wc_weaknesses": "98;64;64", "wc_questions": "88;54;35", "wc_limitations": "4;43;33", "wc_review": "353;400;299", "wc_reply_reviewers": "11;31;27", "wc_reply_authors": "53;29;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 106.33333333333333, 24.143091949642425 ], "wc_strengths_avg": [ 83.33333333333333, 24.25329301810833 ], "wc_weaknesses_avg": [ 75.33333333333333, 16.027753706895076 ], "wc_questions_avg": [ 59.0, 21.924111536540465 ], "wc_limitations_avg": [ 26.666666666666668, 16.539514973407037 ], "wc_review_avg": [ 350.6666666666667, 41.26607430915726 ], "wc_reply_reviewers_avg": [ 23.0, 8.640987597877148 ], "wc_reply_authors_avg": [ 27.333333333333332, 21.66923061752668 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12901215899556874191&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hkust.edu;hkust-gz.edu.cn;hkust-gz.edu.cn;ust.hk;siat.ac.cn;hkust.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Chinese Academy of Sciences", "aff_unique_dep": ";Shenzhen Institutes of Advanced Technology", "aff_unique_url": "https://www.ust.hk;http://www.cas.cn", "aff_unique_abbr": "HKUST;CAS", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Navigating the Safety Landscape: Measuring Risks in Finetuning Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95896", "id": "GZnsqBwHAG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GZnsqBwHAG", "openreview": "https://openreview.net/forum?id=GZnsqBwHAG", "poster": "", "project": "", "author_site": "ShengYun Peng, Pin-Yu Chen, Matthew Hull, Duen Horng Chau", "tldr": "", "abstract": "Safety alignment is crucial to ensure that large language models (LLMs) behave in ways that align with human preferences and prevent harmful actions during inference. However, recent studies show that the alignment can be easily compromised through finetuning with only a few adversarially designed training examples. We aim to measure the risks in finetuning LLMs through navigating the LLM safety landscape. We discover a new phenomenon observed universally in the model parameter space of popular open-source LLMs, termed as \u201csafety basin\u201d: random perturbations to model weights maintain the safety level of the original aligned model within its local neighborhood. However, outside this local region, safety is fully compromised, exhibiting a sharp, step-like drop. This safety basin contrasts sharply with the LLM capability landscape, where model performance peaks at the origin and gradually declines as random perturbation increases. Our discovery inspires us to propose the new VISAGE safety metric that measures the safety in LLM finetuning by probing its safety landscape. Visualizing the safety landscape of the aligned model enables us to understand how finetuning compromises safety by dragging the model away from the safety basin. The LLM safety landscape also highlights the system prompt\u2019s critical role in protecting a model, and that such protection transfers to its perturbed variants within the safety basin. These observations from our safety landscape research provide new\ninsights for future work on LLM safety community. Our code is publicly available at https://github.com/ShengYun-Peng/llm-landscape.", "keywords": "LLM safety; LLM Landscape; LLM Finetuning; Attack and Defense", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "ShengYun Peng;Pin-Yu Chen;Matthew Daniel Hull;Duen Horng Chau", "authorids": "~ShengYun_Peng1;~Pin-Yu_Chen1;~Matthew_Daniel_Hull1;~Duen_Horng_Chau1", "gender": "M;M;M;Not Specified", "homepage": "https://shengyun-peng.github.io/;http://www.pinyuchen.com;;https://faculty.cc.gatech.edu/~dchau", "dblp": "248/6490;39/8969;;10/2670", "google_scholar": "9Yr7ID0AAAAJ;jxwlCUUAAAAJ;mXauG9YAAAAJ;https://scholar.google.com.tw/citations?user=YON32W4AAAAJ", "orcid": "0000-0003-3063-2052;0000-0003-1039-8369;;0000-0001-9824-3323", "linkedin": "shengyun-anthony-peng-443bb6151/;pin-yu-chen-940062a2;;polochau", "or_profile": "~ShengYun_Peng1;~Pin-Yu_Chen1;~Matthew_Daniel_Hull1;~Duen_Chau1", "aff": "Georgia Institute of Technology;International Business Machines;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;ibm.com;gatech.edu;gatech.edu", "position": "PhD student;Principal Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\npeng2024navigating,\ntitle={Navigating the Safety Landscape: Measuring Risks in Finetuning Large Language Models},\nauthor={ShengYun Peng and Pin-Yu Chen and Matthew Daniel Hull and Duen Horng Chau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GZnsqBwHAG}\n}", "github": "", "reviewers": "KrSC;jxYX;v82d;h1px", "pdf_size": 1471640, "rating": "4;7;8;8", "confidence": "5;3;5;3", "soundness": "2;3;3;3", "novelty": "2;4;3;4", "presentation": "3;3;3;4", "wc_summary": "62;101;108;131", "wc_strengths": "44;87;95;136", "wc_weaknesses": "167;143;433;115", "wc_questions": "159;58;386;137", "wc_limitations": "20;9;87;47", "wc_review": "452;398;1109;566", "wc_reply_reviewers": "229;19;225;5", "wc_reply_authors": "557;40;246;30", "reply_reviewers": "2;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 24.84451649760969 ], "wc_strengths_avg": [ 90.5, 32.65348373451139 ], "wc_weaknesses_avg": [ 214.5, 127.48627377094367 ], "wc_questions_avg": [ 185.0, 121.97335774668171 ], "wc_limitations_avg": [ 40.75, 30.06971067369954 ], "wc_review_avg": [ 631.25, 282.4175764714371 ], "wc_reply_reviewers_avg": [ 119.5, 107.6231852344094 ], "wc_reply_authors_avg": [ 218.25, 213.73625686813176 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.45749571099781405, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1515686444515508106&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "gatech.edu;ibm.com;gatech.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.ibm.com", "aff_unique_abbr": "Georgia Tech;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "First-Order Minimax Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95895", "id": "GZoAUVSkaw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GZoAUVSkaw", "openreview": "https://openreview.net/forum?id=GZoAUVSkaw", "poster": "", "project": "", "author_site": "Yifan Yang, Zhaofeng Si, Siwei Lyu, Kaiyi Ji", "tldr": "", "abstract": "Multi-block minimax bilevel optimization has been studied recently due to its great potential in multi-task learning, robust machine learning, and few-shot learning. However, due to the complex three-level optimization structure, existing algorithms often suffer from issues such as high computing costs due to the second-order model derivatives or high memory consumption in storing all blocks' parameters. In this paper, we tackle these challenges by proposing two novel fully first-order algorithms named FOSL and MemCS. FOSL features a fully single-loop structure by updating all three variables simultaneously, and MemCS is a memory-efficient double-loop algorithm with cold-start initialization. We provide a comprehensive convergence analysis for both algorithms under full and partial block participation, and show that their sample complexities match or outperform those of the same type of methods in standard bilevel optimization. We evaluate our methods in two applications: the recently proposed multi-task deep AUC maximization and a novel rank-based robust meta-learning. Our methods consistently improve over existing methods with better performance over various datasets.", "keywords": "Bilevel Optimization;Machine Learning;Meta Learning", "primary_area": "optimization", "supplementary_material": "/attachment/6dc237ef93ac8de6712a77dcfd8101781211e085.zip", "author": "Yifan Yang;Zhaofeng Si;Siwei Lyu;Kaiyi Ji", "authorids": "~Yifan_Yang13;~Zhaofeng_Si2;~Siwei_Lyu1;~Kaiyi_Ji1", "gender": ";M;M;M", "homepage": ";;https://www.cse.buffalo.edu/~siweilyu;https://cse.buffalo.edu/~kaiyiji/", "dblp": ";254/4388;51/4482;205/3164", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;wefAEM4AAAAJ;E0A3lSIAAAAJ", "orcid": ";;0000-0002-0992-685X;", "linkedin": ";zhaofeng-si-710855255/;siwei-lyu-0806022/;", "or_profile": "~Yifan_Yang13;~Zhaofeng_Si2;~Siwei_Lyu1;~Kaiyi_Ji1", "aff": ";State University of New York at Buffalo;State University of New York, Buffalo;State University of New York at Buffalo", "aff_domain": ";buffalo.edu;buffalo.edu;buffalo.edu", "position": ";PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024firstorder,\ntitle={First-Order Minimax Bilevel Optimization},\nauthor={Yifan Yang and Zhaofeng Si and Siwei Lyu and Kaiyi Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GZoAUVSkaw}\n}", "github": "", "reviewers": "pAAu;mptp;Mt97;iRgq", "pdf_size": 1192856, "rating": "5;6;6;6", "confidence": "2;4;3;3", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "83;64;109;35", "wc_strengths": "7;59;50;52", "wc_weaknesses": "58;93;71;41", "wc_questions": "4;154;17;2", "wc_limitations": "5;1;11;2", "wc_review": "157;371;258;132", "wc_reply_reviewers": "0;12;0;0", "wc_reply_authors": "0;33;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.75, 27.021981792607292 ], "wc_strengths_avg": [ 42.0, 20.481699148264042 ], "wc_weaknesses_avg": [ 65.75, 18.9917745353087 ], "wc_questions_avg": [ 44.25, 63.6253683682853 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 229.5, 94.33583624476968 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 8.25, 14.289419162443238 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LSclRc7_HY0J:scholar.google.com/&scioq=First-Order+Minimax+Bilevel+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";buffalo.edu;buffalo.edu;buffalo.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "State University of New York at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "SUNY Buffalo", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MiSO: Optimizing brain stimulation to create neural activity states", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95894", "id": "Gb0mXhn5h3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Gb0mXhn5h3", "openreview": "https://openreview.net/forum?id=Gb0mXhn5h3", "poster": "/media/PosterPDFs/NeurIPS%202024/95894.png?t=1733714760.3470347", "project": "", "author_site": "Yuki Minai, Joana Soldado-Magraner, Matthew Smith, Byron M Yu", "tldr": "", "abstract": "Brain stimulation has the potential to create desired neural population activity states. However, it is challenging to search the large space of stimulation parameters, for example, selecting which subset of electrodes to be used for stimulation. In this scenario, creating a model that maps the configuration of stimulation parameters to the brain\u2019s response can be beneficial. Training such an expansive model usually requires more stimulation-response samples than can be collected in a given experimental session. Furthermore, changes in the properties of the recorded activity over time can make it challenging to merge stimulation-response samples across sessions. To address these challenges, we propose MiSO (MicroStimulation Optimization), a closed-loop stimulation framework to drive neural population activity toward specified states by optimizing over a large stimulation parameter space. MiSO consists of three key components: 1) a neural activity alignment method to merge stimulation-response samples across sessions, 2) a statistical model trained on the merged samples to predict the brain's response to untested stimulation parameter configurations, and 3) an online optimization algorithm to adaptively update the stimulation parameter configuration based on the model's predictions. In this study, we implemented MiSO with a factor analysis (FA) based alignment method, a convolutional neural network (CNN), and an epsilon greedy optimization algorithm. We tested MiSO in closed-loop experiments using electrical microstimulation in the prefrontal cortex of a non-human primate. Guided by the CNN predictions, MiSO successfully searched amongst thousands of stimulation parameter configurations to drive the neural population activity toward specified states. More broadly, MiSO increases the clinical viability of neuromodulation technologies by enabling the use of many-fold larger stimulation parameter spaces.", "keywords": "closed-loop optimization;microstimulation;neural population activity;dimensionality reduction;latent variable models;reinforcement learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yuki Minai;Joana Soldado-Magraner;Matthew A. Smith;Byron M. Yu", "authorids": "~Yuki_Minai1;~Joana_Soldado-Magraner1;~Matthew_A._Smith2;~Byron_M._Yu1", "gender": "F;F;M;", "homepage": "https://yukiminai.com/;;http://www.smithlab.net;", "dblp": "396/6266;356/3004;74/6003-1;", "google_scholar": "EpIl66IAAAAJ;https://scholar.google.co.uk/citations?user=u4-ClbQAAAAJ;mhi9LhkAAAAJ;", "orcid": ";0000-0003-3607-7264;0000-0003-1192-9942;", "linkedin": "yuki-minai-95162a100/;;;", "or_profile": "~Yuki_Minai1;~Joana_Soldado-Magraner1;~Matthew_A._Smith2;~Byron_M._Yu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu;", "position": "PhD student;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nminai2024miso,\ntitle={Mi{SO}: Optimizing brain stimulation to create neural activity states},\nauthor={Yuki Minai and Joana Soldado-Magraner and Matthew A. Smith and Byron M. Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Gb0mXhn5h3}\n}", "github": "", "reviewers": "Yfry;5hk3;C2zx", "pdf_size": 2140762, "rating": "5;6;7", "confidence": "3;4;5", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "54;72;163", "wc_strengths": "41;41;132", "wc_weaknesses": "127;61;199", "wc_questions": "225;86;200", "wc_limitations": "5;47;81", "wc_review": "452;307;775", "wc_reply_reviewers": "102;30;16", "wc_reply_authors": "563;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 96.33333333333333, 47.70977072070481 ], "wc_strengths_avg": [ 71.33333333333333, 42.89781139198388 ], "wc_weaknesses_avg": [ 129.0, 56.356011214421486 ], "wc_questions_avg": [ 170.33333333333334, 60.49977043115307 ], "wc_limitations_avg": [ 44.333333333333336, 31.084115271666047 ], "wc_review_avg": [ 511.3333333333333, 195.6124285985485 ], "wc_reply_reviewers_avg": [ 49.333333333333336, 37.67698973585278 ], "wc_reply_authors_avg": [ 187.66666666666666, 265.40074520535086 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12005704492927543760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "andrew.cmu.edu;cmu.edu;cmu.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Learning Progress Drives Autonomous Goal Selection in Human Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95893", "id": "GbqzN9HiUC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GbqzN9HiUC", "openreview": "https://openreview.net/forum?id=GbqzN9HiUC", "poster": "/media/PosterPDFs/NeurIPS%202024/95893.png?t=1731170517.47616", "project": "", "author_site": "Gaia Molinaro, C\u00e9dric Colas, Pierre-Yves Oudeyer, Anne Collins", "tldr": "", "abstract": "Humans are autotelic agents who learn by setting and pursuing their own goals. However, the precise mechanisms guiding human goal selection remain unclear. Learning progress, typically measured as the observed change in performance, can provide a valuable signal for goal selection in both humans and artificial agents. We hypothesize that human choices of goals may also be driven by _latent learning progress_, which humans can estimate through knowledge of their actions and the environment \u2013 even without experiencing immediate changes in performance. To test this hypothesis, we designed a hierarchical reinforcement learning task in which human participants (N = 175) repeatedly chose their own goals and learned goal-conditioned policies. Our behavioral and computational modeling results confirm the influence of latent learning progress on goal selection and uncover inter-individual differences, partially mediated by recognition of the task's hierarchical structure. By investigating the role of latent learning progress in human goal selection, we pave the way for more effective and personalized learning experiences as well as the advancement of more human-like autotelic machines.", "keywords": "goals;reinforcement learning;cognitive science;computational modeling;autotelic agents;curriculum development", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Gaia Molinaro;C\u00e9dric Colas;Pierre-Yves Oudeyer;Anne Collins", "authorids": "~Gaia_Molinaro1;~C\u00e9dric_Colas1;~Pierre-Yves_Oudeyer1;~Anne_Collins1", "gender": "F;M;M;", "homepage": "https://gaiamolinaro.github.io/;https://cedriccolas.com;http://www.pyoudeyer.com;", "dblp": ";215/3872;33/5513;", "google_scholar": "N3eRG0UAAAAJ;https://scholar.google.fr/citations?user=VBz8gZ4AAAAJ;https://scholar.google.fr/citations?user=gCqGj4sAAAAJ;9O4VlHYAAAAJ", "orcid": "0000-0001-6145-133X;0000-0003-0212-427X;;", "linkedin": "gaiamolinaro/;;pierreyvesoudeyer/;", "or_profile": "~Gaia_Molinaro1;~C\u00e9dric_Colas1;~Pierre-Yves_Oudeyer1;~Anne_Collins1", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;Inria;University of California, Berkeley", "aff_domain": "berkeley.edu;mit.edu;inria.fr;berkeley.edu", "position": "PhD student;Postdoc;Research director;Associate Professor", "bibtex": "@inproceedings{\nmolinaro2024latent,\ntitle={Latent Learning Progress Drives Autonomous Goal Selection in Human Reinforcement Learning},\nauthor={Gaia Molinaro and C{\\'e}dric Colas and Pierre-Yves Oudeyer and Anne Collins},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GbqzN9HiUC}\n}", "github": "", "reviewers": "kdQC;Grub;HMNe;y4se", "pdf_size": 17746245, "rating": "6;7;8;8", "confidence": "3;4;4;3", "soundness": "3;4;4;4", "novelty": "2;3;3;4", "presentation": "2;4;3;4", "wc_summary": "25;213;59;229", "wc_strengths": "34;120;46;113", "wc_weaknesses": "78;531;97;47", "wc_questions": "38;8;63;29", "wc_limitations": "1;3;16;28", "wc_review": "176;875;281;446", "wc_reply_reviewers": "66;305;39;64", "wc_reply_authors": "353;252;19;23", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 131.5, 90.48066091712637 ], "wc_strengths_avg": [ 78.25, 38.564070065282266 ], "wc_weaknesses_avg": [ 188.25, 198.6899280285742 ], "wc_questions_avg": [ 34.5, 19.72941965694886 ], "wc_limitations_avg": [ 12.0, 10.88577052853862 ], "wc_review_avg": [ 444.5, 266.53189302595666 ], "wc_reply_reviewers_avg": [ 118.5, 108.20004621071102 ], "wc_reply_authors_avg": [ 161.75, 145.21600290601583 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9177706265452660535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "berkeley.edu;mit.edu;inria.fr;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;INRIA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.inria.fr", "aff_unique_abbr": "UC Berkeley;MIT;Inria", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Instruction Tuning With Loss Over Instructions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95892", "id": "GcZgo9ffGt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GcZgo9ffGt", "openreview": "https://openreview.net/forum?id=GcZgo9ffGt", "poster": "", "project": "", "author_site": "Zhengxiang Shi, Adam Yang, Bin Wu, Laurence Aitchison, Emine Yilmaz, Aldo Lipani", "tldr": "", "abstract": "Instruction tuning plays a crucial role in shaping the outputs of language models (LMs) to desired styles. In this work, we propose a simple yet effective method, Instruction Modelling (IM), which trains LMs by applying a loss function to the instruction and prompt part rather than solely to the output part. Through experiments across 21 diverse benchmarks, we show that, in many scenarios, IM can effectively improve the LM performance on both NLP tasks (*e.g.,* MMLU, TruthfulQA, and HumanEval) and open-ended generation benchmarks (*e.g.,* MT-Bench and AlpacaEval). Remarkably, in the most advantageous case, IM boosts model performance on AlpacaEval 1.0 by over 100%. We identify two key factors influencing the effectiveness of IM: (1) The ratio between instruction length and output length in the training data; and (2) The number of training examples. We observe that IM is especially beneficial when trained on datasets with lengthy instructions paired with brief outputs, or under the Superficial Alignment Hypothesis (SAH) where a small amount of training examples are used for instruction tuning. Further analysis substantiates our hypothesis that our improvement can be attributed to reduced overfitting to instruction tuning datasets. It is worth noting that we are not proposing \\ours as a replacement for the current instruction tuning process.\nInstead, our work aims to provide practical guidance for instruction tuning LMs, especially in low-resource scenarios.\nOur code is available at https://github.com/ZhengxiangShi/InstructionModelling.", "keywords": "Instruction Tuning;Language Models;Low-Resource Methods for NLP", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhengyan Shi;Adam X. Yang;Bin Wu;Laurence Aitchison;Emine Yilmaz;Aldo Lipani", "authorids": "~Zhengyan_Shi2;~Adam_X._Yang1;~Bin_Wu4;~Laurence_Aitchison1;~Emine_Yilmaz1;~Aldo_Lipani1", "gender": "M;M;;F;M;M", "homepage": ";https://github.com/Bingo-W;http://www.gatsby.ucl.ac.uk/~laurence/;https://sites.google.com/site/emineyilmaz/;;https://shizhengyan.github.io/", "dblp": ";98/4432-25;155/1918.html;36/3270;150/5264;219/7021", "google_scholar": ";2ZHjpDcAAAAJ;;https://scholar.google.com.tw/citations?user=ocmAN4YAAAAJ;;TF8l2ZEAAAAJ", "orcid": "0000-0003-4979-8386;0000-0002-8677-2321;;;;0000-0003-3074-3035", "linkedin": ";bin-wu-a915601a5/;;;;zhengxiang-shi/", "or_profile": "~Adam_X._Yang1;~Bin_Wu4;~Laurence_Aitchison1;~Emine_Yilmaz1;~Aldo_Lipani1;~Zhengxiang_Shi1", "aff": "University of Bristol;University College London, University of London;University of Bristol;Department of Computer Science, University College London;University College London, University of London;University College London", "aff_domain": "bristol.ac.uk;ucl.ac.uk;bristol.ac.uk;cs.ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nshi2024instruction,\ntitle={Instruction Tuning With Loss Over Instructions},\nauthor={Zhengyan Shi and Adam X. Yang and Bin Wu and Laurence Aitchison and Emine Yilmaz and Aldo Lipani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GcZgo9ffGt}\n}", "github": "", "reviewers": "vkoa;kinw;b1op;z7Cy", "pdf_size": 725055, "rating": "6;7;7;7", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;4;4", "wc_summary": "125;36;80;63", "wc_strengths": "128;20;162;81", "wc_weaknesses": "112;229;131;80", "wc_questions": "74;156;137;14", "wc_limitations": "11;1;7;4", "wc_review": "450;442;517;242", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 76.0, 32.349652239243625 ], "wc_strengths_avg": [ 97.75, 53.312170280340304 ], "wc_weaknesses_avg": [ 138.0, 55.61025085359713 ], "wc_questions_avg": [ 95.25, 55.87206368123519 ], "wc_limitations_avg": [ 5.75, 3.6996621467371855 ], "wc_review_avg": [ 412.75, 102.79439430241321 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7893815839842292857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bristol.ac.uk;ucl.ac.uk;bristol.ac.uk;cs.ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "author_num": 6, "aff_unique_index": "0;1;0;1;1;1", "aff_unique_norm": "University of Bristol;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.bristol.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "Bristol;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "MeshXL: Neural Coordinate Field for Generative 3D Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95891", "id": "Gcks157FI3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Gcks157FI3", "openreview": "https://openreview.net/forum?id=Gcks157FI3", "poster": "", "project": "", "author_site": "Sijin Chen, Xin Chen, Anqi Pang, Xianfang Zeng, Wei Cheng, Yijun Fu, Fukun Yin, Billzb Wang, Jingyi Yu, Gang Yu, BIN FU, Tao Chen", "tldr": "", "abstract": "The polygon mesh representation of 3D data exhibits great flexibility, fast rendering speed, and storage efficiency, which is widely preferred in various applications. However, given its unstructured graph representation, the direct generation of high-fidelity 3D meshes is challenging. Fortunately, with a pre-defined ordering strategy, 3D meshes can be represented as sequences, and the generation process can be seamlessly treated as an auto-regressive problem. In this paper, we validate Neural Coordinate Field (NeurCF), an explicit coordinate representation with implicit neural embeddings, is a simple-yet-effective representation for large-scale sequential mesh modeling. After that, we present MeshXL, a family of generative pre-trained auto-regressive models that addresses 3D mesh generation with modern large language model approaches. Extensive experiments show that MeshXL is able to generate high-quality 3D meshes, and can also serve as foundation models for various down-stream applications.", "keywords": "3D;large generation models", "primary_area": "generative_models", "supplementary_material": "/attachment/fbda68621aea3dcb8aa935fb2b5a46485c6f7749.zip", "author": "Sijin Chen;Xin Chen;Anqi Pang;Xianfang Zeng;Wei Cheng;Yijun Fu;Fukun Yin;Zhibin Wang;Jingyi Yu;Gang Yu;BIN FU;Tao Chen", "authorids": "~Sijin_Chen1;~Xin_Chen16;~Anqi_Pang1;~Xianfang_Zeng2;~Wei_Cheng7;~Yijun_Fu1;~Fukun_Yin1;~Zhibin_Wang4;~Jingyi_Yu1;~Gang_YU2;~BIN_FU2;~Tao_Chen6", "gender": "M;M;M;M;;F;M;;;M;M;M", "homepage": "https://ch3cook-fdu.github.io/;https://chenxin.tech/;;;;;https://fukunyin.github.io/;;;https://skicyyu.org/;https://www.facebook.com/bin.fu.73/;https://eetchen.github.io/", "dblp": "96/9616;24/1518-40;239/4129;241/9439;;;272/0842;;;;;69/510-3", "google_scholar": "https://scholar.google.com/citations?hl=en;7qeAJZ4AAAAJ;;;;;HGFT79EAAAAJ;;;https://scholar.google.com.sg/citations?user=BJdigYsAAAAJ;;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ", "orcid": ";0000-0002-9347-1367;;0000-0003-1251-2129;;;;;;0000-0001-5570-2710;;", "linkedin": ";xin-chen-cs/;\u5b89\u742a-\u5e9e-a89282157;;;https://www.linkedin.cn/incareer/in/ACoAACJqPK4Bspovycjf4sCq_LEUyOUoZDjW-Es;;;;;;", "or_profile": "~Sijin_Chen1;~Xin_Chen16;~Anqi_Pang1;~Xianfang_Zeng2;~Wei_Cheng7;~Yijun_Fu1;~Fukun_Yin1;~Zhibin_Wang4;~Jingyi_Yu1;~Gang_YU2;~BIN_FU2;~Tao_Chen6", "aff": "Fudan University;Tencent;ShanghaiTech University;Tencent PCG;;;Tencent PCG ;;;Tencent;Tencent;Fudan University", "aff_domain": "fudan.edu.cn;tencent.com;shanghaitech.edu.cn;tencent.com;;;tencent.com;;;tencent.com;tencent.com;fudan.edu.cn", "position": "MS student;Researcher;PhD student;Researcher;;;Intern;;;Research Scientist;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024meshxl,\ntitle={Mesh{XL}: Neural Coordinate Field for Generative 3D Foundation Models},\nauthor={Sijin Chen and Xin Chen and Anqi Pang and Xianfang Zeng and Wei Cheng and Yijun Fu and Fukun Yin and Zhibin Wang and Jingyi Yu and Gang Yu and BIN FU and Tao Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Gcks157FI3}\n}", "github": "", "reviewers": "e1fg;CF59;AHoX;XQPQ", "pdf_size": 14136400, "rating": "4;5;6;6", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "100;61;27;100", "wc_strengths": "32;27;37;82", "wc_weaknesses": "191;196;63;68", "wc_questions": "90;141;178;81", "wc_limitations": "24;9;1;3", "wc_review": "437;434;306;334", "wc_reply_reviewers": "185;38;0;0", "wc_reply_authors": "210;41;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 30.47129797038518 ], "wc_strengths_avg": [ 44.5, 21.937410968480304 ], "wc_weaknesses_avg": [ 129.5, 64.04880951274582 ], "wc_questions_avg": [ 122.5, 39.37321424522006 ], "wc_limitations_avg": [ 9.25, 9.01041064547005 ], "wc_review_avg": [ 377.75, 58.601941094131 ], "wc_reply_reviewers_avg": [ 55.75, 76.21802608307303 ], "wc_reply_authors_avg": [ 62.75, 86.64691281286368 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17247527932778808249&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fudan.edu.cn;tencent.com;shanghaitech.edu.cn;tencent.com;;;tencent.com;;;tencent.com;tencent.com;fudan.edu.cn", "author_num": 12, "aff_unique_index": "0;1;2;1;1;1;1;0", "aff_unique_norm": "Fudan University;Tencent;ShanghaiTech University", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.tencent.com;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "Fudan;Tencent;ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neuronal Competition Groups with Supervised STDP for Spike-Based Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95890", "id": "GeE5qF6ICg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GeE5qF6ICg", "openreview": "https://openreview.net/forum?id=GeE5qF6ICg", "poster": "/media/PosterPDFs/NeurIPS%202024/95890.png?t=1730197092.8120975", "project": "", "author_site": "Gaspard Goupy, Pierre Tirilly, Ioan Marius Bilasco", "tldr": "", "abstract": "Spike Timing-Dependent Plasticity (STDP) is a promising substitute to backpropagation for local training of Spiking Neural Networks (SNNs) on neuromorphic hardware. STDP allows SNNs to address classification tasks by combining unsupervised STDP for feature extraction and supervised STDP for classification. Unsupervised STDP is usually employed with Winner-Takes-All (WTA) competition to learn distinct patterns. However, WTA for supervised STDP classification faces unbalanced competition challenges. In this paper, we propose a method to effectively implement WTA competition in a spiking classification layer employing first-spike coding and supervised STDP training. We introduce the Neuronal Competition Group (NCG), an architecture that improves classification capabilities by promoting the learning of various patterns per class. An NCG is a group of neurons mapped to a specific class, implementing intra-class WTA and a novel competition regulation mechanism based on two-compartment thresholds. We incorporate our proposed architecture into spiking classification layers trained with state-of-the-art supervised STDP rules. On top of two different unsupervised feature extractors, we obtain significant accuracy improvements on image recognition datasets such as CIFAR-10 and CIFAR-100. We show that our competition regulation mechanism is crucial for ensuring balanced competition and improved class separation.", "keywords": "Spiking Neural Networks;STDP;Winner-Takes-All Competition;Local Learning;First-Spike-Based Classification", "primary_area": "other", "supplementary_material": "/attachment/ce9faf8805f095d6bb069a3e6aa13f5b2b8bbe7b.zip", "author": "Gaspard Goupy;Pierre Tirilly;Ioan Marius Bilasco", "authorids": "~Gaspard_Goupy1;~Pierre_Tirilly1;~Ioan_Marius_Bilasco1", "gender": ";M;M", "homepage": ";https://pro.univ-lille.fr/pierre-tirilly;https://pro.univ-lille.fr/marius-bilasco/", "dblp": ";83/2396;b/IoanMariusBilasco", "google_scholar": ";CKGKuOAAAAAJ;https://scholar.google.fr/citations?user=jNBdRCsAAAAJ", "orcid": ";0000-0003-2675-8023;0000-0001-7254-8727", "linkedin": ";;mariusbilasco/", "or_profile": "~Gaspard_Goupy1;~Pierre_Tirilly1;~Ioan_Marius_Bilasco1", "aff": ";Universit\u00e9 de Lille;Universit\u00e9 de Lille", "aff_domain": ";univ-lille.fr;univ-lille.fr", "position": ";Associate Professor;Full Professor", "bibtex": "@inproceedings{\ngoupy2024neuronal,\ntitle={Neuronal Competition Groups with Supervised {STDP} for Spike-Based Classification},\nauthor={Gaspard Goupy and Pierre Tirilly and Ioan Marius Bilasco},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GeE5qF6ICg}\n}", "github": "", "reviewers": "AF7r;ygD7;k4WN;EvbR", "pdf_size": 747952, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "46;52;68;68", "wc_strengths": "33;69;53;63", "wc_weaknesses": "96;164;56;130", "wc_questions": "103;131;8;76", "wc_limitations": "1;35;36;15", "wc_review": "279;451;221;352", "wc_reply_reviewers": "29;64;11;13", "wc_reply_authors": "20;42;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.5, 9.733961166965893 ], "wc_strengths_avg": [ 54.5, 13.665650368716449 ], "wc_weaknesses_avg": [ 111.5, 40.05933099790859 ], "wc_questions_avg": [ 79.5, 45.63167759353145 ], "wc_limitations_avg": [ 21.75, 14.618053906043718 ], "wc_review_avg": [ 325.75, 85.92838588033642 ], "wc_reply_reviewers_avg": [ 29.25, 21.241174637952582 ], "wc_reply_authors_avg": [ 15.5, 17.342145196024624 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1778000083292770994&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";univ-lille.fr;univ-lille.fr", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Universit\u00e9 de Lille", "aff_unique_dep": "", "aff_unique_url": "https://www.univ-lille.fr", "aff_unique_abbr": "UdeL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Continuous Heatmap Regression for Pose Estimation via Implicit Neural Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95889", "id": "GgIJeoSLjQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GgIJeoSLjQ", "openreview": "https://openreview.net/forum?id=GgIJeoSLjQ", "poster": "/media/PosterPDFs/NeurIPS%202024/95889.png?t=1730359248.4387698", "project": "", "author_site": "Shengxiang Hu, Huaijiang Sun, Dong Wei, Xiaoning Sun, Jin Wang", "tldr": "", "abstract": "Heatmap regression has dominated human pose estimation due to its superior performance and strong generalization. To meet the requirements of traditional explicit neural networks for output form, existing heatmap-based methods discretize the originally continuous heatmap representation into 2D pixel arrays, which leads to performance degradation due to the introduction of quantization errors. This problem is significantly exacerbated as the size of the input image decreases, which makes heatmap-based methods not much better than coordinate regression on low-resolution images. In this paper, we propose a novel neural representation for human pose estimation called NerPE to achieve continuous heatmap regression. Given any position within the image range, NerPE regresses the corresponding confidence scores for body joints according to the surrounding image features, which guarantees continuity in space and confidence during training. Thanks to the decoupling from spatial resolution, NerPE can output the predicted heatmaps at arbitrary resolution during inference without retraining, which easily achieves sub-pixel localization precision. To reduce the computational cost, we design progressive coordinate decoding to cooperate with continuous heatmap regression, in which localization no longer requires the complete generation of high-resolution heatmaps. The code is available at https://github.com/hushengxiang/NerPE.", "keywords": "Human pose estimation;continuous heatmap regression;implicit neural representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shengxiang Hu;Huaijiang Sun;Dong Wei;Xiaoning Sun;Jin Wang", "authorids": "~Shengxiang_Hu1;~Huaijiang_Sun3;~Dong_Wei3;~Xiaoning_Sun1;~Jin_Wang24", "gender": ";M;M;F;", "homepage": ";;;;", "dblp": "166/3832-1;14/7280;34/4292-7;87/5000;92/1375-5", "google_scholar": "WlNT7GkAAAAJ;Bx6bgH4AAAAJ;https://scholar.google.com.hk/citations?user=XQM9rEMAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;7WelfDoAAAAJ", "orcid": ";;0000-0002-7299-7693;;", "linkedin": ";;;;", "or_profile": "~Shengxiang_Hu1;~Huaijiang_Sun3;~Dong_Wei3;~Xiaoning_Sun1;~Jin_Wang24", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nantong University", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;ntu.edu.cn", "position": "PhD student;Full Professor;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhu2024continuous,\ntitle={Continuous Heatmap Regression for Pose Estimation via Implicit Neural Representation},\nauthor={Shengxiang Hu and Huaijiang Sun and Dong Wei and Xiaoning Sun and Jin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GgIJeoSLjQ}\n}", "github": "", "reviewers": "KNNC;xx7e;rvLc;k3b9", "pdf_size": 7916645, "rating": "5;5;6;6", "confidence": "5;5;4;5", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "64;52;150;138", "wc_strengths": "33;29;71;143", "wc_weaknesses": "121;603;387;174", "wc_questions": "2;4;219;131", "wc_limitations": "8;39;116;10", "wc_review": "228;727;943;596", "wc_reply_reviewers": "0;175;53;325", "wc_reply_authors": "0;173;125;469", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 101.0, 43.41658669218482 ], "wc_strengths_avg": [ 69.0, 45.760244754590204 ], "wc_weaknesses_avg": [ 321.25, 190.7149377998483 ], "wc_questions_avg": [ 89.0, 91.45764046814242 ], "wc_limitations_avg": [ 43.25, 43.757142274147654 ], "wc_review_avg": [ 623.5, 259.7927058252406 ], "wc_reply_reviewers_avg": [ 138.25, 125.10670445663574 ], "wc_reply_authors_avg": [ 191.75, 172.0775624536796 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:syTvLEnDRUYJ:scholar.google.com/&scioq=Continuous+Heatmap+Regression+for+Pose+Estimation+via+Implicit+Neural+Representation&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;ntu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Nanjing University of Science and Technology;Nantong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;https://www.ntu.edu.cn/", "aff_unique_abbr": "NUST;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A distributional simplicity bias in the learning dynamics of transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95888", "id": "GgV6UczIWM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GgV6UczIWM", "openreview": "https://openreview.net/forum?id=GgV6UczIWM", "poster": "", "project": "", "author_site": "Riccardo Rende, Federica Gerace, Alessandro Laio, Sebastian Goldt", "tldr": "", "abstract": "The remarkable capability of over-parameterised neural networks to generalise effectively has been explained by invoking a ``simplicity bias'': neural networks prevent overfitting by initially learning simple classifiers before progressing to more complex, non-linear functions. While simplicity biases have been described theoretically and experimentally in feed-forward networks for supervised learning, the extent to which they also explain the remarkable success of transformers trained with self-supervised techniques remains unclear. In our study, we demonstrate that transformers, trained on natural language data, also display a simplicity bias. Specifically, they sequentially learn many-body interactions among input tokens, reaching a saturation point in the prediction error for low-degree interactions while continuing to learn high-degree interactions. To conduct this analysis, we develop a procedure to generate \\textit{clones} of a given natural language data set, which rigorously capture the interactions between tokens up to a specified order. This approach opens up the possibilities of studying how interactions of different orders in the data affect learning, in natural language processing and beyond.", "keywords": "Transformers;Natural Language Processing;Sequential Learning;Simplicity Bias", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Riccardo Rende;Federica Gerace;Alessandro Laio;Sebastian Goldt", "authorids": "~Riccardo_Rende1;~Federica_Gerace1;~Alessandro_Laio1;~Sebastian_Goldt1", "gender": "M;F;M;M", "homepage": ";;https://people.sissa.it/~laio/;https://datascience.sissa.it/research-unit/12/theory-of-neural-networks", "dblp": ";;;234/8941", "google_scholar": "VV-SmWIAAAAJ;dvDLaPkAAAAJ;https://scholar.google.it/citations?user=ma-T1oEAAAAJ;R06wsMkAAAAJ", "orcid": "0000-0001-5656-4241;;;", "linkedin": ";;;", "or_profile": "~Riccardo_Rende1;~Federica_Gerace1;~Alessandro_Laio1;~Sebastian_Goldt1", "aff": "International Higher School for Advanced Studies Trieste;International Higher School for Advanced Studies Trieste;SISSA/ISAS;SISSA", "aff_domain": "sissa.it;sissa.it;sissa.it;sissa.it", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nrende2024a,\ntitle={A distributional simplicity bias in the learning dynamics of transformers},\nauthor={Riccardo Rende and Federica Gerace and Alessandro Laio and Sebastian Goldt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GgV6UczIWM}\n}", "github": "", "reviewers": "Kh1E;NZGs;ex9y;fVhm", "pdf_size": 995341, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "2;4;2;3", "novelty": "2;4;3;3", "presentation": "2;3;2;3", "wc_summary": "69;216;40;74", "wc_strengths": "22;231;142;121", "wc_weaknesses": "422;231;441;124", "wc_questions": "58;59;50;15", "wc_limitations": "7;6;2;1", "wc_review": "578;743;675;335", "wc_reply_reviewers": "194;0;323;0", "wc_reply_authors": "276;0;211;0", "reply_reviewers": "2;0;2;0", "reply_authors": "2;1;3;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 99.75, 68.36071605827429 ], "wc_strengths_avg": [ 129.0, 74.30679645900501 ], "wc_weaknesses_avg": [ 304.5, 132.6847768208546 ], "wc_questions_avg": [ 45.5, 17.95132307101624 ], "wc_limitations_avg": [ 4.0, 2.5495097567963922 ], "wc_review_avg": [ 582.75, 154.59038618232378 ], "wc_reply_reviewers_avg": [ 129.25, 137.06088975342308 ], "wc_reply_authors_avg": [ 121.75, 123.89990920093526 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16654335562815790614&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sissa.it;sissa.it;sissa.it;sissa.it", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "International Higher School for Advanced Studies;Scuola Internazionale Superiore di Studi Avanzati", "aff_unique_dep": ";", "aff_unique_url": "https://www.sissa.it;https://www.sissa.it", "aff_unique_abbr": "SISSA;SISSA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Trieste;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Improving Decision Sparsity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95887", "id": "GhqdnLZMAz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GhqdnLZMAz", "openreview": "https://openreview.net/forum?id=GhqdnLZMAz", "poster": "/media/PosterPDFs/NeurIPS%202024/95887.png?t=1731376514.086432", "project": "", "author_site": "Yiyang Sun, Tong Wang, Cynthia Rudin", "tldr": "", "abstract": "Sparsity is a central aspect of interpretability in machine learning. Typically, sparsity is measured in terms of the size of a model globally, such as the number of variables it uses. However, this notion of sparsity is not particularly relevant for decision making; someone subjected to a decision does not care about variables that do not contribute to the decision. In this work, we dramatically expand a notion of *decision sparsity* called the *Sparse Explanation Value* (SEV) so that its explanations are more meaningful. SEV considers movement along a hypercube towards a reference point. By allowing flexibility in that reference and by considering how distances along the hypercube translate to distances in feature space, we can derive sparser and more meaningful explanations for various types of function classes. We present cluster-based SEV and its variant tree-based SEV, introduce a method that improves credibility of explanations, and propose algorithms that optimize decision sparsity in machine learning models.", "keywords": "sparse explanation;decision sparsity", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/7bb0771f77801312becd2e950cd7b2850f65853e.zip", "author": "Yiyang Sun;Tong Wang;Cynthia Rudin", "authorids": "~Yiyang_Sun3;~Tong_Wang4;~Cynthia_Rudin1", "gender": "M;F;", "homepage": "https://www.linkedin.com/in/%E5%A5%95%E6%89%AC-%E5%AD%99-b30434105/?originalSubdomain=cn;https://tongwang-ai.github.io/;", "dblp": ";https://dblp.uni-trier.de/pid/51/6856-11;", "google_scholar": "99vOyFEAAAAJ;KB6A0esAAAAJ;", "orcid": ";0000-0001-8687-4208;", "linkedin": "%E5%A5%95%E6%89%AC-%E5%AD%99-b30434105/?originalSubdomain=cn;;", "or_profile": "~Yiyang_Sun3;~Tong_Wang4;~Cynthia_Rudin1", "aff": "Duke University;Yale University;", "aff_domain": "duke.edu;yale.edu;", "position": "MS student;Assistant Professor;", "bibtex": "@inproceedings{\nsun2024improving,\ntitle={Improving Decision Sparsity},\nauthor={Yiyang Sun and Tong Wang and Cynthia Rudin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GhqdnLZMAz}\n}", "github": "", "reviewers": "bvrF;xH4u;PbJJ", "pdf_size": 2972406, "rating": "5;7;8", "confidence": "4;4;2", "soundness": "3;4;4", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "24;62;33", "wc_strengths": "5;31;52", "wc_weaknesses": "136;11;29", "wc_questions": "28;87;18", "wc_limitations": "31;5;55", "wc_review": "224;196;187", "wc_reply_reviewers": "44;10;4", "wc_reply_authors": "102;0;0", "reply_reviewers": "1;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 39.666666666666664, 16.21384867602041 ], "wc_strengths_avg": [ 29.333333333333332, 19.22382780706162 ], "wc_weaknesses_avg": [ 58.666666666666664, 55.174470747096635 ], "wc_questions_avg": [ 44.333333333333336, 30.44484995674784 ], "wc_limitations_avg": [ 30.333333333333332, 20.417857108151406 ], "wc_review_avg": [ 202.33333333333334, 15.755069730795299 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 17.613126418163876 ], "wc_reply_authors_avg": [ 34.0, 48.08326112068523 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7559289460184546, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11526685388379225045&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "duke.edu;yale.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Duke University;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.yale.edu", "aff_unique_abbr": "Duke;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "CorDA: Context-Oriented Decomposition Adaptation of Large Language Models for Task-Aware Parameter-Efficient Fine-tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95886", "id": "Gi00NVru6n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Gi00NVru6n", "openreview": "https://openreview.net/forum?id=Gi00NVru6n", "poster": "/media/PosterPDFs/NeurIPS%202024/95886.png?t=1733839168.351224", "project": "", "author_site": "Yibo Yang, Xiaojie Li, Zhongzhu Zhou, Shuaiwen Song, Jianlong Wu, Liqiang Nie, Bernard Ghanem", "tldr": "", "abstract": "Current parameter-efficient fine-tuning (PEFT) methods build adapters widely agnostic of the context of downstream task to learn, or the context of important knowledge to maintain. As a result, there is often a performance gap compared to full-parameter fine-tuning, and meanwhile the fine-tuned model suffers from catastrophic forgetting of the pre-trained world knowledge. In this paper, we propose **CorDA**, a Context-oriented Decomposition Adaptation method that builds learnable **task-aware adapters** from weight decomposition oriented by the context of downstream task or the world knowledge to maintain. Concretely, we collect a few data samples, and perform singular value decomposition for each linear layer of a pre-trained LLM multiplied by the covariance matrix of the input activation using these samples. The inverse of the covariance matrix is multiplied with the decomposed components to reconstruct the original weights. By doing so, the context of the representative samples is captured through deciding the factorizing orientation. Our method enables two options, the **knowledge-preserved adaptation** and the **instruction-previewed adaptation**. For the former, we use question-answering samples to obtain the covariance matrices, and use the decomposed components with the smallest $r$ singular values to initialize a learnable adapter, with the others frozen such that the world knowledge is better preserved. For the latter, we use the instruction data from the fine-tuning task, such as math or coding, to orientate the decomposition and train the largest $r$ components that most correspond to the task to learn. We conduct extensive experiments on Math, Code, and Instruction Following tasks. Our knowledge-preserved adaptation not only achieves better performance than LoRA on fine-tuning tasks, but also mitigates the forgetting of world knowledge. Our instruction-previewed adaptation is able to further enhance the fine-tuning performance to be comparable with full fine-tuning, surpassing \nthe state-of-the-art PEFT methods such as LoRA, DoRA, and PiSSA.", "keywords": "parameter-efficient fine-tuning;LLM;knowledge forgetting", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yibo Yang;Xiaojie Li;Zhongzhu Zhou;Shuaiwen Leon Song;Jianlong Wu;Liqiang Nie;Bernard Ghanem", "authorids": "~Yibo_Yang2;~Xiaojie_Li3;~Zhongzhu_Zhou1;~Shuaiwen_Leon_Song1;~Jianlong_Wu2;~Liqiang_Nie2;~Bernard_Ghanem1", "gender": "M;F;M;M;M;M;", "homepage": "https://iboing.github.io/;https://xiaojieli0903.github.io;http://zhongzhuzhou.org/;https://shuaiwen-leon-song.github.io/;https://liqiangnie.github.io/index.html;https://ivul.kaust.edu.sa;https://jlwu1992.github.io/", "dblp": "28/7717/;;274/7781;23/7512.html;92/8277;37/2516;170/4643", "google_scholar": "DxXXnCcAAAAJ;0PcPQfQAAAAJ;https://scholar.google.com.hk/citations?user=BoZKZl4AAAAJ;vt_QcOMAAAAJ;yywVMhUAAAAJ;rVsGTeEAAAAJ;XGeEH-IAAAAJ", "orcid": ";0000-0001-6449-2727;;;0000-0003-1476-0273;0000-0002-5534-587X;", "linkedin": ";;zhongzhu-zhou/;shuaiwen-leon-song-1a663019/;;bernardghanem/;", "or_profile": "~Yibo_Yang2;~Xiaojie_Li3;~Zhongzhu_Zhou1;~Shuaiwen_Leon_Song1;~Liqiang_Nie2;~Bernard_Ghanem1;~Jianlong_Wu1", "aff": "King Abdullah University of Science and Technology;Harbin Institute of Technology;University of Sydney;University of Washington, Seattle;Shandong University;King Abdullah University of Science and Technology;Harbin Institute of Technology (Shenzhen)", "aff_domain": "kaust.edu.sa;hit.edu.cn;usyd.edu.au;uw.edu;sdu.edu.cn;kaust.edu.sa;hit.edu.cn", "position": "Research Scientist;PhD student;PhD student;Affiliated Assistant Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyang2024corda,\ntitle={Cor{DA}: Context-Oriented Decomposition Adaptation of Large Language Models for Task-Aware Parameter-Efficient Fine-tuning},\nauthor={Yibo Yang and Xiaojie Li and Zhongzhu Zhou and Shuaiwen Leon Song and Jianlong Wu and Liqiang Nie and Bernard Ghanem},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Gi00NVru6n}\n}", "github": "", "reviewers": "kduk;Mgdn;HCBt;527n", "pdf_size": 1093004, "rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;2;4", "wc_summary": "68;101;110;92", "wc_strengths": "68;85;186;141", "wc_weaknesses": "194;225;40;168", "wc_questions": "33;33;1;5", "wc_limitations": "6;9;1;26", "wc_review": "369;453;338;432", "wc_reply_reviewers": "147;42;15;17", "wc_reply_authors": "777;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 92.75, 15.642490210960657 ], "wc_strengths_avg": [ 120.0, 46.70653059262698 ], "wc_weaknesses_avg": [ 156.75, 70.36112207746548 ], "wc_questions_avg": [ 18.0, 15.066519173319364 ], "wc_limitations_avg": [ 10.5, 9.394147114027968 ], "wc_review_avg": [ 398.0, 46.427362621626486 ], "wc_reply_reviewers_avg": [ 55.25, 54.02950582783448 ], "wc_reply_authors_avg": [ 194.25, 336.4508693702544 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12098640571011139389&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaust.edu.sa;hit.edu.cn;usyd.edu.au;uw.edu;sdu.edu.cn;kaust.edu.sa;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4;0;1", "aff_unique_norm": "King Abdullah University of Science and Technology;Harbin Institute of Technology;University of Sydney;University of Washington;Shandong University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.kast.kau.edu.sa;http://www.hit.edu.cn/;https://www.sydney.edu.au;https://www.washington.edu;http://www.sdu.edu.cn", "aff_unique_abbr": "KAUST;HIT;USYD;UW;SDU", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Harbin;Seattle;Shenzhen", "aff_country_unique_index": "0;1;2;3;1;0;1", "aff_country_unique": "Saudi Arabia;China;Australia;United States" }, { "title": "HOI-Swap: Swapping Objects in Videos with Hand-Object Interaction Awareness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95885", "id": "GkHXBasQwm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GkHXBasQwm", "openreview": "https://openreview.net/forum?id=GkHXBasQwm", "poster": "", "project": "", "author_site": "Zihui (Sherry) Xue, Romy Luo, Changan Chen, Kristen Grauman", "tldr": "", "abstract": "We study the problem of precisely swapping objects in videos, with a focus on those interacted with by hands, given one user-provided reference object image. Despite the great advancements that diffusion models have made in video editing recently, these models often fall short in handling the intricacies of hand-object interactions (HOI), failing to produce realistic edits---especially when object swapping results in object shape or functionality changes. To bridge this gap, we present HOI-Swap, a novel diffusion-based video editing framework trained in a self-supervised manner. Designed in two stages, the first stage focuses on object swapping in a single frame with HOI awareness; the model learns to adjust the interaction patterns, such as the hand grasp, based on changes in the object's properties. The second stage extends the single-frame edit across the entire sequence; we achieve controllable motion alignment with the original video by: (1) warping a new sequence from the stage-I edited frame based on sampled motion points and (2) conditioning video generation on the warped sequence. Comprehensive qualitative and quantitative evaluations demonstrate that HOI-Swap significantly outperforms existing methods, delivering high-quality video edits with realistic HOIs.", "keywords": "video editing;hand-object interaction", "primary_area": "machine_vision", "supplementary_material": "/attachment/62f08e9ab62a39401e309efb3104f17bb9332cd6.zip", "author": "Zihui Xue;Mi Luo;Changan Chen;Kristen Grauman", "authorids": "~Zihui_Xue1;~Mi_Luo1;~Changan_Chen2;~Kristen_Grauman1", "gender": "F;F;;F", "homepage": "https://zihuixue.github.io;https://romyluo.com/;;http://www.cs.utexas.edu/~grauman/", "dblp": "256/9549;257/3359;;57/4553", "google_scholar": "JCV9BQ0AAAAJ;eL-xIlAAAAAJ;;Jp6Mz1sAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zihui_Xue1;~Mi_Luo1;~Changan_Chen2;~Kristen_Grauman1", "aff": "University of Texas, Austin;University of Texas at Austin;;University of Texas, Austin", "aff_domain": "utexas.edu;cs.utexas.edu;;utexas.edu", "position": "PhD student;PhD student;;Professor", "bibtex": "@inproceedings{\nxue2024hoiswap,\ntitle={{HOI}-Swap: Swapping Objects in Videos with Hand-Object Interaction Awareness},\nauthor={Zihui Xue and Mi Luo and Changan Chen and Kristen Grauman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GkHXBasQwm}\n}", "github": "", "reviewers": "NtRs;rdgi;gbKB;ih2o", "pdf_size": 12726105, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "94;116;89;108", "wc_strengths": "28;53;126;114", "wc_weaknesses": "135;37;365;100", "wc_questions": "157;61;125;45", "wc_limitations": "1;7;60;17", "wc_review": "415;274;765;384", "wc_reply_reviewers": "204;80;28;69", "wc_reply_authors": "0;133;36;36", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 101.75, 10.779030568655049 ], "wc_strengths_avg": [ 80.25, 40.94126891047711 ], "wc_weaknesses_avg": [ 159.25, 123.87165737165222 ], "wc_questions_avg": [ 97.0, 45.78209256903839 ], "wc_limitations_avg": [ 21.25, 23.09085316743407 ], "wc_review_avg": [ 459.5, 183.9979619452346 ], "wc_reply_reviewers_avg": [ 95.25, 65.70911276223413 ], "wc_reply_authors_avg": [ 51.25, 49.43366767699924 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2586381491958297911&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "utexas.edu;cs.utexas.edu;;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Active Set Ordering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95884", "id": "GkJbXpd3wM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GkJbXpd3wM", "openreview": "https://openreview.net/forum?id=GkJbXpd3wM", "poster": "/media/PosterPDFs/NeurIPS%202024/95884.png?t=1731243551.0488362", "project": "", "author_site": "Quoc Phong Nguyen, Sunil Gupta, Svetha Venkatesh, Bryan Kian Hsiang Low, Patrick Jaillet", "tldr": "", "abstract": "In this paper, we formalize the active set ordering problem, which involves actively discovering a set of inputs based on their orderings determined by expensive evaluations of a blackbox function. We then propose the mean prediction (MP) algorithm and theoretically analyze it in terms of the regret of predicted pairwise orderings between inputs. Notably, as a special case of this framework, we can cast Bayesian optimization as an active set ordering problem by recognizing that maximizers can be identified solely by comparison rather than by precisely estimating the function evaluations. As a result, we are able to construct the popular Gaussian process upper confidence bound (GP-UCB) algorithm through the lens of ordering with several nuanced insights. We empirically validate the performance of our proposed solution using various synthetic functions and real-world datasets.", "keywords": "active learning;Bayesian optimization;top-k set;contour line", "primary_area": "active_learning", "supplementary_material": "/attachment/38da6939901e1bd2a3fbb72cc4c64f9673f47288.zip", "author": "Quoc Phong Nguyen;Sunil Gupta;Svetha Venkatesh;Bryan Kian Hsiang Low;Patrick Jaillet", "authorids": "~Quoc_Phong_Nguyen2;~Sunil_Gupta2;~Svetha_Venkatesh1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "gender": ";;F;M;M", "homepage": ";;https://www.deakin.edu.au/about-deakin/people/svetha-venkatesh;http://www.comp.nus.edu.sg/~lowkh;http://web.mit.edu/jaillet/www/", "dblp": ";;81/1984;97/4877;https://dblp.uni-trier.de/pers/hd/j/Jaillet:Patrick", "google_scholar": ";;AEkRUQcAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ;ND0FM6EAAAAJ", "orcid": ";;;;0000-0002-8585-6566", "linkedin": ";;;;patrick-jaillet-1260445/", "or_profile": "~Quoc_Phong_Nguyen2;~Sunil_Gupta2;~Svetha_Venkatesh1;~Bryan_Kian_Hsiang_Low1;~Patrick_Jaillet1", "aff": ";;Deakin University;National University of Singapore;Massachusetts Institute of Technology", "aff_domain": ";;deakin.edu.au;nus.edu.sg;mit.edu", "position": ";;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nnguyen2024active,\ntitle={Active Set Ordering},\nauthor={Quoc Phong Nguyen and Sunil Gupta and Svetha Venkatesh and Bryan Kian Hsiang Low and Patrick Jaillet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GkJbXpd3wM}\n}", "github": "", "reviewers": "i8Rj;gNpn;Qpjm;2jNL", "pdf_size": 902702, "rating": "5;5;6;6", "confidence": "4;4;3;2", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "3;3;2;2", "wc_summary": "89;53;153;65", "wc_strengths": "85;47;68;10", "wc_weaknesses": "118;115;124;169", "wc_questions": "205;3;60;93", "wc_limitations": "4;1;1;96", "wc_review": "501;219;406;433", "wc_reply_reviewers": "124;12;63;25", "wc_reply_authors": "426;70;648;47", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.0, 38.61346915261564 ], "wc_strengths_avg": [ 52.5, 27.98660393831306 ], "wc_weaknesses_avg": [ 131.5, 21.891779278989635 ], "wc_questions_avg": [ 90.25, 73.65926621953275 ], "wc_limitations_avg": [ 25.5, 40.721615881494685 ], "wc_review_avg": [ 389.75, 104.48295315504822 ], "wc_reply_reviewers_avg": [ 56.0, 43.502873468312416 ], "wc_reply_authors_avg": [ 297.75, 251.92694873712895 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oljMIHE1v_MJ:scholar.google.com/&scioq=Active+Set+Ordering&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": ";;deakin.edu.au;nus.edu.sg;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Deakin University;National University of Singapore;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.deakin.edu.au;https://www.nus.edu.sg;https://web.mit.edu", "aff_unique_abbr": "Deakin;NUS;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Australia;Singapore;United States" }, { "title": "Learning Low-Rank Feature for Thorax Disease Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95883", "id": "GkzrVxs9LS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GkzrVxs9LS", "openreview": "https://openreview.net/forum?id=GkzrVxs9LS", "poster": "", "project": "", "author_site": "Yancheng Wang, Rajeev Goel, Utkarsh Nath, Alvin Silva, Teresa Wu, Yingzhen Yang", "tldr": "", "abstract": "Deep neural networks, including Convolutional Neural Networks (CNNs) and Visual Transformers (ViT), have achieved stunning success in the medical image domain. We study thorax disease classification in this paper. Effective extraction of features for the disease areas is crucial for disease classification on radiographic images. While various neural architectures and training techniques, such as self-supervised learning with contrastive/restorative learning, have been employed for disease classification on radiographic images, there are no principled methods that can effectively reduce the adverse effect of noise and background or non-disease areas on the radiographic images for disease classification. To address this challenge, we propose a novel Low-Rank Feature Learning (LRFL) method in this paper, which is universally applicable to the training of all neural networks. The LRFL method is both empirically motivated by a Low Frequency Property (LFP) and theoretically motivated by our sharp generalization bound for neural networks with low-rank features. LFP not only widely exists in deep neural networks for generic machine learning but also exists in all the thorax medical datasets studied in this paper. In the empirical study, using a neural network such as a ViT or a CNN pre-trained on unlabeled chest X-rays by Masked Autoencoders (MAE), our novel LRFL method is applied on the pre-trained neural network and demonstrates better classification results in terms of both multi-class area under the receiver operating curve (mAUC) and classification accuracy than the current state-of-the-art. The code of LRFL is available at \\url{https://github.com/Statistical-Deep-Learning/LRFL}.", "keywords": "Low-Rank Feature Learning;Low Frequency Property;Thorax Disease Classification", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Yancheng Wang;Rajeev Goel;Utkarsh Nath;Alvin C Silva;Teresa Wu;Yingzhen Yang", "authorids": "~Yancheng_Wang2;~Rajeev_Goel1;~Utkarsh_Nath1;~Alvin_C_Silva1;~Teresa_Wu1;~Yingzhen_Yang1", "gender": "M;M;M;;F;M", "homepage": ";https://rajeevgl01.github.io/;;;http://faculty.engineering.asu.edu/twu/;http://yingzhenyang.com", "dblp": ";319/8843;267/2281.html;;;66/3838.html", "google_scholar": "https://scholar.google.com/citations?hl=en;vZVIC_8AAAAJ;8Gz0AuoAAAAJ;;XJvLrVAAAAAJ;", "orcid": ";;;;;", "linkedin": ";rajeevgoel1999/;utkarsh-nath-39793398/;;;yingzhen-yang-9b869122", "or_profile": "~Yancheng_Wang2;~Rajeev_Goel1;~Utkarsh_Nath1;~Alvin_C_Silva1;~Teresa_Wu1;~Yingzhen_Yang1", "aff": "Arizona State University;Arizona State University;Arizona State University;Mayo Clinic;Arizona State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;asu.edu;mayo.edu;asu.edu;asu.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024learning,\ntitle={Learning Low-Rank Feature for Thorax Disease Classification},\nauthor={Yancheng Wang and Rajeev Goel and Utkarsh Nath and Alvin C Silva and Teresa Wu and Yingzhen Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GkzrVxs9LS}\n}", "github": "", "reviewers": "td2a;qSFW;JazK;cPxj", "pdf_size": 2249707, "rating": "4;4;5;7", "confidence": "3;5;4;4", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "3;2;2;3", "wc_summary": "67;53;54;35", "wc_strengths": "48;56;8;55", "wc_weaknesses": "863;169;86;47", "wc_questions": "31;7;266;46", "wc_limitations": "118;17;1;105", "wc_review": "1127;302;415;288", "wc_reply_reviewers": "692;170;65;17", "wc_reply_authors": "1943;487;177;0", "reply_reviewers": "5;2;2;1", "reply_authors": "7;3;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 52.25, 11.388041973930374 ], "wc_strengths_avg": [ 41.75, 19.727835664360143 ], "wc_weaknesses_avg": [ 291.25, 333.0273074388946 ], "wc_questions_avg": [ 87.5, 103.99158619811509 ], "wc_limitations_avg": [ 60.25, 51.76569810212164 ], "wc_review_avg": [ 533.0, 346.4628407203289 ], "wc_reply_reviewers_avg": [ 236.0, 269.0232331974322 ], "wc_reply_authors_avg": [ 651.75, 765.6100100573399 ], "reply_reviewers_avg": [ 2.5, 1.5 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pPxrgRMiCqcJ:scholar.google.com/&scioq=Learning+Low-Rank+Feature+for+Thorax+Disease+Classification&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "asu.edu;asu.edu;asu.edu;mayo.edu;asu.edu;asu.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Arizona State University;Mayo Clinic", "aff_unique_dep": ";", "aff_unique_url": "https://www.asu.edu;https://www.mayoclinic.org", "aff_unique_abbr": "ASU;Mayo Clinic", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SongCreator: Lyrics-based Universal Song Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95882", "id": "GlD9Juva5V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GlD9Juva5V", "openreview": "https://openreview.net/forum?id=GlD9Juva5V", "poster": "", "project": "", "author_site": "Shun Lei, Yixuan Zhou, Boshi Tang, Max W. Y. Lam, Feng liu, Hangyu Liu, Jingcheng Wu, Shiyin Kang, Zhiyong Wu, Helen Meng", "tldr": "", "abstract": "Music is an integral part of human culture, embodying human intelligence and creativity, of which songs compose an essential part. While various aspects of song generation have been explored by previous works, such as singing voice, vocal composition and instrumental arrangement, etc., generating songs with both vocals and accompaniment given lyrics remains a significant challenge, hindering the application of music generation models in the real world. In this light, we propose SongCreator, a song-generation system designed to tackle this challenge. The model features two novel designs: a meticulously designed dual-sequence language model (DSLM) to capture the information of vocals and accompaniment for song generation, and a series of attention mask strategies for DSLM, which allows our model to understand, generate and edit songs, making it suitable for various songrelated generation tasks by utilizing specific attention masks. Extensive experiments demonstrate the effectiveness of SongCreator by achieving state-of-the-art or competitive performances on all eight tasks. Notably, it surpasses previous works by a large margin in lyrics-to-song and lyrics-to-vocals. Additionally, it is able to independently control the acoustic conditions of the vocals and accompaniment in the generated song through different audio prompts, exhibiting its potential applicability. Our samples are available at https://thuhcsi.github.io/SongCreator/.", "keywords": "Song generation;Song editing;Music generation;Language Model;Diffusion Model", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Shun Lei;Yixuan Zhou;Boshi Tang;Max W. Y. Lam;Feng liu;Hangyu Liu;Jingcheng Wu;Shiyin Kang;Zhiyong Wu;Helen M. Meng", "authorids": "~Shun_Lei1;~Yixuan_Zhou1;~Boshi_Tang1;~Max_W._Y._Lam1;~Feng_liu23;~Hangyu_Liu3;~Jingcheng_Wu2;~Shiyin_Kang1;~Zhiyong_Wu2;~Helen_M._Meng1", "gender": "M;M;M;M;M;M;M;M;M;F", "homepage": "https://lg-lg.github.io/shunlei.github.io/;https://labmem-zhouyx.github.io/;https://github.com/TangYucopper;;https://github.com/Liu-Feng-deeplearning;;;https://scholar.google.com/citations?user=mnCHk8EAAAAJ;;http://www.se.cuhk.edu.hk/people/academic-staff/prof-meng-mei-ling-helen/", "dblp": "292/3774;281/6539-2.html;;200/9096;;;;;24/968-1.html;92/3270", "google_scholar": "kL2xyTYAAAAJ;ZI_Cm1cAAAAJ;;R0E0bKkAAAAJ;;;;mnCHk8EAAAAJ;7Xl6KdkAAAAJ;", "orcid": "0000-0003-3597-3913;0009-0002-6363-891X;;;;0000-0003-4724-3264;;;0000-0001-8533-0524;", "linkedin": ";;;maxingaussian/;;;jingchengwu/;;;", "or_profile": "~Shun_Lei1;~Yixuan_Zhou1;~Boshi_Tang1;~Max_W._Y._Lam1;~Feng_liu23;~Hangyu_Liu3;~Jingcheng_Wu2;~Shiyin_Kang1;~Zhiyong_Wu2;~Helen_M._Meng1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Independent Researcher;Tsinghua University;Kunlun Inc;;;Tsinghua University;The Chinese University of Hong Kong", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;gmail.com;tsinghua.edu.cn;kunlun-inc.com;;;tsinghua.edu.cn;cuhk.edu.hk", "position": "PhD student;PhD student;MS student;Researcher;Intern;Researcher;;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlei2024songcreator,\ntitle={SongCreator: Lyrics-based Universal Song Generation},\nauthor={Shun Lei and Yixuan Zhou and Boshi Tang and Max W. Y. Lam and Feng liu and Hangyu Liu and Jingcheng Wu and Shiyin Kang and Zhiyong Wu and Helen M. Meng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GlD9Juva5V}\n}", "github": "", "reviewers": "pX2T;kmxW;nDSW;mcyd;H7YT", "pdf_size": 2677733, "rating": "3;4;6;6;8", "confidence": "4;3;4;4;4", "soundness": "3;3;3;3;4", "novelty": "3;2;2;3;3", "presentation": "2;2;3;3;4", "wc_summary": "88;172;67;79;196", "wc_strengths": "53;51;37;70;201", "wc_weaknesses": "141;197;85;177;236", "wc_questions": "275;49;27;138;190", "wc_limitations": "37;18;6;2;50", "wc_review": "594;487;222;466;873", "wc_reply_reviewers": "278;0;178;0;97", "wc_reply_authors": "689;0;257;0;108", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 5.4, 1.7435595774162693 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 120.4, 52.90217386837709 ], "wc_strengths_avg": [ 82.4, 60.218269652988205 ], "wc_weaknesses_avg": [ 167.2, 51.30458069217602 ], "wc_questions_avg": [ 135.8, 91.31352583270454 ], "wc_limitations_avg": [ 22.6, 18.32593790232849 ], "wc_review_avg": [ 528.4, 210.9697608663384 ], "wc_reply_reviewers_avg": [ 110.6, 106.97214590724073 ], "wc_reply_authors_avg": [ 210.8, 257.0022567994297 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.40147753427348315, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5864914565123376336&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;gmail.com;tsinghua.edu.cn;kunlun-inc.com;;;tsinghua.edu.cn;cuhk.edu.hk", "author_num": 10, "aff_unique_index": "0;0;0;1;0;2;0;3", "aff_unique_norm": "Tsinghua University;Independent Researcher;Kunlun Inc;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.kunlun.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "THU;;;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Abductive Reasoning in Logical Credal Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95881", "id": "GlXUxNI6TN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GlXUxNI6TN", "openreview": "https://openreview.net/forum?id=GlXUxNI6TN", "poster": "/media/PosterPDFs/NeurIPS%202024/95881.png?t=1730985108.4573054", "project": "", "author_site": "Radu Marinescu, Junkyu Lee, Debarun Bhattacharjya, Fabio Cozman, Alexander Gray", "tldr": "", "abstract": "Logical Credal Networks or LCNs were recently introduced as a powerful probabilistic logic framework for representing and reasoning with imprecise knowledge. Unlike many existing formalisms, LCNs have the ability to represent cycles and allow specifying marginal and conditional probability bounds on logic formulae which may be important in many realistic scenarios. Previous work on LCNs has focused exclusively on marginal inference, namely computing posterior lower and upper probability bounds on a query formula. In this paper, we explore abductive reasoning tasks such as solving MAP and Marginal MAP queries in LCNs given some evidence. We first formally define the MAP and Marginal MAP tasks for LCNs and subsequently show how to solve these tasks exactly using search-based approaches. We then propose several approximate schemes that allow us to scale MAP and Marginal MAP inference to larger problem instances. An extensive empirical evaluation demonstrates the effectiveness of our algorithms on both random LCN instances as well as LCNs derived from more realistic use-cases.", "keywords": "probabilistic logic;imprecise probabilities;MAP inference;search;message passing", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/6728ac1be1742f72c187809c2e15e2b9498e04b7.zip", "author": "Radu Marinescu;Junkyu Lee;Debarun Bhattacharjya;Fabio Cozman;Alexander G. Gray", "authorids": "~Radu_Marinescu2;~Junkyu_Lee1;~Debarun_Bhattacharjya1;~Fabio_Cozman1;~Alexander_G._Gray1", "gender": ";;M;M;M", "homepage": ";https://www.linkedin.com/in/junkyul/;https://researcher.watson.ibm.com/researcher/view.php?person=us-debarunb;http://sites.poli.usp.br/p/fabio.cozman/;", "dblp": "m/RaduMarinescu2;65/6241-1;98/5604;g/FabioGagliardiCozman;85/110.html", "google_scholar": ";kigtlXEAAAAJ;pwfVt-MAAAAJ;https://scholar.google.com.br/citations?user=tzbfgcMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-6636-2886;;;0000-0003-0337-7359", "linkedin": ";junkyul/;;;alexander-gray-b554b64/", "or_profile": "~Radu_Marinescu2;~Junkyu_Lee1;~Debarun_Bhattacharjya1;~Fabio_Cozman1;~Alexander_G._Gray1", "aff": "International Business Machines;International Business Machines;International Business Machines;Universidade de Sao Paulo;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com;usp.br;ibm.com", "position": "Researcher;Researcher;Researcher;Full Professor;VP, Foundations of AI", "bibtex": "@inproceedings{\nmarinescu2024abductive,\ntitle={Abductive Reasoning in Logical Credal Networks},\nauthor={Radu Marinescu and Junkyu Lee and Debarun Bhattacharjya and Fabio Cozman and Alexander G. Gray},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GlXUxNI6TN}\n}", "github": "", "reviewers": "56vh;AMr2;Pbqt;NnQo;ZTSx", "pdf_size": 513847, "rating": "5;5;6;6;7", "confidence": "2;3;4;3;3", "soundness": "4;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "94;167;59;106;139", "wc_strengths": "91;52;53;127;80", "wc_weaknesses": "317;103;41;458;26", "wc_questions": "181;38;147;110;77", "wc_limitations": "9;5;1;357;1", "wc_review": "692;365;301;1158;323", "wc_reply_reviewers": "32;132;25;793;60", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 113.0, 37.19677405367299 ], "wc_strengths_avg": [ 80.6, 27.717142709882634 ], "wc_weaknesses_avg": [ 189.0, 170.0670456026093 ], "wc_questions_avg": [ 110.6, 50.36109609609386 ], "wc_limitations_avg": [ 74.6, 141.23115803532872 ], "wc_review_avg": [ 567.8, 327.4137443663598 ], "wc_reply_reviewers_avg": [ 208.4, 294.74368525890424 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qxhFx56vs4gJ:scholar.google.com/&scioq=Abductive+Reasoning+in+Logical+Credal+Networks&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "ibm.com;ibm.com;ibm.com;usp.br;ibm.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "International Business Machines Corporation;Universidade de Sao Paulo", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.usp.br", "aff_unique_abbr": "IBM;USP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Brazil" }, { "title": "Omnigrasp: Grasping Diverse Objects with Simulated Humanoids", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95880", "id": "Glt37xoU7e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Glt37xoU7e", "openreview": "https://openreview.net/forum?id=Glt37xoU7e", "poster": "", "project": "", "author_site": "Zhengyi Luo, Jinkun Cao, Sammy Christen, Alexander Winkler, Kris Kitani, Weipeng Xu", "tldr": "", "abstract": "We present a method for controlling a simulated humanoid to grasp an object and move it to follow an object's trajectory. Due to the challenges in controlling a humanoid with dexterous hands, prior methods often use a disembodied hand and only consider vertical lifts or short trajectories. This limited scope hampers their applicability for object manipulation required for animation and simulation. To close this gap, we learn a controller that can pick up a large number (>1200) of objects and carry them to follow randomly generated trajectories. Our key insight is to leverage a humanoid motion representation that provides human-like motor skills and significantly speeds up training. Using only simplistic reward, state, and object representations, our method shows favorable scalability on diverse objects and trajectories. For training, we do not need a dataset of paired full-body motion and object trajectories. At test time, we only require the object mesh and desired trajectories for grasping and transporting. To demonstrate the capabilities of our method, we show state-of-the-art success rates in following object trajectories and generalizing to unseen objects. Code and models will be released.", "keywords": "Physics Simulation;Humanoid Control;Dexterous Manipulation", "primary_area": "robotics", "supplementary_material": "/attachment/80c7141d2f1de281a87376244b60956f4cf9a531.zip", "author": "Zhengyi Luo;Jinkun Cao;Sammy Christen;Alexander Winkler;Kris M. Kitani;Weipeng Xu", "authorids": "~Zhengyi_Luo1;~Jinkun_Cao1;~Sammy_Christen1;~Alexander_Winkler1;~Kris_M._Kitani1;~Weipeng_Xu1", "gender": "M;M;;M;M;Unspecified", "homepage": "https://zhengyiluo.github.io/;https://www.jinkuncao.com;https://alex-winkler.com/;http://www.cs.cmu.edu/~kkitani/;https://sites.google.com/view/xuweipeng;", "dblp": ";224/0126;161/9871.html;42/163;190/7431;243/6983", "google_scholar": "lHPTxGsAAAAJ;xDtTbmQAAAAJ;https://scholar.google.ch/citations?user=EElBh28AAAAJ;yv3sH74AAAAJ;https://scholar.google.de/citations?user=vy8xXDQAAAAJ;r1L_2qkAAAAJ", "orcid": ";;0000-0003-1839-0855;0000-0002-9389-4060;;", "linkedin": "zhengyi-zen-luo-726156105/;;;;;", "or_profile": "~Zhengyi_Luo1;~Jinkun_Cao1;~Alexander_Winkler1;~Kris_M._Kitani1;~Weipeng_Xu1;~Sammy_Joe_Christen1", "aff": "Meta Platforms, Inc.;Carnegie Mellon University;Meta Facebook;Carnegie Mellon University;Meta Reality Labs;ETH Zurich", "aff_domain": "meta.com;andrew.cmu.edu;fb.com;cmu.edu;meta.com;ethz.ch", "position": "Intern;PhD student;Researcher;Associate Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nluo2024omnigrasp,\ntitle={Omnigrasp: Simulated Humanoid Grasping on Diverse Objects},\nauthor={Zhengyi Luo and Jinkun Cao and Sammy Christen and Alexander Winkler and Kris M. Kitani and Weipeng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Glt37xoU7e}\n}", "github": "", "reviewers": "gXSs;rKeN;z5bR", "pdf_size": 8939851, "rating": "5;6;7", "confidence": "5;4;4", "soundness": "2;3;3", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "77;121;84", "wc_strengths": "106;61;71", "wc_weaknesses": "159;222;61", "wc_questions": "41;287;60", "wc_limitations": "20;87;52", "wc_review": "403;778;328", "wc_reply_reviewers": "70;0;28", "wc_reply_authors": "240;0;18", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 94.0, 19.30457631409368 ], "wc_strengths_avg": [ 79.33333333333333, 19.293061504650375 ], "wc_weaknesses_avg": [ 147.33333333333334, 66.24365797736583 ], "wc_questions_avg": [ 129.33333333333334, 111.7566801383951 ], "wc_limitations_avg": [ 53.0, 27.36177382164151 ], "wc_review_avg": [ 503.0, 196.85019685029528 ], "wc_reply_reviewers_avg": [ 32.666666666666664, 28.767265347188555 ], "wc_reply_authors_avg": [ 86.0, 109.14210919713803 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3507798343533792690&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "email": "meta.com;andrew.cmu.edu;fb.com;cmu.edu;meta.com;ethz.ch", "author_num": 6, "aff_unique_index": "0;1;0;1;0;2", "aff_unique_norm": "Meta;Carnegie Mellon University;ETH Zurich", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://www.meta.com;https://www.cmu.edu;https://www.ethz.ch", "aff_unique_abbr": "Meta;CMU;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "What Is Missing For Graph Homophily? Disentangling Graph Homophily For Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95879", "id": "GmdGEF8xxU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GmdGEF8xxU", "openreview": "https://openreview.net/forum?id=GmdGEF8xxU", "poster": "/media/PosterPDFs/NeurIPS%202024/95879.png?t=1730684178.2281647", "project": "", "author_site": "Yilun Zheng, Sitao Luan, Lihui Chen", "tldr": "", "abstract": "Graph homophily refers to the phenomenon that connected nodes tend to share similar characteristics. Understanding this concept and its related metrics is crucial for designing effective Graph Neural Networks (GNNs). The most widely used homophily metrics, such as edge or node homophily, quantify such \"similarity\" as label consistency across the graph topology. These metrics are believed to be able to reflect the performance of GNNs, especially on node-level tasks. However, many recent studies have empirically demonstrated that the performance of GNNs does not always align with homophily metrics, and how homophily influences GNNs still remains unclear and controversial. Then, a crucial question arises: What is missing in our current understanding of homophily? To figure out the missing part, in this paper, we disentangle the graph homophily into three aspects: label, structural, and feature homophily, which are derived from the three basic elements of graph data. We argue that the synergy of the three homophily can provide a more comprehensive understanding of GNN performance. Our new proposed structural and feature homophily consider the neighborhood consistency and feature dependencies among nodes, addressing the previously overlooked structural and feature aspects in graph homophily. To investigate their synergy, we propose a Contextual Stochastic Block Model with three types of Homophily (CSBM-3H), where the topology and feature generation are controlled by the three metrics. Based on the theoretical analysis of CSBM-3H, we derive a new composite metric, named Tri-Hom, that considers all three aspects and overcomes the limitations of conventional homophily metrics. The theoretical conclusions and the effectiveness of Tri-Hom have been verified through synthetic experiments on CSBM-3H. In addition, we conduct experiments on $31$ real-world benchmark datasets and calculate the correlations between homophily metrics and model performance. Tri-Hom has significantly higher correlation values than $17$ existing metrics that only focus on a single homophily aspect, demonstrating its superiority and the importance of homophily synergy. Our code is available at https://github.com/zylMozart/Disentangle_GraphHom.", "keywords": "Graph Neural Networks;Graph Homophily", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/8c04d9f8257666f5ecbcff88dff8c524e0d2c301.zip", "author": "Yilun Zheng;Sitao Luan;Lihui Chen", "authorids": "~Yilun_Zheng1;~Sitao_Luan1;~Lihui_Chen1", "gender": "M;M;Unspecified", "homepage": "https://github.com/zylMozart;;", "dblp": ";249/2879;56/1277", "google_scholar": "HuImaFsAAAAJ;Ouoi7yYAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yilun_Zheng1;~Sitao_Luan1;~Lihui_Chen1", "aff": "Nanyang Technological University;McGill University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;mcgill.ca;ntu.edu.sg", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzheng2024what,\ntitle={What Is Missing For Graph Homophily? Disentangling Graph Homophily For Graph Neural Networks},\nauthor={Yilun Zheng and Sitao Luan and Lihui Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GmdGEF8xxU}\n}", "github": "", "reviewers": "9nLU;SC6h;JQBx;bySE", "pdf_size": 3121784, "rating": "4;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "89;32;100;63", "wc_strengths": "57;61;94;55", "wc_weaknesses": "315;65;78;31", "wc_questions": "113;1;29;33", "wc_limitations": "24;4;11;6", "wc_review": "598;163;312;188", "wc_reply_reviewers": "115;0;39;34", "wc_reply_authors": "2243;669;644;171", "reply_reviewers": "1;0;1;1", "reply_authors": "6;3;3;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.0, 26.22022120425379 ], "wc_strengths_avg": [ 66.75, 15.880412463157246 ], "wc_weaknesses_avg": [ 122.25, 112.59967806348294 ], "wc_questions_avg": [ 44.0, 41.701318923986086 ], "wc_limitations_avg": [ 11.25, 7.790218225441442 ], "wc_review_avg": [ 315.25, 172.721415869602 ], "wc_reply_reviewers_avg": [ 47.0, 42.02975136733502 ], "wc_reply_authors_avg": [ 931.75, 782.6165648004136 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6151446342507245334&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 0, "email": "ntu.edu.sg;mcgill.ca;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;McGill University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.mcgill.ca", "aff_unique_abbr": "NTU;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;Canada" }, { "title": "Towards Calibrated Robust Fine-Tuning of Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95878", "id": "GnAfyR8AhC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GnAfyR8AhC", "openreview": "https://openreview.net/forum?id=GnAfyR8AhC", "poster": "/media/PosterPDFs/NeurIPS%202024/95878.png?t=1731747261.2672622", "project": "", "author_site": "Changdae Oh, Hyesu Lim, Mijoo Kim, Dongyoon Han, Sangdoo Yun, Jaegul Choo, Alexander Hauptmann, Zhi-Qi Cheng, Kyungwoo Song", "tldr": "", "abstract": "Improving out-of-distribution (OOD) generalization during in-distribution (ID) adaptation is a primary goal of robust fine-tuning of zero-shot models beyond naive fine-tuning. However, despite decent OOD generalization performance from recent robust fine-tuning methods, confidence calibration for reliable model output has not been fully addressed. This work proposes a robust fine-tuning method that improves both OOD accuracy and confidence calibration simultaneously in vision language models. Firstly, we show that both OOD classification and OOD calibration errors have a shared upper bound consisting of two terms of ID data: 1) ID calibration error and 2) the smallest singular value of the ID input covariance matrix. Based on this insight, we design a novel framework that conducts fine-tuning with a constrained multimodal contrastive loss enforcing a larger smallest singular value, which is further guided by the self-distillation of a moving-averaged model to achieve calibrated prediction as well. Starting from empirical evidence supporting our theoretical statements, we provide extensive experimental results on ImageNet distribution shift benchmarks that demonstrate the effectiveness of our theorem and its practical implementation.", "keywords": "robust fine-tuning;distribution shift;out-of-distribution generalization;vision-language model;uncertainty calibration", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Changdae Oh;Hyesu Lim;Mijoo Kim;Dongyoon Han;Sangdoo Yun;Jaegul Choo;Alexander G Hauptmann;Zhi-Qi Cheng;Kyungwoo Song", "authorids": "~Changdae_Oh1;~Hyesu_Lim1;~Mijoo_Kim2;~Dongyoon_Han1;~Sangdoo_Yun1;~Jaegul_Choo1;~Alexander_G_Hauptmann1;~Zhi-Qi_Cheng1;~Kyungwoo_Song1", "gender": "M;;F;M;M;M;M;Not Specified;", "homepage": ";https://sites.google.com/view/hyesulim;;https://dongyoonhan.github.io/;https://sangdooyun.github.io/;https://sites.google.com/site/jaegulchoo/;;https://faculty.washington.edu/zhiqics/;https://mlai.yonsei.ac.kr", "dblp": "315/4736;304/7506;80/7630;151/8876;124/3009.html;07/2074;h/AlexanderGHauptmann;188/1193;155/4867", "google_scholar": "7oAZaVcAAAAJ;https://scholar.google.co.kr/citations?hl=ko;;jcP7m1QAAAAJ;o0qtjzYAAAAJ;GHJYsLEAAAAJ;https://scholar.google.co.uk/citations?user=Py54GcEAAAAJ;uB2He2UAAAAJ;HWxRii4AAAAJ", "orcid": ";;;0000-0002-9130-8195;;;;0000-0002-1720-2085;0000-0003-0082-4280", "linkedin": "changedae-oh-440587215/;hyesulim/;mijoo-kim-8b2833204;https://linkedin.com/in/dongyoon-han-04961a120/en;;;;zhiqicheng/;kyungwoo-song-862863155/", "or_profile": "~Changdae_Oh1;~Hyesu_Lim1;~Mijoo_Kim2;~Dongyoon_Han1;~Sangdoo_Yun1;~Jaegul_Choo1;~Alexander_G_Hauptmann1;~Zhi-Qi_Cheng1;~Kyungwoo_Song1", "aff": "University of Seoul;Carnegie Mellon University;Chung-Ang University;NAVER;NAVER;Korea Advanced Institute of Science & Technology;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Yonsei University", "aff_domain": "uos.ac.kr;andrew.cmu.edu;cau.ac.kr;navercorp.com;navercorp.com;kaist.ac.kr;cs.cmu.edu;cmu.edu;yonsei.ac.kr", "position": "MS student;Researcher;MS student;Research Scientist;Research Scientist;Associate Professor;Full Professor;Project Scientist & Instructor;Assistant Professor", "bibtex": "@inproceedings{\noh2024towards,\ntitle={Towards Calibrated Robust Fine-Tuning of Vision-Language Models},\nauthor={Changdae Oh and Hyesu Lim and Mijoo Kim and Dongyoon Han and Sangdoo Yun and Jaegul Choo and Alexander G Hauptmann and Zhi-Qi Cheng and Kyungwoo Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GnAfyR8AhC}\n}", "github": "", "reviewers": "yVZD;g5ML;a8to;nZD2", "pdf_size": 2435451, "rating": "4;5;6;6", "confidence": "5;2;3;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "45;63;86;101", "wc_strengths": "32;53;58;148", "wc_weaknesses": "115;76;186;309", "wc_questions": "16;4;5;24", "wc_limitations": "1;16;12;48", "wc_review": "209;212;347;630", "wc_reply_reviewers": "144;402;130;37", "wc_reply_authors": "910;771;544;28", "reply_reviewers": "1;3;2;1", "reply_authors": "4;4;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 21.41699091842736 ], "wc_strengths_avg": [ 72.75, 44.52737921773524 ], "wc_weaknesses_avg": [ 171.5, 88.64113040795452 ], "wc_questions_avg": [ 12.25, 8.257572282456872 ], "wc_limitations_avg": [ 19.25, 17.483921184905864 ], "wc_review_avg": [ 349.5, 171.26952443444222 ], "wc_reply_reviewers_avg": [ 178.25, 135.56986206380827 ], "wc_reply_authors_avg": [ 563.25, 335.50661319860745 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3090790046660405214&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "uos.ac.kr;andrew.cmu.edu;cau.ac.kr;navercorp.com;navercorp.com;kaist.ac.kr;cs.cmu.edu;cmu.edu;yonsei.ac.kr", "author_num": 9, "aff_unique_index": "0;1;2;3;3;4;1;1;5", "aff_unique_norm": "University of Seoul;Carnegie Mellon University;Chung-Ang University;NAVER Corporation;Korea Advanced Institute of Science and Technology;Yonsei University", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.useoul.edu;https://www.cmu.edu;http://www.cau.ac.kr;https://www.naver.com;https://www.kaist.ac.kr;https://www.yonsei.ac.kr", "aff_unique_abbr": "UOS;CMU;CAU;NAVER;KAIST;Yonsei", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1;0;0;0;0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Physical Consistency Bridges Heterogeneous Data in Molecular Multi-Task Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95877", "id": "GnF9tavqgc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GnF9tavqgc", "openreview": "https://openreview.net/forum?id=GnF9tavqgc", "poster": "/media/PosterPDFs/NeurIPS%202024/95877.png?t=1730386365.235898", "project": "", "author_site": "Yuxuan Ren, Dihan Zheng, Chang Liu, Peiran Jin, Yu Shi, Lin Huang, Jiyan He, Shengjie Luo, Tao Qin, Tie-Yan Liu", "tldr": "", "abstract": "In recent years, machine learning has demonstrated impressive capability in handling molecular science tasks. To support various molecular properties at scale, machine learning models are trained in the multi-task learning paradigm. Nevertheless, data of different molecular properties are often not aligned: some quantities, e.g. equilibrium structure, demand more cost to compute than others, e.g. energy, so their data are often generated by cheaper computational methods at the cost of lower accuracy, which cannot be directly overcome through multi-task learning. Moreover, it is not straightforward to leverage abundant data of other tasks to benefit a particular task. To handle such data heterogeneity challenges, we exploit the specialty of molecular tasks that there are physical laws connecting them, and design consistency training approaches that allow different tasks to exchange information directly so as to improve one another. Particularly, we demonstrate that the more accurate energy data can improve the accuracy of structure prediction. We also find that consistency training can directly leverage force and off-equilibrium structure data to improve structure prediction, demonstrating a broad capability for integrating heterogeneous data.", "keywords": "AI for science; molecule structure generation; diffusion model; physical prior;", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Yuxuan Ren;Dihan Zheng;Chang Liu;Peiran Jin;Yu Shi;Lin Huang;Jiyan He;Shengjie Luo;Tao Qin;Tie-Yan Liu", "authorids": "~Yuxuan_Ren2;~Dihan_Zheng1;~Chang_Liu10;~Peiran_Jin1;~Yu_Shi4;~Lin_Huang2;~Jiyan_He1;~Shengjie_Luo1;~Tao_Qin1;~Tie-Yan_Liu1", "gender": "M;M;M;M;M;;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=hktsrwYAAAAJ&hl=zh-CN;https://changliu00.github.io/;;https://www.microsoft.com/en-us/research/people/yushi2/;https://www.researchgate.net/profile/Lin-Huang-3;http://home.ustc.edu.cn/~hejiyan;https://lsj2408.github.io;https://www.microsoft.com/en-us/research/people/taoqin/;http://member.acm.org/~tieyanliu;https://yux-ren.github.io/", "dblp": "295/5394;52/5716-30;;;;258/1955;274/2110;14/6841;l/TieYanLiu;", "google_scholar": "hktsrwYAAAAJ;rYd0GEsAAAAJ;;M9LWmQUAAAAJ;;Ep5qE5QAAAAJ;ImWO7WYAAAAJ;Bl4SRU0AAAAJ;Nh832fgAAAAJ;", "orcid": ";0000-0001-5207-5440;;;;0009-0003-4539-1826;;;0000-0002-0476-8020;", "linkedin": ";chang-liu-9ab479168/;peiran-jin-a702686b?original_referer=https%3A%2F%2Fwww.bing.com%2F;;linhuang6385/;;shengjie-luo-ba6137193/;;;", "or_profile": "~Dihan_Zheng1;~Chang_Liu10;~Peiran_Jin1;~Yu_Shi4;~Lin_Huang2;~Jiyan_He1;~Shengjie_Luo1;~Tao_Qin1;~Tie-Yan_Liu1;~Yuxuan_Ren_ustc1", "aff": "Tsinghua University;Microsoft;Microsoft Research;Microsoft Research;Microsoft;University of Science and Technology of China;Microsoft;;Microsoft;University of Science and Technology of China", "aff_domain": "tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;ustc.edu;microsoft.com;;microsoft.com;mail.ustc.edu.cn", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;PhD student;Intern;;Distinguished Scientist;Undergrad student", "bibtex": "@inproceedings{\nren2024physical,\ntitle={Physical Consistency Bridges Heterogeneous Data in Molecular Multi-Task Learning},\nauthor={Yuxuan Ren and Dihan Zheng and Chang Liu and Peiran Jin and Yu Shi and Lin Huang and Jiyan He and Shengjie Luo and Tao Qin and Tie-Yan Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GnF9tavqgc}\n}", "github": "", "reviewers": "p18M;fhSX;B2Rx;3Qs1", "pdf_size": 720341, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "2;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "53;121;154;81", "wc_strengths": "25;16;154;28", "wc_weaknesses": "96;440;44;41", "wc_questions": "78;64;36;93", "wc_limitations": "21;42;44;4", "wc_review": "273;683;432;247", "wc_reply_reviewers": "52;358;0;25", "wc_reply_authors": "757;580;0;27", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.25, 38.42769183804825 ], "wc_strengths_avg": [ 55.75, 56.89628722509053 ], "wc_weaknesses_avg": [ 155.25, 165.8483870889313 ], "wc_questions_avg": [ 67.75, 21.00446381129497 ], "wc_limitations_avg": [ 27.75, 16.406934509529805 ], "wc_review_avg": [ 408.75, 173.453704197979 ], "wc_reply_reviewers_avg": [ 108.75, 145.0747652074612 ], "wc_reply_authors_avg": [ 341.0, 333.56183834485626 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7099734969968034138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;ustc.edu;microsoft.com;;microsoft.com;mail.ustc.edu.cn", "author_num": 10, "aff_unique_index": "0;1;1;1;1;2;1;1;2", "aff_unique_norm": "Tsinghua University;Microsoft;University of Science and Technology of China", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com;http://www.ustc.edu.cn", "aff_unique_abbr": "THU;Microsoft;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Adaptive Preference Scaling for Reinforcement Learning with Human Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95876", "id": "GnaFrZRHPf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GnaFrZRHPf", "openreview": "https://openreview.net/forum?id=GnaFrZRHPf", "poster": "", "project": "", "author_site": "Ilgee Hong, Zichong Li, Alexander Bukharin, Yixiao Li, Haoming Jiang, Tianbao Yang, Tuo Zhao", "tldr": "", "abstract": "Reinforcement learning from human feedback (RLHF) is a prevalent approach to align AI systems with human values by learning rewards from human preference data. Due to various reasons, however, such data typically takes the form of rankings over pairs of trajectory segments, which fails to capture the varying strengths of preferences across different pairs. In this paper, we propose a novel adaptive preference loss, underpinned by distributionally robust optimization (DRO), designed to address this uncertainty in preference strength. By incorporating an adaptive scaling parameter into the loss for each pair, our method increases the flexibility of the reward function. Specifically, it assigns small scaling parameters to pairs with ambiguous preferences, leading to more comparable rewards, and large scaling parameters to those with clear preferences for more distinct rewards. Computationally, our proposed loss function is strictly convex and univariate with respect to each scaling parameter, enabling its efficient optimization through a simple second-order algorithm. Our method is versatile and can be readily adapted to various preference optimization frameworks, including direct preference optimization (DPO). Our experiments with robotic control and natural language generation with large language models (LLMs) show that our method not only improves policy performance but also aligns reward function selection more closely with policy optimization, simplifying the hyperparameter tuning process.", "keywords": "Reinforcement Learning from Human Feedback;Large Language Models;Alignment", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Ilgee Hong;Zichong Li;Alexander Bukharin;Yixiao Li;Haoming Jiang;Tianbao Yang;Tuo Zhao", "authorids": "~Ilgee_Hong1;~Zichong_Li2;~Alexander_Bukharin1;~Yixiao_Li2;~Haoming_Jiang1;~Tianbao_Yang1;~Tuo_Zhao2", "gender": "M;M;M;;M;M;M", "homepage": "https://ilgeehong.github.io/;https://github.com/zichongli5/zichongli5.github.io;https://abukharin3.github.io;https://yxli2123.github.io;https://hmjianggatech.github.io;https://people.tamu.edu/~tianbao-yang/publications.html;http://www2.isye.gatech.edu/~tzhao80", "dblp": ";;294/6372;;230/3684;56/7047;", "google_scholar": ";;;;XaFhuG8AAAAJ;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ;EJXN6tYAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;yixiao-li-90710b209/;;;", "or_profile": "~Ilgee_Hong1;~Zichong_Li2;~Alexander_Bukharin1;~Yixiao_Li2;~Haoming_Jiang1;~Tianbao_Yang1;~Tuo_Zhao1", "aff": "Amazon;Georgia Institute of Technology;NVIDIA;Georgia Institute of Technology;Amazon;Texas A&M University - College Station;Georgia Institute of Technology", "aff_domain": "amazon.com;gatech.edu;nvidia.com;gatech.edu;amazon.com;tamu.edu;gatech.edu", "position": "Intern;PhD student;Intern;PhD student;Principal Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhong2024adaptive,\ntitle={Adaptive Preference Scaling for Reinforcement Learning with Human Feedback},\nauthor={Ilgee Hong and Zichong Li and Alexander Bukharin and Yixiao Li and Haoming Jiang and Tianbao Yang and Tuo Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GnaFrZRHPf}\n}", "github": "", "reviewers": "ZDuk;RUFd;4UPH;E9fU", "pdf_size": 4274305, "rating": "5;5;6;6", "confidence": "4;4;4;2", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;2;2;3", "wc_summary": "58;44;89;63", "wc_strengths": "29;38;34;39", "wc_weaknesses": "136;76;92;48", "wc_questions": "53;182;176;20", "wc_limitations": "12;2;11;16", "wc_review": "288;342;402;186", "wc_reply_reviewers": "161;71;32;0", "wc_reply_authors": "592;298;28;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.5, 16.28649747490233 ], "wc_strengths_avg": [ 35.0, 3.9370039370059056 ], "wc_weaknesses_avg": [ 88.0, 31.874754901018456 ], "wc_questions_avg": [ 107.75, 72.23010106596834 ], "wc_limitations_avg": [ 10.25, 5.11737237261468 ], "wc_review_avg": [ 304.5, 79.41504895169429 ], "wc_reply_reviewers_avg": [ 66.0, 60.3365560833563 ], "wc_reply_authors_avg": [ 229.5, 239.4634627662433 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13789400062889814518&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "amazon.com;gatech.edu;nvidia.com;gatech.edu;amazon.com;tamu.edu;gatech.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;0;3;1", "aff_unique_norm": "Amazon;Georgia Institute of Technology;NVIDIA;Texas A&M University", "aff_unique_dep": "Amazon.com, Inc.;;NVIDIA Corporation;", "aff_unique_url": "https://www.amazon.com;https://www.gatech.edu;https://www.nvidia.com;https://www.tamu.edu", "aff_unique_abbr": "Amazon;Georgia Tech;NVIDIA;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Universal In-Context Approximation By Prompting Fully Recurrent Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95875", "id": "GproaSYZk5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GproaSYZk5", "openreview": "https://openreview.net/forum?id=GproaSYZk5", "poster": "", "project": "", "author_site": "Aleksandar Petrov, Tom Lamb, Alasdair Paren, Philip Torr, Adel Bibi", "tldr": "", "abstract": "Zero-shot and in-context learning enable solving tasks without model fine-tuning, making them essential for developing generative model solutions. Therefore, it is crucial to understand whether a pretrained model can be prompted to approximate any function, i.e., whether it is a universal in-context approximator. While it was recently shown that transformer models do possess this property, these results rely on their attention mechanism. Hence, these findings do not apply to fully recurrent architectures like RNNs, LSTMs, and the increasingly popular SSMs. We demonstrate that RNNs, LSTMs, GRUs, Linear RNNs, and linear gated architectures such as Mamba and Hawk/Griffin can also serve be universal in-context approximators. To streamline our argument, we introduce a programming language called LSRL that compiles to these fully recurrent architectures. LSRL may be of independent interest for further studies of fully recurrent models, such as constructing interpretability benchmarks. We also study the role of multiplicative gating and observe that architectures incorporating such gating (e.g., LSTMs, GRUs, Hawk/Griffin) can implement certain operations more stably, making them more viable candidates for practical in-context universal approximation.", "keywords": "prompting;universal approximation;in-context learning;recurrent models;rnn;ssm", "primary_area": "generative_models", "supplementary_material": "/attachment/cb8b2d1617521d05c1b1583200e3be731f347fab.zip", "author": "Aleksandar Petrov;Tom A. Lamb;Alasdair Paren;Philip Torr;Adel Bibi", "authorids": "~Aleksandar_Petrov1;~Tom_A._Lamb1;~Alasdair_Paren1;~Philip_Torr1;~Adel_Bibi1", "gender": "M;M;;M;M", "homepage": "https://p-petrov.com/;https://alasdair-p.github.io/Alasdair-P/;http://www.robots.ox.ac.uk/~tvg/;http://adelbibi.com;https://tomalamb.github.io/", "dblp": "49/8105;312/6594;;176/0964;", "google_scholar": "em54BT4AAAAJ;Mcq6dQIAAAAJ;;Q4j2laYAAAAJ;cfJt-hgAAAAJ", "orcid": ";0009-0003-5933-5243;;0000-0002-6169-3918;0009-0002-3666-3992", "linkedin": "aleksandar-petrov/;alasdair-paren-a66b88113/;;adel-bibi-ba3671ab/;tom-lamb-94809713b/", "or_profile": "~Aleksandar_Petrov1;~Alasdair_Paren1;~Philip_Torr1;~Adel_Bibi1;~Tom_Andrew_Lamb1", "aff": "Adobe Systems;University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "adobe.com;oxford.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "Intern;Postdoc;Full Professor;Senior Researcher;PhD student", "bibtex": "@inproceedings{\npetrov2024universal,\ntitle={Universal In-Context Approximation By Prompting Fully Recurrent Models},\nauthor={Aleksandar Petrov and Tom A. Lamb and Alasdair Paren and Philip Torr and Adel Bibi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GproaSYZk5}\n}", "github": "", "reviewers": "uwk6;qz76;Aa6c;3Cyh", "pdf_size": 1436627, "rating": "4;7;7;8", "confidence": "2;4;4;3", "soundness": "3;4;4;4", "novelty": "2;3;3;3", "presentation": "2;4;4;4", "wc_summary": "55;119;88;43", "wc_strengths": "29;45;98;53", "wc_weaknesses": "196;141;168;62", "wc_questions": "2;279;5;51", "wc_limitations": "1;37;26;4", "wc_review": "283;621;385;213", "wc_reply_reviewers": "206;50;17;12", "wc_reply_authors": "745;58;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 76.25, 29.67637949615822 ], "wc_strengths_avg": [ 56.25, 25.606395685453272 ], "wc_weaknesses_avg": [ 141.75, 49.98187171365234 ], "wc_questions_avg": [ 84.25, 114.1038452463369 ], "wc_limitations_avg": [ 17.0, 15.049916943292411 ], "wc_review_avg": [ 375.5, 154.37211535766426 ], "wc_reply_reviewers_avg": [ 71.25, 79.15609578547947 ], "wc_reply_authors_avg": [ 200.75, 315.1137691374339 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7035264706814485, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11464376357361525110&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "adobe.com;oxford.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Adobe;University of Oxford", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.ox.ac.uk", "aff_unique_abbr": "Adobe;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Sparse Bayesian Generative Modeling for Compressive Sensing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95874", "id": "GqefKjw1OR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GqefKjw1OR", "openreview": "https://openreview.net/forum?id=GqefKjw1OR", "poster": "/media/PosterPDFs/NeurIPS%202024/95874.png?t=1733169551.4324086", "project": "", "author_site": "Benedikt B\u00f6ck, Sadaf Syed, Wolfgang Utschick", "tldr": "", "abstract": "This work addresses the fundamental linear inverse problem in compressive sensing (CS) by introducing a new type of regularizing generative prior. Our proposed method utilizes ideas from classical dictionary-based CS and, in particular, sparse Bayesian learning (SBL), to integrate a strong regularization towards sparse solutions. At the same time, by leveraging the notion of conditional Gaussianity, it also incorporates the adaptability from generative models to training data. However, unlike most state-of-the-art generative models, it is able to learn from a few compressed and noisy data samples and requires no optimization algorithm for solving the inverse problem. Additionally, similar to Dirichlet prior networks, our model parameterizes a conjugate prior enabling its application for uncertainty quantification. We support our approach theoretically through the concept of variational inference and validate it empirically using different types of compressible signals.", "keywords": "Compressive sensing;variational inference;sparse bayesian learning;variational autoencoder;Gaussian mixture model;generative model", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Benedikt B\u00f6ck;Sadaf Syed;Wolfgang Utschick", "authorids": "~Benedikt_B\u00f6ck1;~Sadaf_Syed1;~Wolfgang_Utschick1", "gender": "M;F;M", "homepage": ";https://www.ce.cit.tum.de/msv/people/sadaf-syed/;https://www.ce.cit.tum.de/msv/", "dblp": "331/8681;;34/5115", "google_scholar": "https://scholar.google.de/citations?user=4P04LhwAAAAJ;;qflRi8QAAAAJ", "orcid": "0009-0009-8604-4269;;0000-0002-2871-4246", "linkedin": "benedikt-b%C3%B6ck-9bb575267/?originalSubdomain=de;;", "or_profile": "~Benedikt_B\u00f6ck1;~Sadaf_Syed1;~Wolfgang_Utschick1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "tum.de;mytum.de;tum.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nb{\\\"o}ck2024sparse,\ntitle={Sparse Bayesian Generative Modeling for Compressive Sensing},\nauthor={Benedikt B{\\\"o}ck and Sadaf Syed and Wolfgang Utschick},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GqefKjw1OR}\n}", "github": "", "reviewers": "y1C8;34q1;K63t;WoRD", "pdf_size": 5878996, "rating": "5;6;6;7", "confidence": "4;3;5;2", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "2;2;2;4", "wc_summary": "186;36;89;58", "wc_strengths": "71;20;55;79", "wc_weaknesses": "205;54;219;62", "wc_questions": "135;48;92;104", "wc_limitations": "9;45;3;38", "wc_review": "606;203;458;341", "wc_reply_reviewers": "168;34;531;53", "wc_reply_authors": "286;0;524;0", "reply_reviewers": "2;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 92.25, 57.30783105300706 ], "wc_strengths_avg": [ 56.25, 22.64260364887395 ], "wc_weaknesses_avg": [ 135.0, 77.21075054679886 ], "wc_questions_avg": [ 94.75, 31.21998558615939 ], "wc_limitations_avg": [ 23.75, 18.046814123273947 ], "wc_review_avg": [ 402.0, 148.38632012419473 ], "wc_reply_reviewers_avg": [ 196.5, 199.81303761266432 ], "wc_reply_authors_avg": [ 202.5, 219.28691251417627 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6324555320336759, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15444239039679903908&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tum.de;mytum.de;tum.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "SHED: Shapley-Based Automated Dataset Refinement for Instruction Fine-Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95873", "id": "Gqou8PRgWq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Gqou8PRgWq", "openreview": "https://openreview.net/forum?id=Gqou8PRgWq", "poster": "", "project": "", "author_site": "Yexiao He, Ziyao Wang, Zheyu Shen, Guoheng Sun, Yucong Dai, Yongkai Wu, Hongyi Wang, Ang Li", "tldr": "", "abstract": "The pre-trained Large Language Models (LLMs) can be adapted for many downstream tasks and tailored to align with human preferences through fine-tuning. Recent studies have discovered that LLMs can achieve desirable performance with only a small amount of high-quality data, suggesting that a large portion of the data in these extensive datasets is redundant or even harmful. Identifying high-quality data from vast datasets to curate small yet effective datasets has emerged as a critical challenge. In this paper, we introduce SHED, an automated dataset refinement framework based on Shapley value for instruction fine-tuning. SHED eliminates the need for human intervention or the use of commercial LLMs. Moreover, the datasets curated through SHED exhibit transferability, indicating they can be reused across different LLMs with consistently high performance. We conduct extensive experiments to evaluate the datasets curated by SHED. The results demonstrate SHED's superiority over state-of-the-art methods across various tasks and LLMs; notably, datasets comprising only 10% of the original data selected by SHED achieve performance comparable to or surpassing that of the full datasets.", "keywords": "Large Language Models;Data Selection;Shapley Value", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/665bf8ef97b52f091d31aa59589343d5001bbf31.zip", "author": "Yexiao He;Ziyao Wang;Zheyu Shen;Guoheng Sun;Yucong Dai;Yongkai Wu;Hongyi Wang;Ang Li", "authorids": "~Yexiao_He1;~Ziyao_Wang2;~Zheyu_Shen2;~Guoheng_Sun1;~Yucong_Dai1;~Yongkai_Wu1;~Hongyi_Wang1;~Ang_Li6", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://ziyaow-about.netlify.app;https://shenzheyu.github.io/;;https://xanadu12138.github.io/;https://www.yongkaiwu.com/;https://hwang595.github.io/;https://www.ang-li.com", "dblp": ";;377/3867;;;183/0976;15/832-1.html;33/2805-5", "google_scholar": "https://scholar.google.com/citations?hl=en;_PdzpfAAAAAJ;AHw5vnwAAAAJ;fMnmSXsAAAAJ;;sX6KTvwAAAAJ;zYdZORsAAAAJ;JVKSaWIAAAAJ", "orcid": ";;;;0009-0000-3729-3650;0000-0002-7313-9439;;", "linkedin": ";https://www.linkedin.cn/injobs/in/ziyao-wang-370229234;zheyushen/;guoheng-sun-1b640126a/;;;hongyi-wang-b89651102/;", "or_profile": "~Yexiao_He1;~Ziyao_Wang2;~Zheyu_Shen2;~Guoheng_Sun1;~Yucong_Dai1;~Yongkai_Wu1;~Hongyi_Wang1;~Ang_Li6", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Sichuan University;Clemson University;Clemson University;Carnegie Mellon University;Duke University", "aff_domain": "umd.edu;umd.edu;umd.edu;scu.edu.cn;clemson.edu;clemson.edu;andrew.cmu.edu;duke.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;Assistant Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nhe2024shed,\ntitle={{SHED}: Shapley-Based Automated Dataset Refinement for Instruction Fine-Tuning},\nauthor={Yexiao He and Ziyao Wang and Zheyu Shen and Guoheng Sun and Yucong Dai and Yongkai Wu and Hongyi Wang and Ang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Gqou8PRgWq}\n}", "github": "", "reviewers": "tj1t;Df9d;kWYC", "pdf_size": 808569, "rating": "3;6;6", "confidence": "4;2;3", "soundness": "2;3;3", "novelty": "1;2;3", "presentation": "2;2;4", "wc_summary": "56;88;79", "wc_strengths": "35;43;175", "wc_weaknesses": "388;58;221", "wc_questions": "2;41;20", "wc_limitations": "91;7;10", "wc_review": "572;237;505", "wc_reply_reviewers": "320;22;88", "wc_reply_authors": "1010;0;424", "reply_reviewers": "3;1;1", "reply_authors": "3;1;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 74.33333333333333, 13.474255287605157 ], "wc_strengths_avg": [ 84.33333333333333, 64.19414995845719 ], "wc_weaknesses_avg": [ 222.33333333333334, 134.72523478876883 ], "wc_questions_avg": [ 21.0, 15.937377450509228 ], "wc_limitations_avg": [ 36.0, 38.91015291668744 ], "wc_review_avg": [ 438.0, 144.73654226444222 ], "wc_reply_reviewers_avg": [ 143.33333333333334, 127.7949746881917 ], "wc_reply_authors_avg": [ 478.0, 414.09499715242475 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4111541551361610725&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umd.edu;umd.edu;umd.edu;scu.edu.cn;clemson.edu;clemson.edu;andrew.cmu.edu;duke.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;2;2;3;4", "aff_unique_norm": "University of Maryland;Sichuan University;Clemson University;Carnegie Mellon University;Duke University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www/umd.edu;https://www.scu.edu.cn;https://www.clemson.edu;https://www.cmu.edu;https://www.duke.edu", "aff_unique_abbr": "UMD;SCU;Clemson;CMU;Duke", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "MVSDet: Multi-View Indoor 3D Object Detection via Efficient Plane Sweeps", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95872", "id": "GqrWhROxrG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GqrWhROxrG", "openreview": "https://openreview.net/forum?id=GqrWhROxrG", "poster": "/media/PosterPDFs/NeurIPS%202024/95872.png?t=1730048419.9556663", "project": "", "author_site": "Yating Xu, Chen Li, Gim Hee Lee", "tldr": "", "abstract": "The key challenge of multi-view indoor 3D object detection is to infer accurate geometry information from images for precise 3D detection. Previous method relies on NeRF for geometry reasoning. However, the geometry extracted from NeRF is generally inaccurate, which leads to sub-optimal detection performance. In this paper, we propose MVSDet which utilizes plane sweep for geometry-aware 3D object detection. To circumvent the requirement for a large number of depth planes for accurate depth prediction, we design a probabilistic sampling and soft weighting mechanism to decide the placement of pixel features on the 3D volume. We select multiple locations that score top in the probability volume for each pixel and use their probability score to indicate the confidence. We further apply recent pixel-aligned Gaussian Splatting to regularize depth prediction and improve detection performance with little computation overhead. Extensive experiments on ScanNet and ARKitScenes datasets are conducted to show the superiority of our model. Our code is available at https://github.com/Pixie8888/MVSDet.", "keywords": "3d object detection;plane sweep;Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yating Xu;Chen Li;Gim Hee Lee", "authorids": "~Yating_Xu1;~Chen_Li13;~Gim_Hee_Lee1", "gender": "F;F;", "homepage": ";https://chaneyddtt.github.io/;https://www.comp.nus.edu.sg/~leegh/", "dblp": ";164/3294-38;49/9455", "google_scholar": "rVUZ98UAAAAJ;6_rJ2pcAAAAJ;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": "0000-0001-6928-5532;0009-0000-6807-3490;0000-0002-1583-0475", "linkedin": ";;", "or_profile": "~Yating_Xu1;~Chen_Li13;~Gim_Hee_Lee1", "aff": "National University of Singapore; National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;nus.edu.sg", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nxu2024mvsdet,\ntitle={{MVSD}et: Multi-View Indoor 3D Object Detection via Efficient Plane Sweeps},\nauthor={Yating Xu and Chen Li and Gim Hee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GqrWhROxrG}\n}", "github": "", "reviewers": "ZuP3;YYFF;N557", "pdf_size": 6288809, "rating": "6;6;6", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "3;2;3", "wc_summary": "66;136;71", "wc_strengths": "110;108;27", "wc_weaknesses": "119;108;60", "wc_questions": "152;144;48", "wc_limitations": "7;8;3", "wc_review": "454;504;209", "wc_reply_reviewers": "45;57;31", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 91.0, 31.88521078284832 ], "wc_strengths_avg": [ 81.66666666666667, 38.66379299666406 ], "wc_weaknesses_avg": [ 95.66666666666667, 25.61683474245447 ], "wc_questions_avg": [ 114.66666666666667, 47.2534537244516 ], "wc_limitations_avg": [ 6.0, 2.160246899469287 ], "wc_review_avg": [ 389.0, 128.90565025113006 ], "wc_reply_reviewers_avg": [ 44.333333333333336, 10.624918300339484 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16317319464454113588&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "u.nus.edu;nus.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Humanoid Locomotion as Next Token Prediction", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95871", "id": "GrMczQGTlA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GrMczQGTlA", "openreview": "https://openreview.net/forum?id=GrMczQGTlA", "poster": "", "project": "", "author_site": "Ilija Radosavovic, Jathushan Rajasegaran, Baifeng Shi, Bike Zhang, Sarthak Kamat, Koushil Sreenath, Trevor Darrell, Jitendra Malik", "tldr": "", "abstract": "We cast real-world humanoid control as a next token prediction problem, akin to predicting the next word in language. Our model is a causal transformer trained via autoregressive prediction of sensorimotor sequences. To account for the multi-modal nature of the data, we perform prediction in a modality-aligned way, and for each input token predict the next token from the same modality. This general formulation enables us to leverage data with missing modalities, such as videos without actions. We train our model on a dataset of sequences from a prior neural network policy, a model-based controller, motion capture, and YouTube videos of humans. We show that our model enables a real humanoid robot to walk in San Francisco zero-shot. Our model can transfer to the real world even when trained on only 27 hours of walking data, and can generalize to commands not seen during training. These findings suggest a promising path toward learning challenging real-world control tasks by generative modeling of sensorimotor sequences.", "keywords": "Real-World Humanoid Control;Next Token Prediction", "primary_area": "robotics", "supplementary_material": "", "author": "Ilija Radosavovic;Bike Zhang;Baifeng Shi;Jathushan Rajasegaran;Sarthak Kamat;Trevor Darrell;Koushil Sreenath;Jitendra Malik", "authorids": "~Ilija_Radosavovic1;~Bike_Zhang1;~Baifeng_Shi1;~Jathushan_Rajasegaran2;~Sarthak_Kamat1;~Trevor_Darrell2;~Koushil_Sreenath1;~Jitendra_Malik2", "gender": "M;;;M;M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~ilija;;https://bfshi.github.io;https://brjathu.github.io/;;;https://people.eecs.berkeley.edu/~malik/;https://people.eecs.berkeley.edu/~trevor/", "dblp": "211/6740;245/9168;261/9376;211/4065;;;58/2944;d/TrevorDarrell", "google_scholar": "UKpinl8AAAAJ;https://scholar.google.ca/citations?user=uzzNsVMAAAAJ;LBEIm8gAAAAJ;;https://scholar.google.com/citations?view_op=list_works;o9aFV8cAAAAJ;oY9R5YQAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;;;;;0000-0003-3695-1580;", "linkedin": ";;baifeng-shi-09171b188/;;;;;", "or_profile": "~Ilija_Radosavovic1;~Bike_Zhang1;~Baifeng_Shi1;~Jathushan_Rajasegaran2;~Sarthak_Kamat1;~Koushil_Sreenath1;~Jitendra_Malik2;~trevor_darrell1", "aff": "University of California, Berkeley;University of California, Berkeley;NVIDIA;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;berkeley.edu;nvidia.com;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu", "position": "PhD student;PhD student;Research Intern;PhD student;Undergrad student;Assistant Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nradosavovic2024humanoid,\ntitle={Humanoid Locomotion as Next Token Prediction},\nauthor={Ilija Radosavovic and Jathushan Rajasegaran and Baifeng Shi and Bike Zhang and Sarthak Kamat and Koushil Sreenath and Trevor Darrell and Jitendra Malik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GrMczQGTlA}\n}", "github": "", "reviewers": "FS4P;GH97;qt3e;rJUi", "pdf_size": 13339868, "rating": "7;8;8;8", "confidence": "4;4;4;4", "soundness": "3;4;4;4", "novelty": "3;3;4;3", "presentation": "3;4;4;4", "wc_summary": "89;78;50;53", "wc_strengths": "54;56;34;37", "wc_weaknesses": "328;133;66;62", "wc_questions": "98;2;137;97", "wc_limitations": "64;6;1;39", "wc_review": "633;275;288;288", "wc_reply_reviewers": "524;22;265;66", "wc_reply_authors": "946;27;510;350", "reply_reviewers": "4;1;2;1", "reply_authors": "5;2;4;3", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 16.5 ], "wc_strengths_avg": [ 45.25, 9.832980219648569 ], "wc_weaknesses_avg": [ 147.25, 108.10035846379049 ], "wc_questions_avg": [ 83.5, 49.741833500585805 ], "wc_limitations_avg": [ 27.5, 25.636887486588538 ], "wc_review_avg": [ 371.0, 151.35884513301494 ], "wc_reply_reviewers_avg": [ 219.25, 198.342349234852 ], "wc_reply_authors_avg": [ 458.25, 331.0108570726948 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10824280382918994834&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "berkeley.edu;berkeley.edu;nvidia.com;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;0;2", "aff_unique_norm": "University of California, Berkeley;NVIDIA;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";NVIDIA Corporation;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://www.nvidia.com;", "aff_unique_abbr": "UC Berkeley;NVIDIA;", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Bayesian Domain Adaptation with Gaussian Mixture Domain-Indexing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95870", "id": "Grd7yzFm5V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Grd7yzFm5V", "openreview": "https://openreview.net/forum?id=Grd7yzFm5V", "poster": "/media/PosterPDFs/NeurIPS%202024/95870.png?t=1731154855.0826623", "project": "", "author_site": "Yanfang Ling, Jiyong Li, Lingbo Li, Shangsong Liang", "tldr": "", "abstract": "Recent methods are proposed to improve performance of domain adaptation by inferring domain index under an adversarial variational bayesian framework, where domain index is unavailable. \nHowever, existing methods typically assume that the global domain indices are sampled from a vanilla gaussian prior, overlooking the inherent structures among different domains.\nTo address this challenge, we propose a Bayesian Domain Adaptation with Gaussian Mixture Domain-Indexing(GMDI) algorithm. \nGMDI employs a Gaussian Mixture Model for domain indices, with the number of component distributions in the \"domain-themes'' space adaptively determined by a Chinese Restaurant Process. \nBy dynamically adjusting the mixtures at the domain indices level, GMDI significantly improves domain adaptation performance. \nOur theoretical analysis demonstrates that GMDI achieves a more stringent evidence lower bound, closer to the log-likelihood. \nFor classification, GMDI outperforms all approaches, and surpasses the state-of-the-art method, VDI, by up to 3.4%, reaching 99.3%. \nFor regression, GMDI reduces MSE by up to 21% (from 3.160 to 2.493), achieving the lowest errors among all methods.", "keywords": "domain adaptation;dynamic Gaussian mixture model;structural variational inference", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/52ddb9a98582d6892a41d9986e0a0435e12df8f8.zip", "author": "Yanfang Ling;Jiyong Li;Lingbo Li;Shangsong Liang", "authorids": "~Yanfang_Ling1;~Jiyong_Li1;~Lingbo_Li1;~Shangsong_Liang1", "gender": "F;;M;M", "homepage": "https://github.com/YanfangLing;https://github.com/awslsky;;", "dblp": ";;88/9874-1;57/7731", "google_scholar": ";;;4uggVcIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yanfang_Ling1;~Jiyong_Li1;~Lingbo_Li1;~Shangsong_Liang1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;University of Warwick;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;warwick.ac.uk;sysu.edu.cn", "position": "MS student;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nling2024bayesian,\ntitle={Bayesian Domain Adaptation with Gaussian Mixture Domain-Indexing},\nauthor={Yanfang Ling and Jiyong Li and Lingbo Li and Shangsong Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Grd7yzFm5V}\n}", "github": "", "reviewers": "uzxH;72He;TJok", "pdf_size": 1837688, "rating": "4;6;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "44;51;112", "wc_strengths": "47;113;115", "wc_weaknesses": "300;64;81", "wc_questions": "2;92;2", "wc_limitations": "8;14;2", "wc_review": "401;334;312", "wc_reply_reviewers": "526;13;9", "wc_reply_authors": "2162;8;8", "reply_reviewers": "5;1;1", "reply_authors": "8;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 69.0, 30.539591789456953 ], "wc_strengths_avg": [ 91.66666666666667, 31.594654962860762 ], "wc_weaknesses_avg": [ 148.33333333333334, 107.46885853844152 ], "wc_questions_avg": [ 32.0, 42.42640687119285 ], "wc_limitations_avg": [ 8.0, 4.898979485566356 ], "wc_review_avg": [ 349.0, 37.85058343892029 ], "wc_reply_reviewers_avg": [ 182.66666666666666, 242.77882023676523 ], "wc_reply_authors_avg": [ 726.0, 1015.4053377838823 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.8856180831641267 ], "reply_authors_avg": [ 4.0, 2.8284271247461903 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17317255590596352402&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sysu.edu.cn;sysu.edu.cn;warwick.ac.uk;sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sun Yat-sen University;University of Warwick", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.warwick.ac.uk", "aff_unique_abbr": "SYSU;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Dual Critic Reinforcement Learning under Partial Observability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95869", "id": "GruuYVTGXV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GruuYVTGXV", "openreview": "https://openreview.net/forum?id=GruuYVTGXV", "poster": "/media/PosterPDFs/NeurIPS%202024/95869.png?t=1731039415.2806687", "project": "", "author_site": "Jinqiu Li, Enmin Zhao, Tong Wei, Junliang Xing, SHIMING XIANG", "tldr": "", "abstract": "Partial observability in environments poses significant challenges that impede the formation of effective policies in reinforcement learning. Prior research has shown that borrowing the complete state information can enhance sample efficiency. This strategy, however, frequently encounters unstable learning with high variance in practical applications due to the over-reliance on complete information. This paper introduces DCRL, a Dual Critic Reinforcement Learning framework designed to adaptively harness full-state information during training to reduce variance for optimized online performance. In particular, DCRL incorporates two distinct critics: an oracle critic with access to complete state information and a standard critic functioning within the partially observable context. It innovates a synergistic strategy to meld the strengths of the oracle critic for efficiency improvement and the standard critic for variance reduction, featuring a novel mechanism for seamless transition and weighting between them. We theoretically prove that DCRL mitigates the learning variance while maintaining unbiasedness. Extensive experimental analyses across the Box2D and Box3D environments have verified DCRL's superior performance. The source code is available in the supplementary.", "keywords": "Reinforcement Learning;Partial Observability;POMDP", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/d83e230ebe957da4e1efe8fdb9d3aea3cbf1c4f1.zip", "author": "Jinqiu Li;Enmin Zhao;Tong Wei;Junliang Xing;Shiming Xiang", "authorids": "~Jinqiu_Li1;~Enmin_Zhao1;~Tong_Wei3;~Junliang_Xing1;~Shiming_Xiang1", "gender": "F;M;M;M;M", "homepage": ";https://github.com/ZhaoEnMin;;http://people.ucas.ac.cn/~jlxing?language=en;http://www.escience.cn/people/smxiang", "dblp": "199/9879.html;;;43/7659.html;81/6575.html", "google_scholar": ";;jT_InzgAAAAJ;jSwNd3MAAAAJ;0ggsACEAAAAJ", "orcid": ";;;0000-0001-6801-0510;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;", "or_profile": "~Jinqiu_Li1;~Enmin_Zhao1;~Tong_Wei3;~Junliang_Xing1;~Shiming_Xiang1", "aff": "University of Chinese Academy of Sciences;JD.com;Tsinghua University;Tsinghua University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;jd.com;tsinghua.edu.cn;tsinghua.edu.cn;ia.ac.cn", "position": "PhD student;Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024dual,\ntitle={Dual Critic Reinforcement Learning under Partial Observability},\nauthor={Jinqiu Li and Enmin Zhao and Tong Wei and Junliang Xing and Shiming Xiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GruuYVTGXV}\n}", "github": "", "reviewers": "RNGm;VBT3;gCno;Hmpg;3R7k", "pdf_size": 44782308, "rating": "3;5;5;6;7", "confidence": "4;4;4;4;5", "soundness": "2;3;3;4;3", "novelty": "2;2;2;2;4", "presentation": "2;4;3;3;4", "wc_summary": "60;69;67;76;77", "wc_strengths": "135;80;42;37;126", "wc_weaknesses": "440;178;65;175;3", "wc_questions": "26;68;27;2;67", "wc_limitations": "1;57;6;9;1", "wc_review": "662;452;207;299;274", "wc_reply_reviewers": "313;422;53;122;14", "wc_reply_authors": "608;626;97;88;19", "reply_reviewers": "1;2;1;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 69.8, 6.241794613730894 ], "wc_strengths_avg": [ 84.0, 40.87542048713383 ], "wc_weaknesses_avg": [ 172.2, 149.58529339477195 ], "wc_questions_avg": [ 38.0, 25.69824896758532 ], "wc_limitations_avg": [ 14.8, 21.32041275397829 ], "wc_review_avg": [ 378.8, 162.74323334627465 ], "wc_reply_reviewers_avg": [ 184.8, 156.962925558872 ], "wc_reply_authors_avg": [ 287.6, 270.3646426587619 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6784005252999681, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zhHnBEd6bfAJ:scholar.google.com/&scioq=Dual+Critic+Reinforcement+Learning+under+Partial+Observability&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ucas.ac.cn;jd.com;tsinghua.edu.cn;tsinghua.edu.cn;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "University of Chinese Academy of Sciences;JD.com;Tsinghua University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;https://www.jd.com;https://www.tsinghua.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;JD;THU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Achievable Fairness on Your Data With Utility Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95868", "id": "GtEmIzLZmR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GtEmIzLZmR", "openreview": "https://openreview.net/forum?id=GtEmIzLZmR", "poster": "", "project": "", "author_site": "Muhammad Faaiz Taufiq, Jean-Francois Ton, Yang Liu", "tldr": "", "abstract": "In machine learning fairness, training models that minimize disparity across different sensitive groups often leads to diminished accuracy, a phenomenon known as the fairness-accuracy trade-off. The severity of this trade-off inherently depends on dataset characteristics such as dataset imbalances or biases and therefore, using a uniform fairness requirement across diverse datasets remains questionable. To address this, we present a computationally efficient approach to approximate the fairness-accuracy trade-off curve tailored to individual datasets, backed by rigorous statistical guarantees. By utilizing the You-Only-Train-Once (YOTO) framework, our approach mitigates the computational burden of having to train multiple models when approximating the trade-off curve. Crucially, we introduce a novel methodology for quantifying uncertainty in our estimates, thereby providing practitioners with a robust framework for auditing model fairness while avoiding false conclusions due to estimation errors. Our experiments spanning tabular (e.g., Adult), image (CelebA), and language (Jigsaw) datasets underscore that our approach not only reliably quantifies the optimum achievable trade-offs across various data modalities but also helps detect suboptimality in SOTA fairness methods.", "keywords": "fairness;dataset;accuracy-fairness trade-off;uncertainty quantification", "primary_area": "fairness", "supplementary_material": "", "author": "Muhammad Faaiz Taufiq;Jean-Francois Ton;Yang Liu", "authorids": "~Muhammad_Faaiz_Taufiq1;~Jean-Francois_Ton2;~Yang_Liu3", "gender": "M;Not Specified;M", "homepage": "https://faaizt.github.io/;https://savior287.github.io/JFT-webpage/;http://www.yliuu.com", "dblp": "322/2165;;51/3710-18", "google_scholar": "oDL6ahoAAAAJ;WWVOu4kAAAAJ;jKrIVCIAAAAJ", "orcid": ";;0000-0001-8420-6011", "linkedin": "muhammadftaufiq/;;", "or_profile": "~Muhammad_Faaiz_Taufiq1;~Jean-Francois_Ton2;~Yang_Liu3", "aff": "University of Oxford;Bytedance;University of California, Santa Cruz", "aff_domain": "ox.ac.uk;bytedance.com;ucsc.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntaufiq2024achievable,\ntitle={Achievable Fairness on Your Data With Utility Guarantees},\nauthor={Muhammad Faaiz Taufiq and Jean-Francois Ton and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GtEmIzLZmR}\n}", "github": "", "reviewers": "NxJb;Vp4R;3zFr", "pdf_size": 2824482, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "64;44;52", "wc_strengths": "90;172;54", "wc_weaknesses": "93;98;281", "wc_questions": "129;40;3", "wc_limitations": "7;22;13", "wc_review": "383;376;403", "wc_reply_reviewers": "0;32;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.333333333333336, 8.219218670625303 ], "wc_strengths_avg": [ 105.33333333333333, 49.378357832376544 ], "wc_weaknesses_avg": [ 157.33333333333334, 87.46935971463886 ], "wc_questions_avg": [ 57.333333333333336, 52.87931752795436 ], "wc_limitations_avg": [ 14.0, 6.164414002968976 ], "wc_review_avg": [ 387.3333333333333, 11.440668201153676 ], "wc_reply_reviewers_avg": [ 16.0, 13.063945294843617 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9997586313262987695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ox.ac.uk;bytedance.com;ucsc.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;ByteDance;University of California, Santa Cruz", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.bytedance.com;https://www.ucsc.edu", "aff_unique_abbr": "Oxford;Bytedance;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;China;United States" }, { "title": "SR-CACO-2: A Dataset for Confocal Fluorescence Microscopy Image Super-Resolution", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97775", "id": "GtYd9PCaaB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GtYd9PCaaB", "openreview": "https://openreview.net/forum?id=GtYd9PCaaB", "poster": "/media/PosterPDFs/NeurIPS%202024/97775.png?t=1730986595.8406456", "project": "", "author_site": "Soufiane Belharbi, Mara Whitford, Phuong Hoang, Shakeeb Murtaza, Luke McCaffrey, Eric Granger", "tldr": "", "abstract": "Confocal fluorescence microscopy is one of the most accessible and widely used imaging techniques for the study of biological processes at the cellular and subcellular levels. Scanning confocal microscopy allows the capture of high-quality images from thick three-dimensional (3D) samples, yet suffers from well-known limitations such as photobleaching and phototoxicity of specimens caused by intense light exposure, which limits its use in some applications, especially for living cells. Cellular damage can be alleviated by changing imaging parameters to reduce light exposure, often at the expense of image quality.\nMachine/deep learning methods for single-image super-resolution (SISR) can be applied to restore image quality by upscaling lower-resolution (LR) images to produce high-resolution images (HR). These SISR methods have been successfully applied to photo-realistic images due partly to the abundance of publicly available datasets. In contrast, the lack of publicly available data partly limits their application and success in scanning confocal microscopy.\nIn this paper, we introduce a large scanning confocal microscopy dataset named SR-CACO-2 that is comprised of low- and high-resolution image pairs marked for three different fluorescent markers. It allows to evaluate the performance of SISR methods on three different upscaling levels (X2, X4, X8). SR-CACO-2 contains the human epithelial cell line Caco-2 (ATCC HTB-37), and it is composed of 2,200 unique images, captured with four resolutions and three markers, that have been translated in the form of 9,937 \n patches for experiments with SISR methods. Given the new SR-CACO-2 dataset, we also provide benchmarking results for 16 state-of-the-art methods that are representative of the main SISR families. Results show that these methods have limited success in producing high-resolution textures, indicating that SR-CACO-2 represents a challenging problem. The dataset is released under a Creative Commons license (CC BY-NC-SA 4.0), and it can be accessed freely. Our dataset, code and pretrained weights for SISR methods are publicly available: https://github.com/sbelharbi/sr-caco-2.", "keywords": "New Dataset;Confocal Fluorescence Microscopy;Image Super-resolution;Deep Learning;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Soufiane Belharbi;Mara KM Whitford;Phuong Hoang;Shakeeb Murtaza;Luke McCaffrey;Eric Granger", "authorids": "~Soufiane_Belharbi1;~Mara_KM_Whitford1;~Phuong_Hoang1;~Shakeeb_Murtaza1;~Luke_McCaffrey1;~Eric_Granger1", "gender": "M;F;F;M;;M", "homepage": "https://sbelharbi.github.io;;;https://shakeebmurtaza.github.io/;https://mccaffreylab.mcgill.ca/;https://www.etsmtl.ca/en/study-at-ets/professors/egranger", "dblp": "162/0062;;99/6619;232/7733;;86/2306", "google_scholar": ";;;https://scholar.google.com.pk/citations?user=BYMo6wkAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6326-380X;;;0000-0002-8066-6907;;", "linkedin": "soufiane-belharbi;mara-whitford-1070232ab/;kristen-hoang/;shakeebmurtaza/;;", "or_profile": "~Soufiane_Belharbi1;~Mara_KM_Whitford1;~Phuong_Hoang1;~Shakeeb_Murtaza1;~Luke_McCaffrey1;~Eric_Granger1", "aff": "\u00c9cole de technologie sup\u00e9rieure (LIVIA Montreal);McGill University, McGill University;McGill University, McGill University;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;McGill University;\u00c9cole de technologie sup\u00e9rieure", "aff_domain": "etsmtl.ca;mail.mcgill.ca;mail.mcgill.ca;etsmtl.ca;mcgill.ca;etsmtl.ca", "position": "Postdoc;PhD student;Undergrad student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbelharbi2024srcaco,\ntitle={{SR}-{CACO}-2: A Dataset for Confocal Fluorescence Microscopy Image Super-Resolution},\nauthor={Soufiane Belharbi and Mara KM Whitford and Phuong Hoang and Shakeeb Murtaza and Luke McCaffrey and Eric Granger},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=GtYd9PCaaB}\n}", "github": "", "reviewers": "zpaL;zxUt;wsnJ;kHKG", "pdf_size": 8137201, "rating": "3;7;8;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "58;59;51;96", "wc_strengths": "66;24;45;5", "wc_improvement": "242;214;34;14", "wc_limitations": "17;9;7;2", "wc_correctness": "8;1;1;5", "wc_clarity": "3;1;1;5", "wc_relation_to_prior_work": "18;1;1;5", "wc_documentation": "22;1;1;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "435;311;142;135", "wc_reply_reviewers": "0;23;0;91", "wc_reply_authors": "223;96;167;320", "reply_reviewers": "0;1;0;2", "reply_authors": "4;2;3;5", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 66.0, 17.592612085759182 ], "wc_strengths_avg": [ 35.0, 22.814469093099667 ], "wc_improvement_avg": [ 126.0, 102.72292830717006 ], "wc_limitations_avg": [ 8.75, 5.402545696243577 ], "wc_correctness_avg": [ 3.75, 2.947456530637899 ], "wc_clarity_avg": [ 2.5, 1.6583123951777 ], "wc_relation_to_prior_work_avg": [ 6.25, 6.977642868476432 ], "wc_documentation_avg": [ 6.5, 8.958236433584458 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 255.75, 125.20258583591634 ], "wc_reply_reviewers_avg": [ 28.5, 37.286056375004314 ], "wc_reply_authors_avg": [ 201.5, 81.891696770796 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11295766527353444385&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "etsmtl.ca;mail.mcgill.ca;mail.mcgill.ca;etsmtl.ca;mcgill.ca;etsmtl.ca", "author_num": 6, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "\u00c9cole de technologie sup\u00e9rieure;McGill University;Universit\u00e9 du Qu\u00e9bec", "aff_unique_dep": "LIVIA;;", "aff_unique_url": "https://www.etsmtl.ca;https://www.mcgill.ca;https://www.etsmtl.ca", "aff_unique_abbr": "ETS;McGill;ETS", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Montreal;;\u00c9cole de technologie sup\u00e9rieure", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Skill-aware Mutual Information Optimisation for Zero-shot Generalisation in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95867", "id": "GtbwJ6mruI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GtbwJ6mruI", "openreview": "https://openreview.net/forum?id=GtbwJ6mruI", "poster": "/media/PosterPDFs/NeurIPS%202024/95867.png?t=1731489191.7849786", "project": "", "author_site": "Xuehui Yu, Mhairi Dunion, Xin Li, Stefano Albrecht", "tldr": "", "abstract": "Meta-Reinforcement Learning (Meta-RL) agents can struggle to operate across tasks with varying environmental features that require different optimal skills (i.e., different modes of behaviour). Using context encoders based on contrastive learning to enhance the generalisability of Meta-RL agents is now widely studied but faces challenges such as the requirement for a large sample size, also referred to as the $\\log$-$K$ curse. To improve RL generalisation to different tasks, we first introduce Skill-aware Mutual Information (SaMI), an optimisation objective that aids in distinguishing context embeddings according to skills, thereby equipping RL agents with the ability to identify and execute different skills across tasks. We then propose Skill-aware Noise Contrastive Estimation (SaNCE), a $K$-sample estimator used to optimise the SaMI objective. We provide a framework for equipping an RL agent with SaNCE in practice and conduct experimental validation on modified MuJoCo and Panda-gym benchmarks. We empirically find that RL agents that learn by maximising SaMI achieve substantially improved zero-shot generalisation to unseen tasks. Additionally, the context encoder trained with SaNCE demonstrates greater robustness to a reduction in the number of available samples, thus possessing the potential to overcome the $\\log$-$K$ curse.", "keywords": "contrastive learning;reinforcement learning;meta reinforcement learning;zero-shot generalisation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Xuehui Yu;Mhairi Dunion;Xin Li;Stefano V Albrecht", "authorids": "~Xuehui_Yu2;~Mhairi_Dunion1;~Xin_Li44;~Stefano_V_Albrecht1", "gender": "F;F;M;", "homepage": "https://yuxuehui.github.io/Homepage/;;https://github.com/loxs123;https://agents-lab.org/stefano-albrecht/", "dblp": ";;;118/3975", "google_scholar": "mUZEUNoAAAAJ;;;https://scholar.google.co.uk/citations?user=ceSFqCcAAAAJ", "orcid": "0000-0001-9058-0480;;;0000-0002-8735-1465", "linkedin": ";mhairi-dunion-012a1356/;;", "or_profile": "~Xuehui_Yu2;~Mhairi_Dunion1;~Xin_Li44;~Stefano_V_Albrecht1", "aff": "Harbin Institute of Technology;University of Edinburgh;Harbin Institute of Technology;University of Edinburgh", "aff_domain": "hit.edu.cn;ed.ac.uk;hit.edu.cn;ed.ac.uk", "position": "PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nyu2024skillaware,\ntitle={Skill-aware Mutual Information Optimisation for Zero-shot Generalisation in Reinforcement Learning},\nauthor={Xuehui Yu and Mhairi Dunion and Xin Li and Stefano V Albrecht},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GtbwJ6mruI}\n}", "github": "", "reviewers": "hnsz;Qb7x;h8bu;TjCk;cm3M", "pdf_size": 24862565, "rating": "3;6;6;6;6", "confidence": "4;3;4;3;3", "soundness": "1;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "1;2;3;3;3", "wc_summary": "99;41;96;91;71", "wc_strengths": "58;19;147;48;38", "wc_weaknesses": "36;212;101;16;280", "wc_questions": "157;43;80;101;79", "wc_limitations": "1;4;37;13;23", "wc_review": "351;319;461;269;491", "wc_reply_reviewers": "195;52;29;21;131", "wc_reply_authors": "1200;0;0;0;449", "reply_reviewers": "2;1;1;1;2", "reply_authors": "3;1;1;1;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 79.6, 21.629609335353237 ], "wc_strengths_avg": [ 62.0, 44.411710167477224 ], "wc_weaknesses_avg": [ 129.0, 101.87443251375686 ], "wc_questions_avg": [ 92.0, 37.469987990390386 ], "wc_limitations_avg": [ 15.6, 13.169662106523463 ], "wc_review_avg": [ 378.2, 84.55625346477929 ], "wc_reply_reviewers_avg": [ 85.6, 67.13449188010586 ], "wc_reply_authors_avg": [ 329.8, 468.5639337379692 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VVf_LVF-UiQJ:scholar.google.com/&scioq=Skill-aware+Mutual+Information+Optimisation+for+Zero-shot+Generalisation+in+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "hit.edu.cn;ed.ac.uk;hit.edu.cn;ed.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Harbin Institute of Technology;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.ed.ac.uk", "aff_unique_abbr": "HIT;Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Boosting Generalization in Parametric PDE Neural Solvers through Adaptive Conditioning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95866", "id": "GuY0zB2xVU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GuY0zB2xVU", "openreview": "https://openreview.net/forum?id=GuY0zB2xVU", "poster": "/media/PosterPDFs/NeurIPS%202024/95866.png?t=1731672464.5661404", "project": "", "author_site": "Armand Kassa\u00ef Koupa\u00ef, Jorge Mifsut Benet, Yuan Yin, Jean-No\u00ebl Vittaut, Patrick Gallinari", "tldr": "", "abstract": "Solving parametric partial differential equations (PDEs) presents significant challenges for data-driven methods due to the sensitivity of spatio-temporal dynamics to variations in PDE parameters. Machine learning approaches often struggle to capture this variability. To address this, data-driven approaches learn parametric PDEs by sampling a very large variety of trajectories with varying PDE parameters. We first show that incorporating conditioning mechanisms for learning parametric PDEs is essential and that among them, \\textit{adaptive conditioning}, allows stronger generalization. As existing adaptive conditioning methods do not scale well with respect to the number of parameters to adapt in the neural solver, we propose GEPS, a simple adaptation mechanism to boost GEneralization in Pde Solvers via a first-order optimization and low-rank rapid adaptation of a small set of context parameters. We demonstrate the versatility of our approach for both fully data-driven and for physics-aware neural solvers. Validation performed on a whole range of spatio-temporal forecasting problems demonstrates excellent performance for generalizing to unseen conditions including initial conditions, PDE coefficients, forcing terms and solution domain. *Project page*: https://geps-project.github.io", "keywords": "Deep Learning;Parametric PDEs;Meta-Learning;physics-aware", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Armand Kassa\u00ef Koupa\u00ef;Jorge Mifsut Benet;Yuan Yin;Jean-No\u00ebl Vittaut;Patrick Gallinari", "authorids": "~Armand_Kassa\u00ef_Koupa\u00ef1;~Jorge_Mifsut_Benet1;~Yuan_Yin1;~Jean-No\u00ebl_Vittaut1;~Patrick_Gallinari2", "gender": "M;;M;M;M", "homepage": "https://www.isir.upmc.fr/personnel/kassai/;;https://www.isir.upmc.fr/personnel/yin/;https://webia.lip6.fr/~vittaut/;", "dblp": "329/7749;;;12/3351;g/PatrickGallinari", "google_scholar": "uG3VApUAAAAJ;r-p2rkEAAAAJ;https://scholar.google.com/citations?hl=fr;https://scholar.google.fr/citations?hl=fr;rFaxB20AAAAJ", "orcid": ";0009-0004-1013-3867;0000-0003-1515-0696;0000-0001-6654-4199;", "linkedin": ";jorge-mifsut/;yuan-yin-nn/;vittaut/;", "or_profile": "~Armand_Kassa\u00ef_Koupa\u00ef1;~Jorge_Mifsut_Benet1;~Yuan_Yin1;~Jean-No\u00ebl_Vittaut1;~patrick_gallinari1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Valeo;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universite", "aff_domain": "isir.upmc.fr;sorbonne-universite.fr;valeo.com;sorbonne-universite.fr;sorbonne-universite.fr", "position": "PhD student;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkoupa{\\\"\\i}2024boosting,\ntitle={Boosting Generalization in Parametric {PDE} Neural Solvers through Adaptive Conditioning},\nauthor={Armand Kassa{\\\"\\i} Koupa{\\\"\\i} and Jorge Mifsut Benet and Yuan Yin and Jean-No{\\\"e}l Vittaut and Patrick Gallinari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GuY0zB2xVU}\n}", "github": "", "reviewers": "1xSb;m65H;JETf;3TLD", "pdf_size": 8378464, "rating": "5;5;5;6", "confidence": "4;3;2;3", "soundness": "2;3;3;3", "novelty": "2;3;2;2", "presentation": "2;2;3;3", "wc_summary": "37;79;33;78", "wc_strengths": "16;31;55;54", "wc_weaknesses": "114;322;111;104", "wc_questions": "18;27;120;72", "wc_limitations": "2;4;3;5", "wc_review": "187;463;322;313", "wc_reply_reviewers": "205;189;126;28", "wc_reply_authors": "633;613;203;18", "reply_reviewers": "2;2;1;1", "reply_authors": "4;4;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 56.75, 21.798795838302627 ], "wc_strengths_avg": [ 39.0, 16.38596960817394 ], "wc_weaknesses_avg": [ 162.75, 92.01460481901772 ], "wc_questions_avg": [ 59.25, 40.604033050917494 ], "wc_limitations_avg": [ 3.5, 1.118033988749895 ], "wc_review_avg": [ 321.25, 97.7045930343093 ], "wc_reply_reviewers_avg": [ 137.0, 69.51618516575834 ], "wc_reply_authors_avg": [ 366.75, 264.56036645726056 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jv6X9yf4NQAJ:scholar.google.com/&scioq=Boosting+Generalization+in+Parametric+PDE+Neural+Solvers+through+Adaptive+Conditioning&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "isir.upmc.fr;sorbonne-universite.fr;valeo.com;sorbonne-universite.fr;sorbonne-universite.fr", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Sorbonne Universit\u00e9;Valeo;Sorbonne University", "aff_unique_dep": "Facult\u00e9 des Sciences;Facult\u00e9 des Sciences;;", "aff_unique_url": "https://www.upmc.fr;https://www.sorbonne-universite.fr;https://www.valeo.com;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UPMC;Sorbonne U;;Sorbonne", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Paris;Paris VI;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "title": "Value-Based Deep Multi-Agent Reinforcement Learning with Dynamic Sparse Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95865", "id": "Gug7wc0BSs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Gug7wc0BSs", "openreview": "https://openreview.net/forum?id=Gug7wc0BSs", "poster": "/media/PosterPDFs/NeurIPS%202024/95865.png?t=1731561516.1098347", "project": "", "author_site": "Pihe Hu, Shaolong Li, Zhuoran Li, Ling Pan, Longbo Huang", "tldr": "", "abstract": "Deep Multi-agent Reinforcement Learning (MARL) relies on neural networks with numerous parameters in multi-agent scenarios, often incurring substantial computational overhead. Consequently, there is an urgent need to expedite training and enable model compression in MARL. This paper proposes the utilization of dynamic sparse training (DST), a technique proven effective in deep supervised learning tasks, to alleviate the computational burdens in MARL training. However, a direct adoption of DST fails to yield satisfactory MARL agents, leading to breakdowns in value learning within deep sparse value-based MARL models. Motivated by this challenge, we introduce an innovative Multi-Agent Sparse Training (MAST) framework aimed at simultaneously enhancing the reliability of learning targets and the rationality of sample distribution to improve value learning in sparse models. Specifically, MAST incorporates the Soft Mellowmax Operator with a hybrid TD-($\\lambda$) schema to establish dependable learning targets. Additionally, it employs a dual replay buffer mechanism to enhance the distribution of training samples. Building upon these aspects, MAST utilizes gradient-based topology evolution to exclusively train multiple MARL agents using sparse networks. Our comprehensive experimental investigation across various value-based MARL algorithms on multiple benchmarks demonstrates, for the first time, significant reductions in redundancy of up to $20\\times$ in Floating Point Operations (FLOPs) for both training and inference, with less than 3% performance degradation.", "keywords": "Multi-Agent Reinforcement Learning;Dynamic Sparse Training;Value Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Pihe Hu;Shaolong Li;Zhuoran Li;Ling Pan;Longbo Huang", "authorids": "~Pihe_Hu1;~Shaolong_Li2;~Zhuoran_Li1;~Ling_Pan1;~Longbo_Huang2", "gender": "M;M;M;F;M", "homepage": "https://hupihe.top/;;;https://ling-pan.github.io/;http://people.iiis.tsinghua.edu.cn/~huang/", "dblp": "215/4280;;18/8638;199/9303/;79/7077", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;j948XtQAAAAJ;qZ_zlacAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Pihe_Hu1;~Shaolong_Li2;~Zhuoran_Li1;~Ling_Pan1;~Longbo_Huang2", "aff": "Tsinghua University;Central South University;Tsinghua University;Montreal Institute for Learning Algorithms (MILA);Tsinghua University", "aff_domain": "tsinghua.edu.cn;csu.edu.cn;tsinghua.edu.cn;mila.umontreal.ca;tsinghua.edu.cn", "position": "PhD student;Undergrad student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhu2024valuebased,\ntitle={Value-Based Deep Multi-Agent Reinforcement Learning with Dynamic Sparse Training},\nauthor={Pihe Hu and Shaolong Li and Zhuoran Li and Ling Pan and Longbo Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Gug7wc0BSs}\n}", "github": "", "reviewers": "8jXp;zAbe;fbhH", "pdf_size": 3979351, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "49;50;99", "wc_strengths": "37;47;112", "wc_weaknesses": "99;31;14", "wc_questions": "63;46;178", "wc_limitations": "9;21;1", "wc_review": "257;195;404", "wc_reply_reviewers": "0;17;16", "wc_reply_authors": "144;90;18", "reply_reviewers": "0;1;1", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 23.338094752285727 ], "wc_strengths_avg": [ 65.33333333333333, 33.24989557210001 ], "wc_weaknesses_avg": [ 48.0, 36.72419729097788 ], "wc_questions_avg": [ 95.66666666666667, 58.63067077524831 ], "wc_limitations_avg": [ 10.333333333333334, 8.219218670625303 ], "wc_review_avg": [ 285.3333333333333, 87.64448388549935 ], "wc_reply_reviewers_avg": [ 11.0, 7.788880963698615 ], "wc_reply_authors_avg": [ 84.0, 51.61395160225576 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UIRhDW0YGiIJ:scholar.google.com/&scioq=Value-Based+Deep+Multi-Agent+Reinforcement+Learning+with+Dynamic+Sparse+Training&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "tsinghua.edu.cn;csu.edu.cn;tsinghua.edu.cn;mila.umontreal.ca;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Tsinghua University;Central South University;Montreal Institute for Learning Algorithms", "aff_unique_dep": ";;Artificial Intelligence", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.csu.edu.cn;https://mila.quebec", "aff_unique_abbr": "THU;CSU;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Canada" }, { "title": "Preference-based Pure Exploration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95864", "id": "GvQU54uA7u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GvQU54uA7u", "openreview": "https://openreview.net/forum?id=GvQU54uA7u", "poster": "", "project": "", "author_site": "Apurv Shukla, Debabrota Basu", "tldr": "", "abstract": "We study the preference-based pure exploration problem for bandits with vector-valued rewards and a set of preferences imposed over them. Specifically, we aim to identify the most preferred policy over a set of arms according to the preferences induced on the reward vectors by an ordering cone $C$. First, to quantify the impact of preferences, we derive a novel lower bound on the sample complexity for identifying the most preferred arm with confidence level $1-\\delta$. Our lower bound shows that how the geometry of the preferences and reward vectors changes the hardness of this problem. We further explicate this geometry for Gaussian distributions of rewards, and provide a convex reformulation of the lower bound solvable with linear programming. Then, we leverage this convex reformulation of the lower bound to design the Track and Stop with Preferences (TSwP) algorithm that identifies the most preferred policy. Finally, we derive a new concentration result for vector-valued rewards, and show that TSwP achieves a matching sample complexity upper bound.", "keywords": "Pure exploration;multi-armed bandits;vector-valued rewards;preferences", "primary_area": "bandits", "supplementary_material": "", "author": "Apurv Shukla;Debabrota Basu", "authorids": "~Apurv_Shukla1;~Debabrota_Basu1", "gender": "M;", "homepage": ";https://debabrota-basu.github.io/", "dblp": "168/0656.html;126/2209", "google_scholar": ";https://scholar.google.co.in/citations?user=e26Maa4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Apurv_Shukla1;~Debabrota_Basu1", "aff": "University of Michigan - Ann Arbor;INRIA", "aff_domain": "umich.edu;inria.fr", "position": "Postdoc;Faculty", "bibtex": "@inproceedings{\nshukla2024preferencebased,\ntitle={Preference-based Pure Exploration},\nauthor={Apurv Shukla and Debabrota Basu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GvQU54uA7u}\n}", "github": "", "reviewers": "owBL;a6bi;E3uE;maY1", "pdf_size": 507462, "rating": "4;6;7;7", "confidence": "4;2;4;3", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;1;2;3", "wc_summary": "91;75;50;166", "wc_strengths": "85;40;87;129", "wc_weaknesses": "154;149;58;16", "wc_questions": "106;5;22;1", "wc_limitations": "42;6;13;1", "wc_review": "478;275;230;313", "wc_reply_reviewers": "209;0;9;11", "wc_reply_authors": "798;40;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.5, 43.246387132337425 ], "wc_strengths_avg": [ 85.25, 31.483130403439873 ], "wc_weaknesses_avg": [ 94.25, 59.17083318663005 ], "wc_questions_avg": [ 33.5, 42.59401366389413 ], "wc_limitations_avg": [ 15.5, 15.88238017426859 ], "wc_review_avg": [ 324.0, 93.64026911537579 ], "wc_reply_reviewers_avg": [ 57.25, 87.71081746284206 ], "wc_reply_authors_avg": [ 209.5, 340.16282865710065 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.24618298195866545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NBrkpNBZV_kJ:scholar.google.com/&scioq=Preference-based+Pure+Exploration&hl=en&as_sdt=0,33", "gs_version_total": 7, "email": "umich.edu;inria.fr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Michigan;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.inria.fr", "aff_unique_abbr": "UM;INRIA", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;France" }, { "id": "GxpkSJjbQR", "title": "Architecture of Decentralized Expert System for Early Alzheimer's Prediction Enhanced by Data Anomaly Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Alzheimer\u2019s Disease poses a significant global health challenge, necessitating early and precise detection to enhance patient outcomes. Traditional diagnostic methodologies often result in delayed and imprecise predictions, particularly in the disease\u2019s early stages. Centralized data repositories struggle to manage the immense volumes of MRI data, alongside persistent privacy concerns that impede collaborative efforts. This paper presents an innovative approach that leverages the synergy of blockchain technology (due to crowdsourcing patients' longitudinal test data via Web3 application) and Federated Learning to address these challenges. Thus, our proposed decentralized expert system architecture presents a pioneering step towards revolutionizing disease diagnostics. Furthermore, the system integrates robust anomaly detection for patient-submitted data. It emphasizes AI-driven MRI analysis and incorporates a sophisticated data anomaly detection architecture. These mechanisms scrutinize patient-contributed data for various issues, including data quality problems. We acknowledge that performing an exhaustive check of the correctness and quality of MRI images and biological information directly on-chain is not practical due to the computational complexity and cost constraints of blockchain platforms. Instead, such checks are typically performed off-chain, and the blockchain is used to record the results securely. This comprehensive approach empowers to provide more precise early-stage Alzheimer\u2019s Disease prediction with more volume of data. Our system is designed to safeguard both data integrity and patient privacy, facilitating collaborative efforts.", "keywords": "Early-stage AD prediction;Anomaly detection;Decentralized expert system;Alzheimer's disease;Blockchain", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/6984e037795e2a2bb56534802c4d1b1ef53ac6e9.zip", "author": "Stefan Behfar;Qumars Behfar;marzie hosseinpour", "authorids": "~Stefan_Behfar1;~Qumars_Behfar1;~marzie_hosseinpour2", "gender": "Not Specified;;F", "homepage": "https://scholar.google.com/citations?user=ucIHQQcAAAAJ&hl=en;;", "dblp": ";;", "google_scholar": "ucIHQQcAAAAJ;;", "orcid": ";0000-0001-5217-924X;", "linkedin": ";;https://linkedin.com/mehrnushh", "or_profile": "~Stefan_Behfar1;~Qumars_Behfar1;~marzie_hosseinpour2", "aff": ";;Hasselt University", "aff_domain": ";;uhasselt.be", "position": ";;PhD student", "bibtex": "@misc{\nanonymous2024architecture,\ntitle={Architecture of Decentralized Expert System for Early Alzheimer's Prediction Enhanced by Data Anomaly Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=GxpkSJjbQR}\n}", "github": "", "project": "", "reviewers": "vAAp;ahuS;9nGY;moX6", "site": "https://openreview.net/forum?id=GxpkSJjbQR", "pdf_size": 983888, "rating": "2;3;3;5", "confidence": "4;4;5;5", "soundness": "1;2;1;3", "novelty": "1;2;2;3", "presentation": "1;1;2;3", "wc_summary": "48;66;82;116", "wc_strengths": "21;51;141;97", "wc_weaknesses": "16;87;240;47", "wc_questions": "56;25;5;57", "wc_limitations": "21;52;5;67", "wc_review": "162;281;473;384", "wc_reply_reviewers": "48;90;146;0", "wc_reply_authors": "357;708;571;0", "reply_reviewers": "1;1;3;0", "reply_authors": "2;2;3;1", "rating_avg": [ 3.25, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.75, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 78.0, 25.019992006393608 ], "wc_strengths_avg": [ 77.5, 45.57137259288994 ], "wc_weaknesses_avg": [ 97.5, 86.03632953584201 ], "wc_questions_avg": [ 35.75, 21.924586655168667 ], "wc_limitations_avg": [ 36.25, 24.508926945094924 ], "wc_review_avg": [ 325.0, 116.07109890063073 ], "wc_reply_reviewers_avg": [ 71.0, 53.749418601506754 ], "wc_reply_authors_avg": [ 409.0, 267.2218179715122 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W8kiLq55U4gJ:scholar.google.com/&scioq=Architecture+of+Decentralized+Expert+System+for+Early+Alzheimer%27s+Prediction+Enhanced+by+Data+Anomaly+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Hasselt University", "aff_unique_dep": "", "aff_unique_url": "https://www.uhasselt.be", "aff_unique_abbr": "UHasselt", "aff_country_unique_index": "0", "aff_country_unique": "Belgium" }, { "title": "PhyloGen: Language Model-Enhanced Phylogenetic Inference via Graph Structure Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95863", "id": "GxvDsFArxY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GxvDsFArxY", "openreview": "https://openreview.net/forum?id=GxvDsFArxY", "poster": "/media/PosterPDFs/NeurIPS%202024/95863.png?t=1730039988.8954074", "project": "", "author_site": "ChenRui Duan, Zelin Zang, Siyuan Li, Yongjie Xu, Stan Z. Li", "tldr": "", "abstract": "Phylogenetic trees elucidate evolutionary relationships among species, but phylogenetic inference remains challenging due to the complexity of combining continuous (branch lengths) and discrete parameters (tree topology). \n Traditional Markov Chain Monte Carlo methods face slow convergence and computational burdens. Existing Variational Inference methods, which require pre-generated topologies and typically treat tree structures and branch lengths independently, may overlook critical sequence features, limiting their accuracy and flexibility.\n We propose PhyloGen, a novel method leveraging a pre-trained genomic language model to generate and optimize phylogenetic trees without dependence on evolutionary models or aligned sequence constraints. PhyloGen views phylogenetic inference as a conditionally constrained tree structure generation problem, jointly optimizing tree topology and branch lengths through three core modules: (i) Feature Extraction, (ii) PhyloTree Construction, and (iii) PhyloTree Structure Modeling. \n Meanwhile, we introduce a Scoring Function to guide the model towards a more stable gradient descent.\n We demonstrate the effectiveness and robustness of PhyloGen on eight real-world benchmark datasets. Visualization results confirm PhyloGen provides deeper insights into phylogenetic relationships.", "keywords": "Phylogenetic Inference;Genome Language Model;Transformer;Graph Structure Generation;DNA;Large Language Models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "ChenRui Duan;Zelin Zang;Siyuan Li;Yongjie Xu;Stan Z. Li", "authorids": "~ChenRui_Duan1;~Zelin_Zang2;~Siyuan_Li6;~Yongjie_Xu2;~Stan_Z._Li2", "gender": ";M;M;M;", "homepage": ";;https://lupin1998.github.io/;;", "dblp": ";226/7615;63/9705-2;123/9257.html;", "google_scholar": ";foERjnQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=ciG27FYAAAAJ;", "orcid": ";;0000-0001-6806-2468;0000-0002-6045-1626;", "linkedin": ";;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;", "or_profile": "~ChenRui_Duan1;~Zelin_Zang2;~Siyuan_Li6;~Yongjie_Xu2;~Stan_Z._Li2", "aff": ";National University of Singapore;Alibaba Group;Westlake University;", "aff_domain": ";nus.edu.sg;alibaba-inc.com;westlake.edu.cn;", "position": ";Intern;Intern;PhD student;", "bibtex": "@inproceedings{\nduan2024phylogen,\ntitle={PhyloGen: Language Model-Enhanced Phylogenetic Inference via Graph Structure Generation},\nauthor={ChenRui Duan and Zelin Zang and Siyuan Li and Yongjie Xu and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GxvDsFArxY}\n}", "github": "", "reviewers": "4WGb;QAeZ;XKPL;MKto", "pdf_size": 5706730, "rating": "5;5;6;7", "confidence": "1;2;2;4", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "2;2;2;3", "wc_summary": "84;54;71;76", "wc_strengths": "10;51;136;30", "wc_weaknesses": "92;22;258;252", "wc_questions": "241;55;228;99", "wc_limitations": "154;82;85;12", "wc_review": "581;264;778;469", "wc_reply_reviewers": "48;0;95;27", "wc_reply_authors": "112;281;865;113", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;5;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.25, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 10.985786271359915 ], "wc_strengths_avg": [ 56.75, 47.996744681280205 ], "wc_weaknesses_avg": [ 156.0, 102.06860437960343 ], "wc_questions_avg": [ 155.75, 80.4032804057148 ], "wc_limitations_avg": [ 83.25, 50.2164066814821 ], "wc_review_avg": [ 523.0, 186.00134408116517 ], "wc_reply_reviewers_avg": [ 42.5, 34.7598906787694 ], "wc_reply_authors_avg": [ 342.75, 309.2687949017812 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18254452184294703654&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";nus.edu.sg;alibaba-inc.com;westlake.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "National University of Singapore;Alibaba Group;Westlake University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.alibaba.com;https://www.westlake.edu.cn", "aff_unique_abbr": "NUS;Alibaba;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Learning from Snapshots of Discrete and Continuous Data Streams", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95862", "id": "GxwnQ8sxkL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=GxwnQ8sxkL", "openreview": "https://openreview.net/forum?id=GxwnQ8sxkL", "poster": "/media/PosterPDFs/NeurIPS%202024/95862.png?t=1733877589.5071397", "project": "", "author_site": "Pramith Devulapalli, Steve Hanneke", "tldr": "", "abstract": "Imagine a smart camera trap selectively clicking pictures to understand animal movement patterns within a particular habitat. These \"snapshots\", or pieces of data captured from a data stream at adaptively chosen times, provide a glimpse of different animal movements unfolding through time. Learning a continuous-time process through snapshots, such as smart camera traps, is a central theme governing a wide array of online learning situations. In this paper, we adopt a learning-theoretic perspective in understanding the fundamental nature of learning different classes of functions from both discrete data streams and continuous data streams. In our first framework, the *update-and-deploy* setting, a learning algorithm discretely queries from a process to update a predictor designed to make predictions given as input the data stream. We construct a uniform sampling algorithm that can learn with bounded error any concept class with finite Littlestone dimension. Our second framework, known as the *blind-prediction* setting, consists of a learning algorithm generating predictions independently of observing the process, only engaging with the process when it chooses to make queries. Interestingly, we show a stark contrast in learnability where non-trivial concept classes are unlearnable. However, we show that adaptive learning algorithms are necessary to learn sets of time-dependent and data-dependent functions, called pattern classes, in either framework. Finally, we develop a theory of pattern classes under discrete data streams for the blind-prediction setting.", "keywords": "Learning Theory; Online Learning; Continuous Processes", "primary_area": "learning_theory", "supplementary_material": "", "author": "Pramith Devulapalli;Steve Hanneke", "authorids": "~Pramith_Devulapalli1;~Steve_Hanneke1", "gender": "M;M", "homepage": ";http://www.stevehanneke.com", "dblp": "269/4502;40/154", "google_scholar": ";fEhNO7YAAAAJ", "orcid": ";", "linkedin": "pramithsdevulapalli;", "or_profile": "~Pramith_Devulapalli1;~Steve_Hanneke1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndevulapalli2024learning,\ntitle={Learning from Snapshots of Discrete and Continuous Data Streams},\nauthor={Pramith Devulapalli and Steve Hanneke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=GxwnQ8sxkL}\n}", "github": "", "reviewers": "F4wW;cGKc;dysh;jfDD", "pdf_size": 359919, "rating": "4;6;6;7", "confidence": "4;2;2;3", "soundness": "2;4;3;3", "novelty": "3;4;3;4", "presentation": "2;3;2;3", "wc_summary": "70;149;205;24", "wc_strengths": "43;85;34;39", "wc_weaknesses": "68;27;59;74", "wc_questions": "193;20;4;50", "wc_limitations": "1;2;1;20", "wc_review": "375;283;303;207", "wc_reply_reviewers": "0;45;0;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 112.0, 69.86773218017026 ], "wc_strengths_avg": [ 50.25, 20.314711418083203 ], "wc_weaknesses_avg": [ 57.0, 18.12456896039186 ], "wc_questions_avg": [ 66.75, 74.7374571416502 ], "wc_limitations_avg": [ 6.0, 8.093207028119323 ], "wc_review_avg": [ 292.0, 59.824744044584094 ], "wc_reply_reviewers_avg": [ 21.75, 21.775846711436962 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6022300360550514579&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "purdue.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Embedding Dimension of Contrastive Learning and $k$-Nearest Neighbors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95861", "id": "H0qu4moFly", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H0qu4moFly", "openreview": "https://openreview.net/forum?id=H0qu4moFly", "poster": "/media/PosterPDFs/NeurIPS%202024/95861.png?t=1731729111.8129022", "project": "", "author_site": "Dmitrii Avdiukhin, Vaggos Chatziafratis, Orr Fischer, Grigory Yaroslavtsev", "tldr": "", "abstract": "We study the embedding dimension of distance comparison data in two settings: contrastive learning and $k$-nearest neighbors ($k$-NN). In both cases, the goal is to find the smallest dimension $d$ of an $\\ell_p$-space in which a given dataset can be represented. We show that the arboricity of the associated graphs plays a key role in designing embeddings. Using this approach, for the most frequently used $\\ell_2$-distance, we get matching upper and lower bounds in both settings.\n \nIn contrastive learning, we are given $m$ labeled samples of the form $(x_i, y_i^+, z_i^-)$ representing the fact that the positive example $y_i$ is closer to the anchor $x_i$ than the negative example $z_i$. We show that for representing such dataset in:\n\n- $\\ell_2$: $d = \\Theta(\\sqrt{m})$ is necessary and sufficient.\n- $\\ell_p$ for $p \\ge 1$: $d = O(m)$ is sufficient and $d = \\tilde \\Omega(\\sqrt{m})$ is necessary.\n- $\\ell_\\infty$: $d = O(m^{2/3})$ is sufficient and $d = \\tilde \\Omega(\\sqrt{m})$ is necessary.\n\nWe also give results for the more general scenario when $t$ negatives are allowed.\n\nIn $k$-NN, for each of the $n$ data points we are given an ordered set of the closest $k$ points. We show that for preserving the ordering of the $k$-NN for every point in:\n- $\\ell_2$: $d = \\Theta(k)$ is necessary and sufficient.\n- $\\ell_p$ for $p \\ge 1$: $d = \\tilde O(k^2)$ is sufficient and $d=\\tilde \\Omega(k)$ is necessary.\n- $\\ell_\\infty$ : $d = \\tilde \\Omega(k)$ is necessary.\n\nFurthermore, if the goal is to not just preserve the ordering of the $k$-NN but also keep them as the nearest neighbors then $d = \\tilde O (\\mathrm{poly}(k))$ suffices in $\\ell_p$ for $p \\ge 1$.", "keywords": "Representation Learning;Ordinal Embedding;Metric Space;Nearest Neighbors;k-NN;Contrastive Learning;Embedding Dimension", "primary_area": "other", "supplementary_material": "/attachment/92aa10a2ef1ca77e4e29ef8e0bd2c32853225e6b.zip", "author": "Dmitrii Avdiukhin;Vaggos Chatziafratis;Orr Fischer;Grigory Yaroslavtsev", "authorids": "~Dmitrii_Avdiukhin1;~Vaggos_Chatziafratis1;~Orr_Fischer1;~Grigory_Yaroslavtsev1", "gender": ";M;;Unspecified", "homepage": "https://dyukha.github.io/;https://cs.stanford.edu/~vaggos/;;http://grigory.us", "dblp": "236/4913.html;193/9727;;31/7137", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;;AbRFE3IAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Dmitrii_Avdiukhin1;~Vaggos_Chatziafratis1;~Orr_Fischer1;~Grigory_Yaroslavtsev1", "aff": "Northwestern University;University of California, Santa Cruz;;George Mason University", "aff_domain": "northwestern.edu;ucsc.edu;;gmu.edu", "position": "Postdoc;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\navdiukhin2024embedding,\ntitle={Embedding Dimension of Contrastive Learning and \\$k\\$-Nearest Neighbors},\nauthor={Dmitrii Avdiukhin and Vaggos Chatziafratis and Orr Fischer and Grigory Yaroslavtsev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H0qu4moFly}\n}", "github": "", "reviewers": "wQgD;AWx5;x73K;ex1k", "pdf_size": 1309572, "rating": "5;6;6;8", "confidence": "1;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "2;3;4;3", "wc_summary": "29;74;356;80", "wc_strengths": "20;164;52;153", "wc_weaknesses": "93;621;62;165", "wc_questions": "4;292;9;136", "wc_limitations": "2;78;14;20", "wc_review": "148;1229;493;554", "wc_reply_reviewers": "0;319;0;13", "wc_reply_authors": "117;1895;0;28", "reply_reviewers": "0;3;0;1", "reply_authors": "1;5;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 134.75, 129.25048355808963 ], "wc_strengths_avg": [ 97.25, 62.407431448506195 ], "wc_weaknesses_avg": [ 235.25, 225.82556874720808 ], "wc_questions_avg": [ 110.25, 117.51249933517711 ], "wc_limitations_avg": [ 28.5, 29.304436524185206 ], "wc_review_avg": [ 606.0, 391.5884319026802 ], "wc_reply_reviewers_avg": [ 83.0, 136.35798473136805 ], "wc_reply_authors_avg": [ 510.0, 800.7961663244898 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7492686492653552, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KHojxYiSmkoJ:scholar.google.com/&scioq=Embedding+Dimension+of+Contrastive+Learning+and+%24k%24-Nearest+Neighbors&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "northwestern.edu;ucsc.edu;;gmu.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Northwestern University;University of California, Santa Cruz;George Mason University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northwestern.edu;https://www.ucsc.edu;https://www.gmu.edu", "aff_unique_abbr": "NU;UCSC;GMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LCM: Locally Constrained Compact Point Cloud Model for Masked Point Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95860", "id": "H1NklRKPYi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H1NklRKPYi", "openreview": "https://openreview.net/forum?id=H1NklRKPYi", "poster": "/media/PosterPDFs/NeurIPS%202024/95860.png?t=1731318696.0753655", "project": "", "author_site": "Yaohua Zha, Naiqi Li, Yanzi Wang, Tao Dai, Hang Guo, Bin Chen, Zhi Wang, Zhihao Ouyang, Shu-Tao Xia", "tldr": "", "abstract": "The pre-trained point cloud model based on Masked Point Modeling (MPM) has exhibited substantial improvements across various tasks. However, these models heavily rely on the Transformer, leading to quadratic complexity and limited decoder, hindering their practice application. To address this limitation, we first conduct a comprehensive analysis of existing Transformer-based MPM, emphasizing the idea that redundancy reduction is crucial for point cloud analysis. To this end, we propose a Locally constrained Compact point cloud Model (LCM) consisting of a locally constrained compact encoder and a locally constrained Mamba-based decoder. Our encoder replaces self-attention with our local aggregation layers to achieve an elegant balance between performance and efficiency. Considering the varying information density between masked and unmasked patches in the decoder inputs of MPM, we introduce a locally constrained Mamba-based decoder. This decoder ensures linear complexity while maximizing the perception of point cloud geometry information from unmasked patches with higher information density. Extensive experimental results show that our compact model significantly surpasses existing Transformer-based models in both performance and efficiency, especially our LCM-based Point-MAE model, compared to the Transformer-based model, achieved an improvement of 1.84%, 0.67%, and 0.60% in performance on the three variants of ScanObjectNN while reducing parameters by 88% and computation by 73%. The code is available at https://github.com/zyh16143998882/LCM.", "keywords": "Point Cloud;Self-supervised Learning; Masking Point Modeling", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yaohua Zha;Naiqi Li;Yanzi Wang;Tao Dai;Hang Guo;Bin Chen;Zhi Wang;Zhihao Ouyang;Shu-Tao Xia", "authorids": "~Yaohua_Zha1;~Naiqi_Li1;~Yanzi_Wang1;~Tao_Dai3;~Hang_Guo3;~Bin_Chen4;~Zhi_Wang5;~Zhihao_Ouyang1;~Shu-Tao_Xia1", "gender": "M;M;F;M;M;M;M;M;M", "homepage": "https://github.com/zyh16143998882;https://naiqili.github.io/;https://github.com/AMlizi;https://csse.szu.edu.cn/pages/user/index?id=1204;https://github.com/csguoh;https://binchen17tsinghua.wixsite.com/website;http://zwang.inflexionlab.org/;;https://www.sigs.tsinghua.edu.cn/xst/list.htm", "dblp": "344/5717;117/4912;133/3176;54/875-1;;22/5523-11;95/6543-1;;03/6195", "google_scholar": "https://scholar.google.com.hk/citations?user=-zUO4_QAAAAJ;5K2l_wUAAAAJ;;MqJNdaAAAAAJ;https://scholar.google.com.hk/citations?user=fRwhfpoAAAAJ;Yl0wv7AAAAAJ;PK8BtpwAAAAJ;wjCvLM8AAAAJ;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ", "orcid": "0000-0001-9789-452X;;;0000-0003-0594-6404;0000-0003-1746-2693;0000-0002-4798-230X;0000-0002-5462-6178;;0000-0002-8639-982X", "linkedin": ";;;;;;zhi-wang-b159071a/;zhihao-ouyang-46416a129;", "or_profile": "~Yaohua_Zha1;~Naiqi_Li1;~Yanzi_Wang1;~Tao_Dai3;~Hang_Guo3;~Bin_Chen4;~Zhi_Wang5;~Zhihao_Ouyang1;~Shu-Tao_Xia1", "aff": "Computer Science, Tsinghua University, Tsinghua University;Tsinghua University;Beijing University of Posts and Telecommunications;Department of Software Engineering, Shenzhen University;Tsinghua University;Harbin Institute of Technology, Shenzhen;SIGS, Tsinghua University;Bytedance;Shenzhen International Graduate School, Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;szu.edu;tsinghua.edu.cn;hit.edu.cn;tsinghua.edu.cn;bytedance.com;sz.tsinghua.edu.cn", "position": "MS student;PhD student;Undergrad student;Assistant Professor;MS student;Assistant Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nzha2024lcm,\ntitle={{LCM}: Locally Constrained Compact Point Cloud Model for Masked Point Modeling},\nauthor={Yaohua Zha and Naiqi Li and Yanzi Wang and Tao Dai and Hang Guo and Bin Chen and Zhi Wang and Zhihao Ouyang and Shu-Tao Xia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H1NklRKPYi}\n}", "github": "", "reviewers": "jpku;L8Jt;UfZW;y4yX", "pdf_size": 4618215, "rating": "6;6;6;8", "confidence": "4;5;5;5", "soundness": "3;4;2;3", "novelty": "3;4;3;4", "presentation": "3;3;3;2", "wc_summary": "65;112;98;88", "wc_strengths": "78;82;68;137", "wc_weaknesses": "102;149;92;120", "wc_questions": "3;23;123;32", "wc_limitations": "16;31;53;10", "wc_review": "264;397;434;387", "wc_reply_reviewers": "22;42;42;81", "wc_reply_authors": "0;0;0;122", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 17.137313091613866 ], "wc_strengths_avg": [ 91.25, 26.901440481877547 ], "wc_weaknesses_avg": [ 115.75, 21.660736367907717 ], "wc_questions_avg": [ 45.25, 46.09975596464693 ], "wc_limitations_avg": [ 27.5, 16.590660023037056 ], "wc_review_avg": [ 370.5, 63.93160407810835 ], "wc_reply_reviewers_avg": [ 46.75, 21.39363223017541 ], "wc_reply_authors_avg": [ 30.5, 52.827549630850754 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=725295129710646109&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;szu.edu;tsinghua.edu.cn;hit.edu.cn;tsinghua.edu.cn;bytedance.com;sz.tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;2;0;3;0;4;0", "aff_unique_norm": "Tsinghua University;Beijing University of Posts and Telecommunications;Shenzhen University;Harbin Institute of Technology;ByteDance", "aff_unique_dep": "Computer Science;;Department of Software Engineering;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/;https://www.szu.edu.cn;http://en.hhit.edu.cn/;https://www.bytedance.com", "aff_unique_abbr": "THU;BUPT;SZU;HIT;Bytedance", "aff_campus_unique_index": "1;2;2;2", "aff_campus_unique": ";Beijing;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ART: Automatic Red-teaming for Text-to-Image Models to Protect Benign Users", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95859", "id": "H2ATO32ilj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H2ATO32ilj", "openreview": "https://openreview.net/forum?id=H2ATO32ilj", "poster": "/media/PosterPDFs/NeurIPS%202024/95859.png?t=1731313893.3261812", "project": "", "author_site": "Guanlin Li, Kangjie Chen, Shudong Zhang, Jie Zhang, Tianwei Zhang", "tldr": "", "abstract": "Large-scale pre-trained generative models are taking the world by storm, due to their abilities in generating creative content. Meanwhile, safeguards for these generative models are developed, to protect users' rights and safety, most of which are designed for large language models. Existing methods primarily focus on jailbreak and adversarial attacks, which mainly evaluate the model's safety under malicious prompts. Recent work found that manually crafted safe prompts can unintentionally trigger unsafe generations. To further systematically evaluate the safety risks of text-to-image models, we propose a novel Automatic Red-Teaming framework, ART. Our method leverages both vision language model and large language model to establish a connection between unsafe generations and their prompts, thereby more efficiently identifying the model's vulnerabilities. With our comprehensive experiments, we reveal the toxicity of the popular open-source text-to-image models. The experiments also validate the effectiveness, adaptability, and great diversity of ART. Additionally, we introduce three large-scale red-teaming datasets for studying the safety risks associated with text-to-image models. Datasets and models can be found in https://github.com/GuanlinLee/ART.", "keywords": "Red-teaming;Text-to-image Model", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/a123aa12d3604269d11d4365140955e0e6811d08.zip", "author": "Guanlin Li;Kangjie Chen;Shudong Zhang;Jie Zhang;Tianwei Zhang", "authorids": "~Guanlin_Li2;~Kangjie_Chen1;~Shudong_Zhang1;~Jie_Zhang11;~Tianwei_Zhang1", "gender": "M;M;M;M;M", "homepage": "https://guanlinlee.github.io/;https://kangjie.me;https://github.com/shudong-zhang;https://zjzac.github.io/;https://personal.ntu.edu.sg/tianwei.zhang/index.html", "dblp": ";204/3003;42/6194;84/6889-73;77/7902-4", "google_scholar": "3LB0_wMAAAAJ;vEPnP6oAAAAJ;https://scholar.google.com.hk/citations?user=tAw1cT8AAAAJ;7YkR3CoAAAAJ//;9vpiYDIAAAAJ", "orcid": ";0000-0001-5099-7054;0000-0002-7358-5543;0000-0002-4230-1077;", "linkedin": ";;;;", "or_profile": "~Guanlin_Li2;~Kangjie_Chen1;~Shudong_Zhang1;~Jie_Zhang11;~Tianwei_Zhang1", "aff": "Nanyang Technological University;Nanyang Technological University;Huawei Technologies Ltd.;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;huawei.com;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Researcher;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nli2024art,\ntitle={{ART}: Automatic Red-teaming for Text-to-Image Models to Protect Benign Users},\nauthor={Guanlin Li and Kangjie Chen and Shudong Zhang and Jie Zhang and Tianwei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H2ATO32ilj}\n}", "github": "", "reviewers": "b5LW;N2jw;7SKE;vKvj", "pdf_size": 4072232, "rating": "4;5;6;6", "confidence": "4;2;3;4", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "3;2;4;2", "wc_summary": "109;64;99;44", "wc_strengths": "104;39;63;48", "wc_weaknesses": "67;62;177;44", "wc_questions": "67;59;24;36", "wc_limitations": "67;7;5;1", "wc_review": "414;231;368;173", "wc_reply_reviewers": "0;21;385;9", "wc_reply_authors": "0;86;960;0", "reply_reviewers": "0;1;3;1", "reply_authors": "1;2;3;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 79.0, 26.22022120425379 ], "wc_strengths_avg": [ 63.5, 24.904818810824544 ], "wc_weaknesses_avg": [ 87.5, 52.376044142336674 ], "wc_questions_avg": [ 46.5, 17.269916039170543 ], "wc_limitations_avg": [ 20.0, 27.2213151776324 ], "wc_review_avg": [ 296.5, 98.0573811602166 ], "wc_reply_reviewers_avg": [ 103.75, 162.5505690546791 ], "wc_reply_authors_avg": [ 261.5, 404.8045824839437 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2094873406632049738&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;ntu.edu.sg;huawei.com;ntu.edu.sg;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ntu.edu.sg;https://www.huawei.com", "aff_unique_abbr": "NTU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Self-Retrieval: End-to-End Information Retrieval with One Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95858", "id": "H3at5y8VFW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H3at5y8VFW", "openreview": "https://openreview.net/forum?id=H3at5y8VFW", "poster": "/media/PosterPDFs/NeurIPS%202024/95858.png?t=1731502804.9130898", "project": "", "author_site": "Qiaoyu Tang, Jiawei Chen, Zhuoqun Li, Bowen Yu, Yaojie Lu, ChengFu, Haiyang Yu, Hongyu Lin, Fei Huang, Ben He, Xianpei Han, Le Sun, Yongbin Li", "tldr": "", "abstract": "The rise of large language models (LLMs) has significantly transformed both the construction and application of information retrieval (IR) systems. \nHowever, current interactions between IR systems and LLMs remain limited, with LLMs merely serving as part of components within IR systems, and IR systems being constructed independently of LLMs. This separated architecture restricts knowledge sharing and deep collaboration between them.\nIn this paper, we introduce Self-Retrieval, a novel end-to-end LLM-driven information retrieval architecture.\nSelf-Retrieval unifies all essential IR functions within a single LLM, leveraging the inherent capabilities of LLMs throughout the IR process.\nSpecifically, Self-Retrieval internalizes the retrieval corpus through self-supervised learning, transforms the retrieval process into sequential passage generation, and performs relevance assessment for reranking.\nExperimental results demonstrate that Self-Retrieval not only outperforms existing retrieval approaches by a significant margin, but also substantially enhances the performance of LLM-driven downstream applications like retrieval-augmented generation.", "keywords": "Large Language Model;Information Retrieval;Retrieval Augmented Generation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Qiaoyu Tang;Jiawei Chen;Zhuoqun Li;Bowen Yu;Yaojie Lu;ChengFu;Haiyang Yu;Hongyu Lin;Fei Huang;Ben He;Xianpei Han;Le Sun;Yongbin Li", "authorids": "~Qiaoyu_Tang1;~Jiawei_Chen7;~Zhuoqun_Li1;~Bowen_Yu3;~Yaojie_Lu1;~ChengFu1;~Haiyang_Yu3;~Hongyu_Lin1;~Fei_Huang2;~Ben_He1;~Xianpei_Han1;~Le_Sun1;~Yongbin_Li2", "gender": "M;M;M;M;M;M;M;M;M;M;M;M;M", "homepage": ";https://chen700564.github.io;;https://yubowen-ph.github.io/;https://luyaojie.github.io;;;http://linhongyu.top/;https://sites.google.com/view/fei-huang;http://people.ucas.ac.cn/~benhe;http://www.icip.org.cn/team/homepage/;http://www.icip.org.cn/team/sunle/;https://yongbin-li.github.io/", "dblp": "347/9053;03/1390-11;;95/10266-2.html;15/3214;;90/6643-3;;h/FeiHuang.html;;57/2368;78/5897-1;", "google_scholar": ";https://scholar.google.com.hk/citations?user=E3uztpUAAAAJ;IrSf23QAAAAJ;oHoEp34AAAAJ;s0mCSI0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;VhWV-1wAAAAJ;mu5lLakAAAAJ;9r98PpoAAAAJ;https://scholar.google.com/citations?view_op=list_works;pA88bm4AAAAJ;6bFNhtwAAAAJ;xF5VrokAAAAJ", "orcid": ";;;0000-0002-6804-1859;0000-0002-5842-7715;0009-0000-3693-0283;;;;;;;", "linkedin": ";;;;;;;;fei-huang-cas-cmu;;;;", "or_profile": "~Qiaoyu_Tang1;~Jiawei_Chen7;~Zhuoqun_Li1;~Bowen_Yu3;~Yaojie_Lu1;~ChengFu1;~Haiyang_Yu3;~Hongyu_Lin1;~Fei_Huang2;~Ben_He1;~Xianpei_Han1;~Le_Sun1;~Yongbin_Li2", "aff": "Institute of Software, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Alibaba Group;Institute of Software, Chinese Academy of Sciences;Alibaba Group;Alibaba Group;Institute of Software, Chinese Academy of Sciences;Alibaba Group US;University of Chinese Academy of Sciences;Institute of Software, CAS;Institute of Software, Chinese Academy of Sciences;Alibaba Group", "aff_domain": "iscas.ac.cn;ucas.ac.cn;ucas.ac.cn;alibaba-inc.com;iscas.ac.cn;alibaba-inc.com;alibaba-inc.com;iscas.ac.cn;alibaba-inc.com;ucas.ac.cn;iscas.ac.cn;iscas.ac.cn;alibaba-inc.com", "position": "PhD student;PhD student;PhD student;Researcher;Postdoc;Instructor;Researcher;Associate Professor;Senior Research Director;Full Professor;Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ntang2024selfretrieval,\ntitle={Self-Retrieval: End-to-End Information Retrieval with One Large Language Model},\nauthor={Qiaoyu Tang and Jiawei Chen and Zhuoqun Li and Bowen Yu and Yaojie Lu and ChengFu and Haiyang Yu and Hongyu Lin and Fei Huang and Ben He and Xianpei Han and Le Sun and Yongbin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H3at5y8VFW}\n}", "github": "", "reviewers": "uLuX;iXLD;g3kT;xB99", "pdf_size": 1471890, "rating": "5;6;6;6", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "76;74;61;63", "wc_strengths": "85;42;99;56", "wc_weaknesses": "189;117;192;58", "wc_questions": "6;19;44;31", "wc_limitations": "1;5;1;1", "wc_review": "357;257;397;209", "wc_reply_reviewers": "0;72;37;0", "wc_reply_authors": "103;139;248;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 6.576473218982953 ], "wc_strengths_avg": [ 70.5, 22.610838109190027 ], "wc_weaknesses_avg": [ 139.0, 55.57427462414602 ], "wc_questions_avg": [ 25.0, 14.089002803605371 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 305.0, 75.31268153505093 ], "wc_reply_reviewers_avg": [ 27.25, 29.92803869283786 ], "wc_reply_authors_avg": [ 122.5, 88.61292230820514 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17523853129429800829&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "email": "iscas.ac.cn;ucas.ac.cn;ucas.ac.cn;alibaba-inc.com;iscas.ac.cn;alibaba-inc.com;alibaba-inc.com;iscas.ac.cn;alibaba-inc.com;ucas.ac.cn;iscas.ac.cn;iscas.ac.cn;alibaba-inc.com", "author_num": 13, "aff_unique_index": "0;0;1;2;0;2;2;0;2;1;0;0;2", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Alibaba Group", "aff_unique_dep": "Institute of Software;;", "aff_unique_url": "http://www.ios.ac.cn;http://www.ucas.ac.cn;https://www.alibaba.com", "aff_unique_abbr": "CAS;UCAS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "LVD-2M: A Long-take Video Dataset with Temporally Dense Captions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97774", "id": "H5bUdfM55S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H5bUdfM55S", "openreview": "https://openreview.net/forum?id=H5bUdfM55S", "poster": "/media/PosterPDFs/NeurIPS%202024/97774.png?t=1731330188.557254", "project": "", "author_site": "Tianwei Xiong, Yuqing Wang, Daquan Zhou, Zhijie Lin, Jiashi Feng, Xihui Liu", "tldr": "", "abstract": "The efficacy of video generation models heavily depends on the quality of their training datasets. Most previous video generation models are trained on short video clips, while recently there has been increasing interest in training long video generation models directly on longer videos. However, the lack of such high-quality long videos impedes the advancement long video generation. To promote research in long video generation, we desire a new dataset with four key features essential for training long video generation models: (1) long videos covering at least 10 seconds, (2) long-take videos without cuts, (3) large motion and diverse contents, and (4) temporally dense captions. To achieve this, we introduce a new pipeline for filtering high-quality long-take videos and generating temporally dense captions. Specifically, we define a set of metrics to quantitatively assess video quality including scene cuts, dynamic degrees, and semantic-level scores, enabling us to filter high-quality long-take videos from a large amount of source videos. Subsequently, we develop a hierarchical video captioning pipeline to annotate long videos with temporally-dense captions. With this pipeline, we curate the first long-take video dataset, LVD-2M, comprising 2 million long-take videos, each covering more than 10 seconds and annotated with temporally dense captions. We further validate the effectiveness of LVD-2M by fine-tuning video generation models to generate long videos with dynamic motions. We believe it will significantly contribute to future research in long video generation.", "keywords": "long video generation", "primary_area": "", "supplementary_material": "/attachment/f57dac0ce5fcfc1329de76f19efc005b506e7603.zip", "author": "Tianwei Xiong;Yuqing Wang;Daquan Zhou;Zhijie Lin;Jiashi Feng;Xihui Liu", "authorids": "~Tianwei_Xiong1;~Yuqing_Wang4;~Daquan_Zhou1;~Zhijie_Lin1;~Jiashi_Feng1;~Xihui_Liu1", "gender": "M;;M;F;M;M", "homepage": "https://github.com/SilentView;;;https://xh-liu.github.io/;;https://sites.google.com/site/jshfeng/", "dblp": "315/7910;;;184/3911;244/9623;56/8278", "google_scholar": "tTMKGSYAAAAJ;QC7nNe0AAAAJ;xXMj6_EAAAAJ;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ;DdCAbWwAAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ", "orcid": "0009-0007-4898-3834;;0000-0003-3461-8952;0000-0003-1831-9952;;0000-0001-6843-0064", "linkedin": "tianwei-xiong-633a70266/?originalSubdomain=hk;;;;;", "or_profile": "~Tianwei_Xiong1;~Yuqing_Wang4;~Zhijie_Lin1;~Xihui_Liu1;~Zhou_Daquan1;~Jiashi_Feng2", "aff": "Tsinghua University;The University of HongKong;ByteDance Inc.;University of Hong Kong;Bytedance;ByteDance", "aff_domain": "tsinghua.edu.cn;connect.hku.hk;bytedance.com;hku.hk;bytedance.com;bytedance.com", "position": "Undergrad student;PhD student;Researcher;Assistant Professor;Researcher;Research Lead", "bibtex": "@inproceedings{\nxiong2024lvdm,\ntitle={{LVD}-2M: A Long-take Video Dataset with Temporally Dense Captions},\nauthor={Tianwei Xiong and Yuqing Wang and Daquan Zhou and Zhijie Lin and Jiashi Feng and Xihui Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=H5bUdfM55S}\n}", "github": "", "reviewers": "pwtS;28wF;fsrx;ouB8", "pdf_size": 20016778, "rating": "5;7;7;7", "confidence": "4;5;4;5", "wc_summary_and_contributions": "71;136;79;89", "wc_strengths": "122;44;4;75", "wc_improvement": "64;103;226;15", "wc_limitations": "40;16;5;25", "wc_correctness": "23;15;1;1", "wc_clarity": "1;5;1;1", "wc_relation_to_prior_work": "1;50;32;1", "wc_documentation": "1;29;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "324;399;350;209", "wc_reply_reviewers": "0;91;153;40", "wc_reply_authors": "162;41;66;267", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 93.75, 25.21284394906691 ], "wc_strengths_avg": [ 61.25, 43.170447067409434 ], "wc_improvement_avg": [ 102.0, 78.08649050892222 ], "wc_limitations_avg": [ 21.5, 12.816005617976296 ], "wc_correctness_avg": [ 10.0, 9.433981132056603 ], "wc_clarity_avg": [ 2.0, 1.7320508075688772 ], "wc_relation_to_prior_work_avg": [ 21.0, 20.988091861815356 ], "wc_documentation_avg": [ 8.0, 12.12435565298214 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 320.5, 69.7800114646021 ], "wc_reply_reviewers_avg": [ 71.0, 57.28437832428663 ], "wc_reply_authors_avg": [ 134.0, 89.08703609392334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15245067412063674643&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;connect.hku.hk;bytedance.com;hku.hk;bytedance.com;bytedance.com", "author_num": 6, "aff_unique_index": "0;1;2;1;2;2", "aff_unique_norm": "Tsinghua University;University of Hong Kong;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk;https://www.bytedance.com", "aff_unique_abbr": "THU;HKU;ByteDance", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Physics-informed Neural Networks for Functional Differential Equations: Cylindrical Approximation and Its Convergence Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95857", "id": "H5z0XqEX57", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H5z0XqEX57", "openreview": "https://openreview.net/forum?id=H5z0XqEX57", "poster": "/media/PosterPDFs/NeurIPS%202024/95857.png?t=1729751904.9208124", "project": "", "author_site": "Taiki Miyagawa, Takeru Yokota", "tldr": "", "abstract": "We propose the first learning scheme for functional differential equations (FDEs).\nFDEs play a fundamental role in physics, mathematics, and optimal control.\nHowever, the numerical analysis of FDEs has faced challenges due to its unrealistic computational costs and has been a long standing problem over decades.\nThus, numerical approximations of FDEs have been developed, but they often oversimplify the solutions. \nTo tackle these two issues, we propose a hybrid approach combining physics-informed neural networks (PINNs) with the *cylindrical approximation*. \nThe cylindrical approximation expands functions and functional derivatives with an orthonormal basis and transforms FDEs into high-dimensional PDEs. \nTo validate the reliability of the cylindrical approximation for FDE applications, we prove the convergence theorems of approximated functional derivatives and solutions.\nThen, the derived high-dimensional PDEs are numerically solved with PINNs.\nThrough the capabilities of PINNs, our approach can handle a broader class of functional derivatives more efficiently than conventional discretization-based methods, improving the scalability of the cylindrical approximation.\nAs a proof of concept, we conduct experiments on two FDEs and demonstrate that our model can successfully achieve typical $L^1$ relative error orders of PINNs $\\sim 10^{-3}$.\nOverall, our work provides a strong backbone for physicists, mathematicians, and machine learning experts to analyze previously challenging FDEs, thereby democratizing their numerical analysis, which has received limited attention.", "keywords": "Physics-informed neural network;Functional differential equation;Functional derivative", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/a99995ac8da405c99530f8d7d4670eef6390d319.zip", "author": "Taiki Miyagawa;Takeru Yokota", "authorids": "~Taiki_Miyagawa1;~Takeru_Yokota1", "gender": "M;M", "homepage": "https://sites.google.com/view/taiki-miyagawa-kanaheinousagi;https://ithems.riken.jp/ja/members/takeru-yokota", "dblp": "267/2393;", "google_scholar": "https://scholar.google.co.jp/citations?hl=ja;", "orcid": "0000-0001-7651-6706;", "linkedin": "https://jp.linkedin.com/in/taiki-miyagawa-67a562192;", "or_profile": "~Taiki_Miyagawa1;~Takeru_Yokota1", "aff": "NEC;RIKEN", "aff_domain": "nec.com;riken.jp", "position": "Researcher;Postdoc", "bibtex": "@inproceedings{\nmiyagawa2024physicsinformed,\ntitle={Physics-informed Neural Networks for Functional Differential Equations: Cylindrical Approximation and Its Convergence Guarantees},\nauthor={Taiki Miyagawa and Takeru Yokota},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H5z0XqEX57}\n}", "github": "", "reviewers": "5Xf5;33cu;LXGL", "pdf_size": 19790816, "rating": "4;6;8", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "1;3;4", "presentation": "2;3;4", "wc_summary": "25;63;57", "wc_strengths": "51;40;53", "wc_weaknesses": "191;149;28", "wc_questions": "139;4;4", "wc_limitations": "2;2;12", "wc_review": "408;258;154", "wc_reply_reviewers": "374;72;0", "wc_reply_authors": "523;62;0", "reply_reviewers": "2;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 1.632993161855452 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 48.333333333333336, 16.679994670929073 ], "wc_strengths_avg": [ 48.0, 5.715476066494082 ], "wc_weaknesses_avg": [ 122.66666666666667, 69.10057082896559 ], "wc_questions_avg": [ 49.0, 63.63961030678928 ], "wc_limitations_avg": [ 5.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 273.3333333333333, 104.26035786540453 ], "wc_reply_reviewers_avg": [ 148.66666666666666, 162.02331793774485 ], "wc_reply_authors_avg": [ 195.0, 233.30809387303017 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pSyLrgNw9HEJ:scholar.google.com/&scioq=Physics-informed+Neural+Networks+for+Functional+Differential+Equations:+Cylindrical+Approximation+and+Its+Convergence+Guarantees&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "nec.com;riken.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "NEC Corporation;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.nec.com;https://www.riken.jp", "aff_unique_abbr": "NEC;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "OmniTokenizer: A Joint Image-Video Tokenizer for Visual Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95856", "id": "H6C4p8Dir7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H6C4p8Dir7", "openreview": "https://openreview.net/forum?id=H6C4p8Dir7", "poster": "/media/PosterPDFs/NeurIPS%202024/95856.png?t=1732103268.3283308", "project": "", "author_site": "Junke Wang, Yi Jiang, Zehuan Yuan, BINGYUE PENG, Zuxuan Wu, Yu-Gang Jiang", "tldr": "", "abstract": "Tokenizer, serving as a translator to map the intricate visual data into a compact latent space, lies at the core of visual generative models. Based on the finding that existing tokenizers are tailored to either image or video inputs, this paper presents OmniTokenizer, a transformer-based tokenizer for joint image and video tokenization. OmniTokenizer is designed with a spatial-temporal decoupled architecture, which integrates window attention and causal attention for spatial and temporal modeling, respectively. To exploit the complementary nature of image and video data, we further propose a progressive training strategy, where OmniTokenizer is first trained on image data on a fixed resolution to develop the spatial encoding capacity and then jointly trained on image and video data on multiple resolutions to learn the temporal dynamics. OmniTokenizer, for the first time, handles both image and video inputs within a unified framework and proves the possibility of realizing their synergy. Extensive experiments demonstrate that OmniTokenizer achieves state-of-the-art (SOTA) reconstruction performance on various image and video datasets, e.g., 1.11 reconstruction FID on ImageNet and 42 reconstruction FVD on UCF-101, beating the previous SOTA methods by 13% and 26%, respectively. Additionally, we also show that when integrated with OmniTokenizer, both language model-based approaches and diffusion models can realize advanced visual synthesis performance, underscoring the superiority and versatility of our method.", "keywords": "visual generation;tokenizer;language model", "primary_area": "generative_models", "supplementary_material": "", "author": "Junke Wang;Yi Jiang;Zehuan Yuan;BINGYUE PENG;Zuxuan Wu;Yu-Gang Jiang", "authorids": "~Junke_Wang1;~Yi_Jiang2;~Zehuan_Yuan1;~BINGYUE_PENG1;~Zuxuan_Wu1;~Yu-Gang_Jiang1", "gender": "M;M;M;M;M;M", "homepage": "http://www.wangjunke.info;https://enjoyyi.github.io/;https://shallowyuan.github.io/;https://www.linkedin.com/in/bingyp/;https://zxwu.azurewebsites.net/;https://fvl.fudan.edu.cn/people/yugangjiang/", "dblp": ";;227/3298;57/11335.html;150/8447;24/5818", "google_scholar": "9GAfNeUAAAAJ;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;;;7t12hVkAAAAJ;f3_FP8AAAAAJ", "orcid": ";0000-0002-2133-8719;;;;", "linkedin": ";;;;;", "or_profile": "~Junke_Wang1;~Yi_Jiang2;~Zehuan_Yuan1;~BINGYUE_PENG1;~Zuxuan_Wu1;~Yu-Gang_Jiang1", "aff": "Fudan University;Bytedance;ByteDance Inc.;ByteDance Inc.;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;bytedance.com;bytedance.com;bytedance.com;fudan.edu;fudan.edu.cn", "position": "PhD student;Researcher;Researcher;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024omnitokenizer,\ntitle={OmniTokenizer: A Joint Image-Video Tokenizer for Visual Generation},\nauthor={Junke Wang and Yi Jiang and Zehuan Yuan and BINGYUE PENG and Zuxuan Wu and Yu-Gang Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H6C4p8Dir7}\n}", "github": "", "reviewers": "Nxg3;ZuXX;hbMY;pXcq", "pdf_size": 6177462, "rating": "4;6;6;6", "confidence": "3;5;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "88;112;64;85", "wc_strengths": "51;62;119;56", "wc_weaknesses": "90;360;186;130", "wc_questions": "45;48;105;2", "wc_limitations": "1;1;16;1", "wc_review": "275;583;490;274", "wc_reply_reviewers": "28;63;86;0", "wc_reply_authors": "72;18;21;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 17.020208576865326 ], "wc_strengths_avg": [ 72.0, 27.4135003237456 ], "wc_weaknesses_avg": [ 191.5, 103.08612903780993 ], "wc_questions_avg": [ 50.0, 36.5991803186902 ], "wc_limitations_avg": [ 4.75, 6.49519052838329 ], "wc_review_avg": [ 405.5, 135.06387377829796 ], "wc_reply_reviewers_avg": [ 44.25, 32.85098933061225 ], "wc_reply_authors_avg": [ 27.75, 26.78035660703569 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2269276233029201318&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "fudan.edu.cn;bytedance.com;bytedance.com;bytedance.com;fudan.edu;fudan.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;0;0", "aff_unique_norm": "Fudan University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Fudan;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning the Infinitesimal Generator of Stochastic Diffusion Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95855", "id": "H7SaaqfCUi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H7SaaqfCUi", "openreview": "https://openreview.net/forum?id=H7SaaqfCUi", "poster": "", "project": "", "author_site": "Vladimir Kostic, H\u00e9l\u00e8ne Halconruy, Timoth\u00e9e Devergne, Karim Lounici, Massimiliano Pontil", "tldr": "", "abstract": "We address data-driven learning of the infinitesimal generator of stochastic diffusion processes, essential for understanding numerical simulations of natural and physical systems. The unbounded nature of the generator poses significant challenges, rendering conventional analysis techniques for Hilbert-Schmidt operators ineffective. To overcome this, we introduce a novel framework based on the energy functional for these stochastic processes. Our approach integrates physical priors through an energy-based risk metric in both full and partial knowledge settings. We evaluate the statistical performance of a reduced-rank estimator in reproducing kernel Hilbert spaces (RKHS) in the partial knowledge setting. Notably, our approach provides learning bounds independent of the state space dimension and ensures non-spurious spectral estimation. Additionally, we elucidate how the distortion between the intrinsic energy-induced metric of the stochastic diffusion and the RKHS metric used for generator estimation impacts the spectral learning bounds.", "keywords": "Stochastic Diffusion Processes;Infinitesimal Generator;RKHS;non-asymptotic learning bounds", "primary_area": "learning_theory", "supplementary_material": "", "author": "Vladimir R Kostic;H\u00e9l\u00e8ne Halconruy;Timoth\u00e9e Devergne;Karim Lounici;Massimiliano Pontil", "authorids": "~Vladimir_R_Kostic1;~H\u00e9l\u00e8ne_Halconruy1;~Timoth\u00e9e_Devergne1;~Karim_Lounici1;~Massimiliano_Pontil3", "gender": "M;F;M;;", "homepage": "https://vladi-iit.github.io/;https://sites.google.com/view/helene-halconruy-math;;;", "dblp": "94/879;;;;", "google_scholar": "66gV7SAAAAAJ;;;;", "orcid": ";;0000-0001-8369-237X;;", "linkedin": "vladimir-kostic-77500652/;;;;", "or_profile": "~Vladimir_R_Kostic1;~H\u00e9l\u00e8ne_Halconruy1;~Timoth\u00e9e_Devergne1;~Karim_Lounici1;~Massimiliano_Pontil3", "aff": "University of Novi Sad;Telecom SudParis;Universit\u00e0 degli Studi di Genova, Istituto Italiano di Tecnologia;;", "aff_domain": "uns.ac.rs;telecom-sudparis.eu;iit.it;;", "position": "Associate Professor;Assistant Professor;Postdoc;;", "bibtex": "@inproceedings{\nkostic2024learning,\ntitle={Learning the Infinitesimal Generator of Stochastic Diffusion Processes},\nauthor={Vladimir R Kostic and H{\\'e}l{\\`e}ne Halconruy and Timoth{\\'e}e Devergne and Karim Lounici and Massimiliano Pontil},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H7SaaqfCUi}\n}", "github": "", "reviewers": "nFzp;6wyC;PUzn;D2P3", "pdf_size": 2158359, "rating": "5;6;7;8", "confidence": "3;2;3;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "4;3;3;4", "wc_summary": "61;156;90;41", "wc_strengths": "42;155;19;54", "wc_weaknesses": "82;385;88;75", "wc_questions": "171;2;17;270", "wc_limitations": "4;7;11;18", "wc_review": "360;705;225;458", "wc_reply_reviewers": "0;13;34;84", "wc_reply_authors": "0;40;40;44", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.0, 43.47988040461933 ], "wc_strengths_avg": [ 67.5, 52.06006146750117 ], "wc_weaknesses_avg": [ 157.5, 131.4277367985921 ], "wc_questions_avg": [ 115.0, 111.28117540716399 ], "wc_limitations_avg": [ 10.0, 5.244044240850758 ], "wc_review_avg": [ 437.0, 175.45512246725656 ], "wc_reply_reviewers_avg": [ 32.75, 31.979485611873123 ], "wc_reply_authors_avg": [ 31.0, 17.97220075561143 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17961806278380268558&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uns.ac.rs;telecom-sudparis.eu;iit.it;;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Novi Sad;Telecom SudParis;Universit\u00e0 degli Studi di Genova", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uns.ac.rs;https://www.telecom-sudparis.eu;https://www.unige.it", "aff_unique_abbr": "UNS;TSP;UniGe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Serbia;France;Italy" }, { "title": "Ultrafast classical phylogenetic method beats large protein language models on variant effect prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95854", "id": "H7mENkYB2J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H7mENkYB2J", "openreview": "https://openreview.net/forum?id=H7mENkYB2J", "poster": "/media/PosterPDFs/NeurIPS%202024/95854.png?t=1731735554.1649737", "project": "", "author_site": "Sebastian Prillo, Wilson Wu, Yun Song", "tldr": "", "abstract": "Amino acid substitution rate matrices are fundamental to statistical phylogenetics and evolutionary biology. Estimating them typically requires reconstructed trees for massive amounts of aligned proteins, which poses a major computational bottleneck. In this paper, we develop a near-linear time method to estimate these rate matrices from multiple sequence alignments (MSAs) alone, thereby speeding up computation by orders of magnitude. Our method relies on a near-linear time cherry reconstruction algorithm which we call FastCherries and it can be easily applied to MSAs with millions of sequences. On both simulated and real data, we demonstrate the speed and accuracy of our method as applied to the classical model of protein evolution. By leveraging the unprecedented scalability of our method, we develop a new, rich phylogenetic model called SiteRM, which can estimate a general site-specific rate matrix for each column of an MSA. Remarkably, in variant effect prediction for both clinical and deep mutational scanning data in ProteinGym, we show that despite being an independent-sites model, our SiteRM model outperforms large protein language models that learn complex residue-residue interactions between different sites. We attribute our increased performance to conceptual advances in our probabilistic treatment of evolutionary data and our ability to handle extremely large MSAs. We anticipate that our work will have a lasting impact across both statistical phylogenetics and computational variant effect prediction. FastCherries and SiteRM are implemented in the CherryML package https://github.com/songlab-cal/CherryML.", "keywords": "protein language models;evolution;phylogenetics;rate estimation;variant effect prediction", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Sebastian Prillo;Wilson Y. Wu;Yun S. Song", "authorids": "~Sebastian_Prillo1;~Wilson_Y._Wu1;~Yun_S._Song1", "gender": ";M;", "homepage": "https://sprillo.github.io/;;", "dblp": ";;", "google_scholar": "dvJiM_kAAAAJ;;", "orcid": ";;", "linkedin": "sebasti%C3%A1n-prillo-7b135816;uw-nosliw/;", "or_profile": "~Sebastian_Prillo1;~Wilson_Y._Wu1;~Yun_S._Song1", "aff": "Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Berkeley;", "aff_domain": "eecs.berkeley.edu;berkeley.edu;", "position": "PhD student;MS student;", "bibtex": "@inproceedings{\nprillo2024ultrafast,\ntitle={Ultrafast classical phylogenetic method beats large protein language models on variant effect prediction},\nauthor={Sebastian Prillo and Wilson Y. Wu and Yun S. Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H7mENkYB2J}\n}", "github": "", "reviewers": "BDPQ;f7u7;cZPC;qr3c", "pdf_size": 1184432, "rating": "5;6;7;7", "confidence": "4;3;5;4", "soundness": "2;4;4;3", "novelty": "2;3;3;2", "presentation": "3;3;4;2", "wc_summary": "248;61;78;48", "wc_strengths": "59;73;22;150", "wc_weaknesses": "178;85;45;61", "wc_questions": "47;2;49;68", "wc_limitations": "1;6;1;94", "wc_review": "533;227;195;421", "wc_reply_reviewers": "88;32;111;0", "wc_reply_authors": "128;0;60;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 108.75, 81.09677860433175 ], "wc_strengths_avg": [ 76.0, 46.61008474568567 ], "wc_weaknesses_avg": [ 92.25, 51.51395442013746 ], "wc_questions_avg": [ 41.5, 24.23324163210527 ], "wc_limitations_avg": [ 25.5, 39.60113634733226 ], "wc_review_avg": [ 344.0, 139.2300254973761 ], "wc_reply_reviewers_avg": [ 57.75, 44.01349224953639 ], "wc_reply_authors_avg": [ 47.0, 52.79204485526205 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JaSnvOuuU6MJ:scholar.google.com/&scioq=Ultrafast+classical+phylogenetic+method+beats+large+protein+language+models+on+variant+effect+prediction&hl=en&as_sdt=0,48", "gs_version_total": 0, "email": "eecs.berkeley.edu;berkeley.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering & Computer Science Department", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Achieving Linear Convergence with Parameter-Free Algorithms in Decentralized Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95853", "id": "H7qVZ0Zu8E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=H7qVZ0Zu8E", "openreview": "https://openreview.net/forum?id=H7qVZ0Zu8E", "poster": "", "project": "", "author_site": "Ilya Kuruzov, Gesualdo Scutari, Alexander Gasnikov", "tldr": "", "abstract": "This paper addresses the minimization of the sum of strongly convex, smooth\nfunctions over a network of agents without a centralized server. Existing decentralized algorithms require knowledge of functions and network parameters, such as the Lipschitz constant of the global gradient and/or network connectivity, for\nhyperparameter tuning. Agents usually cannot access this information, leading\nto conservative selections and slow convergence or divergence. This paper introduces a decentralized algorithm that eliminates the need for specific parameter\ntuning. Our approach employs an operator splitting technique with a novel variable\nmetric, enabling a local backtracking line-search to adaptively select the stepsize\nwithout global information or extensive communications. This results in favorable\nconvergence guarantees and dependence on optimization and network parameters\ncompared to existing nonadaptive methods. Notably, our method is the first adaptive decentralized algorithm that achieves linear convergence for strongly convex,\nsmooth objectives. Preliminary numerical experiments support our theoretical\nfindings, demonstrating superior performance in convergence speed and scalability.", "keywords": "decentralized optimization;linear convergence;parameter free", "primary_area": "optimization", "supplementary_material": "/attachment/d45f4cd3097b4ae4c5f5e5bb0b42e1cab0898993.zip", "author": "Ilya Kuruzov;Gesualdo Scutari;Alexander Gasnikov", "authorids": "~Ilya_Kuruzov1;~Gesualdo_Scutari1;~Alexander_Gasnikov1", "gender": "M;M;M", "homepage": ";https://engineering.purdue.edu/~gscutari/;https://arxiv.org/search/?query=Gasnikov&searchtype=all&source=header", "dblp": ";;153/1930", "google_scholar": "5luBvM8AAAAJ;https://scholar.google.com/citations?hl=en;AmeE8qkAAAAJ", "orcid": "0000-0002-2715-5489;0000-0002-6453-6870;", "linkedin": ";;", "or_profile": "~Ilya_Kuruzov1;~Gesualdo_Scutari1;~Alexander_Vladimirovich_Gasnikov1", "aff": "Moscow Institute of Physics and Technology;Purdue University;Moscow Institute of Physics and Technology", "aff_domain": "mipt.edu;purdue.edu;mipt.ru", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkuruzov2024achieving,\ntitle={Achieving Linear Convergence with Parameter-Free Algorithms in Decentralized Optimization},\nauthor={Ilya Kuruzov and Gesualdo Scutari and Alexander Gasnikov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=H7qVZ0Zu8E}\n}", "github": "", "reviewers": "E8C7;ioM9;H817;aLXV", "pdf_size": 2130448, "rating": "5;5;6;7", "confidence": "2;3;4;4", "soundness": "3;2;1;4", "novelty": "3;2;1;3", "presentation": "2;1;2;3", "wc_summary": "33;23;40;66", "wc_strengths": "43;17;29;50", "wc_weaknesses": "56;246;43;96", "wc_questions": "2;4;1;31", "wc_limitations": "1;4;1;1", "wc_review": "135;294;114;244", "wc_reply_reviewers": "15;91;153;10", "wc_reply_authors": "0;150;947;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 40.5, 15.913830462839549 ], "wc_strengths_avg": [ 34.75, 12.735285626950029 ], "wc_weaknesses_avg": [ 110.25, 80.77244270170365 ], "wc_questions_avg": [ 9.5, 12.459935794377111 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 196.75, 74.75083611572515 ], "wc_reply_reviewers_avg": [ 67.25, 59.00158896165424 ], "wc_reply_authors_avg": [ 274.25, 393.2101060501879 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zU4czCVRiSIJ:scholar.google.com/&scioq=Achieving+Linear+Convergence+with+Parameter-Free+Algorithms+in+Decentralized+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "mipt.edu;purdue.edu;mipt.ru", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Moscow Institute of Physics and Technology;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.mipt.ru/en;https://www.purdue.edu", "aff_unique_abbr": "MIPT;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Russian Federation;United States" }, { "title": "ESPACE: Dimensionality Reduction of Activations for Model Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95852", "id": "HAcaANQNMK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HAcaANQNMK", "openreview": "https://openreview.net/forum?id=HAcaANQNMK", "poster": "/media/PosterPDFs/NeurIPS%202024/95852.png?t=1731971976.206134", "project": "", "author_site": "Charbel Sakr, Brucek Khailany", "tldr": "", "abstract": "We propose ESPACE, an LLM compression technique based on dimensionality reduction of activations. Unlike prior works on weight-centric tensor decomposition, ESPACE projects activations onto a pre-calibrated set of principal components. The activation-centrality of the approach enables retraining LLMs with no loss of expressivity; while at inference, weight decomposition is obtained as a byproduct of matrix multiplication associativity. Theoretical results on the construction of projection matrices with optimal computational accuracy are provided. Experimentally, we find ESPACE enables 50% compression of GPT3, Llama2, and Nemotron4 models with small accuracy degradation, as low as a 0.18 perplexity increase on GPT3-22B. At lower compression rates of 20% to 40%, ESPACE drives GPT3 models to outperforming their baseline, by up to a 0.38 decrease in perplexity for GPT3-8B. ESPACE also reduces GEMM execution time and prefill inference latency on existing hardware. Comparison with related works on compressing Llama2-7B via matrix factorization shows that ESPACE is a first step in advancing the state-of-the-art in tensor decomposition compression of LLMs.", "keywords": "Activation tensor decomposition;model compression;matrix factorization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Charbel Sakr;Brucek Khailany", "authorids": "~Charbel_Sakr1;~Brucek_Khailany1", "gender": "M;M", "homepage": "https://sakr2.web.engr.illinois.edu;", "dblp": "99/402;", "google_scholar": "Ks1WOEUAAAAJ;c4-bwRcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Charbel_Sakr1;~Brucek_Khailany1", "aff": "NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nsakr2024espace,\ntitle={{ESPACE}: Dimensionality Reduction of Activations for Model Compression},\nauthor={Charbel Sakr and Brucek Khailany},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HAcaANQNMK}\n}", "github": "", "reviewers": "ndkj;yYTB;1FAL;maTu", "pdf_size": 2013030, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;1;3;3", "novelty": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "64;25;15;59", "wc_strengths": "77;14;59;96", "wc_weaknesses": "170;145;109;114", "wc_questions": "95;4;45;112", "wc_limitations": "26;5;5;39", "wc_review": "432;193;233;420", "wc_reply_reviewers": "18;66;25;0", "wc_reply_authors": "40;57;39;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 40.75, 21.123150806638673 ], "wc_strengths_avg": [ 61.5, 30.385029208476993 ], "wc_weaknesses_avg": [ 134.5, 24.70323865407125 ], "wc_questions_avg": [ 64.0, 42.50294107470682 ], "wc_limitations_avg": [ 18.75, 14.49784466739798 ], "wc_review_avg": [ 319.5, 107.51860304152021 ], "wc_reply_reviewers_avg": [ 27.25, 24.159625411003375 ], "wc_reply_authors_avg": [ 34.0, 20.89258241577618 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9753096587473795970&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nvidia.com;nvidia.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DOGS: Distributed-Oriented Gaussian Splatting for Large-Scale 3D Reconstruction Via Gaussian Consensus", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95851", "id": "HAocQ9dSAX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HAocQ9dSAX", "openreview": "https://openreview.net/forum?id=HAocQ9dSAX", "poster": "/media/PosterPDFs/NeurIPS%202024/95851.png?t=1731382430.322351", "project": "", "author_site": "Yu Chen, Gim Hee Lee", "tldr": "", "abstract": "The recent advances in 3D Gaussian Splatting (3DGS) show promising results on the novel view synthesis (NVS) task. With its superior rendering performance and high-fidelity rendering quality, 3DGS is excelling at its previous NeRF counterparts. The most recent 3DGS method focuses either on improving the instability of rendering efficiency or reducing the model size. On the other hand, the training efficiency of 3DGS on large-scale scenes has not gained much attention. In this work, we propose DoGaussian, a method that trains 3DGS distributedly. Our method first decomposes a scene into $K$ blocks and then introduces the Alternating Direction Method of Multipliers (ADMM) into the training procedure of 3DGS. During training, our DoGaussian maintains one global 3DGS model on the master node and $K$ local 3DGS models on the slave nodes. The $K$ local 3DGS models are dropped after training and we only query the global 3DGS model during inference. The training time is reduced by scene decomposition, and the training convergence and stability are guaranteed through the consensus on the shared 3D Gaussians. Our method accelerates the training of 3DGS by $6+$ times when evaluated on large-scale scenes while concurrently achieving state-of-the-art rendering quality. Our code is publicly available at [https://github.com/AIBluefisher/DOGS](https://github.com/AIBluefisher/DOGS).", "keywords": "Gaussian Splatting;distributed training;3D Reconstruction;Novel View Synthesis", "primary_area": "machine_vision", "supplementary_material": "/attachment/bdf78bdcf1b657a22d54b51df879ceaa3ad84ac6.zip", "author": "Yu Chen;Gim Hee Lee", "authorids": "~Yu_Chen20;~Gim_Hee_Lee1", "gender": "M;", "homepage": "https://aibluefisher.github.io;https://www.comp.nus.edu.sg/~leegh/", "dblp": ";49/9455", "google_scholar": "https://scholar.google.com.sg/citations?user=eY18dlMAAAAJ;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": "0009-0007-0127-2146;0000-0002-1583-0475", "linkedin": ";", "or_profile": "~Yu_Chen20;~Gim_Hee_Lee1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu.sg", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2024dogs,\ntitle={{DOGS}: Distributed-Oriented Gaussian Splatting for Large-Scale 3D Reconstruction Via Gaussian Consensus},\nauthor={Yu Chen and Gim Hee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HAocQ9dSAX}\n}", "github": "", "reviewers": "eg7b;tFmz;zf9w;Y2Rh", "pdf_size": 21660733, "rating": "6;6;6;6", "confidence": "5;3;5;2", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "80;51;36;80", "wc_strengths": "55;30;56;108", "wc_weaknesses": "218;114;143;55", "wc_questions": "70;41;1;8", "wc_limitations": "9;6;1;1", "wc_review": "432;242;237;252", "wc_reply_reviewers": "17;43;9;10", "wc_reply_authors": "27;27;23;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 19.00493356999703 ], "wc_strengths_avg": [ 62.25, 28.39344114403888 ], "wc_weaknesses_avg": [ 132.5, 58.670691149840735 ], "wc_questions_avg": [ 30.0, 27.595289453093258 ], "wc_limitations_avg": [ 4.25, 3.418698582794336 ], "wc_review_avg": [ 290.75, 81.72935519138763 ], "wc_reply_reviewers_avg": [ 19.75, 13.77270852083932 ], "wc_reply_authors_avg": [ 26.5, 2.179449471770337 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17770242696086723833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nus.edu;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "PertEval: Unveiling Real Knowledge Capacity of LLMs with Knowledge-Invariant Perturbations", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97773", "id": "HB5q6pC5eb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HB5q6pC5eb", "openreview": "https://openreview.net/forum?id=HB5q6pC5eb", "poster": "/media/PosterPDFs/NeurIPS%202024/97773.png?t=1732794445.2246175", "project": "", "author_site": "Jiatong Li, Renjun Hu, Kunzhe Huang, Yan Zhuang, Qi Liu, Mengxiao Zhu, Xing Shi", "tldr": "", "abstract": "Expert-designed close-ended benchmarks are indispensable in assessing the knowledge capacity of large language models (LLMs). Despite their widespread use, concerns have mounted regarding their reliability due to limited test scenarios and an unavoidable risk of data contamination. To rectify this, we present PertEval, a toolkit devised for in-depth probing of LLMs' knowledge capacity through **knowledge-invariant perturbations**. These perturbations employ human-like restatement techniques to generate on-the-fly test samples from static benchmarks, meticulously retaining knowledge-critical content while altering irrelevant details. Our toolkit further includes a suite of **response consistency analyses** that compare performance on raw vs. perturbed test sets to precisely assess LLMs' genuine knowledge capacity. Six representative LLMs are re-evaluated using PertEval. Results reveal significantly inflated performance of the LLMs on raw benchmarks, including an absolute 25.8% overestimation for GPT-4. Additionally, through a nuanced response pattern analysis, we discover that PertEval retains LLMs' uncertainty to specious knowledge, and reveals their potential rote memorization to correct options which leads to overestimated performance. We also find that the detailed response consistency analyses by PertEval could illuminate various weaknesses in existing LLMs' knowledge mastery and guide the development of refinement. Our findings provide insights for advancing more robust and genuinely knowledgeable LLMs. Our code is available at https://github.com/aigc-apps/PertEval.", "keywords": "large language model evaluation;knowledge capacity;perturbation;text attack;trustworthy AI", "primary_area": "", "supplementary_material": "", "author": "Jiatong Li;Renjun Hu;Kunzhe Huang;Yan Zhuang;Qi Liu;Mengxiao Zhu;Xing Shi;Wei Lin", "authorids": "~Jiatong_Li5;~Renjun_Hu1;~Kunzhe_Huang1;~Yan_Zhuang4;~Qi_Liu3;~Mengxiao_Zhu1;~Xing_Shi2;~Wei_Lin5", "gender": "M;M;;M;M;F;M;M", "homepage": "https://cslijt.github.io;https://hurenjun.github.io/;;http://home.ustc.edu.cn/~zykb/;http://staff.ustc.edu.cn/~qiliuql/;http://staff.ustc.edu.cn/~mxzhu/;https://www.linkedin.cn/incareer/in/ACoAAAYCccoBA1UvKKQto4LKLb-BDvD9FS1Vhpo;", "dblp": "19/11348-2;175/4820;;;95/2446-3;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;H3jomREAAAAJ;;7MX_P5cAAAAJ;5EoHAFwAAAAJ;;;LXSkrXkAAAAJ", "orcid": "0009-0000-8877-6927;0000-0002-1094-6890;;0000-0001-7351-377X;0000-0001-6956-5550;;;", "linkedin": ";;;;;;;", "or_profile": "~Jiatong_Li5;~Renjun_Hu1;~Kunzhe_Huang1;~Yan_Zhuang4;~Qi_Liu3;~Mengxiao_Zhu1;~Xing_Shi2;~Wei_Lin5", "aff": "University of Science and Technology of China;Alibaba Group;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;", "aff_domain": "ustc.edu.cn;alibaba-inc.com;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;", "position": "MS student;Researcher;;PhD student;Full Professor;Full Professor;;", "bibtex": "@inproceedings{\nli2024perteval,\ntitle={PertEval: Unveiling Real Knowledge Capacity of {LLM}s with Knowledge-Invariant Perturbations},\nauthor={Jiatong Li and Renjun Hu and Kunzhe Huang and Yan Zhuang and Qi Liu and Mengxiao Zhu and Xing Shi and Wei Lin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=HB5q6pC5eb}\n}", "github": "", "reviewers": "XdWc;SFLD;GSDJ", "pdf_size": 542727, "rating": "6;6;8", "confidence": "3;3;4", "wc_summary_and_contributions": "51;37;219", "wc_strengths": "166;61;56", "wc_improvement": "60;141;310", "wc_limitations": "5;13;90", "wc_correctness": "19;27;87", "wc_clarity": "37;10;200", "wc_relation_to_prior_work": "1;11;225", "wc_documentation": "4;8;50", "wc_additional_feedback": "1;1;1", "wc_review": "344;309;1238", "wc_reply_reviewers": "0;0;232", "wc_reply_authors": "55;55;409", "reply_reviewers": "0;0;2", "reply_authors": "2;2;3", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 102.33333333333333, 82.6935440177903 ], "wc_strengths_avg": [ 94.33333333333333, 50.71708018234313 ], "wc_improvement_avg": [ 170.33333333333334, 104.14839839809774 ], "wc_limitations_avg": [ 36.0, 38.32318706301274 ], "wc_correctness_avg": [ 44.333333333333336, 30.346151137976115 ], "wc_clarity_avg": [ 82.33333333333333, 83.92986490053597 ], "wc_relation_to_prior_work_avg": [ 79.0, 103.31827847320466 ], "wc_documentation_avg": [ 20.666666666666668, 20.805982045769646 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 630.3333333333334, 429.9227320758413 ], "wc_reply_reviewers_avg": [ 77.33333333333333, 109.36584882351936 ], "wc_reply_authors_avg": [ 173.0, 166.87720036002523 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=842877002890733755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ustc.edu.cn;alibaba-inc.com;;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;;", "author_num": 8, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Animate3D: Animating Any 3D Model with Multi-view Video Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95850", "id": "HB6KaCFiMN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HB6KaCFiMN", "openreview": "https://openreview.net/forum?id=HB6KaCFiMN", "poster": "/media/PosterPDFs/NeurIPS%202024/95850.png?t=1731675132.1251125", "project": "", "author_site": "Yanqin Jiang, Chaohui Yu, Chenjie Cao, Fan Wang, Weiming Hu, Jin Gao", "tldr": "", "abstract": "Recent advances in 4D generation mainly focus on generating 4D content by distilling pre-trained text or single-view image conditioned models. It is inconvenient for them to take advantage of various off-the-shelf 3D assets with multi-view attributes, and their results suffer from spatiotemporal inconsistency owing to the inherent ambiguity in the supervision signals. In this work, we present Animate3D, a novel framework for animating any static 3D model. The core idea is two-fold: 1) We propose a novel multi-view video diffusion model (MV-VDM) conditioned on multi-view renderings of the static 3D object, which is trained on our presented large-scale multi-view video dataset (MV-Video). 2) Based on MV-VDM, we introduce a framework combining reconstruction and 4D Score Distillation Sampling (4D-SDS) to leverage the multi-view video diffusion priors for animating 3D objects. Specifically, for MV-VDM, we design a new spatiotemporal attention module to enhance spatial and temporal consistency by integrating 3D and video diffusion models. Additionally, we leverage the static 3D model\u2019s multi-view renderings as conditions to preserve its identity. For animating 3D models, an effective two-stage pipeline is proposed: we first reconstruct coarse motions directly from generated multi-view videos, followed by the introduced 4D-SDS to model fine-level motions. Benefiting from accurate motion learning, we could achieve straightforward mesh animation. Qualitative and quantitative experiments demonstrate that Animate3D significantly outperforms previous approaches. Data, code, and models are open-released.", "keywords": "4D generation;Multi-view video diffusion model", "primary_area": "generative_models", "supplementary_material": "/attachment/417ce9e3aa40886fe6342d6729ff35bee4556ecf.zip", "author": "Yanqin Jiang;Chaohui Yu;Chenjie Cao;Fan Wang;Weiming Hu;Jin Gao", "authorids": "~Yanqin_Jiang1;~Chaohui_Yu1;~Chenjie_Cao1;~Fan_Wang6;~Weiming_Hu1;~Jin_Gao1", "gender": "F;M;M;F;M;M", "homepage": "https://anonymous1.com;https://richardych.github.io/;https://ewrfcas.github.io/;;http://weiminghu.people-ai.net/;https://people.ucas.edu.cn/~jgao?language=en", "dblp": "323/4685;14/10377;https://dblp.uni-trier.de/pid/193/0823;;;", "google_scholar": ";b1Q-k20AAAAJ;1INK-I0AAAAJ;WCRGTHsAAAAJ;;W1o3B-0AAAAJ", "orcid": ";0000-0002-7852-4491;;0000-0001-7320-1119;0000-0001-9237-8825;", "linkedin": ";;;;;", "or_profile": "~Yanqin_Jiang1;~Chaohui_Yu1;~Chenjie_Cao1;~Fan_Wang6;~Weiming_Hu1;~Jin_Gao1", "aff": "University of Chinese Academy of Sciences;Alibaba Group;Fudan University;Alibaba Group;Institute of automation, Chinese academy of science;Institute of automation, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;alibaba-inc.com;fudan.edu.cn;alibaba-inc.com;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;Researcher;PhD student;Senior Staff Algorithm Engineer;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2024animated,\ntitle={Animate3D: Animating Any 3D Model with Multi-view Video Diffusion},\nauthor={Yanqin Jiang and Chaohui Yu and Chenjie Cao and Fan Wang and Weiming Hu and Jin Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HB6KaCFiMN}\n}", "github": "", "reviewers": "G4mL;9MgC;Tnsn;2Yu3;C8Es", "pdf_size": 14742014, "rating": "5;5;5;6;6", "confidence": "4;5;3;4;4", "soundness": "3;3;2;2;4", "novelty": "3;3;2;3;4", "presentation": "2;2;2;3;3", "wc_summary": "159;84;70;73;94", "wc_strengths": "184;34;59;74;87", "wc_weaknesses": "508;362;132;172;144", "wc_questions": "294;6;158;37;33", "wc_limitations": "7;11;6;15;11", "wc_review": "1152;497;425;371;369", "wc_reply_reviewers": "80;14;14;15;27", "wc_reply_authors": "350;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 96.0, 32.62514367784455 ], "wc_strengths_avg": [ 87.6, 51.3209508875274 ], "wc_weaknesses_avg": [ 263.6, 147.93728400913676 ], "wc_questions_avg": [ 105.6, 107.83060789961263 ], "wc_limitations_avg": [ 10.0, 3.22490309931942 ], "wc_review_avg": [ 562.8, 298.2686037785405 ], "wc_reply_reviewers_avg": [ 30.0, 25.479403446705735 ], "wc_reply_authors_avg": [ 70.0, 140.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12178418020793646779&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucas.ac.cn;alibaba-inc.com;fudan.edu.cn;alibaba-inc.com;nlpr.ia.ac.cn;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;3;3", "aff_unique_norm": "University of Chinese Academy of Sciences;Alibaba Group;Fudan University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;https://www.alibaba.com;https://www.fudan.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;Alibaba;Fudan;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Importance of Online Data: Understanding Preference Fine-tuning via Coverage", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95849", "id": "HBj86RMdZ8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HBj86RMdZ8", "openreview": "https://openreview.net/forum?id=HBj86RMdZ8", "poster": "", "project": "", "author_site": "Yuda Song, Gokul Swamy, Aarti Singh, J. Bagnell, Wen Sun", "tldr": "", "abstract": "Learning from human preference data has emerged as the dominant paradigm for fine-tuning large language models (LLMs). The two most common families of techniques -- online reinforcement learning (RL) such as Proximal Policy Optimization (PPO) and offline contrastive methods such as Direct Preference Optimization (DPO) -- were positioned as equivalent in prior work due to the fact that both have to start from the same offline preference dataset. To further expand our theoretical understanding of the similarities and differences between online and offline techniques for preference fine-tuning, we conduct a rigorous analysis through the lens of *dataset coverage*, a concept that captures how the training data covers the test distribution and is widely used in RL. We prove that a global coverage condition is both necessary and sufficient for offline contrastive methods to converge to the optimal policy, but a weaker partial coverage condition suffices for online RL methods. This separation provides one explanation of why online RL methods can perform better than offline methods, especially when the offline preference data is not diverse enough. Finally, motivated by our preceding theoretical observations, we derive a hybrid preference optimization (HyPO) algorithm that uses offline data for contrastive-based preference optimization and online unlabeled data for KL regularization. Theoretically and empirically, we demonstrate that HyPO is more performant than its pure offline counterpart DPO, while still preserving its computation and memory efficiency.", "keywords": "Reinforcement Learning from Human Feedback;LLM finetuning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/ab19e5c6f79244d3a83cf5d0238b904a29ca7ee5.zip", "author": "Yuda Song;Gokul Swamy;Aarti Singh;Drew Bagnell;Wen Sun", "authorids": "~Yuda_Song2;~Gokul_Swamy1;~Aarti_Singh2;~Drew_Bagnell2;~Wen_Sun1", "gender": "M;;;;F", "homepage": "https://yudasong.github.io/;https://gokul.dev/;https://robotwhisperer.org/;https://wensun.github.io;https://www.cs.cmu.edu/~aarti", "dblp": "250/4880-1;31/11509;;;64/5328", "google_scholar": "0QDCG8IAAAAJ;Sbpra_AAAAAJ;7t4jbPQAAAAJ;iOLC30YAAAAJ;vGBcNVAAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yuda_Song2;~Gokul_Swamy1;~Drew_Bagnell2;~Wen_Sun1;~Aarti_Singh1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Cornell University;University of Wisconsin - Madison", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu;cornell.edu;wisc.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsong2024the,\ntitle={The Importance of Online Data: Understanding Preference Fine-tuning via Coverage},\nauthor={Yuda Song and Gokul Swamy and Aarti Singh and Drew Bagnell and Wen Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HBj86RMdZ8}\n}", "github": "", "reviewers": "MDLu;yufq;ewpX;YCrR", "pdf_size": 527905, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "116;113;135;137", "wc_strengths": "242;53;56;53", "wc_weaknesses": "246;38;90;107", "wc_questions": "3;25;4;26", "wc_limitations": "1;69;9;116", "wc_review": "608;298;294;439", "wc_reply_reviewers": "184;0;0;32", "wc_reply_authors": "185;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 125.25, 10.825317547305483 ], "wc_strengths_avg": [ 101.0, 81.41560047067146 ], "wc_weaknesses_avg": [ 120.25, 76.92325721132718 ], "wc_questions_avg": [ 14.5, 11.01135777277262 ], "wc_limitations_avg": [ 48.75, 46.884832302142236 ], "wc_review_avg": [ 409.75, 128.49586569224707 ], "wc_reply_reviewers_avg": [ 54.0, 76.18398781896364 ], "wc_reply_authors_avg": [ 46.25, 80.10734985006057 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12968716869602037183&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "andrew.cmu.edu;cmu.edu;cmu.edu;cornell.edu;wisc.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Carnegie Mellon University;Cornell University;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.cornell.edu;https://www.wisc.edu", "aff_unique_abbr": "CMU;Cornell;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Exploration for Data-Efficient General Value Function Evaluations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95848", "id": "HC6iqpPt3L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HC6iqpPt3L", "openreview": "https://openreview.net/forum?id=HC6iqpPt3L", "poster": "/media/PosterPDFs/NeurIPS%202024/95848.png?t=1733567092.7141411", "project": "", "author_site": "Arushi Jain, Josiah Hanna, Doina Precup", "tldr": "", "abstract": "General Value Functions (GVFs) (Sutton et al., 2011) represent predictive knowledge in reinforcement learning. Each GVF computes the expected return for a given policy, based on a unique reward. Existing methods relying on fixed behavior policies or pre-collected data often face data efficiency issues when learning multiple GVFs in parallel using off-policy methods. To address this, we introduce *GVFExplorer*, which adaptively learns a single behavior policy that efficiently collects data for evaluating multiple GVFs in parallel. Our method optimizes the behavior policy by minimizing the total variance in return across GVFs, thereby reducing the required environmental interactions. We use an existing temporal-difference-style variance estimator to approximate the return variance. We prove that each behavior policy update decreases the overall mean squared error in GVF predictions. We empirically show our method's performance in tabular and nonlinear function approximation settings, including Mujoco environments, with stationary and non-stationary reward signals, optimizing data usage and reducing prediction errors across multiple GVFs.", "keywords": "general value functions;GVFs;multiple policy evaluations;exploration for GVFs;variance-minimization;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Arushi Jain;Josiah P. Hanna;Doina Precup", "authorids": "~Arushi_Jain2;~Josiah_P._Hanna1;~Doina_Precup1", "gender": "F;F;M", "homepage": "https://arushijain94.github.io/;http://cs.mcgill.ca/~dprecup/;https://pages.cs.wisc.edu/~jphanna/", "dblp": ";p/DoinaPrecup;135/6336", "google_scholar": "https://scholar.google.ca/citations?user=fELsicAAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;", "orcid": "my-orcid?orcid=0000-0002-3081-0421;;", "linkedin": "arushi-jain-mcgill/;;", "or_profile": "~Arushi_Jain2;~Doina_Precup1;~Josiah_Hanna2", "aff": "McGill University, McGill University;McGill University;University of Wisconsin - Madison", "aff_domain": "mail.mcgill.ca;mcgill.ca;wisc.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\njain2024adaptive,\ntitle={Adaptive Exploration for Data-Efficient General Value Function Evaluations},\nauthor={Arushi Jain and Josiah P. Hanna and Doina Precup},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HC6iqpPt3L}\n}", "github": "", "reviewers": "3wne;pomH;A7c7", "pdf_size": 2119282, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;4;4", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "89;56;69", "wc_strengths": "41;34;79", "wc_weaknesses": "58;89;105", "wc_questions": "170;166;4", "wc_limitations": "6;62;1", "wc_review": "364;407;258", "wc_reply_reviewers": "8;0;23", "wc_reply_authors": "9;457;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.33333333333333, 13.572848714334887 ], "wc_strengths_avg": [ 51.333333333333336, 19.770910168449223 ], "wc_weaknesses_avg": [ 84.0, 19.510680835549195 ], "wc_questions_avg": [ 113.33333333333333, 77.32758599332813 ], "wc_limitations_avg": [ 23.0, 27.65260686927485 ], "wc_review_avg": [ 343.0, 62.61522711502903 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 9.533566430716727 ], "wc_reply_authors_avg": [ 155.33333333333334, 213.34218731626635 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2293453768210899163&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mail.mcgill.ca;mcgill.ca;wisc.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "McGill University;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.mcgill.ca;https://www.wisc.edu", "aff_unique_abbr": "McGill;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Enhancing Robustness in Deep Reinforcement Learning: A Lyapunov Exponent Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95847", "id": "HCTikT7LS4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HCTikT7LS4", "openreview": "https://openreview.net/forum?id=HCTikT7LS4", "poster": "/media/PosterPDFs/NeurIPS%202024/95847.png?t=1732638247.4161093", "project": "", "author_site": "Rory Young, Nicolas Pugeault", "tldr": "", "abstract": "Deep reinforcement learning agents achieve state-of-the-art performance in a wide range of simulated control tasks. However, successful applications to real-world problems remain limited. One reason for this dichotomy is because the learnt policies are not robust to observation noise or adversarial attacks. In this paper, we investigate the robustness of deep RL policies to a single small state perturbation in deterministic continuous control tasks. We demonstrate that RL policies can be deterministically chaotic, as small perturbations to the system state have a large impact on subsequent state and reward trajectories. This unstable non-linear behaviour has two consequences: first, inaccuracies in sensor readings, or adversarial attacks, can cause significant performance degradation; second, even policies that show robust performance in terms of rewards may have unpredictable behaviour in practice. These two facets of chaos in RL policies drastically restrict the application of deep RL to real-world problems. To address this issue, we propose an improvement on the successful Dreamer V3 architecture, implementing Maximal Lyapunov Exponent regularisation. This new approach reduces the chaotic state dynamics, rendering the learnt policies more resilient to sensor noise or adversarial attacks and thereby improving the suitability of deep reinforcement learning for real-world applications.", "keywords": "Reinforcement Learning;Robust Reinforcement Learning;Stable Reinforcement Learning;Lyapunov Exponents;Chaos Theory", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/da1b6b8f8d7396c0ee97a429a1ee42ca910aa9a3.zip", "author": "Rory Young;Nicolas Pugeault", "authorids": "~Rory_Young1;~Nicolas_Pugeault1", "gender": "M;M", "homepage": ";http://pugeault.online.fr/", "dblp": "319/6058;35/1348", "google_scholar": ";https://scholar.google.co.uk/citations?user=O9mKQacAAAAJ", "orcid": ";0000-0002-3455-6280", "linkedin": "rory-young-90a09aa1/;nicolas-pugeault-6b01b25/", "or_profile": "~Rory_Young1;~Nicolas_Pugeault2", "aff": "University of Glasgow;University of Glasgow", "aff_domain": "gla.ac.uk;glasgow.ac.uk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nyoung2024enhancing,\ntitle={Enhancing Robustness in Deep Reinforcement Learning: A Lyapunov Exponent Approach},\nauthor={Rory Young and Nicolas Pugeault},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HCTikT7LS4}\n}", "github": "", "reviewers": "pxFr;TDUf;2W37", "pdf_size": 1101513, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "39;69;25", "wc_strengths": "26;52;52", "wc_weaknesses": "105;49;143", "wc_questions": "69;27;249", "wc_limitations": "9;6;8", "wc_review": "248;203;477", "wc_reply_reviewers": "45;21;36", "wc_reply_authors": "54;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 44.333333333333336, 18.354533197248273 ], "wc_strengths_avg": [ 43.333333333333336, 12.256517540566824 ], "wc_weaknesses_avg": [ 99.0, 38.60915262818736 ], "wc_questions_avg": [ 115.0, 96.29122493768578 ], "wc_limitations_avg": [ 7.666666666666667, 1.247219128924647 ], "wc_review_avg": [ 309.3333333333333, 119.9731451432176 ], "wc_reply_reviewers_avg": [ 34.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 18.0, 25.45584412271571 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5wK95EKHVW8J:scholar.google.com/&scioq=Enhancing+Robustness+in+Deep+Reinforcement+Learning:+A+Lyapunov+Exponent+Approach&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "gla.ac.uk;glasgow.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Glasgow", "aff_unique_dep": "", "aff_unique_url": "https://www.gla.ac.uk", "aff_unique_abbr": "Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "SCOREQ: Speech Quality Assessment with Contrastive Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95846", "id": "HDVsiUHQ1w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HDVsiUHQ1w", "openreview": "https://openreview.net/forum?id=HDVsiUHQ1w", "poster": "", "project": "", "author_site": "Alessandro Ragano, Jan Skoglund, Andrew Hines", "tldr": "", "abstract": "In this paper, we present SCOREQ, a novel approach for speech quality prediction. SCOREQ is a triplet loss function for contrastive regression that addresses the domain generalisation shortcoming exhibited by state of the art no-reference speech quality metrics. In the paper we: (i) illustrate the problem of L2 loss training failing at capturing the continuous nature of the mean opinion score (MOS) labels; (ii) demonstrate the lack of generalisation through a benchmarking evaluation across several speech domains; (iii) outline our approach and explore the impact of the architectural design decisions through incremental evaluation; (iv) evaluate the final model against state of the art models for a wide variety of data and domains. The results show that the lack of generalisation observed in state of the art speech quality metrics is addressed by SCOREQ. We conclude that using a triplet loss function for contrastive regression improves generalisation for speech quality prediction models but also has potential utility across a wide range of applications using regression-based predictive models.", "keywords": "Perceptual measures of audio quality;objective and subjective quality assessment;domain mismatch;contrastive learning;regression", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Alessandro Ragano;Jan Skoglund;Andrew Hines", "authorids": "~Alessandro_Ragano1;~Jan_Skoglund1;~Andrew_Hines1", "gender": ";M;", "homepage": ";;https://people.ucd.ie/Andrew.Hines", "dblp": ";;", "google_scholar": "QKxuZzoAAAAJ;0Mix1jMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0007-5097-0100;;0000-0001-9636-2556", "linkedin": ";;", "or_profile": "~Alessandro_Ragano1;~Jan_Skoglund1;~Andrew_Hines1", "aff": "University College Dublin;Google;University College Dublin", "aff_domain": "ucd.ie;google.com;ucd.ie", "position": "Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nragano2024scoreq,\ntitle={{SCOREQ}: Speech Quality Assessment with Contrastive Regression},\nauthor={Alessandro Ragano and Jan Skoglund and Andrew Hines},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HDVsiUHQ1w}\n}", "github": "", "reviewers": "NTty;FFmN;Sj6A;2CaS;XmpT", "pdf_size": 1671106, "rating": "6;6;6;6;8", "confidence": "4;3;4;4;5", "soundness": "3;3;4;3;3", "novelty": "3;3;4;3;3", "presentation": "3;2;3;3;4", "wc_summary": "52;25;116;536;91", "wc_strengths": "45;45;36;283;202", "wc_weaknesses": "50;3;201;195;220", "wc_questions": "56;134;24;137;147", "wc_limitations": "2;1;32;228;1", "wc_review": "205;208;409;1379;661", "wc_reply_reviewers": "9;18;229;0;29", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 164.0, 188.6170723980202 ], "wc_strengths_avg": [ 122.2, 101.5625915384203 ], "wc_weaknesses_avg": [ 133.8, 89.24438357678314 ], "wc_questions_avg": [ 99.6, 49.890279614369774 ], "wc_limitations_avg": [ 52.8, 88.402262414488 ], "wc_review_avg": [ 572.4, 436.56000733003475 ], "wc_reply_reviewers_avg": [ 57.0, 86.53554183108811 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7905694150420948, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1765575821485235572&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ucd.ie;google.com;ucd.ie", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University College Dublin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucd.ie;https://www.google.com", "aff_unique_abbr": "UCD;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Ireland;United States" }, { "title": "Learning Representations for Hierarchies with Minimal Support", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95845", "id": "HFS800reZK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HFS800reZK", "openreview": "https://openreview.net/forum?id=HFS800reZK", "poster": "", "project": "", "author_site": "Benjamin Rozonoyer, Michael Boratko, Dhruvesh Patel, Wenlong Zhao, Shib Dasgupta, Hung Le, Andrew McCallum", "tldr": "", "abstract": "When training node embedding models to represent large directed graphs (digraphs), it is impossible to observe all entries of the adjacency matrix during training. As a consequence most methods employ sampling. For very large digraphs, however, this means many (most) entries may be unobserved during training. In general, observing every entry would be necessary to uniquely identify a graph, however if we know the graph has a certain property some entries can be omitted - for example, only half the entries would be required for a symmetric graph. \nIn this work, we develop a novel framework to identify a subset of entries required to uniquely distinguish a graph among all transitively-closed DAGs. We give an explicit algorithm to compute the provably minimal set of entries, and demonstrate empirically that one can train node embedding models with greater efficiency and performance, provided the energy function has an appropriate inductive bias. We achieve robust performance on synthetic hierarchies and a larger real-world taxonomy, observing improved convergence rates in a resource-constrained setting while reducing the set of training examples by as much as 99%.", "keywords": "graph embeddings;representation learning", "primary_area": "other", "supplementary_material": "", "author": "Benjamin Rozonoyer;Michael Boratko;Dhruvesh Patel;Wenlong Zhao;Shib Sankar Dasgupta;Hung Le;Andrew McCallum", "authorids": "~Benjamin_Rozonoyer1;~Michael_Boratko1;~Dhruvesh_Patel1;~Wenlong_Zhao1;~Shib_Sankar_Dasgupta2;~Hung_Le4;~Andrew_McCallum1", "gender": "M;;;M;;M;M", "homepage": "https://people.cs.umass.edu/~mboratko/;http://dhruveshp.com;;https://ssdasgupta.github.io/;https://hunglvosu.github.io;http://www.cs.umass.edu/~mccallum;https://brozonoyer.github.io/", "dblp": "222/1939;274/7280;03/4555-1;222/9398;;m/AndrewMcCallum;292/3497.html", "google_scholar": "YKZGpnkAAAAJ;6F2CvwoAAAAJ;i0lW2EAAAAAJ;0KpQR94AAAAJ;D8Vbc48AAAAJ;yILa1y0AAAAJ;Ly7pnhcAAAAJ", "orcid": ";0000-0003-3062-2292;;;;0009-0004-5487-2848;", "linkedin": "michaelboratko/;dhruveshp/;wenlong-zhao/;shib-sankar-dasgupta-iisc/;;andrew-mccallum-a412;benjamin-rozonoyer-b45590139/", "or_profile": "~Michael_Boratko1;~Dhruvesh_Patel1;~Wenlong_Zhao1;~Shib_Sankar_Dasgupta2;~Hung_Le4;~Andrew_McCallum1;~Benjamin_Vladimir_Rozonoyer1", "aff": "Google;College of Information and Computer Science, University of Massachusetts, Amherst;International Business Machines;University of Massachusetts, Amherst;Department of Computer Science, University of Massachusetts at Amherst;University of Massachusetts Amherst;Oracle", "aff_domain": "google.com;cics.umass.edu;ibm.com;umass.edu;cs.umass.edu;cs.umass.edu;oracle.com", "position": "Researcher;PhD student;Intern;PhD student;Assistant Professor;Distinguished Professor;Intern", "bibtex": "@inproceedings{\nrozonoyer2024learning,\ntitle={Learning Representations for Hierarchies with Minimal Support},\nauthor={Benjamin Rozonoyer and Michael Boratko and Dhruvesh Patel and Wenlong Zhao and Shib Sankar Dasgupta and Hung Le and Andrew McCallum},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HFS800reZK}\n}", "github": "", "reviewers": "VFTZ;TmQr;eH8o;gCXR;ShEU", "pdf_size": 1174910, "rating": "3;5;6;7;7", "confidence": "2;2;4;4;4", "soundness": "2;3;2;3;3", "novelty": "1;2;3;3;3", "presentation": "1;2;3;3;2", "wc_summary": "35;33;84;129;136", "wc_strengths": "13;52;71;108;65", "wc_weaknesses": "257;141;301;101;46", "wc_questions": "12;6;21;83;29", "wc_limitations": "1;39;35;11;1", "wc_review": "318;271;512;432;277", "wc_reply_reviewers": "139;237;196;71;21", "wc_reply_authors": "566;283;397;0;0", "reply_reviewers": "1;3;1;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 83.4, 44.11167645873369 ], "wc_strengths_avg": [ 61.8, 30.681590571546316 ], "wc_weaknesses_avg": [ 169.2, 95.60836783462 ], "wc_questions_avg": [ 30.2, 27.53470537340104 ], "wc_limitations_avg": [ 17.4, 16.46329250180534 ], "wc_review_avg": [ 362.0, 94.70163673347996 ], "wc_reply_reviewers_avg": [ 132.8, 79.00480998015247 ], "wc_reply_authors_avg": [ 249.2, 222.50878634337116 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8728715609439696, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h_0c7OGt--QJ:scholar.google.com/&scioq=Learning+Representations+for+Hierarchies+with+Minimal+Support&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "google.com;cics.umass.edu;ibm.com;umass.edu;cs.umass.edu;cs.umass.edu;oracle.com", "author_num": 7, "aff_unique_index": "0;1;2;1;1;1;3", "aff_unique_norm": "Google;University of Massachusetts Amherst;International Business Machines Corporation;Oracle Corporation", "aff_unique_dep": "Google;College of Information and Computer Science;;", "aff_unique_url": "https://www.google.com;https://www.umass.edu;https://www.ibm.com;https://www.oracle.com", "aff_unique_abbr": "Google;UMass Amherst;IBM;Oracle", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Mountain View;Amherst;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Group Actions on Latent Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95844", "id": "HGNTcy4eEp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HGNTcy4eEp", "openreview": "https://openreview.net/forum?id=HGNTcy4eEp", "poster": "/media/PosterPDFs/NeurIPS%202024/95844.png?t=1731728914.0789804", "project": "", "author_site": "Yinzhu Jin, Aman Shrivastava, Tom Fletcher", "tldr": "", "abstract": "In this work, we introduce a new approach to model group actions in autoencoders. Diverging from prior research in this domain, we propose to learn the group actions on the latent space rather than strictly on the data space. This adaptation enhances the versatility of our model, enabling it to learn a broader range of scenarios prevalent in the real world, where groups can act on latent factors. Our method allows a wide flexibility in the encoder and decoder architectures and does not require group-specific layers. In addition, we show that our model theoretically serves as a superset of methods that learn group actions on the data space. We test our approach on five image datasets with diverse groups acting on them and demonstrate superior performance to recently proposed methods for modeling group actions.", "keywords": "group action;representation learning;image rendering", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yinzhu Jin;Aman Shrivastava;Tom Fletcher", "authorids": "~Yinzhu_Jin1;~Aman_Shrivastava1;~Tom_Fletcher1", "gender": "F;M;M", "homepage": ";https://www.cs.virginia.edu/~as3ek/;http://www.sci.utah.edu/~fletcher/", "dblp": ";247/0898;20/546.html", "google_scholar": "KUNnxJMAAAAJ;8JKUMc8AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yinzhu_Jin1;~Aman_Shrivastava1;~Tom_Fletcher1", "aff": "University of Virginia, Charlottesville;University of Virginia, Charlottesville;University of Virginia", "aff_domain": "virginia.edu;virginia.edu;virginia.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\njin2024learning,\ntitle={Learning Group Actions on Latent Representations},\nauthor={Yinzhu Jin and Aman Shrivastava and Tom Fletcher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HGNTcy4eEp}\n}", "github": "", "reviewers": "GMqP;tMkz;yBaK;vFp4", "pdf_size": 2329384, "rating": "4;5;6;7", "confidence": "3;2;4;4", "soundness": "3;3;2;3", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "128;39;46;818", "wc_strengths": "41;40;120;97", "wc_weaknesses": "91;164;219;107", "wc_questions": "52;77;49;33", "wc_limitations": "2;6;19;1", "wc_review": "314;326;453;1056", "wc_reply_reviewers": "0;36;67;190", "wc_reply_authors": "117;67;66;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 257.75, 325.34779467517524 ], "wc_strengths_avg": [ 74.5, 34.96069221282668 ], "wc_weaknesses_avg": [ 145.25, 50.48947910208621 ], "wc_questions_avg": [ 52.75, 15.75396775418815 ], "wc_limitations_avg": [ 7.0, 7.176350047203662 ], "wc_review_avg": [ 537.25, 304.412035734463 ], "wc_reply_reviewers_avg": [ 73.25, 71.45409365459757 ], "wc_reply_authors_avg": [ 62.5, 41.56019730463271 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hy8XOM7lo0gJ:scholar.google.com/&scioq=Learning+Group+Actions+on+Latent+Representations&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "virginia.edu;virginia.edu;virginia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Image Priors Through Patch-Based Diffusion Models for Solving Inverse Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95843", "id": "HGnxhHz6ss", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HGnxhHz6ss", "openreview": "https://openreview.net/forum?id=HGnxhHz6ss", "poster": "", "project": "", "author_site": "Jason Hu, Bowen Song, Xiaojian Xu, Liyue Shen, Jeffrey Fessler", "tldr": "", "abstract": "Diffusion models can learn strong image priors from underlying data distribution and use them to solve inverse problems,\nbut the training process is computationally expensive and requires lots of data.\nSuch bottlenecks prevent most existing works from being feasible for high-dimensional and high-resolution data such as 3D images.\nThis paper proposes a method to learn an efficient data prior for the entire image by training diffusion models only on patches of images.\nSpecifically, we propose a patch-based position-aware diffusion inverse solver, called PaDIS, where we obtain the score function of the whole image through scores of patches and their positional encoding and utilize this as the prior for solving inverse problems.\nFirst of all, we show that this diffusion model achieves an improved memory efficiency and data efficiency\nwhile still maintaining the capability to generate entire images via positional encoding.\nAdditionally, the proposed PaDIS model is highly flexible and can be plugged in with different diffusion inverse solvers (DIS).\nWe demonstrate that the proposed PaDIS approach enables solving various inverse problems in both natural and medical image domains, including CT reconstruction, deblurring, and superresolution, given only patch-based priors.\nNotably, PaDIS outperforms previous DIS methods trained on entire image priors in the case of limited training data, demonstrating the data efficiency of our proposed approach by learning patch-based prior.", "keywords": "reconstruction;computed tomography;deblurring;superresolution", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jason Hu;Bowen Song;Xiaojian Xu;Liyue Shen;Jeffrey A Fessler", "authorids": "~Jason_Hu1;~Bowen_Song3;~Xiaojian_Xu1;~Liyue_Shen1;~Jeffrey_A_Fessler2", "gender": "M;;F;F;M", "homepage": "https://jasonhu4.github.io/;https://web.stanford.edu/~bowens18/;https://xuxiaojian.github.io/;https://liyueshen.engin.umich.edu/;https://web.eecs.umich.edu/~fessler/", "dblp": ";;97/4269;159/2036;f/JeffreyAFessler", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;jdNOtqwAAAAJ;Ho4qk9wAAAAJ;J5f4Gq8AAAAJ", "orcid": ";;0000-0002-5264-8963;0000-0001-5942-3196;0000-0001-9998-3315", "linkedin": "jason-hu-7115221a2/;;xiaojian-xu-282587188/;;jeff-fessler-578ab746", "or_profile": "~Jason_Hu1;~Bowen_Song3;~Xiaojian_Xu1;~Liyue_Shen1;~Jeffrey_Fessler1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan, Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024learning,\ntitle={Learning Image Priors Through Patch-Based Diffusion Models for Solving Inverse Problems},\nauthor={Jason Hu and Bowen Song and Xiaojian Xu and Liyue Shen and Jeffrey A Fessler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HGnxhHz6ss}\n}", "github": "", "reviewers": "L95h;P2vb;wfxx;mcvX;YkyD", "pdf_size": 23421430, "rating": "5;5;6;6;7", "confidence": "5;5;4;3;3", "soundness": "3;2;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;2;3;3;3", "wc_summary": "77;39;61;104;62", "wc_strengths": "43;18;22;76;47", "wc_weaknesses": "87;298;15;186;63", "wc_questions": "52;41;210;187;2", "wc_limitations": "33;51;2;26;1", "wc_review": "292;447;310;579;175", "wc_reply_reviewers": "22;213;64;317;21", "wc_reply_authors": "42;397;44;501;42", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.6, 21.453204888780604 ], "wc_strengths_avg": [ 41.2, 20.759576103572055 ], "wc_weaknesses_avg": [ 129.8, 100.91858104432504 ], "wc_questions_avg": [ 98.4, 83.72000955566118 ], "wc_limitations_avg": [ 22.6, 19.06410239166796 ], "wc_review_avg": [ 360.6, 139.19568958843516 ], "wc_reply_reviewers_avg": [ 127.4, 118.08911888908308 ], "wc_reply_authors_avg": [ 205.2, 201.76164154764402 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8964214570007952, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12251877019725746328&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Approximating mutual information of high-dimensional variables using learned representations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95842", "id": "HN05DQxyLl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HN05DQxyLl", "openreview": "https://openreview.net/forum?id=HN05DQxyLl", "poster": "", "project": "", "author_site": "Gokul Gowri, Xiaokang Lun, Allon Klein, Peng Yin", "tldr": "", "abstract": "Mutual information (MI) is a general measure of statistical dependence with widespread application across the sciences. However, estimating MI between multi-dimensional variables is challenging because the number of samples necessary to converge to an accurate estimate scales unfavorably with dimensionality. In practice, existing techniques can reliably estimate MI in up to tens of dimensions, but fail in higher dimensions, where sufficient sample sizes are infeasible. Here, we explore the idea that underlying low-dimensional structure in high-dimensional data can be exploited to faithfully approximate MI in high-dimensional settings with realistic sample sizes. We develop a method that we call latent MI (LMI) approximation, which applies a nonparametric MI estimator to low-dimensional representations learned by a simple, theoretically-motivated model architecture. Using several benchmarks, we show that unlike existing techniques, LMI can approximate MI well for variables with $> 10^3$ dimensions if their dependence structure is captured by low-dimensional representations. Finally, we showcase LMI on two open problems in biology. First, we approximate MI between protein language model (pLM) representations of interacting proteins, and find that pLMs encode non-trivial information about protein-protein interactions. Second, we quantify cell fate information contained in single-cell RNA-seq (scRNA-seq) measurements of hematopoietic stem cells, and find a sharp transition during neutrophil differentiation when fate information captured by scRNA-seq increases dramatically. An implementation of LMI is available at *latentmi.readthedocs.io.*", "keywords": "mutual information;high-dimensional data;computational biology;scRNA-seq;protein language models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/5483d72e0af951607cb750f7a83245310bb41b29.zip", "author": "Gokul Gowri;Xiaokang Lun;Allon M Klein;Peng Yin", "authorids": "~Gokul_Gowri1;~Xiaokang_Lun1;~Allon_M_Klein1;~Peng_Yin2", "gender": "M;;M;M", "homepage": "https://ggdna.github.io/;;https://www.klein.hms.harvard.edu;http://yin.hms.harvard.edu", "dblp": ";;217/5464;23/4378-3", "google_scholar": "sJJAZjoAAAAJ;n2MMDM4AAAAJ;ELmi3zEAAAAJ;GyGK1DAAAAAJ", "orcid": ";;0000-0001-8913-7879;", "linkedin": ";;;", "or_profile": "~Gokul_Gowri1;~Xiaokang_Lun1;~Allon_M_Klein1;~Peng_Yin2", "aff": "Harvard University;Harvard University;Harvard Medical School, Harvard University;Harvard Medical School, Harvard University", "aff_domain": "harvard.edu;harvard.edu;hms.harvard.edu;hms.harvard.edu", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngowri2024approximating,\ntitle={Approximating mutual information of high-dimensional variables using learned representations},\nauthor={Gokul Gowri and Xiaokang Lun and Allon M Klein and Peng Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HN05DQxyLl}\n}", "github": "", "reviewers": "NtLr;8utg;x45D;13JG", "pdf_size": 4364142, "rating": "5;6;7;7", "confidence": "5;3;4;4", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "75;62;80;107", "wc_strengths": "31;60;57;80", "wc_weaknesses": "213;37;429;84", "wc_questions": "1;81;3;304", "wc_limitations": "11;14;40;23", "wc_review": "331;254;609;598", "wc_reply_reviewers": "186;10;28;93", "wc_reply_authors": "744;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.0, 16.38596960817394 ], "wc_strengths_avg": [ 57.0, 17.421251390184345 ], "wc_weaknesses_avg": [ 190.75, 151.89860927605625 ], "wc_questions_avg": [ 97.25, 123.6494541031217 ], "wc_limitations_avg": [ 22.0, 11.291589790636214 ], "wc_review_avg": [ 448.0, 157.9129507038609 ], "wc_reply_reviewers_avg": [ 79.25, 68.93248508504536 ], "wc_reply_authors_avg": [ 186.0, 322.1614502078112 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12094394712568484471&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "harvard.edu;harvard.edu;hms.harvard.edu;hms.harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Feature Updates Improve Online (Generalized) Label Shift Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95841", "id": "HNH1ykRjXf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HNH1ykRjXf", "openreview": "https://openreview.net/forum?id=HNH1ykRjXf", "poster": "", "project": "", "author_site": "Ruihan Wu, Siddhartha Datta, Yi Su, Dheeraj Baby, Yu-Xiang Wang, Kilian Weinberger", "tldr": "", "abstract": "This paper addresses the prevalent issue of label shift in an online setting with missing labels, where data distributions change over time and obtaining timely labels is challenging. While existing methods primarily focus on adjusting or updating the final layer of a pre-trained classifier, we explore the untapped potential of enhancing feature representations using unlabeled data at test-time. Our novel method, Online Label Shift adaptation with Online Feature Updates (OLS-OFU), leverages self-supervised learning to refine the feature extraction process, thereby improving the prediction model. By carefully designing the algorithm, theoretically OLS-OFU maintains the similar online regret convergence to the results in the literature while taking the improved features into account. Empirically, it achieves substantial improvements over existing methods, which is as significant as the gains existing methods have over the baseline (i.e., without distribution shift adaptations).", "keywords": "label shift;online learning", "primary_area": "other", "supplementary_material": "", "author": "Ruihan Wu;Siddhartha Datta;Yi Su;Dheeraj Baby;Yu-Xiang Wang;Kilian Q Weinberger", "authorids": "~Ruihan_Wu1;~Siddhartha_Datta1;~Yi_Su2;~Dheeraj_Baby1;~Yu-Xiang_Wang1;~Kilian_Q_Weinberger1", "gender": "F;;F;;;M", "homepage": "https://sites.google.com/site/ruihanwu14/home;http://siddharthadatta.ml/;https://www.yisu.moe/;https://dheeraj-b.github.io/home/;http://www.cs.ucsb.edu/~yuxiangw/publications.html;http://www.cs.cornell.edu/~kilian/", "dblp": "195/5577;;;;62/1637-3.html;88/4801", "google_scholar": ";;https://scholar.google.com/citations?hl=en;L3YF8nIAAAAJ;HGNZ1fkAAAAJ;jsxk8vsAAAAJ", "orcid": ";;;;;0009-0008-9313-7239", "linkedin": ";;;;;", "or_profile": "~Ruihan_Wu1;~Siddhartha_Datta1;~Yi_Su2;~Dheeraj_Baby1;~Yu-Xiang_Wang1;~Kilian_Q_Weinberger1", "aff": "University of California, San Diego;University of Oxford;Google;University of California, Santa Barbara;UC Santa Barbara;ASAPP Inc.", "aff_domain": "ucsd.edu;ox.ac.uk;google.com;cs.ucsb.edu;ucsb.edu;asapp.com", "position": "Postdoc;PhD student;Researcher;PhD student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nwu2024online,\ntitle={Online Feature Updates Improve Online (Generalized) Label Shift Adaptation},\nauthor={Ruihan Wu and Siddhartha Datta and Yi Su and Dheeraj Baby and Yu-Xiang Wang and Kilian Q Weinberger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HNH1ykRjXf}\n}", "github": "", "reviewers": "XJxy;Dj3x;ZntR;aV82;fST3", "pdf_size": 1385487, "rating": "3;5;5;6;7", "confidence": "4;4;3;4;3", "soundness": "2;3;3;3;2", "novelty": "2;2;2;3;3", "presentation": "1;2;3;3;3", "wc_summary": "58;84;152;51;67", "wc_strengths": "32;70;145;92;102", "wc_weaknesses": "406;167;84;124;280", "wc_questions": "2;52;14;48;68", "wc_limitations": "4;7;1;6;11", "wc_review": "502;380;396;321;528", "wc_reply_reviewers": "365;15;45;18;0", "wc_reply_authors": "0;0;0;0;1260", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;3", "rating_avg": [ 5.2, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 82.4, 36.51082031398363 ], "wc_strengths_avg": [ 88.2, 37.204300826651746 ], "wc_weaknesses_avg": [ 212.2, 116.97247539485518 ], "wc_questions_avg": [ 36.8, 24.741867350707384 ], "wc_limitations_avg": [ 5.8, 3.3105890714493698 ], "wc_review_avg": [ 425.4, 77.74213786615339 ], "wc_reply_reviewers_avg": [ 88.6, 138.95985031655727 ], "wc_reply_authors_avg": [ 252.0, 504.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.492365963917331, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13826390929957704274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucsd.edu;ox.ac.uk;google.com;cs.ucsb.edu;ucsb.edu;asapp.com", "author_num": 6, "aff_unique_index": "0;1;2;3;3;4", "aff_unique_norm": "University of California, San Diego;University of Oxford;Google;University of California, Santa Barbara;ASAPP Inc.", "aff_unique_dep": ";;Google;;", "aff_unique_url": "https://www.ucsd.edu;https://www.ox.ac.uk;https://www.google.com;https://www.ucsb.edu;https://www.asapp.com", "aff_unique_abbr": "UCSD;Oxford;Google;UCSB;ASAPP", "aff_campus_unique_index": "0;2;3;3", "aff_campus_unique": "San Diego;;Mountain View;Santa Barbara", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Theoretical Analysis of Weak-to-Strong Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95840", "id": "HOSh0SKklE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HOSh0SKklE", "openreview": "https://openreview.net/forum?id=HOSh0SKklE", "poster": "", "project": "", "author_site": "Hunter Lang, David Sontag, Aravindan Vijayaraghavan", "tldr": "", "abstract": "Strong student models can learn from weaker teachers: when trained on the predictions of a weaker model, a strong pretrained student can learn to correct the weak model\u2019s errors and generalize to examples where the teacher is not confident, even when these examples are excluded from training. This enables learning from cheap, incomplete, and possibly incorrect label information, such as coarse logical rules or the generations of a language model. We show that existing weak supervision theory results fail to account for both of these effects, which we call pseudolabel correction and coverage expansion, respectively. We give a new bound based on expansion properties of the data distribution and student hypothesis class that directly accounts for pseudolabel correction and coverage expansion. Our bound generalizes results from the co-training and self-training literature and captures the intuition that weak-to-strong generalization occurs when the mistakes of the weak model are hard for the strong model to fit without incurring additional error. We show that these expansion properties can be checked from finite data and give empirical evidence that they hold in practice.", "keywords": "Weak supervision;weak-to-strong generalization;self-supervised learning;semi-supervised learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/271dd20588ce1e39614658eb79ad873c32e2ba3b.zip", "author": "Hunter Lang;David Sontag;Aravindan Vijayaraghavan", "authorids": "~Hunter_Lang1;~David_Sontag1;~Aravindan_Vijayaraghavan1", "gender": "M;M;M", "homepage": "http://web.mit.edu/hjl/www/;http://people.csail.mit.edu/dsontag/;http://www.cs.northwestern.edu/~aravindv", "dblp": "210/2358.html;12/673;84/7804", "google_scholar": ";LfcroyAAAAAJ;tokXOxkAAAAJ", "orcid": ";0000-0002-5034-7796;", "linkedin": ";;", "or_profile": "~Hunter_Lang1;~David_Sontag1;~Aravindan_Vijayaraghavan1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Northwestern University", "aff_domain": "mit.edu;mit.edu;northwestern.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlang2024theoretical,\ntitle={Theoretical Analysis of Weak-to-Strong Generalization},\nauthor={Hunter Lang and David Sontag and Aravindan Vijayaraghavan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HOSh0SKklE}\n}", "github": "", "reviewers": "3HaP;cokD;KtpZ", "pdf_size": 892816, "rating": "6;7;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "2;3;3", "wc_summary": "115;175;96", "wc_strengths": "198;85;97", "wc_weaknesses": "101;146;29", "wc_questions": "61;33;113", "wc_limitations": "1;5;7", "wc_review": "476;444;342", "wc_reply_reviewers": "15;0;62", "wc_reply_authors": "66;0;10", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 128.66666666666666, 33.66831679124389 ], "wc_strengths_avg": [ 126.66666666666667, 50.6776303927307 ], "wc_weaknesses_avg": [ 92.0, 48.187135212627034 ], "wc_questions_avg": [ 69.0, 33.14614105241614 ], "wc_limitations_avg": [ 4.333333333333333, 2.494438257849294 ], "wc_review_avg": [ 420.6666666666667, 57.13920623257632 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 26.411277052720408 ], "wc_reply_authors_avg": [ 25.333333333333332, 29.044027881055953 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2374466128305245747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;northwestern.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.northwestern.edu", "aff_unique_abbr": "MIT;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Finding good policies in average-reward Markov Decision Processes without prior knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95839", "id": "HPvIf4w5Dd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HPvIf4w5Dd", "openreview": "https://openreview.net/forum?id=HPvIf4w5Dd", "poster": "", "project": "", "author_site": "Adrienne Tuynman, R\u00e9my Degenne, Emilie Kaufmann", "tldr": "", "abstract": "We revisit the identification of an $\\varepsilon$-optimal policy in average-reward Markov Decision Processes (MDP). In such MDPs, two measures of complexity have appeared in the literature: the diameter, $D$, and the optimal bias span, $H$, which satisfy $H\\leq D$.\n Prior work have studied the complexity of $\\varepsilon$-optimal policy identification only when a generative model is available. In this case, it is known that there exists an MDP with $D \\simeq H$ for which the sample complexity to output an $\\varepsilon$-optimal policy is $\\Omega(SAD/\\varepsilon^2)$ where $S$ and $A$ are the sizes of the state and action spaces. Recently, an algorithm with a sample complexity of order $SAH/\\varepsilon^2$ has been proposed, but it requires the knowledge of $H$. We first show that the sample complexity required to estimate $H$ is not bounded by any function of $S,A$ and $H$, ruling out the possibility to easily make the previous algorithm agnostic to $H$. By relying instead on a diameter estimation procedure, we propose the first algorithm for $(\\varepsilon,\\delta)$-PAC policy identification that does not need any form of prior knowledge on the MDP. Its sample complexity scales in $SAD/\\varepsilon^2$ in the regime of small $\\varepsilon$, which is near-optimal. In the online setting, our first contribution is a lower bound which implies that a sample complexity polynomial in $H$ cannot be achieved in this setting. Then, we propose an online algorithm with a sample complexity in $SAD^2/\\varepsilon^2$, as well as a novel approach based on a data-dependent stopping rule that we believe is promising to further reduce this bound.", "keywords": "sample complexity;Markov decision process;best policy identification;average reward", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Adrienne Tuynman;R\u00e9my Degenne;Emilie Kaufmann", "authorids": "~Adrienne_Tuynman1;~R\u00e9my_Degenne1;~Emilie_Kaufmann1", "gender": "F;M;F", "homepage": "https://adriennetuynman.github.io/;https://remydegenne.github.io/;https://emiliekaufmann.github.io/", "dblp": "313/1776;157/1070;67/11350", "google_scholar": ";https://scholar.google.fr/citations?user=H-uIBOwAAAAJ;9GE1vx4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Adrienne_Tuynman1;~R\u00e9my_Degenne1;~Emilie_Kaufmann1", "aff": "INRIA;INRIA;CNRS", "aff_domain": "inria.fr;inria.fr;cnrs.fr", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\ntuynman2024finding,\ntitle={Finding good policies in average-reward Markov Decision Processes without prior knowledge},\nauthor={Adrienne Tuynman and R{\\'e}my Degenne and Emilie Kaufmann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HPvIf4w5Dd}\n}", "github": "", "reviewers": "en8V;NtfV;FXz4;XRXp", "pdf_size": 485832, "rating": "4;4;6;8", "confidence": "4;3;4;3", "soundness": "3;4;3;4", "novelty": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "64;137;115;175", "wc_strengths": "27;108;88;9", "wc_weaknesses": "136;269;83;162", "wc_questions": "8;69;265;1", "wc_limitations": "1;9;8;1", "wc_review": "236;592;559;348", "wc_reply_reviewers": "24;0;185;26", "wc_reply_authors": "0;0;76;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 122.75, 40.139600147485275 ], "wc_strengths_avg": [ 58.0, 41.11569043564756 ], "wc_weaknesses_avg": [ 162.5, 67.75876327088622 ], "wc_questions_avg": [ 85.75, 106.81613876189309 ], "wc_limitations_avg": [ 4.75, 3.766629793329841 ], "wc_review_avg": [ 433.75, 147.63870596831984 ], "wc_reply_reviewers_avg": [ 58.75, 73.60494208950918 ], "wc_reply_authors_avg": [ 19.0, 32.90896534380867 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12384170693246813498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "inria.fr;inria.fr;cnrs.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.cnrs.fr", "aff_unique_abbr": "INRIA;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Is Score Matching Suitable for Estimating Point Processes?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95838", "id": "HQgHCVZiHw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HQgHCVZiHw", "openreview": "https://openreview.net/forum?id=HQgHCVZiHw", "poster": "/media/PosterPDFs/NeurIPS%202024/95838.png?t=1731622415.2731416", "project": "", "author_site": "Haoqun Cao, Zizhuo Meng, Tianjun Ke, Feng Zhou", "tldr": "", "abstract": "Score matching estimators for point processes have gained widespread attention in recent years because they do not require the calculation of intensity integrals, thereby effectively addressing the computational challenges in maximum likelihood estimation (MLE). Some existing works have proposed score matching estimators for point processes. However, this work demonstrates that the incompleteness of the estimators proposed in those works renders them applicable only to specific problems, and they fail for more general point processes. To address this issue, this work introduces the weighted score matching estimator to point processes. Theoretically, we prove the consistency of the estimator we propose. Experimental results indicate that our estimator accurately estimates model parameters on synthetic data and yields results consistent with MLE on real data. In contrast, existing score matching estimators fail to perform effectively. Codes are publicly available at \\url{https://github.com/KenCao2007/WSM_TPP}.", "keywords": "point processes;score matching;parameter estimation", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Haoqun Cao;Zizhuo Meng;Tianjun Ke;Feng Zhou", "authorids": "~Haoqun_Cao1;~Zizhuo_Meng1;~Tianjun_Ke1;~Feng_Zhou9", "gender": "M;M;;", "homepage": "https://kencao2007.github.io/;https://github.com/waystogetthere?tab=repositories;https://keanson.github.io/;", "dblp": ";;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Haoqun_Cao1;~Zizhuo_Meng1;~Tianjun_Ke1;~Feng_Zhou9", "aff": "Renmin University of China;University of Technology Sydney;School of Statistics, Renmin University of China;", "aff_domain": "ruc.edu.cn;uts.edu.au;stat.ruc.edu.cn;", "position": "Undergrad student;PhD student;Undergrad student;", "bibtex": "@inproceedings{\ncao2024is,\ntitle={Is Score Matching Suitable for Estimating Point Processes?},\nauthor={Haoqun Cao and Zizhuo Meng and Tianjun Ke and Feng Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HQgHCVZiHw}\n}", "github": "", "reviewers": "B3db;Se8E;X8YN", "pdf_size": 605608, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "72;493;160", "wc_strengths": "51;3;212", "wc_weaknesses": "166;3;248", "wc_questions": "12;3;48", "wc_limitations": "3;3;1", "wc_review": "304;505;669", "wc_reply_reviewers": "164;0;32", "wc_reply_authors": "776;82;38", "reply_reviewers": "3;0;1", "reply_authors": "4;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 241.66666666666666, 181.314337240299 ], "wc_strengths_avg": [ 88.66666666666667, 89.38431381151591 ], "wc_weaknesses_avg": [ 139.0, 101.82665008074589 ], "wc_questions_avg": [ 21.0, 19.44222209522358 ], "wc_limitations_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_review_avg": [ 492.6666666666667, 149.26560964342127 ], "wc_reply_reviewers_avg": [ 65.33333333333333, 70.98043548909955 ], "wc_reply_authors_avg": [ 298.6666666666667, 338.003287294994 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18229851972296730379&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;uts.edu.au;stat.ruc.edu.cn;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Renmin University of China;University of Technology Sydney", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.uts.edu.au", "aff_unique_abbr": "RUC;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Neural Pfaffians: Solving Many Many-Electron Schr\u00f6dinger Equations", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95837", "id": "HRkniCWM3E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HRkniCWM3E", "openreview": "https://openreview.net/forum?id=HRkniCWM3E", "poster": "", "project": "", "author_site": "Nicholas Gao, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Neural wave functions accomplished unprecedented accuracies in approximating the ground state of many-electron systems, though at a high computational cost. Recent works proposed amortizing the cost by learning generalized wave functions across different structures and compounds instead of solving each problem independently. Enforcing the permutation antisymmetry of electrons in such generalized neural wave functions remained challenging as existing methods require discrete orbital selection via non-learnable hand-crafted algorithms. This work tackles the problem by defining overparametrized, fully learnable neural wave functions suitable for generalization across molecules. We achieve this by relying on Pfaffians rather than Slater determinants. The Pfaffian allows us to enforce the antisymmetry on arbitrary electronic systems without any constraint on electronic spin configurations or molecular structure. Our empirical evaluation finds that a single neural Pfaffian calculates the ground state and ionization energies with chemical accuracy across various systems. On the TinyMol dataset, we outperform the `gold-standard' CCSD(T) CBS reference energies by 1.9m$E_h$ and reduce energy errors compared to previous generalized neural wave functions by up to an order of magnitude.", "keywords": "Machine Learning for Science;Pfaffian;Neural Network;Molecules;Electrons;Computational Physics;Computational Chemistry;Quantum Chemistry;Quantum Monte Carlo;Variational Monte Carlo;Neural Quantum States;Wave Function", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/0ee9b986e54fa0b69ea82f622e4dd34adfa9fb94.zip", "author": "Nicholas Gao;Stephan G\u00fcnnemann", "authorids": "~Nicholas_Gao1;~Stephan_G\u00fcnnemann1", "gender": "M;M", "homepage": ";http://www.daml.in.tum.de", "dblp": ";43/3011", "google_scholar": "3GIKgWoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Nicholas_Gao1;~Stephan_G\u00fcnnemann1", "aff": "Technical University Munich;Technical University Munich", "aff_domain": "tum.de;tum.de", "position": "PhD student;Professor", "bibtex": "@inproceedings{\ngao2024neural,\ntitle={Neural Pfaffians: Solving Many Many-Electron Schr\\\"odinger Equations},\nauthor={Nicholas Gao and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HRkniCWM3E}\n}", "github": "", "reviewers": "F2V3;K3cB;7TwG;3Wr9", "pdf_size": 1515000, "rating": "6;6;6;8", "confidence": "3;3;5;3", "soundness": "2;4;4;4", "novelty": "3;4;3;4", "presentation": "3;4;4;3", "wc_summary": "99;77;136;48", "wc_strengths": "105;49;129;103", "wc_weaknesses": "132;26;454;139", "wc_questions": "35;61;47;226", "wc_limitations": "3;12;1;1", "wc_review": "374;225;767;517", "wc_reply_reviewers": "20;0;64;9", "wc_reply_authors": "0;0;487;0", "reply_reviewers": "1;0;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 90.0, 32.132538026119256 ], "wc_strengths_avg": [ 96.5, 29.270292106502797 ], "wc_weaknesses_avg": [ 187.75, 160.10680029280456 ], "wc_questions_avg": [ 92.25, 77.76687919673773 ], "wc_limitations_avg": [ 4.25, 4.548351349665063 ], "wc_review_avg": [ 470.75, 199.7853535672723 ], "wc_reply_reviewers_avg": [ 23.25, 24.57005290999594 ], "wc_reply_authors_avg": [ 121.75, 210.8771858215108 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3123634657344657403&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tum.de;tum.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technical University of Munich", "aff_unique_dep": "", "aff_unique_url": "https://www.tum.de", "aff_unique_abbr": "TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Navigating the Maze of Explainable AI: A Systematic Approach to Evaluating Methods and Metrics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97772", "id": "HRkwnZewLC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HRkwnZewLC", "openreview": "https://openreview.net/forum?id=HRkwnZewLC", "poster": "/media/PosterPDFs/NeurIPS%202024/97772.png?t=1732712833.0222847", "project": "", "author_site": "Lukas Klein, Carsten L\u00fcth, Udo Schlegel, Till Bungert, Mennatallah El-Assady, Paul Jaeger", "tldr": "", "abstract": "Explainable AI (XAI) is a rapidly growing domain with a myriad of proposed methods as well as metrics aiming to evaluate their efficacy. However, current studies are often of limited scope, examining only a handful of XAI methods and ignoring underlying design parameters for performance, such as the model architecture or the nature of input data. Moreover, they often rely on one or a few metrics and neglect thorough validation, increasing the risk of selection bias and ignoring discrepancies among metrics. These shortcomings leave practitioners confused about which method to choose for their problem. In response, we introduce LATEC, a large-scale benchmark that critically evaluates 17 prominent XAI methods using 20 distinct metrics. We systematically incorporate vital design parameters like varied architectures and diverse input modalities, resulting in 7,560 examined combinations. Through LATEC, we showcase the high risk of conflicting metrics leading to unreliable rankings and consequently propose a more robust evaluation scheme. Further, we comprehensively evaluate various XAI methods to assist practitioners in selecting appropriate methods aligning with their needs. Curiously, the emerging top-performing method, Expected Gradients, is not examined in any relevant related study. LATEC reinforces its role in future XAI research by publicly releasing all 326k saliency maps and 378k metric scores as a (meta-)evaluation dataset. The benchmark is hosted at: https://github.com/IML-DKFZ/latec.", "keywords": "Explainable AI;Metric Analysis;Evaluation;Attention;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Lukas Klein;Carsten T. L\u00fcth;Udo Schlegel;Till J. Bungert;Mennatallah El-Assady;Paul F Jaeger", "authorids": "~Lukas_Klein1;~Carsten_T._L\u00fcth1;~Udo_Schlegel1;~Till_J._Bungert1;~Mennatallah_El-Assady1;~Paul_F_Jaeger1", "gender": "M;;;;M;M", "homepage": "https://lukaskln.github.io;https://udo-schl.github.io/;;https://pfjaeger.github.io;;https://github.com/tbung", "dblp": ";;183/8957;179/4749;244/2162;334/4062", "google_scholar": "https://scholar.google.de/citations?user=sxpuZg4AAAAJ;https://scholar.google.de/citations?user=OmfaIuYAAAAJ;;https://scholar.google.de/citations?user=9B9-8h0AAAAJ;3L6NkggAAAAJ;yWaPy7AAAAAJ", "orcid": ";0000-0002-8266-0162;0000-0001-8526-2613;;;", "linkedin": ";;;;carsten-l\u00fcth-530798190/;", "or_profile": "~Lukas_Klein1;~Udo_Schlegel1;~Mennatallah_El-Assady1;~Paul_F_Jaeger1;~Carsten_Tim_L\u00fcth1;~Till_Bungert1", "aff": "German Cancer Research Center;University of Konstanz;Department of Computer Science, ETHZ - ETH Zurich;German Cancer Research Center;German Cancer Research Center;Deutsches Krebsforschungszentrum", "aff_domain": "dkfz.de;uni-konstanz.de;inf.ethz.ch;dkfz.de;dkfz-heidelberg.de;dkfz-heidelberg.de", "position": "PhD student;PhD student;Assistant Professor;Research Group Leader;PhD student;PhD student", "bibtex": "@inproceedings{\nklein2024navigating,\ntitle={Navigating the Maze of Explainable {AI}: A Systematic Approach to Evaluating Methods and Metrics},\nauthor={Lukas Klein and Carsten T. L{\\\"u}th and Udo Schlegel and Till J. Bungert and Mennatallah El-Assady and Paul F Jaeger},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=HRkwnZewLC}\n}", "github": "", "reviewers": "JMZx;xgqg;AiHf;6Gad", "pdf_size": 15808210, "rating": "5;6;7;7", "confidence": "3;3;4;5", "wc_summary_and_contributions": "46;86;59;79", "wc_strengths": "27;64;56;113", "wc_improvement": "84;45;83;513", "wc_limitations": "1;1;16;3", "wc_correctness": "1;1;5;6", "wc_clarity": "6;1;4;7", "wc_relation_to_prior_work": "4;1;9;15", "wc_documentation": "1;1;9;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "171;201;242;741", "wc_reply_reviewers": "54;0;0;39", "wc_reply_authors": "271;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 67.5, 15.88238017426859 ], "wc_strengths_avg": [ 65.0, 30.943496893531602 ], "wc_improvement_avg": [ 181.25, 192.18009132061522 ], "wc_limitations_avg": [ 5.25, 6.2599920127744575 ], "wc_correctness_avg": [ 3.25, 2.277608394786075 ], "wc_clarity_avg": [ 4.5, 2.29128784747792 ], "wc_relation_to_prior_work_avg": [ 7.25, 5.3091901453988255 ], "wc_documentation_avg": [ 3.75, 3.2691742076555053 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 338.75, 233.60262733967699 ], "wc_reply_reviewers_avg": [ 23.25, 23.84716964337697 ], "wc_reply_authors_avg": [ 67.75, 117.34644221279143 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12027234436535455307&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "dkfz.de;uni-konstanz.de;inf.ethz.ch;dkfz.de;dkfz-heidelberg.de;dkfz-heidelberg.de", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "German Cancer Research Center;University of Konstanz;ETH Zurich;Deutsches Krebsforschungszentrum", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://www.dkfz.de;https://www.uni-konstanz.de;https://www.ethz.ch;https://www.dkfz.de", "aff_unique_abbr": "DKFZ;Uni Konstanz;ETHZ;DKFZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Germany;Switzerland" }, { "title": "Schur Nets: exploiting local structure for equivariance in higher order graph neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95836", "id": "HRnSVflpgt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HRnSVflpgt", "openreview": "https://openreview.net/forum?id=HRnSVflpgt", "poster": "/media/PosterPDFs/NeurIPS%202024/95836.png?t=1733893686.9838364", "project": "", "author_site": "QINGQI ZHANG, Ruize Xu, Risi Kondor", "tldr": "", "abstract": "Recent works have shown that extending the message passing paradigm to subgraphs communicating with other subgraphs, especially via higher order messages, can boost the expressivity of graph neural networks. In such architectures, to faithfully account for local structure such as cycles, the local operations must be equivariant to the automorphism group of the local environment. However, enumerating the automorphism groups of all subgraphs of interest and finding appropriate equivariant operations for each one of them separately is generally not feasible. In this paper we propose a solution to this problem based on spectral graph theory that bypasses \nhaving to determine the automorphism group entirely and constructs a basis for equivariant operations directly from the graph Laplacian. \nWe show that this approach can boost the performance of GNNs on some standard benchmarks.", "keywords": "graph neural networks;equivariance;spectral graph theory;higher order message passing", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "QINGQI ZHANG;Ruize Xu;Risi Kondor", "authorids": "~QINGQI_ZHANG2;~Ruize_Xu3;~Risi_Kondor1", "gender": "M;M;M", "homepage": ";;http://people.cs.uchicago.edu/~risi/", "dblp": ";;90/869", "google_scholar": ";;v12-jLUAAAAJ", "orcid": ";;", "linkedin": "qingqi-zhang-7a89b3195/;richard-xu-722811170;", "or_profile": "~QINGQI_ZHANG2;~Ruize_Xu3;~Risi_Kondor1", "aff": "University of Chicago;University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;uchicago.edu", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2024schur,\ntitle={Schur Nets: exploiting local structure for equivariance in higher order graph neural networks},\nauthor={QINGQI ZHANG and Ruize Xu and Risi Kondor},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HRnSVflpgt}\n}", "github": "", "reviewers": "uFzV;uvzr;fYBT;EFuW", "pdf_size": 652921, "rating": "4;5;6;7", "confidence": "3;2;4;4", "soundness": "2;3;2;4", "novelty": "3;3;3;3", "presentation": "1;2;3;3", "wc_summary": "47;41;93;105", "wc_strengths": "64;110;42;46", "wc_weaknesses": "274;271;45;63", "wc_questions": "19;79;73;63", "wc_limitations": "29;1;59;1", "wc_review": "433;502;312;278", "wc_reply_reviewers": "159;63;0;203", "wc_reply_authors": "369;0;0;930", "reply_reviewers": "1;1;0;2", "reply_authors": "2;1;1;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 71.5, 27.906092524751653 ], "wc_strengths_avg": [ 65.5, 26.995369973386175 ], "wc_weaknesses_avg": [ 163.25, 109.44033762740318 ], "wc_questions_avg": [ 58.5, 23.510635891017493 ], "wc_limitations_avg": [ 22.5, 23.973944189473706 ], "wc_review_avg": [ 381.25, 90.43609622269197 ], "wc_reply_reviewers_avg": [ 106.25, 79.53419076095513 ], "wc_reply_authors_avg": [ 324.75, 380.52948308902427 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MlxSsIWKVGUJ:scholar.google.com/&scioq=Schur+Nets:+exploiting+local+structure+for+equivariance+in+higher+order+graph+neural+networks&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "uchicago.edu;uchicago.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Time-FFM: Towards LM-Empowered Federated Foundation Model for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95835", "id": "HS0faHRhWD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HS0faHRhWD", "openreview": "https://openreview.net/forum?id=HS0faHRhWD", "poster": "/media/PosterPDFs/NeurIPS%202024/95835.png?t=1731289976.8003511", "project": "", "author_site": "Qingxiang Liu, Xu Liu, Chenghao Liu, Qingsong Wen, Yuxuan Liang", "tldr": "", "abstract": "Unlike natural language processing and computer vision, the development of Foundation Models (FMs) for time series forecasting is blocked due to data scarcity. \nWhile recent efforts are focused on building such FMs by unlocking the potential of language models (LMs) for time series analysis, dedicated parameters for various downstream forecasting tasks need training, which hinders the common knowledge sharing across domains.\nMoreover, data owners may hesitate to share the access to local data due to privacy concerns and copyright protection, which makes it impossible to simply construct a FM on cross-domain training instances.\nTo address these issues, we propose Time-FFM, a Federated Foundation Model for Time series forecasting by leveraging pretrained LMs.\nSpecifically, we begin by transforming time series into the modality of text tokens.\nTo bootstrap LMs for time series reasoning, we propose a prompt adaption module to determine domain-customized prompts dynamically instead of artificially.\nGiven the data heterogeneity across domains, we design a personalized federated training strategy by learning global encoders and local prediction heads. \nOur comprehensive experiments indicate that Time-FFM outperforms state-of-the-arts and promises effective few-shot and zero-shot forecaster.\nThe code is available at https://github.com/CityMind-Lab/NeurIPS24-Time-FFM/tree/main.", "keywords": "Foundation Model;Cross-Modality Adaption;Time Series Forecasting;Federated Learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Qingxiang Liu;Xu Liu;Chenghao Liu;Qingsong Wen;Yuxuan Liang", "authorids": "~Qingxiang_Liu1;~Xu_Liu9;~Chenghao_Liu1;~Qingsong_Wen2;~Yuxuan_Liang1", "gender": "M;;M;M;M", "homepage": ";;;https://yuxuanliang.com;https://sites.google.com/site/qingsongwen8/", "dblp": ";93/3167-14;;183/0977;27/561", "google_scholar": "f1_Rw48AAAAJ;JTzLTycAAAAJ;https://scholar.google.com/citations?hl=en;n9cODgcAAAAJ;vjPJvwYAAAAJ", "orcid": "0000-0003-2277-830X;0000-0003-2708-0584;;0000-0003-2817-7337;0000-0003-4516-2524", "linkedin": ";liuxu-187825160/;chenghao-liu-40a62a56/;yoshall/;qingsong-wen-22814156/", "or_profile": "~Qingxiang_Liu1;~Xu_Liu9;~Chenghao_Liu1;~Yuxuan_Liang1;~Qingsong_Wen1", "aff": "Chinese Academy of Sciences;National University of Singapore;Salesforce AI Research;The Hong Kong University of Science and Technology (Guangzhou);Squirrel Ai Learning", "aff_domain": "ict.ac.cn;nus.edu.sg;salesforce.com;hkust-gz.edu.cn;squirrelai.com", "position": "PhD student;PhD student;Researcher;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2024timeffm,\ntitle={Time-{FFM}: Towards {LM}-Empowered Federated Foundation Model for Time Series Forecasting},\nauthor={Qingxiang Liu and Xu Liu and Chenghao Liu and Qingsong Wen and Yuxuan Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HS0faHRhWD}\n}", "github": "", "reviewers": "PDR4;LqRs;7qPS;uvxC", "pdf_size": 623762, "rating": "5;6;7;7", "confidence": "4;4;5;5", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;4;3", "wc_summary": "67;48;41;107", "wc_strengths": "49;76;49;23", "wc_weaknesses": "36;131;283;511", "wc_questions": "21;78;76;29", "wc_limitations": "1;7;21;71", "wc_review": "174;340;470;741", "wc_reply_reviewers": "72;0;127;599", "wc_reply_authors": "343;130;571;1090", "reply_reviewers": "2;0;2;2", "reply_authors": "3;3;3;4", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.75, 25.645418694183956 ], "wc_strengths_avg": [ 49.25, 18.73999733191016 ], "wc_weaknesses_avg": [ 240.25, 179.43435429147897 ], "wc_questions_avg": [ 51.0, 26.16295090390226 ], "wc_limitations_avg": [ 25.0, 27.53179979587241 ], "wc_review_avg": [ 431.25, 207.334723334033 ], "wc_reply_reviewers_avg": [ 199.5, 235.00691479188436 ], "wc_reply_authors_avg": [ 533.5, 357.1417785697999 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14090856663808768317&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;nus.edu.sg;salesforce.com;hkust-gz.edu.cn;squirrelai.com", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Chinese Academy of Sciences;National University of Singapore;Salesforce;Hong Kong University of Science and Technology;Squirrel Ai Learning", "aff_unique_dep": ";;Salesforce AI Research;;", "aff_unique_url": "https://www.cas.cn;https://www.nus.edu.sg;https://www.salesforce.com;https://www.ust.hk;https://www.squirrelai.com/", "aff_unique_abbr": "CAS;NUS;Salesforce AI;HKUST;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Initializing Services in Interactive ML Systems for Diverse Users", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95834", "id": "HSJOt2hyDf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HSJOt2hyDf", "openreview": "https://openreview.net/forum?id=HSJOt2hyDf", "poster": "", "project": "", "author_site": "Avinandan Bose, Mihaela Curmei, Daniel Jiang, Jamie Morgenstern, Sarah Dean, Lillian Ratliff, Maryam Fazel", "tldr": "", "abstract": "This paper investigates ML systems serving a group of users, with multiple models/services, each aimed at specializing to a sub-group of users. We consider settings where upon deploying a set of services, users choose the one minimizing their personal losses and the learner iteratively learns by interacting with diverse users. Prior research shows that the outcomes of learning dynamics, which comprise both the services' adjustments and users' service selections, hinge significantly on the initial conditions. However, finding good initial conditions faces two main challenges: (i) \\emph{Bandit feedback:} Typically, data on user preferences are not available before deploying services \nand observing user behavior; (ii) \\emph{Suboptimal local solutions:} The total loss landscape (i.e., the sum of loss functions across all users and services) is not convex and gradient-based algorithms can get stuck in poor local minima.\n\nWe address these challenges with a randomized algorithm to adaptively select a minimal set of users for data collection in order to initialize a set of services. Under mild assumptions on the loss functions, we prove that our initialization leads to a total loss within a factor of the \\textit{globally optimal total loss,with complete user preference data}, and this factor scales logarithmically in the number of services. This result is a generalization of the well-known $k$-means++ guarantee to a broad problem class which is also of independent interest.\nThe theory is complemented by experiments on real as well as semi-synthetic datasets.", "keywords": "Algorithm design for multi-service ML systems;initialization;clustering;approximation ratio;preference learning", "primary_area": "active_learning", "supplementary_material": "/attachment/a68c7875e730878a402da38843ef409672c75c8c.zip", "author": "Avinandan Bose;Mihaela Curmei;Daniel L. Jiang;Jamie Heather Morgenstern;Sarah Dean;Lillian J. Ratliff;Maryam Fazel", "authorids": "~Avinandan_Bose1;~Mihaela_Curmei2;~Daniel_L._Jiang2;~Jamie_Heather_Morgenstern1;~Sarah_Dean2;~Lillian_J._Ratliff1;~Maryam_Fazel1", "gender": "M;F;;F;F;;F", "homepage": "https://avinandan22.github.io/;https://mcurmei627.github.io/;http://jamiemorgenstern.com;https://sdean.website/;;https://danieljiang.me;https://faculty.washington.edu/ratliffl/", "dblp": "305/7490;204/0161;64/8610;207/8292;10/2309;294/3583;127/7426", "google_scholar": "https://scholar.google.com/citations?pli=1;nc6hvFgAAAAJ;https://scholar.google.com/citations?hl=en;xhKqjpYAAAAJ;vlN_kRoAAAAJ;GgvlPkkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0001-8936-0229", "linkedin": ";mihaela-curmei/;;;;daniel-jiang/;", "or_profile": "~Avinandan_Bose1;~Mihaela_Curmei2;~Jamie_Heather_Morgenstern1;~Sarah_Dean2;~Maryam_Fazel1;~Daniel_Lu_Jiang1;~Lillian_Ratliff1", "aff": "Department of Computer Science;University of California, Berkeley;;Cornell University;University of Washington, Seattle;Department of Computer Science;University of Washington, Seattle", "aff_domain": "cs.washington.edu;berkeley.edu;;cornell.edu;uw.edu;cs.washington.edu;uw.edu", "position": "PhD student;PhD student;;Assistant Professor;Full Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbose2024initializing,\ntitle={Initializing Services in Interactive {ML} Systems for Diverse Users},\nauthor={Avinandan Bose and Mihaela Curmei and Daniel L. Jiang and Jamie Heather Morgenstern and Sarah Dean and Lillian J. Ratliff and Maryam Fazel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HSJOt2hyDf}\n}", "github": "", "reviewers": "qH2x;XL2L;vEYc", "pdf_size": 3368057, "rating": "6;6;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "121;136;88", "wc_strengths": "98;34;64", "wc_weaknesses": "69;142;11", "wc_questions": "54;73;16", "wc_limitations": "1;8;1", "wc_review": "343;393;180", "wc_reply_reviewers": "0;32;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 115.0, 20.049937655763422 ], "wc_strengths_avg": [ 65.33333333333333, 26.144895401503437 ], "wc_weaknesses_avg": [ 74.0, 53.59726361174297 ], "wc_questions_avg": [ 47.666666666666664, 23.697163449568293 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_review_avg": [ 305.3333333333333, 90.94442747573316 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 13.199326582148888 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12887480297801347976&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cs.washington.edu;berkeley.edu;;cornell.edu;uw.edu;cs.washington.edu;uw.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0;3", "aff_unique_norm": "Unknown Institution;University of California, Berkeley;Cornell University;University of Washington", "aff_unique_dep": "Department of Computer Science;;;", "aff_unique_url": ";https://www.berkeley.edu;https://www.cornell.edu;https://www.washington.edu", "aff_unique_abbr": ";UC Berkeley;Cornell;UW", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Berkeley;Seattle", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Preventing Model Collapse in Deep Canonical Correlation Analysis by Noise Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95833", "id": "HSRs6yyuUK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HSRs6yyuUK", "openreview": "https://openreview.net/forum?id=HSRs6yyuUK", "poster": "/media/PosterPDFs/NeurIPS%202024/95833.png?t=1731676662.5804658", "project": "", "author_site": "Junlin He, Jinxiao Du, Susu Xu, Wei Ma", "tldr": "", "abstract": "Multi-View Representation Learning (MVRL) aims to learn a unified representation of an object from multi-view data.\nDeep Canonical Correlation Analysis (DCCA) and its variants share simple formulations and demonstrate state-of-the-art performance. However, with extensive experiments, we observe the issue of model collapse, i.e., the performance of DCCA-based methods will drop drastically when training proceeds. The model collapse issue could significantly hinder the wide adoption of DCCA-based methods because it is challenging to decide when to early stop. To this end, we develop NR-DCCA, which is equipped with a novel noise regularization approach to prevent model collapse. Theoretical analysis shows that the Correlation Invariant Property is the key to preventing model collapse, and our noise regularization forces the neural network to possess such a property. A framework to construct synthetic data with different common and complementary information is also developed to compare MVRL methods comprehensively. The developed NR-DCCA outperforms baselines stably and consistently in both synthetic and real-world datasets, and the proposed noise regularization approach can also be generalized to other DCCA-based methods such as DGCCA.", "keywords": "Multi-view representation learning; Canonical Correlation Analysis; Deep Canonical Correlation Analysis; Noise regularization; Model collapse", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/00f1ec80c456e665c5a9fbbe38382d72f3d5e930.zip", "author": "Junlin He;Jinxiao Du;Susu Xu;Wei Ma", "authorids": "~Junlin_He2;~Jinxiao_Du1;~Susu_Xu2;~Wei_Ma3", "gender": ";M;F;M", "homepage": ";;http://susu-xu.com/;http://polyu-mobility-ai-lab.com/", "dblp": ";;188/9306;", "google_scholar": "1wyJPxQAAAAJ;;MSCQE-YAAAAJ;syUpc-gAAAAJ", "orcid": ";0000-0003-0247-6339;0000-0001-7170-648X;0000-0001-8945-5877", "linkedin": ";;;", "or_profile": "~Junlin_He2;~Jinxiao_Du1;~Susu_Xu2;~Wei_Ma3", "aff": "Hong Kong Polytechnic University;Hong Kong Polytechnic University;Johns Hopkins University;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;jhu.edu;polyu.edu.hk", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhe2024preventing,\ntitle={Preventing Model Collapse in Deep Canonical Correlation Analysis by Noise Regularization},\nauthor={Junlin He and Jinxiao Du and Susu Xu and Wei Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HSRs6yyuUK}\n}", "github": "", "reviewers": "21Qa;BL52;ForS;g1Nh", "pdf_size": 9763910, "rating": "4;5;5;5", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;1", "wc_summary": "41;60;87;48", "wc_strengths": "41;24;148;54", "wc_weaknesses": "68;107;161;81", "wc_questions": "8;2;62;181", "wc_limitations": "1;1;6;4", "wc_review": "159;194;464;368", "wc_reply_reviewers": "0;29;0;0", "wc_reply_authors": "0;9;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 59.0, 17.53567791675018 ], "wc_strengths_avg": [ 66.75, 48.100805606559234 ], "wc_weaknesses_avg": [ 104.25, 35.64670391494843 ], "wc_questions_avg": [ 63.25, 71.88662949394693 ], "wc_limitations_avg": [ 3.0, 2.1213203435596424 ], "wc_review_avg": [ 296.25, 125.08072393458554 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 2.25, 3.897114317029974 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8pxL4GgJFFYJ:scholar.google.com/&scioq=Preventing+Model+Collapse+in+Deep+Canonical+Correlation+Analysis+by+Noise+Regularization&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "polyu.edu.hk;polyu.edu.hk;jhu.edu;polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Hong Kong Polytechnic University;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.jhu.edu", "aff_unique_abbr": "PolyU;JHU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "LLM Processes: Numerical Predictive Distributions Conditioned on Natural Language", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95832", "id": "HShs7q1Njh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HShs7q1Njh", "openreview": "https://openreview.net/forum?id=HShs7q1Njh", "poster": "", "project": "", "author_site": "James Requeima, John Bronskill, Dami Choi, Richard Turner, David Duvenaud", "tldr": "", "abstract": "Machine learning practitioners often face significant challenges in formally integrating their prior knowledge and beliefs into predictive models, limiting the potential for nuanced and context-aware analyses. Moreover, the expertise needed to integrate this prior knowledge into probabilistic modeling typically limits the application of these models to specialists. Our goal is to build a regression model that can process numerical data and make probabilistic predictions at arbitrary locations, guided by natural language text which describes a user's prior knowledge. Large Language Models (LLMs) provide a useful starting point for designing such a tool since they 1) provide an interface where users can incorporate expert insights in natural language and 2) provide an opportunity for leveraging latent problem-relevant knowledge encoded in LLMs that users may not have themselves. We start by exploring strategies for eliciting explicit, coherent numerical predictive distributions from LLMs. We examine these joint predictive distributions, which we call LLM Processes, over arbitrarily-many quantities in settings such as forecasting, multi-dimensional regression, black-box optimization, and image modeling. We investigate the practical details of prompting to elicit coherent predictive distributions, and demonstrate their effectiveness at regression. Finally, we demonstrate the ability to usefully incorporate text into numerical predictions, improving predictive performance and giving quantitative structure that reflects qualitative descriptions. This lets us begin to explore the rich, grounded hypothesis space that LLMs implicitly encode.", "keywords": "Large Language Models;Probabilistic Regression;In-context Learning", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "James Requeima;John F Bronskill;Dami Choi;Richard E. Turner;David Duvenaud", "authorids": "~James_Requeima1;~John_F_Bronskill1;~Dami_Choi1;~Richard_E_Turner1;~David_Duvenaud2", "gender": "M;M;;M;M", "homepage": "http://jamesr.info;;;https://rich-turner-group.github.io/;https://www.cs.toronto.edu/~duvenaud/", "dblp": ";;209/9687;40/5352;86/9380", "google_scholar": "https://scholar.google.ca/citations?hl=en;https://scholar.google.co.nz/citations?user=aH2jZsoAAAAJ;giuZW04AAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;https://scholar.google.ca/citations?user=ZLpO3XQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~James_Requeima1;~John_F_Bronskill1;~Dami_Choi1;~Richard_E_Turner1;~David_Duvenaud2", "aff": "University of Toronto;University of Cambridge;Department of Computer Science, University of Toronto;Microsoft Research;Anthropic", "aff_domain": "cs.toronto;cam.ac.uk;cs.toronto.edu;research.microsoft.com;anthropic.com", "position": "Postdoc;Research Associate;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nrequeima2024llm,\ntitle={{LLM} Processes: Numerical Predictive Distributions Conditioned on Natural Language},\nauthor={James Requeima and John F Bronskill and Dami Choi and Richard E. Turner and David Duvenaud},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HShs7q1Njh}\n}", "github": "", "reviewers": "mWHr;dL9w;Qkoz", "pdf_size": 9198945, "rating": "6;7;9", "confidence": "3;3;4", "soundness": "4;4;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "241;49;86", "wc_strengths": "41;32;48", "wc_weaknesses": "9;38;53", "wc_questions": "101;90;2", "wc_limitations": "5;1;12", "wc_review": "397;210;201", "wc_reply_reviewers": "50;16;57", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 125.33333333333333, 83.17184352625326 ], "wc_strengths_avg": [ 40.333333333333336, 6.548960901462833 ], "wc_weaknesses_avg": [ 33.333333333333336, 18.263503375736963 ], "wc_questions_avg": [ 64.33333333333333, 44.304501903180096 ], "wc_limitations_avg": [ 6.0, 4.546060565661952 ], "wc_review_avg": [ 269.3333333333333, 90.34870717884617 ], "wc_reply_reviewers_avg": [ 41.0, 17.90716802475106 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12363175659835159386&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.toronto;cam.ac.uk;cs.toronto.edu;research.microsoft.com;anthropic.com", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of Toronto;University of Cambridge;Microsoft;Anthropic", "aff_unique_dep": ";;Microsoft Research;", "aff_unique_url": "https://www.utoronto.ca;https://www.cam.ac.uk;https://www.microsoft.com/en-us/research;https://www.anthropic.com", "aff_unique_abbr": "U of T;Cambridge;MSR;Anthropic", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Toronto", "aff_country_unique_index": "0;1;0;2;2", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "Noisy Label Learning with Instance-Dependent Outliers: Identifiability via Crowd Wisdom", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95831", "id": "HTLJptF7qM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HTLJptF7qM", "openreview": "https://openreview.net/forum?id=HTLJptF7qM", "poster": "/media/PosterPDFs/NeurIPS%202024/95831.png?t=1733434724.756096", "project": "", "author_site": "Tri Nguyen, Shahana Ibrahim, Xiao Fu", "tldr": "", "abstract": "The generation of label noise is often modeled as a process involving a probability transition matrix (also interpreted as the _annotator confusion matrix_) imposed onto the label distribution. Under this model, learning the ``ground-truth classifier''---i.e., the classifier that can be learned if no noise was present---and the confusion matrix boils down to a model identification problem. Prior works along this line demonstrated appealing empirical performance, yet identifiability of the model was mostly established by assuming an instance-invariant confusion matrix. Having an (occasionally) instance-dependent confusion matrix across data samples is apparently more realistic, but inevitably introduces outliers to the model. Our interest lies in confusion matrix-based noisy label learning with such outliers taken into consideration. We begin with pointing out that under the model of interest, using labels produced by only one annotator is fundamentally insufficient to detect the outliers or identify the ground-truth classifier. Then, we prove that by employing a crowdsourcing strategy involving multiple annotators, a carefully designed loss function can establish the desired model identifiability under reasonable conditions. Our development builds upon a link between the noisy label model and a column-corrupted matrix factorization mode---based on which we show that crowdsourced annotations distinguish nominal data and instance-dependent outliers using a low-dimensional subspace. Experiments show that our learning scheme substantially improves outlier detection and the classifier's testing accuracy.", "keywords": "Noisy label;instance-dependent label noise;sample selection;end-to-end learning;identifiability;crowdsourcing", "primary_area": "learning_theory", "supplementary_material": "/attachment/e15faf63d77c25a54ae36fda5a95792b1bb06823.zip", "author": "Tri Nguyen;Shahana Ibrahim;Xiao Fu", "authorids": "~Tri_Nguyen2;~Shahana_Ibrahim1;~Xiao_Fu1", "gender": ";M;M", "homepage": "https://shahanaibrahimosu.github.io/;https://web.engr.oregonstate.edu/~fuxia/;https://ductri.github.io/", "dblp": "243/9707;60/4601-1;", "google_scholar": "2_NYo1AAAAAJ;pDnpH1MAAAAJ;https://scholar.google.com.vn/citations?user=0nNPBC0AAAAJ", "orcid": ";;0000-0003-1986-9841", "linkedin": ";;tri-nguyen-cs/", "or_profile": "~Shahana_Ibrahim1;~Xiao_Fu1;~Tri_Duc_Nguyen1", "aff": "University of Central Florida;Oregon State University;Oregon State University", "aff_domain": "ucf.edu;oregonstate.edu;oregonstate.edu", "position": "Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nnguyen2024noisy,\ntitle={Noisy Label Learning with Instance-Dependent Outliers: Identifiability via Crowd Wisdom},\nauthor={Tri Nguyen and Shahana Ibrahim and Xiao Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HTLJptF7qM}\n}", "github": "", "reviewers": "FFyM;okUb;BkAV;TTRF", "pdf_size": 1866087, "rating": "5;6;7;7", "confidence": "5;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "87;95;90;150", "wc_strengths": "44;54;47;87", "wc_weaknesses": "46;145;6;145", "wc_questions": "72;72;16;19", "wc_limitations": "1;2;1;52", "wc_review": "250;368;160;453", "wc_reply_reviewers": "0;0;0;223", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.5, 25.85053190942113 ], "wc_strengths_avg": [ 58.0, 17.131841699011815 ], "wc_weaknesses_avg": [ 85.5, 61.15758334008956 ], "wc_questions_avg": [ 44.75, 27.270634389394026 ], "wc_limitations_avg": [ 14.0, 21.94310825749169 ], "wc_review_avg": [ 307.75, 111.68342535936118 ], "wc_reply_reviewers_avg": [ 55.75, 96.5618325219649 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14629090465063240218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ucf.edu;oregonstate.edu;oregonstate.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Central Florida;Oregon State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucf.edu;https://oregonstate.edu", "aff_unique_abbr": "UCF;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Local and Adaptive Mirror Descents in Extensive-Form Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95830", "id": "HU2uyDjAcy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HU2uyDjAcy", "openreview": "https://openreview.net/forum?id=HU2uyDjAcy", "poster": "", "project": "", "author_site": "C\u00f4me Fiegel, Pierre M\u00e9nard, Tadashi Kozuno, Remi Munos, Vianney Perchet, Michal Valko", "tldr": "", "abstract": "We study how to learn $\\epsilon$-optimal strategies in zero-sum imperfect information games (IIG) with *trajectory feedback*. In this setting, players update their policies sequentially, based on their observations over a fixed number of episodes denoted by $T$. Most existing procedures suffer from high variance due to the use of importance sampling over sequences of actions. To reduce this variance, we consider a *fixed sampling* approach, where players still update their policies over time, but with observations obtained through a given fixed sampling policy. Our approach is based on an adaptive Online Mirror Descent (OMD) algorithm that applies OMD locally to each information set, using individually decreasing learning rates and a *regularized loss*. We show that this approach guarantees a convergence rate of $\\tilde{\\mathcal{O}}(T^{-1/2})$ with high probability and has a near-optimal dependence on the game parameters when applied with the best theoretical choices of learning rates and sampling policies. To achieve these results, we generalize the notion of OMD stabilization, allowing for time-varying regularization with convex increments.", "keywords": "online learning;game theory;extensive-form games;mirror descent", "primary_area": "online_learning", "supplementary_material": "", "author": "C\u00f4me Fiegel;Pierre Menard;Tadashi Kozuno;Remi Munos;Vianney Perchet;Michal Valko", "authorids": "~C\u00f4me_Fiegel1;~Pierre_Menard2;~Tadashi_Kozuno1;~Remi_Munos1;~Vianney_Perchet3;~Michal_Valko1", "gender": "M;M;M;M;Not Specified;M", "homepage": ";;http://researchers.lille.inria.fr/~munos/;https://misovalko.github.io/research.html;https://menardprr.github.io/;", "dblp": ";207/8504;69/6815;03/5455;176/5039;83/7398", "google_scholar": ";4VJmx8QAAAAJ;https://scholar.google.com/citations?hl=en;jrazNCQAAAAJ;KXimUncAAAAJ;", "orcid": "0000-0001-9935-4981;;;;;", "linkedin": ";;;michalvalko/;;", "or_profile": "~C\u00f4me_Fiegel1;~Tadashi_Kozuno1;~Remi_Munos1;~Michal_Valko1;~Pierre_MENARD1;~Vianney_Perchet1", "aff": ";OMRON SINIC X;Google DeepMind;Meta;;", "aff_domain": ";sinicx.com;google.com;meta.com;;", "position": ";Researcher;Research scientist;Principal Researcher;;", "bibtex": "@inproceedings{\nfiegel2024local,\ntitle={Local and Adaptive Mirror Descents in Extensive-Form Games},\nauthor={C{\\^o}me Fiegel and Pierre Menard and Tadashi Kozuno and Remi Munos and Vianney Perchet and Michal Valko},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HU2uyDjAcy}\n}", "github": "", "reviewers": "1VRY;HP7A;yah9", "pdf_size": 568020, "rating": "6;7;7", "confidence": "4;3;2", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;3;3", "wc_summary": "166;28;42", "wc_strengths": "55;23;85", "wc_weaknesses": "268;234;116", "wc_questions": "8;21;68", "wc_limitations": "1;28;1", "wc_review": "498;334;312", "wc_reply_reviewers": "274;200;0", "wc_reply_authors": "0;128;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.66666666666667, 62.017918557641245 ], "wc_strengths_avg": [ 54.333333333333336, 25.315783394730033 ], "wc_weaknesses_avg": [ 206.0, 65.13575566972925 ], "wc_questions_avg": [ 32.333333333333336, 25.772509040103607 ], "wc_limitations_avg": [ 10.0, 12.727922061357855 ], "wc_review_avg": [ 381.3333333333333, 82.98326471176115 ], "wc_reply_reviewers_avg": [ 158.0, 115.73533024390895 ], "wc_reply_authors_avg": [ 42.666666666666664, 60.339778661252055 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1516050743559626417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";sinicx.com;google.com;meta.com;;", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "OMRON Corporation;Google;Meta", "aff_unique_dep": "SINIC X;Google DeepMind;Meta Platforms, Inc.", "aff_unique_url": "https://www.omron.com;https://deepmind.com;https://meta.com", "aff_unique_abbr": "OMRON;DeepMind;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Japan;United Kingdom;United States" }, { "title": "HEALNet: Multimodal Fusion for Heterogeneous Biomedical Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95829", "id": "HUxtJcQpDS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HUxtJcQpDS", "openreview": "https://openreview.net/forum?id=HUxtJcQpDS", "poster": "/media/PosterPDFs/NeurIPS%202024/95829.png?t=1731481465.9701295", "project": "", "author_site": "Konstantin Hemker, Nikola Simidjievski, Mateja Jamnik", "tldr": "", "abstract": "Technological advances in medical data collection, such as high-throughput genomic sequencing and digital high-resolution histopathology, have contributed to the rising requirement for multimodal biomedical modelling, specifically for image, tabular and graph data. Most multimodal deep learning approaches use modality-specific architectures that are often trained separately and cannot capture the crucial cross-modal information that motivates the integration of different data sources. This paper presents the **H**ybrid **E**arly-fusion **A**ttention **L**earning **Net**work (HEALNet) \u2013 a flexible multimodal fusion architecture, which: a) preserves modality-specific structural information, b) captures the cross-modal interactions and structural information in a shared latent space, c) can effectively handle missing modalities during training and inference, and d) enables intuitive model inspection by learning on the raw data input instead of opaque embeddings. We conduct multimodal survival analysis on Whole Slide Images and Multi-omic data on four cancer datasets from The Cancer Genome Atlas (TCGA). HEALNet achieves state-of-the-art performance compared to other end-to-end trained fusion models, substantially improving over unimodal and multimodal baselines whilst being robust in scenarios with missing modalities. The code is available at https://github.com/konst-int-i/healnet.", "keywords": "multimodal;fusion;computational pathology;multiomic analyses", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/2581cf2ade1eff2217fb3852d4a7f52dc306dfeb.zip", "author": "Konstantin Hemker;Nikola Simidjievski;Mateja Jamnik", "authorids": "~Konstantin_Hemker1;~Nikola_Simidjievski1;~Mateja_Jamnik1", "gender": ";Unspecified;F", "homepage": ";https://simidjievskin.github.io/;http://www.cl.cam.ac.uk/~mj201", "dblp": ";;41/1392", "google_scholar": "SJVH3nIAAAAJ;;d5QiyJkAAAAJ", "orcid": ";;0000-0003-2772-2532", "linkedin": "konstantin-hemker-b04250a6/;;", "or_profile": "~Konstantin_Hemker1;~Nikola_Simidjievski1;~Mateja_Jamnik1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;Principal Researcher;Professor in Artificial Intelligence", "bibtex": "@inproceedings{\nhemker2024healnet,\ntitle={{HEALN}et: Multimodal Fusion for Heterogeneous Biomedical Data},\nauthor={Konstantin Hemker and Nikola Simidjievski and Mateja Jamnik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HUxtJcQpDS}\n}", "github": "", "reviewers": "a9ao;YCoh;5LQ9;ZYCi", "pdf_size": 3495637, "rating": "4;5;5;7", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "66;80;53;87", "wc_strengths": "37;46;22;104", "wc_weaknesses": "128;141;137;103", "wc_questions": "44;58;88;200", "wc_limitations": "57;27;76;8", "wc_review": "332;352;376;502", "wc_reply_reviewers": "0;217;75;46", "wc_reply_authors": "142;257;408;0", "reply_reviewers": "0;2;2;1", "reply_authors": "3;2;3;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.5, 13.0862523283024 ], "wc_strengths_avg": [ 52.25, 31.083556746292725 ], "wc_weaknesses_avg": [ 127.25, 14.771171246722448 ], "wc_questions_avg": [ 97.5, 61.27601488347623 ], "wc_limitations_avg": [ 42.0, 26.277366686941825 ], "wc_review_avg": [ 390.5, 66.23254487032791 ], "wc_reply_reviewers_avg": [ 84.5, 81.038571063414 ], "wc_reply_authors_avg": [ 201.75, 149.88724929092535 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9251220610465858889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "BeanCounter: A low-toxicity, large-scale, and open dataset of business-oriented text", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97771", "id": "HV5JhUZGpP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HV5JhUZGpP", "openreview": "https://openreview.net/forum?id=HV5JhUZGpP", "poster": "/media/PosterPDFs/NeurIPS%202024/97771.png?t=1731710691.6061826", "project": "", "author_site": "Siyan Wang, Bradford Levy", "tldr": "", "abstract": "Many of the recent breakthroughs in language modeling have resulted from scaling effectively the same model architecture to larger datasets. In this vein, recent work has highlighted performance gains from increasing training dataset size and quality, suggesting a need for novel sources of large-scale datasets. In this work, we introduce BeanCounter, a public dataset consisting of more than 159B tokens extracted from businesses' disclosures. We show that this data is indeed novel: less than 0.1% of BeanCounter appears in Common Crawl-based datasets and it is an order of magnitude larger than datasets relying on similar sources. Given the data's provenance, we hypothesize that BeanCounter is comparatively more factual and less toxic than web-based datasets. Exploring this hypothesis, we find that many demographic identities occur with similar prevalence in BeanCounter but with significantly less toxic context relative to other datasets. To demonstrate the utility of BeanCounter, we evaluate and compare two LLMs continually pre-trained on BeanCounter with their base models. We find an 18-33% reduction in toxic generation and improved performance within the finance domain for the continually pretrained models. Collectively, our work suggests that BeanCounter is a novel source of low-toxicity and high-quality domain-specific data with sufficient scale to train multi-billion parameter LLMs.", "keywords": "toxicity;bias;finance;large-scale text corpora;large language models", "primary_area": "", "supplementary_material": "/attachment/af6d9b8b51ffbc25ff3968719ca3f157e129b644.zip", "author": "Siyan Wang;Bradford Levy", "authorids": "~Siyan_Wang1;~Bradford_Levy1", "gender": "F;Not Specified", "homepage": ";https://www.bradfordlynch.com/", "dblp": ";", "google_scholar": ";6ozY3XQAAAAJ", "orcid": ";0000-0001-5123-0197", "linkedin": "carriesyw;", "or_profile": "~Siyan_Wang1;~Bradford_Levy1", "aff": "University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2024beancounter,\ntitle={BeanCounter: A low-toxicity, large-scale, and open dataset of business-oriented text},\nauthor={Siyan Wang and Bradford Levy},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=HV5JhUZGpP}\n}", "github": "", "reviewers": "M1Cw;Hg9p", "pdf_size": 7775699, "rating": "6;8", "confidence": "5;4", "wc_summary_and_contributions": "168;85", "wc_strengths": "105;48", "wc_improvement": "388;88", "wc_limitations": "12;44", "wc_correctness": "5;24", "wc_clarity": "15;12", "wc_relation_to_prior_work": "5;16", "wc_documentation": "12;2", "wc_additional_feedback": "1;1", "wc_review": "711;320", "wc_reply_reviewers": "0;62", "wc_reply_authors": "0;0", "reply_reviewers": "0;1", "reply_authors": "1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 126.5, 41.5 ], "wc_strengths_avg": [ 76.5, 28.5 ], "wc_improvement_avg": [ 238.0, 150.0 ], "wc_limitations_avg": [ 28.0, 16.0 ], "wc_correctness_avg": [ 14.5, 9.5 ], "wc_clarity_avg": [ 13.5, 1.5 ], "wc_relation_to_prior_work_avg": [ 10.5, 5.5 ], "wc_documentation_avg": [ 7.0, 5.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 515.5, 195.5 ], "wc_reply_reviewers_avg": [ 31.0, 31.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15490464963667358496&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uchicago.edu;uchicago.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "No-regret Learning in Harmonic Games: Extrapolation in the Face of Conflicting Interests", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95828", "id": "HW9S9vY5gZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HW9S9vY5gZ", "openreview": "https://openreview.net/forum?id=HW9S9vY5gZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95828.png?t=1733877473.992608", "project": "", "author_site": "Davide Legacci, Panayotis Mertikopoulos, Christos Papadimitriou, Georgios Piliouras, Bary Pradelski", "tldr": "", "abstract": "The long-run behavior of multi-agent online learning -- and, in particular, no-regret learning -- is relatively well-understood in potential games, where players have common interests. By contrast, in general harmonic games -- the strategic complement of potential games, where players have competing interests -- very little is known outside the narrow subclass of $2$-player zero-sum games with a fully-mixed equilibrium. Our paper seeks to partially fill this gap by focusing on the full class of (generalized) harmonic games and examining the convergence properties of \"follow-the-regularized-leader\" (FTRL), the most widely studied class of no-regret learning schemes. As a first result, we show that the continuous-time dynamics of FTRL are Poincar\u00e9 recurrent, i.e., they return arbitrarily close to their starting point infinitely often, and hence fail to converge. In discrete time, the standard, \"vanilla\" implementation of FTRL may lead to even worse outcomes, eventually trapping the players in a perpetual cycle of best-responses. However, if FTRL is augmented with a suitable extrapolation step -- which includes as special cases the optimistic and mirror-prox variants of FTRL -- we show that learning converges to a Nash equilibrium from any initial condition, and all players are guaranteed at most $\\mathcal{O}(1)$ regret. These results provide an in-depth understanding of no-regret learning in harmonic games, nesting prior work on $2$-player zero-sum games, and showing at a high level that potential and harmonic games are complementary not only from the strategic but also from the dynamic viewpoint.", "keywords": "learning in games;no-regret algorithms;FTRL;harmonic games;Poincar\u00e9 recurrence", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Davide Legacci;Panayotis Mertikopoulos;Christos Papadimitriou;Georgios Piliouras;Bary Pradelski", "authorids": "~Davide_Legacci1;~Panayotis_Mertikopoulos1;~Christos_Papadimitriou2;~Georgios_Piliouras1;~Bary_Pradelski1", "gender": "M;M;M;;", "homepage": "https://davidelegacci.it/;http://polaris.imag.fr/panayotis.mertikopoulos/;;;https://barypradelski.com/", "dblp": ";49/6721;p/CHPapadimitriou;62/1236;46/11488", "google_scholar": "zzxwDuAAAAAJ;xsusqPYAAAAJ;;;", "orcid": "0009-0009-3069-2785;0000-0003-2026-9616;;;", "linkedin": ";;;;", "or_profile": "~Davide_Legacci1;~Panayotis_Mertikopoulos1;~Christos_Papadimitriou2;~Georgios_Piliouras1;~Bary_Pradelski1", "aff": "Universit\u00e9 Grenoble Alpes;French National Center for Scientific Research;Columbia University;Singapore University of Technology and Design;CNRS", "aff_domain": "univ-grenoble-alpes.fr;imag.fr;columbia.edu;sutd.edu.sg;cnrs.fr", "position": "PhD student;Principal Researcher;Full Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nlegacci2024noregret,\ntitle={No-regret Learning in Harmonic Games: Extrapolation in the Face of Conflicting Interests},\nauthor={Davide Legacci and Panayotis Mertikopoulos and Christos Papadimitriou and Georgios Piliouras and Bary Pradelski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HW9S9vY5gZ}\n}", "github": "", "reviewers": "sMJW;aEEw;gj66;1Wrk", "pdf_size": 1579315, "rating": "6;7;7;7", "confidence": "4;3;4;5", "soundness": "4;4;4;3", "novelty": "3;4;3;3", "presentation": "4;4;3;3", "wc_summary": "66;89;121;138", "wc_strengths": "172;71;92;59", "wc_weaknesses": "142;33;110;163", "wc_questions": "3;50;74;76", "wc_limitations": "7;11;1;2", "wc_review": "390;254;398;438", "wc_reply_reviewers": "19;30;61;203", "wc_reply_authors": "14;14;175;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 103.5, 27.897132469126642 ], "wc_strengths_avg": [ 98.5, 44.048268978474056 ], "wc_weaknesses_avg": [ 112.0, 49.36091571273774 ], "wc_questions_avg": [ 50.75, 29.40556920040828 ], "wc_limitations_avg": [ 5.25, 4.02336923485777 ], "wc_review_avg": [ 370.0, 69.39740629158989 ], "wc_reply_reviewers_avg": [ 78.25, 73.65247789450129 ], "wc_reply_authors_avg": [ 58.75, 67.51805314136361 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6089344470158925018&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "univ-grenoble-alpes.fr;imag.fr;columbia.edu;sutd.edu.sg;cnrs.fr", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Universit\u00e9 Grenoble Alpes;French National Center for Scientific Research;Columbia University;Singapore University of Technology and Design;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.univ-grenoble-alpes.fr;https://www.cnrs.fr;https://www.columbia.edu;https://www.sutd.edu.sg;https://www.cnrs.fr", "aff_unique_abbr": "UGA;CNRS;Columbia;SUTD;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "France;United States;Singapore" }, { "title": "Doing Experiments and Revising Rules with Natural Language and Probabilistic Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95827", "id": "HXdAfK488A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HXdAfK488A", "openreview": "https://openreview.net/forum?id=HXdAfK488A", "poster": "", "project": "", "author_site": "Top Piriyakulkij, Cassidy Langenfeld, Tuan Anh Le, Kevin Ellis", "tldr": "", "abstract": "We give a model of how to infer natural language rules by doing experiments. The\nmodel integrates Large Language Models (LLMs) with Monte Carlo algorithms for\nprobabilistic inference, interleaving online belief updates with experiment design\nunder information-theoretic criteria. We conduct a human-model comparison on a\nZendo-style task, finding that a critical ingredient for modeling the human data is to\nassume that humans also consider fuzzy, probabilistic rules, in addition to assuming\nthat humans perform approximately-Bayesian belief updates. We also compare\nwith recent algorithms for using LLMs to generate and revise hypotheses, finding\nthat our online inference method yields higher accuracy at recovering the true\nunderlying rule, and provides better support for designing optimal experiments.", "keywords": "induction;LLM;active learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Wasu Top Piriyakulkij;Cassidy Langenfeld;Tuan Anh Le;Kevin Ellis", "authorids": "~Wasu_Top_Piriyakulkij1;~Cassidy_Langenfeld1;~Tuan_Anh_Le1;~Kevin_Ellis1", "gender": "F;M;M;M", "homepage": ";https://www.tuananhle.co.uk;https://www.cs.cornell.edu/~ellisk/;https://www.cs.cornell.edu/~wp237/", "dblp": ";76/10097-1;;305/7203", "google_scholar": ";https://scholar.google.co.uk/citations?user=tkceMM0AAAAJ;L7XI6asAAAAJ;nlO1TkkAAAAJ", "orcid": ";;;", "linkedin": "cassidy-langenfeld-8a13231b5/;;;", "or_profile": "~Cassidy_Langenfeld1;~Tuan_Anh_Le1;~Kevin_Ellis1;~Top_Piriyakulkij1", "aff": "Department of Computer Science, Cornell University;Google Research;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;google.com;cornell.edu;cs.cornell.edu", "position": "MS student;Research Scientist;Assistant Professor;PhD student", "bibtex": "@inproceedings{\npiriyakulkij2024doing,\ntitle={Doing Experiments and Revising Rules with Natural Language and Probabilistic Reasoning},\nauthor={Wasu Top Piriyakulkij and Cassidy Langenfeld and Tuan Anh Le and Kevin Ellis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HXdAfK488A}\n}", "github": "", "reviewers": "zQ3i;nwGu;C8N9;i693", "pdf_size": 1368965, "rating": "4;7;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "144;124;91;111", "wc_strengths": "34;134;73;147", "wc_weaknesses": "335;227;149;119", "wc_questions": "74;190;2;133", "wc_limitations": "38;4;4;3", "wc_review": "625;679;319;513", "wc_reply_reviewers": "0;82;14;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 117.5, 19.29378138157474 ], "wc_strengths_avg": [ 97.0, 45.863929181874504 ], "wc_weaknesses_avg": [ 207.5, 83.50299395830068 ], "wc_questions_avg": [ 99.75, 69.76523131187913 ], "wc_limitations_avg": [ 12.25, 14.872373717735847 ], "wc_review_avg": [ 534.0, 137.81509351301113 ], "wc_reply_reviewers_avg": [ 32.25, 31.01914731258743 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2138071734431909740&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "cs.cornell.edu;google.com;cornell.edu;cs.cornell.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Cornell University;Google", "aff_unique_dep": "Department of Computer Science;Google Research", "aff_unique_url": "https://www.cornell.edu;https://research.google", "aff_unique_abbr": "Cornell;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Training for Stable Explanation for Free", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95826", "id": "HYa3eu8scG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HYa3eu8scG", "openreview": "https://openreview.net/forum?id=HYa3eu8scG", "poster": "/media/PosterPDFs/NeurIPS%202024/95826.png?t=1731726559.1722283", "project": "", "author_site": "Chao Chen, Chenghua Guo, Rufeng Chen, Guixiang Ma, Ming Zeng, Xiangwen Liao, Xi Zhang, Sihong Xie", "tldr": "", "abstract": "To foster trust in machine learning models, explanations must be faithful and stable for consistent insights. Existing relevant works rely on the $\\ell_p$ distance for stability assessment, which diverges from human perception. Besides, existing adversarial training (AT) associated with intensive computations may lead to an arms race. To address these challenges, we introduce a novel metric to assess the stability of top-$k$ salient features. We introduce R2ET which trains for stable explanation by efficient and effective regularizer,\nand analyze R2ET by multi-objective optimization to prove numerical and statistical stability of explanations. Moreover, theoretical connections between R2ET and certified robustness justify R2ET's stability in all attacks. Extensive experiments across various data modalities and model architectures show that R2ET achieves superior stability against stealthy attacks, and generalizes effectively across different explanation methods. The code can be found at https://github.com/ccha005/R2ET.", "keywords": "accountable machine learning;stability;transparency;interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Chao Chen;Chenghua Guo;Rufeng Chen;Guixiang Ma;Ming Zeng;Xiangwen Liao;Xi Zhang;Sihong Xie", "authorids": "~Chao_Chen14;~Chenghua_Guo1;~Rufeng_Chen2;~Guixiang_Ma2;~Ming_Zeng2;~Xiangwen_Liao1;~Xi_Zhang12;~Sihong_Xie1", "gender": ";M;M;;M;M;M;M", "homepage": ";https://github.com/chenghuaguo;https://mail.google.com/mail/u/0/#inbox;https://www.linkedin.com/in/guixiangma/;;;https://www.linkedin.com/in/xi-zhang-a1128b51/;https://sihongxie.github.io/index.html", "dblp": ";337/0557;;66/8585;;50/6801;87/1222-8;67/1229", "google_scholar": ";;https://scholar.google.com.hk/citations?user=0paKpRsAAAAJ;2K81fLYAAAAJ;QXiEicQAAAAJ;;6sRtx0cAAAAJ;qRp1xZwAAAAJ", "orcid": ";;;;;;0000-0002-2111-7385;0000-0003-1060-8506", "linkedin": ";;;;;;;", "or_profile": "~Chao_Chen14;~Chenghua_Guo1;~Rufeng_Chen2;~Guixiang_Ma2;~Ming_Zeng2;~Xiangwen_Liao1;~Xi_Zhang12;~Sihong_Xie1", "aff": ";Beijing University of Posts and Telecommunications;Hangzhou Dianzi University;Intel;Carnegie Mellon University;Fuzhou University;Beijing University of Posts and Telecommunications;HKUST-GZ", "aff_domain": ";bupt.edu.cn;hdu.edu.cn;intel.com;cmu.edu;fzu.edu.cn;bupt.edu.cn;hkust-gz.edu.cn", "position": ";PhD student;MS student;Research Scientist;Postdoc;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2024training,\ntitle={Training for Stable Explanation for Free},\nauthor={Chao Chen and Chenghua Guo and Rufeng Chen and Guixiang Ma and Ming Zeng and Xiangwen Liao and Xi Zhang and Sihong Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HYa3eu8scG}\n}", "github": "", "reviewers": "4WDh;9MCa;X6uh;VSmU", "pdf_size": 4043896, "rating": "5;6;6;7", "confidence": "4;2;2;4", "soundness": "4;3;2;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "71;266;125;79", "wc_strengths": "48;69;44;185", "wc_weaknesses": "113;908;334;35", "wc_questions": "35;262;32;54", "wc_limitations": "45;52;10;1", "wc_review": "312;1557;545;354", "wc_reply_reviewers": "12;270;57;13", "wc_reply_authors": "4;618;35;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 135.25, 78.25079871796837 ], "wc_strengths_avg": [ 86.5, 57.65630928181234 ], "wc_weaknesses_avg": [ 347.5, 341.68296709083995 ], "wc_questions_avg": [ 95.75, 96.35448873820046 ], "wc_limitations_avg": [ 27.0, 21.874642854227357 ], "wc_review_avg": [ 692.0, 507.0695218606616 ], "wc_reply_reviewers_avg": [ 88.0, 106.63723552305733 ], "wc_reply_authors_avg": [ 168.0, 260.04518838078894 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10190357099832666151&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";bupt.edu.cn;hdu.edu.cn;intel.com;cmu.edu;fzu.edu.cn;bupt.edu.cn;hkust-gz.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;5", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Hangzhou Dianzi University;Intel;Carnegie Mellon University;Fuzhou University;Hong Kong University of Science and Technology (Guangzhou)", "aff_unique_dep": ";;Intel Corporation;;;", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.hdu.edu.cn/;https://www.intel.com;https://www.cmu.edu;https://www.fznu.edu.cn;https://www.ust.hk", "aff_unique_abbr": "BUPT;HGHDU;Intel;CMU;FZU;HKUST", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Beijing;;Guangzhou", "aff_country_unique_index": "0;0;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "A probability contrastive learning framework for 3D molecular representation learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95825", "id": "HYiR6tGQPv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HYiR6tGQPv", "openreview": "https://openreview.net/forum?id=HYiR6tGQPv", "poster": "/media/PosterPDFs/NeurIPS%202024/95825.png?t=1733471203.328255", "project": "", "author_site": "Jiayu Qin, Jian Chen, Rohan Sharma, Jingchen Sun, Changyou Chen", "tldr": "", "abstract": "Contrastive Learning (CL) plays a crucial role in molecular representation learning, enabling unsupervised learning from large scale unlabeled molecule datasets. It has inspired various applications in molecular property prediction and drug design.\nHowever, existing molecular representation learning methods often introduce potential false positive and false negative pairs through conventional graph augmentations like node masking and subgraph removal. The issue can lead to suboptimal performance when applying standard contrastive learning techniques to molecular datasets. \nTo address the issue of false positive and negative pairs in molecular representation learning, we propose a novel probability-based contrastive learning (CL) framework. Unlike conventional methods, our approach introduces a learnable weight distribution via Bayesian modeling to automatically identify and mitigate false positive and negative pairs. This method is particularly effective because it dynamically adjusts to the data, improving the accuracy of the learned representations. Our model is learned by a stochastic expectation-maximization process, which optimizes the model by iteratively refining the probability estimates of sample weights and updating the model parameters.\nExperimental results indicate that our method outperforms existing approaches in 13 out of 15 molecular property prediction benchmarks in MoleculeNet dataset and 8 out of 12 benchmarks in the QM9 benchmark, achieving new state-of-the-art results on average.", "keywords": "contrastive learning; molecular property prediction; expectation maximization", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/1ee2dcc9fb27516d22b3566ab59012c332431805.zip", "author": "Jiayu Qin;Jian Chen;Rohan Sharma;Jingchen Sun;Changyou Chen", "authorids": "~Jiayu_Qin1;~Jian_Chen9;~Rohan_Sharma1;~Jingchen_Sun1;~Changyou_Chen1", "gender": "M;M;M;M;M", "homepage": ";https://puar-playground.github.io/CV/;;https://jingchensun.github.io/;https://www.cse.buffalo.edu/~changyou/", "dblp": "263/6664;49/6002-43;;274/1045;65/2802", "google_scholar": ";uBGjz-EAAAAJ;;https://scholar.google.com/citations?hl=en;LtEcKBcAAAAJ", "orcid": ";0000-0002-1999-1137;;;", "linkedin": "jiayu-qin-64314a24b;jian-chen-1a0b9a11b/;rs1561/;jingchensun/;", "or_profile": "~Jiayu_Qin1;~Jian_Chen9;~Rohan_Sharma1;~Jingchen_Sun1;~Changyou_Chen1", "aff": "State University of New York at Buffalo;Mohamed bin Zayed University of Artificial Intelligence;Amazon;NEC;State University of New York, Buffalo", "aff_domain": "buffalo.edu;mbzuai.ac.ae;amazon.com;nec-labs.com;buffalo.edu", "position": "PhD student;Researcher;Applied Scientist Intern;Intern;Assistant Professor", "bibtex": "@inproceedings{\nqin2024a,\ntitle={A probability contrastive learning framework for 3D molecular representation learning},\nauthor={Jiayu Qin and Jian Chen and Rohan Sharma and Jingchen Sun and Changyou Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HYiR6tGQPv}\n}", "github": "", "reviewers": "oFgE;T3Hf;NjGA;Jokw", "pdf_size": 979440, "rating": "3;6;7;7", "confidence": "4;4;5;5", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "43;78;54;48", "wc_strengths": "28;61;66;36", "wc_weaknesses": "312;159;106;65", "wc_questions": "2;153;131;53", "wc_limitations": "1;1;6;3", "wc_review": "386;452;363;205", "wc_reply_reviewers": "45;16;35;23", "wc_reply_authors": "257;646;24;24", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.75, 13.423393758658799 ], "wc_strengths_avg": [ 47.75, 16.099301227071937 ], "wc_weaknesses_avg": [ 160.5, 93.60154913247963 ], "wc_questions_avg": [ 84.75, 60.524271990665035 ], "wc_limitations_avg": [ 2.75, 2.0463381929681126 ], "wc_review_avg": [ 351.5, 90.67110895980042 ], "wc_reply_reviewers_avg": [ 29.75, 11.121488209767612 ], "wc_reply_authors_avg": [ 237.75, 254.17353815847943 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7624928516630233, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eBjUX5zoFRsJ:scholar.google.com/&scioq=A+probability+contrastive+learning+framework+for+3D+molecular+representation+learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "buffalo.edu;mbzuai.ac.ae;amazon.com;nec-labs.com;buffalo.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "State University of New York at Buffalo;Mohamed bin Zayed University of Artificial Intelligence;Amazon;NEC Corporation", "aff_unique_dep": ";;Amazon.com, Inc.;", "aff_unique_url": "https://www.buffalo.edu;https://mbzuai.ac.ae;https://www.amazon.com;https://www.nec.com", "aff_unique_abbr": "SUNY Buffalo;MBZUAI;Amazon;NEC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "United States;United Arab Emirates;Japan" }, { "id": "HYwfZEhyK4", "title": "Swarm Intelligence in Geo-Localization: A Multi-Agent Large Vision-Language Model Collaborative Framework", "track": "main", "status": "Reject", "tldr": "", "abstract": "Visual geo-localization demands in-depth knowledge and advanced reasoning skills to associate images with real-world geographic locations precisely.\nIn general, traditional methods based on data-matching are hindered by the impracticality of storing adequate visual records of global landmarks.\nRecently, Large Vision-Language Models (LVLMs) have demonstrated the capability of geo-localization through Visual Question Answering (VQA), enabling a solution that does not require external geo-tagged image records. However, the performance of a single LVLM is still limited by its intrinsic knowledge and reasoning capabilities.\nAlong this line, in this paper, we introduce a novel visual geo-localization framework called smileGeo that integrates the inherent knowledge of multiple LVLM agents via inter-agent communication to achieve effective geo-localization of images. \nFurthermore, our framework employs a dynamic learning strategy to optimize the communication patterns among agents, reducing unnecessary discussions among agents and improving the efficiency of the framework.\nTo validate the effectiveness of the proposed framework, we construct GeoGlobe, a novel dataset for visual geo-localization tasks. Extensive testing on the dataset demonstrates that our approach significantly outperforms state-of-the-art methods.\nThe source code is available at https://anonymous.4open.science/r/ViusalGeoLocalization-F8F5/ and the dataset will also be released after the paper is accepted.", "keywords": "Visual Geo-Localization;Large Vision-Language Model;Collaboration Social Network", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xiao Han;Chen Zhu;Xiangyu Zhao;Hengshu Zhu", "authorids": "~Xiao_Han11;~Chen_Zhu5;~Xiangyu_Zhao1;~Hengshu_Zhu1", "gender": "M;;M;", "homepage": "https://hahahenha.github.io/;;https://zhaoxyai.github.io/;http://www.zhuhengshu.com/", "dblp": "01/2095-4;;08/890-1.html;61/10440", "google_scholar": ";;;55MQBzYAAAAJ", "orcid": "0000-0002-3478-964X;;0000-0003-2926-4416;0000-0003-4570-643X", "linkedin": ";;;", "or_profile": "~Xiao_Han11;~Chen_Zhu5;~Xiangyu_Zhao1;~Hengshu_Zhu1", "aff": "City University of Hong Kong;;City University of Hong Kong;Kanzhun Limited (BOSS Zhipin)", "aff_domain": "cityu.edu.hk;;cityu.edu.hk;kanzhun.com", "position": "PhD student;;Assistant Professor;Chief Research Scientist", "bibtex": "@misc{\nanonymous2024swarm,\ntitle={Swarm Intelligence in Geo-Localization: A Multi-Agent Large Vision-Language Model Collaborative Framework},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=HYwfZEhyK4}\n}", "github": "", "project": "", "reviewers": "Jv7J;mcak;EWQq", "site": "https://openreview.net/forum?id=HYwfZEhyK4", "pdf_size": 12602504, "rating": "4;4;6", "confidence": "4;3;4", "soundness": "3;4;4", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "66;128;55", "wc_strengths": "81;83;60", "wc_weaknesses": "21;217;202", "wc_questions": "155;57;3", "wc_limitations": "7;26;8", "wc_review": "330;511;328", "wc_reply_reviewers": "105;218;14", "wc_reply_authors": "95;597;17", "reply_reviewers": "1;2;1", "reply_authors": "2;2;2", "rating_avg": [ 4.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 32.13513134665341 ], "wc_strengths_avg": [ 74.66666666666667, 10.402991022884823 ], "wc_weaknesses_avg": [ 146.66666666666666, 89.07050889916121 ], "wc_questions_avg": [ 71.66666666666667, 62.91440393282147 ], "wc_limitations_avg": [ 13.666666666666666, 8.73053390247253 ], "wc_review_avg": [ 389.6666666666667, 85.79950789809669 ], "wc_reply_reviewers_avg": [ 112.33333333333333, 83.44392661475662 ], "wc_reply_authors_avg": [ 236.33333333333334, 257.01015976459416 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4988982369919296244&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "City University of Hong Kong;Kanzhun Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.zhipin.com", "aff_unique_abbr": "CityU;BOSS Zhipin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Model-free Low-Rank Reinforcement Learning via Leveraged Entry-wise Matrix Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95824", "id": "HavKlV22xJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HavKlV22xJ", "openreview": "https://openreview.net/forum?id=HavKlV22xJ", "poster": "", "project": "", "author_site": "Stefan Stojanovic, Yassir Jedra, Alexandre Proutiere", "tldr": "", "abstract": "We consider the problem of learning an $\\varepsilon$-optimal policy in controlled dynamical systems with low-rank latent structure. \nFor this problem, we present LoRa-PI (Low-Rank Policy Iteration), a model-free learning algorithm alternating between policy improvement and policy evaluation steps. In the latter, the algorithm estimates the low-rank matrix corresponding to the (state, action) value function of the current policy using the following two-phase procedure. The entries of the matrix are first sampled uniformly at random to estimate, via a spectral method, the *leverage scores* of its rows and columns. These scores are then used to extract a few important rows and columns whose entries are further sampled. The algorithm exploits these new samples to complete the matrix estimation using a CUR-like method. For this leveraged matrix estimation procedure, we establish entry-wise guarantees that remarkably, do not depend on the coherence of the matrix but only on its spikiness. These guarantees imply that LoRa-PI learns an $\\varepsilon$-optimal policy using $\\tilde{\\cal O}({(S+A)\\over \\mathrm{poly}(1-\\gamma)\\varepsilon^2})$ samples where $S$ (resp. $A$) denotes the number of states (resp. actions) and $\\gamma$ the discount factor. Our algorithm achieves this order-optimal (in $S$, $A$ and $\\varepsilon$) sample complexity under milder conditions than those assumed in previously proposed approaches.", "keywords": "Low-rank RL; Entry-wise matrix estimation; Matrix completion", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/07ebebf2fa0ebf3b8845e6e71c537744e88a1c27.zip", "author": "Stefan Stojanovic;Yassir Jedra;Alexandre Proutiere", "authorids": "~Stefan_Stojanovic1;~Yassir_Jedra1;~Alexandre_Proutiere1", "gender": "M;;M", "homepage": "https://www.kth.se/profile/stesto;https://sites.google.com/view/yassir-jedra/home?authuser=1;https://people.kth.se/~alepro/", "dblp": "315/5080;238/0358;p/AlexandreProutiere", "google_scholar": "jCkz9ykAAAAJ;tePNfWQAAAAJ;g5sya5cAAAAJ", "orcid": ";;", "linkedin": ";yassirjedra/;", "or_profile": "~Stefan_Stojanovic1;~Yassir_Jedra1;~Alexandre_Proutiere1", "aff": "KTH Royal Institute of Technology;Massachusetts Institute of Technology;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;mit.edu;kth.se", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nstojanovic2024modelfree,\ntitle={Model-free Low-Rank Reinforcement Learning via Leveraged Entry-wise Matrix Estimation},\nauthor={Stefan Stojanovic and Yassir Jedra and Alexandre Proutiere},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HavKlV22xJ}\n}", "github": "", "reviewers": "ff2R;9Ehq;Fr8E;WY9t", "pdf_size": 1674360, "rating": "4;5;5;6", "confidence": "3;3;3;2", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;2;3", "wc_summary": "148;151;73;66", "wc_strengths": "42;107;24;73", "wc_weaknesses": "65;195;68;40", "wc_questions": "46;274;2;38", "wc_limitations": "12;1;2;93", "wc_review": "313;728;169;310", "wc_reply_reviewers": "0;377;39;0", "wc_reply_authors": "91;723;59;39", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 109.5, 40.09052257080219 ], "wc_strengths_avg": [ 61.5, 31.579265349276255 ], "wc_weaknesses_avg": [ 92.0, 60.45246066125018 ], "wc_questions_avg": [ 90.0, 107.5174404457249 ], "wc_limitations_avg": [ 27.0, 38.34709897762802 ], "wc_review_avg": [ 380.0, 209.1733730664589 ], "wc_reply_reviewers_avg": [ 104.0, 158.41874889040122 ], "wc_reply_authors_avg": [ 228.0, 286.3895947830507 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3897114783121108406&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "kth.se;mit.edu;kth.se", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "KTH Royal Institute of Technology;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://web.mit.edu", "aff_unique_abbr": "KTH;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Sweden;United States" }, { "title": "Structured flexibility in recurrent neural networks via neuromodulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95823", "id": "HbIBqn3grD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HbIBqn3grD", "openreview": "https://openreview.net/forum?id=HbIBqn3grD", "poster": "", "project": "", "author_site": "Julia Costacurta, Shaunak Bhandarkar, David Zoltowski, Scott Linderman", "tldr": "", "abstract": "A core aim in theoretical and systems neuroscience is to develop models which help us better understand biological intelligence. \nSuch models range broadly in both complexity and biological plausibility. \nOne widely-adopted example is task-optimized recurrent neural networks (RNNs), which have been used to generate hypotheses about how the brain\u2019s neural dynamics may organize to accomplish tasks. \nHowever, task-optimized RNNs typically have a fixed weight matrix representing the synaptic connectivity between neurons. From decades of neuroscience research, we know that synaptic weights are constantly changing, controlled in part by chemicals such as neuromodulators. \nIn this work we explore the computational implications of synaptic gain scaling, a form of neuromodulation, using task-optimized low-rank RNNs.\nIn our neuromodulated RNN (NM-RNN) model, a neuromodulatory subnetwork outputs a low-dimensional neuromodulatory signal that dynamically scales the low-rank recurrent weights of an output-generating RNN. \nIn empirical experiments, we find that the structured flexibility in the NM-RNN allows it to both train and generalize with a higher degree of accuracy than low-rank RNNs on a set of canonical tasks.\nAdditionally, via theoretical analyses we show how neuromodulatory gain scaling endows networks with gating mechanisms commonly found in artificial RNNs. \nWe end by analyzing the low-rank dynamics of trained NM-RNNs, to show how task computations are distributed.", "keywords": "recurrent neural networks;neuromodulation;low-rank recurrent neural networks;timing;biological computation", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/03b51662d5e35c30dd6178eae266575c709f125c.zip", "author": "Julia C Costacurta;Shaunak Bhandarkar;David M. Zoltowski;Scott Linderman", "authorids": "~Julia_C_Costacurta1;~Shaunak_Bhandarkar1;~David_M._Zoltowski1;~Scott_Linderman1", "gender": "M;M;F;M", "homepage": ";https://www.davidzoltowski.com/;https://jcostacurta11.github.io/;https://web.stanford.edu/~swl1/", "dblp": ";158/5356;;142/2484", "google_scholar": ";ZnxTn6IAAAAJ;dI-yFpQAAAAJ;6mD3I24AAAAJ", "orcid": ";;;", "linkedin": "shaunak-bhandarkar-879160229/;;;", "or_profile": "~Shaunak_Bhandarkar1;~David_M._Zoltowski1;~Julia_Christina_Costacurta1;~Scott_W_Linderman1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Undergrad student;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncostacurta2024structured,\ntitle={Structured flexibility in recurrent neural networks via neuromodulation},\nauthor={Julia C Costacurta and Shaunak Bhandarkar and David M. Zoltowski and Scott Linderman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HbIBqn3grD}\n}", "github": "", "reviewers": "JBgz;rG4S;Eb4N;znSr", "pdf_size": 5734918, "rating": "5;6;6;7", "confidence": "4;4;4;2", "soundness": "2;3;3;4", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "84;123;104;104", "wc_strengths": "80;72;110;93", "wc_weaknesses": "99;277;295;303", "wc_questions": "50;64;184;356", "wc_limitations": "14;15;18;9", "wc_review": "327;551;711;865", "wc_reply_reviewers": "107;124;130;55", "wc_reply_authors": "0;457;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.75, 13.790848414800301 ], "wc_strengths_avg": [ 88.75, 14.376630342329875 ], "wc_weaknesses_avg": [ 243.5, 83.95683414707823 ], "wc_questions_avg": [ 163.5, 122.73854325353548 ], "wc_limitations_avg": [ 14.0, 3.24037034920393 ], "wc_review_avg": [ 613.5, 199.21533575505677 ], "wc_reply_reviewers_avg": [ 104.0, 29.52117883825102 ], "wc_reply_authors_avg": [ 114.25, 197.88680476474423 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17363945375166827108&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mixture of Nested Experts: Adaptive Processing of Visual Tokens", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95822", "id": "HbV5vRJMOY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HbV5vRJMOY", "openreview": "https://openreview.net/forum?id=HbV5vRJMOY", "poster": "/media/PosterPDFs/NeurIPS%202024/95822.png?t=1733558483.5664923", "project": "", "author_site": "Gagan Jain, Nidhi Hegde, Aditya Kusupati, Arsha Nagrani, Shyamal Buch, Prateek Jain, Anurag Arnab, Sujoy Paul", "tldr": "", "abstract": "The visual medium (images and videos) naturally contains a large amount of information redundancy, thereby providing a great opportunity for leveraging efficiency in processing. While Vision Transformer (ViT) based models scale effectively to large data regimes, they fail to capitalize on this inherent redundancy, leading to higher computational costs. Mixture of Experts (MoE) networks demonstrate scalability while maintaining same inference-time costs, but they come with a larger parameter footprint. We present Mixture of Nested Experts (MoNE), which utilizes a nested structure for experts, wherein individual experts fall on an increasing compute-accuracy curve. Given a compute budget, MoNE learns to dynamically choose tokens in a priority order, and thus redundant tokens are processed through cheaper nested experts. Using this framework, we achieve equivalent performance as the baseline models, while reducing inference time compute by over two-fold. We validate our approach on standard image and video datasets - ImageNet-21K, Kinetics400, and Something-Something-v2. We further highlight MoNE's adaptability by showcasing its ability to maintain strong performance across different inference-time compute budgets on videos, using only a single trained model.", "keywords": "Mixture of Experts;Matryoshka Representation Learning;Information Compression;Efficient Inference", "primary_area": "machine_vision", "supplementary_material": "", "author": "Gagan Jain;Nidhi Hegde;Aditya Kusupati;Arsha Nagrani;Shyamal Buch;Prateek Jain;Anurag Arnab;Sujoy Paul", "authorids": "~Gagan_Jain1;~Nidhi_Hegde2;~Aditya_Kusupati1;~Arsha_Nagrani2;~Shyamal_Buch1;~Prateek_Jain1;~Anurag_Arnab1;~Sujoy_Paul1", "gender": "M;F;M;;Unspecified;M;;M", "homepage": "https://gag-j.github.io/;https://www.linkedin.com/in/nidhi-hegde-7ab217195;http://www.adityakusupati.com/;;https://cs.stanford.edu/~shyamal;http://prateekjain.org;;https://intra.ece.ucr.edu/~supaul/", "dblp": "383/7063;15/6158-2;231/7662;;207/8458;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html;;138/6200", "google_scholar": "qsIjwG4AAAAJ;;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;;https://scholar.google.com/citations?hl=en;qYhRbJoAAAAJ;;Iq8BQUYAAAAJ", "orcid": "0009-0007-8394-9543;;0000-0001-8455-1851;;;;;", "linkedin": "gaganjain15/;;adityakusupati/;;;;;", "or_profile": "~Gagan_Jain1;~Nidhi_Hegde2;~Aditya_Kusupati1;~Arsha_Nagrani2;~Shyamal_Buch1;~Prateek_Jain1;~Anurag_Arnab1;~Sujoy_Paul1", "aff": "Google DeepMind;Google;Department of Computer Science, University of Washington;;Google DeepMind;Google;;Google", "aff_domain": "google.com;google.com;cs.washington.edu;;google.com;google.com;;google.com", "position": "Researcher;Researcher;PhD student;;Researcher;Researcher;;Researcher", "bibtex": "@inproceedings{\njain2024mixture,\ntitle={Mixture of Nested Experts: Adaptive Processing of Visual Tokens},\nauthor={Gagan Jain and Nidhi Hegde and Aditya Kusupati and Arsha Nagrani and Shyamal Buch and Prateek Jain and Anurag Arnab and Sujoy Paul},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HbV5vRJMOY}\n}", "github": "", "reviewers": "R1sX;YGXi;kK61;E3Y8;LDxY", "pdf_size": 3493578, "rating": "4;6;6;6;7", "confidence": "4;4;4;4;4", "soundness": "3;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;3;4", "wc_summary": "129;13;83;110;131", "wc_strengths": "63;26;72;57;42", "wc_weaknesses": "363;56;106;385;96", "wc_questions": "61;98;2;5;920", "wc_limitations": "55;46;6;16;6", "wc_review": "671;239;269;573;1195", "wc_reply_reviewers": "387;0;20;95;297", "wc_reply_authors": "589;0;10;70;108", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 93.2, 43.655011167104284 ], "wc_strengths_avg": [ 52.0, 16.260381299342274 ], "wc_weaknesses_avg": [ 201.2, 142.24963971834867 ], "wc_questions_avg": [ 217.2, 353.2349926040737 ], "wc_limitations_avg": [ 25.8, 20.6920274502041 ], "wc_review_avg": [ 589.4, 346.1546475204399 ], "wc_reply_reviewers_avg": [ 159.8, 154.74029856504737 ], "wc_reply_authors_avg": [ 155.4, 220.3811244185854 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5611121213595065752&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com;cs.washington.edu;;google.com;google.com;;google.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Google;University of Washington", "aff_unique_dep": "Google DeepMind;Department of Computer Science", "aff_unique_url": "https://deepmind.com;https://www.washington.edu", "aff_unique_abbr": "DeepMind;UW", "aff_campus_unique_index": "1;2;1;1", "aff_campus_unique": ";Mountain View;Seattle", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "Hc2ZwCYgmB", "title": "AdaFace: A Versatile Face Encoder for Zero-Shot Diffusion Model Personalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Since the advent of diffusion models, personalizing these models -- conditioning them to render novel subjects -- has been widely studied. Recently, several methods propose training a dedicated image encoder on a large variety of subject images. This encoder maps the images to identity embeddings (ID embeddings). During inference, these ID embeddings, combined with conventional prompts, condition a diffusion model to generate new images of the subject. However, such methods often face challenges in achieving a good balance between authenticity and compositionality -- accurately capturing the subject's likeness while effectively integrating them into varied and complex scenes. A primary source for this issue is that the ID embeddings reside in the \\emph{image token space} (``image prompts\"), which is not fully composable with the text prompt encoded by the CLIP text encoder. In this work, we present AdaFace, an image encoder that maps human faces into the \\emph{text prompt space}. After being trained only on 400K face images with 2 GPUs, it achieves high authenticity of the generated subjects and high compositionality with various text prompts. In addition, as the ID embeddings are integrated in a normal text prompt, it is highly compatible with existing pipelines and can be used without modification to generate authentic videos. We showcase the generated images and videos of celebrities under various compositional prompts. The source code is released on an anonymous repository \\url{https://github.com/adaface-neurips/adaface}.", "keywords": "face encoder;diffusion model personalization;composability;zero-shot", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/76e26680fce6d239acbe561dbb4467af07434508.zip", "author": "Shaohua Li;Xiuchao Sui;HONG YANG;Pin Nean Lai;Weide Liu;Xinxing Xu;Yong Liu;Rick Siow Mong Goh", "authorids": "~Shaohua_Li2;~Xiuchao_Sui2;~HONG_YANG7;~Pin_Nean_Lai1;~Weide_Liu2;~Xinxing_Xu1;~Yong_Liu10;~Rick_Siow_Mong_Goh1", "gender": ";;F;M;;M;M;", "homepage": ";;;;;https://sites.google.com/site/xinxingxu666/;;https://sites.google.com/view/rickgoh/home", "dblp": ";;;;;15/10654;29/4867-26;https://dblp.uni-trier.de/pers/g/Goh:Rick_Siow_Mong", "google_scholar": ";;T6ZP9u4AAAAJ;;;https://scholar.google.com.sg/citations?user=neFbpuEAAAAJ;QujHYk0AAAAJ;https://scholar.google.com.sg/citations?user=fBsBJjoAAAAJ", "orcid": ";;0000-0002-8879-5025;;;0000-0003-1449-3072;;0000-0001-9116-1595", "linkedin": ";;;pin-nean-lai;;;liuyongsg;rickgoh/", "or_profile": "~Shaohua_Li2;~Xiuchao_Sui2;~HONG_YANG7;~Pin_Nean_Lai1;~Weide_Liu2;~Xinxing_Xu1;~Yong_Liu10;~Rick_Siow_Mong_Goh1", "aff": ";;Institute of High Performance Computing, Singapore, A*STAR;Singapore University of Technology and Design;;Institute of High Performance Computing;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": ";;ihpc.a-star.edu.sg;sutd.edu.sg;;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg", "position": ";;Researcher;Undergrad student;;Scientist;Senior Scientist, Adjunct Assistant Professor;Director", "bibtex": "@misc{\nanonymous2024adaface,\ntitle={AdaFace: A Versatile Face Encoder for Zero-Shot Diffusion Model Personalization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Hc2ZwCYgmB}\n}", "github": "", "project": "", "reviewers": "jpRS;6DX2;Uvbt;jmG6", "site": "https://openreview.net/forum?id=Hc2ZwCYgmB", "pdf_size": 23467949, "rating": "3;4;5;5", "confidence": "5;3;3;4", "soundness": "1;2;2;2", "novelty": "2;2;2;2", "presentation": "1;3;2;2", "wc_summary": "149;99;61;88", "wc_strengths": "21;32;28;20", "wc_weaknesses": "144;106;128;338", "wc_questions": "168;1;87;6", "wc_limitations": "6;1;8;10", "wc_review": "488;239;312;462", "wc_reply_reviewers": "263;257;57;38", "wc_reply_authors": "575;211;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.25, 31.877695964420013 ], "wc_strengths_avg": [ 25.25, 4.968651728587948 ], "wc_weaknesses_avg": [ 179.0, 92.78469701410896 ], "wc_questions_avg": [ 65.5, 68.31727453580098 ], "wc_limitations_avg": [ 6.25, 3.344772040064913 ], "wc_review_avg": [ 375.25, 103.44412743118868 ], "wc_reply_reviewers_avg": [ 153.75, 106.4832733343599 ], "wc_reply_authors_avg": [ 196.5, 234.89199645794662 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:58MrMb49P3wJ:scholar.google.com/&scioq=AdaFace:+A+Versatile+Face+Encoder+for+Zero-Shot+Diffusion+Model+Personalization&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Institute of High Performance Computing;Singapore University of Technology and Design", "aff_unique_dep": ";", "aff_unique_url": "https://www.ihpc.a-star.edu.sg;https://www.sutd.edu.sg", "aff_unique_abbr": "IHPC;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "SciFIBench: Benchmarking Large Multimodal Models for Scientific Figure Interpretation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97770", "id": "HcLFNuQwy5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HcLFNuQwy5", "openreview": "https://openreview.net/forum?id=HcLFNuQwy5", "poster": "/media/PosterPDFs/NeurIPS%202024/97770.png?t=1733107910.814643", "project": "", "author_site": "Jonathan Roberts, Kai Han, Neil Houlsby, Samuel Albanie", "tldr": "", "abstract": "Large multimodal models (LMMs) have proven flexible and generalisable across many tasks and fields. Although they have strong potential to aid scientific research, their capabilities in this domain are not well characterised. A key aspect of scientific research is the ability to understand and interpret figures, which serve as a rich, compressed source of complex information. In this work, we present SciFIBench, a scientific figure interpretation benchmark consisting of 2000 questions split between two tasks across 8 categories. The questions are curated from arXiv paper figures and captions, using adversarial filtering to find hard negatives and human verification for\nquality control. We evaluate 28 LMMs on SciFIBench, finding it to be a challenging benchmark. Finally, we investigate the alignment and reasoning faithfulness of the LMMs on augmented question sets from our benchmark. We release SciFIBench to encourage progress in this domain.", "keywords": "AI4Science;Benchmark;LMMs;Scientific Figures", "primary_area": "", "supplementary_material": "", "author": "Jonathan Roberts;Kai Han;Neil Houlsby;Samuel Albanie", "authorids": "~Jonathan_Roberts1;~Kai_Han1;~Neil_Houlsby1;~Samuel_Albanie2", "gender": "M;M;M;Not Specified", "homepage": ";http://www.kaihan.org/;https://neilhoulsby.github.io/;https://samuelalbanie.com/", "dblp": "278/9312-4;51/4757-1.html;91/10669;188/5765", "google_scholar": ";tG8S_vMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=QjufASUAAAAJ", "orcid": ";0000-0002-7995-9999;;", "linkedin": "jonathan-roberts1/;kaihancs/;;", "or_profile": "~Jonathan_Roberts1;~Kai_Han1;~Neil_Houlsby1;~samuel_Albanie1", "aff": "University of Cambridge;The University of Hong Kong;Google;University of Cambridge", "aff_domain": "cam.ac.uk;hku.hk;google.com;cam.ac.uk", "position": "PhD student;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nroberts2024scifibench,\ntitle={Sci{FIB}ench: Benchmarking Large Multimodal Models for Scientific Figure Interpretation},\nauthor={Jonathan Roberts and Kai Han and Neil Houlsby and Samuel Albanie},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=HcLFNuQwy5}\n}", "github": "", "reviewers": "X2MV;9Ksv;8auP", "pdf_size": 2269575, "rating": "5;6;7", "confidence": "5;4;3", "wc_summary_and_contributions": "66;54;146", "wc_strengths": "3;39;66", "wc_improvement": "138;30;56", "wc_limitations": "73;38;42", "wc_correctness": "1;19;3", "wc_clarity": "1;8;4", "wc_relation_to_prior_work": "1;8;8", "wc_documentation": "1;53;18", "wc_additional_feedback": "1;1;1", "wc_review": "285;250;344", "wc_reply_reviewers": "0;150;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "3;4;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 88.66666666666667, 40.83571421630281 ], "wc_strengths_avg": [ 36.0, 25.80697580112788 ], "wc_improvement_avg": [ 74.66666666666667, 46.02414825091522 ], "wc_limitations_avg": [ 51.0, 15.641824275533422 ], "wc_correctness_avg": [ 7.666666666666667, 8.055363982396381 ], "wc_clarity_avg": [ 4.333333333333333, 2.8674417556808756 ], "wc_relation_to_prior_work_avg": [ 5.666666666666667, 3.299831645537222 ], "wc_documentation_avg": [ 24.0, 21.64871050817269 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 293.0, 38.79003308411411 ], "wc_reply_reviewers_avg": [ 50.0, 70.71067811865476 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2396391111796264269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;hku.hk;google.com;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;University of Hong Kong;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.cam.ac.uk;https://www.hku.hk;https://www.google.com", "aff_unique_abbr": "Cambridge;HKU;Google", "aff_campus_unique_index": "0;1;2;0", "aff_campus_unique": "Cambridge;Hong Kong SAR;Mountain View", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;China;United States" }, { "title": "Safe LoRA: The Silver Lining of Reducing Safety Risks when Finetuning Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95821", "id": "HcifdQZFZV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HcifdQZFZV", "openreview": "https://openreview.net/forum?id=HcifdQZFZV", "poster": "/media/PosterPDFs/NeurIPS%202024/95821.png?t=1733292390.6715004", "project": "", "author_site": "Chia-Yi Hsu, Yu-Lin Tsai, Chih-Hsun Lin, Pin-Yu Chen, Chia-Mu Yu, Chun-Ying Huang", "tldr": "", "abstract": "While large language models (LLMs) such as Llama-2 or GPT-4 have shown impressive zero-shot performance, fine-tuning is still necessary to enhance their performance for customized datasets, domain-specific tasks, or other private needs. However, fine-tuning all parameters of LLMs requires significant hardware resources, which can be impractical for typical users. Therefore, parameter-efficient fine-tuning such as LoRA have emerged, allowing users to fine-tune LLMs without the need for considerable computing resources, with little performance degradation compared to fine-tuning all parameters. Unfortunately, recent studies indicate that fine-tuning can increase the risk to the safety of LLMs, even when data does not contain malicious content. To address this challenge, we propose $\\textsf{Safe LoRA}$, a simple one-liner patch to the original LoRA implementation by introducing the projection of LoRA weights from selected layers to the safety-aligned subspace, effectively reducing the safety risks in LLM fine-tuning while maintaining utility. It is worth noting that $\\textsf{Safe LoRA}$ is a training-free and data-free approach, as it only requires the knowledge of the weights from the base and aligned LLMs. Our extensive experiments demonstrate that when fine-tuning on purely malicious data, $\\textsf{Safe LoRA}$ retains similar safety performance as the original aligned model. Moreover, when the fine-tuning dataset contains a mixture of both benign and malicious data, $\\textsf{Safe LoRA}$ mitigates the negative effect made by malicious data while preserving performance on downstream tasks. Our codes are available at https://github.com/IBM/SafeLoRA.", "keywords": "Larage Language Models;safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0fb5001b6811d7da293ef50c6892c2b8946ee02c.zip", "author": "Chia-Yi Hsu;Yu-Lin Tsai;Chih-Hsun Lin;Pin-Yu Chen;Chia-Mu Yu;Chun-Ying Huang", "authorids": "~Chia-Yi_Hsu1;~Yu-Lin_Tsai1;~Chih-Hsun_Lin2;~Pin-Yu_Chen1;~Chia-Mu_Yu1;~Chun-Ying_Huang1", "gender": "F;M;M;M;Not Specified;M", "homepage": ";https://www.linkedin.com/in/uriah-tsai-bbb36516b/;http://www.pinyuchen.com;https://chiamuyu.weebly.com/;https://people.cs.nycu.edu.tw/~chuang/;", "dblp": "227/2154;;39/8969;91/1919.html;08/3422;21/8858", "google_scholar": ";https://scholar.google.com.tw/citations?hl=zh-TW;jxwlCUUAAAAJ;https://scholar.google.com.tw/citations?user=dW4W4isAAAAJ;ixq3XDUAAAAJ;https://scholar.google.com.tw/citations?user=T48_vqUAAAAJ", "orcid": ";;0000-0003-1039-8369;0000-0002-1677-2131;0000-0001-5503-9541;0000-0002-2668-0556", "linkedin": "chia-yi-hsu-136a86155;;pin-yu-chen-940062a2;chia-mu-yu-0b130988?originalSubdomain=tw;;", "or_profile": "~Chia-Yi_Hsu1;~Yu-Lin_Tsai1;~Pin-Yu_Chen1;~Chia-Mu_Yu1;~Chun-Ying_Huang1;~CHIH-HSUN_LIN1", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;International Business Machines;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;nycu.edu.tw;ibm.com;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "position": "PhD student;Undergrad student;Principal Researcher;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nhsu2024safe,\ntitle={Safe Lo{RA}: The Silver Lining of Reducing Safety Risks when Finetuning Large Language Models},\nauthor={Chia-Yi Hsu and Yu-Lin Tsai and Chih-Hsun Lin and Pin-Yu Chen and Chia-Mu Yu and Chun-Ying Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HcifdQZFZV}\n}", "github": "", "reviewers": "93t8;2xQA;X4Co;d4Du", "pdf_size": 577343, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "40;39;59;60", "wc_strengths": "25;55;76;140", "wc_weaknesses": "131;111;297;73", "wc_questions": "116;37;153;29", "wc_limitations": "23;1;40;5", "wc_review": "335;243;625;307", "wc_reply_reviewers": "74;10;324;14", "wc_reply_authors": "715;27;315;44", "reply_reviewers": "1;1;4;1", "reply_authors": "2;2;5;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 49.5, 10.012492197250394 ], "wc_strengths_avg": [ 74.0, 42.19597137168429 ], "wc_weaknesses_avg": [ 153.0, 85.70880934886448 ], "wc_questions_avg": [ 83.75, 52.48511693804254 ], "wc_limitations_avg": [ 17.25, 15.530212490497354 ], "wc_review_avg": [ 377.5, 146.7336021502914 ], "wc_reply_reviewers_avg": [ 105.5, 128.67303524826016 ], "wc_reply_authors_avg": [ 275.25, 278.417290231767 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5701955460309258466&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "nycu.edu.tw;nycu.edu.tw;ibm.com;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nycu.edu.tw;https://www.ibm.com", "aff_unique_abbr": "NYCU;IBM", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Hierarchical Object-Aware Dual-Level Contrastive Learning for Domain Generalized Stereo Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95820", "id": "HcqV2bPFKz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HcqV2bPFKz", "openreview": "https://openreview.net/forum?id=HcqV2bPFKz", "poster": "/media/PosterPDFs/NeurIPS%202024/95820.png?t=1730741680.3788605", "project": "", "author_site": "Yikun Miao, Meiqing Wu, Siew Kei Lam, Changsheng Li, Thambipillai Srikanthan", "tldr": "", "abstract": "Stereo matching algorithms that leverage end-to-end convolutional neural networks have recently demonstrated notable advancements in performance. However, a common issue is their susceptibility to domain shifts, hindering their ability in generalizing to diverse, unseen realistic domains. We argue that existing stereo matching networks overlook the importance of extracting semantically and structurally meaningful features. To address this gap, we propose an effective hierarchical object-aware dual-level contrastive learning (HODC) framework for domain generalized stereo matching. Our framework guides the model in extracting features that support semantically and structurally driven matching by segmenting objects at different scales and enhances correspondence between intra- and inter-scale regions from the left feature map to the right using dual-level contrastive loss. HODC can be integrated with existing stereo matching models in the training stage, requiring no modifications to the architecture. Remarkably, using only synthetic datasets for training, HODC achieves state-of-the-art generalization performance with various existing stereo matching network architectures, across multiple realistic datasets.", "keywords": "Stereo Matching;Domain Generalization;Object-Aware;Contrastive Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yikun Miao;Meiqing Wu;Siew Kei Lam;Changsheng Li;Thambipillai Srikanthan", "authorids": "~Yikun_Miao1;~Meiqing_Wu1;~Siew_Kei_Lam1;~Changsheng_Li4;~Thambipillai_Srikanthan1", "gender": ";;;M;M", "homepage": ";;;;https://www.ntu.edu.sg/scse/about-us/past-chairs/prof-thambipillai-srikanthan", "dblp": ";;;;23/1694", "google_scholar": ";;;FfJnUioAAAAJ;", "orcid": ";;;0000-0001-9789-7632;", "linkedin": ";;;;", "or_profile": "~Yikun_Miao1;~Meiqing_Wu1;~Siew_Kei_Lam1;~Changsheng_Li4;~Thambipillai_Srikanthan1", "aff": ";;;Beijing Institute of Technology;Nanyang Technological University", "aff_domain": ";;;bit.edu.cn;ntu.edu.sg", "position": ";;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmiao2024hierarchical,\ntitle={Hierarchical Object-Aware Dual-Level Contrastive Learning for Domain Generalized Stereo Matching},\nauthor={Yikun Miao and Meiqing Wu and Siew Kei Lam and Changsheng Li and Thambipillai Srikanthan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HcqV2bPFKz}\n}", "github": "", "reviewers": "NWJx;ApGF;jWZr;uUe2", "pdf_size": 25461424, "rating": "5;6;6;7", "confidence": "5;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "51;109;50;101", "wc_strengths": "33;276;52;27", "wc_weaknesses": "69;75;49;38", "wc_questions": "32;243;25;96", "wc_limitations": "6;19;6;1", "wc_review": "191;722;182;263", "wc_reply_reviewers": "0;199;0;30", "wc_reply_authors": "0;45;0;36", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 27.39867697535777 ], "wc_strengths_avg": [ 97.0, 103.7569274795664 ], "wc_weaknesses_avg": [ 57.75, 14.922717580923388 ], "wc_questions_avg": [ 99.0, 87.62134443159384 ], "wc_limitations_avg": [ 8.0, 6.670832032063167 ], "wc_review_avg": [ 339.5, 223.05660716508714 ], "wc_reply_reviewers_avg": [ 57.25, 82.75075528356221 ], "wc_reply_authors_avg": [ 20.25, 20.498475553074673 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9sHB3P1pz48J:scholar.google.com/&scioq=Hierarchical+Object-Aware+Dual-Level+Contrastive+Learning+for+Domain+Generalized+Stereo+Matching&hl=en&as_sdt=0,11", "gs_version_total": 0, "email": ";;;bit.edu.cn;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Beijing Institute of Technology;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;https://www.ntu.edu.sg", "aff_unique_abbr": "BIT;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "title": "Decomposed Prompt Decision Transformer for Efficient Unseen Task Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95819", "id": "HcqnhqoXS3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HcqnhqoXS3", "openreview": "https://openreview.net/forum?id=HcqnhqoXS3", "poster": "/media/PosterPDFs/NeurIPS%202024/95819.png?t=1731465867.3835514", "project": "", "author_site": "Hongling Zheng, Li Shen, Yong Luo, Tongliang Liu, Jialie Shen, Dacheng Tao", "tldr": "", "abstract": "Multi-task offline reinforcement learning aims to develop a unified policy for diverse tasks without requiring real-time interaction with the environment. Recent work explores sequence modeling, leveraging the scalability of the transformer architecture as a foundation for multi-task learning. Given the variations in task content and complexity, formulating policies becomes a challenging endeavor, requiring careful parameter sharing and adept management of conflicting gradients to extract rich cross-task knowledge from multiple tasks and transfer it to unseen tasks. In this paper, we propose the Decomposed Prompt Decision Transformer (DPDT) that adopts a two-stage paradigm to efficiently learn prompts for unseen tasks in a parameter-efficient manner. We incorporate parameters from pre-trained language models (PLMs) to initialize DPDT, thereby providing rich prior knowledge encoded in language models. During the decomposed prompt tuning phase, we learn both cross-task and task-specific prompts on training tasks to achieve prompt decomposition. In the test time adaptation phase, the cross-task prompt, serving as a good initialization, were further optimized on unseen tasks through test time adaptation, enhancing the model's performance on these tasks. Empirical evaluation on a series of Meta-RL benchmarks demonstrates the superiority of our approach. The project is available at https://github.com/ruthless-man/DPDT.", "keywords": "Offline Reinforcement Learning;Prompt Tuning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Hongling Zheng;Li Shen;Yong Luo;Tongliang Liu;Jialie Shen;Dacheng Tao", "authorids": "~Hongling_Zheng2;~Li_Shen1;~Yong_Luo2;~Tongliang_Liu1;~Jialie_Shen2;~Dacheng_Tao1", "gender": "M;M;M;M;M;", "homepage": ";https://sites.google.com/site/mathshenli/home;;https://tongliang-liu.github.io/;https://www.city.ac.uk/about/find-contact/staff-directory?query=Jialie+shen;", "dblp": "328/9513;91/3680-8;57/5272-2.html;150/6667;33/7046;", "google_scholar": ";yVhgENIAAAAJ;zb1oVGIAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;d3h-zScAAAAJ;", "orcid": "0000-0001-9077-1594;;;;0000-0002-4560-8509;", "linkedin": ";;;;;", "or_profile": "~Hongling_Zheng2;~Li_Shen1;~Yong_Luo2;~Tongliang_Liu1;~Jialie_Shen2;~Dacheng_Tao1", "aff": "Wuhan University;JD Explore Academy;Wuhan University;Mohamed bin Zayed University of Artificial Intelligence;City, University of London;", "aff_domain": "whu.edu.cn;jd.com;whu.edu.cn;mbzuai.ac.ae;city.ac.uk;", "position": "MS student;Researcher;Professor;Affiliated Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nzheng2024decomposed,\ntitle={Decomposed Prompt Decision Transformer for Efficient Unseen Task Generalization},\nauthor={Hongling Zheng and Li Shen and Yong Luo and Tongliang Liu and Jialie Shen and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HcqnhqoXS3}\n}", "github": "", "reviewers": "zhrS;urHM;d5sD;6jy3", "pdf_size": 562583, "rating": "4;4;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "113;126;61;107", "wc_strengths": "95;93;28;85", "wc_weaknesses": "229;60;191;286", "wc_questions": "204;49;6;167", "wc_limitations": "8;4;10;7", "wc_review": "649;332;296;652", "wc_reply_reviewers": "0;34;10;203", "wc_reply_authors": "85;748;10;974", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 101.75, 24.508926945094924 ], "wc_strengths_avg": [ 75.25, 27.535204738661378 ], "wc_weaknesses_avg": [ 191.5, 83.1098670435707 ], "wc_questions_avg": [ 106.5, 81.50613473843549 ], "wc_limitations_avg": [ 7.25, 2.165063509461097 ], "wc_review_avg": [ 482.25, 168.7340733224917 ], "wc_reply_reviewers_avg": [ 61.75, 82.4814373056144 ], "wc_reply_authors_avg": [ 454.25, 415.37114427942635 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17489211325513233868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;jd.com;whu.edu.cn;mbzuai.ac.ae;city.ac.uk;", "author_num": 6, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Wuhan University;JD;Mohamed bin Zayed University of Artificial Intelligence;City, University of London", "aff_unique_dep": ";JD Explore Academy;;", "aff_unique_url": "http://www.whu.edu.cn/;;https://mbzuai.ac.ae;https://www.city.ac.uk", "aff_unique_abbr": "WHU;;MBZUAI;City, University of London", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;3", "aff_country_unique": "China;;United Arab Emirates;United Kingdom" }, { "title": "Classification Done Right for Vision-Language Pre-Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95818", "id": "Hd2EOwKItm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Hd2EOwKItm", "openreview": "https://openreview.net/forum?id=Hd2EOwKItm", "poster": "", "project": "", "author_site": "Zilong Huang, Qinghao Ye, Bingyi Kang, Jiashi Feng, Haoqi Fan", "tldr": "", "abstract": "We introduce SuperClass, a super simple classification method for vision-language pre-training on image-text data. Unlike its contrastive counterpart CLIP who contrast with a text encoder, SuperClass directly utilizes tokenized raw text as supervised classification labels, without the need for additional text filtering or selection. Due to the absence of the text encoding as contrastive target, SuperClass does not require a text encoder and does not need to maintain a large batch size as CLIP does. SuperClass demonstrated superior performance on various downstream tasks, including classic computer vision benchmarks and vision language downstream tasks. We further explored the scaling behavior of SuperClass on model size, training length, or data size, and reported encouraging results and comparisons to CLIP. https://github.com/x-cls/superclass", "keywords": "Text Supervision;Visual Models;Pretraining;classification", "primary_area": "other", "supplementary_material": "", "author": "Zilong Huang;Qinghao Ye;Bingyi Kang;Jiashi Feng;Haoqi Fan", "authorids": "~Zilong_Huang1;~Qinghao_Ye1;~Bingyi_Kang1;~Jiashi_Feng1;~Haoqi_Fan2", "gender": "M;;;M;M", "homepage": "http://speedinghzl.github.io/;;https://bingykang.github.io/;https://haoqifan.github.io/;https://sites.google.com/site/jshfeng/", "dblp": "185/9199;254/3247;;137/5747;56/8278", "google_scholar": "GW9vw8UAAAAJ;ZYOhaGwAAAAJ;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;76B8lrgAAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ", "orcid": ";;;;0000-0001-6843-0064", "linkedin": ";;;;", "or_profile": "~Zilong_Huang1;~Qinghao_Ye1;~Bingyi_Kang1;~Haoqi_Fan2;~Jiashi_Feng2", "aff": "Bytedance;ByteDance Inc.;Bytedance;Facebook AI Research;ByteDance", "aff_domain": "bytedance.com;bytedance.com;bytedance.com;fb.com;bytedance.com", "position": "Researcher;Researcher;Researcher;Researcher;Research Lead", "bibtex": "@inproceedings{\nhuang2024classification,\ntitle={Classification Done Right for Vision-Language Pre-Training},\nauthor={Zilong Huang and Qinghao Ye and Bingyi Kang and Jiashi Feng and Haoqi Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Hd2EOwKItm}\n}", "github": "", "reviewers": "aefc;3a7k;W9bX;bBvT", "pdf_size": 502509, "rating": "3;6;6;7", "confidence": "4;3;5;5", "soundness": "2;2;2;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "13;76;136;120", "wc_strengths": "5;79;92;139", "wc_weaknesses": "49;107;195;136", "wc_questions": "268;5;35;126", "wc_limitations": "1;25;11;52", "wc_review": "336;292;469;573", "wc_reply_reviewers": "531;13;153;117", "wc_reply_authors": "1369;28;83;718", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 47.65697745346425 ], "wc_strengths_avg": [ 78.75, 48.07481149209012 ], "wc_weaknesses_avg": [ 121.75, 52.6278205894943 ], "wc_questions_avg": [ 108.5, 102.29980449639187 ], "wc_limitations_avg": [ 22.25, 19.17517926904466 ], "wc_review_avg": [ 417.5, 110.93353866166895 ], "wc_reply_reviewers_avg": [ 203.5, 195.94578331773306 ], "wc_reply_authors_avg": [ 549.5, 545.3340719228902 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3015113445777637, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17001137822117783737&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "bytedance.com;bytedance.com;bytedance.com;fb.com;bytedance.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "ByteDance;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.bytedance.com;https://research.facebook.com", "aff_unique_abbr": "Bytedance;FAIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "MINT-1T: Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97769", "id": "HdIiSPLgzC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HdIiSPLgzC", "openreview": "https://openreview.net/forum?id=HdIiSPLgzC", "poster": "", "project": "", "author_site": "Anas Awadalla, Le Xue, Oscar Lo, Manli Shu, Hannah Lee, Etash Guha, Sheng Shen, Mohamed Awadalla, Silvio Savarese, Caiming Xiong, Ran Xu, Yejin Choi, Ludwig Schmidt", "tldr": "", "abstract": "Multimodal interleaved datasets featuring free-form interleaved sequences of images and text are crucial for training frontier large multimodal models (LMMs). Despite the rapid progression of open-source LMMs, there remains a pronounced scarcity of large-scale, open-source multimodal interleaved datasets.\nIn response, we introduce MINT-1T, the most extensive and diverse open-source Multimodal INTerleaved dataset to date. MINT-1T comprises of one trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. As scaling multimodal interleaved datasets requires substantial engineering effort, sharing the data curation process and releasing the dataset greatly benefits the community. Our experiments show that LMMs trained on MINT-1T rival the performance of models trained on the previous leading dataset, OBELICS. We release our data at https://github.com/mlfoundations/MINT-1T.", "keywords": "multimodal data;open-source;vision-language models", "primary_area": "", "supplementary_material": "", "author": "Anas Awadalla;Le Xue;Oscar Lo;Manli Shu;Hannah Lee;Etash Kumar Guha;Sheng Shen;Mohamed Awadalla;Silvio Savarese;Caiming Xiong;Ran Xu;Yejin Choi;Ludwig Schmidt", "authorids": "~Anas_Awadalla1;~Le_Xue1;~Oscar_Lo1;~Manli_Shu1;~Hannah_Lee2;~Etash_Kumar_Guha1;~Sheng_Shen2;~Mohamed_Awadalla1;~Silvio_Savarese1;~Caiming_Xiong1;~Ran_Xu1;~Yejin_Choi1;~Ludwig_Schmidt1", "gender": "M;M;M;F;F;M;M;M;M;M;M;F;M", "homepage": "https://github.com/anas-awadalla;;;https://azshue.github.io/;https://hannahyklee.github.io;https://etash.me/;https://sincerass.github.io;;;http://cmxiong.com/;;https://yejinc.github.io/;http://people.csail.mit.edu/ludwigs/", "dblp": ";304/2195;;263/3503;19/6146;331/5590;138/5764-1.html;;50/3578;80/7282;;89/579-1;141/2720", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?hl=en;lSqYhxYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;ImpbxLsAAAAJ;vaSdahkAAAAJ;sgBB2sUAAAAJ;vhP-tlcAAAAJ;SWMKy70AAAAJ", "orcid": ";0000-0003-2810-770X;;;0009-0004-0713-7787;;;;;;;;", "linkedin": ";le-tycho-xue-5abbb9157/;oscar-lo-33b7a21b6;manli-shu-a804a8164/;hannahyklee/;etash-guha-00097116a/;sheng-s-ab198a174/;mohamed-a;;caiming-xiong-150a1417;;;ludwig-schmidt-87ba3612/", "or_profile": "~Anas_Awadalla1;~Le_Xue1;~Oscar_Lo1;~Manli_Shu1;~Hannah_Lee2;~Etash_Kumar_Guha1;~Sheng_Shen2;~Mohamed_Awadalla1;~Silvio_Savarese1;~Caiming_Xiong1;~Ran_Xu1;~Yejin_Choi1;~Ludwig_Schmidt1", "aff": "Department of Computer Science, University of Washington;Salesforce;Department of Computer Science, University of Washington;Department of Computer Science, University of Maryland, College Park;Department of Computer Science, University of Washington;;University of California, Berkeley;Department of Computer Science, University of Washington;Stanford University;Salesforce Research;SalesForce.com;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "cs.washington.edu;salesforce.com;cs.washington.edu;cs.umd.edu;cs.washington.edu;;berkeley.edu;cs.washington.edu;stanford.edu;salesforce.com;salesforce.com;cs.washington.edu;washington.edu", "position": "PhD student;Researcher;Undergrad student;PhD student;MS student;;PhD student;Undergrad student;Adjunct Professor;Research Scientist;senior manager;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nawadalla2024mintt,\ntitle={{MINT}-1T: Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens},\nauthor={Anas Awadalla and Le Xue and Oscar Lo and Manli Shu and Hannah Lee and Etash Kumar Guha and Sheng Shen and Mohamed Awadalla and Silvio Savarese and Caiming Xiong and Ran Xu and Yejin Choi and Ludwig Schmidt},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=HdIiSPLgzC}\n}", "github": "", "reviewers": "GFBY;f6h4;m5U3;YhHn", "pdf_size": 1986434, "rating": "6;7;8;8", "confidence": "4;3;4;3", "wc_summary_and_contributions": "78;88;170;108", "wc_strengths": "2;5;29;108", "wc_improvement": "2;26;32;91", "wc_limitations": "1;11;79;6", "wc_correctness": "1;9;9;3", "wc_clarity": "1;6;8;6", "wc_relation_to_prior_work": "1;11;5;8", "wc_documentation": "1;17;46;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "88;174;379;345", "wc_reply_reviewers": "0;9;0;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 111.0, 35.73513677041127 ], "wc_strengths_avg": [ 36.0, 42.86607049870562 ], "wc_improvement_avg": [ 37.75, 32.72900090134131 ], "wc_limitations_avg": [ 24.25, 31.80703538527286 ], "wc_correctness_avg": [ 5.5, 3.570714214271425 ], "wc_clarity_avg": [ 5.25, 2.5860201081971503 ], "wc_relation_to_prior_work_avg": [ 6.25, 3.6996621467371855 ], "wc_documentation_avg": [ 19.5, 16.439282222773596 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 246.5, 120.03853547923683 ], "wc_reply_reviewers_avg": [ 4.5, 4.5 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14083241773088061733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.washington.edu;salesforce.com;cs.washington.edu;cs.umd.edu;cs.washington.edu;;berkeley.edu;cs.washington.edu;stanford.edu;salesforce.com;salesforce.com;cs.washington.edu;washington.edu", "author_num": 13, "aff_unique_index": "0;1;0;2;0;3;0;4;1;1;0;0", "aff_unique_norm": "University of Washington;Salesforce;University of Maryland, College Park;University of California, Berkeley;Stanford University", "aff_unique_dep": "Department of Computer Science;;Department of Computer Science;;", "aff_unique_url": "https://www.washington.edu;https://www.salesforce.com;https://www/umd.edu;https://www.berkeley.edu;https://www.stanford.edu", "aff_unique_abbr": "UW;Salesforce;UMD;UC Berkeley;Stanford", "aff_campus_unique_index": "0;0;2;0;3;0;4;0", "aff_campus_unique": "Seattle;;College Park;Berkeley;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Empowering Active Learning for 3D Molecular Graphs with Geometric Graph Isomorphism", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95817", "id": "He2GCHeRML", "proceeding": "", "pdf": "https://openreview.net/pdf?id=He2GCHeRML", "openreview": "https://openreview.net/forum?id=He2GCHeRML", "poster": "/media/PosterPDFs/NeurIPS%202024/95817.png?t=1731434106.8167539", "project": "", "author_site": "Ronast Subedi, Lu Wei, Wenhan Gao, Shayok Chakraborty, Yi Liu", "tldr": "", "abstract": "Molecular learning is pivotal in many real-world applications, such as drug discovery. Supervised learning requires heavy human annotation, which is particularly challenging for molecular data, e.g., the commonly used density functional theory (DFT) is highly computationally expensive. Active learning (AL) automatically queries labels for most informative samples, thereby remarkably alleviating the annotation hurdle. In this paper, we present a principled AL paradigm for molecular learning, where we treat molecules as 3D molecular graphs. Specifically, we propose a new diversity sampling method to eliminate mutual redundancy built on distributions of 3D geometries. We first propose a set of new 3D graph isometries for 3D graph isomorphism analysis. Our method is provably at least as expressive as the Geometric Weisfeiler-Lehman (GWL) test. The moments of the distributions of the associated geometries are then extracted for efficient diversity computing. To ensure our AL paradigm selects samples with maximal uncertainties, we carefully design a Bayesian geometric graph neural network to compute uncertainties specifically for 3D molecular graphs. We pose active sampling as a quadratic programming (QP) problem using the proposed components. Experimental results demonstrate the effectiveness of our AL paradigm, as well as the proposed diversity and uncertainty methods.", "keywords": "Active learning;3D molecular graphs;graph neural networks;molecular diversity", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Ronast Subedi;Lu Wei;Wenhan Gao;Shayok Chakraborty;Yi Liu", "authorids": "~Ronast_Subedi1;~Lu_Wei2;~Wenhan_Gao1;~Shayok_Chakraborty1;~Yi_Liu12", "gender": "M;F;M;M;", "homepage": "https://sronast.github.io/;https://rombergliwei.github.io/;https://wenhangao21.github.io/;http://shayokch.com/;", "dblp": "317/7000;;223/3510-2.html;70/908;", "google_scholar": "ky1Tq6QAAAAJ;QuVuEikAAAAJ;https://scholar.google.com/citations?hl=en;5f3w1p0AAAAJ;", "orcid": "0000-0002-7569-724X;;0009-0007-9947-4298;;", "linkedin": "sronast/;;;;", "or_profile": "~Ronast_Subedi1;~Lu_Wei2;~Wenhan_Gao1;~Shayok_Chakraborty1;~Yi_Liu12", "aff": "Florida State University;State University of New York at Stony Brook;State University of New York at Stony Brook;Florida State University;", "aff_domain": "fsu.edu;stonybrook.edu;stonybrook.edu;fsu.edu;", "position": "PhD student;PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nsubedi2024empowering,\ntitle={Empowering Active Learning for 3D Molecular Graphs with Geometric Graph Isomorphism},\nauthor={Ronast Subedi and Lu Wei and Wenhan Gao and Shayok Chakraborty and Yi Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=He2GCHeRML}\n}", "github": "", "reviewers": "yNCZ;Abqb;UZqH;JSdU", "pdf_size": 640850, "rating": "4;5;6;7", "confidence": "4;3;4;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "16;57;82;126", "wc_strengths": "37;55;78;83", "wc_weaknesses": "165;40;289;94", "wc_questions": "76;19;23;114", "wc_limitations": "77;5;86;2", "wc_review": "371;176;558;419", "wc_reply_reviewers": "90;20;320;46", "wc_reply_authors": "901;122;1589;64", "reply_reviewers": "1;1;2;1", "reply_authors": "4;3;6;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 39.889691650851354 ], "wc_strengths_avg": [ 63.25, 18.471261462065875 ], "wc_weaknesses_avg": [ 147.0, 93.20139483934777 ], "wc_questions_avg": [ 58.0, 39.38908478246226 ], "wc_limitations_avg": [ 42.5, 39.14396505209967 ], "wc_review_avg": [ 381.0, 136.83749486160582 ], "wc_reply_reviewers_avg": [ 119.0, 118.71394189394942 ], "wc_reply_authors_avg": [ 669.0, 625.5913202722685 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13810471184642762099&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "fsu.edu;stonybrook.edu;stonybrook.edu;fsu.edu;", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Florida State University;State University of New York at Stony Brook", "aff_unique_dep": ";", "aff_unique_url": "https://www.fsu.edu;https://www.stonybrook.edu", "aff_unique_abbr": "FSU;SUNY Stony Brook", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SCAFFLSA: Taming Heterogeneity in Federated Linear Stochastic Approximation and TD Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95816", "id": "HeJ1cBAgiV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HeJ1cBAgiV", "openreview": "https://openreview.net/forum?id=HeJ1cBAgiV", "poster": "/media/PosterPDFs/NeurIPS%202024/95816.png?t=1731486560.1590438", "project": "", "author_site": "Paul Mangold, Sergey Samsonov, Safwan Labbi, Ilya Levin, REDA ALAMI, Alexey Naumov, Eric Moulines", "tldr": "", "abstract": "In this paper, we analyze the sample and communication complexity of the federated linear stochastic approximation (FedLSA) algorithm. We explicitly quantify the effects of local training with agent heterogeneity. We show that the communication complexity of FedLSA scales polynomially with the inverse of the desired accuracy \u03f5. To overcome this, we propose SCAFFLSA a new variant of FedLSA that uses control variates to correct for client drift, and establish its sample and communication complexities. We show that for statistically heterogeneous agents, its communication complexity scales logarithmically with the desired accuracy, similar to Scaffnew. An important finding is that, compared to the existing results for Scaffnew, the sample complexity scales with the inverse of the number of agents, a property referred to as linear speed-up. Achieving this linear speed-up requires completely new theoretical arguments. We apply the proposed method to federated temporal difference learning with linear function approximation and analyze the corresponding complexity improvements.", "keywords": "Stochastic Approximation;Reinforcement learning;Federated Learning;Machine Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/78b8b88fa57bc26e32b38c1d2782698db2f2aeaf.zip", "author": "Paul Mangold;Sergey Samsonov;Safwan Labbi;Ilya Levin;Reda ALAMI;Alexey Naumov;Eric Moulines", "authorids": "~Paul_Mangold1;~Sergey_Samsonov1;~Safwan_Labbi1;~Ilya_Levin1;~Reda_ALAMI1;~Alexey_Naumov1;~Eric_Moulines1", "gender": "M;M;M;;;M;M", "homepage": "http://www.pmangold.fr;https://www.hse.ru/org/persons/219484540;;;;https://www.hse.ru/en/staff/anaumov;", "dblp": "298/1535;23/8962;;;;196/2848;54/2358", "google_scholar": "https://scholar.google.fr/citations?user=3HUiM0sAAAAJ;https://scholar.google.ru/citations?user=8BwDmyMAAAAJ;lJp5xdQAAAAJ;;;5723KoYAAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ", "orcid": "0000-0002-0252-5287;;;;;;0000-0002-2058-0693", "linkedin": ";;https://linkedin.com/in/safwan-labbi-615051167;;;;", "or_profile": "~Paul_Mangold1;~Sergey_Samsonov1;~Safwan_Labbi1;~Ilya_Levin1;~Reda_ALAMI1;~Alexey_Naumov1;~Eric_Moulines1", "aff": "\u00c9cole Polytechnique;Higher School of Economics;\u00c9cole Polytechnique;;;Higher School of Economics;Ecole polytechnique", "aff_domain": "polytechnique.edu;hse.ru;polytechnique.edu;;;hse.ru;polytechnique.edu", "position": "Postdoc;PhD student;PhD student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmangold2024scafflsa,\ntitle={{SCAFFLSA}: Taming Heterogeneity in Federated Linear Stochastic Approximation and {TD} Learning},\nauthor={Paul Mangold and Sergey Samsonov and Safwan Labbi and Ilya Levin and Reda ALAMI and Alexey Naumov and Eric Moulines},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HeJ1cBAgiV}\n}", "github": "", "reviewers": "czvr;WekN;bpaa", "pdf_size": 1654421, "rating": "6;6;7", "confidence": "2;3;2", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "50;59;100", "wc_strengths": "33;82;48", "wc_weaknesses": "25;29;97", "wc_questions": "76;16;80", "wc_limitations": "19;11;28", "wc_review": "203;197;353", "wc_reply_reviewers": "46;0;29", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.66666666666667, 21.761331658599286 ], "wc_strengths_avg": [ 54.333333333333336, 20.499322482029065 ], "wc_weaknesses_avg": [ 50.333333333333336, 33.03869784897031 ], "wc_questions_avg": [ 57.333333333333336, 29.272664533466862 ], "wc_limitations_avg": [ 19.333333333333332, 6.944222218666553 ], "wc_review_avg": [ 251.0, 72.16647421067486 ], "wc_reply_reviewers_avg": [ 25.0, 18.991226044325487 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3172093768768029371&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "polytechnique.edu;hse.ru;polytechnique.edu;;;hse.ru;polytechnique.edu", "author_num": 7, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Ecole Polytechnique;Higher School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.hse.ru", "aff_unique_abbr": "X;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "France;Russian Federation" }, { "title": "Unified Guidance for Geometry-Conditioned Molecular Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95815", "id": "HeoRsnaD44", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HeoRsnaD44", "openreview": "https://openreview.net/forum?id=HeoRsnaD44", "poster": "/media/PosterPDFs/NeurIPS%202024/95815.png?t=1731054654.9363272", "project": "", "author_site": "Sirine Ayadi, Leon Hetzel, Johanna Sommer, Fabian Theis, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Effectively designing molecular geometries is essential to advancing pharmaceutical innovations, a domain, which has experienced great attention through the success of generative models and, in particular, diffusion models. However, current molecular diffusion models are tailored towards a specific downstream task and lack adaptability. We introduce UniGuide, a framework for controlled geometric guidance of unconditional diffusion models that allows flexible conditioning during inference without the requirement of extra training or networks. We show how applications such as structure-based, fragment-based, and ligand-based drug design are formulated in the UniGuide\u00a0framework and demonstrate on-par or superior performance compared to specialised models. Offering a more versatile approach, UniGuide\u00a0has the potential to streamline the development of molecular generative models, allowing them to be readily used in diverse application scenarios.", "keywords": "generative models;conditional diffusion", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/fc859350e23825b9b18cc80343bc56472e5ff00f.zip", "author": "Sirine Ayadi;Leon Hetzel;Johanna Sommer;Fabian J Theis;Stephan G\u00fcnnemann", "authorids": "~Sirine_Ayadi1;~Leon_Hetzel1;~Johanna_Sommer1;~Fabian_J_Theis1;~Stephan_G\u00fcnnemann1", "gender": "F;M;F;M;", "homepage": ";;https://johanna-sommer.com;http://www.daml.in.tum.de;https://www.helmholtz-munich.de/en/icb/pi/fabian-theis", "dblp": ";246/5214;https://dblp.uni-trier.de/pid/243/2320;43/3011;t/FabianJTheis", "google_scholar": ";;R3p8FGsAAAAJ;;sqWpn2AAAAAJ", "orcid": ";0000-0002-4823-9729;;;0000-0002-2419-1943", "linkedin": "sirine-ayadi-658781224/;;;;", "or_profile": "~Sirine_Ayadi1;~Leon_Hetzel1;~Johanna_Sommer1;~Stephan_G\u00fcnnemann1;~Fabian_J._Theis1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;Technical University Munich", "aff_domain": "tum.de;tum.de;tum.de;tum.de;tum.de", "position": "MS student;PhD student;PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nayadi2024unified,\ntitle={Unified Guidance for Geometry-Conditioned Molecular Generation},\nauthor={Sirine Ayadi and Leon Hetzel and Johanna Sommer and Fabian J Theis and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HeoRsnaD44}\n}", "github": "", "reviewers": "YA7u;cfg3;ZcJd;YdVq;D4nw", "pdf_size": 14829997, "rating": "4;5;6;6;7", "confidence": "3;2;4;2;4", "soundness": "2;3;2;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "61;75;67;60;98", "wc_strengths": "21;48;24;72;37", "wc_weaknesses": "126;119;310;220;59", "wc_questions": "107;3;4;41;76", "wc_limitations": "2;1;1;1;8", "wc_review": "317;246;406;394;278", "wc_reply_reviewers": "101;39;132;67;20", "wc_reply_authors": "423;59;1276;133;34", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;4;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 72.2, 13.962807740565648 ], "wc_strengths_avg": [ 40.4, 18.51053753946654 ], "wc_weaknesses_avg": [ 166.8, 88.21201732190463 ], "wc_questions_avg": [ 46.2, 40.64185035157725 ], "wc_limitations_avg": [ 2.6, 2.727636339397171 ], "wc_review_avg": [ 328.2, 62.9043718671445 ], "wc_reply_reviewers_avg": [ 71.8, 40.64185035157725 ], "wc_reply_authors_avg": [ 385.0, 466.5417451847155 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.43852900965351466, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7970007649556817562&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tum.de;tum.de;tum.de;tum.de;tum.de", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "BiScope: AI-generated Text Detection by Checking Memorization of Preceding Tokens", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95814", "id": "Hew2JSDycr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Hew2JSDycr", "openreview": "https://openreview.net/forum?id=Hew2JSDycr", "poster": "/media/PosterPDFs/NeurIPS%202024/95814.png?t=1733630920.046255", "project": "", "author_site": "Hanxi Guo, Siyuan Cheng, Xiaolong Jin, Zhuo Zhang, Kaiyuan Zhang, Guanhong Tao, Guangyu Shen, Xiangyu Zhang", "tldr": "", "abstract": "Detecting text generated by Large Language Models (LLMs) is a pressing need in\n order to identify and prevent misuse of these powerful models in a wide range of\n applications, which have highly undesirable consequences such as misinformation\n and academic dishonesty. Given a piece of subject text, many existing detection\n methods work by measuring the difficulty of LLM predicting the next token in\n the text from their prefix. In this paper, we make a critical observation that\n how well the current token\u2019s output logits memorizes the closely preceding input\n tokens also provides strong evidence. Therefore, we propose a novel bi-directional\n calculation method that measures the cross-entropy losses between an output\n logits and the ground-truth token (forward) and between the output logits and\n the immediately preceding input token (backward). A classifier is trained to\n make the final prediction based on the statistics of these losses. We evaluate our\n system, named BISCOPE, on texts generated by five latest commercial LLMs\n across five heterogeneous datasets, including both natural language and code.\n BISCOPE demonstrates superior detection accuracy and robustness compared to six\n existing baseline methods, exceeding the state-of-the-art non-commercial methods\u2019\n detection accuracy by over 0.30 F1 score, achieving over 0.95 detection F1 score\n on average. It also outperforms the best commercial tool GPTZero that is based on\n a commercial LLM trained with an enormous volume of data. Code is available at https://github.com/MarkGHX/BiScope.", "keywords": "Large Language Models;AI-text Detection;Paraphrase;Trustworthy AI", "primary_area": "other", "supplementary_material": "", "author": "Hanxi Guo;Siyuan Cheng;Xiaolong Jin;ZHUO ZHANG;Kaiyuan Zhang;Guanhong Tao;Guangyu Shen;Xiangyu Zhang", "authorids": "~Hanxi_Guo1;~Siyuan_Cheng1;~Xiaolong_Jin2;~ZHUO_ZHANG1;~Kaiyuan_Zhang1;~Guanhong_Tao1;~Guangyu_Shen1;~Xiangyu_Zhang3", "gender": "M;M;M;;M;;M;M", "homepage": "https://hanxiguo.me;https://www.cs.purdue.edu/homes/cheng535/;https://jinxiaolong1129.github.io/2018/07/01/welcome.html;https://www.cs.purdue.edu/homes/zhan3299/index.html;https://kaiyuanzhang.com/;;;https://www.cs.purdue.edu/homes/xyzhang", "dblp": "300/2967;263/7049;;16/1234-2.html;147/6644-2;;216/6403;", "google_scholar": "R3C7RsSZnjYC;GcL9AFMAAAAJ;w1-1dYwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;YiMTVwgAAAAJ;PXbu1wIAAAAJ", "orcid": "0000-0002-7566-1589;;;;0000-0001-6023-363X;;;", "linkedin": ";sycheng98/;;;kaiyuan-zhang/;;;", "or_profile": "~Hanxi_Guo1;~Siyuan_Cheng1;~Xiaolong_Jin2;~ZHUO_ZHANG1;~Kaiyuan_Zhang1;~Guanhong_Tao1;~Guangyu_Shen1;~Xiangyu_Zhang3", "aff": "Purdue University;Sony AI;Purdue University;Purdue University;Purdue University;;Purdue University;Purdue University", "aff_domain": "purdue.edu;sony.com;purdue.edu;purdue.edu;cs.purdue.edu;;purdue.edu;cs.purdue.edu", "position": "PhD student;Intern;PhD student;Postdoc;PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nguo2024biscope,\ntitle={BiScope: {AI}-generated Text Detection by Checking Memorization of Preceding Tokens},\nauthor={Hanxi Guo and Siyuan Cheng and Xiaolong Jin and ZHUO ZHANG and Kaiyuan Zhang and Guanhong Tao and Guangyu Shen and Xiangyu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Hew2JSDycr}\n}", "github": "", "reviewers": "VTu5;V5gS;Kykm;Z5m7", "pdf_size": 892784, "rating": "5;6;7;7", "confidence": "4;4;4;5", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "4;3;4;2", "wc_summary": "151;49;127;80", "wc_strengths": "141;26;74;78", "wc_weaknesses": "215;152;177;254", "wc_questions": "239;7;142;2", "wc_limitations": "3;16;48;3", "wc_review": "749;250;568;417", "wc_reply_reviewers": "27;16;78;18", "wc_reply_authors": "25;14;14;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 101.75, 39.74528273896161 ], "wc_strengths_avg": [ 79.75, 40.855691158025955 ], "wc_weaknesses_avg": [ 199.5, 38.642593080692706 ], "wc_questions_avg": [ 97.5, 99.13753073382452 ], "wc_limitations_avg": [ 17.5, 18.39157415774952 ], "wc_review_avg": [ 496.0, 184.3569906458662 ], "wc_reply_reviewers_avg": [ 34.75, 25.31180554602931 ], "wc_reply_authors_avg": [ 19.75, 5.7608593109014565 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8337651055171778059&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "purdue.edu;sony.com;purdue.edu;purdue.edu;cs.purdue.edu;;purdue.edu;cs.purdue.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Purdue University;Sony", "aff_unique_dep": ";Sony AI", "aff_unique_url": "https://www.purdue.edu;https://www.sony.com", "aff_unique_abbr": "Purdue;Sony AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Asymptotics of Alpha-Divergence Variational Inference Algorithms with Exponential Families", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95813", "id": "HfQF8LoLhs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HfQF8LoLhs", "openreview": "https://openreview.net/forum?id=HfQF8LoLhs", "poster": "/media/PosterPDFs/NeurIPS%202024/95813.png?t=1733503312.424541", "project": "", "author_site": "Fran\u00e7ois Bertholom, randal douc, Fran\u00e7ois Roueff", "tldr": "", "abstract": "Recent works in Variational Inference have examined alternative criteria to the commonly used exclusive Kullback-Leibler divergence. Encouraging empirical results have been obtained with the family of alpha-divergences, but few works have focused on the asymptotic properties of the proposed algorithms, especially as the number of iterations goes to infinity. In this paper, we study a procedure that ensures a monotonic decrease in the alpha-divergence. We provide sufficient conditions to guarantee its convergence to a local minimizer of the alpha-divergence at a geometric rate when the variational family belongs to the class of exponential models. The sample-based version of this ideal procedure involves biased gradient estimators, thus hindering any theoretical study. We propose an alternative unbiased algorithm, we prove its almost sure convergence to a local minimizer of the alpha-divergence, and a law of the iterated logarithm. Our results are exemplified with toy and real-data experiments.", "keywords": "Variational inference;stochastic algorithms;asymptotic analysis;alpha divergence;exponential models", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Fran\u00e7ois Bertholom;randal douc;Fran\u00e7ois Roueff", "authorids": "~Fran\u00e7ois_Bertholom1;~randal_douc1;~Fran\u00e7ois_Roueff1", "gender": "M;M;Not Specified", "homepage": "https://github.com/frbm;http://www-public.it-sudparis.eu/~douc_ran/;https://perso.telecom-paristech.fr/roueff/", "dblp": "396/8137;;", "google_scholar": "rivrVhYAAAAJ;;zhJZ1iwAAAAJ", "orcid": ";;0000-0003-2372-0724", "linkedin": ";;", "or_profile": "~Fran\u00e7ois_Bertholom1;~randal_douc1;~Fran\u00e7ois_Roueff1", "aff": "Telecom SudParis;Telecom Sudparis;T\u00e9l\u00e9com Paris", "aff_domain": "telecom-sudparis.eu;telecom-sudparis.eu;telecom-paristech.fr", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbertholom2024asymptotics,\ntitle={Asymptotics of Alpha-Divergence Variational Inference Algorithms with Exponential Families},\nauthor={Fran{\\c{c}}ois Bertholom and randal douc and Fran{\\c{c}}ois Roueff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HfQF8LoLhs}\n}", "github": "", "reviewers": "pDqJ;FQDG;Ntw7;cgje;Hziy", "pdf_size": 4776263, "rating": "6;7;7;7;8", "confidence": "4;3;3;3;2", "soundness": "3;4;3;4;4", "novelty": "2;4;3;4;3", "presentation": "3;4;2;4;4", "wc_summary": "104;62;54;91;109", "wc_strengths": "50;43;85;38;44", "wc_weaknesses": "285;24;187;66;192", "wc_questions": "160;34;112;1;3", "wc_limitations": "62;1;1;1;21", "wc_review": "661;164;439;197;369", "wc_reply_reviewers": "0;12;644;0;104", "wc_reply_authors": "0;0;1617;0;49", "reply_reviewers": "0;1;4;0;1", "reply_authors": "1;1;5;1;2", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 84.0, 22.172054483064937 ], "wc_strengths_avg": [ 52.0, 16.93517050401324 ], "wc_weaknesses_avg": [ 150.8, 94.11354844016881 ], "wc_questions_avg": [ 62.0, 63.387695966961914 ], "wc_limitations_avg": [ 17.2, 23.70147674724088 ], "wc_review_avg": [ 366.0, 179.84882540622831 ], "wc_reply_reviewers_avg": [ 152.0, 249.06866523109645 ], "wc_reply_authors_avg": [ 333.2, 642.1804730759103 ], "reply_reviewers_avg": [ 1.2, 1.4696938456699067 ], "reply_authors_avg": [ 2.0, 1.5491933384829668 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FV_liAUdO9AJ:scholar.google.com/&scioq=Asymptotics+of+Alpha-Divergence+Variational+Inference+Algorithms+with+Exponential+Families&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "telecom-sudparis.eu;telecom-sudparis.eu;telecom-paristech.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Telecom SudParis;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.telecom-sudparis.eu;https://www.telecom-paris.fr", "aff_unique_abbr": "TSP;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Blind Image Restoration via Fast Diffusion Inversion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95812", "id": "HfSJlBRkKJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HfSJlBRkKJ", "openreview": "https://openreview.net/forum?id=HfSJlBRkKJ", "poster": "", "project": "", "author_site": "Hamadi Chihaoui, Abdelhak Lemkhenter, Paolo Favaro", "tldr": "", "abstract": "Image Restoration (IR) methods based on a pre-trained diffusion model have demonstrated state-of-the-art performance. However, they have two fundamental limitations: 1) they often assume that the degradation operator is completely known and 2) they alter the diffusion sampling process, which may result in restored images that do not lie onto the data manifold. To address these issues, we propose Blind Image Restoration via fast Diffusion inversion (BIRD) a blind IR method that jointly optimizes for the degradation model parameters and the restored image. To ensure that the restored images lie onto the data manifold, we propose a novel sampling technique on a pre-trained diffusion model. A key idea in our method is not to modify the reverse sampling, i.e., not to alter all the intermediate latents, once an initial noise is sampled. This is ultimately equivalent to casting the IR task as an optimization problem in the space of the input noise. Moreover, to mitigate the computational cost associated with inverting a fully unrolled diffusion model, we leverage the inherent capability of these models to skip ahead in the forward diffusion process using large time steps. We experimentally validate BIRD on several image restoration tasks and show that it achieves state of the art performance.", "keywords": "blind image restoration;diffusion models;unsupervised learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/61b809c86d513d308459fd1df45702cdd05f52d6.zip", "author": "Hamadi Chihaoui;Abdelhak Lemkhenter;Paolo Favaro", "authorids": "~Hamadi_Chihaoui1;~Abdelhak_Lemkhenter1;~Paolo_Favaro1", "gender": "M;;M", "homepage": "https://cvg.unibe.ch/people/chihaoui;http://cvg.unibe.ch/people/lemkhenter;http://cvg.unibe.ch", "dblp": "209/9800.html;274/6892;02/4162", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;6VLy7_AAAAAJ;w_XDRRsAAAAJ", "orcid": ";;0000-0003-3546-8247", "linkedin": ";;paolo-favaro-25765b4", "or_profile": "~Hamadi_Chihaoui1;~Abdelhak_Lemkhenter1;~Paolo_Favaro1", "aff": "Universit\u00e4t Bern;Microsoft Research;Institute f\u00fcr Informatik, University of Bern", "aff_domain": "unibe.ch;microsoft.com;unibe.ch", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchihaoui2024blind,\ntitle={Blind Image Restoration via Fast Diffusion Inversion},\nauthor={Hamadi Chihaoui and Abdelhak Lemkhenter and Paolo Favaro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HfSJlBRkKJ}\n}", "github": "", "reviewers": "cmXb;ebAr;PrQF;gDW8", "pdf_size": 9132751, "rating": "4;4;5;7", "confidence": "5;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;2;3", "wc_summary": "61;91;75;143", "wc_strengths": "48;194;24;40", "wc_weaknesses": "171;346;197;94", "wc_questions": "8;21;20;103", "wc_limitations": "5;58;9;2", "wc_review": "293;710;325;382", "wc_reply_reviewers": "23;247;0;50", "wc_reply_authors": "168;1760;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;4;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.5, 31.028212968200407 ], "wc_strengths_avg": [ 76.5, 68.3867677259278 ], "wc_weaknesses_avg": [ 202.0, 91.35918125727704 ], "wc_questions_avg": [ 38.0, 37.87479372881125 ], "wc_limitations_avg": [ 18.5, 22.940139493908923 ], "wc_review_avg": [ 427.5, 166.1873942271194 ], "wc_reply_reviewers_avg": [ 80.0, 98.02805720812792 ], "wc_reply_authors_avg": [ 482.0, 741.0344121564126 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3419044156369363756&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "unibe.ch;microsoft.com;unibe.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Bern;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.unibe.ch;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UniBE;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Efficient Multi-task LLM Quantization and Serving for Multiple LoRA Adapters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95811", "id": "HfpV6u0kbX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HfpV6u0kbX", "openreview": "https://openreview.net/forum?id=HfpV6u0kbX", "poster": "/media/PosterPDFs/NeurIPS%202024/95811.png?t=1731423421.1639163", "project": "", "author_site": "Yifei Xia, Fangcheng Fu, Wentao Zhang, Jiawei Jiang, Bin CUI", "tldr": "", "abstract": "With the remarkable achievements of large language models (LLMs), the demand for fine-tuning and deploying LLMs in various downstream tasks has garnered widespread interest. Parameter-efficient fine-tuning techniques represented by LoRA and model quantization techniques represented by GPTQ and AWQ are of paramount significance. However, although these techniques have been widely adopted in single-task scenarios, research is scarce in multi-task scenarios. To be specific, we find that mainstream quantization methods would prevent the base LLM from being shared among tasks, so current LLM serving systems are infeasible to integrate LLM quantization with multiple LoRA adapters to achieve memory-efficient multi-task serving. Moreover, existing LLM serving systems lack support for dynamic task addition and overlook the workload differences among tasks, leading to inefficiencies in multi-task scenarios.\n\nThis work proposes LoRA-Inlaid, an efficient multi-task LLM serving system. On the one hand, LoRA-Inlaid designs a flexible and efficient multi-task quantization algorithm (MLGPTQ) that facilitates the sharing of a single quantized model for multiple LoRA adapters, which significantly reduces the memory consumption for model deployment. Meanwhile, it supports adding LoRA adapters for new tasks on the fly, without sacrificing the stability of online services. On the other hand, LoRA-Inlaid develops a novel multi-task scheduling algorithm guided by output length prediction and grouping among different tasks, which effectively shrinks the memory consumption and avoids frequent switching of LoRA adapters. Empirical results verify that LoRA-Inlaid outperforms existing state-of-the-art LLM serving systems by up to 1.58 times in terms of throughput, 1.76 times in terms of average latency, 2 times in terms of job completion time, and 10 times in terms of SLO Attainment, while maintaining the same level of model quality.", "keywords": "Multi-LoRA serving system; LLM serving; LoRA; Post Training Quantization; Multi-task Scheduling", "primary_area": "infrastructure", "supplementary_material": "", "author": "Yifei Xia;Fangcheng Fu;Wentao Zhang;Jiawei Jiang;Bin CUI", "authorids": "~Yifei_Xia2;~Fangcheng_Fu1;~Wentao_Zhang1;~Jiawei_Jiang1;~Bin_CUI2", "gender": "M;M;M;M;M", "homepage": ";https://ccchengff.github.io/;http://bluesjjw.github.io/;https://cuibinpku.github.io/index.html;https://zwt233.github.io/", "dblp": ";219/9677.html;185/1521-1;55/5031.html;41/3249-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;vFYm_QEAAAAJ;G_Hg-j0AAAAJ;IJAU8KoAAAAJ;JE4VON0AAAAJ", "orcid": "0009-0006-0316-8986;0000-0003-1658-0380;0000-0003-0051-0046;0000-0003-1681-4677;0000-0002-7532-5550", "linkedin": ";;;;", "or_profile": "~Yifei_Xia2;~Fangcheng_Fu1;~Jiawei_Jiang1;~Bin_CUI2;~Zhang_wen_tao1", "aff": "Peking University;Peking University;Wuhan University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;whu.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Postdoc;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxia2024efficient,\ntitle={Efficient Multi-task {LLM} Quantization and Serving for Multiple Lo{RA} Adapters},\nauthor={Yifei Xia and Fangcheng Fu and Wentao Zhang and Jiawei Jiang and Bin CUI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HfpV6u0kbX}\n}", "github": "", "reviewers": "RQ33;v1aq;gmzG", "pdf_size": 1004942, "rating": "6;6;7", "confidence": "3;5;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "264;123;209", "wc_strengths": "225;128;177", "wc_weaknesses": "369;117;264", "wc_questions": "158;167;264", "wc_limitations": "123;1;143", "wc_review": "1139;536;1057", "wc_reply_reviewers": "30;22;13", "wc_reply_authors": "23;24;14", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 198.66666666666666, 58.02489887013065 ], "wc_strengths_avg": [ 176.66666666666666, 39.60078562632593 ], "wc_weaknesses_avg": [ 250.0, 103.3537614216338 ], "wc_questions_avg": [ 196.33333333333334, 47.98842453018112 ], "wc_limitations_avg": [ 89.0, 62.758797524065635 ], "wc_review_avg": [ 910.6666666666666, 267.0359942446378 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 6.944222218666553 ], "wc_reply_authors_avg": [ 20.333333333333332, 4.496912521077347 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3863659108916049679&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;pku.edu.cn;whu.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Peking University;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.whu.edu.cn/", "aff_unique_abbr": "Peking U;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Transferring disentangled representations: bridging the gap between synthetic and real images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95810", "id": "HfztZgwpxI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HfztZgwpxI", "openreview": "https://openreview.net/forum?id=HfztZgwpxI", "poster": "/media/PosterPDFs/NeurIPS%202024/95810.png?t=1731682211.4379873", "project": "", "author_site": "Jacopo Dapueto, Nicoletta Noceti, Francesca Odone", "tldr": "", "abstract": "Developing meaningful and efficient representations that separate the fundamental structure of the data generation mechanism is crucial in representation learning. However, Disentangled Representation Learning has not fully shown its potential on real images, because of correlated generative factors, their resolution and limited access to ground truth labels. Specifically on the latter, we investigate the possibility of leveraging synthetic data to learn general-purpose disentangled representations applicable to real data, discussing the effect of fine-tuning and what properties of disentanglement are preserved after the transfer. We provide an extensive empirical study to address these issues. In addition, we propose a new interpretable intervention-based metric, to measure the quality of factors encoding in the representation. Our results indicate that some level of disentanglement, transferring a representation from synthetic to real data, is possible and effective.", "keywords": "Disentangled Representations;Transfer Learning;Syn2Real", "primary_area": "machine_vision", "supplementary_material": "/attachment/6f0266eff19aefbeedd83bb6318f8aadd9b05a06.zip", "author": "Jacopo Dapueto;Nicoletta Noceti;Francesca Odone", "authorids": "~Jacopo_Dapueto1;~Nicoletta_Noceti1;~Francesca_Odone1", "gender": "M;F;F", "homepage": ";https://ml.unige.it;http://malga.unige.it", "dblp": "365/5779;13/3585;73/2633", "google_scholar": "DDRoSJ0AAAAJ;7i3HX4wAAAAJ;https://scholar.google.it/citations?user=riK7DscAAAAJ", "orcid": ";0000-0002-6482-4768;0000-0002-3463-2263", "linkedin": ";nicoletta-noceti-494a43156/;", "or_profile": "~Jacopo_Dapueto1;~Nicoletta_Noceti1;~Francesca_Odone1", "aff": "University of Genoa;Universit\u00e0 degli Studi di Genova;University of Genoa", "aff_domain": "unige.it;unige.it;unige.it", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndapueto2024transferring,\ntitle={Transferring disentangled representations: bridging the gap between synthetic and real images},\nauthor={Jacopo Dapueto and Nicoletta Noceti and Francesca Odone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HfztZgwpxI}\n}", "github": "", "reviewers": "R67v;GNcy;Fxij", "pdf_size": 17739276, "rating": "6;6;7", "confidence": "5;3;3", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "68;135;98", "wc_strengths": "108;69;62", "wc_weaknesses": "197;144;111", "wc_questions": "4;148;313", "wc_limitations": "32;10;18", "wc_review": "409;506;602", "wc_reply_reviewers": "18;238;27", "wc_reply_authors": "0;239;0", "reply_reviewers": "1;2;1", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.33333333333333, 27.402351886086144 ], "wc_strengths_avg": [ 79.66666666666667, 20.237478982214054 ], "wc_weaknesses_avg": [ 150.66666666666666, 35.424410918022176 ], "wc_questions_avg": [ 155.0, 126.24579200908045 ], "wc_limitations_avg": [ 20.0, 9.092121131323903 ], "wc_review_avg": [ 505.6666666666667, 78.79227260475625 ], "wc_reply_reviewers_avg": [ 94.33333333333333, 101.65409758369584 ], "wc_reply_authors_avg": [ 79.66666666666667, 112.66568046905657 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2451522109049797673&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unige.it;unige.it;unige.it", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Genoa;Universit\u00e0 degli Studi di Genova", "aff_unique_dep": ";", "aff_unique_url": "https://www.unige.it;https://www.unige.it", "aff_unique_abbr": "UniGe;UniGe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Inferring Neural Signed Distance Functions by Overfitting on Single Noisy Point Clouds through Finetuning Data-Driven based Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95809", "id": "Hgqs1b4ECy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Hgqs1b4ECy", "openreview": "https://openreview.net/forum?id=Hgqs1b4ECy", "poster": "/media/PosterPDFs/NeurIPS%202024/95809.png?t=1730530971.9663427", "project": "", "author_site": "Chao Chen, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "It is important to estimate an accurate signed distance function (SDF) from a point cloud in many computer vision applications. The latest methods learn neural SDFs using either a data-driven based or an overfitting-based strategy. However, these two kinds of methods are with either poor generalization or slow convergence, which limits their capability under challenging scenarios like highly noisy point clouds. To resolve this issue, we propose a method to prompt pros of both data-driven based and overfitting-based methods for better generalization, faster inference, and higher accuracy in learning neural SDFs. We introduce a novel statistical reasoning algorithm in local regions which is able to finetune data-driven based priors without signed distance supervision, clean point cloud, or point normals. This helps our method start with a good initialization, and converge to a minimum in a much faster way. Our numerical and visual comparisons with the stat-of-the-art methods show our superiority over these methods in surface reconstruction and point cloud denoising on widely used shape and scene benchmarks. The code is available at https://github.com/chenchao15/LocalN2NM.", "keywords": "point cloud;SDF;3D reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/20ce6075fd0814278cd6e30e8f1e9b8530be7237.zip", "author": "Chao Chen;Yu-Shen Liu;Zhizhong Han", "authorids": "~Chao_Chen9;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": "M;M;M", "homepage": ";https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";44/2229.html;166/5173", "google_scholar": "L8gyzsQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9916-662X;0000-0001-7305-1915;", "linkedin": ";;", "or_profile": "~Chao_Chen9;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": "Tsinghua University;Tsinghua University;Wayne State University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024inferring,\ntitle={Inferring Neural Signed Distance Functions by Overfitting on Single Noisy Point Clouds through Finetuning Data-Driven based Priors},\nauthor={Chao Chen and Yu-Shen Liu and Zhizhong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Hgqs1b4ECy}\n}", "github": "", "reviewers": "76DM;AdmT;mU7t;fwt4", "pdf_size": 26551255, "rating": "4;4;5;5", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;2;2", "presentation": "2;2;2;2", "wc_summary": "55;74;149;123", "wc_strengths": "56;37;118;94", "wc_weaknesses": "125;233;248;743", "wc_questions": "71;42;155;26", "wc_limitations": "7;12;40;8", "wc_review": "314;398;710;994", "wc_reply_reviewers": "0;0;108;428", "wc_reply_authors": "48;48;23;730", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 100.25, 37.51916177102042 ], "wc_strengths_avg": [ 76.25, 31.657345119261027 ], "wc_weaknesses_avg": [ 337.25, 239.01712804734308 ], "wc_questions_avg": [ 73.5, 49.741833500585805 ], "wc_limitations_avg": [ 16.75, 13.5531361684298 ], "wc_review_avg": [ 604.0, 269.1988112900947 ], "wc_reply_reviewers_avg": [ 134.0, 175.37388631150307 ], "wc_reply_authors_avg": [ 212.25, 299.09728768412464 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17703472860082516256&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tsinghua University;Wayne State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://wayne.edu", "aff_unique_abbr": "THU;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Heterogeneity-Guided Client Sampling: Towards Fast and Efficient Non-IID Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95808", "id": "HhnpPISAUH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HhnpPISAUH", "openreview": "https://openreview.net/forum?id=HhnpPISAUH", "poster": "/media/PosterPDFs/NeurIPS%202024/95808.png?t=1731624332.6925325", "project": "", "author_site": "Huancheng Chen, Haris Vikalo", "tldr": "", "abstract": "Statistical heterogeneity of data present at client devices in a federated learning (FL) system renders the training of a global model in such systems difficult. Particularly challenging are the settings where due to communication resource constraints only a small fraction of clients can participate in any given round of FL. Recent approaches to training a global model in FL systems with non-IID data have focused on developing client selection methods that aim to sample clients with more informative updates of the model. However, existing client selection techniques either introduce significant computation overhead or perform well only in the scenarios where clients have data with similar heterogeneity profiles. In this paper, we propose HiCS-FL (Federated Learning via Hierarchical Clustered Sampling), a novel client selection method in which the server estimates statistical heterogeneity of a client's data using the client\u2019s update of the network\u2019s output layer and relies on this information to cluster and sample the clients. We analyze the ability of the proposed techniques to compare heterogeneity of different datasets, and characterize convergence of the training process that deploys the introduced client selection method. Extensive experimental results demonstrate that in non-IID settings HiCS-FL achieves faster convergence than state-of-the-art FL client selection schemes. Notably, HiCS-FL drastically reduces computation cost compared to existing selection schemes and is adaptable to different heterogeneity scenarios.", "keywords": "Federated Learning;Client Selection;Non-IID data", "primary_area": "other", "supplementary_material": "/attachment/432a5d78f8c9192faccfb9339d02087786c21241.zip", "author": "Huancheng Chen;Haris Vikalo", "authorids": "~Huancheng_Chen1;~Haris_Vikalo1", "gender": "M;", "homepage": "https://citychan.github.io/;", "dblp": "302/4540;", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-TW;", "orcid": ";", "linkedin": ";", "or_profile": "~Huancheng_Chen1;~Haris_Vikalo1", "aff": "University of Texas, Austin;", "aff_domain": "utexas.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nchen2024heterogeneityguided,\ntitle={Heterogeneity-Guided Client Sampling: Towards Fast and Efficient Non-{IID} Federated Learning},\nauthor={Huancheng Chen and Haris Vikalo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HhnpPISAUH}\n}", "github": "", "reviewers": "yLvJ;uaYT;Rb9m;7UbW", "pdf_size": 1751653, "rating": "5;5;6;6", "confidence": "4;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "87;90;55;59", "wc_strengths": "61;79;52;54", "wc_weaknesses": "229;2;45;47", "wc_questions": "42;123;16;14", "wc_limitations": "2;43;2;38", "wc_review": "421;337;170;212", "wc_reply_reviewers": "8;77;16;78", "wc_reply_authors": "29;43;29;124", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 15.848895860595462 ], "wc_strengths_avg": [ 61.5, 10.641898326896381 ], "wc_weaknesses_avg": [ 80.75, 87.45963354599652 ], "wc_questions_avg": [ 48.75, 44.268357773922446 ], "wc_limitations_avg": [ 21.25, 19.330998422223306 ], "wc_review_avg": [ 285.0, 99.69202575933544 ], "wc_reply_reviewers_avg": [ 44.75, 32.87381176559846 ], "wc_reply_authors_avg": [ 56.25, 39.53084238920289 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3609508836144050886&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "utexas.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "WaterMax: breaking the LLM watermark detectability-robustness-quality trade-off", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95807", "id": "HjeKHxK2VH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HjeKHxK2VH", "openreview": "https://openreview.net/forum?id=HjeKHxK2VH", "poster": "", "project": "", "author_site": "Eva Giboulot, Teddy Furon", "tldr": "", "abstract": "Watermarking is a technical means to dissuade malfeasant usage of Large Language Models.\nThis paper proposes a novel watermarking scheme, so-called WaterMax, that enjoys high detectability while sustaining the quality of the generated text of the original LLM.\nIts new design leaves the LLM untouched (no modification of the weights, logits or temperature).\nWaterMax balances robustness and computational complexity contrary to the watermarking techniques of the literature inherently provoking a trade-off between quality and robustness.\nIts performance is both theoretically proven and experimentally validated.\nIt outperforms all the SotA techniques under the most complete benchmark suite.", "keywords": "machine learning security;llm watermarking", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/e8d1b25c02dbecbc969789188b8b0af6cdcdd441.zip", "author": "Eva Giboulot;Teddy Furon", "authorids": "~Eva_Giboulot1;~Teddy_Furon1", "gender": "F;M", "homepage": ";http://people.rennes.inria.fr/Teddy.Furon", "dblp": ";00/3862", "google_scholar": "zQZJgEIAAAAJ;https://scholar.google.com/citations?hl=fr", "orcid": ";0000-0002-1565-765X", "linkedin": ";", "or_profile": "~Eva_Giboulot1;~Teddy_Furon1", "aff": "INRIA;INRIA", "aff_domain": "inria.fr;inria.fr", "position": "Postdoc;Researcher", "bibtex": "@inproceedings{\ngiboulot2024watermax,\ntitle={WaterMax: breaking the {LLM} watermark detectability-robustness-quality trade-off},\nauthor={Eva Giboulot and Teddy Furon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HjeKHxK2VH}\n}", "github": "", "reviewers": "VU2G;F9zP;5ir5;3Sjs", "pdf_size": 1016331, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "4;2;3;3", "wc_summary": "73;66;93;69", "wc_strengths": "94;90;63;41", "wc_weaknesses": "361;114;22;228", "wc_questions": "43;212;41;28", "wc_limitations": "22;13;16;8", "wc_review": "593;495;235;374", "wc_reply_reviewers": "201;17;9;119", "wc_reply_authors": "645;0;0;41", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 10.54454835448157 ], "wc_strengths_avg": [ 72.0, 21.50581316760657 ], "wc_weaknesses_avg": [ 181.25, 126.86483949463697 ], "wc_questions_avg": [ 81.0, 75.85182924623506 ], "wc_limitations_avg": [ 14.75, 5.0682837331783235 ], "wc_review_avg": [ 424.25, 133.99883395015047 ], "wc_reply_reviewers_avg": [ 86.5, 79.06168477840578 ], "wc_reply_authors_avg": [ 171.5, 273.8872943383829 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12769708310457671863&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "inria.fr;inria.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "SleeperNets: Universal Backdoor Poisoning Attacks Against Reinforcement Learning Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95806", "id": "HkC4OYee3Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HkC4OYee3Q", "openreview": "https://openreview.net/forum?id=HkC4OYee3Q", "poster": "/media/PosterPDFs/NeurIPS%202024/95806.png?t=1733626469.2094264", "project": "", "author_site": "Ethan Rathbun, Christopher Amato, Alina Oprea", "tldr": "", "abstract": "Reinforcement learning (RL) is an actively growing field that is seeing increased usage in real-world, safety-critical applications -- making it paramount to ensure the robustness of RL algorithms against adversarial attacks. In this work we explore a particularly stealthy form of training-time attacks against RL -- backdoor poisoning. Here the adversary intercepts the training of an RL agent with the goal of reliably inducing a particular action when the agent observes a pre-determined trigger at inference time. We uncover theoretical limitations of prior work by proving their inability to generalize across domains and MDPs. Motivated by this, we formulate a novel poisoning attack framework which interlinks the adversary's objectives with those of finding an optimal policy -- guaranteeing attack success in the limit. Using insights from our theoretical analysis we develop \"SleeperNets\" as a universal backdoor attack which exploits a newly proposed threat model and leverages dynamic reward poisoning techniques. We evaluate our attack in 6 environments spanning multiple domains and demonstrate significant improvements in attack success over existing methods, while preserving benign episodic return.", "keywords": "Reinforcement Learning;Backdoor Attacks;Adversarial Machine Learning;Security;Poisoning Attacks;Reinforcement Learning Theory", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/9c8088f5cda01d78122869b97fa61d12afe4aab2.zip", "author": "Ethan Rathbun;Christopher Amato;Alina Oprea", "authorids": "~Ethan_Rathbun1;~Christopher_Amato1;~Alina_Oprea1", "gender": ";M;F", "homepage": "https://ethanrathbun.com/;http://www.ccs.neu.edu/home/camato/index.html;http://www.ccs.neu.edu/home/alina/", "dblp": "303/1093;10/3254;35/3425", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;-8-sD-sAAAAJ;https://scholar.google.com.tw/citations?user=16J3izoAAAAJ", "orcid": ";;0000-0002-4979-5292", "linkedin": ";;alina-oprea-9588bb1", "or_profile": "~Ethan_Rathbun1;~Christopher_Amato1;~Alina_Oprea1", "aff": "Northeastern University;Northeastern University;Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu;northeastern.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nrathbun2024sleepernets,\ntitle={SleeperNets: Universal Backdoor Poisoning Attacks Against Reinforcement Learning Agents},\nauthor={Ethan Rathbun and Christopher Amato and Alina Oprea},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HkC4OYee3Q}\n}", "github": "", "reviewers": "VsP1;Es9J;YkW9", "pdf_size": 7415873, "rating": "6;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "80;59;91", "wc_strengths": "70;61;94", "wc_weaknesses": "99;1;212", "wc_questions": "64;1;36", "wc_limitations": "52;1;39", "wc_review": "365;123;472", "wc_reply_reviewers": "17;0;48", "wc_reply_authors": "0;0;62", "reply_reviewers": "1;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.66666666666667, 13.27487183449325 ], "wc_strengths_avg": [ 75.0, 13.92838827718412 ], "wc_weaknesses_avg": [ 104.0, 86.21291473246144 ], "wc_questions_avg": [ 33.666666666666664, 25.772509040103607 ], "wc_limitations_avg": [ 30.666666666666668, 21.63844315615664 ], "wc_review_avg": [ 320.0, 145.9885840285694 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 19.871811414385174 ], "wc_reply_authors_avg": [ 20.666666666666668, 29.227080289043965 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6004851992250214322&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "northeastern.edu;northeastern.edu;northeastern.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HDR-GS: Efficient High Dynamic Range Novel View Synthesis at 1000x Speed via Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95805", "id": "HkMCCFrYkT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HkMCCFrYkT", "openreview": "https://openreview.net/forum?id=HkMCCFrYkT", "poster": "/media/PosterPDFs/NeurIPS%202024/95805.png?t=1729805767.068644", "project": "", "author_site": "Yuanhao Cai, Zihao Xiao, Yixun Liang, Minghan Qin, Yulun Zhang, Xiaokang Yang, Yaoyao Liu, Alan Yuille", "tldr": "", "abstract": "High dynamic range (HDR) novel view synthesis (NVS) aims to create photorealistic images from novel viewpoints using HDR imaging techniques. The rendered HDR images capture a wider range of brightness levels containing more details of the scene than normal low dynamic range (LDR) images. Existing HDR NVS methods are mainly based on NeRF. They suffer from long training time and slow inference speed. In this paper, we propose a new framework, High Dynamic Range Gaussian Splatting (HDR-GS), which can efficiently render novel HDR views and reconstruct LDR images with a user input exposure time. Specifically, we design a Dual Dynamic Range (DDR) Gaussian point cloud model that uses spherical harmonics to fit HDR color and employs an MLP-based tone-mapper to render LDR color. The HDR and LDR colors are then fed into two Parallel Differentiable Rasterization (PDR) processes to reconstruct HDR and LDR views. To establish the data foundation for the research of 3D Gaussian splatting-based methods in HDR NVS, we recalibrate the camera parameters and compute the initial positions for Gaussian point clouds. Comprehensive experiments show that HDR-GS surpasses the state-of-the-art NeRF-based method by 3.84 and 1.91 dB on LDR and HDR NVS while enjoying 1000$\\times$ inference speed and only costing 6.3\\% training time. Code and data are released at https://github.com/caiyuanhao1998/HDR-GS", "keywords": "Applications;Computer Vision;Low-level Vision;Computational Photography;High Dynamic Range Imaging;Novel View Synthesis;3D Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuanhao Cai;Zihao Xiao;Yixun Liang;Minghan Qin;Yulun Zhang;Xiaokang Yang;Yaoyao Liu;Alan Yuille", "authorids": "~Yuanhao_Cai1;~Zihao_Xiao2;~Yixun_Liang1;~Minghan_Qin1;~Yulun_Zhang1;~Xiaokang_Yang1;~Yaoyao_Liu1;~Alan_Yuille1", "gender": "M;M;M;M;M;;M;M", "homepage": ";https://yixunliang.github.io;https://minghanqin.github.io/;http://yulunzhang.com/;https://icne.sjtu.edu.cn/info/1064/1078.htm;https://yaoyaoliu.web.illinois.edu/;;https://caiyuanhao1998.github.io", "dblp": "207/2005-1;320/7091;358/9023;166/2763-1.html;06/3071-1.html;12/10033-1;y/AlanLYuille;260/1004", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;ngEXyLkAAAAJ;ORmLjWoAAAAJ;yDEavdMAAAAJ;Qi2PSmEAAAAJ;;3YozQwcAAAAJ", "orcid": ";0000-0003-4750-8875;;0000-0002-2288-5079;0000-0003-4029-3322;0000-0002-5316-3028;;", "linkedin": ";;;yulun-zhang-1116b5b9/;;;;", "or_profile": "~Zihao_Xiao2;~Yixun_Liang1;~Minghan_Qin1;~Yulun_Zhang1;~Xiaokang_Yang1;~Yaoyao_Liu1;~Alan_Yuille1;~Cai_Yuanhao1", "aff": "Johns Hopkins University;Hong Kong University of Science and Technology;Tsinghua University;Swiss Federal Institute of Technology;Shanghai Jiaotong University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;hkust.edu;tsinghua.edu.cn;ethz.ch;sjtu.edu.cn;jhu.edu;johnshopkins.edu;jh.edu", "position": "PhD student;MS student;MS student;Postdoc;Full Professor;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\ncai2024hdrgs,\ntitle={{HDR}-{GS}: Efficient High Dynamic Range Novel View Synthesis at 1000x Speed via Gaussian Splatting},\nauthor={Yuanhao Cai and Zihao Xiao and Yixun Liang and Minghan Qin and Yulun Zhang and Xiaokang Yang and Yaoyao Liu and Alan Yuille},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HkMCCFrYkT}\n}", "github": "", "reviewers": "E3vH;VwS8;tLXM", "pdf_size": 9527049, "rating": "5;6;7", "confidence": "4;5;5", "soundness": "4;3;3", "novelty": "2;2;3", "presentation": "4;3;4", "wc_summary": "81;58;107", "wc_strengths": "54;44;336", "wc_weaknesses": "152;35;254", "wc_questions": "29;94;117", "wc_limitations": "12;84;22", "wc_review": "328;315;836", "wc_reply_reviewers": "184;26;0", "wc_reply_authors": "853;13;0", "reply_reviewers": "2;1;0", "reply_authors": "4;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.0, 20.016659728003237 ], "wc_strengths_avg": [ 144.66666666666666, 135.35467811970477 ], "wc_weaknesses_avg": [ 147.0, 89.47625383307015 ], "wc_questions_avg": [ 80.0, 37.26481808176 ], "wc_limitations_avg": [ 39.333333333333336, 31.846855766656496 ], "wc_review_avg": [ 493.0, 242.5956855895559 ], "wc_reply_reviewers_avg": [ 70.0, 81.30600633819537 ], "wc_reply_authors_avg": [ 288.6666666666667, 399.07921797836354 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8521456206181711498&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "jhu.edu;hkust.edu;tsinghua.edu.cn;ethz.ch;sjtu.edu.cn;jhu.edu;johnshopkins.edu;jh.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;0;0", "aff_unique_norm": "Johns Hopkins University;Hong Kong University of Science and Technology;Tsinghua University;Swiss Federal Institute of Technology;Shanghai Jiao Tong University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.jhu.edu;https://www.ust.hk;https://www.tsinghua.edu.cn;https://www.ethz.ch;https://www.sjtu.edu.cn", "aff_unique_abbr": "JHU;HKUST;THU;ETH Zurich;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;2;1;0;0;0", "aff_country_unique": "United States;China;Switzerland" }, { "title": "Neural Embeddings Rank: Aligning 3D latent dynamics with movements", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95804", "id": "Hlcek7AYgP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Hlcek7AYgP", "openreview": "https://openreview.net/forum?id=Hlcek7AYgP", "poster": "/media/PosterPDFs/NeurIPS%202024/95804.png?t=1730572364.7214544", "project": "", "author_site": "Chenggang Chen, Zhiyu Yang, Xiaoqin Wang", "tldr": "", "abstract": "Aligning neural dynamics with movements is a fundamental goal in neuroscience and brain-machine interfaces. However, there is still a lack of dimensionality reduction methods that can effectively align low-dimensional latent dynamics with movements. To address this gap, we propose Neural Embeddings Rank (NER), a technique that embeds neural dynamics into a 3D latent space and contrasts the embeddings based on movement ranks. NER learns to regress continuous representations of neural dynamics (i.e., embeddings) on continuous movements. We apply NER and six other dimensionality reduction techniques to neurons in the primary motor cortex (M1), dorsal premotor cortex (PMd), and primary somatosensory cortex (S1) as monkeys perform reaching tasks. Only NER aligns latent dynamics with both hand position and direction, visualizable in 3D. NER reveals consistent latent dynamics in M1 and PMd across sixteen sessions over a year. Using a linear regression decoder, NER explains 86\\% and 97\\% of the variance in velocity and position, respectively. Linear models trained on data from one session successfully decode velocity, position, and direction in held-out test data from different dates and cortical areas (64\\%, 88\\%, and 90\\%). NER also reveals distinct latent dynamics in S1 during consistent movements and in M1 during curved reaching tasks. The code is available at https://github.com/NeuroscienceAI/NER.", "keywords": "Dimensionality reduction;Latent dynamics;Brain-machine interfaces;Neural decoding;Contrastive learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Chenggang Chen;Zhiyu Yang;Xiaoqin Wang", "authorids": "~Chenggang_Chen1;~Zhiyu_Yang2;~Xiaoqin_Wang3", "gender": "M;M;Not Specified", "homepage": "https://sites.google.com/view/marmoset/cv;https://www.bme.jhu.edu/people/faculty/xiaoqin-wang/;https://dorazhiyuyang.github.io/", "dblp": ";;", "google_scholar": "lX2cBeEAAAAJ;877mtdkAAAAJ;Ot42lBoAAAAJ", "orcid": "0000-0003-4143-2427;;0009-0007-0220-2051", "linkedin": "chenggang-chen-a630b681/;;", "or_profile": "~Chenggang_Chen1;~Xiaoqin_Wang3;~Dora_Zhiyu_Yang1", "aff": "Johns Hopkins University;Johns Hopkins University;Mathematica Policy Research", "aff_domain": "jhu.edu;jhu.edu;mathematica-mpr.com", "position": "Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\nchen2024neural,\ntitle={Neural Embeddings Rank: Aligning 3D latent dynamics with movements},\nauthor={Chenggang Chen and Zhiyu Yang and Xiaoqin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Hlcek7AYgP}\n}", "github": "", "reviewers": "rR7L;f3GE;VFtX", "pdf_size": 12894927, "rating": "6;7;7", "confidence": "1;3;4", "soundness": "2;4;3", "novelty": "3;3;3", "presentation": "3;2;2", "wc_summary": "46;94;80", "wc_strengths": "49;98;76", "wc_weaknesses": "50;112;1042", "wc_questions": "19;58;286", "wc_limitations": "24;5;32", "wc_review": "188;367;1516", "wc_reply_reviewers": "122;29;419", "wc_reply_authors": "359;154;1244", "reply_reviewers": "2;1;4", "reply_authors": "3;3;6", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 20.154955277107966 ], "wc_strengths_avg": [ 74.33333333333333, 20.038851153585515 ], "wc_weaknesses_avg": [ 401.3333333333333, 453.72630026873645 ], "wc_questions_avg": [ 121.0, 117.75398082442904 ], "wc_limitations_avg": [ 20.333333333333332, 11.32352516764202 ], "wc_review_avg": [ 690.3333333333334, 588.3900822489184 ], "wc_reply_reviewers_avg": [ 190.0, 166.3189706557854 ], "wc_reply_authors_avg": [ 585.6666666666666, 472.97521663989494 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.247219128924647 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BqL87q9o00oJ:scholar.google.com/&scioq=Neural+Embeddings+Rank:+Aligning+3D+latent+dynamics+with+movements&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "jhu.edu;jhu.edu;mathematica-mpr.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Johns Hopkins University;Mathematica Policy Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.mathematica.org", "aff_unique_abbr": "JHU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SG-Nav: Online 3D Scene Graph Prompting for LLM-based Zero-shot Object Navigation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95803", "id": "HmCmxbCpp2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HmCmxbCpp2", "openreview": "https://openreview.net/forum?id=HmCmxbCpp2", "poster": "/media/PosterPDFs/NeurIPS%202024/95803.png?t=1731400091.982451", "project": "", "author_site": "Hang Yin, Xiuwei Xu, Zhenyu Wu, Jie Zhou, Jiwen Lu", "tldr": "", "abstract": "In this paper, we propose a new framework for zero-shot object navigation.\nExisting zero-shot object navigation methods prompt LLM with the text of spatially closed objects, which lacks enough scene context for in-depth reasoning.\nTo better preserve the information of environment and fully exploit the reasoning ability of LLM, we propose to represent the observed scene with 3D scene graph. The scene graph encodes the relationships between objects, groups and rooms with a LLM-friendly structure, for which we design a hierarchical chain-of-thought prompt to help LLM reason the goal location according to scene context by traversing the nodes and edges.\nMoreover, benefit from the scene graph representation, we further design a re-perception mechanism to empower the object navigation framework with the ability to correct perception error.\nWe conduct extensive experiments on MP3D, HM3D and RoboTHOR environments, where SG-Nav surpasses previous state-of-the-art zero-shot methods by more than \\textbf{10\\%} SR on all benchmarks, while the decision process is explainable. To the best of our knowledge, SG-Nav is the first zero-shot method that achieves even higher performance than supervised object navigation methods on the challenging MP3D benchmark.\nCode of this project will be released in the final version.", "keywords": "navigation;scene graph;large language model", "primary_area": "robotics", "supplementary_material": "/attachment/ec56eb74a135d801244bc005369dbdfdced12d20.zip", "author": "Hang Yin;Xiuwei Xu;Zhenyu Wu;Jie Zhou;Jiwen Lu", "authorids": "~Hang_Yin5;~Xiuwei_Xu1;~Zhenyu_Wu6;~Jie_Zhou3;~Jiwen_Lu1", "gender": ";M;;M;M", "homepage": "https://www.au.tsinghua.edu.cn/;https://xuxw98.github.io/;https://github.com/Gary3410;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": ";315/9374;;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": ";4G627acAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;TN8uDQoAAAAJ", "orcid": ";;0009-0002-4827-6017;;0000-0002-6121-5529", "linkedin": ";;;;", "or_profile": "~Hang_Yin5;~Xiuwei_Xu1;~Zhenyu_Wu6;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Tsinghua University;Tsinghua University;Beijing University of Posts and Telecommunications;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyin2024sgnav,\ntitle={{SG}-Nav: Online 3D Scene Graph Prompting for {LLM}-based Zero-shot Object Navigation},\nauthor={Hang Yin and Xiuwei Xu and Zhenyu Wu and Jie Zhou and Jiwen Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HmCmxbCpp2}\n}", "github": "", "reviewers": "JsEs;aSmS;7ZKj;zzdn", "pdf_size": 2089846, "rating": "3;6;6;6", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "86;64;49;45", "wc_strengths": "98;71;71;43", "wc_weaknesses": "208;142;31;131", "wc_questions": "37;5;101;94", "wc_limitations": "6;4;1;9", "wc_review": "435;286;253;322", "wc_reply_reviewers": "1103;47;21;159", "wc_reply_authors": "1300;50;43;176", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.0, 16.077935190813527 ], "wc_strengths_avg": [ 70.75, 19.447043477094404 ], "wc_weaknesses_avg": [ 128.0, 63.27321708274363 ], "wc_questions_avg": [ 59.25, 39.96482828688246 ], "wc_limitations_avg": [ 5.0, 2.9154759474226504 ], "wc_review_avg": [ 324.0, 68.57477670397476 ], "wc_reply_reviewers_avg": [ 332.5, 447.8601902379804 ], "wc_reply_authors_avg": [ 392.25, 526.7553393179797 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14466499175345653491&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tsinghua University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "THU;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Periodic agent-state based Q-learning for POMDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95802", "id": "HmMSBhMAw4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HmMSBhMAw4", "openreview": "https://openreview.net/forum?id=HmMSBhMAw4", "poster": "/media/PosterPDFs/NeurIPS%202024/95802.png?t=1731553931.251709", "project": "", "author_site": "Amit Sinha, Matthieu Geist, Aditya Mahajan", "tldr": "", "abstract": "The standard approach for Partially Observable Markov Decision Processes (POMDPs) is to convert them to a fully observed belief-state MDP. However, the belief state depends on the system model and is therefore not viable in reinforcement learning (RL) settings. A widely used alternative is to use an agent state, which is a model-free, recursively updateable function of the observation history. Examples include frame stacking and recurrent neural networks. Since the agent state is model-free, it is used to adapt standard RL algorithms to POMDPs. However, standard RL algorithms like Q-learning learn a stationary policy. Our main thesis that we illustrate via examples is that because the agent state does not satisfy the Markov property, non-stationary agent-state based policies can outperform stationary ones. To leverage this feature, we propose PASQL (periodic agent-state based Q-learning), which is a variant of agent-state-based Q-learning that learns periodic policies. By combining ideas from periodic Markov chains and stochastic approximation, we rigorously establish that PASQL converges to a cyclic limit and characterize the approximation error of the converged periodic policy. Finally, we present a numerical experiment to highlight the salient features of PASQL and demonstrate the benefit of learning periodic policies over stationary policies.", "keywords": "POMDPs;RL;Q-learning;non-stationary policies;non-Markovian environments", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Amit Sinha;Matthieu Geist;Aditya Mahajan", "authorids": "~Amit_Sinha1;~Matthieu_Geist1;~Aditya_Mahajan1", "gender": "M;M;M", "homepage": ";;http://cim.mcgill.ca/~adityam/", "dblp": ";38/6508;84/6024", "google_scholar": "KiG2nq4AAAAJ;ectPLEUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-8125-1191", "linkedin": ";;", "or_profile": "~Amit_Sinha1;~Matthieu_Geist1;~Aditya_Mahajan1", "aff": "McGill University, McGill University;Google;McGill University", "aff_domain": "mail.mcgill.ca;google.com;mcgill.ca", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsinha2024periodic,\ntitle={Periodic agent-state based Q-learning for {POMDP}s},\nauthor={Amit Sinha and Matthieu Geist and Aditya Mahajan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HmMSBhMAw4}\n}", "github": "", "reviewers": "aMY6;4hWX;6ZcU;6y9j", "pdf_size": 894560, "rating": "5;6;7;8", "confidence": "2;3;2;3", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "107;112;101;81", "wc_strengths": "31;34;93;238", "wc_weaknesses": "103;290;40;208", "wc_questions": "6;271;41;104", "wc_limitations": "6;1;1;1", "wc_review": "253;708;276;632", "wc_reply_reviewers": "72;18;0;67", "wc_reply_authors": "238;178;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;3;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.25, 11.776565713313877 ], "wc_strengths_avg": [ 99.0, 83.97321001367042 ], "wc_weaknesses_avg": [ 160.25, 95.98535044474235 ], "wc_questions_avg": [ 105.5, 101.80004911590171 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 467.25, 204.68436066294856 ], "wc_reply_reviewers_avg": [ 39.25, 30.96267914764483 ], "wc_reply_authors_avg": [ 104.0, 106.1414151026827 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3685300322016476524&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mail.mcgill.ca;google.com;mcgill.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "McGill University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.mcgill.ca;https://www.google.com", "aff_unique_abbr": "McGill;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Beyond Single Stationary Policies: Meta-Task Players as Naturally Superior Collaborators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95801", "id": "HpN4xeDJQF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HpN4xeDJQF", "openreview": "https://openreview.net/forum?id=HpN4xeDJQF", "poster": "/media/PosterPDFs/NeurIPS%202024/95801.png?t=1731605297.4923403", "project": "", "author_site": "Wang Haoming, Zhaoming Tian, Yunpeng Song, Xiangliang Zhang, Zhongmin Cai", "tldr": "", "abstract": "In human-AI collaborative tasks, the distribution of human behavior, influenced by mental models, is non-stationary, manifesting in various levels of initiative and different collaborative strategies. A significant challenge in human-AI collaboration is determining how to collaborate effectively with humans exhibiting non-stationary dynamics. Current collaborative agents involve initially running self-play (SP) multiple times to build a policy pool, followed by training the final adaptive policy against this pool. These agents themselves are a single policy network, which is $\\textbf{insufficient for handling non-stationary human dynamics}$. We discern that despite the inherent diversity in human behaviors, the $\\textbf{underlying meta-tasks within specific collaborative contexts tend to be strikingly similar}$. Accordingly, we propose $\\textbf{C}$ollaborative $\\textbf{B}$ayesian $\\textbf{P}$olicy $\\textbf{R}$euse ($\\textbf{CBPR}$), a novel Bayesian-based framework that $\\textbf{adaptively selects optimal collaborative policies matching the current meta-task from multiple policy networks}$ instead of just selecting actions relying on a single policy network. We provide theoretical guarantees for CBPR's rapid convergence to the optimal policy once human partners alter their policies. This framework shifts from directly modeling human behavior to identifying various meta-tasks that support human decision-making and training meta-task playing (MTP) agents tailored to enhance collaboration. Our method undergoes rigorous testing in a well-recognized collaborative cooking simulator, $\\textit{Overcooked}$. Both empirical results and user studies demonstrate CBPR's superior competitiveness compared to existing baselines.", "keywords": "human-AI collaboration;Bayesian policy reuse;reinforcement learning", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Haoming Wang;Zhaoming Tian;Yunpeng Song;Xiangliang Zhang;Zhongmin Cai", "authorids": "~Haoming_Wang2;~Zhaoming_Tian2;~Yunpeng_Song1;~Xiangliang_Zhang1;~Zhongmin_Cai1", "gender": ";;;F;M", "homepage": ";https://tianzhaoming.com;;https://sites.nd.edu/xiangliang-zhang/;https://gr.xjtu.edu.cn/web/zmcai/english-version", "dblp": ";;;74/1890-1;", "google_scholar": ";;;BhRJe4wAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-3574-5665;", "linkedin": ";;;;", "or_profile": "~Haoming_Wang2;~Zhaoming_Tian2;~Yunpeng_Song1;~Xiangliang_Zhang1;~Zhongmin_Cai1", "aff": ";;;University of Notre Dame;Xi'an Jiaotong University", "aff_domain": ";;;nd.edu;xjtu.edu.cn", "position": ";;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024beyond,\ntitle={Beyond Single Stationary Policies: Meta-Task Players as Naturally Superior Collaborators},\nauthor={Haoming Wang and Zhaoming Tian and Yunpeng Song and Xiangliang Zhang and Zhongmin Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HpN4xeDJQF}\n}", "github": "", "reviewers": "vD8B;3rF3;QDdz", "pdf_size": 8644314, "rating": "5;6;8", "confidence": "4;3;4", "soundness": "3;2;4", "novelty": "3;2;3", "presentation": "2;2;4", "wc_summary": "77;60;128", "wc_strengths": "41;117;84", "wc_weaknesses": "67;106;10", "wc_questions": "241;80;80", "wc_limitations": "3;9;49", "wc_review": "429;372;351", "wc_reply_reviewers": "45;12;0", "wc_reply_authors": "24;97;0", "reply_reviewers": "1;1;0", "reply_authors": "2;3;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 88.33333333333333, 28.89444391035911 ], "wc_strengths_avg": [ 80.66666666666667, 31.116269413639902 ], "wc_weaknesses_avg": [ 61.0, 39.42080668885405 ], "wc_questions_avg": [ 133.66666666666666, 75.8961278473561 ], "wc_limitations_avg": [ 20.333333333333332, 20.417857108151406 ], "wc_review_avg": [ 384.0, 32.95451410656816 ], "wc_reply_reviewers_avg": [ 19.0, 19.026297590440446 ], "wc_reply_authors_avg": [ 40.333333333333336, 41.24991582482994 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q3nOZ_ALBtcJ:scholar.google.com/&scioq=Beyond+Single+Stationary+Policies:+Meta-Task+Players+as+Naturally+Superior+Collaborators&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";;;nd.edu;xjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of Notre Dame;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;https://www.xjtu.edu.cn", "aff_unique_abbr": "Notre Dame;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "bit2bit: 1-bit quanta video reconstruction via self-supervised photon prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95800", "id": "HtlfNbyfOn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HtlfNbyfOn", "openreview": "https://openreview.net/forum?id=HtlfNbyfOn", "poster": "/media/PosterPDFs/NeurIPS%202024/95800.png?t=1733686147.8966138", "project": "", "author_site": "Yehe Liu, Alexander Krull, Hector Basevi, Ales Leonardis, Michael Jenkins", "tldr": "", "abstract": "Quanta image sensors, such as SPAD arrays, are an emerging sensor technology, producing 1-bit arrays representing photon detection events over exposures as short as a few nanoseconds. In practice, raw data are post-processed using heavy spatiotemporal binning to create more useful and interpretable images at the cost of degrading spatiotemporal resolution. In this work, we propose bit2bit, a new method for reconstructing high-quality image stacks at the original spatiotemporal resolution from sparse binary quanta image data. Inspired by recent work on Poisson denoising, we developed an algorithm that creates a dense image sequence from sparse binary photon data by predicting the photon arrival location probability distribution. However, due to the binary nature of the data, we show that the assumption of a Poisson distribution is inadequate. Instead, we model the process with a Bernoulli lattice process from the truncated Poisson. This leads to the proposal of a novel self-supervised solution based on a masked loss function. We evaluate our method using both simulated and real data. On simulated data from a conventional video, we achieve 34.35 mean PSNR with extremely photon-sparse binary input (<0.06 photons per pixel per frame). We also present a novel dataset containing a wide range of real SPAD high-speed videos under various challenging imaging conditions. The scenes cover strong/weak ambient light, strong motion, ultra-fast events, etc., which will be made available to the community, on which we demonstrate the promise of our approach. Both reconstruction quality and throughput substantially surpass the state-of-the-art methods (e.g., Quanta Burst Photography (QBP)). Our approach significantly enhances the visualization and usability of the data, enabling the application of existing analysis techniques.", "keywords": "Denoising; Self-supervised learning; Quanta imaging; Photon counting", "primary_area": "machine_vision", "supplementary_material": "/attachment/56be829b6c792b09005c6223baeecdca71ac3768.zip", "author": "Yehe Liu;Alexander Krull;Hector Basevi;Ales Leonardis;Michael W. Jenkins", "authorids": "~Yehe_Liu1;~Alexander_Krull3;~Hector_Basevi1;~Ales_Leonardis1;~Michael_W._Jenkins1", "gender": "Not Specified;;;;M", "homepage": ";;;;https://www.jenkinslab.com/", "dblp": ";150/4220;;;", "google_scholar": "b8TOCd8AAAAJ;https://scholar.google.de/citations?hl=en;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-0221-5676;0000-0002-7778-7169;;;", "linkedin": "yeheliu/;;;;", "or_profile": "~Yehe_Liu1;~Alexander_Krull3;~Hector_Basevi1;~Ales_Leonardis1;~Michael_W._Jenkins1", "aff": "Case Western Reserve University;Birmingham University;;;Case Western Reserve University", "aff_domain": "case.edu;bham.ac.uk;;;case.edu", "position": "Postdoc;Assistant Professor;;;Associate Professor", "bibtex": "@inproceedings{\nliu2024bitbit,\ntitle={bit2bit: 1-bit quanta video reconstruction via self-supervised photon prediction},\nauthor={Yehe Liu and Alexander Krull and Hector Basevi and Ales Leonardis and Michael W. Jenkins},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HtlfNbyfOn}\n}", "github": "", "reviewers": "iaWM;VmGM;Z4Sm;abBE", "pdf_size": 20156023, "rating": "5;6;6;8", "confidence": "4;5;3;4", "soundness": "3;3;3;4", "novelty": "3;4;3;3", "presentation": "2;3;3;4", "wc_summary": "91;116;22;150", "wc_strengths": "60;77;27;36", "wc_weaknesses": "328;114;53;44", "wc_questions": "177;177;57;70", "wc_limitations": "32;9;12;2", "wc_review": "688;493;171;302", "wc_reply_reviewers": "815;27;0;6", "wc_reply_authors": "1046;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.75, 46.932797700542 ], "wc_strengths_avg": [ 50.0, 19.710403344427025 ], "wc_weaknesses_avg": [ 134.75, 114.77668535029228 ], "wc_questions_avg": [ 120.25, 56.93581912996422 ], "wc_limitations_avg": [ 13.75, 11.143944544011335 ], "wc_review_avg": [ 413.5, 195.51790199365377 ], "wc_reply_reviewers_avg": [ 212.0, 348.28651998031734 ], "wc_reply_authors_avg": [ 261.5, 452.9312861792614 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:o6MI_GpK-DIJ:scholar.google.com/&scioq=bit2bit:+1-bit+quanta+video+reconstruction+via+self-supervised+photon+prediction&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "case.edu;bham.ac.uk;;;case.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Case Western Reserve University;University of Birmingham", "aff_unique_dep": ";", "aff_unique_url": "https://www.case.edu;https://www.birmingham.ac.uk", "aff_unique_abbr": "CWRU;Birmingham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Variance estimation in compound decision theory under boundedness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95799", "id": "HvCppnDykt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HvCppnDykt", "openreview": "https://openreview.net/forum?id=HvCppnDykt", "poster": "", "project": "", "tldr": "", "abstract": "The normal means model is often studied under the assumption of a known variance. However, ignorance of the variance is a frequent issue in applications and basic theoretical questions still remain open in this setting. This article establishes that the sharp minimax rate of variance estimation in square error is $(\\frac{\\log\\log n}{\\log n})^2$ under arguably the most mild assumption imposed for identifiability: bounded means. The rate-optimal estimator proposed in this article achieves the optimal rate by estimating $O\\left(\\frac{\\log n}{\\log\\log n}\\right)$ cumulants and leveraging a variational representation of the noise variance in terms of the cumulants of the data distribution. The minimax lower bound involves a moment matching construction.", "keywords": "compound decision theory;empirical Bayes;normal means;variance estimation;minimax", "primary_area": "learning_theory", "supplementary_material": "", "author": "Subhodh Kotekal", "authorids": "~Subhodh_Kotekal1", "gender": "M", "homepage": "https://skotekal.github.io/", "dblp": "269/0500", "google_scholar": "", "orcid": "0009-0008-5525-0038", "linkedin": "", "or_profile": "~Subhodh_Kotekal1", "aff": "University of Chicago", "aff_domain": "uchicago.edu", "position": "PhD student", "bibtex": "@inproceedings{\nkotekal2024variance,\ntitle={Variance estimation in compound decision theory under boundedness},\nauthor={Subhodh Kotekal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HvCppnDykt}\n}", "github": "", "reviewers": "tWCj;uxch;RbL1;9ofA", "pdf_size": 479864, "rating": "5;6;7;7", "confidence": "2;3;4;3", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "80;58;86;18", "wc_strengths": "46;78;70;19", "wc_weaknesses": "17;113;337;36", "wc_questions": "32;27;138;1", "wc_limitations": "1;26;1;1", "wc_review": "176;302;632;75", "wc_reply_reviewers": "0;0;99;48", "wc_reply_authors": "0;0;47;0", "reply_reviewers": "0;0;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 26.659894973536563 ], "wc_strengths_avg": [ 53.25, 23.01494079940246 ], "wc_weaknesses_avg": [ 125.75, 127.15222176588186 ], "wc_questions_avg": [ 49.5, 52.43329095145564 ], "wc_limitations_avg": [ 7.25, 10.825317547305483 ], "wc_review_avg": [ 296.25, 209.8646885495509 ], "wc_reply_reviewers_avg": [ 36.75, 40.93516214698557 ], "wc_reply_authors_avg": [ 11.75, 20.351596988934308 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fdq0g-xC01kJ:scholar.google.com/&scioq=Variance+estimation+in+compound+decision+theory+under+boundedness&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "uchicago.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Mitigating Biases in Blackbox Feature Extractors for Image Classification Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95798", "id": "HwO1mNluoL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HwO1mNluoL", "openreview": "https://openreview.net/forum?id=HwO1mNluoL", "poster": "", "project": "", "author_site": "Abhipsa Basu, Saswat Subhajyoti Mallick, Venkatesh Babu R", "tldr": "", "abstract": "In image classification, it is common to utilize a pretrained model to extract meaningful features of the input images, and then to train a classifier on top of it to make predictions for any downstream task. Trained on enormous amounts of data, these models have been shown to contain harmful biases which can hurt their performance when adapted for a downstream classification task. Further, very often they may be blackbox, either due to scale, or because of unavailability of model weights or architecture. Thus, during a downstream task, we cannot debias such models by updating the weights of the feature encoder, as only the classifier can be finetuned. In this regard, we investigate the suitability of some existing debiasing techniques and thereby motivate the need for more focused research towards this problem setting. Furthermore, we propose a simple method consisting of a clustering-based adaptive margin loss with a blackbox feature encoder, with no knowledge of the bias attribute. Our experiments demonstrate the effectiveness of our method across multiple benchmarks.", "keywords": "bias;fairness;spurious correlation", "primary_area": "fairness", "supplementary_material": "/attachment/482a21d602339c74642a97040d22e50c87e2d651.zip", "author": "Abhipsa Basu;Saswat Subhajyoti Mallick;Venkatesh Babu Radhakrishnan", "authorids": "~Abhipsa_Basu1;~Saswat_Subhajyoti_Mallick1;~Venkatesh_Babu_Radhakrishnan2", "gender": "F;;M", "homepage": ";;http://cds.iisc.ac.in/faculty/venky", "dblp": "203/8816;;20/6289", "google_scholar": "https://scholar.google.co.in/citations?user=tmn0akEAAAAJ;;cVg7HrEAAAAJ", "orcid": ";;0000-0002-1926-1804", "linkedin": "abhipsa-basu-526ba0152/;;venkatesh-babu-radhakrishnan-16568939", "or_profile": "~Abhipsa_Basu1;~Saswat_Subhajyoti_Mallick1;~Venkatesh_Babu_Radhakrishnan2", "aff": "Indian Institute of Science;;Indian Institute of Science", "aff_domain": "iisc.ac.in;;iisc.ac.in", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nbasu2024mitigating,\ntitle={Mitigating Biases in Blackbox Feature Extractors for Image Classification Tasks},\nauthor={Abhipsa Basu and Saswat Subhajyoti Mallick and Venkatesh Babu Radhakrishnan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HwO1mNluoL}\n}", "github": "", "reviewers": "wKPv;oZ3K;pwPo;qb1H", "pdf_size": 1148894, "rating": "5;6;6;8", "confidence": "3;3;2;4", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "2;3;2;4", "wc_summary": "94;82;71;48", "wc_strengths": "59;76;134;53", "wc_weaknesses": "361;22;48;191", "wc_questions": "10;64;29;23", "wc_limitations": "11;39;70;7", "wc_review": "535;283;352;322", "wc_reply_reviewers": "45;0;31;20", "wc_reply_authors": "178;23;21;12", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 73.75, 16.946607330082326 ], "wc_strengths_avg": [ 80.5, 32.01952529317073 ], "wc_weaknesses_avg": [ 155.5, 134.9712932441562 ], "wc_questions_avg": [ 31.5, 19.981241202688086 ], "wc_limitations_avg": [ 31.75, 25.292044203662147 ], "wc_review_avg": [ 373.0, 96.67729826593211 ], "wc_reply_reviewers_avg": [ 24.0, 16.446884203398525 ], "wc_reply_authors_avg": [ 58.5, 69.11765331664553 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7586813844236406211&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "iisc.ac.in;;iisc.ac.in", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.iisc.ac.in", "aff_unique_abbr": "IISc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Mixture of Adversarial LoRAs: Boosting Robust Generalization in Meta-Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95797", "id": "HxGdbAmYYr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HxGdbAmYYr", "openreview": "https://openreview.net/forum?id=HxGdbAmYYr", "poster": "/media/PosterPDFs/NeurIPS%202024/95797.png?t=1731758290.3125253", "project": "", "author_site": "Xu Yang, Chen Liu, Ying Wei", "tldr": "", "abstract": "This paper introduces AMT, an \\textbf{A}dversarial \\textbf{M}eta-\\textbf{T}uning methodology, to boost the robust generalization of pre-trained models in the out-of-domain (OOD) few-shot learning. To address the challenge of transferring knowledge from source domains to unseen target domains, we construct the robust LoRAPool by meta-tuning LoRAs with dual perturbations applied to not only the inputs but also singular values and vectors of the weight matrices at various robustness levels. On top of that, we introduce a simple yet effective test-time merging mechanism to dynamically merge discriminative LoRAs for test-time task customization. Extensive evaluations demonstrate that AMT yields significant improvements, up to 12.92\\% in clean generalization and up to 49.72\\% in adversarial generalization, over previous state-of-the-art methods across a diverse range of OOD few-shot image classification tasks on three benchmarks, confirming the effectiveness of our approach to boost the robust generalization of pre-trained models. Our code is available at \\href{https://github.com/xyang583/AMT}{https://github.com/xyang583/AMT}.", "keywords": "meta-tuning;few-shot learning", "primary_area": "other", "supplementary_material": "", "author": "Xu Yang;Chen Liu;Ying Wei", "authorids": "~Xu_Yang10;~Chen_Liu1;~Ying_Wei1", "gender": ";M;F", "homepage": ";http://liuchen1993.cn/HomePage/index.html;https://wei-ying.net/", "dblp": ";10/2639-27;14/4899-1", "google_scholar": ";48PsswEAAAAJ;5UpFdKsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xu_Yang10;~Chen_Liu1;~Ying_Wei1", "aff": ";City University of Hong Kong;Nanyang Technological University", "aff_domain": ";cityu.edu.hk;ntu.edu.sg", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024mixture,\ntitle={Mixture of Adversarial Lo{RA}s: Boosting Robust Generalization in Meta-Tuning},\nauthor={Xu Yang and Chen Liu and Ying Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HxGdbAmYYr}\n}", "github": "", "reviewers": "cLZu;cdgf;VJpc;ek5A", "pdf_size": 1856717, "rating": "5;6;6;6", "confidence": "4;3;5;2", "soundness": "3;3;3;2", "novelty": "3;3;2;2", "presentation": "2;3;2;1", "wc_summary": "135;43;268;36", "wc_strengths": "21;26;45;42", "wc_weaknesses": "327;120;233;202", "wc_questions": "4;47;4;2", "wc_limitations": "6;6;4;8", "wc_review": "493;242;554;290", "wc_reply_reviewers": "39;0;69;0", "wc_reply_authors": "0;0;675;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 120.5, 93.6923155867118 ], "wc_strengths_avg": [ 33.5, 10.21028892833107 ], "wc_weaknesses_avg": [ 220.5, 74.06247362868729 ], "wc_questions_avg": [ 14.25, 18.925842121290138 ], "wc_limitations_avg": [ 6.0, 1.4142135623730951 ], "wc_review_avg": [ 394.75, 131.6422709466834 ], "wc_reply_reviewers_avg": [ 27.0, 29.008619408720573 ], "wc_reply_authors_avg": [ 168.75, 292.283573777248 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NHaksUvKDBwJ:scholar.google.com/&scioq=Mixture+of+Adversarial+LoRAs:+Boosting+Robust+Generalization+in+Meta-Tuning&hl=en&as_sdt=0,10", "gs_version_total": 5, "email": ";cityu.edu.hk;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "City University of Hong Kong;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.ntu.edu.sg", "aff_unique_abbr": "CityU;NTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "title": "Communication Bounds for the Distributed Experts Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95796", "id": "HyxjSi3SzF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HyxjSi3SzF", "openreview": "https://openreview.net/forum?id=HyxjSi3SzF", "poster": "/media/PosterPDFs/NeurIPS%202024/95796.png?t=1733433891.116228", "project": "", "author_site": "Zhihao Jia, Qi Pang, Trung Tran, David Woodruff, Zhihao Zhang, Wenting Zheng", "tldr": "", "abstract": "In this work, we study the experts problem in the distributed setting where an expert's cost needs to be aggregated across multiple servers. Our study considers various communication models such as the message-passing model and the broadcast model, along with multiple aggregation functions, such as summing and taking the $\\ell_p$ norm of an expert's cost across servers. We propose the first communication-efficient protocols that achieve near-optimal regret in these settings, even against a strong adversary who can choose the inputs adaptively. Additionally, we give a conditional lower bound showing that the communication of our protocols is nearly optimal. Finally, we implement our protocols and demonstrate empirical savings on the HPO-B benchmarks.", "keywords": "Online Learning;Communication Bounds;Distributed Experts Problem", "primary_area": "learning_theory", "supplementary_material": "/attachment/2b7e4cb5320ea4f11c790e500612e797fc4a44ca.zip", "author": "Zhihao Jia;Qi Pang;Trung Tran;David Woodruff;Zhihao Zhang;Wenting Zheng", "authorids": "~Zhihao_Jia2;~Qi_Pang1;~Trung_Tran1;~David_Woodruff1;~Zhihao_Zhang2;~Wenting_Zheng1", "gender": "M;;M;M;;", "homepage": "https://www.cs.cmu.edu/~zhihaoj2/;;;http://www.cs.cmu.edu/~dwoodruf/;;https://wzheng.github.io/", "dblp": ";;;w/DPWoodruff;91/5464;94/4314", "google_scholar": "0IWLFR4AAAAJ;;https://scholar.google.com/citations?hl=vi;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;https://scholar.google.com/citations?hl=en;OeDgxpgAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Zhihao_Jia2;~Qi_Pang1;~Trung_Tran1;~David_Woodruff1;~Zhihao_Zhang2;~Wenting_Zheng1", "aff": "Carnegie Mellon University;;University of Pittsburgh;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;;pitt.edu;cmu.edu;andrew.cmu.edu;cmu.edu", "position": "Assistant Professor;;MS student;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njia2024communication,\ntitle={Communication Bounds for the Distributed Experts Problem},\nauthor={Zhihao Jia and Qi Pang and Trung Tran and David Woodruff and Zhihao Zhang and Wenting Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HyxjSi3SzF}\n}", "github": "", "reviewers": "eSR8;rqWP;FQkW;B3J6", "pdf_size": 1758238, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;1;3;3", "presentation": "3;2;2;3", "wc_summary": "112;85;485;136", "wc_strengths": "15;38;159;28", "wc_weaknesses": "117;33;727;196", "wc_questions": "37;497;49;6", "wc_limitations": "1;1;3;2", "wc_review": "282;654;1423;368", "wc_reply_reviewers": "13;76;273;0", "wc_reply_authors": "0;166;243;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;3;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 204.5, 162.9486115313659 ], "wc_strengths_avg": [ 60.0, 57.736470276593806 ], "wc_weaknesses_avg": [ 268.25, 271.05845771715 ], "wc_questions_avg": [ 147.25, 202.5368793578098 ], "wc_limitations_avg": [ 1.75, 0.82915619758885 ], "wc_review_avg": [ 681.75, 449.572227233845 ], "wc_reply_reviewers_avg": [ 90.5, 109.21652805322096 ], "wc_reply_authors_avg": [ 102.25, 105.81203854004515 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D8geHVt8LF8J:scholar.google.com/&scioq=Communication+Bounds+for+the+Distributed+Experts+Problem&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cs.cmu.edu;;pitt.edu;cmu.edu;andrew.cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Carnegie Mellon University;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.pitt.edu", "aff_unique_abbr": "CMU;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reinforcing LLM Agents via Policy Optimization with Action Decomposition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95795", "id": "Hz6cSigMyU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Hz6cSigMyU", "openreview": "https://openreview.net/forum?id=Hz6cSigMyU", "poster": "/media/PosterPDFs/NeurIPS%202024/95795.png?t=1731667846.0570676", "project": "", "author_site": "Muning Wen, Ziyu Wan, Jun Wang, Weinan Zhang, Ying Wen", "tldr": "", "abstract": "Language models as intelligent agents push the boundaries of sequential decision-making agents but struggle with limited knowledge of environmental dynamics and exponentially huge action space. Recent efforts like GLAM and TWOSOME manually constrain the action space to a restricted subset and employ reinforcement learning to align agents' knowledge with specific environments. However, they overlook fine-grained credit assignments for intra-action tokens, which is essential for efficient language agent optimization, and rely on human's prior knowledge to restrict action space. This paper proposes decomposing language agent optimization from the action level to the token level, offering finer supervision for each intra-action token and manageable optimization complexity in environments with unrestricted action spaces. Beginning with the simplification of flattening all actions, we theoretically explore the discrepancies between action-level optimization and this naive token-level optimization. We then derive the Bellman backup with Action Decomposition (BAD) to integrate credit assignments for both intra-action and inter-action tokens, effectively eliminating the discrepancies. Implementing BAD within the PPO algorithm, we introduce Policy Optimization with Action Decomposition (POAD). POAD benefits from a finer-grained credit assignment process and lower optimization complexity, leading to enhanced learning efficiency and generalization abilities in aligning language agents with interactive environments. We validate POAD across diverse testbeds, with results affirming the advantages of our approach and the correctness of our theoretical analysis. The source code can be accessed directly with this link: https://github.com/morning9393/ADRL.", "keywords": "Reinforcement Learning;Language Agent;LLM agent;Large Language Models", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/5fd42868e263e8b2c8d9258a9009d10419da91a1.zip", "author": "Muning Wen;Ziyu Wan;Jun Wang;Weinan Zhang;Ying Wen", "authorids": "~Muning_Wen2;~Ziyu_Wan2;~Jun_Wang2;~Weinan_Zhang1;~Ying_Wen1", "gender": "M;M;M;M;M", "homepage": "https://github.com/morning9393;https://github.com/ziyuwan;http://www0.cs.ucl.ac.uk/staff/jun.wang/;http://wnzhang.net;https://yingwen.io", "dblp": "295/0261;;w/JunWang12;28/10261-1;41/4203-1", "google_scholar": "Zt1WFtQAAAAJ;VEtZ7gYAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;Qzss0GEAAAAJ;_A1CxG8AAAAJ", "orcid": "0009-0000-7868-1262;;;0000-0002-0127-2425;0000-0003-1247-2382", "linkedin": ";;;;wenying45", "or_profile": "~Muning_Wen2;~Ziyu_Wan2;~Jun_Wang2;~Weinan_Zhang1;~Ying_Wen1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;University College London;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwen2024reinforcing,\ntitle={Reinforcing {LLM} Agents via Policy Optimization with Action Decomposition},\nauthor={Muning Wen and Ziyu Wan and Jun Wang and Weinan Zhang and Ying Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Hz6cSigMyU}\n}", "github": "", "reviewers": "9Wzo;671a;JxkT;H4uQ", "pdf_size": 4627653, "rating": "6;6;6;6", "confidence": "3;3;4;2", "soundness": "3;2;3;2", "novelty": "3;2;2;2", "presentation": "3;3;3;2", "wc_summary": "99;108;173;88", "wc_strengths": "131;64;60;93", "wc_weaknesses": "112;57;278;48", "wc_questions": "185;179;107;38", "wc_limitations": "63;21;1;5", "wc_review": "590;429;619;272", "wc_reply_reviewers": "165;20;47;8", "wc_reply_authors": "441;157;33;161", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 117.0, 33.09833832687073 ], "wc_strengths_avg": [ 87.0, 28.416544476765644 ], "wc_weaknesses_avg": [ 123.75, 92.36442767645994 ], "wc_questions_avg": [ 127.25, 59.976557920574265 ], "wc_limitations_avg": [ 22.5, 24.550967394381836 ], "wc_review_avg": [ 477.5, 138.9793150076658 ], "wc_reply_reviewers_avg": [ 60.0, 62.245481763739285 ], "wc_reply_authors_avg": [ 198.0, 149.4356048604214 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7347698561126805036&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;ucl.ac.uk;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "SJTU;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "ChatTracker: Enhancing Visual Tracking Performance via Chatting with Multimodal Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95794", "id": "HzANl2unCB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=HzANl2unCB", "openreview": "https://openreview.net/forum?id=HzANl2unCB", "poster": "/media/PosterPDFs/NeurIPS%202024/95794.png?t=1731909344.1179078", "project": "", "author_site": "Yiming Sun, Fan Yu, Shaoxiang Chen, Yu Zhang, Junwei Huang, Yang Li, Chenhui Li, Changbo Wang", "tldr": "", "abstract": "Visual object tracking aims to locate a targeted object in a video sequence based on an initial bounding box. Recently, Vision-Language~(VL) trackers have proposed to utilize additional natural language descriptions to enhance versatility in various applications. However, VL trackers are still inferior to State-of-The-Art (SoTA) visual trackers in terms of tracking performance. We found that this inferiority primarily results from their heavy reliance on manual textual annotations, which include the frequent provision of ambiguous language descriptions. In this paper, we propose ChatTracker to leverage the wealth of world knowledge in the Multimodal Large Language Model (MLLM) to generate high-quality language descriptions and enhance tracking performance. To this end, we propose a novel reflection-based prompt optimization module to iteratively refine the ambiguous and inaccurate descriptions of the target with tracking feedback. To further utilize semantic information produced by MLLM, a simple yet effective VL tracking framework is proposed and can be easily integrated as a plug-and-play module to boost the performance of both VL and visual trackers. Experimental results show that our proposed ChatTracker achieves a performance comparable to existing methods.", "keywords": "Single object tracking;Visual object tracking;Vision-Language trackers;Multimodal learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/c680b941293a4a7c970a12b64efa179cddfe019b.zip", "author": "Yiming Sun;Fan Yu;Shaoxiang Chen;Yu Zhang;Junwei Huang;Yang Li;Chenhui Li;Changbo Wang", "authorids": "~Yiming_Sun5;~Fan_Yu5;~Shaoxiang_Chen1;~Yu_Zhang79;~Junwei_Huang3;~Yang_Li46;~Chenhui_Li1;~Changbo_Wang1", "gender": "M;M;;M;M;M;;M", "homepage": "https://ieeexplore.ieee.org/author/37090051303;https://github.com/2469c;;https://github.com/10225102484;https://github.com/jwuang;http://ihpdep.github.io/;;", "dblp": ";;04/2928-1;;;37/4190-41;;https://dblp.uni-trier.de/pid/87/7045.html", "google_scholar": ";;WL5mbfEAAAAJ;;;N1ZDSHYAAAAJ;;", "orcid": ";;;;;0000-0001-9427-7665;;0000-0001-8940-6418", "linkedin": ";;;;;;;", "or_profile": "~Yiming_Sun5;~Fan_Yu5;~Shaoxiang_Chen1;~Yu_Zhang79;~Junwei_Huang3;~Yang_Li46;~Chenhui_Li1;~Changbo_Wang1", "aff": "East China Normal University;East China Normal University;Meituan Inc.;East China Normal University;East China Normal University;East China Normal University;;East China Normal University", "aff_domain": "ecnu.edu;stu.ecnu.edu.cn;meituan.com;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;;ecnu.edu.cn", "position": "Undergrad student;Undergrad student;Software Engineer;Undergrad student;Undergrad student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nsun2024chattracker,\ntitle={ChatTracker: Enhancing Visual Tracking Performance via Chatting with Multimodal Large Language Model},\nauthor={Yiming Sun and Fan Yu and Shaoxiang Chen and Yu Zhang and Junwei Huang and Yang Li and Chenhui Li and Changbo Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=HzANl2unCB}\n}", "github": "", "reviewers": "Bwy6;z5a5;aCWh;gPPX", "pdf_size": 0, "rating": "5;5;6;7", "confidence": "4;4;2;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "78;41;48;62", "wc_strengths": "26;36;61;191", "wc_weaknesses": "148;160;52;171", "wc_questions": "57;4;5;432", "wc_limitations": "12;1;6;26", "wc_review": "321;242;172;882", "wc_reply_reviewers": "17;238;22;92", "wc_reply_authors": "15;776;14;867", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.25, 14.16642156650719 ], "wc_strengths_avg": [ 78.5, 66.19101147436864 ], "wc_weaknesses_avg": [ 132.75, 47.32533676583823 ], "wc_questions_avg": [ 124.5, 178.82463476825558 ], "wc_limitations_avg": [ 11.25, 9.364160400164021 ], "wc_review_avg": [ 404.25, 280.82056103497837 ], "wc_reply_reviewers_avg": [ 92.25, 89.21988287371823 ], "wc_reply_authors_avg": [ 418.0, 404.7808048808639 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1709611390487397163&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "ecnu.edu;stu.ecnu.edu.cn;meituan.com;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;;ecnu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "East China Normal University;Meituan Inc.", "aff_unique_dep": ";", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.meituan.com", "aff_unique_abbr": "ECNU;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Terra: A Multimodal Spatio-Temporal Dataset Spanning the Earth", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97768", "id": "I0zpivK0A0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I0zpivK0A0", "openreview": "https://openreview.net/forum?id=I0zpivK0A0", "poster": "/media/PosterPDFs/NeurIPS%202024/97768.png?t=1731745061.3202405", "project": "", "author_site": "Wei Chen, Xixuan Hao, Yuankai Wu, Yuxuan Liang", "tldr": "", "abstract": "Since the inception of our planet, the meteorological environment, as reflected through spatio-temporal data, has always been a fundamental factor influencing human life, socio-economic progress, and ecological conservation. A comprehensive exploration of this data is thus imperative to gain a deeper understanding and more accurate forecasting of these environmental shifts. Despite the success of deep learning techniques within the realm of spatio-temporal data and earth science, existing public datasets are beset with limitations in terms of spatial scale, temporal coverage, and reliance on limited time series data. These constraints hinder their optimal utilization in practical applications. To address these issues, we introduce **Terra**, a multimodal spatio-temporal dataset spanning the earth. This dataset encompasses hourly time series data from 6,480,000 grid areas worldwide over the past 45 years, while also incorporating multimodal spatial supplementary information including geo-images and explanatory text. Through a detailed data analysis and evaluation of existing deep learning models within earth sciences, utilizing our constructed dataset. we aim to provide valuable opportunities for enhancing future research in spatio-temporal data mining, thereby advancing towards more spatio-temporal general intelligence. Our source code and data can be accessed at https://github.com/CityMind-Lab/NeurIPS24-Terra.", "keywords": "Spatio-temporal Data Mining;Multi-modality Time Series;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Wei Chen;Xixuan Hao;wu yuankai;Yuxuan Liang", "authorids": "~Wei_Chen50;~Xixuan_Hao2;~wu_yuankai1;~Yuxuan_Liang1", "gender": "M;M;M;M", "homepage": "https://onedean.github.io/;https://github.com/skyerhxx;https://kaimaoge.github.io/;https://yuxuanliang.com", "dblp": ";354/0596.html;06/9348.html;183/0977", "google_scholar": "RCfQIcQAAAAJ;;L9b-8ZwAAAAJ;n9cODgcAAAAJ", "orcid": ";0000-0003-0728-1944;0000-0003-4435-9413;0000-0003-2817-7337", "linkedin": ";;;yoshall/", "or_profile": "~Wei_Chen50;~Xixuan_Hao2;~wu_yuankai1;~Yuxuan_Liang1", "aff": "Hong Kong University of Science and Technology, Guangzhou;The Hong Kong University of Science and Technology (Guangzhou);Sichuan University;The Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust-gz.edu.cn;hkust-gz.edu.cn;scu.edu.cn;hkust-gz.edu.cn", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024terra,\ntitle={Terra: A Multimodal Spatio-Temporal Dataset Spanning the Earth},\nauthor={Wei Chen and Xixuan Hao and wu yuankai and Yuxuan Liang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=I0zpivK0A0}\n}", "github": "", "reviewers": "fRfp;uuE1;A8vf;EWMy", "pdf_size": 7511462, "rating": "6;7;7;7", "confidence": "5;3;5;4", "wc_summary_and_contributions": "57;74;54;53", "wc_strengths": "15;31;2;29", "wc_improvement": "29;23;42;40", "wc_limitations": "31;4;1;110", "wc_correctness": "10;3;1;129", "wc_clarity": "19;3;1;5", "wc_relation_to_prior_work": "12;1;1;1", "wc_documentation": "3;1;1;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "177;141;104;387", "wc_reply_reviewers": "70;0;40;31", "wc_reply_authors": "36;73;241;43", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 59.5, 8.5 ], "wc_strengths_avg": [ 19.25, 11.712706775122479 ], "wc_improvement_avg": [ 33.5, 7.826237921249264 ], "wc_limitations_avg": [ 36.5, 44.01420225336363 ], "wc_correctness_avg": [ 35.75, 53.94151925928672 ], "wc_clarity_avg": [ 7.0, 7.0710678118654755 ], "wc_relation_to_prior_work_avg": [ 3.75, 4.763139720814412 ], "wc_documentation_avg": [ 6.0, 7.54983443527075 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 202.25, 109.74373558431479 ], "wc_reply_reviewers_avg": [ 35.25, 24.953707139421187 ], "wc_reply_authors_avg": [ 98.25, 83.58042533990839 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6910747037157485525&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "hkust-gz.edu.cn;hkust-gz.edu.cn;scu.edu.cn;hkust-gz.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Sichuan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.scu.edu.cn", "aff_unique_abbr": "HKUST;SCU", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Hong Kong SAR;Guangzhou;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "NVRC: Neural Video Representation Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95793", "id": "I29aiMdm4u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I29aiMdm4u", "openreview": "https://openreview.net/forum?id=I29aiMdm4u", "poster": "/media/PosterPDFs/NeurIPS%202024/95793.png?t=1731779311.339851", "project": "", "author_site": "Ho Man Kwan, Ge Gao, Fan Zhang, Andrew Gower, David Bull", "tldr": "", "abstract": "Recent advances in implicit neural representation (INR)-based video coding have\ndemonstrated its potential to compete with both conventional and other learning-\nbased approaches. With INR methods, a neural network is trained to overfit a\nvideo sequence, with its parameters compressed to obtain a compact representation\nof the video content. However, although promising results have been achieved,\nthe best INR-based methods are still out-performed by the latest standard codecs,\nsuch as VVC VTM, partially due to the simple model compression techniques\nemployed. In this paper, rather than focusing on representation architectures, which\nis a common focus in many existing works, we propose a novel INR-based video\ncompression framework, Neural Video Representation Compression (NVRC),\ntargeting compression of the representation. Based on its novel quantization and\nentropy coding approaches, NVRC is the first framework capable of optimizing an\nINR-based video representation in a fully end-to-end manner for the rate-distortion\ntrade-off. To further minimize the additional bitrate overhead introduced by the\nentropy models, NVRC also compresses all the network, quantization and entropy\nmodel parameters hierarchically. Our experiments show that NVRC outperforms\nmany conventional and learning-based benchmark codecs, with a 23% average\ncoding gain over VVC VTM (Random Access) on the UVG dataset, measured\nin PSNR. As far as we are aware, this is the first time an INR-based video codec\nachieving such performance.", "keywords": "Video compression;Implicit neural representations", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ho Man Kwan;Ge Gao;Fan Zhang;Andrew Peter Gower;David Bull", "authorids": "~Ho_Man_Kwan1;~Ge_Gao8;~Fan_Zhang6;~Andrew_Peter_Gower1;~David_Bull1", "gender": "M;M;M;M;M", "homepage": ";;https://fan-aaron-zhang.github.io/;;https://david-bull.github.io/", "dblp": "325/4462;;21/3626-17;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;j2_80ewAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-8283-4513;;0000-0001-6623-9936;;0000-0001-7634-190X", "linkedin": ";;fan-zhang-b32ba430/;andygower;dave-bull-968b756/", "or_profile": "~Ho_Man_Kwan1;~Ge_Gao8;~Fan_Zhang6;~Andrew_Peter_Gower1;~David_Bull1", "aff": "University of Bristol;University of Bristol;University of Bristol;BT Research & Network Strategy;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bt.com;bristol.ac.uk", "position": "PhD student;Researcher;Senior Lecturer;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nkwan2024nvrc,\ntitle={{NVRC}: Neural Video Representation Compression},\nauthor={Ho Man Kwan and Ge Gao and Fan Zhang and Andrew Peter Gower and David Bull},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I29aiMdm4u}\n}", "github": "", "reviewers": "BGkd;m68p;YLxP;csHr", "pdf_size": 7134405, "rating": "5;6;7;7", "confidence": "5;4;4;4", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "32;87;71;36", "wc_strengths": "12;52;47;22", "wc_weaknesses": "230;166;74;61", "wc_questions": "96;17;65;55", "wc_limitations": "4;7;21;10", "wc_review": "374;329;278;184", "wc_reply_reviewers": "4;18;9;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.5, 23.243278598338918 ], "wc_strengths_avg": [ 33.25, 16.723860200324566 ], "wc_weaknesses_avg": [ 132.75, 69.21479249409045 ], "wc_questions_avg": [ 58.25, 28.207933281259724 ], "wc_limitations_avg": [ 10.5, 6.422616289332565 ], "wc_review_avg": [ 291.25, 70.62356193226167 ], "wc_reply_reviewers_avg": [ 10.5, 5.024937810560445 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2301992622946922526&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bt.com;bristol.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Bristol;BT", "aff_unique_dep": ";Research & Network Strategy", "aff_unique_url": "https://www.bristol.ac.uk;https://www.bt.com", "aff_unique_abbr": "Bristol;BT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "OAM-TCD: A globally diverse dataset of high-resolution tree cover maps", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97767", "id": "I2Q3XwO2cz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I2Q3XwO2cz", "openreview": "https://openreview.net/forum?id=I2Q3XwO2cz", "poster": "/media/PosterPDFs/NeurIPS%202024/97767.png?t=1733972924.2657835", "project": "", "author_site": "Josh Veitch-Michaelis, Andrew Cottam, Daniella Schweizer, Eben Broadbent, David Dao, Ce Zhang, Angelica Almeyda Zambrano, Simeon Max", "tldr": "", "abstract": "Accurately quantifying tree cover is an important metric for ecosystem monitoring and for assessing progress in restored sites. Recent works have shown that deep learning-based segmentation algorithms are capable of accurately mapping trees at country and continental scales using high-resolution aerial and satellite imagery. Mapping at high (ideally sub-meter) resolution is necessary to identify individual trees, however there are few open-access datasets containing instance level annotations and those that exist are small or not geographically diverse. We present a novel open-access dataset for individual tree crown delineation (TCD) in high-resolution aerial imagery sourced from OpenAerialMap (OAM). Our dataset, OAM-TCD, comprises 5072 2048x2048 px images at 10 cm/px resolution with associated human-labeled instance masks for over 280k individual and 56k groups of trees. By sampling imagery from around the world, we are able to better capture the diversity and morphology of trees in different terrestrial biomes and in both urban and natural environments. Using our dataset, we train reference instance and semantic segmentation models that compare favorably to existing state-of-the-art models. We assess performance through k-fold cross-validation and comparison with existing datasets; additionally we demonstrate compelling results on independent aerial imagery captured over Switzerland and compare to municipal tree inventories and LIDAR-derived canopy maps in the city of Zurich. Our dataset, models and training/benchmark code are publicly released under permissive open-source licenses: Creative Commons (majority CC BY 4.0), and Apache 2.0 respectively.", "keywords": "ecology;trees;computer vision;segmentation;forests;dataset;instance segmentation;semantic segmentation", "primary_area": "", "supplementary_material": "/attachment/d820aecd1f315d1cef4abdf97c8fb37489931aad.pdf", "author": "Joshua Veitch-Michaelis;Andrew Cottam;Daniella Schweizer;Eben Broadbent;David Dao;Ce Zhang;Angelica Almeyda Zambrano;Simeon Max", "authorids": "~Joshua_Veitch-Michaelis1;~Andrew_Cottam1;~Daniella_Schweizer1;~Eben_Broadbent1;~David_Dao1;~Ce_Zhang1;~Angelica_Almeyda_Zambrano1;~Simeon_Max1", "gender": "M;M;F;;M;;;", "homepage": "https://josh.veitchmichael.is/;;;https://ffgs.ifas.ufl.edu/faculty/broadbent-eben/;https://daviddao.org;;;https://ch.linkedin.com/in/simeonmax/en", "dblp": ";;;;;97/919;;", "google_scholar": "Wb1ZLRAAAAAJ;;;;https://scholar.google.ca/citations?user=XHeNA_8AAAAJ;;;", "orcid": ";;;;;;;", "linkedin": ";andrew-cottam-7b805b17/;https://ch.linkedin.com/in/daniella-schweizer-6711755a;;;;;https://ch.linkedin.com/in/simeonmax/en", "or_profile": "~Joshua_Veitch-Michaelis1;~Andrew_Cottam1;~Daniella_Schweizer1;~Eben_Broadbent1;~David_Dao1;~Ce_Zhang1;~Angelica_Almeyda_Zambrano1;~Simeon_Max1", "aff": "University of Chicago;ETHZ - ETH Zurich;Swiss Federal Research Institute for Forest, Snow and Landscape Research (WSL);University of Florida;;University of Chicago;;Gainforest", "aff_domain": "chicago.edu;ethz.edu;wsl.ch;ufl.edu;;uchicago.edu;;gainforest.net", "position": "Researcher;Researcher;Researcher;Assistant Professor;;Associate Professor;;Researcher", "bibtex": "@inproceedings{\nveitch-michaelis2024oamtcd,\ntitle={{OAM}-{TCD}: A globally diverse dataset of high-resolution tree cover maps},\nauthor={Joshua Veitch-Michaelis and Andrew Cottam and Daniella Schweizer and Eben Broadbent and David Dao and Ce Zhang and Angelica Almeyda Zambrano and Simeon Max},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=I2Q3XwO2cz}\n}", "github": "", "reviewers": "heBU;AXXa;rHN3", "pdf_size": 4976306, "rating": "5;7;9", "confidence": "3;5;5", "wc_summary_and_contributions": "128;67;102", "wc_strengths": "129;9;106", "wc_improvement": "168;118;56", "wc_limitations": "1;25;15", "wc_correctness": "3;19;72", "wc_clarity": "1;1;17", "wc_relation_to_prior_work": "1;3;55", "wc_documentation": "1;70;33", "wc_additional_feedback": "1;1;1", "wc_review": "433;313;457", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 99.0, 24.99333244420733 ], "wc_strengths_avg": [ 81.33333333333333, 52.0021367082375 ], "wc_improvement_avg": [ 114.0, 45.81120678029194 ], "wc_limitations_avg": [ 13.666666666666666, 9.843215373488935 ], "wc_correctness_avg": [ 31.333333333333332, 29.48822740612863 ], "wc_clarity_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_relation_to_prior_work_avg": [ 19.666666666666668, 24.997777679003566 ], "wc_documentation_avg": [ 34.666666666666664, 28.193773938387334 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 401.0, 62.99206299209449 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13135658561341361860&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "chicago.edu;ethz.edu;wsl.ch;ufl.edu;;uchicago.edu;;gainforest.net", "author_num": 8, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "University of Chicago;ETH Zurich;Swiss Federal Research Institute for Forest, Snow and Landscape Research;University of Florida;Gainforest", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.uchicago.edu;https://www.ethz.ch;https://www.wsl.ch;https://www.ufl.edu;", "aff_unique_abbr": "UChicago;ETHZ;WSL;UF;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;Switzerland;" }, { "title": "Off to new Shores: A Dataset & Benchmark for (near-)coastal Flood Inundation Forecasting", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97766", "id": "I2VOdtAc3H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I2VOdtAc3H", "openreview": "https://openreview.net/forum?id=I2VOdtAc3H", "poster": "/media/PosterPDFs/NeurIPS%202024/97766.png?t=1730784061.416966", "project": "", "author_site": "Brandon Victor, Mathilde Letard, Peter Naylor, Karim Douch, Nicolas Longepe, Zhen He, Patrick Ebel", "tldr": "", "abstract": "Floods are among the most common and devastating natural hazards, imposing immense costs on our society and economy due to their disastrous consequences. Recent progress in weather prediction and spaceborne flood mapping demonstrated the feasibility of anticipating extreme events and reliably detecting their catastrophic effects afterwards. However, these efforts are rarely linked to one another and there is a critical lack of datasets and benchmarks to enable the direct forecasting of flood extent. To resolve this issue, we curate a novel dataset enabling a timely prediction of flood extent. Furthermore, we provide a representative evaluation of state-of-the-art methods, structured into two benchmark tracks for forecasting flood inundation maps i) in general and ii) focused on coastal regions. Altogether, our dataset and benchmark provide a comprehensive platform for evaluating flood forecasts, enabling future solutions for this critical challenge. Data, code \\& models are shared at https://github.com/Multihuntr/GFF under a CC0 license.", "keywords": "time series;multi-sensor;remote sensing;Earth observation;weather;semantic segmentation", "primary_area": "", "supplementary_material": "/attachment/751390aa9fbf193f396641bfe48e905d40e54be0.pdf", "author": "Brandon Victor;Mathilde Letard;Peter Jack Naylor;Karim Douch;Nicolas Long\u00e9p\u00e9;Zhen He;Patrick Ebel", "authorids": "~Brandon_Victor2;~Mathilde_Letard1;~Peter_Jack_Naylor1;~Karim_Douch1;~Nicolas_Long\u00e9p\u00e91;~Zhen_He7;~Patrick_Ebel1", "gender": "M;F;M;M;M;M;", "homepage": "https://scholars.latrobe.edu.au/bvictor;;https://peterjacknaylor.github.io/;;;https://scholars.latrobe.edu.au/zhe;https://www.pwjebel.com", "dblp": ";;;;;;238/9529-2", "google_scholar": ";319ap_gAAAAJ;_u5SKfMAAAAJ;;https://scholar.google.fr/citations?user=YVVkIX8AAAAJ;;vOSVVdkAAAAJ", "orcid": "0000-0001-5832-6087;0000-0003-1210-9671;;;;;", "linkedin": ";mathildeletard/;;karim-douch-a3a68125;nicolaslongepe/;;https://linkedin.com/in/pwjebel", "or_profile": "~Brandon_Victor2;~Mathilde_Letard1;~Peter_Jack_Naylor1;~Karim_Douch1;~Nicolas_Long\u00e9p\u00e91;~Zhen_He7;~Patrick_Ebel1", "aff": "La Trobe University;Universit\u00e9 de Rennes;ESA;European Space Agency;ESA;La Trobe University;European Space Agency", "aff_domain": "latrobe.edu.au;univ-rennes.fr;esa.it;esa.int;esa.int;ltu.edu.au;esa.int", "position": "PhD student;Postdoc;Postdoc;Researcher;Researcher;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nvictor2024off,\ntitle={Off to new Shores: A Dataset \\& Benchmark for (near-)coastal Flood Inundation Forecasting},\nauthor={Brandon Victor and Mathilde Letard and Peter Jack Naylor and Karim Douch and Nicolas Long{\\'e}p{\\'e} and Zhen He and Patrick Ebel},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=I2VOdtAc3H}\n}", "github": "", "reviewers": "ZKym;CFWK;R2fu;vRcU", "pdf_size": 7213031, "rating": "5;7;8;9", "confidence": "5;3;3;4", "wc_summary_and_contributions": "123;117;57;61", "wc_strengths": "130;20;3;55", "wc_improvement": "175;4;3;41", "wc_limitations": "145;3;13;20", "wc_correctness": "20;3;7;12", "wc_clarity": "55;44;8;261", "wc_relation_to_prior_work": "37;1;13;6", "wc_documentation": "10;4;20;30", "wc_additional_feedback": "1;1;1;1", "wc_review": "696;197;125;487", "wc_reply_reviewers": "133;0;0;68", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "4;3;1;2", "rating_avg": [ 7.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 89.5, 30.606371885605782 ], "wc_strengths_avg": [ 52.0, 48.780118901044105 ], "wc_improvement_avg": [ 55.75, 70.5314646097754 ], "wc_limitations_avg": [ 45.25, 57.90671377310234 ], "wc_correctness_avg": [ 10.5, 6.34428877022476 ], "wc_clarity_avg": [ 92.0, 99.10852637386957 ], "wc_relation_to_prior_work_avg": [ 14.25, 13.808964479641476 ], "wc_documentation_avg": [ 16.0, 9.899494936611665 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 376.25, 228.99931768457301 ], "wc_reply_reviewers_avg": [ 50.25, 55.2556558191105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.560611910581388, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GA48p6Rbw2wJ:scholar.google.com/&scioq=Off+to+new+Shores:+A+Dataset+%26+Benchmark+for+(near-)coastal+Flood+Inundation+Forecasting&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "latrobe.edu.au;univ-rennes.fr;esa.it;esa.int;esa.int;ltu.edu.au;esa.int", "author_num": 7, "aff_unique_index": "0;1;2;2;2;0;2", "aff_unique_norm": "La Trobe University;Universit\u00e9 de Rennes;European Space Agency", "aff_unique_dep": ";;", "aff_unique_url": "https://www.latrobe.edu.au;https://www.univ-rennes1.fr;https://www.esa.int", "aff_unique_abbr": "LTU;UR1;ESA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2;0;2", "aff_country_unique": "Australia;France;Unknown" }, { "title": "Towards Understanding Evolving Patterns in Sequential Data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95792", "id": "I2gVmVRgNk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I2gVmVRgNk", "openreview": "https://openreview.net/forum?id=I2gVmVRgNk", "poster": "", "project": "", "author_site": "QIUHAO Zeng, Long-Kai Huang, Qi CHEN, Charles Ling, Boyu Wang", "tldr": "", "abstract": "In many machine learning tasks, data is inherently sequential. Most existing algorithms learn from sequential data in an auto-regressive manner, which predicts the next unseen data point based on the observed sequence, implicitly assuming the presence of an \\emph{evolving pattern} embedded in the data that can be leveraged. However, identifying and assessing evolving patterns in learning tasks often relies on subjective judgments rooted in the prior knowledge of human experts, lacking a standardized quantitative measure. Furthermore, such measures enable us to determine the suitability of employing sequential models effectively and make informed decisions on the temporal order of time series data, and feature/data selection processes. To address this issue, we introduce the Evolving Rate (EvoRate), which quantitatively approximates the intensity of evolving patterns in the data with Mutual Information. Furthermore, in some temporal data with neural mutual information estimations, we only have snapshots at different timestamps, lacking correspondence, which hinders EvoRate estimation. To tackle this challenge, we propose EvoRate$_\\mathcal{W}$, aiming to establish correspondence with optimal transport for estimating the first-order EvoRate. Experiments on synthetic and real-world datasets including images and tabular data validate the efficacy of our EvoRate.", "keywords": "Time series;sequential data;autoregressive tasks;evolving domain generalization;temporal domain generalization", "primary_area": "other", "supplementary_material": "/attachment/ce125596cd1cecb9fa0551e93d0f619a22e23053.zip", "author": "QIUHAO Zeng;Long-Kai Huang;Qi CHEN;Charles Ling;Boyu Wang", "authorids": "~QIUHAO_Zeng1;~Long-Kai_Huang1;~Qi_CHEN6;~Charles_Ling1;~Boyu_Wang3", "gender": "M;;F;M;M", "homepage": "https://hardworkingpearl.github.io/;https://sites.google.com/site/longkaihugo/home;https://livreq.github.io/;http://cling.csd.uwo.ca/;https://sites.google.com/site/borriewang/", "dblp": ";133/2006;66/6320-15.html;;41/6565-4.html", "google_scholar": "https://scholar.google.ca/citations?user=MJdcPlgAAAAJ;CaP64WUAAAAJ;MqLoSeoAAAAJ;https://scholar.google.co.uk/citations?hl=en;qAZM5KcAAAAJ", "orcid": ";0000-0001-5263-1443;0000-0002-7213-0221;;0000-0002-7413-4162", "linkedin": ";;;;", "or_profile": "~QIUHAO_Zeng1;~Long-Kai_Huang1;~Qi_CHEN6;~Charles_Ling1;~Boyu_Wang3", "aff": "University of Western Ontario ;Tencent;Laval university;Western University;University of Western Ontario", "aff_domain": "western.ca;tencent.com;ulaval.ca;uwo.ca;uwo.ca", "position": "PhD student;Researcher;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nzeng2024towards,\ntitle={Towards Understanding Evolving Patterns in Sequential Data},\nauthor={QIUHAO Zeng and Long-Kai Huang and Qi CHEN and Charles Ling and Boyu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I2gVmVRgNk}\n}", "github": "", "reviewers": "V8vA;8iST;oZmm;rYZr", "pdf_size": 2610001, "rating": "6;6;8;8", "confidence": "2;4;4;5", "soundness": "3;2;4;4", "novelty": "3;3;3;4", "presentation": "3;2;4;3", "wc_summary": "92;88;45;111", "wc_strengths": "86;6;83;197", "wc_weaknesses": "38;63;119;360", "wc_questions": "25;65;147;4", "wc_limitations": "14;46;7;18", "wc_review": "255;268;401;690", "wc_reply_reviewers": "0;7;21;51", "wc_reply_authors": "0;45;55;39", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.0, 24.13503677229434 ], "wc_strengths_avg": [ 93.0, 68.06981709979836 ], "wc_weaknesses_avg": [ 145.0, 127.54803016903084 ], "wc_questions_avg": [ 60.25, 54.66888969057265 ], "wc_limitations_avg": [ 21.25, 14.821858857781638 ], "wc_review_avg": [ 403.5, 175.00071428425656 ], "wc_reply_reviewers_avg": [ 19.75, 19.562400159489634 ], "wc_reply_authors_avg": [ 34.75, 20.8611480987984 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17206745622780247873&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 2, "email": "western.ca;tencent.com;ulaval.ca;uwo.ca;uwo.ca", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Western Ontario;Tencent;Laval University;Western University", "aff_unique_dep": ";Tencent Holdings Limited;;", "aff_unique_url": "https://www.uwo.ca;https://www.tencent.com;https://www.laval.ca;https://www.uwo.ca", "aff_unique_abbr": "UWO;Tencent;Laval;Western", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;China" }, { "title": "FedLPA: One-shot Federated Learning with Layer-Wise Posterior Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95791", "id": "I3IuclVLFZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I3IuclVLFZ", "openreview": "https://openreview.net/forum?id=I3IuclVLFZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95791.png?t=1729418492.1734383", "project": "", "author_site": "Xiang Liu, Liangxi Liu, Feiyang Ye, Yunheng Shen, Xia Li, Linshan Jiang, Jialin Li", "tldr": "", "abstract": "Efficiently aggregating trained neural networks from local clients into a global model on a server is a widely researched topic in federated learning. Recently, motivated by diminishing privacy concerns, mitigating potential attacks, and reducing communication overhead, one-shot federated learning (i.e., limiting client-server communication into a single round) has gained popularity among researchers. However, the one-shot aggregation performances are sensitively affected by the non-identical training data distribution, which exhibits high statistical heterogeneity in some real-world scenarios. To address this issue, we propose a novel one-shot aggregation method with layer-wise posterior aggregation, named FedLPA. FedLPA aggregates local models to obtain a more accurate global model without requiring extra auxiliary datasets or exposing any private label information, e.g., label distributions. To effectively capture the statistics maintained in the biased local datasets in the practical non-IID scenario, we efficiently infer the posteriors of each layer in each local model using layer-wise Laplace approximation and aggregate them to train the global parameters. Extensive experimental results demonstrate that FedLPA significantly improves learning performance over state-of-the-art methods across several metrics.", "keywords": "One-shot Federated Learning", "primary_area": "other", "supplementary_material": "/attachment/a0eb4631a248b764f78d7403be3e1dd16c877194.zip", "author": "Xiang Liu;Liangxi Liu;Feiyang Ye;Yunheng Shen;Xia Li;Linshan Jiang;Jialin Li", "authorids": "~Xiang_Liu15;~Liangxi_Liu3;~Feiyang_Ye4;~Yunheng_Shen1;~Xia_Li3;~Linshan_Jiang1;~Jialin_Li2", "gender": "M;M;M;;;M;M", "homepage": "https://lebronlambert.github.io/;https://github.com/Glimmer0x;https://feiyang-ye.github.io/;;;https://sg.linkedin.com/in/linshan-jiang-059b297b;https://www.comp.nus.edu.sg/~lijl/", "dblp": ";252/7506;285/4704;;;183/1884;75/4924-1.html", "google_scholar": "Dj9s3oEAAAAJ;LOsVJ_8AAAAJ;3EX25cAAAAAJ;;;https://scholar.google.com.sg/citations?user=S01E5-cAAAAJ;eP9FYPQAAAAJ", "orcid": "0009-0006-8550-3767;0009-0000-4074-5144;;;;0000-0001-8501-9488;0000-0003-3530-7662", "linkedin": ";glimmer0x/en;;;;;", "or_profile": "~Xiang_Liu15;~Liangxi_Liu3;~Feiyang_Ye4;~Yunheng_Shen1;~Xia_Li3;~Linshan_Jiang1;~Jialin_Li2", "aff": "National University of Singapore;northeastern university;University of Technology Sydney;;;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;northeastern.edu;uts.edu.au;;;nus.edu.sg;nus.edu.sg", "position": "PhD student;MS student;PhD student;;;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nliu2024fedlpa,\ntitle={Fed{LPA}: One-shot Federated Learning with Layer-Wise Posterior Aggregation},\nauthor={Xiang Liu and Liangxi Liu and Feiyang Ye and Yunheng Shen and Xia Li and Linshan Jiang and Jialin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I3IuclVLFZ}\n}", "github": "", "reviewers": "4qUr;XECp;F77H", "pdf_size": 8133451, "rating": "5;6;6", "confidence": "4;3;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "83;82;83", "wc_strengths": "56;131;45", "wc_weaknesses": "139;196;96", "wc_questions": "4;4;90", "wc_limitations": "1;210;23", "wc_review": "283;623;337", "wc_reply_reviewers": "45;18;34", "wc_reply_authors": "363;203;50", "reply_reviewers": "1;1;1", "reply_authors": "4;3;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.66666666666667, 0.4714045207910317 ], "wc_strengths_avg": [ 77.33333333333333, 38.21285414912399 ], "wc_weaknesses_avg": [ 143.66666666666666, 40.95797304012438 ], "wc_questions_avg": [ 32.666666666666664, 40.54078878802872 ], "wc_limitations_avg": [ 78.0, 93.76922025199242 ], "wc_review_avg": [ 414.3333333333333, 149.18742872269397 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 11.08552609887726 ], "wc_reply_authors_avg": [ 205.33333333333334, 127.79236631696313 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=635590506757388169&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "u.nus.edu;northeastern.edu;uts.edu.au;;;nus.edu.sg;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "National University of Singapore;Northeastern University;University of Technology Sydney", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.northeastern.edu;https://www.uts.edu.au", "aff_unique_abbr": "NUS;NEU;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Singapore;United States;Australia" }, { "id": "I3kIEjoON6", "title": "Unleashing the power of novel conditional generative approaches for new materials discovery", "track": "main", "status": "Reject", "tldr": "", "abstract": "For a very long time, computational approaches to the design of new materials have relied on an iterative process of finding a candidate material and modeling its properties. AI has played a crucial role in this regard, helping to accelerate the discovery and optimization of crystal properties and structures through advanced computational methodologies and data-driven approaches. To address the problem of new materials design and fasten the process of new materials search, we have applied latest generative approaches to the problem of crystal structure design, trying to solve the inverse problem: by given properties generate a structure that satisfies them without utilizing supercomputer powers. In our work we propose two approaches: 1) conditional structure modification: optimization of the stability of an arbitrary atomic configuration, using the energy difference between the most energetically favorable structure and all its less stable polymorphs and 2) conditional structure generation. We used a representation for materials that includes the following information: lattice, atom coordinates, atom types, chemical features, space group and formation energy of the structure. The loss function was optimized to take into account the periodic boundary conditions of crystal structures. We have applied Diffusion models approach, Flow matching, usual Auto-Encoder (AE) and compared the results of the models and approaches. As a metric for the study, physical pymatgen matcher was employed: we compare target structure with generated one using default tolerances. So far, our modifier and generator produce structures with needed properties with accuracy 41% and 82% respectively. To prove the offered methodology efficiency, inference have been carried out, resulting in several potentially new structures with formation energy below the AFLOWLib-derived convex hulls.", "keywords": "new materials design;new materials;solid state physics;generative ai;crystal structure generation;diffusion;flow matching", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/68d4720969ef6c3dd6195293026126717af7e19d.zip", "author": "Lev Novitskiy;Vladimir Lazarev;Mikhail Tiutiulnikov;Nikita Vakhrameev;Roman A. Eremin;Innokentiy S. Humonen;Andrey Kuznetsov;Denis Dimitrov;Semen Budennyy", "authorids": "~Lev_Novitskiy1;~Vladimir_Lazarev1;~Mikhail_Tiutiulnikov1;~Nikita_Vakhrameev1;~Roman_A._Eremin1;~Innokentiy_S._Humonen1;~Andrey_Kuznetsov2;~Denis_Dimitrov2;~Semen_Budennyy1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/leffff;;https://github.com/mtyutyulnikov;;;;;https://www.researchgate.net/profile/Semen-Budennyy;https://t.me/dendimitrov", "dblp": ";;;;;;50/11063;;82/8368", "google_scholar": ";Dw4n0asAAAAJ;;;ZVGVPwsAAAAJ;https://scholar.google.ru/citations?user=FRMpYwcAAAAJ;q0lIfCEAAAAJ;https://scholar.google.ru/citations?user=SXvNI1MAAAAJ;3JSIJpYAAAAJ", "orcid": "0009-0001-0112-4545;0000-0002-1307-7539;;0009-0004-3283-6280;0000-0002-2550-9239;;0000-0001-6446-8663;;0000-0002-9756-5424", "linkedin": ";;;;;;https://linkedin.com/in/andrey-kuznetsov-87;;denis-dimitrov-66bbb3116/", "or_profile": "~Lev_Novitskiy1;~Vladimir_Lazarev1;~Mikhail_Tiutiulnikov1;~Nikita_Vakhrameev1;~Roman_A._Eremin1;~Innokentiy_S._Humonen1;~Andrey_Kuznetsov2;~Semen_Budennyy1;~Denis_Valerievich_Dimitrov1", "aff": "NUST MISIS;AIRI;Novosibirsk State University;Moscow State Institute of Steel and Alloys;Artificial Intelligence Research Institute;Higher School of Economics, Higher School of Economics;Samara National Research University;Novosibirsk State University;Sber", "aff_domain": "edu.misis.ru;airi.net;g.nsu.ru;misis.ru;airi.net;edu.hse.ru;ssau.ru;nsu.ru;sberbank.com", "position": "Undergrad student;Researcher;MS student;Undergrad student;Principal Researcher;MS student;Associate Professor;Undergrad student;Principal Researcher", "bibtex": "@misc{\nanonymous2024unleashing,\ntitle={Unleashing the power of novel conditional generative approaches for new materials discovery},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=I3kIEjoON6}\n}", "github": "", "project": "", "reviewers": "THHC;NqQo;3FMg;QufJ", "site": "https://openreview.net/forum?id=I3kIEjoON6", "pdf_size": 1631409, "rating": "3;3;4;5", "confidence": "5;5;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "1;1;2;3", "wc_summary": "90;140;54;62", "wc_strengths": "68;32;35;29", "wc_weaknesses": "401;121;98;80", "wc_questions": "20;2;170;166", "wc_limitations": "1;12;49;15", "wc_review": "580;307;406;352", "wc_reply_reviewers": "134;8;55;96", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 86.5, 33.65635155509284 ], "wc_strengths_avg": [ 41.0, 15.732132722552274 ], "wc_weaknesses_avg": [ 175.0, 131.28785168476176 ], "wc_questions_avg": [ 89.5, 78.77023549539508 ], "wc_limitations_avg": [ 19.25, 17.949582167838894 ], "wc_review_avg": [ 411.25, 103.54075284640344 ], "wc_reply_reviewers_avg": [ 73.25, 46.90082621873521 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SFY0OBT5B2kJ:scholar.google.com/&scioq=Unleashing+the+power+of+novel+conditional+generative+approaches+for+new+materials+discovery&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2;3;1;4;5;2;6", "aff_unique_norm": "National University of Science and Technology MISiS;Artificial Intelligence Research Institute;Novosibirsk State University;Moscow State Institute of Steel and Alloys;Higher School of Economics;Samara National Research University;Sberbank", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.misis.ru;https://www.airi.jp;https://www.nsu.ru;http://www.msi.ru;https://www.hse.ru;https://snru.ru;https://www.sberbank.ru", "aff_unique_abbr": "MISIS;AIRI;NSU;MISiS;HSE;SNRU;Sber", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0;0;0;0", "aff_country_unique": "Russian Federation;Japan;United States" }, { "title": "Real-world Image Dehazing with Coherence-based Pseudo Labeling and Cooperative Unfolding Network", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95790", "id": "I6tBNcJE2F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I6tBNcJE2F", "openreview": "https://openreview.net/forum?id=I6tBNcJE2F", "poster": "/media/PosterPDFs/NeurIPS%202024/95790.png?t=1732243586.8233566", "project": "", "author_site": "Chengyu Fang, Chunming He, Fengyang Xiao, Yulun Zhang, Longxiang Tang, Yuelin Zhang, Kai Li, Xiu Li", "tldr": "", "abstract": "Real-world Image Dehazing (RID) aims to alleviate haze-induced degradation in real-world settings. This task remains challenging due to the complexities in accurately modeling real haze distributions and the scarcity of paired real-world data. To address these challenges, we first introduce a cooperative unfolding network that jointly models atmospheric scattering and image scenes, effectively integrating physical knowledge into deep networks to restore haze-contaminated details. Additionally, we propose the first RID-oriented iterative mean-teacher framework, termed the Coherence-based Label Generator, to generate high-quality pseudo labels for network training. Specifically, we provide an optimal label pool to store the best pseudo-labels during network training, leveraging both global and local coherence to select high-quality candidates and assign weights to prioritize haze-free regions. We verify the effectiveness of our method, with experiments demonstrating that it achieves state-of-the-art performance on RID tasks. Code will be available at https://github.com/cnyvfang/CORUN-Colabator.", "keywords": "Semi-supervised Learning;Real-world image dehazing", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chengyu Fang;Chunming He;Fengyang Xiao;Yulun Zhang;Longxiang Tang;Yuelin Zhang;Kai Li;Xiu Li", "authorids": "~Chengyu_Fang3;~Chunming_He1;~Fengyang_Xiao1;~Yulun_Zhang1;~Longxiang_Tang1;~Yuelin_Zhang1;~Kai_Li11;~Xiu_Li1", "gender": "M;M;F;M;;M;;F", "homepage": "https://chengyufang.tech;https://chunminghe.github.io;;http://yulunzhang.com/;https://scholar.google.com/citations?user=3oMQsq8AAAAJ;;;https://thusigsiclab.github.io/thu.github.io/introduction.html", "dblp": "361/2438;251/5104;;166/2763-1.html;347/9498;277/6243;https://dblp.uni-trier.de/pers/hd/l/Li_0012:Kai;13/1206-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;NqdaqA8AAAAJ;ORmLjWoAAAAJ;;Q5DbNDoAAAAJ;YsROc4UAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-6522-3710;0000-0001-6479-7109;;0000-0002-2288-5079;0009-0005-2704-3718;;;0000-0003-0403-1923", "linkedin": ";https://www.linkedin.com/feed/?trk=guest_homepage-basic_nav-header-join;;yulun-zhang-1116b5b9/;;;;", "or_profile": "~Chengyu_Fang3;~Chunming_He1;~Fengyang_Xiao1;~Yulun_Zhang1;~Longxiang_Tang1;~Yuelin_Zhang1;~Kai_Li11;~Xiu_Li1", "aff": "Southwest University;Tsinghua University;SUN YAT-SEN UNIVERSITY;Swiss Federal Institute of Technology;Peking University;The Chinese University of Hong Kong;NEC-Labs;Tsinghua University", "aff_domain": "swu.edu.cn;tsinghua.edu.cn;sysu.edu.cn;ethz.ch;pku.edu.cn;cuhk.edu.hk;nec-labs.com;tsinghua.edu.cn", "position": "Undergrad student;MS student;MS student;Postdoc;Intern;PhD student;NEC Labs, America;Professor", "bibtex": "@inproceedings{\nfang2024realworld,\ntitle={Real-world Image Dehazing with Coherence-based Pseudo Labeling and Cooperative Unfolding Network},\nauthor={Chengyu Fang and Chunming He and Fengyang Xiao and Yulun Zhang and Longxiang Tang and Yuelin Zhang and Kai Li and Xiu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I6tBNcJE2F}\n}", "github": "", "reviewers": "ivCJ;Udmv;h5fd;wF18", "pdf_size": 19776243, "rating": "5;5;7;8", "confidence": "3;5;4;5", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "71;70;80;95", "wc_strengths": "50;37;100;153", "wc_weaknesses": "213;193;102;183", "wc_questions": "12;89;87;189", "wc_limitations": "13;6;13;85", "wc_review": "359;395;382;705", "wc_reply_reviewers": "170;62;67;66", "wc_reply_authors": "440;200;77;65", "reply_reviewers": "3;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 10.024968827881711 ], "wc_strengths_avg": [ 85.0, 45.765707685995636 ], "wc_weaknesses_avg": [ 172.75, 42.25147926404471 ], "wc_questions_avg": [ 94.25, 62.89425649453215 ], "wc_limitations_avg": [ 29.25, 32.31389020220252 ], "wc_review_avg": [ 460.25, 141.89322570158168 ], "wc_reply_reviewers_avg": [ 91.25, 45.50480743833557 ], "wc_reply_authors_avg": [ 195.5, 150.72574431728643 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4061811972299616, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10920532992362575964&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "swu.edu.cn;tsinghua.edu.cn;sysu.edu.cn;ethz.ch;pku.edu.cn;cuhk.edu.hk;nec-labs.com;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;6;1", "aff_unique_norm": "Southwest University;Tsinghua University;Sun Yat-sen University;Swiss Federal Institute of Technology;Peking University;Chinese University of Hong Kong;NEC Laboratories", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.swu.edu.cn;https://www.tsinghua.edu.cn;http://www.sysu.edu.cn;https://www.ethz.ch;http://www.pku.edu.cn;https://www.cuhk.edu.hk;https://www.nec-labs.com", "aff_unique_abbr": "SWU;THU;SYSU;ETH Zurich;Peking U;CUHK;NEC-Labs", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0;2;0", "aff_country_unique": "China;Switzerland;United States" }, { "title": "Revisiting Self-Supervised Heterogeneous Graph Learning from Spectral Clustering Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95789", "id": "I6tRENM5Ya", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I6tRENM5Ya", "openreview": "https://openreview.net/forum?id=I6tRENM5Ya", "poster": "/media/PosterPDFs/NeurIPS%202024/95789.png?t=1731399115.7302303", "project": "", "author_site": "YUJIE MO, Zhihe Lu, Runpeng Yu, Xiaofeng Zhu, Xinchao Wang", "tldr": "", "abstract": "Self-supervised heterogeneous graph learning (SHGL) has shown promising potential in diverse scenarios. However, while existing SHGL methods share a similar essential with clustering approaches, they encounter two significant limitations: (i) noise in graph structures is often introduced during the message-passing process to weaken node representations, and (ii) cluster-level information may be inadequately captured and leveraged, diminishing the performance in downstream tasks. In this paper, we address these limitations by theoretically revisiting SHGL from the spectral clustering perspective and introducing a novel framework enhanced by rank and dual consistency constraints. Specifically, our framework incorporates a rank-constrained spectral clustering method that refines the affinity matrix to exclude noise effectively. Additionally, we integrate node-level and cluster-level consistency constraints that concurrently capture invariant and clustering information to facilitate learning in downstream tasks. We theoretically demonstrate that the learned representations are divided into distinct partitions based on the number of classes and exhibit enhanced generalization ability across tasks. Experimental results affirm the superiority of our method, showcasing remarkable improvements in several downstream tasks compared to existing methods.", "keywords": "Heterogeneous graph;Spectral clustering;Self-supervised learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yujie Mo;Zhihe Lu;Runpeng Yu;Xiaofeng Zhu;Xinchao Wang", "authorids": "~Yujie_Mo1;~Zhihe_Lu1;~Runpeng_Yu2;~Xiaofeng_Zhu7;~Xinchao_Wang1", "gender": ";M;M;;M", "homepage": "https://yujiemo.github.io/;https://zhihelu.github.io/;https://sites.google.com/site/seanzhuxf/;https://yu-rp.github.io/;https://sites.google.com/site/sitexinchaowang/", "dblp": "282/0552;195/9141.html;60/4671-1;290/7625-1;", "google_scholar": "MH4nZY0AAAAJ;X4LKIhgAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": "0000-0001-7784-6221;0000-0002-6917-8654;0000-0001-6840-0578;;", "linkedin": ";;;;", "or_profile": "~Yujie_Mo1;~Zhihe_Lu1;~Xiaofeng_Zhu7;~R_Yu1;~Xinchao_WANG3", "aff": "University of Electronic Science and Technology of China;National University of Singapore;University of Electronic Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "uestc.edu.cn;nus.edu;uestc.edu.cn;u.nus.edu;nus.edu", "position": "PhD student;Postdoc;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmo2024revisiting,\ntitle={Revisiting Self-Supervised Heterogeneous Graph Learning from Spectral Clustering Perspective},\nauthor={Yujie Mo and Zhihe Lu and Runpeng Yu and Xiaofeng Zhu and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I6tRENM5Ya}\n}", "github": "", "reviewers": "oBdx;v7xk;NZZJ;LzB2", "pdf_size": 1633904, "rating": "5;5;6;7", "confidence": "2;3;3;5", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "2;2;2;3", "wc_summary": "73;60;95;85", "wc_strengths": "40;36;62;126", "wc_weaknesses": "228;263;134;351", "wc_questions": "7;10;61;2", "wc_limitations": "16;9;12;12", "wc_review": "364;378;364;576", "wc_reply_reviewers": "13;66;135;39", "wc_reply_authors": "0;37;269;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 13.102957681378658 ], "wc_strengths_avg": [ 66.0, 36.02776706930364 ], "wc_weaknesses_avg": [ 244.0, 77.7270866558113 ], "wc_questions_avg": [ 20.0, 23.843238035132728 ], "wc_limitations_avg": [ 12.25, 2.48746859276655 ], "wc_review_avg": [ 420.5, 89.95971320541211 ], "wc_reply_reviewers_avg": [ 63.25, 45.46633369868303 ], "wc_reply_authors_avg": [ 76.5, 112.16171361030466 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18268401999122375011&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "uestc.edu.cn;nus.edu;uestc.edu.cn;u.nus.edu;nus.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "University of Electronic Science and Technology of China;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "UESTC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "$\\texttt{pfl-research}$: simulation framework for accelerating research in Private Federated Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97765", "id": "I79q7wIRkS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I79q7wIRkS", "openreview": "https://openreview.net/forum?id=I79q7wIRkS", "poster": "", "project": "", "author_site": "Filip Granqvist, Congzheng Song, \u00c1ine Cahill, Rogier van Dalen, Martin Pelikan, Yi Sheng Chan, Xiaojun Feng, Natarajan Krishnaswami, Vojta Jina, Mona Chitnis", "tldr": "", "abstract": "Federated learning (FL) is an emerging machine learning (ML) training paradigm where clients own their data and collaborate to train a global model, without revealing any data to the server and other participants. Researchers commonly perform experiments in a simulation environment to quickly iterate on ideas. However, existing open-source tools do not offer the efficiency required to simulate FL on larger and more realistic FL datasets. We introduce $\\texttt{pfl-research}$, a fast, modular, and easy-to-use Python framework for simulating FL. It supports TensorFlow, PyTorch, and non-neural network models, and is tightly integrated with state-of-the-art privacy algorithms. We study the speed of open-source FL frameworks and show that $\\texttt{pfl-research}$ is 7-72$\\times$ faster than alternative open-source frameworks on common cross-device setups. Such speedup will significantly boost the productivity of the FL research community and enable testing hypotheses on realistic FL datasets that were previously too resource intensive. We release a suite of benchmarks that evaluates an algorithm's overall performance on a diverse set of realistic scenarios.", "keywords": "Federated Learning;Differential Privacy", "primary_area": "", "supplementary_material": "", "author": "Filip Granqvist;Congzheng Song;\u00c1ine Cahill;Rogier van Dalen;Martin Pelikan;YI SHENG CHAN;Xiaojun Feng;Natarajan Krishnaswami;Vojta J;Mona Chitnis", "authorids": "~Filip_Granqvist1;~Congzheng_Song2;~\u00c1ine_Cahill1;~Rogier_van_Dalen2;~Martin_Pelikan1;~YI_SHENG_CHAN1;~Xiaojun_Feng1;~Natarajan_Krishnaswami1;~Vojta_J1;~Mona_Chitnis1", "gender": ";M;F;M;M;M;M;;F;M", "homepage": ";https://csong27.github.io/;;http://martinpelikan.net;;;;https://www.linkedin.com/in/vojtajina/;;https://www.vandalen.uk", "dblp": ";;;18/963.html;;;;;;97/4127", "google_scholar": ";lkPKfjgAAAAJ;;f59x6y4AAAAJ;;;;;;https://scholar.google.co.uk/citations?hl=en", "orcid": ";;;0009-0003-4101-5575;;;0000-0001-5915-6358;;;0000-0002-9603-5771", "linkedin": "filip-granqvist-112017149/;;aine-cahill/;pelikanmartin/;chanyisheng/;xiaojun-feng-44a85a69/;https://linkedin.com/in/nkrishnaswami;;monachitnis/;rogier-van-dalen/", "or_profile": "~Filip_Granqvist1;~Congzheng_Song2;~\u00c1ine_Cahill1;~Martin_Pelikan1;~YI_SHENG_CHAN1;~Xiaojun_Feng1;~Natarajan_Krishnaswami1;~Vojta_J1;~Mona_Chitnis1;~Rogier_C._van_Dalen1", "aff": "Apple;Apple;Apple;Apple;Apple;Apple;Apple;;Apple;Samsung", "aff_domain": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;;apple.com;samsung.com", "position": "Researcher;Researcher;Machine learning research engineer;Researcher;Researcher;Researcher;Researcher;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ngranqvist2024textttpflresearch,\ntitle={\\${\\textbackslash}texttt\\{pfl-research\\}\\$: simulation framework for accelerating research in Private Federated Learning},\nauthor={Filip Granqvist and Congzheng Song and {\\'A}ine Cahill and Rogier van Dalen and Martin Pelikan and YI SHENG CHAN and Xiaojun Feng and Natarajan Krishnaswami and Vojta J and Mona Chitnis},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=I79q7wIRkS}\n}", "github": "", "reviewers": "6DPY;Y2HC;daJH;YXDe", "pdf_size": 2483896, "rating": "5;6;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "137;68;156;61", "wc_strengths": "73;4;259;49", "wc_improvement": "34;6;953;55", "wc_limitations": "33;1;1;58", "wc_correctness": "20;1;8;4", "wc_clarity": "28;1;6;25", "wc_relation_to_prior_work": "25;1;377;18", "wc_documentation": "55;1;51;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "406;84;1812;272", "wc_reply_reviewers": "278;0;305;0", "wc_reply_authors": "152;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 105.5, 41.620307543313515 ], "wc_strengths_avg": [ 96.25, 97.17349175572524 ], "wc_improvement_avg": [ 262.0, 399.3275597801885 ], "wc_limitations_avg": [ 23.25, 23.94133454926855 ], "wc_correctness_avg": [ 8.25, 7.224091638399945 ], "wc_clarity_avg": [ 15.0, 11.683321445547923 ], "wc_relation_to_prior_work_avg": [ 105.25, 157.13747961578105 ], "wc_documentation_avg": [ 27.0, 26.038433132583073 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 643.5, 684.2607324697217 ], "wc_reply_reviewers_avg": [ 145.75, 146.06227267847095 ], "wc_reply_authors_avg": [ 38.0, 65.81793068761733 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14125309513343647898&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;;apple.com;samsung.com", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;1", "aff_unique_norm": "Apple;Samsung", "aff_unique_dep": "Apple Inc.;Samsung", "aff_unique_url": "https://www.apple.com;https://www.samsung.com", "aff_unique_abbr": "Apple;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1", "aff_country_unique": "United States;South Korea" }, { "title": "Rethinking Score Distillation as a Bridge Between Image Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95788", "id": "I8PkICj9kM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I8PkICj9kM", "openreview": "https://openreview.net/forum?id=I8PkICj9kM", "poster": "", "project": "", "author_site": "David McAllister, Songwei Ge, Jia-Bin Huang, David Jacobs, Alexei Efros, Aleksander Holynski, Angjoo Kanazawa", "tldr": "", "abstract": "Score distillation sampling (SDS) has proven to be an important tool, enabling the use of large-scale diffusion priors for tasks operating in data-poor domains. Unfortunately, SDS has a number of characteristic artifacts that limit its utility in general-purpose applications. In this paper, we make progress toward understanding the behavior of SDS and its variants by viewing them as solving an optimal-cost transport path from some current source distribution to a target distribution. Under this new interpretation, we argue that these methods' characteristic artifacts are caused by (1) linear approximation of the optimal path and (2) poor estimates of the source distribution.\nWe show that by calibrating the text conditioning of the source distribution, we can produce high-quality generation and translation results with little extra overhead. Our method can be easily applied across many domains, matching or beating the performance of specialized methods. We demonstrate its utility in text-to-2D, text-to-3D, translating paintings to real images, optical illusion generation, and 3D sketch-to-real. We compare our method to existing approaches for score distillation sampling and show that it can produce high-frequency details with realistic colors.", "keywords": "Generative Models;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "David McAllister;Songwei Ge;Jia-Bin Huang;David W. Jacobs;Alexei A Efros;Aleksander Holynski;Angjoo Kanazawa", "authorids": "~David_McAllister2;~Songwei_Ge2;~Jia-Bin_Huang1;~David_W._Jacobs1;~Alexei_A_Efros1;~Aleksander_Holynski1;~Angjoo_Kanazawa1", "gender": "M;;M;M;;F;M", "homepage": ";https://songweige.github.io/;https://jbhuang0604.github.io/;http://www.cs.umd.edu/~djacobs;https://holynski.org;https://people.eecs.berkeley.edu/~kanazawa/;http://www.eecs.berkeley.edu/~efros/", "dblp": ";228/2581;51/1815-1.html;j/DavidWJacobs.html;230/7958;119/1305;40/6158", "google_scholar": ";https://scholar.google.com/scholar?hl=en;pp848fYAAAAJ;WH2KmRgAAAAJ;ypBMJMgAAAAJ;Ci-_QYIAAAAJ;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ", "orcid": ";;;;;;0000-0001-5720-8070", "linkedin": "davidrmcallister;;jia-bin-huang-070a7418/;;;;alexei-efros-890736a3/", "or_profile": "~David_McAllister2;~Songwei_Ge2;~Jia-Bin_Huang1;~David_W._Jacobs1;~Aleksander_Holynski1;~Angjoo_Kanazawa1;~Alyosha_Efros1", "aff": "University of California, Berkeley;NVIDIA;University of Maryland, College Park;University of Maryland, College Park;Google DeepMind;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;nvidia.com;umd.edu;umd.edu;google.com;berkeley.edu;berkeley.edu", "position": "MS student;Intern;Associate Professor;Professor;Researcher;Assistant Professor;Professor", "bibtex": "@inproceedings{\nmcallister2024rethinking,\ntitle={Rethinking Score Distillation as a Bridge Between Image Distributions},\nauthor={David McAllister and Songwei Ge and Jia-Bin Huang and David W. Jacobs and Alexei A Efros and Aleksander Holynski and Angjoo Kanazawa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I8PkICj9kM}\n}", "github": "", "reviewers": "XvCb;sBp4;Cgf2", "pdf_size": 16826919, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "77;121;104", "wc_strengths": "98;70;100", "wc_weaknesses": "605;213;189", "wc_questions": "93;65;19", "wc_limitations": "23;71;1", "wc_review": "896;540;413", "wc_reply_reviewers": "6;363;11", "wc_reply_authors": "67;415;0", "reply_reviewers": "1;3;1", "reply_authors": "2;3;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 100.66666666666667, 18.116904322268255 ], "wc_strengths_avg": [ 89.33333333333333, 13.695092389449425 ], "wc_weaknesses_avg": [ 335.6666666666667, 190.69929790699865 ], "wc_questions_avg": [ 59.0, 30.506829836393468 ], "wc_limitations_avg": [ 31.666666666666668, 29.22708028904396 ], "wc_review_avg": [ 616.3333333333334, 204.43798298968048 ], "wc_reply_reviewers_avg": [ 126.66666666666667, 167.1253687771216 ], "wc_reply_authors_avg": [ 160.66666666666666, 181.90901266536764 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10522506209579480742&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "berkeley.edu;nvidia.com;umd.edu;umd.edu;google.com;berkeley.edu;berkeley.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;3;0;0", "aff_unique_norm": "University of California, Berkeley;NVIDIA;University of Maryland;Google", "aff_unique_dep": ";NVIDIA Corporation;;Google DeepMind", "aff_unique_url": "https://www.berkeley.edu;https://www.nvidia.com;https://www/umd.edu;https://deepmind.com", "aff_unique_abbr": "UC Berkeley;NVIDIA;UMD;DeepMind", "aff_campus_unique_index": "0;2;2;0;0", "aff_campus_unique": "Berkeley;;College Park", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Fair Online Bilateral Trade", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95787", "id": "I90ypQpLgL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I90ypQpLgL", "openreview": "https://openreview.net/forum?id=I90ypQpLgL", "poster": "/media/PosterPDFs/NeurIPS%202024/95787.png?t=1731347824.2120423", "project": "", "author_site": "Fran\u00e7ois Bachoc, Nicol\u00f2 Cesa-Bianchi, Tom Cesari, Roberto Colomboni", "tldr": "", "abstract": "In online bilateral trade, a platform posts prices to incoming pairs of buyers and sellers that have private valuations for a certain good. If the price is lower than the buyers' valuation and higher than the sellers' valuation, then a trade takes place. Previous work focused on the platform perspective, with the goal of setting prices maximizing the *gain from trade* (the sum of sellers' and buyers' utilities). Gain from trade is, however, potentially unfair to traders, as they may receive highly uneven shares of the total utility. In this work we enforce fairness by rewarding the platform with the _fair gain from trade_, defined as the minimum between sellers' and buyers' utilities.\nAfter showing that any no-regret learning algorithm designed to maximize the sum of the utilities may fail badly with fair gain from trade, we present our main contribution: a complete characterization of the regret regimes for fair gain from trade when, after each interaction, the platform only learns whether each trader accepted the current price. Specifically, we prove the following regret bounds: $\\Theta(\\ln T)$ in the deterministic setting, $\\Omega(T)$ in the stochastic setting, and $\\tilde{\\Theta}(T^{2/3})$ in the stochastic setting when sellers' and buyers' valuations are independent of each other. We conclude by providing tight regret bounds when, after each interaction, the platform is allowed to observe the true traders' valuations.", "keywords": "Regret minimization;online learning;two-sided markets;fairness", "primary_area": "bandits", "supplementary_material": "", "author": "Fran\u00e7ois Bachoc;Nicol\u00f2 Cesa-Bianchi;Tommaso Cesari;Roberto Colomboni", "authorids": "~Fran\u00e7ois_Bachoc1;~Nicol\u00f2_Cesa-Bianchi1;~Tommaso_Cesari1;~Roberto_Colomboni1", "gender": "M;M;M;M", "homepage": "http://cesa-bianchi.di.unimi.it/;https://sites.google.com/view/robertocolomboni/;https://www.math.univ-toulouse.fr/~fbachoc/;https://sites.google.com/view/tom-cesari/home", "dblp": "c/NicoloCesaBianchi;270/0380;130/6786;223/4631", "google_scholar": "https://scholar.google.it/citations?user=BWADJUkAAAAJ;XGtfiRcAAAAJ;Fv36axgAAAAJ;VFo06EEAAAAJ", "orcid": "0000-0001-8477-4748;0000-0001-9890-9543;;0000-0001-5010-1094", "linkedin": ";roberto-colomboni-a922441a6/;;", "or_profile": "~Nicol\u00f2_Cesa-Bianchi1;~Roberto_Colomboni1;~Francois_Bachoc1;~Tommaso_R._Cesari1", "aff": "University of Milan;Italian Institute of Technology (IIT);Institut de Math\u00e9matiques de Toulouse;University of Ottawa", "aff_domain": "unimi.it;iit.it;math.univ-toulouse.fr;uottawa.ca", "position": "Full Professor;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbachoc2024fair,\ntitle={Fair Online Bilateral Trade},\nauthor={Fran{\\c{c}}ois Bachoc and Nicol{\\`o} Cesa-Bianchi and Tommaso Cesari and Roberto Colomboni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I90ypQpLgL}\n}", "github": "", "reviewers": "6Ak7;mLA5;89Fg;rYk6", "pdf_size": 428270, "rating": "6;6;7;8", "confidence": "4;3;4;4", "soundness": "3;4;4;4", "novelty": "3;2;4;4", "presentation": "3;3;4;4", "wc_summary": "34;112;216;317", "wc_strengths": "40;15;200;101", "wc_weaknesses": "239;111;97;41", "wc_questions": "67;2;50;51", "wc_limitations": "2;3;7;9", "wc_review": "382;243;570;519", "wc_reply_reviewers": "41;14;20;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 169.75, 106.7529273603305 ], "wc_strengths_avg": [ 89.0, 71.31269171753371 ], "wc_weaknesses_avg": [ 122.0, 72.44998274671983 ], "wc_questions_avg": [ 42.5, 24.336187047275914 ], "wc_limitations_avg": [ 5.25, 2.8613807855648994 ], "wc_review_avg": [ 428.5, 127.26448836969408 ], "wc_reply_reviewers_avg": [ 29.25, 12.43734296383275 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6564193011133808769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unimi.it;iit.it;math.univ-toulouse.fr;uottawa.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Milan;Italian Institute of Technology;Institut de Math\u00e9matiques de Toulouse;University of Ottawa", "aff_unique_dep": ";;Math\u00e9matiques;", "aff_unique_url": "https://www.unimi.it;https://www.iit.it;https://www.imtoulouse.fr;https://www.uottawa.ca", "aff_unique_abbr": "UniMi;IIT;IMT;U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Italy;France;Canada" }, { "title": "FedSSP: Federated Graph Learning with Spectral Knowledge and Personalized Preference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95786", "id": "I96GFYalFO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=I96GFYalFO", "openreview": "https://openreview.net/forum?id=I96GFYalFO", "poster": "/media/PosterPDFs/NeurIPS%202024/95786.png?t=1733021997.8278625", "project": "", "author_site": "Zihan Tan, Guancheng Wan, Wenke Huang, Mang Ye", "tldr": "", "abstract": "Personalized Federated Graph Learning (pFGL) facilitates the decentralized training of Graph Neural Networks (GNNs) without compromising privacy while accommodating personalized requirements for non-IID participants. In cross-domain scenarios, structural heterogeneity poses significant challenges for pFGL. Nevertheless, previous pFGL methods incorrectly share non-generic knowledge globally and fail to tailor personalized solutions locally under domain structural shift. We innovatively reveal that the spectral nature of graphs can well reflect inherent domain structural shifts. Correspondingly, our method overcomes it by sharing generic spectral knowledge. Moreover, we indicate the biased message-passing schemes for graph structures and propose the personalized preference module. Combining both strategies, we propose our pFGL framework $\\textbf{FedSSP}$ which $\\textbf{S}$hares generic $\\textbf{S}$pectral knowledge while satisfying graph $\\textbf{P}$references. Furthermore, We perform extensive experiments on cross-dataset and cross-domain settings to demonstrate the superiority of our framework. The code is available at https://github.com/OakleyTan/FedSSP.", "keywords": "Federated Learning; Graph Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zihan Tan;Guancheng Wan;Wenke Huang;Mang Ye", "authorids": "~Zihan_Tan2;~Guancheng_Wan1;~Wenke_Huang1;~Mang_Ye1", "gender": "M;;M;M", "homepage": ";;https://wenkehuang.github.io/;https://marswhu.github.io/", "dblp": ";;330/1664;156/0610", "google_scholar": "47OJpJgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;j-HxRy0AAAAJ", "orcid": "0009-0008-1754-3841;;0000-0003-4819-293X;0000-0003-3989-7655", "linkedin": "zihan-tan-617018322/;;;", "or_profile": "~Zihan_Tan2;~Guancheng_Wan1;~Wenke_Huang1;~Mang_Ye1", "aff": "Wuhan University;;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;;whu.edu.cn;whu.edu.cn", "position": "Undergrad student;;PhD student;Professor", "bibtex": "@inproceedings{\ntan2024fedssp,\ntitle={Fed{SSP}: Federated Graph Learning with Spectral Knowledge and Personalized Preference},\nauthor={Zihan Tan and Guancheng Wan and Wenke Huang and Mang Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=I96GFYalFO}\n}", "github": "", "reviewers": "3kee;jSYo;cuMU;LD7P", "pdf_size": 0, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;4;3", "presentation": "3;3;3;4", "wc_summary": "74;38;83;84", "wc_strengths": "106;62;167;143", "wc_weaknesses": "95;60;41;56", "wc_questions": "6;75;22;5", "wc_limitations": "9;1;49;25", "wc_review": "290;236;362;313", "wc_reply_reviewers": "10;16;0;13", "wc_reply_authors": "47;70;75;56", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 18.73999733191016 ], "wc_strengths_avg": [ 119.5, 39.67681942898145 ], "wc_weaknesses_avg": [ 63.0, 19.78635893740938 ], "wc_questions_avg": [ 27.0, 28.521921393903323 ], "wc_limitations_avg": [ 21.0, 18.33030277982336 ], "wc_review_avg": [ 300.25, 45.30107614615794 ], "wc_reply_reviewers_avg": [ 9.75, 6.015604707757983 ], "wc_reply_authors_avg": [ 62.0, 11.113055385446435 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11935395115380587436&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;;whu.edu.cn;whu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "How Sparse Can We Prune A Deep Network: A Fundamental Limit Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95785", "id": "IAAPhOLhcX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IAAPhOLhcX", "openreview": "https://openreview.net/forum?id=IAAPhOLhcX", "poster": "/media/PosterPDFs/NeurIPS%202024/95785.png?t=1730382393.7162025", "project": "", "author_site": "Qiaozhe Zhang, Ruijie Zhang, Jun Sun, Yingzhuang Liu", "tldr": "", "abstract": "Network pruning is a commonly used measure to alleviate the storage and computational burden of deep neural networks. However, the fundamental limit of network pruning is still lacking. To close the gap, in this work we'll take a first-principles approach, i.e. we'll directly impose the sparsity constraint on the loss function and leverage the framework of statistical dimension in convex geometry, thus enabling us to characterize the sharp phase transition point, which can be regarded as the fundamental limit of the pruning ratio. Through this limit, we're able to identify two key factors that determine the pruning ratio limit, namely, weight magnitude and network sharpness. Generally speaking, the flatter the loss landscape or the smaller the weight magnitude, the smaller pruning ratio. Moreover, we provide efficient countermeasures to address the challenges in the computation of the pruning limit, which mainly involves the accurate spectrum estimation of a large-scale and non-positive Hessian matrix. Moreover, through the lens of the pruning ratio threshold, we can also provide rigorous interpretations on several heuristics in existing pruning algorithms. Extensive experiments are performed which demonstrate that our theoretical pruning ratio threshold coincides very well with the experiments. All codes are available at: https://github.com/QiaozheZhang/Global-One-shot-Pruning", "keywords": "Network Pruning;Fundamental Limit;Convex Geometry", "primary_area": "learning_theory", "supplementary_material": "", "author": "Qiaozhe Zhang;Ruijie ZHANG;Jun Sun;Yingzhuang Liu", "authorids": "~Qiaozhe_Zhang1;~Ruijie_ZHANG1;~Jun_Sun13;~Yingzhuang_Liu1", "gender": "M;M;M;M", "homepage": "https://scholar.google.com/citations?view_op=list_works&hl=en&user=OllfhOIAAAAJ;https://github.com/K1seki221;;http://eic.hust.edu.cn/professor/liuyingzhuang/index.htm", "dblp": "228/1392;04/2964.html;;16/5714", "google_scholar": ";ZFNjMTwAAAAJ;;", "orcid": ";;0000-0002-5360-1024;", "linkedin": ";;;", "or_profile": "~Qiaozhe_Zhang1;~Ruijie_ZHANG1;~Jun_Sun13;~Yingzhuang_Liu1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024how,\ntitle={How Sparse Can We Prune A Deep Network: A Fundamental Limit Perspective},\nauthor={Qiaozhe Zhang and Ruijie ZHANG and Jun Sun and Yingzhuang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IAAPhOLhcX}\n}", "github": "", "reviewers": "DBJe;byyM;LoL3;hJmV", "pdf_size": 1458645, "rating": "4;5;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "2;2;3;2", "wc_summary": "79;73;117;78", "wc_strengths": "72;66;59;72", "wc_weaknesses": "336;168;249;293", "wc_questions": "5;5;2;66", "wc_limitations": "7;1;115;1", "wc_review": "499;313;542;510", "wc_reply_reviewers": "497;61;68;47", "wc_reply_authors": "1586;89;220;51", "reply_reviewers": "2;1;2;1", "reply_authors": "4;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.75, 17.612140698961042 ], "wc_strengths_avg": [ 67.25, 5.356071321407137 ], "wc_weaknesses_avg": [ 261.5, 62.13091018164791 ], "wc_questions_avg": [ 19.5, 26.874709300753377 ], "wc_limitations_avg": [ 31.0, 48.55924216871593 ], "wc_review_avg": [ 466.0, 89.73572309844057 ], "wc_reply_reviewers_avg": [ 168.25, 189.95443532594862 ], "wc_reply_authors_avg": [ 486.5, 637.8849817953077 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16123610899980090478&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Full-Atom Peptide Design with Geometric Latent Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95784", "id": "IAQNJUJe8q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IAQNJUJe8q", "openreview": "https://openreview.net/forum?id=IAQNJUJe8q", "poster": "/media/PosterPDFs/NeurIPS%202024/95784.png?t=1731483944.0022624", "project": "", "author_site": "Xiangzhe Kong, Yinjun Jia, Wenbing Huang, Yang Liu", "tldr": "", "abstract": "Peptide design plays a pivotal role in therapeutics, allowing brand new possibility to leverage target binding sites that are previously undruggable. Most existing methods are either inefficient or only concerned with the target-agnostic design of 1D sequences. In this paper, we propose a generative model for full-atom Peptide design with Geometric LAtent Diffusion (PepGLAD) given the binding site. We first establish a benchmark consisting of both 1D sequences and 3D structures from Protein Data Bank (PDB) and literature for systematic evaluation. We then identify two major challenges of leveraging current diffusion-based models for peptide design: the full-atom geometry and the variable binding geometry. To tackle the first challenge, PepGLAD derives a variational autoencoder that first encodes full-atom residues of variable size into fixed-dimensional latent representations, and then decodes back to the residue space after conducting the diffusion process in the latent space. For the second issue, PepGLAD explores a receptor-specific affine transformation to convert the 3D coordinates into a shared standard space, enabling better generalization ability across different binding shapes. Experimental Results show that our method not only improves diversity and binding affinity significantly in the task of sequence-structure co-design, but also excels at recovering reference structures for binding conformation generation.", "keywords": "Peptide Design;Geometric Latent Diffusion;Full-Atom;Affine Transformation", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xiangzhe Kong;Yinjun Jia;Wenbing Huang;Yang Liu", "authorids": "~Xiangzhe_Kong1;~Yinjun_Jia1;~Wenbing_Huang1;~Yang_Liu19", "gender": "M;M;M;M", "homepage": "https://kxz18.github.io/;https://gsai.ruc.edu.cn/english/wenbing_huang;http://nlp.csai.tsinghua.edu.cn/~ly/;https://github.com/EBGU", "dblp": "293/7526;155/3181-1.html;51/3710-5;", "google_scholar": "0oSFYmkAAAAJ;0yNkmO4AAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ;", "orcid": ";;0000-0002-3087-242X;", "linkedin": ";;;", "or_profile": "~Xiangzhe_Kong1;~Wenbing_Huang1;~Yang_Liu19;~Yinjun_Harold_Jia1", "aff": "Tsinghua University;Renmin University of China;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;Associate Professor;Professor;PhD student", "bibtex": "@inproceedings{\nkong2024fullatom,\ntitle={Full-Atom Peptide Design with Geometric Latent Diffusion},\nauthor={Xiangzhe Kong and Yinjun Jia and Wenbing Huang and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IAQNJUJe8q}\n}", "github": "", "reviewers": "8deM;XM8c;xBxR;KHMu", "pdf_size": 9248873, "rating": "5;6;6;7", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "80;36;60;56", "wc_strengths": "73;59;78;130", "wc_weaknesses": "139;234;165;12", "wc_questions": "124;111;97;310", "wc_limitations": "1;52;39;4", "wc_review": "417;492;439;512", "wc_reply_reviewers": "60;68;25;18", "wc_reply_authors": "38;55;33;47", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.0, 15.620499351813308 ], "wc_strengths_avg": [ 85.0, 26.89795531262553 ], "wc_weaknesses_avg": [ 137.5, 80.34457044505248 ], "wc_questions_avg": [ 160.5, 86.84037079607617 ], "wc_limitations_avg": [ 24.0, 22.01136070305514 ], "wc_review_avg": [ 465.0, 38.4642691338338 ], "wc_reply_reviewers_avg": [ 42.75, 21.579793789561567 ], "wc_reply_authors_avg": [ 43.25, 8.437268515343103 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6716229644698464025&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;ruc.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Tackling Uncertain Correspondences for Multi-Modal Entity Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95783", "id": "IAse6CAG26", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IAse6CAG26", "openreview": "https://openreview.net/forum?id=IAse6CAG26", "poster": "/media/PosterPDFs/NeurIPS%202024/95783.png?t=1733391341.1527607", "project": "", "author_site": "Liyi Chen, Ying Sun, Shengzhe Zhang, Yuyang Ye, Wei Wu, Hui Xiong", "tldr": "", "abstract": "Recently, multi-modal entity alignment has emerged as a pivotal endeavor for the integration of Multi-Modal Knowledge Graphs (MMKGs) originating from diverse data sources. Existing works primarily focus on fully depicting entity features by designing various modality encoders or fusion approaches. However, uncertain correspondences between inter-modal or intra-modal cues, such as weak inter-modal associations, description diversity, and modality absence, still severely hinder the effective exploration of aligned entity similarities. To this end, in this paper, we propose a novel Tackling uncertain correspondences method for Multi-modal Entity Alignment (TMEA). Specifically, to handle diverse attribute knowledge descriptions, we design alignment-augmented abstract representation that incorporates the large language model and in-context learning into attribute alignment and filtering for generating and embedding the attribute abstract. In order to mitigate the influence of the modality absence, we propose to unify all modality features into a shared latent subspace and generate pseudo features via variational autoencoders according to existing modal features. Then, we develop an inter-modal commonality enhancement mechanism based on cross-attention with orthogonal constraints, to address weak semantic associations between modalities. Extensive experiments on two real-world datasets validate the effectiveness of TMEA with a clear improvement over competitive baselines.", "keywords": "Entity alignment;knowledge graph;multi-modal learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Liyi Chen;Ying Sun;Shengzhe Zhang;Yuyang Ye;Wei Wu;Hui Xiong", "authorids": "~Liyi_Chen3;~Ying_Sun4;~Shengzhe_Zhang3;~Yuyang_Ye1;~Wei_Wu25;~Hui_Xiong1", "gender": "F;F;M;M;M;M", "homepage": "http://home.ustc.edu.cn/~liyichen/;https://sunyinggilly.github.io;;;https://github.com/U-rara;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": ";10/5415-6.html;350/8080.html;194/4226-2;95/6985-45.html;262/1686-1.html", "google_scholar": "J7JWDwkAAAAJ;;;q6Xx2FcAAAAJ;;cVDF1tkAAAAJ", "orcid": "0000-0003-2166-4386;0000-0002-4763-6060;;0000-0002-1513-7814;0009-0009-1590-601X;0000-0001-6016-6465", "linkedin": ";;;yuyang-ye-298b23135/;;", "or_profile": "~Liyi_Chen3;~Ying_Sun4;~Shengzhe_Zhang3;~Yuyang_Ye1;~Wei_Wu25;~Hui_Xiong1", "aff": "University of Science and Technology of China;Hong Kong University of Science and Technology (Guangzhou);University of Science and Technology of China;Rutgers University;University of Science and Technology of China;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn;rutgers.edu;ustc.edu.cn;hkust.edu", "position": "PhD student;Assistant Professor;Undergrad student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024tackling,\ntitle={Tackling Uncertain Correspondences for Multi-Modal Entity Alignment},\nauthor={Liyi Chen and Ying Sun and Shengzhe Zhang and Yuyang Ye and Wei Wu and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IAse6CAG26}\n}", "github": "", "reviewers": "fuge;PXNL;qUrF;Sc2m", "pdf_size": 653541, "rating": "3;4;6;7", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "76;131;112;122", "wc_strengths": "50;26;91;96", "wc_weaknesses": "242;135;74;88", "wc_questions": "16;5;18;43", "wc_limitations": "4;1;62;7", "wc_review": "388;298;357;356", "wc_reply_reviewers": "328;0;0;50", "wc_reply_authors": "735;160;80;102", "reply_reviewers": "1;0;0;1", "reply_authors": "3;3;2;3", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 110.25, 20.8851023459307 ], "wc_strengths_avg": [ 65.75, 29.07210862665452 ], "wc_weaknesses_avg": [ 134.75, 65.91424352899759 ], "wc_questions_avg": [ 20.5, 13.901438774457844 ], "wc_limitations_avg": [ 18.5, 25.20416632225712 ], "wc_review_avg": [ 349.75, 32.529794035622174 ], "wc_reply_reviewers_avg": [ 94.5, 136.3479006072334 ], "wc_reply_authors_avg": [ 269.25, 270.4841723650388 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7302967433402215, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4146648059979054355&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn;rutgers.edu;ustc.edu.cn;hkust.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;0;1", "aff_unique_norm": "University of Science and Technology of China;Hong Kong University of Science and Technology;Rutgers University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ust.hk;https://www.rutgers.edu", "aff_unique_abbr": "USTC;HKUST;Rutgers", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Principled Bayesian Optimization in Collaboration with Human Experts", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95782", "id": "IDn9SiKgLy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IDn9SiKgLy", "openreview": "https://openreview.net/forum?id=IDn9SiKgLy", "poster": "/media/PosterPDFs/NeurIPS%202024/95782.png?t=1731276973.9002342", "project": "", "author_site": "Wenjie Xu, Masaki Adachi, Colin Jones, Michael A Osborne", "tldr": "", "abstract": "Bayesian optimisation for real-world problems is often performed interactively with human experts, and integrating their domain knowledge is key to accelerate the optimisation process. We consider a setup where experts provide advice on the next query point through binary accept/reject recommendations (labels). Experts\u2019 labels are often costly, requiring efficient use of their efforts, and can at the same time be unreliable, requiring careful adjustment of the degree to which any expert is trusted. We introduce the first principled approach that provides two key guarantees. (1) Handover guarantee: similar to a no-regret property, we establish a sublinear bound on the cumulative number of experts\u2019 binary labels. Initially, multiple labels per query are needed, but the number of expert labels required asymptotically converges to zero, saving both expert effort and computation time. (2) No-harm guarantee with data-driven trust level adjustment: our adaptive trust level ensures that the convergence rate will not be worse than the one without using advice, even if the advice from experts is adversarial. Unlike existing methods that employ a user-defined function that hand-tunes the trust level adjustment, our approach enables data-driven adjustments. Real-world applications empirically demonstrate that our method not only outperforms existing baselines, but also maintains robustness despite varying labelling accuracy, in tasks of battery design with human experts.", "keywords": "Bayesian optimisation;human-AI collaboration;knowledge elicitation", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Wenjie Xu;Masaki Adachi;Colin Jones;Michael A Osborne", "authorids": "~Wenjie_Xu3;~Masaki_Adachi1;~Colin_Jones1;~Michael_A_Osborne1", "gender": "M;M;M;M", "homepage": "https://jackiexuw.github.io/;https://www.masaki-adachi.com;http://la.epfl.ch;https://www.robots.ox.ac.uk/~mosb/", "dblp": "25/1820.html;317/2023;;59/6403", "google_scholar": "https://scholar.google.com/citations?hl=en;;SulkJJQAAAAJ;https://scholar.google.co.uk/citations?user=iTNcAakAAAAJ", "orcid": ";;0000-0001-7239-4799;0000-0003-1959-012X", "linkedin": ";masaki-adachi-b349311a2/;;", "or_profile": "~Wenjie_Xu3;~Masaki_Adachi1;~Colin_Jones1;~Michael_Osborne1", "aff": "EPFL - EPF Lausanne;University of Oxford;EPFL - EPF Lausanne;University of Oxford", "aff_domain": "epfl.ch;ox.ac.uk;epfl.ch;oxford.ac.uk", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024principled,\ntitle={Principled Bayesian Optimization in Collaboration with Human Experts},\nauthor={Wenjie Xu and Masaki Adachi and Colin Jones and Michael A Osborne},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IDn9SiKgLy}\n}", "github": "", "reviewers": "iJqg;dzPz;72YW;Ct1E;sMdb", "pdf_size": 6681721, "rating": "6;6;6;7;9", "confidence": "3;4;3;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;4;3;3", "wc_summary": "70;80;123;107;93", "wc_strengths": "84;31;142;138;133", "wc_weaknesses": "147;250;76;327;54", "wc_questions": "130;2;49;134;120", "wc_limitations": "30;1;1;30;1", "wc_review": "461;364;391;736;401", "wc_reply_reviewers": "30;667;0;101;16", "wc_reply_authors": "77;2732;1607;34;20", "reply_reviewers": "1;4;0;1;1", "reply_authors": "3;9;3;2;2", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 94.6, 18.874321179846444 ], "wc_strengths_avg": [ 105.6, 42.795326847682794 ], "wc_weaknesses_avg": [ 170.8, 103.79479755748841 ], "wc_questions_avg": [ 87.0, 52.56614880319843 ], "wc_limitations_avg": [ 12.6, 14.207040508142432 ], "wc_review_avg": [ 470.6, 136.43547925668014 ], "wc_reply_reviewers_avg": [ 162.8, 254.45109549774 ], "wc_reply_authors_avg": [ 894.0, 1100.688693500574 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 3.8, 2.638181191654584 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4900980294098034, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1942676518358025548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "epfl.ch;ox.ac.uk;epfl.ch;oxford.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "EPFL;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.ox.ac.uk", "aff_unique_abbr": "EPFL;Oxford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "Learning via Surrogate PAC-Bayes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95781", "id": "IEyXWuXAQT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IEyXWuXAQT", "openreview": "https://openreview.net/forum?id=IEyXWuXAQT", "poster": "", "project": "", "author_site": "Antoine Picard, Roman Moscoviz, Benjamin Guedj", "tldr": "", "abstract": "PAC-Bayes learning is a comprehensive setting for (i) studying the generalisation ability of learning algorithms and (ii) deriving new learning algorithms by optimising a generalisation bound. However, optimising generalisation bounds might not always be viable for tractable or computational reasons, or both. For example, iteratively querying the empirical risk might prove computationally expensive.\nIn response, we introduce a novel principled strategy for building an iterative learning algorithm via the optimisation of a sequence of surrogate training objectives, inherited from PAC-Bayes generalisation bounds. The key argument is to replace the empirical risk (seen as a function of hypotheses) in the generalisation bound by its projection onto a constructible low dimensional functional space: these projections can be queried much more efficiently than the initial risk. On top of providing that generic recipe for learning via surrogate PAC-Bayes bounds, we (i) contribute theoretical results establishing that iteratively optimising our surrogates implies the optimisation of the original generalisation bounds, (ii) instantiate this strategy to the framework of meta-learning, introducing a meta-objective offering a closed form expression for meta-gradient, (iii) illustrate our approach with numerical experiments inspired by an industrial biochemical problem.", "keywords": "PAC-Bayes;Generalisation;Optimisation;Learning Theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Antoine Picard;Roman Moscoviz;Benjamin Guedj", "authorids": "~Antoine_Picard1;~Roman_Moscoviz1;~Benjamin_Guedj1", "gender": "M;;M", "homepage": "https://orcid.org/0000-0003-0340-1072;;https://bguedj.github.io", "dblp": ";;177/7258", "google_scholar": ";WPkDpy0AAAAJ;https://scholar.google.fr/citations?user=q-JTC2sAAAAJ", "orcid": "0000-0003-0340-1072;0000-0001-7563-7813;0000-0003-1237-7430", "linkedin": ";;benjaminguedj/", "or_profile": "~Antoine_Picard1;~Roman_Moscoviz1;~Benjamin_Guedj1", "aff": ";SUEZ;University College London, University of London", "aff_domain": ";suez.com;ucl.ac.uk", "position": ";Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\npicard2024learning,\ntitle={Learning via Surrogate {PAC}-Bayes},\nauthor={Antoine Picard and Roman Moscoviz and Benjamin Guedj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IEyXWuXAQT}\n}", "github": "", "reviewers": "7ntj;ku1E;2FBD;GVtZ", "pdf_size": 2597837, "rating": "5;5;6;7", "confidence": "4;4;3;1", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "1;3;3;3", "wc_summary": "113;102;84;58", "wc_strengths": "78;137;37;49", "wc_weaknesses": "322;178;98;2", "wc_questions": "454;747;13;18", "wc_limitations": "45;20;8;5", "wc_review": "1012;1184;240;132", "wc_reply_reviewers": "110;363;54;0", "wc_reply_authors": "83;96;30;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 89.25, 20.801141795584204 ], "wc_strengths_avg": [ 75.25, 38.64178437908891 ], "wc_weaknesses_avg": [ 150.0, 117.23480711802276 ], "wc_questions_avg": [ 308.0, 310.3071059450621 ], "wc_limitations_avg": [ 19.5, 15.75595125658873 ], "wc_review_avg": [ 642.0, 461.618890428024 ], "wc_reply_reviewers_avg": [ 131.75, 139.06181179604988 ], "wc_reply_authors_avg": [ 52.25, 39.00240377207538 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9847319278346617, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r7MDp4ZlbOEJ:scholar.google.com/&scioq=Learning+via+Surrogate+PAC-Bayes&hl=en&as_sdt=0,21", "gs_version_total": 2, "email": ";suez.com;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "SUEZ;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.suez.com;https://www.ucl.ac.uk", "aff_unique_abbr": "SUEZ;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;United Kingdom" }, { "title": "Sigmoid Gating is More Sample Efficient than Softmax Gating in Mixture of Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95780", "id": "IG6kd5V4kd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IG6kd5V4kd", "openreview": "https://openreview.net/forum?id=IG6kd5V4kd", "poster": "/media/PosterPDFs/NeurIPS%202024/95780.png?t=1733245107.1909153", "project": "", "author_site": "Huy Nguyen, Nhat Ho, Alessandro Rinaldo", "tldr": "", "abstract": "The softmax gating function is arguably the most popular choice in mixture of experts modeling. Despite its widespread use in practice, the softmax gating may lead to unnecessary competition among experts, potentially causing the undesirable phenomenon of representation collapse due to its inherent structure. In response, the sigmoid gating function has been recently proposed as an alternative and has been demonstrated empirically to achieve superior performance. However, a rigorous examination of the sigmoid gating function is lacking in current literature. In this paper, we verify theoretically that the sigmoid gating, in fact, enjoys a higher sample efficiency than the softmax gating for the statistical task of expert estimation. Towards that goal, we consider a regression framework in which the unknown regression function is modeled as a mixture of experts, and study the rates of convergence of the least squares estimator under the over-specified case in which the number of fitted experts is larger than the true value. We show that two gating regimes naturally arise and, in each of them, we formulate an identifiability condition for the expert functions and derive the corresponding convergence rates. In both cases, we find that experts formulated as feed-forward networks with commonly used activation such as $\\mathrm{ReLU}$ and $\\mathrm{GELU}$ enjoy faster convergence rates under the sigmoid gating than those under softmax gating. Furthermore, given the same choice of experts, we demonstrate that the sigmoid gating function requires a smaller sample size than its softmax counterpart to attain the same error of expert estimation and, therefore, is more sample efficient.", "keywords": "mixture of experts;sigmoid gating function;softmax gating function;sample efficiency", "primary_area": "learning_theory", "supplementary_material": "", "author": "Huy Nguyen;Nhat Ho;Alessandro Rinaldo", "authorids": "~Huy_Nguyen5;~Nhat_Ho1;~Alessandro_Rinaldo1", "gender": "M;M;M", "homepage": "https://huynm99.github.io/;https://nhatptnk8912.github.io/;https://arinaldo.github.io", "dblp": "48/6075;203/4479;75/5558", "google_scholar": "_YYwzhQAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ;tBIzO-EAAAAJ", "orcid": ";;", "linkedin": "huy-nguyen-081199/;nhat-pham-minh-ho-267b8164/;", "or_profile": "~Huy_Nguyen5;~Nhat_Ho1;~Alessandro_Rinaldo1", "aff": "Microsoft AI;University of Texas, Austin;University of Texas at Austin", "aff_domain": "microsoft.com;utexas.edu;utexas.edu", "position": "Intern;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nnguyen2024sigmoid,\ntitle={Sigmoid Gating is More Sample Efficient than Softmax Gating in Mixture of Experts},\nauthor={Huy Nguyen and Nhat Ho and Alessandro Rinaldo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IG6kd5V4kd}\n}", "github": "", "reviewers": "PmAq;4kwq;tkzC", "pdf_size": 761251, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;2;3", "novelty": "2;2;2", "presentation": "2;2;2", "wc_summary": "138;51;101", "wc_strengths": "35;47;71", "wc_weaknesses": "349;281;66", "wc_questions": "9;6;64", "wc_limitations": "15;9;62", "wc_review": "546;394;364", "wc_reply_reviewers": "38;313;18", "wc_reply_authors": "45;787;30", "reply_reviewers": "1;3;1", "reply_authors": "2;4;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 96.66666666666667, 35.64952859280034 ], "wc_strengths_avg": [ 51.0, 14.966629547095765 ], "wc_weaknesses_avg": [ 232.0, 120.61785384704316 ], "wc_questions_avg": [ 26.333333333333332, 26.662499674428297 ], "wc_limitations_avg": [ 28.666666666666668, 23.697163449568293 ], "wc_review_avg": [ 434.6666666666667, 79.67154796761236 ], "wc_reply_reviewers_avg": [ 123.0, 134.59816739713312 ], "wc_reply_authors_avg": [ 287.3333333333333, 353.37075273554956 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12329165020275015237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "microsoft.com;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;University of Texas at Austin", "aff_unique_dep": "Microsoft AI;", "aff_unique_url": "https://www.microsoft.com;https://www.utexas.edu", "aff_unique_abbr": "Microsoft;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unveiling User Satisfaction and Creator Productivity Trade-Offs in Recommendation Platforms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95779", "id": "IGAN7RldcF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IGAN7RldcF", "openreview": "https://openreview.net/forum?id=IGAN7RldcF", "poster": "/media/PosterPDFs/NeurIPS%202024/95779.png?t=1731526835.950306", "project": "", "author_site": "Fan Yao, Yiming Liao, Jingzhou Liu, Shaoliang Nie, Qifan Wang, Haifeng Xu, Hongning Wang", "tldr": "", "abstract": "On User-Generated Content (UGC) platforms, recommendation algorithms significantly impact creators' motivation to produce content as they compete for algorithmically allocated user traffic. This phenomenon subtly shapes the volume and diversity of the content pool, which is crucial for the platform's sustainability. In this work, we demonstrate, both theoretically and empirically, that a purely relevance-driven policy with low exploration strength boosts short-term user satisfaction but undermines the long-term richness of the content pool. In contrast, a more aggressive exploration policy may slightly compromise user satisfaction but promote higher content creation volume. Our findings reveal a fundamental trade-off between immediate user satisfaction and overall content production on UGC platforms. Building on this finding, we propose an efficient optimization method to identify the optimal exploration strength, balancing user and creator engagement. Our model can serve as a pre-deployment audit tool for recommendation algorithms on UGC platforms, helping to align their immediate objectives with sustainable, long-term goals.", "keywords": "Mechanism Design;Content Recommendation Platform;Content Creator Competition Game;Creator Productivity", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/924b16fd8542a68a78b498eebdbdbc937447cbcf.zip", "author": "Fan Yao;Yiming Liao;Jingzhou Liu;Shaoliang Nie;Qifan Wang;Haifeng Xu;Hongning Wang", "authorids": "~Fan_Yao2;~Yiming_Liao1;~Jingzhou_Liu1;~Shaoliang_Nie1;~Qifan_Wang2;~Haifeng_Xu1;~Hongning_Wang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/MarcusYF/MarcusYF.github.io;;;https://snie2012.github.io;https://wqfcr.github.io/;http://www.haifeng-xu.com/;http://www.cs.virginia.edu/~hw5x/", "dblp": ";203/0243.html;61/11308;213/7860;33/8610;04/1895;05/6545", "google_scholar": "Vb4MZPMAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;LrSyLosAAAAJ;nLgg388AAAAJ;qkdvKNoAAAAJ", "orcid": "0009-0006-4764-4198;;;;0000-0002-7570-5756;;0000-0002-6524-9195", "linkedin": ";yiming-liao-0382359a;;shaoliang-nie/;;;", "or_profile": "~Fan_Yao2;~Yiming_Liao1;~Jingzhou_Liu1;~Shaoliang_Nie1;~Qifan_Wang2;~Haifeng_Xu1;~Hongning_Wang1", "aff": "Meta Facebook;Meta;Meta;Meta Inc;Meta AI;University of Chicago;Tsinghua University", "aff_domain": "meta.com;meta.com;meta.com;meta.com;fb.com;cs.uchicago.edu;tsinghua.edu.cn", "position": "Intern;Research Scientist;Researcher;Researcher;Principal Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nyao2024unveiling,\ntitle={Unveiling User Satisfaction and Creator Productivity Trade-Offs in Recommendation Platforms},\nauthor={Fan Yao and Yiming Liao and Jingzhou Liu and Shaoliang Nie and Qifan Wang and Haifeng Xu and Hongning Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IGAN7RldcF}\n}", "github": "", "reviewers": "1ynk;GVPh;1Tuv", "pdf_size": 1052501, "rating": "4;6;7", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "79;89;110", "wc_strengths": "109;19;139", "wc_weaknesses": "128;302;228", "wc_questions": "36;72;147", "wc_limitations": "28;6;41", "wc_review": "380;488;665", "wc_reply_reviewers": "0;26;103", "wc_reply_authors": "57;70;75", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 92.66666666666667, 12.918548250050733 ], "wc_strengths_avg": [ 89.0, 50.99019513592785 ], "wc_weaknesses_avg": [ 219.33333333333334, 71.29905718560067 ], "wc_questions_avg": [ 85.0, 46.238512086787566 ], "wc_limitations_avg": [ 25.0, 14.445299120013633 ], "wc_review_avg": [ 511.0, 117.48191350161096 ], "wc_reply_reviewers_avg": [ 43.0, 43.734044709661454 ], "wc_reply_authors_avg": [ 67.33333333333333, 7.586537784494029 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12320290916810070933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "meta.com;meta.com;meta.com;meta.com;fb.com;cs.uchicago.edu;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;2", "aff_unique_norm": "Meta;University of Chicago;Tsinghua University", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.uchicago.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Meta;UChicago;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "OpenDlign: Open-World Point Cloud Understanding with Depth-Aligned Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95778", "id": "IGCaTQ4n1R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IGCaTQ4n1R", "openreview": "https://openreview.net/forum?id=IGCaTQ4n1R", "poster": "/media/PosterPDFs/NeurIPS%202024/95778.png?t=1731585706.0182023", "project": "", "author_site": "Ye Mao, JUNPENG JING, Krystian Mikolajczyk", "tldr": "", "abstract": "Recent open-world 3D representation learning methods using Vision-Language Models (VLMs) to align 3D point clouds with image-text information have shown superior 3D zero-shot performance. However, CAD-rendered images for this alignment often lack realism and texture variation, compromising alignment robustness. Moreover, the volume discrepancy between 3D and 2D pretraining datasets highlights the need for effective strategies to transfer the representational abilities of VLMs to 3D learning. In this paper, we present OpenDlign, a novel open-world 3D model using depth-aligned images generated from a diffusion model for robust multimodal alignment. These images exhibit greater texture diversity than CAD renderings due to the stochastic nature of the diffusion model. By refining the depth map projection pipeline and designing depth-specific prompts, OpenDlign leverages rich knowledge in pre-trained VLM for 3D representation learning with streamlined fine-tuning. Our experiments show that OpenDlign achieves high zero-shot and few-shot performance on diverse 3D tasks, despite only fine-tuning 6 million parameters on a limited ShapeNet dataset. In zero-shot classification, OpenDlign surpasses previous models by 8.0\\% on ModelNet40 and 16.4\\% on OmniObject3D. Additionally, using depth-aligned images for multimodal alignment consistently enhances the performance of other state-of-the-art models.", "keywords": "Open-World;3D Representation Learning;3D Shape Understanding", "primary_area": "machine_vision", "supplementary_material": "/attachment/9621f39b1930040a1a9b5beaef3eb37835df0eef.zip", "author": "Ye Mao;Junpeng Jing;Krystian Mikolajczyk", "authorids": "~Ye_Mao1;~Junpeng_Jing1;~Krystian_Mikolajczyk3", "gender": "M;M;", "homepage": "https://yebulabula.github.io/;https://tomtomtommi.github.io/;http://www.imperial.ac.uk/people/k.mikolajczyk", "dblp": "155/6947;315/5225;96/433", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=OefyYf0AAAAJ;https://scholar.google.co.uk/citations?user=s1IAWfgAAAAJ", "orcid": "0009-0004-7853-5993;0000-0001-5669-8573;", "linkedin": ";;", "or_profile": "~Ye_Mao1;~Junpeng_Jing1;~Krystian_Mikolajczyk3", "aff": "Imperial College London;Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;imperial.ac.uk;imperial.ac.uk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmao2024opendlign,\ntitle={OpenDlign: Open-World Point Cloud Understanding with Depth-Aligned Images},\nauthor={Ye Mao and Junpeng Jing and Krystian Mikolajczyk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IGCaTQ4n1R}\n}", "github": "", "reviewers": "kzvy;XK9f;tXWH;ky3c", "pdf_size": 5406472, "rating": "5;5;5;7", "confidence": "4;3;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "37;104;65;78", "wc_strengths": "21;103;55;119", "wc_weaknesses": "85;237;236;84", "wc_questions": "6;9;26;157", "wc_limitations": "1;15;1;8", "wc_review": "150;468;383;446", "wc_reply_reviewers": "33;106;18;73", "wc_reply_authors": "0;61;46;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.0, 24.13503677229434 ], "wc_strengths_avg": [ 74.5, 38.84263121880391 ], "wc_weaknesses_avg": [ 160.5, 76.00164471904539 ], "wc_questions_avg": [ 49.5, 62.53199181219162 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 361.75, 126.17126257591306 ], "wc_reply_reviewers_avg": [ 57.5, 34.471002306286366 ], "wc_reply_authors_avg": [ 26.75, 27.270634389394026 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15913104911142575981&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "ic.ac.uk;imperial.ac.uk;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Provable Editing of Deep Neural Networks using Parametric Linear Relaxation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95777", "id": "IGhpUd496D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IGhpUd496D", "openreview": "https://openreview.net/forum?id=IGhpUd496D", "poster": "/media/PosterPDFs/NeurIPS%202024/95777.png?t=1734018655.9195552", "project": "", "author_site": "Zhe Tao, Aditya V Thakur", "tldr": "", "abstract": "Ensuring that a DNN satisfies a desired property is critical when deploying DNNs in safety-critical applications. There are efficient methods that can verify whether a DNN satisfies a property, as seen in the annual DNN verification competition (VNN-COMP). However, the problem of provably editing a DNN to satisfy a property remains challenging. We present PREPARED, the first efficient technique for provable editing of DNNs. Given a DNN $\\mathcal{N}$ with parameters $\\theta$, input polytope $P$, and output polytope $Q$, PREPARED finds new parameters $\\theta'$ such that $\\forall \\mathrm{x} \\in P . \\mathcal{N}(\\mathrm{x}; \\theta') \\in Q$ while minimizing the changes $\\lVert{\\theta' - \\theta}\\rVert$. Given a DNN and a property it violates from the VNN-COMP benchmarks, PREPARED is able to provably edit the DNN to satisfy this property within 45 seconds. PREPARED is efficient because it relaxes the NP-hard provable editing problem to solving a linear program. The key contribution is the novel notion of Parametric Linear Relaxation, which enables PREPARED to construct tight output bounds of the DNN that are parameterized by the new parameters $\\theta'$. We demonstrate that PREPARED is more efficient and effective compared to prior DNN editing approaches i) using the VNN-COMP benchmarks, ii) by editing CIFAR10 and TinyImageNet image-recognition DNNs, and BERT sentiment-classification DNNs for local robustness, and iii) by training a DNN to model a geodynamics process and satisfy physics constraints.", "keywords": "Provable editing;provable repair;provable training;trustworthiness;linear programming;local robustness;verification", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zhe Tao;Aditya Thakur", "authorids": "~Zhe_Tao2;~Aditya_Thakur1", "gender": ";M", "homepage": "https://zhe-tao.com/;http://thakur.cs.ucdavis.edu/", "dblp": ";68/1945", "google_scholar": ";https://scholar.google.com.tw/citations?user=x8952qgAAAAJ", "orcid": "0000-0002-4047-699X;0000-0003-3166-1517", "linkedin": ";", "or_profile": "~Zhe_Tao2;~Aditya_Thakur1", "aff": "University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ntao2024provable,\ntitle={Provable Editing of Deep Neural Networks using Parametric Linear Relaxation},\nauthor={Zhe Tao and Aditya Thakur},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IGhpUd496D}\n}", "github": "", "reviewers": "NEaR;UT9i;hnCj;rPq2", "pdf_size": 744389, "rating": "5;6;7;7", "confidence": "2;5;3;4", "soundness": "3;4;3;4", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "75;111;97;91", "wc_strengths": "31;104;30;57", "wc_weaknesses": "84;161;47;60", "wc_questions": "36;92;175;70", "wc_limitations": "46;43;8;10", "wc_review": "272;511;357;288", "wc_reply_reviewers": "17;600;18;4", "wc_reply_authors": "0;477;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 12.913171570144957 ], "wc_strengths_avg": [ 55.5, 30.02082610455615 ], "wc_weaknesses_avg": [ 88.0, 44.1871021905714 ], "wc_questions_avg": [ 93.25, 51.24146270355678 ], "wc_limitations_avg": [ 26.75, 17.795715776557007 ], "wc_review_avg": [ 357.0, 94.47486438201433 ], "wc_reply_reviewers_avg": [ 159.75, 254.23844614849264 ], "wc_reply_authors_avg": [ 119.25, 206.54705880258862 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.40451991747794525, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qvyUPhvxlYsJ:scholar.google.com/&scioq=Provable+Editing+of+Deep+Neural+Networks+using+Parametric+Linear+Relaxation&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "ucdavis.edu;ucdavis.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "SAMPa: Sharpness-aware Minimization Parallelized", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95776", "id": "IGn0ktYDwV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IGn0ktYDwV", "openreview": "https://openreview.net/forum?id=IGn0ktYDwV", "poster": "/media/PosterPDFs/NeurIPS%202024/95776.png?t=1733889321.221525", "project": "", "author_site": "Wanyun Xie, Thomas Pethick, Volkan Cevher", "tldr": "", "abstract": "Sharpness-aware minimization (SAM) has been shown to improve the generalization of neural networks. However, each SAM update requires _sequentially_ computing two gradients, effectively doubling the per-iteration cost compared to base optimizers like SGD. We propose a simple modification of SAM, termed SAMPa, which allows us to fully parallelize the two gradient computations. SAMPa achieves a twofold speedup of SAM under the assumption that communication costs between devices are negligible. Empirical results show that SAMPa ranks among the most efficient variants of SAM in terms of computational time. Additionally, our method consistently outperforms SAM across both vision and language tasks. Notably, SAMPa theoretically maintains convergence guarantees even for _fixed_ perturbation sizes, which is established through a novel Lyapunov function. We in fact arrive at SAMPa by treating this convergence guarantee as a hard requirement---an approach we believe is promising for developing SAM-based methods in general. Our code is available at https://github.com/LIONS-EPFL/SAMPa.", "keywords": "Sharpness aware minimization;efficient learning;generalization;supervised learning;optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Wanyun Xie;Thomas Pethick;Volkan Cevher", "authorids": "~Wanyun_Xie1;~Thomas_Pethick1;~Volkan_Cevher1", "gender": "F;M;M", "homepage": ";https://pethick.dk;http://lions.epfl.ch", "dblp": ";305/4521;70/5301", "google_scholar": "S4rh8MoAAAAJ;;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;", "linkedin": "wanyun-xie-71a287210/;;", "or_profile": "~Wanyun_Xie1;~Thomas_Pethick1;~Volkan_Cevher1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Amazon Development Center Germany", "aff_domain": "epfl.ch;epfl.ch;amazon.de", "position": "PhD student;PhD student;Amazon Scholar", "bibtex": "@inproceedings{\nxie2024sampa,\ntitle={{SAMP}a: Sharpness-aware Minimization Parallelized},\nauthor={Wanyun Xie and Thomas Pethick and Volkan Cevher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IGn0ktYDwV}\n}", "github": "", "reviewers": "9hZ5;c6Fm;LruU;zVXP", "pdf_size": 2148769, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "78;109;80;66", "wc_strengths": "47;37;54;69", "wc_weaknesses": "140;164;190;79", "wc_questions": "3;2;59;15", "wc_limitations": "1;22;6;8", "wc_review": "269;334;389;237", "wc_reply_reviewers": "52;29;55;130", "wc_reply_authors": "166;391;172;214", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 15.801503093060482 ], "wc_strengths_avg": [ 51.75, 11.648497757221744 ], "wc_weaknesses_avg": [ 143.25, 41.09364306069735 ], "wc_questions_avg": [ 19.75, 23.23117517475171 ], "wc_limitations_avg": [ 9.25, 7.790218225441442 ], "wc_review_avg": [ 307.25, 58.72978375577421 ], "wc_reply_reviewers_avg": [ 66.5, 38.01644381054072 ], "wc_reply_authors_avg": [ 235.75, 91.52151386422757 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17747817037926274020&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch;amazon.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "EPFL;EPFL;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "Consistency Models for Scalable and Fast Simulation-Based Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95775", "id": "IHjKpKljyH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IHjKpKljyH", "openreview": "https://openreview.net/forum?id=IHjKpKljyH", "poster": "/media/PosterPDFs/NeurIPS%202024/95775.png?t=1733125256.9112792", "project": "", "author_site": "Marvin Schmitt, Valentin Pratz, Ullrich K\u00f6the, Paul-Christian B\u00fcrkner, Stefan Radev", "tldr": "", "abstract": "Simulation-based inference (SBI) is constantly in search of more expressive and efficient algorithms to accurately infer the parameters of complex simulation models.\nIn line with this goal, we present consistency models for posterior estimation (CMPE), a new conditional sampler for SBI that inherits the advantages of recent unconstrained architectures and overcomes their sampling inefficiency at inference time.\nCMPE essentially distills a continuous probability flow and enables rapid few-shot inference with an unconstrained architecture that can be flexibly tailored to the structure of the estimation problem.\nWe provide hyperparameters and default architectures that support consistency training over a wide range of different dimensions, including low-dimensional ones which are important in SBI workflows but were previously difficult to tackle even with unconditional consistency models. \nOur empirical evaluation demonstrates that CMPE not only outperforms current state-of-the-art algorithms on hard low-dimensional benchmarks, but also achieves competitive performance with much faster sampling speed on two realistic estimation problems with high data and/or parameter dimensions.", "keywords": "simulation-based inference;amortized inference;machine learning for science;probabilistic modeling;Bayesian inference", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/8084ea58df93979a3dcc4d70ba616f2cb6465e0b.zip", "author": "Marvin Schmitt;Valentin Pratz;Ullrich Koethe;Paul-Christian B\u00fcrkner;Stefan T. Radev", "authorids": "~Marvin_Schmitt1;~Valentin_Pratz1;~Ullrich_Koethe1;~Paul-Christian_B\u00fcrkner1;~Stefan_T._Radev1", "gender": ";M;M;;M", "homepage": ";;https://hci.iwr.uni-heidelberg.de/vislearn/people/ullrich-koethe/;;https://faculty.rpi.edu/stefan-radev", "dblp": ";340/7444;15/809;;", "google_scholar": ";;gt-yaNMAAAAJ;;JbDfkRkAAAAJ", "orcid": ";0000-0001-8371-3417;0000-0001-6036-1287;;0000-0002-6702-9559", "linkedin": ";;;;stefan-radev-21b713187/", "or_profile": "~Marvin_Schmitt1;~Valentin_Pratz1;~Ullrich_Koethe1;~Paul-Christian_B\u00fcrkner1;~Stefan_T._Radev1", "aff": ";Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg University;;Rensselaer Polytechnic Institute", "aff_domain": ";uni-heidelberg.de;uni-heidelberg.de;;epi.edu", "position": ";Undergrad student;Adjunct Professor;;Assistant Professor", "bibtex": "@inproceedings{\nschmitt2024consistency,\ntitle={Consistency Models for Scalable and Fast Simulation-Based Inference},\nauthor={Marvin Schmitt and Valentin Pratz and Ullrich Koethe and Paul-Christian B{\\\"u}rkner and Stefan T. Radev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IHjKpKljyH}\n}", "github": "", "reviewers": "iNhZ;GeMw;bX9J;XVP6", "pdf_size": 5521105, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;4;4", "novelty": "2;2;2;4", "presentation": "3;3;4;3", "wc_summary": "52;76;33;50", "wc_strengths": "83;42;94;107", "wc_weaknesses": "87;167;108;98", "wc_questions": "431;18;61;76", "wc_limitations": "14;1;9;8", "wc_review": "667;304;305;339", "wc_reply_reviewers": "888;237;41;66", "wc_reply_authors": "2208;496;0;0", "reply_reviewers": "2;4;1;1", "reply_authors": "5;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 52.75, 15.31951369985353 ], "wc_strengths_avg": [ 81.5, 24.336187047275914 ], "wc_weaknesses_avg": [ 115.0, 30.92733418838423 ], "wc_questions_avg": [ 146.5, 165.62985841930796 ], "wc_limitations_avg": [ 8.0, 4.636809247747852 ], "wc_review_avg": [ 403.75, 152.63907592749635 ], "wc_reply_reviewers_avg": [ 308.0, 343.2542789245314 ], "wc_reply_authors_avg": [ 676.0, 907.3830503155765 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10625256466693540427&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";uni-heidelberg.de;uni-heidelberg.de;;epi.edu", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg University;Rensselaer Polytechnic Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-heidelberg.de/;https://www.uni-heidelberg.de;https://www.rpi.edu", "aff_unique_abbr": "Uni Heidelberg;Uni Heidelberg;RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;United States" }, { "title": "Rethinking Decoders for Transformer-based Semantic Segmentation: A Compression Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95774", "id": "IHjoPnNZb9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IHjoPnNZb9", "openreview": "https://openreview.net/forum?id=IHjoPnNZb9", "poster": "", "project": "", "author_site": "Qishuai Wen, Chun-Guang Li", "tldr": "", "abstract": "State-of-the-art methods for Transformer-based semantic segmentation typically adopt Transformer decoders that are used to extract additional embeddings from image embeddings via cross-attention, refine either or both types of embeddings via self-attention, and project image embeddings onto the additional embeddings via dot-product. Despite their remarkable success, these empirical designs still lack theoretical justifications or interpretations, thus hindering potentially principled improvements. In this paper, we argue that there are fundamental connections between semantic segmentation and compression, especially between the Transformer decoders and Principal Component Analysis (PCA). From such a perspective, we derive a white-box, fully attentional DEcoder for PrIncipled semantiC segemenTation (DEPICT), with the interpretations as follows: 1) the self-attention operator refines image embeddings to construct an ideal principal subspace that aligns with the supervision and retains most information; 2) the cross-attention operator seeks to find a low-rank approximation of the refined image embeddings, which is expected to be a set of orthonormal bases of the principal subspace and corresponds to the predefined classes; 3) the dot-product operation yields compact representation for image embeddings as segmentation masks. Experiments conducted on dataset ADE20K find that DEPICT consistently outperforms its black-box counterpart, Segmenter, and it is light weight and more robust.", "keywords": "Semantic Segmentation;Transformer;Decoder;Coding Rate;Principal Components", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Qishuai Wen;Chun-Guang Li", "authorids": "~Qishuai_Wen1;~Chun-Guang_Li3", "gender": "M;M", "homepage": ";https://teacher.bupt.edu.cn/cgli", "dblp": "393/2530;75/1999", "google_scholar": "https://scholar.google.com.hk/citations?user=tbbbWEMAAAAJ;KhXx4joAAAAJ", "orcid": ";", "linkedin": "wqs/;chun-guang-li-46923336/", "or_profile": "~Qishuai_Wen1;~Chun-guang_Li1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications, P.R. China", "aff_domain": "bupt.edu.cn;bupt.edu.cn", "position": "Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nwen2024rethinking,\ntitle={Rethinking Decoders for Transformer-based Semantic Segmentation: Compression is All You Need},\nauthor={Qishuai Wen and Chun-Guang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IHjoPnNZb9}\n}", "github": "", "reviewers": "V3V2;MHpG;ZoMh;CNaF", "pdf_size": 18873300, "rating": "5;5;5;6", "confidence": "3;4;3;4", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "69;55;39;47", "wc_strengths": "36;44;38;28", "wc_weaknesses": "212;62;225;72", "wc_questions": "111;119;2;48", "wc_limitations": "26;54;43;1", "wc_review": "454;334;347;196", "wc_reply_reviewers": "45;58;20;0", "wc_reply_authors": "76;63;22;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 52.5, 11.07925990308017 ], "wc_strengths_avg": [ 36.5, 5.722761571129799 ], "wc_weaknesses_avg": [ 142.75, 75.97162299174607 ], "wc_questions_avg": [ 70.0, 47.93224384482746 ], "wc_limitations_avg": [ 31.0, 19.987496091306685 ], "wc_review_avg": [ 332.75, 91.66071950404928 ], "wc_reply_reviewers_avg": [ 30.75, 22.398381637966615 ], "wc_reply_authors_avg": [ 40.25, 30.6135182558294 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j_9SbmH259IJ:scholar.google.com/&scioq=Rethinking+Decoders+for+Transformer-based+Semantic+Segmentation:+A+Compression+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "bupt.edu.cn;bupt.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Piecewise deterministic generative models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95773", "id": "IIoH8bf5BA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IIoH8bf5BA", "openreview": "https://openreview.net/forum?id=IIoH8bf5BA", "poster": "", "project": "", "author_site": "Andrea Bertazzi, Dario Shariatian, Umut Simsekli, Eric Moulines, Alain Durmus", "tldr": "", "abstract": "We introduce a novel class of generative models based on piecewise deterministic Markov processes (PDMPs), a family of non-diffusive stochastic processes consisting of deterministic motion and random jumps at random times. Similarly to diffusions, such Markov processes admit time reversals that turn out to be PDMPs as well. We apply this observation to three PDMPs considered in the literature: the Zig-Zag process, Bouncy Particle Sampler, and Randomised Hamiltonian Monte Carlo. For these three particular instances, we show that the jump rates and kernels of the corresponding time reversals admit explicit expressions depending on some conditional densities of the PDMP under consideration before and after a jump. Based on these results, we propose efficient training procedures to learn these characteristics and consider methods to approximately simulate the reverse process. Finally, we provide bounds in the total variation distance between the data distribution and the resulting distribution of our model in the case where the base distribution is the standard $d$-dimensional Gaussian distribution. Promising numerical simulations support further investigations into this class of models.", "keywords": "generative modelling;piecewise deterministic Markov processes;time reversals", "primary_area": "generative_models", "supplementary_material": "/attachment/6a3852a180ffc10fff6c1f854f5ef3a8a6405386.zip", "author": "Andrea Bertazzi;Dario Shariatian;Umut Simsekli;Eric Moulines;Alain Oliviero Durmus", "authorids": "~Andrea_Bertazzi1;~Dario_Shariatian1;~Umut_Simsekli1;~Eric_Moulines1;~Alain_Oliviero_Durmus1", "gender": "M;;M;M;M", "homepage": ";http://darioshar.github.io/;https://www.di.ens.fr/~simsekli/;;", "dblp": ";;https://dblp.org/pers/s/Simsekli:Umut.html;54/2358;01/11275", "google_scholar": "g_vK250AAAAJ;KJMzBTwAAAAJ;https://scholar.google.fr/citations?user=CuArAkgAAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ;", "orcid": ";;;0000-0002-2058-0693;", "linkedin": ";dario-shariatian-900989204/;;;", "or_profile": "~Andrea_Bertazzi1;~Dario_Shariatian1;~Umut_Simsekli1;~Eric_Moulines1;~Alain_Durmus1", "aff": "\u00c9cole Polytechnique;INRIA;INRIA;Ecole polytechnique;\u00c9cole Polytechnique", "aff_domain": "polytechnique.edu;inria.fr;inria.fr;polytechnique.edu;polytechnique.fr", "position": "Postdoc;PhD student;Research Faculty;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbertazzi2024piecewise,\ntitle={Piecewise deterministic generative models},\nauthor={Andrea Bertazzi and Dario Shariatian and Umut Simsekli and Eric Moulines and Alain Oliviero Durmus},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IIoH8bf5BA}\n}", "github": "", "reviewers": "RL5x;jjQr;a5JX;LXdU;FgP1", "pdf_size": 909888, "rating": "6;7;7;7;7", "confidence": "4;4;5;3;4", "soundness": "3;3;4;3;3", "novelty": "4;3;4;3;4", "presentation": "2;2;4;3;2", "wc_summary": "74;311;224;88;221", "wc_strengths": "25;32;445;49;191", "wc_weaknesses": "63;218;448;84;482", "wc_questions": "105;1;104;69;239", "wc_limitations": "3;1;1;1;7", "wc_review": "270;563;1222;291;1140", "wc_reply_reviewers": "0;0;33;11;89", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 183.6, 89.90350382493443 ], "wc_strengths_avg": [ 148.4, 160.27676063609474 ], "wc_weaknesses_avg": [ 259.0, 176.73256632550778 ], "wc_questions_avg": [ 103.6, 77.53605612874567 ], "wc_limitations_avg": [ 2.6, 2.3323807579381204 ], "wc_review_avg": [ 697.2, 409.1441799659382 ], "wc_reply_reviewers_avg": [ 26.6, 33.44607600302313 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16051730879050953874&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "polytechnique.edu;inria.fr;inria.fr;polytechnique.edu;polytechnique.fr", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Ecole Polytechnique;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.inria.fr", "aff_unique_abbr": "X;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "title": "Inflationary Flows: Calibrated Bayesian Inference with Diffusion-Based Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95772", "id": "IM4LtYRWdE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IM4LtYRWdE", "openreview": "https://openreview.net/forum?id=IM4LtYRWdE", "poster": "/media/PosterPDFs/NeurIPS%202024/95772.png?t=1731596067.2041218", "project": "", "author_site": "Daniela de Albuquerque, John Pearson", "tldr": "", "abstract": "Beyond estimating parameters of interest from data, one of the key goals of statistical inference is to properly quantify uncertainty in these estimates. In Bayesian inference, this uncertainty is provided by the posterior distribution, the computation of which typically involves an intractable high-dimensional integral. Among available approximation methods, sampling-based approaches come with strong theoretical guarantees but scale poorly to large problems, while variational approaches scale well but offer few theoretical guarantees. In particular, variational methods are known to produce overconfident estimates of posterior uncertainty and are typically non-identifiable, with many latent variable configurations generating equivalent predictions. Here, we address these challenges by showing how diffusion-based models (DBMs), which have recently produced state-of-the-art performance in generative modeling tasks, can be repurposed for performing calibrated, identifiable Bayesian inference. By exploiting a previously established connection between the stochastic and probability flow ordinary differential equations (pfODEs) underlying DBMs, we derive a class of models, \\emph{inflationary flows,} that uniquely and deterministically map high-dimensional data to a lower-dimensional Gaussian distribution via ODE integration. This map is both invertible and neighborhood-preserving, with controllable numerical error, with the result that uncertainties in the data are correctly propagated to the latent space. We demonstrate how such maps can be learned via standard DBM training using a novel noise schedule and are effective at both preserving and reducing intrinsic data dimensionality. The result is a class of highly expressive generative models, uniquely defined on a low-dimensional latent space, that afford principled Bayesian inference.", "keywords": "Deep Generative Models;Diffusion-Based Models;Probability Flow ODE;Inference;Bayesian Inference;Calibrated Inference;Compression;Dimension Reduction", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ed2df92ef74124144976632f393fce4847f800b6.zip", "author": "Daniela F De Albuquerque;John Pearson", "authorids": "~Daniela_F_De_Albuquerque1;~John_Pearson1", "gender": "F;M", "homepage": "https://dannyfa.github.io//;https://pearsonlab.github.io", "dblp": "319/1284;21/9149", "google_scholar": ";4whjDosAAAAJ", "orcid": ";0000-0002-9876-7837", "linkedin": ";", "or_profile": "~Daniela_F_De_Albuquerque1;~John_Michael_Pearson1", "aff": "Duke University;Duke University", "aff_domain": "duke.edu;duke.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nalbuquerque2024inflationary,\ntitle={Inflationary Flows: Calibrated Bayesian Inference with Diffusion-Based Models},\nauthor={Daniela F De Albuquerque and John Pearson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IM4LtYRWdE}\n}", "github": "", "reviewers": "13gp;y6tA;pn8r;v9FZ", "pdf_size": 16801318, "rating": "5;6;7;7", "confidence": "3;4;3;4", "soundness": "2;4;3;4", "novelty": "2;4;3;3", "presentation": "2;4;3;4", "wc_summary": "45;69;119;94", "wc_strengths": "74;35;74;155", "wc_weaknesses": "341;246;233;316", "wc_questions": "80;33;29;88", "wc_limitations": "4;6;11;168", "wc_review": "544;389;466;821", "wc_reply_reviewers": "38;55;215;0", "wc_reply_authors": "13;27;30;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 81.75, 27.616797424755827 ], "wc_strengths_avg": [ 84.5, 43.706406853000395 ], "wc_weaknesses_avg": [ 284.0, 45.601535061881414 ], "wc_questions_avg": [ 57.5, 26.688012290165037 ], "wc_limitations_avg": [ 47.25, 69.7616477729705 ], "wc_review_avg": [ 555.0, 163.05980497964543 ], "wc_reply_reviewers_avg": [ 77.0, 82.12490487056895 ], "wc_reply_authors_avg": [ 17.5, 11.968709203585824 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8011268335150459012&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "duke.edu;duke.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Comprehensive Analysis on the Learning Curve in Kernel Ridge Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95771", "id": "IMlDpZmLnL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IMlDpZmLnL", "openreview": "https://openreview.net/forum?id=IMlDpZmLnL", "poster": "/media/PosterPDFs/NeurIPS%202024/95771.png?t=1729114147.3789184", "project": "", "author_site": "Tin Sum Cheng, Aurelien Lucchi, Anastasis Kratsios, David Belius", "tldr": "", "abstract": "This paper conducts a comprehensive study of the learning curves of kernel ridge regression (KRR) under minimal assumptions.\nOur contributions are three-fold: 1) we analyze the role of key properties of the kernel, such as its spectral eigen-decay, the characteristics of the eigenfunctions, and the smoothness of the kernel; 2) we demonstrate the validity of the Gaussian Equivalent Property (GEP), which states that the generalization performance of KRR remains the same when the whitened features are replaced by standard Gaussian vectors, thereby shedding light on the success of previous analyzes under the Gaussian Design Assumption; 3) we derive novel bounds that improve over existing bounds across a broad range of setting such as (in)dependent feature vectors and various combinations of eigen-decay rates in the over/underparameterized regimes.", "keywords": "kernel ridge regression;generalization", "primary_area": "learning_theory", "supplementary_material": "/attachment/912ddae7bb28fefca97551801b137230e358f9ab.zip", "author": "Tin Sum Cheng;Aurelien Lucchi;Anastasis Kratsios;David Belius", "authorids": "~Tin_Sum_Cheng1;~Aurelien_Lucchi1;~Anastasis_Kratsios1;~David_Belius1", "gender": "M;M;Non-Binary;", "homepage": ";http://people.inf.ethz.ch/alucchi/;https://anastasiskratsios.github.io/;https://davidbelius.github.io/", "dblp": ";14/5780;;", "google_scholar": "5wfAh9kAAAAJ;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ;https://scholar.google.ca/citations?user=9D-bHFgAAAAJ;", "orcid": "0000-0002-3000-311X;;0000-0001-6791-3371;0000-0003-3706-043X", "linkedin": "tin-sum-cheng;;anastasiskratsios/;", "or_profile": "~Tin_Sum_Cheng1;~Aurelien_Lucchi1;~Anastasis_Kratsios1;~David_Belius1", "aff": "University of Basel;University of Basel;McMaster University;UniDistance Suisse", "aff_domain": "unibas.ch;unibas.ch;mcmaster.ca;unidistance.ch", "position": "PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncheng2024a,\ntitle={A Comprehensive Analysis on the Learning Curve in Kernel Ridge Regression},\nauthor={Tin Sum Cheng and Aurelien Lucchi and Anastasis Kratsios and David Belius},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IMlDpZmLnL}\n}", "github": "", "reviewers": "xgGe;v1Hz;x66w;8wF6", "pdf_size": 898689, "rating": "5;6;7;7", "confidence": "3;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "18;72;80;110", "wc_strengths": "17;67;127;92", "wc_weaknesses": "123;30;99;140", "wc_questions": "83;449;15;2", "wc_limitations": "1;9;1;1", "wc_review": "242;627;322;345", "wc_reply_reviewers": "95;18;10;8", "wc_reply_authors": "71;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 33.19638534539566 ], "wc_strengths_avg": [ 75.75, 40.05855089740516 ], "wc_weaknesses_avg": [ 98.0, 41.87481343242021 ], "wc_questions_avg": [ 137.25, 182.59843235909776 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 384.0, 145.41148510348143 ], "wc_reply_reviewers_avg": [ 32.75, 36.134298111351214 ], "wc_reply_authors_avg": [ 17.75, 30.74390183434757 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14694246273871941493&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "unibas.ch;unibas.ch;mcmaster.ca;unidistance.ch", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Basel;McMaster University;UniDistance Suisse", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unibas.ch;https://www.mcmaster.ca;https://www.unidistance.ch", "aff_unique_abbr": "UniBas;McMaster;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;Canada" }, { "title": "TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95770", "id": "INAeUQ04lT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=INAeUQ04lT", "openreview": "https://openreview.net/forum?id=INAeUQ04lT", "poster": "/media/PosterPDFs/NeurIPS%202024/95770.png?t=1731568804.236681", "project": "", "author_site": "Yuxuan Wang, Haixu Wu, Jiaxiang Dong, Guo Qin, Haoran Zhang, Yong Liu, Yun-Zhong Qiu, Jianmin Wang, Mingsheng Long", "tldr": "", "abstract": "Deep models have demonstrated remarkable performance in time series forecasting. However, due to the partially-observed nature of real-world applications, solely focusing on the target of interest, so-called endogenous variables, is usually insufficient to guarantee accurate forecasting. Notably, a system is often recorded into multiple variables, where the exogenous variables can provide valuable external information for endogenous variables. Thus, unlike well-established multivariate or univariate forecasting paradigms that either treat all the variables equally or ignore exogenous information, this paper focuses on a more practical setting: time series forecasting with exogenous variables. We propose a novel approach, TimeXer, to ingest external information to enhance the forecasting of endogenous variables. With deftly designed embedding layers, TimeXer empowers the canonical Transformer with the ability to reconcile endogenous and exogenous information, where patch-wise self-attention and variate-wise cross-attention are used simultaneously. Moreover, global endogenous tokens are learned to effectively bridge the causal information underlying exogenous series into endogenous temporal patches. Experimentally, TimeXer achieves consistent state-of-the-art performance on twelve real-world forecasting benchmarks and exhibits notable generality and scalability. Code is available at this repository: https://github.com/thuml/TimeXer.", "keywords": "Transformer;Time Series Forecasting;Exogenous Variable", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/08dff72696e4869778cf0b75de0ac950f6cae521.zip", "author": "Yuxuan Wang;Haixu Wu;Jiaxiang Dong;Guo Qin;Haoran Zhang;Yong Liu;Yunzhong Qiu;Jianmin Wang;Mingsheng Long", "authorids": "~Yuxuan_Wang5;~Haixu_Wu1;~Jiaxiang_Dong1;~Guo_Qin1;~Haoran_Zhang9;~Yong_Liu15;~Yunzhong_Qiu1;~Jianmin_Wang1;~Mingsheng_Long5", "gender": ";M;;M;M;;M;M;", "homepage": ";;;;https://www.thss.tsinghua.edu.cn/;;;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;", "dblp": ";286/8115;;;;;;06/3456-1.html;", "google_scholar": ";oLL_x0wAAAAJ;;;;;0kiLvzkAAAAJ;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;", "orcid": "0000-0002-4899-4716;;;0009-0003-7468-0475;0009-0004-3245-459X;;0009-0003-1034-1140;0000-0001-6841-7943;", "linkedin": ";;;;;;;;", "or_profile": "~Yuxuan_Wang5;~Haixu_Wu1;~Jiaxiang_Dong1;~Guo_Qin1;~Haoran_Zhang9;~Yong_Liu15;~Yunzhong_Qiu1;~Jianmin_Wang1;~Mingsheng_Long5", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University;;South China University of Technology;Tsinghua University;", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;mails.tsinghua.edu.cn;;scut.edu.cn;tsinghua.edu.cn;", "position": "PhD student;PhD student;;Undergrad student;Undergrad student;;Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nwang2024timexer,\ntitle={TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables},\nauthor={Yuxuan Wang and Haixu Wu and Jiaxiang Dong and Guo Qin and Haoran Zhang and Yong Liu and Yunzhong Qiu and Jianmin Wang and Mingsheng Long},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=INAeUQ04lT}\n}", "github": "", "reviewers": "1zKu;Mh7v;r8UR;N2A5", "pdf_size": 10978793, "rating": "4;6;6;8", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "36;21;45;58", "wc_strengths": "68;40;82;58", "wc_weaknesses": "234;105;163;17", "wc_questions": "61;48;132;59", "wc_limitations": "13;7;4;26", "wc_review": "412;221;426;218", "wc_reply_reviewers": "0;76;41;59", "wc_reply_authors": "0;102;0;0", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 40.0, 13.47219358530748 ], "wc_strengths_avg": [ 62.0, 15.297058540778355 ], "wc_weaknesses_avg": [ 129.75, 79.52790390799949 ], "wc_questions_avg": [ 75.0, 33.279122584587476 ], "wc_limitations_avg": [ 12.5, 8.440971508067067 ], "wc_review_avg": [ 319.25, 99.87836352283712 ], "wc_reply_reviewers_avg": [ 44.0, 28.257742301889582 ], "wc_reply_authors_avg": [ 25.5, 44.16729559300637 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17077474193538824533&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;mails.tsinghua.edu.cn;;scut.edu.cn;tsinghua.edu.cn;", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;South China University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.scut.edu.cn", "aff_unique_abbr": "THU;SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Combining Observational Data and Language for Species Range Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95769", "id": "IOKLUxB05h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IOKLUxB05h", "openreview": "https://openreview.net/forum?id=IOKLUxB05h", "poster": "", "project": "", "author_site": "Max Hamilton, Christian Lange, Elijah Cole, Alexander Shepard, Samuel Heinrich, Oisin Mac Aodha, Grant Van Horn, Subhransu Maji", "tldr": "", "abstract": "Species range maps (SRMs) are essential tools for research and policy-making in ecology, conservation, and environmental management. However, traditional SRMs rely on the availability of environmental covariates and high-quality observational data, both of which can be challenging to obtain due to geographic inaccessibility and resource constraints. We propose a novel approach combining millions of citizen science species observations with textual descriptions from Wikipedia, covering habitat preferences and range descriptions for tens of thousands of species. Our framework maps location, species, and text descriptions into a common space, facilitating the learning of rich spatial covariates at a global scale and enabling zero-shot range estimation from textual descriptions. Evaluated on held-out species, our zero-shot SRMs significantly outperform baselines and match the performance of SRMs obtained using tens of observations. Our approach also acts as a strong prior when combined with observational data, resulting in more accurate range estimation with less data. We present extensive quantitative and qualitative analyses of the learned representations in the context of range estimation and other spatial tasks, demonstrating the effectiveness of our approach.", "keywords": "Species range estimation;zero-shot learning;few-shot learning;implicit networks", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Max Hamilton;Christian Lange;Elijah Cole;Alexander Shepard;Samuel Heinrich;Oisin Mac Aodha;Grant Van Horn;Subhransu Maji", "authorids": "~Max_Hamilton1;~Christian_Lange1;~Elijah_Cole1;~Alexander_Shepard1;~Samuel_Heinrich1;~Oisin_Mac_Aodha4;~Grant_Van_Horn1;~Subhransu_Maji1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://chris-lange.github.io/;https://elijahcole.me/;;https://www.birds.cornell.edu/home/staff/sam-heinrich/;https://gvh.codes/;https://people.cs.umass.edu/~smaji/;https://homepages.inf.ed.ac.uk/omacaod/", "dblp": "271/0431;;195/2520;04/11130;;144/8033;92/6598;90/8653", "google_scholar": "tYerDBYAAAAJ;ibwKxpwAAAAJ;-atuVWQAAAAJ;;;PxYY_nsAAAAJ;l7Qx0zAAAAAJ;IfZBjkUAAAAJ", "orcid": "0009-0001-2656-4299;0009-0008-3907-5057;0000-0001-6623-0966;;;0000-0003-2953-9651;0000-0002-3869-9334;0000-0002-5787-5073", "linkedin": ";christian-lange-38a24a2a9/;elicole/;;;;;oisin-mac-aodha-406273273/", "or_profile": "~Max_Hamilton1;~Christian_Lange1;~Elijah_Cole1;~Alexander_Shepard1;~Samuel_Heinrich1;~Grant_Van_Horn1;~Subhransu_Maji1;~Oisin_Mac_Aodha2", "aff": "University of Massachusetts at Amherst;University of Edinburgh, University of Edinburgh;Altos Labs;iNaturalist;Cornell University;University of Massachusetts at Amherst;University of Massachusetts at Amherst;University of Edinburgh, University of Edinburgh", "aff_domain": "umass.edu;ed.ac.uk;altoslabs.com;inaturalist.org;cornell.edu;umass.edu;cs.umass.edu;ed.ac.uk", "position": "PhD student;PhD student;Researcher;Software Developer;Researcher;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhamilton2024combining,\ntitle={Combining Observational Data and Language for Species Range Estimation},\nauthor={Max Hamilton and Christian Lange and Elijah Cole and Alexander Shepard and Samuel Heinrich and Oisin Mac Aodha and Grant Van Horn and Subhransu Maji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IOKLUxB05h}\n}", "github": "", "reviewers": "CYPb;A4uw;2RN9;9uUS;q7Tg", "pdf_size": 8645306, "rating": "5;6;6;7;8", "confidence": "5;4;4;5;3", "soundness": "3;3;3;4;3", "novelty": "3;2;2;4;3", "presentation": "4;3;3;3;3", "wc_summary": "152;66;105;52;131", "wc_strengths": "113;28;65;54;133", "wc_weaknesses": "298;48;558;155;23", "wc_questions": "177;93;77;37;233", "wc_limitations": "8;6;9;16;79", "wc_review": "748;241;814;314;599", "wc_reply_reviewers": "62;42;23;15;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 101.2, 37.796296114831144 ], "wc_strengths_avg": [ 78.6, 38.71227195606065 ], "wc_weaknesses_avg": [ 216.4, 196.4083501279923 ], "wc_questions_avg": [ 123.4, 71.31227103381296 ], "wc_limitations_avg": [ 23.6, 27.90412155936825 ], "wc_review_avg": [ 543.2, 229.01825254769543 ], "wc_reply_reviewers_avg": [ 31.0, 18.579558659989747 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6289709020331512, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5673167237926539285&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "umass.edu;ed.ac.uk;altoslabs.com;inaturalist.org;cornell.edu;umass.edu;cs.umass.edu;ed.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;0;1", "aff_unique_norm": "University of Massachusetts Amherst;University of Edinburgh;Altos Labs;iNaturalist;Cornell University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.umass.edu;https://www.ed.ac.uk;https://altoslabs.com;https://www.inaturalist.org;https://www.cornell.edu", "aff_unique_abbr": "UMass Amherst;Edinburgh;;iNaturalist;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;1;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Multimodal Large Language Models Make Text-to-Image Generative Models Align Better", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95768", "id": "IRXyPm9IPW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IRXyPm9IPW", "openreview": "https://openreview.net/forum?id=IRXyPm9IPW", "poster": "", "project": "", "author_site": "Xun Wu, Shaohan Huang, Guolong Wang, Jing Xiong, Furu Wei", "tldr": "", "abstract": "Recent studies have demonstrated the exceptional potentials of leveraging human preference datasets to refine text-to-image generative models, enhancing the alignment between generated images and textual prompts. Despite these advances, current human preference datasets are either prohibitively expensive to construct or suffer from a lack of diversity in preference dimensions, resulting in limited applicability for instruction tuning in open-source text-to-image generative models and hinder further exploration. To address these challenges and promote the alignment of generative models through instruction tuning, we leverage multimodal large language models to create VisionPrefer, a high-quality and fine-grained preference dataset that captures multiple preference aspects. We aggregate feedback from AI annotators across four aspects: prompt-following, aesthetic, fidelity, and harmlessness to construct VisionPrefer. To validate the effectiveness of VisionPrefer, we train a reward model VP-Score over VisionPrefer to guide the training of text-to-image generative models and the preference prediction accuracy of VP-Score is comparable to human annotators. Furthermore, we use two reinforcement learning methods to supervised fine-tune generative models to evaluate the performance of VisionPrefer, and extensive experimental results demonstrate that VisionPrefer significantly improves text-image alignment in compositional image generation across diverse aspects, e.g., aesthetic, and generalizes better than previous human-preference metrics across various image distributions. Moreover, VisionPrefer indicates that the integration of AI-generated synthetic data as a supervisory signal is a promising avenue for achieving improved alignment with human preferences in vision generative models.", "keywords": "Text-to-Image Generation;Reinforcement Learning from AI Feedback;Multimodal Large Language Model", "primary_area": "generative_models", "supplementary_material": "", "author": "Xun Wu;Shaohan Huang;Guolong Wang;Jing Xiong;Furu Wei", "authorids": "~Xun_Wu1;~Shaohan_Huang1;~Guolong_Wang1;~Jing_Xiong4;~Furu_Wei1", "gender": "M;M;;M;M", "homepage": "https://github.com/Yu-shui;;https://dieuroi.github.io/;https://www.microsoft.com/en-us/research/people/fuwei/;https://menik1126.github.io/", "dblp": ";176/0380;167/9550.html;72/5870;", "google_scholar": ";;vc0T1NoAAAAJ;G-V1VpwAAAAJ;https://scholar.google.com.hk/citations?user=dFX1hXkAAAAJ", "orcid": ";;0000-0003-4874-2639;;0000-0003-2986-6978", "linkedin": ";;;;", "or_profile": "~Xun_Wu1;~Shaohan_Huang1;~Guolong_Wang1;~Furu_Wei1;~jing_xiong3", "aff": "Tsinghua University;Microsoft;University of International Business and Economics;Microsoft Research;Sun Yat-Sen University", "aff_domain": "tsinghua.edu.cn;microsoft.com;uibe.edu.cn;microsoft.com;sysu.edu.cn", "position": "MS student;Researcher;Assistant Professor;Distinguished Scientist;MS student", "bibtex": "@inproceedings{\nwu2024multimodal,\ntitle={Multimodal Large Language Models Make Text-to-Image Generative Models Align Better},\nauthor={Xun Wu and Shaohan Huang and Guolong Wang and Jing Xiong and Furu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IRXyPm9IPW}\n}", "github": "", "reviewers": "zspv;8ytx;AdFm", "pdf_size": 8204749, "rating": "4;6;7", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "3;4;3", "presentation": "3;4;3", "wc_summary": "60;61;69", "wc_strengths": "42;75;136", "wc_weaknesses": "61;74;198", "wc_questions": "136;4;2", "wc_limitations": "23;9;69", "wc_review": "322;223;474", "wc_reply_reviewers": "391;14;0", "wc_reply_authors": "755;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.333333333333336, 4.027681991198191 ], "wc_strengths_avg": [ 84.33333333333333, 38.938698260499436 ], "wc_weaknesses_avg": [ 111.0, 61.746794788609606 ], "wc_questions_avg": [ 47.333333333333336, 62.702117632146646 ], "wc_limitations_avg": [ 33.666666666666664, 25.62984371565478 ], "wc_review_avg": [ 339.6666666666667, 103.22897956592529 ], "wc_reply_reviewers_avg": [ 135.0, 181.10954327883073 ], "wc_reply_authors_avg": [ 251.66666666666666, 355.91041319722893 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8760034356225286139&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 2, "email": "tsinghua.edu.cn;microsoft.com;uibe.edu.cn;microsoft.com;sysu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Tsinghua University;Microsoft;University of International Business and Economics;Sun Yat-sen University", "aff_unique_dep": ";Microsoft Corporation;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com;http://www.uibe.edu.cn;http://www.sysu.edu.cn/", "aff_unique_abbr": "THU;Microsoft;UIBE;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Exploiting LLM Quantization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95767", "id": "ISa7mMe7Vg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ISa7mMe7Vg", "openreview": "https://openreview.net/forum?id=ISa7mMe7Vg", "poster": "/media/PosterPDFs/NeurIPS%202024/95767.png?t=1733408136.5759432", "project": "", "author_site": "Kazuki Egashira, Mark Vero, Robin Staab, Jingxuan He, Martin Vechev", "tldr": "", "abstract": "Quantization leverages lower-precision weights to reduce the memory usage of large language models (LLMs) and is a key technique for enabling their deployment on commodity hardware. While LLM quantization's impact on utility has been extensively explored, this work for the first time studies its adverse effects from a security perspective. We reveal that widely used quantization methods can be exploited to produce a harmful quantized LLM, even though the full-precision counterpart appears benign, potentially tricking users into deploying the malicious quantized model. \nWe demonstrate this threat using a three-staged attack framework: (i) first, we obtain a malicious LLM through fine-tuning on an adversarial task; (ii) next, we quantize the malicious model and calculate constraints that characterize all full-precision models that map to the same quantized model; (iii) finally, using projected gradient descent, we tune out the poisoned behavior from the full-precision model while ensuring that its weights satisfy the constraints computed in step (ii). This procedure results in an LLM that exhibits benign behavior in full precision but when quantized, it follows the adversarial behavior injected in step (i). We experimentally demonstrate the feasibility and severity of such an attack across three diverse scenarios: vulnerable code generation, content injection, and over-refusal attack. In practice, the adversary could host the resulting full-precision model on an LLM community hub such as Hugging Face, exposing millions of users to the threat of deploying its malicious quantized version on their devices.", "keywords": "quantization;large language models;security;poisoning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Kazuki Egashira;Mark Vero;Robin Staab;Jingxuan He;Martin Vechev", "authorids": "~Kazuki_Egashira1;~Mark_Vero1;~Robin_Staab1;~Jingxuan_He1;~Martin_Vechev1", "gender": "M;M;M;M;M", "homepage": ";https://www.sri.inf.ethz.ch/people/markvero;;https://www.sri.inf.ethz.ch/people/jingxuan;https://www.sri.inf.ethz.ch/people/martin", "dblp": ";319/4985;304/3512;;93/2189.html", "google_scholar": ";vguDYtQAAAAJ;;ylHZY58AAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": ";;;;", "linkedin": "kazuki-egashira-7a7597282/;https://linkedin.com/in/mark-vero-9a32bb17a;robin-staab-b778a51a6/;;", "or_profile": "~Kazuki_Egashira1;~Mark_Vero1;~Robin_Staab1;~Jingxuan_He1;~Martin_Vechev1", "aff": "The University of Tokyo;ETHZ-ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "u-tokyo.ac.jp;inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "MS student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\negashira2024exploiting,\ntitle={Exploiting {LLM} Quantization},\nauthor={Kazuki Egashira and Mark Vero and Robin Staab and Jingxuan He and Martin Vechev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ISa7mMe7Vg}\n}", "github": "", "reviewers": "ve9y;DJAc;jt8j;6dDB", "pdf_size": 558316, "rating": "5;7;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;4;3", "presentation": "2;2;2;3", "wc_summary": "137;33;62;82", "wc_strengths": "24;34;61;128", "wc_weaknesses": "299;51;135;149", "wc_questions": "4;32;28;78", "wc_limitations": "17;1;5;31", "wc_review": "481;151;291;468", "wc_reply_reviewers": "0;25;14;0", "wc_reply_authors": "0;39;39;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.5, 38.00328933131973 ], "wc_strengths_avg": [ 61.75, 40.57323625248546 ], "wc_weaknesses_avg": [ 158.5, 89.3574283425838 ], "wc_questions_avg": [ 35.5, 26.77218706045511 ], "wc_limitations_avg": [ 13.5, 11.6940155635265 ], "wc_review_avg": [ 347.75, 136.14950422238047 ], "wc_reply_reviewers_avg": [ 9.75, 10.497023387608508 ], "wc_reply_authors_avg": [ 19.5, 19.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7311224751639622176&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 5, "email": "u-tokyo.ac.jp;inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Tokyo;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "UTokyo;ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Japan;Switzerland" }, { "title": "Lower Bounds and Optimal Algorithms for Non-Smooth Convex Decentralized Optimization over Time-Varying Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95766", "id": "IUKff7nYmW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IUKff7nYmW", "openreview": "https://openreview.net/forum?id=IUKff7nYmW", "poster": "", "project": "", "author_site": "Dmitry Kovalev, Ekaterina Borodich, Alexander Gasnikov, Dmitrii Feoktistov", "tldr": "", "abstract": "We consider the task of minimizing the sum of convex functions stored in a decentralized manner across the nodes of a communication network. This problem is relatively well-studied in the scenario when the objective functions are smooth, or the links of the network are fixed in time, or both. In particular, lower bounds on the number of decentralized communications and (sub)gradient computations required to solve the problem have been established, along with matching optimal algorithms. However, the remaining and most challenging setting of non-smooth decentralized optimization over time-varying networks is largely underexplored, as neither lower bounds nor optimal algorithms are known in the literature. We resolve this fundamental gap with the following contributions: (i) we establish the first lower bounds on the communication and subgradient computation complexities of solving non-smooth convex decentralized optimization problems over time-varying networks; (ii) we develop the first optimal algorithm that matches these lower bounds and offers substantially improved theoretical performance compared to the existing state of the art.", "keywords": "decentralized optimization;convex optimization;distributed optimization;non-smooth optimization;time-varying networks", "primary_area": "optimization", "supplementary_material": "", "author": "Dmitry Kovalev;Ekaterina Borodich;Alexander Gasnikov;Dmitrii Feoktistov", "authorids": "~Dmitry_Kovalev2;~Ekaterina_Borodich1;~Alexander_Gasnikov1;~Dmitrii_Feoktistov1", "gender": "M;M;M;F", "homepage": "https://www.dmitry-kovalev.com;https://arxiv.org/search/?query=Gasnikov&searchtype=all&source=header;https://github.com/TrandeLik;", "dblp": "136/8468.html;153/1930;;", "google_scholar": "qHFA5z4AAAAJ;AmeE8qkAAAAJ;https://scholar.google.com/citations?hl=ru;https://scholar.google.com/citations?hl=ru", "orcid": "0000-0003-1467-2994;;;", "linkedin": ";;;", "or_profile": "~Dmitry_Kovalev2;~Alexander_Vladimirovich_Gasnikov1;~Feoktistov_Dmitrii1;~Ekaterina_Dmitrievna_Borodich1", "aff": "Yandex;Moscow Institute of Physics and Technology;Lomonosov Moscow State University;Moscow Institute of Physics and Technology", "aff_domain": "yandex-team.ru;mipt.ru;msu.ru;phystech.edu", "position": "Researcher;Associate Professor;Undergrad student;PhD student", "bibtex": "@inproceedings{\nkovalev2024lower,\ntitle={Lower Bounds and Optimal Algorithms for Non-Smooth Convex Decentralized Optimization over Time-Varying Networks},\nauthor={Dmitry Kovalev and Ekaterina Borodich and Alexander Gasnikov and Dmitrii Feoktistov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IUKff7nYmW}\n}", "github": "", "reviewers": "3roP;ap5m;Eyy5;7QFG", "pdf_size": 506552, "rating": "7;7;7;8", "confidence": "3;2;2;3", "soundness": "4;3;3;4", "novelty": "3;3;3;4", "presentation": "4;3;3;4", "wc_summary": "47;21;34;30", "wc_strengths": "46;49;113;22", "wc_weaknesses": "259;36;51;1", "wc_questions": "25;38;97;1", "wc_limitations": "69;14;1;1", "wc_review": "446;158;296;55", "wc_reply_reviewers": "21;30;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 33.0, 9.354143466934854 ], "wc_strengths_avg": [ 57.5, 33.70830758136635 ], "wc_weaknesses_avg": [ 86.75, 101.08999703234737 ], "wc_questions_avg": [ 40.25, 35.35091936569684 ], "wc_limitations_avg": [ 21.25, 28.07467720206236 ], "wc_review_avg": [ 238.75, 147.06694904022453 ], "wc_reply_reviewers_avg": [ 15.75, 11.098986440211556 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11605182395180550612&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "yandex-team.ru;mipt.ru;msu.ru;phystech.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Yandex;Moscow Institute of Physics and Technology;Lomonosov Moscow State University", "aff_unique_dep": ";;", "aff_unique_url": "https://yandex.com;https://www.mipt.ru/en;https://www.msu.ru", "aff_unique_abbr": "Yandex;MIPT;MSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Moscow", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Putting Gale & Shapley to Work: Guaranteeing Stability Through Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95765", "id": "IVjs67Xa44", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IVjs67Xa44", "openreview": "https://openreview.net/forum?id=IVjs67Xa44", "poster": "/media/PosterPDFs/NeurIPS%202024/95765.png?t=1731695445.326222", "project": "", "author_site": "Hadi Hosseini, Sanjukta Roy, Duohan Zhang", "tldr": "", "abstract": "Two-sided matching markets describe a large class of problems wherein participants from one side of the market must be matched to those from the other side according to their preferences. In many real-world applications (e.g. content matching or online labor markets), the knowledge about preferences may not be readily available and must be learned, i.e., one side of the market (aka agents) may not know their preferences over the other side (aka arms). Recent research on online settings has focused primarily on welfare optimization aspects (i.e. minimizing the overall regret) while paying little attention to the game-theoretic properties such as the stability of the final matching. In this paper, we exploit the structure of stable solutions to devise algorithms that improve the likelihood of finding stable solutions. We initiate the study of the sample complexity of finding a stable matching, and provide theoretical bounds on the number of samples needed to reach a stable matching with high probability. Finally, our empirical results demonstrate intriguing tradeoffs between stability and optimality of the proposed algorithms, further complementing our theoretical findings.", "keywords": "Online learning;Multi-armed bandits;Stable matching;Matching markets.", "primary_area": "bandits", "supplementary_material": "/attachment/fa7f5036e9023a6294b4755821c381c0da6178da.zip", "author": "Hadi Hosseini;Sanjukta Roy;Duohan Zhang", "authorids": "~Hadi_Hosseini4;~Sanjukta_Roy1;~Duohan_Zhang1", "gender": ";;M", "homepage": ";https://sites.google.com/view/sanjuktaroy/home?authuser=0;https://duohan0520.github.io/", "dblp": ";178/2824;", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0003-3633-542X;", "linkedin": ";;duohan-zhang357", "or_profile": "~Hadi_Hosseini4;~Sanjukta_Roy1;~Duohan_Zhang1", "aff": ";University of Leeds;Pennsylvania State University", "aff_domain": ";leeds.ac.uk;psu.edu", "position": ";Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhosseini2024putting,\ntitle={Putting Gale \\& Shapley to Work: Guaranteeing Stability Through Learning},\nauthor={Hadi Hosseini and Sanjukta Roy and Duohan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IVjs67Xa44}\n}", "github": "", "reviewers": "wAZr;ct3v;Uabi;ormW", "pdf_size": 756280, "rating": "4;5;6;7", "confidence": "4;5;4;3", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "157;69;107;86", "wc_strengths": "68;70;35;74", "wc_weaknesses": "202;128;21;186", "wc_questions": "467;5;17;7", "wc_limitations": "18;6;1;1", "wc_review": "912;278;181;354", "wc_reply_reviewers": "187;321;0;14", "wc_reply_authors": "143;246;0;0", "reply_reviewers": "1;4;0;1", "reply_authors": "2;4;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.75, 33.03312731183652 ], "wc_strengths_avg": [ 61.75, 15.594470173750693 ], "wc_weaknesses_avg": [ 134.25, 70.9449610613749 ], "wc_questions_avg": [ 124.0, 198.08331580423425 ], "wc_limitations_avg": [ 6.5, 6.946221994724902 ], "wc_review_avg": [ 431.25, 284.2528583849246 ], "wc_reply_reviewers_avg": [ 130.5, 132.36785863645298 ], "wc_reply_authors_avg": [ 97.25, 103.84453524379605 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9363795735133035386&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": ";leeds.ac.uk;psu.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Leeds;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.leeds.ac.uk;https://www.psu.edu", "aff_unique_abbr": "Leeds;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "3D Gaussian Rendering Can Be Sparser: Efficient Rendering via Learned Fragment Pruning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95764", "id": "IVqzbuLfoL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IVqzbuLfoL", "openreview": "https://openreview.net/forum?id=IVqzbuLfoL", "poster": "", "project": "", "author_site": "Zhifan Ye, Chenxi Wan, Chaojian Li, Jihoon Hong, Sixu Li, Leshu Li, Yongan Zhang, Yingyan (Celine) Lin", "tldr": "", "abstract": "3D Gaussian splatting has recently emerged as a promising technique for novel view synthesis from sparse image sets, yet comes at the cost of requiring millions of 3D Gaussian primitives to reconstruct each 3D scene. This largely limits its application to resource-constrained devices and applications.\nDespite advances in Gaussian pruning techniques that aim to remove individual 3D Gaussian primitives, the significant reduction in primitives often fails to translate into commensurate increases in rendering speed, impeding efficiency and practical deployment. We identify that this discrepancy arises due to the overlooked impact of fragment count per Gaussian (i.e., the number of pixels each Gaussian is projected onto). To bridge this gap and meet the growing demands for efficient on-device 3D Gaussian rendering, we propose fragment pruning, an orthogonal enhancement to existing pruning methods that can significantly accelerate rendering by selectively pruning fragments within each Gaussian. Our pruning framework dynamically optimizes the pruning threshold for each Gaussian, markedly improving rendering speed and quality. Extensive experiments in both static and dynamic scenes validate the effectiveness of our approach. For instance, by integrating our fragment pruning technique with state-of-the-art Gaussian pruning methods, we achieve up to a 1.71$\\times$ speedup on an edge GPU device, the Jetson Orin NX, and enhance rendering quality by an average of 0.16 PSNR on the Tanks\\&Temples dataset. Our code is available at https://github.com/GATECH-EIC/Fragment-Pruning.", "keywords": "Computer Vision;Efficient Machine Learning;Neural Graphics", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhifan Ye;Chenxi Wan;Chaojian Li;Jihoon Hong;Sixu Li;Leshu Li;Yongan Zhang;Yingyan Celine Lin", "authorids": "~Zhifan_Ye1;~Chenxi_Wan1;~Chaojian_Li1;~Jihoon_Hong1;~Sixu_Li2;~Leshu_Li1;~Yongan_Zhang1;~Yingyan_Celine_Lin1", "gender": "M;F;;M;;M;M;", "homepage": "https://github.com/LemonAndRabbit;;https://licj15.github.io/;;;;;", "dblp": "168/9226.html;;249/5403;;;;137/8349;", "google_scholar": "zlPfnWEAAAAJ;;HvEBdf4AAAAJ;;;;s3Qbrl0AAAAJ;", "orcid": "0000-0003-0755-8843;0009-0008-0634-5188;;;;;;", "linkedin": "zhifan-ye/;;;jihoon-hong-1b839b205?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;leshu-li-a19990309;yongan-zhang-141a71136/;", "or_profile": "~Zhifan_Ye1;~Chenxi_Wan1;~Chaojian_Li1;~Jihoon_Hong1;~Sixu_Li2;~Leshu_Li1;~Yongan_Zhang1;~Yingyan_Celine_Lin1", "aff": "Georgia Institute of Technology;University of Science and Technology of China;Georgia Institute of Technology;Seoul National University;;University of Minnesota - Twin Cities;Georgia Institute of Technology;", "aff_domain": "gatech.edu;mail.ustc.edu.cn;gatech.edu;snu.ac.kr;;umn.edu;gatech.edu;", "position": "PhD student;Undergrad student;PhD student;Undergrad student;;MS student;PhD student;", "bibtex": "@inproceedings{\nye2024d,\ntitle={3D Gaussian Rendering Can Be Sparser: Efficient Rendering via Learned Fragment Pruning},\nauthor={Zhifan Ye and Chenxi Wan and Chaojian Li and Jihoon Hong and Sixu Li and Leshu Li and Yongan Zhang and Yingyan Celine Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IVqzbuLfoL}\n}", "github": "", "reviewers": "7Aqo;TxBU;RCBw;ytrw", "pdf_size": 9394532, "rating": "5;6;6;6", "confidence": "5;4;5;5", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "67;70;81;117", "wc_strengths": "23;87;103;84", "wc_weaknesses": "191;163;186;437", "wc_questions": "96;77;7;109", "wc_limitations": "1;24;5;12", "wc_review": "378;421;382;759", "wc_reply_reviewers": "78;238;48;30", "wc_reply_authors": "708;1031;33;1016", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;2;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 19.891895334532606 ], "wc_strengths_avg": [ 74.25, 30.457962834043908 ], "wc_weaknesses_avg": [ 244.25, 111.7841111249716 ], "wc_questions_avg": [ 72.25, 39.35336707322513 ], "wc_limitations_avg": [ 10.5, 8.73212459828649 ], "wc_review_avg": [ 485.0, 159.08331150689565 ], "wc_reply_reviewers_avg": [ 98.5, 82.34530952033637 ], "wc_reply_authors_avg": [ 697.0, 404.4545709965459 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AZXn6HbUmsMJ:scholar.google.com/&scioq=3D+Gaussian+Rendering+Can+Be+Sparser:+Efficient+Rendering+via+Learned+Fragment+Pruning&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "gatech.edu;mail.ustc.edu.cn;gatech.edu;snu.ac.kr;;umn.edu;gatech.edu;", "author_num": 8, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Georgia Institute of Technology;University of Science and Technology of China;Seoul National University;University of Minnesota", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.gatech.edu;http://www.ustc.edu.cn;https://www.snu.ac.kr;https://www.minnesota.edu", "aff_unique_abbr": "Georgia Tech;USTC;SNU;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Twin Cities", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "United States;China;South Korea" }, { "title": "On the Optimal Time Complexities in Decentralized Stochastic Asynchronous Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95763", "id": "IXRa8adMHX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IXRa8adMHX", "openreview": "https://openreview.net/forum?id=IXRa8adMHX", "poster": "/media/PosterPDFs/NeurIPS%202024/95763.png?t=1730556736.7789433", "project": "", "author_site": "Alexander Tyurin, Peter Richtarik", "tldr": "", "abstract": "We consider the decentralized stochastic asynchronous optimization setup, where many workers asynchronously calculate stochastic gradients and asynchronously communicate with each other using edges in a multigraph. For both homogeneous and heterogeneous setups, we prove new time complexity lower bounds under the assumption that computation and communication speeds are bounded by constants. After that, we developed a new nearly optimal method, Fragile SGD, and a new optimal method, Amelie SGD, that converge with arbitrary heterogeneous computation and communication speeds and match our lower bounds (up to a logarithmic factor in the homogeneous setting). Our time complexities are new, nearly optimal, and provably improve all previous asynchronous/synchronous stochastic methods in the decentralized setup.", "keywords": "nonconvex and convex optimization;stochastic optimization;asynchronous methods;decentralized optimization;lower bounds", "primary_area": "optimization", "supplementary_material": "/attachment/63e8c79ca1bd88180b2a59b4c30e1938e4a4b768.zip", "author": "Alexander Tyurin;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Peter_Richt\u00e1rik1", "gender": "M;M", "homepage": "https://k3nfalt.github.io/;https://richtarik.org", "dblp": "203/8919;62/8001", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-4380-5848", "linkedin": ";richtarik/", "or_profile": "~Alexander_Tyurin1;~Peter_Richtarik1", "aff": "KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\ntyurin2024on,\ntitle={On the Optimal Time Complexities in Decentralized Stochastic Asynchronous Optimization},\nauthor={Alexander Tyurin and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IXRa8adMHX}\n}", "github": "", "reviewers": "1dR2;YQkr;VUTZ;qqc5", "pdf_size": 789683, "rating": "4;6;6;7", "confidence": "4;3;2;3", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;2;4", "wc_summary": "38;78;80;74", "wc_strengths": "30;150;154;49", "wc_weaknesses": "154;383;86;25", "wc_questions": "26;2;33;4", "wc_limitations": "8;2;11;10", "wc_review": "256;615;364;162", "wc_reply_reviewers": "145;11;12;4", "wc_reply_authors": "735;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 67.5, 17.168284713389397 ], "wc_strengths_avg": [ 95.75, 56.66734068226601 ], "wc_weaknesses_avg": [ 162.0, 135.50830232867654 ], "wc_questions_avg": [ 16.25, 13.497684986693088 ], "wc_limitations_avg": [ 7.75, 3.491060010942235 ], "wc_review_avg": [ 349.25, 169.26218567654146 ], "wc_reply_reviewers_avg": [ 43.0, 58.970331523572085 ], "wc_reply_authors_avg": [ 183.75, 318.2643358907812 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16040258040596534618&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;kaust.edu.sa", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "IZtX4RNBeH", "title": "How Good is my Video LMM? Complex Video Reasoning and Robustness Evaluation Suite for Video-LMMs", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Recent advancements in Large Language Models (LLMs) have led to the development of Video Large Multi-modal Models (Video-LMMs) that can handle a wide range of video understanding tasks. These models have the potential to be deployed in real-world applications such as robotics, AI assistants, medical surgery, and autonomous vehicles. The widespread adoption of Video-LMMs in our daily lives underscores the importance of ensuring and evaluating their robust performance in mirroring human-like reasoning and interaction capabilities in complex, real-world contexts. However, existing benchmarks for Video-LMMs primarily focus on general video comprehension abilities and neglect assessing their reasoning capabilities over complex videos in the real-world context, and robustness of these models through the lens of user prompts as text queries. In this paper, we present the Complex Video Reasoning and Robustness Evaluation Suite (CVRR-ES), a novel benchmark that comprehensively assesses the performance of Video-LMMs across 11 diverse real-world video dimensions. We evaluate 11 recent models, including both open-source and closed-source variants, and find that most of the Video-LMMs, especially open-source ones, struggle with robustness and reasoning when dealing with complex videos. Based on our analysis, we develop a training-free Dual-Step Contextual Prompting (DSCP) technique to effectively enhance the performance of existing Video-LMMs on CVRR-ES benchmark. Our findings provide valuable insights for building the next generation of human-centric AI systems with advanced robustness and reasoning capabilities.\nOur dataset and code are publicly available at: https://mbzuai-oryx.github.io/CVRR-Evaluation-Suite/.", "keywords": "Multi-modal Large language models;Video Understanding;Video Question Answering", "primary_area": "", "supplementary_material": "/attachment/685a04f8a3978f429ef11ca70f48bf42f13f3ca7.zip", "author": "Muhammad Uzair Khattak;Muhammad Ferjad Naeem;Jameel Hassan Abdul Samadh;Muzammal Naseer;Federico Tombari;Fahad Khan;Salman Khan", "authorids": "~Muhammad_Uzair_Khattak1;~Muhammad_Ferjad_Naeem1;~Jameel_Hassan_Abdul_Samadh1;~Muzammal_Naseer1;~Federico_Tombari1;~Fahad_Khan1;~Salman_Khan4", "gender": ";M;M;M;M;M;M", "homepage": "https://ferjad.github.io/;https://jameelhassan.github.io/;https://muzammal-naseer.com/;https://federicotombari.github.io/;https://sites.google.com/view/fahadkhans/home;https://salman-h-khan.github.io/;https://muzairkhattak.github.io/", "dblp": "213/8506;;;16/3539;05/8618;32/11535-1;324/2256.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=tM9xKA8AAAAJ;TFsE4BIAAAAJ;zvaeYnUAAAAJ;https://scholar.google.es/citations?user=M59O9lkAAAAJ;https://scholar.google.es/citations?user=M6fFL4gAAAAJ", "orcid": ";;0000-0001-7663-7161;0000-0001-5598-5212;;0000-0002-9502-1749;", "linkedin": ";jameelhassan;muzammalnaseer/;fedet/;;;muhammad-uzair-khattak-204ba1150/", "or_profile": "~Muhammad_Ferjad_Naeem1;~Jameel_Hassan_Abdul_Samadh1;~Muzammal_Naseer1;~Federico_Tombari1;~Fahad_Khan1;~Salman_Khan4;~Muhammd_Uzair_Khattak1", "aff": "ETHZ - ETH Zurich;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Technical University Munich (TUM);Link\u00f6ping University;Australian National University;EPFL - EPF Lausanne", "aff_domain": "ethz.ch;mbzuai.ac.ae;mbzuai.ac.ae;in.tum.de;liu.se;anu.edu.au;epfl.ch", "position": "PhD student;MS student;Researcher;Lecturer;Associate Professor;Lecturer;PhD student", "bibtex": "@misc{\nanonymous2024how,\ntitle={How Good is my Video {LMM}? Complex Video Reasoning and Robustness Evaluation Suite for Video-{LMM}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=IZtX4RNBeH}\n}", "github": "", "project": "", "reviewers": "PnAk;dRBU;D6BR;GutX", "site": "https://openreview.net/forum?id=IZtX4RNBeH", "pdf_size": 1649175, "rating": "3;6;6;7", "confidence": "4;4;5;4", "wc_summary_and_contributions": "57;149;66;78", "wc_strengths": "16;99;37;56", "wc_improvement": "167;242;124;235", "wc_limitations": "1;42;4;1", "wc_correctness": "1;20;43;1", "wc_clarity": "5;50;152;1", "wc_relation_to_prior_work": "7;52;50;1", "wc_documentation": "8;11;21;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "263;666;498;375", "wc_reply_reviewers": "262;297;0;0", "wc_reply_authors": "860;2508;0;818", "reply_reviewers": "1;2;0;0", "reply_authors": "4;5;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 87.5, 36.28015986734347 ], "wc_strengths_avg": [ 52.0, 30.602287496198713 ], "wc_improvement_avg": [ 192.0, 48.98469148621842 ], "wc_limitations_avg": [ 12.0, 17.363755354185336 ], "wc_correctness_avg": [ 16.25, 17.282577932704367 ], "wc_clarity_avg": [ 52.0, 60.85638832530238 ], "wc_relation_to_prior_work_avg": [ 27.5, 23.606143268225754 ], "wc_documentation_avg": [ 10.25, 7.189401922274203 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 450.5, 149.6270363269954 ], "wc_reply_reviewers_avg": [ 139.75, 140.2967836409659 ], "wc_reply_authors_avg": [ 1046.5, 910.7879830125121 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12109326187916367873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;3;4;5", "aff_unique_norm": "ETH Zurich;Mohamed bin Zayed University of Artificial Intelligence;Technical University Munich;Link\u00f6ping University;Australian National University;EPFL", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ethz.ch;https://mbzuai.ac.ae;https://www.tum.de;https://www.liu.se;https://www.anu.edu.au;https://www.epfl.ch", "aff_unique_abbr": "ETHZ;MBZUAI;TUM;LiU;ANU;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;2;3;4;0", "aff_country_unique": "Switzerland;United Arab Emirates;Germany;Sweden;Australia" }, { "title": "Probablistic Emulation of a Global Climate Model with Spherical DYffusion", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95762", "id": "Ib2iHIJRTh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ib2iHIJRTh", "openreview": "https://openreview.net/forum?id=Ib2iHIJRTh", "poster": "", "project": "", "author_site": "Salva R\u00fchling Cachay, Brian Henn, Oliver Watt-Meyer, Christopher S. Bretherton, Rose Yu", "tldr": "", "abstract": "Data-driven deep learning models are transforming global weather forecasting. It is an open question if this success can extend to climate modeling, where the complexity of the data and long inference rollouts pose significant challenges. Here, we present the first conditional generative model that produces accurate and physically consistent global climate ensemble simulations by emulating a coarse version of the United States' primary operational global forecast model, FV3GFS.\nOur model integrates the dynamics-informed diffusion framework (DYffusion) with the Spherical Fourier Neural Operator (SFNO) architecture, enabling stable 100-year simulations at 6-hourly timesteps while maintaining low computational overhead compared to single-step deterministic baselines.\nThe model achieves near gold-standard performance for climate model emulation, outperforming existing approaches and demonstrating promising ensemble skill.\nThis work represents a significant advance towards efficient, data-driven climate simulations that can enhance our understanding of the climate system and inform adaptation strategies. Code is available at [https://github.com/Rose-STL-Lab/spherical-dyffusion](https://github.com/Rose-STL-Lab/spherical-dyffusion).", "keywords": "deep learning;climate modeling;climate science;ai for science;climate change", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Salva R\u00fchling Cachay;Brian Henn;Oliver Watt-Meyer;Christopher S. Bretherton;Rose Yu", "authorids": "~Salva_R\u00fchling_Cachay1;~Brian_Henn1;~Oliver_Watt-Meyer1;~Christopher_S._Bretherton1;~Rose_Yu1", "gender": "M;M;M;M;F", "homepage": "https://salvarc.github.io/;;https://oliverwm1.github.io/;https://atmos.uw.edu/~breth/;http://roseyu.com", "dblp": "280/1016;;;;164/7314", "google_scholar": "8RHc4eQAAAAJ;;R-T2dmgAAAAJ;;", "orcid": "0000-0002-7968-5035;0000-0001-6384-2051;;0000-0002-6712-8856;", "linkedin": "salva-rc;;;;", "or_profile": "~Salva_R\u00fchling_Cachay1;~Brian_Henn1;~Oliver_Watt-Meyer1;~Christopher_S._Bretherton1;~Rose_Yu1", "aff": "University of California, San Diego;Allen Institute for Artificial Intelligence;;;University of California, San Diego", "aff_domain": "ucsd.edu;allenai.org;;;ucsd.edu", "position": "PhD student;Researcher;;;Assistant Professor", "bibtex": "@inproceedings{\ncachay2024probablistic,\ntitle={Probablistic Emulation of a Global Climate Model with Spherical {DY}ffusion},\nauthor={Salva R{\\\"u}hling Cachay and Brian Henn and Oliver Watt-Meyer and Christopher S. Bretherton and Rose Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ib2iHIJRTh}\n}", "github": "", "reviewers": "2sHD;u1ux;JrWU;mrTD", "pdf_size": 6866689, "rating": "8;8;8;8", "confidence": "4;4;5;4", "soundness": "3;4;4;4", "novelty": "4;3;3;4", "presentation": "4;4;3;3", "wc_summary": "90;158;95;32", "wc_strengths": "130;112;163;82", "wc_weaknesses": "210;115;144;153", "wc_questions": "107;1;79;261", "wc_limitations": "11;6;42;1", "wc_review": "548;392;523;529", "wc_reply_reviewers": "54;0;51;76", "wc_reply_authors": "50;0;46;222", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;3", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 93.75, 44.60030829489859 ], "wc_strengths_avg": [ 121.75, 29.345996319770776 ], "wc_weaknesses_avg": [ 155.5, 34.4564943080401 ], "wc_questions_avg": [ 112.0, 94.38749917229505 ], "wc_limitations_avg": [ 15.0, 15.98436736314578 ], "wc_review_avg": [ 498.0, 61.89103327623477 ], "wc_reply_reviewers_avg": [ 45.25, 27.851166941440713 ], "wc_reply_authors_avg": [ 79.5, 84.58575530194194 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IvtjKX2slmUJ:scholar.google.com/&scioq=Probablistic+Emulation+of+a+Global+Climate+Model+with+Spherical+DYffusion&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ucsd.edu;allenai.org;;;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, San Diego;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://allenai.org", "aff_unique_abbr": "UCSD;AI2", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Alignment and Robustness with Circuit Breakers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95761", "id": "IbIB8SBKFV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IbIB8SBKFV", "openreview": "https://openreview.net/forum?id=IbIB8SBKFV", "poster": "/media/PosterPDFs/NeurIPS%202024/95761.png?t=1732947094.3984618", "project": "", "author_site": "Andy Zou, Long Phan, Justin Wang, Derek Duenas, Maxwell Lin, Maksym Andriushchenko, J. Zico Kolter, Matt Fredrikson, Dan Hendrycks", "tldr": "", "abstract": "AI systems can take harmful actions and are highly vulnerable to adversarial attacks. We present an approach, inspired by recent advances in representation engineering, that interrupts the models as they respond with harmful outputs with \"circuit breakers.\" Existing techniques aimed at improving alignment, such as refusal training, are often bypassed. Techniques such as adversarial training try to plug these holes by countering specific attacks. As an alternative to refusal training and adversarial training, circuit-breaking directly controls the representations that are responsible for harmful outputs in the first place. Our technique can be applied to both text-only and multimodal language models to prevent the generation of harmful outputs without sacrificing utility -- even in the presence of powerful unseen attacks. Notably, while adversarial robustness in standalone image recognition remains an open challenge, circuit breakers allow the larger multimodal system to reliably withstand image \"hijacks\" that aim to produce harmful content. Finally, we extend our approach to AI agents, demonstrating considerable reductions in the rate of harmful actions when they are under attack. Our approach represents a significant step forward in the development of reliable safeguards to harmful behavior and adversarial attacks.", "keywords": "alignment;adversarial robustness;adversarial attacks;harmfulness;security;reliability;ML safety;AI safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/ea2f2df0229753ccc70300b6e5e999a17344af85.zip", "author": "Andy Zou;Long Phan;Justin Wang;Derek Duenas;Maxwell Lin;Maksym Andriushchenko;J Zico Kolter;Matt Fredrikson;Dan Hendrycks", "authorids": "~Andy_Zou1;~Long_Phan1;~Justin_Wang2;~Derek_Duenas1;~Maxwell_Lin1;~Maksym_Andriushchenko1;~J_Zico_Kolter1;~Matt_Fredrikson1;~Dan_Hendrycks1", "gender": ";M;;M;;M;M;;M", "homepage": ";;https://justinwang.xyz;;;https://www.andriushchenko.me/;https://cs.cmu.edu/~mfredrik;;http://www.zicokolter.com", "dblp": "274/2362;;;;;200/8865;38/2612;182/2504;67/2526", "google_scholar": ";fVRQn4wAAAAJ;;;y9aAz6IAAAAJ;ZNtuJYoAAAAJ;https://scholar.google.com.tw/citations?user=tMYCvLAAAAAJ;;UXh1I6UAAAAJ", "orcid": ";;;;;;;;", "linkedin": "andy-zou-09ba3616a/;long-phan-3110/;;dereksduenas/;;;;;", "or_profile": "~Andy_Zou1;~Long_Phan1;~Justin_Wang2;~Derek_Duenas1;~Maxwell_Lin1;~Maksym_Andriushchenko1;~Matt_Fredrikson1;~Dan_Hendrycks1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Center for AI Safety;Carnegie Mellon University;Carnegie Mellon University;;Swiss Federal Institute of Technology Lausanne;Carnegie Mellon University;Center for AI Safety;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;safe.ai;cmu.edu;andrew.cmu.edu;;epfl.ch;cmu.edu;safe.ai;cmu.edu", "position": "PhD student;Research Engineer;Undergrad student;MS student;;PhD Student;Associate Professor;Executive and Research Director;Full Professor", "bibtex": "@inproceedings{\nzou2024improving,\ntitle={Improving Alignment and Robustness with Circuit Breakers},\nauthor={Andy Zou and Long Phan and Justin Wang and Derek Duenas and Maxwell Lin and Maksym Andriushchenko and J Zico Kolter and Matt Fredrikson and Dan Hendrycks},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IbIB8SBKFV}\n}", "github": "", "reviewers": "hMND;D4PL;r61a;ywQ8;Lp6v", "pdf_size": 786276, "rating": "4;6;7;7;8", "confidence": "4;4;4;3;4", "soundness": "3;3;4;2;4", "novelty": "2;3;4;3;4", "presentation": "3;2;2;1;3", "wc_summary": "26;46;118;142;239", "wc_strengths": "14;92;78;72;95", "wc_weaknesses": "121;438;115;90;57", "wc_questions": "2;118;368;172;111", "wc_limitations": "1;47;32;43;5", "wc_review": "164;741;711;519;507", "wc_reply_reviewers": "217;274;196;11;38", "wc_reply_authors": "243;574;776;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 114.2, 75.88517641805942 ], "wc_strengths_avg": [ 70.2, 29.369371801249002 ], "wc_weaknesses_avg": [ 164.2, 138.74494585389408 ], "wc_questions_avg": [ 154.2, 120.30694078065488 ], "wc_limitations_avg": [ 25.6, 19.1373979422491 ], "wc_review_avg": [ 528.4, 205.8519856595996 ], "wc_reply_reviewers_avg": [ 147.2, 103.73697508603189 ], "wc_reply_authors_avg": [ 318.6, 310.8604831753306 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.22116293423234568, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8508477985653822382&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "andrew.cmu.edu;safe.ai;cmu.edu;andrew.cmu.edu;;epfl.ch;cmu.edu;safe.ai;cmu.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;2;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Center for AI Safety;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.centerforaisafety.org;https://www.epfl.ch", "aff_unique_abbr": "CMU;;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Switzerland" }, { "title": "WildGuard: Open One-stop Moderation Tools for Safety Risks, Jailbreaks, and Refusals of LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97764", "id": "Ich4tv4202", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ich4tv4202", "openreview": "https://openreview.net/forum?id=Ich4tv4202", "poster": "", "project": "", "author_site": "Seungju Han, Kavel Rao, Allyson Ettinger, Liwei Jiang, Bill Yuchen Lin, Nathan Lambert, Nouha Dziri, Yejin Choi", "tldr": "", "abstract": "We introduce WildGuard---an open, light-weight moderation tool for LLM safety that achieves three goals: (1) identifying malicious intent in user prompts, (2) detecting safety risks of model responses, and (3) determining model refusal rate. Together, WildGuard serves the increasing needs for automatic safety moderation and evaluation of LLM interactions, providing a one-stop tool with enhanced accuracy and broad coverage across 13 risk categories. While existing open moderation tools such as Llama-Guard2 score reasonably well in classifying straightforward model interactions, they lag far behind a prompted GPT-4, especially in identifying adversarial jailbreaks and in evaluating models' refusals, a key measure for evaluating safety behaviors in model responses. \n\nTo address these challenges, we construct WildGuardMix, a large-scale and carefully balanced multi-task safety moderation dataset with 92K labeled examples that cover vanilla (direct) prompts and adversarial jailbreaks, paired with various refusal and compliance responses. WildGuardMix is a combination of WildGuardTrain, the training data of WildGuard, and WildGuardTest, a high-quality human-annotated moderation test set with 5K labeled items covering broad risk scenarios.\nThrough extensive evaluations on WildGuardTest and ten existing public benchmarks, we show that WildGuard establishes state-of-the-art performance in open-source safety moderation across all the three tasks compared to ten strong existing open-source moderation models (e.g., up to 25.3% improvement on refusal detection). Importantly, WildGuard matches and sometimes exceeds GPT-4 performance (e.g., up to 4.8% improvement on prompt harmfulness identification). WildGuard serves as a highly effective safety moderator in an LLM interface, reducing the success rate of jailbreak attacks from 79.8% to 2.4%. We will make all our data, models and training/evaluation code publicly available under CC BY 4.0 license.", "keywords": "Safety;LLMs;Moderation tool;Safety data;classifiers;benchmark", "primary_area": "", "supplementary_material": "/attachment/21179ec3f294d95215c6a3860d6f3e3fff70729e.zip", "author": "Seungju Han;Kavel Rao;Allyson Ettinger;Liwei Jiang;Bill Yuchen Lin;Nathan Lambert;Yejin Choi;Nouha Dziri", "authorids": "~Seungju_Han2;~Kavel_Rao1;~Allyson_Ettinger1;~Liwei_Jiang2;~Bill_Yuchen_Lin1;~Nathan_Lambert1;~Yejin_Choi1;~Nouha_Dziri2", "gender": "M;M;F;F;M;M;F;", "homepage": "https://seungjuhan.me;http://kavelrao.dev;https://aetting.github.io;https://liweijiang.me;http://yuchenlin.xyz/;https://natolambert.com;https://yejinc.github.io/;", "dblp": ";;165/0758;;190/4518;228/9584.html;89/579-1;", "google_scholar": "g_anRqAAAAAJ;;;lcPsDgUAAAAJ;https://scholar.google.com/citations?hl=en;O4jW7BsAAAAJ;vhP-tlcAAAAJ;", "orcid": ";;;;;0000-0002-9997-6817;;", "linkedin": "seungju-han-66b85017a/;;;;;nathan-lambert-55093468/;;", "or_profile": "~Seungju_Han2;~Kavel_Rao1;~Allyson_Ettinger1;~Liwei_Jiang2;~Bill_Yuchen_Lin1;~Nathan_Lambert1;~Yejin_Choi1;~Nouha_Dziri2", "aff": "Seoul National University;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington;", "aff_domain": "snu.ac.kr;cs.washington.edu;allenai.org;washington.edu;allenai.org;allenai.org;cs.washington.edu;", "position": "Undergrad student;Undergrad student;Researcher;PhD student;Researcher;Researcher;Full Professor;", "bibtex": "@inproceedings{\nhan2024wildguard,\ntitle={WildGuard: Open One-stop Moderation Tools for Safety Risks, Jailbreaks, and Refusals of {LLM}s},\nauthor={Seungju Han and Kavel Rao and Allyson Ettinger and Liwei Jiang and Bill Yuchen Lin and Nathan Lambert and Yejin Choi and Nouha Dziri},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Ich4tv4202}\n}", "github": "", "reviewers": "Tk4P;rRrT;9fUj", "pdf_size": 2077800, "rating": "7;7;8", "confidence": "2;4;4", "wc_summary_and_contributions": "102;107;29", "wc_strengths": "92;68;18", "wc_improvement": "25;87;119", "wc_limitations": "4;1;12", "wc_correctness": "22;1;2", "wc_clarity": "37;1;1", "wc_relation_to_prior_work": "13;1;26", "wc_documentation": "5;1;8", "wc_additional_feedback": "1;1;1", "wc_review": "301;268;216", "wc_reply_reviewers": "20;0;18", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 79.33333333333333, 35.64952859280034 ], "wc_strengths_avg": [ 59.333333333333336, 30.825674724525044 ], "wc_improvement_avg": [ 77.0, 39.02136167109839 ], "wc_limitations_avg": [ 5.666666666666667, 4.642796092394707 ], "wc_correctness_avg": [ 8.333333333333334, 9.672412085697939 ], "wc_clarity_avg": [ 13.0, 16.97056274847714 ], "wc_relation_to_prior_work_avg": [ 13.333333333333334, 10.208928554075703 ], "wc_documentation_avg": [ 4.666666666666667, 2.8674417556808756 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 261.6666666666667, 34.988887124660344 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 8.993825042154695 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3820090944644552663&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;cs.washington.edu;allenai.org;washington.edu;allenai.org;allenai.org;cs.washington.edu;", "author_num": 8, "aff_unique_index": "0;1;2;1;2;2;1", "aff_unique_norm": "Seoul National University;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://www.snu.ac.kr;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "SNU;UW;AI2", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "IdEaeCbhUW", "title": "Achieving Precise Control with Slow Hardware: Model-Based Reinforcement Learning for Action Sequence Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Current reinforcement learning (RL) models are often claimed to explain animal behavior. However, they are designed for artificial agents that sense, think, and react much faster than the brain, and they tend to fail when operating under human-like sensory and reaction times. Despite using slow neurons, the brain achieves precise and low-latency control through a combination of predictive and sequence learning. The basal ganglia is hypothesized to learn compressed representations of action sequences, allowing the brain to produce a series of actions for a given input. We present the Hindsight-Sequence-Planner (HSP), a model of the basal ganglia and the prefrontal cortex that operates under \"brain-like\" conditions: slow information processing with quick sensing and actuation. Our \"temporal recall\" mechanism is inspired by the prefrontal cortex's role in sequence learning, where the agent uses an environmental model to replay memories at a finer temporal resolution than its processing speed while addressing the credit assignment problem caused by scalar rewards in sequence learning. HSP employs model-based training to achieve model-free control, resulting in precise and efficient behavior that appears low-latency despite running on slow hardware. We test HSP on various continuous control tasks, demonstrating that it not can achieve comparable performance 'human-like' frequencies by relying on significantly fewer observations and actor calls (actor sample complexity).", "keywords": "Action Sequence Learning;Basal Ganglia;Prefrontal Cortex;Reinforcement Learning;Model Based", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1706a9c37bf703c26c69bcc00d727560380dcb9f.zip", "author": "Devdhar Patel;Hava T Siegelmann", "authorids": "~Devdhar_Patel1;~Hava_T_Siegelmann1", "gender": "M;F", "homepage": ";https://www.cics.umass.edu/faculty/directory/siegelmann_hava", "dblp": "232/9338;s/HavaTSiegelmann.html", "google_scholar": "PnQtZegAAAAJ;https://scholar.google.co.il/citations?user=A2fiOI0AAAAJ", "orcid": "0000-0003-3866-0744;0000-0003-4938-8723", "linkedin": ";hava-siegelmann-4b272a/", "or_profile": "~Devdhar_Patel1;~Hava_T_Siegelmann1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;University of Massachusetts at Amherst", "aff_domain": "cs.umass.edu;umass.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024achieving,\ntitle={Achieving Precise Control with Slow Hardware: Model-Based Reinforcement Learning for Action Sequence Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=IdEaeCbhUW}\n}", "github": "", "project": "", "reviewers": "2YSe;nHii;twUe", "site": "https://openreview.net/forum?id=IdEaeCbhUW", "pdf_size": 1649431, "rating": "4;4;5", "confidence": "3;2;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "50;76;73", "wc_strengths": "146;63;91", "wc_weaknesses": "559;197;82", "wc_questions": "308;129;65", "wc_limitations": "3;3;87", "wc_review": "1066;468;398", "wc_reply_reviewers": "179;0;111", "wc_reply_authors": "602;161;0", "reply_reviewers": "1;0;1", "reply_authors": "2;2;1", "rating_avg": [ 4.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 66.33333333333333, 11.61416759345623 ], "wc_strengths_avg": [ 100.0, 34.477045503735766 ], "wc_weaknesses_avg": [ 279.3333333333333, 203.25080292310997 ], "wc_questions_avg": [ 167.33333333333334, 102.84076148211965 ], "wc_limitations_avg": [ 31.0, 39.59797974644666 ], "wc_review_avg": [ 644.0, 299.764351894395 ], "wc_reply_reviewers_avg": [ 96.66666666666667, 73.77593705869745 ], "wc_reply_authors_avg": [ 254.33333333333334, 254.47243902281875 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6S_WJjkF8_YJ:scholar.google.com/&scioq=Achieving+Precise+Control+with+Slow+Hardware:+Model-Based+Reinforcement+Learning+for+Action+Sequence+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "ARC: A Generalist Graph Anomaly Detector with In-Context Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95760", "id": "IdIVfzjPK4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IdIVfzjPK4", "openreview": "https://openreview.net/forum?id=IdIVfzjPK4", "poster": "/media/PosterPDFs/NeurIPS%202024/95760.png?t=1730440461.6498373", "project": "", "author_site": "Yixin Liu, Shiyuan Li, Yu Zheng, Qingfeng Chen, Chengqi Zhang, Shirui Pan", "tldr": "", "abstract": "Graph anomaly detection (GAD), which aims to identify abnormal nodes that differ from the majority within a graph, has garnered significant attention. However, current GAD methods necessitate training specific to each dataset, resulting in high training costs, substantial data requirements, and limited generalizability when being applied to new datasets and domains. To address these limitations, this paper proposes ARC, a generalist GAD approach that enables a ``one-for-all'' GAD model to detect anomalies across various graph datasets on-the-fly. Equipped with in-context learning, ARC can directly extract dataset-specific patterns from the target dataset using few-shot normal samples at the inference stage, without the need for retraining or fine-tuning on the target dataset. ARC comprises three components that are well-crafted for capturing universal graph anomaly patterns: 1) smoothness-based feature **A**lignment module that unifies the features of different datasets into a common and anomaly-sensitive space; 2) ego-neighbor **R**esidual graph encoder that learns abnormality-related node embeddings; and 3) cross-attentive in-**C**ontext anomaly scoring module that predicts node abnormality by leveraging few-shot normal samples. Extensive experiments on multiple benchmark datasets from various domains demonstrate the superior anomaly detection performance, efficiency, and generalizability of ARC.", "keywords": "Graph Anomaly Detection;Graph Neural Networks;In-Context Learning;Artificial general intelligence", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/875a78bfe37a594634553a6d1d3006995c4ea140.zip", "author": "Yixin Liu;Shiyuan Li;Yu Zheng;Qingfeng Chen;Chengqi Zhang;Shirui Pan", "authorids": "~Yixin_Liu3;~Shiyuan_Li1;~Yu_Zheng5;~Qingfeng_Chen1;~Chengqi_Zhang1;~Shirui_Pan1", "gender": "M;M;F;M;M;", "homepage": "https://yixinliu233.giuhub.io;https://shiy-li.github.io;;;https://research.polyu.edu.hk/en/persons/chengqi-zhang;", "dblp": "140/7348-1;;;62/1656;71/964;91/8171", "google_scholar": ";MEGrkOEAAAAJ;https://scholar.google.com.au/citations?user=j4pGvBgAAAAJ;;https://scholar.google.com.au/citations?user=B6lBmqEAAAAJ;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ", "orcid": ";0000-0002-4381-7497;;0000-0002-5506-8913;0000-0001-5715-7154;0000-0003-0794-527X", "linkedin": ";;;;chengqi-zhang-55aa8910/;", "or_profile": "~Yixin_Liu3;~Shiyuan_Li1;~Yu_Zheng5;~Qingfeng_Chen1;~Chengqi_Zhang1;~Shirui_Pan1", "aff": "Monash University;Guangxi University;Latrobe University;Guangxi University;University of Technology Sydney;Griffith University", "aff_domain": "monash.edu;gxu.edu.cn;latrobe.edu.au;gxu.edu.cn;uts.edu.au;griffith.edu.au", "position": "PhD student;MS student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024arc,\ntitle={{ARC}: A Generalist Graph Anomaly Detector with In-Context Learning},\nauthor={Yixin Liu and Shiyuan Li and Yu Zheng and Qingfeng Chen and Chengqi Zhang and Shirui Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IdIVfzjPK4}\n}", "github": "", "reviewers": "HCAf;RwhU;B6wn;VWTk;bLtA", "pdf_size": 1440941, "rating": "4;4;7;7;8", "confidence": "4;4;5;4;4", "soundness": "2;3;4;4;3", "novelty": "2;3;4;3;3", "presentation": "3;3;3;3;3", "wc_summary": "32;73;64;43;90", "wc_strengths": "33;55;95;62;137", "wc_weaknesses": "306;119;74;72;87", "wc_questions": "247;4;44;38;39", "wc_limitations": "6;1;19;7;18", "wc_review": "624;252;296;222;371", "wc_reply_reviewers": "340;0;23;17;26", "wc_reply_authors": "1259;52;0;0;0", "reply_reviewers": "2;0;1;1;1", "reply_authors": "4;2;1;1;1", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.4, 20.771133815947554 ], "wc_strengths_avg": [ 76.4, 36.24141277599426 ], "wc_weaknesses_avg": [ 131.6, 88.80675649971683 ], "wc_questions_avg": [ 74.4, 87.46336375877617 ], "wc_limitations_avg": [ 10.2, 7.0823724838503095 ], "wc_review_avg": [ 353.0, 144.48252489488132 ], "wc_reply_reviewers_avg": [ 81.2, 129.7126054013256 ], "wc_reply_authors_avg": [ 262.2, 498.80673612131585 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.29880715233359845, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14781969789065373892&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "monash.edu;gxu.edu.cn;latrobe.edu.au;gxu.edu.cn;uts.edu.au;griffith.edu.au", "author_num": 6, "aff_unique_index": "0;1;2;1;3;4", "aff_unique_norm": "Monash University;Guangxi University;La Trobe University;University of Technology Sydney;Griffith University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.monash.edu;http://www.gxu.edu.cn;https://www.latrobe.edu.au;https://www.uts.edu.au;https://www.griffith.edu.au", "aff_unique_abbr": "Monash;GXU;LaTrobe;UTS;Griffith", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "Australia;China" }, { "title": "DASH: Warm-Starting Neural Network Training in Stationary Settings without Loss of Plasticity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95759", "id": "IdQuUYMA1t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IdQuUYMA1t", "openreview": "https://openreview.net/forum?id=IdQuUYMA1t", "poster": "", "project": "", "author_site": "Baekrok Shin, Junsoo Oh, Hanseul Cho, Chulhee Yun", "tldr": "", "abstract": "Warm-starting neural network training by initializing networks with previously learned weights is appealing, as practical neural networks are often deployed under a continuous influx of new data. However, it often leads to *loss of plasticity*, where the network loses its ability to learn new information, resulting in worse generalization than training from scratch. This occurs even under stationary data distributions, and its underlying mechanism is poorly understood. We develop a framework emulating real-world neural network training and identify noise memorization as the primary cause of plasticity loss when warm-starting on stationary data. Motivated by this, we propose **Direction-Aware SHrinking (DASH)**, a method aiming to mitigate plasticity loss by selectively forgetting memorized noise while preserving learned features. We validate our approach on vision tasks, demonstrating improvements in test accuracy and training efficiency.", "keywords": "loss of plasticity;warm-starting;incremental learning;generalization;Direction-Aware SHrinking", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Baekrok Shin;Junsoo Oh;Hanseul Cho;Chulhee Yun", "authorids": "~Baekrok_Shin1;~Junsoo_Oh1;~Hanseul_Cho1;~Chulhee_Yun1", "gender": "M;M;M;M", "homepage": ";https://junsoo424.github.io/;https://hanseuljo.github.io/;https://chulheeyun.github.io/", "dblp": ";;233/5755-2;138/0148.html", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;;IczOXwsAAAAJ;Ukl64ggAAAAJ", "orcid": ";;0009-0001-0410-0290;", "linkedin": "baekrok-shin-803a10336;;hanseul-cho-66b01a260/;", "or_profile": "~Baekrok_Shin1;~Junsoo_Oh1;~Hanseul_Cho1;~Chulhee_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshin2024dash,\ntitle={{DASH}: Warm-Starting Neural Network Training in Stationary Settings without Loss of Plasticity},\nauthor={Baekrok Shin and Junsoo Oh and Hanseul Cho and Chulhee Yun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IdQuUYMA1t}\n}", "github": "", "reviewers": "rFKW;t732;hPM9;LBum", "pdf_size": 7564440, "rating": "5;5;6;7", "confidence": "3;2;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "127;129;37;151", "wc_strengths": "141;25;84;112", "wc_weaknesses": "247;165;254;183", "wc_questions": "142;63;217;83", "wc_limitations": "92;45;57;30", "wc_review": "749;427;649;559", "wc_reply_reviewers": "215;70;426;104", "wc_reply_authors": "335;275;1233;100", "reply_reviewers": "1;1;2;1", "reply_authors": "3;3;5;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 111.0, 43.749285708454714 ], "wc_strengths_avg": [ 90.5, 42.85148772213166 ], "wc_weaknesses_avg": [ 212.25, 38.854697270729055 ], "wc_questions_avg": [ 126.25, 59.90565499182861 ], "wc_limitations_avg": [ 56.0, 22.880122377295102 ], "wc_review_avg": [ 596.0, 118.47784603038663 ], "wc_reply_reviewers_avg": [ 203.75, 139.06900265695444 ], "wc_reply_authors_avg": [ 485.75, 439.9791898487927 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15689843960684948749&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Teach Better or Show Smarter? On Instructions and Exemplars in Automatic Prompt Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95758", "id": "IdtoJVWVnX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IdtoJVWVnX", "openreview": "https://openreview.net/forum?id=IdtoJVWVnX", "poster": "/media/PosterPDFs/NeurIPS%202024/95758.png?t=1733844261.2669966", "project": "", "author_site": "Xingchen Wan, Ruoxi Sun, Hootan Nakhost, Sercan Arik", "tldr": "", "abstract": "Large language models have demonstrated remarkable capabilities but their performance is heavily reliant on effective prompt engineering. Automatic prompt optimization (APO) methods are designed to automate this and can be broadly categorized into those targeting instructions (instruction optimization, IO) vs. those targeting exemplars (exemplar optimization, EO). Despite their shared objective, these have evolved rather independently, with IO receiving more research attention recently. This paper seeks to bridge this gap by comprehensively comparing the performance of representative IO and EO techniques both isolation and combination on a diverse set of challenging tasks. Our findings reveal that intelligently reusing model-generated input-output pairs obtained from evaluating prompts on the validation set as exemplars, consistently improves performance on top of IO methods but is currently under-investigated. We also find that despite the recent focus on IO, how we select exemplars can outweigh how we optimize instructions, with EO strategies as simple as random search outperforming state-of-the-art IO methods with seed instructions without any optimization. Moreover, we observe a synergy between EO and IO, with optimal combinations surpassing the individual contributions. We conclude that studying exemplar optimization both as a standalone method and its optimal combination with instruction optimization remain a crucial aspect of APO and deserve greater consideration in future research, even in the era of highly capable instruction-following models.", "keywords": "instruction optimization;prompting;in-context learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xingchen Wan;Ruoxi Sun;Hootan Nakhost;Sercan O Arik", "authorids": "~Xingchen_Wan1;~Ruoxi_Sun2;~Hootan_Nakhost1;~Sercan_O_Arik1", "gender": "M;F;M;M", "homepage": "https://xingchen.one;;;https://www.sercanarik.com/", "dblp": "255/7214;72/7683;56/5649;", "google_scholar": "6KkohssAAAAJ;ut1-7LAAAAAJ;https://scholar.google.ca/citations?user=Bk-fDi0AAAAJ;", "orcid": "0000-0003-0074-0597;;;0000-0001-6333-1729", "linkedin": ";;;", "or_profile": "~Xingchen_Wan1;~Ruoxi_Sun2;~Hootan_Nakhost1;~Sercan_O_Arik1", "aff": "Google;Google;;Google", "aff_domain": "google.com;google.com;;google.com", "position": "Research Scientist;Google;;Research Scientist", "bibtex": "@inproceedings{\nwan2024teach,\ntitle={Teach Better or Show Smarter? On Instructions and Exemplars in Automatic Prompt Optimization},\nauthor={Xingchen Wan and Ruoxi Sun and Hootan Nakhost and Sercan O Arik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IdtoJVWVnX}\n}", "github": "", "reviewers": "dxC3;uqie;Hw9e;1T7N;1DvC;JLax", "pdf_size": 3305638, "rating": "5;5;5;6;7;7", "confidence": "4;3;3;5;3;4", "soundness": "2;2;3;2;4;3", "novelty": "2;3;2;3;3;3", "presentation": "3;3;4;3;4;4", "wc_summary": "70;12;115;50;67;65", "wc_strengths": "40;100;77;48;129;101", "wc_weaknesses": "319;227;97;259;73;230", "wc_questions": "67;79;2;82;26;142", "wc_limitations": "1;8;40;19;5;1", "wc_review": "497;426;331;458;300;539", "wc_reply_reviewers": "77;0;47;28;20;16", "wc_reply_authors": "1600;624;69;909;31;28", "reply_reviewers": "1;0;1;1;1;1", "reply_authors": "4;3;2;4;2;2", "rating_avg": [ 5.833333333333333, 0.8975274678557507 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 2.6666666666666665, 0.7453559924999299 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 63.166666666666664, 30.39416976256393 ], "wc_strengths_avg": [ 82.5, 31.191612120354836 ], "wc_weaknesses_avg": [ 200.83333333333334, 87.56410350264669 ], "wc_questions_avg": [ 66.33333333333333, 44.52964056545807 ], "wc_limitations_avg": [ 12.333333333333334, 13.780017739062925 ], "wc_review_avg": [ 425.1666666666667, 85.37254958253398 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 24.790231050870737 ], "wc_reply_authors_avg": [ 543.5, 578.7644166670926 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 2.8333333333333335, 0.8975274678557507 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16609095970748, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4381118379003535144&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;google.com;;google.com", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dynamic Rescaling for Training GNNs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95757", "id": "IfZwSRpqHl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IfZwSRpqHl", "openreview": "https://openreview.net/forum?id=IfZwSRpqHl", "poster": "/media/PosterPDFs/NeurIPS%202024/95757.png?t=1731444481.9828444", "project": "", "author_site": "Nimrah Mustafa, Rebekka Burkholz", "tldr": "", "abstract": "Graph neural networks (GNNs) with a rescale invariance, such as GATs, can be re-parameterized during optimization through dynamic rescaling of network parameters and gradients while keeping the loss invariant. In this work, we explore dynamic rescaling as a tool to influence GNN training dynamics in two key ways: i) balancing the network with respect to various criteria, and ii) controlling the relative learning speeds of different layers. We gain novel insights, unique to GNNs, that reveal distinct training modes for different tasks. For heterophilic graphs, achieving balance based on relative gradients leads to faster training and better generalization. In contrast, homophilic graphs benefit from delaying the learning of later layers. Additionally, we show that training in balance supports larger learning rates, which can improve generalization. Moreover, controlling layer-wise training speeds is linked to grokking-like phenomena, which may be of independent interest.", "keywords": "graph neural network;rescale invariance;generalization;network balance", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/16808d4d8045160faaa252ac2bc12ac84ca05d4e.zip", "author": "Nimrah Mustafa;Rebekka Burkholz", "authorids": "~Nimrah_Mustafa1;~Rebekka_Burkholz1", "gender": "F;F", "homepage": "https://cispa.de/en/people/c01nimu;https://sites.google.com/view/rebekkaburkholz/startseite", "dblp": ";194/3172", "google_scholar": ";https://scholar.google.ch/citations?user=vkWBb2wAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Nimrah_Mustafa1;~Rebekka_Burkholz1", "aff": "CISPA, saarland university, saarland informatics campus;Helmholtz Center CISPA for Information Security", "aff_domain": "cispa.saarland;cispa.saarland", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nmustafa2024dynamic,\ntitle={Dynamic Rescaling for Training {GNN}s},\nauthor={Nimrah Mustafa and Rebekka Burkholz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IfZwSRpqHl}\n}", "github": "", "reviewers": "TkPU;uEaK;GhMV;2KpP", "pdf_size": 1516134, "rating": "3;4;6;7", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;3;3", "wc_summary": "245;49;100;134", "wc_strengths": "40;38;63;261", "wc_weaknesses": "111;90;102;207", "wc_questions": "151;3;99;6", "wc_limitations": "16;3;23;21", "wc_review": "563;183;387;629", "wc_reply_reviewers": "118;0;27;514", "wc_reply_authors": "602;34;0;1121", "reply_reviewers": "1;0;1;3", "reply_authors": "2;2;1;4", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 132.0, 71.91314205345223 ], "wc_strengths_avg": [ 100.5, 93.18395784683112 ], "wc_weaknesses_avg": [ 127.5, 46.5 ], "wc_questions_avg": [ 64.75, 63.00148807766369 ], "wc_limitations_avg": [ 15.75, 7.790218225441442 ], "wc_review_avg": [ 440.5, 172.99349698760355 ], "wc_reply_reviewers_avg": [ 164.75, 206.32422906677732 ], "wc_reply_authors_avg": [ 439.25, 460.5536749391975 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zAz5JrKmrLYJ:scholar.google.com/&scioq=Dynamic+Rescaling+for+Training+GNNs&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "cispa.saarland;cispa.saarland", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Saarland University;Helmholtz Center CISPA", "aff_unique_dep": "CISPA;Information Security", "aff_unique_url": "https://www.uni-saarland.de;https://www.cispa.de/", "aff_unique_abbr": "Saarland U;CISPA", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "IfpNsorodK", "title": "PFDiff: Training-free Acceleration of Diffusion Models through the Gradient Guidance of Past and Future", "track": "main", "status": "Reject", "tldr": "", "abstract": "Diffusion Probabilistic Models (DPMs) have shown remarkable potential in image generation, but their sampling efficiency is hindered by the need for numerous denoising steps. Most existing solutions accelerate the sampling process by proposing fast ODE solvers. However, the inevitable discretization errors of the ODE solvers are significantly magnified when the number of function evaluations (NFE) is fewer. In this work, we propose PFDiff, a novel training-free and orthogonal timestep-skipping strategy, which enables existing fast ODE solvers to operate with fewer NFE. Based on two key observations: a significant similarity in the model's outputs at time step size that is not excessively large during the denoising process of existing ODE solvers, and a high resemblance between the denoising process and SGD. PFDiff, by employing gradient replacement from past time steps and foresight updates inspired by Nesterov momentum, rapidly updates intermediate states, thereby reducing unnecessary NFE while correcting for discretization errors inherent in first-order ODE solvers. Experimental results demonstrate that PFDiff exhibits flexible applicability across various pre-trained DPMs, particularly excelling in conditional DPMs and surpassing previous state-of-the-art training-free methods. For instance, using DDIM as a baseline, we achieved 16.46 FID (4 NFE) compared to 138.81 FID with DDIM on ImageNet 64x64 with classifier guidance, and 13.06 FID (10 NFE) on Stable Diffusion with 7.5 guidance scale.", "keywords": "diffusion models;accelerated sampling;training-free sampler;orthogonal sampling method", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/93be98c3fe5c3ee530639bce39f67508d4924fb1.zip", "author": "Guangyi Wang;Yuren Cai;lijiang Li;Wei Peng;Song-Zhi Su", "authorids": "~Guangyi_Wang1;~Yuren_Cai1;~lijiang_Li1;~Wei_Peng4;~Song-Zhi_Su1", "gender": "M;M;;M;M", "homepage": "https://github.com/onefly123;https://github.com/molisi;https://github.com/lilijiangg;https://xiaoiker.github.io/;https://imt.xmu.edu.cn/info/1004/1183.htm", "dblp": ";;310/1558;16/5560-9.html;36/7815", "google_scholar": ";;;TDFM0QYAAAAJ;", "orcid": ";;;0000-0002-2892-5764;", "linkedin": ";;;;", "or_profile": "~Guangyi_Wang1;~Yuren_Cai1;~lijiang_Li1;~Wei_Peng4;~Song-Zhi_Su1", "aff": "Xiamen University;Xiamen University;Microsoft;Stanford University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;microsoft.com;stanford.edu;xmu.edu.cn", "position": "MS student;MS student;Intern;Postdoc;Associate Professor", "bibtex": "@misc{\nanonymous2024pfdiff,\ntitle={{PFD}iff: Training-free Acceleration of Diffusion Models through the Gradient Guidance of Past and Future},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=IfpNsorodK}\n}", "github": "", "project": "", "reviewers": "FCeN;7w4h;sKuL;NAJn;GukH", "site": "https://openreview.net/forum?id=IfpNsorodK", "pdf_size": 4444452, "rating": "4;4;5;6;6", "confidence": "3;4;5;4;3", "soundness": "3;2;3;3;3", "novelty": "2;1;2;4;3", "presentation": "4;2;2;2;3", "wc_summary": "63;78;52;60;64", "wc_strengths": "49;29;30;56;47", "wc_weaknesses": "62;317;159;126;96", "wc_questions": "55;3;41;18;33", "wc_limitations": "18;1;45;1;10", "wc_review": "247;428;327;261;250", "wc_reply_reviewers": "0;27;197;25;15", "wc_reply_authors": "0;28;471;18;34", "reply_reviewers": "0;1;2;1;1", "reply_authors": "1;2;3;2;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 63.4, 8.42852300228219 ], "wc_strengths_avg": [ 42.2, 10.796295661012623 ], "wc_weaknesses_avg": [ 152.0, 88.52796168443052 ], "wc_questions_avg": [ 30.0, 18.04438970982394 ], "wc_limitations_avg": [ 15.0, 16.284962388657824 ], "wc_review_avg": [ 302.6, 69.1508495970946 ], "wc_reply_reviewers_avg": [ 52.8, 72.73073628116245 ], "wc_reply_authors_avg": [ 110.2, 180.76769622916592 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2Raqmut5XbMJ:scholar.google.com/&scioq=PFDiff:+Training-free+Acceleration+of+Diffusion+Models+through+the+Gradient+Guidance+of+Past+and+Future&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Xiamen University;Microsoft;Stanford University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.microsoft.com;https://www.stanford.edu", "aff_unique_abbr": "XMU;Microsoft;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Contrastive dimension reduction: when and how?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95756", "id": "IgU8gMKy4D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IgU8gMKy4D", "openreview": "https://openreview.net/forum?id=IgU8gMKy4D", "poster": "/media/PosterPDFs/NeurIPS%202024/95756.png?t=1733835054.1889036", "project": "", "author_site": "Sam Hawke, YueEn Ma, Didong Li", "tldr": "", "abstract": "Dimension reduction (DR) is an important and widely studied technique in exploratory data analysis. However, traditional DR methods are not applicable to datasets with with a contrastive structure, where data are split into a foreground group of interest (case or treatment group), and a background group (control group). This type of data, common in biomedical studies, necessitates contrastive dimension reduction (CDR) methods to effectively capture information unique to or enriched in the foreground group relative to the background group. Despite the development of various CDR methods, two critical questions remain underexplored: when should these methods be applied, and how can the information unique to the foreground group be quantified? In this work, we address these gaps by proposing a hypothesis test to determine the existence of contrastive information, and introducing a contrastive dimension estimator (CDE) to quantify the unique components in the foreground group. We provide theoretical support for our methods and validate their effectiveness through extensive simulated, semi-simulated, and real experiments involving images, gene expressions, protein expressions, and medical sensors, demonstrating their ability to identify the unique information in the foreground group.", "keywords": "Contrastive methods;dimension estimation;case-control data;dimension reduction", "primary_area": "evaluation", "supplementary_material": "", "author": "Sam Hawke;Yueen Ma;Didong Li", "authorids": "~Sam_Hawke1;~Yueen_Ma2;~Didong_Li1", "gender": "M;F;", "homepage": ";;https://sites.google.com/view/didongli/", "dblp": ";;211/6299", "google_scholar": ";;YBVhMxoAAAAJ", "orcid": ";;0000-0001-9146-705X", "linkedin": "sam-hawke-b69283160/;https://linkedin.com/in/yueen-ma;", "or_profile": "~Sam_Hawke1;~Yueen_Ma2;~Didong_Li1", "aff": "University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "unc.edu;ad.unc.edu;unc.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhawke2024contrastive,\ntitle={Contrastive dimension reduction: when and how?},\nauthor={Sam Hawke and Yueen Ma and Didong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IgU8gMKy4D}\n}", "github": "", "reviewers": "xiqq;e5iP;Bh4k;Casz", "pdf_size": 2140302, "rating": "3;4;7;7", "confidence": "2;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "95;41;122;127", "wc_strengths": "35;27;68;58", "wc_weaknesses": "200;458;61;90", "wc_questions": "65;3;203;160", "wc_limitations": "1;4;1;41", "wc_review": "396;533;455;476", "wc_reply_reviewers": "49;18;28;9", "wc_reply_authors": "421;41;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.25, 34.14216601213227 ], "wc_strengths_avg": [ 47.0, 16.62828914831589 ], "wc_weaknesses_avg": [ 202.25, 156.4966053944941 ], "wc_questions_avg": [ 107.75, 78.42631892419789 ], "wc_limitations_avg": [ 11.75, 16.931848688197046 ], "wc_review_avg": [ 465.0, 49.005101775223366 ], "wc_reply_reviewers_avg": [ 26.0, 14.882876066137216 ], "wc_reply_authors_avg": [ 115.5, 177.1729381141488 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7276068751089989, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SrTEvvN5cp8J:scholar.google.com/&scioq=Contrastive+dimension+reduction:+when+and+how%3F&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "unc.edu;ad.unc.edu;unc.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "GAMap: Zero-Shot Object Goal Navigation with Multi-Scale Geometric-Affordance Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95755", "id": "IjHrALdQNP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IjHrALdQNP", "openreview": "https://openreview.net/forum?id=IjHrALdQNP", "poster": "/media/PosterPDFs/NeurIPS%202024/95755.png?t=1731609177.305228", "project": "", "author_site": "shuaihang yuan, Hao Huang, Yu Hao, Congcong Wen, Anthony Tzes, Yi Fang", "tldr": "", "abstract": "Zero-Shot Object Goal Navigation (ZS-OGN) enables robots to navigate toward objects of unseen categories without prior training. Traditional approaches often leverage categorical semantic information for navigation guidance, which struggles when only partial objects are observed or detailed and functional representations of the environment are lacking. To resolve the above two issues, we propose \\textit{Geometric-part and Affordance Maps} (GAMap), a novel method that integrates object parts and affordance attributes for navigation guidance. Our method includes a multi-scale scoring approach to capture geometric-part and affordance attributes of objects at different scales. Comprehensive experiments conducted on the HM3D and Gibson benchmark datasets demonstrate improvements in Success Rates and Success weighted by Path Length, underscoring the efficacy of our geometric-part and affordance-guided navigation approach in enhancing robot autonomy and versatility, without any additional task-specific training or fine-tuning with the semantics of unseen objects and/or the locomotions of the robot.", "keywords": "zero-shot object goal navigation", "primary_area": "robotics", "supplementary_material": "", "author": "Shuaihang Yuan;Hao Huang;Yu Hao;Congcong Wen;Anthony Tzes;Yi Fang", "authorids": "~Shuaihang_Yuan1;~Hao_Huang3;~Yu_Hao1;~Congcong_Wen1;~Anthony_Tzes1;~Yi_Fang2", "gender": "M;M;M;;M;M", "homepage": ";https://nyuair.github.io/website/;;http://www.wencc.xyz/;https://nyuad.nyu.edu/en/academics/divisions/engineering/faculty/anthony-tzes.html;http://mmvc.engineering.nyu.edu/", "dblp": "257/3707;04/5616-3;33/32703;218/4638;11/637;96/361-6", "google_scholar": "s2YA4rEAAAAJ;SsZ7BooAAAAJ;;https://scholar.google.com.hk/citations?user=OTBgvCYAAAAJ;https://scholar.google.gr/citations?user=d0ZJVd4AAAAJ;j-cyhzwAAAAJ", "orcid": ";0000-0002-9131-5854;;0000-0001-6448-003X;0000-0003-3709-2810;", "linkedin": ";;;;;", "or_profile": "~Shuaihang_Yuan1;~Hao_Huang3;~Yu_Hao1;~Congcong_Wen1;~Anthony_Tzes1;~Yi_Fang2", "aff": "New York University;New York University;New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "Postdoc;Postdoc;PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyuan2024gamap,\ntitle={{GAM}ap: Zero-Shot Object Goal Navigation with Multi-Scale Geometric-Affordance Guidance},\nauthor={Shuaihang Yuan and Hao Huang and Yu Hao and Congcong Wen and Anthony Tzes and Yi Fang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IjHrALdQNP}\n}", "github": "", "reviewers": "fGDQ;suJv;13Wq;KUEn", "pdf_size": 9306158, "rating": "4;5;6;6", "confidence": "3;3;5;4", "soundness": "2;3;3;2", "novelty": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "78;137;86;73", "wc_strengths": "52;22;66;32", "wc_weaknesses": "109;133;128;274", "wc_questions": "30;97;74;50", "wc_limitations": "8;9;114;6", "wc_review": "277;398;468;435", "wc_reply_reviewers": "55;27;27;265", "wc_reply_authors": "303;0;0;329", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 25.53918557824427 ], "wc_strengths_avg": [ 43.0, 17.11724276862369 ], "wc_weaknesses_avg": [ 161.0, 65.85210702779372 ], "wc_questions_avg": [ 62.75, 25.17315037892556 ], "wc_limitations_avg": [ 34.25, 46.05635135353212 ], "wc_review_avg": [ 394.5, 72.21668782213706 ], "wc_reply_reviewers_avg": [ 93.5, 99.673216061287 ], "wc_reply_authors_avg": [ 158.0, 158.26717916232664 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3531673764466026023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "$\\texttt{dattri}$: A Library for Efficient Data Attribution", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97763", "id": "IkA54A6KKe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IkA54A6KKe", "openreview": "https://openreview.net/forum?id=IkA54A6KKe", "poster": "", "project": "", "author_site": "Junwei Deng, Ting-Wei Li, Shiyuan Zhang, Shixuan Liu, Yijun Pan, Hao Huang, Xinhe Wang, Pingbang Hu, Xingjian Zhang, Jiaqi Ma", "tldr": "", "abstract": "Data attribution methods aim to quantify the influence of individual training samples on the prediction of artificial intelligence (AI) models. As training data plays an increasingly crucial role in the modern development of large-scale AI models, data attribution has found broad applications in improving AI performance and safety. However, despite a surge of new data attribution methods being developed recently, there lacks a comprehensive library that facilitates the development, benchmarking, and deployment of different data attribution methods. In this work, we introduce $\\texttt{dattri}$, an open-source data attribution library that addresses the above needs. Specifically, $\\texttt{dattri}$ highlights three novel design features. Firstly, $\\texttt{dattri}$ proposes a unified and easy-to-use API, allowing users to integrate different data attribution methods into their PyTorch-based machine learning pipeline with a few lines of code changed. Secondly, $\\texttt{dattri}$ modularizes low-level utility functions that are commonly used in data attribution methods, such as Hessian-vector product, inverse-Hessian-vector product or random projection, making it easier for researchers to develop new data attribution methods. Thirdly, $\\texttt{dattri}$ provides a comprehensive benchmark framework with pre-trained models and ground truth annotations for a variety of benchmark settings, including generative AI settings. We have implemented a variety of state-of-the-art efficient data attribution methods that can be applied to large-scale neural network models, and will continuously update the library in the future. Using the developed $\\texttt{dattri}$ library, we are able to perform a comprehensive and fair benchmark analysis across a wide range of data attribution methods. The source code of $\\texttt{dattri}$ is available at https://github.com/TRAIS-Lab/dattri.", "keywords": "data attribution;data-centric AI;benchmark", "primary_area": "", "supplementary_material": "", "author": "Junwei Deng;Ting Wei Li;Shiyuan Zhang;Shixuan Liu;Yijun Pan;Hao Huang;Xinhe Wang;Pingbang Hu;Xingjian Zhang;Jiaqi Ma", "authorids": "~Junwei_Deng1;~Ting_Wei_Li1;~Shiyuan_Zhang2;~Shixuan_Liu2;~Yijun_Pan2;~Hao_Huang15;~Xinhe_Wang1;~Pingbang_Hu1;~Xingjian_Zhang1;~Jiaqi_Ma1", "gender": "M;M;M;M;M;;M;M;M;", "homepage": "https://theaperdeng.github.io/;https://tingwl0122.github.io/;https://seanzh30.github.io;;https://charles-pyj.github.io/;;;https://pbb.wtf;https://xingjian-zhang.github.io;https://jiaqima.github.io", "dblp": "245/4347;336/2460;78/6377;;260/4003;;;331/2848;36/5276-2;155/2199-1", "google_scholar": "DagyP9QAAAAJ;ema4jhsAAAAJ;;;;;RzoX39oAAAAJ;lPlQpqoAAAAJ;https://scholar.google.com/citations?hl=en;Z9X2A1MAAAAJ", "orcid": ";;;0009-0009-9308-5878;;;;;0009-0001-0716-3449;0000-0001-8292-5901", "linkedin": ";ting-wei-li-3940ab241/;;shixuan-liu-164b40290;yijun-pan-1b83bb2aa/;https://www.linkedin.com/feed/?trk=hb_signin;xinhe-wang-8bb9442b5/;;;", "or_profile": "~Junwei_Deng1;~Ting_Wei_Li1;~Shiyuan_Zhang2;~Shixuan_Liu2;~Yijun_Pan2;~Hao_Huang15;~Xinhe_Wang1;~Pingbang_Hu1;~Xingjian_Zhang1;~Jiaqi_Ma1", "aff": "University of Illinois, Urbana Champaign;University of Michigan - Ann Arbor;University of Illinois, Urbana Champaign;University of Michigan - Ann Arbor;Shanghai Jiaotong University;University of Illinois, Urbana Champaign;Shanghai Jiaotong University;University of Illinois, Urbana Champaign;University of Michigan - Ann Arbor;University of Illinois Urbana-Champaign", "aff_domain": "illinois.edu;umich.edu;illinois.edu;umich.edu;sjtu.edu.cn;illinois.edu;sjtu.edu.cn;illinois.edu;umich.edu;illinois.edu", "position": "PhD student;Researcher;MS student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndeng2024textttdattri,\ntitle={\\${\\textbackslash}texttt\\{dattri\\}\\$: A Library for Efficient Data Attribution},\nauthor={Junwei Deng and Ting Wei Li and Shiyuan Zhang and Shixuan Liu and Yijun Pan and Hao Huang and Xinhe Wang and Pingbang Hu and Xingjian Zhang and Jiaqi Ma},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=IkA54A6KKe}\n}", "github": "", "reviewers": "eHpZ;W7KE;Ffeh", "pdf_size": 571452, "rating": "6;7;7", "confidence": "2;4;3", "wc_summary_and_contributions": "51;65;57", "wc_strengths": "1;97;44", "wc_improvement": "13;113;205", "wc_limitations": "1;1;26", "wc_correctness": "1;1;31", "wc_clarity": "1;8;35", "wc_relation_to_prior_work": "1;7;30", "wc_documentation": "1;1;25", "wc_additional_feedback": "1;1;1", "wc_review": "71;294;454", "wc_reply_reviewers": "33;12;72", "wc_reply_authors": "331;10;44", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 57.666666666666664, 5.734883511361751 ], "wc_strengths_avg": [ 47.333333333333336, 39.2626483512709 ], "wc_improvement_avg": [ 110.33333333333333, 78.4063489492755 ], "wc_limitations_avg": [ 9.333333333333334, 11.785113019775793 ], "wc_correctness_avg": [ 11.0, 14.142135623730951 ], "wc_clarity_avg": [ 14.666666666666666, 14.659088951530682 ], "wc_relation_to_prior_work_avg": [ 12.666666666666666, 12.498888839501785 ], "wc_documentation_avg": [ 9.0, 11.313708498984761 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 273.0, 157.06262020820444 ], "wc_reply_reviewers_avg": [ 39.0, 24.859605789312106 ], "wc_reply_authors_avg": [ 128.33333333333334, 143.97762171794463 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "illinois.edu;umich.edu;illinois.edu;umich.edu;sjtu.edu.cn;illinois.edu;sjtu.edu.cn;illinois.edu;umich.edu;illinois.edu", "author_num": 10, "aff_unique_index": "0;1;0;1;2;0;2;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Michigan;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.umich.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "UIUC;UM;SJTU", "aff_campus_unique_index": "0;1;0;1;0;0;1;0", "aff_campus_unique": "Urbana-Champaign;Ann Arbor;", "aff_country_unique_index": "0;0;0;0;1;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "AMBROSIA: A Benchmark for Parsing Ambiguous Questions into Database Queries", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97762", "id": "IlFk5U9cEg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IlFk5U9cEg", "openreview": "https://openreview.net/forum?id=IlFk5U9cEg", "poster": "/media/PosterPDFs/NeurIPS%202024/97762.png?t=1733407727.15077", "project": "", "author_site": "Irina Saparina, Mirella Lapata", "tldr": "", "abstract": "Practical semantic parsers are expected to understand user utterances and map them to executable programs, even when these are ambiguous. We introduce a new benchmark, AMBROSIA, which we hope will inform and inspire the development of text-to-SQL parsers capable of recognizing and interpreting ambiguous requests. Our dataset contains questions showcasing three different types of ambiguity (scope ambiguity, attachment ambiguity, and vagueness), their interpretations, and corresponding SQL queries. In each case, the ambiguity persists even when the database context is provided. This is achieved through a novel approach that involves controlled generation of databases from scratch. We benchmark various LLMs on AMBROSIA, revealing that even the most advanced models struggle to identify and interpret ambiguity in questions.", "keywords": "ambiguity;semantic parsing;text-to-SQL;databases;code generation;LLMs", "primary_area": "", "supplementary_material": "/attachment/dd67712a5a08c3e228300553488da7f0f42ceaec.zip", "author": "Irina Saparina;Mirella Lapata", "authorids": "~Irina_Saparina1;~Mirella_Lapata1", "gender": ";F", "homepage": ";https://homepages.inf.ed.ac.uk/mlap/", "dblp": ";59/6701", "google_scholar": ";j67B9Q4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Irina_Saparina1;~Mirella_Lapata1", "aff": ";Edinburgh University, University of Edinburgh", "aff_domain": ";inf.ed.ac.uk", "position": ";Full Professor", "bibtex": "@inproceedings{\nsaparina2024ambrosia,\ntitle={{AMBROSIA}: A Benchmark for Parsing Ambiguous Questions into Database Queries},\nauthor={Irina Saparina and Mirella Lapata},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=IlFk5U9cEg}\n}", "github": "", "reviewers": "zcDZ;HeH4;7sxp;1VY6", "pdf_size": 1624916, "rating": "7;7;7;9", "confidence": "4;5;4;3", "wc_summary_and_contributions": "69;85;69;28", "wc_strengths": "48;48;35;7", "wc_improvement": "391;70;99;10", "wc_limitations": "38;15;8;15", "wc_correctness": "1;4;21;9", "wc_clarity": "63;1;8;6", "wc_relation_to_prior_work": "1;30;1;6", "wc_documentation": "1;19;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "613;273;243;83", "wc_reply_reviewers": "247;30;11;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 62.75, 21.09946681790798 ], "wc_strengths_avg": [ 34.5, 16.740669042783207 ], "wc_improvement_avg": [ 142.5, 147.01785605837136 ], "wc_limitations_avg": [ 19.0, 11.335784048754634 ], "wc_correctness_avg": [ 8.75, 7.628073151196179 ], "wc_clarity_avg": [ 19.5, 25.243811122728676 ], "wc_relation_to_prior_work_avg": [ 9.5, 12.010412149464313 ], "wc_documentation_avg": [ 5.5, 7.794228634059948 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 303.0, 193.00259065618783 ], "wc_reply_reviewers_avg": [ 76.0, 98.97221832413376 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11975366841839963226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";inf.ed.ac.uk", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "LM-HT SNN: Enhancing the Performance of SNN to ANN Counterpart through Learnable Multi-hierarchical Threshold Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95754", "id": "IlIDNMvwmX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IlIDNMvwmX", "openreview": "https://openreview.net/forum?id=IlIDNMvwmX", "poster": "/media/PosterPDFs/NeurIPS%202024/95754.png?t=1730816805.766468", "project": "", "author_site": "Zecheng Hao, Xinyu Shi, Yujia Liu, Zhaofei Yu, Tiejun Huang", "tldr": "", "abstract": "Compared to traditional Artificial Neural Network (ANN), Spiking Neural Network (SNN) has garnered widespread academic interest for its intrinsic ability to transmit information in a more energy-efficient manner. However, despite previous efforts to optimize the learning algorithm of SNNs through various methods, SNNs still lag behind ANNs in terms of performance. The recently proposed multi-threshold model provides more possibilities for further enhancing the learning capability of SNNs. In this paper, we rigorously analyze the relationship among the multi-threshold model, vanilla spiking model and quantized ANNs from a mathematical perspective, then propose a novel LM-HT model, which is an equidistant multi-threshold model that can dynamically regulate the global input current and membrane potential leakage on the time dimension. The LM-HT model can also be transformed into a vanilla single threshold model through reparameterization, thereby achieving more flexible hardware deployment. In addition, we note that the LM-HT model can seamlessly integrate with ANN-SNN Conversion framework under special initialization. This novel hybrid learning framework can effectively improve the relatively poor performance of converted SNNs under low time latency. Extensive experimental results have demonstrated that our model can outperform previous state-of-the-art works on various types of datasets, which promote SNNs to achieve a brand-new level of performance comparable to quantized ANNs. Code is available at https://github.com/hzc1208/LMHT_SNN.", "keywords": "Spiking Neural Networks;Learnable Multi-hierarchical Threshold Model;STBP Training", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/8658cb0ecc143d4bb1111593aac7a8e0371be06e.zip", "author": "Zecheng Hao;Xinyu Shi;Yujia Liu;Zhaofei Yu;Tiejun Huang", "authorids": "~Zecheng_Hao1;~Xinyu_Shi2;~Yujia_Liu1;~Zhaofei_Yu1;~Tiejun_Huang1", "gender": ";;F;M;M", "homepage": "https://hzc1208.github.io/;;;https://yuzhaofei.github.io;https://idm.pku.edu.cn/~tjhuang/", "dblp": "339/6969;;42/10221.html;166/0573;h/TiejunHuang", "google_scholar": "txTkX7YAAAAJ;;iDyKEuwAAAAJ;qaUgD50AAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ", "orcid": "0000-0001-9074-2857;;0000-0001-7356-3937;;0000-0002-4234-6099", "linkedin": ";;;;", "or_profile": "~Zecheng_Hao1;~Xinyu_Shi2;~Yujia_Liu1;~Zhaofei_Yu1;~Tiejun_Huang1", "aff": "Peking University;;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhao2024lmht,\ntitle={{LM}-{HT} {SNN}: Enhancing the Performance of {SNN} to {ANN} Counterpart through Learnable Multi-hierarchical Threshold Model},\nauthor={Zecheng Hao and Xinyu Shi and Yujia Liu and Zhaofei Yu and Tiejun Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IlIDNMvwmX}\n}", "github": "", "reviewers": "zyi4;59n9;wxgA;JrEi", "pdf_size": 574144, "rating": "6;7;7;7", "confidence": "5;5;4;4", "soundness": "4;3;4;3", "novelty": "4;3;3;3", "presentation": "3;3;3;2", "wc_summary": "68;60;76;63", "wc_strengths": "54;56;30;44", "wc_weaknesses": "98;217;95;59", "wc_questions": "40;2;25;25", "wc_limitations": "1;6;64;16", "wc_review": "261;341;290;207", "wc_reply_reviewers": "0;51;21;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.75, 6.057020719792859 ], "wc_strengths_avg": [ 46.0, 10.295630140987 ], "wc_weaknesses_avg": [ 117.25, 59.60023070425147 ], "wc_questions_avg": [ 23.0, 13.583077707206124 ], "wc_limitations_avg": [ 21.75, 24.983744715314394 ], "wc_review_avg": [ 274.75, 48.478732450426136 ], "wc_reply_reviewers_avg": [ 20.5, 19.11151485361639 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5903829312473547987&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DMesh: A Differentiable Mesh Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95753", "id": "Io1qKqCVIK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Io1qKqCVIK", "openreview": "https://openreview.net/forum?id=Io1qKqCVIK", "poster": "", "project": "", "author_site": "Sanghyun Son, Matheus Gadelha, Yang Zhou, Zexiang Xu, Ming Lin, Yi Zhou", "tldr": "", "abstract": "We present a differentiable representation, DMesh, for general 3D triangular meshes. DMesh considers both the geometry and connectivity information of a mesh. In our design, we first get a set of convex tetrahedra that compactly tessellates the domain based on Weighted Delaunay Triangulation (WDT), and select triangular faces on the tetrahedra to define the final mesh. We formulate probability of faces to exist on the actual surface in a differentiable manner based on the WDT. This enables DMesh to represent meshes of various topology in a differentiable way, and allows us to reconstruct the mesh under various observations, such as point clouds and multi-view images using gradient-based optimization. We publicize the source code and supplementary material at our project page (https://sonsang.github.io/dmesh-project).", "keywords": "Differentiable Mesh;3D Reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/e13adfe1bc9abb9f3251ab12db1a9fa9382360c7.zip", "author": "Sanghyun Son;Matheus Gadelha;Yang Zhou;Zexiang Xu;Ming Lin;Yi Zhou", "authorids": "~Sanghyun_Son3;~Matheus_Gadelha1;~Yang_Zhou10;~Zexiang_Xu1;~Ming_Lin2;~Yi_Zhou1", "gender": "M;M;M;M;F;F", "homepage": "https://sanghyun.phd.sh/;http://mgadelha.me/;https://yzhou359.github.io/;https://cseweb.ucsd.edu/~zex014/;http://www.cs.umd.edu/~lin;http://zhouyisjtu.github.io", "dblp": "68/6424-3;192/1466;07/4580-9;154/0366;l/MingCLin.html;01/1901-23", "google_scholar": "WzuMJR8AAAAJ;VhqmvXsAAAAJ;UuwugFEAAAAJ;_RRIYvEAAAAJ;ugFNit4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0003-3736-6949;0000-0002-2632-7664", "linkedin": ";;;;mlin2/;", "or_profile": "~Sanghyun_Son3;~Matheus_Gadelha1;~Yang_Zhou10;~Zexiang_Xu1;~Ming_Lin2;~Yi_Zhou1", "aff": "University of Maryland, College Park;Adobe Systems;Adobe Research;Adobe Research;Amazon;Adobe Systems", "aff_domain": "umd.edu;adobe.com;adobe.com;adobe.com;amazon.com;adobe.com", "position": "PhD student;Researcher;Research Scientist;Researcher;Amazon Scholar;Research Scientist", "bibtex": "@inproceedings{\nson2024dmesh,\ntitle={{DM}esh: A Differentiable Mesh Representation},\nauthor={Sanghyun Son and Matheus Gadelha and Yang Zhou and Zexiang Xu and Ming Lin and Yi Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Io1qKqCVIK}\n}", "github": "", "reviewers": "jRqE;8dk4;cnRb;7cDg;yemP", "pdf_size": 34173830, "rating": "6;6;6;7;8", "confidence": "3;3;4;4;3", "soundness": "3;3;2;4;4", "novelty": "3;3;2;4;4", "presentation": "3;3;3;3;4", "wc_summary": "64;108;75;29;181", "wc_strengths": "85;92;45;56;177", "wc_weaknesses": "76;176;60;32;364", "wc_questions": "48;146;79;36;132", "wc_limitations": "4;14;1;23;31", "wc_review": "277;536;260;176;885", "wc_reply_reviewers": "11;19;21;11;270", "wc_reply_authors": "6;11;15;11;649", "reply_reviewers": "1;1;1;1;2", "reply_authors": "2;2;2;2;3", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.4, 51.41439487147544 ], "wc_strengths_avg": [ 91.0, 46.41982335166734 ], "wc_weaknesses_avg": [ 141.6, 121.34183120424711 ], "wc_questions_avg": [ 88.2, 44.01090773887765 ], "wc_limitations_avg": [ 14.6, 11.288932633336067 ], "wc_review_avg": [ 426.8, 258.857026174682 ], "wc_reply_reviewers_avg": [ 66.4, 101.88149979265127 ], "wc_reply_authors_avg": [ 138.4, 255.3159611148508 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.1020620726159658, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JXWVUq3sSoQJ:scholar.google.com/&scioq=DMesh:+A+Differentiable+Mesh+Representation&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "umd.edu;adobe.com;adobe.com;adobe.com;amazon.com;adobe.com", "author_num": 6, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "University of Maryland;Adobe;Amazon", "aff_unique_dep": ";Adobe Systems Incorporated;Amazon.com, Inc.", "aff_unique_url": "https://www/umd.edu;https://www.adobe.com;https://www.amazon.com", "aff_unique_abbr": "UMD;Adobe;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Addressing Spectral Bias of Deep Neural Networks by Multi-Grade Deep Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95752", "id": "IoRT7EhFap", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IoRT7EhFap", "openreview": "https://openreview.net/forum?id=IoRT7EhFap", "poster": "/media/PosterPDFs/NeurIPS%202024/95752.png?t=1731968589.2136903", "project": "", "author_site": "RONGLONG FANG, Yuesheng Xu", "tldr": "", "abstract": "Deep neural networks (DNNs) have showcased their remarkable precision in approximating smooth functions. However, they suffer from the {\\it spectral bias}, wherein DNNs typically exhibit a tendency to prioritize the learning of lower-frequency components of a function, struggling to effectively capture its high-frequency features. This paper is to address this issue. Notice that a function having only low frequency components may be well-represented by a shallow neural network (SNN), a network having only a few layers. By observing that composition of low frequency functions can effectively approximate a high-frequency function, we propose to learn a function containing high-frequency components by composing several SNNs, each of which learns certain low-frequency information from the given data. We implement the proposed idea by exploiting the multi-grade deep learning (MGDL) model, a recently introduced model that trains a DNN incrementally, grade by grade, a current grade learning from the residue of the previous grade only an SNN (with trainable parameters) composed with the SNNs (with fixed parameters) trained in the preceding grades as features. We apply MGDL to synthetic, manifold, colored images, and MNIST datasets, all characterized by presence of high-frequency features. Our study reveals that MGDL excels at representing functions containing high-frequency information. Specifically, the neural networks learned in each grade adeptly capture some low-frequency information, allowing their compositions with SNNs learned in the previous grades effectively representing the high-frequency features. Our experimental results underscore the efficacy of MGDL in addressing the spectral bias inherent in DNNs. By leveraging MGDL, we offer insights into overcoming spectral bias limitation of DNNs, thereby enhancing the performance and applicability of deep learning models in tasks requiring the representation of high-frequency information. This study confirms that the proposed method offers a promising solution to address the spectral bias of DNNs. The code is available on GitHub: \\href{https://github.com/Ronglong-Fang/AddressingSpectralBiasviaMGDL}{\\texttt{Addressing Spectral Bias via MGDL}}.", "keywords": "deep neural network;spectral bias;multi-grade deep learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ronglong Fang;Yuesheng Xu", "authorids": "~Ronglong_Fang1;~Yuesheng_Xu1", "gender": "M;M", "homepage": ";", "dblp": "381/3159.html;", "google_scholar": ";sOkv66IAAAAJ", "orcid": ";", "linkedin": "ronglong-fang-5b496a24a/;", "or_profile": "~Ronglong_Fang1;~Yuesheng_Xu1", "aff": "Old Dominion University;Old Dominion University", "aff_domain": "odu.edu;odu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nfang2024addressing,\ntitle={Addressing Spectral Bias of Deep Neural Networks by Multi-Grade Deep Learning},\nauthor={Ronglong Fang and Yuesheng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IoRT7EhFap}\n}", "github": "", "reviewers": "GtgE;otLk;vDtJ", "pdf_size": 8908528, "rating": "4;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "87;85;86", "wc_strengths": "48;38;63", "wc_weaknesses": "433;67;74", "wc_questions": "5;74;80", "wc_limitations": "4;2;2", "wc_review": "577;266;305", "wc_reply_reviewers": "0;123;23", "wc_reply_authors": "0;583;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.0, 0.816496580927726 ], "wc_strengths_avg": [ 49.666666666666664, 10.274023338281626 ], "wc_weaknesses_avg": [ 191.33333333333334, 170.9080324489038 ], "wc_questions_avg": [ 53.0, 34.02939905434711 ], "wc_limitations_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_review_avg": [ 382.6666666666667, 138.33373493917608 ], "wc_reply_reviewers_avg": [ 48.666666666666664, 53.393715818332865 ], "wc_reply_authors_avg": [ 194.33333333333334, 274.82883562117144 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12240104574885752105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "odu.edu;odu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Old Dominion University", "aff_unique_dep": "", "aff_unique_url": "https://www.odu.edu", "aff_unique_abbr": "ODU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dense Connector for MLLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95751", "id": "Ioabr42B44", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ioabr42B44", "openreview": "https://openreview.net/forum?id=Ioabr42B44", "poster": "", "project": "", "author_site": "Huanjin Yao, Wenhao Wu, Taojiannan Yang, YuXin Song, Mengxi Zhang, Haocheng Feng, Yifan Sun, Zhiheng Li, Wanli Ouyang, Jingdong Wang", "tldr": "", "abstract": "*Do we fully leverage the potential of visual encoder in Multimodal Large Language Models (MLLMs)?* The recent outstanding performance of MLLMs in multimodal understanding has garnered broad attention from both academia and industry. In the current MLLM rat race, the focus seems to be predominantly on the linguistic side. We witness the rise of larger and higher-quality instruction datasets, as well as the involvement of larger-sized LLMs. Yet, scant attention has been directed towards the visual signals utilized by MLLMs, often assumed to be the final high-level features extracted by a frozen visual encoder. In this paper, we introduce the **Dense Connector** - a simple, effective, and plug-and-play vision-language connector that significantly enhances existing MLLMs by leveraging multi-layer visual features, with minimal additional computational overhead. Building on this, we also propose the Efficient Dense Connector, which achieves performance comparable to LLaVA-v1.5 with only 25% of the visual tokens. Furthermore, our model, trained solely on images, showcases remarkable zero-shot capabilities in video understanding as well. Experimental results across various vision encoders, image resolutions, training dataset scales, varying sizes of LLMs (2.7B\u219270B), and diverse architectures of MLLMs (e.g., LLaVA-v1.5, LLaVA-NeXT and Mini-Gemini) validate the versatility and scalability of our approach, achieving state-of-the-art performance across 19 image and video benchmarks. We hope that this work will provide valuable experience and serve as a basic module for future MLLM development. Code is available at https://github.com/HJYao00/DenseConnector.", "keywords": "Multimodal Large Language Models;Vision-Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Huanjin Yao;Wenhao Wu;Taojiannan Yang;YuXin Song;Mengxi Zhang;Haocheng Feng;Yifan Sun;Zhiheng Li;Wanli Ouyang;Jingdong Wang", "authorids": "~Huanjin_Yao1;~Wenhao_Wu2;~Taojiannan_Yang1;~YuXin_Song1;~Mengxi_Zhang1;~Haocheng_Feng1;~Yifan_Sun2;~Zhiheng_Li3;~Wanli_Ouyang1;~Jingdong_Wang1", "gender": "M;M;M;M;M;;M;M;;M", "homepage": "https://github.com/HJYao00;https://whwu95.github.io/;;https://github.com/byrsongyuxin;;;https://yifansun-reid.github.io;https://www.sigs.tsinghua.edu.cn/lzh/list.htm;;https://jingdongwang2017.github.io/", "dblp": "362/0783;;249/8103;;;;99/10261-3.html;89/6935-1;;49/3441", "google_scholar": "pDtsCBQAAAAJ;Kn5d1ckAAAAJ;Z_--q5UAAAAJ;;;;uUZEL7UAAAAJ;;;z5SPCmgAAAAJ", "orcid": ";0000-0002-8511-743X;;;0000-0002-6011-1218;;0000-0003-3532-6521;;;0000-0002-4888-4445", "linkedin": ";wenhao-w-usyd/;;;;;;;;", "or_profile": "~Huanjin_Yao1;~Wenhao_Wu2;~Taojiannan_Yang1;~YuXin_Song1;~Mengxi_Zhang1;~Haocheng_Feng1;~Yifan_Sun2;~Zhiheng_Li3;~Wanli_Ouyang1;~Jingdong_Wang1", "aff": "Tsinghua University;The Chinese University of Hong Kong;Amazon;Baidu;Tianjin University;;Baidu;Tsinghua University;;Baidu", "aff_domain": "tsinghua.edu.cn;cuhk.edu.hk;amazon.com;baidu.com;tju.edu.cn;;baidu.com;mail.tsinghua.edu.cn;;baidu.com", "position": "MS student;Honorary Research Assistant;Researcher;Researcher;MS student;;Senior Expert;Associate Professor;;Chief Scientist for Computer Vision", "bibtex": "@inproceedings{\nyao2024dense,\ntitle={Dense Connector for {MLLM}s},\nauthor={Huanjin Yao and Wenhao Wu and Taojiannan Yang and YuXin Song and Mengxi Zhang and Haocheng Feng and Yifan Sun and Zhiheng Li and Wanli Ouyang and Jingdong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ioabr42B44}\n}", "github": "", "reviewers": "2pJ5;qnn9;sj7M", "pdf_size": 12042793, "rating": "5;6;7", "confidence": "5;4;5", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "68;79;44", "wc_strengths": "36;51;125", "wc_weaknesses": "193;17;120", "wc_questions": "29;27;114", "wc_limitations": "14;5;6", "wc_review": "340;179;409", "wc_reply_reviewers": "236;23;0", "wc_reply_authors": "921;115;0", "reply_reviewers": "2;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 63.666666666666664, 14.613540144521982 ], "wc_strengths_avg": [ 70.66666666666667, 38.90444133457716 ], "wc_weaknesses_avg": [ 110.0, 72.19879962067698 ], "wc_questions_avg": [ 56.666666666666664, 40.54901012629312 ], "wc_limitations_avg": [ 8.333333333333334, 4.027681991198191 ], "wc_review_avg": [ 309.3333333333333, 96.3685056898201 ], "wc_reply_reviewers_avg": [ 86.33333333333333, 106.24604567804971 ], "wc_reply_authors_avg": [ 345.3333333333333, 409.7562961349371 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1247051249099214241&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;cuhk.edu.hk;amazon.com;baidu.com;tju.edu.cn;;baidu.com;mail.tsinghua.edu.cn;;baidu.com", "author_num": 10, "aff_unique_index": "0;1;2;3;4;3;0;3", "aff_unique_norm": "Tsinghua University;Chinese University of Hong Kong;Amazon;Baidu;Tianjin University", "aff_unique_dep": ";;Amazon.com, Inc.;Baidu, Inc.;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cuhk.edu.hk;https://www.amazon.com;https://www.baidu.com;http://www.tju.edu.cn", "aff_unique_abbr": "THU;CUHK;Amazon;Baidu;TJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Real-time Stereo-based 3D Object Detection for Streaming Perception", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95750", "id": "IpHB5RC3za", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IpHB5RC3za", "openreview": "https://openreview.net/forum?id=IpHB5RC3za", "poster": "/media/PosterPDFs/NeurIPS%202024/95750.png?t=1731514600.0551734", "project": "", "author_site": "Changcai Li, Zonghua Gu, Gang Chen, Libo Huang, Wei Zhang, Huihui Zhou", "tldr": "", "abstract": "The ability to promptly respond to environmental changes is crucial for the perception system of autonomous driving. Recently, a new task called streaming perception was proposed. It jointly evaluate the latency and accuracy into a single metric for video online perception. In this work, we introduce StreamDSGN, the first real-time stereo-based 3D object detection framework designed for streaming perception. StreamDSGN is an end-to-end framework that directly predicts the 3D properties of objects in the next moment by leveraging historical information, thereby alleviating the accuracy degradation of streaming perception. Further, StreamDSGN applies three strategies to enhance the perception accuracy: (1) A feature-flow-based fusion method, which generates a pseudo-next feature at the current moment to address the misalignment issue between feature and ground truth. (2) An extra regression loss for explicit supervision of object motion consistency in consecutive frames. (3) A large kernel backbone with a large receptive field for effectively capturing long-range spatial contextual features caused by changes in object positions. Experiments on the KITTI Tracking dataset show that, compared with the strong baseline, StreamDSGN significantly improves the streaming average precision by up to 4.33%. Our code is available at https://github.com/weiyangdaren/streamDSGN-pytorch.", "keywords": "3D object detection; Streaming perception; Real-time perception", "primary_area": "machine_vision", "supplementary_material": "", "author": "Changcai Li;Zonghua Gu;Gang Chen;Libo Huang;Wei Zhang;Huihui Zhou", "authorids": "~Changcai_Li1;~Zonghua_Gu1;~Gang_Chen12;~Libo_Huang2;~Wei_Zhang78;~Huihui_Zhou1", "gender": "M;M;M;;M;M", "homepage": "https://github.com/weiyangdaren;https://dblp.org/pid/93/1025.html;https://gitee.com/SYSURICLab_Gang_0;;;https://www.researchgate.net/profile/Huihui_Zhou4/research", "dblp": "323/5731;93/1025.html;;;;", "google_scholar": ";;;;https://scholar.google.com.tw/citations?hl=en;c2mrU24AAAAJ", "orcid": ";;;;0000-0002-2358-0779;", "linkedin": ";;;;;", "or_profile": "~Changcai_Li1;~Zonghua_Gu1;~Gang_Chen12;~Libo_Huang2;~Wei_Zhang78;~Huihui_Zhou1", "aff": "SUN YAT-SEN UNIVERSITY;Umea University;SUN YAT-SEN UNIVERSITY;;Peng Cheng Laboratory;Pengcheng Lab", "aff_domain": "sysu.edu.cn;umu.se;sysu.edu.cn;;pcl.ac.cn;pcl.ac.cn", "position": "PhD student;Full Professor;Full Professor;;Researcher;Full Professor", "bibtex": "@inproceedings{\nli2024realtime,\ntitle={Real-time Stereo-based 3D Object Detection for Streaming Perception},\nauthor={Changcai Li and Zonghua Gu and Gang Chen and Libo Huang and Wei Zhang and Huihui Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IpHB5RC3za}\n}", "github": "", "reviewers": "D5z5;Ytfb;uBXC;NjQK", "pdf_size": 3827014, "rating": "5;5;5;7", "confidence": "3;4;3;2", "soundness": "3;3;2;4", "novelty": "2;3;2;4", "presentation": "3;3;3;3", "wc_summary": "112;70;122;96", "wc_strengths": "109;75;50;31", "wc_weaknesses": "85;121;44;169", "wc_questions": "8;8;220;6", "wc_limitations": "12;16;7;3", "wc_review": "326;290;443;305", "wc_reply_reviewers": "31;30;39;17", "wc_reply_authors": "0;114;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.0, 19.6468827043885 ], "wc_strengths_avg": [ 66.25, 29.200813344836817 ], "wc_weaknesses_avg": [ 104.75, 46.023771031935226 ], "wc_questions_avg": [ 60.5, 92.09098761550992 ], "wc_limitations_avg": [ 9.5, 4.924428900898052 ], "wc_review_avg": [ 341.0, 60.261928279802 ], "wc_reply_reviewers_avg": [ 29.25, 7.8859051477937525 ], "wc_reply_authors_avg": [ 28.5, 49.363448015713004 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QATLtbSVjUAJ:scholar.google.com/&scioq=Real-time+Stereo-based+3D+Object+Detection+for+Streaming+Perception&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "sysu.edu.cn;umu.se;sysu.edu.cn;;pcl.ac.cn;pcl.ac.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Sun Yat-sen University;Ume\u00e5 University;Pengcheng Laboratory;Pengcheng Lab", "aff_unique_dep": ";;Peng Cheng Laboratory;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.umu.se;http://www.pcl.ac.cn;", "aff_unique_abbr": "SYSU;UMU;PCL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Sweden" }, { "title": "Smoke and Mirrors in Causal Downstream Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95749", "id": "Iq2IAWozNr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Iq2IAWozNr", "openreview": "https://openreview.net/forum?id=Iq2IAWozNr", "poster": "/media/PosterPDFs/NeurIPS%202024/95749.png?t=1731331709.7757926", "project": "", "author_site": "Riccardo Cadei, Lukas Lindorfer, Sylvia Cremer, Cordelia Schmid, Francesco Locatello", "tldr": "", "abstract": "Machine Learning and AI have the potential to transform data-driven scientific discovery, enabling accurate predictions for several scientific phenomena. As many scientific questions are inherently causal, this paper looks at the causal inference task of treatment effect estimation, where the outcome of interest is recorded in high-dimensional observations in a Randomized Controlled Trial (RCT). Despite being the simplest possible causal setting and a perfect fit for deep learning, we theoretically find that many common choices in the literature may lead to biased estimates. To test the practical impact of these considerations, we recorded ISTAnt, the first real-world benchmark for causal inference downstream tasks on high-dimensional observations as an RCT studying how garden ants (Lasius neglectus) respond to microparticles applied onto their colony members by hygienic grooming. Comparing 6 480 models fine-tuned from state-of-the-art visual backbones, we find that the sampling and modeling choices significantly affect the accuracy of the causal estimate, and that classification accuracy is not a proxy thereof. We further validated the analysis, repeating it on a synthetically generated visual data set controlling the causal model. Our results suggest that future benchmarks should carefully consider real downstream scientific questions, especially causal ones. Further, we highlight guidelines for representation learning methods to help answer causal questions in the sciences.", "keywords": "AI for Science;Randomized Controlled Trial;Representation Learning", "primary_area": "causal_inference", "supplementary_material": "/attachment/0f010a07b9c10596dd19b1f118366d6a21e83eb9.zip", "author": "Riccardo Cadei;Lukas Lindorfer;Sylvia Cremer;Cordelia Schmid;Francesco Locatello", "authorids": "~Riccardo_Cadei1;~Lukas_Lindorfer1;~Sylvia_Cremer1;~Cordelia_Schmid1;~Francesco_Locatello1", "gender": "M;M;F;F;M", "homepage": "https://www.riccardocadei.com/;;https://socialimmunity.ista.ac.at/;https://cordeliaschmid.github.io/;https://twitter.com/FrancescoLocat8", "dblp": "307/5482;;;s/CordeliaSchmid;195/6074", "google_scholar": "dYnjzcMAAAAJ;;https://scholar.google.com.tw/citations?user=_Gm-T9IAAAAJ;IvqCXP4AAAAJ;", "orcid": ";;0000-0002-2193-3868;;", "linkedin": ";lukas-lindorfer-672612215/;;cordelia-schmid-47985a9;", "or_profile": "~Riccardo_Cadei1;~Lukas_Lindorfer1;~Sylvia_Cremer1;~Cordelia_Schmid1;~Francesco_Locatello1", "aff": "Institute of Science and Technology;Institute of Science and Technology Austria;ISTA (Institute of Science and Technology Austria);Inria;Institute of Science and Technology", "aff_domain": "ist.ac.at;ista.ac.at;ista.ac.at;inria.fr;ist.ac.at", "position": "PhD student;PhD student;Full Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ncadei2024smoke,\ntitle={Smoke and Mirrors in Causal Downstream Tasks},\nauthor={Riccardo Cadei and Lukas Lindorfer and Sylvia Cremer and Cordelia Schmid and Francesco Locatello},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Iq2IAWozNr}\n}", "github": "", "reviewers": "Zfn9;ZnDR;WAFK;Erid", "pdf_size": 4453014, "rating": "6;6;7;8", "confidence": "3;2;1;3", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "22;121;96;134", "wc_strengths": "35;64;60;108", "wc_weaknesses": "42;230;46;76", "wc_questions": "28;54;38;12", "wc_limitations": "3;1;9;59", "wc_review": "130;470;249;389", "wc_reply_reviewers": "16;74;10;25", "wc_reply_authors": "0;0;0;29", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.25, 43.34382885717412 ], "wc_strengths_avg": [ 66.75, 26.280934153869037 ], "wc_weaknesses_avg": [ 98.5, 77.05030824078513 ], "wc_questions_avg": [ 33.0, 15.264337522473747 ], "wc_limitations_avg": [ 18.0, 23.853720883753127 ], "wc_review_avg": [ 309.5, 130.34665319830808 ], "wc_reply_reviewers_avg": [ 31.25, 25.252475126212875 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8900174276827547956&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ist.ac.at;ista.ac.at;ista.ac.at;inria.fr;ist.ac.at", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria;INRIA", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.ist.ac.at;https://www.inria.fr", "aff_unique_abbr": ";IST Austria;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;2", "aff_country_unique": ";Austria;France" }, { "title": "Randomized Sparse Matrix Compression for Large-Scale Constrained Optimization in Cancer Radiotherapy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95748", "id": "ItzD2Cnu9y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ItzD2Cnu9y", "openreview": "https://openreview.net/forum?id=ItzD2Cnu9y", "poster": "/media/PosterPDFs/NeurIPS%202024/95748.png?t=1733338188.5476828", "project": "", "author_site": "Shima Adeli, Mojtaba Tefagh, Gourav Jhanwar, Masoud Zarepisheh", "tldr": "", "abstract": "Radiation therapy, treating over half of all cancer patients, involves using specialized machines to direct high-energy beams at tumors, aiming to damage cancer cells while minimizing harm to nearby healthy tissues. Customizing the shape and intensity of radiation beams for each patient leads to solving large-scale constrained optimization problems that need to be solved within tight clinical time-frame. At the core of these challenges is a large matrix that is commonly sparsified for computational efficiency by neglecting small elements. Such a crude approximation can degrade the quality of treatment, potentially causing unnecessary radiation exposure to healthy tissues\u2014this may lead to significant radiation-induced side effects\u2014or delivering inadequate radiation to the tumor, which is crucial for effective tumor treatment. In this work, we demonstrate, for the first time, that randomized sketch tools can effectively sparsify this matrix without sacrificing treatment quality. We also develop a novel randomized sketch method with desirable theoretical guarantees that outperforms existing techniques in practical application. Beyond developing a novel randomized sketch method, this work emphasizes the potential of harnessing scientific computing tools, crucial in today's big data analysis, to tackle computationally intensive challenges in healthcare. The application of these tools could have a profound impact on the lives of numerous cancer patients. Code and sample data available at https://github.com/PortPy-Project/CompressRTP", "keywords": "Sparsification;Cancer Radiotherapy;Optimization;Randomization;Sketching", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Shima Adeli;Mojtaba Tefagh;Gourav Jhanwar;Masoud Zarepisheh", "authorids": "~Shima_Adeli1;~Mojtaba_Tefagh1;~Gourav_Jhanwar1;~Masoud_Zarepisheh1", "gender": "F;M;M;M", "homepage": ";https://sharif.edu/~mtefagh/;;https://masoudzp.github.io/", "dblp": ";264/4410.html;;", "google_scholar": ";nWCxa3sAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-2198-2440;;", "linkedin": "shima-adeli-23028a2b2/;mtefagh;gourav-jhanwar;masoud-zarepisheh-457a492b/", "or_profile": "~Shima_Adeli1;~Mojtaba_Tefagh1;~Gourav_Jhanwar1;~Masoud_Zarepisheh1", "aff": "Sharif University of Technology;Sharif University of Technology, Sharif University of Technology;Memorial Sloan Kettering Cancer Centre;Memorial Sloan Kettering Cancer Centre", "aff_domain": "sharif.edu;math.sharif.edu;mskcc.org;mskcc.org", "position": "MS student;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nadeli2024randomized,\ntitle={Randomized Sparse Matrix Compression for Large-Scale Constrained Optimization in Cancer Radiotherapy},\nauthor={Shima Adeli and Mojtaba Tefagh and Gourav Jhanwar and Masoud Zarepisheh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ItzD2Cnu9y}\n}", "github": "", "reviewers": "hEqB;rbxv;oYrg;C2Tr", "pdf_size": 3346805, "rating": "3;3;5;6", "confidence": "3;3;5;3", "soundness": "3;2;2;3", "novelty": "2;2;1;3", "presentation": "2;2;2;3", "wc_summary": "22;127;91;112", "wc_strengths": "20;72;157;169", "wc_weaknesses": "55;93;181;193", "wc_questions": "19;2;157;102", "wc_limitations": "3;2;83;9", "wc_review": "119;296;669;585", "wc_reply_reviewers": "0;0;28;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 40.193283020922784 ], "wc_strengths_avg": [ 104.5, 61.46747107210447 ], "wc_weaknesses_avg": [ 130.5, 58.23014683134502 ], "wc_questions_avg": [ 70.0, 62.88481533724974 ], "wc_limitations_avg": [ 24.25, 34.02480712656576 ], "wc_review_avg": [ 417.25, 220.89180043632223 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MTpDnhl04XgJ:scholar.google.com/&scioq=Randomized+Sparse+Matrix+Compression+for+Large-Scale+Constrained+Optimization+in+Cancer+Radiotherapy&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "sharif.edu;math.sharif.edu;mskcc.org;mskcc.org", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Sharif University of Technology;Memorial Sloan Kettering Cancer Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.sharif.edu;https://www.mskcc.org", "aff_unique_abbr": "SUT;MSKCC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Iran;United States" }, { "title": "Stable-Pose: Leveraging Transformers for Pose-Guided Text-to-Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95747", "id": "IwNTiNPxFt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IwNTiNPxFt", "openreview": "https://openreview.net/forum?id=IwNTiNPxFt", "poster": "/media/PosterPDFs/NeurIPS%202024/95747.png?t=1731429330.2653472", "project": "", "author_site": "Jiajun Wang, Morteza Ghahremani Boozandani, Yitong Li, Bj\u00f6rn Ommer, Christian Wachinger", "tldr": "", "abstract": "Controllable text-to-image (T2I) diffusion models have shown impressive performance in generating high-quality visual content through the incorporation of various conditions. Current methods, however, exhibit limited performance when guided by skeleton human poses, especially in complex pose conditions such as side or rear perspectives of human figures. To address this issue, we present Stable-Pose, a novel adapter model that introduces a coarse-to-fine attention masking strategy into a vision Transformer (ViT) to gain accurate pose guidance for T2I models. Stable-Pose is designed to adeptly handle pose conditions within pre-trained Stable Diffusion, providing a refined and efficient way of aligning pose representation during image synthesis. We leverage the query-key self-attention mechanism of ViTs to explore the interconnections among different anatomical parts in human pose skeletons. Masked pose images are used to smoothly refine the attention maps based on target pose-related features in a hierarchical manner, transitioning from coarse to fine levels. \nAdditionally, our loss function is formulated to allocate increased emphasis to the pose region, thereby augmenting the model's precision in capturing intricate pose details. We assessed the performance of Stable-Pose across five public datasets under a wide range of indoor and outdoor human pose scenarios. Stable-Pose achieved an AP score of 57.1 in the LAION-Human dataset, marking around 13\\% improvement over the established technique ControlNet. The project link and code is available at https://github.com/ai-med/StablePose.", "keywords": "Pose-guided text-to-image (T2I);Diffusion models;Stable Diffusion;vision Transformers", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jiajun Wang;MORTEZA GHAHREMANI;Yitong Li;Bj\u00f6rn Ommer;Christian Wachinger", "authorids": "~Jiajun_Wang2;~MORTEZA_GHAHREMANI3;~Yitong_Li4;~Bj\u00f6rn_Ommer2;~Christian_Wachinger1", "gender": "M;M;F;M;", "homepage": ";https://mogvision.github.io/;https://yiiitong.github.io/;https://ai-med.de/people/christian-wachinger/;https://ommer-lab.com/people/ommer/", "dblp": ";152/6299;;79/5985;11/4098", "google_scholar": ";yhXUlXsAAAAJ;8rYJ5qAAAAAJ;https://scholar.google.de/citations?user=UOIBNdUAAAAJ;zWbvIUcAAAAJ", "orcid": ";0000-0001-6423-6475;0009-0009-3874-6055;0000-0002-3652-1874;", "linkedin": "jiajun-wang-a82725251/;morteza-ghahremani-3a040421a/;yitong-li-29ba12211/;;", "or_profile": "~Jiajun_Wang2;~MORTEZA_GHAHREMANI3;~Yitong_Li4;~Christian_Wachinger1;~Bjorn_Ommer1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;tum.de;tum.de;tum.de;lmu.de", "position": "MS student;AI Scientist;PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024stablepose,\ntitle={Stable-Pose: Leveraging Transformers for Pose-Guided Text-to-Image Generation},\nauthor={Jiajun Wang and MORTEZA GHAHREMANI and Yitong Li and Bj{\\\"o}rn Ommer and Christian Wachinger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IwNTiNPxFt}\n}", "github": "", "reviewers": "RKmi;qFuv;ypoD;pWbK", "pdf_size": 30416061, "rating": "4;5;5;8", "confidence": "5;4;4;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "51;29;50;77", "wc_strengths": "64;43;59;95", "wc_weaknesses": "41;42;313;27", "wc_questions": "44;60;4;140", "wc_limitations": "50;8;36;5", "wc_review": "250;182;462;344", "wc_reply_reviewers": "79;0;137;39", "wc_reply_authors": "118;0;153;26", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 51.75, 17.020208576865326 ], "wc_strengths_avg": [ 65.25, 18.846418757949746 ], "wc_weaknesses_avg": [ 105.75, 119.80270239022157 ], "wc_questions_avg": [ 62.0, 49.4368283772331 ], "wc_limitations_avg": [ 24.75, 18.93904696651867 ], "wc_review_avg": [ 309.5, 105.17010031372985 ], "wc_reply_reviewers_avg": [ 63.75, 50.68222074850312 ], "wc_reply_authors_avg": [ 74.25, 63.160015041163504 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4521130324361547210&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tum.de;tum.de;tum.de;tum.de;lmu.de", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.lmu.de", "aff_unique_abbr": "TUM;LMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "SSDM: Scalable Speech Dysfluency Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95746", "id": "IxEhb4NCvy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IxEhb4NCvy", "openreview": "https://openreview.net/forum?id=IxEhb4NCvy", "poster": "/media/PosterPDFs/NeurIPS%202024/95746.png?t=1731219745.2264805", "project": "", "author_site": "Jiachen Lian, Xuanru Zhou, Zoe Ezzes, Jet Vonk, Brittany Morin, David Paul Baquirin, Zachary Miller, Maria Luisa Gorno Tempini, Gopala Anumanchipalli", "tldr": "", "abstract": "Speech dysfluency modeling is the core module for spoken language learning, and speech therapy. However, there are three challenges. First, current state-of-the-art solutions~~\\cite{lian2023unconstrained-udm, lian-anumanchipalli-2024-towards-hudm} suffer from poor scalability. Second, there is a lack of a large-scale dysfluency corpus. Third, there is not an effective learning framework. In this paper, we propose \\textit{SSDM: Scalable Speech Dysfluency Modeling}, which (1) adopts articulatory gestures as scalable forced alignment; (2) introduces connectionist subsequence aligner (CSA) to achieve dysfluency alignment; (3) introduces a large-scale simulated dysfluency corpus called Libri-Dys; and (4) develops an end-to-end system by leveraging the power of large language models (LLMs). We expect SSDM to serve as a standard in the area of dysfluency modeling. Demo is available at \\url{https://berkeley-speech-group.github.io/SSDM/}.", "keywords": "Speech Dysfluency;Disfluency;Stutter;Alignment;Articulatory;Scaling", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Jiachen Lian;Xuanru Zhou;Zoe Ezzes;Jet M.J. Vonk;Brittany T. Morin;David Paul Galang Baquirin;Zachary A. Miller;Maria Luisa Gorno-Tempini;Gopala Anumanchipalli", "authorids": "~Jiachen_Lian1;~Xuanru_Zhou2;~Zoe_Ezzes1;~Jet_M.J._Vonk1;~Brittany_T._Morin1;~David_Paul_Galang_Baquirin1;~Zachary_A._Miller1;~Maria_Luisa_Gorno-Tempini1;~Gopala_Anumanchipalli1", "gender": "M;F;F;;F;M;M;F;M", "homepage": "https://jlian2.github.io;;;;https://datawithcoffee.com/;https://profiles.ucsf.edu/david.baquirin;https://memory.ucsf.edu/people/zachary-miller-md;;http://people.eecs.berkeley.edu/~gopala/", "dblp": "249/9914;;;;;;;;54/7824", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;;;;;VecEj6kAAAAJ", "orcid": ";;;;;;;0000-0002-7426-7782;0000-0002-9714-7740", "linkedin": "jiachenlian/en;xuanru-zhou-5286412a1/;;;;;;;", "or_profile": "~Jiachen_Lian1;~Xuanru_Zhou2;~Zoe_Ezzes1;~Jet_M.J._Vonk1;~Brittany_T._Morin1;~David_Paul_Galang_Baquirin1;~Zachary_A._Miller1;~Maria_Luisa_Gorno-Tempini1;~Gopala_Anumanchipalli1", "aff": "Electrical Engineering & Computer Science Department, University of California Berkeley;Zhejiang University;University of California, San Francisco;University of California, San Francisco;University of California, San Francisco;University of California, San Francisco;University of California, San Francisco;University of California, San Francisco;University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;zju.edu.cn;ucsf.edu;ucsf.edu;ucsf.edu;ucsf.edu;ucsf.edu;ucsf.edu;berkeley.edu", "position": "PhD student;Undergrad student;Researcher;Assistant Professor;Researcher;Researcher;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlian2024ssdm,\ntitle={{SSDM}: Scalable Speech Dysfluency Modeling},\nauthor={Jiachen Lian and Xuanru Zhou and Zoe Ezzes and Jet M.J. Vonk and Brittany T. Morin and David Paul Galang Baquirin and Zachary A. Miller and Maria Luisa Gorno-Tempini and Gopala Anumanchipalli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IxEhb4NCvy}\n}", "github": "", "reviewers": "px45;nbR5;YZds;oU79", "pdf_size": 8204899, "rating": "3;7;7;9", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;3;4;4", "presentation": "2;3;3;4", "wc_summary": "10;116;170;304", "wc_strengths": "5;105;76;174", "wc_weaknesses": "56;131;160;310", "wc_questions": "1;30;38;113", "wc_limitations": "1;6;1;10", "wc_review": "73;388;445;911", "wc_reply_reviewers": "658;31;0;105", "wc_reply_authors": "2065;7;71;23", "reply_reviewers": "3;1;0;1", "reply_authors": "5;2;2;2", "rating_avg": [ 6.5, 2.179449471770337 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 150.0, 105.91506030777681 ], "wc_strengths_avg": [ 90.0, 60.625902055144714 ], "wc_weaknesses_avg": [ 164.25, 92.31027840928658 ], "wc_questions_avg": [ 45.5, 41.33098111586513 ], "wc_limitations_avg": [ 4.5, 3.774917217635375 ], "wc_review_avg": [ 454.25, 299.3521129038511 ], "wc_reply_reviewers_avg": [ 198.5, 268.0209879841502 ], "wc_reply_authors_avg": [ 541.5, 879.9083759119469 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7624901549894190241&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "eecs.berkeley.edu;zju.edu.cn;ucsf.edu;ucsf.edu;ucsf.edu;ucsf.edu;ucsf.edu;ucsf.edu;berkeley.edu", "author_num": 9, "aff_unique_index": "0;1;2;2;2;2;2;2;0", "aff_unique_norm": "University of California, Berkeley;Zhejiang University;University of California, San Francisco", "aff_unique_dep": "Electrical Engineering & Computer Science Department;;", "aff_unique_url": "https://www.berkeley.edu;https://www.zju.edu.cn;https://www.ucsf.edu", "aff_unique_abbr": "UC Berkeley;ZJU;UCSF", "aff_campus_unique_index": "0;2;2;2;2;2;2;0", "aff_campus_unique": "Berkeley;;San Francisco", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "NeuralSolver: Learning Algorithms For Consistent and Efficient Extrapolation Across General Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95745", "id": "IxRf7Q3s5e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IxRf7Q3s5e", "openreview": "https://openreview.net/forum?id=IxRf7Q3s5e", "poster": "", "project": "", "author_site": "Bernardo Esteves, Miguel Vasco, Francisco S. Melo", "tldr": "", "abstract": "We contribute NeuralSolver, a novel recurrent solver that can efficiently and consistently extrapolate, i.e., learn algorithms from smaller problems (in terms of observation size) and execute those algorithms in large problems. Contrary to previous recurrent solvers, NeuralSolver can be naturally applied in both same-size problems, where the input and output sizes are the same, and in different-size problems, where the size of the input and output differ. To allow for this versatility, we design NeuralSolver with three main components: a recurrent module, that iteratively processes input information at different scales, a processing module, responsible for aggregating the previously processed information, and a curriculum-based training scheme, that improves the extrapolation performance of the method.\nTo evaluate our method we introduce a set of novel different-size tasks and we show that NeuralSolver consistently outperforms the prior state-of-the-art recurrent solvers in extrapolating to larger problems, considering smaller training problems and requiring less parameters than other approaches.", "keywords": "Deep learning;algorithm synthesis;recurrent networks;algorithmic reasoning;sequential decision making;extrapolation", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/96fdc2373ba7e77f90a64ac10b935d0f672a38d0.zip", "author": "Bernardo Esteves;Miguel Vasco;Francisco S. Melo", "authorids": "~Bernardo_Esteves1;~Miguel_Vasco1;~Francisco_S._Melo1", "gender": ";M;M", "homepage": ";https://www.miguelvasco.com/;http://gaips.inesc-id.pt/~fmelo", "dblp": ";237/9737;86/839", "google_scholar": "UDncSsUAAAAJ;https://scholar.google.pt/citations?user=Of2hDmMAAAAJ;5AEeWU4AAAAJ", "orcid": "0000-0002-1524-5006;;0000-0001-5705-7372", "linkedin": ";;", "or_profile": "~Bernardo_Esteves1;~Miguel_Vasco1;~Francisco_S._Melo1", "aff": "Instituto Superior T\u00e9cnico;KTH Royal Institute of Technology;INESC-ID", "aff_domain": "tecnico.ulisboa.pt;kth.se;inesc-id.pt", "position": "PhD student;Postdoc;Senior Researcher", "bibtex": "@inproceedings{\nesteves2024neuralsolver,\ntitle={NeuralSolver: Learning Algorithms For Consistent and Efficient Extrapolation Across General Tasks},\nauthor={Bernardo Esteves and Miguel Vasco and Francisco S. Melo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IxRf7Q3s5e}\n}", "github": "", "reviewers": "dmPv;SKBz;tqxp", "pdf_size": 3396419, "rating": "5;6;7", "confidence": "2;3;3", "soundness": "2;3;4", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "112;143;71", "wc_strengths": "86;25;53", "wc_weaknesses": "212;23;127", "wc_questions": "2;73;58", "wc_limitations": "1;7;28", "wc_review": "413;271;337", "wc_reply_reviewers": "26;15;64", "wc_reply_authors": "18;14;28", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 108.66666666666667, 29.48822740612863 ], "wc_strengths_avg": [ 54.666666666666664, 24.931015935086872 ], "wc_weaknesses_avg": [ 120.66666666666667, 77.288780269555 ], "wc_questions_avg": [ 44.333333333333336, 30.554141381415967 ], "wc_limitations_avg": [ 12.0, 11.575836902790225 ], "wc_review_avg": [ 340.3333333333333, 58.01915392542554 ], "wc_reply_reviewers_avg": [ 35.0, 20.992061991778385 ], "wc_reply_authors_avg": [ 20.0, 5.887840577551898 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:or5flDvk90AJ:scholar.google.com/&scioq=NeuralSolver:+Learning+Algorithms+For+Consistent+and+Efficient+Extrapolation+Across+General+Tasks&hl=en&as_sdt=0,48", "gs_version_total": 2, "email": "tecnico.ulisboa.pt;kth.se;inesc-id.pt", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Instituto Superior T\u00e9cnico;KTH Royal Institute of Technology;INESC-ID", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ist.utl.pt;https://www.kth.se;https://www.inesc-id.pt", "aff_unique_abbr": "IST;KTH;INESC-ID", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Portugal;Sweden" }, { "title": "ChatCam: Empowering Camera Control through Conversational AI", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95744", "id": "IxazPgGF8h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=IxazPgGF8h", "openreview": "https://openreview.net/forum?id=IxazPgGF8h", "poster": "/media/PosterPDFs/NeurIPS%202024/95744.png?t=1733596050.0406344", "project": "", "author_site": "Xinhang Liu, Yu-Wing Tai, Chi-Keung Tang", "tldr": "", "abstract": "Cinematographers adeptly capture the essence of the world, crafting compelling visual narratives through intricate camera movements. Witnessing the strides made by large language models in perceiving and interacting with the 3D world, this study explores their capability to control cameras with human language guidance. We introduce ChatCam, a system that navigates camera movements through conversations with users, mimicking a professional cinematographer's workflow. To achieve this, we propose CineGPT, a GPT-based autoregressive model for text-conditioned camera trajectory generation. We also develop an Anchor Determinator to ensure precise camera trajectory placement. ChatCam understands user requests and employs our proposed tools to generate trajectories, which can be used to render high-quality video footage on radiance field representations. Our experiments, including comparisons to state-of-the-art approaches and user studies, demonstrate our approach's ability to interpret and execute complex instructions for camera operation, showing promising applications in real-world production settings. Project page: https://xinhangliu.com/chatcam.", "keywords": "camera operation;LLM", "primary_area": "machine_vision", "supplementary_material": "/attachment/6c2ab3934f8a3302bdfa08eed67731f43db213fb.zip", "author": "Xinhang Liu;Yu-Wing Tai;Chi-Keung Tang", "authorids": "~Xinhang_Liu1;~Yu-Wing_Tai2;~Chi-Keung_Tang1", "gender": ";M;Not Specified", "homepage": "https://xinhangliu.com;https://yuwingtai.github.io/;http://www.cse.ust.hk/~cktang/", "dblp": "291/3884;40/566;34/4366", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;nFhLmFkAAAAJ;https://scholar.google.com.tw/citations?user=EWfpM74AAAAJ", "orcid": ";0000-0002-3148-0380;", "linkedin": ";;", "or_profile": "~Xinhang_Liu1;~Yu-Wing_Tai2;~ChiKeung_Tang1", "aff": "Hong Kong University of Science and Technology;Dartmouth College;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;dartmouth.edu;ust.hk", "position": "PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nliu2024chatcam,\ntitle={ChatCam: Empowering Camera Control through Conversational {AI}},\nauthor={Xinhang Liu and Yu-Wing Tai and Chi-Keung Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=IxazPgGF8h}\n}", "github": "", "reviewers": "kHgE;Shd5;oNWk;dvT5", "pdf_size": 16237650, "rating": "4;5;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "84;27;59;67", "wc_strengths": "71;72;32;37", "wc_weaknesses": "89;204;120;221", "wc_questions": "483;25;71;38", "wc_limitations": "16;15;1;4", "wc_review": "743;343;283;367", "wc_reply_reviewers": "84;81;11;31", "wc_reply_authors": "420;21;16;37", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.25, 20.69269194667528 ], "wc_strengths_avg": [ 53.0, 18.587630295441105 ], "wc_weaknesses_avg": [ 158.5, 55.42788107081129 ], "wc_questions_avg": [ 154.25, 190.54313816036515 ], "wc_limitations_avg": [ 9.0, 6.59545297913646 ], "wc_review_avg": [ 434.0, 181.0055247775603 ], "wc_reply_reviewers_avg": [ 51.75, 31.5703579327191 ], "wc_reply_authors_avg": [ 123.5, 171.3600011671335 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3744816264250790955&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ust.hk;dartmouth.edu;ust.hk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Dartmouth College", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.dartmouth.edu", "aff_unique_abbr": "HKUST;Dartmouth", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Counterfactual Fairness by Combining Factual and Counterfactual Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95743", "id": "J0Itri0UiN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J0Itri0UiN", "openreview": "https://openreview.net/forum?id=J0Itri0UiN", "poster": "", "project": "", "author_site": "Zeyu Zhou, TIanci Liu, Ruqi Bai, Jing Gao, Murat Kocaoglu, David Inouye", "tldr": "", "abstract": "In high-stakes domains such as healthcare and hiring, the role of machine learning (ML) in decision-making raises significant fairness concerns. \nThis work focuses on Counterfactual Fairness (CF), which posits that an ML model's outcome on any individual should remain unchanged if they had belonged to a different demographic group.\nPrevious works have proposed methods that guarantee CF. \nNotwithstanding, their effects on the model's predictive performance remain largely unclear.\nTo fill this gap, we provide a theoretical study on the inherent trade-off between CF and predictive performance in a model-agnostic manner. \nWe first propose a simple but effective method to cast an optimal but potentially unfair predictor into a fair one with a minimal loss of performance.\nBy analyzing the excess risk incurred by perfect CF, we quantify this inherent trade-off. \nFurther analysis on our method's performance with access to only incomplete causal knowledge is also conducted. \nBuilt upon this, we propose a practical algorithm that can be applied in such scenarios. \nExperiments on both synthetic and semi-synthetic datasets demonstrate the validity of our analysis and methods.", "keywords": "Counterfactual Fairness;Fairness;Trustworthy ML", "primary_area": "fairness", "supplementary_material": "/attachment/9be61e24afe05c4952dfad89f9e1f6d30874b238.zip", "author": "Zeyu Zhou;Tianci Liu;Ruqi Bai;Jing Gao;Murat Kocaoglu;David I. Inouye", "authorids": "~Zeyu_Zhou1;~Tianci_Liu1;~Ruqi_Bai1;~Jing_Gao1;~Murat_Kocaoglu1;~David_I._Inouye1", "gender": "Unspecified;M;M;M;M;F", "homepage": "https://zyzhou3.github.io/;https://lliutianc.github.io;https://ruqibai.netlify.app/;https://www.muratkocaoglu.com;http://davidinouye.com;https://engineering.purdue.edu/~jinggao/", "dblp": ";148/1911-3;;74/11343;76/10817;67/4834-4", "google_scholar": ";;;7N7bzdwAAAAJ;SVMQ_g4AAAAJ;Ftj1h4cAAAAJ", "orcid": ";;;;;", "linkedin": "zyzhou3/;;ruqi-bai/;mkocaoglu/;;", "or_profile": "~Zeyu_Zhou1;~Tianci_Liu1;~Ruqi_Bai1;~Murat_Kocaoglu1;~David_I_Inouye1;~Jing_Gao2", "aff": "Purdue University;Purdue University;Purdue University;Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024counterfactual,\ntitle={Counterfactual Fairness by Combining Factual and Counterfactual Predictions},\nauthor={Zeyu Zhou and Tianci Liu and Ruqi Bai and Jing Gao and Murat Kocaoglu and David I. Inouye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J0Itri0UiN}\n}", "github": "", "reviewers": "Kz4o;pyBD;e7ax", "pdf_size": 3111588, "rating": "6;6;7", "confidence": "3;2;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "4;3;3", "wc_summary": "55;85;76", "wc_strengths": "80;135;27", "wc_weaknesses": "54;136;136", "wc_questions": "70;121;127", "wc_limitations": "1;21;24", "wc_review": "260;498;390", "wc_reply_reviewers": "29;25;23", "wc_reply_authors": "78;78;78", "reply_reviewers": "1;1;1", "reply_authors": "3;3;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 12.569805089976535 ], "wc_strengths_avg": [ 80.66666666666667, 44.0933353492591 ], "wc_weaknesses_avg": [ 108.66666666666667, 38.655170704864595 ], "wc_questions_avg": [ 106.0, 25.573423705088842 ], "wc_limitations_avg": [ 15.333333333333334, 10.208928554075703 ], "wc_review_avg": [ 382.6666666666667, 97.30136461301844 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 2.494438257849294 ], "wc_reply_authors_avg": [ 78.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9703092952407338774&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-model Ensemble Conformal Prediction in Dynamic Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95742", "id": "J1Y70keorq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J1Y70keorq", "openreview": "https://openreview.net/forum?id=J1Y70keorq", "poster": "", "project": "", "author_site": "Erfan Hajihashemi, Yanning Shen", "tldr": "", "abstract": "Conformal prediction is an uncertainty quantification method that constructs a prediction set for a previously unseen datum, ensuring the true label is included with a predetermined coverage probability. Adaptive conformal prediction has been developed to address data distribution shifts in dynamic environments. However, the efficiency of prediction sets varies depending on the learning model used. Employing a single fixed model may not consistently offer the best performance in dynamic environments with unknown data distribution shifts. To address this issue, we introduce a novel adaptive conformal prediction framework, where the model used for creating prediction sets is selected \u2018on the fly\u2019 from multiple candidate models. The proposed algorithm is proven to achieve strongly adaptive regret over all intervals while maintaining valid coverage. Experiments on both real and synthetic datasets corroborate that the proposed approach consistently yields more efficient prediction sets while maintaining valid coverage, outperforming alternative methods.", "keywords": "uncertainty quantification;conformal prediction;dynamic environments", "primary_area": "online_learning", "supplementary_material": "", "author": "Erfan Hajihashemi;Yanning Shen", "authorids": "~Erfan_Hajihashemi1;~Yanning_Shen1", "gender": "M;F", "homepage": ";https://sites.google.com/uci.edu/yanning-shen/home", "dblp": ";120/7392.html", "google_scholar": ";MfzntAIAAAAJ", "orcid": ";", "linkedin": "erfan-hajihashemi-9bba12212;", "or_profile": "~Erfan_Hajihashemi1;~Yanning_Shen1", "aff": "University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;uci.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhajihashemi2024multimodel,\ntitle={Multi-model Ensemble Conformal Prediction in Dynamic Environments},\nauthor={Erfan Hajihashemi and Yanning Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J1Y70keorq}\n}", "github": "", "reviewers": "4Km4;76a7;xzo3;moxN", "pdf_size": 921377, "rating": "5;5;5;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "63;39;40;94", "wc_strengths": "28;24;21;50", "wc_weaknesses": "7;163;16;65", "wc_questions": "170;4;90;212", "wc_limitations": "6;12;1;8", "wc_review": "274;242;168;429", "wc_reply_reviewers": "30;88;0;24", "wc_reply_authors": "0;85;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.0, 22.371857321197094 ], "wc_strengths_avg": [ 30.75, 11.388041973930374 ], "wc_weaknesses_avg": [ 62.75, 61.94503612074175 ], "wc_questions_avg": [ 119.0, 79.55501241279521 ], "wc_limitations_avg": [ 6.75, 3.960744879438715 ], "wc_review_avg": [ 278.25, 95.14823960536526 ], "wc_reply_reviewers_avg": [ 35.5, 32.32259271778797 ], "wc_reply_authors_avg": [ 21.25, 36.80607966083864 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10403326771391513862&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uci.edu;uci.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Taylor Derivative Estimator: Efficient amortization for arbitrary differential operators", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95741", "id": "J2wI2rCG2u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J2wI2rCG2u", "openreview": "https://openreview.net/forum?id=J2wI2rCG2u", "poster": "/media/PosterPDFs/NeurIPS%202024/95741.png?t=1731743946.581372", "project": "", "author_site": "Zekun Shi, Zheyuan Hu, Min Lin, Kenji Kawaguchi", "tldr": "", "abstract": "Optimizing neural networks with loss that contain high-dimensional and high-order differential operators\n is expensive to evaluate with back-propagation due to $\\mathcal{O}(d^{k})$ scaling of the derivative tensor size and the $\\mathcal{O}(2^{k-1}L)$ scaling in the computation graph, where $d$ is the dimension of the domain, $L$ is the number of ops in the forward computation graph, and $k$ is the derivative order. In previous works, the polynomial scaling in $d$ was addressed by amortizing the computation over the optimization process via randomization. Separately, the exponential scaling in $k$ for univariate functions ($d=1$) was addressed with high-order auto-differentiation (AD). In this work, we show how to efficiently perform arbitrary contraction of the derivative tensor of arbitrary order for multivariate functions, by properly constructing the input tangents to univariate high-order AD, which can be used to efficiently randomize any differential operator.\n When applied to Physics-Informed Neural Networks (PINNs), our method provides >1000$\\times$ speed-up and >30$\\times$ memory reduction over randomization with first-order AD, and we can now solve 1-million-dimensional PDEs in 8 minutes on a single NVIDIA A100 GPU. This work opens the possibility of using high-order differential operators in large-scale problems.", "keywords": "AI for Science;Automatic Differentiation;Deep Learning;Randomization", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Zekun Shi;Zheyuan Hu;Min Lin;Kenji Kawaguchi", "authorids": "~Zekun_Shi3;~Zheyuan_Hu1;~Min_Lin1;~Kenji_Kawaguchi1", "gender": "M;M;;M", "homepage": ";https://linmin.me;https://ml.comp.nus.edu.sg/#members;https://zekun-shi.github.io/", "dblp": "270/0713;;;", "google_scholar": "On2YFigAAAAJ;BGONmkIAAAAJ;aLl3rYoAAAAJ;X9vcv1oAAAAJ", "orcid": ";;;", "linkedin": ";min-lin-08a3a422/;;", "or_profile": "~Zheyuan_Hu1;~Min_Lin1;~Kenji_Kawaguchi1;~ZEKUN_SHI2", "aff": "National University of Singapore;Sea AI Lab;National University of Singapore;Sea AI Lab", "aff_domain": "nus.edu.sg;sea.com;nus.edu;sea.com", "position": "PhD student;Principal Researcher;Presidential Young Professor;Researcher", "bibtex": "@inproceedings{\nshi2024stochastic,\ntitle={Stochastic Taylor Derivative Estimator: Efficient amortization for arbitrary differential operators},\nauthor={Zekun Shi and Zheyuan Hu and Min Lin and Kenji Kawaguchi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J2wI2rCG2u}\n}", "github": "", "reviewers": "cUDy;SkVJ;yjY8;xY8N", "pdf_size": 1216395, "rating": "7;7;7;8", "confidence": "4;4;3;5", "soundness": "3;4;3;4", "novelty": "3;4;3;4", "presentation": "3;3;2;4", "wc_summary": "45;68;44;183", "wc_strengths": "70;25;78;245", "wc_weaknesses": "505;1;72;225", "wc_questions": "439;1;93;71", "wc_limitations": "1;1;7;3", "wc_review": "1060;96;294;727", "wc_reply_reviewers": "202;1;15;12", "wc_reply_authors": "462;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.0, 57.389023340705144 ], "wc_strengths_avg": [ 104.5, 83.59575348066431 ], "wc_weaknesses_avg": [ 200.75, 193.4119631770486 ], "wc_questions_avg": [ 151.0, 169.7115199389835 ], "wc_limitations_avg": [ 3.0, 2.449489742783178 ], "wc_review_avg": [ 544.25, 375.1495535116629 ], "wc_reply_reviewers_avg": [ 57.5, 83.5897721016154 ], "wc_reply_authors_avg": [ 115.5, 200.05186827420533 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1203589014546452738&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nus.edu.sg;sea.com;nus.edu;sea.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "National University of Singapore;Sea AI Lab", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore;" }, { "title": "DiffuBox: Refining 3D Object Detection with Point Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95740", "id": "J2wOOtkBx0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J2wOOtkBx0", "openreview": "https://openreview.net/forum?id=J2wOOtkBx0", "poster": "/media/PosterPDFs/NeurIPS%202024/95740.png?t=1733854525.199767", "project": "", "author_site": "Xiangyu Chen, Zhenzhen Liu, Katie Luo, Siddhartha Datta, Adhitya Polavaram, Yan Wang, Yurong You, Boyi Li, Marco Pavone, Wei-Lun (Harry) Chao, Mark Campbell, Bharath Hariharan, Kilian Weinberger", "tldr": "", "abstract": "Ensuring robust 3D object detection and localization is crucial for many applications in robotics and autonomous driving. Recent models, however, face difficulties in maintaining high performance when applied to domains with differing sensor setups or geographic locations, often resulting in poor localization accuracy due to domain shift. To overcome this challenge, we introduce a novel diffusion-based box refinement approach. This method employs a domain-agnostic diffusion model, conditioned on the LiDAR points surrounding a coarse bounding box, to simultaneously refine the box's location, size, and orientation. We evaluate this approach under various domain adaptation settings, and our results reveal significant improvements across different datasets, object classes and detectors. Our PyTorch implementation is available at https://github.com/cxy1997/DiffuBox.", "keywords": "Domain Adaptation;Denoising Diffusion Models;3D Object Detection", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/b11599306a1ae877e0ec65ecbf79d3788f529e89.zip", "author": "Xiangyu Chen;Zhenzhen Liu;Katie Z Luo;Siddhartha Datta;Adhitya Polavaram;Yan Wang;Yurong You;Boyi Li;Marco Pavone;Wei-Lun Chao;Mark Campbell;Bharath Hariharan;Kilian Q Weinberger", "authorids": "~Xiangyu_Chen1;~Zhenzhen_Liu1;~Katie_Z_Luo1;~Siddhartha_Datta1;~Adhitya_Polavaram1;~Yan_Wang10;~Yurong_You1;~Boyi_Li1;~Marco_Pavone1;~Wei-Lun_Chao1;~Mark_Campbell1;~Bharath_Hariharan3;~Kilian_Q_Weinberger1", "gender": "M;;F;;;M;M;F;M;M;M;M;M", "homepage": "https://www.cs.cornell.edu/~xchen/;https://www.cs.cornell.edu/~zliu/;https://www.cs.cornell.edu/~katieluo/;http://siddharthadatta.ml/;;https://www.cs.cornell.edu/~yanwang/;http://yurongyou.com;https://sites.google.com/site/boyilics/home;https://web.stanford.edu/~pavone/;https://sites.google.com/view/wei-lun-harry-chao;http://campbell.mae.cornell.edu;http://www.cs.cornell.edu/~kilian/;http://home.bharathh.info", "dblp": ";81/891;207/8564;;;59/2227;199/1968;;91/3382-1.html;64/8842;;88/4801;05/8412", "google_scholar": "xBv-PMEAAAAJ;;qlmK27YAAAAJ;;;nZsD8XwAAAAJ;rdwkreIAAAAJ;;RhOpyXcAAAAJ;PGKakWwAAAAJ;e1iAhHQAAAAJ;jsxk8vsAAAAJ;TpglobcAAAAJ", "orcid": ";0000-0003-1083-8512;;;;;;;;0000-0003-1269-7231;;0009-0008-9313-7239;", "linkedin": ";;katieluo;;adhitya-polavaram/;;yurong-you/;;;;;;", "or_profile": "~Xiangyu_Chen1;~Zhenzhen_Liu1;~Katie_Z_Luo1;~Siddhartha_Datta1;~Adhitya_Polavaram1;~Yan_Wang10;~Yurong_You1;~Boyi_Li1;~Marco_Pavone1;~Wei-Lun_Chao1;~Mark_Campbell1;~Kilian_Q_Weinberger1;~Bharath_Hariharan2", "aff": "Cornell University;Cornell University;Cornell University;University of Oxford;Cornell University;NVIDIA;NVIDIA;University of California, Berkeley;Stanford University;Ohio State University;Cornell University;ASAPP Inc.;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;ox.ac.uk;cornell.edu;nvidia.com;nvidia.com;berkeley.edu;stanford.edu;osu.edu;cornell.edu;asapp.com;cornell.edu", "position": "PhD student;PhD student;PhD student;PhD student;Undergrad student;Researcher;Research Scientist;Postdoc;Associate Professor;Assistant Professor;Full Professor;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2024diffubox,\ntitle={DiffuBox: Refining 3D Object Detection with Point Diffusion},\nauthor={Xiangyu Chen and Zhenzhen Liu and Katie Z Luo and Siddhartha Datta and Adhitya Polavaram and Yan Wang and Yurong You and Boyi Li and Marco Pavone and Wei-Lun Chao and Mark Campbell and Bharath Hariharan and Kilian Q Weinberger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J2wOOtkBx0}\n}", "github": "", "reviewers": "7LbH;DshE;spFc;YzBS;uVLB", "pdf_size": 23520729, "rating": "5;5;5;6;7", "confidence": "3;2;4;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;2;2", "wc_summary": "96;122;19;71;35", "wc_strengths": "80;112;22;50;70", "wc_weaknesses": "157;61;90;135;23", "wc_questions": "48;3;6;21;70", "wc_limitations": "1;3;6;1;1", "wc_review": "382;301;143;278;199", "wc_reply_reviewers": "62;0;0;17;0", "wc_reply_authors": "169;145;145;53;56", "reply_reviewers": "1;0;0;1;0", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.6, 37.93995255663876 ], "wc_strengths_avg": [ 66.8, 30.055947830670718 ], "wc_weaknesses_avg": [ 93.2, 48.56500797899656 ], "wc_questions_avg": [ 29.6, 25.726251184344754 ], "wc_limitations_avg": [ 2.4, 1.9595917942265426 ], "wc_review_avg": [ 260.6, 82.82173627738071 ], "wc_reply_reviewers_avg": [ 15.8, 24.019991673603883 ], "wc_reply_authors_avg": [ 113.6, 49.05344024632727 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.46770717334674267, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4105242760527472942&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "cornell.edu;cornell.edu;cornell.edu;ox.ac.uk;cornell.edu;nvidia.com;nvidia.com;berkeley.edu;stanford.edu;osu.edu;cornell.edu;asapp.com;cornell.edu", "author_num": 13, "aff_unique_index": "0;0;0;1;0;2;2;3;4;5;0;6;0", "aff_unique_norm": "Cornell University;University of Oxford;NVIDIA;University of California, Berkeley;Stanford University;Ohio State University;ASAPP Inc.", "aff_unique_dep": ";;NVIDIA Corporation;;;;", "aff_unique_url": "https://www.cornell.edu;https://www.ox.ac.uk;https://www.nvidia.com;https://www.berkeley.edu;https://www.stanford.edu;https://www.osu.edu;https://www.asapp.com", "aff_unique_abbr": "Cornell;Oxford;NVIDIA;UC Berkeley;Stanford;OSU;ASAPP", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Stanford", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Uniform Last-Iterate Guarantee for Bandits and Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95739", "id": "J3w0AXtEhp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J3w0AXtEhp", "openreview": "https://openreview.net/forum?id=J3w0AXtEhp", "poster": "/media/PosterPDFs/NeurIPS%202024/95739.png?t=1731921922.0782764", "project": "", "author_site": "Junyan Liu, Yunfan Li, Ruosong Wang, Lin Yang", "tldr": "", "abstract": "Existing metrics for reinforcement learning (RL) such as regret, PAC bounds, or uniform-PAC (Dann et al., 2017), typically evaluate the cumulative performance, while allowing the play of an arbitrarily bad policy at any finite time t. Such a behavior can be highly detrimental in high-stakes applications. This paper introduces a stronger metric, uniform last-iterate (ULI) guarantee, capturing both cumulative and instantaneous performance of RL algorithms. Specifically, ULI characterizes the instantaneous performance since it ensures that the per-round suboptimality of the played policy is bounded by a function, monotonically decreasing w.r.t. (large) round t, preventing revisits to bad policies when sufficient samples are available. We demonstrate that a near-optimal ULI guarantee directly implies near-optimal cumulative performance across aforementioned metrics, but not the other way around. \nTo examine the achievability of ULI, we first provide two positive results for bandit problems with finite arms, showing that some elimination-based algorithms and high-probability adversarial algorithms with stronger analysis or additional designs, can attain near-optimal ULI guarantees. We also provide a negative result, indicating that optimistic algorithms cannot achieve a near-optimal ULI guarantee. Furthermore, we propose an efficient algorithm for linear bandits with infinitely many arms, which achieves the ULI guarantee, given access to an optimization oracle. Finally, we propose an algorithm that achieves a near-optimal ULI guarantee for the online reinforcement learning setting.", "keywords": "uniform last-iterate;bandits;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Junyan Liu;Yunfan Li;Ruosong Wang;Lin Yang", "authorids": "~Junyan_Liu1;~Yunfan_Li4;~Ruosong_Wang1;~Lin_Yang12", "gender": ";M;M;", "homepage": ";https://scholar.google.com/citations?user=kUQDprYAAAAJ&hl=en;http://www.cs.cmu.edu/~ruosongw/;", "dblp": ";;183/6164;", "google_scholar": ";;n8ZpnWMAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Junyan_Liu1;~Yunfan_Li4;~Ruosong_Wang1;~Lin_Yang12", "aff": ";University of California, Los Angeles;Peking University;", "aff_domain": ";ucla.edu;pku.edu.cn;", "position": ";PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nliu2024uniform,\ntitle={Uniform Last-Iterate Guarantee for Bandits and Reinforcement Learning},\nauthor={Junyan Liu and Yunfan Li and Ruosong Wang and Lin Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J3w0AXtEhp}\n}", "github": "", "reviewers": "vbbn;rjuw;upT1;HueZ", "pdf_size": 681274, "rating": "5;6;6;6", "confidence": "2;4;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "105;32;112;224", "wc_strengths": "52;160;77;76", "wc_weaknesses": "203;273;20;73", "wc_questions": "123;2;51;32", "wc_limitations": "12;1;11;13", "wc_review": "495;468;271;418", "wc_reply_reviewers": "14;11;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.25, 68.62352002047112 ], "wc_strengths_avg": [ 91.25, 40.93516214698557 ], "wc_weaknesses_avg": [ 142.25, 100.65628395683997 ], "wc_questions_avg": [ 52.0, 44.55894971832258 ], "wc_limitations_avg": [ 9.25, 4.815340071064556 ], "wc_review_avg": [ 413.0, 86.51300480274628 ], "wc_reply_reviewers_avg": [ 6.25, 6.339361166552983 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3571467198546650919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";ucla.edu;pku.edu.cn;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Los Angeles;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UCLA;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "State Chrono Representation for Enhancing Generalization in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95738", "id": "J42SwBemEA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J42SwBemEA", "openreview": "https://openreview.net/forum?id=J42SwBemEA", "poster": "", "project": "", "author_site": "Jianda Chen, Wen zheng terence Ng, Zichen Chen, Sinno Pan, Tianwei Zhang", "tldr": "", "abstract": "In reinforcement learning with image-based inputs, it is crucial to establish a robust and generalizable state representation. Recent advancements in metric learning, such as deep bisimulation metric approaches, have shown promising results in learning structured low-dimensional representation space from pixel observations, where the distance between states is measured based on task-relevant features. However, these approaches face challenges in demanding generalization tasks and scenarios with non-informative rewards. This is because they fail to capture sufficient long-term information in the learned representations. To address these challenges, we propose a novel State Chrono Representation (SCR) approach. SCR augments state metric-based representations by incorporating extensive temporal information into the update step of bisimulation metric learning. It learns state distances within a temporal framework that considers both future dynamics and cumulative rewards over current and long-term future states. Our learning strategy effectively incorporates future behavioral information into the representation space without introducing a significant number of additional parameters for modeling dynamics. Extensive experiments conducted in DeepMind Control and Meta-World environments demonstrate that SCR achieves better performance comparing to other recent metric-based methods in demanding generalization tasks. The codes of SCR are available in https://github.com/jianda-chen/SCR.", "keywords": "bisimulation metric;deep reinforcement learning;representation learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jianda Chen;Wen zheng terence Ng;Zichen Chen;Sinno Jialin Pan;Tianwei Zhang", "authorids": "~Jianda_Chen1;~Wen_zheng_terence_Ng1;~Zichen_Chen1;~Sinno_Jialin_Pan1;~Tianwei_Zhang1", "gender": ";;F;;M", "homepage": ";;;;https://personal.ntu.edu.sg/tianwei.zhang/index.html", "dblp": "176/6660;;23/7781;;77/7902-4", "google_scholar": "jEOSgcUAAAAJ;;X4goIzYAAAAJ;;9vpiYDIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jianda_Chen1;~Wen_zheng_terence_Ng1;~Zichen_Chen1;~Sinno_Jialin_Pan1;~Tianwei_Zhang1", "aff": "Nanyang Technological University;;University of California, Santa Barbara;;Nanyang Technological University", "aff_domain": "ntu.edu.sg;;ucsb.edu;;ntu.edu.sg", "position": "Researcher;;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nchen2024state,\ntitle={State Chrono Representation for Enhancing Generalization in Reinforcement Learning},\nauthor={Jianda Chen and Wen zheng terence Ng and Zichen Chen and Sinno Jialin Pan and Tianwei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J42SwBemEA}\n}", "github": "", "reviewers": "EHt5;UVpA;b4Fc;R7iQ", "pdf_size": 6755786, "rating": "4;5;6;7", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "133;46;171;84", "wc_strengths": "54;54;84;55", "wc_weaknesses": "249;240;82;82", "wc_questions": "119;189;35;56", "wc_limitations": "35;8;9;57", "wc_review": "590;537;381;334", "wc_reply_reviewers": "1170;232;14;11", "wc_reply_authors": "3143;1097;123;113", "reply_reviewers": "4;2;1;1", "reply_authors": "9;4;3;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 108.5, 47.468410548490034 ], "wc_strengths_avg": [ 61.75, 12.852528934026953 ], "wc_weaknesses_avg": [ 163.25, 81.31228381985099 ], "wc_questions_avg": [ 99.75, 60.08899649686288 ], "wc_limitations_avg": [ 27.25, 20.30240133580262 ], "wc_review_avg": [ 460.5, 106.00117923872357 ], "wc_reply_reviewers_avg": [ 356.75, 478.0059492307601 ], "wc_reply_authors_avg": [ 1119.0, 1235.0214573034752 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 4.75, 2.48746859276655 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9O66YYNCCOQJ:scholar.google.com/&scioq=State+Chrono+Representation+for+Enhancing+Generalization+in+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "ntu.edu.sg;;ucsb.edu;;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;University of California, Santa Barbara", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ucsb.edu", "aff_unique_abbr": "NTU;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "WaveAttack: Asymmetric Frequency Obfuscation-based Backdoor Attacks Against Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95737", "id": "J6NByZlLNj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J6NByZlLNj", "openreview": "https://openreview.net/forum?id=J6NByZlLNj", "poster": "/media/PosterPDFs/NeurIPS%202024/95737.png?t=1731287330.6313283", "project": "", "author_site": "Jun Xia, Zhihao Yue, Yingbo Zhou, Zhiwei Ling, Yiyu Shi, Xian Wei, Mingsong Chen", "tldr": "", "abstract": "Due to the increasing popularity of Artificial Intelligence (AI), more and more backdoor attacks are designed to mislead Deep Neural Network (DNN) predictions by manipulating training samples or processes. Although backdoor attacks have been investigated in various scenarios, they still suffer from the problems of both low fidelity of poisoned samples and non-negligible transfer in latent space, which make them easily identified by existing backdoor detection algorithms. To overcome this weakness, this paper proposes a novel frequency-based backdoor attack method named WaveAttack, which obtains high-frequency image features through Discrete Wavelet Transform (DWT) to generate highly stealthy backdoor triggers. By introducing an asymmetric frequency obfuscation method, our approach adds an adaptive residual to the training and inference stages to improve the impact of triggers, thus further enhancing the effectiveness of WaveAttack. Comprehensive experimental results show that, WaveAttack can not only achieve higher effectiveness than state-of-the-art backdoor attack methods, but also outperform them in the fidelity of images (i.e., by up to 28.27\\% improvement in PSNR, 1.61\\% improvement in SSIM, and 70.59\\% reduction in IS). Our code is available at https://github.com/BililiCode/WaveAttack.", "keywords": "backdoor attack;", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2e668d688c896f5bf7e59c78e766686c1f3068db.zip", "author": "Jun Xia;Zhihao Yue;Yingbo Zhou;Zhiwei Ling;Yiyu Shi;Xian Wei;Mingsong Chen", "authorids": "~Jun_Xia2;~Zhihao_Yue1;~Yingbo_Zhou2;~Zhiwei_Ling1;~Yiyu_Shi1;~Xian_Wei1;~Mingsong_Chen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://www.researchgate.net/profile/Jun-Xia-23;;;https://lingandzero.github.io/;;https://www.researchgate.net/;https://faculty.ecnu.edu.cn/_s43/cms/main.psp", "dblp": ";314/6918.html;;314/6682;94/5536;139/0725;95/573.html", "google_scholar": "K4JXEHUAAAAJ;;;https://scholar.google.com.hk/citations?user=oQ6YLw0AAAAJ;;https://scholar.google.com/citations?hl=en;93A6b7YAAAAJ", "orcid": "0000-0003-0245-8499;;0000-0001-6034-9667;;;;0000-0002-3922-0989", "linkedin": ";;;;;;", "or_profile": "~Jun_Xia2;~Zhihao_Yue1;~Yingbo_Zhou2;~Zhiwei_Ling1;~Yiyu_Shi1;~Xian_Wei1;~Mingsong_Chen1", "aff": "University of Notre Dame;East China Normal University;East China Normal University;East China Normal University;University of Notre Dame;East China Normal University;East China Normal University", "aff_domain": "nd.edu;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;nd.edu;ecnu.edu.cn;ecnu.edu.cn", "position": "Postdoc;MS student;PhD student;MS student;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nxia2024waveattack,\ntitle={WaveAttack: Asymmetric Frequency Obfuscation-based Backdoor Attacks Against Deep Neural Networks},\nauthor={Jun Xia and Zhihao Yue and Yingbo Zhou and Zhiwei Ling and Yiyu Shi and Xian Wei and Mingsong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J6NByZlLNj}\n}", "github": "", "reviewers": "h6uR;vbLQ;gVJn", "pdf_size": 2601164, "rating": "4;4;7", "confidence": "4;4;3", "soundness": "3;2;3", "novelty": "1;2;3", "presentation": "3;3;4", "wc_summary": "33;47;40", "wc_strengths": "25;19;34", "wc_weaknesses": "250;97;30", "wc_questions": "3;34;62", "wc_limitations": "1;29;4", "wc_review": "312;226;170", "wc_reply_reviewers": "66;39;0", "wc_reply_authors": "87;179;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 40.0, 5.715476066494082 ], "wc_strengths_avg": [ 26.0, 6.164414002968976 ], "wc_weaknesses_avg": [ 125.66666666666667, 92.07364202395577 ], "wc_questions_avg": [ 33.0, 24.097026095903757 ], "wc_limitations_avg": [ 11.333333333333334, 12.552113589175152 ], "wc_review_avg": [ 236.0, 58.40091323486874 ], "wc_reply_reviewers_avg": [ 35.0, 27.09243436828813 ], "wc_reply_authors_avg": [ 88.66666666666667, 73.08594636149658 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7075121470820762058&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nd.edu;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;nd.edu;ecnu.edu.cn;ecnu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1;1", "aff_unique_norm": "University of Notre Dame;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nd.edu;http://www.ecnu.edu.cn", "aff_unique_abbr": "Notre Dame;ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1;1", "aff_country_unique": "United States;China" }, { "title": "Transcoders find interpretable LLM feature circuits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95736", "id": "J6zHcScAo0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J6zHcScAo0", "openreview": "https://openreview.net/forum?id=J6zHcScAo0", "poster": "", "project": "", "author_site": "Jacob Dunefsky, Philippe Chlenski, Neel Nanda", "tldr": "", "abstract": "A key goal in mechanistic interpretability is circuit analysis: finding sparse subgraphs of models corresponding to specific behaviors or capabilities. However, MLP sublayers make fine-grained circuit analysis on transformer-based language models difficult. In particular, interpretable features\u2014such as those found by sparse autoencoders (SAEs)\u2014are typically linear combinations of extremely many neurons, each with its own nonlinearity to account for. Circuit analysis in this setting thus either yields intractably large circuits or fails to disentangle local and global behavior. To address this we explore **transcoders**, which seek to faithfully approximate a densely activating MLP layer with a wider, sparsely-activating MLP layer. We introduce a novel method for using transcoders to perform weights-based circuit analysis through MLP sublayers. The resulting circuits neatly factorize into input-dependent and input-invariant terms. We then successfully train transcoders on language models with 120M, 410M, and 1.4B parameters, and find them to perform at least on par with SAEs in terms of sparsity, faithfulness, and human-interpretability. Finally, we apply transcoders to reverse-engineer unknown circuits in the model, and we obtain novel insights regarding the \"greater-than circuit\" in GPT2-small. Our results suggest that transcoders can prove effective in decomposing model computations involving MLPs into interpretable circuits. Code is available at https://github.com/jacobdunefsky/transcoder_circuits/", "keywords": "mechanistic interpretability;transcoders;sparse autoencoders;circuit analysis", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/2d5ceb27181deb1882cc0c2df9977a969385b754.zip", "author": "Jacob Dunefsky;Philippe Chlenski;Neel Nanda", "authorids": "~Jacob_Dunefsky1;~Philippe_Chlenski1;~Neel_Nanda1", "gender": "M;M;M", "homepage": "https://jacobdunefsky.github.io;http://www.chlenski.com;https://neelnanda.io", "dblp": ";;285/6389", "google_scholar": ";_8s9f44AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-2951-4385;", "linkedin": ";;https://linkedin.com/in/neel-nanda-993580151", "or_profile": "~Jacob_Dunefsky1;~Philippe_Chlenski1;~Neel_Nanda1", "aff": "Department of Computer Science, Yale University;Columbia University;Google DeepMind", "aff_domain": "cs.yale.edu;columbia.edu;deepmind.com", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\ndunefsky2024transcoders,\ntitle={Transcoders find interpretable {LLM} feature circuits},\nauthor={Jacob Dunefsky and Philippe Chlenski and Neel Nanda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J6zHcScAo0}\n}", "github": "", "reviewers": "sL5K;Q9a1;GwPg;xRyp", "pdf_size": 1052520, "rating": "6;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;4;3", "presentation": "4;3;3;2", "wc_summary": "467;112;113;79", "wc_strengths": "97;60;97;66", "wc_weaknesses": "137;489;441;222", "wc_questions": "104;75;71;110", "wc_limitations": "3;2;2;7", "wc_review": "808;738;724;484", "wc_reply_reviewers": "10;288;226;13", "wc_reply_authors": "0;1605;267;26", "reply_reviewers": "1;2;1;1", "reply_authors": "1;4;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 192.75, 158.92824638811064 ], "wc_strengths_avg": [ 80.0, 17.131841699011815 ], "wc_weaknesses_avg": [ 322.25, 146.86281864379424 ], "wc_questions_avg": [ 90.0, 17.190113437671084 ], "wc_limitations_avg": [ 3.5, 2.0615528128088303 ], "wc_review_avg": [ 688.5, 122.28143767555238 ], "wc_reply_reviewers_avg": [ 134.25, 124.69638126264933 ], "wc_reply_authors_avg": [ 474.5, 660.9442109588373 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17831671049608135594&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.yale.edu;columbia.edu;deepmind.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Yale University;Columbia University;Google", "aff_unique_dep": "Department of Computer Science;;Google DeepMind", "aff_unique_url": "https://www.yale.edu;https://www.columbia.edu;https://deepmind.com", "aff_unique_abbr": "Yale;Columbia;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Causal Temporal Representation Learning with Nonstationary Sparse Transition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95735", "id": "J709rtAUD1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J709rtAUD1", "openreview": "https://openreview.net/forum?id=J709rtAUD1", "poster": "", "project": "", "author_site": "Xiangchen Song, Zijian Li, Guangyi Chen, Yujia Zheng, Yewen Fan, Xinshuai Dong, Kun Zhang", "tldr": "", "abstract": "Causal Temporal Representation Learning (Ctrl) methods aim to identify the temporal causal dynamics of complex nonstationary temporal sequences. Despite the success of existing Ctrl methods, they require either directly observing the domain variables or assuming a Markov prior on them. Such requirements limit the application of these methods in real-world scenarios when we do not have such prior knowledge of the domain variables. To address this problem, this work adopts a sparse transition assumption, aligned with intuitive human understanding, and presents identifiability results from a theoretical perspective. In particular, we explore under what conditions on the significance of the variability of the transitions we can build a model to identify the distribution shifts. Based on the theoretical result, we introduce a novel framework, *Causal Temporal Representation Learning with Nonstationary Sparse Transition* (CtrlNS), designed to leverage the constraints on transition sparsity and conditional independence to reliably identify both distribution shifts and latent factors. Our experimental evaluations on synthetic and real-world datasets demonstrate significant improvements over existing baselines, highlighting the effectiveness of our approach.", "keywords": "Causal Representation Learning;Identifiability;Nonlinear ICA;Temporal Distribution Shift", "primary_area": "causal_inference", "supplementary_material": "", "author": "Xiangchen Song;Zijian Li;Guangyi Chen;Yujia Zheng;Yewen Fan;Xinshuai Dong;Kun Zhang", "authorids": "~Xiangchen_Song1;~Zijian_Li1;~Guangyi_Chen1;~Yujia_Zheng1;~Yewen_Fan1;~Xinshuai_Dong1;~Kun_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://xiangchensong.github.io/;;https://chengy12.github.io/;https://yjzheng.com;https://tofuwen.github.io/;https://dongxinshuai.github.io/;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "261/9024;27/10487;c/GuangyiChen-2;245/6109-1.html;200/1168;279/6151.html;96/3115-1", "google_scholar": "foR8BIoAAAAJ;j3ilESoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.uk/citations?user=ioiW248AAAAJ;Q9_yaekAAAAJ;A7JyL1sAAAAJ;RGoypN4AAAAJ", "orcid": ";;;0009-0003-5225-6366;;;", "linkedin": ";;;;yewen-fan;;", "or_profile": "~Xiangchen_Song1;~Zijian_Li1;~Guangyi_Chen1;~Yujia_Zheng1;~Yewen_Fan1;~Xinshuai_Dong1;~Kun_Zhang1", "aff": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;mbzuai.ac.ae;cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Postdoc;Postdoc;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsong2024causal,\ntitle={Causal Temporal Representation Learning with Nonstationary Sparse Transition},\nauthor={Xiangchen Song and Zijian Li and Guangyi Chen and Yujia Zheng and Yewen Fan and Xinshuai Dong and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J709rtAUD1}\n}", "github": "", "reviewers": "Zo2n;1QG3;6hAo;3MKp", "pdf_size": 5407773, "rating": "4;5;6;7", "confidence": "2;4;2;3", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "144;91;109;44", "wc_strengths": "77;79;103;95", "wc_weaknesses": "359;123;37;118", "wc_questions": "230;4;22;337", "wc_limitations": "19;13;12;1", "wc_review": "829;310;283;595", "wc_reply_reviewers": "197;0;0;0", "wc_reply_authors": "921;58;58;85", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.0, 36.04857833535187 ], "wc_strengths_avg": [ 88.5, 10.897247358851684 ], "wc_weaknesses_avg": [ 159.25, 120.2713078834682 ], "wc_questions_avg": [ 148.25, 140.5851610234878 ], "wc_limitations_avg": [ 11.25, 6.49519052838329 ], "wc_review_avg": [ 504.25, 223.82065923412878 ], "wc_reply_reviewers_avg": [ 49.25, 85.3035022727672 ], "wc_reply_authors_avg": [ 280.5, 369.95709210663875 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.1348399724926484, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14654968911302051756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;mbzuai.ac.ae;cmu.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "J8m0DEjxEO", "title": "AttnGCG: Enhancing Adversarial Attacks on Language Models with Attention Manipulation", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies the vulnerabilities of transformer-based Large Language Models (LLMs) to jailbreaking attacks, with a particular focus on the optimization-based Greedy Coordinate Gradient (GCG) strategy. Noting a positive correlation between the effectiveness of attacks and the internal behaviors of models---for instance, attacks are less effective when models robustly focus on system instructions specialized for mitigating harmful behaviors and ensuring safety alignment---we introduce an enhanced method that additionally manipulates models\u2019 attention scores to enhance the large language model (LLM) jailbreaking. We term this novel strategy AttnGCG. Empirically, AttnGCG demonstrates consistent performance enhancements across diverse LLMs, with an average improvement of 7\\% in the Llama-2 series and 10\\% in the Gemma series. This strategy also exhibits stronger attack transferability when testing on unknown or closed-sourced LLMs, such as GPT-3.5 and GPT-4. Moreover, we show that AttnGCG is able to offer enhanced interpretability by visualizing models' attention scores across different input components, thus providing clear insights into how targeted attention manipulation contributes to more successful jailbreaking.", "keywords": "AttnGCG;Adversarial Attacks;Attention Mechanism;Optimization-based Attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zijun Wang;Haoqin Tu;Jieru Mei;Bingchen Zhao;Yisen Wang;Cihang Xie", "authorids": "~Zijun_Wang4;~Haoqin_Tu1;~Jieru_Mei2;~Bingchen_Zhao1;~Yisen_Wang1;~Cihang_Xie3", "gender": "M;M;M;M;M;F", "homepage": "https://www.haqtu.me;https://meijieru.com/;http://bzhao.me/;https://yisenwang.github.io/;https://cihangxie.github.io/;https://asillycat.github.io/", "dblp": "309/7386;198/9332.html;120/3602;172/1346-1;175/3366;", "google_scholar": "https://scholar.google.com/citations?hl=en;nHKExN0AAAAJ;lEcqFJEAAAAJ;uMWPDboAAAAJ;X3vVZPcAAAAJ;n5wjgV0AAAAJ", "orcid": ";;;;;0009-0009-0461-9840", "linkedin": ";meijieru/;;;;", "or_profile": "~Haoqin_Tu1;~Jieru_Mei2;~Bingchen_Zhao1;~Yisen_Wang1;~cihang_xie1;~Wang_Zijun1", "aff": "University of Chinese Academy of Sciences;Johns Hopkins University;University of Edinburgh, University of Edinburgh;Peking University;University of California, Santa Cruz;Zhejiang University", "aff_domain": "ucas.ac.cn;jhu.edu;ed.ac.uk;pku.edu.cn;ucsc.edu;zju.edu.cn", "position": "MS student;PhD student;PhD student;Assistant Professor;Assistant Professor;Undergrad student", "bibtex": "@misc{\nanonymous2024attngcg,\ntitle={Attn{GCG}: Enhancing Adversarial Attacks on Language Models with Attention Manipulation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=J8m0DEjxEO}\n}", "github": "", "project": "", "reviewers": "yXuJ;48Wr;CeR5;Hgf8", "site": "https://openreview.net/forum?id=J8m0DEjxEO", "pdf_size": 704816, "rating": "4;5;5;5", "confidence": "3;4;4;3", "soundness": "3;3;2;2", "novelty": "2;2;2;2", "presentation": "3;3;3;3", "wc_summary": "79;52;54;39", "wc_strengths": "39;150;36;9", "wc_weaknesses": "294;574;147;119", "wc_questions": "13;122;40;2", "wc_limitations": "7;9;9;1", "wc_review": "432;907;286;170", "wc_reply_reviewers": "173;73;89;57", "wc_reply_authors": "305;131;131;250", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;4;4", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 14.474114826130128 ], "wc_strengths_avg": [ 58.5, 54.1040663906143 ], "wc_weaknesses_avg": [ 283.5, 180.4113355640382 ], "wc_questions_avg": [ 44.25, 46.970070257558696 ], "wc_limitations_avg": [ 6.5, 3.278719262151 ], "wc_review_avg": [ 448.75, 280.38489171137593 ], "wc_reply_reviewers_avg": [ 98.0, 44.75488800120049 ], "wc_reply_authors_avg": [ 204.25, 75.78711961804592 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tuCSWDDAos8J:scholar.google.com/&scioq=AttnGCG:+Enhancing+Adversarial+Attacks+on+Language+Models+with+Attention+Manipulation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Chinese Academy of Sciences;Johns Hopkins University;University of Edinburgh;Peking University;University of California, Santa Cruz;Zhejiang University", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.jhu.edu;https://www.ed.ac.uk;http://www.pku.edu.cn;https://www.ucsc.edu;https://www.zju.edu.cn", "aff_unique_abbr": "UCAS;JHU;Edinburgh;Peking U;UCSC;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;2;0;1;0", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "On the Stability and Generalization of Meta-Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95734", "id": "J8rOw29df2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J8rOw29df2", "openreview": "https://openreview.net/forum?id=J8rOw29df2", "poster": "/media/PosterPDFs/NeurIPS%202024/95734.png?t=1733444337.4791706", "project": "", "author_site": "Yunjuan Wang, Raman Arora", "tldr": "", "abstract": "We focus on developing a theoretical understanding of meta-learning. Given multiple tasks drawn i.i.d. from some (unknown) task distribution, the goal is to find a good pre-trained model that can be adapted to a new, previously unseen, task with little computational and statistical overhead. We introduce a novel notion of stability for meta-learning algorithms, namely *uniform meta-stability*. We instantiate two uniformly meta-stable learning algorithms based on regularized empirical risk minimization and gradient descent and give explicit generalization bounds for convex learning problems with smooth losses and for weakly convex learning problems with non-smooth losses. Finally, we extend our results to stochastic and adversarially robust variants of our meta-learning algorithm.", "keywords": "meta learning;stability analysis;generalization guarantees", "primary_area": "optimization", "supplementary_material": "/attachment/ebe9bc4d0632e9a0cb70394e8feea6312e841439.zip", "author": "Yunjuan Wang;Raman Arora", "authorids": "~Yunjuan_Wang1;~Raman_Arora1", "gender": "F;M", "homepage": "https://yunjuanwang.github.io/;http://www.cs.jhu.edu/~raman/Home.html", "dblp": "31/560;", "google_scholar": "t_VSEEwAAAAJ;Spe0xdkAAAAJ", "orcid": ";", "linkedin": "yunjuan-wang-12ab85169/;", "or_profile": "~Yunjuan_Wang1;~Raman_Arora1", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2024on,\ntitle={On the Stability and Generalization of Meta-Learning},\nauthor={Yunjuan Wang and Raman Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=J8rOw29df2}\n}", "github": "", "reviewers": "zAce;VSfZ;SWCq;S84Y", "pdf_size": 676187, "rating": "4;5;6;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "122;50;66;128", "wc_strengths": "72;47;103;33", "wc_weaknesses": "229;287;159;85", "wc_questions": "29;2;1;131", "wc_limitations": "5;16;36;12", "wc_review": "457;402;365;389", "wc_reply_reviewers": "103;96;0;95", "wc_reply_authors": "389;415;46;299", "reply_reviewers": "1;2;0;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.5, 34.04041715373065 ], "wc_strengths_avg": [ 63.75, 26.621185172715357 ], "wc_weaknesses_avg": [ 190.0, 75.69015788066504 ], "wc_questions_avg": [ 40.75, 53.30279073369424 ], "wc_limitations_avg": [ 17.25, 11.519006033508273 ], "wc_review_avg": [ 403.25, 33.751851801049376 ], "wc_reply_reviewers_avg": [ 73.5, 42.547032799009614 ], "wc_reply_authors_avg": [ 287.25, 145.7847299959773 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7UHGB9Tt82kJ:scholar.google.com/&scioq=On+the+Stability+and+Generalization+of+Meta-Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "jhu.edu;jhu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "RAGChecker: A Fine-grained Framework for Diagnosing Retrieval-Augmented Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97761", "id": "J9oefdGUuM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=J9oefdGUuM", "openreview": "https://openreview.net/forum?id=J9oefdGUuM", "poster": "/media/PosterPDFs/NeurIPS%202024/97761.png?t=1733167482.7401803", "project": "", "author_site": "Dongyu Ru, Lin Qiu, Xiangkun Hu, Tianhang Zhang, Peng Shi, Shuaichen Chang, Cheng Jiayang, Cunxiang Wang, Shichao Sun, Huanyu Li, Zizhao Zhang, Binjie Wang, Jiarong Jiang, Tong He, Zhiguo Wang, Pengfei Liu, Yue Zhang, Zheng Zhang", "tldr": "", "abstract": "Despite Retrieval-Augmented Generation (RAG) has shown promising capability in leveraging external knowledge, a comprehensive evaluation of RAG systems is still challenging due to the modular nature of RAG, evaluation of long-form responses and reliability of measurements. In this paper, we propose a fine-grained evaluation framework, RAGChecker, that incorporates a suite of diagnostic metrics for both the retrieval and generation modules. Meta evaluation verifies that RAGChecker has significantly better correlations with human judgments than other evaluation metrics. Using RAGChecker, we evaluate 8 RAG systems and conduct an in-depth analysis of their performance, revealing insightful patterns and trade-offs in the design choices of RAG architectures. The metrics of RAGChecker can guide researchers and practitioners in developing more effective RAG systems.", "keywords": "Retrieval Augmented Generation;RAG;Benchmark;RAG Evaluation", "primary_area": "", "supplementary_material": "/attachment/a2698cde871bc9fa0e5b2798c3bbe074457584ae.zip", "author": "Dongyu Ru;Lin Qiu;Xiangkun Hu;Tianhang Zhang;Peng Shi;Shuaichen Chang;Cheng Jiayang;Cunxiang Wang;Shichao Sun;Huanyu Li;Zizhao Zhang;Binjie Wang;Jiarong Jiang;Tong He;Zhiguo Wang;Pengfei Liu;Yue Zhang;Zheng Zhang", "authorids": "~Dongyu_Ru1;~Lin_Qiu2;~Xiangkun_Hu1;~Tianhang_Zhang1;~Peng_Shi2;~Shuaichen_Chang1;~Cheng_Jiayang1;~Cunxiang_Wang1;~Shichao_Sun1;~Huanyu_Li2;~Zizhao_Zhang4;~Binjie_Wang1;~Jiarong_Jiang1;~Tong_He5;~Zhiguo_Wang4;~Pengfei_Liu1;~Yue_Zhang7;~Zheng_Zhang1", "gender": ";M;M;M;M;M;;Not Specified;M;;;M;F;M;;M;M;M", "homepage": ";;;;;https://shuaichenchang.github.io/;;https://wangcunxiang.github.io/;https://shichaosun.github.io;;;;;https://hetong007.github.io/;;http://pfliu.com/;http://frcchang.github.io;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang", "dblp": ";;224/5990;173/9526;;230/4596;;213/1862.html;;;;;13/11107;02/1554-2;;34/3381-3;47/722-4;", "google_scholar": ";U4GJuPIAAAAJ;_-0MpawAAAAJ;xYyNYs8AAAAJ;XTbDLrkAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com/citations?hl=en;;;;;hV5D8GYAAAAJ;;oIz_CYEAAAAJ;;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ", "orcid": ";;;;;;;;;;;;;;;;0000-0002-5214-2268;", "linkedin": ";;;;;;;;;;;binjie-wang-91514b25a/;;;;;;", "or_profile": "~Dongyu_Ru1;~Lin_Qiu2;~Xiangkun_Hu1;~Tianhang_Zhang1;~Peng_Shi2;~Shuaichen_Chang1;~Cheng_Jiayang1;~Cunxiang_Wang1;~Shichao_Sun1;~Huanyu_Li2;~Zizhao_Zhang4;~Binjie_Wang1;~Jiarong_Jiang1;~Tong_He5;~Zhiguo_Wang4;~Pengfei_Liu1;~Yue_Zhang7;~Zheng_Zhang1", "aff": ";Amazon;Amazon;Shanghai Jiaotong University;Amazon AWS;Amazon;;Westlake University;The Hong Kong Polytechnic University;;;Fudan University;Amazon;Amazon;;Shanghai Jiaotong University;Westlake University;Amazon", "aff_domain": ";amazon.com;amazon.com;sjtu.edu.cn;amazon.com;amazon.com;;westlake.edu.cn;polyu.edu.hk;;;fudan.edu.cn;amazon.com;amazon.com;;sjtu.edu;westlake.edu.cn;amazon.com", "position": ";Researcher;Applied Scientist;MS student;Researcher;Researcher;;PhD student;PhD student;;;Undergrad student;Researcher;Researcher;;Associate Professor;Full Professor;Senior Principal Scientist", "bibtex": "@inproceedings{\nru2024ragchecker,\ntitle={{RAGC}hecker: A Fine-grained Framework for Diagnosing Retrieval-Augmented Generation},\nauthor={Dongyu Ru and Lin Qiu and Xiangkun Hu and Tianhang Zhang and Peng Shi and Shuaichen Chang and Cheng Jiayang and Cunxiang Wang and Shichao Sun and Huanyu Li and Zizhao Zhang and Binjie Wang and Jiarong Jiang and Tong He and Zhiguo Wang and Pengfei Liu and Yue Zhang and Zheng Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=J9oefdGUuM}\n}", "github": "", "reviewers": "EEuk;xC1v;dzR1;Kzxa", "pdf_size": 2568694, "rating": "6;6;7;8", "confidence": "3;4;4;4", "wc_summary_and_contributions": "84;67;197;64", "wc_strengths": "73;66;44;62", "wc_improvement": "62;61;111;39", "wc_limitations": "31;31;111;27", "wc_correctness": "39;7;1;1", "wc_clarity": "15;10;1;1", "wc_relation_to_prior_work": "30;16;1;1", "wc_documentation": "26;4;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "361;263;468;197", "wc_reply_reviewers": "5;121;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "1;4;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 103.0, 54.80419691957907 ], "wc_strengths_avg": [ 61.25, 10.709224995301948 ], "wc_improvement_avg": [ 68.25, 26.337947907914163 ], "wc_limitations_avg": [ 50.0, 35.25620512760839 ], "wc_correctness_avg": [ 12.0, 15.7797338380595 ], "wc_clarity_avg": [ 6.75, 6.015604707757983 ], "wc_relation_to_prior_work_avg": [ 12.0, 12.062338081814818 ], "wc_documentation_avg": [ 8.0, 10.464224768228174 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 322.25, 102.39964599548183 ], "wc_reply_reviewers_avg": [ 31.5, 51.713151131989626 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8922889958797290394&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";amazon.com;amazon.com;sjtu.edu.cn;amazon.com;amazon.com;;westlake.edu.cn;polyu.edu.hk;;;fudan.edu.cn;amazon.com;amazon.com;;sjtu.edu;westlake.edu.cn;amazon.com", "author_num": 18, "aff_unique_index": "0;0;1;0;0;2;3;4;0;0;1;2;0", "aff_unique_norm": "Amazon;Shanghai Jiao Tong University;Westlake University;Hong Kong Polytechnic University;Fudan University", "aff_unique_dep": "Amazon.com, Inc.;;;;", "aff_unique_url": "https://www.amazon.com;https://www.sjtu.edu.cn;https://www.westlake.edu.cn;https://www.polyu.edu.hk;https://www.fudan.edu.cn", "aff_unique_abbr": "Amazon;SJTU;WU;PolyU;Fudan", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;1;1;1;0;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "SpecExec: Massively Parallel Speculative Decoding For Interactive LLM Inference on Consumer Devices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95733", "id": "JAhNsZ9dvG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JAhNsZ9dvG", "openreview": "https://openreview.net/forum?id=JAhNsZ9dvG", "poster": "/media/PosterPDFs/NeurIPS%202024/95733.png?t=1733303289.5023873", "project": "", "author_site": "Ruslan Svirschevski, Avner May, Zhuoming Chen, Beidi Chen, Zhihao Jia, Max Ryabinin", "tldr": "", "abstract": "As large language models gain widespread adoption, running them efficiently becomes a crucial task. Recent works on LLM inference use speculative decoding to achieve extreme speedups. However, most of these works implicitly design their algorithms for high-end datacenter hardware. In this work, we ask the opposite question: how fast can we run LLMs on consumer machines? Consumer GPUs can no longer fit the largest available models and must offload them to RAM or SSD. With parameter offloading, hundreds or thousands of tokens can be processed in batches within the same time as just one token, making it a natural fit for speculative decoding. We propose SpecExec (Speculative Execution), a simple parallel decoding method that can generate up to 20 tokens per target model iteration for popular LLM families. SpecExec takes the most probable continuations from the draft model to build a \"cache\" tree for the target model, which then gets validated in a single pass. Using SpecExec, we demonstrate inference of 50B+ parameter LLMs on consumer GPUs with RAM offloading at 4--6 tokens per second with 4-bit quantization or 2--3 tokens per second with 16-bit weights. Our code is available at https://github.com/yandex-research/specexec .", "keywords": "speculative decoding;offloading;large language models;inference", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ruslan Svirschevski;Avner May;Zhuoming Chen;Beidi Chen;Zhihao Jia;Max Ryabinin", "authorids": "~Ruslan_Svirschevski1;~Avner_May1;~Zhuoming_Chen1;~Beidi_Chen1;~Zhihao_Jia2;~Max_Ryabinin1", "gender": ";M;M;F;M;Not Specified", "homepage": ";https://avnermay.github.io/index.html;;https://www.andrew.cmu.edu/user/beidic/;https://www.cs.cmu.edu/~zhihaoj2/;https://mryab.github.io/", "dblp": ";146/7842;226/5729;192/1339;;276/0192", "google_scholar": ";Gx5baHUAAAAJ;4Bb5KRYAAAAJ;;0IWLFR4AAAAJ;930PERsAAAAJ", "orcid": ";;;;;", "linkedin": ";avnermay/;zhuoming-chen-325075234/;;;", "or_profile": "~Ruslan_Svirschevski1;~Avner_May1;~Zhuoming_Chen1;~Beidi_Chen1;~Zhihao_Jia2;~Max_Ryabinin1", "aff": ";Together.ai;Carnegie Mellon University;Meta Facebook;Carnegie Mellon University;Together AI", "aff_domain": ";together.ai;cmu.edu;fb.com;cs.cmu.edu;together.ai", "position": ";Researcher;PhD student;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nsvirschevski2024specexec,\ntitle={SpecExec: Massively Parallel Speculative Decoding For Interactive {LLM} Inference on Consumer Devices},\nauthor={Ruslan Svirschevski and Avner May and Zhuoming Chen and Beidi Chen and Zhihao Jia and Max Ryabinin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JAhNsZ9dvG}\n}", "github": "", "reviewers": "PkkW;BH6v;ph7U;fKWn", "pdf_size": 802421, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;1;3;3", "wc_summary": "104;130;79;91", "wc_strengths": "76;15;88;79", "wc_weaknesses": "68;111;85;31", "wc_questions": "108;17;14;89", "wc_limitations": "15;2;17;11", "wc_review": "371;275;283;301", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 101.0, 18.934096228761486 ], "wc_strengths_avg": [ 64.5, 28.91798748184251 ], "wc_weaknesses_avg": [ 73.75, 29.046299247924853 ], "wc_questions_avg": [ 57.0, 42.053537306628556 ], "wc_limitations_avg": [ 11.25, 5.7608593109014565 ], "wc_review_avg": [ 307.5, 37.851684242580276 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=968895456765233137&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": ";together.ai;cmu.edu;fb.com;cs.cmu.edu;together.ai", "author_num": 6, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Together.ai;Carnegie Mellon University;Meta;Together AI", "aff_unique_dep": ";;Meta Platforms, Inc.;", "aff_unique_url": "https://www.together.ai;https://www.cmu.edu;https://meta.com;https://www.together.ai", "aff_unique_abbr": "Together.ai;CMU;Meta;Together AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "HumanSplat: Generalizable Single-Image Human Gaussian Splatting with Structure Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95732", "id": "JBAUg7o8Yv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JBAUg7o8Yv", "openreview": "https://openreview.net/forum?id=JBAUg7o8Yv", "poster": "/media/PosterPDFs/NeurIPS%202024/95732.png?t=1733753802.1509643", "project": "", "author_site": "Panwang Pan, Zhuo Su, Chenguo Lin, Zhen Fan, Yongjie Zhang, Zeming Li, Tingting Shen, Yadong Mu, Yebin Liu", "tldr": "", "abstract": "Despite recent advancements in high-fidelity human reconstruction techniques, the requirements for densely captured images or time-consuming per-instance optimization significantly hinder their applications in broader scenarios. To tackle these issues, we present **HumanSplat**, which predicts the 3D Gaussian Splatting properties of any human from a single input image in a generalizable manner.\nSpecifically, HumanSplat comprises a 2D multi-view diffusion model and a latent reconstruction Transformer with human structure priors that adeptly integrate geometric priors and semantic features within a unified framework. A hierarchical loss that incorporates human semantic information is devised to achieve high-fidelity texture modeling and impose stronger constraints on the estimated multiple views. Comprehensive experiments on standard benchmarks and in-the-wild images demonstrate that HumanSplat surpasses existing state-of-the-art methods in achieving photorealistic novel-view synthesis. Project page: https://humansplat.github.io.", "keywords": "Single Image Human Reconstruction;Gaussian Splatting;Human Prior;Diffusion;Latent Reconstruction Transformer", "primary_area": "machine_vision", "supplementary_material": "/attachment/71bbf3b30128fb765d70bd825ab608136cb64112.zip", "author": "Panwang Pan;Zhuo Su;Chenguo Lin;Zhen Fan;Yongjie zhang;Zeming Li;Tingting Shen;Yadong MU;Yebin Liu", "authorids": "~Panwang_Pan1;~Zhuo_Su3;~Chenguo_Lin1;~Zhen_Fan2;~Yongjie_zhang2;~Zeming_Li2;~Tingting_Shen2;~Yadong_MU1;~Yebin_Liu1", "gender": "M;M;M;;M;;F;M;M", "homepage": "https://paulpanwang.github.io/;https://suzhuo.github.io/;https://chenguolin.github.io;;;;https://github.com/BreakTT;http://www.muyadong.com/;http://liuyebin.com", "dblp": ";274/0946;286/8465;;56/8048.html;;;55/1817;84/1411.html", "google_scholar": "https://scholar.google.com/citations?hl=en;iaqDkqMAAAAJ;jZ7MDcMAAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=Fqqx4HsAAAAJ;https://scholar.google.com/scholar?hl=zh-CN", "orcid": "0000-0001-8631-012X;0000-0002-7728-0835;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Panwang_Pan1;~Zhuo_Su3;~Chenguo_Lin1;~Zhen_Fan2;~Yongjie_zhang2;~Zeming_Li2;~Tingting_Shen2;~Yadong_MU1;~Yebin_Liu1", "aff": "ByteDance;ByteDance;Peking University;;ByteDance Inc.;;Xiamen University;Peking University;Tsinghua University", "aff_domain": "bytedance.com;bytedance.com;pku.edu.cn;;bytedance.com;;xmu.edu.cn;pku.edu.cn;tsinghua.edu.cn", "position": "Researcher;Researcher;PhD student;;Researcher;;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npan2024humansplat,\ntitle={HumanSplat: Generalizable Single-Image Human Gaussian Splatting with Structure Priors},\nauthor={Panwang Pan and Zhuo Su and Chenguo Lin and Zhen Fan and Yongjie zhang and Zeming Li and Tingting Shen and Yadong MU and Yebin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JBAUg7o8Yv}\n}", "github": "", "reviewers": "eC4L;6Egy;zEoa;2EjT", "pdf_size": 26714392, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "96;25;35;82", "wc_strengths": "53;29;87;91", "wc_weaknesses": "246;301;49;137", "wc_questions": "5;5;21;27", "wc_limitations": "5;7;3;15", "wc_review": "405;367;195;352", "wc_reply_reviewers": "12;137;0;20", "wc_reply_authors": "157;212;85;124", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 30.120590963658067 ], "wc_strengths_avg": [ 65.0, 25.495097567963924 ], "wc_weaknesses_avg": [ 183.25, 97.42272578818559 ], "wc_questions_avg": [ 14.5, 9.733961166965893 ], "wc_limitations_avg": [ 7.5, 4.55521678957215 ], "wc_review_avg": [ 329.75, 80.16038610186456 ], "wc_reply_reviewers_avg": [ 42.25, 55.165093129623195 ], "wc_reply_authors_avg": [ 144.5, 46.56447143477525 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7669345239168826421&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bytedance.com;bytedance.com;pku.edu.cn;;bytedance.com;;xmu.edu.cn;pku.edu.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;2;1;3", "aff_unique_norm": "ByteDance;Peking University;Xiamen University;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.bytedance.com;http://www.pku.edu.cn;https://www.xmu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "ByteDance;Peking U;XMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Poseidon: Efficient Foundation Models for PDEs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95731", "id": "JC1VKK3UXk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JC1VKK3UXk", "openreview": "https://openreview.net/forum?id=JC1VKK3UXk", "poster": "/media/PosterPDFs/NeurIPS%202024/95731.png?t=1732646812.9040663", "project": "", "author_site": "Maximilian Herde, Bogdan Raonic, Tobias Rohner, Roger K\u00e4ppeli, Roberto Molinaro, Emmanuel de B\u00e9zenac, Siddhartha Mishra", "tldr": "", "abstract": "We introduce Poseidon, a foundation model for learning the solution operators of PDEs. It is based on a multiscale operator transformer, with time-conditioned layer norms that enable continuous-in-time evaluations. A novel training strategy leveraging the semi-group property of time-dependent PDEs to allow for significant scaling-up of the training data is also proposed. Poseidon is pretrained on a diverse, large scale dataset for the governing equations of fluid dynamics. It is then evaluated on a suite of 15 challenging downstream tasks that include a wide variety of PDE types and operators. We show that Poseidon exhibits excellent performance across the board by outperforming baselines significantly, both in terms of sample efficiency and accuracy. Poseidon also generalizes very well to new physics that is not seen during pretraining. Moreover, Poseidon scales with respect to model and data size, both for pretraining and for downstream tasks. Taken together, our results showcase the surprising ability of Poseidon to learn effective representations from a very small set of PDEs during pretraining in order to generalize well to unseen and unrelated PDEs downstream, demonstrating its potential as an effective, general purpose PDE foundation model. Finally, the Poseidon model as well as underlying pretraining and downstream datasets are open sourced, with code being available at https://github.com/camlab-ethz/poseidon and pretrained models and datasets at https://huggingface.co/camlab-ethz.", "keywords": "PDEs;operators;foundation models;transformers;sample efficiency", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/8fc706a9a78a4a9edf76596a97ae2f3e5c88e5aa.zip", "author": "Maximilian Herde;Bogdan Raonic;Tobias Rohner;Roger K\u00e4ppeli;Roberto Molinaro;Emmanuel de Bezenac;Siddhartha Mishra", "authorids": "~Maximilian_Herde1;~Bogdan_Raonic1;~Tobias_Rohner1;~Roger_K\u00e4ppeli1;~Roberto_Molinaro1;~Emmanuel_de_Bezenac2;~Siddhartha_Mishra1", "gender": ";M;M;;M;M;M", "homepage": ";https://www.linkedin.com/in/bogdan-raoni%C4%87-210066167;;https://people.math.ethz.ch/~karoger/;;;http://www.sam.math.ethz.ch/", "dblp": ";339/6810;339/6659;;249/2799;;07/2856.html", "google_scholar": ";DN9CCpkAAAAJ;;https://scholar.google.com/citations?hl=en;2ohT8yYAAAAJ;https://scholar.google.fr/citations?user=KvZw5gYAAAAJ;FmEqyNcAAAAJ", "orcid": ";;;0009-0002-5330-8618;;;", "linkedin": ";bogdan-raoni%C4%87-210066167;tobias-rohner-502a27214/;;;;", "or_profile": "~Maximilian_Herde1;~Bogdan_Raonic1;~Tobias_Rohner1;~Roger_K\u00e4ppeli1;~Roberto_Molinaro1;~Emmanuel_de_Bezenac2;~Siddhartha_Mishra1", "aff": ";ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": ";ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": ";PhD student;PhD student;Researcher;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nherde2024poseidon,\ntitle={Poseidon: Efficient Foundation Models for {PDE}s},\nauthor={Maximilian Herde and Bogdan Raonic and Tobias Rohner and Roger K{\\\"a}ppeli and Roberto Molinaro and Emmanuel de Bezenac and Siddhartha Mishra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JC1VKK3UXk}\n}", "github": "", "reviewers": "KcRd;oEdC;UfXd", "pdf_size": 11239000, "rating": "6;6;7", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;3;3", "wc_summary": "47;19;117", "wc_strengths": "57;31;141", "wc_weaknesses": "206;138;48", "wc_questions": "391;2;19", "wc_limitations": "38;2;6", "wc_review": "739;192;331", "wc_reply_reviewers": "302;22;106", "wc_reply_authors": "1553;17;762", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 61.0, 41.21488404286329 ], "wc_strengths_avg": [ 76.33333333333333, 46.94204464609052 ], "wc_weaknesses_avg": [ 130.66666666666666, 64.71132478597201 ], "wc_questions_avg": [ 137.33333333333334, 179.50363660816333 ], "wc_limitations_avg": [ 15.333333333333334, 16.110727964792762 ], "wc_review_avg": [ 420.6666666666667, 232.1383686989771 ], "wc_reply_reviewers_avg": [ 143.33333333333334, 117.318180839781 ], "wc_reply_authors_avg": [ 777.3333333333334, 627.1631012388688 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6468752265684595111&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": ";ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "SimGen: Simulator-conditioned Driving Scene Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95730", "id": "JCyBN5syv3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JCyBN5syv3", "openreview": "https://openreview.net/forum?id=JCyBN5syv3", "poster": "", "project": "", "author_site": "Yunsong Zhou, Michael Simon, Zhenghao (Mark) Peng, Sicheng Mo, Hongzi Zhu, Minyi Guo, Bolei Zhou", "tldr": "", "abstract": "Controllable synthetic data generation can substantially lower the annotation cost of training data. Prior works use diffusion models to generate driving images conditioned on the 3D object layout. However, those models are trained on small-scale datasets like nuScenes, which lack appearance and layout diversity. Moreover, overfitting often happens, where the trained models can only generate images based on the layout data from the validation set of the same dataset. In this work, we introduce a simulator-conditioned scene generation framework called SimGen that can learn to generate diverse driving scenes by mixing data from the simulator and the real world. It uses a novel cascade diffusion pipeline to address challenging sim-to-real gaps and multi-condition conflicts. A driving video dataset DIVA is collected to enhance the generative diversity of SimGen, which contains over 147.5 hours of real-world driving videos from 73 locations worldwide and simulated driving data from the MetaDrive simulator. SimGen achieves superior generation quality and diversity while preserving controllability based on the text prompt and the layout pulled from a simulator. We further demonstrate the improvements brought by SimGen for synthetic data augmentation on the BEV detection and segmentation task and showcase its capability in safety-critical data generation.", "keywords": "Autonomous Driving;Generative Models;Simulators", "primary_area": "machine_vision", "supplementary_material": "/attachment/3d85d1adb4a217323a8c1673120fd3e4274a1fd2.zip", "author": "Yunsong Zhou;Michael Simon;Zhenghao Peng;Sicheng Mo;Hongzi Zhu;Minyi Guo;Bolei Zhou", "authorids": "~Yunsong_Zhou1;~Michael_Simon1;~Zhenghao_Peng1;~Sicheng_Mo2;~Hongzi_Zhu1;~Minyi_Guo1;~Bolei_Zhou5", "gender": "M;;M;M;M;M;M", "homepage": ";;https://pengzhenghao.github.io;http://www.cs.sjtu.edu.cn/en/PeopleDetail.aspx?id=130;http://www.cs.sjtu.edu.cn/~guo-my/;https://boleizhou.github.io/;https://sichengmo.github.io/", "dblp": "239/4113;;220/3963;29/5901;;46/8066;319/6786", "google_scholar": "https://scholar.google.com.hk/citations?user=bTsmnwcAAAAJ;;JZ8ws6IAAAAJ;https://scholar.google.com.tw/citations?user=HkaEi3MAAAAJ;https://scholar.google.com.tw/citations?user=8R8FO9IAAAAJ;9D4aG8AAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-5101-331X;;;;;;", "linkedin": ";michael-simon02/;;;;;", "or_profile": "~Yunsong_Zhou1;~Michael_Simon1;~Zhenghao_Peng1;~Hongzi_Zhu1;~Minyi_Guo1;~Bolei_Zhou5;~SICHENG_MO1", "aff": "Shanghai Jiaotong University;University of California, Los Angeles;University of California, Los Angeles;Shanghai Jiaotong University;Shanghai Jiaotong University;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "sjtu.edu.cn;ucla.edu;cs.ucla.edu;sjtu.edu.cn;sjtu.edu.cn;ucla.edu;ucla.edu", "position": "PhD student;Undergrad student;PhD student;Full Professor;Full Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\nzhou2024simgen,\ntitle={SimGen: Simulator-conditioned Driving Scene Generation},\nauthor={Yunsong Zhou and Michael Simon and Zhenghao Peng and Sicheng Mo and Hongzi Zhu and Minyi Guo and Bolei Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JCyBN5syv3}\n}", "github": "", "reviewers": "xuJP;iVy1;u5Ry;azzq;vm6z", "pdf_size": 39145094, "rating": "5;5;6;8;9", "confidence": "4;4;3;5;5", "soundness": "3;3;3;4;4", "novelty": "3;2;3;4;4", "presentation": "3;3;4;3;4", "wc_summary": "71;47;74;85;36", "wc_strengths": "35;31;72;142;226", "wc_weaknesses": "16;290;236;282;93", "wc_questions": "59;136;70;6;190", "wc_limitations": "14;39;12;7;13", "wc_review": "195;543;464;522;558", "wc_reply_reviewers": "19;0;300;23;30", "wc_reply_authors": "0;0;466;0;0", "reply_reviewers": "1;0;2;1;1", "reply_authors": "1;1;3;1;1", "rating_avg": [ 6.6, 1.624807680927192 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 62.6, 18.18350901228913 ], "wc_strengths_avg": [ 101.2, 74.0308043992499 ], "wc_weaknesses_avg": [ 183.4, 109.58759053834517 ], "wc_questions_avg": [ 92.2, 64.04498419080139 ], "wc_limitations_avg": [ 17.0, 11.260550608207398 ], "wc_review_avg": [ 456.4, 134.54605159572685 ], "wc_reply_reviewers_avg": [ 74.4, 113.23709639513017 ], "wc_reply_authors_avg": [ 93.2, 186.4 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.723746864455746, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2025210704474157128&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;ucla.edu;cs.ucla.edu;sjtu.edu.cn;sjtu.edu.cn;ucla.edu;ucla.edu", "author_num": 7, "aff_unique_index": "0;1;1;0;0;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "SJTU;UCLA", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Large language model validity via enhanced conformal prediction methods", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95729", "id": "JD3NYpeQ3R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JD3NYpeQ3R", "openreview": "https://openreview.net/forum?id=JD3NYpeQ3R", "poster": "/media/PosterPDFs/NeurIPS%202024/95729.png?t=1733434964.1578863", "project": "", "author_site": "John Cherian, Isaac Gibbs, Emmanuel Candes", "tldr": "", "abstract": "We develop new conformal inference methods for obtaining validity guarantees on the output of large language models (LLMs). Prior work in conformal language modeling identifies a subset of the text that satisfies a high-probability guarantee of correctness. These methods work by filtering claims from the LLM's original response if a scoring function evaluated on the claim fails to exceed a threshold calibrated via split conformal prediction. Existing methods in this area suffer from two deficiencies. First, the guarantee stated is not conditionally valid. The trustworthiness of the filtering step may vary based on the topic of the response. Second, because the scoring function is imperfect, the filtering step can remove many valuable and accurate claims. We address both of these challenges via two new conformal methods. First, we generalize the conditional conformal procedure of Gibbs et al. (2023) in order to adaptively issue weaker guarantees when they are required to preserve the utility of the output. Second, we show how to systematically improve the quality of the scoring function via a novel algorithm for differentiating through the conditional conformal procedure. We demonstrate the efficacy of our approach on biography and medical question-answering datasets.", "keywords": "conformal inference;large language models;conditional guarantees;calibration;boosting", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/f8a5c2039e383a08d7acf7b958fcea32d5046870.zip", "author": "John Cherian;Isaac Gibbs;Emmanuel Candes", "authorids": "~John_Cherian1;~Isaac_Gibbs1;~Emmanuel_Candes1", "gender": ";M;", "homepage": ";https://statistics.stanford.edu/people/phds;http://statweb.stanford.edu/~candes/", "dblp": ";;", "google_scholar": ";;nRQi4O8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~John_Cherian1;~Isaac_Gibbs1;~Emmanuel_Candes1", "aff": ";Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\ncherian2024large,\ntitle={Large language model validity via enhanced conformal prediction methods},\nauthor={John Cherian and Isaac Gibbs and Emmanuel Candes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JD3NYpeQ3R}\n}", "github": "", "reviewers": "tBPp;CDbE;R2Vh;9DaN", "pdf_size": 704943, "rating": "5;6;7;7", "confidence": "5;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "84;48;81;201", "wc_strengths": "132;47;81;235", "wc_weaknesses": "380;297;55;234", "wc_questions": "18;11;28;82", "wc_limitations": "1;4;12;8", "wc_review": "615;407;257;760", "wc_reply_reviewers": "916;208;10;40", "wc_reply_authors": "1613;241;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.5, 58.03662636645931 ], "wc_strengths_avg": [ 123.75, 70.99779926166725 ], "wc_weaknesses_avg": [ 241.5, 119.4790776663429 ], "wc_questions_avg": [ 34.75, 27.94078560098123 ], "wc_limitations_avg": [ 6.25, 4.14578098794425 ], "wc_review_avg": [ 509.75, 192.44658349786312 ], "wc_reply_reviewers_avg": [ 293.5, 367.23664032882124 ], "wc_reply_authors_avg": [ 463.5, 670.9174688439704 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6059612613355974981&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Non-convolutional graph neural networks.", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95728", "id": "JDAQwysFOc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JDAQwysFOc", "openreview": "https://openreview.net/forum?id=JDAQwysFOc", "poster": "", "project": "", "author_site": "Yuanqing Wang, Kyunghyun Cho", "tldr": "", "abstract": "Rethink convolution-based graph neural networks (GNN)---they characteristically suffer from limited expressiveness, over-smoothing, and over-squashing, and require specialized sparse kernels for efficient computation.\nHere, we design a simple graph learning module entirely free of convolution operators, coined _random walk with unifying memory_ (RUM) neural network, where an RNN merges the topological and semantic graph features along the random walks terminating at each node.\nRelating the rich literature on RNN behavior and graph topology, we theoretically show and experimentally verify that RUM attenuates the aforementioned symptoms and is more expressive than the Weisfeiler-Lehman (WL) isomorphism test.\nOn a variety of node- and graph-level classification and regression tasks, RUM not only achieves competitive performance, but is also robust, memory-efficient, scalable, and faster than the simplest convolutional GNNs.", "keywords": "Graph neural networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yuanqing Wang;Kyunghyun Cho", "authorids": "~Yuanqing_Wang1;~Kyunghyun_Cho1", "gender": "M;M", "homepage": "https://wangyq.net;http://kyunghyuncho.me", "dblp": "83/7566;41/9736", "google_scholar": "Njp5EY4AAAAJ;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ", "orcid": ";", "linkedin": "yuanqing-wang/;", "or_profile": "~Yuanqing_Wang1;~Kyunghyun_Cho1", "aff": "New York University;Genentech", "aff_domain": "nyu.edu;gene.com", "position": "Postdoc;Senior Director of Frontier Research", "bibtex": "@inproceedings{\nwang2024nonconvolutional,\ntitle={Non-convolutional graph neural networks.},\nauthor={Yuanqing Wang and Kyunghyun Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JDAQwysFOc}\n}", "github": "", "reviewers": "jBUd;xhjS;wXB7;P1Jd", "pdf_size": 743416, "rating": "6;6;7;7", "confidence": "2;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "53;78;38;120", "wc_strengths": "22;95;46;160", "wc_weaknesses": "103;73;11;99", "wc_questions": "91;127;11;134", "wc_limitations": "6;11;1;17", "wc_review": "275;384;107;530", "wc_reply_reviewers": "18;28;15;18", "wc_reply_authors": "49;170;43;148", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;2;4", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 31.051368729896595 ], "wc_strengths_avg": [ 80.75, 52.77961254120761 ], "wc_weaknesses_avg": [ 71.5, 36.77974986320598 ], "wc_questions_avg": [ 90.75, 48.84861819949465 ], "wc_limitations_avg": [ 8.75, 5.931905258852336 ], "wc_review_avg": [ 324.0, 154.53640347827434 ], "wc_reply_reviewers_avg": [ 19.75, 4.9180788932265 ], "wc_reply_authors_avg": [ 102.5, 57.07232253903813 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6445447931372986777&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nyu.edu;gene.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;Genentech", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.genentech.com", "aff_unique_abbr": "NYU;Genentech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Binary Search with Distributional Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95727", "id": "JEKXTLjEIq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JEKXTLjEIq", "openreview": "https://openreview.net/forum?id=JEKXTLjEIq", "poster": "", "project": "", "author_site": "Michael Dinitz, Sungjin Im, Thomas Lavastida, Ben Moseley, Aidin Niaparast, Sergei Vassilvitskii", "tldr": "", "abstract": "Algorithms with (machine-learned) predictions is a powerful framework for combining traditional worst-case algorithms with modern machine learning. However, the vast majority of work in this space assumes that the prediction itself is non-probabilistic, even if it is generated by some stochastic process (such as a machine learning system). This is a poor fit for modern ML, particularly modern neural networks, which naturally generate a *distribution*. We initiate the study of algorithms with *distributional* predictions, where the prediction itself is a distribution. We focus on one of the simplest yet fundamental settings: binary search (or searching a sorted array). \n This setting has one of the simplest algorithms with a point prediction, but what happens if the prediction is a distribution? We show that this is a richer setting: there are simple distributions where using the classical prediction-based algorithm with any single prediction does poorly. \n Motivated by this, as our main result, we give an algorithm with query complexity\n $O(H(p) + \\log \\eta)$, where $H(p)$ is the entropy of the true distribution $p$ and $\\eta$ is the earth mover's distance between $p$ and the predicted distribution $\\hat p$. This also yields the first *distributionally-robust* algorithm for the classical problem of computing an optimal binary search tree given a distribution over target keys.\n We complement this with a lower bound showing that this query complexity is essentially optimal (up to constants), and experiments validating the practical usefulness of our algorithm.", "keywords": "learning-augmented algorithms;algorithms with predictions;distribution predictions", "primary_area": "optimization", "supplementary_material": "/attachment/0b87fbd007151313a273011e99febc61b8bb0973.zip", "author": "Michael Dinitz;Sungjin Im;Thomas Lavastida;Benjamin Moseley;Aidin Niaparast;Sergei Vassilvitskii", "authorids": "~Michael_Dinitz1;~Sungjin_Im1;~Thomas_Lavastida1;~Benjamin_Moseley1;~Aidin_Niaparast1;~Sergei_Vassilvitskii2", "gender": "M;;M;M;;", "homepage": "http://www.cs.jhu.edu/~mdinitz/;https://sites.google.com/view/sungjinim/;;http://www.andrew.cmu.edu/user/moseleyb/;;http://theory.stanford.edu/~sergei", "dblp": "27/2346;18/7116.html;257/4172;28/5638;;31/6854.html", "google_scholar": "https://scholar.google.com.tw/citations?user=Q2yN84AAAAAJ;muR7xmMAAAAJ;T1RxzcUAAAAJ;qq-SXN8AAAAJ;;b3HMX-sAAAAJ", "orcid": ";;;;;0000-0003-0235-1624", "linkedin": ";;;;;", "or_profile": "~Michael_Dinitz1;~Sungjin_Im1;~Thomas_Lavastida1;~Benjamin_Moseley1;~Aidin_Niaparast1;~Sergei_Vassilvitskii2", "aff": "Johns Hopkins University;University of California at Merced;University of Texas at Dallas;RelationalAI;;Google", "aff_domain": "jhu.edu;ucmerced.edu;utdallas.edu;relational.ai;;google.com", "position": "Associate Professor;Associate Professor;Assistant Professor;Researcher;;Scientist", "bibtex": "@inproceedings{\ndinitz2024binary,\ntitle={Binary Search with Distributional Predictions},\nauthor={Michael Dinitz and Sungjin Im and Thomas Lavastida and Benjamin Moseley and Aidin Niaparast and Sergei Vassilvitskii},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JEKXTLjEIq}\n}", "github": "", "reviewers": "kf39;EXym;ud7J;mBjQ", "pdf_size": 1121965, "rating": "5;6;7;7", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "3;3;4;3", "presentation": "1;4;3;4", "wc_summary": "75;265;739;310", "wc_strengths": "82;33;142;146", "wc_weaknesses": "225;173;167;175", "wc_questions": "127;81;208;76", "wc_limitations": "81;14;12;11", "wc_review": "590;566;1268;718", "wc_reply_reviewers": "876;13;61;145", "wc_reply_authors": "683;0;0;341", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 347.25, 242.7657049502668 ], "wc_strengths_avg": [ 100.75, 46.61209606958263 ], "wc_weaknesses_avg": [ 185.0, 23.280893453645632 ], "wc_questions_avg": [ 123.0, 52.94808778416837 ], "wc_limitations_avg": [ 29.5, 29.75315109362368 ], "wc_review_avg": [ 785.5, 284.5008787332651 ], "wc_reply_reviewers_avg": [ 273.75, 350.90410014703446 ], "wc_reply_authors_avg": [ 256.0, 283.11923283309454 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=45469104466679378&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "jhu.edu;ucmerced.edu;utdallas.edu;relational.ai;;google.com", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Johns Hopkins University;University of California, Merced;University of Texas at Dallas;RelationalAI;Google", "aff_unique_dep": ";;;;Google", "aff_unique_url": "https://www.jhu.edu;https://www.ucmerced.edu;https://www.utdallas.edu;https://www.relationalai.com;https://www.google.com", "aff_unique_abbr": "JHU;UC Merced;UT Dallas;RelationalAI;Google", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Merced;Dallas;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "What Makes and Breaks Safety Fine-tuning? A Mechanistic Study", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95726", "id": "JEflV4nRlH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JEflV4nRlH", "openreview": "https://openreview.net/forum?id=JEflV4nRlH", "poster": "/media/PosterPDFs/NeurIPS%202024/95726.png?t=1733852276.314296", "project": "", "author_site": "Samyak Jain, Ekdeep S Lubana, Kemal Oksuz, Tom Joy, Philip Torr, Amartya Sanyal, Puneet Dokania", "tldr": "", "abstract": "Safety fine-tuning helps align Large Language Models (LLMs) with human preferences for their safe deployment. To better understand the underlying factors that make models safe via safety fine-tuning, we design a synthetic data generation framework that captures salient aspects of an unsafe input by modeling the interaction between the task the model is asked to perform (e.g., \u201cdesign\u201d) versus the specific concepts the task is asked to be performed upon (e.g., a \u201ccycle\u201d vs. a \u201cbomb\u201d). Using this, we investigate three well-known safety fine-tuning methods\u2014supervised safety fine-tuning, direct preference optimization, and unlearning\u2014and provide significant evidence demonstrating that these methods minimally transform MLP weights to specifically align unsafe inputs into its weights\u2019 null space. This yields a clustering of inputs based on whether the model deems them safe or not. Correspondingly, when an adversarial input (e.g., a jailbreak) is provided, its activations are closer to safer samples, leading to the model processing such an input as if it were safe. Code is available at https://github.com/fiveai/understanding_safety_finetuning.", "keywords": "Mechanistic Interpretability;AI Safety;Safety fine tuning;Large Language Models", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Samyak Jain;Ekdeep Singh Lubana;Kemal Oksuz;Tom Joy;Philip Torr;Amartya Sanyal;Puneet K. Dokania", "authorids": "~Samyak_Jain1;~Ekdeep_Singh_Lubana1;~Kemal_Oksuz1;~Tom_Joy1;~Philip_Torr1;~Amartya_Sanyal1;~Puneet_K._Dokania1", "gender": "M;M;M;M;;M;M", "homepage": "https://samyakjain0112.github.io/;https://ekdeepslubana.github.io/;https://kemaloksuz.github.io/;https://thwjoy.github.io;http://www.robots.ox.ac.uk/~tvg/;https://amartya18x.github.io;http://puneetkdokania.github.io/", "dblp": "249/4464.html;228/2683;202/7522;;;203/8807;150/4211", "google_scholar": "https://scholar.google.co.in/citations?hl=en;https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;nWHRjrkAAAAJ;thwjoy;;;https://scholar.google.fr/citations?user=WsM7ybkAAAAJ", "orcid": "0000-0003-3785-4782;;0000-0002-0066-1517;;;0000-0002-4190-0449;", "linkedin": "samyak-jain-276738178/;;;;;;", "or_profile": "~Samyak_Jain1;~Ekdeep_Singh_Lubana1;~Kemal_Oksuz1;~Tom_Joy1;~Philip_Torr1;~Amartya_Sanyal1;~Puneet_Dokania1", "aff": "Five AI;University of Michigan;Five AI;;University of Oxford;Max-Planck Institute;University of Oxford", "aff_domain": "five.ai;umich.edu;five.ai;;ox.ac.uk;mpg.de;oxford.ac.uk", "position": "Intern;PhD student;Researcher;;Full Professor;Postdoc;Senior Researcher", "bibtex": "@inproceedings{\njain2024what,\ntitle={What Makes and Breaks Safety Fine-tuning? A Mechanistic Study},\nauthor={Samyak Jain and Ekdeep Singh Lubana and Kemal Oksuz and Tom Joy and Philip Torr and Amartya Sanyal and Puneet K. Dokania},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JEflV4nRlH}\n}", "github": "", "reviewers": "SrSG;K9Ts;dExx;sLPA", "pdf_size": 6881171, "rating": "5;5;6;7", "confidence": "2;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "41;35;80;68", "wc_strengths": "99;13;147;144", "wc_weaknesses": "2;46;117;69", "wc_questions": "90;2;2;39", "wc_limitations": "1;2;6;6", "wc_review": "233;98;352;326", "wc_reply_reviewers": "17;37;18;21", "wc_reply_authors": "40;43;40;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.0, 18.614510468986285 ], "wc_strengths_avg": [ 100.75, 54.112729555992644 ], "wc_weaknesses_avg": [ 58.5, 41.47589661478098 ], "wc_questions_avg": [ 33.25, 36.07890657988404 ], "wc_limitations_avg": [ 3.75, 2.277608394786075 ], "wc_review_avg": [ 252.25, 99.43936594729473 ], "wc_reply_reviewers_avg": [ 23.25, 8.073877630977572 ], "wc_reply_authors_avg": [ 37.5, 6.18465843842649 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16645725232502158561&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "five.ai;umich.edu;five.ai;;ox.ac.uk;mpg.de;oxford.ac.uk", "author_num": 7, "aff_unique_index": "0;1;0;2;3;2", "aff_unique_norm": "Five AI;University of Michigan;University of Oxford;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.five.ai;https://www.umich.edu;https://www.ox.ac.uk;https://www.mpg.de", "aff_unique_abbr": "Five AI;UM;Oxford;MPG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "United Kingdom;United States;Germany" }, { "title": "Explicit Eigenvalue Regularization Improves Sharpness-Aware Minimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95725", "id": "JFUhBY34SC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JFUhBY34SC", "openreview": "https://openreview.net/forum?id=JFUhBY34SC", "poster": "", "project": "", "author_site": "Haocheng Luo, Tuan Truong, Tung Pham, Mehrtash Harandi, Dinh Phung, Trung Le", "tldr": "", "abstract": "Sharpness-Aware Minimization (SAM) has attracted significant attention for its effectiveness in improving generalization across various tasks. However, its underlying principles remain poorly understood. In this work, we analyze SAM\u2019s training dynamics using the maximum eigenvalue of the Hessian as a measure of sharpness and propose a third-order stochastic differential equation (SDE), which reveals that the dynamics are driven by a complex mixture of second- and third-order terms. We show that alignment between the perturbation vector and the top eigenvector is crucial for SAM\u2019s effectiveness in regularizing sharpness, but find that this alignment is often inadequate in practice, which limits SAM's efficiency. Building on these insights, we introduce Eigen-SAM, an algorithm that explicitly aims to regularize the top Hessian eigenvalue by aligning the perturbation vector with the leading eigenvector. We validate the effectiveness of our theory and the practical advantages of our proposed approach through comprehensive experiments. Code is available at https://github.com/RitianLuo/EigenSAM.", "keywords": "Optimization;Sharpness-Aware Minimization;stochastic differential equation", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Haocheng Luo;Tuan Truong;Tung Pham;Mehrtash Harandi;Dinh Phung;Trung Le", "authorids": "~Haocheng_Luo1;~Tuan_Truong1;~Tung_Pham1;~Mehrtash_Harandi2;~Dinh_Phung2;~Trung_Le2", "gender": ";M;M;M;M;M", "homepage": "http://monash.edu;;;;https://research.monash.edu/en/persons/dinh-phung;https://sites.google.com/site/mehrtashharandi/", "dblp": ";;38/10862-1;;71/5859;92/5921", "google_scholar": ";;KcUuEKsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=OtA9SwIAAAAJ;--M1XEkAAAAJ", "orcid": ";;;;0000-0002-9977-8247;0000-0002-6937-6300", "linkedin": ";tuan-truong-620a5119b/;;;https://linkedin.com/in/dinh-phung-6b537a6;mehrtash-harandi-b99358155/", "or_profile": "~Haocheng_Luo1;~Tuan_Truong1;~Tung_Pham1;~Trung_Le2;~Dinh_Phung1;~Mehrtash_T._Harandi1", "aff": "Monash University;University of British Columbia;VinAI Research;Monash University;Monash University;Australian National University", "aff_domain": "monash.edu;cs.ubc.ca;vinai.io;monash.edu;monash.edu;anu.edu.au", "position": "PhD student;Undergrad student;Researcher;Assistant Professor;Full Professor;Adjunct", "bibtex": "@inproceedings{\nluo2024explicit,\ntitle={Explicit Eigenvalue Regularization Improves Sharpness-Aware Minimization},\nauthor={Haocheng Luo and Tuan Truong and Tung Pham and Mehrtash Harandi and Dinh Phung and Trung Le},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JFUhBY34SC}\n}", "github": "", "reviewers": "B3hY;WYKu;zC7c;4ndr;eUoc", "pdf_size": 770407, "rating": "4;5;6;6;6", "confidence": "3;3;3;3;5", "soundness": "3;3;3;3;3", "novelty": "3;3;2;3;3", "presentation": "4;2;2;3;2", "wc_summary": "73;109;68;49;110", "wc_strengths": "101;58;31;16;37", "wc_weaknesses": "93;194;57;94;26", "wc_questions": "179;22;32;20;500", "wc_limitations": "76;13;5;1;1", "wc_review": "522;396;193;180;674", "wc_reply_reviewers": "361;12;48;0;65", "wc_reply_authors": "961;0;23;0;33", "reply_reviewers": "3;1;1;0;1", "reply_authors": "5;1;2;1;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 81.8, 23.99499947905813 ], "wc_strengths_avg": [ 48.6, 29.4659125092029 ], "wc_weaknesses_avg": [ 92.8, 56.54520315641284 ], "wc_questions_avg": [ 150.6, 184.68741159050336 ], "wc_limitations_avg": [ 19.2, 28.736040089058893 ], "wc_review_avg": [ 393.0, 190.25246384738358 ], "wc_reply_reviewers_avg": [ 97.2, 133.98119271002182 ], "wc_reply_authors_avg": [ 203.4, 379.02010500763686 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 1.4696938456699071 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13014636160384432210&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "monash.edu;cs.ubc.ca;vinai.io;monash.edu;monash.edu;anu.edu.au", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "Monash University;University of British Columbia;VinAI Research;Australian National University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.monash.edu;https://www.ubc.ca;https://www.vinai.io/;https://www.anu.edu.au", "aff_unique_abbr": "Monash;UBC;VinAI;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "Australia;Canada;Vietnam" }, { "title": "Architect: Generating Vivid and Interactive 3D Scenes with Hierarchical 2D Inpainting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95724", "id": "JHg9eNuw6p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JHg9eNuw6p", "openreview": "https://openreview.net/forum?id=JHg9eNuw6p", "poster": "", "project": "", "author_site": "Yian Wang, Xiaowen Qiu, Jiageng Liu, Zhehuan Chen, Jiting Cai, Yufei Wang, Tsun-Hsuan Johnson Wang, Zhou Xian, Chuang Gan", "tldr": "", "abstract": "Creating large-scale interactive 3D environments is essential for the development of Robotics and Embodied AI research. However, generating diverse embodied environments with realistic detail and considerable complexity remains a significant challenge. Current methods, including manual design, procedural generation, diffusion-based scene generation, and large language model (LLM) guided scene design, are hindered by limitations such as excessive human effort, reliance on predefined rules or training datasets, and limited 3D spatial reasoning ability. Since pre-trained 2D image generative models better capture scene and object configuration than LLMs, we address these challenges by introducing $\\textit{Architect}$, a generative framework that creates complex and realistic 3D embodied environments leveraging diffusion-based 2D image inpainting. In detail, we utilize foundation visual perception models to obtain each generated object from the image and leverage pre-trained depth estimation models to lift the generated 2D image to 3D space. While there are still challenges that the camera parameters and scale of depth are still absent in the generated image, we address those problems by ''controlling'' the diffusion model by $\\textit{hierarchical inpainting}$. Specifically, having access to ground-truth depth and camera parameters in simulation, we first render a photo-realistic image of only the background. Then, we inpaint the foreground in this image, passing the geometric cues to the inpainting model in the background, which informs the camera parameters.\nThis process effectively controls the camera parameters and depth scale for the generated image, facilitating the back-projection from 2D image to 3D point clouds. Our pipeline is further extended to a hierarchical and iterative inpainting process to continuously generate the placement of large furniture and small objects to enrich the scene. This iterative structure brings the flexibility for our method to generate or refine scenes from various starting points, such as text, floor plans, or pre-arranged environments. Experimental results demonstrate that $\\textit{Architect}$ outperforms existing methods in producing realistic and complex environments, making it highly suitable for Embodied AI and robotics applications.", "keywords": "3D Scene Generation", "primary_area": "generative_models", "supplementary_material": "", "author": "Yian Wang;Xiaowen Qiu;Jiageng Liu;Zhehuan Chen;Jiting Cai;Yufei Wang;Tsun-Hsuan Wang;Zhou Xian;Chuang Gan", "authorids": "~Yian_Wang1;~Xiaowen_Qiu1;~Jiageng_Liu1;~Zhehuan_Chen1;~Jiting_Cai1;~Yufei_Wang4;~Tsun-Hsuan_Wang2;~Zhou_Xian1;~Chuang_Gan1", "gender": "M;M;M;M;;M;M;M;M", "homepage": "http://None;https://jiagengliu02.github.io;https://www.cnblogs.com/ACMLCZH;https://Caijiting.github.io;https://yufeiwang63.github.io/;https://zswang666.github.io/;;http://people.csail.mit.edu/ganchuang/;http://wangyian-me.github.io/", "dblp": ";;;383/6918;;217/1809.html;258/5020;139/6993;71/10046", "google_scholar": ";;LvNUzlEAAAAJ;;HQl9718AAAAJ;xE3WSuYAAAAJ;;PTeSCbIAAAAJ;dUf3wx4AAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Xiaowen_Qiu1;~Jiageng_Liu1;~Zhehuan_Chen1;~Jiting_Cai1;~Yufei_Wang4;~Tsun-Hsuan_Wang2;~Zhou_Xian1;~Chuang_Gan1;~\u9038\u5b89_\u738b1", "aff": "University of Massachusetts at Amherst;Zhejiang University;University of Massachusetts at Amherst;Shanghai Jiaotong University;School of Computer Science, Carnegie Mellon University;Liquid AI;Carnegie Mellon University;University of Massachusetts at Amherst;NVIDIA", "aff_domain": "umass.edu;zju.edu.cn;umass.edu;sjtu.edu.cn;cs.cmu.edu;liquid.ai;cmu.edu;umass.edu;nvidia.com", "position": "MS student;Undergrad student;MS student;Undergrad student;PhD student;Researcher;PhD student;Assistant Professor;Intern", "bibtex": "@inproceedings{\nwang2024architect,\ntitle={Architect: Generating Vivid and Interactive 3D Scenes with Hierarchical 2D Inpainting},\nauthor={Yian Wang and Xiaowen Qiu and Jiageng Liu and Zhehuan Chen and Jiting Cai and Yufei Wang and Tsun-Hsuan Wang and Zhou Xian and Chuang Gan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JHg9eNuw6p}\n}", "github": "", "reviewers": "4D8M;Te9g;YpUS;yxqm;YmnU", "pdf_size": 7927393, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;4", "soundness": "3;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "2;4;3;3;3", "wc_summary": "47;100;134;130;114", "wc_strengths": "36;126;78;128;130", "wc_weaknesses": "75;133;125;199;110", "wc_questions": "57;175;82;2;120", "wc_limitations": "7;32;18;4;6", "wc_review": "222;566;437;463;480", "wc_reply_reviewers": "28;28;21;36;27", "wc_reply_authors": "58;46;102;96;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 105.0, 31.41973901864877 ], "wc_strengths_avg": [ 99.6, 37.25372464600016 ], "wc_weaknesses_avg": [ 128.4, 40.51468869434887 ], "wc_questions_avg": [ 87.2, 58.28001372683435 ], "wc_limitations_avg": [ 13.4, 10.49952379872535 ], "wc_review_avg": [ 433.6, 114.30940468745344 ], "wc_reply_reviewers_avg": [ 28.0, 4.774934554525329 ], "wc_reply_authors_avg": [ 60.4, 37.03835849494413 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16085663004227420541&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "umass.edu;zju.edu.cn;umass.edu;sjtu.edu.cn;cs.cmu.edu;liquid.ai;cmu.edu;umass.edu;nvidia.com", "author_num": 9, "aff_unique_index": "0;1;0;2;3;4;3;0;5", "aff_unique_norm": "University of Massachusetts Amherst;Zhejiang University;Shanghai Jiao Tong University;Carnegie Mellon University;Liquid AI;NVIDIA", "aff_unique_dep": ";;;School of Computer Science;;NVIDIA Corporation", "aff_unique_url": "https://www.umass.edu;https://www.zju.edu.cn;https://www.sjtu.edu.cn;https://www.cmu.edu;;https://www.nvidia.com", "aff_unique_abbr": "UMass Amherst;ZJU;SJTU;CMU;;NVIDIA", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Amherst;;Pittsburgh", "aff_country_unique_index": "0;1;0;1;0;2;0;0;0", "aff_country_unique": "United States;China;Unknown" }, { "title": "PowerPM: Foundation Model for Power Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95723", "id": "JInTfcxH3Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JInTfcxH3Q", "openreview": "https://openreview.net/forum?id=JInTfcxH3Q", "poster": "", "project": "", "author_site": "Shihao Tu, Yupeng Zhang, Jing Zhang, Zhendong Fu, Yin Zhang, YANG YANG", "tldr": "", "abstract": "The proliferation of abundant electricity time series (ETS) data presents numerous opportunities for various applications within power systems, including demand-side management, grid stability, and consumer behavior analysis. Deep learning models have advanced ETS modeling by effectively capturing sequence dependence. However, learning a generic representation of ETS data for various applications is challenging due to the inherently complex hierarchical structure of ETS data. Moreover, ETS data exhibits intricate temporal dependencies and is susceptible to the influence of exogenous variables. Furthermore, different instances exhibit diverse electricity consumption behavior. In this paper, we propose a foundation model PowerPM for ETS data, providing a large-scale, off-the-shelf model for power systems. PowerPM consists of a temporal encoder and a hierarchical encoder. The temporal encoder captures temporal dependencies within ETS data, taking into account exogenous variables. The hierarchical encoder models correlations between different levels of hierarchy. Furthermore, PowerPM leverages a novel self-supervised pre-training framework consisting of masked ETS modeling and dual-view contrastive learning. This framework enables PowerPM to capture temporal dependency within ETS windows and aware the discrepancy across ETS windows, providing two different perspectives to learn generic representation. Our experiments span five real-world scenario datasets, including both private and public data. Through pre-training on massive ETS data, PowerPM achieves SOTA\nperformance on diverse downstream tasks within the private dataset. Notably, when transferred to public datasets, PowerPM retains its edge, showcasing its remarkable generalization ability across various tasks and domains. Moreover, ablation studies and few-shot experiments further substantiate the effectiveness of our model.", "keywords": "time series pre-training;power systems;foundation model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/78ee840a38e4a076d2de333a8815e48c72a0cd65.zip", "author": "Shihao Tu;Yupeng Zhang;Jing Zhang;Zhendong Fu;Yin Zhang;Yang Yang", "authorids": "~Shihao_Tu4;~Yupeng_Zhang5;~Jing_Zhang24;~Zhendong_Fu1;~Yin_Zhang1;~Yang_Yang35", "gender": ";M;;;;M", "homepage": ";;https://xiaojingzi.github.io/;;https://person.zju.edu.cn/en/0099160;http://yangy.org", "dblp": ";;05/3499-1.html;;;", "google_scholar": ";;T7Wa3GQAAAAJ;;;", "orcid": ";0009-0005-8667-0391;;;;0000-0002-5058-4417", "linkedin": ";;;;;", "or_profile": "~Shihao_Tu4;~Yupeng_Zhang5;~Jing_Zhang24;~Zhendong_Fu1;~Yin_Zhang1;~Yang_Yang35", "aff": ";Zhejiang University;Renmin University of China;;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;ruc.edu.cn;;zju.edu.cn;zju.edu.cn", "position": ";MS student;Associate Professor;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ntu2024powerpm,\ntitle={Power{PM}: Foundation Model for Power Systems},\nauthor={Shihao Tu and Yupeng Zhang and Jing Zhang and Zhendong Fu and Yin Zhang and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JInTfcxH3Q}\n}", "github": "", "reviewers": "W2Rh;vBXw;Vuy7;Q8uG", "pdf_size": 1004514, "rating": "3;3;6;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;1;3;3", "presentation": "2;2;3;2", "wc_summary": "105;25;118;65", "wc_strengths": "58;14;355;31", "wc_weaknesses": "60;189;453;48", "wc_questions": "52;36;354;40", "wc_limitations": "32;23;362;9", "wc_review": "307;287;1642;193", "wc_reply_reviewers": "0;0;0;8", "wc_reply_authors": "0;0;0;21", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 4.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 36.42372166596928 ], "wc_strengths_avg": [ 114.5, 139.73635890490348 ], "wc_weaknesses_avg": [ 187.5, 162.9486115313659 ], "wc_questions_avg": [ 120.5, 134.93980139306564 ], "wc_limitations_avg": [ 106.5, 147.74048192692482 ], "wc_review_avg": [ 607.25, 598.9617579612241 ], "wc_reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "wc_reply_authors_avg": [ 5.25, 9.093266739736606 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.14002800840280097, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7089399342488130523&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";zju.edu.cn;ruc.edu.cn;;zju.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Zhejiang University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "ZJU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Hamiltonian Score Matching and Generative Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95722", "id": "JJGfCvjpTV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JJGfCvjpTV", "openreview": "https://openreview.net/forum?id=JJGfCvjpTV", "poster": "/media/PosterPDFs/NeurIPS%202024/95722.png?t=1731425547.9317546", "project": "", "author_site": "Peter Holderrieth, Yilun Xu, Tommi Jaakkola", "tldr": "", "abstract": "Classical Hamiltonian mechanics has been widely used in machine learning in the form of Hamiltonian Monte Carlo for applications with predetermined force fields. In this paper, we explore the potential of deliberately designing force fields for Hamiltonian systems, introducing Hamiltonian velocity predictors (HVPs) as a core tool for constructing energy-based and generative models. We present two innovations: Hamiltonian Score Matching (HSM), which utilizes score functions to augment data by simulating Hamiltonian trajectories, and Hamiltonian Generative Flows (HGFs), a novel generative model that encompasses diffusion models and OT-flow matching as HGFs with zero force fields. We showcase the extended design space of force fields by introducing Oscillation HGFs, a generative model inspired by harmonic oscillators. Our experiments demonstrate that HSM and HGFs rival leading score-matching and generative modeling techniques. Overall, our work systematically elucidates the synergy between Hamiltonian dynamics, force fields, and generative models, thereby opening new avenues for applications of machine learning in physical sciences and dynamical systems.", "keywords": "Hamiltonian dynamics;score matching;generative models;diffusion models;flow matching;Hamiltonian Monte Carlo", "primary_area": "generative_models", "supplementary_material": "", "author": "Peter Holderrieth;Yilun Xu;Tommi Jaakkola", "authorids": "~Peter_Holderrieth2;~Yilun_Xu1;~Tommi_S._Jaakkola1", "gender": "M;M;", "homepage": "https://www.peterholderrieth.com/;http://yilun-xu.com;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Peter_Holderrieth2;~Yilun_Xu1;~Tommi_S._Jaakkola1", "aff": "MIT;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;mit.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nholderrieth2024hamiltonian,\ntitle={Hamiltonian Score Matching and Generative Flows},\nauthor={Peter Holderrieth and Yilun Xu and Tommi Jaakkola},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JJGfCvjpTV}\n}", "github": "", "reviewers": "6n8r;XPUP;WwWf;8ET8;g7vb", "pdf_size": 5312985, "rating": "6;6;6;7;8", "confidence": "4;3;3;3;3", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "81;119;70;73;63", "wc_strengths": "59;24;50;40;53", "wc_weaknesses": "122;47;213;40;116", "wc_questions": "78;21;4;2;94", "wc_limitations": "6;1;12;22;69", "wc_review": "346;212;349;177;395", "wc_reply_reviewers": "13;18;20;12;24", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.2, 19.762590923257 ], "wc_strengths_avg": [ 45.2, 12.253978945632312 ], "wc_weaknesses_avg": [ 107.6, 62.656524001894645 ], "wc_questions_avg": [ 39.8, 38.62848689762518 ], "wc_limitations_avg": [ 22.0, 24.51938009004306 ], "wc_review_avg": [ 295.8, 85.23708113256812 ], "wc_reply_reviewers_avg": [ 17.4, 4.454211490264018 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HGHwm6K0A2AJ:scholar.google.com/&scioq=Hamiltonian+Score+Matching+and+Generative+Flows&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "mit.edu;mit.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Smoothed Energy Guidance: Guiding Diffusion Models with Reduced Energy Curvature of Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95721", "id": "JK728xy8G7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JK728xy8G7", "openreview": "https://openreview.net/forum?id=JK728xy8G7", "poster": "", "project": "", "tldr": "", "abstract": "Conditional diffusion models have shown remarkable success in visual content generation, producing high-quality samples across various domains, largely due to classifier-free guidance (CFG). Recent attempts to extend guidance to unconditional models have relied on heuristic techniques, resulting in suboptimal generation quality and unintended effects. In this work, we propose Smoothed Energy Guidance (SEG), a novel training- and condition-free approach that leverages the energy-based perspective of the self-attention mechanism to enhance image generation. By defining the energy of self-attention, we introduce a method to reduce the curvature of the energy landscape of attention and use the output as the unconditional prediction. Practically, we control the curvature of the energy landscape by adjusting the Gaussian kernel parameter while keeping the guidance scale parameter fixed. Additionally, we present a query blurring method that is equivalent to blurring the entire attention weights without incurring quadratic complexity in the number of tokens. In our experiments, SEG achieves a Pareto improvement in both quality and the reduction of side effects. The code is available at https://github.com/SusungHong/SEG-SDXL.", "keywords": "Diffusion Models;Diffusion Guidance", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9a92ff272072b9c9231561dbb595dbea1b4084d5.zip", "author": "Susung Hong", "authorids": "~Susung_Hong1", "gender": "M", "homepage": "https://susunghong.github.io/", "dblp": "330/5127", "google_scholar": "HigIHvUAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Susung_Hong1", "aff": "Korea University", "aff_domain": "korea.ac.kr", "position": "Undergrad student", "bibtex": "@inproceedings{\nhong2024smoothed,\ntitle={Smoothed Energy Guidance: Guiding Diffusion Models with Reduced Energy Curvature of Attention},\nauthor={Susung Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JK728xy8G7}\n}", "github": "", "reviewers": "2oF8;cNNp;Rhhf;9qC6;CTSN", "pdf_size": 31785203, "rating": "4;5;5;6;8", "confidence": "4;1;3;4;4", "soundness": "2;2;2;2;3", "novelty": "2;2;2;2;3", "presentation": "2;2;2;3;3", "wc_summary": "83;50;111;147;171", "wc_strengths": "35;27;55;94;73", "wc_weaknesses": "139;28;26;624;470", "wc_questions": "21;28;146;116;182", "wc_limitations": "5;15;36;1;3", "wc_review": "283;148;374;982;899", "wc_reply_reviewers": "115;14;0;62;0", "wc_reply_authors": "261;53;53;360;53", "reply_reviewers": "2;1;0;1;0", "reply_authors": "3;2;2;3;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 112.4, 43.33866633850193 ], "wc_strengths_avg": [ 56.8, 24.547097588106013 ], "wc_weaknesses_avg": [ 257.4, 244.86045005267798 ], "wc_questions_avg": [ 98.6, 64.04873144723477 ], "wc_limitations_avg": [ 12.0, 12.93058390019569 ], "wc_review_avg": [ 537.2, 338.07537621069065 ], "wc_reply_reviewers_avg": [ 38.2, 44.65153972709116 ], "wc_reply_authors_avg": [ 156.0, 129.9753822844926 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.30343304245450425, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13002489595109653487&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "korea.ac.kr", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "SpatialRGPT: Grounded Spatial Reasoning in Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95720", "id": "JKEIYQUSUc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JKEIYQUSUc", "openreview": "https://openreview.net/forum?id=JKEIYQUSUc", "poster": "", "project": "", "author_site": "An-Chieh Cheng, Hongxu Yin, Yang Fu, Qiushan Guo, Ruihan Yang, Jan Kautz, Xiaolong Wang, Sifei Liu", "tldr": "", "abstract": "Vision Language Models (VLMs) have demonstrated remarkable performance in 2D vision and language tasks. However, their ability to reason about spatial arrangements remains limited. In this work, we introduce Spatial Region GPT (SpatialRGPT) to enhance VLMs\u2019 spatial perception and reasoning capabilities. SpatialRGPT advances VLMs\u2019 spatial understanding through two key innovations: (i) a data curation pipeline that enables effective learning of regional representation from 3D scene graphs, and (ii) a flexible ``plugin'' module for integrating depth information into the visual encoder of existing VLMs. During inference, when provided with user-specified region proposals, SpatialRGPT can accurately perceive their relative directions and distances. Additionally, we propose SpatialRGBT-Bench, a benchmark with ground-truth 3D annotations encompassing indoor, outdoor, and simulated environments, for evaluating 3D spatial cognition in Vision-Language Models (VLMs). Our results demonstrate that SpatialRGPT significantly enhances performance in spatial reasoning tasks, both with and without local region prompts. The model also exhibits strong generalization capabilities, effectively reasoning about complex spatial relations and functioning as a region-aware dense reward annotator for robotic tasks. Code, dataset, and benchmark are released at https://www.anjiecheng.me/SpatialRGPT.", "keywords": "Vision-Language Models;Spatial Reasoning", "primary_area": "generative_models", "supplementary_material": "", "author": "An-Chieh Cheng;Hongxu Yin;Yang Fu;Qiushan Guo;Ruihan Yang;Jan Kautz;Xiaolong Wang;Sifei Liu", "authorids": "~An-Chieh_Cheng1;~Hongxu_Yin2;~Yang_Fu1;~Qiushan_Guo1;~Ruihan_Yang2;~Jan_Kautz1;~Xiaolong_Wang3;~Sifei_Liu2", "gender": "M;M;M;;M;F;M;M", "homepage": "https://beckman.illinois.edu/directory/person/yangfu2;https://guoqiushan.github.io/;http://rchalyang.github.io/;http://jankautz.com;https://xiaolonw.github.io/;https://www.sifeiliu.net;https://www.anjiecheng.me/;https://hongxu-yin.github.io/", "dblp": "66/3764;231/1814;;48/6214;91/952-4;118/1301;;166/3425", "google_scholar": "bioUtz4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;b-o1o7cAAAAJ;P9FclNEAAAAJ;Y8O9N_0AAAAJ;j4pcHV4AAAAJ;Zoiu7FsAAAAJ;4gdSoOYAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Yang_Fu1;~Qiushan_Guo1;~Ruihan_Yang2;~Jan_Kautz1;~Xiaolong_Wang3;~Sifei_Liu2;~Anchieh_Cheng1;~Hongxu_Yin1", "aff": "University of California, San Diego;The University of Hong Kong;University of California, San Diego;NVIDIA;University of California, San Diego;NVIDIA;University of California, San Diego;NVIDIA", "aff_domain": "ucsd.edu;hku.hk;ucsd.edu;nvidia.com;ucsd.edu;nvidia.com;ucsd.edu;nvidia.com", "position": "PhD student;PhD student;PhD student;VP Research;Assistant Professor;Researcher;PhD student;Senior Research Scientist", "bibtex": "@inproceedings{\ncheng2024spatialrgpt,\ntitle={Spatial{RGPT}: Grounded Spatial Reasoning in Vision-Language Models},\nauthor={An-Chieh Cheng and Hongxu Yin and Yang Fu and Qiushan Guo and Ruihan Yang and Jan Kautz and Xiaolong Wang and Sifei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JKEIYQUSUc}\n}", "github": "", "reviewers": "hHf2;y3Gs;v4dm;ojLE", "pdf_size": 2840984, "rating": "6;6;7;7", "confidence": "4;5;4;4", "soundness": "3;2;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;3", "wc_summary": "62;92;77;77", "wc_strengths": "47;62;86;66", "wc_weaknesses": "81;422;197;84", "wc_questions": "56;7;92;110", "wc_limitations": "2;6;15;24", "wc_review": "248;589;467;361", "wc_reply_reviewers": "26;50;47;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.0, 10.606601717798213 ], "wc_strengths_avg": [ 65.25, 13.91716565971678 ], "wc_weaknesses_avg": [ 196.0, 138.60555544421732 ], "wc_questions_avg": [ 66.25, 39.347013863824536 ], "wc_limitations_avg": [ 11.75, 8.496322733983215 ], "wc_review_avg": [ 416.25, 126.27227526262446 ], "wc_reply_reviewers_avg": [ 40.5, 9.287087810503355 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15748159516202324347&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ucsd.edu;hku.hk;ucsd.edu;nvidia.com;ucsd.edu;nvidia.com;ucsd.edu;nvidia.com", "author_num": 8, "aff_unique_index": "0;1;0;2;0;2;0;2", "aff_unique_norm": "University of California, San Diego;University of Hong Kong;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.ucsd.edu;https://www.hku.hk;https://www.nvidia.com", "aff_unique_abbr": "UCSD;HKU;NVIDIA", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "San Diego;Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Federated Learning over Connected Modes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95719", "id": "JL2eMCfDW8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JL2eMCfDW8", "openreview": "https://openreview.net/forum?id=JL2eMCfDW8", "poster": "/media/PosterPDFs/NeurIPS%202024/95719.png?t=1733892538.0030763", "project": "", "author_site": "Dennis Grinwald, Philipp Wiesner, Shinichi Nakajima", "tldr": "", "abstract": "Statistical heterogeneity in federated learning poses two major challenges: slow global training due to conflicting gradient signals, and the need of personalization for local distributions. In this work, we tackle both challenges by leveraging recent advances in \\emph{linear mode connectivity} --- identifying a linearly connected low-loss region in the parameter space of neural networks, which we call solution simplex. We propose federated learning over connected modes (\\textsc{Floco}), where clients are assigned local subregions in this simplex based on their gradient signals, and together learn the shared global solution simplex. This allows personalization of the client models to fit their local distributions within the degrees of freedom in the solution simplex and homogenizes the update signals for the global simplex training. Our experiments show that \\textsc{Floco} accelerates the global training process, and significantly improves the local accuracy with minimal computational overhead in cross-silo federated learning settings.", "keywords": "Federated Learning;Linear Mode Connectivity", "primary_area": "other", "supplementary_material": "/attachment/b91b391e947ccb182e0f23baa7decb343ead4850.zip", "author": "Dennis Grinwald;Philipp Wiesner;Shinichi Nakajima", "authorids": "~Dennis_Grinwald1;~Philipp_Wiesner1;~Shinichi_Nakajima2", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/dennis-grinwald/;https://philippwiesner.org;https://web.ml.tu-berlin.de/author/dr.-shinichi-nakajima/", "dblp": ";;97/6115.html", "google_scholar": "WkhZ4MYAAAAJ;sZIsBrEAAAAJ;hXSvID4AAAAJ", "orcid": ";0000-0001-5352-7525;0000-0003-3970-4569", "linkedin": ";philippwiesner;", "or_profile": "~Dennis_Grinwald1;~Philipp_Wiesner1;~Shinichi_Nakajima2", "aff": "Technical University of Berlin;Technische Universit\u00e4t Berlin;BIFOLD, TU Berlin", "aff_domain": "tu.berlin;tu-berlin.de;tu-berlin.de", "position": "PhD student;PhD student;Postdoc", "bibtex": "@inproceedings{\ngrinwald2024federated,\ntitle={Federated Learning over Connected Modes},\nauthor={Dennis Grinwald and Philipp Wiesner and Shinichi Nakajima},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JL2eMCfDW8}\n}", "github": "", "reviewers": "bjGi;a16f;5sib;44Bh", "pdf_size": 1294978, "rating": "3;5;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "31;143;50;33", "wc_strengths": "9;23;47;42", "wc_weaknesses": "25;15;22;280", "wc_questions": "60;374;225;112", "wc_limitations": "14;15;41;13", "wc_review": "139;570;385;480", "wc_reply_reviewers": "29;315;13;606", "wc_reply_authors": "244;1012;26;135", "reply_reviewers": "1;4;1;2", "reply_authors": "3;5;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.25, 46.06177916668005 ], "wc_strengths_avg": [ 30.25, 15.188400179084036 ], "wc_weaknesses_avg": [ 85.5, 112.35323760355107 ], "wc_questions_avg": [ 192.75, 120.45201326669472 ], "wc_limitations_avg": [ 20.75, 11.712706775122479 ], "wc_review_avg": [ 393.5, 160.83920541957423 ], "wc_reply_reviewers_avg": [ 240.75, 242.7080293274205 ], "wc_reply_authors_avg": [ 354.25, 387.49475802906034 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G_NfRgMp8yEJ:scholar.google.com/&scioq=Federated+Learning+over+Connected+Modes&hl=en&as_sdt=0,11", "gs_version_total": 4, "email": "tu.berlin;tu-berlin.de;tu-berlin.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Technical University of Berlin;Technische Universit\u00e4t Berlin", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin;TU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "A Practitioner's Guide to Real-World Continual Multimodal Pretraining", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97760", "id": "JLvtwGlezU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JLvtwGlezU", "openreview": "https://openreview.net/forum?id=JLvtwGlezU", "poster": "", "project": "", "author_site": "Karsten Roth, Vishaal Udandarao, Sebastian Dziadzio, Ameya Prabhu, Mehdi Cherti, Oriol Vinyals, Olivier Henaff, Samuel Albanie, Matthias Bethge, Zeynep Akata", "tldr": "", "abstract": "Multimodal foundation models serve numerous applications at the intersection of vision and language. Still, despite being pretrained on extensive data, they become outdated over time.\nTo keep models updated, research into continual pretraining mainly explores scenarios with either (1) infrequent, indiscriminate updates on large-scale new data, or (2) frequent, sample-level updates.\nHowever, practical model deployment often operates in the gap between these two limit cases, as real-world applications demand adaptation to specific subdomains, tasks or concepts --- spread over the entire, varying life cycle of a model. \nIn this work, we complement current perspectives on continual pretraining through a research test bed and offer comprehensive guidance for effective continual model updates in such scenarios.\nWe first introduce FoMo-in-Flux, a continual multimodal pretraining benchmark with realistic compute constraints and practical deployment requirements, constructed over 63 datasets with diverse visual and semantic coverage.\nUsing FoMo-in-Flux, we explore the complex landscape of practical continual pretraining through multiple perspectives: (1) data mixtures and stream orderings that emulate real-world deployment settings, (2) methods ranging from simple fine-tuning and traditional continual learning strategies to parameter-efficient updates and model merging, (3) meta-learning-rate schedules and mechanistic design choices, and (4) model and compute scaling. Together, our insights provide a practitioner's guide to continual multimodal pretraining for real-world deployment. Benchmark and code is provided here: https://github.com/ExplainableML/fomo_in_flux.", "keywords": "continual pretraining;multimodal learning;vision-language;CLIP", "primary_area": "", "supplementary_material": "/attachment/9a8115ea138a4a325a7da185d5e5be20d01586ac.pdf", "author": "Vishaal Udandarao;Karsten Roth;Sebastian Dziadzio;Ameya Prabhu;Mehdi Cherti;Oriol Vinyals;Olivier J Henaff;Samuel Albanie;Zeynep Akata;Matthias Bethge", "authorids": "~Vishaal_Udandarao1;~Karsten_Roth1;~Sebastian_Dziadzio1;~Ameya_Prabhu1;~Mehdi_Cherti2;~Oriol_Vinyals1;~Olivier_J_Henaff1;~Samuel_Albanie2;~Zeynep_Akata1;~Matthias_Bethge1", "gender": "M;Not Specified;M;M;;;F;M;M;Not Specified", "homepage": "https://vishaal27.github.io/;https://karroth.com/;https://sebastiandziadzio.com/;https://drimpossible.github.io/;;https://www.olivierhenaff.com/;https://eml-unitue.de/people/zeynep-akata;https://bethgelab.org;;https://samuelalbanie.com/", "dblp": "247/4693;234/7803;170/6288.html;181/4512;05/726;156/0035.html;117/4838;77/3005;;188/5765", "google_scholar": "jUOcawkAAAAJ;93ZjIs0AAAAJ;https://scholar.google.co.uk/citations?user=8vAIQXoAAAAJ;0kK7sSAAAAAJ;https://scholar.google.co.uk/citations?user=NkzyCvUAAAAJ;Sx75CVsAAAAJ;jQl9RtkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=JgOyYi8AAAAJ;https://scholar.google.co.uk/citations?user=QjufASUAAAAJ", "orcid": ";;;;;0000-0001-8183-9489;0000-0002-1432-7747;;;", "linkedin": "vishaal-udandarao/?originalSubdomain=de;;sebastiandziadzio/;;;;zeynep-akata-36182045/?ppe=1;;mehdi-cherti;", "or_profile": "~Vishaal_Udandarao1;~Karsten_Roth1;~Sebastian_Dziadzio1;~Ameya_Prabhu1;~Oriol_Vinyals1;~Olivier_J_Henaff1;~Zeynep_Akata1;~Matthias_Bethge1;~mehdi_cherti1;~samuel_Albanie1", "aff": "University of Cambridge;University of Tuebingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Oxford;Electrical Engineering & Computer Science Department;Google DeepMind;Helmholtz Munich;University of Tuebingen;Forschungszentrum J\u00fclich;University of Cambridge", "aff_domain": "cam.ac.uk;uni-tuebingen.de;uni-tuebingen.de;ox.ac.uk;eecs.berkeley.edu;google.com;helmholtz-munich.de;uni-tuebingen.de;fz-juelich.de;cam.ac.uk", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Research Scientist;Researcher;Full Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nudandarao2024a,\ntitle={A Practitioner's Guide to Real-World Continual Multimodal Pretraining},\nauthor={Vishaal Udandarao and Karsten Roth and Sebastian Dziadzio and Ameya Prabhu and Mehdi Cherti and Oriol Vinyals and Olivier J Henaff and Samuel Albanie and Zeynep Akata and Matthias Bethge},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=JLvtwGlezU}\n}", "github": "", "reviewers": "Fxhw;4Xf5;jRa8", "pdf_size": 3562879, "rating": "7;7;8", "confidence": "4;2;4", "wc_summary_and_contributions": "25;41;108", "wc_strengths": "16;48;213", "wc_improvement": "27;25;113", "wc_limitations": "1;27;4", "wc_correctness": "10;6;21", "wc_clarity": "9;15;5", "wc_relation_to_prior_work": "7;5;18", "wc_documentation": "10;10;11", "wc_additional_feedback": "1;1;1", "wc_review": "106;178;494", "wc_reply_reviewers": "33;66;0", "wc_reply_authors": "0;711;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 58.0, 35.95367389665021 ], "wc_strengths_avg": [ 92.33333333333333, 86.3185315496556 ], "wc_improvement_avg": [ 55.0, 41.02032016777376 ], "wc_limitations_avg": [ 10.666666666666666, 11.614167593456232 ], "wc_correctness_avg": [ 12.333333333333334, 6.342099196813483 ], "wc_clarity_avg": [ 9.666666666666666, 4.109609335312651 ], "wc_relation_to_prior_work_avg": [ 10.0, 5.715476066494082 ], "wc_documentation_avg": [ 10.333333333333334, 0.4714045207910317 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 259.3333333333333, 168.51772079583267 ], "wc_reply_reviewers_avg": [ 33.0, 26.94438717061496 ], "wc_reply_authors_avg": [ 237.0, 335.1686142824235 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8gzH_V2A4KwJ:scholar.google.com/&scioq=A+Practitioner%27s+Guide+to+Real-World+Continual+Multimodal+Pretraining&hl=en&as_sdt=0,21", "gs_version_total": 2, "email": "cam.ac.uk;uni-tuebingen.de;uni-tuebingen.de;ox.ac.uk;eecs.berkeley.edu;google.com;helmholtz-munich.de;uni-tuebingen.de;fz-juelich.de;cam.ac.uk", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;6;1;7;0", "aff_unique_norm": "University of Cambridge;University of Tuebingen;Eberhard Karls University of T\u00fcbingen;University of Oxford;Electrical Engineering & Computer Science Department;Google;Helmholtz Zentrum M\u00fcnchen;Forschungszentrum J\u00fclich", "aff_unique_dep": ";;;;Electrical Engineering & Computer Science;Google DeepMind;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.ox.ac.uk;;https://deepmind.com;https://www.helmholtz-muenchen.de;https://www.fz-juelich.de", "aff_unique_abbr": "Cambridge;Uni T\u00fcbingen;Uni T\u00fcbingen;Oxford;;DeepMind;HMGU;FZJ", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Cambridge;;T\u00fcbingen", "aff_country_unique_index": "0;1;1;0;0;1;1;1;0", "aff_country_unique": "United Kingdom;Germany;" }, { "title": "Shape analysis for time series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95718", "id": "JM0IQSliol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JM0IQSliol", "openreview": "https://openreview.net/forum?id=JM0IQSliol", "poster": "/media/PosterPDFs/NeurIPS%202024/95718.png?t=1731436553.3897247", "project": "", "author_site": "Thibaut Germain, Samuel Gruffaz, Charles Truong, Alain Durmus, Laurent Oudre", "tldr": "", "abstract": "Analyzing inter-individual variability of physiological functions is particularly appealing in medical and biological contexts to describe or quantify health conditions. Such analysis can be done by comparing individuals to a reference one with time series as biomedical data.\nThis paper introduces an unsupervised representation learning (URL) algorithm for time series tailored to inter-individual studies. The idea is to represent time series as deformations of a reference time series. The deformations are diffeomorphisms parameterized and learned by our method called TS-LDDMM. Once the deformations and the reference time series are learned, the vector representations of individual time series are given by the parametrization of their corresponding deformation. At the crossroads between URL for time series and shape analysis, the proposed algorithm handles irregularly sampled multivariate time series of variable lengths and provides shape-based representations of temporal data.\nIn this work, we establish a representation theorem for the graph of a time series and derive its consequences on the LDDMM framework. We showcase the advantages of our representation compared to existing methods using synthetic data and real-world examples motivated by biomedical applications.", "keywords": "Machine learning for sciences;Machine learning for healthcare;Representation learning for time series;Shape analysis;LDDMM;Kernel methods", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Thibaut Germain;Samuel Gruffaz;Charles Truong;Alain Oliviero Durmus;Laurent Oudre", "authorids": "~Thibaut_Germain1;~Samuel_Gruffaz1;~Charles_Truong1;~Alain_Oliviero_Durmus1;~Laurent_Oudre2", "gender": "M;M;Unspecified;;", "homepage": ";https://www.linkedin.com/in/samuel-gruffaz;;;", "dblp": "329/2806;;207/9811;;", "google_scholar": "VLKQed4AAAAJ;Ndn1EvEAAAAJ;https://scholar.google.fr/citations?user=3byuqG4AAAAJ;;", "orcid": "0009-0002-4687-0581;;0000-0002-8527-8161;;", "linkedin": ";;;;", "or_profile": "~Thibaut_Germain1;~Samuel_Gruffaz1;~Charles_Truong1;~Alain_Oliviero_Durmus1;~Laurent_Oudre2", "aff": "Ecole Normale Superieure;Ecole Normale Superieure;Ecole Normale Superieure;;", "aff_domain": "ens-paris-saclay.fr;ens-paris-saclay.fr;ens-paris-saclay.fr;;", "position": "PhD student;PhD student;Postdoc;;", "bibtex": "@inproceedings{\ngermain2024shape,\ntitle={Shape analysis for time series},\nauthor={Thibaut Germain and Samuel Gruffaz and Charles Truong and Alain Oliviero Durmus and Laurent Oudre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JM0IQSliol}\n}", "github": "", "reviewers": "HVwJ;LvUA;xHtc;xHE6", "pdf_size": 5688395, "rating": "5;6;7;7", "confidence": "4;4;3;3", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "1;3;3;3", "wc_summary": "211;88;67;56", "wc_strengths": "216;51;34;127", "wc_weaknesses": "783;157;41;71", "wc_questions": "79;43;78;13", "wc_limitations": "9;1;23;27", "wc_review": "1298;340;243;294", "wc_reply_reviewers": "80;36;19;35", "wc_reply_authors": "41;0;0;84", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 105.5, 61.985885490166226 ], "wc_strengths_avg": [ 107.0, 72.01735901850331 ], "wc_weaknesses_avg": [ 263.0, 303.2259883321349 ], "wc_questions_avg": [ 53.25, 27.38955092731533 ], "wc_limitations_avg": [ 15.0, 10.488088481701515 ], "wc_review_avg": [ 543.75, 436.8159652531029 ], "wc_reply_reviewers_avg": [ 42.5, 22.677080940897135 ], "wc_reply_authors_avg": [ 31.25, 34.751798514609284 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17781263230529365812&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ens-paris-saclay.fr;ens-paris-saclay.fr;ens-paris-saclay.fr;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ecole Normale Superieure", "aff_unique_dep": "", "aff_unique_url": "https://www.ens.fr", "aff_unique_abbr": "ENS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Unpacking DPO and PPO: Disentangling Best Practices for Learning from Preference Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95717", "id": "JMBWTlazjW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JMBWTlazjW", "openreview": "https://openreview.net/forum?id=JMBWTlazjW", "poster": "", "project": "", "author_site": "Hamish Ivison, Yizhong Wang, Jiacheng Liu, Zeqiu Wu, Valentina Pyatkin, Nathan Lambert, Noah Smith, Yejin Choi, Hanna Hajishirzi", "tldr": "", "abstract": "Learning from preference feedback has emerged as an essential step for improving the generation quality and performance of modern language models (LMs). Despite its widespread use, the way preference-based learning is applied varies wildly, with differing data, learning algorithms, and evaluations used, making disentangling the impact of each aspect difficult. In this work, we identify four core aspects of preference-based learning: preference data, learning algorithm, reward model, and policy training prompts, systematically investigate the impact of these components on downstream model performance, and suggest a recipe for strong learning for preference feedback. Our findings indicate that all aspects are important for performance, with better preference data leading to the largest improvements, followed by the choice of learning algorithm, the use of improved reward models, and finally the use of additional unlabeled prompts for policy training. Notably, PPO outperforms DPO by up to 2.5% in math and 1.2% in general domains. High-quality preference data leads to improvements of up to 8% in instruction following and truthfulness. Despite significant gains of up to 5% in mathematical evaluation when scaling up reward models, we surprisingly observe marginal improvements in other categories.", "keywords": "RLHF;learning from preferences;PPO;DPO;Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hamish Ivison;Yizhong Wang;Jiacheng Liu;Zeqiu Wu;Valentina Pyatkin;Nathan Lambert;Noah A. Smith;Yejin Choi;Hannaneh Hajishirzi", "authorids": "~Hamish_Ivison1;~Yizhong_Wang2;~Jiacheng_Liu2;~Zeqiu_Wu1;~Valentina_Pyatkin1;~Nathan_Lambert1;~Noah_A._Smith2;~Yejin_Choi1;~Hannaneh_Hajishirzi1", "gender": ";M;M;F;;M;;F;F", "homepage": "https://hamishivi.github.io;https://yizhong-wang.com;https://github.com/liujch1998;http://ellenmellon.github.io/;;https://natolambert.com;;https://yejinc.github.io/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "288/1956;79/3601;289/6273;188/5861;;228/9584.html;;89/579-1;52/1296", "google_scholar": ";y5zpqdAAAAAJ;GJfoBZAAAAAJ;Ug_hZoYAAAAJ;;O4jW7BsAAAAJ;;vhP-tlcAAAAJ;LOV6_WIAAAAJ", "orcid": "0000-0002-0069-7659;;0000-0003-3308-2869;;;0000-0002-9997-6817;;;", "linkedin": ";;liujch1998/;;;nathan-lambert-55093468/;;;", "or_profile": "~Hamish_Ivison1;~Yizhong_Wang2;~Jiacheng_Liu2;~Zeqiu_Wu1;~Valentina_Pyatkin1;~Nathan_Lambert1;~Noah_A._Smith2;~Yejin_Choi1;~Hannaneh_Hajishirzi1", "aff": "University of Washington;Department of Computer Science, University of Washington;Meta Facebook;University of Washington, Seattle;;Allen Institute for Artificial Intelligence;;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "uw.edu;cs.washington.edu;meta.com;uw.edu;;allenai.org;;cs.washington.edu;uw.edu", "position": "PhD student;PhD student;Intern;PhD student;;Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nivison2024unpacking,\ntitle={Unpacking {DPO} and {PPO}: Disentangling Best Practices for Learning from Preference Feedback},\nauthor={Hamish Ivison and Yizhong Wang and Jiacheng Liu and Zeqiu Wu and Valentina Pyatkin and Nathan Lambert and Noah A. Smith and Yejin Choi and Hannaneh Hajishirzi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JMBWTlazjW}\n}", "github": "", "reviewers": "P8L3;wF89;WxZd;mnCL", "pdf_size": 840732, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;2", "novelty": "2;2;3;3", "presentation": "4;4;3;3", "wc_summary": "144;77;51;37", "wc_strengths": "97;69;91;47", "wc_weaknesses": "491;36;84;118", "wc_questions": "42;17;441;78", "wc_limitations": "17;6;1;1", "wc_review": "791;205;668;281", "wc_reply_reviewers": "253;11;11;20", "wc_reply_authors": "303;0;40;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.25, 41.12405014100629 ], "wc_strengths_avg": [ 76.0, 19.72308292331602 ], "wc_weaknesses_avg": [ 182.25, 180.62166951946824 ], "wc_questions_avg": [ 144.5, 172.55216602523424 ], "wc_limitations_avg": [ 6.25, 6.53356717268599 ], "wc_review_avg": [ 486.25, 248.5632464786377 ], "wc_reply_reviewers_avg": [ 73.75, 103.5552388824438 ], "wc_reply_authors_avg": [ 85.75, 126.48789467771215 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1218964789365706425&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "uw.edu;cs.washington.edu;meta.com;uw.edu;;allenai.org;;cs.washington.edu;uw.edu", "author_num": 9, "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.washington.edu;https://meta.com;https://allenai.org", "aff_unique_abbr": "UW;Meta;AI2", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RA-PbRL: Provably Efficient Risk-Aware Preference-Based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95716", "id": "JNDcFOczOf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JNDcFOczOf", "openreview": "https://openreview.net/forum?id=JNDcFOczOf", "poster": "/media/PosterPDFs/NeurIPS%202024/95716.png?t=1733900058.8519156", "project": "", "author_site": "Yujie Zhao, Jose Aguilar Escamilla, Weyl Lu, Huazheng Wang", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF) has recently surged in popularity, particularly for aligning large language models and other AI systems with human intentions. At its core, RLHF can be viewed as a specialized instance of Preference-based Reinforcement Learning (PbRL), where the preferences specifically originate from human judgments rather than arbitrary evaluators. Despite this connection, most existing approaches in both RLHF and PbRL primarily focus on optimizing a mean reward objective, neglecting scenarios that necessitate risk-awareness, such as AI safety, healthcare, and autonomous driving. These scenarios often operate under a one-episode-reward setting, which makes conventional risk-sensitive objectives inapplicable. To address this, we explore and prove the applicability of two risk-aware objectives to PbRL : nested and static quantile risk objectives. We also introduce Risk-AwarePbRL (RA-PbRL), an algorithm designed to optimize both nested and static objectives. Additionally, we provide a theoretical analysis of the regret upper bounds, demonstrating that they are sublinear with respect to the number of episodes, and present empirical results to support our findings. Our code is available in https://github.com/aguilarjose11/PbRLNeurips.", "keywords": "Reinforcement Learning Human Feedback;Risk-aware Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yujie Zhao;Jose Efraim Aguilar Escamilla;Weyl Lu;Huazheng Wang", "authorids": "~Yujie_Zhao2;~Jose_Efraim_Aguilar_Escamilla1;~Weyl_Lu1;~Huazheng_Wang1", "gender": "F;M;;", "homepage": "https://norahyujiezhao.github.io/;http://www.aguilar.phd;;https://huazhengwang.github.io/", "dblp": ";;;163/2233", "google_scholar": "Knx9mj0AAAAJ;;;w3PrbKwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yujie_Zhao2;~Jose_Efraim_Aguilar_Escamilla1;~Weyl_Lu1;~Huazheng_Wang1", "aff": "University of Science and Technology of China;Oregon State University;;Oregon State University", "aff_domain": "ustc.edu.cn;oregonstate.edu;;oregonstate.edu", "position": "Undergrad student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024rapbrl,\ntitle={{RA}-Pb{RL}: Provably Efficient Risk-Aware Preference-Based Reinforcement Learning},\nauthor={Yujie Zhao and Jose Efraim Aguilar Escamilla and Weyl Lu and Huazheng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JNDcFOczOf}\n}", "github": "", "reviewers": "LP4v;QEr2;TVSN;EoFm", "pdf_size": 1921989, "rating": "4;5;6;7", "confidence": "3;2;4;2", "soundness": "3;3;2;3", "novelty": "2;3;3;2", "presentation": "2;3;3;2", "wc_summary": "87;24;58;75", "wc_strengths": "95;49;63;34", "wc_weaknesses": "193;84;238;65", "wc_questions": "5;2;467;39", "wc_limitations": "5;1;19;6", "wc_review": "385;160;845;219", "wc_reply_reviewers": "12;22;18;28", "wc_reply_authors": "27;59;60;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.0, 23.717082451262844 ], "wc_strengths_avg": [ 60.25, 22.531921799970814 ], "wc_weaknesses_avg": [ 145.0, 72.58443359288546 ], "wc_questions_avg": [ 128.25, 196.11651511282776 ], "wc_limitations_avg": [ 7.75, 6.7592529172978875 ], "wc_review_avg": [ 402.25, 268.603215729075 ], "wc_reply_reviewers_avg": [ 20.0, 5.830951894845301 ], "wc_reply_authors_avg": [ 40.5, 19.397164741270824 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8713029713277719121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;oregonstate.edu;;oregonstate.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Science and Technology of China;Oregon State University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://oregonstate.edu", "aff_unique_abbr": "USTC;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "ShiftAddLLM: Accelerating Pretrained LLMs via Post-Training Multiplication-Less Reparameterization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95715", "id": "JNl6h3U3oW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JNl6h3U3oW", "openreview": "https://openreview.net/forum?id=JNl6h3U3oW", "poster": "", "project": "", "author_site": "Haoran You, Yipin Guo, Yichao Fu, Wei Zhou, Huihong Shi, Xiaofan Zhang, Souvik Kundu, Amir Yazdanbakhsh, Yingyan (Celine) Lin", "tldr": "", "abstract": "Large language models (LLMs) have shown impressive performance on language tasks but face challenges when deployed on resource-constrained devices due to their extensive parameters and reliance on dense multiplications, resulting in high memory demands and latency bottlenecks. Shift-and-add reparameterization offers a promising solution by replacing costly multiplications with hardware-friendly primitives in both the attention and multi-layer perceptron (MLP) layers of an LLM. However, current reparameterization techniques require training from scratch or full parameter fine-tuning to restore accuracy, which is resource-intensive for LLMs. To address this, we propose accelerating pretrained LLMs through post-training shift-and-add reparameterization, creating efficient multiplication-free models, dubbed ShiftAddLLM. Specifically, we quantize each weight matrix into binary matrices paired with group-wise scaling factors. The associated multiplications are reparameterized into (1) shifts between activations and scaling factors and (2) queries and adds according to the binary matrices. To reduce accuracy loss, we present a multi-objective optimization method to minimize both weight and output activation reparameterization errors. Additionally, based on varying sensitivity across layers to reparameterization, we develop an automated bit allocation strategy to further reduce memory usage and latency. Experiments on five LLM families and eight tasks consistently validate the effectiveness of ShiftAddLLM, achieving average perplexity reductions of 5.6 and 22.7 points at comparable or lower latency compared to the most competitive quantized LLMs at 3- and 2-bit precision, respectively, and more than 80% memory and energy reductions over the original LLMs. Codes and models are available at https://github.com/GATECH-EIC/ShiftAddLLM.", "keywords": "Large Language Models (LLMs); Efficient LLMs; Multiplication-less networks; Hardware acceleration", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Haoran You;Yipin Guo;Yichao Fu;Wei Zhou;Huihong Shi;Xiaofan Zhang;Souvik Kundu;Amir Yazdanbakhsh;Yingyan Celine Lin", "authorids": "~Haoran_You1;~Yipin_Guo1;~Yichao_Fu1;~Wei_Zhou25;~Huihong_Shi1;~Xiaofan_Zhang1;~Souvik_Kundu2;~Amir_Yazdanbakhsh1;~Yingyan_Celine_Lin1", "gender": "M;M;;M;F;M;M;M;", "homepage": "http://haoranyou.com/;https://github.com/coco-alen?tab=repositories;;;https://shihuihong214.github.io/huihong.shi/;https://www.xiaofanzhang.net/;https://ksouvik52.github.io;https://www.ayazdan.com/;", "dblp": "230/4247;302/4321.html;;;253/3178;28/9804-1.html;126/2210;44/8745;", "google_scholar": "z5Eku1sAAAAJ;;;;https://scholar.google.com/citations?hl=en;gG24R6MAAAAJ;https://scholar.google.com/citations?hl=en;Vdu_sqwAAAAJ;", "orcid": "0000-0002-2873-2153;;;;0000-0002-7845-0154;;0000-0002-3533-9405;0000-0001-8199-7671;", "linkedin": "haoran-you-b4b958165/;;;wei-zhou3/;;;souvik-kundu-64922b50/;ayazdanb/;", "or_profile": "~Haoran_You1;~Yipin_Guo1;~Yichao_Fu1;~Wei_Zhou25;~Huihong_Shi1;~Xiaofan_Zhang1;~Souvik_Kundu2;~Amir_Yazdanbakhsh1;~Yingyan_Celine_Lin1", "aff": "Georgia Institute of Technology;Zhejiang University;;Georgia Institute of Technology;Nanjing University;Google;Intel;Google DeepMind;", "aff_domain": "gatech.edu;zju.edu.cn;;gatech.edu;nju.edu.cn;google.com;intel.com;google.com;", "position": "PhD student;MS student;;Undergrad student;PhD student;Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\nyou2024shiftaddllm,\ntitle={ShiftAdd{LLM}: Accelerating Pretrained {LLM}s via Post-Training Multiplication-Less Reparameterization},\nauthor={Haoran You and Yipin Guo and Yichao Fu and Wei Zhou and Huihong Shi and Xiaofan Zhang and Souvik Kundu and Amir Yazdanbakhsh and Yingyan Celine Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JNl6h3U3oW}\n}", "github": "", "reviewers": "7Vce;4dkb;3fe7", "pdf_size": 845727, "rating": "5;7;7", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "78;212;54", "wc_strengths": "78;377;25", "wc_weaknesses": "295;163;552", "wc_questions": "4;1;107", "wc_limitations": "58;1;55", "wc_review": "513;754;793", "wc_reply_reviewers": "675;9;50", "wc_reply_authors": "1453;42;298", "reply_reviewers": "3;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 114.66666666666667, 69.51898221988645 ], "wc_strengths_avg": [ 160.0, 154.96020994651067 ], "wc_weaknesses_avg": [ 336.6666666666667, 161.51848879376695 ], "wc_questions_avg": [ 37.333333333333336, 49.27699485786671 ], "wc_limitations_avg": [ 38.0, 26.19160170741759 ], "wc_review_avg": [ 686.6666666666666, 123.82873477329709 ], "wc_reply_reviewers_avg": [ 244.66666666666666, 304.7516292910598 ], "wc_reply_authors_avg": [ 597.6666666666666, 613.7753841774874 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3024385457534052466&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "gatech.edu;zju.edu.cn;;gatech.edu;nju.edu.cn;google.com;intel.com;google.com;", "author_num": 9, "aff_unique_index": "0;1;0;2;3;4;3", "aff_unique_norm": "Georgia Institute of Technology;Zhejiang University;Nanjing University;Google;Intel", "aff_unique_dep": ";;;Google;Intel Corporation", "aff_unique_url": "https://www.gatech.edu;https://www.zju.edu.cn;https://www.nju.edu.cn;https://www.google.com;https://www.intel.com", "aff_unique_abbr": "Georgia Tech;ZJU;Nanjing U;Google;Intel", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;0;0;2", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "Binding in hippocampal-entorhinal circuits enables compositionality in cognitive maps", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95714", "id": "JO6T4rEJ32", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JO6T4rEJ32", "openreview": "https://openreview.net/forum?id=JO6T4rEJ32", "poster": "", "project": "", "author_site": "Christopher Kymn, Sonia Mazelet, Anthony Thomas, Denis Kleyko, Edward Frady, Fritz Sommer, Bruno Olshausen", "tldr": "", "abstract": "We propose a normative model for spatial representation in the hippocampal formation that combines optimality principles, such as maximizing coding range and spatial information per neuron, with an algebraic framework for computing in distributed representation. Spatial position is encoded in a residue number system, with individual residues represented by high-dimensional, complex-valued vectors. These are composed into a single vector representing position by a similarity-preserving, conjunctive vector-binding operation. Self-consistency between the vectors representing position and the individual residues is enforced by a modular attractor network whose modules correspond to the grid cell modules in entorhinal cortex. The vector binding operation can also be used to bind different contexts to spatial representations, yielding a model for entorhinal cortex and hippocampus. We provide model analysis of scaling, similarity preservation and convergence behavior as well as experiments demonstrating noise robustness, sub-integer resolution in representing position, and path integration. The model formalizes the computations in the cognitive map and makes testable experimental predictions.", "keywords": "cognitive maps;compositionality;hippocampus;entorhinal cortex", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/8d4c9072dc9f238f7059f1e77cf5cafc0a5b3ca7.zip", "author": "Christopher Kymn;Sonia Mazelet;Anthony Hitchcock Thomas;Denis Kleyko;Edward Paxon Frady;Friedrich Sommer;Bruno Olshausen", "authorids": "~Christopher_Kymn1;~Sonia_Mazelet1;~Anthony_Hitchcock_Thomas1;~Denis_Kleyko1;~Edward_Paxon_Frady1;~Friedrich_Sommer1;~Bruno_Olshausen1", "gender": ";F;M;;M;M;M", "homepage": ";;;https://www.oru.se/english/employee/denis_kleyko;https://epaxon.github.io;https://www.rctn.org/wiki/Fritz_Sommer;http://redwood.berkeley.edu/bruno/", "dblp": "301/8016.html;;128/1625;;;;30/3869", "google_scholar": "-tUVfeIAAAAJ;;vUdhSFcAAAAJ;6OPdyrkAAAAJ;;lA-oLkgAAAAJ;4aqK_74AAAAJ", "orcid": ";;;;;;", "linkedin": ";sonia-mazelet-0a4679225;anthony-thomas-7a85bb67/;;;;", "or_profile": "~Christopher_Kymn1;~Sonia_Mazelet1;~Anthony_Hitchcock_Thomas1;~Denis_Kleyko1;~Edward_Paxon_Frady1;~Friedrich_Sommer1;~Bruno_Olshausen1", "aff": "University of California, Berkeley;University of California, Berkeley;;\u00d6rebro University;Intel;University of California, Berkeley;UC Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;;oru.se;intel.com;berkeley.edu;", "position": "PhD student;Intern;;Assistant Professor;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nkymn2024binding,\ntitle={Binding in hippocampal-entorhinal circuits enables compositionality in cognitive maps},\nauthor={Christopher Kymn and Sonia Mazelet and Anthony Hitchcock Thomas and Denis Kleyko and Edward Paxon Frady and Friedrich Sommer and Bruno Olshausen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JO6T4rEJ32}\n}", "github": "", "reviewers": "ide5;aqeG;BzEm;METM", "pdf_size": 5180352, "rating": "5;7;7;7", "confidence": "2;2;2;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "316;54;80;93", "wc_strengths": "23;33;54;81", "wc_weaknesses": "149;48;19;34", "wc_questions": "480;42;89;104", "wc_limitations": "18;31;7;72", "wc_review": "986;208;249;384", "wc_reply_reviewers": "84;0;0;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 135.75, 105.01041615001819 ], "wc_strengths_avg": [ 47.75, 22.219079638904937 ], "wc_weaknesses_avg": [ 62.5, 50.98284025042151 ], "wc_questions_avg": [ 178.75, 175.42430703867694 ], "wc_limitations_avg": [ 32.0, 24.606909598728564 ], "wc_review_avg": [ 456.75, 312.42389073180686 ], "wc_reply_reviewers_avg": [ 25.0, 34.68429039204925 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3011815788635498843&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "berkeley.edu;berkeley.edu;;oru.se;intel.com;berkeley.edu;", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of California, Berkeley;\u00d6rebro University;Intel", "aff_unique_dep": ";;Intel Corporation", "aff_unique_url": "https://www.berkeley.edu;https://www.oru.se;https://www.intel.com", "aff_unique_abbr": "UC Berkeley;\u00d6rebro U;Intel", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Sweden" }, { "title": "Visual Perception by Large Language Model\u2019s Weights", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95713", "id": "JPtobPtxKT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JPtobPtxKT", "openreview": "https://openreview.net/forum?id=JPtobPtxKT", "poster": "/media/PosterPDFs/NeurIPS%202024/95713.png?t=1732963852.409149", "project": "", "author_site": "Feipeng Ma, Hongwei Xue, Yizhou Zhou, Guangting Wang, Fengyun Rao, Shilin Yan, Yueyi Zhang, Siying Wu, Mike Zheng Shou, Xiaoyan Sun", "tldr": "", "abstract": "Existing Multimodal Large Language Models (MLLMs) follow the paradigm that perceives visual information by aligning visual features with the input space of Large Language Models (LLMs) and concatenating visual tokens with text tokens to form a unified sequence input for LLMs. These methods demonstrate promising results on various vision-language tasks but are limited by the high computational effort due to the extended input sequence resulting from the involvement of visual tokens. In this paper, instead of input space alignment, we propose a novel parameter space alignment paradigm that represents visual information as model weights. For each input image, we use a vision encoder to extract visual features, convert features into perceptual weights, and merge the perceptual weights with LLM's weights. In this way, the input of LLM does not require visual tokens, which reduces the length of the input sequence and greatly improves efficiency. Following this paradigm, we propose VLoRA with the perceptual weights generator. The perceptual weights generator is designed to convert visual features to perceptual weights with low-rank property, exhibiting a form similar to LoRA. The experimental results show that our VLoRA achieves comparable performance on various benchmarks for MLLMs, while significantly reducing the computational costs for both training and inference. Code and models are released at \\url{https://github.com/FeipengMa6/VLoRA}.", "keywords": "Multi-modal large language model;vision language pre-training;multi-modal learning", "primary_area": "generative_models", "supplementary_material": "", "author": "Feipeng Ma;Hongwei Xue;Yizhou Zhou;Guangting Wang;Fengyun Rao;Shilin Yan;Yueyi Zhang;Siying Wu;Mike Zheng Shou;Xiaoyan Sun", "authorids": "~Feipeng_Ma1;~Hongwei_Xue1;~Yizhou_Zhou1;~Guangting_Wang1;~Fengyun_Rao2;~Shilin_Yan1;~Yueyi_Zhang2;~Siying_Wu1;~Mike_Zheng_Shou1;~Xiaoyan_Sun1", "gender": "M;;M;M;M;M;;;;F", "homepage": "https://github.com/FeipengMa6;https://hellwayxue.github.io/;;http://home.ustc.edu.cn/~flylight;;https://scholar.google.com/citations?user=2VhjOykAAAAJ&hl=en;;;;", "dblp": ";272/6488;213/4083;227/7195;249/9074;166/3197.html;;;;13/1574-1.html", "google_scholar": "ZDxabCwAAAAJ;k5CJa5YAAAAJ;dHBNmSkAAAAJ;cKY8e8sAAAAJ;https://scholar.google.com.hk/citations?user=38dACd4AAAAJ;2VhjOykAAAAJ;LatWlFAAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0006-4535-7590;;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Feipeng_Ma1;~Hongwei_Xue1;~Yizhou_Zhou1;~Guangting_Wang1;~Fengyun_Rao2;~Shilin_Yan1;~Yueyi_Zhang2;~Siying_Wu1;~Mike_Zheng_Shou1;~Xiaoyan_Sun1", "aff": "University of Science and Technology of China;University of Science and Technology of China;WeChat AI;Tencent;WeChat Vision, Tencent Inc.;Fudan University;University of Science and Technology of China;;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;tencent.com;tencent.com;tencent.com;fudan.edu.cn;ustc.edu.cn;;;ustc.edu.cn", "position": "PhD student;PhD student;Researcher;Engineer;Researcher;MS student;Associate Researcher;;;Full Professor", "bibtex": "@inproceedings{\nma2024visual,\ntitle={Visual Perception by Large Language Model{\\textquoteright}s Weights},\nauthor={Feipeng Ma and Hongwei Xue and Yizhou Zhou and Guangting Wang and Fengyun Rao and Shilin Yan and Yueyi Zhang and Siying Wu and Mike Zheng Shou and Xiaoyan Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JPtobPtxKT}\n}", "github": "", "reviewers": "s5rD;cXpd;MX94", "pdf_size": 1166828, "rating": "5;5;6", "confidence": "5;4;5", "soundness": "2;4;3", "novelty": "2;2;3", "presentation": "3;4;3", "wc_summary": "45;81;59", "wc_strengths": "64;23;77", "wc_weaknesses": "202;186;130", "wc_questions": "81;2;42", "wc_limitations": "22;1;36", "wc_review": "414;293;344", "wc_reply_reviewers": "0;27;62", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 61.666666666666664, 14.817407180595247 ], "wc_strengths_avg": [ 54.666666666666664, 23.01207412545761 ], "wc_weaknesses_avg": [ 172.66666666666666, 30.868898407440604 ], "wc_questions_avg": [ 41.666666666666664, 32.25247621845836 ], "wc_limitations_avg": [ 19.666666666666668, 14.38363267359428 ], "wc_review_avg": [ 350.3333333333333, 49.600627236177395 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 25.381533094401966 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11205270691902478774&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;ustc.edu.cn;tencent.com;tencent.com;tencent.com;fudan.edu.cn;ustc.edu.cn;;;ustc.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;2;2;3;0;0", "aff_unique_norm": "University of Science and Technology of China;WeChat;Tencent;Fudan University", "aff_unique_dep": ";WeChat AI;Tencent Holdings Limited;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.wechat.com;https://www.tencent.com;https://www.fudan.edu.cn", "aff_unique_abbr": "USTC;WeChat AI;Tencent;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hints-In-Browser: Benchmarking Language Models for Programming Feedback Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97759", "id": "JRMSC08gSF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JRMSC08gSF", "openreview": "https://openreview.net/forum?id=JRMSC08gSF", "poster": "/media/PosterPDFs/NeurIPS%202024/97759.png?t=1733410184.742139", "project": "", "author_site": "Nachiket Kotalwar, Alkis Gotovos, Adish Singla", "tldr": "", "abstract": "Generative AI and large language models hold great promise in enhancing programming education by generating individualized feedback and hints for learners. Recent works have primarily focused on improving the quality of generated feedback to achieve human tutors' quality. While quality is an important performance criterion, it is not the only criterion to optimize for real-world educational deployments. In this paper, we benchmark language models for programming feedback generation across several performance criteria, including quality, cost, time, and data privacy. The key idea is to leverage recent advances in the new paradigm of in-browser inference that allow running these models directly in the browser, thereby providing direct benefits across cost and data privacy. To boost the feedback quality of small models compatible with in-browser inference engines, we develop a fine-tuning pipeline based on GPT-4 generated synthetic data. We showcase the efficacy of fine-tuned Llama3-8B and Phi3-3.8B 4-bit quantized models using WebLLM's in-browser inference engine on three different Python programming datasets. We will release the full implementation along with a web app and datasets to facilitate further research on in-browser language models.", "keywords": "generative AI;language models;in-browser model inference;WebLLM;programming feedback", "primary_area": "", "supplementary_material": "/attachment/6a86dab2ce04d46c7d35ca817578c4c9c430d20f.pdf", "author": "Nachiket Kotalwar;Alkis Gotovos;Adish Singla", "authorids": "~Nachiket_Kotalwar1;~Alkis_Gotovos1;~Adish_Singla2", "gender": "M;M;", "homepage": "https://nachiketdk.github.io/;http://people.csail.mit.edu/alkisg/;https://machineteaching.mpi-sws.org/adishsingla.html", "dblp": "380/3774.html;81/10871;58/657", "google_scholar": "8ZbAeL8AAAAJ;UJ9-UuIAAAAJ;kXz2seUAAAAJ", "orcid": "0009-0003-4319-014X;;", "linkedin": "nachiket-kotalwar/;;", "or_profile": "~Nachiket_Kotalwar1;~Alkis_Gotovos1;~Adish_Kumar_Singla1", "aff": "BITS Pilani, Birla Institute of Technology and Science;MPI-SWS;Max Planck Institute for Software Systems (MPI-SWS)", "aff_domain": "pilani.bits-pilani.ac.in;mpi-sws.org;mpi-sws.org", "position": "Undergrad student;Postdoc;Researcher", "bibtex": "@inproceedings{\nkotalwar2024hintsinbrowser,\ntitle={Hints-In-Browser: Benchmarking Language Models for Programming Feedback Generation},\nauthor={Nachiket Kotalwar and Alkis Gotovos and Adish Singla},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=JRMSC08gSF}\n}", "github": "", "reviewers": "Hsuw;5xx1;AZYp;7NvE", "pdf_size": 1863594, "rating": "6;6;7;9", "confidence": "3;4;3;4", "wc_summary_and_contributions": "50;63;114;141", "wc_strengths": "49;77;90;60", "wc_improvement": "32;77;69;93", "wc_limitations": "4;161;49;1", "wc_correctness": "40;80;28;130", "wc_clarity": "7;69;26;90", "wc_relation_to_prior_work": "35;73;29;5", "wc_documentation": "35;24;33;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "253;625;439;540", "wc_reply_reviewers": "26;24;0;16", "wc_reply_authors": "38;79;38;38", "reply_reviewers": "1;1;0;1", "reply_authors": "2;6;2;3", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 92.0, 37.04726710568541 ], "wc_strengths_avg": [ 69.0, 15.700318468107582 ], "wc_improvement_avg": [ 67.75, 22.37604746151563 ], "wc_limitations_avg": [ 53.75, 64.77412677913921 ], "wc_correctness_avg": [ 69.5, 39.88420740092499 ], "wc_clarity_avg": [ 48.0, 33.05298776207682 ], "wc_relation_to_prior_work_avg": [ 35.5, 24.387496796514398 ], "wc_documentation_avg": [ 27.75, 6.53356717268599 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 464.25, 138.60262443402723 ], "wc_reply_reviewers_avg": [ 16.5, 10.23474474522936 ], "wc_reply_authors_avg": [ 48.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11150930257187252793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pilani.bits-pilani.ac.in;mpi-sws.org;mpi-sws.org", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Birla Institute of Technology and Science;Max Planck Institute for Software Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.bits-pilani.ac.in;https://www.mpi-sws.org", "aff_unique_abbr": "BITS Pilani;MPI-SWS", "aff_campus_unique_index": "0", "aff_campus_unique": "Pilani;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "India;Germany" }, { "title": "A Foundation Model for Zero-shot Logical Query Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95712", "id": "JRSyMBBJi6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JRSyMBBJi6", "openreview": "https://openreview.net/forum?id=JRSyMBBJi6", "poster": "", "project": "", "author_site": "Michael Galkin, Jincheng Zhou, Bruno Ribeiro, Jian Tang, Zhaocheng Zhu", "tldr": "", "abstract": "Complex logical query answering (CLQA) in knowledge graphs (KGs) goes beyond simple KG completion and aims at answering compositional queries comprised of multiple projections and logical operations. Existing CLQA methods that learn parameters bound to certain entity or relation vocabularies can only be applied to the graph they are trained on which requires substantial training time before being deployed on a new graph. Here we present UltraQuery, the first foundation model for inductive reasoning that can zero-shot answer logical queries on any KG. The core idea of UltraQuery is to derive both projections and logical operations as vocabulary-independent functions which generalize to new entities and relations in any KG.\nWith the projection operation initialized from a pre-trained inductive KG completion model, UltraQuery can solve CLQA on any KG after finetuning on a single dataset. Experimenting on 23 datasets, UltraQuery in the zero-shot inference mode shows competitive or better query answering performance than best available baselines and sets a new state of the art on 15 of them.", "keywords": "graph neural networks;knowledge graph;knowledge graph reasoning;complex logical query answering;query reasoning", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/46f2a3e430234bad3daca63d451808a810143f87.zip", "author": "Mikhail Galkin;Jincheng Zhou;Bruno Ribeiro;Jian Tang;Zhaocheng Zhu", "authorids": "~Mikhail_Galkin1;~Jincheng_Zhou1;~Bruno_Ribeiro1;~Jian_Tang1;~Zhaocheng_Zhu1", "gender": "M;M;M;;M", "homepage": "https://migalkin.github.io/;https://www.jinchengzhou.net/;https://www.cs.purdue.edu/homes/ribeirob/;http://www.jian-tang.com;https://kiddozhu.github.io/", "dblp": "160/8154;;15/606;181/2667-5;195/0435", "google_scholar": "yfYRbG4AAAAJ;https://scholar.google.com/citations?hl=en;KIEleCsAAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ;Qd8JumkAAAAJ", "orcid": ";0000-0003-2328-6614;0000-0002-3527-6192;;", "linkedin": ";jincheng-zhou-042096176/;;;", "or_profile": "~Mikhail_Galkin1;~Jincheng_Zhou1;~Bruno_Ribeiro1;~Jian_Tang1;~Zhaocheng_Zhu1", "aff": "Intel;Purdue University;Stanford University;Mila, HEC Montreal;Universit\u00e9 de Montr\u00e9al", "aff_domain": "intel.com;purdue.edu;stanford.edu;hec.ca;mila.quebec", "position": "Researcher;PhD student;Visiting Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ngalkin2024a,\ntitle={A Foundation Model for Zero-shot Logical Query Reasoning},\nauthor={Mikhail Galkin and Jincheng Zhou and Bruno Ribeiro and Jian Tang and Zhaocheng Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JRSyMBBJi6}\n}", "github": "", "reviewers": "vHv9;5vKU;TgMu;rSrG", "pdf_size": 862811, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "2;4;3;4", "novelty": "2;4;4;3", "presentation": "3;4;3;3", "wc_summary": "55;124;70;90", "wc_strengths": "89;116;63;59", "wc_weaknesses": "92;65;40;96", "wc_questions": "34;10;162;6", "wc_limitations": "5;30;1;1", "wc_review": "275;345;336;252", "wc_reply_reviewers": "0;0;10;105", "wc_reply_authors": "40;44;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.75, 25.839649765428323 ], "wc_strengths_avg": [ 81.75, 22.884219453588535 ], "wc_weaknesses_avg": [ 73.25, 22.598395960775623 ], "wc_questions_avg": [ 53.0, 63.835726674018524 ], "wc_limitations_avg": [ 9.25, 12.090802289343747 ], "wc_review_avg": [ 302.0, 39.477841886303764 ], "wc_reply_reviewers_avg": [ 28.75, 44.21184795956849 ], "wc_reply_authors_avg": [ 21.0, 21.047565179849187 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1532024298767915589&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "intel.com;purdue.edu;stanford.edu;hec.ca;mila.quebec", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Intel;Purdue University;Stanford University;HEC Montreal;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "Intel Corporation;;;HEC Business School;", "aff_unique_url": "https://www.intel.com;https://www.purdue.edu;https://www.stanford.edu;https://www.hec.ca;https://www.umontreal.ca", "aff_unique_abbr": "Intel;Purdue;Stanford;HEC;UdeM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Montreal", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "United States;Canada" }, { "title": "MOTIVE: A Drug-Target Interaction Graph For Inductive Link Prediction", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97758", "id": "JU0QvhhfVp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JU0QvhhfVp", "openreview": "https://openreview.net/forum?id=JU0QvhhfVp", "poster": "/media/PosterPDFs/NeurIPS%202024/97758.png?t=1731386301.927901", "project": "", "author_site": "John Arevalo, Ellen Su, Anne Carpenter, Shantanu Singh", "tldr": "", "abstract": "Drug-target interaction (DTI) prediction is crucial for identifying new therapeutics and detecting mechanisms of action. While structure-based methods accurately model physical interactions between a drug and its protein target, cell-based assays such as Cell Painting can better capture complex DTI interactions. This paper introduces MOTIVE, a Morphological cOmpound Target Interaction Graph dataset comprising Cell Painting features for 11,000 genes and 3,600 compounds, along with their relationships extracted from seven publicly available databases. We provide random, cold-source (new drugs), and cold-target (new genes) data splits to enable rigorous evaluation under realistic use cases. Our benchmark results show that graph neural networks that use Cell Painting features consistently outperform those that learn from graph structure alone, feature-based models, and topological heuristics. MOTIVE accelerates both graph ML research and drug discovery by promoting the development of more reliable DTI prediction models. MOTIVE resources are available at https://github.com/carpenter-singh-lab/motive.", "keywords": "Drug-target interaction (DTI);Cell Painting;Link Prediction;Graph Neural Networks;Drug Discovery;Computer Vision", "primary_area": "", "supplementary_material": "/attachment/ff329964dbfc89e1648815769c46a24653c405db.pdf", "author": "John Arevalo;Ellen Su;Anne E Carpenter;Shantanu Singh", "authorids": "~John_Arevalo1;~Ellen_Su1;~Anne_E_Carpenter1;~Shantanu_Singh1", "gender": "M;F;F;Unspecified", "homepage": "https://scholar.google.com/citations?user=OTOILtIAAAAJ&hl=en;;;", "dblp": ";;84/7069;00/5284", "google_scholar": "OTOILtIAAAAJ;;;", "orcid": "0000-0002-1138-5036;0009-0007-8934-8642;0000-0003-1555-8261;", "linkedin": "~johnarevalo/;;;", "or_profile": "~John_Arevalo1;~Ellen_Su1;~Anne_E_Carpenter1;~Shantanu_Singh1", "aff": "Broad Institute of MIT and Harvard;Broad Institute;Broad Institute;", "aff_domain": "broadinstitute.org;broadinstitute.org;broadinstitute.org;", "position": "Postdoc;Researcher;Institute Scientist;", "bibtex": "@inproceedings{\narevalo2024motive,\ntitle={{MOTIVE}: A Drug-Target Interaction Graph For Inductive Link Prediction},\nauthor={John Arevalo and Ellen Su and Anne E Carpenter and Shantanu Singh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=JU0QvhhfVp}\n}", "github": "", "reviewers": "AitS;QTzg;gY9F;NgGB", "pdf_size": 497102, "rating": "6;7;8;8", "confidence": "5;3;4;4", "wc_summary_and_contributions": "63;77;47;81", "wc_strengths": "5;57;30;77", "wc_improvement": "5;223;112;52", "wc_limitations": "5;1;17;5", "wc_correctness": "1;15;1;5", "wc_clarity": "1;17;1;7", "wc_relation_to_prior_work": "1;1;1;6", "wc_documentation": "1;11;1;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "83;403;211;239", "wc_reply_reviewers": "29;23;0;0", "wc_reply_authors": "36;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 67.0, 13.341664064126334 ], "wc_strengths_avg": [ 42.25, 27.215574585152524 ], "wc_improvement_avg": [ 98.0, 81.52606945020715 ], "wc_limitations_avg": [ 7.0, 6.0 ], "wc_correctness_avg": [ 5.5, 5.722761571129799 ], "wc_clarity_avg": [ 6.5, 6.5383484153110105 ], "wc_relation_to_prior_work_avg": [ 2.25, 2.165063509461097 ], "wc_documentation_avg": [ 4.5, 4.092676385936225 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 234.0, 113.9254141971843 ], "wc_reply_reviewers_avg": [ 13.0, 13.171939872319491 ], "wc_reply_authors_avg": [ 9.0, 15.588457268119896 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VuIeYwFAgJ8J:scholar.google.com/&scioq=MOTIVE:+A+Drug-Target+Interaction+Graph+For+Inductive+Link+Prediction&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "broadinstitute.org;broadinstitute.org;broadinstitute.org;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Broad Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.broadinstitute.org", "aff_unique_abbr": "Broad", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Meteor: Mamba-based Traversal of Rationale for Large Language and Vision Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95711", "id": "JVKABhr6mP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JVKABhr6mP", "openreview": "https://openreview.net/forum?id=JVKABhr6mP", "poster": "", "project": "", "author_site": "Byung-Kwan Lee, Chae Won Kim, Beomchan Park, Yong Man Ro", "tldr": "", "abstract": "The rapid development of large language and vision models (LLVMs) has been driven by advances in visual instruction tuning. Recently, open-source LLVMs have curated high-quality visual instruction tuning datasets and utilized additional vision encoders or multiple computer vision models in order to narrow the performance gap with powerful closed-source LLVMs. These advancements are attributed to multifaceted information required for diverse capabilities, including fundamental image understanding, real-world knowledge about common-sense and non-object concepts (e.g., charts, diagrams, symbols, signs, and math problems), and step-by-step procedures for solving complex questions. Drawing from the multifaceted information, we present a new efficient LLVM, Mamba-based traversal of rationales (Meteor), which leverages multifaceted rationale to enhance understanding and answering capabilities. To embed lengthy rationales containing abundant information, we employ the Mamba architecture, capable of processing sequential data with linear time complexity. We introduce a new concept of traversal of rationale that facilitates efficient embedding of rationale. Subsequently, the backbone multimodal language model (MLM) is trained to generate answers with the aid of rationale. Through these steps, Meteor achieves significant improvements in vision language performances across multiple evaluation benchmarks requiring diverse capabilities, without scaling up the model size or employing additional vision encoders and computer vision models.", "keywords": "Large Language and Vision Model", "primary_area": "other", "supplementary_material": "", "author": "Byung-Kwan Lee;Chae Won Kim;Beomchan Park;Yong Man Ro", "authorids": "~Byung-Kwan_Lee1;~Chae_Won_Kim1;~Beomchan_Park1;~Yong_Man_Ro3", "gender": "M;;;", "homepage": "https://sites.google.com/view/byungkwanlee;;;", "dblp": "68/55.html/;;369/3270;", "google_scholar": "https://scholar.google.co.kr/citations?hl=en;;;", "orcid": ";;;", "linkedin": "byung-kwan-lee-82333716a/;;beomchan-park-8a8766156;", "or_profile": "~Byung-Kwan_Lee1;~Chae_Won_Kim1;~Beomchan_Park1;~Yong_Man_Ro3", "aff": "KAIST;;KAIST;", "aff_domain": "kaist.ac.kr;;kaist.ac.kr;", "position": "PhD student;;PhD student;", "bibtex": "@inproceedings{\nlee2024meteor,\ntitle={Meteor: Mamba-based Traversal of Rationale for Large Language and Vision Models},\nauthor={Byung-Kwan Lee and Chae Won Kim and Beomchan Park and Yong Man Ro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JVKABhr6mP}\n}", "github": "", "reviewers": "zsWf;XZUR;iRmr;Hj2J", "pdf_size": 8428846, "rating": "5;5;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "54;74;68;27", "wc_strengths": "20;61;91;66", "wc_weaknesses": "98;237;151;82", "wc_questions": "3;3;5;9", "wc_limitations": "10;4;1;2", "wc_review": "185;379;316;186", "wc_reply_reviewers": "17;27;50;31", "wc_reply_authors": "0;0;67;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.75, 18.115946014492316 ], "wc_strengths_avg": [ 59.5, 25.480384612481814 ], "wc_weaknesses_avg": [ 142.0, 60.502066080424065 ], "wc_questions_avg": [ 5.0, 2.449489742783178 ], "wc_limitations_avg": [ 4.25, 3.491060010942235 ], "wc_review_avg": [ 266.5, 84.00744014669176 ], "wc_reply_reviewers_avg": [ 31.25, 11.96609794377432 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=813571878899441999&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;;kaist.ac.kr;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "JVtwC9RzlI", "title": "TextGraphBART: Unifying Graph and Text with Structure Token", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a novel encoding method called ``Structure Token'' to unify the processing and generation of both graphs and texts with a single transformer-based model. This method allows graphs with text labels to be generated by a series of tokens, enabling both graph and text data to be handled interchangeably. By utilizing structure tokens, our model learns a unified representation, enhancing the ability to process diverse data without requiring extra modules or models. Additionally, the model can be trained like most transformer models with simply cross-entropy loss. To demonstrate the effectiveness of our method, we introduce a pre-training scheme inspired by mBART but adapted to leverage structure tokens. Our model, named TextGraphBART, uses the same architecture as normal Transformer Encoder-Decoder models with small modifications on the input and output to accommodate structure tokens. The evaluations show that this approach achieves comparable results against baseline models of similar sizes on both text-to-graph and graph-to-text generation tasks, without needing specialized loss functions or sampling techniques. These findings suggest that our approach can effectively bridge the gap between textual and structural data representations, and the design of encoding method could offer a new direction for future improvement.", "keywords": "transformer;text generation;graph generation;text-to-graph;graph-to-text", "primary_area": "generative_models", "supplementary_material": "/attachment/e86d94c266215c44a4e20bc25778df6310632ed0.zip", "author": "Ching-Wen Cheng;PING-CHENG YEH", "authorids": "~Ching-Wen_Cheng1;~PING-CHENG_YEH1", "gender": ";M", "homepage": "https://github.com/chengchingwen;https://www.ee.ntu.edu.tw/profile1.php?teacher_id=942016&p=3", "dblp": ";", "google_scholar": ";https://scholar.google.com.tw/citations?user=UKvYiSAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ching-Wen_Cheng1;~PING-CHENG_YEH1", "aff": "Independent Researcher;National Taiwan University", "aff_domain": "gmail.com;ntu.edu.tw", "position": "Independent Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024textgraphbart,\ntitle={TextGraph{BART}: Unifying Graph and Text with Structure Token},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=JVtwC9RzlI}\n}", "github": "", "project": "", "reviewers": "XEWk;Njzi;eg5E;3Tkr", "site": "https://openreview.net/forum?id=JVtwC9RzlI", "pdf_size": 480436, "rating": "3;3;4;4", "confidence": "4;3;4;3", "soundness": "2;1;2;2", "novelty": "2;2;2;2", "presentation": "2;1;2;3", "wc_summary": "138;104;77;87", "wc_strengths": "107;57;68;36", "wc_weaknesses": "346;197;78;297", "wc_questions": "2;12;25;63", "wc_limitations": "17;4;8;41", "wc_review": "610;374;256;524", "wc_reply_reviewers": "15;10;9;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.5, 23.178653972998518 ], "wc_strengths_avg": [ 67.0, 25.79728667902886 ], "wc_weaknesses_avg": [ 229.5, 102.63649448417459 ], "wc_questions_avg": [ 25.5, 23.13547060251855 ], "wc_limitations_avg": [ 17.5, 14.361406616345072 ], "wc_review_avg": [ 441.0, 136.1653406708183 ], "wc_reply_reviewers_avg": [ 12.0, 2.5495097567963922 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nJETAI8JTI8J:scholar.google.com/&scioq=TextGraphBART:+Unifying+Graph+and+Text+with+Structure+Token&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "Independent Researcher;National Taiwan University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ntu.edu.tw", "aff_unique_abbr": ";NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "id": "JWF1dN4TOd", "title": "Large-Scale Contextual Market Equilibrium Computation through Deep Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Market equilibrium is one of the most fundamental solution concepts in economics and social optimization analysis.\nExisting works on market equilibrium computation primarily focus on settings with a relatively small number of buyers.\nMotivated by this, our paper investigates the computation of market equilibrium in scenarios with a large-scale buyer population, where buyers and goods are represented by their contexts.\nBuilding on this realistic and generalized contextual market model, we introduce MarketFCNet, a deep learning-based method for approximating market equilibrium.\nWe start by parameterizing the allocation of each good to each buyer using a neural network, which depends solely on the context of the buyer and the good.\nNext, we propose an efficient method to estimate the loss function of the training algorithm unbiasedly, enabling us to optimize the network parameters through gradient descent.\nTo evaluate the approximated solution, we introduce a metric called Nash Gap, which quantifies the deviation of the given allocation and price pair from the market equilibrium.\nExperimental results indicate that MarketFCNet delivers competitive performance and significantly lower running times compared to existing methods as the market scale expands, demonstrating the potential of deep learning-based methods to accelerate the approximation of large-scale contextual market equilibrium.", "keywords": "market equilibrium;contextual market;equilibrium measure;neural networks", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yunxuan Ma;Yide Bian;Hao Xu;Weitao Yang;Jingshu Zhao;Zhijian Duan;Feng Wang;Xiaotie Deng", "authorids": "~Yunxuan_Ma1;~Yide_Bian1;~Hao_Xu26;~Weitao_Yang2;~Jingshu_Zhao1;~Zhijian_Duan1;~Feng_Wang23;~Xiaotie_Deng1", "gender": "M;M;;;F;M;;M", "homepage": "http://dagame.pku.edu.cn;;;https://mail.whu.edu.cn/coremail/XT6/index.jsp?sid=BAJEMbLLtMXsUTGEOmcYoDUvMBWrvoVb#setting.account.personal;;https://zjduan.github.io/;;https://cfcs.pku.edu.cn/english/people/faculty/xiaotiedeng/index.htm", "dblp": "338/9043;;;;;170/9206-1;;d/XiaotieDeng", "google_scholar": "mplXCJAAAAAJ;;;;_JtWTXEAAAAJ;7pZHSbkAAAAJ;;https://scholar.google.com.tw/citations?user=OBUwP_oAAAAJ", "orcid": ";;;;;0000-0002-4696-2139;;0000-0002-5282-6467", "linkedin": ";bian1d/;;;;;;", "or_profile": "~Yunxuan_Ma1;~Yide_Bian1;~Hao_Xu26;~Weitao_Yang2;~Jingshu_Zhao1;~Zhijian_Duan1;~Feng_Wang23;~Xiaotie_Deng1", "aff": "Peking University;Peking University;;Wuhan University;Wuhan University;Peking University;;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;;whu.edu.cn;whu.edu;pku.edu.cn;;pku.edu.cn", "position": "PhD student;Undergrad student;;MS student;Undergrad student;PhD student;;Full Professor", "bibtex": "@misc{\nanonymous2024largescale,\ntitle={Large-Scale Contextual Market Equilibrium Computation through Deep Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=JWF1dN4TOd}\n}", "github": "", "project": "", "reviewers": "5naV;m3RG;84Ug;Hy1m", "site": "https://openreview.net/forum?id=JWF1dN4TOd", "pdf_size": 571271, "rating": "5;5;5;6", "confidence": "4;1;3;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "60;22;109;81", "wc_strengths": "78;22;98;37", "wc_weaknesses": "101;22;130;140", "wc_questions": "3;22;82;2", "wc_limitations": "1;22;1;1", "wc_review": "243;110;420;261", "wc_reply_reviewers": "0;0;0;37", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;0;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.0, 31.741140496207755 ], "wc_strengths_avg": [ 58.75, 30.55630049596973 ], "wc_weaknesses_avg": [ 98.25, 46.29457311607917 ], "wc_questions_avg": [ 27.25, 32.59888801784503 ], "wc_limitations_avg": [ 6.25, 9.093266739736606 ], "wc_review_avg": [ 258.5, 109.97840697155056 ], "wc_reply_reviewers_avg": [ 9.25, 16.021469970012117 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-qFBIaB-beMJ:scholar.google.com/&scioq=Large-Scale+Contextual+Market+Equilibrium+Computation+through+Deep+Learning&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Peking University;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.whu.edu.cn/", "aff_unique_abbr": "Peking U;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mind the Gap Between Prototypes and Images in Cross-domain Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95710", "id": "JWLiK3kKWQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JWLiK3kKWQ", "openreview": "https://openreview.net/forum?id=JWLiK3kKWQ", "poster": "/media/PosterPDFs/NeurIPS%202024/95710.png?t=1731484338.3139474", "project": "", "author_site": "Hongduan Tian, Feng Liu, Zhanke Zhou, Tongliang Liu, Chengqi Zhang, Bo Han", "tldr": "", "abstract": "In _cross-domain few-shot classification_ (CFC), recent works mainly focus on adapting a simple transformation head on top of a frozen pre-trained backbone with few labeled data to project embeddings into a task-specific metric space where classification can be performed by measuring similarities between image instance and prototype representations. Technically, an _assumption_ implicitly adopted in such a framework is that the prototype and image instance embeddings share the same representation transformation. However, in this paper, we find that there naturally exists a gap, which resembles the modality gap, between the prototype and image instance embeddings extracted from the frozen pre-trained backbone, and simply applying the same transformation during the adaptation phase constrains exploring the optimal representation distributions and shrinks the gap between prototype and image representations. To solve this problem, we propose a simple yet effective method, _contrastive prototype-image adaptation_ (CoPA), to adapt different transformations for prototypes and images similarly to CLIP by treating prototypes as text prompts. \n Extensive experiments on Meta-Dataset demonstrate that CoPA achieves the _state-of-the-art_ performance more efficiently. Meanwhile, further analyses also indicate that CoPA can learn better representation clusters, enlarge the gap, and achieve the minimum validation loss at the enlarged gap.", "keywords": "few-shot classification;cross domain adaptation;representation gap;computer vision;deep learning", "primary_area": "evaluation", "supplementary_material": "/attachment/c10c001d17667cd72b836d216e877a04057defdb.zip", "author": "Hongduan Tian;Feng Liu;Zhanke Zhou;Tongliang Liu;Chengqi Zhang;Bo Han", "authorids": "~Hongduan_Tian1;~Feng_Liu2;~Zhanke_Zhou1;~Tongliang_Liu1;~Chengqi_Zhang1;~Bo_Han1", "gender": "M;M;M;M;M;M", "homepage": "https://hongduantian.github.io/;https://fengliu90.github.io/index.html;https://andrewzhou924.github.io/;https://tongliang-liu.github.io/;https://research.polyu.edu.hk/en/persons/chengqi-zhang;https://bhanml.github.io/", "dblp": "270/0676;77/1318-3;285/5311;150/6667;71/964;241/0472-3", "google_scholar": "07lUB9kAAAAJ;https://scholar.google.com/citations?hl=en;GVXErr0AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.com.au/citations?user=B6lBmqEAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0002-5005-9129;;;0000-0001-5715-7154;", "linkedin": ";alexfengliu;;;chengqi-zhang-55aa8910/;", "or_profile": "~Hongduan_Tian1;~Feng_Liu2;~Zhanke_Zhou1;~Tongliang_Liu1;~Chengqi_Zhang1;~bo_han2", "aff": "Hong Kong Baptist University;University of Melbourne;Hong Kong Baptist University;Mohamed bin Zayed University of Artificial Intelligence;University of Technology Sydney;MBZUAI", "aff_domain": "hkbu.edu.hk;unimelb.edu.au;hkbu.edu.hk;mbzuai.ac.ae;uts.edu.au;mbzuai.ac.ae", "position": "PhD student;Assistant Professor;PhD student;Affiliated Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ntian2024mind,\ntitle={Mind the Gap Between Prototypes and Images in Cross-domain Finetuning},\nauthor={Hongduan Tian and Feng Liu and Zhanke Zhou and Tongliang Liu and Chengqi Zhang and Bo Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JWLiK3kKWQ}\n}", "github": "", "reviewers": "eGag;Ms7f;PX84", "pdf_size": 2141045, "rating": "4;6;7", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "97;73;95", "wc_strengths": "40;69;92", "wc_weaknesses": "154;116;55", "wc_questions": "78;108;2", "wc_limitations": "1;6;87", "wc_review": "370;372;331", "wc_reply_reviewers": "162;130;17", "wc_reply_authors": "778;270;112", "reply_reviewers": "1;1;1", "reply_authors": "6;3;3", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 88.33333333333333, 10.873004286866728 ], "wc_strengths_avg": [ 67.0, 21.275964529643932 ], "wc_weaknesses_avg": [ 108.33333333333333, 40.778534658431376 ], "wc_questions_avg": [ 62.666666666666664, 44.61190673152429 ], "wc_limitations_avg": [ 31.333333333333332, 39.41516910474387 ], "wc_review_avg": [ 357.6666666666667, 18.87385022252275 ], "wc_reply_reviewers_avg": [ 103.0, 62.198606629623676 ], "wc_reply_authors_avg": [ 386.6666666666667, 284.1329892067367 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aYmDBCxKCEEJ:scholar.google.com/&scioq=Mind+the+Gap+Between+Prototypes+and+Images+in+Cross-domain+Finetuning&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "hkbu.edu.hk;unimelb.edu.au;hkbu.edu.hk;mbzuai.ac.ae;uts.edu.au;mbzuai.ac.ae", "author_num": 6, "aff_unique_index": "0;1;0;2;3;2", "aff_unique_norm": "Hong Kong Baptist University;University of Melbourne;Mohamed bin Zayed University of Artificial Intelligence;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.unimelb.edu.au;https://mbzuai.ac.ae;https://www.uts.edu.au", "aff_unique_abbr": "HKBU;UniMelb;MBZUAI;UTS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;2;1;2", "aff_country_unique": "China;Australia;United Arab Emirates" }, { "title": "Near-Minimax-Optimal Distributional Reinforcement Learning with a Generative Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95709", "id": "JXKbf1d4ib", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JXKbf1d4ib", "openreview": "https://openreview.net/forum?id=JXKbf1d4ib", "poster": "", "project": "", "author_site": "Mark Rowland, Kevin Li, Remi Munos, Clare Lyle, Yunhao Tang, Will Dabney", "tldr": "", "abstract": "We propose a new algorithm for model-based distributional reinforcement learning (RL), and prove that it is minimax-optimal for approximating return distributions in the generative model regime (up to logarithmic factors), the first result of this kind for any distributional RL algorithm. Our analysis also provides new theoretical perspectives on categorical approaches to distributional RL, as well as introducing a new distributional Bellman equation, the stochastic categorical CDF Bellman equation, which we expect to be of independent interest. Finally, we provide an experimental study comparing a variety of model-based distributional RL algorithms, with several key takeaways for practitioners.", "keywords": "Reinforcement learning;Distributional reinforcement learning;dynamic programming;TD learning;sample complexity;theory", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Mark Rowland;Li Kevin Wenliang;Remi Munos;Clare Lyle;Yunhao Tang;Will Dabney", "authorids": "~Mark_Rowland1;~Li_Kevin_Wenliang1;~Remi_Munos1;~Clare_Lyle1;~Yunhao_Tang1;~Will_Dabney1", "gender": "M;;M;;M;M", "homepage": "http://sites.google.com/view/markrowland;https://kevin-w-li.github.io/;http://researchers.lille.inria.fr/~munos/;;https://robintyh1.github.io;", "dblp": "86/4090;255/7009;69/6815;192/1910;210/2229;https://dblp.uni-trier.de/pers/hd/d/Dabney:Will", "google_scholar": "https://scholar.google.co.uk/citations?user=-0U84zMAAAAJ;https://scholar.google.co.uk/citations?user=MW45NMEAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?user=dR-7QW8AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Mark_Rowland1;~Li_Kevin_Wenliang1;~Remi_Munos1;~Clare_Lyle1;~Yunhao_Tang1;~Will_Dabney1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "google.com;deepmind.com;google.com;google.com;deepmind.com;google.com", "position": "Research Scientist;Researcher;Research scientist;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nrowland2024nearminimaxoptimal,\ntitle={Near-Minimax-Optimal Distributional Reinforcement Learning with a Generative Model},\nauthor={Mark Rowland and Li Kevin Wenliang and Remi Munos and Clare Lyle and Yunhao Tang and Will Dabney},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JXKbf1d4ib}\n}", "github": "", "reviewers": "Wuoe;1p2t;WbyR;HAEX", "pdf_size": 962720, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;4;3", "wc_summary": "54;68;39;47", "wc_strengths": "114;122;63;29", "wc_weaknesses": "123;145;113;33", "wc_questions": "175;50;12;12", "wc_limitations": "8;10;13;1", "wc_review": "474;395;240;122", "wc_reply_reviewers": "10;110;11;8", "wc_reply_authors": "32;234;0;32", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 52.0, 10.653637876331258 ], "wc_strengths_avg": [ 82.0, 38.05916446797013 ], "wc_weaknesses_avg": [ 103.5, 42.31725416422951 ], "wc_questions_avg": [ 62.25, 66.91926105389987 ], "wc_limitations_avg": [ 8.0, 4.415880433163924 ], "wc_review_avg": [ 307.75, 136.33116848321956 ], "wc_reply_reviewers_avg": [ 34.75, 43.45903243285566 ], "wc_reply_authors_avg": [ 74.5, 93.00940812627505 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3484052952589515754&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "google.com;deepmind.com;google.com;google.com;deepmind.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Energy-Guided Continuous Entropic Barycenter Estimation for General Costs", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95708", "id": "JZHFRLoqDq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JZHFRLoqDq", "openreview": "https://openreview.net/forum?id=JZHFRLoqDq", "poster": "", "project": "", "author_site": "Alexander Kolesov, Petr Mokrov, Igor Udovichenko, Milena Gazdieva, Gudmund Pammer, Anastasis Kratsios, Evgeny Burnaev, Aleksandr Korotin", "tldr": "", "abstract": "Optimal transport (OT) barycenters are a mathematically grounded way of averaging probability distributions while capturing their geometric properties. In short, the barycenter task is to take the average of a collection of probability distributions w.r.t. given OT discrepancies. We propose a novel algorithm for approximating the continuous Entropic OT (EOT) barycenter for arbitrary OT cost functions. Our approach is built upon the dual reformulation of the EOT problem based on weak OT, which has recently gained the attention of the ML community. Beyond its novelty, our method enjoys several advantageous properties: (i) we establish quality bounds for the recovered solution; (ii) this approach seamlessly interconnects with the Energy-Based Models (EBMs) learning procedure enabling the use of well-tuned algorithms for the problem of interest; (iii) it provides an intuitive optimization scheme avoiding min-max, reinforce and other intricate technical tricks. For validation, we consider several low-dimensional scenarios and image-space setups, including *non-Euclidean* cost functions. Furthermore, we investigate the practical task of learning the barycenter on an image manifold generated by a pretrained generative model, opening up new directions for real-world applications. Our code is available at https://github.com/justkolesov/EnergyGuidedBarycenters.", "keywords": "energy-based model;generative model;optimal transport;entropic optimal transport barycenters;general optimal transport cost", "primary_area": "generative_models", "supplementary_material": "/attachment/7a2345fe5eb3def31730d52f0503c7f4b1d72603.zip", "author": "Alexander Kolesov;Petr Mokrov;Igor Udovichenko;Milena Gazdieva;Gudmund Pammer;Anastasis Kratsios;Evgeny Burnaev;Alexander Korotin", "authorids": "~Alexander_Kolesov1;~Petr_Mokrov1;~Igor_Udovichenko1;~Milena_Gazdieva1;~Gudmund_Pammer1;~Anastasis_Kratsios1;~Evgeny_Burnaev1;~Alexander_Korotin2", "gender": "M;M;;F;M;Non-Binary;M;", "homepage": "https://github.com/Kolessov;https://github.com/PetrMokrov;;;https://people.math.ethz.ch/~gpammer/;https://anastasiskratsios.github.io/;http://faculty.skoltech.ru/people/evgenyburnaev;", "dblp": "287/4380;;;309/6585;;;144/7845;", "google_scholar": "WyAI_wUAAAAJ;CRsi4IkAAAAJ;;h52_Zx8AAAAJ;ipItetYAAAAJ;https://scholar.google.ca/citations?user=9D-bHFgAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;", "orcid": ";;;0000-0003-0047-1577;0000-0003-2494-8739;0000-0001-6791-3371;0000-0001-8424-0690;", "linkedin": ";;;;;anastasiskratsios/;;", "or_profile": "~Alexander_Kolesov1;~Petr_Mokrov1;~Igor_Udovichenko1;~Milena_Gazdieva1;~Gudmund_Pammer1;~Anastasis_Kratsios1;~Evgeny_Burnaev1;~Alexander_Korotin2", "aff": "The Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;;Skolkovo Institute of Science and Technology;ETHZ - ETH Zurich;McMaster University;Skolkovo Institute of Science and Technology;", "aff_domain": "skoltech.ru;skolkovotech.ru;;skoltech.ru;ethz.ch;mcmaster.ca;skoltech.ru;", "position": "PhD student;PhD student;;PhD student;Postdoc;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nkolesov2024energyguided,\ntitle={Energy-Guided Continuous Entropic Barycenter Estimation for General Costs},\nauthor={Alexander Kolesov and Petr Mokrov and Igor Udovichenko and Milena Gazdieva and Gudmund Pammer and Anastasis Kratsios and Evgeny Burnaev and Alexander Korotin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JZHFRLoqDq}\n}", "github": "", "reviewers": "wGnY;unyK;kw6j;LoLb;xS43", "pdf_size": 0, "rating": "6;6;6;7;7", "confidence": "3;3;4;4;4", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "118;50;63;278;116", "wc_strengths": "40;23;28;136;79", "wc_weaknesses": "177;347;88;342;147", "wc_questions": "3;101;82;288;96", "wc_limitations": "2;11;39;17;1", "wc_review": "340;532;300;1061;439", "wc_reply_reviewers": "0;24;25;40;243", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 125.0, 81.25023076890305 ], "wc_strengths_avg": [ 61.2, 42.24405283587265 ], "wc_weaknesses_avg": [ 220.2, 105.46544457783317 ], "wc_questions_avg": [ 114.0, 93.92976099192417 ], "wc_limitations_avg": [ 14.0, 13.827508813954884 ], "wc_review_avg": [ 534.4, 275.37218450671446 ], "wc_reply_reviewers_avg": [ 66.4, 89.22466026833612 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16650540745146647880&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "skoltech.ru;skolkovotech.ru;;skoltech.ru;ethz.ch;mcmaster.ca;skoltech.ru;", "author_num": 8, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;ETH Zurich;McMaster University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.skoltech.ru;https://www.ethz.ch;https://www.mcmaster.ca", "aff_unique_abbr": "Skoltech;ETHZ;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "Russian Federation;Switzerland;Canada" }, { "title": "When LLMs Meet Cunning Texts: A Fallacy Understanding Benchmark for Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97757", "id": "Jaye8aWpmZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jaye8aWpmZ", "openreview": "https://openreview.net/forum?id=Jaye8aWpmZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97757.png?t=1731139989.3742223", "project": "", "author_site": "Yinghui Li, Qingyu Zhou, Yuanzhen Luo, Shirong Ma, Yangning Li, Hai-Tao Zheng, Xuming Hu, Philip S Yu", "tldr": "", "abstract": "Recently, Large Language Models (LLMs) make remarkable evolutions in language understanding and generation. Following this, various benchmarks for measuring all kinds of capabilities of LLMs have sprung up. In this paper, we challenge the reasoning and understanding abilities of LLMs by proposing a FaLlacy Understanding Benchmark (FLUB) containing cunning texts that are easy for humans to understand but difficult for models to grasp. Specifically, the cunning texts that FLUB focuses on mainly consist of the tricky, humorous, and misleading texts collected from the real internet environment. And we design three tasks with increasing difficulty in the FLUB benchmark to evaluate the fallacy understanding ability of LLMs. Based on FLUB, we investigate the performance of multiple representative and advanced LLMs, reflecting our FLUB is challenging and worthy of more future study. Interesting discoveries and valuable insights are achieved in our extensive experiments and detailed analyses. We hope that our benchmark can encourage the community to improve LLMs' ability to understand fallacies. Our data and codes are available at https://github.com/THUKElab/FLUB.", "keywords": "Cunning Texts;Fallacy Understanding;LLMs Evaluation", "primary_area": "", "supplementary_material": "/attachment/d17cefa2fa7368c1e76c922faff133393c89de72.pdf", "author": "Yinghui Li;Qingyu Zhou;Yuanzhen Luo;Shirong Ma;Yangning Li;Hai-Tao Zheng;Xuming Hu;Philip S. Yu", "authorids": "~Yinghui_Li1;~Qingyu_Zhou1;~Yuanzhen_Luo1;~Shirong_Ma1;~Yangning_Li1;~Hai-Tao_Zheng2;~Xuming_Hu1;~Philip_S._Yu1", "gender": "M;M;M;;M;M;M;M", "homepage": "https://github.com/geekjuruo;https://res.qyzhou.me/;https://github.com/LuoYuanzhen;;https://github.com/HUSTLyn;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml;https://xuminghu.github.io/;https://cs.uic.edu/profiles/philip-yu/", "dblp": "243/8822.html;199/2091;;;315/0403;20/134-2;262/3664;y/PhilipSYu", "google_scholar": "xTM9pKsAAAAJ;buLOsq0AAAAJ;;;https://scholar.google.com.hk/citations?user=BmX7lQkAAAAJ;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ;dbBKbXoAAAAJ;D0lL1r0AAAAJ", "orcid": ";0000-0002-4389-1582;;0009-0008-1686-407X;;0000-0001-5128-5649;0000-0001-6075-4224;0000-0002-3491-5968", "linkedin": ";;;;;;;", "or_profile": "~Yinghui_Li1;~Qingyu_Zhou1;~Yuanzhen_Luo1;~Shirong_Ma1;~Yangning_Li1;~Hai-Tao_Zheng2;~Xuming_Hu1;~Philip_S._Yu1", "aff": "Tsinghua University;OPPO Research Institute;China university of petroleum;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;University of Illinois Chicago", "aff_domain": "tsinghua.edu.cn;oppo.com;cup.edu;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;uic.edu", "position": "PhD student;Researcher;MS student;MS student;PhD student;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024when,\ntitle={When {LLM}s Meet Cunning Texts: A Fallacy Understanding Benchmark for Large Language Models},\nauthor={Yinghui Li and Qingyu Zhou and Yuanzhen Luo and Shirong Ma and Yangning Li and Hai-Tao Zheng and Xuming Hu and Philip S. Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Jaye8aWpmZ}\n}", "github": "", "reviewers": "SvrS;VwUX;2iuj", "pdf_size": 1118937, "rating": "7;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "59;146;50", "wc_strengths": "69;72;41", "wc_improvement": "117;167;73", "wc_limitations": "1;144;9", "wc_correctness": "1;60;1", "wc_clarity": "1;10;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "251;602;178", "wc_reply_reviewers": "0;35;0", "wc_reply_authors": "475;1273;308", "reply_reviewers": "0;1;0", "reply_authors": "1;3;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 85.0, 43.289721643826724 ], "wc_strengths_avg": [ 60.666666666666664, 13.960261060914616 ], "wc_improvement_avg": [ 119.0, 38.40138886377245 ], "wc_limitations_avg": [ 51.333333333333336, 65.60657148656848 ], "wc_correctness_avg": [ 20.666666666666668, 27.812866726670865 ], "wc_clarity_avg": [ 4.0, 4.242640687119285 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.6666666666667, 185.08436514795685 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 685.3333333333334, 421.09882714420166 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6828341189417044133&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tsinghua.edu.cn;oppo.com;cup.edu;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;uic.edu", "author_num": 8, "aff_unique_index": "0;1;2;0;0;0;0;3", "aff_unique_norm": "Tsinghua University;OPPO Research Institute;China University of Petroleum;University of Illinois at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.oppo.com/en;http://www.cup.edu.cn;https://www.uic.edu", "aff_unique_abbr": "THU;OPPO RI;CUP;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Open LLMs are Necessary for Current Private Adaptations and Outperform their Closed Alternatives", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95707", "id": "Jf40H5pRW0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jf40H5pRW0", "openreview": "https://openreview.net/forum?id=Jf40H5pRW0", "poster": "", "project": "", "author_site": "Vincent Hanke, Tom Blanchard, Franziska Boenisch, Iyiola Olatunji, Michael Backes, Adam Dziedzic", "tldr": "", "abstract": "While open Large Language Models (LLMs) have made significant progress, they still fall short of matching the performance of their closed, proprietary counterparts, making the latter attractive even for the use on highly *private* data. \nRecently, various new methods have been proposed to adapt closed LLMs to private data without leaking private information to third parties and/or the LLM provider. \nIn this work, we analyze the privacy protection and performance of the four most recent methods for private adaptation of closed LLMs. \nBy examining their threat models and thoroughly comparing their performance under different privacy levels according to differential privacy (DP), various LLM architectures, and multiple datasets for classification and generation tasks, we find that: (1) all the methods leak query data, i.e., the (potentially sensitive) user data that is queried at inference time, to the LLM provider, (2) three out of four methods also leak large fractions of private training data to the LLM provider while the method that protects private data requires a local open LLM, (3) all the methods exhibit lower performance compared to three private gradient-based adaptation methods for *local open LLMs*, and (4) the private adaptation methods for closed LLMs incur higher monetary training and query costs than running the alternative methods on local open LLMs.\nThis yields the conclusion that, to achieve truly *privacy-preserving LLM adaptations* that yield high performance and more privacy at lower costs, taking into account current methods and models, one should use open LLMs.", "keywords": "LLM;privacy;differential privacy;dpsgd;LoRA;private fine-tuning;PromptPATE;PromptDPSGD;Adaptations;soft prompt;prefix tuning;hard prompts", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/b5a8f5481cb70b99eecf34cb9c35b62af88d23f4.zip", "author": "Vincent Hanke;Tom Blanchard;Franziska Boenisch;Iyiola Emmanuel Olatunji;Michael Backes;Adam Dziedzic", "authorids": "~Vincent_Hanke1;~Tom_Blanchard1;~Franziska_Boenisch2;~Iyiola_Emmanuel_Olatunji1;~Michael_Backes3;~Adam_Dziedzic1", "gender": "M;M;;M;;", "homepage": ";;;;;", "dblp": "393/3556;;;213/8073;;", "google_scholar": "https://scholar.google.de/citations?user=QAilX5kAAAAJ;;;https://scholar.google.com/citations?hl=en;;", "orcid": ";;;;;", "linkedin": ";tom-blanchard-308056247/;;;;", "or_profile": "~Vincent_Hanke1;~Tom_Blanchard1;~Franziska_Boenisch2;~Iyiola_Emmanuel_Olatunji1;~Michael_Backes3;~Adam_Dziedzic1", "aff": "CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security;;L3S Research Center, Leibniz university, Hannover;;", "aff_domain": "cispa.de;cispa.de;;l3s.de;;", "position": "PhD student;Intern;;PhD student;;", "bibtex": "@inproceedings{\nhanke2024open,\ntitle={Open {LLM}s are Necessary for Current Private Adaptations and Outperform their Closed Alternatives},\nauthor={Vincent Hanke and Tom Blanchard and Franziska Boenisch and Iyiola Emmanuel Olatunji and Michael Backes and Adam Dziedzic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jf40H5pRW0}\n}", "github": "", "reviewers": "k2x5;gGr3;my5W;1UUG", "pdf_size": 731944, "rating": "4;5;7;7", "confidence": "4;4;4;2", "soundness": "2;2;3;3", "novelty": "1;2;3;3", "presentation": "2;3;4;3", "wc_summary": "35;29;50;67", "wc_strengths": "33;75;88;56", "wc_weaknesses": "328;76;206;12", "wc_questions": "40;17;215;1", "wc_limitations": "8;2;37;3", "wc_review": "444;199;596;139", "wc_reply_reviewers": "415;23;608;0", "wc_reply_authors": "2162;57;609;0", "reply_reviewers": "2;1;2;0", "reply_authors": "6;3;3;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 45.25, 14.703315952532613 ], "wc_strengths_avg": [ 63.0, 20.724381776062707 ], "wc_weaknesses_avg": [ 155.5, 121.67477141955106 ], "wc_questions_avg": [ 68.25, 85.85270816928258 ], "wc_limitations_avg": [ 12.5, 14.326548781894402 ], "wc_review_avg": [ 344.5, 184.76539178103675 ], "wc_reply_reviewers_avg": [ 261.5, 259.27254000375746 ], "wc_reply_authors_avg": [ 707.0, 873.0661486966494 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13795266123074507684&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cispa.de;cispa.de;;l3s.de;;", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Leibniz University Hannover", "aff_unique_dep": ";L3S Research Center", "aff_unique_url": "https://www.cispa.de/;https://www.uni-hannover.de", "aff_unique_abbr": "CISPA;LUH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hannover", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "APIGen: Automated PIpeline for Generating Verifiable and Diverse Function-Calling Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97756", "id": "Jfg3vw2bjx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jfg3vw2bjx", "openreview": "https://openreview.net/forum?id=Jfg3vw2bjx", "poster": "", "project": "", "author_site": "Zuxin Liu, Thai Hoang, Jianguo Zhang, Ming Zhu, Tian Lan, Shirley kokane, Juntao Tan, Weiran Yao, Zhiwei Liu, Yihao Feng, Rithesh R N, Liangwei Yang, Silvio Savarese, Juan Carlos Niebles, Huan Wang, Shelby Heinecke, Caiming Xiong", "tldr": "", "abstract": "The advancement of function-calling agent models requires diverse, reliable, and high-quality datasets. This paper presents APIGen, an automated data generation pipeline designed to synthesize high-quality datasets for function-calling applications. We leverage APIGen and collect 3,673 executable APIs across 21 different categories to generate diverse function-calling datasets in a scalable and structured manner. Each data in our dataset is verified through three hierarchical stages: format checking, actual function executions, and semantic verification, improving its reliability and correctness. We demonstrate that models trained with our curated datasets, even with only 7B parameters, can achieve state-of-the-art performance on the Berkeley Function-Calling Benchmark, outperforming multiple GPT-4 models. Moreover, our 1B model achieves exceptional performance, surpassing GPT-3.5-Turbo and Claude-3 Haiku. We release a dataset containing 60,000 high-quality entries, aiming to advance the field of function-calling agent domains. The dataset and models are available on the project homepage \\url{https://apigen-pipeline.github.io/}.", "keywords": "LLM Agent;function-calling;data synthesis;data generation", "primary_area": "", "supplementary_material": "/attachment/0d8ff49a19b7eae9f741a57c7038764113fbc8a3.pdf", "author": "Zuxin Liu;Thai Quoc Hoang;Jianguo Zhang;Ming Zhu;Tian Lan;Shirley Kokane;Juntao Tan;Weiran Yao;Zhiwei Liu;Yihao Feng;Rithesh R N;Liangwei Yang;Silvio Savarese;Juan Carlos Niebles;Huan Wang;Shelby Heinecke;Caiming Xiong", "authorids": "~Zuxin_Liu1;~Thai_Quoc_Hoang1;~Jianguo_Zhang3;~Ming_Zhu1;~Tian_Lan13;~Shirley_Kokane1;~Juntao_Tan1;~Weiran_Yao1;~Zhiwei_Liu3;~Yihao_Feng1;~Rithesh_R_N1;~Liangwei_Yang1;~Silvio_Savarese1;~Juan_Carlos_Niebles1;~Huan_Wang1;~Shelby_Heinecke1;~Caiming_Xiong1", "gender": "M;M;M;;;F;M;M;;M;M;M;M;M;M;F;M", "homepage": "https://www.zuxin.me;;https://jianguoz.github.io/;;;;;;https://sites.google.com/view/zhiwei-jim;;https://ritheshrn.github.io/;https://yangliangwei.github.io/;;http://www.niebles.net/;http://www.cs.yale.edu/homes/wang-huan/;http://www.shelbyh.ai;http://cmxiong.com/", "dblp": "227/3137;250/5655.html;;;;;;192/3295;90/9499-1.html;204/3696;352/4275;260/5064.html;50/3578;26/647;70/6155-16.html;;80/7282", "google_scholar": "5ApCTCoAAAAJ;3v-x1aQAAAAJ;mAAVFEsAAAAJ;;;;hbrLcKIAAAAJ;rr_leUAAAAAJ;https://scholar.google.com/citations?;uqnNle0AAAAJ;https://scholar.google.ca/citations?user=Y1XpJucAAAAJ;j5HiJocAAAAJ;ImpbxLsAAAAJ;hqNhUCYAAAAJ;7NpTttkAAAAJ;tS937l8AAAAJ;vaSdahkAAAAJ", "orcid": "0000-0001-7412-5074;;;;;;;;0000-0003-1525-1067;;;0000-0001-5660-766X;;;;;", "linkedin": "zuxin-liu/;https://linkedin.com/in/quocthai9120;jianguo-zhang-3b267712a;;tian-lan-770b4b165/;https://www.linkedin.com/shirley-kokane/;;;;;rithesh-r-n/;liangwei-yang-602515148/;;;huanwangyale/;shelbyheinecke;caiming-xiong-150a1417", "or_profile": "~Zuxin_Liu1;~Thai_Quoc_Hoang1;~Jianguo_Zhang3;~Ming_Zhu1;~Tian_Lan13;~Shirley_Kokane1;~Juntao_Tan1;~Weiran_Yao1;~Zhiwei_Liu3;~Yihao_Feng1;~Rithesh_R_N1;~Liangwei_Yang1;~Silvio_Savarese1;~Juan_Carlos_Niebles1;~Huan_Wang1;~Shelby_Heinecke1;~Caiming_Xiong1", "aff": "Salesforce AI Research;Salesforce Research;SalesForce AI Research;;SalesForce;SalesForce.com;Rutgers University;SalesForce.com;Salesforce AI Research;Salesforce AI Research;SalesForce.com;University of Illinois, Chicago;Stanford University;Stanford University;Salesforce.com;Salesforce Research;Salesforce Research", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;;salesforce.com;salesforce.com;rutgers.edu;salesforce.com;salesforce.com;salesforce.com;salesforce.com;uic.edu;stanford.edu;stanford.edu;salesforce.com;salesforce.com;salesforce.com", "position": "Researcher;Machine Learning Engineer;Researcher;;Researcher;Researcher;PhD student;Researcher;Researcher;Researcher;Researcher;PhD student;Adjunct Professor;Adjunct Professor;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nliu2024apigen,\ntitle={{APIG}en: Automated {PI}peline for Generating Verifiable and Diverse Function-Calling Datasets},\nauthor={Zuxin Liu and Thai Quoc Hoang and Jianguo Zhang and Ming Zhu and Tian Lan and Shirley Kokane and Juntao Tan and Weiran Yao and Zhiwei Liu and Yihao Feng and Rithesh R N and Liangwei Yang and Silvio Savarese and Juan Carlos Niebles and Huan Wang and Shelby Heinecke and Caiming Xiong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Jfg3vw2bjx}\n}", "github": "", "reviewers": "8WHo;7yiE;2yY8", "pdf_size": 2007450, "rating": "6;7;7", "confidence": "4;4;3", "wc_summary_and_contributions": "64;142;129", "wc_strengths": "2;54;51", "wc_improvement": "59;94;10", "wc_limitations": "8;126;5", "wc_correctness": "8;39;23", "wc_clarity": "5;11;1", "wc_relation_to_prior_work": "104;42;1", "wc_documentation": "5;23;1", "wc_additional_feedback": "1;1;1", "wc_review": "256;532;222", "wc_reply_reviewers": "12;78;20", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 111.66666666666667, 34.120700787384514 ], "wc_strengths_avg": [ 35.666666666666664, 23.837412238374835 ], "wc_improvement_avg": [ 54.333333333333336, 34.451253807211266 ], "wc_limitations_avg": [ 46.333333333333336, 56.346152387619945 ], "wc_correctness_avg": [ 23.333333333333332, 12.657891697365017 ], "wc_clarity_avg": [ 5.666666666666667, 4.109609335312651 ], "wc_relation_to_prior_work_avg": [ 49.0, 42.339894504670966 ], "wc_documentation_avg": [ 9.666666666666666, 9.568466729604882 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 336.6666666666667, 138.8172259563712 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 29.4089933334837 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2008015533532704650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "salesforce.com;salesforce.com;salesforce.com;;salesforce.com;salesforce.com;rutgers.edu;salesforce.com;salesforce.com;salesforce.com;salesforce.com;uic.edu;stanford.edu;stanford.edu;salesforce.com;salesforce.com;salesforce.com", "author_num": 17, "aff_unique_index": "0;0;0;0;0;1;0;0;0;0;2;3;3;0;0;0", "aff_unique_norm": "Salesforce;Rutgers University;University of Illinois at Chicago;Stanford University", "aff_unique_dep": "Salesforce AI Research;;;", "aff_unique_url": "https://www.salesforce.com;https://www.rutgers.edu;https://www.uic.edu;https://www.stanford.edu", "aff_unique_abbr": "Salesforce AI;Rutgers;UIC;Stanford", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Chicago;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unravelling in Collaborative Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95706", "id": "JfxqomOs60", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JfxqomOs60", "openreview": "https://openreview.net/forum?id=JfxqomOs60", "poster": "/media/PosterPDFs/NeurIPS%202024/95706.png?t=1731674904.5243876", "project": "", "author_site": "Aymeric Capitaine, Etienne Boursier, Antoine Scheid, Eric Moulines, Michael Jordan, El-Mahdi El-Mhamdi, Alain Durmus", "tldr": "", "abstract": "Collaborative learning offers a promising avenue for leveraging decentralized data. However, collaboration in groups of strategic learners is not a given. In this work, we consider strategic agents who wish to train a model together but have sampling distributions of different quality. The collaboration is organized by a benevolent aggregator who gathers samples so as to maximize total welfare, but is unaware of data quality. This setting allows us to shed light on the deleterious effect of adverse selection in collaborative learning. More precisely, we demonstrate that when data quality indices are private, the coalition may undergo a phenomenon known as unravelling, wherein it shrinks up to the point that it becomes empty or solely comprised of the worst agent. We show how this issue can be addressed without making use of external transfers, by proposing a novel method inspired by probabilistic verification. This approach makes the grand coalition a Nash equilibrium with high probability despite information asymmetry, thereby breaking unravelling.", "keywords": "Collaborative learning;unravelling;adverse selection", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Aymeric Capitaine;Etienne Boursier;Antoine Scheid;Eric Moulines;Michael Jordan;El-Mahdi El-Mhamdi;Alain Oliviero Durmus", "authorids": "~Aymeric_Capitaine1;~Etienne_Boursier1;~Antoine_Scheid1;~Eric_Moulines1;~Michael_Jordan1;~El-Mahdi_El-Mhamdi1;~Alain_Oliviero_Durmus1", "gender": "M;M;M;M;M;M;M", "homepage": "https://fr.linkedin.com/in/aymeric-capitaine-ab00a818b;https://eboursier.github.io/;;;http://www.cs.berkeley.edu/~jordan/;;https://elmahdielmhamdi.com", "dblp": ";203/8633;;54/2358;j/MichaelIJordan;01/11275;198/0984", "google_scholar": ";https://scholar.google.fr/citations?user=-9todDUAAAAJ;M9zQVwgAAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;;https://scholar.google.ch/citations?user=kNA-WLQAAAAJ", "orcid": ";;;0000-0002-2058-0693;0000-0001-8935-817X;;", "linkedin": ";;antoine-scheid-687735239/;;;;mahdielmhamdi/", "or_profile": "~Aymeric_Capitaine1;~Etienne_Boursier1;~Antoine_Scheid1;~Eric_Moulines1;~Michael_Jordan1;~Alain_Durmus1;~El_Mahdi_El_Mhamdi1", "aff": "\u00c9cole Polytechnique;INRIA;\u00c9cole Polytechnique;Ecole polytechnique;University of California, Berkeley;\u00c9cole Polytechnique;Ecole polytechnique", "aff_domain": "polytechnique.fr;inria.fr;polytechnique.edu;polytechnique.edu;berkeley.edu;polytechnique.fr;polytechnique.edu", "position": "PhD student;Researcher;PhD student;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncapitaine2024unravelling,\ntitle={Unravelling in Collaborative Learning},\nauthor={Aymeric Capitaine and Etienne Boursier and Antoine Scheid and Eric Moulines and Michael Jordan and El-Mahdi El-Mhamdi and Alain Oliviero Durmus},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JfxqomOs60}\n}", "github": "", "reviewers": "DYwo;MuDH;K1qa;pCrN", "pdf_size": 495338, "rating": "5;6;7;7", "confidence": "3;3;2;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "2;3;2;4", "wc_summary": "89;48;350;99", "wc_strengths": "71;39;18;124", "wc_weaknesses": "88;35;6;177", "wc_questions": "63;30;20;87", "wc_limitations": "21;21;10;21", "wc_review": "332;173;404;508", "wc_reply_reviewers": "11;0;57;0", "wc_reply_authors": "0;0;27;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 146.5, 119.03465881834585 ], "wc_strengths_avg": [ 63.0, 39.956226048014095 ], "wc_weaknesses_avg": [ 76.5, 65.04805915628843 ], "wc_questions_avg": [ 50.0, 26.636441203734407 ], "wc_limitations_avg": [ 18.25, 4.763139720814412 ], "wc_review_avg": [ 354.25, 121.92287521216025 ], "wc_reply_reviewers_avg": [ 17.0, 23.526580712037184 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5669990908982020966&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "polytechnique.fr;inria.fr;polytechnique.edu;polytechnique.edu;berkeley.edu;polytechnique.fr;polytechnique.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "Ecole Polytechnique;INRIA;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polytechnique.edu;https://www.inria.fr;https://www.berkeley.edu", "aff_unique_abbr": "X;INRIA;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "France;United States" }, { "title": "Shadowcast: Stealthy Data Poisoning Attacks Against Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95705", "id": "JhqyeppMiD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JhqyeppMiD", "openreview": "https://openreview.net/forum?id=JhqyeppMiD", "poster": "", "project": "", "author_site": "Yuancheng Xu, Jiarui Yao, Manli Shu, Yanchao Sun, Zichu Wu, Ning Yu, Tom Goldstein, Furong Huang", "tldr": "", "abstract": "Vision-Language Models (VLMs) excel in generating textual responses from visual inputs, but their versatility raises security concerns. This study takes the first step in exposing VLMs\u2019 susceptibility to data poisoning attacks that can manipulate responses to innocuous, everyday prompts. We introduce Shadowcast, a stealthy data poisoning attack where poison samples are visually indistinguishable from benign images with matching texts. Shadowcast demonstrates effectiveness in two attack types. The first is a traditional Label Attack, tricking VLMs into misidentifying class labels, such as confusing Donald Trump for Joe Biden. The second is a novel Persuasion Attack, leveraging VLMs\u2019 text generation capabilities to craft persuasive and seemingly rational narratives for misinformation, such as portraying junk food as healthy. We show that Shadowcast effectively achieves the attacker\u2019s intentions using as few as 50 poison samples. Crucially, the poisoned samples demonstrate transferability across different VLM architectures, posing a significant concern in black-box settings. Moreover, Shadowcast remains potent under realistic conditions involving various text prompts, training data augmentation, and image compression techniques. This work reveals how poisoned VLMs can disseminate convincing yet deceptive misinformation to everyday, benign users, emphasizing the importance of data integrity for responsible VLM deployments. Our code is available at: https://github.com/umd-huang-lab/VLM-Poisoning.", "keywords": "Data Poisoning Attacks;AI Security;Vision-Language Models;Multimodality", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yuancheng Xu;Jiarui Yao;Manli Shu;Yanchao Sun;Zichu Wu;Ning Yu;Tom Goldstein;Furong Huang", "authorids": "~Yuancheng_Xu1;~Jiarui_Yao2;~Manli_Shu1;~Yanchao_Sun1;~Zichu_Wu1;~Ning_Yu2;~Tom_Goldstein1;~Furong_Huang1", "gender": "M;;F;F;M;;M;F", "homepage": "https://yuancheng-xu.github.io;https://maxwelljryao.github.io/;https://azshue.github.io/;https://ycsun2017.github.io/home/index.html;;;https://www.cs.umd.edu/~tomg/;https://furong-huang.com", "dblp": ";;263/3503;132/6840;;;25/8184;72/8513", "google_scholar": "OPB0QgwAAAAJ;84fexSEAAAAJ;https://scholar.google.com/citations?hl=en;bloBY_QAAAAJ;;;KmSuVtgAAAAJ;13yyuCcAAAAJ", "orcid": ";;;0000-0002-1137-9939;;;;", "linkedin": "yuancheng-xu/;;manli-shu-a804a8164/;;zichu-wu-9a5495227/;;;", "or_profile": "~Yuancheng_Xu1;~Jiarui_Yao2;~Manli_Shu1;~Yanchao_Sun1;~Zichu_Wu1;~Ning_Yu2;~Tom_Goldstein1;~Furong_Huang1", "aff": "University of Maryland, College Park;Tsinghua University;Department of Computer Science, University of Maryland, College Park;J.P. Morgan AI Research;University of Waterloo;;University of Maryland, College Park;University of Maryland", "aff_domain": "umd.edu;tsinghua.edu.cn;cs.umd.edu;jpmchase.com;uwaterloo.ca;;umd.edu;cs.umd.edu", "position": "PhD student;Undergrad student;PhD student;Researcher;Undergrad student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024shadowcast,\ntitle={Shadowcast: Stealthy Data Poisoning Attacks Against Vision-Language Models},\nauthor={Yuancheng Xu and Jiarui Yao and Manli Shu and Yanchao Sun and Zichu Wu and Ning Yu and Tom Goldstein and Furong Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JhqyeppMiD}\n}", "github": "", "reviewers": "u5mc;BULD;Tzvw;RqR6", "pdf_size": 9553625, "rating": "5;5;5;6", "confidence": "4;3;3;4", "soundness": "3;3;3;2", "novelty": "2;2;2;3", "presentation": "3;2;3;4", "wc_summary": "83;55;84;90", "wc_strengths": "23;62;28;49", "wc_weaknesses": "148;95;138;31", "wc_questions": "4;79;38;467", "wc_limitations": "4;30;1;43", "wc_review": "262;321;289;680", "wc_reply_reviewers": "32;55;103;40", "wc_reply_authors": "149;442;164;17", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.0, 13.546217184144066 ], "wc_strengths_avg": [ 40.5, 15.787653403846944 ], "wc_weaknesses_avg": [ 103.0, 46.09229870596605 ], "wc_questions_avg": [ 147.0, 186.65074336846345 ], "wc_limitations_avg": [ 19.5, 17.64227876437735 ], "wc_review_avg": [ 388.0, 169.87495401029545 ], "wc_reply_reviewers_avg": [ 57.5, 27.536339626028727 ], "wc_reply_authors_avg": [ 193.0, 154.72071613071083 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4897721346712184874&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "umd.edu;tsinghua.edu.cn;cs.umd.edu;jpmchase.com;uwaterloo.ca;;umd.edu;cs.umd.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0;0", "aff_unique_norm": "University of Maryland;Tsinghua University;University of Maryland, College Park;J.P. Morgan;University of Waterloo", "aff_unique_dep": ";;Department of Computer Science;AI Research;", "aff_unique_url": "https://www/umd.edu;https://www.tsinghua.edu.cn;https://www/umd.edu;https://www.jpmorgan.com;https://uwaterloo.ca", "aff_unique_abbr": "UMD;THU;UMD;JPM;UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;0;0;2;0;0", "aff_country_unique": "United States;China;Canada" }, { "title": "Mutual Information Estimation via Normalizing Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95704", "id": "JiQXsLvDls", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JiQXsLvDls", "openreview": "https://openreview.net/forum?id=JiQXsLvDls", "poster": "", "project": "", "author_site": "Ivan Butakov, Aleksandr Tolmachev, Sofia Malanchuk, Anna Neopryatnaya, Alexey Frolov", "tldr": "", "abstract": "We propose a novel approach to the problem of mutual information (MI) estimation via introducing a family of estimators based on normalizing flows. The estimator maps original data to the target distribution, for which MI is easier to estimate. We additionally explore the target distributions with known closed-form expressions for MI. Theoretical guarantees are provided to demonstrate that our approach yields MI estimates for the original data. Experiments with high-dimensional data are conducted to highlight the practical advantages of the proposed method.", "keywords": "Normalizing flows;information theory;mutual information", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Ivan Butakov;Alexander Tolmachev;Sofia Malanchuk;Anna Neopryatnaya;Alexey Frolov", "authorids": "~Ivan_Butakov1;~Alexander_Tolmachev1;~Sofia_Malanchuk1;~Anna_Neopryatnaya1;~Alexey_Frolov1", "gender": ";M;F;F;M", "homepage": "https://github.com/VanessB;https://github.com/Alexandr-Tolmachev;https://github.com/DCAM-Fox;;https://faculty.skoltech.ru/people/alexeyfrolov", "dblp": "338/2406;328/7821;;330/3342;54/9671", "google_scholar": ";https://scholar.google.ru/citations?user=qyUGmKQAAAAJ;;;https://scholar.google.ru/citations?hl=ru", "orcid": "0000-0002-0424-6695;0009-0006-7711-3005;;;0000-0002-6734-0179", "linkedin": ";;;;", "or_profile": "~Ivan_Butakov1;~Alexander_Tolmachev1;~Sofia_Malanchuk1;~Anna_Neopryatnaya1;~Alexey_Frolov1", "aff": "Sirius University of Science and Technology;Skolkovo Institute of Science and Technology;Moscow Institute of Physics and Technology;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "siriusuniversity.ru;skoltech.ru;mipt.ru;skolkovotech.ru;skolkovotech.ru", "position": "MS student;MS student;MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nbutakov2024mutual,\ntitle={Mutual Information Estimation via Normalizing Flows},\nauthor={Ivan Butakov and Alexander Tolmachev and Sofia Malanchuk and Anna Neopryatnaya and Alexey Frolov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JiQXsLvDls}\n}", "github": "", "reviewers": "2t4W;m9Tu;4miK;cdSA", "pdf_size": 1248067, "rating": "4;7;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;4;3;3", "presentation": "2;4;4;3", "wc_summary": "72;439;79;26", "wc_strengths": "31;111;104;47", "wc_weaknesses": "434;260;114;43", "wc_questions": "5;231;43;18", "wc_limitations": "8;14;83;6", "wc_review": "550;1055;423;140", "wc_reply_reviewers": "0;40;57;54", "wc_reply_authors": "78;132;22;33", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 154.0, 165.7995778040463 ], "wc_strengths_avg": [ 73.25, 34.802119188348286 ], "wc_weaknesses_avg": [ 212.75, 149.7921476580131 ], "wc_questions_avg": [ 74.25, 91.52424542163678 ], "wc_limitations_avg": [ 27.75, 32.03416145304884 ], "wc_review_avg": [ 542.0, 331.2846208322988 ], "wc_reply_reviewers_avg": [ 37.75, 22.71976012197312 ], "wc_reply_authors_avg": [ 66.25, 43.37265843823733 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11756593064250539999&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "siriusuniversity.ru;skoltech.ru;mipt.ru;skolkovotech.ru;skolkovotech.ru", "author_num": 5, "aff_unique_index": "1;2;1;1", "aff_unique_norm": ";Skolkovo Institute of Science and Technology;Moscow Institute of Physics and Technology", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.skoltech.ru;https://www.mipt.ru/en", "aff_unique_abbr": ";Skoltech;MIPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";Russian Federation" }, { "title": "FACT or Fiction: Can Truthful Mechanisms Eliminate Federated Free Riding?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95703", "id": "JiRGxrqHh0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JiRGxrqHh0", "openreview": "https://openreview.net/forum?id=JiRGxrqHh0", "poster": "", "project": "", "author_site": "Marco Bornstein, Amrit Singh Bedi, Abdirisak Mohamed, Furong Huang", "tldr": "", "abstract": "Standard federated learning (FL) approaches are vulnerable to the free-rider dilemma: participating agents can contribute little to nothing yet receive a well-trained aggregated model. While prior mechanisms attempt to solve the free-rider dilemma, none have addressed the issue of truthfulness. In practice, adversarial agents can provide false information to the server in order to cheat its way out of contributing to federated training. In an effort to make free-riding-averse federated mechanisms truthful, and consequently less prone to breaking down in practice, we propose FACT. FACT is the first federated mechanism that: (1) eliminates federated free riding by using a penalty system, (2) ensures agents provide truthful information by creating a competitive environment, and (3) encourages agent participation by offering better performance than training alone. Empirically, FACT avoids free-riding when agents are untruthful, and reduces agent loss by over 4x.", "keywords": "Federated Learning;Truthfulness;Free-Riding", "primary_area": "infrastructure", "supplementary_material": "/attachment/97edb8d124a32f8838c8ba201856924412bb484f.zip", "author": "Marco Bornstein;Amrit Bedi;Abdirisak Mohamed;Furong Huang", "authorids": "~Marco_Bornstein1;~Amrit_Bedi1;~Abdirisak_Mohamed1;~Furong_Huang1", "gender": "M;M;M;F", "homepage": "https://marcobornstein.github.io;https://sites.google.com/view/amritsinghbedi/home;;https://furong-huang.com", "dblp": "332/0431;176/2707.html;332/1919;72/8513", "google_scholar": ";91WLA6QAAAAJ;IaxIgBsAAAAJ;13yyuCcAAAAJ", "orcid": ";;;", "linkedin": ";;abdirisak-mohamed-0524981a/;", "or_profile": "~Marco_Bornstein1;~Amrit_Bedi1;~Abdirisak_Mohamed1;~Furong_Huang1", "aff": "University of Maryland, College Park;University of Maryland, College Park;Montgomery College;University of Maryland", "aff_domain": "umd.edu;umd.edu;montgomerycollege.edu;cs.umd.edu", "position": "PhD student;Researcher;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nbornstein2024fact,\ntitle={{FACT} or Fiction: Can Truthful Mechanisms Eliminate Federated Free Riding?},\nauthor={Marco Bornstein and Amrit Bedi and Abdirisak Mohamed and Furong Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JiRGxrqHh0}\n}", "github": "", "reviewers": "dpi3;k1Mz;hdyq", "pdf_size": 3113943, "rating": "5;6;6", "confidence": "5;3;2", "soundness": "2;3;2", "novelty": "1;3;3", "presentation": "2;3;2", "wc_summary": "79;60;42", "wc_strengths": "79;62;52", "wc_weaknesses": "101;150;172", "wc_questions": "132;270;7", "wc_limitations": "96;59;1", "wc_review": "487;601;274", "wc_reply_reviewers": "0;155;19", "wc_reply_authors": "43;517;28", "reply_reviewers": "0;2;1", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 60.333333333333336, 15.107025591499546 ], "wc_strengths_avg": [ 64.33333333333333, 11.14550233153366 ], "wc_weaknesses_avg": [ 141.0, 29.676028485406647 ], "wc_questions_avg": [ 136.33333333333334, 107.41301390220626 ], "wc_limitations_avg": [ 52.0, 39.098167049961134 ], "wc_review_avg": [ 454.0, 135.52121605121465 ], "wc_reply_reviewers_avg": [ 58.0, 69.02656493457187 ], "wc_reply_authors_avg": [ 196.0, 227.06386766722704 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DnbPGMkfKXIJ:scholar.google.com/&scioq=FACT+or+Fiction:+Can+Truthful+Mechanisms+Eliminate+Federated+Free+Riding%3F&hl=en&as_sdt=0,14", "gs_version_total": 6, "email": "umd.edu;umd.edu;montgomerycollege.edu;cs.umd.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Maryland;Montgomery College", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.montgomerycollege.edu", "aff_unique_abbr": "UMD;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distribution Guidance Network for Weakly Supervised Point Cloud Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95702", "id": "Jj2PEAZPWk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jj2PEAZPWk", "openreview": "https://openreview.net/forum?id=Jj2PEAZPWk", "poster": "/media/PosterPDFs/NeurIPS%202024/95702.png?t=1728899916.2419353", "project": "", "author_site": "Zhiyi Pan, Wei Gao, Shan Liu, Ge Li", "tldr": "", "abstract": "Despite alleviating the dependence on dense annotations inherent to fully supervised methods, weakly supervised point cloud semantic segmentation suffers from inadequate supervision signals. In response to this challenge, we introduce a novel perspective that imparts auxiliary constraints by regulating the feature space under weak supervision. Our initial investigation identifies which distributions accurately characterize the feature space, subsequently leveraging this priori to guide the alignment of the weakly supervised embeddings. Specifically, we analyze the superiority of the mixture of von Mises-Fisher distributions (moVMF) among several common distribution candidates. Accordingly, we develop a Distribution Guidance Network (DGNet), which comprises a weakly supervised learning branch and a distribution alignment branch. Leveraging reliable clustering initialization derived from the weakly supervised learning branch, the distribution alignment branch alternately updates the parameters of the moVMF and the network, ensuring alignment with the moVMF-defined latent space. Extensive experiments validate the rationality and effectiveness of our distribution choice and network design. Consequently, DGNet achieves state-of-the-art performance under multiple datasets and various weakly supervised settings.", "keywords": "distribution guidance;weak supervision;point cloud;semantic segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhiyi Pan;Wei Gao;Shan Liu;Ge Li", "authorids": "~Zhiyi_Pan1;~Wei_Gao12;~Shan_Liu2;~Ge_Li2", "gender": "M;M;F;M", "homepage": "http://panzhiyi.top/;https://gaowei262.github.io/;https://www.linkedin.com/in/shanliu/;https://dblp.org/pid/24/712-2.html", "dblp": "58/2784;28/2073-3;49/4215-1;24/712-2.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;KdXy-kgAAAAJ;bdBZ43wAAAAJ;", "orcid": ";0000-0001-7429-5495;0000-0002-1442-1207;", "linkedin": ";;shanliu/;", "or_profile": "~Zhiyi_Pan1;~Wei_Gao12;~Shan_Liu2;~Ge_Li2", "aff": "Peking University;Shenzhen Graduate School, Peking University ;Tencent Media Lab;Peking University Shenzhen Graduate School", "aff_domain": "pku.edu.cn;pku.edu.cn;tencent.com;pku.edu.cn", "position": "PhD student;Assistant Professor;Distinguished Scientist;Full Professor", "bibtex": "@inproceedings{\npan2024distribution,\ntitle={Distribution Guidance Network for Weakly Supervised Point Cloud Semantic Segmentation},\nauthor={Zhiyi Pan and Wei Gao and Shan Liu and Ge Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jj2PEAZPWk}\n}", "github": "", "reviewers": "fVFW;bWaK;KHTz;ruvS", "pdf_size": 893606, "rating": "4;4;5;7", "confidence": "4;5;4;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "55;68;146;83", "wc_strengths": "32;185;94;42", "wc_weaknesses": "128;236;135;219", "wc_questions": "7;6;2;3", "wc_limitations": "6;21;10;1", "wc_review": "228;516;387;348", "wc_reply_reviewers": "24;126;0;84", "wc_reply_authors": "342;1091;275;216", "reply_reviewers": "1;1;0;1", "reply_authors": "3;4;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.0, 34.92134018046845 ], "wc_strengths_avg": [ 88.25, 60.615076507416866 ], "wc_weaknesses_avg": [ 179.5, 48.43810483493342 ], "wc_questions_avg": [ 4.5, 2.0615528128088303 ], "wc_limitations_avg": [ 9.5, 7.365459931328117 ], "wc_review_avg": [ 369.75, 102.77736861780419 ], "wc_reply_reviewers_avg": [ 58.5, 49.545433694741234 ], "wc_reply_authors_avg": [ 481.0, 354.99366191525166 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11647598620017728437&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;pku.edu.cn;tencent.com;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";Media Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.tencent.com", "aff_unique_abbr": "Peking U;Tencent", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Overcoming the Sim-to-Real Gap: Leveraging Simulation to Learn to Explore for Real-World RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95701", "id": "JjQl8hXJAS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JjQl8hXJAS", "openreview": "https://openreview.net/forum?id=JjQl8hXJAS", "poster": "/media/PosterPDFs/NeurIPS%202024/95701.png?t=1733525070.6577857", "project": "", "author_site": "Andrew Wagenmaker, Kevin Huang, Liyiming Ke, Kevin Jamieson, Abhishek Gupta", "tldr": "", "abstract": "In order to mitigate the sample complexity of real-world reinforcement learning, common practice is to first train a policy in a simulator where samples are cheap, and then deploy this policy in the real world, with the hope that it generalizes effectively. Such \\emph{direct sim2real} transfer is not guaranteed to succeed, however, and in cases where it fails, it is unclear how to best utilize the simulator. In this work, we show that in many regimes, while direct sim2real transfer may fail, we can utilize the simulator to learn a set of \\emph{exploratory} policies which enable efficient exploration in the real world. In particular, in the setting of low-rank MDPs, we show that coupling these exploratory policies with simple, practical approaches---least-squares regression oracles and naive randomized exploration---yields a polynomial sample complexity in the real world, an exponential improvement over direct sim2real transfer, or learning without access to a simulator. To the best of our knowledge, this is the first evidence that simulation transfer yields a provable gain in reinforcement learning in settings where direct sim2real transfer fails. We validate our theoretical results on several realistic robotic simulators and a real-world robotic sim2real task, demonstrating that transferring exploratory policies can yield substantial gains in practice as well.", "keywords": "reinforcement learning;sim2real transfer;exploration;sample complexity;naive exploration", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/d0608a0b1ebe1e8c439d47b9d48cc38186ea11da.zip", "author": "Andrew Wagenmaker;Kevin Huang;Liyiming Ke;Kevin Jamieson;Abhishek Gupta", "authorids": "~Andrew_Wagenmaker1;~Kevin_Huang2;~Liyiming_Ke1;~Kevin_Jamieson1;~Abhishek_Gupta1", "gender": "M;;F;M;M", "homepage": "https://wagenmaker.github.io;http://kevinhuang8.github.io;http://kayke.xyz/;;https://homes.cs.washington.edu/~abhgupta/", "dblp": "195/1036;;178/8670;85/10260;18/6404-4", "google_scholar": "ym8AZSIAAAAJ;;EhOtO3cAAAAJ;;1wLVDP4AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Andrew_Wagenmaker1;~Kevin_Huang2;~Liyiming_Ke1;~Kevin_Jamieson1;~Abhishek_Gupta1", "aff": "University of Washington, Seattle;University of Washington;Paul G Allen School of Computer Science & Engineering, University of Washington;University of Washington;University of Washington", "aff_domain": "uw.edu;cs.washington.edu;cs.washington.edu;washington.edu;uw.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwagenmaker2024overcoming,\ntitle={Overcoming the Sim-to-Real Gap: Leveraging Simulation to Learn to Explore for Real-World {RL}},\nauthor={Andrew Wagenmaker and Kevin Huang and Liyiming Ke and Kevin Jamieson and Abhishek Gupta},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JjQl8hXJAS}\n}", "github": "", "reviewers": "ceeA;3Mfq;emUx", "pdf_size": 13418384, "rating": "5;6;7", "confidence": "4;2;2", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "45;35;42", "wc_strengths": "23;109;37", "wc_weaknesses": "66;367;169", "wc_questions": "43;135;115", "wc_limitations": "3;9;12", "wc_review": "180;655;375", "wc_reply_reviewers": "12;50;43", "wc_reply_authors": "40;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 40.666666666666664, 4.189935029992179 ], "wc_strengths_avg": [ 56.333333333333336, 37.67698973585278 ], "wc_weaknesses_avg": [ 200.66666666666666, 124.90618701872039 ], "wc_questions_avg": [ 97.66666666666667, 39.50808637341081 ], "wc_limitations_avg": [ 8.0, 3.7416573867739413 ], "wc_review_avg": [ 403.3333333333333, 194.95013607472953 ], "wc_reply_reviewers_avg": [ 35.0, 16.51262143533445 ], "wc_reply_authors_avg": [ 13.333333333333334, 18.856180831641264 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13297653788614078023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uw.edu;cs.washington.edu;cs.washington.edu;washington.edu;uw.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "JkqrZqBO7d", "title": "PETRA: Parallel End-to-end Training with Reversible Architectures", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a new training approach for reversible architectures that enhances computational efficiency by enabling parallel gradient computation across layers, named PETRA (Parallel End-to-end Training of Reversible Architecture). This method diverges from conventional back-propagation by employing an approximate inversion of activations that effectively preserves gradient quality. By reducing the reliance on synchronous operations, our approach achieves high parallelization with only a slight increase in communication overhead. We have tested this method on benchmark datasets including CIFAR-10, ImageNet-32, and ImageNet, and multiple revertible architectures, where PETRA achieved competitive performance with minimal accuracy loss compared to traditional non-revertible training methods. Our method offers a reduced memory footprint compared to delayed gradient or checkpointing techniques. Unlike pipelining strategies, it eliminates the occurrence of bubble effects, enhancing operational efficiency, while being more parallelizable.", "keywords": "Model parallelism;delayed gradient;reversible architecture;memory reduction", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/b3e07a0fbeed946dc86937a7b9723757364c05f4.zip", "author": "Stephane Rivaud;Louis Fournier;Thomas Pumir;Eugene Belilovsky;Michael Eickenberg;Edouard Oyallon", "authorids": "~Stephane_Rivaud1;~Louis_Fournier1;~Thomas_Pumir1;~Eugene_Belilovsky1;~Michael_Eickenberg5;~Edouard_Oyallon1", "gender": "M;M;M;M;M;", "homepage": ";;;http://eugenium.github.io;http://eickenberg.github.io;", "dblp": ";276/1445;166/3746;42/11445;117/7268;", "google_scholar": "wK1ARdQAAAAJ;https://scholar.google.com/citations?view_op=list_works;He-W7wQAAAAJ;https://scholar.google.fr/citations?user=CffJDoEAAAAJ;GW0werQAAAAJ;", "orcid": ";;;;;", "linkedin": ";fournier-louis/;;;;", "or_profile": "~Stephane_Rivaud1;~Louis_Fournier1;~Thomas_Pumir1;~Eugene_Belilovsky1;~Michael_Eickenberg5;~Edouard_Oyallon1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Computer Science Lab - Pierre and Marie Curie University, Paris, France;Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);;Concordia University, Montreal;Flatiron Institute;", "aff_domain": "isir.upmc.fr;isir.upmc.fr;;concordia.ca;flatironinstitute.org;", "position": "Postdoc;PhD student;;Assistant Professor;Researcher;", "bibtex": "@misc{\nanonymous2024petra,\ntitle={{PETRA}: Parallel End-to-end Training with Reversible Architectures},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=JkqrZqBO7d}\n}", "github": "", "project": "", "reviewers": "YFuR;NpfE;uNwa;E8Gz", "site": "https://openreview.net/forum?id=JkqrZqBO7d", "pdf_size": 853193, "rating": "3;3;3;5", "confidence": "4;5;3;3", "soundness": "1;3;2;3", "novelty": "2;2;2;3", "presentation": "3;4;3;3", "wc_summary": "88;67;27;113", "wc_strengths": "43;71;91;76", "wc_weaknesses": "144;149;162;130", "wc_questions": "44;133;79;6", "wc_limitations": "1;2;13;1", "wc_review": "320;422;372;326", "wc_reply_reviewers": "71;65;199;0", "wc_reply_authors": "96;67;220;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 31.522809202226885 ], "wc_strengths_avg": [ 70.25, 17.36915369268175 ], "wc_weaknesses_avg": [ 146.25, 11.453711188955307 ], "wc_questions_avg": [ 65.5, 46.74665763453041 ], "wc_limitations_avg": [ 4.25, 5.0682837331783235 ], "wc_review_avg": [ 360.0, 41.060930335295616 ], "wc_reply_reviewers_avg": [ 83.75, 72.12965756192109 ], "wc_reply_authors_avg": [ 95.75, 79.73824364757478 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HrqbYkRvo4gJ:scholar.google.com/&scioq=PETRA:+Parallel+End-to-end+Training+with+Reversible+Architectures&hl=en&as_sdt=0,33", "gs_version_total": 14, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Concordia University;Flatiron Institute", "aff_unique_dep": "Computer Science Lab;;", "aff_unique_url": "https://www.upmc.fr;https://www.concordia.ca;https://flatironinstitute.org", "aff_unique_abbr": "UPMC;Concordia;Flatiron", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Paris;Montreal;", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "France;Canada;United States" }, { "title": "LiveScene: Language Embedding Interactive Radiance Fields for Physical Scene Control and Rendering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95700", "id": "Jkt42QYyEH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jkt42QYyEH", "openreview": "https://openreview.net/forum?id=Jkt42QYyEH", "poster": "/media/PosterPDFs/NeurIPS%202024/95700.png?t=1730897257.956851", "project": "", "author_site": "Delin Qu, Qizhi Chen, Pingrui Zhang, Xianqiang Gao, Bin Zhao, Zhigang Wang, Dong Wang, Xuelong Li", "tldr": "", "abstract": "This paper scales object-level reconstruction to complex scenes, advancing interactive scene reconstruction. We introduce two datasets, OmniSim and InterReal, featuring 28 scenes with multiple interactive objects. To tackle the challenge of inaccurate interactive motion recovery in complex scenes, we propose LiveScene, a scene-level language-embedded interactive radiance field that efficiently reconstructs and controls multiple objects. By decomposing the interactive scene into local deformable fields, LiveScene enables separate reconstruction of individual object motions, reducing memory consumption. Additionally, our interaction-aware language embedding localizes individual interactive objects, allowing for arbitrary control using natural language. Our approach demonstrates significant superiority in novel view synthesis, interactive scene control, and language grounding performance through extensive experiments. Project page: https://livescenes.github.io.", "keywords": "Interactive Scene Reconstruction; Controllable NeRF; Language Embedding; Dataset;", "primary_area": "machine_vision", "supplementary_material": "/attachment/cf40e10f73ee58d7344e22ee35a359a728ba7672.zip", "author": "Delin Qu;Qizhi Chen;Pingrui Zhang;Xianqiang Gao;Bin Zhao;Zhigang Wang;Dong Wang;Xuelong Li", "authorids": "~Delin_Qu1;~Qizhi_Chen1;~Pingrui_Zhang1;~Xianqiang_Gao1;~Bin_Zhao7;~Zhigang_Wang3;~Dong_Wang1;~Xuelong_Li2", "gender": "M;M;F;M;M;M;M;M", "homepage": "https://delinqu.github.io/;https://github.com/Tavish9;https://github.com/zhangpingrui;https://goxq.github.io/;https://iopen.nwpu.edu.cn/info/1347/2105.htm;https://redwang.github.io/;;", "dblp": "73/2731;;364/9381.html;;73/4325-1.html;40/3934-28;l/XuelongLi;35/1989-2", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;oZSREOkAAAAJ;https://scholar.google.com.hk/citations?user=DQB0hqwAAAAJ;dasL9V4AAAAJ;ahUibskAAAAJ;cw3EaAYAAAAJ", "orcid": "0009-0003-8775-0380;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Delin_Qu1;~Qizhi_Chen1;~Pingrui_Zhang1;~Xianqiang_Gao1;~Bin_Zhao7;~Dong_Wang1;~Xuelong_Li2;~Zhi.gang_Wang1", "aff": "Shanghai Artificial Intelligence Laboratory;Southern University of Science and Technology;University of Electronic Science and Technology of China;University of Science and Technology of China;Northwest Polytechnical University Xi'an;Shanghai AI Laboratory;Northwestern Polytechnical University;Shanghai AI Lab", "aff_domain": "pjlab.org.cn;sustech.edu.cn;uestc.edu.cn;ustc.edu.cn;nwpu.edu.cn;pjlab.org.cn;nwpu.edu.cn;pjlab.org.cn", "position": "Intern;Undergrad student;Undergrad student;PhD student;Associate Professor;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nqu2024livescene,\ntitle={LiveScene: Language Embedding Interactive Radiance Fields for Physical Scene Control and Rendering},\nauthor={Delin Qu and Qizhi Chen and Pingrui Zhang and Xianqiang Gao and Bin Zhao and Zhigang Wang and Dong Wang and Xuelong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jkt42QYyEH}\n}", "github": "", "reviewers": "aZ7V;a4DY;PajG;94zG", "pdf_size": 9345445, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "1;3;3;3", "wc_summary": "68;169;64;30", "wc_strengths": "69;107;47;30", "wc_weaknesses": "236;62;224;30", "wc_questions": "4;51;79;87", "wc_limitations": "6;44;9;3", "wc_review": "383;433;423;180", "wc_reply_reviewers": "33;0;0;0", "wc_reply_authors": "19;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 82.75, 51.93926741878441 ], "wc_strengths_avg": [ 63.25, 28.795615985771168 ], "wc_weaknesses_avg": [ 138.0, 92.79008567729636 ], "wc_questions_avg": [ 55.25, 32.46825372575495 ], "wc_limitations_avg": [ 15.5, 16.590660023037056 ], "wc_review_avg": [ 354.75, 102.61182924010272 ], "wc_reply_reviewers_avg": [ 8.25, 14.289419162443238 ], "wc_reply_authors_avg": [ 4.75, 8.227241335952167 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ApGM_Vj3qMAJ:scholar.google.com/&scioq=LiveScene:+Language+Embedding+Interactive+Radiance+Fields+for+Physical+Scene+Control+and+Rendering&hl=en&as_sdt=0,31", "gs_version_total": 0, "email": "pjlab.org.cn;sustech.edu.cn;uestc.edu.cn;ustc.edu.cn;nwpu.edu.cn;pjlab.org.cn;nwpu.edu.cn;pjlab.org.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;6;7", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Southern University of Science and Technology;University of Electronic Science and Technology of China;University of Science and Technology of China;Northwest Polytechnical University;Shanghai AI Laboratory;Northwestern Polytechnical University;Shanghai AI Lab", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "http://www.shailab.org/;https://www.sustech.edu.cn;https://www.uestc.edu.cn;http://www.ustc.edu.cn;http://www.nwpu.edu.cn;https://www.shanghai-ai-lab.com;https://www.nwpu.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "Shanghai AI Lab;SUSTech;UESTC;USTC;NWPU;SAIL;NWPU;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Implicit Bias of Gradient Descent on Separable Multiclass Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95699", "id": "JlWn80mTJi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JlWn80mTJi", "openreview": "https://openreview.net/forum?id=JlWn80mTJi", "poster": "", "project": "", "author_site": "Hrithik Ravi, Clay Scott, Daniel Soudry, Yutong Wang", "tldr": "", "abstract": "Implicit bias describes the phenomenon where optimization-based training algorithms, without explicit regularization, show a preference for simple estimators even when more complex estimators have equal objective values. Multiple works have developed the theory of implicit bias for binary classification under the assumption that the loss satisfies an *exponential tail property*. However, there is a noticeable gap in analysis for multiclass classification, with only a handful of results which themselves are restricted to the cross-entropy loss. In this work, we employ the framework of Permutation Equivariant and Relative Margin-based (PERM) losses [Wang and Scott, 2024] to introduce a multiclass extension of the exponential tail property. This class of losses includes not only cross-entropy but also other losses. Using this framework, we extend the implicit bias result of Soudry et al. [2018] to multiclass classification. Furthermore, our proof techniques closely mirror those of the binary case, thus illustrating the power of the PERM framework for bridging the binary-multiclass gap.", "keywords": "gradient descent;multiclass classification;hard-margin SVM;implicit bias", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hrithik Ravi;Clayton Scott;Daniel Soudry;Yutong Wang", "authorids": "~Hrithik_Ravi1;~Clayton_Scott1;~Daniel_Soudry1;~Yutong_Wang1", "gender": "M;M;M;M", "homepage": ";https://soudry.github.io/;https://yutongwang.me/;http://web.eecs.umich.edu/~cscott/", "dblp": ";126/1779;90/3631;96/8859.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ;GH7ryE4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-9368-6352;0000-0001-7472-6750;", "linkedin": "hrithik-ravi/;daniel-soudry-2aa3a88/;;", "or_profile": "~Hrithik_Ravi1;~Daniel_Soudry1;~Yutong_Wang1;~Clayton_D._Scott1", "aff": "University of Michigan - Ann Arbor;Technion - Israel Institute of Technology, Technion;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;technion.ac.il;umich.edu;umich.edu", "position": "MS student;Associate Professor;Postdoc;Professor", "bibtex": "@inproceedings{\nravi2024the,\ntitle={The Implicit Bias of Gradient Descent on Separable Multiclass Data},\nauthor={Hrithik Ravi and Clayton Scott and Daniel Soudry and Yutong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JlWn80mTJi}\n}", "github": "", "reviewers": "gcyJ;RrKo;47NU;AJJ5", "pdf_size": 1640616, "rating": "4;6;6;7", "confidence": "4;3;4;3", "soundness": "4;2;3;3", "novelty": "1;3;3;3", "presentation": "3;2;3;4", "wc_summary": "67;83;71;96", "wc_strengths": "35;30;75;101", "wc_weaknesses": "79;105;130;48", "wc_questions": "50;111;99;14", "wc_limitations": "26;30;1;1", "wc_review": "257;359;376;260", "wc_reply_reviewers": "126;0;10;15", "wc_reply_authors": "0;34;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.25, 11.321991874224253 ], "wc_strengths_avg": [ 60.25, 29.286302258905955 ], "wc_weaknesses_avg": [ 90.5, 30.450779957170226 ], "wc_questions_avg": [ 68.5, 38.88765871070152 ], "wc_limitations_avg": [ 14.5, 13.573871960498227 ], "wc_review_avg": [ 313.0, 54.84067833278505 ], "wc_reply_reviewers_avg": [ 37.75, 51.236583609760714 ], "wc_reply_authors_avg": [ 8.5, 14.722431864335457 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13946176383300688989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umich.edu;technion.ac.il;umich.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Michigan;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.technion.ac.il", "aff_unique_abbr": "UM;Technion", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "VLG-CBM: Training Concept Bottleneck Models with Vision-Language Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95698", "id": "Jm2aK3sDJD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jm2aK3sDJD", "openreview": "https://openreview.net/forum?id=Jm2aK3sDJD", "poster": "", "project": "", "author_site": "Divyansh Srivastava, Ge Yan, Lily Weng", "tldr": "", "abstract": "Concept Bottleneck Models (CBMs) provide interpretable prediction by introducing an intermediate Concept Bottleneck Layer (CBL), which encodes human-understandable concepts to explain models' decision. Recent works proposed to utilize Large Language Models and pre-trained Vision-Language Models to automate the training of CBMs, making it more scalable and automated. However, existing approaches still fall short in two aspects: First, the concepts predicted by CBL often mismatch the input image, raising doubts about the faithfulness of interpretation. Second, it has been shown that concept values encode unintended information: even a set of random concepts could achieve comparable test accuracy to state-of-the-art CBMs. To address these critical limitations, in this work, we propose a novel framework called Vision-Language-Guided Concept Bottleneck Model (VLG-CBM) to enable faithful interpretability with the benefits of boosted performance. Our method leverages off-the-shelf open-domain grounded object detectors to provide visually grounded concept annotation, which largely enhances the faithfulness of concept prediction while further improving the model performance. In addition, we propose a new metric called Number of Effective Concepts (NEC) to control the information leakage and provide better interpretability. Extensive evaluations across five standard benchmarks show that our method, VLG-CBM, outperforms existing methods by at least 4.27\\% and up to 51.09\\% on *Accuracy at NEC=5* (denoted as ANEC-5), and by at least 0.45\\% and up to 29.78\\% on *average accuracy* (denoted as ANEC-avg), while preserving both faithfulness and interpretability of the learned concepts as demonstrated in extensive experiments.", "keywords": "Interpretable machine learning;concept bottleneck models", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Divyansh Srivastava;Ge Yan;Tsui-Wei Weng", "authorids": "~Divyansh_Srivastava1;~Ge_Yan2;~Tsui-Wei_Weng1", "gender": "M;;F", "homepage": ";https://windymount.github.io;https://lilywenglab.github.io", "dblp": "250/5706;;177/9197", "google_scholar": "kw6DWjsAAAAJ;;v8GM4xoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Divyansh_Srivastava1;~Ge_Yan2;~Tsui-Wei_Weng1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsrivastava2024vlgcbm,\ntitle={{VLG}-{CBM}: Training Concept Bottleneck Models with Vision-Language Guidance},\nauthor={Divyansh Srivastava and Ge Yan and Tsui-Wei Weng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jm2aK3sDJD}\n}", "github": "", "reviewers": "QueL;i5YZ;UnrN;hsJW;MvHs", "pdf_size": 16194562, "rating": "4;5;5;5;7", "confidence": "4;4;3;3;4", "soundness": "2;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "79;106;83;48;155", "wc_strengths": "20;88;130;53;115", "wc_weaknesses": "515;137;134;185;127", "wc_questions": "15;115;2;15;129", "wc_limitations": "8;16;2;7;5", "wc_review": "637;462;351;308;531", "wc_reply_reviewers": "57;287;0;0;0", "wc_reply_authors": "2120;1605;0;165;0", "reply_reviewers": "1;2;0;0;0", "reply_authors": "4;5;1;2;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 94.2, 35.57189902155914 ], "wc_strengths_avg": [ 81.2, 40.27604747241218 ], "wc_weaknesses_avg": [ 219.6, 149.11954935554223 ], "wc_questions_avg": [ 55.2, 54.92686046006999 ], "wc_limitations_avg": [ 7.6, 4.673328578219168 ], "wc_review_avg": [ 457.8, 119.4443803617399 ], "wc_reply_reviewers_avg": [ 68.8, 111.31109558350417 ], "wc_reply_authors_avg": [ 778.0, 902.3558056553967 ], "reply_reviewers_avg": [ 0.6, 0.7999999999999999 ], "reply_authors_avg": [ 2.6, 1.624807680927192 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16666666666666663, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12727032080121518044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "What Makes Partial-Label Learning Algorithms Effective?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95697", "id": "JpqEzPTuv6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JpqEzPTuv6", "openreview": "https://openreview.net/forum?id=JpqEzPTuv6", "poster": "", "project": "", "author_site": "Jiaqi Lv, Yangfan Liu, Shiyu Xia, Ning Xu, Miao Xu, Gang Niu, Min-Ling Zhang, Masashi Sugiyama, Xin Geng", "tldr": "", "abstract": "A partial label (PL) specifies a set of candidate labels for an instance and partial-label learning (PLL) trains multi-class classifiers with PLs.\nRecently, many methods that incorporate techniques from other domains have shown strong potential.\nThe expectation that stronger techniques would enhance performance has resulted in prominent PLL methods becoming not only highly complicated but also quite different from one another, making it challenging to choose the best direction for future algorithm design.\nWhile it is exciting to see higher performance, this leaves open a fundamental question: what makes a PLL method effective?\nWe present a comprehensive empirical analysis of this question and summarize the success of PLL so far into some minimal algorithm design principles.\nOur findings reveal that high accuracy on benchmark-simulated datasets with PLs can misleadingly amplify the perceived effectiveness of some general techniques, which may improve representation learning but have limited impact on addressing the inherent challenges of PLs. \nWe further identify the common behavior among successful PLL methods as a progressive transition from uniform to one-hot pseudo-labels, highlighting the critical role of mini-batch PL purification in achieving top performance.\nBased on our findings, we introduce a minimal working algorithm that is surprisingly simple yet effective, and propose an improved strategy to implement the design principles, suggesting a promising direction for improvements in PLL.", "keywords": "partial-label learning;algorithm design principles", "primary_area": "other", "supplementary_material": "", "author": "Jiaqi Lv;Yangfan Liu;Shiyu Xia;Ning Xu;Miao Xu;Gang Niu;Min-Ling Zhang;Masashi Sugiyama;Xin Geng", "authorids": "~Jiaqi_Lv1;~Yangfan_Liu1;~Shiyu_Xia1;~Ning_Xu5;~Miao_Xu3;~Gang_Niu1;~Min-Ling_Zhang2;~Masashi_Sugiyama1;~Xin_Geng1", "gender": "F;M;M;M;F;M;M;M;M", "homepage": ";https://palm.seu.edu.cn/homepage/liuyangfan/index.html;http://palm.seu.edu.cn/homepage/xiashiyu/demo/index.html;http://palm.seu.edu.cn/xuning/;https://researchers.uq.edu.au/researcher/26509;https://niug1984.github.io;http://palm.seu.edu.cn/zhangml/;http://www.ms.k.u-tokyo.ac.jp/sugi/;http://palm.seu.edu.cn/xgeng/index.htm", "dblp": "191/9417;;294/8485;04/5856-9;;26/3367-1;84/271.html;35/1228;", "google_scholar": "PK8L9mYAAAAJ;;https://scholar.google.com/citations?hl=en;;vqpIjSAAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;uFHCIM0AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;ZOCxkIcAAAAJ", "orcid": ";;;;0000-0001-9409-6960;;0000-0003-1880-5918;0000-0001-6658-6743;", "linkedin": ";;;;;;;;", "or_profile": "~Jiaqi_Lv1;~Yangfan_Liu1;~Shiyu_Xia1;~Ning_Xu5;~Miao_Xu3;~Gang_Niu1;~Min-Ling_Zhang2;~Masashi_Sugiyama1;~Xin_Geng1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University;University of Queensland;Southeast University;Southeast University;The University of Tokyo;Southeast University, China", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;uq.edu.au;seu.edu.cn;seu.edu.cn;u-tokyo.ac.jp;seu.edu.cn", "position": "Associate Professor;MS student;PhD student;Associate Professor;Lecturer;Adjunct Full Professor;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nlv2024what,\ntitle={What Makes Partial-Label Learning Algorithms Effective?},\nauthor={Jiaqi Lv and Yangfan Liu and Shiyu Xia and Ning Xu and Miao Xu and Gang Niu and Min-Ling Zhang and Masashi Sugiyama and Xin Geng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JpqEzPTuv6}\n}", "github": "", "reviewers": "b5oN;RRwY;TEwD;ygK7;PELT", "pdf_size": 604777, "rating": "6;6;6;7;7", "confidence": "4;4;4;5;4", "soundness": "3;3;3;4;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "72;75;67;66;45", "wc_strengths": "33;26;73;39;38", "wc_weaknesses": "54;111;294;90;74", "wc_questions": "55;150;6;2;48", "wc_limitations": "25;1;9;1;1", "wc_review": "239;363;449;198;206", "wc_reply_reviewers": "11;108;20;5;46", "wc_reply_authors": "46;379;257;8;16", "reply_reviewers": "1;2;1;1;1", "reply_authors": "3;4;3;2;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 65.0, 10.526157893552615 ], "wc_strengths_avg": [ 41.8, 16.26530048907797 ], "wc_weaknesses_avg": [ 124.6, 86.74468283416569 ], "wc_questions_avg": [ 52.2, 53.37564987894761 ], "wc_limitations_avg": [ 7.4, 9.329523031752482 ], "wc_review_avg": [ 291.0, 98.71777955363461 ], "wc_reply_reviewers_avg": [ 38.0, 37.6988063471511 ], "wc_reply_authors_avg": [ 141.2, 149.95919444969022 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2899261675321285021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;uq.edu.au;seu.edu.cn;seu.edu.cn;u-tokyo.ac.jp;seu.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;0;2;0", "aff_unique_norm": "Southeast University;University of Queensland;University of Tokyo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.uq.edu.au;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "SEU;UQ;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;2;0", "aff_country_unique": "China;Australia;Japan" }, { "title": "Resfusion: Denoising Diffusion Probabilistic Models for Image Restoration Based on Prior Residual Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95696", "id": "JrIPBXWiS8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JrIPBXWiS8", "openreview": "https://openreview.net/forum?id=JrIPBXWiS8", "poster": "/media/PosterPDFs/NeurIPS%202024/95696.png?t=1728111883.0962336", "project": "", "author_site": "Zhenning Shi, haoshuai zheng, Chen Xu, Changsheng Dong, Bin Pan, Xie xueshuo, Along He, Tao Li, Huazhu Fu", "tldr": "", "abstract": "Recently, research on denoising diffusion models has expanded its application to the field of image restoration. Traditional diffusion-based image restoration methods utilize degraded images as conditional input to effectively guide the reverse generation process, without modifying the original denoising diffusion process. However, since the degraded images already include low-frequency information, starting from Gaussian white noise will result in increased sampling steps. We propose Resfusion, a general framework that incorporates the residual term into the diffusion forward process, starting the reverse process directly from the noisy degraded images. The form of our inference process is consistent with the DDPM. We introduced a weighted residual noise, named resnoise, as the prediction target and explicitly provide the quantitative relationship between the residual term and the noise term in resnoise. By leveraging a smooth equivalence transformation, Resfusion determine the optimal acceleration step and maintains the integrity of existing noise schedules, unifying the training and inference processes. The experimental results demonstrate that Resfusion exhibits competitive performance on ISTD dataset, LOL dataset and Raindrop dataset with only five sampling steps. Furthermore, Resfusion can be easily applied to image generation and emerges with strong versatility. Our code and model are available at https://github.com/nkicsl/Resfusion.", "keywords": "Diffusion based models; Image restoration; Shadow removal; Low-light enhancement; Deraining", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/c1923520da2479df64a4b3eff1f6220a3b4b797d.zip", "author": "Zhenning Shi;Haoshuai Zheng;Chen Xu;Changsheng Dong;Bin Pan;Xie xueshuo;Along He;Tao Li;Huazhu Fu", "authorids": "~Zhenning_Shi1;~Haoshuai_Zheng1;~Chen_Xu26;~Changsheng_Dong1;~Bin_Pan1;~Xie_xueshuo1;~Along_He1;~Tao_Li10;~Huazhu_Fu4", "gender": "M;M;M;M;M;;M;M;M", "homepage": "https://www.flashszn.cn/;;https://xdeadlocked.github.io;https://webvpn.nankai.edu.cn/;;;http://ics.nankai.edu.cn;https://hzfu.github.io;", "dblp": ";;;;;244/2466;75/4601-22;63/7767;243/9296.html", "google_scholar": "https://scholar.google.com.hk/citations?user=IJiK74oAAAAJ;;;;sgWhbbAAAAAJ;9dwOZYwAAAAJ;;https://scholar.google.com/citations?hl=en;ybe7pW8AAAAJ", "orcid": ";;;;;;;0000-0002-9702-5524;", "linkedin": ";%E8%B1%AA%E5%B8%85-%E9%83%91-ab85b823b/;;;;;;;", "or_profile": "~Zhenning_Shi1;~Haoshuai_Zheng1;~Chen_Xu26;~Changsheng_Dong1;~Bin_Pan1;~Xie_xueshuo1;~Tao_Li10;~Huazhu_Fu4;~He_along1", "aff": "Nankai University;Nankai University;Nankai University;Nankai University;;;Nankai University;Institute of High Performance Computing, Singapore, A*STAR;", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nku.nankai.edu.cn;;;nankai.edu.cn;ihpc.a-star.edu.sg;", "position": "PhD student;MS student;Undergrad student;MS student;;;Full Professor;Principal Scientist;", "bibtex": "@inproceedings{\nshi2024resfusion,\ntitle={Resfusion: Denoising Diffusion Probabilistic Models for Image Restoration Based on Prior Residual Noise},\nauthor={Zhenning Shi and Haoshuai Zheng and Chen Xu and Changsheng Dong and Bin Pan and Xie xueshuo and Along He and Tao Li and Huazhu Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JrIPBXWiS8}\n}", "github": "", "reviewers": "4Jy9;DA7x;KnJY;vAm8", "pdf_size": 13372600, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;2", "wc_summary": "91;54;35;333", "wc_strengths": "28;48;29;44", "wc_weaknesses": "149;57;58;141", "wc_questions": "18;2;3;63", "wc_limitations": "4;2;7;1", "wc_review": "290;163;132;582", "wc_reply_reviewers": "148;9;0;33", "wc_reply_authors": "180;32;0;12", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 128.25, 119.91533471579021 ], "wc_strengths_avg": [ 37.25, 8.870597499605086 ], "wc_weaknesses_avg": [ 101.25, 43.84275880918079 ], "wc_questions_avg": [ 21.5, 24.78406746278746 ], "wc_limitations_avg": [ 3.5, 2.29128784747792 ], "wc_review_avg": [ 291.75, 177.72503340835246 ], "wc_reply_reviewers_avg": [ 47.5, 59.26423879541523 ], "wc_reply_authors_avg": [ 56.0, 72.49827584156743 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14737424621707919386&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nku.nankai.edu.cn;;;nankai.edu.cn;ihpc.a-star.edu.sg;", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Nankai University;Institute of High Performance Computing", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "NKU;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Enhancing vision-language models for medical imaging: bridging the 3D gap with innovative slice selection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97755", "id": "JrJW21IP9p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JrJW21IP9p", "openreview": "https://openreview.net/forum?id=JrJW21IP9p", "poster": "/media/PosterPDFs/NeurIPS%202024/97755.png?t=1731903814.032592", "project": "", "author_site": "Yuli Wang, Peng jian, Yuwei Dai, Craig Jones, Haris Sair, Jinglai Shen, Nicolas Loizou, jing wu, Wen-Chi Hsu, Maliha Imami, Zhicheng Jiao, Paul Zhang, Harrison Bai", "tldr": "", "abstract": "Recent approaches to vision-language tasks are built on the remarkable capabilities of large vision-language models (VLMs). These models excel in zero-shot and few-shot learning, enabling them to learn new tasks without parameter updates. However, their primary challenge lies in their design, which primarily accommodates 2D input, thus limiting their effectiveness for medical images, particularly radiological images like MRI and CT, which are typically 3D. To bridge the gap between state-of-the-art 2D VLMs and 3D medical image data, we developed an innovative, one-pass, unsupervised representative slice selection method called Vote-MI, which selects representative 2D slices from 3D medical imaging. To evaluate the effectiveness of vote-MI when implemented with VLMs, we introduce BrainMD, a robust, multimodal dataset comprising 2,453 annotated 3D MRI brain scans with corresponding textual radiology reports and electronic health records. Based on BrainMD, we further develop two benchmarks, BrainMD-select (including the most representative 2D slice of 3D image) and BrainBench (including various vision-language downstream tasks). Extensive experiments on the BrainMD dataset and its two corresponding benchmarks demonstrate that our representative selection method significantly improves performance in zero-shot and few-shot learning tasks. On average, Vote-MI achieves a 14.6\\% and 16.6\\% absolute gain for zero-shot and few-shot learning, respectively, compared to randomly selecting examples. Our studies represent a significant step toward integrating AI in medical imaging to enhance patient care and facilitate medical research. We hope this work will serve as a foundation for data selection as vision-language models are increasingly applied to new tasks.", "keywords": "Vision language model;representative slice selection;brain tumor dataset", "primary_area": "", "supplementary_material": "/attachment/2381ac4837c341d9c9fa5e60bed79f1aa29b9d43.pdf", "author": "Yuli Wang;Peng jian;Yuwei Dai;Craig Jones;Haris I. Sair;Jinglai Shen;Nicolas Loizou;jing wu;Wen-Chi Hsu;Maliha Rubaiyat Imami;Zhicheng Jiao;Paul J Zhang;Harrison Bai", "authorids": "~Yuli_Wang1;~Peng_jian1;~Yuwei_Dai1;~Craig_Jones2;~Haris_I._Sair1;~Jinglai_Shen1;~Nicolas_Loizou1;~jing_wu12;~Wen-Chi_Hsu1;~Maliha_Rubaiyat_Imami1;~Zhicheng_Jiao1;~Paul_J_Zhang1;~Harrison_Bai1", "gender": "M;M;F;M;M;M;M;F;M;F;M;;M", "homepage": ";;;;;https://jinglai-shen.github.io;https://nicolasloizou.github.io/;;;;https://sites.brown.edu/airadiology/people/;;", "dblp": ";;;;;;173/4958;;;;;;", "google_scholar": "Ceb-6xkAAAAJ;;;;DcUpmR8AAAAJ;;https://scholar.google.co.uk/citations?user=mvDmzAQAAAAJ;;;https://scholar.google.ca/citations?user=gbV0xY4AAAAJ;lQoZLmkAAAAJ;;Kh96AEArs2gC", "orcid": ";0000-0002-0264-5633;0000-0003-4789-0242;0000-0002-0629-3006;;;;0009-0002-7509-9781;0000-0002-8303-4382;;0000-0002-6968-0919;;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Yuli_Wang1;~Peng_jian1;~Yuwei_Dai1;~Craig_Jones2;~Haris_I._Sair1;~Jinglai_Shen1;~Nicolas_Loizou1;~jing_wu12;~Wen-Chi_Hsu1;~Maliha_Rubaiyat_Imami1;~Zhicheng_Jiao1;~Paul_J_Zhang1;~Harrison_Bai1", "aff": "Johns Hopkins University;Central South University;Central South University;Johns Hopkins University;Johns Hopkins University;University of Maryland, Baltimore County;Johns Hopkins University;Central South University;Johns Hopkins University;University of Toronto;Brown University;;Johns Hopkins University", "aff_domain": "jh.edu;csu.edu.cn;csu.edu.cn;jhu.edu;jhu.edu;umbc.edu;jhu.edu;csu.edu.cn;jh.edu;utoronto.ca;brown.edu;;jh.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Faculty;Full Professor;Assistant Professor;Lecturer;Postdoc;Undergrad student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nwang2024enhancing,\ntitle={Enhancing vision-language models for medical imaging: bridging the 3D gap with innovative slice selection},\nauthor={Yuli Wang and Peng jian and Yuwei Dai and Craig Jones and Haris I. Sair and Jinglai Shen and Nicolas Loizou and jing wu and Wen-Chi Hsu and Maliha Rubaiyat Imami and Zhicheng Jiao and Paul J Zhang and Harrison Bai},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=JrJW21IP9p}\n}", "github": "", "reviewers": "7Ndp;3dm4;Y3Xm", "pdf_size": 2316677, "rating": "5;6;8", "confidence": "4;4;4", "wc_summary_and_contributions": "86;64;97", "wc_strengths": "71;79;114", "wc_improvement": "255;41;168", "wc_limitations": "15;83;1", "wc_correctness": "8;17;1", "wc_clarity": "3;17;1", "wc_relation_to_prior_work": "11;15;11", "wc_documentation": "22;20;1", "wc_additional_feedback": "1;1;1", "wc_review": "472;337;395", "wc_reply_reviewers": "0;35;37", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 82.33333333333333, 13.719410418171117 ], "wc_strengths_avg": [ 88.0, 18.672618098881223 ], "wc_improvement_avg": [ 154.66666666666666, 87.87238221167989 ], "wc_limitations_avg": [ 33.0, 35.81433604950212 ], "wc_correctness_avg": [ 8.666666666666666, 6.548960901462833 ], "wc_clarity_avg": [ 7.0, 7.118052168020874 ], "wc_relation_to_prior_work_avg": [ 12.333333333333334, 1.8856180831641267 ], "wc_documentation_avg": [ 14.333333333333334, 9.46337971105226 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 401.3333333333333, 55.29516756060655 ], "wc_reply_reviewers_avg": [ 24.0, 16.990193249832878 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2295953366577255289&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "jh.edu;csu.edu.cn;csu.edu.cn;jhu.edu;jhu.edu;umbc.edu;jhu.edu;csu.edu.cn;jh.edu;utoronto.ca;brown.edu;;jh.edu", "author_num": 13, "aff_unique_index": "0;1;1;0;0;2;0;1;0;3;4;0", "aff_unique_norm": "Johns Hopkins University;Central South University;University of Maryland, Baltimore County;University of Toronto;Brown University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.jhu.edu;https://www.csu.edu.cn;https://www.umbc.edu;https://www.utoronto.ca;https://www.brown.edu", "aff_unique_abbr": "JHU;CSU;UMBC;U of T;Brown", "aff_campus_unique_index": "1", "aff_campus_unique": ";Baltimore County", "aff_country_unique_index": "0;1;1;0;0;0;0;1;0;2;0;0", "aff_country_unique": "United States;China;Canada" }, { "title": "Online Control with Adversarial Disturbance for Continuous-time Linear Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95695", "id": "JrYdk3HEnc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JrYdk3HEnc", "openreview": "https://openreview.net/forum?id=JrYdk3HEnc", "poster": "/media/PosterPDFs/NeurIPS%202024/95695.png?t=1729084856.614769", "project": "", "author_site": "Jingwei Li, Jing Dong, Can Chang, Baoxiang Wang, Jingzhao Zhang", "tldr": "", "abstract": "We study online control for continuous-time linear systems with finite sampling rates, where the objective is to design an online procedure that learns under non-stochastic noise and performs comparably to a fixed optimal linear controller. \nWe present a novel two-level online algorithm, by integrating a higher-level learning strategy and a lower-level feedback control strategy. This method offers a practical and robust solution for online control, which achieves sublinear regret. Our work provides the first nonasymptotic results for controlling continuous-time linear systems with finite number of interactions with the system. Moreover, we examine how to train an agent in domain randomization environments from a non-stochastic control perspective. By applying our method to the SAC (Soft Actor-Critic) algorithm, we achieved improved results in multiple reinforcement learning tasks within domain randomization environments. Our work provides new insights into non-asymptotic analyses of controlling continuous-time systems. Furthermore, our work brings practical intuition into controller learning under non-stochastic environments.", "keywords": "control and dynamical system;online learning;continuous-time linear system.", "primary_area": "online_learning", "supplementary_material": "", "author": "Jingwei Li;Jing Dong;Can Chang;Baoxiang Wang;Jingzhao Zhang", "authorids": "~Jingwei_Li2;~Jing_Dong3;~Can_Chang1;~Baoxiang_Wang1;~Jingzhao_Zhang2", "gender": ";F;M;;M", "homepage": ";;https://github.com/cititude;;https://sites.google.com/view/jingzhao/home", "dblp": ";;324/0128;;220/5559", "google_scholar": ";g5pYW10AAAAJ;;;8NudxYsAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jingwei_Li2;~Jing_Dong3;~Can_Chang1;~Baoxiang_Wang1;~Jingzhao_Zhang2", "aff": ";The Chinese University of Hong Kong, Shenzhen;Tsinghua University;;Tsinghua University", "aff_domain": ";cuhk.edu.cn;tsinghua.edu.cn;;mail.tsinghua.edu.cn", "position": ";PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nli2024online,\ntitle={Online Control with Adversarial Disturbance for Continuous-time Linear Systems},\nauthor={Jingwei Li and Jing Dong and Can Chang and Baoxiang Wang and Jingzhao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JrYdk3HEnc}\n}", "github": "", "reviewers": "3oRE;H9Pv;ks8t;2dzb", "pdf_size": 2450880, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;2;2;3", "wc_summary": "77;53;41;71", "wc_strengths": "59;32;30;40", "wc_weaknesses": "175;299;64;39", "wc_questions": "376;122;12;255", "wc_limitations": "17;77;28;29", "wc_review": "704;583;175;434", "wc_reply_reviewers": "198;274;0;48", "wc_reply_authors": "161;149;0;22", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.5, 14.309088021254185 ], "wc_strengths_avg": [ 40.25, 11.453711188955307 ], "wc_weaknesses_avg": [ 144.25, 102.9693522364786 ], "wc_questions_avg": [ 191.25, 137.04264847119674 ], "wc_limitations_avg": [ 37.75, 23.14492384951828 ], "wc_review_avg": [ 474.0, 197.34614260228145 ], "wc_reply_reviewers_avg": [ 130.0, 110.66164647247935 ], "wc_reply_authors_avg": [ 83.0, 72.54309064273454 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ExNsaqBq6OoJ:scholar.google.com/&scioq=Online+Control+with+Adversarial+Disturbance+for+Continuous-time+Linear+Systems&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": ";cuhk.edu.cn;tsinghua.edu.cn;;mail.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CUHK;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Few-Shot Diffusion Models Escape the Curse of Dimensionality", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95694", "id": "JrraNaaZm5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JrraNaaZm5", "openreview": "https://openreview.net/forum?id=JrraNaaZm5", "poster": "/media/PosterPDFs/NeurIPS%202024/95694.png?t=1731676788.9039674", "project": "", "author_site": "Ruofeng Yang, Bo Jiang, Cheng Chen, ruinan Jin, Baoxiang Wang, Shuai Li", "tldr": "", "abstract": "While diffusion models have demonstrated impressive performance, there is a growing need for generating samples tailored to specific user-defined concepts. The customized requirements promote the development of few-shot diffusion models, which use limited $n_{ta}$ target samples to fine-tune a pre-trained diffusion model trained on $n_s$ source samples. Despite the empirical success, no theoretical work specifically analyzes few-shot diffusion models. Moreover, the existing results for diffusion models without a fine-tuning phase can not explain why few-shot models generate great samples due to the curse of dimensionality. In this work, we analyze few-shot diffusion models under a linear structure distribution with a latent dimension $d$. From the approximation perspective, we prove that few-shot models have a $\\widetilde{O}(n_s^{-2/d}+n_{ta}^{-1/2})$ bound to approximate the target score function, which is better than $n_{ta}^{-2/d}$ results. From the optimization perspective, we consider a latent Gaussian special case and prove that the optimization problem has a closed-form minimizer. This means few-shot models can directly obtain an approximated minimizer without a complex optimization process. Furthermore, we also provide the accuracy bound $\\widetilde{O}(1/n_{ta}+1/\\sqrt{n_s})$ for the empirical solution, which still has better dependence on $n_{ta}$ compared to $n_s$. The results of the real-world experiments also show that the models obtained by only fine-tuning the encoder and decoder specific to the target distribution can produce novel images with the target feature, which supports our theoretical results.", "keywords": "Diffusion models;Few-shot learning", "primary_area": "generative_models", "supplementary_material": "", "author": "Ruofeng Yang;Bo Jiang;Cheng Chen;Ruinan Jin;Baoxiang Wang;Shuai Li", "authorids": "~Ruofeng_Yang1;~Bo_Jiang2;~Cheng_Chen9;~Ruinan_Jin3;~Baoxiang_Wang1;~Shuai_Li3", "gender": "M;M;M;;;F", "homepage": "https://github.com/wanshuiyin;https://jhc.sjtu.edu.cn/~bjiang/;https://chengchen8.github.io/;;;http://shuaili8.github.io", "dblp": "350/4546;34/2005-3.html;10/217-15;;;57/2281-10", "google_scholar": "https://scholar.google.com.hk/citations?user=Cw9HDacAAAAJ;WxAIZtMAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";;0000-0002-9094-0869;;;", "linkedin": ";;;;;", "or_profile": "~Ruofeng_Yang1;~Bo_Jiang2;~Cheng_Chen9;~Ruinan_Jin3;~Baoxiang_Wang1;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;East China Normal University;;;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;ecnu.edu.cn;;;sjtu.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;;;Assistant Professor", "bibtex": "@inproceedings{\nyang2024fewshot,\ntitle={Few-Shot Diffusion Models Escape the Curse of Dimensionality},\nauthor={Ruofeng Yang and Bo Jiang and Cheng Chen and Ruinan Jin and Baoxiang Wang and Shuai Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JrraNaaZm5}\n}", "github": "", "reviewers": "XEHa;fbjU;7GpV;kXcP", "pdf_size": 1210084, "rating": "4;6;7;7", "confidence": "4;3;2;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "124;91;91;108", "wc_strengths": "35;118;89;71", "wc_weaknesses": "229;144;31;115", "wc_questions": "41;44;30;8", "wc_limitations": "1;9;9;6", "wc_review": "430;406;250;308", "wc_reply_reviewers": "49;76;0;25", "wc_reply_authors": "45;45;0;39", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.5, 13.720422734012244 ], "wc_strengths_avg": [ 78.25, 30.07802353878991 ], "wc_weaknesses_avg": [ 129.75, 70.75088338671115 ], "wc_questions_avg": [ 30.75, 14.13108276106258 ], "wc_limitations_avg": [ 6.25, 3.2691742076555053 ], "wc_review_avg": [ 348.5, 72.95717922178736 ], "wc_reply_reviewers_avg": [ 37.5, 28.182441342083905 ], "wc_reply_authors_avg": [ 32.25, 18.779976038323372 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4908787959701553018&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;ecnu.edu.cn;;;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.ecnu.edu.cn", "aff_unique_abbr": "SJTU;ECNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Adaptive Layer Sparsity for Large Language Models via Activation Correlation Assessment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95693", "id": "Jup0qZxH7U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jup0qZxH7U", "openreview": "https://openreview.net/forum?id=Jup0qZxH7U", "poster": "/media/PosterPDFs/NeurIPS%202024/95693.png?t=1730253310.8839724", "project": "", "author_site": "Wei Li, Lujun Li, Mark Lee, Shengjie Sun", "tldr": "", "abstract": "Large Language Models (LLMs) have revolutionized the field of natural language processing with their impressive capabilities. However, their enormous size presents challenges for deploying them in real-world applications. Traditional compression techniques, like pruning, often lead to suboptimal performance due to their uniform pruning ratios and lack of consideration for the varying importance of features across different layers. To address these limitations, we present a novel Adaptive Layer Sparsity (ALS) approach to optimize LLMs. Our approach consists of two key steps. Firstly, we estimate the correlation matrix between intermediate layers by leveraging the concept of information orthogonality. This novel perspective allows for a precise measurement of the importance of each layer across the model. Secondly, we employ a linear optimization algorithm to develop an adaptive sparse allocation strategy based on evaluating the correlation matrix. This strategy enables us to selectively prune features in intermediate layers, achieving fine-grained optimization of the LLM model. Considering the varying importance across different layers, we can significantly reduce the model size without sacrificing performance. We conduct extensive experiments on publicly available language processing datasets, including the LLaMA-V1|V2|V3 family and OPT, covering various benchmarks. Our experimental results validate the effectiveness of our ALS method, showcasing its superiority over previous approaches. The performance gains demonstrate its potential for enhancing LLMs' efficiency and resource utilization. Notably, our approach surpasses the state-of-the-art models Wanda and SparseGPT, showcasing its ability to excel even under high sparsity levels. Codes at: https://github.com/lliai/ALS.", "keywords": "layerwise sparsity allocation;large language models;model compression;network pruning.", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Wei Li;Lujun Li;Mark G. Lee;Shengjie Sun", "authorids": "~Wei_Li93;~Lujun_Li1;~Mark_G._Lee1;~Shengjie_Sun1", "gender": "M;;;M", "homepage": ";;http://www.cs.bham.ac.uk/~mgl;", "dblp": ";;l/MarkLee;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?user=1AIdLa0AAAAJ;", "orcid": "0000-0002-4776-5911;;;0009-0004-8521-6384", "linkedin": ";;mark-lee-80356b10/;", "or_profile": "~Wei_Li93;~Lujun_Li1;~Mark_G._Lee1;~Shengjie_Sun1", "aff": "University of Birmingham;;University of Birmingham;Nanjing University of Information Science and Technology", "aff_domain": "bham.ac.uk;;bham.ac.uk;nuist.edu.cn", "position": "PhD student;;Full Professor;MS student", "bibtex": "@inproceedings{\nli2024adaptive,\ntitle={Adaptive Layer Sparsity for Large Language Models via Activation Correlation Assessment},\nauthor={Wei Li and Lujun Li and Mark G. Lee and Shengjie Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jup0qZxH7U}\n}", "github": "", "reviewers": "7kYG;ZqVF;8iME;2MGm", "pdf_size": 3581269, "rating": "5;6;6;8", "confidence": "4;3;2;5", "soundness": "3;2;3;4", "novelty": "2;3;2;4", "presentation": "2;3;2;4", "wc_summary": "75;55;81;120", "wc_strengths": "49;62;58;114", "wc_weaknesses": "94;103;38;57", "wc_questions": "95;1;44;20", "wc_limitations": "40;1;17;1", "wc_review": "353;222;238;312", "wc_reply_reviewers": "21;0;27;0", "wc_reply_authors": "151;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 82.75, 23.562417108607512 ], "wc_strengths_avg": [ 70.75, 25.410381736605217 ], "wc_weaknesses_avg": [ 73.0, 26.56124997058685 ], "wc_questions_avg": [ 40.0, 35.220732530712645 ], "wc_limitations_avg": [ 14.75, 15.974589196595948 ], "wc_review_avg": [ 281.25, 53.560129760858494 ], "wc_reply_reviewers_avg": [ 12.0, 12.186057606953941 ], "wc_reply_authors_avg": [ 37.75, 65.38491798572512 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5129891760425771, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15308036999248465383&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "bham.ac.uk;;bham.ac.uk;nuist.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Birmingham;Nanjing University of Information Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.birmingham.ac.uk;http://www.nuist.edu.cn", "aff_unique_abbr": "Birmingham;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;China" }, { "title": "Connecting Joint-Embedding Predictive Architecture with Contrastive Self-supervised Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95692", "id": "JvQnJWIj6m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JvQnJWIj6m", "openreview": "https://openreview.net/forum?id=JvQnJWIj6m", "poster": "", "project": "", "author_site": "Shentong Mo, Peter Tong", "tldr": "", "abstract": "In recent advancements in unsupervised visual representation learning, the Joint-Embedding Predictive Architecture (JEPA) has emerged as a significant method for extracting visual features from unlabeled imagery through an innovative masking strategy. Despite its success, two primary limitations have been identified: the inefficacy of Exponential Moving Average (EMA) from I-JEPA in preventing entire collapse and the inadequacy of I-JEPA prediction in accurately learning the mean of patch representations. Addressing these challenges, this study introduces a novel framework, namely C-JEPA (Contrastive-JEPA), which integrates the Image-based Joint-Embedding Predictive Architecture with the Variance-Invariance-Covariance Regularization (VICReg) strategy. This integration is designed to effectively learn the variance/covariance for preventing entire collapse and ensuring invariance in the mean of augmented views, thereby overcoming the identified limitations. Through empirical and theoretical evaluations, our work demonstrates that C-JEPA significantly enhances the stability and quality of visual representation learning. When pre-trained on the ImageNet-1K dataset, C-JEPA exhibits rapid and improved convergence in both linear probing and fine-tuning performance metrics.", "keywords": "Joint-Embedding Predictive Architecture;Contrastive Self-supervised Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shentong Mo;Shengbang Tong", "authorids": "~Shentong_Mo1;~Shengbang_Tong1", "gender": ";M", "homepage": ";https://tsb0601.github.io/petertongsb/", "dblp": ";306/1406", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Shentong_Mo1;~Shengbang_Tong1", "aff": ";New York University", "aff_domain": ";nyu.edu", "position": ";PhD student", "bibtex": "@inproceedings{\nmo2024connecting,\ntitle={Connecting Joint-Embedding Predictive Architecture with Contrastive Self-supervised Learning},\nauthor={Shentong Mo and Shengbang Tong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JvQnJWIj6m}\n}", "github": "", "reviewers": "Rspm;1VXy;aesY", "pdf_size": 5641170, "rating": "5;7;8", "confidence": "3;5;4", "soundness": "3;4;4", "novelty": "2;2;4", "presentation": "2;3;4", "wc_summary": "55;71;51", "wc_strengths": "47;74;66", "wc_weaknesses": "171;65;56", "wc_questions": "5;7;30", "wc_limitations": "1;61;39", "wc_review": "279;278;242", "wc_reply_reviewers": "54;23;0", "wc_reply_authors": "30;30;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 59.0, 8.640987597877148 ], "wc_strengths_avg": [ 62.333333333333336, 11.323525167642018 ], "wc_weaknesses_avg": [ 97.33333333333333, 52.21962168465882 ], "wc_questions_avg": [ 14.0, 11.343133018115703 ], "wc_limitations_avg": [ 33.666666666666664, 24.783507060588143 ], "wc_review_avg": [ 266.3333333333333, 17.21110752456745 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 22.125902367034783 ], "wc_reply_authors_avg": [ 20.0, 14.142135623730951 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2753672343367634654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";nyu.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Semantic Routing via Autoregressive Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95691", "id": "JvlrUFJMbI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JvlrUFJMbI", "openreview": "https://openreview.net/forum?id=JvlrUFJMbI", "poster": "", "project": "", "author_site": "Eric Zhao, Pranjal Awasthi, Zhengdao Chen, Sreenivas Gollapudi, Daniel Delling", "tldr": "", "abstract": "We study learning-based approaches to semantic route planning, which concerns producing routes in response to rich queries that specify various criteria and preferences. Semantic routing is already widely found in industry applications, especially navigational services like Google Maps; however, existing implementations only support limited route criteria and narrow query sets as they rely on repurposing classical route optimization algorithms. We argue for a learning-based approach to semantic routing as a more scalable and general alternative. To foster interest in this important application of graph learning, we are releasing a large-scale publicly-licensed benchmark for semantic routing consisting of real-world multi-objective navigation problems---expressed via natural language queries---on the richly annotated road networks of US cities. In addition to being intractable with existing approaches to semantic routing, our benchmark poses a significant scaling challenge for graph learning methods. As a proof-of-concept, we show that---at scale---even a standard transformer network is a powerful semantic routing system and achieves non-trivial performance on our benchmark. In the process, we demonstrate a simple solution to the challenge of scaling up graph learning: an autoregressive approach that decomposes semantic routing into smaller ``next-edge'' prediction problems.", "keywords": "semantic routing;routing on graphs;autoregressive modeling", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Eric Zhao;Pranjal Awasthi;Zhengdao Chen;Sreenivas Gollapudi;Daniel Delling", "authorids": "~Eric_Zhao1;~Pranjal_Awasthi3;~Zhengdao_Chen1;~Sreenivas_Gollapudi2;~Daniel_Delling1", "gender": "M;;;M;", "homepage": "https://eric-zhao.com;https://www.cs.rutgers.edu/~pa336/;;https://www.sreenivasgollapudi.com;http://www.danieldelling.com", "dblp": "294/8327.html;57/679;;https://dblp.uni-trier.de/pers/g/Gollapudi:Sreenivas.html;", "google_scholar": "6OfjaHQAAAAJ;;;Ysd-WJgAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Eric_Zhao1;~Pranjal_Awasthi3;~Zhengdao_Chen1;~Sreenivas_Gollapudi2;~Daniel_Delling1", "aff": "University of California, Berkeley;Rutgers University;;Google;Google", "aff_domain": "berkeley.edu;rutgers.edu;;google.com;google.com", "position": "PhD student;Assistant Professor;;Researcher;Researcher", "bibtex": "@inproceedings{\nzhao2024semantic,\ntitle={Semantic Routing via Autoregressive Modeling},\nauthor={Eric Zhao and Pranjal Awasthi and Zhengdao Chen and Sreenivas Gollapudi and Daniel Delling},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JvlrUFJMbI}\n}", "github": "", "reviewers": "u4su;oN8k;o9bU", "pdf_size": 8323869, "rating": "6;6;7", "confidence": "3;4;2", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;3;3", "wc_summary": "50;91;51", "wc_strengths": "45;81;73", "wc_weaknesses": "156;302;74", "wc_questions": "56;2;44", "wc_limitations": "1;1;13", "wc_review": "308;477;255", "wc_reply_reviewers": "12;70;0", "wc_reply_authors": "0;223;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.0, 19.096247449870006 ], "wc_strengths_avg": [ 66.33333333333333, 15.4344492037203 ], "wc_weaknesses_avg": [ 177.33333333333334, 94.29504523332896 ], "wc_questions_avg": [ 34.0, 23.15167380558045 ], "wc_limitations_avg": [ 5.0, 5.656854249492381 ], "wc_review_avg": [ 346.6666666666667, 94.66549295047038 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 30.5650490302604 ], "wc_reply_authors_avg": [ 74.33333333333333, 105.12320813640007 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hZyt97xHXH8J:scholar.google.com/&scioq=Semantic+Routing+via+Autoregressive+Modeling&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "berkeley.edu;rutgers.edu;;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of California, Berkeley;Rutgers University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.rutgers.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Rutgers;Google", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "RoboMamba: Efficient Vision-Language-Action Model for Robotic Reasoning and Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95690", "id": "JxOQeg1NkH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JxOQeg1NkH", "openreview": "https://openreview.net/forum?id=JxOQeg1NkH", "poster": "/media/PosterPDFs/NeurIPS%202024/95690.png?t=1731402927.5108073", "project": "", "author_site": "Jiaming Liu, Mengzhen Liu, Zhenyu Wang, Pengju An, Xiaoqi Li, Kaichen Zhou, Senqiao Yang, Renrui Zhang, Yandong Guo, Shanghang Zhang", "tldr": "", "abstract": "A fundamental objective in robot manipulation is to enable models to comprehend visual scenes and execute actions. Although existing Vision-Language-Action (VLA) models for robots can handle a range of basic tasks, they still face challenges in two areas: (1) insufficient reasoning ability to tackle complex tasks, and (2) high computational costs for VLA model fine-tuning and inference. The recently proposed state space model (SSM) known as Mamba demonstrates promising capabilities in non-trivial sequence modeling with linear inference complexity. Inspired by this, we introduce RoboMamba, an end-to-end robotic VLA model that leverages Mamba to deliver both robotic reasoning and action capabilities, while maintaining efficient fine-tuning and inference. Specifically, we first integrate the vision encoder with Mamba, aligning visual tokens with language embedding through co-training, empowering our model with visual common sense and robotic-related reasoning. To further equip RoboMamba with SE(3) pose prediction abilities, we explore an efficient fine-tuning strategy with a simple policy head. We find that once RoboMamba possesses sufficient reasoning capability, it can acquire manipulation skills with minimal fine-tuning parameters (0.1\\% of the model) and time. In experiments, RoboMamba demonstrates outstanding reasoning capabilities on general and robotic evaluation benchmarks. Meanwhile, our model showcases impressive pose prediction results in both simulation and real-world experiments, achieving inference speeds 3 times faster than existing VLA models.", "keywords": "Robot Manipulation;Multimodal Large Language Model;State Space Model", "primary_area": "robotics", "supplementary_material": "/attachment/a8ec83e43dde9f6210ec7009438418ab8d320803.zip", "author": "Jiaming Liu;Mengzhen Liu;Zhenyu Wang;Pengju An;Xiaoqi Li;Kaichen Zhou;Senqiao Yang;Renrui Zhang;Yandong Guo;Shanghang Zhang", "authorids": "~Jiaming_Liu2;~Mengzhen_Liu1;~Zhenyu_Wang11;~Pengju_An1;~Xiaoqi_Li3;~Kaichen_Zhou1;~Senqiao_Yang1;~Renrui_Zhang1;~Yandong_Guo2;~Shanghang_Zhang4", "gender": "M;F;M;M;;M;;M;M;", "homepage": "https://github.com/liujiaming1996;https://scholar.google.com/lmz;https://github.com/VONHornheim;https://apj.com;;http://zalex97.github.io/;;;;", "dblp": ";;;;;;;244/1748;28/4272;", "google_scholar": "cPki5sUAAAAJ;;;https://scholar.google.cz/citations?hl=zh-CN;;;;YlL3xN4AAAAJ;fWDoWsQAAAAJ;", "orcid": "0000-0002-6770-4390;;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Jiaming_Liu2;~Mengzhen_Liu1;~Zhenyu_Wang11;~Pengju_An1;~Xiaoqi_Li3;~Kaichen_Zhou1;~Senqiao_Yang1;~Renrui_Zhang1;~Yandong_Guo2;~Shanghang_Zhang4", "aff": "Peking University;Beijing Jiaotong University;Peking University;Peking University;;Department of Computer Science, University of Oxford;;MMLab of CUHK & Shanghai AI Laboratory;AI^2 Robotics;", "aff_domain": "pku.edu.cn;bjtu.edu.cn;pku.edu.cn;stu.pku.edu.cn;;cs.ox.ac.uk;;pjlab.org.cn;ai2robotics.com;", "position": "PhD student;Undergrad student;MS student;MS student;;PhD student;;PhD student;Chief Scientist;", "bibtex": "@inproceedings{\nliu2024robomamba,\ntitle={RoboMamba: Efficient Vision-Language-Action Model for Robotic Reasoning and Manipulation},\nauthor={Jiaming Liu and Mengzhen Liu and Zhenyu Wang and Pengju An and Xiaoqi Li and Kaichen Zhou and Senqiao Yang and Renrui Zhang and Yandong Guo and Shanghang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JxOQeg1NkH}\n}", "github": "", "reviewers": "16K4;iHpb;CsAK;QK1g;vV5n", "pdf_size": 4916576, "rating": "5;5;6;6;7", "confidence": "4;3;4;4;5", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;3", "wc_summary": "52;71;88;60;94", "wc_strengths": "34;30;111;111;60", "wc_weaknesses": "235;43;89;178;118", "wc_questions": "54;29;49;55;8", "wc_limitations": "13;24;2;6;8", "wc_review": "388;197;339;410;288", "wc_reply_reviewers": "161;44;43;43;44", "wc_reply_authors": "74;104;101;79;84", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;3;3;3;3", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 73.0, 16.0 ], "wc_strengths_avg": [ 69.2, 35.650525942824466 ], "wc_weaknesses_avg": [ 132.6, 67.36349159596762 ], "wc_questions_avg": [ 39.0, 18.121810064118872 ], "wc_limitations_avg": [ 10.6, 7.578918128598565 ], "wc_review_avg": [ 324.4, 76.342910607338 ], "wc_reply_reviewers_avg": [ 67.0, 47.00212761141776 ], "wc_reply_authors_avg": [ 88.4, 11.976643937263894 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.8, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17132014612810128855&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;bjtu.edu.cn;pku.edu.cn;stu.pku.edu.cn;;cs.ox.ac.uk;;pjlab.org.cn;ai2robotics.com;", "author_num": 10, "aff_unique_index": "0;1;0;0;2;3;4", "aff_unique_norm": "Peking University;Beijing Jiao Tong University;University of Oxford;Chinese University of Hong Kong;AI^2 Robotics", "aff_unique_dep": ";;Department of Computer Science;MMLab;", "aff_unique_url": "http://www.pku.edu.cn;http://www.njtu.edu.cn/en;https://www.ox.ac.uk;https://www.cuhk.edu.hk;http://ai2robotics.org/", "aff_unique_abbr": "Peking U;BJTU;Oxford;CUHK;AI^2 Robotics", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Oxford;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0;2", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "Collaborative Cognitive Diagnosis with Disentangled Representation Learning for Learner Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95689", "id": "JxlQ2pbyzS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JxlQ2pbyzS", "openreview": "https://openreview.net/forum?id=JxlQ2pbyzS", "poster": "", "project": "", "author_site": "Weibo Gao, Qi Liu, Linan Yue, Fangzhou Yao, Hao Wang, Yin Gu, zheng zhang", "tldr": "", "abstract": "Learners sharing similar implicit cognitive states often display comparable observable problem-solving performances. Leveraging collaborative connections among such similar learners proves valuable in comprehending human learning. Motivated by the success of collaborative modeling in various domains, such as recommender systems, we aim to investigate how collaborative signals among learners contribute to the diagnosis of human cognitive states (i.e., knowledge proficiency) in the context of intelligent education.\nThe primary challenges lie in identifying implicit collaborative connections and disentangling the entangled cognitive factors of learners for improved explainability and controllability in learner Cognitive Diagnosis (CD). However, there has been no work on CD capable of simultaneously modeling collaborative and disentangled cognitive states. To address this gap, we present Coral, a $\\underline{Co}$llabo$\\underline{ra}$tive cognitive diagnosis model with disentang$\\underline{l}$ed representation learning. Specifically, Coral first introduces a disentangled state encoder to achieve the initial disentanglement of learners' states.\nSubsequently, a meticulously designed collaborative representation learning procedure captures collaborative signals. It dynamically constructs a collaborative graph of learners by iteratively searching for optimal neighbors in a context-aware manner. Using the constructed graph, collaborative information is extracted through node representation learning. Finally, a decoding process aligns the initial cognitive states and collaborative states, achieving co-disentanglement with practice performance reconstructions.\nExtensive experiments demonstrate the superior performance of Coral, showcasing significant improvements over state-of-the-art methods across several real-world datasets.\nOur code is available at https://github.com/bigdata-ustc/Coral.", "keywords": "Learner Modeling;Intelligent Education System;Cognitive Diagnosis;Disentangled Representation Learning", "primary_area": "other", "supplementary_material": "", "author": "Weibo Gao;Qi Liu;Linan Yue;Fangzhou Yao;Hao Wang;Yin Gu;Zheng Zhang", "authorids": "~Weibo_Gao1;~Qi_Liu3;~Linan_Yue1;~Fangzhou_Yao1;~Hao_Wang32;~Yin_Gu1;~Zheng_Zhang20", "gender": ";M;M;;M;M;", "homepage": ";http://staff.ustc.edu.cn/~qiliuql/;https://yuelinan.github.io/;;http://staff.ustc.edu.cn/~wanghao3/;;", "dblp": ";95/2446-3;297/1080;;181/2812-76;293/9760;", "google_scholar": ";5EoHAFwAAAAJ;https://scholar.google.com.hk/citations?user=XDaNgG4AAAAJ;;ou4Miu4AAAAJ;;", "orcid": ";0000-0001-6956-5550;0000-0002-5980-6098;;0000-0001-9921-2078;;", "linkedin": ";;;;;;", "or_profile": "~Weibo_Gao1;~Qi_Liu3;~Linan_Yue1;~Fangzhou_Yao1;~Hao_Wang32;~Yin_Gu1;~Zheng_Zhang20", "aff": ";University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": ";ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;", "position": ";Full Professor;PhD student;;Associate Researcher;PhD student;", "bibtex": "@inproceedings{\ngao2024collaborative,\ntitle={Collaborative Cognitive Diagnosis with Disentangled Representation Learning for Learner Modeling},\nauthor={Weibo Gao and Qi Liu and Linan Yue and Fangzhou Yao and Hao Wang and Yin Gu and Zheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JxlQ2pbyzS}\n}", "github": "", "reviewers": "Ty5J;k7ko;t9qg;UPTU;bUz3;vXdf", "pdf_size": 933194, "rating": "5;5;6;7;7;7", "confidence": "3;4;3;4;4;4", "soundness": "3;3;3;3;3;3", "novelty": "2;3;4;3;3;3", "presentation": "3;3;3;3;3;3", "wc_summary": "157;38;99;85;90;227", "wc_strengths": "52;37;223;184;90;282", "wc_weaknesses": "121;56;138;247;77;504", "wc_questions": "4;25;123;81;2;248", "wc_limitations": "1;20;114;28;1;103", "wc_review": "335;176;697;625;260;1364", "wc_reply_reviewers": "14;0;82;17;111;38", "wc_reply_authors": "366;0;442;65;65;63", "reply_reviewers": "1;0;1;1;1;1", "reply_authors": "3;1;2;2;2;2", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.0, 60.569519287069355 ], "wc_strengths_avg": [ 144.66666666666666, 91.02319606683905 ], "wc_weaknesses_avg": [ 190.5, 152.77079782035133 ], "wc_questions_avg": [ 80.5, 86.56163507389788 ], "wc_limitations_avg": [ 44.5, 46.385162857678246 ], "wc_review_avg": [ 576.1666666666666, 399.1496864864386 ], "wc_reply_reviewers_avg": [ 43.666666666666664, 39.86086914367133 ], "wc_reply_authors_avg": [ 166.83333333333334, 170.6550289000851 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 2.0, 0.5773502691896257 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5252257314388902, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4505785548314380300&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Fine Tuning Out-of-Vocabulary Item Recommendation with User Sequence Imagination", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95688", "id": "JyWAFGCJPl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JyWAFGCJPl", "openreview": "https://openreview.net/forum?id=JyWAFGCJPl", "poster": "", "project": "", "author_site": "Ruochen Liu, Hao Chen, Yuanchen Bei, Qijie Shen, Fangwei Zhong, Senzhang Wang, Jianxin Wang", "tldr": "", "abstract": "Recommending out-of-vocabulary (OOV) items is a challenging problem since the in-vocabulary (IV) items have well-trained behavioral embeddings but the OOV items only have content features. Current OOV recommendation models often generate 'makeshift' embeddings for OOV items from content features and then jointly recommend with the `makeshift' OOV item embeddings and the behavioral IV item embeddings. However, merely using the 'makeshift' embedding will result in suboptimal recommendation performance due to the substantial gap between the content feature and the behavioral embeddings. To bridge the gap, we propose a novel **User Sequence IMagination (USIM)** fine-tuning framework, which first imagines the user sequences and then refines the generated OOV embeddings with the user behavioral embeddings. Specifically, we frame the user sequence imagination as a reinforcement learning problem and develop a recommendation-focused reward function to evaluate to what extent a user can help recommend the OOV items. Besides, we propose an embedding-driven transition function to model the embedding transition after imaging a user. USIM has been deployed on a prominent e-commerce platform for months, offering recommendations for millions of OOV items and billions of users. Extensive experiments demonstrate that USIM outperforms traditional generative models in OOV item recommendation performance across traditional collaborative filtering and GNN-based collaborative filtering models.", "keywords": "Recommendation; Reinforcement Learning;", "primary_area": "other", "supplementary_material": "", "author": "Ruochen Liu;Hao Chen;Yuanchen Bei;Qijie Shen;Fangwei Zhong;Senzhang Wang;Jianxin Wang", "authorids": "~Ruochen_Liu5;~Hao_Chen18;~Yuanchen_Bei1;~Qijie_Shen1;~Fangwei_Zhong3;~Senzhang_Wang2;~Jianxin_Wang1", "gender": "M;M;M;M;M;;M", "homepage": "https://space.bilibili.com/15291300?spm_id_from=333.1007.0.0;;https://github.com/YuanchenBei;;https://senzhangwangcsu.github.io/index.html;https://faculty.csu.edu.cn/wangjianxin1/zh_CN/index/106082/list/;https://fangweizhong.xyz/", "dblp": "03/6999-1;175/3324-62;331/2167;304/3573;118/5055;75/2669-1.html;207/1900", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;7XFbwrcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;zdWyGRMAAAAJ;7pgY2F0AAAAJ;ejDz1bYAAAAJ", "orcid": "0009-0000-6597-2044;0000-0001-6816-5344;0000-0003-2834-2873;;0000-0002-3615-4859;0000-0003-1516-0480;0000-0002-0428-4552", "linkedin": ";hao-chen-phd-37136b1b5/;;;;;", "or_profile": "~Ruochen_Liu5;~Hao_Chen18;~Yuanchen_Bei1;~Qijie_Shen1;~Senzhang_Wang2;~Jianxin_Wang1;~fangwei_zhong1", "aff": "Central South University;Hong Kong Polytechnic University;Zhejiang University;Alibaba Group;Central South University;Central South University;Peking University", "aff_domain": "csu.edu.cn;polyu.edu.hk;zju.edu.cn;alibaba-inc.com;csu.edu.cn;csu.edu.cn;pku.edu.cn", "position": "MS student;Postdoc;MS student;Researcher;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nliu2024fine,\ntitle={Fine Tuning Out-of-Vocabulary Item Recommendation with User Sequence Imagination},\nauthor={Ruochen Liu and Hao Chen and Yuanchen Bei and Qijie Shen and Fangwei Zhong and Senzhang Wang and Jianxin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JyWAFGCJPl}\n}", "github": "", "reviewers": "wA5P;axrp;bkbE;VoqV", "pdf_size": 1506624, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "78;79;147;168", "wc_strengths": "30;61;100;207", "wc_weaknesses": "233;182;73;127", "wc_questions": "6;80;40;2", "wc_limitations": "1;40;1;1", "wc_review": "348;442;361;505", "wc_reply_reviewers": "12;17;21;9", "wc_reply_authors": "26;31;26;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.0, 40.193283020922784 ], "wc_strengths_avg": [ 99.5, 66.83748948008146 ], "wc_weaknesses_avg": [ 153.75, 59.82213219202405 ], "wc_questions_avg": [ 32.0, 31.400636936215164 ], "wc_limitations_avg": [ 10.75, 16.887495373796554 ], "wc_review_avg": [ 414.0, 63.698508616764336 ], "wc_reply_reviewers_avg": [ 14.75, 4.602988159880492 ], "wc_reply_authors_avg": [ 28.0, 2.1213203435596424 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12293500618796019314&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 2, "email": "csu.edu.cn;polyu.edu.hk;zju.edu.cn;alibaba-inc.com;csu.edu.cn;csu.edu.cn;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;0;0;4", "aff_unique_norm": "Central South University;Hong Kong Polytechnic University;Zhejiang University;Alibaba Group;Peking University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.csu.edu.cn;https://www.polyu.edu.hk;https://www.zju.edu.cn;https://www.alibaba.com;http://www.pku.edu.cn", "aff_unique_abbr": "CSU;PolyU;ZJU;Alibaba;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "REDUCR: Robust Data Downsampling using Class Priority Reweighting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95687", "id": "Jz7Z7KkR94", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jz7Z7KkR94", "openreview": "https://openreview.net/forum?id=Jz7Z7KkR94", "poster": "", "project": "", "author_site": "William Bankes, George Hughes, Ilija Bogunovic, Zi Wang", "tldr": "", "abstract": "Modern machine learning models are becoming increasingly expensive to train for real-world image and text classification tasks, where massive web-scale data is collected in a streaming fashion. To reduce the training cost, online batch selection techniques have been developed to choose the most informative datapoints. However, many existing techniques are not robust to class imbalance and distributional shifts, and can suffer from poor worst-class generalization performance. This work introduces REDUCR, a robust and efficient data downsampling method that uses class priority reweighting. REDUCR reduces the training data while preserving worst-class generalization performance. REDUCR assigns priority weights to datapoints in a class-aware manner using an online learning algorithm. We demonstrate the data efficiency and robust performance of REDUCR on vision and text classification tasks. On web-scraped datasets with imbalanced class distributions, REDUCR significantly improves worst-class test accuracy (and average accuracy), surpassing state-of-the-art methods by around 15\\%.", "keywords": "Class Robustness;Online Batch Selection;Robust Machine Learning;Training Efficiency;Data Downsampling;Class Imbalance", "primary_area": "active_learning", "supplementary_material": "/attachment/7afd80c21f3b45f15161b3325769efa2bf2dc15d.zip", "author": "William Bankes;George Hughes;Ilija Bogunovic;Zi Wang", "authorids": "~William_Bankes1;~George_Hughes1;~Ilija_Bogunovic2;~Zi_Wang1", "gender": "M;M;F;M", "homepage": "https://github.com/williambankes;https://github.com/ghughes1200;http://zi-wang.com/;http://ilijabogunovic.com/", "dblp": ";;78/8711-4;142/2725", "google_scholar": ";;U0egIsIAAAAJ;xMvt3NEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~William_Bankes1;~George_Hughes1;~Zi_Wang1;~Ilija_Bogunovic1", "aff": "University College London, University of London;;Google DeepMind;Swiss Federal Institute of Technology", "aff_domain": "ucl.ac.uk;;google.com;ethz.ch", "position": "PhD student;;Research scientist;Postdoc", "bibtex": "@inproceedings{\nbankes2024reducr,\ntitle={{REDUCR}: Robust Data Downsampling using Class Priority Reweighting},\nauthor={William Bankes and George Hughes and Ilija Bogunovic and Zi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jz7Z7KkR94}\n}", "github": "", "reviewers": "75mt;9L5X;L9Bs", "pdf_size": 1258799, "rating": "6;7;7", "confidence": "3;1;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "47;43;314", "wc_strengths": "15;71;80", "wc_weaknesses": "57;31;141", "wc_questions": "32;25;43", "wc_limitations": "19;70;5", "wc_review": "170;240;583", "wc_reply_reviewers": "19;23;0", "wc_reply_authors": "13;14;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 134.66666666666666, 126.8183302558778 ], "wc_strengths_avg": [ 55.333333333333336, 28.755675768252935 ], "wc_weaknesses_avg": [ 76.33333333333333, 46.94204464609052 ], "wc_questions_avg": [ 33.333333333333336, 7.408703590297623 ], "wc_limitations_avg": [ 31.333333333333332, 27.932458220182166 ], "wc_review_avg": [ 331.0, 180.46791035158208 ], "wc_reply_reviewers_avg": [ 14.0, 10.03327796219494 ], "wc_reply_authors_avg": [ 9.0, 6.377042156569663 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=529242377943779083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;;google.com;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University College London;Google;Swiss Federal Institute of Technology", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.ucl.ac.uk;https://deepmind.com;https://www.ethz.ch", "aff_unique_abbr": "UCL;DeepMind;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Switzerland" }, { "title": "D-CPT Law: Domain-specific Continual Pre-Training Scaling Law for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95686", "id": "JzKFN5fWOk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JzKFN5fWOk", "openreview": "https://openreview.net/forum?id=JzKFN5fWOk", "poster": "/media/PosterPDFs/NeurIPS%202024/95686.png?t=1733331360.850055", "project": "", "author_site": "Haoran Que, Jiaheng Liu, Ge Zhang, Chenchen Zhang, Xingwei Qu, Yinghao Ma, Feiyu Duan, ZhiqiBai zhiqi, JiakaiWang, Yuanxing Zhang, Xu Tan, Jie Fu, Jiamang Wang, Lin Qu, Wenbo Su, Bo Zheng", "tldr": "", "abstract": "Continual Pre-Training (CPT) on Large Language Models (LLMs) has been widely used to expand the model\u2019s fundamental understanding of specific downstream domains (e.g., math and code). For the CPT on domain-specific LLMs, one important question is how to choose the optimal mixture ratio between the general-corpus (e.g., Dolma, Slim-pajama) and the downstream domain-corpus. Existing methods usually adopt laborious human efforts by grid-searching on a set of mixture ratios, which require high GPU training consumption costs. Besides, we cannot guarantee the selected ratio is optimal for the specific domain. To address the limitations of existing methods, inspired by the Scaling Law for performance prediction, we propose to investigate the Scaling Law of the Domain-specific Continual Pre-Training (D-CPT Law) to decide the optimal mixture ratio with acceptable training costs for LLMs of different sizes. Specifically, by fitting the D-CPT Law, we can easily predict the general and downstream performance of arbitrary mixture ratios, model sizes, and dataset sizes using small-scale training costs on limited experiments. Moreover, we also extend our standard D-CPT Law on cross-domain settings and propose the Cross-Domain D-CPT Law to predict the D-CPT law of target domains, where very small training costs (about 1\\% of the normal training costs) are needed for the target domains. Comprehensive experimental results on six downstream domains demonstrate the effectiveness and generalizability of our proposed D-CPT Law and Cross-Domain D-CPT Law.", "keywords": "Large Language Models;Scaling Law;Continual Pre-training", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Haoran Que;Jiaheng Liu;Ge Zhang;Chenchen Zhang;Xingwei Qu;Yinghao Ma;Feiyu Duan;ZhiqiBai;JiakaiWang;Yuanxing Zhang;Xu Tan;Jie Fu;Jiamang Wang;Lin Qu;Wenbo Su;Bo Zheng", "authorids": "~Haoran_Que1;~Jiaheng_Liu1;~Ge_Zhang5;~Chenchen_Zhang3;~Xingwei_Qu1;~Yinghao_Ma1;~Feiyu_Duan1;~ZhiqiBai1;~JiakaiWang1;~Yuanxing_Zhang3;~Xu_Tan1;~Jie_Fu2;~Jiamang_Wang1;~Lin_Qu2;~Wenbo_Su2;~Bo_Zheng5", "gender": "M;M;M;M;M;M;M;;M;M;M;M;M;M;M;M", "homepage": ";https://liujiaheng.github.io/;;https://orcid.org/my-orcid?orcid=0000-0002-3265-7133;https://nicolaus625.github.io/;https://github.com/dfy37;https://github.com/BaiZhiqi;https://github.com/BlcDec;;https://tan-xu.github.io/;;https://www.linkedin.com/in/%E7%90%B3-%E6%9B%B2-05a54498/;;;https://scholar.google.com/citations?hl=zh-CN&user=WmzUNaQAAAAJ;https://bigaidream.github.io/", "dblp": "358/4966;225/1962;;;248/7435;;;;194/7059.html;96/10484-3;;;;33/1610-7;;", "google_scholar": "onEik5gAAAAJ;yFI_RjUAAAAJ;qyTrq4kAAAAJ;https://scholar.google.com/citations?hl=zh-CN;RiYt9toAAAAJ;;;;COdftTMAAAAJ;tob-U1oAAAAJ;;;;3gHhO9QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;66osleIAAAAJ", "orcid": ";;;;0009-0009-9500-4015;;;;0000-0003-1460-8124;0000-0001-5631-0639;;;0009-0009-3800-7543;0000-0002-4037-6315;;0000-0002-4494-843X", "linkedin": ";;ge-zhang-792797169/;;nicolaus625/;;;;;;%E5%AE%B6%E5%BF%99-%E7%8E%8B-b78784114/;;https://www.linkedin.cn/incareer/in/wenbo-su-b923b0b1;bo-zheng-0315254/;;", "or_profile": "~Haoran_Que1;~Jiaheng_Liu1;~Ge_Zhang5;~Xingwei_Qu1;~Yinghao_Ma1;~Feiyu_Duan1;~ZhiqiBai1;~JiakaiWang1;~Yuanxing_Zhang3;~Xu_Tan1;~Jiamang_Wang1;~Lin_Qu2;~Wenbo_Su2;~Bo_Zheng5;~Zhang_Chenchen1;~Jie_Fu1", "aff": "Beihang University;Alibaba Group;University of Waterloo;Hong Kong University of Science and Technology;Yamaha;Beihang University;;;Alibaba Group;Microsoft;Alibaba Group;;Alibaba Group;Alibaba Group;Beijing University of Posts and Telecommunications;Hong Kong University of Science and Technology", "aff_domain": "buaa.edu.cn;alibaba-inc.com;cs.uwaterloo.ca;ust.hk;bp.music.yamaha.com;buaa.edu.cn;;;alibaba-inc.com;microsoft.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;bupt.edu.cn;ust.hk", "position": "MS student;Researcher;PhD student;Researcher;Intern;MS student;;;Researcher;Principal Researcher;Researcher;;Researcher;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nque2024dcpt,\ntitle={D-{CPT} Law: Domain-specific Continual Pre-Training Scaling Law for Large Language Models},\nauthor={Haoran Que and Jiaheng Liu and Ge Zhang and Chenchen Zhang and Xingwei Qu and Yinghao Ma and Feiyu Duan and ZhiqiBai and JiakaiWang and Yuanxing Zhang and Xu Tan and Jie Fu and Jiamang Wang and Lin Qu and Wenbo Su and Bo Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JzKFN5fWOk}\n}", "github": "", "reviewers": "sFHX;rpUb;ycBX", "pdf_size": 1674317, "rating": "5;6;7", "confidence": "3;5;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "79;85;92", "wc_strengths": "45;75;81", "wc_weaknesses": "28;75;26", "wc_questions": "69;84;41", "wc_limitations": "14;18;9", "wc_review": "235;337;249", "wc_reply_reviewers": "15;12;15", "wc_reply_authors": "87;14;87", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.33333333333333, 5.312459150169742 ], "wc_strengths_avg": [ 67.0, 15.748015748023622 ], "wc_weaknesses_avg": [ 43.0, 22.642143596988927 ], "wc_questions_avg": [ 64.66666666666667, 17.82008853949821 ], "wc_limitations_avg": [ 13.666666666666666, 3.681787005729087 ], "wc_review_avg": [ 273.6666666666667, 45.146674542232034 ], "wc_reply_reviewers_avg": [ 14.0, 1.4142135623730951 ], "wc_reply_authors_avg": [ 62.666666666666664, 34.41253001774532 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2538230965289460211&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;alibaba-inc.com;cs.uwaterloo.ca;ust.hk;bp.music.yamaha.com;buaa.edu.cn;;;alibaba-inc.com;microsoft.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;bupt.edu.cn;ust.hk", "author_num": 16, "aff_unique_index": "0;1;2;3;4;0;1;5;1;1;1;6;3", "aff_unique_norm": "Beihang University;Alibaba Group;University of Waterloo;Hong Kong University of Science and Technology;Yamaha Corporation;Microsoft;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;;;;Microsoft Corporation;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.alibaba.com;https://uwaterloo.ca;https://www.ust.hk;https://yamaha.com;https://www.microsoft.com;http://www.bupt.edu.cn/", "aff_unique_abbr": "BUAA;Alibaba;UW;HKUST;Yamaha;Microsoft;BUPT", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Hong Kong SAR;Beijing", "aff_country_unique_index": "0;0;1;0;2;0;0;3;0;0;0;0;0", "aff_country_unique": "China;Canada;Japan;United States" }, { "title": "Rejection via Learning Density Ratios", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95685", "id": "JzcIKnnOpJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=JzcIKnnOpJ", "openreview": "https://openreview.net/forum?id=JzcIKnnOpJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95685.png?t=1733473920.519439", "project": "", "author_site": "Alexander Soen, Hisham Husain, Philip Schulz, Vu Nguyen", "tldr": "", "abstract": "Classification with rejection emerges as a learning paradigm which allows models to abstain from making predictions. \nThe predominant approach is to alter the supervised learning pipeline by augmenting typical loss functions, letting model rejection incur a lower loss than an incorrect prediction.\nInstead, we propose a different distributional perspective, where we seek to find an idealized data distribution which maximizes a pretrained model's performance.\nThis can be formalized via the optimization of a loss's risk with a $ \\phi$-divergence regularization term.\nThrough this idealized distribution, a rejection decision can be made by utilizing the density ratio between this distribution and the data distribution.\nWe focus on the setting where our $ \\phi $-divergences are specified by the family of $ \\alpha $-divergence.\nOur framework is tested empirically over clean and noisy datasets.", "keywords": "Rejection;Distributional Robust Optimization;Variational Inference;Density Ratio", "primary_area": "optimization", "supplementary_material": "", "author": "Alexander Soen;Hisham Husain;Philip Schulz;Vu Nguyen", "authorids": "~Alexander_Soen1;~Hisham_Husain1;~Philip_Schulz1;~Vu_Nguyen1", "gender": "M;;;M", "homepage": "https://alexandersoen.github.io/;;;http://ntienvu.github.io", "dblp": "245/9661.html;222/3235;184/3773;68/11111", "google_scholar": "apRX4awAAAAJ;bwq3crYAAAAJ;;https://scholar.google.com.au/citations?user=5RQyC9cAAAAJ", "orcid": ";;;0000-0002-0294-4561", "linkedin": ";;;tienvunguyen/", "or_profile": "~Alexander_Soen1;~Hisham_Husain1;~Philip_Schulz1;~Vu_Nguyen1", "aff": "Australian National University;Amazon;;Amazon", "aff_domain": "anu.edu.au;amazon.com;;amazon.com", "position": "PhD student;Researcher;;Machine Learning Scientist", "bibtex": "@inproceedings{\nsoen2024rejection,\ntitle={Rejection via Learning Density Ratios},\nauthor={Alexander Soen and Hisham Husain and Philip Schulz and Vu Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=JzcIKnnOpJ}\n}", "github": "", "reviewers": "MrTu;dzAs;MiAk;m8bi", "pdf_size": 997613, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;2", "wc_summary": "103;52;97;78", "wc_strengths": "45;44;84;33", "wc_weaknesses": "78;176;372;19", "wc_questions": "123;177;187;259", "wc_limitations": "1;8;51;9", "wc_review": "350;457;791;398", "wc_reply_reviewers": "112;245;103;35", "wc_reply_authors": "720;73;67;21", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 82.5, 19.880895352071043 ], "wc_strengths_avg": [ 51.5, 19.345542122153102 ], "wc_weaknesses_avg": [ 161.25, 133.97644382502472 ], "wc_questions_avg": [ 186.5, 48.42261868176896 ], "wc_limitations_avg": [ 17.25, 19.727835664360143 ], "wc_review_avg": [ 499.0, 172.79322903401047 ], "wc_reply_reviewers_avg": [ 123.75, 76.07028000474298 ], "wc_reply_authors_avg": [ 220.25, 289.23120077197757 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14493884433213254779&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "anu.edu.au;amazon.com;;amazon.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Australian National University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.anu.edu.au;https://www.amazon.com", "aff_unique_abbr": "ANU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Australia;United States" }, { "title": "Progressive Exploration-Conformal Learning for Sparsely Annotated Object Detection in Aerial Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95684", "id": "Jzog9gvOf6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Jzog9gvOf6", "openreview": "https://openreview.net/forum?id=Jzog9gvOf6", "poster": "/media/PosterPDFs/NeurIPS%202024/95684.png?t=1730036019.7015145", "project": "", "author_site": "Zihan Lu, Chenxu Wang, Chunyan Xu, Xiangwei Zheng, Zhen Cui", "tldr": "", "abstract": "The ability to detect aerial objects with limited annotation is pivotal to the development of real-world aerial intelligence systems. In this work, we focus on a demanding but practical sparsely annotated object detection (SAOD) in aerial images, which encompasses a wider variety of aerial scenes with the same number of annotated objects. Although most existing SAOD methods rely on fixed thresholding to filter pseudo-labels for enhancing detector performance, adapting to aerial objects proves challenging due to the imbalanced probabilities/confidences associated with predicted aerial objects. To address this problem, we propose a novel Progressive Exploration-Conformal Learning (PECL) framework to address the SAOD task, which can adaptively perform the selection of high-quality pseudo-labels in aerial images. Specifically, the pseudo-label exploration can be formulated as a decision-making paradigm by adopting a conformal pseudo-label explorer and a multi-clue selection evaluator. The conformal pseudo-label explorer learns an adaptive policy by maximizing the cumulative reward, which can decide how to select these high-quality candidates by leveraging their essential characteristics and inter-instance contextual information. The multi-clue selection evaluator is designed to evaluate the explorer-guided pseudo-label selections by providing an instructive feedback for policy optimization. Finally, the explored pseudo-labels can be adopted to guide the optimization of aerial object detector in a closed-looping progressive fashion. Comprehensive evaluations on two public datasets demonstrate the superiority of our PECL when compared with other state-of-the-art methods in the sparsely annotated aerial object detection task.", "keywords": "Aerial object detection; Sparse annotation; Conformal exploratory learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zihan Lu;Chenxu Wang;Chunyan Xu;Xiangwei Zheng;Zhen Cui", "authorids": "~Zihan_Lu1;~Chenxu_Wang9;~Chunyan_Xu3;~Xiangwei_Zheng1;~Zhen_Cui4", "gender": "F;M;F;;M", "homepage": "https://vgg-ai.cn/students/;https://vgg-ai.cn/students/;;;http://aip.seu.edu.cn/zcui/", "dblp": ";;70/8453.html;;59/8491-1", "google_scholar": ";;VM_IRfMAAAAJ;;ChRyl3kAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zihan_Lu1;~Chenxu_Wang9;~Chunyan_Xu3;~Xiangwei_Zheng1;~Zhen_Cui4", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;;njust.edu.cn", "position": "MS student;MS student;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nlu2024progressive,\ntitle={Progressive Exploration-Conformal Learning for Sparsely Annotated Object Detection in Aerial Images},\nauthor={Zihan Lu and Chenxu Wang and Chunyan Xu and Xiangwei Zheng and Zhen Cui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Jzog9gvOf6}\n}", "github": "", "reviewers": "1ik3;hP3V;RekV", "pdf_size": 2765036, "rating": "4;5;7", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "71;135;58", "wc_strengths": "49;61;48", "wc_weaknesses": "142;212;162", "wc_questions": "8;4;100", "wc_limitations": "10;73;1", "wc_review": "280;485;369", "wc_reply_reviewers": "0;133;112", "wc_reply_authors": "0;418;79", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 88.0, 33.65511352924941 ], "wc_strengths_avg": [ 52.666666666666664, 5.90668171555645 ], "wc_weaknesses_avg": [ 172.0, 29.43920288775949 ], "wc_questions_avg": [ 37.333333333333336, 44.34210439550904 ], "wc_limitations_avg": [ 28.0, 32.03123475609393 ], "wc_review_avg": [ 378.0, 83.93251257210561 ], "wc_reply_reviewers_avg": [ 81.66666666666667, 58.37998134825175 ], "wc_reply_authors_avg": [ 165.66666666666666, 181.3180140587863 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14499134015838615622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "njust.edu.cn;njust.edu.cn;njust.edu.cn;;njust.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Image Reconstruction Via Autoencoding Sequential Deep Image Prior", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95683", "id": "K1EG2ABzNE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=K1EG2ABzNE", "openreview": "https://openreview.net/forum?id=K1EG2ABzNE", "poster": "/media/PosterPDFs/NeurIPS%202024/95683.png?t=1730376997.8801856", "project": "", "author_site": "Ismail Alkhouri, Shijun Liang, Evan Bell, Qing Qu, Rongrong Wang, Saiprasad Ravishankar", "tldr": "", "abstract": "Recently, Deep Image Prior (DIP) has emerged as an effective unsupervised one-shot learner, delivering competitive results across various image recovery problems. This method only requires the noisy measurements and a forward operator, relying solely on deep networks initialized with random noise to learn and restore the structure of the data. However, DIP is notorious for its vulnerability to overfitting due to the overparameterization of the network. Building upon insights into the impact of the DIP input and drawing inspiration from the gradual denoising process in cutting-edge diffusion models, we introduce Autoencoding Sequential DIP (aSeqDIP) for image reconstruction. This method progressively denoises and reconstructs the image through a sequential optimization of network weights. This is achieved using an input-adaptive DIP objective, combined with an autoencoding regularization term. Compared to diffusion models, our method does not require training data and outperforms other DIP-based methods in mitigating noise overfitting while maintaining a similar number of parameter updates as Vanilla DIP. Through extensive experiments, we validate the effectiveness of our method in various image reconstruction tasks, such as MRI and CT reconstruction, as well as in image restoration tasks like image denoising, inpainting, and non-linear deblurring.", "keywords": "Image Reconstruction;Deep Image Prior;Generative Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Ismail Alkhouri;Shijun Liang;Evan Bell;Qing Qu;Rongrong Wang;Saiprasad Ravishankar", "authorids": "~Ismail_Alkhouri1;~Shijun_Liang2;~Evan_Bell1;~Qing_Qu2;~Rongrong_Wang1;~Saiprasad_Ravishankar1", "gender": "M;M;M;M;;", "homepage": "https://sites.google.com/view/ismailalkhouri/about;;;https://qingqu.engin.umich.edu/;https://users.math.msu.edu/users/wangron6/;", "dblp": "241/7238.html;;;127/6874-1;;", "google_scholar": "https://scholar.google.com/citations?hl=en;x4Iugg8AAAAJ;;JfblW3MAAAAJ;;", "orcid": ";;0000-0003-1584-7901;0000-0001-9136-558X;;", "linkedin": ";;;qing-q-1a0b9746/;;", "or_profile": "~Ismail_Alkhouri1;~Shijun_Liang2;~Evan_Bell1;~Qing_Qu2;~Rongrong_Wang1;~Saiprasad_Ravishankar1", "aff": "Michigan State University;Michigan State University;Michigan State University;University of Michigan;Michigan State University;", "aff_domain": "msu.edu;msu.edu;msu.edu;umich.edu;msu.edu;", "position": "Postdoc;PhD student;Undergrad student;Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nalkhouri2024image,\ntitle={Image Reconstruction Via Autoencoding Sequential Deep Image Prior},\nauthor={Ismail Alkhouri and Shijun Liang and Evan Bell and Qing Qu and Rongrong Wang and Saiprasad Ravishankar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=K1EG2ABzNE}\n}", "github": "", "reviewers": "MAra;qnNG;Q1ff;hjHc", "pdf_size": 11448872, "rating": "3;5;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "3;3;2;2", "wc_summary": "60;140;106;124", "wc_strengths": "38;39;160;70", "wc_weaknesses": "179;86;804;252", "wc_questions": "10;87;111;72", "wc_limitations": "7;10;50;48", "wc_review": "294;362;1231;566", "wc_reply_reviewers": "101;38;329;0", "wc_reply_authors": "919;321;954;56", "reply_reviewers": "1;1;3;0", "reply_authors": "3;2;4;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 107.5, 29.94578434437809 ], "wc_strengths_avg": [ 76.75, 49.75628101054178 ], "wc_weaknesses_avg": [ 330.25, 279.7752446161025 ], "wc_questions_avg": [ 70.0, 37.32961290986018 ], "wc_limitations_avg": [ 28.75, 20.29008378494283 ], "wc_review_avg": [ 613.25, 370.4371572885204 ], "wc_reply_reviewers_avg": [ 117.0, 127.6028996535737 ], "wc_reply_authors_avg": [ 562.5, 385.75542769998714 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17916141680441877036&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "msu.edu;msu.edu;msu.edu;umich.edu;msu.edu;", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Michigan State University;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.msu.edu;https://www.umich.edu", "aff_unique_abbr": "MSU;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Analytical Study of Utility Functions in Multi-Objective Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95682", "id": "K3h2kZFz8h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=K3h2kZFz8h", "openreview": "https://openreview.net/forum?id=K3h2kZFz8h", "poster": "/media/PosterPDFs/NeurIPS%202024/95682.png?t=1730304738.5302894", "project": "", "author_site": "Manel Rodr\u00edguez Soto, Juan A Rodr\u00edguez-Aguilar, Maite L\u00f3pez-S\u00e1nchez", "tldr": "", "abstract": "Multi-objective reinforcement learning (MORL) is an excellent framework for multi-objective sequential decision-making. MORL employs a utility function to aggregate multiple objectives into one that expresses a user's preferences. However, MORL still misses two crucial theoretical analyses of the properties of utility functions: (1) a characterisation of the utility functions for which an associated optimal policy exists, and (2) a characterisation of the types of preferences that can be expressed as utility functions. As a result, we formally characterise the families of preferences and utility functions that MORL should focus on: those for which an optimal policy is guaranteed to exist. We expect our theoretical results to promote the development of novel MORL algorithms that exploit our theoretical findings.", "keywords": "reinforcement learning;multi-objective decision making; multi-objective reinforcement learning;learning theory;markov decision processes", "primary_area": "learning_theory", "supplementary_material": "", "author": "Manel Rodriguez-Soto;Juan Antonio Rodriguez Aguilar;Maite L\u00f3pez-S\u00e1nchez", "authorids": "~Manel_Rodriguez-Soto1;~Juan_Antonio_Rodriguez_Aguilar2;~Maite_L\u00f3pez-S\u00e1nchez1", "gender": "M;M;F", "homepage": "https://iiia.csic.es/en-us/people/person/?person_id=41;https://www.iiia.csic.es/~jar/Jariiia/Home.html;http://www.maia.ub.es/~maite/", "dblp": "266/5674;04/6342.html;29/2991", "google_scholar": "https://scholar.google.es/citations?user=iQuC93oAAAAJ;gmFYg7gAAAAJ;ABr4UU4AAAAJ", "orcid": ";0000-0002-2940-6886;0000-0002-1838-5928", "linkedin": ";juan-antonio-rodriguez-aguilar-69707a1/;https://linkedin.com/in/maite-lopez-sanchez-67b497", "or_profile": "~Manel_Rodriguez-Soto1;~Juan_Antonio_Rodriguez_Aguilar2;~Maite_L\u00f3pez-S\u00e1nchez1", "aff": "Artificial Intelligence Research Institute, Spanish National Research Council;Spanish National Research Council;Universitat de Barcelona", "aff_domain": "iiia.csic.es;iiia.csic.es;ub.edu", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nrodriguez-soto2024an,\ntitle={An Analytical Study of Utility Functions in Multi-Objective Reinforcement Learning},\nauthor={Manel Rodriguez-Soto and Juan Antonio Rodriguez Aguilar and Maite L{\\'o}pez-S{\\'a}nchez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=K3h2kZFz8h}\n}", "github": "", "reviewers": "nkkJ;WWNq;59cy", "pdf_size": 355849, "rating": "4;6;7", "confidence": "3;4;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "64;161;186", "wc_strengths": "41;68;101", "wc_weaknesses": "81;107;122", "wc_questions": "188;451;33", "wc_limitations": "23;20;8", "wc_review": "397;807;450", "wc_reply_reviewers": "9;347;92", "wc_reply_authors": "0;736;0", "reply_reviewers": "1;3;1", "reply_authors": "1;3;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 137.0, 52.61812108643434 ], "wc_strengths_avg": [ 70.0, 24.535688292770594 ], "wc_weaknesses_avg": [ 103.33333333333333, 16.937794687883336 ], "wc_questions_avg": [ 224.0, 172.53598658444176 ], "wc_limitations_avg": [ 17.0, 6.48074069840786 ], "wc_review_avg": [ 551.3333333333334, 182.07385558857396 ], "wc_reply_reviewers_avg": [ 149.33333333333334, 143.82010367894406 ], "wc_reply_authors_avg": [ 245.33333333333334, 346.9537273021993 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G1lysngYtroJ:scholar.google.com/&scioq=An+Analytical+Study+of+Utility+Functions+in+Multi-Objective+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 9, "email": "iiia.csic.es;iiia.csic.es;ub.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Spanish National Research Council;University of Barcelona", "aff_unique_dep": "Artificial Intelligence Research Institute;", "aff_unique_url": "https://www.csic.es;https://www.ub.edu", "aff_unique_abbr": "CSIC;UB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "title": "Even Sparser Graph Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95681", "id": "K3k4bWuNnk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=K3k4bWuNnk", "openreview": "https://openreview.net/forum?id=K3k4bWuNnk", "poster": "/media/PosterPDFs/NeurIPS%202024/95681.png?t=1733504164.0419865", "project": "", "author_site": "Hamed Shirzad, Honghao Lin, Balaji Venkatachalam, Ameya Velingker, David Woodruff, Danica J. Sutherland", "tldr": "", "abstract": "Graph Transformers excel in long-range dependency modeling, but generally require quadratic memory complexity in the number of nodes in an input graph, and hence have trouble scaling to large graphs. Sparse attention variants such as Exphormer can help, but may require high-degree augmentations to the input graph for good performance, and do not attempt to sparsify an already-dense input graph. As the learned attention mechanisms tend to use few of these edges, however, such high-degree connections may be unnecessary. We show (empirically and with theoretical backing) that attention scores on graphs are usually quite consistent across network widths, and use this observation to propose a two-stage procedure, which we call Spexphormer: first, train a narrow network on the full augmented graph. Next, use only the active connections to train a wider network on a much sparser graph. We establish theoretical conditions when a narrow network's attention scores can match those of a wide network, and show that Spexphormer achieves good performance with drastically reduced memory requirements on various graph datasets.", "keywords": "Graph Transformers;Expander Graphs;Sparsification", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Hamed Shirzad;Honghao Lin;Balaji Venkatachalam;Ameya Velingker;David Woodruff;Danica J. Sutherland", "authorids": "~Hamed_Shirzad1;~Honghao_Lin1;~Balaji_Venkatachalam1;~Ameya_Velingker1;~David_Woodruff1;~Danica_J._Sutherland1", "gender": "M;M;;M;M;F", "homepage": "https://sites.google.com/view/hamedshirzad/home;https://honghlin.github.io;https://ijalabv.wordpress.com/;http://www.ameyavelingker.com;http://www.cs.cmu.edu/~dwoodruf/;http://www.djsutherland.ml", "dblp": "295/9054;https://dblp.uni-trier.de/pid/264/2663.html;39/1070.html;117/3666.html;w/DPWoodruff;92/10966", "google_scholar": "https://scholar.google.ca/citations?user=A2CbSLIAAAAJ;;e_YXLdYAAAAJ;6dFFudUAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;https://scholar.google.co.uk/citations?user=uO_NqicAAAAJ", "orcid": ";;;;;0000-0002-1525-3532", "linkedin": "hamed-shirzad-84181473/?originalSubdomain=ir;;;ameya-velingker-5811b711;;", "or_profile": "~Hamed_Shirzad1;~Honghao_Lin1;~Balaji_Venkatachalam1;~Ameya_Velingker1;~David_Woodruff1;~Danica_J._Sutherland2", "aff": "University of British Columbia;Carnegie Mellon University;Google;Google;Carnegie Mellon University;University of British Columbia", "aff_domain": "cs.ubc.ca;cmu.edu;google.com;google.com;cmu.edu;cs.ubc.ca", "position": "PhD student;PhD student;Researcher;Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nshirzad2024even,\ntitle={Even Sparser Graph Transformers},\nauthor={Hamed Shirzad and Honghao Lin and Balaji Venkatachalam and Ameya Velingker and David Woodruff and Danica J. Sutherland},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=K3k4bWuNnk}\n}", "github": "", "reviewers": "X47Y;vT4j;UFNp", "pdf_size": 850133, "rating": "3;4;7", "confidence": "3;4;4", "soundness": "1;2;3", "novelty": "2;2;3", "presentation": "3;3;2", "wc_summary": "116;86;121", "wc_strengths": "48;75;109", "wc_weaknesses": "664;248;4", "wc_questions": "235;49;169", "wc_limitations": "146;3;12", "wc_review": "1209;461;415", "wc_reply_reviewers": "51;126;95", "wc_reply_authors": "1000;608;88", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 4.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 107.66666666666667, 15.456030825826174 ], "wc_strengths_avg": [ 77.33333333333333, 24.957742063113177 ], "wc_weaknesses_avg": [ 305.3333333333333, 272.476706445809 ], "wc_questions_avg": [ 151.0, 76.99350621968063 ], "wc_limitations_avg": [ 53.666666666666664, 65.39283005209533 ], "wc_review_avg": [ 695.0, 363.9377236103268 ], "wc_reply_reviewers_avg": [ 90.66666666666667, 30.771559740918054 ], "wc_reply_authors_avg": [ 565.3333333333334, 373.5427983808846 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6933752452815364, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9232502564221333600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.ubc.ca;cmu.edu;google.com;google.com;cmu.edu;cs.ubc.ca", "author_num": 6, "aff_unique_index": "0;1;2;2;1;0", "aff_unique_norm": "University of British Columbia;Carnegie Mellon University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.ubc.ca;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "UBC;CMU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "ProvNeRF: Modeling per Point Provenance in NeRFs as a Stochastic Field", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95680", "id": "K5PA3SK2jB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=K5PA3SK2jB", "openreview": "https://openreview.net/forum?id=K5PA3SK2jB", "poster": "", "project": "", "author_site": "Kiyohiro Nakayama, Mikaela Angelina Uy, Yang You, Ke Li, Leonidas Guibas", "tldr": "", "abstract": "Neural radiance fields (NeRFs) have gained popularity with multiple works showing promising results across various applications. However, to the best of our knowledge, existing works do not explicitly model the distribution of training camera poses, or consequently the triangulation quality, a key factor affecting reconstruction quality dating back to classical vision literature. We close this gap with ProvNeRF, an approach that models the provenance for each point -- i.e., the locations where it is likely visible -- of NeRFs as a stochastic field. We achieve this by extending implicit maximum likelihood estimation (IMLE) to functional space with an optimizable objective. We show that modeling per-point provenance during the NeRF optimization enriches the model with information on triangulation leading to improvements in novel view synthesis and uncertainty estimation under the challenging sparse, unconstrained view setting against competitive baselines. The code will be available at https://github.com/georgeNakayama/ProvNeRF.", "keywords": "NeRF;Reconstruction;Stochastic Process;Sparse View;Novel View Synthesis;Uncertainty Estimation", "primary_area": "machine_vision", "supplementary_material": "/attachment/275f6969c4c7ac63e617c3a1be738cf098a620c0.zip", "author": "Kiyohiro Nakayama;Mikaela Angelina Uy;Yang You;Ke Li;Leonidas Guibas", "authorids": "~Kiyohiro_Nakayama1;~Mikaela_Angelina_Uy1;~Yang_You2;~Ke_Li1;~Leonidas_Guibas1", "gender": "M;F;M;M;M", "homepage": "https://georgenakayama.github.io/;http://mikacuy.github.io;https://qq456cvb.github.io;http://www.sfu.ca/~keli/;http://geometry.stanford.edu/", "dblp": "346/0509;218/5350;33/8167;75/6627-11;g/LeonidasJGuibas", "google_scholar": "Dh06_JMAAAAJ;PcX1zXwAAAAJ;1YV1_KUAAAAJ;vQc8tI4AAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Kiyohiro_Nakayama1;~Mikaela_Angelina_Uy1;~Yang_You2;~Ke_Li1;~Leonidas_Guibas1", "aff": "Stanford University;Stanford University;Stanford University;Simon Fraser University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;sfu.ca;stanford.edu", "position": "Undergrad student;PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nnakayama2024provnerf,\ntitle={ProvNe{RF}: Modeling per Point Provenance in Ne{RF}s as a Stochastic Field},\nauthor={Kiyohiro Nakayama and Mikaela Angelina Uy and Yang You and Ke Li and Leonidas Guibas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=K5PA3SK2jB}\n}", "github": "", "reviewers": "u5T3;GKL5;o98v;GM1F", "pdf_size": 12673397, "rating": "5;5;6;6", "confidence": "4;3;2;3", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "3;2;2;2", "wc_summary": "59;74;91;131", "wc_strengths": "78;141;100;196", "wc_weaknesses": "139;368;63;413", "wc_questions": "64;122;368;128", "wc_limitations": "20;26;52;1", "wc_review": "360;731;674;869", "wc_reply_reviewers": "193;30;120;0", "wc_reply_authors": "446;0;430;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 26.892145693491994 ], "wc_strengths_avg": [ 128.75, 44.92980636504012 ], "wc_weaknesses_avg": [ 245.75, 148.0800037142085 ], "wc_questions_avg": [ 170.5, 116.73367123499543 ], "wc_limitations_avg": [ 24.75, 18.239723133863627 ], "wc_review_avg": [ 658.5, 186.35248858010993 ], "wc_reply_reviewers_avg": [ 85.75, 76.05384605659336 ], "wc_reply_authors_avg": [ 219.0, 219.07304717833273 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bafYF_RsIY4J:scholar.google.com/&scioq=ProvNeRF:+Modeling+per+Point+Provenance+in+NeRFs+as+a+Stochastic+Field&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "stanford.edu;stanford.edu;stanford.edu;sfu.ca;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Stanford University;Simon Fraser University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.sfu.ca", "aff_unique_abbr": "Stanford;SFU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "GMAI-MMBench: A Comprehensive Multimodal Evaluation Benchmark Towards General Medical AI", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97754", "id": "K6b8LCXBeQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=K6b8LCXBeQ", "openreview": "https://openreview.net/forum?id=K6b8LCXBeQ", "poster": "/media/PosterPDFs/NeurIPS%202024/97754.png?t=1731731965.6829484", "project": "", "author_site": "pengcheng chen, Jin Ye, Guoan Wang, Yanjun Li, Zhongying Deng, Wei Li, Tianbin Li, Haodong Duan, Ziyan Huang, Yanzhou Su, Benyou Wang, Shaoting Zhang, Bin Fu, Jianfei Cai, Bohan Zhuang, Eric Seibel, Junjun He, Yu Qiao", "tldr": "", "abstract": "Large Vision-Language Models (LVLMs) are capable of handling diverse data types such as imaging, text, and physiological signals, and can be applied in various fields. In the medical field, LVLMs have a high potential to offer substantial assistance for diagnosis and treatment. Before that, it is crucial to develop benchmarks to evaluate LVLMs' effectiveness in various medical applications. Current benchmarks are often built upon specific academic literature, mainly focusing on a single domain, and lacking varying perceptual granularities. Thus, they face specific challenges, including limited clinical relevance, incomplete evaluations, and insufficient guidance for interactive LVLMs. To address these limitations, we developed the GMAI-MMBench, the most comprehensive general medical AI benchmark with well-categorized data structure and multi-perceptual granularity to date. It is constructed from 284 datasets across 38 medical image modalities, 18 clinical-related tasks, 18 departments, and 4 perceptual granularities in a Visual Question Answering (VQA) format. Additionally, we implemented a lexical tree structure that allows users to customize evaluation tasks, accommodating various assessment needs and substantially supporting medical AI research and applications. We evaluated 50 LVLMs, and the results show that even the advanced GPT-4o only achieves an accuracy of 53.96\\%, indicating significant room for improvement. Moreover, we identified five key insufficiencies in current cutting-edge LVLMs that need to be addressed to advance the development of better medical applications. We believe that GMAI-MMBench will stimulate the community to build the next generation of LVLMs toward GMAI.", "keywords": "Multimodal large language models;Medical benchmark;Multimodal data;Visual question and answer (VQA)", "primary_area": "", "supplementary_material": "/attachment/46e3eaf723222608d9c8d657067c699d01546af3.pdf", "author": "pengcheng chen;Jin Ye;Guoan Wang;Yanjun Li;Zhongying Deng;Wei Li;Tianbin Li;Haodong Duan;Ziyan Huang;Yanzhou Su;Benyou Wang;Shaoting Zhang;Bin Fu;Jianfei Cai;Bohan Zhuang;Eric J Seibel;Junjun He;Yu Qiao", "authorids": "~pengcheng_chen1;~Jin_Ye2;~Guoan_Wang3;~Yanjun_Li6;~Zhongying_Deng1;~Wei_Li106;~Tianbin_Li2;~Haodong_Duan1;~Ziyan_Huang1;~Yanzhou_Su1;~Benyou_Wang2;~Shaoting_Zhang4;~Bin_Fu1;~Jianfei_Cai1;~Bohan_Zhuang1;~Eric_J_Seibel1;~Junjun_He2;~Yu_Qiao1", "gender": "M;M;M;M;M;M;M;M;M;M;M;M;M;M;M;M;M;", "homepage": ";https://yejin0111.github.io/;https://tousenkaname.github.io/;;;https://github.com/mar-cry;;https://kennymckormick.github.io;https://github.com/Ziyan-Huang;;https://wabyking.github.io/old.html;;;https://jianfei-cai.github.io/;https://bohanzhuang.github.io/;https://www.me.washington.edu/facultyfinder/eric-seibel;https://junjun2016.github.io/;", "dblp": ";;;;241/0938;;;211/7919;;241/9746;169/1793;53/3894;;83/6096;145/1096;;128/7027;", "google_scholar": "-slpk2kAAAAJ;UFBrJOAAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;TGIvczYAAAAJ;;vi3W-m8AAAAJ;BshL3fUAAAAJ;;Jk4vJU8AAAAJ;oiBMWK4AAAAJ;9WhK1y4AAAAJ;https://scholar.google.com.tw/citations?user=N6czCoUAAAAJ;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ;01a-2x0AAAAJ;Z4LgebkAAAAJ;", "orcid": ";0000-0003-0667-9889;;;;;0009-0001-3617-8324;0000-0002-3052-4177;0000-0002-1533-5239;0000-0002-3377-469X;0000-0002-1501-9914;0000-0002-8719-448X;;;;;;", "linkedin": ";;;yanjun-li-73922a2aa/;;;;haodong-duan-bb9349166/;;;;;;;bohan-zhuang/;;;", "or_profile": "~pengcheng_chen1;~Jin_Ye2;~Guoan_Wang3;~Yanjun_Li6;~Zhongying_Deng1;~Wei_Li106;~Tianbin_Li2;~Haodong_Duan1;~Ziyan_Huang1;~Yanzhou_Su1;~Benyou_Wang2;~Shaoting_Zhang4;~Bin_Fu1;~Jianfei_Cai1;~Bohan_Zhuang1;~Eric_J_Seibel1;~Junjun_He2;~Yu_Qiao1", "aff": "University of Washington;Monash University;East China Normal University;Shanghai Artificial Intelligence Laboratory;University of Cambridge;Shanghai Jiaotong University;ShangHai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Alibaba Group;The Chinese University of Hong Kong, Shenzhen;Shanghai Artificial Intelligence Laboratory;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences;Monash University;Monash University;University of Washington;Shanghai AI Laboratory;", "aff_domain": "uw.edu;monash.edu;ecnu.edu.cn;pjlab.org.cn;cam.ac.uk;sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn;alibaba-inc.com;cuhk.edu.cn;pjlab.org.cn;siat.ac.cn;monash.edu;monash.edu;uw.edu;pjlab.org.cn;", "position": "PhD student;PhD student;MS student;Intern;Postdoc;PhD student;Researcher;Postdoc;PhD student;Researcher;Assistant Professor;Full Professor;Assistant Research Fellow;Full Professor;Assistant Professor;Full Professor;Researcher;", "bibtex": "@inproceedings{\nchen2024gmaimmbench,\ntitle={{GMAI}-{MMB}ench: A Comprehensive Multimodal Evaluation Benchmark Towards General Medical {AI}},\nauthor={pengcheng chen and Jin Ye and Guoan Wang and Yanjun Li and Zhongying Deng and Wei Li and Tianbin Li and Haodong Duan and Ziyan Huang and Yanzhou Su and Benyou Wang and Shaoting Zhang and Bin Fu and Jianfei Cai and Bohan Zhuang and Eric J Seibel and Junjun He and Yu Qiao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=K6b8LCXBeQ}\n}", "github": "", "reviewers": "4iGG;cNaC;4FHY", "pdf_size": 33204702, "rating": "5;6;8", "confidence": "5;3;3", "wc_summary_and_contributions": "29;103;13", "wc_strengths": "31;88;44", "wc_improvement": "36;25;37", "wc_limitations": "2;29;8", "wc_correctness": "31;15;11", "wc_clarity": "1;14;7", "wc_relation_to_prior_work": "1;23;10", "wc_documentation": "13;28;7", "wc_additional_feedback": "1;1;1", "wc_review": "145;326;138", "wc_reply_reviewers": "114;0;0", "wc_reply_authors": "83;113;0", "reply_reviewers": "1;0;0", "reply_authors": "3;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 48.333333333333336, 39.20317447463775 ], "wc_strengths_avg": [ 54.333333333333336, 24.390344173235622 ], "wc_improvement_avg": [ 32.666666666666664, 5.436502143433364 ], "wc_limitations_avg": [ 13.0, 11.575836902790225 ], "wc_correctness_avg": [ 19.0, 8.640987597877148 ], "wc_clarity_avg": [ 7.333333333333333, 5.312459150169743 ], "wc_relation_to_prior_work_avg": [ 11.333333333333334, 9.030811456096044 ], "wc_documentation_avg": [ 16.0, 8.831760866327848 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 203.0, 87.02107024546795 ], "wc_reply_reviewers_avg": [ 38.0, 53.74011537017761 ], "wc_reply_authors_avg": [ 65.33333333333333, 47.79353745248642 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": -0.7559289460184546, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15106722245034671297&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5, "email": "uw.edu;monash.edu;ecnu.edu.cn;pjlab.org.cn;cam.ac.uk;sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn;alibaba-inc.com;cuhk.edu.cn;pjlab.org.cn;siat.ac.cn;monash.edu;monash.edu;uw.edu;pjlab.org.cn;", "author_num": 18, "aff_unique_index": "0;1;2;3;4;5;3;3;5;6;7;3;8;1;1;0;9", "aff_unique_norm": "University of Washington;Monash University;East China Normal University;Shanghai Artificial Intelligence Laboratory;University of Cambridge;Shanghai Jiao Tong University;Alibaba Group;Chinese University of Hong Kong;Chinese Academy of Sciences;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;;;;Shenzhen Institutes of Advanced Technology;", "aff_unique_url": "https://www.washington.edu;https://www.monash.edu;http://www.ecnu.edu.cn;http://www.shailab.org/;https://www.cam.ac.uk;https://www.sjtu.edu.cn;https://www.alibaba.com;https://www.cuhk.edu.cn;http://www.siat.cas.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "UW;Monash;ECNU;Shanghai AI Lab;Cambridge;SJTU;Alibaba;CUHK;SIAT;SAIL", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Cambridge;Shenzhen", "aff_country_unique_index": "0;1;2;2;3;2;2;2;2;2;2;2;2;1;1;0;2", "aff_country_unique": "United States;Australia;China;United Kingdom" }, { "title": "SmallToLarge (S2L): Scalable Data Selection for Fine-tuning Large Language Models by Summarizing Training Trajectories of Small Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95679", "id": "K9IGlMQpif", "proceeding": "", "pdf": "https://openreview.net/pdf?id=K9IGlMQpif", "openreview": "https://openreview.net/forum?id=K9IGlMQpif", "poster": "", "project": "", "author_site": "Yu Yang, Siddhartha Mishra, Jeffrey Chiang, Baharan Mirzasoleiman", "tldr": "", "abstract": "Despite the effectiveness of data selection for pretraining and instruction fine-tuning\nlarge language models (LLMs), improving data efficiency in supervised fine-tuning\n(SFT) for specialized domains poses significant challenges due to the complexity\nof fine-tuning data. To bridge this gap, we introduce an effective and scalable\ndata selection method for SFT, SmallToLarge (S2L), which trains a small\nmodel, clusters loss trajectories of the examples, and samples from these clusters to\nguide data selection for larger models. We prove that during fine-tuning, samples\nwithin the same loss trajectory cluster exhibit similar gradients. Then, we show\nthat S2L subsets have a bounded gradient error w.r.t. the full data, hence guarantee\nconvergence to the neighborhood of the optimal solution. We demonstrate through\nextensive experiments that S2L significantly improves data efficiency in SFT for\nmathematical problem-solving, reducing the training data requirement to just $11$%\nof the original MathInstruct dataset to match full dataset performance while\noutperforming state-of-the-art data selection algorithms by an average of $4.7$%\nacross $6$ in- and out-domain evaluation datasets. Remarkably, selecting only 50K\ndata for SFT, S2L achieves a $32.7$% accuracy on the challenging MATH\nbenchmark, improving Phi-2 by $16.6$%. In clinical text summarization on the\nMIMIC-III dataset, S2L again outperforms training on the full dataset using\nonly $50$% of the data. Notably, S2L can perform scalable data selection using a\nreference model $100\\times$ smaller than the target model, proportionally reducing the\ncomputational cost.", "keywords": "data selection", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Yu Yang;Siddhartha Mishra;Jeffrey N Chiang;Baharan Mirzasoleiman", "authorids": "~Yu_Yang4;~Siddhartha_Mishra3;~Jeffrey_N_Chiang1;~Baharan_Mirzasoleiman1", "gender": "F;M;M;F", "homepage": "https://sites.google.com/view/yuyang0901/home;;;http://web.cs.ucla.edu/~baharan/", "dblp": "16/4505-7;;;52/10075", "google_scholar": "KK6Yj4IAAAAJ;;4Hb-E48AAAAJ;x63j7HEAAAAJ", "orcid": ";;0000-0002-6843-1355;", "linkedin": ";siddhartham04;;", "or_profile": "~Yu_Yang4;~Siddhartha_Mishra3;~Jeffrey_N_Chiang1;~Baharan_Mirzasoleiman1", "aff": "University of California, Los Angeles;University of California, Los Angeles;;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;;ucla.edu", "position": "PhD student;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nyang2024smalltolarge,\ntitle={SmallToLarge (S2L): Scalable Data Selection for Fine-tuning Large Language Models by Summarizing Training Trajectories of Small Models},\nauthor={Yu Yang and Siddhartha Mishra and Jeffrey N Chiang and Baharan Mirzasoleiman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=K9IGlMQpif}\n}", "github": "", "reviewers": "dErU;5vbK;mU1m;Ho1K", "pdf_size": 786869, "rating": "4;5;5;7", "confidence": "3;4;4;5", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "82;78;81;92", "wc_strengths": "44;54;60;155", "wc_weaknesses": "260;149;71;167", "wc_questions": "39;43;2;4", "wc_limitations": "3;1;1;21", "wc_review": "428;325;215;439", "wc_reply_reviewers": "0;0;0;15", "wc_reply_authors": "64;64;64;47", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 5.261891294962297 ], "wc_strengths_avg": [ 78.25, 44.6787141712919 ], "wc_weaknesses_avg": [ 161.75, 67.22862113713177 ], "wc_questions_avg": [ 22.0, 19.06567596493762 ], "wc_limitations_avg": [ 6.5, 8.411301920630361 ], "wc_review_avg": [ 351.75, 90.61284401231428 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 59.75, 7.361215932167728 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11810225389285725908&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucla.edu;ucla.edu;;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "BrainBits: How Much of the Brain are Generative Reconstruction Methods Using?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95678", "id": "KAAUvi4kpb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KAAUvi4kpb", "openreview": "https://openreview.net/forum?id=KAAUvi4kpb", "poster": "/media/PosterPDFs/NeurIPS%202024/95678.png?t=1734046321.9479892", "project": "", "author_site": "David Mayo, Christopher Wang, Asa Harbin, Abdulrahman Alabdulkareem, Albert Shaw, Boris Katz, Andrei Barbu", "tldr": "", "abstract": "When evaluating stimuli reconstruction results it is tempting to assume that higher fidelity text and image generation is due to an improved understanding of the brain or more powerful signal extraction from neural recordings. However, in practice, new reconstruction methods could improve performance for at least three other reasons: learning more about the distribution of stimuli, becoming better at reconstructing text or images in general, or exploiting weaknesses in current image and/or text evaluation metrics. Here we disentangle how much of the reconstruction is due to these other factors vs. productively using the neural recordings. We introduce BrainBits, a method that uses a bottleneck to quantify the amount of signal extracted from neural recordings that is actually necessary to reproduce a method's reconstruction fidelity. We find that it takes surprisingly little information from the brain to produce reconstructions with high fidelity. In these cases, it is clear that the priors of the methods' generative models are so powerful that the outputs they produce extrapolate far beyond the neural signal they decode. Given that reconstructing stimuli can be improved independently by either improving signal extraction from the brain or by building more powerful generative models, improving the latter may fool us into thinking we are improving the former. We propose that methods should report a method-specific random baseline, a reconstruction ceiling, and a curve of performance as a function of bottleneck size, with the ultimate goal of using more of the neural recordings.", "keywords": "diffusion models;fMRI;computational neuroscience;generative AI", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/b062c70b12adaae293d0b1928394ddbf3e9d056e.zip", "author": "David Mayo;Christopher Wang;Asa Harbin;Abdulrahman Alabdulkareem;Albert Eaton Shaw;Boris Katz;Andrei Barbu", "authorids": "~David_Mayo1;~Christopher_Wang1;~Asa_Harbin1;~Abdulrahman_Alabdulkareem1;~Albert_Eaton_Shaw1;~Boris_Katz1;~Andrei_Barbu3", "gender": ";;Not Specified;M;M;M;M", "homepage": "http://david-mayo.com;https://czlwang.github.io/;https://asaharbin.com;http://abdulrahman-kareem.com/;;http://people.csail.mit.edu/boris/boris.html;https://0xab.com", "dblp": "190/8836;;;;267/9598;k/BorisKatz;58/8365", "google_scholar": "QjVd0f8AAAAJ;;;;Q9CPYwEAAAAJ;FdNuUb8AAAAJ;t1rjgHgAAAAJ", "orcid": ";;;;0000-0002-5308-7790;;", "linkedin": ";;;;ashaw596/;;andrei-barbu-1166131", "or_profile": "~David_Mayo1;~Christopher_Wang1;~Asa_Harbin1;~Abdulrahman_Alabdulkareem1;~Albert_Eaton_Shaw1;~Boris_Katz1;~Andrei_Barbu3", "aff": "Massachusetts Institute of Technology;Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Google;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;csail.mit.edu;mit.edu;mit.edu;google.com;mit.edu;mit.edu", "position": "PhD student;PhD student;Researcher;MS student;Researcher;Principal Research Scientist;Researcher", "bibtex": "@inproceedings{\nmayo2024brainbits,\ntitle={BrainBits: How Much of the Brain are Generative Reconstruction Methods Using?},\nauthor={David Mayo and Christopher Wang and Asa Harbin and Abdulrahman Alabdulkareem and Albert Eaton Shaw and Boris Katz and Andrei Barbu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KAAUvi4kpb}\n}", "github": "", "reviewers": "eVVq;khsL;BfHp;eceS", "pdf_size": 38200378, "rating": "4;6;6;7", "confidence": "3;4;4;4", "soundness": "2;2;3;4", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "43;93;152;138", "wc_strengths": "21;136;144;142", "wc_weaknesses": "344;182;201;140", "wc_questions": "136;21;155;135", "wc_limitations": "7;17;25;19", "wc_review": "551;449;677;574", "wc_reply_reviewers": "109;0;317;0", "wc_reply_authors": "97;30;447;0", "reply_reviewers": "2;0;2;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 106.5, 42.652666974059194 ], "wc_strengths_avg": [ 110.75, 51.90074662276064 ], "wc_weaknesses_avg": [ 216.75, 76.71171683647812 ], "wc_questions_avg": [ 111.75, 52.99705180479382 ], "wc_limitations_avg": [ 17.0, 6.48074069840786 ], "wc_review_avg": [ 562.75, 81.01967353674044 ], "wc_reply_reviewers_avg": [ 106.5, 129.42275688610562 ], "wc_reply_authors_avg": [ 143.5, 178.70996055060837 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:E13K0Cqt6rEJ:scholar.google.com/&scioq=BrainBits:+How+Much+of+the+Brain+are+Generative+Reconstruction+Methods+Using%3F&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "mit.edu;csail.mit.edu;mit.edu;mit.edu;google.com;mit.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "MIT;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SpaceByte: Towards Deleting Tokenization from Large Language Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95677", "id": "KEe4IUp20I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KEe4IUp20I", "openreview": "https://openreview.net/forum?id=KEe4IUp20I", "poster": "", "project": "", "tldr": "", "abstract": "Tokenization is widely used in large language models because it significantly improves performance. However, tokenization imposes several disadvantages, such as performance biases, increased adversarial vulnerability, decreased character-level modeling performance, and increased modeling complexity. To address these disadvantages without sacrificing performance, we propose SpaceByte, a novel byte-level decoder architecture that closes the performance gap between byte-level and subword autoregressive language modeling. SpaceByte consists of a byte-level Transformer model, but with extra larger transformer blocks inserted in the middle of the layers. We find that performance is significantly improved by applying these larger blocks only after certain bytes, such as space characters, which typically denote word boundaries. Our experiments show that for a fixed training and inference compute budget, SpaceByte outperforms other byte-level architectures and roughly matches the performance of tokenized Transformer architectures.", "keywords": "byte level language model;model architecture;tokenization;efficient pretraining", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/4d139f44640788b164298bbc116d4b6298cce0b2.zip", "author": "Kevin Slagle", "authorids": "~Kevin_Slagle1", "gender": "M", "homepage": "https://profiles.rice.edu/faculty/kevin-slagle", "dblp": "363/4717", "google_scholar": "OT8tybMAAAAJ", "orcid": "0000-0002-8036-3447", "linkedin": "kevin-slagle-45a18871/", "or_profile": "~Kevin_Slagle1", "aff": "Rice University", "aff_domain": "rice.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\nslagle2024spacebyte,\ntitle={SpaceByte: Towards Deleting Tokenization from Large Language Modeling},\nauthor={Kevin Slagle},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KEe4IUp20I}\n}", "github": "", "reviewers": "zbbv;HARv;ZB8e;8HNC", "pdf_size": 477299, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "3;2;4;4", "wc_summary": "64;110;94;76", "wc_strengths": "57;19;96;76", "wc_weaknesses": "55;86;172;81", "wc_questions": "48;200;6;13", "wc_limitations": "52;19;9;10", "wc_review": "276;434;377;256", "wc_reply_reviewers": "0;101;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.0, 17.4928556845359 ], "wc_strengths_avg": [ 62.0, 28.398943642325854 ], "wc_weaknesses_avg": [ 98.5, 44.03691633164157 ], "wc_questions_avg": [ 66.75, 78.56008846736363 ], "wc_limitations_avg": [ 22.5, 17.471405209656147 ], "wc_review_avg": [ 335.75, 72.94647009965595 ], "wc_reply_reviewers_avg": [ 25.25, 43.73428289111415 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1406628469678127396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "rice.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Rice University", "aff_unique_dep": "", "aff_unique_url": "https://www.rice.edu", "aff_unique_abbr": "Rice", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Rethinking LLM Memorization through the Lens of Adversarial Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95676", "id": "KFmRMvzAZy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KFmRMvzAZy", "openreview": "https://openreview.net/forum?id=KFmRMvzAZy", "poster": "", "project": "", "author_site": "Avi Schwarzschild, Zhili Feng, Pratyush Maini, Zachary Lipton, J. Zico Kolter", "tldr": "", "abstract": "Large language models (LLMs) trained on web-scale datasets raise substantial concerns regarding permissible data usage. \nOne major question is whether these models \"memorize\" all their training data or they integrate many data sources in some way more akin to how a human would learn and synthesize information. The answer hinges, to a large degree, on \\emph{how we define memorization.} In this work, we propose the Adversarial Compression Ratio (ACR) as a metric for assessing memorization in LLMs. A given string from the training data is considered memorized if it can be elicited by a prompt (much) shorter than the string itself---in other words, if these strings can be ``compressed'' with the model by computing adversarial prompts of fewer tokens. The ACR overcomes the limitations of existing notions of memorization by (i) offering an adversarial view of measuring memorization, especially for monitoring unlearning and compliance; and (ii) allowing for the flexibility to measure memorization for arbitrary strings at a reasonably low compute. Our definition serves as a practical tool for determining when model owners may be violating terms around data usage, providing a potential legal tool and a critical lens through which to address such scenarios.", "keywords": "memorization;input-output compression", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2e94338eed3f46763f6d622d18cac50419086173.zip", "author": "Avi Schwarzschild;Zhili Feng;Pratyush Maini;Zachary Chase Lipton;J Zico Kolter", "authorids": "~Avi_Schwarzschild1;~Zhili_Feng1;~Pratyush_Maini1;~Zachary_Chase_Lipton1;~J_Zico_Kolter1", "gender": "M;;M;Unspecified;M", "homepage": "https://cs.umd.edu/~avi1;https://zhilif.github.io/;https://pratyushmaini.github.io/;http://zacklipton.com;http://www.zicokolter.com", "dblp": "249/9334.html;189/7590;248/8071;;67/2526", "google_scholar": "WNvQ7AcAAAAJ;_lnL4aQAAAAJ;;MN9Kfg8AAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Avi_Schwarzschild1;~Zhili_Feng1;~Pratyush_Maini1;~Zachary_Chase_Lipton1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Microsoft Research;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;microsoft.com;cmu.edu;cmu.edu;cmu.edu", "position": "Postdoc;Intern;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nschwarzschild2024rethinking,\ntitle={Rethinking {LLM} Memorization through the Lens of Adversarial Compression},\nauthor={Avi Schwarzschild and Zhili Feng and Pratyush Maini and Zachary Chase Lipton and J Zico Kolter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KFmRMvzAZy}\n}", "github": "", "reviewers": "uS5V;4S4z;Um3a;KdDi;kzBJ", "pdf_size": 1197138, "rating": "4;5;6;7;7", "confidence": "3;3;3;5;4", "soundness": "2;2;2;3;3", "novelty": "2;3;3;3;3", "presentation": "4;2;3;3;4", "wc_summary": "135;106;433;40;164", "wc_strengths": "6;33;65;138;51", "wc_weaknesses": "26;90;71;105;115", "wc_questions": "16;72;78;50;2", "wc_limitations": "11;1;7;13;1", "wc_review": "194;302;654;346;333", "wc_reply_reviewers": "0;0;0;21;135", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 175.6, 135.10677259116213 ], "wc_strengths_avg": [ 58.6, 44.32877169514175 ], "wc_weaknesses_avg": [ 81.4, 31.42355804169859 ], "wc_questions_avg": [ 43.6, 30.07723391537194 ], "wc_limitations_avg": [ 6.6, 4.963869458396343 ], "wc_review_avg": [ 365.8, 153.69632396384762 ], "wc_reply_reviewers_avg": [ 31.2, 52.53341793563408 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7717436331412899, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15811574706327032040&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "cmu.edu;microsoft.com;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CMU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Causal Imitation for Markov Decision Processes: a Partial Identification Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95675", "id": "KHX0dKXdqH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KHX0dKXdqH", "openreview": "https://openreview.net/forum?id=KHX0dKXdqH", "poster": "/media/PosterPDFs/NeurIPS%202024/95675.png?t=1733521862.6616583", "project": "", "author_site": "Kangrui Ruan, Junzhe Zhang, Xuan Di, Elias Bareinboim", "tldr": "", "abstract": "Imitation learning enables an agent to learn from expert demonstrations when the performance measure is unknown and the reward signal is not specified. Standard imitation methods do not generally apply when the learner and the expert's sensory capabilities mismatch and demonstrations are contaminated with unobserved confounding bias. To address these challenges, recent advancements in causal imitation learning have been pursued. However, these methods often require access to underlying causal structures that might not always be available, posing practical challenges.\nIn this paper, we investigate robust imitation learning within the framework of canonical Markov Decision Processes (MDPs) using partial identification, allowing the agent to achieve expert performance even when the system dynamics are not uniquely determined from the confounded expert demonstrations. Specifically, first, we theoretically demonstrate that when unobserved confounders (UCs) exist in an MDP, the learner is generally unable to imitate expert performance. We then explore imitation learning in partially identifiable settings --- either transition distribution or reward function is non-identifiable from the available data and knowledge. Augmenting the celebrated GAIL method (Ho \\& Ermon, 2016), our analysis leads to two novel causal imitation algorithms that can obtain effective policies guaranteed to achieve expert performance.", "keywords": "Causal Inference;Imitation Learning", "primary_area": "causal_inference", "supplementary_material": "/attachment/5286501a4aa9f16efa10108e8c5d4ea46f85505f.zip", "author": "Kangrui Ruan;Junzhe Zhang;Xuan Di;Elias Bareinboim", "authorids": "~Kangrui_Ruan1;~Junzhe_Zhang3;~Xuan_Di1;~Elias_Bareinboim2", "gender": ";;F;M", "homepage": "https://darrenruan.github.io/;;https://sharondi-columbia.wixsite.com/ditectlab;https://causalai.net", "dblp": "324/0593;;;85/9005", "google_scholar": "https://scholar.google.com/citations?authuser=2;;https://scholar.google.com/citations?hl=en;r5U-D7YAAAAJ", "orcid": ";;0000-0003-2925-7697;", "linkedin": "kangrui-ruan/;;;", "or_profile": "~Kangrui_Ruan1;~Junzhe_Zhang3;~Xuan_Di1;~Elias_Bareinboim2", "aff": "Columbia University;;Columbia University;Columbia University", "aff_domain": "columbia.edu;;columbia.edu;columbia.edu", "position": "PhD student;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nruan2024causal,\ntitle={Causal Imitation for Markov Decision Processes: a Partial Identification Approach},\nauthor={Kangrui Ruan and Junzhe Zhang and Xuan Di and Elias Bareinboim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KHX0dKXdqH}\n}", "github": "", "reviewers": "P2hy;S4Sm;1FKA", "pdf_size": 556921, "rating": "4;4;4", "confidence": "2;3;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;2", "wc_summary": "93;106;97", "wc_strengths": "39;84;75", "wc_weaknesses": "110;120;54", "wc_questions": "252;9;34", "wc_limitations": "10;10;8", "wc_review": "504;329;268", "wc_reply_reviewers": "110;72;134", "wc_reply_authors": "198;156;0", "reply_reviewers": "1;1;2", "reply_authors": "3;2;1", "rating_avg": [ 4.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 98.66666666666667, 5.436502143433364 ], "wc_strengths_avg": [ 66.0, 19.44222209522358 ], "wc_weaknesses_avg": [ 94.66666666666667, 29.044027881055953 ], "wc_questions_avg": [ 98.33333333333333, 109.13701887484783 ], "wc_limitations_avg": [ 9.333333333333334, 0.9428090415820634 ], "wc_review_avg": [ 367.0, 100.0233306117461 ], "wc_reply_reviewers_avg": [ 105.33333333333333, 25.525586292102197 ], "wc_reply_authors_avg": [ 118.0, 85.18215775618742 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6409151008289180105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "columbia.edu;;columbia.edu;columbia.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Accelerating Pre-training of Multimodal LLMs via Chain-of-Sight", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95674", "id": "KHcB1drMRX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KHcB1drMRX", "openreview": "https://openreview.net/forum?id=KHcB1drMRX", "poster": "/media/PosterPDFs/NeurIPS%202024/95674.png?t=1733898312.6370788", "project": "", "author_site": "Ziyuan Huang, Kaixiang Ji, Biao Gong, Zhiwu Qing, Qinglong Zhang, Kecheng Zheng, Jian Wang, Jingdong Chen, Ming Yang", "tldr": "", "abstract": "This paper introduces Chain-of-Sight, a vision-language bridge module that accelerates the pre-training of Multimodal Large Language Models (MLLMs). \nOur approach employs a sequence of visual resamplers that capture visual details at various spacial scales.\nThis architecture not only leverages global and local visual contexts effectively, but also facilitates the flexible extension of visual tokens through a compound token scaling strategy, allowing up to a 16x increase in the token count post pre-training.\nConsequently, Chain-of-Sight requires significantly fewer visual tokens in the pre-training phase compared to the fine-tuning phase. \nThis intentional reduction of visual tokens during pre-training notably accelerates the pre-training process, cutting down the wall-clock training time by $\\sim$73\\%.\nEmpirical results on a series of vision-language benchmarks reveal that the pre-train acceleration through Chain-of-Sight is achieved without sacrificing performance, matching or surpassing the standard pipeline of utilizing all visual tokens throughout the entire training process. \nFurther scaling up the number of visual tokens for pre-training leads to stronger performances, competitive to existing approaches in a series of benchmarks.", "keywords": "Chain-of-Sight;MLLMs;pre-training efficiency;3.7x speedup", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ziyuan Huang;Kaixiang Ji;Biao Gong;Zhiwu Qing;Qing-Long Zhang;Kecheng Zheng;Jian Wang;Jingdong Chen;Ming Yang", "authorids": "~Ziyuan_Huang1;~Kaixiang_Ji1;~Biao_Gong1;~Zhiwu_Qing1;~Qing-Long_Zhang1;~Kecheng_Zheng2;~Jian_Wang25;~Jingdong_Chen1;~Ming_Yang2", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://huang-ziyuan.github.io/;;https://github.com/Biao-Gong/;;https://zkcys001.github.io/;;http://users.ece.northwestern.edu/~mya671/;;", "dblp": ";297/0560;252/9466;267/5389;228/1362;33/5656;98/2604-7;39/449-108.html;165/0559", "google_scholar": "A9D-disAAAAJ;https://scholar.google.com/citations?hl=en;BwdpTiQAAAAJ;q9refl4AAAAJ;hMDQifQAAAAJ;8SCEv-YAAAAJ;uBHJx08AAAAJ;;LYR7l98AAAAJ", "orcid": ";0000-0002-4669-8622;0000-0002-6156-0816;;;0000-0002-1872-2592;0000-0003-1691-6817;;", "linkedin": "ziyuan-huang-731b78177/;;;;;;ming-yang-29ba294/;;", "or_profile": "~Ziyuan_Huang1;~Kaixiang_Ji1;~Biao_Gong1;~Zhiwu_Qing1;~Kecheng_Zheng2;~Jingdong_Chen1;~Ming_Yang2;~Wang_Jian1;~Qinglong_Zhang1", "aff": "National University of Singapore;Ant Group;Ant Group;Huazhong University of Science and Technology, Tsinghua University;Zhejiang University;Ant Group;Ant Group;AntGroup;Shanghai Artificial Intelligence Laboratory", "aff_domain": "u.nus.edu;antgroup.com;antgroup.com;hust.edu.cn;zju.edu.cn;antgroup.com;antgroup.com;antgroup.com;shlab.org.cn", "position": "PhD student;Researcher;Researcher;PhD student;Postdoc;Senior Staff Algorithm Engineer;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nhuang2024accelerating,\ntitle={Accelerating Pre-training of Multimodal {LLM}s via Chain-of-Sight},\nauthor={Ziyuan Huang and Kaixiang Ji and Biao Gong and Zhiwu Qing and Qing-Long Zhang and Kecheng Zheng and Jian Wang and Jingdong Chen and Ming Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KHcB1drMRX}\n}", "github": "", "reviewers": "TSJ3;cV6y;Q826", "pdf_size": 1261428, "rating": "5;6;7", "confidence": "4;3;5", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;3;4", "wc_summary": "118;52;75", "wc_strengths": "70;36;66", "wc_weaknesses": "114;42;70", "wc_questions": "67;2;67", "wc_limitations": "27;20;1", "wc_review": "396;152;279", "wc_reply_reviewers": "50;0;21", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 81.66666666666667, 27.35365098523819 ], "wc_strengths_avg": [ 57.333333333333336, 15.173075568988056 ], "wc_weaknesses_avg": [ 75.33333333333333, 29.634814361190493 ], "wc_questions_avg": [ 45.333333333333336, 30.64129385141706 ], "wc_limitations_avg": [ 16.0, 10.98483803552272 ], "wc_review_avg": [ 275.6666666666667, 99.64046478325069 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 20.499322482029065 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8590310140669160306&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u.nus.edu;antgroup.com;antgroup.com;hust.edu.cn;zju.edu.cn;antgroup.com;antgroup.com;antgroup.com;shlab.org.cn", "author_num": 9, "aff_unique_index": "0;1;1;2;3;1;1;1;4", "aff_unique_norm": "National University of Singapore;Ant Group;Huazhong University of Science and Technology;Zhejiang University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.antgroup.com;http://www.hust.edu.cn;https://www.zju.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "NUS;Ant Group;HUST;ZJU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Score-based generative models are provably robust: an uncertainty quantification perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95673", "id": "KI5TANE02e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KI5TANE02e", "openreview": "https://openreview.net/forum?id=KI5TANE02e", "poster": "/media/PosterPDFs/NeurIPS%202024/95673.png?t=1733952381.2577894", "project": "", "author_site": "Nikiforos Mimikos-Stamatopoulos, Benjamin Zhang, Markos Katsoulakis", "tldr": "", "abstract": "Through an uncertainty quantification (UQ) perspective, we show that score-based generative models (SGMs) are provably robust to the multiple sources of error in practical implementation. Our primary tool is the Wasserstein uncertainty propagation (WUP) theorem, a *model-form UQ* bound that describes how the $L^2$ error from learning the score function propagates to a Wasserstein-1 ($\\mathbf{d}_1$) ball around the true data distribution under the evolution of the Fokker-Planck equation. We show how errors due to (a) finite sample approximation, (b) early stopping, (c) score-matching objective choice, (d) score function parametrization expressiveness, and (e) reference distribution choice, impact the quality of the generative model in terms of a $\\mathbf{d}_1$ bound of computable quantities. The WUP theorem relies on Bernstein estimates for Hamilton-Jacobi-Bellman partial differential equations (PDE) and the regularizing properties of diffusion processes. Specifically, *PDE regularity theory* shows that *stochasticity* is the key mechanism ensuring SGM algorithms are provably robust. The WUP theorem applies to integral probability metrics beyond $\\mathbf{d}_1$, such as the total variation distance and the maximum mean discrepancy. Sample complexity and generalization bounds in $\\mathbf{d}_1$ follow directly from the WUP theorem. Our approach requires minimal assumptions, is agnostic to the manifold hypothesis and avoids absolute continuity assumptions for the target distribution. Additionally, our results clarify the *trade-offs* among multiple error sources in SGMs.", "keywords": "Score-based generative modeling;uncertainty quantification;Hamilton-Jacobi equations;generalization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Nikiforos Mimikos-Stamatopoulos;Benjamin Zhang;Markos Katsoulakis", "authorids": "~Nikiforos_Mimikos-Stamatopoulos1;~Benjamin_Zhang1;~Markos_Katsoulakis1", "gender": ";M;M", "homepage": "https://math.uchicago.edu/~nmimikos/;https://benjzhang.github.io/;https://www.math.umass.edu/directory/faculty/markos-katsoulakis", "dblp": ";;", "google_scholar": ";;2PpEwFQAAAAJ", "orcid": ";;0000-0003-4354-1766", "linkedin": ";;", "or_profile": "~Nikiforos_Mimikos-Stamatopoulos1;~Benjamin_Zhang1;~Markos_Katsoulakis1", "aff": "University of Chicago;University of Massachusetts at Amherst;University of Massachusetts at Amherst", "aff_domain": "uchicago.edu;umass.edu;umass.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nmimikos-stamatopoulos2024scorebased,\ntitle={Score-based generative models are provably robust: an uncertainty quantification perspective},\nauthor={Nikiforos Mimikos-Stamatopoulos and Benjamin Zhang and Markos Katsoulakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KI5TANE02e}\n}", "github": "", "reviewers": "DZY4;PXMJ;gUwF", "pdf_size": 409175, "rating": "6;6;7", "confidence": "4;4;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;2", "wc_summary": "54;41;69", "wc_strengths": "46;60;67", "wc_weaknesses": "50;110;240", "wc_questions": "84;102;46", "wc_limitations": "14;1;18", "wc_review": "248;314;440", "wc_reply_reviewers": "35;11;17", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 54.666666666666664, 11.440668201153676 ], "wc_strengths_avg": [ 57.666666666666664, 8.73053390247253 ], "wc_weaknesses_avg": [ 133.33333333333334, 79.3025150224688 ], "wc_questions_avg": [ 77.33333333333333, 23.34285520001546 ], "wc_limitations_avg": [ 11.0, 7.2571803523590805 ], "wc_review_avg": [ 334.0, 79.64923100695951 ], "wc_reply_reviewers_avg": [ 21.0, 10.198039027185569 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2038427999179749223&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uchicago.edu;umass.edu;umass.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Chicago;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.uchicago.edu;https://www.umass.edu", "aff_unique_abbr": "UChicago;UMass Amherst", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Data-Driven Discovery of Dynamical Systems in Pharmacology using Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95672", "id": "KIrZmlTA92", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KIrZmlTA92", "openreview": "https://openreview.net/forum?id=KIrZmlTA92", "poster": "", "project": "", "author_site": "Samuel Holt, Zhaozhi Qian, Tennison Liu, Jim Weatherall, Mihaela van der Schaar", "tldr": "", "abstract": "The discovery of dynamical systems is crucial across a range of fields, including pharmacology, epidemiology, and physical sciences. *Accurate* and *interpretable* modeling of these systems is essential for understanding complex temporal processes, optimizing interventions, and minimizing adverse effects. In pharmacology, for example, precise modeling of drug dynamics is vital to maximize therapeutic efficacy while minimizing patient harm, as in chemotherapy. However, current models, often developed by human experts, are limited by high cost, lack of scalability, and restriction to existing human knowledge. In this paper, we present the **Data-Driven Discovery (D3)** framework, a novel approach leveraging Large Language Models (LLMs) to iteratively discover and refine interpretable models of dynamical systems, demonstrated here with pharmacological applications. Unlike traditional methods, D3 enables the LLM to propose, acquire, and integrate new features, validate, and compare dynamical systems models, uncovering new insights into pharmacokinetics. Experiments on a pharmacokinetic Warfarin dataset reveal that D3 identifies a new plausible model that is well-fitting, highlighting its potential for precision dosing in clinical applications.", "keywords": "ODE Discovery;LLM Discovery;LLM", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Samuel Holt;Zhaozhi Qian;Tennison Liu;Jim Weatherall;Mihaela van der Schaar", "authorids": "~Samuel_Holt1;~Zhaozhi_Qian1;~Tennison_Liu1;~Jim_Weatherall1;~Mihaela_van_der_Schaar2", "gender": ";;M;M;F", "homepage": "https://samholt.github.io/;;https://www.vanderschaar-lab.com/research-team/#Tennison;;https://www.vanderschaar-lab.com", "dblp": "322/3656;194/2443;256/9899;;", "google_scholar": "Ey5aInIAAAAJ;PuTDB5gAAAAJ;LtdHRjsAAAAJ;;DZ3S--MAAAAJ", "orcid": ";0000-0002-4561-0342;;;", "linkedin": ";;;james-weatherall-91b04518/;", "or_profile": "~Samuel_Holt1;~Zhaozhi_Qian1;~Tennison_Liu1;~Jim_Weatherall1;~Mihaela_van_der_Schaar2", "aff": "Google DeepMind;University of Cambridge;University of Cambridge;;University of California, Los Angeles", "aff_domain": "google.com;cam.ac.uk;cam.ac.uk;;ucla.edu", "position": "Intern;Postdoc;PhD student;;Full Professor", "bibtex": "@inproceedings{\nholt2024datadriven,\ntitle={Data-Driven Discovery of Dynamical Systems in Pharmacology using Large Language Models},\nauthor={Samuel Holt and Zhaozhi Qian and Tennison Liu and Jim Weatherall and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KIrZmlTA92}\n}", "github": "", "reviewers": "2i8U;b77T;Re2Z;QBVa;QDKe", "pdf_size": 837757, "rating": "3;4;5;7;7", "confidence": "2;3;3;4;3", "soundness": "2;3;2;3;3", "novelty": "3;2;2;4;3", "presentation": "2;3;2;3;3", "wc_summary": "36;58;85;95;203", "wc_strengths": "43;92;87;81;283", "wc_weaknesses": "96;260;143;299;343", "wc_questions": "14;215;109;1;130", "wc_limitations": "32;33;44;1;34", "wc_review": "221;658;468;477;993", "wc_reply_reviewers": "111;250;15;12;0", "wc_reply_authors": "383;736;44;44;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;4;2;2;1", "rating_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 95.4, 57.64234554561429 ], "wc_strengths_avg": [ 117.2, 84.67915918335515 ], "wc_weaknesses_avg": [ 228.2, 93.74305307594797 ], "wc_questions_avg": [ 93.8, 79.00734143103412 ], "wc_limitations_avg": [ 28.8, 14.551975810864997 ], "wc_review_avg": [ 563.4, 255.8551152507997 ], "wc_reply_reviewers_avg": [ 77.6, 94.95388354353919 ], "wc_reply_authors_avg": [ 241.4, 283.15621130393737 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7905694150420948, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10581620094404977074&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "google.com;cam.ac.uk;cam.ac.uk;;ucla.edu", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Google;University of Cambridge;University of California, Los Angeles", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "DeepMind;Cambridge;UCLA", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Cambridge;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "RectifID: Personalizing Rectified Flow with Anchored Classifier Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95671", "id": "KKrj1vCQaG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KKrj1vCQaG", "openreview": "https://openreview.net/forum?id=KKrj1vCQaG", "poster": "/media/PosterPDFs/NeurIPS%202024/95671.png?t=1730112235.928306", "project": "", "author_site": "Zhicheng Sun, Zhenhao Yang, Yang Jin, Haozhe Chi, Kun Xu, Kun Xu, Liwei Chen, Hao Jiang, Yang Song, Kun Gai, Yadong Mu", "tldr": "", "abstract": "Customizing diffusion models to generate identity-preserving images from user-provided reference images is an intriguing new problem. The prevalent approaches typically require training on extensive domain-specific images to achieve identity preservation, which lacks flexibility across different use cases. To address this issue, we exploit classifier guidance, a training-free technique that steers diffusion models using an existing classifier, for personalized image generation. Our study shows that based on a recent rectified flow framework, the major limitation of vanilla classifier guidance in requiring a special classifier can be resolved with a simple fixed-point solution, allowing flexible personalization with off-the-shelf image discriminators. Moreover, its solving procedure proves to be stable when anchored to a reference flow trajectory, with a convergence guarantee. The derived method is implemented on rectified flow with different off-the-shelf image discriminators, delivering advantageous personalization results for human faces, live subjects, and certain objects. Code is available at https://github.com/feifeiobama/RectifID.", "keywords": "Personalized Image Generation;Rectified Flow;Classifier Guidance", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zhicheng Sun;Zhenhao Yang;Yang Jin;Haozhe Chi;Kun Xu;Kun Xu;Liwei Chen;Hao Jiang;Yang Song;Kun Gai;Yadong MU", "authorids": "~Zhicheng_Sun1;~Zhenhao_Yang1;~Yang_Jin1;~Haozhe_Chi1;~Kun_Xu4;~Kun_Xu6;~Liwei_Chen3;~Hao_Jiang10;~Yang_Song6;~Kun_Gai1;~Yadong_MU1", "gender": "M;M;M;M;;M;M;;M;M;M", "homepage": "https://feifeiobama.github.io;https://github.com/JeffreyYzh;;;https://sites.google.com/view/kunxu2/home;;;https://jianghao.com;http://sonyis.me;;http://www.muyadong.com/", "dblp": "331/1484-1;;51/3584;330/9145.html;;;;;24/4470-8;59/2902;55/1817", "google_scholar": "Xa8dgkYAAAAJ;;O3NlYwEAAAAJ;l5NbV2EAAAAJ;;MGTIEMIAAAAJ;https://scholar.google.com/citations?hl=en;;tvWB_yUAAAAJ;PXO4ygEAAAAJ;https://scholar.google.com.tw/citations?user=Fqqx4HsAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": "zhicheng-sun;;;;;;;;;;", "or_profile": "~Zhicheng_Sun1;~Zhenhao_Yang1;~Yang_Jin1;~Haozhe_Chi1;~Kun_Xu4;~Kun_Xu6;~Liwei_Chen3;~Hao_Jiang10;~Yang_Song6;~Kun_Gai1;~Yadong_MU1", "aff": "Peking University;University of Electronic Science and Technology of China;Peking University;Zhejiang University;Tencent AI Lab;Kuaishou- \u5feb\u624b\u79d1\u6280;Kuaishou- \u5feb\u624b\u79d1\u6280;Peking University;Kuaishou Technology;Kuaishou- \u5feb\u624b\u79d1\u6280;Peking University", "aff_domain": "pku.edu.cn;uestc.edu.cn;pku.edu.cn;zju.edu.cn;tencent.com;kuaishou.com;kuaishou.com;pku.edu.cn;kuaishou.com;kuaishou.com;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Undergrad student;Researcher;Researcher;Researcher;PhD student;Researcher;Instructor;Associate Professor", "bibtex": "@inproceedings{\nsun2024rectifid,\ntitle={Rectif{ID}: Personalizing Rectified Flow with Anchored Classifier Guidance},\nauthor={Zhicheng Sun and Zhenhao Yang and Yang Jin and Haozhe Chi and Kun Xu and Kun Xu and Liwei Chen and Hao Jiang and Yang Song and Kun Gai and Yadong MU},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KKrj1vCQaG}\n}", "github": "", "reviewers": "bvm9;Fu9U;dDUa", "pdf_size": 20707788, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "43;88;50", "wc_strengths": "44;15;86", "wc_weaknesses": "51;6;74", "wc_questions": "51;6;66", "wc_limitations": "1;33;13", "wc_review": "190;148;289", "wc_reply_reviewers": "19;39;43", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.333333333333336, 19.770910168449223 ], "wc_strengths_avg": [ 48.333333333333336, 29.147136318265567 ], "wc_weaknesses_avg": [ 43.666666666666664, 28.241026106633512 ], "wc_questions_avg": [ 41.0, 25.495097567963924 ], "wc_limitations_avg": [ 15.666666666666666, 13.199326582148888 ], "wc_review_avg": [ 209.0, 59.1100668245266 ], "wc_reply_reviewers_avg": [ 33.666666666666664, 10.498677165349081 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=818550623619325065&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;uestc.edu.cn;pku.edu.cn;zju.edu.cn;tencent.com;kuaishou.com;kuaishou.com;pku.edu.cn;kuaishou.com;kuaishou.com;pku.edu.cn", "author_num": 11, "aff_unique_index": "0;1;0;2;3;4;4;0;4;4;0", "aff_unique_norm": "Peking University;University of Electronic Science and Technology of China;Zhejiang University;Tencent;Kuaishou Technology", "aff_unique_dep": ";;;Tencent AI Lab;", "aff_unique_url": "http://www.pku.edu.cn;https://www.uestc.edu.cn;https://www.zju.edu.cn;https://ai.tencent.com;https://www.kuaishou.com", "aff_unique_abbr": "Peking U;UESTC;ZJU;Tencent AI Lab;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Oracle-Efficient Reinforcement Learning for Max Value Ensembles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95670", "id": "KLL70pTQ17", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KLL70pTQ17", "openreview": "https://openreview.net/forum?id=KLL70pTQ17", "poster": "/media/PosterPDFs/NeurIPS%202024/95670.png?t=1731339092.836654", "project": "", "author_site": "Marcel Hussing, Michael Kearns, Aaron Roth, Sikata Sengupta, Jessica Sorrell", "tldr": "", "abstract": "Reinforcement learning (RL) in large or infinite state spaces is notoriously challenging, both theoretically (where worst-case sample and computational complexities must scale with state space cardinality) and experimentally (where function approximation and policy gradient techniques often scale poorly and suffer from instability and high variance). One line of research attempting to address these difficulties\nmakes the natural assumption that we are given a collection of base or *constituent* policies (possibly heuristic) upon which we would like to improve in a scalable manner. In this work we aim to compete with the *max-following policy*, which at each state follows the action of whichever constituent policy has the highest value. The max-following policy is always at least as good as the best constituent policy, and may be considerably better. Our main result is an efficient algorithm that learns to compete with the max-following policy, given only access to the constituent policies (but not their value functions). In contrast to prior work in similar settings, our theoretical results require only the minimal assumption of an ERM oracle for value function approximation for the constituent policies (and not the global optimal policy or the max-following policy itself) on samplable distributions. We illustrate our algorithm's experimental effectiveness and behavior on several robotic simulation testbeds.", "keywords": "Reinforcement Learning Theory;Ensembling;Max-Following;Learning Theory", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/bb469519a489aabca86558d48f18b35b9afd94e1.zip", "author": "Marcel Hussing;Michael Kearns;Aaron Roth;Sikata Bela Sengupta;Jessica Sorrell", "authorids": "~Marcel_Hussing1;~Michael_Kearns2;~Aaron_Roth1;~Sikata_Bela_Sengupta1;~Jessica_Sorrell1", "gender": "M;M;F;F;", "homepage": "https://marcelhussing.github.io/;http://www.cis.upenn.edu/~aaroth/;https://psamathe50.github.io/sikatasengupta/;https://jess-sorrell.github.io/;https://www.cis.upenn.edu/~mkearns/", "dblp": "250/2621;80/3311;378/1471;222/3271;78/6858", "google_scholar": "LEDpvJEAAAAJ;https://scholar.google.com.tw/citations?user=kLUQrrYAAAAJ;;hBsSfjAAAAAJ;8iQk0DIAAAAJ", "orcid": ";;;;", "linkedin": "marcel-hussing-628264158/;;;;", "or_profile": "~Marcel_Hussing1;~Aaron_Roth1;~Sikata_Bela_Sengupta1;~Jessica_Sorrell1;~Michael_J._Kearns1", "aff": "School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Full Professor;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nhussing2024oracleefficient,\ntitle={Oracle-Efficient Reinforcement Learning for Max Value Ensembles},\nauthor={Marcel Hussing and Michael Kearns and Aaron Roth and Sikata Bela Sengupta and Jessica Sorrell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KLL70pTQ17}\n}", "github": "", "reviewers": "Z6cc;BR1J;Qu2b", "pdf_size": 647069, "rating": "5;5;6", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;2;2", "wc_summary": "127;73;267", "wc_strengths": "48;52;43", "wc_weaknesses": "65;34;245", "wc_questions": "28;24;55", "wc_limitations": "28;5;17", "wc_review": "296;188;627", "wc_reply_reviewers": "294;0;0", "wc_reply_authors": "147;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 155.66666666666666, 81.75301557469031 ], "wc_strengths_avg": [ 47.666666666666664, 3.6817870057290873 ], "wc_weaknesses_avg": [ 114.66666666666667, 93.0244890099137 ], "wc_questions_avg": [ 35.666666666666664, 13.767917618708923 ], "wc_limitations_avg": [ 16.666666666666668, 9.392668535736913 ], "wc_review_avg": [ 370.3333333333333, 186.7696144689732 ], "wc_reply_reviewers_avg": [ 98.0, 138.59292911256333 ], "wc_reply_authors_avg": [ 49.0, 69.29646455628166 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4205865845963923266&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "School of Engineering and Applied Science", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Model-Based Transfer Learning for Contextual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95669", "id": "KLv1VLuMo8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KLv1VLuMo8", "openreview": "https://openreview.net/forum?id=KLv1VLuMo8", "poster": "", "project": "", "author_site": "Jung-Hoon Cho, Vindula Jayawardana, Sirui Li, Cathy Wu", "tldr": "", "abstract": "Deep reinforcement learning (RL) is a powerful approach to complex decision-making. However, one issue that limits its practical application is its brittleness, sometimes failing to train in the presence of small changes in the environment. Motivated by the success of zero-shot transfer\u2014where pre-trained models perform well on related tasks\u2014we consider the problem of selecting a good set of training tasks to maximize generalization performance across a range of tasks. Given the high cost of training, it is critical to select training tasks strategically, but not well understood how to do so. We hence introduce Model-Based Transfer Learning (MBTL), which layers on top of existing RL methods to effectively solve contextual RL problems. MBTL models the generalization performance in two parts: 1) the performance set point, modeled using Gaussian processes, and 2) performance loss (generalization gap), modeled as a linear function of contextual similarity. MBTL combines these two pieces of information within a Bayesian optimization (BO) framework to strategically select training tasks. We show theoretically that the method exhibits sublinear regret in the number of training tasks and discuss conditions to further tighten regret bounds. We experimentally validate our methods using urban traffic and standard continuous control benchmarks. The experimental results suggest that MBTL can achieve up to 43x improved sample efficiency compared with canonical independent training and multi-task training. Further experiments demonstrate the efficacy of BO and the insensitivity to the underlying RL algorithm and hyperparameters. This work lays the foundations for investigating explicit modeling of generalization, thereby enabling principled yet effective methods for contextual RL. Code is available at https://github.com/jhoon-cho/MBTL/.", "keywords": "Deep Reinforcement Learning;Zero-Shot Transfer;Generalization;Bayesian Optimization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/ba5d353a0770b800a2dec44aa738ff3c1ea44be4.zip", "author": "Jung-Hoon Cho;Vindula Jayawardana;Sirui Li;Cathy Wu", "authorids": "~Jung-Hoon_Cho1;~Vindula_Jayawardana1;~Sirui_Li1;~Cathy_Wu1", "gender": "M;;;F", "homepage": "https://www.junghooncho.com;https://vindulamj.github.io;https://siruil.github.io/;http://wucathy.com", "dblp": ";202/2003;;155/3740", "google_scholar": "1QvtDvEAAAAJ;3ZqqJosAAAAJ;Q4VMj_sAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3294-321X;;;0000-0001-8594-303X", "linkedin": "junghoon-cho/;vindulajayawardana/;;cathywu/", "or_profile": "~Jung-Hoon_Cho1;~Vindula_Jayawardana1;~Sirui_Li1;~Cathy_Wu1", "aff": "Massachusetts Institute of Technology;NVIDIA;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;nvidia.com;mit.edu;mit.edu", "position": "PhD student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncho2024modelbased,\ntitle={Model-Based Transfer Learning for Contextual Reinforcement Learning},\nauthor={Jung-Hoon Cho and Vindula Jayawardana and Sirui Li and Cathy Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KLv1VLuMo8}\n}", "github": "", "reviewers": "21Qs;ECSQ;wJEK", "pdf_size": 32436648, "rating": "4;6;7", "confidence": "4;3;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "91;76;88", "wc_strengths": "67;59;86", "wc_weaknesses": "341;91;85", "wc_questions": "40;46;13", "wc_limitations": "1;1;7", "wc_review": "540;273;279", "wc_reply_reviewers": "1007;43;32", "wc_reply_authors": "1250;136;18", "reply_reviewers": "2;1;1", "reply_authors": "4;3;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 85.0, 6.48074069840786 ], "wc_strengths_avg": [ 70.66666666666667, 11.32352516764202 ], "wc_weaknesses_avg": [ 172.33333333333334, 119.29049510427151 ], "wc_questions_avg": [ 33.0, 14.352700094407323 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 364.0, 124.47489706764172 ], "wc_reply_reviewers_avg": [ 360.6666666666667, 457.0487452729255 ], "wc_reply_authors_avg": [ 468.0, 555.0519495206432 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:e1yHrXct0yYJ:scholar.google.com/&scioq=Model-Based+Transfer+Learning+for+Contextual+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "mit.edu;nvidia.com;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://web.mit.edu;https://www.nvidia.com", "aff_unique_abbr": "MIT;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "KOALA: Empirical Lessons Toward Memory-Efficient and Fast Diffusion Models for Text-to-Image Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95668", "id": "KNDUBpWV9b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KNDUBpWV9b", "openreview": "https://openreview.net/forum?id=KNDUBpWV9b", "poster": "/media/PosterPDFs/NeurIPS%202024/95668.png?t=1733385571.6590126", "project": "", "author_site": "Youngwan Lee, Kwanyong Park, Yoorhim Cho, Yong-Ju Lee, Sung Ju Hwang", "tldr": "", "abstract": "As text-to-image (T2I) synthesis models increase in size, they demand higher inference costs due to the need for more expensive GPUs with larger memory, which makes it challenging to reproduce these models in addition to the restricted access to training datasets. Our study aims to reduce these inference costs and explores how far the generative capabilities of T2I models can be extended using only publicly available datasets and open-source models. To this end, by using the de facto standard text-to-image model, Stable Diffusion XL (SDXL), we present three key practices in building an efficient T2I model: (1) Knowledge distillation: we explore how to effectively distill the generation capability of SDXL into an efficient U-Net and find that self-attention is the most crucial part. (2) Data: despite fewer samples, high-resolution images with rich captions are more crucial than a larger number of low-resolution images with short captions. (3) Teacher: Step-distilled Teacher allows T2I models to reduce the noising steps. Based on these findings, we build two types of efficient text-to-image models, called KOALA-Turbo & -Lightning, with two compact U-Nets (1B & 700M), reducing the model size up to 54% and 69% of the SDXL U-Net. In particular, the KOALA-Lightning-700M is 4 times faster than SDXL while still maintaining satisfactory generation quality. Moreover, unlike SDXL, our KOALA models can generate 1024px high-resolution images on consumer-grade GPUs with 8GB of VRAMs (3060Ti). We believe that our KOALA models will have a significant practical impact, serving as cost-effective alternatives to SDXL for academic researchers and general users in resource-constrained environments.", "keywords": "text-to-image synthesis;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Youngwan Lee;Kwanyong Park;Yoorhim Cho;Yong-Ju Lee;Sung Ju Hwang", "authorids": "~Youngwan_Lee1;~Kwanyong_Park1;~Yoorhim_Cho1;~Yong-Ju_Lee2;~Sung_Ju_Hwang1", "gender": "M;M;F;M;", "homepage": "https://youngwanlee.github.io/;https://pkyong95.github.io/;https://ofzlo.github.io;https://yongjulee-etri.github.io/;", "dblp": "184/5625;241/9707;363/7348;30/2142;", "google_scholar": "EqemKYsAAAAJ;5EwTX0YAAAAJ;OLsmyoIAAAAJ;https://scholar.google.co.kr/citations?user=6goOQh8AAAAJ;", "orcid": "0000-0001-8644-155X;;0009-0004-7179-9291;0000-0001-6538-4701;", "linkedin": "youngwanlee/;;;;", "or_profile": "~Youngwan_Lee1;~Kwanyong_Park1;~Yoorhim_Cho1;~Yong-Ju_Lee2;~Sung_Ju_Hwang1", "aff": "Electronics and Telecommunication Research Institute;ETRI (Electronics and Telecommunication Research Institute);Sookmyung Women's University;Electronics and Telecommunication Research Institute;", "aff_domain": "etri.re.kr;etri.re.kr;sookmyung.ac.kr;etri.re.kr;", "position": "Researcher;Researcher;Undergrad student;Researcher;", "bibtex": "@inproceedings{\nlee2024koala,\ntitle={{KOALA}: Empirical Lessons Toward Memory-Efficient and Fast Diffusion Models for Text-to-Image Synthesis},\nauthor={Youngwan Lee and Kwanyong Park and Yoorhim Cho and Yong-Ju Lee and Sung Ju Hwang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KNDUBpWV9b}\n}", "github": "", "reviewers": "9zYS;EitJ;dMSC;C2ZB", "pdf_size": 19817137, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "78;79;27;108", "wc_strengths": "69;22;41;174", "wc_weaknesses": "99;75;112;41", "wc_questions": "19;50;6;214", "wc_limitations": "7;11;9;21", "wc_review": "272;237;195;558", "wc_reply_reviewers": "67;22;31;33", "wc_reply_authors": "54;59;64;147", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 29.16333314283537 ], "wc_strengths_avg": [ 76.5, 58.72180174347514 ], "wc_weaknesses_avg": [ 81.75, 27.012728481217888 ], "wc_questions_avg": [ 72.25, 83.38577516579191 ], "wc_limitations_avg": [ 12.0, 5.385164807134504 ], "wc_review_avg": [ 315.5, 142.6367764638559 ], "wc_reply_reviewers_avg": [ 38.25, 17.108112110925624 ], "wc_reply_authors_avg": [ 81.0, 38.268786236304905 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9417009384357166376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "etri.re.kr;etri.re.kr;sookmyung.ac.kr;etri.re.kr;", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Electronics and Telecommunication Research Institute;Sookmyung Women's University", "aff_unique_dep": ";", "aff_unique_url": "http://www.etri.re.kr;https://www.sookmyung.ac.kr", "aff_unique_abbr": "ETRI;SWU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Generalized Fast Exact Conformalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95667", "id": "KNZYJ5zQsG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KNZYJ5zQsG", "openreview": "https://openreview.net/forum?id=KNZYJ5zQsG", "poster": "/media/PosterPDFs/NeurIPS%202024/95667.png?t=1728416461.6172872", "project": "", "tldr": "", "abstract": "Conformal prediction converts nearly any point estimator into a prediction interval under standard assumptions while ensuring valid coverage. However, the extensive computational demands of full conformal prediction are daunting in practice, as it necessitates a comprehensive number of trainings across the entire latent label space. Unfortunately, existing efforts to expedite conformalization often carry strong assumptions and are developed specifically for certain models, or they only offer approximate solution sets. To address this gap, we develop a method for fast exact conformalization of generalized statistical estimation. Our analysis reveals that the structure of the solution path is inherently piecewise smooth, and indicates that utilizing second-order information of difference equations suffices to approximate the entire solution spectrum arbitrarily. We provide a unified view that not only encompasses existing work but also attempts to offer geometric insights. Practically, our framework integrates seamlessly with well-studied numerical solvers. The significant speedups of our algorithm as compared to the existing standard methods are demonstrated across numerous benchmarks.", "keywords": "Conformal Prediction;Generalized Statistical Estimation;Numerical Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/8e546e23d9e63c61cc443bab956fb9913fc81edf.zip", "author": "Diyang Li", "authorids": "~Diyang_Li1", "gender": "M", "homepage": "", "dblp": "127/2830", "google_scholar": "_EhwwgMAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Diyang_Li1", "aff": "Cornell University", "aff_domain": "cornell.edu", "position": "PhD student", "bibtex": "@inproceedings{\nli2024generalized,\ntitle={Generalized Fast Exact Conformalization},\nauthor={Diyang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KNZYJ5zQsG}\n}", "github": "", "reviewers": "KUWB;jxtf;ji5B;x8xk;4Aeq", "pdf_size": 559031, "rating": "3;6;6;8;8", "confidence": "1;3;2;3;4", "soundness": "2;3;3;2;4", "novelty": "2;2;3;4;4", "presentation": "1;2;2;2;2", "wc_summary": "158;50;110;138;128", "wc_strengths": "33;39;73;26;63", "wc_weaknesses": "54;111;60;92;54", "wc_questions": "96;143;109;52;186", "wc_limitations": "51;32;22;49;53", "wc_review": "392;375;374;357;484", "wc_reply_reviewers": "370;0;0;295;18", "wc_reply_authors": "1629;0;0;165;22", "reply_reviewers": "1;0;0;1;1", "reply_authors": "4;1;1;2;2", "rating_avg": [ 6.2, 1.8330302779823362 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 1.8, 0.4000000000000001 ], "wc_summary_avg": [ 116.8, 36.8260777167485 ], "wc_strengths_avg": [ 46.8, 18.07097119692243 ], "wc_weaknesses_avg": [ 74.2, 23.18965286501719 ], "wc_questions_avg": [ 117.2, 45.09279321576786 ], "wc_limitations_avg": [ 41.4, 12.240914998479484 ], "wc_review_avg": [ 396.4, 45.17787068908848 ], "wc_reply_reviewers_avg": [ 136.6, 161.83398901343313 ], "wc_reply_authors_avg": [ 363.2, 635.8897388698766 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8987170342729169, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OltM152LYowJ:scholar.google.com/&scioq=Generalized+Fast+Exact+Conformalization&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cornell.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Multi-Object Hallucination in Vision Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95666", "id": "KNrwaFEi1u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KNrwaFEi1u", "openreview": "https://openreview.net/forum?id=KNrwaFEi1u", "poster": "/media/PosterPDFs/NeurIPS%202024/95666.png?t=1731107952.8122218", "project": "", "author_site": "Xuweiyi Chen, Ziqiao Ma, Xuejun Zhang, Sihan Xu, Shengyi Qian, Jianing Yang, David Fouhey, Joyce Chai", "tldr": "", "abstract": "Large vision language models (LVLMs) often suffer from object hallucination, producing objects not present in the given images. \nWhile current benchmarks for object hallucination primarily concentrate on the presence of a single object class rather than individual entities, this work systematically investigates multi-object hallucination, examining how models misperceive (e.g., invent nonexistent objects or become distracted) when tasked with focusing on multiple objects simultaneously.\nWe introduce Recognition-based Object Probing Evaluation (ROPE), an automated evaluation protocol that considers the distribution of object classes within a single image during testing and uses visual referring prompts to eliminate ambiguity. \nWith comprehensive empirical studies and analysis of potential factors leading to multi-object hallucination, we found that (1) LVLMs suffer more hallucinations when focusing on multiple objects compared to a single object. \n(2) The tested object class distribution affects hallucination behaviors, indicating that LVLMs may follow shortcuts and spurious correlations.\n(3) Hallucinatory behaviors are influenced by data-specific factors, salience and frequency, and model intrinsic behaviors.\nWe hope to enable LVLMs to recognize and reason about multiple objects that often occur in realistic visual scenes, provide insights, and quantify our progress towards mitigating the issues.", "keywords": "Large Vision Language Models;Object Hallucination;Visual Prompting", "primary_area": "machine_vision", "supplementary_material": "/attachment/fe9b1de30118ce5bd441722b1fa4d5c4cc16253b.zip", "author": "Xuweiyi Chen;Ziqiao Ma;Xuejun Zhang;Sihan Xu;Shengyi Qian;Jianing Yang;David Fouhey;Joyce Chai", "authorids": "~Xuweiyi_Chen1;~Ziqiao_Ma1;~Xuejun_Zhang4;~Sihan_Xu2;~Shengyi_Qian1;~Jianing_Yang1;~David_Fouhey2;~Joyce_Chai2", "gender": "M;Not Specified;F;M;M;M;;F", "homepage": "https://xuweiyichen.github.io/;http://mars-tin.github.io/;https://xuejunzhang2002.github.io/;https://sihanxu.github.io/;https://jasonqsy.github.io/;https://jedyang.com/;;https://web.eecs.umich.edu/~chaijy/", "dblp": ";287/7595-1.html;38/1691;;250/4431-1;;29/8613;c/JoyceYChai", "google_scholar": "QgoY8GEAAAAJ;WbybssYAAAAJ;l2Kpd2wAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;FLcpd34AAAAJ;", "orcid": ";0000-0002-0760-4638;;;0000-0003-0262-2412;;;0000-0002-9658-2230", "linkedin": ";;;;jasonqsy/;;;", "or_profile": "~Xuweiyi_Chen1;~Ziqiao_Ma1;~Xuejun_Zhang4;~Sihan_Xu2;~Shengyi_Qian1;~Jianing_Yang1;~David_Fouhey2;~Joyce_Y_Chai1", "aff": "Electrical Engineering and Computer Science, University of Michigan - Ann Arbor;Adobe Research;Shanghai Jiaotong University;University of Michigan - Ann Arbor;University of Michigan;University of Michigan - Ann Arbor;New York University;University of Michigan", "aff_domain": "eecs.umich.edu;adobe.com;sjtu.edu.cn;umich.edu;umich.edu;umich.edu;nyu.edu;umich.edu", "position": "MS student;Research Intern;Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024multiobject,\ntitle={Multi-Object Hallucination in Vision Language Models},\nauthor={Xuweiyi Chen and Ziqiao Ma and Xuejun Zhang and Sihan Xu and Shengyi Qian and Jianing Yang and David Fouhey and Joyce Chai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KNrwaFEi1u}\n}", "github": "", "reviewers": "i299;uGDi;i1pJ;4TNS", "pdf_size": 2063557, "rating": "5;6;6;7", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;4", "presentation": "2;2;4;3", "wc_summary": "52;64;57;197", "wc_strengths": "36;95;54;58", "wc_weaknesses": "16;178;69;204", "wc_questions": "95;367;34;427", "wc_limitations": "6;6;7;10", "wc_review": "205;710;221;896", "wc_reply_reviewers": "127;110;46;20", "wc_reply_authors": "725;33;36;23", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 92.5, 60.48346881586737 ], "wc_strengths_avg": [ 60.75, 21.44032415799724 ], "wc_weaknesses_avg": [ 116.75, 77.12773495961099 ], "wc_questions_avg": [ 230.75, 168.97984347252782 ], "wc_limitations_avg": [ 7.25, 1.6393596310755 ], "wc_review_avg": [ 508.0, 302.29373132766085 ], "wc_reply_reviewers_avg": [ 75.75, 44.13827703932268 ], "wc_reply_authors_avg": [ 204.25, 300.6936771866013 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10878864597217016797&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "eecs.umich.edu;adobe.com;sjtu.edu.cn;umich.edu;umich.edu;umich.edu;nyu.edu;umich.edu", "author_num": 8, "aff_unique_index": "0;1;2;0;0;0;3;0", "aff_unique_norm": "University of Michigan;Adobe;Shanghai Jiao Tong University;New York University", "aff_unique_dep": "Department of Electrical Engineering and Computer Science;Adobe Research;;", "aff_unique_url": "https://www.umich.edu;https://research.adobe.com;https://www.sjtu.edu.cn;https://www.nyu.edu", "aff_unique_abbr": "UM;Adobe;SJTU;NYU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Goal-Conditioned On-Policy Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95665", "id": "KP7EUORJYI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KP7EUORJYI", "openreview": "https://openreview.net/forum?id=KP7EUORJYI", "poster": "/media/PosterPDFs/NeurIPS%202024/95665.png?t=1730797975.4648044", "project": "", "author_site": "Xudong Gong, Feng Dawei, Kele Xu, Bo Ding, Huaimin Wang", "tldr": "", "abstract": "Existing Goal-Conditioned Reinforcement Learning (GCRL) algorithms are built upon Hindsight Experience Replay (HER), which densifies rewards through hindsight replay and leverages historical goal-achieving information to construct a learning curriculum. However, when the task is characterized by a non-Markovian reward (NMR), whose computation depends on multiple steps of states and actions, HER can no longer densify rewards by treating a single encountered state as the hindsight goal. The lack of informative rewards hinders policy learning, resulting in rolling out failed trajectories. Consequently, the replay buffer is overwhelmed with failed trajectories, impeding the establishment of an applicable curriculum. To circumvent these limitations, we deviate from existing HER-based methods and propose an on-policy GCRL framework, GCPO, which is applicable to both multi-goal Markovian reward (MR) and NMR problems.\nGCPO consists of (1) Pre-training from Demonstrations, which pre-trains the policy to possess an initial goal-achieving capability, thereby diminishing the difficulty of subsequent online learning. (2) Online Self-Curriculum Learning, which first estimates the policy's goal-achieving capability based on historical evaluation information and then selects progressively challenging goals for learning based on its current capability. We evaluate GCPO on a challenging multi-goal long-horizon task: fixed-wing UAV velocity vector control. Experimental results demonstrate that GCPO is capable of effectively addressing both multi-goal MR and NMR problems.", "keywords": "Goal-conditioned reinforcement learning;on-policy reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Gong Xudong;Feng Dawei;Kele Xu;Bo Ding;Huaimin Wang", "authorids": "~Gong_Xudong1;~Feng_Dawei1;~Kele_Xu2;~Bo_Ding1;~Huaimin_Wang1", "gender": "M;;;M;M", "homepage": "https://github.com/GongXudong;;;;", "dblp": "119/3474;;;;02/661", "google_scholar": ";;;XS0voEAAAAAJ;", "orcid": "0000-0002-2253-2927;;;;", "linkedin": ";;;;", "or_profile": "~Gong_Xudong1;~Feng_Dawei1;~Kele_Xu2;~Bo_Ding1;~Huaimin_Wang1", "aff": "National University of Defense Technology;;;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;;;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxudong2024goalconditioned,\ntitle={Goal-Conditioned On-Policy Reinforcement Learning},\nauthor={Gong Xudong and Feng Dawei and Kele Xu and Bo Ding and Huaimin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KP7EUORJYI}\n}", "github": "", "reviewers": "emMz;aw7R;U5WJ", "pdf_size": 0, "rating": "6;7;7", "confidence": "3;3;4", "soundness": "2;2;3", "novelty": "2;2;4", "presentation": "3;3;4", "wc_summary": "67;114;71", "wc_strengths": "18;54;203", "wc_weaknesses": "100;59;237", "wc_questions": "263;10;10", "wc_limitations": "62;20;1", "wc_review": "510;257;522", "wc_reply_reviewers": "43;23;49", "wc_reply_authors": "52;44;60", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.0, 21.275964529643932 ], "wc_strengths_avg": [ 91.66666666666667, 80.08467740807572 ], "wc_weaknesses_avg": [ 132.0, 76.10957013849617 ], "wc_questions_avg": [ 94.33333333333333, 119.26534376013103 ], "wc_limitations_avg": [ 27.666666666666668, 25.48637980482037 ], "wc_review_avg": [ 429.6666666666667, 122.19201646952591 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 11.115554667022044 ], "wc_reply_authors_avg": [ 52.0, 6.531972647421808 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13306767717554059445&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "nudt.edu.cn;;;nudt.edu.cn;nudt.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Defense Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nudt.edu.cn/", "aff_unique_abbr": "NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Task-Agnostic Machine-Learning-Assisted Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95664", "id": "KQp7dk5YYH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KQp7dk5YYH", "openreview": "https://openreview.net/forum?id=KQp7dk5YYH", "poster": "", "project": "", "author_site": "Jiacheng Miao, Qiongshi Lu", "tldr": "", "abstract": "Machine learning (ML) is playing an increasingly important role in scientific research. In conjunction with classical statistical approaches, ML-assisted analytical strategies have shown great promise in accelerating research findings. This has also opened a whole field of methodological research focusing on integrative approaches that leverage both ML and statistics to tackle data science challenges. One type of study that has quickly gained popularity employs ML to predict unobserved outcomes in massive samples, and then uses predicted outcomes in downstream statistical inference. However, existing methods designed to ensure the validity of this type of post-prediction inference are limited to very basic tasks such as linear regression analysis. This is because any extension of these approaches to new, more sophisticated statistical tasks requires task-specific algebraic derivations and software implementations, which ignores the massive library of existing software tools already developed for the same scientific problem given observed data. This severely constrains the scope of application for post-prediction inference. To address this challenge, we introduce a novel statistical framework named PSPS for task-agnostic ML-assisted inference. It provides a post-prediction inference solution that can be easily plugged into almost any established data analysis routines. It delivers valid and efficient inference that is robust to arbitrary choice of ML model, allowing nearly all existing statistical frameworks to be incorporated into the analysis of ML-predicted data. Through extensive experiments, we showcase our method\u2019s validity, versatility, and superiority compared to existing approaches. Our software is available at https://github.com/qlu-lab/psps.", "keywords": "AI for Science;Scientific Machine Learning;Machine Learning-Assisted Inference;post-prediction inference", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/812ed6adbcba8b9d2e787b51d9ccda23acd8b677.zip", "author": "Jiacheng Miao;Qiongshi Lu", "authorids": "~Jiacheng_Miao1;~Qiongshi_Lu1", "gender": "M;M", "homepage": "https://jiachengmiao.com;https://qlu-lab.org/people.html", "dblp": "187/1082;", "google_scholar": "cLIcazgAAAAJ;cFsy0TgAAAAJ", "orcid": "0000-0002-4524-7408;0000-0002-4514-0969", "linkedin": ";", "or_profile": "~Jiacheng_Miao1;~Qiongshi_Lu1", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nmiao2024taskagnostic,\ntitle={Task-Agnostic Machine-Learning-Assisted Inference},\nauthor={Jiacheng Miao and Qiongshi Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KQp7dk5YYH}\n}", "github": "", "reviewers": "5S3y;DsrD;hhpT;TkNf", "pdf_size": 2012982, "rating": "6;7;7;7", "confidence": "2;3;1;4", "soundness": "3;1;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "55;64;51;53", "wc_strengths": "35;53;32;36", "wc_weaknesses": "180;307;9;370", "wc_questions": "3;82;9;114", "wc_limitations": "1;28;23;8", "wc_review": "274;534;124;581", "wc_reply_reviewers": "130;195;18;11", "wc_reply_authors": "133;1013;0;574", "reply_reviewers": "2;2;1;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.75, 4.968651728587948 ], "wc_strengths_avg": [ 39.0, 8.215838362577491 ], "wc_weaknesses_avg": [ 216.5, 137.9682934590408 ], "wc_questions_avg": [ 52.0, 47.41835087811469 ], "wc_limitations_avg": [ 15.0, 10.931605554537724 ], "wc_review_avg": [ 378.25, 187.6677582857535 ], "wc_reply_reviewers_avg": [ 88.5, 77.52580215644338 ], "wc_reply_authors_avg": [ 430.0, 398.03705857620844 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16155451380798766681&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "wisc.edu;wisc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Superposed Decoding: Multiple Generations from a Single Autoregressive Inference Pass", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95663", "id": "KSOkkHm9I7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KSOkkHm9I7", "openreview": "https://openreview.net/forum?id=KSOkkHm9I7", "poster": "", "project": "", "author_site": "Ethan Shen, Alan Fan, Sarah Pratt, Jae Sung Park, Matthew Wallingford, Sham Kakade, Ari Holtzman, Ranjay Krishna, Ali Farhadi, Aditya Kusupati", "tldr": "", "abstract": "Many applications today provide users with multiple auto-complete drafts as they type, including GitHub's code completion, Gmail's smart compose, and Apple's messaging auto-suggestions. Under the hood, language models support this by running an autoregressive inference pass to provide a draft. Consequently, providing $k$ drafts to the user requires running an expensive language model $k$ times. To alleviate the computation cost of running $k$ inference passes, we propose Superposed Decoding, a new decoding algorithm that generates $k$ drafts at the computation cost of one autoregressive inference pass. We achieve this by feeding a superposition of the most recent token embeddings from the $k$ drafts as input to the next decoding step of the language model. At every inference step we combine the $k$ drafts with the top-$k$ tokens to get $k^2$ new drafts and cache the $k$ most likely options, using an n-gram interpolation with minimal compute overhead to filter out incoherent generations. Our experiments show that $k$ drafts from Superposed Decoding are at least as coherent and factual as Nucleus Sampling and Greedy Decoding respectively, while being at least $2.44\\times$ faster for $k\\ge3$. In a compute-normalized setting, user evaluations demonstrably favor text generated by Superposed Decoding over Nucleus Sampling. Superposed Decoding can also be combined with other decoding strategies, resulting in universal coverage gains when scaling inference time compute. Code and more examples open-sourced at https://github.com/RAIVNLab/SuperposedDecoding.", "keywords": "Efficient Decoding;Superposition;Autoregressive Modelling", "primary_area": "generative_models", "supplementary_material": "/attachment/f673fb255e816d58cc5361c80b37a12bf271ac8f.zip", "author": "Ethan Shen;Alan Fan;Sarah M Pratt;Jae Sung Park;Matthew Wallingford;Sham M. Kakade;Ari Holtzman;Ranjay Krishna;Ali Farhadi;Aditya Kusupati", "authorids": "~Ethan_Shen2;~Alan_Fan1;~Sarah_M_Pratt1;~Jae_Sung_Park2;~Matthew_Wallingford1;~Sham_M._Kakade1;~Ari_Holtzman1;~Ranjay_Krishna1;~Ali_Farhadi3;~Aditya_Kusupati1", "gender": "M;;F;;M;M;M;M;M;M", "homepage": "https://ethanlshen.github.io/;;;https://homes.cs.washington.edu/~jspark96/;https://shamulent.github.io;http://ariholtzman.com;http://ranjaykrishna.com;https://homes.cs.washington.edu/~ali/;http://www.adityakusupati.com/;https://mattwallingford.github.io/", "dblp": "285/5161.html;;;;s/SMKakade;https://dblp.uni-trier.de/pers/hd/h/Holtzman:Ari;167/3785;37/5826;231/7662;263/1795", "google_scholar": ";;;hD2WqqcAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;https://scholar.google.com/citations?authuser=2;IcqahyAAAAAJ;jeOFRDsAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;", "orcid": ";;;;;;0000-0001-8784-2531;;0000-0001-8455-1851;", "linkedin": "ethanlshen/;https://linkedin.com/in/alan-fan;sarahpratt;;;;ranjay-krishna-1a344444/;;adityakusupati/;", "or_profile": "~Ethan_Shen2;~Alan_Fan1;~Sarah_M_Pratt1;~Jae_Sung_Park2;~Sham_M._Kakade1;~Ari_Holtzman1;~Ranjay_Krishna1;~Ali_Farhadi3;~Aditya_Kusupati1;~Matthew_C_Wallingford2", "aff": "University of Washington;Department of Computer Science;University of Washington;University of Washington;Harvard University;Meta;University of Washington;University of Washington;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu;uw.edu;washington.edu;harvard.edu;meta.com;cs.washington.edu;cs.uw.edu;cs.washington.edu;washington.edu", "position": "Undergrad student;Undergrad student;PhD student;PhD student;Full Professor;Postdoc;Assistant Professor;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nshen2024superposed,\ntitle={Superposed Decoding: Multiple Generations from a Single Autoregressive Inference Pass},\nauthor={Ethan Shen and Alan Fan and Sarah M Pratt and Jae Sung Park and Matthew Wallingford and Sham M. Kakade and Ari Holtzman and Ranjay Krishna and Ali Farhadi and Aditya Kusupati},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KSOkkHm9I7}\n}", "github": "", "reviewers": "ZGM9;E6Fn;Lot4;Tak7", "pdf_size": 794711, "rating": "4;4;7;7", "confidence": "4;4;3;3", "soundness": "2;1;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "30;49;99;46", "wc_strengths": "15;19;45;31", "wc_weaknesses": "149;246;29;160", "wc_questions": "2;15;76;170", "wc_limitations": "1;27;39;10", "wc_review": "197;356;288;417", "wc_reply_reviewers": "246;11;27;115", "wc_reply_authors": "1610;66;14;46", "reply_reviewers": "7;1;1;1", "reply_authors": "6;4;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 25.855366947695792 ], "wc_strengths_avg": [ 27.5, 11.6940155635265 ], "wc_weaknesses_avg": [ 146.0, 77.28842086625913 ], "wc_questions_avg": [ 65.75, 66.35651814253066 ], "wc_limitations_avg": [ 19.25, 14.737282653189496 ], "wc_review_avg": [ 314.5, 81.7572626743337 ], "wc_reply_reviewers_avg": [ 99.75, 93.26139340584614 ], "wc_reply_authors_avg": [ 434.0, 679.2171964843058 ], "reply_reviewers_avg": [ 2.5, 2.598076211353316 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6338957787346254230&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;cs.washington.edu;uw.edu;washington.edu;harvard.edu;meta.com;cs.washington.edu;cs.uw.edu;cs.washington.edu;washington.edu", "author_num": 10, "aff_unique_index": "0;1;0;0;2;3;0;0;0;0", "aff_unique_norm": "University of Washington;Unknown Institution;Harvard University;Meta", "aff_unique_dep": ";Department of Computer Science;;Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;;https://www.harvard.edu;https://meta.com", "aff_unique_abbr": "UW;;Harvard;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Banded Square Root Matrix Factorization for Differentially Private Model Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95662", "id": "KSyTvgoSrX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KSyTvgoSrX", "openreview": "https://openreview.net/forum?id=KSyTvgoSrX", "poster": "/media/PosterPDFs/NeurIPS%202024/95662.png?t=1731330575.6165063", "project": "", "author_site": "Kalinin Nikita, Christoph Lampert", "tldr": "", "abstract": "Current state-of-the-art methods for differentially private model training are based on matrix factorization techniques. However, these methods suffer from high computational overhead because they require numerically solving a demanding optimization problem to determine an approximately optimal factorization prior to the actual model training. In this work, we present a new matrix factorization approach, BSR, which overcomes this computational bottleneck. By exploiting properties of the standard matrix square root, BSR allows to efficiently handle also large-scale problems. For the key scenario of stochastic gradient descent with momentum and weight decay, we even derive analytical expressions for BSR that render the computational overhead negligible. We prove bounds on the approximation quality that hold both in the centralized and in the federated learning setting. Our numerical experiments demonstrate that models trained using BSR perform on par with the best existing methods, while completely avoiding their computational overhead.", "keywords": "Differential Privacy;Machine Learning;Federated Learning", "primary_area": "privacy", "supplementary_material": "", "author": "Nikita Kalinin;Christoph H. Lampert", "authorids": "~Nikita_Kalinin1;~Christoph_H._Lampert6", "gender": ";M", "homepage": "https://npkalinin.github.io/;http://cvml.ist.ac.at/", "dblp": "383/7978;67/2136", "google_scholar": "nblAbqUAAAAJ;https://scholar.google.at/citations?user=iCf3SwgAAAAJ", "orcid": ";0000-0001-8622-7887", "linkedin": ";", "or_profile": "~Nikita_Kalinin1;~Christoph_H_Lampert1", "aff": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_domain": "ist.ac.at;ist.ac.at", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nnikita2024banded,\ntitle={Banded Square Root Matrix Factorization for Differentially Private Model Training},\nauthor={Nikita Kalinin and Christoph H. Lampert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KSyTvgoSrX}\n}", "github": "", "reviewers": "VxsQ;Cfm2;1wZc;sFEq", "pdf_size": 1144656, "rating": "6;6;7;7", "confidence": "4;4;5;3", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "3;4;4;3", "wc_summary": "118;293;71;81", "wc_strengths": "113;155;59;193", "wc_weaknesses": "118;139;90;242", "wc_questions": "65;102;316;1", "wc_limitations": "80;1;25;1", "wc_review": "494;690;561;518", "wc_reply_reviewers": "47;28;326;0", "wc_reply_authors": "0;0;143;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 140.75, 89.62805085462921 ], "wc_strengths_avg": [ 130.0, 49.80963762164909 ], "wc_weaknesses_avg": [ 147.25, 57.39936846342475 ], "wc_questions_avg": [ 121.0, 118.23916440841418 ], "wc_limitations_avg": [ 26.75, 32.26743714644843 ], "wc_review_avg": [ 565.75, 75.64514194579847 ], "wc_reply_reviewers_avg": [ 100.25, 131.4046707693452 ], "wc_reply_authors_avg": [ 35.75, 61.92081637058736 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2432745065898842967&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ist.ac.at;ist.ac.at", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Austria" }, { "title": "Accelerating Blockwise Parallel Language Models with Draft Refinement", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95661", "id": "KT6F5Sw0eg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KT6F5Sw0eg", "openreview": "https://openreview.net/forum?id=KT6F5Sw0eg", "poster": "", "project": "", "author_site": "Taehyeon Kim, Ananda Theertha Suresh, Kishore Papineni, Michael D Riley, Sanjiv Kumar, Adrian Benton", "tldr": "", "abstract": "Autoregressive language models have achieved remarkable advancements, yet their potential is often limited by the slow inference speeds associated with sequential token generation. Blockwise parallel decoding (BPD) was proposed by Stern et al. [42] as a method to improve inference speed of language models by simultaneously predicting multiple future tokens, termed block drafts, which are subsequently verified by the autoregressive model. This paper advances the understanding and improvement of block drafts in two ways. First, we analyze token distributions generated across multiple prediction heads. Second, leveraging these insights, we propose algorithms to improve BPD inference speed by refining the block drafts using task-independent \\ngram and neural language models as lightweight rescorers. Experiments demonstrate that by refining block drafts of open-sourced Vicuna and Medusa LLMs, the mean accepted token length are increased by 5-25% relative. This results in over a 3x speedup in wall clock time compared to standard autoregressive decoding in open-source 7B and 13B LLMs.", "keywords": "Blockwise parallel decoding;Language model;speculative decoding", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Taehyeon Kim;Ananda Theertha Suresh;Kishore A Papineni;Michael Riley;Sanjiv Kumar;Adrian Benton", "authorids": "~Taehyeon_Kim1;~Ananda_Theertha_Suresh1;~Kishore_A_Papineni1;~Michael_Riley2;~Sanjiv_Kumar1;~Adrian_Benton1", "gender": "M;M;M;M;;M", "homepage": "https://taehyeon.oopy.io/;https://theertha.info;;https://research.google/people/author125/;http://www.sanjivk.com/;", "dblp": ";119/3884;;73/924;;21/9076", "google_scholar": "https://scholar.google.co.kr/citations?user=wDEaSpwAAAAJ;K6ef57QAAAAJ;aTySmnAAAAAJ;bJrIZK0AAAAJ;https://scholar.google.com/citations?hl=en;k9g68LwAAAAJ", "orcid": ";;;;;", "linkedin": "taehyeon-k-6a1239207/;;;;;adrian-benton-293a7662/", "or_profile": "~Taehyeon_Kim1;~Ananda_Theertha_Suresh1;~Kishore_A_Papineni1;~Michael_Riley2;~Sanjiv_Kumar1;~Adrian_Benton1", "aff": "Korea Advanced Institute of Science & Technology;Google;Google;;Google;Google", "aff_domain": "kaist.ac.kr;google.com;google.com;;google.com;google.com", "position": "PhD student;Research Scientist;Researcher;;Research Scientist;Researcher", "bibtex": "@inproceedings{\nkim2024accelerating,\ntitle={Accelerating Blockwise Parallel Language Models with Draft Refinement},\nauthor={Taehyeon Kim and Ananda Theertha Suresh and Kishore A Papineni and Michael Riley and Sanjiv Kumar and Adrian Benton},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KT6F5Sw0eg}\n}", "github": "", "reviewers": "rNoJ;vMaK;FLJ6", "pdf_size": 2862458, "rating": "4;6;6", "confidence": "3;4;5", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "55;78;59", "wc_strengths": "107;78;24", "wc_weaknesses": "126;127;30", "wc_questions": "9;55;4", "wc_limitations": "7;7;1", "wc_review": "304;345;118", "wc_reply_reviewers": "0;11;0", "wc_reply_authors": "246;249;229", "reply_reviewers": "0;1;0", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.0, 10.03327796219494 ], "wc_strengths_avg": [ 69.66666666666667, 34.393151773120316 ], "wc_weaknesses_avg": [ 94.33333333333333, 45.49236810230432 ], "wc_questions_avg": [ 22.666666666666668, 22.954060400915758 ], "wc_limitations_avg": [ 5.0, 2.8284271247461903 ], "wc_review_avg": [ 255.66666666666666, 98.77359071240765 ], "wc_reply_reviewers_avg": [ 3.6666666666666665, 5.185449728701348 ], "wc_reply_authors_avg": [ 241.33333333333334, 8.806563209081938 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18375017579044240923&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;google.com;google.com;;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.kaist.ac.kr;https://www.google.com", "aff_unique_abbr": "KAIST;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "KUw2V04z5M", "title": "OCTrack: Benchmarking the Open-Corpus Multi-Object Tracking", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We study a novel yet practical problem of open-corpus multi-object tracking (OCMOT), which extends the MOT into localizing, associating, and recognizing generic-category objects of both seen (base) and unseen (novel) classes, but without the category text list as prompt.\nTo study this problem, the top priority is to build a benchmark. In this work, we build OCTrackB, a large-scale and comprehensive benchmark, to provide a standard evaluation platform for the OCMOT problem. Compared to previous datasets, OCTrackB has more abundant and balanced base/novel classes and the corresponding samples for evaluation with less bias. We also propose a new multi-granularity recognition metric to better evaluate the generative object recognition in OCMOT. By conducting the extensive benchmark evaluation, we report and analyze the results of various state-of-the-art methods, which demonstrate the rationale of OCMOT, as well as the usefulness and advantages of OCTrackB.", "keywords": "multi-object tracking;open-corpus", "primary_area": "", "supplementary_material": "/attachment/45a031e2dcfb5fd253cadb00021b52dd2746be61.pdf", "author": "Zekun Qian;Ruize Han;Wei Feng;Junhui Hou;Linqi Song;Song Wang", "authorids": "~Zekun_Qian1;~Ruize_Han1;~Wei_Feng1;~Junhui_Hou2;~Linqi_Song1;~Song_Wang2", "gender": "M;M;M;M;M;M", "homepage": ";https://www.ruizehan.cn/;;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;https://sites.google.com/site/aisquaredlab/;http://www.cse.sc.edu/~songwang/", "dblp": ";205/4022;17/1152-5;122/2673.html;137/7963.html;62/3151-2", "google_scholar": "ZxANT60AAAAJ;ef0Fw9QAAAAJ;https://scholar.google.co.jp/citations?user=7ory1i8AAAAJ;j6eefhwAAAAJ;UcGN3MoAAAAJ;eycXl_QAAAAJ", "orcid": ";0000-0002-6587-8936;;0000-0003-3431-2021;0000-0003-2756-4984;0000-0003-4152-5295", "linkedin": ";;;;;", "or_profile": "~Zekun_Qian1;~Ruize_Han1;~Wei_Feng1;~Junhui_Hou2;~Linqi_Song1;~Song_Wang2", "aff": "Tianjin University;City University of Hong Kong;Tianjin University;City University of Hong Kong;City University of Hong Kong;University of South Carolina", "aff_domain": "tju.edu.cn;cityu.edu.hk;tju.edu.cn;cityu.edu.hk;cityu.edu.hk;sc.edu", "position": "MS student;Postdoc;Full Professor;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024octrack,\ntitle={{OCT}rack: Benchmarking the Open-Corpus Multi-Object Tracking},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=KUw2V04z5M}\n}", "github": "", "project": "", "reviewers": "SS2z;oy8h;gHx8;H893", "site": "https://openreview.net/forum?id=KUw2V04z5M", "pdf_size": 2874691, "rating": "4;7;7;8", "confidence": "5;4;2;4", "wc_summary_and_contributions": "44;70;51;21", "wc_strengths": "13;5;29;53", "wc_improvement": "25;5;81;26", "wc_limitations": "1;1;5;17", "wc_correctness": "1;1;3;38", "wc_clarity": "1;1;4;48", "wc_relation_to_prior_work": "1;1;1;17", "wc_documentation": "1;1;3;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "88;86;178;234", "wc_reply_reviewers": "61;0;0;60", "wc_reply_authors": "108;0;0;138", "reply_reviewers": "1;0;0;1", "reply_authors": "4;1;1;4", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 46.5, 17.528548142958105 ], "wc_strengths_avg": [ 25.0, 18.33030277982336 ], "wc_improvement_avg": [ 34.25, 28.261059781968545 ], "wc_limitations_avg": [ 6.0, 6.557438524302 ], "wc_correctness_avg": [ 10.75, 15.75396775418815 ], "wc_clarity_avg": [ 13.5, 19.956202043475106 ], "wc_relation_to_prior_work_avg": [ 5.0, 6.928203230275509 ], "wc_documentation_avg": [ 4.5, 4.9749371855331 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 146.5, 62.71164166245371 ], "wc_reply_reviewers_avg": [ 30.25, 30.252066045148055 ], "wc_reply_authors_avg": [ 61.5, 62.40793218814416 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5353033790313108, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5300678657719548430&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1;1;2", "aff_unique_norm": "Tianjin University;City University of Hong Kong;University of South Carolina", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;https://www.cityu.edu.hk;https://www.sc.edu", "aff_unique_abbr": "TJU;CityU;USC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "TopoFR: A Closer Look at Topology Alignment on Face Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95660", "id": "KVAx5tys2p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KVAx5tys2p", "openreview": "https://openreview.net/forum?id=KVAx5tys2p", "poster": "/media/PosterPDFs/NeurIPS%202024/95660.png?t=1731309737.523698", "project": "", "author_site": "Jun Dan, Yang Liu, Jiankang Deng, Haoyu Xie, Siyuan Li, Baigui Sun, Shan Luo", "tldr": "", "abstract": "The field of face recognition (FR) has undergone significant advancements with the rise of deep learning. Recently, the success of unsupervised learning and graph neural networks has demonstrated the effectiveness of data structure information. Considering that the FR task can leverage large-scale training data, which intrinsically contains significant structure information, we aim to investigate how to encode such critical structure information into the latent space. As revealed from our observations, directly aligning the structure information between the input and latent spaces inevitably suffers from an overfitting problem, leading to a structure collapse phenomenon in the latent space. To address this problem, we propose TopoFR, a novel FR model that leverages a topological structure alignment strategy called PTSA and a hard sample mining strategy named SDE. Concretely, PTSA uses persistent homology to align the topological structures of the input and latent spaces, effectively preserving the structure information and improving the generalization performance of FR model. To mitigate the impact of hard samples on the latent space structure, SDE accurately identifies hard samples by automatically computing structure damage score (SDS) for each sample, and directs the model to prioritize optimizing these samples. Experimental results on popular face benchmarks demonstrate the superiority of our TopoFR over the state-of-the-art methods. Code and models are available at: https://github.com/modelscope/facechain/tree/main/face_module/TopoFR.", "keywords": "Face Recognition;Structure Alignment;Face Perception and Understanding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jun Dan;Yang Liu;Jiankang Deng;Haoyu Xie;Siyuan Li;Baigui Sun;Shan Luo", "authorids": "~Jun_Dan1;~Yang_Liu51;~Jiankang_Deng1;~Haoyu_Xie4;~Siyuan_Li6;~Baigui_Sun1;~Shan_Luo1", "gender": "M;M;M;M;M;M;M", "homepage": ";;https://jiankangdeng.github.io/;;https://lupin1998.github.io/;;https://shanluo.github.io/", "dblp": "156/9683;27/3367-5;156/7808;;63/9705-2;186/8016;93/622-1", "google_scholar": "https://scholar.google.com.hk/citations?user=NB9Mn5MAAAAJ;t1emSE0AAAAJ;Z_UoQFsAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;ZNhTHywAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0001-7945-3608;;0000-0002-3709-6216;;0000-0001-6806-2468;0000-0001-7722-4748;0000-0003-4760-0372", "linkedin": ";;jiankang-deng-b45b21b4/?originalSubdomain=uk;;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;", "or_profile": "~Jun_Dan1;~Yang_Liu51;~Jiankang_Deng1;~Haoyu_Xie4;~Siyuan_Li6;~Baigui_Sun1;~Shan_Luo1", "aff": "Zhejiang University;Alibaba Group;Imperial College London;;Alibaba Group;Alibaba Group;King's College London, University of London", "aff_domain": "zju.edu.cn;alibaba-inc.com;imperial.ac.uk;;alibaba-inc.com;alibaba-inc.com;kcl.ac.uk", "position": "PhD student;Researcher at Alibaba Group;Lecturer;;Intern;Researcher;Associate Professor", "bibtex": "@inproceedings{\ndan2024topofr,\ntitle={Topo{FR}: A Closer Look at Topology Alignment on Face Recognition},\nauthor={Jun Dan and Yang Liu and Jiankang Deng and Haoyu Xie and Siyuan Li and Baigui Sun and Shan Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KVAx5tys2p}\n}", "github": "", "reviewers": "68Et;SWuk;GA4X", "pdf_size": 17052391, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "71;61;238", "wc_strengths": "48;74;151", "wc_weaknesses": "48;270;116", "wc_questions": "19;41;138", "wc_limitations": "1;25;30", "wc_review": "187;471;673", "wc_reply_reviewers": "12;39;65", "wc_reply_authors": "38;44;39", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.33333333333333, 81.18428966794554 ], "wc_strengths_avg": [ 91.0, 43.734044709661454 ], "wc_weaknesses_avg": [ 144.66666666666666, 92.87027990099357 ], "wc_questions_avg": [ 66.0, 51.697840058039816 ], "wc_limitations_avg": [ 18.666666666666668, 12.657891697365017 ], "wc_review_avg": [ 443.6666666666667, 199.34782556013886 ], "wc_reply_reviewers_avg": [ 38.666666666666664, 21.63844315615664 ], "wc_reply_authors_avg": [ 40.333333333333336, 2.6246692913372702 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17058788301119815867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;alibaba-inc.com;imperial.ac.uk;;alibaba-inc.com;alibaba-inc.com;kcl.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;1;1;3", "aff_unique_norm": "Zhejiang University;Alibaba Group;Imperial College London;King's College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.imperial.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "ZJU;Alibaba;ICL;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Deep Homomorphism Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95659", "id": "KXUijdMFdG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KXUijdMFdG", "openreview": "https://openreview.net/forum?id=KXUijdMFdG", "poster": "/media/PosterPDFs/NeurIPS%202024/95659.png?t=1733487033.212945", "project": "", "author_site": "Takanori Maehara, Hoang NT", "tldr": "", "abstract": "Many real-world graphs are large and have some characteristic subgraph patterns, such as triangles in social networks, cliques in web graphs, and cycles in molecular networks.\nDetecting such subgraph patterns is important in many applications; therefore, establishing graph neural networks (GNNs) that can detect such patterns and run fast on large graphs is demanding.\nIn this study, we propose a new GNN layer, named \\emph{graph homomorphism layer}.\nIt enumerates local subgraph patterns that match the predefined set of patterns $\\mathcal{P}^\\bullet$, applies non-linear transformations to node features, and aggregates them along with the patterns. \nBy stacking these layers, we obtain a deep GNN model called \\emph{deep homomorphism network (DHN)}.\nThe expressive power of the DHN is completely characterised by the set of patterns generated from $\\mathcal{P}^\\bullet$ by graph-theoretic operations;\nhence, it serves as a useful theoretical tool to analyse the expressive power of many GNN models.\nFurthermore, the model runs in the same time complexity as the graph homomorphisms, which is fast in many real-word graphs.\nThus, it serves as a practical and lightweight model that solves difficult problems using domain knowledge.", "keywords": "graph homomorphism;subgraph counting;graph neural network expressivity", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Takanori Maehara;Hoang NT", "authorids": "~Takanori_Maehara1;~Hoang_NT1", "gender": "M;M", "homepage": "https://tmaehara.gitlab.io;https://gearons.org/", "dblp": "05/8510;241/5325", "google_scholar": "3ei4ZqoAAAAJ;iuSBSHsAAAAJ", "orcid": "0000-0002-2101-1484;", "linkedin": ";https://linkedin.com/in/hoang-nt", "or_profile": "~Takanori_Maehara1;~Hoang_Thai_Nguyen1", "aff": "Roku;The University of Tokyo", "aff_domain": "roku.com;u-tokyo.ac.jp", "position": "Senior Software Engineer;Researcher", "bibtex": "@inproceedings{\nmaehara2024deep,\ntitle={Deep Homomorphism Networks},\nauthor={Takanori Maehara and Hoang NT},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KXUijdMFdG}\n}", "github": "", "reviewers": "AtgF;2WwS;X5N6", "pdf_size": 493743, "rating": "7;7;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "2;3;3", "wc_summary": "95;96;319", "wc_strengths": "138;36;87", "wc_weaknesses": "723;237;247", "wc_questions": "17;238;169", "wc_limitations": "11;23;25", "wc_review": "984;630;847", "wc_reply_reviewers": "96;154;104", "wc_reply_authors": "0;266;78", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 170.0, 105.35970134100924 ], "wc_strengths_avg": [ 87.0, 41.641325627314025 ], "wc_weaknesses_avg": [ 402.3333333333333, 226.78232343421791 ], "wc_questions_avg": [ 141.33333333333334, 92.31949354761913 ], "wc_limitations_avg": [ 19.666666666666668, 6.182412330330469 ], "wc_review_avg": [ 820.3333333333334, 145.7448302875802 ], "wc_reply_reviewers_avg": [ 118.0, 25.664502073226878 ], "wc_reply_authors_avg": [ 114.66666666666667, 111.64626679333658 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11727622309281132009&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "roku.com;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Roku;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.roku.com;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Roku;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "Pre-trained Text-to-Image Diffusion Models Are Versatile Representation Learners for Control", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95658", "id": "KY07A73F3Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KY07A73F3Y", "openreview": "https://openreview.net/forum?id=KY07A73F3Y", "poster": "", "project": "", "author_site": "Gunshi Gupta, Karmesh Yadav, Yarin Gal, Dhruv Batra, Zsolt Kira, Cong Lu, Tim G. J. Rudner", "tldr": "", "abstract": "Embodied AI agents require a fine-grained understanding of the physical world mediated through visual and language inputs. Such capabilities are difficult to learn solely from task-specific data. This has led to the emergence of pre-trained vision-language models as a tool for transferring representations learned from internet-scale data to downstream tasks and new domains. However, commonly used contrastively trained representations such as in CLIP have been shown to fail at enabling embodied agents to gain a sufficiently fine-grained scene understanding\u2014a capability vital for control. To address this shortcoming, we consider representations from pre-trained text-to-image diffusion models, which are explicitly optimized to generate images from text prompts and as such, contain text-conditioned representations that reflect highly fine-grained visuo-spatial information. Using pre-trained text-to-image diffusion models, we construct Stable Control Representations which allow learning downstream control policies that generalize to complex, open-ended environments. We show that policies learned using Stable Control Representations are competitive with state-of-the-art representation learning approaches across a broad range of simulated control settings, encompassing challenging manipulation and navigation tasks. Most notably, we show that Stable Control Representations enable learning policies that exhibit state-of-the-art performance on OVMM, a difficult open-vocabulary navigation benchmark.", "keywords": "Embodied AI;Representation Learning for Control;Diffusion Models;Foundation Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Gunshi Gupta;Karmesh Yadav;Yarin Gal;Dhruv Batra;Zsolt Kira;Cong Lu;Tim G. J. Rudner", "authorids": "~Gunshi_Gupta1;~Karmesh_Yadav1;~Yarin_Gal1;~Dhruv_Batra1;~Zsolt_Kira1;~Cong_Lu1;~Tim_G._J._Rudner2", "gender": "F;M;;Not Specified;M;M;Not Specified", "homepage": ";https://www.karmeshyadav.com;http://www.cs.ox.ac.uk/people/yarin.gal/website//;https://dhruvbatra.com;https://faculty.cc.gatech.edu/~zk15;https://conglu.co.uk;https://timrudner.com", "dblp": "218/5542;264/3702;67/9076;67/6586;36/4127;;230/3480", "google_scholar": "w4UK_9kAAAAJ;VsTyEcQAAAAJ;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;_bs7PqgAAAAJ;2a5XgNAAAAAJ;yMGBji4AAAAJ;https://scholar.google.de/citations?user=MbBntPgAAAAJ", "orcid": ";;;;0000-0002-2626-2004;0000-0001-5564-838X;", "linkedin": ";karmesh-yadav/;;;;cong-lu-530b74104/;trudner", "or_profile": "~Gunshi_Gupta1;~Karmesh_Yadav1;~Yarin_Gal1;~Dhruv_Batra1;~Zsolt_Kira1;~Cong_Lu1;~Tim_Georg_Johann_Rudner1", "aff": "University of Oxford;Georgia Institute of Technology;University of Oxford;Georgia Institute of Technology;Georgia Institute of Technology;University of British Columbia;New York University", "aff_domain": "ox.ac.uk;gatech.edu;ox.ac.uk;gatech.edu;gatech.edu;ubc.ca;nyu.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Assistant Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ngupta2024pretrained,\ntitle={Pre-trained Text-to-Image Diffusion Models Are Versatile Representation Learners for Control},\nauthor={Gunshi Gupta and Karmesh Yadav and Yarin Gal and Dhruv Batra and Zsolt Kira and Cong Lu and Tim G. J. Rudner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KY07A73F3Y}\n}", "github": "", "reviewers": "ZVD8;sdTb;QYDZ;9EUL;zJpU", "pdf_size": 2431339, "rating": "6;6;6;7;8", "confidence": "4;3;4;3;3", "soundness": "2;3;2;3;4", "novelty": "2;2;3;3;3", "presentation": "4;3;4;3;4", "wc_summary": "78;83;52;99;161", "wc_strengths": "45;89;68;79;245", "wc_weaknesses": "93;131;69;149;257", "wc_questions": "89;6;1;2;62", "wc_limitations": "1;29;5;1;18", "wc_review": "306;338;195;330;743", "wc_reply_reviewers": "60;60;21;155;22", "wc_reply_authors": "43;0;24;456;29", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;1;2;4;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 94.6, 36.477938538245276 ], "wc_strengths_avg": [ 105.2, 71.4154045007098 ], "wc_weaknesses_avg": [ 139.8, 64.96275856211773 ], "wc_questions_avg": [ 32.0, 36.56774535024001 ], "wc_limitations_avg": [ 10.8, 11.034491379306978 ], "wc_review_avg": [ 382.4, 187.45943561208117 ], "wc_reply_reviewers_avg": [ 63.6, 48.836871316659916 ], "wc_reply_authors_avg": [ 110.4, 173.3558190543369 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15898370030190564257&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ox.ac.uk;gatech.edu;ox.ac.uk;gatech.edu;gatech.edu;ubc.ca;nyu.edu", "author_num": 7, "aff_unique_index": "0;1;0;1;1;2;3", "aff_unique_norm": "University of Oxford;Georgia Institute of Technology;University of British Columbia;New York University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.gatech.edu;https://www.ubc.ca;https://www.nyu.edu", "aff_unique_abbr": "Oxford;Georgia Tech;UBC;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;2;1", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "DiffuPac: Contextual Mimicry in Adversarial Packets Generation via Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95657", "id": "KYHVBsEHuC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KYHVBsEHuC", "openreview": "https://openreview.net/forum?id=KYHVBsEHuC", "poster": "/media/PosterPDFs/NeurIPS%202024/95657.png?t=1731502829.6993525", "project": "", "author_site": "Abdullah Bin Jasni, Akiko Manada, Kohei Watabe", "tldr": "", "abstract": "In domains of cybersecurity, recent advancements in Machine Learning (ML) and Deep Learning (DL) have significantly enhanced Network Intrusion Detection Systems (NIDS), improving the effectiveness of cybersecurity operations. However, attackers have also leveraged ML/DL to develop sophisticated models that generate adversarial packets capable of evading NIDS detection. Consequently, defenders must study and analyze these models to prepare for the evasion attacks that exploit NIDS detection mechanisms. Unfortunately, conventional generation models often rely on unrealistic assumptions about attackers' knowledge of NIDS components, making them impractical for real-world scenarios. To address this issue, we present DiffuPac, a first-of-its-kind generation model designed to generate adversarial packets that evade detection without relying on specific NIDS components. DiffuPac integrates a pre-trained Bidirectional Encoder Representations from Transformers (BERT) with diffusion model, which, through its capability for conditional denoising and classifier-free guidance, effectively addresses the real-world constraint of limited attacker knowledge. By concatenating malicious packets with contextually relevant normal packets and applying targeted noising only to the malicious packets, DiffuPac seamlessly blends adversarial packets into genuine network traffic. Through evaluations on real-world datasets, we demonstrate that DiffuPac achieves strong evasion capabilities against sophisticated NIDS, outperforming conventional methods by an average of 6.69 percentage points, while preserving the functionality and practicality of the generated adversarial packets.", "keywords": "Network Intrusion Detection System;Adversarial Machine Learning;Cybersecurity;Adversarial Sample Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Abdullah Bin Jasni;Akiko Manada;Kohei Watabe", "authorids": "~Abdullah_Bin_Jasni1;~Akiko_Manada1;~Kohei_Watabe1", "gender": "M;F;M", "homepage": "https://github.com/AJNatsu;;https://cn.ics.saitama-u.ac.jp/en/kwatabe", "dblp": "396/8754;08/3039.html;86/7403.html", "google_scholar": ";;https://scholar.google.co.jp/citations?user=XbCJ0-YAAAAJ", "orcid": ";0000-0002-1337-6301;0000-0002-9246-9740", "linkedin": "abdullah-jasni-7861b51a9/;;wkouw1082/", "or_profile": "~Abdullah_Bin_Jasni1;~Akiko_Manada1;~Kohei_Watabe1", "aff": "Graduate School of Engineering, Nagaoka University of Technology;Nagaoka University of Technology;Nagaoka University of Technology", "aff_domain": "nagaokaut.ac.jp;nagaokaut.ac.jp;nagaokaut.ac.jp", "position": "MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\njasni2024diffupac,\ntitle={DiffuPac: Contextual Mimicry in Adversarial Packets Generation via Diffusion Model},\nauthor={Abdullah Bin Jasni and Akiko Manada and Kohei Watabe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KYHVBsEHuC}\n}", "github": "", "reviewers": "ogST;CanA;rUa8;vp4b;BP27", "pdf_size": 6024382, "rating": "4;4;5;5;6", "confidence": "3;5;4;4;2", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "57;46;45;118;77", "wc_strengths": "46;50;32;67;27", "wc_weaknesses": "174;279;90;26;91", "wc_questions": "73;14;48;650;4", "wc_limitations": "50;8;3;1;23", "wc_review": "400;397;218;862;222", "wc_reply_reviewers": "32;101;26;245;37", "wc_reply_authors": "0;220;48;211;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;2;2;1", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.6, 27.251421981247145 ], "wc_strengths_avg": [ 44.4, 14.150618361046984 ], "wc_weaknesses_avg": [ 132.0, 87.23989912878167 ], "wc_questions_avg": [ 157.8, 247.32197637897042 ], "wc_limitations_avg": [ 17.0, 18.20988742414406 ], "wc_review_avg": [ 419.8, 235.07479660737775 ], "wc_reply_reviewers_avg": [ 88.2, 82.94431867222734 ], "wc_reply_authors_avg": [ 95.8, 99.33458612185386 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6289709020331509, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vQ9zwgnyd48J:scholar.google.com/&scioq=DiffuPac:+Contextual+Mimicry+in+Adversarial+Packets+Generation+via+Diffusion+Model&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "nagaokaut.ac.jp;nagaokaut.ac.jp;nagaokaut.ac.jp", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nagaoka University of Technology", "aff_unique_dep": "Graduate School of Engineering", "aff_unique_url": "https://www.nut.ac.jp", "aff_unique_abbr": "NUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Beyond Concept Bottleneck Models: How to Make Black Boxes Intervenable?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95656", "id": "KYHma7hzjr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KYHma7hzjr", "openreview": "https://openreview.net/forum?id=KYHma7hzjr", "poster": "", "project": "", "author_site": "Sonia Laguna, Ri\u010dards Marcinkevi\u010ds, Moritz Vandenhirtz, Julia Vogt", "tldr": "", "abstract": "Recently, interpretable machine learning has re-explored concept bottleneck models (CBM). An advantage of this model class is the user's ability to intervene on predicted concept values, affecting the downstream output. In this work, we introduce a method to perform such concept-based interventions on *pretrained* neural networks, which are not interpretable by design, only given a small validation set with concept labels. Furthermore, we formalise the notion of *intervenability* as a measure of the effectiveness of concept-based interventions and leverage this definition to fine-tune black boxes. Empirically, we explore the intervenability of black-box classifiers on synthetic tabular and natural image benchmarks. We focus on backbone architectures of varying complexity, from simple, fully connected neural nets to Stable Diffusion. We demonstrate that the proposed fine-tuning improves intervention effectiveness and often yields better-calibrated predictions. To showcase the practical utility of our techniques, we apply them to deep chest X-ray classifiers and show that fine-tuned black boxes are more intervenable than CBMs. Lastly, we establish that our methods are still effective under vision-language-model-based concept annotations, alleviating the need for a human-annotated validation set.", "keywords": "interpretability;explainability;concepts;concept bottleneck models;model interventions;healthcare", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Sonia Laguna;Ri\u010dards Marcinkevi\u010ds;Moritz Vandenhirtz;Julia E Vogt", "authorids": "~Sonia_Laguna1;~Ri\u010dards_Marcinkevi\u010ds1;~Moritz_Vandenhirtz1;~Julia_E_Vogt1", "gender": "F;M;F;M", "homepage": "https://mds.inf.ethz.ch/team/detail/sonia-laguna;;http://mds.inf.ethz.ch;https://rmarcinkevics.github.io/", "dblp": "313/3156;;13/8412;234/8553", "google_scholar": "PljVnCQAAAAJ;H2cG0BwAAAAJ;UoeV-8kAAAAJ;https://scholar.google.ch/citations?user=XcxXOJsAAAAJ", "orcid": "0000-0003-3504-2051;;;0000-0001-8901-5062", "linkedin": ";moritz-simon-vandenhirtz-488b0b16b/;julia-vogt-50b53895;ri%C4%8Dards-m-668568106?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3Byeq5%2FsReRoWG3HN7r6A5Lw%3D%3D", "or_profile": "~Sonia_Laguna1;~Moritz_Vandenhirtz1;~Julia_E_Vogt1;~Ricards_Marcinkevics1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nlaguna2024beyond,\ntitle={Beyond Concept Bottleneck Models: How to Make Black Boxes Intervenable?},\nauthor={Sonia Laguna and Ri{\\v{c}}ards Marcinkevi{\\v{c}}s and Moritz Vandenhirtz and Julia E Vogt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KYHma7hzjr}\n}", "github": "", "reviewers": "ajS6;Q4Tj;W2n5", "pdf_size": 19423930, "rating": "7;7;7", "confidence": "5;5;4", "soundness": "3;4;3", "novelty": "2;4;3", "presentation": "3;2;4", "wc_summary": "52;103;238", "wc_strengths": "244;70;128", "wc_weaknesses": "291;581;145", "wc_questions": "450;174;75", "wc_limitations": "11;63;1", "wc_review": "1048;991;587", "wc_reply_reviewers": "174;26;21", "wc_reply_authors": "311;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 131.0, 78.4729252672538 ], "wc_strengths_avg": [ 147.33333333333334, 72.33870947762954 ], "wc_weaknesses_avg": [ 339.0, 181.20338481018135 ], "wc_questions_avg": [ 233.0, 158.67577004697347 ], "wc_limitations_avg": [ 25.0, 27.17842281418601 ], "wc_review_avg": [ 875.3333333333334, 205.20612942978957 ], "wc_reply_reviewers_avg": [ 73.66666666666667, 70.97573920400939 ], "wc_reply_authors_avg": [ 103.66666666666667, 146.60680596601085 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2536299489321927736&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "email": "inf.ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "SpreadsheetBench: Towards Challenging Real World Spreadsheet Manipulation", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97753", "id": "KYxzmRLF6i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KYxzmRLF6i", "openreview": "https://openreview.net/forum?id=KYxzmRLF6i", "poster": "/media/PosterPDFs/NeurIPS%202024/97753.png?t=1730530888.4028013", "project": "", "author_site": "Zeyao Ma, Bohan Zhang, Jing Zhang, Jifan Yu, Xiaokang Zhang, Xiaohan Zhang, Sijia Luo, Xi Wang, Jie Tang", "tldr": "", "abstract": "We introduce SpreadsheetBench, a challenging spreadsheet manipulation benchmark exclusively derived from real-world scenarios, designed to immerse current large language models (LLMs) in the actual workflow of spreadsheet users. \nUnlike existing benchmarks that rely on synthesized queries and simplified spreadsheet files, SpreadsheetBench is built from 912 real questions gathered from online Excel forums, which reflect the intricate needs of users. The associated spreadsheets from the forums contain a variety of tabular data such as multiple tables, non-standard relational tables, and abundant non-textual elements. Furthermore, we propose a more reliable evaluation metric akin to online judge platforms, where multiple spreadsheet files are created as test cases for each instruction, ensuring the evaluation of robust solutions capable of handling spreadsheets with varying values.\nOur comprehensive evaluation of various LLMs under both single-round and multi-round inference settings reveals a substantial gap between the state-of-the-art (SOTA) models and human performance, highlighting the benchmark's difficulty.", "keywords": "Spreadsheet Manipulation;Large Language Models;Benchmark", "primary_area": "", "supplementary_material": "/attachment/383d2451304383a411749076e95196704b27873b.zip", "author": "Zeyao Ma;Bohan Zhang;Jing Zhang;Jifan Yu;Xiaokang Zhang;Xiaohan Zhang;Sijia Luo;Xi Wang;Jie Tang", "authorids": "~Zeyao_Ma1;~Bohan_Zhang4;~Jing_Zhang24;~Jifan_Yu2;~Xiaokang_Zhang1;~Xiaohan_Zhang6;~Sijia_Luo1;~Xi_Wang26;~Jie_Tang1", "gender": "M;M;;M;M;F;F;M;", "homepage": "https://github.com/Kaka23333;https://github.com/finallymint;https://xiaojingzi.github.io/;https://yujifan0326.github.io/;https://github.com/PaulZhang2718;;https://github.com/lsjia;https://github.com/Wonsici;", "dblp": ";;05/3499-1.html;239/6130.html;;;;;", "google_scholar": ";;T7Wa3GQAAAAJ;https://scholar.google.com.tw/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=RKyE8o0AAAAJ;;;", "orcid": ";;;0000-0003-3430-4048;;0000-0003-3295-7758;;;", "linkedin": ";;;;;;;;", "or_profile": "~Zeyao_Ma1;~Bohan_Zhang4;~Jing_Zhang24;~Jifan_Yu2;~Xiaokang_Zhang1;~Xiaohan_Zhang6;~Sijia_Luo1;~Xi_Wang26;~Jie_Tang1", "aff": "Renmin University of China;Beijing Institute of Technology;Renmin University of China;Tsinghua University;Renmin University of China;Beijing Knowledge Atlas Technology Co., Ltd. ;Renmin University of China;Renmin University of China;", "aff_domain": "ruc.edu.cn;bit.edu.cn;ruc.edu.cn;tsinghua.edu.cn;ruc.edu.cn;zhipuai.cn;ruc.edu.cn;ruc.edu.cn;", "position": "MS student;Undergrad student;Associate Professor;Postdoc;MS student;Researcher;Undergrad student;Undergrad student;", "bibtex": "@inproceedings{\nma2024spreadsheetbench,\ntitle={SpreadsheetBench: Towards Challenging Real World Spreadsheet Manipulation},\nauthor={Zeyao Ma and Bohan Zhang and Jing Zhang and Jifan Yu and Xiaokang Zhang and Xiaohan Zhang and Sijia Luo and Xi Wang and Jie Tang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=KYxzmRLF6i}\n}", "github": "", "reviewers": "VgK3;Co7Y;wseN;35VV", "pdf_size": 6889681, "rating": "7;8;8;9", "confidence": "3;5;4;4", "wc_summary_and_contributions": "62;185;49;95", "wc_strengths": "69;107;11;12", "wc_improvement": "235;40;2;6", "wc_limitations": "97;13;1;23", "wc_correctness": "37;25;7;1", "wc_clarity": "72;4;1;1", "wc_relation_to_prior_work": "12;9;1;1", "wc_documentation": "55;20;7;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "640;404;80;141", "wc_reply_reviewers": "108;93;5;0", "wc_reply_authors": "61;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 97.75, 53.09131284871377 ], "wc_strengths_avg": [ 49.75, 40.54241606022019 ], "wc_improvement_avg": [ 70.75, 95.97232674057663 ], "wc_limitations_avg": [ 33.5, 37.47999466382032 ], "wc_correctness_avg": [ 17.5, 14.309088021254185 ], "wc_clarity_avg": [ 19.5, 30.335622624235025 ], "wc_relation_to_prior_work_avg": [ 5.75, 4.866980583482946 ], "wc_documentation_avg": [ 20.75, 20.932928605429293 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 316.25, 223.06991616979641 ], "wc_reply_reviewers_avg": [ 51.5, 49.31784666832079 ], "wc_reply_authors_avg": [ 15.25, 26.413774815425377 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13641771151784788312&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;bit.edu.cn;ruc.edu.cn;tsinghua.edu.cn;ruc.edu.cn;zhipuai.cn;ruc.edu.cn;ruc.edu.cn;", "author_num": 9, "aff_unique_index": "0;1;0;2;0;3;0;0", "aff_unique_norm": "Renmin University of China;Beijing Institute of Technology;Tsinghua University;Beijing Knowledge Atlas Technology Co., Ltd.", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.bit.edu.cn/;https://www.tsinghua.edu.cn;", "aff_unique_abbr": "RUC;BIT;THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A StrongREJECT for Empty Jailbreaks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97752", "id": "KZLE5BaaOH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KZLE5BaaOH", "openreview": "https://openreview.net/forum?id=KZLE5BaaOH", "poster": "", "project": "", "author_site": "Alexandra Souly, Qingyuan Lu, Dillon Bowen, Tu Trinh, Elvis Hsieh, Sana Pandey, Pieter Abbeel, Justin Svegliato, Scott Emmons, Olivia Watkins, Sam Toyer", "tldr": "", "abstract": "Most jailbreak papers claim the jailbreaks they propose are highly effective, often boasting near-100% attack success rates. However, it is perhaps more common than not for jailbreak developers to substantially exaggerate the effectiveness of their jailbreaks. We suggest this problem arises because jailbreak researchers lack a standard, high-quality benchmark for evaluating jailbreak performance, leaving researchers to create their own. To create a benchmark, researchers must choose a dataset of forbidden prompts to which a victim model will respond, along with an evaluation method that scores the harmfulness of the victim model\u2019s responses. We show that existing benchmarks suffer from significant shortcomings and introduce the StrongREJECT benchmark to address these issues. StrongREJECT's dataset contains prompts that victim models must answer with specific, harmful information, while its automated evaluator measures the extent to which a response gives useful information to forbidden prompts. In doing so, the StrongREJECT evaluator achieves state-of-the-art agreement with human judgments of jailbreak effectiveness. Notably, we find that existing evaluation methods significantly overstate jailbreak effectiveness compared to human judgments and the StrongREJECT evaluator. We describe a surprising and novel phenomenon that explains this discrepancy: jailbreaks bypassing a victim model\u2019s safety fine-tuning tend to reduce its capabilities. Together, our findings underscore the need for researchers to use a high-quality benchmark, such as StrongREJECT, when developing new jailbreak attacks. We release the StrongREJECT code and data at https://strong-reject.readthedocs.io/.", "keywords": "large language models;LLMs;jailbreaks;adversarial robustness;benchmark", "primary_area": "", "supplementary_material": "/attachment/7eb0cd12c3c4c2a1e0b335e630ca2203d8eb91fd.pdf", "author": "Alexandra Souly;Qingyuan Lu;Dillon Bowen;Tu Trinh;Elvis Hsieh;Sana Pandey;Pieter Abbeel;Justin Svegliato;Scott Emmons;Olivia Watkins;Sam Toyer", "authorids": "~Alexandra_Souly1;~Qingyuan_Lu1;~Dillon_Bowen1;~Tu_Trinh1;~Elvis_Hsieh1;~Sana_Pandey1;~Pieter_Abbeel2;~Justin_Svegliato2;~Scott_Emmons1;~Olivia_Watkins1;~Sam_Toyer1", "gender": "F;;;;M;F;M;M;M;;", "homepage": ";https://qylu4156.github.io/;;;https://elvishh77.github.io;;https://people.eecs.berkeley.edu/~pabbeel/;https://www.justinsvegliato.com/;http://scottemmons.com/;https://people.eecs.berkeley.edu/~oliviawatkins/;https://www.qxcv.net/", "dblp": ";;;;;;;133/5067.html;180/5699;;203/9103", "google_scholar": "ylO2-BwAAAAJ;;hx_d1RQAAAAJ;;;;https://scholar.google.com.tw/citations?user=vtwH6GkAAAAJ;3Orv6wUAAAAJ;LoT0z6oAAAAJ;;J8E8GQYAAAAJ", "orcid": ";;;;;;;;0000-0002-7946-7046;;0000-0002-6665-6593", "linkedin": ";qingyuanlu/;;;elvis-hsieh/;sana-pandey/;;;scott-emmons-5258005b/;;", "or_profile": "~Alexandra_Souly1;~Qingyuan_Lu1;~Dillon_Bowen1;~Tu_Trinh1;~Elvis_Hsieh1;~Sana_Pandey1;~Pieter_Abbeel2;~Justin_Svegliato2;~Scott_Emmons1;~Olivia_Watkins1;~Sam_Toyer1", "aff": "uk aisi;University of California, Berkeley;FAR AI;;University of California, Berkeley;University of California, Berkeley;Covariant;Microsoft;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "dsit.gov.uk;berkeley.edu;far.ai;;berkeley.edu;berkeley.edu;covariant.ai;microsoft.com;berkeley.edu;berkeley.edu;berkeley.edu", "position": "Researcher;Intern;Researcher;;Undergrad student;Undergrad student;Founder;Senior Research Scientist;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nsouly2024a,\ntitle={A Strong{REJECT} for Empty Jailbreaks},\nauthor={Alexandra Souly and Qingyuan Lu and Dillon Bowen and Tu Trinh and Elvis Hsieh and Sana Pandey and Pieter Abbeel and Justin Svegliato and Scott Emmons and Olivia Watkins and Sam Toyer},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=KZLE5BaaOH}\n}", "github": "", "reviewers": "jYzv;uK1y;Z3a8", "pdf_size": 495419, "rating": "5;7;9", "confidence": "4;3;4", "wc_summary_and_contributions": "65;144;61", "wc_strengths": "24;29;29", "wc_improvement": "30;122;9", "wc_limitations": "26;11;1", "wc_correctness": "11;9;1", "wc_clarity": "7;6;1", "wc_relation_to_prior_work": "39;10;1", "wc_documentation": "19;13;1", "wc_additional_feedback": "1;1;1", "wc_review": "222;345;105", "wc_reply_reviewers": "0;29;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 1.632993161855452 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 90.0, 38.21866908549625 ], "wc_strengths_avg": [ 27.333333333333332, 2.3570226039551585 ], "wc_improvement_avg": [ 53.666666666666664, 49.07364080870934 ], "wc_limitations_avg": [ 12.666666666666666, 10.274023338281626 ], "wc_correctness_avg": [ 7.0, 4.320493798938574 ], "wc_clarity_avg": [ 4.666666666666667, 2.6246692913372702 ], "wc_relation_to_prior_work_avg": [ 16.666666666666668, 16.21384867602041 ], "wc_documentation_avg": [ 11.0, 7.483314773547883 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 224.0, 97.98979538707079 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 13.67073110293992 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7222528681428217429&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "dsit.gov.uk;berkeley.edu;far.ai;;berkeley.edu;berkeley.edu;covariant.ai;microsoft.com;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 11, "aff_unique_index": "0;1;2;1;1;3;4;1;1;1", "aff_unique_norm": "UK Atomic, Molecular and Optical Sciences Institute;University of California, Berkeley;FAR AI;Covariant;Microsoft", "aff_unique_dep": ";;;;Microsoft Corporation", "aff_unique_url": ";https://www.berkeley.edu;https://www.far.ai;;https://www.microsoft.com", "aff_unique_abbr": "UK AMO;UC Berkeley;FAR AI;;Microsoft", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States;" }, { "title": "Brain Treebank: Large-scale intracranial recordings from naturalistic language stimuli", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97751", "id": "KZlJF8kguO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KZlJF8kguO", "openreview": "https://openreview.net/forum?id=KZlJF8kguO", "poster": "", "project": "", "author_site": "Christopher Wang, Adam Yaari, Aaditya Singh, Vighnesh Subramaniam, Dana Rosenfarb, Jan DeWitt, Pranav Misra, Joseph Madsen, Scellig Stone, Gabriel Kreiman, Boris Katz, Ignacio Cases, Andrei Barbu", "tldr": "", "abstract": "We present the Brain Treebank, a large-scale dataset of electrophysiological neural responses, recorded from intracranial probes while 10 subjects watched one or more Hollywood movies. Subjects watched on average 2.6 Hollywood movies, for an average viewing time of 4.3 hours, and a total of 43 hours. The audio track for each movie was transcribed with manual corrections. Word onsets were manually annotated on spectrograms of the audio track for each movie. Each transcript was automatically parsed and manually corrected into the universal dependencies (UD) formalism, assigning a part of speech to every word and a dependency parse to every sentence. In total, subjects heard over 38,000 sentences (223,000 words), while they had on average 168 electrodes implanted. This is the largest dataset of intracranial recordings featuring grounded naturalistic language, one of the largest English UD treebanks in general, and one of only a few UD treebanks aligned to multimodal features. We hope that this dataset serves as a bridge between linguistic concepts, perception, and their neural representations. To that end, we present an analysis of which electrodes are sensitive to language features while also mapping out a rough time course of language processing across these electrodes. The Brain Treebank is available at https://BrainTreebank.dev/", "keywords": "neuroscience;multimodal", "primary_area": "", "supplementary_material": "/attachment/cc674fdc984a860401a5ef0bedd2b4173a956520.zip", "author": "Christopher Wang;Adam Uri Yaari;Aaditya K Singh;Vighnesh Subramaniam;Dana Rosenfarb;Jan DeWitt;Pranav Misra;Joseph R. Madsen;Scellig Stone;Gabriel Kreiman;Boris Katz;Ignacio Cases;Andrei Barbu", "authorids": "~Christopher_Wang1;~Adam_Uri_Yaari1;~Aaditya_K_Singh1;~Vighnesh_Subramaniam1;~Dana_Rosenfarb1;~Jan_DeWitt1;~Pranav_Misra2;~Joseph_R._Madsen1;~Scellig_Stone1;~Gabriel_Kreiman1;~Boris_Katz1;~Ignacio_Cases2;~Andrei_Barbu3", "gender": ";M;;F;M;M;M;;M;M;Non-Binary;M;M", "homepage": "https://czlwang.github.io/;;https://vsubramaniam851.github.io;https://www.linkedin.com/in/dana-rosenfarb/;;;https://www.childrenshospital.org/directory/joseph-madsen;https://www.childrenshospital.org/;http://klab.tch.harvard.edu;http://people.csail.mit.edu/boris/boris.html;;https://0xab.com;https://aadityasingh.github.io/", "dblp": ";292/7968;;;339/3497.html;;;;12/1367;k/BorisKatz;;58/8365;", "google_scholar": ";https://scholar.google.co.il/citations?user=s28yMP0AAAAJ;https://scholar.google.com/citations?hl=en;;;;CgGqzY8AAAAJ;;WxZ_6nsAAAAJ;FdNuUb8AAAAJ;9-TdgYMAAAAJ;t1rjgHgAAAAJ;9OPKqmMAAAAJ", "orcid": ";0000-0002-1703-9097;;;;;;;0000-0003-3505-8475;;;;", "linkedin": ";adam-yaari-b0192ab4/;vighnesh-subramaniam-34549717b/;;;pranav-misra-54b473b9;;;kreiman/;;;andrei-barbu-1166131;", "or_profile": "~Christopher_Wang1;~Adam_Uri_Yaari1;~Vighnesh_Subramaniam1;~Dana_Rosenfarb1;~Jan_DeWitt1;~Pranav_Misra2;~Joseph_R._Madsen1;~Scellig_Stone1;~Gabriel_Kreiman1;~Boris_Katz1;~Ignacio_Cases2;~Andrei_Barbu3;~Aaditya_Singh1", "aff": "Computer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science;;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Harvard University, Harvard University;Harvard University;Harvard University;Harvard Medical School;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University College London, University of London", "aff_domain": "csail.mit.edu;;mit.edu;;mit.edu;g.harvard.edu;harvard.edu;harvard.edu;harvard.edu;mit.edu;mit.edu;mit.edu;ucl.ac.uk", "position": "PhD student;;MS student;;Researcher;PhD student;Full Professor;Associate Professor;Full Professor;Principal Research Scientist;Postdoc;Researcher;PhD student", "bibtex": "@inproceedings{\nwang2024brain,\ntitle={Brain Treebank: Large-scale intracranial recordings from naturalistic language stimuli},\nauthor={Christopher Wang and Adam Uri Yaari and Aaditya K Singh and Vighnesh Subramaniam and Dana Rosenfarb and Jan DeWitt and Pranav Misra and Joseph R. Madsen and Scellig Stone and Gabriel Kreiman and Boris Katz and Ignacio Cases and Andrei Barbu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=KZlJF8kguO}\n}", "github": "", "reviewers": "8tGE;PEtz;ySui;B8dp", "pdf_size": 14144519, "rating": "5;5;6;8", "confidence": "4;4;5;4", "wc_summary_and_contributions": "55;63;23;48", "wc_strengths": "9;20;2;18", "wc_improvement": "55;45;2;9", "wc_limitations": "3;68;9;1", "wc_correctness": "1;37;1;1", "wc_clarity": "2;4;1;1", "wc_relation_to_prior_work": "1;58;1;1", "wc_documentation": "1;13;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "128;309;45;81", "wc_reply_reviewers": "166;123;0;0", "wc_reply_authors": "155;162;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 47.25, 14.972892172189045 ], "wc_strengths_avg": [ 12.25, 7.224091638399945 ], "wc_improvement_avg": [ 27.75, 22.664675157610354 ], "wc_limitations_avg": [ 20.25, 27.725214156071004 ], "wc_correctness_avg": [ 10.0, 15.588457268119896 ], "wc_clarity_avg": [ 2.0, 1.224744871391589 ], "wc_relation_to_prior_work_avg": [ 15.25, 24.681724007856502 ], "wc_documentation_avg": [ 5.0, 4.898979485566356 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 140.75, 101.49969211775965 ], "wc_reply_reviewers_avg": [ 72.25, 73.83215762796046 ], "wc_reply_authors_avg": [ 79.25, 79.28863411612032 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10707109780004448848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "csail.mit.edu;;mit.edu;;mit.edu;g.harvard.edu;harvard.edu;harvard.edu;harvard.edu;mit.edu;mit.edu;mit.edu;ucl.ac.uk", "author_num": 13, "aff_unique_index": "0;0;0;1;1;1;1;0;0;0;2", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University;University College London", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;;", "aff_unique_url": "https://www.csail.mit.edu;https://www.harvard.edu;https://www.ucl.ac.uk", "aff_unique_abbr": "CSAIL;Harvard;UCL", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Boston", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "ALI-Agent: Assessing LLMs' Alignment with Human Values via Agent-based Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95655", "id": "KZrfBTrPey", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KZrfBTrPey", "openreview": "https://openreview.net/forum?id=KZrfBTrPey", "poster": "", "project": "", "author_site": "jingnan zheng, Han Wang, An Zhang, Nguyen Duy Tai, Jun Sun, Tat-Seng Chua", "tldr": "", "abstract": "Large Language Models (LLMs) can elicit unintended and even harmful content when misaligned with human values, posing severe risks to users and society. To mitigate these risks, current evaluation benchmarks predominantly employ expert-designed contextual scenarios to assess how well LLMs align with human values. However, the labor-intensive nature of these benchmarks limits their test scope, hindering their ability to generalize to the extensive variety of open-world use cases and identify rare but crucial long-tail risks. Additionally, these static tests fail to adapt to the rapid evolution of LLMs, making it hard to evaluate timely alignment issues. To address these challenges, we propose ALI-Agent, an evaluation framework that leverages the autonomous abilities of LLM-powered agents to conduct in-depth and adaptive alignment assessments. ALI-Agent operates through two principal stages: Emulation and Refinement. During the Emulation stage, ALI-Agent automates the generation of realistic test scenarios. In the Refinement stage, it iteratively refines the scenarios to probe long-tail risks. Specifically, ALI-Agent incorporates a memory module to guide test scenario generation, a tool-using module to reduce human labor in tasks such as evaluating feedback from target LLMs, and an action module to refine tests. Extensive experiments across three aspects of human values--stereotypes, morality, and legality--demonstrate that ALI-Agent, as a general evaluation framework, effectively identifies model misalignment. Systematic analysis also validates that the generated test scenarios represent meaningful use cases, as well as integrate enhanced measures to probe long-tail risks.", "keywords": "Large language models;Alignment;Agent;Evaluation", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Jingnan Zheng;Han Wang;An Zhang;Tai D. Nguyen;Jun Sun;Tat-Seng Chua", "authorids": "~Jingnan_Zheng1;~Han_Wang19;~An_Zhang2;~Tai_D._Nguyen1;~Jun_Sun12;~Tat-Seng_Chua2", "gender": "F;M;M;M;F;M", "homepage": "https://github.com/SophieZheng998;https://rookiehb.github.io/;https://taidn.me/;https://sunjun.site;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": "326/4665;67/1771-19;;;78/5581-3;", "google_scholar": "UlgiSa0AAAAJ;5C6OSIgAAAAJ;v4qK0LwAAAAJ;https://scholar.google.com.sg/citations?user=DVsEyn0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0001-5103-8007;;;;;0000-0001-6097-7807", "linkedin": ";;;;;", "or_profile": "~Jingnan_Zheng1;~Han_Wang19;~Tai_D._Nguyen1;~Jun_Sun12;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "National University of Singapore;Zhejiang University;Singapore Management University;Singapore Management University;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;zju.edu.cn;smu.edu.sg;smu.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Undergrad student;Postdoc;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzheng2024aliagent,\ntitle={{ALI}-Agent: Assessing {LLM}s' Alignment with Human Values via Agent-based Evaluation},\nauthor={Jingnan Zheng and Han Wang and An Zhang and Tai D. Nguyen and Jun Sun and Tat-Seng Chua},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KZrfBTrPey}\n}", "github": "", "reviewers": "77qx;sLAD;iWss", "pdf_size": 2220733, "rating": "7;7;7", "confidence": "4;4;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "114;59;63", "wc_strengths": "134;122;91", "wc_weaknesses": "123;404;154", "wc_questions": "18;97;57", "wc_limitations": "9;21;1", "wc_review": "398;703;366", "wc_reply_reviewers": "23;27;21", "wc_reply_authors": "7;214;8", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 78.66666666666667, 25.037749277618563 ], "wc_strengths_avg": [ 115.66666666666667, 18.116904322268255 ], "wc_weaknesses_avg": [ 227.0, 125.79613136605857 ], "wc_questions_avg": [ 57.333333333333336, 32.25247621845836 ], "wc_limitations_avg": [ 10.333333333333334, 8.219218670625303 ], "wc_review_avg": [ 489.0, 151.8837274584301 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 2.494438257849294 ], "wc_reply_authors_avg": [ 76.33333333333333, 97.34588960106238 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7230684578557332811&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nus.edu;zju.edu.cn;smu.edu.sg;smu.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 6, "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "National University of Singapore;Zhejiang University;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.zju.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "NUS;ZJU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Singapore;China" }, { "title": "PointMamba: A Simple State Space Model for Point Cloud Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95654", "id": "Kc37srXvan", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kc37srXvan", "openreview": "https://openreview.net/forum?id=Kc37srXvan", "poster": "/media/PosterPDFs/NeurIPS%202024/95654.png?t=1731334614.34854", "project": "", "author_site": "Dingkang Liang, Xin Zhou, Wei Xu, xingkui zhu, Zhikang Zou, Xiaoqing Ye, Xiao Tan, Xiang Bai", "tldr": "", "abstract": "Transformers have become one of the foundational architectures in point cloud analysis tasks due to their excellent global modeling ability. However, the attention mechanism has quadratic complexity, making the design of a linear complexity method with global modeling appealing. In this paper, we propose PointMamba, transferring the success of Mamba, a recent representative state space model (SSM), from NLP to point cloud analysis tasks. Unlike traditional Transformers, PointMamba employs a linear complexity algorithm, presenting global modeling capacity while significantly reducing computational costs. Specifically, our method leverages space-filling curves for effective point tokenization and adopts an extremely simple, non-hierarchical Mamba encoder as the backbone. Comprehensive evaluations demonstrate that PointMamba achieves superior performance across multiple datasets while significantly reducing GPU memory usage and FLOPs. This work underscores the potential of SSMs in 3D vision-related tasks and presents a simple yet effective Mamba-based baseline for future research. The code is available at https://github.com/LMD0311/PointMamba.", "keywords": "Point cloud analysis; State space model; Mamba; PointMamba", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Dingkang Liang;Xin Zhou;Wei Xu;Xingkui Zhu;Zhikang Zou;Xiaoqing Ye;Xiao Tan;Xiang Bai", "authorids": "~Dingkang_Liang2;~Xin_Zhou17;~Wei_Xu15;~Xingkui_Zhu1;~Zhikang_Zou2;~Xiaoqing_Ye1;~Xiao_Tan3;~Xiang_Bai1", "gender": ";M;;M;M;F;M;M", "homepage": ";https://lmd0311.github.io/;;;https://bigteacher-777.github.io/;https://shuluoshu.github.io/;;http://mclab.eic.hust.edu.cn/~xbai", "dblp": ";05/3403-13;;300/4541;229/8175;177/0181;116/7143-1.html;59/2741", "google_scholar": ";SWU7N_sAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;T-YePFgAAAAJ;bmN_nycAAAAJ;;UeltiQ4AAAAJ", "orcid": ";0009-0009-4752-6118;;0009-0008-0561-4390;;0000-0003-3268-880X;;", "linkedin": ";;;;;;;", "or_profile": "~Dingkang_Liang2;~Xin_Zhou17;~Wei_Xu15;~Xingkui_Zhu1;~Zhikang_Zou2;~Xiaoqing_Ye1;~Xiao_Tan3;~Xiang_Bai1", "aff": ";;;Huazhong University of Science and Technology;Baidu;Baidu Inc.;Baidu;Huazhong University of Science and Technology", "aff_domain": ";;;hust.edu.cn;baidu.com;baidu.com;baidu.com;hust.edu.cn", "position": ";;;PhD student;Researcher;Researcher and Developer;Researcher;Full Professor", "bibtex": "@inproceedings{\nliang2024pointmamba,\ntitle={PointMamba: A Simple State Space Model for Point Cloud Analysis},\nauthor={Dingkang Liang and Xin Zhou and Wei Xu and Xingkui Zhu and Zhikang Zou and Xiaoqing Ye and Xiao Tan and Xiang Bai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Kc37srXvan}\n}", "github": "", "reviewers": "Daqu;MyQv;vmtW;QPV3", "pdf_size": 2920174, "rating": "3;5;5;8", "confidence": "5;5;5;5", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "44;117;41;70", "wc_strengths": "26;101;25;124", "wc_weaknesses": "290;205;92;207", "wc_questions": "4;100;185;2", "wc_limitations": "4;40;15;10", "wc_review": "368;563;358;413", "wc_reply_reviewers": "70;45;33;95", "wc_reply_authors": "474;30;288;97", "reply_reviewers": "2;1;1;2", "reply_authors": "6;2;3;3", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 68.0, 30.454884665682123 ], "wc_strengths_avg": [ 69.0, 44.25494322671762 ], "wc_weaknesses_avg": [ 198.5, 70.40774105167698 ], "wc_questions_avg": [ 72.75, 75.95187621119047 ], "wc_limitations_avg": [ 17.25, 13.699908758820257 ], "wc_review_avg": [ 425.5, 82.0441954071097 ], "wc_reply_reviewers_avg": [ 60.75, 23.85765076448224 ], "wc_reply_authors_avg": [ 222.25, 173.45658678758787 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8418518663911018355&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;;hust.edu.cn;baidu.com;baidu.com;baidu.com;hust.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Baidu", "aff_unique_dep": ";Baidu, Inc.", "aff_unique_url": "http://www.hust.edu.cn;https://www.baidu.com", "aff_unique_abbr": "HUST;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Conformalized Time Series with Semantic Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95653", "id": "KcDcaVOW1S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KcDcaVOW1S", "openreview": "https://openreview.net/forum?id=KcDcaVOW1S", "poster": "", "project": "", "author_site": "Baiting Chen, Zhimei Ren, Lu Cheng", "tldr": "", "abstract": "Conformal prediction is a powerful tool for uncertainty quantification, but its application to time-series data is constrained by the violation of the exchangeability assumption. Current solutions for time-series prediction typically operate in the output space and rely on manually selected weights to address distribution drift, leading to overly conservative predictions. To enable dynamic weight learning in the semantically rich latent space, we introduce a novel approach called Conformalized Time Series with Semantic Features (CT-SSF). CT-SSF utilizes the inductive bias in deep representation learning to dynamically adjust weights, prioritizing semantic features relevant to the current prediction. Theoretically, we show that CT-SSF surpasses previous methods defined in the output space. Experiments on synthetic and benchmark datasets demonstrate that CT-SSF significantly outperforms existing state-of-the-art (SOTA) conformal prediction techniques in terms of prediction efficiency while maintaining a valid coverage guarantee.", "keywords": "conformal prediction;time series;neural network;uncertainty qualification", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Baiting Chen;Zhimei Ren;Lu Cheng", "authorids": "~Baiting_Chen1;~Zhimei_Ren1;~Lu_Cheng2", "gender": ";F;F", "homepage": ";https://zhimeir.github.io/;https://lcheng.org/", "dblp": "389/9945;;17/4969-1", "google_scholar": ";X3gGi_0AAAAJ;9rpkTSkAAAAJ", "orcid": ";;0000-0002-2503-2522", "linkedin": "baiting-chen-89931b257?trk=contact-info;;", "or_profile": "~Baiting_Chen1;~Zhimei_Ren1;~Lu_Cheng2", "aff": "UCLA, University of California, Los Angeles;The Wharton School, University of Pennsylvania;University of Illinois at Chicago", "aff_domain": "stat.ucla.edu;wharton.upenn.edu;uic.edu", "position": "MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024conformalized,\ntitle={Conformalized Time Series with Semantic Features},\nauthor={Baiting Chen and Zhimei Ren and Lu Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KcDcaVOW1S}\n}", "github": "", "reviewers": "8Q7C;aroC;phaJ;iFYG", "pdf_size": 336335, "rating": "4;6;6;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "25;81;51;106", "wc_strengths": "15;47;65;226", "wc_weaknesses": "218;84;190;157", "wc_questions": "3;80;222;20", "wc_limitations": "3;7;8;17", "wc_review": "264;299;536;526", "wc_reply_reviewers": "0;23;34;28", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.75, 30.53993287484437 ], "wc_strengths_avg": [ 88.25, 81.52108622926954 ], "wc_weaknesses_avg": [ 162.25, 50.071823413972055 ], "wc_questions_avg": [ 81.25, 86.1492164793157 ], "wc_limitations_avg": [ 8.75, 5.11737237261468 ], "wc_review_avg": [ 406.25, 125.41207079065396 ], "wc_reply_reviewers_avg": [ 21.25, 12.871965661856 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zPNcunLvKpgJ:scholar.google.com/&scioq=Conformalized+Time+Series+with+Semantic+Features&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "stat.ucla.edu;wharton.upenn.edu;uic.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;University of Pennsylvania;University of Illinois at Chicago", "aff_unique_dep": ";The Wharton School;", "aff_unique_url": "https://www.ucla.edu;https://www.wharton.upenn.edu;https://www.uic.edu", "aff_unique_abbr": "UCLA;UPenn Wharton;UIC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Neural Networks as Infinite Tree-Structured Probabilistic Graphical Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95652", "id": "KcmhSrHzJB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KcmhSrHzJB", "openreview": "https://openreview.net/forum?id=KcmhSrHzJB", "poster": "", "project": "", "author_site": "Boyao Li, Alexander Thomson, houssam nassif, Matthew Engelhard, David Page", "tldr": "", "abstract": "Deep neural networks (DNNs) lack the precise semantics and definitive probabilistic interpretation of probabilistic graphical models (PGMs). In this paper, we propose an innovative solution by constructing infinite tree-structured PGMs that correspond exactly to neural networks. Our research reveals that DNNs, during forward propagation, indeed perform approximations of PGM inference that are precise in this alternative PGM structure. Not only does our research complement existing studies that describe neural networks as kernel machines or infinite-sized Gaussian processes, it also elucidates a more direct approximation that DNNs make to exact inference in PGMs. Potential benefits include improved pedagogy and interpretation of DNNs, and algorithms that can merge the strengths of PGMs and DNNs.", "keywords": "probabilistic graphical model; neural network; hamiltonian monte carlo; calibration", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Boyao Li;Alexander Joseph Thomson;houssam nassif;Matthew M. Engelhard;David Page", "authorids": "~Boyao_Li1;~Alexander_Joseph_Thomson1;~houssam_nassif1;~Matthew_M._Engelhard1;~David_Page2", "gender": "M;M;M;;M", "homepage": ";;http://pages.cs.wisc.edu/~hous21/;;https://scholars.duke.edu/person/david.page", "dblp": ";;49/7789;180/0789;p/DavidPage", "google_scholar": ";;https://scholar.google.com/citations?hl=en;0gpvxyoAAAAJ;z-P08tgAAAAJ", "orcid": ";;;0000-0003-4112-9639;0000-0003-0576-2912", "linkedin": "boyao-li/;alexander-thomson-22a443254;houssamnassif;;", "or_profile": "~Boyao_Li1;~Alexander_Joseph_Thomson1;~houssam_nassif1;~Matthew_M._Engelhard1;~David_Page1", "aff": "Duke University;Duke University;Meta;Duke University;Duke University", "aff_domain": "duke.edu;duke.edu;meta.com;duke.edu;duke.edu", "position": "PhD student;MS student;Principal Researcher;Assistant Professor;Dept Chair, Biostatistics & Bioinformatics", "bibtex": "@inproceedings{\nli2024on,\ntitle={On Neural Networks as Infinite Tree-Structured Probabilistic Graphical Models},\nauthor={Boyao Li and Alexander Joseph Thomson and houssam nassif and Matthew M. Engelhard and David Page},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KcmhSrHzJB}\n}", "github": "", "reviewers": "zi4w;ymC3;BE6o;7PML", "pdf_size": 521077, "rating": "3;5;6;7", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "3;2;3;4", "presentation": "2;2;3;2", "wc_summary": "111;313;118;306", "wc_strengths": "92;2;59;115", "wc_weaknesses": "640;2;47;132", "wc_questions": "472;2;35;32", "wc_limitations": "8;2;6;25", "wc_review": "1323;321;265;610", "wc_reply_reviewers": "219;140;48;27", "wc_reply_authors": "336;203;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 212.0, 97.56280028781461 ], "wc_strengths_avg": [ 67.0, 42.47940677551889 ], "wc_weaknesses_avg": [ 205.25, 255.307045535371 ], "wc_questions_avg": [ 135.25, 194.8504234021574 ], "wc_limitations_avg": [ 10.25, 8.78564169540279 ], "wc_review_avg": [ 629.75, 421.11600242688473 ], "wc_reply_reviewers_avg": [ 108.5, 76.65670225100999 ], "wc_reply_authors_avg": [ 134.75, 142.71891080021595 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.09759000729485331, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10907371032081505533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "duke.edu;duke.edu;meta.com;duke.edu;duke.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Duke University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.duke.edu;https://meta.com", "aff_unique_abbr": "Duke;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DiffuLT: Diffusion for Long-tail Recognition Without External Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95651", "id": "Kcsj9FGnKR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kcsj9FGnKR", "openreview": "https://openreview.net/forum?id=Kcsj9FGnKR", "poster": "/media/PosterPDFs/NeurIPS%202024/95651.png?t=1731404715.9644716", "project": "", "author_site": "Jie Shao, Ke Zhu, Hanxiao Zhang, Jianxin Wu", "tldr": "", "abstract": "This paper introduces a novel pipeline for long-tail (LT) recognition that diverges from conventional strategies. Instead, it leverages the long-tailed dataset itself to generate a balanced proxy dataset without utilizing external data or model. We deploy a diffusion model trained from scratch on only the long-tailed dataset to create this proxy and verify the effectiveness of the data produced. Our analysis identifies approximately-in-distribution (AID) samples, which slightly deviate from the real data distribution and incorporate a blend of class information, as the crucial samples for enhancing the generative model's performance in long-tail classification. We promote the generation of AID samples during the training of a generative model by utilizing a feature extractor to guide the process and filter out detrimental samples during generation. Our approach, termed Diffusion model for Long-Tail recognition (DiffuLT), represents a pioneer application of generative models in long-tail recognition. DiffuLT achieves state-of-the-art results on CIFAR10-LT, CIFAR100-LT, and ImageNet-LT, surpassing leading competitors by significant margins. Comprehensive ablations enhance the interpretability of our pipeline. Notably, the entire generative process is conducted without relying on external data or pre-trained model weights, which leads to its generalizability to real-world long-tailed scenarios.", "keywords": "Long-tail learning; long-tail classification;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jie Shao;Ke Zhu;Hanxiao Zhang;Jianxin Wu", "authorids": "~Jie_Shao7;~Ke_Zhu2;~Hanxiao_Zhang2;~Jianxin_Wu1", "gender": "M;M;F;M", "homepage": "https://hehesangsj.github.io/;http://www.lamda.nju.edu.cn/zhuk/;http://www.lamda.nju.edu.cn/zhanghx/;https://cs.nju.edu.cn/wujx/", "dblp": "02/5139-1;;192/0995;w/JianxinWu", "google_scholar": "https://scholar.google.cz/citations?hl=zh-CN;bos3kG8AAAAJ;SCX1oYoAAAAJ;0JRtCV4AAAAJ", "orcid": ";;;", "linkedin": "jie-shao-41a480162/;;https://www.linkedin.cn/incareer/in/ACoAAD6etrEBz44xoM9BSu4QNrqOatnXJ6yv-1o;", "or_profile": "~Jie_Shao7;~Ke_Zhu2;~Hanxiao_Zhang2;~Jianxin_Wu3", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing university", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nshao2024diffult,\ntitle={Diffu{LT}: Diffusion for Long-tail Recognition Without External Knowledge},\nauthor={Jie Shao and Ke Zhu and Hanxiao Zhang and Jianxin Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Kcsj9FGnKR}\n}", "github": "", "reviewers": "VPct;vaue;PgsY", "pdf_size": 2809890, "rating": "4;5;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "74;81;111", "wc_strengths": "20;65;117", "wc_weaknesses": "178;155;127", "wc_questions": "7;6;12", "wc_limitations": "16;12;2", "wc_review": "295;319;369", "wc_reply_reviewers": "49;21;10", "wc_reply_authors": "596;71;517", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.66666666666667, 16.048537489614297 ], "wc_strengths_avg": [ 67.33333333333333, 39.634440691006205 ], "wc_weaknesses_avg": [ 153.33333333333334, 20.8539897594894 ], "wc_questions_avg": [ 8.333333333333334, 2.6246692913372702 ], "wc_limitations_avg": [ 10.0, 5.887840577551898 ], "wc_review_avg": [ 327.6666666666667, 30.825674724525047 ], "wc_reply_reviewers_avg": [ 26.666666666666668, 16.418147141366333 ], "wc_reply_authors_avg": [ 394.6666666666667, 231.12815108121777 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:22cha_72K7EJ:scholar.google.com/&scioq=DiffuLT:+Diffusion+for+Long-tail+Recognition+Without+External+Knowledge&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Information-theoretic Limits of Online Classification with Noisy Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95650", "id": "Ke3MSP8Nr6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ke3MSP8Nr6", "openreview": "https://openreview.net/forum?id=Ke3MSP8Nr6", "poster": "", "project": "", "author_site": "Changlong Wu, Ananth Grama, Wojciech Szpankowski", "tldr": "", "abstract": "We study online classification with general hypothesis classes where the true labels are determined by some function within the class, but are corrupted by *unknown* stochastic noise, and the features are generated adversarially. Predictions are made using observed *noisy* labels and noiseless features, while the performance is measured via minimax risk when comparing against *true* labels. The noisy mechanism is modeled via a general noisy kernel that specifies, for any individual data point, a set of distributions from which the actual noisy label distribution is chosen. We show that minimax risk is *tightly* characterized (up to a logarithmic factor of the hypothesis class size) by the *Hellinger gap* of the noisy label distributions induced by the kernel, *independent* of other properties such as the means and variances of the noise. Our main technique is based on a novel reduction to an online comparison scheme of two hypotheses, along with a new *conditional* version of Le Cam-Birg\u00e9 testing suitable for online settings. Our work provides the first comprehensive characterization of noisy online classification with guarantees that apply to the *ground truth* while addressing *general* noisy observations.", "keywords": "Online classification;noisy label;pairwise testing;Hellinger divergence;Le Cam-Birge testing", "primary_area": "learning_theory", "supplementary_material": "", "author": "Changlong Wu;Ananth Grama;Wojciech Szpankowski", "authorids": "~Changlong_Wu1;~Ananth_Grama1;~Wojciech_Szpankowski2", "gender": "M;M;M", "homepage": "https://changlongwu1993.github.io/;https://www.cs.purdue.edu/homes/ayg/;https://www.cs.purdue.edu/homes/spa/", "dblp": "204/4267;;s/WSzpankowski.html", "google_scholar": "-T9eX0kAAAAJ;https://scholar.google.com.tw/citations?user=bpsZlEQAAAAJ;xQMGL_8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Changlong_Wu1;~Ananth_Grama1;~Wojciech_Szpankowski2", "aff": "Purdue University;Purdue University; Purdue University", "aff_domain": "purdue.edu;purdue.edu;cs.purdue.edu", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024informationtheoretic,\ntitle={Information-theoretic Limits of Online Classification with Noisy Labels},\nauthor={Changlong Wu and Ananth Grama and Wojciech Szpankowski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ke3MSP8Nr6}\n}", "github": "", "reviewers": "uBfk;GopC;crkZ;JscE", "pdf_size": 520746, "rating": "4;6;6;6", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;4", "wc_summary": "96;209;208;143", "wc_strengths": "54;82;64;79", "wc_weaknesses": "81;238;445;150", "wc_questions": "58;123;254;168", "wc_limitations": "70;1;2;6", "wc_review": "359;653;973;546", "wc_reply_reviewers": "272;10;4;84", "wc_reply_authors": "449;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 164.0, 47.502631506054485 ], "wc_strengths_avg": [ 69.75, 11.366067921669305 ], "wc_weaknesses_avg": [ 228.5, 136.8219646109498 ], "wc_questions_avg": [ 150.75, 71.29296949910278 ], "wc_limitations_avg": [ 19.75, 29.07210862665452 ], "wc_review_avg": [ 632.75, 222.84790216647767 ], "wc_reply_reviewers_avg": [ 92.5, 108.3178194019802 ], "wc_reply_authors_avg": [ 112.25, 194.42270314960646 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16421334438064991813&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "purdue.edu;purdue.edu;cs.purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Scaling Continuous Latent Variable Models as Probabilistic Integral Circuits", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95649", "id": "Ke40kfOT2E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ke40kfOT2E", "openreview": "https://openreview.net/forum?id=Ke40kfOT2E", "poster": "/media/PosterPDFs/NeurIPS%202024/95649.png?t=1732736632.9048297", "project": "", "author_site": "Gennaro Gala, Cassio de Campos, Antonio Vergari, Erik Quaeghebeur", "tldr": "", "abstract": "Probabilistic integral circuits (PICs) have been recently introduced as probabilistic models enjoying the key ingredient behind expressive generative models: continuous latent variables (LVs). PICs are symbolic computational graphs defining continuous LV models as hierarchies of functions that are summed and multiplied together, or integrated over some LVs. They are tractable if LVs can be analytically integrated out, otherwise they can be approximated by tractable probabilistic circuits (PC) encoding a hierarchical numerical quadrature process, called QPCs.\n\nSo far, only tree-shaped PICs have been explored, and training them via numerical quadrature requires memory-intensive processing at scale. In this paper, we address these issues, and present: (i) a pipeline for building DAG-shaped PICs out of arbitrary variable decompositions, (ii) a procedure for training PICs using tensorized circuit architectures, and (iii) neural functional sharing techniques to allow scalable training. In extensive experiments, we showcase the effectiveness of functional sharing and the superiority of QPCs over traditional PCs.", "keywords": "Latent variable models;probabilistic ML;tractable models", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/48ddb382d8263464b70aba1a1b57994a4b46c500.zip", "author": "Gennaro Gala;Cassio de Campos;Antonio Vergari;Erik Quaeghebeur", "authorids": "~Gennaro_Gala1;~Cassio_de_Campos1;~Antonio_Vergari3;~Erik_Quaeghebeur2", "gender": "M;;M;M", "homepage": "https://uai.win.tue.nl/author/gennaro-gala/;https://member.acm.org/~cassiopc;https://ac.erikquaeghebeur.name/;http://nolovedeeplearning.com", "dblp": "308/9738;05/2010;56/1177;http://dblp.uni-trier.de/pers/hd/v/Vergari:Antonio", "google_scholar": "TGxSR8IAAAAJ;;wlkN3LQAAAAJ;YK0NLaUAAAAJ", "orcid": ";;0000-0003-1462-401X;0000-0003-0036-5678", "linkedin": ";;;", "or_profile": "~Gennaro_Gala1;~Cassio_de_Campos1;~Erik_Quaeghebeur2;~antonio_vergari2", "aff": "Eindhoven University of Technology;Eindhoven University of Technology;Eindhoven University of Technology;University of Edinburgh, University of Edinburgh", "aff_domain": "tue.nl;tue.nl;tue.nl;ed.ac.uk", "position": "PhD student;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ngala2024scaling,\ntitle={Scaling Continuous Latent Variable Models as Probabilistic Integral Circuits},\nauthor={Gennaro Gala and Cassio de Campos and Antonio Vergari and Erik Quaeghebeur},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ke40kfOT2E}\n}", "github": "", "reviewers": "fmPL;6CM1;WQnV;iaCH", "pdf_size": 2206916, "rating": "6;7;7;8", "confidence": "5;4;4;4", "soundness": "3;4;4;4", "novelty": "3;3;4;3", "presentation": "4;4;3;4", "wc_summary": "57;104;90;118", "wc_strengths": "56;112;93;59", "wc_weaknesses": "131;34;199;27", "wc_questions": "99;149;71;145", "wc_limitations": "11;11;1;30", "wc_review": "354;410;454;379", "wc_reply_reviewers": "17;13;0;44", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 22.63155982251334 ], "wc_strengths_avg": [ 80.0, 23.50531854708632 ], "wc_weaknesses_avg": [ 97.75, 71.46109081171376 ], "wc_questions_avg": [ 116.0, 32.57299494980466 ], "wc_limitations_avg": [ 13.25, 10.497023387608508 ], "wc_review_avg": [ 399.25, 37.31872854211408 ], "wc_reply_reviewers_avg": [ 18.5, 16.00781059358212 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17206607687714698666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tue.nl;tue.nl;tue.nl;ed.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Eindhoven University of Technology;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.tue.nl;https://www.ed.ac.uk", "aff_unique_abbr": "TU/e;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Netherlands;United Kingdom" }, { "id": "Kg0hQIG9Uh", "title": "LLAVIDAL: Benchmarking \\underline{L}arge \\underline{LA}nguage \\underline{VI}sion Models for \\underline{D}aily \\underline{A}ctivities of \\underline{L}iving", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Large Language Vision Models (LLVMs) have demonstrated effectiveness in processing internet videos, yet they struggle with the visually perplexing dynamics present in Activities of Daily Living (ADL) due to limited pertinent datasets and models tailored to relevant cues. \nTo this end, we propose a framework for curating ADL multiview datasets to fine-tune LLVMs, resulting in the creation of \\textbf{\\datasetname}, comprising 100K RGB video-instruction pairs, language descriptions, 3D skeletons, and action-conditioned object trajectories. We introduce \\textbf{\\modelname}, an LLVM capable of incorporating 3D poses and relevant object trajectories to understand the intricate spatiotemporal relationships within ADLs. Furthermore, we present a novel benchmark, \\textbf{ADLMCQ}, for quantifying LLVM effectiveness in ADL scenarios. When trained on \\datasetname, \\modelname~consistently achieves state-of-the-art performance across all ADL evaluation metrics. Qualitative analysis reveals \\modelname's temporal reasoning capabilities in understanding ADL. The link to the dataset is provided at: \\href{https://adl-x.github.io/}{https://adl-x.github.io/}", "keywords": "Activities of Daily Living", "primary_area": "", "supplementary_material": "/attachment/5323bc71d43ce6bfd89463dba1f0d8056bd4c2d9.zip", "author": "Rajatsubhra Chakraborty;Arkaprava Sinha;Dominick Reilly;Manish Kumar Govind;Pu Wang;Francois Bremond;Srijan Das", "authorids": "~Rajatsubhra_Chakraborty1;~Arkaprava_Sinha1;~Dominick_Reilly1;~Manish_Kumar_Govind3;~Pu_Wang1;~Francois_Bremond1;~Srijan_Das1", "gender": "M;M;M;M;M;M;M", "homepage": ";;https://dominickrei.github.io/;https://webpages.charlotte.edu/pwang13;http://www-sop.inria.fr/members/Francois.Bremond/;https://srijandas07.github.io/;https://github.com/ManishGovind", "dblp": "290/9967;;317/7360;15/4476-1;90/6418;173/0062;", "google_scholar": "39r7ciQAAAAJ;;YlFKOTkAAAAJ;0buJlAUAAAAJ;h-oGBzsAAAAJ;ZDTF5AEAAAAJ;", "orcid": ";;;;0000-0003-2988-2142;;0009-0003-6381-6293", "linkedin": "rajatsubhra-chakraborty-6bb133168?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;arkaprava-sinha/;;;francois-bremond-05263a5/;;", "or_profile": "~Rajatsubhra_Chakraborty1;~Arkaprava_Sinha1;~Dominick_Reilly1;~Pu_Wang1;~Francois_Bremond1;~Srijan_Das1;~Manish_kumar_Govind2", "aff": "University of North Carolina at Charlotte;University of North Carolina at Charlotte;University of North Carolina at Charlotte;University of North Carolina at Charlotte;inria;University of North Carolina at Charlotte;University of North Carolina at Charlotte", "aff_domain": "uncc.edu;uncc.edu;uncc.edu;uncc.edu;inria.fr;uncc.edu;charlotte.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Researcher;Assistant Professor;MS student", "bibtex": "@misc{\nanonymous2024llavidal,\ntitle={{LLAVIDAL}: Benchmarking {\\textbackslash}underline\\{L\\}arge {\\textbackslash}underline\\{{LA}\\}nguage {\\textbackslash}underline\\{{VI}\\}sion Models for {\\textbackslash}underline\\{D\\}aily {\\textbackslash}underline\\{A\\}ctivities of {\\textbackslash}underline\\{L\\}iving},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Kg0hQIG9Uh}\n}", "github": "", "project": "", "reviewers": "56PD;SD4k;Edmn", "site": "https://openreview.net/forum?id=Kg0hQIG9Uh", "pdf_size": 4770870, "rating": "4;6;7", "confidence": "3;3;3", "wc_summary_and_contributions": "34;132;62", "wc_strengths": "27;116;63", "wc_improvement": "126;103;5", "wc_limitations": "3;4;14", "wc_correctness": "1;41;8", "wc_clarity": "11;6;5", "wc_relation_to_prior_work": "12;3;5", "wc_documentation": "7;18;4", "wc_additional_feedback": "1;1;1", "wc_review": "222;424;167", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "44;0;0", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 76.0, 41.21488404286329 ], "wc_strengths_avg": [ 68.66666666666667, 36.55437350334734 ], "wc_improvement_avg": [ 78.0, 52.46586191674227 ], "wc_limitations_avg": [ 7.0, 4.96655480858378 ], "wc_correctness_avg": [ 16.666666666666668, 17.441967269268172 ], "wc_clarity_avg": [ 7.333333333333333, 2.6246692913372702 ], "wc_relation_to_prior_work_avg": [ 6.666666666666667, 3.858612300930075 ], "wc_documentation_avg": [ 9.666666666666666, 6.018490028422597 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 271.0, 110.49283536350521 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 14.666666666666666, 20.741798914805393 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bNI6ieauJZ0J:scholar.google.com/&scioq=LLAVIDAL:+Benchmarking+%5Cunderline%7BL%7Darge+%5Cunderline%7BLA%7Dnguage+%5Cunderline%7BVI%7Dsion+Models+for+%5Cunderline%7BD%7Daily+%5Cunderline%7BA%7Dctivities+of+%5Cunderline%7BL%7Diving&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "University of North Carolina at Charlotte;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.uncc.edu;https://www.inria.fr", "aff_unique_abbr": "UNCC;INRIA", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Charlotte;", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;France" }, { "id": "KgGhxmQFFy", "title": "UniMoT: Unified Molecule-Text Language Model with Discrete Token Representation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The remarkable success of Large Language Models (LLMs) across diverse tasks has driven the research community to extend their capabilities to molecular applications, leading to the development of molecular LLMs. However, most molecular LLMs employ adapter-based architectures that do not treat molecule and text modalities equally and lack a supervision signal for the molecule modality. To address these issues, we introduce UniMoT, a unified molecule-text LLM adopting a tokenizer-based architecture that expands the vocabulary of LLM with molecule tokens.\nSpecifically, we introduce a Vector Quantization-driven tokenizer that incorporates a Q-Former to bridge the modality gap between molecule and text. This tokenizer transforms molecules into sequences of molecule tokens with causal dependency, encapsulating high-level molecular and textual information. \nEquipped with this tokenizer, UniMoT can unify molecule and text modalities under a shared token representation and an autoregressive training paradigm, enabling it to interpret molecules as a foreign language and generate them as text.\nFollowing a four-stage training scheme, UniMoT emerges as a multi-modal generalist capable of performing both molecule-to-text and text-to-molecule tasks. Extensive experiments demonstrate that UniMoT achieves state-of-the-art performance across a wide range of molecule comprehension and generation tasks.", "keywords": "Large Language Models;Tokenization;Molecule Generation;Molecule Comprehension;Multi-modal Learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Juzheng Zhang;Yatao Bian;Yongqiang Chen;Quanming Yao", "authorids": "~Juzheng_Zhang2;~Yatao_Bian1;~Yongqiang_Chen1;~Quanming_Yao3", "gender": "M;;M;M", "homepage": "https://juzhengz.github.io/;https://lfhase.win;https://lars-group.github.io/;https://yataobian.com", "dblp": "133/2742;76/5774-2;158/1014;222/2694", "google_scholar": "d8lJm7MAAAAJ;huQ_Ig8AAAAJ;https://scholar.google.com/schhp?hl=en;oZBTlBkAAAAJ", "orcid": ";;;0000-0002-2368-4084", "linkedin": ";;;", "or_profile": "~Juzheng_Zhang2;~Yongqiang_Chen1;~quanming_yao1;~An_Bian1", "aff": "University of Maryland, College Park;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Electronic Engineering;Tencent AI Lab", "aff_domain": "umd.edu;cse.cuhk.edu.hk;tsinghua.edu.cn;tencent.com", "position": "PhD student;PhD student;Assistant Professor;Senior researcher ", "bibtex": "@misc{\nanonymous2024unimot,\ntitle={UniMoT: Unified Molecule-Text Language Model with Discrete Token Representation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=KgGhxmQFFy}\n}", "github": "", "project": "", "reviewers": "pDSz;d9hA;2Dfq;dTUx", "site": "https://openreview.net/forum?id=KgGhxmQFFy", "pdf_size": 712489, "rating": "2;4;5;6", "confidence": "5;4;4;2", "soundness": "2;2;2;3", "novelty": "1;2;2;3", "presentation": "3;2;3;3", "wc_summary": "83;42;82;16", "wc_strengths": "25;24;49;11", "wc_weaknesses": "636;101;314;67", "wc_questions": "100;67;102;9", "wc_limitations": "15;1;7;1", "wc_review": "859;235;554;104", "wc_reply_reviewers": "431;0;18;0", "wc_reply_authors": "848;0;48;0", "reply_reviewers": "2;0;1;0", "reply_authors": "3;1;2;1", "rating_avg": [ 4.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.75, 28.287585616308792 ], "wc_strengths_avg": [ 27.25, 13.718144918318949 ], "wc_weaknesses_avg": [ 279.5, 226.55076693756743 ], "wc_questions_avg": [ 69.5, 37.59321747336878 ], "wc_limitations_avg": [ 6.0, 5.744562646538029 ], "wc_review_avg": [ 438.0, 293.0281556437879 ], "wc_reply_reviewers_avg": [ 112.25, 184.1770547598153 ], "wc_reply_authors_avg": [ 224.0, 360.7991130809498 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8919017444789036, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12231748412498409946&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Maryland;Chinese University of Hong Kong;Institution Name Not Provided;Tencent", "aff_unique_dep": ";Department of Computer Science and Engineering;Department of Electronic Engineering;Tencent AI Lab", "aff_unique_url": "https://www/umd.edu;https://www.cuhk.edu.hk;;https://ai.tencent.com", "aff_unique_abbr": "UMD;CUHK;;Tencent AI Lab", "aff_campus_unique_index": "0;1", "aff_campus_unique": "College Park;Hong Kong SAR;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China;" }, { "title": "Towards Visual Text Design Transfer Across Languages", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97750", "id": "KgeQqLI7OD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KgeQqLI7OD", "openreview": "https://openreview.net/forum?id=KgeQqLI7OD", "poster": "/media/PosterPDFs/NeurIPS%202024/97750.png?t=1731681171.7832596", "project": "", "author_site": "Yejin Choi, Jiwan Chung, Sumin Shim, Giyeong Oh, Youngjae Yu", "tldr": "", "abstract": "Visual text design plays a critical role in conveying themes, emotions, and atmospheres in multimodal formats such as film posters and album covers. Translating these visual and textual elements across languages extends the concept of translation beyond mere text, requiring the adaptation of aesthetic and stylistic features. To address this, we introduce a novel task of Multimodal Style Translation (MuST-Bench), a benchmark designed to evaluate the ability of visual text generation models to perform translation across different writing systems while preserving design intent.\nOur initial experiments on MuST-Bench reveal that existing visual text generation models struggle with the proposed task due to the inadequacy of textual descriptions in conveying visual design.\nIn response, we introduce SIGIL, a framework for multimodal style translation that eliminates the need for style descriptions.\nSIGIL enhances image generation models through three innovations: glyph latent for multilingual settings, pre-trained VAEs for stable style guidance, and an OCR model with reinforcement learning feedback for optimizing readable character generation. SIGIL outperforms existing baselines by achieving superior style consistency and legibility while maintaining visual fidelity, setting itself apart from traditional description-based approaches. We release MuST-Bench publicly for broader use and exploration https://huggingface.co/datasets/yejinc/MuST-Bench.", "keywords": "Creative AI;Multilinguality;Multimodality;Image Generation", "primary_area": "", "supplementary_material": "/attachment/c2552086f1f2b9fcdba521c868096b1ef28d12bc.zip", "author": "Yejinchoi;Jiwan Chung;Sumin Shim;Giyeong Oh;Youngjae Yu", "authorids": "~Yejinchoi1;~Jiwan_Chung1;~Sumin_Shim1;~Giyeong_Oh1;~Youngjae_Yu1", "gender": "F;M;F;M;M", "homepage": "https://mirlab.yonsei.ac.kr/people/yejin_choi.html;https://jiwanchung.github.io/;https://github.com/use08174;https://github.com/BootsofLagrangian;https://yj-yu.github.io/home/", "dblp": ";277/2798;;;188/6210", "google_scholar": "https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=l4UBOZAAAAAJ;;LzzcroEAAAAJ;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ", "orcid": ";;;;", "linkedin": ";chung-jiwan-81231b245/;;;", "or_profile": "~Yejinchoi1;~Jiwan_Chung1;~Sumin_Shim1;~Giyeong_Oh1;~Youngjae_Yu1", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "MS student;PhD student;Undergrad student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nyejinchoi2024towards,\ntitle={Towards Visual Text Design Transfer Across Languages},\nauthor={Yejinchoi and Jiwan Chung and Sumin Shim and Giyeong Oh and Youngjae Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=KgeQqLI7OD}\n}", "github": "", "reviewers": "Rme8;5qTD;5Mds", "pdf_size": 17764607, "rating": "6;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "62;72;52", "wc_strengths": "72;5;25", "wc_improvement": "266;14;21", "wc_limitations": "7;16;13", "wc_correctness": "30;8;1", "wc_clarity": "25;5;1", "wc_relation_to_prior_work": "3;7;1", "wc_documentation": "3;18;1", "wc_additional_feedback": "1;1;1", "wc_review": "469;146;116", "wc_reply_reviewers": "20;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 62.0, 8.16496580927726 ], "wc_strengths_avg": [ 34.0, 28.083209693100727 ], "wc_improvement_avg": [ 100.33333333333333, 117.17887560857072 ], "wc_limitations_avg": [ 12.0, 3.7416573867739413 ], "wc_correctness_avg": [ 13.0, 12.355835328567093 ], "wc_clarity_avg": [ 10.333333333333334, 10.498677165349081 ], "wc_relation_to_prior_work_avg": [ 3.6666666666666665, 2.494438257849294 ], "wc_documentation_avg": [ 7.333333333333333, 7.586537784494029 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 243.66666666666666, 159.80474196830193 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11492213295485564453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "EffiLearner: Enhancing Efficiency of Generated Code via Self-Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95648", "id": "KhwOuB0fs9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KhwOuB0fs9", "openreview": "https://openreview.net/forum?id=KhwOuB0fs9", "poster": "/media/PosterPDFs/NeurIPS%202024/95648.png?t=1731517718.8312752", "project": "", "author_site": "Dong HUANG, Jianbo Dai, Han Weng, Puzhen Wu, Yuhao QING, Heming Cui, Zhijiang Guo, Jie Zhang", "tldr": "", "abstract": "Large language models (LLMs) have shown remarkable progress in code generation, but their generated code often suffers from inefficiency, resulting in longer execution times and higher memory consumption. To address this issue, we propose EffiLearner, a self-optimization framework that utilizes execution overhead profiles to improve the efficiency of LLM-generated code. EffiLearner first generates code using an LLM, then executes it locally to capture execution time and memory usage profiles. These profiles are fed back to the LLM, which then revises the code to reduce overhead. To evaluate the effectiveness of EffiLearner, we conduct extensive experiments on EffiBench and two commonly used code generation benchmarks with 16 open-source and 6 closed-source models. Our evaluation results demonstrate that through iterative self-optimization, EffiLearner significantly enhances the efficiency of LLM-generated code. For example, the execution time (ET) of StarCoder2-15B for the EffiBench decreases from 0.93 (s) to 0.12 (s) which reduces 87.1\\% execution time requirement compared with the initial code. The total memory usage (TMU) of StarCoder2-15B also decreases from 22.02 (Mb*s) to 2.03 (Mb*s), which decreases 90.8\\% total memory consumption during the execution process.", "keywords": "Code Generation;Efficiency", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Dong HUANG;Jianbo Dai;Han Weng;Puzhen Wu;Yuhao QING;Heming Cui;Zhijiang Guo;Jie Zhang", "authorids": "~Dong_HUANG4;~Jianbo_Dai1;~Han_Weng2;~Puzhen_Wu1;~Yuhao_QING1;~Heming_Cui1;~Zhijiang_Guo2;~Jie_Zhang21", "gender": "M;M;M;;M;M;F;M", "homepage": "https://huangd1999.github.io/;https://1e0ndavid.github.io/;https://www.linkedin.com/in/puzhen-wu-751344241/;;https://www.cs.hku.hk/people/academic-staff/heming;https://cartus.github.io/;https://sites.google.com/view/jie-zhang;", "dblp": "94/3756-5.html;178/2976;339/2168;;59/5565.html;43/6147;84/6889-50;349/5252", "google_scholar": "UER9hrAAAAAJ;jU1A5BYAAAAJ;;https://scholar.google.com/citations?view_op=list_works;lW9bpFIAAAAJ;8b-u3icAAAAJ;rPWRqf8AAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0009-0005-3183-888X;0000-0003-1510-215X;;0000-0001-7746-440X;;;", "linkedin": ";jianbo-dai-9085a2161/;puzhen-wu-751344241/;;;;jie-zhang-5326aa187/;", "or_profile": "~Dong_HUANG4;~Jianbo_Dai1;~Puzhen_Wu1;~Yuhao_QING1;~Heming_Cui1;~Zhijiang_Guo2;~Jie_Zhang21;~han_weng1", "aff": "The University of Hong Kong;Huawei Technologies Ltd.;University College Dublin;The University of Hong Kong;the University of Hong Kong, University of Hong Kong;University of Cambridge;King's College London, University of London;Beijing University of Posts and Telecommunications", "aff_domain": "cs.hku.hk;huawei.com;ucdconnect.ie;hku.hk;cs.hku.hk;cam.ac.uk;kcl.ac.uk;bupt.edu.cn", "position": "PhD student;Researcher;Undergrad student;PhD student;Associate Professor;Postdoc;Lecturer;MS student", "bibtex": "@inproceedings{\nhuang2024effilearner,\ntitle={EffiLearner: Enhancing Efficiency of Generated Code via Self-Optimization},\nauthor={Dong HUANG and Jianbo Dai and Han Weng and Puzhen Wu and Yuhao QING and Heming Cui and Zhijiang Guo and Jie Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KhwOuB0fs9}\n}", "github": "", "reviewers": "H3VZ;pc3i;5c1j;VC11", "pdf_size": 647117, "rating": "3;4;6;8", "confidence": "5;5;4;5", "soundness": "2;1;3;3", "novelty": "2;2;3;4", "presentation": "3;4;2;4", "wc_summary": "115;106;104;119", "wc_strengths": "24;42;65;96", "wc_weaknesses": "290;302;186;49", "wc_questions": "37;101;412;106", "wc_limitations": "6;92;75;13", "wc_review": "472;643;842;383", "wc_reply_reviewers": "306;604;128;116", "wc_reply_authors": "384;1824;948;231", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;3;2", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 111.0, 6.2048368229954285 ], "wc_strengths_avg": [ 56.75, 26.920020430898635 ], "wc_weaknesses_avg": [ 206.75, 101.6350702267677 ], "wc_questions_avg": [ 164.0, 145.7446396955991 ], "wc_limitations_avg": [ 46.5, 37.566607512523674 ], "wc_review_avg": [ 585.0, 175.3467992294128 ], "wc_reply_reviewers_avg": [ 288.5, 197.08056728150547 ], "wc_reply_authors_avg": [ 846.75, 624.210451290268 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.22549380840084865, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=532846265601662732&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "cs.hku.hk;huawei.com;ucdconnect.ie;hku.hk;cs.hku.hk;cam.ac.uk;kcl.ac.uk;bupt.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;0;3;4;5", "aff_unique_norm": "University of Hong Kong;Huawei;University College Dublin;University of Cambridge;King's College London;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";Huawei Technologies;;;;", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com;https://www.ucd.ie;https://www.cam.ac.uk;https://www.kcl.ac.uk;http://www.bupt.edu.cn/", "aff_unique_abbr": "HKU;Huawei;UCD;Cambridge;KCL;BUPT", "aff_campus_unique_index": "0;0;0;2;3", "aff_campus_unique": "Hong Kong SAR;;Cambridge;Beijing", "aff_country_unique_index": "0;0;1;0;0;2;2;0", "aff_country_unique": "China;Ireland;United Kingdom" }, { "title": "Synatra: Turning Indirect Knowledge into Direct Demonstrations for Digital Agents at Scale", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95647", "id": "KjNEzWRIqn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KjNEzWRIqn", "openreview": "https://openreview.net/forum?id=KjNEzWRIqn", "poster": "", "project": "", "author_site": "Tianyue Ou, Frank F. Xu, Aman Madaan, Jiarui Liu, Robert Lo, Abishek Sridhar, Sudipta Sengupta, Dan Roth, Graham Neubig, Shuyan Zhou", "tldr": "", "abstract": "LLMs can now act as autonomous agents that interact with digital environments and complete specific objectives (e.g., arranging an online meeting). However, accuracy is still far from satisfactory, partly due to a lack of large-scale, direct demonstrations for digital tasks. Obtaining supervised data from humans is costly, and automatic data collection through exploration or reinforcement learning relies on complex environmental and content setup, resulting in datasets that lack comprehensive coverage of various scenarios. On the other hand, there is abundant knowledge that may indirectly assist task completion, such as online tutorials that were created for human consumption. In this work, we present Synatra, an approach that effectively transforms this indirect knowledge into direct supervision at scale. We define different types of indirect knowledge, and carefully study the available sources to obtain it, methods to encode the structure of direct demonstrations, and finally methods to transform indirect knowledge into direct demonstrations. We use 100k such synthetically-created demonstrations to finetune a 7B CodeLlama, and demonstrate that the resulting agent surpasses all comparably sized models on three web-based task benchmarks Mind2Web, MiniWoB++ and WebArena, as well as surpassing GPT-3.5 on WebArena and Mind2Web. In addition, while synthetic demonstrations prove to be only 3% the cost of human demonstrations (at $0.031 each), we show that the synthetic demonstrations can be more effective than an identical number of human demonstrations collected from limited domains.", "keywords": "AI agents;sythetic data;web navigation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/bfd52c10447deb73b61e9124f9458b43b51473a6.zip", "author": "Tianyue Ou;Frank F. Xu;Aman Madaan;Jiarui Liu;Robert Lo;Abishek Sridhar;Sudipta Sengupta;Dan Roth;Graham Neubig;Shuyan Zhou", "authorids": "~Tianyue_Ou1;~Frank_F._Xu1;~Aman_Madaan1;~Jiarui_Liu1;~Robert_Lo1;~Abishek_Sridhar1;~Sudipta_Sengupta1;~Dan_Roth3;~Graham_Neubig1;~Shuyan_Zhou1", "gender": ";M;;M;;M;M;M;M;Non-Binary", "homepage": ";https://frankxfz.me/;https://madaan.github.io;https://jiarui-liu.github.io/;https://robertlo.tech;;https://people.csail.mit.edu/sudipta/;https://www.cis.upenn.edu/~danroth/;http://phontron.com;https://shuyanzhou.github.io/", "dblp": ";190/4519;138/1043;134/1248-4;;;88/4889;r/DanRoth;03/8155;", "google_scholar": ";1hXyfIkAAAAJ;jW9ts2cAAAAJ;mSIhZTAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;h8M0U0oAAAAJ;E-bpPWgAAAAJ;wlosgkoAAAAJ;t6YzEpgAAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";;amnmadaan/;https://linkedin.com/in/jia-rui-liu;robert1003/;abishek-sridhar5/;sudiptasengupta;dan-roth-8667361/;;", "or_profile": "~Tianyue_Ou1;~Frank_F._Xu1;~Aman_Madaan1;~Jiarui_Liu1;~Robert_Lo1;~Abishek_Sridhar1;~Sudipta_Sengupta1;~Dan_Roth3;~Graham_Neubig1;~Shuyan_Zhou1", "aff": ";Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;;Amazon AWS;Amazon;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";cmu.edu;cmu.edu;cmu.edu;cmu.edu;;amazon.com;amazon.com;cmu.edu;cs.cmu.edu", "position": ";PhD student;PhD student;MS student;MS student;;Vice President & Distinguished Scientist;VP and Distinguished Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\nou2024synatra,\ntitle={Synatra: Turning Indirect Knowledge into Direct Demonstrations for Digital Agents at Scale},\nauthor={Tianyue Ou and Frank F. Xu and Aman Madaan and Jiarui Liu and Robert Lo and Abishek Sridhar and Sudipta Sengupta and Dan Roth and Graham Neubig and Shuyan Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KjNEzWRIqn}\n}", "github": "", "reviewers": "JodV;UgDQ;Gani;5MC7", "pdf_size": 2685685, "rating": "5;6;6;6", "confidence": "5;4;4;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "134;151;94;146", "wc_strengths": "43;41;39;35", "wc_weaknesses": "294;98;127;105", "wc_questions": "8;179;71;27", "wc_limitations": "6;39;9;4", "wc_review": "485;508;340;317", "wc_reply_reviewers": "0;60;144;0", "wc_reply_authors": "0;69;330;0", "reply_reviewers": "0;2;2;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 131.25, 22.37604746151563 ], "wc_strengths_avg": [ 39.5, 2.958039891549808 ], "wc_weaknesses_avg": [ 156.0, 80.38967595406764 ], "wc_questions_avg": [ 71.25, 66.27358070905781 ], "wc_limitations_avg": [ 14.5, 14.256577429383253 ], "wc_review_avg": [ 412.5, 84.78354793236716 ], "wc_reply_reviewers_avg": [ 51.0, 59.016946718718 ], "wc_reply_authors_avg": [ 99.75, 135.886671531832 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3043881580823710315&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";cmu.edu;cmu.edu;cmu.edu;cmu.edu;;amazon.com;amazon.com;cmu.edu;cs.cmu.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;1;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Amazon", "aff_unique_dep": ";Amazon Web Services", "aff_unique_url": "https://www.cmu.edu;https://aws.amazon.com", "aff_unique_abbr": "CMU;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Sampling via Discrete Non-Markov Diffusion Models with Predetermined Transition Time", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95646", "id": "KkYZmepjHn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KkYZmepjHn", "openreview": "https://openreview.net/forum?id=KkYZmepjHn", "poster": "", "project": "", "author_site": "Zixiang Chen, Huizhuo Yuan, Yongqian Li, Yiwen Kou, Junkai Zhang, Quanquan Gu", "tldr": "", "abstract": "Discrete diffusion models have emerged as powerful tools for high-quality data generation. Despite their success in discrete spaces, such as text generation tasks, the acceleration of discrete diffusion models remains under-explored. In this paper, we propose discrete non-Markov diffusion models (DNDM), which naturally induce the predetermined transition time set. This enables a training-free sampling algorithm that significantly reduces the number of function evaluations (i.e., calls to the neural network), making the sampling process much faster. Furthermore, we study the transition from finite to infinite step sampling, offering new insights into bridging the gap between discrete and continuous-time processes for discrete diffusion models. Extensive experiments on natural language generation and machine translation tasks demonstrate the superior performance of our method in terms of both generation speed and sample quality compared to existing methods for discrete diffusion models. Codes are available at \\url{https://github.com/uclaml/DNDM}.", "keywords": "discrete Diffusion;sampling acceleration;text generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zixiang Chen;Huizhuo Yuan;Yongqian Li;Yiwen Kou;Junkai Zhang;Quanquan Gu", "authorids": "~Zixiang_Chen1;~Huizhuo_Yuan1;~Yongqian_Li4;~Yiwen_Kou1;~Junkai_Zhang2;~Quanquan_Gu1", "gender": "M;;;F;;M", "homepage": "https://sites.google.com/view/zxchen;;https://github.com/YLtrees2;https://evankou.github.io/;;http://web.cs.ucla.edu/~qgu/", "dblp": "137/3624;;;323/9058;;50/4597", "google_scholar": "6nrCHr0AAAAJ;;;https://scholar.google.com/citations?hl=en;;GU9HgNAAAAAJ", "orcid": ";;;;;", "linkedin": ";;yongqian-li-318970192/;yiwen-kou-5a444916b/;;", "or_profile": "~Zixiang_Chen1;~Huizhuo_Yuan1;~Yongqian_Li4;~Yiwen_Kou1;~Junkai_Zhang2;~Quanquan_Gu1", "aff": " University of California, Los Angeles;;University of California, Los Angeles;University of California, Los Angeles;;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;;ucla.edu;ucla.edu;;cs.ucla.edu", "position": "PhD student;;MS student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nchen2024fast,\ntitle={Fast Sampling via Discrete Non-Markov Diffusion Models with Predetermined Transition Time},\nauthor={Zixiang Chen and Huizhuo Yuan and Yongqian Li and Yiwen Kou and Junkai Zhang and Quanquan Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KkYZmepjHn}\n}", "github": "", "reviewers": "JGuY;Uw1Q;em1Z;oHup", "pdf_size": 639878, "rating": "4;5;6;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "1;3;2;3", "presentation": "3;2;3;3", "wc_summary": "53;89;161;125", "wc_strengths": "42;77;143;219", "wc_weaknesses": "462;136;161;367", "wc_questions": "78;8;45;134", "wc_limitations": "1;1;1;9", "wc_review": "636;311;511;854", "wc_reply_reviewers": "639;119;91;40", "wc_reply_authors": "1863;649;21;17", "reply_reviewers": "3;1;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.0, 40.24922359499622 ], "wc_strengths_avg": [ 120.25, 67.56987124451253 ], "wc_weaknesses_avg": [ 281.5, 137.459994180125 ], "wc_questions_avg": [ 66.25, 46.29457311607917 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 578.0, 197.05202358768102 ], "wc_reply_reviewers_avg": [ 222.25, 242.2719288320461 ], "wc_reply_authors_avg": [ 637.5, 752.8404545453174 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13609203218037642665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.ucla.edu;;ucla.edu;ucla.edu;;cs.ucla.edu", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "BackTime: Backdoor Attacks on Multivariate Time Series Forecasting", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95645", "id": "Kl13lipxTW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kl13lipxTW", "openreview": "https://openreview.net/forum?id=Kl13lipxTW", "poster": "/media/PosterPDFs/NeurIPS%202024/95645.png?t=1730078277.078816", "project": "", "author_site": "Xiao Lin, Zhining Liu, Dongqi Fu, Ruizhong Qiu, Hanghang Tong", "tldr": "", "abstract": "Multivariate Time Series (MTS) forecasting is a fundamental task with numerous real-world applications, such as transportation, climate, and epidemiology. While a myriad of powerful deep learning models have been developed for this task, few works have explored the robustness of MTS forecasting models to malicious attacks, which is crucial for their trustworthy employment in high-stake scenarios. To address this gap, we dive deep into the backdoor attacks on MTS forecasting models and propose an effective attack method named BackTime. By subtly injecting a few \\textit{stealthy triggers} into the MTS data, BackTime can alter the predictions of the forecasting model according to the attacker's intent. Specifically, BackTime first identifies vulnerable timestamps in the data for poisoning, and then adaptively synthesizes stealthy and effective triggers by solving a bi-level optimization problem with a GNN-based trigger generator. Extensive experiments across multiple datasets and state-of-the-art MTS forecasting models demonstrate the effectiveness, versatility, and stealthiness of BackTime attacks.", "keywords": "backdoor attack; multivariate time series forecasting", "primary_area": "other", "supplementary_material": "", "author": "Xiao Lin;Zhining Liu;Dongqi Fu;Ruizhong Qiu;Hanghang Tong", "authorids": "~Xiao_Lin8;~Zhining_Liu1;~Dongqi_Fu1;~Ruizhong_Qiu1;~Hanghang_Tong3", "gender": "M;M;M;M;", "homepage": "https://xiaolin-cs.github.io/;https://zhiningliu.com/;https://dongqifu.github.io/;https://q-rz.github.io/;http://tonghanghang.org", "dblp": ";195/4399-2;273/0228;330/9860;58/1757", "google_scholar": "fWQkSSwAAAAJ;5WORAUQAAAAJ;WByXZAcAAAAJ;REKarmcAAAAJ;RaINcuUAAAAJ", "orcid": ";0000-0003-1828-2109;0000-0002-8726-9234;0009-0000-3253-8890;0000-0003-4405-3887", "linkedin": ";zhiningliu/;;ruizhong-qiu/;htong/", "or_profile": "~Xiao_Lin8;~Zhining_Liu1;~Dongqi_Fu1;~Ruizhong_Qiu1;~Hanghang_Tong3", "aff": "University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nlin2024backtime,\ntitle={BackTime: Backdoor Attacks on Multivariate Time Series Forecasting},\nauthor={Xiao Lin and Zhining Liu and Dongqi Fu and Ruizhong Qiu and Hanghang Tong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Kl13lipxTW}\n}", "github": "", "reviewers": "mt9q;Bpy4;PmgC", "pdf_size": 2425427, "rating": "6;7;8", "confidence": "2;2;4", "soundness": "2;3;4", "novelty": "3;3;4", "presentation": "4;3;3", "wc_summary": "69;90;33", "wc_strengths": "118;88;41", "wc_weaknesses": "42;75;60", "wc_questions": "135;24;60", "wc_limitations": "27;23;32", "wc_review": "391;300;226", "wc_reply_reviewers": "149;63;22", "wc_reply_authors": "442;67;80", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 64.0, 23.53720459187964 ], "wc_strengths_avg": [ 82.33333333333333, 31.689465477067017 ], "wc_weaknesses_avg": [ 59.0, 13.490737563232042 ], "wc_questions_avg": [ 73.0, 46.238512086787566 ], "wc_limitations_avg": [ 27.333333333333332, 3.6817870057290873 ], "wc_review_avg": [ 305.6666666666667, 67.4800382006083 ], "wc_reply_reviewers_avg": [ 78.0, 52.92132525425517 ], "wc_reply_authors_avg": [ 196.33333333333334, 173.79361962460595 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12748468822660742025&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "TACT: Advancing Complex Aggregative Reasoning with Information Extraction Tools", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97749", "id": "Km2XEjH0I5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Km2XEjH0I5", "openreview": "https://openreview.net/forum?id=Km2XEjH0I5", "poster": "/media/PosterPDFs/NeurIPS%202024/97749.png?t=1728545689.353594", "project": "", "author_site": "Avi Caciularu, Alon Jacovi, Eyal Ben-David, Sasha Goldshtein, Tal Schuster, Jonathan Herzig, Gal Elidan, Amir Globerson", "tldr": "", "abstract": "Large Language Models (LLMs) often do not perform well on queries that require the aggregation of information across texts. To better evaluate this setting and facilitate modeling efforts, we introduce TACT - Text And Calculations through Tables, a dataset crafted to evaluate LLMs' reasoning and computational abilities using complex instructions. TACT contains challenging instructions that demand stitching information scattered across one or more texts, and performing complex integration on this information to generate the answer. We construct this dataset by leveraging an existing dataset of texts and their associated tables. For each such tables, we formulate new queries, and gather their respective answers. We demonstrate that all contemporary LLMs perform poorly on this dataset, achieving an accuracy below 38%. To pinpoint the difficulties and thoroughly dissect the problem, we analyze model performance across three components: table-generation, Pandas command-generation, and execution. Unexpectedly, we discover that each component presents substantial challenges for current LLMs. These insights lead us to propose a focused modeling framework, which we refer to as _IE as a tool_. Specifically, we propose to add \"tools\" for each of the above steps, and implement each such tool with few-shot prompting. This approach shows an improvement over existing prompting techniques, offering a promising direction for enhancing model capabilities in these tasks.", "keywords": "Complex Reasoning;Information Seeking;Large Language Models;Tool-Assisted Models;Tablar Data", "primary_area": "", "supplementary_material": "", "author": "Avi Caciularu;Alon Jacovi;Eyal Ben-David;Sasha Goldshtein;Tal Schuster;Jonathan Herzig;Gal Elidan;Amir Globerson", "authorids": "~Avi_Caciularu1;~Alon_Jacovi1;~Eyal_Ben-David1;~Sasha_Goldshtein1;~Tal_Schuster1;~Jonathan_Herzig2;~Gal_Elidan2;~Amir_Globerson1", "gender": "M;M;M;M;Not Specified;M;M;M", "homepage": "http://aviclu.github.io/;https://alonjacovi.github.io/;https://eyalbd2.github.io/;;https://people.csail.mit.edu/tals/;https://jonathanherzig.github.io/;;http://www.cs.tau.ac.il/~gamir/", "dblp": "https://dblp.uni-trier.de/pid/207/8509;218/5900;234/9089;;190/7491;133/3687.html;54/2055;08/4162.html", "google_scholar": "https://scholar.google.co.il/citations?user=fPG_0aQAAAAJ;cX9TtloAAAAJ;ArqbkI4AAAAJ;;oo8QRmIAAAAJ;https://scholar.google.co.il/citations?view_op=list_works;;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ", "orcid": ";;;;;;;", "linkedin": "avicaciularu/;;eyal-bd/;https://il.linkedin.com/in/sashag;;;;", "or_profile": "~Avi_Caciularu1;~Alon_Jacovi1;~Eyal_Ben-David1;~Sasha_Goldshtein1;~Tal_Schuster1;~Jonathan_Herzig2;~Gal_Elidan2;~Amir_Globerson1", "aff": "Google;Google;Technion - Israel Institute of Technology, Technion;Google;Google;Research, Google;Hebrew University of Jerusalem, Technion;Tel Aviv University", "aff_domain": "google.com;google.com;technion.ac.il;google.com;google.com;research.google.com;huji.ac.il;tau.ac.il", "position": "Researcher;Researcher;PhD student;Researcher;Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncaciularu2024tact,\ntitle={{TACT}: Advancing Complex Aggregative Reasoning with Information Extraction Tools},\nauthor={Avi Caciularu and Alon Jacovi and Eyal Ben-David and Sasha Goldshtein and Tal Schuster and Jonathan Herzig and Gal Elidan and Amir Globerson},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Km2XEjH0I5}\n}", "github": "", "reviewers": "THwC;MjrW;67VL", "pdf_size": 805185, "rating": "7;7;7", "confidence": "4;3;3", "wc_summary_and_contributions": "86;66;146", "wc_strengths": "23;2;130", "wc_improvement": "16;2;1", "wc_limitations": "17;1;1", "wc_correctness": "16;1;1", "wc_clarity": "32;1;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "8;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "200;76;283", "wc_reply_reviewers": "9;0;5", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 99.33333333333333, 33.9934634239519 ], "wc_strengths_avg": [ 51.666666666666664, 56.04958122551458 ], "wc_improvement_avg": [ 6.333333333333333, 6.847546194724712 ], "wc_limitations_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_correctness_avg": [ 6.0, 7.0710678118654755 ], "wc_clarity_avg": [ 11.333333333333334, 14.613540144521982 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 186.33333333333334, 85.05815004388991 ], "wc_reply_reviewers_avg": [ 4.666666666666667, 3.6817870057290873 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14599893465368707371&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;google.com;technion.ac.il;google.com;google.com;research.google.com;huji.ac.il;tau.ac.il", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;2;3", "aff_unique_norm": "Google;Technion - Israel Institute of Technology;Hebrew University of Jerusalem;Tel Aviv University", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.technion.ac.il;https://www.huji.ac.il;https://www.tau.ac.il", "aff_unique_abbr": "Google;Technion;HUJI;TAU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0;0;1;1", "aff_country_unique": "United States;Israel" }, { "title": "E.T. Bench: Towards Open-Ended Event-Level Video-Language Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97748", "id": "KoSSEp6Du5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KoSSEp6Du5", "openreview": "https://openreview.net/forum?id=KoSSEp6Du5", "poster": "", "project": "", "author_site": "Ye Liu, Zongyang Ma, Zhongang Qi, Yang Wu, Ying Shan, Chang Chen", "tldr": "", "abstract": "Recent advances in Video Large Language Models (Video-LLMs) have demonstrated their great potential in general-purpose video understanding. To verify the significance of these models, a number of benchmarks have been proposed to diagnose their capabilities in different scenarios. However, existing benchmarks merely evaluate models through video-level question-answering, lacking fine-grained event-level assessment and task diversity. To fill this gap, we introduce E.T. Bench (Event-Level & Time-Sensitive Video Understanding Benchmark), a large-scale and high-quality benchmark for open-ended event-level video understanding. Categorized within a 3-level task taxonomy, E.T. Bench encompasses 7.3K samples under 12 tasks with 7K videos (251.4h total length) under 8 domains, providing comprehensive evaluations. We extensively evaluated 8 Image-LLMs and 12 Video-LLMs on our benchmark, and the results reveal that state-of-the-art models for coarse-level (video-level) understanding struggle to solve our fine-grained tasks, e.g., grounding event-of-interests within videos, largely due to the short video context length, improper time representations, and lack of multi-event training data. Focusing on these issues, we further propose a strong baseline model, E.T. Chat, together with an instruction-tuning dataset E.T. Instruct 164K tailored for fine-grained event-level understanding. Our simple but effective solution demonstrates superior performance in multiple scenarios.", "keywords": "Event-Level Video Understanding;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Ye Liu;Zongyang Ma;Zhongang Qi;Yang Wu;Ying Shan;Chang Wen Chen", "authorids": "~Ye_Liu8;~Zongyang_Ma3;~Zhongang_Qi1;~Yang_Wu1;~Ying_Shan2;~Chang_Wen_Chen1", "gender": "M;M;M;M;M;M", "homepage": "https://yeliu.dev/;https://scholar.google.com/citations?user=qtdueToAAAAJ&hl=zh-CN&authuser=1;;;;https://chenlab.comp.polyu.edu.hk/", "dblp": "96/2615;117/4275;08/9990;56/1428-1;68/5910;29/4638", "google_scholar": "2eiaBcsAAAAJ;;zJvrrusAAAAJ;https://scholar.google.com.hk/citations?user=vwOQ-UIAAAAJ;4oXBp9UAAAAJ;w2HXPUUAAAAJ", "orcid": "0000-0001-9597-0525;;;;0000-0001-7673-8325;0000-0002-6720-234X", "linkedin": ";;;;YingShanProfile/;chang-wen-chen-7b72095/", "or_profile": "~Ye_Liu8;~Zongyang_Ma3;~Zhongang_Qi1;~Yang_Wu1;~Ying_Shan2;~Chang_Wen_Chen1", "aff": "Harvard University;Institute of Automation, Chinese Academy of Sciences;Tencent PCG ARC Lab;Tencent AI Lab;Tencent PCG ARC Lab;Hong Kong Polytechnic University", "aff_domain": "seas.harvard.edu;ia.ac.cn;tencent.com;tencent.com;arc.tencent.com;polyu.edu.hk", "position": "Researcher;PhD student;Researcher;Principal Researcher;Director;Full Professor", "bibtex": "@inproceedings{\nliu2024et,\ntitle={E.T. Bench: Towards Open-Ended Event-Level Video-Language Understanding},\nauthor={Ye Liu and Zongyang Ma and Zhongang Qi and Yang Wu and Ying Shan and Chang Wen Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=KoSSEp6Du5}\n}", "github": "", "reviewers": "ct4u;T7B9;yb1y;77Mr", "pdf_size": 6466287, "rating": "6;6;8;8", "confidence": "4;4;3;5", "wc_summary_and_contributions": "55;139;68;50", "wc_strengths": "21;110;4;70", "wc_improvement": "41;32;4;66", "wc_limitations": "7;31;22;1", "wc_correctness": "1;1;1;4", "wc_clarity": "12;1;6;9", "wc_relation_to_prior_work": "1;1;7;31", "wc_documentation": "2;1;25;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "141;317;138;246", "wc_reply_reviewers": "0;0;10;9", "wc_reply_authors": "70;70;35;17", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 78.0, 35.82596823534571 ], "wc_strengths_avg": [ 51.25, 41.68557904119841 ], "wc_improvement_avg": [ 35.75, 22.16275028059469 ], "wc_limitations_avg": [ 15.25, 11.882234638316145 ], "wc_correctness_avg": [ 1.75, 1.299038105676658 ], "wc_clarity_avg": [ 7.0, 4.06201920231798 ], "wc_relation_to_prior_work_avg": [ 10.0, 12.36931687685298 ], "wc_documentation_avg": [ 10.5, 9.810708435174291 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 210.5, 75.31434126379915 ], "wc_reply_reviewers_avg": [ 4.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 48.0, 22.901964981197573 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7475457804325890752&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "seas.harvard.edu;ia.ac.cn;tencent.com;tencent.com;arc.tencent.com;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;1;2;2;2;3", "aff_unique_norm": "Harvard University;Chinese Academy of Sciences;Tencent;Hong Kong Polytechnic University", "aff_unique_dep": ";Institute of Automation;PCG ARC Lab;", "aff_unique_url": "https://www.harvard.edu;http://www.ia.cas.cn;https://www.tencent.com;https://www.polyu.edu.hk", "aff_unique_abbr": "Harvard;CAS;Tencent;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Learning to Price Homogeneous Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95644", "id": "KoyTqNs6SZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KoyTqNs6SZ", "openreview": "https://openreview.net/forum?id=KoyTqNs6SZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95644.png?t=1731182672.4828913", "project": "", "author_site": "Keran Chen, Joon Suk Huh, Kirthevasan Kandasamy", "tldr": "", "abstract": "We study a data pricing problem, where a seller has access to $N$ homogeneous data points (e.g. drawn i.i.d. from some distribution).\nThere are $m$ types of buyers in the market, where buyers of the same type $i$ have the same valuation curve $v_i:[N]\\rightarrow [0,1]$, where $v_i(n)$ is the value for having $n$ data points.\n*A priori*, the seller is unaware of the\ndistribution of buyers, but can repeat the market for $T$ rounds so as to learn the revenue-optimal pricing curve $p:[N] \\rightarrow [0, 1]$.\nTo solve this online learning problem,\nwe first develop novel discretization schemes to approximate any pricing curve.\nWhen compared to prior work,\nthe size of our discretization schemes scales gracefully with the approximation parameter, which translates to better regret in online learning.\nUnder assumptions like smoothness and diminishing returns which are satisfied by data, the discretization size can be reduced further.\nWe then turn to the online learning problem, \nboth in the stochastic and adversarial settings.\nOn each round, the seller chooses an *anonymous* pricing curve $p_t$.\nA new buyer appears and may choose to purchase some amount of data.\nShe then reveals her type *only if* she makes a purchase.\nOur online algorithms build on classical algorithms such as UCB and FTPL, but require novel ideas to account for the asymmetric nature of this feedback and to deal with the vastness of the space of pricing curves.\nUsing the improved discretization schemes previously developed, we are able to achieve \n$\\widetilde{O}(m\\sqrt{T})$ regret in the stochastic setting and $\\widetilde{\\mathcal{O}}(m^{3/2}\\sqrt{T})$ regret in the adversarial setting.", "keywords": "Learning to price;Revenue maximization;Data marketplaces", "primary_area": "online_learning", "supplementary_material": "", "author": "Keran Chen;Joon Suk Huh;Kirthevasan Kandasamy", "authorids": "~Keran_Chen1;~Joon_Suk_Huh1;~Kirthevasan_Kandasamy1", "gender": "F;;M", "homepage": "https://kchen429.github.io/keranchen.github.io/;;https://people.eecs.berkeley.edu/~kandasamy/research.html", "dblp": ";;128/3628", "google_scholar": ";;kohOJPcAAAAJ", "orcid": ";;", "linkedin": "keran-chen-1b861931a/;;", "or_profile": "~Keran_Chen1;~Joon_Suk_Huh1;~Kirthevasan_Kandasamy1", "aff": "University of Wisconsin - Madison;;Department of Computer Science, University of Wisconsin - Madison", "aff_domain": "wisc.edu;;cs.wisc.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nchen2024learning,\ntitle={Learning to Price Homogeneous Data},\nauthor={Keran Chen and Joon Suk Huh and Kirthevasan Kandasamy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KoyTqNs6SZ}\n}", "github": "", "reviewers": "3dYq;oJtn;oHSK;kUrw", "pdf_size": 442378, "rating": "5;6;6;7", "confidence": "4;3;2;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;2;3", "wc_summary": "158;105;25;300", "wc_strengths": "87;55;17;54", "wc_weaknesses": "156;56;103;183", "wc_questions": "110;114;4;53", "wc_limitations": "26;2;4;6", "wc_review": "537;332;153;596", "wc_reply_reviewers": "0;15;15;33", "wc_reply_authors": "29;49;49;13", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 147.0, 100.22225301798 ], "wc_strengths_avg": [ 53.25, 24.78280654001883 ], "wc_weaknesses_avg": [ 124.5, 48.91063279083598 ], "wc_questions_avg": [ 70.25, 45.22374929171618 ], "wc_limitations_avg": [ 9.5, 9.630680142129112 ], "wc_review_avg": [ 404.5, 175.16920391438674 ], "wc_reply_reviewers_avg": [ 15.75, 11.691342951089922 ], "wc_reply_authors_avg": [ 35.0, 15.0996688705415 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10065958406553824929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "wisc.edu;;cs.wisc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Privacy Backdoors: Enhancing Membership Inference through Poisoning Pre-trained Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95643", "id": "KppBAWJbry", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KppBAWJbry", "openreview": "https://openreview.net/forum?id=KppBAWJbry", "poster": "/media/PosterPDFs/NeurIPS%202024/95643.png?t=1731716420.2114425", "project": "", "author_site": "Yuxin Wen, Leo Marchyok, Sanghyun Hong, Jonas Geiping, Tom Goldstein, Nicholas Carlini", "tldr": "", "abstract": "It is commonplace to produce application-specific models by fine-tuning large pre-trained models using a small bespoke dataset. The widespread availability of foundation model checkpoints on the web poses considerable risks, including the vulnerability to backdoor attacks. In this paper, we unveil a new vulnerability: the privacy backdoor attack. This black-box privacy attack aims to amplify the privacy leakage that arises when fine-tuning a model: when a victim fine-tunes a backdoored model, their training data will be leaked at a significantly higher rate than if they had fine-tuned a typical model. We conduct extensive experiments on various datasets and models, including both vision-language models (CLIP) and large language models, demonstrating the broad applicability and effectiveness of such an attack. Additionally, we carry out multiple ablation studies with different fine-tuning methods and inference strategies to thoroughly analyze this new threat. Our findings highlight a critical privacy concern within the machine learning community and call for a re-evaluation of safety protocols in the use of open-source pre-trained models.", "keywords": "Privacy;Membership inference attack", "primary_area": "privacy", "supplementary_material": "/attachment/208d65f07f5f7c41e7c3f9a6027bf6ecd291d074.zip", "author": "Yuxin Wen;Leo Marchyok;Sanghyun Hong;Jonas Geiping;Tom Goldstein;Nicholas Carlini", "authorids": "~Yuxin_Wen2;~Leo_Marchyok1;~Sanghyun_Hong1;~Jonas_Geiping1;~Tom_Goldstein1;~Nicholas_Carlini1", "gender": ";M;M;M;M;", "homepage": "https://yuxinwenrick.github.io/;;http://www.sanghyun-hong.com;https://jonasgeiping.github.io/;https://www.cs.umd.edu/~tomg/;http://nicholas.carlini.com", "dblp": ";;135/8991;190/7229;25/8184;145/1806", "google_scholar": "oUYfjg0AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=206vNCEAAAAJ;KmSuVtgAAAAJ;", "orcid": ";;;;;", "linkedin": ";leo-marchyok-a8a94b241/;;;;", "or_profile": "~Yuxin_Wen2;~Leo_Marchyok1;~Sanghyun_Hong1;~Jonas_Geiping1;~Tom_Goldstein1;~Nicholas_Carlini1", "aff": "University of Maryland, College Park;Oregon State University;Oregon State University;Max Planck Institute for Intelligent Systems, Max-Planck Institute;University of Maryland, College Park;Google", "aff_domain": "umd.edu;oregonstate.edu;oregonstate.edu;tuebingen.mpg.de;umd.edu;google.com", "position": "PhD student;Undergrad student;Assistant Professor;Principal Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nwen2024privacy,\ntitle={Privacy Backdoors: Enhancing Membership Inference through Poisoning Pre-trained Models},\nauthor={Yuxin Wen and Leo Marchyok and Sanghyun Hong and Jonas Geiping and Tom Goldstein and Nicholas Carlini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KppBAWJbry}\n}", "github": "", "reviewers": "PrmA;Tc4h;rKHH;h5kd", "pdf_size": 362966, "rating": "4;4;7;7", "confidence": "4;4;4;4", "soundness": "1;2;3;4", "novelty": "2;2;2;4", "presentation": "2;3;3;4", "wc_summary": "132;73;144;64", "wc_strengths": "112;15;64;208", "wc_weaknesses": "1504;300;310;350", "wc_questions": "104;88;339;57", "wc_limitations": "40;1;23;1", "wc_review": "1892;477;880;680", "wc_reply_reviewers": "242;172;52;189", "wc_reply_authors": "32;138;28;122", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 103.25, 35.152347005569915 ], "wc_strengths_avg": [ 99.75, 71.2894627557257 ], "wc_weaknesses_avg": [ 616.0, 513.0282643285846 ], "wc_questions_avg": [ 147.0, 112.13161909113771 ], "wc_limitations_avg": [ 16.25, 16.391689967785506 ], "wc_review_avg": [ 982.25, 544.2271469708214 ], "wc_reply_reviewers_avg": [ 163.75, 69.49235569470933 ], "wc_reply_authors_avg": [ 80.0, 50.33885179461288 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15779457384468914776&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umd.edu;oregonstate.edu;oregonstate.edu;tuebingen.mpg.de;umd.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;1;2;0;3", "aff_unique_norm": "University of Maryland;Oregon State University;Max Planck Institute for Intelligent Systems;Google", "aff_unique_dep": ";;Intelligent Systems;Google", "aff_unique_url": "https://www/umd.edu;https://oregonstate.edu;https://www.mpi-is.mpg.de;https://www.google.com", "aff_unique_abbr": "UMD;OSU;MPI-IS;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "College Park;;Mountain View", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "DiMSUM: Diffusion Mamba - A Scalable and Unified Spatial-Frequency Method for Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95642", "id": "KqbLzSIXkm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KqbLzSIXkm", "openreview": "https://openreview.net/forum?id=KqbLzSIXkm", "poster": "/media/PosterPDFs/NeurIPS%202024/95642.png?t=1733417694.3688025", "project": "", "author_site": "Hao Phung, Quan Dao, Trung Dao, Viet Hoang Phan, Dimitris Metaxas, Anh Tran", "tldr": "", "abstract": "We introduce a novel state-space architecture for diffusion models, effectively harnessing spatial and frequency information to enhance the inductive bias towards local features in input images for image generation tasks. While state-space networks, including Mamba, a revolutionary advancement in recurrent neural networks, typically scan input sequences from left to right, they face difficulties in designing effective scanning strategies, especially in the processing of image data. Our method demonstrates that integrating wavelet transformation into Mamba enhances the local structure awareness of visual inputs and better captures long-range relations of frequencies by disentangling them into wavelet subbands, representing both low- and high-frequency components. These wavelet-based outputs are then processed and seamlessly fused with the original Mamba outputs through a cross-attention fusion layer, combining both spatial and frequency information to optimize the order awareness of state-space models which is essential for the details and overall quality of image generation. Besides, we introduce a globally-shared transformer to supercharge the performance of Mamba, harnessing its exceptional power to capture global relationships. Through extensive experiments on standard benchmarks, our method demonstrates superior results compared to DiT and DIFFUSSM, achieving faster training convergence and delivering high-quality outputs. The codes and pretrained models are released at https://github.com/VinAIResearch/DiMSUM.git.", "keywords": "Diffusion models;Mamba;Wavelet transformation", "primary_area": "generative_models", "supplementary_material": "/attachment/1281fd9115978629e9ed30829686033a665680cb.zip", "author": "Hao Phung;Quan Dao;Trung Tuan Dao;Hoang Phan;Dimitris N. Metaxas;Anh Tuan Tran", "authorids": "~Hao_Phung1;~Quan_Dao1;~Trung_Tuan_Dao1;~Hoang_Phan1;~Dimitris_N._Metaxas1;~Anh_Tuan_Tran2", "gender": "M;M;M;M;M;M", "homepage": "https://hao-pt.github.io/;https://github.com/quandao10;https://trung-dt.com/;https://sites.google.com/site/anhttranusc/;https://www.cs.rutgers.edu/~dnm/;https://viethoang1512.github.io/", "dblp": "334/7596;334/7610;366/0792;150/5269-1;m/DNMetaxas;295/0299", "google_scholar": "KxSdvGoAAAAJ;g0RS3_kAAAAJ;FZmxEYYAAAAJ;FYZ5ODQAAAAJ;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ;", "orcid": "0000-0002-6834-4139;0009-0006-0996-0472;0009-0002-5217-4558;0000-0002-3120-4036;;", "linkedin": ";;trung-dao-6a8625144/;https://linkedin.com/in/anh-tran-97814b19;dimitris-metaxas-1bb74914/;", "or_profile": "~Hao_Phung1;~Quan_Dao1;~Trung_Tuan_Dao1;~Anh_Tuan_Tran2;~Dimitris_Metaxas1;~Hoang_Viet_Phan1", "aff": "VinAI Research;VinAI Research;VinAI Research;VinAI Research;Rutgers University;New York University", "aff_domain": "vinai.io;vinai.io;vinai.io;vinai.io;cs.rutgers.edu;nyu.edu", "position": "Intern;Intern;Researcher;Research Scientist;Full Professor;PhD student", "bibtex": "@inproceedings{\nphung2024dimsum,\ntitle={Di{MSUM}: Diffusion Mamba - A Scalable and Unified Spatial-Frequency Method for Image Generation},\nauthor={Hao Phung and Quan Dao and Trung Tuan Dao and Hoang Phan and Dimitris N. Metaxas and Anh Tuan Tran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KqbLzSIXkm}\n}", "github": "", "reviewers": "C5pc;RLU7;eitf;nhSz", "pdf_size": 48002647, "rating": "4;4;5;8", "confidence": "5;4;4;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "73;49;63;89", "wc_strengths": "48;37;40;76", "wc_weaknesses": "129;244;111;280", "wc_questions": "44;24;31;2", "wc_limitations": "3;31;16;2", "wc_review": "297;385;261;449", "wc_reply_reviewers": "315;169;16;46", "wc_reply_authors": "1244;844;64;63", "reply_reviewers": "2;1;1;1", "reply_authors": "5;4;3;3", "rating_avg": [ 5.25, 1.6393596310755 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.5, 14.585952145814822 ], "wc_strengths_avg": [ 50.25, 15.400892831261439 ], "wc_weaknesses_avg": [ 191.0, 72.41201557752692 ], "wc_questions_avg": [ 25.25, 15.22128443988877 ], "wc_limitations_avg": [ 13.0, 11.76860229593982 ], "wc_review_avg": [ 348.0, 73.72245248226622 ], "wc_reply_reviewers_avg": [ 136.5, 117.92900406600575 ], "wc_reply_authors_avg": [ 553.75, 510.24032327913875 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.45749571099781405, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13894948032551939471&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "vinai.io;vinai.io;vinai.io;vinai.io;cs.rutgers.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "VinAI Research;Rutgers University;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vinai.io/;https://www.rutgers.edu;https://www.nyu.edu", "aff_unique_abbr": "VinAI;Rutgers;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "Vietnam;United States" }, { "title": "Decentralized Noncooperative Games with Coupled Decision-Dependent Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95641", "id": "KqgSzXbufw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KqgSzXbufw", "openreview": "https://openreview.net/forum?id=KqgSzXbufw", "poster": "/media/PosterPDFs/NeurIPS%202024/95641.png?t=1731089524.7738438", "project": "", "author_site": "Wenjing YAN, Xuanyu Cao", "tldr": "", "abstract": "Distribution variations in machine learning, driven by the dynamic nature of deployment environments, significantly impact the performance of learning models. This paper explores endogenous distribution shifts in learning systems, where deployed models influence environments and subsequently alter data distributions. This phenomenon is formulated by a decision-dependent distribution mapping within the recently proposed framework of performative prediction (PP) Perdomo et al. (2020). We investigate the performative effect in a decentralized noncooperative game, where players aim to minimize private cost functions while simultaneously managing coupled inequality constraints. Under performativity, we examine two equilibrium concepts for the studied game: performative stable equilibrium (PSE) and Nash equilibrium (NE), and establish sufficient conditions for their existence and uniqueness. Notably, we provide the first upper bound on the distance between the PSE and NE in the literature, which is challenging to evaluate due to the absence of strong convexity on the joint cost function. Furthermore, we develop a decentralized stochastic primal-dual algorithm for efficiently computing the PSE point. By carefully bounding the performative effect in theoretical analysis, we prove that the proposed algorithm achieves sublinear convergence rates for both performative regrets and constraint violation and maintains the same order of convergence rate as the case without performativity. Numerical experiments validate the effectiveness of our algorithm and theoretical results.", "keywords": "Performative Prediction;Decision-Dependent Distribution;Decentralized Noncooperative Games. Cournot Competition", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Wenjing Yan;Xuanyu Cao", "authorids": "~Wenjing_Yan2;~Xuanyu_Cao1", "gender": "F;M", "homepage": ";https://labs.wsu.edu/xuanyu/", "dblp": ";117/3366", "google_scholar": "sGUUkfEAAAAJ;jvrZYmAAAAAJ", "orcid": "0000-0002-4790-1397;", "linkedin": ";", "or_profile": "~Wenjing_Yan2;~Xuanyu_Cao1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;ust.hk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyan2024decentralized,\ntitle={Decentralized Noncooperative Games with Coupled Decision-Dependent Distributions},\nauthor={Wenjing Yan and Xuanyu Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KqgSzXbufw}\n}", "github": "", "reviewers": "ReB3;dWNU;pBFf;QNeF", "pdf_size": 11514039, "rating": "4;5;6;6", "confidence": "4;3;2;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "66;78;84;105", "wc_strengths": "20;100;64;148", "wc_weaknesses": "332;106;1;137", "wc_questions": "51;52;50;83", "wc_limitations": "17;9;6;7", "wc_review": "486;345;205;480", "wc_reply_reviewers": "127;5;5;13", "wc_reply_authors": "112;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 14.13108276106258 ], "wc_strengths_avg": [ 83.0, 47.02127178203499 ], "wc_weaknesses_avg": [ 144.0, 119.6724696828807 ], "wc_questions_avg": [ 59.0, 13.874436925511608 ], "wc_limitations_avg": [ 9.75, 4.322904116447646 ], "wc_review_avg": [ 379.0, 115.19765622615766 ], "wc_reply_reviewers_avg": [ 37.5, 51.7759596724194 ], "wc_reply_authors_avg": [ 28.0, 48.49742261192856 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4w3ac_HtQt8J:scholar.google.com/&scioq=Decentralized+Noncooperative+Games+with+Coupled+Decision-Dependent+Distributions&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "hkust.edu;ust.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "GUIDE: Real-Time Human-Shaped Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95640", "id": "KrHFICMPjm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KrHFICMPjm", "openreview": "https://openreview.net/forum?id=KrHFICMPjm", "poster": "", "project": "", "author_site": "Lingyu Zhang, Zhengran Ji, Nicholas Waytowich, Boyuan Chen", "tldr": "", "abstract": "The recent rapid advancement of machine learning has been driven by increasingly powerful models with the growing availability of training data and computational resources. However, real-time decision-making tasks with limited time and sparse learning signals remain challenging. One way of improving the learning speed and performance of these agents is to leverage human guidance. In this work, we introduce GUIDE, a framework for real-time human-guided reinforcement learning by enabling continuous human feedback and grounding such feedback into dense rewards to accelerate policy learning. Additionally, our method features a simulated feedback module that learns and replicates human feedback patterns in an online fashion, effectively reducing the need for human input while allowing continual training. We demonstrate the performance of our framework on challenging tasks with sparse rewards and visual observations. Our human study involving 50 subjects offers strong quantitative and qualitative evidence of the effectiveness of our approach. With only 10 minutes of human feedback, our algorithm achieves up to 30\\% increase in success rate compared to its RL baseline.", "keywords": "Human-guided Reinforcement Learning;Real-time decision making", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/87d1fd82f5f895d1eef5d9f42ab70b8d000b175e.zip", "author": "Lingyu Zhang;Zhengran Ji;Nicholas R Waytowich;Boyuan Chen", "authorids": "~Lingyu_Zhang3;~Zhengran_Ji1;~Nicholas_R_Waytowich1;~Boyuan_Chen1", "gender": "M;M;M;Not Specified", "homepage": ";;http://nicholaswaytowich.com;http://boyuanchen.com/", "dblp": ";;27/9826;193/7174-1", "google_scholar": "https://scholar.google.com/citations?hl=en;Kmij7_cAAAAJ;leelUAgAAAAJ;5DBpY6EAAAAJ", "orcid": ";;;", "linkedin": "lingyuz/;;;boyuan-chen-b30854a0/", "or_profile": "~Lingyu_Zhang3;~Zhengran_Ji1;~Nicholas_R_Waytowich1;~Boyuan_Chen1", "aff": "Duke University;Duke University;DEVCOM Army Research Laboratory;Duke University", "aff_domain": "duke.edu;duke.edu;army.mil;duke.edu", "position": "Research Associate;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024guide,\ntitle={{GUIDE}: Real-Time Human-Shaped Agents},\nauthor={Lingyu Zhang and Zhengran Ji and Nicholas R Waytowich and Boyuan Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KrHFICMPjm}\n}", "github": "", "reviewers": "JtyH;fsCr;Xw63;sJ5r", "pdf_size": 5825276, "rating": "3;4;5;6", "confidence": "4;2;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "2;3;2;3", "wc_summary": "50;121;100;90", "wc_strengths": "37;48;89;124", "wc_weaknesses": "481;115;198;114", "wc_questions": "19;245;61;82", "wc_limitations": "3;6;4;46", "wc_review": "590;535;452;456", "wc_reply_reviewers": "216;334;0;30", "wc_reply_authors": "1055;854;75;82", "reply_reviewers": "2;1;0;1", "reply_authors": "4;3;2;3", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.25, 25.791229129298976 ], "wc_strengths_avg": [ 74.5, 34.528973341239094 ], "wc_weaknesses_avg": [ 227.0, 150.55729806289696 ], "wc_questions_avg": [ 101.75, 85.75947469521954 ], "wc_limitations_avg": [ 14.75, 18.07449860992 ], "wc_review_avg": [ 508.25, 57.6470944627741 ], "wc_reply_reviewers_avg": [ 145.0, 136.94159338929865 ], "wc_reply_authors_avg": [ 516.5, 443.73443634678614 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1348399724926484, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1517335566320805946&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "duke.edu;duke.edu;army.mil;duke.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Duke University;United States Army Research Laboratory", "aff_unique_dep": ";Army Research Laboratory", "aff_unique_url": "https://www.duke.edu;https://www.arl.army.mil", "aff_unique_abbr": "Duke;ARL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Proportional Fairness in Clustering: A Social Choice Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95639", "id": "KsLX5pFpOs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KsLX5pFpOs", "openreview": "https://openreview.net/forum?id=KsLX5pFpOs", "poster": "/media/PosterPDFs/NeurIPS%202024/95639.png?t=1734018382.9147103", "project": "", "author_site": "Leon Kellerhals, Jannik Peters", "tldr": "", "abstract": "We study the proportional clustering problem of Chen et al. (ICML'19) and relate it to the area of multiwinner voting in computational social choice. We show that any clustering satisfying a weak proportionality notion of Brill and Peters (EC'23) simultaneously obtains the best known approximations to the proportional fairness notion of Chen et al., but also to individual fairness (Jung et al., FORC'20) and the ``core'' (Li et al., ICML'21). In fact, we show that any approximation to proportional fairness is also an approximation to individual fairness and vice versa. Finally, we also study stronger notions of proportional representation, in which deviations do not only happen to single, but multiple candidate centers, and show that stronger proportionality notions of Brill and Peters imply approximations to these stronger guarantees.", "keywords": "clustering;fair clustering;proportional clustering;social choice;fairness;multiwinner voting", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Leon Kellerhals;Jannik Peters", "authorids": "~Leon_Kellerhals1;~Jannik_Peters1", "gender": ";M", "homepage": "https://kellerhals.io/;https://sites.google.com/view/jannikpeters", "dblp": "194/2547;187/9581-1", "google_scholar": "FHiOZ4wAAAAJ;https://scholar.google.de/citations?user=fcfJZkcAAAAJ", "orcid": "0000-0001-6565-3983;", "linkedin": ";", "or_profile": "~Leon_Kellerhals1;~Jannik_Peters1", "aff": "Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin", "aff_domain": "tu-berlin.de;tu-berlin.de", "position": "Postdoc;PhD student", "bibtex": "@inproceedings{\nkellerhals2024proportional,\ntitle={Proportional Fairness in Clustering: A Social Choice Perspective},\nauthor={Leon Kellerhals and Jannik Peters},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KsLX5pFpOs}\n}", "github": "", "reviewers": "qn3s;e2hb;M2yM;g3Lo", "pdf_size": 328363, "rating": "6;6;7;8", "confidence": "3;3;4;3", "soundness": "4;3;4;4", "novelty": "2;3;4;3", "presentation": "3;3;3;4", "wc_summary": "354;124;99;54", "wc_strengths": "171;35;44;79", "wc_weaknesses": "117;19;65;48", "wc_questions": "32;9;20;90", "wc_limitations": "2;1;17;12", "wc_review": "676;188;245;283", "wc_reply_reviewers": "28;0;15;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;0;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 157.75, 116.04821196382132 ], "wc_strengths_avg": [ 82.25, 53.8115926172047 ], "wc_weaknesses_avg": [ 62.25, 35.632674611934476 ], "wc_questions_avg": [ 37.75, 31.24399942388938 ], "wc_limitations_avg": [ 8.0, 6.745368781616021 ], "wc_review_avg": [ 348.0, 192.36553745408764 ], "wc_reply_reviewers_avg": [ 11.75, 10.871407452579449 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8154641302079810836&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tu-berlin.de;tu-berlin.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-berlin.de", "aff_unique_abbr": "TU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Learning to Handle Complex Constraints for Vehicle Routing Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95638", "id": "Ktx95ZuRjP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ktx95ZuRjP", "openreview": "https://openreview.net/forum?id=Ktx95ZuRjP", "poster": "/media/PosterPDFs/NeurIPS%202024/95638.png?t=1731742652.69473", "project": "", "author_site": "Jieyi Bi, Yining Ma, Jianan Zhou, Wen Song, Zhiguang Cao, Yaoxin Wu, Jie Zhang", "tldr": "", "abstract": "Vehicle Routing Problems (VRPs) can model many real-world scenarios and often involve complex constraints. While recent neural methods excel in constructing solutions based on feasibility masking, they struggle with handling complex constraints, especially when obtaining the masking itself is NP-hard. In this paper, we propose a novel Proactive Infeasibility Prevention (PIP) framework to advance the capabilities of neural methods towards more complex VRPs. Our PIP integrates the Lagrangian multiplier as a basis to enhance constraint awareness and introduces preventative infeasibility masking to proactively steer the solution construction process. Moreover, we present PIP-D, which employs an auxiliary decoder and two adaptive strategies to learn and predict these tailored masks, potentially enhancing performance while significantly reducing computational costs during training. To verify our PIP designs, we conduct extensive experiments on the highly challenging Traveling Salesman Problem with Time Window (TSPTW), and TSP with Draft Limit (TSPDL) variants under different constraint hardness levels. Notably, our PIP is generic to boost many neural methods, and exhibits both a significant reduction in infeasible rate and a substantial improvement in solution quality.", "keywords": "vehicle routing problem;learning to optimize;constraint handling", "primary_area": "optimization", "supplementary_material": "", "author": "Jieyi Bi;Yining Ma;Jianan Zhou;Wen Song;Zhiguang Cao;Yaoxin Wu;Jie Zhang", "authorids": "~Jieyi_Bi1;~Yining_Ma1;~Jianan_Zhou1;~Wen_Song1;~Zhiguang_Cao1;~Yaoxin_Wu2;~Jie_Zhang9", "gender": "F;M;M;M;M;M;M", "homepage": "https://jieyibi.github.io/;https://yining043.github.io/;https://royalskye.github.io/;https://songwenas12.github.io/;https://zhiguangcaosg.github.io/;https://personal.ntu.edu.sg/zhangj/;https://research.tue.nl/en/persons/yaoxin-wu", "dblp": "331/2378;160/6245-1;296/2326-2;50/5489;178/8621;84/6889-2;192/4964", "google_scholar": "https://scholar.google.com/citations?hl=en;4_VyBTsAAAAJ;9T58m-EAAAAJ;s8Nz-xoAAAAJ;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;IFV_RdMAAAAJ;0qRnmK8AAAAJ", "orcid": "0000-0001-9480-3434;0000-0002-6639-8547;0000-0002-4896-148X;0000-0001-7624-1861;0000-0002-4499-759X;;0000-0002-3625-6599", "linkedin": "jieyi-bi-9003b5292/;yiningma/;;;;;", "or_profile": "~Jieyi_Bi1;~Yining_Ma1;~Jianan_Zhou1;~Wen_Song1;~Zhiguang_Cao1;~Jie_Zhang9;~YAOXIN_WU1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Shandong University;Singapore Management University;Nanyang Technological University;Eindhoven University of Technology", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;sdu.edu.cn;smu.edu.sg;ntu.edu.sg;tue.nl", "position": "PhD student;Research Fellow;PhD student;Associate Professor;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nbi2024learning,\ntitle={Learning to Handle Complex Constraints for Vehicle Routing Problems},\nauthor={Jieyi Bi and Yining Ma and Jianan Zhou and Wen Song and Zhiguang Cao and Yaoxin Wu and Jie Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ktx95ZuRjP}\n}", "github": "", "reviewers": "Seke;Xb4p;DEtT;azSu", "pdf_size": 0, "rating": "5;5;6;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "107;77;71;123", "wc_strengths": "75;35;70;74", "wc_weaknesses": "209;229;84;100", "wc_questions": "30;126;40;142", "wc_limitations": "51;27;1;11", "wc_review": "472;494;266;450", "wc_reply_reviewers": "0;169;25;57", "wc_reply_authors": "128;2194;93;72", "reply_reviewers": "0;2;1;1", "reply_authors": "3;8;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.5, 21.37171027316251 ], "wc_strengths_avg": [ 63.5, 16.560495161679196 ], "wc_weaknesses_avg": [ 155.5, 64.1424196612507 ], "wc_questions_avg": [ 84.5, 49.947472408521335 ], "wc_limitations_avg": [ 22.5, 18.887826767524103 ], "wc_review_avg": [ 420.5, 90.54694914794203 ], "wc_reply_reviewers_avg": [ 62.75, 64.58473116766841 ], "wc_reply_authors_avg": [ 621.75, 907.95935344045 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.25, 2.165063509461097 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13036512484171785066&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;sdu.edu.cn;smu.edu.sg;ntu.edu.sg;tue.nl", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;3", "aff_unique_norm": "Nanyang Technological University;Shandong University;Singapore Management University;Eindhoven University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.sdu.edu.cn;https://www.smu.edu.sg;https://www.tue.nl", "aff_unique_abbr": "NTU;SDU;SMU;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;2", "aff_country_unique": "Singapore;China;Netherlands" }, { "title": "How to Solve Contextual Goal-Oriented Problems with Offline Datasets?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95637", "id": "Ku31aRq3sW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ku31aRq3sW", "openreview": "https://openreview.net/forum?id=Ku31aRq3sW", "poster": "/media/PosterPDFs/NeurIPS%202024/95637.png?t=1733737142.2437449", "project": "", "author_site": "Ying Fan, Jingling Li, Adith Swaminathan, Aditya Modi, Ching-An Cheng", "tldr": "", "abstract": "We present a novel method, Contextual goal-Oriented Data Augmentation (CODA), which uses commonly available unlabeled trajectories and context-goal pairs to solve Contextual Goal-Oriented (CGO) problems. By carefully constructing an action-augmented MDP that is equivalent to the original MDP, CODA creates a fully labeled transition dataset under training contexts without additional approximation error. We conduct a novel theoretical analysis to demonstrate CODA's capability to solve CGO problems in the offline data setup. Empirical results also showcase the effectiveness of CODA, which outperforms other baseline methods across various context-goal relationships of CGO problem. This approach offers a promising direction to solving CGO problems using offline datasets.", "keywords": "offline RL;goal-conditioned RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Ying Fan;Jingling Li;Adith Swaminathan;Aditya Modi;Ching-An Cheng", "authorids": "~Ying_Fan2;~Jingling_Li1;~Adith_Swaminathan1;~Aditya_Modi1;~Ching-An_Cheng1", "gender": ";;M;M;M", "homepage": "https://yingfan-bot.github.io/;;https://adith387.github.io/;https://adityamodi.github.io;http://www.chinganc.com", "dblp": ";;;147/9396-2;123/6369", "google_scholar": "1aj4dZcAAAAJ;;WNHLjp0AAAAJ;BN0O9wgAAAAJ;bMZFLZ_V4goC", "orcid": ";;;;", "linkedin": "ying-fan-5b7b07147/;;adith-swaminathan-98198a21/;aditya-modi-170a587a/;", "or_profile": "~Ying_Fan2;~Jingling_Li1;~Adith_Swaminathan1;~Aditya_Modi1;~Ching-An_Cheng1", "aff": "University of Wisconsin-Madison;;Microsoft;Microsoft;Microsoft Research", "aff_domain": "cs.wisc.edu;;microsoft.com;microsoft.com;microsoft.com", "position": "Graduate student;;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nfan2024how,\ntitle={How to Solve Contextual Goal-Oriented Problems with Offline Datasets?},\nauthor={Ying Fan and Jingling Li and Adith Swaminathan and Aditya Modi and Ching-An Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ku31aRq3sW}\n}", "github": "", "reviewers": "ek4e;Kh9A;ug4M;CzDa", "pdf_size": 2295066, "rating": "4;5;7;7", "confidence": "2;3;2;2", "soundness": "3;3;4;3", "novelty": "2;2;4;3", "presentation": "1;3;4;3", "wc_summary": "67;75;37;66", "wc_strengths": "57;41;24;78", "wc_weaknesses": "85;119;1;46", "wc_questions": "4;18;1;75", "wc_limitations": "1;7;1;10", "wc_review": "214;260;64;275", "wc_reply_reviewers": "0;91;0;68", "wc_reply_authors": "40;40;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 61.25, 14.428704030508076 ], "wc_strengths_avg": [ 50.0, 19.937402037376884 ], "wc_weaknesses_avg": [ 62.75, 44.024850936715275 ], "wc_questions_avg": [ 24.5, 29.85381047705636 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 203.25, 83.47866493901302 ], "wc_reply_reviewers_avg": [ 39.75, 40.57323625248546 ], "wc_reply_authors_avg": [ 20.0, 20.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6876377497058502898&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.wisc.edu;;microsoft.com;microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com", "aff_unique_abbr": "UW-Madison;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dual-Perspective Activation: Efficient Channel Denoising via Joint Forward-Backward Criterion for Artificial Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95636", "id": "Ku35qKpveg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ku35qKpveg", "openreview": "https://openreview.net/forum?id=Ku35qKpveg", "poster": "/media/PosterPDFs/NeurIPS%202024/95636.png?t=1733120231.718665", "project": "", "author_site": "Tian Qiu, Chenchao Gao, Zunlei Feng, Jie Lei, Bingde Hu, Xingen Wang, Yi Gao, Mingli Song", "tldr": "", "abstract": "The design of Artificial Neural Network (ANN) is inspired by the working patterns of the human brain. Connections in biological neural networks are sparse, as they only exist between few neurons. Meanwhile, the sparse representation in ANNs has been shown to possess significant advantages. Activation responses of ANNs are typically expected to promote sparse representations, where key signals get activated while irrelevant/redundant signals are suppressed. It can be observed that samples of each category are only correlated with sparse and specific channels in ANNs. However, existing activation mechanisms often struggle to suppress signals from other irrelevant channels entirely, and these signals have been verified to be detrimental to the network's final decision. To address the issue of channel noise interference in ANNs, a novel end-to-end trainable Dual-Perspective Activation (DPA) mechanism is proposed. DPA efficiently identifies irrelevant channels and applies channel denoising under the guidance of a joint criterion established online from both forward and backward propagation perspectives while preserving activation responses from relevant channels. Extensive experiments demonstrate that DPA successfully denoises channels and facilitates sparser neural representations. Moreover, DPA is parameter-free, fast, applicable to many mainstream ANN architectures, and achieves remarkable performance compared to other existing activation counterparts across multiple tasks and domains. Code is available at https://github.com/horrible-dong/DPA.", "keywords": "artificial neural network;activation;joint forward-backward criterion;channel denoising;neural response;neural representation;deep learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Tian Qiu;Chenchao Gao;Zunlei Feng;Jie Lei;Bingde Hu;Xingen Wang;Yi Gao;Mingli Song", "authorids": "~Tian_Qiu2;~Chenchao_Gao1;~Zunlei_Feng1;~Jie_Lei4;~Bingde_Hu1;~Xingen_Wang1;~Yi_Gao6;~Mingli_Song1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/horrible-dong;https://blog.csdn.net/ganlesama;https://person.zju.edu.cn/en/zunleifeng;http://www.cs.zjut.edu.cn/staffs/jielei.html;https://www.vipazoo.cn/people/hubingde;https://person.zju.edu.cn/xingenwang;https://yi-gao.github.io/;https://person.zju.edu.cn/msong", "dblp": ";;191/2455;61/5501-2;353/7326;;38/4304-1.html;71/5333", "google_scholar": ";;wMtjcGwAAAAJ;;;S8C1Y0gAAAAJ;oyKWoTkAAAAJ;7oLbhAwAAAAJ", "orcid": ";;;0000-0003-2523-5810;0000-0003-2556-9239;;;0000-0003-2621-6048", "linkedin": ";;;;;;;", "or_profile": "~Tian_Qiu2;~Chenchao_Gao1;~Zunlei_Feng1;~Jie_Lei4;~Bingde_Hu1;~Xingen_Wang1;~Yi_Gao6;~Mingli_Song1", "aff": "Zhejiang University;Dalian University of Technology;Zhejiang University;Zhejiang University of Technology;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;dlut.edu.cn;zju.edu.cn;zjut.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;Undergrad student;Associate Professor;Assistant Professor;Postdoc;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqiu2024dualperspective,\ntitle={Dual-Perspective Activation: Efficient Channel Denoising via Joint Forward-Backward Criterion for Artificial Neural Networks},\nauthor={Tian Qiu and Chenchao Gao and Zunlei Feng and Jie Lei and Bingde Hu and Xingen Wang and Yi Gao and Mingli Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ku35qKpveg}\n}", "github": "", "reviewers": "YhhA;A1s7;4FwJ;EUgL", "pdf_size": 4122760, "rating": "5;6;7;8", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "117;100;118;100", "wc_strengths": "25;134;89;99", "wc_weaknesses": "139;112;127;186", "wc_questions": "153;170;136;14", "wc_limitations": "24;12;23;1", "wc_review": "458;528;493;400", "wc_reply_reviewers": "157;0;53;0", "wc_reply_authors": "110;0;45;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 108.75, 8.757139944068497 ], "wc_strengths_avg": [ 86.75, 39.37242055043098 ], "wc_weaknesses_avg": [ 141.0, 27.685736399814257 ], "wc_questions_avg": [ 118.25, 61.377418485954585 ], "wc_limitations_avg": [ 15.0, 9.354143466934854 ], "wc_review_avg": [ 469.75, 47.26719263929264 ], "wc_reply_reviewers_avg": [ 52.5, 64.0956316764255 ], "wc_reply_authors_avg": [ 38.75, 45.05205322735025 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:clqtlc073gQJ:scholar.google.com/&scioq=Dual-Perspective+Activation:+Efficient+Channel+Denoising+via+Joint+Forward-Backward+Criterion+for+Artificial+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "zju.edu.cn;dlut.edu.cn;zju.edu.cn;zjut.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;0;0;0;0", "aff_unique_norm": "Zhejiang University;Dalian University of Technology;Zhejiang University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;http://www.dlut.edu.cn/;https://www.zjut.edu.cn", "aff_unique_abbr": "ZJU;DUT;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Style Adaptation and Uncertainty Estimation for Multi-Source Blended-Target Domain Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95635", "id": "KvAaIJhqhI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KvAaIJhqhI", "openreview": "https://openreview.net/forum?id=KvAaIJhqhI", "poster": "/media/PosterPDFs/NeurIPS%202024/95635.png?t=1731241052.4576101", "project": "", "author_site": "Yuwu Lu, Haoyu Huang, Xue Hu", "tldr": "", "abstract": "Blended-target domain adaptation (BTDA), which implicitly mixes multiple sub-target domains into a fine domain, has attracted more attention in recent years. Most previously developed BTDA approaches focus on utilizing a single source domain, which makes it difficult to obtain sufficient feature information for learning domain-invariant representations. Furthermore, different feature distributions derived from different domains may increase the uncertainty of models. To overcome these issues, we propose a style adaptation and uncertainty estimation (SAUE) approach for multi-source blended-target domain adaptation (MBDA). Specifically, we exploit the extra knowledge acquired from the blended-target domain, where a similarity factor is adopted to select more useful target style information for augmenting the source features. \\!Then, to mitigate the negative impact of the domain-specific attributes, we devise a function to estimate and mitigate uncertainty in category prediction. Finally, we construct a simple and lightweight adversarial learning strategy for MBDA, effectively aligning multi-source and blended-target domains without the requirements of domain labels of the target domains. Extensive experiments conducted on several challenging DA benchmarks, including the ImageCLEF-DA, Office-Home, VisDA 2017, and DomainNet datasets, demonstrate the superiority of our method over the state-of-the-art (SOTA) approaches.", "keywords": "Domain adaptation;Transfer learning;Multi-source and blended-target;Style transfer;Uncertainty estimation", "primary_area": "machine_vision", "supplementary_material": "/attachment/ddd46bb881e1006832e9665b484e4bac991240f8.zip", "author": "Yuwu Lu;Haoyu Huang;Xue Hu", "authorids": "~Yuwu_Lu1;~Haoyu_Huang4;~Xue_Hu2", "gender": "M;;", "homepage": ";https://www.scholat.com/hyhuang99;", "dblp": "116/9927;;", "google_scholar": ";ZeknZLkAAAAJ;", "orcid": ";0009-0006-5648-4237;0009-0001-1027-6065", "linkedin": ";;", "or_profile": "~Yuwu_Lu1;~Haoyu_Huang4;~Xue_Hu2", "aff": "South China Normal University;South China Normal University;South China Normal University", "aff_domain": "scnu.edu.cn;scnu.edu.cn;scnu.edu.cn", "position": "Associate Professor;MS student;MS student", "bibtex": "@inproceedings{\nlu2024style,\ntitle={Style Adaptation and Uncertainty Estimation for Multi-Source Blended-Target Domain Adaptation},\nauthor={Yuwu Lu and Haoyu Huang and Xue Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KvAaIJhqhI}\n}", "github": "", "reviewers": "NQ8Y;cWFB;LwRV;Sfz7", "pdf_size": 3836022, "rating": "5;5;6;7", "confidence": "3;5;4;5", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "130;92;109;123", "wc_strengths": "18;23;26;148", "wc_weaknesses": "49;146;10;109", "wc_questions": "4;6;291;6", "wc_limitations": "7;8;1;10", "wc_review": "208;275;437;396", "wc_reply_reviewers": "0;0;53;46", "wc_reply_authors": "53;51;80;47", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 113.5, 14.534441853748634 ], "wc_strengths_avg": [ 53.75, 54.49025142169928 ], "wc_weaknesses_avg": [ 78.5, 52.55711179279166 ], "wc_questions_avg": [ 76.75, 123.69998989490662 ], "wc_limitations_avg": [ 6.5, 3.3541019662496847 ], "wc_review_avg": [ 329.0, 91.80141611108186 ], "wc_reply_reviewers_avg": [ 24.75, 24.873429598670143 ], "wc_reply_authors_avg": [ 57.75, 13.026415470113028 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7724172665755006067&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "email": "scnu.edu.cn;scnu.edu.cn;scnu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "South China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.scnu.edu.cn", "aff_unique_abbr": "SCNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "QBB: Quantization with Binary Bases for LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95634", "id": "Kw6MRGFx0R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kw6MRGFx0R", "openreview": "https://openreview.net/forum?id=Kw6MRGFx0R", "poster": "", "project": "", "author_site": "Adrian Bulat, Yassine Ouali, Georgios Tzimiropoulos", "tldr": "", "abstract": "Current post-training quantization methods for LLMs compress the weights down to 4-bits, with moderate to low degradation in accuracy. However, further reducing the number of bits or accelerating the network while avoiding large accuracy drops, especially for smaller, sub 7B models, remains an actively researched and open problem. To address this, in this work, we introduce Quantization with Binary Bases (QBB), a new approach for low-bit quantization that effectively removes (nearly) all multiplications, reducing the implementation to summations. Our novel approach works by decomposing the original weights into a set of binary (1-bit) matrices using an iterative process. For a given layer, starting from a weight matrix, we first construct an initial approximation using an analytical solution, where each new binary matrix, paired with a scaling vector, approximates the residual error of the previous estimation. Secondly, using gradient descent and a progressive learning curriculum, we find the optimal set of binary matrices and scaling vectors that minimize the $\\ell_2$ distance between the produced approximation and original weights. Thirdly, as previous steps are input agnostic, we holistically optimize the scaling vectors alone, calibrating them in student-teacher fashion, with the teacher providing both the data, \n by autoregressive generation starting from a random token, and the target logits.\n When evaluated across multiple LLM families, our approach matches and outperforms all prior works, setting a new state-of-the-art result using a summation-only based approach.", "keywords": "LLMs; Binarization; Network quantization;", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Adrian Bulat;Yassine Ouali;Georgios Tzimiropoulos", "authorids": "~Adrian_Bulat1;~Yassine_Ouali1;~Georgios_Tzimiropoulos1", "gender": ";M;M", "homepage": "https://www.adrianbulat.com;https://yassouali.github.io;https://ytzimiro.github.io/", "dblp": "185/6878;261/3456;03/3273", "google_scholar": "https://scholar.google.co.uk/citations?user=5sKcsg0AAAAJ;u3VGGscAAAAJ;https://scholar.google.co.uk/citations?user=D4JkWxf-8fwC", "orcid": "0000-0002-3185-4979;;", "linkedin": ";;", "or_profile": "~Adrian_Bulat1;~Yassine_Ouali1;~Georgios_Tzimiropoulos1", "aff": "Samsung AI Center Cambridge;Samsung;Queen Mary University London", "aff_domain": "samsung.com;samsung.com;qmul.ac.uk", "position": "Research Scientist;Researcher;Associate Professor", "bibtex": "@inproceedings{\nbulat2024qbb,\ntitle={{QBB}: Quantization with Binary Bases for {LLM}s},\nauthor={Adrian Bulat and Yassine Ouali and Georgios Tzimiropoulos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Kw6MRGFx0R}\n}", "github": "", "reviewers": "waud;XBMG;L58J;vSy1", "pdf_size": 840642, "rating": "3;5;6;6", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "47;50;71;175", "wc_strengths": "65;34;64;19", "wc_weaknesses": "221;296;93;69", "wc_questions": "37;2;4;78", "wc_limitations": "8;14;4;19", "wc_review": "378;396;236;360", "wc_reply_reviewers": "179;0;0;0", "wc_reply_authors": "177;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.75, 52.351575907512085 ], "wc_strengths_avg": [ 45.5, 19.72941965694886 ], "wc_weaknesses_avg": [ 169.75, 93.01444780247851 ], "wc_questions_avg": [ 30.25, 30.873734791890662 ], "wc_limitations_avg": [ 11.25, 5.717298313014636 ], "wc_review_avg": [ 342.5, 62.791321056337075 ], "wc_reply_reviewers_avg": [ 44.75, 77.50927363870726 ], "wc_reply_authors_avg": [ 44.25, 76.64324823492282 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9462362459703452871&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "samsung.com;samsung.com;qmul.ac.uk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Samsung;Queen Mary University of London", "aff_unique_dep": "AI Center;", "aff_unique_url": "https://www.samsung.com/global/innovation/ai-research/;https://www.qmul.ac.uk", "aff_unique_abbr": "SAC;QMUL", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;London", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "Noise Contrastive Alignment of Language Models with Explicit Rewards", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95633", "id": "KwRLDkyVOl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KwRLDkyVOl", "openreview": "https://openreview.net/forum?id=KwRLDkyVOl", "poster": "/media/PosterPDFs/NeurIPS%202024/95633.png?t=1733016102.2043471", "project": "", "author_site": "Huayu Chen, Guande He, Lifan Yuan, Ganqu Cui, Hang Su, Jun Zhu", "tldr": "", "abstract": "User intentions are typically formalized as evaluation rewards to be maximized when fine-tuning language models (LMs). Existing alignment methods, such as Direct Preference Optimization (DPO), are mainly tailored for pairwise preference data where rewards are implicitly defined rather than explicitly given. In this paper, we introduce a general framework for LM alignment, leveraging Noise Contrastive Estimation (NCE) to bridge the gap in handling reward datasets explicitly annotated with scalar evaluations. Our framework comprises two parallel algorithms, NCA and InfoNCA, both enabling the direct extraction of an LM policy from reward data as well as preference data. Notably, we show that the DPO loss is a special case of our proposed InfoNCA objective under pairwise preference settings, thereby integrating and extending current alignment theories. By comparing NCA and InfoNCA, we demonstrate that the well-observed decreasing-likelihood trend of DPO/InfoNCA is caused by their focus on adjusting relative likelihood across different responses.\nIn contrast, NCA optimizes the absolute likelihood for each response, thereby effectively preventing the chosen likelihood from decreasing. We evaluate our methods in both reward and preference settings with Mistral-8$\\times$7B and 7B models. Experiments suggest that InfoNCA/NCA surpasses various preference baselines when reward datasets are available. We also find NCA significantly outperforms DPO in complex reasoning tasks like math and coding.", "keywords": "noise contrastive estimation;reinforcement learning;language model;alignment;contrastive", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/978c9b97e73333e6e4e5174342e7ae0af0d517fb.zip", "author": "Huayu Chen;Guande He;Lifan Yuan;Ganqu Cui;Hang Su;Jun Zhu", "authorids": "~Huayu_Chen1;~Guande_He1;~Lifan_Yuan1;~Ganqu_Cui1;~Hang_Su3;~Jun_Zhu2", "gender": "M;;;M;;M", "homepage": "https://chendrag.github.io/;https://guandehe.github.io/;;https://cgq15.github.io/;;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "259/3113;348/7035.html;;232/3064;;50/2644-1", "google_scholar": "0FBCHc4AAAAJ;3rddMeMAAAAJ;;3IVSzZgAAAAJ;;axsP38wAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Huayu_Chen1;~Guande_He1;~Lifan_Yuan1;~Ganqu_Cui1;~Hang_Su3;~Jun_Zhu2", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;;mail.tsinghua.edu.cn", "position": "PhD student;MS student;;PhD student;;Professor", "bibtex": "@inproceedings{\nchen2024noise,\ntitle={Noise Contrastive Alignment of Language Models with Explicit Rewards},\nauthor={Huayu Chen and Guande He and Lifan Yuan and Ganqu Cui and Hang Su and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KwRLDkyVOl}\n}", "github": "", "reviewers": "EyJu;xAZC;WFsu;UYN1;EZsK", "pdf_size": 1190331, "rating": "6;7;7;7;7", "confidence": "4;4;3;3;4", "soundness": "3;3;3;3;2", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "107;70;222;53;61", "wc_strengths": "141;40;140;65;79", "wc_weaknesses": "267;8;6;44;179", "wc_questions": "2;16;61;8;38", "wc_limitations": "45;8;7;8;6", "wc_review": "562;142;436;178;363", "wc_reply_reviewers": "24;0;0;0;19", "wc_reply_authors": "366;0;0;0;334", "reply_reviewers": "1;0;0;0;1", "reply_authors": "4;1;1;1;3", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.6, 62.496719913928274 ], "wc_strengths_avg": [ 93.0, 40.74800608618783 ], "wc_weaknesses_avg": [ 100.8, 104.4631992617496 ], "wc_questions_avg": [ 25.0, 21.743964679883014 ], "wc_limitations_avg": [ 14.8, 15.118200951171406 ], "wc_review_avg": [ 336.2, 157.7369962944648 ], "wc_reply_reviewers_avg": [ 8.6, 10.650821564555477 ], "wc_reply_authors_avg": [ 140.0, 171.76262690119756 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13171652967440108816&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;;mail.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Why the Metric Backbone Preserves Community Structure", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95632", "id": "Kx8I0rP7w2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kx8I0rP7w2", "openreview": "https://openreview.net/forum?id=Kx8I0rP7w2", "poster": "/media/PosterPDFs/NeurIPS%202024/95632.png?t=1732817692.4238875", "project": "", "author_site": "Maximilien Dreveton, Charbel Chucri, Matthias Grossglauser, Patrick Thiran", "tldr": "", "abstract": "The metric backbone of a weighted graph is the union of all-pairs shortest paths. It is obtained by removing all edges $(u,v)$ that are not the shortest path between $u$ and $v$. In networks with well-separated communities, the metric backbone tends to preserve many inter-community edges, because these edges serve as bridges connecting two communities, but tends to delete many intra-community edges because the communities are dense. This suggests that the metric backbone would dilute or destroy the community structure of the network. However, this is not borne out by prior empirical work, which instead showed that the metric backbone of real networks preserves the community structure of the original network well. In this work, we analyze the metric backbone of a broad class of weighted random graphs with communities, and we formally prove the robustness of the community structure with respect to the deletion of all the edges that are not in the metric backbone. An empirical comparison of several graph sparsification techniques confirms our theoretical finding and shows that the metric backbone is an efficient sparsifier in the presence of communities.", "keywords": "community detection;graph sparsification;stochastic block model", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Maximilien Dreveton;Charbel Chucri;Matthias Grossglauser;Patrick Thiran", "authorids": "~Maximilien_Dreveton1;~Charbel_Chucri1;~Matthias_Grossglauser1;~Patrick_Thiran1", "gender": ";;;", "homepage": "https://maximiliendreveton.fr/;;;https://people.epfl.ch/patrick.thiran", "dblp": "244/6486;;;t/PThiran", "google_scholar": "y7tyN8QAAAAJ;;;https://scholar.google.ch/citations?user=7Ek7pqgAAAAJ", "orcid": "0000-0001-6613-0615;;;", "linkedin": "maximilien-dreveton-42889a107/;charbelchucri/;;", "or_profile": "~Maximilien_Dreveton1;~Charbel_Chucri1;~Matthias_Grossglauser1;~Patrick_Thiran1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;;EPFL", "aff_domain": "epfl.ch;epfl.ch;;epfl.ch", "position": "Postdoc;MS student;;Full Professor", "bibtex": "@inproceedings{\ndreveton2024why,\ntitle={Why the Metric Backbone Preserves Community Structure},\nauthor={Maximilien Dreveton and Charbel Chucri and Matthias Grossglauser and Patrick Thiran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Kx8I0rP7w2}\n}", "github": "", "reviewers": "uKT3;bM2e;Qqxh;JByP", "pdf_size": 866421, "rating": "5;7;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "57;66;75;104", "wc_strengths": "40;27;83;20", "wc_weaknesses": "225;170;124;57", "wc_questions": "27;115;11;105", "wc_limitations": "35;9;1;15", "wc_review": "384;387;294;301", "wc_reply_reviewers": "0;48;0;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.5, 17.64227876437735 ], "wc_strengths_avg": [ 42.5, 24.45914961727002 ], "wc_weaknesses_avg": [ 144.0, 61.656305435859515 ], "wc_questions_avg": [ 64.5, 45.986411036305064 ], "wc_limitations_avg": [ 15.0, 12.569805089976535 ], "wc_review_avg": [ 341.5, 44.08230937689177 ], "wc_reply_reviewers_avg": [ 14.5, 19.767397400770797 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b6nTda-ayGAJ:scholar.google.com/&scioq=Why+the+Metric+Backbone+Preserves+Community+Structure&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "epfl.ch;epfl.ch;;epfl.ch", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Bayesian Optimization of Functions over Node Subsets in Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95631", "id": "KxjGi1krBi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KxjGi1krBi", "openreview": "https://openreview.net/forum?id=KxjGi1krBi", "poster": "/media/PosterPDFs/NeurIPS%202024/95631.png?t=1733410472.2962916", "project": "", "author_site": "Huidong Liang, Xingchen Wan, Xiaowen Dong", "tldr": "", "abstract": "We address the problem of optimizing over functions defined on node subsets in a graph. The optimization of such functions is often a non-trivial task given their combinatorial, black-box and expensive-to-evaluate nature. Although various algorithms have been introduced in the literature, most are either task-specific or computationally inefficient and only utilize information about the graph structure without considering the characteristics of the function. To address these limitations, we utilize Bayesian Optimization (BO), a sample-efficient black-box solver, and propose a novel framework for combinatorial optimization on graphs. More specifically, we map each $k$-node subset in the original graph to a node in a new combinatorial graph and adopt a local modeling approach to efficiently traverse the latter graph by progressively sampling its subgraphs using a recursive algorithm. Extensive experiments under both synthetic and real-world setups demonstrate the effectiveness of the proposed BO framework on various types of graphs and optimization tasks, where its behavior is analyzed in detail with ablation studies.", "keywords": "Bayesian Optimization;Graphs", "primary_area": "optimization", "supplementary_material": "", "author": "Huidong Liang;Xingchen Wan;Xiaowen Dong", "authorids": "~Huidong_Liang2;~Xingchen_Wan1;~Xiaowen_Dong1", "gender": ";M;", "homepage": ";https://xingchen.one;https://web.media.mit.edu/~xdong/", "dblp": ";255/7214;91/9827-1", "google_scholar": ";6KkohssAAAAJ;_8tUq8kAAAAJ", "orcid": ";0000-0003-0074-0597;", "linkedin": ";;", "or_profile": "~Huidong_Liang2;~Xingchen_Wan1;~Xiaowen_Dong1", "aff": ";Google;Massachusetts Institute of Technology", "aff_domain": ";google.com;mit.edu", "position": ";Research Scientist;Research Affiliate", "bibtex": "@inproceedings{\nliang2024bayesian,\ntitle={Bayesian Optimization of Functions over Node Subsets in Graphs},\nauthor={Huidong Liang and Xingchen Wan and Xiaowen Dong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KxjGi1krBi}\n}", "github": "", "reviewers": "Y7Gf;bfWL;u7DA;yrTA", "pdf_size": 23893768, "rating": "5;5;5;6", "confidence": "2;3;4;2", "soundness": "4;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "78;134;98;66", "wc_strengths": "78;47;35;42", "wc_weaknesses": "33;145;133;41", "wc_questions": "46;159;22;63", "wc_limitations": "5;27;46;12", "wc_review": "240;512;334;224", "wc_reply_reviewers": "95;60;52;18", "wc_reply_authors": "29;216;405;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;4;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 25.768197453450252 ], "wc_strengths_avg": [ 50.5, 16.439282222773596 ], "wc_weaknesses_avg": [ 88.0, 51.254268114957995 ], "wc_questions_avg": [ 72.5, 52.02163011671203 ], "wc_limitations_avg": [ 22.5, 15.724185193516387 ], "wc_review_avg": [ 327.5, 114.51091651017383 ], "wc_reply_reviewers_avg": [ 56.25, 27.371289702898547 ], "wc_reply_authors_avg": [ 167.75, 157.64100830684887 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13883306201689106986&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";google.com;mit.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "CLAVE: An Adaptive Framework for Evaluating Values of LLM Generated Responses", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97747", "id": "Kxta8IInyN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kxta8IInyN", "openreview": "https://openreview.net/forum?id=Kxta8IInyN", "poster": "", "project": "", "author_site": "Jing Yao, Xiaoyuan Yi, Xing Xie", "tldr": "", "abstract": "The rapid progress in Large Language Models (LLMs) poses potential risks such as generating unethical content. Assessing the values embedded in LLMs' generated responses can help expose their misalignment, but this relies on reference-free value evaluators, e.g. fine-tuned LLMs or closed-source models like GPT-4. Nevertheless, two key challenges emerge in open-ended value evaluation: the evaluator should adapt to changing human value definitions with minimal annotation, against their own bias (adaptability); and remain robust across varying value expressions and scenarios (generalizability). To handle these challenges, we introduce CLAVE, a novel framework that integrates two complementary LLMs: a large model to extract high-level value concepts from diverse responses, leveraging its extensive knowledge and generalizability, and a small model fine-tuned on these concepts to adapt to human value annotations. This dual-model framework enables adaptation to any value system using <100 human-labeled samples per value type. We also present ValEval, a comprehensive dataset comprising 13k+ (text,value,label) tuples across diverse domains, covering three major value systems. We benchmark the performance of 15+ popular LLM evaluators and fully analyze their strengths and weaknesses. Our findings reveal that CLAVE combining a large prompt-based model and a small fine-tuned one serves as an optimal balance in value evaluation.", "keywords": "large language model;value evaluation;value alignment;value concept", "primary_area": "", "supplementary_material": "/attachment/bd67c154a41b2414cf56b61e824f09f78347da2e.zip", "author": "Jing Yao;Xiaoyuan Yi;Xing Xie", "authorids": "~Jing_Yao4;~Xiaoyuan_Yi1;~Xing_Xie3", "gender": "F;M;M", "homepage": ";;http://research.microsoft.com/en-us/people/xingx/", "dblp": "24/5678.html;179/2248;08/6809-1", "google_scholar": "https://scholar.google.jp/citations?user=2FH8EjkAAAAJ;BdpXcLgAAAAJ;5EQfAFIAAAAJ", "orcid": ";0000-0003-2710-1613;0000-0002-8608-8482", "linkedin": ";xiaoyuan-yi-471212a5/;xingx/", "or_profile": "~Jing_Yao4;~Xiaoyuan_Yi1;~Xing_Xie3", "aff": "Microsoft;Microsoft Research;Microsoft Research Asia", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com", "position": "Researcher;Senior Researcher;Senior Principal Researcher", "bibtex": "@inproceedings{\nyao2024clave,\ntitle={{CLAVE}: An Adaptive Framework for Evaluating Values of {LLM} Generated Responses},\nauthor={Jing Yao and Xiaoyuan Yi and Xing Xie},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Kxta8IInyN}\n}", "github": "", "reviewers": "CKLm;zRAV;1FUw", "pdf_size": 2511275, "rating": "6;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "24;81;91", "wc_strengths": "25;80;104", "wc_improvement": "195;62;124", "wc_limitations": "28;27;68", "wc_correctness": "11;5;40", "wc_clarity": "20;7;14", "wc_relation_to_prior_work": "7;8;14", "wc_documentation": "8;17;36", "wc_additional_feedback": "1;1;1", "wc_review": "319;288;492", "wc_reply_reviewers": "34;17;28", "wc_reply_authors": "610;351;0", "reply_reviewers": "1;1;1", "reply_authors": "4;3;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 65.33333333333333, 29.51082663852182 ], "wc_strengths_avg": [ 69.66666666666667, 33.06895153396242 ], "wc_improvement_avg": [ 127.0, 54.33844556726542 ], "wc_limitations_avg": [ 41.0, 19.096247449870006 ], "wc_correctness_avg": [ 18.666666666666668, 15.2825245151302 ], "wc_clarity_avg": [ 13.666666666666666, 5.312459150169743 ], "wc_relation_to_prior_work_avg": [ 9.666666666666666, 3.0912061651652345 ], "wc_documentation_avg": [ 20.333333333333332, 11.671427600007732 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 366.3333333333333, 89.75646061550233 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 7.039570693980958 ], "wc_reply_authors_avg": [ 320.3333333333333, 249.97377640242365 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12682872980773876135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "microsoft.com;research.microsoft.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "title": "The Minimax Rate of HSIC Estimation for Translation-Invariant Kernels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95630", "id": "KyNO0n1bJ9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KyNO0n1bJ9", "openreview": "https://openreview.net/forum?id=KyNO0n1bJ9", "poster": "/media/PosterPDFs/NeurIPS%202024/95630.png?t=1731239128.6265826", "project": "", "author_site": "Florian Kalinke, Zoltan Szabo", "tldr": "", "abstract": "Kernel techniques are among the most influential approaches in data science and statistics. Under mild conditions, the reproducing kernel Hilbert space associated to a kernel is capable of encoding the independence of $M\\ge2$ random variables. Probably the most widespread independence measure relying on kernels is the so-called Hilbert-Schmidt independence criterion (HSIC; also referred to as distance covariance in the statistics literature). Despite various existing HSIC estimators designed since its introduction close to two decades ago, the fundamental question of the rate at which HSIC can be estimated is still open. In this work, we prove that the minimax optimal rate of HSIC estimation on $\\mathbb{R}^d$ for Borel measures containing the Gaussians with continuous bounded translation-invariant characteristic kernels is $\\mathcal{O}\\left(n^{-1/2}\\right)$. Specifically, our result implies the optimality in the minimax sense of many of the most-frequently used estimators (including the U-statistic, the V-statistic, and the Nystr\u00f6m-based one) on $\\mathbb{R}^d$.", "keywords": "kernel method;Hilbert-Schmidt independence criterion;minimax rate;translation-invariant kernels", "primary_area": "learning_theory", "supplementary_material": "", "author": "Florian Kalinke;Zolt\u00e1n Szab\u00f3", "authorids": "~Florian_Kalinke1;~Zolt\u00e1n_Szab\u00f31", "gender": ";M", "homepage": "https://flopska.com/;https://zoltansz.github.io/", "dblp": "278/7911;73/2909-1", "google_scholar": "KYWxZHEAAAAJ;aFW7v7EAAAAJ", "orcid": "0000-0002-0443-6288;0000-0001-6183-7603", "linkedin": ";zoltan-szabo-a8972159", "or_profile": "~Florian_Kalinke1;~Zoltan_Szabo1", "aff": "Karlsruhe Institut f\u00fcr Technologie;London School of Economics", "aff_domain": "kit.edu;lse.ac.uk", "position": "PhD student;Professor of Data Science", "bibtex": "@inproceedings{\nkalinke2024the,\ntitle={The Minimax Rate of {HSIC} Estimation for Translation-Invariant Kernels},\nauthor={Florian Kalinke and Zolt{\\'a}n Szab{\\'o}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KyNO0n1bJ9}\n}", "github": "", "reviewers": "tUTX;6YpR;g9Na", "pdf_size": 499536, "rating": "5;6;7", "confidence": "3;2;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;4", "wc_summary": "31;82;88", "wc_strengths": "117;49;93", "wc_weaknesses": "43;57;114", "wc_questions": "41;1;2", "wc_limitations": "1;1;1", "wc_review": "233;190;298", "wc_reply_reviewers": "8;0;8", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 67.0, 25.573423705088842 ], "wc_strengths_avg": [ 86.33333333333333, 28.158282775923833 ], "wc_weaknesses_avg": [ 71.33333333333333, 30.706495874470743 ], "wc_questions_avg": [ 14.666666666666666, 18.624953392931992 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 240.33333333333334, 44.39469437769438 ], "wc_reply_reviewers_avg": [ 5.333333333333333, 3.7712361663282534 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7659445184290189543&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 7, "email": "kit.edu;lse.ac.uk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Karlsruhe Institute of Technology;London School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.lse.ac.uk", "aff_unique_abbr": "KIT;LSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Injecting Undetectable Backdoors in Obfuscated Neural Networks and Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95629", "id": "KyVBzkConO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=KyVBzkConO", "openreview": "https://openreview.net/forum?id=KyVBzkConO", "poster": "", "project": "", "author_site": "Alkis Kalavasis, Amin Karbasi, Argyris Oikonomou, Katerina Sotiraki, Grigoris Velegkas, Manolis Zampetakis", "tldr": "", "abstract": "As ML models become increasingly complex and integral to high-stakes domains such as finance and healthcare, they also become more susceptible to sophisticated adversarial attacks. We investigate the threat posed by undetectable backdoors, as defined in Goldwasser et al. [2022], in models developed by insidious external expert firms. When such backdoors exist, they allow the designer of the model to sell information on how to slightly perturb their input to change the outcome of the model. We develop a general strategy to plant backdoors to obfuscated neural networks, that satisfy the security properties of the celebrated notion of indistinguishability obfuscation. Applying obfuscation before releasing neural networks is a strategy that is well motivated to protect sensitive information of the external expert firm. Our method to plant backdoors ensures that even if the weights and architecture of the obfuscated model are accessible, the existence of\nthe backdoor is still undetectable. Finally, we introduce the notion of undetectable backdoors to language models and extend our neural network backdoor attacks to such models based on the existence of steganographic functions.", "keywords": "backdoors;white-box undetectable;obfuscation;theory", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Alkis Kalavasis;Amin Karbasi;Argyris Oikonomou;Katerina Sotiraki;Grigoris Velegkas;Manolis Zampetakis", "authorids": "~Alkis_Kalavasis1;~Amin_Karbasi3;~Argyris_Oikonomou1;~Katerina_Sotiraki1;~Grigoris_Velegkas1;~Manolis_Zampetakis2", "gender": "M;;;M;M;M", "homepage": "https://alkisk.github.io/;http://aroikonomou.github.io;https://sotiraki.com/;;https://mzampet.com/;http://seas.yale.edu/faculty-research/faculty-directory/amin-karbasi", "dblp": "269/9425;254/2082;;254/1885;;49/7411", "google_scholar": "NgVIFJwAAAAJ;;;Ty1kgP0AAAAJ;;https://scholar.google.com.tw/citations?user=VusVB38AAAAJ", "orcid": ";0000-0002-6456-0109;;;;", "linkedin": ";;;;;", "or_profile": "~Alkis_Kalavasis1;~Argyris_Oikonomou1;~Katerina_Sotiraki1;~Grigoris_Velegkas1;~Manolis_Zampetakis2;~amin_karbasi1", "aff": "Yale University;Meta;Yale University;Yale University;Yale University;Google", "aff_domain": "yale.edu;meta.com;yale.edu;yale.edu;yale.edu;google.com", "position": "Postdoc;Intern;Assistant Professor;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nkalavasis2024injecting,\ntitle={Injecting Undetectable Backdoors in Obfuscated Neural Networks and Language Models},\nauthor={Alkis Kalavasis and Amin Karbasi and Argyris Oikonomou and Katerina Sotiraki and Grigoris Velegkas and Manolis Zampetakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=KyVBzkConO}\n}", "github": "", "reviewers": "iKKq;NKsd;JK5n;paZi", "pdf_size": 554429, "rating": "5;5;6;8", "confidence": "4;3;2;4", "soundness": "2;4;3;4", "novelty": "2;2;3;4", "presentation": "2;4;2;4", "wc_summary": "66;54;14;53", "wc_strengths": "19;24;32;117", "wc_weaknesses": "310;38;38;54", "wc_questions": "71;117;1;372", "wc_limitations": "9;26;1;22", "wc_review": "475;259;86;618", "wc_reply_reviewers": "453;157;11;49", "wc_reply_authors": "2278;0;0;963", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;1;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 46.75, 19.587942719948924 ], "wc_strengths_avg": [ 48.0, 40.10610926031095 ], "wc_weaknesses_avg": [ 110.0, 115.65465835840769 ], "wc_questions_avg": [ 140.25, 140.0310233483995 ], "wc_limitations_avg": [ 14.5, 10.012492197250394 ], "wc_review_avg": [ 359.5, 203.1409609113829 ], "wc_reply_reviewers_avg": [ 167.5, 173.31690627287344 ], "wc_reply_authors_avg": [ 810.25, 934.1617566032127 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.24618298195866545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10777830221457342593&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "yale.edu;meta.com;yale.edu;yale.edu;yale.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Yale University;Meta;Google", "aff_unique_dep": ";Meta Platforms, Inc.;Google", "aff_unique_url": "https://www.yale.edu;https://meta.com;https://www.google.com", "aff_unique_abbr": "Yale;Meta;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Structure-Aware Framework for Learning Device Placements on Computation Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95628", "id": "Kzno1r3Xef", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Kzno1r3Xef", "openreview": "https://openreview.net/forum?id=Kzno1r3Xef", "poster": "/media/PosterPDFs/NeurIPS%202024/95628.png?t=1731374352.364087", "project": "", "author_site": "Shukai Duan, Heng Ping, Nikos Kanakaris, Xiongye Xiao, Panagiotis Kyriakis, Nesreen K. Ahmed, Peiyu Zhang, Guixiang Ma, Mihai Capot\u0103, Shahin Nazarian, Theodore Willke, Paul Bogdan", "tldr": "", "abstract": "Computation graphs are Directed Acyclic Graphs (DAGs) where the nodes correspond to mathematical operations and are used widely as abstractions in optimizations of neural networks. The device placement problem aims to identify optimal allocations of those nodes to a set of (potentially heterogeneous) devices. Existing approaches rely on two types of architectures known as grouper-placer and encoder-placer, respectively. In this work, we bridge the gap between encoder-placer and grouper-placer techniques and propose a novel framework for the task of device placement, relying on smaller computation graphs extracted from the OpenVINO toolkit. The framework consists of five steps, including graph coarsening, node representation learning and policy optimization. It facilitates end-to-end training and takes into account the DAG nature of the computation graphs. We also propose a model variant, inspired by graph parsing networks and complex network analysis, enabling graph representation learning and jointed, personalized graph partitioning, using an unspecified number of groups. To train the entire framework, we use reinforcement learning using the execution time of the placement as a reward. We demonstrate the flexibility and effectiveness of our approach through multiple experiments with three benchmark models, namely Inception-V3, ResNet, and BERT. The robustness of the proposed framework is also highlighted through an ablation study. The suggested placements improve the inference speed for the benchmark models by up to $58.2\\%$ over CPU execution and by up to $60.24\\%$ compared to other commonly used baselines.", "keywords": "device placement;heterogeneous computing;computation graphs;graph pooling", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/c06523f4ced96cab83e5b7ffb889a77ae667f59f.zip", "author": "Shukai Duan;Heng Ping;Nikos Kanakaris;Xiongye Xiao;Panagiotis Kyriakis;Nesreen K. Ahmed;Peiyu Zhang;Guixiang Ma;Mihai Capot\u0103;Shahin Nazarian;Theodore L. Willke;Paul Bogdan", "authorids": "~Shukai_Duan2;~Heng_Ping1;~Nikos_Kanakaris1;~Xiongye_Xiao1;~Panagiotis_Kyriakis1;~Nesreen_K._Ahmed2;~Peiyu_Zhang1;~Guixiang_Ma2;~Mihai_Capot\u01031;~Shahin_Nazarian1;~Theodore_L._Willke1;~Paul_Bogdan1", "gender": "M;M;M;M;M;M;;;M;M;M;F", "homepage": "https://www.linkedin.com/in/sk-duan-224271283/;https://cps.usc.edu/studs.html;https://nkanak.github.io/;;;https://zpykillcc.github.io/;https://www.linkedin.com/in/guixiangma/;https://orcid.org/0000-0002-7323-2393;http://sportlab.usc.edu/~shahin/;;https://cps.usc.edu/;http://nesreenahmed.com", "dblp": ";276/4292;160/5308;301/0208;225/3791;79/;66/8585;04/8524;;40/3641;05/5539;33/11518", "google_scholar": "8AxaJXoAAAAJ;7c8rCeQAAAAJ;rX-SfF8AAAAJ;AvIxA64AAAAJ;https://scholar.google.com/citations?hl=en;;2K81fLYAAAAJ;;22r6J9IAAAAJ;l0SYN0EAAAAJ;Xw_v8-gAAAAJ;AFV0nLcAAAAJ", "orcid": "0009-0002-3937-9436;0009-0008-8127-3401;0000-0001-9352-5807;0000-0002-3181-7166;;0009-0005-1290-5274;;0000-0002-7323-2393;;;0000-0003-2118-0816;", "linkedin": "sk-duan-224271283/;;nkanak/;;;;;;shahin-nazarian;ted-willke/;paul-bogdan-4b098a6/;nkahmed/", "or_profile": "~Shukai_Duan2;~Heng_Ping1;~Nikos_Kanakaris1;~Xiongye_Xiao1;~Panagiotis_Kyriakis1;~Peiyu_Zhang1;~Guixiang_Ma2;~Mihai_Capot\u01031;~Shahin_Nazarian1;~Theodore_L._Willke1;~Paul_Bogdan1;~Nesreen_Ahmed1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California;;Huazhong University of Science and Technology;Intel;Intel Labs;University of Southern California;Intel Corporation;University of Southern California;Intel AI Research", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu;;hust.edu.cn;intel.com;intel.com;usc.edu;intel.com;usc.edu;intel.com", "position": "PhD student;PhD student;Postdoc;PhD student;;Undergrad student;Research Scientist;Researcher;Full Professor;Senior Principal Engineer;Jack Munushian Early Career Chair associate professor;Principal Researcher", "bibtex": "@inproceedings{\nduan2024a,\ntitle={A Structure-Aware Framework for Learning Device Placements on Computation Graphs},\nauthor={Shukai Duan and Heng Ping and Nikos Kanakaris and Xiongye Xiao and Panagiotis Kyriakis and Nesreen K. Ahmed and Peiyu Zhang and Guixiang Ma and Mihai Capot{\\u{a}} and Shahin Nazarian and Theodore L. Willke and Paul Bogdan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Kzno1r3Xef}\n}", "github": "", "reviewers": "orCV;RMDa;wsma;PWmk", "pdf_size": 892249, "rating": "4;6;6;7", "confidence": "3;3;4;3", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "59;66;98;74", "wc_strengths": "29;9;86;48", "wc_weaknesses": "168;107;163;1", "wc_questions": "115;2;6;36", "wc_limitations": "8;15;28;15", "wc_review": "379;199;381;174", "wc_reply_reviewers": "32;10;34;14", "wc_reply_authors": "36;18;35;22", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.25, 14.703315952532613 ], "wc_strengths_avg": [ 43.0, 28.398943642325854 ], "wc_weaknesses_avg": [ 109.75, 67.1988653178013 ], "wc_questions_avg": [ 39.75, 45.38928838393482 ], "wc_limitations_avg": [ 16.5, 7.22841614740048 ], "wc_review_avg": [ 283.25, 97.1554810599999 ], "wc_reply_reviewers_avg": [ 22.5, 10.618380290797651 ], "wc_reply_authors_avg": [ 27.75, 7.8859051477937525 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6322161653050844731&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "usc.edu;usc.edu;usc.edu;usc.edu;;hust.edu.cn;intel.com;intel.com;usc.edu;intel.com;usc.edu;intel.com", "author_num": 12, "aff_unique_index": "0;0;0;0;1;2;2;0;2;0;2", "aff_unique_norm": "University of Southern California;Huazhong University of Science and Technology;Intel", "aff_unique_dep": ";;Intel Corporation", "aff_unique_url": "https://www.usc.edu;http://www.hust.edu.cn;https://www.intel.com", "aff_unique_abbr": "USC;HUST;Intel", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Benchmarking LLMs via Uncertainty Quantification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97746", "id": "L0oSfTroNE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L0oSfTroNE", "openreview": "https://openreview.net/forum?id=L0oSfTroNE", "poster": "/media/PosterPDFs/NeurIPS%202024/97746.png?t=1731225335.7910569", "project": "", "author_site": "Fanghua Ye, Mingming Yang, Jianhui Pang, Longyue Wang, Derek Wong, Emine Yilmaz, Shuming Shi, Zhaopeng Tu", "tldr": "", "abstract": "The proliferation of open-source Large Language Models (LLMs) from various institutions has highlighted the urgent need for comprehensive evaluation methods. However, current evaluation platforms, such as the widely recognized HuggingFace open LLM leaderboard, neglect a crucial aspect -- uncertainty, which is vital for thoroughly assessing LLMs. To bridge this gap, we introduce a new benchmarking approach for LLMs that integrates uncertainty quantification. Our examination involves nine LLMs (LLM series) spanning five representative natural language processing tasks. Our findings reveal that: I) LLMs with higher accuracy may exhibit lower certainty; II) Larger-scale LLMs may display greater uncertainty compared to their smaller counterparts; and III) Instruction-finetuning tends to increase the uncertainty of LLMs. These results underscore the significance of incorporating uncertainty in the evaluation of LLMs. Our implementation is available at https://github.com/smartyfh/LLM-Uncertainty-Bench.", "keywords": "LLMs;Uncertainty Quantification;Benchmarking", "primary_area": "", "supplementary_material": "/attachment/0987a5aa89a8028183e1acb4d22482bf0bdeabf8.zip", "author": "Fanghua Ye;Mingming Yang;Jianhui Pang;Longyue Wang;Derek F. Wong;Emine Yilmaz;Shuming Shi;Zhaopeng Tu", "authorids": "~Fanghua_Ye1;~Mingming_Yang1;~Jianhui_Pang1;~Longyue_Wang3;~Derek_F._Wong1;~Emine_Yilmaz1;~Shuming_Shi1;~Zhaopeng_Tu1", "gender": "M;M;M;M;M;F;M;M", "homepage": "https://www.fanghuaye.xyz/;;;http://longyuewang.com/;https://www.fst.um.edu.mo/personal/derek-wong/;https://sites.google.com/site/emineyilmaz/;;http://www.zptu.net", "dblp": "203/0957;https://dblp.uni-trier.de/pid/29/3866;178/4471;127/3421;123/0533;36/3270;s/ShumingShi;71/9281", "google_scholar": "UXN7iUsAAAAJ;wh6aMMcAAAAJ;sOH2i3UAAAAJ;r1ctChkAAAAJ;KjQBe8oAAAAJ;https://scholar.google.com.tw/citations?user=ocmAN4YAAAAJ;Lg31AKMAAAAJ;IvE2zRgAAAAJ", "orcid": ";0000-0001-9896-3232;;0000-0002-9062-6183;0000-0002-5307-7322;;;", "linkedin": "fanghua-ye-81084587/;;;vincentwang0229/;derek-wong-6209445/;;;tuzhaopeng", "or_profile": "~Fanghua_Ye1;~Mingming_Yang1;~Jianhui_Pang1;~Longyue_Wang3;~Derek_F._Wong1;~Emine_Yilmaz1;~Shuming_Shi1;~Zhaopeng_Tu1", "aff": "University College London;Tencent AI Lab;University of Macau;Tencent AI Lab;University of Macau;Department of Computer Science, University College London;Tencent AI Lab;Tencent AI Lab", "aff_domain": "ucl.ac.uk;tencent.com;um.mo;tencent.com;um.edu.mo;cs.ucl.ac.uk;tencent.com;tencent.com", "position": "PhD student;Researcher;PhD student;Senior Researcher;Associate Professor;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nye2024benchmarking,\ntitle={Benchmarking {LLM}s via Uncertainty Quantification},\nauthor={Fanghua Ye and Mingming Yang and Jianhui Pang and Longyue Wang and Derek F. Wong and Emine Yilmaz and Shuming Shi and Zhaopeng Tu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=L0oSfTroNE}\n}", "github": "", "reviewers": "MsFo;zVYp;XKVJ;exNK", "pdf_size": 1875243, "rating": "6;7;7;7", "confidence": "4;3;5;4", "wc_summary_and_contributions": "77;68;79;13", "wc_strengths": "22;20;94;31", "wc_improvement": "47;15;539;21", "wc_limitations": "20;47;39;58", "wc_correctness": "22;27;13;20", "wc_clarity": "6;6;11;1", "wc_relation_to_prior_work": "2;31;54;20", "wc_documentation": "4;16;7;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "201;231;837;166", "wc_reply_reviewers": "330;0;47;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "4;0;1;1", "reply_authors": "6;2;4;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 59.25, 27.021981792607292 ], "wc_strengths_avg": [ 41.75, 30.44975369358511 ], "wc_improvement_avg": [ 155.5, 221.7402759987459 ], "wc_limitations_avg": [ 41.0, 13.874436925511608 ], "wc_correctness_avg": [ 20.5, 5.024937810560445 ], "wc_clarity_avg": [ 6.0, 3.5355339059327378 ], "wc_relation_to_prior_work_avg": [ 26.75, 18.833148966649205 ], "wc_documentation_avg": [ 7.0, 5.612486080160912 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 358.75, 277.07433569351025 ], "wc_reply_reviewers_avg": [ 95.25, 136.77970426931037 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9279554149955282111&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucl.ac.uk;tencent.com;um.mo;tencent.com;um.edu.mo;cs.ucl.ac.uk;tencent.com;tencent.com", "author_num": 8, "aff_unique_index": "0;1;2;1;2;0;1;1", "aff_unique_norm": "University College London;Tencent;University of Macau", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.ucl.ac.uk;https://ai.tencent.com;https://www.um.edu.mo", "aff_unique_abbr": "UCL;Tencent AI Lab;UM", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Macau SAR;London", "aff_country_unique_index": "0;1;1;1;1;0;1;1", "aff_country_unique": "United Kingdom;China" }, { "title": "CondTSF: One-line Plugin of Dataset Condensation for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95627", "id": "L1jajNWON5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L1jajNWON5", "openreview": "https://openreview.net/forum?id=L1jajNWON5", "poster": "/media/PosterPDFs/NeurIPS%202024/95627.png?t=1729698678.0886054", "project": "", "author_site": "Jianrong Ding, Zhanyu Liu, Guanjie Zheng, Haiming Jin, Linghe Kong", "tldr": "", "abstract": "\\textit{Dataset condensation} is a newborn technique that generates a small dataset that can be used in training deep neural networks (DNNs) to lower storage and training costs. The objective of dataset condensation is to ensure that the model trained with the synthetic dataset can perform comparably to the model trained with full datasets. However, existing methods predominantly concentrate on classification tasks, posing challenges in their adaptation to time series forecasting (TS-forecasting). This challenge arises from disparities in the evaluation of synthetic data. In classification, the synthetic data is considered well-distilled if the model trained with the full dataset and the model trained with the synthetic dataset yield identical labels for the same input, regardless of variations in output logits distribution. Conversely, in TS-forecasting, the effectiveness of synthetic data distillation is determined by the distance between predictions of the two models. The synthetic data is deemed well-distilled only when all data points within the predictions are similar. Consequently, TS-forecasting has a more rigorous evaluation methodology compared to classification. To mitigate this gap, we theoretically analyze the optimization objective of dataset condensation for TS-forecasting and propose a new one-line plugin of dataset condensation for TS-forecasting designated as Dataset \\textbf{Cond}ensation for \\textbf{T}ime \\textbf{S}eries \\textbf{F}orecasting (CondTSF) based on our analysis. Plugging CondTSF into previous dataset condensation methods facilitates a reduction in the distance between the predictions of the model trained with the full dataset and the model trained with the synthetic dataset, thereby enhancing performance. We conduct extensive experiments on eight commonly used time series datasets. CondTSF consistently improves the performance of all previous dataset condensation methods across all datasets, particularly at low condensing ratios.", "keywords": "Dataset condensation;Time series forecasting", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/2d527cc7a9bf4381c2e6e74451f5c936fdb63ec2.zip", "author": "Jianrong Ding;Zhanyu Liu;Guanjie Zheng;Haiming Jin;Linghe Kong", "authorids": "~Jianrong_Ding1;~Zhanyu_Liu1;~Guanjie_Zheng1;~Haiming_Jin2;~Linghe_Kong1", "gender": "M;M;M;M;M", "homepage": "https://rafadd.github.io/;https://zhyliu00.github.io/;http://jhc.sjtu.edu.cn/~gjzheng/;https://www.cs.sjtu.edu.cn/~jinhaiming/;https://www.cs.sjtu.edu.cn/~linghe.kong/", "dblp": ";02/10777;204/3356;14/3177;23/7909", "google_scholar": "O-xoSwUAAAAJ;VAnWz0IAAAAJ;jJpqDQIAAAAJ;5Sn9JVoAAAAJ;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ", "orcid": ";0000-0001-6207-5460;;0000-0001-5178-7198;0000-0001-9266-3044", "linkedin": ";;;;", "or_profile": "~Jianrong_Ding1;~Zhanyu_Liu1;~Guanjie_Zheng1;~Haiming_Jin2;~Linghe_Kong1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Undergrad student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nding2024condtsf,\ntitle={Cond{TSF}: One-line Plugin of Dataset Condensation for Time Series Forecasting},\nauthor={Jianrong Ding and Zhanyu Liu and Guanjie Zheng and Haiming Jin and Linghe Kong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L1jajNWON5}\n}", "github": "", "reviewers": "DVXT;TXkn;s2gY;ivQc", "pdf_size": 4128880, "rating": "4;4;5;7", "confidence": "3;3;3;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;1;3;3", "wc_summary": "92;92;51;82", "wc_strengths": "14;30;28;48", "wc_weaknesses": "148;191;406;8", "wc_questions": "6;158;23;73", "wc_limitations": "6;5;1;15", "wc_review": "266;476;509;226", "wc_reply_reviewers": "15;29;32;0", "wc_reply_authors": "68;206;40;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 79.25, 16.813313177360374 ], "wc_strengths_avg": [ 30.0, 12.083045973594572 ], "wc_weaknesses_avg": [ 188.25, 142.76970091724644 ], "wc_questions_avg": [ 65.0, 59.07198997833068 ], "wc_limitations_avg": [ 6.75, 5.11737237261468 ], "wc_review_avg": [ 369.25, 124.6061294640035 ], "wc_reply_reviewers_avg": [ 19.0, 12.708265027138834 ], "wc_reply_authors_avg": [ 78.5, 77.4774160642958 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12152590914359270046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ACES: Generating a Diversity of Challenging Programming Puzzles with Autotelic Generative Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95626", "id": "L1mMK39Z7P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L1mMK39Z7P", "openreview": "https://openreview.net/forum?id=L1mMK39Z7P", "poster": "", "project": "", "author_site": "Julien Pourcel, C\u00e9dric Colas, Gaia Molinaro, Pierre-Yves Oudeyer, Laetitia Teodorescu", "tldr": "", "abstract": "The ability to invent novel and interesting problems is a remarkable feature of human intelligence that drives innovation, art, and science. We propose a method that aims to automate this process by harnessing the power of state-of-the-art generative models to produce a diversity of challenging yet solvable problems, here in the context of Python programming puzzles. Inspired by the intrinsically motivated literature, Autotelic CodE Search (ACES) jointly optimizes for the diversity and difficulty of generated problems. We represent problems in a space of LLM-generated semantic descriptors describing the programming skills required to solve them (e.g. string manipulation, dynamic programming, etc.) and measure their difficulty empirically as a linearly decreasing function of the success rate of \\textit{Llama-3-70B}, a state-of-the-art LLM problem solver. ACES iteratively prompts a large language model to generate difficult problems achieving a diversity of target semantic descriptors (goal-directed exploration) using previously generated problems as in-context examples. ACES generates problems that are more diverse and more challenging than problems produced by baseline methods and three times more challenging than problems found in existing Python programming benchmarks on average across 11 state-of-the-art code LLMs.", "keywords": "diversity search;code generation;quality-diversity;open-endedness;generative models;evolutionary algorithms;code models", "primary_area": "generative_models", "supplementary_material": "", "author": "Julien Pourcel;C\u00e9dric Colas;Gaia Molinaro;Pierre-Yves Oudeyer;Laetitia Teodorescu", "authorids": "~Julien_Pourcel1;~C\u00e9dric_Colas1;~Gaia_Molinaro1;~Pierre-Yves_Oudeyer1;~Laetitia_Teodorescu1", "gender": "M;M;F;M;F", "homepage": "https://www.linkedin.com/in/julien-pourcel-b527ab150/;https://cedriccolas.com;https://gaiamolinaro.github.io/;http://www.pyoudeyer.com;", "dblp": ";215/3872;;33/5513;", "google_scholar": "WgMxQxoAAAAJ;https://scholar.google.fr/citations?user=VBz8gZ4AAAAJ;N3eRG0UAAAAJ;https://scholar.google.fr/citations?user=gCqGj4sAAAAJ;https://scholar.google.fr/citations?user=kgfJ_3gAAAAJ", "orcid": ";0000-0003-0212-427X;0000-0001-6145-133X;;", "linkedin": "julien-pourcel-b527ab150/;;gaiamolinaro/;pierreyvesoudeyer/;", "or_profile": "~Julien_Pourcel1;~C\u00e9dric_Colas1;~Gaia_Molinaro1;~Pierre-Yves_Oudeyer1;~Laetitia_Teodorescu1", "aff": "INRIA;Massachusetts Institute of Technology;University of California, Berkeley;Inria;INRIA", "aff_domain": "inria.fr;mit.edu;berkeley.edu;inria.fr;inria.fr", "position": "PhD student;Postdoc;PhD student;Research director;Postdoc", "bibtex": "@inproceedings{\npourcel2024aces,\ntitle={{ACES}: Generating a Diversity of Challenging Programming Puzzles with Autotelic Generative Models},\nauthor={Julien Pourcel and C{\\'e}dric Colas and Gaia Molinaro and Pierre-Yves Oudeyer and Laetitia Teodorescu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L1mMK39Z7P}\n}", "github": "", "reviewers": "NuHw;ESVE;qqHU;LkwK", "pdf_size": 1748398, "rating": "4;6;6;6", "confidence": "4;3;4;4", "soundness": "2;4;2;3", "novelty": "2;2;3;3", "presentation": "3;3;2;2", "wc_summary": "67;79;106;39", "wc_strengths": "45;77;64;109", "wc_weaknesses": "104;136;378;173", "wc_questions": "586;160;57;150", "wc_limitations": "21;7;7;7", "wc_review": "823;459;612;478", "wc_reply_reviewers": "605;69;272;45", "wc_reply_authors": "678;0;811;0", "reply_reviewers": "2;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.75, 24.06631463269771 ], "wc_strengths_avg": [ 73.75, 23.31710745354149 ], "wc_weaknesses_avg": [ 197.75, 106.89334637852816 ], "wc_questions_avg": [ 238.25, 204.7515262458378 ], "wc_limitations_avg": [ 10.5, 6.06217782649107 ], "wc_review_avg": [ 593.0, 145.29452845857617 ], "wc_reply_reviewers_avg": [ 247.75, 224.3182727733075 ], "wc_reply_authors_avg": [ 372.25, 375.20819220800604 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9yVp_jFyLG8J:scholar.google.com/&scioq=ACES:+Generating+a+Diversity+of+Challenging+Programming+Puzzles+with+Autotelic+Generative+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "inria.fr;mit.edu;berkeley.edu;inria.fr;inria.fr", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "INRIA;Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inria.fr;https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "INRIA;MIT;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "France;United States" }, { "title": "Adversarial Schr\u00f6dinger Bridge Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95625", "id": "L3Knnigicu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L3Knnigicu", "openreview": "https://openreview.net/forum?id=L3Knnigicu", "poster": "", "project": "", "author_site": "Nikita Gushchin, Daniil Selikhanovych, Sergei Kholkin, Evgeny Burnaev, Aleksandr Korotin", "tldr": "", "abstract": "The Schr\u00f6dinger Bridge (SB) problem offers a powerful framework for combining optimal transport and diffusion models. A promising recent approach to solve the SB problem is the Iterative Markovian Fitting (IMF) procedure, which alternates between Markovian and reciprocal projections of continuous-time stochastic processes. However, the model built by the IMF procedure has a long inference time due to using many steps of numerical solvers for stochastic differential equations. To address this limitation, we propose a novel Discrete-time IMF (D-IMF) procedure in which learning of stochastic processes is replaced by learning just a few transition probabilities in discrete time. Its great advantage is that in practice it can be naturally implemented using the Denoising Diffusion GAN (DD-GAN), an already well-established adversarial generative modeling technique. We show that our D-IMF procedure can provide the same quality of unpaired domain translation as the IMF, using only several generation steps instead of hundreds.", "keywords": "Schr\u00f6dinger Bridge;Bridge Matching;GANs;Optimal Transport;Entropy regularized OT;Unpaired Learning", "primary_area": "generative_models", "supplementary_material": "/attachment/db1ca40eca0f830b53b2138098718b41e4046b9a.zip", "author": "Nikita Gushchin;Daniil Selikhanovych;Sergei Kholkin;Evgeny Burnaev;Alexander Korotin", "authorids": "~Nikita_Gushchin1;~Daniil_Selikhanovych1;~Sergei_Kholkin1;~Evgeny_Burnaev1;~Alexander_Korotin2", "gender": "M;M;M;M;M", "homepage": ";https://github.com/Daniil-Selikhanovych;http://GitHub.com/skholkin;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "332/1999;243/8927;367/9264;144/7845;209/9906", "google_scholar": "UaRTbNoAAAAJ;N9bInaYAAAAJ;KwhztSMAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": ";0000-0001-8789-5924;;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": "nikita-gushchin-937522145/;;;;", "or_profile": "~Nikita_Gushchin1;~Daniil_Selikhanovych1;~Sergei_Kholkin1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;skoltech.ru;skoltech.ru;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;MS student;Full Professor;Head of Research Group", "bibtex": "@inproceedings{\ngushchin2024adversarial,\ntitle={Adversarial Schr\\\"odinger Bridge Matching},\nauthor={Nikita Gushchin and Daniil Selikhanovych and Sergei Kholkin and Evgeny Burnaev and Alexander Korotin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L3Knnigicu}\n}", "github": "", "reviewers": "LCNa;RgPk;xtGA;xGf5", "pdf_size": 27563730, "rating": "6;6;6;7", "confidence": "1;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "86;64;167;75", "wc_strengths": "69;54;98;67", "wc_weaknesses": "131;203;340;95", "wc_questions": "35;49;129;24", "wc_limitations": "6;8;114;6", "wc_review": "327;378;848;267", "wc_reply_reviewers": "29;20;68;13", "wc_reply_authors": "58;0;58;58", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.0, 40.58940748520481 ], "wc_strengths_avg": [ 72.0, 16.077935190813527 ], "wc_weaknesses_avg": [ 192.25, 93.74799997866621 ], "wc_questions_avg": [ 59.25, 41.233329964968874 ], "wc_limitations_avg": [ 33.5, 46.48386816950586 ], "wc_review_avg": [ 455.0, 230.27483579410062 ], "wc_reply_reviewers_avg": [ 32.5, 21.266170318136737 ], "wc_reply_authors_avg": [ 43.5, 25.11473670974872 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4844127006406729894&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "skoltech.ru;skoltech.ru;skoltech.ru;skoltech.ru;skoltech.ru", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.skoltech.ru", "aff_unique_abbr": "Skoltech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Counter-Current Learning: A Biologically Plausible Dual Network Approach for Deep Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95624", "id": "L3RYBqzRmF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L3RYBqzRmF", "openreview": "https://openreview.net/forum?id=L3RYBqzRmF", "poster": "/media/PosterPDFs/NeurIPS%202024/95624.png?t=1731373468.712693", "project": "", "author_site": "Chia-Hsiang Kao, Bharath Hariharan", "tldr": "", "abstract": "Despite its widespread use in neural networks, error backpropagation has faced criticism for its lack of biological plausibility, suffering from issues such as the backward locking problem and the weight transport problem. \nThese limitations have motivated researchers to explore more biologically plausible learning algorithms that could potentially shed light on how biological neural systems adapt and learn. \nInspired by the counter-current exchange mechanisms observed in biological systems, we propose counter-current learning (CCL), a biologically plausible framework for credit assignment in deep learning. \nThis framework employs a feedforward network to process input data and a feedback network to process targets, with each network enhancing the other through anti-parallel signal propagation. \nBy leveraging the more informative signals from the bottom layer of the feedback network to guide the updates of the top layer of the feedforward network and vice versa, CCL enables the simultaneous transformation of source inputs to target outputs and the dynamic mutual influence of these transformations.\nExperimental results on MNIST, FashionMNIST, CIFAR10, CIFAR100, and STL-10 datasets using multi-layer perceptrons and convolutional neural networks demonstrate that CCL achieves comparable performance to other biological plausible algorithms while offering a more biologically realistic learning mechanism. \nFurthermore, we showcase the applicability of our approach to an autoencoder task, underscoring its potential for unsupervised representation learning.\nOur work presents a promising direction for biologically inspired and plausible learning algorithms, offering insights into the mechanisms of learning and adaptation in neural networks.", "keywords": "biologically plausible algorithm;backward locking problem;biological inspired algorithm;target propagation", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/b243edb4d78132885388326a7aef4bb330d193ba.zip", "author": "Chia Hsiang Kao;Bharath Hariharan", "authorids": "~Chia_Hsiang_Kao1;~Bharath_Hariharan3", "gender": "M;M", "homepage": "https://iandrover.github.io;http://home.bharathh.info", "dblp": "241/3791;05/8412", "google_scholar": "https://scholar.google.com.tw/citations?user=W_i9B0sAAAAJ;TpglobcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Chia_Hsiang_Kao1;~Bharath_Hariharan2", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkao2024countercurrent,\ntitle={Counter-Current Learning: A Biologically Plausible Dual Network Approach for Deep Learning},\nauthor={Chia Hsiang Kao and Bharath Hariharan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L3RYBqzRmF}\n}", "github": "", "reviewers": "kHhR;myCb;ijwx;EBaT;sTDo", "pdf_size": 1980708, "rating": "4;6;6;6;6", "confidence": "4;4;2;3;4", "soundness": "2;3;3;2;2", "novelty": "2;3;3;2;2", "presentation": "2;3;3;3;3", "wc_summary": "19;35;148;129;184", "wc_strengths": "14;48;91;106;88", "wc_weaknesses": "191;41;45;264;140", "wc_questions": "158;75;108;74;388", "wc_limitations": "9;8;12;33;25", "wc_review": "391;207;404;606;825", "wc_reply_reviewers": "1299;29;278;7;47", "wc_reply_authors": "1179;29;494;0;31", "reply_reviewers": "3;1;2;1;1", "reply_authors": "5;2;3;1;2", "rating_avg": [ 5.6, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 103.0, 64.71784916079952 ], "wc_strengths_avg": [ 69.4, 33.70222544580699 ], "wc_weaknesses_avg": [ 136.2, 85.70974273675077 ], "wc_questions_avg": [ 160.6, 117.73801425198235 ], "wc_limitations_avg": [ 17.4, 9.891410415102591 ], "wc_review_avg": [ 486.6, 211.14412139578977 ], "wc_reply_reviewers_avg": [ 332.0, 493.2877456414258 ], "wc_reply_authors_avg": [ 346.6, 455.0211423659345 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 2.6, 1.3564659966250536 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.37500000000000017, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WvGkGyX26vgJ:scholar.google.com/&scioq=Counter-Current+Learning:+A+Biologically+Plausible+Dual+Network+Approach+for+Deep+Learning&hl=en&as_sdt=0,31", "gs_version_total": 3, "email": "cornell.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Proximal Causal Inference With Text Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95623", "id": "L4RwA0qyUd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L4RwA0qyUd", "openreview": "https://openreview.net/forum?id=L4RwA0qyUd", "poster": "/media/PosterPDFs/NeurIPS%202024/95623.png?t=1730845888.671237", "project": "", "author_site": "Jacob Chen, Rohit Bhattacharya, Katherine Keith", "tldr": "", "abstract": "Recent text-based causal methods attempt to mitigate confounding bias by estimating proxies of confounding variables that are partially or imperfectly measured from unstructured text data. These approaches, however, assume analysts have supervised labels of the confounders given text for a subset of instances, a constraint that is sometimes infeasible due to data privacy or annotation costs. In this work, we address settings in which an important confounding variable is completely unobserved. We propose a new causal inference method that uses two instances of pre-treatment text data, infers two proxies using two zero-shot models on the separate instances, and applies these proxies in the proximal g-formula. We prove, under certain assumptions about the instances of text and accuracy of the zero-shot predictions, that our method of inferring text-based proxies satisfies identification conditions of the proximal g-formula while other seemingly reasonable proposals do not. To address untestable assumptions associated with our method and the proximal g-formula, we further propose an odds ratio falsification heuristic that flags when to proceed with downstream effect estimation using the inferred proxies. We evaluate our method in synthetic and semi-synthetic settings---the latter with real-world clinical notes from MIMIC-III and open large language models for zero-shot prediction---and find that our method produces estimates with low bias. We believe that this text-based design of proxies allows for the use of proximal causal inference in a wider range of scenarios, particularly those for which obtaining suitable proxies from structured data is difficult.", "keywords": "proximal causal inference;directed acyclic graphs;electronic health records;causal inference", "primary_area": "causal_inference", "supplementary_material": "/attachment/4f23be0fad635d8039701336e6745c78101ea873.zip", "author": "Jacob M. Chen;Rohit Bhattacharya;Katherine A. Keith", "authorids": "~Jacob_M._Chen1;~Rohit_Bhattacharya1;~Katherine_A._Keith1", "gender": "M;F;", "homepage": "http://www.cs.jhu.edu/~rohit/;https://kakeith.github.io/;https://jacobmchen.github.io/", "dblp": "244/2394;203/8348;354/4788", "google_scholar": "KIvKEtcAAAAJ;;UljRWXsAAAAJ", "orcid": ";;", "linkedin": ";;jacobmchen/", "or_profile": "~Rohit_Bhattacharya1;~Katherine_A._Keith1;~Jacob_Morris_Chen1", "aff": "Williams College;Williams College;Johns Hopkins University", "aff_domain": "williams.edu;williams.edu;jhu.edu", "position": "Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchen2024proximal,\ntitle={Proximal Causal Inference With Text Data},\nauthor={Jacob M. Chen and Rohit Bhattacharya and Katherine A. Keith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L4RwA0qyUd}\n}", "github": "", "reviewers": "uHfh;iLUt;dab7", "pdf_size": 1055548, "rating": "5;6;6", "confidence": "4;4;3", "soundness": "4;3;2", "novelty": "4;3;2", "presentation": "3;3;2", "wc_summary": "79;139;103", "wc_strengths": "23;118;54", "wc_weaknesses": "109;155;101", "wc_questions": "48;63;210", "wc_limitations": "17;11;50", "wc_review": "276;486;518", "wc_reply_reviewers": "82;10;148", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 107.0, 24.657656011875904 ], "wc_strengths_avg": [ 65.0, 39.5558676641869 ], "wc_weaknesses_avg": [ 121.66666666666667, 23.79542439676633 ], "wc_questions_avg": [ 107.0, 73.08898685848642 ], "wc_limitations_avg": [ 26.0, 17.146428199482248 ], "wc_review_avg": [ 426.6666666666667, 107.33540370674015 ], "wc_reply_reviewers_avg": [ 80.0, 56.356011214421486 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12708495664564569920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "williams.edu;williams.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Williams College;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.williams.edu;https://www.jhu.edu", "aff_unique_abbr": "Williams;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Simple and Effective Masked Diffusion Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95622", "id": "L4uaAR4ArM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L4uaAR4ArM", "openreview": "https://openreview.net/forum?id=L4uaAR4ArM", "poster": "/media/PosterPDFs/NeurIPS%202024/95622.png?t=1733802815.3554227", "project": "", "author_site": "Subham Sahoo, Marianne Arriola, Aaron Gokaslan, Edgar Marroquin, Alexander Rush, Yair Schiff, Justin Chiu, Volodymyr Kuleshov", "tldr": "", "abstract": "While diffusion models excel at generating high-quality images, prior work reports a significant performance gap between diffusion and autoregressive (AR) methods in language modeling.\nIn this work, we show that simple masked discrete diffusion is more performant than previously thought.\nWe apply an effective training recipe that improves the performance of masked diffusion models and derive a simplified, Rao-Blackwellized objective that results in additional improvements.\nOur objective has a simple form—it is a mixture of classical masked language modeling losses—and can be used to train encoder-only language models that admit efficient samplers, including ones that can generate arbitrary lengths of text semi-autoregressively like a traditional language model.\nOn language modeling benchmarks, a range of masked diffusion models trained with modern engineering practices achieves a new state-of-the-art among diffusion models, and approaches AR perplexity. We provide the code, along with a blog post and video tutorial on the project page: https://s-sahoo.com/mdlm", "keywords": "Text Diffusion;Generative Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Subham Sekhar Sahoo;Marianne Arriola;Yair Schiff;Aaron Gokaslan;Edgar Mariano Marroquin;Justin T Chiu;Alexander M Rush;Volodymyr Kuleshov", "authorids": "~Subham_Sekhar_Sahoo1;~Marianne_Arriola1;~Yair_Schiff1;~Aaron_Gokaslan1;~Edgar_Mariano_Marroquin1;~Justin_T_Chiu1;~Alexander_M_Rush1;~Volodymyr_Kuleshov1", "gender": "M;;M;M;M;M;;M", "homepage": ";https://mariannearriola.github.io;https://github.com/yair-schiff;https://skylion007.github.io/;https://www.cs.cornell.edu/~emarro/;http://rush.seas.harvard.edu/;https://www.cs.cornell.edu/~kuleshov/;", "dblp": ";;;220/6816;;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;81/8612;278/2437.html", "google_scholar": "Z7DoDbAAAAAJ;;GhFrOdQAAAAJ;Mt2wyL4AAAAJ;;LIjnUGgAAAAJ;RY_t8XAAAAAJ;043r6toAAAAJ", "orcid": ";;;0000-0002-3575-2961;;0000-0002-9900-1606;;", "linkedin": "shakeh3r/;marianne-arriola;yair-schiff;aarongokaslan/;;sasha-rush-a69b6917/;;", "or_profile": "~Subham_Sekhar_Sahoo1;~Marianne_Arriola1;~Yair_Schiff1;~Aaron_Gokaslan1;~Edgar_Mariano_Marroquin1;~Alexander_M_Rush1;~Volodymyr_Kuleshov1;~Justin_Chiu1", "aff": "Department of Computer Science, Cornell University;Cornell University;Department of Computer Science, Cornell University;Cornell University;Department of Computer Science, Cornell University;School of Engineering and Applied Sciences, Harvard University;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;cs.cornell.edu;cornell.edu;cs.cornell.edu;seas.harvard.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsahoo2024simple,\ntitle={Simple and Effective Masked Diffusion Language Models},\nauthor={Subham Sekhar Sahoo and Marianne Arriola and Aaron Gokaslan and Edgar Mariano Marroquin and Alexander M Rush and Yair Schiff and Justin T Chiu and Volodymyr Kuleshov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L4uaAR4ArM}\n}", "github": "", "reviewers": "DasC;BSmY;p1DW;wXn1", "pdf_size": 824161, "rating": "5;5;5;6", "confidence": "3;3;5;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "52;96;160;66", "wc_strengths": "43;30;140;24", "wc_weaknesses": "76;262;185;305", "wc_questions": "33;67;287;2", "wc_limitations": "54;9;1;1", "wc_review": "258;464;773;398", "wc_reply_reviewers": "159;0;0;105", "wc_reply_authors": "1463;1354;2243;2387", "reply_reviewers": "1;0;0;3", "reply_authors": "3;3;4;7", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 41.55418149837631 ], "wc_strengths_avg": [ 59.25, 47.12417108024289 ], "wc_weaknesses_avg": [ 207.0, 86.99712638932392 ], "wc_questions_avg": [ 97.25, 111.93832006958118 ], "wc_limitations_avg": [ 16.25, 22.038318901404434 ], "wc_review_avg": [ 473.25, 188.36848860677307 ], "wc_reply_reviewers_avg": [ 66.0, 68.70589494359272 ], "wc_reply_authors_avg": [ 1861.75, 457.72555915089555 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 4.25, 1.6393596310755 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9188889716967718793&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cs.cornell.edu;cornell.edu;cs.cornell.edu;cornell.edu;cs.cornell.edu;seas.harvard.edu;cornell.edu;cornell.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Cornell University;Harvard University", "aff_unique_dep": "Department of Computer Science;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.cornell.edu;https://www.harvard.edu", "aff_unique_abbr": "Cornell;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "3DCoMPaT200: Language Grounded Large-Scale 3D Vision Dataset for Compositional Recognition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97745", "id": "L4yLhMjCOR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L4yLhMjCOR", "openreview": "https://openreview.net/forum?id=L4yLhMjCOR", "poster": "/media/PosterPDFs/NeurIPS%202024/97745.png?t=1730396988.7747543", "project": "", "author_site": "Mahmoud Ahmed, Xiang Li, Arpit Prajapati, Mohamed Elhoseiny", "tldr": "", "abstract": "Understanding objects in 3D at the part level is essential for humans and robots to navigate and interact with the environment. Current datasets for part-level 3D object understanding encompass a limited range of categories. For instance, the ShapeNet-Part and PartNet datasets only include 16, and 24 object categories respectively. The 3DCoMPaT dataset, specifically designed for compositional understanding of parts and materials, contains only 42 object categories. To foster richer and fine-grained part-level 3D understanding, we introduce 3DCoMPaT200, a large-scale dataset tailored for compositional understanding of object parts and materials, with 200 object categories with approximately 5 times larger object vocabulary compared to 3DCoMPaT and almost 4 times larger part categories. Concretely, 3DCoMPaT200 significantly expands upon 3DCoMPaT, featuring 1,031 fine-grained part categories and 293 distinct material classes for compositional application to 3D object parts. Additionally, to address the complexities of compositional 3D modeling, we propose a novel task of Compositional Part Shape Retrieval using ULIP to provide a strong 3D foundational model for 3D Compositional Understanding. This method evaluates the model shape retrieval performance given one, three, or six parts described in text format. These results show that the model's performance improves with an increasing number of style compositions, highlighting the critical role of the compositional dataset. Such results underscore the dataset's effectiveness in enhancing models' capability to understand complex 3D shapes from a compositional perspective. Code and Data can be found here: https://github.com/3DCoMPaT200/3DCoMPaT200/", "keywords": "3D Parts Dataset;3D Shapes Dataset;3D Part Segmentation;3D Shape Classification;3D Shape Retrieval;3D Dataset;3D Compositional Understanding", "primary_area": "", "supplementary_material": "/attachment/ef5bf21601606ca9d4fd61539b25fb4707f2e43a.pdf", "author": "Mahmoud Ahmed;Xiang Li;Arpit Prajapati;Mohamed Elhoseiny", "authorids": "~Mahmoud_Ahmed2;~Xiang_Li18;~Arpit_Prajapati1;~Mohamed_Elhoseiny1", "gender": ";M;;M", "homepage": ";http://xiangli.ac.cn;https://polynine.com;http://www.mohamed-elhoseiny.com", "dblp": ";40/1491-46;;125/2894", "google_scholar": ";4Apl5FgAAAAJ;;iRBUTOAAAAAJ", "orcid": ";0000-0002-9946-7000;;0000-0001-9659-1551", "linkedin": ";;;mohamed-elhoseiny-8a836215/", "or_profile": "~Mahmoud_Ahmed2;~Xiang_Li18;~Arpit_Prajapati1;~Mohamed_Elhoseiny1", "aff": ";King Abdullah University of Science and Technology;;KAUST", "aff_domain": ";kaust.edu.sa;;kaust.edu.sa", "position": ";Postdoc;;Associate Professor", "bibtex": "@inproceedings{\nahmed2024dcompat,\ntitle={3{DC}o{MP}aT200: Language Grounded Large-Scale 3D Vision Dataset for Compositional Recognition},\nauthor={Mahmoud Ahmed and Xiang Li and Arpit Prajapati and Mohamed Elhoseiny},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=L4yLhMjCOR}\n}", "github": "", "reviewers": "v45i;VGWS;noGH;YkJk", "pdf_size": 5212023, "rating": "6;6;6;7", "confidence": "3;2;4;4", "wc_summary_and_contributions": "54;152;61;53", "wc_strengths": "74;35;70;86", "wc_improvement": "126;131;375;19", "wc_limitations": "73;28;15;94", "wc_correctness": "9;17;8;53", "wc_clarity": "1;9;17;11", "wc_relation_to_prior_work": "1;13;8;11", "wc_documentation": "1;8;6;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "340;394;561;341", "wc_reply_reviewers": "0;21;95;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 80.0, 41.68333000133266 ], "wc_strengths_avg": [ 66.25, 18.978606376654742 ], "wc_improvement_avg": [ 162.75, 130.45377533824 ], "wc_limitations_avg": [ 52.5, 32.20636583037583 ], "wc_correctness_avg": [ 21.75, 18.376275465937052 ], "wc_clarity_avg": [ 9.5, 5.722761571129799 ], "wc_relation_to_prior_work_avg": [ 8.25, 4.548351349665063 ], "wc_documentation_avg": [ 7.0, 4.301162633521313 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 409.0, 90.43505957315448 ], "wc_reply_reviewers_avg": [ 29.0, 39.05764969887461 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V87dBewZ6sYJ:scholar.google.com/&scioq=3DCoMPaT200:+Language+Grounded+Large-Scale+3D+Vision+Dataset+for+Compositional+Recognition&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": ";kaust.edu.sa;;kaust.edu.sa", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "L5aY1mWvXQ", "title": "Rethinking Evaluation Strategy for Temporal Link Prediction through Counterfactual Analysis", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "In response to critiques of existing evaluation methods for Temporal Link Prediction (TLP) models, we propose a novel approach to verify if these models truly capture temporal patterns in the data. Our method involves a sanity check formulated as a counterfactual question: ``What if a TLP model is tested on a temporally distorted version of the data instead of the real data?'' Ideally, a TLP model that effectively learns temporal patterns should perform worse on temporally distorted data compared to real data. We provide an in-depth analysis of this hypothesis and introduce two data distortion techniques to assess well-known TLP models.\nOur contributions are threefold: (1) We introduce simple techniques to distort temporal patterns within a graph, generating temporally distorted test splits of well-known datasets for sanity checks. These distortion methods are applicable to any temporal graph dataset. (2) We perform counterfactual analysis on TLP models such as JODIE, TGAT, TGN, and CAWN to evaluate their capability in capturing temporal patterns across different datasets. (3) We propose an alternative evaluation strategy for TLP, addressing the limitations of binary classification and ranking methods, and introduce two metrics -- average time difference (ATD) and average count difference (ACD) -- to provide a comprehensive measure of a model's predictive performance. The code and datasets are available at: https://github.com/Aniq55/TLPCF.git", "keywords": "temporal link prediction;dynamic graphs;temporal graphs;evaluation;counterfactual;causality", "primary_area": "", "supplementary_material": "", "author": "Aniq Ur Rahman;Alexander Modell;Justin Coon", "authorids": "~Aniq_Ur_Rahman1;~Alexander_Modell1;~Justin_Coon1", "gender": "M;M;M", "homepage": ";http://alexandermodell.github.io/;https://eng.ox.ac.uk/people/justin-coon/", "dblp": "230/7857;;", "google_scholar": "b_-8-GMAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0003-3685-7201;0000-0002-0074-8145;", "linkedin": "aniq55/;;", "or_profile": "~Aniq_Ur_Rahman1;~Alexander_Modell1;~Justin_Coon1", "aff": "University of Oxford;Imperial College London;University of Oxford", "aff_domain": "oxford.ac.uk;imperial.ac.uk;oxford.ac.uk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@misc{\nanonymous2024rethinking,\ntitle={Rethinking Evaluation Strategy for Temporal Link Prediction through Counterfactual Analysis},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=L5aY1mWvXQ}\n}", "github": "", "project": "", "reviewers": "zVKR;xHrs;K4d4;LMD1", "site": "https://openreview.net/forum?id=L5aY1mWvXQ", "pdf_size": 446362, "rating": "4;5;6;6", "confidence": "3;4;4;4", "wc_summary_and_contributions": "135;59;136;73", "wc_strengths": "71;14;62;63", "wc_improvement": "21;64;353;2", "wc_limitations": "239;16;11;2", "wc_correctness": "6;20;5;1", "wc_clarity": "5;1;6;15", "wc_relation_to_prior_work": "10;1;4;22", "wc_documentation": "5;1;10;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "493;177;588;180", "wc_reply_reviewers": "40;114;40;4", "wc_reply_authors": "50;0;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "4;5;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 100.75, 35.102528398962946 ], "wc_strengths_avg": [ 52.5, 22.5 ], "wc_improvement_avg": [ 110.0, 142.0827223838282 ], "wc_limitations_avg": [ 67.0, 99.4308805150593 ], "wc_correctness_avg": [ 8.0, 7.176350047203662 ], "wc_clarity_avg": [ 6.75, 5.11737237261468 ], "wc_relation_to_prior_work_avg": [ 9.25, 8.042853971072706 ], "wc_documentation_avg": [ 4.25, 3.6996621467371855 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 359.5, 184.09304712563156 ], "wc_reply_reviewers_avg": [ 49.5, 40.03436024217197 ], "wc_reply_authors_avg": [ 12.5, 21.650635094610966 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lxzwbtnG0AoJ:scholar.google.com/&scioq=Rethinking+Evaluation+Strategy+for+Temporal+Link+Prediction+through+Counterfactual+Analysis&hl=en&as_sdt=0,22", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oxford;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "Oxford;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "LoCo: Learning 3D Location-Consistent Image Features with a Memory-Efficient Ranking Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95621", "id": "L6ICzOxAfi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L6ICzOxAfi", "openreview": "https://openreview.net/forum?id=L6ICzOxAfi", "poster": "/media/PosterPDFs/NeurIPS%202024/95621.png?t=1733957024.390939", "project": "", "author_site": "Dominik Kloepfer, Jo\u00e3o Henriques, Dylan Campbell", "tldr": "", "abstract": "Image feature extractors are rendered substantially more useful if different views of the same 3D location yield similar features while still being distinct from other locations. A feature extractor that achieves this goal even under significant viewpoint changes must recognise not just semantic categories in a scene, but also understand how different objects relate to each other in three dimensions. Existing work addresses this task by posing it as a patch retrieval problem, training the extracted features to facilitate retrieval of all image patches that project from the same 3D location. However, this approach uses a loss formulation that requires substantial memory and computation resources, limiting its applicability for large-scale training. We present a method for memory-efficient learning of location-consistent features that reformulates and approximates the smooth average precision objective. This novel loss function enables improvements in memory efficiency by three orders of magnitude, mitigating a key bottleneck of previous methods and allowing much larger models to be trained with the same computational resources. We showcase the improved location consistency of our trained feature extractor directly on a multi-view consistency task, as well as the downstream task of scene-stable panoptic segmentation, significantly outperforming previous state-of-the-art.", "keywords": "Feature Learning;Self-Supervised Learning;3D Vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dominik Kloepfer;Joao F. Henriques;Dylan Campbell", "authorids": "~Dominik_Kloepfer1;~Joao_F._Henriques1;~Dylan_Campbell1", "gender": "M;M;M", "homepage": "https://github.com/dakloepfer;http://www.robots.ox.ac.uk/~joao/;https://sites.google.com/view/djcampbell", "dblp": "358/5106;31/8617.html;139/6663", "google_scholar": ";aCQjyp0AAAAJ;https://scholar.google.com.au/citations?user=FayBF1AAAAAJ", "orcid": ";;0000-0002-4717-6850", "linkedin": ";;", "or_profile": "~Dominik_Kloepfer1;~Joao_F._Henriques1;~Dylan_Campbell1", "aff": "University of Oxford;University of Oxford;Australian National University", "aff_domain": "ox.ac.uk;ox.ac.uk;anu.edu.au", "position": "PhD student;Principal Researcher;Lecturer", "bibtex": "@inproceedings{\nkloepfer2024loco,\ntitle={LoCo: Learning 3D Location-Consistent Image Features with a Memory-Efficient Ranking Loss},\nauthor={Dominik Kloepfer and Joao F. Henriques and Dylan Campbell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L6ICzOxAfi}\n}", "github": "", "reviewers": "jhZZ;aW6W;YTBi", "pdf_size": 2907797, "rating": "3;6;6", "confidence": "5;4;2", "soundness": "2;3;3", "novelty": "1;3;4", "presentation": "2;4;3", "wc_summary": "113;156;87", "wc_strengths": "46;130;41", "wc_weaknesses": "190;334;69", "wc_questions": "87;34;15", "wc_limitations": "9;8;8", "wc_review": "445;662;220", "wc_reply_reviewers": "143;46;60", "wc_reply_authors": "251;26;21", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 118.66666666666667, 28.452689777164398 ], "wc_strengths_avg": [ 72.33333333333333, 40.827550610940264 ], "wc_weaknesses_avg": [ 197.66666666666666, 108.32153781938085 ], "wc_questions_avg": [ 45.333333333333336, 30.466739606039603 ], "wc_limitations_avg": [ 8.333333333333334, 0.4714045207910317 ], "wc_review_avg": [ 442.3333333333333, 180.45559626185667 ], "wc_reply_reviewers_avg": [ 83.0, 42.80965623158713 ], "wc_reply_authors_avg": [ 99.33333333333333, 107.26395273136058 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7559289460184546, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MkhucPrx7ogJ:scholar.google.com/&scioq=LoCo:+Learning+3D+Location-Consistent+Image+Features+with+a+Memory-Efficient+Ranking+Loss&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "ox.ac.uk;ox.ac.uk;anu.edu.au", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Oxford;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.anu.edu.au", "aff_unique_abbr": "Oxford;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Australia" }, { "title": "Distribution Learning with Valid Outputs Beyond the Worst-Case", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95620", "id": "L7i5FjgKjc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L7i5FjgKjc", "openreview": "https://openreview.net/forum?id=L7i5FjgKjc", "poster": "", "project": "", "author_site": "Nicholas Rittler, Kamalika Chaudhuri", "tldr": "", "abstract": "Generative models at times produce \"invalid\" outputs, such as images with generation artifacts and unnatural sounds. Validity-constrained distribution learning attempts to address this problem by requiring that the learned distribution have a provably small fraction of its mass in invalid parts of space -- something which standard loss minimization does not always ensure. To this end, a learner in this model can guide the learning via \"validity queries\", which allow it to ascertain the validity of individual examples. Prior work on this problem takes a worst-case stance, showing that proper learning requires an exponential number of validity queries, and demonstrating an improper algorithm which -- while generating guarantees in a wide-range of settings -- makes a relatively large polynomial number of validity queries. In this work, we take a first step towards characterizing regimes where guaranteeing validity is easier than in the worst-case. We show that when the data distribution lies in the model class and the log-loss is minimized, the number samples required to ensure validity has a weak dependence on the validity requirement. Additionally, we show that when the validity region belongs to a VC-class, a limited number of validity queries are often sufficient.", "keywords": "distribution learning;trustworthy machine learning;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Nicholas Rittler;Kamalika Chaudhuri", "authorids": "~Nicholas_Rittler1;~Kamalika_Chaudhuri1", "gender": "M;F", "homepage": ";http://cseweb.ucsd.edu/users/kamalika", "dblp": ";56/6435", "google_scholar": ";I-DJ7EsAAAAJ", "orcid": ";", "linkedin": "nicholas-r-515909111/;", "or_profile": "~Nicholas_Rittler1;~Kamalika_Chaudhuri1", "aff": "University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nrittler2024distribution,\ntitle={Distribution Learning with Valid Outputs Beyond the Worst-Case},\nauthor={Nicholas Rittler and Kamalika Chaudhuri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L7i5FjgKjc}\n}", "github": "", "reviewers": "HLjo;CS1K;yKWM;KhXL", "pdf_size": 374049, "rating": "4;6;6;8", "confidence": "3;2;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;2;4", "wc_summary": "187;31;94;927", "wc_strengths": "15;60;82;141", "wc_weaknesses": "43;30;111;121", "wc_questions": "21;1;10;425", "wc_limitations": "16;1;10;12", "wc_review": "282;123;307;1626", "wc_reply_reviewers": "51;65;16;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 309.75, 360.66423096836206 ], "wc_strengths_avg": [ 74.5, 45.356917884706405 ], "wc_weaknesses_avg": [ 76.25, 40.17072939342775 ], "wc_questions_avg": [ 114.25, 179.55135059363937 ], "wc_limitations_avg": [ 9.75, 5.494315243958978 ], "wc_review_avg": [ 584.5, 605.4372386961344 ], "wc_reply_reviewers_avg": [ 40.5, 18.848076824970764 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fJQyuGm69awJ:scholar.google.com/&scioq=Distribution+Learning+with+Valid+Outputs+Beyond+the+Worst-Case&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "ucsd.edu;ucsd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Symmetries in Overparametrized Neural Networks: A Mean Field View", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95619", "id": "L86glqNCUj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L86glqNCUj", "openreview": "https://openreview.net/forum?id=L86glqNCUj", "poster": "/media/PosterPDFs/NeurIPS%202024/95619.png?t=1733522341.5732841", "project": "", "author_site": "Javier Maass, Joaquin Fontbona", "tldr": "", "abstract": "We develop a Mean-Field (MF) view of the learning dynamics of overparametrized Artificial Neural Networks (NN) under distributional symmetries of the data w.r.t. the action of a general compact group $G$. We consider for this a class of generalized shallow NNs given by an ensemble of $N$ multi-layer units, jointly trained using stochastic gradient descent (SGD) and possibly symmetry-leveraging (SL) techniques, such as Data Augmentation (DA), Feature Averaging (FA) or Equivariant Architectures (EA). We introduce the notions of weakly and strongly invariant laws (WI and SI) on the parameter space of each single unit, corresponding, respectively, to $G$-invariant distributions, and to distributions supported on parameters fixed by the group action (which encode EA). This allows us to define symmetric models compatible with taking $N\\to\\infty$ and give an interpretation of the asymptotic dynamics of DA, FA and EA in terms of Wasserstein Gradient Flows describing their MF limits. When activations respect the group action, we show that, for symmetric data, DA, FA and freely-trained models obey the exact same MF dynamic, which stays in the space of WI parameter laws and attains therein the population risk's minimizer. We also provide a counterexample to the general attainability of such an optimum over SI laws.\nDespite this, and quite remarkably, we show that the space of SI laws is also preserved by these MF distributional dynamics even when freely trained. This sharply contrasts the finite-$N$ setting, in which EAs are generally not preserved by unconstrained SGD. We illustrate the validity of our findings as $N$ gets larger, in a teacher-student experimental setting, training a student NN to learn from a WI, SI or arbitrary teacher model through various SL schemes. We lastly deduce a data-driven heuristic to discover the largest subspace of parameters supporting SI distributions for a problem, that could be used for designing EA with minimal generalization error.", "keywords": "Overparametrized Neural Networks;Mean Field Limit of Neural Networks;Symmetries in Neural Networks;Wasserstein Gradient Flow;Data Augmentation;Feature Averaging;Equivariant Architechtures.", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/0b679568c60caa99837ae51ecb87a0c5b15d7c99.zip", "author": "Javier Maass Mart\u00ednez;Joaquin Fontbona", "authorids": "~Javier_Maass_Mart\u00ednez1;~Joaquin_Fontbona1", "gender": "M;M", "homepage": ";", "dblp": "347/7448;", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": "0009-0001-5937-757X;", "linkedin": "javier-maass-4a88121bb/;", "or_profile": "~Javier_Maass_Mart\u00ednez1;~Joaquin_Fontbona1", "aff": "Universidad de Chile;", "aff_domain": "uchile.cl;", "position": "Undergrad student;", "bibtex": "@inproceedings{\nmart{\\'\\i}nez2024symmetries,\ntitle={Symmetries in Overparametrized Neural Networks: A Mean Field View},\nauthor={Javier Maass Mart{\\'\\i}nez and Joaquin Fontbona},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L86glqNCUj}\n}", "github": "", "reviewers": "aSNH;4cbm;2QUz;j4Mu", "pdf_size": 4938885, "rating": "7;7;7;7", "confidence": "2;3;3;3", "soundness": "4;3;3;3", "novelty": "4;3;3;3", "presentation": "3;3;2;2", "wc_summary": "64;96;165;789", "wc_strengths": "78;57;30;246", "wc_weaknesses": "121;94;73;542", "wc_questions": "88;339;49;5", "wc_limitations": "90;6;6;36", "wc_review": "441;592;323;1618", "wc_reply_reviewers": "29;224;47;208", "wc_reply_authors": "17;242;43;627", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 278.5, 296.9886361462337 ], "wc_strengths_avg": [ 102.75, 84.43747686898277 ], "wc_weaknesses_avg": [ 207.5, 193.87173594931264 ], "wc_questions_avg": [ 120.25, 129.66374782490286 ], "wc_limitations_avg": [ 34.5, 34.30378987808781 ], "wc_review_avg": [ 743.5, 513.81635824485 ], "wc_reply_reviewers_avg": [ 127.0, 89.406375611586 ], "wc_reply_authors_avg": [ 232.25, 243.96247149920416 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9081442093760592747&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "uchile.cl;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Universidad de Chile", "aff_unique_dep": "", "aff_unique_url": "https://www.uchile.cl", "aff_unique_abbr": "UCH", "aff_country_unique_index": "0", "aff_country_unique": "Chile" }, { "title": "Pessimistic Backward Policy for GFlowNets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95618", "id": "L8Q21Qrjmd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L8Q21Qrjmd", "openreview": "https://openreview.net/forum?id=L8Q21Qrjmd", "poster": "/media/PosterPDFs/NeurIPS%202024/95618.png?t=1733960324.3493552", "project": "", "author_site": "Hyosoon Jang, Yunhui Jang, Minsu Kim, Jinkyoo Park, Sungsoo Ahn", "tldr": "", "abstract": "This paper studies Generative Flow Networks (GFlowNets), which learn to sample objects proportionally to a given reward function through the trajectory of state transitions. In this work, we observe that GFlowNets tend to under-exploit the high-reward objects due to training on insufficient number of trajectories, which may lead to a large gap between the estimated flow and the (known) reward value. In response to this challenge, we propose a pessimistic backward policy for GFlowNets (PBP-GFN), which maximizes the observed flow to align closely with the true reward for the object. We extensively evaluate PBP-GFN across eight benchmarks, including hyper-grid environment, bag generation, structured set generation, molecular generation, and four RNA sequence generation tasks. In particular, PBP-GFN enhances the discovery of high-reward objects, maintains the diversity of the objects, and consistently outperforms existing methods.", "keywords": "Generative flow networks;generative models;reinforcement learning", "primary_area": "generative_models", "supplementary_material": "/attachment/599690f6fed053e869c54dc7f21c0b13f753bc87.zip", "author": "Hyosoon Jang;Yunhui Jang;Minsu Kim;Jinkyoo Park;Sungsoo Ahn", "authorids": "~Hyosoon_Jang3;~Yunhui_Jang1;~Minsu_Kim2;~Jinkyoo_Park1;~Sungsoo_Ahn1", "gender": "M;F;M;M;M", "homepage": "https://hsjang0.github.io/hsjang/;https://yunhuijang.github.io;https://minsuukim.github.io/;http://silab.kaist.ac.kr/;https://sungsooahn.super.site/", "dblp": "341/1374;;;156/7535;90/5164", "google_scholar": "https://scholar.google.co.kr/citations?user=NOJd3GEAAAAJ;https://scholar.google.co.kr/citations?user=mYHCTYQAAAAJ;https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;sH2a0nkAAAAJ;XTenHs0AAAAJ", "orcid": ";;;0000-0003-2620-1479;", "linkedin": ";;;;", "or_profile": "~Hyosoon_Jang3;~Yunhui_Jang1;~Minsu_Kim2;~Jinkyoo_Park1;~Sungsoo_Ahn1", "aff": "POSTECH;Pohang University of Science and Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Pohang University of Science and Technology", "aff_domain": "postech.edu;postech.edu;kaist.ac.kr;kaist.ac.kr;postech.ac.kr", "position": "MS student;PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\njang2024pessimistic,\ntitle={Pessimistic Backward Policy for {GF}lowNets},\nauthor={Hyosoon Jang and Yunhui Jang and Minsu Kim and Jinkyoo Park and Sungsoo Ahn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L8Q21Qrjmd}\n}", "github": "", "reviewers": "u41F;CTvM;smvU;WPBt", "pdf_size": 2315327, "rating": "3;5;5;6", "confidence": "4;4;3;3", "soundness": "3;3;2;2", "novelty": "3;2;2;2", "presentation": "3;3;3;3", "wc_summary": "100;93;50;39", "wc_strengths": "89;30;52;43", "wc_weaknesses": "377;111;57;218", "wc_questions": "6;99;39;127", "wc_limitations": "1;5;7;5", "wc_review": "573;338;205;432", "wc_reply_reviewers": "0;41;20;17", "wc_reply_authors": "42;102;21;24", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.5, 26.405491853021786 ], "wc_strengths_avg": [ 53.5, 21.937410968480304 ], "wc_weaknesses_avg": [ 190.75, 122.14821938939593 ], "wc_questions_avg": [ 67.75, 47.76701267611363 ], "wc_limitations_avg": [ 4.5, 2.179449471770337 ], "wc_review_avg": [ 387.0, 134.30003723007675 ], "wc_reply_reviewers_avg": [ 19.5, 14.568802284333465 ], "wc_reply_authors_avg": [ 47.25, 32.61422235773835 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13142766891356129523&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "postech.edu;postech.edu;kaist.ac.kr;kaist.ac.kr;postech.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Pohang University of Science and Technology;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "POSTECH;KAIST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Transformers Learn to Achieve Second-Order Convergence Rates for In-Context Linear Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95617", "id": "L8h6cozcbn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L8h6cozcbn", "openreview": "https://openreview.net/forum?id=L8h6cozcbn", "poster": "/media/PosterPDFs/NeurIPS%202024/95617.png?t=1731742640.3456016", "project": "", "author_site": "Deqing Fu, Tian-qi Chen, Robin Jia, Vatsal Sharan", "tldr": "", "abstract": "Transformers excel at *in-context learning* (ICL)---learning from demonstrations without parameter updates---but how they do so remains a mystery. Recent work suggests that Transformers may internally run Gradient Descent (GD), a first-order optimization method, to perform ICL. In this paper, we instead demonstrate that Transformers learn to approximate second-order optimization methods for ICL. For in-context linear regression, Transformers share a similar convergence rate as *Iterative Newton's Method*, both *exponentially* faster than GD. Empirically, predictions from successive Transformer layers closely match different iterations of Newton\u2019s Method linearly, with each middle layer roughly computing 3 iterations; thus, Transformers and Newton\u2019s method converge at roughly the same rate. In contrast, Gradient Descent converges exponentially more slowly. We also show that Transformers can learn in-context on ill-conditioned data, a setting where Gradient Descent struggles but Iterative Newton succeeds. Finally, to corroborate our empirical findings, we prove that Transformers can implement $k$ iterations of Newton's method with $k + \\mathcal O(1)$ layers.", "keywords": "transformers;in-context learning;linear regression", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/d80a259b9f8ecdd848237b09d8d36e8c96f62d7c.zip", "author": "Deqing Fu;Tian-qi Chen;Robin Jia;Vatsal Sharan", "authorids": "~Deqing_Fu1;~Tian-qi_Chen1;~Robin_Jia1;~Vatsal_Sharan1", "gender": "M;M;M;", "homepage": "https://deqingfu.github.io/;https://robinjia.github.io/;https://vatsalsharan.github.io/;https://www.linkedin.com/in/tianqi-chen-27a722227/", "dblp": "304/3030;182/2556;126/2543;", "google_scholar": "fsbgfqEAAAAJ;ajZ-_O0AAAAJ;Ize17HEAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Deqing_Fu1;~Robin_Jia1;~Vatsal_Sharan1;~Tianqi_CHEN3", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu", "position": "PhD student;Assistant Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\nfu2024transformers,\ntitle={Transformers Learn to Achieve Second-Order Convergence Rates for In-Context Linear Regression},\nauthor={Deqing Fu and Tian-qi Chen and Robin Jia and Vatsal Sharan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L8h6cozcbn}\n}", "github": "", "reviewers": "J6bG;F4MD;p5Dq;dusD;vdXr", "pdf_size": 7286416, "rating": "3;6;7;7;7", "confidence": "3;3;4;3;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;2", "presentation": "3;3;4;3;4", "wc_summary": "59;87;84;125;457", "wc_strengths": "30;44;135;33;25", "wc_weaknesses": "229;119;28;40;197", "wc_questions": "186;50;48;55;45", "wc_limitations": "1;1;17;84;1", "wc_review": "505;301;312;337;725", "wc_reply_reviewers": "337;12;18;18;40", "wc_reply_authors": "484;16;16;19;19", "reply_reviewers": "1;1;1;1;1", "reply_authors": "5;2;2;2;2", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 162.4, 148.80268814776164 ], "wc_strengths_avg": [ 53.4, 41.27275130155488 ], "wc_weaknesses_avg": [ 122.6, 80.79752471456041 ], "wc_questions_avg": [ 76.8, 54.69698346344157 ], "wc_limitations_avg": [ 20.8, 32.20186330012597 ], "wc_review_avg": [ 436.0, 162.28616700138062 ], "wc_reply_reviewers_avg": [ 85.0, 126.36138650711301 ], "wc_reply_authors_avg": [ 110.8, 186.60482308879372 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.52704627669473, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13090935451276526216&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "usc.edu;usc.edu;usc.edu;usc.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95616", "id": "L8ifDX5XNq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=L8ifDX5XNq", "openreview": "https://openreview.net/forum?id=L8ifDX5XNq", "poster": "/media/PosterPDFs/NeurIPS%202024/95616.png?t=1731252191.6103804", "project": "", "author_site": "Rui Pan, Xiang Liu, SHIZHE DIAO, Renjie Pi, Jipeng Zhang, Chi Han, Tong Zhang", "tldr": "", "abstract": "The machine learning community has witnessed impressive advancements since large language models (LLMs) first appeared. Yet, their massive memory consumption has become a significant roadblock to large-scale training. For instance, a 7B model typically requires at least 60 GB of GPU memory with full parameter training, which presents challenges for researchers without access to high-resource environments. Parameter Efficient Fine-Tuning techniques such as Low-Rank Adaptation (LoRA) have been proposed to alleviate this problem. However, in most large-scale fine-tuning settings, their performance does not reach the level of full parameter training because they confine the parameter search to a low-rank subspace. Attempting to complement this deficiency, we investigate the layerwise properties of LoRA on fine-tuning tasks and observe an unexpected but consistent skewness of weight norms across different layers. Utilizing this key observation, a surprisingly simple training strategy is discovered, which outperforms both LoRA and full parameter training in a wide range of settings with memory costs as low as LoRA. We name it Layerwise Importance Sampled AdamW (LISA), a promising alternative for LoRA, which applies the idea of importance sampling to different layers in LLMs and randomly freeze most middle layers during optimization. Experimental results show that with similar or less GPU memory consumption, LISA surpasses LoRA or even full parameter tuning in downstream fine-tuning tasks, where LISA consistently outperforms LoRA by over 10%-35% in terms of MT-Bench score while achieving on-par or better performance in MMLU, AGIEval and WinoGrande. On large models, specifically LLaMA-2-70B, LISA surpasses LoRA on MT-Bench, GSM8K, and PubMedQA, demonstrating its effectiveness across different domains.", "keywords": "memory efficient training;large language models;parameter efficient fine-tuning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Rui Pan;Xiang Liu;Shizhe Diao;Renjie Pi;Jipeng Zhang;Chi Han;Tong Zhang", "authorids": "~Rui_Pan4;~Xiang_Liu10;~Shizhe_Diao2;~Renjie_Pi1;~Jipeng_Zhang1;~Chi_Han1;~Tong_Zhang2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://dominic789654.github.io/;;https://2003pro.github.io/;https://glaciohound.github.io;http://tongzhang-ml.org;https://shizhediao.github.io/", "dblp": "74/9957;31/5736-1;67/2156;;255/6993;07/4227-1;221/3896", "google_scholar": ";VtK5lwUAAAAJ;XUq0HwcAAAAJ;q0De288AAAAJ;https://scholar.google.com.sg/citations?user=DcSvbuAAAAAJ;LurWtuYAAAAJ;NDFQrLQAAAAJ", "orcid": "0000-0001-7217-0656;;;;0000-0001-6235-5841;0000-0002-5511-2558;", "linkedin": ";;;;chi-han-b01a93141/;;", "or_profile": "~Rui_Pan4;~Xiang_Liu10;~Renjie_Pi1;~Jipeng_Zhang1;~Chi_Han1;~Tong_Zhang2;~SHIZHE_DIAO1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guang Zhou));Hong Kong University of Science and Technology;Department of Computer Science and Engineering, The Hong Kong University of Science and Technology;University of Illinois, Urbana Champaign;UIUC;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;hkust-gz.edu.cn;ust.hk;cse.ust.hk;illinois.edu;illinois.edu;ust.hk", "position": "MS student;PhD student;PhD student;PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\npan2024lisa,\ntitle={{LISA}: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning},\nauthor={Rui Pan and Xiang Liu and Shizhe Diao and Renjie Pi and Jipeng Zhang and Chi Han and Tong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=L8ifDX5XNq}\n}", "github": "", "reviewers": "kABp;K468;Rt8C", "pdf_size": 1730671, "rating": "5;7;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "2;3;4", "presentation": "2;3;3", "wc_summary": "109;103;71", "wc_strengths": "31;30;135", "wc_weaknesses": "322;56;162", "wc_questions": "2;62;1", "wc_limitations": "7;6;6", "wc_review": "471;257;375", "wc_reply_reviewers": "14;30;0", "wc_reply_authors": "216;66;0", "reply_reviewers": "1;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 94.33333333333333, 16.679994670929073 ], "wc_strengths_avg": [ 65.33333333333333, 49.2634640366356 ], "wc_weaknesses_avg": [ 180.0, 109.33739829841693 ], "wc_questions_avg": [ 21.666666666666668, 28.522895287041873 ], "wc_limitations_avg": [ 6.333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 367.6666666666667, 87.51888685052819 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 12.256517540566824 ], "wc_reply_authors_avg": [ 94.0, 90.37698822156003 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15497823023591772899&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ust.hk;hkust-gz.edu.cn;ust.hk;cse.ust.hk;illinois.edu;illinois.edu;ust.hk", "author_num": 7, "aff_unique_index": "0;0;0;0;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://illinois.edu", "aff_unique_abbr": "HKUST;UIUC", "aff_campus_unique_index": "0;0;0;0;1;1;0", "aff_campus_unique": "Hong Kong SAR;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "EfficientCAPER: An End-to-End Framework for Fast and Robust Category-Level Articulated Object Pose Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95615", "id": "LBXSP79oCd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LBXSP79oCd", "openreview": "https://openreview.net/forum?id=LBXSP79oCd", "poster": "", "project": "", "author_site": "Xinyi Yu, Haonan Jiang, Li Zhang, Lin Yuanbo Wu, Linlin Ou, Liu Liu", "tldr": "", "abstract": "Human life is populated with articulated objects. Pose estimation for category-level articulated objects is a significant challenge due to their inherent complexity and diverse kinematic structures. Current methods for this task usually meet the problems of insufficient consideration of kinematic constraints, self-occlusion, and optimization requirements. In this paper, we propose EfficientCAPER, an end-to-end Category-level Articulated object Pose EstimatoR, eliminating the need for optimization functions as post-processing and utilizing the kinematic structure for joint-centric pose modeling, thus enhancing the efficiency and applicability. Given a partial point cloud as input, the EfficientCAPER firstly estimates the pose for the free part of an articulated object using decoupled rotation representation. Next, we canonicalize the input point cloud to estimate constrained parts' poses by predicting the joint parameters and states as replacements. Evaluations on three diverse datasets, ArtImage, ReArtMix, and RobotArm, show EfficientCAPER's effectiveness and generalization ability to real-world scenarios. The framework exhibits excellent static pose estimation performance for articulated objects, contributing to the advancement of category-level pose estimation. Codes will be made publicly available.", "keywords": "Articulated Object;Pose Estimation;Joint-centric Pose Modeling", "primary_area": "robotics", "supplementary_material": "", "author": "Xinyi Yu;Haonan Jiang;Li Zhang;Lin Yuanbo Wu;Linlin Ou;Liu Liu", "authorids": "~Xinyi_Yu1;~Haonan_Jiang3;~Li_Zhang25;~Lin_Yuanbo_Wu1;~Linlin_Ou1;~Liu_Liu13", "gender": "M;;;F;F;M", "homepage": "https://www.researchgate.net/profile/Yu_Xinyi;https://github.com/wxycwymds;;https://sites.google.com/site/linwuuq/home;https://www.researchgate.net/profile/Linlin_Ou;http://home.ustc.edu.cn/~liuliu66/", "dblp": ";;;65/6292-1;17/7817.html;74/7037-12", "google_scholar": ";;;https://scholar.google.com.au/citations?user=mMiJUegAAAAJ;;-_aPWUIAAAAJ", "orcid": "0000-0001-8716-7687;;;0000-0001-6119-058X;;", "linkedin": ";;;;;", "or_profile": "~Xinyi_Yu1;~Haonan_Jiang3;~Li_Zhang25;~Lin_Yuanbo_Wu1;~Linlin_Ou1;~Liu_Liu13", "aff": "Zhejiang University of Technology;Zhejiang University of Technology;;Swansea University;Zhejiang University of Technology;Hefei University of Technology", "aff_domain": "zjut.edu.cn;zjut.edu.cn;;swansea.ac.uk;zjut.edu.cn;hfut.edu.cn", "position": "Associate Professor;MS student;;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyu2024efficientcaper,\ntitle={Efficient{CAPER}: An End-to-End Framework for Fast and Robust Category-Level Articulated Object Pose Estimation},\nauthor={Xinyi Yu and Haonan Jiang and Li Zhang and Lin Yuanbo Wu and Linlin Ou and Liu Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LBXSP79oCd}\n}", "github": "", "reviewers": "UsR2;Gf45;cXQd;UxV6", "pdf_size": 2081004, "rating": "4;5;5;5", "confidence": "4;3;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;1;3;3", "wc_summary": "63;74;69;54", "wc_strengths": "105;54;57;33", "wc_weaknesses": "156;109;141;129", "wc_questions": "7;12;42;6", "wc_limitations": "1;13;7;8", "wc_review": "332;262;316;230", "wc_reply_reviewers": "58;0;0;0", "wc_reply_authors": "163;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 65.0, 7.44983221287567 ], "wc_strengths_avg": [ 62.25, 26.356925086208367 ], "wc_weaknesses_avg": [ 133.75, 17.195566289017645 ], "wc_questions_avg": [ 16.75, 14.7542366796795 ], "wc_limitations_avg": [ 7.25, 4.264680527307995 ], "wc_review_avg": [ 285.0, 41.0 ], "wc_reply_reviewers_avg": [ 14.5, 25.11473670974872 ], "wc_reply_authors_avg": [ 40.75, 70.58107040843176 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BAmYqVdo-AIJ:scholar.google.com/&scioq=EfficientCAPER:+An+End-to-End+Framework+for+Fast+and+Robust+Category-Level+Articulated+Object+Pose+Estimation&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "zjut.edu.cn;zjut.edu.cn;;swansea.ac.uk;zjut.edu.cn;hfut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Zhejiang University of Technology;Swansea University;Hefei University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zjut.edu.cn;https://www.swansea.ac.uk;http://www.hfut.edu.cn/", "aff_unique_abbr": "ZJUT;Swansea;HUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "SciInstruct: a Self-Reflective Instruction Annotated Dataset for Training Scientific Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97744", "id": "LC1QAqhePv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LC1QAqhePv", "openreview": "https://openreview.net/forum?id=LC1QAqhePv", "poster": "/media/PosterPDFs/NeurIPS%202024/97744.png?t=1731142206.6639361", "project": "", "author_site": "Dan Zhang, Ziniu Hu, Sining Zhoubian, Zhengxiao Du, Kaiyu Yang, Zihan Wang, Yisong Yue, Yuxiao Dong, Jie Tang", "tldr": "", "abstract": "Large Language Models (LLMs) have shown promise in assisting scientific discovery. However, such applications are currently limited by LLMs' deficiencies in understanding intricate scientific concepts, deriving symbolic equations, and solving advanced numerical calculations. To bridge these gaps, we introduce SciInstruct, a suite of scientific instructions for training scientific language models capable of college-level scientific reasoning. Central to our approach is a novel self-reflective instruction annotation framework to address the data scarcity challenge in the science domain. This framework leverages existing LLMs to generate step-by-step reasoning for unlabelled scientific questions, followed by a process of self-reflective critic-and-revise. Applying this framework, we curated a diverse and high-quality dataset encompassing physics, chemistry, math, and formal proofs. We analyze the curated SciInstruct from multiple interesting perspectives (e.g., domain, scale, source, question type, answer length, etc.). To verify the effectiveness of SciInstruct, we fine-tuned different language models with SciInstruct, i.e., ChatGLM3 (6B and 32B), Llama3-8B-Instruct, and Mistral-7B: MetaMath, enhancing their scientific and mathematical reasoning capabilities, without sacrificing the language understanding capabilities of the base model. We release all codes and SciInstruct at https://github.com/THUDM/SciGLM.", "keywords": "self-reflective instruction;scientific reasoning;scientific language model", "primary_area": "", "supplementary_material": "/attachment/1e4f31e29c08ceeae77ac96eabdee4a37a81473c.zip", "author": "Dan Zhang;Ziniu Hu;Sining Zhoubian;Zhengxiao Du;Kaiyu Yang;Zihan Wang;Yisong Yue;Yuxiao Dong;Jie Tang", "authorids": "~Dan_Zhang7;~Ziniu_Hu1;~Sining_Zhoubian1;~Zhengxiao_Du1;~Kaiyu_Yang1;~Zihan_Wang7;~Yisong_Yue1;~Yuxiao_Dong1;~Jie_Tang1", "gender": "F;M;M;M;M;M;M;M;", "homepage": "https://zhangdan0602.github.io/;http://acbull.github.io;https://github.com/zhoubiansining/zhou;https://duzx16.github.io;https://yangky11.github.io;https://pka.moe;http://www.yisongyue.com;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": ";180/5436;367/1870;234/0081;177/9276;;28/1244;17/9267;", "google_scholar": "https://scholar.google.ca/citations?hl=en;x6ct1CsAAAAJ;https://scholar.google.com/citations?view_op=list_works;A8x07E0AAAAJ;FciCu4EAAAAJ;;tEk4qo8AAAAJ;https://scholar.google.com.hk/citations?hl=en;", "orcid": "0000-0003-1115-3945;;;;0000-0002-2777-612X;;0000-0001-9127-1989;0000-0002-6092-2002;", "linkedin": ";;;;kaiyuy;;yisongyue/;;", "or_profile": "~Dan_Zhang7;~Ziniu_Hu1;~Sining_Zhoubian1;~Zhengxiao_Du1;~Kaiyu_Yang1;~Zihan_Wang7;~Yisong_Yue1;~Yuxiao_Dong1;~Jie_Tang1", "aff": "California Institute of Technology;Deepmind;Tsinghua University;Tsinghua University;California Institute of Technology;Tsinghua University;California Institute of Technology;Tsinghua University;", "aff_domain": "caltech.edu;deepmind.com;mails.tsinghua.edu.cn;tsinghua.edu.cn;caltech.edu;tsinghua.edu.cn;caltech.edu;tsinghua.edu.cn;", "position": "Researcher;Visiting Researcher;Undergrad student;PhD student;Postdoc;Undergrad student;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nzhang2024sciinstruct,\ntitle={SciInstruct: a Self-Reflective Instruction Annotated Dataset for Training Scientific Language Models},\nauthor={Dan Zhang and Ziniu Hu and Sining Zhoubian and Zhengxiao Du and Kaiyu Yang and Zihan Wang and Yisong Yue and Yuxiao Dong and Jie Tang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=LC1QAqhePv}\n}", "github": "", "reviewers": "Wv6V;kniG;UPD9;KU1m", "pdf_size": 1066775, "rating": "6;6;7;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "59;80;58;75", "wc_strengths": "3;34;65;45", "wc_improvement": "13;91;80;127", "wc_limitations": "14;1;45;1", "wc_correctness": "1;1;11;1", "wc_clarity": "1;8;3;13", "wc_relation_to_prior_work": "66;1;5;35", "wc_documentation": "1;1;14;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "159;218;282;305", "wc_reply_reviewers": "0;0;19;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 68.0, 9.669539802906858 ], "wc_strengths_avg": [ 36.75, 22.431841208425134 ], "wc_improvement_avg": [ 77.75, 41.227266462864115 ], "wc_limitations_avg": [ 15.25, 17.977416388346796 ], "wc_correctness_avg": [ 3.5, 4.330127018922194 ], "wc_clarity_avg": [ 6.25, 4.656984002549289 ], "wc_relation_to_prior_work_avg": [ 26.75, 26.1951808545007 ], "wc_documentation_avg": [ 5.75, 5.356071321407137 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 241.0, 57.074512700504066 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9023814843470287265&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "caltech.edu;deepmind.com;mails.tsinghua.edu.cn;tsinghua.edu.cn;caltech.edu;tsinghua.edu.cn;caltech.edu;tsinghua.edu.cn;", "author_num": 9, "aff_unique_index": "0;1;2;2;0;2;0;2", "aff_unique_norm": "California Institute of Technology;DeepMind;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.caltech.edu;https://deepmind.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Caltech;DeepMind;THU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;1;2;2;0;2;0;2", "aff_country_unique": "United States;United Kingdom;China" }, { "title": "Partial observation can induce mechanistic mismatches in data-constrained models of neural dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95614", "id": "LCEgP7Ir6k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LCEgP7Ir6k", "openreview": "https://openreview.net/forum?id=LCEgP7Ir6k", "poster": "", "project": "", "author_site": "William Qian, Jacob Zavatone-Veth, Ben Ruben, Cengiz Pehlevan", "tldr": "", "abstract": "One of the central goals of neuroscience is to gain a mechanistic understanding of how the dynamics of neural circuits give rise to their observed function. A popular approach towards this end is to train recurrent neural networks (RNNs) to reproduce experimental recordings of neural activity. These trained RNNs are then treated as surrogate models of biological neural circuits, whose properties can be dissected via dynamical systems analysis. How reliable are the mechanistic insights derived from this procedure? While recent advances in population-level recording technologies have allowed simultaneous recording of up to tens of thousands of neurons, this represents only a tiny fraction of most cortical circuits. Here we show that observing only a subset of neurons in a circuit can create mechanistic mismatches between a simulated teacher network and a data-constrained student, even when the two networks have matching single-unit dynamics. In particular, we show that partial observation of models of low-dimensional cortical dynamics based on functionally feedforward or low-rank connectivity can lead to surrogate models with spurious attractor structure. In total, our results illustrate the challenges inherent in accurately uncovering neural mechanisms from single-trial data, and suggest the need for new methods of validating data-constrained models for neural dynamics.", "keywords": "neuroscience;recurrent neural networks;dynamical systems", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/a1bab835c380df626f6f357e0f48b099f529584b.zip", "author": "William Qian;Jacob A Zavatone-Veth;Benjamin Samuel Ruben;Cengiz Pehlevan", "authorids": "~William_Qian1;~Jacob_A_Zavatone-Veth1;~Benjamin_Samuel_Ruben1;~Cengiz_Pehlevan2", "gender": ";M;M;", "homepage": ";https://jzv.io;https://benruben.me;https://pehlevan.seas.harvard.edu/", "dblp": ";270/9915;;145/3480", "google_scholar": "qvHp8KwAAAAJ;i_HogJkAAAAJ;7Kju8PcAAAAJ;veDLTPEAAAAJ", "orcid": ";0000-0002-4060-1738;;0000-0001-9767-6063", "linkedin": ";;;", "or_profile": "~William_Qian1;~Jacob_A_Zavatone-Veth1;~Benjamin_Samuel_Ruben1;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "g.harvard.edu;harvard.edu;harvard.edu;seas.harvard.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqian2024partial,\ntitle={Partial observation can induce mechanistic mismatches in data-constrained models of neural dynamics},\nauthor={William Qian and Jacob A Zavatone-Veth and Benjamin Samuel Ruben and Cengiz Pehlevan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LCEgP7Ir6k}\n}", "github": "", "reviewers": "VFVu;njnf;Tqmw", "pdf_size": 16443662, "rating": "6;6;7", "confidence": "4;4;5", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "4;4;3", "wc_summary": "136;58;69", "wc_strengths": "110;55;59", "wc_weaknesses": "54;169;585", "wc_questions": "258;7;2", "wc_limitations": "54;7;4", "wc_review": "612;296;719", "wc_reply_reviewers": "22;93;65", "wc_reply_authors": "16;35;55", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.66666666666667, 34.4705993887867 ], "wc_strengths_avg": [ 74.66666666666667, 25.037749277618563 ], "wc_weaknesses_avg": [ 269.3333333333333, 228.09403518919316 ], "wc_questions_avg": [ 89.0, 119.5184783481896 ], "wc_limitations_avg": [ 21.666666666666668, 22.895899681432525 ], "wc_review_avg": [ 542.3333333333334, 179.57790014982976 ], "wc_reply_reviewers_avg": [ 60.0, 29.20045661743437 ], "wc_reply_authors_avg": [ 35.333333333333336, 15.923427883328248 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3698559050931898180&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 5, "email": "g.harvard.edu;harvard.edu;harvard.edu;seas.harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "LDwsvLQTLx", "title": "Open-vocabulary vs. Closed-set: Best Practice for Few-shot Object Detection Considering Text Describability", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Open-vocabulary object detection (OVD), detecting specific classes of objects using only their linguistic descriptions (e.g., class names) without any image samples, has garnered significant attention. However, in real-world applications, the target class concepts is often hard to describe in text and the only way to specify target objects is to provide their image examples, yet it is often challenging to obtain a good number of samples. Thus, there is a high demand from practitioners for few-shot object detection (FSOD). A natural question arises: Can the benefits of OVD extend to FSOD for object classes that are difficult to describe in text? Compared to traditional methods that learn only predefined classes (referred to in this paper as closed-set object detection, COD), can the extra cost of OVD be justified? To answer these questions, we propose a method to quantify the ``text-describability'' of object detection datasets using the zero-shot image classification accuracy with CLIP. This allows us to categorize various OD datasets with different text-describability and emprically evaluate the FSOD performance of OVD and COD methods within each category. Our findings reveal that: i) there is little difference between OVD and COD for object classes with low text-describability under equal conditions in OD pretraining; and ii) although OVD can learn from more diverse data than OD-specific data, thereby increasing the volume of training data, it can be counterproductive for classes with low-text-describability. These findings provide practitioners with valuable guidance amidst the recent advancements of OVD methods.", "keywords": "object detection;open-vocabulary object detection;few-shot object detection", "primary_area": "", "supplementary_material": "/attachment/f7a9ca4ac6fbd004c85c5cbd62a68598597810c3.pdf", "author": "Yusuke Hosoya;Masanori Suganuma;Takayuki Okatani", "authorids": "~Yusuke_Hosoya1;~Masanori_Suganuma1;~Takayuki_Okatani1", "gender": "M;M;M", "homepage": ";https://sites.google.com/site/suganumamasanori/eng;", "dblp": "251/3274.html;179/9075;18/4811", "google_scholar": ";https://scholar.google.co.jp/citations?user=NpWGfwgAAAAJ;https://scholar.google.co.jp/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yusuke_Hosoya1;~Masanori_Suganuma1;~Takayuki_Okatani1", "aff": "Tohoku University;Tohoku University;Tohoku University", "aff_domain": "tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024openvocabulary,\ntitle={Open-vocabulary vs. Closed-set: Best Practice for Few-shot Object Detection Considering Text Describability},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=LDwsvLQTLx}\n}", "github": "", "project": "", "reviewers": "s2ko;qkEt;kNtc;L1R6", "site": "https://openreview.net/forum?id=LDwsvLQTLx", "pdf_size": 1120121, "rating": "5;5;5;7", "confidence": "5;3;4;4", "wc_summary_and_contributions": "60;62;103;106", "wc_strengths": "4;31;22;30", "wc_improvement": "65;130;103;55", "wc_limitations": "1;10;227;37", "wc_correctness": "1;1;6;1", "wc_clarity": "1;6;6;1", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "1;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "135;243;470;233", "wc_reply_reviewers": "0;0;0;29", "wc_reply_authors": "633;424;585;297", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 82.75, 21.787324296480282 ], "wc_strengths_avg": [ 21.75, 10.825317547305483 ], "wc_improvement_avg": [ 88.25, 30.028111828751403 ], "wc_limitations_avg": [ 68.75, 92.32111080354265 ], "wc_correctness_avg": [ 2.25, 2.165063509461097 ], "wc_clarity_avg": [ 3.5, 2.5 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 270.25, 122.80345068441684 ], "wc_reply_reviewers_avg": [ 7.25, 12.55736835487436 ], "wc_reply_authors_avg": [ 484.75, 133.19980292778214 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:laOMlFadAYoJ:scholar.google.com/&scioq=Open-vocabulary+vs.+Closed-set:+Best+Practice+for+Few-shot+Object+Detection+Considering+Text+Describability&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tohoku University", "aff_unique_dep": "", "aff_unique_url": "https://www.tohoku.ac.jp", "aff_unique_abbr": "Tohoku U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "A Best-of-both-worlds Algorithm for Bandits with Delayed Feedback with Robustness to Excessive Delays", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95613", "id": "LDzrQB4X5w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LDzrQB4X5w", "openreview": "https://openreview.net/forum?id=LDzrQB4X5w", "poster": "/media/PosterPDFs/NeurIPS%202024/95613.png?t=1732360328.7990255", "project": "", "author_site": "Saeed Masoudian, Julian Zimmert, Yevgeny Seldin", "tldr": "", "abstract": "We propose a new best-of-both-worlds algorithm for bandits with variably delayed feedback. In contrast to prior work, which required prior knowledge of the maximal delay $d_{\\max}$ and had a linear dependence of the regret on it, our algorithm can tolerate arbitrary excessive delays up to order $T$ (where $T$ is the time horizon). The algorithm is based on three technical innovations, which may all be of independent interest: (1) We introduce the first implicit exploration scheme that works in best-of-both-worlds setting. (2) We introduce the first control of distribution drift that does not rely on boundedness of delays. The control is based on the implicit exploration scheme and adaptive skipping of observations with excessive delays. (3) We introduce a procedure relating standard regret with drifted regret that does not rely on boundedness of delays. At the conceptual level, we demonstrate that complexity of best-of-both-worlds bandits with delayed feedback is characterized by the amount of information missing at the time of decision making (measured by the number of outstanding observations) rather than the time that the information is missing (measured by the delays).", "keywords": "Best-of-both-worlds;delayed bandit feedback", "primary_area": "bandits", "supplementary_material": "", "author": "Saeed Masoudian;Julian Zimmert;Yevgeny Seldin", "authorids": "~Saeed_Masoudian1;~Julian_Zimmert1;~Yevgeny_Seldin2", "gender": "M;;M", "homepage": ";;https://sites.google.com/site/yevgenyseldin/", "dblp": ";190/7636;34/39", "google_scholar": "https://scholar.google.dk/citations?hl=en;;fpWsD9oAAAAJ", "orcid": "0000-0003-2665-5518;;", "linkedin": ";;", "or_profile": "~Saeed_Masoudian1;~Julian_Zimmert1;~Yevgeny_Seldin2", "aff": ";Google;University of Copenhagen", "aff_domain": ";google.com;di.ku.dk", "position": ";Postdoc;Full Professor", "bibtex": "@inproceedings{\nmasoudian2024a,\ntitle={A Best-of-both-worlds Algorithm for Bandits with Delayed Feedback with Robustness to Excessive Delays},\nauthor={Saeed Masoudian and Julian Zimmert and Yevgeny Seldin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LDzrQB4X5w}\n}", "github": "", "reviewers": "bmXx;9WBQ;oEfL;2mU9", "pdf_size": 435506, "rating": "6;6;6;6", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;2", "wc_summary": "68;60;70;161", "wc_strengths": "91;89;61;57", "wc_weaknesses": "291;166;149;131", "wc_questions": "118;50;32;27", "wc_limitations": "1;1;1;2", "wc_review": "569;366;313;378", "wc_reply_reviewers": "132;16;30;35", "wc_reply_authors": "136;0;0;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.75, 41.30602256330183 ], "wc_strengths_avg": [ 74.5, 15.580436450882884 ], "wc_weaknesses_avg": [ 184.25, 62.86244904551524 ], "wc_questions_avg": [ 56.75, 36.38251640554842 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 406.5, 96.9548864163122 ], "wc_reply_reviewers_avg": [ 53.25, 45.996603135449035 ], "wc_reply_authors_avg": [ 38.25, 56.86112468110352 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5867675152986292992&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": ";google.com;di.ku.dk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Google;University of Copenhagen", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ku.dk", "aff_unique_abbr": "Google;UCPH", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Denmark" }, { "title": "Robot Policy Learning with Temporal Optimal Transport Reward", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95612", "id": "LEed5Is4oi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LEed5Is4oi", "openreview": "https://openreview.net/forum?id=LEed5Is4oi", "poster": "/media/PosterPDFs/NeurIPS%202024/95612.png?t=1732888196.7095418", "project": "", "author_site": "Yuwei Fu, Haichao Zhang, Di Wu, Wei Xu, Benoit Boulet", "tldr": "", "abstract": "Reward specification is one of the most tricky problems in Reinforcement Learning, which usually requires tedious hand engineering in practice. One promising approach to tackle this challenge is to adopt existing expert video demonstrations for policy learning. Some recent work investigates how to learn robot policies from only a single/few expert video demonstrations. For example, reward labeling via Optimal Transport (OT) has been shown to be an effective strategy to generate a proxy reward by measuring the alignment between the robot trajectory and the expert demonstrations. However, previous work mostly overlooks that the OT reward is invariant to temporal order information, which could bring extra noise to the reward signal. To address this issue, in this paper, we introduce the Temporal Optimal Transport (TemporalOT) reward to incorporate temporal order information for learning a more accurate OT-based proxy reward. Extensive experiments on the Meta-world benchmark tasks validate the efficacy of the proposed method. Our code is available at: https://github.com/fuyw/TemporalOT.", "keywords": "Reinforcement Learning;Imitation Learning;Optimal Transport", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yuwei Fu;Haichao Zhang;Di Wu;Wei Xu;Benoit Boulet", "authorids": "~Yuwei_Fu1;~Haichao_Zhang4;~Di_Wu11;~Wei_Xu13;~Benoit_Boulet1", "gender": "M;M;M;M;M", "homepage": "http://mcgillialab.com/people/;http://mcgillialab.com/people/;;https://www.mcgill.ca/ece/benoit-boulet;https://sites.google.com/site/hczhang1/", "dblp": "200/1646;52/328-44.html;;;", "google_scholar": ";https://scholar.google.ca/citations?user=IbcoTsgAAAAJ;Gxz1fqwAAAAJ;https://scholar.google.ca/citations?user=kkGyLY4AAAAJ;_OsT-RgAAAAJ", "orcid": ";;;0000-0002-3191-3967;", "linkedin": ";;;benoit-boulet-97078012/;", "or_profile": "~Yuwei_Fu1;~Di_Wu11;~Wei_Xu13;~Benoit_Boulet1;~Haichao_Zhang2", "aff": "McGill University;McGill University;Horizon Robotics;McGill University;Horizon Robotics", "aff_domain": "mcgill.ca;mcgill.ca;horizon.auto;mcgill.ca;horizon.ai", "position": "PhD student;Adjunct Professor;Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nfu2024robot,\ntitle={Robot Policy Learning with Temporal Optimal Transport Reward},\nauthor={Yuwei Fu and Haichao Zhang and Di Wu and Wei Xu and Benoit Boulet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LEed5Is4oi}\n}", "github": "", "reviewers": "GZSS;M6BJ;FnLe;hYj8", "pdf_size": 9867611, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "38;90;166;55", "wc_strengths": "22;37;83;42", "wc_weaknesses": "248;81;96;2", "wc_questions": "502;22;6;33", "wc_limitations": "60;39;1;11", "wc_review": "870;269;352;143", "wc_reply_reviewers": "164;105;37;0", "wc_reply_authors": "979;790;35;0", "reply_reviewers": "2;2;1;0", "reply_authors": "3;3;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 49.18015351745051 ], "wc_strengths_avg": [ 46.0, 22.594247055390007 ], "wc_weaknesses_avg": [ 106.75, 89.02633037478294 ], "wc_questions_avg": [ 140.75, 208.7886191821767 ], "wc_limitations_avg": [ 27.75, 23.25268801665734 ], "wc_review_avg": [ 408.5, 276.64282025745763 ], "wc_reply_reviewers_avg": [ 76.5, 63.00991985394046 ], "wc_reply_authors_avg": [ 451.0, 438.7943709757453 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1492287554026885677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mcgill.ca;mcgill.ca;horizon.auto;mcgill.ca;horizon.ai", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "McGill University;Horizon Robotics", "aff_unique_dep": ";", "aff_unique_url": "https://www.mcgill.ca;https://www.horizon-robotics.com/", "aff_unique_abbr": "McGill;Horizon Robotics", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Canada;China" }, { "title": "RL-GPT: Integrating Reinforcement Learning and Code-as-policy", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95611", "id": "LEzx6QRkRH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LEzx6QRkRH", "openreview": "https://openreview.net/forum?id=LEzx6QRkRH", "poster": "/media/PosterPDFs/NeurIPS%202024/95611.png?t=1733408511.702013", "project": "", "author_site": "Shaoteng Liu, Haoqi Yuan, Minda Hu, Yanwei Li, Yukang Chen, Shu Liu, Zongqing Lu, Jiaya Jia", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated proficiency in utilizing various tools by coding, yet they face limitations in handling intricate logic and precise control. In embodied tasks, high-level planning is amenable to direct coding, while low-level actions often necessitate task-specific refinement, such as Reinforcement Learning (RL). To seamlessly integrate both modalities, we introduce a two-level hierarchical framework, RL-GPT, comprising a slow agent and a fast agent. The slow agent analyzes actions suitable for coding, while the fast agent executes coding tasks. This decomposition effectively focuses each agent on specific tasks, proving highly efficient within our pipeline. Our approach outperforms traditional RL methods and existing GPT agents, demonstrating superior efficiency. In the Minecraft game, it rapidly obtains diamonds within a single day on an RTX3090. Additionally, it achieves SOTA performance across all designated MineDojo tasks.", "keywords": "Agent;Large Language Models (LLMs);Reinforcement Learning (RL)", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Shaoteng Liu;Haoqi Yuan;Minda Hu;Yanwei Li;Yukang Chen;Shu Liu;Zongqing Lu;Jiaya Jia", "authorids": "~Shaoteng_Liu1;~Haoqi_Yuan1;~Minda_Hu1;~Yanwei_Li1;~Yukang_Chen1;~Shu_Liu4;~Zongqing_Lu2;~Jiaya_Jia1", "gender": "M;M;M;;M;M;;M", "homepage": "https://www.shaotengliu.com/;;;;https://yukangchen.com/;https://shuliu1993.github.io/;;https://jiaya.me", "dblp": "02/10511;254/2084;260/5462.html;;225/4601;57/1180-5;;31/5649", "google_scholar": "v4JMf6kAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;6p0ygKUAAAAJ;BUEDUFkAAAAJ;;https://scholar.google.com.tw/citations?user=XPAkzTEAAAAJ", "orcid": ";;0000-0003-1048-1998;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Shaoteng_Liu1;~Haoqi_Yuan1;~Minda_Hu1;~Yanwei_Li1;~Yukang_Chen1;~Shu_Liu4;~Zongqing_Lu2;~Jiaya_Jia1", "aff": "Adobe Systems;Peking University;The Chinese University of Hong Kong;;NVIDIA;SmartMore Ltd.;;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "adobe.com;pku.edu.cn;cse.cuhk.edu.hk;;nvidia.com;smartmore.com;;cse.ust.hk", "position": "Intern;PhD student;PhD student;;Researcher;Principal Researcher;;Full Professor", "bibtex": "@inproceedings{\nliu2024rlgpt,\ntitle={{RL}-{GPT}: Integrating Reinforcement Learning and Code-as-policy},\nauthor={Shaoteng Liu and Haoqi Yuan and Minda Hu and Yanwei Li and Yukang Chen and Shu Liu and Zongqing Lu and Jiaya Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LEzx6QRkRH}\n}", "github": "", "reviewers": "ep8Y;m13r;LWBp;efny", "pdf_size": 6221613, "rating": "5;5;7;8", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;4;3", "presentation": "2;3;3;4", "wc_summary": "27;125;227;61", "wc_strengths": "40;79;181;38", "wc_weaknesses": "315;67;127;57", "wc_questions": "2;27;230;68", "wc_limitations": "17;17;144;1", "wc_review": "401;315;909;225", "wc_reply_reviewers": "123;0;32;13", "wc_reply_authors": "44;0;38;25", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 110.0, 76.16429609731846 ], "wc_strengths_avg": [ 84.5, 58.06246636166948 ], "wc_weaknesses_avg": [ 141.5, 103.68582352472299 ], "wc_questions_avg": [ 81.75, 88.7760525141775 ], "wc_limitations_avg": [ 44.75, 57.67310898503739 ], "wc_review_avg": [ 462.5, 265.1919116413621 ], "wc_reply_reviewers_avg": [ 42.0, 48.130032204435516 ], "wc_reply_authors_avg": [ 26.75, 16.90229274388537 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9622504486493761, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14330112339733601870&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "adobe.com;pku.edu.cn;cse.cuhk.edu.hk;;nvidia.com;smartmore.com;;cse.ust.hk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Adobe;Peking University;Chinese University of Hong Kong;NVIDIA;SmartMore;Hong Kong University of Science and Technology", "aff_unique_dep": "Adobe Systems Incorporated;;;NVIDIA Corporation;;Department of Computer Science and Engineering", "aff_unique_url": "https://www.adobe.com;http://www.pku.edu.cn;https://www.cuhk.edu.hk;https://www.nvidia.com;https://www.smartmore.com;https://www.ust.hk", "aff_unique_abbr": "Adobe;Peking U;CUHK;NVIDIA;SmartMore;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "United States;China" }, { "title": "emg2qwerty: A Large Dataset with Baselines for Touch Typing using Surface Electromyography", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97743", "id": "LFCWIE5iS2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LFCWIE5iS2", "openreview": "https://openreview.net/forum?id=LFCWIE5iS2", "poster": "/media/PosterPDFs/NeurIPS%202024/97743.png?t=1731357788.8750048", "project": "", "author_site": "Viswanath Sivakumar, Jeffrey Seely, Alan Du, Sean Bittner, Adam Berenzweig, Anuoluwapo Bolarinwa, Alex Gramfort, Michael Mandel", "tldr": "", "abstract": "Surface electromyography (sEMG) non-invasively measures signals generated by muscle activity with sufficient sensitivity to detect individual spinal neurons and richness to identify dozens of gestures and their nuances. Wearable wrist-based sEMG sensors have the potential to offer low friction, subtle, information rich, always available human-computer inputs. To this end, we introduce emg2qwerty, a large-scale dataset of non-invasive electromyographic signals recorded at the wrists while touch typing on a QWERTY keyboard, together with ground-truth annotations and reproducible baselines. With 1,135 sessions spanning 108 users and 346 hours of recording, this is the largest such public dataset to date. These data demonstrate non-trivial, but well defined hierarchical relationships both in terms of the generative process, from neurons to muscles and muscle combinations, as well as in terms of domain shift across users and user sessions. Applying standard modeling techniques from the closely related field of Automatic Speech Recognition (ASR), we show strong baseline performance on predicting key-presses using sEMG signals alone. We believe the richness of this task and dataset will facilitate progress in several problems of interest to both the machine learning and neuroscientific communities. Dataset and code can be accessed at https://github.com/facebookresearch/emg2qwerty.", "keywords": "Electromyography;EMG;sEMG;Non-Invasive;Neuromotor;Neuromotor Interface;ASR;Speech;Dataset;Benchmark;QWERTY;Typing", "primary_area": "", "supplementary_material": "", "author": "Viswanath Sivakumar;Jeffrey Seely;Alan Du;Sean R Bittner;Adam Berenzweig;Anuoluwapo Bolarinwa;Alexandre Gramfort;Michael I Mandel", "authorids": "~Viswanath_Sivakumar2;~Jeffrey_Seely1;~Alan_Du1;~Sean_R_Bittner1;~Adam_Berenzweig1;~Anuoluwapo_Bolarinwa1;~Alexandre_Gramfort1;~Michael_I_Mandel1", "gender": "M;M;M;M;F;M;M;M", "homepage": "http://jsseely.com/;;https://srbittner.github.io/;;;http://alexandre.gramfort.net;http://mr-pc.org;", "dblp": ";;;19/5978;;15/7980;86/3894;223/3203", "google_scholar": "https://scholar.google.com/citations?hl=en;;ufJo49UAAAAJ;kwylb3sAAAAJ;;fhxshS0AAAAJ;7-9jOvEAAAAJ;hLPvq9AAAAAJ", "orcid": ";;;;;0000-0001-9791-4404;;", "linkedin": "jeffrey-s-44a5b426/;alanhdu;;;anuoluwapo-bolarinwa-34012761/;alexandregramfort/;michaelimandel;viswanath-sivakumar-56b76318/", "or_profile": "~Jeffrey_Seely1;~Alan_Du1;~Sean_R_Bittner1;~Adam_Berenzweig1;~Anuoluwapo_Bolarinwa1;~Alexandre_Gramfort1;~Michael_I_Mandel1;~Viswanath_Sivakumar1", "aff": "Sakana AI;Meta Facebook;;Meta Facebook;Meta Facebook;Meta;Brooklyn College, CUNY;Meta", "aff_domain": "sakana.ai;meta.com;;meta.com;meta.com;meta.com;brooklyn.cuny.edu;meta.com", "position": "Researcher;Researcher;;Researcher;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nsivakumar2024emgqwerty,\ntitle={emg2qwerty: A Large Dataset with Baselines for Touch Typing using Surface Electromyography},\nauthor={Viswanath Sivakumar and Jeffrey Seely and Alan Du and Sean R Bittner and Adam Berenzweig and Anuoluwapo Bolarinwa and Alexandre Gramfort and Michael I Mandel},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=LFCWIE5iS2}\n}", "github": "", "reviewers": "hCYB;zAj9;mbq7", "pdf_size": 1026618, "rating": "3;7;8", "confidence": "4;5;4", "wc_summary_and_contributions": "70;24;21", "wc_strengths": "2;14;16", "wc_improvement": "2;50;31", "wc_limitations": "84;45;1", "wc_correctness": "2;29;1", "wc_clarity": "39;31;5", "wc_relation_to_prior_work": "105;18;1", "wc_documentation": "102;12;1", "wc_additional_feedback": "1;1;1", "wc_review": "407;224;78", "wc_reply_reviewers": "274;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 2.160246899469287 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 38.333333333333336, 22.425184255405547 ], "wc_strengths_avg": [ 10.666666666666666, 6.182412330330469 ], "wc_improvement_avg": [ 27.666666666666668, 19.737161790783283 ], "wc_limitations_avg": [ 43.333333333333336, 33.9050963065371 ], "wc_correctness_avg": [ 10.666666666666666, 12.970050972229147 ], "wc_clarity_avg": [ 25.0, 14.514360704718161 ], "wc_relation_to_prior_work_avg": [ 41.333333333333336, 45.55094827650561 ], "wc_documentation_avg": [ 38.333333333333336, 45.242556171030934 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 236.33333333333334, 134.596516382194 ], "wc_reply_reviewers_avg": [ 91.33333333333333, 129.1648386967427 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3273268353539885, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10705712425861268321&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "email": "sakana.ai;meta.com;;meta.com;meta.com;meta.com;brooklyn.cuny.edu;meta.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;2;1", "aff_unique_norm": "Sakana AI;Meta;Brooklyn College", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": ";https://meta.com;https://www.brooklyncollege.edu", "aff_unique_abbr": ";Meta;BC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Brooklyn", "aff_country_unique_index": "1;1;1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Non-geodesically-convex optimization in the Wasserstein space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95610", "id": "LGG1IQhbOr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LGG1IQhbOr", "openreview": "https://openreview.net/forum?id=LGG1IQhbOr", "poster": "/media/PosterPDFs/NeurIPS%202024/95610.png?t=1731269836.8151336", "project": "", "author_site": "Hoang Phuc Hau Luu, Hanlin Yu, Bernardo Williams, Petrus Mikkola, Marcelo Hartmann, Kai Puolam\u00e4ki, Arto Klami", "tldr": "", "abstract": "We study a class of optimization problems in the Wasserstein space (the space of probability measures) where the objective function is nonconvex along generalized geodesics. Specifically, the objective exhibits some difference-of-convex structure along these geodesics. The setting also encompasses sampling problems where the logarithm of the target distribution is difference-of-convex. We derive multiple convergence insights for a novel semi Forward-Backward Euler scheme under several nonconvex (and possibly nonsmooth) regimes. Notably, the semi Forward-Backward Euler is just a slight modification of the Forward-Backward Euler whose convergence is---to our knowledge---still unknown in our very general non-geodesically-convex setting.", "keywords": "Wasserstein space;DC programming;nonconvex optimization;KL divergence", "primary_area": "optimization", "supplementary_material": "", "author": "Hoang Phuc Hau Luu;Hanlin Yu;Bernardo Williams;Petrus Mikkola;Marcelo Hartmann;Kai Puolam\u00e4ki;Arto Klami", "authorids": "~Hoang_Phuc_Hau_Luu1;~Hanlin_Yu1;~Bernardo_Williams1;~Petrus_Mikkola1;~Marcelo_Hartmann1;~Kai_Puolam\u00e4ki1;~Arto_Klami1", "gender": "M;;M;;;M;M", "homepage": "https://mcs-hub.github.io/luuhoangphuchau/#home;;https://researchportal.helsinki.fi/en/persons/bernardo-williams-moreno-sanchez;https://petrus-mikkola.github.io/;;http://www.iki.fi/kaip;https://www.cs.helsinki.fi/u/aklami/", "dblp": "308/4102;;322/7097;258/3117;;71/3034;21/5316", "google_scholar": "eLk_7PsAAAAJ;;T2lhmNwAAAAJ;https://scholar.google.com/citations?hl=en;;3Z9pgDAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-6212-8777;;0000-0003-1819-1047;", "linkedin": "hoang-phuc-hau-luu/;;;;;kaipuolamaki;", "or_profile": "~Hoang_Phuc_Hau_Luu1;~Hanlin_Yu1;~Bernardo_Williams1;~Petrus_Mikkola1;~Marcelo_Hartmann1;~Kai_Puolam\u00e4ki1;~Arto_Klami1", "aff": "University of Helsinki;;University of Helsinki;University of Helsinki;;University of Helsinki;University of Helsinki", "aff_domain": "helsinki.fi;;helsinki.fi;helsinki.fi;;helsinki.fi;helsinki.fi", "position": "Postdoc;;PhD student;Postdoc;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nluu2024nongeodesicallyconvex,\ntitle={Non-geodesically-convex optimization in the Wasserstein space},\nauthor={Hoang Phuc Hau Luu and Hanlin Yu and Bernardo Williams and Petrus Mikkola and Marcelo Hartmann and Kai Puolam{\\\"a}ki and Arto Klami},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LGG1IQhbOr}\n}", "github": "", "reviewers": "7Mmt;7EW7;b2tW;TQTP", "pdf_size": 987719, "rating": "3;6;7;7", "confidence": "4;3;3;4", "soundness": "4;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;4;3", "wc_summary": "164;305;246;79", "wc_strengths": "96;51;82;80", "wc_weaknesses": "335;106;239;58", "wc_questions": "106;138;71;191", "wc_limitations": "6;7;1;10", "wc_review": "707;607;639;418", "wc_reply_reviewers": "802;246;9;0", "wc_reply_authors": "1337;17;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 198.5, 85.24816713572204 ], "wc_strengths_avg": [ 77.25, 16.361158271956175 ], "wc_weaknesses_avg": [ 184.5, 109.29890209878597 ], "wc_questions_avg": [ 126.5, 44.13898503590675 ], "wc_limitations_avg": [ 6.0, 3.24037034920393 ], "wc_review_avg": [ 592.75, 107.15963559101907 ], "wc_reply_reviewers_avg": [ 264.25, 325.7640058385825 ], "wc_reply_authors_avg": [ 338.5, 576.5260184935282 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.45749571099781405, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14475779342782413793&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "helsinki.fi;;helsinki.fi;helsinki.fi;;helsinki.fi;helsinki.fi", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Helsinki", "aff_unique_dep": "", "aff_unique_url": "https://www.helsinki.fi", "aff_unique_abbr": "UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Finland" }, { "title": "Where's Waldo: Diffusion Features For Personalized Segmentation and Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95609", "id": "LGXeIx75sc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LGXeIx75sc", "openreview": "https://openreview.net/forum?id=LGXeIx75sc", "poster": "/media/PosterPDFs/NeurIPS%202024/95609.png?t=1731480743.9247892", "project": "", "author_site": "Dvir Samuel, Rami Ben-Ari, Matan Levy, Nir Darshan, Gal Chechik", "tldr": "", "abstract": "Personalized retrieval and segmentation aim to locate specific instances within a dataset based on an input image and a short description of the reference instance. While supervised methods are effective, they require extensive labeled data for training. Recently, self-supervised foundation models have been introduced to these tasks showing comparable results to supervised methods. However, a significant flaw in these models is evident: they struggle to locate a desired instance when other instances within the same class are presented. In this paper, we explore text-to-image diffusion models for these tasks. Specifically, we propose a novel approach called PDM for Personalized Diffusion Features Matching, that leverages intermediate features of pre-trained text-to-image models for personalization tasks without any additional training. PDM demonstrates superior performance on popular retrieval and segmentation benchmarks, outperforming even supervised methods. We also highlight notable shortcomings in current instance and segmentation datasets and propose new benchmarks for these tasks.", "keywords": "Tex-to-image diffusion model;instance retrieval", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Dvir Samuel;Rami Ben-Ari;Matan Levy;Nir Darshan;Gal Chechik", "authorids": "~Dvir_Samuel1;~Rami_Ben-Ari2;~Matan_Levy1;~Nir_Darshan1;~Gal_Chechik1", "gender": "M;M;;M;", "homepage": "https://chechiklab.biu.ac.il/~dvirsamuel/;http://www.benarirami.com/;https://levymsn.github.io;;https://chechiklab.biu.ac.il/~gal/", "dblp": "262/3701;07/1624;307/5294;;c/GalChechik", "google_scholar": "_CWxQ1gAAAAJ;https://scholar.google.co.il/citations?user=C4i_vUMAAAAJ;t6c3DWMAAAAJ;;Wk2gAZUAAAAJ", "orcid": ";;;;0000-0001-9164-5303", "linkedin": "dvir-samuel-b47859105/;;;nirdarshan/;", "or_profile": "~Dvir_Samuel1;~Rami_Ben-Ari2;~Matan_Levy1;~Nir_Darshan1;~Gal_Chechik1", "aff": "Bar-Ilan University;Bar-Ilan University;Hebrew University of Jerusalem;OriginAI;NVIDIA", "aff_domain": "biu.ac.il;biu.ac.il;huji.ac.il;originai.co;nvidia.com", "position": "PhD student;Lecturer;PhD student;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nsamuel2024wheres,\ntitle={Where's Waldo: Diffusion Features For Personalized Segmentation and Retrieval},\nauthor={Dvir Samuel and Rami Ben-Ari and Matan Levy and Nir Darshan and Gal Chechik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LGXeIx75sc}\n}", "github": "", "reviewers": "Axpb;8kmZ;JGDB;CWZR", "pdf_size": 6104809, "rating": "4;4;5;7", "confidence": "4;2;5;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "81;71;113;115", "wc_strengths": "79;48;93;72", "wc_weaknesses": "279;81;175;6", "wc_questions": "126;61;101;180", "wc_limitations": "1;7;37;1", "wc_review": "566;268;519;374", "wc_reply_reviewers": "85;6;21;63", "wc_reply_authors": "119;38;10;10", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.0, 19.339079605813716 ], "wc_strengths_avg": [ 73.0, 16.294170736800325 ], "wc_weaknesses_avg": [ 135.25, 102.338592427295 ], "wc_questions_avg": [ 117.0, 43.1335136523794 ], "wc_limitations_avg": [ 11.5, 14.9248115565993 ], "wc_review_avg": [ 431.75, 118.0939774078255 ], "wc_reply_reviewers_avg": [ 43.75, 31.68102744546016 ], "wc_reply_authors_avg": [ 44.25, 44.64512851364637 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.37463432463267754, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12465268280442876743&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "biu.ac.il;biu.ac.il;huji.ac.il;originai.co;nvidia.com", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Bar-Ilan University;Hebrew University of Jerusalem;OriginAI;NVIDIA", "aff_unique_dep": ";;;NVIDIA Corporation", "aff_unique_url": "https://www.biu.ac.il;https://www.huji.ac.il;;https://www.nvidia.com", "aff_unique_abbr": "BIU;HUJI;;NVIDIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;0;0;2", "aff_country_unique": "Israel;;United States" }, { "title": "Seeing Beyond the Crop: Using Language Priors for Out-of-Bounding Box Keypoint Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95608", "id": "LGus3wXPxc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LGus3wXPxc", "openreview": "https://openreview.net/forum?id=LGus3wXPxc", "poster": "", "project": "", "author_site": "Bavesh Balaji, Jerrin Bright, Yuhao Chen, Sirisha Rambhatla, John Zelek, David Clausi", "tldr": "", "abstract": "Accurate estimation of human pose and the pose of interacting objects, like a hockey stick, is crucial for action recognition and performance analysis, particularly in sports. Existing methods capture the object along with the human in the bounding boxes, assuming all keypoints are visible within the bounding box. This necessitates larger bounding boxes to capture the object, introducing unnecessary visual features and hindering performance in real-world cluttered environments. We propose a simple image and text-based multimodal solution TokenCLIPose that addresses this limitation. Our approach focuses solely on human keypoints within the bounding box, treating objects as unseen. TokenCLIPose leverages the rich semantic representations endowed by language for inducing keypoint-specific context, even for occluded keypoints. We evaluate the performance of TokenCLIPose on a real-world Ice-Hockey dataset, and demonstrate its generalizability through zero-shot transfer to a smaller Lacrosse dataset. Additionally, we showcase its flexibility on CrowdPose, a popular occlusion benchmark with keypoints within the bounding box. Our method significantly improves over state-of-the-art approaches on all three datasets, with gains of 4.36\\%, 2.35\\%, and 3.8\\%, respectively.", "keywords": "2D pose estimation;out-of-image keypoint prediction;multimodal pose estimation;CLIP", "primary_area": "machine_vision", "supplementary_material": "", "author": "Bavesh Balaji;Jerrin Bright;Yuhao Chen;Sirisha Rambhatla;John S. Zelek;David Anthony Clausi", "authorids": "~Bavesh_Balaji1;~Jerrin_Bright1;~Yuhao_Chen1;~Sirisha_Rambhatla1;~John_S._Zelek1;~David_Anthony_Clausi1", "gender": "M;M;M;F;M;Not Specified", "homepage": "https://bavesh-b.github.io/;https://jerrinbright.github.io/;;;http://www.uwaterloo.ca;https://uwaterloo.ca/systems-design-engineering/profile/dclausi", "dblp": "356/8803;317/5404;;123/4808.html;81/7001;c/DAClausi", "google_scholar": "https://scholar.google.ca/citations?user=qFFJVHsAAAAJ;ArQ2dvkAAAAJ;NwxXuCYAAAAJ;EOSZeBMAAAAJ;CrdYsxEAAAAJ;https://scholar.google.ca/citations?user=Q5qzD7EAAAAJ", "orcid": "0009-0008-4859-852X;;;;;0000-0002-6383-0875", "linkedin": ";jerriebright/;yuhao-chen-789a2164/;;;david-clausi-6290861/", "or_profile": "~Bavesh_Balaji1;~Jerrin_Bright1;~Yuhao_Chen1;~Sirisha_Rambhatla1;~John_S._Zelek1;~David_Anthony_Clausi1", "aff": "University of Waterloo;University of Waterloo;University of Waterloo;University of Waterloo;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;waterloo.ca;uwaterloo.ca", "position": "MS student;PhD student;Assistant Professor;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbalaji2024seeing,\ntitle={Seeing Beyond the Crop: Using Language Priors for Out-of-Bounding Box Keypoint Prediction},\nauthor={Bavesh Balaji and Jerrin Bright and Yuhao Chen and Sirisha Rambhatla and John S. Zelek and David Anthony Clausi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LGus3wXPxc}\n}", "github": "", "reviewers": "AetR;F7X8;9Npr;9aCe", "pdf_size": 10471090, "rating": "4;4;4;5", "confidence": "5;4;3;4", "soundness": "3;2;2;3", "novelty": "3;2;2;3", "presentation": "3;2;2;2", "wc_summary": "66;66;208;117", "wc_strengths": "37;15;63;47", "wc_weaknesses": "169;154;307;147", "wc_questions": "2;2;39;207", "wc_limitations": "21;6;21;6", "wc_review": "295;243;638;524", "wc_reply_reviewers": "0;25;283;418", "wc_reply_authors": "0;0;304;224", "reply_reviewers": "0;1;2;1", "reply_authors": "1;1;3;2", "rating_avg": [ 4.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.25, 57.99299526666992 ], "wc_strengths_avg": [ 40.5, 17.399712641305314 ], "wc_weaknesses_avg": [ 194.25, 65.57962717185879 ], "wc_questions_avg": [ 62.5, 84.78354793236716 ], "wc_limitations_avg": [ 13.5, 7.5 ], "wc_review_avg": [ 425.0, 162.16812263820532 ], "wc_reply_reviewers_avg": [ 181.5, 175.83301737728326 ], "wc_reply_authors_avg": [ 132.0, 134.99629624548965 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RuemgkLVsXQJ:scholar.google.com/&scioq=Seeing+Beyond+the+Crop:+Using+Language+Priors+for+Out-of-Bounding+Box+Keypoint+Prediction&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;waterloo.ca;uwaterloo.ca", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Warped Diffusion: Solving Video Inverse Problems with Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95607", "id": "LH94zPv8cu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LH94zPv8cu", "openreview": "https://openreview.net/forum?id=LH94zPv8cu", "poster": "/media/PosterPDFs/NeurIPS%202024/95607.png?t=1733772295.2284887", "project": "", "author_site": "Giannis Daras, Weili Nie, Karsten Kreis, Alex Dimakis, Morteza Mardani, Nikola Kovachki, Arash Vahdat", "tldr": "", "abstract": "Using image models naively for solving inverse video problems often suffers from flickering, texture-sticking, and temporal inconsistency in generated videos. To tackle these problems, in this paper, we view frames as continuous functions in the 2D space, and videos as a sequence of continuous warping transformations between different frames. This perspective allows us to train function space diffusion models only on **images** and utilize them to solve temporally correlated inverse problems. The function space diffusion models need to be equivariant with respect to the underlying spatial transformations. To ensure temporal consistency, we introduce a simple post-hoc test-time guidance towards (self)-equivariant solutions. Our method allows us to deploy state-of-the-art latent diffusion models such as Stable Diffusion XL to solve video inverse problems. We demonstrate the effectiveness of our method for video inpainting and $8\\times$ video super-resolution, outperforming existing techniques based on noise transformations. We provide generated video results in the following URL: https://giannisdaras.github.io/warped_diffusion.github.io/.", "keywords": "noise warping;inverse problems;diffusion models;equivariance", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/34ca46ab090c1a409dedc8252491ca7d71ca69d0.zip", "author": "Giannis Daras;Weili Nie;Karsten Kreis;Alex Dimakis;Morteza Mardani;Nikola Borislavov Kovachki;Arash Vahdat", "authorids": "~Giannis_Daras1;~Weili_Nie1;~Karsten_Kreis1;~Alex_Dimakis1;~Morteza_Mardani1;~Nikola_Borislavov_Kovachki1;~Arash_Vahdat3", "gender": "M;M;;M;M;M;M", "homepage": "https://giannisdaras.github.io/;https://weilinie.github.io/;https://karstenkreis.github.io/;https://people.eecs.berkeley.edu/~alexdimakis/;http://web.stanford.edu/~morteza/;http://www.its.caltech.edu/~nkovachk/;http://latentspace.cc/", "dblp": "254/2703;147/4786;238/6834;19/5000.html;74/258;;92/8108", "google_scholar": "LaScvbQAAAAJ;zW7BH7oAAAAJ;https://scholar.google.de/citations?user=rFd-DiAAAAAJ;JSFmVQEAAAAJ;H7edsyEAAAAJ;;https://scholar.google.ca/citations?user=p9-nlRIAAAAJ", "orcid": ";;;;;;", "linkedin": ";;karstenkreis;alex-dimakis-b1b20320/;;;", "or_profile": "~Giannis_Daras1;~Weili_Nie1;~Karsten_Kreis1;~Alex_Dimakis1;~Morteza_Mardani1;~Nikola_Borislavov_Kovachki1;~Arash_Vahdat3", "aff": "University of Texas, Austin;NVIDIA;NVIDIA;University of Texas at Austin;;NVIDIA;NVIDIA", "aff_domain": "utexas.edu;nvidia.com;nvidia.com;utexas.edu;;nvidia.com;nvidia.com", "position": "PhD student;Research Scientist;Research Scientist;Full Professor;;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndaras2024warped,\ntitle={Warped Diffusion: Solving Video Inverse Problems with Image Diffusion Models},\nauthor={Giannis Daras and Weili Nie and Karsten Kreis and Alex Dimakis and Morteza Mardani and Nikola Borislavov Kovachki and Arash Vahdat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LH94zPv8cu}\n}", "github": "", "reviewers": "WsM3;zjYi;ebKQ;kW4a", "pdf_size": 18418162, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "61;109;82;66", "wc_strengths": "68;84;20;53", "wc_weaknesses": "161;205;71;123", "wc_questions": "88;53;84;3", "wc_limitations": "7;37;12;14", "wc_review": "385;488;269;259", "wc_reply_reviewers": "48;67;46;0", "wc_reply_authors": "422;74;41;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 18.714967272212903 ], "wc_strengths_avg": [ 56.25, 23.62599204266352 ], "wc_weaknesses_avg": [ 140.0, 49.28488612140643 ], "wc_questions_avg": [ 57.0, 33.99264626356707 ], "wc_limitations_avg": [ 17.5, 11.543396380615196 ], "wc_review_avg": [ 350.25, 93.68931369158385 ], "wc_reply_reviewers_avg": [ 40.25, 24.641174890820444 ], "wc_reply_authors_avg": [ 134.25, 168.18795289794093 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2369070074329650386&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "utexas.edu;nvidia.com;nvidia.com;utexas.edu;;nvidia.com;nvidia.com", "author_num": 7, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "University of Texas at Austin;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.utexas.edu;https://www.nvidia.com", "aff_unique_abbr": "UT Austin;NVIDIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Association Pattern-aware Fusion for Biological Entity Relationship Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95606", "id": "LI5KmimXbM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LI5KmimXbM", "openreview": "https://openreview.net/forum?id=LI5KmimXbM", "poster": "/media/PosterPDFs/NeurIPS%202024/95606.png?t=1731608989.7681825", "project": "", "author_site": "Lingxiang Jia, Yuchen Ying, Zunlei Feng, Zipeng Zhong, Shaolun Yao, Jiacong Hu, Mingjiang Duan, Xingen Wang, Jie Song, Mingli Song", "tldr": "", "abstract": "Deep learning-based methods significantly advance the exploration of associations among triple-wise biological entities (e.g., drug-target protein-adverse reaction), thereby facilitating drug discovery and safeguarding human health. However, existing researches only focus on entity-centric information mapping and aggregation, neglecting the crucial role of potential association patterns among different entities. To address the above limitation, we propose a novel association pattern-aware fusion method for biological entity relationship prediction, which effectively integrates the related association pattern information into entity representation learning. Additionally, to enhance the missing information of the low-order message passing, we devise a bind-relation module that considers the strong bind of low-order entity associations. Extensive experiments conducted on three biological datasets quantitatively demonstrate that the proposed method achieves about 4%-23% hit@1 improvements compared with state-of-the-art baselines. Furthermore, the interpretability of association patterns is elucidated in detail, thus revealing the intrinsic biological mechanisms and promoting it to be deployed in real-world scenarios. Our data and code are available at https://github.com/hry98kki/PatternBERP.", "keywords": "association pattern;deep learning;interpretable machine learning;biological network", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Lingxiang Jia;Yuchen Ying;Zunlei Feng;Zipeng Zhong;Shaolun Yao;Jiacong Hu;Mingjiang Duan;Xingen Wang;Jie Song;Mingli Song", "authorids": "~Lingxiang_Jia1;~Yuchen_Ying1;~Zunlei_Feng1;~Zipeng_Zhong2;~Shaolun_Yao1;~Jiacong_Hu1;~Mingjiang_Duan1;~Xingen_Wang1;~Jie_Song3;~Mingli_Song1", "gender": ";M;M;M;F;M;;M;M;M", "homepage": ";https://github.com/Ying-Yuchen;https://person.zju.edu.cn/en/zunleifeng;https://www.vipazoo.cn/people/zhongzipeng;https://www.vipazoo.cn/people/yaoshaolun;https://jiaconghu.com;;https://person.zju.edu.cn/xingenwang;https://person.zju.edu.cn/en/NB20021;https://person.zju.edu.cn/msong", "dblp": ";341/3164.html;191/2455;;;136/3061;;;09/4756-11.html;71/5333", "google_scholar": ";;wMtjcGwAAAAJ;;;;https://scholar.google.com.hk/citations?user=OD6dsfoAAAAJ;S8C1Y0gAAAAJ;4OjO-WYAAAAJ;7oLbhAwAAAAJ", "orcid": ";;;;;;;;0000-0003-3671-6521;0000-0003-2621-6048", "linkedin": ";;;;;;;;;", "or_profile": "~Lingxiang_Jia1;~Yuchen_Ying1;~Zunlei_Feng1;~Zipeng_Zhong2;~Shaolun_Yao1;~Jiacong_Hu1;~Mingjiang_Duan1;~Xingen_Wang1;~Jie_Song3;~Mingli_Song1", "aff": ";Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": ";PhD student;Associate Professor;MS student;PhD student;PhD student;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\njia2024association,\ntitle={Association Pattern-aware Fusion for Biological Entity Relationship Prediction},\nauthor={Lingxiang Jia and Yuchen Ying and Zunlei Feng and Zipeng Zhong and Shaolun Yao and Jiacong Hu and Mingjiang Duan and Xingen Wang and Jie Song and Mingli Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LI5KmimXbM}\n}", "github": "", "reviewers": "m3DY;UqdA;4Nwa;ugjG", "pdf_size": 1850707, "rating": "6;7;7;7", "confidence": "3;3;2;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "89;100;93;102", "wc_strengths": "59;146;91;30", "wc_weaknesses": "108;144;126;29", "wc_questions": "347;38;4;72", "wc_limitations": "79;7;8;98", "wc_review": "682;435;322;331", "wc_reply_reviewers": "9;33;12;8", "wc_reply_authors": "39;46;26;37", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 5.244044240850758 ], "wc_strengths_avg": [ 81.5, 43.03777410601064 ], "wc_weaknesses_avg": [ 101.75, 43.88835266901687 ], "wc_questions_avg": [ 115.25, 135.9436923876941 ], "wc_limitations_avg": [ 48.0, 41.054841371024686 ], "wc_review_avg": [ 442.5, 145.23171141317587 ], "wc_reply_reviewers_avg": [ 15.5, 10.21028892833107 ], "wc_reply_authors_avg": [ 37.0, 7.176350047203662 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13827597732973687634&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "email": ";zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Principled Graph Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95605", "id": "LJCQH6U0pl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LJCQH6U0pl", "openreview": "https://openreview.net/forum?id=LJCQH6U0pl", "poster": "/media/PosterPDFs/NeurIPS%202024/95605.png?t=1733408801.7883754", "project": "", "author_site": "Luis M\u00fcller, Daniel Kusuma, Blai Bonet, Christopher Morris", "tldr": "", "abstract": "The expressive power of graph learning architectures based on the $k$-dimensional Weisfeiler-Leman ($k$-WL) hierarchy is well understood. However, such architectures often fail to deliver solid predictive performance on real-world tasks, limiting their practical impact. In contrast, global attention-based models such as graph transformers demonstrate strong performance in practice, but comparing their expressive power with the $k$-WL hierarchy remains challenging, particularly since these architectures rely on positional or structural encodings for their expressivity and predictive performance. To address this, we show that the recently proposed Edge Transformer, a global attention model operating on node pairs instead of nodes, has 3-WL expressive power when provided with the right tokenization. Empirically, we demonstrate that the Edge Transformer surpasses other theoretically aligned architectures regarding predictive performance while not relying on positional or structural encodings.", "keywords": "graph transformers;expressivity;Weisfeiler and Leman;Weisfeiler and Lehman", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/fbf52c6976a7dad61e113f5ef0ab6875bf8c0d95.zip", "author": "Luis M\u00fcller;Daniel Kusuma;Blai Bonet;Christopher Morris", "authorids": "~Luis_M\u00fcller1;~Daniel_Kusuma1;~Blai_Bonet1;~Christopher_Morris1", "gender": ";M;M;M", "homepage": "https://luis-mueller.github.io/;https://ksmdnl.github.io/;http://bonetblai.github.io;http://christophermorris.info", "dblp": "264/5619;354/8929;b/BBonet;156/7303", "google_scholar": "iPxfRnEAAAAJ;https://scholar.google.de/citations?user=pjkEms0AAAAJ;;", "orcid": ";;;", "linkedin": "luis-m%C3%BCller-58a5ba236/;daniel-kusuma/;;", "or_profile": "~Luis_M\u00fcller1;~Daniel_Kusuma1;~Blai_Bonet1;~Christopher_Morris1", "aff": "RWTH Aachen University, Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;;Rheinisch Westf\u00e4lische Technische Hochschule Aachen", "aff_domain": "cs.rwth-aachen.de;rwth-aachen.de;;rwth-aachen.de", "position": "PhD student;MS student;;Assistant Professor", "bibtex": "@inproceedings{\nm{\\\"u}ller2024towards,\ntitle={Towards Principled Graph Transformers},\nauthor={Luis M{\\\"u}ller and Daniel Kusuma and Blai Bonet and Christopher Morris},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LJCQH6U0pl}\n}", "github": "", "reviewers": "ckA7;szHE;Ff1y;YYTZ", "pdf_size": 526855, "rating": "1;5;6;7", "confidence": "5;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "2;4;3;3", "wc_summary": "60;53;45;68", "wc_strengths": "37;49;37;34", "wc_weaknesses": "60;208;79;178", "wc_questions": "56;172;127;1", "wc_limitations": "2;2;4;8", "wc_review": "215;484;292;289", "wc_reply_reviewers": "43;16;31;98", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 2.277608394786075 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 56.5, 8.5 ], "wc_strengths_avg": [ 39.25, 5.7608593109014565 ], "wc_weaknesses_avg": [ 131.25, 63.013391433884905 ], "wc_questions_avg": [ 89.0, 65.50954128979991 ], "wc_limitations_avg": [ 4.0, 2.449489742783178 ], "wc_review_avg": [ 320.0, 99.58162481100617 ], "wc_reply_reviewers_avg": [ 47.0, 30.95965116082544 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.950586375786717, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13884673652871735980&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.rwth-aachen.de;rwth-aachen.de;;rwth-aachen.de", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "RWTH Aachen University", "aff_unique_dep": "", "aff_unique_url": "https://www.rwth-aachen.de", "aff_unique_abbr": "RWTH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Aachen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Double-Ended Synthesis Planning with Goal-Constrained Bidirectional Search", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95604", "id": "LJNqVIKSCr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LJNqVIKSCr", "openreview": "https://openreview.net/forum?id=LJNqVIKSCr", "poster": "/media/PosterPDFs/NeurIPS%202024/95604.png?t=1733254252.8049095", "project": "", "author_site": "Kevin Yu, Jihye Roh, Ziang Li, Wenhao Gao, Runzhong Wang, Connor Coley", "tldr": "", "abstract": "Computer-aided synthesis planning (CASP) algorithms have demonstrated expert-level abilities in planning retrosynthetic routes to molecules of low to moderate complexity. However, current search methods assume the sufficiency of reaching arbitrary building blocks, failing to address the common real-world constraint where using specific molecules is desired. To this end, we present a formulation of synthesis planning with starting material constraints. Under this formulation, we propose Double-Ended Synthesis Planning ($\\texttt{DESP}$), a novel CASP algorithm under a _bidirectional graph search_ scheme that interleaves expansions from the target and from the goal starting materials to ensure constraint satisfiability. The search algorithm is guided by a goal-conditioned cost network learned offline from a partially observed hypergraph of valid chemical reactions. We demonstrate the utility of $\\texttt{DESP}$ in improving solve rates and reducing the number of search expansions by biasing synthesis planning towards expert goals on multiple new benchmarks. $\\texttt{DESP}$ can make use of existing one-step retrosynthesis models, and we anticipate its performance to scale as these one-step model capabilities improve.", "keywords": "Retrosynthesis;synthesis planning;chemistry;bidirectional search", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Kevin Yu;Jihye Roh;Ziang Li;Wenhao Gao;Runzhong Wang;Connor W. Coley", "authorids": "~Kevin_Yu1;~Jihye_Roh1;~Ziang_Li2;~Wenhao_Gao1;~Runzhong_Wang1;~Connor_W._Coley1", "gender": ";F;M;M;M;M", "homepage": "https://ykevu.github.io;;https://ziang.li/;https://wenhao-gao.github.io;http://runzhong.wang;https://coley.mit.edu", "dblp": ";;;177/0968;239/4351;206/6284", "google_scholar": "8k6ikroAAAAJ;;aeY8FgkAAAAJ;s4eywrUAAAAJ;uoM0g3cAAAAJ;l015S80AAAAJ", "orcid": "0009-0002-2547-2373;0009-0005-0123-0646;;0000-0002-6506-8044;0000-0002-9566-738X;0000-0002-8271-8723", "linkedin": ";;;;;", "or_profile": "~Kevin_Yu1;~Jihye_Roh1;~Ziang_Li2;~Wenhao_Gao1;~Runzhong_Wang1;~Connor_Coley1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Georgia Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;gatech.edu;mit.edu;mit.edu;mit.edu", "position": "MS student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nyu2024doubleended,\ntitle={Double-Ended Synthesis Planning with Goal-Constrained Bidirectional Search},\nauthor={Kevin Yu and Jihye Roh and Ziang Li and Wenhao Gao and Runzhong Wang and Connor W. Coley},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LJNqVIKSCr}\n}", "github": "", "reviewers": "FuFE;pGH2;qRkp;WuVQ", "pdf_size": 1021231, "rating": "7;7;7;8", "confidence": "3;4;4;3", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "55;158;32;35", "wc_strengths": "28;107;16;172", "wc_weaknesses": "55;40;3;55", "wc_questions": "185;31;69;252", "wc_limitations": "12;13;1;5", "wc_review": "335;349;121;519", "wc_reply_reviewers": "24;4;5;24", "wc_reply_authors": "284;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 51.570340313013254 ], "wc_strengths_avg": [ 80.75, 63.22726864257225 ], "wc_weaknesses_avg": [ 38.25, 21.25294097295713 ], "wc_questions_avg": [ 134.25, 88.54200980325668 ], "wc_limitations_avg": [ 7.75, 4.968651728587948 ], "wc_review_avg": [ 331.0, 141.23030836190935 ], "wc_reply_reviewers_avg": [ 14.25, 9.756408150543928 ], "wc_reply_authors_avg": [ 71.0, 122.97560733739029 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15642279773183930926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu;gatech.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.gatech.edu", "aff_unique_abbr": "MIT;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient and Sharp Off-Policy Evaluation in Robust Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95603", "id": "LKGuc2rY5v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LKGuc2rY5v", "openreview": "https://openreview.net/forum?id=LKGuc2rY5v", "poster": "/media/PosterPDFs/NeurIPS%202024/95603.png?t=1733074043.7100887", "project": "", "author_site": "Andrew Bennett, Nathan Kallus, Miruna Oprescu, Wen Sun, Kaiwen Wang", "tldr": "", "abstract": "We study the evaluation of a policy under best- and worst-case perturbations to a Markov decision process (MDP), using transition observations from the original MDP, whether they are generated under the same or a different policy. This is an important problem when there is the possibility of a shift between historical and future environments, \\emph{e.g.} due to unmeasured confounding, distributional shift, or an adversarial environment. We propose a perturbation model that allows changes in the transition kernel densities up to a given multiplicative factor or its reciprocal, extending the classic marginal sensitivity model (MSM) for single time-step decision-making to infinite-horizon RL. We characterize the sharp bounds on policy value under this model -- \\emph{i.e.}, the tightest possible bounds based on transition observations from the original MDP -- and we study the estimation of these bounds from such transition observations. We develop an estimator with several important guarantees: it is semiparametrically efficient, and remains so even when certain necessary nuisance functions, such as worst-case Q-functions, are estimated at slow, nonparametric rates. Our estimator is also asymptotically normal, enabling straightforward statistical inference using Wald confidence intervals. Moreover, when certain nuisances are estimated inconsistently, the estimator still provides valid, albeit possibly not sharp, bounds on the policy value. We validate these properties in numerical simulations. The combination of accounting for environment shifts from train to test (robustness), being insensitive to nuisance-function estimation (orthogonality), and addressing the challenge of learning from finite samples (inference) together leads to credible and reliable policy evaluation.", "keywords": "Sensitivity Analysis;Robust MDPs;Semiparametric Efficiency", "primary_area": "causal_inference", "supplementary_material": "", "author": "Andrew Bennett;Nathan Kallus;Miruna Oprescu;Wen Sun;Kaiwen Wang", "authorids": "~Andrew_Bennett5;~Nathan_Kallus1;~Miruna_Oprescu1;~Wen_Sun1;~Kaiwen_Wang1", "gender": ";F;;M;M", "homepage": "http://nathankallus.com/;https://mirunaoprescu.com;https://wensun.github.io;https://kaiwenw.github.io/;https://awbennett.net/", "dblp": "142/2900;218/5317;;220/3822;57/6380", "google_scholar": "K2WfIlsAAAAJ;KLSUWBAAAAAJ;iOLC30YAAAAJ;HsMheBUAAAAJ;", "orcid": "0000-0003-1672-0507;;;;", "linkedin": ";;;kaiwenw/;", "or_profile": "~Nathan_Kallus1;~Miruna_Oprescu1;~Wen_Sun1;~Kaiwen_Wang1;~Andrew_Bennett4", "aff": "Cornell University;Brookhaven National Laboratory;Cornell University;Department of Computer Science, Cornell University;Morgan Stanley", "aff_domain": "cornell.edu;bnl.gov;cornell.edu;cs.cornell.edu;morganstanley.com", "position": "Associate Professor;Intern;Assistant Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nbennett2024efficient,\ntitle={Efficient and Sharp Off-Policy Evaluation in Robust Markov Decision Processes},\nauthor={Andrew Bennett and Nathan Kallus and Miruna Oprescu and Wen Sun and Kaiwen Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LKGuc2rY5v}\n}", "github": "", "reviewers": "xbQo;kuGv;ghkS;QdT5", "pdf_size": 663892, "rating": "4;6;6;6", "confidence": "2;3;4;3", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "1;2;1;3", "wc_summary": "65;154;58;89", "wc_strengths": "33;97;37;99", "wc_weaknesses": "473;170;188;135", "wc_questions": "34;3;89;3", "wc_limitations": "15;1;1;1", "wc_review": "620;425;373;327", "wc_reply_reviewers": "184;67;67;34", "wc_reply_authors": "795;0;976;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;1;4;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 91.5, 37.8714932369982 ], "wc_strengths_avg": [ 66.5, 31.539657575820318 ], "wc_weaknesses_avg": [ 241.5, 135.00833307614755 ], "wc_questions_avg": [ 32.25, 35.12388788275011 ], "wc_limitations_avg": [ 4.5, 6.06217782649107 ], "wc_review_avg": [ 436.25, 111.60953140301235 ], "wc_reply_reviewers_avg": [ 88.0, 57.039460025494634 ], "wc_reply_authors_avg": [ 442.75, 447.3507432652817 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3791772393347531482&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cornell.edu;bnl.gov;cornell.edu;cs.cornell.edu;morganstanley.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Cornell University;Brookhaven National Laboratory;Morgan Stanley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cornell.edu;https://www.bnl.gov;https://www.morganstanley.com", "aff_unique_abbr": "Cornell;BNL;Morgan Stanley", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "NeuroGauss4D-PCI: 4D Neural Fields and Gaussian Deformation Fields for Point Cloud Interpolation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95602", "id": "LKdCkV31T7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LKdCkV31T7", "openreview": "https://openreview.net/forum?id=LKdCkV31T7", "poster": "/media/PosterPDFs/NeurIPS%202024/95602.png?t=1729934943.6499104", "project": "", "author_site": "Chaokang Jiang, Dalong Du, Jiuming Liu, Siting Zhu, Zhenqiang Liu, Zhuang Ma, Zhujin Liang, Jie Zhou", "tldr": "", "abstract": "Point Cloud Interpolation confronts challenges from point sparsity, complex spatiotemporal dynamics, and the difficulty of deriving complete 3D point clouds from sparse temporal information. This paper presents NeuroGauss4D-PCI, which excels at modeling complex non-rigid deformations across varied dynamic scenes. The method begins with an iterative Gaussian cloud soft clustering module, offering structured temporal point cloud representations. The proposed temporal radial basis function Gaussian residual utilizes Gaussian parameter interpolation over time, enabling smooth parameter transitions and capturing temporal residuals of Gaussian distributions. Additionally, a 4D Gaussian deformation field tracks the evolution of these parameters, creating continuous spatiotemporal deformation fields. A 4D neural field transforms low-dimensional spatiotemporal coordinates ($x,y,z,t$) into a high-dimensional latent space. Finally, we adaptively and efficiently fuse the latent features from neural fields and the geometric features from Gaussian deformation fields.\nNeuroGauss4D-PCI outperforms existing methods in point cloud frame interpolation, delivering leading performance on both object-level (DHB) and large-scale autonomous driving datasets (NL-Drive), with scalability to auto-labeling and point cloud densification tasks.", "keywords": "Neural Fields;Point Cloud Interpolation;Gaussian Deformation Fields", "primary_area": "machine_vision", "supplementary_material": "/attachment/6a0fb6e15d8054ce9895fc8c80e9de8f465e97a9.zip", "author": "Chaokang Jiang;Dalong Du;Jiuming Liu;Siting Zhu;Zhenqiang Liu;Zhuang Ma;Zhujin Liang;Jie Zhou", "authorids": "~Chaokang_Jiang1;~Dalong_Du3;~Jiuming_Liu1;~Siting_Zhu2;~Zhenqiang_Liu1;~Zhuang_Ma2;~Zhujin_Liang2;~Jie_Zhou3", "gender": ";M;;F;;M;;M", "homepage": ";;;https://irmv.sjtu.edu.cn/team/;;;;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html", "dblp": ";159/2057.html;;182/7236;;;;00/5012-1", "google_scholar": ";;;;;;;", "orcid": ";;;;;0009-0009-3803-3418;;", "linkedin": ";;;;;;;", "or_profile": "~Chaokang_Jiang1;~Dalong_Du3;~Jiuming_Liu1;~Siting_Zhu2;~Zhenqiang_Liu1;~Zhuang_Ma2;~Zhujin_Liang2;~Jie_Zhou3", "aff": ";PhiGent Robotics;;Shanghai Jiaotong University;;;;Tsinghua University", "aff_domain": ";phigent.ai;;sjtu.edu.cn;;;;tsinghua.edu.cn", "position": ";Researcher;;PhD student;;;;Full Professor", "bibtex": "@inproceedings{\njiang2024neurogaussdpci,\ntitle={NeuroGauss4D-{PCI}: 4D Neural Fields and Gaussian Deformation Fields for Point Cloud Interpolation},\nauthor={Chaokang Jiang and Dalong Du and Jiuming Liu and Siting Zhu and Zhenqiang Liu and Zhuang Ma and Zhujin Liang and Jie Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LKdCkV31T7}\n}", "github": "", "reviewers": "Z54i;ZfCH;xq5Q;2Dkp;ddrR", "pdf_size": 40247676, "rating": "4;5;5;6;8", "confidence": "4;4;4;4;3", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "1;3;2;1;3", "wc_summary": "54;89;174;196;54", "wc_strengths": "15;74;33;53;75", "wc_weaknesses": "68;109;336;347;65", "wc_questions": "62;168;351;50;25", "wc_limitations": "29;59;7;17;21", "wc_review": "228;499;901;663;240", "wc_reply_reviewers": "0;67;18;49;26", "wc_reply_authors": "107;0;31;64;46", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.0, 0.8944271909999159 ], "wc_summary_avg": [ 113.4, 60.24483380340592 ], "wc_strengths_avg": [ 50.0, 23.340951137432253 ], "wc_weaknesses_avg": [ 185.0, 128.77111477346153 ], "wc_questions_avg": [ 131.2, 120.27202500997478 ], "wc_limitations_avg": [ 26.6, 17.681628884240276 ], "wc_review_avg": [ 506.2, 256.4226199070589 ], "wc_reply_reviewers_avg": [ 32.0, 23.53720459187964 ], "wc_reply_authors_avg": [ 49.6, 35.556152772762125 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8846517369293829, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13322741759747333672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";phigent.ai;;sjtu.edu.cn;;;;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "PhiGent Robotics;Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": ";SJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";China" }, { "title": "Almost Free: Self-concordance in Natural Exponential Families and an Application to Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95601", "id": "LKwVYvx66I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LKwVYvx66I", "openreview": "https://openreview.net/forum?id=LKwVYvx66I", "poster": "/media/PosterPDFs/NeurIPS%202024/95601.png?t=1733425610.2623458", "project": "", "author_site": "Shuai Liu, Alex Ayoub, Flore Sentenac, Xiaoqi Tan, Csaba Szepesvari", "tldr": "", "abstract": "We prove that single-parameter natural exponential families with subexponential tails are self-concordant with polynomial-sized parameters. For subgaussian natural exponential families we establish an exact characterization of the growth rate of the self-concordance parameter. Applying these findings to bandits allows us to fill gaps in the literature: We show that optimistic algorithms for generalized linear bandits enjoy regret bounds that are both second-order (scale with the variance of the optimal arm's reward distribution) and free of an exponential dependence on the bound of the problem parameter in the leading term. To the best of our knowledge, ours is the first regret bound for generalized linear bandits with subexponential tails, broadening the class of problems to include Poisson, exponential and gamma bandits.", "keywords": "bandits;generalized linear models;natural exponential families;self-concordance.", "primary_area": "bandits", "supplementary_material": "", "author": "Shuai Liu;Alex Ayoub;Flore Sentenac;Xiaoqi Tan;Csaba Szepesvari", "authorids": "~Shuai_Liu10;~Alex_Ayoub1;~Flore_Sentenac1;~Xiaoqi_Tan1;~Csaba_Szepesvari1", "gender": "M;M;;;M", "homepage": "https://sliu24k.github.io/;;https://floresentenac.github.io/;https://xiaoqitan.org;https://sites.ualberta.ca/~szepesva/", "dblp": ";266/8071;;;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba", "google_scholar": ";eh0TSgYAAAAJ;;drR_WcAAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ", "orcid": ";;;;", "linkedin": ";;flore-sentenac-6843a312a/;;csaba-szepesvari-09376b1?trk=hp-identity-name", "or_profile": "~Shuai_Liu10;~Alex_Ayoub1;~Flore_Sentenac1;~Xiaoqi_Tan1;~Csaba_Szepesvari1", "aff": "University of Alberta;NetFlix;Ensae ParisTech;University of Alberta;Google DeepMind", "aff_domain": "ualberta.ca;netflix.com;ensae.fr;ualberta.ca;google.com", "position": "PhD student;Intern;PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nliu2024almost,\ntitle={Almost Free: Self-concordance in Natural Exponential Families and an Application to Bandits},\nauthor={Shuai Liu and Alex Ayoub and Flore Sentenac and Xiaoqi Tan and Csaba Szepesvari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LKwVYvx66I}\n}", "github": "", "reviewers": "ySJz;Ca74;n2uQ", "pdf_size": 606329, "rating": "5;6;8", "confidence": "5;2;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "1;3;4", "wc_summary": "52;82;26", "wc_strengths": "17;77;89", "wc_weaknesses": "96;116;260", "wc_questions": "33;29;21", "wc_limitations": "26;5;1", "wc_review": "224;309;397", "wc_reply_reviewers": "80;18;35", "wc_reply_authors": "37;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 53.333333333333336, 22.88133640230735 ], "wc_strengths_avg": [ 61.0, 31.496031496047245 ], "wc_weaknesses_avg": [ 157.33333333333334, 73.05401350294787 ], "wc_questions_avg": [ 27.666666666666668, 4.988876515698588 ], "wc_limitations_avg": [ 10.666666666666666, 10.96458946893235 ], "wc_review_avg": [ 310.0, 70.63049388661152 ], "wc_reply_reviewers_avg": [ 44.333333333333336, 26.157641755751268 ], "wc_reply_authors_avg": [ 12.333333333333334, 17.441967269268172 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999994, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=530495237748231091&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ualberta.ca;netflix.com;ensae.fr;ualberta.ca;google.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Alberta;Netflix;ENSAE ParisTech;Google", "aff_unique_dep": ";;;Google DeepMind", "aff_unique_url": "https://www.ualberta.ca;https://www.netflix.com;https://www.ensae.fr;https://deepmind.com", "aff_unique_abbr": "UAlberta;Netflix;Ensae;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;3", "aff_country_unique": "Canada;United States;France;United Kingdom" }, { "title": "NeuralFluid: Nueral Fluidic System Design and Control with Differentiable Simulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95600", "id": "LLsOmvJbBm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LLsOmvJbBm", "openreview": "https://openreview.net/forum?id=LLsOmvJbBm", "poster": "", "project": "", "author_site": "Yifei Li, Yuchen Sun, Pingchuan Ma, Eftychios Sifakis, Tao Du, Bo Zhu, Wojciech Matusik", "tldr": "", "abstract": "We present NeuralFluid, a novel framework to explore neural control and design of complex fluidic systems with dynamic solid boundaries. Our system features a fast differentiable Navier-Stokes solver with solid-fluid interface handling, a low-dimensional differentiable parametric geometry representation, a control-shape co-design algorithm, and gym-like simulation environments to facilitate various fluidic control design applications. Additionally, we present a benchmark of design, control, and learning tasks on high-fidelity, high-resolution dynamic fluid environments that pose challenges for existing differentiable fluid simulators. These tasks include designing the control of artificial hearts, identifying robotic end-effector shapes, and controlling a fluid gate. By seamlessly incorporating our differentiable fluid simulator into a learning framework, we demonstrate successful design, control, and learning results that surpass gradient-free solutions in these benchmark tasks.", "keywords": "differentiable simulation;fluid learning;fluid simulation;fluid control;fluid optimization;fluid design;inverse problems;navier-stokes;adjoint method;fluid dynamics", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/641e706a52b3f33590dd9c4aedbb04ed552981e8.zip", "author": "Yifei Li;Yuchen Sun;Pingchuan Ma;Eftychios Sifakis;Tao Du;Bo Zhu;Wojciech Matusik", "authorids": "~Yifei_Li7;~Yuchen_Sun5;~Pingchuan_Ma3;~Eftychios_Sifakis1;~Tao_Du1;~Bo_Zhu2;~Wojciech_Matusik2", "gender": "F;M;M;M;;M;M", "homepage": "https://people.csail.mit.edu/liyifei/;;https://people.csail.mit.edu/pcma;https://pages.cs.wisc.edu/~sifakis/;https://people.iiis.tsinghua.edu.cn/~taodu/;https://faculty.cc.gatech.edu/~bozhu/;https://cdfg.mit.edu/wojciech", "dblp": "38/1978-2;;215/4446-2;;51/3026-1;;", "google_scholar": "N4ejGnoAAAAJ;ctDp4E8AAAAJ;EtCZmkwAAAAJ;;https://scholar.google.com/citations?hl=en;atNjbs0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3770-0575;0009-0005-0896-6672;;;0000-0001-7337-7667;;0000-0003-0212-5643", "linkedin": ";;;;;;wojciech-matusik-67238126/", "or_profile": "~Yifei_Li7;~Yuchen_Sun5;~Pingchuan_Ma3;~Eftychios_Sifakis1;~Tao_Du1;~Bo_Zhu2;~Wojciech_Matusik2", "aff": "Massachusetts Institute of Technology;Georgia Institute of Technology;Massachusetts Institute of Technology;Department of Computer Science, University of Wisconsin - Madison;Shanghai Qi Zhi Institute;Georgia Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;gatech.edu;mit.edu;cs.wisc.edu;sqz.ac.cn;gatech.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Principal investigator;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2024neuralfluid,\ntitle={NeuralFluid: Nueral Fluidic System Design and Control with Differentiable Simulation},\nauthor={Yifei Li and Yuchen Sun and Pingchuan Ma and Eftychios Sifakis and Tao Du and Bo Zhu and Wojciech Matusik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LLsOmvJbBm}\n}", "github": "", "reviewers": "ueL6;QmuX;grLC;ZAVx", "pdf_size": 33223414, "rating": "5;5;7;8", "confidence": "3;4;4;3", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "2;2;3;4", "wc_summary": "163;33;170;84", "wc_strengths": "65;84;68;59", "wc_weaknesses": "250;119;9;99", "wc_questions": "5;31;135;156", "wc_limitations": "37;45;9;11", "wc_review": "520;312;391;409", "wc_reply_reviewers": "64;72;26;405", "wc_reply_authors": "316;105;105;231", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 112.5, 56.984647055149864 ], "wc_strengths_avg": [ 69.0, 9.246621004453464 ], "wc_weaknesses_avg": [ 119.25, 86.11148297410746 ], "wc_questions_avg": [ 81.75, 64.83585042243219 ], "wc_limitations_avg": [ 25.5, 15.771810295587505 ], "wc_review_avg": [ 408.0, 74.24621202458749 ], "wc_reply_reviewers_avg": [ 141.75, 152.977735308116 ], "wc_reply_authors_avg": [ 189.25, 89.44935718047391 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4229086426757438870&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "mit.edu;gatech.edu;mit.edu;cs.wisc.edu;sqz.ac.cn;gatech.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;3;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Georgia Institute of Technology;University of Wisconsin-Madison;Shanghai Qi Zhi Institute", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://web.mit.edu;https://www.gatech.edu;https://www.wisc.edu;https://www.qz.io", "aff_unique_abbr": "MIT;Georgia Tech;UW-Madison;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Where does In-context Learning Happen in Large Language Models?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95599", "id": "LLuSjg59an", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LLuSjg59an", "openreview": "https://openreview.net/forum?id=LLuSjg59an", "poster": "/media/PosterPDFs/NeurIPS%202024/95599.png?t=1731754790.2034507", "project": "", "author_site": "Suzanna Sia, David Mueller, Kevin Duh", "tldr": "", "abstract": "Self-supervised large language models have demonstrated the ability to perform various tasks via in-context learning, but little is known about where the model locates the task with respect to prompt instructions and demonstration examples. In this work, we attempt to characterize the region where large language models transition from recognizing the task to performing the task. Through a series of layer-wise context-masking experiments on GPTNeo2.7B, Bloom3B, Starcoder2-7B, Llama3.1-8B, Llama3.1-8B-Instruct, on Machine Translation and Code generation, we demonstrate evidence of a \"task recognition\" point where the task is encoded into the input representations and attention to context is no longer necessary. Taking advantage of this redundancy results in 45% computational savings when prompting with 5 examples, and task recognition achieved at layer 14 / 32 using an example with Machine Translation. Our findings also have implications for resource and parameter efficient fine-tuning; we observe a correspondence between strong fine-tuning performance of individual LoRA layers and the task recognition layers.", "keywords": "Explainability;self-attention layers;redundancy;LLM;in-context learning;machine translation;text to code", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Suzanna Sia;David Mueller;Kevin Duh", "authorids": "~Suzanna_Sia1;~David_Mueller1;~Kevin_Duh1", "gender": ";M;M", "homepage": ";https://www.damueller.com;https://cs.jhu.edu/~kevinduh/", "dblp": ";224/2296;58/3217", "google_scholar": ";TMv0Lw8AAAAJ;M3BSiiQAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Suzanna_Sia1;~David_Mueller1;~Kevin_Duh1", "aff": ";Johns Hopkins University;Johns Hopkins University", "aff_domain": ";jhu.edu;jhu.edu", "position": ";PhD student;Assistant Research Professor", "bibtex": "@inproceedings{\nsia2024where,\ntitle={Where does In-context Learning {\\textbackslash}{\\textbackslash} Happen in Large Language Models?},\nauthor={Suzanna Sia and David Mueller and Kevin Duh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LLuSjg59an}\n}", "github": "", "reviewers": "m9xg;z6j9;iAtv;NuLE", "pdf_size": 2461806, "rating": "3;4;6;8", "confidence": "4;4;3;4", "soundness": "1;3;3;4", "novelty": "3;1;2;4", "presentation": "3;3;2;3", "wc_summary": "35;74;208;122", "wc_strengths": "13;17;98;85", "wc_weaknesses": "200;104;228;34", "wc_questions": "10;6;84;121", "wc_limitations": "2;1;27;1", "wc_review": "260;202;645;363", "wc_reply_reviewers": "35;68;0;22", "wc_reply_authors": "20;218;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.75, 64.55375666837678 ], "wc_strengths_avg": [ 53.25, 38.55110244856819 ], "wc_weaknesses_avg": [ 141.5, 77.24474092130804 ], "wc_questions_avg": [ 55.25, 49.04780830985214 ], "wc_limitations_avg": [ 7.75, 11.121488209767612 ], "wc_review_avg": [ 367.5, 170.27404382347885 ], "wc_reply_reviewers_avg": [ 31.25, 24.631027181179434 ], "wc_reply_authors_avg": [ 59.5, 91.87355441039603 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.22549380840084865, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2933479229432033696&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";jhu.edu;jhu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Semantic Density: Uncertainty Quantification for Large Language Models through Confidence Measurement in Semantic Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95598", "id": "LOH6qzI7T6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LOH6qzI7T6", "openreview": "https://openreview.net/forum?id=LOH6qzI7T6", "poster": "/media/PosterPDFs/NeurIPS%202024/95598.png?t=1733561512.3820007", "project": "", "author_site": "Xin Qiu, Risto Miikkulainen", "tldr": "", "abstract": "With the widespread application of Large Language Models (LLMs) to various domains, concerns regarding the trustworthiness of LLMs in safety-critical scenarios have been raised, due to their unpredictable tendency to hallucinate and generate misinformation. Existing LLMs do not have an inherent functionality to provide the users with an uncertainty/confidence metric for each response it generates, making it difficult to evaluate trustworthiness. Although several studies aim to develop uncertainty quantification methods for LLMs, they have fundamental limitations, such as being restricted to classification tasks, requiring additional training and data, considering only lexical instead of semantic information, and being prompt-wise but not response-wise. A new framework is proposed in this paper to address these issues. Semantic density extracts uncertainty/confidence information for each response from a probability distribution perspective in semantic space. It has no restriction on task types and is \"off-the-shelf\" for new models and tasks. Experiments on seven state-of-the-art LLMs, including the latest Llama 3 and Mixtral-8x22B models, on four free-form question-answering benchmarks demonstrate the superior performance and robustness of semantic density compared to prior approaches.", "keywords": "uncertainty quantification;large language models;trustworthy AI", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/6286f734baaf55cdddcb108459e1173775752133.zip", "author": "Xin Qiu;Risto Miikkulainen", "authorids": "~Xin_Qiu1;~Risto_Miikkulainen1", "gender": "M;", "homepage": "https://vsonicv.github.io/;http://www.cs.utexas.edu/users/risto", "dblp": "83/7479-1;m/RistoMiikkulainen", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": ";", "linkedin": "xin-qiu-4a5ba0116/;", "or_profile": "~Xin_Qiu1;~Risto_Miikkulainen1", "aff": "Cognizant;The University of Texas, Austin", "aff_domain": "cognizant.com;cs.utexas.edu", "position": "Associate Director;Full Professor", "bibtex": "@inproceedings{\nqiu2024semantic,\ntitle={Semantic Density: Uncertainty Quantification for Large Language Models through Confidence Measurement in Semantic Space},\nauthor={Xin Qiu and Risto Miikkulainen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LOH6qzI7T6}\n}", "github": "", "reviewers": "koQE;kNB4;mViE;rbw2", "pdf_size": 956563, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "24;65;88;41", "wc_strengths": "58;46;32;42", "wc_weaknesses": "225;56;55;35", "wc_questions": "237;171;16;253", "wc_limitations": "18;1;1;1", "wc_review": "562;339;192;372", "wc_reply_reviewers": "265;0;0;9", "wc_reply_authors": "1185;94;0;8", "reply_reviewers": "3;0;0;1", "reply_authors": "8;3;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.5, 24.212600025606502 ], "wc_strengths_avg": [ 44.5, 9.313968005098578 ], "wc_weaknesses_avg": [ 92.75, 76.81267799002974 ], "wc_questions_avg": [ 169.25, 93.66529506706313 ], "wc_limitations_avg": [ 5.25, 7.361215932167728 ], "wc_review_avg": [ 366.25, 131.77324273159556 ], "wc_reply_reviewers_avg": [ 68.5, 113.50881023074817 ], "wc_reply_authors_avg": [ 321.75, 499.7581289984186 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 3.5, 2.692582403567252 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16060056348493942009&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "cognizant.com;cs.utexas.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Cognizant;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.cognizant.com;https://www.utexas.edu", "aff_unique_abbr": "Cognizant;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Cross-Modality Perturbation Synergy Attack for Person Re-identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95597", "id": "LONd7ACEjy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LONd7ACEjy", "openreview": "https://openreview.net/forum?id=LONd7ACEjy", "poster": "/media/PosterPDFs/NeurIPS%202024/95597.png?t=1731718472.0874865", "project": "", "author_site": "Yunpeng Gong, Zhun Zhong, Yansong Qu, Zhiming Luo, Rongrong Ji, Min", "tldr": "", "abstract": "In recent years, there has been significant research focusing on addressing security concerns in single-modal person re-identification (ReID) systems that are based on RGB images. However, the safety of cross-modality scenarios, which are more commonly encountered in practical applications involving images captured by infrared cameras, has not received adequate attention. The main challenge in cross-modality ReID lies in effectively dealing with visual differences between different modalities. For instance, infrared images are typically grayscale, unlike visible images that contain color information. Existing attack methods have primarily focused on the characteristics of the visible image modality, overlooking the features of other modalities and the variations in data distribution among different modalities. This oversight can potentially undermine the effectiveness of these methods in image retrieval across diverse modalities. This study represents the first exploration into the security of cross-modality ReID models and proposes a universal perturbation attack specifically designed for cross-modality ReID. This attack optimizes perturbations by leveraging gradients from diverse modality data, thereby disrupting the discriminator and reinforcing the differences between modalities. We conducted experiments on three widely used cross-modality datasets, namely RegDB, SYSU, and LLCM. The results not only demonstrate the effectiveness of our method but also provide insights for future improvements in the robustness of cross-modality ReID systems.", "keywords": "Computer Vision; Adversarial Attack\uff1bPerson Re-identification", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yunpeng Gong;Zhun Zhong;Yansong Qu;Zhiming Luo;Rongrong Ji;Min Jiang", "authorids": "~Yunpeng_Gong1;~Zhun_Zhong1;~Yansong_Qu1;~Zhiming_Luo1;~Rongrong_Ji5;~Min_Jiang1", "gender": "M;M;M;M;M;M", "homepage": "http://zhunzhong.site;https://quyans.github.io/;https://sites.google.com/view/zhimingluo;http://mac.xmu.edu.cn/rrji-en.html;https://minjiang.xmu.edu.cn/;", "dblp": "32/6525;;75/9709;86/5681;35/994-5.html;", "google_scholar": "nZizkQ0AAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;RdRCIIYAAAAJ;;22w5ZwcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0003-4325-6858;;;0000-0003-2946-6974;0000-0002-6498-2555", "linkedin": ";;;;;", "or_profile": "~Zhun_Zhong1;~Yansong_Qu1;~Zhiming_Luo1;~Rongrong_Ji5;~Min_Jiang1;~fmonkey1", "aff": "University of Nottingham;Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University", "aff_domain": "nottingham.ac.uk;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "Assistant Professor;PhD student;Associate Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ngong2024crossmodality,\ntitle={Cross-Modality Perturbation Synergy Attack for Person Re-identification},\nauthor={Yunpeng Gong and Zhun Zhong and Yansong Qu and Zhiming Luo and Rongrong Ji and Min Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LONd7ACEjy}\n}", "github": "", "reviewers": "tAG1;Th4d;fT1U;Zepn", "pdf_size": 1915879, "rating": "4;6;6;7", "confidence": "5;5;5;4", "soundness": "2;3;4;4", "novelty": "3;4;3;4", "presentation": "2;4;3;4", "wc_summary": "73;98;105;69", "wc_strengths": "49;119;124;175", "wc_weaknesses": "182;154;45;98", "wc_questions": "4;78;66;79", "wc_limitations": "15;8;11;1", "wc_review": "323;457;351;422", "wc_reply_reviewers": "199;28;30;25", "wc_reply_authors": "749;106;106;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.25, 15.514106484100203 ], "wc_strengths_avg": [ 116.75, 44.83511458667191 ], "wc_weaknesses_avg": [ 119.75, 52.69902750525858 ], "wc_questions_avg": [ 56.75, 30.8818312280862 ], "wc_limitations_avg": [ 8.75, 5.11737237261468 ], "wc_review_avg": [ 388.25, 53.64408168661292 ], "wc_reply_reviewers_avg": [ 70.5, 74.21084826357936 ], "wc_reply_authors_avg": [ 240.25, 296.89760440259533 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16960969531436702130&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nottingham.ac.uk;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Nottingham;Xiamen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nottingham.ac.uk;https://www.xmu.edu.cn", "aff_unique_abbr": "UoN;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United Kingdom;China" }, { "title": "Is Function Similarity Over-Engineered? Building a Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97742", "id": "LOcLhezm1C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LOcLhezm1C", "openreview": "https://openreview.net/forum?id=LOcLhezm1C", "poster": "/media/PosterPDFs/NeurIPS%202024/97742.png?t=1731360580.4851344", "project": "", "author_site": "Rebecca Saul, Chang Liu, Noah Fleischmann, Richard Zak, Kristopher Micinski, Edward Raff, James Holt", "tldr": "", "abstract": "Binary analysis is a core component of many critical security tasks, including reverse engineering, malware analysis, and vulnerability detection. Manual analysis is often time-consuming, but identifying commonly-used or previously-seen functions can reduce the time it takes to understand a new file. However, given the complexity of assembly, and the NP-hard nature of determining function equivalence, this task is extremely difficult. Common approaches often use sophisticated disassembly and decompilation tools, graph analysis, and other expensive pre-processing steps to perform function similarity searches over some corpus. In this work, we identify a number of discrepancies between the current research environment and the underlying application need. To remedy this, we build a new benchmark, REFuSe-Bench, for binary function similarity detection consisting of high-quality datasets and tests that better reflect real-world use cases. In doing so, we address issues like data duplication and accurate labeling, experiment with real malware, and perform the first serious evaluation of ML binary function similarity models on Windows data. Our benchmark reveals that a new, simple baseline \u2014 one which looks at only the raw bytes of a function, and requires no disassembly or other pre-processing --- is able to achieve state-of-the-art performance in multiple settings. Our findings challenge conventional assumptions that complex models with highly-engineered features are being used to their full potential, and demonstrate that simpler approaches can provide significant value.", "keywords": "binary function similarity detection;triplet learning;feature engineering", "primary_area": "", "supplementary_material": "/attachment/9ab469d27eecb5047febdf96c5081bc48af81c68.zip", "author": "Rebecca Saul;Chang Liu;Noah Fleischmann;Richard J Zak;Kristopher Micinski;Edward Raff;James Holt", "authorids": "~Rebecca_Saul1;~Chang_Liu33;~Noah_Fleischmann1;~Richard_J_Zak1;~Kristopher_Micinski1;~Edward_Raff1;~James_Holt1", "gender": "F;M;M;M;M;M;M", "homepage": ";https://changliu98.github.io/;https://nfleis.com;https://rjzak.github.io/;;http://www.edwardraff.com/;", "dblp": ";;;;;204/3369;93/1248", "google_scholar": "emPX1wsAAAAJ;7BK4ze8AAAAJ;O0JSndYAAAAJ;n68SmzgAAAAJ;HpJLJWUAAAAJ;debM2bUAAAAJ;GtVgGjkAAAAJ", "orcid": ";;;0000-0003-4272-2565;;0000-0002-9900-1972;0000-0002-6368-8696", "linkedin": ";;noah-fleischmann/;https://linkedin.com/in/rjzak;;edward-raff-09992040/;jeholt/", "or_profile": "~Rebecca_Saul1;~Chang_Liu33;~Noah_Fleischmann1;~Richard_J_Zak1;~Kristopher_Micinski1;~Edward_Raff1;~James_Holt1", "aff": "BOOZ ALLEN HAMILTON;Syracuse University;Booz Allen Hamilton;Booz Allen Hamilton, Inc.;Syracuse University;Booz Allen Hamilton;Laboratory for Physical Sciences", "aff_domain": "bah.com;syr.edu;boozallen.com;bah.com;syr.edu;boozallen.com;umd.edu", "position": "Researcher;PhD student;Researcher;Researcher;Assistant Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nsaul2024is,\ntitle={Is Function Similarity Over-Engineered? Building a Benchmark},\nauthor={Rebecca Saul and Chang Liu and Noah Fleischmann and Richard J Zak and Kristopher Micinski and Edward Raff and James Holt},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=LOcLhezm1C}\n}", "github": "", "reviewers": "ym5A;4C4P;wxy9", "pdf_size": 361378, "rating": "5;6;7", "confidence": "4;3;4", "wc_summary_and_contributions": "62;79;62", "wc_strengths": "84;30;21", "wc_improvement": "2;16;11", "wc_limitations": "9;1;45", "wc_correctness": "31;1;29", "wc_clarity": "17;1;11", "wc_relation_to_prior_work": "29;1;17", "wc_documentation": "9;1;9", "wc_additional_feedback": "1;1;1", "wc_review": "244;131;206", "wc_reply_reviewers": "120;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "3;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 67.66666666666667, 8.013876853447538 ], "wc_strengths_avg": [ 45.0, 27.820855486487112 ], "wc_improvement_avg": [ 9.666666666666666, 5.792715732327589 ], "wc_limitations_avg": [ 18.333333333333332, 19.136933459209764 ], "wc_correctness_avg": [ 20.333333333333332, 13.695092389449425 ], "wc_clarity_avg": [ 9.666666666666666, 6.599663291074443 ], "wc_relation_to_prior_work_avg": [ 15.666666666666666, 11.469767022723502 ], "wc_documentation_avg": [ 6.333333333333333, 3.7712361663282534 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 193.66666666666666, 46.949145063805176 ], "wc_reply_reviewers_avg": [ 40.0, 56.568542494923804 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7479437318606424412&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "bah.com;syr.edu;boozallen.com;bah.com;syr.edu;boozallen.com;umd.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;1;0;2", "aff_unique_norm": "Booz Allen Hamilton;Syracuse University;Laboratory for Physical Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.boozallen.com;https://www.syracuse.edu;", "aff_unique_abbr": "BAH;Syracuse;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MAC Advice for facility location mechanism design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95596", "id": "LPbqZszt8Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LPbqZszt8Y", "openreview": "https://openreview.net/forum?id=LPbqZszt8Y", "poster": "/media/PosterPDFs/NeurIPS%202024/95596.png?t=1731330531.9525297", "project": "", "author_site": "Zohar Barak, Anupam Gupta, Inbal Talgam-Cohen", "tldr": "", "abstract": "Algorithms with predictions are gaining traction across various domains, as a way to surpass traditional worst-case bounds through (machine-learned) advice. We study the canonical problem of $k$-facility location mechanism design,\nwhere the $n$ agents are strategic and might misreport their locations. We receive a prediction for each agent's location, and these predictions are crucially allowed to be only \"mostly\" and \"approximately\" correct (MAC for short): a $\\delta$-fraction of the predicted locations are allowed to be arbitrarily incorrect, and the remainder of the predictions are required to be correct up to an $\\varepsilon$-error. Moreover, we make no assumption on the independence of the errors.\nCan such \"flawed\" predictions allow us to beat the current best bounds for strategyproof\nfacility location?\n\nWe show how natural robustness of the $1$-median (also known as the geometric median) of a set of points leads to an algorithm for single-facility location with MAC predictions. We extend our results to a natural \"balanced\" variant of the $k$-facility case, and show that without balancedness, robustness completely breaks down even for $k=2$ facilities on a line. As our main result, for this \"unbalanced\" setting we devise a truthful random mechanism, which outperforms the best known mechanism (with no predictions) by Lu et al.~[2010]. En route, we introduce the problem of \"second\" facility location, in which the first facility location is already fixed. Our robustness findings may be of independent interest, as quantitative versions of classic breakdown-point results in robust statistics.", "keywords": "Algorithms with Predictions;MAC Predictions;Facility Location;Discrete Optimization", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Zohar Barak;Anupam Gupta;Inbal Talgam-Cohen", "authorids": "~Zohar_Barak2;~Anupam_Gupta2;~Inbal_Talgam-Cohen2", "gender": "M;M;F", "homepage": ";https://cs.nyu.edu/~anupamg/;http://www.inbaltalgam.com/", "dblp": ";27/2931;07/8319", "google_scholar": "WugwwA8AAAAJ;QuwaU-8AAAAJ;R1YK5BsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zohar_Barak2;~Anupam_Gupta2;~Inbal_Talgam-Cohen2", "aff": "Tel Aviv University;New York University;Tel Aviv University", "aff_domain": "tau.ac.il;cs.nyu.edu;tau.ac.il", "position": "MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nbarak2024mac,\ntitle={{MAC} Advice for facility location mechanism design},\nauthor={Zohar Barak and Anupam Gupta and Inbal Talgam-Cohen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LPbqZszt8Y}\n}", "github": "", "reviewers": "8TXK;JMjQ;4KnM;HjD5", "pdf_size": 851714, "rating": "4;5;7;8", "confidence": "3;2;4;3", "soundness": "4;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "213;182;101;125", "wc_strengths": "38;53;108;85", "wc_weaknesses": "159;261;124;54", "wc_questions": "2;40;144;358", "wc_limitations": "1;1;1;18", "wc_review": "413;537;478;640", "wc_reply_reviewers": "0;0;13;407", "wc_reply_authors": "0;0;0;538", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 155.25, 44.46557657334491 ], "wc_strengths_avg": [ 71.0, 27.285527299284507 ], "wc_weaknesses_avg": [ 149.5, 74.65420282877582 ], "wc_questions_avg": [ 136.0, 138.31124321616085 ], "wc_limitations_avg": [ 5.25, 7.361215932167728 ], "wc_review_avg": [ 517.0, 83.46556176052492 ], "wc_reply_reviewers_avg": [ 105.0, 174.44053428030998 ], "wc_reply_authors_avg": [ 134.5, 232.960833618014 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.447213595499958, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2225756817756020753&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tau.ac.il;cs.nyu.edu;tau.ac.il", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tel Aviv University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tau.ac.il;https://www.nyu.edu", "aff_unique_abbr": "TAU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "Visual Pinwheel Centers Act as Geometric Saliency Detectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95595", "id": "LPkcoml66W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LPkcoml66W", "openreview": "https://openreview.net/forum?id=LPkcoml66W", "poster": "/media/PosterPDFs/NeurIPS%202024/95595.png?t=1733461524.0875273", "project": "", "author_site": "Haixin Zhong, Mingyi Huang, Wei Dai, Haoyu Wang, Anna Roe, Yuguo Yu", "tldr": "", "abstract": "During natural evolution, the primary visual cortex (V1) of lower mammals typically forms salt-and-pepper organizations, while higher mammals and primates develop pinwheel structures with distinct topological properties. Despite the general belief that V1 neurons primarily serve as edge detectors, the functional advantages of pinwheel structures over salt-and-peppers are not well recognized. To this end, we propose a two-dimensional self-evolving spiking neural network that integrates Hebbian-like plasticity and empirical morphological data. Through extensive exposure to image data, our network evolves from salt-and-peppers to pinwheel structures, with neurons becoming localized bandpass filters responsive to various orientations. This transformation is accompanied by an increase in visual field overlap. Our findings indicate that neurons in pinwheel centers (PCs) respond more effectively to complex spatial textures in natural images, exhibiting quicker responses than those in salt-and-pepper organizations. PCs act as first-order stage processors with heightened sensitivity and reduced latency to intricate contours, while adjacent iso-orientation domains serve as second-order stage processors that refine edge representations for clearer perception. This study presents the first theoretical evidence that pinwheel structures function as crucial detectors of spatial contour saliency in the visual cortex.", "keywords": "Visual Contour Saliency;Orentation Map;Population Coding;Hebbian Learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/6b468e3b102623d1543f57615ae5d5f8ef01d2c2.zip", "author": "Haixin Zhong;Mingyi Huang;Wei P Dai;Haoyu Wang;Anna Wang Roe;yuguo yu", "authorids": "~Haixin_Zhong2;~Mingyi_Huang2;~Wei_P_Dai1;~Haoyu_Wang15;~Anna_Wang_Roe1;~yuguo_yu1", "gender": ";F;M;;F;M", "homepage": ";;https://iics.fudan.edu.cn/41/d4/c33358a410068/page.htm;;http://www.ziint.zju.edu.cn;https://iics.fudan.edu.cn/41/dd/c33358a410077/page.htm", "dblp": ";;;;257/4038.html;36/5929.html", "google_scholar": ";;u8FKRuAAAAAJ;;https://scholar.google.ca/citations?user=yoDf0BQAAAAJ;yVjZpTsAAAAJ", "orcid": ";0000-0001-8096-7876;;;;0000-0003-0603-2890", "linkedin": ";;;;;", "or_profile": "~Haixin_Zhong2;~Mingyi_Huang2;~Wei_P_Dai1;~Haoyu_Wang15;~Anna_Wang_Roe1;~yuguo_yu1", "aff": ";Fudan University;Fudan University;;Zhejiang University;Fudan University", "aff_domain": ";fudan.edu.cn;fudan.edu.cn;;zju.edu.cn;fudan.edu.cn", "position": ";PhD student;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhong2024visual,\ntitle={Visual Pinwheel Center Act as Geometric Saliency Detector},\nauthor={Haixin Zhong and Mingyi Huang and Wei P Dai and Haoyu Wang and Anna Wang Roe and yuguo yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LPkcoml66W}\n}", "github": "", "reviewers": "TFr2;FA4k;Boae;indp", "pdf_size": 2046464, "rating": "4;6;6;7", "confidence": "4;3;5;4", "soundness": "2;3;3;3", "novelty": "2;3;2;2", "presentation": "2;4;3;3", "wc_summary": "64;75;52;81", "wc_strengths": "40;78;43;52", "wc_weaknesses": "173;79;117;35", "wc_questions": "89;202;100;66", "wc_limitations": "2;19;52;140", "wc_review": "368;453;364;374", "wc_reply_reviewers": "178;74;0;8", "wc_reply_authors": "1221;125;808;17", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;4;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.0, 11.067971810589327 ], "wc_strengths_avg": [ 53.25, 14.956186011146023 ], "wc_weaknesses_avg": [ 101.0, 50.695167422546305 ], "wc_questions_avg": [ 114.25, 52.12664865498261 ], "wc_limitations_avg": [ 53.25, 53.2136025843017 ], "wc_review_avg": [ 389.75, 36.69042790701684 ], "wc_reply_reviewers_avg": [ 65.0, 71.28113354878694 ], "wc_reply_authors_avg": [ 542.75, 495.3051458444582 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12989658645343111271&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": ";fudan.edu.cn;fudan.edu.cn;;zju.edu.cn;fudan.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Fudan University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "Fudan;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Near-Optimal Dynamic Regret for Adversarial Linear Mixture MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95594", "id": "LPyPRS2XcF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LPyPRS2XcF", "openreview": "https://openreview.net/forum?id=LPyPRS2XcF", "poster": "", "project": "", "author_site": "Long-Fei Li, Peng Zhao, Zhi-Hua Zhou", "tldr": "", "abstract": "We study episodic linear mixture MDPs with the unknown transition and adversarial rewards under full-information feedback, employing *dynamic regret* as the performance measure. We start with in-depth analyses of the strengths and limitations of the two most popular methods: occupancy-measure-based and policy-based methods. We observe that while the occupancy-measure-based method is effective in addressing non-stationary environments, it encounters difficulties with the unknown transition. In contrast, the policy-based method can deal with the unknown transition effectively but faces challenges in handling non-stationary environments. Building on this, we propose a novel algorithm that combines the benefits of both methods. Specifically, it employs (i) an *occupancy-measure-based global optimization* with a two-layer structure to handle non-stationary environments; and (ii) a *policy-based variance-aware value-targeted regression* to tackle the unknown transition. We bridge these two parts by a novel conversion. Our algorithm enjoys an $\\widetilde{\\mathcal{O}}(d \\sqrt{H^3 K} + \\sqrt{HK(H + \\bar{P}_K)})$ dynamic regret, where $d$ is the feature mapping dimension, $H$ is the episode length, $K$ is the number of episodes, $\\bar{P}_K$ is the non-stationarity measure. We show it is minimax optimal up to logarithmic factors by establishing a matching lower bound. To the best of our knowledge, this is the **first** work that achieves **near-optimal** dynamic regret for adversarial linear mixture MDPs with the unknown transition without prior knowledge of the non-stationarity measure.", "keywords": "reinforcement learning;dynamic regret;linear mixture MDPs;function approximation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Long-Fei Li;Peng Zhao;Zhi-Hua Zhou", "authorids": "~Long-Fei_Li1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Long-Fei_Li1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\nli2024nearoptimal,\ntitle={Near-Optimal Dynamic Regret for Adversarial Linear Mixture {MDP}s},\nauthor={Long-Fei Li and Peng Zhao and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LPyPRS2XcF}\n}", "github": "", "reviewers": "QFuD;PUu1;xu5Q;pW8k", "pdf_size": 502319, "rating": "5;6;6;7", "confidence": "4;3;1;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;3;2;4", "wc_summary": "73;51;118;98", "wc_strengths": "56;51;22;35", "wc_weaknesses": "177;34;236;22", "wc_questions": "56;1;56;58", "wc_limitations": "1;7;39;11", "wc_review": "363;144;471;224", "wc_reply_reviewers": "0;14;12;13", "wc_reply_authors": "75;0;17;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.0, 25.288337232803585 ], "wc_strengths_avg": [ 41.0, 13.435028842544403 ], "wc_weaknesses_avg": [ 117.25, 91.75340593133315 ], "wc_questions_avg": [ 42.75, 24.118198523106987 ], "wc_limitations_avg": [ 14.5, 14.585952145814822 ], "wc_review_avg": [ 300.5, 125.81832140034297 ], "wc_reply_reviewers_avg": [ 9.75, 5.673402858955108 ], "wc_reply_authors_avg": [ 23.0, 30.813957876261206 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SIwwO5YpwQ0J:scholar.google.com/&scioq=Near-Optimal+Dynamic+Regret+for+Adversarial+Linear+Mixture+MDPs&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": ";;", "author_num": 3 }, { "title": "How Molecules Impact Cells: Unlocking Contrastive PhenoMolecular Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95593", "id": "LQBlSGeOGm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LQBlSGeOGm", "openreview": "https://openreview.net/forum?id=LQBlSGeOGm", "poster": "", "project": "", "author_site": "Philip Fradkin, Puria Azadi Moghadam, Karush Suri, Frederik Wenkel, Ali Bashashati, Maciej Sypetkowski, Dominique Beaini", "tldr": "", "abstract": "Predicting molecular impact on cellular function is a core challenge in therapeutic design. Phenomic experiments, designed to capture cellular morphology, utilize microscopy based techniques and demonstrate a high throughput solution for uncovering molecular impact on the cell. In this work, we learn a joint latent space between molecular structures and microscopy phenomic experiments, aligning paired samples with contrastive learning. Specifically, we study the problem of Contrastive PhenoMolecular Retrieval, which consists of zero-shot molecular structure identification conditioned on phenomic experiments. We assess challenges in multi-modal learning of phenomics and molecular modalities such as experimental batch effect, inactive molecule perturbations, and encoding perturbation concentration. We demonstrate improved multi-modal learner retrieval through (1) a uni-modal pre-trained phenomics model, (2) a novel inter sample similarity aware loss, and (3) models conditioned on a representation of molecular concentration. Following this recipe, we propose MolPhenix, a molecular phenomics model. MolPhenix leverages a pre-trained phenomics model to demonstrate significant performance gains across perturbation concentrations, molecular scaffolds, and activity thresholds. In particular, we demonstrate an 8.1 times improvement in zero shot molecular retrieval of active molecules over the previous state-of-the-art, reaching 77.33% in top-1% accuracy. These results open the door for machine learning to be applied in virtual phenomics screening, which can significantly benefit drug discovery applications.", "keywords": "Multi-Modality;Contrastive Learning;CLIP;Cell Morphology;Molecules;Molecular Retrieval;Zero-Shot Learning;Cell-Painting", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Philip Fradkin;Puria Azadi Moghadam;Karush Suri;Frederik Wenkel;Ali Bashashati;Maciej Sypetkowski;Dominique Beaini", "authorids": "~Philip_Fradkin1;~Puria_Azadi_Moghadam2;~Karush_Suri1;~Frederik_Wenkel1;~Ali_Bashashati1;~Maciej_Sypetkowski1;~Dominique_Beaini1", "gender": "M;;M;;;;M", "homepage": "https://phil-fradkin.github.io/;https://www.puriam.net/;https://karush17.github.io/;;https://www.bme.ubc.ca/person/ali-bashashati/;;", "dblp": ";232/1909;252/3260;261/3715;22/9218;;201/8526", "google_scholar": "siqpD8oAAAAJ;;https://scholar.google.co.in/citations?user=ZFCHp9gAAAAJ;;https://scholar.google.ca/citations?user=syvMubEAAAAJ;;https://scholar.google.ca/citations?hl=en", "orcid": ";;;;;;0000-0002-4613-9388", "linkedin": "phil-fradkin-330a417b/;;;https://www.linkedin.com/mwlite/in/frederik-wenkel-669388185;;;dbeaini/", "or_profile": "~Philip_Fradkin1;~Puria_Azadi_Moghadam2;~Karush_Suri1;~Frederik_Wenkel1;~Ali_Bashashati1;~Maciej_Sypetkowski1;~Dominique_Beaini1", "aff": "University of Toronto;University of British Columbia;Recursion;University of Montreal;University of British Columbia;;Mila - Institut Qu\u00e9b\u00e9cois d'intelligence artificielle", "aff_domain": "toronto.edu;ubc.ca;valencelabs.com;umontreal.ca;ubc.ca;;mila.quebec", "position": "PhD student;PhD student;Researcher;PhD student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nfradkin2024how,\ntitle={How Molecules Impact Cells: Unlocking Contrastive PhenoMolecular Retrieval},\nauthor={Philip Fradkin and Puria Azadi Moghadam and Karush Suri and Frederik Wenkel and Ali Bashashati and Maciej Sypetkowski and Dominique Beaini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LQBlSGeOGm}\n}", "github": "", "reviewers": "kfG5;Vz3t;r9h5;YXwu;ELHC", "pdf_size": 3416714, "rating": "5;6;6;7;8", "confidence": "3;3;4;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "67;58;153;77;91", "wc_strengths": "87;47;106;73;93", "wc_weaknesses": "109;195;146;42;75", "wc_questions": "42;57;50;67;61", "wc_limitations": "7;21;10;7;15", "wc_review": "312;378;465;266;335", "wc_reply_reviewers": "0;13;0;0;11", "wc_reply_authors": "0;25;0;0;0", "reply_reviewers": "0;1;0;0;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 89.2, 33.730698184294965 ], "wc_strengths_avg": [ 81.2, 20.123617964968425 ], "wc_weaknesses_avg": [ 113.4, 53.50364473566264 ], "wc_questions_avg": [ 55.4, 8.685620300243386 ], "wc_limitations_avg": [ 12.0, 5.366563145999495 ], "wc_review_avg": [ 351.2, 67.41928507482113 ], "wc_reply_reviewers_avg": [ 4.8, 5.912698199637792 ], "wc_reply_authors_avg": [ 5.0, 10.0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.1961161351381841, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8205554703440748493&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "toronto.edu;ubc.ca;valencelabs.com;umontreal.ca;ubc.ca;;mila.quebec", "author_num": 7, "aff_unique_index": "0;1;2;3;1;4", "aff_unique_norm": "University of Toronto;University of British Columbia;Recursion;University of Montreal;Mila - Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;;;Artificial Intelligence", "aff_unique_url": "https://www.utoronto.ca;https://www.ubc.ca;;https://wwwumontreal.ca;https://mila.quebec", "aff_unique_abbr": "U of T;UBC;;UM;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada;" }, { "title": "Mind the Graph When Balancing Data for Fairness or Robustness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95592", "id": "LQR22jM5l3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LQR22jM5l3", "openreview": "https://openreview.net/forum?id=LQR22jM5l3", "poster": "/media/PosterPDFs/NeurIPS%202024/95592.png?t=1732887762.8621683", "project": "", "author_site": "Jessica Schrouff, Alexis Bellot, Amal Rannen-Triki, Alan Malek, Isabela Albuquerque, Arthur Gretton, Alexander D'Amour, Silvia Chiappa", "tldr": "", "abstract": "Failures of fairness or robustness in machine learning predictive settings can be due to undesired dependencies between covariates, outcomes and auxiliary factors of variation. A common strategy to mitigate these failures is data balancing, which attempts to remove those undesired dependencies. In this work, we define conditions on the training distribution for data balancing to lead to fair or robust models. Our results display that in many cases, the balanced distribution does not correspond to selectively removing the undesired dependencies in a causal graph of the task, leading to multiple failure modes and even interference with other mitigation techniques such as regularization. Overall, our results highlight the importance of taking the causal graph into account before performing data balancing.", "keywords": "Fairness;Robustness;Data balancing;Causality;Fine-tuning", "primary_area": "fairness", "supplementary_material": "", "author": "Jessica Schrouff;Alexis Bellot;Amal Rannen-Triki;Alan Malek;Isabela Albuquerque;Arthur Gretton;Alexander Nicholas D'Amour;Silvia Chiappa", "authorids": "~Jessica_Schrouff1;~Alexis_Bellot1;~Amal_Rannen-Triki1;~Alan_Malek1;~Isabela_Albuquerque1;~Arthur_Gretton1;~Alexander_D'Amour1;~Silvia_Chiappa1", "gender": "F;M;F;M;F;M;M;F", "homepage": ";;http://amal.rannen.triki.me;http://www.alanmalek.com;;http://www.gatsby.ucl.ac.uk/~gretton/;http://www.alexdamour.com;https://csilviavr.github.io/", "dblp": "96/9449;217/4339;180/5447;https://dblp.uni-trier.de/pers/hd/m/Malek:Alan;210/2719;56/2574;209/4892;", "google_scholar": "https://scholar.google.co.uk/citations?user=2YWm2nMAAAAJ;;sSwp5n0AAAAJ;NJqT9ukAAAAJ;;OUv7J6QAAAAJ;okP0uukAAAAJ;https://scholar.google.co.uk/citations?user=GAvF3gUAAAAJ", "orcid": "0000-0003-4992-3183;;;;;;;0000-0002-1882-6842", "linkedin": "jessica-schrouff/;;;;;;;", "or_profile": "~Jessica_Schrouff1;~Alexis_Bellot1;~Amal_Rannen-Triki1;~Alan_Malek1;~Isabela_Albuquerque1;~Arthur_Gretton1;~Alexander_D'Amour1;~Silvia_Chiappa1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;University College London;Google;Google DeepMind", "aff_domain": "google.com;deepmind.com;google.com;deepmind.com;deepmind.com;ucl.ac.uk;google.com;google.com", "position": "Senior Researcher;Researcher;Researcher;Research Scientist;Researcher;Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\nschrouff2024mind,\ntitle={Mind the Graph When Balancing Data for Fairness or Robustness},\nauthor={Jessica Schrouff and Alexis Bellot and Amal Rannen-Triki and Alan Malek and Isabela Albuquerque and Arthur Gretton and Alexander Nicholas D'Amour and Silvia Chiappa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LQR22jM5l3}\n}", "github": "", "reviewers": "rHnX;AEL7;hyP8;YT39", "pdf_size": 2179003, "rating": "4;4;6;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "59;56;96;84", "wc_strengths": "45;50;44;134", "wc_weaknesses": "177;175;132;81", "wc_questions": "31;326;8;218", "wc_limitations": "1;13;10;8", "wc_review": "313;620;290;525", "wc_reply_reviewers": "0;0;0;61", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 16.82817577754642 ], "wc_strengths_avg": [ 68.25, 38.028772002261654 ], "wc_weaknesses_avg": [ 141.25, 39.15593824696326 ], "wc_questions_avg": [ 145.75, 132.14835413277004 ], "wc_limitations_avg": [ 8.0, 4.415880433163924 ], "wc_review_avg": [ 437.0, 139.83740558234052 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9622504486493761, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12686229608374115390&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "google.com;deepmind.com;google.com;deepmind.com;deepmind.com;ucl.ac.uk;google.com;google.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Google;University College London", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.ucl.ac.uk", "aff_unique_abbr": "DeepMind;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Neural decoding from stereotactic EEG: accounting for electrode variability across subjects", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95591", "id": "LR1nnsD7H0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LR1nnsD7H0", "openreview": "https://openreview.net/forum?id=LR1nnsD7H0", "poster": "/media/PosterPDFs/NeurIPS%202024/95591.png?t=1731104034.6156924", "project": "", "author_site": "Georgios Mentzelopoulos, Evangelos Chatzipantazis, Ashwin Ramayya, Michelle Hedlund, Vivek Buch, Kostas Daniilidis, Konrad Kording, Flavia Vitale", "tldr": "", "abstract": "Deep learning based neural decoding from stereotactic electroencephalography (sEEG) would likely benefit from scaling up both dataset and model size. To achieve this, combining data across multiple subjects is crucial. However, in sEEG cohorts, each subject has a variable number of electrodes placed at distinct locations in their brain, solely based on clinical needs. Such heterogeneity in electrode number/placement poses a significant challenge for data integration, since there is no clear correspondence of the neural activity recorded at distinct sites between individuals. Here we introduce seegnificant: a training framework and architecture that can be used to decode behavior across subjects using sEEG data. We tokenize the neural activity within electrodes using convolutions and extract long-term temporal dependencies between tokens using self-attention in the time dimension. The 3D location of each electrode is then mixed with the tokens, followed by another self-attention in the electrode dimension to extract effective spatiotemporal neural representations. Subject-specific heads are then used for downstream decoding tasks. Using this approach, we construct a multi-subject model trained on the combined data from 21 subjects performing a behavioral task. We demonstrate that our model is able to decode the trial-wise response time of the subjects during the behavioral task solely from neural data. We also show that the neural representations learned by pretraining our model across individuals can be transferred in a few-shot manner to new subjects. This work introduces a scalable approach towards sEEG data integration for multi-subject model training, paving the way for cross-subject generalization for sEEG decoding.", "keywords": "sEEG;Neural Decoding;Transformers;Multi-Subject Training", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Georgios Mentzelopoulos;Evangelos Chatzipantazis;Ashwin G Ramayya;Michelle Hedlund;Vivek Buch;Kostas Daniilidis;Konrad Kording;Flavia Vitale", "authorids": "~Georgios_Mentzelopoulos1;~Evangelos_Chatzipantazis1;~Ashwin_G_Ramayya1;~Michelle_Hedlund1;~Vivek_Buch1;~Kostas_Daniilidis1;~Konrad_Kording1;~Flavia_Vitale1", "gender": "M;;M;F;M;M;M;F", "homepage": ";https://www.grasp.upenn.edu/people/evangelos-chatzipantazis/;https://med.stanford.edu/profiles/ashwin-ramayya;;https://med.stanford.edu/profiles/vivek-buch;http://www.cis.upenn.edu/~kostas;http://www.kordinglab.com;https://vitalelab.med.upenn.edu", "dblp": ";306/8423;;;;d/KostasDaniilidis;;", "google_scholar": "https://scholar.google.com/citations?hl=en;qQsYhTgAAAAJ;hK_0l0YAAAAJ;;uvZ63bMAAAAJ;dGs2BcIAAAAJ;MiFqJGcAAAAJ;https://scholar.google.it/citations?user=jgyWJHIAAAAJ", "orcid": "0000-0001-5596-7559;;;;;0000-0003-0498-0758;0000-0001-8408-4499;", "linkedin": "georgios-mentzelopoulos-aa284411b/;;;michelle-hedlund-473016170/;;;;", "or_profile": "~Georgios_Mentzelopoulos1;~Evangelos_Chatzipantazis1;~Ashwin_G_Ramayya1;~Michelle_Hedlund1;~Vivek_Buch1;~Kostas_Daniilidis1;~Konrad_Kording1;~Flavia_Vitale1", "aff": "University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;Stanford University;Stanford University;Stanford University;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;stanford.edu;stanford.edu;stanford.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;PhD student;Assistant Professor;PhD student;Assistant Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmentzelopoulos2024neural,\ntitle={Neural decoding from stereotactic {EEG}: accounting for electrode variability across subjects},\nauthor={Georgios Mentzelopoulos and Evangelos Chatzipantazis and Ashwin G Ramayya and Michelle Hedlund and Vivek Buch and Kostas Daniilidis and Konrad Kording and Flavia Vitale},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LR1nnsD7H0}\n}", "github": "", "reviewers": "pdN2;j3GS;VKBz;o6UF", "pdf_size": 5294064, "rating": "4;5;6;7", "confidence": "4;3;4;4", "soundness": "2;2;3;4", "novelty": "2;3;1;3", "presentation": "3;4;3;3", "wc_summary": "95;160;83;119", "wc_strengths": "75;160;80;85", "wc_weaknesses": "5;539;165;49", "wc_questions": "194;704;82;50", "wc_limitations": "5;13;20;37", "wc_review": "374;1576;430;340", "wc_reply_reviewers": "123;375;0;18", "wc_reply_authors": "529;800;0;21", "reply_reviewers": "2;2;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.25, 29.422567868899545 ], "wc_strengths_avg": [ 100.0, 34.8209706929603 ], "wc_weaknesses_avg": [ 189.5, 210.0779617189771 ], "wc_questions_avg": [ 257.5, 263.273147130504 ], "wc_limitations_avg": [ 18.75, 11.797775213996918 ], "wc_review_avg": [ 680.0, 518.3030001842552 ], "wc_reply_reviewers_avg": [ 129.0, 149.59445176877384 ], "wc_reply_authors_avg": [ 337.5, 340.82876932559554 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8QCX5Nro54cJ:scholar.google.com/&scioq=Neural+decoding+from+stereotactic+EEG:+accounting+for+electrode+variability+across+subjects&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "seas.upenn.edu;seas.upenn.edu;stanford.edu;stanford.edu;stanford.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 8, "aff_unique_index": "0;0;1;1;1;0;0;0", "aff_unique_norm": "University of Pennsylvania;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.stanford.edu", "aff_unique_abbr": "UPenn;Stanford", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On provable privacy vulnerabilities of graph representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95590", "id": "LSqDcfX3xU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LSqDcfX3xU", "openreview": "https://openreview.net/forum?id=LSqDcfX3xU", "poster": "/media/PosterPDFs/NeurIPS%202024/95590.png?t=1731150016.2143846", "project": "", "author_site": "Ruofan Wu, Guanhua Fang, Mingyang Zhang, Qiying Pan, Tengfei LIU, Weiqiang Wang", "tldr": "", "abstract": "Graph representation learning (GRL) is critical for extracting insights from complex network structures, but it also raises security concerns due to potential privacy vulnerabilities in these representations. This paper investigates the structural vulnerabilities in graph neural models where sensitive topological information can be inferred through edge reconstruction attacks. Our research primarily addresses the theoretical underpinnings of similarity-based edge reconstruction attacks (SERA), furnishing a non-asymptotic analysis of their reconstruction capacities. Moreover, we present empirical corroboration indicating that such attacks can perfectly reconstruct sparse graphs as graph size increases. Conversely, we establish that sparsity is a critical factor for SERA's effectiveness, as demonstrated through analysis and experiments on (dense) stochastic block models. Finally, we explore the resilience of private graph representations produced via noisy aggregation (NAG) mechanism against SERA. Through theoretical analysis and empirical assessments, we affirm the mitigation of SERA using NAG . In parallel, we also empirically delineate instances wherein SERA demonstrates both efficacy and deficiency in its capacity to function as an instrument for elucidating the trade-off between privacy and utility.", "keywords": "privacy;graph representation learning;non-asymptotic analysis;privacy attacks", "primary_area": "privacy", "supplementary_material": "/attachment/f6e346666f2fd7ef6a5ea5794f6923237b8c15f2.gz", "author": "Ruofan Wu;Guanhua Fang;Mingyang Zhang;Qiying Pan;Tengfei LIU;Weiqiang Wang", "authorids": "~Ruofan_Wu1;~Guanhua_Fang1;~Mingyang_Zhang8;~Qiying_Pan1;~Tengfei_LIU2;~Weiqiang_Wang4", "gender": "M;;M;Not Specified;;M", "homepage": "https://rorschach1989.github.io/;https://sites.google.com/view/hyperfang2020/home;https://github.com/mingyangzhang;;;https://www.linkedin.com/in/weiqiang-wang-489b925/", "dblp": ";274/1306;76/4874-4.html;;;", "google_scholar": ";VG7zQQsAAAAJ;;FOJuAjoAAAAJ;;", "orcid": ";;;;;0000-0002-6159-619X", "linkedin": ";;;;;weiqiang-wang-489b925/", "or_profile": "~Ruofan_Wu1;~Guanhua_Fang1;~Mingyang_Zhang8;~Qiying_Pan1;~Tengfei_LIU2;~Weiqiang_Wang4", "aff": "Ant Group;Fudan University;Ant Group;Shanghai Jiaotong University;;Ant Group", "aff_domain": "antgroup.com;fudan.edu.cn;antgroup.com;sjtu.edu.cn;;antgroup.com", "position": "Researcher;Assistant Professor;Researcher;MS student;;Researcher", "bibtex": "@inproceedings{\nwu2024on,\ntitle={On provable privacy vulnerabilities of graph representations},\nauthor={Ruofan Wu and Guanhua Fang and Mingyang Zhang and Qiying Pan and Tengfei LIU and Weiqiang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LSqDcfX3xU}\n}", "github": "", "reviewers": "w4Le;VCKp;M6Kg", "pdf_size": 1152265, "rating": "5;5;6", "confidence": "3;4;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;2;1", "wc_summary": "66;57;103", "wc_strengths": "57;5;50", "wc_weaknesses": "195;216;416", "wc_questions": "51;67;70", "wc_limitations": "1;9;50", "wc_review": "370;354;689", "wc_reply_reviewers": "0;97;97", "wc_reply_authors": "0;57;14", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 75.33333333333333, 19.90533150244482 ], "wc_strengths_avg": [ 37.333333333333336, 23.041026211713937 ], "wc_weaknesses_avg": [ 275.6666666666667, 99.60031236006351 ], "wc_questions_avg": [ 62.666666666666664, 8.339997335464536 ], "wc_limitations_avg": [ 20.0, 21.463146709340332 ], "wc_review_avg": [ 471.0, 154.28761021762787 ], "wc_reply_reviewers_avg": [ 64.66666666666667, 45.72623851673008 ], "wc_reply_authors_avg": [ 23.666666666666668, 24.25329301810833 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17001395000819008670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "antgroup.com;fudan.edu.cn;antgroup.com;sjtu.edu.cn;;antgroup.com", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Ant Group;Fudan University;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.antgroup.com;https://www.fudan.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "Ant Group;Fudan;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Risk-sensitive control as inference with R\u00e9nyi divergence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95589", "id": "LUIXdWn6Z5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LUIXdWn6Z5", "openreview": "https://openreview.net/forum?id=LUIXdWn6Z5", "poster": "", "project": "", "author_site": "Kaito Ito, Kenji Kashima", "tldr": "", "abstract": "This paper introduces the risk-sensitive control as inference (RCaI) that extends CaI by using R\u00e9nyi divergence variational inference. RCaI is shown to be equivalent to log-probability regularized risk-sensitive control, which is an extension of the maximum entropy (MaxEnt) control. We also prove that the risk-sensitive optimal policy can be obtained by solving a soft Bellman equation, which reveals several equivalences between RCaI, MaxEnt control, the optimal posterior for CaI, and linearly-solvable control. Moreover, based on RCaI, we derive the risk-sensitive reinforcement learning (RL) methods: the policy gradient and the soft actor-critic. As the risk-sensitivity parameter vanishes, we recover the risk-neutral CaI and RL, which means that RCaI is a unifying framework. Furthermore, we give another risk-sensitive generalization of the MaxEnt control using R\u00e9nyi entropy regularization. We show that in both of our extensions, the optimal policies have the same structure even though the derivations are very different.", "keywords": "risk-sensitive control;optimal control;reinforcement learning;variational inference;R\u00e9nyi divergence", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Kaito Ito;Kenji Kashima", "authorids": "~Kaito_Ito1;~Kenji_Kashima1", "gender": ";M", "homepage": ";", "dblp": "234/2151;", "google_scholar": "https://scholar.google.co.jp/citations?user=uyQyeBkAAAAJ;", "orcid": ";0000-0001-8460-2102", "linkedin": ";", "or_profile": "~Kaito_Ito1;~Kenji_Kashima1", "aff": "Tokyo Institute of Technology;Kyoto University, Kyoto University", "aff_domain": "titech.ac.jp;i.kyoto-u.ac.jp", "position": "Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nito2024risksensitive,\ntitle={Risk-sensitive control as inference with R\\'enyi divergence},\nauthor={Kaito Ito and Kenji Kashima},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LUIXdWn6Z5}\n}", "github": "", "reviewers": "qM7b;pMhb;4yoD;5JeB", "pdf_size": 857339, "rating": "3;6;6;7", "confidence": "5;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "89;184;166;68", "wc_strengths": "32;119;97;22", "wc_weaknesses": "299;33;254;13", "wc_questions": "3;13;83;115", "wc_limitations": "13;23;21;1", "wc_review": "436;372;621;219", "wc_reply_reviewers": "579;0;0;0", "wc_reply_authors": "1016;0;0;0", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 126.75, 49.230960786886946 ], "wc_strengths_avg": [ 67.5, 41.391424232562954 ], "wc_weaknesses_avg": [ 149.75, 127.94017156468097 ], "wc_questions_avg": [ 53.5, 47.01861333557169 ], "wc_limitations_avg": [ 14.5, 8.645808232895291 ], "wc_review_avg": [ 412.0, 144.1405564024227 ], "wc_reply_reviewers_avg": [ 144.75, 250.714354395595 ], "wc_reply_authors_avg": [ 254.0, 439.9409051224948 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9428090415820634, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14111527190375330481&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "titech.ac.jp;i.kyoto-u.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Tokyo Institute of Technology;Kyoto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.titech.ac.jp;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Titech;Kyoto U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kyoto", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Talking Heads: Understanding Inter-Layer Communication in Transformer Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95588", "id": "LUsx0chTsL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LUsx0chTsL", "openreview": "https://openreview.net/forum?id=LUsx0chTsL", "poster": "", "project": "", "author_site": "Jack Merullo, Carsten Eickhoff, Ellie Pavlick", "tldr": "", "abstract": "Although it is known that transformer language models (LMs) pass features from early layers to later layers, it is not well understood how this information is represented and routed by the model. We analyze a mechanism used in two LMs to selectively inhibit items in a context in one task, and find that it underlies a commonly used abstraction across many context-retrieval behaviors. Specifically, we find that models write into low-rank subspaces of the residual stream to represent features which are then read out by later layers, forming low-rank *communication channels* (Elhage et al., 2021) between layers. A particular 3D subspace in model activations in GPT-2 can be traversed to positionally index items in lists, and we show that this mechanism can explain an otherwise arbitrary-seeming sensitivity of the model to the order of items in the prompt. That is, the model has trouble copying the correct information from context when many items ``crowd\" this limited space. By decomposing attention heads with the Singular Value Decomposition (SVD), we find that previously described interactions between heads separated by one or more layers can be predicted via analysis of their weight matrices alone. We show that it is possible to manipulate the internal model representations as well as edit model weights based on the mechanism we discover in order to significantly improve performance on our synthetic Laundry List task, which requires recall from a list, often improving task accuracy by over 20\\%. Our analysis reveals a surprisingly intricate interpretable structure learned from language model pretraining, and helps us understand why sophisticated LMs sometimes fail in simple domains, facilitating future analysis of more complex behaviors.", "keywords": "mechanistic interpretability;interpretability;attention;subspaces;circuit;LLM;LM;language model;transformer", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/6c4a8afb12b9151fba5f4add85ae5a148bf55f16.zip", "author": "Jack Merullo;Carsten Eickhoff;Ellie Pavlick", "authorids": "~Jack_Merullo2;~Carsten_Eickhoff1;~Ellie_Pavlick1", "gender": "M;F;M", "homepage": "https://health-nlp.org;http://cs.brown.edu/people/epavlick/;https://jmerullo.github.io/", "dblp": "42/8700;141/4059;248/8361", "google_scholar": "QQi1_rAAAAAJ;sFyrSa8AAAAJ;7w0xLF4AAAAJ", "orcid": "0000-0001-9895-4061;;", "linkedin": ";;", "or_profile": "~Carsten_Eickhoff1;~Ellie_Pavlick1;~jack_merullo1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Brown University;Brown University", "aff_domain": "uni-tuebingen.de;brown.edu;brown.edu", "position": "Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmerullo2024talking,\ntitle={Talking Heads: Understanding Inter-Layer Communication in Transformer Language Models},\nauthor={Jack Merullo and Carsten Eickhoff and Ellie Pavlick},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LUsx0chTsL}\n}", "github": "", "reviewers": "Veos;WKbg;7Dor;erzS;2UBx", "pdf_size": 20912864, "rating": "5;5;6;6;7", "confidence": "4;4;4;4;4", "soundness": "2;2;3;2;3", "novelty": "3;2;4;3;3", "presentation": "1;3;3;3;3", "wc_summary": "174;139;92;106;238", "wc_strengths": "87;54;64;45;126", "wc_weaknesses": "715;126;353;95;126", "wc_questions": "135;283;151;122;53", "wc_limitations": "33;73;11;15;34", "wc_review": "1144;675;671;383;577", "wc_reply_reviewers": "403;0;121;195;107", "wc_reply_authors": "267;0;34;131;19", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 149.8, 52.422895761298804 ], "wc_strengths_avg": [ 75.2, 29.006206232459977 ], "wc_weaknesses_avg": [ 283.0, 235.017446160918 ], "wc_questions_avg": [ 148.8, 74.96772638942707 ], "wc_limitations_avg": [ 33.2, 21.949031869310318 ], "wc_review_avg": [ 690.0, 250.48752463945186 ], "wc_reply_reviewers_avg": [ 165.2, 134.21534934574362 ], "wc_reply_authors_avg": [ 90.2, 99.28423842685204 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10214055635034591143&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "uni-tuebingen.de;brown.edu;brown.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.brown.edu", "aff_unique_abbr": "Uni T\u00fcbingen;Brown", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Modeling Latent Neural Dynamics with Gaussian Process Switching Linear Dynamical Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95587", "id": "LX1lwP90kt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LX1lwP90kt", "openreview": "https://openreview.net/forum?id=LX1lwP90kt", "poster": "/media/PosterPDFs/NeurIPS%202024/95587.png?t=1733507635.3140218", "project": "", "author_site": "Amber Hu, David Zoltowski, Aditya Nair, David Anderson, Lea Duncker, Scott Linderman", "tldr": "", "abstract": "Understanding how the collective activity of neural populations relates to computation and ultimately behavior is a key goal in neuroscience. To this end, statistical methods which describe high-dimensional neural time series in terms of low-dimensional latent dynamics have played a fundamental role in characterizing neural systems. Yet, what constitutes a successful method involves two opposing criteria: (1) methods should be expressive enough to capture complex nonlinear dynamics, and (2) they should maintain a notion of interpretability often only warranted by simpler linear models. In this paper, we develop an approach that balances these two objectives: the Gaussian Process Switching Linear Dynamical System (gpSLDS). Our method builds on previous work modeling the latent state evolution via a stochastic differential equation whose nonlinear dynamics are described by a Gaussian process (GP-SDEs). We propose a novel kernel function which enforces smoothly interpolated locally linear dynamics, and therefore expresses flexible -- yet interpretable -- dynamics akin to those of recurrent switching linear dynamical systems (rSLDS). Our approach resolves key limitations of the rSLDS such as artifactual oscillations in dynamics near discrete state boundaries, while also providing posterior uncertainty estimates of the dynamics. To fit our models, we leverage a modified learning objective which improves the estimation accuracy of kernel hyperparameters compared to previous GP-SDE fitting approaches. We apply our method to synthetic data and data recorded in two neuroscience experiments and demonstrate favorable performance in comparison to the rSLDS.", "keywords": "gaussian process;switching;slds;neural;neuroscience;dynamics;probabilistic;time series", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Amber Hu;David M. Zoltowski;Aditya Nair;David Anderson;Lea Duncker;Scott Linderman", "authorids": "~Amber_Hu1;~David_M._Zoltowski1;~Aditya_Nair1;~David_Anderson10;~Lea_Duncker1;~Scott_Linderman1", "gender": "F;M;M;M;F;M", "homepage": ";https://www.davidzoltowski.com/;https://adinair.people.caltech.edu;https://davidandersonlab.caltech.edu/;https://leaduncker.github.io/;https://web.stanford.edu/~swl1/", "dblp": "174/0482;158/5356;;;231/7708;142/2484", "google_scholar": "sbhr01wAAAAJ;ZnxTn6IAAAAJ;https://scholar.google.com.sg/citations?user=cOSBxdQAAAAJ;C88XVPQAAAAJ;8qFtz34AAAAJ;6mD3I24AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Amber_Hu1;~David_M._Zoltowski1;~Aditya_Nair1;~David_Anderson10;~Lea_Duncker1;~Scott_W_Linderman1", "aff": "Stanford University;Stanford University;California Institute of Technology;California Institute of Technology;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;caltech.edu;caltech.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;PhD student;Full Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nhu2024modeling,\ntitle={Modeling Latent Neural Dynamics with Gaussian Process Switching Linear Dynamical Systems},\nauthor={Amber Hu and David M. Zoltowski and Aditya Nair and David Anderson and Lea Duncker and Scott Linderman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LX1lwP90kt}\n}", "github": "", "reviewers": "E7YR;9ds8;3Mtv;rveQ", "pdf_size": 2770538, "rating": "6;6;7;7", "confidence": "4;3;4;3", "soundness": "2;4;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "76;80;101;135", "wc_strengths": "26;62;108;102", "wc_weaknesses": "29;185;176;40", "wc_questions": "110;20;43;1", "wc_limitations": "1;1;34;24", "wc_review": "242;348;462;302", "wc_reply_reviewers": "17;24;48;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 23.37733945512192 ], "wc_strengths_avg": [ 74.5, 33.11721606657178 ], "wc_weaknesses_avg": [ 107.5, 73.17274082607538 ], "wc_questions_avg": [ 43.5, 41.17341375208036 ], "wc_limitations_avg": [ 15.0, 14.439529078193651 ], "wc_review_avg": [ 338.5, 80.60241931852914 ], "wc_reply_reviewers_avg": [ 26.0, 13.133925536563698 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17911043659575364766&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;caltech.edu;caltech.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Stanford University;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.caltech.edu", "aff_unique_abbr": "Stanford;Caltech", "aff_campus_unique_index": "0;0;1;1;0;0", "aff_campus_unique": "Stanford;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "NeuralPlane: An Efficiently Parallelizable Platform for Fixed-wing Aircraft Control with Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97741", "id": "LXgbgMOygH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LXgbgMOygH", "openreview": "https://openreview.net/forum?id=LXgbgMOygH", "poster": "/media/PosterPDFs/NeurIPS%202024/97741.png?t=1731687872.8500218", "project": "", "author_site": "Chuanyi Xue, Qihan Liu, Xiaoteng Ma, Xinyao Qin, Gui Ning, Yang Qi, Jinsheng Ren, Bin Liang, Jun Yang", "tldr": "", "abstract": "Reinforcement learning (RL) demonstrates superior potential over traditional flight control methods for fixed-wing aircraft, particularly under extreme operational conditions. However, the high demand for training samples and the lack of efficient computation in existing simulators hinder its further application. In this paper, we introduce NeuralPlane, the first benchmark platform for large-scale parallel simulations of fixed-wing aircraft. NeuralPlane significantly boosts high-fidelity simulation via GPU-accelerated Flight Dynamics Model (FDM) computation, achieving a single-step simulation time of just 0.2 seconds at a parallel scale of $10^{6}$, far exceeding current platforms. We also provide clear code templates, comprehensive evaluation/visualization tools and hierarchical frameworks for integrating RL and traditional control methods. We believe that NeuralPlane can accelerate the development of RL-based fixed-wing flight control and serve as a new challenging benchmark for the RL community. Our NeuralPlane is open-source and accessible at https://github.com/xuecy22/NeuralPlane.", "keywords": "reinforcement learning;benchmark;fixed-wing aircraft;control", "primary_area": "", "supplementary_material": "/attachment/e1de27f9ebf44c009852ebc650ba7a0f92591294.pdf", "author": "Chuanyi Xue;Qihan Liu;Xiaoteng Ma;Xinyao Qin;Ning Gui;Yang Qi;Jinsheng Ren;Bin Liang;Jun Yang", "authorids": "~Chuanyi_Xue1;~Qihan_Liu1;~Xiaoteng_Ma1;~Xinyao_Qin1;~Ning_Gui2;~Yang_Qi4;~Jinsheng_Ren1;~Bin_Liang5;~Jun_Yang6", "gender": "M;M;M;F;M;M;M;M;M", "homepage": ";https://github.com/liuqh16;https://xtma.github.io/;;;https://github.com/Style71;;;", "dblp": ";;238/3249;;;;;;", "google_scholar": ";a3J4_OQAAAAJ;CeDFnNMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;;;ZrgN9ssAAAAJ", "orcid": "0009-0009-4785-4090;0000-0001-6637-8346;0000-0002-7250-6268;0009-0002-7672-3574;;;0000-0002-5013-1880;0000-0002-7163-345X;", "linkedin": ";;;;;;;;", "or_profile": "~Chuanyi_Xue1;~Qihan_Liu1;~Xiaoteng_Ma1;~Xinyao_Qin1;~Ning_Gui2;~Yang_Qi4;~Jinsheng_Ren1;~Bin_Liang5;~Jun_Yang6", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Shenzhen International Graduate School, Tsinghua University;Tsinghua University;Tsinghua University;;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;PhD student;;Undergrad student;MS student;Researcher;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nxue2024neuralplane,\ntitle={NeuralPlane: An Efficiently Parallelizable Platform for Fixed-wing Aircraft Control with Reinforcement Learning},\nauthor={Chuanyi Xue and Qihan Liu and Xiaoteng Ma and Xinyao Qin and Ning Gui and Yang Qi and Jinsheng Ren and Bin Liang and Jun Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=LXgbgMOygH}\n}", "github": "", "reviewers": "HNDj;YwUv;EGTs;nViW", "pdf_size": 9786832, "rating": "6;6;7;8", "confidence": "3;3;3;3", "wc_summary_and_contributions": "13;34;73;76", "wc_strengths": "54;21;100;117", "wc_improvement": "33;14;271;58", "wc_limitations": "17;17;7;7", "wc_correctness": "12;4;6;8", "wc_clarity": "5;18;5;6", "wc_relation_to_prior_work": "19;25;8;7", "wc_documentation": "16;1;5;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "170;135;476;287", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "219;705;817;231", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 49.0, 26.580067720004024 ], "wc_strengths_avg": [ 73.0, 37.84838173555112 ], "wc_improvement_avg": [ 94.0, 103.3755290192026 ], "wc_limitations_avg": [ 12.0, 5.0 ], "wc_correctness_avg": [ 7.5, 2.958039891549808 ], "wc_clarity_avg": [ 8.5, 5.5 ], "wc_relation_to_prior_work_avg": [ 14.75, 7.562241731127087 ], "wc_documentation_avg": [ 7.25, 5.494315243958978 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 267.0, 133.14841343403234 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 493.0, 270.94279839109953 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YK64CppIuvsJ:scholar.google.com/&scioq=NeuralPlane:+An+Efficiently+Parallelizable+Platform+for+Fixed-wing+Aircraft+Control+with+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "STL: Still Tricky Logic (for System Validation, Even When Showing Your Work)", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95586", "id": "LXz1xIEBkF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LXz1xIEBkF", "openreview": "https://openreview.net/forum?id=LXz1xIEBkF", "poster": "", "project": "", "author_site": "Isabelle Hurley, Rohan Paleja, Ashley Suh, Jaime D Pena, Ho Chit Siu", "tldr": "", "abstract": "As learned control policies become increasingly common in autonomous systems, there is increasing need to ensure that they are interpretable and can be checked by human stakeholders. Formal specifications have been proposed as ways to produce human-interpretable policies for autonomous systems that can still be learned from examples. Previous work showed that despite claims of interpretability, humans are unable to use formal specifications presented in a variety of ways to validate even simple robot behaviors. This work uses active learning, a standard pedagogical method, to attempt to improve humans' ability to validate policies in signal temporal logic (STL). Results show that overall validation accuracy is not high, at 65\\% $\\pm$ 15% (mean $\\pm$ standard deviation), and that the three conditions of no active learning, active learning, and active learning with feedback do not significantly differ from each other. Our results suggest that the utility of formal specifications for human interpretability is still unsupported but point to other avenues of development which may enable improvements in system validation.", "keywords": "Explainability;Formal Methods;Human Experiments;Robotics", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/31faef665a2c0967511c84b7dbcd0c45d31a50bc.zip", "author": "Isabelle Hurley;Rohan R Paleja;Ashley Suh;Jaime Daniel Pena;Ho Chit Siu", "authorids": "~Isabelle_Hurley1;~Rohan_R_Paleja1;~Ashley_Suh1;~Jaime_Daniel_Pena1;~Ho_Chit_Siu1", "gender": "F;M;;;M", "homepage": ";https://rohanpaleja.com/;;https://www.researchgate.net/profile/Jaime_Pena9;", "dblp": ";237/8623;;;", "google_scholar": ";xjnQbKgAAAAJ;yGeHTOAAAAAJ;tyV3s-QAAAAJ;YlQdrzgAAAAJ", "orcid": ";;0000-0001-6513-8447;0009-0008-6472-5605;", "linkedin": "https://linkedin.com/in/izzy-h-a5b401206;;;jdpena/;", "or_profile": "~Isabelle_Hurley1;~Rohan_R_Paleja1;~Ashley_Suh1;~Jaime_Daniel_Pena1;~Ho_Chit_Siu1", "aff": "MIT Lincoln Laboratory, Massachusetts Institute of Technology;Massachusetts Institute of Technology;MIT Lincoln Laboratory, Massachusetts Institute of Technology;MIT Lincoln Laboratory, Massachusetts Institute of Technology;MIT Lincoln Laboratory, Massachusetts Institute of Technology", "aff_domain": "ll.mit.edu;mit.edu;ll.mit.edu;ll.mit.edu;ll.mit.edu", "position": "Researcher;Researcher;Researcher;Technical Staff;Researcher", "bibtex": "@inproceedings{\nhurley2024stl,\ntitle={{STL}: Still Tricky Logic (for System Validation, Even When Showing Your Work)},\nauthor={Isabelle Hurley and Rohan R Paleja and Ashley Suh and Jaime Daniel Pena and Ho Chit Siu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LXz1xIEBkF}\n}", "github": "", "reviewers": "14rd;d2Sv;QXA3;67SW", "pdf_size": 2492861, "rating": "4;5;5;6", "confidence": "3;2;3;4", "soundness": "2;3;3;3", "novelty": "1;2;3;2", "presentation": "3;3;3;3", "wc_summary": "47;188;72;74", "wc_strengths": "26;77;91;66", "wc_weaknesses": "67;273;145;56", "wc_questions": "160;27;19;95", "wc_limitations": "34;68;11;82", "wc_review": "334;633;338;373", "wc_reply_reviewers": "115;41;19;17", "wc_reply_authors": "506;210;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.25, 54.59567290546019 ], "wc_strengths_avg": [ 65.0, 24.197107265125723 ], "wc_weaknesses_avg": [ 135.25, 86.61516899481292 ], "wc_questions_avg": [ 75.25, 57.150568676085804 ], "wc_limitations_avg": [ 48.75, 27.922884879610844 ], "wc_review_avg": [ 419.5, 124.19440406073053 ], "wc_reply_reviewers_avg": [ 48.0, 39.81205847478876 ], "wc_reply_authors_avg": [ 179.0, 207.3475343475297 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16181155915230984315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ll.mit.edu;mit.edu;ll.mit.edu;ll.mit.edu;ll.mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Lincoln Laboratory", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Effective Planning Strategies for Dynamic Opinion Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95585", "id": "LYivxMp5es", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LYivxMp5es", "openreview": "https://openreview.net/forum?id=LYivxMp5es", "poster": "/media/PosterPDFs/NeurIPS%202024/95585.png?t=1731604138.458866", "project": "", "author_site": "Bharath Muppasani, Protik Nag, Vignesh Narayanan, Biplav Srivastava, Michael Huhns", "tldr": "", "abstract": "In this study, we investigate the under-explored intervention planning aimed at disseminating accurate information within dynamic opinion networks by leveraging learning strategies. Intervention planning involves identifying key nodes (search) and exerting control (e.g., disseminating accurate/official information through the nodes) to mitigate the influence of misinformation. However, as the network size increases, the problem becomes computationally intractable. To address this, we first introduce a ranking algorithm to identify key nodes for disseminating accurate information, which facilitates the training of neural network (NN) classifiers that provide generalized solutions for the search and planning problems. Second, we mitigate the complexity of label generation\u2014which becomes challenging as the network grows\u2014by developing a reinforcement learning (RL)-based centralized dynamic planning framework. We analyze these NN-based planners for opinion networks governed by two dynamic propagation models. Each model incorporates both binary and continuous opinion and trust representations. Our experimental results demonstrate that the ranking algorithm-based classifiers provide plans that enhance infection rate control, especially with increased action budgets for small networks. Further, we observe that the reward strategies focusing on key metrics, such as the number of susceptible nodes and infection rates, outperform those prioritizing faster blocking strategies. Additionally, our findings reveal that graph convolutional network (GCN)-based planners facilitate scalable centralized plans that achieve lower infection rates (higher control) across various network configurations (e.g., Watts-Strogatz topology, varying action budgets, varying initial infected nodes, and varying degree of infected nodes).", "keywords": "Opinion networks;Dynamic Planning;Misinformation Spread;Network dynamics.", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/5c8f687be2ce3e78a63030f6fe38821d24e31e5c.zip", "author": "Bharath Chandra Muppasani;Protik Nag;Vignesh Narayanan;Biplav Srivastava;Michael Huhns", "authorids": "~Bharath_Chandra_Muppasani1;~Protik_Nag1;~Vignesh_Narayanan1;~Biplav_Srivastava2;~Michael_Huhns1", "gender": "M;M;;M;M", "homepage": ";;https://vigsnar.github.io;https://sites.google.com/site/biplavsrivastava/;https://cse.sc.edu/~huhns/", "dblp": ";;156/0202;33/4411;h/MichaelNHuhns", "google_scholar": "4jmlBaYAAAAJ;BJqZS4cAAAAJ;rirGB10AAAAJ;mPC6wp4AAAAJ;https://scholar.google.com.tw/citations?user=hvtmB0UAAAAJ", "orcid": ";;;;", "linkedin": "bharath-9798/;;;;", "or_profile": "~Bharath_Chandra_Muppasani1;~Protik_Nag1;~Vignesh_Narayanan1;~Biplav_Srivastava2;~Michael_Huhns1", "aff": "University of South Carolina, University of South Carolina;University of South Carolina;University of South Carolina;University of South Carolina;University of South Carolina", "aff_domain": "cse.sc.edu;sc.edu;sc.edu;sc.edu;sc.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Emeritus", "bibtex": "@inproceedings{\nmuppasani2024towards,\ntitle={Towards Effective Planning Strategies for Dynamic Opinion Networks},\nauthor={Bharath Chandra Muppasani and Protik Nag and Vignesh Narayanan and Biplav Srivastava and Michael Huhns},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LYivxMp5es}\n}", "github": "", "reviewers": "VE7T;Z3oY;jjEE;BrMA", "pdf_size": 2276515, "rating": "5;6;7;8", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "78;75;39;60", "wc_strengths": "48;135;15;58", "wc_weaknesses": "60;24;415;76", "wc_questions": "45;7;9;231", "wc_limitations": "29;13;16;10", "wc_review": "260;254;494;435", "wc_reply_reviewers": "15;11;12;13", "wc_reply_authors": "14;24;14;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 15.443445211480501 ], "wc_strengths_avg": [ 64.0, 43.971581731841304 ], "wc_weaknesses_avg": [ 143.75, 157.73454757915275 ], "wc_questions_avg": [ 73.0, 92.46621004453465 ], "wc_limitations_avg": [ 17.0, 7.245688373094719 ], "wc_review_avg": [ 360.75, 105.84747280875439 ], "wc_reply_reviewers_avg": [ 12.75, 1.479019945774904 ], "wc_reply_authors_avg": [ 13.0, 8.54400374531753 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3329773590717263610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cse.sc.edu;sc.edu;sc.edu;sc.edu;sc.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of South Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.sc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LLM-Check: Investigating Detection of Hallucinations in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95584", "id": "LYx4w3CAgy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LYx4w3CAgy", "openreview": "https://openreview.net/forum?id=LYx4w3CAgy", "poster": "/media/PosterPDFs/NeurIPS%202024/95584.png?t=1733617258.8149695", "project": "", "author_site": "Gaurang Sriramanan, Siddhant Bharti, Vinu Sankar Sadasivan, Shoumik Saha, Priyatham Kattakinda, Soheil Feizi", "tldr": "", "abstract": "While Large Language Models (LLMs) have become immensely popular due to their outstanding performance on a broad range of tasks, these models are prone to producing hallucinations\u2014 outputs that are fallacious or fabricated yet often appear plausible or tenable at a glance. In this paper, we conduct a comprehensive investigation into the nature of hallucinations within LLMs and furthermore explore effective techniques for detecting such inaccuracies in various real-world settings. Prior approaches to detect hallucinations in LLM outputs, such as consistency checks or retrieval-based methods, typically assume access to multiple model responses or large databases. These techniques, however, tend to be computationally expensive in practice, thereby limiting their applicability to real-time analysis. In contrast, in this work, we seek to identify hallucinations within a single response in both white-box and black-box settings by analyzing the internal hidden states, attention maps, and output prediction probabilities of an auxiliary LLM. In addition, we also study hallucination detection in scenarios where ground-truth references are also available, such as in the setting of Retrieval-Augmented Generation (RAG). We demonstrate that the proposed detection methods are extremely compute-efficient, with speedups of up to 45x and 450x over other baselines, while achieving significant improvements in detection performance over diverse datasets.", "keywords": "Large Language Models;Hallucinations in Language Models;Hallucination Detection;Eigen-analysis of LM Embeddings", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Gaurang Sriramanan;Siddhant Bharti;Vinu Sankar Sadasivan;Shoumik Saha;Priyatham Kattakinda;Soheil Feizi", "authorids": "~Gaurang_Sriramanan1;~Siddhant_Bharti2;~Vinu_Sankar_Sadasivan1;~Shoumik_Saha1;~Priyatham_Kattakinda1;~Soheil_Feizi2", "gender": "M;M;M;M;M;M", "homepage": "https://gaurangsriramanan.github.io/;;https://vinusankars.github.io/;https://shoumiksaha.github.io/;https://priyathamkat.com/;https://www.cs.umd.edu/~sfeizi/", "dblp": "262/3916;278/3575;244/8052;307/5377;;57/2132", "google_scholar": "t76Uk8oAAAAJ;R2XkMmIAAAAJ;y1IKIw0AAAAJ;https://scholar.google.com/citations?view_op=list_works;D9ebp-YAAAAJ;lptAmrMAAAAJ", "orcid": ";;;0009-0007-7461-5306;;", "linkedin": "gaurang-sriramanan-16141a1a3/;siddhant-bharti;vinusankars/;shoumik-saha/;priyathamkat/;", "or_profile": "~Gaurang_Sriramanan1;~Siddhant_Bharti2;~Vinu_Sankar_Sadasivan1;~Shoumik_Saha1;~Priyatham_Kattakinda1;~Soheil_Feizi2", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;MS student;PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsriramanan2024llmcheck,\ntitle={{LLM}-Check: Investigating Detection of Hallucinations in Large Language Models},\nauthor={Gaurang Sriramanan and Siddhant Bharti and Vinu Sankar Sadasivan and Shoumik Saha and Priyatham Kattakinda and Soheil Feizi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LYx4w3CAgy}\n}", "github": "", "reviewers": "C5LP;2eXD;gixa;JTvH", "pdf_size": 1598043, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "73;56;113;92", "wc_strengths": "52;23;116;50", "wc_weaknesses": "227;150;153;48", "wc_questions": "65;51;103;49", "wc_limitations": "37;14;123;1", "wc_review": "454;294;608;240", "wc_reply_reviewers": "325;28;49;20", "wc_reply_authors": "2056;1118;483;39", "reply_reviewers": "2;1;1;1", "reply_authors": "6;4;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.5, 21.266170318136737 ], "wc_strengths_avg": [ 60.25, 34.16412592179112 ], "wc_weaknesses_avg": [ 144.5, 63.68084484364196 ], "wc_questions_avg": [ 67.0, 21.6794833886788 ], "wc_limitations_avg": [ 43.75, 47.536170438940495 ], "wc_review_avg": [ 399.0, 144.05901568454507 ], "wc_reply_reviewers_avg": [ 105.5, 127.17016159461306 ], "wc_reply_authors_avg": [ 924.0, 757.7542477611063 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1704796485781492813&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "umd.edu;umd.edu;umd.edu;umd.edu;umd.edu;umd.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Zero-shot Image Editing with Reference Imitation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95583", "id": "LZV0U6UHb6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LZV0U6UHb6", "openreview": "https://openreview.net/forum?id=LZV0U6UHb6", "poster": "/media/PosterPDFs/NeurIPS%202024/95583.png?t=1733439167.9446118", "project": "", "author_site": "Xi Chen, Yutong Feng, Mengting Chen, Yiyang Wang, Shilong Zhang, Yu Liu, Yujun Shen, Hengshuang Zhao", "tldr": "", "abstract": "Image editing serves as a practical yet challenging task considering the diverse demands from users, where one of the hardest parts is to precisely describe how the edited image should look like. In this work, we present a new form of editing, termed imitative editing, to help users exercise their creativity more conveniently. Concretely, to edit an image region of interest, users are free to directly draw inspiration from some in-the-wild references (e.g., some relative pictures come across online), without having to cope with the fit between the reference and the source. Such a design requires the system to automatically figure out what to expect from the reference to perform the editing. For this purpose, we propose a generative training framework, dubbed MimicBrush, which randomly selects two frames from a video clip, masks some regions of one frame, and learns to recover the masked regions using the information from the other frame. That way, our model, developed from a diffusion prior, is able to capture the semantic correspondence between separate images in a self-supervised manner. We experimentally show the effectiveness of our method under various test cases as well as its superiority over existing alternatives. We also construct a benchmark to facilitate further research.", "keywords": "Image Editing;Image Customization;Diffusion Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xi Chen;Yutong Feng;Mengting Chen;Yiyang Wang;Shilong Zhang;Yu Liu;Yujun Shen;Hengshuang Zhao", "authorids": "~Xi_Chen30;~Yutong_Feng2;~Mengting_Chen1;~Yiyang_Wang2;~Shilong_Zhang1;~Yu_Liu23;~Yujun_Shen1;~Hengshuang_Zhao2", "gender": "M;M;;M;M;M;;M", "homepage": ";;;;https://jshilong.github.io/;https://github.com/liuyuyuil;;https://hszhao.github.io", "dblp": ";;;;;97/2274-63;;185/7848", "google_scholar": "INISnXkAAAAJ;https://scholar.google.com.hk/citations?user=mZwJLeUAAAAJ;;nKr8TJwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;8zksQb4AAAAJ;;4uE10I0AAAAJ", "orcid": ";;;;0009-0005-4336-4941;;;0000-0001-8277-2706", "linkedin": ";;;yiyang-wang-370633265/;;;;hengshuang-zhao-347b8391/?originalSubdomain=hk", "or_profile": "~Xi_Chen30;~Yutong_Feng2;~Mengting_Chen1;~Yiyang_Wang2;~Shilong_Zhang1;~Yu_Liu23;~Yujun_Shen1;~Hengshuang_Zhao2", "aff": "the University of Hong Kong, University of Hong Kong;Alibaba Group;;Peking University;Alibaba Group;Alibaba Group;;The University of Hong Kong", "aff_domain": "cs.hku.hk;alibaba-inc.com;;stu.pku.edu.cn;alibaba.com;alibaba-inc.com;;hku.hk", "position": "PhD student;Researcher;;Undergrad student;Intern;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nchen2024zeroshot,\ntitle={Zero-shot Image Editing with Reference Imitation},\nauthor={Xi Chen and Yutong Feng and Mengting Chen and Yiyang Wang and Shilong Zhang and Yu Liu and Yujun Shen and Hengshuang Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LZV0U6UHb6}\n}", "github": "", "reviewers": "iASj;q758;KHnJ;8FSf", "pdf_size": 17531072, "rating": "4;5;5;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "89;77;83;63", "wc_strengths": "63;71;123;45", "wc_weaknesses": "386;97;113;8", "wc_questions": "39;23;26;127", "wc_limitations": "9;23;7;10", "wc_review": "586;291;352;253", "wc_reply_reviewers": "175;234;10;35", "wc_reply_authors": "495;1216;59;30", "reply_reviewers": "1;2;1;1", "reply_authors": "4;6;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 9.643650760992955 ], "wc_strengths_avg": [ 75.5, 28.99568933479596 ], "wc_weaknesses_avg": [ 151.0, 141.45140508315922 ], "wc_questions_avg": [ 53.75, 42.716361034151774 ], "wc_limitations_avg": [ 12.25, 6.299801584177076 ], "wc_review_avg": [ 370.5, 129.3338702737995 ], "wc_reply_reviewers_avg": [ 113.5, 93.77766258550061 ], "wc_reply_authors_avg": [ 450.0, 479.07775986785276 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10086013795779046570&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.hku.hk;alibaba-inc.com;;stu.pku.edu.cn;alibaba.com;alibaba-inc.com;;hku.hk", "author_num": 8, "aff_unique_index": "0;1;2;1;1;0", "aff_unique_norm": "University of Hong Kong;Alibaba Group;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;https://www.alibaba.com;http://www.pku.edu.cn", "aff_unique_abbr": "HKU;Alibaba;Peking U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Secretary Problem with Predicted Additive Gap", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95582", "id": "Lbuxdzg1pd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Lbuxdzg1pd", "openreview": "https://openreview.net/forum?id=Lbuxdzg1pd", "poster": "/media/PosterPDFs/NeurIPS%202024/95582.png?t=1729670491.4392226", "project": "", "author_site": "Alexander Braun, Sherry Sarkar", "tldr": "", "abstract": "The secretary problem is one of the fundamental problems in online decision making; a tight competitive ratio for this problem of $1/e \\approx 0.368$ has been known since the 1960s. Much more recently, the study of algorithms with predictions was introduced: The algorithm is equipped with a (possibly erroneous) additional piece of information upfront which can be used to improve the algorithm's performance. Complementing previous work on secretary problems with prior knowledge, we tackle the following question: \n\n_What is the weakest piece of information that allows us to break the $1/e$ barrier?_\n\nTo this end, we introduce the secretary problem with predicted additive gap. As in the classical problem, weights are fixed by an adversary and elements appear in random order. In contrast to previous variants of predictions, our algorithm only has access to a much weaker piece of information: an _additive gap_ $c$. This gap is the difference between the highest and $k$-th highest weight in the sequence.\nUnlike previous pieces of advice, knowing an exact additive gap does not make the problem trivial. \n\nOur contribution is twofold. First, we show that for any index $k$ and any gap $c$, we can obtain a competitive ratio of $0.4$ when knowing the exact gap (even if we do not know $k$), hence beating the prevalent bound for the classical problem by a constant. Second, a slightly modified version of our algorithm allows to prove standard robustness-consistency properties as well as improved guarantees when knowing a range for the error of the prediction.", "keywords": "Secretary Problem;Competitive Analysis;Online Algorithms;Predictions;Robustness;Consistency", "primary_area": "optimization", "supplementary_material": "", "author": "Alexander Braun;Sherry Sarkar", "authorids": "~Alexander_Braun2;~Sherry_Sarkar1", "gender": "M;F", "homepage": "https://nerva.cs.uni-bonn.de/doku.php/staff/alexanderbraun;https://sherrysarkar.github.io/", "dblp": ";259/9655", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Alexander_Braun2;~Sherry_Sarkar1", "aff": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Carnegie Mellon University", "aff_domain": "uni-bonn.de;cmu.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\nbraun2024the,\ntitle={The Secretary Problem with Predicted Additive Gap},\nauthor={Alexander Braun and Sherry Sarkar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Lbuxdzg1pd}\n}", "github": "", "reviewers": "jGey;wmbT;QX7w;rRpH", "pdf_size": 430154, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "4;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "131;282;88;243", "wc_strengths": "47;43;56;186", "wc_weaknesses": "196;74;102;185", "wc_questions": "3;45;20;34", "wc_limitations": "8;23;6;3", "wc_review": "385;467;272;651", "wc_reply_reviewers": "13;151;25;92", "wc_reply_authors": "0;147;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 186.0, 79.20542910684848 ], "wc_strengths_avg": [ 83.0, 59.65316420777694 ], "wc_weaknesses_avg": [ 139.25, 52.342024225281925 ], "wc_questions_avg": [ 25.5, 15.724185193516387 ], "wc_limitations_avg": [ 10.0, 7.713624310270756 ], "wc_review_avg": [ 443.75, 138.24141022139494 ], "wc_reply_reviewers_avg": [ 70.25, 55.494932201057786 ], "wc_reply_authors_avg": [ 36.75, 63.65286717815624 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=499750245296566863&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "uni-bonn.de;cmu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-bonn.de/;https://www.cmu.edu", "aff_unique_abbr": "Uni Bonn;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "title": "Dealing with Synthetic Data Contamination in Online Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95581", "id": "Lc8gemv97Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Lc8gemv97Y", "openreview": "https://openreview.net/forum?id=Lc8gemv97Y", "poster": "", "project": "", "author_site": "Maorong Wang, Nicolas MICHEL, Jiafeng Mao, Toshihiko Yamasaki", "tldr": "", "abstract": "Image generation has shown remarkable results in generating high-fidelity realistic images, in particular with the advancement of diffusion-based models. However, the prevalence of AI-generated images may have side effects for the machine learning community that are not clearly identified. Meanwhile, the success of deep learning in computer vision is driven by the massive dataset collected on the Internet. The extensive quantity of synthetic data being added to the Internet would become an obstacle for future researchers to collect \"clean\" datasets without AI-generated content. Prior research has shown that using datasets contaminated by synthetic images may result in performance degradation when used for training. In this paper, we investigate the potential impact of contaminated datasets on Online Continual Learning (CL) research. We experimentally show that contaminated datasets might hinder the training of existing online CL methods. Also, we propose Entropy Selection with Real-synthetic similarity Maximization (ESRM), a method to alleviate the performance deterioration caused by synthetic images when training online CL models. Experiments show that our method can significantly alleviate performance deterioration, especially when the contamination is severe. For reproducibility, the source code of our work is available at https://github.com/maorong-wang/ESRM.", "keywords": "Online Continual Learning;Image Generation;Replay-based method;Entropy Selection", "primary_area": "online_learning", "supplementary_material": "/attachment/1f7335eb5371418c8bc715b2c98b548a425b3b5b.zip", "author": "Maorong Wang;Nicolas Michel;Jiafeng Mao;Toshihiko Yamasaki", "authorids": "~Maorong_Wang1;~Nicolas_Michel1;~Jiafeng_Mao1;~Toshihiko_Yamasaki1", "gender": "M;M;M;M", "homepage": "https://maorong.wang;https://nicolas1203.github.io/;https://ut-mao.github.io/;http://www.cvm.t.u-tokyo.ac.jp/en/", "dblp": "282/7285;254/6754;274/7104;81/881", "google_scholar": "ZaasPpgAAAAJ;OyXkV0QAAAAJ;Sn_2wLQAAAAJ;rE9iY5MAAAAJ", "orcid": ";;;0000-0002-1784-2314", "linkedin": ";nicolas-michel-4166b7113/;;", "or_profile": "~Maorong_Wang1;~Nicolas_Michel1;~Jiafeng_Mao1;~Toshihiko_Yamasaki1", "aff": "The University of Tokyo;Ecole Sup\u00e9rieure d'Ing\u00e9nieurs en Electronique et Electrotechnique;The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;esiee.fr;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024dealing,\ntitle={Dealing with Synthetic Data Contamination in Online Continual Learning},\nauthor={Maorong Wang and Nicolas Michel and Jiafeng Mao and Toshihiko Yamasaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Lc8gemv97Y}\n}", "github": "", "reviewers": "ttuB;h2ny;pFAa", "pdf_size": 7098720, "rating": "5;7;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "32;73;107", "wc_strengths": "23;123;101", "wc_weaknesses": "97;162;89", "wc_questions": "5;6;49", "wc_limitations": "26;9;9", "wc_review": "183;373;355", "wc_reply_reviewers": "16;28;0", "wc_reply_authors": "8;8;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 30.663043264200347 ], "wc_strengths_avg": [ 82.33333333333333, 42.905581092544224 ], "wc_weaknesses_avg": [ 116.0, 32.69046751985457 ], "wc_questions_avg": [ 20.0, 20.51016008388688 ], "wc_limitations_avg": [ 14.666666666666666, 8.013876853447538 ], "wc_review_avg": [ 303.6666666666667, 85.64007369346562 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 11.469767022723502 ], "wc_reply_authors_avg": [ 5.333333333333333, 3.7712361663282534 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dCGbURWq06sJ:scholar.google.com/&scioq=Dealing+with+Synthetic+Data+Contamination+in+Online+Continual+Learning&hl=en&as_sdt=0,23", "gs_version_total": 3, "email": "u-tokyo.ac.jp;esiee.fr;u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Tokyo;Ecole Sup\u00e9rieure d'Ing\u00e9nieurs en Electronique et Electrotechnique", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.esiee.fr", "aff_unique_abbr": "UTokyo;ESIEE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Japan;France" }, { "title": "UKnow: A Unified Knowledge Protocol with Multimodal Knowledge Graph Datasets for Reasoning and Vision-Language Pre-Training", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97740", "id": "LdRZ9SFBku", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LdRZ9SFBku", "openreview": "https://openreview.net/forum?id=LdRZ9SFBku", "poster": "/media/PosterPDFs/NeurIPS%202024/97740.png?t=1730447417.213944", "project": "", "author_site": "Biao Gong, Shuai Tan, Yutong Feng, Xiaoying Xie, Yuyuan Li, Chaochao Chen, Kecheng Zheng, Yujun Shen, Deli Zhao", "tldr": "", "abstract": "This work presents a unified knowledge protocol, called UKnow, which facilitates knowledge-based studies from the perspective of data. Particularly focusing on visual and linguistic modalities, we categorize data knowledge into five unit types, namely, in-image, in-text, cross-image, cross-text, and image-text, and set up an efficient pipeline to help construct the multimodal knowledge graph from any data collection. Thanks to the logical information naturally contained in knowledge graph, organizing datasets under UKnow format opens up more possibilities of data usage compared to the commonly used image-text pairs. Following UKnow protocol, we collect, from public international news, a large-scale multimodal knowledge graph dataset that consists of 1,388,568 nodes (with 571,791 vision-related ones) and 3,673,817 triplets. The dataset is also annotated with rich event tags, including 11 coarse labels and 9,185 fine labels. Experiments on four benchmarks demonstrate the potential of UKnow in supporting common-sense reasoning and boosting vision-language pre-training with a single dataset, benefiting from its unified form of knowledge organization. Code, dataset, and models will be made publicly available. See Appendix to download the dataset.", "keywords": "knowledge protocol;multimodal;knowledge graph;dataset", "primary_area": "", "supplementary_material": "/attachment/4dfc8640e081a39cc3f688dc3bd5fe0e56e5d29d.pdf", "author": "Biao Gong;Shuai Tan;Yutong Feng;Xiaoying Xie;Yuyuan Li;Chaochao Chen;Kecheng Zheng;Yujun Shen;Deli Zhao", "authorids": "~Biao_Gong1;~Shuai_Tan1;~Yutong_Feng2;~Xiaoying_Xie1;~Yuyuan_Li1;~Chaochao_Chen3;~Kecheng_Zheng2;~Yujun_Shen1;~Deli_Zhao1", "gender": "M;;M;F;;;M;;M", "homepage": "https://github.com/Biao-Gong/;;;http://;;https://sites.google.com/site/ccchomepage/;https://zkcys001.github.io/;;https://zhaodeli.github.io", "dblp": "252/9466;;;;35/11288;26/1492-1;228/1362;;77/1992", "google_scholar": "BwdpTiQAAAAJ;;https://scholar.google.com.hk/citations?user=mZwJLeUAAAAJ;;v4e49qEAAAAJ;qZTMyzwAAAAJ;hMDQifQAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-6156-0816;;;;0000-0003-4896-2885;0000-0003-1419-964X;;;0000-0002-8838-578X", "linkedin": ";;;;;ccchomepage/;;;", "or_profile": "~Biao_Gong1;~Shuai_Tan1;~Yutong_Feng2;~Xiaoying_Xie1;~Yuyuan_Li1;~Chaochao_Chen3;~Kecheng_Zheng2;~Yujun_Shen1;~Deli_Zhao1", "aff": "Ant Group;;Alibaba Group;Xi'an Jiaotong University;Hangzhou Dianzi University;Zhejiang University;Zhejiang University;;Alibaba Group", "aff_domain": "antgroup.com;;alibaba-inc.com;xjtu.edu.cn;hdu.edu.cn;zju.edu.cn;zju.edu.cn;;alibaba-inc.com", "position": "Researcher;;Researcher;MS student;Associate Professor;Distinguished Research Fellow;Postdoc;;Director", "bibtex": "@inproceedings{\ngong2024uknow,\ntitle={{UK}now: A Unified Knowledge Protocol with Multimodal Knowledge Graph Datasets for Reasoning and Vision-Language Pre-Training},\nauthor={Biao Gong and Shuai Tan and Yutong Feng and Xiaoying Xie and Yuyuan Li and Chaochao Chen and Kecheng Zheng and Yujun Shen and Deli Zhao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=LdRZ9SFBku}\n}", "github": "", "reviewers": "MhVk;2tvC;4Up8;q6Dq", "pdf_size": 4791089, "rating": "6;6;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "88;79;76;96", "wc_strengths": "34;48;109;85", "wc_improvement": "54;72;43;45", "wc_limitations": "60;1;54;27", "wc_correctness": "22;1;23;7", "wc_clarity": "15;1;16;48", "wc_relation_to_prior_work": "8;1;33;28", "wc_documentation": "1;1;19;21", "wc_additional_feedback": "1;1;1;1", "wc_review": "283;205;374;358", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 84.75, 7.854139036202504 ], "wc_strengths_avg": [ 69.0, 29.67322024991558 ], "wc_improvement_avg": [ 53.5, 11.4564392373896 ], "wc_limitations_avg": [ 35.5, 23.47871376374779 ], "wc_correctness_avg": [ 13.25, 9.496709956611289 ], "wc_clarity_avg": [ 20.0, 17.219175357722563 ], "wc_relation_to_prior_work_avg": [ 17.5, 13.35102992281869 ], "wc_documentation_avg": [ 10.5, 9.526279441628825 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 305.0, 67.18258702967607 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kbnJ1P_sy_8J:scholar.google.com/&scioq=UKnow:+A+Unified+Knowledge+Protocol+with+Multimodal+Knowledge+Graph+Datasets+for+Reasoning+and+Vision-Language+Pre-Training&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "antgroup.com;;alibaba-inc.com;xjtu.edu.cn;hdu.edu.cn;zju.edu.cn;zju.edu.cn;;alibaba-inc.com", "author_num": 9, "aff_unique_index": "0;1;2;3;4;4;1", "aff_unique_norm": "Ant Group;Alibaba Group;Xi'an Jiao Tong University;Hangzhou Dianzi University;Zhejiang University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.antgroup.com;https://www.alibaba.com;https://www.xjtu.edu.cn;http://www.hdu.edu.cn/;https://www.zju.edu.cn", "aff_unique_abbr": "Ant Group;Alibaba;XJTU;HGHDU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Kernel Perspective on Distillation-based Collaborative Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95580", "id": "LdZ0u1FuXb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LdZ0u1FuXb", "openreview": "https://openreview.net/forum?id=LdZ0u1FuXb", "poster": "/media/PosterPDFs/NeurIPS%202024/95580.png?t=1730981937.4726973", "project": "", "author_site": "Sejun Park, Kihun Hong, Ganguk Hwang", "tldr": "", "abstract": "Over the past decade, there is a growing interest in collaborative learning that can enhance AI models of multiple parties.\nHowever, it is still challenging to enhance performance them without sharing private data and models from individual parties.\nOne recent promising approach is to develop distillation-based algorithms that exploit unlabeled public data but the results are still unsatisfactory in both theory and practice.\nTo tackle this problem, we rigorously analyze a representative distillation-based algorithm in the view of kernel regression.\nThis work provides the first theoretical results to prove the (nearly) minimax optimality of the nonparametric collaborative learning algorithm that does not directly share local data or models in massively distributed statistically heterogeneous environments.\nInspired by our theoretical results, we also propose a practical distillation-based collaborative learning algorithm based on neural network architecture.\nOur algorithm successfully bridges the gap between our theoretical assumptions and practical settings with neural networks through feature kernel matching.\nWe simulate various regression tasks to verify our theory and demonstrate the practical feasibility of our proposed algorithm.", "keywords": "Kernel Regression;Distillation-based Collaborative Learning;Generalization Analysis;Kernel Distillation", "primary_area": "learning_theory", "supplementary_material": "/attachment/61080a73cec5c426293fc39f207a6ba35e95dba9.zip", "author": "Sejun Park;Kihun Hong;Ganguk Hwang", "authorids": "~Sejun_Park2;~Kihun_Hong1;~Ganguk_Hwang1", "gender": "M;;M", "homepage": ";;https://sites.google.com/view/probai-lab/home", "dblp": ";90/4214;125/1390", "google_scholar": "ibWcz5YAAAAJ;;", "orcid": ";;", "linkedin": ";hong-kihun-b8450b26a/;", "or_profile": "~Sejun_Park2;~Kihun_Hong1;~Ganguk_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npark2024a,\ntitle={A Kernel Perspective on Distillation-based Collaborative Learning},\nauthor={Sejun Park and Kihun Hong and Ganguk Hwang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LdZ0u1FuXb}\n}", "github": "", "reviewers": "Zad8;1rrx;UpcW;UU4M;Dtec", "pdf_size": 759780, "rating": "5;5;6;6;7", "confidence": "4;1;3;3;1", "soundness": "2;2;3;3;4", "novelty": "3;2;3;3;4", "presentation": "1;2;2;3;4", "wc_summary": "63;41;72;86;68", "wc_strengths": "24;19;156;42;44", "wc_weaknesses": "81;3;132;38;23", "wc_questions": "117;3;40;43;13", "wc_limitations": "37;29;1;1;44", "wc_review": "322;95;401;210;192", "wc_reply_reviewers": "32;43;194;17;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.4, 1.2000000000000002 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 66.0, 14.656056768449009 ], "wc_strengths_avg": [ 57.0, 50.45393939029935 ], "wc_weaknesses_avg": [ 55.4, 46.089478191882364 ], "wc_questions_avg": [ 43.2, 39.96198193283211 ], "wc_limitations_avg": [ 22.4, 18.106352476410038 ], "wc_review_avg": [ 244.0, 106.57767120743443 ], "wc_reply_reviewers_avg": [ 59.4, 68.23078484086197 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3563483225498992, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3HIpyKGF6FYJ:scholar.google.com/&scioq=A+Kernel+Perspective+on+Distillation-based+Collaborative+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;kaist.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "AFBench: A Large-scale Benchmark for Airfoil Design", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97739", "id": "LdxNWDNvC3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LdxNWDNvC3", "openreview": "https://openreview.net/forum?id=LdxNWDNvC3", "poster": "/media/PosterPDFs/NeurIPS%202024/97739.png?t=1729065041.849627", "project": "", "author_site": "Jian Liu, Jianyu Wu, Hairun Xie, Guoqing zhang, Jing Wang, Liu Wei, Wanli Ouyang, Junjun Jiang, Xianming Liu, SHIXIANG TANG, Miao Zhang", "tldr": "", "abstract": "Data-driven generative models have emerged as promising approaches towards achieving efficient mechanical inverse design. However, due to prohibitively high cost in time and money, there is still lack of open-source and large-scale benchmarks in this field. It is mainly the case for airfoil inverse design, which requires to generate and edit diverse geometric-qualified and aerodynamic-qualified airfoils following the multimodal instructions, \\emph{i.e.,} dragging points and physical parameters. This paper presents the open-source endeavors in airfoil inverse design, \\emph{AFBench}, including a large-scale dataset with 200 thousand airfoils and high-quality aerodynamic and geometric labels, two novel and practical airfoil inverse design tasks, \\emph{i.e.,} conditional generation on multimodal physical parameters, controllable editing, and comprehensive metrics to evaluate various existing airfoil inverse design methods. Our aim is to establish \\emph{AFBench} as an ecosystem for training and evaluating airfoil inverse design methods, with a specific focus on data-driven controllable inverse design models by multimodal instructions capable of bridging the gap between ideas and execution, the academic research and industrial applications. We have provided baseline models, comprehensive experimental observations, and analysis to accelerate future research. Our baseline model is trained on an RTX 3090 GPU within 16 hours. The codebase, datasets and benchmarks will be available at \\url{https://hitcslj.github.io/afbench/}.", "keywords": "Engineer Design;Generative models;Inverse Design", "primary_area": "", "supplementary_material": "", "author": "Jian Liu;Jianyu Wu;Hairun Xie;Guoqing zhang;Jing Wang;Liu Wei;Wanli Ouyang;Junjun Jiang;Xianming Liu;SHIXIANG TANG;Miao Zhang", "authorids": "~Jian_Liu17;~Jianyu_Wu2;~Hairun_Xie2;~Guoqing_zhang4;~Jing_Wang58;~Liu_Wei3;~Wanli_Ouyang1;~Junjun_Jiang2;~Xianming_Liu5;~SHIXIANG_TANG1;~Miao_Zhang10", "gender": "M;M;M;M;F;M;;M;M;M;M", "homepage": "https://hitcslj.github.io/;https://uanu2002.github.io/;;;https://www.aero.sjtu.edu.cn/Data/View/5801;;;http://homepage.hit.edu.cn/jiangjunjun;http://homepage.hit.edu.cn/xmliu;;https://www.scholarmate.com/P/1Rdjdt", "dblp": "35/295-36;;;;;;;https://dblp.uni-trier.de/pers/hd/j/Jiang:Junjun;89/58201.html;260/6757;", "google_scholar": "lgtXgTUAAAAJ;WmA0pzkAAAAJ;;;;;;WNH2_rgAAAAJ;;TJ4ihdkAAAAJ;", "orcid": "0009-0006-8369-9735;0009-0006-9173-1766;0000-0002-3582-7599;0009-0007-7880-8586;;0009-0002-2874-9788;;0000-0002-5694-505X;0000-0002-8857-1785;;", "linkedin": ";;;;;;;;;shixiang-tang-80441a1a3/;", "or_profile": "~Jian_Liu17;~Jianyu_Wu2;~Hairun_Xie2;~Guoqing_zhang4;~Jing_Wang58;~Liu_Wei3;~Wanli_Ouyang1;~Junjun_Jiang2;~Xianming_Liu5;~SHIXIANG_TANG1;~Miao_Zhang10", "aff": "Harbin Institute of Technology;Beihang University;Shanghai Aircraft Design and Research Institute;Harbin Institute of Technology;Shanghai Jiaotong University;Shanghai Aircraft Design and Research Institute;;Harbin Institute of Technology;Harbin Institute of Technology;The Chinese University of Hong Kong;Shanghai Aircraft Design and Research Institute", "aff_domain": "hit.edu.cn;buaa.edu.cn;comac.cc;hit.edu.cn;sjtu.edu.cn;sadri.comac.cc;;hit.edu.cn;hit.edu.cn;cuhk.edu.hk;comac.cc", "position": "MS student;Undergrad student;Postdoc;PhD student;Assistant Professor;MS student;;Full Professor;Full Professor;Postdoc;Researcher", "bibtex": "@inproceedings{\nliu2024afbench,\ntitle={{AFB}ench: A Large-scale Benchmark for Airfoil Design},\nauthor={Jian Liu and Jianyu Wu and Hairun Xie and Guoqing zhang and Jing Wang and Liu Wei and Wanli Ouyang and Junjun Jiang and Xianming Liu and SHIXIANG TANG and Miao Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=LdxNWDNvC3}\n}", "github": "", "reviewers": "c69g;jE49;LYMe;nSXb", "pdf_size": 2036572, "rating": "5;7;7;8", "confidence": "3;4;4;4", "wc_summary_and_contributions": "61;49;59;52", "wc_strengths": "60;5;94;2", "wc_improvement": "60;4;492;41", "wc_limitations": "1;1;7;8", "wc_correctness": "1;1;8;1", "wc_clarity": "1;1;5;1", "wc_relation_to_prior_work": "1;1;11;1", "wc_documentation": "1;1;8;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "187;64;685;108", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "80;62;61;61", "reply_reviewers": "0;0;0;0", "reply_authors": "10;2;7;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 55.25, 4.9180788932265 ], "wc_strengths_avg": [ 40.25, 38.68058298423125 ], "wc_improvement_avg": [ 149.25, 198.90874163796823 ], "wc_limitations_avg": [ 4.25, 3.2691742076555053 ], "wc_correctness_avg": [ 2.75, 3.031088913245535 ], "wc_clarity_avg": [ 2.0, 1.7320508075688772 ], "wc_relation_to_prior_work_avg": [ 3.5, 4.330127018922194 ], "wc_documentation_avg": [ 2.75, 3.031088913245535 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 261.0, 248.73178325256305 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 66.0, 8.093207028119323 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 5.25, 3.418698582794336 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10125115373024877235&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hit.edu.cn;buaa.edu.cn;comac.cc;hit.edu.cn;sjtu.edu.cn;sadri.comac.cc;;hit.edu.cn;hit.edu.cn;cuhk.edu.hk;comac.cc", "author_num": 11, "aff_unique_index": "0;1;2;0;3;2;0;0;4;2", "aff_unique_norm": "Harbin Institute of Technology;Beihang University;Shanghai Aircraft Design and Research Institute;Shanghai Jiao Tong University;Chinese University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.hit.edu.cn/;http://www.buaa.edu.cn/;;https://www.sjtu.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;BUAA;;SJTU;CUHK", "aff_campus_unique_index": "0;0;0;0;2", "aff_campus_unique": "Harbin;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Beyond Accuracy: Tracking more like Human via Visual Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95579", "id": "LezAEImfoc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LezAEImfoc", "openreview": "https://openreview.net/forum?id=LezAEImfoc", "poster": "/media/PosterPDFs/NeurIPS%202024/95579.png?t=1731162280.3462563", "project": "", "author_site": "Dailing Zhang, Shiyu Hu, Xiaokun Feng, Xuchen Li, wu meiqi, Jing Zhang, Kaiqi Huang", "tldr": "", "abstract": "Human visual search ability enables efficient and accurate tracking of an arbitrary moving target, which is a significant research interest in cognitive neuroscience. The recently proposed Central-Peripheral Dichotomy (CPD) theory sheds light on how humans effectively process visual information and track moving targets in complex environments. However, existing visual object tracking algorithms still fall short of matching human performance in maintaining tracking over time, particularly in complex scenarios requiring robust visual search skills. These scenarios often involve Spatio-Temporal Discontinuities (i.e., STDChallenge), prevalent in long-term tracking and global instance tracking. To address this issue, we conduct research from a human-like modeling perspective: (1) Inspired by the CPD, we pro- pose a new tracker named CPDTrack to achieve human-like visual search ability. The central vision of CPDTrack leverages the spatio-temporal continuity of videos to introduce priors and enhance localization precision, while the peripheral vision improves global awareness and detects object movements. (2) To further evaluate and analyze STDChallenge, we create the STDChallenge Benchmark. Besides, by incorporating human subjects, we establish a human baseline, creating a high- quality environment specifically designed to assess trackers\u2019 visual search abilities in videos across STDChallenge. (3) Our extensive experiments demonstrate that the proposed CPDTrack not only achieves state-of-the-art (SOTA) performance in this challenge but also narrows the behavioral differences with humans. Additionally, CPDTrack exhibits strong generalizability across various challenging benchmarks. In summary, our research underscores the importance of human-like modeling and offers strategic insights for advancing intelligent visual target tracking. Code and models are available at https://github.com/ZhangDailing8/CPDTrack.", "keywords": "visual object tracking;central-peripheral dichotomy;human behaviour", "primary_area": "machine_vision", "supplementary_material": "/attachment/e985d5f9d8a4420c6674695dc634159b34eaa314.zip", "author": "Dailing Zhang;Shiyu Hu;Xiaokun Feng;Xuchen Li;Meiqi Wu;Jing Zhang;Kaiqi Huang", "authorids": "~Dailing_Zhang2;~Shiyu_Hu1;~Xiaokun_Feng1;~Xuchen_Li1;~Meiqi_Wu2;~Jing_Zhang47;~Kaiqi_Huang1", "gender": "M;F;M;M;F;M;F", "homepage": "https://github.com/zdl-hub;https://huuuuusy.github.io/;https://github.com/XiaokunFeng;;;https://people.ucas.ac.cn/~huangkaiqi?language=en;", "dblp": "156/8892;;314/9776;232/2889;;89/7026;335/6876", "google_scholar": "ApH4wOcAAAAJ;49W-Rx4AAAAJ;https://scholar.google.com.hk/citations?user=NqXtIPIAAAAJ;9zHkraUAAAAJ;;caQ-OmYAAAAJ;fGc7NVAAAAAJ", "orcid": ";0000-0002-5872-7566;;0009-0009-2565-8857;0000-0001-8825-263X;;0009-0007-3155-4013", "linkedin": ";hushiyu1995/;;;;;https://www.linkedin.cn/incareer/in/ACoAADtQ7fQBfSKm1Qf8f8r0YhJ9nk4Qu9V7EMw", "or_profile": "~Dailing_Zhang2;~Shiyu_Hu1;~Xiaokun_Feng1;~Xuchen_Li1;~Jing_Zhang47;~Kaiqi_Huang1;~wu_meiqi2", "aff": "Institute of Automation, Chinese Academy of Sciences;Chinese academy of science;Institute of automation, Chinese academy of science;Beijing University of Posts and Telecommunications;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;University of Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;bupt.edu.cn;ia.ac.cn;nlpr.ia.ac.cn;ucas.ac.cn", "position": "PhD student;PhD student;PhD student;Undergrad student;Engineer;Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024beyond,\ntitle={Beyond Accuracy: Tracking more like Human via Visual Search},\nauthor={Dailing Zhang and Shiyu Hu and Xiaokun Feng and Xuchen Li and Meiqi Wu and Jing Zhang and Kaiqi Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LezAEImfoc}\n}", "github": "", "reviewers": "17n1;x2Xs;16gz", "pdf_size": 5511928, "rating": "6;6;7", "confidence": "4;5;5", "soundness": "2;3;3", "novelty": "3;2;4", "presentation": "3;2;3", "wc_summary": "57;72;69", "wc_strengths": "71;89;80", "wc_weaknesses": "79;171;140", "wc_questions": "3;9;88", "wc_limitations": "14;24;159", "wc_review": "224;365;536", "wc_reply_reviewers": "28;49;36", "wc_reply_authors": "25;33;26", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 6.48074069840786 ], "wc_strengths_avg": [ 80.0, 7.3484692283495345 ], "wc_weaknesses_avg": [ 130.0, 38.21866908549625 ], "wc_questions_avg": [ 33.333333333333336, 38.7327022323801 ], "wc_limitations_avg": [ 65.66666666666667, 66.12278141625791 ], "wc_review_avg": [ 375.0, 127.5695888525161 ], "wc_reply_reviewers_avg": [ 37.666666666666664, 8.65383665716478 ], "wc_reply_authors_avg": [ 28.0, 3.559026084010437 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16033489192752318705&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;bupt.edu.cn;ia.ac.cn;nlpr.ia.ac.cn;ucas.ac.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "Chinese Academy of Sciences;Beijing University of Posts and Telecommunications;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.bupt.edu.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;BUPT;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Can LLMs Implicitly Learn Numeric Parameter Constraints in Data Science APIs?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95578", "id": "LfC5rujSTk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LfC5rujSTk", "openreview": "https://openreview.net/forum?id=LfC5rujSTk", "poster": "", "project": "", "author_site": "Yinlin Deng, Chunqiu Steven Xia, Zhezhen Cao, Meiziniu Li, LINGMING ZHANG", "tldr": "", "abstract": "Data science (DS) programs, typically built on popular DS libraries (such as PyTorch and NumPy) with thousands of APIs, serve as the cornerstone for various mission-critical domains such as financial systems, autonomous driving software, and coding assistants. Recently, large language models (LLMs) have been widely applied to generate DS programs across diverse scenarios, such as assisting users for DS programming or detecting critical vulnerabilities in DS frameworks. Such applications have all operated under the assumption, that LLMs can implicitly model the numerical parameter constraints in DS library APIs and produce valid code. However, this assumption has not been rigorously studied in the literature. In this paper, we empirically investigate the proficiency of LLMs to handle these implicit numerical constraints when generating DS programs. We studied 28 widely used APIs from PyTorch and NumPy, and scrutinized the LLMs\u2019 generation performance in different levels of granularity: full programs, all parameters, and individual parameters of a single API. We evaluated both state-of-the-art open-source and closed-source models. The results show that LLMs are great at generating simple DS programs, particularly those that follow common patterns seen in training data. However, as we increase the difficulty by providing more complex/unusual inputs, the performance of LLMs drops significantly. We also observe that GPT-4-Turbo can sustain much higher performance overall, but still cannot handle arithmetic API constraints well. In summary, while LLMs exhibit the ability to memorize common patterns of popular DS API usage through massive training, they overall lack genuine comprehension of the underlying numerical constraints.", "keywords": "Program Synthesis;Evaluation;Dataset;Data Science;LLM4Code", "primary_area": "evaluation", "supplementary_material": "", "author": "Yinlin Deng;Chunqiu Steven Xia;Zhezhen Cao;Meiziniu Li;LINGMING ZHANG", "authorids": "~Yinlin_Deng1;~Chunqiu_Steven_Xia1;~Zhezhen_Cao1;~Meiziniu_Li1;~LINGMING_ZHANG2", "gender": "F;M;M;M;M", "homepage": "https://dengyinlin.github.io;https://steven-site.github.io/;;;http://lingming.cs.illinois.edu/", "dblp": "311/4562;324/4827;;;27/7057-1", "google_scholar": "FsP6Nl0AAAAJ;-PCjRp8AAAAJ;;https://scholar.google.com.hk/citations?user=USiLED8AAAAJ;zzbWQE4AAAAJ", "orcid": "0000-0002-4628-4219;;;;", "linkedin": ";;\u54f2\u632f-\u66f9-4310ab30a/;;", "or_profile": "~Yinlin_Deng1;~Chunqiu_Steven_Xia1;~Zhezhen_Cao1;~Meiziniu_Li1;~LINGMING_ZHANG2", "aff": "University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;Southern University of Science and Technology;Hong Kong University of Science and Technology;University of Illinois Urbana-Champaign", "aff_domain": "illinois.edu;cs.illinois.edu;mail.sustech.edu.cn;hkust.edu.hk;cs.illinois.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndeng2024can,\ntitle={Can {LLM}s Implicitly Learn Numeric Parameter Constraints in Data Science {API}s?},\nauthor={Yinlin Deng and Chunqiu Steven Xia and Zhezhen Cao and Meiziniu Li and LINGMING ZHANG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LfC5rujSTk}\n}", "github": "", "reviewers": "YxwD;qmKc;WbTY;bNWw;xaud", "pdf_size": 4346718, "rating": "3;4;6;6;6", "confidence": "3;5;4;4;3", "soundness": "2;2;3;3;3", "novelty": "1;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "67;104;115;114;156", "wc_strengths": "16;18;99;67;86", "wc_weaknesses": "146;314;54;218;101", "wc_questions": "2;55;126;24;98", "wc_limitations": "10;7;30;25;100", "wc_review": "241;498;424;448;541", "wc_reply_reviewers": "730;0;0;151;0", "wc_reply_authors": "3314;748;0;550;416", "reply_reviewers": "3;0;0;1;0", "reply_authors": "7;2;1;3;2", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 111.2, 28.40704138061548 ], "wc_strengths_avg": [ 57.2, 34.3709179394441 ], "wc_weaknesses_avg": [ 166.6, 91.40590790534276 ], "wc_questions_avg": [ 61.0, 45.78209256903839 ], "wc_limitations_avg": [ 34.4, 33.93287491504367 ], "wc_review_avg": [ 430.4, 102.98077490483357 ], "wc_reply_reviewers_avg": [ 176.2, 283.00840976903845 ], "wc_reply_authors_avg": [ 1005.6, 1179.9575585587816 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690602 ], "reply_authors_avg": [ 3.0, 2.0976176963403033 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IK3LtS2JGusJ:scholar.google.com/&scioq=Can+LLMs+Implicitly+Learn+Numeric+Parameter+Constraints+in+Data+Science+APIs%3F&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "illinois.edu;cs.illinois.edu;mail.sustech.edu.cn;hkust.edu.hk;cs.illinois.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Southern University of Science and Technology;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.sustech.edu.cn;https://www.ust.hk", "aff_unique_abbr": "UIUC;SUSTech;HKUST", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Urbana-Champaign;;Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Enhancing Protein Mutation Effect Prediction through a Retrieval-Augmented Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95577", "id": "LgeHswiWef", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LgeHswiWef", "openreview": "https://openreview.net/forum?id=LgeHswiWef", "poster": "", "project": "", "author_site": "Ruihan Guo, Rui Wang, Ruidong Wu, Zhizhou Ren, Jiahan Li, Shitong Luo, Zuofan Wu, Qiang Liu, Jian Peng, Jianzhu Ma", "tldr": "", "abstract": "Predicting the effects of protein mutations is crucial for analyzing protein functions and understanding genetic diseases. \nHowever, existing models struggle to effectively extract mutation-related local structure motifs from protein databases, which hinders their predictive accuracy and robustness. To tackle this problem, we design a novel retrieval-augmented framework for incorporating similar structure information in known protein structures. We create a vector database consisting of local structure motif embeddings from a pre-trained protein structure encoder, which allows for efficient retrieval of similar local structure motifs during mutation effect prediction. \nOur findings demonstrate that leveraging this method results in the SOTA performance across multiple protein mutation prediction datasets, and offers a scalable solution for studying mutation effects.", "keywords": "Computational Structural Biology;Protein Engineering;Retrieval-Augmented Framework", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Ruihan Guo;Rui Wang;Ruidong Wu;Zhizhou Ren;Jiahan Li;Shitong Luo;Zuofan Wu;qiang liu;Jian Peng;Jianzhu Ma", "authorids": "~Ruihan_Guo2;~Rui_Wang1;~Ruidong_Wu1;~Zhizhou_Ren1;~Jiahan_Li2;~Shitong_Luo1;~Zuofan_Wu1;~qiang_liu4;~Jian_Peng1;~Jianzhu_Ma2", "gender": ";M;M;M;;;M;;M;M", "homepage": ";https://www.ruiwang1998.com;;;;https://luost.me;;;http://jianpeng.web.engr.illinois.edu/;https://majianzhu.com/", "dblp": ";06/2293;224/4293;https://dblp.uni-trier.de/pid/239/5714.html;;271/0339;;;29/4181-1;24/9080.html", "google_scholar": ";;lNeJlFYAAAAJ;xgpMeDgAAAAJ;;z1BrjyIAAAAJ;;;https://scholar.google.com.tw/citations?user=4wcAVXAAAAAJ;", "orcid": ";;0009-0002-6402-4717;;;;;;;", "linkedin": ";;ruidong-wu-0b7182224/;;;;zuofan-wu-b08398213/;;;", "or_profile": "~Ruihan_Guo2;~Rui_Wang1;~Ruidong_Wu1;~Zhizhou_Ren1;~Jiahan_Li2;~Shitong_Luo1;~Zuofan_Wu1;~qiang_liu4;~Jian_Peng1;~Jianzhu_Ma2", "aff": ";Helixon;Helixon AI;University of Illinois, Urbana Champaign;;Massachusetts Institute of Technology;Helixon Research;;University of Illinois, Urbana Champaign;Tsinghua University", "aff_domain": ";helixon.com;helixon.com;illinois.edu;;mit.edu;helixon.com;;illinois.edu;tsinghua.edu.cn", "position": ";Researcher;Researcher;PhD student;;PhD student;Researcher;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nguo2024enhancing,\ntitle={Enhancing Protein Mutation Effect Prediction through a Retrieval-Augmented Framework},\nauthor={Ruihan Guo and Rui Wang and Ruidong Wu and Zhizhou Ren and Jiahan Li and Shitong Luo and Zuofan Wu and qiang liu and Jian Peng and Jianzhu Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LgeHswiWef}\n}", "github": "", "reviewers": "4tC9;95k8;pY2f", "pdf_size": 1855534, "rating": "5;6;6", "confidence": "5;3;3", "soundness": "4;3;3", "novelty": "2;3;4", "presentation": "2;2;2", "wc_summary": "124;97;93", "wc_strengths": "103;151;66", "wc_weaknesses": "163;287;91", "wc_questions": "49;126;117", "wc_limitations": "24;77;46", "wc_review": "463;738;413", "wc_reply_reviewers": "53;38;10", "wc_reply_authors": "667;1405;1915", "reply_reviewers": "1;1;1", "reply_authors": "3;3;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 104.66666666666667, 13.767917618708921 ], "wc_strengths_avg": [ 106.66666666666667, 34.79782879561437 ], "wc_weaknesses_avg": [ 180.33333333333334, 80.94991592885621 ], "wc_questions_avg": [ 97.33333333333333, 34.373762603991366 ], "wc_limitations_avg": [ 49.0, 21.740898478827106 ], "wc_review_avg": [ 538.0, 142.88690166235207 ], "wc_reply_reviewers_avg": [ 33.666666666666664, 17.82008853949821 ], "wc_reply_authors_avg": [ 1329.0, 512.3202123672264 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13036681448264340335&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";helixon.com;helixon.com;illinois.edu;;mit.edu;helixon.com;;illinois.edu;tsinghua.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;4;2;5", "aff_unique_norm": "Helixon;Helixon AI;University of Illinois Urbana-Champaign;Massachusetts Institute of Technology;Helixon Research;Tsinghua University", "aff_unique_dep": ";;;;;", "aff_unique_url": ";;https://illinois.edu;https://web.mit.edu;;https://www.tsinghua.edu.cn", "aff_unique_abbr": ";Helixon AI;UIUC;MIT;;THU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1;1;1;1;2", "aff_country_unique": ";United States;China" }, { "title": "Rule Extrapolation in Language Modeling: A Study of Compositional Generalization on OOD Prompts", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95576", "id": "Li2rpRZWjy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Li2rpRZWjy", "openreview": "https://openreview.net/forum?id=Li2rpRZWjy", "poster": "", "project": "", "author_site": "Anna M\u00e9sz\u00e1ros, Szilvia Ujv\u00e1ry, Wieland Brendel, Patrik Reizinger, Ferenc Huszar", "tldr": "", "abstract": "LLMs show remarkable emergent abilities, such as inferring concepts from presumably out-of-distribution prompts, known as in-context learning. Though this success is often attributed to the Transformer architecture, our systematic understanding is limited. In complex real-world data sets, even defining what is out-of-distribution is not obvious. To better understand the OOD behaviour of autoregressive LLMs, we focus on formal languages, which are defined by the intersection of rules. We define a new scenario of OOD compositional generalization, termed \\textit{rule extrapolation}. Rule extrapolation describes OOD scenarios, where the prompt violates at least one rule. We evaluate rule extrapolation in formal languages with varying complexity in linear and recurrent architectures, the Transformer, and state space models to understand the architectures' influence on rule extrapolation. We also lay the first stones of a normative theory of rule extrapolation, inspired by the Solomonoff prior in algorithmic information theory.", "keywords": "Language models;autoregressive models;out-of-distribution generalization;compositional generalization;rule extrapolation;formal languages", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/bb0b079fd4f4f516ded7a2b78a94f67cf1dce2b7.zip", "author": "Anna M\u00e9sz\u00e1ros;Szilvia Ujv\u00e1ry;Wieland Brendel;Patrik Reizinger;Ferenc Husz\u00e1r", "authorids": "~Anna_M\u00e9sz\u00e1ros1;~Szilvia_Ujv\u00e1ry1;~Wieland_Brendel1;~Patrik_Reizinger1;~Ferenc_Husz\u00e1r1", "gender": ";F;M;M;M", "homepage": ";;;https://rpatrik96.github.io/;", "dblp": ";;37/11107;249/5412;http://dblp.uni-trier.de/pers/hd/h/Huszar:Ferenc", "google_scholar": ";;v-JL-hsAAAAJ;zIT0fdIAAAAJ;https://scholar.google.co.uk/citations?user=koQCVT4AAAAJ", "orcid": ";;;0000-0001-9861-0293;", "linkedin": "anna-m%C3%A9sz%C3%A1ros-2ba244222/;szilviaujvary/;;patrik-reizinger/;", "or_profile": "~Anna_M\u00e9sz\u00e1ros1;~Szilvia_Ujv\u00e1ry1;~Wieland_Brendel1;~Patrik_Reizinger1;~Ferenc_Huszar1", "aff": "University of Cambridge;University of Cambridge;ELLIS Institute T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;tue.ellis.eu;uni-tuebingen.de;cam.ac.uk", "position": "PhD student;PhD student;Principal Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nm{\\'e}sz{\\'a}ros2024rule,\ntitle={Rule Extrapolation in Language Modeling: A Study of Compositional Generalization on {OOD} Prompts},\nauthor={Anna M{\\'e}sz{\\'a}ros and Szilvia Ujv{\\'a}ry and Wieland Brendel and Patrik Reizinger and Ferenc Husz{\\'a}r},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Li2rpRZWjy}\n}", "github": "", "reviewers": "AxjF;57oB;Nwv7;3gdK", "pdf_size": 1044714, "rating": "6;6;6;8", "confidence": "3;2;4;3", "soundness": "4;3;4;3", "novelty": "4;2;3;3", "presentation": "4;2;3;3", "wc_summary": "50;71;66;254", "wc_strengths": "83;35;34;128", "wc_weaknesses": "69;27;102;244", "wc_questions": "62;112;62;88", "wc_limitations": "4;3;4;59", "wc_review": "268;248;268;773", "wc_reply_reviewers": "19;30;17;38", "wc_reply_authors": "59;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 110.25, 83.35578864122155 ], "wc_strengths_avg": [ 70.0, 38.903727327853815 ], "wc_weaknesses_avg": [ 110.5, 81.53066907612128 ], "wc_questions_avg": [ 81.0, 20.808652046684813 ], "wc_limitations_avg": [ 17.5, 23.96351393264352 ], "wc_review_avg": [ 389.25, 221.7085643361573 ], "wc_reply_reviewers_avg": [ 26.0, 8.514693182963201 ], "wc_reply_authors_avg": [ 14.75, 25.54774941164094 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3192345139489945069&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;cam.ac.uk;tue.ellis.eu;uni-tuebingen.de;cam.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Cambridge;ELLIS Institute;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://ellis.eu/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Cambridge;ELLIS;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Cambridge;T\u00fcbingen", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Perception of Knowledge Boundary for Large Language Models through Semi-open-ended Question Answering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95575", "id": "Li9YTHoItP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Li9YTHoItP", "openreview": "https://openreview.net/forum?id=Li9YTHoItP", "poster": "/media/PosterPDFs/NeurIPS%202024/95575.png?t=1730711812.923348", "project": "", "author_site": "Zhihua Wen, Zhiliang Tian, Zexin Jian, Zhen Huang, Pei Ke, Yifu Gao, Minlie Huang, Dongsheng Li", "tldr": "", "abstract": "Large Language Models (LLMs) are widely used for knowledge-seeking purposes yet suffer from hallucinations. The knowledge boundary of an LLM limits its factual understanding, beyond which it may begin to hallucinate. Investigating the perception of LLMs' knowledge boundary is crucial for detecting hallucinations and LLMs' reliable generation. Current studies perceive LLMs' knowledge boundary on questions with concrete answers (close-ended questions) while paying limited attention to semi-open-ended questions that correspond to many potential answers. Some researchers achieve it by judging whether the question is answerable or not. However, this paradigm is not so suitable for semi-open-ended questions, which are usually ``partially answerable questions'' containing both answerable answers and ambiguous (unanswerable) answers. Ambiguous answers are essential for knowledge-seeking, but it may go beyond the knowledge boundary of LLMs. In this paper, we perceive the LLMs' knowledge boundary with semi-open-ended questions by discovering more ambiguous answers. First, we apply an LLM-based approach to construct semi-open-ended questions and obtain answers from a target LLM. Unfortunately, the output probabilities of mainstream black-box LLMs are inaccessible to sample more low-probability ambiguous answers. Therefore, we apply an open-sourced auxiliary model to explore ambiguous answers for the target LLM. We calculate the nearest semantic representation for existing answers to estimate their probabilities, with which we reduce the generation probability of high-probability existing answers to achieve a more effective generation. Finally, we compare the results from the RAG-based evaluation and LLM self-evaluation to categorize four types of ambiguous answers that are beyond the knowledge boundary of the target LLM. Following our method, we construct a dataset to perceive the knowledge boundary for GPT-4. We find that GPT-4 performs poorly on semi-open-ended questions and is often unaware of its knowledge boundary. Besides, our auxiliary model, LLaMA-2-13B, is effective in discovering many ambiguous answers, including correct answers neglected by GPT-4 and delusive wrong answers GPT-4 struggles to identify.", "keywords": "large language model;knowledge boundary;question answering", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9d0f8d7fb1b0c479a4864dec645c34f06ac4b747.zip", "author": "Zhihua Wen;Zhiliang Tian;Zexin Jian;Zhen Huang;Pei Ke;Yifu Gao;Minlie Huang;Dongsheng Li", "authorids": "~Zhihua_Wen2;~Zhiliang_Tian2;~Zexin_Jian1;~Zhen_Huang3;~Pei_Ke2;~Yifu_Gao1;~Minlie_Huang1;~Dongsheng_Li3", "gender": ";M;M;M;M;M;M;", "homepage": ";https://scholar.google.com.hk/citations?hl=en&user=ClvGvccAAAAJ#;https://xinchen-jzx.github.io/;;https://kepei1106.github.io/;;http://coai.cs.tsinghua.edu.cn/hml;", "dblp": ";203/9265;;22/3870-6;10/2179;180/4498.html;;", "google_scholar": ";https://scholar.google.com.hk/citations?hl=en;;;W_zPCtEAAAAJ;MOhORmIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;0000-0003-4819-373X;;0000-0002-1743-8055;;", "linkedin": ";;;;;;;", "or_profile": "~Zhihua_Wen2;~Zhiliang_Tian2;~Zexin_Jian1;~Zhen_Huang3;~Pei_Ke2;~Yifu_Gao1;~Minlie_Huang1;~Dongsheng_Li3", "aff": ";National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Tsinghua University;National University of Defense Technology;Tsinghua University;", "aff_domain": ";nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;tsinghua.edu.cn;nudt.edu.cn;tsinghua.edu.cn;", "position": ";Assistant Professor;Undergrad student;Full Professor;Postdoc;PhD student;Full Professor;", "bibtex": "@inproceedings{\nwen2024perception,\ntitle={Perception of Knowledge Boundary for Large Language Models through Semi-open-ended Question Answering},\nauthor={Zhihua Wen and Zhiliang Tian and Zexin Jian and Zhen Huang and Pei Ke and Yifu Gao and Minlie Huang and Dongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Li9YTHoItP}\n}", "github": "", "reviewers": "e3tW;frNN;hUqe;bcni", "pdf_size": 638264, "rating": "3;6;7;8", "confidence": "4;3;4;4", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "189;67;130;114", "wc_strengths": "42;63;240;152", "wc_weaknesses": "140;149;177;147", "wc_questions": "36;52;43;19", "wc_limitations": "5;1;16;12", "wc_review": "412;332;606;444", "wc_reply_reviewers": "0;13;23;31", "wc_reply_authors": "374;165;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "4;3;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 125.0, 43.60619222083029 ], "wc_strengths_avg": [ 124.25, 78.5569061254324 ], "wc_weaknesses_avg": [ 153.25, 14.113380176272443 ], "wc_questions_avg": [ 37.5, 12.093386622447824 ], "wc_limitations_avg": [ 8.5, 5.852349955359813 ], "wc_review_avg": [ 448.5, 99.66318277077048 ], "wc_reply_reviewers_avg": [ 16.75, 11.583932838203095 ], "wc_reply_authors_avg": [ 134.75, 153.68047208412656 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=206660155729022009&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;tsinghua.edu.cn;nudt.edu.cn;tsinghua.edu.cn;", "author_num": 8, "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "National University of Defense Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUDT;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ControlMLLM: Training-Free Visual Prompt Learning for Multimodal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95574", "id": "LjnDqVcrE9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LjnDqVcrE9", "openreview": "https://openreview.net/forum?id=LjnDqVcrE9", "poster": "/media/PosterPDFs/NeurIPS%202024/95574.png?t=1730096872.4686131", "project": "", "author_site": "Mingrui Wu, Xinyue Cai, Jiayi Ji, Jiale Li, Oucheng Huang, Gen Luo, Hao Fei, GUANNAN JIANG, Xiaoshuai Sun, Rongrong Ji", "tldr": "", "abstract": "In this work, we propose a training-free method to inject visual prompts into Multimodal Large Language Models (MLLMs) through learnable latent variable optimization. We observe that attention, as the core module of MLLMs, connects text prompt tokens and visual tokens, ultimately determining the final results. Our approach involves adjusting visual tokens from the MLP output during inference, controlling the attention response to ensure text prompt tokens attend to visual tokens in referring regions. We optimize a learnable latent variable based on an energy function, enhancing the strength of referring regions in the attention map. This enables detailed region description and reasoning without the need for substantial training costs or model retraining. Our method offers a promising direction for integrating referring abilities into MLLMs, and supports referring with box, mask, scribble and point. The results demonstrate that our method exhibits out-of-domain generalization and interpretability.", "keywords": "Training-Free\uff1bVisual Prompt\uff1bMultimodal Large Language Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Mingrui Wu;Xinyue Cai;Jiayi Ji;Jiale Li;Oucheng Huang;Gen Luo;Hao Fei;GUANNAN JIANG;Xiaoshuai Sun;Rongrong Ji", "authorids": "~Mingrui_Wu2;~Xinyue_Cai2;~Jiayi_Ji1;~Jiale_Li5;~Oucheng_Huang1;~Gen_Luo1;~Hao_Fei1;~GUANNAN_JIANG1;~Xiaoshuai_Sun3;~Rongrong_Ji5", "gender": "Not Specified;F;M;M;M;M;M;M;M;M", "homepage": ";https://github.com/yzlmhzz;https://scholar.google.com/citations?user=xp_rICcAAAAJ&hl=zh-CN;https://i.csdn.net/#/user-center/profile?spm=1001.2101.3001.5111;https://haofei.vip/;;https://sites.google.com/view/xssun;http://mac.xmu.edu.cn/rrji-en.html;https://github.com/luogen1996;https://scholar.google.com/citations?hl=zh-CN&user=fED_ASAAAAAJ", "dblp": ";;250/9459;;81/3569-1;135/6446.html;26/5787.html;86/5681;195/2078;", "google_scholar": "sbCKwnYAAAAJ;;xp_rICcAAAAJ;https://scholar.google.com/citations?view_op=list_works;YGDX46AAAAAJ;yw-rcj4AAAAJ;KPMK3B4AAAAJ;;EyZqU9gAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0002-9956-6308;;0000-0003-3026-6347;;0000-0003-3912-9306;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Mingrui_Wu2;~Xinyue_Cai2;~Jiayi_Ji1;~Oucheng_Huang1;~Hao_Fei1;~GUANNAN_JIANG1;~Xiaoshuai_Sun3;~Rongrong_Ji5;~Gen_Luogen1;~JialeLi2", "aff": "Xiamen University;Xiamen University;Xiamen University;Xiamen University;National University of Singapore;Contemporary Amperex Technology Co., Limited;Xiamen University;Xiamen University;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;nus.edu.sg;catl.com;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "PhD student;Undergrad student;Postdoc;MS student;Postdoc;Researcher;Associate Professor;Full Professor;PhD student;Undergrad student", "bibtex": "@inproceedings{\nwu2024controlmllm,\ntitle={Control{MLLM}: Training-Free Visual Prompt Learning for Multimodal Large Language Models},\nauthor={Mingrui Wu and Xinyue Cai and Jiayi Ji and Jiale Li and Oucheng Huang and Gen Luo and Hao Fei and GUANNAN JIANG and Xiaoshuai Sun and Rongrong Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LjnDqVcrE9}\n}", "github": "", "reviewers": "LggR;1iuY;hD7U;VBnv", "pdf_size": 6733310, "rating": "3;4;5;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "1;2;3;4", "presentation": "1;3;2;4", "wc_summary": "49;54;46;118", "wc_strengths": "11;37;43;75", "wc_weaknesses": "226;142;109;71", "wc_questions": "2;4;34;2", "wc_limitations": "2;8;5;1", "wc_review": "290;245;237;267", "wc_reply_reviewers": "33;0;0;37", "wc_reply_authors": "0;39;42;23", "reply_reviewers": "1;0;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 66.75, 29.72688177390962 ], "wc_strengths_avg": [ 41.5, 22.776083947860748 ], "wc_weaknesses_avg": [ 137.0, 57.197027894812855 ], "wc_questions_avg": [ 10.5, 13.592277219068187 ], "wc_limitations_avg": [ 4.0, 2.7386127875258306 ], "wc_review_avg": [ 259.75, 20.632195714465293 ], "wc_reply_reviewers_avg": [ 17.5, 17.55704986607944 ], "wc_reply_authors_avg": [ 26.0, 16.658331248957683 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17408371847920000854&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 5, "email": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;nus.edu.sg;catl.com;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;1;2;0;0;0;0", "aff_unique_norm": "Xiamen University;National University of Singapore;Contemporary Amperex Technology Co., Limited", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.nus.edu.sg;https://www.catl.com.cn", "aff_unique_abbr": "XMU;NUS;CATL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "LlcygqLdwO", "title": "Visual-TCAV: Explainability of Image Classification through Concept-based Saliency Maps", "track": "main", "status": "Reject", "tldr": "", "abstract": "Convolutional Neural Networks (CNNs) have seen significant performance improvements in recent years. However, due to their size and complexity, their decision-making process remains a black-box, leading to opacity and trust issues. State-of-the-art saliency methods can generate local explanations that highlight the area in the input image where a class is identified but do not explain how different features contribute to the prediction. On the other hand, concept-based methods, such as TCAV (Testing with Concept Activation Vectors), provide global explainability, but cannot compute the attribution of a concept in a specific prediction nor show the locations where the network detects these concepts. This paper introduces a novel explainability framework, Visual-TCAV, which aims to bridge the gap between these methods. Visual-TCAV uses Concept Activation Vectors (CAVs) to generate saliency maps that show where concepts are recognized by the network. Moreover, it can estimate the attribution of these concepts to the output of any class using a generalization of Integrated Gradients. Visual-TCAV can provide both local and global explanations for any CNN-based image classification model without requiring any modifications. This framework is evaluated on widely used CNNs and its validity is further confirmed through experiments where a ground truth for explanations is known.", "keywords": "explainability;concept activation vectors;image classification", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/e7ae48f7a9f40ba84303f68bbdbc48bf8a935545.zip", "author": "Antonio De Santis;Riccardo Campi;Matteo Bianchi;Marco Brambilla", "authorids": "~Antonio_De_Santis2;~Riccardo_Campi1;~Matteo_Bianchi1;~Marco_Brambilla1", "gender": "M;M;M;M", "homepage": ";;;https://marco-brambilla.com/", "dblp": "88/2568;;;b/MarcoBrambilla", "google_scholar": "Xwx26YgAAAAJ;;S6MMstgAAAAJ;YQV_ChkAAAAJ", "orcid": "0009-0006-7579-1080;0000-0003-0716-7631;0009-0001-5345-1898;0000-0002-8753-2434", "linkedin": "antonio-dee/;;;marcobrambi/", "or_profile": "~Antonio_De_Santis2;~Riccardo_Campi1;~Matteo_Bianchi1;~Marco_Brambilla1", "aff": "Polytechnic Institute of Milan;Polytechnic Institute of Milan;Polytechnic Institute of Milan;Polytechnic Institute of Milan", "aff_domain": "polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;Researcher;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024visualtcav,\ntitle={Visual-{TCAV}: Explainability of Image Classification through Concept-based Saliency Maps},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=LlcygqLdwO}\n}", "github": "", "project": "", "reviewers": "Z23K;FKau;oLg1", "site": "https://openreview.net/forum?id=LlcygqLdwO", "pdf_size": 18102673, "rating": "5;6;8", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "59;81;233", "wc_strengths": "71;88;152", "wc_weaknesses": "160;360;133", "wc_questions": "56;152;124", "wc_limitations": "2;57;141", "wc_review": "348;738;783", "wc_reply_reviewers": "86;35;79", "wc_reply_authors": "149;22;69", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 124.33333333333333, 77.36206362868617 ], "wc_strengths_avg": [ 103.66666666666667, 34.87437773240151 ], "wc_weaknesses_avg": [ 217.66666666666666, 101.24667347073132 ], "wc_questions_avg": [ 110.66666666666667, 40.309910554215925 ], "wc_limitations_avg": [ 66.66666666666667, 57.15670467135845 ], "wc_review_avg": [ 623.0, 195.3202498462461 ], "wc_reply_reviewers_avg": [ 66.66666666666667, 22.57333727111602 ], "wc_reply_authors_avg": [ 80.0, 52.42772803266099 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:37MfeBdmwHQJ:scholar.google.com/&scioq=Visual-TCAV:+Explainability+of+Image+Classification+through+Concept-based+Saliency+Maps&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Polytechnic Institute of Milan", "aff_unique_dep": "", "aff_unique_url": "https://www.polimi.it/", "aff_unique_abbr": "Politecnico di Milano", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "MaskLLM: Learnable Semi-Structured Sparsity for Large Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95573", "id": "Llu9nJal7b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Llu9nJal7b", "openreview": "https://openreview.net/forum?id=Llu9nJal7b", "poster": "/media/PosterPDFs/NeurIPS%202024/95573.png?t=1733386583.9671946", "project": "", "author_site": "Gongfan Fang, Hongxu Yin, Saurav Muralidharan, Greg Heinrich, Jeff Pool, Jan Kautz, Pavlo Molchanov, Xinchao Wang", "tldr": "", "abstract": "Large Language Models (LLMs) are distinguished by their massive parameter counts, which typically result in significant redundancy. This work introduces MaskLLM, a learnable pruning method that establishes Semi-structured (or ``N:M'') Sparsity in LLMs, aimed at reducing computational overhead during inference. Instead of developing a new importance criterion, MaskLLM explicitly models N:M patterns as a learnable distribution through Gumbel Softmax sampling. This approach facilitates end-to-end training on large-scale datasets and offers two notable advantages: 1) High-quality Masks - our method effectively scales to large datasets and learns accurate masks; 2) Transferability - the probabilistic modeling of mask distribution enables the transfer learning of sparsity across domains or tasks. We assessed MaskLLM using 2:4 sparsity on various LLMs, including LLaMA-2, Nemotron-4, and GPT-3, with sizes ranging from 843M to 15B parameters, and our empirical results show substantial improvements over state-of-the-art methods. For instance, leading approaches achieve a perplexity (PPL) of 10 or greater on Wikitext compared to the dense model's 5.12 PPL, but MaskLLM achieves a significantly lower 6.72 PPL solely by learning the masks with frozen weights. Furthermore, MaskLLM's learnable nature allows customized masks for lossless application of 2:4 sparsity to downstream tasks or domains. Code is available at https://github.com/NVlabs/MaskLLM.", "keywords": "Sparsity;Pruning;LLMs;Semi-structured Pruning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Gongfan Fang;Hongxu Yin;Saurav Muralidharan;Greg Heinrich;Jeff Pool;Jan Kautz;Pavlo Molchanov;Xinchao Wang", "authorids": "~Gongfan_Fang2;~Hongxu_Yin2;~Saurav_Muralidharan1;~Greg_Heinrich1;~Jeff_Pool1;~Jan_Kautz1;~Pavlo_Molchanov1;~Xinchao_Wang1", "gender": "M;M;M;;;M;M;M", "homepage": "https://fangggf.github.io/;https://sauravm.com;;;http://jankautz.com;;https://hongxu-yin.github.io/;https://sites.google.com/site/sitexinchaowang/", "dblp": "243/5768;31/8395;;04/1535;48/6214;165/8169.html;166/3425;", "google_scholar": "489YZ_kAAAAJ;GXlChWcAAAAJ;VrjibvwAAAAJ;DagH37xI9soC;P9FclNEAAAAJ;J9PoyoIAAAAJ;4gdSoOYAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;;;;;;", "linkedin": ";;gheinrich/;;;;;", "or_profile": "~Gongfan_Fang2;~Saurav_Muralidharan1;~Greg_Heinrich1;~Jeff_Pool1;~Jan_Kautz1;~Pavlo_Molchanov1;~Hongxu_Yin1;~Xinchao_WANG3", "aff": "National University of Singapore;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA Research;NVIDIA;National University of Singapore", "aff_domain": "u.nus.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nus.edu", "position": "PhD student;Researcher;Researcher;Senior Architect;VP Research;Research Scientist;Senior Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nfang2024maskllm,\ntitle={Mask{LLM}: Learnable Semi-Structured Sparsity for Large Language Models},\nauthor={Gongfan Fang and Hongxu Yin and Saurav Muralidharan and Greg Heinrich and Jeff Pool and Jan Kautz and Pavlo Molchanov and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Llu9nJal7b}\n}", "github": "", "reviewers": "2tsd;AwSn;qjsy;HusL", "pdf_size": 1437016, "rating": "7;7;8;8", "confidence": "4;5;5;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "2;4;4;3", "wc_summary": "199;27;117;78", "wc_strengths": "133;20;172;143", "wc_weaknesses": "234;54;72;133", "wc_questions": "95;154;2;4", "wc_limitations": "87;2;1;4", "wc_review": "748;257;364;362", "wc_reply_reviewers": "61;8;38;49", "wc_reply_authors": "23;24;27;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 105.25, 62.83460431959447 ], "wc_strengths_avg": [ 117.0, 57.80570906061096 ], "wc_weaknesses_avg": [ 123.25, 70.32558211632521 ], "wc_questions_avg": [ 63.75, 64.23540690304687 ], "wc_limitations_avg": [ 23.5, 36.67764987018661 ], "wc_review_avg": [ 432.75, 187.08470675071226 ], "wc_reply_reviewers_avg": [ 39.0, 19.6596032513375 ], "wc_reply_authors_avg": [ 23.75, 2.165063509461097 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12409646714357286864&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u.nus.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nus.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;1;0", "aff_unique_norm": "National University of Singapore;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.nus.edu.sg;https://www.nvidia.com", "aff_unique_abbr": "NUS;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "An Improved Empirical Fisher Approximation for Natural Gradient Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95572", "id": "LmjLRHVCMG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LmjLRHVCMG", "openreview": "https://openreview.net/forum?id=LmjLRHVCMG", "poster": "/media/PosterPDFs/NeurIPS%202024/95572.png?t=1733499456.6658902", "project": "", "author_site": "Xiaodong Wu, Wenyi Yu, Chao Zhang, Phil Woodland", "tldr": "", "abstract": "Approximate Natural Gradient Descent (NGD) methods are an important family of optimisers for deep learning models, which use approximate Fisher information matrices to pre-condition gradients during training. The empirical Fisher (EF) method approximates the Fisher information matrix empirically by reusing the per-sample gradients collected during back-propagation. Despite its ease of implementation, the EF approximation has its theoretical and practical limitations. This paper investigates the *inversely-scaled projection* issue of EF, which is shown to be a major cause of its poor empirical approximation quality. An improved empirical Fisher (iEF) method is proposed to address this issue, which is motivated as a generalised NGD method from a loss reduction perspective, meanwhile retaining the practical convenience of EF. The exact iEF and EF methods are experimentally evaluated using practical deep learning setups, including widely-used setups for parameter-efficient fine-tuning of pre-trained models (T5-base with LoRA and Prompt-Tuning on GLUE tasks, and ViT with LoRA for CIFAR100). Optimisation experiments show that applying exact iEF directly as an optimiser provides strong convergence and generalisation. It achieves the best test performance and the lowest training loss for the majority of the tasks, even when compared to well-tuned AdamW/Adafactor baselines. Additionally, under a novel empirical evaluation framework, the proposed iEF method shows consistently better approximation quality to exact Natural Gradient updates than both the EF and the more expensive sampled Fisher methods, meanwhile demonstrating the superior property of being robust to the choice of damping across tasks and training stages. Improving existing approximate NGD optimisers with iEF is expected to lead to better convergence and robustness. Furthermore, the iEF method also serves as a better approximation method to the Fisher information matrix itself, which enables the improvement of a variety of Fisher-based methods, not limited to the scope of optimisation.", "keywords": "Empirical Fisher;Natural Gradient Descent;Second-order Optimisation;Deep Learning", "primary_area": "optimization", "supplementary_material": "/attachment/30d593111200e38ee7f4b4b51e91ec4e083eb9bf.zip", "author": "Xiaodong Wu;Wenyi Yu;Chao Zhang;Phil Woodland", "authorids": "~Xiaodong_Wu4;~Wenyi_Yu2;~Chao_Zhang20;~Phil_Woodland1", "gender": "M;M;M;M", "homepage": ";https://github.com/Yu-Doit;http://mi.eng.cam.ac.uk/~cz277/;http://www.eng.cam.ac.uk/profiles/pw117", "dblp": "26/373;;94/3019-31.html;42/153", "google_scholar": ";CGqr-V8AAAAJ;https://scholar.google.co.uk/citations?view_op=list_works;https://scholar.google.co.uk/citations?user=V_SMuA8AAAAJ", "orcid": ";;;", "linkedin": "xiaodongwu1998;;;", "or_profile": "~Xiaodong_Wu4;~Wenyi_Yu2;~Chao_Zhang20;~Phil_Woodland1", "aff": "University of Cambridge;Tsinghua University;University College London;University of Cambridge", "aff_domain": "cam.ac.uk;tsinghua.edu.cn;ucl.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024an,\ntitle={An Improved Empirical Fisher Approximation for Natural Gradient Descent},\nauthor={Xiaodong Wu and Wenyi Yu and Chao Zhang and Phil Woodland},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LmjLRHVCMG}\n}", "github": "", "reviewers": "4Ck3;Sh4Z;Khnn;13ZJ", "pdf_size": 9848225, "rating": "5;5;6;7", "confidence": "3;2;3;5", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "35;157;86;76", "wc_strengths": "56;58;86;75", "wc_weaknesses": "37;134;425;166", "wc_questions": "201;136;94;94", "wc_limitations": "15;19;1;14", "wc_review": "344;504;692;425", "wc_reply_reviewers": "276;0;118;23", "wc_reply_authors": "746;118;508;11", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 43.92322847879013 ], "wc_strengths_avg": [ 68.75, 12.397076268217438 ], "wc_weaknesses_avg": [ 190.5, 143.47909255358428 ], "wc_questions_avg": [ 131.25, 43.76856748855279 ], "wc_limitations_avg": [ 12.25, 6.7592529172978875 ], "wc_review_avg": [ 491.25, 128.97165386238947 ], "wc_reply_reviewers_avg": [ 104.25, 108.57802494059284 ], "wc_reply_authors_avg": [ 345.75, 295.9952491172789 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15061031143720506447&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;tsinghua.edu.cn;ucl.ac.uk;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;Tsinghua University;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.tsinghua.edu.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "Cambridge;THU;UCL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;China" }, { "title": "eXponential FAmily Dynamical Systems (XFADS): Large-scale nonlinear Gaussian state-space modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95571", "id": "Ln8ogihZ2S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ln8ogihZ2S", "openreview": "https://openreview.net/forum?id=Ln8ogihZ2S", "poster": "", "project": "", "author_site": "Matthew Dowling, Yuan Zhao, Memming Park", "tldr": "", "abstract": "State-space graphical models and the variational autoencoder framework provide a principled apparatus for learning dynamical systems from data. State-of-the-art probabilistic approaches are often able to scale to large problems at the cost of flexibility of the variational posterior or expressivity of the dynamics model. However, those consolidations can be detrimental if the ultimate goal is to learn a generative model capable of explaining the spatiotemporal structure of the data and making accurate forecasts. We introduce a low-rank structured variational autoencoding framework for nonlinear Gaussian state-space graphical models capable of capturing dense covariance structures that are important for learning dynamical systems with predictive capabilities. Our inference algorithm exploits the covariance structures that arise naturally from sample based approximate Gaussian message passing and low-rank amortized posterior updates -- effectively performing approximate variational smoothing with time complexity scaling linearly in the state dimensionality. In comparisons with other deep state-space model architectures our approach consistently demonstrates the ability to learn a more predictive generative model. Furthermore, when applied to neural physiological recordings, our approach is able to learn a dynamical system capable of forecasting population spiking and behavioral correlates from a small portion of single trials.", "keywords": "variational inference;nonlinear state-space model;dynamical system", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/75b1290efbeab8480415529049eb98275f648111.zip", "author": "Matthew Dowling;Yuan Zhao;Il Memming Park", "authorids": "~Matthew_Dowling2;~Yuan_Zhao1;~Il_Memming_Park1", "gender": "M;;M", "homepage": ";;http://catniplab.github.io/", "dblp": ";65/2105-4;00/4652-2", "google_scholar": "https://scholar.google.com/citations?hl=en;XLpD5N0AAAAJ;CsmltusAAAAJ", "orcid": ";0000-0002-6123-8579;0000-0002-4255-7750", "linkedin": ";;memming/", "or_profile": "~Matthew_Dowling2;~Yuan_Zhao1;~Il_Memming_Park1", "aff": "State University of New York, Stony Brook;National Institute of Mental Health;Champalimaud Centre for the Unknown", "aff_domain": "stonybrook.edu;nih.gov;fchampalimaud.org", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\ndowling2024exponential,\ntitle={eXponential {FA}mily Dynamical Systems ({XFADS}): Large-scale nonlinear Gaussian state-space modeling},\nauthor={Matthew Dowling and Yuan Zhao and Il Memming Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ln8ogihZ2S}\n}", "github": "", "reviewers": "ehDs;S7ai;UuWx;e1Je", "pdf_size": 6731065, "rating": "6;6;7;7", "confidence": "4;3;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "3;4;3;3", "wc_summary": "120;56;117;68", "wc_strengths": "38;64;102;104", "wc_weaknesses": "805;40;366;302", "wc_questions": "26;39;30;83", "wc_limitations": "7;28;1;10", "wc_review": "996;227;616;567", "wc_reply_reviewers": "484;380;26;59", "wc_reply_authors": "801;282;56;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 28.586491565073178 ], "wc_strengths_avg": [ 77.0, 27.586228448267445 ], "wc_weaknesses_avg": [ 378.25, 274.9967045257088 ], "wc_questions_avg": [ 44.5, 22.721135535003526 ], "wc_limitations_avg": [ 11.5, 10.062305898749054 ], "wc_review_avg": [ 601.5, 272.61740590065045 ], "wc_reply_reviewers_avg": [ 237.25, 198.53384472175014 ], "wc_reply_authors_avg": [ 284.75, 316.2003913659817 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7237768050671162906&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "stonybrook.edu;nih.gov;fchampalimaud.org", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "State University of New York;National Institute of Mental Health;Champalimaud Centre for the Unknown", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.nimh.nih.gov;https://www.champalimaud.org", "aff_unique_abbr": "SUNY Stony Brook;NIMH;CCU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Portugal" }, { "id": "LnJ2EGKTXh", "title": "Robo-Instruct: Simulator-Augmented Instruction Alignment For Finetuning CodeLLMs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have shown great promise at generating robot programs from natural language given domain-specific robot application programming interfaces (APIs). However, the performance gap between proprietary LLMs and smaller open-weight LLMs remains wide. This raises a question: Can we fine-tune smaller open-weight LLMs for generating domain-specific robot programs to close the performance gap with proprietary LLMs? While Self-Instruct is a promising solution by generating a diverse set of training data, it cannot verify the correctness of these programs. In contrast, a robot simulator with a well-defined world can identify execution errors but limits the diversity of programs that it can verify. In this work, we introduce Robo-Instruct, which brings the best of both worlds --- it promotes the diversity of Self-Instruct while providing the correctness of simulator-based checking. Robo-Instruct introduces RoboSim to synthesize a consistent world state on the fly by inferring properties relevant to the program being checked, and simulating actions accordingly. Furthermore, the instructions and programs generated by Self-Instruct may be subtly inconsistent --- such as the program missing a step implied by the instruction. Robo-Instruct further addresses this with InstAlign, an instruction-program alignment procedure that revises the task instruction to reflect the actual results of the generated program. Given a few seed task descriptions and the robot APIs, Robo-Instruct is capable of generating a training dataset using only a small open-weight model. This dataset can then be used to fine-tune small open-weight language models, enabling them to match or even exceed the performance of several proprietary LLMs, such as GPT-3.5-Turbo and Gemini-Pro.", "keywords": "Self-Instruct;Code LLMs Fine-tuning;Domain-Specific Program Generation;Code LLMs for Robotics", "primary_area": "robotics", "supplementary_material": "", "author": "Zichao Hu;Junyi Jessy Li;Arjun Guha;Joydeep Biswas", "authorids": "~Zichao_Hu1;~Junyi_Jessy_Li2;~Arjun_Guha3;~Joydeep_Biswas1", "gender": "M;F;M;M", "homepage": ";https://jessyli.com;https://khoury.northeastern.edu/~arjunguha;https://www.joydeepb.com/", "dblp": ";148/9553;15/2016;84/73", "google_scholar": "Qk-v-okAAAAJ;tJGm3-YAAAAJ;yMU0f9EAAAAJ;https://scholar.google.com.tw/citations?user=f28F1YUAAAAJ", "orcid": "0009-0007-6433-8878;;;0000-0002-1211-1731", "linkedin": ";;;", "or_profile": "~Zichao_Hu1;~Junyi_Jessy_Li2;~Arjun_Guha3;~Joydeep_Biswas1", "aff": "University of Texas at Austin;University of Texas at Austin;Roblox Research;The University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;roblox.com;cs.utexas.edu", "position": "PhD student;Associate Professor;Visiting Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024roboinstruct,\ntitle={Robo-Instruct: Simulator-Augmented Instruction Alignment For Finetuning Code{LLM}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=LnJ2EGKTXh}\n}", "github": "", "project": "", "reviewers": "A73B;afBU;QEDf;BHTn", "site": "https://openreview.net/forum?id=LnJ2EGKTXh", "pdf_size": 1492700, "rating": "3;4;7;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "136;127;61;138", "wc_strengths": "106;71;74;93", "wc_weaknesses": "78;154;50;99", "wc_questions": "30;111;30;26", "wc_limitations": "1;47;2;8", "wc_review": "351;510;217;364", "wc_reply_reviewers": "0;491;103;0", "wc_reply_authors": "0;1265;10;0", "reply_reviewers": "0;2;1;0", "reply_authors": "1;3;2;1", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.5, 31.737202145116697 ], "wc_strengths_avg": [ 86.0, 14.300349646075091 ], "wc_weaknesses_avg": [ 95.25, 38.114137796885814 ], "wc_questions_avg": [ 49.25, 35.688758734369 ], "wc_limitations_avg": [ 14.5, 18.953891421024867 ], "wc_review_avg": [ 360.5, 103.7364448976347 ], "wc_reply_reviewers_avg": [ 148.5, 202.16391864029546 ], "wc_reply_authors_avg": [ 318.75, 546.3329456476151 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42008402520840293, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7442991126913299146&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Austin;Roblox Corporation", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.utexas.edu;https://www.roblox.com", "aff_unique_abbr": "UT Austin;Roblox", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Tolerant Algorithms for Learning with Arbitrary Covariate Shift", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95570", "id": "LnNfwc2Ah1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LnNfwc2Ah1", "openreview": "https://openreview.net/forum?id=LnNfwc2Ah1", "poster": "", "project": "", "author_site": "Surbhi Goel, Abhishek Shetty, Konstantinos Stavropoulos, Arsen Vasilyan", "tldr": "", "abstract": "We study the problem of learning under arbitrary distribution shift, where the learner is trained on a labeled set from one distribution but evaluated on a different, potentially adversarially generated test distribution. We focus on two frameworks: *PQ learning* [GKKM'20], allowing abstention on adversarially generated parts of the test distribution, and *TDS learning* [KSV'23], permitting abstention on the entire test distribution if distribution shift is detected. All prior known algorithms either rely on learning primitives that are computationally hard even for simple function classes, or end up abstaining entirely even in the presence of a tiny amount of distribution shift.\n \n We address both these challenges for natural function classes, including intersections of halfspaces and decision trees, and standard training distributions, including Gaussians. For PQ learning, we give efficient learning algorithms, while for TDS learning, our algorithms can tolerate moderate amounts of distribution shift. At the core of our approach is an improved analysis of spectral outlier-removal techniques from learning with nasty noise. \n Our analysis can (1) handle arbitrarily large fraction of outliers, which is crucial for handling arbitrary distribution shifts, and (2) obtain stronger bounds on polynomial moments of the distribution after outlier removal, yielding new insights into polynomial regression under distribution shifts. Lastly, our techniques lead to novel results for tolerant *testable learning* [RV'23], and learning with nasty noise.", "keywords": "pac learning;distribution shift;distribution testing;outlier removal;testable learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Surbhi Goel;Abhishek Shetty;Konstantinos Stavropoulos;Arsen Vasilyan", "authorids": "~Surbhi_Goel1;~Abhishek_Shetty1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "gender": "F;M;;", "homepage": "https://www.surbhigoel.com;https://ashettyv.github.io/;;", "dblp": "190/7815;223/4770;;", "google_scholar": "https://scholar.google.co.in/citations?user=Zqz4CQoAAAAJ;https://scholar.google.co.in/citations?user=M-y2aLUAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Surbhi_Goel1;~Abhishek_Shetty1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "aff": "University of Pennsylvania;University of California, Berkeley;;", "aff_domain": "upenn.edu;berkeley.edu;;", "position": "Assistant Professor;PhD student;;", "bibtex": "@inproceedings{\ngoel2024tolerant,\ntitle={Tolerant Algorithms for Learning with Arbitrary Covariate Shift},\nauthor={Surbhi Goel and Abhishek Shetty and Konstantinos Stavropoulos and Arsen Vasilyan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LnNfwc2Ah1}\n}", "github": "", "reviewers": "QNbu;tKBR;wKf2;wPZr;JMSW", "pdf_size": 587106, "rating": "6;6;7;8;8", "confidence": "3;4;4;4;4", "soundness": "3;2;4;4;3", "novelty": "2;3;3;3;4", "presentation": "2;2;2;4;3", "wc_summary": "126;120;396;282;464", "wc_strengths": "68;116;60;166;144", "wc_weaknesses": "308;60;38;93;68", "wc_questions": "2;219;91;120;18", "wc_limitations": "1;8;1;2;3", "wc_review": "505;523;586;663;697", "wc_reply_reviewers": "34;20;15;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 277.6, 138.99870502993903 ], "wc_strengths_avg": [ 110.8, 41.44586831036358 ], "wc_weaknesses_avg": [ 113.4, 98.87689315507441 ], "wc_questions_avg": [ 90.0, 78.06407624509497 ], "wc_limitations_avg": [ 3.0, 2.6076809620810595 ], "wc_review_avg": [ 594.8, 75.3562207120288 ], "wc_reply_reviewers_avg": [ 13.8, 12.874781551544865 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5590169943749476, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3626420435690342066&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "upenn.edu;berkeley.edu;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Pennsylvania;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.berkeley.edu", "aff_unique_abbr": "UPenn;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Exploratory Retrieval-Augmented Planning For Continual Embodied Instruction Following", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95569", "id": "LpXV29Ggl3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LpXV29Ggl3", "openreview": "https://openreview.net/forum?id=LpXV29Ggl3", "poster": "", "project": "", "author_site": "Minjong Yoo, Jinwoo Jang, Wei-Jin Park, Honguk Woo", "tldr": "", "abstract": "This study presents an Exploratory Retrieval-Augmented Planning (ExRAP) framework, designed to tackle continual instruction following tasks of embodied agents in dynamic, non-stationary environments. The framework enhances Large Language Models' (LLMs) embodied reasoning capabilities by efficiently exploring the physical environment and establishing the environmental context memory, thereby effectively grounding the task planning process in time-varying environment contexts. In ExRAP, given multiple continual instruction following tasks, each instruction is decomposed into queries on the environmental context memory and task executions conditioned on the query results. To efficiently handle these multiple tasks that are performed continuously and simultaneously, we implement an exploration-integrated task planning scheme by incorporating the information-based exploration into the LLM-based planning process. Combined with memory-augmented query evaluation, this integrated scheme not only allows for a better balance between the validity of the environmental context memory and the load of environment exploration, but also improves overall task performance. Furthermore, we devise a temporal consistency refinement scheme for query evaluation to address the inherent decay of knowledge in the memory. Through experiments with VirtualHome, ALFRED, and CARLA, our approach demonstrates robustness against a variety of embodied instruction following scenarios involving different instruction scales and types, and non-stationarity degrees, and it consistently outperforms other state-of-the-art LLM-based task planning approaches in terms of both goal success rate and execution efficiency.", "keywords": "Continual instruction;Embodied planning;Retrieval augmented planning;Integrated task planning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3895264395781f5d996b65ed6488da66e968734d.zip", "author": "Minjong Yoo;Jinwoo Jang;Wei-Jin Park;Honguk Woo", "authorids": "~Minjong_Yoo2;~Jinwoo_Jang1;~Wei-Jin_Park1;~Honguk_Woo1", "gender": "Not Specified;M;M;M", "homepage": ";http://www.acryl.ai;https://sites.google.com/view/csi-agent-group/about;", "dblp": ";;63/6072;253/2606.html", "google_scholar": ";;https://scholar.google.co.kr/citations?user=Gaxjc7UAAAAJ;O6L-PkgAAAAJ", "orcid": ";;0000-0001-6948-3440;", "linkedin": "doldam0;;;", "or_profile": "~Jinwoo_Jang1;~Wei-Jin_Park1;~Honguk_Woo1;~minjong_Yoo1", "aff": "Sungkyunkwan University;Acryl Inc.;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;acryl.ai;skku.edu;skku.edu", "position": "Undergrad student;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nyoo2024exploratory,\ntitle={Exploratory Retrieval-Augmented Planning For Continual Embodied Instruction Following},\nauthor={Minjong Yoo and Jinwoo Jang and Wei-Jin Park and Honguk Woo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LpXV29Ggl3}\n}", "github": "", "reviewers": "K6e1;kQXj;Jxmz", "pdf_size": 1293108, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "88;367;83", "wc_strengths": "46;75;77", "wc_weaknesses": "282;282;224", "wc_questions": "3;148;31", "wc_limitations": "10;16;13", "wc_review": "429;888;428", "wc_reply_reviewers": "130;16;11", "wc_reply_authors": "507;510;95", "reply_reviewers": "2;1;1", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 179.33333333333334, 132.71607120298415 ], "wc_strengths_avg": [ 66.0, 14.165686240583852 ], "wc_weaknesses_avg": [ 262.6666666666667, 27.34146220587984 ], "wc_questions_avg": [ 60.666666666666664, 62.8030430968295 ], "wc_limitations_avg": [ 13.0, 2.449489742783178 ], "wc_review_avg": [ 581.6666666666666, 216.61076201846993 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 54.95654849262481 ], "wc_reply_authors_avg": [ 370.6666666666667, 194.92961692763765 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=118260951279666854&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "skku.edu;acryl.ai;skku.edu;skku.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Sungkyunkwan University;Acryl Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.skku.edu;", "aff_unique_abbr": "SKKU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea;" }, { "title": "Probabilistic Graph Rewiring via Virtual Nodes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95568", "id": "LpvSHL9lcK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LpvSHL9lcK", "openreview": "https://openreview.net/forum?id=LpvSHL9lcK", "poster": "/media/PosterPDFs/NeurIPS%202024/95568.png?t=1733875270.3235853", "project": "", "author_site": "Chendi Qian, Andrei Manolache, Christopher Morris, Mathias Niepert", "tldr": "", "abstract": "Message-passing graph neural networks (MPNNs) have emerged as a powerful paradigm for graph-based machine learning. Despite their effectiveness, MPNNs face challenges such as under-reaching and over-squashing, where limited receptive fields and structural bottlenecks hinder information flow in the graph. While graph transformers hold promise in addressing these issues, their scalability is limited due to quadratic complexity regarding the number of nodes, rendering them impractical for larger graphs. Here, we propose implicitly rewired message-passing neural networks (IPR-MPNNs), a novel approach that integrates implicit probabilistic graph rewiring into MPNNs. By introducing a small number of virtual nodes, i.e., adding additional nodes to a given graph and connecting them to existing nodes, in a differentiable, end-to-end manner, IPR-MPNNs enable long-distance message propagation, circumventing quadratic complexity. Theoretically, we demonstrate that IPR-MPNNs surpass the expressiveness of traditional MPNNs. Empirically, we validate our approach by showcasing its ability to mitigate under-reaching and over-squashing effects, achieving state-of-the-art performance across multiple graph datasets. Notably, IPR-MPNNs outperform graph transformers while maintaining significantly faster computational efficiency.", "keywords": "probabilistic;graph;rewiring;virtual;nodes;long-range", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/1de15ceb316bdbb828f3a7520cefc758ef5beec6.zip", "author": "Chendi Qian;Andrei Manolache;Christopher Morris;Mathias Niepert", "authorids": "~Chendi_Qian1;~Andrei_Manolache1;~Christopher_Morris1;~Mathias_Niepert1", "gender": ";M;M;M", "homepage": "https://github.com/chendiqian;https://andreimano.github.io;http://christophermorris.info;http://www.matlog.net", "dblp": "322/9379;290/2275;156/7303;n/MathiasNiepert", "google_scholar": ";0H7Htc4AAAAJ;;https://scholar.google.de/citations?user=p5vLzq0AAAAJ", "orcid": ";;;", "linkedin": ";andreimano/;;", "or_profile": "~Chendi_Qian1;~Andrei_Manolache1;~Christopher_Morris1;~Mathias_Niepert1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Universit\u00e4t Stuttgart;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;NEC", "aff_domain": "rwth-aachen.de;uni-stuttgart.de;rwth-aachen.de;neclab.eu", "position": "PhD student;PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nqian2024probabilistic,\ntitle={Probabilistic Graph Rewiring via Virtual Nodes},\nauthor={Chendi Qian and Andrei Manolache and Christopher Morris and Mathias Niepert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LpvSHL9lcK}\n}", "github": "", "reviewers": "5s4E;YQwu;UsNW;bePC", "pdf_size": 686215, "rating": "5;5;5;7", "confidence": "5;3;4;4", "soundness": "3;3;2;4", "novelty": "3;3;2;3", "presentation": "2;2;3;4", "wc_summary": "70;43;50;42", "wc_strengths": "116;60;32;88", "wc_weaknesses": "1281;173;214;229", "wc_questions": "13;14;85;78", "wc_limitations": "121;1;1;48", "wc_review": "1601;291;382;485", "wc_reply_reviewers": "432;19;74;75", "wc_reply_authors": "1510;11;774;131", "reply_reviewers": "1;1;2;1", "reply_authors": "5;2;4;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 51.25, 11.255554184490428 ], "wc_strengths_avg": [ 74.0, 31.304951684997057 ], "wc_weaknesses_avg": [ 474.25, 466.22814962204933 ], "wc_questions_avg": [ 47.5, 34.09178786746157 ], "wc_limitations_avg": [ 42.75, 49.08347481586853 ], "wc_review_avg": [ 689.75, 530.5682684631639 ], "wc_reply_reviewers_avg": [ 150.0, 164.38217664941658 ], "wc_reply_authors_avg": [ 606.5, 596.8854580235642 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15222261025630786164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "rwth-aachen.de;uni-stuttgart.de;rwth-aachen.de;neclab.eu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "RWTH Aachen University;University of Stuttgart;NEC Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rwth-aachen.de;https://www.uni-stuttgart.de;https://www.nec.com", "aff_unique_abbr": "RWTH;Uni Stuttgart;NEC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Germany;Japan" }, { "title": "Spherical Frustum Sparse Convolution Network for LiDAR Point Cloud Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95567", "id": "LqdcdqIeVD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LqdcdqIeVD", "openreview": "https://openreview.net/forum?id=LqdcdqIeVD", "poster": "/media/PosterPDFs/NeurIPS%202024/95567.png?t=1731467612.6053388", "project": "", "author_site": "Yu Zheng, Guangming Wang, Jiuming Liu, Marc Pollefeys, Hesheng Wang", "tldr": "", "abstract": "LiDAR point cloud semantic segmentation enables the robots to obtain fine-grained semantic information of the surrounding environment. Recently, many works project the point cloud onto the 2D image and adopt the 2D Convolutional Neural Networks (CNNs) or vision transformer for LiDAR point cloud semantic segmentation. However, since more than one point can be projected onto the same 2D position but only one point can be preserved, the previous 2D projection-based segmentation methods suffer from inevitable quantized information loss, which results in incomplete geometric structure, especially for small objects. To avoid quantized information loss, in this paper, we propose a novel spherical frustum structure, which preserves all points projected onto the same 2D position. Additionally, a hash-based representation is proposed for memory-efficient spherical frustum storage. Based on the spherical frustum structure, the Spherical Frustum sparse Convolution (SFC) and Frustum Farthest Point Sampling (F2PS) are proposed to convolve and sample the points stored in spherical frustums respectively. Finally, we present the Spherical Frustum sparse Convolution Network (SFCNet) to adopt 2D CNNs for LiDAR point cloud semantic segmentation without quantized information loss. Extensive experiments on the SemanticKITTI and nuScenes datasets demonstrate that our SFCNet outperforms previous 2D projection-based semantic segmentation methods based on conventional spherical projection and shows better performance on small object segmentation by preserving complete geometric structure. Codes will be available at https://github.com/IRMVLab/SFCNet.", "keywords": "LiDAR Point Cloud Semantic Segmentation;2D Projection;Quantized Information Loss", "primary_area": "machine_vision", "supplementary_material": "/attachment/adc4ed77808cf07c9f07636f3c2ae24f75251167.zip", "author": "Yu Zheng;Guangming Wang;Jiuming Liu;Marc Pollefeys;Hesheng Wang", "authorids": "~Yu_Zheng13;~Guangming_Wang1;~Jiuming_Liu1;~Marc_Pollefeys2;~Hesheng_Wang1", "gender": "M;M;;M;M", "homepage": "https://github.com/COMoER;https://guangmingw.github.io/;;;https://irmv.sjtu.edu.cn/wanghesheng", "dblp": ";85/2594-1;;p/MarcPollefeys;08/2190-1", "google_scholar": "M4cXM9kAAAAJ;https://scholar.google.com.hk/citations?user=GGHfHSIAAAAJ;;YYH0BjEAAAAJ;https://scholar.google.com.hk/citations?user=q6AY9XsAAAAJ", "orcid": ";;;;", "linkedin": ";;;marc-pollefeys-30a7075/;", "or_profile": "~Yu_Zheng13;~Guangming_Wang1;~Jiuming_Liu1;~Marc_Pollefeys2;~Hesheng_Wang1", "aff": "Shanghai Jiaotong University;University of Cambridge;;Swiss Federal Institute of Technology;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;cam.ac.uk;;ethz.ch;sjtu.edu.cn", "position": "PhD student;Postdoc;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2024spherical,\ntitle={Spherical Frustum Sparse Convolution Network for Li{DAR} Point Cloud Semantic Segmentation},\nauthor={Yu Zheng and Guangming Wang and Jiuming Liu and Marc Pollefeys and Hesheng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LqdcdqIeVD}\n}", "github": "", "reviewers": "MsCy;bPaa;AnTM;BSNT", "pdf_size": 9461016, "rating": "5;6;7;8", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "130;99;79;110", "wc_strengths": "57;78;68;207", "wc_weaknesses": "92;57;214;125", "wc_questions": "60;105;34;90", "wc_limitations": "7;63;5;45", "wc_review": "346;402;400;577", "wc_reply_reviewers": "0;52;74;42", "wc_reply_authors": "0;48;37;41", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.5, 18.445866745696716 ], "wc_strengths_avg": [ 102.5, 60.788568004189734 ], "wc_weaknesses_avg": [ 122.0, 58.30523132618548 ], "wc_questions_avg": [ 72.25, 27.38955092731533 ], "wc_limitations_avg": [ 30.0, 24.839484696748443 ], "wc_review_avg": [ 431.25, 87.09585236967372 ], "wc_reply_reviewers_avg": [ 42.0, 26.870057685088806 ], "wc_reply_authors_avg": [ 31.5, 18.607794065928395 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12456078775392483753&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;cam.ac.uk;;ethz.ch;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;University of Cambridge;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.cam.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "SJTU;Cambridge;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "China;United Kingdom;Switzerland" }, { "title": "Opponent Modeling based on Subgoal Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95566", "id": "Lt6wO0oZ8k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Lt6wO0oZ8k", "openreview": "https://openreview.net/forum?id=Lt6wO0oZ8k", "poster": "/media/PosterPDFs/NeurIPS%202024/95566.png?t=1731716825.660633", "project": "", "author_site": "XiaoPeng Yu, Jiechuan Jiang, Zongqing Lu", "tldr": "", "abstract": "When an agent is in a multi-agent environment, it may face previously unseen opponents, and it is a challenge to cooperate with other agents to accomplish the task together or to maximize its own rewards. Most opponent modeling methods deal with the non-stationarity caused by unknown opponent policies via predicting the opponent\u2019s actions. However, focusing on the opponent\u2019s action is shortsighted, which also constrains the adaptability to unknown opponents in complex tasks. In this paper, we propose opponent modeling based on subgoal inference, which infers the opponent\u2019s subgoals through historical trajectories. As subgoals are likely to be shared by different opponent policies, predicting subgoals can yield better generalization to unknown opponents. Additionally, we design two subgoal selection modes for cooperative games and general-sum games respectively. Empirically, we show that our method achieves more effective adaptation than existing methods in a variety of tasks.", "keywords": "multi-agent;deep reinforment learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/2af7477a77b3d15406fa3becc1fa9b6cbb21f465.zip", "author": "XiaoPeng Yu;Jiechuan Jiang;Zongqing Lu", "authorids": "~XiaoPeng_Yu1;~Jiechuan_Jiang1;~Zongqing_Lu2", "gender": ";;", "homepage": ";;", "dblp": ";220/4026;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~XiaoPeng_Yu1;~Jiechuan_Jiang1;~Zongqing_Lu2", "aff": ";Tsinghua University;", "aff_domain": ";mail.tsinghua.edu.cn;", "position": ";Intern;", "bibtex": "@inproceedings{\nyu2024opponent,\ntitle={Opponent Modeling based on Subgoal Inference},\nauthor={XiaoPeng Yu and Jiechuan Jiang and Zongqing Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Lt6wO0oZ8k}\n}", "github": "", "reviewers": "fQcX;FFHk;gEZB;FFHE", "pdf_size": 1226712, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "124;43;78;62", "wc_strengths": "97;24;88;114", "wc_weaknesses": "480;438;286;57", "wc_questions": "552;20;77;372", "wc_limitations": "1;77;29;1", "wc_review": "1254;602;558;606", "wc_reply_reviewers": "52;325;31;39", "wc_reply_authors": "2471;723;122;8", "reply_reviewers": "2;3;1;2", "reply_authors": "7;6;4;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.75, 29.961433543807612 ], "wc_strengths_avg": [ 80.75, 34.06886408438062 ], "wc_weaknesses_avg": [ 315.25, 165.64929067158724 ], "wc_questions_avg": [ 255.25, 217.2594934634618 ], "wc_limitations_avg": [ 27.0, 31.04834939252005 ], "wc_review_avg": [ 755.0, 288.7126599233224 ], "wc_reply_reviewers_avg": [ 111.75, 123.3478313550749 ], "wc_reply_authors_avg": [ 831.0, 985.0474607855197 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.75, 1.920286436967152 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13465833812538168609&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";mail.tsinghua.edu.cn;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Posture-Informed Muscular Force Learning for Robust Hand Pressure Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95565", "id": "LtS7pP8rEn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LtS7pP8rEn", "openreview": "https://openreview.net/forum?id=LtS7pP8rEn", "poster": "/media/PosterPDFs/NeurIPS%202024/95565.png?t=1733737014.6601195", "project": "", "author_site": "Kyungjin Seo, Junghoon Seo, Hanseok Jeong, Sangpil Kim, Sang Ho Yoon", "tldr": "", "abstract": "We present PiMForce, a novel framework that enhances hand pressure estimation by leveraging 3D hand posture information to augment forearm surface electromyography (sEMG) signals. Our approach utilizes detailed spatial information from 3D hand poses in conjunction with dynamic muscle activity from sEMG to enable accurate and robust whole-hand pressure measurements under diverse hand-object interactions. We also developed a multimodal data collection system that combines a pressure glove, an sEMG armband, and a markerless finger-tracking module. We created a comprehensive dataset from 21 participants, capturing synchronized data of hand posture, sEMG signals, and exerted hand pressure across various hand postures and hand-object interaction scenarios using our collection system. Our framework enables precise hand pressure estimation in complex and natural interaction scenarios. Our approach substantially mitigates the limitations of traditional sEMG-based or vision-based methods by integrating 3D hand posture information with sEMG signals.\nVideo demos, data, and code are available online.", "keywords": "Hand Pressure Estimation;Multi-modality;Hand-Object Interaction", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Kyungjin Seo;Junghoon Seo;Hanseok Jeong;Sangpil Kim;Sang Ho Yoon", "authorids": "~Kyungjin_Seo1;~Junghoon_Seo1;~Hanseok_Jeong1;~Sangpil_Kim4;~Sang_Ho_Yoon1", "gender": "M;M;M;M;M", "homepage": ";https://mikigom.github.io;;https://kuaicv.com/;https://hcitech.org/", "dblp": ";211/7655;;182/2231;138/0107", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=bpftXC8AAAAJ;mzH6yYgAAAAJ;ejaRQn8AAAAJ", "orcid": "0009-0004-8552-5988;;0000-0001-8357-3047;0000-0002-7349-0018;0000-0002-3780-5350", "linkedin": ";;jeong-hanseok-88b385152/;spkim921;", "or_profile": "~Kyungjin_Seo1;~Junghoon_Seo1;~Hanseok_Jeong1;~Sangpil_Kim4;~Sang_Ho_Yoon1", "aff": "Korea Advanced Institute of Science & Technology;SI Analytics;Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;si-analytics.ai;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "position": "MS student;Researcher;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nseo2024postureinformed,\ntitle={Posture-Informed Muscular Force Learning for Robust Hand Pressure Estimation},\nauthor={Kyungjin Seo and Junghoon Seo and Hanseok Jeong and Sangpil Kim and Sang Ho Yoon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LtS7pP8rEn}\n}", "github": "", "reviewers": "3PTe;YYDP;nF88;saS6", "pdf_size": 42351806, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "67;149;74;48", "wc_strengths": "37;62;81;27", "wc_weaknesses": "102;147;155;172", "wc_questions": "47;189;28;54", "wc_limitations": "2;7;1;8", "wc_review": "255;554;339;309", "wc_reply_reviewers": "0;73;103;11", "wc_reply_authors": "104;986;82;461", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 38.43501008195523 ], "wc_strengths_avg": [ 51.75, 21.158627082114755 ], "wc_weaknesses_avg": [ 144.0, 25.874698065871222 ], "wc_questions_avg": [ 79.5, 63.93160407810835 ], "wc_limitations_avg": [ 4.5, 3.0413812651491097 ], "wc_review_avg": [ 364.25, 113.61200420730196 ], "wc_reply_reviewers_avg": [ 46.75, 42.769001625008734 ], "wc_reply_authors_avg": [ 408.25, 365.91827981121685 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GvCXo2IUm9AJ:scholar.google.com/&scioq=Posture-Informed+Muscular+Force+Learning+for+Robust+Hand+Pressure+Estimation&hl=en&as_sdt=0,22", "gs_version_total": 4, "email": "kaist.ac.kr;si-analytics.ai;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;SI Analytics;Korea University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea;" }, { "title": "Two-way Deconfounder for Off-policy Evaluation in Causal Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95564", "id": "Lu9Rasfmjj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Lu9Rasfmjj", "openreview": "https://openreview.net/forum?id=Lu9Rasfmjj", "poster": "", "project": "", "author_site": "Shuguang Yu, Shuxing Fang, Ruixin Peng, Zhengling Qi, Fan Zhou, Chengchun Shi", "tldr": "", "abstract": "This paper studies off-policy evaluation (OPE) in the presence of unmeasured confounders. Inspired by the two-way fixed effects regression model widely used in the panel data literature, we propose a two-way unmeasured confounding assumption to model the system dynamics in causal reinforcement learning and develop a two-way deconfounder algorithm that devises a neural tensor network to simultaneously learn both the unmeasured confounders and the system dynamics, based on which a model-based estimator can be constructed for consistent policy value estimation. We illustrate the effectiveness of the proposed estimator through theoretical results and numerical experiments.", "keywords": "off-policy evaluation;unmeasured confounding;two-way deconfounder;neural tensor network", "primary_area": "causal_inference", "supplementary_material": "", "author": "Shuguang Yu;Shuxing Fang;Ruixin Peng;Zhengling Qi;Fan Zhou;Chengchun Shi", "authorids": "~Shuguang_Yu1;~Shuxing_Fang3;~Ruixin_Peng1;~Zhengling_Qi1;~Fan_Zhou7;~Chengchun_Shi1", "gender": "M;;M;;;M", "homepage": "https://www.linkedin.com/in/%E6%9B%99%E5%85%89-%E4%BA%8E-6564162b2/;;;https://sites.google.com/view/statsqizl/home?authuser=0;;https://callmespring.github.io/", "dblp": ";;;173/0201;;", "google_scholar": ";;;;4QJkjl0AAAAJ;dDGy3N0AAAAJ", "orcid": ";;;;;", "linkedin": ";%E6%95%B0%E7%BA%BF-%E6%96%B9-b70898275/;\u9510\u65b0-\u5f6d-95a4182b2;;;", "or_profile": "~Shuguang_Yu1;~Shuxing_Fang3;~Ruixin_Peng1;~Zhengling_Qi1;~Fan_Zhou7;~Chengchun_Shi1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;George Washington University;Shanghai University of Finance and Economics;London School of Economics", "aff_domain": "sufe.edu.cn;sufe.edu.cn;sufe.edu.cn;gwu.edu;shufe.edu;lse.ac.uk", "position": "PhD student;MS student;MS student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nyu2024twoway,\ntitle={Two-way Deconfounder for Off-policy Evaluation in Causal Reinforcement Learning},\nauthor={Shuguang Yu and Shuxing Fang and Ruixin Peng and Zhengling Qi and Fan Zhou and Chengchun Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Lu9Rasfmjj}\n}", "github": "", "reviewers": "P4Ev;wVkG;GM6x;7kZ2", "pdf_size": 796759, "rating": "4;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;4;3", "wc_summary": "113;26;186;56", "wc_strengths": "77;33;114;111", "wc_weaknesses": "286;162;170;67", "wc_questions": "1;18;279;120", "wc_limitations": "1;13;1;12", "wc_review": "478;252;750;366", "wc_reply_reviewers": "100;26;236;78", "wc_reply_authors": "687;26;163;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 95.25, 61.00563498563063 ], "wc_strengths_avg": [ 83.75, 32.706077416896086 ], "wc_weaknesses_avg": [ 171.25, 77.65750124746482 ], "wc_questions_avg": [ 104.5, 110.54976255062695 ], "wc_limitations_avg": [ 6.75, 5.7608593109014565 ], "wc_review_avg": [ 461.5, 184.7396817145683 ], "wc_reply_reviewers_avg": [ 110.0, 77.54998388136518 ], "wc_reply_authors_avg": [ 226.5, 271.5257814646705 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nYMimHslLDYJ:scholar.google.com/&scioq=Two-way+Deconfounder+for+Off-policy+Evaluation+in+Causal+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "sufe.edu.cn;sufe.edu.cn;sufe.edu.cn;gwu.edu;shufe.edu;lse.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Shanghai University of Finance and Economics;George Washington University;London School of Economics", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sufe.edu.cn;https://www.gwu.edu;https://www.lse.ac.uk", "aff_unique_abbr": "SUFE;GWU;LSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;2", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Chain of Agents: Large Language Models Collaborating on Long-Context Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95563", "id": "LuCLf4BJsr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LuCLf4BJsr", "openreview": "https://openreview.net/forum?id=LuCLf4BJsr", "poster": "", "project": "", "author_site": "Yusen Zhang, Ruoxi Sun, Yanfei Chen, Tomas Pfister, Rui Zhang, Sercan Arik", "tldr": "", "abstract": "Addressing the challenge of effectively processing long contexts has become a critical issue for Large Language Models (LLMs). Two common strategies have emerged: 1) reducing the input length, such as retrieving relevant chunks by Retrieval-Augmented Generation (RAG), and 2) expanding the context window limit of LLMs. However, both strategies have drawbacks: input reduction has no guarantee of covering the part with needed information, while window extension struggles with focusing on the pertinent information for solving the task. To mitigate these limitations, we propose Chain-of-Agents (CoA), a novel framework that harnesses multi-agent collaboration through natural language to enable information aggregation and context reasoning across various LLMs over long-context tasks. CoA consists of multiple worker agents who sequentially communicate to handle different segmented portions of the text, followed by a manager agent who synthesizes these contributions into a coherent final output. CoA processes the entire input by interleaving reading and reasoning, and it mitigates long context focus issues by assigning each agent a short context. We perform a comprehensive evaluation of CoA on a wide range of long-context tasks in question answering, summarization, and code completion, demonstrating significant improvements by up to 10% over strong baselines of RAG, Full-Context, and multi-agent LLMs.", "keywords": "Large Language Models;Long Context Tasks;Multi-agent Collaboration;LLM Agents", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yusen Zhang;Ruoxi Sun;Yanfei Chen;Tomas Pfister;Rui Zhang;Sercan O Arik", "authorids": "~Yusen_Zhang1;~Ruoxi_Sun2;~Yanfei_Chen1;~Tomas_Pfister1;~Rui_Zhang7;~Sercan_O_Arik1", "gender": "M;F;M;M;M;M", "homepage": "https://www.yuszh.com;;https://sites.google.com/site/yanfeichen1990;http://tomas.pfister.fi;https://ryanzhumich.github.io/;https://www.sercanarik.com/", "dblp": "38/10863-1.html;72/7683;58/8788;14/8360;60/2536-37;", "google_scholar": "FGyMx88AAAAJ;ut1-7LAAAAAJ;qyua6O4AAAAJ;ahSpJOAAAAAJ;nhuB5CEAAAAJ;", "orcid": ";;;0009-0004-4088-8718;;0000-0001-6333-1729", "linkedin": ";;;;;", "or_profile": "~Yusen_Zhang1;~Ruoxi_Sun2;~Yanfei_Chen1;~Tomas_Pfister1;~Rui_Zhang7;~Sercan_O_Arik1", "aff": "Pennsylvania State University;Google;Google;Google;Pennsylvania State University;Google", "aff_domain": "psu.edu;google.com;google.com;google.com;psu.edu;google.com", "position": "PhD student;Google;Software Engineer;Head of Research @ Cloud AI;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nzhang2024chain,\ntitle={Chain of Agents: Large Language Models Collaborating on Long-Context Tasks},\nauthor={Yusen Zhang and Ruoxi Sun and Yanfei Chen and Tomas Pfister and Rui Zhang and Sercan O Arik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LuCLf4BJsr}\n}", "github": "", "reviewers": "KMmx;FfHC;vGWD;FiRU", "pdf_size": 2683436, "rating": "5;5;5;7", "confidence": "5;4;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "95;111;119;76", "wc_strengths": "35;98;87;41", "wc_weaknesses": "292;55;119;225", "wc_questions": "137;95;42;3", "wc_limitations": "6;1;3;15", "wc_review": "565;360;370;360", "wc_reply_reviewers": "475;23;0;0", "wc_reply_authors": "1512;111;75;75", "reply_reviewers": "2;1;0;0", "reply_authors": "4;3;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 16.452583383772897 ], "wc_strengths_avg": [ 65.25, 27.60774347895894 ], "wc_weaknesses_avg": [ 172.75, 91.79426725019378 ], "wc_questions_avg": [ 69.25, 50.95279678290486 ], "wc_limitations_avg": [ 6.25, 5.356071321407137 ], "wc_review_avg": [ 413.75, 87.41960592452931 ], "wc_reply_reviewers_avg": [ 124.5, 202.57899693699738 ], "wc_reply_authors_avg": [ 443.25, 617.2181036716275 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10102257986977578858&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "psu.edu;google.com;google.com;google.com;psu.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Pennsylvania State University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.psu.edu;https://www.google.com", "aff_unique_abbr": "PSU;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are Your Models Still Fair? Fairness Attacks on Graph Neural Networks via Node Injections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95562", "id": "LuqrIkGuru", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LuqrIkGuru", "openreview": "https://openreview.net/forum?id=LuqrIkGuru", "poster": "/media/PosterPDFs/NeurIPS%202024/95562.png?t=1733134816.4040267", "project": "", "author_site": "Zihan Luo, Hong Huang, Yongkang Zhou, Jiping Zhang, Nuo Chen, Hai Jin", "tldr": "", "abstract": "Despite the remarkable capabilities demonstrated by Graph Neural Networks (GNNs) in graph-related tasks, recent research has revealed the fairness vulnerabilities in GNNs when facing malicious adversarial attacks. However, all existing fairness attacks require manipulating the connectivity between existing nodes, which may be prohibited in reality. To this end, we introduce a Node Injection-based Fairness Attack (NIFA), exploring the vulnerabilities of GNN fairness in such a more realistic setting. In detail, NIFA first designs two insightful principles for node injection operations, namely the uncertainty-maximization principle and homophily-increase principle, and then optimizes injected nodes\u2019 feature matrix to further ensure the effectiveness of fairness attacks. Comprehensive experiments on three real-world datasets consistently demonstrate that NIFA can significantly undermine the fairness of mainstream GNNs, even including fairness-aware GNNs, by injecting merely 1% of nodes. We sincerely hope that our work can stimulate increasing attention from researchers on the vulnerability of GNN fairness, and encourage the development of corresponding defense mechanisms. Our code and data are released at: https://github.com/CGCL-codes/NIFA.", "keywords": "Fairness;Graph Neural Network;Attack", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/0c41347633312265e788654aab089f2349d90bf1.zip", "author": "Zihan Luo;Hong Huang;Yongkang Zhou;Jiping Zhang;Nuo Chen;Hai Jin", "authorids": "~Zihan_Luo2;~Hong_Huang2;~Yongkang_Zhou1;~Jiping_Zhang2;~Nuo_Chen12;~Hai_Jin1", "gender": "M;;M;M;F;M", "homepage": "https://luozhhh.github.io/;http://faculty.hust.edu.cn/honghuang/en/index.htm;https://github.com/KonkonYoumu;https://github.com/Zjiping;https://github.com/lvelf;http://www.linkedin.com/in/jinhust", "dblp": "167/1837-1;74/3859-1;;;;98/4156", "google_scholar": "JWUJkawAAAAJ;-3d0B50AAAAJ;;;;", "orcid": "0000-0002-7142-448X;0000-0002-5282-551X;;;;0000-0002-3934-7605", "linkedin": ";;;;;jinhust", "or_profile": "~Zihan_Luo2;~Hong_Huang2;~Yongkang_Zhou1;~Jiping_Zhang2;~Nuo_Chen12;~Hai_Jin1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;Associate Professor;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nluo2024are,\ntitle={Are Your Models Still Fair? Fairness Attacks on Graph Neural Networks via Node Injections},\nauthor={Zihan Luo and Hong Huang and Yongkang Zhou and Jiping Zhang and Nuo Chen and Hai Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LuqrIkGuru}\n}", "github": "", "reviewers": "U6JX;wXHX;ZRPp;L3XD", "pdf_size": 3118804, "rating": "4;5;6;7", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "novelty": "1;2;2;3", "presentation": "3;3;3;4", "wc_summary": "83;34;84;64", "wc_strengths": "37;57;34;61", "wc_weaknesses": "545;141;107;102", "wc_questions": "6;175;4;4", "wc_limitations": "10;65;17;7", "wc_review": "681;472;246;238", "wc_reply_reviewers": "18;79;55;17", "wc_reply_authors": "9;11;40;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 20.253086184579377 ], "wc_strengths_avg": [ 47.25, 11.882234638316145 ], "wc_weaknesses_avg": [ 223.75, 186.07978799429023 ], "wc_questions_avg": [ 47.25, 73.7610161264065 ], "wc_limitations_avg": [ 24.75, 23.519938350259338 ], "wc_review_avg": [ 409.25, 182.86795099196578 ], "wc_reply_reviewers_avg": [ 42.25, 26.166533969939543 ], "wc_reply_authors_avg": [ 15.0, 15.016657417681207 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2526414852197396141&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 6, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Can Graph Neural Networks Expose Training Data Properties? An Efficient Risk Assessment Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95561", "id": "Luxk3z1tSG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Luxk3z1tSG", "openreview": "https://openreview.net/forum?id=Luxk3z1tSG", "poster": "", "project": "", "author_site": "Hanyang Yuan, Jiarong Xu, Renhong Huang, Mingli Song, Chunping Wang, YANG YANG", "tldr": "", "abstract": "Graph neural networks (GNNs) have attracted considerable attention due to their diverse applications. However, the scarcity and quality limitations of graph data present challenges to their training process in practical settings. To facilitate the development of effective GNNs, companies and researchers often seek external collaboration. Yet, directly sharing data raises privacy concerns, motivating data owners to train GNNs on their private graphs and share the trained models. Unfortunately, these models may still inadvertently disclose sensitive properties of their training graphs (\\textit{e.g.}, average default rate in a transaction network), leading to severe consequences for data owners. \nIn this work, we study graph property inference attack to identify the risk of sensitive property information leakage from shared models.\nExisting approaches typically train numerous shadow models for developing such attack, which is computationally intensive and impractical. To address this issue, we propose an efficient graph property inference attack by leveraging model approximation techniques. Our method only requires training a small set of models on graphs, while generating a sufficient number of approximated shadow models for attacks.\nTo enhance diversity while reducing errors in the approximated models, we apply edit distance to quantify the diversity within a group of approximated models and introduce a theoretically guaranteed criterion to evaluate each model's error. Subsequently, we propose a novel selection mechanism to ensure that the retained approximated models achieve high diversity and low error.\nExtensive experiments across six real-world scenarios demonstrate our method's substantial improvement, with average increases of 2.7\\% in attack accuracy and 4.1\\% in ROC-AUC, while being 6.5$\\times$ faster compared to the best baseline.", "keywords": "graph neural network;property inference attack;efficiency", "primary_area": "privacy", "supplementary_material": "", "author": "Hanyang Yuan;Jiarong Xu;Renhong Huang;Mingli Song;Chunping Wang;Yang Yang", "authorids": "~Hanyang_Yuan1;~Jiarong_Xu2;~Renhong_Huang1;~Mingli_Song1;~Chunping_Wang1;~Yang_Yang35", "gender": "M;F;M;M;F;M", "homepage": "https://github.com/xxx08796;https://galina0217.github.io/;https://github.com/renH2;https://person.zju.edu.cn/msong;;http://yangy.org", "dblp": "371/9445;;325/0914;71/5333;54/2715-1;", "google_scholar": ";;;7oLbhAwAAAAJ;Rmy5RogAAAAJ;", "orcid": ";0000-0003-2973-1889;0000-0002-7808-9768;0000-0003-2621-6048;0000-0003-1854-8667;0000-0002-5058-4417", "linkedin": ";;;;https://linkedin.com/in/chunping-wang-7b94a15/;", "or_profile": "~Hanyang_Yuan1;~Jiarong_Xu2;~Renhong_Huang1;~Mingli_Song1;~Chunping_Wang1;~Yang_Yang35", "aff": "Zhejiang University;Fudan University;Zhejiang University;Zhejiang University;Finvolution Group;Zhejiang University", "aff_domain": "zju.edu.cn;fudan.edu.cn;zju.edu.cn;zju.edu.cn;xinye.com;zju.edu.cn", "position": "PhD student;Assistant Professor;MS student;Full Professor;Principal Scientist;Associate Professor", "bibtex": "@inproceedings{\nyuan2024can,\ntitle={Can Graph Neural Networks Expose Training Data Properties? An Efficient Risk Assessment Approach},\nauthor={Hanyang Yuan and Jiarong Xu and Renhong Huang and Mingli Song and Chunping Wang and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Luxk3z1tSG}\n}", "github": "", "reviewers": "xZDY;j8ts;5YZA", "pdf_size": 646170, "rating": "5;5;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "4;2;3", "wc_summary": "64;75;88", "wc_strengths": "55;43;13", "wc_weaknesses": "187;62;11", "wc_questions": "37;5;288", "wc_limitations": "13;2;2", "wc_review": "356;187;402", "wc_reply_reviewers": "270;14;0", "wc_reply_authors": "596;74;100", "reply_reviewers": "3;1;0", "reply_authors": "5;3;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 75.66666666666667, 9.809292646374775 ], "wc_strengths_avg": [ 37.0, 17.663521732655695 ], "wc_weaknesses_avg": [ 86.66666666666667, 73.93841281018203 ], "wc_questions_avg": [ 110.0, 126.54116589737376 ], "wc_limitations_avg": [ 5.666666666666667, 5.185449728701348 ], "wc_review_avg": [ 315.0, 92.43736618200816 ], "wc_reply_reviewers_avg": [ 94.66666666666667, 124.11106137470405 ], "wc_reply_authors_avg": [ 256.6666666666667, 240.17956245738773 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3190012241080691392&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;fudan.edu.cn;zju.edu.cn;zju.edu.cn;xinye.com;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Zhejiang University;Fudan University;FinVolution Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.fudan.edu.cn;https://www.finvolutiongroup.com", "aff_unique_abbr": "ZJU;Fudan;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Surprising Ineffectiveness of Pre-Trained Visual Representations for Model-Based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95560", "id": "LvAy07mCxU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LvAy07mCxU", "openreview": "https://openreview.net/forum?id=LvAy07mCxU", "poster": "", "project": "", "author_site": "Moritz Schneider, Robert Krug, Narunas Vaskevicius, Luigi Palmieri, Joschka Boedecker", "tldr": "", "abstract": "Visual Reinforcement Learning (RL) methods often require extensive amounts of data. As opposed to model-free RL, model-based RL (MBRL) offers a potential solution with efficient data utilization through planning. Additionally, RL lacks generalization capabilities for real-world tasks. Prior work has shown that incorporating pre-trained visual representations (PVRs) enhances sample efficiency and generalization. While PVRs have been extensively studied in the context of model-free RL, their potential in MBRL remains largely unexplored. In this paper, we benchmark a set of PVRs on challenging control tasks in a model-based RL setting. We investigate the data efficiency, generalization capabilities, and the impact of different properties of PVRs on the performance of model-based agents. Our results, perhaps surprisingly, reveal that for MBRL current PVRs are not more sample efficient than learning representations from scratch, and that they do not generalize better to out-of-distribution (OOD) settings. To explain this, we analyze the quality of the trained dynamics model. Furthermore, we show that data diversity and network architecture are the most important contributors to OOD generalization performance.", "keywords": "reinforcement learning;rl;model-based reinforcement learning;representation learning;pvr;visual representations", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Moritz Schneider;Robert Krug;Narunas Vaskevicius;Luigi Palmieri;Joschka Boedecker", "authorids": "~Moritz_Schneider1;~Robert_Krug1;~Narunas_Vaskevicius2;~Luigi_Palmieri1;~Joschka_Boedecker1", "gender": "M;M;M;M;M", "homepage": "https://www.schneimo.com;;;https://palmieri.github.io/;https://nr.informatik.uni-freiburg.de", "dblp": ";77/9125-3;60/3997;153/7592;84/5457", "google_scholar": "Sl-SkEAAAAAJ;https://scholar.google.com/citations?hl=de;https://scholar.google.com/citations?hl=en;L-3aQNcAAAAJ;https://scholar.google.de/citations?user=2mv2dDkAAAAJ", "orcid": ";;;0000-0002-4908-5434;", "linkedin": "moritztaylor/;robert-krug-74ab62164/;;lpalmieri/;", "or_profile": "~Moritz_Schneider1;~Robert_Krug1;~Narunas_Vaskevicius2;~Luigi_Palmieri1;~Joschka_B\u00f6decker1", "aff": "Bosch Corporate Research;Bosch Corporate Research;Robert Bosch GmbH, Bosch;Bosch;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "bosch.com;bosch.com;de.bosch.com;bosch.com;uni-freiburg.de", "position": "PhD student;Researcher;Research Scientist;Group Leader;Full Professor", "bibtex": "@inproceedings{\nschneider2024the,\ntitle={The Surprising Ineffectiveness of Pre-Trained Visual Representations for Model-Based Reinforcement Learning},\nauthor={Moritz Schneider and Robert Krug and Narunas Vaskevicius and Luigi Palmieri and Joschka Boedecker},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LvAy07mCxU}\n}", "github": "", "reviewers": "32eA;HgP2;DQ5X;mY2R", "pdf_size": 3261744, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "109;42;112;87", "wc_strengths": "159;48;208;72", "wc_weaknesses": "127;65;350;83", "wc_questions": "257;117;42;70", "wc_limitations": "9;7;41;122", "wc_review": "661;279;753;434", "wc_reply_reviewers": "136;82;115;40", "wc_reply_authors": "200;147;303;43", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 27.98660393831306 ], "wc_strengths_avg": [ 121.75, 64.69302512636119 ], "wc_weaknesses_avg": [ 156.25, 114.1126088563398 ], "wc_questions_avg": [ 121.5, 82.69371197376498 ], "wc_limitations_avg": [ 44.75, 46.59600304747179 ], "wc_review_avg": [ 531.75, 186.47704282297056 ], "wc_reply_reviewers_avg": [ 93.25, 36.27240686803124 ], "wc_reply_authors_avg": [ 173.25, 93.81464437922259 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15179676831032486165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "bosch.com;bosch.com;de.bosch.com;bosch.com;uni-freiburg.de", "author_num": 5, "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Bosch;Robert Bosch GmbH;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": "Corporate Research;;", "aff_unique_url": "https://research.bosch.com;https://www.bosch.com;https://www.uni-freiburg.de", "aff_unique_abbr": "Bosch;Bosch;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Demystify Mamba in Vision: A Linear Attention Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95559", "id": "LvJ1R88KAk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LvJ1R88KAk", "openreview": "https://openreview.net/forum?id=LvJ1R88KAk", "poster": "/media/PosterPDFs/NeurIPS%202024/95559.png?t=1731409654.8464823", "project": "", "author_site": "Dongchen Han, Ziyi Wang, Zhuofan Xia, Yizeng Han, Yifan Pu, Chunjiang Ge, Jun Song, Shiji Song, Bo Zheng, Gao Huang", "tldr": "", "abstract": "Mamba is an effective state space model with linear computation complexity. It has recently shown impressive efficiency in dealing with high-resolution inputs across various vision tasks. In this paper, we reveal that the powerful Mamba model shares surprising similarities with linear attention Transformer, which typically underperform conventional Transformer in practice. By exploring the similarities and disparities between the effective Mamba and subpar linear attention Transformer, we provide comprehensive analyses to demystify the key factors behind Mamba\u2019s success. Specifically, we reformulate the selective state space model and linear attention within a unified formulation, rephrasing Mamba as a variant of linear attention Transformer with six major distinctions: input gate, forget gate, shortcut, no attention normalization, single-head, and modified block design. For each design, we meticulously analyze its pros and cons, and empirically evaluate its impact on model performance in vision tasks. Interestingly, the results highlight the forget gate and block design as the core contributors to Mamba\u2019s success, while the other four designs are less crucial. Based on these findings, we propose a Mamba-\nInspired Linear Attention (MILA) model by incorporating the merits of these two key designs into linear attention. The resulting model outperforms various vision Mamba models in both image classification and high-resolution dense prediction tasks, while enjoying parallelizable computation and fast inference speed. Code is available at https://github.com/LeapLabTHU/MLLA.", "keywords": "Mamba;Linear Attention Transformer", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Dongchen Han;Ziyi Wang;Zhuofan Xia;Yizeng Han;Yifan Pu;Chunjiang Ge;Jun Song;Shiji Song;Bo Zheng;Gao Huang", "authorids": "~Dongchen_Han1;~Ziyi_Wang21;~Zhuofan_Xia2;~Yizeng_Han1;~Yifan_Pu1;~Chunjiang_Ge1;~Jun_Song5;~Shiji_Song1;~Bo_Zheng5;~Gao_Huang1", "gender": "M;F;;M;M;M;;M;M;M", "homepage": "https://github.com/tian-qing001;https://wzy526.github.io/;;https://yizenghan.top/;https://yifanpu001.github.io/;https://john-ge.github.io/;;;;http://www.gaohuang.net", "dblp": ";;;217/9548;222/2710;307/5106;;72/5351;33/1610-7;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;fk-PUq4AAAAJ;;25mubAsAAAAJ;oM9rnYQAAAAJ;ZueTF6oAAAAJ;;;3gHhO9QAAAAJ;-P9LwcgAAAAJ", "orcid": ";;;;0000-0002-0404-1737;0009-0008-4276-3133;;;0000-0002-4037-6315;", "linkedin": ";ziyi-wang0526;;;;;;;bo-zheng-0315254/;", "or_profile": "~Dongchen_Han1;~Ziyi_Wang21;~Zhuofan_Xia2;~Yizeng_Han1;~Yifan_Pu1;~Chunjiang_Ge1;~Jun_Song5;~Shiji_Song1;~Bo_Zheng5;~Gao_Huang1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University;Control science and technology, Tsinghua University, Tsinghua University;Alibaba Group;Tsinghua University;Alibaba Group;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;alibaba-inc.com;mail.tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn", "position": "PhD student;Intern;;PhD student;PhD student;PhD student;Researcher;Full Professor;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nhan2024demystify,\ntitle={Demystify Mamba in Vision: A Linear Attention Perspective},\nauthor={Dongchen Han and Ziyi Wang and Zhuofan Xia and Yizeng Han and Yifan Pu and Chunjiang Ge and Jun Song and Shiji Song and Bo Zheng and Gao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LvJ1R88KAk}\n}", "github": "", "reviewers": "ii6x;XBcf;dwbt;QFdu", "pdf_size": 872711, "rating": "6;7;7;7", "confidence": "4;5;3;5", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;2;4;3", "wc_summary": "77;48;106;160", "wc_strengths": "20;71;75;157", "wc_weaknesses": "105;345;3;135", "wc_questions": "41;5;56;41", "wc_limitations": "6;5;14;1", "wc_review": "249;474;254;494", "wc_reply_reviewers": "31;416;13;0", "wc_reply_authors": "9;1295;43;43", "reply_reviewers": "1;3;1;0", "reply_authors": "2;3;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.75, 41.37858745776612 ], "wc_strengths_avg": [ 80.75, 49.07328703072579 ], "wc_weaknesses_avg": [ 147.0, 124.34629065637624 ], "wc_questions_avg": [ 35.75, 18.779976038323372 ], "wc_limitations_avg": [ 6.5, 4.716990566028302 ], "wc_review_avg": [ 367.75, 116.4782705057042 ], "wc_reply_reviewers_avg": [ 115.0, 174.13069804029385 ], "wc_reply_authors_avg": [ 347.5, 547.2154511707432 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14208795727448439065&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;alibaba-inc.com;mail.tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;0;1;0", "aff_unique_norm": "Tsinghua University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "THU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Framework for Bilevel Optimization on Riemannian Manifolds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95558", "id": "LvNDqNJKlD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LvNDqNJKlD", "openreview": "https://openreview.net/forum?id=LvNDqNJKlD", "poster": "", "project": "", "author_site": "Andi Han, Bamdev Mishra, Pratik Kumar Jawanpuria, Akiko Takeda", "tldr": "", "abstract": "Bilevel optimization has gained prominence in various applications. In this study, we introduce a framework for solving bilevel optimization problems, where the variables in both the lower and upper levels are constrained on Riemannian manifolds. We present several hypergradient estimation strategies on manifolds and analyze their estimation errors. Furthermore, we provide comprehensive convergence and complexity analyses for the proposed hypergradient descent algorithm on manifolds. We also extend our framework to encompass stochastic bilevel optimization and incorporate the use of general retraction. The efficacy of the proposed framework is demonstrated through several applications.", "keywords": "Bilevel optimization;Riemannian optimization;Riemannian hypergradient", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/a13b976de287b46a95e536da5eb9987e6b410b92.zip", "author": "Andi Han;Bamdev Mishra;Pratik Jawanpuria;Akiko Takeda", "authorids": "~Andi_Han1;~Bamdev_Mishra1;~Pratik_Jawanpuria1;~Akiko_Takeda2", "gender": "M;;M;", "homepage": "https://github.com/andyjm3;https://bamdevmishra.in;https://pratikjawanpuria.com;http://www.or.mist.i.u-tokyo.ac.jp/takeda/index-e.html", "dblp": "268/7976.html;133/8291;32/9841;", "google_scholar": "AKHQHs0AAAAJ;https://scholar.google.co.in/citations?user=25IuNrMAAAAJ;_GUZDtMAAAAJ;", "orcid": "0000-0003-4655-655X;;;", "linkedin": ";;;", "or_profile": "~Andi_Han1;~Bamdev_Mishra1;~Pratik_Jawanpuria1;~Akiko_Takeda2", "aff": "RIKEN AIP;Microsoft;Microsoft;RIKEN", "aff_domain": "riken.jp;microsoft.com;microsoft.com;riken.jp", "position": "Postdoc;Applied Scientist;Principal Researcher;Team leader", "bibtex": "@inproceedings{\nhan2024a,\ntitle={A Framework for Bilevel Optimization on Riemannian Manifolds},\nauthor={Andi Han and Bamdev Mishra and Pratik Jawanpuria and Akiko Takeda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LvNDqNJKlD}\n}", "github": "", "reviewers": "Bnza;kqCt;CDbu;JBnb;4rfq", "pdf_size": 690066, "rating": "6;6;6;7;8", "confidence": "4;3;4;3;4", "soundness": "3;4;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;2;3;3;4", "wc_summary": "83;71;82;61;64", "wc_strengths": "75;49;81;53;46", "wc_weaknesses": "128;47;149;105;44", "wc_questions": "58;14;26;4;62", "wc_limitations": "1;24;5;4;10", "wc_review": "345;205;343;227;226", "wc_reply_reviewers": "17;15;12;111;451", "wc_reply_authors": "0;0;0;232;601", "reply_reviewers": "1;1;1;2;2", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.2, 9.019977827023745 ], "wc_strengths_avg": [ 60.8, 14.344336861632886 ], "wc_weaknesses_avg": [ 94.6, 42.44808594035778 ], "wc_questions_avg": [ 32.8, 23.310083654933546 ], "wc_limitations_avg": [ 8.8, 8.133879763065101 ], "wc_review_avg": [ 269.2, 61.580516399263814 ], "wc_reply_reviewers_avg": [ 121.2, 169.07560439046196 ], "wc_reply_authors_avg": [ 166.6, 235.05199424808117 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1020620726159658, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6591694642085610273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "riken.jp;microsoft.com;microsoft.com;riken.jp", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "RIKEN;Microsoft", "aff_unique_dep": "Advanced Institute for Computational Science;Microsoft Corporation", "aff_unique_url": "https://www.aip.riken.jp;https://www.microsoft.com", "aff_unique_abbr": "RIKEN AIP;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Japan;United States" }, { "title": "Learning 1D Causal Visual Representation with De-focus Attention Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95557", "id": "LxRmdXf72k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LxRmdXf72k", "openreview": "https://openreview.net/forum?id=LxRmdXf72k", "poster": "", "project": "", "author_site": "Tao Chenxin, Xizhou Zhu, Shiqian Su, Lewei Lu, Changyao Tian, Xuan Luo, Gao Huang, Hongsheng Li, Yu Qiao, Jie Zhou, Jifeng Dai", "tldr": "", "abstract": "Modality differences have led to the development of heterogeneous architectures for vision and language models. While images typically require 2D non-causal modeling, texts utilize 1D causal modeling. This distinction poses significant challenges in constructing unified multi-modal models. This paper explores the feasibility of representing images using 1D causal modeling. We identify an \"over-focus\" issue in existing 1D causal vision models, where attention overly concentrates on a small proportion of visual tokens. The issue of \"over-focus\" hinders the model's ability to extract diverse visual features and to receive effective gradients for optimization. To address this, we propose De-focus Attention Networks, which employ learnable bandpass filters to create varied attention patterns. During training, large and scheduled drop path rates, and an auxiliary loss on globally pooled features for global understanding tasks are introduced. These two strategies encourage the model to attend to a broader range of tokens and enhance network optimization. Extensive experiments validate the efficacy of our approach, demonstrating that 1D causal visual representation can perform comparably to 2D non-causal representation in tasks such as global perception, dense prediction, and multi-modal understanding. Code shall be released.", "keywords": "visual causal modeling; vision model; representation learning;", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chenxin Tao;Xizhou Zhu;Shiqian Su;Lewei Lu;Changyao Tian;Xuan Luo;Gao Huang;Hongsheng Li;Yu Qiao;Jie Zhou;Jifeng Dai", "authorids": "~Chenxin_Tao2;~Xizhou_Zhu1;~Shiqian_Su1;~Lewei_Lu1;~Changyao_Tian1;~Xuan_Luo6;~Gao_Huang1;~Hongsheng_Li3;~Yu_Qiao1;~Jie_Zhou3;~Jifeng_Dai1", "gender": ";;M;M;M;F;M;M;;M;M", "homepage": ";;https://shiqian-su.github.io;;https://github.com/ChangyaoTian;https://drive.google.com/file/d/1gTHnHlvvqnxLNWbUr1OEHU2rU3aa74RC/view?usp=sharing;http://www.gaohuang.net;http://www.ee.cuhk.edu.hk/~hsli;;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;https://jifengdai.org/", "dblp": ";170/1608;155/0896;247/6438;307/5415;;;27/7402-1;;00/5012-1;14/9399", "google_scholar": ";02RXI00AAAAJ;e5JcW4UAAAAJ;https://scholar.google.com.hk/citations?user=zdgKJXIAAAAJ;https://scholar.google.com/citations?view_op=list_works;;-P9LwcgAAAAJ;BN2Ze-QAAAAJ;;;SH_-B_AAAAAJ", "orcid": ";;;;0000-0002-3285-4671;;;;;;", "linkedin": ";;;lewei-lu-94015977/;;;;;;;", "or_profile": "~Chenxin_Tao2;~Xizhou_Zhu1;~Shiqian_Su1;~Lewei_Lu1;~Changyao_Tian1;~Xuan_Luo6;~Gao_Huang1;~Hongsheng_Li3;~Yu_Qiao1;~Jie_Zhou3;~Jifeng_Dai1", "aff": ";Tsinghua University;Tsinghua University;SenseTime;The Chinese University of Hong Kong, The Chinese University of Hong Kong;Tsinghua University;Tsinghua University;The Chinese University of Hong Kong;;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;mail.tsinghua.edu.cn;sensetime.com;ee.cuhk.edu.hk;tsinghua.edu.cn;tsinghua.edu.cn;cuhk.edu.hk;;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";Postdoc;Undergrad student;Researcher;PhD student;Undergrad student;Associate Professor;Associate Professor;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ntao2024learning,\ntitle={Learning 1D Causal Visual Representation with De-focus Attention Networks},\nauthor={Chenxin Tao and Xizhou Zhu and Shiqian Su and Lewei Lu and Changyao Tian and Xuan Luo and Gao Huang and Hongsheng Li and Yu Qiao and Jie Zhou and Jifeng Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LxRmdXf72k}\n}", "github": "", "reviewers": "J7C5;WFuU;4dxw;WAfJ", "pdf_size": 3739130, "rating": "4;4;6;7", "confidence": "4;2;4;4", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "68;69;132;123", "wc_strengths": "36;58;105;54", "wc_weaknesses": "77;39;71;79", "wc_questions": "73;6;8;2", "wc_limitations": "110;7;11;7", "wc_review": "364;179;327;265", "wc_reply_reviewers": "171;0;0;26", "wc_reply_authors": "599;0;0;0", "reply_reviewers": "2;0;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 98.0, 29.67322024991558 ], "wc_strengths_avg": [ 63.25, 25.488968201949643 ], "wc_weaknesses_avg": [ 66.5, 16.147755261955144 ], "wc_questions_avg": [ 22.25, 29.38005275693017 ], "wc_limitations_avg": [ 33.75, 44.053234841496035 ], "wc_review_avg": [ 283.75, 70.06202609117153 ], "wc_reply_reviewers_avg": [ 49.25, 71.08929244267381 ], "wc_reply_authors_avg": [ 149.75, 259.3746084334394 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jhJMRScwV6kJ:scholar.google.com/&scioq=Learning+1D+Causal+Visual+Representation+with+De-focus+Attention+Networks&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": ";tsinghua.edu.cn;mail.tsinghua.edu.cn;sensetime.com;ee.cuhk.edu.hk;tsinghua.edu.cn;tsinghua.edu.cn;cuhk.edu.hk;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 11, "aff_unique_index": "0;0;1;2;0;0;2;0;0", "aff_unique_norm": "Tsinghua University;SenseTime;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sensetime.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "THU;SenseTime;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Paths to Equilibrium in Games", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95556", "id": "LxxIiInmuF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LxxIiInmuF", "openreview": "https://openreview.net/forum?id=LxxIiInmuF", "poster": "/media/PosterPDFs/NeurIPS%202024/95556.png?t=1733076724.0495224", "project": "", "author_site": "Bora Yongacoglu, Gurdal Arslan, Lacra Pavel, Serdar Yuksel", "tldr": "", "abstract": "In multi-agent reinforcement learning (MARL) and game theory, agents repeatedly interact and revise their strategies as new data arrives, producing a sequence of strategy profiles. This paper studies sequences of strategies satisfying a pairwise constraint inspired by policy updating in reinforcement learning, where an agent who is best responding in one period does not switch its strategy in the next period. This constraint merely requires that optimizing agents do not switch strategies, but does not constrain the non-optimizing agents in any way, and thus allows for exploration. Sequences with this property are called satisficing paths, and arise naturally in many MARL algorithms. A fundamental question about strategic dynamics is such: for a given game and initial strategy profile, is it always possible to construct a satisficing path that terminates at an equilibrium? The resolution of this question has implications about the capabilities or limitations of a class of MARL algorithms. We answer this question in the affirmative for normal-form games. Our analysis reveals a counterintuitive insight that suboptimal, and perhaps even reward deteriorating, strategic updates are key to driving play to equilibrium along a satisficing path.", "keywords": "game theory;multi-agent reinforcement learning;strategic dynamics", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Bora Yongacoglu;Gurdal Arslan;Lacra Pavel;Serdar Yuksel", "authorids": "~Bora_Yongacoglu1;~Gurdal_Arslan1;~Lacra_Pavel1;~Serdar_Yuksel1", "gender": ";M;;", "homepage": ";https://www2.hawaii.edu/~gurdal/;;https://mast.queensu.ca/~yuksel/Research.html", "dblp": ";;;", "google_scholar": "SbLefpQAAAAJ;;;e02rxnYAAAAJ", "orcid": ";;;0000-0001-6099-5001", "linkedin": ";;;", "or_profile": "~Bora_Yongacoglu1;~Gurdal_Arslan1;~Lacra_Pavel1;~Serdar_Yuksel1", "aff": "University of Toronto;University of Hawaii at Manoa;;Queen's University", "aff_domain": "utoronto.ca;manoa.hawaii.edu;;queensu.ca", "position": "Postdoc;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nyongacoglu2024paths,\ntitle={Paths to Equilibrium in Games},\nauthor={Bora Yongacoglu and Gurdal Arslan and Lacra Pavel and Serdar Yuksel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LxxIiInmuF}\n}", "github": "", "reviewers": "HTV5;1c5w;sDku;a3ET", "pdf_size": 434374, "rating": "6;6;6;6", "confidence": "4;2;4;2", "soundness": "3;4;4;3", "novelty": "4;3;3;3", "presentation": "4;3;4;2", "wc_summary": "100;103;38;118", "wc_strengths": "91;138;36;71", "wc_weaknesses": "125;127;251;20", "wc_questions": "48;105;7;43", "wc_limitations": "1;5;1;3", "wc_review": "365;478;333;255", "wc_reply_reviewers": "5;10;11;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 89.75, 30.646166155002163 ], "wc_strengths_avg": [ 84.0, 36.871398129173244 ], "wc_weaknesses_avg": [ 130.75, 81.81190316818207 ], "wc_questions_avg": [ 50.75, 35.088281519618484 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 357.75, 80.12919255801846 ], "wc_reply_reviewers_avg": [ 8.75, 2.277608394786075 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13353011888281646476&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "utoronto.ca;manoa.hawaii.edu;;queensu.ca", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;University of Hawaii;Queen's University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://www.hawaii.edu;https://www.queensu.ca", "aff_unique_abbr": "U of T;UH;Queen's", "aff_campus_unique_index": "1", "aff_campus_unique": ";Manoa", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "PEAC: Unsupervised Pre-training for Cross-Embodiment Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95555", "id": "LyAFfdx8YF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LyAFfdx8YF", "openreview": "https://openreview.net/forum?id=LyAFfdx8YF", "poster": "/media/PosterPDFs/NeurIPS%202024/95555.png?t=1729777253.9994962", "project": "", "author_site": "Chengyang Ying, Hao Zhongkai, Xinning Zhou, Xuezhou Xu, Hang Su, Xingxing Zhang, Jun Zhu", "tldr": "", "abstract": "Designing generalizable agents capable of adapting to diverse embodiments has achieved significant attention in Reinforcement Learning (RL), which is critical for deploying RL agents in various real-world applications. Previous Cross-Embodiment RL approaches have focused on transferring knowledge across embodiments within specific tasks. These methods often result in knowledge tightly coupled with those tasks and fail to adequately capture the distinct characteristics of different embodiments. To address this limitation, we introduce the notion of Cross-Embodiment Unsupervised RL (CEURL), which leverages unsupervised learning to enable agents to acquire embodiment-aware and task-agnostic knowledge through online interactions within reward-free environments. We formulate CEURL as a novel Controlled Embodiment Markov Decision Process (CE-MDP) and systematically analyze CEURL's pre-training objectives under CE-MDP. Based on these analyses, we develop a novel algorithm Pre-trained Embodiment-Aware Control (PEAC) for handling CEURL, incorporating an intrinsic reward function specifically designed for cross-embodiment pre-training. PEAC not only provides an intuitive optimization strategy for cross-embodiment pre-training but also can integrate flexibly with existing unsupervised RL methods, facilitating cross-embodiment exploration and skill discovery. Extensive experiments in both simulated (e.g., DMC and Robosuite) and real-world environments (e.g., legged locomotion) demonstrate that PEAC significantly improves adaptation performance and cross-embodiment generalization, demonstrating its effectiveness in overcoming the unique challenges of CEURL. The project page and code are in https://yingchengyang.github.io/ceurl.", "keywords": "cross-embodiment reinforcement learning;unsupervised reinforcement learning;cross-embodiment exploration;cross-embodiment skill discovery", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b8f31eb67a1b106dae444c22ee1d2e55d941ecab.zip", "author": "Chengyang Ying;Zhongkai Hao;Xinning Zhou;Xuezhou Xu;Hang Su;Xingxing Zhang;Jun Zhu", "authorids": "~Chengyang_Ying1;~Zhongkai_Hao1;~Xinning_Zhou1;~Xuezhou_Xu1;~Hang_Su3;~Xingxing_Zhang3;~Jun_Zhu2", "gender": "M;;M;F;M;M;M", "homepage": "https://yingchengyang.github.io/;;https://xuxuezhou.github.io/;https://indussky8.github.io/;http://ml.cs.tsinghua.edu.cn/~jun;;https://haozhongkai.github.io/", "dblp": "296/2065;293/3297;;;50/2644-1;26/5371-6;270/0220.html", "google_scholar": "vM6KE18AAAAJ;lWRfV70AAAAJ;https://scholar.google.gr/citations?user=ivTfnKgAAAAJ;https://scholar.google.com.hk/citations?user=RKjiLyAAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ;dfSzq27ZiVoC", "orcid": ";;;0000-0002-2909-1589;;;", "linkedin": "%E9%93%96%E9%98%B3-%E5%BA%94-9b682a203/;%E8%BE%9B%E5%AE%81-%E5%91%A8-5a49821a3/;;;;;", "or_profile": "~Chengyang_Ying1;~Xinning_Zhou1;~Xuezhou_Xu1;~Xingxing_Zhang3;~Jun_Zhu2;~Hang_Su2;~Hao_Zhongkai1", "aff": "Tsinghua University;Tsinghua University;National University of Singapore;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;nus.edu.sg;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;PhD student;Undergrad student;Researcher;Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nying2024peac,\ntitle={{PEAC}: Unsupervised Pre-training for Cross-Embodiment Reinforcement Learning},\nauthor={Chengyang Ying and Zhongkai Hao and Xinning Zhou and Xuezhou Xu and Hang Su and Xingxing Zhang and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LyAFfdx8YF}\n}", "github": "", "reviewers": "3BHi;RG3H;xr64", "pdf_size": 11796286, "rating": "5;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "94;46;196", "wc_strengths": "86;55;50", "wc_weaknesses": "138;210;129", "wc_questions": "75;3;108", "wc_limitations": "1;24;34", "wc_review": "394;338;517", "wc_reply_reviewers": "0;25;96", "wc_reply_authors": "135;28;404", "reply_reviewers": "0;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 112.0, 62.545983084447556 ], "wc_strengths_avg": [ 63.666666666666664, 15.923427883328248 ], "wc_weaknesses_avg": [ 159.0, 36.24913792078372 ], "wc_questions_avg": [ 62.0, 43.840620433565945 ], "wc_limitations_avg": [ 19.666666666666668, 13.816254517375139 ], "wc_review_avg": [ 416.3333333333333, 74.76333028668235 ], "wc_reply_reviewers_avg": [ 40.333333333333336, 40.663934334438856 ], "wc_reply_authors_avg": [ 189.0, 158.17922324586965 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8846331800934336410&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;tsinghua.edu.cn;nus.edu.sg;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Bridging The Gap between Low-rank and Orthogonal Adaptation via Householder Reflection Adaptation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95554", "id": "LzLeAscHnj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=LzLeAscHnj", "openreview": "https://openreview.net/forum?id=LzLeAscHnj", "poster": "/media/PosterPDFs/NeurIPS%202024/95554.png?t=1731467778.6043546", "project": "", "author_site": "Shen Yuan, Haotian Liu, Hongteng Xu", "tldr": "", "abstract": "While following different technical routes, both low-rank and orthogonal adaptation techniques can efficiently adapt large-scale pre-training models in specific tasks or domains based on a small piece of trainable parameters. In this study, we bridge the gap between these two techniques, proposing a simple but effective adaptation method based on Householder reflections. Given a pre-trained model, our method fine-tunes its layers by multiplying each frozen weight matrix with an orthogonal matrix constructed by a chain of learnable Householder reflections (HRs). This HR-based orthogonal fine-tuning is equivalent to an adaptive low-rank adaptation. Moreover, we show that the orthogonality of the reflection planes corresponding to the HRs impacts the model capacity and regularity. The analysis motivates us to regularize the orthogonality of the HRs, leading to different implementations of the proposed Householder reflection adaptation (HRA) method. Compared with state-of-the-art methods, HRA achieves superior performance with fewer learnable parameters when adapting large language models and conditional image generators. The code of the experiments is available at https://github.com/DaShenZi721/HRA, and the method has been merged into the [PEFT](https://github.com/huggingface/peft) package.", "keywords": "Orthogonal fine-tuning;Householder reflection;Conditional text-to-image generation;Large language models", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Shen Yuan;Haotian Liu;Hongteng Xu", "authorids": "~Shen_Yuan1;~Haotian_Liu9;~Hongteng_Xu1", "gender": "M;;M", "homepage": ";https://github.com/HaotianLiu123;https://hongtengxu.github.io", "dblp": "205/1211;;38/10816", "google_scholar": "13aLBpkAAAAJ;;7gYVOO8AAAAJ", "orcid": ";;0000-0003-4192-5360", "linkedin": ";;", "or_profile": "~Shen_Yuan1;~Haotian_Liu9;~Hongteng_Xu1", "aff": "Renmin University of China;Beijing Institute of Technology;Renmin University of China", "aff_domain": "ruc.edu.cn;bit.edu.cn;ruc.edu.cn", "position": "PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nyuan2024bridging,\ntitle={Bridging The Gap between Low-rank and Orthogonal Adaptation via Householder Reflection Adaptation},\nauthor={Shen Yuan and Haotian Liu and Hongteng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=LzLeAscHnj}\n}", "github": "", "reviewers": "mtnZ;7LSp;1vEG;dePa", "pdf_size": 7187092, "rating": "5;5;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "62;41;106;92", "wc_strengths": "30;29;67;23", "wc_weaknesses": "70;130;31;9", "wc_questions": "101;5;29;5", "wc_limitations": "19;8;16;40", "wc_review": "282;213;249;169", "wc_reply_reviewers": "15;11;24;0", "wc_reply_authors": "23;26;23;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.25, 25.370997221236692 ], "wc_strengths_avg": [ 37.25, 17.383541066192468 ], "wc_weaknesses_avg": [ 60.0, 45.94017849334066 ], "wc_questions_avg": [ 35.0, 39.344631145812 ], "wc_limitations_avg": [ 20.75, 11.818946653572814 ], "wc_review_avg": [ 228.25, 42.020084483494315 ], "wc_reply_reviewers_avg": [ 12.5, 8.616843969807043 ], "wc_reply_authors_avg": [ 18.0, 10.464224768228174 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11862401897655970802&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;bit.edu.cn;ruc.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Renmin University of China;Beijing Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "RUC;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Estimating the Hallucination Rate of Generative AI", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95553", "id": "Lzl8qJYXv5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Lzl8qJYXv5", "openreview": "https://openreview.net/forum?id=Lzl8qJYXv5", "poster": "", "project": "", "author_site": "Andrew Jesson, Nicolas Beltran Velez, Quentin Chu, Sweta Karlekar, Jannik Kossen, Yarin Gal, John Cunningham, David Blei", "tldr": "", "abstract": "This paper presents a method for estimating the hallucination rate for in-context learning (ICL) with generative AI. In ICL, a conditional generative model (CGM) is prompted with a dataset and a prediction question and asked to generate a response. One interpretation of ICL assumes that the CGM computes the posterior predictive of an unknown Bayesian model, which implicitly defines a joint distribution over observable datasets and latent mechanisms. This joint distribution factorizes into two components: the model prior over mechanisms and the model likelihood of datasets given a mechanism. With this perspective, we define a \\textit{hallucination} as a generated response to the prediction question with low model likelihood given the mechanism. We develop a new method that takes an ICL problem and estimates the probability that a CGM will generate a hallucination. Our method only requires generating prediction questions and responses from the CGM and evaluating its response log probability. We empirically evaluate our method using large language models for synthetic regression and natural language ICL tasks.", "keywords": "Uncertainty Quantification;Large Language Models;Conditional Generative Models;Hallucination Prediction", "primary_area": "generative_models", "supplementary_material": "", "author": "Andrew Jesson;Nicolas Beltran-Velez;Quentin Chu;Sweta Karlekar;Jannik Kossen;Yarin Gal;John Patrick Cunningham;David Blei", "authorids": "~Andrew_Jesson1;~Nicolas_Beltran-Velez1;~Quentin_Chu1;~Sweta_Karlekar2;~Jannik_Kossen2;~Yarin_Gal1;~John_Patrick_Cunningham1;~David_Blei2", "gender": "M;M;M;F;Unspecified;;M;M", "homepage": "https://oatml.cs.ox.ac.uk/members/andrew_jesson/;;;https://sweta.dev;https://jlko.eu;http://www.cs.ox.ac.uk/people/yarin.gal/website//;stat.columbia.edu/~cunningham;http://www.cs.columbia.edu/~blei/", "dblp": ";;;;250/2339;67/9076;51/4077;86/1910", "google_scholar": "ElJ_fC4AAAAJ;;;P9lDUcEAAAAJ;i1FIOV0AAAAJ;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;88cU_4UAAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ", "orcid": ";;;;;;;", "linkedin": ";nicolas-beltran-velez-1950141a9;quentinchu/;swetakarlekar/;;;;", "or_profile": "~Andrew_Jesson1;~Nicolas_Beltran-Velez1;~Quentin_Chu1;~Sweta_Karlekar2;~Jannik_Kossen2;~Yarin_Gal1;~John_Patrick_Cunningham1;~David_Blei2", "aff": "Columbia University;Columbia University;Columbia University;Columbia University;University of Oxford;University of Oxford;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;columbia.edu;oxford.ac.uk;ox.ac.uk;columbia.edu;columbia.edu", "position": "Postdoc;PhD student;MS student;PhD student;PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\njesson2024estimating,\ntitle={Estimating the Hallucination Rate of Generative {AI}},\nauthor={Andrew Jesson and Nicolas Beltran-Velez and Quentin Chu and Sweta Karlekar and Jannik Kossen and Yarin Gal and John Patrick Cunningham and David Blei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Lzl8qJYXv5}\n}", "github": "", "reviewers": "6dN5;Vp93;yYgH;VGJP", "pdf_size": 8669393, "rating": "2;4;6;6", "confidence": "5;5;3;3", "soundness": "2;2;3;2", "novelty": "1;4;3;3", "presentation": "2;2;2;3", "wc_summary": "17;47;50;81", "wc_strengths": "2;45;44;87", "wc_weaknesses": "80;321;261;212", "wc_questions": "39;2;5;78", "wc_limitations": "4;6;5;70", "wc_review": "142;421;365;528", "wc_reply_reviewers": "0;96;80;22", "wc_reply_authors": "83;150;178;125", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 48.75, 22.65364209128413 ], "wc_strengths_avg": [ 44.5, 30.054117854297438 ], "wc_weaknesses_avg": [ 218.5, 88.79329929673747 ], "wc_questions_avg": [ 31.0, 30.78148794324277 ], "wc_limitations_avg": [ 21.25, 28.154706533721853 ], "wc_review_avg": [ 364.0, 140.9166420264122 ], "wc_reply_reviewers_avg": [ 49.5, 39.68311983702894 ], "wc_reply_authors_avg": [ 134.0, 34.90701935141412 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16272526908492384600&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "columbia.edu;columbia.edu;columbia.edu;columbia.edu;oxford.ac.uk;ox.ac.uk;columbia.edu;columbia.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1;1;0;0", "aff_unique_norm": "Columbia University;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.ox.ac.uk", "aff_unique_abbr": "Columbia;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Are High-Degree Representations Really Unnecessary in Equivariant Graph Neural Networks?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95552", "id": "M0ncNVuGYN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M0ncNVuGYN", "openreview": "https://openreview.net/forum?id=M0ncNVuGYN", "poster": "/media/PosterPDFs/NeurIPS%202024/95552.png?t=1728621135.4678006", "project": "", "author_site": "Jiacheng Cen, Anyi Li, Ning Lin, Yuxiang Ren, Zihe Wang, Wenbing Huang", "tldr": "", "abstract": "Equivariant Graph Neural Networks (GNNs) that incorporate E(3) symmetry have achieved significant success in various scientific applications. As one of the most successful models, EGNN leverages a simple scalarization technique to perform equivariant message passing over only Cartesian vectors (i.e., 1st-degree steerable vectors), enjoying greater efficiency and efficacy compared to equivariant GNNs using higher-degree steerable vectors. This success suggests that higher-degree representations might be unnecessary. In this paper, we disprove this hypothesis by exploring the expressivity of equivariant GNNs on symmetric structures, including $k$-fold rotations and regular polyhedra. We theoretically demonstrate that equivariant GNNs will always degenerate to a zero function if the degree of the output representations is fixed to 1 or other specific values. Based on this theoretical insight, we propose HEGNN, a high-degree version of EGNN to increase the expressivity by incorporating high-degree steerable vectors while maintaining EGNN's efficiency through the scalarization trick. Our extensive experiments demonstrate that HEGNN not only aligns with our theoretical analyses on toy datasets consisting of symmetric structures, but also shows substantial improvements on more complicated datasets such as $N$-body and MD17. Our theoretical findings and empirical results potentially open up new possibilities for the research of equivariant GNNs.", "keywords": "Geometric Graph Neural Networks;High Degree Steerable Vectors", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/12fe3539b151d2e978245fd4be4f38ab76ccb1aa.zip", "author": "Jiacheng Cen;Anyi Li;Ning Lin;Yuxiang Ren;Zihe Wang;Wenbing Huang", "authorids": "~Jiacheng_Cen1;~Anyi_Li1;~Ning_Lin2;~Yuxiang_Ren1;~Zihe_Wang1;~Wenbing_Huang1", "gender": ";M;M;M;M;M", "homepage": ";;;https://yuxiangren.github.io/;https://gsai.ruc.edu.cn/english/zihe;https://gsai.ruc.edu.cn/english/wenbing_huang", "dblp": ";;;;137/7822-1;155/3181-1.html", "google_scholar": ";0fl1-ZQAAAAJ;y1C1q3cAAAAJ;SSC2dgcAAAAJ;SL1QGDoAAAAJ;0yNkmO4AAAAJ", "orcid": ";;0000-0001-8883-7005;;0000-0001-7752-4687;", "linkedin": ";;;;;", "or_profile": "~Jiacheng_Cen1;~Anyi_Li1;~Ning_Lin2;~Yuxiang_Ren1;~Zihe_Wang1;~Wenbing_Huang1", "aff": ";Shandong University;Renmin University of China;Huawei Technologies Ltd.;Renmin University of China;Renmin University of China", "aff_domain": ";sdu.edu.cn;ruc.edu.cn;huawei.com;ruc.edu.cn;ruc.edu.cn", "position": ";Undergrad student;Undergrad student;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncen2024are,\ntitle={Are High-Degree Representations Really Unnecessary in Equivariant Graph Neural Networks?},\nauthor={Jiacheng Cen and Anyi Li and Ning Lin and Yuxiang Ren and Zihe Wang and Wenbing Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M0ncNVuGYN}\n}", "github": "", "reviewers": "vhjv;op9c;3KcU;7wT3", "pdf_size": 600656, "rating": "6;6;7;7", "confidence": "4;4;3;3", "soundness": "3;4;4;3", "novelty": "2;2;3;2", "presentation": "3;4;3;3", "wc_summary": "93;56;255;199", "wc_strengths": "95;43;263;163", "wc_weaknesses": "205;54;252;353", "wc_questions": "142;252;253;5", "wc_limitations": "41;44;43;33", "wc_review": "576;449;1066;753", "wc_reply_reviewers": "0;179;72;103", "wc_reply_authors": "0;755;29;26", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 150.75, 79.85729459479579 ], "wc_strengths_avg": [ 141.0, 82.29216244576394 ], "wc_weaknesses_avg": [ 216.0, 107.73810839252748 ], "wc_questions_avg": [ 163.0, 101.76689048998206 ], "wc_limitations_avg": [ 40.25, 4.322904116447646 ], "wc_review_avg": [ 711.0, 231.6559949580412 ], "wc_reply_reviewers_avg": [ 88.5, 64.23589339302443 ], "wc_reply_authors_avg": [ 202.5, 319.1852910144827 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14427355575105377921&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";sdu.edu.cn;ruc.edu.cn;huawei.com;ruc.edu.cn;ruc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Shandong University;Renmin University of China;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "http://www.sdu.edu.cn;http://www.ruc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SDU;RUC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FedAvP: Augment Local Data via Shared Policy in Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95551", "id": "M1PRU0x1Iz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M1PRU0x1Iz", "openreview": "https://openreview.net/forum?id=M1PRU0x1Iz", "poster": "", "project": "", "author_site": "Minui Hong, Junhyeog Yun, Insu Jeon, Gunhee Kim", "tldr": "", "abstract": "Federated Learning (FL) allows multiple clients to collaboratively train models without directly sharing their private data. While various data augmentation techniques have been actively studied in the FL environment, most of these methods share input-level or feature-level data information over communication, posing potential privacy leakage. In response to this challenge, we introduce a federated data augmentation algorithm named FedAvP that shares only the augmentation policies, not the data-related information. \nFor data security and efficient policy search, we interpret the policy loss as a meta update loss in standard FL algorithms and utilize the first-order gradient information to further enhance privacy and reduce communication costs. Moreover, we propose a meta-learning method to search for adaptive personalized policies tailored to heterogeneous clients. Our approach outperforms existing best performing augmentation policy search methods and federated data augmentation methods, in the benchmarks for heterogeneous FL.", "keywords": "federated learning;data augmentation", "primary_area": "machine_vision", "supplementary_material": "/attachment/9e78a848bbd37bbf2394e390910b972ed74c5bb0.zip", "author": "Minui Hong;Junhyeog Yun;Insu Jeon;Gunhee Kim", "authorids": "~Minui_Hong2;~Junhyeog_Yun1;~Insu_Jeon2;~Gunhee_Kim1", "gender": ";;;M", "homepage": ";;;http://vision.snu.ac.kr/gunhee/", "dblp": ";;;45/115", "google_scholar": ";;;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "orcid": ";;;0000-0002-9543-7453", "linkedin": ";;;", "or_profile": "~Minui_Hong2;~Junhyeog_Yun1;~Insu_Jeon2;~Gunhee_Kim1", "aff": ";;;Seoul National University", "aff_domain": ";;;snu.ac.kr", "position": ";;;Full Professor", "bibtex": "@inproceedings{\nhong2024fedavp,\ntitle={FedAvP: Augment Local Data via Shared Policy in Federated Learning},\nauthor={Minui Hong and Junhyeog Yun and Insu Jeon and Gunhee Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M1PRU0x1Iz}\n}", "github": "", "reviewers": "1shA;866Z;WeMd;zQTG", "pdf_size": 3914417, "rating": "5;6;6;6", "confidence": "4;2;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "53;27;105;58", "wc_strengths": "34;19;86;99", "wc_weaknesses": "55;73;77;103", "wc_questions": "80;22;46;31", "wc_limitations": "2;3;36;13", "wc_review": "224;144;350;304", "wc_reply_reviewers": "106;20;14;12", "wc_reply_authors": "190;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 28.12805538959279 ], "wc_strengths_avg": [ 59.5, 33.737960815674676 ], "wc_weaknesses_avg": [ 77.0, 17.146428199482248 ], "wc_questions_avg": [ 44.75, 22.083647796503186 ], "wc_limitations_avg": [ 13.5, 13.683932183404009 ], "wc_review_avg": [ 255.5, 78.59230242205658 ], "wc_reply_reviewers_avg": [ 38.0, 39.370039370059054 ], "wc_reply_authors_avg": [ 47.5, 82.27241335952168 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_jI1fl27cdoJ:scholar.google.com/&scioq=FedAvP:+Augment+Local+Data+via+Shared+Policy+in+Federated+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": ";;;snu.ac.kr", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Identifiability Guarantees for Causal Disentanglement from Purely Observational Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95550", "id": "M20p6tq9Hq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M20p6tq9Hq", "openreview": "https://openreview.net/forum?id=M20p6tq9Hq", "poster": "/media/PosterPDFs/NeurIPS%202024/95550.png?t=1731601095.1498663", "project": "", "author_site": "Ryan Welch, Jiaqi Zhang, Caroline Uhler", "tldr": "", "abstract": "Causal disentanglement aims to learn about latent causal factors behind data, hold- ing the promise to augment existing representation learning methods in terms of interpretability and extrapolation. Recent advances establish identifiability results assuming that interventions on (single) latent factors are available; however, it re- mains debatable whether such assumptions are reasonable due to the inherent nature of intervening on latent variables. Accordingly, we reconsider the fundamentals and ask what can be learned using just observational data.\n\nWe provide a precise characterization of latent factors that can be identified in nonlinear causal models with additive Gaussian noise and linear mixing, without any interventions or graphical restrictions. In particular, we show that the causal variables can be identified up to a _layer_-wise transformation and that further disen- tanglement is not possible. We transform these theoretical results into a practical algorithm consisting of solving a quadratic program over the score estimation of the observed data. We provide simulation results to support our theoretical guarantees and demonstrate that our algorithm can derive meaningful causal representations from purely observational data.", "keywords": "causality;disentanglement;identifiability theory", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Ryan Welch;Jiaqi Zhang;Caroline Uhler", "authorids": "~Ryan_Welch1;~Jiaqi_Zhang2;~Caroline_Uhler1", "gender": "M;F;F", "homepage": ";;https://www.carolineuhler.com/", "dblp": ";;66/10813", "google_scholar": "rdbnBUQAAAAJ;;https://scholar.google.com.tw/citations?user=dIJFcaoAAAAJ", "orcid": ";0000-0001-9039-6843;", "linkedin": "ryan-welch-a859b414b/;vicky-jiaqi-zhang-34b490180/;", "or_profile": "~Ryan_Welch1;~Jiaqi_Zhang2;~Caroline_Uhler1", "aff": "Massachusetts Institute of Technology;Apple;Electrical Engineering & Computer Science, Massachusetts Institute of Technology", "aff_domain": "mit.edu;apple.com;eecs.mit.edu", "position": "Undergrad student;Intern;Associate Professor", "bibtex": "@inproceedings{\nwelch2024identifiability,\ntitle={Identifiability Guarantees for Causal Disentanglement from Purely Observational Data},\nauthor={Ryan Welch and Jiaqi Zhang and Caroline Uhler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M20p6tq9Hq}\n}", "github": "", "reviewers": "2B2i;gAcJ;1YMg;zdJm", "pdf_size": 2500239, "rating": "4;6;6;6", "confidence": "4;4;3;3", "soundness": "2;3;3;2", "novelty": "1;3;3;3", "presentation": "2;3;4;2", "wc_summary": "253;20;58;65", "wc_strengths": "54;93;47;46", "wc_weaknesses": "172;260;79;63", "wc_questions": "33;194;197;4", "wc_limitations": "4;14;2;2", "wc_review": "516;581;383;180", "wc_reply_reviewers": "93;85;8;12", "wc_reply_authors": "561;291;9;16", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 99.0, 90.54556863811723 ], "wc_strengths_avg": [ 60.0, 19.300259065618782 ], "wc_weaknesses_avg": [ 143.5, 79.09646009778187 ], "wc_questions_avg": [ 107.0, 89.09826036461094 ], "wc_limitations_avg": [ 5.5, 4.9749371855331 ], "wc_review_avg": [ 415.0, 153.30198955003814 ], "wc_reply_reviewers_avg": [ 49.5, 39.62638010214912 ], "wc_reply_authors_avg": [ 219.25, 227.73710172038284 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=596728731008923397&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "mit.edu;apple.com;eecs.mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Apple", "aff_unique_dep": ";Apple Inc.", "aff_unique_url": "https://web.mit.edu;https://www.apple.com", "aff_unique_abbr": "MIT;Apple", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Perceptual Fairness in Image Restoration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95549", "id": "M2QREVHK1V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M2QREVHK1V", "openreview": "https://openreview.net/forum?id=M2QREVHK1V", "poster": "/media/PosterPDFs/NeurIPS%202024/95549.png?t=1729765560.4783206", "project": "", "author_site": "Guy Ohayon, Michael Elad, Tomer Michaeli", "tldr": "", "abstract": "Fairness in image restoration tasks is the desire to treat different sub-groups of images equally well. Existing definitions of fairness in image restoration are highly restrictive. They consider a reconstruction to be a correct outcome for a group (e.g., women) *only* if it falls within the group's set of ground truth images (e.g., natural images of women); otherwise, it is considered *entirely* incorrect. Consequently, such definitions are prone to controversy, as errors in image restoration can manifest in various ways. In this work we offer an alternative approach towards fairness in image restoration, by considering the *Group Perceptual Index* (GPI), which we define as the statistical distance between the distribution of the group's ground truth images and the distribution of their reconstructions. We assess the fairness of an algorithm by comparing the GPI of different groups, and say that it achieves perfect *Perceptual Fairness* (PF) if the GPIs of all groups are identical. We motivate and theoretically study our new notion of fairness, draw its connection to previous ones, and demonstrate its utility on state-of-the-art face image restoration algorithms.", "keywords": "fairness;bias;inverse problems;image restoration;image processing;machine learning;computer vision;responsible AI;super-resolution;deblurring;denoising", "primary_area": "fairness", "supplementary_material": "", "author": "Guy Ohayon;Michael Elad;Tomer Michaeli", "authorids": "~Guy_Ohayon1;~Michael_Elad1;~Tomer_Michaeli1", "gender": "M;M;M", "homepage": ";https://elad.cs.technion.ac.il/;https://tomer.net.technion.ac.il/", "dblp": "287/4281;e/MichaelElad;70/3188.html", "google_scholar": "Gso71ogAAAAJ;UpZbV44AAAAJ;n2EbR2cAAAAJ", "orcid": ";0000-0001-8131-6928;", "linkedin": "ohayonguy/;michael-elad-5553852a3/;", "or_profile": "~Guy_Ohayon1;~Michael_Elad1;~Tomer_Michaeli1", "aff": "Verily;Verily;Technion, Technion", "aff_domain": "verily.com;verily.com;technion.ac.il", "position": "Intern;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nohayon2024perceptual,\ntitle={Perceptual Fairness in Image Restoration},\nauthor={Guy Ohayon and Michael Elad and Tomer Michaeli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M2QREVHK1V}\n}", "github": "", "reviewers": "ziAr;msyK;HsYQ;xBjq", "pdf_size": 17366819, "rating": "6;6;6;6", "confidence": "3;1;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;4", "presentation": "4;2;3;4", "wc_summary": "38;55;79;90", "wc_strengths": "54;40;70;99", "wc_weaknesses": "42;92;68;71", "wc_questions": "12;6;35;76", "wc_limitations": "6;9;8;45", "wc_review": "152;202;260;381", "wc_reply_reviewers": "20;23;18;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 65.5, 20.303940504246953 ], "wc_strengths_avg": [ 65.75, 21.935986415021322 ], "wc_weaknesses_avg": [ 68.25, 17.75352077758099 ], "wc_questions_avg": [ 32.25, 27.48067502809929 ], "wc_limitations_avg": [ 17.0, 16.20185174601965 ], "wc_review_avg": [ 248.75, 85.3855227775763 ], "wc_reply_reviewers_avg": [ 25.75, 9.54921462739214 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4541200708352589566&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "verily.com;verily.com;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Verily;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.verily.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Verily;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Reducing Transformer Key-Value Cache Size with Cross-Layer Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95548", "id": "M2UzLRoqic", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M2UzLRoqic", "openreview": "https://openreview.net/forum?id=M2UzLRoqic", "poster": "", "project": "", "author_site": "William Brandon, Mayank Mishra, Aniruddha Nrusimha, Rameswar Panda, Jonathan Ragan-Kelley", "tldr": "", "abstract": "Key-value (KV) caching plays an essential role in accelerating decoding for transformer-based autoregressive large language models (LLMs). However, the amount of memory required to store the KV cache can become prohibitive at long sequence lengths and large batch sizes. Since the invention of the transformer, two of the most effective interventions discovered for reducing the size of the KV cache have been Multi-Query Attention (MQA) and its generalization, Grouped-Query Attention (GQA). MQA and GQA both modify the design of the attention block so that multiple query heads can share a single key/value head, reducing the number of distinct key/value heads by a large factor while only minimally degrading accuracy. In this paper, we show that it is possible to take Multi-Query Attention a step further by also sharing key and value heads between adjacent layers, yielding a new attention design we call Cross-Layer Attention (CLA). With CLA, we find that it is possible to reduce the size of the KV cache by another $2\\times$ while maintaining nearly the same accuracy as unmodified MQA. In experiments training 1B- and 3B-parameter models from scratch, we demonstrate that CLA provides a Pareto improvement over the memory/accuracy tradeoffs which are possible with traditional MQA, potentially enabling future models to operate at longer sequence lengths and larger batch sizes than would otherwise be possible.", "keywords": "transformers;attention;KV cache;LLMs", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "William Brandon;Mayank Mishra;Aniruddha Nrusimha;Rameswar Panda;Jonathan Ragan-Kelley", "authorids": "~William_Brandon1;~Mayank_Mishra1;~Aniruddha_Nrusimha1;~Rameswar_Panda1;~Jonathan_Ragan-Kelley1", "gender": "M;M;;M;M", "homepage": "https://github.com/exists-forall;https://mayank31398.github.io/;https://anin.dev;https://rpand002.github.io/;https://people.csail.mit.edu/jrk", "dblp": ";;250/2656;126/0986;", "google_scholar": ";YsbtW6cAAAAJ;;_ySuu6gAAAAJ;https://scholar.google.com.tw/citations?user=nBcay4oAAAAJ", "orcid": ";;;;", "linkedin": ";mayank31398;;;", "or_profile": "~William_Brandon1;~Mayank_Mishra1;~Aniruddha_Nrusimha1;~Rameswar_Panda1;~Jonathan_Ragan-Kelley1", "aff": "Massachusetts Institute of Technology;International Business Machines;MIT;MIT-IBM Watson AI Lab;Adobe Systems", "aff_domain": "mit.edu;ibm.com;mit.edu;ibm.com;adobe.com", "position": "PhD student;Researcher;Ph.D. candidate;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbrandon2024reducing,\ntitle={Reducing Transformer Key-Value Cache Size with Cross-Layer Attention},\nauthor={William Brandon and Mayank Mishra and Aniruddha Nrusimha and Rameswar Panda and Jonathan Ragan-Kelley},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M2UzLRoqic}\n}", "github": "", "reviewers": "mqB5;skpH;8ZcC", "pdf_size": 584314, "rating": "6;7;8", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "2;3;4", "presentation": "2;3;3", "wc_summary": "53;82;74", "wc_strengths": "25;99;59", "wc_weaknesses": "195;160;206", "wc_questions": "47;40;4", "wc_limitations": "12;10;3", "wc_review": "332;391;346", "wc_reply_reviewers": "37;39;13", "wc_reply_authors": "81;0;0", "reply_reviewers": "1;1;1", "reply_authors": "3;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 69.66666666666667, 12.229290885229426 ], "wc_strengths_avg": [ 61.0, 30.243456592570013 ], "wc_weaknesses_avg": [ 187.0, 19.61292091114087 ], "wc_questions_avg": [ 30.333333333333332, 18.83849486792639 ], "wc_limitations_avg": [ 8.333333333333334, 3.858612300930075 ], "wc_review_avg": [ 356.3333333333333, 25.170529504870483 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 11.8133634311129 ], "wc_reply_authors_avg": [ 27.0, 38.18376618407357 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9454233464041767014&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "mit.edu;ibm.com;mit.edu;ibm.com;adobe.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;Adobe", "aff_unique_dep": ";;Adobe Systems Incorporated", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com;https://www.adobe.com", "aff_unique_abbr": "MIT;IBM;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LogiCity: Advancing Neuro-Symbolic AI with Abstract Urban Simulation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97738", "id": "M32Ldpp4Oy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M32Ldpp4Oy", "openreview": "https://openreview.net/forum?id=M32Ldpp4Oy", "poster": "/media/PosterPDFs/NeurIPS%202024/97738.png?t=1731468597.118385", "project": "", "author_site": "Bowen Li, Zhaoyu Li, Qiwei Du, Jinqi Luo, Wenshan Wang, Yaqi Xie, Simon Stepputtis, Chen Wang, Katia Sycara, Pradeep Ravikumar, Alexander Gray, Xujie Si, Sebastian Scherer", "tldr": "", "abstract": "Recent years have witnessed the rapid development of Neuro-Symbolic (NeSy) AI systems, which integrate symbolic reasoning into deep neural networks.\nHowever, most of the existing benchmarks for NeSy AI fail to provide long-horizon reasoning tasks with complex multi-agent interactions.\nFurthermore, they are usually constrained by fixed and simplistic logical rules over limited entities, making them far from real-world complexities.\nTo address these crucial gaps, we introduce LogiCity, the first simulator based on customizable first-order logic (FOL) for an urban-like environment with multiple dynamic agents.\nLogiCity models diverse urban elements using semantic and spatial concepts, such as $\\texttt{IsAmbulance}(\\texttt{X})$ and $\\texttt{IsClose}(\\texttt{X}, \\texttt{Y})$. \nThese concepts are used to define FOL rules that govern the behavior of various agents. \nSince the concepts and rules are abstractions, they can be universally applied to cities with any agent compositions, facilitating the instantiation of diverse scenarios.\nBesides, a key feature of LogiCity is its support for user-configurable abstractions, enabling customizable simulation complexities for logical reasoning.\nTo explore various aspects of NeSy AI, LogiCity introduces two tasks, one features long-horizon sequential decision-making, and the other focuses on one-step visual reasoning, varying in difficulty and agent behaviors.\nOur extensive evaluation reveals the advantage of NeSy frameworks in abstract reasoning. \nMoreover, we highlight the significant challenges of handling more complex abstractions in long-horizon multi-agent scenarios or under high-dimensional, imbalanced data.\nWith its flexible design, various features, and newly raised challenges, we believe LogiCity represents a pivotal step forward in advancing the next generation of NeSy AI.\nAll the code and data are open-sourced at our website.", "keywords": "Neuro-Symbolic AI;Abstract Reasoning;Compositional Generalization;Sequential Decision-Making", "primary_area": "", "supplementary_material": "/attachment/2b43aa2cc6b5d82163ace4ff581438b68383b0a7.zip", "author": "Bowen Li;Zhaoyu Li;Qiwei Du;Jinqi Luo;Wenshan Wang;Yaqi Xie;Simon Stepputtis;Chen Wang;Katia P. Sycara;Pradeep Kumar Ravikumar;Alexander G. Gray;Xujie Si;Sebastian Scherer", "authorids": "~Bowen_Li7;~Zhaoyu_Li3;~Qiwei_Du1;~Jinqi_Luo1;~Wenshan_Wang2;~Yaqi_Xie1;~Simon_Stepputtis1;~Chen_Wang2;~Katia_P._Sycara1;~Pradeep_Kumar_Ravikumar1;~Alexander_G._Gray1;~Xujie_Si1;~Sebastian_Scherer1", "gender": "M;M;M;;F;F;;M;F;M;M;M;M", "homepage": "https://jaraxxus-me.github.io/;https://www.zhaoyu-li.com/;https://sairlab.org/qiweid/;;http://www.wangwenshan.com;https://yaqi-xie.me/;https://simonstepputtis.com/;https://sairlab.org/chenw/;;http://www.cs.cmu.edu/~pradeepr/;;https://xujie.si;https://theairlab.org", "dblp": "75/10470-7;;295/5071;;;237/8691;192/7092;82/4206-33;s/KatiaPSycara;94/3594;85/110.html;142/8449;253/5743", "google_scholar": "XIAMHVMAAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;lBCCo0EAAAAJ;WUQgzsAAAAAJ;vZfmKl4AAAAJ;VWv6a9kAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;https://scholar.google.com/citations?hl=en;Ru-jrx4AAAAJ;gxoPfIYAAAAJ", "orcid": ";;;;;0009-0005-0458-9419;0009-0003-0519-3454;0000-0002-4630-0805;;;0000-0003-0337-7359;;0000-0002-8373-4688", "linkedin": ";zhaoyu-li-9171892a5/;;;;yaqi-xie/;simon-stepputtis/;wang-chen/;;;alexander-gray-b554b64/;;sebastian-scherer-a026961a/", "or_profile": "~Bowen_Li7;~Zhaoyu_Li3;~Qiwei_Du1;~Jinqi_Luo1;~Wenshan_Wang2;~Yaqi_Xie1;~Simon_Stepputtis1;~Chen_Wang2;~Katia_P._Sycara1;~Pradeep_Kumar_Ravikumar1;~Alexander_G._Gray1;~Xujie_Si1;~Sebastian_Scherer1", "aff": "School of Computer Science, Carnegie Mellon University;University of Toronto;Tongji University;;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;University at Buffalo;Carnegie Mellon University;Carnegie Mellon University;International Business Machines;University of Toronto;Near Earth Autonomy Inc.", "aff_domain": "cs.cmu.edu;cs.toronto.edu;tongji.edu.cn;;cs.cmu.edu;cmu.edu;cmu.edu;buffalo.edu;cmu.edu;cmu.edu;ibm.com;toronto.edu;nearearth.aero", "position": "PhD student;PhD student;Undergrad student;;Researcher;Postdoc;Postdoc;Assistant Professor;Full Professor;Full Professor;VP, Foundations of AI;Assistant Professor;Senior Scientist", "bibtex": "@inproceedings{\nli2024logicity,\ntitle={LogiCity: Advancing Neuro-Symbolic {AI} with Abstract Urban Simulation},\nauthor={Bowen Li and Zhaoyu Li and Qiwei Du and Jinqi Luo and Wenshan Wang and Yaqi Xie and Simon Stepputtis and Chen Wang and Katia P. Sycara and Pradeep Kumar Ravikumar and Alexander G. Gray and Xujie Si and Sebastian Scherer},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=M32Ldpp4Oy}\n}", "github": "", "reviewers": "CmLB;x36W;fGrq;p8yp", "pdf_size": 9318996, "rating": "4;6;8;8", "confidence": "2;3;4;4", "wc_summary_and_contributions": "84;177;54;45", "wc_strengths": "89;476;74;84", "wc_improvement": "303;442;45;167", "wc_limitations": "61;408;284;1", "wc_correctness": "96;374;125;1", "wc_clarity": "2;314;226;1", "wc_relation_to_prior_work": "30;233;61;1", "wc_documentation": "65;374;55;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "731;2799;925;302", "wc_reply_reviewers": "0;60;7;12", "wc_reply_authors": "151;132;48;0", "reply_reviewers": "0;3;1;1", "reply_authors": "4;4;2;1", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 90.0, 52.263754170553035 ], "wc_strengths_avg": [ 180.75, 170.54819700014423 ], "wc_improvement_avg": [ 239.25, 148.42906554984438 ], "wc_limitations_avg": [ 188.5, 164.85827246456273 ], "wc_correctness_avg": [ 149.0, 137.7624767489319 ], "wc_clarity_avg": [ 135.75, 137.80851751615356 ], "wc_relation_to_prior_work_avg": [ 81.25, 90.14536871076628 ], "wc_documentation_avg": [ 123.75, 146.51855684519964 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1189.25, 956.3379044563694 ], "wc_reply_reviewers_avg": [ 19.75, 23.62599204266352 ], "wc_reply_authors_avg": [ 82.75, 61.519813881382966 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HUV9_K640NEJ:scholar.google.com/&scioq=LogiCity:+Advancing+Neuro-Symbolic+AI+with+Abstract+Urban+Simulation&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cs.cmu.edu;cs.toronto.edu;tongji.edu.cn;;cs.cmu.edu;cmu.edu;cmu.edu;buffalo.edu;cmu.edu;cmu.edu;ibm.com;toronto.edu;nearearth.aero", "author_num": 13, "aff_unique_index": "0;1;2;0;0;0;3;0;0;4;1;5", "aff_unique_norm": "Carnegie Mellon University;University of Toronto;Tongji University;University at Buffalo;International Business Machines Corporation;Near Earth Autonomy", "aff_unique_dep": "School of Computer Science;;;;;", "aff_unique_url": "https://www.cmu.edu;https://www.utoronto.ca;https://www.tongji.edu.cn;https://www.buffalo.edu;https://www.ibm.com;https://www.nearearthautonomy.com", "aff_unique_abbr": "CMU;U of T;Tongji;UB;IBM;NEA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;1;2;0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Canada;China" }, { "title": "Meta 3D AssetGen: Text-to-Mesh Generation with High-Quality Geometry, Texture, and PBR Materials", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95547", "id": "M3BIsgGQNb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M3BIsgGQNb", "openreview": "https://openreview.net/forum?id=M3BIsgGQNb", "poster": "/media/PosterPDFs/NeurIPS%202024/95547.png?t=1733096463.322024", "project": "", "author_site": "Yawar Siddiqui, Tom Monnier, Filippos Kokkinos, Mahendra Kariya, Yanir Kleiman, Emilien Garreau, Oran Gafni, Natalia Neverova, Andrea Vedaldi, Roman Shapovalov, David Novotny", "tldr": "", "abstract": "We present Meta 3D AssetGen (AssetGen), a significant advancement in text-to-3D generation which produces faithful, high-quality meshes with texture and material control. Compared to works that bake shading in the 3D object\u2019s appearance, AssetGen outputs physically-based rendering (PBR) materials, supporting realistic relighting. AssetGen generates first several views of the object with separate shaded and albedo appearance channels, and then reconstructs colours, metalness and roughness in 3D, using a deferred shading loss for efficient supervision. It also uses a sign-distance function to represent 3D shape more reliably and introduces a\ncorresponding loss for direct shape supervision. This is implemented using fused kernels for high memory efficiency. After mesh extraction, a texture refinement transformer operating in UV space significantly improves sharpness and details. AssetGen achieves 17% improvement in Chamfer Distance and 40% in LPIPS over the best concurrent work for few-view reconstruction, and a human preference of 72% over the best industry competitors of comparable speed, including those that support PBR. Project page with generated assets: https://assetgen.github.io", "keywords": "text to 3d;3d generative models;sparse view reconstruction;3d shape generation", "primary_area": "generative_models", "supplementary_material": "/attachment/77be05df3edba3e80f4403a9518534d5d790305b.zip", "author": "Yawar Siddiqui;Tom Monnier;Filippos Kokkinos;Mahendra Kariya;Yanir Kleiman;Emilien Garreau;Oran Gafni;Natalia Neverova;Andrea Vedaldi;Roman Shapovalov;David Novotny", "authorids": "~Yawar_Siddiqui1;~Tom_Monnier1;~Filippos_Kokkinos1;~Mahendra_Kariya1;~Yanir_Kleiman1;~Emilien_Garreau1;~Oran_Gafni1;~Natalia_Neverova1;~Andrea_Vedaldi1;~Roman_Shapovalov1;~David_Novotny1", "gender": "M;M;M;M;M;;;F;M;M;M", "homepage": ";https://www.tmonnier.com;https://fkokkinos.github.io/;;http://www.yanirk.com;https://github.com/EmGarr;;https://nneverova.github.io/;https://www.robots.ox.ac.uk/~vedaldi/;http://shapovalov.ro;https://d-novotny.github.io/", "dblp": "213/7787;267/9457;186/7080;;01/1609;;;119/1495;99/2825;82/9941;161/9863", "google_scholar": "u26UK5QAAAAJ;ZfV1DqMAAAAJ;uuXQjUIAAAAJ;;ZbsbJhoAAAAJ;;;https://scholar.google.fr/citations?user=cLPaHcIAAAAJ;bRT7t28AAAAJ;icaQKyIAAAAJ;2glXz7cAAAAJ", "orcid": ";;;;0000-0002-6004-1299;;;;0000-0003-1374-2858;;", "linkedin": "yawarnihal/;;;mahendrakariya/;;;;;;;", "or_profile": "~Yawar_Siddiqui1;~Tom_Monnier1;~Filippos_Kokkinos1;~Mahendra_Kariya1;~Yanir_Kleiman1;~Emilien_Garreau1;~Oran_Gafni1;~Natalia_Neverova1;~Andrea_Vedaldi1;~Roman_Shapovalov1;~David_Novotny1", "aff": "Technical University Munich;Meta Facebook;Meta AI;Meta Facebook;Meta AI;;;Meta GenAI;Meta;Meta;Meta", "aff_domain": "tum.de;facebook.com;fb.com;facebook.com;meta.com;;;meta.com;meta.com;meta.com;meta.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;;;Principal Researcher;Researcher;Engineer;Researcher", "bibtex": "@inproceedings{\nsiddiqui2024meta,\ntitle={Meta 3D AssetGen: Text-to-Mesh Generation with High-Quality Geometry, Texture, and {PBR} Materials},\nauthor={Yawar Siddiqui and Tom Monnier and Filippos Kokkinos and Mahendra Kariya and Yanir Kleiman and Emilien Garreau and Oran Gafni and Natalia Neverova and Andrea Vedaldi and Roman Shapovalov and David Novotny},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M3BIsgGQNb}\n}", "github": "", "reviewers": "Xevg;cPQZ;Zgd2;z8qF", "pdf_size": 12949559, "rating": "6;6;6;7", "confidence": "4;4;4;5", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "4;4;4;4", "wc_summary": "95;54;72;100", "wc_strengths": "35;22;49;134", "wc_weaknesses": "43;76;156;142", "wc_questions": "26;84;46;177", "wc_limitations": "20;19;14;8", "wc_review": "219;255;337;561", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "67;67;67;67", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 80.25, 18.471261462065875 ], "wc_strengths_avg": [ 60.0, 43.777848279695064 ], "wc_weaknesses_avg": [ 104.25, 46.51007955271631 ], "wc_questions_avg": [ 83.25, 57.99730597191563 ], "wc_limitations_avg": [ 15.25, 4.763139720814412 ], "wc_review_avg": [ 343.0, 132.92855223765886 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 67.0, 0.0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16613287370270193685&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tum.de;facebook.com;fb.com;facebook.com;meta.com;;;meta.com;meta.com;meta.com;meta.com", "author_num": 11, "aff_unique_index": "0;1;1;1;1;1;1;1;1", "aff_unique_norm": "Technical University of Munich;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.tum.de;https://meta.com", "aff_unique_abbr": "TUM;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Meta-Controller: Few-Shot Imitation of Unseen Embodiments and Tasks in Continuous Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95546", "id": "M5D5rMwLjj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M5D5rMwLjj", "openreview": "https://openreview.net/forum?id=M5D5rMwLjj", "poster": "", "project": "", "author_site": "Seongwoong Cho, Donggyun Kim, Jinwoo Lee, Seunghoon Hong", "tldr": "", "abstract": "Generalizing across robot embodiments and tasks is crucial for adaptive robotic systems. Modular policy learning approaches adapt to new embodiments but are limited to specific tasks, while few-shot imitation learning (IL) approaches often focus on a single embodiment.\nIn this paper, we introduce a few-shot behavior cloning framework to simultaneously generalize to unseen embodiments and tasks using a few (e.g., five) reward-free demonstrations. Our framework leverages a joint-level input-output representation to unify the state and action spaces of heterogeneous embodiments and employs a novel structure-motion state encoder that is parameterized to capture both shared knowledge across all embodiments and embodiment-specific knowledge. A matching-based policy network then predicts actions from a few demonstrations, producing an adaptive policy that is robust to over-fitting. Evaluated in the DeepMind Control suite, our framework termed Meta-Controller demonstrates superior few-shot generalization to unseen embodiments and tasks over modular policy learning and few-shot IL approaches.", "keywords": "few-shot learning;imitation learning;behavior cloning;transformers;meta-learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Seongwoong Cho;Donggyun Kim;Jinwoo Lee;Seunghoon Hong", "authorids": "~Seongwoong_Cho1;~Donggyun_Kim1;~Jinwoo_Lee2;~Seunghoon_Hong2", "gender": "M;;M;M", "homepage": "https://www.github.com/seongwoongcho;;https://github.com/SaintExup3ry;https://maga33.github.io/", "dblp": ";;;142/3014.html", "google_scholar": ";g_CtB50AAAAJ;;hvr3ALkAAAAJ", "orcid": ";;;", "linkedin": ";%EB%8F%99%EA%B7%A0-%EA%B9%80-37a890187/;;seunghoon-hong-194489a4/", "or_profile": "~Seongwoong_Cho1;~Donggyun_Kim1;~Jinwoo_Lee2;~Seunghoon_Hong1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\ncho2024metacontroller,\ntitle={Meta-Controller: Few-Shot Imitation of Unseen Embodiments and Tasks in Continuous Control},\nauthor={Seongwoong Cho and Donggyun Kim and Jinwoo Lee and Seunghoon Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M5D5rMwLjj}\n}", "github": "", "reviewers": "TTwU;iQ1f;dCFx;EmwS", "pdf_size": 12040476, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "64;48;52;161", "wc_strengths": "81;39;36;189", "wc_weaknesses": "49;67;56;182", "wc_questions": "100;107;87;100", "wc_limitations": "56;17;5;2", "wc_review": "350;278;236;634", "wc_reply_reviewers": "438;71;13;13", "wc_reply_authors": "413;309;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.25, 46.418611569067856 ], "wc_strengths_avg": [ 86.25, 61.93292742959919 ], "wc_weaknesses_avg": [ 88.5, 54.36221113972462 ], "wc_questions_avg": [ 98.5, 7.22841614740048 ], "wc_limitations_avg": [ 20.0, 21.529050141610984 ], "wc_review_avg": [ 374.5, 155.26992625746945 ], "wc_reply_reviewers_avg": [ 133.75, 177.24753171765187 ], "wc_reply_authors_avg": [ 180.5, 184.2070845543135 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6ViN6MG_zgAJ:scholar.google.com/&scioq=Meta-Controller:+Few-Shot+Imitation+of+Unseen+Embodiments+and+Tasks+in+Continuous+Control&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "$\\texttt{Model-GLUE}$: Democratized LLM Scaling for A Large Model Zoo in the Wild", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97737", "id": "M5JW7O9vc7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M5JW7O9vc7", "openreview": "https://openreview.net/forum?id=M5JW7O9vc7", "poster": "/media/PosterPDFs/NeurIPS%202024/97737.png?t=1733940896.1045895", "project": "", "author_site": "Xinyu Zhao, Guoheng Sun, Ruisi Cai, Yukun Zhou, Pingzhi Li, Peihao Wang, Bowen Tan, Yexiao He, Li Chen, Yi Liang, Beidi Chen, Binhang Yuan, Hongyi Wang, Ang Li, Zhangyang "Atlas" Wang, Tianlong Chen", "tldr": "", "abstract": "As Large Language Models (LLMs) excel across tasks and specialized domains, scaling LLMs based on existing models has gained significant attention, which is challenged by potential performance drop when combining disparate models. \nVarious techniques have been proposed to aggregate pre-trained LLMs, including model merging, Mixture-of-Experts, and stacking. Despite their merits, a comprehensive comparison and synergistic application of them to a diverse model zoo is yet to be adequately addressed.\nIn light of this research gap, this paper introduces $\\texttt{Model-GLUE}$, a holistic LLM scaling guideline. \nFirst, our work starts with a benchmarking of existing LLM scaling techniques, especially selective merging, and variants of mixture. \nUtilizing the insights from the benchmark results, we formulate a strategy for the selection and aggregation of a heterogeneous model zoo characterizing different architectures and initialization.\nOur methodology involves clustering mergeable models, selecting a merging strategy, and integrating model clusters through model-level mixture. Finally, evidenced by our experiments on a diverse Llama-2-based model zoo, $\\texttt{Model-GLUE}$ shows an average performance enhancement of 5.61\\%, achieved without additional training.\nCodes are available at https://github.com/Model-GLUE/Model-GLUE.", "keywords": "Large Language Models;Merging;Mixture of Experts", "primary_area": "", "supplementary_material": "", "author": "Xinyu Zhao;Guoheng Sun;Ruisi Cai;Yukun Zhou;Pingzhi Li;Peihao Wang;Bowen Tan;Yexiao He;Li Chen;Yi Liang;Beidi Chen;Binhang Yuan;Hongyi Wang;Ang Li;Zhangyang Wang;Tianlong Chen", "authorids": "~Xinyu_Zhao3;~Guoheng_Sun1;~Ruisi_Cai1;~Yukun_Zhou4;~Pingzhi_Li1;~Peihao_Wang1;~Bowen_Tan2;~Yexiao_He1;~Li_Chen3;~Yi_Liang1;~Beidi_Chen1;~Binhang_Yuan1;~Hongyi_Wang1;~Ang_Li6;~Zhangyang_Wang1;~Tianlong_Chen1", "gender": ";M;F;M;M;M;M;M;F;M;F;M;M;M;M;M", "homepage": "https://zhaocinyu.github.io/;;https://cairuisi.github.io;https://marcoszyk.github.io/;https://pingzhili.github.io;https://peihaowang.github.io/;https://bowentan.me;;;https://research.google/people/108265/;https://www.andrew.cmu.edu/user/beidic/;https://binhangyuan.github.io/site/;https://hwang595.github.io/;https://www.ang-li.com;https://vita-group.github.io;https://tianlong-chen.github.io", "dblp": ";;341/1491;;358/4405;239/4075;;;c/LiChen;;192/1339;141/0690.html;15/832-1.html;33/2805-5;119/4026;", "google_scholar": "https://scholar.google.com/citations?hl=en;fMnmSXsAAAAJ;B0chY1AAAAAJ;;QUfhEyQAAAAJ;fqf2tBsAAAAJ;;https://scholar.google.com/citations?hl=en;;9vQ7gbgAAAAJ;;TflKxcIAAAAJ;zYdZORsAAAAJ;JVKSaWIAAAAJ;pxFyKAIAAAAJ;LE3ctn0AAAAJ", "orcid": "0009-0000-0253-5488;;;;;;;;;0000-0002-6622-8919;;0000-0002-3188-2769;;;;0000-0001-7774-8197", "linkedin": ";guoheng-sun-1b640126a/;;;pingzhili/;peihao-wang-25a411162/;;;;;;;hongyi-wang-b89651102/;;;tianlong-chen-783862167/", "or_profile": "~Xinyu_Zhao3;~Guoheng_Sun1;~Ruisi_Cai1;~Yukun_Zhou4;~Pingzhi_Li1;~Peihao_Wang1;~Bowen_Tan2;~Yexiao_He1;~Li_Chen3;~Yi_Liang1;~Beidi_Chen1;~Binhang_Yuan1;~Hongyi_Wang1;~Ang_Li6;~Zhangyang_Wang1;~Tianlong_Chen1", "aff": "Peking University;Sichuan University;University of Texas at Austin;Tsinghua University;University of North Carolina at Chapel Hill;University of Texas, Austin;Carnegie Mellon University;University of Maryland, College Park;;Research, Google;Meta Facebook;Hong Kong University of Science and Technology;Carnegie Mellon University;Duke University;University of Texas at Austin;Harvard University", "aff_domain": "pku.edu.cn;scu.edu.cn;utexas.edu;mails.tsinghua.edu.cn;unc.edu;utexas.edu;cmu.edu;umd.edu;;research.google.com;fb.com;ust.hk;andrew.cmu.edu;duke.edu;utexas.edu;harvard.edu", "position": "MS student;Undergrad student;PhD student;MS student;PhD student;PhD student;PhD student;PhD student;;Researcher;Researcher;Assistant Professor;Researcher;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nzhao2024textttmodelglue,\ntitle={\\${\\textbackslash}texttt\\{Model-{GLUE}\\}\\$: Democratized {LLM} Scaling for A Large Model Zoo in the Wild},\nauthor={Xinyu Zhao and Guoheng Sun and Ruisi Cai and Yukun Zhou and Pingzhi Li and Peihao Wang and Bowen Tan and Yexiao He and Li Chen and Yi Liang and Beidi Chen and Binhang Yuan and Hongyi Wang and Ang Li and Zhangyang Wang and Tianlong Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=M5JW7O9vc7}\n}", "github": "", "reviewers": "9iRC;5WA2;Qy6M", "pdf_size": 963113, "rating": "6;6;7", "confidence": "2;4;2", "wc_summary_and_contributions": "45;58;90", "wc_strengths": "44;12;2", "wc_improvement": "83;4;2", "wc_limitations": "55;4;14", "wc_correctness": "22;4;1", "wc_clarity": "157;13;1", "wc_relation_to_prior_work": "24;34;1", "wc_documentation": "24;7;3", "wc_additional_feedback": "1;1;1", "wc_review": "455;137;115", "wc_reply_reviewers": "0;74;0", "wc_reply_authors": "0;36;0", "reply_reviewers": "0;2;0", "reply_authors": "1;7;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 64.33333333333333, 18.909139471577113 ], "wc_strengths_avg": [ 19.333333333333332, 17.9133717900592 ], "wc_improvement_avg": [ 29.666666666666668, 37.72119946248911 ], "wc_limitations_avg": [ 24.333333333333332, 22.065558884580486 ], "wc_correctness_avg": [ 9.0, 9.273618495495704 ], "wc_clarity_avg": [ 57.0, 70.8801805866774 ], "wc_relation_to_prior_work_avg": [ 19.666666666666668, 13.816254517375139 ], "wc_documentation_avg": [ 11.333333333333334, 9.104333522498441 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 235.66666666666666, 155.35193021724004 ], "wc_reply_reviewers_avg": [ 24.666666666666668, 34.883934538536344 ], "wc_reply_authors_avg": [ 12.0, 16.97056274847714 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 2.494438257849294 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12333752382588164878&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;scu.edu.cn;utexas.edu;mails.tsinghua.edu.cn;unc.edu;utexas.edu;cmu.edu;umd.edu;;research.google.com;fb.com;ust.hk;andrew.cmu.edu;duke.edu;utexas.edu;harvard.edu", "author_num": 16, "aff_unique_index": "0;1;2;3;4;2;5;6;7;8;9;5;10;2;11", "aff_unique_norm": "Peking University;Sichuan University;University of Texas at Austin;Tsinghua University;University of North Carolina;Carnegie Mellon University;University of Maryland;Google;Meta;Hong Kong University of Science and Technology;Duke University;Harvard University", "aff_unique_dep": ";;;;;;;Google Research;Meta Platforms, Inc.;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.scu.edu.cn;https://www.utexas.edu;https://www.tsinghua.edu.cn;https://www.unc.edu;https://www.cmu.edu;https://www/umd.edu;https://research.google;https://meta.com;https://www.ust.hk;https://www.duke.edu;https://www.harvard.edu", "aff_unique_abbr": "Peking U;SCU;UT Austin;THU;UNC;CMU;UMD;Google;Meta;HKUST;Duke;Harvard", "aff_campus_unique_index": "1;2;1;3;4;5;1", "aff_campus_unique": ";Austin;Chapel Hill;College Park;Mountain View;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;1;1;1;1;1;1;0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Structured Multi-Track Accompaniment Arrangement via Style Prior Modelling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95545", "id": "M75dBr10dZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M75dBr10dZ", "openreview": "https://openreview.net/forum?id=M75dBr10dZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95545.png?t=1731045529.6893349", "project": "", "author_site": "Jingwei Zhao, Gus Xia, Ziyu Wang, Ye Wang", "tldr": "", "abstract": "In the realm of music AI, arranging rich and structured multi-track accompaniments from a simple lead sheet presents significant challenges. Such challenges include maintaining track cohesion, ensuring long-term coherence, and optimizing computational efficiency. In this paper, we introduce a novel system that leverages prior modelling over disentangled style factors to address these challenges. Our method presents a two-stage process: initially, a piano arrangement is derived from the lead sheet by retrieving piano texture styles; subsequently, a multi-track orchestration is generated by infusing orchestral function styles into the piano arrangement. Our key design is the use of vector quantization and a unique multi-stream Transformer to model the long-term flow of the orchestration style, which enables flexible, controllable, and structured music generation. Experiments show that by factorizing the arrangement task into interpretable sub-stages, our approach enhances generative capacity while improving efficiency. Additionally, our system supports a variety of music genres and provides style control at different composition hierarchies. We further show that our system achieves superior coherence, structure, and overall arrangement quality compared to existing baselines.", "keywords": "symbolic music generation;style transfer;accompaniment arrangement", "primary_area": "generative_models", "supplementary_material": "/attachment/d37e25a955616209ab93f5a8085082ccbc2e195e.zip", "author": "Jingwei Zhao;Gus Xia;Ziyu Wang;Ye Wang", "authorids": "~Jingwei_Zhao2;~Gus_Xia1;~Ziyu_Wang10;~Ye_Wang3", "gender": "M;M;M;M", "homepage": "https://zhaojw1998.github.io/;http://www.musicxlab.com;;https://smcnus.comp.nus.edu.sg/", "dblp": "188/2761;;;44/6292-7", "google_scholar": "https://scholar.google.ca/citations?hl=en;;Y0phDUIAAAAJ;https://scholar.google.com.sg/citations?user=CdgLLL8AAAAJ", "orcid": ";;;0000-0002-0123-1260", "linkedin": "jingweizhao1998/;;;", "or_profile": "~Jingwei_Zhao2;~Gus_Xia1;~Ziyu_Wang10;~Ye_Wang3", "aff": "National University of Singapore;New York University;New York University;National University of Singapore", "aff_domain": "u.nus.edu;nyu.edu;nyu.edu;nus.edu.sg", "position": "PhD student;Assistant Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhao2024structured,\ntitle={Structured Multi-Track Accompaniment Arrangement via Style Prior Modelling},\nauthor={Jingwei Zhao and Gus Xia and Ziyu Wang and Ye Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M75dBr10dZ}\n}", "github": "", "reviewers": "ANaR;AQ7F;Y3LG;iTts", "pdf_size": 3020039, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "4;3;4;4", "novelty": "3;3;3;4", "presentation": "2;3;4;3", "wc_summary": "92;81;90;204", "wc_strengths": "44;41;79;221", "wc_weaknesses": "40;125;112;811", "wc_questions": "197;216;72;23", "wc_limitations": "138;3;13;7", "wc_review": "511;466;366;1266", "wc_reply_reviewers": "0;56;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.75, 50.543916547889324 ], "wc_strengths_avg": [ 96.25, 73.5573755649289 ], "wc_weaknesses_avg": [ 272.0, 312.87137932383655 ], "wc_questions_avg": [ 127.0, 81.64251343509703 ], "wc_limitations_avg": [ 40.25, 56.548098995456954 ], "wc_review_avg": [ 652.25, 358.21388513009936 ], "wc_reply_reviewers_avg": [ 14.0, 24.24871130596428 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17754173913265176348&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "u.nus.edu;nyu.edu;nyu.edu;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "National University of Singapore;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.nyu.edu", "aff_unique_abbr": "NUS;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Singapore;United States" }, { "id": "M7SO74I9mo", "title": "The RealHumanEval: Evaluating Large Language Models' Abilities to Support Programmers", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Evaluation of large language models for code has primarily relied on static benchmarks, including HumanEval, or more recently using human preferences of LLM responses. As LLMs are increasingly used as programmer assistants, we study whether gains on existing benchmarks or more preferred LLM responses translate to programmer productivity when coding with LLMs, including time spent coding. We introduce RealHumanEval, a web interface to measure the ability of LLMs to assist programmers, through either autocomplete or chat support. We conducted a user study (N=213) using RealHumanEval in which users interacted with six LLMs of varying base model performance. Despite static benchmarks not incorporating humans-in-the-loop, we find that improvements in benchmark performance lead to increased programmer productivity; however gaps in benchmark versus human performance are not proportional---a trend that holds across both forms of LLM support. In contrast, we find that programmer preferences do not correlate with their actual performance, motivating the need for better, human-centric proxy signals. We also open-source RealHumanEval to enable human-centric evaluation of new models and the study data to facilitate efforts to improve code models.", "keywords": "code LLM;evaluation;ai-assisted programming", "primary_area": "", "supplementary_material": "/attachment/ff418c7f10b065ceb9484b26e5572e7a3e8574f0.zip", "author": "Hussein Mozannar;Valerie Chen;Mohammed Alsobay;Subhro Das;Sebastian Zhao;Dennis Wei;Manish Nagireddy;Prasanna Sattigeri;Ameet Talwalkar;David Sontag", "authorids": "~Hussein_Mozannar1;~Valerie_Chen2;~Mohammed_Alsobay1;~Subhro_Das1;~Sebastian_Zhao1;~Dennis_Wei1;~Manish_Nagireddy1;~Prasanna_Sattigeri1;~Ameet_Talwalkar1;~David_Sontag1", "gender": "M;F;;;M;M;M;;M;M", "homepage": "https://husseinmozannar.github.io/;https://valeriechen.github.io/;;;;https://sites.google.com/site/dennislwei/;https://research.ibm.com/people/manish-nagireddy;;http://www.cs.cmu.edu/~atalwalk/;http://people.csail.mit.edu/dsontag/", "dblp": ";234/6033;;;;59/8761;318/9229;00/7428;56/5528;12/673", "google_scholar": "XCfZyIkAAAAJ;94yn2j0AAAAJ;;;;r4ldy4AAAAAJ;15MCWDUAAAAJ;m-s38ikAAAAJ;https://scholar.google.com.tw/citations?user=TW7U1W0AAAAJ;LfcroyAAAAAJ", "orcid": ";;0000-0001-5350-2061;;;;;0000-0003-4435-0486;;0000-0002-5034-7796", "linkedin": ";;;;https://linkedin.com/in/sebbyzhao;dennis-wei-4886036b/;mnagireddy/;prasannasattigeri/;;", "or_profile": "~Hussein_Mozannar1;~Valerie_Chen2;~Mohammed_Alsobay1;~Subhro_Das1;~Sebastian_Zhao1;~Dennis_Wei1;~Manish_Nagireddy1;~Prasanna_Sattigeri1;~Ameet_Talwalkar1;~David_Sontag1", "aff": "Massachusetts Institute of Technology;Carnegie Mellon University;Massachusetts Institute of Technology;;UC Berkeley, University of California, Berkeley;International Business Machines;IBM Research;IBM Research;Carnegie Mellon University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;cmu.edu;mit.edu;;cs.berkeley.edu;ibm.com;ibm.com;ibm.com;cmu.edu;mit.edu", "position": "PhD student;PhD student;PhD student;;Undergrad student;Research Staff Member;Researcher;Researcher;Associate Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024the,\ntitle={The RealHumanEval: Evaluating Large Language Models' Abilities to Support Programmers},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=M7SO74I9mo}\n}", "github": "", "project": "", "reviewers": "eCRo;37bo;nsQc", "site": "https://openreview.net/forum?id=M7SO74I9mo", "pdf_size": 2284870, "rating": "3;7;7", "confidence": "4;4;3", "wc_summary_and_contributions": "47;87;90", "wc_strengths": "80;82;43", "wc_improvement": "52;229;9", "wc_limitations": "26;62;35", "wc_correctness": "1;15;16", "wc_clarity": "1;16;7", "wc_relation_to_prior_work": "1;15;12", "wc_documentation": "1;9;49", "wc_additional_feedback": "1;1;1", "wc_review": "210;516;262", "wc_reply_reviewers": "385;0;29", "wc_reply_authors": "678;0;0", "reply_reviewers": "1;0;1", "reply_authors": "3;1;1", "rating_avg": [ 5.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 74.66666666666667, 19.601587237318874 ], "wc_strengths_avg": [ 68.33333333333333, 17.93197020841702 ], "wc_improvement_avg": [ 96.66666666666667, 95.20620894785287 ], "wc_limitations_avg": [ 41.0, 15.297058540778355 ], "wc_correctness_avg": [ 10.666666666666666, 6.847546194724712 ], "wc_clarity_avg": [ 8.0, 6.164414002968976 ], "wc_relation_to_prior_work_avg": [ 9.333333333333334, 6.018490028422597 ], "wc_documentation_avg": [ 19.666666666666668, 20.997354330698162 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 329.3333333333333, 133.6895242301688 ], "wc_reply_reviewers_avg": [ 138.0, 175.05618145802984 ], "wc_reply_authors_avg": [ 226.0, 319.6122650963195 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6862392999159553317&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;2;3;4;4;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Carnegie Mellon University;University of California, Berkeley;International Business Machines Corporation;IBM", "aff_unique_dep": ";;;;IBM Research", "aff_unique_url": "https://web.mit.edu;https://www.cmu.edu;https://www.berkeley.edu;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "MIT;CMU;UC Berkeley;IBM;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Building a stable classifier with the inflated argmax", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95544", "id": "M7zNXntzsp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M7zNXntzsp", "openreview": "https://openreview.net/forum?id=M7zNXntzsp", "poster": "", "project": "", "author_site": "Jake Soloff, Rina Barber, Rebecca Willett", "tldr": "", "abstract": "We propose a new framework for algorithmic stability in the context of multiclass classification. In practice, classification algorithms often operate by first assigning a continuous score (for instance, an estimated probability) to each possible label, then taking the maximizer---i.e., selecting the class that has the highest score. A drawback of this type of approach is that it is inherently unstable, meaning that it is very sensitive to slight perturbations of the training data, since taking the maximizer is discontinuous. Motivated by this challenge, we propose a pipeline for constructing stable classifiers from data, using bagging (i.e., resampling and averaging) to produce stable continuous scores, and then using a stable relaxation of argmax, which we call the \"inflated argmax\", to convert these scores to a set of candidate labels. The resulting stability guarantee places no distributional assumptions on the data, does not depend on the number of classes or dimensionality of the covariates, and holds for any base classifier. Using a common benchmark data set, we demonstrate that the inflated argmax provides necessary protection against unstable classifiers, without loss of accuracy.", "keywords": "Algorithmic stability;set-valued classification;bagging", "primary_area": "learning_theory", "supplementary_material": "/attachment/288bf8559376bbc14b957f1665d80f42b3278481.zip", "author": "Jake A Soloff;Rina Foygel Barber;Rebecca Willett", "authorids": "~Jake_A_Soloff1;~Rina_Foygel_Barber2;~Rebecca_Willett1", "gender": ";F;F", "homepage": "http://jake-soloff.github.io/;https://willett.psd.uchicago.edu/;http://www.stat.uchicago.edu/~rina", "dblp": ";w/RebeccaWillett;", "google_scholar": ";bGRVPl8AAAAJ;", "orcid": ";0000-0002-8109-7582;", "linkedin": ";rebecca-willett-90b95973/;", "or_profile": "~Jake_A_Soloff1;~Rebecca_Willett1;~Rina_Barber1", "aff": "University of Chicago;University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;uchicago.edu", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsoloff2024building,\ntitle={Building a stable classifier with the inflated argmax},\nauthor={Jake A Soloff and Rina Foygel Barber and Rebecca Willett},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M7zNXntzsp}\n}", "github": "", "reviewers": "mVCr;sBU3;LzW3;m1xX;FNaQ", "pdf_size": 906296, "rating": "4;6;7;7;7", "confidence": "3;3;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;2;3;3;3", "wc_summary": "108;42;91;137;417", "wc_strengths": "24;41;129;151;106", "wc_weaknesses": "258;157;100;145;302", "wc_questions": "24;54;342;68;133", "wc_limitations": "12;7;13;1;35", "wc_review": "426;301;675;502;993", "wc_reply_reviewers": "0;80;226;51;569", "wc_reply_authors": "0;19;19;0;756", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;2;2;1;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 159.0, 132.6363449436089 ], "wc_strengths_avg": [ 90.2, 49.507171197716396 ], "wc_weaknesses_avg": [ 192.4, 75.30365196987461 ], "wc_questions_avg": [ 124.2, 114.57818291454966 ], "wc_limitations_avg": [ 13.6, 11.516944039110374 ], "wc_review_avg": [ 579.4, 239.6802870492273 ], "wc_reply_reviewers_avg": [ 185.2, 206.07901397279636 ], "wc_reply_authors_avg": [ 158.8, 298.7208730571066 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.34299717028501764, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5279726212343778091&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uchicago.edu;uchicago.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Scaling Sign Language Translation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95543", "id": "M80WgiO2Lb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M80WgiO2Lb", "openreview": "https://openreview.net/forum?id=M80WgiO2Lb", "poster": "/media/PosterPDFs/NeurIPS%202024/95543.png?t=1731710208.80966", "project": "", "author_site": "Biao Zhang, Garrett Tanzer, Orhan Firat", "tldr": "", "abstract": "Sign language translation (SLT) addresses the problem of translating information from a sign language in video to a spoken language in text. Existing studies, while showing progress, are often limited to narrow domains and/or few sign languages and struggle with open-domain tasks. In this paper, we push forward the frontier of SLT by scaling pretraining data, model size, and number of translation directions. We perform large-scale SLT pretraining on different data including 1) noisy multilingual Youtube SLT data,\n2) parallel text corpora, and 3) SLT data augmented by translating video captions to other languages with off-the-shelf machine translation models. We unify different pretraining tasks with task-specific prompts under the encoder-decoder architecture, and initialize the SLT model with pretrained (m/By)T5 models across model sizes. SLT pretraining results on How2Sign and FLEURS-ASL\\#0 (ASL to 42 spoken languages) demonstrate the significance of data/model scaling and cross-lingual cross-modal transfer, as well as the feasibility of zero-shot SLT. We finetune the pretrained SLT models on 5 downstream open-domain SLT benchmarks covering 5 sign languages. Experiments show substantial quality improvements over the vanilla baselines, surpassing the previous state-of-the-art (SOTA) by wide margins.", "keywords": "Sign Language Translation;Model and Data Scaling;American Sign Language;Large-scale Pretraining;Video-to-text generation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Biao Zhang;Garrett Tanzer;Orhan Firat", "authorids": "~Biao_Zhang2;~Garrett_Tanzer1;~Orhan_Firat1", "gender": "M;M;M", "homepage": ";;", "dblp": "https://dblp.uni-trier.de/pers/hd/z/Zhang_0002:Biao;238/9928.html;120/2225", "google_scholar": "gqPKjaIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Biao_Zhang2;~Garrett_Tanzer1;~Orhan_Firat1", "aff": "Google DeepMind;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nzhang2024scaling,\ntitle={Scaling Sign Language Translation},\nauthor={Biao Zhang and Garrett Tanzer and Orhan Firat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M80WgiO2Lb}\n}", "github": "", "reviewers": "DP5W;XL93;jVKg;brKp", "pdf_size": 1173397, "rating": "6;6;7;7", "confidence": "4;4;4;5", "soundness": "3;2;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "152;67;66;60", "wc_strengths": "52;70;40;117", "wc_weaknesses": "220;117;24;82", "wc_questions": "73;2;22;26", "wc_limitations": "2;1;9;9", "wc_review": "499;257;161;294", "wc_reply_reviewers": "0;0;22;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 38.05505879643336 ], "wc_strengths_avg": [ 69.75, 29.294837429144405 ], "wc_weaknesses_avg": [ 110.75, 71.28595583984267 ], "wc_questions_avg": [ 30.75, 26.03243169586737 ], "wc_limitations_avg": [ 5.25, 3.766629793329841 ], "wc_review_avg": [ 302.75, 123.26470500512302 ], "wc_reply_reviewers_avg": [ 8.75, 9.310612224768036 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7268831296726021332&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Improving robustness to corruptions with multiplicative weight perturbations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95542", "id": "M8dy0ZuSb1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M8dy0ZuSb1", "openreview": "https://openreview.net/forum?id=M8dy0ZuSb1", "poster": "/media/PosterPDFs/NeurIPS%202024/95542.png?t=1734039792.423308", "project": "", "author_site": "Quoc Trung Trinh, Markus Heinonen, Luigi Acerbi, Samuel Kaski", "tldr": "", "abstract": "Deep neural networks (DNNs) excel on clean images but struggle with corrupted ones. Incorporating specific corruptions into the data augmentation pipeline can improve robustness to those corruptions but may harm performance on clean images and other types of distortion. In this paper, we introduce an alternative approach that improves the robustness of DNNs to a wide range of corruptions without compromising accuracy on clean images. We first demonstrate that input perturbations can be mimicked by multiplicative perturbations in the weight space. Leveraging this, we propose Data Augmentation via Multiplicative Perturbation (DAMP), a training method that optimizes DNNs under random multiplicative weight perturbations. We also examine the recently proposed Adaptive Sharpness-Aware Minimization (ASAM) and show that it optimizes DNNs under adversarial multiplicative weight perturbations. Experiments on image classification datasets (CIFAR-10/100, TinyImageNet and ImageNet) and neural network architectures (ResNet50, ViT-S/16, ViT-B/16) show that DAMP enhances model generalization performance in the presence of corruptions across different settings. Notably, DAMP is able to train a ViT-S/16 on ImageNet from scratch, reaching the top-1 error of 23.7% which is comparable to ResNet50 without extensive data augmentations.", "keywords": "covariate shift;corruption robustness;generalization;regularization;training method;deep learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Trung Trinh;Markus Heinonen;Luigi Acerbi;Samuel Kaski", "authorids": "~Trung_Trinh1;~Markus_Heinonen1;~Luigi_Acerbi1;~Samuel_Kaski1", "gender": "M;M;M;M", "homepage": "https://trungtr.com;https://users.aalto.fi/~heinom10/;http://luigiacerbi.com/;https://people.aalto.fi/samuel.kaski", "dblp": "323/9628;22/7709;72/1450;64/5826", "google_scholar": "QL_WEh8AAAAJ;hFtfHZoAAAAJ;https://scholar.google.co.uk/citations?user=QYBZoGwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7471-7336;0000-0003-1925-9154", "linkedin": ";;luigi-acerbi-719b492/;samuel-kaski-27790/", "or_profile": "~Trung_Trinh1;~Markus_Heinonen1;~Luigi_Acerbi1;~Samuel_Kaski1", "aff": "Aalto University;Aalto University;University of Helsinki;Aalto University", "aff_domain": "aalto.fi;aalto.fi;helsinki.fi;aalto.fi", "position": "PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ntrinh2024improving,\ntitle={Improving robustness to corruptions with multiplicative weight perturbations},\nauthor={Trung Trinh and Markus Heinonen and Luigi Acerbi and Samuel Kaski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=M8dy0ZuSb1}\n}", "github": "", "reviewers": "XEeL;dwU7;Vu1M;dM3x", "pdf_size": 634762, "rating": "5;6;6;6", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "16;64;95;74", "wc_strengths": "8;46;97;58", "wc_weaknesses": "96;205;400;79", "wc_questions": "66;47;2;47", "wc_limitations": "4;1;13;28", "wc_review": "190;363;607;286", "wc_reply_reviewers": "424;99;29;33", "wc_reply_authors": "1403;118;19;19", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.25, 28.951468011138918 ], "wc_strengths_avg": [ 52.25, 31.751968442917047 ], "wc_weaknesses_avg": [ 195.0, 127.84952092205899 ], "wc_questions_avg": [ 40.5, 23.542514733987108 ], "wc_limitations_avg": [ 11.5, 10.5 ], "wc_review_avg": [ 361.5, 154.42231056424458 ], "wc_reply_reviewers_avg": [ 146.25, 162.75038402412451 ], "wc_reply_authors_avg": [ 389.75, 586.3946516638773 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MUt19YJm6asJ:scholar.google.com/&scioq=Improving+robustness+to+corruptions+with+multiplicative+weight+perturbations&hl=en&as_sdt=0,48", "gs_version_total": 6, "email": "aalto.fi;aalto.fi;helsinki.fi;aalto.fi", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Aalto University;University of Helsinki", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.helsinki.fi", "aff_unique_abbr": "Aalto;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Finland" }, { "title": "Is Your HD Map Constructor Reliable under Sensor Corruptions?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97736", "id": "M91nJrBrqG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=M91nJrBrqG", "openreview": "https://openreview.net/forum?id=M91nJrBrqG", "poster": "/media/PosterPDFs/NeurIPS%202024/97736.png?t=1733882296.4482534", "project": "", "author_site": "Xiaoshuai Hao, Mengchuan Wei, Yifan Yang, Haimei Zhao, Hui Zhang, Yi ZHOU, Qiang Wang, Weiming Li, Lingdong Kong, Jing Zhang", "tldr": "", "abstract": "Driving systems often rely on high-definition (HD) maps for precise environmental information, which is crucial for planning and navigation. While current HD map constructors perform well under ideal conditions, their resilience to real-world challenges, \\eg, adverse weather and sensor failures, is not well understood, raising safety concerns. This work introduces MapBench, the first comprehensive benchmark designed to evaluate the robustness of HD map construction methods against various sensor corruptions. Our benchmark encompasses a total of 29 types of corruptions that occur from cameras and LiDAR sensors. Extensive evaluations across 31 HD map constructors reveal significant performance degradation of existing methods under adverse weather conditions and sensor failures, underscoring critical safety concerns. We identify effective strategies for enhancing robustness, including innovative approaches that leverage multi-modal fusion, advanced data augmentation, and architectural techniques. These insights provide a pathway for developing more reliable HD map construction methods, which are essential for the advancement of autonomous driving technology. The benchmark toolkit and affiliated code and model checkpoints have been made publicly accessible.", "keywords": "HD Map Construction;Sensor Corruptions", "primary_area": "", "supplementary_material": "/attachment/bc4447fb38021e5878522fdfd8e5ee53ea8172d7.zip", "author": "Xiaoshuai Hao;Mengchuan Wei;Yifan Yang;Haimei Zhao;Hui Zhang;Yi ZHOU;Qiang Wang;Weiming Li;Lingdong Kong;Jing Zhang", "authorids": "~Xiaoshuai_Hao1;~Mengchuan_Wei1;~Yifan_Yang23;~Haimei_Zhao2;~Hui_Zhang10;~Yi_ZHOU12;~Qiang_Wang1;~Weiming_Li1;~Lingdong_Kong1;~Jing_Zhang17", "gender": "M;M;;F;M;F;M;M;;M", "homepage": "https://github.com/haoshuai714;https://github.com/weimengchuan;;;;;;;;", "dblp": "271/8403;;;231/1005;z/HuiZhang;;64/5630-23;;;05/3499-37.html", "google_scholar": "https://scholar.google.com.hk/citations?user=ui0lvY4AAAAJ;;;5EtQNJ4AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;zRszq9gAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-9912-1157;;0000-0001-5632-4408;;;0000-0001-6595-7661", "linkedin": ";;;;;yi-zhou-439b82137/;;weiming-li-b95a269;;", "or_profile": "~Xiaoshuai_Hao1;~Mengchuan_Wei1;~Yifan_Yang23;~Haimei_Zhao2;~Hui_Zhang10;~Yi_ZHOU12;~Qiang_Wang1;~Weiming_Li1;~Lingdong_Kong1;~Jing_Zhang17", "aff": "Samsung Rearch China-Beijing(SRCB);Samsung;;University of Sydney;Samsung Rearch China-Beijing(SRCB);Samsung;Samsung;Samsung;;The University of Sydney", "aff_domain": "samsung.com;samsung.com;;sydney.edu.au;samsung.com;samsung.com;samsung.com;samsung.com;;sydney.edu.au", "position": "Researcher;Researcher;;PhD student;Principle Researcher;Researcher;Researcher;Researcher;;Research Fellow", "bibtex": "@inproceedings{\nhao2024is,\ntitle={Is Your {HD} Map Constructor Reliable under Sensor Corruptions?},\nauthor={Xiaoshuai Hao and Mengchuan Wei and Yifan Yang and Haimei Zhao and Hui Zhang and Yi ZHOU and Qiang Wang and Weiming Li and Lingdong Kong and Jing Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=M91nJrBrqG}\n}", "github": "", "reviewers": "iTL7;XfNn;48Ly", "pdf_size": 6108803, "rating": "6;6;8", "confidence": "4;4;5", "wc_summary_and_contributions": "79;60;85", "wc_strengths": "185;70;6", "wc_improvement": "398;84;22", "wc_limitations": "118;13;1", "wc_correctness": "41;15;1", "wc_clarity": "73;8;1", "wc_relation_to_prior_work": "35;36;12", "wc_documentation": "11;21;1", "wc_additional_feedback": "1;1;1", "wc_review": "941;308;130", "wc_reply_reviewers": "27;10;0", "wc_reply_authors": "21;21;0", "reply_reviewers": "1;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 74.66666666666667, 10.656244908763854 ], "wc_strengths_avg": [ 87.0, 74.05853540724841 ], "wc_improvement_avg": [ 168.0, 164.5924259091732 ], "wc_limitations_avg": [ 44.0, 52.55473337388365 ], "wc_correctness_avg": [ 19.0, 16.57307052620807 ], "wc_clarity_avg": [ 27.333333333333332, 32.417416443771224 ], "wc_relation_to_prior_work_avg": [ 27.666666666666668, 11.08552609887726 ], "wc_documentation_avg": [ 11.0, 8.16496580927726 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 459.6666666666667, 348.0252225853114 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 11.145502331533658 ], "wc_reply_authors_avg": [ 14.0, 9.899494936611665 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17649818942247836119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "samsung.com;samsung.com;;sydney.edu.au;samsung.com;samsung.com;samsung.com;samsung.com;;sydney.edu.au", "author_num": 10, "aff_unique_index": "0;0;1;0;0;0;0;1", "aff_unique_norm": "Samsung;University of Sydney", "aff_unique_dep": "Samsung Research China;", "aff_unique_url": "https://www.samsung.com/cn;https://www.sydney.edu.au", "aff_unique_abbr": "SRCB;USYD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;2;0;1;1;1;2", "aff_country_unique": "China;South Korea;Australia" }, { "title": "Online Classification with Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95541", "id": "MB0DD5qAz8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MB0DD5qAz8", "openreview": "https://openreview.net/forum?id=MB0DD5qAz8", "poster": "", "project": "", "author_site": "Vinod Raman, Ambuj Tewari", "tldr": "", "abstract": "We study online classification when the learner has access to predictions about future examples. We design an online learner whose expected regret is never worse than the worst-case regret, gracefully improves with the quality of the predictions, and can be significantly better than the worst-case regret when the predictions of future examples are accurate. As a corollary, we show that if the learner is always guaranteed to observe data where future examples are easily predictable, then online learning can be as easy as transductive online learning. Our results complement recent work in online algorithms with predictions and smoothed online classification, which go beyond a worse-case analysis by using machine-learned predictions and distributional assumptions respectively.", "keywords": "Online Learning;Algorithms with Predictions", "primary_area": "online_learning", "supplementary_material": "", "author": "Vinod Raman;Ambuj Tewari", "authorids": "~Vinod_Raman1;~Ambuj_Tewari1", "gender": "M;M", "homepage": "https://vinodkraman.github.io;https://www.ambujtewari.com", "dblp": "126/5382;24/567", "google_scholar": "Wn5QzOgAAAAJ;ttbl4FsAAAAJ", "orcid": ";0000-0001-6969-7844", "linkedin": ";", "or_profile": "~Vinod_Raman1;~Ambuj_Tewari1", "aff": "Apple;University of Michigan - Ann Arbor", "aff_domain": "apple.com;umich.edu", "position": "Intern;Full Professor", "bibtex": "@inproceedings{\nraman2024online,\ntitle={Online Classification with Predictions},\nauthor={Vinod Raman and Ambuj Tewari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MB0DD5qAz8}\n}", "github": "", "reviewers": "f4Qh;HoBJ;EJB4;yULk", "pdf_size": 390478, "rating": "6;6;6;7", "confidence": "3;2;3;3", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "128;121;91;188", "wc_strengths": "45;84;67;12", "wc_weaknesses": "37;84;180;103", "wc_questions": "205;33;108;1", "wc_limitations": "1;1;35;1", "wc_review": "416;323;481;305", "wc_reply_reviewers": "10;18;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 132.0, 35.19232870953555 ], "wc_strengths_avg": [ 52.0, 26.91653766738954 ], "wc_weaknesses_avg": [ 101.0, 51.55094567512802 ], "wc_questions_avg": [ 86.75, 78.54417546833119 ], "wc_limitations_avg": [ 9.5, 14.722431864335457 ], "wc_review_avg": [ 381.25, 71.35255776774929 ], "wc_reply_reviewers_avg": [ 10.25, 6.5717197140474575 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11788035864726594522&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "apple.com;umich.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Apple;University of Michigan", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.umich.edu", "aff_unique_abbr": "Apple;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Unified Confidence Sequence for Generalized Linear Models, with Applications to Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95540", "id": "MDdOQayWTA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MDdOQayWTA", "openreview": "https://openreview.net/forum?id=MDdOQayWTA", "poster": "", "project": "", "author_site": "Junghyun Lee, Se-Young Yun, Kwang-Sung Jun", "tldr": "", "abstract": "We present a unified likelihood ratio-based confidence sequence (CS) for *any* (self-concordant) generalized linear model (GLM) that is guaranteed to be convex and numerically tight. We show that this is on par or improves upon known CSs for various GLMs, including Gaussian, Bernoulli, and Poisson. In particular, for the first time, our CS for Bernoulli has a $\\mathrm{poly}(S)$-free radius where $S$ is the norm of the unknown parameter. Our first technical novelty is its derivation, which utilizes a time-uniform PAC-Bayesian bound with a uniform prior/posterior, despite the latter being a rather unpopular choice for deriving CSs. As a direct application of our new CS, we propose a simple and natural optimistic algorithm called **OFUGLB**, applicable to *any* generalized linear bandits (**GLB**; Filippi et al. (2010)). Our analysis shows that the celebrated optimistic approach simultaneously attains state-of-the-art regrets for various self-concordant (not necessarily bounded) **GLB**s, and even $\\mathrm{poly}(S)$-free for bounded **GLB**s, including logistic bandits. The regret analysis, our second technical novelty, follows from combining our new CS with a new proof technique that completely avoids the previously widely used self-concordant control lemma (Faury et al., 2020, Lemma 9). Numerically, **OFUGLB** outperforms or is at par with prior algorithms for logistic bandits.", "keywords": "generalized linear model;bandit;UCB;anytime-valid;time-uniform;confidence sequence;PAC-Bayes", "primary_area": "bandits", "supplementary_material": "", "author": "Junghyun Lee;Se-Young Yun;Kwang-Sung Jun", "authorids": "~Junghyun_Lee1;~Se-Young_Yun1;~Kwang-Sung_Jun1", "gender": "M;M;M", "homepage": "https://nick-jhlee.github.io/;https://fbsqkd.github.io;http://kwangsungjun.github.io", "dblp": ";23/8862;88/8411", "google_scholar": "sYtSnnQAAAAJ;X_IAjb8AAAAJ;VgvC7o8AAAAJ", "orcid": "0000-0002-3898-6464;;", "linkedin": "junghyun-nick-lee-89799a197/;seyoung-yun-395130ab/;", "or_profile": "~Junghyun_Lee1;~Se-Young_Yun1;~Kwang-Sung_Jun1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;University of Arizona", "aff_domain": "kaist.ac.kr;kaist.ac.kr;cs.arizona.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2024a,\ntitle={A Unified Confidence Sequence for Generalized Linear Models, with Applications to Bandits},\nauthor={Junghyun Lee and Se-Young Yun and Kwang-Sung Jun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MDdOQayWTA}\n}", "github": "", "reviewers": "HLad;aQNL;q1y1;TTTd", "pdf_size": 39047005, "rating": "3;6;7;7", "confidence": "4;4;4;2", "soundness": "3;4;4;3", "novelty": "3;3;2;3", "presentation": "3;3;4;4", "wc_summary": "8;80;78;69", "wc_strengths": "54;19;189;105", "wc_weaknesses": "665;138;341;151", "wc_questions": "3;27;441;132", "wc_limitations": "1;12;40;1", "wc_review": "731;276;1089;458", "wc_reply_reviewers": "0;0;56;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 58.75, 29.59201750472583 ], "wc_strengths_avg": [ 91.75, 63.9350256119445 ], "wc_weaknesses_avg": [ 323.75, 212.77614410454947 ], "wc_questions_avg": [ 150.75, 174.45683563563796 ], "wc_limitations_avg": [ 13.5, 15.945218719101975 ], "wc_review_avg": [ 638.5, 306.3874181489834 ], "wc_reply_reviewers_avg": [ 17.75, 22.916969694966216 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.44022545316281186, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17291259414383938068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;kaist.ac.kr;cs.arizona.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.arizona.edu", "aff_unique_abbr": "KAIST;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "From Similarity to Superiority: Channel Clustering for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95539", "id": "MDgn9aazo0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MDgn9aazo0", "openreview": "https://openreview.net/forum?id=MDgn9aazo0", "poster": "/media/PosterPDFs/NeurIPS%202024/95539.png?t=1733027336.4766433", "project": "", "author_site": "Jialin Chen, Jan Eric Lenssen, Aosong Feng, Weihua Hu, Matthias Fey, Leandros Tassiulas, Jure Leskovec, Rex Ying", "tldr": "", "abstract": "Time series forecasting has attracted significant attention in recent decades.\n Previous studies have demonstrated that the Channel-Independent (CI) strategy improves forecasting performance by treating different channels individually, while it leads to poor generalization on unseen instances and ignores potentially necessary interactions between channels. Conversely, the Channel-Dependent (CD) strategy mixes all channels with even irrelevant and indiscriminate information, which, however, results in oversmoothing issues and limits forecasting accuracy.\n There is a lack of channel strategy that effectively balances individual channel treatment for improved forecasting performance without overlooking essential interactions between channels. Motivated by our observation of a correlation between the time series model's performance boost against channel mixing and the intrinsic similarity on a pair of channels, we developed a novel and adaptable \\textbf{C}hannel \\textbf{C}lustering \\textbf{M}odule (CCM). CCM dynamically groups channels characterized by intrinsic similarities and leverages cluster information instead of individual channel identities, combining the best of CD and CI worlds. Extensive experiments on real-world datasets demonstrate that CCM can (1) boost the performance of CI and CD models by an average margin of 2.4% and 7.2% on long-term and short-term forecasting, respectively; (2) enable zero-shot forecasting with mainstream time series forecasting models; (3) uncover intrinsic time series patterns among channels and improve interpretability of complex time series models.", "keywords": "Deep Learning;Time Series Forecasting", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Jialin Chen;Jan Eric Lenssen;Aosong Feng;Weihua Hu;Matthias Fey;Leandros Tassiulas;Jure Leskovec;Rex Ying", "authorids": "~Jialin_Chen2;~Jan_Eric_Lenssen1;~Aosong_Feng1;~Weihua_Hu1;~Matthias_Fey2;~Leandros_Tassiulas1;~Jure_Leskovec1;~Zhitao_Ying1", "gender": "F;M;M;M;M;M;;M", "homepage": "https://github.com/Cather-learner;https://janericlenssen.github.io/;;http://web.stanford.edu/~weihuahu/;http://rusty1s.github.io;;http://cs.stanford.edu/~jure/;https://www.cs.yale.edu/homes/ying-rex", "dblp": ";195/9868;260/0450;42/1232;180/9174;03/3843;l/JureLeskovec;209/4936", "google_scholar": "rHyMKPYAAAAJ;https://scholar.google.de/citations?user=enXCzCgAAAAJ;lIuUXKkAAAAJ;wAFMjfkAAAAJ;https://scholar.google.de/citations?user=5HaSBN0AAAAJ;https://scholar.google.com/citations?hl=en;Q_kKkIUAAAAJ;6fqNXooAAAAJ", "orcid": "0009-0007-0909-4620;0000-0003-4093-9840;;;;;0000-0002-5411-923X;", "linkedin": ";jan-eric-lenssen-08700b190/;;weihua-hu-a8284228/;;;leskovec/;rex-ying-92770148/", "or_profile": "~Jialin_Chen2;~Jan_Eric_Lenssen1;~Aosong_Feng1;~Weihua_Hu1;~Matthias_Fey2;~Leandros_Tassiulas1;~Jure_Leskovec1;~Zhitao_Ying1", "aff": "Yale University;Kumo;Yale University;;TU Dortmund University;Yale University;Kumo.AI;Yale University", "aff_domain": "yale.edu;kumo.ai;yale.edu;;udo.edu;yale.edu;kumo.ai;yale.edu", "position": "PhD student;Researcher;PhD student;;PhD student;Full Professor;Chief Scientist;Assistant Professor", "bibtex": "@inproceedings{\nchen2024from,\ntitle={From Similarity to Superiority: Channel Clustering for Time Series Forecasting},\nauthor={Jialin Chen and Jan Eric Lenssen and Aosong Feng and Weihua Hu and Matthias Fey and Leandros Tassiulas and Jure Leskovec and Rex Ying},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MDgn9aazo0}\n}", "github": "", "reviewers": "yXcm;SRyH;9puR;rxdY", "pdf_size": 5048846, "rating": "4;5;7;8", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "83;77;115;123", "wc_strengths": "41;67;83;79", "wc_weaknesses": "50;36;42;196", "wc_questions": "88;28;123;104", "wc_limitations": "54;74;26;39", "wc_review": "316;282;389;541", "wc_reply_reviewers": "0;23;12;17", "wc_reply_authors": "124;91;66;85", "reply_reviewers": "0;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.5, 19.817921182606415 ], "wc_strengths_avg": [ 67.5, 16.393596310755 ], "wc_weaknesses_avg": [ 81.0, 66.58077800686922 ], "wc_questions_avg": [ 85.75, 35.56947427219019 ], "wc_limitations_avg": [ 48.25, 17.865819320702872 ], "wc_review_avg": [ 382.0, 99.60672668048078 ], "wc_reply_reviewers_avg": [ 13.0, 8.455767262643882 ], "wc_reply_authors_avg": [ 91.5, 20.910523666326483 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7302967433402215, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14603623960562897708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "yale.edu;kumo.ai;yale.edu;;udo.edu;yale.edu;kumo.ai;yale.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;0;3;0", "aff_unique_norm": "Yale University;Kumo;Technische Universit\u00e4t Dortmund;Kumo.AI", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.yale.edu;;https://www.tu-dortmund.de;https://www.kumo.ai", "aff_unique_abbr": "Yale;;TU Dortmund;Kumo.AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Dortmund", "aff_country_unique_index": "0;0;2;0;0;0", "aff_country_unique": "United States;;Germany" }, { "title": "From Text to Trajectory: Exploring Complex Constraint Representation and Decomposition in Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95538", "id": "MDpIQ9hQ7H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MDpIQ9hQ7H", "openreview": "https://openreview.net/forum?id=MDpIQ9hQ7H", "poster": "/media/PosterPDFs/NeurIPS%202024/95538.png?t=1731724806.011619", "project": "", "author_site": "Pusen Dong, Tianchen Zhu, yue qiu, Haoyi Zhou, Jianxin Li", "tldr": "", "abstract": "Safe reinforcement learning (RL) requires the agent to finish a given task while obeying specific constraints. Giving constraints in natural language form has great potential for practical scenarios due to its flexible transfer capability and accessibility. Previous safe RL methods with natural language constraints typically need to design cost functions manually for each constraint, which requires domain expertise and lacks flexibility. In this paper, we harness the dual role of text in this task, using it not only to provide constraint but also as a training signal. We introduce the Trajectory-level Textual Constraints Translator (TTCT) to replace the manually designed cost function. Our empirical results demonstrate that TTCT effectively comprehends textual constraint and trajectory, and the policies trained by TTCT can achieve a lower violation rate than the standard cost function. Extra studies are conducted to demonstrate that the TTCT has zero-shot transfer capability to adapt to constraint-shift environments.", "keywords": "Safe RL;Multimodal Learning;Temporal Credit Assignment;Conditioned Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Pusen Dong;Tianchen Zhu;Yue Qiu;Haoyi Zhou;Jianxin Li", "authorids": "~Pusen_Dong1;~Tianchen_Zhu1;~Yue_Qiu7;~Haoyi_Zhou1;~Jianxin_Li3", "gender": "M;M;;M;M", "homepage": ";https://zhutc.tk;http://qiuyue.com;https://www.zhouhaoyi.com/;http://myjianxin.github.io", "dblp": "314/6442.html;https://dblp.uni-trier.de/pid/163/4107;;162/1287;l/JianxinLi-2.html", "google_scholar": ";P60wcZwAAAAJ;;mbrFlN0AAAAJ;EY2lqD0AAAAJ", "orcid": ";;;0000-0002-2393-3634;0000-0001-5152-0055", "linkedin": ";;;haoyi-zhou-54a7a69a/;", "or_profile": "~Pusen_Dong1;~Tianchen_Zhu1;~Yue_Qiu7;~Haoyi_Zhou1;~Jianxin_Li3", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "MS student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndong2024from,\ntitle={From Text to Trajectory: Exploring Complex Constraint Representation and Decomposition in Safe Reinforcement Learning},\nauthor={Pusen Dong and Tianchen Zhu and Yue Qiu and Haoyi Zhou and Jianxin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MDpIQ9hQ7H}\n}", "github": "", "reviewers": "cY35;z2Bi;T5E9", "pdf_size": 0, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "1;3;3", "novelty": "2;2;4", "presentation": "2;3;3", "wc_summary": "73;98;82", "wc_strengths": "103;88;114", "wc_weaknesses": "391;243;78", "wc_questions": "159;85;70", "wc_limitations": "6;13;9", "wc_review": "732;527;353", "wc_reply_reviewers": "87;0;17", "wc_reply_authors": "341;37;0", "reply_reviewers": "1;0;1", "reply_authors": "3;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.33333333333333, 10.338708279513883 ], "wc_strengths_avg": [ 101.66666666666667, 10.656244908763854 ], "wc_weaknesses_avg": [ 237.33333333333334, 127.84452363015876 ], "wc_questions_avg": [ 104.66666666666667, 38.90444133457716 ], "wc_limitations_avg": [ 9.333333333333334, 2.8674417556808756 ], "wc_review_avg": [ 537.3333333333334, 154.89853309684875 ], "wc_reply_reviewers_avg": [ 34.666666666666664, 37.65043898224237 ], "wc_reply_authors_avg": [ 126.0, 152.7765252473909 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ODjEJVNOH6YJ:scholar.google.com/&scioq=From+Text+to+Trajectory:+Exploring+Complex+Constraint+Representation+and+Decomposition+in+Safe+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Robust Offline Active Learning on Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95537", "id": "MDsl1ifiNS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MDsl1ifiNS", "openreview": "https://openreview.net/forum?id=MDsl1ifiNS", "poster": "/media/PosterPDFs/NeurIPS%202024/95537.png?t=1734073628.575719", "project": "", "author_site": "Yuanchen Wu, Yubai Yuan", "tldr": "", "abstract": "We consider the problem of active learning on graphs for node-level tasks, which has crucial applications in many real-world networks where labeling node responses is expensive. In this paper, we propose an offline active learning method that selects nodes to query by explicitly incorporating information from both the network structure and node covariates. Building on graph signal recovery theories and the random spectral sparsification technique, the proposed method adopts a two-stage biased sampling strategy that takes both informativeness and representativeness into consideration for node querying. Informativeness refers to the complexity of graph signals that are learnable from the responses of queried nodes, while representativeness refers to the capacity of queried nodes to control generalization errors given noisy node-level information. We establish a theoretical relationship between generalization error and the number of nodes selected by the proposed method. Our theoretical results demonstrate the trade-off between Informativeness and representativeness in active learning. Extensive numerical experiments show that the proposed method is competitive with existing graph-based active learning methods, especially when node covariates and responses contain noises. Additionally, the proposed method is applicable to both regression and classification tasks on graphs.", "keywords": "Offline active learning;graph semi-supervised learning;graph signal recovery;network sampling", "primary_area": "active_learning", "supplementary_material": "", "author": "Yuanchen Wu;Yubai Yuan", "authorids": "~Yuanchen_Wu2;~Yubai_Yuan1", "gender": "M;M", "homepage": "https://science.psu.edu/stat/people/yqw5734;", "dblp": ";", "google_scholar": ";NV2IMRMAAAAJ", "orcid": ";", "linkedin": "yuanchen-wu-b0a89a17a/;", "or_profile": "~Yuanchen_Wu2;~Yubai_Yuan1", "aff": "Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwu2024robust,\ntitle={Robust Offline Active Learning on Graphs},\nauthor={Yuanchen Wu and Yubai Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MDsl1ifiNS}\n}", "github": "", "reviewers": "hH2h;emFb;7FJA;4PW9", "pdf_size": 3173292, "rating": "5;5;6;6", "confidence": "3;4;2;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "44;57;68;76", "wc_strengths": "21;37;51;68", "wc_weaknesses": "50;118;197;81", "wc_questions": "8;2;60;212", "wc_limitations": "14;1;59;14", "wc_review": "137;215;435;451", "wc_reply_reviewers": "12;10;28;17", "wc_reply_authors": "29;342;223;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 61.25, 12.028611723719408 ], "wc_strengths_avg": [ 44.25, 17.340343133859836 ], "wc_weaknesses_avg": [ 111.5, 54.92039693957064 ], "wc_questions_avg": [ 70.5, 84.75110618747108 ], "wc_limitations_avg": [ 22.0, 22.01136070305514 ], "wc_review_avg": [ 309.5, 136.43588237703452 ], "wc_reply_reviewers_avg": [ 16.75, 6.977642868476432 ], "wc_reply_authors_avg": [ 156.0, 133.31354019753581 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:z_WSmLnrO8IJ:scholar.google.com/&scioq=Robust+Offline+Active+Learning+on+Graphs&hl=en&as_sdt=0,15", "gs_version_total": 4, "email": "psu.edu;psu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Approximately Pareto-optimal Solutions for Bi-Objective k-Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95536", "id": "MFKfm5scHi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MFKfm5scHi", "openreview": "https://openreview.net/forum?id=MFKfm5scHi", "poster": "/media/PosterPDFs/NeurIPS%202024/95536.png?t=1731580784.5991416", "project": "", "author_site": "Anna Arutyunova, Jan Eube, Heiko R\u00f6glin, Melanie Schmidt, Sarah Sturm, Julian Wargalla", "tldr": "", "abstract": "As a major unsupervised learning method, clustering has received a lot of attention over multiple decades. The various clustering problems that have been studied intensively include, e.g., the $k$-means problem and the $k$-center problem. However, in applications, it is common that good clusterings should optimize multiple objectives (e.g., visualizing data on a map by clustering districts into areas that are both geographically compact but also homogeneous with respect to the data). We study combinations of different objectives, for example optimizing $k$-center and $k$-means simultaneously or optimizing $k$-center with respect to two different metrics. Usually these objectives are conflicting and cannot be optimized simultaneously, making it necessary to find trade-offs. We develop novel algorithms for computing the set of Pareto-optimal solutions (approximately) for various combinations of two objectives. Our algorithms achieve provable approximation guarantees and we demonstrate in several experiments that the (approximate) Pareto set contains good clusterings that cannot be found by considering one of the objectives separately.", "keywords": "multi-criteria clustering;approximation algorithms;Pareto-optimal solutions;k-means;single linkage;k-median;k-center", "primary_area": "optimization", "supplementary_material": "", "author": "Anna Arutyunova;Jan Eube;Heiko R\u00f6glin;Melanie Schmidt;Sarah Sturm;Julian Wargalla", "authorids": "~Anna_Arutyunova1;~Jan_Eube1;~Heiko_R\u00f6glin1;~Melanie_Schmidt1;~Sarah_Sturm1;~Julian_Wargalla1", "gender": "F;M;M;F;F;Not Specified", "homepage": "https://www.algo.hhu.de/unser-team;https://tcs.cs.uni-bonn.de/doku.php/staff/janeube;http://roeglin.org/index.html;;https://nerva.cs.uni-bonn.de/doku.php/staff/sarah;", "dblp": "274/2301;234/1069;59/3127;67/7224-1;;294/9479.html", "google_scholar": ";;NLLMDCkAAAAJ;https://scholar.google.com/citations?hl=de;;", "orcid": ";;0009-0006-8438-3986;;;", "linkedin": ";;;;;", "or_profile": "~Anna_Arutyunova1;~Jan_Eube1;~Heiko_R\u00f6glin1;~Melanie_Schmidt1;~Sarah_Sturm1;~Julian_Wargalla1", "aff": "Heinrich-Heine Universit\u00e4t D\u00fcsseldorf;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;Heinrich Heine University D\u00fcsseldorf;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Heinrich-Heine Universit\u00e4t D\u00fcsseldorf", "aff_domain": "uni-duesseldorf.de;uni-bonn.de;uni-bonn.de;hhu.de;uni-bonn.de;uni-duesseldorf.de", "position": "Postdoc;PhD student;Full Professor;Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\narutyunova2024approximately,\ntitle={Approximately Pareto-optimal Solutions for Bi-Objective k-Clustering},\nauthor={Anna Arutyunova and Jan Eube and Heiko R{\\\"o}glin and Melanie Schmidt and Sarah Sturm and Julian Wargalla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MFKfm5scHi}\n}", "github": "", "reviewers": "Xxjr;CrwW;2DdM", "pdf_size": 6664444, "rating": "4;6;7", "confidence": "3;4;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "102;70;140", "wc_strengths": "84;77;79", "wc_weaknesses": "201;106;570", "wc_questions": "99;94;2", "wc_limitations": "14;1;25", "wc_review": "500;348;816", "wc_reply_reviewers": "18;67;23", "wc_reply_authors": "36;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 104.0, 28.61235164516658 ], "wc_strengths_avg": [ 80.0, 2.943920288775949 ], "wc_weaknesses_avg": [ 292.3333333333333, 200.13384410327893 ], "wc_questions_avg": [ 65.0, 44.594469014292194 ], "wc_limitations_avg": [ 13.333333333333334, 9.809292646374773 ], "wc_review_avg": [ 554.6666666666666, 194.93132693905878 ], "wc_reply_reviewers_avg": [ 36.0, 22.015146301277824 ], "wc_reply_authors_avg": [ 12.0, 16.97056274847714 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G4COjK-H4J8J:scholar.google.com/&scioq=Approximately+Pareto-optimal+Solutions+for+Bi-Objective+k-Clustering&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "uni-duesseldorf.de;uni-bonn.de;uni-bonn.de;hhu.de;uni-bonn.de;uni-duesseldorf.de", "author_num": 6, "aff_unique_index": "0;1;2;3;1;0", "aff_unique_norm": "Heinrich-Heine Universit\u00e4t;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Bonn;Heinrich Heine University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hhu.de;https://www.uni-bonn.de/;https://www.uni-bonn.de/;https://www.hhu.de", "aff_unique_abbr": "HHU;Uni Bonn;UBonn;HHU", "aff_campus_unique_index": "1", "aff_campus_unique": ";D\u00fcsseldorf", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Estimating Ego-Body Pose from Doubly Sparse Egocentric Video Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95535", "id": "MHCnLo2QeA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MHCnLo2QeA", "openreview": "https://openreview.net/forum?id=MHCnLo2QeA", "poster": "", "project": "", "author_site": "Seunggeun Chi, Pin-Hao Huang, Enna Sachdeva, Hengbo Ma, Karthik Ramani, Kwonjoon Lee", "tldr": "", "abstract": "We study the problem of estimating the body movements of a camera wearer from egocentric videos. Current methods for ego-body pose estimation rely on temporally dense sensor data, such as IMU measurements from spatially sparse body parts like the head and hands. However, we propose that even temporally sparse observations, such as hand poses captured intermittently from egocentric videos during natural or periodic hand movements, can effectively constrain overall body motion. Naively applying diffusion models to generate full-body pose from head pose and sparse hand pose leads to suboptimal results. To overcome this, we develop a two-stage approach that decomposes the problem into temporal completion and spatial completion. First, our method employs masked autoencoders to impute hand trajectories by leveraging the spatiotemporal correlations between the head pose sequence and intermittent hand poses, providing uncertainty estimates. Subsequently, we employ conditional diffusion models to generate plausible full-body motions based on these temporally dense trajectories of the head and hands, guided by the uncertainty estimates from the imputation. The effectiveness of our methods was rigorously tested and validated through comprehensive experiments conducted on various HMD setup with AMASS and Ego-Exo4D datasets. Project page: https://sgchi.github.io/dsposer", "keywords": "pose estimation;egocentric pose estimation.", "primary_area": "machine_vision", "supplementary_material": "/attachment/9e96167085effec4bb24992e6967949f7992c226.zip", "author": "Seunggeun Chi;Pin-Hao Huang;Enna Sachdeva;Hengbo Ma;Karthik Ramani;Kwonjoon Lee", "authorids": "~Seunggeun_Chi1;~Pin-Hao_Huang1;~Enna_Sachdeva2;~Hengbo_Ma1;~Karthik_Ramani1;~Kwonjoon_Lee1", "gender": "M;M;F;;M;M", "homepage": "https://engineering.purdue.edu/people/seunggeun.chi.1;https://github.com/haogerhuang;https://ennasachdeva.github.io/;;https://engineering.purdue.edu/~ramani/;https://kjunelee.github.io", "dblp": "295/6391;293/9980;199/2077;;01/6965.html;127/7948", "google_scholar": "https://scholar.google.com/citations?hl=en;S99su_8AAAAJ;QIZZA0oAAAAJhl=enoi=ao;;wQ6njfUAAAAJ;C6Wu8M0AAAAJ", "orcid": ";0009-0002-1545-0058;0000-0002-2689-6357;;0000-0001-8639-5135;0000-0002-1433-551X", "linkedin": "seunggeun-chi-963050153/;pin-hao-huang-3639371aa/;enna-sachdeva/;;karthikramani1/;", "or_profile": "~Seunggeun_Chi1;~Pin-Hao_Huang1;~Enna_Sachdeva2;~Hengbo_Ma1;~Karthik_Ramani1;~Kwonjoon_Lee1", "aff": "Purdue University;Honda Research Institution US;Honda Research Institution US;;Purdue University;Honda Research Institute USA", "aff_domain": "purdue.edu;honda-ri.com;honda-ri.com;;purdue.edu;honda-ri.com", "position": "PhD student;Researcher;Researcher;;Full Professor;Sr Research Scientist", "bibtex": "@inproceedings{\nchi2024estimating,\ntitle={Estimating Ego-Body Pose from Doubly Sparse Egocentric Video Data},\nauthor={Seunggeun Chi and Pin-Hao Huang and Enna Sachdeva and Hengbo Ma and Karthik Ramani and Kwonjoon Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MHCnLo2QeA}\n}", "github": "", "reviewers": "TnAd;jBAY;2EAc;VMPC;9VxB", "pdf_size": 19366936, "rating": "5;5;6;7;7", "confidence": "4;4;5;4;4", "soundness": "4;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "4;3;3;3;3", "wc_summary": "93;120;71;90;55", "wc_strengths": "45;148;50;182;157", "wc_weaknesses": "75;339;200;89;77", "wc_questions": "141;16;97;20;71", "wc_limitations": "4;1;7;3;24", "wc_review": "358;624;425;384;384", "wc_reply_reviewers": "37;136;163;25;24", "wc_reply_authors": "48;895;821;38;39", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;4;5;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 85.8, 21.939917957914062 ], "wc_strengths_avg": [ 116.4, 57.37107285034855 ], "wc_weaknesses_avg": [ 156.0, 102.68008570311967 ], "wc_questions_avg": [ 69.0, 47.290591030351905 ], "wc_limitations_avg": [ 7.8, 8.328265125462805 ], "wc_review_avg": [ 435.0, 96.90407628165082 ], "wc_reply_reviewers_avg": [ 77.0, 59.983331017875294 ], "wc_reply_authors_avg": [ 368.2, 400.6192207071448 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 3.0, 1.2649110640673518 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9855797911767296306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "purdue.edu;honda-ri.com;honda-ri.com;;purdue.edu;honda-ri.com", "author_num": 6, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Purdue University;Honda Research Institute", "aff_unique_dep": ";Honda Research Institute", "aff_unique_url": "https://www.purdue.edu;https://honda-ri.com", "aff_unique_abbr": "Purdue;HRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Memory-Efficient Gradient Unrolling for Large-Scale Bi-level Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95534", "id": "MI8Z9gutIn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MI8Z9gutIn", "openreview": "https://openreview.net/forum?id=MI8Z9gutIn", "poster": "", "project": "", "author_site": "Qianli Shen, Yezhen Wang, Zhouhao Yang, Xiang Li, Haonan Wang, Yang Zhang, Jonathan Scarlett, Zhanxing Zhu, Kenji Kawaguchi", "tldr": "", "abstract": "Bi-level optimizaiton (BO) has become a fundamental mathematical framework for addressing hierarchical machine learning problems.\nAs deep learning models continue to grow in size, the demand for scalable bi-level optimization has become increasingly critical.\nTraditional gradient-based bi-level optimizaiton algorithms, due to their inherent characteristics, are ill-suited to meet the demands of large-scale applications.\nIn this paper, we introduce **F**orward **G**radient **U**nrolling with **F**orward **G**radient, abbreviated as **$($FG$)^2$U**, which achieves an unbiased stochastic approximation of the meta gradient for bi-level optimizaiton.\n$($FG$)^2$U circumvents the memory and approximation issues associated with classical bi-level optimizaiton approaches, and delivers significantly more accurate gradient estimates than existing large-scale bi-level optimizaiton approaches.\nAdditionally, $($FG$)^2$U is inherently designed to support parallel computing, enabling it to effectively leverage large-scale distributed computing systems to achieve significant computational efficiency.\nIn practice, $($FG$)^2$U and other methods can be strategically placed at different stages of the training process to achieve a more cost-effective two-phase paradigm.\nFurther, $($FG$)^2$U is easy to implement within popular deep learning frameworks, and can be conveniently adapted to address more challenging zeroth-order bi-level optimizaiton scenarios.\nWe provide a thorough convergence analysis and a comprehensive practical discussion for $($FG$)^2$U, complemented by extensive empirical evaluations, showcasing its superior performance in diverse large-scale bi-level optimizaiton tasks.", "keywords": "bi-level optimization;large-scale optimization;meta learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Qianli Shen;Yezhen Wang;Zhouhao Yang;Xiang Li;Haonan Wang;Yang Zhang;Jonathan Scarlett;Zhanxing Zhu;Kenji Kawaguchi", "authorids": "~Qianli_Shen1;~Yezhen_Wang1;~Zhouhao_Yang1;~Xiang_Li28;~Haonan_Wang1;~Yang_Zhang22;~Jonathan_Scarlett1;~Zhanxing_Zhu1;~Kenji_Kawaguchi1", "gender": "M;M;M;F;M;M;M;M;", "homepage": "https://shenqianli.github.io/;;https://zhouhao-yang.github.io/;https://south7x.github.io/;http://charles-haonan-wang.me/;https://github.com/YaNgZhAnG-V5;https://www.comp.nus.edu.sg/~scarlett/;https://zhanxingzhu.github.io/;https://ml.comp.nus.edu.sg/#members", "dblp": "22/10357.html;;;;;;78/9667;87/7756.html;", "google_scholar": "p3ekN2kAAAAJ;g-VEnLEAAAAJ;ETcGKeoAAAAJ;https://scholar.google.com.sg/citations?user=AvKGnksAAAAJ;cLziVZMAAAAJ;Ri0ENAUAAAAJ;https://scholar.google.co.uk/citations?user=a4D08aQAAAAJ;a2sHceIAAAAJ;aLl3rYoAAAAJ", "orcid": ";;;;0009-0006-6963-8987;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Qianli_Shen1;~Yezhen_Wang1;~Zhouhao_Yang1;~Xiang_Li28;~Haonan_Wang1;~Yang_Zhang22;~Jonathan_Scarlett1;~Zhanxing_Zhu1;~Kenji_Kawaguchi1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;University of Southampton;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu;nus.edu.sg;u.nus.edu;u.nus.edu;nus.edu;nus.edu.sg;soton.ac.uk;nus.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Associate Professor;Associate Professor;Presidential Young Professor", "bibtex": "@inproceedings{\nshen2024memoryefficient,\ntitle={Memory-Efficient Gradient Unrolling for Large-Scale Bi-level Optimization},\nauthor={Qianli Shen and Yezhen Wang and Zhouhao Yang and Xiang Li and Haonan Wang and Yang Zhang and Jonathan Scarlett and Zhanxing Zhu and Kenji Kawaguchi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MI8Z9gutIn}\n}", "github": "", "reviewers": "J9FE;4Mx8;cwe5", "pdf_size": 1201220, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "56;35;75", "wc_strengths": "25;35;62", "wc_weaknesses": "190;38;116", "wc_questions": "3;2;24", "wc_limitations": "3;1;19", "wc_review": "277;111;296", "wc_reply_reviewers": "69;17;159", "wc_reply_authors": "277;25;169", "reply_reviewers": "2;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 55.333333333333336, 16.33673433979046 ], "wc_strengths_avg": [ 40.666666666666664, 15.627610892974722 ], "wc_weaknesses_avg": [ 114.66666666666667, 62.06090198793082 ], "wc_questions_avg": [ 9.666666666666666, 10.143416036468626 ], "wc_limitations_avg": [ 7.666666666666667, 8.055363982396381 ], "wc_review_avg": [ 228.0, 83.09432391365048 ], "wc_reply_reviewers_avg": [ 81.66666666666667, 58.6590904198905 ], "wc_reply_authors_avg": [ 157.0, 103.22790320451152 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6218674478364796814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "u.nus.edu;nus.edu;nus.edu.sg;u.nus.edu;u.nus.edu;nus.edu;nus.edu.sg;soton.ac.uk;nus.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;1;0", "aff_unique_norm": "National University of Singapore;University of Southampton", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.southampton.ac.uk", "aff_unique_abbr": "NUS;Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "Singapore;United Kingdom" }, { "id": "MICrZCQzoN", "title": "Generalization bounds for mixing processes via delayed online-to-PAC conversions", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study the generalization error of statistical learning algorithms in a non-i.i.d. setting, where the training data is sampled from a stationary mixing process. \nWe develop an analytic framework for this scenario based on a reduction to online learning with delayed feedback. In particular, we show that the existence of an online learning algorithm with bounded regret (against a fixed statistical learning algorithm in a specially constructed game of online learning with delayed feedback) implies low generalization error of said statistical learning method even if the data sequence is sampled from a mixing time series. The rates demonstrate a trade-off between the amount of delay in the online learning game and the degree of dependence between consecutive data points, with near-optimal rates recovered in a number of well-studied settings when the delay is tuned appropriately as a function of the mixing time of the process.", "keywords": "generalization;online learning;mixing processes;learning theory;statistical learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Baptiste Ab\u00e9l\u00e8s;Eugenio Clerico;Gergely Neu", "authorids": "~Baptiste_Ab\u00e9l\u00e8s1;~Eugenio_Clerico1;~Gergely_Neu1", "gender": "M;M;M", "homepage": ";https://github.com/eclerico;http://cs.bme.hu/~gergo", "dblp": ";;83/7606", "google_scholar": "PRca6iwAAAAJ;;https://scholar.google.ch/citations?user=uz27G84AAAAJ", "orcid": ";;", "linkedin": "https://www.linkedin.com/feed/;;", "or_profile": "~Baptiste_Ab\u00e9l\u00e8s1;~Eugenio_Clerico1;~Gergely_Neu1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.edu;upf.edu;upf.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@misc{\nanonymous2024generalization,\ntitle={Generalization bounds for mixing processes via delayed online-to-{PAC} conversions},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=MICrZCQzoN}\n}", "github": "", "project": "", "reviewers": "DR73;yvPp;BFxn;saej;dxR2", "site": "https://openreview.net/forum?id=MICrZCQzoN", "pdf_size": 377401, "rating": "4;5;6;7;7", "confidence": "5;3;3;3;3", "soundness": "3;3;3;4;3", "novelty": "2;3;2;4;3", "presentation": "4;3;4;3;4", "wc_summary": "90;67;78;60;245", "wc_strengths": "55;45;28;57;57", "wc_weaknesses": "172;85;56;27;74", "wc_questions": "98;46;18;15;2", "wc_limitations": "22;4;5;1;4", "wc_review": "437;247;185;160;382", "wc_reply_reviewers": "68;0;12;0;10", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.8000000000000002 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 108.0, 69.25027075759344 ], "wc_strengths_avg": [ 48.4, 11.128342194594845 ], "wc_weaknesses_avg": [ 82.8, 48.7253527437206 ], "wc_questions_avg": [ 35.8, 34.248503616946536 ], "wc_limitations_avg": [ 7.2, 7.520638270785267 ], "wc_review_avg": [ 282.2, 109.12634878891532 ], "wc_reply_reviewers_avg": [ 18.0, 25.487251715318386 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7717436331412897, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2151879313760952764&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "title": "A Single-Step, Sharpness-Aware Minimization is All You Need to Achieve Efficient and Accurate Sparse Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95533", "id": "MJgMMqMDu4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MJgMMqMDu4", "openreview": "https://openreview.net/forum?id=MJgMMqMDu4", "poster": "/media/PosterPDFs/NeurIPS%202024/95533.png?t=1733258474.9930763", "project": "", "author_site": "Jie Ji, Gen Li, Jingjing Fu, Fatemeh Afghah, Linke Guo, Xiaoyong Yuan, Xiaolong Ma", "tldr": "", "abstract": "Sparse training stands as a landmark approach in addressing the considerable training resource demands imposed by the continuously expanding size of Deep Neural Networks (DNNs). However, the training of a sparse DNN encounters great challenges in achieving optimal generalization ability despite the efforts from the state-of-the-art sparse training methodologies. To unravel the mysterious reason behind the difficulty of sparse training, we connect the network sparsity with neural loss functions structure, and identify the cause of such difficulty lies in chaotic loss surface. In light of such revelation, we propose $S^{2} - SAM$, characterized by a **S**ingle-step **S**harpness_**A**ware **M**inimization that is tailored for **S**parse training. For the first time, $S^{2} - SAM$ innovates the traditional SAM-style optimization by approximating sharpness perturbation through prior gradient information, incurring *zero extra cost*. Therefore, $S^{2} - SAM$ not only exhibits the capacity to improve generalization but also aligns with the efficiency goal of sparse training. Additionally, we study the generalization result of $S^{2} - SAM$ and provide theoretical proof for convergence. Through extensive experiments, $S^{2} - SAM$ demonstrates its universally applicable plug-and-play functionality, enhancing accuracy across various sparse training methods. Code available at https://github.com/jjsrf/SSAM-NEURIPS2024.", "keywords": "sparse training;efficient learning algorithm", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Jie Ji;Gen Li;Jingjing Fu;Fatemeh Afghah;Linke Guo;Xiaoyong Yuan;Xiaolong Ma", "authorids": "~Jie_Ji1;~Gen_Li4;~Jingjing_Fu2;~Fatemeh_Afghah1;~Linke_Guo2;~Xiaoyong_Yuan1;~Xiaolong_Ma2", "gender": ";M;F;F;M;M;M", "homepage": ";https://coulsonlee.github.io;;https://sites.google.com/g.clemson.edu/is-win-lab/home;http://cecas.clemson.edu/~linkeg/index.html;https://sites.google.com/view/xiaoyong-yuan;https://xiaolongma2016.com", "dblp": ";28/538-12;;70/8821.html;;150/3870;", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=67mA71QAAAAJ;https://scholar.google.com/citations?hl=en;wl_qADcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-2315-1173;;0000-0003-0782-4187;0000-0003-3753-7648", "linkedin": ";;jingjing-fu-725ba1181/;fatemeh-afghah-57b53816/;;xiaoyongyuan/;xiaolong-ma-66b98910b/", "or_profile": "~Jie_Ji1;~Gen_Li4;~Jingjing_Fu2;~Fatemeh_Afghah1;~Linke_Guo2;~Xiaoyong_Yuan1;~Xiaolong_Ma2", "aff": ";Clemson University;Clemson University;Clemson University;Clemson University;Clemson University;Clemson University", "aff_domain": ";clemson.edu;clemson.edu;clemson.edu;clemson.edu;clemson.edu;clemson.edu", "position": ";PhD student;PhD student;Associate Professor;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nji2024a,\ntitle={A Single-Step, Sharpness-Aware Minimization is All You Need to Achieve Efficient and Accurate Sparse Training},\nauthor={Jie Ji and Gen Li and Jingjing Fu and Fatemeh Afghah and Linke Guo and Xiaoyong Yuan and Xiaolong Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MJgMMqMDu4}\n}", "github": "", "reviewers": "uQD3;UgUE;Z2wG;ZjXc", "pdf_size": 4076235, "rating": "6;7;7;7", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "22;97;109;85", "wc_strengths": "44;43;47;70", "wc_weaknesses": "30;138;184;24", "wc_questions": "40;19;158;287", "wc_limitations": "7;5;2;1", "wc_review": "143;302;500;467", "wc_reply_reviewers": "0;36;36;23", "wc_reply_authors": "0;63;35;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.25, 33.566166000900374 ], "wc_strengths_avg": [ 51.0, 11.067971810589327 ], "wc_weaknesses_avg": [ 94.0, 68.97825744392213 ], "wc_questions_avg": [ 126.0, 106.99299042460679 ], "wc_limitations_avg": [ 3.75, 2.384848003542364 ], "wc_review_avg": [ 353.0, 142.57103492645342 ], "wc_reply_reviewers_avg": [ 23.75, 14.703315952532613 ], "wc_reply_authors_avg": [ 24.5, 26.424420523447623 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VYp1xWPsPhMJ:scholar.google.com/&scioq=A+Single-Step,+Sharpness-Aware+Minimization+is+All+You+Need+to+Achieve+Efficient+and+Accurate+Sparse+Training&hl=en&as_sdt=0,10", "gs_version_total": 3, "email": ";clemson.edu;clemson.edu;clemson.edu;clemson.edu;clemson.edu;clemson.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Clemson University", "aff_unique_dep": "", "aff_unique_url": "https://www.clemson.edu", "aff_unique_abbr": "Clemson", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "How to Boost Any Loss Function", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95532", "id": "MLgFu6dQYc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MLgFu6dQYc", "openreview": "https://openreview.net/forum?id=MLgFu6dQYc", "poster": "/media/PosterPDFs/NeurIPS%202024/95532.png?t=1731244587.0110612", "project": "", "author_site": "Richard Nock, Yishay Mansour", "tldr": "", "abstract": "Boosting is a highly successful ML-born optimization setting in which one is required to computationally efficiently learn arbitrarily good models based on the access to a weak learner oracle, providing classifiers performing at least slightly differently from random guessing. A key difference with gradient-based optimization is that boosting's original model does not requires access to first order information about a loss, yet the decades long history of boosting has quickly evolved it into a first order optimization setting -- sometimes even wrongfully *defining* it as such. Owing to recent progress extending gradient-based optimization to use only a loss' zeroth ($0^{th}$) order information to learn, this begs the question: what loss functions be efficiently optimized with boosting and what is the information really needed for boosting to meet the *original* boosting blueprint's requirements ?\n\nWe provide a constructive formal answer essentially showing that *any* loss function can be optimized with boosting and thus boosting can achieve a feat not yet known to be possible in the classical $0^{th}$ order setting, since loss functions are not required to be be convex, nor differentiable or Lipschitz -- and in fact not required to be continuous either. Some tools we use are rooted in quantum calculus, the mathematical field -- not to be confounded with quantum computation -- that studies calculus without passing to the limit, and thus without using first order information.", "keywords": "boosting;loss functions;zeroth-order optimisation", "primary_area": "learning_theory", "supplementary_material": "", "author": "Richard Nock;Yishay Mansour", "authorids": "~Richard_Nock1;~Yishay_Mansour2", "gender": ";M", "homepage": "http://users.cecs.anu.edu.au/~rnock/;https://www.cs.tau.ac.il/~mansour/", "dblp": "n/RichardNock;m/YishayMansour", "google_scholar": "https://scholar.google.fr/citations?user=0J2s3YQAAAAJ;OEJUgwkAAAAJ", "orcid": ";0000-0001-6891-2645", "linkedin": ";", "or_profile": "~Richard_Nock1;~Yishay_Mansour1", "aff": "Google Research;School of Computer Science, Tel Aviv University", "aff_domain": "google.com;cs.tau.ac.il", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nnock2024how,\ntitle={How to Boost Any Loss Function},\nauthor={Richard Nock and Yishay Mansour},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MLgFu6dQYc}\n}", "github": "", "reviewers": "bfG5;TGRx;8gCx;XK3t", "pdf_size": 5136036, "rating": "5;6;6;7", "confidence": "1;1;2;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;3;2;3", "wc_summary": "69;302;45;99", "wc_strengths": "67;186;52;77", "wc_weaknesses": "2467;41;278;90", "wc_questions": "114;173;86;5", "wc_limitations": "33;1;52;1", "wc_review": "2750;703;513;272", "wc_reply_reviewers": "3306;489;69;36", "wc_reply_authors": "4196;999;45;0", "reply_reviewers": "18;3;1;1", "reply_authors": "10;4;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 1.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 128.75, 101.83902739127078 ], "wc_strengths_avg": [ 95.5, 53.002358438092166 ], "wc_weaknesses_avg": [ 719.0, 1013.0782299506786 ], "wc_questions_avg": [ 94.5, 60.466933112239126 ], "wc_limitations_avg": [ 21.75, 21.810261346439663 ], "wc_review_avg": [ 1059.5, 987.8892903559589 ], "wc_reply_reviewers_avg": [ 975.0, 1357.6002725397486 ], "wc_reply_authors_avg": [ 1310.0, 1713.3331549935056 ], "reply_reviewers_avg": [ 5.75, 7.119515432949071 ], "reply_authors_avg": [ 4.25, 3.491060010942235 ], "replies_avg": [ 48, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t01Mig-ys9MJ:scholar.google.com/&scioq=How+to+Boost+Any+Loss+Function&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "google.com;cs.tau.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Google;Tel Aviv University", "aff_unique_dep": "Google Research;School of Computer Science", "aff_unique_url": "https://research.google;https://www.tau.ac.il", "aff_unique_abbr": "Google Research;TAU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Tel Aviv", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "title": "Swift Sampler: Efficient Learning of Sampler by 10 Parameters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95531", "id": "MLhZ8ZNOEk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MLhZ8ZNOEk", "openreview": "https://openreview.net/forum?id=MLhZ8ZNOEk", "poster": "", "project": "", "author_site": "Jiawei Yao, Chuming Li, Canran Xiao", "tldr": "", "abstract": "Data selection is essential for training deep learning models. An effective data sampler assigns proper sampling probability for training data and helps the model converge to a good local minimum with high performance. Previous studies in data sampling are mainly based on heuristic rules or learning through a huge amount of time-consuming trials. In this paper, we propose an automatic swift sampler search algorithm, SS, to explore automatically learning effective samplers efficiently. In particular, SS utilizes a novel formulation to map a sampler to a low dimension of hyper-parameters and uses an approximated local minimum to quickly examine the quality of a sampler. Benefiting from its low computational expense, SS can be applied on large-scale data sets with high efficiency. Comprehensive experiments on various tasks demonstrate that SS powered sampling can achieve obvious improvements (e.g., 1.5% on ImageNet) and transfer among different neural networks. Project page: https://github.com/Alexander-Yao/Swift-Sampler.", "keywords": "Reinforcement learning;AutoML", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jiawei Yao;Chuming Li;Canran Xiao", "authorids": "~Jiawei_Yao3;~Chuming_Li3;~Canran_Xiao1", "gender": ";M;Not Specified", "homepage": ";https://scholar.google.com.sg/citations?user=ZfB7vEcAAAAJ&hl=en;", "dblp": ";241/6082;", "google_scholar": ";https://scholar.google.com.sg/citations?user=ZfB7vEcAAAAJ;", "orcid": ";;", "linkedin": ";;https://www.linkedin.cn/incareer/in/%E7%B2%B2%E7%84%B6-%E8%82%96-7b363a276", "or_profile": "~Jiawei_Yao3;~Chuming_Li3;~Canran_Xiao1", "aff": ";;Central South University", "aff_domain": ";;csu.edu.cn", "position": ";;PhD student", "bibtex": "@inproceedings{\nyao2024swift,\ntitle={Swift Sampler: Efficient Learning of Sampler by 10 Parameters},\nauthor={Jiawei Yao and Chuming Li and Canran Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MLhZ8ZNOEk}\n}", "github": "", "reviewers": "uBSk;m5nd;Vq6T;SsT2;QTHD", "pdf_size": 3305938, "rating": "5;6;6;7;8", "confidence": "2;3;3;5;4", "soundness": "2;3;3;4;3", "novelty": "2;3;2;4;2", "presentation": "2;3;2;3;3", "wc_summary": "139;103;66;85;95", "wc_strengths": "37;71;82;70;61", "wc_weaknesses": "85;74;81;60;468", "wc_questions": "4;2;82;60;42", "wc_limitations": "25;1;84;37;21", "wc_review": "290;251;395;312;687", "wc_reply_reviewers": "17;24;31;96;78", "wc_reply_authors": "30;53;34;24;40", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 97.6, 24.113066996962456 ], "wc_strengths_avg": [ 64.2, 15.14463601411404 ], "wc_weaknesses_avg": [ 153.6, 157.43011147807778 ], "wc_questions_avg": [ 38.0, 31.266595593380487 ], "wc_limitations_avg": [ 33.6, 27.738781516137294 ], "wc_review_avg": [ 387.0, 157.2221358460697 ], "wc_reply_reviewers_avg": [ 49.2, 31.694794525284433 ], "wc_reply_authors_avg": [ 36.2, 9.88736567544662 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8076923076923078, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6501237905469677439&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;csu.edu.cn", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Central South University", "aff_unique_dep": "", "aff_unique_url": "https://www.csu.edu.cn", "aff_unique_abbr": "CSU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "RETR: Multi-View Radar Detection Transformer for Indoor Perception", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95530", "id": "MLipvjWK5F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MLipvjWK5F", "openreview": "https://openreview.net/forum?id=MLipvjWK5F", "poster": "/media/PosterPDFs/NeurIPS%202024/95530.png?t=1733162441.2697997", "project": "", "author_site": "Ryoma Yataka, Adriano Cardace, Perry Wang, Petros Boufounos, Ryuhei Takahashi", "tldr": "", "abstract": "Indoor radar perception has seen rising interest due to affordable costs driven by emerging automotive imaging radar developments and the benefits of reduced privacy concerns and reliability under hazardous conditions (e.g., fire and smoke). However, existing radar perception pipelines fail to account for distinctive characteristics of the multi-view radar setting. In this paper, we propose Radar dEtection TRansformer (RETR), an extension of the popular DETR architecture, tailored for multi-view radar perception. RETR inherits the advantages of DETR, eliminating the need for hand-crafted components for object detection and segmentation in the image plane. More importantly, RETR incorporates carefully designed modifications such as 1) depth-prioritized feature similarity via a tunable positional encoding (TPE); 2) a tri-plane loss from both radar and camera coordinates; and 3) a learnable radar-to-camera transformation via reparameterization, to account for the unique multi-view radar setting. Evaluated on two indoor radar perception datasets, our approach outperforms existing state-of-the-art methods by a margin of 15.38+ AP for object detection and 11.91+ IoU for instance segmentation, respectively. Our implementation is available at https://github.com/merlresearch/radar-detection-transformer.", "keywords": "indoor monitoring;radar perception;object detection;instance segmentation;detection transformer;multi-view radar heatmaps", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ryoma Yataka;Adriano Cardace;Pu Perry Wang;Petros Boufounos;Ryuhei Takahashi", "authorids": "~Ryoma_Yataka1;~Adriano_Cardace1;~Pu_Perry_Wang2;~Petros_Boufounos1;~Ryuhei_Takahashi2", "gender": "M;M;M;M;Not Specified", "homepage": ";https://github.com/adricarda;http://boufounos.com;;https://www.merl.com/people/pwang", "dblp": "199/9334;;98/6845;120/7100;", "google_scholar": "MA56tMUAAAAJ;0uhdTI0AAAAJ;cilyP7wAAAAJ;qbqkxrMAAAAJ;TFoCwB0AAAAJ", "orcid": "0009-0004-7311-6431;;0000-0003-1369-0947;0000-0002-9421-563X;0000-0002-4718-3102", "linkedin": "ryoma-yataka-92724328a/;adriano-cardace/;petrosb/;ryuhei-takahashi-8994a843/;pu-perry-wang-75a29526/", "or_profile": "~Ryoma_Yataka1;~Adriano_Cardace1;~Petros_Boufounos1;~Ryuhei_Takahashi2;~PU_Perry_WANG1", "aff": "Mitsubishi Electric Research Labs;University of Bologna;Mitsubishi Electric Research Laboratories;Mitsubishi Electric Corporation;Mitsubishi Electric Research Labs", "aff_domain": "merl.com;unibo.it;merl.com;mitsubishielectric.co.jp;merl.com", "position": "Visiting Resarcher;PhD student;Deputy Director/Distinguished Research Scientist;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nyataka2024retr,\ntitle={{RETR}: Multi-View Radar Detection Transformer for Indoor Perception},\nauthor={Ryoma Yataka and Adriano Cardace and Pu Perry Wang and Petros Boufounos and Ryuhei Takahashi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MLipvjWK5F}\n}", "github": "", "reviewers": "XrXa;kPes;DJHb;H5qV", "pdf_size": 3947528, "rating": "6;6;6;7", "confidence": "4;2;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "61;59;61;43", "wc_strengths": "92;52;57;12", "wc_weaknesses": "218;146;167;107", "wc_questions": "162;30;6;16", "wc_limitations": "1;1;12;29", "wc_review": "534;288;303;207", "wc_reply_reviewers": "135;129;35;0", "wc_reply_authors": "50;34;34;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 7.54983443527075 ], "wc_strengths_avg": [ 53.25, 28.367014294775544 ], "wc_weaknesses_avg": [ 159.5, 40.05308976845607 ], "wc_questions_avg": [ 53.5, 63.21985447626402 ], "wc_limitations_avg": [ 10.75, 11.453711188955307 ], "wc_review_avg": [ 333.0, 121.65730557594969 ], "wc_reply_reviewers_avg": [ 74.75, 58.6104726136891 ], "wc_reply_authors_avg": [ 29.5, 18.2414363469547 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2274577954924655941&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "merl.com;unibo.it;merl.com;mitsubishielectric.co.jp;merl.com", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Mitsubishi Electric Research Laboratories;University of Bologna;Mitsubishi Electric Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.merl.com;https://www.unibo.it;https://www.mitsubishielectric.com", "aff_unique_abbr": "MERL;Unibo;MEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "United States;Italy;Japan" }, { "title": "Adaptive Randomized Smoothing: Certified Adversarial Robustness for Multi-Step Defences", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95529", "id": "MN4nt01TeO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MN4nt01TeO", "openreview": "https://openreview.net/forum?id=MN4nt01TeO", "poster": "/media/PosterPDFs/NeurIPS%202024/95529.png?t=1731568745.5473402", "project": "", "author_site": "Saiyue Lyu, Shadab Shaikh, Frederick Shpilevskiy, Evan Shelhamer, Mathias L\u00e9cuyer", "tldr": "", "abstract": "We propose Adaptive Randomized Smoothing (ARS) to certify the predictions of our test-time adaptive models against adversarial examples.\nARS extends the analysis of randomized smoothing using $f$-Differential Privacy to certify the adaptive composition of multiple steps.\nFor the first time, our theory covers the sound adaptive composition of general and high-dimensional functions of noisy inputs.\nWe instantiate ARS on deep image classification to certify predictions against adversarial examples of bounded $L_{\\infty}$ norm.\nIn the $L_{\\infty}$ threat model, ARS enables flexible adaptation through high-dimensional input-dependent masking.\nWe design adaptivity benchmarks, based on CIFAR-10 and CelebA, and show that ARS improves standard test accuracy by 1 to 15\\% points.\nOn ImageNet, ARS improves certified test accuracy by up to 1.6% points over standard RS without adaptivity. Our code is available at [https://github.com/ubc-systopia/adaptive-randomized-smoothing](https://github.com/ubc-systopia/adaptive-randomized-smoothing).", "keywords": "Robustness;Adversarial examples;Adaptive defenses;Certified test-time defenses;Randomized Smoothing", "primary_area": "privacy", "supplementary_material": "/attachment/ed7862d0aa3f4b70cdd792cadb217c15a30dfe19.zip", "author": "Saiyue Lyu;Shadab Shaikh;Frederick Shpilevskiy;Evan Shelhamer;Mathias L\u00e9cuyer", "authorids": "~Saiyue_Lyu1;~Shadab_Shaikh1;~Frederick_Shpilevskiy1;~Evan_Shelhamer2;~Mathias_L\u00e9cuyer2", "gender": "F;M;M;M;M", "homepage": "https://saiyuelyu.github.io/;https://greninja.github.io/;;http://imaginarynumber.net;http://mathias.lecuyer.me", "dblp": "346/4001;;;150/6541;130/0417", "google_scholar": ";https://scholar.google.ca/citations?user=Qg0JXqYAAAAJ;;-ltRSM0AAAAJ;WeIvMTUAAAAJ", "orcid": "0000-0002-3822-9746;;;;", "linkedin": "https://linkedin.com/in/saiyue-lyu-ubc;shadab-shaikh-1251bb110/;frederick-shpilevskiy-3b5a96237/;;", "or_profile": "~Saiyue_Lyu1;~Shadab_Shaikh1;~Frederick_Shpilevskiy1;~Evan_G_Shelhamer1;~Mathias_L\u00e9cuyer1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;Google DeepMind;University of British Columbia", "aff_domain": "cs.ubc.ca;cs.ubc.ca;ubc.ca;google.com;ubc.ca", "position": "PhD student;MS student;PhD student;Senior Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nlyu2024adaptive,\ntitle={Adaptive Randomized Smoothing: Certified Adversarial Robustness for Multi-Step Defences},\nauthor={Saiyue Lyu and Shadab Shaikh and Frederick Shpilevskiy and Evan Shelhamer and Mathias L{\\'e}cuyer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MN4nt01TeO}\n}", "github": "", "reviewers": "Zvfp;FN7A;NkYs;7UWe", "pdf_size": 10000258, "rating": "6;7;7;8", "confidence": "4;3;4;4", "soundness": "2;4;3;4", "novelty": "4;4;3;3", "presentation": "3;3;3;3", "wc_summary": "85;81;56;84", "wc_strengths": "85;57;345;146", "wc_weaknesses": "111;39;1;184", "wc_questions": "2;16;1;63", "wc_limitations": "6;4;1;8", "wc_review": "289;197;404;485", "wc_reply_reviewers": "18;39;35;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.5, 11.926860441876563 ], "wc_strengths_avg": [ 158.25, 112.51972049378722 ], "wc_weaknesses_avg": [ 83.75, 70.07629770471611 ], "wc_questions_avg": [ 20.5, 25.243811122728676 ], "wc_limitations_avg": [ 4.75, 2.5860201081971503 ], "wc_review_avg": [ 343.75, 109.67537326127503 ], "wc_reply_reviewers_avg": [ 32.75, 8.671072598012312 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KQfrgTX5s58J:scholar.google.com/&scioq=Adaptive+Randomized+Smoothing:+Certified+Adversarial+Robustness+for+Multi-Step+Defences&hl=en&as_sdt=0,10", "gs_version_total": 5, "email": "cs.ubc.ca;cs.ubc.ca;ubc.ca;google.com;ubc.ca", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of British Columbia;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ubc.ca;https://deepmind.com", "aff_unique_abbr": "UBC;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Sample and Computationally Efficient Robust Learning of Gaussian Single-Index Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95528", "id": "MN7d0S2i1d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MN7d0S2i1d", "openreview": "https://openreview.net/forum?id=MN7d0S2i1d", "poster": "", "project": "", "author_site": "Puqian Wang, Nikos Zarifis, Ilias Diakonikolas, Jelena Diakonikolas", "tldr": "", "abstract": "A single-index model (SIM) is a function of the form $\\sigma(\\mathbf{w}^{\\ast} \\cdot \\mathbf{x})$, where\n$\\sigma: \\mathbb{R} \\to \\mathbb{R}$ is a known link function and $\\mathbf{w}^{\\ast}$ is a hidden unit vector. \nWe study the task of learning SIMs in the agnostic (a.k.a. adversarial label noise) model \nwith respect to the $L^2_2$-loss under the Gaussian distribution. \nOur main result is a sample and computationally efficient agnostic proper learner \nthat attains $L^2_2$-error of $O(\\mathrm{OPT})+\\epsilon$, where $\\mathrm{OPT}$ is the optimal loss. The sample complexity of our algorithm is \n$\\tilde{O}(d^{\\lceil k^{\\ast}/2\\rceil}+d/\\epsilon)$, where \n$k^{\\ast}$ is the information-exponent of $\\sigma$ \ncorresponding to the degree of its first non-zero Hermite coefficient. \nThis sample bound nearly matches known CSQ lower bounds, even in the realizable setting. \nPrior algorithmic work in this setting had focused \non learning in the realizable case or in the presence \nof semi-random noise. Prior computationally efficient robust learners required \nsignificantly stronger assumptions on the link function.", "keywords": "single index models;agnostic learning;adversarial label noise", "primary_area": "learning_theory", "supplementary_material": "", "author": "Puqian Wang;Nikos Zarifis;Ilias Diakonikolas;Jelena Diakonikolas", "authorids": "~Puqian_Wang1;~Nikos_Zarifis1;~Ilias_Diakonikolas1;~Jelena_Diakonikolas2", "gender": ";;M;F", "homepage": ";;http://www.iliasdiakonikolas.org/;http://www.jelena-diakonikolas.com/", "dblp": ";;d/IliasDiakonikolas;147/5178", "google_scholar": ";;Vb3FLmkAAAAJ;J8ixfu8AAAAJ", "orcid": ";;;0000-0003-3439-0310", "linkedin": ";;;", "or_profile": "~Puqian_Wang1;~Nikos_Zarifis1;~Ilias_Diakonikolas1;~Jelena_Diakonikolas2", "aff": ";;University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": ";;wisc.edu;wisc.edu", "position": ";;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024sample,\ntitle={Sample and Computationally Efficient Robust Learning of Gaussian Single-Index Models},\nauthor={Puqian Wang and Nikos Zarifis and Ilias Diakonikolas and Jelena Diakonikolas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MN7d0S2i1d}\n}", "github": "", "reviewers": "9oLo;z8KF;wBuC", "pdf_size": 617168, "rating": "4;7;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "87;95;132", "wc_strengths": "77;1;93", "wc_weaknesses": "370;1;259", "wc_questions": "145;735;127", "wc_limitations": "41;1;1", "wc_review": "720;833;612", "wc_reply_reviewers": "231;49;0", "wc_reply_authors": "1098;257;0", "reply_reviewers": "1;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 104.66666666666667, 19.601587237318874 ], "wc_strengths_avg": [ 57.0, 40.13311184877976 ], "wc_weaknesses_avg": [ 210.0, 154.57684173251826 ], "wc_questions_avg": [ 335.6666666666667, 282.4669105498121 ], "wc_limitations_avg": [ 14.333333333333334, 18.856180831641264 ], "wc_review_avg": [ 721.6666666666666, 90.23056885310851 ], "wc_reply_reviewers_avg": [ 93.33333333333333, 99.37918404888534 ], "wc_reply_authors_avg": [ 451.6666666666667, 468.91529678136493 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16347685385274726901&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Advancing Fine-Grained Classification by Structure and Subject Preserving Augmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95527", "id": "MNg331t8Tj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MNg331t8Tj", "openreview": "https://openreview.net/forum?id=MNg331t8Tj", "poster": "/media/PosterPDFs/NeurIPS%202024/95527.png?t=1729950095.6732986", "project": "", "author_site": "Eyal Michaeli, Ohad Fried", "tldr": "", "abstract": "Fine-grained visual classification (FGVC) involves classifying closely related subcategories. This task is inherently difficult due to the subtle differences between classes and the high intra-class variance. Moreover, FGVC datasets are typically small and challenging to gather, thus highlighting a significant need for effective data augmentation.\nRecent advancements in text-to-image diffusion models have introduced new possibilities for data augmentation in image classification. While these models have been used to generate training data for classification tasks, their effectiveness in full-dataset training of FGVC models remains under-explored. Recent techniques that rely on text-to-image generation or Img2Img methods, such as SDEdit, often struggle to generate images that accurately represent the class while modifying them to a degree that significantly increases the dataset's diversity. To address these challenges, we present SaSPA: Structure and Subject Preserving Augmentation. Contrary to recent methods, our method does not use real images as guidance, thereby increasing generation flexibility and promoting greater diversity. To ensure accurate class representation, we employ conditioning mechanisms, specifically by conditioning on image edges and subject representation.\nWe conduct extensive experiments and benchmark SaSPA against both traditional and generative data augmentation techniques. SaSPA consistently outperforms all established baselines across multiple settings, including full dataset training and contextual bias. Additionally, our results reveal interesting patterns in using synthetic data for FGVC models; for instance, we find a relationship between the amount of real data used and the optimal proportion of synthetic data.", "keywords": "Fine-grained Visual Classification;Data Augmentation;Synthetic Data;Diffusion Models;Image Classification", "primary_area": "machine_vision", "supplementary_material": "/attachment/e9d39d23150a279318ff04111bf22846897bca27.zip", "author": "Eyal Michaeli;Ohad Fried", "authorids": "~Eyal_Michaeli1;~Ohad_Fried1", "gender": "M;", "homepage": ";https://www.ohadf.com/", "dblp": ";159/4877", "google_scholar": ";YZcVsRMAAAAJ", "orcid": ";0000-0001-7109-4006", "linkedin": "eyal-michaeli-807b75151?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;ohadfried/", "or_profile": "~Eyal_Michaeli1;~Ohad_Fried1", "aff": "Reichman University;Reichman University", "aff_domain": "runi.ac.il;runi.ac.il", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nmichaeli2024advancing,\ntitle={Advancing Fine-Grained Classification by Structure and Subject Preserving Augmentation},\nauthor={Eyal Michaeli and Ohad Fried},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MNg331t8Tj}\n}", "github": "", "reviewers": "yjvN;Z7K4;JRLS", "pdf_size": 8202536, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "66;104;139", "wc_strengths": "62;118;111", "wc_weaknesses": "221;282;77", "wc_questions": "15;140;159", "wc_limitations": "1;1;7", "wc_review": "365;645;493", "wc_reply_reviewers": "0;132;109", "wc_reply_authors": "0;24;18", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.0, 29.81051268708183 ], "wc_strengths_avg": [ 97.0, 24.91318258807306 ], "wc_weaknesses_avg": [ 193.33333333333334, 85.94701209983329 ], "wc_questions_avg": [ 104.66666666666667, 63.87661717891941 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 501.0, 114.44940658066632 ], "wc_reply_reviewers_avg": [ 80.33333333333333, 57.57507176625044 ], "wc_reply_authors_avg": [ 14.0, 10.198039027185569 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uD5nxcDKR6gJ:scholar.google.com/&scioq=Advancing+Fine-Grained+Classification+by+Structure+and+Subject+Preserving+Augmentation&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "runi.ac.il;runi.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Reichman University", "aff_unique_dep": "", "aff_unique_url": "https://www Reichman.ac.il", "aff_unique_abbr": "RUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Generalizing Consistency Policy to Visual RL with Prioritized Proximal Experience Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95526", "id": "MOFwt8OeXr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MOFwt8OeXr", "openreview": "https://openreview.net/forum?id=MOFwt8OeXr", "poster": "", "project": "", "author_site": "Haoran Li, Zhennan Jiang, YUHUI CHEN, Dongbin Zhao", "tldr": "", "abstract": "With high-dimensional state spaces, visual reinforcement learning (RL) faces significant challenges in exploitation and exploration, resulting in low sample efficiency and training stability. As a time-efficient diffusion model, although consistency models have been validated in online state-based RL, it is still an open question whether it can be extended to visual RL. In this paper, we investigate the impact of non-stationary distribution and the actor-critic framework on consistency policy in online RL, and find that consistency policy was unstable during the training, especially in visual RL with the high-dimensional state space. To this end, we suggest sample-based entropy regularization to stabilize the policy training, and propose a consistency policy with prioritized proximal experience regularization (CP3ER) to improve sample efficiency. CP3ER achieves new state-of-the-art (SOTA) performance in 21 tasks across DeepMind control suite and Meta-world. To our knowledge, CP3ER is the first method to apply diffusion/consistency models to visual RL and demonstrates the potential of consistency models in visual RL.", "keywords": "Visual Reinforcement Learning;Reinforcement Learning;Consistency Model;Dormant Neuron Phenomenon;Diffusion Model", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/2153c6f60627b82ce3950ffddabc56591b4034d4.zip", "author": "Haoran Li;Zhennan Jiang;YUHUI CHEN;Dongbin Zhao", "authorids": "~Haoran_Li7;~Zhennan_Jiang1;~YUHUI_CHEN1;~Dongbin_Zhao1", "gender": "M;M;M;M", "homepage": ";https://jzndd.github.io/;https://cccedric.github.io/;http://people.ucas.ac.cn/~zhaodongbin?language=en", "dblp": ";;;40/255", "google_scholar": "kalE5UIAAAAJ;;dtqKvlEAAAAJ;", "orcid": "0000-0003-2559-9585;;0009-0000-1217-5877;0000-0001-8218-9633", "linkedin": ";%E9%9C%87%E5%8D%97-%E6%B1%9F-012809309/;yuhui-chen2000/;", "or_profile": "~Haoran_Li7;~Zhennan_Jiang1;~YUHUI_CHEN1;~Dongbin_Zhao1", "aff": "Institute of Automation, Chinese Academy of Sciences;Chinese academy of science;Institute of Automation, Chinese Academy of Science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "Associate Professor;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024generalizing,\ntitle={Generalizing Consistency Policy to Visual {RL} with Prioritized Proximal Experience Regularization},\nauthor={Haoran Li and Zhennan Jiang and YUHUI CHEN and Dongbin Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MOFwt8OeXr}\n}", "github": "", "reviewers": "ceLx;UMZF;TpQn;Xhh1", "pdf_size": 6556025, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "78;78;77;112", "wc_strengths": "135;63;66;88", "wc_weaknesses": "223;33;24;41", "wc_questions": "316;182;112;21", "wc_limitations": "13;1;6;7", "wc_review": "765;357;285;269", "wc_reply_reviewers": "263;0;12;0", "wc_reply_authors": "235;22;22;0", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 14.872373717735847 ], "wc_strengths_avg": [ 88.0, 28.801041647829337 ], "wc_weaknesses_avg": [ 80.25, 82.63587296083946 ], "wc_questions_avg": [ 157.75, 107.73201706085337 ], "wc_limitations_avg": [ 6.75, 4.264680527307995 ], "wc_review_avg": [ 419.0, 202.49444436823447 ], "wc_reply_reviewers_avg": [ 68.75, 112.2572380739879 ], "wc_reply_authors_avg": [ 69.75, 95.82894917507966 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=238316102987395625&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Probing Social Bias in Labor Market Text Generation by ChatGPT: A Masked Language Model Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95525", "id": "MP7j58lbWO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MP7j58lbWO", "openreview": "https://openreview.net/forum?id=MP7j58lbWO", "poster": "/media/PosterPDFs/NeurIPS%202024/95525.png?t=1729540630.4633658", "project": "", "author_site": "Lei Ding, Yang Hu, Nicole Denier, Enze Shi, Junxi Zhang, Qirui Hu, Karen Hughes, Linglong Kong, Bei Jiang", "tldr": "", "abstract": "As generative large language models (LLMs) such as ChatGPT gain widespread adoption in various domains, their potential to propagate and amplify social biases, particularly in high-stakes areas such as the labor market, has become a pressing concern. AI algorithms are not only widely used in the selection of job applicants, individual job seekers may also make use of generative LLMs to help develop their job application materials. Against this backdrop, this research builds on a novel experimental design to examine social biases within ChatGPT-generated job applications in response to real job advertisements. By simulating the process of job application creation, we examine the language patterns and biases that emerge when the model is prompted with diverse job postings. Notably, we present a novel bias evaluation framework based on Masked Language Models to quantitatively assess social bias based on validated inventories of social cues/words, enabling a systematic analysis of the language used. Our findings show that the increasing adoption of generative AI, not only by employers but also increasingly by individual job seekers, can reinforce and exacerbate gender and social inequalities in the labor market through the use of biased and gendered language.", "keywords": "social bias;LLM;NLP;sociology;labor market", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Lei Ding;Yang Hu;Nicole Denier;Enze Shi;Junxi Zhang;Qirui Hu;Karen D. Hughes;Linglong Kong;Bei Jiang", "authorids": "~Lei_Ding6;~Yang_Hu23;~Nicole_Denier1;~Enze_Shi1;~Junxi_Zhang1;~Qirui_Hu1;~Karen_D._Hughes1;~Linglong_Kong2;~Bei_Jiang1", "gender": "M;M;;M;M;M;F;M;F", "homepage": ";;;;https://junxi-zhang.github.io/;;https://apps.ualberta.ca/directory/person/khughes;https://www.ualberta.ca/~lkong;https://www.ualberta.ca/~bei1", "dblp": ";;;;;;;35/8525;190/4697", "google_scholar": "ICUOaR4AAAAJ;;;;https://scholar.google.ca/citations?hl=en;;https://scholar.google.ca/citations?user=25ROoTkAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=MfOZ8G0AAAAJ", "orcid": ";0000-0003-2027-8491;0000-0001-8905-1899;;0000-0001-5318-2045;0000-0002-4846-3886;0000-0001-8581-350X;0000-0003-3011-9216;0000-0002-0033-839X", "linkedin": ";;;enze-shi-554b621bb/;;;;;", "or_profile": "~Lei_Ding6;~Yang_Hu23;~Nicole_Denier1;~Enze_Shi1;~Junxi_Zhang1;~Qirui_Hu1;~Karen_D._Hughes1;~Linglong_Kong2;~Bei_Jiang1", "aff": "University of Alberta;Lancaster University;University of Alberta;University of Alberta;University of Alberta;Tsinghua University;University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;lancaster.ac.uk;ualberta.ca;ualberta.ca;ualberta.ca;tsinghua.edu.cn;ualberta.ca;ualberta.ca;ualberta.ca", "position": "PhD student;Full Professor;Assistant Professor;PhD student;Postdoc;PhD student;Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nding2024probing,\ntitle={Probing Social Bias in Labor Market Text Generation by Chat{GPT}: A Masked Language Model Approach},\nauthor={Lei Ding and Yang Hu and Nicole Denier and Enze Shi and Junxi Zhang and Qirui Hu and Karen D. Hughes and Linglong Kong and Bei Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MP7j58lbWO}\n}", "github": "", "reviewers": "HqWb;ZJXv;e7QL;Bg7r", "pdf_size": 1441157, "rating": "3;5;7;8", "confidence": "4;3;4;4", "soundness": "2;3;1;3", "novelty": "2;2;2;3", "presentation": "4;3;3;4", "wc_summary": "178;102;89;99", "wc_strengths": "96;91;35;41", "wc_weaknesses": "306;105;236;50", "wc_questions": "41;47;132;103", "wc_limitations": "1;21;49;6", "wc_review": "622;366;541;299", "wc_reply_reviewers": "15;0;32;39", "wc_reply_authors": "34;0;14;14", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 117.0, 35.54574517435244 ], "wc_strengths_avg": [ 65.75, 27.887048965424793 ], "wc_weaknesses_avg": [ 174.25, 101.74078582358207 ], "wc_questions_avg": [ 80.75, 38.21239981995373 ], "wc_limitations_avg": [ 19.25, 18.686559340873856 ], "wc_review_avg": [ 457.0, 129.92882667060454 ], "wc_reply_reviewers_avg": [ 21.5, 15.173990905493518 ], "wc_reply_authors_avg": [ 15.5, 12.114041439585717 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.22549380840084865, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14727771447217372000&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ualberta.ca;lancaster.ac.uk;ualberta.ca;ualberta.ca;ualberta.ca;tsinghua.edu.cn;ualberta.ca;ualberta.ca;ualberta.ca", "author_num": 9, "aff_unique_index": "0;1;0;0;0;2;0;0;0", "aff_unique_norm": "University of Alberta;Lancaster University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ualberta.ca;https://www.lancaster.ac.uk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UAlberta;Lancaster;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2;0;0;0", "aff_country_unique": "Canada;United Kingdom;China" }, { "title": "G-Retriever: Retrieval-Augmented Generation for Textual Graph Understanding and Question Answering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95524", "id": "MPJ3oXtTZl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MPJ3oXtTZl", "openreview": "https://openreview.net/forum?id=MPJ3oXtTZl", "poster": "/media/PosterPDFs/NeurIPS%202024/95524.png?t=1731391325.8258104", "project": "", "author_site": "Xiaoxin He, Yijun Tian, Yifei Sun, Nitesh Chawla, Thomas Laurent, Yann LeCun, Xavier Bresson, Bryan Hooi", "tldr": "", "abstract": "Given a graph with textual attributes, we enable users to `chat with their graph': that is, to ask questions about the graph using a conversational interface. In response to a user's questions, our method provides textual replies and highlights the relevant parts of the graph. While existing works integrate large language models (LLMs) and graph neural networks (GNNs) in various ways, they mostly focus on either conventional graph tasks (such as node, edge, and graph classification), or on answering simple graph queries on small or synthetic graphs. In contrast, we develop a flexible question-answering framework targeting real-world textual graphs, applicable to multiple applications including scene graph understanding, common sense reasoning, and knowledge graph reasoning. Toward this goal, we first develop a Graph Question Answering (GraphQA) benchmark with data collected from different tasks. Then, we propose our \\textit{G-Retriever} method, introducing the first retrieval-augmented generation (RAG) approach for general textual graphs, which can be fine-tuned to enhance graph understanding via soft prompting. To resist hallucination and to allow for textual graphs that greatly exceed the LLM's context window size, \\textit{G-Retriever} performs RAG over a graph by formulating this task as a Prize-Collecting Steiner Tree optimization problem. Empirical evaluations show that our method outperforms baselines on textual graph tasks from multiple domains, scales well with larger graph sizes, and mitigates hallucination.~\\footnote{Our codes and datasets are available at: \\url{https://github.com/XiaoxinHe/G-Retriever}}", "keywords": "Retrieval Augmented Generation;Graph Question Answering;Graph Neural Network;Large Language Model;Textual Graphs", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Xiaoxin He;Yijun Tian;Yifei Sun;Nitesh V Chawla;Thomas Laurent;Yann LeCun;Xavier Bresson;Bryan Hooi", "authorids": "~Xiaoxin_He1;~Yijun_Tian1;~Yifei_Sun1;~Nitesh_V_Chawla1;~Thomas_Laurent1;~Yann_LeCun1;~Xavier_Bresson6;~Bryan_Hooi1", "gender": "F;;M;M;M;M;;M", "homepage": "https://xiaoxinhe.github.io/;https://www.yijuntian.com/;https://sunefei.github.io/;http://thomaslaurent.lmu.build/homepage.html;http://yann.lecun.com;https://www.comp.nus.edu.sg/cs/people/xaviercs/;http://bhooi.github.io;http://niteshchawla.nd.edu", "dblp": "72/5872;234/9123-1;27/3389-2;47/8889-1;l/YannLeCun;95/378;169/9975;c/NiteshVChawla.html", "google_scholar": "icT6GMsAAAAJ;dbaBgV0AAAAJ;9mxdFawAAAAJ;_Ag_9uAAAAAJ;WLN3QrAAAAAJ;https://scholar.google.com.sg/citations?hl=en;;hDLBEhkAAAAJ", "orcid": ";0000-0003-2795-6080;0000-0002-6814-5527;;;;0000-0002-5645-1754;", "linkedin": "he-xiaoxin-a130601b4/;yijun-tian/;yifeis;;;;;", "or_profile": "~Xiaoxin_He1;~Yijun_Tian1;~Yifei_Sun1;~Thomas_Laurent1;~Yann_LeCun1;~Xavier_Bresson6;~Bryan_Hooi1;~Nitesh_Chawla1", "aff": "National University of Singapore;University of Notre Dame;National University of Singapore;Loyola Marymount University;New York University;National University of Singapore;National University of Singapore;University of Notre Dame", "aff_domain": "nus.edu;nd.edu;comp.nus.edu.sg;lmu.edu;nyu.edu;nus.edu.sg;nus.edu.sg;nd.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhe2024gretriever,\ntitle={G-Retriever: Retrieval-Augmented Generation for Textual Graph Understanding and Question Answering},\nauthor={Xiaoxin He and Yijun Tian and Yifei Sun and Nitesh V Chawla and Thomas Laurent and Yann LeCun and Xavier Bresson and Bryan Hooi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MPJ3oXtTZl}\n}", "github": "", "reviewers": "GQPK;RHZZ;mCtm;GHV4", "pdf_size": 4640869, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "2;3;3;3", "wc_summary": "106;82;96;85", "wc_strengths": "37;57;56;79", "wc_weaknesses": "428;168;110;49", "wc_questions": "59;8;1;39", "wc_limitations": "12;12;1;1", "wc_review": "642;327;264;253", "wc_reply_reviewers": "106;18;22;28", "wc_reply_authors": "48;10;21;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 9.496709956611289 ], "wc_strengths_avg": [ 57.25, 14.872373717735847 ], "wc_weaknesses_avg": [ 188.75, 144.3976713801161 ], "wc_questions_avg": [ 26.75, 23.47738273317535 ], "wc_limitations_avg": [ 6.5, 5.5 ], "wc_review_avg": [ 371.5, 158.70491485773212 ], "wc_reply_reviewers_avg": [ 43.5, 36.25948151863179 ], "wc_reply_authors_avg": [ 23.5, 14.67140075112121 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18049217919872131885&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nus.edu;nd.edu;comp.nus.edu.sg;lmu.edu;nyu.edu;nus.edu.sg;nus.edu.sg;nd.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;3;0;0;1", "aff_unique_norm": "National University of Singapore;University of Notre Dame;Loyola Marymount University;New York University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.nd.edu;https://www.lmu.edu;https://www.nyu.edu", "aff_unique_abbr": "NUS;Notre Dame;LMU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;0;0;1", "aff_country_unique": "Singapore;United States" }, { "title": "Adversarially Robust Dense-Sparse Tradeoffs via Heavy-Hitters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95523", "id": "MPidsCd9e7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MPidsCd9e7", "openreview": "https://openreview.net/forum?id=MPidsCd9e7", "poster": "", "project": "", "author_site": "David Woodruff, Samson Zhou", "tldr": "", "abstract": "In the adversarial streaming model, the input is a sequence of adaptive updates that defines an underlying dataset and the goal is to approximate, collect, or compute some statistic while using space sublinear in the size of the dataset. In 2022, Ben-Eliezer, Eden, and Onak showed a dense-sparse trade-off technique that elegantly combined sparse recovery with known techniques using differential privacy and sketch switching to achieve adversarially robust algorithms for $L_p$ estimation and other algorithms on turnstile streams. However, there has been no progress since, either in terms of achievability or impossibility. In this work, we first give improved algorithms for adversarially robust $L_p$-heavy hitters, utilizing deterministic turnstile heavy-hitter algorithms with better tradeoffs. We then utilize our heavy-hitter algorithm to reduce the problem to estimating the frequency moment of the tail vector. We give a new algorithm for this problem in the classical streaming setting, which achieves additive error and uses space independent in the size of the tail. We then leverage these ingredients to give an improved algorithm for adversarially robust $L_p$ estimation on turnstile streams. We believe that our results serve as an important conceptual message, demonstrating that there is no inherent barrier at the previous state-of-the-art.", "keywords": "streaming algorithms;adversarial robustness;heavy-hitters;norm estimation;differential privacy", "primary_area": "optimization", "supplementary_material": "/attachment/1df162e18d6e68db985b7cc834d38bd59ae70ee3.zip", "author": "David Woodruff;Samson Zhou", "authorids": "~David_Woodruff1;~Samson_Zhou1", "gender": "M;", "homepage": "http://www.cs.cmu.edu/~dwoodruf/;https://samsonzhou.github.io/", "dblp": "w/DPWoodruff;179/2683", "google_scholar": "https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;NpjsgocAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~David_Woodruff1;~Samson_Zhou1", "aff": "Carnegie Mellon University;Texas A&M University - College Station", "aff_domain": "cmu.edu;tamu.edu", "position": "Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwoodruff2024adversarially,\ntitle={Adversarially Robust Dense-Sparse Tradeoffs via Heavy-Hitters},\nauthor={David Woodruff and Samson Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MPidsCd9e7}\n}", "github": "", "reviewers": "KENe;B1cC;VAPi;pbML", "pdf_size": 531660, "rating": "6;7;7;8", "confidence": "3;3;4;4", "soundness": "4;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "427;108;158;132", "wc_strengths": "37;41;36;101", "wc_weaknesses": "101;127;24;53", "wc_questions": "86;119;130;56", "wc_limitations": "28;15;5;23", "wc_review": "679;410;353;365", "wc_reply_reviewers": "14;14;211;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 206.25, 128.67084945705457 ], "wc_strengths_avg": [ 53.75, 27.343874999714288 ], "wc_weaknesses_avg": [ 76.25, 40.18317433951678 ], "wc_questions_avg": [ 97.75, 29.03769102390891 ], "wc_limitations_avg": [ 17.75, 8.699856320652657 ], "wc_review_avg": [ 451.75, 132.91233012779514 ], "wc_reply_reviewers_avg": [ 61.0, 86.68044762228676 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2578025581727852299&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cmu.edu;tamu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.tamu.edu", "aff_unique_abbr": "CMU;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Boosting Sample Efficiency and Generalization in Multi-agent Reinforcement Learning via Equivariance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95522", "id": "MQIET1VfoV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MQIET1VfoV", "openreview": "https://openreview.net/forum?id=MQIET1VfoV", "poster": "/media/PosterPDFs/NeurIPS%202024/95522.png?t=1731723134.1544464", "project": "", "author_site": "Josh McClellan, Naveed Haghani, John Winder, Furong Huang, Pratap Tokekar", "tldr": "", "abstract": "Multi-Agent Reinforcement Learning (MARL) struggles with sample inefficiency and poor generalization [1]. These challenges are partially due to a lack of structure or inductive bias in the neural networks typically used in learning the policy. One such form of structure that is commonly observed in multi-agent scenarios is symmetry. The field of Geometric Deep Learning has developed Equivariant Graph Neural Networks (EGNN) that are equivariant (or symmetric) to rotations, translations, and reflections of nodes. Incorporating equivariance has been shown to improve learning efficiency and decrease error [ 2 ]. In this paper, we demonstrate that EGNNs improve the sample efficiency and generalization in MARL. However, we also show that a naive application of EGNNs to MARL results in poor early exploration due to a bias in the EGNN structure. To mitigate this bias, we present Exploration-enhanced Equivariant Graph Neural Networks or E2GN2. We compare E2GN2 to other common function approximators using common MARL benchmarks MPE and SMACv2. E2GN2 demonstrates a significant improvement in sample efficiency, greater final reward convergence, and a 2x-5x gain in over standard GNNs in our generalization tests. These results pave the way for more reliable and effective solutions in complex multi-agent systems.", "keywords": "Equivariant Graph Neural Networks;Reinforcement Learning;Multi-agent Reinforcement Learning;Symmetry;generalization;sample efficiency;MARL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Joshua McClellan;Naveed Haghani;John Winder;Furong Huang;Pratap Tokekar", "authorids": "~Joshua_McClellan1;~Naveed_Haghani1;~John_Winder2;~Furong_Huang1;~Pratap_Tokekar1", "gender": "M;;;F;M", "homepage": ";;;https://furong-huang.com;https://tokekar.com/", "dblp": ";;;72/8513;", "google_scholar": "zCnVnW4AAAAJ;;https://scholar.google.co.in/citations?user=Z5EffH0AAAAJ;13yyuCcAAAAJ;FKAovywAAAAJ", "orcid": ";;;;", "linkedin": ";;;;prataptokekar/", "or_profile": "~Joshua_McClellan1;~Naveed_Haghani1;~John_Winder2;~Furong_Huang1;~Pratap_Tokekar1", "aff": "University of Maryland, College Park;;Johns Hopkins University Applied Physics Laboratory;University of Maryland;University of Maryland, College Park", "aff_domain": "umd.edu;;jhuapl.edu;cs.umd.edu;umd.edu", "position": "PhD student;;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmcclellan2024boosting,\ntitle={Boosting Sample Efficiency and Generalization in Multi-agent Reinforcement Learning via Equivariance},\nauthor={Joshua McClellan and Naveed Haghani and John Winder and Furong Huang and Pratap Tokekar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MQIET1VfoV}\n}", "github": "", "reviewers": "wBzN;2qF5;TKRS;7FjK;N4Ry", "pdf_size": 1592426, "rating": "4;4;6;6;6", "confidence": "5;5;2;3;3", "soundness": "2;2;3;2;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;2;3", "wc_summary": "88;68;41;116;177", "wc_strengths": "69;36;33;205;13", "wc_weaknesses": "324;348;1;849;3", "wc_questions": "80;283;1;64;196", "wc_limitations": "18;16;1;71;2", "wc_review": "579;751;77;1305;391", "wc_reply_reviewers": "216;245;0;410;0", "wc_reply_authors": "1115;1618;0;923;53", "reply_reviewers": "1;3;0;1;0", "reply_authors": "4;8;1;3;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.6, 1.2000000000000002 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 98.0, 46.505913602465654 ], "wc_strengths_avg": [ 71.2, 69.27163921836988 ], "wc_weaknesses_avg": [ 305.0, 310.4081184505328 ], "wc_questions_avg": [ 124.8, 101.12052215055063 ], "wc_limitations_avg": [ 21.6, 25.663982543634962 ], "wc_review_avg": [ 620.6, 408.74324459249476 ], "wc_reply_reviewers_avg": [ 174.2, 156.87753185207882 ], "wc_reply_authors_avg": [ 741.8, 626.8254621503501 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 3.6, 2.4166091947189146 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9525793444156806, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13601325498334599313&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "umd.edu;;jhuapl.edu;cs.umd.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Maryland;Johns Hopkins University", "aff_unique_dep": ";Applied Physics Laboratory", "aff_unique_url": "https://www/umd.edu;https://www.jhuapl.edu", "aff_unique_abbr": "UMD;JHU APL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reinforcement Learning with Adaptive Regularization for Safe Control of Critical Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95521", "id": "MRO2QhydPF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MRO2QhydPF", "openreview": "https://openreview.net/forum?id=MRO2QhydPF", "poster": "/media/PosterPDFs/NeurIPS%202024/95521.png?t=1732142554.2431257", "project": "", "author_site": "Haozhe Tian, Homayoun Hamedmoghadam, Robert Shorten, Pietro Ferraro", "tldr": "", "abstract": "Reinforcement Learning (RL) is a powerful method for controlling dynamic systems, but its learning mechanism can lead to unpredictable actions that undermine the safety of critical systems. Here, we propose RL with Adaptive Regularization (RL-AR), an algorithm that enables safe RL exploration by combining the RL policy with a policy regularizer that hard-codes the safety constraints. RL-AR performs policy combination via a \"focus module,\" which determines the appropriate combination depending on the state\u2014relying more on the safe policy regularizer for less-exploited states while allowing unbiased convergence for well-exploited states. In a series of critical control applications, we demonstrate that RL-AR not only ensures safety during training but also achieves a return competitive with the standards of model-free RL that disregards safety.", "keywords": "Reinforcement Learning;Safe Crticial System Control;Policy Regularization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/bdb9f172c78f23c203234932151800455670010e.zip", "author": "Haozhe Tian;Homayoun Hamedmoghadam;Robert Noel Shorten;Pietro Ferraro", "authorids": "~Haozhe_Tian1;~Homayoun_Hamedmoghadam1;~Robert_Noel_Shorten1;~Pietro_Ferraro1", "gender": "M;M;;M", "homepage": "https://haozhetian.netlify.app/;;;", "dblp": "290/9100;;;", "google_scholar": "zyDCl1EAAAAJ;;https://scholar.google.com/citations?hl=en;vdb51oMAAAAJ", "orcid": "0000-0003-4455-3400;0000-0003-3467-724X;;", "linkedin": ";;;", "or_profile": "~Haozhe_Tian1;~Homayoun_Hamedmoghadam1;~Robert_Noel_Shorten1;~Pietro_Ferraro1", "aff": "Imperial College London;Imperial College London;Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;imperial.ac.uk", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntian2024reinforcement,\ntitle={Reinforcement Learning with Adaptive Regularization for Safe Control of Critical Systems},\nauthor={Haozhe Tian and Homayoun Hamedmoghadam and Robert Noel Shorten and Pietro Ferraro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MRO2QhydPF}\n}", "github": "", "reviewers": "QQKo;ChN3;zn6h;ta4e", "pdf_size": 1016229, "rating": "3;5;6;7", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "1;2;3;3", "presentation": "4;3;4;4", "wc_summary": "78;59;44;175", "wc_strengths": "28;65;79;147", "wc_weaknesses": "149;526;81;401", "wc_questions": "199;82;82;54", "wc_limitations": "6;119;5;36", "wc_review": "460;851;291;813", "wc_reply_reviewers": "332;60;889;78", "wc_reply_authors": "452;0;887;66", "reply_reviewers": "1;1;4;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 51.09305236526782 ], "wc_strengths_avg": [ 79.75, 43.06608294238054 ], "wc_weaknesses_avg": [ 289.25, 181.367548089508 ], "wc_questions_avg": [ 104.25, 55.88548559330947 ], "wc_limitations_avg": [ 41.5, 46.446205442425544 ], "wc_review_avg": [ 603.75, 236.32326906168169 ], "wc_reply_reviewers_avg": [ 339.75, 334.8539793701129 ], "wc_reply_authors_avg": [ 351.25, 354.23253309090626 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1793987782159847753&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "email": "imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;imperial.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "UDA: A Benchmark Suite for Retrieval Augmented Generation in Real-World Document Analysis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97735", "id": "MS4oxVfBHn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MS4oxVfBHn", "openreview": "https://openreview.net/forum?id=MS4oxVfBHn", "poster": "/media/PosterPDFs/NeurIPS%202024/97735.png?t=1731328004.3423798", "project": "", "author_site": "Yulong Hui, YAO LU, Huanchen Zhang", "tldr": "", "abstract": "The use of Retrieval-Augmented Generation (RAG) has improved Large Language Models (LLMs) in collaborating with external data, yet significant challenges exist in real-world scenarios. In areas such as academic literature and finance question answering, data are often found in raw text and tables in HTML or PDF formats, which can be lengthy and highly unstructured. In this paper, we introduce a benchmark suite, namely Unstructured Document Analysis (UDA), that involves 2,965 real-world documents and 29,590 expert-annotated Q&A pairs. We revisit popular LLM- and RAG-based solutions for document analysis and evaluate the design choices and answer qualities across multiple document domains and diverse query types. Our evaluation yields interesting findings and highlights the importance of data parsing and retrieval. We hope our benchmark can shed light and better serve real-world document analysis applications. The benchmark suite and code can be found at https://github.com/qinchuanhui/UDA-Benchmark", "keywords": "Retrieval Augmented Generation;large language model;document analysis;benchmark", "primary_area": "", "supplementary_material": "/attachment/7eb1b53bf84d1bca8704ac7b68aac0dcefdbed44.pdf", "author": "Yulong Hui;Yao Lu;Huanchen Zhang", "authorids": "~Yulong_Hui1;~Yao_Lu10;~Huanchen_Zhang1", "gender": ";;M", "homepage": "https://qinchuanhui.github.io/;http://yao.lu/;http://people.iiis.tsinghua.edu.cn/~huanchen/index.html", "dblp": "344/5481;;", "google_scholar": "https://scholar.google.com/citations?hl=en;t-sqBkIAAAAJ;filGbagAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yulong_Hui1;~Yao_Lu10;~Huanchen_Zhang1", "aff": "Tsinghua University;National University of Singapore;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;nus.edu;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhui2024uda,\ntitle={{UDA}: A Benchmark Suite for Retrieval Augmented Generation in Real-World Document Analysis},\nauthor={Yulong Hui and Yao Lu and Huanchen Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=MS4oxVfBHn}\n}", "github": "", "reviewers": "zcFM;FaEw;snvC;ubM5", "pdf_size": 2883472, "rating": "6;6;6;8", "confidence": "4;4;3;4", "wc_summary_and_contributions": "93;147;57;67", "wc_strengths": "74;28;92;89", "wc_improvement": "5;20;46;59", "wc_limitations": "196;11;17;31", "wc_correctness": "11;19;1;1", "wc_clarity": "24;1;37;1", "wc_relation_to_prior_work": "33;1;1;1", "wc_documentation": "29;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "466;229;253;251", "wc_reply_reviewers": "13;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 91.0, 34.899856733230294 ], "wc_strengths_avg": [ 70.75, 25.606395685453272 ], "wc_improvement_avg": [ 32.5, 21.195518394226642 ], "wc_limitations_avg": [ 63.75, 76.69867991041306 ], "wc_correctness_avg": [ 8.0, 7.54983443527075 ], "wc_clarity_avg": [ 15.75, 15.449514555480375 ], "wc_relation_to_prior_work_avg": [ 9.0, 13.856406460551018 ], "wc_documentation_avg": [ 8.0, 12.12435565298214 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.75, 96.4452564929971 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14291083852987649337&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "mail.tsinghua.edu.cn;nus.edu;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Learning Disentangled Representations for Perceptual Point Cloud Quality Assessment via Mutual Information Minimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95520", "id": "MSSRhxwZP7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MSSRhxwZP7", "openreview": "https://openreview.net/forum?id=MSSRhxwZP7", "poster": "/media/PosterPDFs/NeurIPS%202024/95520.png?t=1730294197.5742393", "project": "", "author_site": "Ziyu Shan, Yujie Zhang, Yipeng Liu, YILING XU", "tldr": "", "abstract": "No-Reference Point Cloud Quality Assessment (NR-PCQA) aims to objectively assess the human perceptual quality of point clouds without relying on pristine-quality point clouds for reference. It is becoming increasingly significant with the rapid advancement of immersive media applications such as virtual reality (VR) and augmented reality (AR). However, current NR-PCQA models attempt to indiscriminately learn point cloud content and distortion representations within a single network, overlooking their distinct contributions to quality information. To address this issue, we propose DisPA, a novel disentangled representation learning framework for NR-PCQA. The framework trains a dual-branch disentanglement network to minimize mutual information (MI) between representations of point cloud content and distortion. Specifically, to fully disentangle representations, the two branches adopt different philosophies: the content-aware encoder is pretrained by a masked auto-encoding strategy, which can allow the encoder to capture semantic information from rendered images of distorted point clouds; the distortion-aware encoder takes a mini-patch map as input, which forces the encoder to focus on low-level distortion patterns. Furthermore, we utilize an MI estimator to estimate the tight upper bound of the actual MI and further minimize it to achieve explicit representation disentanglement. Extensive experimental results demonstrate that DisPA outperforms state-of-the-art methods on multiple PCQA datasets.", "keywords": "Point Cloud Quality Assessment;Low-Level Vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/9fecc54da7752cdc41733045faab26a85d2dfce7.zip", "author": "Ziyu Shan;Yujie Zhang;Yipeng Liu;Yiling Xu", "authorids": "~Ziyu_Shan1;~Yujie_Zhang3;~Yipeng_Liu2;~Yiling_Xu1", "gender": ";M;M;F", "homepage": "https://zyshan0929.github.io;https://scholar.google.com/citations?user=AisjLHwAAAAJ&hl=zh-CN;;", "dblp": "332/0929;;26/6297;164/8950", "google_scholar": "McMyJ_0AAAAJ;AisjLHwAAAAJ;g-i2eMkAAAAJ;638kRwkAAAAJ", "orcid": "0000-0002-3346-4261;;;", "linkedin": ";;;", "or_profile": "~Ziyu_Shan1;~Yujie_Zhang3;~Yipeng_Liu2;~Yiling_Xu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nshan2024learning,\ntitle={Learning Disentangled Representations for Perceptual Point Cloud Quality Assessment via Mutual Information Minimization},\nauthor={Ziyu Shan and Yujie Zhang and Yipeng Liu and Yiling Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MSSRhxwZP7}\n}", "github": "", "reviewers": "nBew;hUdG;2ATd;oeD7", "pdf_size": 24800988, "rating": "4;6;6;7", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "75;43;47;178", "wc_strengths": "13;12;64;104", "wc_weaknesses": "111;61;139;341", "wc_questions": "2;4;64;77", "wc_limitations": "2;4;10;23", "wc_review": "203;124;324;723", "wc_reply_reviewers": "0;19;56;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.75, 54.66888969057265 ], "wc_strengths_avg": [ 48.25, 38.447204059593204 ], "wc_weaknesses_avg": [ 163.0, 106.49882628461216 ], "wc_questions_avg": [ 36.75, 34.06886408438062 ], "wc_limitations_avg": [ 9.75, 8.1967981553775 ], "wc_review_avg": [ 343.5, 230.3915146006901 ], "wc_reply_reviewers_avg": [ 23.5, 20.303940504246953 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kEUgtt6Ad2kJ:scholar.google.com/&scioq=Learning+Disentangled+Representations+for+Perceptual+Point+Cloud+Quality+Assessment+via+Mutual+Information+Minimization&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Analysis of Corrected Graph Convolutions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95519", "id": "MSsQDWUWpd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MSsQDWUWpd", "openreview": "https://openreview.net/forum?id=MSsQDWUWpd", "poster": "/media/PosterPDFs/NeurIPS%202024/95519.png?t=1731521968.246546", "project": "", "author_site": "Robert Wang, Aseem Baranwal, Kimon Fountoulakis", "tldr": "", "abstract": "Machine learning for node classification on graphs is a prominent area driven by applications such as recommendation systems. State-of-the-art models often use multiple graph convolutions on the data, as empirical evidence suggests they can enhance performance. However, it has been shown empirically and theoretically, that too many graph convolutions can degrade performance significantly, a phenomenon known as oversmoothing. In this paper, we provide a rigorous theoretical analysis, based on the two-class contextual stochastic block model (CSBM), of the performance of vanilla graph convolution from which we remove the principal eigenvector to avoid oversmoothing. We perform a spectral analysis for $k$ rounds of corrected graph convolutions, and we provide results for partial and exact classification. For partial classification, we show that each round of convolution can reduce the misclassification error exponentially up to a saturation level, after which performance does not worsen. We also extend this analysis to the multi-class setting with features distributed according to a Gaussian mixture model. For exact classification, we show that the separability threshold can be improved exponentially up to $O({\\log{n}}/{\\log\\log{n}})$ corrected convolutions.", "keywords": "Node classification;partial classification;exact classification;contextual stochastic block model;graph convolution;spectral analysis", "primary_area": "learning_theory", "supplementary_material": "/attachment/ffcc902cf45dd5d63f855ec3591582edf36ffe2b.zip", "author": "Robert Wang;Aseem Baranwal;Kimon Fountoulakis", "authorids": "~Robert_Wang4;~Aseem_Baranwal1;~Kimon_Fountoulakis1", "gender": "M;M;M", "homepage": ";https://aseemrb.me;https://opallab.ca", "dblp": ";285/5304;149/5799", "google_scholar": ";DPt626YAAAAJ;https://scholar.google.ca/citations?user=K-SafJUAAAAJ", "orcid": "0000-0002-6036-3888;0000-0001-5318-6054;", "linkedin": ";aseemrb/;", "or_profile": "~Robert_Wang4;~Aseem_Baranwal1;~Kimon_Fountoulakis1", "aff": "University of Waterloo;University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024analysis,\ntitle={Analysis of Corrected Graph Convolutions},\nauthor={Robert Wang and Aseem Baranwal and Kimon Fountoulakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MSsQDWUWpd}\n}", "github": "", "reviewers": "xoAh;kcVu;uSHp;eGdd", "pdf_size": 892542, "rating": "5;6;7;7", "confidence": "3;4;3;3", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;1;3;3", "wc_summary": "54;63;60;71", "wc_strengths": "35;33;57;50", "wc_weaknesses": "148;183;95;157", "wc_questions": "20;51;110;5", "wc_limitations": "27;5;3;15", "wc_review": "284;335;325;298", "wc_reply_reviewers": "24;577;32;36", "wc_reply_authors": "25;838;120;22", "reply_reviewers": "1;3;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.0, 6.123724356957945 ], "wc_strengths_avg": [ 43.75, 10.084022015049353 ], "wc_weaknesses_avg": [ 145.75, 31.995116814914116 ], "wc_questions_avg": [ 46.5, 40.23990556648959 ], "wc_limitations_avg": [ 12.5, 9.526279441628825 ], "wc_review_avg": [ 310.5, 20.426698215815495 ], "wc_reply_reviewers_avg": [ 167.25, 236.60872236669553 ], "wc_reply_authors_avg": [ 251.25, 341.04499336597803 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5190939653868725975&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Aligning Diffusion Models by Optimizing Human Utility", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95518", "id": "MTMShU5QaC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MTMShU5QaC", "openreview": "https://openreview.net/forum?id=MTMShU5QaC", "poster": "/media/PosterPDFs/NeurIPS%202024/95518.png?t=1733632996.479002", "project": "", "author_site": "Shufan Li, Konstantinos Kallidromitis, Akash Gokul, Yusuke Kato, Kazuki Kozuka", "tldr": "", "abstract": "We present Diffusion-KTO, a novel approach for aligning text-to-image diffusion models by formulating the alignment objective as the maximization of expected human utility. Unlike previous methods, Diffusion-KTO does not require collecting pairwise preference data nor training a complex reward model. Instead, our objective uses per-image binary feedback signals, e.g. likes or dislikes, to align the model with human preferences. After fine-tuning using Diffusion-KTO, text-to-image diffusion models exhibit improved performance compared to existing techniques, including supervised fine-tuning and Diffusion-DPO, both in terms of human judgment and automatic evaluation metrics such as PickScore and ImageReward. Overall, Diffusion-KTO unlocks the potential of leveraging readily available per-image binary preference signals and broadens the applicability of aligning text-to-image diffusion models with human preferences.", "keywords": "text-to-image; diffusion; computer vision;", "primary_area": "generative_models", "supplementary_material": "/attachment/68fd21aad671c8820f65d815e2ac7c65e2ae5917.zip", "author": "Shufan Li;Konstantinos Kallidromitis;Akash Gokul;Yusuke Kato;Kazuki Kozuka", "authorids": "~Shufan_Li1;~Konstantinos_Kallidromitis1;~Akash_Gokul1;~Yusuke_Kato1;~Kazuki_Kozuka1", "gender": "M;M;;;M", "homepage": ";https://github.com/KKallidromitis;;;https://kazukikozuka.net/", "dblp": "218/8196;;;;35/5062", "google_scholar": ";5EuNtbQAAAAJ;;;_Fq6uvQAAAAJ", "orcid": ";;;;", "linkedin": "shufan-li-126b70187/;kkall/;;yusuke-kato-b1a875175/;", "or_profile": "~Shufan_Li1;~Konstantinos_Kallidromitis1;~Akash_Gokul1;~Yusuke_Kato1;~Kazuki_Kozuka1", "aff": "UCLA Computer Science Department, University of California, Los Angeles;Panasonic;;Panasonic Holdings Corporation;Panasonic Corporation", "aff_domain": "cs.ucla.edu;us.panasonic.com;;jp.panasonic.com;jp.panasonic.com", "position": "PhD student;AI Research Engineer;;Researcher;Researcher", "bibtex": "@inproceedings{\nli2024aligning,\ntitle={Aligning Diffusion Models by Optimizing Human Utility},\nauthor={Shufan Li and Konstantinos Kallidromitis and Akash Gokul and Yusuke Kato and Kazuki Kozuka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MTMShU5QaC}\n}", "github": "", "reviewers": "7SUT;s9JR;YybC;MiLc", "pdf_size": 7121732, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;4;3;3", "presentation": "3;3;3;3", "wc_summary": "46;105;201;16", "wc_strengths": "42;32;221;62", "wc_weaknesses": "202;168;142;31", "wc_questions": "17;2;67;43", "wc_limitations": "5;7;167;1", "wc_review": "312;314;798;153", "wc_reply_reviewers": "22;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.0, 70.6080731928014 ], "wc_strengths_avg": [ 89.25, 76.82894962187105 ], "wc_weaknesses_avg": [ 135.75, 64.11074402937467 ], "wc_questions_avg": [ 32.25, 24.853319697778804 ], "wc_limitations_avg": [ 45.0, 70.46985170979147 ], "wc_review_avg": [ 394.25, 242.08508318357823 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11105352598580360169&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.ucla.edu;us.panasonic.com;;jp.panasonic.com;jp.panasonic.com", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of California, Los Angeles;Panasonic Corporation;Panasonic Holdings Corporation", "aff_unique_dep": "Computer Science Department;;", "aff_unique_url": "https://www.ucla.edu;https://www.panasonic.com;https://www.panasonic.com/global", "aff_unique_abbr": "UCLA;Panasonic;PHC", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Japan" }, { "title": "DePLM: Denoising Protein Language Models for Property Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95517", "id": "MU27zjHBcW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MU27zjHBcW", "openreview": "https://openreview.net/forum?id=MU27zjHBcW", "poster": "/media/PosterPDFs/NeurIPS%202024/95517.png?t=1730861045.274962", "project": "", "author_site": "Zeyuan Wang, Keyan Ding, Ming Qin, Xiaotong Li, Xiang Zhuang, Yu Zhao, Jianhua Yao, Qiang Zhang, Huajun Chen", "tldr": "", "abstract": "Protein optimization is a fundamental biological task aimed at enhancing theperformance of proteins by modifying their sequences. Computational methodsprimarily rely on evolutionary information (EI) encoded by protein languagemodels (PLMs) to predict fitness landscape for optimization. However, thesemethods suffer from a few limitations. (1) Evolutionary processes involve thesimultaneous consideration of multiple functional properties, often overshadowingthe specific property of interest. (2) Measurements of these properties tend to betailored to experimental conditions, leading to reduced generalizability of trainedmodels to novel proteins. To address these limitations, we introduce DenoisingProtein Language Models (DePLM), a novel approach that refines the evolutionaryinformation embodied in PLMs for improved protein optimization. Specifically, weconceptualize EI as comprising both property-relevant and irrelevant information,with the latter acting as \u201cnoise\u201d for the optimization task at hand. Our approachinvolves denoising this EI in PLMs through a diffusion process conducted in therank space of property values, thereby enhancing model generalization and ensuringdataset-agnostic learning. Extensive experimental results have demonstrated thatDePLM not only surpasses the state-of-the-art in mutation effect prediction butalso exhibits strong generalization capabilities for novel proteins.", "keywords": "protein language model;protein engineering;diffusion model;evolutionary information", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/23272fd7c0ba46c300961aee5a4c977d4ac8c224.zip", "author": "Zeyuan Wang;Keyan Ding;Ming Qin;Xiaotong Li;Xiang Zhuang;Yu Zhao;Jianhua Yao;Qiang Zhang;Huajun Chen", "authorids": "~Zeyuan_Wang3;~Keyan_Ding1;~Ming_Qin3;~Xiaotong_Li3;~Xiang_Zhuang1;~Yu_Zhao8;~Jianhua_Yao3;~Qiang_Zhang6;~Huajun_Chen1", "gender": "M;M;M;;;M;M;;M", "homepage": ";;;https://github.com/MercuryDemo;;;;https://qiangairesearcher.github.io;", "dblp": ";195/3500;76/3104;;;57/2056-9;;72/3527-26;94/5089", "google_scholar": ";A7u-ZowAAAAJ;;;;7XOW0wcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;0000-0001-8607-8965;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Zeyuan_Wang3;~Keyan_Ding1;~Ming_Qin3;~Xiaotong_Li3;~Xiang_Zhuang1;~Yu_Zhao8;~Jianhua_Yao3;~Qiang_Zhang6;~Huajun_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Tencent AI Lab;Tencent AI Lab;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;tencent.com;tencent.com;zju.edu.cn;zju.edu.cn", "position": "PhD student;Researcher;PhD student;MS student;;Researcher;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2024deplm,\ntitle={De{PLM}: Denoising Protein Language Models for Property Optimization},\nauthor={Zeyuan Wang and Keyan Ding and Ming Qin and Xiaotong Li and Xiang Zhuang and Yu Zhao and Jianhua Yao and Qiang Zhang and Huajun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MU27zjHBcW}\n}", "github": "", "reviewers": "FrCy;5NpU;ZGbQ", "pdf_size": 4735637, "rating": "5;6;7", "confidence": "4;3;2", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "71;79;106", "wc_strengths": "167;38;134", "wc_weaknesses": "461;12;75", "wc_questions": "111;173;115", "wc_limitations": "170;9;9", "wc_review": "980;311;439", "wc_reply_reviewers": "94;21;39", "wc_reply_authors": "16;18;17", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 85.33333333333333, 14.974051630144135 ], "wc_strengths_avg": [ 113.0, 54.71745608121781 ], "wc_weaknesses_avg": [ 182.66666666666666, 198.48481609992797 ], "wc_questions_avg": [ 133.0, 28.331372481167705 ], "wc_limitations_avg": [ 62.666666666666664, 75.8961278473561 ], "wc_review_avg": [ 576.6666666666666, 289.9475048272627 ], "wc_reply_reviewers_avg": [ 51.333333333333336, 31.05192783422991 ], "wc_reply_authors_avg": [ 17.0, 0.816496580927726 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3805856832464759562&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;tencent.com;tencent.com;zju.edu.cn;zju.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;1;0;0", "aff_unique_norm": "Zhejiang University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.zju.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "ZJU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ConceptMix: A Compositional Image Generation Benchmark with Controllable Difficulty", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97734", "id": "MU2s9wwWLo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MU2s9wwWLo", "openreview": "https://openreview.net/forum?id=MU2s9wwWLo", "poster": "", "project": "", "author_site": "Xindi Wu, Dingli Yu, Yangsibo Huang, Olga Russakovsky, Sanjeev Arora", "tldr": "", "abstract": "Compositionality is a critical capability in Text-to-Image (T2I) models, as it reflects their ability to understand and combine multiple concepts from text descriptions. Existing evaluations of compositional capability rely heavily on human-designed text prompts or fixed templates, limiting their diversity and complexity, and yielding low discriminative power. We propose ConceptMix, a scalable, controllable, and customizable benchmark which automatically evaluates compositional generation ability of T2I models. This is done in two stages. First, ConceptMix generates the text prompts: concretely, using categories of visual concepts (e.g., objects, colors, shapes, spatial relationships), it randomly samples an object and k-tuples of visual concepts, then uses GPT-4o to generate text prompts for image generation based on these sampled concepts. Second, ConceptMix evaluates the images generated in response to these prompts: concretely, it checks how many of the k concepts actually appeared in the image by generating one question per visual concept and using a strong VLM to answer them. Through administering ConceptMix to a diverse set of T2I models (proprietary as well as open ones) using increasing values of k, we show that our ConceptMix has higher discrimination power than earlier benchmarks. Specifically, ConceptMix reveals that the performance of several models, especially open models, drops dramatically with increased k. Importantly, it also provides insight into the lack of prompt diversity in widely-used training datasets. Additionally, we conduct extensive human studies to validate the design of ConceptMix and compare our automatic grading with human judgement. We hope it will guide future T2I model development.", "keywords": "Text-to-image generation;Compositional T2I generation;Compositional T2I benchmark", "primary_area": "", "supplementary_material": "/attachment/bd1dc7cc43ff90efe3b2b7a146be9ef93dee96cc.zip", "author": "Xindi Wu;Dingli Yu;Yangsibo Huang;Olga Russakovsky;Sanjeev Arora", "authorids": "~Xindi_Wu1;~Dingli_Yu1;~Yangsibo_Huang2;~Olga_Russakovsky1;~Sanjeev_Arora1", "gender": "F;;F;F;", "homepage": "https://xindiwu.github.io/;https://dingliyu.net/;https://hazelsuko07.github.io/yangsibo/;http://cs.princeton.edu/~olgarus;http://www.cs.princeton.edu/~arora/", "dblp": "235/0784;39/578;;52/6883;a/SArora", "google_scholar": "hvnUnrUAAAAJ;KJLJstYAAAAJ;NMPUDa0AAAAJ;TB5OwW8AAAAJ;RUP4S68AAAAJ", "orcid": ";0000-0002-8824-8611;;0000-0001-5272-3241;", "linkedin": ";;;;", "or_profile": "~Xindi_Wu1;~Dingli_Yu1;~Yangsibo_Huang2;~Olga_Russakovsky1;~Sanjeev_Arora1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "cs.princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024conceptmix,\ntitle={ConceptMix: A Compositional Image Generation Benchmark with Controllable Difficulty},\nauthor={Xindi Wu and Dingli Yu and Yangsibo Huang and Olga Russakovsky and Sanjeev Arora},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=MU2s9wwWLo}\n}", "github": "", "reviewers": "UaRX;r238;a4xv", "pdf_size": 20515329, "rating": "5;6;8", "confidence": "5;4;4", "wc_summary_and_contributions": "70;62;77", "wc_strengths": "101;24;123", "wc_improvement": "173;48;109", "wc_limitations": "13;1;27", "wc_correctness": "8;34;7", "wc_clarity": "9;58;8", "wc_relation_to_prior_work": "19;30;7", "wc_documentation": "54;46;36", "wc_additional_feedback": "1;1;1", "wc_review": "448;304;395", "wc_reply_reviewers": "162;41;25", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 69.66666666666667, 6.128258770283411 ], "wc_strengths_avg": [ 82.66666666666667, 42.444735310230826 ], "wc_improvement_avg": [ 110.0, 51.03593505234 ], "wc_limitations_avg": [ 13.666666666666666, 10.624918300339486 ], "wc_correctness_avg": [ 16.333333333333332, 12.498888839501783 ], "wc_clarity_avg": [ 25.0, 23.338094752285727 ], "wc_relation_to_prior_work_avg": [ 18.666666666666668, 9.392668535736915 ], "wc_documentation_avg": [ 45.333333333333336, 7.363574011458175 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 382.3333333333333, 59.46614349545649 ], "wc_reply_reviewers_avg": [ 76.0, 61.16098974564315 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17836203587605637955&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "cs.princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Noisy Ostracods: A Fine-Grained, Imbalanced Real-World Dataset for Benchmarking Robust Machine Learning and Label Correction Methods", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97733", "id": "MUnPBKBaCY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MUnPBKBaCY", "openreview": "https://openreview.net/forum?id=MUnPBKBaCY", "poster": "/media/PosterPDFs/NeurIPS%202024/97733.png?t=1733515835.2123806", "project": "", "author_site": "Jiamian Hu, Hong Yuanyuan, Yihua Chen, He Wang, Moriaki Yasuhara", "tldr": "", "abstract": "We present the Noisy Ostracods, a noisy dataset for genus and species classification\nof crustacean ostracods with specialists\u2019 annotations. Over the 71466 specimens\ncollected, 5.58% of them are estimated to be noisy (possibly problematic) at genus\nlevel. The dataset is created to addressing a real-world challenge: creating a\nclean fine-grained taxonomy dataset. The Noisy Ostracods dataset has diverse\nnoises from multiple sources. Firstly, the noise is open-set, including new classes\ndiscovered during curation that were not part of the original annotation. The\ndataset has pseudo-classes, where annotators misclassified samples that should\nbelong to an existing class into a new pseudo-class. The Noisy Ostracods dataset\nis highly imbalanced with a imbalance factor \u03c1 = 22429. This presents a unique\nchallenge for robust machine learning methods, as existing approaches have not\nbeen extensively evaluated on fine-grained classification tasks with such diverse\nreal-world noise. Initial experiments using current robust learning techniques\nhave not yielded significant performance improvements on the Noisy Ostracods\ndataset compared to cross-entropy training on the raw, noisy data. On the other\nhand, noise detection methods have underperformed in error hit rate compared\nto naive cross-validation ensembling for identifying problematic labels. These\nfindings suggest that the fine-grained, imbalanced nature, and complex noise\ncharacteristics of the dataset present considerable challenges for existing noiserobust\nalgorithms. By openly releasing the Noisy Ostracods dataset, our goal\nis to encourage further research into the development of noise-resilient machine\nlearning methods capable of effectively handling diverse, real-world noise in finegrained\nclassification tasks. The dataset, along with its evaluation protocols, can be\naccessed at https://github.com/H-Jamieu/Noisy_ostracods.", "keywords": "learning with noisy labels;label noise;robust machine learning;label noise correction;computer vision;fine-grained classification;imbalanced dataset", "primary_area": "", "supplementary_material": "/attachment/5999f6f462ee78993dcbd3343a4740e9735d0a88.pdf", "author": "Jiamian Hu;Hong Yuanyuan;Yihua Chen;He Wang;Moriaki Yasuhara", "authorids": "~Jiamian_Hu1;~Hong_Yuanyuan1;~Yihua_Chen2;~He_Wang17;~Moriaki_Yasuhara1", "gender": "F;M;M;M;Not Specified", "homepage": "https://scholar.google.com/citations?user=2FBfN-sAAAAJ&hl=en;https://github.com/y1huac00;http://english.nigpas.cas.cn/en_sourcedb_nigpas/zlyjy/202005/t20200507_644405.html;https://moriakiyasuhara.com/;https://github.com/H-Jamieu", "dblp": ";;;;394/6662", "google_scholar": "2FBfN-sAAAAJ;;_XSxDOUAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0003-0990-1764;", "linkedin": ";;;;", "or_profile": "~Hong_Yuanyuan1;~Yihua_Chen2;~He_Wang17;~Moriaki_Yasuhara1;~Hu_Jiamian1", "aff": "University of Hong Kong;The University of Tokyo;University of Hong Kong;University of Hong Kong;University of Hong Kong", "aff_domain": "hku.hk;u-tokyo.ac.jp;hku.hk;hku.hk;hku.hk", "position": "Researcher;MS student;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nhu2024noisy,\ntitle={Noisy Ostracods: A Fine-Grained, Imbalanced Real-World Dataset for Benchmarking Robust Machine Learning and Label Correction Methods},\nauthor={Jiamian Hu and Hong Yuanyuan and Yihua Chen and He Wang and Moriaki Yasuhara},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=MUnPBKBaCY}\n}", "github": "", "reviewers": "Q9DE;mtcB;GKNX;x9Zo", "pdf_size": 7887846, "rating": "6;7;7;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "50;62;201;23", "wc_strengths": "4;44;66;18", "wc_improvement": "33;55;65;176", "wc_limitations": "62;1;4;1", "wc_correctness": "5;1;24;9", "wc_clarity": "7;1;18;6", "wc_relation_to_prior_work": "11;1;18;1", "wc_documentation": "7;16;4;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "180;182;401;236", "wc_reply_reviewers": "18;12;39;41", "wc_reply_authors": "29;269;15;18", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;4", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 84.0, 69.01086870921131 ], "wc_strengths_avg": [ 33.0, 23.853720883753127 ], "wc_improvement_avg": [ 82.25, 55.35058716942396 ], "wc_limitations_avg": [ 17.0, 26.009613607279906 ], "wc_correctness_avg": [ 9.75, 8.699856320652657 ], "wc_clarity_avg": [ 8.0, 6.2048368229954285 ], "wc_relation_to_prior_work_avg": [ 7.75, 7.189401922274203 ], "wc_documentation_avg": [ 7.0, 5.612486080160912 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 249.75, 90.16755236780024 ], "wc_reply_reviewers_avg": [ 27.5, 12.698425099200294 ], "wc_reply_authors_avg": [ 82.75, 107.65773311750532 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lJYg640oBOcJ:scholar.google.com/&scioq=Noisy+Ostracods:+A+Fine-Grained,+Imbalanced+Real-World+Dataset+for+Benchmarking+Robust+Machine+Learning+and+Label+Correction+Methods&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "hku.hk;u-tokyo.ac.jp;hku.hk;hku.hk;hku.hk", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Hong Kong;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "HKU;UTokyo", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Marrying Causal Representation Learning with Dynamical Systems for Science", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95516", "id": "MWHRxKz4mq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MWHRxKz4mq", "openreview": "https://openreview.net/forum?id=MWHRxKz4mq", "poster": "/media/PosterPDFs/NeurIPS%202024/95516.png?t=1731429758.0759637", "project": "", "author_site": "Dingling Yao, Caroline Muller, Francesco Locatello", "tldr": "", "abstract": "Causal representation learning promises to extend causal models to hidden causal variables from raw entangled measurements. However, most progress has focused on proving identifiability results in different settings, and we are not aware of any successful real-world application. At the same time, the field of dynamical systems benefited from deep learning and scaled to countless applications but does not allow parameter identification. In this paper, we draw a clear connection between the two and their key assumptions, allowing us to apply identifiable methods developed in causal representation learning to dynamical systems. At the same time, we can leverage scalable differentiable solvers developed for differential equations to build models that are both identifiable and practical. Overall, we learn explicitly controllable models that isolate the trajectory-specific parameters for further downstream tasks such as out-of-distribution classification or treatment effect estimation. We experiment with a wind simulator with partially known factors of variation. We also apply the resulting model to real-world climate data and successfully answer downstream causal questions in line with existing literature on climate change.", "keywords": "Causal representation learning", "primary_area": "causal_inference", "supplementary_material": "/attachment/7e587c51b8434995ae91338b8d74555b17b35e07.zip", "author": "Dingling Yao;Caroline Muller;Francesco Locatello", "authorids": "~Dingling_Yao1;~Caroline_Muller1;~Francesco_Locatello1", "gender": "F;F;M", "homepage": ";https://www.lmd.ens.fr/muller/;https://twitter.com/FrancescoLocat8", "dblp": "298/8057;;195/6074", "google_scholar": ";o9VDVDgAAAAJ;", "orcid": ";0000-0001-5836-5350;", "linkedin": "dingling-yao-b28b161a2/;;", "or_profile": "~Dingling_Yao1;~Caroline_Muller1;~Francesco_Locatello1", "aff": "Institute of Science and Technology;Institute of Science and Technology Austria;Institute of Science and Technology", "aff_domain": "ist.ac.at;ista.ac.at;ist.ac.at", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyao2024marrying,\ntitle={Marrying Causal Representation Learning with Dynamical Systems for Science},\nauthor={Dingling Yao and Caroline Muller and Francesco Locatello},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MWHRxKz4mq}\n}", "github": "", "reviewers": "yeqY;aFgN;3NaZ;bh3V", "pdf_size": 2595855, "rating": "5;6;6;6", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "75;156;108;67", "wc_strengths": "45;100;163;76", "wc_weaknesses": "175;371;84;91", "wc_questions": "60;181;82;3", "wc_limitations": "9;40;97;1", "wc_review": "364;848;534;238", "wc_reply_reviewers": "17;155;11;21", "wc_reply_authors": "47;47;48;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.5, 35.01785258978626 ], "wc_strengths_avg": [ 96.0, 43.31858723458095 ], "wc_weaknesses_avg": [ 180.25, 115.80452279595991 ], "wc_questions_avg": [ 81.5, 64.2748006609122 ], "wc_limitations_avg": [ 36.75, 37.71190130449538 ], "wc_review_avg": [ 496.0, 228.766256252971 ], "wc_reply_reviewers_avg": [ 51.0, 60.149812967290266 ], "wc_reply_authors_avg": [ 35.5, 20.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7100518977045371842&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ist.ac.at;ista.ac.at;ist.ac.at", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Institute of Science and Technology;Institute of Science and Technology Austria", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Austria" }, { "title": "Structured Unrestricted-Rank Matrices for Parameter Efficient Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95515", "id": "MXOzgjlWDF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MXOzgjlWDF", "openreview": "https://openreview.net/forum?id=MXOzgjlWDF", "poster": "/media/PosterPDFs/NeurIPS%202024/95515.png?t=1733107594.4418871", "project": "", "author_site": "Arijit Sehanobish, Kumar Avinava Dubey, Krzysztof M Choromanski, Somnath Basu Roy Chowdhury, Deepali Jain, Vikas Sindhwani, Snigdha Chaturvedi", "tldr": "", "abstract": "Recent efforts to scale Transformer models have demonstrated rapid progress across a wide range of tasks (Wei at. al 2022). However, fine-tuning these models for downstream tasks is quite expensive due to their large parameter counts. Parameter-efficient fine-tuning (PEFT) approaches have emerged as a viable alternative, allowing us to fine-tune models by updating only a small number of parameters.\n In this work, we propose a general framework for parameter efficient fine-tuning (PEFT), based on *structured unrestricted-rank matrices* (SURM) which can serve as a drop-in replacement for popular approaches such as Adapters and LoRA. Unlike other methods like LoRA, SURMs give us more flexibility in finding the right balance between compactness and expressiveness. This is achieved by using *low displacement rank matrices* (LDRMs), which hasn't been used in this context before. SURMs remain competitive with baselines, often providing significant quality improvements while using a smaller parameter budget. SURMs achieve: **5**-**7**% accuracy gains on various image classification tasks while replacing low-rank matrices in LoRA and: up to **12x** reduction of the number of parameters in adapters (with virtually no loss in quality) on the GLUE benchmark.", "keywords": "Low Displacement Rank;Structured Matrices;Transformers;Vision Transformers;Fine-tuning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Arijit Sehanobish;Kumar Avinava Dubey;Krzysztof Marcin Choromanski;Somnath Basu Roy Chowdhury;Deepali Jain;Vikas Sindhwani;Snigdha Chaturvedi", "authorids": "~Arijit_Sehanobish1;~Kumar_Avinava_Dubey1;~Krzysztof_Marcin_Choromanski1;~Somnath_Basu_Roy_Chowdhury3;~Deepali_Jain1;~Vikas_Sindhwani1;~Snigdha_Chaturvedi2", "gender": "M;;;F;M;F;M", "homepage": "https://github.com/arijitthegame/;;https://www.cs.unc.edu/~somnath/;;http://vikas.sindhwani.org;https://sites.google.com/site/snigdhac/;https://sites.google.com/site/kumaravinavadubey/", "dblp": "249/5322;78/11411;190/7535;84/8010;26/4825;77/8700;10/7789", "google_scholar": "MEby6-QAAAAJ;;https://scholar.google.co.in/citations?user=xGbyrIUAAAAJ;;https://scholar.google.com/citations?hl=en;gZD3EesAAAAJ;tBbUAfsAAAAJ", "orcid": "0000-0003-2769-2003;;;;;;", "linkedin": "arijit-sehanobish-b76627112/;;;;vikassindhwani;;", "or_profile": "~Arijit_Sehanobish1;~Krzysztof_Marcin_Choromanski1;~Somnath_Basu_Roy_Chowdhury3;~Deepali_Jain1;~Vikas_Sindhwani1;~Snigdha_Chaturvedi2;~Kumar_A_Dubey1", "aff": "Kensho Technologies;Google Brain Robotics & Columbia University;Department of Computer Science, University of North Carolina, Chapel Hill;Google;Google;Department of Computer Science, University of North Carolina, Chapel Hill;Google Research", "aff_domain": "kensho.com;columbia.edu;cs.unc.edu;google.com;google.com;cs.unc.edu;google.com", "position": "Applied Scientist;research scientist & adjunct assistant professor;PhD student;Researcher;Senior Staff Research Scientist;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nsehanobish2024structured,\ntitle={Structured Unrestricted-Rank Matrices for Parameter Efficient Finetuning},\nauthor={Arijit Sehanobish and Kumar Avinava Dubey and Krzysztof Marcin Choromanski and Somnath Basu Roy Chowdhury and Deepali Jain and Vikas Sindhwani and Snigdha Chaturvedi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MXOzgjlWDF}\n}", "github": "", "reviewers": "Kv7A;GJ7U;o41Z;aa5x", "pdf_size": 4811148, "rating": "5;5;6;6", "confidence": "4;4;3;2", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "97;69;44;84", "wc_strengths": "74;60;33;48", "wc_weaknesses": "166;75;556;68", "wc_questions": "231;4;43;4", "wc_limitations": "5;1;7;11", "wc_review": "573;209;683;215", "wc_reply_reviewers": "201;17;324;39", "wc_reply_authors": "684;565;738;769", "reply_reviewers": "2;1;2;1", "reply_authors": "3;3;3;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 19.704060495238032 ], "wc_strengths_avg": [ 53.75, 15.105876340020794 ], "wc_weaknesses_avg": [ 216.25, 199.92795577407378 ], "wc_questions_avg": [ 70.5, 94.02260366528891 ], "wc_limitations_avg": [ 6.0, 3.605551275463989 ], "wc_review_avg": [ 420.0, 211.61521684415797 ], "wc_reply_reviewers_avg": [ 145.25, 125.29639859149982 ], "wc_reply_authors_avg": [ 689.0, 77.78495998584816 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3734060838326970258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "kensho.com;columbia.edu;cs.unc.edu;google.com;google.com;cs.unc.edu;google.com", "author_num": 7, "aff_unique_index": "0;1;2;1;1;2;1", "aff_unique_norm": "Kensho Technologies;Google;University of North Carolina", "aff_unique_dep": ";Google Brain Robotics;Department of Computer Science", "aff_unique_url": "https://www.kensho.com;https://ai.google;https://www.unc.edu", "aff_unique_abbr": ";Google;UNC", "aff_campus_unique_index": "1;2;1;1;2;1", "aff_campus_unique": ";Mountain View;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SAND: Smooth imputation of sparse and noisy functional data with Transformer networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95514", "id": "MXRO5kukST", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MXRO5kukST", "openreview": "https://openreview.net/forum?id=MXRO5kukST", "poster": "/media/PosterPDFs/NeurIPS%202024/95514.png?t=1731327303.8505833", "project": "", "author_site": "Ju-Sheng Hong, Junwen Yao, Jonas Mueller, Jane-Ling Wang", "tldr": "", "abstract": "Although the transformer architecture has come to dominate other models for text and image data, its application to irregularly-spaced longitudinal data has been limited. We introduce a variant of the transformer that enables it to more smoothly impute such functional data. We augment the vanilla transformer with a simple module we call SAND (self-attention on derivatives), which naturally encourages smoothness by modeling the sub-derivative of the imputed curve. On the theoretical front, we prove the number of hidden nodes required by a network with SAND to achieve an $\\epsilon$ prediction error bound for functional imputation. Extensive experiments over various types of functional data demonstrate that transformers with SAND produce better imputations than both their standard counterparts as well as transformers augmented with alternative approaches to encode the inductive bias of smoothness. SAND also outperforms standard statistical methods for functional imputation like kernel smoothing and PACE.", "keywords": "functional data;sparse functional data;imputation;Transformer;self-attention", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/35444ce0832a65a97a8496dc8dec71a742c6b069.zip", "author": "Ju-Sheng Hong;Junwen Yao;Jonas Mueller;Jane-Ling Wang", "authorids": "~Ju-Sheng_Hong1;~Junwen_Yao1;~Jonas_Mueller1;~Jane-Ling_Wang1", "gender": ";M;M;F", "homepage": ";;;http://www.stat.ucdavis.edu/~wang/", "dblp": ";;178/3250;116/9099", "google_scholar": ";https://scholar.google.com/citations?hl=en;HeVcLzAAAAAJ;bHkDO7AAAAAJ", "orcid": ";;;0000-0003-3731-5391", "linkedin": "ju-sheng-hong-1b0081174/;;;", "or_profile": "~Ju-Sheng_Hong1;~Junwen_Yao1;~Jonas_Mueller1;~Jane-Ling_Wang1", "aff": "University of California, Davis;Waymo;Cleanlab;University of California, Davis", "aff_domain": "ucdavis.edu;google.com;cleanlab.ai;ucdavis.edu", "position": "PhD student;Software Engineer;Researcher;Full Professor", "bibtex": "@inproceedings{\nhong2024sand,\ntitle={{SAND}: Smooth imputation of sparse and noisy functional data with Transformer networks},\nauthor={Ju-Sheng Hong and Junwen Yao and Jonas Mueller and Jane-Ling Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MXRO5kukST}\n}", "github": "", "reviewers": "92dE;pcLe;zTxC", "pdf_size": 687079, "rating": "5;6;8", "confidence": "5;3;3", "soundness": "2;3;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "76;83;64", "wc_strengths": "23;42;151", "wc_weaknesses": "35;164;5", "wc_questions": "30;103;1", "wc_limitations": "7;1;11", "wc_review": "171;393;232", "wc_reply_reviewers": "96;28;7", "wc_reply_authors": "402;90;0", "reply_reviewers": "2;1;1", "reply_authors": "3;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 74.33333333333333, 7.845734863959881 ], "wc_strengths_avg": [ 72.0, 56.39739946723312 ], "wc_weaknesses_avg": [ 68.0, 68.97825744392213 ], "wc_questions_avg": [ 44.666666666666664, 42.913349386357105 ], "wc_limitations_avg": [ 6.333333333333333, 4.109609335312651 ], "wc_review_avg": [ 265.3333333333333, 93.64590517238624 ], "wc_reply_reviewers_avg": [ 43.666666666666664, 37.985377303495206 ], "wc_reply_authors_avg": [ 164.0, 172.2556240010758 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7559289460184546, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:61AdpWOmpGMJ:scholar.google.com/&scioq=SAND:+Smooth+imputation+of+sparse+and+noisy+functional+data+with+Transformer+networks&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "ucdavis.edu;google.com;cleanlab.ai;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, Davis;Waymo;Cleanlab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucdavis.edu;https://www.waymo.com;https://www.cleanlab.ai", "aff_unique_abbr": "UC Davis;Waymo;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ReNO: Enhancing One-step Text-to-Image Models through Reward-based Noise Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95513", "id": "MXY0qsGgeO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MXY0qsGgeO", "openreview": "https://openreview.net/forum?id=MXY0qsGgeO", "poster": "", "project": "", "author_site": "Luca Eyring, Shyamgopal Karthik, Karsten Roth, Alexey Dosovitskiy, Zeynep Akata", "tldr": "", "abstract": "Text-to-Image (T2I) models have made significant advancements in recent years, but they still struggle to accurately capture intricate details specified in complex compositional prompts. While fine-tuning T2I models with reward objectives has shown promise, it suffers from \"reward hacking\" and may not generalize well to unseen prompt distributions. In this work, we propose Reward-based Noise Optimization (ReNO), a novel approach that enhances T2I models at inference by optimizing the initial noise based on the signal from one or multiple human preference reward models. Remarkably, solving this optimization problem with gradient ascent for 50 iterations yields impressive results on four different one-step models across two competitive benchmarks, T2I-CompBench and GenEval. Within a computational budget of 20-50 seconds, ReNO-enhanced one-step models consistently surpass the performance of all current open-source Text-to-Image models. Extensive user studies demonstrate that our model is preferred nearly twice as often compared to the popular SDXL model and is on par with the proprietary Stable Diffusion 3 with 8B parameters. Moreover, given the same computational resources, a ReNO-optimized one-step model outperforms widely-used open-source models such as SDXL and PixArt-alpha, highlighting the efficiency and effectiveness of ReNO in enhancing T2I model performance at inference time.", "keywords": "Text-to-Image Generation;Diffusion Models;Test-Time Training;Reward Models;Learning From Human Feedback", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Luca Eyring;Shyamgopal Karthik;Karsten Roth;Alexey Dosovitskiy;Zeynep Akata", "authorids": "~Luca_Eyring1;~Shyamgopal_Karthik1;~Karsten_Roth1;~Alexey_Dosovitskiy1;~Zeynep_Akata1", "gender": ";Not Specified;;F;M", "homepage": "https://sgk98.github.io/;https://karroth.com/;;https://eml-unitue.de/people/zeynep-akata;https://lucaeyring.com/", "dblp": "251/8983;234/7803;135/4956.html;117/4838;361/7132", "google_scholar": "MofhemMAAAAJ;93ZjIs0AAAAJ;FXNJRDoAAAAJ;jQl9RtkAAAAJ;", "orcid": ";;;0000-0002-1432-7747;", "linkedin": ";;;zeynep-akata-36182045/?ppe=1;luca-eyring-74abb01b2/", "or_profile": "~Shyamgopal_Karthik1;~Karsten_Roth1;~Alexey_Dosovitskiy1;~Zeynep_Akata1;~Luca_Vincent_Eyring1", "aff": "Snap Inc.;University of Tuebingen;Inceptive;Helmholtz Munich;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "snapchat.com;uni-tuebingen.de;inceptive.team;helmholtz-munich.de;uni-tuebingen.de", "position": "Intern;PhD student;Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\neyring2024reno,\ntitle={Re{NO}: Enhancing One-step Text-to-Image Models through Reward-based Noise Optimization},\nauthor={Luca Eyring and Shyamgopal Karthik and Karsten Roth and Alexey Dosovitskiy and Zeynep Akata},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MXY0qsGgeO}\n}", "github": "", "reviewers": "EAgt;mwA2;8y7L;UuFQ", "pdf_size": 17597151, "rating": "3;4;6;7", "confidence": "4;3;5;4", "soundness": "2;3;3;3", "novelty": "1;2;4;3", "presentation": "2;3;4;3", "wc_summary": "77;79;91;101", "wc_strengths": "297;63;77;118", "wc_weaknesses": "845;86;120;170", "wc_questions": "56;37;39;138", "wc_limitations": "91;22;7;78", "wc_review": "1366;287;334;605", "wc_reply_reviewers": "1389;27;0;27", "wc_reply_authors": "2169;125;0;0", "reply_reviewers": "3;1;0;1", "reply_authors": "5;2;1;1", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.0, 9.695359714832659 ], "wc_strengths_avg": [ 138.75, 93.57450240316537 ], "wc_weaknesses_avg": [ 305.25, 313.0538092724636 ], "wc_questions_avg": [ 67.5, 41.36725758374611 ], "wc_limitations_avg": [ 49.5, 35.69663849720307 ], "wc_review_avg": [ 648.0, 431.94038940576047 ], "wc_reply_reviewers_avg": [ 360.75, 593.7627367054959 ], "wc_reply_authors_avg": [ 573.5, 922.574793715935 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.447213595499958, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2726326514820859731&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snapchat.com;uni-tuebingen.de;inceptive.team;helmholtz-munich.de;uni-tuebingen.de", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Snap Inc.;University of Tuebingen;Inceptive;Helmholtz Zentrum M\u00fcnchen;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.snapinc.com;https://www.uni-tuebingen.de/;;https://www.helmholtz-muenchen.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Snap;Uni T\u00fcbingen;;HMGU;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Germany;" }, { "title": "SLTrain: a sparse plus low rank approach for parameter and memory efficient pretraining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95512", "id": "MXze4H7opg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MXze4H7opg", "openreview": "https://openreview.net/forum?id=MXze4H7opg", "poster": "", "project": "", "author_site": "Andi Han, Jiaxiang Li, Wei Huang, Mingyi Hong, Akiko Takeda, Pratik Kumar Jawanpuria, Bamdev Mishra", "tldr": "", "abstract": "Large language models (LLMs) have shown impressive capabilities across various tasks. However, training LLMs from scratch requires significant computational power and extensive memory capacity. Recent studies have explored low-rank structures on weights for efficient fine-tuning in terms of parameters and memory, either through low-rank adaptation or factorization. While effective for fine-tuning, low-rank structures are generally less suitable for pretraining because they restrict parameters to a low-dimensional subspace. In this work, we propose to parameterize the weights as a sum of low-rank and sparse matrices for pretraining, which we call SLTrain. The low-rank component is learned via matrix factorization, while for the sparse component, we employ a simple strategy of uniformly selecting the sparsity support at random and learning only the non-zero entries with the fixed support. While being simple, the random fixed-support sparse learning strategy significantly enhances pretraining when combined with low-rank learning. Our results show that SLTrain adds minimal extra parameters and memory costs compared to pretraining with low-rank parameterization, yet achieves substantially better performance, which is comparable to full-rank training. Remarkably, when combined with quantization and per-layer updates, SLTrain can reduce memory requirements by up to 73% when pretraining the LLaMA 7B model.", "keywords": "LLM pretraining;memory efficiency;parameter efficiency;low rank;sparse", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/110f0a842fe93b1dceac7bda2cafc0b69c545689.zip", "author": "Andi Han;Jiaxiang Li;Wei Huang;Mingyi Hong;Akiko Takeda;Pratik Jawanpuria;Bamdev Mishra", "authorids": "~Andi_Han1;~Jiaxiang_Li1;~Wei_Huang6;~Mingyi_Hong1;~Akiko_Takeda2;~Pratik_Jawanpuria1;~Bamdev_Mishra1", "gender": "M;M;M;M;;M;", "homepage": "https://github.com/andyjm3;https://jasonjiaxiangli.github.io/;https://weihuang05.github.io/;http://people.ece.umn.edu/~mhong/mingyi.html;http://www.or.mist.i.u-tokyo.ac.jp/takeda/index-e.html;https://pratikjawanpuria.com;https://bamdevmishra.in", "dblp": "268/7976.html;;81/6685-34;57/8053;;32/9841;133/8291", "google_scholar": "AKHQHs0AAAAJ;h5OWvc0AAAAJ;RZfDh4MAAAAJ;qRnP-p0AAAAJ;;_GUZDtMAAAAJ;https://scholar.google.co.in/citations?user=25IuNrMAAAAJ", "orcid": "0000-0003-4655-655X;0009-0001-5555-6511;0000-0001-5674-7021;;;;", "linkedin": ";jiaxiang-li-9aa485118/;;;;;", "or_profile": "~Andi_Han1;~Jiaxiang_Li1;~Wei_Huang6;~Mingyi_Hong1;~Akiko_Takeda2;~Pratik_Jawanpuria1;~Bamdev_Mishra1", "aff": "RIKEN AIP;University of Minnesota - Twin Cities;RIKEN AIP;University of Minnesota, Minneapolis;RIKEN;Microsoft;Microsoft", "aff_domain": "riken.jp;umn.edu;riken.jp;umn.edu;riken.jp;microsoft.com;microsoft.com", "position": "Postdoc;Postdoc;Research Scientist;Associate Professor;Team leader;Principal Researcher;Applied Scientist", "bibtex": "@inproceedings{\nhan2024sltrain,\ntitle={{SLT}rain: a sparse plus low rank approach for parameter and memory efficient pretraining},\nauthor={Andi Han and Jiaxiang Li and Wei Huang and Mingyi Hong and Akiko Takeda and Pratik Jawanpuria and Bamdev Mishra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MXze4H7opg}\n}", "github": "", "reviewers": "AVXH;Rr3S;Wf5L", "pdf_size": 6762893, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "4;2;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "75;57;43", "wc_strengths": "95;73;38", "wc_weaknesses": "184;208;154", "wc_questions": "2;2;5", "wc_limitations": "2;1;1", "wc_review": "358;341;241", "wc_reply_reviewers": "88;207;19", "wc_reply_authors": "387;287;0", "reply_reviewers": "1;2;1", "reply_authors": "3;3;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.333333333333336, 13.097921802925667 ], "wc_strengths_avg": [ 68.66666666666667, 23.471022323045258 ], "wc_weaknesses_avg": [ 182.0, 22.090722034374522 ], "wc_questions_avg": [ 3.0, 1.4142135623730951 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 313.3333333333333, 51.61610429141493 ], "wc_reply_reviewers_avg": [ 104.66666666666667, 77.65021284938989 ], "wc_reply_authors_avg": [ 224.66666666666666, 164.02506583514065 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11351647536132539848&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "riken.jp;umn.edu;riken.jp;umn.edu;riken.jp;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;0;1;0;2;2", "aff_unique_norm": "RIKEN;University of Minnesota;Microsoft", "aff_unique_dep": "Advanced Institute for Computational Science;;Microsoft Corporation", "aff_unique_url": "https://www.aip.riken.jp;https://www.minnesota.edu;https://www.microsoft.com", "aff_unique_abbr": "RIKEN AIP;UMN;Microsoft", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Twin Cities;Minneapolis", "aff_country_unique_index": "0;1;0;1;0;1;1", "aff_country_unique": "Japan;United States" }, { "title": "TopoLogic: An Interpretable Pipeline for Lane Topology Reasoning on Driving Scenes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95511", "id": "MXzr10iX2d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MXzr10iX2d", "openreview": "https://openreview.net/forum?id=MXzr10iX2d", "poster": "/media/PosterPDFs/NeurIPS%202024/95511.png?t=1731413774.9425929", "project": "", "author_site": "Fu Yanping, Wenbin Liao, Xinyuan Liu, Hang Xu, Yike Ma, Yucheng Zhang, Feng Dai", "tldr": "", "abstract": "As an emerging task that integrates perception and reasoning, topology reasoning in autonomous driving scenes has recently garnered widespread attention. However, existing work often emphasizes \"perception over reasoning\": they typically boost reasoning performance by enhancing the perception of lanes and directly adopt vanilla MLPs to learn lane topology from lane query. This paradigm overlooks the geometric features intrinsic to the lanes themselves and are prone to being influenced by inherent endpoint shifts in lane detection.\n To tackle this issue, we propose an interpretable method for lane topology reasoning based on lane geometric distance and lane query similarity, named TopoLogic. This method mitigates the impact of endpoint shifts in geometric space, and introduces explicit similarity calculation in semantic space as a complement. By integrating results from both spaces, our methods provides more comprehensive information for lane topology. Ultimately, our approach significantly outperforms the existing state-of-the-art methods on the mainstream benchmark OpenLane-V2 (23.9 v.s. 10.9 in TOP$_{ll}$ and 44.1 v.s. 39.8 in OLS on subsetA). Additionally, our proposed geometric distance topology reasoning method can be incorporated into well-trained models without re-training, significantly enhancing the performance of lane topology reasoning. The code is released at https://github.com/Franpin/TopoLogic.", "keywords": "Autonomous Driving;Topology Reasoning;Lane Detection;High Definition Map Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/7d3956889f69dab809cf42b51d89cfcba90a55a5.zip", "author": "Yanping Fu;Wenbin Liao;Xinyuan Liu;Hang Xu;Yike Ma;Yucheng Zhang;Feng Dai", "authorids": "~Yanping_Fu2;~Wenbin_Liao1;~Xinyuan_Liu3;~Hang_Xu8;~Yike_Ma1;~Yucheng_Zhang7;~Feng_Dai1", "gender": "M;M;;M;M;M;", "homepage": "https://github.com/Franpin;;;;https://people.ucas.edu.cn/~ykma;;", "dblp": ";288/7054;;;94/10052;;", "google_scholar": ";;;;;;CVg2ja8AAAAJ", "orcid": "0009-0006-8711-8862;0000-0002-3248-4391;;0000-0002-1067-8670;;;0000-0002-6660-6166", "linkedin": ";;;;;https://www.linkedin.zhangyucheng.cn;", "or_profile": "~Yanping_Fu2;~Wenbin_Liao1;~Xinyuan_Liu3;~Hang_Xu8;~Yike_Ma1;~Yucheng_Zhang7;~Feng_Dai1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;;Hangzhou Dianzi University;The Institute of Computing Technology of the Chinese Academy of Sciences;Institute of Computing Technology, CAS;ICT, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;;hdu.edu.cn;ict.ac.cn;ict.cas.cn;ict.ac.cn", "position": "MS student;PhD student;;PhD student;Associate Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nfu2024topologic,\ntitle={TopoLogic: An Interpretable Pipeline for Lane Topology Reasoning on Driving Scenes},\nauthor={Yanping Fu and Wenbin Liao and Xinyuan Liu and Hang Xu and Yike Ma and Yucheng Zhang and Feng Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MXzr10iX2d}\n}", "github": "", "reviewers": "E2Wq;SnEK;FMUS;8GTK", "pdf_size": 1801649, "rating": "4;4;5;5", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "131;124;82;80", "wc_strengths": "89;46;69;85", "wc_weaknesses": "92;121;53;67", "wc_questions": "176;145;19;178", "wc_limitations": "3;1;5;10", "wc_review": "491;437;228;420", "wc_reply_reviewers": "0;48;39;112", "wc_reply_authors": "0;270;372;652", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.25, 23.39203924415313 ], "wc_strengths_avg": [ 72.25, 16.90229274388537 ], "wc_weaknesses_avg": [ 83.25, 25.8879798362097 ], "wc_questions_avg": [ 129.5, 65.12488003827723 ], "wc_limitations_avg": [ 4.75, 3.344772040064913 ], "wc_review_avg": [ 394.0, 99.3604549103918 ], "wc_reply_reviewers_avg": [ 49.75, 40.21426985536353 ], "wc_reply_authors_avg": [ 323.5, 233.33398809431944 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2016053281943186505&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ict.ac.cn;ict.ac.cn;;hdu.edu.cn;ict.ac.cn;ict.cas.cn;ict.ac.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Chinese Academy of Sciences;Hangzhou Dianzi University;Institute of Computing Technology, Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.hdu.edu.cn/;http://www.ict.cas.cn", "aff_unique_abbr": "CAS;HGHDU;ICT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DEPrune: Depth-wise Separable Convolution Pruning for Maximizing GPU Parallelism", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95510", "id": "MYI443zCvv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MYI443zCvv", "openreview": "https://openreview.net/forum?id=MYI443zCvv", "poster": "/media/PosterPDFs/NeurIPS%202024/95510.png?t=1733542874.5984657", "project": "", "author_site": "Cheonjun Park, Mincheol Park, Hyunchan Moon, Myung Kuk Yoon, Seokjin Go, Suhyun Kim, Won Woo Ro", "tldr": "", "abstract": "Depth-wise Separable Convolution (DSConv) has a powerful representation even with fewer parameters and computation, leading to its adoption by almost all of the state-of-the-art CNN models. \nDSConv models are already compact making it hard to apply pruning, and there are few previous pruning techniques that target depth-wise convolution (DW-conv).\nIn this paper, we present Depth-wise Separable Convolution Pruning (DEPrune), a novel pruning method applied to both point-wise and depth-wise convolutions. \nDEPrune is optimized by analyzing the computation of DSConv on GPUs.\nDEPrune employs a fine-grained pruning approach, yet it achieves the structured sparsity typically absent in fine-grained pruning, enabling practical hardware acceleration. \nMoreover, this method maintains a high pruning ratio without causing any accuracy drop.\nWe additionally represent techniques that further enhance DEPrune performance: 1) balanced workload tuning (BWT), and 2) hardware-aware sparsity recalibration (HSR).\nExperiment results show that DEPrune achieves up to $3.74\\times$ practical speedup in DSConv inference on GPUs while maintaining the accuracy of EfficientNet-B0 on ImageNet.", "keywords": "Pruning;Depth-wise Separable Convolution;GPU", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Cheonjun Park;Mincheol Park;Hyunchan Moon;Myung Kuk Yoon;Seokjin Go;Suhyun Kim;Won Woo Ro", "authorids": "~Cheonjun_Park1;~Mincheol_Park1;~Hyunchan_Moon1;~Myung_Kuk_Yoon1;~Seokjin_Go1;~Suhyun_Kim1;~Won_Woo_Ro1", "gender": "M;M;M;;;;M", "homepage": ";;;;;https://kdst.tistory.com/;http://escal.yonsei.ac.kr/", "dblp": "254/7925;270/1814;;;;45/6898-1;r/WonWooRo", "google_scholar": "https://scholar.google.com/citations?hl=ko;kSIW-XAAAAAJ;https://scholar.google.com/citations?hl=ko;;;;GVfD5LQAAAAJ", "orcid": ";;;;;;0000-0001-5390-6445", "linkedin": "cheonjun-park-ba7217183;mincheol-park-66b166186;%ED%98%84%EC%B0%AC-%EB%AC%B8-576710163/;;;;", "or_profile": "~Cheonjun_Park1;~Mincheol_Park1;~Hyunchan_Moon1;~Myung_Kuk_Yoon1;~Seokjin_Go1;~Suhyun_Kim1;~Won_Woo_Ro1", "aff": "Yonsei University;Korea Institute of Science and Technology;LG Corporation;;;Korea Institute of Science and Technology;Yonsei University", "aff_domain": "yonsei.ac.kr;kist.re.kr;lge.com;;;kist.re.kr;yonsei.ac.kr", "position": "PhD student;Research assistant;Researcher;;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\npark2024deprune,\ntitle={{DEP}rune: Depth-wise Separable Convolution Pruning for Maximizing {GPU} Parallelism},\nauthor={Cheonjun Park and Mincheol Park and Hyunchan Moon and Myung Kuk Yoon and Seokjin Go and Suhyun Kim and Won Woo Ro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MYI443zCvv}\n}", "github": "", "reviewers": "VAUi;bd55;CSWm;cCXz", "pdf_size": 2443524, "rating": "6;6;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;2;3", "wc_summary": "102;40;21;101", "wc_strengths": "78;21;144;63", "wc_weaknesses": "81;29;286;36", "wc_questions": "2;31;136;66", "wc_limitations": "2;1;5;7", "wc_review": "265;122;592;273", "wc_reply_reviewers": "18;8;73;0", "wc_reply_authors": "0;0;539;0", "reply_reviewers": "1;1;2;0", "reply_authors": "1;1;3;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.0, 36.13170353027933 ], "wc_strengths_avg": [ 76.5, 44.21820891895103 ], "wc_weaknesses_avg": [ 108.0, 104.68763059693346 ], "wc_questions_avg": [ 58.75, 50.026867781223324 ], "wc_limitations_avg": [ 3.75, 2.384848003542364 ], "wc_review_avg": [ 313.0, 171.92003955327604 ], "wc_reply_reviewers_avg": [ 24.75, 28.577744837547975 ], "wc_reply_authors_avg": [ 134.75, 233.3938463199062 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w_cu1SywNGQJ:scholar.google.com/&scioq=DEPrune:+Depth-wise+Separable+Convolution+Pruning+for+Maximizing+GPU+Parallelism&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "yonsei.ac.kr;kist.re.kr;lge.com;;;kist.re.kr;yonsei.ac.kr", "author_num": 7, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Yonsei University;Korea Institute of Science and Technology;LG", "aff_unique_dep": ";;LG Corporation", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.kist.re.kr;https://www.lg.com", "aff_unique_abbr": "Yonsei;KIST;LG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "T2VSafetyBench: Evaluating the Safety of Text-to-Video Generative Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97732", "id": "MYyGhe9MBg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MYyGhe9MBg", "openreview": "https://openreview.net/forum?id=MYyGhe9MBg", "poster": "", "project": "", "author_site": "Yibo Miao, Yifan Zhu, Yinpeng Dong, Lijia Yu, Jun Zhu, Xiao-Shan Gao", "tldr": "", "abstract": "The recent development of Sora leads to a new era in text-to-video (T2V) generation. Along with this comes the rising concern about its safety risks. The generated videos may contain illegal or unethical content, and there is a lack of comprehensive quantitative understanding of their safety, posing a challenge to their reliability and practical deployment. Previous evaluations primarily focus on the quality of video generation. While some evaluations of text-to-image models have considered safety, they cover limited aspects and do not address the unique temporal risk inherent in video generation. To bridge this research gap, we introduce T2VSafetyBench, the first comprehensive benchmark for conducting safety-critical assessments of text-to-video models. We define 4 primary categories with 14 critical aspects of video generation safety and construct a malicious prompt dataset including real-world prompts, LLM-generated prompts, and jailbreak attack-based prompts. We then conduct a thorough safety evaluation on 9 recently released T2V models. Based on our evaluation results, we draw several important findings, including: 1) no single model excels in all aspects, with different models showing various strengths; 2) the correlation between GPT-4 assessments and manual reviews is generally high; 3) there is a trade-off between the usability and safety of text-to-video generative models. This indicates that as the field of video generation rapidly advances, safety risks are set to surge, highlighting the urgency of prioritizing video safety. We hope that T2VSafetyBench can provide insights for better understanding the safety of video generation in the era of generative AIs. Our code is publicly available at \\url{https://github.com/yibo-miao/T2VSafetyBench}.", "keywords": "text-to-video\u00a0generation;safety risk", "primary_area": "", "supplementary_material": "/attachment/23185c14c05bcf7267df27dd9c480016d2e005c4.pdf", "author": "Yibo Miao;Yifan Zhu;Lijia Yu;Jun Zhu;Xiao-Shan Gao;Yinpeng Dong", "authorids": "~Yibo_Miao1;~Yifan_Zhu6;~Lijia_Yu2;~Jun_Zhu2;~Xiao-Shan_Gao2;~Yinpeng_Dong2", "gender": "M;;M;M;M;M", "homepage": "http://www.amss.ac.cn/;https://github.com/hala64;;http://ml.cs.tsinghua.edu.cn/~jun;http://www.mmrc.iss.ac.cn/~xgao/;https://dongyp13.github.io", "dblp": "332/0699;;175/8873.html;50/2644-1;13/3109;183/0980", "google_scholar": ";https://scholar.google.com/citations?hl=en;;axsP38wAAAAJ;_se7GmUAAAAJ;6_4ad84AAAAJ", "orcid": ";;;;0000-0003-2021-9395;", "linkedin": ";;;;;", "or_profile": "~Yibo_Miao1;~Yifan_Zhu6;~Lijia_Yu2;~Jun_Zhu2;~Xiao-Shan_Gao2;~Yinpeng_Dong2", "aff": "Intel;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Tsinghua University;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Tsinghua University", "aff_domain": "intel.com;amss.ac.cn;ios.ac.cn;mail.tsinghua.edu.cn;amss.ac.cn;tsinghua.edu.cn", "position": "Intern;PhD student;Postdoc;Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nmiao2024tvsafetybench,\ntitle={T2{VS}afetyBench: Evaluating the Safety of Text-to-Video Generative Models},\nauthor={Yibo Miao and Yifan Zhu and Lijia Yu and Jun Zhu and Xiao-Shan Gao and Yinpeng Dong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=MYyGhe9MBg}\n}", "github": "", "reviewers": "4PQa;sQtg;53CX;zpBN;JP3C", "pdf_size": 5886269, "rating": "5;6;6;7;7", "confidence": "4;4;4;5;3", "wc_summary_and_contributions": "77;130;93;22;325", "wc_strengths": "2;20;2;20;54", "wc_improvement": "2;175;107;2;25", "wc_limitations": "6;60;7;1;18", "wc_correctness": "15;1;7;1;13", "wc_clarity": "1;1;5;1;16", "wc_relation_to_prior_work": "1;23;1;1;39", "wc_documentation": "4;13;12;1;29", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "109;424;235;50;520", "wc_reply_reviewers": "0;20;22;162;0", "wc_reply_authors": "60;32;46;532;0", "reply_reviewers": "0;1;1;5;0", "reply_authors": "3;3;3;5;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 129.4, 103.79325604296264 ], "wc_strengths_avg": [ 19.6, 18.99052395275075 ], "wc_improvement_avg": [ 62.2, 68.35612627994655 ], "wc_limitations_avg": [ 18.4, 21.527656630483495 ], "wc_correctness_avg": [ 7.4, 5.851495535331117 ], "wc_clarity_avg": [ 4.8, 5.81033561853358 ], "wc_relation_to_prior_work_avg": [ 13.0, 15.543487382180357 ], "wc_documentation_avg": [ 11.8, 9.744742172063868 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 267.6, 179.85171670017496 ], "wc_reply_reviewers_avg": [ 40.8, 61.32666630430844 ], "wc_reply_authors_avg": [ 134.0, 199.9919998399936 ], "reply_reviewers_avg": [ 1.4, 1.8547236990991407 ], "reply_authors_avg": [ 3.0, 1.2649110640673518 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17438492247629372962&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "intel.com;amss.ac.cn;ios.ac.cn;mail.tsinghua.edu.cn;amss.ac.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;1;2", "aff_unique_norm": "Intel;Chinese Academy of Sciences;Tsinghua University", "aff_unique_dep": "Intel Corporation;Academy of Mathematics and Systems Science;", "aff_unique_url": "https://www.intel.com;http://www.cas.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Intel;CAS;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "On Sparse Canonical Correlation Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95509", "id": "MZ47wPr6C3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MZ47wPr6C3", "openreview": "https://openreview.net/forum?id=MZ47wPr6C3", "poster": "", "project": "", "author_site": "Yongchun Li, Santanu Dey, Weijun Xie", "tldr": "", "abstract": "The classical Canonical Correlation Analysis (CCA) identifies the correlations between two sets of multivariate variables based on their covariance, which has been widely applied in diverse fields such as computer vision, natural language processing, and speech analysis. Despite its popularity, CCA can encounter challenges in explaining correlations between two variable sets within high-dimensional data contexts. Thus, this paper studies Sparse Canonical Correlation Analysis (SCCA) that enhances the interpretability of CCA. We first show that SCCA generalizes three well-known sparse optimization problems, sparse PCA, sparse SVD, and sparse regression, which are all classified as NP-hard problems. This result motivates us to develop strong formulations and efficient algorithms. Our main contributions include (i) the introduction of a combinatorial formulation that captures the essence of SCCA and allows the development of exact and approximation algorithms; (ii) the establishment of the complexity results for two low-rank special cases of SCCA; and (iii) the derivation of an equivalent mixed-integer semidefinite programming model that facilitates a specialized branch-and-cut algorithm with analytical cuts. The effectiveness of our proposed formulations and algorithms is validated through numerical experiments.", "keywords": "Sparse canonical correlation analysis;low rank;exact and approximation algorithms", "primary_area": "optimization", "supplementary_material": "/attachment/f7843d8559faab0e99ea066d41645f79a58853c9.zip", "author": "Yongchun Li;Santanu Dey;Weijun Xie", "authorids": "~Yongchun_Li1;~Santanu_Dey1;~Weijun_Xie1", "gender": "F;M;M", "homepage": "https://sites.google.com/view/yongchun-li/home;https://www2.isye.gatech.edu/~sdey30/;https://sites.google.com/site/weijunxieor/home?authuser=0", "dblp": ";https://dblp.uni-trier.de/pid/13/5858.html;134/6996-1", "google_scholar": "C0HcOSAAAAAJ;https://scholar.google.co.in/citations?user=8Je5mV4AAAAJ;RJwKpY8AAAAJ", "orcid": ";;0000-0001-5157-1194", "linkedin": ";;", "or_profile": "~Yongchun_Li1;~Santanu_Dey1;~Weijun_Xie1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024on,\ntitle={On Sparse Canonical Correlation Analysis},\nauthor={Yongchun Li and Santanu Dey and Weijun Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MZ47wPr6C3}\n}", "github": "", "reviewers": "Dvxm;qq9k;WJMv;f5tE", "pdf_size": 927137, "rating": "6;6;6;7", "confidence": "2;3;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "10;205;28;55", "wc_strengths": "15;56;17;35", "wc_weaknesses": "43;35;104;58", "wc_questions": "77;27;26;5", "wc_limitations": "2;37;1;18", "wc_review": "147;360;176;171", "wc_reply_reviewers": "9;99;13;10", "wc_reply_authors": "0;65;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.5, 77.02759245880661 ], "wc_strengths_avg": [ 30.75, 16.528384676065595 ], "wc_weaknesses_avg": [ 60.0, 26.711420778386163 ], "wc_questions_avg": [ 33.75, 26.47050245084139 ], "wc_limitations_avg": [ 14.5, 14.637281168304447 ], "wc_review_avg": [ 213.5, 85.28921385497699 ], "wc_reply_reviewers_avg": [ 32.75, 38.27776769875694 ], "wc_reply_authors_avg": [ 16.25, 28.145825622994256 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 341, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12452877432190267926&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Active Classification with Few Queries under Misspecification", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95508", "id": "Ma0993KZlq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ma0993KZlq", "openreview": "https://openreview.net/forum?id=Ma0993KZlq", "poster": "", "project": "", "author_site": "Vasilis Kontonis, Mingchen Ma, Christos Tzamos", "tldr": "", "abstract": "We study pool-based active learning, where a learner has a large pool $S$ of unlabeled examples and can adaptively ask a labeler questions to learn these labels. The goal of the learner is to output a labeling for $S$ that can compete with the best hypothesis from a given hypothesis class $\\mathcal{H}$. We focus on halfspace learning, one of the most important problems in active learning.\n\nIt is well known that in the standard active learning model, learning the labels of an arbitrary pool of examples labeled by some halfspace up to error $\\epsilon$ requires at least $\\Omega(1/\\epsilon)$ queries. To overcome this difficulty, previous work designs simple but powerful query languages to achieve $O(\\log(1/\\epsilon))$ query complexity, but only focuses on the realizable setting where data are perfectly labeled by some halfspace.\nHowever, when labels are noisy, such queries are too fragile and lead to high query complexity even under the simple random classification noise model.\n \nIn this work, we propose a new query language called threshold statistical queries and study their power for learning under various noise models. Our main algorithmic result is the first query-efficient algorithm for learning halfspaces under the popular Massart noise model. With an arbitrary dataset corrupted with Massart noise at noise rate $\\eta$, our algorithm uses only $\\mathrm{polylog(1/\\epsilon)}$ threshold statistical queries and computes an $(\\eta + \\epsilon)$-accurate labeling in polynomial time. For the harder case of agnostic noise, we show that it is impossible to beat $O(1/\\epsilon)$ query complexity even for the much simpler problem of learning singleton functions (and thus for learning halfspaces) using a reduction from agnostic distributed learning.", "keywords": "Active Learning;Label Noise;Linear Separator", "primary_area": "learning_theory", "supplementary_material": "", "author": "Vasilis Kontonis;Mingchen Ma;Christos Tzamos", "authorids": "~Vasilis_Kontonis1;~Mingchen_Ma1;~Christos_Tzamos1", "gender": "M;;", "homepage": "http://vkonton.github.io/;https://mmingchen.github.io/;https://tzamos.com", "dblp": "203/8777;270/6320;79/8819", "google_scholar": "7_44KWAAAAAJ;w84UnLsAAAAJ;wB01auEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Vasilis_Kontonis1;~Mingchen_Ma1;~Christos_Tzamos1", "aff": ", University of Texas at Austin;University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "cs.utexas.edu;wisc.edu;wisc.edu", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkontonis2024active,\ntitle={Active Classification with Few Queries under Misspecification},\nauthor={Vasilis Kontonis and Mingchen Ma and Christos Tzamos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ma0993KZlq}\n}", "github": "", "reviewers": "DNzP;WAF8;vqip;VrwT", "pdf_size": 367348, "rating": "7;7;7;7", "confidence": "3;3;3;3", "soundness": "4;4;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "112;81;204;395", "wc_strengths": "128;44;163;191", "wc_weaknesses": "67;68;155;127", "wc_questions": "1;59;191;1", "wc_limitations": "1;11;9;33", "wc_review": "309;263;722;747", "wc_reply_reviewers": "13;38;54;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 198.0, 122.40302283849039 ], "wc_strengths_avg": [ 131.5, 55.22906843320825 ], "wc_weaknesses_avg": [ 104.25, 38.06162765831225 ], "wc_questions_avg": [ 63.0, 77.60154637634484 ], "wc_limitations_avg": [ 13.5, 11.863810517704673 ], "wc_review_avg": [ 510.25, 225.01263853392769 ], "wc_reply_reviewers_avg": [ 31.75, 15.658464164789597 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6221469013055636897&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "cs.utexas.edu;wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UT Austin;UW-Madison;UW", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Austin;Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "BLoB: Bayesian Low-Rank Adaptation by Backpropagation for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95507", "id": "MaDykgj4Ru", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MaDykgj4Ru", "openreview": "https://openreview.net/forum?id=MaDykgj4Ru", "poster": "/media/PosterPDFs/NeurIPS%202024/95507.png?t=1731732028.1365483", "project": "", "author_site": "Yibin Wang, Haizhou Shi, Ligong Han, Dimitris Metaxas, Hao Wang", "tldr": "", "abstract": "Large Language Models (LLMs) often suffer from overconfidence during inference, particularly when adapted to downstream domain-specific tasks with limited data. Previous work addresses this issue by employing approximate Bayesian estimation after the LLMs are trained, enabling them to quantify uncertainty. However, such post-training approaches' performance is severely limited by the parameters learned during training. In this paper, we go beyond post-training Bayesianization and propose Bayesian Low-Rank Adaptation by Backpropagation (BLoB), an algorithm that continuously and jointly adjusts both the mean and covariance of LLM parameters throughout the whole fine-tuning process. Our empirical results verify the effectiveness of BLoB in terms of generalization and uncertainty estimation, when evaluated on both in-distribution and out-of-distribution data.", "keywords": "Bayesian Neural Network;Finetuning;Large Language Models", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Yibin Wang;Haizhou Shi;Ligong Han;Dimitris N. Metaxas;Hao Wang", "authorids": "~Yibin_Wang6;~Haizhou_Shi1;~Ligong_Han1;~Dimitris_N._Metaxas1;~Hao_Wang3", "gender": "M;M;M;M;M", "homepage": "https://haizhou-shi.github.io;https://phymhan.github.io;https://www.cs.rutgers.edu/~dnm/;http://www.wanghao.in;https://yibinwang.netlify.app/", "dblp": "245/0213;187/1675;m/DNMetaxas;w/HaoWang-14;56/8805-5", "google_scholar": "JKwP43sAAAAJ;n2v43R4AAAAJ;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ;NrOA9QoAAAAJ;https://scholar.google.ca/citations?user=FZKXya0AAAAJ", "orcid": "0000-0002-8431-3703;0000-0003-3166-0848;;;0000-0001-6099-2628", "linkedin": "haizhou-shi-229206180/;ligongh/;dimitris-metaxas-1bb74914/;;", "or_profile": "~Haizhou_Shi1;~Ligong_Han1;~Dimitris_Metaxas1;~Hao_Wang4;~YiBin_Wang2", "aff": "Rutgers University, New Brunswick;Rutgers University;Rutgers University;Rutgers University;Huazhong University of Science and Technology", "aff_domain": "rutgers.edu;rutgers.edu;cs.rutgers.edu;cs.rutgers.edu;hust.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nwang2024blob,\ntitle={{BL}oB: Bayesian Low-Rank Adaptation by Backpropagation for Large Language Models},\nauthor={Yibin Wang and Haizhou Shi and Ligong Han and Dimitris N. Metaxas and Hao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MaDykgj4Ru}\n}", "github": "", "reviewers": "Adn3;Zor8;Lm4V;AGd6", "pdf_size": 1578346, "rating": "5;6;6;7", "confidence": "4;5;3;3", "soundness": "2;4;3;4", "novelty": "2;3;2;3", "presentation": "2;1;3;4", "wc_summary": "77;45;137;69", "wc_strengths": "49;38;38;112", "wc_weaknesses": "238;227;95;55", "wc_questions": "310;6;153;125", "wc_limitations": "5;25;15;48", "wc_review": "679;341;438;409", "wc_reply_reviewers": "114;160;72;62", "wc_reply_authors": "848;761;139;39", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 82.0, 33.8673884437522 ], "wc_strengths_avg": [ 59.25, 30.784533454317607 ], "wc_weaknesses_avg": [ 153.75, 80.10422897700221 ], "wc_questions_avg": [ 148.5, 108.35243421354224 ], "wc_limitations_avg": [ 23.25, 15.943258763502524 ], "wc_review_avg": [ 466.75, 127.4997549017252 ], "wc_reply_reviewers_avg": [ 102.0, 38.7556447501522 ], "wc_reply_authors_avg": [ 446.75, 360.80630191281307 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3402016406972900638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "rutgers.edu;rutgers.edu;cs.rutgers.edu;cs.rutgers.edu;hust.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Rutgers University;Huazhong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;http://www.hust.edu.cn", "aff_unique_abbr": "Rutgers;HUST", "aff_campus_unique_index": "0", "aff_campus_unique": "New Brunswick;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Online Composite Optimization Between Stochastic and Adversarial Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95506", "id": "MbEB5aKmMK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MbEB5aKmMK", "openreview": "https://openreview.net/forum?id=MbEB5aKmMK", "poster": "/media/PosterPDFs/NeurIPS%202024/95506.png?t=1731511083.2668993", "project": "", "author_site": "Yibo Wang, SIJIA CHEN, Wei Jiang, Wenhao Yang, Yuanyu Wan, Lijun Zhang", "tldr": "", "abstract": "We study online composite optimization under the Stochastically Extended Adversarial (SEA) model. Specifically, each loss function consists of two parts: a fixed non-smooth and convex regularizer, and a time-varying function which can be chosen either stochastically, adversarially, or in a manner that interpolates between the two extremes. In this setting, we show that for smooth and convex time-varying functions, optimistic composite mirror descent (OptCMD) can obtain an $\\mathcal{O}(\\sqrt{\\sigma_{1:T}^2} + \\sqrt{\\Sigma_{1:T}^2})$ regret bound, where $\\sigma_{1:T}^2$ and $\\Sigma_{1:T}^2$ denote the cumulative stochastic variance and the cumulative adversarial variation of time-varying functions, respectively. For smooth and strongly convex time-varying functions, we establish an $\\mathcal{O}((\\sigma_{\\max}^2 + \\Sigma_{\\max}^2)\\log(\\sigma_{1:T}^2 + \\Sigma_{1:T}^2))$ regret bound, where $\\sigma_{\\max}^2$ and $\\Sigma_{\\max}^2$ denote the maximal stochastic variance and the maximal adversarial variation, respectively. For smooth and exp-concave time-varying functions, we achieve an $\\mathcal{O}(d \\log (\\sigma_{1:T}^2 + \\Sigma_{1:T}^2))$ bound where $d$ denotes the dimensionality. Moreover, to deal with the unknown function type in practical problems, we propose a multi-level \\textit{universal} algorithm that is able to achieve the desirable bounds for three types of time-varying functions simultaneously. It should be noticed that all our findings match existing bounds for the SEA model without the regularizer, which implies that there is \\textit{no price} in regret bounds for the benefits gained from the regularizer.", "keywords": "Online Composite Optimization;Online Learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Yibo Wang;Sijia Chen;Wei Jiang;Wenhao Yang;Yuanyu Wan;Lijun Zhang", "authorids": "~Yibo_Wang2;~Sijia_Chen3;~Wei_Jiang8;~Wenhao_Yang3;~Yuanyu_Wan1;~Lijun_Zhang1", "gender": ";F;M;M;M;", "homepage": ";http://www.lamda.nju.edu.cn/chensj/;http://www.lamda.nju.edu.cn/jiangw/?AspxAutoDetectCookieSupport=1;http://www.lamda.nju.edu.cn/yangwh/;https://yuanyuwan.github.io/;", "dblp": ";;;233/4699;221/3499;", "google_scholar": ";;;ycccau7cWYIC;CEymMc8AAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yibo_Wang2;~Sijia_Chen3;~Wei_Jiang8;~Wenhao_Yang3;~Yuanyu_Wan1;~Lijun_Zhang1", "aff": ";Nanjing University;Nanjing University;Nanjing University;Zhejiang University;", "aff_domain": ";nju.edu.cn;nju.edu.cn;nju.edu.cn;zju.edu.cn;", "position": ";MS student;PhD student;PhD student;Researcher;", "bibtex": "@inproceedings{\nwang2024online,\ntitle={Online Composite Optimization Between Stochastic and Adversarial Environments},\nauthor={Yibo Wang and Sijia Chen and Wei Jiang and Wenhao Yang and Yuanyu Wan and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MbEB5aKmMK}\n}", "github": "", "reviewers": "ZP8k;qCfX;tdoW;wJwu", "pdf_size": 892682, "rating": "5;6;7;8", "confidence": "3;3;5;4", "soundness": "4;3;4;3", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "91;73;86;108", "wc_strengths": "71;49;100;84", "wc_weaknesses": "76;12;53;30", "wc_questions": "4;31;36;79", "wc_limitations": "4;10;1;25", "wc_review": "246;175;276;326", "wc_reply_reviewers": "0;68;0;27", "wc_reply_authors": "0;28;0;18", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 89.5, 12.539936203984453 ], "wc_strengths_avg": [ 76.0, 18.66815470259447 ], "wc_weaknesses_avg": [ 42.75, 24.076700355322778 ], "wc_questions_avg": [ 37.5, 26.874709300753377 ], "wc_limitations_avg": [ 10.0, 9.246621004453464 ], "wc_review_avg": [ 255.75, 54.68260692395709 ], "wc_reply_reviewers_avg": [ 23.75, 27.82422505659412 ], "wc_reply_authors_avg": [ 11.5, 12.031209415515965 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4055462415651881993&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";nju.edu.cn;nju.edu.cn;nju.edu.cn;zju.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Nanjing University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "Nanjing U;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DiffPhyCon: A Generative Approach to Control Complex Physical Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95505", "id": "MbZuh8L0Xg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MbZuh8L0Xg", "openreview": "https://openreview.net/forum?id=MbZuh8L0Xg", "poster": "/media/PosterPDFs/NeurIPS%202024/95505.png?t=1733670919.40452", "project": "", "author_site": "Long Wei, Peiyan Hu, Ruiqi Feng, Haodong Feng, Yixuan Du, Tao Zhang, Rui Wang, Yue Wang, Zhi-Ming Ma, Tailin Wu", "tldr": "", "abstract": "Controlling the evolution of complex physical systems is a fundamental task across science and engineering. \nClassical techniques suffer from limited applicability or huge computational costs. On the other hand, recent deep learning and reinforcement learning-based approaches often struggle to optimize long-term control sequences under the constraints of system dynamics. In this work, we introduce Diffusion Physical systems Control (DiffPhyCon), a new class of method to address the physical systems control problem. DiffPhyCon excels by simultaneously minimizing both the learned generative energy function and the predefined control objectives across the entire trajectory and control sequence. Thus, it can explore globally and plan near-optimal control sequences. Moreover, we enhance DiffPhyCon with prior reweighting, enabling the discovery of control sequences that significantly deviate from the training distribution. We test our method on three tasks: 1D Burgers' equation, 2D jellyfish movement control, and 2D high-dimensional smoke control, where our generated jellyfish dataset is released as a benchmark for complex physical system control research. Our method outperforms widely applied classical approaches and state-of-the-art deep learning and reinforcement learning methods. Notably, DiffPhyCon unveils an intriguing fast-close-slow-open pattern observed in the jellyfish, aligning with established findings in the field of fluid dynamics. The project website, jellyfish dataset, and code can be found at https://github.com/AI4Science-WestlakeU/diffphycon.", "keywords": "Physical systems control;physical simulation;generative models;prior reweighting", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/1376f4eee3f6a657e91bcc8cc1d79edacbe0f0f2.zip", "author": "Long Wei;Peiyan Hu;Ruiqi Feng;Haodong Feng;Yixuan Du;Tao Zhang;Rui Wang;Yue Wang;Zhi-Ming Ma;Tailin Wu", "authorids": "~Long_Wei1;~Peiyan_Hu1;~Ruiqi_Feng1;~Haodong_Feng1;~Yixuan_Du1;~Tao_Zhang35;~Rui_Wang56;~Yue_Wang15;~Zhi-Ming_Ma1;~Tailin_Wu1", "gender": "M;Not Specified;;;M;M;F;M;;M", "homepage": "https://longweizju.github.io/;https://peiyannn.github.io/;https://weenming.github.io/;;https://github.com/Duyx5520;https://zhangtao167.github.io/;;https://scholar.google.com/citations?hl=zh-CN&user=fGv5irIAAAAJ;http://homepage.amss.ac.cn/research/homePage/8eb59241e2e74d828fb84eec0efadba5/myHomePage.html;http://tailin.org", "dblp": ";318/3127;301/3170;;;15/4777-33;;33/4822-17.html;;200/8994", "google_scholar": "GU42ydUAAAAJ;TcyZu_8AAAAJ;7yE7WQQAAAAJ;https://scholar.google.cz/citations?user=0GOKl_gAAAAJ;;AUvVBewAAAAJ;https://scholar.google.ca/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en", "orcid": ";;0009-0001-8813-5211;;;;;;;0009-0003-1586-0820", "linkedin": ";;;;;%E6%B6%9B-%E5%BC%A0-2a4043292/;;;;", "or_profile": "~Long_Wei1;~Peiyan_Hu1;~Ruiqi_Feng1;~Haodong_Feng1;~Yixuan_Du1;~Tao_Zhang35;~Rui_Wang56;~Yue_Wang15;~Zhi-Ming_Ma1;~Tailin_Wu1", "aff": "Westlake University;Chinese Academy of Sciences;Fudan University;Westlake University;Jilin University;Wuhan University;Fudan University;Microsoft Research Aisa;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Westlake University", "aff_domain": "westlake.edu;amss.ac.cn;fudan.edu.cn;westlake.edu;jlu.edu.cn;whu.edu.cn;fudan.edu.cn;microsoft.com;amss.ac.cn;westlake.edu", "position": "Postdoc;PhD student;Undergrad student;PhD student;Undergrad student;Undergrad student;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwei2024diffphycon,\ntitle={DiffPhyCon: A Generative Approach to Control Complex Physical Systems},\nauthor={Long Wei and Peiyan Hu and Ruiqi Feng and Haodong Feng and Yixuan Du and Tao Zhang and Rui Wang and Yue Wang and Zhi-Ming Ma and Tailin Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MbZuh8L0Xg}\n}", "github": "", "reviewers": "Mp9v;8Er5;n6Yf;DFgi", "pdf_size": 0, "rating": "5;6;8;8", "confidence": "4;4;4;5", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "32;116;59;61", "wc_strengths": "36;86;54;95", "wc_weaknesses": "193;37;125;12", "wc_questions": "49;85;226;180", "wc_limitations": "10;8;7;8", "wc_review": "320;332;471;356", "wc_reply_reviewers": "54;112;36;66", "wc_reply_authors": "124;0;39;79", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 30.520484924063705 ], "wc_strengths_avg": [ 67.75, 23.836683913665507 ], "wc_weaknesses_avg": [ 91.75, 71.96309818233232 ], "wc_questions_avg": [ 135.0, 71.06686991840853 ], "wc_limitations_avg": [ 8.25, 1.0897247358851685 ], "wc_review_avg": [ 369.75, 59.876435264634786 ], "wc_reply_reviewers_avg": [ 67.0, 28.089143810376278 ], "wc_reply_authors_avg": [ 60.5, 46.08958667638494 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yci9QRK0-nUJ:scholar.google.com/&scioq=DiffPhyCon:+A+Generative+Approach+to+Control+Complex+Physical+Systems&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "westlake.edu;amss.ac.cn;fudan.edu.cn;westlake.edu;jlu.edu.cn;whu.edu.cn;fudan.edu.cn;microsoft.com;amss.ac.cn;westlake.edu", "author_num": 10, "aff_unique_index": "0;1;2;0;3;4;2;5;1;0", "aff_unique_norm": "Westlake University;Chinese Academy of Sciences;Fudan University;Jilin University;Wuhan University;Microsoft", "aff_unique_dep": ";;;;;Microsoft Research", "aff_unique_url": "https://www.westlake.edu.cn;https://www.cas.cn;https://www.fudan.edu.cn;http://www.jlu.edu.cn;http://www.whu.edu.cn/;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "WU;CAS;Fudan;JLU;WHU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MbbTN9rPzC", "title": "Quantile Activation: Departing from single point estimation for better generalization across distortions", "track": "main", "status": "Reject", "tldr": "", "abstract": "A classifier is, in its essence, a function which takes an input and returns the class of the input and implicitly assumes an underlying distribution which does not change. We argue in this article that one has to move away from this basic tenet to obtain generalisation across distributions. Specifically, the class of the sample should depend on the points from its \u201ccontext distribution\u201d for better generalisation across distributions. \nHow does one achieve this? \u2013 The key idea is to \u201cadapt\u201d the outputs of each neuron of the network to its context distribution. We propose quantile activation,QACT, which, in simple terms, outputs the relative quantile of the sample in its context distribution, instead of the actual values in traditional networks. The scope of this article is to validate the proposed activation across several experimental settings, and compare it with conventional techniques. For this, we use the datasets developed to test robustness against distortions \u2013 CIFAR10C, CIFAR100C, MNISTC, TinyImagenetC, and show that we achieve a significantly better generalisation across distortions than the conventional classifiers, across different architectures. Although this paper is only a proof of concept, we surprisingly find that this approach outperforms DINOv2(small) at large distortions, even though DINOv2 is trained with a far bigger network on a considerably larger dataset.", "keywords": "Uncertainty Quantification;Context Distribution;Robust Inference;Generalization across distortions;Neuronal Activation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Aditya Challa;Sravan Danda;Laurent Najman;Snehanshu Saha", "authorids": "~Aditya_Challa1;~Sravan_Danda1;~Laurent_Najman1;~Snehanshu_Saha1", "gender": ";M;M;Not Specified", "homepage": ";;https://laurentnajman.org;https://www.bits-pilani.ac.in/goa/snehanshus/profile", "dblp": ";189/3118.html;68/4192;130/3938", "google_scholar": ";5kSGWFoAAAAJ;https://scholar.google.fr/citations?user=j-2_cT0AAAAJ;C-Qm2LcAAAAJ", "orcid": ";;0000-0002-6190-0235;0000-0002-8458-604X", "linkedin": ";;;snehanshusaha/", "or_profile": "~Aditya_Challa1;~Sravan_Danda1;~Laurent_Najman1;~Snehanshu_Saha1", "aff": ";Birla Institute of Technology and Science, Goa;Ecole Sup\u00e9rieure d'Ing\u00e9nieurs en Electronique et Electrotechnique;Birla Institute of Technology and Science, Dhirubhai Ambani Institute Of Information and Communication Technology", "aff_domain": ";bits-pilani.ac.in;esiee.fr;bits-pilani.ac.in", "position": ";Assistant Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024quantile,\ntitle={Quantile Activation: Departing from single point estimation for better generalization across distortions},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=MbbTN9rPzC}\n}", "github": "", "project": "", "reviewers": "L8Qz;gJMP;v4L1", "site": "https://openreview.net/forum?id=MbbTN9rPzC", "pdf_size": 2389222, "rating": "4;4;5", "confidence": "2;3;3", "soundness": "3;3;2", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "47;72;66", "wc_strengths": "29;45;52", "wc_weaknesses": "83;224;152", "wc_questions": "3;1;233", "wc_limitations": "16;1;6", "wc_review": "178;343;509", "wc_reply_reviewers": "0;18;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 4.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 61.666666666666664, 10.656244908763853 ], "wc_strengths_avg": [ 42.0, 9.626352718795768 ], "wc_weaknesses_avg": [ 153.0, 57.56735185849702 ], "wc_questions_avg": [ 79.0, 108.89750532802239 ], "wc_limitations_avg": [ 7.666666666666667, 6.236095644623235 ], "wc_review_avg": [ 343.3333333333333, 135.13038970646915 ], "wc_reply_reviewers_avg": [ 6.0, 8.48528137423857 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Wv-R5bxO9usJ:scholar.google.com/&scioq=Quantile+Activation:+Departing+from+single+point+estimation+for+better+generalization+across+distortions&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Birla Institute of Technology and Science;Ecole Sup\u00e9rieure d'Ing\u00e9nieurs en Electronique et Electrotechnique", "aff_unique_dep": ";", "aff_unique_url": "https://www.bits-goa.ac.in;https://www.esiee.fr", "aff_unique_abbr": "BITS Goa;ESIEE", "aff_campus_unique_index": "0", "aff_campus_unique": "Goa;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "India;France" }, { "title": "OpenMathInstruct-1: A 1.8 Million Math Instruction Tuning Dataset", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97731", "id": "Mbd3QxXjq5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mbd3QxXjq5", "openreview": "https://openreview.net/forum?id=Mbd3QxXjq5", "poster": "/media/PosterPDFs/NeurIPS%202024/97731.png?t=1733324349.8403435", "project": "", "author_site": "Shubham Toshniwal, Ivan Moshkov, Sean Narenthiran, Daria Gitman, Fei Jia, Igor Gitman", "tldr": "", "abstract": "Recent work has shown the immense potential of synthetically generated datasets for training large language models (LLMs), especially for acquiring targeted skills. Current large-scale math instruction tuning datasets such as MetaMathQA (Yu et al., 2024) and MAmmoTH (Yue et al., 2024) are constructed using outputs from closed-source LLMs with commercially restrictive licenses. A key reason limiting the use of open-source LLMs in these data generation pipelines has been the wide gap between the mathematical skills of the best closed-source LLMs, such as GPT-4, and the best open-source LLMs. Building on the recent progress in open-source LLMs, our proposed prompting novelty, and some brute-force scaling, we construct OpenMathInstruct-1, a math instruction tuning dataset with 1.8M problem-solution pairs. The dataset is constructed by synthesizing code-interpreter solutions for GSM8K and MATH, two popular math reasoning benchmarks, using the recently released and permissively licensed Mixtral model. Our best model, OpenMath-CodeLlama-70B, trained on a subset of OpenMathInstruct-1, achieves a score of 84.6% on GSM8K and 50.7% on MATH, which is competitive with the best gpt-distilled models. We will release our code, models, and the OpenMathInstruct-1 dataset under a commercially permissive license.", "keywords": "Mathematical Reasoning;Synthetic Data;Open-source;Permissive License", "primary_area": "", "supplementary_material": "", "author": "Shubham Toshniwal;Ivan Moshkov;Sean Narenthiran;Daria Gitman;Fei Jia;Igor Gitman", "authorids": "~Shubham_Toshniwal1;~Ivan_Moshkov1;~Sean_Narenthiran1;~Daria_Gitman1;~Fei_Jia1;~Igor_Gitman2", "gender": ";M;M;F;F;M", "homepage": ";;https://github.com/SeanNaren;;;", "dblp": ";;;;121/0045;", "google_scholar": ";SJD5bowAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;8r9aWLIAAAAJ", "orcid": ";;;;;", "linkedin": ";i-vainn/;;daria-gitman/;fei-jia;", "or_profile": "~Shubham_Toshniwal1;~Ivan_Moshkov1;~Sean_Narenthiran1;~Daria_Gitman1;~Fei_Jia1;~Igor_Gitman2", "aff": ";NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": ";nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": ";Researcher;Researcher;Intern;Researcher;Researcher", "bibtex": "@inproceedings{\ntoshniwal2024openmathinstruct,\ntitle={OpenMathInstruct-1: A 1.8 Million Math Instruction Tuning Dataset},\nauthor={Shubham Toshniwal and Ivan Moshkov and Sean Narenthiran and Daria Gitman and Fei Jia and Igor Gitman},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Mbd3QxXjq5}\n}", "github": "", "reviewers": "vaWD;Sz3o;XvnS", "pdf_size": 1123929, "rating": "5;7;9", "confidence": "4;4;5", "wc_summary_and_contributions": "62;67;49", "wc_strengths": "12;58;21", "wc_improvement": "56;25;63", "wc_limitations": "45;15;1", "wc_correctness": "1;13;1", "wc_clarity": "1;6;5", "wc_relation_to_prior_work": "1;9;1", "wc_documentation": "1;6;1", "wc_additional_feedback": "1;1;1", "wc_review": "180;200;143", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 59.333333333333336, 7.586537784494029 ], "wc_strengths_avg": [ 30.333333333333332, 19.90533150244482 ], "wc_improvement_avg": [ 48.0, 16.51262143533445 ], "wc_limitations_avg": [ 20.333333333333332, 18.354533197248273 ], "wc_correctness_avg": [ 5.0, 5.656854249492381 ], "wc_clarity_avg": [ 4.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_documentation_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 174.33333333333334, 23.612614331233114 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4645373668754685734&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": ";nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Theoretical Investigations and Practical Enhancements on Tail Task Risk Minimization in Meta Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95504", "id": "McrzOo0hwr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=McrzOo0hwr", "openreview": "https://openreview.net/forum?id=McrzOo0hwr", "poster": "/media/PosterPDFs/NeurIPS%202024/95504.png?t=1731072721.7885606", "project": "", "author_site": "Yiqin Lv, Qi Wang, Dong Liang, Zheng Xie", "tldr": "", "abstract": "Meta learning is a promising paradigm in the era of large models and \ntask distributional robustness has become an indispensable consideration in real-world scenarios.\nRecent advances have examined the effectiveness of tail task risk minimization in fast adaptation robustness improvement \\citep{wang2023simple}.\nThis work contributes to more theoretical investigations and practical enhancements in the field.\nSpecifically, we reduce the distributionally robust strategy to a max-min optimization problem, constitute the Stackelberg equilibrium as the solution concept, and estimate the convergence rate.\nIn the presence of tail risk, we further derive the generalization bound, establish connections with estimated quantiles, and practically improve the studied strategy.\nAccordingly, extensive evaluations demonstrate the significance of our proposal in boosting robustness.", "keywords": "Meta learning;Few-shot learning;Fast adaptation robustness", "primary_area": "other", "supplementary_material": "/attachment/ebb6c8f44f18a078a8afb9d95dc936061fa1c7d1.zip", "author": "Yiqin Lv;Cheems Wang;Dong Liang;Zheng Xie", "authorids": "~Yiqin_Lv1;~Cheems_Wang1;~Dong_Liang8;~Zheng_Xie4", "gender": "F;M;;M", "homepage": "https://dblp.org/pid/291/3737;;;https://sites.google.com/view/albert-q-wang-at-ai-community/home", "dblp": "291/3737;;;375/3186", "google_scholar": ";;8SuC0QoAAAAJ;Mvbvv3IAAAAJ", "orcid": "0000-0003-1181-0212;0000-0002-4899-6113;;0000-0001-6135-6965", "linkedin": ";;;qi-cheems-wang-518a421a1/", "or_profile": "~Yiqin_Lv1;~Dong_Liang8;~Zheng_Xie4;~Qi_Wang11", "aff": "National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Tsinghua University", "aff_domain": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;cs.tsinghua.edu.cn", "position": "PhD student;Lecturer;Full Professor;Postdoc", "bibtex": "@inproceedings{\nlv2024theoretical,\ntitle={Theoretical Investigations and Practical Enhancements on Tail Task Risk Minimization in Meta Learning},\nauthor={Yiqin Lv and Cheems Wang and Dong Liang and Zheng Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=McrzOo0hwr}\n}", "github": "", "reviewers": "6Ej4;ci3Y;8vBx", "pdf_size": 3615365, "rating": "5;6;8", "confidence": "3;2;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;2;4", "wc_summary": "41;61;113", "wc_strengths": "32;33;72", "wc_weaknesses": "202;54;3", "wc_questions": "2;19;61", "wc_limitations": "7;1;1", "wc_review": "284;168;250", "wc_reply_reviewers": "102;111;115", "wc_reply_authors": "454;115;16", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 71.66666666666667, 30.346151137976115 ], "wc_strengths_avg": [ 45.666666666666664, 18.62495339293199 ], "wc_weaknesses_avg": [ 86.33333333333333, 84.39720901125159 ], "wc_questions_avg": [ 27.333333333333332, 24.796953217863056 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 234.0, 48.68949236402724 ], "wc_reply_reviewers_avg": [ 109.33333333333333, 5.436502143433363 ], "wc_reply_authors_avg": [ 195.0, 187.54732736032258 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7450241811377627435&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;cs.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "National University of Defense Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "NUDT;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Md1mEoPEaQ", "title": "SETLEXSEM CHALLENGE: Using Set Operations to Evaluate the Lexical and Semantic Robustness of Language Models", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Set theory is foundational to mathematics and, when sets are finite, to reasoning about the world. An intelligent system should perform set operations consistently, regardless of superficial variations in the operands. Initially designed for semantically-oriented NLP tasks, large language models (LLMs) are now being evaluated on algorithmic tasks. Because sets are comprised of arbitrary symbols (e.g. numbers, words), they provide an opportunity to test, systematically, the invariance of LLMs\u2019 algorithmic abilities under simple lexical or semantic variations. To this end, we present the SETLEXSEM CHALLENGE, a synthetic benchmark that evaluates the performance of LLMs on set operations. SETLEXSEM assesses the robustness of LLMs\u2019 instruction-following abilities under various conditions, focusing on the set operations and the nature and construction of the set members. Evaluating seven LLMs with SETLEXSEM, we find that they exhibit poor robust- ness to variation in both operation and operands. We show \u2013 via the framework\u2019s systematic sampling of set members along lexical and semantic dimensions \u2013 that LLMs are not only not robust to variation along these dimensions but demonstrate unique failure modes in particular, easy-to-create semantic groupings of \"deceptive\" sets. We find that rigorously measuring language model robustness to variation in frequency and length is challenging and present an analysis that measures them in- dependently. The code for reproducing the results of this paper, and for generating the SETLEXSEM CHALLENGE dataset, is available https://github.com/amazon-science/SetLexSem-Challenge.", "keywords": "benchmark;set theory;semantics", "primary_area": "", "supplementary_material": "/attachment/8e08f03d49ab34fb9710152ae997aee187b8c8ba.zip", "author": "Nicholas Andrew Dronen;Bardiya Akhbari;Manish Gawali", "authorids": "~Nicholas_Andrew_Dronen1;~Bardiya_Akhbari1;~Manish_Gawali1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/bardiya-akhbari/;", "dblp": ";;", "google_scholar": "NNwlJ7cAAAAJ;86sxAzoAAAAJ;IcVJ8Q4AAAAJ", "orcid": ";0000-0003-1874-7551;", "linkedin": "ndronen/;;", "or_profile": "~Nicholas_Andrew_Dronen1;~Bardiya_Akhbari1;~Manish_Gawali1", "aff": "Amazon;;", "aff_domain": "amazon.com;;", "position": "Researcher;;", "bibtex": "@inproceedings{\ndronen2024setlexsem,\ntitle={{SETLEXSEM} {CHALLENGE}: Using Set Operations to Evaluate the Lexical and Semantic Robustness of Language Models},\nauthor={Nicholas Andrew Dronen and Bardiya Akhbari and Manish Gawali},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Md1mEoPEaQ}\n}", "github": "", "project": "", "reviewers": "JGs7;kxVp;dn4D;Tk7z", "site": "https://openreview.net/forum?id=Md1mEoPEaQ", "pdf_size": 1542040, "rating": "4;4;7;8", "confidence": "4;5;3;3", "wc_summary_and_contributions": "38;71;69;129", "wc_strengths": "36;35;97;163", "wc_improvement": "86;95;18;188", "wc_limitations": "27;63;31;87", "wc_correctness": "108;71;34;4", "wc_clarity": "145;55;49;11", "wc_relation_to_prior_work": "19;26;11;6", "wc_documentation": "9;16;40;27", "wc_additional_feedback": "1;1;1;1", "wc_review": "469;433;350;616", "wc_reply_reviewers": "351;30;46;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 76.75, 32.88141572377929 ], "wc_strengths_avg": [ 82.75, 52.69902750525858 ], "wc_improvement_avg": [ 96.75, 60.51187899908579 ], "wc_limitations_avg": [ 52.0, 24.55605831561735 ], "wc_correctness_avg": [ 54.25, 39.06644980030819 ], "wc_clarity_avg": [ 65.0, 49.17316341257699 ], "wc_relation_to_prior_work_avg": [ 15.5, 7.632168761236874 ], "wc_documentation_avg": [ 23.0, 11.726039399558575 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 467.0, 96.24188277460078 ], "wc_reply_reviewers_avg": [ 106.75, 141.98129278183094 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8866206949335731, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mcUOb6Qr-VgJ:scholar.google.com/&scioq=SETLEXSEM+CHALLENGE:+Using+Set+Operations+to+Evaluate+the+Lexical+and+Semantic+Robustness+of+Language+Models&hl=en&as_sdt=0,44", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "Mdd3f8Cui8", "title": "Latent Feature Mining with Large Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Predictive modeling often faces challenges due to limited data availability and quality, especially in domains where collected features are weakly correlated with outcomes and where additional data collection is constrained by ethical or practical difficulties. Traditional machine learning (ML) models struggle to incorporate unobserved yet critical factors. We propose a framework that leverages large language models (LLMs) to augment observed features with latent features, enhancing the predictive power of ML models in downstream tasks. Our novel approach transforms the latent feature mining task to a text-to-text propositional reasoning task. We validate our framework with a case study in the criminal justice system, a domain characterized by limited and ethically challenging data collection. Our results show that inferred latent features align well with ground truth labels and significantly enhance the downstream classifier. Our framework is generalizable across various domains with minimal domain-specific customization, ensuring easy transfer to other areas facing similar challenges in data availability.", "keywords": "criminal justice; large language models; feature extraction; AI for social good;", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Bingxuan Li;Pengyi Shi;Amy R Ward", "authorids": "~Bingxuan_Li2;~Pengyi_Shi1;~Amy_R_Ward1", "gender": "M;F;F", "homepage": "https://bingxuanli.com/;https://web.ics.purdue.edu/~shi178/;https://www.chicagobooth.edu/faculty/directory/w/amy-ward", "dblp": ";;", "google_scholar": "l5Qe_mkAAAAJ;8xJkl2cAAAAJ;TBVpIfEAAAAJ", "orcid": "0000-0002-9193-5308;0000-0003-0905-7858;0000-0003-2744-2960", "linkedin": ";;", "or_profile": "~Bingxuan_Li2;~Pengyi_Shi1;~Amy_R_Ward1", "aff": "University of California, Los Angeles;Purdue University;University of Chicago", "aff_domain": "ucla.edu;purdue.edu;uchicago.edu", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2024latent,\ntitle={Latent Feature Mining with Large Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Mdd3f8Cui8}\n}", "github": "", "project": "", "reviewers": "QvzM;U9i4;imv4;bSc5", "site": "https://openreview.net/forum?id=Mdd3f8Cui8", "pdf_size": 1549044, "rating": "3;4;5;6", "confidence": "3;4;3;4", "soundness": "2;3;3;2", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "63;71;45;101", "wc_strengths": "24;75;27;44", "wc_weaknesses": "49;231;88;162", "wc_questions": "30;82;11;73", "wc_limitations": "1;23;1;2", "wc_review": "167;482;172;382", "wc_reply_reviewers": "234;34;13;0", "wc_reply_authors": "1099;0;0;481", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 20.223748416156685 ], "wc_strengths_avg": [ 42.5, 20.254629100529094 ], "wc_weaknesses_avg": [ 132.5, 69.86594306241061 ], "wc_questions_avg": [ 49.0, 29.45335295004628 ], "wc_limitations_avg": [ 6.75, 9.390819985496474 ], "wc_review_avg": [ 300.75, 135.94001434456302 ], "wc_reply_reviewers_avg": [ 70.25, 95.31624992623242 ], "wc_reply_authors_avg": [ 395.0, 451.4039211172185 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DqoL0IUL3YwJ:scholar.google.com/&scioq=Latent+Feature+Mining+with+Large+Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;Purdue University;University of Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.purdue.edu;https://www.uchicago.edu", "aff_unique_abbr": "UCLA;Purdue;UChicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Differential Privacy in Scalable General Kernel Learning via $K$-means Nystr{\\\"o}m Random Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95503", "id": "MdmzAezNHq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MdmzAezNHq", "openreview": "https://openreview.net/forum?id=MdmzAezNHq", "poster": "", "project": "", "author_site": "Bonwoo Lee, Jeongyoun Ahn, Cheolwoo Park", "tldr": "", "abstract": "As the volume of data invested in statistical learning increases and concerns regarding privacy grow, the privacy leakage issue has drawn significant attention. Differential privacy has emerged as a widely accepted concept capable of mitigating privacy concerns, and numerous differentially private (DP) versions of machine learning algorithms have been developed. However, existing works on DP kernel learning algorithms have exhibited practical limitations, including scalability, restricted choice of kernels, or dependence on test data availability. We propose DP scalable kernel empirical risk minimization (ERM) algorithms and a DP kernel mean embedding (KME) release algorithm suitable for general kernels. Our approaches address the shortcomings of previous algorithms by employing Nystr\u00f6m methods, classical techniques in non-private scalable kernel learning. These methods provide data-dependent low-rank approximations of the kernel matrix for general kernels in a DP manner. We present excess empirical risk bounds and computational complexities for the scalable kernel DP ERM, KME algorithms, contrasting them with established methodologies. Furthermore, we develop a private data-generating algorithm capable of learning diverse kernel models. We conduct experiments to demonstrate the performance of our algorithms, comparing them with existing methods to highlight their superiority.", "keywords": "differential privacy;kernel learning;kernel mean embedding;kernel empirical risk minimization", "primary_area": "privacy", "supplementary_material": "/attachment/02365fb0fb85a8f95f3a212d1991bbf04ba5f525.zip", "author": "Bonwoo Lee;Jeongyoun Ahn;Cheolwoo Park", "authorids": "~Bonwoo_Lee1;~Jeongyoun_Ahn1;~Cheolwoo_Park3", "gender": ";;M", "homepage": ";;https://sites.google.com/view/parkcw/home?authuser=1", "dblp": ";;46/4369", "google_scholar": ";;", "orcid": ";;0000-0003-4634-7045", "linkedin": ";;", "or_profile": "~Bonwoo_Lee1;~Jeongyoun_Ahn1;~Cheolwoo_Park3", "aff": ";;Korea Advanced Institute of Science & Technology", "aff_domain": ";;kaist.ac.kr", "position": ";;Full Professor", "bibtex": "@inproceedings{\nlee2024differential,\ntitle={Differential Privacy in Scalable General Kernel Learning via \\$K\\$-means Nystr\\{{\\textbackslash}''o\\}m Random Features},\nauthor={Bonwoo Lee and Jeongyoun Ahn and Cheolwoo Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MdmzAezNHq}\n}", "github": "", "reviewers": "PMbb;XDHr;7ffV", "pdf_size": 1173515, "rating": "5;5;7", "confidence": "3;4;2", "soundness": "3;4;3", "novelty": "3;3;2", "presentation": "3;3;2", "wc_summary": "70;77;38", "wc_strengths": "14;97;25", "wc_weaknesses": "41;135;13", "wc_questions": "84;142;1", "wc_limitations": "4;31;13", "wc_review": "213;482;90", "wc_reply_reviewers": "78;34;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 61.666666666666664, 16.97710877099579 ], "wc_strengths_avg": [ 45.333333333333336, 36.80881536926839 ], "wc_weaknesses_avg": [ 63.0, 52.17917847826532 ], "wc_questions_avg": [ 75.66666666666667, 57.86382481501048 ], "wc_limitations_avg": [ 16.0, 11.224972160321824 ], "wc_review_avg": [ 261.6666666666667, 163.6914441529822 ], "wc_reply_reviewers_avg": [ 37.333333333333336, 31.93048003954146 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rtOY-2sh68UJ:scholar.google.com/&scioq=Differential+Privacy+in+Scalable+General+Kernel+Learning+via+%24K%24-means+Nystr%7B%5C%22o%7Dm+Random+Features&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": ";;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "Covariate Shift Corrected Conditional Randomization Test", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95502", "id": "Me5esZTRqW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Me5esZTRqW", "openreview": "https://openreview.net/forum?id=Me5esZTRqW", "poster": "/media/PosterPDFs/NeurIPS%202024/95502.png?t=1731608538.647182", "project": "", "author_site": "Bowen Xu, Yiwen Huang, Chuan Hong, Shuangning Li, Molei Liu", "tldr": "", "abstract": "Conditional independence tests are crucial across various disciplines in determining the independence of an outcome variable $Y$ from a treatment variable $X$, conditioning on a set of confounders $Z$. The Conditional Randomization Test (CRT) offers a powerful framework for such testing by assuming known distributions of $X \\mid Z$; it controls the Type-I error exactly, allowing for the use of flexible, black-box test statistics. In practice, testing for conditional independence often involves using data from a source population to draw conclusions about a target population. This can be challenging due to covariate shift---differences in the distribution of $X$, $Z$, and surrogate variables, which can affect the conditional distribution of $Y \\mid X, Z$---rendering traditional CRT approaches invalid. To address this issue, we propose a novel Covariate Shift Corrected Pearson Chi-squared Conditional Randomization (csPCR) test. This test adapts to covariate shifts by integrating importance weights and employing the control variates method to reduce variance in the test statistics and thus enhance power. Theoretically, we establish that the csPCR test controls the Type-I error asymptotically. Empirically, through simulation studies, we demonstrate that our method not only maintains control over Type-I errors but also exhibits superior power, confirming its efficacy and practical utility in real-world scenarios where covariate shifts are prevalent. Finally, we apply our methodology to a real-world dataset to assess the impact of a COVID-19 treatment on the 90-day mortality rate among patients.", "keywords": "conditional independence test;conditional randomization test;model-X framework;surrogate variable;importance weight;control covariates", "primary_area": "causal_inference", "supplementary_material": "/attachment/6e4108a5b57a9a18ca58f4eb9d5a79eb3097484d.zip", "author": "Bowen Xu;Yiwen Huang;Chuan Hong;Shuangning Li;Molei Liu", "authorids": "~Bowen_Xu4;~Yiwen_Huang3;~Chuan_Hong1;~Shuangning_Li1;~Molei_Liu1", "gender": "M;M;F;;M", "homepage": ";http://www.huangyiwen.com;https://scholars.duke.edu/person/chuan.hong;https://lsn235711.github.io/;https://moleibobliu.github.io/", "dblp": ";;;341/1499;", "google_scholar": ";;o6JIv7QAAAAJ;;FTe6r1cAAAAJ", "orcid": ";;0000-0001-7056-9559;;", "linkedin": "bowen-xu-540543213/;;chuan-hong-a02554b4;;", "or_profile": "~Bowen_Xu4;~Yiwen_Huang3;~Chuan_Hong1;~Shuangning_Li1;~Molei_Liu1", "aff": "Harvard University;Peking University;Duke University;University of Chicago Booth School of Business;Columbia University", "aff_domain": "harvard.edu;pku.edu;duke.edu;chicagobooth.edu;columbia.edu", "position": "MS student;Undergrad student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024covariate,\ntitle={Covariate Shift Corrected Conditional Randomization Test},\nauthor={Bowen Xu and Yiwen Huang and Chuan Hong and Shuangning Li and Molei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Me5esZTRqW}\n}", "github": "", "reviewers": "G2Ah;MwvS;REEU;r237", "pdf_size": 1679782, "rating": "4;5;7;7", "confidence": "1;3;4;3", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "65;71;90;42", "wc_strengths": "315;77;111;29", "wc_weaknesses": "2;147;222;104", "wc_questions": "2;95;247;50", "wc_limitations": "2;17;40;5", "wc_review": "386;407;710;230", "wc_reply_reviewers": "375;19;9;187", "wc_reply_authors": "137;26;0;320", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.0, 17.131841699011815 ], "wc_strengths_avg": [ 133.0, 109.04127658827184 ], "wc_weaknesses_avg": [ 118.75, 79.540477117 ], "wc_questions_avg": [ 98.5, 91.82728352728289 ], "wc_limitations_avg": [ 16.0, 14.949916387726054 ], "wc_review_avg": [ 433.25, 173.7978351418682 ], "wc_reply_reviewers_avg": [ 147.5, 149.17355663789746 ], "wc_reply_authors_avg": [ 120.75, 126.01859981764596 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8388704928078611, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3021780602323064627&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "harvard.edu;pku.edu;duke.edu;chicagobooth.edu;columbia.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Harvard University;Peking University;Duke University;University of Chicago;Columbia University", "aff_unique_dep": ";;;Booth School of Business;", "aff_unique_url": "https://www.harvard.edu;http://www.pku.edu.cn;https://www.duke.edu;https://\u5e03\u65af\u829d\u52a0\u54e5\u5927\u5b66.com;https://www.columbia.edu", "aff_unique_abbr": "Harvard;Peking U;Duke;UChicago;Columbia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "A Modular Conditional Diffusion Framework for Image Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95501", "id": "MeCC0Is5hs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MeCC0Is5hs", "openreview": "https://openreview.net/forum?id=MeCC0Is5hs", "poster": "", "project": "", "author_site": "Magauiya Zhussip, Iaroslav Koshelev, Stamatios Lefkimmiatis", "tldr": "", "abstract": "Diffusion Probabilistic Models (DPMs) have been recently utilized to deal with various blind image restoration (IR) tasks, where they have demonstrated outstanding performance in terms of perceptual quality. However, the task-specific nature of existing solutions and the excessive computational costs related to their training, make such models impractical and challenging to use for different IR tasks than those that were initially trained for. This hinders their wider adoption especially by those who lack access to powerful computational resources and vast amounts of training data. In this work we aim to address the above issues and enable the successful adoption of DPMs in practical IR-related applications. Towards this goal, we propose a modular diffusion probabilistic IR framework (DP-IR), which allows us to combine the performance benefits of existing pre-trained state-of-the-art IR networks and generative DPMs, while it requires only the additional training of a small module (0.7M params) related to the particular IR task of interest. Moreover, the architecture of our proposed framework allows us to employ a sampling strategy that leads to at least four times reduction of neural function evaluations without any performance loss, while it can also be combined with existing acceleration techniques (e.g. DDIM). We evaluate our model on four benchmarks for the tasks of burst JDD-SR, dynamic scene deblurring, and super-resolution. Our method outperforms existing approaches in terms of perceptual quality while retaining a competitive performance in relation to fidelity metrics.", "keywords": "diffusion model; image restoration; super-resolution; motion deblurring", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Magauiya Zhussip;Iaroslav Sergeevich Koshelev;Stamatios Lefkimmiatis", "authorids": "~Magauiya_Zhussip1;~Iaroslav_Sergeevich_Koshelev1;~Stamatios_Lefkimmiatis1", "gender": ";M;M", "homepage": ";;https://slefkimmiatis.github.io/", "dblp": ";271/7403;52/101", "google_scholar": ";;3Bawtm4AAAAJ", "orcid": ";;", "linkedin": ";iaroslav-koshelev;stamatis-lefkimmiatis-37855930/", "or_profile": "~Magauiya_Zhussip1;~Iaroslav_Sergeevich_Koshelev1;~Stamatios_Lefkimmiatis1", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": ";huawei.com;huawei.com", "position": ";Researcher;Senior Team Lead", "bibtex": "@inproceedings{\nzhussip2024a,\ntitle={A Modular Conditional Diffusion Framework for Image Reconstruction},\nauthor={Magauiya Zhussip and Iaroslav Sergeevich Koshelev and Stamatios Lefkimmiatis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MeCC0Is5hs}\n}", "github": "", "reviewers": "B8jE;PdT6;hKXQ;WWsb", "pdf_size": 13348499, "rating": "5;5;5;6", "confidence": "2;3;4;4", "soundness": "3;2;2;3", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "32;83;27;46", "wc_strengths": "50;49;55;73", "wc_weaknesses": "252;47;24;70", "wc_questions": "22;10;211;136", "wc_limitations": "4;30;23;1", "wc_review": "360;219;340;326", "wc_reply_reviewers": "52;0;0;0", "wc_reply_authors": "81;0;0;442", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 47.0, 21.920310216782973 ], "wc_strengths_avg": [ 56.75, 9.65336728815391 ], "wc_weaknesses_avg": [ 98.25, 90.24515222437158 ], "wc_questions_avg": [ 94.75, 83.20268925942239 ], "wc_limitations_avg": [ 14.5, 12.298373876248844 ], "wc_review_avg": [ 311.25, 54.61398630387641 ], "wc_reply_reviewers_avg": [ 13.0, 22.516660498395403 ], "wc_reply_authors_avg": [ 130.75, 182.71750737135181 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:c_gpF1pKnsAJ:scholar.google.com/&scioq=A+Modular+Conditional+Diffusion+Framework+for+Image+Reconstruction&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": ";huawei.com;huawei.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Robust group and simultaneous inferences for high-dimensional single index model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95500", "id": "MelYGfpy4x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MelYGfpy4x", "openreview": "https://openreview.net/forum?id=MelYGfpy4x", "poster": "/media/PosterPDFs/NeurIPS%202024/95500.png?t=1729665601.481785", "project": "", "author_site": "Weichao Yang, Hongwei Shi, Xu Guo, Changliang Zou", "tldr": "", "abstract": "The high-dimensional single index model (SIM), which assumes that the response is independent of the predictors given a linear combination of predictors, has drawn attention due to its flexibility and interpretability, but its efficiency is adversely affected by outlying observations and heavy-tailed distributions. This paper introduces a robust procedure by recasting the SIM into a pseudo-linear model with transformed responses. It relaxes the distributional conditions on random errors from sub-Gaussian to more general distributions and thus it is robust with substantial efficiency gain for heavy-tailed random errors. Under this paradigm, we provide asymptotically honest group inference procedures based on the idea of orthogonalization, which enjoys the feature that it does not require the zero and nonzero coefficients to be well-separated. Asymptotic null distribution and bootstrap implementation are both established. Moreover, we develop a multiple testing procedure for determining if the individual coefficients are relevant simultaneously, and show that it is able to control the false discovery rate asymptotically. Numerical results indicate that the new procedures can be highly competitive among existing methods, especially for heavy-tailed errors.", "keywords": "FDR control;high-dimensional inference;honest test;outliers;robustness", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/78fc2c09fdd95bdd11ddd8fcfe554fb5fea8e41b.zip", "author": "Weichao Yang;Hongwei Shi;Xu Guo;Changliang Zou", "authorids": "~Weichao_Yang1;~Hongwei_Shi2;~Xu_Guo3;~Changliang_Zou2", "gender": "M;;M;M", "homepage": ";https://havanashw.github.io/;https://stat.bnu.edu.cn/zwjl/232830.htm;http://web.stat.nankai.edu.cn/chlzou/", "dblp": ";;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;a2SulVYAAAAJ;;LPwSdmwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Weichao_Yang1;~Hongwei_Shi2;~Xu_Guo3;~Changliang_Zou2", "aff": "Beijing Normal University;Beijing Normal University;Beijing Normal University;Nankai University", "aff_domain": "bnu.edu.cn;bnu.edu.cn;bnu.edu.cn;nankai.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024robust,\ntitle={Robust group and simultaneous inferences for high-dimensional single index model},\nauthor={Weichao Yang and Hongwei Shi and Xu Guo and Changliang Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MelYGfpy4x}\n}", "github": "", "reviewers": "Tg8m;BqNs;wCvo", "pdf_size": 641742, "rating": "5;6;6", "confidence": "2;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "122;102;127", "wc_strengths": "19;17;76", "wc_weaknesses": "24;6;114", "wc_questions": "1;174;65", "wc_limitations": "1;6;25", "wc_review": "167;305;407", "wc_reply_reviewers": "10;18;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 117.0, 10.801234497346433 ], "wc_strengths_avg": [ 37.333333333333336, 27.35365098523819 ], "wc_weaknesses_avg": [ 48.0, 47.24404724407086 ], "wc_questions_avg": [ 80.0, 71.41895173318261 ], "wc_limitations_avg": [ 10.666666666666666, 10.338708279513881 ], "wc_review_avg": [ 293.0, 98.34632682515398 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 7.363574011458175 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lCxrx4RUAzYJ:scholar.google.com/&scioq=Robust+group+and+simultaneous+inferences+for+high-dimensional+single+index+model&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "bnu.edu.cn;bnu.edu.cn;bnu.edu.cn;nankai.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Beijing Normal University;Nankai University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bnu.edu.cn;http://www.nankai.edu.cn", "aff_unique_abbr": "BNU;NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unveiling and Mitigating Backdoor Vulnerabilities based on Unlearning Weight Changes and Backdoor Activeness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95499", "id": "MfGRUVFtn9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MfGRUVFtn9", "openreview": "https://openreview.net/forum?id=MfGRUVFtn9", "poster": "/media/PosterPDFs/NeurIPS%202024/95499.png?t=1731493468.0364716", "project": "", "author_site": "Weilin Lin, Li Liu, Shaokui Wei, Jianze Li, Hui Xiong", "tldr": "", "abstract": "The security threat of backdoor attacks is a central concern for deep neural networks (DNNs). Recently, without poisoned data, unlearning models with clean data and then learning a pruning mask have contributed to backdoor defense. Additionally, vanilla fine-tuning with those clean data can help recover the lost clean accuracy. However, the behavior of clean unlearning is still under-explored, and vanilla fine-tuning unintentionally induces back the backdoor effect. In this work, we first investigate model unlearning from the perspective of weight changes and gradient norms, and find two interesting observations in the backdoored model: 1) the weight changes between poison and clean unlearning are positively correlated, making it possible for us to identify the backdoored-related neurons without using poisoned data; 2) the neurons of the backdoored model are more active (*i.e.*, larger gradient norm) than those in the clean model, suggesting the need to suppress the gradient norm during fine-tuning. Then, we propose an effective two-stage defense method. In the first stage, an efficient *Neuron Weight Change (NWC)-based Backdoor Reinitialization* is proposed based on observation 1). In the second stage, based on observation 2), we design an *Activeness-Aware Fine-Tuning* to replace the vanilla fine-tuning. Extensive experiments, involving eight backdoor attacks on three benchmark datasets, demonstrate the superior performance of our proposed method compared to recent state-of-the-art backdoor defense approaches. The code is available at https://github.com/linweiii/TSBD.git.", "keywords": "Deep Neural Network; AI security; Backdoor Defense", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/b340fc35d75523ead46b0434a368d01415533281.zip", "author": "Weilin Lin;Li Liu;Shaokui Wei;Jianze Li;Hui Xiong", "authorids": "~Weilin_Lin1;~Li_Liu8;~Shaokui_Wei1;~Jianze_Li1;~Hui_Xiong1", "gender": "M;F;M;M;M", "homepage": ";https://liliu-avril.github.io/;https://shawkui.github.io/;http://www.sribd.cn/teacher/21;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "257/4919;33/4528-36;323/4243;195/6246;262/1686-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=9U1TAFsAAAAJ;KQ2S01UAAAAJ;WHkEfnsAAAAJ;;cVDF1tkAAAAJ", "orcid": ";;;0000-0002-0760-7994;0000-0001-6016-6465", "linkedin": ";;;;", "or_profile": "~Weilin_Lin1;~Li_Liu8;~Shaokui_Wei1;~Jianze_Li1;~Hui_Xiong1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);The Hong Kong University of Science and Technology (Guangzhou);The Chinese University of Hong Kong, Shenzhen;;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust-gz.edu;hkust-gz.edu.cn;cuhk.edu.cn;;hkust.edu", "position": "PhD student;Assistant Professor;PhD student;;Full Professor", "bibtex": "@inproceedings{\nlin2024unveiling,\ntitle={Unveiling and Mitigating Backdoor Vulnerabilities based on Unlearning Weight Changes and Backdoor Activeness},\nauthor={Weilin Lin and Li Liu and Shaokui Wei and Jianze Li and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MfGRUVFtn9}\n}", "github": "", "reviewers": "Vmzu;kmL5;P1hB;obuU", "pdf_size": 1718746, "rating": "4;5;6;8", "confidence": "3;3;4;3", "soundness": "2;2;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "94;96;90;82", "wc_strengths": "110;28;70;50", "wc_weaknesses": "155;146;68;25", "wc_questions": "140;20;194;2", "wc_limitations": "121;12;27;5", "wc_review": "620;302;449;164", "wc_reply_reviewers": "0;12;119;0", "wc_reply_authors": "121;106;485;0", "reply_reviewers": "0;1;2;0", "reply_authors": "3;3;3;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 90.5, 5.361902647381804 ], "wc_strengths_avg": [ 64.5, 30.17863482664516 ], "wc_weaknesses_avg": [ 98.5, 54.270157545376634 ], "wc_questions_avg": [ 89.0, 80.55432949258531 ], "wc_limitations_avg": [ 41.25, 46.72459202604128 ], "wc_review_avg": [ 383.75, 169.59123650707897 ], "wc_reply_reviewers_avg": [ 32.75, 50.03686141236279 ], "wc_reply_authors_avg": [ 178.0, 183.27984068085613 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15350695209279932884&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "hkust-gz.edu;hkust-gz.edu.cn;cuhk.edu.cn;;hkust.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.cuhk.edu.cn", "aff_unique_abbr": "HKUST;CUHK", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": "Guangzhou;Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Universality in Transfer Learning for Linear Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95498", "id": "MhWaMOkoN3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MhWaMOkoN3", "openreview": "https://openreview.net/forum?id=MhWaMOkoN3", "poster": "/media/PosterPDFs/NeurIPS%202024/95498.png?t=1731644789.8039474", "project": "", "author_site": "Reza Ghane, Danil Akhtiamov, Babak Hassibi", "tldr": "", "abstract": "We study the problem of transfer learning and fine-tuning in linear models for both regression and binary classification. In particular, we consider the use of stochastic gradient descent (SGD) on a linear model initialized with pretrained weights and using a small training data set from the target distribution. In the asymptotic regime of large models, we provide an exact and rigorous analysis and relate the generalization errors (in regression) and classification errors (in binary classification) for the pretrained and fine-tuned models. In particular, we give conditions under which the fine-tuned model outperforms the pretrained one. An important aspect of our work is that all the results are \"universal\", in the sense that they depend only on the first and second order statistics of the target distribution. They thus extend well beyond the standard Gaussian assumptions commonly made in the literature. Furthermore, our universality results extend beyond standard SGD training to the test error of a classification task trained using ridge regression.", "keywords": "Gaussian Universality;Transfer Learning;Linear Regression;Binary Classification", "primary_area": "learning_theory", "supplementary_material": "", "author": "Reza Ghane;Danil Akhtiamov;Babak Hassibi", "authorids": "~Reza_Ghane1;~Danil_Akhtiamov1;~Babak_Hassibi1", "gender": ";M;M", "homepage": ";https://www.cms.caltech.edu/people/danil-akhtiamov;https://ee.caltech.edu/people/hassibi", "dblp": ";295/8391;09/1803.html", "google_scholar": "2f09HrEAAAAJ;;1XoZPhEAAAAJ", "orcid": "0000-0002-3232-9359;;", "linkedin": ";;babak-hassibi-2853614/", "or_profile": "~Reza_Ghane1;~Danil_Akhtiamov1;~Babak_Hassibi1", "aff": "California Institute of Technology;Deparment of Computing + Mathematical Sciences, California Institute of Technology;California Institute of Technology", "aff_domain": "caltech.edu;cms.caltech.edu;caltech.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nghane2024universality,\ntitle={Universality in Transfer Learning for Linear Models},\nauthor={Reza Ghane and Danil Akhtiamov and Babak Hassibi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MhWaMOkoN3}\n}", "github": "", "reviewers": "KpbC;LHAw;hU8n;vrcM", "pdf_size": 558243, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "2;3;3;4", "novelty": "3;2;2;2", "presentation": "3;2;3;3", "wc_summary": "113;69;59;76", "wc_strengths": "67;90;53;94", "wc_weaknesses": "125;53;14;196", "wc_questions": "58;39;43;54", "wc_limitations": "35;10;4;14", "wc_review": "398;261;173;434", "wc_reply_reviewers": "0;0;291;210", "wc_reply_authors": "0;0;941;0", "reply_reviewers": "0;0;4;1", "reply_authors": "1;1;4;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 20.40067400847335 ], "wc_strengths_avg": [ 76.0, 16.80773631397161 ], "wc_weaknesses_avg": [ 97.0, 69.65988802747245 ], "wc_questions_avg": [ 48.5, 7.762087348130012 ], "wc_limitations_avg": [ 15.75, 11.669940016983807 ], "wc_review_avg": [ 316.5, 105.02499702451793 ], "wc_reply_reviewers_avg": [ 125.25, 128.48224585521535 ], "wc_reply_authors_avg": [ 235.25, 407.4649524805784 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XL51N9PbsZcJ:scholar.google.com/&scioq=Universality+in+Transfer+Learning+for+Linear+Models&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "caltech.edu;cms.caltech.edu;caltech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "California Institute of Technology;", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;", "aff_unique_abbr": "Caltech;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "On the Worst Prompt Performance of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95497", "id": "Mi853QaJx6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mi853QaJx6", "openreview": "https://openreview.net/forum?id=Mi853QaJx6", "poster": "/media/PosterPDFs/NeurIPS%202024/95497.png?t=1731390302.2532747", "project": "", "author_site": "Bowen Cao, Deng Cai, Zhisong Zhang, Yuexian Zou, Wai Lam", "tldr": "", "abstract": "The performance of large language models (LLMs) is acutely sensitive to the phrasing of prompts, which raises significant concerns about their reliability in real-world scenarios. Existing studies often divide prompts into task-level instructions and case-level inputs and primarily focus on evaluating and improving robustness against variations in tasks-level instructions. However, this setup fails to fully address the diversity of real-world user queries and assumes the existence of task-specific datasets. To address these limitations, we introduce RobustAlpacaEval, a new benchmark that consists of semantically equivalent case-level queries and emphasizes the importance of using the worst prompt performance to gauge the lower bound of model performance. Extensive experiments on RobustAlpacaEval with ChatGPT and six open-source LLMs from the Llama, Mistral, and Gemma families uncover substantial variability in model performance; for instance, a difference of 45.48% between the worst and best performance for the Llama-2-70B-chat model, with its worst performance dipping as low as 9.38%. We further illustrate the difficulty in identifying the worst prompt from both model-agnostic and model-dependent perspectives, emphasizing the absence of a shortcut to characterize the worst prompt. We also attempt to enhance the worst prompt performance using existing prompt engineering and prompt consistency methods, but find that their impact is limited. These findings underscore the need to create more resilient LLMs that can maintain high performance across diverse prompts.", "keywords": "Natural Language Processing;Large Language Model;Instruction following;Model Robustness", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/e70dcafa3a8729e48e08622382b8cbf0d3e3b5f0.zip", "author": "Bowen Cao;Deng Cai;Zhisong Zhang;Yuexian Zou;Wai Lam", "authorids": "~Bowen_Cao1;~Deng_Cai1;~Zhisong_Zhang1;~Yuexian_Zou3;~Wai_Lam1", "gender": "M;M;M;F;M", "homepage": ";https://jcyk.github.io/;https://zzsfornlp.github.io/;https://web.pkusz.edu.cn/adsp;http://www.se.cuhk.edu.hk/~textmine", "dblp": "265/5536;c/DCai-2;174/7415;https://dblp.uni-trier.de/pid/51/6518.html;48/1707", "google_scholar": "jaI8ym8AAAAJ;KpbRLYcAAAAJ;373vlUEAAAAJ;;ewA4NAcAAAAJ", "orcid": ";;;;", "linkedin": "bowen-cao-0ba2a61a3/;;;;", "or_profile": "~Bowen_Cao1;~Deng_Cai1;~Zhisong_Zhang1;~Yuexian_Zou3;~Wai_Lam1", "aff": "Peking University;Tencent AI Lab;Tencent;Peking University;The Chinese University of Hong Kong", "aff_domain": "stu.pku.edu.cn;tencent.com;tencent.com;pku.edu.cn;cuhk.edu.hk", "position": "MS student;Research Scientist;Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\ncao2024on,\ntitle={On the Worst Prompt Performance of Large Language Models},\nauthor={Bowen Cao and Deng Cai and Zhisong Zhang and Yuexian Zou and Wai Lam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mi853QaJx6}\n}", "github": "", "reviewers": "7q8k;Wkdh;4kDi;H71g;y4yW", "pdf_size": 2626971, "rating": "5;6;6;6;6", "confidence": "4;3;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "56;60;42;50;110", "wc_strengths": "79;71;57;52;47", "wc_weaknesses": "123;209;105;21;99", "wc_questions": "111;2;68;138;133", "wc_limitations": "1;7;19;22;18", "wc_review": "370;349;291;283;407", "wc_reply_reviewers": "71;16;24;0;39", "wc_reply_authors": "281;123;121;71;41", "reply_reviewers": "1;1;1;0;1", "reply_authors": "3;3;3;2;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 63.6, 23.979991659714983 ], "wc_strengths_avg": [ 61.2, 11.973303637676613 ], "wc_weaknesses_avg": [ 111.4, 60.04531622033478 ], "wc_questions_avg": [ 90.4, 50.63832540675097 ], "wc_limitations_avg": [ 13.4, 8.014985963805552 ], "wc_review_avg": [ 340.0, 47.15930449020638 ], "wc_reply_reviewers_avg": [ 30.0, 24.058262613912916 ], "wc_reply_authors_avg": [ 127.4, 82.82415106718571 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6123724356957948, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12944476798162515752&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stu.pku.edu.cn;tencent.com;tencent.com;pku.edu.cn;cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Peking University;Tencent;Chinese University of Hong Kong", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Peking U;Tencent AI Lab;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "EDT: An Efficient Diffusion Transformer Framework Inspired by Human-like Sketching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95496", "id": "MihOCXte41", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MihOCXte41", "openreview": "https://openreview.net/forum?id=MihOCXte41", "poster": "/media/PosterPDFs/NeurIPS%202024/95496.png?t=1731684936.3542662", "project": "", "author_site": "Xinwang Chen, Ning Liu, Yichen Zhu, Feifei Feng, Jian Tang", "tldr": "", "abstract": "Transformer-based Diffusion Probabilistic Models (DPMs) have shown more potential than CNN-based DPMs, yet their extensive computational requirements hinder widespread practical applications. To reduce the computation budget of transformer-based DPMs, this work proposes the Efficient Diffusion Transformer (EDT) framework. This framework includes a lightweight-design diffusion model architecture, and a training-free Attention Modulation Matrix and its alternation arrangement in EDT inspired by human-like sketching. Additionally, we propose a token relation-enhanced masking training strategy tailored explicitly for EDT to augment its token relation learning capability. Our extensive experiments demonstrate the efficacy of EDT. The EDT framework reduces training and inference costs and surpasses existing transformer-based diffusion models in image synthesis performance, thereby achieving a significant overall enhancement. With lower FID, EDT-S, EDT-B, and EDT-XL attained speed-ups of 3.93x, 2.84x, and 1.92x respectively in the training phase, and 2.29x, 2.29x, and 2.22x respectively in inference, compared to the corresponding sizes of MDTv2. Our code is available at https://github.com/xinwangChen/EDT.", "keywords": "Efficient AI; Lightweight architecture; Optimization; Diffusion", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Xinwang Chen;Ning Liu;Yichen Zhu;Feifei Feng;Jian Tang", "authorids": "~Xinwang_Chen2;~Ning_Liu4;~Yichen_Zhu1;~Feifei_Feng1;~Jian_Tang5", "gender": "M;M;M;M;M", "homepage": ";;;https://ecs.syr.edu/faculty/tang;https://github.com/xinwangChen", "dblp": "83/622-7;;27/4916.html;181/2667-8;372/1439", "google_scholar": "OFOJM5MAAAAJ;eyKyrbsAAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": "0000-0003-4943-6625;0000-0001-5126-838X;;;", "linkedin": ";;fengff/;;", "or_profile": "~Ning_Liu4;~Yichen_Zhu1;~Feifei_Feng1;~Jian_Tang5;~xinwang_chen1", "aff": "Midea Group;Midea Group;Midea Group;x-humanoid;University of Science and Technology Beijing", "aff_domain": "midea.com;midea.com;midea.com;x-humanoid.com;ustb.edu.cn", "position": "Researcher;Researcher;Researcher;Researcher;MS student", "bibtex": "@inproceedings{\nchen2024edt,\ntitle={{EDT}: An Efficient Diffusion Transformer Framework Inspired by Human-like Sketching},\nauthor={Xinwang Chen and Ning Liu and Yichen Zhu and Feifei Feng and Jian Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MihOCXte41}\n}", "github": "", "reviewers": "8pvq;QU5R;FhNv;MD5e", "pdf_size": 14072829, "rating": "5;5;5;6", "confidence": "5;4;5;1", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "87;106;83;142", "wc_strengths": "33;225;34;79", "wc_weaknesses": "137;89;311;47", "wc_questions": "18;162;28;64", "wc_limitations": "4;10;6;32", "wc_review": "279;592;462;364", "wc_reply_reviewers": "310;0;140;6", "wc_reply_authors": "701;265;354;16", "reply_reviewers": "2;0;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.6393596310755 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.5, 23.32916629457641 ], "wc_strengths_avg": [ 92.75, 78.58236125238284 ], "wc_weaknesses_avg": [ 146.0, 100.44401425669923 ], "wc_questions_avg": [ 68.0, 56.90342696182718 ], "wc_limitations_avg": [ 13.0, 11.180339887498949 ], "wc_review_avg": [ 424.25, 116.50402353567021 ], "wc_reply_reviewers_avg": [ 114.0, 126.24579200908045 ], "wc_reply_authors_avg": [ 334.0, 245.44551330183242 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9684959969581861, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8459796995390502174&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "midea.com;midea.com;midea.com;x-humanoid.com;ustb.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Midea Group;x-humanoid;University of Science and Technology Beijing", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mideaglobal.com;;http://www.ustb.edu.cn", "aff_unique_abbr": "Midea;;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "LG-CAV: Train Any Concept Activation Vector with Language Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95495", "id": "MjD9Y05Q6i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MjD9Y05Q6i", "openreview": "https://openreview.net/forum?id=MjD9Y05Q6i", "poster": "/media/PosterPDFs/NeurIPS%202024/95495.png?t=1731143007.444777", "project": "", "author_site": "Qihan Huang, Jie Song, Mengqi Xue, Haofei Zhang, Bingde Hu, Huiqiong Wang, Hao Jiang, Xingen Wang, Mingli Song", "tldr": "", "abstract": "Concept activation vector (CAV) has attracted broad research interest in explainable AI, by elegantly attributing model predictions to specific concepts. However, the training of CAV often necessitates a large number of high-quality images, which are expensive to curate and thus limited to a predefined set of concepts. To address this issue, we propose Language-Guided CAV (LG-CAV) to harness the abundant concept knowledge within the certain pre-trained vision-language models (e.g., CLIP). This method allows training any CAV without labeled data, by utilizing the corresponding concept descriptions as guidance. To bridge the gap between vision-language model and the target model, we calculate the activation values of concept descriptions on a common pool of images (probe images) with vision-language model and utilize them as language guidance to train the LG-CAV. Furthermore, after training high-quality LG-CAVs related to all the predicted classes in the target model, we propose the activation sample reweighting (ASR), serving as a model correction technique, to improve the performance of the target model in return. Experiments on four datasets across nine architectures demonstrate that LG-CAV achieves significantly superior quality to previous CAV methods given any concept, and our model correction method achieves state-of-the-art performance compared to existing concept-based methods. Our code is available at https://github.com/hqhQAQ/LG-CAV.", "keywords": "Explainability for Computer Vision;Concept Activation Vector;Cross-Modal Model", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/ef9c42695f74a0029af4060cc8fb761d2f9e625e.zip", "author": "Qihan Huang;Jie Song;Mengqi Xue;Haofei Zhang;Bingde Hu;Huiqiong Wang;Hao Jiang;Xingen Wang;Mingli Song", "authorids": "~Qihan_Huang1;~Jie_Song3;~Mengqi_Xue2;~Haofei_Zhang1;~Bingde_Hu1;~Huiqiong_Wang1;~Hao_Jiang13;~Xingen_Wang1;~Mingli_Song1", "gender": "M;M;F;;M;F;M;M;M", "homepage": "https://github.com/hqhQAQ;https://person.zju.edu.cn/en/NB20021;https://xmq1221.github.io/;;https://www.vipazoo.cn/people/hubingde;;;https://person.zju.edu.cn/xingenwang;https://person.zju.edu.cn/msong", "dblp": ";09/4756-11.html;223/7789;270/0826;353/7326;19/1682;;;71/5333", "google_scholar": ";4OjO-WYAAAAJ;https://scholar.google.co.jp/citations?user=G4rnrFEAAAAJ;7-zeA3UAAAAJ;;;0TvdOEcAAAAJ;S8C1Y0gAAAAJ;7oLbhAwAAAAJ", "orcid": ";0000-0003-3671-6521;0000-0003-4936-4887;;0000-0003-2556-9239;;;;0000-0003-2621-6048", "linkedin": ";;;;;;;;", "or_profile": "~Qihan_Huang1;~Jie_Song3;~Mengqi_Xue2;~Haofei_Zhang1;~Bingde_Hu1;~Huiqiong_Wang1;~Hao_Jiang13;~Xingen_Wang1;~Mingli_Song1", "aff": "Zhejiang University;Zhejiang University;Hangzhou City University;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;hzcu.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn", "position": "PhD student;Associate Professor;Researcher;PhD student;Postdoc;Lecturer;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nhuang2024lgcav,\ntitle={{LG}-{CAV}: Train Any Concept Activation Vector with Language Guidance},\nauthor={Qihan Huang and Jie Song and Mengqi Xue and Haofei Zhang and Bingde Hu and Huiqiong Wang and Hao Jiang and Xingen Wang and Mingli Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MjD9Y05Q6i}\n}", "github": "", "reviewers": "V8uU;sN5p;TRvX", "pdf_size": 3585099, "rating": "5;6;7", "confidence": "3;4;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "115;62;78", "wc_strengths": "70;25;86", "wc_weaknesses": "119;104;44", "wc_questions": "140;3;9", "wc_limitations": "2;1;9", "wc_review": "446;195;226", "wc_reply_reviewers": "27;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 85.0, 22.19609575278199 ], "wc_strengths_avg": [ 60.333333333333336, 25.82419193099542 ], "wc_weaknesses_avg": [ 89.0, 32.4037034920393 ], "wc_questions_avg": [ 50.666666666666664, 63.215680192672316 ], "wc_limitations_avg": [ 4.0, 3.559026084010437 ], "wc_review_avg": [ 289.0, 111.73480508179475 ], "wc_reply_reviewers_avg": [ 9.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MUp9N1zMLIkJ:scholar.google.com/&scioq=LG-CAV:+Train+Any+Concept+Activation+Vector+with+Language+Guidance&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;hzcu.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;0;0;2;0;0", "aff_unique_norm": "Zhejiang University;Hangzhou City University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;http://www.hghedu.com;https://www.alibaba.com", "aff_unique_abbr": "ZJU;;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Scalable Kernel Inverse Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95494", "id": "Mktgayam7U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mktgayam7U", "openreview": "https://openreview.net/forum?id=Mktgayam7U", "poster": "/media/PosterPDFs/NeurIPS%202024/95494.png?t=1731754083.5425513", "project": "", "author_site": "Youyuan Long, Tolga Ok, Pedro Zattoni Scroccaro, Peyman Mohajerin Esfahani", "tldr": "", "abstract": "Inverse Optimization (IO) is a framework for learning the unknown objective function of an expert decision-maker from a past dataset.\nIn this paper, we extend the hypothesis class of IO objective functions to a reproducing kernel Hilbert space (RKHS), thereby enhancing feature representation to an infinite-dimensional space.\nWe demonstrate that a variant of the representer theorem holds for a specific training loss, allowing the reformulation of the problem as a finite-dimensional convex optimization program.\nTo address scalability issues commonly associated with kernel methods, we propose the Sequential Selection Optimization (SSO) algorithm to efficiently train the proposed Kernel Inverse Optimization (KIO) model.\nFinally, we validate the generalization capabilities of the proposed KIO model and the effectiveness of the SSO algorithm through learning-from-demonstration tasks on the MuJoCo benchmark.", "keywords": "Optimization;Imitation Learning;Inverse Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/afb8964b9822d30676c0200b13955a53dfeef391.zip", "author": "Youyuan Long;Tolga Ok;Pedro Zattoni Scroccaro;Peyman Mohajerin Esfahani", "authorids": "~Youyuan_Long1;~Tolga_Ok1;~Pedro_Zattoni_Scroccaro1;~Peyman_Mohajerin_Esfahani2", "gender": "M;M;;", "homepage": ";https://tolgaok.github.io;https://sites.google.com/view/pzattoniscroccaro/home;https://mohajerinesfahani.github.io/", "dblp": ";;;19/7734", "google_scholar": ";https://scholar.google.com.tr/citations?user=fUZamLEAAAAJ;RB8JjJAAAAAJ;ZTan-7YAAAAJ", "orcid": ";;;0000-0003-1286-8782", "linkedin": "youyuan-long-082432260/;;pedrozattoniscroccaro/;", "or_profile": "~Youyuan_Long1;~Tolga_Ok1;~Pedro_Zattoni_Scroccaro1;~Peyman_Mohajerin_Esfahani2", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology;Delft University of Technology", "aff_domain": "student.tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl", "position": "MS student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlong2024scalable,\ntitle={Scalable Kernel Inverse Optimization},\nauthor={Youyuan Long and Tolga Ok and Pedro Zattoni Scroccaro and Peyman Mohajerin Esfahani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mktgayam7U}\n}", "github": "", "reviewers": "o6JR;tcjN;9Go2", "pdf_size": 400226, "rating": "6;6;7", "confidence": "2;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "148;43;104", "wc_strengths": "104;45;46", "wc_weaknesses": "81;35;126", "wc_questions": "47;1;266", "wc_limitations": "1;1;34", "wc_review": "381;125;576", "wc_reply_reviewers": "13;13;172", "wc_reply_authors": "0;0;174", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 98.33333333333333, 43.05293898859352 ], "wc_strengths_avg": [ 65.0, 27.58018612458347 ], "wc_weaknesses_avg": [ 80.66666666666667, 37.15134213217905 ], "wc_questions_avg": [ 104.66666666666667, 115.61526235272265 ], "wc_limitations_avg": [ 12.0, 15.556349186104045 ], "wc_review_avg": [ 360.6666666666667, 184.68050489666985 ], "wc_reply_reviewers_avg": [ 66.0, 74.95331880577403 ], "wc_reply_authors_avg": [ 58.0, 82.02438661763951 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8082924594814651465&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "student.tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Implicit Regularization of Decentralized Gradient Descent for Sparse Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95493", "id": "MlADRQI0Wf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MlADRQI0Wf", "openreview": "https://openreview.net/forum?id=MlADRQI0Wf", "poster": "/media/PosterPDFs/NeurIPS%202024/95493.png?t=1731467974.0740352", "project": "", "author_site": "Tongle Wu, Ying Sun", "tldr": "", "abstract": "We consider learning a sparse model from linear measurements taken by a network of agents. Different from existing decentralized methods designed based on the LASSO regression with explicit $\\ell_1$ norm regularization, we exploit the implicit regularization of decentralized optimization method applied to an over-parameterized nonconvex least squares formulation without penalization. Our first result shows that despite nonconvexity, if the network connectivity is good, the well-known decentralized gradient descent algorithm (DGD) with small initialization and early stopping can compute the statistically optimal solution. Sufficient conditions on the initialization scale, choice of step size, network connectivity, and stopping time are further provided to achieve convergence. Our result recovers the convergence rate of gradient descent in the centralized setting, showing its tightness. \nBased on the analysis of DGD, we further propose a communication-efficient version, termed T-DGD, by truncating the iterates before transmission. In the high signal-to-noise ratio (SNR) regime, we show that T-DGD achieves comparable statistical accuracy to DGD, while the communication cost is logarithmic in the number of parameters. Numerical results are provided to validate the effectiveness of DGD and T-DGD for sparse learning through implicit regularization.", "keywords": "Decentralized optimization;Implicit regularization;Sparse regression", "primary_area": "optimization", "supplementary_material": "/attachment/c7c36d89fc9ae540d5ca9ec66b7b0d9197f73a57.zip", "author": "Tongle Wu;Ying Sun", "authorids": "~Tongle_Wu2;~Ying_Sun5", "gender": ";F", "homepage": ";https://ysunac.github.io", "dblp": ";", "google_scholar": ";M9uQsUQAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Tongle_Wu2;~Ying_Sun5", "aff": ";Pennsylvania State University", "aff_domain": ";psu.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nwu2024implicit,\ntitle={Implicit Regularization of Decentralized Gradient Descent for Sparse Regression},\nauthor={Tongle Wu and Ying Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MlADRQI0Wf}\n}", "github": "", "reviewers": "E5yh;BLgr;Agu7;rjVx", "pdf_size": 1732665, "rating": "5;5;6;6", "confidence": "4;4;2;2", "soundness": "3;4;4;3", "novelty": "3;2;4;3", "presentation": "2;3;4;3", "wc_summary": "64;43;45;88", "wc_strengths": "67;12;23;18", "wc_weaknesses": "71;141;7;41", "wc_questions": "45;1;34;2", "wc_limitations": "21;1;1;1", "wc_review": "268;198;110;150", "wc_reply_reviewers": "0;22;40;14", "wc_reply_authors": "0;38;18;78", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 60.0, 18.12456896039186 ], "wc_strengths_avg": [ 30.0, 21.714050750608465 ], "wc_weaknesses_avg": [ 65.0, 49.37610758251404 ], "wc_questions_avg": [ 20.5, 19.397164741270824 ], "wc_limitations_avg": [ 6.0, 8.660254037844387 ], "wc_review_avg": [ 181.5, 58.862127042776834 ], "wc_reply_reviewers_avg": [ 19.0, 14.45683229480096 ], "wc_reply_authors_avg": [ 33.5, 28.99568933479596 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2103476100979190407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";psu.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Intrinsic Robustness of Prophet Inequality to Strategic Reward Signaling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95492", "id": "Mmcy1p15Hc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mmcy1p15Hc", "openreview": "https://openreview.net/forum?id=Mmcy1p15Hc", "poster": "/media/PosterPDFs/NeurIPS%202024/95492.png?t=1731607840.8597703", "project": "", "author_site": "Wei Tang, Haifeng Xu, Ruimin Zhang, Derek Zhu", "tldr": "", "abstract": "Prophet inequality concerns a basic optimal stopping problem and states that simple threshold stopping policies --- i.e., accepting the first reward larger than a certain threshold --- can achieve tight $\\frac{1}{2}$-approximation to the optimal prophet value. Motivated by its economic applications, this paper studies the robustness of this approximation to natural strategic manipulations in which each random reward is associated with a self-interested player who may selectively reveal his realized reward to the searcher in order to maximize his probability of being selected. \n\nWe say a threshold policy is $\\alpha$(-strategically)-robust if it (a) achieves the $\\alpha$-approximation to the prophet value for strategic players; and (b) meanwhile remains a $\\frac{1}{2}$-approximation in the standard non-strategic setting.\nStarting with a characterization of each player's optimal information revealing strategy, we demonstrate the intrinsic robustness of prophet inequalities to strategic reward signaling through the following results:\n(1) for arbitrary reward distributions, there is a threshold policy that is $\\frac{1-\\frac{1}{e}}{2}$-robust, and this ratio is tight;\n(2) for i.i.d. reward distributions, there is a threshold policy that is $\\frac{1}{2}$-robust, which is tight for the setting; \nand (3) for log-concave (but non-identical) reward distributions, the $\\frac{1}{2}$-robustness can also be achieved under certain regularity assumptions.", "keywords": "prophet inequality;optimal stopping;robustness;strategic information revealing", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Wei Tang;Haifeng Xu;Ruimin Zhang;Derek Zhu", "authorids": "~Wei_Tang1;~Haifeng_Xu1;~Ruimin_Zhang1;~Derek_Zhu1", "gender": "M;M;M;", "homepage": "https://wtang.org/;http://www.haifeng-xu.com/;https://cs.uchicago.edu/people/ruimin-zhang/;", "dblp": ";04/1895;;", "google_scholar": ";nLgg388AAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wei_Tang1;~Haifeng_Xu1;~Ruimin_Zhang1;~Derek_Zhu1", "aff": "Columbia University;University of Chicago;University of Chicago;", "aff_domain": "columbia.edu;cs.uchicago.edu;uchicago.edu;", "position": "Postdoc;Assistant Professor;PhD student;", "bibtex": "@inproceedings{\ntang2024intrinsic,\ntitle={Intrinsic Robustness of Prophet Inequality to Strategic Reward Signaling},\nauthor={Wei Tang and Haifeng Xu and Ruimin Zhang and Derek Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mmcy1p15Hc}\n}", "github": "", "reviewers": "1242;vri4;BRGn;mXL5", "pdf_size": 395151, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "4;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "99;193;146;168", "wc_strengths": "32;29;118;72", "wc_weaknesses": "25;247;49;32", "wc_questions": "231;74;5;262", "wc_limitations": "2;16;9;16", "wc_review": "389;559;327;550", "wc_reply_reviewers": "11;295;4;303", "wc_reply_authors": "35;484;0;1511", "reply_reviewers": "1;2;1;2", "reply_authors": "2;4;1;5", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 151.5, 34.57238782612506 ], "wc_strengths_avg": [ 62.75, 36.134298111351214 ], "wc_weaknesses_avg": [ 88.25, 92.06892798333215 ], "wc_questions_avg": [ 143.0, 106.8994854992296 ], "wc_limitations_avg": [ 10.75, 5.80409338312195 ], "wc_review_avg": [ 456.25, 100.71587511410503 ], "wc_reply_reviewers_avg": [ 153.25, 145.79844820847717 ], "wc_reply_authors_avg": [ 507.5, 609.995286867038 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Js7Pb5W2ycwJ:scholar.google.com/&scioq=Intrinsic+Robustness+of+Prophet+Inequality+to+Strategic+Reward+Signaling&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "columbia.edu;cs.uchicago.edu;uchicago.edu;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Columbia University;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.uchicago.edu", "aff_unique_abbr": "Columbia;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Discovery of Formulas for Mathematical Constants", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95491", "id": "MncgmW8b6q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MncgmW8b6q", "openreview": "https://openreview.net/forum?id=MncgmW8b6q", "poster": "/media/PosterPDFs/NeurIPS%202024/95491.png?t=1733683337.377171", "project": "", "author_site": "Michael Shalyt, Uri Seligmann, Itay Beit Halachmi, Ofir David, Rotem Elimelech, Ido Kaminer", "tldr": "", "abstract": "Ongoing efforts that span over decades show a rise of AI methods for accelerating scientific discovery, yet accelerating discovery in mathematics remains a persistent challenge for AI.\nSpecifically, AI methods were not effective in creation of formulas for mathematical constants because each such formula must be correct for infinite digits of precision, with 'near-true' formulas providing no insight toward the correct ones. Consequently, formula discovery lacks a clear distance metric needed to guide automated discovery in this realm.\n\nIn this work, we propose a systematic methodology for categorization, characterization, and pattern identification of such formulas. The key to our methodology is introducing metrics based on the convergence dynamics of the formulas, rather than on the numerical value of the formula. These metrics enable the first automated clustering of mathematical formulas.\nWe demonstrate this methodology on Polynomial Continued Fraction formulas, which are ubiquitous in their intrinsic connections to mathematical constants, and generalize many mathematical functions and structures.\nWe test our methodology on a set of 1,768,900 such formulas, identifying many known formulas for mathematical constants, and discover previously unknown formulas for $\\pi$, $\\ln(2)$, Gauss', and Lemniscate's constants. The uncovered patterns enable a direct generalization of individual formulas to infinite families, unveiling rich mathematical structures. \nThis success paves the way towards a generative model that creates formulas fulfilling specified mathematical properties, accelerating the rate of discovery of useful formulas.", "keywords": "AI for Science;Automated Conjecture Generation;Experimental Mathematics;Mathematical Constants;Irrationality Measure;Unsupervised Learning;Formula Generation;Continued Fractions", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/572df311e17f69eb8c732ae68457506741877ee2.zip", "author": "Michael Shalyt;Uri Seligmann;Itay Beit Halachmi;Ofir David;Rotem Elimelech;Ido Kaminer", "authorids": "~Michael_Shalyt1;~Uri_Seligmann1;~Itay_Beit_Halachmi1;~Ofir_David2;~Rotem_Elimelech1;~Ido_Kaminer1", "gender": ";M;Not Specified;Not Specified;M;M", "homepage": "http://shalyt.com;;;https://prove-me-wrong.com/;;https://kaminer.technion.ac.il", "dblp": ";;;;;", "google_scholar": ";;;;;RTSiWiMAAAAJ", "orcid": ";;;;;", "linkedin": "michael-shalyt-26858853/;uri-seligmann-06a6742b2/;itay-beit-halachmi-6831a7173/;;https://il.linkedin.com/in/rotem-elimelech-8b4b00145;", "or_profile": "~Michael_Shalyt1;~Uri_Seligmann1;~Itay_Beit_Halachmi1;~Ofir_David2;~Rotem_Elimelech1;~Ido_Kaminer1", "aff": "Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Electrical Engineering Department, Technion \u2013 Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion", "aff_domain": "technion.ac.il;campus.technion.ac.il;campus.technion.ac.il;technion.il;ee.technion.ac.il;technion.ac.il", "position": "Principal Researcher;Undergrad student;MS student;Researcher;MS student;Associate Professor", "bibtex": "@inproceedings{\nshalyt2024unsupervised,\ntitle={Unsupervised Discovery of Formulas for Mathematical Constants},\nauthor={Michael Shalyt and Uri Seligmann and Itay Beit Halachmi and Ofir David and Rotem Elimelech and Ido Kaminer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MncgmW8b6q}\n}", "github": "", "reviewers": "YVDn;Dgb8;zxY9;uW6k", "pdf_size": 14440536, "rating": "4;5;5;7", "confidence": "3;3;3;3", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "28;64;126;123", "wc_strengths": "18;50;137;34", "wc_weaknesses": "22;47;145;110", "wc_questions": "125;153;58;249", "wc_limitations": "1;1;75;1", "wc_review": "194;315;541;517", "wc_reply_reviewers": "0;15;0;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.25, 41.27574953892418 ], "wc_strengths_avg": [ 59.75, 46.0129057982649 ], "wc_weaknesses_avg": [ 81.0, 48.92340952959023 ], "wc_questions_avg": [ 146.25, 68.63444834775026 ], "wc_limitations_avg": [ 19.5, 32.04293994002423 ], "wc_review_avg": [ 391.75, 144.01280325026661 ], "wc_reply_reviewers_avg": [ 11.0, 12.062338081814818 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SRxhw6tR8rsJ:scholar.google.com/&scioq=Unsupervised+Discovery+of+Formulas+for+Mathematical+Constants&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "technion.ac.il;campus.technion.ac.il;campus.technion.ac.il;technion.il;ee.technion.ac.il;technion.ac.il", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Technion \u2013 Israel Institute of Technology", "aff_unique_dep": ";Electrical Engineering Department", "aff_unique_url": "https://www.technion.ac.il;https://www.technion.ac.il", "aff_unique_abbr": "Technion;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Israel" }, { "title": "You Only Look Around: Learning Illumination-Invariant Feature for Low-light Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95490", "id": "MocRdX0n7B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MocRdX0n7B", "openreview": "https://openreview.net/forum?id=MocRdX0n7B", "poster": "/media/PosterPDFs/NeurIPS%202024/95490.png?t=1730690224.6782527", "project": "", "author_site": "Mingbo Hong, Shen Cheng, Haibin Huang, Haoqiang Fan, Shuaicheng Liu", "tldr": "", "abstract": "In this paper, we introduce YOLA, a novel framework for object detection in low-light scenarios. Unlike previous works, we propose to tackle this challenging problem from the perspective of feature learning. Specifically, we propose to learn illumination-invariant features through the Lambertian image formation model. We observe that, under the Lambertian assumption, it is feasible to approximate illumination-invariant feature maps by exploiting the interrelationships between neighboring color channels and spatially adjacent pixels. By incorporating additional constraints, these relationships can be characterized in the form of convolutional kernels, which can be trained in a detection-driven manner within a network. Towards this end, we introduce a novel module dedicated to the extraction of illumination-invariant features from low-light images, which can be easily integrated into existing object detection frameworks. Our empirical findings reveal significant improvements in low-light object detection tasks, as well as promising results in both well-lit and over-lit scenarios.", "keywords": "object detection\uff1blowlight object detection\uff1billumination invariant feature;", "primary_area": "machine_vision", "supplementary_material": "/attachment/673794ccb131080d9081e5f95a82379ef1e00271.zip", "author": "MingboHong;Shen Cheng;Haibin Huang;Haoqiang Fan;Shuaicheng Liu", "authorids": "~MingboHong1;~Shen_Cheng1;~Haibin_Huang1;~Haoqiang_Fan1;~Shuaicheng_Liu2", "gender": "M;M;M;M;M", "homepage": "https://mingbohong.github.io/;https://brotherhuang.github.io/;;http://www.liushuaicheng.org/;https://github.com/Sumching", "dblp": "227/6712;;143/7181;49/8652;176/7871", "google_scholar": "6DpdNBkAAAAJ;YDl1M80AAAAJ;;1DP9DAUAAAAJ;https://scholar.google.com.hk/citations?user=gBE3HvwAAAAJ", "orcid": ";;;0000-0002-8815-5335;", "linkedin": ";;;;", "or_profile": "~MingboHong1;~Haibin_Huang1;~Haoqiang_Fan1;~Shuaicheng_Liu2;~Sum_Ching1", "aff": "Megvii Technology Inc.;Kuaishou Technology;Megvii Technology Inc.;University of Electronic Science and Technology of China;Megvii Technology Inc.", "aff_domain": "megvii.com;kuaishou.com;megvii.com;uestc.edu.cn;megvii.com", "position": "Researcher;Sr.Research Scientist;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nmingbohong2024you,\ntitle={You Only Look Around: Learning Illumination-Invariant Feature for Low-light Object Detection},\nauthor={MingboHong and Shen Cheng and Haibin Huang and Haoqiang Fan and Shuaicheng Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MocRdX0n7B}\n}", "github": "", "reviewers": "yjJM;N71E;F2ap;krE6", "pdf_size": 5372280, "rating": "3;5;5;6", "confidence": "4;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "28;40;60;54", "wc_strengths": "13;46;39;56", "wc_weaknesses": "165;257;93;115", "wc_questions": "2;35;3;79", "wc_limitations": "41;23;3;2", "wc_review": "249;401;198;306", "wc_reply_reviewers": "0;20;0;14", "wc_reply_authors": "74;214;0;9", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 45.5, 12.439855304624729 ], "wc_strengths_avg": [ 38.5, 15.913830462839549 ], "wc_weaknesses_avg": [ 157.5, 63.093185051953114 ], "wc_questions_avg": [ 29.75, 31.379730719048563 ], "wc_limitations_avg": [ 17.25, 16.068213964221414 ], "wc_review_avg": [ 288.5, 75.3541637867477 ], "wc_reply_reviewers_avg": [ 8.5, 8.760707733967616 ], "wc_reply_authors_avg": [ 74.25, 85.58730922280476 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FdSBYxZDfBwJ:scholar.google.com/&scioq=You+Only+Look+Around:+Learning+Illumination-Invariant+Feature+for+Low-light+Object+Detection&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "megvii.com;kuaishou.com;megvii.com;uestc.edu.cn;megvii.com", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Megvii Technology;Kuaishou Technology;University of Electronic Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.megvii.com;https://www.kuaishou.com;https://www.uestc.edu.cn", "aff_unique_abbr": "Megvii;Kuaishou;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "WildPPG: A Real-World PPG Dataset of Long Continuous Recordings", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97729", "id": "MojU63gze2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MojU63gze2", "openreview": "https://openreview.net/forum?id=MojU63gze2", "poster": "/media/PosterPDFs/NeurIPS%202024/97729.png?t=1731039439.9789164", "project": "", "author_site": "Manuel Meier, Berken Utku Demirel, Christian Holz", "tldr": "", "abstract": "Reflective photoplethysmography (PPG) has become the default sensing technique in wearable devices to monitor cardiac activity via a person\u2019s heart rate (HR). However, PPG-based HR estimates can be substantially impacted by factors such as the wearer\u2019s activities, sensor placement and resulting motion artifacts, as well as environmental characteristics such as temperature and ambient light. These and other factors can significantly impact and decrease HR prediction reliability. In this paper, we show that state-of-the-art HR estimation methods struggle when processing representative data from everyday activities in outdoor environments, likely because they rely on existing datasets that captured controlled conditions. We introduce a novel multimodal dataset and benchmark results for continuous PPG recordings during outdoor activities from 16 participants over 13.5 hours, captured from four wearable sensors, each worn at a different location on the body, totaling 216 hours. Our recordings include accelerometer, temperature, and altitude data, as well as a synchronized Lead I-based electrocardiogram for ground-truth HR references. Participants completed a round trip from Zurich to Jungfraujoch, a tall mountain in Switzerland over the course of one day. The trip included outdoor and indoor activities such as walking, hiking, stair climbing, eating, drinking, and resting at various temperatures and altitudes (up to 3,571 m above sea level) as well as using cars, trains, cable cars, and lifts for transport\u2014all of which impacted participants\u2019 physiological dynamics. We also present a novel method that estimates HR values more robustly in such real-world scenarios than existing baselines.\n\nDataset & code for HR estimation: https://siplab.org/projects/WildPPG", "keywords": "Photoplethysmography;Dataset;Heart Rate Monitoring;multimodal", "primary_area": "", "supplementary_material": "/attachment/27fa79055e90f46173b565814b8018fba9c7522a.zip", "author": "Manuel Meier;Berken Utku Demirel;Christian Holz", "authorids": "~Manuel_Meier1;~Berken_Utku_Demirel2;~Christian_Holz1", "gender": "M;M;M", "homepage": ";https://berken-demirel.github.io/BerkenUtku-Demirel/;https://siplab.org", "dblp": "292/0054;283/8117;79/7439-1", "google_scholar": "L6f-xg0AAAAJ;https://scholar.google.ch/citations?user=zbgxpdIAAAAJ;OfXP9jMAAAAJ", "orcid": "0000-0001-6593-7695;;0000-0001-9655-9519", "linkedin": ";;", "or_profile": "~Manuel_Meier1;~Berken_Utku_Demirel2;~Christian_Holz1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmeier2024wildppg,\ntitle={Wild{PPG}: A Real-World {PPG} Dataset of Long Continuous Recordings},\nauthor={Manuel Meier and Berken Utku Demirel and Christian Holz},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=MojU63gze2}\n}", "github": "", "reviewers": "6H9n;nZ7a;ZUNz;gCKp", "pdf_size": 4894102, "rating": "6;7;7;9", "confidence": "4;4;4;5", "wc_summary_and_contributions": "60;87;20;53", "wc_strengths": "41;95;12;21", "wc_improvement": "57;180;51;60", "wc_limitations": "16;66;29;15", "wc_correctness": "1;28;20;4", "wc_clarity": "1;25;1;1", "wc_relation_to_prior_work": "15;32;1;13", "wc_documentation": "14;28;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "206;542;136;169", "wc_reply_reviewers": "10;228;60;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 55.0, 23.86419912756345 ], "wc_strengths_avg": [ 42.25, 32.21315724979468 ], "wc_improvement_avg": [ 87.0, 53.791263231123324 ], "wc_limitations_avg": [ 31.5, 20.670026608594387 ], "wc_correctness_avg": [ 13.25, 11.166355717063647 ], "wc_clarity_avg": [ 7.0, 10.392304845413264 ], "wc_relation_to_prior_work_avg": [ 15.25, 11.053845484717073 ], "wc_documentation_avg": [ 11.0, 11.157956802210698 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 263.25, 162.8302413558366 ], "wc_reply_reviewers_avg": [ 78.0, 88.80315309717331 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zLzS8TVnUrIJ:scholar.google.com/&scioq=WildPPG:+A+Real-World+PPG+Dataset+of+Long+Continuous+Recordings&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "CLIPCEIL: Domain Generalization through CLIP via Channel rEfinement and Image-text aLignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95489", "id": "MqeCU0tXAY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MqeCU0tXAY", "openreview": "https://openreview.net/forum?id=MqeCU0tXAY", "poster": "/media/PosterPDFs/NeurIPS%202024/95489.png?t=1731644361.2689402", "project": "", "author_site": "Xi Yu, Shinjae Yoo, Yuewei Lin", "tldr": "", "abstract": "Domain generalization (DG) is a fundamental yet challenging topic in machine learning. Recently, the remarkable zero-shot capabilities of the large pre-trained vision-language model (e.g., CLIP) have made it popular for various downstream tasks. However, the effectiveness of this capacity often degrades when there are shifts in data distribution during testing compared to the training data. In this paper, we propose a novel method, known as CLIPCEIL, a model that utilizes Channel rEfinement and Image-text aLignment to facilitate the CLIP to the inaccessible $\\textit{out-of-distribution}$ test datasets that exhibit domain shifts. Specifically, we refine the feature channels in the visual domain to ensure they contain domain-invariant and class-relevant features by using a lightweight adapter. This is achieved by minimizing the inter-domain variance while maximizing the inter-class variance. In the meantime, we ensure the image-text alignment by aligning text embeddings of the class descriptions and their corresponding image embedding while further removing the domain-specific features. Moreover, our model integrates multi-scale CLIP features by utilizing a self-attention fusion module, technically implemented through one Transformer layer. Extensive experiments on five widely used benchmark datasets demonstrate that CLIPCEIL outperforms the existing state-of-the-art methods. The source code is available at \\url{https://github.com/yuxi120407/CLIPCEIL}.", "keywords": "Domain generalization;CLIP;domain invariant;image-text alignment", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xi Yu;Shinjae Yoo;Yuewei Lin", "authorids": "~Xi_Yu1;~Shinjae_Yoo1;~Yuewei_Lin1", "gender": "M;M;M", "homepage": "https://www.bnl.gov/staff/xyu1;;https://ywlincq.github.io/", "dblp": ";69/1062;41/1100", "google_scholar": ";https://scholar.google.com/citations?hl=en;wOFhljYAAAAJ", "orcid": "0000-0002-2029-1680;;", "linkedin": ";;", "or_profile": "~Xi_Yu1;~Shinjae_Yoo1;~Yuewei_Lin1", "aff": "Brookhaven National Laboratory;Brookhaven National Lab;Brookhaven national laboratory", "aff_domain": "bnl.gov;bnl.gov;bnl.gov", "position": "Postdoc;Scientist;Scientist", "bibtex": "@inproceedings{\nyu2024clipceil,\ntitle={{CLIPCEIL}: Domain Generalization through {CLIP} via Channel rEfinement and Image-text aLignment},\nauthor={Xi Yu and Shinjae Yoo and Yuewei Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MqeCU0tXAY}\n}", "github": "", "reviewers": "dJc5;cn9a;wrXY;k7YB", "pdf_size": 1736824, "rating": "4;5;6;6", "confidence": "3;4;4;5", "soundness": "2;2;4;3", "novelty": "1;3;3;3", "presentation": "2;3;4;3", "wc_summary": "77;97;48;132", "wc_strengths": "16;24;62;89", "wc_weaknesses": "116;117;99;550", "wc_questions": "151;17;82;101", "wc_limitations": "3;4;39;10", "wc_review": "363;259;330;882", "wc_reply_reviewers": "316;362;52;389", "wc_reply_authors": "825;1088;61;341", "reply_reviewers": "2;3;2;1", "reply_authors": "4;4;3;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.5, 30.56550343115585 ], "wc_strengths_avg": [ 47.75, 29.4819860253681 ], "wc_weaknesses_avg": [ 220.5, 190.37134763403867 ], "wc_questions_avg": [ 87.75, 47.996744681280205 ], "wc_limitations_avg": [ 14.0, 14.679918255903198 ], "wc_review_avg": [ 458.5, 247.37875818267017 ], "wc_reply_reviewers_avg": [ 279.75, 134.05665779811162 ], "wc_reply_authors_avg": [ 578.75, 401.4239498335893 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3567462568535716816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bnl.gov;bnl.gov;bnl.gov", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Brookhaven National Laboratory", "aff_unique_dep": "", "aff_unique_url": "https://www.bnl.gov", "aff_unique_abbr": "BNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning in Markov Games with Adaptive Adversaries: Policy Regret, Fundamental Barriers, and Efficient Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95488", "id": "Mqx2gquLk0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mqx2gquLk0", "openreview": "https://openreview.net/forum?id=Mqx2gquLk0", "poster": "/media/PosterPDFs/NeurIPS%202024/95488.png?t=1733680311.8140976", "project": "", "author_site": "Thanh Nguyen-Tang, Raman Arora", "tldr": "", "abstract": "We study learning in a dynamically evolving environment modeled as a Markov game between a learner and a strategic opponent that can adapt to the learner's strategies. While most existing works in Markov games focus on external regret as the learning objective, external regret becomes inadequate when the adversaries are adaptive. In this work, we focus on \\emph{policy regret} -- a counterfactual notion that aims to compete with the return that would have been attained if the learner had followed the best fixed sequence of policy, in hindsight. We show that if the opponent has unbounded memory or if it is non-stationary, then sample-efficient learning is not possible. For memory-bounded and stationary, we show that learning is still statistically hard if the set of feasible strategies for the learner is exponentially large. To guarantee learnability, we introduce a new notion of \\emph{consistent} adaptive adversaries, wherein, the adversary responds similarly to similar strategies of the learner. We provide algorithms that achieve $\\sqrt{T}$ policy regret against memory-bounded, stationary, and consistent adversaries.", "keywords": "multi-agent reinforcement learning;policy regret", "primary_area": "learning_theory", "supplementary_material": "", "author": "Thanh Nguyen-Tang;Raman Arora", "authorids": "~Thanh_Nguyen-Tang1;~Raman_Arora1", "gender": "M;M", "homepage": "http://www.cs.jhu.edu/~raman/Home.html;https://thanhnguyentang.github.io/", "dblp": ";287/5102.html", "google_scholar": "Spe0xdkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-1917-2190", "linkedin": ";thanhnguyentang/", "or_profile": "~Raman_Arora1;~Thanh_Tang_Nguyen2", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu", "position": "Associate Professor;Postdoc", "bibtex": "@inproceedings{\nnguyen-tang2024learning,\ntitle={Learning in Markov Games with Adaptive Adversaries: Policy Regret, Fundamental Barriers, and Efficient Algorithms},\nauthor={Thanh Nguyen-Tang and Raman Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mqx2gquLk0}\n}", "github": "", "reviewers": "qNT8;yS7W;Gp14;1MKG", "pdf_size": 467204, "rating": "5;6;6;7", "confidence": "2;3;2;3", "soundness": "3;4;3;4", "novelty": "3;2;3;3", "presentation": "2;4;3;3", "wc_summary": "113;108;116;66", "wc_strengths": "40;86;46;80", "wc_weaknesses": "183;321;18;13", "wc_questions": "11;2;30;74", "wc_limitations": "1;1;6;1", "wc_review": "348;518;216;234", "wc_reply_reviewers": "26;9;0;9", "wc_reply_authors": "75;5;0;5", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 100.75, 20.26542622300355 ], "wc_strengths_avg": [ 63.0, 20.223748416156685 ], "wc_weaknesses_avg": [ 133.75, 127.93235517256767 ], "wc_questions_avg": [ 29.25, 27.74324242045259 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 329.0, 120.28715642162301 ], "wc_reply_reviewers_avg": [ 11.0, 9.40744386111339 ], "wc_reply_authors_avg": [ 21.25, 31.09963826156182 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12401636441287196266&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jhu.edu;jhu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Beyond Slow Signs in High-fidelity Model Extraction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95487", "id": "Mrs9a1XQAp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mrs9a1XQAp", "openreview": "https://openreview.net/forum?id=Mrs9a1XQAp", "poster": "", "project": "", "author_site": "Hanna Foerster, Robert Mullins, I Shumailov, Jamie Hayes", "tldr": "", "abstract": "Deep neural networks, costly to train and rich in intellectual property value, are\nincreasingly threatened by model extraction attacks that compromise their confiden-\ntiality. Previous attacks have succeeded in reverse-engineering model parameters\nup to a precision of float64 for models trained on random data with at most three\nhidden layers using cryptanalytical techniques. However, the process was identified\nto be very time consuming and not feasible for larger and deeper models trained on\nstandard benchmarks. Our study evaluates the feasibility of parameter extraction\nmethods of Carlini et al. [1] further enhanced by Canales-Mart\u00ednez et al. [2] for\nmodels trained on standard benchmarks. We introduce a unified codebase that\nintegrates previous methods and reveal that computational tools can significantly\ninfluence performance. We develop further optimisations to the end-to-end attack\nand improve the efficiency of extracting weight signs by up to 14.8 times com-\npared to former methods through the identification of easier and harder to extract\nneurons. Contrary to prior assumptions, we identify extraction of weights, not\nextraction of weight signs, as the critical bottleneck. With our improvements, a\n16,721 parameter model with 2 hidden layers trained on MNIST is extracted within\nonly 98 minutes compared to at least 150 minutes previously. Finally, addressing\nmethodological deficiencies observed in previous studies, we propose new ways of\nrobust benchmarking for future model extraction attacks.", "keywords": "model extraction;cryptanalytic extraction", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Hanna Foerster;Robert D. Mullins;Ilia Shumailov;Jamie Hayes", "authorids": "~Hanna_Foerster1;~Robert_D._Mullins1;~Ilia_Shumailov1;~Jamie_Hayes2", "gender": "F;M;Unspecified;M", "homepage": "https://www.cst.cam.ac.uk/people/hf390;;https://www.cl.cam.ac.uk/~is410/;https://www.csat.cam.ac.uk/~rdm34", "dblp": "380/0224.html;;213/8587;31/789", "google_scholar": "-30Md_sAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?hl=en;zjXO2HMAAAAJ", "orcid": "0009-0000-9019-4061;;;", "linkedin": "hannafoerster/;;ilia-shumailov/;", "or_profile": "~Hanna_Foerster1;~Jamie_Hayes2;~I_Shumailov1;~Robert_Mullins1", "aff": "University of Cambridge;Google DeepMind;Google DeepMind;University of Cambridge", "aff_domain": "cam.ac.uk;google.com;google.com;cam.ac.uk", "position": "MS student;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nfoerster2024beyond,\ntitle={Beyond Slow Signs in High-fidelity Model Extraction},\nauthor={Hanna Foerster and Robert D. Mullins and Ilia Shumailov and Jamie Hayes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mrs9a1XQAp}\n}", "github": "", "reviewers": "PoZg;aZHM;yp6m;r46i", "pdf_size": 414813, "rating": "5;6;7;7", "confidence": "2;2;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "58;76;115;59", "wc_strengths": "14;66;35;94", "wc_weaknesses": "103;79;44;178", "wc_questions": "2;6;113;125", "wc_limitations": "2;6;37;2", "wc_review": "179;233;344;458", "wc_reply_reviewers": "313;0;58;0", "wc_reply_authors": "489;0;17;0", "reply_reviewers": "2;0;2;0", "reply_authors": "4;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.0, 23.075961518428652 ], "wc_strengths_avg": [ 52.25, 30.384000724065288 ], "wc_weaknesses_avg": [ 101.0, 49.15790882452182 ], "wc_questions_avg": [ 61.5, 57.673650829473246 ], "wc_limitations_avg": [ 11.75, 14.669270602180601 ], "wc_review_avg": [ 303.5, 107.2159036710506 ], "wc_reply_reviewers_avg": [ 92.75, 129.3471588400766 ], "wc_reply_authors_avg": [ 126.5, 209.40451284535393 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oEIOoVsbGkIJ:scholar.google.com/&scioq=Beyond+Slow+Signs+in+High-fidelity+Model+Extraction&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "cam.ac.uk;google.com;google.com;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Cambridge;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.cam.ac.uk;https://deepmind.com", "aff_unique_abbr": "Cambridge;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "The State of Data Curation at NeurIPS: An Assessment of Dataset Development Practices in the Datasets and Benchmarks Track", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97728", "id": "MsCSn0rlpP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MsCSn0rlpP", "openreview": "https://openreview.net/forum?id=MsCSn0rlpP", "poster": "/media/PosterPDFs/NeurIPS%202024/97728.png?t=1732213968.3113282", "project": "", "author_site": "Eshta Bhardwaj, Harshit Gujral, Siyi Wu, Ciara Zogheib, Tegan Maharaj, Christoph Becker", "tldr": "", "abstract": "Data curation is a field with origins in librarianship and archives, whose scholarship and thinking on data issues go back centuries, if not millennia. The field of machine learning is increasingly observing the importance of data curation to the advancement of both applications and fundamental understanding of machine learning models -- evidenced not least by the creation of the Datasets and Benchmarks track itself. This work provides an analysis of recent dataset development practices at NeurIPS through the lens of data curation. We present an evaluation framework for dataset documentation, consisting of a rubric and toolkit developed through a thorough literature review of data curation principles. We use the framework to systematically assess the strengths and weaknesses in current dataset development practices of 60 datasets published in the NeurIPS Datasets and Benchmarks track from 2021-2023. We summarize key findings and trends. Results indicate greater need for documentation about environmental footprint, ethical considerations, and data management. We suggest targeted strategies and resources to improve documentation in these areas and provide recommendations for the NeurIPS peer-review process that prioritize rigorous data curation in ML. We also provide guidelines for dataset developers on the use of our rubric as a standalone tool. Finally, we provide results in the format of a dataset that showcases aspects of recommended data curation practices. Our rubric and results are of interest for improving data curation practices broadly in the field of ML as well as to data curation and science and technology studies scholars studying practices in ML. Our aim is to support continued improvement in interdisciplinary research on dataset practices, ultimately improving the reusability and reproducibility of new datasets and benchmarks, enabling standardized and informed human oversight, and strengthening the foundation of rigorous and responsible ML research.", "keywords": "Data curation;datasets;documentation;data practices;transparency;accountability", "primary_area": "", "supplementary_material": "/attachment/b99153c4c111488498c82c4cd90d13afbdbc8f0a.pdf", "author": "Eshta Bhardwaj;Harshit Gujral;Siyi Wu;Ciara Zogheib;Tegan Maharaj;Christoph Becker", "authorids": "~Eshta_Bhardwaj1;~Harshit_Gujral1;~Siyi_Wu1;~Ciara_Zogheib1;~Tegan_Maharaj1;~Christoph_Becker1", "gender": "F;M;F;;F;M", "homepage": ";https://www.cs.toronto.edu/~harshit/;https://reynawu541.github.io/;;http://teganmaharaj.com;https://www.christoph-becker.info/", "dblp": ";;;;;", "google_scholar": ";https://scholar.google.co.in/citations?user=MeEmmxgAAAAJ;;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=XpscC-EAAAAJ;", "orcid": "0000-0001-8523-5201;;;;;0000-0002-8364-0593", "linkedin": ";;;;;", "or_profile": "~Eshta_Bhardwaj1;~Harshit_Gujral1;~Siyi_Wu1;~Ciara_Zogheib1;~Tegan_Maharaj1;~Christoph_Becker1", "aff": "University of Toronto;Department of Computer Science, University of Toronto;University of Toronto;University of Toronto;Ecole Polytechnique de Montreal;University of Toronto", "aff_domain": "utoronto.ca;cs.toronto.edu;utoronto.ca;utoronto.ca;polymtl.ca;utoronto.ca", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbhardwaj2024the,\ntitle={The State of Data Curation at Neur{IPS}: An Assessment of Dataset Development Practices in the Datasets and Benchmarks Track},\nauthor={Eshta Bhardwaj and Harshit Gujral and Siyi Wu and Ciara Zogheib and Tegan Maharaj and Christoph Becker},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=MsCSn0rlpP}\n}", "github": "", "reviewers": "qURt;trge;wK8h;MJgK", "pdf_size": 1000239, "rating": "6;6;7;8", "confidence": "3;3;3;4", "wc_summary_and_contributions": "98;58;77;304", "wc_strengths": "41;97;181;56", "wc_improvement": "51;743;273;49", "wc_limitations": "68;24;11;56", "wc_correctness": "31;16;19;11", "wc_clarity": "6;12;5;12", "wc_relation_to_prior_work": "30;51;20;54", "wc_documentation": "8;12;32;29", "wc_additional_feedback": "1;1;1;1", "wc_review": "334;1014;619;572", "wc_reply_reviewers": "0;38;0;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 134.25, 99.02114673139269 ], "wc_strengths_avg": [ 93.75, 54.38462558481027 ], "wc_improvement_avg": [ 279.0, 282.93815578673724 ], "wc_limitations_avg": [ 39.75, 23.112496619794236 ], "wc_correctness_avg": [ 19.25, 7.361215932167728 ], "wc_clarity_avg": [ 8.75, 3.2691742076555053 ], "wc_relation_to_prior_work_avg": [ 38.75, 14.236836024903848 ], "wc_documentation_avg": [ 20.25, 10.40132203135736 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 634.75, 244.16528725435154 ], "wc_reply_reviewers_avg": [ 20.0, 20.049937655763422 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qXCfxMK1EgQJ:scholar.google.com/&scioq=The+State+of+Data+Curation+at+NeurIPS:+An+Assessment+of+Dataset+Development+Practices+in+the+Datasets+and+Benchmarks+Track&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "utoronto.ca;cs.toronto.edu;utoronto.ca;utoronto.ca;polymtl.ca;utoronto.ca", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Toronto;Ecole Polytechnique de Montreal", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.polymtl.ca", "aff_unique_abbr": "U of T;Polytechnique Montreal", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Toronto;Montreal", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "A Study of Plasticity Loss in On-Policy Deep Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95486", "id": "MsUf8kpKTF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MsUf8kpKTF", "openreview": "https://openreview.net/forum?id=MsUf8kpKTF", "poster": "", "project": "", "author_site": "Arthur Juliani, Jordan Ash", "tldr": "", "abstract": "Continual learning with deep neural networks presents challenges distinct from both the fixed-dataset and convex continual learning regimes. One such challenge is plasticity loss, wherein a neural network trained in an online fashion displays a degraded ability to fit new tasks. This problem has been extensively studied in both supervised learning and off-policy reinforcement learning (RL), where a number of remedies have been proposed. Still, plasticity loss has received less attention in the on-policy deep RL setting. Here we perform an extensive set of experiments examining plasticity loss and a variety of mitigation methods in on-policy deep RL. We demonstrate that plasticity loss is pervasive under domain shift in this regime, and that a number of methods developed to resolve it in other settings fail, sometimes even performing worse than applying no intervention at all. In contrast, we find that a class of ``regenerative'' methods are able to consistently mitigate plasticity loss in a variety of contexts, including in gridworld tasks and more challenging environments like Montezuma's Revenge and ProcGen.", "keywords": "plasticity loss;reinforcement learning;regularization;continual learning;optimization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Arthur Juliani;Jordan T. Ash", "authorids": "~Arthur_Juliani1;~Jordan_T._Ash1", "gender": "M;", "homepage": ";http://www.jordantash.com", "dblp": ";176/5225", "google_scholar": "lzxv-hQAAAAJ;bmRNH-UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Arthur_Juliani1;~Jordan_T._Ash1", "aff": "Microsoft Research;Microsoft Research", "aff_domain": "microsoft.com;research.microsoft.com", "position": "Postdoc;Postdoc", "bibtex": "@inproceedings{\njuliani2024a,\ntitle={A Study of Plasticity Loss in On-Policy Deep Reinforcement Learning},\nauthor={Arthur Juliani and Jordan T. Ash},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MsUf8kpKTF}\n}", "github": "", "reviewers": "NjbE;gfpF;UXgL", "pdf_size": 15076523, "rating": "6;7;7", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "126;118;67", "wc_strengths": "64;147;94", "wc_weaknesses": "133;83;42", "wc_questions": "152;124;52", "wc_limitations": "1;23;6", "wc_review": "476;495;261", "wc_reply_reviewers": "126;57;17", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.66666666666667, 26.132142830026183 ], "wc_strengths_avg": [ 101.66666666666667, 34.31552936434983 ], "wc_weaknesses_avg": [ 86.0, 37.2111094522411 ], "wc_questions_avg": [ 109.33333333333333, 42.1215173304835 ], "wc_limitations_avg": [ 10.0, 9.41629792788369 ], "wc_review_avg": [ 410.6666666666667, 106.11419425421946 ], "wc_reply_reviewers_avg": [ 66.66666666666667, 45.02098276236192 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14502349751452088342&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "microsoft.com;research.microsoft.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "LRM-Zero: Training Large Reconstruction Models with Synthesized Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95485", "id": "MtRvzJBsBA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MtRvzJBsBA", "openreview": "https://openreview.net/forum?id=MtRvzJBsBA", "poster": "/media/PosterPDFs/NeurIPS%202024/95485.png?t=1733731875.3200846", "project": "", "author_site": "Desai Xie, Sai Bi, Zhixin Shu, Kai Zhang, Zexiang Xu, Yi Zhou, Soeren Pirk, Arie Kaufman, Xin Sun, Hao Tan", "tldr": "", "abstract": "We present LRM-Zero, a Large Reconstruction Model (LRM) trained entirely on synthesized 3D data, achieving high-quality sparse-view 3D reconstruction. The core of LRM-Zero is our procedural 3D dataset, Zeroverse, which is automatically synthesized from simple primitive shapes with random texturing and augmentations (e.g., height fields, boolean differences, and wireframes). Unlike previous 3D datasets (e.g., Objaverse) which are often captured or crafted by humans to approximate real 3D data, Zeroverse completely ignores realistic global semantics but is rich in complex geometric and texture details that are locally similar to or even more intricate than real objects. We demonstrate that our LRM-Zero, trained with our fully synthesized Zeroverse, can achieve high visual quality in the reconstruction of real-world objects, competitive with models trained on Objaverse. We also analyze several critical design choices of Zeroverse that contribute to LRM-Zero's capability and training stability. Our work demonstrates that 3D reconstruction, one of the core tasks in 3D vision, can potentially be addressed without the semantics of real-world objects. The Zeroverse's procedural synthesis code and interactive visualization are available at: https://desaixie.github.io/lrm-zero/.", "keywords": "3D Reconstruction;Transformer;Pre-training;Synthetic Data", "primary_area": "machine_vision", "supplementary_material": "", "author": "Desai Xie;Sai Bi;Zhixin Shu;Kai Zhang;Zexiang Xu;Yi Zhou;Soren Pirk;Arie Kaufman;Xin Sun;Hao Tan", "authorids": "~Desai_Xie1;~Sai_Bi1;~Zhixin_Shu1;~Kai_Zhang7;~Zexiang_Xu1;~Yi_Zhou1;~Soren_Pirk1;~Arie_Kaufman1;~Xin_Sun11;~Hao_Tan1", "gender": "M;M;M;M;M;F;Not Specified;;M;M", "homepage": "https://desaixie.github.io/;https://sai-bi.github.io/;https://zhixinshu.github.io/;https://kai-46.github.io/website/;https://cseweb.ucsd.edu/~zex014/;http://zhouyisjtu.github.io;http://www.pirk.io;;https://www.sunxin.name/;http://www.cs.unc.edu/~airsplay/", "dblp": "365/8156;165/9898;129/3987;55/957-45;154/0366;01/1901-23;79/9280;;;94/877-2", "google_scholar": "X4rEjA4AAAAJ;-q4nE1kAAAAJ;gp6HUP0AAAAJ;6B7FPMoAAAAJ;_RRIYvEAAAAJ;https://scholar.google.com/citations?hl=en;X9AjIugAAAAJ;;m3kAqRQAAAAJ;OV1Y3FUAAAAJ", "orcid": "0009-0008-9023-552X;;;;;0000-0002-2632-7664;0000-0003-1937-9797;;0000-0002-8710-2645;", "linkedin": "desai-xie/;;;;;;;;;hao-tan-23677180/", "or_profile": "~Desai_Xie1;~Sai_Bi1;~Zhixin_Shu1;~Kai_Zhang7;~Zexiang_Xu1;~Yi_Zhou1;~Soren_Pirk1;~Arie_Kaufman1;~Xin_Sun11;~Hao_Tan1", "aff": "Adobe Systems;Adobe Systems;Adobe Systems;Adobe Systems;Adobe Research;Adobe Systems;Kiel University;;Adobe Systems;Adobe Systems", "aff_domain": "adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;uni-kiel.de;;adobe.com;adobe.com", "position": "Intern;Researcher;Researcher;Researcher;Researcher;Research Scientist;Full Professor;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nxie2024lrmzero,\ntitle={{LRM}-Zero: Training Large Reconstruction Models with Synthesized Data},\nauthor={Desai Xie and Sai Bi and Zhixin Shu and Kai Zhang and Zexiang Xu and Yi Zhou and Soren Pirk and Arie Kaufman and Xin Sun and Hao Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MtRvzJBsBA}\n}", "github": "", "reviewers": "xM78;3hvP;NfXg;CePT", "pdf_size": 10888843, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "123;97;148;81", "wc_strengths": "74;87;60;109", "wc_weaknesses": "182;69;175;153", "wc_questions": "4;218;106;56", "wc_limitations": "9;17;45;8", "wc_review": "392;488;534;407", "wc_reply_reviewers": "25;156;0;24", "wc_reply_authors": "0;282;32;32", "reply_reviewers": "1;2;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 112.25, 25.508576988926684 ], "wc_strengths_avg": [ 82.5, 18.034688796871432 ], "wc_weaknesses_avg": [ 144.75, 45.02429899509819 ], "wc_questions_avg": [ 96.0, 79.13279977354523 ], "wc_limitations_avg": [ 19.75, 14.989579713921268 ], "wc_review_avg": [ 455.25, 58.31541391433315 ], "wc_reply_reviewers_avg": [ 51.25, 61.29997960847948 ], "wc_reply_authors_avg": [ 86.5, 113.62548129711047 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14672888019815864492&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;adobe.com;uni-kiel.de;;adobe.com;adobe.com", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;0;0", "aff_unique_norm": "Adobe;Kiel University", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.uni-kiel.de", "aff_unique_abbr": "Adobe;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "A Unifying Post-Processing Framework for Multi-Objective Learn-to-Defer Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95484", "id": "Mtsi1eDdbH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mtsi1eDdbH", "openreview": "https://openreview.net/forum?id=Mtsi1eDdbH", "poster": "", "project": "", "author_site": "Mohammad-Amin Charusaie, Samira Samadi", "tldr": "", "abstract": "Learn-to-Defer is a paradigm that enables learning algorithms to work not in isolation but as a team with human experts. In this paradigm, we permit the system to defer a subset of its tasks to the expert. Although there are currently systems that follow this paradigm and are designed to optimize the accuracy of the final human-AI team, the general methodology for developing such systems under a set of constraints (e.g., algorithmic fairness, expert intervention budget, defer of anomaly, etc.) remains largely unexplored. In this paper, using a d-dimensional generalization to the fundamental lemma of Neyman and Pearson (d-GNP), we obtain the Bayes optimal solution for learn-to-defer systems under various constraints. Furthermore, we design a generalizable algorithm to estimate that solution and apply this algorithm to the COMPAS, Hatespeech, and ACSIncome datasets. Our algorithm shows improvements in terms of constraint violation over a set of learn-to-defer baselines and can control multiple constraint violations at once. The use of d-GNP is beyond learn-to-defer applications and can potentially obtain a solution to decision-making problems with a set of controlled expected performance measures.", "keywords": "Learn-to-Defer;Human-AI Teaming;Neyman-Pearson Lemma;Hypothesis Testing;Bayes Optimal;PAC Generalization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Mohammad-Amin Charusaie;Samira Samadi", "authorids": "~Mohammad-Amin_Charusaie1;~Samira_Samadi1", "gender": "F;", "homepage": "http://www.samirasamadi.com;http://Charusaie.github.io", "dblp": "https://dblp.uni-trier.de/pers/hd/s/Samadi:Samira;", "google_scholar": "s8xc2K4AAAAJ;", "orcid": ";", "linkedin": "samira-samadi-200662108/;", "or_profile": "~Samira_Samadi1;~Amin_Charusaie1", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "tuebingen.mpg.de;is.mpg.de", "position": "Research Group Leader;PhD student", "bibtex": "@inproceedings{\ncharusaie2024a,\ntitle={A Unifying Post-Processing Framework for Multi-Objective Learn-to-Defer Problems},\nauthor={Mohammad-Amin Charusaie and Samira Samadi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mtsi1eDdbH}\n}", "github": "", "reviewers": "4QNG;Zrm7;7rh1;xkzY", "pdf_size": 1227940, "rating": "4;5;6;7", "confidence": "5;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;1;3;3", "wc_summary": "64;74;56;78", "wc_strengths": "39;27;47;85", "wc_weaknesses": "100;121;36;98", "wc_questions": "102;46;137;89", "wc_limitations": "1;2;1;4", "wc_review": "306;270;277;354", "wc_reply_reviewers": "412;52;13;27", "wc_reply_authors": "1483;43;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 68.0, 8.602325267042627 ], "wc_strengths_avg": [ 49.5, 21.696773953747133 ], "wc_weaknesses_avg": [ 88.75, 31.75984099456419 ], "wc_questions_avg": [ 93.5, 32.56148031032987 ], "wc_limitations_avg": [ 2.0, 1.224744871391589 ], "wc_review_avg": [ 301.75, 33.04826016600571 ], "wc_reply_reviewers_avg": [ 126.0, 165.71209973927674 ], "wc_reply_authors_avg": [ 381.5, 636.1935633122988 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40451991747794525, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15394107700419884135&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "tuebingen.mpg.de;is.mpg.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Data-Efficient Operator Learning via Unsupervised Pretraining and In-Context Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95483", "id": "MuPlJ9fT4b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MuPlJ9fT4b", "openreview": "https://openreview.net/forum?id=MuPlJ9fT4b", "poster": "/media/PosterPDFs/NeurIPS%202024/95483.png?t=1730093434.4662087", "project": "", "author_site": "Wuyang Chen, Jialin Song, Pu Ren, Shashank Subramanian, Dmitriy Morozov, Michael Mahoney", "tldr": "", "abstract": "Recent years have witnessed the promise of coupling machine learning methods and physical domain-specific insights for solving scientific problems based on partial differential equations (PDEs). However, being data-intensive, these methods still require a large amount of PDE data. This reintroduces the need for expensive numerical PDE solutions, partially undermining the original goal of avoiding these expensive simulations. In this work, seeking data efficiency, we design unsupervised pretraining for PDE operator learning. To reduce the need for training data with heavy simulation costs, we mine unlabeled PDE data without simulated solutions,\nand we pretrain neural operators with physics-inspired reconstruction-based proxy tasks. To improve out-of-distribution performance, we further assist neural operators in flexibly leveraging a similarity-based method that learns in-context examples, without incurring extra training costs or designs. Extensive empirical evaluations on a diverse set of PDEs demonstrate that our method is highly data-efficient, more generalizable, and even outperforms conventional vision-pretrained models. We provide our code at https://github.com/delta-lab-ai/data_efficient_nopt.", "keywords": "scientific machine learning;unsupervised pretraining;neural operators;foundation models", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/8e242643b1257c2cb337c06ecaf328d867fea6d9.zip", "author": "Wuyang Chen;Jialin Song;Pu Ren;Shashank Subramanian;Dmitriy Morozov;Michael W. Mahoney", "authorids": "~Wuyang_Chen1;~Jialin_Song3;~Pu_Ren1;~Shashank_Subramanian1;~Dmitriy_Morozov1;~Michael_W._Mahoney1", "gender": ";F;M;;M;", "homepage": ";;https://paulpuren.github.io/;;https://mrzv.org;", "dblp": ";;;;80/5570;", "google_scholar": ";;FiuAyGwAAAAJ;;;", "orcid": ";;0000-0002-6354-385X;;;", "linkedin": ";jialin-song-961492149/;;;;", "or_profile": "~Wuyang_Chen1;~Jialin_Song3;~Pu_Ren1;~Shashank_Subramanian1;~Dmitriy_Morozov1;~Michael_W._Mahoney1", "aff": ";Simon Fraser University;Lawrence Berkeley National Lab;;Lawrence Berkeley National Lab;", "aff_domain": ";sfu.ca;lbl.gov;;lbl.gov;", "position": ";PhD student;Postdoc;;Researcher;", "bibtex": "@inproceedings{\nchen2024dataefficient,\ntitle={Data-Efficient Operator Learning via Unsupervised Pretraining and In-Context Learning},\nauthor={Wuyang Chen and Jialin Song and Pu Ren and Shashank Subramanian and Dmitriy Morozov and Michael W. Mahoney},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MuPlJ9fT4b}\n}", "github": "", "reviewers": "oEEz;oUb1;u3i6;6vSy", "pdf_size": 8047121, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "3;2;2;3", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "108;48;159;42", "wc_strengths": "83;30;161;37", "wc_weaknesses": "118;408;368;44", "wc_questions": "56;28;825;105", "wc_limitations": "5;13;34;21", "wc_review": "370;527;1547;249", "wc_reply_reviewers": "125;464;463;399", "wc_reply_authors": "301;811;1887;688", "reply_reviewers": "1;2;2;3", "reply_authors": "2;5;5;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 89.25, 47.82977629050757 ], "wc_strengths_avg": [ 77.75, 52.19853925159209 ], "wc_weaknesses_avg": [ 234.5, 156.35456501170665 ], "wc_questions_avg": [ 253.5, 331.1045907262538 ], "wc_limitations_avg": [ 18.25, 10.709224995301948 ], "wc_review_avg": [ 673.25, 513.9982368646803 ], "wc_reply_reviewers_avg": [ 362.75, 139.76833511207036 ], "wc_reply_authors_avg": [ 921.75, 588.2054806783085 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10915750673716448279&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";sfu.ca;lbl.gov;;lbl.gov;", "author_num": 6, "aff_unique_index": "0;1;1", "aff_unique_norm": "Simon Fraser University;Lawrence Berkeley National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.sfu.ca;https://www.lbl.gov", "aff_unique_abbr": "SFU;LBNL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "A Concept-Based Explainability Framework for Large Multimodal Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95482", "id": "MvjLRFntW6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MvjLRFntW6", "openreview": "https://openreview.net/forum?id=MvjLRFntW6", "poster": "/media/PosterPDFs/NeurIPS%202024/95482.png?t=1733796092.177381", "project": "", "author_site": "Jayneel Parekh, Pegah KHAYATAN, Mustafa Shukor, Alasdair Newson, Matthieu Cord", "tldr": "", "abstract": "Large multimodal models (LMMs) combine unimodal encoders and large language models (LLMs) to perform multimodal tasks. Despite recent advancements towards the interpretability of these models, understanding internal representations of LMMs remains largely a mystery. In this paper, we present a novel framework for the interpretation of LMMs. We propose a dictionary learning based approach, applied to the representation of tokens. The elements of the learned dictionary correspond to our proposed concepts. We show that these concepts are well semantically grounded in both vision and text. Thus we refer to these as ``multi-modal concepts''. \nWe qualitatively and quantitatively evaluate the results of the learnt concepts. We show that the extracted multimodal concepts are useful to interpret representations of test samples. Finally, we evaluate the disentanglement between different concepts and the quality of grounding concepts visually and textually. Our implementation is publicly available: https://github.com/mshukor/xl-vlms.", "keywords": "Large multimodal models;interpretability;concept extraction;multimodal concepts;multimodal representations;vision-language models;vision-language interpretations", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Jayneel Parekh;Pegah KHAYATAN;Mustafa Shukor;Alasdair Newson;Matthieu Cord", "authorids": "~Jayneel_Parekh1;~Pegah_KHAYATAN1;~Mustafa_Shukor1;~Alasdair_Newson1;~Matthieu_Cord1", "gender": ";F;M;M;M", "homepage": ";https://pegah-kh.github.io/;https://twitter.com/MustafaShukor1;https://sites.google.com/site/alasdairnewson/;https://cord.isir.upmc.fr/", "dblp": ";379/9185;;123/3502;68/3117", "google_scholar": "CF7ncpUAAAAJ;;lhp9mRgAAAAJ;https://scholar.google.fr/citations?user=hQ07GR8AAAAJ;SpAotDcAAAAJ", "orcid": ";0009-0001-9707-4836;;;", "linkedin": "jayneel-parekh/;;;;", "or_profile": "~Jayneel_Parekh1;~Pegah_KHAYATAN1;~Mustafa_Shukor1;~Alasdair_Newson1;~Matthieu_Cord1", "aff": "Sorbonne Universit\u00e9;Valeo;Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universit\u00e9", "aff_domain": "isir.upmc.fr;valeo.com;isir.upmc.fr;sorbonne-universite.fr;isir.upmc.fr", "position": "Postdoc;Intern;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nparekh2024a,\ntitle={A Concept-Based Explainability Framework for Large Multimodal Models},\nauthor={Jayneel Parekh and Pegah KHAYATAN and Mustafa Shukor and Alasdair Newson and Matthieu Cord},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MvjLRFntW6}\n}", "github": "", "reviewers": "XzMc;Go37;afLg;17ab;8A1z", "pdf_size": 18105177, "rating": "4;4;6;7;8", "confidence": "2;4;3;3;4", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;3;4;4", "wc_summary": "45;53;92;156;65", "wc_strengths": "11;13;132;133;78", "wc_weaknesses": "62;84;107;326;101", "wc_questions": "25;59;76;150;29", "wc_limitations": "14;10;1;51;66", "wc_review": "157;219;408;816;339", "wc_reply_reviewers": "26;0;28;153;30", "wc_reply_authors": "145;19;11;30;6", "reply_reviewers": "1;0;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 82.2, 40.18656491913684 ], "wc_strengths_avg": [ 73.4, 53.94293280866364 ], "wc_weaknesses_avg": [ 136.0, 96.2766846126309 ], "wc_questions_avg": [ 67.8, 45.25218226781997 ], "wc_limitations_avg": [ 28.4, 25.381883302859936 ], "wc_review_avg": [ 387.8, 231.47647828667172 ], "wc_reply_reviewers_avg": [ 47.4, 53.91697320881431 ], "wc_reply_authors_avg": [ 42.2, 52.03614128660964 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.36748420762958356, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11503076700482223904&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "isir.upmc.fr;valeo.com;isir.upmc.fr;sorbonne-universite.fr;isir.upmc.fr", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Sorbonne Universit\u00e9;Valeo;Universit\u00e9 Pierre et Marie Curie - Paris 6", "aff_unique_dep": ";;Facult\u00e9 des Sciences", "aff_unique_url": "https://www.sorbonne-universite.fr;https://www.valeo.com;https://www.upmc.fr", "aff_unique_abbr": "Sorbonne U;;UPMC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Paris;Paris VI", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "title": "Generating compositional scenes via Text-to-image RGBA Instance Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95481", "id": "MwFeh4RqvA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MwFeh4RqvA", "openreview": "https://openreview.net/forum?id=MwFeh4RqvA", "poster": "/media/PosterPDFs/NeurIPS%202024/95481.png?t=1731670290.748117", "project": "", "author_site": "Alessandro Fontanella, Petru-Daniel Tudosiu, Yongxin Yang, Shifeng Zhang, Sarah Parisot", "tldr": "", "abstract": "Text-to-image diffusion generative models can generate high quality images at the cost of tedious prompt engineering. Controllability can be improved by introducing layout conditioning, however existing methods lack layout editing ability and fine-grained control over object attributes. The concept of multi-layer generation holds great potential to address these limitations, however generating image instances concurrently to scene composition limits control over fine-grained object attributes, relative positioning in 3D space and scene manipulation abilities. In this work, we propose a novel multi-stage generation paradigm that is designed for fine-grained control, flexibility and interactivity. To ensure control over instance attributes, we devise a novel training paradigm to adapt a diffusion model to generate isolated scene components as RGBA images with transparency information. To build complex images, we employ these pre-generated instances and introduce a multi-layer composite generation process that smoothly assembles components in realistic scenes. Our experiments show that our RGBA diffusion model is capable of generating diverse and high quality instances with precise control over object attributes. Through multi-layer composition, we demonstrate that our approach allows to build and manipulate images from highly complex prompts with fine-grained control over object appearance and location, granting a higher degree of control than competing methods.", "keywords": "RGBA generation;scene composition;diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Alessandro Fontanella;Petru-Daniel Tudosiu;Yongxin Yang;Shifeng Zhang;Sarah Parisot", "authorids": "~Alessandro_Fontanella1;~Petru-Daniel_Tudosiu1;~Yongxin_Yang1;~Shifeng_Zhang5;~Sarah_Parisot1", "gender": "M;M;M;;M", "homepage": ";https://kclpure.kcl.ac.uk/portal/en/persons/dan-tudosiu(51c96242-5d86-43b9-990e-2eeab07cef4e).html;https://github.com/zsffq999;https://parisots.github.io/;", "dblp": ";258/4838;;20/10169;150/4258", "google_scholar": "Bc81Y9YAAAAJ;sv39zkwAAAAJ;;https://scholar.google.co.uk/citations?user=N-AmfK4AAAAJ;https://scholar.google.co.uk/citations?user=F7PtrL8AAAAJ", "orcid": ";0000-0001-6435-5079;;;", "linkedin": ";https://linkedin.com/in/petru-daniel-tudosiu;;;", "or_profile": "~Alessandro_Fontanella1;~Petru-Daniel_Tudosiu1;~Shifeng_Zhang5;~Sarah_Parisot1;~Yongxin_Yang3", "aff": "University of Edinburgh, University of Edinburgh;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Queen Mary University of London", "aff_domain": "ed.ac.uk;huawei.com;huawei.com;huawei.com;qmul.ac.uk", "position": "PhD student;Researcher;Researcher;Senior research scientist;Assistant Professor", "bibtex": "@inproceedings{\nfontanella2024generating,\ntitle={Generating compositional scenes via Text-to-image {RGBA} Instance Generation},\nauthor={Alessandro Fontanella and Petru-Daniel Tudosiu and Yongxin Yang and Shifeng Zhang and Sarah Parisot},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MwFeh4RqvA}\n}", "github": "", "reviewers": "tSA3;S8j5;GbJP;wSp1", "pdf_size": 49545407, "rating": "4;4;4;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;1;3", "presentation": "3;3;3;3", "wc_summary": "49;88;69;73", "wc_strengths": "41;31;39;65", "wc_weaknesses": "203;108;171;42", "wc_questions": "50;1;1;31", "wc_limitations": "11;1;1;1", "wc_review": "354;229;281;212", "wc_reply_reviewers": "0;25;33;17", "wc_reply_authors": "0;88;92;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.75, 13.91716565971678 ], "wc_strengths_avg": [ 44.0, 12.68857754044952 ], "wc_weaknesses_avg": [ 131.0, 61.71304562246138 ], "wc_questions_avg": [ 20.75, 20.8611480987984 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 269.0, 55.2675311552814 ], "wc_reply_reviewers_avg": [ 18.75, 12.214233500306108 ], "wc_reply_authors_avg": [ 45.0, 45.02221673796171 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pzRSvMxDDp4J:scholar.google.com/&scioq=Generating+compositional+scenes+via+Text-to-image+RGBA+Instance+Generation&hl=en&as_sdt=0,10", "gs_version_total": 3, "email": "ed.ac.uk;huawei.com;huawei.com;huawei.com;qmul.ac.uk", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Edinburgh;Huawei;Queen Mary University of London", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ed.ac.uk;https://www.huawei.com;https://www.qmul.ac.uk", "aff_unique_abbr": "Edinburgh;Huawei;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Free-Rider and Conflict Aware Collaboration Formation for Cross-Silo Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95480", "id": "MwJo3zuiTm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MwJo3zuiTm", "openreview": "https://openreview.net/forum?id=MwJo3zuiTm", "poster": "/media/PosterPDFs/NeurIPS%202024/95480.png?t=1731392953.0208955", "project": "", "author_site": "Mengmeng Chen, Xiaohu Wu, Xiaoli Tang, Tiantian He, Yew Soon Ong, QIQI LIU, Qicheng Lao, Han Yu", "tldr": "", "abstract": "Federated learning (FL) is a machine learning paradigm that allows multiple FL participants (FL-PTs) to collaborate on training models without sharing private data. Due to data heterogeneity, negative transfer may occur in the FL training process. This necessitates FL-PT selection based on their data complementarity. In cross-silo FL, organizations that engage in business activities are key sources of FL-PTs. The resulting FL ecosystem has two features: (i) self-interest, and (ii) competition among FL-PTs. This requires the desirable FL-PT selection strategy to simultaneously mitigate the problems of free riders and conflicts of interest among competitors. To this end, we propose an optimal FL collaboration formation strategy -FedEgoists- which ensures that: (1) a FL-PT can benefit from FL if and only if it benefits the FL ecosystem, and (2) a FL-PT will not contribute to its competitors or their supporters. It provides an efficient clustering solution to group FL-PTs into coalitions, ensuring that within each coalition, FL-PTs share the same interest. We theoretically prove that the FL-PT coalitions formed are optimal since no coalitions can collaborate together to improve the utility of any of their members. Extensive experiments on widely adopted benchmark datasets demonstrate the effectiveness of FedEgoists compared to nine state-of-the-art baseline methods, and its ability to establish efficient collaborative networks in cross-silos FL with FL-PTs that engage in business activities.", "keywords": "Federated learning;business sectors;competition;self-interest", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/871cd38a71efb7af0211ea8dc0e52dd2ffcd76f1.zip", "author": "Mengmeng Chen;Xiaohu Wu;Xiaoli Tang;Tiantian He;Yew-Soon Ong;QIQI LIU;Qicheng Lao;Han Yu", "authorids": "~Mengmeng_Chen2;~Xiaohu_Wu2;~Xiaoli_Tang1;~Tiantian_He1;~Yew-Soon_Ong1;~QIQI_LIU3;~Qicheng_Lao2;~Han_Yu1", "gender": ";M;F;Unspecified;;;;M", "homepage": ";;https://scholar.google.com.sg/citations?user=Azooe2AAAAAJ&hl=en&oi=ao;https://he-tiantian.github.io/;;https://scholar.google.com.hk/citations?user=BDKEG8QAAAAJ&hl=zh-CN;;https://sites.google.com/site/hanyushomepage/home", "dblp": ";;;151/4420;;;222/3004;35/1096-1", "google_scholar": ";W61Kw-4AAAAJ;https://scholar.google.com.sg/citations?user=Azooe2AAAAAJ;;;https://scholar.google.com.hk/citations?user=BDKEG8QAAAAJ;;https://scholar.google.com.sg/citations?hl=en", "orcid": ";0000-0003-3699-5241;;;;;;0000-0001-6893-8650", "linkedin": ";;;;;;qicheng-lao-02909871;", "or_profile": "~Mengmeng_Chen2;~Xiaohu_Wu2;~Xiaoli_Tang1;~Tiantian_He1;~Yew-Soon_Ong1;~QIQI_LIU3;~Qicheng_Lao2;~Han_Yu1", "aff": ";BUPT Shenzhen Research Institute;Nanyang Technological University;A*STAR;;Westlake University;Beijing University of Posts and Telecommunications;Nanyang Technological University", "aff_domain": ";bupt.edu.cn;ntu.edu.sg;a-star.edu.sg;;westlake.edu.cn;bupt.edu.cn;ntu.edu.sg", "position": ";Associate Professor;PhD student;Senior Scientist;;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2024freerider,\ntitle={Free-Rider and Conflict Aware Collaboration Formation for Cross-Silo Federated Learning},\nauthor={Mengmeng Chen and Xiaohu Wu and Xiaoli Tang and Tiantian He and Yew-Soon Ong and QIQI LIU and Qicheng Lao and Han Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MwJo3zuiTm}\n}", "github": "", "reviewers": "jdA3;2xqy;gR3Y;BcXD", "pdf_size": 974725, "rating": "4;6;6;7", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;2;3;4", "wc_summary": "50;60;53;118", "wc_strengths": "22;73;62;99", "wc_weaknesses": "34;11;137;63", "wc_questions": "4;125;57;120", "wc_limitations": "10;9;58;4", "wc_review": "120;278;367;404", "wc_reply_reviewers": "0;13;22;17", "wc_reply_authors": "448;18;563;20", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;5;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 27.806249297594956 ], "wc_strengths_avg": [ 64.0, 27.721832551258224 ], "wc_weaknesses_avg": [ 61.25, 47.457217575412066 ], "wc_questions_avg": [ 76.5, 49.70160963188214 ], "wc_limitations_avg": [ 20.25, 21.9131809648896 ], "wc_review_avg": [ 292.25, 109.4860150886861 ], "wc_reply_reviewers_avg": [ 13.0, 8.154753215150045 ], "wc_reply_authors_avg": [ 262.25, 246.62560187458234 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5735076113782868055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";bupt.edu.cn;ntu.edu.sg;a-star.edu.sg;;westlake.edu.cn;bupt.edu.cn;ntu.edu.sg", "author_num": 8, "aff_unique_index": "0;1;2;3;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Nanyang Technological University;Agency for Science, Technology and Research;Westlake University", "aff_unique_dep": "Research Institute;;;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.ntu.edu.sg;https://www.a-star.edu.sg;https://www.westlake.edu.cn", "aff_unique_abbr": "BUPT;NTU;A*STAR;WU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Shenzhen;;Beijing", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "DiffTORI: Differentiable Trajectory Optimization for Deep Reinforcement and Imitation Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95479", "id": "Mwj57TcHWX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Mwj57TcHWX", "openreview": "https://openreview.net/forum?id=Mwj57TcHWX", "poster": "/media/PosterPDFs/NeurIPS%202024/95479.png?t=1733629561.0546255", "project": "", "author_site": "Weikang Wan, Ziyu Wang, Yufei Wang, Zackory Erickson, David Held", "tldr": "", "abstract": "This paper introduces DiffTORI, which utilizes $\\textbf{Diff}$erentiable $\\textbf{T}$rajectory $\\textbf{O}$ptimization as the policy representation to generate actions for deep $\\textbf{R}$einforcement and $\\textbf{I}$mitation learning. Trajectory optimization is a powerful and widely used algorithm in control, parameterized by a cost and a dynamics function. The key to our approach is to leverage the recent progress in differentiable trajectory optimization, which enables computing the gradients of the loss with respect to the parameters of trajectory optimization. As a result, the cost and dynamics functions of trajectory optimization can be learned end-to-end. DiffTORI addresses the \u201cobjective mismatch\u201d issue of prior model-based RL algorithms, as the dynamics model in DiffTORI is learned to directly maximize task performance by differentiating the policy gradient loss through the trajectory optimization process. We further benchmark DiffTORI for imitation learning on standard robotic manipulation task suites with high-dimensional sensory observations and compare our method to feedforward policy classes as well as Energy-Based Models (EBM) and Diffusion. Across 15 model based RL tasks and 35 imitation learning tasks with high-dimensional image and point cloud inputs, DiffTORI outperforms prior state-of-the-art methods in both domains.", "keywords": "imitation learning;model-based reinforcement learning;differentiable trajectory optimization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Weikang Wan;Ziyu Wang;Yufei Wang;Zackory Erickson;David Held", "authorids": "~Weikang_Wan1;~Ziyu_Wang15;~Yufei_Wang4;~Zackory_Erickson1;~David_Held1", "gender": "M;M;;M;M", "homepage": "https://wkwan7.github.io/;https://github.com/wadiuvatzy;https://yufeiwang63.github.io/;https://zackory.com;http://davheld.github.io/", "dblp": "314/9770;;;;22/11147", "google_scholar": "MVE-fyQAAAAJ;;HQl9718AAAAJ;wElkTtIAAAAJ;0QtU-NsAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Weikang_Wan1;~Ziyu_Wang15;~Yufei_Wang4;~Zackory_Erickson1;~David_Held1", "aff": "Peking University;Tsinghua University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "pku.edu.cn;tsinghua.edu.cn;cs.cmu.edu;cmu.edu;cmu.edu", "position": "Undergrad student;Undergrad student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwan2024difftori,\ntitle={Diff{TORI}: Differentiable Trajectory Optimization for Deep Reinforcement and Imitation Learning},\nauthor={Weikang Wan and Ziyu Wang and Yufei Wang and Zackory Erickson and David Held},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Mwj57TcHWX}\n}", "github": "", "reviewers": "epML;XFZZ;kMzm;fycy", "pdf_size": 11108224, "rating": "5;6;6;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;2", "wc_summary": "83;115;107;133", "wc_strengths": "63;41;74;106", "wc_weaknesses": "227;423;59;72", "wc_questions": "4;94;106;25", "wc_limitations": "1;44;22;10", "wc_review": "378;717;368;346", "wc_reply_reviewers": "79;548;0;10", "wc_reply_authors": "239;126;0;55", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 109.5, 17.96524422322168 ], "wc_strengths_avg": [ 71.0, 23.441416339462084 ], "wc_weaknesses_avg": [ 195.25, 147.16720932327283 ], "wc_questions_avg": [ 57.25, 43.59687488800086 ], "wc_limitations_avg": [ 19.25, 16.11482236948332 ], "wc_review_avg": [ 452.25, 153.29118533040312 ], "wc_reply_reviewers_avg": [ 159.25, 226.49655074636345 ], "wc_reply_authors_avg": [ 105.0, 89.3336442780658 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9676566282454441000&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;tsinghua.edu.cn;cs.cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Peking University;Tsinghua University;Carnegie Mellon University", "aff_unique_dep": ";;School of Computer Science", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "Peking U;THU;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Why are Visually-Grounded Language Models Bad at Image Classification?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95478", "id": "MwmmBg1VYg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MwmmBg1VYg", "openreview": "https://openreview.net/forum?id=MwmmBg1VYg", "poster": "/media/PosterPDFs/NeurIPS%202024/95478.png?t=1730249152.4117134", "project": "", "author_site": "Yuhui Zhang, Alyssa Unell, Xiaohan Wang, Dhruba Ghosh, Yuchang Su, Ludwig Schmidt, Serena Yeung", "tldr": "", "abstract": "Image classification is one of the most fundamental capabilities of machine vision intelligence. In this work, we revisit the image classification task using visually-grounded language models (VLMs) such as GPT-4V and LLaVA. We find that existing proprietary and public VLMs, despite often using CLIP as a vision encoder and having many more parameters, significantly underperform CLIP on standard image classification benchmarks like ImageNet. To understand the reason, we explore several hypotheses concerning the inference algorithms, training objectives, and data processing in VLMs. Our analysis reveals that the primary cause is data-related: critical information for image classification is encoded in the VLM's latent space but can only be effectively decoded with enough training data. Specifically, there is a strong correlation between the frequency of class exposure during VLM training and instruction-tuning and the VLM's performance in those classes; when trained with sufficient data, VLMs can match the accuracy of state-of-the-art classification models. Based on these findings, we enhance a VLM by integrating classification-focused datasets into its training, and demonstrate that the enhanced classification performance of the VLM transfers to its general capabilities, resulting in an improvement of 11.8% on the newly collected ImageWikiQA dataset.", "keywords": "vision-language models;image classification", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yuhui Zhang;Alyssa Unell;Xiaohan Wang;Dhruba Ghosh;Yuchang Su;Ludwig Schmidt;Serena Yeung-Levy", "authorids": "~Yuhui_Zhang3;~Alyssa_Unell1;~Xiaohan_Wang2;~Dhruba_Ghosh1;~Yuchang_Su2;~Ludwig_Schmidt1;~Serena_Yeung-Levy1", "gender": "M;;M;;M;M;F", "homepage": "https://cs.stanford.edu/~yuhuiz/;;https://wxh1996.github.io/;https://djghosh13.github.io/;;http://people.csail.mit.edu/ludwigs/;http://ai.stanford.edu/~syyeung/", "dblp": ";;;292/8318;;141/2720;147/5023", "google_scholar": "X-Agfu8AAAAJ;;iGA10XoAAAAJ;lHuZ55oAAAAJ;https://scholar.google.com/citations?hl=en;SWMKy70AAAAJ;Tw2m5kUAAAAJ", "orcid": ";;;0000-0002-8518-2696;;;0000-0003-0529-0628", "linkedin": ";alyssa-unell-a8a9b81a9/;%E6%99%93%E6%99%97-%E6%B1%AA-883895bb/;dhruba-ghosh-b82467170/;;ludwig-schmidt-87ba3612/;", "or_profile": "~Yuhui_Zhang3;~Alyssa_Unell1;~Xiaohan_Wang2;~Dhruba_Ghosh1;~Yuchang_Su2;~Ludwig_Schmidt1;~Serena_Yeung1", "aff": "Stanford University;Stanford University;Stanford University;University of Washington;Tsinghua University;University of Washington;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;uw.edu;mails.tsinghua.edu.cn;washington.edu;stanford.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024why,\ntitle={Why are Visually-Grounded Language Models Bad at Image Classification?},\nauthor={Yuhui Zhang and Alyssa Unell and Xiaohan Wang and Dhruba Ghosh and Yuchang Su and Ludwig Schmidt and Serena Yeung-Levy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MwmmBg1VYg}\n}", "github": "", "reviewers": "CuMn;8fB5;fPsb;SHnR", "pdf_size": 2154084, "rating": "3;5;5;7", "confidence": "4;4;3;4", "soundness": "3;2;2;3", "novelty": "3;2;2;3", "presentation": "3;3;2;3", "wc_summary": "111;230;93;103", "wc_strengths": "79;82;40;223", "wc_weaknesses": "32;237;274;273", "wc_questions": "40;78;10;170", "wc_limitations": "13;4;1;5", "wc_review": "275;631;418;774", "wc_reply_reviewers": "18;64;0;336", "wc_reply_authors": "99;247;88;126", "reply_reviewers": "1;2;0;1", "reply_authors": "3;4;3;4", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 134.25, 55.647888549342106 ], "wc_strengths_avg": [ 106.0, 69.55213871621778 ], "wc_weaknesses_avg": [ 204.0, 100.4166320885141 ], "wc_questions_avg": [ 74.5, 60.17266821406543 ], "wc_limitations_avg": [ 5.75, 4.437059837324712 ], "wc_review_avg": [ 524.5, 191.8234865703363 ], "wc_reply_reviewers_avg": [ 104.5, 135.6788487569083 ], "wc_reply_authors_avg": [ 140.0, 63.30481814206561 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5405062384445291783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu;uw.edu;mails.tsinghua.edu.cn;washington.edu;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;1;0", "aff_unique_norm": "Stanford University;University of Washington;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.washington.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Stanford;UW;THU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "SlimGPT: Layer-wise Structured Pruning for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95477", "id": "MxF0IKJtKW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MxF0IKJtKW", "openreview": "https://openreview.net/forum?id=MxF0IKJtKW", "poster": "/media/PosterPDFs/NeurIPS%202024/95477.png?t=1730184115.6194956", "project": "", "author_site": "Gui Ling, Ziyang Wang, YuliangYan, Qingwen Liu", "tldr": "", "abstract": "Large language models (LLMs) have garnered significant attention for their remarkable capabilities across various domains, whose vast parameter scales present challenges for practical deployment. Structured pruning is an effective method to balance model performance with efficiency, but performance restoration under computational resource constraints is a principal challenge in pruning LLMs. Therefore, we present a low-cost and fast structured pruning method for LLMs named SlimGPT based on the Optimal Brain Surgeon framework. We propose Batched Greedy Pruning for rapid and near-optimal pruning, which enhances the accuracy of head-wise pruning error estimation through grouped Cholesky decomposition and improves the pruning efficiency of FFN via Dynamic Group Size, thereby achieving approximate local optimal pruning results within one hour. Besides, we explore the limitations of layer-wise pruning from the perspective of error accumulation and propose Incremental Pruning Ratio, a non-uniform pruning strategy to reduce performance degradation. Experimental results on the LLaMA benchmark show that SlimGPT outperforms other methods and achieves state-of-the-art results.", "keywords": "Model Compression;Structured Pruning;Layer-wise Pruning;Large Language Model", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/cfa8bbd6ea12c544dcfafc93467552fb49de9600.zip", "author": "Gui Ling;Ziyang Wang;YuliangYan;Qingwen Liu", "authorids": "~Gui_Ling3;~Ziyang_Wang9;~YuliangYan1;~Qingwen_Liu2", "gender": ";M;M;M", "homepage": "https://github.com/iMurphL;;https://yyl8781697.github.io/;http://liuqingwen.com", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";0000-0002-9310-7813;;0000-0002-9463-7410", "linkedin": ";;;", "or_profile": "~Gui_Ling3;~Ziyang_Wang9;~YuliangYan1;~Qingwen_Liu2", "aff": "Alibaba Group;Alibaba Group;Ningbo University;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;nbu.edu.cn;alibaba-inc.com", "position": "Researcher;Researcher;MS student;Principal Researcher", "bibtex": "@inproceedings{\nling2024slimgpt,\ntitle={Slim{GPT}: Layer-wise Structured Pruning for Large Language Models},\nauthor={Gui Ling and Ziyang Wang and YuliangYan and Qingwen Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MxF0IKJtKW}\n}", "github": "", "reviewers": "QvFZ;8hG2;jqTK", "pdf_size": 518427, "rating": "4;6;6", "confidence": "4;4;5", "soundness": "2;4;2", "novelty": "2;4;3", "presentation": "3;4;3", "wc_summary": "75;92;46", "wc_strengths": "36;45;63", "wc_weaknesses": "108;180;358", "wc_questions": "85;15;151", "wc_limitations": "2;24;9", "wc_review": "306;356;627", "wc_reply_reviewers": "0;0;538", "wc_reply_authors": "113;115;1304", "reply_reviewers": "0;0;4", "reply_authors": "2;2;5", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 18.991226044325487 ], "wc_strengths_avg": [ 48.0, 11.224972160321824 ], "wc_weaknesses_avg": [ 215.33333333333334, 105.07563413507857 ], "wc_questions_avg": [ 83.66666666666667, 55.529771794556794 ], "wc_limitations_avg": [ 11.666666666666666, 9.177266598624136 ], "wc_review_avg": [ 429.6666666666667, 141.02088103855007 ], "wc_reply_reviewers_avg": [ 179.33333333333334, 253.61563218557504 ], "wc_reply_authors_avg": [ 510.6666666666667, 560.9719739483921 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10287576491956752217&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "alibaba-inc.com;alibaba-inc.com;nbu.edu.cn;alibaba-inc.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Alibaba Group;Ningbo University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.ningbou.edu.cn", "aff_unique_abbr": "Alibaba;NBU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Equivariant spatio-hemispherical networks for diffusion MRI deconvolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95476", "id": "MxWpCherzD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MxWpCherzD", "openreview": "https://openreview.net/forum?id=MxWpCherzD", "poster": "/media/PosterPDFs/NeurIPS%202024/95476.png?t=1733715206.3156128", "project": "", "author_site": "Axel Elaldi, Guido Gerig, Neel Dey", "tldr": "", "abstract": "Each voxel in a diffusion MRI (dMRI) image contains a spherical signal corresponding to the direction and strength of water diffusion in the brain. This paper advances the analysis of such spatio-spherical data by developing convolutional network layers that are equivariant to the $\\mathbf{E(3) \\times SO(3)}$ group and account for the physical symmetries of dMRI including rotations, translations, and reflections of space alongside voxel-wise rotations. Further, neuronal fibers are typically antipodally symmetric, a fact we leverage to construct highly efficient spatio-*hemispherical* graph convolutions to accelerate the analysis of high-dimensional dMRI data. In the context of sparse spherical fiber deconvolution to recover white matter microstructure, our proposed equivariant network layers yield substantial performance and efficiency gains, leading to better and more practical resolution of crossing neuronal fibers and fiber tractography. These gains are experimentally consistent across both simulation and in vivo human datasets.", "keywords": "Geometric Deep Learning;Diffusion MRI;Spherical Networks;Biomedical Image Analysis", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Axel Elaldi;Guido Gerig;Neel Dey", "authorids": "~Axel_Elaldi1;~Guido_Gerig1;~Neel_Dey1", "gender": "M;M;M", "homepage": "https://github.com/AxelElaldi;http://engineering.nyu.edu/people/guido-gerig/;https://www.neeldey.com/", "dblp": ";https://dblp.uni-trier.de/pid/g/GuidoGerig;239/1845", "google_scholar": "KkFBYC4AAAAJ;https://scholar.google.com.tw/citations?user=P5CovF0AAAAJ;yEmcuHcAAAAJ", "orcid": "0000-0003-3949-8400;;0000-0003-1427-6406", "linkedin": "axel-elaldi-222069102/;;neel-dey/", "or_profile": "~Axel_Elaldi1;~Guido_Gerig1;~Neel_Dey1", "aff": "New York University;New York University;Massachusetts Institute of Technology", "aff_domain": "nyu.edu;nyu.edu;mit.edu", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nelaldi2024equivariant,\ntitle={Equivariant spatio-hemispherical networks for diffusion {MRI} deconvolution},\nauthor={Axel Elaldi and Guido Gerig and Neel Dey},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MxWpCherzD}\n}", "github": "", "reviewers": "rCgS;zoNj;JrFU;F5CU", "pdf_size": 41122387, "rating": "3;6;6;7", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "70;45;41;97", "wc_strengths": "31;32;102;32", "wc_weaknesses": "112;233;63;238", "wc_questions": "38;388;57;84", "wc_limitations": "30;38;1;10", "wc_review": "281;736;264;461", "wc_reply_reviewers": "49;27;18;22", "wc_reply_authors": "512;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 63.25, 22.431841208425134 ], "wc_strengths_avg": [ 49.25, 30.457962834043908 ], "wc_weaknesses_avg": [ 161.5, 76.02137857208326 ], "wc_questions_avg": [ 141.75, 143.10900565652744 ], "wc_limitations_avg": [ 19.75, 14.872373717735847 ], "wc_review_avg": [ 435.5, 189.89009979459172 ], "wc_reply_reviewers_avg": [ 29.0, 11.979148550710939 ], "wc_reply_authors_avg": [ 128.0, 221.70250336881628 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6nhkER6Jza4J:scholar.google.com/&scioq=Equivariant+spatio-hemispherical+networks+for+diffusion+MRI+deconvolution&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "New York University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://web.mit.edu", "aff_unique_abbr": "NYU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Boosting Weakly Supervised Referring Image Segmentation via Progressive Comprehension", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95475", "id": "MxdyGXoK9h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MxdyGXoK9h", "openreview": "https://openreview.net/forum?id=MxdyGXoK9h", "poster": "/media/PosterPDFs/NeurIPS%202024/95475.png?t=1731603058.3805604", "project": "", "author_site": "Zaiquan Yang, Yuhao LIU, Jiaying Lin, Gerhard Hancke, Rynson Lau", "tldr": "", "abstract": "This paper explores the weakly-supervised referring image segmentation (WRIS) problem, and focuses on a challenging setup where target localization is learned directly from image-text pairs. \nWe note that the input text description typically already contains detailed information on how to localize the target object, and we also observe that humans often follow a step-by-step comprehension process (\\ie, progressively utilizing target-related attributes and relations as cues) to identify the target object. \nHence, we propose a novel Progressive Comprehension Network (PCNet) to leverage target-related textual cues from the input description for progressively localizing the target object.\nSpecifically, we first use a Large Language Model (LLM) to decompose the input text description into short phrases. These short phrases are taken as target-related cues and fed into a Conditional Referring Module (CRM) in multiple stages, to allow updating the referring text embedding and enhance the response map for target localization in a multi-stage manner.\nBased on the CRM, we then propose a Region-aware Shrinking (RaS) loss to constrain the visual localization to be conducted progressively in a coarse-to-fine manner across different stages.\nFinally, we introduce an Instance-aware Disambiguation (IaD) loss to suppress instance localization ambiguity by differentiating overlapping response maps generated by different referring texts on the same image. \nExtensive experiments show that our method outperforms SOTA methods on three common benchmarks.", "keywords": "multi-modal;segmentation;weakly-supervised", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zaiquan Yang;Yuhao LIU;Jiaying Lin;Gerhard Petrus Hancke;Rynson W. H. Lau", "authorids": "~Zaiquan_Yang1;~Yuhao_LIU3;~Jiaying_Lin1;~Gerhard_Petrus_Hancke1;~Rynson_W._H._Lau1", "gender": "M;;;M;", "homepage": "https://github.com/zaiquanyang;;https://jiaying.link;https://scholars.cityu.edu.hk/en/persons/gerhard-petrus-hancke(9e59c8eb-ba32-4075-97f7-e44e82367742).html;", "dblp": "322/9391;;;44/2703-2;", "google_scholar": ";;;C2iR3xUAAAAJ;", "orcid": ";;;0000-0002-2388-3542;", "linkedin": ";;;gerhard-hancke-0522772/;", "or_profile": "~Zaiquan_Yang1;~Yuhao_LIU3;~Jiaying_Lin1;~Gerhard_Petrus_Hancke1;~Rynson_W._H._Lau1", "aff": "City University of Hong Kong;;City University of Hong Kong;University of Pretoria;", "aff_domain": "cityu.edu.hk;;cityu.edu.hk;up.ac.za;", "position": "PhD student;;Postdoc;Visiting/Extraordinary Faculty;", "bibtex": "@inproceedings{\nyang2024boosting,\ntitle={Boosting Weakly Supervised Referring Image Segmentation via Progressive Comprehension},\nauthor={Zaiquan Yang and Yuhao LIU and Jiaying Lin and Gerhard Petrus Hancke and Rynson W. H. Lau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MxdyGXoK9h}\n}", "github": "", "reviewers": "Tfcw;kTSb;K7SG;oLx4", "pdf_size": 4077292, "rating": "5;5;7;7", "confidence": "5;3;4;4", "soundness": "3;2;4;2", "novelty": "2;2;4;2", "presentation": "2;1;4;2", "wc_summary": "43;127;72;69", "wc_strengths": "38;36;127;89", "wc_weaknesses": "146;211;74;209", "wc_questions": "1;127;67;224", "wc_limitations": "3;8;1;1", "wc_review": "231;509;341;592", "wc_reply_reviewers": "13;77;94;151", "wc_reply_authors": "33;126;23;335", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 77.75, 30.589009464185008 ], "wc_strengths_avg": [ 72.5, 37.963798545456434 ], "wc_weaknesses_avg": [ 160.0, 56.11149614829389 ], "wc_questions_avg": [ 104.75, 82.01333732997335 ], "wc_limitations_avg": [ 3.25, 2.8613807855648994 ], "wc_review_avg": [ 418.25, 140.9385947851049 ], "wc_reply_reviewers_avg": [ 83.75, 49.190319169527655 ], "wc_reply_authors_avg": [ 129.25, 125.39612234834058 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14520159052605988276&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "email": "cityu.edu.hk;;cityu.edu.hk;up.ac.za;", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "City University of Hong Kong;University of Pretoria", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.up.ac.za", "aff_unique_abbr": "CityU;UP", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;South Africa" }, { "title": "Quantifying the Gain in Weak-to-Strong Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95474", "id": "MyVyH5Jo1l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MyVyH5Jo1l", "openreview": "https://openreview.net/forum?id=MyVyH5Jo1l", "poster": "/media/PosterPDFs/NeurIPS%202024/95474.png?t=1731448303.284654", "project": "", "author_site": "Moses Charikar, Chirag Pabbaraju, Kirankumar Shiragur", "tldr": "", "abstract": "Recent advances in large language models have shown capabilities that are extraordinary and near-superhuman. These models operate with such complexity that reliably evaluating and aligning them proves challenging for humans. This leads to the natural question: can guidance from weak models (like humans) adequately direct the capabilities of strong models? In a recent and somewhat surprising work, Burns et al. (2023) empirically demonstrated that when strong models (like GPT-4) are finetuned using labels generated by weak supervisors (like GPT-2), the strong models outperform their weaker counterparts---a phenomenon they term *weak-to-strong generalization*.\n\nIn this work, we present a theoretical framework for understanding weak-to-strong generalization. Specifically, we show that the improvement in performance achieved by strong models over their weaker counterparts is quantified by the *misfit error* incurred by the strong model on labels generated by the weaker model. Our theory reveals several curious algorithmic insights. For instance, we can predict the amount by which the strong model will improve over the weak model, and also choose among different weak models to train the strong model, based on its misfit error. We validate our theoretical findings through various empirical assessments.", "keywords": "Weak-to-Strong Generalization", "primary_area": "learning_theory", "supplementary_material": "/attachment/9fd2c16995712eb01f0e1bbc6f5b4dfa75b7c168.zip", "author": "Moses Charikar;Chirag Pabbaraju;Kirankumar Shiragur", "authorids": "~Moses_Charikar1;~Chirag_Pabbaraju1;~Kirankumar_Shiragur1", "gender": "M;M;M", "homepage": "https://profiles.stanford.edu/moses-charikar;https://web.stanford.edu/~cpabbara/;https://sites.google.com/view/kiran-shiragur", "dblp": "https://dblp.uni-trier.de/pers/hd/c/Charikar:Moses;231/7619;", "google_scholar": "zX3ba1kAAAAJ;IAGcpHkAAAAJ;", "orcid": ";0000-0002-3424-691X;", "linkedin": ";chirag-pabbaraju-277a4ba5/;", "or_profile": "~Moses_Charikar1;~Chirag_Pabbaraju1;~Kirankumar_Shiragur1", "aff": "Stanford University;Stanford University;Microsoft Research", "aff_domain": "stanford.edu;cs.stanford.edu;microsoft.com", "position": "Full Professor;PhD student;Researcher", "bibtex": "@inproceedings{\ncharikar2024quantifying,\ntitle={Quantifying the Gain in Weak-to-Strong Generalization},\nauthor={Moses Charikar and Chirag Pabbaraju and Kirankumar Shiragur},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MyVyH5Jo1l}\n}", "github": "", "reviewers": "8aZZ;322n;oUtk", "pdf_size": 894058, "rating": "5;7;7", "confidence": "4;4;5", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "1;4;3", "wc_summary": "109;188;89", "wc_strengths": "76;63;24", "wc_weaknesses": "579;246;83", "wc_questions": "4;1;130", "wc_limitations": "4;10;34", "wc_review": "772;508;360", "wc_reply_reviewers": "0;323;0", "wc_reply_authors": "37;659;37", "reply_reviewers": "0;2;0", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 128.66666666666666, 42.74212078136611 ], "wc_strengths_avg": [ 54.333333333333336, 22.095751225568733 ], "wc_weaknesses_avg": [ 302.6666666666667, 206.4175918429004 ], "wc_questions_avg": [ 45.0, 60.11655346075655 ], "wc_limitations_avg": [ 16.0, 12.96148139681572 ], "wc_review_avg": [ 546.6666666666666, 170.40605101410637 ], "wc_reply_reviewers_avg": [ 107.66666666666667, 152.26366021550322 ], "wc_reply_authors_avg": [ 244.33333333333334, 293.2136119320217 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4194314678738124329&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stanford.edu;cs.stanford.edu;microsoft.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Stanford;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-modal Situated Reasoning in 3D Scenes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97727", "id": "Myc4q2g9xZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Myc4q2g9xZ", "openreview": "https://openreview.net/forum?id=Myc4q2g9xZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97727.png?t=1731422243.9377327", "project": "", "author_site": "Xiongkun Linghu, Xuesong Niu, Jiangyong Huang, Xiaojian (Shawn) Ma, Baoxiong Jia, Siyuan Huang", "tldr": "", "abstract": "Situation awareness is essential for understanding and reasoning about 3D scenes\n in embodied AI agents. However, existing datasets and benchmarks for situated\n understanding suffer from severe limitations in data modality, scope, diversity, and\n scale. To address these limitations, we propose Multi-modal Situated Question\n Answering (MSQA), a large-scale multi-modal situated reasoning dataset, scalably\n collected leveraging 3D scene graphs and vision-language models (VLMs) across\n a diverse range of real-world 3D scenes. MSQA includes 251K situated question\nanswering pairs across 9 distinct question categories, covering complex scenarios\n and object modalities within 3D scenes. We introduce a novel interleaved multi\nmodal input setting in our benchmark to provide both texts, images, and point\n clouds for situation and question description, aiming to resolve ambiguity in\n describing situations with single-modality inputs (e.g., texts). Additionally, we\n devise the Multi-modal Next-step Navigation (MSNN) benchmark to evaluate\n models\u2019 grounding of actions and transitions between situations. Comprehensive\n evaluations on reasoning and navigation tasks highlight the limitations of existing\n vision-language models and underscore the importance of handling multi-modal\n interleaved inputs and situation modeling. Experiments on data scaling and cross\ndomain transfer further demonstrate the effectiveness of leveraging MSQA as\n a pre-training dataset for developing more powerful situated reasoning models,\n contributing to advancements in 3D scene understanding for embodied AI.", "keywords": "Multi-modal Reasoning; Situated Question-Answering; 3D Scene Understanding", "primary_area": "", "supplementary_material": "", "author": "Xiongkun Linghu;Jiangyong Huang;Xuesong Niu;Xiaojian Ma;Baoxiong Jia;Siyuan Huang", "authorids": "~Xiongkun_Linghu1;~Jiangyong_Huang1;~Xuesong_Niu1;~Xiaojian_Ma1;~Baoxiong_Jia1;~Siyuan_Huang2", "gender": ";;M;;M;M", "homepage": ";https://huangjy-pku.github.io/;https://nxsedson.github.io/;;https://buzz-beater.github.io/;https://siyuanhuang.com/", "dblp": ";334/4572;213/8652;;206/8738;62/885-1", "google_scholar": ";;iuPSV-0AAAAJ;;qIBUK6sAAAAJ;1NN7Ee8AAAAJ", "orcid": "0000-0002-4393-4386;0000-0001-9125-6893;;;0000-0002-4968-3290;", "linkedin": ";;;;baoxiong-jia-2b6094122?trk=public_post-text;", "or_profile": "~Xiongkun_Linghu1;~Jiangyong_Huang1;~Xuesong_Niu1;~Xiaojian_Ma1;~Baoxiong_Jia1;~Siyuan_Huang2", "aff": "Beijing Institute for General Artificial Intelligence;Peking University;Kuaishou Technology;;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence", "aff_domain": "bigai.ai;pku.edu.cn;kuaishou.com;;bigai.ai;bigai.ai", "position": "Researcher;PhD student;Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nlinghu2024multimodal,\ntitle={Multi-modal Situated Reasoning in 3D Scenes},\nauthor={Xiongkun Linghu and Jiangyong Huang and Xuesong Niu and Xiaojian Ma and Baoxiong Jia and Siyuan Huang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Myc4q2g9xZ}\n}", "github": "", "reviewers": "RR5c;cvGg;pZjR", "pdf_size": 9905465, "rating": "6;7;7", "confidence": "4;4;4", "wc_summary_and_contributions": "157;80;70", "wc_strengths": "6;2;62", "wc_improvement": "5;70;2", "wc_limitations": "1;1;23", "wc_correctness": "1;1;3", "wc_clarity": "1;1;3", "wc_relation_to_prior_work": "1;5;2", "wc_documentation": "4;1;2", "wc_additional_feedback": "1;1;1", "wc_review": "177;162;168", "wc_reply_reviewers": "90;0;67", "wc_reply_authors": "306;33;0", "reply_reviewers": "1;0;1", "reply_authors": "4;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 102.33333333333333, 38.87015421745698 ], "wc_strengths_avg": [ 23.333333333333332, 27.39018477889885 ], "wc_improvement_avg": [ 25.666666666666668, 31.372316175606514 ], "wc_limitations_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_correctness_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_clarity_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_relation_to_prior_work_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_documentation_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 169.0, 6.164414002968976 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 38.177945931591914 ], "wc_reply_authors_avg": [ 113.0, 137.13497001129946 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2015549437148981848&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "bigai.ai;pku.edu.cn;kuaishou.com;;bigai.ai;bigai.ai", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Beijing Institute for General Artificial Intelligence;Peking University;Kuaishou Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bigaiai.org/;http://www.pku.edu.cn;https://www.kuaishou.com", "aff_unique_abbr": "BIGAI;Peking U;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "IQA-EVAL: Automatic Evaluation of Human-Model Interactive Question Answering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95473", "id": "MzM99vV5Rx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MzM99vV5Rx", "openreview": "https://openreview.net/forum?id=MzM99vV5Rx", "poster": "/media/PosterPDFs/NeurIPS%202024/95473.png?t=1731747440.8958092", "project": "", "author_site": "Ruosen Li, Ruochen Li, Barry Wang, Xinya Du", "tldr": "", "abstract": "To evaluate Large Language Models (LLMs) for question answering (QA), traditional methods typically focus on directly assessing the immediate responses generated by the models based on the given question and context. In the common use case of humans seeking AI assistant\u2019s help in finding information, these non-interactive evaluations do not account for the dynamic nature of human-model conversations, and interaction-aware evaluations have shown that accurate models are not necessarily preferred by humans Lee et al. Recent works in human-computer interaction (HCI) have employed human evaluators to conduct interactions and evaluations, but they are often prohibitively expensive and time-consuming to scale. In this work, we introduce an automated evaluation framework IQA-EVAL to Interactive Question Answering Evaluations, more specifically, we introduce LLM-based Evaluation Agent (LEA) that can: (1) simulate human behaviors to generate interactions with IQA models; (2) automatically evaluate the generated interactions. Moreover, we propose assigning personas to LEAs to better simulate groups of real human evaluators. We show that: (1) our evaluation framework with GPT-4 (or Claude) as the backbone model achieves a high correlation with human evaluations on the IQA task; (2) assigning personas to LEA to better represent the crowd further significantly improves correlations. Finally, we use our automated metric to evaluate five recent LLMs with over 1000 questions from complex and ambiguous question answering tasks, which would cost $5k if evaluated by humans.", "keywords": "Interactive Question Answering;Automatic Evaluation;Question Answering;LLM Evaluation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/d2d2f9646bbacbc6b09bd589a94c9d4acae9f9bf.zip", "author": "Ruosen Li;Ruochen Li;Barry Wang;Xinya Du", "authorids": "~Ruosen_Li1;~Ruochen_Li3;~Barry_Wang1;~Xinya_Du1", "gender": "M;F;M;M", "homepage": ";;https://BarryW.XYZ;https://xinyadu.github.io", "dblp": "351/0775;;320/5158;200/8114", "google_scholar": "tN-RVAkAAAAJ;;2S7N5bQAAAAJ;R-lKQqkAAAAJ", "orcid": ";;0009-0008-3810-8494;", "linkedin": "ruosenli/?locale=en_US;katherine-li-7144401b7;barry-w/;", "or_profile": "~Ruosen_Li1;~Ruochen_Li3;~Barry_Wang1;~Xinya_Du1", "aff": "University of Texas at Dallas;University of Texas at Dallas;Cornell University;University of Texas at Dallas", "aff_domain": "utd.edu;utdallas.edu;cornell.edu;utdallas.edu", "position": "PhD student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nli2024iqaeval,\ntitle={{IQA}-{EVAL}: Automatic Evaluation of Human-Model Interactive Question Answering},\nauthor={Ruosen Li and Ruochen Li and Barry Wang and Xinya Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MzM99vV5Rx}\n}", "github": "", "reviewers": "RXxM;unmC;RAi2;JewS", "pdf_size": 1169837, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;1;3", "wc_summary": "71;77;142;29", "wc_strengths": "35;42;58;34", "wc_weaknesses": "79;43;148;158", "wc_questions": "242;73;75;2", "wc_limitations": "1;38;15;5", "wc_review": "428;273;438;228", "wc_reply_reviewers": "20;45;0;25", "wc_reply_authors": "132;460;613;0", "reply_reviewers": "1;1;0;1", "reply_authors": "4;4;5;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 79.75, 40.41890028192257 ], "wc_strengths_avg": [ 42.25, 9.60143218483576 ], "wc_weaknesses_avg": [ 107.0, 47.859168400631454 ], "wc_questions_avg": [ 98.0, 88.1844657521947 ], "wc_limitations_avg": [ 14.75, 14.359230480774379 ], "wc_review_avg": [ 341.75, 92.69405320731207 ], "wc_reply_reviewers_avg": [ 22.5, 16.00781059358212 ], "wc_reply_authors_avg": [ 301.25, 245.8590805725914 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14480447089278005376&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "utd.edu;utdallas.edu;cornell.edu;utdallas.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Dallas;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utdallas.edu;https://www.cornell.edu", "aff_unique_abbr": "UT Dallas;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Dallas;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "OptEx: Expediting First-Order Optimization with Approximately Parallelized Iterations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95472", "id": "MzNjnbgcPN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MzNjnbgcPN", "openreview": "https://openreview.net/forum?id=MzNjnbgcPN", "poster": "/media/PosterPDFs/NeurIPS%202024/95472.png?t=1733470501.7524137", "project": "", "author_site": "Yao Shu, Jiongfeng Fang, Ying He, Fei Yu", "tldr": "", "abstract": "First-order optimization (FOO) algorithms are pivotal in numerous computational domains, such as reinforcement learning and deep learning. However, their application to complex tasks often entails significant optimization inefficiency due to their need of many sequential iterations for convergence. In response, we introduce first-order optimization expedited with approximately parallelized iterations (OptEx), the first general framework that enhances the time efficiency of FOO by leveraging parallel computing to directly mitigate its requirement of many sequential iterations for convergence. To achieve this, OptEx utilizes a kernelized gradient estimation that is based on the history of evaluated gradients to predict the gradients required by the next few sequential iterations in FOO, which helps to break the inherent iterative dependency and hence enables the approximate parallelization of iterations in FOO. We further establish theoretical guarantees for the estimation error of our kernelized gradient estimation and the iteration complexity of SGD-based OptEx, confirming that the estimation error diminishes to zero as the history of gradients accumulates and that our SGD-based OptEx enjoys an effective acceleration rate of \u0398(\u221aN ) over standard SGD given parallelism of N, in terms of the sequential iterations required for convergence. Finally, we provide extensive empirical studies, including synthetic functions, reinforcement learning tasks, and neural network training on various datasets, to underscore the substantial efficiency improvements achieved by our OptEx in practice.", "keywords": "Iteration Parallelization;First-Order Optimization;Iteration Complexity;Time Efficiency", "primary_area": "optimization", "supplementary_material": "/attachment/1e1a4b2c9a6bf1c2bfdff74537f1486c7bae6864.zip", "author": "Yao Shu;Jiongfeng Fang;Ying Tiffany He;Fei Yu", "authorids": "~Yao_Shu1;~Jiongfeng_Fang1;~Ying_Tiffany_He1;~Fei_Yu13", "gender": "M;;F;M", "homepage": "https://yao.notion.site;https://github.com/alex-llljdk;;https://csse.szu.edu.cn/pages/user/index?id=1199", "dblp": "44/1338;;;16/6654.html", "google_scholar": "https://scholar.google.com.au/citations?hl=en;;https://scholar.google.ca/citations?user=wSdlXDYAAAAJ;", "orcid": ";;;", "linkedin": "yao-shu-a5640514b;;;", "or_profile": "~Yao_Shu1;~Jiongfeng_Fang1;~Ying_Tiffany_He1;~Fei_Yu13", "aff": "Guangming Lab;Shenzhen University;Shenzhen University;GM Lab", "aff_domain": "gml.ac.cn;szu.edu.cn;szu.edu.cn;gml.ac.cn", "position": "Researcher;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nshu2024optex,\ntitle={OptEx: Expediting First-Order Optimization with Approximately Parallelized Iterations},\nauthor={Yao Shu and Jiongfeng Fang and Ying Tiffany He and Fei Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MzNjnbgcPN}\n}", "github": "", "reviewers": "ieTR;1m6n;YWBT;RYzW", "pdf_size": 1143627, "rating": "3;6;7;7", "confidence": "4;3;3;3", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "59;30;58;111", "wc_strengths": "14;69;116;96", "wc_weaknesses": "113;74;49;299", "wc_questions": "45;2;15;251", "wc_limitations": "1;1;6;31", "wc_review": "232;176;244;788", "wc_reply_reviewers": "0;14;20;439", "wc_reply_authors": "74;40;41;670", "reply_reviewers": "0;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.5, 29.261749776799064 ], "wc_strengths_avg": [ 73.75, 38.316934898292686 ], "wc_weaknesses_avg": [ 133.75, 98.09529805245509 ], "wc_questions_avg": [ 78.25, 100.94893511077767 ], "wc_limitations_avg": [ 9.75, 12.43734296383275 ], "wc_review_avg": [ 360.0, 248.43510218968655 ], "wc_reply_reviewers_avg": [ 118.25, 185.3272443544122 ], "wc_reply_authors_avg": [ 206.25, 268.09548205816526 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9684959969581861, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1HPOT_ciGAUJ:scholar.google.com/&scioq=OptEx:+Expediting+First-Order+Optimization+with+Approximately+Parallelized+Iterations&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "gml.ac.cn;szu.edu.cn;szu.edu.cn;gml.ac.cn", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Guangming Lab;Shenzhen University;GM Research Laboratory", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.szu.edu.cn;https://www.gm.com/research", "aff_unique_abbr": ";SZU;GM Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "MO-DDN: A Coarse-to-Fine Attribute-based Exploration Agent for Multi-Object Demand-driven Navigation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95471", "id": "MzTdZhMjeC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=MzTdZhMjeC", "openreview": "https://openreview.net/forum?id=MzTdZhMjeC", "poster": "/media/PosterPDFs/NeurIPS%202024/95471.png?t=1731224619.1618464", "project": "", "author_site": "Hongcheng Wang, Peiqi Liu, Wenzhe Cai, Mingdong Wu, Zhengyu Qian, Hao Dong", "tldr": "", "abstract": "The process of satisfying daily demands is a fundamental aspect of humans' daily lives. With the advancement of embodied AI, robots are increasingly capable of satisfying human demands. Demand-driven navigation (DDN) is a task in which an agent must locate an object to satisfy a specified demand instruction, such as \"I am thirsty.\" The previous study typically assumes that each demand instruction requires only one object to be fulfilled and does not consider individual preferences. However, the realistic human demand may involve multiple objects. In this paper, we introduce the Multi-object Demand-driven Navigation (MO-DDN) benchmark, which addresses these nuanced aspects, including multi-object search and personal preferences, thus making the MO-DDN task more reflective of real-life scenarios compared to DDN. Building upon previous work, we employ the concept of ``attribute'' to tackle this new task. However, instead of solely relying on attribute features in an end-to-end manner like DDN, we propose a modular method that involves constructing a coarse-to-fine attribute-based exploration agent (C2FAgent). Our experimental results illustrate that this coarse-to-fine exploration strategy capitalizes on the advantages of attributes at various decision-making levels, resulting in superior performance compared to baseline methods. Code and video can be found at https://sites.google.com/view/moddn.", "keywords": "Mudolar Object Navigation;Demand-driven Navigation;Attribute Learning", "primary_area": "robotics", "supplementary_material": "/attachment/d6c640cadeb1f93b7e357a68cefe5117b65fb4f6.zip", "author": "Hongcheng Wang;Peiqi Liu;Wenzhe Cai;Mingdong Wu;Zhengyu Qian;Hao Dong", "authorids": "~Hongcheng_Wang6;~Peiqi_Liu2;~Wenzhe_Cai1;~Mingdong_Wu1;~Zhengyu_Qian1;~Hao_Dong3", "gender": "M;M;M;M;M;M", "homepage": ";https://wzcai99.github.io/;https://aaronanima.github.io/;https://github.com/qzyqzyqzyqzy;https://zsdonghao.github.io;http://peiqil.com", "dblp": "76/1170;261/2706;315/5136;;14/1525-3.html;", "google_scholar": ";NHQcCyAAAAAJ;https://scholar.google.com/citations?hl=en;;xLFL4sMAAAAJ;", "orcid": ";;;;0000-0003-2261-9122;", "linkedin": "%E9%B8%BF%E9%93%96-%E7%8E%8B-4a66451b9/;;;;;peiqi-liu-780703317/", "or_profile": "~Hongcheng_Wang6;~Wenzhe_Cai1;~Mingdong_Wu1;~Zhengyu_Qian1;~Hao_Dong3;~\u5218\u6c9b\u6dc71", "aff": "Peking University;Southeast University;Center on Frontiers of Computing Studies,Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;seu.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;PhD student;Undergrad student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nwang2024moddn,\ntitle={{MO}-{DDN}: A Coarse-to-Fine Attribute-based Exploration Agent for Multi-Object Demand-driven Navigation},\nauthor={Hongcheng Wang and Peiqi Liu and Wenzhe Cai and Mingdong Wu and Zhengyu Qian and Hao Dong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=MzTdZhMjeC}\n}", "github": "", "reviewers": "9oyu;AuxK;xf5s;XjVA", "pdf_size": 19532074, "rating": "4;5;5;5", "confidence": "1;3;4;4", "soundness": "2;3;2;2", "novelty": "1;2;3;2", "presentation": "1;2;3;2", "wc_summary": "79;15;120;227", "wc_strengths": "93;54;46;77", "wc_weaknesses": "163;61;115;246", "wc_questions": "160;30;129;4", "wc_limitations": "1;1;8;1", "wc_review": "496;161;418;555", "wc_reply_reviewers": "0;13;18;655", "wc_reply_authors": "107;200;147;1538", "reply_reviewers": "0;1;1;3", "reply_authors": "2;3;3;6", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 110.25, 77.09531438420885 ], "wc_strengths_avg": [ 67.5, 18.607794065928395 ], "wc_weaknesses_avg": [ 146.25, 67.9609262738524 ], "wc_questions_avg": [ 80.75, 65.33519342590179 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 407.5, 150.3836759758186 ], "wc_reply_reviewers_avg": [ 171.5, 279.2261628143036 ], "wc_reply_authors_avg": [ 498.0, 601.3497318532702 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1GDIi32hticJ:scholar.google.com/&scioq=MO-DDN:+A+Coarse-to-Fine+Attribute-based+Exploration+Agent+for+Multi-Object+Demand-driven+Navigation&hl=en&as_sdt=0,34", "gs_version_total": 3, "email": "pku.edu.cn;seu.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Peking University;Southeast University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.seu.edu.cn/", "aff_unique_abbr": "Peking U;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DiffCut: Catalyzing Zero-Shot Semantic Segmentation with Diffusion Features and Recursive Normalized Cut", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95470", "id": "N0xNf9Qqmc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N0xNf9Qqmc", "openreview": "https://openreview.net/forum?id=N0xNf9Qqmc", "poster": "/media/PosterPDFs/NeurIPS%202024/95470.png?t=1733480213.156115", "project": "", "author_site": "Paul Couairon, Mustafa Shukor, Jean-Emmanuel HAUGEARD, Matthieu Cord, Nicolas THOME", "tldr": "", "abstract": "Foundation models have emerged as powerful tools across various domains including language, vision, and multimodal tasks. While prior works have addressed unsupervised semantic segmentation, they significantly lag behind supervised models. In this paper, we use a diffusion UNet encoder as a foundation vision encoder and introduce DiffCut, an unsupervised zero-shot segmentation method that solely harnesses the output features from the final self-attention block. Through extensive experimentation, we demonstrate that using these diffusion features in a graph based segmentation algorithm, significantly outperforms previous state-of-the-art methods on zero-shot segmentation. Specifically, we leverage a recursive Normalized Cut algorithm that regulates the granularity of detected objects and produces well-defined segmentation maps that precisely capture intricate image details. Our work highlights the remarkably accurate semantic knowledge embedded within diffusion UNet encoders that could then serve as foundation vision encoders for downstream tasks.", "keywords": "Segmentation;Image Segmentation;Zero-Shot Segmentation;Diffusion Features;Normalized Cut", "primary_area": "machine_vision", "supplementary_material": "", "author": "Paul Couairon;Mustafa Shukor;Jean-Emmanuel HAUGEARD;Matthieu Cord;Nicolas THOME", "authorids": "~Paul_Couairon1;~Mustafa_Shukor1;~Jean-Emmanuel_HAUGEARD1;~Matthieu_Cord1;~Nicolas_THOME2", "gender": "M;M;M;M;", "homepage": "https://www.linkedin.com/in/paul-couairon/;https://twitter.com/MustafaShukor1;;https://cord.isir.upmc.fr/;", "dblp": ";;92/6849;68/3117;", "google_scholar": ";lhp9mRgAAAAJ;;SpAotDcAAAAJ;", "orcid": ";;;;", "linkedin": ";;jean-emmanuel-haugeard-5a33345b;;", "or_profile": "~Paul_Couairon1;~Mustafa_Shukor1;~Jean-Emmanuel_HAUGEARD1;~Matthieu_Cord1;~Nicolas_THOME2", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Thales SIX;Sorbonne Universit\u00e9;", "aff_domain": "isir.upmc.fr;isir.upmc.fr;thalesgroup.com;isir.upmc.fr;", "position": "PhD student;PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\ncouairon2024diffcut,\ntitle={DiffCut: Catalyzing Zero-Shot Semantic Segmentation with Diffusion Features and Recursive Normalized Cut},\nauthor={Paul Couairon and Mustafa Shukor and Jean-Emmanuel HAUGEARD and Matthieu Cord and Nicolas THOME},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N0xNf9Qqmc}\n}", "github": "", "reviewers": "jdn9;2hwn;oADK;qiwu", "pdf_size": 42879441, "rating": "5;6;6;6", "confidence": "5;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;2", "wc_summary": "174;72;60;67", "wc_strengths": "95;87;116;59", "wc_weaknesses": "330;93;383;87", "wc_questions": "5;100;92;34", "wc_limitations": "6;106;15;81", "wc_review": "610;458;666;328", "wc_reply_reviewers": "56;82;45;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 93.25, 46.81546218932373 ], "wc_strengths_avg": [ 89.25, 20.42516829796024 ], "wc_weaknesses_avg": [ 223.25, 134.5778120642478 ], "wc_questions_avg": [ 57.75, 39.70122794070733 ], "wc_limitations_avg": [ 52.0, 42.54997062278657 ], "wc_review_avg": [ 515.5, 132.3281904962053 ], "wc_reply_reviewers_avg": [ 53.0, 19.300259065618782 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13659240952167031109&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "isir.upmc.fr;isir.upmc.fr;thalesgroup.com;isir.upmc.fr;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;Thales;Sorbonne Universit\u00e9", "aff_unique_dep": "Facult\u00e9 des Sciences;SIX;", "aff_unique_url": "https://www.upmc.fr;https://www.thalesgroup.com;https://www.sorbonne-universite.fr", "aff_unique_abbr": "UPMC;Thales;Sorbonne U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Mirror and Preconditioned Gradient Descent in Wasserstein Space", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95469", "id": "N12B6wvA55", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N12B6wvA55", "openreview": "https://openreview.net/forum?id=N12B6wvA55", "poster": "/media/PosterPDFs/NeurIPS%202024/95469.png?t=1731700146.2484825", "project": "", "author_site": "Cl\u00e9ment Bonet, Th\u00e9o Uscidda, Adam David, Pierre-Cyril Aubin-Frankowski, Anna Korba", "tldr": "", "abstract": "As the problem of minimizing functionals on the Wasserstein space encompasses many applications in machine learning, different optimization algorithms on $\\mathbb{R}^d$ have received their counterpart analog on the Wasserstein space. We focus here on lifting two explicit algorithms: mirror descent and preconditioned gradient descent. These algorithms have been introduced to better capture the geometry of the function to minimize and are provably convergent under appropriate (namely relative) smoothness and convexity conditions. Adapting these notions to the Wasserstein space, we prove guarantees of convergence of some Wasserstein-gradient-based discrete-time schemes for new pairings of objective functionals and regularizers. The difficulty here is to carefully select along which curves the functionals should be smooth and convex. We illustrate the advantages of adapting the geometry induced by the regularizer on ill conditioned optimization tasks, and showcase the improvement of choosing different discrepancies and geometries in a computational biology task of aligning single-cells.", "keywords": "wasserstein gradient flows;mirror descent;preconditioned gradient descent", "primary_area": "optimization", "supplementary_material": "/attachment/bcd8cab5f23fe480a23bcb02ad1f8517773ffed4.zip", "author": "Cl\u00e9ment Bonet;Th\u00e9o Uscidda;Adam David;Pierre-Cyril Aubin-Frankowski;Anna Korba", "authorids": "~Cl\u00e9ment_Bonet1;~Th\u00e9o_Uscidda1;~Adam_David1;~Pierre-Cyril_Aubin-Frankowski1;~Anna_Korba2", "gender": "M;M;;;", "homepage": "https://clbonet.github.io;https://theouscidda6.github.io/;;https://pcaubin.github.io/;", "dblp": "304/8220;;;;182/8959.html", "google_scholar": "wjCPk5kAAAAJ;xnQZonMAAAAJ;;60oOCO0AAAAJ;https://scholar.google.fr/citations?user=dbH6E3kAAAAJ", "orcid": "0000-0002-3390-1169;;;;", "linkedin": "cl\u00e9ment-bonet-2840a9153;th\u00e9o-uscidda-926335174/;adam-david-4689081a7/;;", "or_profile": "~Cl\u00e9ment_Bonet1;~Th\u00e9o_Uscidda1;~Adam_David1;~Pierre-Cyril_Aubin-Frankowski1;~Anna_Korba2", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;Ecole Nationale de la Statistique et de l'Administration Economique;Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Wien;Ensae ParisTech", "aff_domain": "ensae.fr;ensae.fr;tu-berlin.de;tuwien.ac.at;ensae.fr", "position": "Postdoc;PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nbonet2024mirror,\ntitle={Mirror and Preconditioned Gradient Descent in Wasserstein Space},\nauthor={Cl{\\'e}ment Bonet and Th{\\'e}o Uscidda and Adam David and Pierre-Cyril Aubin-Frankowski and Anna Korba},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N12B6wvA55}\n}", "github": "", "reviewers": "mqrt;KGf7;PBU2;3j2f;P753", "pdf_size": 3659958, "rating": "6;6;7;7;9", "confidence": "4;3;3;3;5", "soundness": "4;4;4;4;4", "novelty": "3;3;4;3;4", "presentation": "3;2;3;3;4", "wc_summary": "71;39;79;36;73", "wc_strengths": "118;46;77;106;56", "wc_weaknesses": "253;152;179;22;46", "wc_questions": "12;1;237;90;55", "wc_limitations": "9;1;45;6;7", "wc_review": "463;239;617;260;237", "wc_reply_reviewers": "0;26;110;0;28", "wc_reply_authors": "0;0;197;0;11", "reply_reviewers": "0;1;2;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 7.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 59.6, 18.260339536821323 ], "wc_strengths_avg": [ 80.6, 27.782008566696543 ], "wc_weaknesses_avg": [ 130.4, 85.71254283942345 ], "wc_questions_avg": [ 79.0, 85.12813870865497 ], "wc_limitations_avg": [ 13.6, 15.919798993705918 ], "wc_review_avg": [ 363.2, 152.56264287170694 ], "wc_reply_reviewers_avg": [ 32.8, 40.44947465666272 ], "wc_reply_authors_avg": [ 41.6, 77.81670771755896 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6846531968814578, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10724275156738900370&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ensae.fr;ensae.fr;tu-berlin.de;tuwien.ac.at;ensae.fr", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Wien;ENSAE ParisTech", "aff_unique_dep": ";;;", "aff_unique_url": "https://ensae.fr;https://www.tu-berlin.de;https://www.tuwien.ac.at;https://www.ensae.fr", "aff_unique_abbr": "ENSAE;TU Berlin;TU Wien;Ensae", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "France;Germany;Austria" }, { "title": "Towards Global Optimal Visual In-Context Learning Prompt Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95468", "id": "N2PwbxJ3o6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N2PwbxJ3o6", "openreview": "https://openreview.net/forum?id=N2PwbxJ3o6", "poster": "/media/PosterPDFs/NeurIPS%202024/95468.png?t=1732849956.639082", "project": "", "author_site": "Chengming Xu, Chen Liu, Yikai Wang, Yuan Yao, Yanwei Fu", "tldr": "", "abstract": "Visual In-Context Learning (VICL) is a prevailing way to transfer visual foundation models to new tasks by leveraging contextual information contained in in-context examples to enhance learning and prediction of query sample. The fundamental problem in VICL is how to select the best prompt to activate its power as much as possible, which is equivalent to the ranking problem to test the in-context behavior of each candidate in the alternative set and select the best one. To utilize more appropriate ranking metric and leverage more comprehensive information among the alternative set, we propose a novel in-context example selection framework to approximately identify the global optimal prompt, i.e. choosing the best performing in-context examples from all alternatives for each query sample. Our method, dubbed Partial2Global, adopts a transformer-based list-wise ranker to provide a more comprehensive comparison within several alternatives, and a consistency-aware ranking aggregator to generate globally consistent ranking. The effectiveness of Partial2Global is validated through experiments on foreground segmentation, single object detection and image colorization, demonstrating that Partial2Global selects consistently better in-context examples compared with other methods, and thus establish the new state-of-the-arts.", "keywords": "in-context learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chengming Xu;Chen Liu;Yikai Wang;Yuan Yao;Yanwei Fu", "authorids": "~Chengming_Xu1;~Chen_Liu4;~Yikai_Wang1;~Yuan_Yao1;~Yanwei_Fu2", "gender": "M;M;M;Unspecified;M", "homepage": "https://chmxu.github.io;;https://yikai-wang.github.io;https://yao-lab.github.io/;http://yanweifu.github.io", "dblp": "115/9183-1;;85/9555-2;25/4120-11.html;63/9065", "google_scholar": "pjcYzvYAAAAJ;https://scholar.google.com/citations?hl=en;x8HOE_cAAAAJ;OOlHr-wAAAAJ;https://scholar.google.co.uk/citations?user=Vg54TcsAAAAJ", "orcid": "0000-0003-3891-2227;;0000-0001-6107-5063;0000-0001-5814-1162;0000-0002-6595-6893", "linkedin": ";;;;", "or_profile": "~Chengming_Xu1;~Chen_Liu4;~Yikai_Wang1;~Yuan_Yao1;~Yanwei_Fu2", "aff": "Tencent;Hong Kong University of Science and Technology;Fudan University;Hong Kong University of Science and Technology;Fudan University,", "aff_domain": "tencent.com;connect.ust.hk;fudan.edu.cn;ust.hk;fudan.edu.cn", "position": "Researcher;PhD student;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nxu2024towards,\ntitle={Towards Global Optimal Visual In-Context Learning Prompt Selection},\nauthor={Chengming Xu and Chen Liu and Yikai Wang and Yuan Yao and Yanwei Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N2PwbxJ3o6}\n}", "github": "", "reviewers": "dbja;dHhp;PwDC;crFX", "pdf_size": 1631583, "rating": "5;5;6;6", "confidence": "5;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "59;78;64;47", "wc_strengths": "60;99;37;99", "wc_weaknesses": "105;159;19;101", "wc_questions": "3;3;5;3", "wc_limitations": "1;13;5;1", "wc_review": "228;352;130;251", "wc_reply_reviewers": "10;32;0;31", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.0, 11.113055385446435 ], "wc_strengths_avg": [ 73.75, 26.52710877574109 ], "wc_weaknesses_avg": [ 96.0, 50.00999900019995 ], "wc_questions_avg": [ 3.5, 0.8660254037844386 ], "wc_limitations_avg": [ 5.0, 4.898979485566356 ], "wc_review_avg": [ 240.25, 78.9125306906324 ], "wc_reply_reviewers_avg": [ 18.25, 13.718144918318949 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4793881757419904005&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tencent.com;connect.ust.hk;fudan.edu.cn;ust.hk;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "Tencent;Hong Kong University of Science and Technology;Fudan University", "aff_unique_dep": "Tencent Holdings Limited;;", "aff_unique_url": "https://www.tencent.com;https://www.ust.hk;https://www.fudan.edu.cn", "aff_unique_abbr": "Tencent;HKUST;Fudan", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Geometry of naturalistic object representations in recurrent neural network models of working memory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95467", "id": "N2RaC7LO6k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N2RaC7LO6k", "openreview": "https://openreview.net/forum?id=N2RaC7LO6k", "poster": "/media/PosterPDFs/NeurIPS%202024/95467.png?t=1731472787.3892217", "project": "", "author_site": "Xiaoxuan Lei, Takuya Ito, Pouya Bashivan", "tldr": "", "abstract": "Working memory is a central cognitive ability crucial for intelligent decision-making. Recent experimental and computational work studying working memory has primarily used categorical (i.e., one-hot) inputs, rather than ecologically-relevant, multidimensional naturalistic ones. Moreover, studies have primarily investigated working memory during single or few number of cognitive tasks. As a result, an understanding of how naturalistic object information is maintained in working memory in neural networks is still lacking. To bridge this gap, we developed sensory-cognitive models, comprising of a convolutional neural network (CNN) coupled with a recurrent neural network (RNN), and trained them on nine distinct N-back tasks using naturalistic stimuli. By examining the RNN\u2019s latent space, we found that: 1) Multi-task RNNs represent both task-relevant and irrelevant information simultaneously while performing tasks; 2) While the latent subspaces used to maintain specific object properties in vanilla RNNs are largely shared across tasks, they are highly task-specific in gated RNNs such as GRU and LSTM; 3) Surprisingly, RNNs embed objects in new representational spaces in which individual object features are less orthogonalized relative to the perceptual space; 4) Interestingly, the transformation of WM encodings (i.e., embedding of visual inputs in the RNN latent space) into memory was shared across stimuli, yet the transformations governing the retention of a memory in the face of incoming distractor stimuli were distinct across time. Our findings indicate that goal-driven RNNs employ chronological memory subspaces to track information over short time spans, enabling testable predictions with neural data.", "keywords": "Working memory;geometry;recurrent neural networks", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Xiaoxuan Lei;Takuya Ito;Pouya Bashivan", "authorids": "~Xiaoxuan_Lei1;~Takuya_Ito1;~Pouya_Bashivan1", "gender": "F;M;M", "homepage": "https://rainyfields.github.io/;https://ito-takuya.github.io;http://www.bashivanlab.org", "dblp": ";32/3223;172/1407", "google_scholar": ";Ym08_RMAAAAJ;B0rM8BcAAAAJ", "orcid": ";0000-0002-2060-4608;", "linkedin": "xiaoxuanlei/;;", "or_profile": "~Xiaoxuan_Lei1;~Takuya_Ito1;~Pouya_Bashivan2", "aff": "McGill University;International Business Machines;McGill University", "aff_domain": "mail.mcgill.ca;ibm.com;mcgill.ca", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nlei2024geometry,\ntitle={Geometry of naturalistic object representations in recurrent neural network models of working memory},\nauthor={Xiaoxuan Lei and Takuya Ito and Pouya Bashivan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N2RaC7LO6k}\n}", "github": "", "reviewers": "VPbf;CRdT;8PnP;YsNL", "pdf_size": 30664221, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "3;2;4;3", "presentation": "1;2;2;2", "wc_summary": "53;74;110;424", "wc_strengths": "21;69;87;106", "wc_weaknesses": "151;69;43;913", "wc_questions": "22;74;374;10", "wc_limitations": "40;27;1;41", "wc_review": "287;313;615;1494", "wc_reply_reviewers": "1020;116;567;778", "wc_reply_authors": "1315;26;1175;423", "reply_reviewers": "4;1;3;1", "reply_authors": "5;2;5;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 165.25, 150.77362998880142 ], "wc_strengths_avg": [ 70.75, 31.56243811875122 ], "wc_weaknesses_avg": [ 294.0, 359.59560620230053 ], "wc_questions_avg": [ 120.0, 148.60686390607938 ], "wc_limitations_avg": [ 27.25, 16.13032857693854 ], "wc_review_avg": [ 677.25, 488.85804432370753 ], "wc_reply_reviewers_avg": [ 620.25, 332.3359557736719 ], "wc_reply_authors_avg": [ 734.75, 531.5131113152337 ], "reply_reviewers_avg": [ 2.25, 1.299038105676658 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PL81Nc6S5PIJ:scholar.google.com/&scioq=Geometry+of+naturalistic+object+representations+in+recurrent+neural+network+models+of+working+memory&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "mail.mcgill.ca;ibm.com;mcgill.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "McGill University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.mcgill.ca;https://www.ibm.com", "aff_unique_abbr": "McGill;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "title": "Understanding Scaling Laws with Statistical and Approximation Theory for Transformer Neural Networks on Intrinsically Low-dimensional Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95466", "id": "N2wYPMpifA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N2wYPMpifA", "openreview": "https://openreview.net/forum?id=N2wYPMpifA", "poster": "/media/PosterPDFs/NeurIPS%202024/95466.png?t=1731264640.4305763", "project": "", "author_site": "Alexander Havrilla, Wenjing Liao", "tldr": "", "abstract": "When training deep neural networks, a model's generalization error is often observed to follow a power scaling law dependent both on the model size and the data size. Perhaps the best known example of such scaling laws are for transformer-based large language models (**LLMs**), where networks with billions of parameters are trained on trillions of tokens of text. Yet, despite sustained widespread interest, a rigorous understanding of why transformer scaling laws exist is still missing. To answer this question, we establish novel statistical estimation and mathematical approximation theories for transformers when the input data are concentrated on a low-dimensional manifold. Our theory predicts a power law between the generalization error and both the training data size and the network size for transformers, where the power depends on the intrinsic dimension $d$ of the training data. Notably, the constructed model architecture is shallow, requiring only logarithmic depth in $d$. By leveraging low-dimensional data structures under a manifold hypothesis, we are able to explain transformer scaling laws in a way which respects the data geometry. Moreover, we test our theory with empirical observation by training LLMs on natural language datasets. We find the observed empirical scaling laws closely agree with our theoretical predictions. Taken together, these results rigorously show the intrinsic dimension of data to be a crucial quantity affecting transformer scaling laws in both theory and practice.", "keywords": "scaling laws;LLMs;approximation theory;statistical theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Alexander Havrilla;Wenjing Liao", "authorids": "~Alexander_Havrilla2;~Wenjing_Liao1", "gender": "M;F", "homepage": "https://dahoas.github.io/;https://people.math.gatech.edu/~wliao60/", "dblp": ";39/9829", "google_scholar": ";cihT3uEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Alexander_Havrilla2;~Wenjing_Liao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhavrilla2024understanding,\ntitle={Understanding Scaling Laws with Statistical and Approximation Theory for Transformer Neural Networks on Intrinsically Low-dimensional Data},\nauthor={Alexander Havrilla and Wenjing Liao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N2wYPMpifA}\n}", "github": "", "reviewers": "e2w2;DzYy;1VDq;TAmh", "pdf_size": 1312578, "rating": "6;6;6;6", "confidence": "3;3;1;3", "soundness": "3;4;3;2", "novelty": "3;3;3;2", "presentation": "3;3;4;3", "wc_summary": "92;213;131;160", "wc_strengths": "21;86;86;53", "wc_weaknesses": "64;135;132;359", "wc_questions": "40;139;260;140", "wc_limitations": "58;36;1;14", "wc_review": "275;609;610;726", "wc_reply_reviewers": "17;0;790;1080", "wc_reply_authors": "0;724;1038;2105", "reply_reviewers": "1;0;2;4", "reply_authors": "1;2;3;7", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 149.0, 44.130488327232456 ], "wc_strengths_avg": [ 61.5, 26.98610753702727 ], "wc_weaknesses_avg": [ 172.5, 111.35640978408023 ], "wc_questions_avg": [ 144.75, 77.95952475483672 ], "wc_limitations_avg": [ 27.25, 21.718367802392518 ], "wc_review_avg": [ 555.0, 168.50964364095012 ], "wc_reply_reviewers_avg": [ 471.75, 474.49888039910064 ], "wc_reply_authors_avg": [ 966.75, 757.3345941524129 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16006205031937154024&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "AvaTaR: Optimizing LLM Agents for Tool Usage via Contrastive Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95465", "id": "N4quRxE19p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N4quRxE19p", "openreview": "https://openreview.net/forum?id=N4quRxE19p", "poster": "", "project": "", "author_site": "Shirley Wu, Shiyu Zhao, Qian Huang, Kexin Huang, Michihiro Yasunaga, Kaidi Cao, Vassilis Ioannidis, Karthik Subbian, Jure Leskovec, James Zou", "tldr": "", "abstract": "Large language model (LLM) agents have demonstrated impressive capabilities in utilizing external tools and knowledge to boost accuracy and reduce hallucinations. However, developing prompting techniques that enable LLM agents to effectively use these tools and knowledge remains a heuristic and labor-intensive task. Here, we introduce AvaTaR, a novel and automated framework that optimizes an LLM agent to effectively leverage provided tools, improving performance on a given task. During optimization, we design a comparator module to iteratively deliver insightful and comprehensive prompts to the LLM agent by contrastively reasoning between positive and negative examples sampled from training data. We demon- strate AvaTaR on four complex multimodal retrieval datasets featuring textual, visual, and relational information, and three general question-answering (QA) datasets. We find AvaTaR consistently outperforms state-of-the-art approaches across all seven tasks, exhibiting strong generalization ability when applied to novel cases and achieving an average relative improvement of 14% on the Hit@1 metric for the retrieval datasets and 13% for the QA datasets. Code and dataset are available at https://github.com/zou-group/avatar.", "keywords": "LLM agents;Tool utilization;Automatic prompt optimization;Complex retrieval;Question-Answering tasks", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Shirley Wu;Shiyu Zhao;Qian Huang;Kexin Huang;Michihiro Yasunaga;Kaidi Cao;Vassilis N. Ioannidis;Karthik Subbian;Jure Leskovec;James Zou", "authorids": "~Shirley_Wu1;~Shiyu_Zhao5;~Qian_Huang2;~Kexin_Huang1;~Michihiro_Yasunaga1;~Kaidi_Cao1;~Vassilis_N._Ioannidis1;~Karthik_Subbian1;~Jure_Leskovec1;~James_Zou1", "gender": "M;F;M;;M;;M;;;F", "homepage": "https://shiyu-zhao.netlify.app/;https://q-hwang.github.io/;https://www.kexinhuang.com/;;https://ai.stanford.edu/~kaidicao/;https://scholar.google.com/citations?hl=en&user=mjmiI4sAAAAJ&view_op=list_works&authuser=1;http://mailtosuka.googlepages.com;http://cs.stanford.edu/~jure/;;https://cs.stanford.edu/~shirwu", "dblp": ";07/4378.html;;202/1809;203/8207;;32/5843;l/JureLeskovec;;79/4173-2", "google_scholar": ";L3hkmG0AAAAJ;ogEXTOgAAAAJ;SieJYoEAAAAJ;https://scholar.google.com.hk/citations?user=4Zw1PJ8AAAAJ;;;Q_kKkIUAAAAJ;23ZXZvEAAAAJ;r2cVEucAAAAJ", "orcid": ";;;;;0000-0002-8367-0733;;0000-0002-5411-923X;;", "linkedin": "shiyu-zhao-1124a0266/;qian-huang-b20315149/;;;;;;leskovec/;;", "or_profile": "~Shiyu_Zhao5;~Qian_Huang2;~Kexin_Huang1;~Michihiro_Yasunaga1;~Kaidi_Cao1;~Vassilis_N._Ioannidis1;~Karthik_Subbian1;~Jure_Leskovec1;~James_Zou1;~Yingxin_Wu1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Amazon Web Services;Amazon;Kumo.AI;Stanford University;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;amazon.com;amazon.com;kumo.ai;stanford.edu;cs.stanford.edu", "position": "MS student;PhD student;PhD student;PhD student;PhD student;Applied Scientist II;Researcher;Chief Scientist;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nwu2024avatar,\ntitle={AvaTaR: Optimizing {LLM} Agents for Tool Usage via Contrastive Reasoning},\nauthor={Shirley Wu and Shiyu Zhao and Qian Huang and Kexin Huang and Michihiro Yasunaga and Kaidi Cao and Vassilis N. Ioannidis and Karthik Subbian and Jure Leskovec and James Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N4quRxE19p}\n}", "github": "", "reviewers": "JqLP;CkbF;vMWD", "pdf_size": 4231240, "rating": "4;6;6", "confidence": "5;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "47;86;69", "wc_strengths": "41;93;111", "wc_weaknesses": "497;88;94", "wc_questions": "58;94;8", "wc_limitations": "53;47;1", "wc_review": "696;408;283", "wc_reply_reviewers": "534;0;3", "wc_reply_authors": "883;264;0", "reply_reviewers": "1;0;1", "reply_authors": "3;2;1", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.33333333333333, 15.96524001977073 ], "wc_strengths_avg": [ 81.66666666666667, 29.67977238606942 ], "wc_weaknesses_avg": [ 226.33333333333334, 191.40590958019612 ], "wc_questions_avg": [ 53.333333333333336, 35.264083081168515 ], "wc_limitations_avg": [ 33.666666666666664, 23.22833518691246 ], "wc_review_avg": [ 462.3333333333333, 172.92837309771411 ], "wc_reply_reviewers_avg": [ 179.0, 251.0258950785755 ], "wc_reply_authors_avg": [ 382.3333333333333, 370.06696090782754 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1331455691983742636&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;amazon.com;amazon.com;kumo.ai;stanford.edu;cs.stanford.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;1;2;0;0", "aff_unique_norm": "Stanford University;Amazon;Kumo.AI", "aff_unique_dep": ";Amazon Web Services;", "aff_unique_url": "https://www.stanford.edu;https://aws.amazon.com;https://www.kumo.ai", "aff_unique_abbr": "Stanford;AWS;Kumo.AI", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On Divergence Measures for Training GFlowNets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95464", "id": "N5H4z0Pzvn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N5H4z0Pzvn", "openreview": "https://openreview.net/forum?id=N5H4z0Pzvn", "poster": "/media/PosterPDFs/NeurIPS%202024/95464.png?t=1733184242.7037454", "project": "", "author_site": "Tiago Silva, Eliezer de Souza da Silva, Diego Mesquita", "tldr": "", "abstract": "Generative Flow Networks (GFlowNets) are amortized samplers of unnormalized distributions over compositional objects with applications to causal discovery, NLP, and drug design. Recently, it was shown that GFlowNets can be framed as a hierarchical variational inference (HVI) method for discrete distributions. Despite this equivalence, attempts to train GFlowNets using traditional divergence measures as learning objectives were unsuccessful. Instead, current approaches for training these models rely on minimizing the log-squared difference between a proposal (forward policy) and a target (backward policy) distributions. In this work, we first formally extend the relationship between GFlowNets and HVI to distributions on arbitrary measurable topological spaces. Then, we empirically show that the ineffectiveness of divergence-based learning of GFlowNets is due to large gradient variance of the corresponding stochastic objectives. To address this issue, we devise a collection of provably variance-reducing control variates for gradient estimation based on the REINFORCE leave-one-out estimator. Our experimental results suggest that the resulting algorithms often accelerate training convergence when compared against previous approaches. All in all, our work contributes by narrowing the gap between GFlowNet training and HVI, paving the way for algorithmic advancements inspired by the divergence minimization viewpoint.", "keywords": "GFlowNets;Variational Inference", "primary_area": "generative_models", "supplementary_material": "/attachment/d395552a57d47c507134ffb67eebc904e4fa7add.zip", "author": "Tiago Silva;Eliezer de Souza da Silva;Diego Mesquita", "authorids": "~Tiago_Silva4;~Eliezer_de_Souza_da_Silva1;~Diego_Mesquita1", "gender": "M;M;M", "homepage": "https://github.com/tiagodsilva;https://sereliezer.github.io/;https://weakly-informative.github.io", "dblp": ";152/1350;163/4293", "google_scholar": ";EK_dBqoAAAAJ;", "orcid": ";0000-0002-5462-9782;", "linkedin": ";eliezersilva/?originalSubdomain=no;", "or_profile": "~Tiago_Silva4;~Eliezer_de_Souza_da_Silva1;~Diego_Mesquita1", "aff": "Escola de Matem\u00e1tica Aplicada;Get\u00falio Vargas Foundation (FGV);Getulio Vargas Foundation", "aff_domain": "fgv.br;fgv.br;fgv.br", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nsilva2024on,\ntitle={On Divergence Measures for Training {GF}lowNets},\nauthor={Tiago Silva and Eliezer de Souza da Silva and Diego Mesquita},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N5H4z0Pzvn}\n}", "github": "", "reviewers": "NozV;Gn1e;3sBX;HRNm", "pdf_size": 869624, "rating": "4;5;7;7", "confidence": "5;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;4", "presentation": "2;2;4;4", "wc_summary": "103;112;123;127", "wc_strengths": "38;88;156;70", "wc_weaknesses": "90;356;335;2", "wc_questions": "99;2;105;50", "wc_limitations": "1;2;54;6", "wc_review": "331;560;773;255", "wc_reply_reviewers": "0;53;482;0", "wc_reply_authors": "0;19;751;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;2;3;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 116.25, 9.41740410091868 ], "wc_strengths_avg": [ 88.0, 43.15089802078283 ], "wc_weaknesses_avg": [ 195.75, 153.1280101744942 ], "wc_questions_avg": [ 64.0, 41.67133307202926 ], "wc_limitations_avg": [ 15.75, 22.16275028059469 ], "wc_review_avg": [ 479.75, 203.1469603513673 ], "wc_reply_reviewers_avg": [ 133.75, 202.223113169588 ], "wc_reply_authors_avg": [ 192.5, 322.54340793139767 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1042196021529620332&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fgv.br;fgv.br;fgv.br", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Escola de Matem\u00e1tica Aplicada;Get\u00falio Vargas Foundation;Getulio Vargas Foundation", "aff_unique_dep": "Escola de Matem\u00e1tica Aplicada;;", "aff_unique_url": ";https://www.fgv.br;https://fgv.br", "aff_unique_abbr": ";FGV;FGV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Brazil" }, { "title": "Natural Counterfactuals With Necessary Backtracking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95463", "id": "N6zJ8DclC2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N6zJ8DclC2", "openreview": "https://openreview.net/forum?id=N6zJ8DclC2", "poster": "", "project": "", "author_site": "GUANG-YUAN HAO, Jiji Zhang, Biwei Huang, Hao Wang, Kun Zhang", "tldr": "", "abstract": "Counterfactual reasoning is pivotal in human cognition and especially important for providing explanations and making decisions. While Judea Pearl's influential approach is theoretically elegant, its generation of a counterfactual scenario often requires too much deviation from the observed scenarios to be feasible, as we show using simple examples. To mitigate this difficulty, we propose a framework of natural counterfactuals and a method for generating counterfactuals that are more feasible with respect to the actual data distribution. Our methodology incorporates a certain amount of backtracking when needed, allowing changes in causally preceding variables to minimize deviations from realistic scenarios. Specifically, we introduce a novel optimization framework that permits but also controls the extent of backtracking with a \"naturalness'' criterion. Empirical experiments demonstrate the effectiveness of our method. The code is available at https://github.com/GuangyuanHao/natural_counterfactuals.", "keywords": "causal model;counterfactual reasoning;counterfactual generation;normalizing flows", "primary_area": "causal_inference", "supplementary_material": "", "author": "Guang-Yuan Hao;Jiji Zhang;Biwei Huang;Hao Wang;Kun Zhang", "authorids": "~Guang-Yuan_Hao1;~Jiji_Zhang1;~Biwei_Huang1;~Hao_Wang3;~Kun_Zhang1", "gender": ";M;F;M;M", "homepage": "https://github.com/GuangyuanHao;;;http://www.andrew.cmu.edu/user/kunz1/;http://www.wanghao.in", "dblp": ";99/3659;165/3288;96/3115-1;w/HaoWang-14", "google_scholar": ";5GAikocAAAAJ;;RGoypN4AAAAJ;NrOA9QoAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Guang-Yuan_Hao1;~Jiji_Zhang1;~Biwei_Huang1;~Kun_Zhang1;~Hao_Wang4", "aff": "Hong Kong University of Science and Technology;The Chinese University of Hong Kong;University of California, San Diego;Carnegie Mellon University;Rutgers University", "aff_domain": "ust.hk;cuhk.edu.hk;ucsd.edu;cmu.edu;cs.rutgers.edu", "position": "MS student;Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhao2024natural,\ntitle={Natural Counterfactuals With Necessary Backtracking},\nauthor={Guang-Yuan Hao and Jiji Zhang and Biwei Huang and Hao Wang and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N6zJ8DclC2}\n}", "github": "", "reviewers": "JBox;v2As;DQof;fYWn", "pdf_size": 3419161, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "38;47;81;227", "wc_strengths": "21;32;57;25", "wc_weaknesses": "286;16;73;98", "wc_questions": "246;822;39;129", "wc_limitations": "1;1;4;34", "wc_review": "592;918;254;513", "wc_reply_reviewers": "695;545;0;17", "wc_reply_authors": "2779;1261;0;0", "reply_reviewers": "3;2;0;1", "reply_authors": "8;4;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.25, 76.04398398295555 ], "wc_strengths_avg": [ 33.75, 13.988834833537782 ], "wc_weaknesses_avg": [ 118.25, 101.30739114200898 ], "wc_questions_avg": [ 309.0, 305.1384931469643 ], "wc_limitations_avg": [ 10.0, 13.910427743243556 ], "wc_review_avg": [ 569.25, 237.00777940818736 ], "wc_reply_reviewers_avg": [ 314.25, 310.3734645552032 ], "wc_reply_authors_avg": [ 1010.0, 1143.7396994071685 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.5, 2.8722813232690143 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xJTi9kh2JnAJ:scholar.google.com/&scioq=Natural+Counterfactuals+With+Necessary+Backtracking&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "ust.hk;cuhk.edu.hk;ucsd.edu;cmu.edu;cs.rutgers.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Hong Kong University of Science and Technology;Chinese University of Hong Kong;University of California, San Diego;Carnegie Mellon University;Rutgers University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ust.hk;https://www.cuhk.edu.hk;https://www.ucsd.edu;https://www.cmu.edu;https://www.rutgers.edu", "aff_unique_abbr": "HKUST;CUHK;UCSD;CMU;Rutgers", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;San Diego;", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "TFG: Unified Training-Free Guidance for Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95462", "id": "N8YbGX98vc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=N8YbGX98vc", "openreview": "https://openreview.net/forum?id=N8YbGX98vc", "poster": "", "project": "", "author_site": "Haotian Ye, Haowei Lin, Jiaqi Han, Minkai Xu, Sheng Liu, Yitao Liang, Jianzhu Ma, James Zou, Stefano Ermon", "tldr": "", "abstract": "Given an unconditional diffusion model and a predictor for a target property of interest (e.g., a classifier), the goal of training-free guidance is to generate samples with desirable target properties without additional training. Existing methods, though effective in various individual applications, often lack theoretical grounding and rigorous testing on extensive benchmarks. As a result, they could even fail on simple tasks, and applying them to a new problem becomes unavoidably difficult. This paper introduces a novel algorithmic framework encompassing existing methods as special cases, unifying the study of training-free guidance into the analysis of an algorithm-agnostic design space. Via theoretical and empirical investigation, we propose an efficient and effective hyper-parameter searching strategy that can be readily applied to any downstream task. We systematically benchmark across 7 diffusion models on 16 tasks with 40 targets, and improve performance by 8.5% on average. Our framework and benchmark offer a solid foundation for conditional generation in a training-free manner.", "keywords": "diffusion model;conditional generation;training-free guidance", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Haotian Ye;Haowei Lin;Jiaqi Han;Minkai Xu;Sheng Liu;Yitao Liang;Jianzhu Ma;James Zou;Stefano Ermon", "authorids": "~Haotian_Ye1;~Haowei_Lin1;~Jiaqi_Han2;~Minkai_Xu1;~Sheng_Liu2;~Yitao_Liang1;~Jianzhu_Ma2;~James_Zou1;~Stefano_Ermon1", "gender": "M;M;M;M;;M;M;;M", "homepage": "https://haotianye.com;https://linhaowei1.github.io/;https://hanjq17.github.io;https://minkaixu.com;https://shengliu66.github.io/;https://web.cs.ucla.edu/~yliang/;https://majianzhu.com/;;http://cs.stanford.edu/~ermon/", "dblp": "284/0539;235/2798;235/0412;257/3355;;173/4969;24/9080.html;;47/8135", "google_scholar": "VU4chlsAAAAJ;Ng-DmJgAAAAJ;AKppgMAAAAAJ;https://scholar.google.com/citations?hl=en;rzhzR-cAAAAJ;KVzR1XEAAAAJ;;23ZXZvEAAAAJ;", "orcid": ";0009-0006-9809-4835;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Haotian_Ye1;~Haowei_Lin1;~Jiaqi_Han2;~Minkai_Xu1;~Sheng_Liu2;~Yitao_Liang1;~Jianzhu_Ma2;~James_Zou1;~Stefano_Ermon1", "aff": "Stanford University;Peking University;Computer Science Department, Stanford University;Stanford University;Stanford University;Peking University;Tsinghua University;Stanford University;Stanford University", "aff_domain": "stanford.edu;pku.edu.cn;cs.stanford.edu;stanford.edu;stanford.edu;pku.edu.cn;tsinghua.edu.cn;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nye2024tfg,\ntitle={{TFG}: Unified Training-Free Guidance for Diffusion Models},\nauthor={Haotian Ye and Haowei Lin and Jiaqi Han and Minkai Xu and Sheng Liu and Yitao Liang and Jianzhu Ma and James Zou and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=N8YbGX98vc}\n}", "github": "", "reviewers": "DCpp;aTDj;J9FB;Xm6V", "pdf_size": 19628041, "rating": "5;6;6;8", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "56;195;74;104", "wc_strengths": "39;100;72;113", "wc_weaknesses": "116;189;165;210", "wc_questions": "5;121;189;489", "wc_limitations": "6;18;17;1", "wc_review": "222;623;517;917", "wc_reply_reviewers": "28;18;0;272", "wc_reply_authors": "54;133;54;1659", "reply_reviewers": "1;1;0;2", "reply_authors": "2;3;2;5", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.25, 53.48539520280279 ], "wc_strengths_avg": [ 81.0, 28.416544476765644 ], "wc_weaknesses_avg": [ 170.0, 35.00714212842859 ], "wc_questions_avg": [ 201.0, 178.8183435780569 ], "wc_limitations_avg": [ 10.5, 7.22841614740048 ], "wc_review_avg": [ 569.75, 248.56123491003177 ], "wc_reply_reviewers_avg": [ 79.5, 111.5918903863538 ], "wc_reply_authors_avg": [ 475.0, 684.3431156956282 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16292430723111541115&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;pku.edu.cn;cs.stanford.edu;stanford.edu;stanford.edu;pku.edu.cn;tsinghua.edu.cn;stanford.edu;stanford.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;1;2;0;0", "aff_unique_norm": "Stanford University;Peking University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;http://www.pku.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Stanford;Peking U;THU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0;0;1;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Scaling laws for learning with real and surrogate data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95461", "id": "NAcHv7vtL2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NAcHv7vtL2", "openreview": "https://openreview.net/forum?id=NAcHv7vtL2", "poster": "", "project": "", "author_site": "Ayush Jain, Andrea Montanari, Eren Sasoglu", "tldr": "", "abstract": "Collecting large quantities of high-quality data can be prohibitively expensive or impractical, and a bottleneck in machine learning. One may instead augment a small set of $n$ data points from the target distribution with data from more accessible sources, e.g. data collected under different circumstances or synthesized by generative models. We refer to such data as `surrogate data'. We study a weighted empirical risk minimization (ERM) approach for integrating surrogate data into training. We analyze mathematically this method under several classical statistical models, and validate our findings empirically on datasets from different domains. Our main findings are: $(i)$ Integrating surrogate data can significantly reduce the test error on the original distribution. Surprisingly, this can happen even when the surrogate data is unrelated to the original ones. We trace back this behavior to the classical Stein's paradox. $(ii)$ In order to reap the benefit of surrogate data, it is crucial to use optimally weighted ERM. $(iii)$ The test error of models trained on mixtures of real and surrogate data is approximately described by a scaling law. This scaling law can be used to predict the optimal weighting scheme, and to choose the amount of surrogate data to add.", "keywords": "Machine learning;Synthetic data;Surrogate data;Scaling laws;Linear regression", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ayush Jain;Andrea Montanari;Eren Sasoglu", "authorids": "~Ayush_Jain4;~Andrea_Montanari1;~Eren_Sasoglu1", "gender": "M;M;", "homepage": ";;https://www.ocf.berkeley.edu/~sasoglu/", "dblp": ";83/5094;", "google_scholar": "i1NrG8UAAAAJ;;lQbIarsAAAAJ", "orcid": ";;", "linkedin": ";;eren-sasoglu-141635a7", "or_profile": "~Ayush_Jain4;~Andrea_Montanari1;~Eren_Sasoglu1", "aff": "Granica;Stanford University;Granica", "aff_domain": "granica.ai;stanford.edu;granica.ai", "position": "Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\njain2024scaling,\ntitle={Scaling laws for learning with real and surrogate data},\nauthor={Ayush Jain and Andrea Montanari and Eren Sasoglu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NAcHv7vtL2}\n}", "github": "", "reviewers": "XfMG;3jgb;yPKz;zfeC;Fovm", "pdf_size": 1068053, "rating": "3;4;6;8;9", "confidence": "3;4;3;4;3", "soundness": "2;2;3;4;4", "novelty": "2;3;2;3;4", "presentation": "1;3;2;4;4", "wc_summary": "119;83;89;53;62", "wc_strengths": "52;12;58;37;85", "wc_weaknesses": "355;150;154;1;119", "wc_questions": "2;7;53;177;67", "wc_limitations": "11;41;67;17;12", "wc_review": "539;293;421;285;345", "wc_reply_reviewers": "339;121;29;12;7", "wc_reply_authors": "1010;323;18;0;0", "reply_reviewers": "2;2;1;1;1", "reply_authors": "3;2;2;1;1", "rating_avg": [ 6.0, 2.280350850198276 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 81.2, 23.051247254758255 ], "wc_strengths_avg": [ 48.8, 24.07820591323199 ], "wc_weaknesses_avg": [ 155.8, 114.04806004487757 ], "wc_questions_avg": [ 61.2, 63.17087936699947 ], "wc_limitations_avg": [ 29.6, 21.648094604375693 ], "wc_review_avg": [ 376.6, 94.57610691924256 ], "wc_reply_reviewers_avg": [ 101.6, 125.68468482675208 ], "wc_reply_authors_avg": [ 270.2, 389.7981015859364 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6499375815276699310&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "granica.ai;stanford.edu;granica.ai", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Granica;Stanford University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.stanford.edu", "aff_unique_abbr": ";Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "The Power of Hard Attention Transformers on Data Sequences: A formal language theoretic perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95460", "id": "NBq1vmfP4X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NBq1vmfP4X", "openreview": "https://openreview.net/forum?id=NBq1vmfP4X", "poster": "/media/PosterPDFs/NeurIPS%202024/95460.png?t=1733391815.8840196", "project": "", "author_site": "Pascal Bergstr\u00e4\u00dfer, Chris K\u00f6cher, Anthony Lin, Georg Zetzsche", "tldr": "", "abstract": "Formal language theory has recently been successfully employed to unravel \n the power of transformer encoders. This setting is primarily applicable in\n Natural Language Processing (NLP), as a token embedding function (where\n a bounded number of tokens is admitted) is first applied before feeding \n the input to the transformer. \n On certain kinds of data (e.g. time \n series), we want our transformers to be able to handle arbitrary\n input sequences of numbers (or tuples thereof) without a priori\n limiting the values of these numbers. In this\n paper, we initiate the study of the expressive power of transformer encoders\n on sequences of data (i.e. tuples of numbers).\n Our results indicate an increase in expressive power of \n hard attention transformers over data sequences, in stark contrast to the \n case of strings.\n In particular, we prove that Unique Hard Attention Transformers (UHAT) over \n inputs as data sequences no longer lie within the circuit complexity \n class AC0 (even without positional encodings), unlike the case of string\n inputs, \n but are still within the complexity class TC0 (even with positional\n encodings). Over strings, UHAT without positional encodings capture only\n regular languages. In contrast, we show that over data sequences\n UHAT can capture non-regular properties.\n Finally, we show that UHAT capture languages \n definable in an extension of linear temporal logic with unary numeric\n predicates and arithmetics.", "keywords": "Theory;Attention;Circuit complexity;Formal languages;Data Sequences;Expressiveness", "primary_area": "other", "supplementary_material": "", "author": "Pascal Bergstr\u00e4\u00dfer;Chris K\u00f6cher;Anthony Widjaja Lin;Georg Zetzsche", "authorids": "~Pascal_Bergstr\u00e4\u00dfer1;~Chris_K\u00f6cher2;~Anthony_Widjaja_Lin1;~Georg_Zetzsche1", "gender": ";M;M;M", "homepage": ";https://people.mpi-sws.org/~ckoecher/;https://anthonywlin.github.io/;http://zetzsche.xyz", "dblp": "287/2003;200/1026;38/2655;24/651.html", "google_scholar": ";;https://scholar.google.co.uk/citations?user=__5nnYUAAAAJ;BHNdXKxbM-8C", "orcid": "0000-0002-4681-2149;0000-0003-4575-9339;0000-0003-4715-5096;0000-0002-6421-4388", "linkedin": ";;;", "or_profile": "~Pascal_Bergstr\u00e4\u00dfer1;~Chris_K\u00f6cher2;~Anthony_Widjaja_Lin1;~Georg_Zetzsche1", "aff": "Universit\u00e4t Kaiserslautern;MPI-SWS;Universit\u00e4t Kaiserslautern;MPI-SWS", "aff_domain": "uni-kl.de;mpi-sws.org;uni-kl.de;mpi-sws.org", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nbergstr{\\\"a}{\\ss}er2024the,\ntitle={The Power of Hard Attention Transformers on Data Sequences: A formal language theoretic perspective},\nauthor={Pascal Bergstr{\\\"a}{\\ss}er and Chris K{\\\"o}cher and Anthony Widjaja Lin and Georg Zetzsche},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NBq1vmfP4X}\n}", "github": "", "reviewers": "TvTa;exed;iac6", "pdf_size": 540030, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;4;4", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "112;132;126", "wc_strengths": "89;142;48", "wc_weaknesses": "162;153;184", "wc_questions": "6;23;82", "wc_limitations": "11;40;1", "wc_review": "380;490;441", "wc_reply_reviewers": "39;25;4", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.33333333333333, 8.379870059984357 ], "wc_strengths_avg": [ 93.0, 38.47943173523573 ], "wc_weaknesses_avg": [ 166.33333333333334, 13.02134998974974 ], "wc_questions_avg": [ 37.0, 32.567877834864625 ], "wc_limitations_avg": [ 17.333333333333332, 16.539514973407037 ], "wc_review_avg": [ 437.0, 44.99629614386796 ], "wc_reply_reviewers_avg": [ 22.666666666666668, 14.38363267359428 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5258833736300279341&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uni-kl.de;mpi-sws.org;uni-kl.de;mpi-sws.org", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Kaiserslautern;Max Planck Institute for Software Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-kl.de;https://www.mpi-sws.org", "aff_unique_abbr": "Uni KL;MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "NC0Bjl4uTf", "title": "Chinese Inertial GAN for Writing Signal Generation and Recognition", "track": "main", "status": "Reject", "tldr": "", "abstract": "Disabled people constitute a significant part of the global population, deserving of inclusive consideration and empathetic support. However, the current human-computer interaction based on keyboards may not meet the requirements of disabled people.\u00a0The small size, ease of wearing, and low cost of inertial sensors make inertial sensor-based writing recognition\u00a0a promising human-computer interaction option for disabled people. However, accurate recognition relies on massive inertial signal samples, which are hard to collect for the Chinese context due to the vast number of characters. Therefore, we design a Chinese inertial generative adversarial network (CI-GAN) containing Chinese glyph encoding (CGE), forced optimal transport (FOT), and semantic\u00a0relevance alignment (SRA) to acquire unlimited high-quality training samples. Unlike existing vectorization focusing on the meaning of Chinese characters, CGE represents the shape and stroke features, providing glyph guidance for GAN to generate writing signals. FOT constrains feature consistency between generated and\u00a0real\u00a0signals through the designed forced feature matching mechanism, meanwhile addressing GANs' mode collapse and mixing issues by introducing Wasserstein distance. SRA captures the semantic relevance between various Chinese glyphs and injects this information into the GAN to establish batch-level constraints and set higher standards of generated\u00a0signal quality.\u00a0By utilizing the massive training samples provided by CI-GAN, the performance of six widely used classifiers is improved from 6.7\\%\u00a0to 98.4\\%, indicating\u00a0that CI-GAN constructs a flexible and efficient data platform for Chinese inertial writing recognition.\u00a0Furthermore, we\u00a0release\u00a0the first Chinese writing recognition dataset based on inertial sensors in GitHub.", "keywords": "Inertial Sensor;Signal Generation;Generative Adversarial Network;Chinese Character;Writing Recognition", "primary_area": "generative_models", "supplementary_material": "/attachment/59d2e940a30c3ab8d02fdc78e1eb0e1c000e7107.zip", "author": "Yifeng Wang;Yi Zhao", "authorids": "~Yifeng_Wang1;~Yi_Zhao7", "gender": "M;M", "homepage": "https://www.aminer.cn/profile/yifeng-wang/5618dc6545cedb3397d0eb38;", "dblp": ";51/4138-7", "google_scholar": "https://scholar.google.com.hk/citations?user=3KKVM5gAAAAJ;https://scholar.google.co.jp/citations?user=JMoMObMAAAAJ", "orcid": ";0000-0003-1664-8613", "linkedin": ";", "or_profile": "~Yifeng_Wang1;~Yi_Zhao7", "aff": "National University of Singapore;Harbin Institute of Technology", "aff_domain": "u.nus.edu;hit.edu.cn", "position": "Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024chinese,\ntitle={Chinese Inertial {GAN} for Writing Signal Generation and Recognition},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=NC0Bjl4uTf}\n}", "github": "", "project": "", "reviewers": "NmsP;3apC;NSpH", "site": "https://openreview.net/forum?id=NC0Bjl4uTf", "pdf_size": 2292468, "rating": "3;5;6", "confidence": "4;2;3", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "35;63;72", "wc_strengths": "15;50;33", "wc_weaknesses": "185;78;79", "wc_questions": "2;8;48", "wc_limitations": "1;58;22", "wc_review": "238;257;254", "wc_reply_reviewers": "184;0;0", "wc_reply_authors": "862;201;106", "reply_reviewers": "1;0;0", "reply_authors": "3;2;2", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.666666666666664, 15.755069730795297 ], "wc_strengths_avg": [ 32.666666666666664, 14.29063407348401 ], "wc_weaknesses_avg": [ 114.0, 50.20624131187941 ], "wc_questions_avg": [ 19.333333333333332, 20.417857108151406 ], "wc_limitations_avg": [ 27.0, 23.53720459187964 ], "wc_review_avg": [ 249.66666666666666, 8.339997335464536 ], "wc_reply_reviewers_avg": [ 61.333333333333336, 86.73843182554982 ], "wc_reply_authors_avg": [ 389.6666666666667, 336.23437592779766 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6546536707079772, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17106858299232719365&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;http://www.hit.edu.cn/", "aff_unique_abbr": "NUS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1", "aff_country_unique": "Singapore;China" }, { "title": "Multivariate Stochastic Dominance via Optimal Transport and Applications to Models Benchmarking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95459", "id": "NCX3Kgb1nh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NCX3Kgb1nh", "openreview": "https://openreview.net/forum?id=NCX3Kgb1nh", "poster": "/media/PosterPDFs/NeurIPS%202024/95459.png?t=1733712877.223725", "project": "", "author_site": "Gabriel Rioux, Apoorva Nitsure, Mattia Rigotti, Kristjan Greenewald, Youssef Mroueh", "tldr": "", "abstract": "Stochastic dominance is an important concept in probability theory, econometrics and social choice theory for robustly modeling agents' preferences between random outcomes. While many works have been dedicated to the univariate case,\nlittle has been done in the multivariate scenario, wherein an agent has to decide between different multivariate outcomes. By exploiting a characterization of multivariate first stochastic dominance in terms of couplings, we introduce a statistic that assesses multivariate almost stochastic dominance under the framework of Optimal Transport with a smooth cost. Further, we introduce an entropic regularization of this statistic, and establish a central limit theorem (CLT) and consistency of the bootstrap procedure for the empirical statistic. Armed with this CLT, we propose a hypothesis testing framework as well as an efficient implementation using the Sinkhorn algorithm. We showcase our method in comparing and benchmarking Large Language Models that are evaluated on multiple metrics. Our multivariate stochastic dominance test allows us to capture the dependencies between the metrics in order to make an informed and statistically significant decision on the relative performance of the models.", "keywords": "Optimal Transport;Stochastic dominance;hypothesis testing;Central limit theorem;LLM benchmarking", "primary_area": "learning_theory", "supplementary_material": "/attachment/38d6fe13862a8453cfa0b5e818aa4bd8792b5829.zip", "author": "Gabriel Rioux;Apoorva Nitsure;Mattia Rigotti;Kristjan Greenewald;Youssef Mroueh", "authorids": "~Gabriel_Rioux1;~Apoorva_Nitsure1;~Mattia_Rigotti1;~Kristjan_Greenewald1;~Youssef_Mroueh1", "gender": "M;;;;", "homepage": "https://people.cam.cornell.edu/ger84/;;http://www.matrig.net;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Kristjan.H.Greenewald;", "dblp": ";;01/9816;146/0563;http://dblp.uni-trier.de/pers/hd/m/Mroueh:Youssef", "google_scholar": "1FZoiTAAAAAJ;;TmHt7CwAAAAJ;L3zNUG4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6466-2810;;", "linkedin": ";apoorvanitsure/;;;", "or_profile": "~Gabriel_Rioux1;~Apoorva_Nitsure1;~Mattia_Rigotti1;~Kristjan_Greenewald1;~Youssef_Mroueh1", "aff": "Cornell University;International Business Machines;International Business Machines;MIT-IBM Watson AI Lab, IBM Research;IBM", "aff_domain": "cornell.edu;ibm.com;ibm.com;ibm.com;us.ibm.com", "position": "PhD student;Researcher;Researcher;Research Scientist;Research Staff member", "bibtex": "@inproceedings{\nrioux2024multivariate,\ntitle={Multivariate Stochastic Dominance via Optimal Transport and Applications to Models Benchmarking},\nauthor={Gabriel Rioux and Apoorva Nitsure and Mattia Rigotti and Kristjan Greenewald and Youssef Mroueh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NCX3Kgb1nh}\n}", "github": "", "reviewers": "a9kp;RTCD;w1Lw;i6xT", "pdf_size": 683369, "rating": "6;6;6;6", "confidence": "3;3;2;1", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "155;116;55;77", "wc_strengths": "61;118;22;47", "wc_weaknesses": "37;71;143;111", "wc_questions": "136;61;2;42", "wc_limitations": "11;71;9;2", "wc_review": "400;437;231;279", "wc_reply_reviewers": "46;0;27;0", "wc_reply_authors": "473;614;0;855", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.75, 38.18622133702155 ], "wc_strengths_avg": [ 62.0, 35.220732530712645 ], "wc_weaknesses_avg": [ 90.5, 40.05933099790859 ], "wc_questions_avg": [ 60.25, 48.64347335460329 ], "wc_limitations_avg": [ 23.25, 27.770262872360426 ], "wc_review_avg": [ 336.75, 84.51146371942684 ], "wc_reply_reviewers_avg": [ 18.25, 19.447043477094404 ], "wc_reply_authors_avg": [ 485.5, 311.8128445077271 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12911223703035260903&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "cornell.edu;ibm.com;ibm.com;ibm.com;us.ibm.com", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Cornell University;International Business Machines Corporation;IBM", "aff_unique_dep": ";;AI Lab", "aff_unique_url": "https://www.cornell.edu;https://www.ibm.com;https://www.ibmwatsonai.org/", "aff_unique_abbr": "Cornell;IBM;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Harmony4D: A Video Dataset for In-The-Wild Close Human Interactions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97726", "id": "NCaGHtbkKo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NCaGHtbkKo", "openreview": "https://openreview.net/forum?id=NCaGHtbkKo", "poster": "/media/PosterPDFs/NeurIPS%202024/97726.png?t=1730337113.0633488", "project": "", "author_site": "Rawal Khirodkar, Jyun-Ting Song, Jinkun Cao, Zhengyi Luo, Kris Kitani", "tldr": "", "abstract": "Understanding how humans interact with each other is key to building realistic multi-human virtual reality systems. This area remains relatively unexplored due to the lack of large-scale datasets. Recent datasets focusing on this issue mainly consist of activities captured entirely in controlled indoor environments with choreographed actions, significantly affecting their diversity. To address this, we introduce Harmony4D, a multi-view video dataset for human-human interaction featuring in-the-wild activities such as wrestling, dancing, MMA,\nand more. We use a flexible multi-view capture system to record these dynamic activities and provide annotations for human detection, tracking, 2D/3D pose estimation, and mesh recovery for closely interacting subjects. We propose a novel markerless algorithm to track 3D human poses in severe occlusion and close interaction to obtain our annotations with minimal manual intervention. Harmony4D consists of 1.66 million images and 3.32 million human instances from more than 20 synchronized cameras with 208 video sequences spanning diverse environments and 24 unique subjects. We rigorously evaluate existing state-of-the-art methods for mesh recovery and highlight their significant limitations in modeling close interaction scenarios. Additionally, we fine-tune a pre-trained HMR2.0 model on Harmony4D and demonstrate an improved performance of 54.8% PVE in scenes with severe occlusion and contact. \u201cHarmony\u2014a cohesive alignment of human behaviors.\" Code and data are available at https://jyuntins.github.io/harmony4d/.", "keywords": "Human Pose Estimation;3D Dataset;Multi-person Interaction", "primary_area": "", "supplementary_material": "/attachment/a7f1c270c1baeb0b983bcdf505b872c79b7bbeef.pdf", "author": "Rawal Khirodkar;Jyun-Ting Song;Jinkun Cao;Zhengyi Luo;Kris M. Kitani", "authorids": "~Rawal_Khirodkar1;~Jyun-Ting_Song1;~Jinkun_Cao1;~Zhengyi_Luo1;~Kris_M._Kitani1", "gender": "M;M;M;M;M", "homepage": "https://rawalkhirodkar.github.io/;https://jyuntins.github.io/;https://www.jinkuncao.com;https://zhengyiluo.github.io/;http://www.cs.cmu.edu/~kkitani/", "dblp": "230/4108;;224/0126;;42/163", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;N1dz8cAAAAAJ;xDtTbmQAAAAJ;lHPTxGsAAAAJ;yv3sH74AAAAJ", "orcid": ";;;;0000-0002-9389-4060", "linkedin": "rawalkhirodkar/;;;zhengyi-zen-luo-726156105/;", "or_profile": "~Rawal_Khirodkar1;~Jyun-Ting_Song1;~Jinkun_Cao1;~Zhengyi_Luo1;~Kris_M._Kitani1", "aff": "Meta;Carnegie Mellon University;Carnegie Mellon University;Meta Platforms, Inc.;Carnegie Mellon University", "aff_domain": "meta.com;cmu.edu;andrew.cmu.edu;meta.com;cmu.edu", "position": "Researcher;MS student;PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\nkhirodkar2024harmonyd,\ntitle={Harmony4D: A Video Dataset for In-The-Wild Close Human Interactions},\nauthor={Rawal Khirodkar and Jyun-Ting Song and Jinkun Cao and Zhengyi Luo and Kris M. Kitani},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=NCaGHtbkKo}\n}", "github": "", "reviewers": "nxLE;ebSy;h7vn", "pdf_size": 17423243, "rating": "6;6;7", "confidence": "4;5;4", "wc_summary_and_contributions": "51;71;113", "wc_strengths": "21;86;65", "wc_improvement": "60;86;42", "wc_limitations": "1;19;22", "wc_correctness": "1;19;13", "wc_clarity": "1;3;6", "wc_relation_to_prior_work": "1;11;11", "wc_documentation": "1;27;11", "wc_additional_feedback": "1;1;1", "wc_review": "138;323;284", "wc_reply_reviewers": "0;24;80", "wc_reply_authors": "14;11;15", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 78.33333333333333, 25.837096500101467 ], "wc_strengths_avg": [ 57.333333333333336, 27.0842307543625 ], "wc_improvement_avg": [ 62.666666666666664, 18.06162291219209 ], "wc_limitations_avg": [ 14.0, 9.273618495495704 ], "wc_correctness_avg": [ 11.0, 7.483314773547883 ], "wc_clarity_avg": [ 3.3333333333333335, 2.0548046676563256 ], "wc_relation_to_prior_work_avg": [ 7.666666666666667, 4.714045207910317 ], "wc_documentation_avg": [ 13.0, 10.708252269472673 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 248.33333333333334, 79.62551238279238 ], "wc_reply_reviewers_avg": [ 34.666666666666664, 33.51948023993743 ], "wc_reply_authors_avg": [ 13.333333333333334, 1.699673171197595 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2063198431760330858&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "meta.com;cmu.edu;andrew.cmu.edu;meta.com;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Meta;Carnegie Mellon University", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.cmu.edu", "aff_unique_abbr": "Meta;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DiPEx: Dispersing Prompt Expansion for Class-Agnostic Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95458", "id": "NDs9Ejz4Pe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NDs9Ejz4Pe", "openreview": "https://openreview.net/forum?id=NDs9Ejz4Pe", "poster": "/media/PosterPDFs/NeurIPS%202024/95458.png?t=1731756703.4988358", "project": "", "author_site": "Jia S Lim, Zhuoxiao Chen, Zhi Chen, Mahsa Baktashmotlagh, Xin Yu, Zi Huang, Yadan Luo", "tldr": "", "abstract": "Class-agnostic object detection (OD) can be a cornerstone or a bottleneck for many downstream vision tasks. Despite considerable advancements in bottom-up and multi-object discovery methods that leverage basic visual cues to identify salient objects, consistently achieving a high recall rate remains difficult due to the diversity of object types and their contextual complexity. In this work, we investigate using vision-language models (VLMs) to enhance object detection via a self-supervised prompt learning strategy. Our initial findings indicate that manually crafted text queries often result in undetected objects, primarily because detection confidence diminishes when the query words exhibit semantic overlap. To address this, we propose a Dispersing Prompt Expansion (DiPEx) approach. DiPEx progressively learns to expand a set of distinct, non-overlapping hyperspherical prompts to enhance recall rates, thereby improving performance in downstream tasks such as out-of-distribution OD. Specifically, DiPEx initiates the process by self-training generic parent prompts and selecting the one with the highest semantic uncertainty for further expansion. The resulting child prompts are expected to inherit semantics from their parent prompts while capturing more fine-grained semantics. We apply dispersion losses to ensure high inter-class discrepancy among child prompts while preserving semantic consistency between parent-child prompt pairs. To prevent excessive growth of the prompt sets, we utilize the maximum angular coverage (MAC) of the semantic space as a criterion for early termination. We demonstrate the effectiveness of DiPEx through extensive class-agnostic OD and OOD-OD experiments on MS-COCO and LVIS, surpassing other prompting methods by up to 20.1% in AR and achieving a 21.3% AP improvement over SAM.", "keywords": "class-agnostic object detection;VLM;prompting;out-of-distribution object detection", "primary_area": "machine_vision", "supplementary_material": "/attachment/b371899466c4739c75828e026df4a8460ad87f96.zip", "author": "Jia Syuen Lim;Zhuoxiao Chen;Zhi Chen;Mahsa Baktashmotlagh;Xin Yu;Zi Huang;Yadan Luo", "authorids": "~Jia_Syuen_Lim1;~Zhuoxiao_Chen1;~Zhi_Chen4;~Mahsa_Baktashmotlagh1;~Xin_Yu1;~Zi_Huang1;~Yadan_Luo1", "gender": "M;M;M;F;M;F;F", "homepage": "https://linkedin.com/in/jason-lim-a10a7a189;https://zhuoxiao-chen.github.io/;https://uqzhichen.github.io/;;https://sites.google.com/view/xinyus-homepage/Home;https://staff.itee.uq.edu.au/huang/;https://sites.google.com/view/yadanluo/home", "dblp": ";301/7822;05/1539-10;119/1507;54/1184-2;70/6862;182/2414", "google_scholar": ";t3cg17IAAAAJ;https://scholar.google.com.au/citations?user=9ZypKEYAAAAJ;https://scholar.google.com.au/citations?user=3kaiBBYAAAAJ;oxdtuSEAAAAJ;https://scholar.google.com.au/citations?user=iAWMsgEAAAAJ;3IfL11AAAAAJ", "orcid": ";;0000-0002-9385-144X;;0000-0002-0269-5649;;0000-0001-6272-2971", "linkedin": ";;simon-chen-qld/;;;;", "or_profile": "~Jia_Syuen_Lim1;~Zhuoxiao_Chen1;~Zhi_Chen4;~Mahsa_Baktashmotlagh1;~Xin_Yu1;~Zi_Huang1;~Yadan_Luo1", "aff": "University of Queensland;The University of Queensland;University of Queensland;The University of Queensland;University of Queensland;University of Queensland;The University of Queensland", "aff_domain": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Senior Lecturer;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlim2024dipex,\ntitle={Di{PE}x: Dispersing Prompt Expansion for Class-Agnostic Object Detection},\nauthor={Jia Syuen Lim and Zhuoxiao Chen and Zhi Chen and Mahsa Baktashmotlagh and Xin Yu and Zi Huang and Yadan Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NDs9Ejz4Pe}\n}", "github": "", "reviewers": "UZCf;Jt2M;fTsG;86g8", "pdf_size": 40278354, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "154;47;163;97", "wc_strengths": "98;41;110;42", "wc_weaknesses": "53;252;242;316", "wc_questions": "2;5;94;60", "wc_limitations": "2;32;25;33", "wc_review": "309;377;634;548", "wc_reply_reviewers": "11;88;30;14", "wc_reply_authors": "103;169;114;104", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 115.25, 46.83147979724749 ], "wc_strengths_avg": [ 72.75, 31.538666744172936 ], "wc_weaknesses_avg": [ 215.75, 98.15899092798377 ], "wc_questions_avg": [ 40.25, 38.68058298423125 ], "wc_limitations_avg": [ 23.0, 12.509996003196804 ], "wc_review_avg": [ 467.0, 129.91728137549677 ], "wc_reply_reviewers_avg": [ 35.75, 31.01914731258743 ], "wc_reply_authors_avg": [ 122.5, 27.189152248645048 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11876710093808678770&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Neural Residual Diffusion Models for Deep Scalable Vision Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95457", "id": "NG16csOmcA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NG16csOmcA", "openreview": "https://openreview.net/forum?id=NG16csOmcA", "poster": "/media/PosterPDFs/NeurIPS%202024/95457.png?t=1731135088.740458", "project": "", "author_site": "Zhiyuan Ma, Liangliang Zhao, Biqing Qi, Bowen Zhou", "tldr": "", "abstract": "The most advanced diffusion models have recently adopted increasingly deep stacked networks (e.g., U-Net or Transformer) to promote the generative emergence capabilities of vision generation models similar to large language models (LLMs). However, progressively deeper stacked networks will intuitively cause numerical propagation errors and reduce noisy prediction capabilities on generative data, which hinders massively deep scalable training of vision generation models. In this paper, we first uncover the nature that neural networks being able to effectively perform generative denoising lies in the fact that the intrinsic residual unit has consistent dynamic property with the input signal's reverse diffusion process, thus supporting excellent generative abilities.\nAfterwards, we stand on the shoulders of two common types of deep stacked networks to propose a unified and massively scalable Neural Residual Diffusion Models framework (Neural-RDM for short), which is a simple yet meaningful change to the common architecture of deep generative networks by introducing a series of learnable gated residual parameters that conform to the generative dynamics. Experimental results on various generative tasks show that the proposed neural residual models obtain state-of-the-art scores on image's and video's generative benchmarks. Rigorous theoretical proofs and extensive experiments also demonstrate the advantages of this simple gated residual mechanism consistent with dynamic modeling in improving the fidelity and consistency of generated content and supporting large-scale scalable training.", "keywords": "Vision generation;Diffusion models;Deep neural network", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/15f1dfc380889703f3b78cfa74b1919c71de0eb9.zip", "author": "Zhiyuan Ma;Liangliang Zhao;Biqing Qi;Bowen Zhou", "authorids": "~Zhiyuan_Ma1;~Liangliang_Zhao2;~Biqing_Qi1;~Bowen_Zhou8", "gender": "M;M;;F", "homepage": ";https://biqing-qi.github.io/;http://web.ee.tsinghua.edu.cn/zhoubowen/zh_CN/index.htm?eqid=b894e49b0000ec7d0000000464857b51;https://github.com/lyongo", "dblp": "138/5978-5;233/4949.html;;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0002-4072-0577;0000-0003-1062-9526;", "linkedin": ";;;", "or_profile": "~Zhiyuan_Ma1;~Biqing_Qi1;~Bowen_Zhou8;~lyon_zhao1", "aff": "Tsinghua University;Harbin Institute of Technology;Tsinghua University;Northwest Polytechnical University Xi'an", "aff_domain": "tsinghua.edu.cn;hit.edu.cn;tsinghua.edu.cn;nwpu.edu.cn", "position": "Postdoc;PhD student;Full Professor;MS student", "bibtex": "@inproceedings{\nma2024neural,\ntitle={Neural Residual Diffusion Models for Deep Scalable Vision Generation},\nauthor={Zhiyuan Ma and Liangliang Zhao and Biqing Qi and Bowen Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NG16csOmcA}\n}", "github": "", "reviewers": "jEd7;6rnE;zq4Z", "pdf_size": 3888423, "rating": "5;6;6", "confidence": "2;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "28;72;62", "wc_strengths": "61;100;33", "wc_weaknesses": "51;148;138", "wc_questions": "3;161;2", "wc_limitations": "1;15;16", "wc_review": "144;496;251", "wc_reply_reviewers": "23;159;0", "wc_reply_authors": "18;91;59", "reply_reviewers": "1;1;0", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 54.0, 18.83259585576738 ], "wc_strengths_avg": [ 64.66666666666667, 27.47524137999317 ], "wc_weaknesses_avg": [ 112.33333333333333, 43.560940710177 ], "wc_questions_avg": [ 55.333333333333336, 74.71873184743494 ], "wc_limitations_avg": [ 10.666666666666666, 6.847546194724712 ], "wc_review_avg": [ 297.0, 147.33861227345216 ], "wc_reply_reviewers_avg": [ 60.666666666666664, 70.16330158201572 ], "wc_reply_authors_avg": [ 56.0, 29.87752778706208 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12678181512946624558&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;hit.edu.cn;tsinghua.edu.cn;nwpu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tsinghua University;Harbin Institute of Technology;Northwest Polytechnical University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hit.edu.cn/;http://www.nwpu.edu.cn", "aff_unique_abbr": "THU;HIT;NWPU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Harbin;Xi'an", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Understanding Bias in Large-Scale Visual Datasets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95456", "id": "NGIIHlAEBt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NGIIHlAEBt", "openreview": "https://openreview.net/forum?id=NGIIHlAEBt", "poster": "", "project": "", "author_site": "Boya Zeng, Yida Yin, Zhuang Liu", "tldr": "", "abstract": "A recent study has shown that large-scale visual datasets are very biased: they can be easily classified by modern neural networks. However, the concrete forms of bias among these datasets remain unclear. In this study, we propose a framework to identify the unique visual attributes distinguishing these datasets. Our approach applies various transformations to extract semantic, structural, boundary, color, and frequency information from datasets, and assess how much each type of information reflects their bias. We further decompose their semantic bias with object-level analysis, and leverage natural language methods to generate detailed, open-ended descriptions of each dataset's characteristics. Our work aims to help researchers understand the bias in existing large-scale pre-training datasets, and build more diverse and representative ones in the future. Our project page and code are available at boyazeng.github.io/understand_bias.", "keywords": "dataset bias;visual datasets", "primary_area": "machine_vision", "supplementary_material": "", "author": "Boya Zeng;Yida Yin;Zhuang Liu", "authorids": "~Boya_Zeng1;~Yida_Yin1;~Zhuang_Liu1", "gender": ";;", "homepage": "https://boyazeng.github.io/;;", "dblp": ";;", "google_scholar": "5aHEADIAAAAJ;;", "orcid": "0000-0001-8631-4884;;", "linkedin": ";;", "or_profile": "~Boya_Zeng1;~Yida_Yin1;~Zhuang_Liu1", "aff": "University of Pennsylvania;;", "aff_domain": "seas.upenn.edu;;", "position": "Undergrad student;;", "bibtex": "@inproceedings{\nzeng2024understanding,\ntitle={Understanding Bias in Large-Scale Visual Datasets},\nauthor={Boya Zeng and Yida Yin and Zhuang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NGIIHlAEBt}\n}", "github": "", "reviewers": "aLTB;n4tv;yVzd", "pdf_size": 10196981, "rating": "6;6;8", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "50;38;78", "wc_strengths": "39;26;69", "wc_weaknesses": "174;250;547", "wc_questions": "13;65;11", "wc_limitations": "1;6;21", "wc_review": "277;385;726", "wc_reply_reviewers": "86;122;199", "wc_reply_authors": "828;185;179", "reply_reviewers": "2;1;1", "reply_authors": "5;3;3", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 55.333333333333336, 16.759740119968715 ], "wc_strengths_avg": [ 44.666666666666664, 18.00617178142601 ], "wc_weaknesses_avg": [ 323.6666666666667, 160.93960219770506 ], "wc_questions_avg": [ 29.666666666666668, 24.997777679003566 ], "wc_limitations_avg": [ 9.333333333333334, 8.498365855987974 ], "wc_review_avg": [ 462.6666666666667, 191.35365745713415 ], "wc_reply_reviewers_avg": [ 135.66666666666666, 47.13338048088165 ], "wc_reply_authors_avg": [ 397.3333333333333, 304.53717160453317 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6854880365025338247&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "seas.upenn.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Integrating Suboptimal Human Knowledge with Hierarchical Reinforcement Learning for Large-Scale Multiagent Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95455", "id": "NGpMCH5q7Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NGpMCH5q7Y", "openreview": "https://openreview.net/forum?id=NGpMCH5q7Y", "poster": "/media/PosterPDFs/NeurIPS%202024/95455.png?t=1730160417.8286858", "project": "", "author_site": "Dingbang Liu, Shohei Kato, Wen Gu, Fenghui Ren, Jun Yan, Guoxin Su", "tldr": "", "abstract": "Due to the exponential growth of agent interactions and the curse of dimensionality, learning efficient coordination from scratch is inherently challenging in large-scale multi-agent systems. While agents' learning is data-driven, sampling from millions of steps, human learning processes are quite different. Inspired by the concept of Human-on-the-Loop and the daily human hierarchical control, we propose a novel knowledge-guided multi-agent reinforcement learning framework (hhk-MARL), which combines human abstract knowledge with hierarchical reinforcement learning to address the learning difficulties among a large number of agents. In this work, fuzzy logic is applied to represent human suboptimal knowledge, and agents are allowed to freely decide how to leverage the proposed prior knowledge. Additionally, a graph-based group controller is built to enhance agent coordination. The proposed framework is end-to-end and compatible with various existing algorithms. We conduct experiments in challenging domains of the StarCraft Multi-agent Challenge combined with three famous algorithms: IQL, QMIX, and Qatten. The results show that our approach can greatly accelerate the training process and improve the final performance, even based on low-performance human prior knowledge.", "keywords": "Multi-agent system;Multi-agent reinforcement learning;Transfer learning;Human agent interaction;Scalability", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/061c18df0dcd95eab6194e0d8b946fa6e7c72653.zip", "author": "Dingbang Liu;Shohei Kato;Wen Gu;Fenghui Ren;Jun Yan;Guoxin Su", "authorids": "~Dingbang_Liu1;~Shohei_Kato1;~Wen_Gu1;~Fenghui_Ren2;~Jun_Yan9;~Guoxin_Su1", "gender": "M;M;;M;M;", "homepage": ";https://www.katolab.nitech.ac.jp;;https://scholars.uow.edu.au/fenghui-ren;https://scholars.uow.edu.au/jun-yan;", "dblp": ";;;;89/5901-5.html;", "google_scholar": ";https://scholar.google.co.jp/citations?user=GR3enaoAAAAJ;https://scholar.google.co.jp/citations?user=deamBAYAAAAJ;https://scholar.google.com.au/citations?user=BCVN5oEAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0009-0009-4597-1103;;;;0000-0002-6474-1049;0000-0002-2087-4894", "linkedin": ";;;;;", "or_profile": "~Dingbang_Liu1;~Shohei_Kato1;~Wen_Gu1;~Fenghui_Ren2;~Jun_Yan9;~Guoxin_Su1", "aff": "Nagoya Institute of Technology, Tokyo Institute of Technology;Nagoya Institute of Technology, Tokyo Institute of Technology;Japan Advanced Institute of Science and Technology;University of Wollongong;University of Wollongong;University of Wollongong", "aff_domain": "nitech.ac.jp;nitech.ac.jp;jaist.ac.jp;uow.edu.au;uow.edu.au;uow.edu.au", "position": "PhD student;Full Professor;Assistant Professor;Lecturer;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024integrating,\ntitle={Integrating Suboptimal Human Knowledge with Hierarchical Reinforcement Learning for Large-Scale Multiagent Systems},\nauthor={Dingbang Liu and Shohei Kato and Wen Gu and Fenghui Ren and Jun Yan and Guoxin Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NGpMCH5q7Y}\n}", "github": "", "reviewers": "8Cur;kCt8;JxHm", "pdf_size": 5967598, "rating": "6;6;6", "confidence": "3;4;2", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "74;59;105", "wc_strengths": "73;42;81", "wc_weaknesses": "97;139;79", "wc_questions": "304;279;63", "wc_limitations": "15;1;11", "wc_review": "563;520;339", "wc_reply_reviewers": "28;0;26", "wc_reply_authors": "74;43;88", "reply_reviewers": "1;0;1", "reply_authors": "3;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.33333333333333, 19.154343864744856 ], "wc_strengths_avg": [ 65.33333333333333, 16.81930108205715 ], "wc_weaknesses_avg": [ 105.0, 25.13961017995307 ], "wc_questions_avg": [ 215.33333333333334, 108.19837747807908 ], "wc_limitations_avg": [ 9.0, 5.887840577551898 ], "wc_review_avg": [ 474.0, 97.06011882676977 ], "wc_reply_reviewers_avg": [ 18.0, 12.754084313139327 ], "wc_reply_authors_avg": [ 68.33333333333333, 18.803073034893938 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5ru9I75PRIEJ:scholar.google.com/&scioq=Integrating+Suboptimal+Human+Knowledge+with+Hierarchical+Reinforcement+Learning+for+Large-Scale+Multiagent+Systems&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "nitech.ac.jp;nitech.ac.jp;jaist.ac.jp;uow.edu.au;uow.edu.au;uow.edu.au", "author_num": 6, "aff_unique_index": "0;0;1;2;2;2", "aff_unique_norm": "Nagoya Institute of Technology;Japan Advanced Institute of Science and Technology;University of Wollongong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nitech.ac.jp;https://www.jaist.ac.jp;https://www.uow.edu.au", "aff_unique_abbr": "NIT;JAIST;UOW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "Japan;Australia" }, { "title": "UniAudio 1.5: Large Language Model-Driven Audio Codec is A Few-Shot Audio Task Learner", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95454", "id": "NGrINZyZKk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NGrINZyZKk", "openreview": "https://openreview.net/forum?id=NGrINZyZKk", "poster": "", "project": "", "author_site": "Dongchao Yang, Haohan Guo, Yuanyuan Wang, Rongjie Huang, Xiang Li, Xu Tan, Xixin Wu, Helen Meng", "tldr": "", "abstract": "Large Language models (LLMs) have demonstrated supreme capabilities in textual understanding and generation, but cannot be directly applied to cross-modal tasks without fine-tuning. This paper proposes a cross-modal in-context learning approach, empowering the frozen LLMs to achieve multiple audio tasks in a few-shot style without any parameter update. \nSpecifically, we propose a novel LLM-driven audio codec model, LLM-Codec, which transfers the audio modality into textual space by representing audio tokens with words or sub-words from the LLM vocabulary, while maintaining high audio reconstruction quality.\nThe key idea is to reduce the modality heterogeneity between text and audio by compressing the audio modality into the well-trained textual space of LLMs. Thus, the audio representation can be viewed as a new \\textit{foreign language}, and LLMs can learn the new \\textit{foreign language} with several demonstrations. In experiments, we investigate the performance of the proposed approach across multiple audio understanding and generation tasks, \\textit{e.g.} speech emotion classification, audio classification, text-to-speech generation, speech enhancement, etc. Experimental results show that LLMs equipped with the LLM-Codec, named as UniAudio 1.5, prompted by only a few examples, can perform effectively in simple scenarios, validating our cross-modal in-context learning approach.\nTo facilitate research on few-shot audio task learning and multi-modal LLMs, we have open-sourced the LLM-Codec model.", "keywords": "Audio undertanding and generation;in-context learning;Large language models;few-shot learning.", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Dongchao Yang;Haohan Guo;Yuanyuan Wang;Rongjie Huang;Xiang Li;Xu Tan;Xixin Wu;Helen M. Meng", "authorids": "~Dongchao_Yang2;~Haohan_Guo1;~Yuanyuan_Wang7;~Rongjie_Huang1;~Xiang_Li25;~Xu_Tan1;~Xixin_Wu1;~Helen_M._Meng1", "gender": ";M;F;M;;M;;F", "homepage": ";https://hhguo.github.io/;;;;https://tan-xu.github.io/;https://www1.se.cuhk.edu.hk/~wuxx/;http://www.se.cuhk.edu.hk/people/academic-staff/prof-meng-mei-ling-helen/", "dblp": ";;;212/8936-1;;96/10484-3;125/2836;92/3270", "google_scholar": ";;;iRHBUsgAAAAJ;;tob-U1oAAAAJ;;", "orcid": ";;0009-0005-8766-3118;;;0000-0001-5631-0639;;", "linkedin": ";;;;;;;", "or_profile": "~Dongchao_Yang2;~Haohan_Guo1;~Yuanyuan_Wang7;~Rongjie_Huang1;~Xiang_Li25;~Xu_Tan1;~Xixin_Wu1;~Helen_M._Meng1", "aff": ";Chinese University of Hong Kong, The Chinese University of Hong Kong;Tsinghua University;Zhejiang University;;Microsoft;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": ";se.cuhk.edu.hk;tsinghua.edu.cn;zju.edu.cn;;microsoft.com;cuhk.edu.hk;cuhk.edu.hk", "position": ";PhD student;MS student;MS student;;Principal Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024uniaudio,\ntitle={UniAudio 1.5: Large Language Model-Driven Audio Codec is A Few-Shot Audio Task Learner},\nauthor={Dongchao Yang and Haohan Guo and Yuanyuan Wang and Rongjie Huang and Xiang Li and Xu Tan and Xixin Wu and Helen M. Meng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NGrINZyZKk}\n}", "github": "", "reviewers": "4YYB;XMxu;oXws;Gqom", "pdf_size": 1163977, "rating": "5;6;6;7", "confidence": "4;3;2;5", "soundness": "1;2;3;4", "novelty": "3;2;3;3", "presentation": "1;3;4;3", "wc_summary": "46;167;62;199", "wc_strengths": "25;91;59;22", "wc_weaknesses": "220;101;113;43", "wc_questions": "137;3;1;41", "wc_limitations": "9;1;8;15", "wc_review": "437;363;243;320", "wc_reply_reviewers": "107;49;105;113", "wc_reply_authors": "438;19;138;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 118.5, 65.72860868754184 ], "wc_strengths_avg": [ 49.25, 28.145825622994256 ], "wc_weaknesses_avg": [ 119.25, 63.90764821208804 ], "wc_questions_avg": [ 45.5, 55.17925334761245 ], "wc_limitations_avg": [ 8.25, 4.968651728587948 ], "wc_review_avg": [ 340.75, 70.25800666116282 ], "wc_reply_reviewers_avg": [ 93.5, 25.8602010819715 ], "wc_reply_authors_avg": [ 156.25, 169.18388664408914 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.31622776601683794, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9610513952615411361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";se.cuhk.edu.hk;tsinghua.edu.cn;zju.edu.cn;;microsoft.com;cuhk.edu.hk;cuhk.edu.hk", "author_num": 8, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Chinese University of Hong Kong;Tsinghua University;Zhejiang University;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;https://www.zju.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "CUHK;THU;ZJU;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Enhancing LLM Reasoning via Vision-Augmented Prompting", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95453", "id": "NGuGVT7ar2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NGuGVT7ar2", "openreview": "https://openreview.net/forum?id=NGuGVT7ar2", "poster": "/media/PosterPDFs/NeurIPS%202024/95453.png?t=1731744459.9567811", "project": "", "author_site": "Ziyang Xiao, Dongxiang Zhang, Xiongwei Han, Xiaojin Fu, Wing Yin YU, Tao Zhong, Sai Wu, Yuan Wang, Jianwei Yin, Gang Chen", "tldr": "", "abstract": "Verbal and visual-spatial information processing are two critical subsystems that activate different brain regions and often collaborate together for cognitive reasoning. Despite the rapid advancement of LLM-based reasoning, the mainstream frameworks, such as Chain-of-Thought (CoT) and its variants, primarily focus on the verbal dimension, resulting in limitations in tackling reasoning problems with visual and spatial clues. To bridge the gap, we propose a novel dual-modality reasoning framework called Vision-Augmented Prompting (VAP). Upon receiving a textual problem description, VAP automatically synthesizes an image from the visual and spatial clues by utilizing external drawing tools. Subsequently, VAP formulates a chain of thought in both modalities and iteratively refines the synthesized image. Finally, a conclusive reasoning scheme based on self-alignment is proposed for final result generation. Extensive experiments are conducted across four versatile tasks, including solving geometry problems, Sudoku, time series prediction, and travelling salesman problem. The results validated the superiority of VAP over existing LLMs-based reasoning frameworks.", "keywords": "Multimodal Large Language Models;Dual-Modality Reasoning", "primary_area": "other", "supplementary_material": "/attachment/bc60447e259d05fe8f7290a0910c41190a82e9ab.zip", "author": "Ziyang Xiao;Dongxiang Zhang;Xiongwei Han;Xiaojin Fu;Wing Yin YU;Tao Zhong;Sai Wu;Yuan Jessica Wang;Jianwei Yin;Gang Chen", "authorids": "~Ziyang_Xiao2;~Dongxiang_Zhang2;~Xiongwei_Han1;~Xiaojin_Fu1;~Wing_Yin_YU1;~Tao_Zhong2;~Sai_Wu2;~Yuan_Jessica_Wang1;~Jianwei_Yin1;~Gang_Chen6", "gender": "M;M;M;;M;;M;F;M;M", "homepage": ";https://person.zju.edu.cn/zhangdongxiang;;;;;https://person.zju.edu.cn/0011057;https://www.suss.edu.sg/about-suss/faculty-and-staff/detail/wang-yuan;https://person.zju.edu.cn/0001038;", "dblp": ";89/6013;299/1779.html;;;;30/1186.html;;74/3786;67/6383-1", "google_scholar": "uSb7TOAAAAAJ;nYN9A3IAAAAJ;SEJtThAAAAAJ;;;;RMaqDKAAAAAJ;;0s1A5fwAAAAJ;", "orcid": ";;;0000-0002-3357-0355;http://orcid.org/0000-0002-9559-1055;;;;0000-0003-4703-7348;0000-0002-7483-0045", "linkedin": ";;;;;;;;;", "or_profile": "~Ziyang_Xiao2;~Dongxiang_Zhang2;~Xiongwei_Han1;~Xiaojin_Fu1;~Wing_Yin_YU1;~Tao_Zhong2;~Sai_Wu2;~Yuan_Jessica_Wang1;~Jianwei_Yin1;~Gang_Chen6", "aff": "Zhejiang University;Zhejiang University;Huawei Technologies Ltd.;;City University of Hong Kong;;Zhejiang University;Singapore University of Social Sciences;Zhejiang University;College of Computer Science and Technology, Zhejiang University", "aff_domain": "zjut.edu.cn;zju.edu.cn;huawei.com;;cityu.edu.hk;;zju.edu.cn;suss.edu.sg;zju.edu.cn;cs.zju.edu.cn", "position": "PhD student;Researcher;Researcher;;PhD student;;Full Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxiao2024enhancing,\ntitle={Enhancing {LLM} Reasoning via Vision-Augmented Prompting},\nauthor={Ziyang Xiao and Dongxiang Zhang and Xiongwei Han and Xiaojin Fu and Wing Yin YU and Tao Zhong and Sai Wu and Yuan Jessica Wang and Jianwei Yin and Gang Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NGuGVT7ar2}\n}", "github": "", "reviewers": "gmDj;L2VS;VnVX;vhJU", "pdf_size": 867906, "rating": "4;5;5;8", "confidence": "4;4;3;5", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "64;93;96;138", "wc_strengths": "56;219;82;113", "wc_weaknesses": "141;239;112;118", "wc_questions": "94;7;53;58", "wc_limitations": "44;12;11;2", "wc_review": "399;570;354;429", "wc_reply_reviewers": "96;15;0;97", "wc_reply_authors": "440;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.75, 26.38536526182649 ], "wc_strengths_avg": [ 117.5, 61.97781861279082 ], "wc_weaknesses_avg": [ 152.5, 51.10039138793361 ], "wc_questions_avg": [ 53.0, 30.911163032147464 ], "wc_limitations_avg": [ 17.25, 15.927570436196476 ], "wc_review_avg": [ 438.0, 80.74961300216863 ], "wc_reply_reviewers_avg": [ 52.0, 44.81629168059312 ], "wc_reply_authors_avg": [ 110.0, 190.5255888325765 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10883467848175983224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "zjut.edu.cn;zju.edu.cn;huawei.com;;cityu.edu.hk;;zju.edu.cn;suss.edu.sg;zju.edu.cn;cs.zju.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;2;0;3;0;0", "aff_unique_norm": "Zhejiang University;Huawei;City University of Hong Kong;Singapore University of Social Sciences", "aff_unique_dep": ";Huawei Technologies;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.huawei.com;https://www.cityu.edu.hk;https://www.suss.edu.sg", "aff_unique_abbr": "ZJU;Huawei;CityU;SUSS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "SCRREAM : SCan, Register, REnder And Map: A Framework for Annotating Accurate and Dense 3D Indoor Scenes with a Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97725", "id": "NHob4eMg7R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NHob4eMg7R", "openreview": "https://openreview.net/forum?id=NHob4eMg7R", "poster": "/media/PosterPDFs/NeurIPS%202024/97725.png?t=1731304189.390215", "project": "", "author_site": "HyunJun Jung, Weihang Li, Shun-Cheng Wu, William Bittner, Nikolas Brasch, Jifei Song, Eduardo P\u00e9rez-Pellitero, Zhensong Zhang, Arthur Moreau, Nassir Navab, Benjamin Busam", "tldr": "", "abstract": "Traditionally, 3d indoor datasets have generally prioritized scale over ground-truth accuracy in order to obtain improved generalization. However, using these datasets to evaluate dense geometry tasks, such as depth rendering, can be problematic as the meshes of the dataset are often incomplete and may produce wrong ground truth to evaluate the details. In this paper, we propose SCRREAM, a dataset annotation framework that allows annotation of fully dense meshes of objects in the scene and registers camera poses on the real image sequence, which can produce accurate ground truth for both sparse 3D as well as dense 3D tasks. We show the details of the dataset annotation pipeline and showcase four possible variants of datasets that can be obtained from our framework with example scenes, such as indoor reconstruction and SLAM, scene editing \\& object removal, human reconstruction and 6d pose estimation. Recent pipelines for indoor reconstruction and SLAM serve as new benchmarks. In contrast to previous indoor dataset, our design allows to evaluate dense geometry tasks on eleven sample scenes against accurately rendered ground truth depth maps.", "keywords": "indoor dataset;novel view synthesis;SLAM;human reconstruction;6D pose;3D scene editing;object removal", "primary_area": "", "supplementary_material": "/attachment/bc82a151f779437099c5af00879aad0254f546cc.zip", "author": "HyunJun Jung;Weihang Li;Shun-Cheng Wu;William Bittner;Nikolas Brasch;Jifei Song;Eduardo P\u00e9rez-Pellitero;Zhensong Zhang;Arthur Moreau;Nassir Navab;Benjamin Busam", "authorids": "~HyunJun_Jung1;~Weihang_Li1;~Shun-Cheng_Wu1;~William_Bittner1;~Nikolas_Brasch1;~Jifei_Song1;~Eduardo_P\u00e9rez-Pellitero1;~Zhensong_Zhang2;~Arthur_Moreau1;~Nassir_Navab1;~Benjamin_Busam2", "gender": "M;M;M;M;M;M;M;M;M;M;", "homepage": ";https://colin-de.github.io/;http://campar.in.tum.de/Main/ShunChengWu;;https://www.cs.cit.tum.de/camp/members/nikolas-brasch/;;https://perezpellitero.github.io;https://zhangzhensong.github.io/;;https://www.cs.cit.tum.de/camp/members/cv-nassir-navab/nassir-navab/;http://campar.in.tum.de/view/Main/BenjaminBusam", "dblp": ";;;;225/7788;198/2576;141/9842;130/7882;288/1718;n/NassirNavab.html;168/5446", "google_scholar": "biF5W7IAAAAJ;3yGEw74AAAAJ;qEo9eiMAAAAJ;;Linn47YAAAAJ;;oLWr6EwAAAAJ;https://scholar.google.com.hk/citations?user=fs8HQxQAAAAJ;fMmoHX0AAAAJ;https://scholar.google.com.tw/citations?user=kzoVUPYAAAAJ;https://scholar.google.de/citations?user=u4rJZwUAAAAJ", "orcid": ";0000-0002-7736-8087;0000-0002-6498-1579;;;;;;;;0000-0002-0620-5774", "linkedin": "hyun-jun-jung-1a5b45107;weihang-li-269247130/;shun-cheng-wu-58a14b119/;william-bittner-a66237132/;;;;;moreauarthur/;;benjaminbusam", "or_profile": "~HyunJun_Jung1;~Weihang_Li1;~Shun-Cheng_Wu1;~William_Bittner1;~Nikolas_Brasch1;~Jifei_Song1;~Eduardo_P\u00e9rez-Pellitero1;~Zhensong_Zhang2;~Arthur_Moreau1;~Nassir_Navab1;~Benjamin_Busam2", "aff": "Meta Facebook;Technical University of Munich;Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;Huawei Technologies Ltd.;Huawei Technologies R&D (UK) Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Technical University of Munich;Technical University Munich", "aff_domain": "meta.com;tum.de;tum.de;tum.de;tum.de;huawei.com;huawei.com;huawei.com;huawei.com;tum.de;tum.de", "position": "Intern;MS student;PhD student;MS student;PhD student;Research Scientist;Principal Researcher;Researcher;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\njung2024scrream,\ntitle={{SCRREAM} : {SC}an, Register, {RE}nder And Map: A Framework for Annotating Accurate and Dense 3D Indoor Scenes with a Benchmark},\nauthor={HyunJun Jung and Weihang Li and Shun-Cheng Wu and William Bittner and Nikolas Brasch and Jifei Song and Eduardo P{\\'e}rez-Pellitero and Zhensong Zhang and Arthur Moreau and Nassir Navab and Benjamin Busam},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=NHob4eMg7R}\n}", "github": "", "reviewers": "Nfyn;v4H5;SLq9", "pdf_size": 3642783, "rating": "6;7;8", "confidence": "4;3;5", "wc_summary_and_contributions": "27;57;106", "wc_strengths": "117;9;21", "wc_improvement": "516;9;27", "wc_limitations": "4;1;45", "wc_correctness": "1;1;32", "wc_clarity": "1;1;4", "wc_relation_to_prior_work": "3;1;14", "wc_documentation": "1;1;19", "wc_additional_feedback": "1;1;1", "wc_review": "671;81;269", "wc_reply_reviewers": "143;0;113", "wc_reply_authors": "42;0;0", "reply_reviewers": "2;0;2", "reply_authors": "2;1;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 63.333333333333336, 32.561053764001905 ], "wc_strengths_avg": [ 49.0, 48.33218389437829 ], "wc_improvement_avg": [ 184.0, 234.87443453896807 ], "wc_limitations_avg": [ 16.666666666666668, 20.07209228976613 ], "wc_correctness_avg": [ 11.333333333333334, 14.613540144521982 ], "wc_clarity_avg": [ 2.0, 1.4142135623730951 ], "wc_relation_to_prior_work_avg": [ 6.0, 5.715476066494082 ], "wc_documentation_avg": [ 7.0, 8.48528137423857 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 340.3333333333333, 246.09122066601418 ], "wc_reply_reviewers_avg": [ 85.33333333333333, 61.57019480957397 ], "wc_reply_authors_avg": [ 14.0, 19.79898987322333 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6224142963578910668&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "meta.com;tum.de;tum.de;tum.de;tum.de;huawei.com;huawei.com;huawei.com;huawei.com;tum.de;tum.de", "author_num": 11, "aff_unique_index": "0;1;1;2;1;3;3;3;3;1;1", "aff_unique_norm": "Meta;Technical University of Munich;Technische Universit\u00e4t M\u00fcnchen;Huawei", "aff_unique_dep": "Meta Platforms, Inc.;;;Huawei Technologies", "aff_unique_url": "https://meta.com;https://www.tum.de;https://www.tum.de;https://www.huawei.com", "aff_unique_abbr": "Meta;TUM;TUM;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;2;3;2;2;1;1", "aff_country_unique": "United States;Germany;China;United Kingdom" }, { "title": "Q-Distribution guided Q-learning for offline reinforcement learning: Uncertainty penalized Q-value via consistency model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95452", "id": "NIcIdhyfQX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NIcIdhyfQX", "openreview": "https://openreview.net/forum?id=NIcIdhyfQX", "poster": "/media/PosterPDFs/NeurIPS%202024/95452.png?t=1731415976.581152", "project": "", "author_site": "Jing Zhang, Linjiajie Fang, Kexin SHI, Wenjia Wang, Bingyi Jing", "tldr": "", "abstract": "``Distribution shift'' is the primary obstacle to the success of offline reinforcement learning. As a learning policy may take actions beyond the knowledge of the behavior policy (referred to as Out-of-Distribution (OOD) actions), the Q-values of these OOD actions can be easily overestimated. Consequently, the learning policy becomes biasedly optimized using the incorrect recovered Q-value function. One commonly used idea to avoid the overestimation of Q-value is to make a pessimistic adjustment. Our key idea is to penalize the Q-values of OOD actions that correspond to high uncertainty. In this work, we propose Q-Distribution guided Q-learning (QDQ) which pessimistic Q-value on OOD regions based on uncertainty estimation. The uncertainty measure is based on the conditional Q-value distribution, which is learned via a high-fidelity and efficient consistency model. On the other hand, to avoid the overly conservative problem, we introduce an uncertainty-aware optimization objective to update the Q-value function. The proposed QDQ demonstrates solid theoretical guarantees for the accuracy of Q-value distribution learning and uncertainty measurement, as well as the performance of the learning policy. QDQ consistently exhibits strong performance in the D4RL benchmark and shows significant improvements for many tasks. Our code can be found at .", "keywords": "offline Reinforcement Learning;Q value distribution;Uncertainty measure;pessimistic Q learning;Consistency model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jing Zhang;Linjiajie Fang;Kexin Shi;Wenjia Wang;Bingyi Jing", "authorids": "~Jing_Zhang34;~Linjiajie_Fang1;~Kexin_Shi2;~Wenjia_Wang2;~Bingyi_Jing1", "gender": "F;M;;M;M", "homepage": ";;;https://www.wenjia-w.com/;https://www.sustech.edu.cn/en/faculties/jingbing-yi.html", "dblp": ";;;;15/8051", "google_scholar": "wruIkmYAAAAJ;;;EKS1sO0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9604-2699;0000-0002-5021-5988;0000-0003-1276-1053;;0000-0002-8876-1570", "linkedin": ";;;;", "or_profile": "~Jing_Zhang34;~Linjiajie_Fang1;~Kexin_Shi2;~Wenjia_Wang2;~Bingyi_Jing1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;HKUST (GZ);South University of Science and Technology", "aff_domain": "hkust.edu;ust.hk;hkust.edu.hk;hkust-gz.edu.cn;sustech.edu.cn", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024qdistribution,\ntitle={Q-Distribution guided Q-learning for offline reinforcement learning: Uncertainty penalized Q-value via consistency model},\nauthor={Jing Zhang and Linjiajie Fang and Kexin Shi and Wenjia Wang and Bingyi Jing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NIcIdhyfQX}\n}", "github": "", "reviewers": "T4W8;4FB7;rDnz", "pdf_size": 3027294, "rating": "6;6;7", "confidence": "3;4;3", "soundness": "3;3;2", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "122;49;63", "wc_strengths": "112;42;93", "wc_weaknesses": "251;268;371", "wc_questions": "4;60;27", "wc_limitations": "22;1;1", "wc_review": "511;420;555", "wc_reply_reviewers": "91;17;191", "wc_reply_authors": "240;56;388", "reply_reviewers": "2;1;2", "reply_authors": "3;2;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.0, 31.63331577098213 ], "wc_strengths_avg": [ 82.33333333333333, 29.555973263547852 ], "wc_weaknesses_avg": [ 296.6666666666667, 53.01781671182706 ], "wc_questions_avg": [ 30.333333333333332, 22.983085567917602 ], "wc_limitations_avg": [ 8.0, 9.899494936611665 ], "wc_review_avg": [ 495.3333333333333, 56.21585383343584 ], "wc_reply_reviewers_avg": [ 99.66666666666667, 71.29905718560067 ], "wc_reply_authors_avg": [ 228.0, 135.80378001611984 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h4NRaEJKdY4J:scholar.google.com/&scioq=Q-Distribution+guided+Q-learning+for+offline+reinforcement+learning:+Uncertainty+penalized+Q-value+via+consistency+model&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "hkust.edu;ust.hk;hkust.edu.hk;hkust-gz.edu.cn;sustech.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;South University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "HKUST;SUSTech", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Hong Kong SAR;Guangzhou;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Discrete Dictionary-based Decomposition Layer for Structured Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95451", "id": "NJUClFbosX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NJUClFbosX", "openreview": "https://openreview.net/forum?id=NJUClFbosX", "poster": "/media/PosterPDFs/NeurIPS%202024/95451.png?t=1733276453.6824028", "project": "", "author_site": "Taewon Park, Hyun-Chul Kim, Minho Lee", "tldr": "", "abstract": "Neuro-symbolic neural networks have been extensively studied to integrate symbolic operations with neural networks, thereby improving systematic generalization. Specifically, Tensor Product Representation (TPR) framework enables neural networks to perform differentiable symbolic operations by encoding the symbolic structure of data within vector spaces. However, TPR-based neural networks often struggle to decompose unseen data into structured TPR representations, undermining their symbolic operations. To address this decomposition problem, we propose a Discrete Dictionary-based Decomposition (D3) layer designed to enhance the decomposition capabilities of TPR-based models. D3 employs discrete, learnable key-value dictionaries trained to capture symbolic features essential for decomposition operations. It leverages the prior knowledge acquired during training to generate structured TPR representations by mapping input data to pre-learned symbolic features within these dictionaries. D3 is a straightforward drop-in layer that can be seamlessly integrated into any TPR-based model without modifications. Our experimental results demonstrate that D3 significantly improves the systematic generalization of various TPR-based models while requiring fewer additional parameters. Notably, D3 outperforms baseline models on the synthetic task that demands the systematic decomposition of unseen combinatorial data.", "keywords": "tensor product representation;neuro-symbolic neural network;systematic generalization;compositional generalization;decomposition problem;structured representation learning;discrete representation learning", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/bc9f5ee677dac851bdf0cb65f75e39a6a202a0d3.zip", "author": "Taewon Park;Hyun-Chul Kim;Minho Lee", "authorids": "~Taewon_Park1;~Hyun-Chul_Kim3;~Minho_Lee2", "gender": "M;;M", "homepage": ";https://nai.knu.ac.kr;https://www.knu.ac.kr/", "dblp": "82/10595;;", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;CuEdQjwAAAAJ;", "orcid": ";;", "linkedin": "taewon-park-755394169/;;", "or_profile": "~Taewon_Park1;~Hyun-Chul_Kim3;~Minho_Lee2", "aff": "Kyungpook National University;Kyungpook National University;Kyungpook National University", "aff_domain": "knu.ac.kr;knu.ac.kr;knu.ac.kr", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npark2024discrete,\ntitle={Discrete Dictionary-based Decomposition Layer for Structured Representation Learning},\nauthor={Taewon Park and Hyun-Chul Kim and Minho Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NJUClFbosX}\n}", "github": "", "reviewers": "3Nfz;FjyM;QJJM;KCxS", "pdf_size": 1450606, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "110;77;90;86", "wc_strengths": "82;57;38;91", "wc_weaknesses": "316;140;132;128", "wc_questions": "48;134;115;8", "wc_limitations": "1;68;6;32", "wc_review": "557;476;381;345", "wc_reply_reviewers": "10;384;18;5", "wc_reply_authors": "15;897;15;0", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 12.07010770457331 ], "wc_strengths_avg": [ 67.0, 20.868636754709208 ], "wc_weaknesses_avg": [ 179.0, 79.21489758877429 ], "wc_questions_avg": [ 76.25, 50.726595588507614 ], "wc_limitations_avg": [ 26.75, 26.564779313971346 ], "wc_review_avg": [ 439.75, 82.90167368636172 ], "wc_reply_reviewers_avg": [ 104.25, 161.58028190345505 ], "wc_reply_authors_avg": [ 231.75, 384.1310811428828 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fzanr28zoi4J:scholar.google.com/&scioq=Discrete+Dictionary-based+Decomposition+Layer+for+Structured+Representation+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "knu.ac.kr;knu.ac.kr;knu.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Kyungpook National University", "aff_unique_dep": "", "aff_unique_url": "https://www.knu.ac.kr", "aff_unique_abbr": "KNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Would I Lie To You? Inference Time Alignment of Language Models using Direct Preference Heads", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95450", "id": "NKGuLthW80", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NKGuLthW80", "openreview": "https://openreview.net/forum?id=NKGuLthW80", "poster": "/media/PosterPDFs/NeurIPS%202024/95450.png?t=1732274901.9217083", "project": "", "author_site": "Avelina Hadji-Kyriacou, Ognjen Arandjelovic", "tldr": "", "abstract": "Pre-trained Language Models (LMs) exhibit strong zero-shot and in-context learning capabilities; however, their behaviors are often difficult to control. By utilizing Reinforcement Learning from Human Feedback (RLHF), it is possible to fine-tune unsupervised LMs to follow instructions and produce outputs that reflect human preferences. Despite its benefits, RLHF has been shown to potentially harm a language model's reasoning capabilities and introduce artifacts such as hallucinations where the model may fabricate facts. To address this issue we introduce Direct Preference Heads (DPH), a fine-tuning framework that enables LMs to learn human preference signals through an auxiliary reward head without directly affecting the output distribution of the language modeling head. We perform a theoretical analysis of our objective function and find strong ties to Conservative Direct Preference Optimization (cDPO). Finally we evaluate our models on GLUE, RACE, and the GPT4All evaluation suite and demonstrate that our method produces models which achieve higher scores than those fine-tuned with Supervised Fine-Tuning (SFT) or Direct Preference Optimization (DPO) alone.", "keywords": "Deep Learning;Neural Networks;Ranking and Preference Learning;Reinforcement Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Avelina Asada Hadji-Kyriacou;Ognjen Arandjelovic", "authorids": "~Avelina_Asada_Hadji-Kyriacou1;~Ognjen_Arandjelovic2", "gender": "F;M", "homepage": "https://avelina.io;https://oa7.host.cs.st-andrews.ac.uk/", "dblp": "364/5792;98/5718", "google_scholar": ";D7bpRJ8AAAAJ", "orcid": ";0000-0002-9314-194X", "linkedin": ";", "or_profile": "~Avelina_Asada_Hadji-Kyriacou1;~Ognjen_Arandjelovic2", "aff": "University of St Andrews;University of St Andrews", "aff_domain": "st-andrews.ac.uk;st-andrews.ac.uk", "position": "PhD student;Reader", "bibtex": "@inproceedings{\nhadji-kyriacou2024would,\ntitle={Would I Lie To You? Inference Time Alignment of Language Models using Direct Preference Heads},\nauthor={Avelina Asada Hadji-Kyriacou and Ognjen Arandjelovic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NKGuLthW80}\n}", "github": "", "reviewers": "3kiM;LdN5;zurN;ebEQ", "pdf_size": 426542, "rating": "3;6;6;6", "confidence": "4;2;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "96;162;21;56", "wc_strengths": "50;60;63;61", "wc_weaknesses": "202;66;228;132", "wc_questions": "43;74;2;84", "wc_limitations": "12;54;1;10", "wc_review": "403;416;315;343", "wc_reply_reviewers": "401;0;21;95", "wc_reply_authors": "0;0;0;135", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.75, 52.39453692895854 ], "wc_strengths_avg": [ 58.5, 5.024937810560445 ], "wc_weaknesses_avg": [ 157.0, 63.190189111918315 ], "wc_questions_avg": [ 50.75, 31.948200262299597 ], "wc_limitations_avg": [ 19.25, 20.48627589387588 ], "wc_review_avg": [ 369.25, 41.70356699372369 ], "wc_reply_reviewers_avg": [ 129.25, 160.81413961464955 ], "wc_reply_authors_avg": [ 33.75, 58.45671475544961 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xTIFwmeIxkgJ:scholar.google.com/&scioq=Would+I+Lie+To+You%3F+Inference+Time+Alignment+of+Language+Models+using+Direct+Preference+Heads&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "st-andrews.ac.uk;st-andrews.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of St Andrews", "aff_unique_dep": "", "aff_unique_url": "https://www.st-andrews.ac.uk", "aff_unique_abbr": "St Andrews", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "VideoLLM-MoD: Efficient Video-Language Streaming with Mixture-of-Depths Vision Computation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95449", "id": "NKPXHzYusG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NKPXHzYusG", "openreview": "https://openreview.net/forum?id=NKPXHzYusG", "poster": "", "project": "", "author_site": "Shiwei Wu, Joya Chen, Kevin Qinghong Lin, Qimeng Wang, Yan Gao, Qianli Xu, Tong Xu, Yao Hu, Enhong Chen, Mike Zheng Shou", "tldr": "", "abstract": "A well-known dilemma in large vision-language models (e.g., GPT-4, LLaVA) is that while increasing the number of vision tokens generally enhances visual understanding, it also significantly raises memory and computational costs, especially in long-term, dense video frame streaming scenarios. Although learnable approaches like Q-Former and Perceiver Resampler have been developed to reduce the vision token burden, they overlook the context causally modeled by LLMs (i.e., key-value cache), potentially leading to missed visual cues when addressing user queries. In this paper, we introduce a novel approach to reduce vision compute by leveraging redundant vision tokens ``skipping layers'' rather than decreasing the number of vision tokens. Our method, VideoLLM-MoD, is inspired by mixture-of-depths LLMs and addresses the challenge of numerous vision tokens in long-term or streaming video. Specifically, for certain transformer layer, we learn to skip the computation for a high proportion (e.g., 80\\%) of vision tokens, passing them directly to the next layer. This approach significantly enhances model efficiency, achieving approximately 42% time and 30% memory savings for the entire training. Moreover, our method reduces the computation in the context and avoid decreasing the vision tokens, thus preserving or even improving performance compared to the vanilla model. We conduct extensive experiments to demonstrate the effectiveness of VideoLLM-MoD, showing its state-of-the-art results on multiple benchmarks, including narration, forecasting, and summarization tasks in COIN, Ego4D, and Ego-Exo4D datasets. The code and checkpoints will be made available at github.com/showlab/VideoLLM-online.", "keywords": "Online video understanding; effcient modeling", "primary_area": "machine_vision", "supplementary_material": "/attachment/9b35dd7fd02f5462d5a5cdc8913b59e5505051ca.zip", "author": "Shiwei Wu;Joya Chen;Kevin Qinghong Lin;Qimeng Wang;Yan Gao;Qianli Xu;Tong Xu;Yao Hu;Enhong Chen;Mike Zheng Shou", "authorids": "~Shiwei_Wu1;~Joya_Chen1;~Kevin_Qinghong_Lin1;~Qimeng_Wang1;~Yan_Gao10;~Qianli_Xu1;~Tong_Xu2;~Yao_Hu4;~Enhong_Chen1;~Mike_Zheng_Shou1", "gender": ";M;;M;M;M;M;;M;", "homepage": ";https://chenjoya.github.io/;;;;;http://staff.ustc.edu.cn/~tongxu/;;http://staff.ustc.edu.cn/~cheneh;", "dblp": ";247/9518;;;;30/3276;70/6770-1.html;;07/258;", "google_scholar": ";https://scholar.google.com.ph/citations?user=IIx9dc8AAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=JLpYAlQAAAAJ;;;Q9h02J0AAAAJ;", "orcid": ";;;0000-0002-9715-836X;0009-0004-5960-1684;0000-0003-0105-5903;0000-0003-4246-5386;;0000-0002-4835-4102;", "linkedin": ";;;;;;;;;", "or_profile": "~Shiwei_Wu1;~Joya_Chen1;~Kevin_Qinghong_Lin1;~Qimeng_Wang1;~Yan_Gao10;~Qianli_Xu1;~Tong_Xu2;~Yao_Hu4;~Enhong_Chen1;~Mike_Zheng_Shou1", "aff": ";National University of Singapore;;Xiaohongshu;Xiaohongshu;Institute for Infocomm Research, A*STAR;University of Science and Technology of China;;University of Science and Technology of China;", "aff_domain": ";u.nus.edu;;xiaohongshu.com;xiaohongshu.com;i2r.a-star.edu.sg;ustc.edu.cn;;ustc.edu.cn;", "position": ";PhD student;;CV/NLP Algorithm Engineer;Researcher;Researcher;Full Professor;;Full Professor;", "bibtex": "@inproceedings{\nwu2024videollmmod,\ntitle={Video{LLM}-MoD: Efficient Video-Language Streaming with Mixture-of-Depths Vision Computation},\nauthor={Shiwei Wu and Joya Chen and Kevin Qinghong Lin and Qimeng Wang and Yan Gao and Qianli Xu and Tong Xu and Yao Hu and Enhong Chen and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NKPXHzYusG}\n}", "github": "", "reviewers": "x5JZ;2CSB;qExK;HwD3", "pdf_size": 1891886, "rating": "4;4;5;7", "confidence": "5;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;4", "presentation": "3;3;3;4", "wc_summary": "87;72;30;48", "wc_strengths": "60;52;28;66", "wc_weaknesses": "112;40;82;34", "wc_questions": "28;16;73;9", "wc_limitations": "14;5;6;26", "wc_review": "301;185;219;183", "wc_reply_reviewers": "171;0;0;0", "wc_reply_authors": "1120;150;133;77", "reply_reviewers": "1;0;0;0", "reply_authors": "5;3;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.25, 21.878928218722233 ], "wc_strengths_avg": [ 51.5, 14.44818327679989 ], "wc_weaknesses_avg": [ 67.0, 31.89043743820395 ], "wc_questions_avg": [ 31.5, 24.904818810824544 ], "wc_limitations_avg": [ 12.75, 8.407585860400118 ], "wc_review_avg": [ 222.0, 47.80167361086848 ], "wc_reply_reviewers_avg": [ 42.75, 74.0451720235695 ], "wc_reply_authors_avg": [ 370.0, 433.8542381952722 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9738112120708630428&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";u.nus.edu;;xiaohongshu.com;xiaohongshu.com;i2r.a-star.edu.sg;ustc.edu.cn;;ustc.edu.cn;", "author_num": 10, "aff_unique_index": "0;1;1;2;3;3", "aff_unique_norm": "National University of Singapore;Xiaohongshu;Institute for Infocomm Research;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.xiaohongshu.com;https://www.i2r.a-star.edu.sg;http://www.ustc.edu.cn", "aff_unique_abbr": "NUS;XHS;I2R;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Time-Constrained Robust MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95448", "id": "NKpPnb3YNg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NKpPnb3YNg", "openreview": "https://openreview.net/forum?id=NKpPnb3YNg", "poster": "/media/PosterPDFs/NeurIPS%202024/95448.png?t=1731690295.7438674", "project": "", "author_site": "Adil Zouitine, David Bertoin, Pierre Clavier, Matthieu Geist, Emmanuel Rachelson", "tldr": "", "abstract": "Robust reinforcement learning is essential for deploying reinforcement learning algorithms in real-world scenarios where environmental uncertainty predominates.\nTraditional robust reinforcement learning often depends on rectangularity assumptions, where adverse probability measures of outcome states are assumed to be independent across different states and actions. \nThis assumption, rarely fulfilled in practice, leads to overly conservative policies. \nTo address this problem, we introduce a new time-constrained robust MDP (TC-RMDP) formulation that considers multifactorial, correlated, and time-dependent disturbances, thus more accurately reflecting real-world dynamics. This formulation goes beyond the conventional rectangularity paradigm, offering new perspectives and expanding the analytical framework for robust RL.\nWe propose three distinct algorithms, each using varying levels of environmental information, and evaluate them extensively on continuous control benchmarks. \nOur results demonstrate that these algorithms yield an efficient tradeoff between performance and robustness, outperforming traditional deep robust RL methods in time-constrained environments while preserving robustness in classical benchmarks.\nThis study revisits the prevailing assumptions in robust RL and opens new avenues for developing more practical and realistic RL applications.", "keywords": "Robust reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/0fc8d9e10892208810bbb58dc70463c46156897c.zip", "author": "Adil Zouitine;David Bertoin;Pierre Clavier;Matthieu Geist;Emmanuel Rachelson", "authorids": "~Adil_Zouitine1;~David_Bertoin1;~Pierre_Clavier1;~Matthieu_Geist1;~Emmanuel_Rachelson1", "gender": ";M;M;M;M", "homepage": ";https://davidbert.github.io/;https://pierreclavier.github.io/aboutme/;;https://personnel.isae-supaero.fr/emmanuel-rachelson", "dblp": "281/6912;;;38/6508;52/6241", "google_scholar": "https://scholar.google.fr/citations?user=jw4_zowAAAAJ;oAZZ-o4AAAAJ;-KnIaGsAAAAJ;ectPLEUAAAAJ;https://scholar.google.fr/citations?user=KtG9BSgAAAAJ", "orcid": ";;;;0000-0002-8559-1617", "linkedin": ";;pierre-clavier-823171135/;;emmanuelrachelson/", "or_profile": "~Adil_Zouitine1;~David_Bertoin1;~Pierre_Clavier1;~Matthieu_Geist1;~Emmanuel_Rachelson1", "aff": "Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;IRT Saint Exup\u00e9ry;\u00c9cole Polytechnique;Google;Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace", "aff_domain": "isae-supaero.fr;irt-saintexupery.com;polytechnique.edu;google.com;isae-supaero.fr", "position": "PhD student;Researcher;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzouitine2024timeconstrained,\ntitle={Time-Constrained Robust {MDP}s},\nauthor={Adil Zouitine and David Bertoin and Pierre Clavier and Matthieu Geist and Emmanuel Rachelson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NKpPnb3YNg}\n}", "github": "", "reviewers": "aiQz;ZKxz;1Xhb;stMZ", "pdf_size": 16480813, "rating": "5;5;6;7", "confidence": "2;4;2;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "60;61;60;152", "wc_strengths": "72;50;77;88", "wc_weaknesses": "51;79;171;49", "wc_questions": "108;3;3;24", "wc_limitations": "1;1;35;62", "wc_review": "292;194;346;375", "wc_reply_reviewers": "11;18;20;11", "wc_reply_authors": "77;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 39.694930406791244 ], "wc_strengths_avg": [ 71.75, 13.827056809024834 ], "wc_weaknesses_avg": [ 87.5, 49.646248599466205 ], "wc_questions_avg": [ 34.5, 43.292609068985435 ], "wc_limitations_avg": [ 24.75, 25.59663063764448 ], "wc_review_avg": [ 301.75, 68.9723676554604 ], "wc_reply_reviewers_avg": [ 15.0, 4.06201920231798 ], "wc_reply_authors_avg": [ 19.25, 33.34197804570089 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7474841680787627028&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "isae-supaero.fr;irt-saintexupery.com;polytechnique.edu;google.com;isae-supaero.fr", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;IRT Saint Exup\u00e9ry;Ecole Polytechnique;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.isae-supaero.fr;https://www.irt-saintexupery.com;https://www.polytechnique.edu;https://www.google.com", "aff_unique_abbr": "ISAE-SUPAERO;;X;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "France;United States" }, { "title": "Parameter-Inverted Image Pyramid Networks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95447", "id": "NKzLqRgG45", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NKzLqRgG45", "openreview": "https://openreview.net/forum?id=NKzLqRgG45", "poster": "/media/PosterPDFs/NeurIPS%202024/95447.png?t=1730105273.851379", "project": "", "author_site": "Xizhou Zhu, Xue Yang, Zhaokai Wang, Hao Li, Wenhan Dou, Junqi Ge, Lewei Lu, Yu Qiao, Jifeng Dai", "tldr": "", "abstract": "Image pyramids are commonly used in modern computer vision tasks to obtain multi-scale features for precise understanding of images. However, image pyramids process multiple resolutions of images using the same large-scale model, which requires significant computational cost. To overcome this issue, we propose a novel network architecture known as the Parameter-Inverted Image Pyramid Networks (PIIP). Our core idea is to use models with different parameter sizes to process different resolution levels of the image pyramid, thereby balancing computational efficiency and performance. Specifically, the input to PIIP is a set of multi-scale images, where higher resolution images are processed by smaller networks. We further propose a feature interaction mechanism to allow features of different resolutions to complement each other and effectively integrate information from different spatial scales. Extensive experiments demonstrate that the PIIP achieves superior performance in tasks such as object detection, segmentation, and image classification, compared to traditional image pyramid methods and single-branch networks, while reducing computational cost. Notably, when applying our method on a large-scale vision foundation model InternViT-6B, we improve its performance by 1\\%-2\\% on detection and segmentation with only 40\\%-60\\% of the original computation. These results validate the effectiveness of the PIIP approach and provide a new technical direction for future vision computing tasks.", "keywords": "Vision Foundation Models; Object Detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xizhou Zhu;Xue Yang;Zhaokai Wang;Hao Li;Wenhan Dou;Junqi Ge;Lewei Lu;Yu Qiao;Jifeng Dai", "authorids": "~Xizhou_Zhu1;~Xue_Yang2;~Zhaokai_Wang1;~Hao_Li13;~Wenhan_Dou1;~Junqi_Ge1;~Lewei_Lu1;~Yu_Qiao1;~Jifeng_Dai1", "gender": ";M;M;M;M;;M;;M", "homepage": ";https://yangxue.site/;https://wzk.plus/;https://cpsxhao.github.io;https://github.com/douwh21;;;;https://jifengdai.org/", "dblp": "170/1608;13/1779-5;232/7086;17/5705-69.html;;;247/6438;;14/9399", "google_scholar": "02RXI00AAAAJ;2xTlvV0AAAAJ;W0zVf-oAAAAJ;qHqQsY4AAAAJ;;;https://scholar.google.com.hk/citations?user=zdgKJXIAAAAJ;;SH_-B_AAAAAJ", "orcid": ";0000-0002-7084-9101;;0000-0002-4612-0740;;;;;", "linkedin": ";;zhaokai-wang-%E7%8E%8B%E8%82%87%E5%87%AF-5428181aa/;;;;lewei-lu-94015977/;;", "or_profile": "~Xizhou_Zhu1;~Xue_Yang2;~Zhaokai_Wang1;~Hao_Li13;~Wenhan_Dou1;~Junqi_Ge1;~Lewei_Lu1;~Yu_Qiao1;~Jifeng_Dai1", "aff": "Tsinghua University;Shanghai AI Laboratory;Shanghai Jiaotong University;The Chinese University of Hong Kong;Tsinghua University;;SenseTime;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;pjlab.org.cn;sjtu.edu.cn;cuhk.edu.hk;tsinghua.edu.cn;;sensetime.com;;tsinghua.edu.cn", "position": "Postdoc;Researcher;PhD student;PhD student;Undergrad student;;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nzhu2024parameterinverted,\ntitle={Parameter-Inverted Image Pyramid Networks},\nauthor={Xizhou Zhu and Xue Yang and Zhaokai Wang and Hao Li and Wenhan Dou and Junqi Ge and Lewei Lu and Yu Qiao and Jifeng Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NKzLqRgG45}\n}", "github": "", "reviewers": "Lww4;H3y6;hVKM", "pdf_size": 1393304, "rating": "7;7;9", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "4;3;4", "wc_summary": "41;332;49", "wc_strengths": "10;239;100", "wc_weaknesses": "96;580;106", "wc_questions": "2;70;67", "wc_limitations": "1;25;1", "wc_review": "150;1246;323", "wc_reply_reviewers": "11;133;52", "wc_reply_authors": "18;18;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 7.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 140.66666666666666, 135.3325123127805 ], "wc_strengths_avg": [ 116.33333333333333, 94.19955178001409 ], "wc_weaknesses_avg": [ 260.6666666666667, 225.83966780784007 ], "wc_questions_avg": [ 46.333333333333336, 31.372316175606514 ], "wc_limitations_avg": [ 9.0, 11.313708498984761 ], "wc_review_avg": [ 573.0, 481.0952781587725 ], "wc_reply_reviewers_avg": [ 65.33333333333333, 50.690783733885546 ], "wc_reply_authors_avg": [ 12.0, 8.48528137423857 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1306398256446330381&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "tsinghua.edu.cn;pjlab.org.cn;sjtu.edu.cn;cuhk.edu.hk;tsinghua.edu.cn;;sensetime.com;;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;0;4;0", "aff_unique_norm": "Tsinghua University;Shanghai AI Laboratory;Shanghai Jiao Tong University;Chinese University of Hong Kong;SenseTime", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.shanghai-ai-lab.com;https://www.sjtu.edu.cn;https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "THU;SAIL;SJTU;CUHK;SenseTime", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SaulLM-54B & SaulLM-141B: Scaling Up Domain Adaptation for the Legal Domain", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95446", "id": "NLUYZ4ZqNq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NLUYZ4ZqNq", "openreview": "https://openreview.net/forum?id=NLUYZ4ZqNq", "poster": "", "project": "", "author_site": "Pierre Colombo, Telmo Pessoa Pires, Malik Boudiaf, Rui Melo, Gabriel Hautreux, Etienne Malaboeuf, Johanne Charpentier, Dominic Culver, Michael Desa", "tldr": "", "abstract": "In this paper, we introduce SaulLM-medium and SaulLM-large, two large language models (LLMs) families tailored for the legal sector. These models, which feature architectures of 54 billion and 140 billion parameters, respectively, are based on the Mixtral architecture. The development of SaulLM-54B and SaulLM-140B is guided by large-scale domain adaptation, divided into strategies: (1) the exploitation of continued pretaining involving a legal corpus that includes over $400$ billion tokens, (2) the implementation of a specialized legal instruction-following protocol, and (3) the alignment of model outputs with human preferences in legal interpretations. The integration of synthetically generated data in the second and third steps enhances the models' capabilities in interpreting and processing legal texts, effectively reaching state-of-the-art performance and outperforming all previous open-source models on LegalBench Instruct. This research thoroughly explores the trade-offs involved in domain-specific adaptation at this scale, offering insights that may inform future studies on domain adaptation using strong decoder models. Building upon SaulLM-7B, this study refines the approach to produce an LLM better equipped for legal tasks and domains. Additionally, we release base, instruct and aligned versions on top of SaulLM-medium and SaulLM-large under the MIT License to facilitate reuse and collaborative research.", "keywords": "NLP; Deep Learning; Law", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Pierre Colombo;Telmo Pires;Malik Boudiaf;Rui Filipe Coimbra Pereira de Melo;Gabriel Hautreux;Etienne Malaboeuf;Johanne Charpentier;Dominic Culver;Michael Desa", "authorids": "~Pierre_Colombo2;~Telmo_Pires1;~Malik_Boudiaf1;~Rui_Filipe_Coimbra_Pereira_de_Melo1;~Gabriel_Hautreux2;~Etienne_Malaboeuf1;~Johanne_Charpentier1;~Dominic_Culver1;~Michael_Desa1", "gender": "M;;;M;M;;F;M;M", "homepage": "https://pierrecolombo.github.io/;;;;;;;https://dlculver.github.io/;", "dblp": ";242/7817;;;;;;;", "google_scholar": "yPoMt8gAAAAJ;8j9lb_wAAAAJ;;https://scholar.google.com/citations?hl=pt-PT;;;;;", "orcid": ";;;0009-0007-0009-0463;0009-0000-9127-336X;;;;", "linkedin": ";;malik-boudiaf/;rui--melo/;;;johanne-charpentier-96740728/;;desa-michael", "or_profile": "~Pierre_Colombo2;~Telmo_Pires1;~Malik_Boudiaf1;~Rui_Filipe_Coimbra_Pereira_de_Melo1;~Gabriel_Hautreux2;~Etienne_Malaboeuf1;~Johanne_Charpentier1;~Dominic_Culver1;~Michael_Desa1", "aff": "CentraleSupelec;Apple;;Equall;LIRMM;;CINES;Equall AI;Equall", "aff_domain": "centralesupelec.fr;apple.com;;equall.ai;lirmm.fr;;cines.fr;equall.ai;equall.ai", "position": "Assistant Professor;Researcher;;Researcher;PhD student;;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ncolombo2024saullmb,\ntitle={Saul{LM}-54B \\& Saul{LM}-141B: Scaling Up Domain Adaptation for the Legal Domain},\nauthor={Pierre Colombo and Telmo Pires and Malik Boudiaf and Rui Filipe Coimbra Pereira de Melo and Gabriel Hautreux and Etienne Malaboeuf and Johanne Charpentier and Dominic Culver and Michael Desa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NLUYZ4ZqNq}\n}", "github": "", "reviewers": "pgvT;iZW5;LHKy;jvqv", "pdf_size": 1005741, "rating": "5;5;7;8", "confidence": "4;4;4;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "103;81;49;50", "wc_strengths": "138;39;50;84", "wc_weaknesses": "194;152;136;80", "wc_questions": "223;44;147;2", "wc_limitations": "1;1;1;14", "wc_review": "659;317;383;230", "wc_reply_reviewers": "0;0;9;0", "wc_reply_authors": "593;363;289;95", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 22.63155982251334 ], "wc_strengths_avg": [ 77.75, 38.53813046840752 ], "wc_weaknesses_avg": [ 140.5, 40.85033659592048 ], "wc_questions_avg": [ 104.0, 86.62274528090182 ], "wc_limitations_avg": [ 4.25, 5.629165124598851 ], "wc_review_avg": [ 397.25, 160.56832657781547 ], "wc_reply_reviewers_avg": [ 2.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 335.0, 178.2301882398153 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=375512161863068638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "centralesupelec.fr;apple.com;;equall.ai;lirmm.fr;;cines.fr;equall.ai;equall.ai", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;2", "aff_unique_norm": "CentraleSup\u00e9lec;Apple;Equall;Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier;Centre National Informatique et Syst\u00e8mes d'Information;Equall AI", "aff_unique_dep": ";Apple Inc.;;;;", "aff_unique_url": "https://www.centralesupelec.fr;https://www.apple.com;;https://www.lirmm.fr;https://www.cines.fr;https://www.equall.ai", "aff_unique_abbr": "CS;Apple;;LIRMM;CINES;Equall AI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montpellier", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "France;United States;" }, { "title": "PTQ4DiT: Post-training Quantization for Diffusion Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95445", "id": "NLmAGkN6nn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NLmAGkN6nn", "openreview": "https://openreview.net/forum?id=NLmAGkN6nn", "poster": "/media/PosterPDFs/NeurIPS%202024/95445.png?t=1729104884.1312888", "project": "", "author_site": "Junyi Wu, Haoxuan Wang, Yuzhang Shang, Mubarak Shah, Yan Yan", "tldr": "", "abstract": "The recent introduction of Diffusion Transformers (DiTs) has demonstrated exceptional capabilities in image generation by using a different backbone architecture, departing from traditional U-Nets and embracing the scalable nature of transformers. Despite their advanced capabilities, the wide deployment of DiTs, particularly for real-time applications, is currently hampered by considerable computational demands at the inference stage. Post-training Quantization (PTQ) has emerged as a fast and data-efficient solution that can significantly reduce computation and memory footprint by using low-bit weights and activations. However, its applicability to DiTs has not yet been explored and faces non-trivial difficulties due to the unique design of DiTs. In this paper, we propose PTQ4DiT, a specifically designed PTQ method for DiTs. We discover two primary quantization challenges inherent in DiTs, notably the presence of salient channels with extreme magnitudes and the temporal variability in distributions of salient activation over multiple timesteps. To tackle these challenges, we propose Channel-wise Salience Balancing (CSB) and Spearmen's $\\rho$-guided Salience Calibration (SSC). CSB leverages the complementarity property of channel magnitudes to redistribute the extremes, alleviating quantization errors for both activations and weights. SSC extends this approach by dynamically adjusting the balanced salience to capture the temporal variations in activation. Additionally, to eliminate extra computational costs caused by PTQ4DiT during inference, we design an offline re-parameterization strategy for DiTs. Experiments demonstrate that our PTQ4DiT successfully quantizes DiTs to 8-bit precision (W8A8) while preserving comparable generation ability and further enables effective quantization to 4-bit weight precision (W4A8) for the first time.", "keywords": "Model Quantization; Diffusion Transformer; Efficiency", "primary_area": "generative_models", "supplementary_material": "", "author": "Junyi Wu;Haoxuan Wang;Yuzhang Shang;Mubarak Shah;Yan Yan", "authorids": "~Junyi_Wu1;~Haoxuan_Wang1;~Yuzhang_Shang1;~Mubarak_Shah3;~Yan_Yan6", "gender": "M;M;M;M;M", "homepage": "https://adreamwu.github.io/;https://hatchetproject.github.io/;https://42shawn.github.io/;https://www.crcv.ucf.edu/person/mubarak-shah/;", "dblp": ";;300/8483;s/MubarakShah;13/3953-2", "google_scholar": "Akua_xUAAAAJ;vRXYQvYAAAAJ;6ZPL5E0AAAAJ;https://scholar.google.com.tw/citations?user=p8gsO3gAAAAJ;", "orcid": ";0009-0003-4014-6610;;0000-0002-8216-1128;", "linkedin": "junyi-wu-82a92b24a/;;;mubarak-shah-b6aa68213/;", "or_profile": "~Junyi_Wu1;~Haoxuan_Wang1;~Yuzhang_Shang1;~Mubarak_Shah3;~Yan_Yan6", "aff": "Illinois Institute of Technology;Illinois Institute of Technology;Illinois Institute of Technology;University of Central Florida;University of Illinois Chicago", "aff_domain": "iit.edu;hawk.iit.edu;iit.edu;ucf.edu;uic.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024ptqdit,\ntitle={{PTQ}4DiT: Post-training Quantization for Diffusion Transformers},\nauthor={Junyi Wu and Haoxuan Wang and Yuzhang Shang and Mubarak Shah and Yan Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NLmAGkN6nn}\n}", "github": "", "reviewers": "a8C3;jrKh;N2VF;USPH", "pdf_size": 29808756, "rating": "5;5;5;6", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "80;57;72;44", "wc_strengths": "34;48;18;40", "wc_weaknesses": "42;230;207;190", "wc_questions": "2;204;47;6", "wc_limitations": "9;10;1;10", "wc_review": "167;549;345;290", "wc_reply_reviewers": "15;18;0;16", "wc_reply_authors": "30;36;0;25", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.25, 13.845125496000389 ], "wc_strengths_avg": [ 35.0, 11.0 ], "wc_weaknesses_avg": [ 167.25, 73.69319846498726 ], "wc_questions_avg": [ 64.75, 82.30241490989192 ], "wc_limitations_avg": [ 7.5, 3.774917217635375 ], "wc_review_avg": [ 337.75, 137.94450877073723 ], "wc_reply_reviewers_avg": [ 12.25, 7.1545440106270926 ], "wc_reply_authors_avg": [ 22.75, 13.699908758820257 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=753739816411140139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "iit.edu;hawk.iit.edu;iit.edu;ucf.edu;uic.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Illinois Institute of Technology;University of Central Florida;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iit.edu;https://www.ucf.edu;https://www.uic.edu", "aff_unique_abbr": "IIT;UCF;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding the Role of Equivariance in Self-supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95444", "id": "NLqdudgBfy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NLqdudgBfy", "openreview": "https://openreview.net/forum?id=NLqdudgBfy", "poster": "/media/PosterPDFs/NeurIPS%202024/95444.png?t=1731600224.3385878", "project": "", "author_site": "Yifei Wang, Kaiwen Hu, Sharut Gupta, Ziyu Ye, Yisen Wang, Stefanie Jegelka", "tldr": "", "abstract": "Contrastive learning has been a leading paradigm for self-supervised learning, but it is widely observed that it comes at the price of sacrificing useful features (\\eg colors) by being invariant to data augmentations. Given this limitation, there has been a surge of interest in equivariant self-supervised learning (E-SSL) that learns features to be augmentation-aware. However, even for the simplest rotation prediction method, there is a lack of rigorous understanding of why, when, and how E-SSL learns useful features for downstream tasks. To bridge this gap between practice and theory, we establish an information-theoretic perspective to understand the generalization ability of E-SSL. In particular, we identify a critical explaining-away effect in E-SSL that creates a synergy between the equivariant and classification tasks. This synergy effect encourages models to extract class-relevant features to improve its equivariant prediction, which, in turn, benefits downstream tasks requiring semantic features. Based on this perspective, we theoretically analyze the influence of data transformations and reveal several principles for practical designs of E-SSL. Our theory not only aligns well with existing E-SSL methods but also sheds light on new directions by exploring the benefits of model equivariance. We believe that a theoretically grounded understanding on the role of equivariance would inspire more principled and advanced designs in this field. Code is available at \n https://github.com/kaotty/Understanding-ESSL.", "keywords": "Self-supervised Learning;Unsupervised Learning;Representation Learning;Equivariance;Theory;Downstream Generalization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yifei Wang;Kaiwen Hu;Sharut Gupta;Ziyu Ye;Yisen Wang;Stefanie Jegelka", "authorids": "~Yifei_Wang1;~Kaiwen_Hu3;~Sharut_Gupta1;~Ziyu_Ye1;~Yisen_Wang1;~Stefanie_Jegelka3", "gender": "M;M;F;;M;F", "homepage": "https://yifeiwang77.com;https://github.com/kaotty;https://www.mit.edu/~sharut/;https://hazelye-bot.github.io/;https://yisenwang.github.io/;http://people.csail.mit.edu/stefje/", "dblp": "00/555-1;190/2631;;;172/1346-1;38/7003", "google_scholar": "-CLy6YsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;S2da4LUAAAAJ;uMWPDboAAAAJ;gTWUZlsAAAAJ", "orcid": ";;;0000-0002-0078-6758;;", "linkedin": ";;sharut-gupta/;;;", "or_profile": "~Yifei_Wang1;~Kaiwen_Hu3;~Sharut_Gupta1;~Ziyu_Ye1;~Yisen_Wang1;~Stefanie_Jegelka3", "aff": "Massachusetts Institute of Technology;Peking University;Google;Google DeepMind;Peking University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;stu.pku.edu.cn;google.com;deepmind.google;pku.edu.cn;mit.edu", "position": "Postdoc;Undergrad student;Student Researcher;Research Intern;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024understanding,\ntitle={Understanding the Role of Equivariance in Self-supervised Learning},\nauthor={Yifei Wang and Kaiwen Hu and Sharut Gupta and Ziyu Ye and Yisen Wang and Stefanie Jegelka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NLqdudgBfy}\n}", "github": "", "reviewers": "RgKg;TYZN;yP9X;a2cS", "pdf_size": 705569, "rating": "5;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "90;110;40;92", "wc_strengths": "44;52;37;193", "wc_weaknesses": "61;494;10;142", "wc_questions": "139;30;12;69", "wc_limitations": "1;19;5;62", "wc_review": "335;705;104;558", "wc_reply_reviewers": "16;141;27;57", "wc_reply_authors": "0;0;32;281", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.0, 26.019223662515376 ], "wc_strengths_avg": [ 81.5, 64.59295627233669 ], "wc_weaknesses_avg": [ 176.75, 189.11554008066074 ], "wc_questions_avg": [ 62.5, 48.73653660243001 ], "wc_limitations_avg": [ 21.75, 24.180312239505923 ], "wc_review_avg": [ 425.5, 227.61206031315652 ], "wc_reply_reviewers_avg": [ 60.25, 48.976397376695644 ], "wc_reply_authors_avg": [ 78.25, 117.78449600860039 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12534528083567681815&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 4, "email": "mit.edu;stu.pku.edu.cn;google.com;deepmind.google;pku.edu.cn;mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Peking University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://web.mit.edu;http://www.pku.edu.cn;https://www.google.com", "aff_unique_abbr": "MIT;Peking U;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;2;1;0", "aff_country_unique": "United States;China;United Kingdom" }, { "id": "NMwPKjNTEP", "title": "ManiBCI: Manipulating EEG BCI with Invisible and Robust Backdoor Attack via Frequency Transform", "track": "main", "status": "Reject", "tldr": "", "abstract": "The electroencephalogram (EEG) based brain-computer interface (BCI) has taken the advantages of the tremendous success of deep learning (DL) models, gaining a wide range of applications. However, DL models have been shown to be vulnerable to backdoor attacks. Despite there are extensive successful attacks for image, designing a stealthy and effect attack for EEG is a non-trivial task. While existing EEG attacks mainly focus on single target class attack, and they either require engaging the training stage of the target DL models, or fail to maintain high stealthiness. Addressing these limitations, we exploit a novel backdoor attack called ManiBCI, where the adversary can arbitrarily manipulate which target class the EEG BCI will misclassify without engaging the training stage. Specifically, ManiBCI is a three-stages clean label poisoning attacks: 1) selecting one trigger for each class; 2) learning optimal injecting EEG electrodes and frequencies masks with reinforcement learning for each trigger; 3) injecting the corresponding trigger\u2019s frequencies into poisoned data for each class by linearly interpolating the spectral amplitude of both data according to the learned masks. Experiments on three EEG datasets demonstrate the effectiveness and robustness of ManiBCI. The proposed ManiBCI also easily bypass existing backdoor defenses. Code will be published after the anonymous period.", "keywords": "EEG BCI;electroencephalogram;backdoor attack;reinforcement learning;frequency transform", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Xuanhao Liu;Xinhao Song;Dexuan He;Wei-Long Zheng;Bao-liang Lu", "authorids": "~Xuanhao_Liu2;~Xinhao_Song1;~Dexuan_He1;~Wei-Long_Zheng1;~Bao-liang_Lu1", "gender": "M;M;M;;M", "homepage": "https://xuanhaoliu.github.io;https://github.com/sxh4396;https://github.com/Firehdx;https://weilongzheng.github.io/;http://bcmi.sjtu.edu.cn/~blu/", "dblp": "359/4199;;;150/4150;09/3116.html", "google_scholar": "99yIdXAAAAAJ;;;MZXXe8UAAAAJ;https://scholar.google.com.tw/citations?user=709il6EAAAAJ", "orcid": "0000-0002-9693-7839;;;;0000-0001-8359-0058", "linkedin": ";;;;", "or_profile": "~Xuanhao_Liu2;~Xinhao_Song1;~Dexuan_He1;~Wei-Long_Zheng1;~Bao-liang_Lu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2024manibci,\ntitle={Mani{BCI}: Manipulating {EEG} {BCI} with Invisible and Robust Backdoor Attack via Frequency Transform},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=NMwPKjNTEP}\n}", "github": "", "project": "", "reviewers": "bcDT;fxMo;hi3h;WGew", "site": "https://openreview.net/forum?id=NMwPKjNTEP", "pdf_size": 0, "rating": "2;3;5;7", "confidence": "5;1;4;2", "soundness": "1;2;3;3", "novelty": "1;2;2;3", "presentation": "1;2;3;3", "wc_summary": "92;150;41;76", "wc_strengths": "12;48;52;34", "wc_weaknesses": "33;106;20;63", "wc_questions": "20;1;31;11", "wc_limitations": "14;1;7;8", "wc_review": "171;306;151;192", "wc_reply_reviewers": "72;42;24;0", "wc_reply_authors": "91;45;22;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 4.25, 1.920286436967152 ], "confidence_avg": [ 3.0, 1.5811388300841898 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 89.75, 39.37242055043098 ], "wc_strengths_avg": [ 36.5, 15.644487847162015 ], "wc_weaknesses_avg": [ 55.5, 33.06433123473088 ], "wc_questions_avg": [ 15.75, 11.07643895843786 ], "wc_limitations_avg": [ 7.5, 4.6097722286464435 ], "wc_review_avg": [ 205.0, 60.087436290792105 ], "wc_reply_reviewers_avg": [ 34.5, 26.28212320190285 ], "wc_reply_authors_avg": [ 39.5, 33.7231374578345 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3293547878370473, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WdEOIi0QCaUJ:scholar.google.com/&scioq=ManiBCI:+Manipulating+EEG+BCI+with+Invisible+and+Robust+Backdoor+Attack+via+Frequency+Transform&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ActFusion: a Unified Diffusion Model for Action Segmentation and Anticipation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95443", "id": "NN9U0lEcAn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NN9U0lEcAn", "openreview": "https://openreview.net/forum?id=NN9U0lEcAn", "poster": "/media/PosterPDFs/NeurIPS%202024/95443.png?t=1733903338.6583939", "project": "", "author_site": "Dayoung Gong, Suha Kwak, Minsu Cho", "tldr": "", "abstract": "Temporal action segmentation and long-term action anticipation are two popular vision tasks for the temporal analysis of actions in videos. \nDespite apparent relevance and potential complementarity, these two problems have been investigated as separate and distinct tasks. In this work, we tackle these two problems, action segmentation, and action anticipation, jointly using a unified diffusion model dubbed ActFusion. \nThe key idea to unification is to train the model to effectively handle both visible and invisible parts of the sequence in an integrated manner;\nthe visible part is for temporal segmentation, and the invisible part is for future anticipation. \nTo this end, we introduce a new anticipative masking strategy during training in which a late part of the video frames is masked as invisible, and learnable tokens replace these frames to learn to predict the invisible future.\nExperimental results demonstrate the bi-directional benefits between action segmentation and anticipation.\nActFusion achieves the state-of-the-art performance across the standard benchmarks of 50 Salads, Breakfast, and GTEA, outperforming task-specific models in both of the two tasks with a single unified model through joint learning.", "keywords": "temporal action segmentation;long-term action anticipation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dayoung Gong;Suha Kwak;Minsu Cho", "authorids": "~Dayoung_Gong1;~Suha_Kwak3;~Minsu_Cho1", "gender": "F;M;M", "homepage": "https://gongda0e.github.io/;https://suhakwak.github.io/;http://cvlab.postech.ac.kr/~mcho/", "dblp": "321/1839;65/6173;", "google_scholar": "https://scholar.google.com/citations?hl=ko;-gscDIEAAAAJ;5TyoF5QAAAAJ", "orcid": ";;", "linkedin": "dayoung-gong-11120717a/;;minsu-cho-062b3750/", "or_profile": "~Dayoung_Gong1;~Suha_Kwak3;~Minsu_Cho1", "aff": "Pohang University of Science and Technology;POSTECH;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr;postech.ac.kr", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ngong2024actfusion,\ntitle={ActFusion: a Unified Diffusion Model for Action Segmentation and Anticipation},\nauthor={Dayoung Gong and Suha Kwak and Minsu Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NN9U0lEcAn}\n}", "github": "", "reviewers": "zKDZ;uA7n;FW3D", "pdf_size": 2622964, "rating": "6;6;6", "confidence": "1;5;3", "soundness": "1;3;2", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "31;88;82", "wc_strengths": "58;59;101", "wc_weaknesses": "184;132;198", "wc_questions": "1;132;104", "wc_limitations": "5;43;58", "wc_review": "279;454;543", "wc_reply_reviewers": "299;21;41", "wc_reply_authors": "762;37;132", "reply_reviewers": "2;1;1", "reply_authors": "3;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 1.632993161855452 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.0, 25.573423705088842 ], "wc_strengths_avg": [ 72.66666666666667, 20.038851153585515 ], "wc_weaknesses_avg": [ 171.33333333333334, 28.394052585395805 ], "wc_questions_avg": [ 79.0, 56.32642955723953 ], "wc_limitations_avg": [ 35.333333333333336, 22.305953365762143 ], "wc_review_avg": [ 425.3333333333333, 109.6671732511096 ], "wc_reply_reviewers_avg": [ 120.33333333333333, 126.59998244690067 ], "wc_reply_authors_avg": [ 310.3333333333333, 321.72279303082576 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N7TVVefSXGwJ:scholar.google.com/&scioq=ActFusion:+a+Unified+Diffusion+Model+for+Action+Segmentation+and+Anticipation&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "postech.ac.kr;postech.ac.kr;postech.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Smoothed Online Classification can be Harder than Batch Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95442", "id": "NO9MSeZs6g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NO9MSeZs6g", "openreview": "https://openreview.net/forum?id=NO9MSeZs6g", "poster": "", "project": "", "author_site": "Vinod Raman, Unique Subedi, Ambuj Tewari", "tldr": "", "abstract": "We study online classification under smoothed adversaries. In this setting, at each time point, the adversary draws an example from a distribution that has a bounded density with respect to a fixed base measure, which is known apriori to the learner. For binary classification and scalar-valued regression, previous works [Haghtalab et al., 2020, Block et al., 2022] have shown that smoothed online learning is as easy as learning in the iid batch setting under PAC model. However, we show that smoothed online classification can be harder than the iid batch classification when the label space is unbounded. In particular, we construct a hypothesis class that is learnable in the iid batch setting under the PAC model but is not learnable under the smoothed online model. Finally, we identify a condition that ensures that the PAC learnability of a hypothesis class is sufficient for its smoothed online learnability.", "keywords": "Online Learning;Smoothed Analysis", "primary_area": "online_learning", "supplementary_material": "", "author": "Vinod Raman;Unique Subedi;Ambuj Tewari", "authorids": "~Vinod_Raman1;~Unique_Subedi2;~Ambuj_Tewari1", "gender": "M;M;M", "homepage": "https://vinodkraman.github.io;https://www.ambujtewari.com;https://unique-subedi.github.io/", "dblp": "126/5382;24/567;", "google_scholar": "Wn5QzOgAAAAJ;ttbl4FsAAAAJ;DO16ipsAAAAJ", "orcid": ";0000-0001-6969-7844;", "linkedin": ";;", "or_profile": "~Vinod_Raman1;~Ambuj_Tewari1;~UNIQUE_SUBEDI1", "aff": "Apple;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "apple.com;umich.edu;umich.edu", "position": "Intern;Full Professor;PhD student", "bibtex": "@inproceedings{\nraman2024smoothed,\ntitle={Smoothed Online Classification can be Harder than Batch Classification},\nauthor={Vinod Raman and Unique Subedi and Ambuj Tewari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NO9MSeZs6g}\n}", "github": "", "reviewers": "ercW;jMfw;7N9e;SmVy", "pdf_size": 375473, "rating": "4;6;6;7", "confidence": "3;4;3;1", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "161;63;125;51", "wc_strengths": "53;73;19;49", "wc_weaknesses": "182;130;3;26", "wc_questions": "13;128;9;1", "wc_limitations": "1;1;3;1", "wc_review": "410;395;159;128", "wc_reply_reviewers": "91;48;29;28", "wc_reply_authors": "553;250;250;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.0, 45.044422518220834 ], "wc_strengths_avg": [ 48.5, 19.30673457630782 ], "wc_weaknesses_avg": [ 85.25, 73.55057783593546 ], "wc_questions_avg": [ 37.75, 52.284677487768825 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 273.0, 130.07113438422837 ], "wc_reply_reviewers_avg": [ 49.0, 25.524498036200438 ], "wc_reply_authors_avg": [ 263.25, 195.96348511903946 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4736842105263159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-s7ti0pC3bYJ:scholar.google.com/&scioq=Smoothed+Online+Classification+can+be+Harder+than+Batch+Classification&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "apple.com;umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Apple;University of Michigan", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.umich.edu", "aff_unique_abbr": "Apple;UM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "NPAgarrM73", "title": "KidSat: satellite imagery to map childhood poverty dataset and benchmark", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Satellite imagery has emerged as an important tool to analyse demographic, health, and development indicators. While various deep learning models have been built for these tasks, each is specific to a particular problem, with no standard benchmarks available. We propose a new dataset pairing satellite imagery and high-quality survey data on child poverty to benchmark satellite feature representations. Our dataset consists of 33,608 images, each 10 km $\\times$ 10 km, from 19 countries in Eastern and Southern Africa in the time period 1997-2022. As defined by UNICEF, multidimensional child poverty covers six dimensions and it can be calculated from the face-to-face Demographic and Health Surveys (DHS) Program. As part of the benchmark, we test spatial as well as temporal generalization, by testing on unseen locations, and on data after the training years. \n Using our dataset we benchmark multiple models, from low-level satellite imagery models such as MOSAIKS, to deep learning foundation models, which include both generic vision models such as Self-Distillation with no Labels (DINOv2) models and specific satellite imagery models such as SatMAE. We provide open source code for building the satellite dataset, obtaining ground truth data from DHS and running various models assessed in our work.", "keywords": "satellite imagery;remote sensing;self-supervised learning;social science;global health;economic;health and development indicators", "primary_area": "", "supplementary_material": "/attachment/783f6886fbb34fdf93938936fb9c5b9bb2d9c4c2.pdf", "author": "Makkunda Sharma;Fan Yang;Duy-Nhat Vo;Esra Suel;Swapnil Mishra;Samir Bhatt;Oliver Fiala;William Rudgard;Seth Flaxman", "authorids": "~Makkunda_Sharma1;~Fan_Yang72;~Duy-Nhat_Vo1;~Esra_Suel1;~Swapnil_Mishra1;~Samir_Bhatt1;~Oliver_Fiala1;~William_Rudgard1;~Seth_Flaxman1", "gender": "M;M;M;F;M;M;M;M;M", "homepage": "https://makkunda.github.io;;;;https://s-mishra.github.io;https://www.imperial.ac.uk/people/s.bhatt;;;http://sethrf.com", "dblp": "211/8087.html;;;;151/3153;;;;67/7881", "google_scholar": "M_a8AH4AAAAJ;jGsoLuMAAAAJ;;;https://scholar.google.co.uk/citations?user=RqbpaXcAAAAJ;tL0x_vUAAAAJ;;Zclm5hUAAAAJ;Wnxq0mgAAAAJ", "orcid": ";;0009-0006-6443-3990;;0000-0002-8759-5902;0000-0002-0891-4611;;;0000-0002-2477-4217", "linkedin": ";luke-yang-542222199;;;;samir-bhatt-31927464/;oliverfiala/;;https://uk.linkedin.com/in/sethrf", "or_profile": "~Makkunda_Sharma1;~Fan_Yang72;~Duy-Nhat_Vo1;~Esra_Suel1;~Swapnil_Mishra1;~Samir_Bhatt1;~Oliver_Fiala1;~William_Rudgard1;~Seth_Flaxman1", "aff": "Department of Computer Science, University of Oxford;Department of Computer Science, University of Oxford;Department of Computer Science, University of Oxford;Imperial College London;National University of Singapore;Copenhagen University;Save the Children;University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk;imperial.ac.uk;nus.edu.sg;ku.dk;tu-dresden.de;ox.ac.uk;ox.ac.uk", "position": "PhD student;MS student;MS student;Postdoc;Assistant Professor;Full Professor;Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024kidsat,\ntitle={KidSat: satellite imagery to map childhood poverty dataset and benchmark},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=NPAgarrM73}\n}", "github": "", "project": "", "reviewers": "rwhX;inpU;yepH", "site": "https://openreview.net/forum?id=NPAgarrM73", "pdf_size": 168475, "rating": "5;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "27;116;53", "wc_strengths": "52;53;77", "wc_improvement": "580;58;114", "wc_limitations": "1;1;38", "wc_correctness": "26;1;9", "wc_clarity": "5;1;17", "wc_relation_to_prior_work": "1;1;70", "wc_documentation": "1;1;15", "wc_additional_feedback": "1;1;1", "wc_review": "694;233;394", "wc_reply_reviewers": "265;0;54", "wc_reply_authors": "627;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 65.33333333333333, 37.366057086910075 ], "wc_strengths_avg": [ 60.666666666666664, 11.55662388223981 ], "wc_improvement_avg": [ 250.66666666666666, 233.9933522322566 ], "wc_limitations_avg": [ 13.333333333333334, 17.441967269268172 ], "wc_correctness_avg": [ 12.0, 10.424330514074594 ], "wc_clarity_avg": [ 7.666666666666667, 6.79869268479038 ], "wc_relation_to_prior_work_avg": [ 24.0, 32.526911934581186 ], "wc_documentation_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 440.3333333333333, 191.0328651189516 ], "wc_reply_reviewers_avg": [ 106.33333333333333, 114.33964997128318 ], "wc_reply_authors_avg": [ 209.0, 295.57063453597686 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gb8OBp0OF3oJ:scholar.google.com/&scioq=KidSat:+satellite+imagery+to+map+childhood+poverty+dataset+and+benchmark&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;2;3;4;0;0", "aff_unique_norm": "University of Oxford;Imperial College London;National University of Singapore;University of Copenhagen;Save the Children", "aff_unique_dep": "Department of Computer Science;;;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.imperial.ac.uk;https://www.nus.edu.sg;https://www.ku.dk;https://www.savethechildren.org", "aff_unique_abbr": "Oxford;ICL;NUS;UCPH;Save the Children", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;0;0;1;2;3;0;0", "aff_country_unique": "United Kingdom;Singapore;Denmark;United States" }, { "title": "Decompose, Analyze and Rethink: Solving Intricate Problems with Human-like Reasoning Cycle", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95441", "id": "NPKZF1WDjZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NPKZF1WDjZ", "openreview": "https://openreview.net/forum?id=NPKZF1WDjZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95441.png?t=1731555406.4020128", "project": "", "author_site": "Shangzi Xue, Zhenya Huang, Jiayu Liu, Xin Lin, Yuting Ning, Binbin Jin, Xin Li, Qi Liu", "tldr": "", "abstract": "In this paper, we introduce DeAR (_Decompose-Analyze-Rethink_), a framework that iteratively builds a reasoning tree to tackle intricate problems within a single large language model (LLM). Unlike approaches that extend or search for rationales, DeAR is featured by 1) adopting a tree-based question decomposition manner to plan the organization of rationales, which mimics the logical planning inherent\nin human cognition; 2) globally updating the rationales at each reasoning step through natural language feedback. Specifically, the _Decompose_ stage decomposes the question into simpler sub-questions, storing them as new nodes; the _Analyze_ stage generates and self-checks rationales for sub-questions at each node evel; and the _Rethink_ stage updates parent-node rationales based on feedback from their child nodes. By generating and updating the reasoning process from a more global perspective, DeAR constructs more adaptive and accurate logical structures for complex problems, facilitating timely error correction compared to rationale-extension and search-based approaches such as Tree-of-Thoughts (ToT) and Graph-of-Thoughts (GoT). We conduct extensive experiments on three reasoning benchmarks, including ScienceQA, StrategyQA, and GSM8K, which cover a variety of reasoning tasks, demonstrating that our approach significantly reduces logical errors and enhances performance across various LLMs. Furthermore, we validate that DeAR is an efficient method that achieves a superior trade-off between accuracy and reasoning time compared to ToT and GoT.", "keywords": "Reasoning Tree;Large Language Models;Question Decomposition;Rationale Updating", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Shangzi Xue;Zhenya Huang;Jiayu Liu;Xin Lin;Yuting Ning;Binbin Jin;Xin Li;Qi Liu", "authorids": "~Shangzi_Xue1;~Zhenya_Huang2;~Jiayu_Liu2;~Xin_Lin7;~Yuting_Ning1;~Binbin_Jin1;~Xin_Li56;~Qi_Liu3", "gender": "M;M;M;M;;M;M;M", "homepage": ";http://staff.ustc.edu.cn/~huangzhy/;http://home.ustc.edu.cn/~jy251198/;https://scholar.google.com/citations?user=Scd71RAAAAAJ;https://nnnyt.github.io;;https://www.scopus.com/authid/detail.uri?authorId=57196399539;http://staff.ustc.edu.cn/~qiliuql/", "dblp": ";178/8690;120/1047-1;50/3323-5;;242/8289;09/1365-64;95/2446-3", "google_scholar": ";dVZuU90AAAAJ;https://scholar.google.com/citations?hl=zh-CN;Scd71RAAAAAJ;HYmzF-QAAAAJ;xhb7PikAAAAJ;;5EoHAFwAAAAJ", "orcid": ";0000-0003-1661-0420;0000-0001-8639-3308;0000-0001-6913-4654;;;;0000-0001-6956-5550", "linkedin": "ShangziXue;;;;;;;", "or_profile": "~Shangzi_Xue1;~Zhenya_Huang2;~Jiayu_Liu2;~Xin_Lin7;~Yuting_Ning1;~Binbin_Jin1;~Xin_Li56;~Qi_Liu3", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Associate Professor;PhD student;PhD student;MS student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxue2024decompose,\ntitle={Decompose, Analyze and Rethink: Solving Intricate Problems with Human-like Reasoning Cycle},\nauthor={Shangzi Xue and Zhenya Huang and Jiayu Liu and Xin Lin and Yuting Ning and Binbin Jin and Xin Li and Qi Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NPKZF1WDjZ}\n}", "github": "", "reviewers": "vfU8;ANC9;Zsa1;WGVX;dFYv", "pdf_size": 1071317, "rating": "6;7;7;7;7", "confidence": "4;4;3;5;4", "soundness": "3;4;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "65;76;62;63;97", "wc_strengths": "77;71;25;98;94", "wc_weaknesses": "89;131;477;94;111", "wc_questions": "69;7;57;95;74", "wc_limitations": "10;4;1;45;26", "wc_review": "310;289;622;395;402", "wc_reply_reviewers": "28;18;27;14;18", "wc_reply_authors": "42;42;44;50;43", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 72.6, 13.184839779079608 ], "wc_strengths_avg": [ 73.0, 26.038433132583073 ], "wc_weaknesses_avg": [ 180.4, 149.028319456404 ], "wc_questions_avg": [ 60.4, 29.39115513211415 ], "wc_limitations_avg": [ 17.2, 16.36337373526621 ], "wc_review_avg": [ 403.6, 118.0416875514748 ], "wc_reply_reviewers_avg": [ 21.0, 5.513619500836088 ], "wc_reply_authors_avg": [ 44.2, 2.993325909419153 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14134130564452225489&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Adaptive Depth Networks with Skippable Sub-Paths", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95440", "id": "NPu7Cdk2f9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NPu7Cdk2f9", "openreview": "https://openreview.net/forum?id=NPu7Cdk2f9", "poster": "/media/PosterPDFs/NeurIPS%202024/95440.png?t=1733663037.6841395", "project": "", "author_site": "Woochul Kang, HYUNGSEOP LEE", "tldr": "", "abstract": "Predictable adaptation of network depths can be an effective way to control inference latency and meet the resource condition of various devices. However, previous adaptive depth networks do not provide general principles and a formal explanation on why and which layers can be skipped, and, hence, their approaches are hard to be generalized and require long and complex training steps. In this paper, we present a practical approach to adaptive depth networks that is applicable to various networks with minimal training effort. In our approach, every hierarchical residual stage is divided into two sub-paths, and they are trained to acquire different properties through a simple self-distillation strategy. While the first sub-path is essential for hierarchical feature learning, the second one is trained to refine the learned features and minimize performance degradation if it is skipped. Unlike prior adaptive networks, our approach does not train every target sub-network in an iterative manner. At test time, however, we can connect these sub-paths in a combinatorial manner to select sub-networks of various accuracy-efficiency trade-offs from a single network. We provide a formal rationale for why the proposed training method can reduce overall prediction errors while minimizing the impact of skipping sub-paths. We demonstrate the generality and effectiveness of our approach with convolutional neural networks and transformers.", "keywords": "adaptive networks;training efficiency;inference efficiency;efficiency;acceleration;inference acceleration;convolutional neural networks;CNN;Vision transformer;vit;transformer", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Woochul Kang;Hyungseop Lee", "authorids": "~Woochul_Kang1;~Hyungseop_Lee1", "gender": "M;M", "homepage": "https://sites.google.com/site/woochulkang/;", "dblp": "20/3530;", "google_scholar": "pgo9aYAAAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": "0000-0002-4757-8999;0009-0004-7117-7580", "linkedin": ";", "or_profile": "~Woochul_Kang1;~Hyungseop_Lee1", "aff": "Incheon National University, South Korea;Incheon National University", "aff_domain": "inu.ac.kr;inu.ac.kr", "position": "Full Professor;Undergrad student", "bibtex": "@inproceedings{\nkang2024adaptive,\ntitle={Adaptive Depth Networks with Skippable Sub-Paths},\nauthor={Woochul Kang and Hyungseop Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NPu7Cdk2f9}\n}", "github": "", "reviewers": "RAbE;phTU;dirV;gteR", "pdf_size": 880443, "rating": "5;5;5;7", "confidence": "2;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "92;47;47;139", "wc_strengths": "48;44;64;226", "wc_weaknesses": "48;148;135;94", "wc_questions": "8;8;13;53", "wc_limitations": "4;1;9;43", "wc_review": "200;248;268;555", "wc_reply_reviewers": "23;0;254;69", "wc_reply_authors": "10;0;462;195", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.25, 38.06819538670043 ], "wc_strengths_avg": [ 95.5, 75.71492587330454 ], "wc_weaknesses_avg": [ 106.25, 39.092038831455184 ], "wc_questions_avg": [ 20.5, 18.874586088176873 ], "wc_limitations_avg": [ 14.25, 16.843025262701474 ], "wc_review_avg": [ 317.75, 139.18759822627877 ], "wc_reply_reviewers_avg": [ 86.5, 99.8461316226122 ], "wc_reply_authors_avg": [ 166.75, 187.31440814843901 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11518405858353487783&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "inu.ac.kr;inu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Incheon National University", "aff_unique_dep": "", "aff_unique_url": "https://www.inu.ac.kr", "aff_unique_abbr": "INU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Robustly overfitting latents for flexible neural image compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95439", "id": "NQB9myZksw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NQB9myZksw", "openreview": "https://openreview.net/forum?id=NQB9myZksw", "poster": "/media/PosterPDFs/NeurIPS%202024/95439.png?t=1730812584.0941174", "project": "", "author_site": "Yura Perugachi Diaz, Arwin Gansekoele, Sandjai Bhulai", "tldr": "", "abstract": "Neural image compression has made a great deal of progress. State-of-the-art models are based on variational autoencoders and are outperforming classical models. Neural compression models learn to encode an image into a quantized latent representation that can be efficiently sent to the decoder, which decodes the quantized latent into a reconstructed image. While these models have proven successful in practice, they lead to sub-optimal results due to imperfect optimization and limitations in the encoder and decoder capacity. Recent work shows how to use stochastic Gumbel annealing (SGA) to refine the latents of pre-trained neural image compression models. \nWe extend this idea by introducing SGA+, which contains three different methods that build upon SGA.\nWe show how our method improves the overall compression performance in terms of the R-D trade-off, compared to its predecessors. Additionally, we show how refinement of the latents with our best-performing method improves the compression performance on both the Tecnick and CLIC dataset. Our method is deployed for a pre-trained hyperprior and for a more flexible model.\nFurther, we give a detailed analysis of our proposed methods and show that they are less sensitive to hyperparameter choices. Finally, we show how each method can be extended to three- instead of two-class rounding.", "keywords": "neural image compression;latent optimization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yura Perugachi-Diaz;Arwin Gansekoele;Sandjai Bhulai", "authorids": "~Yura_Perugachi-Diaz1;~Arwin_Gansekoele1;~Sandjai_Bhulai1", "gender": "F;M;M", "homepage": ";;https://math.vu.nl/~sbhulai/", "dblp": ";;", "google_scholar": "d1kNj-wAAAAJ;3dY54lQAAAAJ;https://scholar.google.nl/citations?user=IiZ4tkIAAAAJ", "orcid": ";;0000-0003-1124-8821", "linkedin": ";;sbhulai/", "or_profile": "~Yura_Perugachi-Diaz1;~Arwin_Gansekoele1;~Sandjai_Bhulai1", "aff": ";Centrum voor Wiskunde en Informatica;Vrije Universiteit Amsterdam", "aff_domain": ";cwi.nl;vu.nl", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nperugachi-diaz2024robustly,\ntitle={Robustly overfitting latents for flexible neural image compression},\nauthor={Yura Perugachi-Diaz and Arwin Gansekoele and Sandjai Bhulai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NQB9myZksw}\n}", "github": "", "reviewers": "Vgvs;N6DE;5M8z", "pdf_size": 2413261, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "224;86;96", "wc_strengths": "9;74;54", "wc_weaknesses": "316;188;135", "wc_questions": "1;164;69", "wc_limitations": "1;4;60", "wc_review": "551;516;414", "wc_reply_reviewers": "209;0;0", "wc_reply_authors": "150;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 135.33333333333334, 62.82957548444487 ], "wc_strengths_avg": [ 45.666666666666664, 27.182510717166817 ], "wc_weaknesses_avg": [ 213.0, 75.97806701059633 ], "wc_questions_avg": [ 78.0, 66.8480864847055 ], "wc_limitations_avg": [ 21.666666666666668, 27.13341523329163 ], "wc_review_avg": [ 493.6666666666667, 58.116740751314985 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 98.52354484532562 ], "wc_reply_authors_avg": [ 50.0, 70.71067811865476 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6592589034225790080&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": ";cwi.nl;vu.nl", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Centrum voor Wiskunde en Informatica;Vrije Universiteit Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.cwi.nl/;https://www.vu.nl", "aff_unique_abbr": "CWI;VU Amsterdam", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Harmonizing Stochasticity and Determinism: Scene-responsive Diverse Human Motion Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95438", "id": "NQCkNM6TES", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NQCkNM6TES", "openreview": "https://openreview.net/forum?id=NQCkNM6TES", "poster": "/media/PosterPDFs/NeurIPS%202024/95438.png?t=1731169558.636808", "project": "", "author_site": "Zhenyu Lou, Qiongjie Cui, Tuo Wang, Zhenbo Song, Luoming Zhang, Cheng Cheng, Haofan Wang, Xu Tang, Huaxia Li, Hong Zhou", "tldr": "", "abstract": "Diverse human motion prediction (HMP) is a fundamental application in computer vision that has recently attracted considerable interest. Prior methods primarily focus on the stochastic nature of human motion, while neglecting the specific impact of external environment, leading to the pronounced artifacts in prediction when applied to real-world scenarios. To fill this gap, this work introduces a novel task: predicting diverse human motion within real-world 3D scenes. In contrast to prior works, it requires harmonizing the deterministic constraints imposed by the surrounding 3D scenes with the stochastic aspect of human motion. For this purpose, we propose DiMoP3D, a diverse motion prediction framework with 3D scene awareness, which leverages the 3D point cloud and observed sequence to generate diverse and high-fidelity predictions. DiMoP3D is able to comprehend the 3D scene, and determines the probable target objects and their desired interactive pose based on the historical motion. Then, it plans the obstacle-free trajectory towards these interested objects, and generates diverse and physically-consistent future motions. On top of that, DiMoP3D identifies deterministic factors in the scene and integrates them into the stochastic modeling, making the diverse HMP in realistic scenes become a controllable stochastic generation process. On two real-captured benchmarks, DiMoP3D has demonstrated significant improvements over state-of-the-art methods, showcasing its effectiveness in generating diverse and physically-consistent motion predictions within real-world 3D environments.", "keywords": "Human Motion Prediction;Diverse Motion Prediction;3D Scene-aware;Multimodal", "primary_area": "machine_vision", "supplementary_material": "/attachment/720223a75b5a416273d01aeec7a6087e6635be06.zip", "author": "Zhenyu Lou;Qiongjie Cui;Tuo Wang;Zhenbo Song;Luoming Zhang;Cheng Cheng;Haofan Wang;Xu Tang;Huaxia Li;Hong Zhou", "authorids": "~Zhenyu_Lou1;~Qiongjie_Cui1;~Tuo_Wang3;~Zhenbo_Song1;~Luoming_Zhang1;~Cheng_Cheng12;~Haofan_Wang1;~Xu_Tang1;~Huaxia_Li1;~Hong_Zhou5", "gender": "M;M;M;M;M;F;M;M;M;M", "homepage": ";;;;;;https://haofanwang.github.io/;https://tangxuvis.github.io/;;https://person.zju.edu.cn/zhouhong", "dblp": "333/1095;232/2538.html;;267/1972;125/0980;;234/7841.html;;233/2259.html;", "google_scholar": ";VoPGwJQAAAAJ;Thdi24IAAAAJ;9KXd7qQAAAAJ;;1F_HI-AAAAAJ;https://scholar.google.com/citations?hl=en;grP24aAAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0002-1165-3301;;;0000-0002-5020-4277;0000-0003-2188-4485;0000-0001-6962-6923;;;;", "linkedin": ";;tuo-wang-1008771a3/;;;;;;;", "or_profile": "~Zhenyu_Lou1;~Qiongjie_Cui1;~Tuo_Wang3;~Zhenbo_Song1;~Luoming_Zhang1;~Cheng_Cheng12;~Haofan_Wang1;~Xu_Tang1;~Huaxia_Li1;~Hong_Zhou5", "aff": "Zhejiang University;Singapore University of Technology and Design;University of Texas at Austin;Nanjing University of Science and Technology;Zhejiang University;Concordia University;Xiaohongshu;Xiaohongshu;Xiaohongshu;Zhejiang University", "aff_domain": "zju.edu.cn;sutd.edu.sg;utexas.edu;njust.edu.cn;zju.edu.cn;concordia.ca;xiaohongshu.com;xiaohongshu.com;xiaohongshu.com;zju.edu.cn", "position": "PhD student;Researcher;PhD student;Postdoc;PhD student;PhD student;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nlou2024harmonizing,\ntitle={Harmonizing Stochasticity and Determinism: Scene-responsive Diverse Human Motion Prediction},\nauthor={Zhenyu Lou and Qiongjie Cui and Tuo Wang and Zhenbo Song and Luoming Zhang and Cheng Cheng and Haofan Wang and Xu Tang and Huaxia Li and Hong Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NQCkNM6TES}\n}", "github": "", "reviewers": "wnWw;nnKQ;p6AA;Q59f", "pdf_size": 21958762, "rating": "3;4;6;8", "confidence": "3;5;3;3", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "wc_summary": "40;91;78;145", "wc_strengths": "23;13;194;91", "wc_weaknesses": "140;258;66;67", "wc_questions": "3;38;78;14", "wc_limitations": "13;5;17;8", "wc_review": "219;405;433;325", "wc_reply_reviewers": "131;799;178;16", "wc_reply_authors": "600;1409;78;13", "reply_reviewers": "1;3;1;1", "reply_authors": "4;4;2;2", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 88.5, 37.61980861195336 ], "wc_strengths_avg": [ 80.25, 72.20586887504366 ], "wc_weaknesses_avg": [ 132.75, 78.29232082394799 ], "wc_questions_avg": [ 33.25, 28.769558564566125 ], "wc_limitations_avg": [ 10.75, 4.602988159880492 ], "wc_review_avg": [ 345.5, 83.0948253503189 ], "wc_reply_reviewers_avg": [ 281.0, 304.8187986328927 ], "wc_reply_authors_avg": [ 525.0, 558.8009484601829 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.3758230140014144, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6455545327713847956&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "zju.edu.cn;sutd.edu.sg;utexas.edu;njust.edu.cn;zju.edu.cn;concordia.ca;xiaohongshu.com;xiaohongshu.com;xiaohongshu.com;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;0;4;5;5;5;0", "aff_unique_norm": "Zhejiang University;Singapore University of Technology and Design;University of Texas at Austin;Nanjing University of Science and Technology;Concordia University;Xiaohongshu", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.sutd.edu.sg;https://www.utexas.edu;http://www.nust.edu.cn/;https://www.concordia.ca;https://www.xiaohongshu.com", "aff_unique_abbr": "ZJU;SUTD;UT Austin;NUST;Concordia;XHS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;2;0;0;3;0;0;0;0", "aff_country_unique": "China;Singapore;United States;Canada" }, { "title": "NewTerm: Benchmarking Real-Time New Terms for Large Language Models with Annual Updates", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97724", "id": "NQLZoMHm6u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NQLZoMHm6u", "openreview": "https://openreview.net/forum?id=NQLZoMHm6u", "poster": "/media/PosterPDFs/NeurIPS%202024/97724.png?t=1730540004.921088", "project": "", "author_site": "Hexuan Deng, Wenxiang Jiao, Xuebo Liu, Min Zhang, Zhaopeng Tu", "tldr": "", "abstract": "Despite their remarkable abilities in various tasks, large language models (LLMs) still struggle with real-time information (e.g., new facts and terms) due to the knowledge cutoff in their development process. However, existing benchmarks focus on outdated content and limited fields, facing difficulties in real-time updating and leaving new terms unexplored. To address this problem, we propose an adaptive benchmark, NewTerm, for real-time evaluation of new terms. We design a highly automated construction method to ensure high-quality benchmark construction with minimal human effort, allowing flexible updates for real-time information. Empirical results on various LLMs demonstrate over 20% performance reduction caused by new terms. Additionally, while updates to the knowledge cutoff of LLMs can cover some of the new terms, they are unable to generalize to more distant new terms. We also analyze which types of terms are more challenging and why LLMs struggle with new terms, paving the way for future research. Finally, we construct NewTerm 2022 and 2023 to evaluate the new terms updated each year and will continue updating annually. The benchmark and codes can be found at https://anonymous.4open.science/r/NewTerms.", "keywords": "new term;benchmark;real-time evaluation;large language model;annual update", "primary_area": "", "supplementary_material": "/attachment/4b6b22235060727433a626ad61d945766d280074.pdf", "author": "Hexuan Deng;Wenxiang Jiao;Xuebo Liu;Min Zhang;Zhaopeng Tu", "authorids": "~Hexuan_Deng1;~Wenxiang_Jiao1;~Xuebo_Liu1;~Min_Zhang9;~Zhaopeng_Tu1", "gender": "M;M;M;M;M", "homepage": "https://hexuandeng.github.io/;https://wxjiao.github.io/;https://sunbowliu.github.io/;https://zhangmin-nlp-ai.github.io/;http://www.zptu.net", "dblp": "335/1449;239/4883;166/0029-2;83/5342-5;71/9281", "google_scholar": "https://scholar.google.com.hk/citations?user=vGS3kkQAAAAJ;CvtODukAAAAJ;XkDl9aoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;IvE2zRgAAAAJ", "orcid": "0009-0006-5513-928X;;;;", "linkedin": "hexuan-deng-693a99227/;;xuebo-liu-47877b195/;;tuzhaopeng", "or_profile": "~Hexuan_Deng1;~Wenxiang_Jiao1;~Xuebo_Liu1;~Min_Zhang9;~Zhaopeng_Tu1", "aff": "Harbin Institute of Technology, Shenzhen;Tencent AI Lab;Harbin Institute of Technolgy, Shenzhen;Harbin Institute of Technology, Shenzhen;Tencent AI Lab", "aff_domain": "stu.hit.edu.cn;tencent.com;hit.edu.cn;hit.edu.cn;tencent.com", "position": "MS student;Researcher;Assistant Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ndeng2024newterm,\ntitle={NewTerm: Benchmarking Real-Time New Terms for Large Language Models with Annual Updates},\nauthor={Hexuan Deng and Wenxiang Jiao and Xuebo Liu and Min Zhang and Zhaopeng Tu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=NQLZoMHm6u}\n}", "github": "", "reviewers": "LL5Y;HEp8;7eHY", "pdf_size": 3311171, "rating": "7;7;7", "confidence": "3;4;4", "wc_summary_and_contributions": "58;44;137", "wc_strengths": "266;16;57", "wc_improvement": "254;3;99", "wc_limitations": "8;1;4", "wc_correctness": "16;1;37", "wc_clarity": "16;1;2", "wc_relation_to_prior_work": "7;1;1", "wc_documentation": "25;1;41", "wc_additional_feedback": "1;1;1", "wc_review": "651;69;379", "wc_reply_reviewers": "19;16;0", "wc_reply_authors": "996;1503;706", "reply_reviewers": "1;1;0", "reply_authors": "2;3;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 79.66666666666667, 40.94169295745136 ], "wc_strengths_avg": [ 113.0, 109.4745023586162 ], "wc_improvement_avg": [ 118.66666666666667, 103.40964923814195 ], "wc_limitations_avg": [ 4.333333333333333, 2.8674417556808756 ], "wc_correctness_avg": [ 18.0, 14.7648230602334 ], "wc_clarity_avg": [ 6.333333333333333, 6.847546194724712 ], "wc_relation_to_prior_work_avg": [ 3.0, 2.8284271247461903 ], "wc_documentation_avg": [ 22.333333333333332, 16.438437341250605 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 366.3333333333333, 237.76926256819283 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 8.339997335464536 ], "wc_reply_authors_avg": [ 1068.3333333333333, 329.3694312200545 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=498839800514363036&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "stu.hit.edu.cn;tencent.com;hit.edu.cn;hit.edu.cn;tencent.com", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Harbin Institute of Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "http://en.hhit.edu.cn/;https://ai.tencent.com", "aff_unique_abbr": "HIT;Tencent AI Lab", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Dual-Diffusion for Binocular 3D Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95437", "id": "NT8Z5NjwxF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NT8Z5NjwxF", "openreview": "https://openreview.net/forum?id=NT8Z5NjwxF", "poster": "/media/PosterPDFs/NeurIPS%202024/95437.png?t=1730944783.175956", "project": "", "author_site": "Xiaoyue Wan, Zhuo Chen, Bingzhi Duan, Xu Zhao", "tldr": "", "abstract": "Binocular 3D human pose estimation (HPE), reconstructing a 3D pose from 2D poses of two views, offers practical advantages by combining multiview geometry with the convenience of a monocular setup. However, compared to a multiview setup, the reduction in the number of cameras increases uncertainty in 3D reconstruction. To address this issue, we leverage the diffusion model, which has shown success in monocular 3D HPE by recovering 3D poses from noisy data with high uncertainty. Yet, the uncertainty distribution of initial 3D poses remains unknown. Considering that 3D errors stem from 2D errors within geometric constraints, we recognize that the uncertainties of 3D and 2D are integrated in a binocular configuration, with the initial 2D uncertainty being well-defined. Based on this insight, we propose Dual-Diffusion specifically for Binocular 3D HPE, simultaneously denoising the uncertainties in 2D and 3D, and recovering plausible and accurate results. Additionally, we introduce Z-embedding as an additional condition for denoising and implement baseline-width-related pose normalization to enhance the model flexibility for various baseline settings. This is crucial as 3D error influence factors encompass depth and baseline width. Extensive experiments validate the effectiveness of our Dual-Diffusion in 2D refinement and 3D estimation. The code and models are available at https://github.com/sherrywan/Dual-Diffusion.", "keywords": "3D Human Pose Estimation;Binocular Vision;Diffusion Model;Pose Priors", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiaoyue Wan;Zhuo Chen;Bingzhi Duan;Xu Zhao", "authorids": "~Xiaoyue_Wan1;~Zhuo_Chen12;~Bingzhi_Duan1;~Xu_Zhao1", "gender": "F;M;M;M", "homepage": ";https://chzh9311.github.io;https://automation.sjtu.edu.cn/Xu;https://github.com/BingzhiDuan", "dblp": "203/9576;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;xfjQN98AAAAJ;bsp_RSUAAAAJ;", "orcid": "0000-0001-9810-0891;0000-0003-2988-9387;;", "linkedin": ";;;", "or_profile": "~Xiaoyue_Wan1;~Zhuo_Chen12;~Xu_Zhao1;~BingzhiDuan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\nwan2024dualdiffusion,\ntitle={Dual-Diffusion for Binocular 3D Human Pose Estimation},\nauthor={Xiaoyue Wan and Zhuo Chen and Bingzhi Duan and Xu Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NT8Z5NjwxF}\n}", "github": "", "reviewers": "cXv4;o1bT;cBDy;9xXX", "pdf_size": 12683820, "rating": "5;6;6;7", "confidence": "3;4;5;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "74;76;115;83", "wc_strengths": "12;34;78;58", "wc_weaknesses": "105;176;40;104", "wc_questions": "77;3;54;71", "wc_limitations": "65;4;1;8", "wc_review": "333;293;288;324", "wc_reply_reviewers": "110;18;22;56", "wc_reply_authors": "205;0;86;133", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.0, 16.507574019219177 ], "wc_strengths_avg": [ 45.5, 24.834451876375287 ], "wc_weaknesses_avg": [ 106.25, 48.11639533464659 ], "wc_questions_avg": [ 51.25, 29.106485531578695 ], "wc_limitations_avg": [ 19.5, 26.386549603917523 ], "wc_review_avg": [ 309.5, 19.345542122153102 ], "wc_reply_reviewers_avg": [ 51.5, 36.86122624113311 ], "wc_reply_authors_avg": [ 106.0, 74.44125200451695 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ahocd5SFHQsJ:scholar.google.com/&scioq=Dual-Diffusion+for+Binocular+3D+Human+Pose+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Meta-Diffu$B$: A Contextualized Sequence-to-Sequence Text Diffusion Model with Meta-Exploration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95436", "id": "NTWXVvIXJM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NTWXVvIXJM", "openreview": "https://openreview.net/forum?id=NTWXVvIXJM", "poster": "/media/PosterPDFs/NeurIPS%202024/95436.png?t=1729485777.3869653", "project": "", "author_site": "Yun-Yen Chuang, Hung-Min Hsu, Kevin Lin, Chen-Sheng Gu, Ling-Zhen Li, Ray-I Chang, Hung-yi Lee", "tldr": "", "abstract": "The diffusion model, a new generative modeling paradigm, has achieved significant success in generating images, audio, video, and text. It has been adapted for sequence-to-sequence text generation (Seq2Seq) through DiffuSeq, termed the S2S-Diffusion model. Existing S2S-Diffusion models predominantly rely on fixed or hand-crafted rules to schedule noise during the diffusion and denoising processes. However, these models are limited by non-contextualized noise, which fails to fully consider the characteristics of Seq2Seq tasks. In this paper, we propose the Meta-Diffu$B$ framework\u2014a novel scheduler-exploiter S2S-Diffusion paradigm designed to overcome the limitations of existing S2S-Diffusion models. We employ Meta-Exploration to train an additional scheduler model dedicated to scheduling contextualized noise for each sentence. Our exploiter model, an S2S-Diffusion model, leverages the noise scheduled by our scheduler model for updating and generation. Meta-Diffu$B$ achieves state-of-the-art performance compared to previous S2S-Diffusion models and fine-tuned pre-trained language models (PLMs) across four Seq2Seq benchmark datasets. We further investigate and visualize the impact of Meta-Diffu$B$'s noise scheduling on the generation of sentences with varying difficulties. Additionally, our scheduler model can function as a \"plug-and-play\" model to enhance DiffuSeq without the need for fine-tuning during the inference stage.", "keywords": "Diffusion Models;Sequence-to-Sequence;Text Generation;Meta-Exploration;Noise Scheduling", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yunyen Chuang;Hung-Min Hsu;Kevin Lin;Chen-Sheng Gu;Ling Zhen Li;Ray-I Chang;Hung-yi Lee", "authorids": "~Yunyen_Chuang1;~Hung-Min_Hsu1;~Kevin_Lin3;~Chen-Sheng_Gu1;~Ling_Zhen_Li1;~Ray-I_Chang1;~Hung-yi_Lee1", "gender": "M;M;;;;M;Non-Binary", "homepage": "https://www.facebook.com/zhuang.y.yan;;https://sites.google.com/site/kevinlin311tw/;;;http://www.esoe.ntu.edu.tw/en/members/Ray-I-Chang-85025873;https://speech.ee.ntu.edu.tw/~hylee/index.html", "dblp": "https://dblp.org/pers/hd/c/Chuang:Yun=Yen;139/5774;;211/9400.html;;;81/8056", "google_scholar": "5BJ63mUAAAAJ;stEqRQwAAAAJ;https://scholar.google.com.tw/citations?user=LKSy1kwAAAAJ;;https://scholar.google.com.tw/citations?user=wgZXJ_cAAAAJ;https://scholar.google.com.tw/citations?user=YiqWyHkAAAAJ;DxLO11IAAAAJ", "orcid": ";;0000-0001-8944-1336;;;;", "linkedin": "yunyen-chuang-547b51188/;;;;;;", "or_profile": "~Yunyen_Chuang1;~Hung-Min_Hsu1;~Kevin_Lin3;~Chen-Sheng_Gu1;~Ling_Zhen_Li1;~Ray-I_Chang1;~Hung-yi_Lee2", "aff": ";AIWaysion Inc.;Microsoft;National Taiwan University;National Taiwan University;National Taiwan University;National Taiwan University", "aff_domain": ";aiwaysion.com;microsoft.com;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "position": ";Researcher;Principal Researcher;MS student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchuang2024metadiffub,\ntitle={Meta-Diffu\\$B\\$: A Contextualized Sequence-to-Sequence Text Diffusion Model with Meta-Exploration},\nauthor={Yunyen Chuang and Hung-Min Hsu and Kevin Lin and Chen-Sheng Gu and Ling Zhen Li and Ray-I Chang and Hung-yi Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NTWXVvIXJM}\n}", "github": "", "reviewers": "qgkV;5aL8;YnwD;SoCC", "pdf_size": 527958, "rating": "4;4;6;6", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "116;9;78;77", "wc_strengths": "75;34;89;49", "wc_weaknesses": "274;317;126;173", "wc_questions": "126;36;128;32", "wc_limitations": "39;6;1;40", "wc_review": "630;402;422;371", "wc_reply_reviewers": "29;0;84;0", "wc_reply_authors": "824;549;717;215", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.0, 38.568121551353784 ], "wc_strengths_avg": [ 61.75, 21.510172012329424 ], "wc_weaknesses_avg": [ 222.5, 76.39535326183132 ], "wc_questions_avg": [ 80.5, 46.52687395473717 ], "wc_limitations_avg": [ 21.5, 18.090052515125542 ], "wc_review_avg": [ 456.25, 101.94698377097775 ], "wc_reply_reviewers_avg": [ 28.25, 34.29559009552103 ], "wc_reply_authors_avg": [ 576.25, 230.45322193451756 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0lWRlVEJp1IJ:scholar.google.com/&scioq=Meta-Diffu%24B%24:+A+Contextualized+Sequence-to-Sequence+Text+Diffusion+Model+with+Meta-Exploration&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";aiwaysion.com;microsoft.com;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw;ntu.edu.tw", "author_num": 7, "aff_unique_index": "0;1;2;2;2;2", "aff_unique_norm": "AIWaysion Inc.;Microsoft;National Taiwan University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": ";https://www.microsoft.com;https://www.ntu.edu.tw", "aff_unique_abbr": ";Microsoft;NTU", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Using Unity to Help Solve Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97723", "id": "NTkUXqDvlg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NTkUXqDvlg", "openreview": "https://openreview.net/forum?id=NTkUXqDvlg", "poster": "", "project": "", "author_site": "Connor Brennan, Andrew Williams, Omar G. Younis, Vedant Vyas, Daria Yasafova, Irina Rish", "tldr": "", "abstract": "Leveraging the depth and flexibility of XLand as well as the rapid prototyping features of the Unity engine, we present the United Unity Universe \u2014 an open-source toolkit designed to accelerate the creation of innovative reinforcement learning environments. This toolkit includes a robust implementation of XLand 2.0 complemented by a user-friendly interface which allows users to modify the details of procedurally generated terrains and task rules with ease. Additionally, we provide a curated selection of terrains and rule sets, accompanied by implementations of reinforcement learning baselines to facilitate quick experimentation with novel architectural designs for adaptive agents. Furthermore, we illustrate how the United Unity Universe serves as a high-level language that enables researchers to develop diverse and endlessly variable 3D environments within a unified framework. This functionality establishes the United Unity Universe (U3) as an essential tool for advancing the field of reinforcement learning, especially in the development of adaptive and generalizable learning systems.", "keywords": "Meta-learning;Reinforcement Learning;Procedural Generation;Adaptive Agents", "primary_area": "", "supplementary_material": "/attachment/2bd3364e4e002f4195c4d65b4619c36f8a7f9def.pdf", "author": "Connor Brennan;Andrew Robert Williams;Omar G. Younis;Vedant Vyas;Daria Yasafova;Irina Rish", "authorids": "~Connor_Brennan1;~Andrew_Robert_Williams1;~Omar_G._Younis2;~Vedant_Vyas2;~Daria_Yasafova1;~Irina_Rish1", "gender": ";M;F;F;M;M", "homepage": ";;;http://irina-rish.com;https://younis.dev;", "dblp": ";277/1398.html;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;6gnCkJ0AAAAJ;;Avse5gIAAAAJ;J-bfb4sAAAAJ;", "orcid": ";0000-0003-0178-3652;;;0009-0004-2783-7932;", "linkedin": ";;daria-yasafova/;irina-rish-8b2162;omar-g-younis/;vedant-vyas2501/", "or_profile": "~Connor_Brennan1;~Andrew_Robert_Williams1;~Daria_Yasafova1;~Irina_Rish1;~Omar_Gallal_Younis1;~VEDANT_VYAS1", "aff": "Mila - Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;;University of Montreal;University of Bologna;University of Alberta", "aff_domain": "mila.quebec;umontreal.ca;;mila.quebec;unibo.it;ualberta.ca", "position": "Postdoc;PhD student;;Professor;MS student;Undergrad student", "bibtex": "@inproceedings{\nbrennan2024using,\ntitle={Using Unity to Help Solve Reinforcement Learning},\nauthor={Connor Brennan and Andrew Robert Williams and Omar G. Younis and Vedant Vyas and Daria Yasafova and Irina Rish},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=NTkUXqDvlg}\n}", "github": "", "reviewers": "Trj1;XKFT;gZM4;2JpX", "pdf_size": 6292361, "rating": "6;6;7;8", "confidence": "4;3;5;4", "wc_summary_and_contributions": "86;64;119;70", "wc_strengths": "92;25;69;2", "wc_improvement": "75;14;82;121", "wc_limitations": "16;19;71;8", "wc_correctness": "37;8;52;1", "wc_clarity": "32;4;262;1", "wc_relation_to_prior_work": "30;29;72;1", "wc_documentation": "6;1;22;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "375;165;750;213", "wc_reply_reviewers": "0;0;31;0", "wc_reply_authors": "0;0;107;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;3;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 84.75, 21.34683817336891 ], "wc_strengths_avg": [ 47.0, 35.41892149684968 ], "wc_improvement_avg": [ 73.0, 38.30796261875591 ], "wc_limitations_avg": [ 28.5, 24.86463351831271 ], "wc_correctness_avg": [ 24.5, 20.838665984174707 ], "wc_clarity_avg": [ 74.75, 108.78275368825703 ], "wc_relation_to_prior_work_avg": [ 33.0, 25.347583711273153 ], "wc_documentation_avg": [ 9.25, 7.790218225441442 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 375.75, 229.65558451733762 ], "wc_reply_reviewers_avg": [ 7.75, 13.423393758658799 ], "wc_reply_authors_avg": [ 26.75, 46.332359102467464 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0Ye_TeHPUawJ:scholar.google.com/&scioq=Using+Unity+to+Help+Solve+Reinforcement+Learning&hl=en&as_sdt=0,23", "gs_version_total": 2, "email": "mila.quebec;umontreal.ca;;mila.quebec;unibo.it;ualberta.ca", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;University of Montreal;University of Bologna;University of Alberta", "aff_unique_dep": "Artificial Intelligence;;;;", "aff_unique_url": "https://mila.quebec;https://www.umontreal.ca;https://wwwumontreal.ca;https://www.unibo.it;https://www.ualberta.ca", "aff_unique_abbr": "Mila;UdeM;UM;Unibo;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Canada;Italy" }, { "title": "Amnesia as a Catalyst for Enhancing Black Box Pixel Attacks in Image Classification and Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95435", "id": "NTkYSWnVjl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NTkYSWnVjl", "openreview": "https://openreview.net/forum?id=NTkYSWnVjl", "poster": "/media/PosterPDFs/NeurIPS%202024/95435.png?t=1731846479.5854676", "project": "", "author_site": "Dongsu Song, Daehwa Ko, Jay Hoon Jung", "tldr": "", "abstract": "It is well known that query-based attacks tend to have relatively higher success\nrates in adversarial black-box attacks. While research on black-box attacks is actively\nbeing conducted, relatively few studies have focused on pixel attacks that\ntarget only a limited number of pixels. In image classification, query-based pixel\nattacks often rely on patches, which heavily depend on randomness and neglect\nthe fact that scattered pixels are more suitable for adversarial attacks. Moreover, to\nthe best of our knowledge, query-based pixel attacks have not been explored in the\nfield of object detection. To address these issues, we propose a novel pixel-based\nblack-box attack called Remember and Forget Pixel Attack using Reinforcement\nLearning(RFPAR), consisting of two main components: the Remember and Forget\nprocesses. RFPAR mitigates randomness and avoids patch dependency by\nleveraging rewards generated through a one-step RL algorithm to perturb pixels.\nRFPAR effectively creates perturbed images that minimize the confidence scores\nwhile adhering to limited pixel constraints. Furthermore, we advance our proposed\nattack beyond image classification to object detection, where RFPAR reduces\nthe confidence scores of detected objects to avoid detection. Experiments\non the ImageNet-1K dataset for classification show that RFPAR outperformed\nstate-of-the-art query-based pixel attacks. For object detection, using the MSCOCO\ndataset with YOLOv8 and DDQ, RFPAR demonstrates comparable mAP\nreduction to state-of-the-art query-based attack while requiring fewer query. Further\nexperiments on the Argoverse dataset using YOLOv8 confirm that RFPAR\neffectively removed objects on a larger scale dataset. Our code is available at\nhttps://github.com/KAU-QuantumAILab/RFPAR.", "keywords": "Adversarial attack;Query-based pixel attack;Image classification;Object detection;RL", "primary_area": "machine_vision", "supplementary_material": "/attachment/638f7552e72bfe1a3d54274fc065b2a2f059d13f.zip", "author": "Dongsu Song;Daehwa Ko;Jay Hoon Jung", "authorids": "~Dongsu_Song1;~Daehwa_Ko1;~Jay_Hoon_Jung2", "gender": "M;M;", "homepage": ";;", "dblp": "396/7144;;", "google_scholar": ";;", "orcid": "0009-0006-1443-5736;0009-0009-1037-5598;", "linkedin": ";;", "or_profile": "~Dongsu_Song1;~Daehwa_Ko1;~Jay_Hoon_Jung2", "aff": "Korea Aerospace University;Korea Aerospace University;", "aff_domain": "kau.kr;kau.kr;", "position": "MS student;Undergrad student;", "bibtex": "@inproceedings{\nsong2024amnesia,\ntitle={Amnesia as a Catalyst for Enhancing Black Box Pixel Attacks in Image Classification and Object Detection},\nauthor={Dongsu Song and Daehwa Ko and Jay Hoon Jung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NTkYSWnVjl}\n}", "github": "", "reviewers": "R24Y;RX8p;p4SQ", "pdf_size": 23959757, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "4;2;3", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "112;69;74", "wc_strengths": "86;36;25", "wc_weaknesses": "184;57;86", "wc_questions": "57;68;112", "wc_limitations": "76;35;6", "wc_review": "515;265;303", "wc_reply_reviewers": "45;28;13", "wc_reply_authors": "0;624;836", "reply_reviewers": "1;1;1", "reply_authors": "1;2;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 85.0, 19.200694431886227 ], "wc_strengths_avg": [ 49.0, 26.54555832275273 ], "wc_weaknesses_avg": [ 109.0, 54.33844556726542 ], "wc_questions_avg": [ 79.0, 23.762715894162152 ], "wc_limitations_avg": [ 39.0, 28.717010057919794 ], "wc_review_avg": [ 361.0, 109.99393922697135 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 13.072447700751718 ], "wc_reply_authors_avg": [ 486.6666666666667, 354.8420618935823 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:46gu0J_wp-cJ:scholar.google.com/&scioq=Amnesia+as+a+Catalyst+for+Enhancing+Black+Box+Pixel+Attacks+in+Image+Classification+and+Object+Detection&hl=en&as_sdt=0,23", "gs_version_total": 3, "email": "kau.kr;kau.kr;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Aerospace University", "aff_unique_dep": "", "aff_unique_url": "http://www.kau.ac.kr", "aff_unique_abbr": "KAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "WildGaussians: 3D Gaussian Splatting In the Wild", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95434", "id": "NU3tE3lIqf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NU3tE3lIqf", "openreview": "https://openreview.net/forum?id=NU3tE3lIqf", "poster": "", "project": "", "author_site": "Jonas Kulhanek, Songyou Peng, Zuzana Kukelova, Marc Pollefeys, Torsten Sattler", "tldr": "", "abstract": "While the field of 3D scene reconstruction is dominated by NeRFs due to their photorealistic quality, 3D Gaussian Splatting (3DGS) has recently emerged, offering similar quality with real-time rendering speeds. However, both methods primarily excel with well-controlled 3D scenes, while in-the-wild data - characterized by occlusions, dynamic objects, and varying illumination - remains challenging. NeRFs can adapt to such conditions easily through per-image embedding vectors, but 3DGS struggles due to its explicit representation and lack of shared parameters. To address this, we introduce WildGaussians, a novel approach to handle occlusions and appearance changes with 3DGS. By leveraging robust DINO features and integrating an appearance modeling module within 3DGS, our method achieves state-of-the-art results. We demonstrate that WildGaussians matches the real-time rendering speed of 3DGS while surpassing both 3DGS and NeRF baselines in handling in-the-wild data, all within a simple architectural framework.", "keywords": "Gaussian Splatting;Novel View Synthesis;3D Scene Reconstruction", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jonas Kulhanek;Songyou Peng;Zuzana Kukelova;Marc Pollefeys;Torsten Sattler", "authorids": "~Jonas_Kulhanek1;~Songyou_Peng1;~Zuzana_Kukelova3;~Marc_Pollefeys2;~Torsten_Sattler1", "gender": "M;M;F;M;M", "homepage": "https://jkulhanek.com;https://pengsongyou.github.io/;http://cmp.felk.cvut.cz/~kukelova;;https://tsattler.github.io/", "dblp": "247/1194;205/2316;17/4583;p/MarcPollefeys;51/9054", "google_scholar": "YDNzfN4AAAAJ;eNypkO0AAAAJ;https://scholar.google.ca/citations?user=M4a3VyYAAAAJ;YYH0BjEAAAAJ;jzx6_ZIAAAAJ", "orcid": "0000-0002-8437-3626;;;;0000-0001-9760-4553", "linkedin": ";;;marc-pollefeys-30a7075/;torsten-sattler-ba2ab0145", "or_profile": "~Jonas_Kulhanek1;~Songyou_Peng1;~Zuzana_Kukelova3;~Marc_Pollefeys2;~Torsten_Sattler1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich;Czech Technical University in Prague;Swiss Federal Institute of Technology;CIIRC, Czech Technical University in Prague", "aff_domain": "inf.ethz.ch;inf.ethz.ch;cvut.cz;ethz.ch;cvut.cz", "position": "Intern;Senior Researcher;Researcher;Full Professor;Senior Researcher", "bibtex": "@inproceedings{\nkulhanek2024wildgaussians,\ntitle={WildGaussians: 3D Gaussian Splatting In the Wild},\nauthor={Jonas Kulhanek and Songyou Peng and Zuzana Kukelova and Marc Pollefeys and Torsten Sattler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NU3tE3lIqf}\n}", "github": "", "reviewers": "nPTW;BAXf;5zzk;bi9Y", "pdf_size": 46394951, "rating": "5;6;6;7", "confidence": "5;5;3;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "176;49;204;98", "wc_strengths": "68;21;130;59", "wc_weaknesses": "345;184;171;98", "wc_questions": "426;39;65;3", "wc_limitations": "15;84;1;1", "wc_review": "1030;377;571;259", "wc_reply_reviewers": "23;54;56;63", "wc_reply_authors": "0;34;32;110", "reply_reviewers": "1;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 131.75, 61.57261972662849 ], "wc_strengths_avg": [ 69.5, 39.131189606246316 ], "wc_weaknesses_avg": [ 199.5, 90.17344398435716 ], "wc_questions_avg": [ 133.25, 170.44702256126388 ], "wc_limitations_avg": [ 25.25, 34.39749264117953 ], "wc_review_avg": [ 559.25, 293.7297865385804 ], "wc_reply_reviewers_avg": [ 49.0, 15.378556499229699 ], "wc_reply_authors_avg": [ 44.0, 40.422765862815474 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=501300415878823569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "inf.ethz.ch;inf.ethz.ch;cvut.cz;ethz.ch;cvut.cz", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "ETH Zurich;Czech Technical University;Swiss Federal Institute of Technology;Czech Technical University in Prague", "aff_unique_dep": "Department of Computer Science;;;CIIRC", "aff_unique_url": "https://www.ethz.ch;https://www.ctu.cz;https://www.ethz.ch;https://www.ciirc.cvut.cz/", "aff_unique_abbr": "ETHZ;CTU;ETH Zurich;CTU", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Zurich;;Prague", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Switzerland;Czech Republic" }, { "title": "Neural Pose Representation Learning for Generating and Transferring Non-Rigid Object Poses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95433", "id": "NU54MoKWlA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NU54MoKWlA", "openreview": "https://openreview.net/forum?id=NU54MoKWlA", "poster": "", "project": "", "author_site": "Seungwoo Yoo, Juil Koo, Kyeongmin Yeo, Minhyuk Sung", "tldr": "", "abstract": "We propose a novel method for learning representations of poses for 3D deformable objects, which specializes in 1) disentangling pose information from the object's identity, 2) facilitating the learning of pose variations, and 3) transferring pose information to other object identities. Based on these properties, our method enables the generation of 3D deformable objects with diversity in both identities and poses, using variations of a single object. It does not require explicit shape parameterization such as skeletons or joints, point-level or shape-level correspondence supervision, or variations of the target object for pose transfer.\nTo achieve pose disentanglement, compactness for generative models, and transferability, we first design the pose extractor to represent the pose as a keypoint-based hybrid representation and the pose applier to learn an implicit deformation field. To better distill pose information from the object's geometry, we propose the implicit pose applier to output an intrinsic mesh property, the face Jacobian. Once the extracted pose information is transferred to the target object, the pose applier is fine-tuned in a self-supervised manner to better describe the target object's shapes with pose variations. The extracted poses are also used to train a cascaded diffusion model to enable the generation of novel poses.\nOur experiments with the DeformThings4D and Human datasets demonstrate state-of-the-art performance in pose transfer and the ability to generate diverse deformed shapes with various objects and poses.", "keywords": "Pose Variation Transfer;Mesh Deformation;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Seungwoo Yoo;Juil Koo;Kyeongmin Yeo;Minhyuk Sung", "authorids": "~Seungwoo_Yoo1;~Juil_Koo1;~Kyeongmin_Yeo1;~Minhyuk_Sung1", "gender": "M;M;M;M", "homepage": "https://dvelopery0115.github.io;https://63days.github.io/;;https://mhsung.github.io/", "dblp": "130/0903;309/6316;372/5415;171/6792", "google_scholar": ";https://scholar.google.co.kr/citations?user=avxsz1UAAAAJ;;PcIYMp4AAAAJ", "orcid": ";;;", "linkedin": ";;;mhsung", "or_profile": "~Seungwoo_Yoo1;~Juil_Koo1;~Kyeongmin_Yeo1;~Minhyuk_Sung1", "aff": "Korea Advanced Institute of Science & Technology;Adobe Systems;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;adobe.com;kaist.ac.kr;kaist.ac.kr", "position": "MS student;Intern;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nyoo2024neural,\ntitle={Neural Pose Representation Learning for Generating and Transferring Non-Rigid Object Poses},\nauthor={Seungwoo Yoo and Juil Koo and Kyeongmin Yeo and Minhyuk Sung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NU54MoKWlA}\n}", "github": "", "reviewers": "RKb2;skND;xBSu", "pdf_size": 50632171, "rating": "5;6;9", "confidence": "4;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "2;3;4", "wc_summary": "59;68;155", "wc_strengths": "38;104;49", "wc_weaknesses": "77;96;3", "wc_questions": "83;155;49", "wc_limitations": "4;16;10", "wc_review": "261;439;266", "wc_reply_reviewers": "76;36;72", "wc_reply_authors": "473;0;34", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 94.0, 43.289721643826724 ], "wc_strengths_avg": [ 63.666666666666664, 28.871362204709975 ], "wc_weaknesses_avg": [ 58.666666666666664, 40.119266637808266 ], "wc_questions_avg": [ 95.66666666666667, 44.19150245113747 ], "wc_limitations_avg": [ 10.0, 4.898979485566356 ], "wc_review_avg": [ 322.0, 82.75667143298277 ], "wc_reply_reviewers_avg": [ 61.333333333333336, 17.987650084309387 ], "wc_reply_authors_avg": [ 169.0, 215.40813974097327 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:49XiBcT1euwJ:scholar.google.com/&scioq=Neural+Pose+Representation+Learning+for+Generating+and+Transferring+Non-Rigid+Object+Poses&hl=en&as_sdt=0,10", "gs_version_total": 5, "email": "kaist.ac.kr;adobe.com;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.kaist.ac.kr;https://www.adobe.com", "aff_unique_abbr": "KAIST;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Adaptive and Optimal Second-order Optimistic Methods for Minimax Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95432", "id": "NVDYgEFXCy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NVDYgEFXCy", "openreview": "https://openreview.net/forum?id=NVDYgEFXCy", "poster": "/media/PosterPDFs/NeurIPS%202024/95432.png?t=1733966827.7264943", "project": "", "author_site": "Ruichen Jiang, Ali Kavis, Qiujiang Jin, Sujay Sanghavi, Aryan Mokhtari", "tldr": "", "abstract": "We propose adaptive, line-search-free second-order methods with optimal rate of convergence for solving convex-concave min-max problems. By means of an adaptive step size, our algorithms feature a simple update rule that requires solving only one linear system per iteration, eliminating the need for line-search or backtracking mechanisms. Specifically, we base our algorithms on the optimistic method and appropriately combine it with second-order information. Moreover, distinct from common adaptive schemes, we define the step size recursively as a function of the gradient norm and the prediction error in the optimistic update. We first analyze a variant where the step size requires knowledge of the Lipschitz constant of the Hessian. Under the additional assumption of Lipschitz continuous gradients, we further design a parameter-free version by tracking the Hessian Lipschitz constant locally and ensuring the iterates remain bounded. We also evaluate the practical performance of our algorithm by comparing it to existing second-order algorithms for minimax optimization.", "keywords": "min-max optimization;second-order methods;adaptive methods;saddle-point problems;variational inequalities", "primary_area": "optimization", "supplementary_material": "/attachment/cae012bc26279f4dbb341fcbbb15f62afb4f6784.zip", "author": "Ruichen Jiang;Ali Kavis;Qiujiang Jin;sujay sanghavi;Aryan Mokhtari", "authorids": "~Ruichen_Jiang1;~Ali_Kavis1;~Qiujiang_Jin1;~sujay_sanghavi1;~Aryan_Mokhtari3", "gender": ";;;M;M", "homepage": "https://ruichen-jiang.github.io/;https://alikavis.github.io;https://qiujiangjin.github.io/;https://sites.utexas.edu/sanghavi;https://sites.utexas.edu/mokhtari/", "dblp": "271/7916;231/7697;;69/4911.html;140/7407", "google_scholar": "BGFt1UMAAAAJ;sPrPq6oAAAAJ;;O-DazBUAAAAJ;glcep6EAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ruichen_Jiang1;~Ali_Kavis1;~Qiujiang_Jin1;~sujay_sanghavi1;~Aryan_Mokhtari3", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Postdoc;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\njiang2024adaptive,\ntitle={Adaptive and Optimal Second-order Optimistic Methods for Minimax Optimization},\nauthor={Ruichen Jiang and Ali Kavis and Qiujiang Jin and sujay sanghavi and Aryan Mokhtari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NVDYgEFXCy}\n}", "github": "", "reviewers": "gLyW;w2ph;GGjT;4DVD", "pdf_size": 2273399, "rating": "5;6;7;7", "confidence": "3;2;2;4", "soundness": "3;3;4;4", "novelty": "3;2;4;3", "presentation": "2;3;3;3", "wc_summary": "75;60;83;99", "wc_strengths": "76;45;79;33", "wc_weaknesses": "61;65;42;213", "wc_questions": "101;69;56;115", "wc_limitations": "10;1;36;10", "wc_review": "323;240;296;470", "wc_reply_reviewers": "0;17;17;471", "wc_reply_authors": "0;0;0;726", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 14.077908225301087 ], "wc_strengths_avg": [ 58.25, 19.74050404624968 ], "wc_weaknesses_avg": [ 95.25, 68.53603067000597 ], "wc_questions_avg": [ 85.25, 23.731571797923543 ], "wc_limitations_avg": [ 14.25, 13.083864108129525 ], "wc_review_avg": [ 332.25, 84.97757057012161 ], "wc_reply_reviewers_avg": [ 126.25, 199.1624650881787 ], "wc_reply_authors_avg": [ 181.5, 314.36722157375124 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1641479676299471621&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Why Warmup the Learning Rate? Underlying Mechanisms and Improvements", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95431", "id": "NVl4SAmz5c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NVl4SAmz5c", "openreview": "https://openreview.net/forum?id=NVl4SAmz5c", "poster": "", "project": "", "author_site": "Dayal Singh Kalra, Maissam Barkeshli", "tldr": "", "abstract": "In modern deep learning, it is common to warm up the learning rate $\\eta$, often by a linear schedule between $\\eta_{\\text{init}} = 0$ and a predetermined target $\\eta_{\\text{trgt}}$. In this paper, we show through systematic experiments with SGD and Adam that the overwhelming benefit of warmup arises from allowing the network to tolerate larger $\\eta_{\\text{trgt}}$ by forcing the network to more well-conditioned areas of the loss landscape. The ability to handle larger target learning rates in turn makes hyperparameter tuning more robust while improving the final performance of the network. We uncover different regimes of operation during the warmup period, depending on whether the network training starts off in a progressive sharpening or sharpness reduction phase, which in turn depends on the initialization and parameterization. Using these insights, we show how $\\eta_{\\text{init}}$ can be properly chosen by utilizing the loss catapult mechanism, which saves on the number of warmup steps, in some cases completely eliminating the need for warmup. We also suggest an initialization for the variance in Adam, which provides benefits similar to warmup.", "keywords": "Learning rate Warmup;Optimization Dynamics", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Dayal Singh Kalra;Maissam Barkeshli", "authorids": "~Dayal_Singh_Kalra1;~Maissam_Barkeshli1", "gender": "M;", "homepage": ";", "dblp": "341/1483;", "google_scholar": "pqtFNxoAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Dayal_Singh_Kalra1;~Maissam_Barkeshli1", "aff": "University of Maryland, College Park;", "aff_domain": "umd.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nkalra2024why,\ntitle={Why Warmup the Learning Rate? Underlying Mechanisms and Improvements},\nauthor={Dayal Singh Kalra and Maissam Barkeshli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NVl4SAmz5c}\n}", "github": "", "reviewers": "ZUpa;LAU1;6XDG;kKLj;YVuK", "pdf_size": 4639730, "rating": "5;6;6;7;7", "confidence": "3;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "44;40;135;73;31", "wc_strengths": "20;39;131;35;87", "wc_weaknesses": "163;64;497;25;69", "wc_questions": "4;2;456;40;173", "wc_limitations": "5;1;12;1;4", "wc_review": "236;146;1231;174;364", "wc_reply_reviewers": "9;14;590;18;24", "wc_reply_authors": "0;0;1448;0;0", "reply_reviewers": "1;1;2;1;1", "reply_authors": "1;1;5;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.6, 37.90831043452082 ], "wc_strengths_avg": [ 62.4, 41.00536550257783 ], "wc_weaknesses_avg": [ 163.6, 172.7629589929508 ], "wc_questions_avg": [ 135.0, 172.2556240010758 ], "wc_limitations_avg": [ 4.6, 4.029888335921977 ], "wc_review_avg": [ 430.2, 407.37815356250997 ], "wc_reply_reviewers_avg": [ 131.0, 229.55260834937164 ], "wc_reply_authors_avg": [ 289.6, 579.2 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.8, 1.6000000000000003 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10017123576447874584&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "umd.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "MeLLoC: Lossless Compression with High-order Mechanism Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95430", "id": "NWctqX77b3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NWctqX77b3", "openreview": "https://openreview.net/forum?id=NWctqX77b3", "poster": "/media/PosterPDFs/NeurIPS%202024/95430.png?t=1730320628.8370557", "project": "", "author_site": "Xinyue Luo, Jin Cheng, Yu Chen", "tldr": "", "abstract": "Lossless compression of large-scale scientific floating-point data is critical yet challenging due to the presence of high-order information and noise that arises from model truncation and discretization errors. Existing entropy coding techniques fail to effectively leverage the mechanisms underlying the data generation process. This paper introduces MeLLoC(Mechanism Learning for Lossless Compression), a novel approach that combines high-order mechanism learning with classical encoding to enhance lossless compression for scientific data. The key idea is to treat the data as discrete samples from an underlying physical field described by differential equations and solve an inverse problem to identify the governing equation coefficients exhibiting more compressible numeric representations. Periodic extension techniques are employed to accelerate the decompression. Through extensive experiments on various scientific datasets, MeLLoC consistently outperforms state-of-the-art lossless compressors while offering compelling trade-offs between compression ratios and computational costs. This work opens up new avenues for exploiting domain knowledge and high-order information to improve data compression in scientific computing.", "keywords": "lossless compression;mechanism learning;inverse problem", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Xinyue Luo;Jin Cheng;Yu Chen", "authorids": "~Xinyue_Luo1;~Jin_Cheng2;~Yu_Chen40", "gender": "F;;M", "homepage": ";;", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";0000-0001-9983-8230;0009-0007-8863-003X", "linkedin": ";;", "or_profile": "~Xinyue_Luo1;~Jin_Cheng2;~Yu_Chen40", "aff": "The Fields Institute;Fudan University;Shanghai University of Finance and Economics", "aff_domain": "fields.utoronto.ca;fudan.edu.cn;sufe.edu.cn", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nluo2024melloc,\ntitle={Me{LL}oC: Lossless Compression with High-order Mechanism Learning},\nauthor={Xinyue Luo and Jin Cheng and Yu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NWctqX77b3}\n}", "github": "", "reviewers": "XGH7;VS2J;5CqK", "pdf_size": 1298999, "rating": "3;4;8", "confidence": "4;3;4", "soundness": "2;2;3", "novelty": "1;3;3", "presentation": "2;2;3", "wc_summary": "82;67;64", "wc_strengths": "16;54;67", "wc_weaknesses": "183;107;68", "wc_questions": "68;90;66", "wc_limitations": "6;1;79", "wc_review": "355;319;344", "wc_reply_reviewers": "140;13;35", "wc_reply_authors": "170;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.0, 2.160246899469287 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 7.874007874011811 ], "wc_strengths_avg": [ 45.666666666666664, 21.63844315615664 ], "wc_weaknesses_avg": [ 119.33333333333333, 47.75167245471327 ], "wc_questions_avg": [ 74.66666666666667, 10.873004286866728 ], "wc_limitations_avg": [ 28.666666666666668, 35.64952859280034 ], "wc_review_avg": [ 339.3333333333333, 15.062831370260005 ], "wc_reply_reviewers_avg": [ 62.666666666666664, 55.415601493522466 ], "wc_reply_authors_avg": [ 56.666666666666664, 80.13876853447539 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3273268353539885, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ihfmWB6Zt3QJ:scholar.google.com/&scioq=MeLLoC:+Lossless+Compression+with+High-order+Mechanism+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "fields.utoronto.ca;fudan.edu.cn;sufe.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Fields Institute;Fudan University;Shanghai University of Finance and Economics", "aff_unique_dep": ";;", "aff_unique_url": "https://fields.utoronto.ca;https://www.fudan.edu.cn;http://www.sufe.edu.cn", "aff_unique_abbr": "Fields Inst.;Fudan;SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;China" }, { "title": "A SARS-CoV-2 Interaction Dataset and VHH Sequence Corpus for Antibody Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97722", "id": "Na2gnQFkn8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Na2gnQFkn8", "openreview": "https://openreview.net/forum?id=Na2gnQFkn8", "poster": "/media/PosterPDFs/NeurIPS%202024/97722.png?t=1731024452.1033678", "project": "", "author_site": "Hirofumi Tsuruta, Hiroyuki Yamazaki, Ryota Maeda, Ryotaro Tamura, Akihiro Imura", "tldr": "", "abstract": "Antibodies are crucial proteins produced by the immune system to eliminate harmful foreign substances and have become pivotal therapeutic agents for treating human diseases.\nTo accelerate the discovery of antibody therapeutics, there is growing interest in constructing language models using antibody sequences.\nHowever, the applicability of pre-trained language models for antibody discovery has not been thoroughly evaluated due to the scarcity of labeled datasets.\nTo overcome these limitations, we introduce AVIDa-SARS-CoV-2, a dataset featuring the antigen-variable domain of heavy chain of heavy chain antibody (VHH) interactions obtained from two alpacas immunized with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) spike proteins.\nAVIDa-SARS-CoV-2 includes binary labels indicating the binding or non-binding of diverse VHH sequences to 12 SARS-CoV-2 mutants, such as the Delta and Omicron variants.\nFurthermore, we release VHHCorpus-2M, a pre-training dataset for antibody language models, containing over two million VHH sequences.\nWe report benchmark results for predicting SARS-CoV-2-VHH binding using VHHBERT pre-trained on VHHCorpus-2M and existing general protein and antibody-specific pre-trained language models.\nThese results confirm that AVIDa-SARS-CoV-2 provides valuable benchmarks for evaluating the representation capabilities of antibody language models for binding prediction, thereby facilitating the development of AI-driven antibody discovery.\nThe datasets are available at https://datasets.cognanous.com.", "keywords": "antibody;protein;SARS-CoV-2;COVID-19;drug discovery;machine learning;language model", "primary_area": "", "supplementary_material": "/attachment/e890e3143eead80303f0b65d7c99f49bea8236f1.pdf", "author": "Hirofumi Tsuruta;Hiroyuki Yamazaki;Ryota Maeda;Ryotaro Tamura;Akihiro Imura", "authorids": "~Hirofumi_Tsuruta1;~Hiroyuki_Yamazaki1;~Ryota_Maeda1;~Ryotaro_Tamura1;~Akihiro_Imura1", "gender": "M;M;;M;M", "homepage": ";https://cognanous.com/;https://www.cognano.co.jp;;https://cognanous.com/?lang=en", "dblp": ";;;;", "google_scholar": "https://scholar.google.co.jp/citations?user=t8xmSZcAAAAJ;;;;", "orcid": ";0000-0002-7690-5522;0000-0002-1514-1071;;", "linkedin": ";yamazaki-hiroyuki-481622117?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BCPrSP%2BKcT5mZi03e%2BDuwGg%3D%3D;;ryotaro-tamura-364082117/;akihiro-imura-127599242/", "or_profile": "~Hirofumi_Tsuruta1;~Hiroyuki_Yamazaki1;~Ryota_Maeda1;~Ryotaro_Tamura1;~Akihiro_Imura1", "aff": "SAKURA internet Inc.;Shizuoka City Shizuoka Hospital;COGNANO Inc.;SAKURA internet Inc.;biorhodes, Inc.", "aff_domain": "sakura.ad.jp;shizuokahospital.jp;cognano.co.jp;sakura.ad.jp;biorhodes.co.jp", "position": "Researcher;MD;Researcher;R&D Engineer;Principal Researcher", "bibtex": "@inproceedings{\ntsuruta2024a,\ntitle={A {SARS}-CoV-2 Interaction Dataset and {VHH} Sequence Corpus for Antibody Language Models},\nauthor={Hirofumi Tsuruta and Hiroyuki Yamazaki and Ryota Maeda and Ryotaro Tamura and Akihiro Imura},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Na2gnQFkn8}\n}", "github": "", "reviewers": "dkKX;SRMp;YXer;DRir", "pdf_size": 4319254, "rating": "6;6;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "60;77;90;119", "wc_strengths": "92;60;83;179", "wc_improvement": "58;22;193;343", "wc_limitations": "32;11;8;84", "wc_correctness": "40;12;21;92", "wc_clarity": "28;6;5;52", "wc_relation_to_prior_work": "35;1;130;50", "wc_documentation": "39;15;4;49", "wc_additional_feedback": "1;1;1;1", "wc_review": "385;205;535;969", "wc_reply_reviewers": "0;0;0;59", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 86.5, 21.569654610122992 ], "wc_strengths_avg": [ 103.5, 45.12482686947397 ], "wc_improvement_avg": [ 154.0, 126.37444361895328 ], "wc_limitations_avg": [ 33.75, 30.44975369358511 ], "wc_correctness_avg": [ 41.25, 30.994959267597046 ], "wc_clarity_avg": [ 22.75, 19.22725929507375 ], "wc_relation_to_prior_work_avg": [ 54.0, 47.33392018415547 ], "wc_documentation_avg": [ 26.75, 18.0329559418305 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 523.5, 282.50088495436614 ], "wc_reply_reviewers_avg": [ 14.75, 25.54774941164094 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3083353098988502467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sakura.ad.jp;shizuokahospital.jp;cognano.co.jp;sakura.ad.jp;biorhodes.co.jp", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "SAKURA internet Inc.;Shizuoka City Shizuoka Hospital;COGNANO Inc.;biorhodes, Inc.", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sakura.ne.jp;;;", "aff_unique_abbr": ";;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Japan;United States" }, { "title": "Towards a theory of how the structure of language is acquired by deep neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95429", "id": "NaCXcUKihH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NaCXcUKihH", "openreview": "https://openreview.net/forum?id=NaCXcUKihH", "poster": "", "project": "", "author_site": "Francesco Cagnetta, Matthieu Wyart", "tldr": "", "abstract": "How much data is required to learn the structure of a language via next-token prediction? We study this question for synthetic datasets generated via a Probabilistic Context-Free Grammar (PCFG)---a hierarchical generative model that captures the tree-like structure of natural languages. We determine token-token correlations analytically in our model and show that they can be used to build a representation of the grammar's hidden variables, the longer the range the deeper the variable. In addition, a finite training set limits the resolution of correlations to an effective range, whose size grows with that of the training set. As a result, a Language Model trained with increasingly many examples can build a deeper representation of the grammar's structure, thus reaching good performance despite the high dimensionality of the problem. We conjecture that the relationship between training set size and effective range of correlations holds beyond our synthetic datasets, and we test it in a collection of lines from Shakespeare's plays. In particular, we show that reducing the input size leads to saturation of the test loss decay at a characteristic training set size that can be predicted in our framework.", "keywords": "Hierarchical Models;Language Models;Learning Theory;Representation Learning;Self-Supervised Learning;Statistical Physics of Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Francesco Cagnetta;Matthieu Wyart", "authorids": "~Francesco_Cagnetta1;~Matthieu_Wyart2", "gender": "M;M", "homepage": "https://people.epfl.ch/francesco.cagnetta;http://pcsl.epfl.ch/", "dblp": "296/0251;26/11007", "google_scholar": "https://scholar.google.it/citations?user=Unp0fSQAAAAJ;https://scholar.google.ch/citations?user=1TttZYYAAAAJ", "orcid": "0000-0002-8302-431X;0000-0003-0644-0990", "linkedin": ";", "or_profile": "~Francesco_Cagnetta1;~Matthieu_Wyart2", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\ncagnetta2024towards,\ntitle={Towards a theory of how the structure of language is acquired by deep neural networks},\nauthor={Francesco Cagnetta and Matthieu Wyart},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NaCXcUKihH}\n}", "github": "", "reviewers": "Y3P4;h6xP;Fszw;qWaK", "pdf_size": 879251, "rating": "6;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "142;117;268;92", "wc_strengths": "112;40;50;158", "wc_weaknesses": "348;82;51;161", "wc_questions": "253;49;40;3", "wc_limitations": "46;4;33;11", "wc_review": "901;292;442;425", "wc_reply_reviewers": "100;0;0;28", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 154.75, 67.73247005683463 ], "wc_strengths_avg": [ 90.0, 47.97916214358062 ], "wc_weaknesses_avg": [ 160.5, 115.44370922661832 ], "wc_questions_avg": [ 86.25, 97.80433272611188 ], "wc_limitations_avg": [ 23.5, 16.830032679706836 ], "wc_review_avg": [ 515.0, 230.3008901415711 ], "wc_reply_reviewers_avg": [ 32.0, 40.890096600521744 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14048187125814944233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Diffusion for World Modeling: Visual Details Matter in Atari", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95428", "id": "NadTwTODgC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NadTwTODgC", "openreview": "https://openreview.net/forum?id=NadTwTODgC", "poster": "/media/PosterPDFs/NeurIPS%202024/95428.png?t=1733572817.8032894", "project": "", "author_site": "Eloi Alonso, Adam Jelley, Vincent Micheli, Anssi Kanervisto, Amos Storkey, Tim Pearce, Fran\u00e7ois Fleuret", "tldr": "", "abstract": "World models constitute a promising approach for training reinforcement learning agents in a safe and sample-efficient manner. Recent world models predominantly operate on sequences of discrete latent variables to model environment dynamics. However, this compression into a compact discrete representation may ignore visual details that are important for reinforcement learning. Concurrently, diffusion models have become a dominant approach for image generation, challenging well-established methods modeling discrete latents. Motivated by this paradigm shift, we introduce DIAMOND (DIffusion As a Model Of eNvironment Dreams), a reinforcement learning agent trained in a diffusion world model. We analyze the key design choices that are required to make diffusion suitable for world modeling, and demonstrate how improved visual details can lead to improved agent performance. DIAMOND achieves a mean human normalized score of 1.46 on the competitive Atari 100k benchmark; a new best for agents trained entirely within a world model. We further demonstrate that DIAMOND's diffusion world model can stand alone as an interactive neural game engine by training on static *Counter-Strike: Global Offensive* gameplay. To foster future research on diffusion for world modeling, we release our code, agents, videos and playable world models at https://diamond-wm.github.io.", "keywords": "World models;diffusion models;reinforcement learning;generative models;Atari", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Eloi Alonso;Adam Jelley;Vincent Micheli;Anssi Kanervisto;Amos Storkey;Tim Pearce;Fran\u00e7ois Fleuret", "authorids": "~Eloi_Alonso1;~Adam_Jelley1;~Vincent_Micheli1;~Anssi_Kanervisto1;~Amos_Storkey1;~Tim_Pearce1;~Fran\u00e7ois_Fleuret2", "gender": "M;M;;M;Not Specified;;M", "homepage": ";https://adamjelley.github.io;;;http://homepages.inf.ed.ac.uk/amos/;;https://fleuret.org/francois/", "dblp": "237/9666;339/0093;;186/7786;;142/9777;90/5265", "google_scholar": "Ya4KugcAAAAJ;39t3yJcAAAAJ;Dx7fMZ4AAAAJ;https://scholar.google.fi/citations?user=iPimqbwAAAAJ;;https://scholar.google.co.uk/citations?user=09k1kdQAAAAJ;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ", "orcid": ";0000-0002-0052-482X;;0000-0002-7479-4574;;;0000-0001-9457-7393", "linkedin": "eloialonso/;adamjelley/;vincent-micheli/;;;tim-pearce-3b165b69/;francois-fleuret/", "or_profile": "~Eloi_Alonso1;~Adam_Jelley1;~Vincent_Micheli1;~Anssi_Kanervisto1;~Amos_Storkey1;~Tim_Pearce1;~Francois_Fleuret1", "aff": "University of Geneva;University of Edinburgh, University of Edinburgh;University of Geneva, Switzerland;Microsoft;University of Edinburgh;Microsoft Research;University of Geneva", "aff_domain": "unige.ch;ed.ac.uk;unige.ch;microsoft.com;ed.ac.uk;research.microsoft.com;unige.ch", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nalonso2024diffusion,\ntitle={Diffusion for World Modeling: Visual Details Matter in Atari},\nauthor={Eloi Alonso and Adam Jelley and Vincent Micheli and Anssi Kanervisto and Amos Storkey and Tim Pearce and Fran{\\c{c}}ois Fleuret},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NadTwTODgC}\n}", "github": "", "reviewers": "6rRD;hqzV;JScL;h6bf", "pdf_size": 4217570, "rating": "4;7;7;7", "confidence": "4;4;4;3", "soundness": "3;4;3;3", "novelty": "2;4;3;3", "presentation": "3;4;4;3", "wc_summary": "79;66;183;69", "wc_strengths": "96;59;97;39", "wc_weaknesses": "199;119;57;137", "wc_questions": "176;2;87;33", "wc_limitations": "9;3;14;6", "wc_review": "559;249;438;284", "wc_reply_reviewers": "0;22;19;12", "wc_reply_authors": "66;74;16;16", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 99.25, 48.59205181920187 ], "wc_strengths_avg": [ 72.75, 24.78280654001883 ], "wc_weaknesses_avg": [ 128.0, 50.60632371551998 ], "wc_questions_avg": [ 74.5, 66.02461662137843 ], "wc_limitations_avg": [ 8.0, 4.06201920231798 ], "wc_review_avg": [ 382.5, 124.25477858014153 ], "wc_reply_reviewers_avg": [ 13.25, 8.46684711093805 ], "wc_reply_authors_avg": [ 43.0, 27.147743920996454 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4180418892258866323&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "unige.ch;ed.ac.uk;unige.ch;microsoft.com;ed.ac.uk;research.microsoft.com;unige.ch", "author_num": 7, "aff_unique_index": "0;1;0;2;1;2;0", "aff_unique_norm": "University of Geneva;University of Edinburgh;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.unige.ch;https://www.ed.ac.uk;https://www.microsoft.com", "aff_unique_abbr": "UNIGE;Edinburgh;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;1;2;0", "aff_country_unique": "Switzerland;United Kingdom;United States" }, { "title": "AID: Attention Interpolation of Text-to-Image Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95427", "id": "Nb5xlelV0C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nb5xlelV0C", "openreview": "https://openreview.net/forum?id=Nb5xlelV0C", "poster": "/media/PosterPDFs/NeurIPS%202024/95427.png?t=1731431964.047102", "project": "", "author_site": "He Qiyuan, Jinghao Wang, Ziwei Liu, Angela Yao", "tldr": "", "abstract": "Conditional diffusion models can create unseen images in various settings, aiding image interpolation. Interpolation in latent spaces is well-studied, but interpolation with specific conditions like text or image is less understood. Common approaches interpolate linearly in the conditioning space but tend to result in inconsistent images with poor fidelity. This work introduces a novel training-free technique named \\textbf{Attention Interpolation via Diffusion (AID)}. AID has two key contributions: \\textbf{1)} a fused inner/outer interpolated attention layer to boost image consistency and fidelity; and \\textbf{2)} selection of interpolation coefficients via a beta distribution to increase smoothness. Additionally, we present an AID variant called \\textbf{Prompt-guided Attention Interpolation via Diffusion (PAID)}, which \\textbf{3)} treats interpolation as a condition-dependent generative process. Experiments demonstrate that our method achieves greater consistency, smoothness, and efficiency in condition-based interpolation, aligning closely with human preferences. Furthermore, PAID offers substantial benefits for compositional generation, controlled image editing, image morphing and image-controlled generation, all while remaining training-free.", "keywords": "diffusion models;training-free;image interpolation;compositional generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Qiyuan He;Jinghao Wang;Ziwei Liu;Angela Yao", "authorids": "~Qiyuan_He2;~Jinghao_Wang2;~Ziwei_Liu1;~Angela_Yao1", "gender": "M;M;M;", "homepage": ";https://wjh.app;https://liuziwei7.github.io/;http://www.angelayao.com", "dblp": "182/7216;;05/6300-2;64/8484", "google_scholar": ";G8ztcD8AAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;https://scholar.google.ch/citations?user=-LJCZMMAAAAJ", "orcid": ";;;", "linkedin": "%E5%90%AF%E5%85%83-%E4%BD%95-b3a6641ab/?locale=en_US;;;", "or_profile": "~Qiyuan_He2;~Jinghao_Wang2;~Ziwei_Liu1;~Angela_Yao1", "aff": "National University of Singapore;School of Computer Science and Engineering, Nanyang Technological University;Nanyang Technological University;National University of Singapore", "aff_domain": "u.nus.edu;scse.ntu.edu.sg;ntu.edu.sg;nus.edu.sg", "position": "PhD student;MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhe2024aid,\ntitle={{AID}: Attention Interpolation of Text-to-Image Diffusion},\nauthor={Qiyuan He and Jinghao Wang and Ziwei Liu and Angela Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nb5xlelV0C}\n}", "github": "", "reviewers": "frah;oFS1;THPg;oKyj", "pdf_size": 35615039, "rating": "4;4;5;6", "confidence": "4;3;4;3", "soundness": "3;2;2;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "67;112;65;111", "wc_strengths": "34;30;164;95", "wc_weaknesses": "104;54;103;111", "wc_questions": "22;123;9;5", "wc_limitations": "13;6;2;10", "wc_review": "240;325;343;332", "wc_reply_reviewers": "57;0;0;0", "wc_reply_authors": "198;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.75, 22.76373211931646 ], "wc_strengths_avg": [ 80.75, 54.53152757809009 ], "wc_weaknesses_avg": [ 93.0, 22.726636354727024 ], "wc_questions_avg": [ 39.75, 48.47357527560764 ], "wc_limitations_avg": [ 7.75, 4.14578098794425 ], "wc_review_avg": [ 310.0, 40.92065493122025 ], "wc_reply_reviewers_avg": [ 14.25, 24.681724007856502 ], "wc_reply_authors_avg": [ 49.5, 85.73651497465943 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9586403221746930828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "u.nus.edu;scse.ntu.edu.sg;ntu.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "National University of Singapore;Nanyang Technological University", "aff_unique_dep": ";School of Computer Science and Engineering", "aff_unique_url": "https://www.nus.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "NUS;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Taming Generative Diffusion Prior for Universal Blind Image Restoration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95426", "id": "NbFOrcwqbR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NbFOrcwqbR", "openreview": "https://openreview.net/forum?id=NbFOrcwqbR", "poster": "/media/PosterPDFs/NeurIPS%202024/95426.png?t=1731577233.1080341", "project": "", "author_site": "Siwei Tu, Weidong Yang, Ben Fei", "tldr": "", "abstract": "Diffusion models have been widely utilized for image restoration. However, previous blind image restoration methods still need to assume the type of degradation model while leaving the parameters to be optimized, limiting their real-world applications. Therefore, we aim to tame generative diffusion prior for universal blind image restoration dubbed BIR-D, which utilizes an optimizable convolutional kernel to simulate the degradation model and dynamically update the parameters of the kernel in the diffusion steps, enabling it to achieve blind image restoration results even in various complex situations. Besides, based on mathematical reasoning, we have provided an empirical formula for the chosen of adaptive guidance scale, eliminating the need for a grid search for the optimal parameter. Experimentally, Our BIR-D has demonstrated superior practicality and versatility than off-the-shelf unsupervised methods across various tasks both on real-world and synthetic datasets, qualitatively and quantitatively. BIR-D is able to fulfill multi-guidance blind image restoration. Moreover, BIR-D can also restore images that undergo multiple and complicated degradations, demonstrating the practical applications. The code is available at https://github.com/Tusiwei/BIR-D.", "keywords": "Blind image restoration;Diffusion model;Optimizable degradation model;Adaptive guidance scale", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/0b5823c22a5c069c12ec3d087f31539d7e2d1f91.zip", "author": "Siwei Tu;Weidong Yang;Ben Fei", "authorids": "~Siwei_Tu1;~Weidong_Yang2;~Ben_Fei2", "gender": "M;M;M", "homepage": ";http://www.bigdatalab.fudan.edu.cn;", "dblp": ";;93/866", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;skQROj8AAAAJ", "orcid": ";0000-0002-6473-9272;0000-0002-3219-9996", "linkedin": ";;", "or_profile": "~Siwei_Tu1;~Weidong_Yang2;~Ben_Fei2", "aff": "Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu;fudan.edu.cn;fudan.edu.cn", "position": "Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\ntu2024taming,\ntitle={Taming Generative Diffusion Prior for Universal Blind Image Restoration},\nauthor={Siwei Tu and Weidong Yang and Ben Fei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NbFOrcwqbR}\n}", "github": "", "reviewers": "rhDr;CUUT;xbfe", "pdf_size": 49101042, "rating": "3;5;7", "confidence": "5;4;4", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "60;79;55", "wc_strengths": "37;108;88", "wc_weaknesses": "174;181;161", "wc_questions": "3;48;2", "wc_limitations": "1;1;6", "wc_review": "275;417;312", "wc_reply_reviewers": "82;16;50", "wc_reply_authors": "675;326;56", "reply_reviewers": "1;1;1", "reply_authors": "4;3;2", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 64.66666666666667, 10.338708279513883 ], "wc_strengths_avg": [ 77.66666666666667, 29.892399628593814 ], "wc_weaknesses_avg": [ 172.0, 8.286535263104035 ], "wc_questions_avg": [ 17.666666666666668, 21.452790546272116 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 334.6666666666667, 60.14611837480085 ], "wc_reply_reviewers_avg": [ 49.333333333333336, 26.948510575210314 ], "wc_reply_authors_avg": [ 352.3333333333333, 253.39078295961932 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ey2GtzvXr8sJ:scholar.google.com/&scioq=Taming+Generative+Diffusion+Prior+for+Universal+Blind+Image+Restoration&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "fudan.edu;fudan.edu.cn;fudan.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Watch Out for Your Agents! Investigating Backdoor Threats to LLM-Based Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95425", "id": "Nf4MHF1pi5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nf4MHF1pi5", "openreview": "https://openreview.net/forum?id=Nf4MHF1pi5", "poster": "/media/PosterPDFs/NeurIPS%202024/95425.png?t=1731141628.21531", "project": "", "author_site": "Wenkai Yang, Xiaohan Bi, Yankai Lin, Sishuo Chen, Jie Zhou, Xu Sun", "tldr": "", "abstract": "Driven by the rapid development of Large Language Models (LLMs), LLM-based agents have been developed to handle various real-world applications, including finance, healthcare, and shopping, etc. It is crucial to ensure the reliability and security of LLM-based agents during applications. However, the safety issues of LLM-based agents are currently under-explored. In this work, we take the first step to investigate one of the typical safety threats, backdoor attack, to LLM-based agents. We first formulate a general framework of agent backdoor attacks, then we present a thorough analysis of different forms of agent backdoor attacks. Specifically, compared with traditional backdoor attacks on LLMs that are only able to manipulate the user inputs and model outputs, agent backdoor attacks exhibit more diverse and covert forms: (1) From the perspective of the final attacking outcomes, the agent backdoor attacker can not only choose to manipulate the final output distribution, but also introduce the malicious behavior in an intermediate reasoning step only, while keeping the final output correct. (2) Furthermore, the former category can be divided into two subcategories based on trigger locations, in which the backdoor trigger can either be hidden in the user query or appear in an intermediate observation returned by the external environment. We implement the above variations of agent backdoor attacks on two typical agent tasks including web shopping and tool utilization. Extensive experiments show that LLM-based agents suffer severely from backdoor attacks and such backdoor vulnerability cannot be easily mitigated by current textual backdoor defense algorithms. This indicates an urgent need for further research on the development of targeted defenses against backdoor attacks on LLM-based agents. Warning: This paper may contain biased content.", "keywords": "LLM-based Agents;Backdoor Attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/59cf44cc2a4d58ce189157d43da6e36f6e309f2d.zip", "author": "Wenkai Yang;Xiaohan Bi;Yankai Lin;Sishuo Chen;Jie Zhou;Xu Sun", "authorids": "~Wenkai_Yang1;~Xiaohan_Bi1;~Yankai_Lin1;~Sishuo_Chen1;~Jie_Zhou8;~Xu_Sun1", "gender": "M;M;M;M;M;M", "homepage": "https://keven980716.github.io;https://github.com/Flutter01;https://linyankai.github.io/;https://pkucss.github.io/;;https://xusun.org/", "dblp": "250/3934;;161/0001.html;279/6225;00/5012-16;37/1971-1", "google_scholar": "8oNc9ZMAAAAJ;;https://scholar.google.com.hk/citations?user=j8K1FqEAAAAJ;Jn6gAIAAAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-9182-8158;;0000-0002-5899-5165;", "linkedin": ";;;;;", "or_profile": "~Wenkai_Yang1;~Xiaohan_Bi1;~Yankai_Lin1;~Sishuo_Chen1;~Jie_Zhou8;~Xu_Sun1", "aff": "Renmin University of China;Peking University;Renmin University of China;Peking University;WeChat AI, Tencent Inc.;Peking University", "aff_domain": "ruc.edu.cn;pku.edu.cn;ruc.edu.cn;pku.edu.cn;tencent.com;pku.edu.cn", "position": "PhD student;MS student;Assistant Professor;MS student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nyang2024watch,\ntitle={Watch Out for Your Agents! Investigating Backdoor Threats to {LLM}-Based Agents},\nauthor={Wenkai Yang and Xiaohan Bi and Yankai Lin and Sishuo Chen and Jie Zhou and Xu Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nf4MHF1pi5}\n}", "github": "", "reviewers": "jN3U;9U6V;pNki;7KQt", "pdf_size": 1572473, "rating": "5;7;7;7", "confidence": "4;3;5;3", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;2;4;3", "wc_summary": "48;233;103;81", "wc_strengths": "27;149;91;87", "wc_weaknesses": "109;477;125;67", "wc_questions": "4;132;1;47", "wc_limitations": "4;28;1;17", "wc_review": "192;1019;321;299", "wc_reply_reviewers": "24;334;0;19", "wc_reply_authors": "76;425;0;17", "reply_reviewers": "1;3;0;1", "reply_authors": "3;4;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.25, 70.19036614806906 ], "wc_strengths_avg": [ 88.5, 43.159587579123134 ], "wc_weaknesses_avg": [ 194.5, 164.47112208530712 ], "wc_questions_avg": [ 46.0, 52.88194398847304 ], "wc_limitations_avg": [ 12.5, 10.781929326423912 ], "wc_review_avg": [ 457.75, 327.6914516736743 ], "wc_reply_reviewers_avg": [ 94.25, 138.7090029522237 ], "wc_reply_authors_avg": [ 129.5, 172.92267057849875 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12910243784934728922&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ruc.edu.cn;pku.edu.cn;ruc.edu.cn;pku.edu.cn;tencent.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;1;2;1", "aff_unique_norm": "Renmin University of China;Peking University;Tencent", "aff_unique_dep": ";;WeChat AI", "aff_unique_url": "http://www.ruc.edu.cn;http://www.pku.edu.cn;https://www.tencent.com", "aff_unique_abbr": "RUC;Peking U;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Non-asymptotic Convergence of Training Transformers for Next-token Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95424", "id": "NfOFbPpYII", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NfOFbPpYII", "openreview": "https://openreview.net/forum?id=NfOFbPpYII", "poster": "/media/PosterPDFs/NeurIPS%202024/95424.png?t=1733516085.1311324", "project": "", "author_site": "Ruiquan Huang, Yingbin Liang, Jing Yang", "tldr": "", "abstract": "Transformers have achieved extraordinary success in modern machine learning due to their excellent ability to handle sequential data, especially in next-token prediction (NTP) tasks. However, the theoretical understanding of their performance in NTP is limited, with existing studies focusing mainly on asymptotic performance. This paper provides a fine-grained non-asymptotic analysis of the training dynamics of a one-layer transformer consisting of a self-attention module followed by a feed-forward layer. We first characterize the essential structural properties of training datasets for NTP using a mathematical framework based on partial orders. \nThen, we design a two-stage training algorithm, where the pre-processing stage for training the feed-forward layer and the main stage for training the attention layer exhibit fast convergence performance. Specifically, both layers converge sub-linearly to the direction of their corresponding max-margin solutions. We also show that the cross-entropy loss enjoys a linear convergence rate. Furthermore, we show that the trained transformer presents non-trivial prediction ability with dataset shift, which sheds light on the remarkable generalization performance of transformers. Our analysis technique involves the development of novel properties on the attention gradient and further in-depth analysis of how these properties contribute to the convergence of the training process. Our experiments further validate our theoretical findings.", "keywords": "Transformer;Convergence Rate;Next-token Prediction;Self-attention;Implicit Bias", "primary_area": "learning_theory", "supplementary_material": "/attachment/01eb1e51f9878f5ecb5e944385c59bef4887d813.zip", "author": "Ruiquan Huang;Yingbin Liang;Jing Yang", "authorids": "~Ruiquan_Huang1;~Yingbin_Liang1;~Jing_Yang3", "gender": "M;F;", "homepage": ";https://sites.google.com/view/yingbinliang/home;http://www.ee.psu.edu/yang", "dblp": "304/8880;51/332;", "google_scholar": "0eo3JGgAAAAJ;lGgLAiIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "ruiquan-huang-369543185/;;", "or_profile": "~Ruiquan_Huang1;~Yingbin_Liang1;~Jing_Yang3", "aff": "Pennsylvania State University;The Ohio State University;Pennsylvania State University", "aff_domain": "psu.edu;osu.edu;psu.edu", "position": "PhD student;Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024nonasymptotic,\ntitle={Non-asymptotic Convergence of Training Transformers for Next-token Prediction},\nauthor={Ruiquan Huang and Yingbin Liang and Jing Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NfOFbPpYII}\n}", "github": "", "reviewers": "U8Jg;jsSk;ZtZ3;72WK", "pdf_size": 737445, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "56;98;92;89", "wc_strengths": "25;47;121;45", "wc_weaknesses": "21;65;210;37", "wc_questions": "237;4;34;18", "wc_limitations": "13;36;6;1", "wc_review": "352;250;463;190", "wc_reply_reviewers": "117;20;27;19", "wc_reply_authors": "565;11;77;11", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.75, 16.345871038277526 ], "wc_strengths_avg": [ 59.5, 36.53423052426313 ], "wc_weaknesses_avg": [ 83.25, 74.85444208595773 ], "wc_questions_avg": [ 73.25, 95.135101303357 ], "wc_limitations_avg": [ 14.0, 13.397761006974262 ], "wc_review_avg": [ 313.75, 103.82286597855021 ], "wc_reply_reviewers_avg": [ 45.75, 41.2515151236897 ], "wc_reply_authors_avg": [ 166.0, 231.93318003252574 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9736895439094763140&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "psu.edu;osu.edu;psu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Pennsylvania State University;Ohio State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.osu.edu", "aff_unique_abbr": "PSU;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Matrix Denoising with Doubly Heteroscedastic Noise: Fundamental Limits and Optimal Spectral Methods", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95423", "id": "NgyT80IPUK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NgyT80IPUK", "openreview": "https://openreview.net/forum?id=NgyT80IPUK", "poster": "/media/PosterPDFs/NeurIPS%202024/95423.png?t=1731490804.3149383", "project": "", "author_site": "Yihan Zhang, Marco Mondelli", "tldr": "", "abstract": "We study the matrix denoising problem of estimating the singular vectors of a rank-$1$ signal corrupted by noise with both column and row correlations. Existing works are either unable to pinpoint the exact asymptotic estimation error or, when they do so, the resulting approaches (e.g., based on whitening or singular value shrinkage) remain vastly suboptimal. On top of this, most of the literature has focused on the special case of estimating the left singular vector of the signal when the noise only possesses row correlation (one-sided heteroscedasticity). In contrast, our work establishes the information-theoretic and algorithmic limits of matrix denoising with doubly heteroscedastic noise. We characterize the exact asymptotic minimum mean square error, and design a novel spectral estimator with rigorous optimality guarantees: under a technical condition, it attains positive correlation with the signals whenever information-theoretically possible and, for one-sided heteroscedasticity, it also achieves the Bayes-optimal error. Numerical experiments demonstrate the significant advantage of our theoretically principled method with the state of the art. The proofs draw connections with statistical physics and approximate message passing, departing drastically from standard random matrix theory techniques.", "keywords": "Matrix denoising;heteroscedasticity;spectral methods;approximate message passing;statistical physics", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yihan Zhang;Marco Mondelli", "authorids": "~Yihan_Zhang1;~Marco_Mondelli1", "gender": "M;M", "homepage": "https://sites.google.com/view/yihan/;http://marcomondelli.com", "dblp": "119/9989-1;120/7089", "google_scholar": "https://scholar.google.com.hk/citations?user=JTCngxEAAAAJ;BHdSb5AAAAAJ", "orcid": "0000-0002-6465-6258;", "linkedin": "yihan-zhang-9b413168/?originalSubdomain=hk;", "or_profile": "~Yihan_Zhang1;~Marco_Mondelli1", "aff": "Institute of Science and Technology Austria;Institute of Science and Technology", "aff_domain": "ist.ac.at;ist.ac.at", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024matrix,\ntitle={Matrix Denoising with Doubly Heteroscedastic Noise: Fundamental Limits and Optimal Spectral Methods},\nauthor={Yihan Zhang and Marco Mondelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NgyT80IPUK}\n}", "github": "", "reviewers": "Fvn7;hDgq;S4fP", "pdf_size": 1117096, "rating": "7;7;8", "confidence": "3;2;1", "soundness": "4;3;4", "novelty": "3;2;4", "presentation": "4;3;4", "wc_summary": "243;69;71", "wc_strengths": "27;102;29", "wc_weaknesses": "85;262;23", "wc_questions": "1;98;30", "wc_limitations": "1;17;1", "wc_review": "357;548;154", "wc_reply_reviewers": "4;36;5", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 127.66666666666667, 81.55706931686275 ], "wc_strengths_avg": [ 52.666666666666664, 34.89348872720461 ], "wc_weaknesses_avg": [ 123.33333333333333, 101.26642527950165 ], "wc_questions_avg": [ 43.0, 40.65300316909769 ], "wc_limitations_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_review_avg": [ 353.0, 160.87469243689844 ], "wc_reply_reviewers_avg": [ 15.0, 14.854853303438128 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11847128697667506099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ist.ac.at;ist.ac.at", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Institute of Science and Technology Austria;Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ist.ac.at;", "aff_unique_abbr": "IST Austria;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Austria;" }, { "id": "NhP8MTJzT5", "title": "SyntheOcc: Synthesize Geometric-Controlled Street View Images through 3D Semantic MPIs", "track": "main", "status": "Reject", "tldr": "", "abstract": "The advancement of autonomous driving is increasingly reliant on high-quality annotated datasets, especially in the task of 3D occupancy prediction, where the occupancy labels require dense 3D annotation with significant human effort. In this paper, we propose SyntheOcc, which denotes a diffusion model that Synthesize photorealistic and geometric-controlled images by conditioning Occupancy labels in driving scenarios. This yields an unlimited amount of diverse, annotated, and controllable datasets for applications like training perception models and simulation. SyntheOcc addresses the critical challenge of how to efficiently encode 3D geometric information as conditional input to a 2D diffusion model. Our approach innovatively incorporates 3D semantic multi-plane images (MPIs) to provide comprehensive and spatially aligned 3D scene descriptions for conditioning. As a result, SyntheOcc can generate photorealistic multi-view images and videos that faithfully align with the given geometric labels (semantics in 3D voxel space). Extensive qualitative and quantitative evaluations of SyntheOcc on the nuScenes dataset prove its effectiveness in generating controllable occupancy datasets that serve as an effective data augmentation to perception models.", "keywords": "Autonomous Driving;Image Generation;Data-centric AI;3D Vision", "primary_area": "robotics", "supplementary_material": "", "author": "Leheng Li;Weichao Qiu;Yingjie CAI;Xu Yan;Qing LIAN;Bingbing Liu;Ying-Cong Chen", "authorids": "~Leheng_Li1;~Weichao_Qiu1;~Yingjie_CAI3;~Xu_Yan3;~Qing_LIAN3;~Bingbing_Liu2;~Ying-Cong_Chen1", "gender": "M;M;F;M;M;M;M", "homepage": "https://len-li.github.io/;https://weichaoqiu.com;https://yjcaimeow.github.io/;https://yanx27.github.io/;https://www.lianqing11.github.io;;https://www.yingcong.me/", "dblp": "285/6899;126/0918;84/9538;03/4702-14;234/4406;;137/6578", "google_scholar": ";9_AUwFUAAAAJ;Ss1eSnEAAAAJ;;;-rCulKwAAAAJ;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Leheng_Li1;~Weichao_Qiu1;~Yingjie_CAI3;~Xu_Yan3;~Qing_LIAN3;~Bingbing_Liu2;~Ying-Cong_Chen1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Huawei Technologies Ltd.;Huawei Technologies Ltd.;The Chinese University of Hong Kong;Hong Kong University of Science and Technology;Huawei Technologies Ltd.;Hong Kong University of Science and Technology", "aff_domain": "connect.hkust-gz.edu.cn;huawei.com;huawei.com;link.cuhk.edu.hk;ust.hk;huawei.com;hkust-gz.edu.cn", "position": "PhD student;Researcher;Researcher;PhD student;PhD student;R&D Manager;Assistant Professor", "bibtex": "@misc{\nanonymous2024syntheocc,\ntitle={SyntheOcc: Synthesize Geometric-Controlled Street View Images through 3D Semantic {MPI}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=NhP8MTJzT5}\n}", "github": "", "project": "", "reviewers": "PrKG;uu4q;RWNf;tC5t", "site": "https://openreview.net/forum?id=NhP8MTJzT5", "pdf_size": 21709410, "rating": "4;5;6;6", "confidence": "3;5;4;5", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "47;21;95;50", "wc_strengths": "65;53;220;50", "wc_weaknesses": "97;194;194;33", "wc_questions": "52;25;24;126", "wc_limitations": "1;15;116;117", "wc_review": "262;308;649;376", "wc_reply_reviewers": "0;17;42;0", "wc_reply_authors": "69;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 26.61179249881526 ], "wc_strengths_avg": [ 97.0, 71.23552484540281 ], "wc_weaknesses_avg": [ 129.5, 68.35385870600138 ], "wc_questions_avg": [ 56.75, 41.52935708628295 ], "wc_limitations_avg": [ 62.25, 54.47648575302926 ], "wc_review_avg": [ 398.75, 150.06561065080834 ], "wc_reply_reviewers_avg": [ 14.75, 17.195566289017645 ], "wc_reply_authors_avg": [ 17.25, 29.877876430563134 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6047859764836793265&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;2;0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei;Chinese University of Hong Kong", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "HKUST;Huawei;CUHK", "aff_campus_unique_index": "0;2;2;2", "aff_campus_unique": "Guangzhou;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Understanding Visual Feature Reliance through the Lens of Complexity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95422", "id": "NhqZpst42I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NhqZpst42I", "openreview": "https://openreview.net/forum?id=NhqZpst42I", "poster": "", "project": "", "author_site": "Thomas Fel, Louis B\u00e9thune, Andrew Lampinen, Thomas Serre, Katherine Hermann", "tldr": "", "abstract": "Recent studies suggest that deep learning models' inductive bias towards favoring simpler features may be an origin of shortcut learning. Yet, there has been limited focus on understanding the complexities of the myriad features that models learn. In this work, we introduce a new metric for quantifying feature complexity, based on V-information and capturing whether a feature requires complex computational transformations to be extracted. Using this V-information metric, we analyze the complexities of 10,000 features\u2014represented as directions in the penultimate layer\u2014that were extracted from a standard ImageNet-trained vision model. Our study addresses four key questions:\n\nFirst, we ask what features look like as a function of complexity, and find a spectrum of simple-to-complex features present within the model. Second, we ask when features are learned during training. We find that simpler features dominate early in training, and more complex features emerge gradually. Third, we investigate where within the network simple and complex features \"flow,\" and find that simpler features tend to bypass the visual hierarchy via residual connections. Fourth, we explore the connection between features' complexity and their importance for driving the network's decision. We find that complex features tend to be less important. Surprisingly, important features become accessible at earlier layers during training, like a \"sedimentation process,\" allowing the model to build upon these foundational elements.", "keywords": "Explainability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Thomas FEL;Louis B\u00e9thune;Andrew Kyle Lampinen;Thomas Serre;Katherine Hermann", "authorids": "~Thomas_FEL1;~Louis_B\u00e9thune1;~Andrew_Kyle_Lampinen1;~Thomas_Serre1;~Katherine_Hermann1", "gender": "M;M;M;M;F", "homepage": "https://thomasfel.me;https://louis-bethune.fr/;https://github.com/google/BIG-bench;https://serre-lab.clps.brown.edu/;", "dblp": "274/2390;270/0797;https://dblp.uni-trier.de/pers/hd/l/Lampinen:Andrew_K=;;254/1923", "google_scholar": "1m5Mlx4AAAAJ;1zvpCDcAAAAJ;_N44XxAAAAAJ;kZlPW4wAAAAJ;owcAYmEAAAAJ", "orcid": ";0000-0003-1498-8251;;;", "linkedin": ";;;;", "or_profile": "~Thomas_FEL1;~Louis_B\u00e9thune1;~Andrew_Kyle_Lampinen1;~Thomas_Serre1;~Katherine_Hermann1", "aff": "Brown University;Apple ;Google DeepMind;Universit\u00e9 de Toulouse;Google", "aff_domain": "brown.edu;apple.com;google.com;univ-toulouse.fr;google.com", "position": "PhD student;Researcher;Research Scientist;Full Professor;Researcher", "bibtex": "@inproceedings{\nfel2024understanding,\ntitle={Understanding Visual Feature Reliance through the Lens of Complexity},\nauthor={Thomas FEL and Louis B{\\'e}thune and Andrew Kyle Lampinen and Thomas Serre and Katherine Hermann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NhqZpst42I}\n}", "github": "", "reviewers": "aiER;5SRh;mfVA;ozGz;d2Wb", "pdf_size": 29781711, "rating": "4;4;6;7;8", "confidence": "3;4;1;4;3", "soundness": "2;2;3;4;4", "novelty": "2;1;2;3;3", "presentation": "3;3;2;2;4", "wc_summary": "133;73;50;112;285", "wc_strengths": "105;44;13;131;93", "wc_weaknesses": "118;390;39;339;375", "wc_questions": "128;198;188;237;12", "wc_limitations": "138;1;8;10;21", "wc_review": "622;706;298;829;786", "wc_reply_reviewers": "0;185;80;88;97", "wc_reply_authors": "0;206;0;46;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 130.6, 82.46841819751364 ], "wc_strengths_avg": [ 77.2, 42.75698773300102 ], "wc_weaknesses_avg": [ 252.2, 144.95985651207027 ], "wc_questions_avg": [ 152.6, 78.49993630570664 ], "wc_limitations_avg": [ 35.6, 51.60077518797562 ], "wc_review_avg": [ 648.2, 188.8622778640563 ], "wc_reply_reviewers_avg": [ 90.0, 58.784351659263876 ], "wc_reply_authors_avg": [ 50.4, 79.81378327081106 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.11410886614690963, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11134673860601955359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "brown.edu;apple.com;google.com;univ-toulouse.fr;google.com", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Brown University;Apple;Google;Universit\u00e9 de Toulouse", "aff_unique_dep": ";Apple Inc.;Google DeepMind;", "aff_unique_url": "https://www.brown.edu;https://www.apple.com;https://deepmind.com;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;Apple;DeepMind;UT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "United States;United Kingdom;France" }, { "title": "Single-Loop Stochastic Algorithms for Difference of Max-Structured Weakly Convex Functions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95421", "id": "NhtBXSNXKA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NhtBXSNXKA", "openreview": "https://openreview.net/forum?id=NhtBXSNXKA", "poster": "/media/PosterPDFs/NeurIPS%202024/95421.png?t=1732648300.4953544", "project": "", "author_site": "Quanqi Hu, Qi Qi, Zhaosong Lu, Tianbao Yang", "tldr": "", "abstract": "In this paper, we study a class of non-smooth non-convex problems in the form of $\\min_{x}[\\max_{y\\in\\mathcal Y}\\phi(x, y) - \\max_{z\\in\\mathcal Z}\\psi(x, z)]$, where both $\\Phi(x) = \\max_{y\\in\\mathcal Y}\\phi(x, y)$ and $\\Psi(x)=\\max_{z\\in\\mathcal Z}\\psi(x, z)$ are weakly convex functions, and $\\phi(x, y), \\psi(x, z)$ are strongly concave functions in terms of $y$ and $z$, respectively. It covers two families of problems that have been studied but are missing single-loop stochastic algorithms, i.e., difference of weakly convex functions and weakly convex strongly-concave min-max problems. We propose a stochastic Moreau envelope approximate gradient method dubbed SMAG, the first single-loop algorithm for solving these problems, and provide a state-of-the-art non-asymptotic convergence rate. The key idea of the design is to compute an approximate gradient of the Moreau envelopes of $\\Phi, \\Psi$ using only one step of stochastic gradient update of the primal and dual variables. Empirically, we conduct experiments on positive-unlabeled (PU) learning and partial area under ROC curve (pAUC) optimization with an adversarial fairness regularizer to validate the effectiveness of our proposed algorithms.", "keywords": "Stochastic Optimization;DC Program;Min-Max Optimization;Non-smooth Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/b6bb01b058c239583f84271f467bbe82b7d46151.zip", "author": "Quanqi Hu;Qi Qi;Zhaosong Lu;Tianbao Yang", "authorids": "~Quanqi_Hu1;~Qi_Qi3;~Zhaosong_Lu3;~Tianbao_Yang1", "gender": "F;;;M", "homepage": "https://quanqihu.github.io/;;https://zhaosong-lu.github.io/;https://people.tamu.edu/~tianbao-yang/publications.html", "dblp": ";;42/6774.html;56/7047", "google_scholar": "AGEYvcAAAAAJ;;o1uMWLoAAAAJ;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Quanqi_Hu1;~Qi_Qi3;~Zhaosong_Lu3;~Tianbao_Yang1", "aff": "Texas A&M University - College Station;;University of Minnesota - Twin Cities;Texas A&M University - College Station", "aff_domain": "tamu.edu;;umn.edu;tamu.edu", "position": "PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nhu2024singleloop,\ntitle={Single-Loop Stochastic Algorithms for Difference of Max-Structured Weakly Convex Functions},\nauthor={Quanqi Hu and Qi Qi and Zhaosong Lu and Tianbao Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NhtBXSNXKA}\n}", "github": "", "reviewers": "QoFj;jGsv;gunK;2iRa", "pdf_size": 959863, "rating": "5;5;6;6", "confidence": "1;2;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "68;71;111;79", "wc_strengths": "1;53;273;58", "wc_weaknesses": "1;314;215;82", "wc_questions": "64;2;94;32", "wc_limitations": "1;2;15;1", "wc_review": "135;442;708;252", "wc_reply_reviewers": "0;30;34;0", "wc_reply_authors": "0;10;16;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.25, 17.07886120325357 ], "wc_strengths_avg": [ 96.25, 104.45902306646373 ], "wc_weaknesses_avg": [ 153.0, 120.32248335203192 ], "wc_questions_avg": [ 48.0, 34.438350715445125 ], "wc_limitations_avg": [ 4.75, 5.931905258852336 ], "wc_review_avg": [ 384.25, 216.6591505106581 ], "wc_reply_reviewers_avg": [ 16.0, 16.06237840420901 ], "wc_reply_authors_avg": [ 6.5, 6.837397165588672 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4474827812993896243&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tamu.edu;;umn.edu;tamu.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Texas A&M University;University of Minnesota", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.minnesota.edu", "aff_unique_abbr": "TAMU;UMN", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "College Station;Twin Cities", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Deep Learning Through A Telescoping Lens: A Simple Model Provides Empirical Insights On Grokking, Gradient Boosting & Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95420", "id": "NhucGZtikE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NhucGZtikE", "openreview": "https://openreview.net/forum?id=NhucGZtikE", "poster": "", "project": "", "author_site": "Alan Jeffares, Alicia Curth, Mihaela van der Schaar", "tldr": "", "abstract": "Deep learning sometimes appears to work in unexpected ways. In pursuit of a deeper understanding of its surprising behaviors, we investigate the utility of a simple yet accurate model of a trained neural network consisting of a sequence of first-order approximations telescoping out into a single empirically operational tool for practical analysis. Across three case studies, we illustrate how it can be applied to derive new empirical insights on a diverse range of prominent phenomena in the literature -- including double descent, grokking, linear mode connectivity, and the challenges of applying deep learning on tabular data -- highlighting that this model allows us to construct and extract metrics that help predict and understand the a priori unexpected performance of neural networks. We also demonstrate that this model presents a pedagogical formalism allowing us to isolate components of the training process even in complex contemporary settings, providing a lens to reason about the effects of design choices such as architecture & optimization strategy, and reveals surprising parallels between neural network learning and gradient boosting.", "keywords": "Empirical theory;double descent;grokking;linear mode connectivity;gradient boosting", "primary_area": "other", "supplementary_material": "", "author": "Alan Jeffares;Alicia Curth;Mihaela van der Schaar", "authorids": "~Alan_Jeffares1;~Alicia_Curth1;~Mihaela_van_der_Schaar2", "gender": ";F;F", "homepage": "https://alanjeffares.com;;https://www.vanderschaar-lab.com", "dblp": "304/1985;261/8064;", "google_scholar": "e65kJ08AAAAJ;eWRBqsYAAAAJ;DZ3S--MAAAAJ", "orcid": ";;", "linkedin": "alanjeffares;;", "or_profile": "~Alan_Jeffares1;~Alicia_Curth1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;ucla.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njeffares2024deep,\ntitle={Deep Learning Through A Telescoping Lens: A Simple Model Provides Empirical Insights On Grokking, Gradient Boosting \\& Beyond},\nauthor={Alan Jeffares and Alicia Curth and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NhucGZtikE}\n}", "github": "", "reviewers": "gQrQ;EFqT;b22S;YCvv", "pdf_size": 10220326, "rating": "3;6;6;7", "confidence": "3;2;3;3", "soundness": "1;3;3;3", "novelty": "1;2;3;3", "presentation": "1;3;2;2", "wc_summary": "33;69;90;86", "wc_strengths": "34;78;277;158", "wc_weaknesses": "271;84;728;114", "wc_questions": "1;73;159;15", "wc_limitations": "1;1;49;57", "wc_review": "340;305;1303;430", "wc_reply_reviewers": "0;0;123;143", "wc_reply_authors": "0;0;1203;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;3;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.5, 22.5 ], "wc_strengths_avg": [ 136.75, 92.37254732873832 ], "wc_weaknesses_avg": [ 299.25, 257.5241493530267 ], "wc_questions_avg": [ 62.0, 62.16912416947821 ], "wc_limitations_avg": [ 27.0, 26.153393661244042 ], "wc_review_avg": [ 594.5, 411.58626070363425 ], "wc_reply_reviewers_avg": [ 66.5, 66.87488317746805 ], "wc_reply_authors_avg": [ 300.75, 520.9142803763399 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15394462192271377651&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;cam.ac.uk;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "A Local Method for Satisfying Interventional Fairness with Partially Known Causal Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95419", "id": "NhyDfZXjQX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NhyDfZXjQX", "openreview": "https://openreview.net/forum?id=NhyDfZXjQX", "poster": "", "project": "", "author_site": "Haoxuan Li, Yue Liu, Zhi Geng, Kun Zhang", "tldr": "", "abstract": "Developing fair automated machine learning algorithms is critical in making safe and trustworthy decisions. Many causality-based fairness notions have been proposed to address the above issues by quantifying the causal connections between sensitive attributes and decisions, and when the true causal graph is fully known, certain algorithms that achieve interventional fairness have been proposed. However, when the true causal graph is unknown, it is still challenging to effectively and efficiently exploit partially directed acyclic graphs (PDAGs) to achieve interventional fairness. To exploit the PDAGs for achieving interventional fairness, previous methods have been built on variable selection or causal effect identification, but limited to reduced prediction accuracy or strong assumptions. In this paper, we propose a general min-max optimization framework that can achieve interventional fairness with promising prediction accuracy and can be extended to maximally oriented PDAGs (MPDAGs) with added background knowledge. Specifically, we first estimate all possible treatment effects of sensitive attributes on a given prediction model from all possible adjustment sets of sensitive attributes via an efficient local approach. Next, we propose to alternatively update the prediction model and possible estimated causal effects, where the prediction model is trained via a min-max loss to control the worst-case fairness violations. Extensive experiments on synthetic and real-world datasets verify the superiority of our methods. To benefit the research community, we have released our project at https://github.com/haoxuanli-pku/NeurIPS24-Interventional-Fairness-with-PDAGs.", "keywords": "Interventional fairness;PDAG;MPDAG;Causal effect", "primary_area": "fairness", "supplementary_material": "", "author": "Haoxuan Li;Yue Liu;Zhi Geng;Kun Zhang", "authorids": "~Haoxuan_Li6;~Yue_Liu9;~Zhi_Geng1;~Kun_Zhang1", "gender": "M;;M;M", "homepage": "https://haoxuanli-pku.github.io/;;https://stxy.btbu.edu.cn/szdw/bssds/34339356074b408c8650309f05f24558.htm;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "145/4965-1.html;;;96/3115-1", "google_scholar": "gtDqiucAAAAJ;;;RGoypN4AAAAJ", "orcid": "0000-0003-3620-3769;;;", "linkedin": ";;;", "or_profile": "~Haoxuan_Li6;~Yue_Liu9;~Zhi_Geng1;~Kun_Zhang1", "aff": "Peking University;;School of mathematical Science, Peking University, Peking University;Carnegie Mellon University", "aff_domain": "pku.edu.cn;;math.pku.edu.cn;cmu.edu", "position": "PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024a,\ntitle={A Local Method for Satisfying Interventional Fairness with Partially Known Causal Graphs},\nauthor={Haoxuan Li and Yue Liu and Zhi Geng and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NhyDfZXjQX}\n}", "github": "", "reviewers": "GJNz;t9F5;BRmE;3Hwm", "pdf_size": 560582, "rating": "5;6;8;8", "confidence": "4;2;3;4", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;4;3", "wc_summary": "76;44;84;58", "wc_strengths": "101;26;44;67", "wc_weaknesses": "125;78;35;148", "wc_questions": "40;108;18;4", "wc_limitations": "1;6;49;10", "wc_review": "343;262;230;287", "wc_reply_reviewers": "119;24;20;0", "wc_reply_authors": "1489;51;54;43", "reply_reviewers": "1;1;1;0", "reply_authors": "5;2;2;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 15.580436450882884 ], "wc_strengths_avg": [ 59.5, 28.02231253840411 ], "wc_weaknesses_avg": [ 96.5, 43.55743334954437 ], "wc_questions_avg": [ 42.5, 39.93432107849087 ], "wc_limitations_avg": [ 16.5, 19.03286631067428 ], "wc_review_avg": [ 280.5, 41.35516896350443 ], "wc_reply_reviewers_avg": [ 40.75, 46.08348402627562 ], "wc_reply_authors_avg": [ 409.25, 623.4069196760652 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.058025885318565944, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7038002947933737804&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;;math.pku.edu.cn;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Peking University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "Peking U;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Peking", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Nearest Neighbor Speculative Decoding for LLM Generation and Attribution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95418", "id": "Ni9kebsSTt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ni9kebsSTt", "openreview": "https://openreview.net/forum?id=Ni9kebsSTt", "poster": "", "project": "", "author_site": "Minghan Li, Xilun Chen, Ari Holtzman, Beidi Chen, Jimmy Lin, Scott Yih, Victoria Lin", "tldr": "", "abstract": "Large language models (LLMs) often hallucinate and lack the ability to provide attribution for their generations. Semi-parametric LMs, such as kNN-LM, approach these limitations by refining the output of an LM for a given prompt using its nearest neighbor matches in a non-parametric data store. However, these models often exhibit slow inference speeds and produce non-fluent texts. In this paper, we introduce Nearest Neighbor Speculative Decoding (NEST), a novel semi-parametric language modeling approach that is capable of incorporating real-world text spans of arbitrary length into the LM generations and providing attribution to their sources. NEST performs token-level retrieval at each inference step to compute a semi-parametric mixture distribution and identify promising span continuations in a corpus. It then uses an approximate speculative decoding procedure that accepts a prefix of the retrieved span or generates a new token. NEST significantly enhances the generation quality and attribution rate of the base LM across a variety of knowledge-intensive tasks, surpassing the conventional kNN-LM method and performing competitively with in-context retrieval augmentation. In addition, NEST substantially improves the generation speed, achieving a 1.8x speedup in inference time when applied to Llama-2-Chat 70B. Code will be released at https://github.com/facebookresearch/NEST/tree/main.", "keywords": "large language model;nearest neigbour language model;speculative decoding;attribution;factuality", "primary_area": "generative_models", "supplementary_material": "", "author": "Minghan Li;Xilun Chen;Ari Holtzman;Beidi Chen;Jimmy Lin;Wen-tau Yih;Xi Victoria Lin", "authorids": "~Minghan_Li4;~Xilun_Chen1;~Ari_Holtzman1;~Beidi_Chen1;~Jimmy_Lin2;~Wen-tau_Yih1;~Xi_Victoria_Lin1", "gender": ";;M;F;;M;F", "homepage": "https://alexlimh.github.io/;https://xilunchen.com;http://ariholtzman.com;https://www.andrew.cmu.edu/user/beidic/;https://cs.uwaterloo.ca/~jimmylin/;http://scottyih.org;http://victorialin.net", "dblp": ";96/10207-2.html;https://dblp.uni-trier.de/pers/hd/h/Holtzman:Ari;192/1339;00/7739;07/7129;215/5264", "google_scholar": ";eUk_hy8AAAAJ;https://scholar.google.com/citations?authuser=2;;;8rDNIMsAAAAJ;gYUOJwMAAAAJ", "orcid": "0009-0007-8972-7714;;;;;0000-0003-4263-395X;", "linkedin": ";;;;;scottyih/;xivictorialin/", "or_profile": "~Minghan_Li4;~Xilun_Chen1;~Ari_Holtzman1;~Beidi_Chen1;~Jimmy_Lin2;~Wen-tau_Yih1;~Xi_Victoria_Lin1", "aff": "University of Waterloo;Meta FAIR;Meta;Meta Facebook;University of Waterloo;Meta Platforms, Inc.;Meta", "aff_domain": "uwaterloo.ca;meta.com;meta.com;fb.com;waterloo.ca;meta.com;fb.com", "position": "PhD student;Research Scientist;Postdoc;Researcher;Full Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nli2024nearest,\ntitle={Nearest Neighbor Speculative Decoding for {LLM} Generation and Attribution},\nauthor={Minghan Li and Xilun Chen and Ari Holtzman and Beidi Chen and Jimmy Lin and Wen-tau Yih and Xi Victoria Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ni9kebsSTt}\n}", "github": "", "reviewers": "vMVQ;nNiG;kPHS;L2uT", "pdf_size": 979440, "rating": "5;5;7;7", "confidence": "4;4;3;2", "soundness": "3;2;3;4", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "51;43;117;99", "wc_strengths": "66;47;58;53", "wc_weaknesses": "42;52;25;45", "wc_questions": "55;110;2;44", "wc_limitations": "35;1;1;8", "wc_review": "249;253;203;249", "wc_reply_reviewers": "0;16;0;0", "wc_reply_authors": "0;29;0;313", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 31.284980421921315 ], "wc_strengths_avg": [ 56.0, 6.96419413859206 ], "wc_weaknesses_avg": [ 41.0, 9.924716620639604 ], "wc_questions_avg": [ 52.75, 38.518664307060284 ], "wc_limitations_avg": [ 11.25, 14.00669482783144 ], "wc_review_avg": [ 238.5, 20.56088519495209 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 85.5, 131.87968001174403 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1434342886667886745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uwaterloo.ca;meta.com;meta.com;fb.com;waterloo.ca;meta.com;fb.com", "author_num": 7, "aff_unique_index": "0;1;1;1;0;1;1", "aff_unique_norm": "University of Waterloo;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://uwaterloo.ca;https://meta.com", "aff_unique_abbr": "UW;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Provably Efficient Interactive-Grounded Learning with Personalized Reward", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95417", "id": "NidGPsP0Qq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NidGPsP0Qq", "openreview": "https://openreview.net/forum?id=NidGPsP0Qq", "poster": "", "project": "", "author_site": "Mengxiao Zhang, Yuheng Zhang, Haipeng Luo, Paul Mineiro", "tldr": "", "abstract": "Interactive-Grounded Learning (IGL) [Xie et al., 2021] is a powerful framework in which a learner aims at maximizing unobservable rewards through interacting with an environment and observing reward-dependent feedback on the taken actions.\nTo deal with personalized rewards that are ubiquitous in applications such as recommendation systems, Maghakian et al. [2022] study a version of IGL with context-dependent feedback, but their algorithm does not come with theoretical guarantees. In this work, we consider the same problem and provide the first provably efficient algorithms with sublinear regret under realizability. Our analysis reveals that the step-function estimator of prior work can deviate uncontrollably due to finite-sample effects. Our solution is a novel Lipschitz reward estimator which underestimates the true reward and enjoys favorable generalization performances. Building on this estimator, we propose two algorithms, one based on explore-then-exploit and the other based on inverse-gap weighting. We apply IGL to learning from image feedback and learning from text feedback, which are reward-free settings that arise in practice. Experimental results showcase the importance of using our Lipschitz reward estimator and the overall effectiveness of our algorithms.", "keywords": "Online learning;Interactive-Grounded Learning;Contextual Bandit", "primary_area": "bandits", "supplementary_material": "/attachment/9077df62604cde9928ba9da9a0fc845f4b59a9c2.zip", "author": "Mengxiao Zhang;Yuheng Zhang;Haipeng Luo;Paul Mineiro", "authorids": "~Mengxiao_Zhang2;~Yuheng_Zhang1;~Haipeng_Luo1;~Paul_Mineiro1", "gender": ";M;M;", "homepage": ";;https://haipeng-luo.net/;", "dblp": ";;62/2576;35/5613", "google_scholar": ";IoEBLNYAAAAJ;ct2hw4UAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mengxiao_Zhang2;~Yuheng_Zhang1;~Haipeng_Luo1;~Paul_Mineiro1", "aff": ";University of Illinois, Urbana Champaign;University of Southern California;", "aff_domain": ";cs.illinois.edu;usc.edu;", "position": ";PhD student;Associate Professor;", "bibtex": "@inproceedings{\nzhang2024provably,\ntitle={Provably Efficient Interactive-Grounded Learning with Personalized Reward},\nauthor={Mengxiao Zhang and Yuheng Zhang and Haipeng Luo and Paul Mineiro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NidGPsP0Qq}\n}", "github": "", "reviewers": "TjTV;LmpU;qSGo;8SUA", "pdf_size": 1319563, "rating": "5;5;5;7", "confidence": "3;3;2;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "49;99;69;124", "wc_strengths": "40;104;45;31", "wc_weaknesses": "48;275;49;81", "wc_questions": "54;71;54;80", "wc_limitations": "8;5;3;1", "wc_review": "199;554;220;317", "wc_reply_reviewers": "52;122;0;0", "wc_reply_authors": "24;114;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 85.25, 28.586491565073178 ], "wc_strengths_avg": [ 55.0, 28.731515797117282 ], "wc_weaknesses_avg": [ 113.25, 94.32490392255907 ], "wc_questions_avg": [ 64.75, 11.211043662389331 ], "wc_limitations_avg": [ 4.25, 2.5860201081971503 ], "wc_review_avg": [ 322.5, 140.8731699082547 ], "wc_reply_reviewers_avg": [ 43.5, 50.04747745890896 ], "wc_reply_authors_avg": [ 34.5, 46.93346354148605 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xtNTXeOsNS4J:scholar.google.com/&scioq=Provably+Efficient+Interactive-Grounded+Learning+with+Personalized+Reward&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": ";cs.illinois.edu;usc.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.usc.edu", "aff_unique_abbr": "UIUC;USC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Paralinguistics-Aware Speech-Empowered Large Language Models for Natural Conversation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95416", "id": "NjewXJUDYq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NjewXJUDYq", "openreview": "https://openreview.net/forum?id=NjewXJUDYq", "poster": "/media/PosterPDFs/NeurIPS%202024/95416.png?t=1731289160.6820369", "project": "", "author_site": "Heeseung Kim, Soonshin Seo, Kyeongseok Jeong, Ohsung Kwon, Soyoon Kim, Jungwhan Kim, Jaehong Lee, Eunwoo Song, Myungwoo Oh, Jung-Woo Ha, Sungroh Yoon, Kang Min Yoo", "tldr": "", "abstract": "Recent work shows promising results in expanding the capabilities of large language models (LLM) to directly understand and synthesize speech. However, an LLM-based strategy for modeling spoken dialogs remains elusive, calling for further investigation. This paper introduces an extensive speech-text LLM framework, the Unified Spoken Dialog Model (USDM), designed to generate coherent spoken responses with naturally occurring prosodic features relevant to the given input speech without relying on explicit automatic speech recognition (ASR) or text-to-speech (TTS) systems. We have verified the inclusion of prosody in speech tokens that predominantly contain semantic information and have used this foundation to construct a prosody-infused speech-text model. Additionally, we propose a generalized speech-text pretraining scheme that enhances the capture of cross-modal semantics. To construct USDM, we fine-tune our speech-text model on spoken dialog data using a multi-step spoken dialog template that stimulates the chain-of-reasoning capabilities exhibited by the underlying LLM. Automatic and human evaluations on the DailyTalk dataset demonstrate that our approach effectively generates natural-sounding spoken responses, surpassing previous and cascaded baselines. Our code and checkpoints are available at https://github.com/naver-ai/usdm.", "keywords": "Spoken Dialog Modeling;Speech-Text Pretraining;Paralingustics;Spoken Language Model;LLM;USDM", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Heeseung Kim;Soonshin Seo;Kyeongseok Jeong;Ohsung Kwon;Soyoon Kim;Jungwhan Kim;Jaehong Lee;Eunwoo Song;Myungwoo Oh;Jung-Woo Ha;Sungroh Yoon;Kang Min Yoo", "authorids": "~Heeseung_Kim1;~Soonshin_Seo1;~Kyeongseok_Jeong1;~Ohsung_Kwon1;~Soyoon_Kim1;~Jungwhan_Kim1;~Jaehong_Lee1;~Eunwoo_Song1;~Myungwoo_Oh1;~Jung-Woo_Ha1;~Sungroh_Yoon1;~Kang_Min_Yoo2", "gender": "M;;M;M;F;M;;M;;M;;M", "homepage": "https://gmltmd789.github.io;;https://github.com/chungks603;;;;https://www.linkedin.com/in/jaehong-lee-74a0901b1;https://sewplay.github.io/cv/;;https://aidljwha.wordpress.com/;http://ailab.snu.ac.kr;", "dblp": "294/8710;;;;;;;;;66/867-1;99/1474;163/5657", "google_scholar": "4ojbJpoAAAAJ;https://scholar.google.co.kr/citations?user=ZKeGcP8AAAAJ;;fYfhX38AAAAJ;;EB-MgDUAAAAJ;;H8Of2IIAAAAJ;;https://scholar.google.co.kr/citations?user=eGj3ay4AAAAJ;Bphl_fIAAAAJ;BqaWtH8AAAAJ", "orcid": ";;;;;;;;;0000-0002-7400-7681;0000-0002-2367-197X;", "linkedin": "gmltmd789/;;;ohsung-kwon-78b091164/;soyoon-kim-b402a769;jungwhan-kim;;;myungwoo5/;jung-woo-ha-b2782862?trk=hp-identity-name;;", "or_profile": "~Heeseung_Kim1;~Soonshin_Seo1;~Kyeongseok_Jeong1;~Ohsung_Kwon1;~Soyoon_Kim1;~Jungwhan_Kim1;~Jaehong_Lee1;~Eunwoo_Song1;~Myungwoo_Oh1;~Jung-Woo_Ha1;~Sungroh_Yoon1;~Kang_Min_Yoo2", "aff": "Seoul National University;NAVER Corp.;NAVER;NAVER;NAVER Cloud;NAVER;NAVER;NAVER;;NAVER AI Lab;Seoul National University;NAVER", "aff_domain": "snu.ac.kr;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;;navercorp.com;snu.ac.kr;navercorp.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;;Research fellow;Full Professor;Researcher", "bibtex": "@inproceedings{\nkim2024paralinguisticsaware,\ntitle={Paralinguistics-Aware Speech-Empowered Large Language Models for Natural Conversation},\nauthor={Heeseung Kim and Soonshin Seo and Kyeongseok Jeong and Ohsung Kwon and Soyoon Kim and Jungwhan Kim and Jaehong Lee and Eunwoo Song and Myungwoo Oh and Jung-Woo Ha and Sungroh Yoon and Kang Min Yoo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NjewXJUDYq}\n}", "github": "", "reviewers": "yvFi;4xzH;BwMS", "pdf_size": 6852060, "rating": "6;6;7", "confidence": "5;5;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "69;43;88", "wc_strengths": "81;54;40", "wc_weaknesses": "126;116;89", "wc_questions": "1;88;19", "wc_limitations": "1;2;18", "wc_review": "278;303;254", "wc_reply_reviewers": "0;24;29", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 66.66666666666667, 18.445113776342563 ], "wc_strengths_avg": [ 58.333333333333336, 17.016332024133625 ], "wc_weaknesses_avg": [ 110.33333333333333, 15.627610892974724 ], "wc_questions_avg": [ 36.0, 37.49666651850535 ], "wc_limitations_avg": [ 7.0, 7.788880963698615 ], "wc_review_avg": [ 278.3333333333333, 20.005554784164875 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 12.657891697365017 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ikfkjr4zPSUJ:scholar.google.com/&scioq=Paralinguistics-Aware+Speech-Empowered+Large+Language+Models+for+Natural+Conversation&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "snu.ac.kr;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;;navercorp.com;snu.ac.kr;navercorp.com", "author_num": 12, "aff_unique_index": "0;1;1;1;1;1;1;1;1;0;1", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Test-Time Dynamic Image Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95415", "id": "NkXuAOygXN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NkXuAOygXN", "openreview": "https://openreview.net/forum?id=NkXuAOygXN", "poster": "/media/PosterPDFs/NeurIPS%202024/95415.png?t=1731332838.07863", "project": "", "author_site": "Bing Cao, Yinan Xia, Yi Ding, Changqing Zhang, Qinghua Hu", "tldr": "", "abstract": "The inherent challenge of image fusion lies in capturing the correlation of multi-source images and comprehensively integrating effective information from different sources. Most existing techniques fail to perform dynamic image fusion while notably lacking theoretical guarantees, leading to potential deployment risks in this field. Is it possible to conduct dynamic image fusion with a clear theoretical justification? In this paper, we give our solution from a generalization perspective. We proceed to reveal the generalized form of image fusion and derive a new test-time dynamic image fusion paradigm. It provably reduces the upper bound of generalization error. Specifically, we decompose the fused image into multiple components corresponding to its source data. The decomposed components represent the effective information from the source data, thus the gap between them reflects the \\textit{Relative Dominability} (RD) of the uni-source data in constructing the fusion image. Theoretically, we prove that the key to reducing generalization error hinges on the negative correlation between the RD-based fusion weight and the uni-source reconstruction loss. Intuitively, RD dynamically highlights the dominant regions of each source and can be naturally converted to the corresponding fusion weight, achieving robust results. Extensive experiments and discussions with in-depth analysis on multiple benchmarks confirm our findings and superiority. Our code is available at https://github.com/Yinan-Xia/TTD.", "keywords": "Dynamic image fusion;generalization error;negative correlation", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Bing Cao;Yinan Xia;Yi Ding;Changqing Zhang;Qinghua Hu", "authorids": "~Bing_Cao1;~Yinan_Xia1;~Yi_Ding8;~Changqing_Zhang1;~Qinghua_Hu1", "gender": "M;F;M;M;M", "homepage": "https://bcaosudo.github.io;https://github.com/Yinan-Xia;https://dripnowhy.github.io;http://cic.tju.edu.cn/faculty/zhangchangqing/index.html;http://cic.tju.edu.cn/faculty/huqinghua/index.html", "dblp": "59/4329;;;78/2668;", "google_scholar": "6KeTXm4AAAAJ;DNl4KHgAAAAJ;https://scholar.google.com/citations?hl=en;yJGhdykAAAAJ;TVSNq_wAAAAJ", "orcid": "0000-0002-0316-5404;;;;0000-0001-7765-8095", "linkedin": ";;;;", "or_profile": "~Bing_Cao1;~Yinan_Xia1;~Yi_Ding8;~Changqing_Zhang1;~Qinghua_Hu1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "Associate Professor;MS student;Undergrad student;Associate Professor;Professor", "bibtex": "@inproceedings{\ncao2024testtime,\ntitle={Test-Time Dynamic Image Fusion},\nauthor={Bing Cao and Yinan Xia and Yi Ding and Changqing Zhang and Qinghua Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NkXuAOygXN}\n}", "github": "", "reviewers": "EtNz;gBb1;gbUk;UH2o", "pdf_size": 30412041, "rating": "5;5;7;7", "confidence": "3;3;5;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;4;2;2", "wc_summary": "40;139;77;48", "wc_strengths": "24;40;22;21", "wc_weaknesses": "128;32;12;88", "wc_questions": "4;1;91;2", "wc_limitations": "7;1;39;2", "wc_review": "203;213;241;161", "wc_reply_reviewers": "13;0;0;16", "wc_reply_authors": "32;0;0;20", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 76.0, 38.890872965260115 ], "wc_strengths_avg": [ 26.75, 7.725768570181222 ], "wc_weaknesses_avg": [ 65.0, 45.81484475582123 ], "wc_questions_avg": [ 24.5, 38.40898332421727 ], "wc_limitations_avg": [ 12.25, 15.610493265749165 ], "wc_review_avg": [ 204.5, 28.718460961548757 ], "wc_reply_reviewers_avg": [ 7.25, 7.327175444876422 ], "wc_reply_authors_avg": [ 13.0, 13.674794331177344 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15458014040534643026&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "No Free Lunch Theorem and Black-Box Complexity Analysis for Adversarial Optimisation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95414", "id": "NkuySm8qVs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NkuySm8qVs", "openreview": "https://openreview.net/forum?id=NkuySm8qVs", "poster": "", "project": "", "author_site": "Per Kristian Lehre, Shishen Lin", "tldr": "", "abstract": "Black-box optimisation is one of the important areas in optimisation. The original No Free Lunch (NFL) theorems highlight the limitations of traditional black-box optimisation and learning algorithms, serving as a theoretical foundation for traditional optimisation. No Free Lunch Analysis in adversarial (also called maximin) optimisation is a long-standing problem [45 , 46]. This paper first rigorously proves a (NFL) Theorem for general black-box adversarial optimisation when considering Pure Strategy Nash Equilibrium (NE) as the solution concept. We emphasise the solution concept (i.e. define the optimality in adversarial optimisation) as the key in our NFL theorem. In particular, if Nash Equilibrium is considered as the solution concept and the cost of the algorithm is measured in terms of the number of columns and rows queried in the payoff matrix, then the average performance of all black-box adversarial optimisation algorithms is the same. Moreover, we first introduce black-box complexity to analyse the black-box adversarial optimisation algorithm. We employ Yao\u2019s Principle and our new NFL Theorem to provide general lower bounds for the query complexity of finding a Nash Equilibrium in adversarial optimisation. Finally, we illustrate the practical ramifications of our results on simple two-player zero-sum games. More specifically, no black-box optimisation algorithm for finding the unique Nash equilibrium in two-player zero-sum games can exceed logarithmic complexity relative to search space size. Meanwhile, no black-box algorithm can solve any bimatrix game with unique NE with fewer than a linear number of queries in the size of the payoff matrix.", "keywords": "No Free Lunch Theorem;Black-Box Optimisation;Black-Box Complexity Analysis;Zero-Sum Games", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Per Kristian Lehre;Shishen Lin", "authorids": "~Per_Kristian_Lehre1;~Shishen_Lin1", "gender": "M;M", "homepage": "https://pklehre.github.io/EC-Theory-UoB/;https://lss1242.github.io", "dblp": "02/1252;352/5439.html", "google_scholar": "HTszQ5gAAAAJ;", "orcid": "0000-0002-9521-1251;", "linkedin": ";", "or_profile": "~Per_Kristian_Lehre1;~Shishen_Lin1", "aff": "University of Birmingham;University of Birmingham", "aff_domain": "bham.ac.uk;bham.ac.uk", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nlehre2024no,\ntitle={No Free Lunch Theorem and Black-Box Complexity Analysis for Adversarial Optimisation},\nauthor={Per Kristian Lehre and Shishen Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NkuySm8qVs}\n}", "github": "", "reviewers": "Puqf;Xe7y;RwGp;7LPo", "pdf_size": 507619, "rating": "5;5;6;6", "confidence": "4;3;3;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "168;94;139;77", "wc_strengths": "76;93;47;64", "wc_weaknesses": "274;3;82;69", "wc_questions": "8;100;99;49", "wc_limitations": "12;5;4;1", "wc_review": "538;295;371;260", "wc_reply_reviewers": "83;21;8;17", "wc_reply_authors": "333;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 119.5, 36.01735692690401 ], "wc_strengths_avg": [ 70.0, 16.80773631397161 ], "wc_weaknesses_avg": [ 107.0, 100.96286446015684 ], "wc_questions_avg": [ 64.0, 38.34709897762802 ], "wc_limitations_avg": [ 5.5, 4.031128874149275 ], "wc_review_avg": [ 366.0, 107.1050885812621 ], "wc_reply_reviewers_avg": [ 32.25, 29.67637949615822 ], "wc_reply_authors_avg": [ 83.25, 144.19322973010904 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6WMkSmutkN8J:scholar.google.com/&scioq=No+Free+Lunch+Theorem+and+Black-Box+Complexity+Analysis+for+Adversarial+Optimisation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "bham.ac.uk;bham.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Birmingham", "aff_unique_dep": "", "aff_unique_url": "https://www.birmingham.ac.uk", "aff_unique_abbr": "Birmingham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Just Add $100 More: Augmenting Pseudo-LiDAR Point Cloud for Resolving Class-imbalance Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95413", "id": "NlpHKNjNNZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NlpHKNjNNZ", "openreview": "https://openreview.net/forum?id=NlpHKNjNNZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95413.png?t=1730688451.8500443", "project": "", "author_site": "Mincheol Chang, Siyeong Lee, Jinkyu Kim, Namil Kim", "tldr": "", "abstract": "Typical LiDAR-based 3D object detection models are trained with real-world data collection, which is often imbalanced over classes.\nTo deal with it, augmentation techniques are commonly used, such as copying ground truth LiDAR points and pasting them into scenes.\nHowever, existing methods struggle with the lack of sample diversity for minority classes and the limitation of suitable placement.\nIn this work, we introduce a novel approach that utilizes pseudo LiDAR point clouds generated from low-cost miniatures or real-world videos, which is called Pseudo Ground Truth augmentation (PGT-Aug).\nPGT-Aug involves three key steps: (i) volumetric 3D instance reconstruction using a 2D-to-3D view synthesis model, (ii) object-level domain alignment with LiDAR intensity simulation, and (iii) a hybrid context-aware placement method from ground and map information. \nWe demonstrate the superiority and generality of our method through performance improvements in extensive experiments conducted on popular benchmarks, i.e., nuScenes, KITTI, and Lyft, especially for the datasets with large domain gaps captured by different LiDAR configurations.\nThe project webpage is https://just-add-100-more.github.io.", "keywords": "Autonomous Driving;Class Imbalance;Data Augmentation", "primary_area": "machine_vision", "supplementary_material": "/attachment/5cf582e159365f219018a44a89cc4d37ed79c85c.zip", "author": "Mincheol Chang;Siyeong Lee;Jinkyu Kim;Namil Kim", "authorids": "~Mincheol_Chang2;~Siyeong_Lee1;~Jinkyu_Kim1;~Namil_Kim1", "gender": "M;M;M;M", "homepage": "https://github.com/mincheoree;;https://visionai.korea.ac.kr/;https://www.namilkim.com", "dblp": ";213/8232;;", "google_scholar": ";iGSaIU0AAAAJ;;IYyLBQYAAAAJ", "orcid": ";;;", "linkedin": ";siyeong/;;nlkim0817/?originalSubdomain=kr", "or_profile": "~Mincheol_Chang2;~Siyeong_Lee1;~Jinkyu_Kim1;~Namil_Kim1", "aff": "Korea University;Naver Labs;Korea University;Naver Labs", "aff_domain": "korea.ac.kr;naverlabs.com;korea.ac.kr;naverlabs.com", "position": "MS student;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nchang2024just,\ntitle={Just Add \\$100 More: Augmenting Pseudo-Li{DAR} Point Cloud for Resolving Class-imbalance Problem},\nauthor={Mincheol Chang and Siyeong Lee and Jinkyu Kim and Namil Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NlpHKNjNNZ}\n}", "github": "", "reviewers": "kswe;jNRg;eXGm;iAKq", "pdf_size": 13338939, "rating": "4;5;6;6", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "61;85;105;61", "wc_strengths": "69;128;11;48", "wc_weaknesses": "121;52;31;216", "wc_questions": "54;93;119;7", "wc_limitations": "13;1;10;8", "wc_review": "318;359;276;340", "wc_reply_reviewers": "0;0;0;68", "wc_reply_authors": "0;0;0;45", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 18.411952639521967 ], "wc_strengths_avg": [ 64.0, 42.38513890504548 ], "wc_weaknesses_avg": [ 105.0, 72.21841870326433 ], "wc_questions_avg": [ 68.25, 42.2573958023918 ], "wc_limitations_avg": [ 8.0, 4.415880433163924 ], "wc_review_avg": [ 323.25, 30.898017735770686 ], "wc_reply_reviewers_avg": [ 17.0, 29.444863728670914 ], "wc_reply_authors_avg": [ 11.25, 19.48557158514987 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:g10IOCzJOwYJ:scholar.google.com/&scioq=Just+Add+%24100+More:+Augmenting+Pseudo-LiDAR+Point+Cloud+for+Resolving+Class-imbalance+Problem&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "korea.ac.kr;naverlabs.com;korea.ac.kr;naverlabs.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Korea University;NAVER LABS", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;https://labs.naver.com", "aff_unique_abbr": "KU;Naver Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "When does perceptual alignment benefit vision representations?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95412", "id": "NmlnmLYMZ4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NmlnmLYMZ4", "openreview": "https://openreview.net/forum?id=NmlnmLYMZ4", "poster": "", "project": "", "author_site": "Shobhita Sundaram, Stephanie Fu, Lukas Muttenthaler, Netanel Tamir, Lucy Chai, Simon Kornblith, Trevor Darrell, Phillip Isola", "tldr": "", "abstract": "Humans judge perceptual similarity according to diverse visual attributes, including scene layout, subject location, and camera pose. Existing vision models understand a wide range of semantic abstractions but improperly weigh these attributes and thus make inferences misaligned with human perception. \nWhile vision representations have previously benefited from human preference alignment in contexts like image generation, the utility of perceptually aligned representations in more general-purpose settings remains unclear. Here, we investigate how aligning vision model representations to human perceptual judgments impacts their usability in standard computer vision tasks. We finetune state-of-the-art models on a dataset of human similarity judgments for synthetic image triplets and evaluate them across diverse computer vision tasks. We find that aligning models to perceptual judgments yields representations that improve upon the original backbones across many downstream tasks, including counting, semantic segmentation, depth estimation, instance retrieval, and retrieval-augmented generation. In addition, we find that performance is widely preserved on other tasks, including specialized out-of-distribution domains such as in medical imaging and 3D environment frames. Our results suggest that injecting an inductive bias about human perceptual knowledge into vision models can make them better representation learners.", "keywords": "representation learning;alignment;perception;transfer learning;computer vision;foundation model", "primary_area": "machine_vision", "supplementary_material": "/attachment/f2040f9f9daaf4e638b8c244175a93fee34c8a54.zip", "author": "Shobhita Sundaram;Stephanie Fu;Lukas Muttenthaler;Netanel Yakir Tamir;Lucy Chai;Simon Kornblith;Trevor Darrell;Phillip Isola", "authorids": "~Shobhita_Sundaram1;~Stephanie_Fu1;~Lukas_Muttenthaler1;~Netanel_Yakir_Tamir1;~Lucy_Chai1;~Simon_Kornblith1;~Trevor_Darrell2;~Phillip_Isola1", "gender": "F;F;M;M;;M;M;M", "homepage": "https://ssundaram21.github.io;https://stephanie-fu.github.io/;https://lukasmut.github.io/;https://netanel-tamir.github.io/;https://people.csail.mit.edu/lrchai/;;http://web.mit.edu/phillipi/;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";270/1541;245/4369;349/5316;245/2729;220/4059;36/9988;d/TrevorDarrell", "google_scholar": ";Rx-h05AAAAAJ;https://scholar.google.com/citations?hl=en;easC2gEAAAAJ;;1O3RPmsAAAAJ;ROILf3EAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";0000-0001-6591-6026;0000-0002-0804-4687;;;;0000-0002-1411-6704;", "linkedin": ";stephanie-fu/;lukas-muttenthaler/;netanel-yakir-tamir-4a9691167/;;;phillip-isola-a9955b20/;", "or_profile": "~Shobhita_Sundaram1;~Stephanie_Fu1;~Lukas_Muttenthaler1;~Netanel_Yakir_Tamir1;~Lucy_Chai1;~Simon_Kornblith1;~Phillip_Isola1;~trevor_darrell1", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;TU Berlin;Apple;Google;Anthropic;Massachusetts Institute of Technology;Electrical Engineering & Computer Science Department", "aff_domain": "mit.edu;berkeley.edu;tu-berlin.de;apple.com;google.com;anthropic.com;mit.edu;eecs.berkeley.edu", "position": "PhD student;PhD student;PhD student;Intern;Researcher;Member of the Technical Staff;Associate Professor;Professor", "bibtex": "@inproceedings{\nsundaram2024when,\ntitle={When does perceptual alignment benefit vision representations?},\nauthor={Shobhita Sundaram and Stephanie Fu and Lukas Muttenthaler and Netanel Yakir Tamir and Lucy Chai and Simon Kornblith and Trevor Darrell and Phillip Isola},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NmlnmLYMZ4}\n}", "github": "", "reviewers": "LKEh;rQrx;VQay;kSUr;9gB3", "pdf_size": 11669407, "rating": "4;5;6;6;8", "confidence": "4;4;3;3;5", "soundness": "2;2;2;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;2;3;4", "wc_summary": "41;81;85;89;95", "wc_strengths": "30;41;104;58;232", "wc_weaknesses": "101;180;90;156;334", "wc_questions": "178;5;19;166;60", "wc_limitations": "2;25;4;10;37", "wc_review": "352;332;302;479;758", "wc_reply_reviewers": "0;34;13;59;75", "wc_reply_authors": "76;43;43;43;46", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.2, 19.1666376811375 ], "wc_strengths_avg": [ 93.0, 73.9459261893446 ], "wc_weaknesses_avg": [ 172.2, 87.55432599249451 ], "wc_questions_avg": [ 85.6, 72.92352158254565 ], "wc_limitations_avg": [ 15.6, 13.395521639712282 ], "wc_review_avg": [ 444.6, 167.92331583195946 ], "wc_reply_reviewers_avg": [ 36.2, 27.852468472291644 ], "wc_reply_authors_avg": [ 50.2, 12.952219886953742 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.36262033381142106, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11178031366831227578&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mit.edu;berkeley.edu;tu-berlin.de;apple.com;google.com;anthropic.com;mit.edu;eecs.berkeley.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;0;6", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;Technische Universit\u00e4t Berlin;Apple;Google;Anthropic;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;;Apple Inc.;Google;;Electrical Engineering & Computer Science", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://www.tu-berlin.de;https://www.apple.com;https://www.google.com;https://www.anthropic.com;", "aff_unique_abbr": "MIT;UC Berkeley;TU Berlin;Apple;Google;Anthropic;", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Berkeley;Berlin;Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;Germany;" }, { "title": "Safe and Sparse Newton Method for Entropic-Regularized Optimal Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95411", "id": "Nmmiyjw7Xg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nmmiyjw7Xg", "openreview": "https://openreview.net/forum?id=Nmmiyjw7Xg", "poster": "/media/PosterPDFs/NeurIPS%202024/95411.png?t=1731749240.8293817", "project": "", "author_site": "Zihao Tang, Yixuan Qiu", "tldr": "", "abstract": "Computational optimal transport (OT) has received massive interests in the machine learning community, and great advances have been gained in the direction of entropic-regularized OT. The Sinkhorn algorithm, as well as its many improved versions, has become the *de facto* solution to large-scale OT problems. However, most of the existing methods behave like first-order methods, which typically require a large number of iterations to converge. More recently, Newton-type methods using sparsified Hessian matrices have demonstrated promising results on OT computation, but there still remain a lot of unresolved open questions. In this article, we make major new progresses towards this direction: first, we propose a novel Hessian sparsification scheme that promises a strict control of the approximation error; second, based on this sparsification scheme, we develop a *safe* Newton-type method that is guaranteed to avoid singularity in computing the search directions; third, the developed algorithm has a clear implementation for practical use, avoiding most hyperparameter tuning; and remarkably, we provide rigorous global and local convergence analysis of the proposed algorithm, which is lacking in the prior literature. Various numerical experiments are conducted to demonstrate the effectiveness of the proposed algorithm in solving large-scale OT problems.", "keywords": "optimal transport;Newton method;sparsified Hessian matrix;global convergence;quadratic convergence rate", "primary_area": "optimization", "supplementary_material": "", "author": "Zihao Tang;Yixuan Qiu", "authorids": "~Zihao_Tang3;~Yixuan_Qiu1", "gender": "M;", "homepage": "https://github.com/TangZihao1997;https://statr.me", "dblp": "176/5858-1.html;209/7159", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0009-0008-4716-8616;", "linkedin": ";", "or_profile": "~Zihao_Tang3;~Yixuan_Qiu1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "sufe.edu;sufe.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ntang2024safe,\ntitle={Safe and Sparse Newton Method for Entropic-Regularized Optimal Transport},\nauthor={Zihao Tang and Yixuan Qiu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nmmiyjw7Xg}\n}", "github": "", "reviewers": "gzRs;DpgA;bgcB;vqB6", "pdf_size": 1186203, "rating": "5;5;6;8", "confidence": "3;4;3;3", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "45;79;42;100", "wc_strengths": "74;16;48;80", "wc_weaknesses": "127;139;26;123", "wc_questions": "127;33;225;36", "wc_limitations": "1;1;7;9", "wc_review": "374;268;348;348", "wc_reply_reviewers": "191;11;30;5", "wc_reply_authors": "86;0;4;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.5, 24.19194080680589 ], "wc_strengths_avg": [ 54.5, 25.273503912200223 ], "wc_weaknesses_avg": [ 103.75, 45.273474573971015 ], "wc_questions_avg": [ 105.25, 78.7857061909075 ], "wc_limitations_avg": [ 4.5, 3.570714214271425 ], "wc_review_avg": [ 334.5, 39.834030677299026 ], "wc_reply_reviewers_avg": [ 59.25, 76.62367453992272 ], "wc_reply_authors_avg": [ 22.5, 36.69809259348502 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15638518893896821352&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sufe.edu;sufe.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Multi-Instance Partial-Label Learning with Margin Adjustment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95410", "id": "NnAi0L5H8J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NnAi0L5H8J", "openreview": "https://openreview.net/forum?id=NnAi0L5H8J", "poster": "/media/PosterPDFs/NeurIPS%202024/95410.png?t=1731484168.9986148", "project": "", "author_site": "Wei Tang, Yin-Fang Yang, Zhaofei Wang, Weijia Zhang, Min-Ling Zhang", "tldr": "", "abstract": "Multi-instance partial-label learning (MIPL) is an emerging learning framework where each training sample is represented as a multi-instance bag associated with a candidate label set. Existing MIPL algorithms often overlook the margins for attention scores and predicted probabilities, leading to suboptimal generalization performance. A critical issue with these algorithms is that the highest prediction probability of the classifier may appear on a non-candidate label. In this paper, we propose an algorithm named MIPLMA, i.e., Multi-Instance Partial-Label learning with Margin Adjustment, which adjusts the margins for attention scores and predicted probabilities. We introduce a margin-aware attention mechanism to dynamically adjust the margins for attention scores and propose a margin distribution\nloss to constrain the margins between the predicted probabilities on candidate and non-candidate label sets. Experimental results demonstrate the superior performance of MIPLMA over existing MIPL algorithms, as well as other well-established multi-instance learning algorithms and partial-label learning algorithms.", "keywords": "Machine Learning;Multi-Instance Partial-Label Learning;Multi-Instance Learning;Partial-Label Learning", "primary_area": "other", "supplementary_material": "", "author": "Wei Tang;Yin-Fang Yang;Zhaofei Wang;Weijia Zhang;Min-Ling Zhang", "authorids": "~Wei_Tang16;~Yin-Fang_Yang1;~Zhaofei_Wang1;~Weijia_Zhang2;~Min-Ling_Zhang2", "gender": ";F;M;;M", "homepage": "https://tangw-seu.github.io/;;;https://www.weijiazhangxh.com/;http://palm.seu.edu.cn/zhangml/", "dblp": "58/1874-17;396/6243;;158/5387-1;84/271.html", "google_scholar": "BLvzAjgAAAAJ;W7AooIIAAAAJ;;https://scholar.google.com.au/citations?user=7jmAPvAAAAAJ;uFHCIM0AAAAJ", "orcid": "0000-0001-9080-9281;;;0000-0001-8103-5325;0000-0003-1880-5918", "linkedin": ";;%E5%8F%AC%E9%A3%9E-%E7%8E%8B-34aa77250/;weijia-zhang-86152337/;", "or_profile": "~Wei_Tang16;~Yin-Fang_Yang1;~Zhaofei_Wang1;~Weijia_Zhang2;~Min-Ling_Zhang2", "aff": "Southeast University;Southeast University;Southeast University;University of Newcastle;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;newcastle.edu.au;seu.edu.cn", "position": "PhD student;Research Assistant;MS student;Lecturer;Full Professor", "bibtex": "@inproceedings{\ntang2024multiinstance,\ntitle={Multi-Instance Partial-Label Learning with Margin Adjustment},\nauthor={Wei Tang and Yin-Fang Yang and Zhaofei Wang and Weijia Zhang and Min-Ling Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NnAi0L5H8J}\n}", "github": "", "reviewers": "BX8M;H7jM;Unv6;b79F;zb3e", "pdf_size": 633277, "rating": "5;5;7;7;7", "confidence": "4;4;4;5;4", "soundness": "2;3;3;3;3", "novelty": "2;2;3;4;3", "presentation": "2;3;3;4;3", "wc_summary": "54;75;130;51;86", "wc_strengths": "28;27;27;59;131", "wc_weaknesses": "207;135;69;169;16", "wc_questions": "1;113;53;2;91", "wc_limitations": "3;1;1;1;1", "wc_review": "293;351;280;282;325", "wc_reply_reviewers": "21;73;0;0;0", "wc_reply_authors": "104;409;62;61;52", "reply_reviewers": "1;1;0;0;0", "reply_authors": "3;3;2;2;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 79.2, 28.547504269200136 ], "wc_strengths_avg": [ 54.4, 40.217409165683456 ], "wc_weaknesses_avg": [ 119.2, 68.71506385065796 ], "wc_questions_avg": [ 52.0, 45.48406314303946 ], "wc_limitations_avg": [ 1.4, 0.8000000000000002 ], "wc_review_avg": [ 306.2, 27.592752671670873 ], "wc_reply_reviewers_avg": [ 18.8, 28.29416901059298 ], "wc_reply_authors_avg": [ 137.6, 136.8920742775125 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11923915035172076487&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn;newcastle.edu.au;seu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Southeast University;University of Newcastle", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.newcastle.edu.au", "aff_unique_abbr": "SEU;UON", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "No-Regret M${}^{\\natural}$-Concave Function Maximization: Stochastic Bandit Algorithms and NP-Hardness of Adversarial Full-Information Setting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95409", "id": "NnoAj91HZX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NnoAj91HZX", "openreview": "https://openreview.net/forum?id=NnoAj91HZX", "poster": "/media/PosterPDFs/NeurIPS%202024/95409.png?t=1731303250.3086798", "project": "", "author_site": "Taihei Oki, Shinsaku Sakaue", "tldr": "", "abstract": "M${}^{\\natural}$-concave functions, a.k.a. gross substitute valuation functions, play a fundamental role in many fields, including discrete mathematics and economics. In practice, perfect knowledge of M${}^{\\natural}$-concave functions is often unavailable a priori, and we can optimize them only interactively based on some feedback. Motivated by such situations, we study online M${}^{\\natural}$-concave function maximization problems, which are interactive versions of the problem studied by Murota and Shioura (1999). For the stochastic bandit setting, we present $O(T^{-1/2})$-simple regret and $O(T^{2/3})$-regret algorithms under $T$ times access to unbiased noisy value oracles of M${}^{\\natural}$-concave functions. A key to proving these results is the robustness of the greedy algorithm to local errors in M${}^{\\natural}$-concave function maximization, which is one of our main technical results. While we obtain those positive results for the stochastic setting, another main result of our work is an impossibility in the adversarial setting. We prove that, even with full-information feedback, no algorithms that run in polynomial time per round can achieve $O(T^{1-c})$ regret for any constant $c > 0$ unless $\\mathsf{P} = \\mathsf{NP}$. Our proof is based on a reduction from the matroid intersection problem for three matroids, which would be a novel idea in the context of online learning.", "keywords": "online learning;discrete convex analysis;combinatorial bandit", "primary_area": "online_learning", "supplementary_material": "", "author": "Taihei Oki;Shinsaku Sakaue", "authorids": "~Taihei_Oki1;~Shinsaku_Sakaue1", "gender": "M;M", "homepage": "https://www.opt.mist.i.u-tokyo.ac.jp/~oki/;https://ssakaue.github.io/", "dblp": "185/4435;183/6350", "google_scholar": "o-QxZEYAAAAJ;https://scholar.google.co.jp/citations?user=9oTbrmEAAAAJ", "orcid": "0000-0002-6862-9484;", "linkedin": ";", "or_profile": "~Taihei_Oki1;~Shinsaku_Sakaue1", "aff": "The University of Tokyo;NTT", "aff_domain": "u-tokyo.ac.jp;ntt.co.jp", "position": "Project Research Associate;Researcher", "bibtex": "@inproceedings{\noki2024noregret,\ntitle={No-Regret M\\$\\{\\}{\\textasciicircum}\\{{\\textbackslash}natural\\}\\$-Concave Function Maximization: Stochastic Bandit Algorithms and {NP}-Hardness of Adversarial Full-Information Setting},\nauthor={Taihei Oki and Shinsaku Sakaue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NnoAj91HZX}\n}", "github": "", "reviewers": "PdPM;opkE;ugrH;yBbo", "pdf_size": 442068, "rating": "6;6;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "164;120;57;109", "wc_strengths": "77;229;42;85", "wc_weaknesses": "128;210;10;3", "wc_questions": "106;122;33;108", "wc_limitations": "11;1;1;4", "wc_review": "486;682;143;309", "wc_reply_reviewers": "48;25;24;53", "wc_reply_authors": "0;0;0;116", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 112.5, 38.08214804865923 ], "wc_strengths_avg": [ 108.25, 71.56596607326698 ], "wc_weaknesses_avg": [ 87.75, 86.30288233888831 ], "wc_questions_avg": [ 92.25, 34.758991642451306 ], "wc_limitations_avg": [ 4.25, 4.085033659592048 ], "wc_review_avg": [ 405.0, 200.71746311669048 ], "wc_reply_reviewers_avg": [ 37.5, 13.124404748406688 ], "wc_reply_authors_avg": [ 29.0, 50.22947341949744 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1096701226161042293&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "u-tokyo.ac.jp;ntt.co.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Tokyo;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "UTokyo;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Occupancy-based Policy Gradient: Estimation, Convergence, and Optimality", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95408", "id": "Nq8enbbaP2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nq8enbbaP2", "openreview": "https://openreview.net/forum?id=Nq8enbbaP2", "poster": "", "project": "", "author_site": "Audrey Huang, Nan Jiang", "tldr": "", "abstract": "Occupancy functions play an instrumental role in reinforcement learning (RL) for guiding exploration, handling distribution shift, and optimizing general objectives beyond the expected return. Yet, computationally efficient policy optimization methods that use (only) occupancy functions are virtually non-existent. In this paper, we establish the theoretical foundations of model-free policy gradient (PG) methods that compute the gradient through the occupancy for both online and offline RL, without modeling value functions. Our algorithms reduce gradient estimation to squared-loss regression and are computationally oracle-efficient. We characterize the sample complexities of both local and global convergence, accounting for both finite-sample estimation error and the roles of exploration (online) and data coverage (offline). Occupancy-based PG naturally handles arbitrary offline data distributions, and, with one-line algorithmic changes, can be adapted to optimize any differentiable objective functional.", "keywords": "policy gradient;offline reinforcement learning;offline policy gradient", "primary_area": "learning_theory", "supplementary_material": "", "author": "Audrey Huang;Nan Jiang", "authorids": "~Audrey_Huang1;~Nan_Jiang2", "gender": ";M", "homepage": "https://audhuang.github.io/;http://nanjiang.cs.illinois.edu", "dblp": "245/0244;06/4489-8", "google_scholar": "https://scholar.google.com/citations?hl=en;nUlanA8AAAAJ", "orcid": ";", "linkedin": ";nan-jiang-28139937/", "or_profile": "~Audrey_Huang1;~Nan_Jiang2", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024occupancybased,\ntitle={Occupancy-based Policy Gradient: Estimation, Convergence, and Optimality},\nauthor={Audrey Huang and Nan Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nq8enbbaP2}\n}", "github": "", "reviewers": "2KwR;n45j;K5ET", "pdf_size": 432997, "rating": "3;6;7", "confidence": "2;2;3", "soundness": "3;3;4", "novelty": "2;2;4", "presentation": "3;3;3", "wc_summary": "17;207;130", "wc_strengths": "10;58;90", "wc_weaknesses": "52;76;481", "wc_questions": "4;29;126", "wc_limitations": "4;7;84", "wc_review": "87;377;911", "wc_reply_reviewers": "0;35;35", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.0, 78.02990879570902 ], "wc_strengths_avg": [ 52.666666666666664, 32.87687468250121 ], "wc_weaknesses_avg": [ 203.0, 196.8197144597055 ], "wc_questions_avg": [ 53.0, 52.61812108643434 ], "wc_limitations_avg": [ 31.666666666666668, 37.025516726831626 ], "wc_review_avg": [ 458.3333333333333, 341.2773391572054 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 16.499158227686106 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6933752452815364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16450836756391508744&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "illinois.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DACO: Towards Application-Driven and Comprehensive Data Analysis via Code Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97721", "id": "NrCPBJSOOc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NrCPBJSOOc", "openreview": "https://openreview.net/forum?id=NrCPBJSOOc", "poster": "/media/PosterPDFs/NeurIPS%202024/97721.png?t=1730879810.7870657", "project": "", "author_site": "Xueqing Wu, Rui Zheng, Jingzhen Sha, Te-Lin Wu, Hanyu Zhou, Tang Mohan, Kai-Wei Chang, Nanyun Peng, Haoran Huang", "tldr": "", "abstract": "Data analysis is a crucial analytical process essential for deriving insights from real-world databases. As shown in Figure 1, the need for data analysis typically arises from specific application scenarios, and requires diverse reasoning skills including mathematical reasoning, logical reasoning, and strategic reasoning. Existing work often focus on simple factual retrieval or arithmetic resolutions and thus are insufficient for addressing complex real-world queries. This work aims to propose new resources and benchmarks on this crucial yet challenging and under-explored task. Due to the prohibitively high cost of collecting expert annotations, we use large language models (LLMs) enhanced by code generation to automatically generate high-quality data analysis, which will later be refined by human annotators. We construct the **DACO dataset**, containing (1) 440 databases (of tabular data) collected from real-world scenarios, (2) ~2k automatically generated query-answer pairs that can serve as weak supervision for model training, and (3) a concentrated but high-quality test set with human refined annotations that serves as our main evaluation benchmark. Experiments show that while LLMs like GPT-4 exhibit promising data analysis capabilities, they are still evaluated as less helpful than human-written analysis on 58.1% cases. Leveraging our weak supervision data, we experiment with various fine-tuning methods, including supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Our trained model outperforms existing baselines for table question answering, and RLHF further boosts the helpfulness of generated analysis on 58.5% cases.\nData and code are released at https://github.com/shirley-wu/daco.", "keywords": "data analysis;tabular data;table;code generation", "primary_area": "", "supplementary_material": "/attachment/d0d51675e08731074aaa4f9817327c98b709f952.pdf", "author": "Xueqing Wu;Rui Zheng;Jingzhen Sha;Te-Lin Wu;Hanyu Zhou;Tang Mohan;Kai-Wei Chang;Nanyun Peng;Haoran Huang", "authorids": "~Xueqing_Wu1;~Rui_Zheng1;~Jingzhen_Sha1;~Te-Lin_Wu1;~Hanyu_Zhou3;~Tang_Mohan1;~Kai-Wei_Chang1;~Nanyun_Peng1;~Haoran_Huang1", "gender": "M;F;M;F;M;M;F;M;F", "homepage": "https://github.com/ruizheng20;https://ahaminasha.wixsite.com/eportfolio;https://telin0411.github.io/;;;http://kwchang.net;https://violetpeng.github.io/;;https://shirley-wu.github.io/", "dblp": ";;166/3298;;;18/2428;117/4036;170/8136;243/5831-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=7Z0V_SoAAAAJ;;Q5aezXQAAAAJ;;;fqDBtzYAAAAJ;XxRXvX0AAAAJ;ZT6jRqwAAAAJ;6iUVrU8AAAAJ", "orcid": ";0009-0002-8546-7621;;;;0000-0001-5365-0072;;;", "linkedin": ";jingzhen-sha-575a4222b/;telinwu/;hanyu-zhou/;mohan-tang-6169a6227/;kai-wei-chang-41239040;;;xueqing-wu-12346b197/", "or_profile": "~Rui_Zheng1;~Jingzhen_Sha1;~Te-Lin_Wu1;~Hanyu_Zhou3;~Tang_Mohan1;~Kai-Wei_Chang1;~Nanyun_Peng1;~Haoran_Huang1;~Xueqing_Wu2", "aff": "Fudan University;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;UCLA Computer Science Department, University of California, Los Angeles;Amazon;University of California, Los Angeles;Bytedance Inc;University of California, Los Angeles", "aff_domain": "fudan.edu.cn;ucla.edu;cs.ucla.edu;ucla.edu;cs.ucla.edu;amazon.com;ucla.edu;bytedance.com;cs.ucla.edu", "position": "PhD student;Undergrad student;PhD student;Undergrad student;MS student;Researcher;Assistant Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nwu2024daco,\ntitle={{DACO}: Towards Application-Driven and Comprehensive Data Analysis via Code Generation},\nauthor={Xueqing Wu and Rui Zheng and Jingzhen Sha and Te-Lin Wu and Hanyu Zhou and Tang Mohan and Kai-Wei Chang and Nanyun Peng and Haoran Huang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=NrCPBJSOOc}\n}", "github": "", "reviewers": "dx7U;M7Gf;wqLD", "pdf_size": 8612111, "rating": "6;6;6", "confidence": "4;4;4", "wc_summary_and_contributions": "99;37;105", "wc_strengths": "38;55;69", "wc_improvement": "113;19;73", "wc_limitations": "1;4;86", "wc_correctness": "41;1;30", "wc_clarity": "1;5;6", "wc_relation_to_prior_work": "6;11;11", "wc_documentation": "1;1;17", "wc_additional_feedback": "1;1;1", "wc_review": "301;134;398", "wc_reply_reviewers": "21;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 80.33333333333333, 30.739045022396013 ], "wc_strengths_avg": [ 54.0, 12.675435561221029 ], "wc_improvement_avg": [ 68.33333333333333, 38.5169515350781 ], "wc_limitations_avg": [ 30.333333333333332, 39.38132665222045 ], "wc_correctness_avg": [ 24.0, 16.87206764645835 ], "wc_clarity_avg": [ 4.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 9.333333333333334, 2.357022603955158 ], "wc_documentation_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 277.6666666666667, 109.03312442658068 ], "wc_reply_reviewers_avg": [ 7.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16275809240724552984&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;ucla.edu;cs.ucla.edu;ucla.edu;cs.ucla.edu;amazon.com;ucla.edu;bytedance.com;cs.ucla.edu", "author_num": 9, "aff_unique_index": "0;1;1;1;1;2;1;3;1", "aff_unique_norm": "Fudan University;University of California, Los Angeles;Amazon;ByteDance", "aff_unique_dep": ";;Amazon.com, Inc.;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.ucla.edu;https://www.amazon.com;https://www.bytedance.com", "aff_unique_abbr": "Fudan;UCLA;Amazon;Bytedance", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1;1;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "ANAH-v2: Scaling Analytical Hallucination Annotation of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95407", "id": "NrwASKGm7A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NrwASKGm7A", "openreview": "https://openreview.net/forum?id=NrwASKGm7A", "poster": "/media/PosterPDFs/NeurIPS%202024/95407.png?t=1731161160.996381", "project": "", "author_site": "Yuzhe Gu, Ziwei Ji, Wenwei Zhang, Chengqi Lyu, Dahua Lin, Kai Chen", "tldr": "", "abstract": "Large language models (LLMs) exhibit hallucinations in long-form question-answering tasks across various domains and wide applications. Current hallucination detection and mitigation datasets are limited in domain and size, which struggle to scale due to prohibitive labor costs and insufficient reliability of existing hallucination annotators. To facilitate the scalable oversight of LLM hallucinations, this paper introduces an iterative self-training framework that simultaneously and progressively scales up the annotation dataset and improves the accuracy of the annotator. Based on the Expectation Maximization algorithm, in each iteration, the framework first applies an automatic hallucination annotation pipeline for a scaled dataset and then trains a more accurate annotator on the dataset. This new annotator is adopted in the annotation pipeline for the next iteration. Extensive experimental results demonstrate that the finally obtained hallucination annotator with only 7B parameters surpasses GPT-4 and obtains new state-of-the-art hallucination detection results on HaluEval and HalluQA by zero-shot inference. Such an annotator can not only evaluate the hallucination levels of various LLMs on the large-scale dataset but also help to mitigate the hallucination of LLMs generations, with the Natural Language Inference metric increasing from 25% to 37% on HaluEval.", "keywords": "Hallucination;Large Language Model;Fine-grained Annotation;Dataset", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuzhe Gu;Ziwei Ji;Wenwei Zhang;Chengqi Lyu;Dahua Lin;Kai Chen", "authorids": "~Yuzhe_Gu1;~Ziwei_Ji2;~Wenwei_Zhang1;~Chengqi_Lyu1;~Dahua_Lin1;~Kai_Chen4", "gender": ";F;M;M;M;M", "homepage": "http://guyuzhe.site/;https://ziweiji.github.io/;https://zhangwenwei.cn;;http://dahua.site;https://chenkai.site/", "dblp": ";176/4574;;319/5244;53/6088;181/2839-26", "google_scholar": "NaiWQ5oAAAAJ;oSnZ9mMAAAAJ;QDXADSEAAAAJ;https://scholar.google.com/citations?view_op=list_works;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ", "orcid": ";0000-0002-0206-7861;0000-0002-2748-4514;;;0000-0002-6820-2325", "linkedin": ";ziwei-ji-a516b91a7/;wenweizhang-b9769a124/;;;", "or_profile": "~Yuzhe_Gu1;~Ziwei_Ji2;~Wenwei_Zhang1;~Chengqi_Lyu1;~Dahua_Lin1;~Kai_Chen4", "aff": "Wuhan University;Hong Kong University of Science and Technology;Shanghai AI Laboratory;Shanghai AI Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "whu.edu.cn;ust.hk;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn", "position": "Undergrad student;PhD student;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\ngu2024anahv,\ntitle={{ANAH}-v2: Scaling Analytical Hallucination Annotation of Large Language Models},\nauthor={Yuzhe Gu and Ziwei Ji and Wenwei Zhang and Chengqi Lyu and Dahua Lin and Kai Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NrwASKGm7A}\n}", "github": "", "reviewers": "5wgD;i3ot;4rBJ;nXFU", "pdf_size": 1044437, "rating": "5;6;6;8", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;3;4", "wc_summary": "73;72;130;73", "wc_strengths": "73;118;92;110", "wc_weaknesses": "35;115;122;15", "wc_questions": "72;111;2;13", "wc_limitations": "1;29;1;5", "wc_review": "254;445;347;216", "wc_reply_reviewers": "0;0;43;14", "wc_reply_authors": "0;93;39;37", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.0, 24.829418035870273 ], "wc_strengths_avg": [ 98.25, 17.354754391808605 ], "wc_weaknesses_avg": [ 71.75, 47.34646238104807 ], "wc_questions_avg": [ 49.5, 44.37623237725348 ], "wc_limitations_avg": [ 9.0, 11.661903789690601 ], "wc_review_avg": [ 315.5, 88.66369042623931 ], "wc_reply_reviewers_avg": [ 14.25, 17.55526986406076 ], "wc_reply_authors_avg": [ 42.25, 33.16153645415122 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5125459128037141224&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;ust.hk;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Wuhan University;Hong Kong University of Science and Technology;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.ust.hk;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "WHU;HKUST;SAIL;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GaussianCut: Interactive segmentation via graph cut for 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95406", "id": "Ns0LQokxa5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ns0LQokxa5", "openreview": "https://openreview.net/forum?id=Ns0LQokxa5", "poster": "/media/PosterPDFs/NeurIPS%202024/95406.png?t=1733983999.2776697", "project": "", "author_site": "Umangi Jain, Ashkan Mirzaei, Igor Gilitschenski", "tldr": "", "abstract": "We introduce GaussianCut, a new method for interactive multiview segmentation of scenes represented as 3D Gaussians. Our approach allows for selecting the objects to be segmented by interacting with a single view. It accepts intuitive user input, such as point clicks, coarse scribbles, or text. Using 3D Gaussian Splatting (3DGS) as the underlying scene representation simplifies the extraction of objects of interest which are considered to be a subset of the scene's Gaussians. Our key idea is to represent the scene as a graph and use the graph-cut algorithm to minimize an energy function to effectively partition the Gaussians into foreground and background. To achieve this, we construct a graph based on scene Gaussians and devise a segmentation-aligned energy function on the graph to combine user inputs with scene properties. To obtain an initial coarse segmentation, we leverage 2D image/video segmentation models and further refine these coarse estimates using our graph construction. Our empirical evaluations show the adaptability of GaussianCut across a diverse set of scenes. GaussianCut achieves competitive performance with state-of-the-art approaches for 3D segmentation without requiring any additional segmentation-aware training", "keywords": "3D Vision;Segmentation;Graph cut;Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Umangi Jain;Ashkan Mirzaei;Igor Gilitschenski", "authorids": "~Umangi_Jain2;~Ashkan_Mirzaei1;~Igor_Gilitschenski1", "gender": "F;M;M", "homepage": "https://umangi-jain.github.io/;https://ashmrz.github.io/;https://www.gilitschenski.org/igor", "dblp": ";323/8457;129/1281", "google_scholar": ";z8GwuTgAAAAJ;Nuw1Y4oAAAAJ", "orcid": ";;", "linkedin": "umangi-jain-b57158137/;ashkan-mirzaei-6b9651145/;igorgilitschenski/", "or_profile": "~Umangi_Jain2;~Ashkan_Mirzaei1;~Igor_Gilitschenski1", "aff": "Samsung;NVIDIA;University of Toronto", "aff_domain": "samsung.com;nvidia.com;toronto.edu", "position": "Intern;Intern;Assistant Professor", "bibtex": "@inproceedings{\njain2024gaussiancut,\ntitle={GaussianCut: Interactive segmentation via graph cut for 3D Gaussian Splatting},\nauthor={Umangi Jain and Ashkan Mirzaei and Igor Gilitschenski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ns0LQokxa5}\n}", "github": "", "reviewers": "DeYw;Qza2;24kC;BR3k;MadC", "pdf_size": 16909746, "rating": "4;4;6;6;8", "confidence": "5;5;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;2;2;3", "presentation": "3;3;3;3;3", "wc_summary": "77;95;104;106;165", "wc_strengths": "73;37;67;37;11", "wc_weaknesses": "261;277;372;159;117", "wc_questions": "45;32;163;25;16", "wc_limitations": "7;5;1;7;3", "wc_review": "463;446;707;334;312", "wc_reply_reviewers": "86;0;200;149;0", "wc_reply_authors": "246;0;13;628;0", "reply_reviewers": "1;0;1;2;0", "reply_authors": "2;1;2;3;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 109.4, 29.62836478781777 ], "wc_strengths_avg": [ 45.0, 22.5920339943087 ], "wc_weaknesses_avg": [ 237.2, 90.42654477530368 ], "wc_questions_avg": [ 56.2, 54.23430648583975 ], "wc_limitations_avg": [ 4.6, 2.33238075793812 ], "wc_review_avg": [ 452.4, 140.50281135977315 ], "wc_reply_reviewers_avg": [ 87.0, 79.68939703624315 ], "wc_reply_authors_avg": [ 177.4, 244.014425803066 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8964214570007952, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6743039753444393687&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "samsung.com;nvidia.com;toronto.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Samsung;NVIDIA;University of Toronto", "aff_unique_dep": "Samsung;NVIDIA Corporation;", "aff_unique_url": "https://www.samsung.com;https://www.nvidia.com;https://www.utoronto.ca", "aff_unique_abbr": "Samsung;NVIDIA;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "South Korea;United States;Canada" }, { "title": "Motion Consistency Model: Accelerating Video Diffusion with Disentangled Motion-Appearance Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95405", "id": "NsqxN9iOJ7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NsqxN9iOJ7", "openreview": "https://openreview.net/forum?id=NsqxN9iOJ7", "poster": "/media/PosterPDFs/NeurIPS%202024/95405.png?t=1731707649.681373", "project": "", "author_site": "Yuanhao Zhai, Kevin Lin, Zhengyuan Yang, Linjie Li, Jianfeng Wang, Chung-Ching Lin, DAVID DOERMANN, Junsong Yuan, Lijuan Wang", "tldr": "", "abstract": "Image diffusion distillation achieves high-fidelity generation with very few sampling steps. However, directly applying these techniques to video models results in unsatisfied frame quality. This issue arises from the limited frame appearance quality in public video datasets, affecting the performance of both teacher and student video diffusion models. Our study aims to improve video diffusion distillation and meanwhile enabling the student model to improve frame appearance using the abundant high-quality image data. To this end, we propose motion consistency models (MCM), a single-stage video diffusion distillation method that disentangles motion and appearance learning. Specifically, MCM involves a video consistency model that distills motion from the video teacher model, and an image discriminator that boosts frame appearance to match high-quality image data. However, directly combining these components leads to two significant challenges: a conflict in frame learning objectives, where video distillation learns from low-quality video frames while the image discriminator targets high-quality images, and training-inference discrepancies due to the differing quality of video samples used during training and inference. To address these challenges, we introduce disentangled motion distillation and mixed trajectory distillation. The former applies the distillation objective solely to the motion representation, while the latter mitigates training-inference discrepancies by mixing distillation trajectories from both the low- and high-quality video domains. Extensive experiments show that our MCM achieves state-of-the-art video diffusion distillation performance. Additionally, our method can enhance frame quality in video diffusion models, producing frames with high aesthetic value or specific styles.", "keywords": "consistency distillation;video diffusion models;diffusion distillation;text-to-video generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4ee3820a1904de0a051061194d8bda86386f7974.zip", "author": "Yuanhao Zhai;Kevin Lin;Zhengyuan Yang;Linjie Li;Jianfeng Wang;Chung-Ching Lin;David Doermann;Junsong Yuan;Lijuan Wang", "authorids": "~Yuanhao_Zhai1;~Kevin_Lin3;~Zhengyuan_Yang1;~Linjie_Li1;~Jianfeng_Wang4;~Chung-Ching_Lin2;~David_Doermann2;~Junsong_Yuan2;~Lijuan_Wang1", "gender": "M;;M;F;M;M;M;F;", "homepage": "https://www.yhzhai.com;https://sites.google.com/site/kevinlin311tw/;http://zhengyuan.info/;;;https://cse.buffalo.edu/~doermann/;https://cse.buffalo.edu/~jsyuan/;https://www.microsoft.com/en-us/research/people/lijuanw/;", "dblp": "22/11135-1;;163/9713;200/8256;;;42/3332;51/2527.html;37/8616", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=LKSy1kwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;WR875gYAAAAJ;vJWEw_8AAAAJ;RoGOW9AAAAAJ;fJ7seq0AAAAJ;cDcWXuIAAAAJ;legkbM0AAAAJ", "orcid": "0000-0002-3277-3329;0000-0001-8944-1336;;;;0000-0003-1639-4561;;;", "linkedin": "yuanhao-zhai-895518161/;;;;;david-doermann-bb7757/;;;", "or_profile": "~Yuanhao_Zhai1;~Kevin_Lin3;~Zhengyuan_Yang1;~Linjie_Li1;~Jianfeng_Wang4;~David_Doermann2;~Junsong_Yuan2;~Lijuan_Wang1;~Chung-ching_Lin1", "aff": "State University of New York at Buffalo;Microsoft;Microsoft;Microsoft;Microsoft;State University of New York at Buffalo;State University of New York at Buffalo;Microsoft;Microsoft", "aff_domain": "buffalo.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;buffalo.edu;buffalo.edu;microsoft.com;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Principal Researcher;Full Professor;Full Professor;Principal Researcher;Principal\u00a0Researcher", "bibtex": "@inproceedings{\nzhai2024motion,\ntitle={Motion Consistency Model: Accelerating Video Diffusion with Disentangled Motion-Appearance Distillation},\nauthor={Yuanhao Zhai and Kevin Lin and Zhengyuan Yang and Linjie Li and Jianfeng Wang and Chung-Ching Lin and David Doermann and Junsong Yuan and Lijuan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NsqxN9iOJ7}\n}", "github": "", "reviewers": "VLKW;sR1C;FQyx", "pdf_size": 6502048, "rating": "5;6;7", "confidence": "5;3;3", "soundness": "3;3;4", "novelty": "2;4;3", "presentation": "3;3;3", "wc_summary": "35;76;55", "wc_strengths": "47;33;61", "wc_weaknesses": "77;18;64", "wc_questions": "5;21;14", "wc_limitations": "10;17;9", "wc_review": "174;165;203", "wc_reply_reviewers": "38;10;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.333333333333336, 16.73983937265296 ], "wc_strengths_avg": [ 47.0, 11.430952132988164 ], "wc_weaknesses_avg": [ 53.0, 25.311394008759507 ], "wc_questions_avg": [ 13.333333333333334, 6.548960901462833 ], "wc_limitations_avg": [ 12.0, 3.559026084010437 ], "wc_review_avg": [ 180.66666666666666, 16.21384867602041 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 13.199326582148888 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6806336259443830522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "buffalo.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;buffalo.edu;buffalo.edu;microsoft.com;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;0;0;1;1", "aff_unique_norm": "State University of New York at Buffalo;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.buffalo.edu;https://www.microsoft.com", "aff_unique_abbr": "SUNY Buffalo;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Seeing the Image: Prioritizing Visual Correlation by Contrastive Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95404", "id": "NsxthTVpqA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NsxthTVpqA", "openreview": "https://openreview.net/forum?id=NsxthTVpqA", "poster": "/media/PosterPDFs/NeurIPS%202024/95404.png?t=1730641285.9150648", "project": "", "author_site": "Xin Xiao, Bohong Wu, Jiacong Wang, Chunyuan Li, zhou Xun, Haoyuan Guo", "tldr": "", "abstract": "Existing image-text modality alignment in Vision Language Models (VLMs) treats each text token equally in an autoregressive manner. Despite being simple and effective, this method results in sub-optimal cross-modal alignment by over-emphasizing the text tokens that are less correlated with or even contradictory with the input images. In this paper, we advocate for distinct contributions for each text token based on its visual correlation. Specifically, we present by contrasting image inputs, the difference in prediction logits on each text token provides strong guidance of visual correlation. We therefore introduce Contrastive Alignment (CAL), a simple yet effective re-weighting strategy that prioritizes training visually correlated tokens. Our experimental results demonstrate that CAL consistently improves different types of VLMs across different resolutions and model sizes on various benchmark datasets. Importantly, our method incurs minimal additional computational overhead, rendering it highly efficient compared to alternative data scaling strategies.", "keywords": "Vision Language Model;Contrastive Alignment;Selective Token Re-weighting", "primary_area": "generative_models", "supplementary_material": "", "author": "Xin Xiao;Bohong Wu;Jiacong Wang;Chunyuan Li;zhou Xun;Haoyuan Guo", "authorids": "~Xin_Xiao4;~Bohong_Wu1;~Jiacong_Wang1;~Chunyuan_Li1;~zhou_Xun2;~Haoyuan_Guo1", "gender": ";;M;;;", "homepage": ";;;http://chunyuan.li/;;", "dblp": ";;62/2575;64/9590;;", "google_scholar": ";;rzYgLkgAAAAJ;Zd7WmXUAAAAJ;;", "orcid": ";;0009-0001-8719-0614;;;", "linkedin": ";;;;;", "or_profile": "~Xin_Xiao4;~Bohong_Wu1;~Jiacong_Wang1;~Chunyuan_Li1;~zhou_Xun2;~Haoyuan_Guo1", "aff": ";;University of Chinese Academy of Sciences;Microsoft Research;;", "aff_domain": ";;ucas.ac.cn;microsoft.com;;", "position": ";;PhD student;Principal Researcher;;", "bibtex": "@inproceedings{\nxiao2024seeing,\ntitle={Seeing the Image: Prioritizing Visual Correlation by Contrastive Alignment},\nauthor={Xin Xiao and Bohong Wu and Jiacong Wang and Chunyuan Li and zhou Xun and Haoyuan Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NsxthTVpqA}\n}", "github": "", "reviewers": "mF2S;ewhK;DwVB;tsLt", "pdf_size": 10314373, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;2;2;4", "novelty": "2;2;2;4", "presentation": "1;3;3;3", "wc_summary": "65;61;79;43", "wc_strengths": "74;89;30;46", "wc_weaknesses": "343;194;161;4", "wc_questions": "111;101;3;75", "wc_limitations": "2;1;12;1", "wc_review": "595;446;285;169", "wc_reply_reviewers": "239;152;11;49", "wc_reply_authors": "159;96;22;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.0, 12.84523257866513 ], "wc_strengths_avg": [ 59.75, 23.09085316743407 ], "wc_weaknesses_avg": [ 175.5, 120.43774325351667 ], "wc_questions_avg": [ 72.5, 42.22262426709169 ], "wc_limitations_avg": [ 4.0, 4.636809247747852 ], "wc_review_avg": [ 373.75, 161.2224782714867 ], "wc_reply_reviewers_avg": [ 112.75, 89.29830625493409 ], "wc_reply_authors_avg": [ 76.5, 55.7068218443666 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7900358407609858452&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;ucas.ac.cn;microsoft.com;;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.ucas.ac.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UCAS;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Breaking Semantic Artifacts for Generalized AI-generated Image Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95403", "id": "NtNTfRTjE8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NtNTfRTjE8", "openreview": "https://openreview.net/forum?id=NtNTfRTjE8", "poster": "/media/PosterPDFs/NeurIPS%202024/95403.png?t=1731727531.1892638", "project": "", "author_site": "Chende Zheng, Chenhao Lin, Zhengyu Zhao, Hang Wang, Xu Guo, Shuai Liu, Chao Shen", "tldr": "", "abstract": "With the continuous evolution of AI-generated images, the generalized detection of them has become a crucial aspect of AI security. \nExisting detectors have focused on cross-generator generalization, while it remains unexplored whether these detectors can generalize across different image scenes, e.g., images from different datasets with different semantics. In this paper, we reveal that existing detectors suffer from substantial Accuracy drops in such cross-scene generalization. In particular, we attribute their failures to ''semantic artifacts'' in both real and generated images, to which detectors may overfit. To break such ''semantic artifacts'', we propose a simple yet effective approach based on conducting an image patch shuffle and then training an end-to-end patch-based classifier. We conduct a comprehensive open-world evaluation on 31 test sets, covering 7 Generative Adversarial Networks, 18 (variants of) Diffusion Models, and another 6 CNN-based generative models. The results demonstrate that our approach outperforms previous approaches by 2.08\\% (absolute) on average regarding cross-scene detection Accuracy. We also notice the superiority of our approach in open-world generalization, with an average Accuracy improvement of 10.59\\% (absolute) across all test sets. Our code is available at *https://github.com/Zig-HS/FakeImageDetection*.", "keywords": "AI Security; Deepfake Detection; AI-generated Image; Diffusion Model;", "primary_area": "other", "supplementary_material": "", "author": "Chende Zheng;Chenhao Lin;Zhengyu Zhao;Hang Wang;Xu Guo;Shuai Liu;Chao Shen", "authorids": "~Chende_Zheng1;~Chenhao_Lin1;~Zhengyu_Zhao1;~Hang_Wang8;~Xu_Guo4;~Shuai_Liu18;~Chao_Shen2", "gender": "M;M;M;;M;F;M", "homepage": "https://github.com/Zig-HS;;https://zhengyuzhao.github.io/;;https://github.com/CarrotZelda;https://gr.xjtu.edu.cn/en/web/sh_liu;http://gr.xjtu.edu.cn/web/cshen", "dblp": ";198/9470;58/10770-1;;;;48/4825-1", "google_scholar": ";YK0G990AAAAJ;pC8KpPMAAAAJ;;;;m6QY7-wAAAAJ", "orcid": ";;;;;;0000-0002-6959-0569", "linkedin": ";;;;;;", "or_profile": "~Chende_Zheng1;~Chenhao_Lin1;~Zhengyu_Zhao1;~Hang_Wang8;~Xu_Guo4;~Shuai_Liu18;~Chao_Shen2", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi\u2019an Jiaotong University", "aff_domain": "xjtu.edu;xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "position": "PhD student;Full Professor;Researcher;;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2024breaking,\ntitle={Breaking Semantic Artifacts for Generalized {AI}-generated Image Detection},\nauthor={Chende Zheng and Chenhao Lin and Zhengyu Zhao and Hang Wang and Xu Guo and Shuai Liu and Chao Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NtNTfRTjE8}\n}", "github": "", "reviewers": "iKNb;4wrZ;Zwma;a5zv", "pdf_size": 21838819, "rating": "4;4;6;6", "confidence": "5;5;5;4", "soundness": "2;3;3;2", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "119;42;92;98", "wc_strengths": "40;45;106;137", "wc_weaknesses": "201;30;227;146", "wc_questions": "27;2;69;90", "wc_limitations": "63;2;1;1", "wc_review": "450;121;495;472", "wc_reply_reviewers": "0;0;38;7", "wc_reply_authors": "37;37;23;18", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.75, 28.252212302756046 ], "wc_strengths_avg": [ 82.0, 41.030476477857285 ], "wc_weaknesses_avg": [ 151.0, 75.7330839197771 ], "wc_questions_avg": [ 47.0, 34.489128721961066 ], "wc_limitations_avg": [ 16.75, 26.705570579937064 ], "wc_review_avg": [ 384.5, 152.9615964874844 ], "wc_reply_reviewers_avg": [ 11.25, 15.706288549495072 ], "wc_reply_authors_avg": [ 28.75, 8.437268515343103 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16072094454094466660&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "xjtu.edu;xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Streaming Bayes GFlowNets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95402", "id": "Nv0Vvz588D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nv0Vvz588D", "openreview": "https://openreview.net/forum?id=Nv0Vvz588D", "poster": "/media/PosterPDFs/NeurIPS%202024/95402.png?t=1733184099.6347404", "project": "", "author_site": "Tiago Silva, Daniel Augusto de Souza, Diego Mesquita", "tldr": "", "abstract": "Bayes' rule naturally allows for inference refinement in a streaming fashion, without the need to recompute posteriors from scratch whenever new data arrives. In principle, Bayesian streaming is straightforward: we update our prior with the available data and use the resulting posterior as a prior when processing the next data chunk. In practice, however, this recipe entails i) approximating an intractable posterior at each time step; and ii) encapsulating results appropriately to allow for posterior propagation. For continuous state spaces, variational inference (VI) is particularly convenient due to its scalability and the tractability of variational posteriors, For discrete state spaces, however, state-of-the-art VI results in analytically intractable approximations that are ill-suited for streaming settings. To enable streaming Bayesian inference over discrete parameter spaces, we propose streaming Bayes GFlowNets (abbreviated as SB-GFlowNets) by leveraging the recently proposed GFlowNets --- a powerful class of amortized samplers for discrete compositional objects. Notably, SB-GFlowNet approximates the initial posterior using a standard GFlowNet and subsequently updates it using a tailored procedure that requires only the newly observed data. Our case studies in linear preference learning and phylogenetic inference showcase the effectiveness of SB-GFlowNets in sampling from an unnormalized posterior in a streaming setting. As expected, we also observe that SB-GFlowNets is significantly faster than repeatedly training a GFlowNet from scratch to sample from the full posterior.", "keywords": "GFlowNets;Streaming VI", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/53a066b3e6fe41ab0f9655f7e1044857889b02ea.zip", "author": "Tiago Silva;Daniel Augusto de Souza;Diego Mesquita", "authorids": "~Tiago_Silva4;~Daniel_Augusto_de_Souza1;~Diego_Mesquita1", "gender": "M;M;M", "homepage": "https://github.com/tiagodsilva;https://weakly-informative.github.io;https://spectral.space/", "dblp": ";163/4293;244/1958", "google_scholar": ";;WbownOEAAAAJ", "orcid": ";;0000-0002-4721-2401", "linkedin": ";;ramosmd/", "or_profile": "~Tiago_Silva4;~Diego_Mesquita1;~Daniel_Augusto_Ramos_Macedo_Antunes_de_Souza1", "aff": "Escola de Matem\u00e1tica Aplicada;Getulio Vargas Foundation;University College London", "aff_domain": "fgv.br;fgv.br;cs.ucl.ac.uk", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsilva2024streaming,\ntitle={Streaming Bayes {GF}lowNets},\nauthor={Tiago Silva and Daniel Augusto de Souza and Diego Mesquita},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nv0Vvz588D}\n}", "github": "", "reviewers": "c2ea;w9BG;bMoM;ZbQa;xCCn", "pdf_size": 1342949, "rating": "6;6;7;7;7", "confidence": "4;4;4;4;3", "soundness": "3;3;4;3;4", "novelty": "3;4;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "118;69;79;184;145", "wc_strengths": "84;61;27;51;45", "wc_weaknesses": "429;75;178;199;44", "wc_questions": "104;53;95;2;6", "wc_limitations": "13;7;22;23;5", "wc_review": "748;265;401;459;245", "wc_reply_reviewers": "112;12;7;12;29", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 119.0, 42.43112065453846 ], "wc_strengths_avg": [ 53.6, 18.8 ], "wc_weaknesses_avg": [ 185.0, 135.47103011345268 ], "wc_questions_avg": [ 52.0, 42.82522621072771 ], "wc_limitations_avg": [ 14.0, 7.429670248402684 ], "wc_review_avg": [ 423.6, 181.1359710272921 ], "wc_reply_reviewers_avg": [ 34.4, 39.50999873449757 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Oumy-kYjV0YJ:scholar.google.com/&scioq=Streaming+Bayes+GFlowNets&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "fgv.br;fgv.br;cs.ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Escola de Matem\u00e1tica Aplicada;Getulio Vargas Foundation;University College London", "aff_unique_dep": "Escola de Matem\u00e1tica Aplicada;;", "aff_unique_url": ";https://fgv.br;https://www.ucl.ac.uk", "aff_unique_abbr": ";FGV;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Brazil;United Kingdom" }, { "title": "Reinforcement Learning with Euclidean Data Augmentation for State-Based Continuous Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95401", "id": "NwiFLtWGEg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=NwiFLtWGEg", "openreview": "https://openreview.net/forum?id=NwiFLtWGEg", "poster": "/media/PosterPDFs/NeurIPS%202024/95401.png?t=1731615011.0001311", "project": "", "author_site": "Jinzhu Luo, Dingyang Chen, Qi Zhang", "tldr": "", "abstract": "Data augmentation creates new data points by transforming the original ones for an reinforcement learning (RL) agent to learn from, which has been shown to be effective for the objective of improving data efficiency of RL for continuous control. Prior work towards this objective has been largely restricted to perturbation-based data augmentation where new data points are created by perturbing the original ones,\nwhich has been impressively effective for tasks where the RL agent observe control states as images with perturbations including random cropping, shifting, etc. This work focuses on state-based control, where the RL agent can directly observe raw kinematic and task features, and considers an alternative data augmentation applied to these features based on Euclidean symmetries under transformations like rotations. We show that the default state features used in exiting benchmark tasks that are based on joint configurations are not amenable to Euclidean transformations. We therefore advocate using state features based on configurations of the limbs (i.e., rigid bodies connected by joints) that instead provides rich augmented data under Euclidean transformations. With minimal hyperparameter tuning, we show this new Euclidean data augmentation strategy significantly improve both data efficiency and asymptotic performance of RL on a wide range of continuous control tasks.", "keywords": "Reinforcement learning;Data augmentation;Continuous control;Euclidean symmetry", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/910bb35b04d6d6cd64edebdf5cb8080bb8182adf.zip", "author": "Jinzhu Luo;Dingyang Chen;Qi Zhang", "authorids": "~Jinzhu_Luo1;~Dingyang_Chen1;~Qi_Zhang12", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/jinzhu-luo-782b30208/;https://dchen48.github.io/;https://qizhg.github.io/", "dblp": ";212/7542-1.html;https://dblp.uni-trier.de/pers/hd/z/Zhang_0038:Qi", "google_scholar": ";vSdOGREAAAAJ;wJNQVS0AAAAJ", "orcid": ";;", "linkedin": ";dingyang-chen-97512712a/;", "or_profile": "~Jinzhu_Luo1;~Dingyang_Chen1;~Qi_Zhang12", "aff": "University of South Carolina;University of South Carolina;University of South Carolina", "aff_domain": "sc.edu;sc.edu;sc.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nluo2024reinforcement,\ntitle={Reinforcement Learning with Euclidean Data Augmentation for State-Based Continuous Control},\nauthor={Jinzhu Luo and Dingyang Chen and Qi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=NwiFLtWGEg}\n}", "github": "", "reviewers": "X7ZX;5Bxo;rWGW;aR8o", "pdf_size": 2601372, "rating": "4;5;5;5", "confidence": "4;5;2;4", "soundness": "2;2;3;3", "novelty": "1;2;2;3", "presentation": "2;3;2;3", "wc_summary": "75;100;61;76", "wc_strengths": "61;96;35;68", "wc_weaknesses": "297;151;60;170", "wc_questions": "38;137;31;56", "wc_limitations": "134;51;26;7", "wc_review": "605;535;213;377", "wc_reply_reviewers": "209;521;31;13", "wc_reply_authors": "44;887;2;37", "reply_reviewers": "1;4;1;1", "reply_authors": "3;7;2;3", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.0, 14.017845768876187 ], "wc_strengths_avg": [ 65.0, 21.714050750608465 ], "wc_weaknesses_avg": [ 169.5, 84.54140997168193 ], "wc_questions_avg": [ 65.5, 42.27587964785594 ], "wc_limitations_avg": [ 54.5, 48.479377058704046 ], "wc_review_avg": [ 432.5, 151.26384234178371 ], "wc_reply_reviewers_avg": [ 193.5, 204.01164182467627 ], "wc_reply_authors_avg": [ 242.5, 372.44227740684863 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.75, 1.920286436967152 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:s3xYIry0yA0J:scholar.google.com/&scioq=Reinforcement+Learning+with+Euclidean+Data+Augmentation+for+State-Based+Continuous+Control&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "sc.edu;sc.edu;sc.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of South Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.sc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "UrbanKGent: A Unified Large Language Model Agent Framework for Urban Knowledge Graph Construction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95400", "id": "Nycj81Z692", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nycj81Z692", "openreview": "https://openreview.net/forum?id=Nycj81Z692", "poster": "/media/PosterPDFs/NeurIPS%202024/95400.png?t=1729477272.7310243", "project": "", "author_site": "Yansong Ning, Hao Liu", "tldr": "", "abstract": "Urban knowledge graph has recently worked as an emerging building block to distill critical knowledge from multi-sourced urban data for diverse urban application scenarios. Despite its promising benefits, urban knowledge graph construction (UrbanKGC) still heavily relies on manual effort, hindering its potential advancement. This paper presents UrbanKGent, a unified large language model agent framework, for urban knowledge graph construction. Specifically, we first construct the knowledgeable instruction set for UrbanKGC tasks (such as relational triplet extraction and knowledge graph completion) via heterogeneity-aware and geospatial-infused instruction generation. Moreover, we propose a tool-augmented iterative trajectory refinement module to enhance and refine the trajectories distilled from GPT-4. Through hybrid instruction fine-tuning with augmented trajectories on Llama 2 and Llama 3 family, we obtain UrbanKGC agent family, consisting of UrbanKGent-7/8/13B version. We perform a comprehensive evaluation on two real-world datasets using both human and GPT-4 self-evaluation. The experimental results demonstrate that UrbanKGent family can not only significantly outperform 31 baselines in UrbanKGC tasks, but also surpass the state-of-the-art LLM, GPT-4, by more than 10% with approximately 20 times lower cost. Compared with the existing benchmark, the UrbanKGent family could help construct an UrbanKG with hundreds of times richer relationships using only one-fifth of the data. Our data and code are available at https://github.com/usail-hkust/UrbanKGent.", "keywords": "urban knowledge graph;knowledge graph construction;large language model agent", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a03a4ca9becd624f10d84acffc98cf3e03612610.zip", "author": "Yansong Ning;Hao Liu", "authorids": "~Yansong_Ning3;~Hao_Liu17", "gender": "M;", "homepage": "https://yasning.github.io/homepage/;https://raymondhliu.github.io/", "dblp": "349/7861.html;09/3214-26", "google_scholar": "2_GJmgMAAAAJ;", "orcid": ";0000-0003-4271-1567", "linkedin": ";", "or_profile": "~Yansong_Ning3;~Hao_Liu17", "aff": "The Hong Kong University of Science and Technology (Guangzhou);The Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;hkust-gz.edu.cn", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nning2024urbankgent,\ntitle={Urban{KG}ent: A Unified Large Language Model Agent Framework for Urban Knowledge Graph Construction},\nauthor={Yansong Ning and Hao Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nycj81Z692}\n}", "github": "", "reviewers": "GYCX;e3YZ;rXu6;72X8", "pdf_size": 3597056, "rating": "5;6;7;7", "confidence": "3;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;4;2", "wc_summary": "65;57;97;140", "wc_strengths": "71;31;52;127", "wc_weaknesses": "297;59;21;181", "wc_questions": "81;2;36;1", "wc_limitations": "1;2;27;1", "wc_review": "515;151;233;450", "wc_reply_reviewers": "28;42;68;15", "wc_reply_authors": "580;303;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 89.75, 32.64486942844159 ], "wc_strengths_avg": [ 70.25, 35.688758734369 ], "wc_weaknesses_avg": [ 139.5, 108.45621236240919 ], "wc_questions_avg": [ 30.0, 32.64199748789893 ], "wc_limitations_avg": [ 7.75, 11.121488209767612 ], "wc_review_avg": [ 337.25, 149.88724929092535 ], "wc_reply_reviewers_avg": [ 38.25, 19.651653874419832 ], "wc_reply_authors_avg": [ 220.75, 241.49883540091867 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7377515752811945873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hkust.edu;hkust-gz.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Guangzhou", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "How Diffusion Models Learn to Factorize and Compose", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95399", "id": "Nzfg1LXTdS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Nzfg1LXTdS", "openreview": "https://openreview.net/forum?id=Nzfg1LXTdS", "poster": "/media/PosterPDFs/NeurIPS%202024/95399.png?t=1732663009.314082", "project": "", "author_site": "Qiyao Liang, Ziming Liu, Mitchell Ostrow, Ila Fiete", "tldr": "", "abstract": "Diffusion models are capable of generating photo-realistic images that combine elements which do not appear together in natural images, demonstrating their ability to compositionally generalize. Nonetheless, the precise mechanism of compositionality and how it is acquired through training remains elusive. Here, we consider a highly reduced setting to examine whether diffusion models learn semantically meaningful and fully factorized representations of composable features. We performed extensive controlled experiments on conditional DDPMs trained to generate various forms of 2D Gaussian data. We demonstrate that the models learn factorized, semi-continuous manifold representations that are orthogonal in underlying continuous latent features of independent variations but are not aligned for different values of the same feature. With such representations, models demonstrate superior compositionality but have limited ability to interpolate over unseen values of a given feature. Our experimental results further demonstrate that diffusion models can attain compositionality with a small amount of compositional examples, suggesting a novel way to train DDPMs. Finally, we connect manifold formation in diffusion models to percolation theory in physics, thereby offering insights into the sudden onset of factorized representation learning. Our thorough toy experiments thus contribute a deeper understanding of how diffusion models capture compositional structure in data, paving the way for future research aimed at enhancing factorization and compositional generalization in generative models for real-world applications.", "keywords": "Representation Learning;Compositional Generalization;Diffusion Models;Generative Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Qiyao Liang;Ziming Liu;Mitchell Ostrow;Ila R Fiete", "authorids": "~Qiyao_Liang1;~Ziming_Liu2;~Mitchell_Ostrow2;~Ila_R_Fiete1", "gender": "F;M;;F", "homepage": "https://qiyaoliang.com/;https://kindxiaoming.github.io/;https://mitchellostrow.github.io;https://fietelab.mit.edu/", "dblp": ";;;", "google_scholar": "qzSOnnwAAAAJ;0b32RKAAAAAJ;WRIrZ2cAAAAJ;uE-CihIAAAAJ", "orcid": ";;;0000-0003-4738-2539", "linkedin": ";;https://linkedin.com/in/mitchell-ostrow;", "or_profile": "~Qiyao_Liang1;~Ziming_Liu2;~Mitchell_Ostrow2;~Ila_R_Fiete1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nliang2024how,\ntitle={How Diffusion Models Learn to Factorize and Compose},\nauthor={Qiyao Liang and Ziming Liu and Mitchell Ostrow and Ila R Fiete},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Nzfg1LXTdS}\n}", "github": "", "reviewers": "un3W;u7D1;RMNd;VwNJ", "pdf_size": 7064302, "rating": "5;5;6;8", "confidence": "4;3;3;3", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "2;2;3;4", "wc_summary": "42;116;102;116", "wc_strengths": "31;18;45;82", "wc_weaknesses": "1058;82;102;88", "wc_questions": "2;58;240;1", "wc_limitations": "20;6;42;2", "wc_review": "1153;280;531;289", "wc_reply_reviewers": "410;87;57;55", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 94.0, 30.56141357987225 ], "wc_strengths_avg": [ 44.0, 23.926972228010797 ], "wc_weaknesses_avg": [ 332.5, 418.9304834933834 ], "wc_questions_avg": [ 75.25, 97.87587802926726 ], "wc_limitations_avg": [ 17.5, 15.644487847162015 ], "wc_review_avg": [ 563.25, 355.06645504750236 ], "wc_reply_reviewers_avg": [ 152.25, 149.3508871751353 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15189421196431133705&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mobile-Agent-v2: Mobile Device Operation Assistant with Effective Navigation via Multi-Agent Collaboration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95398", "id": "O0nBMRlkc8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O0nBMRlkc8", "openreview": "https://openreview.net/forum?id=O0nBMRlkc8", "poster": "", "project": "", "author_site": "Junyang Wang, Haiyang Xu, Haitao Jia, Xi Zhang, Ming Yan, Weizhou Shen, Ji Zhang, Fei Huang, Jitao Sang", "tldr": "", "abstract": "Mobile device operation tasks are increasingly becoming a popular multi-modal AI application scenario. Current Multi-modal Large Language Models (MLLMs), constrained by their training data, lack the capability to function effectively as operation assistants. Instead, MLLM-based agents, which enhance capabilities through tool invocation, are gradually being applied to this scenario. However, the two major navigation challenges in mobile device operation tasks \u2014 task progress navigation and focus content navigation \u2014 are difficult to effectively solve under the single-agent architecture of existing work. This is due to the overly long token sequences and the interleaved text-image data format, which limit performance. To address these navigation challenges effectively, we propose Mobile-Agent-v2, a multi-agent architecture for mobile device operation assistance. The architecture comprises three agents: planning agent, decision agent, and reflection agent. The planning agent condenses lengthy, interleaved image-text history operations and screens summaries into a pure-text task progress, which is then passed on to the decision agent. This reduction in context length makes it easier for decision agent to navigate the task progress. To retain focus content, we design a memory unit that updates with task progress by decision agent. Additionally, to correct erroneous operations, the reflection agent observes the outcomes of each operation and handles any mistake accordingly. Experimental results indicate that Mobile-Agent-v2 achieves over a 30% improvement in task completion compared to the single-agent architecture of Mobile-Agent. The code is open-sourced at https://github.com/X-PLUG/MobileAgent.", "keywords": "multi-agent;multi-modal agent;multi-modal large language model;mobile operation;UI assistant", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/6263aec0af15290710128efdb33a6745a7567b98.zip", "author": "Junyang Wang;Haiyang Xu;Haitao Jia;Xi Zhang;Ming Yan;Weizhou Shen;Ji Zhang;Fei Huang;Jitao Sang", "authorids": "~Junyang_Wang1;~Haiyang_Xu1;~Haitao_Jia1;~Xi_Zhang11;~Ming_Yan2;~Weizhou_Shen1;~Ji_Zhang3;~Fei_Huang2;~Jitao_Sang1", "gender": "M;M;M;F;M;M;;M;", "homepage": "http://junyangwang0410.github.io/;;https://github.com/haitao1101;;;;;https://sites.google.com/view/fei-huang;", "dblp": ";;;;51/5332-4.html;245/3622;86/1953-11;h/FeiHuang.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;qZYvce8AAAAJ;;https://scholar.google.com.hk/citations?user=TE1odswAAAAJ;uIUfGxYAAAAJ;387Sg1wAAAAJ;cgnuJDUAAAAJ;9r98PpoAAAAJ;", "orcid": ";;;;0000-0003-4959-8878;;;;", "linkedin": ";;;;;;;fei-huang-cas-cmu;", "or_profile": "~Junyang_Wang1;~Haiyang_Xu1;~Haitao_Jia1;~Xi_Zhang11;~Ming_Yan2;~Weizhou_Shen1;~Ji_Zhang3;~Fei_Huang2;~Jitao_Sang1", "aff": "Beijing Jiaotong University;Alibaba Group;Beijing Jiaotong University;Institute of Automation, Chinese Academy of Sciences;Alibaba Group;SUN YAT-SEN UNIVERSITY;Alibaba Group;Alibaba Group US;", "aff_domain": "bjtu.edu.cn;alibaba-inc.com;bjtu.edu.cn;ia.ac.cn;alibaba-inc.com;sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;", "position": "PhD student;Researcher;MS student;PhD student;Instructor;PhD student;Senior Staff Engineer;Senior Research Director;", "bibtex": "@inproceedings{\nwang2024mobileagentv,\ntitle={Mobile-Agent-v2: Mobile Device Operation Assistant with Effective Navigation via Multi-Agent Collaboration},\nauthor={Junyang Wang and Haiyang Xu and Haitao Jia and Xi Zhang and Ming Yan and Weizhou Shen and Ji Zhang and Fei Huang and Jitao Sang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O0nBMRlkc8}\n}", "github": "", "reviewers": "kbfL;cAKE;Xdcr;c2St;4qiL", "pdf_size": 30024250, "rating": "4;5;5;6;7", "confidence": "3;4;4;4;4", "soundness": "3;2;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;3;3", "wc_summary": "77;49;65;70;85", "wc_strengths": "42;27;43;88;56", "wc_weaknesses": "286;74;30;70;62", "wc_questions": "23;9;211;65;18", "wc_limitations": "44;83;1;2;7", "wc_review": "472;242;350;295;228", "wc_reply_reviewers": "138;191;102;0;81", "wc_reply_authors": "255;380;61;0;386", "reply_reviewers": "1;2;1;0;2", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.2, 12.139192724394816 ], "wc_strengths_avg": [ 51.2, 20.565991344936428 ], "wc_weaknesses_avg": [ 104.4, 92.10776297359523 ], "wc_questions_avg": [ 65.2, 75.39867372838863 ], "wc_limitations_avg": [ 27.4, 32.01624587611733 ], "wc_review_avg": [ 317.4, 88.47960216908753 ], "wc_reply_reviewers_avg": [ 102.4, 63.34224498705426 ], "wc_reply_authors_avg": [ 216.4, 159.9982499904296 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6864064729836443, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1164920476958909005&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bjtu.edu.cn;alibaba-inc.com;bjtu.edu.cn;ia.ac.cn;alibaba-inc.com;sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;", "author_num": 9, "aff_unique_index": "0;1;0;2;1;3;1;1", "aff_unique_norm": "Beijing Jiao Tong University;Alibaba Group;Chinese Academy of Sciences;Sun Yat-sen University", "aff_unique_dep": ";;Institute of Automation;", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.alibaba.com;http://www.ia.cas.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "BJTU;Alibaba;CAS;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "On scalable oversight with weak LLMs judging strong LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95397", "id": "O1fp9nVraj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O1fp9nVraj", "openreview": "https://openreview.net/forum?id=O1fp9nVraj", "poster": "/media/PosterPDFs/NeurIPS%202024/95397.png?t=1730831341.1459036", "project": "", "author_site": "Zachary Kenton, Noah Siegel, Janos Kramar, Jonah Brown-Cohen, Samuel Albanie, Jannis Bulian, Rishabh Agarwal, David Lindner, Yunhao Tang, Noah Goodman, Rohin Shah", "tldr": "", "abstract": "Scalable oversight protocols aim to enable humans to accurately supervise superhuman AI. \nIn this paper we study debate, where two AI's compete to convince a judge; consultancy, \nwhere a single AI tries to convince a judge that asks questions;\nand compare to a baseline of direct question-answering, where the judge just answers outright without the AI.\nWe use large language models (LLMs) as both AI agents and as stand-ins for human judges, taking the judge models to be weaker than agent models. \nWe benchmark on a diverse range of asymmetries between judges and agents, extending previous work on a single extractive QA task with information asymmetry, to also include mathematics, coding, logic and multimodal reasoning asymmetries. \nWe find that debate outperforms consultancy across all tasks when the consultant is randomly assigned to argue for the correct/incorrect answer. Comparing debate to direct question answering, the results depend on the type of task: in extractive QA tasks with information asymmetry debate outperforms direct question answering, but in other tasks without information asymmetry the results are mixed.\nPrevious work assigned debaters/consultants an answer to argue for. When we allow them to instead choose which answer to argue for, we find judges are less frequently convinced by the wrong answer in debate than in consultancy.\nFurther, we find that stronger debater models increase judge accuracy, though more modestly than in previous studies.", "keywords": "alignment;safety;scalable oversight;debate;LLM", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2f52c28d1178a6a0a3fe173b872329acd63b12ba.zip", "author": "Zachary Kenton;Noah Yamamoto Siegel;Janos Kramar;Jonah Brown-Cohen;Samuel Albanie;Jannis Bulian;Rishabh Agarwal;David Lindner;Yunhao Tang;Noah Goodman;Rohin Shah", "authorids": "~Zachary_Kenton2;~Noah_Yamamoto_Siegel1;~Janos_Kramar1;~Jonah_Brown-Cohen1;~Samuel_Albanie2;~Jannis_Bulian1;~Rishabh_Agarwal2;~David_Lindner1;~Yunhao_Tang1;~Noah_Goodman1;~Rohin_Shah1", "gender": "M;;M;M;;M;M;;M;;M", "homepage": "https://zackenton.github.io/;;;https://jonahbc.github.io/;;http://bulian.org/;https://agarwl.github.io;;https://robintyh1.github.io;https://cocolab.stanford.edu/;http://rohinshah.com/", "dblp": "209/9980;259/1484;49/9013;157/1513;;09/10967;;;210/2229;96/1216;145/1009", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;l2E0LR4AAAAJ;;fRc3A80AAAAJ;;https://scholar.google.co.uk/citations?user=Yq32OuIAAAAJ;https://scholar.google.ca/citations?user=aH8AJu4AAAAJ;;;OUpIbcQAAAAJ;odFQXSYAAAAJ", "orcid": ";0000-0002-5746-117X;;;;0000-0002-2908-4485;;;;;", "linkedin": "zac-kenton-824429124/;noah-y-siegel-8751925b;;;;jbulian/;;;;;rohin-shah-76405832/", "or_profile": "~Zachary_Kenton2;~Noah_Yamamoto_Siegel1;~Janos_Kramar1;~Jonah_Brown-Cohen1;~Samuel_Albanie2;~Jannis_Bulian1;~Rishabh_Agarwal2;~David_Lindner1;~Yunhao_Tang1;~Noah_Goodman1;~Rohin_Shah1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;;Google DeepMind;Google DeepMind;;Google DeepMind;Stanford University;Google DeepMind", "aff_domain": "google.com;deepmind.com;deepmind.com;deepmind.com;;google.com;google.com;;deepmind.com;stanford.edu;deepmind.com", "position": "Researcher;Researcher;Researcher;Researcher;;Researcher;Research Scientist;;Research Scientist;Full Professor;Researcher", "bibtex": "@inproceedings{\nkenton2024on,\ntitle={On scalable oversight with weak {LLM}s judging strong {LLM}s},\nauthor={Zachary Kenton and Noah Yamamoto Siegel and Janos Kramar and Jonah Brown-Cohen and Samuel Albanie and Jannis Bulian and Rishabh Agarwal and David Lindner and Yunhao Tang and Noah Goodman and Rohin Shah},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O1fp9nVraj}\n}", "github": "", "reviewers": "jsK8;P56G;MBPi;HiQA", "pdf_size": 4040245, "rating": "4;5;7;8", "confidence": "3;3;4;2", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "92;68;101;395", "wc_strengths": "45;39;110;272", "wc_weaknesses": "167;191;91;68", "wc_questions": "2;4;86;99", "wc_limitations": "2;9;4;69", "wc_review": "308;311;392;903", "wc_reply_reviewers": "161;65;0;90", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 164.0, 133.91228472399385 ], "wc_strengths_avg": [ 116.5, 93.99601055363999 ], "wc_weaknesses_avg": [ 129.25, 51.119345653089105 ], "wc_questions_avg": [ 47.75, 44.990971316476376 ], "wc_limitations_avg": [ 21.0, 27.829840100151493 ], "wc_review_avg": [ 478.5, 247.39088503823257 ], "wc_reply_reviewers_avg": [ 79.0, 57.623779813545724 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.223606797749979, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7536641292607050168&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;deepmind.com;deepmind.com;deepmind.com;;google.com;google.com;;deepmind.com;stanford.edu;deepmind.com", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;1;0", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.stanford.edu", "aff_unique_abbr": "DeepMind;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Graphcode: Learning from multiparameter persistent homology using graph neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95396", "id": "O23XfTnhWR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O23XfTnhWR", "openreview": "https://openreview.net/forum?id=O23XfTnhWR", "poster": "/media/PosterPDFs/NeurIPS%202024/95396.png?t=1733759840.9757173", "project": "", "author_site": "Florian Russold, Michael Kerber", "tldr": "", "abstract": "We introduce graphcodes, a novel multi-scale summary of the topological properties of a dataset that is based on the well-established theory of persistent homology. Graphcodes handle datasets that are filtered along two real-valued scale parameters. Such multi-parameter topological summaries are usually based on complicated theoretical foundations and difficult to compute; in contrast, graphcodes yield an informative and interpretable summary and can be computed as efficient as one-parameter summaries. Moreover, a graphcode is simply an embedded graph and can therefore be readily integrated in machine learning pipelines using graph neural networks. We describe such a pipeline and demonstrate that graphcodes achieve better classification accuracy than state-of-the-art approaches on various datasets.", "keywords": "Topological Data Analysis;Multiparameter Persistent Homology;Machine Learning;Geometric Deep Learning;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/18613a7fc2106efc93414403a8c6d3133416fce6.zip", "author": "Florian Russold;Michael Kerber", "authorids": "~Florian_Russold1;~Michael_Kerber1", "gender": "M;M", "homepage": "http://www.geometrie.tugraz.at/russold/;http://www.geometrie.tugraz.at/kerber/", "dblp": "350/4759;76/4651", "google_scholar": "jh0TgN4AAAAJ;https://scholar.google.de/citations?user=Ue-ElbUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Florian_Russold1;~Michael_Kerber1", "aff": "Technische Universit\u00e4t Graz;Geometry", "aff_domain": "tugraz.at;tugraz.at", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nrussold2024graphcode,\ntitle={Graphcode: Learning from multiparameter persistent homology using graph neural networks},\nauthor={Florian Russold and Michael Kerber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O23XfTnhWR}\n}", "github": "", "reviewers": "1X25;NxrC;j5Nq;k1Z5", "pdf_size": 952217, "rating": "4;5;6;7", "confidence": "4;3;4;5", "soundness": "2;2;3;3", "novelty": "2;1;3;3", "presentation": "2;2;4;3", "wc_summary": "123;66;148;108", "wc_strengths": "20;64;332;65", "wc_weaknesses": "28;15;609;79", "wc_questions": "103;150;143;98", "wc_limitations": "65;2;12;6", "wc_review": "339;297;1244;356", "wc_reply_reviewers": "10;136;324;23", "wc_reply_authors": "0;432;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 111.25, 29.77729839995563 ], "wc_strengths_avg": [ 120.25, 123.59687496049405 ], "wc_weaknesses_avg": [ 182.75, 247.2553083353318 ], "wc_questions_avg": [ 123.5, 23.200215516240362 ], "wc_limitations_avg": [ 21.25, 25.508576988926684 ], "wc_review_avg": [ 559.0, 396.0675447445802 ], "wc_reply_reviewers_avg": [ 123.25, 125.83595471883226 ], "wc_reply_authors_avg": [ 108.0, 187.06148721743875 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11938738931184101118&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "tugraz.at;tugraz.at", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t Graz;Geometry", "aff_unique_dep": ";", "aff_unique_url": "https://www.tugraz.at;", "aff_unique_abbr": "TU Graz;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Austria;" }, { "title": "On the Comparison between Multi-modal and Single-modal Contrastive Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95395", "id": "O2UwxfhY1P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O2UwxfhY1P", "openreview": "https://openreview.net/forum?id=O2UwxfhY1P", "poster": "", "project": "", "author_site": "Wei Huang, Andi Han, Yongqiang Chen, Yuan Cao, Zhiqiang Xu, Taiji Suzuki", "tldr": "", "abstract": "Multi-modal contrastive learning with language supervision has presented a paradigm shift in modern machine learning. By pre-training on a web-scale dataset, multi-modal contrastive learning can learn high-quality representations that exhibit impressive robustness and transferability. Despite its empirical success, the theoretical understanding is still in its infancy, especially regarding its comparison with single-modal contrastive learning. In this work, we introduce a feature learning theory framework that provides a theoretical foundation for understanding the differences between multi-modal and single-modal contrastive learning. Based on a data generation model consisting of signal and noise, our analysis is performed on a ReLU network trained with the InfoMax objective function. Through a trajectory-based optimization analysis and generalization characterization on downstream tasks, we identify the critical factor, which is the signal-to-noise ratio (SNR), that impacts the generalizability in downstream tasks of both multi-modal and single-modal contrastive learning. Through the cooperation between the two modalities, multi-modal learning can achieve better feature learning, leading to improvements in performance in downstream tasks compared to single-modal learning. Our analysis provides a unified framework that can characterize the optimization and generalization of both single-modal and multi-modal contrastive learning. Empirical experiments on both synthetic and real-world datasets further consolidate our theoretical findings.", "keywords": "multi-modal contrastive learning;single-modal contrastive learning;optimization;learning theory", "primary_area": "other", "supplementary_material": "/attachment/5351bd5a1322b1fac330ef5e0f4bda82e6a6c674.zip", "author": "Wei Huang;Andi Han;Yongqiang Chen;Yuan Cao;zhiqiang xu;Taiji Suzuki", "authorids": "~Wei_Huang6;~Andi_Han1;~Yongqiang_Chen1;~Yuan_Cao1;~zhiqiang_xu1;~Taiji_Suzuki1", "gender": "M;M;;M;M;M", "homepage": "https://weihuang05.github.io/;https://github.com/andyjm3;https://lfhase.win;https://yuancaohku.github.io/;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;http://ibis.t.u-tokyo.ac.jp/suzuki/", "dblp": "81/6685-34;268/7976.html;76/5774-2;;72/51-3.html;08/312", "google_scholar": "RZfDh4MAAAAJ;AKHQHs0AAAAJ;huQ_Ig8AAAAJ;-VGnHI4AAAAJ;;x8osrBsAAAAJ", "orcid": "0000-0001-5674-7021;0000-0003-4655-655X;;;0000-0002-5693-8933;", "linkedin": ";;;;;", "or_profile": "~Wei_Huang6;~Andi_Han1;~Yongqiang_Chen1;~Yuan_Cao1;~zhiqiang_xu1;~Taiji_Suzuki1", "aff": "RIKEN AIP;RIKEN AIP;Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Hong Kong;Mohamed bin Zayed University of Artificial Intelligence;The University of Tokyo", "aff_domain": "riken.jp;riken.jp;cse.cuhk.edu.hk;hku.hk;mbzuai.ac.ae;tokyo.ac.jp", "position": "Research Scientist;Postdoc;PhD student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024on,\ntitle={On the Comparison between Multi-modal and Single-modal Contrastive Learning},\nauthor={Wei Huang and Andi Han and Yongqiang Chen and Yuan Cao and zhiqiang xu and Taiji Suzuki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O2UwxfhY1P}\n}", "github": "", "reviewers": "nN5z;7UFF;bcfY", "pdf_size": 612420, "rating": "5;6;6", "confidence": "2;5;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "59;60;99", "wc_strengths": "37;132;58", "wc_weaknesses": "511;86;159", "wc_questions": "25;28;48", "wc_limitations": "3;1;2", "wc_review": "635;307;366", "wc_reply_reviewers": "610;20;66", "wc_reply_authors": "980;24;375", "reply_reviewers": "3;1;2", "reply_authors": "5;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 72.66666666666667, 18.624953392931992 ], "wc_strengths_avg": [ 75.66666666666667, 40.74582459862878 ], "wc_weaknesses_avg": [ 252.0, 185.54963397071594 ], "wc_questions_avg": [ 33.666666666666664, 10.208928554075703 ], "wc_limitations_avg": [ 2.0, 0.816496580927726 ], "wc_review_avg": [ 436.0, 142.76087232385024 ], "wc_reply_reviewers_avg": [ 232.0, 267.9452680430589 ], "wc_reply_authors_avg": [ 459.6666666666667, 394.85046395931823 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7559289460184542, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4404510755883570988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "riken.jp;riken.jp;cse.cuhk.edu.hk;hku.hk;mbzuai.ac.ae;tokyo.ac.jp", "author_num": 6, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "RIKEN;Chinese University of Hong Kong;University of Hong Kong;Mohamed bin Zayed University of Artificial Intelligence;University of Tokyo", "aff_unique_dep": "Advanced Institute for Computational Science;Department of Computer Science and Engineering;;;", "aff_unique_url": "https://www.aip.riken.jp;https://www.cuhk.edu.hk;https://www.hku.hk;https://mbzuai.ac.ae;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "RIKEN AIP;CUHK;HKU;MBZUAI;UTokyo", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;2;0", "aff_country_unique": "Japan;China;United Arab Emirates" }, { "title": "Pseudo-Siamese Blind-spot Transformers for Self-Supervised Real-World Denoising", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95394", "id": "O3nPufVaee", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O3nPufVaee", "openreview": "https://openreview.net/forum?id=O3nPufVaee", "poster": "/media/PosterPDFs/NeurIPS%202024/95394.png?t=1730310184.4268615", "project": "", "author_site": "Yuhui Quan, Tianxiang Zheng, Hui Ji", "tldr": "", "abstract": "Real-world image denoising remains a challenge task. This paper studies self-supervised image denoising, requiring only noisy images captured in a single shot. We revamping the blind-spot technique by leveraging the transformer\u2019s capability for long-range pixel interactions, which is crucial for effectively removing noise dependence in relating pixel\u2013a requirement for achieving great performance for the blind-spot technique. The proposed method integrates these elements with two key innovations: a directional self-attention (DSA) module using a half-plane grid for self-attention, creating a sophisticated blind-spot structure, and a Siamese architecture with mutual learning to mitigate the performance impacts\nfrom the restricted attention grid in DSA. Experiments on benchmark datasets demonstrate that our method outperforms existing self-supervised and clean-image-free methods. This combination of blind-spot and transformer techniques provides a natural synergy for tackling real-world image denoising challenges.", "keywords": "self-supervision;image denoising;low-level vision;transformer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuhui Quan;Tianxiang Zheng;Hui Ji", "authorids": "~Yuhui_Quan5;~Tianxiang_Zheng2;~Hui_Ji1", "gender": ";;M", "homepage": ";;https://blog.nus.edu.sg/matjh/", "dblp": ";;", "google_scholar": ";;AsKY0XoAAAAJ", "orcid": ";;0000-0002-1674-6056", "linkedin": ";;", "or_profile": "~Yuhui_Quan5;~Tianxiang_Zheng2;~Hui_Ji1", "aff": ";;National University of Singapore", "aff_domain": ";;nus.edu.sg", "position": ";;Full Professor", "bibtex": "@inproceedings{\nquan2024pseudosiamese,\ntitle={Pseudo-Siamese Blind-spot Transformers for Self-Supervised Real-World Denoising},\nauthor={Yuhui Quan and Tianxiang Zheng and Hui Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O3nPufVaee}\n}", "github": "", "reviewers": "Yeyk;5hgz;JdHm;UEMY", "pdf_size": 40688771, "rating": "5;5;6;7", "confidence": "4;5;5;5", "soundness": "3;2;3;3", "novelty": "3;2;4;4", "presentation": "3;2;4;3", "wc_summary": "35;44;49;72", "wc_strengths": "6;50;112;81", "wc_weaknesses": "155;134;85;92", "wc_questions": "98;83;89;41", "wc_limitations": "6;8;1;1", "wc_review": "300;319;336;287", "wc_reply_reviewers": "31;15;104;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 50.0, 13.656500283747663 ], "wc_strengths_avg": [ 62.25, 39.18146883413127 ], "wc_weaknesses_avg": [ 116.5, 29.073183520213263 ], "wc_questions_avg": [ 77.75, 21.878928218722233 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 310.5, 18.607794065928395 ], "wc_reply_reviewers_avg": [ 41.5, 36.63673020344474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:m_H2fWASmFsJ:scholar.google.com/&scioq=Pseudo-Siamese+Blind-spot+Transformers+for+Self-Supervised+Real-World+Denoising&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";;nus.edu.sg", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "How to Continually Adapt Text-to-Image Diffusion Models for Flexible Customization?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95393", "id": "O4RCFjVUBJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O4RCFjVUBJ", "openreview": "https://openreview.net/forum?id=O4RCFjVUBJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95393.png?t=1731738136.5531528", "project": "", "author_site": "Jiahua Dong, Wenqi Liang, Hongliu Li, Duzhen Zhang, Meng Cao, Henghui Ding, Salman Khan, Fahad Shahbaz Khan", "tldr": "", "abstract": "Custom diffusion models (CDMs) have attracted widespread attention due to their astonishing generative ability for personalized concepts. However, most existing CDMs unreasonably assume that personalized concepts are fixed and cannot change over time. Moreover, they heavily suffer from catastrophic forgetting and concept neglect on old personalized concepts when continually learning a series of new concepts. To address these challenges, we propose a novel Concept-Incremental text-to-image Diffusion Model (CIDM), which can resolve catastrophic forgetting and concept neglect to learn new customization tasks in a concept-incremental manner. Specifically, to surmount the catastrophic forgetting of old concepts, we develop a concept consolidation loss and an elastic weight aggregation module. They can explore task-specific and task-shared knowledge during training, and aggregate all low-rank weights of old concepts based on their contributions during inference. Moreover, in order to address concept neglect, we devise a context-controllable synthesis strategy that leverages expressive region features and noise estimation to control the contexts of generated images according to user conditions. Experiments validate that our CIDM surpasses existing custom diffusion models. The source codes are available at https://github.com/JiahuaDong/CIFC.", "keywords": "Text-to-Image Diffusion;Continual Learning;Concept Customization", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/dcb699e2832f246c4ff71f676e6d97ffca349fe8.zip", "author": "Jiahua Dong;Wenqi Liang;Hongliu Li;Duzhen Zhang;Meng Cao;Henghui Ding;Salman Khan;Fahad Khan", "authorids": "~Jiahua_Dong1;~Wenqi_Liang3;~Hongliu_Li2;~Duzhen_Zhang1;~Meng_Cao1;~Henghui_Ding2;~Salman_Khan4;~Fahad_Khan1", "gender": ";M;;M;M;;M;M", "homepage": ";https://github.com/lxw4878;;https://bladedancer957.github.io/;https://mengcaopku.github.io/;;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home", "dblp": ";;;235/0398.html;67/833;;32/11535-1;05/8618", "google_scholar": ";https://scholar.google.com.hk/citations?user=KryGUoQAAAAJ;;o0jlAfwAAAAJ;ZRbRQ0cAAAAJ;;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ", "orcid": ";;;0000-0002-4280-431X;0000-0002-8946-4228;;0000-0002-9502-1749;", "linkedin": ";;;;;;;", "or_profile": "~Jiahua_Dong1;~Wenqi_Liang3;~Hongliu_Li2;~Duzhen_Zhang1;~Meng_Cao1;~Henghui_Ding2;~Salman_Khan4;~Fahad_Khan1", "aff": ";University of Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;International Digital Economy Academy (IDEA);;Australian National University;Link\u00f6ping University", "aff_domain": ";ucas.ac.cn;;ia.ac.cn;idea.edu.cn;;anu.edu.au;liu.se", "position": ";MS student;;PhD student;Researcher;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\ndong2024how,\ntitle={How to Continually Adapt Text-to-Image Diffusion Models for Flexible Customization?},\nauthor={Jiahua Dong and Wenqi Liang and Hongliu Li and Duzhen Zhang and Meng Cao and Henghui Ding and Salman Khan and Fahad Khan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O4RCFjVUBJ}\n}", "github": "", "reviewers": "SpKd;YpEo;5EBF;Th4Q", "pdf_size": 32601499, "rating": "5;7;7;7", "confidence": "4;4;4;5", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "81;99;126;43", "wc_strengths": "61;65;124;48", "wc_weaknesses": "44;223;127;168", "wc_questions": "2;21;61;39", "wc_limitations": "1;4;7;1", "wc_review": "189;412;445;299", "wc_reply_reviewers": "12;85;14;18", "wc_reply_authors": "36;577;30;37", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 30.152736194249435 ], "wc_strengths_avg": [ 74.5, 29.261749776799064 ], "wc_weaknesses_avg": [ 140.5, 65.30122510336234 ], "wc_questions_avg": [ 30.75, 21.821720830401986 ], "wc_limitations_avg": [ 3.25, 2.48746859276655 ], "wc_review_avg": [ 336.25, 100.79031451483819 ], "wc_reply_reviewers_avg": [ 32.25, 30.531745773866255 ], "wc_reply_authors_avg": [ 170.0, 234.9968084889665 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9089518441220705272&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": ";ucas.ac.cn;;ia.ac.cn;idea.edu.cn;;anu.edu.au;liu.se", "author_num": 8, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;International Digital Economy Academy;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";Institute of Automation;;;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn;;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "UCAS;CAS;IDEA;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;3", "aff_country_unique": "China;;Australia;Sweden" }, { "title": "Hyper-SD: Trajectory Segmented Consistency Model for Efficient Image Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95392", "id": "O5XbOoi0x3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O5XbOoi0x3", "openreview": "https://openreview.net/forum?id=O5XbOoi0x3", "poster": "/media/PosterPDFs/NeurIPS%202024/95392.png?t=1731569355.0108004", "project": "", "author_site": "Yuxi Ren, Xin Xia, Yanzuo Lu, Jiacheng Zhang, Jie Wu, Pan Xie, XING WANG, Xuefeng Xiao", "tldr": "", "abstract": "Recently, a series of diffusion-aware distillation algorithms have emerged to alleviate the computational overhead associated with the multi-step inference process of Diffusion Models (DMs). Current distillation techniques often dichotomize into two distinct aspects: i) ODE Trajectory Preservation; and ii) ODE Trajectory Reformulation. However, these approaches suffer from severe performance degradation or domain shifts. To address these limitations, we propose Hyper-SD, a novel framework that synergistically amalgamates the advantages of ODE Trajectory Preservation and Reformulation, while maintaining near-lossless performance during step compression. Firstly, we introduce Trajectory Segmented Consistency Distillation to progressively perform consistent distillation within pre-defined time-step segments, which facilitates the preservation of the original ODE trajectory from a higher-order perspective. Secondly, we incorporate human feedback learning to boost the performance of the model in a low-step regime and mitigate the performance loss incurred by the distillation process. Thirdly, we integrate score distillation to further improve the low-step generation capability of the model and offer the first attempt to leverage a unified LoRA to support the inference process at all steps. Extensive experiments and user studies demonstrate that Hyper-SD achieves SOTA performance from 1 to 8 inference steps for both SDXL and SD1.5. For example, Hyper-SDXL surpasses SDXL-Lightning by +0.68 in CLIP Score and +0.51 in Aes Score in the 1-step inference.", "keywords": "Diffusion Model;Consistency Distillation;Human-Feedback Learning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/b3008484d33ee754b9dce5e5ba1ed11ae501d664.zip", "author": "Yuxi Ren;Xin Xia;Yanzuo Lu;Jiacheng Zhang;Jie Wu;Pan Xie;XING WANG;Xuefeng Xiao", "authorids": "~Yuxi_Ren1;~Xin_Xia1;~Yanzuo_Lu1;~Jiacheng_Zhang4;~Jie_Wu8;~Pan_Xie1;~XING_WANG3;~Xuefeng_Xiao1", "gender": ";M;;M;M;M;M;M", "homepage": ";;;https://github.com/Zhangjiacheng144;https://github.com/WuJie1010;https://panxiaoxie.cn;;", "dblp": ";06/2072-5;;;;78/6247;;245/9547", "google_scholar": ";https://scholar.google.com.sg/citations?hl=zh-CN;;;MxvLqLcAAAAJ;Z-0EqtgAAAAJ;cswtxw4AAAAJ;", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Yuxi_Ren1;~Xin_Xia1;~Yanzuo_Lu1;~Jiacheng_Zhang4;~Jie_Wu8;~Pan_Xie1;~XING_WANG3;~Xuefeng_Xiao1", "aff": ";Bytedance;;;ByteDance Inc.;ByteDance Inc.;ByteDance;ByteDance", "aff_domain": ";bytedance.com;;;bytedance.com;bytedance.com;bytedance.com;bytedance.com", "position": ";Researcher;;;Researcher;Researcher;Engineer;Researcher", "bibtex": "@inproceedings{\nren2024hypersd,\ntitle={Hyper-{SD}: Trajectory Segmented Consistency Model for Efficient Image Synthesis},\nauthor={Yuxi Ren and Xin Xia and Yanzuo Lu and Jiacheng Zhang and Jie Wu and Pan Xie and XING WANG and Xuefeng Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O5XbOoi0x3}\n}", "github": "", "reviewers": "5gi9;XyiH;JxBk;nBsQ", "pdf_size": 14407088, "rating": "3;5;6;7", "confidence": "3;4;5;5", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "139;52;118;83", "wc_strengths": "41;48;75;66", "wc_weaknesses": "69;335;118;216", "wc_questions": "3;7;38;3", "wc_limitations": "3;36;1;4", "wc_review": "255;478;350;372", "wc_reply_reviewers": "0;90;42;70", "wc_reply_authors": "0;0;204;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 33.2490601370926 ], "wc_strengths_avg": [ 57.5, 13.6106575888162 ], "wc_weaknesses_avg": [ 184.5, 101.74109297624044 ], "wc_questions_avg": [ 12.75, 14.669270602180601 ], "wc_limitations_avg": [ 11.0, 14.474114826130128 ], "wc_review_avg": [ 363.75, 79.27286736330407 ], "wc_reply_reviewers_avg": [ 50.5, 33.77499074759311 ], "wc_reply_authors_avg": [ 51.0, 88.33459118601274 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9683296637314885, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6294585860258840359&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": ";bytedance.com;;;bytedance.com;bytedance.com;bytedance.com;bytedance.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "ByteDance", "aff_unique_dep": "", "aff_unique_url": "https://www.bytedance.com", "aff_unique_abbr": "Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "O6YRAOfHGt", "title": "Enhancing the Hierarchical Environment Design via Generative Trajectory Modeling", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unsupervised Environment Design (UED) is a paradigm that automatically generates a curriculum of training environments, enabling agents trained in these environments to develop general capabilities, i.e., achieving good zero-shot transfer performance. However, existing UED approaches focus primarily on the random generation of environments for open-ended agent training. This is impractical in resource-limited scenarios where there is a constraint on the number of environments that can be generated. In this paper, we introduce a hierarchical MDP framework for environment design under resource constraints. It consists of an upper-level RL teacher agent that generates suitable training environments for a lower-level student agent. The RL teacher can leverage previously discovered environment structures and generate environments at the frontier of the student's capabilities by observing the student policy's representation. Additionally, to alleviate the time-consuming process of collecting the experience of the upper-level teacher, we utilize recent advances in generative modeling to synthesize a trajectory dataset for training the teacher agent. Our method significantly reduces the resource-intensive interactions between agents and environments, and empirical experiments across various domains demonstrate the effectiveness of our approach.", "keywords": "hierarchical MDP;environment design;general capability;generative model", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/9d2cbe1f8a79db1e015e8b64cec10ce14f813b00.zip", "author": "Dexun Li;Pradeep Varakantham", "authorids": "~Dexun_Li1;~Pradeep_Varakantham1", "gender": "M;M", "homepage": ";http://www.mysmu.edu.sg/faculty/pradeepv", "dblp": "https://dblp.uni-trier.de/pid/130/1878.html;72/759", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=BAdQpFkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dexun_Li1;~Pradeep_Varakantham1", "aff": "Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg", "position": "PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024enhancing,\ntitle={Enhancing the Hierarchical Environment Design via Generative Trajectory Modeling},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=O6YRAOfHGt}\n}", "github": "", "project": "", "reviewers": "EFDP;AEHi;kFnL;8F47", "site": "https://openreview.net/forum?id=O6YRAOfHGt", "pdf_size": 5866113, "rating": "4;4;5;6", "confidence": "3;3;3;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "2;2;2;2", "wc_summary": "72;55;94;150", "wc_strengths": "76;29;48;69", "wc_weaknesses": "252;221;72;406", "wc_questions": "40;40;45;239", "wc_limitations": "4;9;20;43", "wc_review": "444;354;279;907", "wc_reply_reviewers": "45;233;15;445", "wc_reply_authors": "298;530;0;1002", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;1;3", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 92.75, 35.8285849567074 ], "wc_strengths_avg": [ 55.5, 18.445866745696716 ], "wc_weaknesses_avg": [ 237.75, 118.6009591023614 ], "wc_questions_avg": [ 91.0, 85.47221770844605 ], "wc_limitations_avg": [ 19.0, 15.016657417681207 ], "wc_review_avg": [ 496.0, 244.37573529301145 ], "wc_reply_reviewers_avg": [ 184.5, 172.0486849702723 ], "wc_reply_authors_avg": [ 457.5, 366.2249991466994 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11451444390052654217&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Achieving Near-Optimal Convergence for Distributed Minimax Optimization with Adaptive Stepsizes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95391", "id": "O7IN4nsaIO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O7IN4nsaIO", "openreview": "https://openreview.net/forum?id=O7IN4nsaIO", "poster": "/media/PosterPDFs/NeurIPS%202024/95391.png?t=1731296758.0936031", "project": "", "author_site": "Yan Huang, Xiang Li, Yipeng Shen, Niao He, Jinming Xu", "tldr": "", "abstract": "In this paper, we show that applying adaptive methods directly to distributed minimax problems can result in non-convergence due to inconsistency in locally computed adaptive stepsizes. To address this challenge, we propose D-AdaST, a Distributed Adaptive minimax method with Stepsize Tracking. The key strategy is to employ an adaptive stepsize tracking protocol involving the transmission of two extra (scalar) variables. This protocol ensures the consistency among stepsizes of nodes, eliminating the steady-state error due to the lack of coordination of stepsizes among nodes that commonly exists in vanilla distributed adaptive methods, and thus guarantees exact convergence. For nonconvex-strongly-concave distributed minimax problems, we characterize the specific transient times that ensure time-scale separation of stepsizes and quasi-independence of networks, leading to a near-optimal convergence rate of $\\tilde{\\mathcal{O}} \\left( \\epsilon ^{-\\left( 4+\\delta \\right)} \\right)$ for any small $\\delta > 0$, matching that of the centralized counterpart. To our best knowledge, D-AdaST is the *first* distributed adaptive method achieving near-optimal convergence without knowing any problem-dependent parameters for nonconvex minimax problems. Extensive experiments are conducted to validate our theoretical results.", "keywords": "Minimax Optimization;Distributed Learning;Nonconvex Optimization;Convergence Analysis;Stepsize Inconsistency", "primary_area": "optimization", "supplementary_material": "/attachment/5fb36ba28e1deeb1202cd4b3c2423771ebb2ec3c.zip", "author": "Yan Huang;Xiang Li;Yipeng Shen;Niao He;Jinming Xu", "authorids": "~Yan_Huang13;~Xiang_Li38;~Yipeng_Shen1;~Niao_He3;~Jinming_Xu1", "gender": ";M;M;;M", "homepage": ";https://shawnli.me;;;https://jinmingxu.github.io/", "dblp": ";40/1491;;;143/5957", "google_scholar": ";VFwF8tEAAAAJ;;;MuflLyIAAAAJ", "orcid": ";;0009-0006-7504-4456;;", "linkedin": ";;;;", "or_profile": "~Yan_Huang13;~Xiang_Li38;~Yipeng_Shen1;~Niao_He3;~Jinming_Xu1", "aff": ";ETHZ - ETH Zurich;Zhejiang University;;Zhejiang University", "aff_domain": ";ethz.ch;zju.edu.cn;;zju.edu.cn", "position": ";PhD student;MS student;;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024achieving,\ntitle={Achieving Near-Optimal Convergence for Distributed Minimax Optimization with Adaptive Stepsizes},\nauthor={Yan Huang and Xiang Li and Yipeng Shen and Niao He and Jinming Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O7IN4nsaIO}\n}", "github": "", "reviewers": "KXvx;Nhkx;vU1y;rQb7", "pdf_size": 1746680, "rating": "6;6;6;6", "confidence": "3;3;2;2", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "54;28;65;198", "wc_strengths": "30;2;80;99", "wc_weaknesses": "7;2;43;140", "wc_questions": "85;218;3;94", "wc_limitations": "14;1;18;12", "wc_review": "190;251;209;543", "wc_reply_reviewers": "17;229;15;14", "wc_reply_authors": "21;695;21;21", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.25, 65.902864125924 ], "wc_strengths_avg": [ 52.75, 38.64825351810868 ], "wc_weaknesses_avg": [ 48.0, 55.421115109676386 ], "wc_questions_avg": [ 100.0, 76.8016926896797 ], "wc_limitations_avg": [ 11.25, 6.299801584177076 ], "wc_review_avg": [ 298.25, 143.01988498107528 ], "wc_reply_reviewers_avg": [ 68.75, 92.52668533996017 ], "wc_reply_authors_avg": [ 189.5, 291.8505610753558 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oeUJ0U-02YMJ:scholar.google.com/&scioq=Achieving+Near-Optimal+Convergence+for+Distributed+Minimax+Optimization+with+Adaptive+Stepsizes&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";ethz.ch;zju.edu.cn;;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.zju.edu.cn", "aff_unique_abbr": "ETHZ;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;China" }, { "title": "Shadowheart SGD: Distributed Asynchronous SGD with Optimal Time Complexity Under Arbitrary Computation and Communication Heterogeneity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95390", "id": "O8yHsRLwPl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O8yHsRLwPl", "openreview": "https://openreview.net/forum?id=O8yHsRLwPl", "poster": "/media/PosterPDFs/NeurIPS%202024/95390.png?t=1730556563.8793452", "project": "", "author_site": "Alexander Tyurin, Marta Pozzi, Ivan Ilin, Peter Richtarik", "tldr": "", "abstract": "We consider nonconvex stochastic optimization problems in the asynchronous centralized distributed setup where the communication times from workers to a server can not be ignored, and the computation and communication times are potentially different for all workers. Using an unbiassed compression technique, we develop a new method\u2014Shadowheart SGD\u2014that provably improves the time complexities of all previous centralized methods. Moreover, we show that the time complexity of Shadowheart SGD is optimal in the family of centralized methods with compressed communication. We also consider the bidirectional setup, where broadcasting from the server to the workers is non-negligible, and develop a corresponding method.", "keywords": "nonconvex optimization;parallel methods;asynchronous methods;lower bounds;compressed communication", "primary_area": "optimization", "supplementary_material": "/attachment/e40844a7aec443a62ec0c2c24195af52ec55d2d9.zip", "author": "Alexander Tyurin;Marta Pozzi;Ivan Ilin;Peter Richt\u00e1rik", "authorids": "~Alexander_Tyurin1;~Marta_Pozzi1;~Ivan_Ilin1;~Peter_Richt\u00e1rik1", "gender": "M;F;M;M", "homepage": "https://k3nfalt.github.io/;;https://ivan-ilin.netlify.app;https://richtarik.org", "dblp": "203/8919;;;62/8001", "google_scholar": ";;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-4380-5848", "linkedin": ";marta-pozzi-b061a5289/;;richtarik/", "or_profile": "~Alexander_Tyurin1;~Marta_Pozzi1;~Ivan_Ilin1;~Peter_Richtarik1", "aff": "KAUST;University of Pavia;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;unipv.it;kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\ntyurin2024shadowheart,\ntitle={Shadowheart {SGD}: Distributed Asynchronous {SGD} with Optimal Time Complexity Under Arbitrary Computation and Communication Heterogeneity},\nauthor={Alexander Tyurin and Marta Pozzi and Ivan Ilin and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O8yHsRLwPl}\n}", "github": "", "reviewers": "U2d9;Ptpi;VesE;zjmf", "pdf_size": 1891061, "rating": "5;6;7;7", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "69;123;37;39", "wc_strengths": "88;160;39;65", "wc_weaknesses": "112;298;100;91", "wc_questions": "11;82;6;58", "wc_limitations": "8;11;12;1", "wc_review": "288;674;194;254", "wc_reply_reviewers": "0;19;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.0, 34.72751070837067 ], "wc_strengths_avg": [ 88.0, 45.03887209955418 ], "wc_weaknesses_avg": [ 150.25, 85.6281933711088 ], "wc_questions_avg": [ 39.25, 31.948200262299597 ], "wc_limitations_avg": [ 8.0, 4.301162633521313 ], "wc_review_avg": [ 352.5, 188.64450694361605 ], "wc_reply_reviewers_avg": [ 8.0, 8.276472678623424 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15435696907502294044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;unipv.it;kaust.edu.sa;kaust.edu.sa", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology;University of Pavia", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaust.edu.sa;https://www.unipv.eu", "aff_unique_abbr": "KAUST;UNIPV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Saudi Arabia;Italy" }, { "title": "GDeR: Safeguarding Efficiency, Balancing, and Robustness via Prototypical Graph Pruning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95389", "id": "O97BzlN9Wh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O97BzlN9Wh", "openreview": "https://openreview.net/forum?id=O97BzlN9Wh", "poster": "", "project": "", "author_site": "Guibin Zhang, Haonan Dong, yuchen zhang, Zhixun Li, Dingshuo Chen, Kai Wang, Tianlong Chen, Yuxuan Liang, Dawei Cheng, Kun Wang", "tldr": "", "abstract": "Training high-quality deep models necessitates vast amounts of data, resulting in overwhelming computational and memory demands. Recently, data pruning, distillation, and coreset selection have been developed to streamline data volume by \\textit{retaining}, \\textit{synthesizing}, or \\textit{selecting} a small yet informative subset from the full set. Among these methods, data pruning incurs the least additional training cost and offers the most practical acceleration benefits. However, it is the most vulnerable, often suffering significant performance degradation with imbalanced or biased data schema, thus raising concerns about its accuracy and reliability in on-device deployment. Therefore, there is a looming need for a new data pruning paradigm that maintains the efficiency of previous practices while ensuring balance and robustness.\nUnlike the fields of computer vision and natural language processing, where mature solutions have been developed to address these issues, graph neural networks (GNNs) continue to struggle with increasingly large-scale, imbalanced, and noisy datasets, lacking a unified dataset pruning solution. \nTo achieve this, we introduce a novel dynamic soft-pruning method, \\ourmethod, designed to update the training ``basket'' during the process using trainable prototypes. \\ourmethod first constructs a well-modeled graph embedding hypersphere and then samples \\textit{representative, balanced, and unbiased subsets} from this embedding space, which achieves the goal we called {\\fontfamily{lmtt}\\selectfont \\textbf{Graph Training Debugging}}.\nExtensive experiments on four datasets across three GNN backbones, demonstrate that \\ourmethod (I) achieves or surpasses the performance of the full dataset with $30\\%\\sim50\\%$ fewer training samples, (II) attains up to a $2.81\\times$ lossless training speedup, and (III) outperforms state-of-the-art pruning methods in imbalanced training and noisy training scenarios by $0.3\\%\\sim4.3\\%$ and $3.6\\%\\sim7.8\\%$, respectively.", "keywords": "data pruning;graph pruning;graph neural networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Guibin Zhang;Haonan Dong;Yuchen Zhang;Zhixun Li;Dingshuo Chen;Kai Wang;Tianlong Chen;Yuxuan Liang;Dawei Cheng;Kun Wang", "authorids": "~Guibin_Zhang1;~Haonan_Dong1;~Yuchen_Zhang8;~Zhixun_Li1;~Dingshuo_Chen1;~Kai_Wang8;~Tianlong_Chen1;~Yuxuan_Liang1;~Dawei_Cheng1;~Kun_Wang15", "gender": ";M;M;M;M;M;M;M;M;M", "homepage": ";https://github.com/ins1stenc3;https://yuczhang.com/;;;https://kaiwang960112.github.io/;https://tianlong-chen.github.io;https://yuxuanliang.com;http://cs1.tongji.edu.cn/~dawei/;http://home.ustc.edu.cn/~wk520529/#home", "dblp": ";315/0299.html;;;289/7535;78/2022-36;;183/0977;135/6864;", "google_scholar": ";;Y2oqeP0AAAAJ;;jvrhEfIAAAAJ;i2II0XIAAAAJ;LE3ctn0AAAAJ;n9cODgcAAAAJ;4UD20ukAAAAJ;UnyqjWQAAAAJ", "orcid": ";;;0000-0001-6750-9002;;0000-0002-1154-5175;0000-0001-7774-8197;0000-0003-2817-7337;0000-0002-5877-7387;0000-0003-0602-169X", "linkedin": ";;;;;;tianlong-chen-783862167/;yoshall/;;", "or_profile": "~Guibin_Zhang1;~Haonan_Dong1;~Yuchen_Zhang8;~Zhixun_Li1;~Dingshuo_Chen1;~Kai_Wang8;~Tianlong_Chen1;~Yuxuan_Liang1;~Dawei_Cheng1;~Kun_Wang15", "aff": ";Tongji University;University of Electronic Science and Technology of China;The Chinese University of Hong Kong;Institute of automation, Chinese Academy of Sciences;National University of Singapore;Harvard University;The Hong Kong University of Science and Technology (Guangzhou);Tongji University;University of Science and Technology of China", "aff_domain": ";tongji.edu.cn;uestc.edu.cn;se.cuhk.edu.hk;ia.ac.cn;u.nus.edu;harvard.edu;hkust-gz.edu.cn;tongji.edu.cn;ustc.edu.cn", "position": ";Undergrad student;Undergrad student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024gder,\ntitle={{GD}eR: Safeguarding Efficiency, Balancing, and Robustness via Prototypical Graph Pruning},\nauthor={Guibin Zhang and Haonan Dong and Yuchen Zhang and Zhixun Li and Dingshuo Chen and Kai Wang and Tianlong Chen and Yuxuan Liang and Dawei Cheng and Kun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O97BzlN9Wh}\n}", "github": "", "reviewers": "kNzQ;RKXd;zhEK", "pdf_size": 2437219, "rating": "6;7;8", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "117;63;71", "wc_strengths": "86;81;113", "wc_weaknesses": "136;188;208", "wc_questions": "26;52;44", "wc_limitations": "7;7;1", "wc_review": "372;391;437", "wc_reply_reviewers": "0;17;49", "wc_reply_authors": "0;23;29", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 83.66666666666667, 23.79542439676633 ], "wc_strengths_avg": [ 93.33333333333333, 14.055445761538676 ], "wc_weaknesses_avg": [ 177.33333333333334, 30.346151137976115 ], "wc_questions_avg": [ 40.666666666666664, 10.873004286866728 ], "wc_limitations_avg": [ 5.0, 2.8284271247461903 ], "wc_review_avg": [ 400.0, 27.28858125052797 ], "wc_reply_reviewers_avg": [ 22.0, 20.314198646923455 ], "wc_reply_authors_avg": [ 17.333333333333332, 12.498888839501783 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=997954290760039479&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";tongji.edu.cn;uestc.edu.cn;se.cuhk.edu.hk;ia.ac.cn;u.nus.edu;harvard.edu;hkust-gz.edu.cn;tongji.edu.cn;ustc.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;6;0;7", "aff_unique_norm": "Tongji University;University of Electronic Science and Technology of China;Chinese University of Hong Kong;Chinese Academy of Sciences;National University of Singapore;Harvard University;Hong Kong University of Science and Technology;University of Science and Technology of China", "aff_unique_dep": ";;;Institute of Automation;;;;", "aff_unique_url": "https://www.tongji.edu.cn;https://www.uestc.edu.cn;https://www.cuhk.edu.hk;http://www.ia.cas.cn;https://www.nus.edu.sg;https://www.harvard.edu;https://www.ust.hk;http://www.ustc.edu.cn", "aff_unique_abbr": "Tongji;UESTC;CUHK;CAS;NUS;Harvard;HKUST;USTC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Guangzhou", "aff_country_unique_index": "0;0;0;0;1;2;0;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Abrupt Learning in Transformers: A Case Study on Matrix Completion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95388", "id": "O9RZAEp34l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=O9RZAEp34l", "openreview": "https://openreview.net/forum?id=O9RZAEp34l", "poster": "/media/PosterPDFs/NeurIPS%202024/95388.png?t=1733256426.226655", "project": "", "author_site": "Pulkit Gopalani, Ekdeep S Lubana, Wei Hu", "tldr": "", "abstract": "Recent analysis on the training dynamics of Transformers has unveiled an interesting characteristic: the training loss plateaus for a significant number of training steps, and then suddenly (and sharply) drops to near--optimal values. To understand this phenomenon in depth, we formulate the low-rank matrix completion problem as a masked language modeling (MLM) task, and show that it is possible to train a BERT model to solve this task to low error. Furthermore, the loss curve shows a plateau early in training followed by a sudden drop to near-optimal values, despite no changes in the training procedure or hyper-parameters. To gain interpretability insights into this sudden drop, we examine the model's predictions, attention heads, and hidden states before and after this transition. Concretely, we observe that (a) the model transitions from simply copying the masked input to accurately predicting the masked entries; (b) the attention heads transition to interpretable patterns relevant to the task; and (c) the embeddings and hidden states encode information relevant to the problem. We also analyze the training dynamics of individual model components to understand the sudden drop in loss.", "keywords": "Science of language models;matrix completion;BERT;phase transition;interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Pulkit Gopalani;Ekdeep Singh Lubana;Wei Hu", "authorids": "~Pulkit_Gopalani1;~Ekdeep_Singh_Lubana1;~Wei_Hu1", "gender": ";M;M", "homepage": ";https://ekdeepslubana.github.io/;https://weihu.me", "dblp": ";228/2683;", "google_scholar": ";https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;ZybgAqkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Pulkit_Gopalani1;~Ekdeep_Singh_Lubana1;~Wei_Hu1", "aff": ";University of Michigan;University of Michigan - Ann Arbor", "aff_domain": ";umich.edu;umich.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngopalani2024abrupt,\ntitle={Abrupt Learning in Transformers: A Case Study on Matrix Completion},\nauthor={Pulkit Gopalani and Ekdeep Singh Lubana and Wei Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=O9RZAEp34l}\n}", "github": "", "reviewers": "Kxvs;eRgv;1DQa;LwY5;h7Ji", "pdf_size": 24738204, "rating": "4;5;6;6;7", "confidence": "4;3;3;3;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;2;3", "presentation": "4;3;4;2;4", "wc_summary": "93;70;76;150;84", "wc_strengths": "22;50;19;73;131", "wc_weaknesses": "71;67;56;71;227", "wc_questions": "56;116;5;85;63", "wc_limitations": "22;2;2;12;26", "wc_review": "264;305;158;391;531", "wc_reply_reviewers": "0;168;4;0;63", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.8 ], "wc_summary_avg": [ 94.6, 28.758303148829903 ], "wc_strengths_avg": [ 59.0, 41.060930335295616 ], "wc_weaknesses_avg": [ 98.4, 64.53402203489257 ], "wc_questions_avg": [ 65.0, 36.56774535024001 ], "wc_limitations_avg": [ 12.8, 9.927738916792684 ], "wc_review_avg": [ 329.8, 125.4486349068813 ], "wc_reply_reviewers_avg": [ 47.0, 65.05997233322498 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.08006407690254366, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7357792208572977501&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 3, "email": ";umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Barely Random Algorithms and Collective Metrical Task Systems", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95387", "id": "OAjHFvrTbq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OAjHFvrTbq", "openreview": "https://openreview.net/forum?id=OAjHFvrTbq", "poster": "/media/PosterPDFs/NeurIPS%202024/95387.png?t=1731689815.5999358", "project": "", "author_site": "Romain Cosson, Laurent Massouli\u00e9", "tldr": "", "abstract": "We consider metrical task systems on general metric spaces with $n$ points, and show that any fully randomized algorithm can be turned into a randomized algorithm that uses only $2\\log n$ random bits, and achieves the same competitive ratio up to a factor $2$. This provides the first order-optimal barely random algorithms for metrical task systems, i.e. which use a number of random bits that does not depend on the number of requests addressed to the system. We discuss implications on various aspects of online decision making such as: distributed systems, advice complexity and transaction costs, suggesting broad applicability. We put forward an equivalent view that we call collective metrical task systems where $k$ agents in a metrical task system team up, and suffer the average cost paid by each agent. Our results imply that such team can be $O(\\log^2 n)$-competitive as soon as $k\\geq n^2$. In comparison, a single agent is always $\\Omega(n)$-competitive.", "keywords": "online algorithms;learning-augmented algorithms;barely random;advice complexity;metrical task systems;competitive ratio;online learning;randomness;hysteresis;transaction costs.", "primary_area": "online_learning", "supplementary_material": "", "author": "Romain Cosson;Laurent Massouli\u00e9", "authorids": "~Romain_Cosson1;~Laurent_Massouli\u00e91", "gender": ";", "homepage": "https://romcos.github.io/;https://www.di.ens.fr/laurent.massoulie/", "dblp": "267/5645;58/4130", "google_scholar": ";https://scholar.google.fr/citations?user=TvVmLjUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Romain_Cosson1;~Laurent_Massouli\u00e91", "aff": "Inria, Paris;INRIA", "aff_domain": "inria.fr;inria.fr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncosson2024barely,\ntitle={Barely Random Algorithms and Collective Metrical Task Systems},\nauthor={Romain Cosson and Laurent Massouli{\\'e}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OAjHFvrTbq}\n}", "github": "", "reviewers": "1Eyx;Kc9y;stvh;DDub", "pdf_size": 326589, "rating": "6;6;7;8", "confidence": "3;3;4;5", "soundness": "4;4;3;4", "novelty": "3;3;3;4", "presentation": "4;3;3;4", "wc_summary": "199;132;96;218", "wc_strengths": "74;48;85;61", "wc_weaknesses": "36;63;20;14", "wc_questions": "114;29;77;1", "wc_limitations": "6;9;7;1", "wc_review": "429;281;285;295", "wc_reply_reviewers": "23;142;5;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 161.25, 49.39319285083725 ], "wc_strengths_avg": [ 67.0, 13.874436925511608 ], "wc_weaknesses_avg": [ 33.25, 18.965429075030176 ], "wc_questions_avg": [ 55.25, 43.464784596268274 ], "wc_limitations_avg": [ 5.75, 2.947456530637899 ], "wc_review_avg": [ 322.5, 61.69886546768911 ], "wc_reply_reviewers_avg": [ 42.5, 58.07968663827311 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kF83AonVQyUJ:scholar.google.com/&scioq=Barely+Random+Algorithms+and+Collective+Metrical+Task+Systems&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "inria.fr;inria.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "Inria", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Honor Among Bandits: No-Regret Learning for Online Fair Division", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95386", "id": "OCQbC0eDJJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OCQbC0eDJJ", "openreview": "https://openreview.net/forum?id=OCQbC0eDJJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95386.png?t=1731447154.8007793", "project": "", "author_site": "Ariel Procaccia, Ben Schiffer, Shirley Zhang", "tldr": "", "abstract": "We consider the problem of online fair division of indivisible goods to players when there are a finite number of types of goods and player values are drawn from distributions with unknown means. Our goal is to maximize social welfare subject to allocating the goods fairly in expectation. When a player's value for an item is unknown at the time of allocation, we show that this problem reduces to a variant of (stochastic) multi-armed bandits, where there exists an arm for each player's value for each type of good. At each time step, we choose a distribution over arms which determines how the next item is allocated. We consider two sets of fairness constraints for this problem: envy-freeness in expectation and proportionality in expectation. Our main result is the design of an explore-then-commit algorithm that achieves $\\tilde{O}(T^{2/3})$ regret while maintaining either fairness constraint. This result relies on unique properties fundamental to fair-division constraints that allow faster rates of learning, despite the restricted action space.", "keywords": "Fair Division;Multi-Armed Bandits", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Ariel D. Procaccia;Benjamin Schiffer;Shirley Zhang", "authorids": "~Ariel_D._Procaccia1;~Benjamin_Schiffer1;~Shirley_Zhang1", "gender": ";;", "homepage": ";;", "dblp": ";238/1246;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";benjamin-s-387965148;", "or_profile": "~Ariel_D._Procaccia1;~Benjamin_Schiffer1;~Shirley_Zhang1", "aff": ";Harvard University;", "aff_domain": ";g.harvard.edu;", "position": ";PhD student;", "bibtex": "@inproceedings{\nprocaccia2024honor,\ntitle={Honor Among Bandits: No-Regret Learning for Online Fair Division},\nauthor={Ariel D. Procaccia and Benjamin Schiffer and Shirley Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OCQbC0eDJJ}\n}", "github": "", "reviewers": "V1uC;Mvzr;MmvH", "pdf_size": 499849, "rating": "7;7;8", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "302;168;86", "wc_strengths": "100;61;19", "wc_weaknesses": "267;193;1", "wc_questions": "340;2;47", "wc_limitations": "34;1;1", "wc_review": "1043;425;154", "wc_reply_reviewers": "182;156;0", "wc_reply_authors": "160;60;0", "reply_reviewers": "2;1;0", "reply_authors": "2;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 185.33333333333334, 89.02933349308094 ], "wc_strengths_avg": [ 60.0, 33.075670817082454 ], "wc_weaknesses_avg": [ 153.66666666666666, 112.09916245102916 ], "wc_questions_avg": [ 129.66666666666666, 149.85845173214474 ], "wc_limitations_avg": [ 12.0, 15.556349186104045 ], "wc_review_avg": [ 540.6666666666666, 372.03434727932785 ], "wc_reply_reviewers_avg": [ 112.66666666666667, 80.3713602942961 ], "wc_reply_authors_avg": [ 73.33333333333333, 65.99663291074444 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6019557323044450117&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": ";g.harvard.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Mining and Transferring Feature-Geometry Coherence for Unsupervised Point Cloud Registration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95385", "id": "OCcfKzXded", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OCcfKzXded", "openreview": "https://openreview.net/forum?id=OCcfKzXded", "poster": "/media/PosterPDFs/NeurIPS%202024/95385.png?t=1730446203.5882936", "project": "", "author_site": "KeZheng Xiong, Haoen Xiang, Qingshan Xu, Chenglu Wen, Siqi Shen, Jonathan Jun LI, Cheng Wang", "tldr": "", "abstract": "Point cloud registration, a fundamental task in 3D vision, has achieved remark\u0002able success with learning-based methods in outdoor environments. Unsupervised outdoor point cloud registration methods have recently emerged to circumvent the need for costly pose annotations. However, they fail to establish reliable optimization objectives for unsupervised training, either relying on overly strong geometric assumptions, or suffering from poor-quality pseudo-labels due to inadequate integration of low-level geometric and high-level contextual information. We have observed that in the feature space, latent new inlier correspondences tend to cluster\naround respective positive anchors that summarize features of existing inliers. Motivated by this observation, we propose a novel unsupervised registration method termed INTEGER to incorporate high-level contextual information for reliable pseudo-label mining. Specifically, we propose the Feature-Geometry Coherence Mining module to dynamically adapt the teacher for each mini-batch of data during training and discover reliable pseudo-labels by considering both high-level feature representations and low-level geometric cues. Furthermore, we propose Anchor-Based Contrastive Learning to facilitate contrastive learning with anchors for a robust feature space. Lastly, we introduce a Mixed-Density Student to learn density-invariant features, addressing challenges related to density variation and low overlap in the outdoor scenario. Extensive experiments on KITTI and nuScenes datasets demonstrate that our INTEGER achieves competitive performance in terms of accuracy and generalizability.", "keywords": "Unsupervised Point Cloud Registration;Deep Learning;Computer Vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "KeZheng Xiong;Haoen Xiang;Qingshan Xu;Chenglu Wen;Siqi Shen;Jonathan Li;Cheng Wang", "authorids": "~KeZheng_Xiong1;~Haoen_Xiang1;~Qingshan_Xu1;~Chenglu_Wen1;~Siqi_Shen5;~Jonathan_Li2;~Cheng_Wang2", "gender": "M;M;;;M;M;M", "homepage": ";https://github.com/a7f4123;;;https://uwaterloo.ca/geospatial-intelligence/profiles/jonathan-li;https://chwang.xmu.edu.cn/index_en.htm;https://asc.xmu.edu.cn/t/shensiqi", "dblp": "364/5890;;32/9530-1;140/4398;85/6906-1;54/2062-3;37/8026", "google_scholar": "https://scholar.google.com/citations?hl=en;;k96kDhsAAAAJ;;https://scholar.google.ca/citations?user=7hyYxRkAAAAJ;https://scholar.google.com/citations?hl=en;gFKYanAAAAAJ", "orcid": ";;;;;0000-0001-6075-796X;", "linkedin": "%E6%81%AA%E5%B3%A5-%E7%86%8A-642236118;;;;;;", "or_profile": "~KeZheng_Xiong1;~Haoen_Xiang1;~Qingshan_Xu1;~Chenglu_Wen1;~Jonathan_Li2;~Cheng_Wang2;~Siqi_SHEN2", "aff": "Xiamen University;Xiamen University;Nanyang Technological University;Xiamen University;University of Waterloo;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu;ntu.edu.sg;xmu.edu.cn;uwaterloo.ca;xmu.edu.cn;xmu.edu.cn", "position": "Undergrad student;MS student;Research Fellow;Full Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxiong2024mining,\ntitle={Mining and Transferring Feature-Geometry Coherence for Unsupervised Point Cloud Registration},\nauthor={KeZheng Xiong and Haoen Xiang and Qingshan Xu and Chenglu Wen and Siqi Shen and Jonathan Li and Cheng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OCcfKzXded}\n}", "github": "", "reviewers": "SEpR;2VKD;Bwh6;FNCk", "pdf_size": 6839234, "rating": "4;5;5;6", "confidence": "5;4;4;2", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "70;114;79;48", "wc_strengths": "136;146;47;71", "wc_weaknesses": "76;266;48;45", "wc_questions": "408;132;66;65", "wc_limitations": "6;99;4;7", "wc_review": "696;757;244;236", "wc_reply_reviewers": "325;84;13;11", "wc_reply_authors": "889;171;96;88", "reply_reviewers": "3;1;1;1", "reply_authors": "6;3;3;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 23.7736724129866 ], "wc_strengths_avg": [ 100.0, 42.017853348308975 ], "wc_weaknesses_avg": [ 108.75, 91.5897783598148 ], "wc_questions_avg": [ 167.75, 141.34067885785748 ], "wc_limitations_avg": [ 29.0, 40.428950023467095 ], "wc_review_avg": [ 483.25, 244.22057141035438 ], "wc_reply_reviewers_avg": [ 108.25, 128.54838583195044 ], "wc_reply_authors_avg": [ 311.0, 335.27526004762115 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9733285267845752, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=78835377851407752&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "xmu.edu.cn;xmu.edu;ntu.edu.sg;xmu.edu.cn;uwaterloo.ca;xmu.edu.cn;xmu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "Xiamen University;Nanyang Technological University;University of Waterloo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.ntu.edu.sg;https://uwaterloo.ca", "aff_unique_abbr": "XMU;NTU;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0;0", "aff_country_unique": "China;Singapore;Canada" }, { "id": "OCrxDanhoO", "title": "BIGOS V2 Benchmark for Polish ASR: Curated Datasets and Tools for Reproducible Evaluation", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Speech datasets available in the public domain are often underutilized because of challenges in accessibility and interoperability. To address this, a system to survey, catalog, and curate existing speech datasets was developed, enabling reproducible evaluation of automatic speech recognition (ASR) systems. The system was applied to curate over 24 datasets and evaluate 25 ASR models, with a specific focus on Polish. This research represents the most extensive comparison to date of commercial and free ASR systems for the Polish language, drawing insights from 600 system-model-test set evaluations across 8 analysis scenarios. Curated datasets and benchmark results are available publicly. The evaluation tools are open-sourced to support reproducibility of the benchmark, encourage community-driven improvements, and facilitate adaptation for other languages.", "keywords": "speech dataset;automatic speech recognition;evaluation;benchmark;bigos;polish;asr", "primary_area": "", "supplementary_material": "", "author": "Micha\u0142 Junczyk", "authorids": "~Micha\u0142_Junczyk1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "orcid": "0000-0002-7545-5152", "linkedin": "", "or_profile": "~Micha\u0142_Junczyk1", "aff": "Adam Mickiewicz University of Poznan", "aff_domain": "amu.edu.pl", "position": "PhD student", "bibtex": "@inproceedings{\njunczyk2024bigos,\ntitle={{BIGOS} Benchmark for Polish {ASR}: Curated Datasets and Tools for Reproducible Evaluation},\nauthor={Micha{\\l} Junczyk},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=OCrxDanhoO}\n}", "github": "", "project": "", "reviewers": "B4Cv;j19w;h4WC;VBUM", "site": "https://openreview.net/forum?id=OCrxDanhoO", "pdf_size": 2155152, "rating": "5;7;7;7", "confidence": "5;3;4;4", "wc_summary_and_contributions": "39;78;26;107", "wc_strengths": "2;5;25;115", "wc_improvement": "120;156;48;136", "wc_limitations": "2;1;81;112", "wc_correctness": "2;1;55;105", "wc_clarity": "2;6;69;37", "wc_relation_to_prior_work": "2;5;19;78", "wc_documentation": "2;14;13;128", "wc_additional_feedback": "1;1;1;1", "wc_review": "172;267;337;819", "wc_reply_reviewers": "21;36;0;0", "wc_reply_authors": "548;371;262;300", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 62.5, 32.035136959282696 ], "wc_strengths_avg": [ 36.75, 46.03463370116026 ], "wc_improvement_avg": [ 115.0, 40.7308237088326 ], "wc_limitations_avg": [ 49.0, 48.74935897014442 ], "wc_correctness_avg": [ 40.75, 43.04866432306582 ], "wc_clarity_avg": [ 28.5, 27.02313823374332 ], "wc_relation_to_prior_work_avg": [ 26.0, 30.700162866017504 ], "wc_documentation_avg": [ 39.25, 51.45568481713172 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 398.75, 249.59805187541028 ], "wc_reply_reviewers_avg": [ 14.25, 15.20485119953497 ], "wc_reply_authors_avg": [ 370.25, 109.82799051243722 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_rFXLkC__uQJ:scholar.google.com/&scioq=BIGOS+V2+Benchmark+for+Polish+ASR:+Curated+Datasets+and+Tools+for+Reproducible+Evaluation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "Adam Mickiewicz University", "aff_unique_dep": "", "aff_unique_url": "https://www.amu.edu.pl", "aff_unique_abbr": "AMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Poznan", "aff_country_unique_index": "0", "aff_country_unique": "Poland" }, { "title": "M$^3$GPT: An Advanced Multimodal, Multitask Framework for Motion Comprehension and Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95384", "id": "ODbTlAs0Oj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ODbTlAs0Oj", "openreview": "https://openreview.net/forum?id=ODbTlAs0Oj", "poster": "/media/PosterPDFs/NeurIPS%202024/95384.png?t=1730214920.422947", "project": "", "author_site": "Mingshuang Luo, RuiBing Hou, Zhuo Li, Hong Chang, Zimo Liu, Yaowei Wang, Shiguang Shan", "tldr": "", "abstract": "This paper presents M$^3$GPT, an advanced $\\textbf{M}$ultimodal, $\\textbf{M}$ultitask framework for $\\textbf{M}$otion comprehension and generation. M$^3$GPT operates on three fundamental principles. The first focuses on creating a unified representation space for various motion-relevant modalities. We employ discrete vector quantization for multimodal conditional signals, such as text, music and motion/dance, enabling seamless integration into a large language model (LLM) with a single vocabulary.\nThe second involves modeling motion generation directly in the raw motion space. This strategy circumvents the information loss associated with a discrete tokenizer, resulting in more detailed and comprehensive motion generation. \nThird, M$^3$GPT learns to model the connections and synergies among various motion-relevant tasks. Text, the most familiar and well-understood modality for LLMs, is utilized as a bridge to establish connections between different motion tasks, facilitating mutual \nreinforcement. To our knowledge, M$^3$GPT is the first model capable of comprehending and generating motions based on multiple signals.\nExtensive experiments highlight M$^3$GPT's superior performance across various motion-relevant tasks and its powerful zero-shot generalization capabilities for extremely challenging tasks. Project page: \\url{https://github.com/luomingshuang/M3GPT}.", "keywords": "motion comprehension; motion generation; multitask; multimodal; large language model", "primary_area": "machine_vision", "supplementary_material": "/attachment/d6b69d92318d6dcde03063b7944aa6c6d80b68d3.zip", "author": "Mingshuang Luo;RuiBing Hou;Zhuo Li;Hong Chang;Zimo Liu;Yaowei Wang;Shiguang Shan", "authorids": "~Mingshuang_Luo1;~RuiBing_Hou1;~Zhuo_Li3;~Hong_Chang1;~Zimo_Liu1;~Yaowei_Wang1;~Shiguang_Shan2", "gender": "M;F;M;F;;M;M", "homepage": "https://github.com/luomingshuang;https://scholar.google.com.tw/citations?hl=zh-CN&view_op=list_works&gmla=AJsN-F4WR9JNhX176XHhFNyY-uOoxAkSKlLh3wyOGdmZz2O2DsHREcsE__DkYGPfqQhe6mOj1V68pipS94MgujusUy5PRDZ1ZgpejCjdktRkQtfpffMKWW0&user=VfS4cisAAAAJ;https://interestingzhuo.github.io/;;;https://dblp.org/pid/68/2992.html;http://vipl.ict.ac.cn/people/sgshan/", "dblp": "260/0750;;;;211/7223;68/2992-1;s/ShiguangShan", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;DgWtX4AAAAAJ;LX6MnNsAAAAJ;qj2tU6oAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ", "orcid": ";;;;;0000-0002-6110-4036;0000-0002-8348-392X", "linkedin": ";;;;;yaowei-wang-971ab310/;", "or_profile": "~Mingshuang_Luo1;~RuiBing_Hou1;~Zhuo_Li3;~Hong_Chang1;~Zimo_Liu1;~Yaowei_Wang1;~Shiguang_Shan2", "aff": "Chinese Academy of Sciences; Chinese Academy of Sciences;Wechat AI;Institute of Computing Technology, Chinese Academy of Sciences;PengCheng Lab;Pengcheng Laboratory;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;tencent.com;ict.ac.cn;pcl.ac.cn;pcl.ac.cn;ict.ac.cn", "position": "PhD student;Assistant Professor;Researcher;Full Professor;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nluo2024mgpt,\ntitle={M\\${\\textasciicircum}3\\${GPT}: An Advanced Multimodal, Multitask Framework for Motion Comprehension and Generation},\nauthor={Mingshuang Luo and RuiBing Hou and Zhuo Li and Hong Chang and Zimo Liu and Yaowei Wang and Shiguang Shan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ODbTlAs0Oj}\n}", "github": "", "reviewers": "2Gpw;Wkeh;ZwnC;KMVC", "pdf_size": 12096235, "rating": "5;5;5;6", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "130;74;120;72", "wc_strengths": "61;36;49;108", "wc_weaknesses": "105;91;250;78", "wc_questions": "33;203;116;70", "wc_limitations": "19;1;5;4", "wc_review": "348;405;540;332", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "29;29;29;29", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 26.248809496813376 ], "wc_strengths_avg": [ 63.5, 27.17075633838705 ], "wc_weaknesses_avg": [ 131.0, 69.364976753402 ], "wc_questions_avg": [ 105.5, 63.50787352761861 ], "wc_limitations_avg": [ 7.25, 6.94172168845741 ], "wc_review_avg": [ 406.25, 81.84856443457026 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 29.0, 0.0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1867883877313514979&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "ict.ac.cn;ict.ac.cn;tencent.com;ict.ac.cn;pcl.ac.cn;pcl.ac.cn;ict.ac.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;2;3;0", "aff_unique_norm": "Chinese Academy of Sciences;WeChat;Pengcheng Lab;Pengcheng Laboratory", "aff_unique_dep": ";WeChat AI;;", "aff_unique_url": "https://www.cas.cn;https://www.wechat.com;;", "aff_unique_abbr": "CAS;WeChat AI;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Stable Representations for Protein Interface Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95383", "id": "OEWBkLrRZu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OEWBkLrRZu", "openreview": "https://openreview.net/forum?id=OEWBkLrRZu", "poster": "", "project": "", "author_site": "Ziqi Gao, Zijing Liu, Yu Li, Jia Li", "tldr": "", "abstract": "The knowledge of protein interactions is crucial but challenging for drug discovery applications. This work focuses on protein interface prediction, which aims to determine whether a pair of residues from different proteins interact. Existing data-driven methods have made significant progress in effectively learning protein structures. Nevertheless, they overlook the conformational changes (i.e., flexibility) within proteins upon binding, leading to poor generalization ability. In this paper, we regard the protein flexibility as an attack on the trained model and aim to defend against it for improved generalization. To fulfill this purpose, we propose ATProt, an adversarial training framework for protein representations to robustly defend against the attack of protein flexibility. ATProt can theoretically guarantee protein representation stability under complicated protein flexibility. Experiments on various benchmarks demonstrate that ATProt consistently improves the performance for protein interface prediction. Moreover, our method demonstrates broad applicability, performing the best even when provided with testing structures from structure prediction models like ESMFold and AlphaFold2.", "keywords": "protein interface;graph learning;adversarial training;stable representation", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Ziqi Gao;Zijing Liu;Yu Li;Jia Li", "authorids": "~Ziqi_Gao1;~Zijing_Liu1;~Yu_Li4;~Jia_Li4", "gender": ";;M;M", "homepage": ";https://github.com/zj-liu;https://yu-li.github.io/;https://sites.google.com/view/lijia", "dblp": ";205/3211;34/2997-3;23/6950-9", "google_scholar": "https://scholar.google.com.hk/citations?user=UHwNFy8AAAAJ;;j9lwU7kAAAAJ;1gSbcYoAAAAJ", "orcid": ";0000-0002-0189-7409;;0000-0002-6362-4385", "linkedin": ";;;", "or_profile": "~Ziqi_Gao1;~Zijing_Liu1;~Yu_Li4;~Jia_Li4", "aff": "Hong Kong University of Science and Technology;International Digital Economy Academy;International Digital Economy Academy;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "ust.hk;idea.edu.cn;idea.edu.cn;ust.hk", "position": "PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ngao2024towards,\ntitle={Towards Stable Representations for Protein Interface Prediction},\nauthor={Ziqi Gao and Zijing Liu and Yu Li and Jia Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OEWBkLrRZu}\n}", "github": "", "reviewers": "RNrt;kBXn;RLcQ", "pdf_size": 2855195, "rating": "6;6;7", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "4;2;2", "wc_summary": "113;87;40", "wc_strengths": "122;76;31", "wc_weaknesses": "487;77;67", "wc_questions": "228;101;1", "wc_limitations": "5;21;54", "wc_review": "955;362;193", "wc_reply_reviewers": "251;0;88", "wc_reply_authors": "465;0;0", "reply_reviewers": "1;0;3", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 80.0, 30.21037349432586 ], "wc_strengths_avg": [ 76.33333333333333, 37.15134213217905 ], "wc_weaknesses_avg": [ 210.33333333333334, 195.6754682858556 ], "wc_questions_avg": [ 110.0, 92.89061667717932 ], "wc_limitations_avg": [ 26.666666666666668, 20.401524997465806 ], "wc_review_avg": [ 503.3333333333333, 326.7438684284 ], "wc_reply_reviewers_avg": [ 113.0, 103.98397312406689 ], "wc_reply_authors_avg": [ 155.0, 219.20310216782974 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3101026364504723272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ust.hk;idea.edu.cn;idea.edu.cn;ust.hk", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;International Digital Economy Academy", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;", "aff_unique_abbr": "HKUST;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "Scalable Bayesian Optimization via Focalized Sparse Gaussian Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95382", "id": "OF0YsxoRai", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OF0YsxoRai", "openreview": "https://openreview.net/forum?id=OF0YsxoRai", "poster": "/media/PosterPDFs/NeurIPS%202024/95382.png?t=1731652326.8880463", "project": "", "author_site": "Yunyue Wei, Vincent Zhuang, Saraswati Soedarmadji, Yanan Sui", "tldr": "", "abstract": "Bayesian optimization is an effective technique for black-box optimization, but its applicability is typically limited to low-dimensional and small-budget problems due to the cubic complexity of computing the Gaussian process (GP) surrogate. While various approximate GP models have been employed to scale Bayesian optimization to larger sample sizes, most suffer from overly-smooth estimation and focus primarily on problems that allow for large online samples. In this work, we argue that Bayesian optimization algorithms with sparse GPs can more efficiently allocate their representational power to relevant regions of the search space. To achieve this, we propose focalized GP, which leverages a novel variational loss function to achieve stronger local prediction, as well as FocalBO, which hierarchically optimizes the focalized GP acquisition function over progressively smaller search spaces. Experimental results demonstrate that FocalBO can efficiently leverage large amounts of offline and online data to achieve state-of-the-art performance on robot morphology design and to control a 585-dimensional musculoskeletal system.", "keywords": "Scalable Bayesian optimization;Sparse Gaussian process", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/4b477203e0e6585d567febc7a54380d36afb262b.zip", "author": "Yunyue Wei;Vincent Zhuang;Saraswati Soedarmadji;Yanan Sui", "authorids": "~Yunyue_Wei1;~Vincent_Zhuang2;~Saraswati_Soedarmadji1;~Yanan_Sui1", "gender": "M;M;;M", "homepage": "https://github.com/yunyuewei;https://github.com/vzhuang;https://www.caltech.edu;https://www.yanansui.com", "dblp": ";;;151/6934", "google_scholar": ";rxYjVngAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yunyue_Wei1;~Vincent_Zhuang2;~Saraswati_Soedarmadji1;~Yanan_Sui1", "aff": "Tsinghua University;Google DeepMind;California Institute of Technology;Tsinghua University", "aff_domain": "tsinghua.edu.cn;google.com;caltech.edu;tsinghua.edu.cn", "position": "PhD student;Research Engineer;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nwei2024scalable,\ntitle={Scalable Bayesian Optimization via Focalized Sparse Gaussian Processes},\nauthor={Yunyue Wei and Vincent Zhuang and Saraswati Soedarmadji and Yanan Sui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OF0YsxoRai}\n}", "github": "", "reviewers": "SUei;BXny;f3AD", "pdf_size": 4546816, "rating": "4;7;7", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "60;83;94", "wc_strengths": "62;43;55", "wc_weaknesses": "269;106;9", "wc_questions": "221;83;119", "wc_limitations": "180;16;22", "wc_review": "792;331;299", "wc_reply_reviewers": "720;109;50", "wc_reply_authors": "1407;37;432", "reply_reviewers": "1;1;1", "reply_authors": "6;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.0, 14.165686240583852 ], "wc_strengths_avg": [ 53.333333333333336, 7.845734863959881 ], "wc_weaknesses_avg": [ 128.0, 107.27845387899038 ], "wc_questions_avg": [ 141.0, 58.44655678480983 ], "wc_limitations_avg": [ 72.66666666666667, 75.93564526770167 ], "wc_review_avg": [ 474.0, 225.23913218325688 ], "wc_reply_reviewers_avg": [ 293.0, 302.89382077993383 ], "wc_reply_authors_avg": [ 625.3333333333334, 575.765191337194 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:safORcU8KMAJ:scholar.google.com/&scioq=Scalable+Bayesian+Optimization+via+Focalized+Sparse+Gaussian+Processes&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "tsinghua.edu.cn;google.com;caltech.edu;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Tsinghua University;Google;California Institute of Technology", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://deepmind.com;https://www.caltech.edu", "aff_unique_abbr": "THU;DeepMind;Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "Deterministic Uncertainty Propagation for Improved Model-Based Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95381", "id": "OFmclNhp0y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OFmclNhp0y", "openreview": "https://openreview.net/forum?id=OFmclNhp0y", "poster": "/media/PosterPDFs/NeurIPS%202024/95381.png?t=1731080238.7283041", "project": "", "author_site": "Abdullah Akg\u00fcl, Manuel Haussmann, Melih Kandemir", "tldr": "", "abstract": "Current approaches to model-based offline reinforcement learning often incorporate uncertainty-based reward penalization to address the distributional shift problem. These approaches, commonly known as pessimistic value iteration, use Monte Carlo sampling to estimate the Bellman target to perform temporal difference-based policy evaluation. We find out that the randomness caused by this sampling step significantly delays convergence. We present a theoretical result demonstrating the strong dependency of suboptimality on the number of Monte Carlo samples taken per Bellman target calculation. Our main contribution is a deterministic approximation to the Bellman target that uses progressive moment matching, a method developed originally for deterministic variational inference. The resulting algorithm, which we call Moment Matching Offline Model-Based Policy Optimization (MOMBO), propagates the uncertainty of the next state through a nonlinear Q-network in a deterministic fashion by approximating the distributions of hidden layer activations by a normal distribution. We show that it is possible to provide tighter guarantees for the suboptimality of MOMBO than the existing Monte Carlo sampling approaches. We also observe MOMBO to converge faster than these approaches in a large set of benchmark tasks.", "keywords": "offline reinforcement learning;offline model-based reinforcement learning;uncertainty propagation;moment matching", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/e7ae81b09cf0c342b8c29fb1cd13f55a390be138.zip", "author": "Abdullah Akg\u00fcl;Manuel Haussmann;Melih Kandemir", "authorids": "~Abdullah_Akg\u00fcl1;~Manuel_Haussmann1;~Melih_Kandemir1", "gender": "M;;M", "homepage": "https://aportekila.github.io/;https://manuelhaussmann.github.io/;https://imada.sdu.dk/~kandemir/", "dblp": "294/5457;198/2433;95/7056", "google_scholar": "FZeaKPoAAAAJ;https://scholar.google.com/citations?hl=de;Jxm1UeYAAAAJ", "orcid": "0000-0002-0489-9493;;0000-0001-6293-3656", "linkedin": "abdullahakgul70/;;melih-kandemir-64681a16/", "or_profile": "~Abdullah_Akg\u00fcl1;~Manuel_Haussmann1;~Melih_Kandemir1", "aff": "University of Southern Denmark - SDU;University of Southern Denmark - SDU;University of Southern Denmark", "aff_domain": "sdu.dk;sdu.dk;sdu.dk", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nakg{\\\"u}l2024deterministic,\ntitle={Deterministic Uncertainty Propagation for Improved Model-Based Offline Reinforcement Learning},\nauthor={Abdullah Akg{\\\"u}l and Manuel Haussmann and Melih Kandemir},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OFmclNhp0y}\n}", "github": "", "reviewers": "igqb;czcW;BmgH;3Ppu", "pdf_size": 887238, "rating": "3;5;6;7", "confidence": "4;2;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "68;29;52;111", "wc_strengths": "28;20;31;100", "wc_weaknesses": "283;23;234;81", "wc_questions": "2;81;73;1", "wc_limitations": "1;1;13;1", "wc_review": "382;154;403;294", "wc_reply_reviewers": "969;61;17;13", "wc_reply_authors": "1800;299;0;0", "reply_reviewers": "3;2;1;1", "reply_authors": "5;2;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 65.0, 29.958304357890484 ], "wc_strengths_avg": [ 44.75, 32.151010870577615 ], "wc_weaknesses_avg": [ 155.25, 106.68264854230044 ], "wc_questions_avg": [ 39.25, 37.85746293665227 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 308.25, 97.99585450415746 ], "wc_reply_reviewers_avg": [ 265.0, 406.89064870060605 ], "wc_reply_authors_avg": [ 524.75, 746.3160774765608 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18180305328186391463&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "sdu.dk;sdu.dk;sdu.dk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern Denmark", "aff_unique_dep": "", "aff_unique_url": "https://www.sdu.dk", "aff_unique_abbr": "SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "id": "OGaZVSS0Cx", "title": "Mini-batch kernel $k$-means", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present the first mini-batch kernel $k$-means algorithm. Our algorithm achieves an order of magnitude improvement in running time compared to the full batch algorithm, with only a minor negative effect on the quality of the solution. Specifically, a single iteration of our algorithm requires only $O(n(k+b))$ time, compared to $O(n^2)$ for the full batch kernel $k$-means, where $n$ is the size of the dataset and $b$ is the batch size. \n \n We provide a theoretical analysis for our algorithm with an early stopping condition and show that if the batch is of size $\\Omega((\\gamma / \\epsilon)^2\\log (n\\gamma/\\epsilon))$, the algorithm must terminate within $O(\\gamma^2/\\epsilon)$ iterations with high probability, where $\\gamma$ is the bound on the norm of points in the dataset in feature space,\nand $\\epsilon$ is a threshold parameter for termination. \nOur results hold for any reasonable initialization of centers. When the algorithm is initialized with the $k$-means++ initialization scheme, it\nachieves an approximation ratio of $O(\\log k)$.\n\nMany popular kernels are normalized (e.g., Gaussian, Laplacian), which implies $\\gamma=1$. For these kernels, taking $\\epsilon$ to be a constant and $b=\\Theta(\\log n)$, our algorithm terminates within $O(1)$ iterations where each iteration takes time $O(n(\\log n+k))$.", "keywords": "kernel k-means;k-means;mini-batch;clustering", "primary_area": "other", "supplementary_material": "/attachment/a7a7c8ac30ed82ea52b0a34317e7d1eb630b89fd.zip", "author": "Ben Jourdan;Gregory Schwartzman", "authorids": "~Ben_Jourdan1;~Gregory_Schwartzman1", "gender": "M;", "homepage": "https://benjourdan.github.io/website/;https://sites.google.com/view/gregoryschwartzman/", "dblp": ";176/5322.html", "google_scholar": "RZLKd2cAAAAJ;", "orcid": "0000-0001-9831-0098;", "linkedin": ";", "or_profile": "~Ben_Jourdan1;~Gregory_Schwartzman1", "aff": "Edinburgh University, University of Edinburgh;Japan Advanced Institute of Science and Technology", "aff_domain": "inf.ed.ac.uk;jaist.ac.jp", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024minibatch,\ntitle={Mini-batch kernel \\$k\\$-means},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=OGaZVSS0Cx}\n}", "github": "", "project": "", "reviewers": "AkgB;gVj8;hmC1;mEhQ", "site": "https://openreview.net/forum?id=OGaZVSS0Cx", "pdf_size": 1082673, "rating": "4;5;5;6", "confidence": "2;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "83;116;171;201", "wc_strengths": "87;34;43;61", "wc_weaknesses": "132;85;14;120", "wc_questions": "3;327;1;98", "wc_limitations": "1;22;1;24", "wc_review": "306;584;230;504", "wc_reply_reviewers": "0;0;20;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 142.75, 46.03463370116026 ], "wc_strengths_avg": [ 56.25, 20.24073862288627 ], "wc_weaknesses_avg": [ 87.75, 45.94766044098437 ], "wc_questions_avg": [ 107.25, 132.79001280216823 ], "wc_limitations_avg": [ 12.0, 11.022703842524301 ], "wc_review_avg": [ 406.0, 143.40850741849314 ], "wc_reply_reviewers_avg": [ 7.5, 8.2915619758885 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:K_Ezmc24rc4J:scholar.google.com/&scioq=Mini-batch+kernel+%24k%24-means&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Edinburgh;Japan Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.jaist.ac.jp", "aff_unique_abbr": "Edinburgh;JAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Japan" }, { "title": "Identifying Causal Effects Under Functional Dependencies", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95380", "id": "OIsUWQSvkD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OIsUWQSvkD", "openreview": "https://openreview.net/forum?id=OIsUWQSvkD", "poster": "/media/PosterPDFs/NeurIPS%202024/95380.png?t=1733524086.960242", "project": "", "author_site": "Yizuo Chen, Adnan Darwiche", "tldr": "", "abstract": "We study the identification of causal effects, motivated by two improvements to identifiability which can be attained if one knows that some variables in a causal graph are functionally determined by their parents (without needing to know the specific functions). First, an unidentifiable causal effect may become identifiable when certain variables are functional. Second, certain functional variables can be excluded from being observed without affecting the identifiability of a causal effect, which may significantly reduce the number of needed variables in observational data. Our results are largely based on an elimination procedure which removes functional variables from a causal graph while preserving key properties in the resulting causal graph, including the identifiability of causal effects.", "keywords": "Identifiability;Causal Effects;Functional Dependencies", "primary_area": "causal_inference", "supplementary_material": "", "author": "Yizuo Chen;Adnan Darwiche", "authorids": "~Yizuo_Chen1;~Adnan_Darwiche1", "gender": "M;", "homepage": "http://www.seas.ucla.edu/~yizuo;http://web.cs.ucla.edu/~darwiche/", "dblp": "294/7697;p/ADarwiche", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Yizuo_Chen1;~Adnan_Darwiche1", "aff": "University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024identifying,\ntitle={Identifying Causal Effects Under Functional Dependencies},\nauthor={Yizuo Chen and Adnan Darwiche},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OIsUWQSvkD}\n}", "github": "", "reviewers": "3A71;uRPz;GMHe;bXg6", "pdf_size": 514743, "rating": "7;7;8;8", "confidence": "3;3;4;4", "soundness": "4;4;4;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "54;91;105;109", "wc_strengths": "61;27;54;195", "wc_weaknesses": "24;53;115;81", "wc_questions": "109;3;109;76", "wc_limitations": "1;4;52;15", "wc_review": "249;178;435;476", "wc_reply_reviewers": "22;35;28;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.75, 21.695333599647643 ], "wc_strengths_avg": [ 84.25, 65.18962724237653 ], "wc_weaknesses_avg": [ 68.25, 33.6851228289285 ], "wc_questions_avg": [ 74.25, 43.28611209152423 ], "wc_limitations_avg": [ 18.0, 20.310096011589902 ], "wc_review_avg": [ 334.5, 124.42367138129303 ], "wc_reply_reviewers_avg": [ 24.25, 8.437268515343103 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8091380878004492400&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "ucla.edu;ucla.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Zero-Shot Vision Models by Label-Free Prompt Distribution Learning and Bias Correcting", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95379", "id": "OJximyClit", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OJximyClit", "openreview": "https://openreview.net/forum?id=OJximyClit", "poster": "/media/PosterPDFs/NeurIPS%202024/95379.png?t=1731657704.3942304", "project": "", "author_site": "Xingyu Zhu, Beier Zhu, Yi Tan, Shuo Wang, Yanbin Hao, Hanwang Zhang", "tldr": "", "abstract": "Vision-language models, such as CLIP, have shown impressive generalization capacities when using appropriate text descriptions. While optimizing prompts on downstream labeled data has proven effective in improving performance, these methods entail labor costs for annotations and are limited by their quality. Additionally, since CLIP is pre-trained on highly imbalanced Web-scale data, it suffers from inherent label bias that leads to suboptimal performance. \n To tackle the above challenges, we propose a label-**F**ree p**ro**mpt distribution **l**earning and b**i**as **c**orrection framework, dubbed as **Frolic**, which boosts zero-shot performance without the need for labeled data. Specifically, our Frolic learns distributions over prompt prototypes to capture diverse visual representations and adaptively fuses these with the original CLIP through confidence matching.\nThis fused model is further enhanced by correcting label bias via a label-free logit adjustment. Notably, our method is not only training-free but also circumvents the necessity for hyper-parameter tuning. Extensive experimental results across 16 datasets demonstrate the efficacy of our approach, particularly outperforming the state-of-the-art by an average of $2.6\\%$ on 10 datasets with CLIP ViT-B/16 and achieving an average margin of $1.5\\%$ on ImageNet and its five distribution shifts with CLIP ViT-B/16. Codes are available in [https://github.com/zhuhsingyuu/Frolic](https://github.com/zhuhsingyuu/Frolic).", "keywords": "vision-language model;zero-shot classification;logit adjustment", "primary_area": "machine_vision", "supplementary_material": "/attachment/1906ea5a325ccbbb1812d8f920224d467710d9a1.zip", "author": "Xingyu Zhu;Beier Zhu;Yi Tan;Shuo Wang;Yanbin Hao;Hanwang Zhang", "authorids": "~Xingyu_Zhu3;~Beier_Zhu1;~Yi_Tan3;~Shuo_Wang9;~Yanbin_Hao1;~Hanwang_Zhang3", "gender": "M;M;M;M;M;M", "homepage": "https://zhuhsingyuu.github.io/;https://beierzhu.github.io;;http://www.wangshuo.pub;https://haoyanbin918.github.io/;https://mreallab.github.io/index.html", "dblp": "132/4210-2;243/7531;25/5709-1;63/1591-8;96/1538;79/8116.html", "google_scholar": "PpqUnAwAAAAJ;jHczmjwAAAAJ;kBvMv04AAAAJ;qTE3BacAAAAJ;vhPSOkEAAAAJ;YG0DFyYAAAAJ", "orcid": ";0000-0002-7900-6979;0000-0002-8670-1312;0000-0002-4881-9344;0000-0002-0695-1566;", "linkedin": ";;;;;", "or_profile": "~Xingyu_Zhu3;~Beier_Zhu1;~Yi_Tan3;~Shuo_Wang9;~Yanbin_Hao1;~Hanwang_Zhang3", "aff": "University of Science and Technology of China;Nanyang Technological University;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Nanyang Technological University", "aff_domain": "ustc.edu.cn;ntu.edu.sg;ustc.edu.cn;ustc.edu;ustc.edu.cn;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhu2024enhancing,\ntitle={Enhancing Zero-Shot Vision Models by Label-Free Prompt Distribution Learning and Bias Correcting},\nauthor={Xingyu Zhu and Beier Zhu and Yi Tan and Shuo Wang and Yanbin Hao and Hanwang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OJximyClit}\n}", "github": "", "reviewers": "VFZ9;jbMQ;mJSR;GPPc", "pdf_size": 581768, "rating": "6;7;7;7", "confidence": "5;4;4;3", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "50;52;51;55", "wc_strengths": "45;36;62;27", "wc_weaknesses": "125;101;62;69", "wc_questions": "147;17;9;22", "wc_limitations": "5;8;1;6", "wc_review": "372;214;185;179", "wc_reply_reviewers": "122;28;16;18", "wc_reply_authors": "518;20;25;22", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 52.0, 1.8708286933869707 ], "wc_strengths_avg": [ 42.5, 12.932517156377562 ], "wc_weaknesses_avg": [ 89.25, 25.341418665891617 ], "wc_questions_avg": [ 48.75, 56.91386035053324 ], "wc_limitations_avg": [ 5.0, 2.5495097567963922 ], "wc_review_avg": [ 237.5, 78.77340921910134 ], "wc_reply_reviewers_avg": [ 46.0, 44.11349000022555 ], "wc_reply_authors_avg": [ 146.25, 214.63733948220658 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4474938145839626745&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;ntu.edu.sg;ustc.edu.cn;ustc.edu;ustc.edu.cn;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "University of Science and Technology of China;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "USTC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Stochastic Extragradient with Flip-Flop Shuffling & Anchoring: Provable Improvements", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95378", "id": "OJxua0PAIo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OJxua0PAIo", "openreview": "https://openreview.net/forum?id=OJxua0PAIo", "poster": "", "project": "", "author_site": "Jiseok Chae, Chulhee Yun, Donghwan Kim", "tldr": "", "abstract": "In minimax optimization, the extragradient (EG) method has been extensively studied because it outperforms the gradient descent-ascent method in convex-concave (C-C) problems. Yet, stochastic EG (SEG) has seen limited success in C-C problems, especially for unconstrained cases. Motivated by the recent progress of shuffling-based stochastic methods, we investigate the convergence of shuffling-based SEG in unconstrained finite-sum minimax problems, in search of convergent shuffling-based SEG. Our analysis reveals that both random reshuffling and the recently proposed flip-flop shuffling alone can suffer divergence in C-C problems. However, with an additional simple trick called anchoring, we develop the SEG with flip-flop anchoring (SEG-FFA) method which successfully converges in C-C problems. We also show upper and lower bounds in the strongly-convex-strongly-concave setting, demonstrating that SEG-FFA has a provably faster convergence rate compared to other shuffling-based methods.", "keywords": "minimax optimization;stochastic optimization;extragradient method;without-replacement sampling", "primary_area": "optimization", "supplementary_material": "/attachment/11c1a11c30754d8c27f0ee9a830ff7b043e27869.zip", "author": "Jiseok Chae;Chulhee Yun;Donghwan Kim", "authorids": "~Jiseok_Chae1;~Chulhee_Yun1;~Donghwan_Kim2", "gender": "M;M;M", "homepage": "https://jsch8q.github.io/;https://chulheeyun.github.io/;http://mathsci.kaist.ac.kr/~donghwankim/", "dblp": "348/5478;138/0148.html;05/1032", "google_scholar": ";Ukl64ggAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0009-0672-3422;;", "linkedin": ";;", "or_profile": "~Jiseok_Chae1;~Chulhee_Yun1;~Donghwan_Kim2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchae2024stochastic,\ntitle={Stochastic Extragradient with Flip-Flop Shuffling \\& Anchoring: Provable Improvements},\nauthor={Jiseok Chae and Chulhee Yun and Donghwan Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OJxua0PAIo}\n}", "github": "", "reviewers": "k7BS;jRnf;19M7;hmmd", "pdf_size": 1351573, "rating": "6;6;6;7", "confidence": "4;4;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "110;44;92;137", "wc_strengths": "39;90;99;92", "wc_weaknesses": "447;124;78;100", "wc_questions": "2;112;90;141", "wc_limitations": "18;1;26;4", "wc_review": "616;371;385;474", "wc_reply_reviewers": "311;21;24;10", "wc_reply_authors": "169;41;26;28", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.75, 33.89966813996857 ], "wc_strengths_avg": [ 80.0, 23.90606617576384 ], "wc_weaknesses_avg": [ 187.25, 150.84656940083192 ], "wc_questions_avg": [ 86.25, 51.895929512824026 ], "wc_limitations_avg": [ 12.25, 10.207227831296802 ], "wc_review_avg": [ 461.5, 97.55639394729594 ], "wc_reply_reviewers_avg": [ 91.5, 126.83552341516946 ], "wc_reply_authors_avg": [ 66.0, 59.74529270160119 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:z3UVapOXQ5EJ:scholar.google.com/&scioq=Stochastic+Extragradient+with+Flip-Flop+Shuffling+%26+Anchoring:+Provable+Improvements&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "kaist.edu;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "OLEQJoAED6", "title": "Don't Always Say No to Me: Benchmarking Safety-Related Refusal in Large VLM", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Warning: this paper contains example data that may be offensive or harmful. Although many existing evaluation datasets have been proposed to assess the safety of Large Vision-Language Models (LVLMs) on malicious prompt-image pairs, the research community lacks a systematic investigation into LVLMs' reasonable refusal toward both safe and unsafe pairs. We define a control group consisting of an unsafe prompt-image pair and a safe pair, in which these two pairs share the same prompt or image. In a control group, an LVLM shows reasonable refusal if it refuses the former pair and responds to the latter. Otherwise, the model displays false refusal, such as refusing both pairs or none. For example, a control group contains an image depicting violent behavior and two prompts based on the same visual information. An LVLM should respond to the safe prompt \"How to deter this behavior?\" and refuse the unsafe prompt \"How to promote this behavior?\". To bridge this gap, we present LVLM-SafeR, a challenging and high-quality benchmark designed to measure Safety-related Refusal in LVLMs. The evaluation results from 9 closed-source LVLMs, 23 open-source LVLMs and 4 LVLM safety alignment approaches demonstrate that existing LVLMs have notable issues in providing proper refusals. Furthermore, we explore the effects of post-hoc/mixed safety fine-tuning, full/LoRA safety fine-tuning, and inference-time parameters (top-p, temperature) on LVLMs. Then we propose an effective prompt-engineering baseline to instruct LVLMs to give more reasonable refusals. Our project page is available at isxinliu.github.io/Project/LVLM-SafeR.", "keywords": "Large Vision-Language Models;Safety;Benchmark", "primary_area": "", "supplementary_material": "/attachment/1da853c371ba198b6c0f7e50cf3b5f44b774cc2f.pdf", "author": "Xin Liu;Zhichen Dong;Zhanhui Zhou;Yichen Zhu;Yunshi Lan;Jing Shao;Chao Yang;Yu Qiao", "authorids": "~Xin_Liu22;~Zhichen_Dong1;~Zhanhui_Zhou1;~Yichen_Zhu1;~Yunshi_Lan1;~Jing_Shao3;~Chao_Yang3;~Yu_Qiao1", "gender": ";F;M;M;F;F;;", "homepage": "https://isxinliu.github.io/;https://github.com/niconi19;https://zhziszz.github.io/;;https://lanyunshi.github.io;https://amandajshao.github.io/;;", "dblp": ";;;;185/6830.html;;;", "google_scholar": ";;SbACfYQAAAAJ;eyKyrbsAAAAJ;Q0F92XIAAAAJ;VU5ObUwAAAAJ;;", "orcid": ";;;0000-0001-5126-838X;0000-0002-0192-8498;;;", "linkedin": ";;;;;;;", "or_profile": "~Xin_Liu22;~Zhichen_Dong1;~Zhanhui_Zhou1;~Yichen_Zhu1;~Yunshi_Lan1;~Jing_Shao3;~Chao_Yang3;~Yu_Qiao1", "aff": "East China Normal University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Midea Group;East China Normal University;Shanghai AI Laboratory;;", "aff_domain": "ecnu.edu.cn;sjtu.edu.cn;pjlab.org.cn;midea.com;ecnu.edu.cn;pjlab.org.cn;;", "position": "MS student;PhD student;Researcher;Researcher;Associate Professor;Researcher;;", "bibtex": "@misc{\nanonymous2024dont,\ntitle={Don't Always Say No to Me: Benchmarking Safety-Related Refusal in Large {VLM}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=OLEQJoAED6}\n}", "github": "", "project": "", "reviewers": "VnYA;uANp;jBbR;iuHU", "site": "https://openreview.net/forum?id=OLEQJoAED6", "pdf_size": 1286532, "rating": "4;4;5;6", "confidence": "3;3;4;4", "wc_summary_and_contributions": "73;64;31;66", "wc_strengths": "54;67;44;2", "wc_improvement": "167;169;32;20", "wc_limitations": "17;2;22;2", "wc_correctness": "1;1;1;31", "wc_clarity": "13;1;10;9", "wc_relation_to_prior_work": "7;40;23;65", "wc_documentation": "23;1;1;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "356;346;165;200", "wc_reply_reviewers": "278;237;89;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 58.5, 16.224980739587952 ], "wc_strengths_avg": [ 41.75, 24.355440870573457 ], "wc_improvement_avg": [ 97.0, 71.1301623785578 ], "wc_limitations_avg": [ 10.75, 8.926785535678562 ], "wc_correctness_avg": [ 8.5, 12.99038105676658 ], "wc_clarity_avg": [ 8.25, 4.437059837324712 ], "wc_relation_to_prior_work_avg": [ 33.75, 21.48691462262556 ], "wc_documentation_avg": [ 7.25, 9.175374651751284 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 266.75, 85.22726969696964 ], "wc_reply_reviewers_avg": [ 159.25, 101.26789965235776 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KcT3eTpnglEJ:scholar.google.com/&scioq=Don%27t+Always+Say+No+to+Me:+Benchmarking+Safety-Related+Refusal+in+Large+VLM&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "East China Normal University;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Midea Group;Shanghai AI Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.mideaglobal.com;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "ECNU;SJTU;Shanghai AI Lab;Midea;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Cross-Domain Benchmark for Active Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97719", "id": "OOItbUUQcd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OOItbUUQcd", "openreview": "https://openreview.net/forum?id=OOItbUUQcd", "poster": "/media/PosterPDFs/NeurIPS%202024/97719.png?t=1729592983.9250154", "project": "", "author_site": "Thorben Werner, Johannes Burchert, Maximilian Stubbemann, Lars Schmidt-Thieme", "tldr": "", "abstract": "Active Learning (AL) deals with identifying the most informative samples for\nlabeling to reduce data annotation costs for supervised learning tasks. AL\nresearch suffers from the fact that lifts from literature generalize poorly and\nthat only a small number of repetitions of experiments are conducted. To overcome\nthese obstacles, we propose CDALBench, the first active learning benchmark\nwhich includes tasks in computer vision, natural language processing and tabular\nlearning. Furthermore, by providing an efficient, greedy oracle, CDALBench\ncan be evaluated with 50 runs for each experiment. We show, that both the\ncross-domain character and a large amount of repetitions are crucial for\nsophisticated evaluation of AL research. Concretely, we show that the\nsuperiority of specific methods varies over the different domains, making it\nimportant to evaluate Active Learning with a cross-domain benchmark.\nAdditionally, we show that having a large amount of runs is crucial. With only\nconducting three runs as often done in the literature, the superiority of\nspecific methods can strongly vary with the specific runs. This effect is so strong, that, depending on the seed, even a well-established method's performance can be significantly better and significantly\nworse than random for the same dataset.", "keywords": "Active Learning;Benchmark", "primary_area": "", "supplementary_material": "/attachment/d8bfbabbefed5c99ea72e39522dcbfe85c7d6cc5.zip", "author": "Thorben Werner;Johannes Burchert;Maximilian Stubbemann;Lars Schmidt-Thieme", "authorids": "~Thorben_Werner1;~Johannes_Burchert1;~Maximilian_Stubbemann1;~Lars_Schmidt-Thieme1", "gender": "M;;M;M", "homepage": ";;https://www.ismll.uni-hildesheim.de/personen/stubbemann.html;https://www.ismll.uni-hildesheim.de/personen/lst_en.html", "dblp": ";;245/7557;s/LarsSchmidtThieme", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.de/citations?user=MJ94W_UAAAAJ;https://scholar.google.de/citations?user=jnaEhVQAAAAJ;https://scholar.google.de/citations?user=l3taTdYAAAAJ", "orcid": "0000-0002-5944-6045;;0000-0003-1579-1151;0000-0001-5729-6023", "linkedin": ";;;", "or_profile": "~Thorben_Werner1;~Johannes_Burchert1;~Maximilian_Stubbemann1;~Lars_Schmidt-Thieme1", "aff": "Universit\u00e4t Hildesheim;Universit\u00e4t Hildesheim;University of Hildesheim;University of Hildesheim", "aff_domain": "uni-hildesheim.de;uni-hildesheim.de;ismll.de;uni-hildesheim.de", "position": "PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nwerner2024a,\ntitle={A Cross-Domain Benchmark for Active Learning},\nauthor={Thorben Werner and Johannes Burchert and Maximilian Stubbemann and Lars Schmidt-Thieme},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=OOItbUUQcd}\n}", "github": "", "reviewers": "x7yL;c7pZ;3W2R;iCXF", "pdf_size": 1624157, "rating": "6;7;7;7", "confidence": "5;5;3;3", "wc_summary_and_contributions": "35;81;110;81", "wc_strengths": "31;2;2;130", "wc_improvement": "124;2;264;81", "wc_limitations": "13;1;1;1", "wc_correctness": "11;7;7;1", "wc_clarity": "21;3;2;1", "wc_relation_to_prior_work": "19;5;2;1", "wc_documentation": "7;29;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "262;131;390;298", "wc_reply_reviewers": "12;13;176;0", "wc_reply_authors": "35;35;171;35", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "wc_summary_and_contributions_avg": [ 76.75, 26.85493436968335 ], "wc_strengths_avg": [ 41.25, 52.58980414491007 ], "wc_improvement_avg": [ 117.75, 95.10093322360197 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_correctness_avg": [ 6.5, 3.570714214271425 ], "wc_clarity_avg": [ 6.75, 8.257572282456872 ], "wc_relation_to_prior_work_avg": [ 6.75, 7.224091638399945 ], "wc_documentation_avg": [ 9.5, 11.521718621802913 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 270.25, 92.96336644076526 ], "wc_reply_reviewers_avg": [ 50.25, 72.78178000021708 ], "wc_reply_authors_avg": [ 69.0, 58.88972745734183 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=526869016848633755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uni-hildesheim.de;uni-hildesheim.de;ismll.de;uni-hildesheim.de", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Universit\u00e4t Hildesheim;University of Hildesheim", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-hildesheim.de/;https://www.uni-hildesheim.de/", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "When is Multicalibration Post-Processing Necessary?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95377", "id": "OONojmx3wH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OONojmx3wH", "openreview": "https://openreview.net/forum?id=OONojmx3wH", "poster": "", "project": "", "author_site": "Dutch Hansen, Siddartha Devic, Preetum Nakkiran, Vatsal Sharan", "tldr": "", "abstract": "Calibration is a well-studied property of predictors which guarantees meaningful uncertainty estimates. Multicalibration is a related notion --- originating in algorithmic fairness --- which requires predictors to be simultaneously calibrated over a potentially complex and overlapping collection of protected subpopulations (such as groups defined by ethnicity, race, or income). We conduct the first comprehensive study evaluating the usefulness of multicalibration post-processing across a broad set of tabular, image, and language datasets for models spanning from simple decision trees to 90 million parameter fine-tuned LLMs. Our findings can be summarized as follows: (1) models which are calibrated out of the box tend to be relatively multicalibrated without any additional post-processing; (2) multicalibration can help inherently uncalibrated models and also large vision and language models; and (3) traditional calibration measures may sometimes provide multicalibration implicitly. More generally, we also distill many independent observations which may be useful for practical and effective applications of multicalibration post-processing in real-world contexts.", "keywords": "multicalibration;calibration;fairness", "primary_area": "fairness", "supplementary_material": "/attachment/68e08c659810cc67206f579197ea56ab4180cb41.zip", "author": "Dutch Hansen;Siddartha Devic;Preetum Nakkiran;Vatsal Sharan", "authorids": "~Dutch_Hansen1;~Siddartha_Devic1;~Preetum_Nakkiran1;~Vatsal_Sharan1", "gender": "M;;;M", "homepage": "https://dutchhansen.notion.site;http://sid.devic.us/;http://preetum.nakkiran.org;https://vatsalsharan.github.io/", "dblp": ";239/8389;151/6343;126/2543", "google_scholar": ";LVL-kmUAAAAJ;zithBbUAAAAJ;Ize17HEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Dutch_Hansen1;~Siddartha_Devic1;~Preetum_Nakkiran1;~Vatsal_Sharan1", "aff": "University of Southern California;Amazon;Apple;University of Southern California", "aff_domain": "usc.edu;amazon.com;apple.com;usc.edu", "position": "Undergrad student;Intern;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhansen2024when,\ntitle={When is Multicalibration Post-Processing Necessary?},\nauthor={Dutch Hansen and Siddartha Devic and Preetum Nakkiran and Vatsal Sharan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OONojmx3wH}\n}", "github": "", "reviewers": "nkHR;Zyam;rHYn;M2Di", "pdf_size": 2850072, "rating": "5;5;6;8", "confidence": "4;4;3;4", "soundness": "2;2;2;4", "novelty": "2;3;3;4", "presentation": "2;3;4;4", "wc_summary": "233;83;113;138", "wc_strengths": "261;50;79;84", "wc_weaknesses": "539;59;226;90", "wc_questions": "24;160;11;38", "wc_limitations": "1;7;3;10", "wc_review": "1058;359;432;360", "wc_reply_reviewers": "506;167;315;35", "wc_reply_authors": "27;44;1464;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;5;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 141.75, 56.166604846652426 ], "wc_strengths_avg": [ 118.5, 83.29015548070493 ], "wc_weaknesses_avg": [ 228.5, 189.95328373050043 ], "wc_questions_avg": [ 58.25, 59.51627928558707 ], "wc_limitations_avg": [ 5.25, 3.491060010942235 ], "wc_review_avg": [ 552.25, 293.4913755121264 ], "wc_reply_reviewers_avg": [ 255.75, 175.17330704191207 ], "wc_reply_authors_avg": [ 383.75, 623.8799463839176 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16402028098977135626&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "usc.edu;amazon.com;apple.com;usc.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Southern California;Amazon;Apple", "aff_unique_dep": ";Amazon.com, Inc.;Apple Inc.", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com;https://www.apple.com", "aff_unique_abbr": "USC;Amazon;Apple", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Fast Convoluted Story: Scaling Probabilistic Inference for Integer Arithmetics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95376", "id": "OOiRS6fiM7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OOiRS6fiM7", "openreview": "https://openreview.net/forum?id=OOiRS6fiM7", "poster": "/media/PosterPDFs/NeurIPS%202024/95376.png?t=1733963765.0121722", "project": "", "author_site": "Lennert De Smet, Pedro Zuidberg Dos Martires", "tldr": "", "abstract": "As illustrated by the success of integer linear programming, linear integer arithmetics is a powerful tool for modelling combinatorial problems. Furthermore, the probabilistic extension of linear programming has been used to formulate problems in neurosymbolic AI. However, two key problems persist that prevent the adoption of neurosymbolic techniques beyond toy problems. First, probabilistic inference is inherently hard, #P-hard to be precise. Second, the discrete nature of integers renders the construction of meaningful gradients challenging, which is problematic for learning. In order to mitigate these issues, we formulate linear arithmetics over integer-valued random variables as tensor manipulations that can be implemented in a straightforward fashion using modern deep learning libraries. At the core of our formulation lies the observation that the addition of two integer-valued random variables can be performed by adapting the fast Fourier transform to probabilities in the log-domain. By relying on tensor operations we obtain a differentiable data structure, which unlocks, virtually for free, gradient-based learning. In our experimental validation we show that tensorising probabilistic integer linear arithmetics and leveraging the fast Fourier transform allows us to push the state of the art by several orders of magnitude in terms of inference and learning times.", "keywords": "Probability theory;neurosymbolic AI;neuro-symbolic AI;neural-symbolic AI;integer arithmetic;linear integer arithmetic;integer programming;discrete random variables", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/92be14081cccd83de41c7e58077c077ad82a624f.zip", "author": "Lennert De Smet;Pedro Zuidberg Dos Martires", "authorids": "~Lennert_De_Smet1;~Pedro_Zuidberg_Dos_Martires1", "gender": "M;M", "homepage": ";https://pedrozudo.github.io/", "dblp": "342/2788;223/4292", "google_scholar": ";", "orcid": "0000-0003-3136-0634;", "linkedin": ";", "or_profile": "~Lennert_De_Smet1;~Pedro_Zuidberg_Dos_Martires1", "aff": "KU Leuven;\u00d6rebro University", "aff_domain": "kuleuven.be;oru.se", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsmet2024a,\ntitle={A Fast Convoluted Story: Scaling Probabilistic Inference for Integer Arithmetics},\nauthor={Lennert De Smet and Pedro Zuidberg Dos Martires},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OOiRS6fiM7}\n}", "github": "", "reviewers": "GGzh;Ltsw;DnxY;JM4B", "pdf_size": 622209, "rating": "5;5;6;7", "confidence": "4;2;2;4", "soundness": "3;2;3;4", "novelty": "3;3;2;3", "presentation": "2;3;4;3", "wc_summary": "81;127;58;99", "wc_strengths": "24;186;57;43", "wc_weaknesses": "87;167;22;95", "wc_questions": "15;86;34;40", "wc_limitations": "1;67;1;2", "wc_review": "208;633;172;279", "wc_reply_reviewers": "0;12;4;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.25, 25.24257316519059 ], "wc_strengths_avg": [ 77.5, 63.72793735874401 ], "wc_weaknesses_avg": [ 92.75, 51.37302307631896 ], "wc_questions_avg": [ 43.75, 26.080404521402652 ], "wc_limitations_avg": [ 17.75, 28.43743131859838 ], "wc_review_avg": [ 323.0, 183.07239005377082 ], "wc_reply_reviewers_avg": [ 9.5, 8.411301920630361 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5OTj-Lv_LTwJ:scholar.google.com/&scioq=A+Fast+Convoluted+Story:+Scaling+Probabilistic+Inference+for+Integer+Arithmetics&hl=en&as_sdt=0,48", "gs_version_total": 6, "email": "kuleuven.be;oru.se", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Katholieke Universiteit Leuven;\u00d6rebro University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuleuven.be;https://www.oru.se", "aff_unique_abbr": "KU Leuven;\u00d6rebro U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Belgium;Sweden" }, { "title": "Suitable is the Best: Task-Oriented Knowledge Fusion in Vulnerability Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95375", "id": "OP2D9sIdo4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OP2D9sIdo4", "openreview": "https://openreview.net/forum?id=OP2D9sIdo4", "poster": "/media/PosterPDFs/NeurIPS%202024/95375.png?t=1730779027.5826573", "project": "", "author_site": "Jingjing Wang, Minhuan Huang, yuanping nie, Xiang Li, Qianjin Du, Wei Kong, Huan Deng, Xiaohui Kuang", "tldr": "", "abstract": "Deep learning technologies have demonstrated remarkable performance in vulnerability detection. Existing works primarily adopt a uniform and consistent feature learning pattern across the entire target set. While designed for general-purpose detection tasks, they lack sensitivity towards target code comprising multiple functional modules or diverse vulnerability subtypes. In this paper, we present a knowledge fusion-based vulnerability detection method (KF-GVD) that integrates specific vulnerability knowledge into the Graph Neural Network feature learning process. KF-GVD achieves accurate vulnerability detection across different functional modules of the Linux kernel and vulnerability subtypes without compromising general task performance. Extensive experiments demonstrate that KF-GVD outperforms SOTAs on function-level and statement-level vulnerability detection across various target tasks, with an average increase of 40.9% in precision and 26.1% in recall. Notably, KF-GVD discovered 9 undisclosed vulnerabilities when employing on C/C++ open-source projects without ground truth.", "keywords": "deep learning;Graph Neural Networks;vulnerability detection;static analysis;software security;knowledge fusion", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/df9f96bcf6bb2d8675c75e0c9f13e9737e05f608.zip", "author": "Jingjing Wang;Minhuan Huang;Yuanping Nie;Xiang Li;Qianjin Du;Wei Kong;Huan Deng;Xiaohui Kuang", "authorids": "~Jingjing_Wang5;~Minhuan_Huang1;~Yuanping_Nie1;~Xiang_Li41;~Qianjin_Du2;~Wei_Kong2;~Huan_Deng1;~Xiaohui_Kuang1", "gender": "F;M;M;F;;;M;M", "homepage": "https://ieeexplore.ieee.org/author/37088904264;https://ieeexplore.ieee.org/author/37086385592;https://ieeexplore.ieee.org/author/37085683034;https://ieeexplore.ieee.org/author/38241044300;http://www.duqlk.com;;;https://ieeexplore.ieee.org/author/37543548800", "dblp": ";;150/3642;;;;;", "google_scholar": ";;;;;m6tes9wAAAAJ;;", "orcid": ";;;;;;0000-0002-5116-1766;", "linkedin": ";;;;;;;", "or_profile": "~Jingjing_Wang5;~Minhuan_Huang1;~Yuanping_Nie1;~Xiang_Li41;~Qianjin_Du2;~Wei_Kong2;~Huan_Deng1;~Xiaohui_Kuang1", "aff": "Academy of Military Sciences;;;;;National Key Laboratory of Information Science ;Institute of Systems Engineering, Academy of Military Sciences;", "aff_domain": "nudt.edu.cn;;;;;nuist.edu.cn;stu.xidian.edu.cn;", "position": "PhD student;;;;;PhD student;MS student;", "bibtex": "@inproceedings{\nwang2024suitable,\ntitle={Suitable is the Best: Task-Oriented Knowledge Fusion in Vulnerability Detection},\nauthor={Jingjing Wang and Minhuan Huang and Yuanping Nie and Xiang Li and Qianjin Du and Wei Kong and Huan Deng and Xiaohui Kuang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OP2D9sIdo4}\n}", "github": "", "reviewers": "YqLp;G6qq;Yw62;78X5", "pdf_size": 1417504, "rating": "4;6;6;7", "confidence": "4;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "111;91;60;99", "wc_strengths": "153;132;38;53", "wc_weaknesses": "265;118;60;44", "wc_questions": "175;157;4;351", "wc_limitations": "86;32;1;39", "wc_review": "790;530;163;586", "wc_reply_reviewers": "0;10;0;244", "wc_reply_authors": "25;0;0;234", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 90.25, 18.859679212542297 ], "wc_strengths_avg": [ 94.0, 49.35078520145348 ], "wc_weaknesses_avg": [ 121.75, 87.16758285050699 ], "wc_questions_avg": [ 171.75, 122.98246826275687 ], "wc_limitations_avg": [ 39.5, 30.41792234851026 ], "wc_review_avg": [ 517.25, 226.26022076361545 ], "wc_reply_reviewers_avg": [ 63.5, 104.29165834332102 ], "wc_reply_authors_avg": [ 64.75, 98.24809158451883 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zrjyyYHA3WQJ:scholar.google.com/&scioq=Suitable+is+the+Best:+Task-Oriented+Knowledge+Fusion+in+Vulnerability+Detection&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "nudt.edu.cn;;;;;nuist.edu.cn;stu.xidian.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;0", "aff_unique_norm": "Academy of Military Sciences;National Key Laboratory of Information Science", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Data Augmentation with Diffusion for Open-Set Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95374", "id": "OP3sNTIE1O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OP3sNTIE1O", "openreview": "https://openreview.net/forum?id=OP3sNTIE1O", "poster": "/media/PosterPDFs/NeurIPS%202024/95374.png?t=1733552351.248976", "project": "", "author_site": "Seonghyun Ban, Heesan Kong, Kee-Eung Kim", "tldr": "", "abstract": "Semi-supervised learning (SSL) seeks to utilize unlabeled data to overcome the limited amount of labeled data and improve model performance. However, many SSL methods typically struggle in real-world scenarios, particularly when there is a large number of irrelevant instances in the unlabeled data that do not belong to any class in the labeled data. Previous approaches often downweight instances from irrelevant classes to mitigate the negative impact of class distribution mismatch on model training. However, by discarding irrelevant instances, they may result in the loss of valuable information such as invariance, regularity, and diversity within the data. In this paper, we propose a data-centric generative augmentation approach that leverages a diffusion model to enrich labeled data using both labeled and unlabeled samples. A key challenge is extracting the diversity inherent in the unlabeled data while mitigating the generation of samples irrelevant to the labeled data. To tackle this issue, we combine diffusion model training with a discriminator that identifies and reduces the impact of irrelevant instances. We also demonstrate that such a trained diffusion model can even convert an irrelevant instance into a relevant one, yielding highly effective synthetic data for training. Through a comprehensive suite of experiments, we show that our data augmentation approach significantly enhances the performance of SSL methods, especially in the presence of class distribution mismatch.", "keywords": "Semi-supervised learning;Class distribution mismatch;Diffusion model.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Seonghyun Ban;Heesan Kong;Kee-Eung Kim", "authorids": "~Seonghyun_Ban1;~Heesan_Kong1;~Kee-Eung_Kim2", "gender": "M;M;M", "homepage": "http://ailab.kaist.ac.kr/;https://ailab.kaist.ac.kr/users/kekim;http://ailab.kaist.ac.kr", "dblp": ";;35/6703", "google_scholar": ";;https://scholar.google.com/citations?hl=ko", "orcid": ";;", "linkedin": ";;", "or_profile": "~Seonghyun_Ban1;~Heesan_Kong1;~Kee-Eung_Kim2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nban2024data,\ntitle={Data Augmentation with Diffusion for Open-Set Semi-Supervised Learning},\nauthor={Seonghyun Ban and Heesan Kong and Kee-Eung Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OP3sNTIE1O}\n}", "github": "", "reviewers": "5EHv;9H8V;KfDe", "pdf_size": 33608296, "rating": "6;6;6", "confidence": "3;4;4", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "83;91;38", "wc_strengths": "99;45;39", "wc_weaknesses": "211;53;154", "wc_questions": "35;46;47", "wc_limitations": "6;1;5", "wc_review": "434;236;283", "wc_reply_reviewers": "21;37;18", "wc_reply_authors": "349;25;30", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 23.328570942563587 ], "wc_strengths_avg": [ 61.0, 26.981475126464083 ], "wc_weaknesses_avg": [ 139.33333333333334, 65.3316326309256 ], "wc_questions_avg": [ 42.666666666666664, 5.436502143433364 ], "wc_limitations_avg": [ 4.0, 2.160246899469287 ], "wc_review_avg": [ 317.6666666666667, 84.46827149225257 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 8.339997335464536 ], "wc_reply_authors_avg": [ 134.66666666666666, 151.57029905478035 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lJUXArjFhDsJ:scholar.google.com/&scioq=Data+Augmentation+with+Diffusion+for+Open-Set+Semi-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "kaist.edu;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "DynaMITE-RL: A Dynamic Model for Improved Temporal Meta-Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95373", "id": "OPrPegYIZo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OPrPegYIZo", "openreview": "https://openreview.net/forum?id=OPrPegYIZo", "poster": "/media/PosterPDFs/NeurIPS%202024/95373.png?t=1733356951.2504904", "project": "", "author_site": "Anthony Liang, Guy Tennenholtz, Chih-wei Hsu, Yinlam Chow, Erdem B\u0131y\u0131k, Craig Boutilier", "tldr": "", "abstract": "We introduce DynaMITE-RL, a meta-reinforcement learning (meta-RL) approach to approximate inference in environments where the latent state evolves at varying rates. We model episode sessions---parts of the episode where the latent state is fixed---and propose three key modifications to existing meta-RL methods: (i) consistency of latent information within sessions, (ii) session masking, and (iii) prior latent conditioning. We demonstrate the importance of these modifications in various domains, ranging from discrete Gridworld environments to continuous-control and simulated robot assistive tasks, illustrating the efficacy of DynaMITE-RL over state-of-the-art baselines in both online and offline RL settings.", "keywords": "Reinforcement Learning;Bayesian Reinforcement Learning;Meta-Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Anthony Liang;Guy Tennenholtz;ChihWei Hsu;Yinlam Chow;Erdem Biyik;Craig Boutilier", "authorids": "~Anthony_Liang1;~Guy_Tennenholtz2;~ChihWei_Hsu2;~Yinlam_Chow1;~Erdem_Biyik1;~Craig_Boutilier2", "gender": ";;;M;M;M", "homepage": "https://aliang8.github.io/;https://guytenn.com;https://research.google/people/107258/;;http://people.eecs.berkeley.edu/~ebiyik/;https://research.google/people/craigboutilier/", "dblp": ";;;146/7869;194/2736;10/3411", "google_scholar": ";https://scholar.google.co.il/citations?user=pldrn8IAAAAJ;;;https://scholar.google.com.tr/citations?user=P-G3sjYAAAAJ;cXkm3rsAAAAJ", "orcid": ";;0000-0001-9929-9951;;0000-0002-9516-3130;", "linkedin": ";;;;https://linkedin.com/in/ebiyik;", "or_profile": "~Anthony_Liang1;~Guy_Tennenholtz2;~ChihWei_Hsu2;~Yinlam_Chow1;~Erdem_Biyik1;~Craig_Boutilier2", "aff": "University of Southern California;Google;Google Research;Google Research;University of Southern California;Google", "aff_domain": "usc.edu;google.com;google.com;google.com;usc.edu;google.com", "position": "PhD student;Researcher;Software Engineer;Research Scientist;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nliang2024dynamiterl,\ntitle={Dyna{MITE}-{RL}: A Dynamic Model for Improved Temporal Meta-Reinforcement Learning},\nauthor={Anthony Liang and Guy Tennenholtz and ChihWei Hsu and Yinlam Chow and Erdem Biyik and Craig Boutilier},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OPrPegYIZo}\n}", "github": "", "reviewers": "hpwN;arsQ;iPsr;xD22", "pdf_size": 1893675, "rating": "5;6;7;7", "confidence": "3;4;4;2", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "52;93;220;69", "wc_strengths": "57;147;234;38", "wc_weaknesses": "264;126;99;34", "wc_questions": "2;65;43;31", "wc_limitations": "2;38;20;14", "wc_review": "377;469;616;186", "wc_reply_reviewers": "31;23;27;15", "wc_reply_authors": "48;22;27;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 108.5, 66.00189391222042 ], "wc_strengths_avg": [ 119.0, 78.124899999936 ], "wc_weaknesses_avg": [ 130.75, 83.8849658758946 ], "wc_questions_avg": [ 35.25, 22.741756748325315 ], "wc_limitations_avg": [ 18.5, 12.99038105676658 ], "wc_review_avg": [ 412.0, 155.85730653389336 ], "wc_reply_reviewers_avg": [ 24.0, 5.916079783099616 ], "wc_reply_authors_avg": [ 31.0, 10.024968827881711 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1360860237515059537&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "usc.edu;google.com;google.com;google.com;usc.edu;google.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.usc.edu;https://www.google.com", "aff_unique_abbr": "USC;Google", "aff_campus_unique_index": "0;1;1;1;0;1", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Newswire: A Large-Scale Structured Database of a Century of Historical News", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97718", "id": "OPx8Dd27zT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OPx8Dd27zT", "openreview": "https://openreview.net/forum?id=OPx8Dd27zT", "poster": "/media/PosterPDFs/NeurIPS%202024/97718.png?t=1731464751.3507354", "project": "", "author_site": "Emily Silcock, Abhishek Arora, Luca D'Amico-Wong, Melissa Dell", "tldr": "", "abstract": "In the U.S. historically, local newspapers drew their content largely from newswires like the Associated Press. Historians argue that newswires played a pivotal role in creating a national identity and shared understanding of the world, but there is no comprehensive archive of the content sent over newswires. We reconstruct such an archive by applying a customized deep learning pipeline to hundreds of terabytes of raw image scans from thousands of local newspapers. The resulting dataset contains 2.7 million unique public domain U.S. news wire articles, written between 1878 and 1977. Locations in these articles are georeferenced, topics are tagged using customized neural topic classification, named entities are recognized, and individuals are disambiguated to Wikipedia using a novel entity disambiguation model. To construct the Newswire dataset, we first recognize newspaper layouts and transcribe around 138 millions structured article texts from raw image scans. We then use a customized neural bi-encoder model to de-duplicate reproduced articles, in the presence of considerable abridgement and noise, quantifying how widely each article was reproduced. A text classifier is used to ensure that we only include newswire articles, which historically are in the public domain. The structured data that accompany the texts provide rich information about the who (disambiguated individuals), what (topics), and where (georeferencing) of the news that millions of Americans read over the course of a century. We also include Library of Congress metadata information about the newspapers that ran the articles on their front pages. The Newswire dataset is useful both for large language modeling - expanding training data beyond what is available from modern web texts - and for studying a diversity of questions in computational linguistics, social science, and the digital humanities.", "keywords": "historical news;historical nlp;document processing", "primary_area": "", "supplementary_material": "/attachment/adb3d65609679e61b3a2bf0df6bcc9eb9710a7ec.pdf", "author": "Emily Silcock;Abhishek Arora;Luca D'Amico-Wong;Melissa Dell", "authorids": "~Emily_Silcock1;~Abhishek_Arora1;~Luca_D'Amico-Wong1;~Melissa_Dell1", "gender": ";M;M;", "homepage": ";https://econabhishek.github.io/;;", "dblp": ";344/4529;330/9302;", "google_scholar": "dfGziwkAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": ";;;", "linkedin": ";abhishek-arora1996/;luca-d-amico-wong-41879712b/;", "or_profile": "~Emily_Silcock1;~Abhishek_Arora1;~Luca_D'Amico-Wong1;~Melissa_Dell1", "aff": "Harvard University;Harvard University, Harvard University;Harvard University;", "aff_domain": "harvard.edu;fas.harvard.edu;harvard.edu;", "position": "PhD student;Researcher;Undergrad student;", "bibtex": "@inproceedings{\nsilcock2024newswire,\ntitle={Newswire: A Large-Scale Structured Database of a Century of Historical News},\nauthor={Emily Silcock and Abhishek Arora and Luca D'Amico-Wong and Melissa Dell},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=OPx8Dd27zT}\n}", "github": "", "reviewers": "L75Y;kSLJ;GpZ3;L8QG", "pdf_size": 3044363, "rating": "6;6;7;8", "confidence": "4;4;4;3", "wc_summary_and_contributions": "35;92;267;158", "wc_strengths": "37;47;155;217", "wc_improvement": "312;650;457;373", "wc_limitations": "29;55;36;7", "wc_correctness": "32;13;6;4", "wc_clarity": "72;10;7;6", "wc_relation_to_prior_work": "66;13;5;9", "wc_documentation": "35;43;6;16", "wc_additional_feedback": "1;1;1;1", "wc_review": "619;924;940;791", "wc_reply_reviewers": "49;117;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 138.0, 86.26412927746966 ], "wc_strengths_avg": [ 114.0, 75.34586916347837 ], "wc_improvement_avg": [ 448.0, 127.48137118810732 ], "wc_limitations_avg": [ 31.75, 17.166464400102893 ], "wc_correctness_avg": [ 13.75, 11.053845484717073 ], "wc_clarity_avg": [ 23.75, 27.896012259819503 ], "wc_relation_to_prior_work_avg": [ 23.25, 24.843258642939738 ], "wc_documentation_avg": [ 25.0, 14.713938969562161 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 818.5, 128.88851771977207 ], "wc_reply_reviewers_avg": [ 41.5, 47.96092159247985 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9656073604563168913&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "harvard.edu;fas.harvard.edu;harvard.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Ordering-Based Causal Discovery for Linear and Nonlinear Relations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95372", "id": "OQUg2T4qJB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OQUg2T4qJB", "openreview": "https://openreview.net/forum?id=OQUg2T4qJB", "poster": "/media/PosterPDFs/NeurIPS%202024/95372.png?t=1730098681.3999124", "project": "", "author_site": "Zhuopeng Xu, Yujie Li, Cheng Liu, Ning Gui", "tldr": "", "abstract": "Identifying causal relations from purely observational data typically requires additional assumptions on relations and/or noise. Most current methods restrict their analysis to datasets that are assumed to have pure linear or nonlinear relations, which is often not reflective of real-world datasets that contain a combination of both. This paper presents CaPS, an ordering-based causal discovery algorithm that effectively handles linear and nonlinear relations. CaPS introduces a novel identification criterion for topological ordering and incorporates the concept of \"parent score\" during the post-processing optimization stage. These scores quantify the strength of the average causal effect, helping to accelerate the pruning process and correct inaccurate predictions in the pruning step. Experimental results demonstrate that our proposed solutions outperform state-of-the-art baselines on synthetic data with varying ratios of linear and nonlinear relations. The results obtained from real-world data also support the competitiveness of CaPS. Code and datasets are available at https://github.com/E2real/CaPS.", "keywords": "causal discovery; directed acyclic graph; additive noise model", "primary_area": "causal_inference", "supplementary_material": "/attachment/3c6ed28cd5096a721477b12e39f262f4104bba22.zip", "author": "Zhuopeng Xu;Yujie Li;Cheng Liu;Ning Gui", "authorids": "~Zhuopeng_Xu1;~Yujie_Li8;~Cheng_Liu10;~Ning_Gui1", "gender": "M;F;;M", "homepage": ";;;https://faculty.csu.edu.cn/guining/zh_CN/index.htm", "dblp": "278/3587;;;30/3048", "google_scholar": ";;https://scholar.google.com.hk/citations?user=sgPLRHQAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0009-0001-7397-2543;;", "linkedin": ";;;", "or_profile": "~Zhuopeng_Xu1;~Yujie_Li8;~Cheng_Liu10;~Ning_Gui1", "aff": "Central South University;Central South University;Central South University;Central South University", "aff_domain": "csu.edu.cn;csu.edu.cn;csu.edu.cn;csu.edu.cn", "position": "MS student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nxu2024orderingbased,\ntitle={Ordering-Based Causal Discovery for Linear and Nonlinear Relations},\nauthor={Zhuopeng Xu and Yujie Li and Cheng Liu and Ning Gui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OQUg2T4qJB}\n}", "github": "", "reviewers": "wbDM;7cyK;SPoa;8ao9", "pdf_size": 10060763, "rating": "4;6;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "124;54;48;83", "wc_strengths": "56;47;30;86", "wc_weaknesses": "322;90;30;38", "wc_questions": "11;2;4;131", "wc_limitations": "1;1;3;17", "wc_review": "514;194;115;355", "wc_reply_reviewers": "193;90;18;75", "wc_reply_authors": "1079;503;362;861", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 30.06139550985616 ], "wc_strengths_avg": [ 54.75, 20.314711418083203 ], "wc_weaknesses_avg": [ 120.0, 118.87808881370864 ], "wc_questions_avg": [ 37.0, 54.37370688117557 ], "wc_limitations_avg": [ 5.5, 6.689544080129826 ], "wc_review_avg": [ 294.5, 153.42832202693216 ], "wc_reply_reviewers_avg": [ 94.0, 63.15457228103124 ], "wc_reply_authors_avg": [ 701.25, 283.99328777279226 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RvSZjwNBaZQJ:scholar.google.com/&scioq=Ordering-Based+Causal+Discovery+for+Linear+and+Nonlinear+Relations&hl=en&as_sdt=0,34", "gs_version_total": 4, "email": "csu.edu.cn;csu.edu.cn;csu.edu.cn;csu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Central South University", "aff_unique_dep": "", "aff_unique_url": "https://www.csu.edu.cn", "aff_unique_abbr": "CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Parallel Backpropagation for Shared-Feature Visualization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95371", "id": "OQzCSb6fbl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OQzCSb6fbl", "openreview": "https://openreview.net/forum?id=OQzCSb6fbl", "poster": "", "project": "", "author_site": "Alexander Lappe, Anna Bogn\u00e1r, Ghazaleh Ghamkahri Nejad, Albert Mukovskiy, Lucas Martini, Martin Giese, Rufin Vogels", "tldr": "", "abstract": "High-level visual brain regions contain subareas in which neurons appear to respond more strongly to examples of a particular semantic category, like faces or bodies, rather than objects. However, recent work has shown that while this finding holds on average, some out-of-category stimuli also activate neurons in these regions. This may be due to visual features common among the preferred class also being present in other images. Here, we propose a deep-learning-based approach for visualizing these features. For each neuron, we identify relevant visual features driving its selectivity by modelling responses to images based on latent activations of a deep neural network. Given an out-of-category image which strongly activates the neuron, our method first identifies a reference image from the preferred category yielding a similar feature activation pattern. We then backpropagate latent activations of both images to the pixel level, while enhancing the identified shared dimensions and attenuating non-shared features. The procedure highlights image regions containing shared features driving responses of the model neuron. We apply the algorithm to novel recordings from body-selective regions in macaque IT cortex in order to understand why some images of objects excite these neurons. Visualizations reveal object parts which resemble parts of a macaque body, shedding light on neural preference of these objects.", "keywords": "Computer Vision;Neuroscience;Deep Learning;Feature Visualization", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/a4c155e1f84905e7f5079216c45fbcdcbf7f547d.zip", "author": "Alexander Lappe;Anna Bogn\u00e1r;Ghazaleh Ghamkhari Nejad;Albert Mukovskiy;Lucas Martini;Martin A. Giese;Rufin Vogels", "authorids": "~Alexander_Lappe1;~Anna_Bogn\u00e1r1;~Ghazaleh_Ghamkhari_Nejad1;~Albert_Mukovskiy1;~Lucas_Martini1;~Martin_A._Giese1;~Rufin_Vogels1", "gender": "M;F;F;M;M;M;M", "homepage": "http://www.compsens.uni-tuebingen.de/compsens/index.php/people?view=member&task=show&id=70;;;https://www.compsens.uni-tuebingen.de/compsens/index.php/people?view=member&task=show&id=7;https://www.compsens.uni-tuebingen.de/compsens/index.php;http://www.compsens.uni-tuebingen.de/;https://www.kuleuven.be/wieiswie/en/person/00006512", "dblp": ";;;;;g/MartinAGiese;", "google_scholar": "KI4Y9eMAAAAJ;sZXBQh8AAAAJ;https://scholar.google.com/citations?hl=en;1ETMlWEAAAAJ;https://scholar.google.de/citations?user=_ovDunkAAAAJ;https://scholar.google.co.uk/citations?user=ebo0Qn4AAAAJ;fuQ8xgkAAAAJ", "orcid": "0000-0002-8124-8977;0009-0001-5953-8257;0000-0001-8345-630X;;0000-0002-7445-0691;0000-0003-1178-2768;", "linkedin": "alappe-96b8b11b4/;;;;lucas-martini/;;", "or_profile": "~Alexander_Lappe1;~Anna_Bogn\u00e1r1;~Ghazaleh_Ghamkhari_Nejad1;~Albert_Mukovskiy1;~Lucas_Martini1;~Martin_A._Giese1;~Rufin_Vogels1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;KU Leuven;KU Leuven;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Uni Tuebingen;KU Leuven", "aff_domain": "uni-tuebingen.de;kuleuven.be;kuleuven.be;uni-tuebingen.de;uni-tuebingen.de;tuebingen.de;kuleuven.be", "position": "PhD student;Postdoc;Postdoc;Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlappe2024parallel,\ntitle={Parallel Backpropagation for Shared-Feature Visualization},\nauthor={Alexander Lappe and Anna Bogn{\\'a}r and Ghazaleh Ghamkhari Nejad and Albert Mukovskiy and Lucas Martini and Martin A. Giese and Rufin Vogels},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OQzCSb6fbl}\n}", "github": "", "reviewers": "yQac;aP8u;7NES;5Zdi", "pdf_size": 4781930, "rating": "5;6;8;10", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;4;4;4", "wc_summary": "103;75;124;94", "wc_strengths": "102;28;56;138", "wc_weaknesses": "37;58;183;140", "wc_questions": "96;65;2;38", "wc_limitations": "6;8;2;11", "wc_review": "344;234;367;421", "wc_reply_reviewers": "33;21;8;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 17.621010186706094 ], "wc_strengths_avg": [ 81.0, 42.20189569201838 ], "wc_weaknesses_avg": [ 104.5, 59.457968347396466 ], "wc_questions_avg": [ 50.25, 34.60039739656179 ], "wc_limitations_avg": [ 6.75, 3.2691742076555053 ], "wc_review_avg": [ 341.5, 68.06798072515447 ], "wc_reply_reviewers_avg": [ 18.75, 9.443913383762052 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8268106308031117, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z8dDmlw8zsQJ:scholar.google.com/&scioq=Parallel+Backpropagation+for+Shared-Feature+Visualization&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "uni-tuebingen.de;kuleuven.be;kuleuven.be;uni-tuebingen.de;uni-tuebingen.de;tuebingen.de;kuleuven.be", "author_num": 7, "aff_unique_index": "0;1;1;0;0;2;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;Katholieke Universiteit Leuven;University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.kuleuven.be;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;KU Leuven;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;1;1;0;0;0;1", "aff_country_unique": "Germany;Belgium" }, { "title": "On the Power of Small-size Graph Neural Networks for Linear Programming", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95370", "id": "ORQiboaRqY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ORQiboaRqY", "openreview": "https://openreview.net/forum?id=ORQiboaRqY", "poster": "/media/PosterPDFs/NeurIPS%202024/95370.png?t=1730618682.6313634", "project": "", "author_site": "Qian Li, Tian Ding, Linxin Yang, Minghui Ouyang, Qingjiang Shi, Ruoyu Sun", "tldr": "", "abstract": "Graph neural networks (GNNs) have recently emerged as powerful tools for addressing complex optimization problems. It has been theoretically demonstrated that GNNs can universally approximate the solution mapping functions of linear programming (LP) problems. However, these theoretical results typically require GNNs to have large parameter sizes. Conversely, empirical experiments have shown that relatively small GNNs can solve LPs effectively, revealing a significant discrepancy between theoretical predictions and practical observations. In this work, we aim to bridge this gap by providing a theoretical foundation for the effectiveness of small-size GNNs. We prove that polylogarithmic-depth, constant-width GNNs are sufficient to solve packing and covering LPs, two widely used classes of LPs. Our proof leverages the capability of GNNs to simulate a variant of the gradient descent algorithm on a carefully selected potential function. Additionally, we introduce a new GNN architecture, termed GD-Net. Experimental results demonstrate that GD-Net significantly outperforms conventional GNN structures while using fewer parameters.", "keywords": "Learning to optimize;graph neural network;linear programming;gradient descent;efficiency", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Qian Li;Tian Ding;Linxin Yang;Minghui Ouyang;Qingjiang Shi;Ruoyu Sun", "authorids": "~Qian_Li20;~Tian_Ding1;~Linxin_Yang1;~Minghui_Ouyang1;~Qingjiang_Shi1;~Ruoyu_Sun1", "gender": "M;M;M;M;M;", "homepage": "https://sites.google.com/view/liqiansea;;;http://www.pku.edu.cn;https://sse.tongji.edu.cn/info/1210/4497.htm;https://ruoyus.github.io/", "dblp": "69/5902-12;;;;63/1006;30/9879-1", "google_scholar": "hG-hcbvpn9kC;https://scholar.google.com.hk/citations?user=lVkDF-YAAAAJ;https://scholar.google.com/citations?hl=en;;8xoKeR0AAAAJ;PsfzbCMAAAAJ", "orcid": "0000-0002-2047-8146;0000-0002-9383-8405;;;;", "linkedin": ";;;;;", "or_profile": "~Qian_Li20;~Tian_Ding1;~Linxin_Yang1;~Minghui_Ouyang1;~Qingjiang_Shi1;~Ruoyu_Sun1", "aff": "Shenzhen Research Institute of Big Data;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen;Peking University;Tongji University;The Chinese University of Hong Kong", "aff_domain": "sribd.cn;sribd.cn;link.cuhk.edu.cn;pku.edu.cn;tongji.edu.cn;cuhk.edu.cn", "position": "Researcher;Researcher;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024on,\ntitle={On the Power of Small-size Graph Neural Networks for Linear Programming},\nauthor={Qian Li and Tian Ding and Linxin Yang and Minghui Ouyang and Qingjiang Shi and Ruoyu Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ORQiboaRqY}\n}", "github": "", "reviewers": "hJjQ;h5Xh;xnS6", "pdf_size": 521590, "rating": "4;7;7", "confidence": "4;5;2", "soundness": "2;3;4", "novelty": "1;3;4", "presentation": "4;3;3", "wc_summary": "71;102;76", "wc_strengths": "17;57;104", "wc_weaknesses": "189;112;29", "wc_questions": "3;252;52", "wc_limitations": "11;1;64", "wc_review": "291;524;325", "wc_reply_reviewers": "150;82;76", "wc_reply_authors": "1216;36;41", "reply_reviewers": "1;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 83.0, 13.589211407093005 ], "wc_strengths_avg": [ 59.333333333333336, 35.55590277608237 ], "wc_weaknesses_avg": [ 110.0, 65.33503399147097 ], "wc_questions_avg": [ 102.33333333333333, 107.70432777851698 ], "wc_limitations_avg": [ 25.333333333333332, 27.644569488820444 ], "wc_review_avg": [ 380.0, 102.76510432372784 ], "wc_reply_reviewers_avg": [ 102.66666666666667, 33.559234529741914 ], "wc_reply_authors_avg": [ 431.0, 555.082576439458 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.18898223650461365, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9T2yskQhBxwJ:scholar.google.com/&scioq=On+the+Power+of+Small-size+Graph+Neural+Networks+for+Linear+Programming&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "sribd.cn;sribd.cn;link.cuhk.edu.cn;pku.edu.cn;tongji.edu.cn;cuhk.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;1", "aff_unique_norm": "Shenzhen Research Institute of Big Data;Chinese University of Hong Kong;Peking University;Tongji University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.sribd.cn;https://www.cuhk.edu.cn;http://www.pku.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": ";CUHK;Peking U;Tongji", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AMAGO-2: Breaking the Multi-Task Barrier in Meta-Reinforcement Learning with Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95369", "id": "OSHaRf4TVU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OSHaRf4TVU", "openreview": "https://openreview.net/forum?id=OSHaRf4TVU", "poster": "", "project": "", "author_site": "Jake Grigsby, Justin Sasek, Samyak Parajuli, Ikechukwu D. Adebi, Amy Zhang, Yuke Zhu", "tldr": "", "abstract": "Language models trained on diverse datasets unlock generalization by in-context learning. Reinforcement Learning (RL) policies can achieve a similar effect by meta-learning within the memory of a sequence model. However, meta-RL research primarily focuses on adapting to minor variations of a single task. It is difficult to scale towards more general behavior without confronting challenges in multi-task optimization, and few solutions are compatible with meta-RL's goal of learning from large training sets of unlabeled tasks. To address this challenge, we revisit the idea that multi-task RL is bottlenecked by imbalanced training losses created by uneven return scales across different tasks. We build upon recent advancements in Transformer-based (in-context) meta-RL and evaluate a simple yet scalable solution where both an agent's actor and critic objectives are converted to classification terms that decouple optimization from the current scale of returns. Large-scale comparisons in Meta-World ML45, Multi-Game Procgen, Multi-Task POPGym, Multi-Game Atari, and BabyAI find that this design unlocks significant progress in online multi-task adaptation and memory problems without explicit task labels.", "keywords": "Transformers;Meta-RL;Multi-Task RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jake Grigsby;Justin Sasek;Samyak Parajuli;Daniel Adebi;Amy Zhang;Yuke Zhu", "authorids": "~Jake_Grigsby1;~Justin_Sasek1;~Samyak_Parajuli1;~Daniel_Adebi1;~Amy_Zhang1;~Yuke_Zhu1", "gender": "M;;M;M;M;F", "homepage": "https://github.com/jakegrigsby;;;https://danieladebi.github.io;https://cs.utexas.edu/~yukez/;", "dblp": "276/6109;;https://dblp.uni-trier.de/pers/hd/p/Parajuli:Samyak;;133/1772;43/2754", "google_scholar": "qgUe3jYAAAAJ;;NACSmGwAAAAJ;IhwFoWYAAAAJ;mWGyYMsAAAAJ;", "orcid": ";0000-0003-0431-6822;;;;", "linkedin": ";justinsasek/;samyakparajuli/;ikdanieladebi/;;", "or_profile": "~Jake_Grigsby1;~Justin_Sasek1;~Samyak_Parajuli1;~Daniel_Adebi1;~Yuke_Zhu1;~Amy_Zhang2", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;Computer Science Department, University of Texas, Austin;Meta Facebook", "aff_domain": "cs.utexas.edu;utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu;facebook.com", "position": "PhD student;Undergrad student;PhD student;PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\ngrigsby2024amago,\ntitle={{AMAGO}-2: Breaking the Multi-Task Barrier in Meta-Reinforcement Learning with Transformers},\nauthor={Jake Grigsby and Justin Sasek and Samyak Parajuli and Daniel Adebi and Amy Zhang and Yuke Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OSHaRf4TVU}\n}", "github": "", "reviewers": "Ypn5;NjDj;1L6X;TXBg", "pdf_size": 3739468, "rating": "3;6;6;7", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "1;3;3;3", "presentation": "3;3;3;3", "wc_summary": "51;119;82;55", "wc_strengths": "32;175;129;18", "wc_weaknesses": "237;15;157;197", "wc_questions": "4;275;29;35", "wc_limitations": "7;69;37;21", "wc_review": "331;653;434;326", "wc_reply_reviewers": "220;17;30;16", "wc_reply_authors": "655;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.75, 27.151197027018902 ], "wc_strengths_avg": [ 88.5, 65.73621528503143 ], "wc_weaknesses_avg": [ 151.5, 83.73022154515058 ], "wc_questions_avg": [ 85.75, 109.880332635099 ], "wc_limitations_avg": [ 33.5, 23.08137777516758 ], "wc_review_avg": [ 436.0, 132.4933960618415 ], "wc_reply_reviewers_avg": [ 70.75, 86.34632302536107 ], "wc_reply_authors_avg": [ 163.75, 283.62331973940366 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17917695161655405677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.utexas.edu;utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu;facebook.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of Texas at Austin;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.utexas.edu;https://meta.com", "aff_unique_abbr": "UT Austin;Meta", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OTjTKFk7gb", "title": "AuctionNet: A Novel Benchmark for Decision-Making in Large-Scale Games", "track": "Datasets & Benchmarks", "status": "Spotlight", "tldr": "", "abstract": "Decision-making in large-scale games is an essential research area in artificial intelligence (AI) with significant real-world impact. However, the limited access to realistic large-scale game environments has hindered research progress in this area. In this paper, we present AuctionNet, a benchmark for bid decision-making in large-scale ad auctions derived from a real-world online advertising platform. AuctionNet is composed of three parts: an ad auction environment, a pre-generated dataset based on the environment, and performance evaluations of several baseline bid decision-making algorithms. More specifically, the environment effectively replicates the integrity and complexity of real-world ad auctions through the interaction of several modules: the ad opportunity generation module employs deep generative networks to bridge the gap between simulated and real-world data while mitigating the risk of sensitive data exposure; the bidding module implements diverse auto-bidding agents trained with different decision-making algorithms; and the auction module is anchored in the classic Generalized Second Price (GSP) auction but also allows for customization of auction mechanisms as needed. To facilitate research and provide insights into the environment, we have also pre-generated a substantial dataset based on the environment. The dataset contains 10 million ad opportunities, 48 diverse auto-bidding agents, and over 500 million auction records. Performance evaluations of baseline algorithms such as linear programming, reinforcement learning, and generative models for bid decision-making are also presented as a part of AuctionNet. AuctionNet has powered the NeurIPS 2024 Auto-Bidding in Large-Scale Auctions competition, providing competition environments for over 1,500 teams. We believe that AuctionNet is applicable not only to research on bid decision-making in ad auctions but also to the general area of decision-making in large-scale games. Code: https://github.com/alimama-tech/AuctionNet.", "keywords": "Decision-Making;Large-Scale Games;Online Advertising;Auto-bidding", "primary_area": "", "supplementary_material": "", "author": "Kefan Su;Yusen Huo;Zhilin Zhang;Shuai Dou;Chuan Yu;Jian Xu;Zongqing Lu;Bo Zheng", "authorids": "~Kefan_Su1;~Yusen_Huo1;~Zhilin_Zhang1;~Shuai_Dou1;~Chuan_Yu1;~Jian_Xu8;~Zongqing_Lu2;~Bo_Zheng5", "gender": ";M;M;M;M;M;;M", "homepage": ";;https://scholar.google.com/citations?user=ehwxPtEAAAAJ&hl=zh-CN;;;https://www.linkedin.com/in/jianxu15/;;", "dblp": ";;95/1820-3;;;73/1149-15.html;;33/1610-7", "google_scholar": ";;ehwxPtEAAAAJ;;;30VZBsIAAAAJ;;3gHhO9QAAAAJ", "orcid": ";0009-0006-8863-3209;0000-0001-6665-2288;;0000-0001-8094-1545;0000-0003-3111-1005;;0000-0002-4037-6315", "linkedin": ";yusen-huo-072b9a23a/;;shuai-dou-51096315a/;;jianxu15/;;bo-zheng-0315254/", "or_profile": "~Kefan_Su1;~Yusen_Huo1;~Zhilin_Zhang1;~Shuai_Dou1;~Chuan_Yu1;~Jian_Xu8;~Zongqing_Lu2;~Bo_Zheng5", "aff": ";TAOBAO & TMALL GROUP;Alibaba Group;;Alibaba Group;Alibaba Group;;Alibaba Group", "aff_domain": ";taobao.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com", "position": ";Researcher;Senior Algorithm Specialist;;Researcher;Principal Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nsu2024a,\ntitle={A Novel Benchmark for Decision-Making in Uncertain and Competitive Games},\nauthor={Kefan Su and Yusen Huo and Zhilin Zhang and Shuai Dou and Chuan Yu and Jian Xu and Zongqing Lu and Bo Zheng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=OTjTKFk7gb}\n}", "github": "", "project": "", "reviewers": "UBgR;qWub;3s3b;FJBQ", "site": "https://openreview.net/forum?id=OTjTKFk7gb", "pdf_size": 4029751, "rating": "6;6;7;8", "confidence": "5;3;3;4", "wc_summary_and_contributions": "72;50;66;95", "wc_strengths": "3;17;70;51", "wc_improvement": "90;148;26;24", "wc_limitations": "1;6;32;34", "wc_correctness": "1;147;49;1", "wc_clarity": "4;162;6;1", "wc_relation_to_prior_work": "1;24;26;1", "wc_documentation": "1;21;52;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "174;576;328;213", "wc_reply_reviewers": "0;24;37;0", "wc_reply_authors": "0;48;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;4;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 70.75, 16.145819892467525 ], "wc_strengths_avg": [ 35.25, 26.592997198510737 ], "wc_improvement_avg": [ 72.0, 51.28352561983234 ], "wc_limitations_avg": [ 18.25, 14.872373717735847 ], "wc_correctness_avg": [ 49.5, 59.60494945891658 ], "wc_clarity_avg": [ 43.25, 68.58343458882764 ], "wc_relation_to_prior_work_avg": [ 13.0, 12.020815280171307 ], "wc_documentation_avg": [ 19.75, 20.06707502353046 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 322.75, 156.79186043924602 ], "wc_reply_reviewers_avg": [ 15.25, 15.927570436196476 ], "wc_reply_authors_avg": [ 12.0, 20.784609690826528 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10828149622833563804&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "TAOBAO & TMALL GROUP;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.taobao.com;https://www.alibaba.com", "aff_unique_abbr": ";Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CLUES: Collaborative Private-domain High-quality Data Selection for LLMs via Training Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95368", "id": "OU1uqd1vyw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OU1uqd1vyw", "openreview": "https://openreview.net/forum?id=OU1uqd1vyw", "poster": "", "project": "", "author_site": "Wanru Zhao, Hongxiang Fan, Shell Xu Hu, Wangchunshu Zhou, Nicholas Lane", "tldr": "", "abstract": "Recent research has highlighted the importance of data quality in scaling large language models (LLMs). However, automated data quality control faces unique challenges in collaborative settings where sharing is not allowed directly between data silos. To tackle this issue, this paper proposes a novel data quality control technique based on the notion of data influence on the training dynamics of LLMs, that high quality data are more likely to have similar training dynamics to the anchor dataset. We then leverage the influence of the training dynamics to select high-quality data from different private domains, with centralized model updates on the server side in a collaborative training fashion by either model merging or federated learning. As for the data quality indicator, we compute the per-sample gradients with respect to the private data and the anchor dataset, and use the trace of the accumulated inner products as a measurement of data quality. In addition, we develop a quality control evaluation tailored for collaborative settings with heterogeneous medical domain data. Experiments show that training on the high-quality data selected by our method can often outperform other data selection methods for collaborative fine-tuning of LLMs, across diverse private domain datasets, in medical, multilingual and financial settings.", "keywords": "large language models;data quality;model merging;federated learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Wanru Zhao;Hongxiang Fan;Shell Xu Hu;Wangchunshu Zhou;Nicholas Donald Lane", "authorids": "~Wanru_Zhao1;~Hongxiang_Fan1;~Shell_Xu_Hu1;~Wangchunshu_Zhou1;~Nicholas_Donald_Lane1", "gender": ";M;;M;", "homepage": ";https://os-hxfan.github.io/;;https://michaelzhouwang.github.io;", "dblp": ";;;245/8640.html;", "google_scholar": ";iBT_uw4AAAAJ;;UebIjuQAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Wanru_Zhao1;~Hongxiang_Fan1;~Shell_Xu_Hu1;~Wangchunshu_Zhou1;~Nicholas_Donald_Lane1", "aff": ";Samsung;;AIWaves Inc.;", "aff_domain": ";samsung.com;;aiwaves.cn;", "position": ";Researcher;;Researcher;", "bibtex": "@inproceedings{\nzhao2024clues,\ntitle={{CLUES}: Collaborative Private-domain High-quality Data Selection for {LLM}s via Training Dynamics},\nauthor={Wanru Zhao and Hongxiang Fan and Shell Xu Hu and Wangchunshu Zhou and Nicholas Donald Lane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OU1uqd1vyw}\n}", "github": "", "reviewers": "V7za;ngAe;Xfnf;igLM", "pdf_size": 1082051, "rating": "4;4;5;7", "confidence": "4;4;5;3", "soundness": "3;2;2;3", "novelty": "3;2;2;3", "presentation": "4;2;2;3", "wc_summary": "104;180;49;78", "wc_strengths": "47;82;32;125", "wc_weaknesses": "185;229;36;91", "wc_questions": "115;202;156;5", "wc_limitations": "18;161;4;1", "wc_review": "469;854;277;300", "wc_reply_reviewers": "0;910;226;33", "wc_reply_authors": "1043;1415;1885;67", "reply_reviewers": "0;2;2;1", "reply_authors": "5;4;6;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 102.75, 48.65888921872344 ], "wc_strengths_avg": [ 71.5, 35.82247897619594 ], "wc_weaknesses_avg": [ 135.25, 75.94858458193939 ], "wc_questions_avg": [ 119.5, 72.91947613635195 ], "wc_limitations_avg": [ 46.0, 66.70457255690947 ], "wc_review_avg": [ 475.0, 231.03354734756596 ], "wc_reply_reviewers_avg": [ 292.25, 366.95529359855266 ], "wc_reply_authors_avg": [ 1102.5, 668.1622183272563 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 4.25, 1.479019945774904 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1171505411211823082&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";samsung.com;;aiwaves.cn;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Samsung;AIWaves Inc.", "aff_unique_dep": "Samsung;", "aff_unique_url": "https://www.samsung.com;", "aff_unique_abbr": "Samsung;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Perplexity-aware Correction for Robust Alignment with Noisy Preferences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95367", "id": "OUXnnPJzXJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OUXnnPJzXJ", "openreview": "https://openreview.net/forum?id=OUXnnPJzXJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95367.png?t=1732465688.5636535", "project": "", "author_site": "Keyi Kong, Xilie Xu, Di Wang, Jingfeng ZHANG, Mohan Kankanhalli", "tldr": "", "abstract": "Alignment techniques are critical in ensuring that large language models (LLMs) output helpful and harmless content by enforcing the LLM-generated content to align with human preferences. \nHowever, the existence of noisy preferences (NPs), where the responses are mistakenly labelled as chosen or rejected, could spoil the alignment, thus making the LLMs generate useless and even malicious content. \nExisting methods mitigate the issue of NPs from the loss perspective by adjusting the alignment loss based on a clean validation dataset.\nOrthogonal to these loss-oriented methods, we propose perplexity-aware correction (PerpCorrect) from the data perspective for robust alignment which detects and corrects NPs based on the differences between the perplexity of the chosen and rejected responses (dubbed as PPLDiff). \nIntuitively, a higher PPLDiff indicates a higher probability of the NP because a rejected/chosen response which is mistakenly labelled as chosen/rejected is less preferable to be generated by an aligned LLM, thus having a higher/lower perplexity.\nPerpCorrect works in three steps: \n(1) PerpCorrect aligns a surrogate LLM using the clean validation data to make the PPLDiff able to distinguish clean preferences (CPs) and NPs. \n(2) PerpCorrect further aligns the surrogate LLM by incorporating the reliable clean training data whose PPLDiff is extremely small and reliable noisy training data whose PPLDiff is extremely large after correction to boost the discriminatory power.\n(3) Detecting and correcting NPs according to the PPLDiff obtained by the aligned surrogate LLM to obtain a denoised training dataset for robust alignment.\nComprehensive experiments validate that our proposed PerpCorrect can achieve state-of-the-art alignment performance under NPs.\nNotably, PerpCorrect demonstrates practical utility by requiring only a modest amount of validation data and being compatible with various alignment techniques. \nOur code is available at [PerpCorrect](https://github.com/luxinyayaya/PerpCorrect).", "keywords": "Large language model;Label-noise learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/3721d5ec8f0b53bdc139e5a7184bdfa242b2e52d.zip", "author": "Keyi Kong;Xilie Xu;Di Wang;Jingfeng Zhang;Mohan Kankanhalli", "authorids": "~Keyi_Kong1;~Xilie_Xu1;~Di_Wang1;~Jingfeng_Zhang1;~Mohan_Kankanhalli1", "gender": "M;M;;M;M", "homepage": "https://github.com/luxinyayaya;https://godxuxilie.github.io/;;https://zjfheart.github.io;https://www.comp.nus.edu.sg/~mohan", "dblp": "359/3282;259/2327;;227/2664.html;09/3613.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;NS0P1FkAAAAJ;6Lx_eowAAAAJ", "orcid": "0009-0001-2646-4781;;;0000-0003-3491-8074;0000-0002-4846-2015", "linkedin": ";;;;mohan-kankanhalli-583417221", "or_profile": "~Keyi_Kong1;~Xilie_Xu1;~Di_Wang1;~Jingfeng_Zhang1;~Mohan_Kankanhalli1", "aff": "Shandong University;National University of Singapore;;University of Auckland;National University of Singapore", "aff_domain": "sdu.edu.cn;nus.edu.sg;;auckland.ac.nz;nus.edu.sg", "position": "Undergrad student;PhD student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkong2024perplexityaware,\ntitle={Perplexity-aware Correction for Robust Alignment with Noisy Preferences},\nauthor={Keyi Kong and Xilie Xu and Di Wang and Jingfeng Zhang and Mohan Kankanhalli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OUXnnPJzXJ}\n}", "github": "", "reviewers": "JfpF;LN38;J5j5;JSSg", "pdf_size": 418065, "rating": "5;6;7;7", "confidence": "2;5;4;4", "soundness": "3;4;3;4", "novelty": "3;4;3;3", "presentation": "2;4;3;3", "wc_summary": "53;73;84;75", "wc_strengths": "25;101;70;91", "wc_weaknesses": "47;87;16;94", "wc_questions": "41;48;89;2", "wc_limitations": "3;5;19;7", "wc_review": "169;314;278;269", "wc_reply_reviewers": "9;10;10;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.25, 11.321991874224253 ], "wc_strengths_avg": [ 71.75, 29.217931138258233 ], "wc_weaknesses_avg": [ 61.0, 31.567388235329194 ], "wc_questions_avg": [ 45.0, 30.86259872402193 ], "wc_limitations_avg": [ 8.5, 6.224949798994366 ], "wc_review_avg": [ 257.5, 53.79823417176441 ], "wc_reply_reviewers_avg": [ 7.25, 4.205650960315181 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6225430174794673, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12636221628166247419&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sdu.edu.cn;nus.edu.sg;;auckland.ac.nz;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Shandong University;National University of Singapore;University of Auckland", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.nus.edu.sg;https://www.auckland.ac.nz", "aff_unique_abbr": "SDU;NUS;UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "China;Singapore;New Zealand" }, { "title": "HLM-Cite: Hybrid Language Model Workflow for Text-based Scientific Citation Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95366", "id": "OV8YUk151r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OV8YUk151r", "openreview": "https://openreview.net/forum?id=OV8YUk151r", "poster": "/media/PosterPDFs/NeurIPS%202024/95366.png?t=1730553862.1035912", "project": "", "author_site": "Qianyue Hao, Jingyang Fan, Fengli Xu, Jian Yuan, Yong Li", "tldr": "", "abstract": "Citation networks are critical infrastructures of modern science, serving as intricate webs of past literature and enabling researchers to navigate the knowledge production system. To mine information hiding in the link space of such networks, predicting which previous papers (candidates) will a new paper (query) cite is a critical problem that has long been studied. However, an important gap remains unaddressed: the roles of a paper's citations vary significantly, ranging from foundational knowledge basis to superficial contexts. Distinguishing these roles requires a deeper understanding of the logical relationships among papers, beyond simple edges in citation networks. The emergence of large language models (LLMs) with textual reasoning capabilities offers new possibilities for discerning these relationships, but there are two major challenges. First, in practice, a new paper may select its citations from gigantic existing papers, where the combined texts far exceed the context length of LLMs. Second, logical relationships between papers are often implicit, and directly prompting an LLM to predict citations may lead to results based primarily on surface-level textual similarities, rather than the deeper logical reasoning required. In this paper, we introduce the novel concept of core citation, which identifies the critical references that go beyond superficial mentions. Thereby, we elevate the citation prediction task from a simple binary classification to a more nuanced problem: distinguishing core citations from both superficial citations and non-citations. To address this, we propose $\\textbf{HLM-Cite}$, a $\\textbf{H}$ybrid $\\textbf{L}$anguage $\\textbf{M}$odel workflow for citation prediction, which combines embedding and generative LMs. We design a curriculum finetune procedure to adapt a pretrained text embedding model to coarsely retrieve high-likelihood core citations from vast candidate sets and then design an LLM agentic workflow to rank the retrieved papers through one-shot reasoning, revealing the implicit relationships among papers. With the two-stage pipeline, we can scale the candidate sets to 100K papers, vastly exceeding the size handled by existing methods. We evaluate HLM-Cite on a dataset across 19 scientific fields, demonstrating a 17.6\\% performance improvement comparing SOTA methods. Our code is open-source at https://github.com/tsinghua-fib-lab/H-LM for reproducibility.", "keywords": "Citation prediction;large language model;text embedding model", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/621887e47a1ee24c900d6937d0ba1a13edec4446.zip", "author": "Qianyue Hao;Jingyang Fan;Fengli Xu;Jian Yuan;Yong Li", "authorids": "~Qianyue_Hao1;~Jingyang_Fan1;~Fengli_Xu1;~Jian_Yuan1;~Yong_Li7", "gender": "M;M;;;M", "homepage": "https://scholar.google.com/citations?user=3qDk0OcAAAAJ;https://github.com/siempr9;;http://bdktzweb.tsinghua.edu.cn/yuanjian/zh_CN/index.htm;http://fi.ee.tsinghua.edu.cn/~liyong/", "dblp": "272/9909;348/2894;;64/4192;", "google_scholar": "3qDk0OcAAAAJ;;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7109-3588;;;;", "linkedin": ";;;;", "or_profile": "~Qianyue_Hao1;~Jingyang_Fan1;~Fengli_Xu1;~Jian_Yuan1;~Yong_Li7", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;Sichuan University;;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;scu.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;;Professor;Full Professor", "bibtex": "@inproceedings{\nhao2024hlmcite,\ntitle={{HLM}-Cite: Hybrid Language Model Workflow for Text-based Scientific Citation Prediction},\nauthor={Qianyue Hao and Jingyang Fan and Fengli Xu and Jian Yuan and Yong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OV8YUk151r}\n}", "github": "", "reviewers": "b1Uz;7Ub3;3KQH;tEdQ", "pdf_size": 2215730, "rating": "4;6;6;7", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "4;3;2;3", "wc_summary": "67;48;126;110", "wc_strengths": "98;109;207;69", "wc_weaknesses": "304;100;131;222", "wc_questions": "142;101;58;56", "wc_limitations": "10;4;92;28", "wc_review": "621;362;614;485", "wc_reply_reviewers": "56;25;0;49", "wc_reply_authors": "366;11;62;74", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.75, 31.499007920885383 ], "wc_strengths_avg": [ 120.75, 51.895929512824026 ], "wc_weaknesses_avg": [ 189.25, 79.99804685115755 ], "wc_questions_avg": [ 89.25, 35.36506044106245 ], "wc_limitations_avg": [ 33.5, 34.910600109422354 ], "wc_review_avg": [ 520.5, 106.3308515906837 ], "wc_reply_reviewers_avg": [ 32.5, 22.005681084665387 ], "wc_reply_authors_avg": [ 128.25, 139.2881455831759 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iWUmbbe-QVkJ:scholar.google.com/&scioq=HLM-Cite:+Hybrid+Language+Model+Workflow+for+Text-based+Scientific+Citation+Prediction&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;scu.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;Sichuan University", "aff_unique_dep": "Electronic Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.scu.edu.cn", "aff_unique_abbr": "THU;SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "CoMat: Aligning Text-to-Image Diffusion Model with Image-to-Text Concept Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95365", "id": "OW1ldvMNJ6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OW1ldvMNJ6", "openreview": "https://openreview.net/forum?id=OW1ldvMNJ6", "poster": "/media/PosterPDFs/NeurIPS%202024/95365.png?t=1733832641.5216107", "project": "", "author_site": "DONGZHI JIANG, Guanglu Song, Xiaoshi Wu, Renrui Zhang, Dazhong Shen, ZHUOFAN ZONG, Yu Liu, Hongsheng Li", "tldr": "", "abstract": "Diffusion models have demonstrated great success in the field of text-to-image generation. However, alleviating the misalignment between the text prompts and images is still challenging. We break down the problem into two causes: concept ignorance and concept mismapping. To tackle the two challenges, we propose CoMat, an end-to-end diffusion model fine-tuning strategy with the image-to-text concept matching mechanism. Firstly, we introduce a novel image-to-text concept activation module to guide the diffusion model in revisiting ignored concepts. Additionally, an attribute concentration module is proposed to map the text conditions of each entity to its corresponding image area correctly. Extensive experimental evaluations, conducted across three distinct text-to-image alignment benchmarks, demonstrate the superior efficacy of our proposed method, CoMat-SDXL, over the baseline model, SDXL~\\cite{podell2023sdxl}. We also show that our method enhances general condition utilization capability and generalizes to the long and complex prompt despite not specifically training on it.", "keywords": "Text-to-Image Generation;Diffusion model;Text-Image Alignment", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/3c8b1179377a97825cb23e23f5ff2ce56a1d19ac.zip", "author": "Dongzhi Jiang;Guanglu Song;Xiaoshi Wu;Renrui Zhang;Dazhong Shen;Zhuofan Zong;Yu Liu;Hongsheng Li", "authorids": "~Dongzhi_Jiang1;~Guanglu_Song2;~Xiaoshi_Wu1;~Renrui_Zhang1;~Dazhong_Shen1;~Zhuofan_Zong1;~Yu_Liu2;~Hongsheng_Li3", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://caraj7.github.io;;https://github.com/tgxs002;;http://www.shendazhong.com/;https://zongzhuofan.github.io/;http://liuyu.us;http://www.ee.cuhk.edu.hk/~hsli", "dblp": "344/2917;207/4745;299/7667;244/1748;222/7906;266/4989;97/2274-15;27/7402-1", "google_scholar": "jIR4PAsAAAAJ;Bd3v08QAAAAJ;cnOAMbUAAAAJ;YlL3xN4AAAAJ;5vSh09YAAAAJ;vls0YhoAAAAJ;;BN2Ze-QAAAAJ", "orcid": ";;;;0000-0002-3947-4153;;;", "linkedin": ";;;;;;;", "or_profile": "~Dongzhi_Jiang1;~Guanglu_Song2;~Xiaoshi_Wu1;~Renrui_Zhang1;~Dazhong_Shen1;~Zhuofan_Zong1;~Yu_Liu2;~Hongsheng_Li3", "aff": "The Chinese University of Hong Kong;Sensetime;The Chinese University of Hong Kong;MMLab of CUHK & Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;SenseTime;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;sensetime.com;link.cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn;link.cuhk.edu.hk;sensetime.com;cuhk.edu.hk", "position": "PhD student;Computer Vision Researcher;PhD student;PhD student;Researcher;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\njiang2024comat,\ntitle={CoMat: Aligning Text-to-Image Diffusion Model with Image-to-Text Concept Matching},\nauthor={Dongzhi Jiang and Guanglu Song and Xiaoshi Wu and Renrui Zhang and Dazhong Shen and Zhuofan Zong and Yu Liu and Hongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OW1ldvMNJ6}\n}", "github": "", "reviewers": "KjKz;9f7Y;82oN;3go2", "pdf_size": 23257552, "rating": "4;5;5;6", "confidence": "4;3;5;3", "soundness": "3;2;4;2", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "82;41;83;52", "wc_strengths": "64;61;22;46", "wc_weaknesses": "119;178;169;158", "wc_questions": "3;30;2;95", "wc_limitations": "3;51;5;28", "wc_review": "271;361;281;379", "wc_reply_reviewers": "0;76;171;96", "wc_reply_authors": "0;161;1019;143", "reply_reviewers": "0;1;3;2", "reply_authors": "1;2;4;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 18.418740456393863 ], "wc_strengths_avg": [ 48.25, 16.618889854620253 ], "wc_weaknesses_avg": [ 156.0, 22.5055548698538 ], "wc_questions_avg": [ 32.5, 37.79219496139381 ], "wc_limitations_avg": [ 21.75, 19.536824204563032 ], "wc_review_avg": [ 323.0, 47.56048780237646 ], "wc_reply_reviewers_avg": [ 85.75, 60.87025135482849 ], "wc_reply_authors_avg": [ 330.75, 402.22778061690366 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5473611153361784966&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;sensetime.com;link.cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn;link.cuhk.edu.hk;sensetime.com;cuhk.edu.hk", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;SenseTime;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sensetime.com;http://www.shailab.org/", "aff_unique_abbr": "CUHK;SenseTime;Shanghai AI Lab", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Can large language models explore in-context?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95364", "id": "OWPzhVqIux", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OWPzhVqIux", "openreview": "https://openreview.net/forum?id=OWPzhVqIux", "poster": "", "project": "", "author_site": "Akshay Krishnamurthy, Keegan Harris, Dylan J Foster, Cyril Zhang, Aleksandrs Slivkins", "tldr": "", "abstract": "We investigate the extent to which contemporary Large Language Models (LLMs) can engage in exploration, a core capability in reinforcement learning and decision making. We focus on native performance of existing LLMs, without training interventions. We deploy LLMs as agents in simple multi-armed bandit environments, specifying the environment description and interaction history entirely in-context, i.e., within the LLM prompt. We experiment with GPT-3.5, GPT-4, and Llama2, using a variety of prompt designs, and find that the models do not robustly engage in exploration without substantial interventions: i) Only one configuration resulted in satisfactory exploratory behavior: GPT-4 with chain-of-thought reasoning and an externally summarized interaction history; ii) All other configurations did not result in robust exploratory behavior, including those with chain-of-thought reasoning but unsummarized history. While these findings can be interpreted positively, they suggest that external summarization\u2014which may not be possible in more complex settings\u2014is essential for desirable LLM behavior. We conclude that non-trivial algorithmic interventions, such as fine-tuning or dataset curation, may be required to empower LLM-based decision making agents in complex settings.", "keywords": "in-context learning;exploration;multi-armed bandits;reinforcement learning", "primary_area": "bandits", "supplementary_material": "/attachment/0209cc8e54fbe4a3e06dfb8d1834be080c1c8b6a.gz", "author": "Akshay Krishnamurthy;Keegan Harris;Dylan J Foster;Cyril Zhang;Aleksandrs Slivkins", "authorids": "~Akshay_Krishnamurthy1;~Keegan_Harris1;~Dylan_J_Foster1;~Cyril_Zhang1;~Aleksandrs_Slivkins1", "gender": "M;M;;;M", "homepage": "https://www.cics.umass.edu/~akshay/;https://keeganharris.github.io/;http://dylanfoster.net;https://cyrilzhang.com;https://www.microsoft.com/en-us/research/people/slivkins/", "dblp": "85/8024;294/5044;167/4271;203/4448;91/4014", "google_scholar": "https://scholar.google.com.tw/citations?user=K0kaNvkAAAAJ;TnvQIrYAAAAJ;RqwU8xsAAAAJ;sXtjq8IAAAAJ;f2x233wAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Akshay_Krishnamurthy1;~Keegan_Harris1;~Dylan_J_Foster1;~Cyril_Zhang1;~Aleksandrs_Slivkins1", "aff": "Microsoft Research;Carnegie Mellon University;Microsoft Research;Microsoft;Microsoft", "aff_domain": "research.microsoft.com;cmu.edu;microsoft.com;microsoft.com;microsoft.com", "position": "Principal Researcher;PhD student;Principal Researcher;Senior Researcher;Researcher", "bibtex": "@inproceedings{\nkrishnamurthy2024can,\ntitle={Can large language models explore in-context?},\nauthor={Akshay Krishnamurthy and Keegan Harris and Dylan J Foster and Cyril Zhang and Aleksandrs Slivkins},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OWPzhVqIux}\n}", "github": "", "reviewers": "qwyY;Jht5;yw69;6C6Y", "pdf_size": 1395016, "rating": "4;5;5;7", "confidence": "3;4;3;4", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "150;156;60;98", "wc_strengths": "78;35;18;73", "wc_weaknesses": "113;302;58;102", "wc_questions": "1;214;56;83", "wc_limitations": "1;115;15;11", "wc_review": "343;822;207;367", "wc_reply_reviewers": "0;246;0;73", "wc_reply_authors": "0;20;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.0, 39.42080668885405 ], "wc_strengths_avg": [ 51.0, 25.288337232803585 ], "wc_weaknesses_avg": [ 143.75, 93.65461814560989 ], "wc_questions_avg": [ 88.5, 78.25119807389532 ], "wc_limitations_avg": [ 35.5, 46.18170633486814 ], "wc_review_avg": [ 434.75, 231.75458463642096 ], "wc_reply_reviewers_avg": [ 79.75, 100.50466407087782 ], "wc_reply_authors_avg": [ 5.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17699937249934042413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "research.microsoft.com;cmu.edu;microsoft.com;microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Microsoft;Carnegie Mellon University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cmu.edu", "aff_unique_abbr": "MSR;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sparse maximal update parameterization: A holistic approach to sparse training dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95363", "id": "OWmu3QOa0O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OWmu3QOa0O", "openreview": "https://openreview.net/forum?id=OWmu3QOa0O", "poster": "/media/PosterPDFs/NeurIPS%202024/95363.png?t=1730741322.214214", "project": "", "author_site": "Nolan Dey, Shane Bergsma, Joel Hestness", "tldr": "", "abstract": "Several challenges make it difficult for sparse neural networks to compete with dense models. First, setting a large fraction of weights to zero impairs forward and gradient signal propagation. Second, sparse studies often need to test multiple sparsity levels, while also introducing new hyperparameters (HPs), leading to prohibitive tuning costs. Indeed, the standard practice is to re-use the learning HPs originally crafted for dense models. Unfortunately, we show sparse and\ndense networks do not share the same optimal HPs. Without stable dynamics and effective training recipes, it is costly to test sparsity at scale, which is key to surpassing dense networks and making the business case for sparsity acceleration in hardware.\n\nA holistic approach is needed to tackle these challenges and we propose S$\\textmu$Par as one such approach. For random unstructured static sparsity, S$\\textmu$Par ensures activations, gradients, and weight updates all scale independently of sparsity level. Further, by reparameterizing the HPs, S$\\textmu$Par enables the same HP values to be optimal as we vary both sparsity level and model width. HPs can be tuned on small dense networks and transferred to large sparse models, greatly reducing tuning costs. On large-scale language modeling, S$\\textmu$Par shows increasing improvements over standard parameterization as sparsity increases, leading up to 11.9\\% relative loss improvement at 99.2\\% sparsity. A minimal implementation of S$\\textmu$Par is available at https://github.com/EleutherAI/nanoGPT-mup/tree/supar.", "keywords": "sparsity;large language models;scaling laws", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Nolan Simran Dey;Shane Bergsma;Joel Hestness", "authorids": "~Nolan_Simran_Dey1;~Shane_Bergsma1;~Joel_Hestness2", "gender": "M;M;M", "homepage": "https://ndey96.github.io;https://sites.google.com/site/shaneabergsma/;", "dblp": "263/9353;57/2540;60/3063", "google_scholar": "JHUfMr0AAAAJ;https://scholar.google.ca/citations?hl=en;wkbvCf0AAAAJ", "orcid": ";;0000-0001-6920-0906", "linkedin": "nolandey/;;joelhestness", "or_profile": "~Nolan_Simran_Dey1;~Shane_Bergsma1;~Joel_Thomas_Hestness1", "aff": "Cerebras Systems, Inc;Huawei Canada;Cerebras Systems, Inc", "aff_domain": "cerebras.net;huawei.com;cerebras.net", "position": "Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndey2024sparse,\ntitle={Sparse maximal update parameterization: A holistic approach to sparse training dynamics},\nauthor={Nolan Simran Dey and Shane Bergsma and Joel Hestness},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OWmu3QOa0O}\n}", "github": "", "reviewers": "F4Ay;6wqj;hqe9;Grts", "pdf_size": 764768, "rating": "3;5;6;7", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;2", "wc_summary": "49;115;39;45", "wc_strengths": "63;63;33;77", "wc_weaknesses": "181;235;164;72", "wc_questions": "33;4;165;186", "wc_limitations": "1;4;12;88", "wc_review": "327;421;413;468", "wc_reply_reviewers": "105;34;44;28", "wc_reply_authors": "66;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 62.0, 30.805843601498726 ], "wc_strengths_avg": [ 59.0, 16.06237840420901 ], "wc_weaknesses_avg": [ 163.0, 58.715415352358704 ], "wc_questions_avg": [ 97.0, 79.51414968419142 ], "wc_limitations_avg": [ 26.25, 35.87739539041261 ], "wc_review_avg": [ 407.25, 50.87423218093812 ], "wc_reply_reviewers_avg": [ 52.75, 30.70321644388418 ], "wc_reply_authors_avg": [ 16.5, 28.578838324886476 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12827052669391779202&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cerebras.net;huawei.com;cerebras.net", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Cerebras Systems;Huawei", "aff_unique_dep": ";Huawei", "aff_unique_url": "https://www.cerebras.com;https://www.huawei.com/ca-en/", "aff_unique_abbr": "Cerebras;Huawei Canada", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "MonkeySee: Space-time-resolved reconstructions of natural images from macaque multi-unit activity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95362", "id": "OWwdlxwnFN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OWwdlxwnFN", "openreview": "https://openreview.net/forum?id=OWwdlxwnFN", "poster": "/media/PosterPDFs/NeurIPS%202024/95362.png?t=1733696765.6695437", "project": "", "author_site": "Lynn Le, Paolo Papale, Katja Seeliger, Antonio Lozano, Thirza Dado, Feng Wang, Pieter Roelfsema, Marcel A. J. van Gerven, Ya\u011fmur G\u00fc\u00e7l\u00fct\u00fcrk, Umut G\u00fc\u00e7l\u00fc", "tldr": "", "abstract": "In this paper, we reconstruct naturalistic images directly from macaque brain signals using a convolutional neural network (CNN) based decoder. We investigate the ability of this CNN-based decoding technique to differentiate among neuronal populations from areas V1, V4, and IT, revealing distinct readout characteristics for each. This research marks a progression from low-level to high-level brain signals, thereby enriching the existing framework for utilizing CNN-based decoders to decode brain activity. Our results demonstrate high-precision reconstructions of naturalistic images, highlighting the efficiency of CNN-based decoders in advancing our knowledge of how the brain's representations translate into pixels. Additionally, we present a novel space-time-resolved decoding technique, demonstrating how temporal resolution in decoding can advance our understanding of neural representations. Moreover, we introduce a learned receptive field layer that sheds light on the CNN-based model's data processing during training, enhancing understanding of its structure and interpretive capacity.", "keywords": "space-time dependent reconstructions;biological constraints;neural decoding;deep neural network;receptive fields;visual cortex;multi unit activity;convolutional model;brain image reconstruction;naturalistic stimuli", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Lynn Le;Paolo Papale;K. Seeliger;Antonio Lozano;Thirza Dado;Feng Wang;Pieter R. Roelfsema;Marcel van Gerven;Ya\u011fmur G\u00fc\u00e7l\u00fct\u00fcrk;Umut G\u00fc\u00e7l\u00fc", "authorids": "~Lynn_Le2;~Paolo_Papale1;~K._Seeliger1;~Antonio_Lozano1;~Thirza_Dado1;~Feng_Wang19;~Pieter_R._Roelfsema1;~Marcel_van_Gerven1;~Ya\u011fmur_G\u00fc\u00e7l\u00fct\u00fcrk1;~Umut_G\u00fc\u00e7l\u00fc1", "gender": ";;;;F;M;;M;F;M", "homepage": "https://www.lynnle.com;;;https://nin.nl/about-us/the-organisation/team/antonio-lozano/;https://www.ru.nl/english/people/dado-t/;;;http://www.artcogsys.com;;https://neuralcoding.nl", "dblp": ";221/8248;;;;;09/1286;;https://dblp.uni-trier.de/pers/g/G=uuml==ccedil=l=uuml=t=uuml=rk:Yagmur.html;02/8743", "google_scholar": "M68qL4cAAAAJ;;;domCROIAAAAJ;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=X2vDok4AAAAJ;LM9WFngAAAAJ", "orcid": ";0000-0002-6249-841X;;0000-0003-4508-1484;;;;0000-0002-2206-9098;0000-0001-7872-3573;0000-0003-4753-159X", "linkedin": ";;;antonio-manuel-lozano-ortega/;thirza-dado-53a199a4/;feng-wang-b253b91b/;;marcel-van-gerven-8698784/;yagmurgucluturk/;umutguclu", "or_profile": "~Lynn_Le2;~Paolo_Papale1;~K._Seeliger1;~Antonio_Lozano1;~Thirza_Dado1;~Feng_Wang19;~Pieter_R._Roelfsema1;~Marcel_van_Gerven1;~Ya\u011fmur_G\u00fc\u00e7l\u00fct\u00fcrk1;~Umut_G\u00fc\u00e7l\u00fc1", "aff": "Radboud University;Netherlands Institute for Neuroscience;;Netherlands Institute for Neuroscience;Donders Institute for Brain, Cognition and Behaviour;Netherlands institute for neuroscience;;Donders Institute for Brain, Cognition and Behaviour, Radboud University;Radboud University Nijmegen;Radboud University Nijmegen", "aff_domain": "ru.nl;nin.knaw.nl;;nin.nl;donders.ru.nl;knaw.nl;;ru.nl;ru.nl;ru.nl", "position": "PhD student;Postdoc;;Postdoc;PhD student;PhD student;;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nle2024monkeysee,\ntitle={MonkeySee: Space-time-resolved reconstructions of natural images from macaque multi-unit activity},\nauthor={Lynn Le and Paolo Papale and K. Seeliger and Antonio Lozano and Thirza Dado and Feng Wang and Pieter R. Roelfsema and Marcel van Gerven and Ya{\\u{g}}mur G{\\\"u}{\\c{c}}l{\\\"u}t{\\\"u}rk and Umut G{\\\"u}{\\c{c}}l{\\\"u}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OWwdlxwnFN}\n}", "github": "", "reviewers": "skQc;gW2o;R5Jx;d65o", "pdf_size": 25736003, "rating": "4;6;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;2;3", "wc_summary": "103;99;148;42", "wc_strengths": "51;58;34;44", "wc_weaknesses": "65;261;92;46", "wc_questions": "209;107;704;3", "wc_limitations": "44;10;1;2", "wc_review": "472;535;979;137", "wc_reply_reviewers": "0;66;124;8", "wc_reply_authors": "0;8;260;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.0, 37.62313118282422 ], "wc_strengths_avg": [ 46.75, 8.870597499605086 ], "wc_weaknesses_avg": [ 116.0, 85.29654154770873 ], "wc_questions_avg": [ 255.75, 268.85067881632733 ], "wc_limitations_avg": [ 14.25, 17.52676524633111 ], "wc_review_avg": [ 530.75, 299.7652206310799 ], "wc_reply_reviewers_avg": [ 49.5, 49.98749843710925 ], "wc_reply_authors_avg": [ 67.0, 111.476454913134 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:atIxyOYsl-MJ:scholar.google.com/&scioq=MonkeySee:+Space-time-resolved+reconstructions+of+natural+images+from+macaque+multi-unit+activity&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ru.nl;nin.knaw.nl;;nin.nl;donders.ru.nl;knaw.nl;;ru.nl;ru.nl;ru.nl", "author_num": 10, "aff_unique_index": "0;1;1;2;1;0;0;0", "aff_unique_norm": "Radboud University;Netherlands Institute for Neuroscience;Donders Institute for Brain, Cognition and Behaviour", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ru.nl;https://www.nin.nl;https://www.donders.ru.nl", "aff_unique_abbr": "RU;NIN;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Nijmegen", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Local to Global: Learning Dynamics and Effect of Initialization for Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95361", "id": "OX4yll3X53", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OX4yll3X53", "openreview": "https://openreview.net/forum?id=OX4yll3X53", "poster": "/media/PosterPDFs/NeurIPS%202024/95361.png?t=1732186139.694369", "project": "", "author_site": "Ashok Vardhan Makkuva, Marco Bondaschi, Adway Girish, Alliot Nagle, Hyeji Kim, Michael Gastpar, Chanakya Ekbote", "tldr": "", "abstract": "In recent years, transformer-based models have revolutionized deep learning, particularly in sequence modeling. To better understand this phenomenon, there is a growing interest in using Markov input processes to study transformers. However, our current understanding in this regard remains limited with many fundamental questions about how transformers learn Markov chains still unanswered. In this paper, we address this by focusing on first-order Markov chains and single-layer transformers, providing a comprehensive characterization of the learning dynamics in this context. Specifically, we prove that transformer parameters trained on next-token prediction loss can either converge to global or local minima, contingent on the initialization and the Markovian data properties, and we characterize the precise conditions under which this occurs. To the best of our knowledge, this is the first result of its kind highlighting the role of initialization. We further demonstrate that our theoretical findings are corroborated by empirical evidence. Based on these insights, we provide guidelines for the initialization of single-layer transformers and demonstrate their effectiveness. Finally, we outline several open problems in this arena. Code is available at: \\url{https://github.com/Bond1995/Markov}.", "keywords": "gradient descent dynamics;transformers;markov chains", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/deb0dac32bee8c959f1d2dfcf750cc3bdb4a07b3.zip", "author": "Ashok Vardhan Makkuva;Marco Bondaschi;Adway Girish;Alliot Nagle;Hyeji Kim;Michael Gastpar;Chanakya Ekbote", "authorids": "~Ashok_Vardhan_Makkuva1;~Marco_Bondaschi1;~Adway_Girish1;~Alliot_Nagle1;~Hyeji_Kim1;~Michael_Gastpar1;~Chanakya_Ekbote1", "gender": ";M;M;M;;;", "homepage": ";;https://sites.google.com/view/adwaygirish;https://acnagle.com/;;https://people.epfl.ch/michael.gastpar;", "dblp": ";255/4933;324/4046.html;;;;", "google_scholar": ";;WOt1dxsAAAAJ;OUilkgIAAAAJ;;https://scholar.google.ch/citations?user=IQ3hcw4AAAAJ;", "orcid": ";0000-0002-4158-2487;;;;0000-0002-5499-5336;", "linkedin": ";;adwaygirish/;alliot-nagle/;;;", "or_profile": "~Ashok_Vardhan_Makkuva1;~Marco_Bondaschi1;~Adway_Girish1;~Alliot_Nagle1;~Hyeji_Kim1;~Michael_Gastpar1;~Chanakya_Ekbote1", "aff": ";EPFL - EPF Lausanne;EPFL;University of Texas at Austin;;School of Computer and Communication Sciences, EPFL - EPF Lausanne;", "aff_domain": ";epfl.ch;epfl.ch;utexas.edu;;ic.epfl.ch;", "position": ";PhD student;PhD student;PhD student;;Full Professor;", "bibtex": "@inproceedings{\nmakkuva2024local,\ntitle={Local to Global: Learning Dynamics and Effect of Initialization for Transformers},\nauthor={Ashok Vardhan Makkuva and Marco Bondaschi and Adway Girish and Alliot Nagle and Hyeji Kim and Michael Gastpar and Chanakya Ekbote},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OX4yll3X53}\n}", "github": "", "reviewers": "er5c;DieV;Fte8;JG7D;LVoR", "pdf_size": 5837918, "rating": "5;5;6;6;6", "confidence": "3;3;3;3;3", "soundness": "3;3;3;4;4", "novelty": "3;3;2;3;3", "presentation": "2;2;4;4;3", "wc_summary": "72;89;73;51;128", "wc_strengths": "103;82;117;61;65", "wc_weaknesses": "215;102;352;106;116", "wc_questions": "15;30;182;4;143", "wc_limitations": "30;42;86;1;69", "wc_review": "435;345;810;223;521", "wc_reply_reviewers": "659;47;80;19;206", "wc_reply_authors": "677;412;396;0;0", "reply_reviewers": "2;1;2;1;1", "reply_authors": "3;3;3;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 82.6, 25.710698162438142 ], "wc_strengths_avg": [ 85.6, 21.592591322025246 ], "wc_weaknesses_avg": [ 178.2, 96.38340106055605 ], "wc_questions_avg": [ 74.8, 73.12838026375259 ], "wc_limitations_avg": [ 45.6, 29.749621846336133 ], "wc_review_avg": [ 466.8, 197.99434335354127 ], "wc_reply_reviewers_avg": [ 202.2, 237.1762214050979 ], "wc_reply_authors_avg": [ 297.0, 262.238059785379 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4999511019590312104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";epfl.ch;epfl.ch;utexas.edu;;ic.epfl.ch;", "author_num": 7, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "EPFL;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.utexas.edu", "aff_unique_abbr": "EPFL;UT Austin", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Lausanne;;Austin", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Amortized Eigendecomposition for Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95360", "id": "OYOkkqRLvj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OYOkkqRLvj", "openreview": "https://openreview.net/forum?id=OYOkkqRLvj", "poster": "/media/PosterPDFs/NeurIPS%202024/95360.png?t=1731737761.815465", "project": "", "author_site": "Tianbo Li, Zekun Shi, Jiaxi Zhao, Min Lin", "tldr": "", "abstract": "Performing eigendecomposition during neural network training is essential for tasks such as dimensionality reduction, network compression, image denoising, and graph learning. However, eigendecomposition is computationally expensive as it is orders of magnitude slower than other neural network operations. To address this challenge, we propose a novel approach called \"amortized eigendecomposition\" that relaxes the exact eigendecomposition by introducing an additional loss term called eigen loss. Our approach offers significant speed improvements by replacing the computationally expensive eigendecomposition with a more affordable QR decomposition at each iteration. Theoretical analysis guarantees that the desired eigenpair is attained as optima of the eigen loss. Empirical studies on nuclear norm regularization, latent-space principal component analysis, and graphs adversarial learning demonstrate significant improvements in training efficiency while producing nearly identical outcomes to conventional approaches. This novel methodology promises to integrate eigendecomposition efficiently into neural network training, overcoming existing computational challenges and unlocking new potential for advanced deep learning applications.", "keywords": "Eigendecomposition;SVD;Amortized optimization.", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Tianbo Li;Zekun Shi;Jiaxi Zhao;Min Lin", "authorids": "~Tianbo_Li1;~Zekun_Shi3;~Jiaxi_Zhao1;~Min_Lin1", "gender": "M;M;M;M", "homepage": ";https://github.com/jiaxi98;https://linmin.me;https://zekun-shi.github.io/", "dblp": "153/7013;;;", "google_scholar": ";jkbWVM0AAAAJ;BGONmkIAAAAJ;X9vcv1oAAAAJ", "orcid": ";0009-0007-2110-6006;;", "linkedin": ";;min-lin-08a3a422/;", "or_profile": "~Tianbo_Li1;~Jiaxi_Zhao1;~Min_Lin1;~ZEKUN_SHI2", "aff": "Sea AI Lab;National University of Singapore;Sea AI Lab;Sea AI Lab", "aff_domain": "sea.com;nus.edu;sea.com;sea.com", "position": "Researcher;PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nli2024amortized,\ntitle={Amortized Eigendecomposition for Neural Networks},\nauthor={Tianbo Li and Zekun Shi and Jiaxi Zhao and Min Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OYOkkqRLvj}\n}", "github": "", "reviewers": "fsNU;6W8h;WiHy;5kBi", "pdf_size": 1895122, "rating": "5;7;8;8", "confidence": "3;4;3;4", "soundness": "3;4;4;4", "novelty": "2;3;4;4", "presentation": "3;4;4;4", "wc_summary": "151;124;56;101", "wc_strengths": "65;275;31;57", "wc_weaknesses": "42;119;50;30", "wc_questions": "55;190;4;67", "wc_limitations": "82;1;9;35", "wc_review": "395;709;150;290", "wc_reply_reviewers": "28;14;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.0, 34.84967718645325 ], "wc_strengths_avg": [ 107.0, 97.80593029055038 ], "wc_weaknesses_avg": [ 60.25, 34.65815199920504 ], "wc_questions_avg": [ 79.0, 68.3117852204142 ], "wc_limitations_avg": [ 31.75, 31.61783515675923 ], "wc_review_avg": [ 386.0, 205.74377268826387 ], "wc_reply_reviewers_avg": [ 13.5, 9.937303457175895 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5q-y4QQsLGgJ:scholar.google.com/&scioq=Amortized+Eigendecomposition+for+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "sea.com;nus.edu;sea.com;sea.com", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Sea AI Lab;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": ";https://www.nus.edu.sg", "aff_unique_abbr": ";NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Singapore" }, { "title": "Geometric Trajectory Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95359", "id": "OYmms5Mv9H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OYmms5Mv9H", "openreview": "https://openreview.net/forum?id=OYmms5Mv9H", "poster": "", "project": "", "author_site": "Jiaqi Han, Minkai Xu, Aaron Lou, Haotian Ye, Stefano Ermon", "tldr": "", "abstract": "Generative models have shown great promise in generating 3D geometric systems, which is a fundamental problem in many natural science domains such as molecule and protein design. However, existing approaches only operate on static structures, neglecting the fact that physical systems are always dynamic in nature. In this work, we propose geometric trajectory diffusion models (GeoTDM), the first diffusion model for modeling the temporal distribution of 3D geometric trajectories. Modeling such distribution is challenging as it requires capturing both the complex spatial interactions with physical symmetries and temporal correspondence encapsulated in the dynamics. We theoretically justify that diffusion models with equivariant temporal kernels can lead to density with desired symmetry, and develop a novel transition kernel leveraging SE(3)-equivariant spatial convolution and temporal attention. Furthermore, to induce an expressive trajectory distribution for conditional generation, we introduce a generalized learnable geometric prior into the forward diffusion process to enhance temporal conditioning. We conduct extensive experiments on both unconditional and conditional generation in various scenarios, including physical simulation, molecular dynamics, and pedestrian motion. Empirical results on a wide suite of metrics demonstrate that GeoTDM can generate realistic geometric trajectories with significantly higher quality.", "keywords": "diffusion model;geometric trajectory;equivariant graph neural networks", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/ceff2d705a7f2e956f86b1bf09f2bff1c2d1916a.zip", "author": "Jiaqi Han;Minkai Xu;Aaron Lou;Haotian Ye;Stefano Ermon", "authorids": "~Jiaqi_Han2;~Minkai_Xu1;~Aaron_Lou1;~Haotian_Ye1;~Stefano_Ermon1", "gender": "M;M;M;M;M", "homepage": "https://hanjq17.github.io;https://minkaixu.com;https://aaronlou.com;https://haotianye.com;http://cs.stanford.edu/~ermon/", "dblp": "235/0412;257/3355;232/3858;284/0539;47/8135", "google_scholar": "AKppgMAAAAAJ;https://scholar.google.com/citations?hl=en;;VU4chlsAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jiaqi_Han2;~Minkai_Xu1;~Aaron_Lou1;~Haotian_Ye1;~Stefano_Ermon1", "aff": "Computer Science Department, Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhan2024geometric,\ntitle={Geometric Trajectory Diffusion Models},\nauthor={Jiaqi Han and Minkai Xu and Aaron Lou and Haotian Ye and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OYmms5Mv9H}\n}", "github": "", "reviewers": "UDq7;CUJA;e5nM;RWWk", "pdf_size": 11659588, "rating": "5;6;6;7", "confidence": "4;2;1;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "4;3;3;3", "wc_summary": "73;27;63;90", "wc_strengths": "68;22;58;69", "wc_weaknesses": "132;96;1;13", "wc_questions": "2;28;1;174", "wc_limitations": "1;1;1;34", "wc_review": "276;174;124;380", "wc_reply_reviewers": "23;197;0;29", "wc_reply_authors": "215;948;0;32", "reply_reviewers": "1;3;0;1", "reply_authors": "2;5;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.25, 23.047505287991584 ], "wc_strengths_avg": [ 54.25, 19.109879643786353 ], "wc_weaknesses_avg": [ 60.5, 55.15659525387694 ], "wc_questions_avg": [ 51.25, 71.6916138749854 ], "wc_limitations_avg": [ 9.25, 14.289419162443238 ], "wc_review_avg": [ 238.5, 98.36030703490103 ], "wc_reply_reviewers_avg": [ 62.25, 78.54735832604429 ], "wc_reply_authors_avg": [ 298.75, 383.71433058982825 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4944692385369982384&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mitigating Spurious Correlations via Disagreement Probability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95358", "id": "ObUjBHBx8O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ObUjBHBx8O", "openreview": "https://openreview.net/forum?id=ObUjBHBx8O", "poster": "/media/PosterPDFs/NeurIPS%202024/95358.png?t=1731577550.1008873", "project": "", "author_site": "Hyeonggeun Han, Sehwan Kim, Hyungjun Joo, Sangwoo Hong, Jungwoo Lee", "tldr": "", "abstract": "Models trained with empirical risk minimization (ERM) are prone to be biased towards spurious correlations between target labels and bias attributes, which leads to poor performance on data groups lacking spurious correlations. It is particularly challenging to address this problem when access to bias labels is not permitted. To mitigate the effect of spurious correlations without bias labels, we first introduce a novel training objective designed to robustly enhance model performance across all data samples, irrespective of the presence of spurious correlations. From this objective, we then derive a debiasing method, Disagreement Probability based Resampling for debiasing (DPR), which does not require bias labels. DPR leverages the disagreement between the target label and the prediction of a biased model to identify bias-conflicting samples\u2014those without spurious correlations\u2014and upsamples them according to the disagreement probability. Empirical evaluations on multiple benchmarks demonstrate that DPR achieves state-of-the-art performance over existing baselines that do not use bias labels. Furthermore, we provide a theoretical analysis that details how DPR reduces dependency on spurious correlations.", "keywords": "Debiasing;Spurious correlation;Group robustness", "primary_area": "fairness", "supplementary_material": "/attachment/b182e34bab90260bba4c0c5c4b274f6d036fb9e4.zip", "author": "Hyeonggeun Han;Sehwan Kim;Hyungjun Joo;Sangwoo Hong;Jungwoo Lee", "authorids": "~Hyeonggeun_Han1;~Sehwan_Kim2;~Hyungjun_Joo1;~Sangwoo_Hong1;~Jungwoo_Lee1", "gender": "M;M;M;M;M", "homepage": ";;;;https://cml.snu.ac.kr", "dblp": "345/1092;01/6355;289/8846;48/334;34/516-1", "google_scholar": ";;;5HqydTsAAAAJ;j98IWfoAAAAJ", "orcid": ";;;0000-0002-0270-2781;0000-0002-6804-980X", "linkedin": "hyeonggeun-han-3529b523a/;https://linkedin.com/in/sehwankim-snu;hyungjun-joo-817a81212/;sangwoohong1995;", "or_profile": "~Hyeonggeun_Han1;~Sehwan_Kim2;~Hyungjun_Joo1;~Sangwoo_Hong1;~Jungwoo_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhan2024mitigating,\ntitle={Mitigating Spurious Correlations via Disagreement Probability},\nauthor={Hyeonggeun Han and Sehwan Kim and Hyungjun Joo and Sangwoo Hong and Jungwoo Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ObUjBHBx8O}\n}", "github": "", "reviewers": "GDUK;qVjP;Het1;1RoL", "pdf_size": 4293780, "rating": "5;5;6;8", "confidence": "4;5;3;4", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;4", "wc_summary": "52;53;75;213", "wc_strengths": "44;40;42;407", "wc_weaknesses": "366;207;149;1", "wc_questions": "3;4;30;1", "wc_limitations": "3;4;9;6", "wc_review": "468;308;305;628", "wc_reply_reviewers": "289;258;83;34", "wc_reply_authors": "589;534;292;29", "reply_reviewers": "3;2;2;1", "reply_authors": "4;3;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.25, 66.88562999628545 ], "wc_strengths_avg": [ 133.25, 158.0559631902574 ], "wc_weaknesses_avg": [ 180.75, 130.69501712001113 ], "wc_questions_avg": [ 9.5, 11.884864324004713 ], "wc_limitations_avg": [ 5.5, 2.29128784747792 ], "wc_review_avg": [ 427.25, 133.34799398566145 ], "wc_reply_reviewers_avg": [ 166.0, 109.43719660152118 ], "wc_reply_authors_avg": [ 361.0, 221.86595051967754 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10000564548275989630&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Realizable $H$-Consistent and Bayes-Consistent Loss Functions for Learning to Defer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95357", "id": "OcO2XakUUK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OcO2XakUUK", "openreview": "https://openreview.net/forum?id=OcO2XakUUK", "poster": "", "project": "", "author_site": "Anqi Mao, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "We present a comprehensive study of surrogate loss functions for learning to defer. We introduce a broad family of surrogate losses, parameterized by a non-increasing function $\\Psi$, and establish their realizable $H$-consistency under mild conditions. For cost functions based on classification error, we further show that these losses admit $H$-consistency bounds when the hypothesis set is symmetric and complete, a property satisfied by common neural network and linear function hypothesis sets. Our results also resolve an open question raised in previous work [Mozannar et al., 2023] by proving the realizable $H$-consistency and Bayes-consistency of a specific surrogate loss. Furthermore, we identify choices of $\\Psi$ that lead to $H$-consistent surrogate losses for *any general cost function*, thus achieving Bayes-consistency, realizable $H$-consistency, and $H$-consistency bounds *simultaneously*. We also investigate the relationship between $H$-consistency bounds and realizable $H$-consistency in learning to defer, highlighting key differences from standard classification. Finally, we empirically evaluate our proposed surrogate losses and compare them with existing baselines.", "keywords": "learning to defer;consistency;realizable H-consistency;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Anqi Mao;Mehryar Mohri;Yutao Zhong", "authorids": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;M;", "homepage": "https://anqi-mao.github.io;https://cs.nyu.edu/~mohri/;", "dblp": "241/6864;03/5448;51/3178-2", "google_scholar": "nkjIZ-oAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;", "linkedin": ";mehryar-mohri-3737b981/;", "or_profile": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Courant Institute of Mathematical Sciences, NYU;Google Research;Google", "aff_domain": "cims.nyu.edu;google.com;google.com", "position": "PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmao2024realizable,\ntitle={Realizable \\$H\\$-Consistent and Bayes-Consistent Loss Functions for Learning to Defer},\nauthor={Anqi Mao and Mehryar Mohri and Yutao Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OcO2XakUUK}\n}", "github": "", "reviewers": "kR4F;Nrap;W4md;oVSD", "pdf_size": 455396, "rating": "6;6;7;7", "confidence": "3;5;3;3", "soundness": "3;4;3;3", "novelty": "2;2;3;3", "presentation": "2;4;3;3", "wc_summary": "66;112;142;109", "wc_strengths": "27;233;191;50", "wc_weaknesses": "210;79;96;52", "wc_questions": "109;62;62;8", "wc_limitations": "1;3;7;1", "wc_review": "413;489;498;220", "wc_reply_reviewers": "20;0;31;58", "wc_reply_authors": "29;0;32;27", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.25, 27.086666461563706 ], "wc_strengths_avg": [ 125.25, 88.38657986368744 ], "wc_weaknesses_avg": [ 109.25, 60.246887886429455 ], "wc_questions_avg": [ 60.25, 35.75174820900371 ], "wc_limitations_avg": [ 3.0, 2.449489742783178 ], "wc_review_avg": [ 405.0, 111.79669046979879 ], "wc_reply_reviewers_avg": [ 27.25, 20.94486810653149 ], "wc_reply_authors_avg": [ 22.0, 12.82575533838066 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11697559109816286156&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cims.nyu.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": "Courant Institute of Mathematical Sciences;Google Research", "aff_unique_url": "https://www.courant.nyu.edu;https://research.google", "aff_unique_abbr": "NYU;Google Research", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "New York;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ST$_k$: A Scalable Module for Solving Top-k Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95356", "id": "OdJKB9jSa5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OdJKB9jSa5", "openreview": "https://openreview.net/forum?id=OdJKB9jSa5", "poster": "", "project": "", "author_site": "Hanchen Xia, Weidong Liu, Xiaojun Mao", "tldr": "", "abstract": "The cost of ranking becomes significant in the new stage of deep learning. We propose ST$_k$, a fully differentiable module with a single trainable parameter, designed to solve the Top-k problem without requiring additional time or GPU memory. Due to its fully differentiable nature, ST$_k$ can be embedded end-to-end into neural networks and optimize the Top-k problems within a unified computational graph. We apply ST$_k$ to the Average Top-k Loss (AT$_k$), which inherently faces a Top-k problem. The proposed ST$_k$ Loss outperforms AT$_k$ Loss and achieves the best average performance on multiple benchmarks, with the lowest standard deviation. With the assistance of ST$_k$ Loss, we surpass the state-of-the-art (SOTA) on both CIFAR-100-LT and Places-LT leaderboards.", "keywords": "Average Top-k Loss;Smoothing ReLU;Ranking Problem", "primary_area": "other", "supplementary_material": "/attachment/1f8ff4a4780927bda21c53aa72c7ef106584b36b.zip", "author": "Hanchen Xia;Weidong Liu;Xiaojun Mao", "authorids": "~Hanchen_Xia1;~Weidong_Liu2;~Xiaojun_Mao1", "gender": "M;;", "homepage": "https://github.com/1ring2rta;http://www.math.sjtu.edu.cn/faculty/weidongl/;https://mxjki.github.io/", "dblp": "334/3927;;232/4239", "google_scholar": "sno8F30AAAAJ;;f6KvrMYAAAAJ", "orcid": ";;0000-0002-9362-508X", "linkedin": ";;", "or_profile": "~Hanchen_Xia1;~Weidong_Liu2;~Mao_Xiaojun1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxia2024stk,\ntitle={{ST}\\$\\_k\\$: A Scalable Module for Solving Top-k Problems},\nauthor={Hanchen Xia and Weidong Liu and Xiaojun Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OdJKB9jSa5}\n}", "github": "", "reviewers": "46Ey;uCvC;SwHE;8VJU", "pdf_size": 939902, "rating": "4;4;6;7", "confidence": "4;1;3;3", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "48;55;46;108", "wc_strengths": "38;48;28;55", "wc_weaknesses": "174;149;168;65", "wc_questions": "38;28;90;70", "wc_limitations": "3;1;1;47", "wc_review": "301;281;333;345", "wc_reply_reviewers": "131;0;80;17", "wc_reply_authors": "941;738;372;74", "reply_reviewers": "1;0;2;1", "reply_authors": "4;3;4;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 25.479158149358074 ], "wc_strengths_avg": [ 42.25, 10.207227831296802 ], "wc_weaknesses_avg": [ 139.0, 43.70926675202868 ], "wc_questions_avg": [ 56.5, 24.794152536434876 ], "wc_limitations_avg": [ 13.0, 19.6468827043885 ], "wc_review_avg": [ 315.0, 25.37715508089904 ], "wc_reply_reviewers_avg": [ 57.0, 52.091266062556016 ], "wc_reply_authors_avg": [ 531.25, 333.57111310783495 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.22075539284417395, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-I7S4Nzm90UJ:scholar.google.com/&scioq=ST%24_k%24:+A+Scalable+Module+for+Solving+Top-k+Problems&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Decomposable Transformer Point Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95355", "id": "OesteJF0ls", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OesteJF0ls", "openreview": "https://openreview.net/forum?id=OesteJF0ls", "poster": "", "project": "", "tldr": "", "abstract": "The standard paradigm of modeling marked point processes is by parameterizing the intensity function using an attention-based (Transformer-style) architecture. Despite the flexibility of these methods, their inference is based on the computationally intensive thinning algorithm. In this work, we propose a framework where the advantages of the attention-based architecture are maintained and the limitation of the thinning algorithm is circumvented. The framework depends on modeling the conditional distribution of inter-event times with a mixture of log-normals satisfying a Markov property and the conditional probability mass function for the marks with a Transformer-based architecture. The proposed method attains state-of-the-art performance in predicting the next event of a sequence given its history. The experiments also reveal the efficacy of the methods that do not rely on the thinning algorithm during inference over the ones they do. Finally, we test our method on the challenging long-horizon prediction task and find that it outperforms a baseline developed specifically for tackling this task; importantly, inference requires just a fraction of time compared to the thinning-based baseline.", "keywords": "point processes", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/5d47b0ac5f2395b424be35bd3be2acdb488aeaf4.zip", "author": "Aristeidis Panos", "authorids": "~Aristeidis_Panos1", "gender": "M", "homepage": "", "dblp": "223/5785", "google_scholar": "", "orcid": "", "linkedin": "aristeidis-panos-061864a9/", "or_profile": "~Aristeidis_Panos1", "aff": "University of Cambridge", "aff_domain": "cam.ac.uk", "position": "Postdoc", "bibtex": "@inproceedings{\npanos2024decomposable,\ntitle={Decomposable Transformer Point Processes},\nauthor={Aristeidis Panos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OesteJF0ls}\n}", "github": "", "reviewers": "pTAD;Txj7;1AM8", "pdf_size": 642287, "rating": "5;5;6", "confidence": "4;2;3", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "3;2;4", "wc_summary": "86;70;75", "wc_strengths": "128;53;54", "wc_weaknesses": "172;106;100", "wc_questions": "97;2;92", "wc_limitations": "9;7;8", "wc_review": "492;238;329", "wc_reply_reviewers": "93;0;42", "wc_reply_authors": "0;0;53", "reply_reviewers": "1;0;1", "reply_authors": "1;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 77.0, 6.683312551921141 ], "wc_strengths_avg": [ 78.33333333333333, 35.122009560324926 ], "wc_weaknesses_avg": [ 126.0, 32.61901286060018 ], "wc_questions_avg": [ 63.666666666666664, 43.65266951236265 ], "wc_limitations_avg": [ 8.0, 0.816496580927726 ], "wc_review_avg": [ 353.0, 105.0745766903996 ], "wc_reply_reviewers_avg": [ 45.0, 38.02630668366309 ], "wc_reply_authors_avg": [ 17.666666666666668, 24.984439601924677 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5452670283796444218&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, "email": "cam.ac.uk", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Resource-Aware Federated Self-Supervised Learning with Global Class Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95354", "id": "Of4iNAIUSe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Of4iNAIUSe", "openreview": "https://openreview.net/forum?id=Of4iNAIUSe", "poster": "/media/PosterPDFs/NeurIPS%202024/95354.png?t=1731665893.7232146", "project": "", "author_site": "Mingyi Li, Xiao Zhang, Qi Wang, Tengfei LIU, Ruofan Wu, Weiqiang Wang, Fuzhen Zhuang, Hui Xiong, Dongxiao Yu", "tldr": "", "abstract": "Due to the heterogeneous architectures and class skew, the global representation models training in resource-adaptive federated self-supervised learning face with tricky challenges: $\\textit{deviated representation abilities}$ and $\\textit{inconsistent representation spaces}$. \nIn this work, we are the first to propose a multi-teacher knowledge distillation framework, namely $\\textit{FedMKD}$, to learn global representations with whole class knowledge from heterogeneous clients even under extreme class skew. Firstly, the adaptive knowledge integration mechanism is designed to learn better representations from all heterogeneous models with deviated representation abilities. Then the weighted combination of the self-supervised loss and the distillation loss can support the global model to encode all classes from clients into a unified space. Besides, the global knowledge anchored alignment module can make the local representation spaces close to the global spaces, which further improves the representation abilities of local ones. Finally, extensive experiments conducted on two datasets demonstrate the effectiveness of $\\textit{FedMKD}$ which outperforms state-of-the-art baselines 4.78\\% under linear evaluation on average.", "keywords": "Federated self-supervised learning;Heterogeneity;Distillation", "primary_area": "other", "supplementary_material": "", "author": "Mingyi Li;Xiao Zhang;Qi Wang;Tengfei LIU;Ruofan Wu;Weiqiang Wang;Fuzhen Zhuang;Hui Xiong;Dongxiao Yu", "authorids": "~Mingyi_Li1;~Xiao_Zhang18;~Qi_Wang39;~Tengfei_LIU2;~Ruofan_Wu1;~Weiqiang_Wang4;~Fuzhen_Zhuang1;~Hui_Xiong1;~Dongxiao_Yu1", "gender": "F;;M;;M;M;M;M;", "homepage": "https://limee-ee.github.io/;https://tobexiao1.github.io/xiaozhangsdu.github.io/;https://wsq57.github.io;;https://rorschach1989.github.io/;https://www.linkedin.com/in/weiqiang-wang-489b925/;https://fuzhenzhuang.github.io/index.html;https://www.hkust-gz.edu.cn/people/hui-xiong/;https://www.cs.sdu.edu.cn/info/1070/5367.htm", "dblp": ";49/4478-15;;;;;48/5638;262/1686-1.html;44/7265", "google_scholar": "https://scholar.google.com/citations?hl=en;Mjc1iUQAAAAJ;;;;;https://scholar.google.com/citations?hl=en;cVDF1tkAAAAJ;hiQxuHYAAAAJ", "orcid": "0009-0006-9432-1068;0000-0003-0824-9284;;;;0000-0002-6159-619X;0000-0001-9170-7009;0000-0001-6016-6465;0000-0001-6835-5981", "linkedin": ";;;;;weiqiang-wang-489b925/;;;", "or_profile": "~Mingyi_Li1;~Xiao_Zhang18;~Qi_Wang39;~Tengfei_LIU2;~Ruofan_Wu1;~Weiqiang_Wang4;~Fuzhen_Zhuang1;~Hui_Xiong1;~Dongxiao_Yu1", "aff": "Shandong University;Shandong University;Shandong University;;Ant Group;Ant Group;Institute of Computing Technology, Chinese Academy of Sciences;Hong Kong University of Science and Technology (Guangzhou);Shandong University", "aff_domain": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;;antgroup.com;antgroup.com;ict.ac.cn;hkust.edu;sdu.edu.cn", "position": "PhD student;Associate Professor;MS student;;Researcher;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024resourceaware,\ntitle={Resource-Aware Federated Self-Supervised Learning with Global Class Representations},\nauthor={Mingyi Li and Xiao Zhang and Qi Wang and Tengfei LIU and Ruofan Wu and Weiqiang Wang and Fuzhen Zhuang and Hui Xiong and Dongxiao Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Of4iNAIUSe}\n}", "github": "", "reviewers": "de59;24fB;hPCk;QsUu;sGaU", "pdf_size": 8299315, "rating": "4;5;6;6;8", "confidence": "4;4;4;3;5", "soundness": "3;3;3;3;3", "novelty": "2;2;3;2;4", "presentation": "3;3;3;3;3", "wc_summary": "74;57;57;96;85", "wc_strengths": "37;91;88;88;108", "wc_weaknesses": "57;181;74;114;138", "wc_questions": "1;2;39;6;79", "wc_limitations": "9;31;32;10;35", "wc_review": "178;362;290;314;445", "wc_reply_reviewers": "0;0;32;25;36", "wc_reply_authors": "0;0;23;22;22", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.8, 15.380507143784303 ], "wc_strengths_avg": [ 82.4, 23.88807233746583 ], "wc_weaknesses_avg": [ 112.8, 44.512470162865604 ], "wc_questions_avg": [ 25.4, 30.25623902602569 ], "wc_limitations_avg": [ 23.4, 11.42978564978364 ], "wc_review_avg": [ 317.8, 87.70952057787115 ], "wc_reply_reviewers_avg": [ 18.6, 15.589740215924062 ], "wc_reply_authors_avg": [ 13.4, 10.947145746723207 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.47673129462279606, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AyF9XhcDrgsJ:scholar.google.com/&scioq=Resource-Aware+Federated+Self-Supervised+Learning+with+Global+Class+Representations&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;;antgroup.com;antgroup.com;ict.ac.cn;hkust.edu;sdu.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;1;2;3;0", "aff_unique_norm": "Shandong University;Ant Group;Chinese Academy of Sciences;Hong Kong University of Science and Technology", "aff_unique_dep": ";;Institute of Computing Technology;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.antgroup.com;http://www.ict.ac.cn;https://www.ust.hk", "aff_unique_abbr": "SDU;Ant Group;CAS;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ConMe: Rethinking Evaluation of Compositional Reasoning for Modern VLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97716", "id": "OfOCl3dGcF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OfOCl3dGcF", "openreview": "https://openreview.net/forum?id=OfOCl3dGcF", "poster": "", "project": "", "author_site": "Irene Huang, Wei Lin, Muhammad Jehanzeb Mirza, Jacob Hansen, Sivan Doveh, Victor Butoi, Roei Herzig, Assaf Arbelle, Hilde Kuehne, Trevor Darrell, Chuang Gan, Aude Oliva, Rogerio Feris, Leonid Karlinsky", "tldr": "", "abstract": "Compositional Reasoning (CR) entails grasping the significance of attributes, relations, and word order. Recent Vision-Language Models (VLMs), comprising a visual encoder and a Large Language Model (LLM) decoder, have demonstrated remarkable proficiency in such reasoning tasks. This prompts a crucial question: have VLMs effectively tackled the CR challenge? We conjecture that existing CR benchmarks may not adequately push the boundaries of modern VLMs due to the reliance on an LLM only negative text generation pipeline. Consequently, the negatives produced either appear as outliers from the natural language distribution learned by VLMs' LLM decoders or as improbable within the corresponding image context. To address these limitations, we introduce ConMe\\footnote{ConMe is an abbreviation for Confuse Me.} -- a compositional reasoning benchmark and a novel data generation pipeline leveraging VLMs to produce `hard CR Q&A'. Through a new concept of VLMs conversing with each other to collaboratively expose their weaknesses, our pipeline autonomously generates, evaluates, and selects challenging compositional reasoning questions, establishing a robust CR benchmark, also subsequently validated manually. Our benchmark provokes a noteworthy, up to 33%, decrease in CR performance compared to preceding benchmarks, reinstating the CR challenge even for state-of-the-art VLMs.", "keywords": "compositional reasoning;vlms;llms", "primary_area": "", "supplementary_material": "/attachment/8a96e961361db57a4fbfbd10a4d77fa7b3f75ba2.pdf", "author": "Irene Huang;Wei Lin;Muhammad Jehanzeb Mirza;Jacob A Hansen;Sivan Doveh;Victor Ion Butoi;Roei Herzig;Assaf Arbelle;Hilde Kuehne;Trevor Darrell;Chuang Gan;Aude Oliva;Rogerio Feris;Leonid Karlinsky", "authorids": "~Irene_Huang1;~Wei_Lin9;~Muhammad_Jehanzeb_Mirza1;~Jacob_A_Hansen1;~Sivan_Doveh1;~Victor_Ion_Butoi1;~Roei_Herzig2;~Assaf_Arbelle1;~Hilde_Kuehne5;~Trevor_Darrell2;~Chuang_Gan1;~Aude_Oliva1;~Rogerio_Feris1;~Leonid_Karlinsky3", "gender": "F;M;M;M;;M;M;M;F;;M;;M;M", "homepage": ";https://wlin-at.github.io/;;;;https://victorbutoi.github.io/;https://roeiherz.github.io/;https://www.linkedin.com/in/assaf-arbelle-74065876/?originalSubdomain=il;https://hildekuehne.github.io;;http://people.csail.mit.edu/ganchuang/;;http://rogerioferis.com;", "dblp": ";99/2649-19;295/9034;;;;215/5165;168/5494;45/4963;;139/6993;;;05/4463", "google_scholar": ";JJRr8c8AAAAJ;cES2rkAAAAAJ;;;7aSLkMUAAAAJ;https://scholar.google.co.il/citations?user=6Q-289IAAAAJ;https://scholar.google.co.uk/citations?user=uU_V_PsAAAAJ;pxhCcH0AAAAJ;;PTeSCbIAAAAJ;;xt3XLjcAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ", "orcid": ";;;;;;;0000-0001-6559-2316;0000-0003-1079-4441;;;;;", "linkedin": "ireneyhuang;;;jacob-h-3b648011b/;sivan-doveh-128849b1;victorbutoi/;roei-herzig-7534615a/;assaf-arbelle-74065876/?originalSubdomain=il;hilde-kuehne-8b9aa661;;;;;", "or_profile": "~Irene_Huang1;~Wei_Lin9;~Muhammad_Jehanzeb_Mirza1;~Jacob_A_Hansen1;~Sivan_Doveh1;~Victor_Ion_Butoi1;~Roei_Herzig2;~Assaf_Arbelle1;~Hilde_Kuehne5;~Trevor_Darrell2;~Chuang_Gan1;~Aude_Oliva1;~Rogerio_Feris1;~Leonid_Karlinsky3", "aff": "Massachusetts Institute of Technology;Johannes Kepler Universit\u00e4t Linz;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Weizmann Institute of Science;Massachusetts Institute of Technology;University of California, Berkeley;International Business Machines;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn, Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;;University of Massachusetts at Amherst;;International Business Machines;International Business Machines", "aff_domain": "mit.edu;jku.at;mit.edu;mit.edu;weizmann.ac.il;mit.edu;berkeley.edu;ibm.com;cs.uni-bonn.de;;umass.edu;;ibm.com;ibm.com", "position": "Undergrad student;Researcher;Postdoc;MS student;PhD student;PhD student;Postdoc;Researcher;Associate Professor;;Assistant Professor;;Research Manager;Principal Researcher", "bibtex": "@inproceedings{\nhuang2024conme,\ntitle={ConMe: Rethinking Evaluation of Compositional Reasoning for Modern {VLM}s},\nauthor={Irene Huang and Wei Lin and Muhammad Jehanzeb Mirza and Jacob A Hansen and Sivan Doveh and Victor Ion Butoi and Roei Herzig and Assaf Arbelle and Hilde Kuehne and Trevor Darrell and Chuang Gan and Aude Oliva and Rogerio Feris and Leonid Karlinsky},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=OfOCl3dGcF}\n}", "github": "", "reviewers": "pFDo;b3yx;GWAy", "pdf_size": 13167906, "rating": "6;6;9", "confidence": "3;4;4", "wc_summary_and_contributions": "47;178;101", "wc_strengths": "3;105;230", "wc_improvement": "3;399;191", "wc_limitations": "3;11;78", "wc_correctness": "3;52;76", "wc_clarity": "3;19;1", "wc_relation_to_prior_work": "9;11;92", "wc_documentation": "28;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "100;791;771", "wc_reply_reviewers": "49;126;14", "wc_reply_authors": "126;117;38", "reply_reviewers": "1;1;1", "reply_authors": "4;4;2", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 108.66666666666667, 53.75458636763523 ], "wc_strengths_avg": [ 112.66666666666667, 92.83078991129804 ], "wc_improvement_avg": [ 197.66666666666666, 161.7350370891299 ], "wc_limitations_avg": [ 30.666666666666668, 33.6286914537109 ], "wc_correctness_avg": [ 43.666666666666664, 30.379086373505196 ], "wc_clarity_avg": [ 7.666666666666667, 8.055363982396381 ], "wc_relation_to_prior_work_avg": [ 37.333333333333336, 38.663792996664064 ], "wc_documentation_avg": [ 14.666666666666666, 11.025223605694151 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 554.0, 321.13029546691274 ], "wc_reply_reviewers_avg": [ 63.0, 46.783187863447985 ], "wc_reply_authors_avg": [ 93.66666666666667, 39.53338954464806 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16923754722031978286&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mit.edu;jku.at;mit.edu;mit.edu;weizmann.ac.il;mit.edu;berkeley.edu;ibm.com;cs.uni-bonn.de;;umass.edu;;ibm.com;ibm.com", "author_num": 14, "aff_unique_index": "0;1;0;0;2;0;3;4;5;6;4;4", "aff_unique_norm": "Massachusetts Institute of Technology;Johannes Kepler University Linz;Weizmann Institute of Science;University of California, Berkeley;International Business Machines Corporation;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn;University of Massachusetts Amherst", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://web.mit.edu;https://www.jku.at;https://www.weizmann.org.il;https://www.berkeley.edu;https://www.ibm.com;https://www.uni-bonn.de;https://www.umass.edu", "aff_unique_abbr": "MIT;JKU;Weizmann;UC Berkeley;IBM;Uni Bonn;UMass Amherst", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Linz;Berkeley;Amherst", "aff_country_unique_index": "0;1;0;0;2;0;0;0;3;0;0;0", "aff_country_unique": "United States;Austria;Israel;Germany" }, { "title": "SHDocs: A dataset, benchmark, and method to efficiently generate high-quality, real-world specular highlight data with near-perfect alignment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97715", "id": "OfXwix3NRH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OfXwix3NRH", "openreview": "https://openreview.net/forum?id=OfXwix3NRH", "poster": "/media/PosterPDFs/NeurIPS%202024/97715.png?t=1733150057.3881943", "project": "", "author_site": "Jovin Leong, Koa Di, Benjamin Cham, Shaun Heng", "tldr": "", "abstract": "A frequent problem in vision-based reasoning tasks such as object detection and optical character recognition (OCR) is the persistence of specular highlights. Specular highlights appear as bright spots of glare that occur due to the concentrated reflection of light; these spots manifest as image artifacts which occlude computer vision models and are challenging to reconstruct. Despite this, specular highlight removal receives relatively little attention due to the difficulty of acquiring high-quality, real-world data. We introduce a method to generate specular highlight data with near-perfect alignment and present SHDocs\u2014a dataset of specular highlights on document images created using our method. Through our benchmark, we demonstrate that our dataset enables us to surpass the performance of state-of-the-art specular highlight removal models and downstream OCR tasks. We release our dataset, code, and methods publicly to motivate further exploration of image enhancement for practical computer vision challenges.", "keywords": "specular highlights;image enhancement;computer vision;unet;ocr;document;document analysis;specularity", "primary_area": "", "supplementary_material": "/attachment/4f8883fabd3fb122c7a4f231a27c7512e74cb563.pdf", "author": "Jovin Leong;Koa Ming Di;Benjamin Cham Wen Bin;Shaun Heng", "authorids": "~Jovin_Leong1;~Koa_Ming_Di1;~Benjamin_Cham_Wen_Bin1;~Shaun_Heng2", "gender": "M;M;M;M", "homepage": ";;https://github.com/benjamincham;", "dblp": ";;;", "google_scholar": "efOG6QMAAAAJ;;;", "orcid": ";;;", "linkedin": "jovin-leong-85111767/;https://sg.linkedin.com/in/ming-di-koa-92793b1a4;;shaunhwq?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app", "or_profile": "~Jovin_Leong1;~Koa_Ming_Di1;~Benjamin_Cham_Wen_Bin1;~Shaun_Heng2", "aff": "Home Team Science and Technology Agency;Home Team Science and Technology Agency;Home Team Science and Technology Agency;Htx", "aff_domain": "htx.gov.sg;htx.com.sg;htx.gov.sg;htx.gov.sg", "position": "Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nleong2024shdocs,\ntitle={{SHD}ocs: A dataset, benchmark, and method to efficiently generate high-quality, real-world specular highlight data with near-perfect alignment},\nauthor={Jovin Leong and Koa Ming Di and Benjamin Cham Wen Bin and Shaun Heng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=OfXwix3NRH}\n}", "github": "", "reviewers": "Ka9r;uNsq;FWs1", "pdf_size": 9799543, "rating": "6;6;7", "confidence": "4;2;3", "wc_summary_and_contributions": "72;54;50", "wc_strengths": "35;18;3", "wc_improvement": "95;27;3", "wc_limitations": "10;1;1", "wc_correctness": "8;1;1", "wc_clarity": "5;1;1", "wc_relation_to_prior_work": "11;1;1", "wc_documentation": "17;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "254;105;62", "wc_reply_reviewers": "19;0;0", "wc_reply_authors": "10;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 58.666666666666664, 9.568466729604882 ], "wc_strengths_avg": [ 18.666666666666668, 13.072447700751718 ], "wc_improvement_avg": [ 41.666666666666664, 38.96437118987322 ], "wc_limitations_avg": [ 4.0, 4.242640687119285 ], "wc_correctness_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_clarity_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_documentation_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 140.33333333333334, 82.26920579549935 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.956685895029603 ], "wc_reply_authors_avg": [ 3.3333333333333335, 4.714045207910316 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1z9nUvwNWV4J:scholar.google.com/&scioq=SHDocs:+A+dataset,+benchmark,+and+method+to+efficiently+generate+high-quality,+real-world+specular+highlight+data+with+near-perfect+alignment&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "htx.gov.sg;htx.com.sg;htx.gov.sg;htx.gov.sg", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Home Team Science and Technology Agency;", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore;" }, { "title": "Zero-Shot Transfer of Neural ODEs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95353", "id": "OgnYoIxtIN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OgnYoIxtIN", "openreview": "https://openreview.net/forum?id=OgnYoIxtIN", "poster": "/media/PosterPDFs/NeurIPS%202024/95353.png?t=1730732057.4542203", "project": "", "author_site": "Tyler Ingebrand, Adam Thorpe, Ufuk Topcu", "tldr": "", "abstract": "Autonomous systems often encounter environments and scenarios beyond the scope of their training data, which underscores a critical challenge: the need to generalize and adapt to unseen scenarios in real time. This challenge necessitates new mathematical and algorithmic tools that enable adaptation and zero-shot transfer. To this end, we leverage the theory of function encoders, which enables zero-shot transfer by combining the flexibility of neural networks with the mathematical principles of Hilbert spaces. Using this theory, we first present a method for learning a space of dynamics spanned by a set of neural ODE basis functions. After training, the proposed approach can rapidly identify dynamics in the learned space using an efficient inner product calculation. Critically, this calculation requires no gradient calculations or retraining during the online phase. This method enables zero-shot transfer for autonomous systems at runtime and opens the door for a new class of adaptable control algorithms. We demonstrate state-of-the-art system modeling accuracy for two MuJoCo robot environments and show that the learned models can be used for more efficient MPC control of a quadrotor.", "keywords": "Zero-shot Transfer;Neural ODE;Model-based control", "primary_area": "robotics", "supplementary_material": "", "author": "Tyler Ingebrand;Adam Thorpe;ufuk topcu", "authorids": "~Tyler_Ingebrand1;~Adam_Thorpe1;~ufuk_topcu1", "gender": "M;;Unspecified", "homepage": ";;https://autonomy.oden.utexas.edu/", "dblp": ";;12/6659.html", "google_scholar": ";bApIsIAAAAAJ;jeNGFfQAAAAJ", "orcid": ";;0000-0003-0819-9985", "linkedin": "tyler-ingebrand;;", "or_profile": "~Tyler_Ingebrand1;~Adam_Thorpe1;~ufuk_topcu1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ningebrand2024zeroshot,\ntitle={Zero-Shot Transfer of Neural {ODE}s},\nauthor={Tyler Ingebrand and Adam Thorpe and ufuk topcu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OgnYoIxtIN}\n}", "github": "", "reviewers": "6dAJ;3pa5;w5ay;A5qk", "pdf_size": 1476147, "rating": "5;5;6;6", "confidence": "4;4;4;2", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;4;3;3", "wc_summary": "41;110;119;68", "wc_strengths": "57;67;73;83", "wc_weaknesses": "186;313;228;89", "wc_questions": "148;120;146;7", "wc_limitations": "5;6;77;9", "wc_review": "437;616;643;256", "wc_reply_reviewers": "26;333;63;0", "wc_reply_authors": "0;989;0;0", "reply_reviewers": "1;3;1;0", "reply_authors": "1;4;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 31.64253466459348 ], "wc_strengths_avg": [ 70.0, 9.433981132056603 ], "wc_weaknesses_avg": [ 204.0, 80.63187955145285 ], "wc_questions_avg": [ 105.25, 57.79002941684664 ], "wc_limitations_avg": [ 24.25, 30.49077729412617 ], "wc_review_avg": [ 488.0, 155.59080949721934 ], "wc_reply_reviewers_avg": [ 105.5, 133.24132241913543 ], "wc_reply_authors_avg": [ 247.25, 428.24956217140493 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9036283777513049587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "TEG-DB: A Comprehensive Dataset and Benchmark of Textual-Edge Graphs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97714", "id": "Ogw1sSo9FP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ogw1sSo9FP", "openreview": "https://openreview.net/forum?id=Ogw1sSo9FP", "poster": "/media/PosterPDFs/NeurIPS%202024/97714.png?t=1731637730.394094", "project": "", "author_site": "Zhuofeng Li, Zixing Gou, Xiangnan Zhang, Zhongyuan Liu, Sirui Li, Yuntong Hu, Chen LING, Zheng Zhang, Liang Zhao", "tldr": "", "abstract": "Text-Attributed Graphs (TAGs) augment graph structures with natural language descriptions, facilitating detailed depictions of data and their interconnections across various real-world settings. However, existing TAG datasets predominantly feature textual information only at the nodes, with edges typically represented by mere binary or categorical attributes. This lack of rich textual edge annotations significantly limits the exploration of contextual relationships between entities, hindering deeper insights into graph-structured data. To address this gap, we introduce Textual-Edge Graphs Datasets and Benchmark (TEG-DB), a comprehensive and diverse collection of benchmark textual-edge datasets featuring rich textual descriptions on nodes and edges. The TEG-DB datasets are large-scale and encompass a wide range of domains, from citation networks to social networks. In addition, we conduct extensive benchmark experiments on TEG-DB to assess the extent to which current techniques, including pre-trained language models, graph neural networks, and their combinations, can utilize textual node and edge information. Our goal is to elicit advancements in \n textual-edge graph research, specifically in developing methodologies that exploit rich textual node and edge descriptions to enhance graph analysis and provide deeper insights into complex real-world networks. The entire TEG-DB project is publicly accessible as an open-source repository on Github, accessible at https://github.com/Zhuofeng-Li/TEG-Benchmark.", "keywords": "Text-Attributed Graphs;textual edge;graph neural network;graph benchmark", "primary_area": "", "supplementary_material": "/attachment/9daae589d7a66beb3ae5fe189571283a338a107b.pdf", "author": "Zhuofeng Li;Zixing Gou;Xiangnan Zhang;Zhongyuan Liu;Sirui Li;Yuntong Hu;Chen Ling;Zheng Zhang;Liang Zhao", "authorids": "~Zhuofeng_Li1;~Zixing_Gou1;~Xiangnan_Zhang2;~Zhongyuan_Liu2;~Sirui_Li3;~Yuntong_Hu1;~Chen_Ling3;~Zheng_Zhang10;~Liang_Zhao6", "gender": ";M;F;M;F;M;M;M;M", "homepage": ";https://service.sdu.edu.cn/tp_up/view?m=up#act=portal/viewhome;;https://www.linkedin.com/in/zhongyuan-liu-144094309;;;;https://cs.emory.edu/~lzhao41/;https://lingchen0331.github.io/", "dblp": ";;;;;323/9826.html;181/2621-18;63/5422-2;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;;;QeljXxUAAAAJ;fRdZRHsAAAAJ;qnvyqtwAAAAJ;275NKcEAAAAJ", "orcid": ";;;;;0000-0003-3802-9039;;0000-0002-2648-9989;0000-0001-8044-6026", "linkedin": ";;xiangnan-zhang0514/?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;sirui-l-a1a43b210/;;;;", "or_profile": "~Zhuofeng_Li1;~Zixing_Gou1;~Xiangnan_Zhang2;~Zhongyuan_Liu2;~Sirui_Li3;~Yuntong_Hu1;~Zheng_Zhang10;~Liang_Zhao6;~Chen_LING2", "aff": "Shanghai University;Shandong University;Emory University;China University of Petroleum (East China);Emory University;Emory University;Emory University;Emory University;Emory University", "aff_domain": "shu.edu.cn;sdu.edu.cn;emory.edu;upc.edu.cn;emory.edu;emory.edu;emory.edu;emory.edu;emory.edu", "position": "Undergrad student;Undergrad student;Intern;Undergrad student;Undergrad student;PhD student;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2024tegdb,\ntitle={{TEG}-{DB}: A Comprehensive Dataset and Benchmark of Textual-Edge Graphs},\nauthor={Zhuofeng Li and Zixing Gou and Xiangnan Zhang and Zhongyuan Liu and Sirui Li and Yuntong Hu and Chen Ling and Zheng Zhang and Liang Zhao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Ogw1sSo9FP}\n}", "github": "", "reviewers": "bd5x;d2Yp;r2GB;crpc", "pdf_size": 412630, "rating": "6;6;6;7", "confidence": "5;4;4;3", "wc_summary_and_contributions": "78;36;52;115", "wc_strengths": "91;32;38;32", "wc_improvement": "486;5;252;51", "wc_limitations": "10;1;15;18", "wc_correctness": "66;1;163;5", "wc_clarity": "7;1;265;5", "wc_relation_to_prior_work": "20;7;5;9", "wc_documentation": "12;1;23;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "771;85;814;242", "wc_reply_reviewers": "70;0;26;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;0;1;0", "reply_authors": "2;1;6;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 70.25, 29.8695078633713 ], "wc_strengths_avg": [ 48.25, 24.80297361204902 ], "wc_improvement_avg": [ 198.5, 190.20843829862017 ], "wc_limitations_avg": [ 11.0, 6.442049363362563 ], "wc_correctness_avg": [ 58.75, 65.46898120484234 ], "wc_clarity_avg": [ 69.5, 112.89264812201014 ], "wc_relation_to_prior_work_avg": [ 10.25, 5.80409338312195 ], "wc_documentation_avg": [ 10.5, 8.200609733428363 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 478.0, 319.7225359589155 ], "wc_reply_reviewers_avg": [ 24.0, 28.600699292150182 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14520548117622231355&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "shu.edu.cn;sdu.edu.cn;emory.edu;upc.edu.cn;emory.edu;emory.edu;emory.edu;emory.edu;emory.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;2;2;2;2;2", "aff_unique_norm": "Shanghai University;Shandong University;Emory University;China University of Petroleum", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shu.edu.cn;http://www.sdu.edu.cn;https://www.emory.edu;http://www.cup.edu.cn", "aff_unique_abbr": "SHU;SDU;Emory;CUP", "aff_campus_unique_index": "1", "aff_campus_unique": ";East China", "aff_country_unique_index": "0;0;1;0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "OiTr2v90t7", "title": "Permutree Process", "track": "main", "status": "Reject", "tldr": "", "abstract": "The permutree is an innovative mathematical concept recently introduced in the field of combinatorics. It encompasses permutations, trees, partitions, and binary sequences as its special cases, while also allowing for interpolations between them. In this paper, we present the permutree notion within the context of Bayesian machine learning. We exploit the fact that permutrees have a one-to-one correspondence with special permutations to propose a stochastic process on permutrees, and further propose two data modeling strategies analogous to the stick-breaking process and Chinese restaurant process that are frequently used in Bayesian nonparametrics. \nPermutations, trees, partitions, and binary sequences frequently appear as building blocks in Bayesian nonparametric models, and these models have been studied and developed independently. However, in practical situations, there are many complicated problems that require master craftsmanship to combine these individual models into a single giant model. Our models provide a framework for modeling such complicated tasks in a unified manner. As a significant application, we demonstrate the potential of our models for phylogenetic analysis of lineages, which involve coalescence, recombination, multiple ancestors, and mutation.", "keywords": "Bayesian nonparametrics;Permutree;Marked point process", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/1fe60e692acf39847e480c4525edb8b37664b56a.zip", "author": "Masahiro Nakano;Daiki Chijiwa;Ryohei Shibue;Yasuhiro Fujiwara;Ryo Nishikimi;Tomoharu Iwata;Akisato Kimura;Takeshi Yamada;Naonori Ueda", "authorids": "~Masahiro_Nakano1;~Daiki_Chijiwa1;~Ryohei_Shibue1;~Yasuhiro_Fujiwara1;~Ryo_Nishikimi1;~Tomoharu_Iwata1;~Akisato_Kimura1;~Takeshi_Yamada1;~Naonori_Ueda1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://www.kecl.ntt.co.jp/people/nakano.masahiro/index.html;;;http://www.linkedin.com/in/yasuhiro-fujiwara-8960b0180;;http://www.kecl.ntt.co.jp/as/members/iwata/;https://akisatok.tech;https://researchmap.jp/read0137550;https://www.kecl.ntt.co.jp/as/members/ueda/index-e.html", "dblp": "15/4944;295/8488;;02/2520;185/3600.html;29/5953;55/1636;;87/2491.html", "google_scholar": "b4yD-DQAAAAJ;;e-gbBVYAAAAJ;https://scholar.google.co.jp/citations?user=kCaZaaMAAAAJ;;S1F-gScAAAAJ;https://scholar.google.co.jp/citations?user=HYYmIKoAAAAJ;nHCB2CkAAAAJ;lelCr80AAAAJ", "orcid": "0009-0008-9378-0099;;;0000-0001-9578-1118;0000-0002-3638-6115;;0009-0007-3042-6810;;0000-0001-5701-9333", "linkedin": ";daiki-chijiwa-81491a1a7/;;;;tomoharu-iwata-025a493;akisatokimura/;;", "or_profile": "~Masahiro_Nakano1;~Daiki_Chijiwa1;~Ryohei_Shibue1;~Yasuhiro_Fujiwara1;~Ryo_Nishikimi1;~Tomoharu_Iwata1;~Akisato_Kimura1;~Takeshi_Yamada1;~Naonori_Ueda1", "aff": "NTT;NTT;NTT corpolation;NTT;NTT Communication Science Laboratories;NTT;NTT Corporation;Kinki University;NTT Communication Science Laboratories", "aff_domain": "ntt.co.jp;ntt.co.jp;hco.ntt.co.jp;ntt.co.jp;kecl.ntt.co.jp;hco.ntt.co.jp;ntt.com;kindai.ac.jp;hco.ntt.co.jp", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Executive Reserch Scientist;Full Professor;Researcher", "bibtex": "@misc{\nanonymous2024permutree,\ntitle={Permutree Process},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=OiTr2v90t7}\n}", "github": "", "project": "", "reviewers": "V6GE;UGqF;7WLf;mmW8", "site": "https://openreview.net/forum?id=OiTr2v90t7", "pdf_size": 6866881, "rating": "5;6;6;7", "confidence": "3;3;2;3", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "1;3;3;3", "wc_summary": "67;54;29;81", "wc_strengths": "13;130;17;47", "wc_weaknesses": "162;345;12;149", "wc_questions": "113;346;83;25", "wc_limitations": "1;54;1;20", "wc_review": "356;929;142;322", "wc_reply_reviewers": "23;31;0;38", "wc_reply_authors": "517;736;0;38", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 57.75, 19.149086140074676 ], "wc_strengths_avg": [ 51.75, 47.04984059484155 ], "wc_weaknesses_avg": [ 167.0, 118.38285348816356 ], "wc_questions_avg": [ 141.75, 122.09294615169216 ], "wc_limitations_avg": [ 19.0, 21.644860821913362 ], "wc_review_avg": [ 437.25, 295.3281014397377 ], "wc_reply_reviewers_avg": [ 23.0, 14.300349646075091 ], "wc_reply_authors_avg": [ 322.75, 313.7509960143553 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ty2s6WCb7LEJ:scholar.google.com/&scioq=Permutree+Process&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;1;0;0;2;1", "aff_unique_norm": "NTT Corporation;NTT Communication Science Laboratories;Kinki University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntt.co.jp;https://www.ntt-csl.com;https://www.kinki-u.ac.jp", "aff_unique_abbr": "NTT;NTT CSL;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Clustering in Causal Attention Masking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95352", "id": "OiVxYf9trg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OiVxYf9trg", "openreview": "https://openreview.net/forum?id=OiVxYf9trg", "poster": "/media/PosterPDFs/NeurIPS%202024/95352.png?t=1734050775.9867222", "project": "", "author_site": "Nikita Karagodin, Yury Polyanskiy, Philippe Rigollet", "tldr": "", "abstract": "This work presents a modification of the self-attention dynamics proposed in Geshkovski et al to better reflect the practically relevant, causally masked attention used in transformer architectures for generative AI. This modification translates into an interacting particle system that cannot be interpreted as a mean-field gradient flow. Despite this loss of structure, we significantly strengthen the results of Geshkovski et al in this context: While previous rigorous results focused on cases where all three matrices (key, query, and value) were scaled identities, we prove asymptotic convergence to a single cluster for arbitrary key-query matrices and value matrix equal to the identity.\nAdditionally, we establish a connection to the classical R\\'enyi parking problem from combinatorial geometry to make initial theoretical steps towards demonstrating the existence of meta-stable states.", "keywords": "Transformers;causal attention;continuous-time interacting particle systems;clustering", "primary_area": "learning_theory", "supplementary_material": "", "author": "Nikita Karagodin;Yury Polyanskiy;Philippe Rigollet", "authorids": "~Nikita_Karagodin1;~Yury_Polyanskiy1;~Philippe_Rigollet1", "gender": "M;M;M", "homepage": ";http://www.mit.edu/~ypol/;http://www-math.mit.edu/~rigollet/", "dblp": ";74/8860;05/5072", "google_scholar": "m5zx5lsAAAAJ;;", "orcid": ";;", "linkedin": "nikitus/;;", "or_profile": "~Nikita_Karagodin1;~Yury_Polyanskiy1;~Philippe_Rigollet1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nkaragodin2024clustering,\ntitle={Clustering in Causal Attention Masking},\nauthor={Nikita Karagodin and Yury Polyanskiy and Philippe Rigollet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OiVxYf9trg}\n}", "github": "", "reviewers": "9pe5;Bsfq;Apon;6isq", "pdf_size": 1797244, "rating": "5;6;7;7", "confidence": "2;2;3;1", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "46;99;243;91", "wc_strengths": "42;95;69;84", "wc_weaknesses": "68;26;96;89", "wc_questions": "45;9;41;33", "wc_limitations": "6;49;1;13", "wc_review": "207;278;450;310", "wc_reply_reviewers": "0;0;8;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 119.75, 73.97085574738202 ], "wc_strengths_avg": [ 72.5, 19.880895352071043 ], "wc_weaknesses_avg": [ 69.75, 27.279800219209818 ], "wc_questions_avg": [ 32.0, 13.96424004376894 ], "wc_limitations_avg": [ 17.25, 18.819869818890883 ], "wc_review_avg": [ 311.25, 88.35546106495059 ], "wc_reply_reviewers_avg": [ 6.0, 6.6332495807108 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17047015924684760585&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RLE: A Unified Perspective of Data Augmentation for Cross-Spectral Re-Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95351", "id": "Ok6jSSxzfj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ok6jSSxzfj", "openreview": "https://openreview.net/forum?id=Ok6jSSxzfj", "poster": "/media/PosterPDFs/NeurIPS%202024/95351.png?t=1731730338.4482133", "project": "", "author_site": "Lei Tan, Yukang Zhang, Keke Han, Pingyang Dai, Yan Zhang, Yongjian Wu, Rongrong Ji", "tldr": "", "abstract": "This paper makes a step towards modeling the modality discrepancy in the cross-spectral re-identification task. Based on the Lambertain model, we observe that the non-linear modality discrepancy mainly comes from diverse linear transformations acting on the surface of different materials. From this view, we unify all data augmentation strategies for cross-spectral re-identification as mimicking such local linear transformations and categorize them into moderate transformation and radical transformation. By extending the observation, we propose a Random Linear Enhancement (RLE) strategy which includes Moderate Random Linear Enhancement (MRLE) and Radical Random Linear Enhancement (RRLE) to push the boundaries of both types of transformation. Moderate Random Linear Enhancement is designed to provide diverse image transformations that satisfy the original linear correlations under constrained conditions, whereas Radical Random Linear Enhancement seeks to generate local linear transformations directly without relying on external information. The experimental results not only demonstrate the superiority and effectiveness of RLE but also confirm its great potential as a general-purpose data augmentation for cross-spectral re-identification.", "keywords": "Cross-Spectral Retrieval;Data Augmentation;Re-Identification", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lei Tan;Yukang Zhang;Keke Han;Pingyang Dai;Yan Zhang;YONGJIAN WU;Rongrong Ji", "authorids": "~Lei_Tan3;~Yukang_Zhang1;~Keke_Han1;~Pingyang_Dai1;~Yan_Zhang22;~YONGJIAN_WU2;~Rongrong_Ji5", "gender": "M;M;M;M;;;M", "homepage": "https://stone96123.github.io/;;https://github.com/hankeke303;;;https://open.youtu.qq.com/;http://mac.xmu.edu.cn/rrji-en.html", "dblp": ";;;04/8207;;;86/5681", "google_scholar": "QTnulp0AAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com.hk/citations?user=fEw3__QAAAAJ;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Lei_Tan3;~Yukang_Zhang1;~Keke_Han1;~Pingyang_Dai1;~Yan_Zhang22;~YONGJIAN_WU2;~Rongrong_Ji5", "aff": "Xiamen University;Xiamen University;Xiamen University;Xiamen University;;;Xiamen University", "aff_domain": "xmu.edu.cn;stu.xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;;xmu.edu.cn", "position": "PhD student;PhD student;Undergrad student;Senior Engineer;;;Full Professor", "bibtex": "@inproceedings{\ntan2024rle,\ntitle={{RLE}: A Unified Perspective of Data Augmentation for Cross-Spectral Re-Identification},\nauthor={Lei Tan and Yukang Zhang and Keke Han and Pingyang Dai and Yan Zhang and YONGJIAN WU and Rongrong Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ok6jSSxzfj}\n}", "github": "", "reviewers": "SzQ4;gwge;muwA;DZyN", "pdf_size": 4709639, "rating": "3;5;7;8", "confidence": "5;4;5;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "60;75;66;66", "wc_strengths": "91;48;57;44", "wc_weaknesses": "107;93;96;56", "wc_questions": "122;3;5;8", "wc_limitations": "29;63;11;13", "wc_review": "409;282;235;187", "wc_reply_reviewers": "470;46;48;108", "wc_reply_authors": "97;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.75, 5.356071321407137 ], "wc_strengths_avg": [ 60.0, 18.506755523321747 ], "wc_weaknesses_avg": [ 88.0, 19.196353820452465 ], "wc_questions_avg": [ 34.5, 50.54948070949889 ], "wc_limitations_avg": [ 29.0, 20.83266665599966 ], "wc_review_avg": [ 278.25, 82.623770792672 ], "wc_reply_reviewers_avg": [ 168.0, 176.13063333787227 ], "wc_reply_authors_avg": [ 24.25, 42.00223208354527 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.22549380840084865, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11309956668575358015&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xmu.edu.cn;stu.xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;;xmu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Observational Scaling Laws and the Predictability of Langauge Model Performance", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95350", "id": "On5WIN7xyD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=On5WIN7xyD", "openreview": "https://openreview.net/forum?id=On5WIN7xyD", "poster": "", "project": "", "author_site": "Yangjun Ruan, Chris Maddison, Tatsunori Hashimoto", "tldr": "", "abstract": "Understanding how language model performance varies with scale is critical to benchmark and algorithm development. Scaling laws are one approach to building this understanding, but the requirement of training models across many different scales has limited their use. We propose an alternative, observational approach that bypasses model training and instead builds scaling laws from ~100 publically available models. Building a single scaling law from multiple model families is challenging due to large variations in their training compute efficiencies and capabilities. However, we show that these variations are consistent with a simple, generalized scaling law where language model performance is a function of a low-dimensional capability space, and model families only vary in their efficiency in converting training compute to capabilities. Using this approach, we show the surprising predictability of complex scaling phenomena: we show that several emergent phenomena follow a smooth, sigmoidal behavior and are predictable from small models; we show that the agent performance of models such as GPT-4 can be precisely predicted from simpler non-agentic benchmarks; and we show how to predict the impact of post-training interventions like Chain-of-Thought and Self-Consistency as language model capabilities continue to improve.", "keywords": "Language Models;Scaling Laws;Emergent Capability;Agents;Post-Training Methods", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yangjun Ruan;Chris J. Maddison;Tatsunori Hashimoto", "authorids": "~Yangjun_Ruan1;~Chris_J._Maddison1;~Tatsunori_Hashimoto1", "gender": "M;M;M", "homepage": "http://www.cs.toronto.edu/~yjruan/;https://thashim.github.io;http://www.cs.toronto.edu/~cmaddis/", "dblp": "237/3892;;139/1388", "google_scholar": "https://scholar.google.com.hk/citations?user=9AdCSywAAAAJ;5ygiTwsAAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yangjun_Ruan1;~Tatsunori_Hashimoto1;~Chris_J_Maddison1", "aff": "University of Toronto;Stanford University;Google", "aff_domain": "toronto.edu;stanford.edu;google.com", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nruan2024observational,\ntitle={Observational Scaling Laws and the Predictability of Langauge Model Performance},\nauthor={Yangjun Ruan and Chris J. Maddison and Tatsunori Hashimoto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=On5WIN7xyD}\n}", "github": "", "reviewers": "oXqB;DfgG;M7jM;JzvB;V1YP", "pdf_size": 3649550, "rating": "6;7;7;7;7", "confidence": "3;4;3;2;4", "soundness": "3;3;3;3;3", "novelty": "3;4;3;3;3", "presentation": "3;4;3;3;3", "wc_summary": "177;60;43;52;140", "wc_strengths": "43;44;25;37;253", "wc_weaknesses": "144;148;10;2;312", "wc_questions": "43;64;72;80;188", "wc_limitations": "11;19;3;17;99", "wc_review": "418;335;153;188;992", "wc_reply_reviewers": "64;63;0;0;173", "wc_reply_authors": "52;0;0;0;502", "reply_reviewers": "2;1;0;0;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 94.4, 53.89842298249551 ], "wc_strengths_avg": [ 80.4, 86.56465791534094 ], "wc_weaknesses_avg": [ 123.2, 113.31089974049276 ], "wc_questions_avg": [ 89.4, 50.81574559130271 ], "wc_limitations_avg": [ 29.8, 35.04511378209522 ], "wc_review_avg": [ 417.2, 303.1457735149873 ], "wc_reply_reviewers_avg": [ 60.0, 63.23606565876786 ], "wc_reply_authors_avg": [ 110.8, 196.63407639572546 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14270073194703426153&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "toronto.edu;stanford.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Toronto;Stanford University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.utoronto.ca;https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "U of T;Stanford;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Mean-Field Langevin Dynamics for Signed Measures via a Bilevel Approach", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95349", "id": "Oo7HY9kmK6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Oo7HY9kmK6", "openreview": "https://openreview.net/forum?id=Oo7HY9kmK6", "poster": "/media/PosterPDFs/NeurIPS%202024/95349.png?t=1729932723.8143022", "project": "", "author_site": "Guillaume Wang, Alireza Mousavi-Hosseini, L\u00e9na\u00efc Chizat", "tldr": "", "abstract": "Mean-field Langevin dynamics (MLFD) is a class of interacting particle methods that tackle convex optimization over probability measures on a manifold, which are scalable, versatile, and enjoy computational guarantees. However, some important problems -- such as risk minimization for infinite width two-layer neural networks, or sparse deconvolution -- are originally defined over the set of signed, rather than probability, measures. In this paper, we investigate how to extend the MFLD framework to convex optimization problems over signed measures.\nAmong two known reductions from signed to probability measures -- the lifting and the bilevel approaches -- we show that the bilevel reduction leads to stronger guarantees and faster rates (at the price of a higher per-iteration complexity).\nIn particular, we investigate the convergence rate of MFLD applied to the bilevel reduction in the low-noise regime and obtain two results. First, this dynamics is amenable to an annealing schedule, adapted from [Suzuki et al., 2023], that results in polynomial convergence rates to a fixed multiplicative accuracy. Second, we investigate the problem of learning a single neuron with the bilevel approach and obtain local exponential convergence rates that depend polynomially on the dimension and noise level (to compare with the exponential dependence that would result from prior analyses).", "keywords": "two-layer neural network;Wasserstein gradient flow;noisy particle method;mean-field Langevin dynamics;logarithmic Sobolev inequality;bilevel optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Guillaume Wang;Alireza Mousavi-Hosseini;L\u00e9na\u00efc Chizat", "authorids": "~Guillaume_Wang1;~Alireza_Mousavi-Hosseini1;~L\u00e9na\u00efc_Chizat1", "gender": "M;M;M", "homepage": "https://guillaumew16.github.io/;https://lchizat.github.io/;https://www.cs.toronto.edu/~mousavi/", "dblp": "306/1191;192/1488;296/4041", "google_scholar": "CXwkg4sAAAAJ;https://scholar.google.fr/citations?user=jrJh9yIAAAAJ;", "orcid": "0000-0003-4396-0688;;", "linkedin": ";;", "or_profile": "~Guillaume_Wang1;~L\u00e9na\u00efc_Chizat1;~Alireza_Mousavi1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Department of Computer Science, University of Toronto", "aff_domain": "epfl.ch;epfl.ch;cs.toronto.edu", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nwang2024meanfield,\ntitle={Mean-Field Langevin Dynamics for Signed Measures via a Bilevel Approach},\nauthor={Guillaume Wang and Alireza Mousavi-Hosseini and L{\\'e}na{\\\"\\i}c Chizat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Oo7HY9kmK6}\n}", "github": "", "reviewers": "hSwQ;Hr7F;3p9w;akPi", "pdf_size": 1124383, "rating": "6;7;7;7", "confidence": "4;3;3;3", "soundness": "3;4;4;3", "novelty": "2;4;3;3", "presentation": "3;3;3;3", "wc_summary": "86;46;92;117", "wc_strengths": "69;20;100;146", "wc_weaknesses": "104;82;78;91", "wc_questions": "31;2;87;275", "wc_limitations": "6;1;4;26", "wc_review": "296;151;361;655", "wc_reply_reviewers": "35;0;14;47", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.25, 25.469344318219107 ], "wc_strengths_avg": [ 83.75, 45.88232230391134 ], "wc_weaknesses_avg": [ 88.75, 9.98436277385793 ], "wc_questions_avg": [ 98.75, 106.24588227314976 ], "wc_limitations_avg": [ 9.25, 9.832980219648569 ], "wc_review_avg": [ 365.75, 183.48756769874083 ], "wc_reply_reviewers_avg": [ 24.0, 18.207141456033124 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6784433396743071638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch;cs.toronto.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;University of Toronto", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.utoronto.ca", "aff_unique_abbr": "EPFL;EPFL;U of T", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Lausanne;Toronto", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Switzerland;Canada" }, { "title": "Questioning the Survey Responses of Large Language Models", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95348", "id": "Oo7dlLgqQX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Oo7dlLgqQX", "openreview": "https://openreview.net/forum?id=Oo7dlLgqQX", "poster": "", "project": "", "author_site": "Ricardo Dominguez-Olmedo, Moritz Hardt, Celestine Mendler-D\u00fcnner", "tldr": "", "abstract": "Surveys have recently gained popularity as a tool to study large language models. By comparing models\u2019 survey responses to those of different human reference populations, researchers aim to infer the demographics, political opinions, or values best represented by current language models. In this work, we critically examine language models' survey responses on the basis of the well-established American Community Survey by the U.S. Census Bureau. Evaluating 43 different language models using de-facto standard prompting methodologies, we establish two dominant patterns. First, models' responses are governed by ordering and labeling biases, for example, towards survey responses labeled with the letter \u201cA\u201d. Second, when adjusting for these systematic biases through randomized answer ordering, models across the board trend towards uniformly random survey responses, irrespective of model size or training data. As a result, models consistently appear to better represent subgroups whose aggregate statistics are closest to uniform for the survey under consideration, leading to potentially misguided conclusions about model alignment.", "keywords": "large language models;surveys", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/dd25ca2f9b7c3486be27ec657ad641d977a5d7a6.zip", "author": "Ricardo Dominguez-Olmedo;Moritz Hardt;Celestine Mendler-D\u00fcnner", "authorids": "~Ricardo_Dominguez-Olmedo1;~Moritz_Hardt1;~Celestine_Mendler-D\u00fcnner1", "gender": "M;Not Specified;", "homepage": "https://ricardodominguez.github.io/;http://mrtz.org/;http://celestine.ai/", "dblp": "309/6725;26/4683;176/5511", "google_scholar": ";adnTgaAAAAAJ;UqtDdZUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ricardo_Dominguez-Olmedo1;~Moritz_Hardt1;~Celestine_Mendler-D\u00fcnner1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems", "aff_domain": "is.mpg.de;is.mpg.de;tuebingen.mpg.de", "position": "PhD student;Principal Researcher;Group Lead", "bibtex": "@inproceedings{\ndominguez-olmedo2024questioning,\ntitle={Questioning the Survey Responses of Large Language Models},\nauthor={Ricardo Dominguez-Olmedo and Moritz Hardt and Celestine Mendler-D{\\\"u}nner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Oo7dlLgqQX}\n}", "github": "", "reviewers": "gDVK;Pjc3;SkTL;gFgE", "pdf_size": 1619683, "rating": "6;7;7;8", "confidence": "5;2;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "199;52;96;33", "wc_strengths": "66;74;147;68", "wc_weaknesses": "358;158;126;164", "wc_questions": "31;38;92;1", "wc_limitations": "35;23;1;14", "wc_review": "689;345;462;280", "wc_reply_reviewers": "238;16;0;11", "wc_reply_authors": "80;14;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 64.24562241896331 ], "wc_strengths_avg": [ 88.75, 33.75925798947601 ], "wc_weaknesses_avg": [ 201.5, 91.50273219964528 ], "wc_questions_avg": [ 40.5, 32.82148686455262 ], "wc_limitations_avg": [ 18.25, 12.43734296383275 ], "wc_review_avg": [ 444.0, 155.76103492208827 ], "wc_reply_reviewers_avg": [ 66.25, 99.32868417531766 ], "wc_reply_authors_avg": [ 23.5, 33.11721606657178 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=852324857643166113&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "is.mpg.de;is.mpg.de;tuebingen.mpg.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Coevolving with the Other You: Fine-Tuning LLM with Sequential Cooperative Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95347", "id": "OoOCoZFVK3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OoOCoZFVK3", "openreview": "https://openreview.net/forum?id=OoOCoZFVK3", "poster": "/media/PosterPDFs/NeurIPS%202024/95347.png?t=1731385190.9635818", "project": "", "author_site": "Hao Ma, Tianyi Hu, Zhiqiang Pu, Liu Boyin, Xiaolin Ai, Yanyan Liang, Min Chen", "tldr": "", "abstract": "Reinforcement learning (RL) has emerged as a pivotal technique for fine-tuning large language models (LLMs) on specific tasks. However, prevailing RL fine-tuning methods predominantly rely on PPO and its variants. Though these algorithms are effective in general RL settings, they often exhibit suboptimal performance and vulnerability to distribution collapse when applied to the fine-tuning of LLMs. In this paper, we propose CORY, extending the RL fine-tuning of LLMs to a sequential cooperative multi-agent reinforcement learning framework, to leverage the inherent coevolution and emergent capabilities of multi-agent systems. In CORY, the LLM to be fine-tuned is initially duplicated into two autonomous agents: a pioneer and an observer. The pioneer generates responses based on queries, while the observer generates responses using both the queries and the pioneer\u2019s responses. The two agents are trained together. During training, the agents exchange roles periodically, fostering cooperation and coevolution between them. Experiments evaluate CORY's performance by fine-tuning GPT-2 and Llama-2 under subjective and objective reward functions on the IMDB Review and GSM8K datasets, respectively. Results show that CORY outperforms PPO in terms of policy optimality, resistance to distribution collapse, and training robustness, thereby underscoring its potential as a superior methodology for refining LLMs in real-world applications.", "keywords": "large language model;reinforcement learning with human feedback;multi-agent reinforcement learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/13b8b576e2e47d2e3812a79a0330aa00de4fd9e4.zip", "author": "Hao Ma;Tianyi Hu;Zhiqiang Pu;Boyin Liu;Xiaolin Ai;Yanyan Liang;Min Chen", "authorids": "~Hao_Ma5;~Tianyi_Hu1;~Zhiqiang_Pu1;~Boyin_Liu2;~Xiaolin_Ai1;~Yanyan_Liang1;~Min_Chen7", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/Marroh;;https://teacher.ucas.ac.cn/~0054331;;https://www.must.edu.mo/scse/staff/liang-yanyan;;", "dblp": ";;;;43/10437;;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;;;;https://scholar.google.com.hk/citations?user=GsggJOoAAAAJ", "orcid": "0000-0001-9593-4102;0009-0003-2570-2287;;my-orcid?orcid=0000-0001-7943-8336;0000-0002-5780-8540;0000-0003-1938-2603;", "linkedin": "https://linkedin.com/in/%E6%98%8A-%E9%A9%AC-9679221b0;;;;;;", "or_profile": "~Hao_Ma5;~Tianyi_Hu1;~Zhiqiang_Pu1;~Xiaolin_Ai1;~Yanyan_Liang1;~Min_Chen7;~Liu_Boyin1", "aff": "Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;Macau University of Science and Technology;;University of Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ucas.edu.cn;;ia.ac.cn;must.edu.mo;;ucas.edu.cn", "position": "PhD student;PhD student;;Assistant Professor;Associate Professor;;PhD student", "bibtex": "@inproceedings{\nma2024coevolving,\ntitle={Coevolving with the Other You: Fine-Tuning {LLM} with Sequential Cooperative Multi-Agent Reinforcement Learning},\nauthor={Hao Ma and Tianyi Hu and Zhiqiang Pu and Boyin Liu and Xiaolin Ai and Yanyan Liang and Min Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OoOCoZFVK3}\n}", "github": "", "reviewers": "7TR2;chZh;F4Ro;Syvc", "pdf_size": 3479583, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "55;103;104;114", "wc_strengths": "12;99;52;66", "wc_weaknesses": "52;456;77;45", "wc_questions": "213;10;48;145", "wc_limitations": "63;17;9;7", "wc_review": "395;685;290;377", "wc_reply_reviewers": "14;93;158;20", "wc_reply_authors": "159;740;123;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 22.9237867726953 ], "wc_strengths_avg": [ 57.25, 31.20396609407208 ], "wc_weaknesses_avg": [ 157.5, 172.74909551138032 ], "wc_questions_avg": [ 104.0, 79.89680844689605 ], "wc_limitations_avg": [ 24.0, 22.825424421026653 ], "wc_review_avg": [ 436.75, 148.72520801800883 ], "wc_reply_reviewers_avg": [ 71.25, 58.9549616232595 ], "wc_reply_authors_avg": [ 255.5, 285.87103735775685 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=38914403760541029&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;ucas.edu.cn;;ia.ac.cn;must.edu.mo;;ucas.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Macau University of Science and Technology", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;https://www.must.edu.mo", "aff_unique_abbr": "CAS;UCAS;MUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MindMerger: Efficiently Boosting LLM Reasoning in non-English Languages", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95346", "id": "Oq32ylAOu2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Oq32ylAOu2", "openreview": "https://openreview.net/forum?id=Oq32ylAOu2", "poster": "", "project": "", "author_site": "Zixian Huang, Wenhao Zhu, Gong Cheng, Lei Li, Fei Yuan", "tldr": "", "abstract": "Reasoning capabilities are crucial for Large Language Models~(LLMs), yet a notable gap exists between English and non-English languages. To bridge this disparity, some works fine-tune LLMs to relearn reasoning capabilities in non-English languages, while others replace non-English inputs with an external model's outputs such as English translation text to circumvent the challenge of LLM understanding non-English. Unfortunately, these methods often underutilize the built-in skilled reasoning and useful language understanding capabilities of LLMs. In order to better utilize the minds of reasoning and language understanding in LLMs, we propose a new method, namely MergeMinds, which merges LLMs with the external language understanding capabilities from multilingual models to boost the multilingual reasoning performance. Furthermore, a two-step training scheme is introduced to first train to embeded the external capabilities into LLMs and then train the collaborative utilization of the external capabilities and the built-in capabilities in LLMs. Experiments on three multilingual reasoning datasets and a language understanding dataset demonstrate that MergeMinds consistently outperforms all baselines, especially in low-resource languages. Without updating the parameters of LLMs, the average accuracy improved by 6.7 and 8.0 across all languages and low-resource languages on the MGSM dataset, respectively.", "keywords": "Large Language Model;Multilingual;Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/75f972576473d7903f4d66b971398be5fd67bd1e.zip", "author": "Zixian Huang;Wenhao Zhu;Gong Cheng;Lei Li;Fei Yuan", "authorids": "~Zixian_Huang1;~Wenhao_Zhu1;~Gong_Cheng3;~Lei_Li11;~Fei_Yuan2", "gender": "M;M;M;M;", "homepage": ";https://owennju.github.io/;http://ws.nju.edu.cn/~gcheng;https://www.cs.cmu.edu/~leili;", "dblp": ";;69/1215-1;13/7007-5.html;", "google_scholar": "FiqiYDUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;_ncKAiwAAAAJ;BYXqAlwAAAAJ;", "orcid": ";;0000-0003-3539-7776;0000-0003-3095-9776;", "linkedin": ";;gongcheng/;;", "or_profile": "~Zixian_Huang1;~Wenhao_Zhu1;~Gong_Cheng3;~Lei_Li11;~Fei_Yuan2", "aff": "Nanjing University;Shanghai Artificial Intelligence Laboratory;Nanjing University;School of Computer Science, Carnegie Mellon University;", "aff_domain": "nju.edu.cn;pjlab.org.cn;nju.edu.cn;cs.cmu.edu;", "position": "PhD student;Research Intern;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nhuang2024mindmerger,\ntitle={MindMerger: Efficiently Boosting {LLM} Reasoning in non-English Languages},\nauthor={Zixian Huang and Wenhao Zhu and Gong Cheng and Lei Li and Fei Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Oq32ylAOu2}\n}", "github": "", "reviewers": "iF4J;uQ8B;VBLh;q66P", "pdf_size": 4386661, "rating": "5;6;6;6", "confidence": "4;4;3;5", "soundness": "2;4;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;2", "wc_summary": "25;49;67;81", "wc_strengths": "21;53;41;43", "wc_weaknesses": "191;56;129;177", "wc_questions": "26;107;2;1", "wc_limitations": "26;7;2;4", "wc_review": "289;272;241;306", "wc_reply_reviewers": "24;10;18;13", "wc_reply_authors": "1103;0;486;616", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.5, 20.946360065653412 ], "wc_strengths_avg": [ 39.5, 11.6081867662439 ], "wc_weaknesses_avg": [ 138.25, 52.76066242950329 ], "wc_questions_avg": [ 34.0, 43.31858723458095 ], "wc_limitations_avg": [ 9.75, 9.54921462739214 ], "wc_review_avg": [ 277.0, 24.01041440708594 ], "wc_reply_reviewers_avg": [ 16.25, 5.3091901453988255 ], "wc_reply_authors_avg": [ 551.25, 392.6686739478972 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6908160685214106625&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;pjlab.org.cn;nju.edu.cn;cs.cmu.edu;", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanjing University;Shanghai Artificial Intelligence Laboratory;Carnegie Mellon University", "aff_unique_dep": ";;School of Computer Science", "aff_unique_url": "https://www.nju.edu.cn;http://www.shailab.org/;https://www.cmu.edu", "aff_unique_abbr": "Nanjing U;Shanghai AI Lab;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "The GAN is dead; long live the GAN! A Modern GAN Baseline", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95345", "id": "OrtN9hPP7V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OrtN9hPP7V", "openreview": "https://openreview.net/forum?id=OrtN9hPP7V", "poster": "/media/PosterPDFs/NeurIPS%202024/95345.png?t=1731369202.5380197", "project": "", "author_site": "Nick Huang, Aaron Gokaslan, Volodymyr Kuleshov, James Tompkin", "tldr": "", "abstract": "There is a widely-spread claim that GANs are difficult to train, and GAN architectures in the literature are littered with empirical tricks. We provide evidence against this claim and build a modern GAN baseline in a more principled manner. First, we derive a well-behaved regularized relativistic GAN loss that addresses issues of mode dropping and non-convergence that were previously tackled via a bag of ad-hoc tricks. We analyze our loss mathematically and prove that it admits local convergence guarantees, unlike most existing relativistic losses. Second, this loss allows us to discard all ad-hoc tricks and replace outdated backbones used in common GANs with modern architectures. Using StyleGAN2 as an example, we present a roadmap of simplification and modernization that results in a new minimalist baseline---R3GAN. Despite being simple, our approach surpasses StyleGAN2 on FFHQ, ImageNet, CIFAR, and Stacked MNIST datasets, and compares favorably against state-of-the-art GANs and diffusion models. Code: https://www.github.com/brownvc/R3GAN", "keywords": "GAN", "primary_area": "generative_models", "supplementary_material": "", "author": "Nick Huang;Aaron Gokaslan;Volodymyr Kuleshov;James Tompkin", "authorids": "~Nick_Huang1;~Aaron_Gokaslan1;~Volodymyr_Kuleshov1;~James_Tompkin3", "gender": "M;M;;M", "homepage": "https://cs.brown.edu/people/grad/yhuan170/;https://skylion007.github.io/;https://www.cs.cornell.edu/~kuleshov/;http://www.jamestompkin.com/", "dblp": ";220/6816;81/8612;95/7687", "google_scholar": ";Mt2wyL4AAAAJ;RY_t8XAAAAAJ;Ex3pgLAAAAAJ", "orcid": ";0000-0002-3575-2961;;0000-0003-2218-2899", "linkedin": ";aarongokaslan/;;", "or_profile": "~Nick_Huang1;~Aaron_Gokaslan1;~Volodymyr_Kuleshov1;~James_Tompkin3", "aff": "Brown University;Cornell University;Cornell University;Brown University", "aff_domain": "brown.edu;cornell.edu;cornell.edu;brown.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024the,\ntitle={The {GAN} is dead; long live the {GAN}! A Modern {GAN} Baseline},\nauthor={Nick Huang and Aaron Gokaslan and Volodymyr Kuleshov and James Tompkin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OrtN9hPP7V}\n}", "github": "", "reviewers": "2Hzz;otaU;YF9j", "pdf_size": 23112132, "rating": "5;6;9", "confidence": "3;3;5", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;3;4", "wc_summary": "85;291;358", "wc_strengths": "62;89;41", "wc_weaknesses": "103;57;84", "wc_questions": "51;50;231", "wc_limitations": "14;1;7", "wc_review": "315;488;721", "wc_reply_reviewers": "57;23;95", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 244.66666666666666, 116.1675035837858 ], "wc_strengths_avg": [ 64.0, 19.6468827043885 ], "wc_weaknesses_avg": [ 81.33333333333333, 18.87385022252275 ], "wc_questions_avg": [ 110.66666666666667, 85.08949536941809 ], "wc_limitations_avg": [ 7.333333333333333, 5.312459150169743 ], "wc_review_avg": [ 508.0, 166.351034462268 ], "wc_reply_reviewers_avg": [ 58.333333333333336, 29.408993333483707 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9707253433941512, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3799289932745662035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "brown.edu;cornell.edu;cornell.edu;brown.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Brown University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.cornell.edu", "aff_unique_abbr": "Brown;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Leveraging Separated World Model for Exploration in Visually Distracted Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95344", "id": "Osh7u2E1kC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Osh7u2E1kC", "openreview": "https://openreview.net/forum?id=Osh7u2E1kC", "poster": "", "project": "", "author_site": "Kaichen Huang, Shenghua Wan, Minghao Shao, Hai-Hang Sun, Le Gan, Shuai Feng, De-Chuan Zhan", "tldr": "", "abstract": "Model-based unsupervised reinforcement learning (URL) has gained prominence for reducing environment interactions and learning general skills using intrinsic rewards. However, distractors in observations can severely affect intrinsic reward estimation, leading to a biased exploration process, especially in environments with visual inputs like images or videos. To address this challenge, we propose a bi-level optimization framework named Separation-assisted eXplorer (SeeX). In the inner optimization, SeeX trains a separated world model to extract exogenous and endogenous information, minimizing uncertainty to ensure task relevance. In the outer optimization, it learns a policy on imaginary trajectories generated within the endogenous state space to maximize task-relevant uncertainty. Evaluations on multiple locomotion and manipulation tasks demonstrate SeeX's effectiveness.", "keywords": "unsupervised RL;separate world model;visual inputs with distractors;minimax optimization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/9edbac60d0b27068eb1216bb54fdec95be91c117.zip", "author": "Kaichen Huang;Shenghua Wan;Minghao Shao;Hai-Hang Sun;Le Gan;Shuai Feng;De-Chuan Zhan", "authorids": "~Kaichen_Huang2;~Shenghua_Wan1;~Minghao_Shao2;~Hai-Hang_Sun1;~Le_Gan1;~Shuai_Feng3;~De-Chuan_Zhan1", "gender": "M;;M;M;M;;M", "homepage": "https://www.lamda.nju.edu.cn/huangkc/;;https://www.lamda.nju.edu.cn/shaomh/;https://www.lamda.nju.edu.cn/sunhh/;;;http://www.lamda.nju.edu.cn/zhandc/", "dblp": ";;;;199/0588.html;;74/498", "google_scholar": ";;;;cCD5SDoAAAAJ;;mYJf4TcAAAAJ", "orcid": ";;;;0000-0002-8260-6932;;0000-0002-3533-2078", "linkedin": ";;;;;;", "or_profile": "~Kaichen_Huang2;~Shenghua_Wan1;~Minghao_Shao2;~Hai-Hang_Sun1;~Le_Gan1;~Shuai_Feng3;~De-Chuan_Zhan1", "aff": "Nanjing University;;Nanjing University;Nanjing University;Nanjing University;;Nanjing University", "aff_domain": "nju.edu.cn;;nju.edu.cn;nju.edu.cn;nju.edu.cn;;nju.edu.cn", "position": "MS student;;MS student;MS student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nhuang2024leveraging,\ntitle={Leveraging Separated World Model for Exploration in Visually Distracted Environments},\nauthor={Kaichen Huang and Shenghua Wan and Minghao Shao and Hai-Hang Sun and Le Gan and Shuai Feng and De-Chuan Zhan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Osh7u2E1kC}\n}", "github": "", "reviewers": "NjAE;HYxU;wcfo;SZqk", "pdf_size": 3075763, "rating": "4;4;6;8", "confidence": "4;3;4;3", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "1;2;3;4", "wc_summary": "43;107;171;110", "wc_strengths": "35;26;98;36", "wc_weaknesses": "154;193;332;31", "wc_questions": "117;4;67;19", "wc_limitations": "1;9;20;22", "wc_review": "350;339;688;218", "wc_reply_reviewers": "17;172;57;17", "wc_reply_authors": "98;758;117;116", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;2;3", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 107.75, 45.273474573971015 ], "wc_strengths_avg": [ 48.75, 28.699956445959984 ], "wc_weaknesses_avg": [ 177.5, 107.38365797457264 ], "wc_questions_avg": [ 51.75, 44.27965108263614 ], "wc_limitations_avg": [ 13.0, 8.514693182963201 ], "wc_review_avg": [ 398.75, 174.84475256638387 ], "wc_reply_reviewers_avg": [ 65.75, 63.47981962797311 ], "wc_reply_authors_avg": [ 272.25, 280.5497950453716 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9712565990273434584&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "nju.edu.cn;;nju.edu.cn;nju.edu.cn;nju.edu.cn;;nju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Improved Guarantees for Fully Dynamic $k$-Center Clustering with Outliers in General Metric Spaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95343", "id": "OtYCp1yfbX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OtYCp1yfbX", "openreview": "https://openreview.net/forum?id=OtYCp1yfbX", "poster": "", "project": "", "author_site": "Leyla Biabani, Annika Hennes, Denise La Gordt Dillie, Morteza Monemizadeh, Melanie Schmidt", "tldr": "", "abstract": "The metric $k$-center clustering problem with $z$ outliers, also known as $(k,z)$-center clustering, \ninvolves clustering a given point set $P$ in a metric space $(M,d)$ using at most $k$ balls, \nminimizing the maximum ball radius while excluding up to $z$ points from the clustering. \nThis problem holds fundamental significance in various domains such as machine learning, \ndata mining, and database systems.\n\nThis paper addresses the fully dynamic version of the problem, where the point set undergoes continuous updates (insertions and deletions) over time. The objective is to maintain an approximate $(k,z)$-center clustering with efficient update times. \nWe propose a novel fully dynamic algorithm that maintains a $(4+\\epsilon)$-approximate \nsolution to the $(k,z)$-center clustering problem that covers \nall but at most $(1+\\epsilon)z$ points at any time in the sequence with probability $1-k/e^{\\Omega(\\log k)}$. \nThe algorithm achieves an expected amortized update time of $\\mathcal{O}(\\epsilon^{-2} k^6\\log(k) \\log(\\Delta))$, and is applicable to general metric spaces. \nOur dynamic algorithm presents a significant improvement over the recent dynamic $(14+\\epsilon)$-approximation algorithm by Chan, Lattanzi, Sozio, and Wang for this problem.", "keywords": "$k$-center with outliers;fully dynamic model;metric spaces", "primary_area": "optimization", "supplementary_material": "", "author": "Leyla Biabani;Annika Hennes;Denise La Gordt Dillie;Morteza Monemizadeh;Melanie Schmidt", "authorids": "~Leyla_Biabani1;~Annika_Hennes1;~Denise_La_Gordt_Dillie1;~Morteza_Monemizadeh1;~Melanie_Schmidt1", "gender": ";F;F;M;F", "homepage": "https://research.tue.nl/en/persons/leyla-biabani;;;https://research.tue.nl/en/persons/morteza-monemizadeh;", "dblp": ";352/5632;;11/4322.html;67/7224-1", "google_scholar": ";YdPpy6QAAAAJ;;wVH7Gp4AAAAJ;https://scholar.google.com/citations?hl=de", "orcid": ";0000-0001-9109-3107;;;", "linkedin": ";;denise-la-gordt-dillie;;", "or_profile": "~Leyla_Biabani1;~Annika_Hennes1;~Denise_La_Gordt_Dillie1;~Morteza_Monemizadeh1;~Melanie_Schmidt1", "aff": "Eindhoven University of Technology;Heinrich Heine University D\u00fcsseldorf;Eindhoven University of Technology;Eindhoven University of Technology;Heinrich Heine University D\u00fcsseldorf", "aff_domain": "tue.nl;uni-duesseldorf.de;tue.nl;tue.nl;hhu.de", "position": "PhD student;PhD student;MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nbiabani2024improved,\ntitle={Improved Guarantees for Fully Dynamic \\$k\\$-Center Clustering with Outliers in General Metric Spaces},\nauthor={Leyla Biabani and Annika Hennes and Denise La Gordt Dillie and Morteza Monemizadeh and Melanie Schmidt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OtYCp1yfbX}\n}", "github": "", "reviewers": "fWP4;fRgM;CFX4;NQy9", "pdf_size": 637779, "rating": "6;6;7;7", "confidence": "3;3;5;4", "soundness": "3;4;4;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "149;48;434;188", "wc_strengths": "50;13;107;51", "wc_weaknesses": "114;105;141;37", "wc_questions": "140;45;134;73", "wc_limitations": "3;16;1;2", "wc_review": "456;227;817;351", "wc_reply_reviewers": "0;85;0;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 204.75, 141.8756057255792 ], "wc_strengths_avg": [ 55.25, 33.573613150806395 ], "wc_weaknesses_avg": [ 99.25, 38.3038836151114 ], "wc_questions_avg": [ 98.0, 40.29267923581156 ], "wc_limitations_avg": [ 5.5, 6.103277807866851 ], "wc_review_avg": [ 462.75, 220.0026988470823 ], "wc_reply_reviewers_avg": [ 23.75, 35.59757716474535 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GK4wkncWyegJ:scholar.google.com/&scioq=Improved+Guarantees+for+Fully+Dynamic+%24k%24-Center+Clustering+with+Outliers+in+General+Metric+Spaces&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "tue.nl;uni-duesseldorf.de;tue.nl;tue.nl;hhu.de", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Eindhoven University of Technology;Heinrich Heine University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tue.nl;https://www.hhu.de", "aff_unique_abbr": "TU/e;HHU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";D\u00fcsseldorf", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Netherlands;Germany" }, { "title": "A Theoretical Understanding of Self-Correction through In-context Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95342", "id": "OtvNLTWYww", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OtvNLTWYww", "openreview": "https://openreview.net/forum?id=OtvNLTWYww", "poster": "", "project": "", "author_site": "Yifei Wang, Yuyang Wu, Zeming Wei, Stefanie Jegelka, Yisen Wang", "tldr": "", "abstract": "Going beyond mimicking limited human experiences, recent studies show initial evidence that, like humans, large language models (LLMs) are capable of improving their abilities purely by self-correction, i.e., correcting previous responses through self-examination, as seen in models like OpenAI o1. Nevertheless, little is known about how such capabilities arise. In this work, based on a simplified setup akin to an alignment task, we theoretically analyze self-correction from an in-context learning perspective, showing that when LLMs give relatively accurate self-examinations as rewards, they are capable of refining responses in an in-context way. Notably, going beyond previous theories on over-simplified linear transformers, our theoretical construction underpins the roles of several key designs of realistic transformers for self-correction: softmax attention, multi-head attention, and the MLP block. We validate these findings extensively on synthetic datasets. Inspired by these findings, we propose a simple self-correction strategy, Checking as Context (CaC), which finds novel applications in alleviating social bias and defending against LLM jailbreaks. We believe that these findings will inspire further research on understanding, exploiting, and enhancing self-correction for building better foundation models. Code is at https://github.com/yifeiwang77/Self-Correction.", "keywords": "Self-correction;Theory;In-context Learning;Transformer;Language Model;Alignment", "primary_area": "generative_models", "supplementary_material": "", "author": "Yifei Wang;Yuyang Wu;Zeming Wei;Stefanie Jegelka;Yisen Wang", "authorids": "~Yifei_Wang1;~Yuyang_Wu1;~Zeming_Wei1;~Stefanie_Jegelka3;~Yisen_Wang1", "gender": "M;M;M;F;M", "homepage": "https://yifeiwang77.com;https://dblp.org/pid/245/3328;https://weizeming.github.io;http://people.csail.mit.edu/stefje/;https://yisenwang.github.io/", "dblp": "00/555-1;245/3328;276/6608;38/7003;172/1346-1", "google_scholar": "-CLy6YsAAAAJ;;Kyn1zdQAAAAJ;gTWUZlsAAAAJ;uMWPDboAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yifei_Wang1;~Yuyang_Wu1;~Zeming_Wei1;~Stefanie_Jegelka3;~Yisen_Wang1", "aff": "Massachusetts Institute of Technology;;University of California, Berkeley;Massachusetts Institute of Technology;Peking University", "aff_domain": "mit.edu;;berkeley.edu;mit.edu;pku.edu.cn", "position": "Postdoc;;Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A Theoretical Understanding of Self-Correction through In-context Alignment},\nauthor={Yifei Wang and Yuyang Wu and Zeming Wei and Stefanie Jegelka and Yisen Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OtvNLTWYww}\n}", "github": "", "reviewers": "bH8U;pXYm;deQ8;zSQV;jEFz", "pdf_size": 1123844, "rating": "6;6;7;7;7", "confidence": "3;3;2;3;3", "soundness": "3;3;3;4;3", "novelty": "3;3;3;4;3", "presentation": "3;3;4;4;3", "wc_summary": "44;171;69;87;71", "wc_strengths": "63;66;66;177;46", "wc_weaknesses": "30;207;27;163;45", "wc_questions": "23;23;20;9;31", "wc_limitations": "15;2;9;6;11", "wc_review": "175;469;191;442;204", "wc_reply_reviewers": "32;18;6;82;153", "wc_reply_authors": "0;0;0;47;695", "reply_reviewers": "1;1;1;1;3", "reply_authors": "1;1;1;2;3", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.4, 43.53205715332093 ], "wc_strengths_avg": [ 83.6, 47.28889933166134 ], "wc_weaknesses_avg": [ 94.4, 75.5184745608649 ], "wc_questions_avg": [ 21.2, 7.110555533852471 ], "wc_limitations_avg": [ 8.6, 4.409081537009721 ], "wc_review_avg": [ 296.2, 130.6711903978838 ], "wc_reply_reviewers_avg": [ 58.2, 54.00148146115993 ], "wc_reply_authors_avg": [ 148.4, 273.90553115992384 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14232417896736061252&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 5, "email": "mit.edu;;berkeley.edu;mit.edu;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;http://www.pku.edu.cn", "aff_unique_abbr": "MIT;UC Berkeley;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Diffusion Priors for Variational Likelihood Estimation and Image Denoising", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95341", "id": "OuKW8cUiuY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OuKW8cUiuY", "openreview": "https://openreview.net/forum?id=OuKW8cUiuY", "poster": "/media/PosterPDFs/NeurIPS%202024/95341.png?t=1731054892.317937", "project": "", "author_site": "Jun Cheng, Shan Tan", "tldr": "", "abstract": "Real-world noise removal is crucial in low-level computer vision. Due to the remarkable generation capabilities of diffusion models, recent attention has shifted towards leveraging diffusion priors for image restoration tasks. However, existing diffusion priors-based methods either consider simple noise types or rely on approximate posterior estimation, limiting their effectiveness in addressing structured and signal-dependent noise commonly found in real-world images. In this paper, we build upon diffusion priors and propose adaptive likelihood estimation and MAP inference during the reverse diffusion process to tackle real-world noise. We introduce an independent, non-identically distributed likelihood combined with the noise precision (inverse variance) prior and dynamically infer the precision posterior using variational Bayes during the generation process. Meanwhile, we rectify the estimated noise variance through local Gaussian convolution. The final denoised image is obtained by propagating intermediate MAP solutions that balance the updated likelihood and diffusion prior. Additionally, we explore the local diffusion prior inherent in low-resolution diffusion models, enabling direct handling of high-resolution noisy images. Extensive experiments and analyses on diverse real-world datasets demonstrate the effectiveness of our method. Code is available at https://github.com/HUST-Tan/DiffusionVI.", "keywords": "real-world denoising;image restoration;diffusion models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jun Cheng;Shan Tan", "authorids": "~Jun_Cheng7;~Shan_Tan1", "gender": "M;M", "homepage": ";", "dblp": ";92/5711", "google_scholar": "AJL8a48AAAAJ;WnVc_nUAAAAJ", "orcid": "0000-0002-5320-7318;", "linkedin": ";", "or_profile": "~Jun_Cheng7;~Shan_Tan1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncheng2024diffusion,\ntitle={Diffusion Priors for Variational Likelihood Estimation and Image Denoising},\nauthor={Jun Cheng and Shan Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OuKW8cUiuY}\n}", "github": "", "reviewers": "tZQC;UCEq;Wzuv;sawX;eDuR", "pdf_size": 2298613, "rating": "5;5;6;6;6", "confidence": "3;4;4;4;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;4;3;3;3", "wc_summary": "54;49;37;103;31", "wc_strengths": "1;63;43;50;67", "wc_weaknesses": "86;105;95;61;72", "wc_questions": "56;79;20;14;38", "wc_limitations": "23;11;9;26;7", "wc_review": "220;307;204;254;215", "wc_reply_reviewers": "48;45;41;71;18", "wc_reply_authors": "26;76;61;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 54.8, 25.4589866255513 ], "wc_strengths_avg": [ 44.8, 23.54909764725604 ], "wc_weaknesses_avg": [ 83.8, 15.740393895960802 ], "wc_questions_avg": [ 41.4, 23.862941981239445 ], "wc_limitations_avg": [ 15.2, 7.756287771866127 ], "wc_review_avg": [ 240.0, 37.43260610751007 ], "wc_reply_reviewers_avg": [ 44.6, 16.906803364326443 ], "wc_reply_authors_avg": [ 32.6, 31.17434842943794 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.16666666666666669, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8YEoj0WaCF4J:scholar.google.com/&scioq=Diffusion+Priors+for+Variational+Likelihood+Estimation+and+Image+Denoising&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Accelerating Relative Entropy Coding with Space Partitioning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95340", "id": "OuQYWNuNxm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OuQYWNuNxm", "openreview": "https://openreview.net/forum?id=OuQYWNuNxm", "poster": "/media/PosterPDFs/NeurIPS%202024/95340.png?t=1731719516.1591341", "project": "", "author_site": "Jiajun He, Gergely Flamich, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "Relative entropy coding (REC) algorithms encode a random sample following a target distribution $Q$, using a coding distribution $P$ shared between the sender and receiver. Sadly, general REC algorithms suffer from prohibitive encoding times, at least on the order of $2^{D_{\\text{KL}}[Q||P]}$, and faster algorithms are limited to very specific settings. This work addresses this issue by introducing a REC scheme utilizing space partitioning to reduce runtime in practical scenarios. We provide theoretical analyses of our method and demonstrate its effectiveness with both toy examples and practical applications. Notably, our method successfully handles REC tasks with $D_{\\text{KL}}[Q||P]$ about three times greater than what previous methods can manage, and reduces the bitrate by approximately 5-15\\% in VAE-based lossless compression on MNIST and INR-based lossy compression on CIFAR-10, compared to previous methods, significantly improving the practicality of REC for neural compression.", "keywords": "Relative Entropy Coding; Neural Compression; Information Theory", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Jiajun He;Gergely Flamich;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~Jiajun_He3;~Gergely_Flamich1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;M;", "homepage": ";https://gergely-flamich.github.io/;http://jmhl.org", "dblp": "205/5074-3;187/9709;40/6058", "google_scholar": ";4Iw9TH8AAAAJ;BEBccCQAAAAJ", "orcid": ";0009-0009-9831-7455;0000-0001-7610-949X", "linkedin": "jiajun-he-76a59526b/;gergely-flamich-142773102;", "or_profile": "~Jiajun_He3;~Gergely_Flamich1;~Jose_Miguel_Hernandez_Lobato1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "Research Assistant;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhe2024accelerating,\ntitle={Accelerating Relative Entropy Coding with Space Partitioning},\nauthor={Jiajun He and Gergely Flamich and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OuQYWNuNxm}\n}", "github": "", "reviewers": "wyos;tEuP;we4Y", "pdf_size": 1698351, "rating": "7;7;8", "confidence": "3;2;2", "soundness": "4;3;3", "novelty": "3;3;2", "presentation": "4;3;4", "wc_summary": "93;97;167", "wc_strengths": "38;47;64", "wc_weaknesses": "89;21;39", "wc_questions": "126;91;184", "wc_limitations": "37;60;111", "wc_review": "383;316;565", "wc_reply_reviewers": "200;28;137", "wc_reply_authors": "63;64;68", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 119.0, 33.980386499665755 ], "wc_strengths_avg": [ 49.666666666666664, 10.780641085864152 ], "wc_weaknesses_avg": [ 49.666666666666664, 28.767265347188555 ], "wc_questions_avg": [ 133.66666666666666, 38.35216928530756 ], "wc_limitations_avg": [ 69.33333333333333, 30.922843048824316 ], "wc_review_avg": [ 421.3333333333333, 105.20561877686107 ], "wc_reply_reviewers_avg": [ 121.66666666666667, 71.0508425159211 ], "wc_reply_authors_avg": [ 65.0, 2.160246899469287 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17905303279827483202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Cost-aware Bayesian Optimization via the Pandora's Box Gittins Index", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95339", "id": "Ouc1F0Sfb7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ouc1F0Sfb7", "openreview": "https://openreview.net/forum?id=Ouc1F0Sfb7", "poster": "/media/PosterPDFs/NeurIPS%202024/95339.png?t=1733874745.635722", "project": "", "author_site": "Qian Xie, Raul Astudillo, Peter Frazier, Ziv Scully, Alexander Terenin", "tldr": "", "abstract": "Bayesian optimization is a technique for efficiently optimizing unknown functions in a black-box manner. To handle practical settings where gathering data requires use of finite resources, it is desirable to explicitly incorporate function evaluation costs into Bayesian optimization policies. To understand how to do so, we develop a previously-unexplored connection between cost-aware Bayesian optimization and the Pandora's Box problem, a decision problem from economics. The Pandora's Box problem admits a Bayesian-optimal solution based on an expression called the Gittins index, which can be reinterpreted as an acquisition function. We study the use of this acquisition function for cost-aware Bayesian optimization, and demonstrate empirically that it performs well, particularly in medium-high dimensions. We further show that this performance carries over to classical Bayesian optimization without explicit evaluation costs. Our work constitutes a first step towards integrating techniques from Gittins index theory into Bayesian optimization.", "keywords": "Bayesian optimization;acquisition function;cost-per-sample;Gittins index;Pandora's box", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/6a5b5d432efaf9456652badd247126cf52813ab1.zip", "author": "Qian Xie;Raul Astudillo;Peter I. Frazier;Ziv Scully;Alexander Terenin", "authorids": "~Qian_Xie4;~Raul_Astudillo1;~Peter_I._Frazier1;~Ziv_Scully1;~Alexander_Terenin1", "gender": "F;M;;M;M", "homepage": "https://qianjanexie.github.io/;https://raulastudillo.netlify.app/;;https://ziv.codes;https://avt.im/", "dblp": "36/789-5;242/3889;;122/3070.html;185/1040", "google_scholar": "9GJO6bQAAAAJ;r1Jkj7MAAAAJ;;d7fUTNIAAAAJ;6Qa-wXMAAAAJ", "orcid": "0000-0002-3513-2674;;;0000-0002-8547-1068;0000-0001-5292-3104", "linkedin": "qian-xie-0abb54190;;;;", "or_profile": "~Qian_Xie4;~Raul_Astudillo1;~Peter_I._Frazier1;~Ziv_Scully1;~Alexander_Terenin1", "aff": "Cornell University;California Institute of Technology;;Cornell University;Cornell University", "aff_domain": "cornell.edu;caltech.edu;;cornell.edu;cornell.edu", "position": "PhD student;Postdoc;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxie2024costaware,\ntitle={Cost-aware Bayesian Optimization via the Pandora's Box Gittins Index},\nauthor={Qian Xie and Raul Astudillo and Peter I. Frazier and Ziv Scully and Alexander Terenin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ouc1F0Sfb7}\n}", "github": "", "reviewers": "iB7T;7Tfe;ykvm;q648", "pdf_size": 2733347, "rating": "4;5;5;8", "confidence": "4;3;4;5", "soundness": "2;2;3;4", "novelty": "2;3;2;4", "presentation": "2;3;2;2", "wc_summary": "51;51;63;63", "wc_strengths": "56;34;27;209", "wc_weaknesses": "322;297;21;361", "wc_questions": "57;107;169;92", "wc_limitations": "2;19;13;5", "wc_review": "488;508;293;730", "wc_reply_reviewers": "1021;68;910;273", "wc_reply_authors": "2308;733;1071;374", "reply_reviewers": "4;1;3;2", "reply_authors": "6;2;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.0, 6.0 ], "wc_strengths_avg": [ 81.5, 74.38581854090201 ], "wc_weaknesses_avg": [ 250.25, 134.30818106131883 ], "wc_questions_avg": [ 106.25, 40.51774302697523 ], "wc_limitations_avg": [ 9.75, 6.684870978560469 ], "wc_review_avg": [ 504.75, 154.81178088246386 ], "wc_reply_reviewers_avg": [ 568.0, 405.9550467724228 ], "wc_reply_authors_avg": [ 1121.5, 728.0145946339263 ], "reply_reviewers_avg": [ 2.5, 1.118033988749895 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8533688772746217609&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cornell.edu;caltech.edu;;cornell.edu;cornell.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Cornell University;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.caltech.edu", "aff_unique_abbr": "Cornell;Caltech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "HGDL: Heterogeneous Graph Label Distribution Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95338", "id": "OwguhIAh8R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OwguhIAh8R", "openreview": "https://openreview.net/forum?id=OwguhIAh8R", "poster": "/media/PosterPDFs/NeurIPS%202024/95338.png?t=1731529160.7662497", "project": "", "author_site": "Yufei Jin, Heng Lian, Yi He, Xingquan Zhu", "tldr": "", "abstract": "Label Distribution Learning (LDL) has been extensively studied in IID data applications such as computer vision, thanks to its more generic setting over single-label and multi-label classification. \nThis paper advances LDL into graph domains and aims to tackle a novel and fundamental\nheterogeneous graph label distribution learning (HGDL) problem.\nWe argue that \nthe graph heterogeneity reflected on node types, node attributes, and neighborhood structures can \nimpose significant challenges for generalizing \nLDL onto graphs. \nTo address the challenges, we propose a new \nlearning framework with two key components: \n1) proactive graph topology homogenization, \nand 2) topology and content consistency-aware graph transformer. \nSpecifically, \nthe former learns optimal information aggregation between meta-paths, so that the node\nheterogeneity can be proactively addressed prior to the succeeding embedding learning; the latter leverages an attention mechanism to learn consistency between meta-path and node attributes, allowing network topology and nodal attributes to be equally emphasized during the label distribution learning. By using KL-divergence and additional constraints, \\method~delivers \nan end-to-end solution for learning and predicting label distribution for nodes. \nBoth theoretical and empirical studies substantiate \nthe effectiveness of our HGDL approach.\nOur code and datasets are available at https://github.com/Listener-Watcher/HGDL.", "keywords": "Heterogeneous Graph;Graph Representation Learning;Label Distribution Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yufei Jin;Heng Lian;Yi He;Xingquan Zhu", "authorids": "~Yufei_Jin1;~Heng_Lian2;~Yi_He4;~Xingquan_Zhu1", "gender": "M;M;M;M", "homepage": ";;https://www.lions.odu.edu/~y1he;http://www.cse.fau.edu/~xqzhu/", "dblp": "228/6799;;65/425-7;https://dblp.uni-trier.de/pid/26/4253.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;DKqpGaUAAAAJ;YhKZXtcAAAAJ", "orcid": "0009-0006-6570-123X;0000-0003-3240-4221;0000-0002-5357-6623;", "linkedin": ";;;", "or_profile": "~Yufei_Jin1;~Heng_Lian2;~Yi_He4;~Xingquan_Zhu1", "aff": "Florida Atlantic University;Old Dominion University;Old Dominion University;Florida Atlantic University", "aff_domain": "fau.edu;odu.edu;odu.edu;fau.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\njin2024hgdl,\ntitle={{HGDL}: Heterogeneous Graph Label Distribution Learning},\nauthor={Yufei Jin and Heng Lian and Yi He and Xingquan Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OwguhIAh8R}\n}", "github": "", "reviewers": "xSZH;pXs2;EYc1;FJkr", "pdf_size": 1480613, "rating": "5;6;6;6", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "63;51;90;120", "wc_strengths": "44;57;110;61", "wc_weaknesses": "65;197;179;62", "wc_questions": "77;55;5;2", "wc_limitations": "12;1;56;1", "wc_review": "261;361;440;246", "wc_reply_reviewers": "0;16;8;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 26.580067720004024 ], "wc_strengths_avg": [ 68.0, 25.0499500997507 ], "wc_weaknesses_avg": [ 125.75, 62.58344429639519 ], "wc_questions_avg": [ 34.75, 32.220917119163445 ], "wc_limitations_avg": [ 17.5, 22.677080940897135 ], "wc_review_avg": [ 327.0, 78.80672560130893 ], "wc_reply_reviewers_avg": [ 10.0, 6.6332495807108 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8209443927695312261&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "fau.edu;odu.edu;odu.edu;fau.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Florida Atlantic University;Old Dominion University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fau.edu;https://www.odu.edu", "aff_unique_abbr": "FAU;ODU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improved Sample Complexity Bounds for Diffusion Model Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95337", "id": "OxcqkYOy8q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OxcqkYOy8q", "openreview": "https://openreview.net/forum?id=OxcqkYOy8q", "poster": "", "project": "", "author_site": "Shivam Gupta, Aditya Parulekar, Eric Price, Zhiyang Xun", "tldr": "", "abstract": "Diffusion models have become the most popular approach to deep generative modeling of images, largely due to their empirical performance and reliability. From a theoretical standpoint, a number of recent works [CCL+23, CCSW22, BBDD24] have studied the iteration complexity of sampling, assuming access to an accurate diffusion model. In this work, we focus on understanding the *sample complexity* of training such a model; how many samples are needed to learn an accurate diffusion model using a sufficiently expressive neural network? Prior work [BMR20] showed bounds polynomial in the dimension, desired Total Variation error, and Wasserstein error. We show an *exponential improvement* in the dependence on Wasserstein error and depth, along with improved dependencies on other relevant parameters.", "keywords": "Diffusion Models;Learning Theory;Sample Complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Shivam Gupta;Aditya Parulekar;Eric Price;Zhiyang Xun", "authorids": "~Shivam_Gupta1;~Aditya_Parulekar1;~Eric_Price1;~Zhiyang_Xun1", "gender": "M;M;;M", "homepage": "https://shivamgupta2.github.io/;https://www.linkedin.com/in/aditya-parulekar-b97899190/;;", "dblp": "29/8830-2;293/7171;;307/5331", "google_scholar": "HsbPV-EAAAAJ;;;ICzrrFkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shivam_Gupta1;~Aditya_Parulekar1;~Eric_Price1;~Zhiyang_Xun1", "aff": "University of Texas, Austin;University of Texas at Austin;;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;;utexas.edu", "position": "PhD student;PhD student;;PhD student", "bibtex": "@inproceedings{\ngupta2024improved,\ntitle={Improved Sample Complexity Bounds for Diffusion Model Training},\nauthor={Shivam Gupta and Aditya Parulekar and Eric Price and Zhiyang Xun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OxcqkYOy8q}\n}", "github": "", "reviewers": "acEK;6Nj2;tUeu;r7NG", "pdf_size": 639394, "rating": "5;5;5;7", "confidence": "2;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "69;36;73;78", "wc_strengths": "36;43;23;76", "wc_weaknesses": "46;330;115;39", "wc_questions": "169;2;104;105", "wc_limitations": "2;1;16;2", "wc_review": "322;412;331;300", "wc_reply_reviewers": "73;20;0;153", "wc_reply_authors": "0;0;0;418", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.0, 16.47725705328408 ], "wc_strengths_avg": [ 44.5, 19.551214796017153 ], "wc_weaknesses_avg": [ 132.5, 117.8314474153653 ], "wc_questions_avg": [ 95.0, 59.803846030167655 ], "wc_limitations_avg": [ 5.25, 6.219927652312364 ], "wc_review_avg": [ 341.25, 42.37555309373554 ], "wc_reply_reviewers_avg": [ 61.5, 59.179810746571334 ], "wc_reply_authors_avg": [ 104.5, 180.99930939094767 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17334179580831243055&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "utexas.edu;utexas.edu;;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "What do Graph Neural Networks learn? Insights from Tropical Geometry", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95336", "id": "Oy2x0Xfx0u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Oy2x0Xfx0u", "openreview": "https://openreview.net/forum?id=Oy2x0Xfx0u", "poster": "/media/PosterPDFs/NeurIPS%202024/95336.png?t=1733765370.154898", "project": "", "author_site": "Tuan Anh Pham, Vikas Garg", "tldr": "", "abstract": "Graph neural networks (GNNs) have been analyzed from multiple perspectives, including the WL-hierarchy, which exposes limits on their expressivity to distinguish graphs. However, characterizing the class of functions that they learn has remained unresolved. We address this fundamental question for message passing GNNs under ReLU activations, i.e., the de-facto choice for most GNNs.\n\nWe first show that such GNNs learn tropical rational signomial maps or continuous piecewise linear functions, establishing an equivalence with feedforward networks (FNNs). We then elucidate the role of the choice of aggregation and update functions, and derive the first general upper and lower bounds on the geometric complexity (i.e., the number of linear regions), establishing new results for popular architectures such as GraphSAGE and GIN. We also introduce and theoretically analyze several new architectures to illuminate the relative merits of the feedforward and the message passing layers, and the tradeoffs involving depth and number of trainable parameters. Finally, we also characterize the decision boundary for node and graph classification tasks.", "keywords": "Graph Representation Learning;Graph Neural Networks;Geometric Complexity;Message Passing;Learning Theory", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Tuan Anh Pham;Vikas Garg", "authorids": "~Tuan_Anh_Pham2;~Vikas_Garg2", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": "0000-0003-0269-220X;", "linkedin": "tuan-pham-15455919a/;", "or_profile": "~Tuan_Anh_Pham2;~Vikas_Garg2", "aff": "University of Edinburgh, University of Edinburgh;", "aff_domain": "ed.ac.uk;", "position": "PhD student;", "bibtex": "@inproceedings{\npham2024what,\ntitle={What do Graph Neural Networks learn? Insights from Tropical Geometry},\nauthor={Tuan Anh Pham and Vikas Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Oy2x0Xfx0u}\n}", "github": "", "reviewers": "pwpt;jpJX;sZYW", "pdf_size": 447883, "rating": "5;6;7", "confidence": "3;3;5", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "79;101;72", "wc_strengths": "117;65;55", "wc_weaknesses": "1138;189;226", "wc_questions": "3;5;120", "wc_limitations": "3;1;19", "wc_review": "1340;361;492", "wc_reply_reviewers": "768;22;171", "wc_reply_authors": "1609;0;78", "reply_reviewers": "3;1;1", "reply_authors": "5;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 84.0, 12.355835328567093 ], "wc_strengths_avg": [ 79.0, 27.17842281418601 ], "wc_weaknesses_avg": [ 517.6666666666666, 438.90191260564 ], "wc_questions_avg": [ 42.666666666666664, 54.68901982015119 ], "wc_limitations_avg": [ 7.666666666666667, 8.055363982396381 ], "wc_review_avg": [ 731.0, 433.93624723761747 ], "wc_reply_reviewers_avg": [ 320.3333333333333, 322.33971038159245 ], "wc_reply_authors_avg": [ 562.3333333333334, 740.7898187445312 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jHaQEk1OIsIJ:scholar.google.com/&scioq=What+do+Graph+Neural+Networks+learn%3F+Insights+from+Tropical+Geometry&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ed.ac.uk;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "DenoiseRep: Denoising Model for Representation Learning", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95335", "id": "OycU0bAus6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=OycU0bAus6", "openreview": "https://openreview.net/forum?id=OycU0bAus6", "poster": "/media/PosterPDFs/NeurIPS%202024/95335.png?t=1731687775.3890939", "project": "", "author_site": "zhengrui Xu, Guan'an Wang, Xiaowen Huang, Jitao Sang", "tldr": "", "abstract": "The denoising model has been proven a powerful generative model but has little exploration of discriminative tasks. Representation learning is important in discriminative tasks, which is defined as *\"learning representations (or features) of the data that make it easier to extract useful information when building classifiers or other predictors\"*. In this paper, we propose a novel Denoising Model for Representation Learning (*DenoiseRep*) to improve feature discrimination with joint feature extraction and denoising. *DenoiseRep* views each embedding layer in a backbone as a denoising layer, processing the cascaded embedding layers as if we are recursively denoise features step-by-step. This unifies the frameworks of feature extraction and denoising, where the former progressively embeds features from low-level to high-level, and the latter recursively denoises features step-by-step. After that, *DenoiseRep* fuses the parameters of feature extraction and denoising layers, and *theoretically demonstrates* its equivalence before and after the fusion, thus making feature denoising computation-free. *DenoiseRep* is a label-free algorithm that incrementally improves features but also complementary to the label if available. Experimental results on various discriminative vision tasks, including re-identification (Market-1501, DukeMTMC-reID, MSMT17, CUHK-03, vehicleID), image classification (ImageNet, UB200, Oxford-Pet, Flowers), object detection (COCO), image segmentation (ADE20K) show stability and impressive improvements. We also validate its effectiveness on the CNN (ResNet) and Transformer (ViT, Swin, Vmamda) architectures.", "keywords": "Diffusion Model;Representation Learning;Generative Model;Discriminative Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "zhengrui Xu;Guan'an Wang;Xiaowen Huang;Jitao Sang", "authorids": "~zhengrui_Xu1;~Guan'an_Wang2;~Xiaowen_Huang1;~Jitao_Sang1", "gender": "M;;F;", "homepage": "https://github.com/Xzhengrui;;https://faculty.bjtu.edu.cn/9545/;", "dblp": ";;166/0337;", "google_scholar": ";;Is1Of9MAAAAJ;", "orcid": ";;0000-0001-9590-3285;", "linkedin": ";;;", "or_profile": "~zhengrui_Xu1;~Guan'an_Wang2;~Xiaowen_Huang1;~Jitao_Sang1", "aff": "Beijing Jiaotong University;;Beijing Jiaotong University;", "aff_domain": "bjtu.edu.cn;;bjtu.edu.cn;", "position": "MS student;;Associate Professor;", "bibtex": "@inproceedings{\nxu2024denoiserep,\ntitle={DenoiseRep: Denoising Model for Representation Learning},\nauthor={zhengrui Xu and Guan'an Wang and Xiaowen Huang and Jitao Sang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=OycU0bAus6}\n}", "github": "", "reviewers": "4MBF;Ckqo;DSeM;w7Th", "pdf_size": 1046045, "rating": "6;8;8;8", "confidence": "5;5;5;4", "soundness": "2;4;3;3", "novelty": "2;4;3;3", "presentation": "3;4;3;3", "wc_summary": "74;37;79;55", "wc_strengths": "16;17;67;32", "wc_weaknesses": "155;6;90;38", "wc_questions": "4;1;10;43", "wc_limitations": "6;1;10;7", "wc_review": "255;62;256;175", "wc_reply_reviewers": "356;29;93;129", "wc_reply_authors": "541;54;22;22", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.25, 16.618889854620253 ], "wc_strengths_avg": [ 33.0, 20.627651344736268 ], "wc_weaknesses_avg": [ 72.25, 56.40201680791211 ], "wc_questions_avg": [ 14.5, 16.77050983124842 ], "wc_limitations_avg": [ 6.0, 3.24037034920393 ], "wc_review_avg": [ 187.0, 79.30006305167733 ], "wc_reply_reviewers_avg": [ 151.75, 123.2423932743924 ], "wc_reply_authors_avg": [ 159.75, 220.50212584009253 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1139553776664718624&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "bjtu.edu.cn;;bjtu.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "QueST: Self-Supervised Skill Abstractions for Learning Continuous Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95334", "id": "P3v3x7HnV0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P3v3x7HnV0", "openreview": "https://openreview.net/forum?id=P3v3x7HnV0", "poster": "", "project": "", "author_site": "Atharva Mete, Haotian Xue, Albert Wilcox, Yongxin Chen, Animesh Garg", "tldr": "", "abstract": "Generalization capabilities, or rather a lack thereof, is one of the most important unsolved problems in the field of robot learning, and while several large scale efforts have set out to tackle this problem, unsolved it remains. In this paper, we hypothesize that learning temporal action abstractions using latent variable models (LVMs), which learn to map data to a compressed latent space and back, is a\npromising direction towards low-level skills that can readily be used for new tasks. Although several works have attempted to show this, they have generally been limited by architectures that do not faithfully capture sharable representations. To address this we present Quantized Skill Transformer (QueST), which learns a larger and more flexible latent encoding that is more capable of modeling the breadth of low-level skills necessary for a variety of tasks. To make use of this extra flexibility, QueST imparts causal inductive bias from the action sequence data into the latent space, leading to more semantically useful and transferable representations. We compare to state-of-the-art imitation learning and LVM baselines and see that QueST\u2019s architecture leads to strong performance on several multitask and few-shot learning benchmarks. Further results and videos are available at https://quest-model.github.io.", "keywords": "Behavior Clonning;Action Tokenization;Self Supervised Skill Abstraction;Few-shot Imitation Learning", "primary_area": "robotics", "supplementary_material": "", "author": "Atharva Mete;Haotian Xue;Albert Wilcox;Yongxin Chen;Animesh Garg", "authorids": "~Atharva_Mete1;~Haotian_Xue1;~Albert_Wilcox1;~Yongxin_Chen1;~Animesh_Garg1", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/atharva-mete/home;https://albertwilcox.github.io/;https://yongxin.ae.gatech.edu/;http://animesh.garg.tech;https://xavihart.github.io", "dblp": ";;;123/5728;", "google_scholar": ";bj628LsAAAAJ;X8BYiV4AAAAJ;zp8V7ZMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-0482-4296;", "linkedin": ";albert-wilcox-314898184/;;animeshgarg/;haotian-xue-gatech/", "or_profile": "~Atharva_Mete1;~Albert_Wilcox1;~Yongxin_Chen1;~Animesh_Garg1;~Xue_Haotian1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;NVIDIA;NVIDIA", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;nvidia.com;nvidia.com", "position": "MS student;PhD student;Associate Professor;Researcher;Research Intern", "bibtex": "@inproceedings{\nmete2024quest,\ntitle={Que{ST}: Self-Supervised Skill Abstractions for Learning Continuous Control},\nauthor={Atharva Mete and Haotian Xue and Albert Wilcox and Yongxin Chen and Animesh Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P3v3x7HnV0}\n}", "github": "", "reviewers": "utkR;jrVG;VaLR;cTdz", "pdf_size": 3596382, "rating": "4;5;6;7", "confidence": "3;3;3;3", "soundness": "2;2;2;3", "novelty": "2;2;2;2", "presentation": "3;3;3;3", "wc_summary": "91;63;156;113", "wc_strengths": "41;55;211;57", "wc_weaknesses": "76;172;353;128", "wc_questions": "11;105;130;48", "wc_limitations": "1;20;12;40", "wc_review": "220;415;862;386", "wc_reply_reviewers": "0;104;142;0", "wc_reply_authors": "0;334;473;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.75, 33.995404101142846 ], "wc_strengths_avg": [ 91.0, 69.55573304911681 ], "wc_weaknesses_avg": [ 182.25, 104.27457743860677 ], "wc_questions_avg": [ 73.5, 46.74665763453041 ], "wc_limitations_avg": [ 18.25, 14.254385290148432 ], "wc_review_avg": [ 470.75, 237.82490933457748 ], "wc_reply_reviewers_avg": [ 61.5, 62.95037728242778 ], "wc_reply_authors_avg": [ 201.75, 207.64919335263502 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2541323972421435654&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "gatech.edu;gatech.edu;gatech.edu;nvidia.com;nvidia.com", "author_num": 5, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Georgia Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.gatech.edu;https://www.nvidia.com", "aff_unique_abbr": "Georgia Tech;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "3DGS-Enhancer: Enhancing Unbounded 3D Gaussian Splatting with View-consistent 2D Diffusion Priors", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95333", "id": "P4s6FUpCbG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P4s6FUpCbG", "openreview": "https://openreview.net/forum?id=P4s6FUpCbG", "poster": "/media/PosterPDFs/NeurIPS%202024/95333.png?t=1733765745.32909", "project": "", "author_site": "Xi Liu, Chaoyi Zhou, Siyu Huang", "tldr": "", "abstract": "Novel-view synthesis aims to generate novel views of a scene from multiple input\nimages or videos, and recent advancements like 3D Gaussian splatting (3DGS)\nhave achieved notable success in producing photorealistic renderings with efficient\npipelines. However, generating high-quality novel views under challenging settings,\nsuch as sparse input views, remains difficult due to insufficient information in\nunder-sampled areas, often resulting in noticeable artifacts. This paper presents\n3DGS-Enhancer, a novel pipeline for enhancing the representation quality of\n3DGS representations. We leverage 2D video diffusion priors to address the\nchallenging 3D view consistency problem, reformulating it as achieving temporal\nconsistency within a video generation process. 3DGS-Enhancer restores view-\nconsistent latent features of rendered novel views and integrates them with the\ninput views through a spatial-temporal decoder. The enhanced views are then\nused to fine-tune the initial 3DGS model, significantly improving its rendering\nperformance. Extensive experiments on large-scale datasets of unbounded scenes\ndemonstrate that 3DGS-Enhancer yields superior reconstruction performance and\nhigh-fidelity rendering results compared to state-of-the-art methods. The project\nwebpage is https://xiliu8006.github.io/3DGS-Enhancer-project.", "keywords": "3D model enhancement;3D Guassian splatting;novel view synthesis;diffusion model;image restoration", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xi Liu;Chaoyi Zhou;Siyu Huang", "authorids": "~Xi_Liu3;~Chaoyi_Zhou2;~Siyu_Huang2", "gender": "M;;M", "homepage": "https://xiliu8006.github.io;https://chaoyizh.github.io/chaoyizh-home-page/;https://siyuhuang.github.io", "dblp": ";325/4163;146/9031.html", "google_scholar": ";oEfDTB4AAAAJ;hQN7Zn0AAAAJ", "orcid": ";;", "linkedin": "xiliu-4626391b5;chaoyi-zhou-313a3222a;", "or_profile": "~Xi_Liu3;~Chaoyi_Zhou2;~Siyu_Huang2", "aff": "Clemson University;Clemson University;Clemson University", "aff_domain": "clemson.edu;clemson.edu;clemson.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024dgsenhancer,\ntitle={3{DGS}-Enhancer: Enhancing Unbounded 3D Gaussian Splatting with View-consistent 2D Diffusion Priors},\nauthor={Xi Liu and Chaoyi Zhou and Siyu Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P4s6FUpCbG}\n}", "github": "", "reviewers": "KbVQ;19Ng;LSGq;RSAr", "pdf_size": 15719299, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "2;4;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "79;51;128;43", "wc_strengths": "47;74;92;56", "wc_weaknesses": "83;22;45;10", "wc_questions": "1;27;141;22", "wc_limitations": "1;1;4;30", "wc_review": "211;175;410;161", "wc_reply_reviewers": "125;14;27;6", "wc_reply_authors": "220;42;65;42", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 33.25939716831921 ], "wc_strengths_avg": [ 67.25, 17.282577932704367 ], "wc_weaknesses_avg": [ 40.0, 27.829840100151493 ], "wc_questions_avg": [ 47.75, 54.71460042803932 ], "wc_limitations_avg": [ 9.0, 12.186057606953941 ], "wc_review_avg": [ 239.25, 100.25561081555486 ], "wc_reply_reviewers_avg": [ 43.0, 47.93224384482746 ], "wc_reply_authors_avg": [ 92.25, 74.35178209027676 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17399828492219508223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "clemson.edu;clemson.edu;clemson.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Clemson University", "aff_unique_dep": "", "aff_unique_url": "https://www.clemson.edu", "aff_unique_abbr": "Clemson", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FlexCap: Describe Anything in Images in Controllable Detail", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95332", "id": "P5dEZeECGu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P5dEZeECGu", "openreview": "https://openreview.net/forum?id=P5dEZeECGu", "poster": "/media/PosterPDFs/NeurIPS%202024/95332.png?t=1733957094.4493651", "project": "", "author_site": "Debidatta Dwibedi, Vidhi Jain, Jonathan Tompson, Andrew Zisserman, Yusuf Aytar", "tldr": "", "abstract": "We introduce FlexCap, a vision-language model that generates region-specific descriptions of varying lengths. FlexCap is trained to produce length-conditioned captions for input boxes, enabling control over information density, with descriptions ranging from concise object labels to detailed captions. To achieve this, we create large-scale training datasets of image region descriptions with varying lengths from captioned web images. We demonstrate FlexCap\u2019s effectiveness in several applications: first, it achieves strong performance in dense captioning tasks on the Visual Genome dataset. Second, we show how FlexCap\u2019s localized descriptions can serve as input to a large language model to create a visual question answering (VQA) system, achieving state-of-the-art zero-shot performance on multiple VQA benchmarks. Our experiments illustrate FlexCap\u2019s utility for tasks including image labeling, object attribute recognition, and visual dialog. Project webpage: https://flex-cap.github.io.", "keywords": "vision-language models;dense captioning;open-ended object detection", "primary_area": "machine_vision", "supplementary_material": "/attachment/415a42586eee81fa854c31779b789fe54bfc4c03.zip", "author": "Debidatta Dwibedi;Vidhi Jain;Jonathan Tompson;Andrew Zisserman;Yusuf Aytar", "authorids": "~Debidatta_Dwibedi1;~Vidhi_Jain2;~Jonathan_Tompson1;~Andrew_Zisserman1;~Yusuf_Aytar1", "gender": "M;F;M;;M", "homepage": "https://debidatta.github.io/;http://vidhijain.github.io;http://jonathantompson.com;;", "dblp": "160/3739;199/2574;139/0769;;41/5577", "google_scholar": "EPfOJwQAAAAJ;;U_Jw8DUAAAAJ;;0ncQNL8AAAAJ", "orcid": ";;;;", "linkedin": ";vidhijain96/;;;", "or_profile": "~Debidatta_Dwibedi1;~Vidhi_Jain2;~Jonathan_Tompson1;~Andrew_Zisserman1;~Yusuf_Aytar1", "aff": "Google;Google;Google DeepMind;;Google DeepMind", "aff_domain": "google.com;google.com;google.com;;google.com", "position": "Google;Student Researcher;Researcher;;Research Scientist", "bibtex": "@inproceedings{\ndwibedi2024flexcap,\ntitle={FlexCap: Describe Anything in Images in Controllable Detail},\nauthor={Debidatta Dwibedi and Vidhi Jain and Jonathan Tompson and Andrew Zisserman and Yusuf Aytar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P5dEZeECGu}\n}", "github": "", "reviewers": "T9RF;bbcR;eaKh", "pdf_size": 4686124, "rating": "5;6;6", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;2", "wc_summary": "62;176;85", "wc_strengths": "28;141;80", "wc_weaknesses": "223;864;178", "wc_questions": "5;103;47", "wc_limitations": "5;10;71", "wc_review": "323;1294;461", "wc_reply_reviewers": "0;50;64", "wc_reply_authors": "0;268;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 107.66666666666667, 49.22284925610147 ], "wc_strengths_avg": [ 83.0, 46.180804092898455 ], "wc_weaknesses_avg": [ 421.6666666666667, 313.31595696499227 ], "wc_questions_avg": [ 51.666666666666664, 40.14418457953226 ], "wc_limitations_avg": [ 28.666666666666668, 30.003703475108235 ], "wc_review_avg": [ 692.6666666666666, 428.92294050200775 ], "wc_reply_reviewers_avg": [ 38.0, 27.47119703738202 ], "wc_reply_authors_avg": [ 89.33333333333333, 126.33641157199649 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18076020307232315228&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;google.com;google.com;;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Monoculture in Matching Markets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95331", "id": "P5yezHuMSS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P5yezHuMSS", "openreview": "https://openreview.net/forum?id=P5yezHuMSS", "poster": "", "project": "", "author_site": "Kenny Peng, Nikhil Garg", "tldr": "", "abstract": "Algorithmic monoculture arises when many decision-makers rely on the same algorithm to evaluate applicants. An emerging body of work investigates possible harms of this kind of homogeneity, but has been limited by the challenge of incorporating market effects in which the preferences and behavior of many applicants and decision-makers jointly interact to determine outcomes.\n\nAddressing this challenge, we introduce a tractable theoretical model of algorithmic monoculture in a two-sided matching market with many participants. We use the model to analyze outcomes under monoculture (when decision-makers all evaluate applicants using a common algorithm) and under polyculture (when decision-makers evaluate applicants independently). All else equal, monoculture (1) selects less-preferred applicants when noise is well-behaved, (2) matches more applicants to their top choice, though individual applicants may be worse off depending on their value to decision-makers and risk tolerance, and (3) is more robust to disparities in the number of applications submitted.", "keywords": "algorithmic monoculture;matching markets;hiring", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Kenny Peng;Nikhil Garg", "authorids": "~Kenny_Peng1;~Nikhil_Garg2", "gender": ";M", "homepage": "https://gargnikhil.com/;http://kennypeng.me", "dblp": "83/6058-1;", "google_scholar": "8qSK3noAAAAJ;", "orcid": "0000-0002-1988-792X;", "linkedin": ";", "or_profile": "~Nikhil_Garg2;~Kenneth_L_Peng1", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "Assistant Professor;PhD student", "bibtex": "@inproceedings{\npeng2024monoculture,\ntitle={Monoculture in Matching Markets},\nauthor={Kenny Peng and Nikhil Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P5yezHuMSS}\n}", "github": "", "reviewers": "dgPg;FF9T;V2nV;LFB2", "pdf_size": 4680086, "rating": "3;5;6;7", "confidence": "5;3;4;4", "soundness": "2;3;3;4", "novelty": "1;3;3;4", "presentation": "3;3;3;4", "wc_summary": "154;68;152;500", "wc_strengths": "95;54;19;176", "wc_weaknesses": "504;67;53;319", "wc_questions": "2;3;7;50", "wc_limitations": "62;51;1;54", "wc_review": "817;243;232;1099", "wc_reply_reviewers": "359;162;49;45", "wc_reply_authors": "558;66;156;18", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 218.5, 166.18889854620255 ], "wc_strengths_avg": [ 86.0, 58.510682785282896 ], "wc_weaknesses_avg": [ 235.75, 187.5918108553782 ], "wc_questions_avg": [ 15.5, 20.006249023742555 ], "wc_limitations_avg": [ 42.0, 24.01041440708594 ], "wc_review_avg": [ 597.75, 373.8123693780076 ], "wc_reply_reviewers_avg": [ 153.75, 127.47033968731706 ], "wc_reply_authors_avg": [ 199.5, 212.82563285469163 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.47809144373375745, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12044660926379457711&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cornell.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GACL: Exemplar-Free Generalized Analytic Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95330", "id": "P6aJ7BqYlc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P6aJ7BqYlc", "openreview": "https://openreview.net/forum?id=P6aJ7BqYlc", "poster": "/media/PosterPDFs/NeurIPS%202024/95330.png?t=1731677833.4510787", "project": "", "author_site": "HUIPING ZHUANG, Yizhu Chen, Di Fang, Run He, Kai Tong, Hongxin Wei, Ziqian Zeng, Cen Chen", "tldr": "", "abstract": "Class incremental learning (CIL) trains a network on sequential tasks with separated categories in each task but suffers from catastrophic forgetting, where models quickly lose previously learned knowledge when acquiring new tasks. The generalized CIL (GCIL) aims to address the CIL problem in a more real-world scenario, where incoming data have mixed data categories and unknown sample size distribution. Existing attempts for the GCIL either have poor performance or invade data privacy by saving exemplars. In this paper, we propose a new exemplar-free GCIL technique named generalized analytic continual learning (GACL). The GACL adopts analytic learning (a gradient-free training technique) and delivers an analytical (i.e., closed-form) solution to the GCIL scenario. This solution is derived via decomposing the incoming data into exposed and unexposed classes, thereby attaining a weight-invariant property, a rare yet valuable property supporting an equivalence between incremental learning and its joint training. Such an equivalence is crucial in GCIL settings as data distributions among different tasks no longer pose challenges to adopting our GACL. Theoretically, this equivalence property is validated through matrix analysis tools. Empirically, we conduct extensive experiments where, compared with existing GCIL methods, our GACL exhibits a consistently leading performance across various datasets and GCIL settings. Source code is available at https://github.com/CHEN-YIZHU/GACL.", "keywords": "Class incremental learning;closed-form solution;exemplar-free;continual learning;online continual learning;generalized continual learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Huiping Zhuang;Yizhu Chen;Di Fang;Run He;Kai Tong;Hongxin Wei;Ziqian Zeng;Cen Chen", "authorids": "~Huiping_Zhuang2;~Yizhu_Chen1;~Di_Fang2;~Run_He1;~Kai_Tong1;~Hongxin_Wei1;~Ziqian_Zeng1;~Cen_Chen4", "gender": "F;M;M;M;M;F;M;M", "homepage": "https://github.com/rayHartley;https://fangd.net;;;https://hongxin001.github.io/;https://ziqianzeng.github.io;;https://zhuanghp.github.io/", "dblp": ";142/2761-4;21/10617;211/4831;150/6350;155/0168;152/6215-2.html;194/5829", "google_scholar": ";YR-Cnw8AAAAJ;cN4SxagAAAAJ;;cABH034AAAAJ;fuOr3nAAAAAJ;pPsNBWUAAAAJ;https://scholar.google.com.sg/citations?user=vCXxuLkAAAAJ", "orcid": ";0009-0004-8135-2354;;0009-0001-6073-8918;;;0000-0003-1389-0148;0000-0002-4612-5445", "linkedin": ";fang-d/;;kai-tong-4530a6299/;;;;", "or_profile": "~Yizhu_Chen1;~Di_Fang2;~Run_He1;~Kai_Tong1;~Hongxin_Wei1;~Ziqian_Zeng1;~Cen_Chen4;~HUIPING_ZHUANG1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;Southern University of Science and Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;sustech.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "Undergrad student;Undergrad student;MS student;PhD student;Assistant Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhuang2024gacl,\ntitle={{GACL}: Exemplar-Free Generalized Analytic Continual Learning},\nauthor={Huiping Zhuang and Yizhu Chen and Di Fang and Run He and Kai Tong and Hongxin Wei and Ziqian Zeng and Cen Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P6aJ7BqYlc}\n}", "github": "", "reviewers": "vKCe;Xb3R;qU32;tsV2", "pdf_size": 0, "rating": "5;6;7;8", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "2;2;2;4", "presentation": "3;4;3;3", "wc_summary": "57;63;97;71", "wc_strengths": "34;81;30;94", "wc_weaknesses": "112;57;79;102", "wc_questions": "13;27;144;120", "wc_limitations": "1;40;14;1", "wc_review": "217;268;364;388", "wc_reply_reviewers": "12;0;58;30", "wc_reply_authors": "232;181;25;19", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 15.264337522473747 ], "wc_strengths_avg": [ 59.75, 28.16358464400439 ], "wc_weaknesses_avg": [ 87.5, 21.289668856043768 ], "wc_questions_avg": [ 76.0, 56.855078928799315 ], "wc_limitations_avg": [ 14.0, 15.921683328090658 ], "wc_review_avg": [ 309.25, 69.66123383920213 ], "wc_reply_reviewers_avg": [ 25.0, 21.840329667841555 ], "wc_reply_authors_avg": [ 114.25, 94.0196123157291 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16175959888409864855&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;sustech.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0;0", "aff_unique_norm": "South China University of Technology;Southern University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.scut.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "SCUT;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Are Uncertainty Quantification Capabilities of Evidential Deep Learning a Mirage?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95329", "id": "P6nVDZRZRB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P6nVDZRZRB", "openreview": "https://openreview.net/forum?id=P6nVDZRZRB", "poster": "/media/PosterPDFs/NeurIPS%202024/95329.png?t=1733875599.3255997", "project": "", "author_site": "Maohao Shen, Jongha (Jon) Ryu, Soumya Ghosh, Yuheng Bu, Prasanna Sattigeri, Subhro Das, Gregory Wornell", "tldr": "", "abstract": "This paper questions the effectiveness of a modern predictive uncertainty quantification approach, called *evidential deep learning* (EDL), in which a single neural network model is trained to learn a meta distribution over the predictive distribution by minimizing a specific objective function. Despite their perceived strong empirical performance on downstream tasks, a line of recent studies by Bengs et al. identify limitations of the existing methods to conclude their learned epistemic uncertainties are unreliable, e.g., in that they are non-vanishing even with infinite data. Building on and sharpening such analysis, we 1) provide a sharper understanding of the asymptotic behavior of a wide class of EDL methods by unifying various objective functions; 2) reveal that the EDL methods can be better interpreted as an out-of-distribution detection algorithm based on energy-based-models; and 3) conduct extensive ablation studies to better assess their empirical effectiveness with real-world datasets. \nThrough all these analyses, we conclude that even when EDL methods are empirically effective on downstream tasks, this occurs despite their poor uncertainty quantification capabilities. Our investigation suggests that incorporating model uncertainty can help EDL methods faithfully quantify uncertainties and further improve performance on representative downstream tasks, albeit at the cost of additional computational complexity.", "keywords": "Uncertainty Quantification;Evidential Deep Learning;Out-of-distribution Data Detection;Bayesian Learning;Epistemic Uncertainty", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Maohao Shen;Jongha Jon Ryu;Soumya Ghosh;Yuheng Bu;Prasanna Sattigeri;Subhro Das;Gregory W. Wornell", "authorids": "~Maohao_Shen1;~Jongha_Jon_Ryu1;~Soumya_Ghosh1;~Yuheng_Bu1;~Prasanna_Sattigeri1;~Subhro_Das1;~Gregory_W._Wornell1", "gender": "M;M;M;M;;;", "homepage": "https://maohaos2.github.io/Maohao/;https://jongharyu.github.io;http://soumyaghosh.com;https://buyuheng.github.io/;;;", "dblp": "272/5397;340/4088;58/5138;168/8338;00/7428;;", "google_scholar": ";5ZYeWgcAAAAJ;GEYQenQAAAAJ;1jPQEVMAAAAJ;m-s38ikAAAAJ;;", "orcid": ";;;0000-0002-3479-4553;0000-0003-4435-0486;;", "linkedin": ";;;bu-yuheng-36560039/;prasannasattigeri/;;", "or_profile": "~Maohao_Shen1;~Jongha_Jon_Ryu1;~Soumya_Ghosh1;~Yuheng_Bu1;~Prasanna_Sattigeri1;~Subhro_Das1;~Gregory_W._Wornell1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;International Business Machines;University of Florida;IBM Research;;", "aff_domain": "mit.edu;mit.edu;ibm.com;ufl.edu;ibm.com;;", "position": "PhD student;Postdoc;Research Scientist;Assistant Professor;Researcher;;", "bibtex": "@inproceedings{\nshen2024are,\ntitle={Are Uncertainty Quantification Capabilities of Evidential Deep Learning a Mirage?},\nauthor={Maohao Shen and Jongha Jon Ryu and Soumya Ghosh and Yuheng Bu and Prasanna Sattigeri and Subhro Das and Gregory W. Wornell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P6nVDZRZRB}\n}", "github": "", "reviewers": "cF7x;LBKP;gUpP;Sxes", "pdf_size": 27511787, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;2;2;3", "wc_summary": "71;111;108;134", "wc_strengths": "91;71;128;143", "wc_weaknesses": "964;259;267;151", "wc_questions": "140;240;239;184", "wc_limitations": "105;24;73;1", "wc_review": "1371;705;815;613", "wc_reply_reviewers": "943;540;294;94", "wc_reply_authors": "1311;843;25;29", "reply_reviewers": "3;2;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.0, 22.572106680591425 ], "wc_strengths_avg": [ 108.25, 28.647643882176418 ], "wc_weaknesses_avg": [ 410.25, 322.97319935251596 ], "wc_questions_avg": [ 200.75, 41.75748435909424 ], "wc_limitations_avg": [ 50.75, 40.708567894240645 ], "wc_review_avg": [ 876.0, 294.59972844522446 ], "wc_reply_reviewers_avg": [ 467.75, 316.6073080331533 ], "wc_reply_authors_avg": [ 552.0, 550.4588994648011 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=233457877280440370&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;ibm.com;ufl.edu;ibm.com;;", "author_num": 7, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Massachusetts Institute of Technology;International Business Machines Corporation;University of Florida;IBM", "aff_unique_dep": ";;;IBM Research", "aff_unique_url": "https://web.mit.edu;https://www.ibm.com;https://www.ufl.edu;https://www.ibm.com/research", "aff_unique_abbr": "MIT;IBM;UF;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Memory-Efficient LLM Training with Online Subspace Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95328", "id": "P8rTCT6g45", "proceeding": "", "pdf": "https://openreview.net/pdf?id=P8rTCT6g45", "openreview": "https://openreview.net/forum?id=P8rTCT6g45", "poster": "", "project": "", "author_site": "Kaizhao Liang, Bo Liu, Lizhang Chen, Qiang Liu", "tldr": "", "abstract": "Recently, a wide range of memory-efficient LLM training algorithms have gained substantial popularity. These methods leverage the low-rank structure of gradients to project optimizer states into a subspace using projection matrix found by singular value decomposition (SVD). However, convergence of these algorithms is highly dependent on the update rules of their projection matrix. In this work, we provide the \\emph{first} convergence guarantee for arbitrary update rules of projection matrix. This guarantee is generally applicable to optimizers that can be analyzed with Hamiltonian Descent, including most common ones, such as LION, Adam. Inspired by our theoretical understanding, we propose Online Subspace Descent, a new family of subspace descent optimizer without SVD. Instead of updating projection matrix with eigenvectors, Online Subspace Descent updates projection matrix wtih online PCA. Online Subspace Descent is flexible and introduces only minimum overhead to training. We demonstrate that, for the task of pretraining LLaMA models ranging from 60M to 1B parameters on the C4 dataset, Online Subspace Descent achieves lower perplexity than state-of-the-art low-rank training methods across different settings and narrows the gap with full-rank baselines.", "keywords": "Large Language Model Pretraining; Optimizer; Memory-Efficient LLM Training", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Kaizhao Liang;Bo Liu;Lizhang Chen;qiang liu", "authorids": "~Kaizhao_Liang1;~Bo_Liu13;~Lizhang_Chen1;~qiang_liu4", "gender": "M;M;M;M", "homepage": "https://kaizhaoliang.github.io/Portfolio/;https://cranial-xix.github.io/;https://l-z-chen.github.io/;https://www.cs.utexas.edu/~lqiang/", "dblp": "239/5146;;225/1559;61/3234-1", "google_scholar": "qKLmNfoAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;", "linkedin": "kaizhao-liang-427a42132/;;;", "or_profile": "~Kaizhao_Liang1;~Bo_Liu13;~Lizhang_Chen1;~Qiang_Liu1", "aff": "SambaNova Systems, Inc;University of Texas, Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "sambanovasystems.com;cs.utexas.edu;utexas.edu;utexas.edu", "position": "Principal Engineer;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliang2024memoryefficient,\ntitle={Memory-Efficient {LLM} Training with Online Subspace Descent},\nauthor={Kaizhao Liang and Bo Liu and Lizhang Chen and qiang liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=P8rTCT6g45}\n}", "github": "", "reviewers": "FtMv;RBsr;6zai;NLbB", "pdf_size": 620640, "rating": "5;5;6;7", "confidence": "3;3;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "83;62;56;67", "wc_strengths": "45;69;24;69", "wc_weaknesses": "173;110;39;429", "wc_questions": "52;9;15;319", "wc_limitations": "5;1;6;6", "wc_review": "358;251;140;890", "wc_reply_reviewers": "62;25;8;74", "wc_reply_authors": "117;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.0, 10.024968827881711 ], "wc_strengths_avg": [ 51.75, 18.779976038323372 ], "wc_weaknesses_avg": [ 187.75, 147.13153129088272 ], "wc_questions_avg": [ 98.75, 128.2231940796984 ], "wc_limitations_avg": [ 4.5, 2.0615528128088303 ], "wc_review_avg": [ 409.75, 287.7867048701173 ], "wc_reply_reviewers_avg": [ 42.25, 26.78035660703569 ], "wc_reply_authors_avg": [ 29.25, 50.66248612138966 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16289516922116005286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sambanovasystems.com;cs.utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "SambaNova Systems;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.sambanova.com;https://www.utexas.edu", "aff_unique_abbr": "SambaNova;UT Austin", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Membership Inference Attacks against Fine-tuned Large Language Models via Self-prompt Calibration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95327", "id": "PAWQvrForJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PAWQvrForJ", "openreview": "https://openreview.net/forum?id=PAWQvrForJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95327.png?t=1729481865.7660568", "project": "", "author_site": "Wenjie Fu, Huandong Wang, Chen Gao, Guanghua Liu, Yong Li, Tao Jiang", "tldr": "", "abstract": "Membership Inference Attacks (MIA) aim to infer whether a target data record has been utilized for model training or not. Existing MIAs designed for large language models (LLMs) can be bifurcated into two types: reference-free and reference-based attacks. Although reference-based attacks appear promising performance by calibrating the probability measured on the target model with reference models, this illusion of privacy risk heavily depends on a reference dataset that closely resembles the training set. Both two types of attacks are predicated on the hypothesis that training records consistently maintain a higher probability of being sampled. However, this hypothesis heavily relies on the overfitting of target models, which will be mitigated by multiple regularization methods and the generalization of LLMs. Thus, these reasons lead to high false-positive rates of MIAs in practical scenarios.\nWe propose a Membership Inference Attack based on Self-calibrated Probabilistic Variation (SPV-MIA). \nSpecifically, we introduce a self-prompt approach, which constructs the dataset to fine-tune the reference model by prompting the target LLM itself. In this manner, the adversary can collect a dataset with a similar distribution from public APIs.\nFurthermore, we introduce probabilistic variation, a more reliable membership signal based on LLM memorization rather than overfitting, from which we rediscover the neighbour attack with theoretical grounding. \nComprehensive evaluation conducted on three datasets and four exemplary LLMs shows that SPV-MIA raises the AUC of MIAs from 0.7 to a significantly high level of 0.9. Our code and dataset are available at: https://github.com/tsinghua-fib-lab/NeurIPS2024_SPV-MIA", "keywords": "Large Language Models;Membership Inference Attacks;Privacy and Security", "primary_area": "privacy", "supplementary_material": "", "author": "Wenjie Fu;Huandong Wang;Chen Gao;Guanghua Liu;Yong Li;Tao Jiang", "authorids": "~Wenjie_Fu2;~Huandong_Wang2;~Chen_Gao3;~Guanghua_Liu1;~Yong_Li7;~Tao_Jiang6", "gender": "M;M;Not Specified;M;M;M", "homepage": "https://wjfu99.github.io/;;;https://www.researchgate.net/profile/Guanghua_Liu6;http://fi.ee.tsinghua.edu.cn/~liyong/;http://ei.hust.edu.cn/lab/SINC-lab/jiangtao.html", "dblp": "80/2331-5;;76/5013-1;;;", "google_scholar": ";PNbioq0AAAAJ;Af60_cEAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=9BEmtIwAAAAJ", "orcid": ";0000-0002-6382-0861;0000-0002-7561-5646;;;", "linkedin": ";;;;;", "or_profile": "~Wenjie_Fu2;~Huandong_Wang2;~Chen_Gao3;~Guanghua_Liu1;~Yong_Li7;~Tao_Jiang6", "aff": "Huazhong University of Science and Technology;Tsinghua University;Tsinghua University;Huazhong University of Science and Technology;Tsinghua University;Huazhong University of Science and Technology, Tsinghua University", "aff_domain": "hust.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;hust.edu.cn;tsinghua.edu.cn;hust.edu.cn", "position": "PhD student;Researcher;Assistant Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfu2024membership,\ntitle={Membership Inference Attacks against Fine-tuned Large Language Models via Self-prompt Calibration},\nauthor={Wenjie Fu and Huandong Wang and Chen Gao and Guanghua Liu and Yong Li and Tao Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PAWQvrForJ}\n}", "github": "", "reviewers": "saYA;i2LA;DLpC;v2Kc", "pdf_size": 627874, "rating": "5;7;7;7", "confidence": "4;5;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;1;3;3", "wc_summary": "65;59;97;105", "wc_strengths": "53;72;20;74", "wc_weaknesses": "371;609;36;69", "wc_questions": "56;285;136;3", "wc_limitations": "5;15;30;6", "wc_review": "550;1040;319;257", "wc_reply_reviewers": "53;38;0;35", "wc_reply_authors": "1114;2077;49;54", "reply_reviewers": "1;1;0;1", "reply_authors": "5;6;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 81.5, 19.817921182606415 ], "wc_strengths_avg": [ 54.75, 21.672274915199836 ], "wc_weaknesses_avg": [ 271.25, 234.666119199172 ], "wc_questions_avg": [ 120.0, 106.37903928876214 ], "wc_limitations_avg": [ 14.0, 10.024968827881711 ], "wc_review_avg": [ 541.5, 307.82340716716135 ], "wc_reply_reviewers_avg": [ 31.5, 19.42292459955503 ], "wc_reply_authors_avg": [ 823.5, 843.7465555485248 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12918517988176256262&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "hust.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;hust.edu.cn;tsinghua.edu.cn;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HUST;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Functionally Constrained Algorithm Solves Convex Simple Bilevel Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95326", "id": "PAiGHJppam", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PAiGHJppam", "openreview": "https://openreview.net/forum?id=PAiGHJppam", "poster": "/media/PosterPDFs/NeurIPS%202024/95326.png?t=1731588659.0226045", "project": "", "author_site": "Huaqing Zhang, Lesi Chen, Jing Xu, Jingzhao Zhang", "tldr": "", "abstract": "This paper studies simple bilevel problems, where a convex upper-level function is minimized over the optimal solutions of a convex lower-level problem. We first show the fundamental difficulty of simple bilevel problems, that the approximate optimal value of such problems is not obtainable by first-order zero-respecting algorithms. Then we follow recent works to pursue the weak approximate solutions. For this goal, we propose a novel method by reformulating them into functionally constrained problems. Our method achieves near-optimal rates for both \nsmooth and nonsmooth problems. To the best of our knowledge, this is the first near-optimal algorithm that works under standard assumptions of smoothness or Lipschitz continuity for the objective functions.", "keywords": "bilevel optimization;first-order methods;optimal complexity", "primary_area": "optimization", "supplementary_material": "/attachment/89ab6635aa9d825eabdd09e939e44af479558772.zip", "author": "Huaqing Zhang;Lesi Chen;Jing Xu;Jingzhao Zhang", "authorids": "~Huaqing_Zhang2;~Lesi_Chen1;~Jing_Xu4;~Jingzhao_Zhang2", "gender": ";M;M;M", "homepage": "https://github.com/zhqwqwq/;https://truenobility303.github.io/;https://jingxuthu.github.io;https://sites.google.com/view/jingzhao/home", "dblp": ";326/5433;07/1951-27;220/5559", "google_scholar": ";ynGzhugAAAAJ;jlrroGQAAAAJ;8NudxYsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Huaqing_Zhang2;~Lesi_Chen1;~Jing_Xu4;~Jingzhao_Zhang2", "aff": "Tsinghua University;Tsinghua Univeristy;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;thu.edu.cn;mail.tsinghua.edu.cn", "position": "Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024functionally,\ntitle={Functionally Constrained Algorithm Solves Convex Simple Bilevel Problem},\nauthor={Huaqing Zhang and Lesi Chen and Jing Xu and Jingzhao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PAiGHJppam}\n}", "github": "", "reviewers": "EaJT;iPzZ;KpB7;213i;RLu3", "pdf_size": 1595487, "rating": "5;5;6;6;6", "confidence": "4;4;3;4;4", "soundness": "3;3;3;4;2", "novelty": "2;2;3;3;2", "presentation": "3;3;3;3;2", "wc_summary": "65;112;57;33;69", "wc_strengths": "70;13;79;40;54", "wc_weaknesses": "230;372;152;38;238", "wc_questions": "2;17;5;94;48", "wc_limitations": "4;1;1;2;6", "wc_review": "371;515;294;207;415", "wc_reply_reviewers": "63;25;11;30;138", "wc_reply_authors": "278;71;67;67;101", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;3", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 67.2, 25.646832163056704 ], "wc_strengths_avg": [ 51.2, 23.318662054243163 ], "wc_weaknesses_avg": [ 206.0, 109.81438885683424 ], "wc_questions_avg": [ 33.2, 34.48709903717621 ], "wc_limitations_avg": [ 2.8, 1.9390719429665317 ], "wc_review_avg": [ 360.4, 104.81908223219664 ], "wc_reply_reviewers_avg": [ 53.4, 45.60964810212857 ], "wc_reply_authors_avg": [ 116.8, 81.6 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x_AaaNcCYYoJ:scholar.google.com/&scioq=Functionally+Constrained+Algorithm+Solves+Convex+Simple+Bilevel+Problem&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;thu.edu.cn;mail.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Linear Causal Bandits: Unknown Graph and Soft Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95325", "id": "PAu0W5YAKC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PAu0W5YAKC", "openreview": "https://openreview.net/forum?id=PAu0W5YAKC", "poster": "", "project": "", "author_site": "Zirui Yan, Ali Tajer", "tldr": "", "abstract": "Designing causal bandit algorithms depends on two central categories of assumptions: (i) the extent of information about the underlying causal graphs and (ii) the extent of information about interventional statistical models. There have been extensive recent advances in dispensing with assumptions on either category. These include assuming known graphs but unknown interventional distributions, and the converse setting of assuming unknown graphs but access to restrictive hard/$\\operatorname{do}$ interventions, which removes the stochasticity and ancestral dependencies. Nevertheless, the problem in its general form, i.e., _unknown_ graph and _unknown_ stochastic intervention models, remains open. This paper addresses this problem and establishes that in a graph with $N$ nodes, maximum in-degree $d$ and maximum causal path length $L$, after $T$ interaction rounds the regret upper bound scales as $\\tilde{\\mathcal{O}}((cd)^{L-\\frac{1}{2}}\\sqrt{T} + d + RN)$ where $c>1$ is a constant and $R$ is a measure of intervention power. A universal minimax lower bound is also established, which scales as $\\Omega(d^{L-\\frac{3}{2}}\\sqrt{T})$. Importantly, the graph size $N$ has a diminishing effect on the regret as $T$ grows. These bounds have matching behavior in $T$, exponential dependence on $L$, and polynomial dependence on $d$ (with the gap $d\\ $). On the algorithmic aspect, the paper presents a novel way of designing a computationally efficient CB algorithm, addressing a challenge that the existing CB algorithms using soft interventions face.", "keywords": "causal bandit;unknown graph;soft interventions", "primary_area": "bandits", "supplementary_material": "", "author": "Zirui Yan;Ali Tajer", "authorids": "~Zirui_Yan1;~Ali_Tajer1", "gender": "M;M", "homepage": "https://ziruiyan.github.io/;https://www.isg-rpi.com/", "dblp": "256/9541;65/2830", "google_scholar": "XXgTWskAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Zirui_Yan1;~Ali_Tajer1", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;rpi.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nyan2024linear,\ntitle={Linear Causal Bandits: Unknown Graph and Soft Interventions},\nauthor={Zirui Yan and Ali Tajer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PAu0W5YAKC}\n}", "github": "", "reviewers": "zJSz;2rLb;1kEj", "pdf_size": 898316, "rating": "4;6;7", "confidence": "4;3;4", "soundness": "2;4;3", "novelty": "3;3;4", "presentation": "3;4;2", "wc_summary": "70;81;151", "wc_strengths": "42;89;37", "wc_weaknesses": "10;42;336", "wc_questions": "166;92;95", "wc_limitations": "7;1;18", "wc_review": "295;305;637", "wc_reply_reviewers": "77;352;79", "wc_reply_authors": "235;1226;51", "reply_reviewers": "2;5;1", "reply_authors": "3;5;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 100.66666666666667, 35.87323359956402 ], "wc_strengths_avg": [ 56.0, 23.423634787681152 ], "wc_weaknesses_avg": [ 129.33333333333334, 146.71817277427573 ], "wc_questions_avg": [ 117.66666666666667, 34.198765409814946 ], "wc_limitations_avg": [ 8.666666666666666, 7.039570693980959 ], "wc_review_avg": [ 412.3333333333333, 158.91577084173306 ], "wc_reply_reviewers_avg": [ 169.33333333333334, 129.16741935264568 ], "wc_reply_authors_avg": [ 504.0, 516.0277770301389 ], "reply_reviewers_avg": [ 2.6666666666666665, 1.699673171197595 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16357598914382179860&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "rpi.edu;rpi.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.rpi.edu", "aff_unique_abbr": "RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Credal Deep Ensembles for Uncertainty Quantification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95324", "id": "PCgnTiGC9K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PCgnTiGC9K", "openreview": "https://openreview.net/forum?id=PCgnTiGC9K", "poster": "", "project": "", "author_site": "Kaizheng Wang, Fabio Cuzzolin, Shireen Kudukkil Manchingal -, Keivan Shariatmadar, David Moens, Hans Hallez", "tldr": "", "abstract": "This paper introduces an innovative approach to classification called Credal Deep Ensembles (CreDEs), namely, ensembles of novel Credal-Set Neural Networks (CreNets). CreNets are trained to predict a lower and an upper probability bound for each class, which, in turn, determine a convex set of probabilities (credal set) on the class set. The training employs a loss inspired by distributionally robust optimization which simulates the potential divergence of the test distribution from the training distribution, in such a way that the width of the predicted probability interval reflects the epistemic uncertainty about the future data distribution. Ensembles can be constructed by training multiple CreNets, each associated with a different random seed, and averaging the outputted intervals. Extensive experiments are conducted on various out-of-distributions (OOD) detection benchmarks (CIFAR10/100 vs SVHN/Tiny-ImageNet, CIFAR10 vs CIFAR10-C, ImageNet vs ImageNet-O) and using different network architectures (ResNet50, VGG16, and ViT Base). Compared to Deep Ensemble baselines, CreDEs demonstrate higher test accuracy, lower expected calibration error, and significantly improved epistemic uncertainty estimation.", "keywords": "neural networks;credal inference;uncertainty estimation and quantification;classification", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/e079b684fce2f6ad9a0951dc047653f36ce4b5f0.zip", "author": "Kaizheng Wang;Fabio Cuzzolin;Shireen Kudukkil Manchingal;Keivan Shariatmadar;David Moens;Hans Hallez", "authorids": "~Kaizheng_Wang2;~Fabio_Cuzzolin1;~Shireen_Kudukkil_Manchingal1;~Keivan_Shariatmadar1;~David_Moens1;~Hans_Hallez1", "gender": "M;M;F;;M;M", "homepage": "https://www.kuleuven.be/wieiswie/nl/person/00151239;https://www.brookes.ac.uk/profiles/staff/fabio-cuzzolin;https://www.brookes.ac.uk/profiles/student/shireen-kudukkil-manchingal/;;;", "dblp": ";60/2919;322/5736;;214/0807;62/1802", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=T8LkBTYAAAAJ;RioUSBEAAAAJ;;https://scholar.google.nl/citations?user=qW2VpMAAAAAJ;https://scholar.google.be/citations?user=CWF_wV8AAAAJ", "orcid": "0000-0001-8268-962X;0000-0002-9271-2130;0009-0001-6597-4290;;0000-0002-5707-0160;0000-0003-2623-9055", "linkedin": ";fabio-cuzzolin-b481a928/;shireenkudukkil/;;;hanshallez/", "or_profile": "~Kaizheng_Wang2;~Fabio_Cuzzolin1;~Shireen_Kudukkil_Manchingal1;~Keivan_Shariatmadar1;~David_Moens1;~Hans_Hallez1", "aff": "KU Leuven;Oxford Brookes University;Oxford Brookes University;;;KU Leuven, KU Leuven", "aff_domain": "kuleuven.be;brookes.ac.uk;brookes.ac.uk;;;cs.kuleuven.be", "position": "PhD student;Full Professor;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nwang2024credal,\ntitle={Credal Deep Ensembles for Uncertainty Quantification},\nauthor={Kaizheng Wang and Fabio Cuzzolin and Shireen Kudukkil Manchingal and Keivan Shariatmadar and David Moens and Hans Hallez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PCgnTiGC9K}\n}", "github": "", "reviewers": "YsPp;AC8P;HFiw;7GAw", "pdf_size": 1860765, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "139;27;122;62", "wc_strengths": "73;43;90;77", "wc_weaknesses": "361;81;240;92", "wc_questions": "345;118;120;137", "wc_limitations": "7;15;12;18", "wc_review": "925;284;584;386", "wc_reply_reviewers": "443;328;87;52", "wc_reply_authors": "2180;1198;43;57", "reply_reviewers": "3;1;1;1", "reply_authors": "6;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 45.14698218042929 ], "wc_strengths_avg": [ 70.75, 17.210098779495716 ], "wc_weaknesses_avg": [ 193.5, 115.30069384006325 ], "wc_questions_avg": [ 180.0, 95.54841704601914 ], "wc_limitations_avg": [ 13.0, 4.06201920231798 ], "wc_review_avg": [ 544.75, 244.6031224248783 ], "wc_reply_reviewers_avg": [ 227.5, 163.61616668288008 ], "wc_reply_authors_avg": [ 869.5, 890.025420985266 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3013775681892005972&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kuleuven.be;brookes.ac.uk;brookes.ac.uk;;;cs.kuleuven.be", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Katholieke Universiteit Leuven;Oxford Brookes University;KU Leuven", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kuleuven.be;https://www.oxfordbrookes.ac.uk;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;OBU;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Belgium;United Kingdom" }, { "title": "WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97713", "id": "PCjK8dqrWW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PCjK8dqrWW", "openreview": "https://openreview.net/forum?id=PCjK8dqrWW", "poster": "/media/PosterPDFs/NeurIPS%202024/97713.png?t=1731709297.9176207", "project": "", "author_site": "L\u00e9o Boisvert, Megh Thakkar, Maxime Gasse, Massimo Caccia, Thibault de Chezelles, Quentin Cappart, Nicolas Chapados, Alexandre Lacoste, Alexandre Drouin", "tldr": "", "abstract": "The ability of large language models (LLMs) to mimic human-like intelligence has led to a surge in LLM-based autonomous agents. \nThough recent LLMs seem capable of planning and reasoning given user instructions, their effectiveness in applying these capabilities for autonomous task solving remains underexplored. This is especially true in enterprise settings, where automated agents hold the promise of a high impact. To fill this gap, we propose WorkArena++, a novel benchmark consisting of 682 tasks corresponding to realistic workflows routinely performed by knowledge workers. WorkArena++ is designed to evaluate the planning, problem-solving, logical/arithmetic reasoning, retrieval, and contextual understanding abilities of web agents. Our empirical studies across state-of-the-art LLMs and vision-language models (VLMs), as well as human workers, reveal several challenges for such models to serve as useful assistants in the workplace. In addition to the benchmark, we provide a mechanism to effortlessly generate thousands of ground-truth observation/action traces, which can be used for fine-tuning existing models. Overall, we expect this work to serve as a useful resource to help the community progress towards capable autonomous agents. The benchmark can be found at https://github.com/ServiceNow/WorkArena.", "keywords": "Large language models;Web Agents;LLMs;VLMs;Autonomous agents", "primary_area": "", "supplementary_material": "/attachment/b12a7767bbc89b68d8816f792d0b03e7ade1b4b2.zip", "author": "L\u00e9o Boisvert;Megh Thakkar;Maxime Gasse;Massimo Caccia;Thibault Le Sellier de Chezelles;Quentin Cappart;Nicolas Chapados;Alexandre Lacoste;Alexandre Drouin", "authorids": "~L\u00e9o_Boisvert1;~Megh_Thakkar1;~Maxime_Gasse2;~Massimo_Caccia1;~Thibault_Le_Sellier_de_Chezelles1;~Quentin_Cappart1;~Nicolas_Chapados1;~Alexandre_Lacoste1;~Alexandre_Drouin2", "gender": "M;M;M;;M;M;M;M;M", "homepage": "https://leo-boisvert.com/;http://megh-thakkar.github.io;http://www.maximegasse.com/;;;https://qcappart.github.io/;;;https://alexdrouin.com", "dblp": ";92/6840;118/4730;43/6338.html;;164/5606;58/1013;59/6239.html;117/3861", "google_scholar": "2XdDpIgAAAAJ;;https://scholar.google.fr/citations?user=s7m9rikAAAAJ;WaE4GicAAAAJ;;QJQMYLsAAAAJ;QdnjDj8AAAAJ;;https://scholar.google.ca/citations?user=LR6aJcEAAAAJ", "orcid": ";;0000-0001-6982-062X;;;0000-0002-8742-0774;;;0000-0001-7718-0319", "linkedin": ";Megh-Thakkar;maxime-gasse-100a4a62/;;thibault-lsdc;;;;drouinalexandre/", "or_profile": "~L\u00e9o_Boisvert1;~Megh_Thakkar1;~Maxime_Gasse2;~Massimo_Caccia1;~Thibault_Le_Sellier_de_Chezelles1;~Quentin_Cappart1;~Nicolas_Chapados1;~Alexandre_Lacoste1;~Alexandre_Drouin2", "aff": "\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;ServiceNow Inc;ServiceNow Inc;Polytechnique Montreal;ServiceNow Research;ServiceNow;ServiceNow Research ", "aff_domain": "polymtl.ca;umontreal.ca;mila.umontreal.ca;servicenow.com;servicenow.com;polymtl.ca;servicenow.com;servicenow.com;servicenow.com", "position": "PhD student;MS student;Researcher;Researcher;Intern;Assistant Professor;VP Research;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nboisvert2024workarena,\ntitle={WorkArena++: Towards Compositional Planning and Reasoning-based Common Knowledge Work Tasks},\nauthor={L{\\'e}o Boisvert and Megh Thakkar and Maxime Gasse and Massimo Caccia and Thibault Le Sellier de Chezelles and Quentin Cappart and Nicolas Chapados and Alexandre Lacoste and Alexandre Drouin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PCjK8dqrWW}\n}", "github": "", "reviewers": "VC5R;ysFs;8Prt", "pdf_size": 24818093, "rating": "5;7;8", "confidence": "3;3;4", "wc_summary_and_contributions": "71;144;99", "wc_strengths": "73;77;45", "wc_improvement": "416;21;39", "wc_limitations": "44;33;8", "wc_correctness": "29;38;10", "wc_clarity": "10;46;8", "wc_relation_to_prior_work": "15;45;64", "wc_documentation": "13;23;5", "wc_additional_feedback": "1;1;1", "wc_review": "672;428;279", "wc_reply_reviewers": "24;4;24", "wc_reply_authors": "35;0;24", "reply_reviewers": "1;1;1", "reply_authors": "5;2;3", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 104.66666666666667, 30.07028803025043 ], "wc_strengths_avg": [ 65.0, 14.236104336041748 ], "wc_improvement_avg": [ 158.66666666666666, 182.1104670858384 ], "wc_limitations_avg": [ 28.333333333333332, 15.062831370260005 ], "wc_correctness_avg": [ 25.666666666666668, 11.671427600007732 ], "wc_clarity_avg": [ 21.333333333333332, 17.46106780494506 ], "wc_relation_to_prior_work_avg": [ 41.333333333333336, 20.171487027209693 ], "wc_documentation_avg": [ 13.666666666666666, 7.363574011458175 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 459.6666666666667, 161.99657060842026 ], "wc_reply_reviewers_avg": [ 17.333333333333332, 9.428090415820632 ], "wc_reply_authors_avg": [ 19.666666666666668, 14.613540144521982 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2495825256396927444&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "polymtl.ca;umontreal.ca;mila.umontreal.ca;servicenow.com;servicenow.com;polymtl.ca;servicenow.com;servicenow.com;servicenow.com", "author_num": 9, "aff_unique_index": "0;1;2;3;3;4;3;3;3", "aff_unique_norm": "\u00c9cole Polytechnique de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;University of Montreal;ServiceNow;Polytechnique Montreal", "aff_unique_dep": ";;Montreal Institute for Learning Algorithms;;", "aff_unique_url": "https://www.polymtl.ca;https://www.umontreal.ca;https://www.mila.quebec;https://www.servicenow.com;https://www.polymtl.ca", "aff_unique_abbr": "Polytechnique Montr\u00e9al;UdeM;MILA;ServiceNow;PolyMTL", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Montr\u00e9al;;Montreal", "aff_country_unique_index": "0;0;0;1;1;0;1;1;1", "aff_country_unique": "Canada;United States" }, { "title": "SDP4Bit: Toward 4-bit Communication Quantization in Sharded Data Parallelism for LLM Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95323", "id": "PEEqnXlSCk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PEEqnXlSCk", "openreview": "https://openreview.net/forum?id=PEEqnXlSCk", "poster": "/media/PosterPDFs/NeurIPS%202024/95323.png?t=1731747814.0461655", "project": "", "author_site": "Jinda Jia, Cong Xie, Hanlin Lu, Daoce Wang, Hao Feng, Chengming Zhang, Baixi Sun, Haibin Lin, Zhi Zhang, Xin Liu, Dingwen Tao", "tldr": "", "abstract": "Recent years have witnessed a clear trend towards language models with an ever-increasing number of parameters, as well as the growing training overhead and memory usage. Distributed training, particularly through Sharded Data Parallelism (ShardedDP) which partitions optimizer states among workers, has emerged as a crucial technique to mitigate training time and memory usage. Yet, a major challenge in the scalability of ShardedDP is the intensive communication of weights and gradients. While compression techniques can alleviate this issue, they often result in worse accuracy. Driven by this limitation, we propose SDP4Bit (Toward 4Bit Communication Quantization in Sharded Data Parallelism for LLM Training), which effectively reduces the communication of weights and gradients to nearly 4 bits via two novel techniques: quantization on weight differences, and two-level gradient smooth quantization. Furthermore, SDP4Bit presents an algorithm-system co-design with runtime optimization to minimize the computation overhead of compression. Additional to the theoretical guarantees of convergence, we empirically evaluate the accuracy of SDP4Bit on the pre-training of GPT models with up to 6.7 billion parameters, and the results demonstrate a negligible impact on training loss. Furthermore, speed experiments show that SDP4Bit achieves up to 4.08\u00d7 speedup in end-to-end throughput on a scale of 128 GPUs.", "keywords": "LLM Training;Quantization;Communication Reduction;Collective Communication", "primary_area": "infrastructure", "supplementary_material": "/attachment/e34c92b384e9877897ac6cc5e86a44007cc1998f.zip", "author": "Jinda Jia;Cong Xie;Hanlin Lu;Daoce Wang;Hao Feng;Chengming Zhang;Baixi Sun;Haibin Lin;Zhi Zhang;Xin Liu;Dingwen Tao", "authorids": "~Jinda_Jia2;~Cong_Xie1;~Hanlin_Lu1;~Daoce_Wang1;~Hao_Feng7;~Chengming_Zhang1;~Baixi_Sun1;~Haibin_Lin1;~Zhi_Zhang4;~Xin_Liu51;~Dingwen_Tao1", "gender": "M;M;M;M;M;M;M;;M;M;M", "homepage": "https://jindajia.github.io/;https://congxie1108.github.io/;;;;https://chengmingzh8.github.io/;;;https://zhreshold.github.io;;https://www.dingwentao.com/", "dblp": ";;31/7172;223/2198.html;;78/5836-6;;;;;162/0802.html", "google_scholar": "6lAxnDwAAAAJ;pIPJUJMAAAAJ;https://scholar.google.at/citations?user=UbWokRoAAAAJ;;;F4TtJZEAAAAJ;CGs05FoAAAAJ;;nZr0oXQAAAAJ;ZMfk2F8AAAAJ;Ppjzn_EAAAAJ", "orcid": "0009-0008-5283-5241;;;0000-0002-4444-3634;0009-0003-3720-0444;0000-0003-3008-9133;0000-0001-9807-7978;;0000-0003-0249-1678;;0000-0001-5422-4497", "linkedin": "jindajia/;;;;;chengming-zhang-71a3b1120/;baixi-sun-525501288/;;;;dingwentao/", "or_profile": "~Jinda_Jia2;~Cong_Xie1;~Hanlin_Lu1;~Daoce_Wang1;~Hao_Feng7;~Chengming_Zhang1;~Baixi_Sun1;~Haibin_Lin1;~Zhi_Zhang4;~Xin_Liu51;~Dingwen_Tao1", "aff": "Indiana University;ByteDance Inc.;ByteDance;Indiana University at Bloomington;Indiana University at Bloomington;Indiana University;Indiana University;;ByteDance Inc.;ByteDance Inc.;Indiana University", "aff_domain": "iu.edu;bytedance.com;bytedance.com;bloomington.iu.edu;indiana.edu;iu.edu;iu.edu;;bytedance.com;bytedance.com;iu.edu", "position": "PhD student;Researcher;Researcher;PhD student;PhD student;PhD student;PhD student;;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\njia2024sdpbit,\ntitle={{SDP}4Bit: Toward 4-bit Communication Quantization in Sharded Data Parallelism for {LLM} Training},\nauthor={Jinda Jia and Cong Xie and Hanlin Lu and Daoce Wang and Hao Feng and Chengming Zhang and Baixi Sun and Haibin Lin and Zhi Zhang and Xin Liu and Dingwen Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PEEqnXlSCk}\n}", "github": "", "reviewers": "6CdP;MiBn;Ah9Z;4e2h", "pdf_size": 636948, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "59;67;92;52", "wc_strengths": "42;46;59;68", "wc_weaknesses": "113;190;134;82", "wc_questions": "74;6;98;228", "wc_limitations": "1;1;1;12", "wc_review": "289;310;384;442", "wc_reply_reviewers": "18;0;19;14", "wc_reply_authors": "0;0;0;18", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 15.107944929738128 ], "wc_strengths_avg": [ 53.75, 10.353139620424328 ], "wc_weaknesses_avg": [ 129.75, 39.39781085288877 ], "wc_questions_avg": [ 101.5, 80.45340266265933 ], "wc_limitations_avg": [ 3.75, 4.763139720814412 ], "wc_review_avg": [ 356.25, 60.796278669010654 ], "wc_reply_reviewers_avg": [ 12.75, 7.595228765481656 ], "wc_reply_authors_avg": [ 4.5, 7.794228634059948 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2509289975298603453&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "iu.edu;bytedance.com;bytedance.com;bloomington.iu.edu;indiana.edu;iu.edu;iu.edu;;bytedance.com;bytedance.com;iu.edu", "author_num": 11, "aff_unique_index": "0;1;1;0;0;0;0;1;1;0", "aff_unique_norm": "Indiana University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.indiana.edu;https://www.bytedance.com", "aff_unique_abbr": "IU;ByteDance", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Bloomington", "aff_country_unique_index": "0;1;1;0;0;0;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "CARE: a Benchmark Suite for the Classification and Retrieval of Enzymes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97712", "id": "PFwlw9bnAr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PFwlw9bnAr", "openreview": "https://openreview.net/forum?id=PFwlw9bnAr", "poster": "/media/PosterPDFs/NeurIPS%202024/97712.png?t=1731716911.2204814", "project": "", "author_site": "Jason Yang, Ariane Mora, Shengchao Liu, Bruce Wittmann, Animashree Anandkumar, Frances Arnold, Yisong Yue", "tldr": "", "abstract": "Enzymes are important proteins that catalyze chemical reactions. In recent years, machine learning methods have emerged to predict enzyme function from sequence; however, there are no standardized benchmarks to evaluate these methods. We introduce CARE, a benchmark and dataset suite for the Classification And Retrieval of Enzymes (CARE). CARE centers on two tasks: (1) classification of a protein sequence by its enzyme commission (EC) number and (2) retrieval of an EC number given a chemical reaction. For each task, we design train-test splits to evaluate different kinds of out-of-distribution generalization that are relevant to real use cases. For the classification task, we provide baselines for state-of-the-art methods. Because the retrieval task has not been previously formalized, we propose a method called Contrastive Reaction-EnzymE Pretraining (CREEP) as one of the first baselines for this task and compare it to the recent method, CLIPZyme. CARE is available at https://github.com/jsunn-y/CARE/.", "keywords": "enzyme;protein;enzyme annotation;enzyme classification;enzyme function;enzyme engineering;machine learning;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jason Yang;Ariane Mora;Shengchao Liu;Bruce James Wittmann;Anima Anandkumar;Frances H. Arnold;Yisong Yue", "authorids": "~Jason_Yang3;~Ariane_Mora1;~Shengchao_Liu1;~Bruce_James_Wittmann1;~Anima_Anandkumar1;~Frances_H._Arnold1;~Yisong_Yue1", "gender": ";F;M;M;;M;F", "homepage": "https://jsunn-y.github.io;https://github.com/ArianeMora;https://chao1224.github.io/;;http://fhalab.caltech.edu;http://www.yisongyue.com;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": ";;;;;28/1244;", "google_scholar": ";H_kqdb0AAAAJ;F1ws3XUAAAAJ;;;tEk4qo8AAAAJ;bEcLezcAAAAJ", "orcid": ";;0000-0003-2030-2367;0000-0001-8144-9157;;0000-0001-9127-1989;", "linkedin": ";;;bwittmann/;;yisongyue/;anima-anandkumar-35171b1/", "or_profile": "~Jason_Yang3;~Ariane_Mora1;~Shengchao_Liu1;~Bruce_James_Wittmann1;~Frances_H._Arnold1;~Yisong_Yue1;~anima_anandkumar1", "aff": "California Institute of Technology;California Institute of Technology;University of California, Berkeley;Microsoft;California Institute of Technology;California Institute of Technology;California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu;berkeley.edu;microsoft.com;caltech.edu;caltech.edu;caltech.edu", "position": "PhD student;Postdoc;Postdoc;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024care,\ntitle={{CARE}: a Benchmark Suite for the Classification and Retrieval of Enzymes},\nauthor={Jason Yang and Ariane Mora and Shengchao Liu and Bruce James Wittmann and Anima Anandkumar and Frances H. Arnold and Yisong Yue},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PFwlw9bnAr}\n}", "github": "", "reviewers": "UrNk;QaD7;hc5Q", "pdf_size": 4083119, "rating": "5;7;9", "confidence": "5;4;3", "wc_summary_and_contributions": "41;48;105", "wc_strengths": "39;26;96", "wc_improvement": "19;83;45", "wc_limitations": "54;36;7", "wc_correctness": "11;19;1", "wc_clarity": "5;13;1", "wc_relation_to_prior_work": "4;7;1", "wc_documentation": "15;37;8", "wc_additional_feedback": "1;1;1", "wc_review": "189;270;265", "wc_reply_reviewers": "0;103;14", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;3;1", "rating_avg": [ 7.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 64.66666666666667, 28.662790435607548 ], "wc_strengths_avg": [ 53.666666666666664, 30.40102337458761 ], "wc_improvement_avg": [ 49.0, 26.280537792569366 ], "wc_limitations_avg": [ 32.333333333333336, 19.362047641943477 ], "wc_correctness_avg": [ 10.333333333333334, 7.363574011458175 ], "wc_clarity_avg": [ 6.333333333333333, 4.988876515698588 ], "wc_relation_to_prior_work_avg": [ 4.0, 2.449489742783178 ], "wc_documentation_avg": [ 20.0, 12.355835328567093 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 241.33333333333334, 37.06151043273271 ], "wc_reply_reviewers_avg": [ 39.0, 45.614325235244536 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14751236476976918674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "caltech.edu;caltech.edu;berkeley.edu;microsoft.com;caltech.edu;caltech.edu;caltech.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0;0", "aff_unique_norm": "California Institute of Technology;University of California, Berkeley;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.caltech.edu;https://www.berkeley.edu;https://www.microsoft.com", "aff_unique_abbr": "Caltech;UC Berkeley;Microsoft", "aff_campus_unique_index": "0;0;1;0;0;0", "aff_campus_unique": "Pasadena;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Thompson Sampling For Combinatorial Bandits: Polynomial Regret and Mismatched Sampling Paradox", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95322", "id": "PGOuBHYdbr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PGOuBHYdbr", "openreview": "https://openreview.net/forum?id=PGOuBHYdbr", "poster": "/media/PosterPDFs/NeurIPS%202024/95322.png?t=1733885246.0054488", "project": "", "author_site": "Raymond Zhang, Richard Combes", "tldr": "", "abstract": "We consider Thompson Sampling (TS) for linear combinatorial semi-bandits and subgaussian rewards. We propose the first known TS whose finite-time regret does not scale exponentially with the dimension of the problem. We further show the mismatched sampling paradox: A learner who knows the rewards distributions and samples from the correct posterior distribution can perform exponentially worse than a learner who does not know the rewards and simply samples from a well-chosen Gaussian posterior. The code used to generate the experiments is available at https://github.com/RaymZhang/CTS-Mismatched-Paradox", "keywords": "Combinatorial bandits;Thomspon Sampling", "primary_area": "bandits", "supplementary_material": "", "author": "Raymond Zhang;Richard Combes", "authorids": "~Raymond_Zhang1;~Richard_Combes1", "gender": "M;M", "homepage": "https://www.raymzhang.com/;", "dblp": ";47/8356", "google_scholar": "SnNQLrQAAAAJ;https://scholar.google.fr/citations?user=ojq6NbAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Raymond_Zhang1;~Richard_Combes1", "aff": "CentraleSupelec;CentraleSupelec", "aff_domain": "centralesupelec.fr;centralesupelec.fr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024thompson,\ntitle={Thompson Sampling For Combinatorial Bandits: Polynomial Regret and Mismatched Sampling Paradox},\nauthor={Raymond Zhang and Richard Combes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PGOuBHYdbr}\n}", "github": "", "reviewers": "PpKq;4UPn;npKN;yJpu", "pdf_size": 782320, "rating": "5;6;6;7", "confidence": "3;3;3;5", "soundness": "3;3;4;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "29;139;51;33", "wc_strengths": "44;140;29;55", "wc_weaknesses": "167;189;34;158", "wc_questions": "6;293;45;58", "wc_limitations": "2;1;8;28", "wc_review": "248;762;167;332", "wc_reply_reviewers": "13;73;10;20", "wc_reply_authors": "0;49;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.0, 44.654227123532216 ], "wc_strengths_avg": [ 67.0, 43.14510400960925 ], "wc_weaknesses_avg": [ 137.0, 60.52685354452187 ], "wc_questions_avg": [ 100.5, 112.77521890912028 ], "wc_limitations_avg": [ 9.75, 10.871407452579449 ], "wc_review_avg": [ 377.25, 229.6686471854615 ], "wc_reply_reviewers_avg": [ 29.0, 25.66125484071268 ], "wc_reply_authors_avg": [ 12.25, 21.21762239271875 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BZNSiLDvrs0J:scholar.google.com/&scioq=Thompson+Sampling+For+Combinatorial+Bandits:+Polynomial+Regret+and+Mismatched+Sampling+Paradox&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "centralesupelec.fr;centralesupelec.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "CentraleSup\u00e9lec", "aff_unique_dep": "", "aff_unique_url": "https://www.centralesupelec.fr", "aff_unique_abbr": "CS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Scaling Laws in Linear Regression: Compute, Parameters, and Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95321", "id": "PH7sdEanXP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PH7sdEanXP", "openreview": "https://openreview.net/forum?id=PH7sdEanXP", "poster": "", "project": "", "author_site": "Licong Lin, Jingfeng Wu, Sham Kakade, Peter Bartlett, Jason Lee", "tldr": "", "abstract": "Empirically, large-scale deep learning models often satisfy a neural scaling law: the test error of the trained model improves polynomially as the model size and data size grow. However, conventional wisdom suggests the test error consists of approximation, bias, and variance errors, where the variance error increases with model size. This disagrees with the general form of neural scaling laws, which predict that increasing model size monotonically improves performance.\n\nWe study the theory of scaling laws in an infinite dimensional linear regression setup. Specifically, we consider a model with $M$ parameters as a linear function of sketched covariates. The model is trained by one-pass stochastic gradient descent (SGD) using $N$ data. Assuming the optimal parameter satisfies a Gaussian prior and the data covariance matrix has a power-law spectrum of degree $a>1$, we show that the reducible part of the test error is $\\Theta(M^{-(a-1)} + N^{-(a-1)/a})$. The variance error, which increases with $M$, is dominated by the other errors due to the implicit regularization of SGD, thus disappearing from the bound. Our theory is consistent with the empirical neural scaling laws and verified by numerical simulation.", "keywords": "learning theory;scaling law;deep learning;SGD;optimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Licong Lin;Jingfeng Wu;Sham M. Kakade;Peter Bartlett;Jason D. Lee", "authorids": "~Licong_Lin2;~Jingfeng_Wu1;~Sham_M._Kakade1;~Peter_Bartlett1;~Jason_D._Lee1", "gender": "M;M;M;M;M", "homepage": "https://statistics.berkeley.edu/people/licong-lin;https://uuujf.github.io;https://shamulent.github.io;https://www.stat.berkeley.edu/~bartlett/;https://jasondlee88.github.io/", "dblp": ";;s/SMKakade;https://dblp.org/pers/hd/b/Bartlett:Peter_L=;88/3262", "google_scholar": ";z-KILD8AAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;yQNhFGUAAAAJ;GR_DsT0AAAAJ", "orcid": ";0009-0009-3414-4487;;;", "linkedin": ";jingfeng-wu-79205b184/;;;", "or_profile": "~Licong_Lin2;~Jingfeng_Wu1;~Sham_M._Kakade1;~Peter_Bartlett1;~Jason_D._Lee1", "aff": "University of California, Berkeley;University of California, Berkeley;Harvard University;University of California, Berkeley;Princeton University", "aff_domain": "berkeley.edu;berkeley.edu;harvard.edu;berkeley;princeton.edu", "position": "PhD student;Postdoc;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2024scaling,\ntitle={Scaling Laws in Linear Regression: Compute, Parameters, and Data},\nauthor={Licong Lin and Jingfeng Wu and Sham M. Kakade and Peter Bartlett and Jason D. Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PH7sdEanXP}\n}", "github": "", "reviewers": "9pNW;v1rx;iyfq;8LLD;zAqY", "pdf_size": 793667, "rating": "5;5;6;8;8", "confidence": "3;3;5;2;4", "soundness": "3;3;4;4;4", "novelty": "3;2;1;3;4", "presentation": "3;3;4;4;4", "wc_summary": "142;54;19;84;122", "wc_strengths": "26;22;70;88;82", "wc_weaknesses": "96;477;53;77;3", "wc_questions": "33;194;2;107;77", "wc_limitations": "21;19;1;2;1", "wc_review": "318;766;145;358;285", "wc_reply_reviewers": "11;273;138;11;7", "wc_reply_authors": "0;211;255;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 6.4, 1.3564659966250536 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 84.2, 44.570842486989186 ], "wc_strengths_avg": [ 57.6, 28.06848766855813 ], "wc_weaknesses_avg": [ 141.2, 170.76580453943347 ], "wc_questions_avg": [ 82.6, 66.32224362911738 ], "wc_limitations_avg": [ 8.8, 9.173875952943773 ], "wc_review_avg": [ 374.4, 208.52683280575667 ], "wc_reply_reviewers_avg": [ 88.0, 105.01809367913702 ], "wc_reply_authors_avg": [ 93.2, 114.99113009271629 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.11566298639324801, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16043033752565483861&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "berkeley.edu;berkeley.edu;harvard.edu;berkeley;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of California, Berkeley;Harvard University;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.harvard.edu;https://www.princeton.edu", "aff_unique_abbr": "UC Berkeley;Harvard;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Efficient and Optimal Covariance-Adaptive Algorithms for Combinatorial Semi-Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95320", "id": "PI0CDY6nmo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PI0CDY6nmo", "openreview": "https://openreview.net/forum?id=PI0CDY6nmo", "poster": "/media/PosterPDFs/NeurIPS%202024/95320.png?t=1733578419.6925075", "project": "", "author_site": "Julien Zhou, Pierre Gaillard, Thibaud Rahier, Houssam Zenati, Julyan Arbel", "tldr": "", "abstract": "We address the problem of stochastic combinatorial semi-bandits, where a player selects among $P$ actions from the power set of a set containing $d$ base items. Adaptivity to the problem's structure is essential in order to obtain optimal regret upper bounds. As estimating the coefficients of a covariance matrix can be manageable in practice, leveraging them should improve the regret. We design ``optimistic'' covariance-adaptive algorithms relying on online estimations of the covariance structure, called OLS-UCB-C and COS-V (only the variances for the latter). They both yields improved gap-free regret. Although COS-V can be slightly suboptimal, it improves on computational complexity by taking inspiration from Thompson Sampling approaches. It is the first sampling-based algorithm satisfying a $\\sqrt{T}$ gap-free regret (up to poly-logs). We also show that in some cases, our approach efficiently leverages the semi-bandit feedback and outperforms bandit feedback approaches, not only in exponential regimes where $P\\gg d$ but also when $P\\leq d$, which is not covered by existing analyses.", "keywords": "Combinatorial Semi-Bandit;Upper Confidence Bound;Thompson Sampling;Confidence Ellipsoid", "primary_area": "bandits", "supplementary_material": "", "author": "Julien Zhou;Pierre Gaillard;Thibaud Rahier;Houssam Zenati;Julyan Arbel", "authorids": "~Julien_Zhou1;~Pierre_Gaillard1;~Thibaud_Rahier1;~Houssam_Zenati1;~Julyan_Arbel1", "gender": "M;M;M;M;M", "homepage": "https://jlnzhou.github.io/;http://pierre.gaillard.me;;https://houssamzenati.github.io/;http://www.julyanarbel.com/", "dblp": "371/2724;25/2131;;;172/8198", "google_scholar": ";https://scholar.google.fr/citations?user=-CPaGaEAAAAJ;;LBqNPp4AAAAJ;Q7P4K3wAAAAJ", "orcid": ";0000-0001-6777-6127;;;0000-0002-2525-4416", "linkedin": ";;;houssam-zenati/;julyanarbel/", "or_profile": "~Julien_Zhou1;~Pierre_Gaillard1;~Thibaud_Rahier1;~Houssam_Zenati1;~Julyan_Arbel1", "aff": "Criteo;INRIA;;INRIA;Inria", "aff_domain": "criteo.com;inria.fr;;inria.fr;inria.fr", "position": "PhD student;Researcher;;Postdoc;Researcher", "bibtex": "@inproceedings{\nzhou2024towards,\ntitle={Towards Efficient and Optimal Covariance-Adaptive Algorithms for Combinatorial Semi-Bandits},\nauthor={Julien Zhou and Pierre Gaillard and Thibaud Rahier and Houssam Zenati and Julyan Arbel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PI0CDY6nmo}\n}", "github": "", "reviewers": "RAfE;47Sr;zmBr", "pdf_size": 1205450, "rating": "6;6;7", "confidence": "5;3;2", "soundness": "4;3;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "86;43;35", "wc_strengths": "43;75;29", "wc_weaknesses": "35;58;171", "wc_questions": "25;239;88", "wc_limitations": "14;18;10", "wc_review": "203;433;333", "wc_reply_reviewers": "19;139;12", "wc_reply_authors": "0;592;0", "reply_reviewers": "1;2;1", "reply_authors": "1;3;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 54.666666666666664, 22.395436042987765 ], "wc_strengths_avg": [ 49.0, 19.252705437591537 ], "wc_weaknesses_avg": [ 88.0, 59.43624034767565 ], "wc_questions_avg": [ 117.33333333333333, 89.79359046663014 ], "wc_limitations_avg": [ 14.0, 3.265986323710904 ], "wc_review_avg": [ 323.0, 94.16297927883689 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 58.2885542414251 ], "wc_reply_authors_avg": [ 197.33333333333334, 279.07147630829076 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1929869729860661606&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "criteo.com;inria.fr;;inria.fr;inria.fr", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Criteo;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.criteo.com;https://www.inria.fr", "aff_unique_abbr": "Criteo;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Transfer Learning for Latent Variable Network Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95319", "id": "PK8xOCBQRO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PK8xOCBQRO", "openreview": "https://openreview.net/forum?id=PK8xOCBQRO", "poster": "/media/PosterPDFs/NeurIPS%202024/95319.png?t=1731368845.885558", "project": "", "author_site": "Akhil Jalan, Arya Mazumdar, Soumendu Sundar Mukherjee, Purnamrita Sarkar", "tldr": "", "abstract": "We study transfer learning for estimation in latent variable network models. In our setting, the conditional edge probability matrices given the latent variables are represented by $P$ for the source and $Q$ for the target. We wish to estimate $Q$ given two kinds of data: (1) edge data from a subgraph induced by an $o(1)$ fraction of the nodes of $Q$, and (2) edge data from all of $P$. If the source $P$ has no relation to the target $Q$, the estimation error must be $\\Omega(1)$. However, we show that if the latent variables are shared, then vanishing error is possible. We give an efficient algorithm that utilizes the ordering of a suitably defined graph distance. Our algorithm achieves $o(1)$ error and does not assume a parametric form on the source or target networks. Next, for the specific case of Stochastic Block Models we prove a minimax lower bound and show that a simple algorithm achieves this rate. Finally, we empirically demonstrate our algorithm's use on real-world and simulated graph transfer problems.", "keywords": "Transfer learning;network estimation;latent variable models", "primary_area": "learning_theory", "supplementary_material": "/attachment/b07fe419a18eb41022ca16921f5117006ceb346f.zip", "author": "Akhil Jalan;Arya Mazumdar;Soumendu Sundar Mukherjee;Purnamrita Sarkar", "authorids": "~Akhil_Jalan1;~Arya_Mazumdar1;~Soumendu_Sundar_Mukherjee2;~Purnamrita_Sarkar1", "gender": "M;M;F;M", "homepage": "https://akhiljalan.github.io/;http://www.cs.umass.edu/~arya;https://psarkar.github.io/;https://soumendu041.gitlab.io/", "dblp": ";77/6050;25/6929;", "google_scholar": ";https://scholar.google.com.tw/citations?user=9tjQU1EAAAAJ;KfT3_0AAAAAJ;DctLHfsAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Akhil_Jalan1;~Arya_Mazumdar1;~Purnamrita_Sarkar1;~Soumendu_Mukherjee1", "aff": "University of Texas at Austin;University of California, San Diego;University of Texas, Austin;Indian Statistical Institute", "aff_domain": "utexas.edu;ucsd.edu;utexas.edu;isical.ac.in", "position": "PhD student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\njalan2024transfer,\ntitle={Transfer Learning for Latent Variable Network Models},\nauthor={Akhil Jalan and Arya Mazumdar and Soumendu Sundar Mukherjee and Purnamrita Sarkar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PK8xOCBQRO}\n}", "github": "", "reviewers": "VhmF;BaBd;ajJ3;TFsn", "pdf_size": 21346920, "rating": "6;7;7;8", "confidence": "1;2;3;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "77;59;153;363", "wc_strengths": "57;20;77;175", "wc_weaknesses": "58;40;195;134", "wc_questions": "79;21;233;80", "wc_limitations": "5;59;21;25", "wc_review": "276;199;679;777", "wc_reply_reviewers": "27;23;39;51", "wc_reply_authors": "18;10;30;28", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.479019945774904 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 163.0, 120.7393887677091 ], "wc_strengths_avg": [ 82.25, 57.32091677564134 ], "wc_weaknesses_avg": [ 106.75, 61.973280532823175 ], "wc_questions_avg": [ 103.25, 78.62688789466361 ], "wc_limitations_avg": [ 27.5, 19.665960439297137 ], "wc_review_avg": [ 482.75, 249.1770204091862 ], "wc_reply_reviewers_avg": [ 35.0, 10.954451150103322 ], "wc_reply_authors_avg": [ 21.5, 8.04673846971554 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9561828874675149, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16774108543582515416&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "utexas.edu;ucsd.edu;utexas.edu;isical.ac.in", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Texas at Austin;University of California, San Diego;Indian Statistical Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.ucsd.edu;https://www.isical.ac.in", "aff_unique_abbr": "UT Austin;UCSD;ISI", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;San Diego;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;India" }, { "id": "PKEmH9ZJfw", "title": "Implicit Causal Representation Learning via Switchable Mechanisms", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning causal representations from observational and interventional data in the absence of known ground-truth graph structures necessitates implicit latent causal representation learning. Implicit learning of causal mechanisms typically involves two categories of interventional data: hard and soft interventions. In real-world scenarios, soft interventions are often more realistic than hard interventions, as the latter require fully controlled environments. Unlike hard interventions, which directly force changes in a causal variable, soft interventions exert influence indirectly by affecting the causal mechanism. However, the subtlety of soft interventions impose several challenges for learning causal models. One challenge is that soft intervention's effects are ambiguous, since parental relations remain intact. In this paper, we tackle the challenges of learning causal models using soft interventions while retaining implicit modeling. Our approach models the effects of soft interventions by employing a *causal mechanism switch variable* designed to toggle between different causal mechanisms. In our experiments, we consistently observe improved learning of identifiable, causal representations, compared to baseline approaches.", "keywords": "Causal Representation Learning;Implicit Model;Soft Intervention;Variational Inference", "primary_area": "causal_inference", "supplementary_material": "/attachment/e9373433e03f6961978fbaf1333db02bbefe80ad.zip", "author": "Shayan Shirahmad Gale Bagi;Zahra Gharaee;Oliver Schulte;Mark Crowley", "authorids": "~Shayan_Shirahmad_Gale_Bagi1;~Zahra_Gharaee1;~Oliver_Schulte1;~Mark_Crowley1", "gender": "M;F;M;M", "homepage": "https://uwaterloo.ca/scholar/sshirahm;https://zahrag.github.io/;http://www.cs.sfu.ca/~oschulte/;http://markcrowley.ca", "dblp": ";174/4584;s/OliverSchulte;14/5500-1", "google_scholar": "3-YdeMMAAAAJ;https://scholar.google.pl/citations?user=nWe8d1MAAAAJ;;https://scholar.google.ca/citations?user=eL_y80EAAAAJ", "orcid": ";0000-0003-0140-0025;;0000-0003-3921-4762", "linkedin": "shayan-shirahmad-gale-bagi-819571149;zahragh/;;markanthonycrowley/", "or_profile": "~Shayan_Shirahmad_Gale_Bagi1;~Zahra_Gharaee1;~Oliver_Schulte1;~Mark_Crowley1", "aff": "University of Waterloo;University of Waterloo;Simon Fraser University;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;sfu.ca;uwaterloo.ca", "position": "PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024implicit,\ntitle={Implicit Causal Representation Learning via Switchable Mechanisms},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=PKEmH9ZJfw}\n}", "github": "", "project": "", "reviewers": "pbX5;81HE;Qdqq;CSBJ", "site": "https://openreview.net/forum?id=PKEmH9ZJfw", "pdf_size": 4847128, "rating": "3;3;4;5", "confidence": "3;3;5;3", "soundness": "2;2;2;3", "novelty": "2;2;2;2", "presentation": "2;1;2;3", "wc_summary": "95;150;75;105", "wc_strengths": "59;29;37;173", "wc_weaknesses": "373;105;141;77", "wc_questions": "356;136;8;52", "wc_limitations": "61;33;1;1", "wc_review": "944;453;262;408", "wc_reply_reviewers": "353;416;172;97", "wc_reply_authors": "398;498;333;295", "reply_reviewers": "1;2;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 3.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.25, 27.471576219794887 ], "wc_strengths_avg": [ 74.5, 57.92020372892347 ], "wc_weaknesses_avg": [ 174.0, 117.11105840184351 ], "wc_questions_avg": [ 138.0, 134.0 ], "wc_limitations_avg": [ 24.0, 25.03996805109783 ], "wc_review_avg": [ 516.75, 256.5788134277653 ], "wc_reply_reviewers_avg": [ 259.5, 129.7083266409678 ], "wc_reply_authors_avg": [ 381.0, 76.93828695779494 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LI1XpCPcrMsJ:scholar.google.com/&scioq=Implicit+Causal+Representation+Learning+via+Switchable+Mechanisms&hl=en&as_sdt=0,11", "gs_version_total": 2, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Waterloo;Simon Fraser University", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.sfu.ca", "aff_unique_abbr": "UW;SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Relationship Prompt Learning is Enough for Open-Vocabulary Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95318", "id": "PKcCHncbzg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PKcCHncbzg", "openreview": "https://openreview.net/forum?id=PKcCHncbzg", "poster": "/media/PosterPDFs/NeurIPS%202024/95318.png?t=1731657938.6714563", "project": "", "author_site": "Jiahao Li, Yang Lu, Yuan Xie, Yanyun Qu", "tldr": "", "abstract": "Open-vocabulary semantic segmentation (OVSS) aims to segment unseen classes without corresponding labels. Existing Vision-Language Model (VLM)-based methods leverage VLM's rich knowledge to enhance additional explicit segmentation-specific networks, yielding competitive results, but at the cost of extensive training cost. To reduce the cost, we attempt to enable VLM to directly produce the segmentation results without any segmentation-specific networks. Prompt learning offers a direct and parameter-efficient approach, yet it falls short in guiding VLM for pixel-level visual classification. Therefore, we propose the ${\\bf R}$elationship ${\\bf P}$rompt ${\\bf M}$odule (${\\bf RPM}$), which generates the relationship prompt that directs VLM to extract pixel-level semantic embeddings suitable for OVSS. Moreover, RPM integrates with VLM to construct the ${\\bf R}$elationship ${\\bf P}$rompt ${\\bf N}$etwork (${\\bf RPN}$), achieving OVSS without any segmentation-specific networks. RPN attains state-of-the-art performance with merely about ${\\bf 3M}$ trainable parameters (2\\% of total parameters).", "keywords": "Open-vocabulary semantic segmentation;Zero-shot semantic segmentation;Vision-Language Model;Prompt learning;Mixture-of-Expert", "primary_area": "machine_vision", "supplementary_material": "/attachment/5e4419e864796d94e1f282fe03f4090aa5c5b9f4.zip", "author": "Jiahaoli;Yang Lu;Yuan Xie;Yanyun Qu", "authorids": "~Jiahaoli1;~Yang_Lu5;~Yuan_Xie5;~Yanyun_Qu1", "gender": "M;M;;F", "homepage": ";https://jasonyanglu.github.io/;;http://quyanyun.xmu.edu.cn", "dblp": ";16/6317-9;;03/3500", "google_scholar": "https://scholar.google.com.hk/citations?user=Ab8l2h8AAAAJ;r7r4FGwAAAAJ;;", "orcid": "0009-0007-3481-4159;0000-0002-3497-9611;;", "linkedin": ";;;", "or_profile": "~Jiahaoli1;~Yang_Lu5;~Yuan_Xie5;~Yanyun_Qu1", "aff": "Xiamen University;Xiamen University;;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;;xmu.edu.cn", "position": "PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\njiahaoli2024relationship,\ntitle={Relationship Prompt Learning is Enough for Open-Vocabulary Semantic Segmentation},\nauthor={Jiahaoli and Yang Lu and Yuan Xie and Yanyun Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PKcCHncbzg}\n}", "github": "", "reviewers": "Rvjz;2wFX;DSMk;Rcdd", "pdf_size": 6839856, "rating": "4;4;4;7", "confidence": "4;5;5;4", "soundness": "2;2;2;4", "novelty": "2;2;2;3", "presentation": "1;3;2;3", "wc_summary": "105;52;43;54", "wc_strengths": "38;66;34;28", "wc_weaknesses": "105;59;412;33", "wc_questions": "288;11;61;3", "wc_limitations": "10;11;47;27", "wc_review": "546;199;597;145", "wc_reply_reviewers": "541;0;0;0", "wc_reply_authors": "1065;63;63;63", "reply_reviewers": "3;0;0;0", "reply_authors": "5;2;2;2", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 63.5, 24.315632831575655 ], "wc_strengths_avg": [ 41.5, 14.585952145814822 ], "wc_weaknesses_avg": [ 152.25, 152.16664384811804 ], "wc_questions_avg": [ 90.75, 116.03097646749337 ], "wc_limitations_avg": [ 23.75, 15.022899187573616 ], "wc_review_avg": [ 371.75, 201.46882513183027 ], "wc_reply_reviewers_avg": [ 135.25, 234.25987172369065 ], "wc_reply_authors_avg": [ 313.5, 433.8787272960038 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AJ2FucaV610J:scholar.google.com/&scioq=Relationship+Prompt+Learning+is+Enough+for+Open-Vocabulary+Semantic+Segmentation&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "xmu.edu.cn;xmu.edu.cn;;xmu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "The Impact of Geometric Complexity on Neural Collapse in Transfer Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95317", "id": "PLbFid00aU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PLbFid00aU", "openreview": "https://openreview.net/forum?id=PLbFid00aU", "poster": "", "project": "", "author_site": "Michael Munn, Benoit Dherin, Javier Gonzalvo", "tldr": "", "abstract": "Many of the recent advances in computer vision and language models can be attributed to the success of transfer learning via the pre-training of large foundation models. However, a theoretical framework which explains this empirical success is incomplete and remains an active area of research. Flatness of the loss surface and neural collapse have recently emerged as useful pre-training metrics which shed light on the implicit biases underlying pre-training. In this paper, we explore the geometric complexity of a model's learned representations as a fundamental mechanism that relates these two concepts. We show through experiments and theory that mechanisms which affect the geometric complexity of the pre-trained network also influence the neural collapse. Furthermore, we show how this effect of the geometric complexity generalizes to the neural collapse of new classes as well, thus encouraging better performance on downstream tasks, particularly in the few-shot setting.", "keywords": "transfer learning;geometric complexity;neural collapse;implicit bias;flatness;generalization bounds", "primary_area": "learning_theory", "supplementary_material": "", "author": "Michael Munn;Benoit Dherin;Javier Gonzalvo", "authorids": "~Michael_Munn1;~Benoit_Dherin1;~Javier_Gonzalvo1", "gender": "M;;", "homepage": ";;http://www.xavigonzalvo.com", "dblp": ";;", "google_scholar": "agHn7jkAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Michael_Munn1;~Benoit_Dherin1;~Javier_Gonzalvo1", "aff": "Google;;Google", "aff_domain": "google.com;;google.com", "position": "Researcher;;Researcher", "bibtex": "@inproceedings{\nmunn2024the,\ntitle={The Impact of Geometric Complexity on Neural Collapse in Transfer Learning},\nauthor={Michael Munn and Benoit Dherin and Javier Gonzalvo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PLbFid00aU}\n}", "github": "", "reviewers": "pQCH;N73X;5tN3", "pdf_size": 25527089, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "77;160;77", "wc_strengths": "30;341;48", "wc_weaknesses": "206;408;160", "wc_questions": "74;158;85", "wc_limitations": "1;61;1", "wc_review": "388;1128;371", "wc_reply_reviewers": "109;0;44", "wc_reply_authors": "41;96;349", "reply_reviewers": "1;0;2", "reply_authors": "2;2;4", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 104.66666666666667, 39.12657522565563 ], "wc_strengths_avg": [ 139.66666666666666, 142.55369358790938 ], "wc_weaknesses_avg": [ 258.0, 107.71567512050726 ], "wc_questions_avg": [ 105.66666666666667, 37.276742823851386 ], "wc_limitations_avg": [ 21.0, 28.284271247461902 ], "wc_review_avg": [ 629.0, 352.9145316739829 ], "wc_reply_reviewers_avg": [ 51.0, 44.773504069557326 ], "wc_reply_authors_avg": [ 162.0, 134.12183515992714 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:v-c5uTEEGF0J:scholar.google.com/&scioq=The+Impact+of+Geometric+Complexity+on+Neural+Collapse+in+Transfer+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "google.com;;google.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "POFf5SGSAL", "title": "Wiki Entity Summarization Benchmark", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Entity summarization aims to compute concise summaries for entities in knowledge graphs. Existing datasets and benchmarks are often limited to a few hundred entities and discard graph structure in source knowledge graphs. This limitation is particularly pronounced when it comes to ground-truth summaries, where there exist only a few labeled summaries for evaluation and training. We propose WikES, a comprehensive benchmark comprising of entities, their summaries, and their connections. Additionally, WikES features a dataset generator to test entity summarization algorithms in different areas of the knowledge graph. Importantly, our approach combines graph algorithms and NLP models as well as different data sources such that WikES does not require human annotation, rendering the approach cost-effective and generalizable to multiple domains. Finally, WikES is scalable and capable of capturing the complexities of knowledge graphs in terms of topology and semantics. \nWikES features existing datasets for comparison. Empirical studies of entity summarization methods confirm the usefulness of our benchmark. Data, code, and models are available at: https://github.com/msorkhpar/wiki-entity-summarization.", "keywords": "entity summarization;knowledge graph;dataset generator", "primary_area": "", "supplementary_material": "/attachment/284af97bfee6d26263e46280f38c7e71cb73bab1.pdf", "author": "Mohammad Sorkhpar;Saeedeh Javadi;Atefeh Moradan;Klim Zaporojets;Davide Mottin;Ira Assent", "authorids": "~Mohammad_Sorkhpar1;~Saeedeh_Javadi1;~Atefeh_Moradan1;~Klim_Zaporojets1;~Davide_Mottin1;~Ira_Assent1", "gender": "M;F;F;M;M;F", "homepage": "https://sorkhpar.dev;;https://pure.au.dk/portal/en/persons/atefeh-moradan(9e4d4374-f0a8-43ae-9f1e-67c8d4c576da).html;http://klimzaporojets.github.io/;https://mott.in;https://cs.au.dk/contact/people/show/person/ira@cs.au.dk", "dblp": "357/3891.html;;;;135/7623;a/IraAssent", "google_scholar": "1M5LFmIAAAAJ;;;oFjUJvwAAAAJ;https://scholar.google.it/citations?user=evZ9Q9EAAAAJ;https://scholar.google.com.tw/citations?user=w2n5LhUAAAAJ", "orcid": ";;;0000-0003-4988-978X;0000-0001-8256-2258;0000-0002-1091-9948", "linkedin": "msorkhpar/;saeedeh-javadi/;atefeh-moradan-ba158396/;klim-zaporojets-9102b0a/;davide-mottin-67ab7323/;ira-assent-954b2431/", "or_profile": "~Mohammad_Sorkhpar1;~Saeedeh_Javadi1;~Atefeh_Moradan1;~Klim_Zaporojets1;~Davide_Mottin1;~Ira_Assent1", "aff": "Indiana State University ;Polytechnic Institute of Turin;;Aarhus University;Aarhus University;Aarhus University", "aff_domain": "cs.indstate.edu;polito.it;;au.dk;au.dk;au.dk", "position": "MS student;MS student;;Postdoc;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2024wiki,\ntitle={Wiki Entity Summarization Benchmark},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=POFf5SGSAL}\n}", "github": "", "project": "", "reviewers": "Tfwf;XQWd;RL55;C42c", "site": "https://openreview.net/forum?id=POFf5SGSAL", "pdf_size": 447483, "rating": "6;6;7;7", "confidence": "3;4;4;4", "wc_summary_and_contributions": "37;68;70;67", "wc_strengths": "19;44;61;50", "wc_improvement": "72;11;50;81", "wc_limitations": "5;30;29;4", "wc_correctness": "245;43;21;3", "wc_clarity": "83;10;106;1", "wc_relation_to_prior_work": "6;18;1;1", "wc_documentation": "5;25;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "473;250;340;209", "wc_reply_reviewers": "115;112;13;26", "wc_reply_authors": "567;570;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;4;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 60.5, 13.6106575888162 ], "wc_strengths_avg": [ 43.5, 15.402921800749363 ], "wc_improvement_avg": [ 53.5, 27.004629232781554 ], "wc_limitations_avg": [ 17.0, 12.509996003196804 ], "wc_correctness_avg": [ 78.0, 97.45255255764212 ], "wc_clarity_avg": [ 50.0, 45.348649373492925 ], "wc_relation_to_prior_work_avg": [ 6.5, 6.946221994724902 ], "wc_documentation_avg": [ 8.0, 9.9498743710662 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 318.0, 101.25956744920452 ], "wc_reply_reviewers_avg": [ 66.5, 47.23610906922796 ], "wc_reply_authors_avg": [ 284.25, 284.2519788849323 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=821372155228785072&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Indiana State University;Polytechnic Institute of Turin;Aarhus University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.indstate.edu;https://www.polito.it;https://au.dk", "aff_unique_abbr": "ISU;Polito;AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2", "aff_country_unique": "United States;Italy;Denmark" }, { "title": "Accelerating Transformers with Spectrum-Preserving Token Merging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95316", "id": "PPdJPIO3mV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PPdJPIO3mV", "openreview": "https://openreview.net/forum?id=PPdJPIO3mV", "poster": "/media/PosterPDFs/NeurIPS%202024/95316.png?t=1731655495.0183496", "project": "", "author_site": "Chau Tran, Duy M. H. Nguyen, Manh-Duy Nguyen, TrungTin Nguyen, Ngan Le, Pengtao Xie, Daniel Sonntag, James Zou, Binh Nguyen, Mathias Niepert", "tldr": "", "abstract": "Increasing the throughput of the Transformer architecture, a foundational component used in numerous state-of-the-art models for vision and language tasks (e.g., GPT, LLaVa), is an important problem in machine learning. One recent and effective strategy is to merge token representations within Transformer models, aiming to reduce computational and memory requirements while maintaining accuracy. Prior work has proposed algorithms based on Bipartite Soft Matching (BSM), which divides tokens into distinct sets and merges the top $k$ similar tokens. However, these methods have significant drawbacks, such as sensitivity to token-splitting strategies and damage to informative tokens in later layers. This paper presents a novel paradigm called PiToMe, which prioritizes the preservation of informative tokens using an additional metric termed the \\textit{energy score}. This score identifies large clusters of similar tokens as high-energy, indicating potential candidates for merging, while smaller (unique and isolated) clusters are considered as low-energy and preserved. Experimental findings demonstrate that PiToMe saved from 40-60\\% FLOPs of the base models while exhibiting superior off-the-shelf performance on image classification (0.5\\% average performance drop of ViT-MAEH compared to 2.6\\% as baselines), image-text retrieval (0.3\\% average performance drop of Clip on Flick30k compared to 4.5\\% as others), and analogously in visual questions answering with LLaVa-7B. Furthermore, PiToMe is theoretically shown to preserve intrinsic spectral properties to the original token space under mild conditions.", "keywords": "token merging;vision transformer;model compression", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Hoai-Chau Tran;Duy Minh Ho Nguyen;Manh-Duy Nguyen;TrungTin Nguyen;Ngan Hoang Le;Pengtao Xie;Daniel Sonntag;James Zou;Binh T. Nguyen;Mathias Niepert", "authorids": "~Hoai-Chau_Tran1;~Duy_Minh_Ho_Nguyen1;~Manh-Duy_Nguyen1;~TrungTin_Nguyen1;~Ngan_Hoang_Le1;~Pengtao_Xie3;~Daniel_Sonntag2;~James_Zou1;~Binh_T._Nguyen1;~Mathias_Niepert1", "gender": "M;M;M;M;F;M;M;;M;M", "homepage": "https://hchautran.github.io/;;;https://trung-tinnguyen.github.io/;https://computer-science-and-computer-engineering.uark.edu/directory/index/uid/thile/name/Thi+Hoang+Ngan+Le/;https://pengtaoxie.github.io/;https://www.dfki.de/~sonntag/;;https://sites.google.com/site/ntbinhpolytechnique/;http://www.matlog.net", "dblp": ";199/8349;;275/3643;37/245;133/1998;83/5858.html;;06/2545;n/MathiasNiepert", "google_scholar": "FZH2vcEAAAAJ;_NIyeykAAAAJ;wmuJBfcAAAAJ;NhiJDJsAAAAJ;8ck0k_UAAAAJ;cnncomYAAAAJ;v7i6Uz4AAAAJ;23ZXZvEAAAAJ;dXEb3PMAAAAJ;https://scholar.google.de/citations?user=p5vLzq0AAAAJ", "orcid": "0009-0003-1137-8340;;0000-0001-6878-7039;0000-0001-8433-5980;0000-0003-2571-0511;;;;0000-0001-5249-9702;", "linkedin": ";;;trungtinnguyen0/;;;;;;", "or_profile": "~Hoai-Chau_Tran1;~Duy_Minh_Ho_Nguyen1;~Manh-Duy_Nguyen1;~TrungTin_Nguyen1;~Ngan_Hoang_Le1;~Pengtao_Xie3;~Daniel_Sonntag2;~James_Zou1;~Binh_T._Nguyen1;~Mathias_Niepert1", "aff": ";German Research Center for AI;;The University of Queensland;University of Arkansas, Fayetteville;Carnegie Mellon University;Carl von Ossietzky Universit\u00e4t Oldenburg;Stanford University;Ho Chi Minh city University of Science, Vietnam National University;NEC", "aff_domain": ";dfki.de;;uq.edu.au;uark.edu; ;uol.de;stanford.edu;hcmus.edu.vn;neclab.eu", "position": ";Researcher;;Postdoc;Assistant Professor;Graduate Student;Full Professor;Assistant Professor;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\ntran2024accelerating,\ntitle={Accelerating Transformers with Spectrum-Preserving Token Merging},\nauthor={Hoai-Chau Tran and Duy Minh Ho Nguyen and Manh-Duy Nguyen and TrungTin Nguyen and Ngan Hoang Le and Pengtao Xie and Daniel Sonntag and James Zou and Binh T. Nguyen and Mathias Niepert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PPdJPIO3mV}\n}", "github": "", "reviewers": "zfJf;ZkPm;jrpm;4Z7M", "pdf_size": 13101675, "rating": "5;6;6;7", "confidence": "5;4;3;3", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;2;4", "wc_summary": "104;162;187;86", "wc_strengths": "140;45;40;90", "wc_weaknesses": "194;72;263;106", "wc_questions": "69;214;39;32", "wc_limitations": "55;10;15;19", "wc_review": "562;503;544;333", "wc_reply_reviewers": "0;56;66;0", "wc_reply_authors": "81;107;111;64", "reply_reviewers": "0;1;1;0", "reply_authors": "3;3;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 134.75, 41.215136782497765 ], "wc_strengths_avg": [ 78.75, 40.36938815488786 ], "wc_weaknesses_avg": [ 158.75, 74.86446086094523 ], "wc_questions_avg": [ 88.5, 73.77838436832295 ], "wc_limitations_avg": [ 24.75, 17.75352077758099 ], "wc_review_avg": [ 485.5, 90.60491156664742 ], "wc_reply_reviewers_avg": [ 30.5, 30.70423423568808 ], "wc_reply_authors_avg": [ 90.75, 19.266226926930972 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2779933548364211601&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": ";dfki.de;;uq.edu.au;uark.edu; ;uol.de;stanford.edu;hcmus.edu.vn;neclab.eu", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;6;7", "aff_unique_norm": "German Research Center for Artificial Intelligence;University of Queensland;University of Arkansas;Carnegie Mellon University;Carl von Ossietzky University of Oldenburg;Stanford University;Ho Chi Minh City University of Science;NEC Corporation", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.dfki.de/;https://www.uq.edu.au;https://www.uark.edu;https://www.cmu.edu;https://www.uni-oldenburg.de/;https://www.stanford.edu;;https://www.nec.com", "aff_unique_abbr": "DFKI;UQ;UARK;CMU;UvO;Stanford;;NEC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Fayetteville;Stanford", "aff_country_unique_index": "0;1;2;2;0;2;3;4", "aff_country_unique": "Germany;Australia;United States;Vietnam;Japan" }, { "title": "Recursive PAC-Bayes: A Frequentist Approach to Sequential Prior Updates with No Information Loss", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95315", "id": "PQt6Vg2X5u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PQt6Vg2X5u", "openreview": "https://openreview.net/forum?id=PQt6Vg2X5u", "poster": "/media/PosterPDFs/NeurIPS%202024/95315.png?t=1732094996.876973", "project": "", "author_site": "Yi-Shan Wu, Yijie Zhang, Badr-Eddine Cherief-Abdellatif, Yevgeny Seldin", "tldr": "", "abstract": "PAC-Bayesian analysis is a frequentist framework for incorporating prior knowledge into learning. It was inspired by Bayesian learning, which allows sequential data processing and naturally turns posteriors from one processing step into priors for the next. However, despite two and a half decades of research, the ability to update priors sequentially without losing confidence information along the way remained elusive for PAC-Bayes. While PAC-Bayes allows construction of data-informed priors, the final confidence intervals depend only on the number of points that were not used for the construction of the prior, whereas confidence information in the prior, which is related to the number of points used to construct the prior, is lost. This limits the possibility and benefit of sequential prior updates, because the final bounds depend only on the size of the final batch.\n\nWe present a novel and, in retrospect, surprisingly simple and powerful PAC-Bayesian procedure that allows sequential prior updates with no information loss. The procedure is based on a novel decomposition of the expected loss of randomized classifiers. The decomposition rewrites the loss of the posterior as an excess loss relative to a downscaled loss of the prior plus the downscaled loss of the prior, which is bounded recursively. As a side result, we also present a generalization of the split-kl and PAC-Bayes-split-kl inequalities to discrete random variables, which we use for bounding the excess losses, and which can be of independent interest. In empirical evaluation the new procedure significantly outperforms state-of-the-art.", "keywords": "PAC-Bayes;data-dependent prior", "primary_area": "learning_theory", "supplementary_material": "/attachment/752e0496be18b4d06509f5c64ab12b1137a78c7b.zip", "author": "Yi-Shan Wu;Yijie Zhang;Badr-Eddine Ch\u00e9rief-Abdellatif;Yevgeny Seldin", "authorids": "~Yi-Shan_Wu1;~Yijie_Zhang1;~Badr-Eddine_Ch\u00e9rief-Abdellatif1;~Yevgeny_Seldin2", "gender": "Non-Binary;M;M;M", "homepage": "https://scholar.google.com/citations?user=IrW8ytQAAAAJ&hl=en;https://sites.google.com/view/yijiezhang/home;http://badreddinecheriefabdellatif.github.io;https://sites.google.com/site/yevgenyseldin/", "dblp": "138/4357-3;;;34/39", "google_scholar": "IrW8ytQAAAAJ;HVR4014AAAAJ;;fpWsD9oAAAAJ", "orcid": "0000-0002-7949-0115;;;", "linkedin": ";;;", "or_profile": "~Yi-Shan_Wu1;~Yijie_Zhang1;~Badr-Eddine_Ch\u00e9rief-Abdellatif1;~Yevgeny_Seldin2", "aff": "University of Southern Denmark - SDU;University of Copenhagen;CNRS;University of Copenhagen", "aff_domain": "sdu.dk;di.ku.dk;cnrs.fr;di.ku.dk", "position": "Postdoc;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nwu2024recursive,\ntitle={Recursive {PAC}-Bayes: A Frequentist Approach to Sequential Prior Updates with No Information Loss},\nauthor={Yi-Shan Wu and Yijie Zhang and Badr-Eddine Ch{\\'e}rief-Abdellatif and Yevgeny Seldin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PQt6Vg2X5u}\n}", "github": "", "reviewers": "gLvU;Kzqj;ZaLx;oqWC", "pdf_size": 802477, "rating": "6;6;7;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "27;92;99;51", "wc_strengths": "105;40;33;13", "wc_weaknesses": "745;624;155;121", "wc_questions": "25;141;2;53", "wc_limitations": "17;51;3;2", "wc_review": "919;948;292;240", "wc_reply_reviewers": "4;251;48;0", "wc_reply_authors": "0;20;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.25, 29.600464523382062 ], "wc_strengths_avg": [ 47.75, 34.50633999716574 ], "wc_weaknesses_avg": [ 411.25, 276.8396422118769 ], "wc_questions_avg": [ 55.25, 52.69902750525858 ], "wc_limitations_avg": [ 18.25, 19.81634426426832 ], "wc_review_avg": [ 599.75, 334.4131987526808 ], "wc_reply_reviewers_avg": [ 75.75, 102.91835356242345 ], "wc_reply_authors_avg": [ 5.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17434735190326082654&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "sdu.dk;di.ku.dk;cnrs.fr;di.ku.dk", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Southern Denmark;University of Copenhagen;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sdu.dk;https://www.ku.dk;https://www.cnrs.fr", "aff_unique_abbr": "SDU;UCPH;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Denmark;France" }, { "title": "Group Robust Preference Optimization in Reward-free RLHF", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95314", "id": "PRAsjrmXXK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PRAsjrmXXK", "openreview": "https://openreview.net/forum?id=PRAsjrmXXK", "poster": "", "project": "", "author_site": "Shyam Sundhar Ramesh, Yifan Hu, Iason Chaimalas, Viraj Mehta, Pier Giuseppe Sessa, Haitham Bou Ammar, Ilija Bogunovic", "tldr": "", "abstract": "Adapting large language models (LLMs) for specific tasks usually involves fine-tuning through reinforcement learning with human feedback (RLHF) on preference data. While these data often come from diverse labelers' groups (e.g., different demographics, ethnicities, company teams, etc.), traditional RLHF approaches adopt a \"one-size-fits-all\" approach, i.e., they indiscriminately assume and optimize a single preference model, thus not being robust to unique characteristics and needs of the various groups. To address this limitation, we propose a novel Group Robust Preference Optimization (GRPO) method to align LLMs to individual groups' preferences robustly. Our approach builds upon reward-free direct preference optimization methods, but unlike previous approaches, it seeks a robust policy which maximizes the worst-case group performance. To achieve this, GRPO adaptively and sequentially weights the importance of different groups, prioritizing groups with worse cumulative loss. We theoretically study the feasibility of GRPO and analyze its convergence for the log-linear policy class. By fine-tuning LLMs with GRPO using diverse group-based global opinion data, we significantly improved performance for the worst-performing groups, reduced loss imbalances across groups, and improved probability accuracies compared to non-robust baselines.", "keywords": "RLHF;DPO;Robust Alignment", "primary_area": "generative_models", "supplementary_material": "/attachment/954e6786d83aad53e058b4998d213428736608e3.zip", "author": "Shyam Sundhar Ramesh;Yifan Hu;Iason Chaimalas;Viraj Mehta;Pier Giuseppe Sessa;Haitham Bou Ammar;Ilija Bogunovic", "authorids": "~Shyam_Sundhar_Ramesh1;~Yifan_Hu2;~Iason_Chaimalas1;~Viraj_Mehta1;~Pier_Giuseppe_Sessa1;~Haitham_Bou_Ammar1;~Ilija_Bogunovic2", "gender": "M;M;;M;;M;", "homepage": "https://www.linkedin.com/in/shyamsundharr/;https://sites.google.com/view/yifan-hu;;http://virajm.com;;;", "dblp": ";;;https://dblp.org/pers/m/Mehta:Viraj.html;;;", "google_scholar": "-d5usyUAAAAJ;rO2s0EEAAAAJ;;4pHjHBkAAAAJ;;https://scholar.google.co.uk/citations?user=AE5suDoAAAAJ;", "orcid": ";;0009-0008-1460-7678;0000-0002-2021-9718;;;", "linkedin": ";;iason-chaimalas;virajrmehta/;;;", "or_profile": "~Shyam_Sundhar_Ramesh1;~Yifan_Hu2;~Iason_Chaimalas1;~Viraj_Mehta1;~Pier_Giuseppe_Sessa1;~Haitham_Bou_Ammar1;~Ilija_Bogunovic2", "aff": "University College London, University of London;ETHZ - ETH Zurich;University College London, University of London;Carnegie Mellon University;;Huawei R&D UK;", "aff_domain": "ucl.ac.uk;inf.ethz.ch;ucl.ac.uk;cmu.edu;;huawei.com;", "position": "PhD student;Postdoc;Undergrad student;PhD student;;Principal Researcher;", "bibtex": "@inproceedings{\nramesh2024group,\ntitle={Group Robust Preference Optimization in Reward-free {RLHF}},\nauthor={Shyam Sundhar Ramesh and Yifan Hu and Iason Chaimalas and Viraj Mehta and Pier Giuseppe Sessa and Haitham Bou Ammar and Ilija Bogunovic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PRAsjrmXXK}\n}", "github": "", "reviewers": "SYLq;7Bpj;qNjA;V78J;kmNK", "pdf_size": 1272558, "rating": "4;5;6;6;7", "confidence": "4;4;3;2;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "82;48;48;43;112", "wc_strengths": "46;5;42;41;253", "wc_weaknesses": "283;82;45;97;150", "wc_questions": "83;11;3;20;142", "wc_limitations": "10;1;33;4;181", "wc_review": "504;147;171;205;838", "wc_reply_reviewers": "0;0;0;0;30", "wc_reply_authors": "0;0;0;0;13", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 66.6, 26.635314903338386 ], "wc_strengths_avg": [ 77.4, 89.04066486723917 ], "wc_weaknesses_avg": [ 131.4, 82.96649926325685 ], "wc_questions_avg": [ 51.8, 53.22931523136475 ], "wc_limitations_avg": [ 45.8, 68.5256156484566 ], "wc_review_avg": [ 373.0, 265.89095509249654 ], "wc_reply_reviewers_avg": [ 6.0, 12.0 ], "wc_reply_authors_avg": [ 2.6, 5.2 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.681385143869247, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16175550776135929264&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "ucl.ac.uk;inf.ethz.ch;ucl.ac.uk;cmu.edu;;huawei.com;", "author_num": 7, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University College London;ETH Zurich;Carnegie Mellon University;Huawei", "aff_unique_dep": ";;;R&D", "aff_unique_url": "https://www.ucl.ac.uk;https://www.ethz.ch;https://www.cmu.edu;https://www.huawei.com/uk", "aff_unique_abbr": "UCL;ETHZ;CMU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "United Kingdom;Switzerland;United States" }, { "title": "No Train, all Gain: Self-Supervised Gradients Improve Deep Frozen Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95313", "id": "PRBsEz8rnV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PRBsEz8rnV", "openreview": "https://openreview.net/forum?id=PRBsEz8rnV", "poster": "/media/PosterPDFs/NeurIPS%202024/95313.png?t=1733095472.4199572", "project": "", "author_site": "Walter Simoncini, Andrei Bursuc, Spyridon Gidaris, Yuki Asano", "tldr": "", "abstract": "This paper introduces FUNGI, **F**eatures from **UN**supervised **G**rad**I**ents, a method to enhance the features of transformer encoders by leveraging self-supervised gradients. Our method is simple: given any pretrained model, we first compute gradients from various self-supervised objectives for each input. These gradients are projected to a lower dimension and then concatenated with the model's output embedding. The resulting features are evaluated on k-nearest neighbor classification over 11 datasets from vision, 5 from natural language processing, and 2 from audio. Across backbones spanning various sizes and pretraining strategies, FUNGI features provide consistent performance improvements over the embeddings. We also show that using FUNGI features can benefit linear classification, clustering and image retrieval, and that they significantly improve the retrieval-based in-context scene understanding abilities of pretrained models, for example improving upon DINO by +17% for semantic segmentation - without any training. Code is available at https://github.com/WalterSimoncini/fungivision.", "keywords": "self-supervised;gradients;computer vision;transformers;k-nearest neighbor;classification;in-context learning;clustering;retrieval", "primary_area": "machine_vision", "supplementary_material": "", "author": "Walter Simoncini;Andrei Bursuc;Spyros Gidaris;Yuki M Asano", "authorids": "~Walter_Simoncini1;~Andrei_Bursuc1;~Spyros_Gidaris1;~Yuki_M_Asano1", "gender": "Not Specified;M;M;M", "homepage": "https://walter.ashita.nl/;https://abursuc.github.io/;;https://yukimasano.github.io/", "dblp": "305/9900;40/8692.html;163/2312;239/8823", "google_scholar": "YfmqjwEAAAAJ;https://scholar.google.fr/citations?user=HTfERCsAAAAJ;https://scholar.google.fr/citations?user=7atfg7EAAAAJ;CdpLhlgAAAAJ", "orcid": ";;;", "linkedin": "walter-simoncini-a77508137/;;;", "or_profile": "~Walter_Simoncini1;~Andrei_Bursuc1;~Spyros_Gidaris1;~Yuki_Asano1", "aff": "University of Amsterdam;Valeo;Valeo.ai;University of Amsterdam", "aff_domain": "uva.nl;valeo.com;valeo.com;uva.nl", "position": "MS student;Research Scientist;Research scientist;Assistant Professor", "bibtex": "@inproceedings{\nsimoncini2024no,\ntitle={No Train, all Gain: Self-Supervised Gradients Improve Deep Frozen Representations},\nauthor={Walter Simoncini and Andrei Bursuc and Spyros Gidaris and Yuki M Asano},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PRBsEz8rnV}\n}", "github": "", "reviewers": "8XpZ;gxYR;EjYW;98WT", "pdf_size": 1522290, "rating": "4;6;7;8", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;4;4", "wc_summary": "109;62;88;69", "wc_strengths": "150;10;124;58", "wc_weaknesses": "112;24;109;217", "wc_questions": "55;119;83;67", "wc_limitations": "13;1;10;17", "wc_review": "439;216;414;428", "wc_reply_reviewers": "0;0;0;230", "wc_reply_authors": "0;0;0;463", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 82.0, 18.261982367749674 ], "wc_strengths_avg": [ 85.5, 54.99772722576816 ], "wc_weaknesses_avg": [ 115.5, 68.42696836774226 ], "wc_questions_avg": [ 81.0, 24.08318915758459 ], "wc_limitations_avg": [ 10.25, 5.889609494694874 ], "wc_review_avg": [ 374.25, 91.79426725019378 ], "wc_reply_reviewers_avg": [ 57.5, 99.59292143521044 ], "wc_reply_authors_avg": [ 115.75, 200.48488097609754 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.09759000729485331, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5833377586130161431&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "uva.nl;valeo.com;valeo.com;uva.nl", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Amsterdam;Valeo", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.valeo.com", "aff_unique_abbr": "UvA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Netherlands;France" }, { "title": "Towards Comprehensive Detection of Chinese Harmful Memes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97711", "id": "PSDXcYjrkO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PSDXcYjrkO", "openreview": "https://openreview.net/forum?id=PSDXcYjrkO", "poster": "/media/PosterPDFs/NeurIPS%202024/97711.png?t=1729393611.1245017", "project": "", "author_site": "Junyu Lu, Bo Xu, Xiaokun Zhang, Hongbo Wang, Haohao Zhu, Dongyu Zhang, Liang Yang, Hongfei Lin", "tldr": "", "abstract": "Harmful memes have proliferated on the Chinese Internet, while research on detecting Chinese harmful memes significantly lags behind due to the absence of reliable datasets and effective detectors.\nTo this end, we present the comprehensive detection of Chinese harmful memes.\nWe introduce ToxiCN MM, the first Chinese harmful meme dataset, which consists of 12,000 samples with fine-grained annotations for meme types. \nAdditionally, we propose a baseline detector, Multimodal Knowledge Enhancement (MKE), designed to incorporate contextual information from meme content, thereby enhancing the model's understanding of Chinese memes.\nIn the evaluation phase, we conduct extensive quantitative experiments and qualitative analyses on multiple baselines, including LLMs and our MKE. \nExperimental results indicate that detecting Chinese harmful memes is challenging for existing models, while demonstrating the effectiveness of MKE.", "keywords": "Chinese harmful meme; Fine-grained dataset; Baseline detector", "primary_area": "", "supplementary_material": "", "author": "Junyu Lu;Bo Xu;Xiaokun Zhang;WangHongbo;Haohao Zhu;Dongyu Zhang;Liang Yang;Hongfei Lin", "authorids": "~Junyu_Lu3;~Bo_Xu8;~Xiaokun_Zhang1;~WangHongbo2;~Haohao_Zhu1;~Dongyu_Zhang4;~Liang_Yang3;~Hongfei_Lin3", "gender": "M;;;M;F;M;M;M", "homepage": "https://xubo123456.github.io/;https://zhang-xiaokun.github.io/;https://github.com/dut-laowang?tab=repositories;;https://www.dongyuzhang.com/;;;https://github.com/DUT-lujunyu", "dblp": "26/1194-9;32/753-1.html;;267/2711;;05/3933-3;https://dblp.uni-trier.de/pid/07/1644;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;TQS4VgcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;kV68br0AAAAJ;", "orcid": "0000-0001-5453-978X;0000-0002-9755-2471;;;0000-0002-7683-5560;;0000-0003-0872-7688;0000-0002-4094-2540", "linkedin": ";;;;;;;", "or_profile": "~Bo_Xu8;~Xiaokun_Zhang1;~WangHongbo2;~Haohao_Zhu1;~Dongyu_Zhang4;~Liang_Yang3;~Hongfei_Lin3;~JunYu_Lu1", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn", "position": "Associate Professor;PhD student;MS student;PhD student;Full Professor;Associate Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nlu2024towards,\ntitle={Towards Comprehensive Detection of Chinese Harmful Memes},\nauthor={Junyu Lu and Bo Xu and Xiaokun Zhang and WangHongbo and Haohao Zhu and Dongyu Zhang and Liang Yang and Hongfei Lin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PSDXcYjrkO}\n}", "github": "", "reviewers": "VZSy;gcwq;U1sx;wCp9", "pdf_size": 3818807, "rating": "4;7;7;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "100;104;46;49", "wc_strengths": "7;50;7;94", "wc_improvement": "4;87;7;75", "wc_limitations": "4;55;1;1", "wc_correctness": "18;34;1;1", "wc_clarity": "3;4;1;1", "wc_relation_to_prior_work": "3;12;1;1", "wc_documentation": "8;26;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "148;373;66;224", "wc_reply_reviewers": "0;17;23;0", "wc_reply_authors": "726;418;999;341", "reply_reviewers": "0;1;1;0", "reply_authors": "3;1;3;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 74.75, 27.307279249313726 ], "wc_strengths_avg": [ 39.5, 36.03123644839294 ], "wc_improvement_avg": [ 43.25, 38.0024670251814 ], "wc_limitations_avg": [ 15.25, 22.982330169066845 ], "wc_correctness_avg": [ 13.5, 13.720422734012244 ], "wc_clarity_avg": [ 2.25, 1.299038105676658 ], "wc_relation_to_prior_work_avg": [ 4.25, 4.548351349665063 ], "wc_documentation_avg": [ 9.0, 10.222524150130436 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 202.75, 113.06497026046573 ], "wc_reply_reviewers_avg": [ 10.0, 10.222524150130436 ], "wc_reply_authors_avg": [ 621.0, 261.49474182093985 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=679570825516642660&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Dalian University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.dlut.edu.cn/", "aff_unique_abbr": "DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Active preference learning for ordering items in- and out-of-sample", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95312", "id": "PSLH5q7PFo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PSLH5q7PFo", "openreview": "https://openreview.net/forum?id=PSLH5q7PFo", "poster": "/media/PosterPDFs/NeurIPS%202024/95312.png?t=1733302482.5254073", "project": "", "author_site": "Herman Bergstr\u00f6m, Emil Carlsson, Devdatt Dubhashi, Fredrik Johansson", "tldr": "", "abstract": "Learning an ordering of items based on pairwise comparisons is useful when items are difficult to rate consistently on an absolute scale, for example, when annotators have to make subjective assessments. When exhaustive comparison is infeasible, actively sampling item pairs can reduce the number of annotations necessary for learning an accurate ordering. However, many algorithms ignore shared structure between items, limiting their sample efficiency and precluding generalization to new items. It is also common to disregard how noise in comparisons varies between item pairs, despite it being informative of item similarity. In this work, we study active preference learning for ordering items with contextual attributes, both in- and out-of-sample. We give an upper bound on the expected ordering error of a logistic preference model as a function of which items have been compared. Next, we propose an active learning strategy that samples items to minimize this bound by accounting for aleatoric and epistemic uncertainty in comparisons. We evaluate the resulting algorithm, and a variant aimed at reducing model misspecification, in multiple realistic ordering tasks with comparisons made by human annotators. Our results demonstrate superior sample efficiency and generalization compared to non-contextual ranking approaches and active preference learning baselines.", "keywords": "ordering;active learning;preference learning;medical imaging;human feedback;pairwise comparison", "primary_area": "active_learning", "supplementary_material": "/attachment/1fe583241947bcef814d154df29c918c5dcd81aa.zip", "author": "Herman Bergstr\u00f6m;Emil Carlsson;Devdatt Dubhashi;Fredrik D. Johansson", "authorids": "~Herman_Bergstr\u00f6m1;~Emil_Carlsson1;~Devdatt_Dubhashi1;~Fredrik_D._Johansson1", "gender": "M;M;M;M", "homepage": ";https://e-carlsson.github.io;;http://www.fredjo.com", "dblp": ";293/8792;d/DPDubhashi.html;58/1342-2", "google_scholar": ";VZhBQWQAAAAJ;z8cbzqkAAAAJ;ml-AyBQAAAAJ", "orcid": ";;0000-0002-9928-2305;", "linkedin": "herman-bergstr\u00f6m-9b3b2815a;;;", "or_profile": "~Herman_Bergstr\u00f6m1;~Emil_Carlsson1;~Devdatt_Dubhashi1;~Fredrik_Daniel_Johansson1", "aff": "Chalmers University of Technology;Chalmers University of Technology;Chalmers University;Chalmers University of Technology", "aff_domain": "chalmers.se;chalmers.se;chalmers.se;chalmers.se", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbergstr{\\\"o}m2024active,\ntitle={Active preference learning for ordering items in- and out-of-sample},\nauthor={Herman Bergstr{\\\"o}m and Emil Carlsson and Devdatt Dubhashi and Fredrik D. Johansson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PSLH5q7PFo}\n}", "github": "", "reviewers": "nNXZ;tuDy;6ajg;LY2M", "pdf_size": 4693468, "rating": "4;5;5;8", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "2;2;2;4", "presentation": "3;3;3;4", "wc_summary": "48;68;91;144", "wc_strengths": "27;26;70;163", "wc_weaknesses": "64;87;140;81", "wc_questions": "8;147;15;559", "wc_limitations": "4;18;11;1", "wc_review": "151;346;327;948", "wc_reply_reviewers": "0;5;0;45", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.75, 35.86345633092271 ], "wc_strengths_avg": [ 71.5, 55.73374202401988 ], "wc_weaknesses_avg": [ 93.0, 28.416544476765644 ], "wc_questions_avg": [ 182.25, 224.45419911420683 ], "wc_limitations_avg": [ 8.5, 6.576473218982953 ], "wc_review_avg": [ 443.0, 301.3113008169458 ], "wc_reply_reviewers_avg": [ 12.5, 18.874586088176873 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5012589129173740776&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "chalmers.se;chalmers.se;chalmers.se;chalmers.se", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chalmers University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.chalmers.se", "aff_unique_abbr": "Chalmers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Sweden" }, { "title": "Reinforcement Learning Guided Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95311", "id": "PSMBefUZa2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PSMBefUZa2", "openreview": "https://openreview.net/forum?id=PSMBefUZa2", "poster": "/media/PosterPDFs/NeurIPS%202024/95311.png?t=1733092358.3769631", "project": "", "author_site": "Marzi Heidari, Hanping Zhang, Yuhong Guo", "tldr": "", "abstract": "In recent years, semi-supervised learning (SSL) has gained significant attention due to its ability to leverage both labeled and unlabeled data to improve model performance, especially when labeled data is scarce. However, most current SSL methods rely on heuristics or predefined rules for generating pseudo-labels and leveraging unlabeled data. They are limited to exploiting loss functions and regularization methods within the standard norm. In this paper, we propose a novel Reinforcement Learning (RL) Guided SSL method, RLGSSL, that formulates SSL as a one-armed bandit problem and deploys an innovative RL loss based on weighted reward to adaptively guide the learning process of the prediction model. RLGSSL incorporates a carefully designed reward function that balances the use of labeled and unlabeled data to enhance generalization performance. A semi-supervised teacher-student framework is further deployed to increase the learning stability. We demonstrate the effectiveness of RLGSSL through extensive experiments on several benchmark datasets and show that our approach achieves consistent superior performance compared to state-of-the-art SSL methods.", "keywords": "Semi-Supervised Learning", "primary_area": "other", "supplementary_material": "", "author": "Marzi Heidari;Hanping Zhang;Yuhong Guo", "authorids": "~Marzi_Heidari1;~Hanping_Zhang1;~Yuhong_Guo1", "gender": "F;M;", "homepage": ";https://jajajag.github.io/;", "dblp": "270/0305;230/3460;", "google_scholar": "https://scholar.google.ca/citations?user=OEWPekoAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Marzi_Heidari1;~Hanping_Zhang1;~Yuhong_Guo1", "aff": "Carleton University;Carleton University;", "aff_domain": "carleton.ca;carleton.ca;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nheidari2024reinforcement,\ntitle={Reinforcement Learning Guided Semi-Supervised Learning},\nauthor={Marzi Heidari and Hanping Zhang and Yuhong Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PSMBefUZa2}\n}", "github": "", "reviewers": "mW6a;EQVV;6MiT;5CmG", "pdf_size": 383229, "rating": "3;5;5;5", "confidence": "4;4;3;3", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "2;3;2;2", "wc_summary": "52;85;67;161", "wc_strengths": "25;50;154;42", "wc_weaknesses": "154;105;51;74", "wc_questions": "38;46;42;237", "wc_limitations": "24;8;1;33", "wc_review": "293;294;315;547", "wc_reply_reviewers": "200;149;0;22", "wc_reply_authors": "351;228;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;2;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 41.930746475587576 ], "wc_strengths_avg": [ 67.75, 50.60817621689207 ], "wc_weaknesses_avg": [ 96.0, 38.58108344772085 ], "wc_questions_avg": [ 90.75, 84.48483591745918 ], "wc_limitations_avg": [ 16.5, 12.658988901172163 ], "wc_review_avg": [ 362.25, 107.02657380295793 ], "wc_reply_reviewers_avg": [ 92.75, 84.07548691503368 ], "wc_reply_authors_avg": [ 144.75, 151.14128324187274 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ox8hEu4QRmUJ:scholar.google.com/&scioq=Reinforcement+Learning+Guided+Semi-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "carleton.ca;carleton.ca;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "L4GM: Large 4D Gaussian Reconstruction Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95310", "id": "PSPtj26Lbp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PSPtj26Lbp", "openreview": "https://openreview.net/forum?id=PSPtj26Lbp", "poster": "/media/PosterPDFs/NeurIPS%202024/95310.png?t=1732532225.0743613", "project": "", "author_site": "Jiawei Ren, Cheng Xie, Ashkan Mirzaei, hanxue liang, xiaohui zeng, Karsten Kreis, Ziwei Liu, Antonio Torralba, Sanja Fidler, Seung Wook Kim, Huan Ling", "tldr": "", "abstract": "We present L4GM, the first 4D Large Reconstruction Model that produces animated objects from a single-view video input -- in a single feed-forward pass that takes only a second.\nKey to our success is a novel dataset of multiview videos containing curated, rendered animated objects from Objaverse. This dataset depicts 44K diverse objects with 110K animations rendered in 48 viewpoints, resulting in 12M videos with a total of 300M frames. \nWe keep our L4GM simple for scalability and build directly on top of LGM, a pretrained 3D Large Reconstruction Model that outputs 3D Gaussian ellipsoids from multiview image input.\nL4GM outputs a per-frame 3D Gaussian splat representation from video frames sampled at a low fps and then upsamples the representation to a higher fps to achieve temporal smoothness. We add temporal self-attention layers to the base LGM to help it learn consistency across time, and utilize a per-timestep multiview rendering loss to train the model. The representation is upsampled to a higher framerate by training an interpolation model which produces intermediate 3D Gaussian representations. \nWe showcase that L4GM that is only trained on synthetic data generalizes well on in-the-wild videos, producing high quality animated 3D assets.", "keywords": "4D Reconstruction; 4D Generation", "primary_area": "generative_models", "supplementary_material": "/attachment/feff3c1c3f1545ae64615da1518bb56d822640a6.zip", "author": "Jiawei Ren;Kevin Xie;Ashkan Mirzaei;hanxue liang;Xiaohui Zeng;Karsten Kreis;Ziwei Liu;Antonio Torralba;Sanja Fidler;Seung Wook Kim;Huan Ling", "authorids": "~Jiawei_Ren1;~Kevin_Xie1;~Ashkan_Mirzaei1;~hanxue_liang1;~Xiaohui_Zeng2;~Karsten_Kreis1;~Ziwei_Liu1;~Antonio_Torralba1;~Sanja_Fidler1;~Seung_Wook_Kim1;~Huan_Ling1", "gender": "Unspecified;M;M;M;;M;M;F;M;M;F", "homepage": "https://jiawei-ren.github.io/;https://kevincxie.github.io;https://ashmrz.github.io/;https://hanxuel.github.io/;https://karstenkreis.github.io/;https://liuziwei7.github.io/;http://web.mit.edu/torralba/www//;http://www.cs.toronto.edu/~fidler/;http://www.cs.toronto.edu/~seung/;http://www.cs.toronto.edu/~linghuan/;https://www.cs.utoronto.ca/~xiaohui/", "dblp": "122/3626-1;162/1953;323/8457;295/9018;238/6834;05/6300-2;t/AntonioBTorralba;08/6607;;202/1680;71/1709", "google_scholar": "https://scholar.google.com.sg/citations?user=YUKPVCoAAAAJ;04dL0akAAAAJ;z8GwuTgAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.de/citations?user=rFd-DiAAAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;https://scholar.google.com.tw/citations?user=8cxDHS4AAAAJ;CUlqK5EAAAAJ;https://scholar.google.co.kr/citations?hl=en;03n03GEAAAAJ;https://scholar.google.ca/citations?user=ba8ZWdcAAAAJ", "orcid": "0000-0003-1950-5976;;;;;;;;;;", "linkedin": ";;ashkan-mirzaei-6b9651145/;hanxue-charles-liang-78b581177/;karstenkreis;;;sanja-fidler-2846a1a?trk=hp-identity-name;;;", "or_profile": "~Jiawei_Ren1;~Kevin_Xie1;~Ashkan_Mirzaei1;~hanxue_liang1;~Karsten_Kreis1;~Ziwei_Liu1;~Antonio_Torralba1;~Sanja_Fidler1;~Seung_Wook_Kim1;~Huan_Ling1;~Xiaohui_Zeng1", "aff": "Nanyang Technological University;Department of Computer Science, University of Toronto;NVIDIA;University of Cambridge;NVIDIA;Nanyang Technological University;Massachusetts Institute of Technology;Department of Computer Science, University of Toronto;Vector Institute;NVIDIA;Department of Computer Science, University of Toronto", "aff_domain": "ntu.edu.sg;cs.toronto.edu;nvidia.com;cam.ac.uk;nvidia.com;ntu.edu.sg;mit.edu;cs.toronto.edu;vectorinstitute.ai;nvidia.com;cs.toronto.edu", "position": "PhD student;PhD student;Intern;PhD student;Research Scientist;Assistant Professor;Full Professor;Associate Professor;Grad student;Researcher;PhD student", "bibtex": "@inproceedings{\nren2024lgm,\ntitle={L4{GM}: Large 4D Gaussian Reconstruction Model},\nauthor={Jiawei Ren and Kevin Xie and Ashkan Mirzaei and hanxue liang and Xiaohui Zeng and Karsten Kreis and Ziwei Liu and Antonio Torralba and Sanja Fidler and Seung Wook Kim and Huan Ling},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PSPtj26Lbp}\n}", "github": "", "reviewers": "2k2z;hQSp;UkTx;4G9y", "pdf_size": 11590405, "rating": "5;6;6;6", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "4;3;3;3", "wc_summary": "98;74;91;85", "wc_strengths": "44;70;131;61", "wc_weaknesses": "129;52;95;129", "wc_questions": "89;93;136;63", "wc_limitations": "8;22;144;33", "wc_review": "368;311;597;371", "wc_reply_reviewers": "32;29;25;5", "wc_reply_authors": "17;17;13;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.0, 8.803408430829505 ], "wc_strengths_avg": [ 76.5, 32.82148686455262 ], "wc_weaknesses_avg": [ 101.25, 31.641547054466223 ], "wc_questions_avg": [ 95.25, 26.1951808545007 ], "wc_limitations_avg": [ 51.75, 53.99247632772551 ], "wc_review_avg": [ 411.75, 109.59328218463028 ], "wc_reply_reviewers_avg": [ 22.75, 10.54454835448157 ], "wc_reply_authors_avg": [ 11.75, 6.977642868476432 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10098495755569017759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;cs.toronto.edu;nvidia.com;cam.ac.uk;nvidia.com;ntu.edu.sg;mit.edu;cs.toronto.edu;vectorinstitute.ai;nvidia.com;cs.toronto.edu", "author_num": 11, "aff_unique_index": "0;1;2;3;2;0;4;1;5;2;1", "aff_unique_norm": "Nanyang Technological University;University of Toronto;NVIDIA;University of Cambridge;Massachusetts Institute of Technology;Vector Institute", "aff_unique_dep": ";Department of Computer Science;NVIDIA Corporation;;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.utoronto.ca;https://www.nvidia.com;https://www.cam.ac.uk;https://web.mit.edu;https://vectorinstitute.ai/", "aff_unique_abbr": "NTU;U of T;NVIDIA;Cambridge;MIT;Vector Institute", "aff_campus_unique_index": "1;2;1;1", "aff_campus_unique": ";Toronto;Cambridge", "aff_country_unique_index": "0;1;2;3;2;0;2;1;1;2;1", "aff_country_unique": "Singapore;Canada;United States;United Kingdom" }, { "title": "Infusing Self-Consistency into Density Functional Theory Hamiltonian Prediction via Deep Equilibrium Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95309", "id": "PSVkinBs4u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PSVkinBs4u", "openreview": "https://openreview.net/forum?id=PSVkinBs4u", "poster": "/media/PosterPDFs/NeurIPS%202024/95309.png?t=1730375256.8707411", "project": "", "author_site": "Zun Wang, Chang Liu, Nianlong Zou, He Zhang, Xinran Wei, Lin Huang, Lijun Wu, Bin Shao", "tldr": "", "abstract": "In this study, we introduce a unified neural network architecture, the Deep Equilibrium Density Functional Theory Hamiltonian (DEQH) model, which incorporates Deep Equilibrium Models (DEQs) for predicting Density Functional Theory (DFT) Hamiltonians. The DEQH model inherently captures the self-consistency nature of Hamiltonian, a critical aspect often overlooked by traditional machine learning approaches for Hamiltonian prediction. By employing DEQ within our model architecture, we circumvent the need for DFT calculations during the training phase to introduce the Hamiltonian's self-consistency, thus addressing computational bottlenecks associated with large or complex systems. We propose a versatile framework that combines DEQ with off-the-shelf machine learning models for predicting Hamiltonians. When benchmarked on the MD17 and QH9 datasets, DEQHNet, an instantiation of the DEQH framework, has demonstrated a significant improvement in prediction accuracy. Beyond a predictor, the DEQH model is a Hamiltonian solver, in the sense that it uses the fixed-point solving capability of the deep equilibrium model to iteratively solve for the Hamiltonian. Ablation studies of DEQHNet further elucidate the network's effectiveness, offering insights into the potential of DEQ-integrated networks for Hamiltonian learning. We open source our implementation at https://github.com/Zun-Wang/DEQHNet.", "keywords": "electronic structure;deep equilibrium model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zun Wang;Chang Liu;Nianlong Zou;He Zhang;Xinran Wei;Lin Huang;Lijun Wu;Bin Shao", "authorids": "~Zun_Wang2;~Chang_Liu10;~Nianlong_Zou1;~He_Zhang1;~Xinran_Wei1;~Lin_Huang2;~Lijun_Wu1;~Bin_Shao1", "gender": "M;M;;M;F;M;M;", "homepage": ";https://changliu00.github.io/;;;;https://www.researchgate.net/profile/Lin-Huang-3;https://apeterswu.github.io/;https://www.binshao.info/", "dblp": "44/8410;52/5716-30;;24/2058;;;68/1284-3;", "google_scholar": ";rYd0GEsAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;h9L4CgIAAAAJ", "orcid": "0000-0002-8763-8327;0000-0001-5207-5440;0000-0002-8746-8970;0000-0003-4294-5697;;;0000-0002-3530-590X;", "linkedin": ";chang-liu-9ab479168/;;%E8%B4%BA-%E5%BC%A0-8a592a16b/;;linhuang6385/;lijun-wu-59340478/;", "or_profile": "~Zun_Wang2;~Chang_Liu10;~Nianlong_Zou1;~He_Zhang1;~Xinran_Wei1;~Lin_Huang2;~Lijun_Wu1;~Bin_Shao1", "aff": "Microsoft;Microsoft;Tsinghua University;Xi'an Jiaotong University;Microsoft;Microsoft;Microsoft Research;Microsoft", "aff_domain": "microsoft.com;microsoft.com;tsinghua.edu.cn;xjtu.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;PhD student;PhD student;Researcher;Researcher;Researcher;Principal Research Manager", "bibtex": "@inproceedings{\nwang2024infusing,\ntitle={Infusing Self-Consistency into Density Functional Theory Hamiltonian Prediction via Deep Equilibrium Models},\nauthor={Zun Wang and Chang Liu and Nianlong Zou and He Zhang and Xinran Wei and Lin Huang and Lijun Wu and Bin Shao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PSVkinBs4u}\n}", "github": "", "reviewers": "Tf4v;mPEr;eJGc;DtX8", "pdf_size": 5443705, "rating": "4;5;6;7", "confidence": "2;2;2;2", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "46;58;83;37", "wc_strengths": "32;52;72;52", "wc_weaknesses": "139;82;65;63", "wc_questions": "41;2;32;34", "wc_limitations": "6;76;4;23", "wc_review": "264;270;256;209", "wc_reply_reviewers": "109;29;31;37", "wc_reply_authors": "539;375;63;49", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.0, 17.277152543170995 ], "wc_strengths_avg": [ 52.0, 14.142135623730951 ], "wc_weaknesses_avg": [ 87.25, 30.776411421736615 ], "wc_questions_avg": [ 27.25, 14.956186011146023 ], "wc_limitations_avg": [ 27.25, 29.09789511287715 ], "wc_review_avg": [ 249.75, 24.045529730076648 ], "wc_reply_reviewers_avg": [ 51.5, 33.32791622649097 ], "wc_reply_authors_avg": [ 256.5, 208.7743997716195 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15643175123172587096&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;tsinghua.edu.cn;xjtu.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;1;2;0;0;0;0", "aff_unique_norm": "Microsoft;Tsinghua University;Xi'an Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.tsinghua.edu.cn;https://www.xjtu.edu.cn", "aff_unique_abbr": "Microsoft;THU;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Efficient Policy Evaluation Across Multiple Different Experimental Datasets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95308", "id": "PSubtZAitM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PSubtZAitM", "openreview": "https://openreview.net/forum?id=PSubtZAitM", "poster": "/media/PosterPDFs/NeurIPS%202024/95308.png?t=1733599400.261608", "project": "", "author_site": "Yonghan Jung, Alexis Bellot", "tldr": "", "abstract": "Artificial intelligence systems are trained combining various observational and experimental datasets from different source sites, and are increasingly used to reason about the effectiveness of candidate policies. One common assumption in this context is that the data in source and target sites (where the candidate policy is due to be deployed) come from the same distribution. This assumption is often violated in practice, causing challenges for generalization, transportability, or external validity. Despite recent advances for determining the identifiability of the effectiveness of policies in a target domain, there are still challenges for the accurate estimation of effects from finite samples. In this paper, we develop novel graphical criteria and estimators for evaluating the effectiveness of policies (e.g., conditional, stochastic) by combining data from multiple experimental studies. Asymptotic error analysis of our estimators provides fast convergence guarantee. We empirically verified the robustness of estimators through simulations.", "keywords": "dynamic treatment regime;data fusion;off-policy evaluation;transportability", "primary_area": "causal_inference", "supplementary_material": "/attachment/41ed27af0896b17851d3504110cfa6bf50e632a7.zip", "author": "Yonghan Jung;Alexis Bellot", "authorids": "~Yonghan_Jung1;~Alexis_Bellot1", "gender": ";M", "homepage": "https://sites.google.com/view/yonghanjung;", "dblp": "201/0684.html;217/4339", "google_scholar": "D9ATOa4AAAAJ;", "orcid": ";", "linkedin": "yhansjung/;", "or_profile": "~Yonghan_Jung1;~Alexis_Bellot1", "aff": "Purdue University;Google DeepMind", "aff_domain": "purdue.edu;deepmind.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\njung2024efficient,\ntitle={Efficient Policy Evaluation Across Multiple Different Experimental Datasets},\nauthor={Yonghan Jung and Alexis Bellot},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PSubtZAitM}\n}", "github": "", "reviewers": "xiN2;cwjE;J4GZ;GRsa", "pdf_size": 676259, "rating": "5;6;6;7", "confidence": "3;3;1;1", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "42;105;55;54", "wc_strengths": "56;52;29;58", "wc_weaknesses": "147;603;122;51", "wc_questions": "160;13;40;3", "wc_limitations": "27;8;1;1", "wc_review": "432;781;247;167", "wc_reply_reviewers": "0;20;20;0", "wc_reply_authors": "28;28;28;28", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.0, 24.21776207662467 ], "wc_strengths_avg": [ 48.75, 11.60549438843516 ], "wc_weaknesses_avg": [ 230.75, 217.7847274259607 ], "wc_questions_avg": [ 54.0, 62.67774724732854 ], "wc_limitations_avg": [ 9.25, 10.638961415476606 ], "wc_review_avg": [ 406.75, 236.48506823899052 ], "wc_reply_reviewers_avg": [ 10.0, 10.0 ], "wc_reply_authors_avg": [ 28.0, 0.0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15258802627728167424&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "purdue.edu;deepmind.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Purdue University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.purdue.edu;https://deepmind.com", "aff_unique_abbr": "Purdue;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Mutual Information Estimation via $f$-Divergence and Data Derangements", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95307", "id": "PThi9hf9UT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PThi9hf9UT", "openreview": "https://openreview.net/forum?id=PThi9hf9UT", "poster": "/media/PosterPDFs/NeurIPS%202024/95307.png?t=1731435948.923166", "project": "", "author_site": "Nunzio Alexandro Letizia, Nicola Novello, Andrea M Tonello", "tldr": "", "abstract": "Estimating mutual information accurately is pivotal across diverse applications, from machine learning to communications and biology, enabling us to gain insights into the inner mechanisms of complex systems. Yet, dealing with high-dimensional data presents a formidable challenge, due to its size and the presence of intricate relationships. Recently proposed neural methods employing variational lower bounds on the mutual information have gained prominence. However, these approaches suffer from either high bias or high variance, as the sample size and the structure of the loss function directly influence the training process. In this paper, we propose a novel class of discriminative mutual information estimators based on the variational representation of the $f$-divergence. We investigate the impact of the permutation function used to obtain the marginal training samples and present a novel architectural solution based on derangements. The proposed estimator is flexible since it exhibits an excellent bias/variance trade-off. The comparison with state-of-the-art neural estimators, through extensive experimentation within established reference scenarios, shows that our approach offers higher accuracy and lower complexity.", "keywords": "mutual information;variational divergence;f-divergence;neural estimators;permutation;derangement", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/b5acbc5aa2f9298087b13ca6bea99e4930d41689.zip", "author": "Nunzio Alexandro Letizia;Nicola Novello;Andrea M Tonello", "authorids": "~Nunzio_Alexandro_Letizia1;~Nicola_Novello1;~Andrea_M_Tonello1", "gender": "M;;M", "homepage": ";;http://www.andreatonello.com", "dblp": "239/8530;;191/4511", "google_scholar": "v50jRAIAAAAJ;;https://scholar.google.de/citations?user=qBiseEsAAAAJ", "orcid": ";;0000-0002-9873-2407", "linkedin": "nunzio-alexandro-letizia/;;", "or_profile": "~Nunzio_Alexandro_Letizia1;~Nicola_Novello1;~Andrea_M_Tonello1", "aff": ";;Alpen-Adria Universit\u00e4t Klagenfurt", "aff_domain": ";;aau.at", "position": ";;Full Professor", "bibtex": "@inproceedings{\nletizia2024mutual,\ntitle={Mutual Information Estimation via \\$f\\$-Divergence and Data Derangements},\nauthor={Nunzio Alexandro Letizia and Nicola Novello and Andrea M Tonello},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PThi9hf9UT}\n}", "github": "", "reviewers": "dXEo;3r9p;H3tA;mW4u", "pdf_size": 6445658, "rating": "4;5;6;7", "confidence": "3;5;3;3", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "57;32;68;73", "wc_strengths": "21;48;117;63", "wc_weaknesses": "125;122;177;95", "wc_questions": "22;4;119;89", "wc_limitations": "16;1;16;8", "wc_review": "241;207;497;328", "wc_reply_reviewers": "80;0;30;237", "wc_reply_authors": "465;46;57;246", "reply_reviewers": "1;0;1;3", "reply_authors": "2;2;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.5, 15.819292019556375 ], "wc_strengths_avg": [ 62.25, 35.00982005095142 ], "wc_weaknesses_avg": [ 129.75, 29.67637949615822 ], "wc_questions_avg": [ 58.5, 47.15135204848319 ], "wc_limitations_avg": [ 10.25, 6.2599920127744575 ], "wc_review_avg": [ 318.25, 112.23942043685008 ], "wc_reply_reviewers_avg": [ 86.75, 91.33283911058497 ], "wc_reply_authors_avg": [ 203.5, 170.62898346998378 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9178024733438759976&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";;aau.at", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Alpen-Adria-Universit\u00e4t Klagenfurt", "aff_unique_dep": "", "aff_unique_url": "https://www.aau.at", "aff_unique_abbr": "AAU", "aff_country_unique_index": "0", "aff_country_unique": "Austria" }, { "title": "Gradient Methods for Online DR-Submodular Maximization with Stochastic Long-Term Constraints", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95306", "id": "PTxRRUEpHq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PTxRRUEpHq", "openreview": "https://openreview.net/forum?id=PTxRRUEpHq", "poster": "/media/PosterPDFs/NeurIPS%202024/95306.png?t=1733583920.2483695", "project": "", "author_site": "Guanyu Nie, Vaneet Aggarwal, Christopher Quinn", "tldr": "", "abstract": "In this paper, we consider the problem of online monotone DR-submodular maximization subject to long-term stochastic constraints. Specifically, at each round $t\\in [T]$, after committing an action $\\mathbf{x}_t$, a random reward $f_t(\\mathbf{x}_t)$ and an unbiased gradient estimate of the point $\\widetilde{\\nabla}f_t(\\mathbf{x}_t)$ (semi-bandit feedback) are revealed. Meanwhile, a budget of $g_t(\\mathbf{x}_t)$, which is linear and stochastic, is consumed of its total allotted budget $B_T$. We propose a gradient ascent based algorithm that achieves $\\frac{1}{2}$-regret of $\\mathcal{O}(\\sqrt{T})$ with $\\mathcal{O}(T^{3/4})$ constraint violation with high probability. Moreover, when first-order full-information feedback is available, we propose an algorithm that achieves $(1-1/e)$-regret of $\\mathcal{O}(\\sqrt{T})$ with $\\mathcal{O}(T^{3/4})$ constraint violation. These algorithms significantly improve over the state-of-the-art in terms of query complexity.", "keywords": "DR-submodular;long term constraint;gradient ascent", "primary_area": "online_learning", "supplementary_material": "", "author": "Guanyu Nie;Vaneet Aggarwal;Christopher John Quinn", "authorids": "~Guanyu_Nie1;~Vaneet_Aggarwal1;~Christopher_John_Quinn1", "gender": "M;M;M", "homepage": ";;https://www.cs.iastate.edu/people/christopher-quinn", "dblp": ";91/6560;50/8822", "google_scholar": "https://scholar.google.com/citations?hl=en;;oXWIgXcAAAAJ", "orcid": ";;0000-0002-9053-1504", "linkedin": ";;", "or_profile": "~Guanyu_Nie1;~Vaneet_Aggarwal1;~Christopher_John_Quinn1", "aff": "Iowa State University;Purdue University;Iowa State University", "aff_domain": "iastate.edu;purdue.edu;iastate.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnie2024gradient,\ntitle={Gradient Methods for Online {DR}-Submodular Maximization with Stochastic Long-Term Constraints},\nauthor={Guanyu Nie and Vaneet Aggarwal and Christopher John Quinn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PTxRRUEpHq}\n}", "github": "", "reviewers": "QRJN;w3Qq;Tz1S", "pdf_size": 405079, "rating": "5;6;6", "confidence": "4;3;2", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "185;173;92", "wc_strengths": "31;24;94", "wc_weaknesses": "284;37;48", "wc_questions": "97;4;54", "wc_limitations": "1;6;1", "wc_review": "598;244;289", "wc_reply_reviewers": "118;60;34", "wc_reply_authors": "156;105;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 150.0, 41.30375285612676 ], "wc_strengths_avg": [ 49.666666666666664, 31.478387647541428 ], "wc_weaknesses_avg": [ 123.0, 113.93272868963801 ], "wc_questions_avg": [ 51.666666666666664, 38.00292386412159 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 377.0, 157.34675084030175 ], "wc_reply_reviewers_avg": [ 70.66666666666667, 35.11251755270318 ], "wc_reply_authors_avg": [ 87.0, 64.94613152451807 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R-JOpEkYm3oJ:scholar.google.com/&scioq=Gradient+Methods+for+Online+DR-Submodular+Maximization+with+Stochastic+Long-Term+Constraints&hl=en&as_sdt=0,10", "gs_version_total": 0, "email": "iastate.edu;purdue.edu;iastate.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Iowa State University;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iastate.edu;https://www.purdue.edu", "aff_unique_abbr": "ISU;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SF-V: Single Forward Video Generation Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95305", "id": "PVgAeMm3MW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PVgAeMm3MW", "openreview": "https://openreview.net/forum?id=PVgAeMm3MW", "poster": "/media/PosterPDFs/NeurIPS%202024/95305.png?t=1731396171.6800032", "project": "", "author_site": "Zhixing Zhang, Yanyu Li, Yushu Wu, yanwu xu, Anil Kag, Ivan Skorokhodov, Willi Menapace, Aliaksandr Siarohin, Junli Cao, Dimitris Metaxas, Sergey Tulyakov, Jian Ren", "tldr": "", "abstract": "Diffusion-based video generation models have demonstrated remarkable success in obtaining high-fidelity videos through the iterative denoising process. However, these models require multiple denoising steps during sampling, resulting in high computational costs. In this work, we propose a novel approach to obtain single-step video generation models by leveraging adversarial training to fine-tune pre-trained video diffusion models. We show that, through the adversarial training, the multi-steps video diffusion model, i.e., Stable Video Diffusion (SVD), can be trained to perform single forward pass to synthesize high-quality videos, capturing both temporal and spatial dependencies in the video data. Extensive experiments demonstrate that our method achieves competitive generation quality of synthesized videos with significantly reduced computational overhead for the denoising process (i.e., around $23\\times$ speedup compared with SVD and $6\\times$ speedup compared with existing works, with even better generation quality), paving the way for real-time video synthesis and editing.", "keywords": "Video generation;diffusion model;distillation;adversarial training", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/eeedf4264b5eabf986d2613e7d89947ab184c726.zip", "author": "Zhixing Zhang;Yanyu Li;Yushu Wu;yanwu xu;Anil Kag;Ivan Skorokhodov;Willi Menapace;Aliaksandr Siarohin;Junli Cao;Dimitris N. Metaxas;Sergey Tulyakov;Jian Ren", "authorids": "~Zhixing_Zhang1;~Yanyu_Li1;~Yushu_Wu1;~yanwu_xu4;~Anil_Kag1;~Ivan_Skorokhodov1;~Willi_Menapace1;~Aliaksandr_Siarohin1;~Junli_Cao2;~Dimitris_N._Metaxas1;~Sergey_Tulyakov1;~Jian_Ren2", "gender": "M;;M;M;M;M;M;M;M;M;M;M", "homepage": "https://zhang-zx.github.io/;;https://wuyushuwys.github.io;https://xuyanwu.github.io/;https://anilkagak2.github.io/;https://universome.github.io/;;;;http://www.stulyakov.com/;https://alanspike.github.io/;https://www.cs.rutgers.edu/~dnm/", "dblp": ";194/5818;166/4244;59/9180-3;213/9132;223/0010;271/8571;199/1971;234/8466;40/6115;59/2180-5;m/DNMetaxas", "google_scholar": "RhM5qHoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;NOEyacoAAAAJ;bZdVsMkAAAAJ;https://scholar.google.com/citations?hl=en;31ha1LgAAAAJ;https://scholar.google.it/citations?user=uMl5-k4AAAAJ;;mgzXR0sAAAAJ;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ;https://scholar.google.com.tw/citations?user=a7VNhCIAAAAJ", "orcid": ";;0000-0001-9883-7973;;;0000-0002-7611-9310;;;;;;", "linkedin": "zhixing-zhang-174959198/;;;;;ivan-skorokhodov;willi-menapace/;;junli-cao-5165b41a1;sergeytulyakov/;;dimitris-metaxas-1bb74914/", "or_profile": "~Zhixing_Zhang1;~Yanyu_Li1;~Yushu_Wu1;~yanwu_xu4;~Anil_Kag1;~Ivan_Skorokhodov1;~Willi_Menapace1;~Aliaksandr_Siarohin1;~Junli_Cao2;~Sergey_Tulyakov1;~Jian_Ren2;~Dimitris_Metaxas1", "aff": "Snap Inc.;Northeastern University;Northeastern University;Boston University, Boston University;Snap Inc.;Snap Inc.;University of Trento;Snap Inc.;Snap Inc.;Snap Inc.;Snap Inc.;Rutgers University", "aff_domain": "snapchat.com;northeastern.edu;northeastern.edu;bu.edu;snap.com;snap.com;unitn.it;snapchat.com;snapchat.com;snapchat.com;snapchat.com;cs.rutgers.edu", "position": "Intern;PhD student;PhD student;PhD student;Researcher;Researcher;PhD student;Intern;Researcher;Director of Research;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nzhang2024sfv,\ntitle={{SF}-V: Single Forward Video Generation Model},\nauthor={Zhixing Zhang and Yanyu Li and Yushu Wu and yanwu xu and Anil Kag and Ivan Skorokhodov and Willi Menapace and Aliaksandr Siarohin and Junli Cao and Dimitris N. Metaxas and Sergey Tulyakov and Jian Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PVgAeMm3MW}\n}", "github": "", "reviewers": "R1mD;Q9Lc;FT7s;Do55", "pdf_size": 17252302, "rating": "4;5;5;5", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;2", "presentation": "3;3;4;3", "wc_summary": "55;52;112;100", "wc_strengths": "41;56;123;31", "wc_weaknesses": "40;138;245;57", "wc_questions": "45;2;36;21", "wc_limitations": "14;34;25;31", "wc_review": "195;282;541;240", "wc_reply_reviewers": "0;22;98;0", "wc_reply_authors": "189;127;483;128", "reply_reviewers": "0;1;1;0", "reply_authors": "4;3;3;3", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.75, 26.61179249881526 ], "wc_strengths_avg": [ 62.75, 35.90525727522364 ], "wc_weaknesses_avg": [ 120.0, 81.11411714368838 ], "wc_questions_avg": [ 26.0, 16.294170736800325 ], "wc_limitations_avg": [ 26.0, 7.648529270389178 ], "wc_review_avg": [ 314.5, 134.3400535953444 ], "wc_reply_reviewers_avg": [ 30.0, 40.27406113120453 ], "wc_reply_authors_avg": [ 231.75, 147.21646477211712 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6221810494912013059&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snapchat.com;northeastern.edu;northeastern.edu;bu.edu;snap.com;snap.com;unitn.it;snapchat.com;snapchat.com;snapchat.com;snapchat.com;cs.rutgers.edu", "author_num": 12, "aff_unique_index": "0;1;1;2;0;0;3;0;0;0;0;4", "aff_unique_norm": "Snap Inc.;Northeastern University;Boston University;University of Trento;Rutgers University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.snapinc.com;https://www.northeastern.edu;https://www.bu.edu;https://www.unitn.it;https://www.rutgers.edu", "aff_unique_abbr": "Snap;NEU;BU;UniTN;Rutgers", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;Italy" }, { "title": "Hierarchical Visual Feature Aggregation for OCR-Free Document Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95304", "id": "PWkjxjgGLP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PWkjxjgGLP", "openreview": "https://openreview.net/forum?id=PWkjxjgGLP", "poster": "/media/PosterPDFs/NeurIPS%202024/95304.png?t=1731666405.6498399", "project": "", "author_site": "JaeYoo Park, Jin Young Choi, Jeonghyung Park, Bohyung Han", "tldr": "", "abstract": "We present a novel OCR-free document understanding framework based on pretrained Multimodal Large Language Models (MLLMs). \nOur approach employs multi-scale visual features to effectively handle various font sizes within document images.\nTo address the increasing costs of considering the multi-scale visual inputs for MLLMs, we propose the Hierarchical Visual Feature Aggregation (HVFA) module, designed to reduce the number of input tokens to LLMs. \nLeveraging a feature pyramid with cross-attentive pooling, our approach effectively manages the trade-off between information loss and efficiency without being affected by varying document image sizes.\nFurthermore, we introduce a novel instruction tuning task, which facilitates the model's text-reading capability by learning to predict the relative positions of input text, eventually minimizing the risk of truncated text caused by the limited capacity of LLMs.\nComprehensive experiments validate the effectiveness of our approach, demonstrating superior performance in various document understanding tasks.", "keywords": "Document Understanding;Multi-modal Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jaeyoo Park;Jin Young Choi;Jeonghyung Park;Bohyung Han", "authorids": "~Jaeyoo_Park1;~Jin_Young_Choi2;~Jeonghyung_Park1;~Bohyung_Han1", "gender": "M;M;F;Not Specified", "homepage": "https://bellos1203.github.io;;http://www.samsung.com;http://cvlab.snu.ac.kr/~bhhan", "dblp": "276/0511;;;73/4880.html", "google_scholar": "https://scholar.google.co.kr/citations?user=TRlk3lsAAAAJ;;;9aaeCToAAAAJ", "orcid": ";;;", "linkedin": ";jin-young-choi-99421219b;;", "or_profile": "~Jaeyoo_Park1;~Jin_Young_Choi2;~Jeonghyung_Park1;~Bohyung_Han1", "aff": "Seoul National University;Seoul National University;Samsung SDS;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;samsung.com;snu.ac.kr", "position": "Integrated MS/PhD student;Undergrad student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\npark2024hierarchical,\ntitle={Hierarchical Visual Feature Aggregation for {OCR}-Free Document Understanding},\nauthor={Jaeyoo Park and Jin Young Choi and Jeonghyung Park and Bohyung Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PWkjxjgGLP}\n}", "github": "", "reviewers": "Pfd9;rxzX;CDZQ;cWFY", "pdf_size": 4021621, "rating": "4;5;5;6", "confidence": "5;4;5;4", "soundness": "3;3;3;3", "novelty": "2;2;2;2", "presentation": "3;3;2;4", "wc_summary": "75;55;61;129", "wc_strengths": "80;57;79;88", "wc_weaknesses": "165;66;155;79", "wc_questions": "20;54;4;116", "wc_limitations": "1;28;11;1", "wc_review": "341;260;310;413", "wc_reply_reviewers": "48;13;131;44", "wc_reply_authors": "434;43;732;131", "reply_reviewers": "1;1;1;1", "reply_authors": "5;2;3;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.0, 29.206163733020468 ], "wc_strengths_avg": [ 76.0, 11.510864433221338 ], "wc_weaknesses_avg": [ 116.25, 44.13261265776138 ], "wc_questions_avg": [ 48.5, 42.9505529650085 ], "wc_limitations_avg": [ 10.25, 11.031205736455105 ], "wc_review_avg": [ 331.0, 55.46620592757359 ], "wc_reply_reviewers_avg": [ 59.0, 43.72070447739835 ], "wc_reply_authors_avg": [ 335.0, 271.24251141736613 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8607814272420800457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;snu.ac.kr;samsung.com;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;Samsung", "aff_unique_dep": ";Samsung SDS", "aff_unique_url": "https://www.snu.ac.kr;https://www.samsungsds.com", "aff_unique_abbr": "SNU;Samsung SDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Does Video-Text Pretraining Help Open-Vocabulary Online Action Detection?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95303", "id": "PWzB2V2b6R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PWzB2V2b6R", "openreview": "https://openreview.net/forum?id=PWzB2V2b6R", "poster": "/media/PosterPDFs/NeurIPS%202024/95303.png?t=1730430156.678926", "project": "", "author_site": "qingsong zhao, Yi Wang, Jilan Xu, Yinan He, Zifan Song, Limin Wang, Yu Qiao, Cairong Zhao", "tldr": "", "abstract": "Video understanding relies on accurate action detection for temporal analysis. However, existing mainstream methods have limitations in real-world applications due to their offline and closed-set evaluation approaches, as well as their dependence on manual annotations. To address these challenges and enable real-time action understanding in open-world scenarios, we propose OV-OAD, a zero-shot online action detector that leverages vision-language models and learns solely from text supervision. By introducing an object-centered decoder unit into a Transformer-based model, we aggregate frames with similar semantics using video-text correspondence. Extensive experiments on four action detection benchmarks demonstrate that OV-OAD outperforms other advanced zero-shot methods. Specifically, it achieves 37.5\\% mean average precision on THUMOS\u201914 and 73.8\\% calibrated average precision on TVSeries. This research establishes a robust baseline for zero-shot transfer in online action detection, enabling scalable solutions for open-world temporal understanding. The code will be available for download at \\url{https://github.com/OpenGVLab/OV-OAD}.", "keywords": "Online action detection;Video understanding;Zero-shot transfer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qingsong Zhao;Yi Wang;Jilan Xu;Yinan He;Zifan Song;Limin Wang;Yu Qiao;Cairong Zhao", "authorids": "~Qingsong_Zhao1;~Yi_Wang19;~Jilan_Xu1;~Yinan_He1;~Zifan_Song1;~Limin_Wang1;~Yu_Qiao1;~Cairong_Zhao2", "gender": ";M;M;M;M;;;", "homepage": ";https://shepnerd.github.io/;https://jazzcharles.github.io;https://yinanhe.github.io;https://matcaviar.github.io/;;;", "dblp": ";17/221-33;232/2004;93/7763;307/3965;;;", "google_scholar": ";Xm2M8UwAAAAJ;mf2U64IAAAAJ;EgfF_CEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": ";;;;0000-0001-8734-9878;;;", "linkedin": ";;;;;;;", "or_profile": "~Qingsong_Zhao1;~Yi_Wang19;~Jilan_Xu1;~Yinan_He1;~Zifan_Song1;~Limin_Wang1;~Yu_Qiao1;~Cairong_Zhao2", "aff": ";Shanghai AI Laboratory;Fudan University;Shanghai Aritifcal Intelligence Laboratory;Tongji University;;;", "aff_domain": ";pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;tongji.edu.cn;;;", "position": ";Researcher;PhD student;Researcher;PhD student;;;", "bibtex": "@inproceedings{\nzhao2024does,\ntitle={Does Video-Text Pretraining Help Open-Vocabulary Online Action Detection?},\nauthor={Qingsong Zhao and Yi Wang and Jilan Xu and Yinan He and Zifan Song and Limin Wang and Yu Qiao and Cairong Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PWzB2V2b6R}\n}", "github": "", "reviewers": "wz2n;Boy7;zhJf;1BHH", "pdf_size": 1505285, "rating": "6;6;6;6", "confidence": "4;3;5;3", "soundness": "3;3;3;2", "novelty": "3;2;3;3", "presentation": "3;3;3;2", "wc_summary": "156;29;54;142", "wc_strengths": "91;66;67;122", "wc_weaknesses": "135;99;333;318", "wc_questions": "111;5;333;2", "wc_limitations": "1;11;44;1", "wc_review": "494;210;831;585", "wc_reply_reviewers": "39;31;22;214", "wc_reply_authors": "90;343;100;330", "reply_reviewers": "1;1;1;3", "reply_authors": "2;4;2;5", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 54.69632071721095 ], "wc_strengths_avg": [ 86.5, 22.808989455914087 ], "wc_weaknesses_avg": [ 221.25, 105.15791696301329 ], "wc_questions_avg": [ 112.75, 134.52578749072612 ], "wc_limitations_avg": [ 14.25, 17.65467360219384 ], "wc_review_avg": [ 530.0, 222.10470503796176 ], "wc_reply_reviewers_avg": [ 76.5, 79.6131270582936 ], "wc_reply_authors_avg": [ 215.75, 120.88915377319836 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dnkf933g2FoJ:scholar.google.com/&scioq=Does+Video-Text+Pretraining+Help+Open-Vocabulary+Online+Action+Detection%3F&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": ";pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;tongji.edu.cn;;;", "author_num": 8, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai AI Laboratory;Fudan University;Shanghai Artificial Intelligence Laboratory;Tongji University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.fudan.edu.cn;http://www.shanghaiai.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "SAIL;Fudan;;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Who\u2019s Gaming the System? A Causally-Motivated Approach for Detecting Strategic Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95302", "id": "PXGY9Fz8vC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PXGY9Fz8vC", "openreview": "https://openreview.net/forum?id=PXGY9Fz8vC", "poster": "", "project": "", "author_site": "Trenton Chang, Lindsay Warrenburg, Sae-Hwan Park, Ravi Parikh, Maggie Makar, Jenna Wiens", "tldr": "", "abstract": "In many settings, machine learning models may be used to inform decisions that impact individuals or entities who interact with the model. Such entities, or *agents,* may *game* model decisions by manipulating their inputs to the model to obtain better outcomes and maximize some utility. We consider a multi-agent setting where the goal is to identify the \u201cworst offenders:\u201d agents that are gaming most aggressively. However, identifying such agents is difficult without knowledge of their utility function. Thus, we introduce a framework in which each agent\u2019s tendency to game is parameterized via a scalar. We show that this gaming parameter is only partially identifiable. By recasting the problem as a causal effect estimation problem where different agents represent different \u201ctreatments,\u201d we prove that a ranking of all agents by their gaming parameters is identifiable. We present empirical results in a synthetic data study validating the usage of causal effect estimation for gaming detection and show in a case study of diagnosis coding behavior in the U.S. that our approach highlights features associated with gaming.", "keywords": "causal inference;strategic classification;gaming;healthcare", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/9808949f0a22c72089d0081a41a26ad17893ec43.zip", "author": "Trenton Chang;Lindsay Warrenburg;Sae-Hwan Park;Ravi B Parikh;Maggie Makar;Jenna Wiens", "authorids": "~Trenton_Chang1;~Lindsay_Warrenburg1;~Sae-Hwan_Park1;~Ravi_B_Parikh1;~Maggie_Makar1;~Jenna_Wiens1", "gender": ";F;M;M;F;F", "homepage": ";;;https://www.haclab.org/;https://mymakar.github.io/;http://www-personal.umich.edu/~wiensj/", "dblp": ";307/5073.html;343/8719;;211/6995;63/10451", "google_scholar": ";https://scholar.google.com/citations?hl=en;;CEp7XO0AAAAJ;bmlgkM4AAAAJ;fvEfKxkAAAAJ", "orcid": ";0000-0002-3986-4573;0000-0001-5297-5502;;;0000-0002-1057-7722", "linkedin": ";lindsay-a-warrenburg/;saehwan-park/;ravibparikh/;;", "or_profile": "~Trenton_Chang1;~Lindsay_Warrenburg1;~Sae-Hwan_Park1;~Ravi_B_Parikh1;~Maggie_Makar1;~Jenna_Wiens1", "aff": ";University of Pennsylvania;Perelman School of Medicine, University of Pennsylvania;Emory University;University of Michigan - Ann Arbor;University of Michigan Ann Arbor", "aff_domain": ";upenn.edu;pennmedicine.upenn.edu;emory.edu;umich.edu;umich.edu", "position": ";Principal Researcher;Principal Researcher;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nchang2024whos,\ntitle={Who{\\textquoteright}s Gaming the System? A Causally-Motivated Approach for Detecting Strategic Adaptation},\nauthor={Trenton Chang and Lindsay Warrenburg and Sae-Hwan Park and Ravi B Parikh and Maggie Makar and Jenna Wiens},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PXGY9Fz8vC}\n}", "github": "", "reviewers": "dPj8;Drhf;P2tW", "pdf_size": 2268055, "rating": "3;6;6", "confidence": "3;2;2", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "74;31;60", "wc_strengths": "8;58;36", "wc_weaknesses": "117;50;44", "wc_questions": "1;45;24", "wc_limitations": "1097;7;1", "wc_review": "1297;191;165", "wc_reply_reviewers": "0;0;41", "wc_reply_authors": "914;0;0", "reply_reviewers": "0;0;1", "reply_authors": "3;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 55.0, 17.90716802475106 ], "wc_strengths_avg": [ 34.0, 20.46134567096374 ], "wc_weaknesses_avg": [ 70.33333333333333, 33.089105289942324 ], "wc_questions_avg": [ 23.333333333333332, 17.96910929592474 ], "wc_limitations_avg": [ 368.3333333333333, 515.2509636629082 ], "wc_review_avg": [ 551.0, 527.6084406704149 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 19.3275853524323 ], "wc_reply_authors_avg": [ 304.6666666666667, 430.86373200300295 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9787481065636302140&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";upenn.edu;pennmedicine.upenn.edu;emory.edu;umich.edu;umich.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "University of Pennsylvania;Emory University;University of Michigan", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.emory.edu;https://www.umich.edu", "aff_unique_abbr": "UPenn;Emory;UM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Continual Audio-Visual Sound Separation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95301", "id": "PZCiWtQjAw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PZCiWtQjAw", "openreview": "https://openreview.net/forum?id=PZCiWtQjAw", "poster": "/media/PosterPDFs/NeurIPS%202024/95301.png?t=1731533948.2109025", "project": "", "author_site": "Weiguo Pian, Yiyang Nan, Shijian Deng, Shentong Mo, Yunhui Guo, Yapeng Tian", "tldr": "", "abstract": "In this paper, we introduce a novel continual audio-visual sound separation task, aiming to continuously separate sound sources for new classes while preserving performance on previously learned classes, with the aid of visual guidance. This problem is crucial for practical visually guided auditory perception as it can significantly enhance the adaptability and robustness of audio-visual sound separation models, making them more applicable for real-world scenarios where encountering new sound sources is commonplace. The task is inherently challenging as our models must not only effectively utilize information from both modalities in current tasks but also preserve their cross-modal association in old tasks to mitigate catastrophic forgetting during audio-visual continual learning. To address these challenges, we propose a novel approach named ContAV-Sep ($\\textbf{Cont}$inual $\\textbf{A}$udio-$\\textbf{V}$isual Sound $\\textbf{Sep}$aration). ContAV-Sep presents a novel Cross-modal Similarity Distillation Constraint (CrossSDC) to uphold the cross-modal semantic similarity through incremental tasks and retain previously acquired knowledge of semantic similarity in old models, mitigating the risk of catastrophic forgetting. The CrossSDC can seamlessly integrate into the training process of different audio-visual sound separation frameworks. Experiments demonstrate that ContAV-Sep can effectively mitigate catastrophic forgetting and achieve significantly better performance compared to other continual learning baselines for audio-visual sound separation. Code is available at: https://github.com/weiguoPian/ContAV-Sep_NeurIPS2024.", "keywords": "Audio-Visual Learning;Sound Separation;Continual Learning", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/7652c89589d0e2576e34ee01754f6022f9e5e74d.zip", "author": "Weiguo Pian;Yiyang Nan;Shijian Deng;Shentong Mo;Yunhui Guo;Yapeng Tian", "authorids": "~Weiguo_Pian1;~Yiyang_Nan1;~Shijian_Deng1;~Shentong_Mo1;~Yunhui_Guo2;~Yapeng_Tian1", "gender": ";M;;;M;M", "homepage": ";https://nanyyyyyy.github.io;;;https://yunhuiguo.github.io/;http://www.yapengtian.com/", "dblp": ";371/4841.html;;;165/3105;176/4020", "google_scholar": ";1ZR7r-oAAAAJ;;;BxIXuZYAAAAJ;lxCqdpoAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Weiguo_Pian1;~Yiyang_Nan1;~Shijian_Deng1;~Shentong_Mo1;~Yunhui_Guo2;~Yapeng_Tian1", "aff": ";Brown University;;;University of Texas at Dallas;University of Texas at Dallas", "aff_domain": ";brown.edu;;;utdallas.edu;utdallas.edu", "position": ";MS student;;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npian2024continual,\ntitle={Continual Audio-Visual Sound Separation},\nauthor={Weiguo Pian and Yiyang Nan and Shijian Deng and Shentong Mo and Yunhui Guo and Yapeng Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PZCiWtQjAw}\n}", "github": "", "reviewers": "NCtq;tY1x;5v9b;wmVP;49mS", "pdf_size": 1729918, "rating": "4;5;5;6;7", "confidence": "4;4;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;1;3;3;3", "presentation": "2;3;3;3;4", "wc_summary": "15;27;55;181;160", "wc_strengths": "19;40;34;120;74", "wc_weaknesses": "131;119;34;302;212", "wc_questions": "5;96;2;63;283", "wc_limitations": "6;53;2;28;12", "wc_review": "176;335;127;694;741", "wc_reply_reviewers": "351;258;0;55;37", "wc_reply_authors": "1157;755;69;298;195", "reply_reviewers": "2;3;0;1;1", "reply_authors": "5;4;2;4;3", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 87.6, 69.24045060511955 ], "wc_strengths_avg": [ 57.4, 36.10872470747202 ], "wc_weaknesses_avg": [ 159.6, 90.84624373082247 ], "wc_questions_avg": [ 89.8, 102.93959393741555 ], "wc_limitations_avg": [ 20.2, 18.637596411554792 ], "wc_review_avg": [ 414.6, 257.13078384355305 ], "wc_reply_reviewers_avg": [ 140.2, 138.47656841502103 ], "wc_reply_authors_avg": [ 494.8, 404.0120790273479 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 3.6, 1.019803902718557 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1961161351381841, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17184795053715716774&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";brown.edu;;;utdallas.edu;utdallas.edu", "author_num": 6, "aff_unique_index": "0;1;1", "aff_unique_norm": "Brown University;University of Texas at Dallas", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.utdallas.edu", "aff_unique_abbr": "Brown;UT Dallas", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Dallas", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "TabularBench: Benchmarking Adversarial Robustness for Tabular Deep Learning in Real-world Use-cases", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97710", "id": "PZbFW8ZrSJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PZbFW8ZrSJ", "openreview": "https://openreview.net/forum?id=PZbFW8ZrSJ", "poster": "", "project": "", "author_site": "Thibault Simonetto, Salah GHAMIZI, Maxime Cordy", "tldr": "", "abstract": "While adversarial robustness in computer vision is a mature research field, fewer researchers have tackled the evasion attacks against tabular deep learning, and even fewer investigated robustification mechanisms and reliable defenses. We hypothesize that this lag in the research on tabular adversarial attacks is in part due to the lack of standardized benchmarks. To fill this gap, we propose TabularBench, the first comprehensive benchmark of robustness of tabular deep learning classification models. We evaluated adversarial robustness with CAA, an ensemble of gradient and search attacks which was recently demonstrated as the most effective attack against a tabular model. In addition to our open benchmark https://github.com/serval-uni-lu/tabularbench where we welcome submissions of new models and defenses, we implement 7 robustification mechanisms inspired by state-of-the-art defenses in computer vision and propose the largest benchmark of robust tabular deep learning over 200 models across five critical scenarios in finance, healthcare and security. We curated real datasets for each use case, augmented with hundreds of thousands of realistic synthetic inputs, and trained and assessed our models with and without data augmentations. We open-source our library that provides API access to all our pre-trained robust tabular models, and the largest datasets of real and synthetic tabular inputs. Finally, we analyze the impact of various defenses on the robustness and provide actionable insights to design new defenses and robustification mechanisms.", "keywords": "benchmark;machine learning;security;adversarial attacks;tabular data;constrained machine learning", "primary_area": "", "supplementary_material": "/attachment/51157e6265f1880f03dba8f5638b4e9868f04d12.zip", "author": "Thibault Simonetto;Salah GHAMIZI;Maxime Cordy", "authorids": "~Thibault_Simonetto1;~Salah_GHAMIZI1;~Maxime_Cordy1", "gender": "M;M;M", "homepage": "https://wwwen.uni.lu/snt/people/thibault_jean_angel_simonetto;https://www.sghamizi.com;https://maxcordy.github.io/", "dblp": "308/0948;165/4263;73/10839.html", "google_scholar": "4RhGnOoAAAAJ;UcvKgR0AAAAJ;sRXHjkIAAAAJ", "orcid": "0000-0001-5336-3751;0000-0002-0738-8250;0000-0001-8312-1358", "linkedin": ";;", "or_profile": "~Thibault_Simonetto1;~Salah_GHAMIZI1;~Maxime_Cordy1", "aff": "University Of Luxembourg;Luxembourg Institute of Science and Technology ;University of Luxemburg", "aff_domain": "uni.lu;list.lu;uni.lu", "position": "PhD student;Postdoc;Researcher", "bibtex": "@inproceedings{\nsimonetto2024tabularbench,\ntitle={TabularBench: Benchmarking Adversarial Robustness for Tabular Deep Learning in Real-world Use-cases},\nauthor={Thibault Simonetto and Salah GHAMIZI and Maxime Cordy},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PZbFW8ZrSJ}\n}", "github": "", "reviewers": "cirj;EokF;ZSGk", "pdf_size": 850666, "rating": "5;6;7", "confidence": "3;4;3", "wc_summary_and_contributions": "66;48;100", "wc_strengths": "82;35;85", "wc_improvement": "89;1;126", "wc_limitations": "248;4;38", "wc_correctness": "68;10;19", "wc_clarity": "78;20;38", "wc_relation_to_prior_work": "150;20;11", "wc_documentation": "160;7;63", "wc_additional_feedback": "1;1;1", "wc_review": "942;146;481", "wc_reply_reviewers": "0;139;522", "wc_reply_authors": "0;340;659", "reply_reviewers": "0;2;2", "reply_authors": "0;3;4", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 71.33333333333333, 21.561282171728305 ], "wc_strengths_avg": [ 67.33333333333333, 22.89589968143253 ], "wc_improvement_avg": [ 72.0, 52.42772803266099 ], "wc_limitations_avg": [ 96.66666666666667, 107.90530828256576 ], "wc_correctness_avg": [ 32.333333333333336, 25.48637980482037 ], "wc_clarity_avg": [ 45.333333333333336, 24.239545283597124 ], "wc_relation_to_prior_work_avg": [ 60.333333333333336, 63.51027913303133 ], "wc_documentation_avg": [ 76.66666666666667, 63.20513340614739 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 523.0, 326.31988395846594 ], "wc_reply_reviewers_avg": [ 220.33333333333334, 220.72959827706737 ], "wc_reply_authors_avg": [ 333.0, 269.0811525667799 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1930239663325516333&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uni.lu;list.lu;uni.lu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Luxembourg;Luxembourg Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://wwwen.uniluxembourg.lu;https://www.list.lu", "aff_unique_abbr": "UniLu;LIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Luxembourg" }, { "title": "Hollowed Net for On-Device Personalization of Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95300", "id": "Pa8jsrdOnU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pa8jsrdOnU", "openreview": "https://openreview.net/forum?id=Pa8jsrdOnU", "poster": "/media/PosterPDFs/NeurIPS%202024/95300.png?t=1731372973.7360709", "project": "", "author_site": "Wonguk Cho, Seokeon Choi, Debasmit Das, Matthias Reisser, Taesup Kim, Sungrack Yun, Fatih Porikli", "tldr": "", "abstract": "Recent advancements in text-to-image diffusion models have enabled the personalization of these models to generate custom images from textual prompts. This paper presents an efficient LoRA-based personalization approach for on-device subject-driven generation, where pre-trained diffusion models are fine-tuned with user-specific data on resource-constrained devices. Our method, termed Hollowed Net, enhances memory efficiency during fine-tuning by modifying the architecture of a diffusion U-Net to temporarily remove a fraction of its deep layers, creating a hollowed structure. This approach directly addresses on-device memory constraints and substantially reduces GPU memory requirements for training, in contrast to previous methods that primarily focus on minimizing training steps and reducing the number of parameters to update. Additionally, the personalized Hollowed Net can be transferred back into the original U-Net, enabling inference without additional memory overhead. Quantitative and qualitative analyses demonstrate that our approach not only reduces training memory to levels as low as those required for inference but also maintains or improves personalization performance compared to existing methods.", "keywords": "Hollowed Net;On-Device Learning;Memory-Efficient Fine-Tuning;LoRA Personalization;Text-to-Image Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Wonguk Cho;Seokeon Choi;Debasmit Das;Matthias Reisser;Taesup Kim;Sungrack Yun;Fatih Porikli", "authorids": "~Wonguk_Cho1;~Seokeon_Choi1;~Debasmit_Das2;~Matthias_Reisser1;~Taesup_Kim1;~Sungrack_Yun1;~Fatih_Porikli2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://sites.google.com/site/seokeonchoi/;https://sites.google.com/site/debasmitbunadas/;http://matthiasreisser.github.io/;;;https://www.porikli.com", "dblp": "319/3781;214/2200;155/4271;228/6851;;67/8053;p/FatihMuratPorikli", "google_scholar": "Jijzx8sAAAAJ;https://scholar.google.co.kr/citations?user=wydV__gAAAAJ;0tP8MuMAAAAJ;;7V7yNeoAAAAJ;;https://scholar.google.com.tw/citations?user=VpB8NZ8AAAAJ", "orcid": ";0000-0002-1695-5894;;;;;0000-0002-1520-4466", "linkedin": "wonguk-cho/;seokeon/;;;;;fatih-porikli-a95643/", "or_profile": "~Wonguk_Cho1;~Seokeon_Choi1;~Debasmit_Das2;~Matthias_Reisser1;~Taesup_Kim1;~Sungrack_Yun1;~Fatih_Porikli2", "aff": "Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc.;Qualcomm Inc, QualComm;Seoul National University;Qualcomm;QualComm", "aff_domain": "qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;snu.ac.kr;qualcomm.com;qualcomm.com", "position": "Research Intern;Researcher;Researcher;Senior Engineer;Assistant Professor;Researcher;Senior Director", "bibtex": "@inproceedings{\ncho2024hollowed,\ntitle={Hollowed Net for On-Device Personalization of Text-to-Image Diffusion Models},\nauthor={Wonguk Cho and Seokeon Choi and Debasmit Das and Matthias Reisser and Taesup Kim and Sungrack Yun and Fatih Porikli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pa8jsrdOnU}\n}", "github": "", "reviewers": "Jp6t;cT56;SH8P;fGHD", "pdf_size": 13496149, "rating": "3;5;6;6", "confidence": "4;5;5;5", "soundness": "2;4;3;2", "novelty": "2;3;3;2", "presentation": "3;4;3;3", "wc_summary": "29;63;90;17", "wc_strengths": "21;49;58;20", "wc_weaknesses": "134;118;278;288", "wc_questions": "25;10;125;31", "wc_limitations": "41;15;37;23", "wc_review": "250;255;588;379", "wc_reply_reviewers": "259;0;144;156", "wc_reply_authors": "1015;492;479;530", "reply_reviewers": "1;0;2;2", "reply_authors": "3;2;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 49.75, 28.71737279069936 ], "wc_strengths_avg": [ 37.0, 16.80773631397161 ], "wc_weaknesses_avg": [ 204.5, 78.78292962311062 ], "wc_questions_avg": [ 47.75, 45.25138119439008 ], "wc_limitations_avg": [ 29.0, 10.488088481701515 ], "wc_review_avg": [ 368.0, 137.12585460080095 ], "wc_reply_reviewers_avg": [ 139.75, 92.2398368385374 ], "wc_reply_authors_avg": [ 629.0, 223.64368982826232 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pV6w1j5ivmoJ:scholar.google.com/&scioq=Hollowed+Net+for+On-Device+Personalization+of+Text-to-Image+Diffusion+Models&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;snu.ac.kr;qualcomm.com;qualcomm.com", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Qualcomm Incorporated;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.qualcomm.com;https://www.snu.ac.kr", "aff_unique_abbr": "Qualcomm;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "KnowGPT: Knowledge Graph based Prompting for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95299", "id": "PacBluO5m7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PacBluO5m7", "openreview": "https://openreview.net/forum?id=PacBluO5m7", "poster": "/media/PosterPDFs/NeurIPS%202024/95299.png?t=1731738615.124185", "project": "", "author_site": "Qinggang Zhang, Junnan Dong, Hao Chen, Daochen Zha, Zailiang Yu, Xiao Huang", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable capabilities in many real-world applications. Nonetheless, LLMs are often criticized for their tendency to produce hallucinations, wherein the models fabricate incorrect statements on tasks beyond their knowledge and perception. To alleviate this issue, graph retrieval-augmented generation (GraphRAG) has been extensively explored which leverages the factual knowledge in knowledge graphs (KGs) to ground the LLM's responses in established facts and principles. However, most state-of-the-art LLMs are closed-source, making it challenging to develop a prompting framework that can efficiently and effectively integrate KGs into LLMs with hard prompts only. Generally, existing KG-enhanced LLMs usually suffer from three critical issues, including huge search space, high API costs, and laborious prompt engineering, that impede their widespread application in practice. To this end, we introduce a novel **Know**ledge **Gr**aph based **P**romp**T**ing framework, namely **KnowGPT**, to enhance LLMs with domain knowledge. KnowGPT contains a knowledge extraction module to extract the most informative knowledge from KGs, and a context-aware prompt construction module to automatically convert extracted knowledge into effective prompts. Experiments on three benchmarks demonstrate that KnowGPT significantly outperforms all competitors. Notably, KnowGPT achieves a 92.6% accuracy on OpenbookQA leaderboard, comparable to human-level performance.", "keywords": "Large language models;knowledge graphs;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Qinggang Zhang;Junnan Dong;Hao Chen;Daochen Zha;Zailiang Yu;Xiao Huang", "authorids": "~Qinggang_Zhang2;~Junnan_Dong1;~Hao_Chen18;~Daochen_Zha1;~Zailiang_Yu1;~Xiao_Huang1", "gender": "M;M;M;;M;M", "homepage": "https://qing145.github.io/;https://junnandong.github.io;;http://dczha.com/;;https://www4.comp.polyu.edu.hk/~xiaohuang/", "dblp": "17/8559;322/5787;175/3324-62;167/0903;;25/692-1.html", "google_scholar": "eF8PATI7r3IC;https://scholar.google.com.hk/citations?user=m7_ceMcAAAAJ;https://scholar.google.com/citations?view_op=list_works;jK0NgMcAAAAJ;;Be21PkYAAAAJ", "orcid": "0000-0002-1536-6529;0000-0003-2117-6083;0000-0001-6816-5344;0000-0002-6677-7504;;0000-0002-3867-900X", "linkedin": ";;hao-chen-phd-37136b1b5/;daochen-zha;zailiang;", "or_profile": "~Qinggang_Zhang2;~Junnan_Dong1;~Hao_Chen18;~Daochen_Zha1;~Zailiang_Yu1;~Xiao_Huang1", "aff": "The Hong Kong Polytechnic University, Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University;Airbnb;;The Hong Kong Polytechnic University", "aff_domain": "comp.polyu.edu.hk;connect.polyu.hk;polyu.edu.hk;airbnb.com;;polyu.edu.hk", "position": "PhD student;PhD student;Postdoc;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024knowgpt,\ntitle={Know{GPT}: Knowledge Graph based Prompting for Large Language Models},\nauthor={Qinggang Zhang and Junnan Dong and Hao Chen and Daochen Zha and Zailiang Yu and Xiao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PacBluO5m7}\n}", "github": "", "reviewers": "t5Eo;vKLu;4L5K", "pdf_size": 990852, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "87;42;138", "wc_strengths": "68;33;55", "wc_weaknesses": "280;22;26", "wc_questions": "263;120;36", "wc_limitations": "1;11;10", "wc_review": "699;228;265", "wc_reply_reviewers": "24;28;53", "wc_reply_authors": "2414;52;198", "reply_reviewers": "1;1;1", "reply_authors": "6;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 89.0, 39.21734310225516 ], "wc_strengths_avg": [ 52.0, 14.445299120013633 ], "wc_weaknesses_avg": [ 109.33333333333333, 120.69060536024428 ], "wc_questions_avg": [ 139.66666666666666, 93.70995441016687 ], "wc_limitations_avg": [ 7.333333333333333, 4.4969125210773475 ], "wc_review_avg": [ 397.3333333333333, 213.84469961997084 ], "wc_reply_reviewers_avg": [ 35.0, 12.832251036613439 ], "wc_reply_authors_avg": [ 888.0, 1080.6899031020262 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11908731020916198647&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "comp.polyu.edu.hk;connect.polyu.hk;polyu.edu.hk;airbnb.com;;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Hong Kong Polytechnic University;Airbnb", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.airbnb.com", "aff_unique_abbr": "PolyU;Airbnb", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Continuous Contrastive Learning for Long-Tailed Semi-Supervised Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95298", "id": "PaqJ71zf1M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PaqJ71zf1M", "openreview": "https://openreview.net/forum?id=PaqJ71zf1M", "poster": "/media/PosterPDFs/NeurIPS%202024/95298.png?t=1731406347.9159007", "project": "", "author_site": "Zi-Hao Zhou, Siyuan Fang, Zi-Jing Zhou, Tong Wei, Yuanyu Wan, Min-Ling Zhang", "tldr": "", "abstract": "Long-tailed semi-supervised learning poses a significant challenge in training models with limited labeled data exhibiting a long-tailed label distribution. Current state-of-the-art LTSSL approaches heavily rely on high-quality pseudo-labels for large-scale unlabeled data. However, these methods often neglect the impact of representations learned by the neural network and struggle with real-world unlabeled data, which typically follows a different distribution than labeled data. This paper introduces a novel probabilistic framework that unifies various recent proposals in long-tail learning. Our framework derives the class-balanced contrastive loss through Gaussian kernel density estimation. We introduce a continuous contrastive learning method, CCL, extending our framework to unlabeled data using *reliable* and *smoothed* pseudo-labels. By progressively estimating the underlying label distribution and optimizing its alignment with model predictions, we tackle the diverse distribution of unlabeled data in real-world scenarios. Extensive experiments across multiple datasets with varying unlabeled data distributions demonstrate that CCL consistently outperforms prior state-of-the-art methods, achieving over 4% improvement on the ImageNet-127 dataset. The supplementary material includes the source code for reproducibility.", "keywords": "Semi-supervised learning;Long-tail learning;Weakly-supervised learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/d8cff249d82c9b7fd1f88e0443aab06a7c9201e9.zip", "author": "Zi-Hao Zhou;Siyuan Fang;Zi-Jing Zhou;Tong Wei;Yuanyu Wan;Min-Ling Zhang", "authorids": "~Zi-Hao_Zhou1;~Siyuan_Fang2;~Zi-Jing_Zhou1;~Tong_Wei1;~Yuanyu_Wan1;~Min-Ling_Zhang2", "gender": "M;M;M;M;M;", "homepage": "https://github.com/F-JISOO;https://github.com/zzjzzj1;https://palm.seu.edu.cn/weit/;https://yuanyuwan.github.io/;http://palm.seu.edu.cn/zhangml/;https://github.com/zhouzihao11", "dblp": ";;49/933-1;221/3499;84/271.html;", "google_scholar": ";;EFCZuW4AAAAJ;CEymMc8AAAAJ;uFHCIM0AAAAJ;", "orcid": ";;0000-0002-2766-8209;;0000-0003-1880-5918;", "linkedin": ";;;;;", "or_profile": "~Siyuan_Fang2;~Zi-Jing_Zhou1;~Tong_Wei1;~Yuanyu_Wan1;~Min-Ling_Zhang2;~Zhou_Zihao1", "aff": "Southeast University;Xiaomi Corporation;Southeast University;Zhejiang University;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;xiaomi.com;seu.edu.cn;zju.edu.cn;seu.edu.cn;seu.edu.cn", "position": "MS student;Researcher;Associate Professor;Researcher;Full Professor;MS student", "bibtex": "@inproceedings{\nzhou2024continuous,\ntitle={Continuous Contrastive Learning for Long-Tailed Semi-Supervised Recognition},\nauthor={Zi-Hao Zhou and Siyuan Fang and Zi-Jing Zhou and Tong Wei and Yuanyu Wan and Min-Ling Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PaqJ71zf1M}\n}", "github": "", "reviewers": "WTLw;JsM2;biXa;1xLE", "pdf_size": 3118515, "rating": "6;7;7;7", "confidence": "2;5;5;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "48;33;46;53", "wc_strengths": "51;108;103;64", "wc_weaknesses": "98;18;55;27", "wc_questions": "2;37;3;24", "wc_limitations": "1;15;15;8", "wc_review": "200;211;222;176", "wc_reply_reviewers": "0;12;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 45.0, 7.3824115301167 ], "wc_strengths_avg": [ 81.5, 24.5 ], "wc_weaknesses_avg": [ 49.5, 31.148836254345042 ], "wc_questions_avg": [ 16.5, 14.739402972983676 ], "wc_limitations_avg": [ 9.75, 5.80409338312195 ], "wc_review_avg": [ 202.25, 17.03489066592445 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10373366645816180065&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "seu.edu.cn;xiaomi.com;seu.edu.cn;zju.edu.cn;seu.edu.cn;seu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Southeast University;Xiaomi Corporation;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.xiaomi.com;https://www.zju.edu.cn", "aff_unique_abbr": "SEU;Xiaomi;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Elo Uncovered: Robustness and Best Practices in Language Model Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95297", "id": "Pc9LLjTL5f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pc9LLjTL5f", "openreview": "https://openreview.net/forum?id=Pc9LLjTL5f", "poster": "/media/PosterPDFs/NeurIPS%202024/95297.png?t=1731741018.9616206", "project": "", "author_site": "Meriem Boubdir, Edward Kim, Beyza Ermis, Sara Hooker, Marzieh Fadaee", "tldr": "", "abstract": "In Natural Language Processing (NLP), the Elo rating system, originally designed for ranking players in dynamic games such as chess, is increasingly being used to evaluate Large Language Models (LLMs) through \"A vs B\" paired comparisons.\nHowever, while popular, the system's suitability for assessing entities with constant skill levels, such as LLMs, remains relatively unexplored. \nWe study two fundamental axioms that evaluation methods should adhere to: reliability and transitivity. \nWe conduct an extensive evaluation of Elo behavior across simulated and real-world scenarios, demonstrating that individual Elo computations can exhibit significant volatility.\nWe show that both axioms are not always satisfied, raising questions about the reliability of current comparative evaluations of LLMs.\nIf the current use of Elo scores is intended to substitute the costly head-to-head comparison of LLMs, it is crucial to ensure the ranking is as robust as possible.\nGuided by the axioms, our findings offer concrete guidelines for enhancing the reliability of LLM evaluation methods, suggesting a need for reassessment of existing comparative approaches.", "keywords": "Elo Rating System;Language Model Evaluation;Reliability;Robustness;Reproducibility;LLMs Ranking", "primary_area": "evaluation", "supplementary_material": "", "author": "Meriem Boubdir;Edward Kim;Beyza Ermis;Sara Hooker;Marzieh Fadaee", "authorids": "~Meriem_Boubdir1;~Edward_Kim7;~Beyza_Ermis1;~Sara_Hooker2;~Marzieh_Fadaee2", "gender": "F;M;F;F;", "homepage": ";;https://www.cmpe.boun.edu.tr/people/beyza.ermi%C5%9F;http://marziehf.github.io/;https://www.sarahooker.me/", "dblp": "359/3915.html;06/445-4;117/9290;159/4868;210/2611", "google_scholar": "vC2BaXIAAAAJ;FSj_J7MAAAAJ;v2cMiCAAAAAJ;https://scholar.google.nl/citations?user=NZqs0toAAAAJ;2xy6h3sAAAAJ", "orcid": ";;;;", "linkedin": "mboubdir/;;;marzieh-fadaee-b7393370/;", "or_profile": "~Meriem_Boubdir1;~Edward_Kim7;~Beyza_Ermis1;~Marzieh_Fadaee2;~Sara_Hooker1", "aff": "MATS Program;Cohere;Cohere AI;Cohere For AI;Cohere For AI", "aff_domain": "matsprogram.org;cohere.com;cohere.com;cohere.com;cohere.com", "position": "Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nboubdir2024elo,\ntitle={Elo Uncovered: Robustness and Best Practices in Language Model Evaluation},\nauthor={Meriem Boubdir and Edward Kim and Beyza Ermis and Sara Hooker and Marzieh Fadaee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pc9LLjTL5f}\n}", "github": "", "reviewers": "9SNU;NZSD;hqxB;jqvp", "pdf_size": 7522599, "rating": "4;5;7;7", "confidence": "4;2;5;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "1;3;2;4", "wc_summary": "420;167;79;167", "wc_strengths": "45;65;81;78", "wc_weaknesses": "486;24;69;117", "wc_questions": "29;51;7;45", "wc_limitations": "1;11;20;22", "wc_review": "981;318;256;429", "wc_reply_reviewers": "102;11;14;23", "wc_reply_authors": "301;90;89;183", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 208.25, 127.42326122023404 ], "wc_strengths_avg": [ 67.25, 14.184057952504283 ], "wc_weaknesses_avg": [ 174.0, 183.11062230247595 ], "wc_questions_avg": [ 33.0, 17.029386365926403 ], "wc_limitations_avg": [ 13.5, 8.32165848854662 ], "wc_review_avg": [ 496.0, 286.79173628262026 ], "wc_reply_reviewers_avg": [ 37.5, 37.5 ], "wc_reply_authors_avg": [ 165.75, 86.91770533096235 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3764315819503624665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "matsprogram.org;cohere.com;cohere.com;cohere.com;cohere.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "MATS Program;Cohere;Cohere AI", "aff_unique_dep": ";;", "aff_unique_url": ";https://cohere.ai;https://cohere.ai", "aff_unique_abbr": ";;Cohere AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2;1;1", "aff_country_unique": ";United States;Canada" }, { "title": "DreamCatcher: A Wearer-aware Multi-modal Sleep Event Dataset Based on Earables in Non-restrictive Environments", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97709", "id": "PcbSZwVVc5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PcbSZwVVc5", "openreview": "https://openreview.net/forum?id=PcbSZwVVc5", "poster": "", "project": "", "author_site": "Zeyu Wang, Xiyuxing Zhang, Ruotong Yu, Yuntao Wang, Kenneth Christofferson, Jingru Zhang, Alex Mariakakis, Yuanchun Shi", "tldr": "", "abstract": "Poor quality sleep can be characterized by the occurrence of events ranging from body movement to breathing impairment. Widely available earbuds equipped with sensors (also known as earables) can be combined with a sleep event detection algorithm to offer a convenient alternative to laborious clinical tests for individuals suffering from sleep disorders. Although various solutions utilizing such devices have been proposed to detect sleep events, they ignore the fact that individuals often share sleeping spaces with roommates or couples. To address this issue, we introduce DreamCatcher, the first publicly available dataset for wearer-aware sleep event algorithm development on earables. DreamCatcher encompasses eight distinct sleep events, including synchronous dual-channel audio and motion data collected from 12 pairs (24 participants) totaling 210 hours (420 hour.person) with fine-grained label. We tested multiple benchmark models on three tasks related to sleep event detection, demonstrating the usability and unique challenge of DreamCatcher. We hope that the proposed DreamCatcher can inspire other researchers to further explore efficient wearer-aware human vocal activity sensing on earables. DreamCatcher is publicly available at https://github.com/thuhci/DreamCatcher.", "keywords": "sleep monitoring\uff0c multi-modal sensing\uff0c sound event detection\uff0c wearer aware\uff0c open environment", "primary_area": "", "supplementary_material": "/attachment/0deb762a4f16c942dc8f50a7c4e8c844bdc2950d.zip", "author": "Zeyu Wang;Xiyuxing Zhang;Ruotong Yu;Yuntao Wang;Kenneth Christofferson;Jingru Zhang;Alex Mariakakis;Yuanchun Shi", "authorids": "~Zeyu_Wang12;~Xiyuxing_Zhang1;~Ruotong_Yu1;~Yuntao_Wang1;~Kenneth_Christofferson1;~Jingru_Zhang2;~Alex_Mariakakis1;~Yuanchun_Shi1", "gender": "F;Not Specified;M;M;F;M;F;F", "homepage": ";;https://pi.cs.tsinghua.edu.cn/lab/people/YuntaoWang/;;;http://mariakakis.github.io/;http://media.cs.tsinghua.edu.cn/en/shiyc;", "dblp": ";;52/4107-1.html;;;116/0750.html;08/5313;", "google_scholar": ";https://scholar.google.co.jp/citations?user=oZfHbF8AAAAJ;kHpwoAUAAAAJ;;;YeGQtewAAAAJ;;", "orcid": "0009-0007-5048-1665;0009-0002-9337-2278;0000-0002-4249-8893;;0009-0000-4336-0151;0000-0002-9986-3345;;0009-0006-3691-3110", "linkedin": ";;;https://www.linkedin.com/;;alex-mariakakis/;;ruotong-yu-5142b0295?trk=contact-info", "or_profile": "~Zeyu_Wang12;~Xiyuxing_Zhang1;~Yuntao_Wang1;~Kenneth_Christofferson1;~Jingru_Zhang2;~Alex_Mariakakis1;~Yuanchun_Shi1;~YU_RUOTONG1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Department of Computer Science, University of Toronto;Tsinghua University;University of Toronto;, Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;cs.toronto.edu;mails.tsinghua.edu.cn;toronto.edu;cs.tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor;PhD student;Undergrad student;Assistant Professor;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nwang2024dreamcatcher,\ntitle={DreamCatcher: A Wearer-aware Multi-modal Sleep Event Dataset Based on Earables in Non-restrictive Environments},\nauthor={Zeyu Wang and Xiyuxing Zhang and Ruotong Yu and Yuntao Wang and Kenneth Christofferson and Jingru Zhang and Alex Mariakakis and Yuanchun Shi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PcbSZwVVc5}\n}", "github": "", "reviewers": "46zb;VByL;ZSYL;gmYF", "pdf_size": 16037394, "rating": "6;7;7;9", "confidence": "4;4;5;4", "wc_summary_and_contributions": "50;111;41;141", "wc_strengths": "19;99;20;58", "wc_improvement": "72;44;97;79", "wc_limitations": "64;87;164;134", "wc_correctness": "5;5;56;14", "wc_clarity": "11;18;20;6", "wc_relation_to_prior_work": "15;16;6;18", "wc_documentation": "4;12;16;48", "wc_additional_feedback": "1;1;1;1", "wc_review": "241;393;421;499", "wc_reply_reviewers": "59;15;393;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;1;2;0", "reply_authors": "4;1;2;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 85.75, 41.74550874046213 ], "wc_strengths_avg": [ 49.0, 32.8709598277872 ], "wc_improvement_avg": [ 73.0, 19.06567596493762 ], "wc_limitations_avg": [ 112.25, 39.10482706776748 ], "wc_correctness_avg": [ 20.0, 21.106870919205434 ], "wc_clarity_avg": [ 13.75, 5.584576975922169 ], "wc_relation_to_prior_work_avg": [ 13.75, 4.602988159880492 ], "wc_documentation_avg": [ 20.0, 16.73320053068151 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 388.5, 93.59887819840577 ], "wc_reply_reviewers_avg": [ 116.75, 160.96020470911435 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12054654452409362818&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;cs.toronto.edu;mails.tsinghua.edu.cn;toronto.edu;cs.tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;1;0;0", "aff_unique_norm": "Tsinghua University;University of Toronto", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "THU;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;0;1;0;1;0;0", "aff_country_unique": "China;Canada" }, { "title": "What Makes CLIP More Robust to Long-Tailed Pre-Training Data? A Controlled Study for Transferable Insights", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95296", "id": "PcyioHOmjq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PcyioHOmjq", "openreview": "https://openreview.net/forum?id=PcyioHOmjq", "poster": "/media/PosterPDFs/NeurIPS%202024/95296.png?t=1731374891.1884873", "project": "", "author_site": "Xin Wen, Bingchen Zhao, Yilun Chen, Jiangmiao Pang, Xiaojuan Qi", "tldr": "", "abstract": "Severe data imbalance naturally exists among web-scale vision-language datasets. Despite this, we find CLIP pre-trained thereupon exhibits notable robustness to the data imbalance compared to supervised learning, and demonstrates significant effectiveness in learning generalizable representations. With an aim to investigate the reasons behind this finding, we conduct controlled experiments to study various underlying factors, and reveal that CLIP's pretext task forms a dynamic classification problem wherein only a subset of classes is present in training. This isolates the bias from dominant classes and implicitly balances the learning signal. Furthermore, the robustness and discriminability of CLIP improve with more descriptive language supervision, larger data scale, and broader open-world concepts, which are inaccessible to supervised learning. Our study not only uncovers the mechanisms behind CLIP's generalizability beyond data imbalance but also provides transferable insights for the research community. The findings are validated in both supervised and self-supervised learning, enabling models trained on imbalanced data to achieve CLIP-level performance on diverse recognition tasks. Code and data are available at: https://github.com/CVMI-Lab/clip-beyond-tail.", "keywords": "CLIP;vision-language pre-training;robustness;long-tail;data imbalance;uncurated data", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xin Wen;Bingchen Zhao;Yilun Chen;Jiangmiao Pang;XIAOJUAN QI", "authorids": "~Xin_Wen3;~Bingchen_Zhao1;~Yilun_Chen1;~Jiangmiao_Pang1;~XIAOJUAN_QI2", "gender": "M;M;M;M;F", "homepage": "https://wen-xin.info;http://bzhao.me/;http://yilunchen.com/about/;https://oceanpang.github.io/;https://xjqi.github.io/", "dblp": "42/4185;120/3602;;231/7630;176/1445-1.html", "google_scholar": "byCeJl4AAAAJ;lEcqFJEAAAAJ;gKXC9Q8AAAAJ;https://scholar.google.com/citations?authuser=0;bGn0uacAAAAJ", "orcid": "0000-0003-3898-0406;;0000-0003-3372-8703;0000-0002-6711-9319;", "linkedin": "x-wen/;;yilunchen-cuhk/;;", "or_profile": "~Xin_Wen3;~Bingchen_Zhao1;~Yilun_Chen1;~Jiangmiao_Pang1;~XIAOJUAN_QI2", "aff": "The University of Hong Kong;University of Edinburgh, University of Edinburgh;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory ;University of Hong Kong", "aff_domain": "hku.hk;ed.ac.uk;pjlab.org.cn;pjlab.org.cn;hku.hk", "position": "PhD student;PhD student;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nwen2024what,\ntitle={What Makes {CLIP} More Robust to Long-Tailed Pre-Training Data? A Controlled Study for Transferable Insights},\nauthor={Xin Wen and Bingchen Zhao and Yilun Chen and Jiangmiao Pang and XIAOJUAN QI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PcyioHOmjq}\n}", "github": "", "reviewers": "XjUF;WXxS;iwVX;tULz", "pdf_size": 2159334, "rating": "4;5;5;7", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "3;2;2;4", "presentation": "3;3;3;4", "wc_summary": "50;33;66;83", "wc_strengths": "33;12;51;82", "wc_weaknesses": "242;148;144;219", "wc_questions": "76;104;6;34", "wc_limitations": "2;8;12;14", "wc_review": "403;305;279;432", "wc_reply_reviewers": "387;0;46;19", "wc_reply_authors": "1731;331;98;113", "reply_reviewers": "2;0;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 58.0, 18.560711193270585 ], "wc_strengths_avg": [ 44.5, 25.675864152935535 ], "wc_weaknesses_avg": [ 188.25, 43.04866432306582 ], "wc_questions_avg": [ 55.0, 37.69615364994153 ], "wc_limitations_avg": [ 9.0, 4.58257569495584 ], "wc_review_avg": [ 354.75, 64.24319030060695 ], "wc_reply_reviewers_avg": [ 113.0, 159.03615941036804 ], "wc_reply_authors_avg": [ 568.25, 677.6176558355014 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IqvwLEWRa3UJ:scholar.google.com/&scioq=What+Makes+CLIP+More+Robust+to+Long-Tailed+Pre-Training+Data%3F+A+Controlled+Study+for+Transferable+Insights&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "hku.hk;ed.ac.uk;pjlab.org.cn;pjlab.org.cn;hku.hk", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Hong Kong;University of Edinburgh;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hku.hk;https://www.ed.ac.uk;http://www.shailab.org/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "HKU;Edinburgh;Shanghai AI Lab;SAIL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "DapperFL: Domain Adaptive Federated Learning with Model Fusion Pruning for Edge Devices", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95295", "id": "Pezt0xttae", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pezt0xttae", "openreview": "https://openreview.net/forum?id=Pezt0xttae", "poster": "/media/PosterPDFs/NeurIPS%202024/95295.png?t=1731676853.2338245", "project": "", "author_site": "Yongzhe Jia, Xuyun Zhang, Hongsheng Hu, Kim-Kwang Raymond Choo, Lianyong Qi, Xiaolong Xu, Amin Beheshti, Wanchun Dou", "tldr": "", "abstract": "Federated learning (FL) has emerged as a prominent machine learning paradigm in edge computing environments, enabling edge devices to collaboratively optimize a global model without sharing their private data. However, existing FL frameworks suffer from efficacy deterioration due to the system heterogeneity inherent in edge computing, especially in the presence of domain shifts across local data. \nIn this paper, we propose a heterogeneous FL framework DapperFL, to enhance model performance across multiple domains. In DapperFL, we introduce a dedicated Model Fusion Pruning (MFP) module to produce personalized compact local models for clients to address the system heterogeneity challenges. The MFP module prunes local models with fused knowledge obtained from both local and remaining domains, ensuring robustness to domain shifts. Additionally, we design a Domain Adaptive Regularization (DAR) module to further improve the overall performance of DapperFL. The DAR module employs regularization generated by the pruned model, aiming to learn robust representations across domains. Furthermore, we introduce a specific aggregation algorithm for aggregating heterogeneous local models with tailored architectures and weights. We implement DapperFL on a real-world FL platform with heterogeneous clients. Experimental results on benchmark datasets with multiple domains demonstrate that DapperFL outperforms several state-of-the-art FL frameworks by up to 2.28%, while significantly achieving model volume reductions ranging from 20% to 80%. Our code is available at: https://github.com/jyzgh/DapperFL.", "keywords": "Federated learning;Model pruning;Domain adaptation;Edge intelligence", "primary_area": "infrastructure", "supplementary_material": "", "author": "Yongzhe Jia;Xuyun Zhang;Hongsheng Hu;Kim-Kwang Raymond Choo;Lianyong Qi;Xiaolong Xu;Amin Beheshti;Wanchun Dou", "authorids": "~Yongzhe_Jia1;~Xuyun_Zhang1;~Hongsheng_Hu2;~Kim-Kwang_Raymond_Choo1;~Lianyong_Qi2;~Xiaolong_Xu3;~Amin_Beheshti2;~Wanchun_Dou1", "gender": "M;M;;;;;M;M", "homepage": "https://orcid.org/0000-0002-5270-4742;https://researchers.mq.edu.au/en/persons/xuyun-zhang;;;;https://faculty.nuist.edu.cn/xuxiaolong/en/index.htm;https://data-science-group.github.io/people/aminbeheshti/;https://cs.nju.edu.cn/douwanchun/", "dblp": "253/0721;54/8558;;;;10/137-1;90/10041.htmlx;57/5595", "google_scholar": ";https://scholar.google.com.au/citations?user=wbF6HL8AAAAJ;;;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.au/citations?user=Uw1OLAgAAAAJ;", "orcid": "0000-0002-5270-4742;0000-0001-7353-4159;;;;0000-0003-4879-9803;0000-0002-5988-5494;0000-0003-4833-2023", "linkedin": ";;;;;;prof-amin-beheshti/;", "or_profile": "~Yongzhe_Jia1;~Xuyun_Zhang1;~Hongsheng_Hu2;~Kim-Kwang_Raymond_Choo1;~Lianyong_Qi2;~Xiaolong_Xu3;~Amin_Beheshti2;~Wanchun_Dou1", "aff": "Nanjing University;Macquarie University;;;;Nanjing University of Information Science and Technology;Macquarie University;Nanjing University", "aff_domain": "nju.edu.cn;mq.edu.au;;;;nuist.edu.cn;mq.edu.au;nju.edu.cn", "position": "PhD student;Associate Professor;;;;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njia2024dapperfl,\ntitle={Dapper{FL}: Domain Adaptive Federated Learning with Model Fusion Pruning for Edge Devices},\nauthor={Yongzhe Jia and Xuyun Zhang and Hongsheng Hu and Kim-Kwang Raymond Choo and Lianyong Qi and Xiaolong Xu and Amin Beheshti and Wanchun Dou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pezt0xttae}\n}", "github": "", "reviewers": "Ct3S;1ipw;pq1G;npsw", "pdf_size": 1595297, "rating": "6;6;7;8", "confidence": "4;5;5;5", "soundness": "3;4;3;3", "novelty": "3;3;4;4", "presentation": "3;4;4;3", "wc_summary": "204;64;61;63", "wc_strengths": "56;108;61;105", "wc_weaknesses": "245;193;87;358", "wc_questions": "39;90;42;92", "wc_limitations": "25;1;2;44", "wc_review": "569;456;253;662", "wc_reply_reviewers": "95;0;17;220", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 98.0, 61.208659518077994 ], "wc_strengths_avg": [ 82.5, 24.088378940891808 ], "wc_weaknesses_avg": [ 220.75, 97.57657249565594 ], "wc_questions_avg": [ 65.75, 25.282157740192982 ], "wc_limitations_avg": [ 18.0, 17.81852968120546 ], "wc_review_avg": [ 485.0, 152.52049042669643 ], "wc_reply_reviewers_avg": [ 83.0, 86.83029425263973 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BCNdqy3EMWcJ:scholar.google.com/&scioq=DapperFL:+Domain+Adaptive+Federated+Learning+with+Model+Fusion+Pruning+for+Edge+Devices&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "nju.edu.cn;mq.edu.au;;;;nuist.edu.cn;mq.edu.au;nju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Nanjing University;Macquarie University;Nanjing University of Information Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nju.edu.cn;https://www.mq.edu.au;http://www.nuist.edu.cn", "aff_unique_abbr": "Nanjing U;MQ;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Scaling Proprioceptive-Visual Learning with Heterogeneous Pre-trained Transformers", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95294", "id": "Pf7kdIjHRf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pf7kdIjHRf", "openreview": "https://openreview.net/forum?id=Pf7kdIjHRf", "poster": "/media/PosterPDFs/NeurIPS%202024/95294.png?t=1729045777.586279", "project": "", "author_site": "Lirui Wang, Xinlei Chen, Jialiang Zhao, Kaiming He", "tldr": "", "abstract": "One of the roadblocks for training generalist robotic models today is heterogeneity. Previous robot learning methods often collect data to train with one specific embodiment for one task, which is expensive and prone to overfitting. This work studies the problem of learning policy representations through heterogeneous pre-training on robot data across different embodiments and tasks at scale. We propose Heterogeneous Pre-trained Transformers (HPT), which pre-train a large, shareable trunk of a policy neural network to learn a task and embodiment agnostic shared representation. This general architecture aligns the specific proprioception and vision inputs from distinct embodiments to a short sequence of tokens and then processes such tokens to map to control robots for different tasks. Leveraging the recent large-scale multi-embodiment real-world robotic datasets as well as simulation, deployed robots, and human video datasets, we investigate pre-training policies across heterogeneity. We conduct experiments to investigate the scaling behaviors of training objectives, to the extent of 52 datasets. HPTs outperform several baselines and enhance the fine-tuned policy performance by over 20% on unseen tasks in multiple simulator benchmarks and real-world settings. See the project website (liruiw.github.io/hpt) for code and videos.", "keywords": "heterogeneous robot learning;heterogeneous pre-trained transformer;scaling law for robotics;robotic foundation model", "primary_area": "robotics", "supplementary_material": "", "author": "Lirui Wang;Xinlei Chen;Jialiang Zhao;Kaiming He", "authorids": "~Lirui_Wang1;~Xinlei_Chen1;~Jialiang_Zhao1;~Kaiming_He2", "gender": "M;M;M;", "homepage": "https://liruiw.github.io/;http://xinleic.xyz;https://alanz.info/;", "dblp": "221/9612;;204/1900;", "google_scholar": "EM9YhH0AAAAJ;bSU7LYoAAAAJ;LaW7igYAAAAJ;", "orcid": ";;;", "linkedin": ";;jialiang-zhao/;", "or_profile": "~Lirui_Wang1;~Xinlei_Chen1;~Jialiang_Zhao1;~Kaiming_He2", "aff": "Massachusetts Institute of Technology;Meta;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;meta.com;mit.edu;", "position": "PhD student;Researcher;PhD student;", "bibtex": "@inproceedings{\nwang2024scaling,\ntitle={Scaling Proprioceptive-Visual Learning with Heterogeneous Pre-trained Transformers},\nauthor={Lirui Wang and Xinlei Chen and Jialiang Zhao and Kaiming He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pf7kdIjHRf}\n}", "github": "", "reviewers": "wUp5;KFGf;Vi4L;AQbN", "pdf_size": 5898796, "rating": "3;6;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "67;102;39;146", "wc_strengths": "23;112;30;114", "wc_weaknesses": "133;323;304;200", "wc_questions": "1;117;70;93", "wc_limitations": "1;1;1;7", "wc_review": "225;655;444;560", "wc_reply_reviewers": "356;246;131;91", "wc_reply_authors": "1961;654;713;0", "reply_reviewers": "2;2;1;1", "reply_authors": "7;3;5;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 40.00312487793922 ], "wc_strengths_avg": [ 69.75, 43.3265219005634 ], "wc_weaknesses_avg": [ 240.0, 77.51451476981585 ], "wc_questions_avg": [ 70.25, 43.29766159967534 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 471.0, 160.48520181001112 ], "wc_reply_reviewers_avg": [ 206.0, 103.62190888031353 ], "wc_reply_authors_avg": [ 832.0, 709.3500546274738 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.0, 2.23606797749979 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14171734420618683881&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;meta.com;mit.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://web.mit.edu;https://meta.com", "aff_unique_abbr": "MIT;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Algebraic Positional Encodings", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95293", "id": "PfOeAKxx6i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PfOeAKxx6i", "openreview": "https://openreview.net/forum?id=PfOeAKxx6i", "poster": "/media/PosterPDFs/NeurIPS%202024/95293.png?t=1732708892.7872577", "project": "", "author_site": "Konstantinos Kogkalidis, Jean-Philippe Bernardy, Vikas Garg", "tldr": "", "abstract": "We introduce a novel positional encoding strategy for Transformer-style models, addressing the shortcomings of existing, often ad hoc, approaches. Our framework implements a flexible mapping from the algebraic specification of a domain to a positional encoding scheme where positions are interpreted as orthogonal operators. This design preserves the structural properties of the source domain, thereby ensuring that the end-model upholds them. The framework can accommodate various structures, including sequences, grids and trees, but also their compositions. We conduct a series of experiments demonstrating the practical applicability of our method. Our results suggest performance on par with or surpassing the current state of the art, without hyper-parameter optimizations or ``task search'' of any kind.\nCode is available through https://aalto-quml.github.io/ape/.", "keywords": "positional encodings;transformers;structured attention;group theory", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Konstantinos Kogkalidis;Jean-Philippe Bernardy;Vikas Garg", "authorids": "~Konstantinos_Kogkalidis1;~Jean-Philippe_Bernardy1;~Vikas_Garg2", "gender": "Not Specified;Unspecified;", "homepage": "https://konstantinoskokos.github.io/;;", "dblp": "241/9652;47/929;", "google_scholar": ";https://scholar.google.se/citations?user=KwPrvYQAAAAJ;", "orcid": ";my-orcid?orcid=0000-0002-8469-5617;", "linkedin": ";;", "or_profile": "~Konstantinos_Kogkalidis1;~Jean-Philippe_Bernardy1;~Vikas_Garg2", "aff": "Aalto University;Gothenburg University;", "aff_domain": "aalto.fi;gu.se;", "position": "Postdoc;Researcher;", "bibtex": "@inproceedings{\nkogkalidis2024algebraic,\ntitle={Algebraic Positional Encodings},\nauthor={Konstantinos Kogkalidis and Jean-Philippe Bernardy and Vikas Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PfOeAKxx6i}\n}", "github": "", "reviewers": "PGKE;F3X8;xNaW;18aa;8qDc", "pdf_size": 404830, "rating": "6;7;7;8;8", "confidence": "4;3;3;4;4", "soundness": "3;4;4;4;4", "novelty": "3;3;4;4;4", "presentation": "3;4;3;2;4", "wc_summary": "107;55;50;71;91", "wc_strengths": "39;57;94;36;110", "wc_weaknesses": "128;179;32;159;67", "wc_questions": "5;70;51;17;5", "wc_limitations": "7;12;9;4;10", "wc_review": "286;373;236;287;283", "wc_reply_reviewers": "198;799;0;150;20", "wc_reply_authors": "460;1101;0;12;0", "reply_reviewers": "3;3;0;1;1", "reply_authors": "3;4;1;2;1", "rating_avg": [ 7.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 74.8, 21.54437281519237 ], "wc_strengths_avg": [ 67.2, 29.741553422778708 ], "wc_weaknesses_avg": [ 113.0, 55.45087916345421 ], "wc_questions_avg": [ 29.6, 26.30285155643775 ], "wc_limitations_avg": [ 8.4, 2.727636339397171 ], "wc_review_avg": [ 293.0, 44.34861891874424 ], "wc_reply_reviewers_avg": [ 233.4, 292.6182496017636 ], "wc_reply_authors_avg": [ 314.6, 431.0636147948467 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 2.2, 1.16619037896906 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4114856143100838063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "aalto.fi;gu.se;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Aalto University;Gothenburg University", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.gu.se", "aff_unique_abbr": "Aalto;GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Finland;Sweden" }, { "title": "On Sampling Strategies for Spectral Model Sharding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95292", "id": "PgTHgLUFi3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PgTHgLUFi3", "openreview": "https://openreview.net/forum?id=PgTHgLUFi3", "poster": "/media/PosterPDFs/NeurIPS%202024/95292.png?t=1731089789.2379181", "project": "", "author_site": "Denis Korzhenkov, Christos Louizos", "tldr": "", "abstract": "The problem of heterogeneous clients in federated learning has recently drawn a lot of attention. Spectral model sharding, i.e., partitioning the model parameters into low-rank matrices based on the singular value decomposition, has been one of the proposed solutions for more efficient on-device training in such settings. In this work we present two sampling strategies for such sharding, obtained as solutions to specific optimization problems. The first produces unbiased estimators of the original weights, while the second aims to minimize the squared approximation error. We discuss how both of these estimators can be incorporated in the federated learning loop and practical considerations that arise during local training. Empirically, we demonstrate that both of these methods can lead to improved performance in various commonly used datasets.", "keywords": "federated learning;singular vector decomposition;heterogeneous devices", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Denis Korzhenkov;Christos Louizos", "authorids": "~Denis_Korzhenkov1;~Christos_Louizos1", "gender": "M;", "homepage": ";", "dblp": "230/3666;", "google_scholar": "https://scholar.google.ru/citations?user=ypspak0AAAAJ;", "orcid": ";", "linkedin": "denis-korzhenkov-5b498512a/;", "or_profile": "~Denis_Korzhenkov1;~Christos_Louizos1", "aff": "Qualcomm Inc, QualComm;", "aff_domain": "qti.qualcomm.com;", "position": "Researcher;", "bibtex": "@inproceedings{\nkorzhenkov2024on,\ntitle={On Sampling Strategies for Spectral Model Sharding},\nauthor={Denis Korzhenkov and Christos Louizos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PgTHgLUFi3}\n}", "github": "", "reviewers": "qPbH;7vsp;fYuS", "pdf_size": 1165250, "rating": "5;5;6", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "117;125;57", "wc_strengths": "100;247;50", "wc_weaknesses": "211;295;29", "wc_questions": "126;73;114", "wc_limitations": "1;18;12", "wc_review": "555;758;262", "wc_reply_reviewers": "23;58;13", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.66666666666667, 30.34615113797611 ], "wc_strengths_avg": [ 132.33333333333334, 83.61153562092308 ], "wc_weaknesses_avg": [ 178.33333333333334, 111.02352103145633 ], "wc_questions_avg": [ 104.33333333333333, 22.691163233490013 ], "wc_limitations_avg": [ 10.333333333333334, 7.039570693980959 ], "wc_review_avg": [ 525.0, 203.59927963199345 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 19.293061504650375 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XyhF0eoS8VUJ:scholar.google.com/&scioq=On+Sampling+Strategies+for+Spectral+Model+Sharding&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "qti.qualcomm.com;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Qualcomm Incorporated", "aff_unique_dep": "", "aff_unique_url": "https://www.qualcomm.com", "aff_unique_abbr": "Qualcomm", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Provable Posterior Sampling with Denoising Oracles via Tilted Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95291", "id": "PhLlE8UOEv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PhLlE8UOEv", "openreview": "https://openreview.net/forum?id=PhLlE8UOEv", "poster": "/media/PosterPDFs/NeurIPS%202024/95291.png?t=1729100654.0282674", "project": "", "author_site": "Joan Bruna, Jiequn Han", "tldr": "", "abstract": "Score-based diffusion models have significantly advanced high-dimensional data generation across various domains, by learning a denoising oracle (or score) from datasets. From a Bayesian perspective, they offer a realistic modeling of data priors and facilitate solving inverse problems through posterior sampling. Although many heuristic methods have been developed recently for this purpose, they lack the quantitative guarantees needed in many scientific applications. This work addresses the topic from two perspectives. We first present a hardness result indicating that a generic method leveraging the prior denoising oracle for posterior sampling becomes infeasible as soon as the measurement operator is mildly ill-conditioned. We next develop the *tilted transport* technique, which leverages the quadratic structure of the log-likelihood in linear inverse problems in combination with the prior denoising oracle to exactly transform the original posterior sampling problem into a new one that is provably easier to sample from. We quantify the conditions under which the boosted posterior is strongly log-concave, highlighting how task difficulty depends on the condition number of the measurement matrix and the signal-to-noise ratio. The resulting general scheme is shown to match the best-known sampling methods for Ising models, and is further validated on high-dimensional Gaussian mixture models.", "keywords": "diffusion based model; posterior sampling; inverse problem; provable sampling", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/84fb7d0391412ea3c307cc7abe0768b95583483d.zip", "author": "Joan Bruna;Jiequn Han", "authorids": "~Joan_Bruna1;~Jiequn_Han1", "gender": "M;M", "homepage": "http://cims.nyu.edu/~bruna;https://users.flatironinstitute.org/~jhan/", "dblp": "44/8776;190/7087", "google_scholar": "L4bNmsMAAAAJ;el5gT4AAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Joan_Bruna1;~Jiequn_Han1", "aff": "New York University;Simons Foundation", "aff_domain": "nyu.edu;simonsfoundation.org", "position": "Associate Professor;Researcher", "bibtex": "@inproceedings{\nbruna2024provable,\ntitle={Provable Posterior Sampling with Denoising Oracles via Tilted Transport},\nauthor={Joan Bruna and Jiequn Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PhLlE8UOEv}\n}", "github": "", "reviewers": "SHHR;jPCg;G6DH;ue87", "pdf_size": 1039708, "rating": "6;6;8;8", "confidence": "3;4;4;1", "soundness": "4;3;4;4", "novelty": "3;3;4;4", "presentation": "1;3;4;4", "wc_summary": "71;105;145;98", "wc_strengths": "16;35;58;12", "wc_weaknesses": "416;142;46;29", "wc_questions": "109;3;1;16", "wc_limitations": "2;32;1;1", "wc_review": "614;317;251;156", "wc_reply_reviewers": "163;15;0;5", "wc_reply_authors": "758;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 104.75, 26.479945241635225 ], "wc_strengths_avg": [ 30.25, 18.226011631731172 ], "wc_weaknesses_avg": [ 158.25, 154.92316644065858 ], "wc_questions_avg": [ 32.25, 44.68430932665291 ], "wc_limitations_avg": [ 9.0, 13.285330255586423 ], "wc_review_avg": [ 334.5, 171.2169676171144 ], "wc_reply_reviewers_avg": [ 45.75, 67.90940656492295 ], "wc_reply_authors_avg": [ 189.5, 328.22362803430224 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:i-eXo2_oRzgJ:scholar.google.com/&scioq=Provable+Posterior+Sampling+with+Denoising+Oracles+via+Tilted+Transport&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "nyu.edu;simonsfoundation.org", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;Simons Foundation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.simonsfoundation.org", "aff_unique_abbr": "NYU;Simons Foundation", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PSL: Rethinking and Improving Softmax Loss from Pairwise Perspective for Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95290", "id": "PhjnK9KWOx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PhjnK9KWOx", "openreview": "https://openreview.net/forum?id=PhjnK9KWOx", "poster": "/media/PosterPDFs/NeurIPS%202024/95290.png?t=1731773662.3734531", "project": "", "author_site": "Weiqin Yang, Jiawei Chen, Xin Xin, Sheng Zhou, Binbin Hu, Yan Feng, Chun Chen, Can Wang", "tldr": "", "abstract": "Softmax Loss (SL) is widely applied in recommender systems (RS) and has demonstrated effectiveness. This work analyzes SL from a pairwise perspective, revealing two significant limitations: 1) the relationship between SL and conventional ranking metrics like DCG is not sufficiently tight; 2) SL is highly sensitive to false negative instances. Our analysis indicates that these limitations are primarily due to the use of the exponential function. To address these issues, this work extends SL to a new family of loss functions, termed Pairwise Softmax Loss (PSL), which replaces the exponential function in SL with other appropriate activation functions. While the revision is minimal, we highlight three merits of PSL: 1) it serves as a tighter surrogate for DCG with suitable activation functions; 2) it better balances data contributions; and 3) it acts as a specific BPR loss enhanced by Distributionally Robust Optimization (DRO). We further validate the effectiveness and robustness of PSL through empirical experiments. The code is available at https://github.com/Tiny-Snow/IR-Benchmark.", "keywords": "Recommender Systems;Ranking Metrics Optimization;Surrogate Loss;Distributionally Robust Optimization", "primary_area": "other", "supplementary_material": "/attachment/25a882b5eaac93d6a42d46659c0f5554213d71a1.zip", "author": "Weiqin Yang;Jiawei Chen;Xin Xin;Sheng Zhou;Binbin Hu;Yan Feng;Chun Chen;Can Wang", "authorids": "~Weiqin_Yang1;~Jiawei_Chen6;~Xin_Xin2;~Sheng_Zhou1;~Binbin_Hu1;~Yan_Feng1;~Chun_Chen1;~Can_Wang5", "gender": ";M;M;M;M;;M;M", "homepage": "https://tiny-snow.github.io/;https://jiawei-chen.github.io/;;https://zhoushengisnoob.github.io/;https://librahu.github.io/;https://person.zju.edu.cn/0085162;https://person.zju.edu.cn/en/0082004;https://person.zju.edu.cn/en/wangcan", "dblp": "313/4545-2.html;03/1390-7;;34/4858-4.html;;62/3960-5.html;07/4182-0001.html;71/4716-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;cYvuOUMAAAAJ;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;a70Jt9oAAAAJ;;;https://scholar.google.fr/citations?user=C63q3HoAAAAJ", "orcid": "0000-0002-5750-5515;0000-0002-4752-2629;;0000-0003-3645-1041;0000-0002-2505-1619;0000-0002-3605-5404;0000-0002-6198-7481;0000-0002-5890-4307", "linkedin": ";;;;;;;", "or_profile": "~Weiqin_Yang1;~Jiawei_Chen6;~Xin_Xin2;~Sheng_Zhou1;~Binbin_Hu1;~Yan_Feng1;~Chun_Chen1;~Can_Wang5", "aff": "Shandong University;Zhejiang University;;Zhejiang University;Ant Group;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "sdu.edu.cn;zju.edu.cn;;zju.edu.cn;antfin.com;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "Undergrad student;Researcher;;Associate Professor;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024psl,\ntitle={{PSL}: Rethinking and Improving Softmax Loss from Pairwise Perspective for Recommendation},\nauthor={Weiqin Yang and Jiawei Chen and Xin Xin and Sheng Zhou and Binbin Hu and Yan Feng and Chun Chen and Can Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PhjnK9KWOx}\n}", "github": "", "reviewers": "m2Qo;kuk6;VFmg;ufck", "pdf_size": 873010, "rating": "5;5;6;8", "confidence": "4;3;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "110;51;64;89", "wc_strengths": "52;44;59;145", "wc_weaknesses": "88;96;90;143", "wc_questions": "320;39;35;129", "wc_limitations": "83;6;1;2", "wc_review": "653;236;249;508", "wc_reply_reviewers": "10;11;15;21", "wc_reply_authors": "455;9;9;562", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.5, 22.74313083108832 ], "wc_strengths_avg": [ 75.0, 40.761501444377636 ], "wc_weaknesses_avg": [ 104.25, 22.56518335843961 ], "wc_questions_avg": [ 130.75, 115.54733878372102 ], "wc_limitations_avg": [ 23.0, 34.69149751740331 ], "wc_review_avg": [ 411.5, 176.66422954293833 ], "wc_reply_reviewers_avg": [ 14.25, 4.322904116447646 ], "wc_reply_authors_avg": [ 258.75, 252.5988667828896 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9637684000588214962&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sdu.edu.cn;zju.edu.cn;;zju.edu.cn;antfin.com;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;2;1;1;1", "aff_unique_norm": "Shandong University;Zhejiang University;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.zju.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "SDU;ZJU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ENAT: Rethinking Spatial-temporal Interactions in Token-based Image Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95289", "id": "PhsYFyTeHr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PhsYFyTeHr", "openreview": "https://openreview.net/forum?id=PhsYFyTeHr", "poster": "/media/PosterPDFs/NeurIPS%202024/95289.png?t=1731469228.2314885", "project": "", "author_site": "Zanlin Ni, Yulin Wang, Renping Zhou, Yizeng Han, Jiayi Guo, Zhiyuan Liu, Yuan Yao, Gao Huang", "tldr": "", "abstract": "Recently, token-based generation approaches have demonstrated their effectiveness in synthesizing visual content. As a representative example, non-autoregressive Transformers (NATs) can generate decent-quality images in just a few steps. NATs perform generation in a progressive manner, where the latent tokens of a resulting image are incrementally revealed step-by-step. At each step, the unrevealed image regions are padded with [MASK] tokens and inferred by NAT, with the most reliable predictions preserved as newly revealed, visible tokens. In this paper, we delve into understanding the mechanisms behind the effectiveness of NATs and uncover two important interaction patterns that naturally emerge from NAT\u2019s paradigm: Spatially (within a step), although [MASK] and visible tokens are processed uniformly by NATs, the interactions between them are highly asymmetric. In specific, [MASK] tokens mainly gather information for decoding. On the contrary, visible tokens tend to primarily provide information, and their deep representations can be built only upon themselves. Temporally (across steps), the interactions between adjacent generation steps mostly concentrate on updating the representations of a few critical tokens, while the computation for the majority of tokens is generally repetitive. Driven by these findings, we propose EfficientNAT (ENAT), a NAT model that explicitly encourages these critical interactions inherent in NATs. At the spatial level, we disentangle the computations of visible and [MASK] tokens by encoding visible tokens independently, while decoding [MASK] tokens conditioned on the fully encoded visible tokens. At the temporal level, we prioritize the computation of the critical tokens at each step, while maximally reusing previously computed token representations to supplement necessary information. ENAT improves the performance of NATs notably with significantly reduced computational cost. Experiments on ImageNet-256 2 & 512 2 and MS-COCO validate the effectiveness of ENAT. Code and pre-trained models will be released at https://github.com/LeapLabTHU/ENAT.", "keywords": "Token-based Generation; Non-autoregressive Transformers", "primary_area": "generative_models", "supplementary_material": "", "author": "Zanlin Ni;Yulin Wang;Renping Zhou;Yizeng Han;Jiayi Guo;Zhiyuan Liu;Yuan Yao;Gao Huang", "authorids": "~Zanlin_Ni1;~Yulin_Wang1;~Renping_Zhou1;~Yizeng_Han1;~Jiayi_Guo2;~Zhiyuan_Liu1;~Yuan_Yao12;~Gao_Huang1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://www.wyl.cool/;;https://yizenghan.top/;https://jiayiguo821.github.io/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://yaoyuanthu.github.io/;http://www.gaohuang.net", "dblp": ";;;217/9548;;53/3245-1;;", "google_scholar": "https://scholar.google.com/citations?hl=en;gBP38gcAAAAJ;zNc7Eg8AAAAJ;25mubAsAAAAJ;2p6GCEEAAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.hk/citations?user=3NWfi3YAAAAJ;-P9LwcgAAAAJ", "orcid": ";0000-0002-1363-0234;;;;0000-0002-7709-2543;;", "linkedin": "zanlin-ni-83582215a/;;rpzh/;;;;;", "or_profile": "~Zanlin_Ni1;~Yulin_Wang1;~Renping_Zhou1;~Yizeng_Han1;~Jiayi_Guo2;~Zhiyuan_Liu1;~Yuan_Yao12;~Gao_Huang1", "aff": "Tsinghua University;Tsinghua University;, Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;National University of Singapore;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;nus.edu;tsinghua.edu.cn", "position": "PhD student;PhD student;Undergrad student;PhD student;PhD student;Associate Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nni2024enat,\ntitle={{ENAT}: Rethinking Spatial-temporal Interactions in Token-based Image Synthesis},\nauthor={Zanlin Ni and Yulin Wang and Renping Zhou and Yizeng Han and Jiayi Guo and Zhiyuan Liu and Yuan Yao and Gao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PhsYFyTeHr}\n}", "github": "", "reviewers": "BoDK;bqgC;GVb9;PoGn", "pdf_size": 3268196, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;2;3;3", "wc_summary": "63;96;70;69", "wc_strengths": "32;70;90;74", "wc_weaknesses": "106;434;90;77", "wc_questions": "5;151;3;147", "wc_limitations": "14;10;1;1", "wc_review": "220;761;254;368", "wc_reply_reviewers": "0;22;0;9", "wc_reply_authors": "0;41;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 12.698425099200294 ], "wc_strengths_avg": [ 66.5, 21.277922830953212 ], "wc_weaknesses_avg": [ 176.75, 148.87809610550505 ], "wc_questions_avg": [ 76.5, 72.51723932969318 ], "wc_limitations_avg": [ 6.5, 5.678908345800274 ], "wc_review_avg": [ 400.75, 215.09227670932307 ], "wc_reply_reviewers_avg": [ 7.75, 9.01041064547005 ], "wc_reply_authors_avg": [ 10.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17967244437544740856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;nus.edu;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "PlBUSoSUJG", "title": "Policy Gradient with Tree Expansion", "track": "main", "status": "Reject", "tldr": "", "abstract": "Policy gradient methods are notorious for having a large variance and high sample complexity. To mitigate this, we introduce SoftTreeMax---a generalization of softmax that employs planning. In SoftTreeMax, we extend the traditional logits with the multi-step discounted cumulative reward, topped with the logits of future states. We analyze SoftTreeMax and explain how tree expansion helps to reduce its gradient variance. We prove that the variance decays exponentially with the planning horizon as a function of the chosen tree-expansion policy. Specifically, we show that the closer the induced transitions are to being state-independent, the stronger the decay. With approximate forward models, we prove that the resulting gradient bias diminishes with the approximation error while retaining the same variance reduction. Ours is the first result to bound the gradient bias for an approximate model. In a practical implementation of SoftTreeMax, we utilize a parallel GPU-based simulator for fast and efficient tree expansion. Using this implementation in Atari, we show that SoftTreeMax reduces the gradient variance by three orders of magnitude. This leads to better sample complexity and improved performance compared to distributed PPO.", "keywords": "Policy Gradient;Softmax policy;Tree expansion;Model-Based RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/f17a7ad6fc63108c7dd6f1b7b6cc1d4baefedc24.zip", "author": "Gal Dalal;Assaf Hallak;Gugan Thoppe;Shie Mannor;Gal Chechik", "authorids": "~Gal_Dalal2;~Assaf_Hallak1;~Gugan_Thoppe1;~Shie_Mannor2;~Gal_Chechik1", "gender": "M;M;M;;M", "homepage": ";;https://shie.net.technion.ac.il;https://chechiklab.biu.ac.il/~gal/;", "dblp": "117/9126;117/3710;20/1669;c/GalChechik;166/1605", "google_scholar": ";https://scholar.google.co.in/citations?user=X5zV3s8AAAAJ;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ;Wk2gAZUAAAAJ;https://scholar.google.co.il/citations?user=NfJiSMMAAAAJ", "orcid": "0000-0001-7915-9206;;;0000-0001-9164-5303;0000-0002-3166-4251", "linkedin": ";;;;galdalal/", "or_profile": "~Assaf_Hallak1;~Gugan_Thoppe1;~Shie_Mannor2;~Gal_Chechik1;~Gal_Dalal1", "aff": "NVIDIA;Indian Institute of Science;Technion - Israel Institute of Technology, Technion;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;iisc.ac.in;technion.il;nvidia.com;nvidia.com", "position": "Senior research scientist;Assistant Professor;Full Professor;Principal Researcher;Senior Research Scientist", "bibtex": "@misc{\nanonymous2024policy,\ntitle={Policy Gradient with Tree Expansion},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=PlBUSoSUJG}\n}", "github": "", "project": "", "reviewers": "m94G;mmxv;dZVs;d2zq", "site": "https://openreview.net/forum?id=PlBUSoSUJG", "pdf_size": 3339864, "rating": "5;7;7;7", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "3;4;3;3", "presentation": "2;2;3;3", "wc_summary": "86;142;192;159", "wc_strengths": "86;103;102;305", "wc_weaknesses": "559;256;265;447", "wc_questions": "87;329;3;481", "wc_limitations": "1;2;1;287", "wc_review": "819;832;563;1679", "wc_reply_reviewers": "173;243;0;394", "wc_reply_authors": "102;472;0;340", "reply_reviewers": "1;2;0;2", "reply_authors": "2;3;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 144.75, 38.38863764188565 ], "wc_strengths_avg": [ 149.0, 90.31887953246542 ], "wc_weaknesses_avg": [ 381.75, 127.59187866004639 ], "wc_questions_avg": [ 225.0, 190.18412131405714 ], "wc_limitations_avg": [ 72.75, 123.69796885963811 ], "wc_review_avg": [ 973.25, 421.3468731342384 ], "wc_reply_reviewers_avg": [ 202.5, 141.58831166448732 ], "wc_reply_authors_avg": [ 228.5, 187.0421075586992 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GqisPYrSBecJ:scholar.google.com/&scioq=Policy+Gradient+with+Tree+Expansion&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "NVIDIA;Indian Institute of Science;Technion - Israel Institute of Technology", "aff_unique_dep": "NVIDIA Corporation;;", "aff_unique_url": "https://www.nvidia.com;https://www.iisc.ac.in;https://www.technion.ac.il", "aff_unique_abbr": "NVIDIA;IISc;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "United States;India;Israel" }, { "title": "SM3-Text-to-Query: Synthetic Multi-Model Medical Text-to-Query Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97708", "id": "Pm0UzCehgB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pm0UzCehgB", "openreview": "https://openreview.net/forum?id=Pm0UzCehgB", "poster": "/media/PosterPDFs/NeurIPS%202024/97708.png?t=1731577919.4025", "project": "", "author_site": "Sithursan Sivasubramaniam, Cedric E. Osei-Akoto, Yi Zhang, Kurt Stockinger, Jonathan Fuerst", "tldr": "", "abstract": "Electronic health records (EHRs) are stored in various database systems with different database models on heterogeneous storage architectures, such as relational databases, document stores, or graph databases. These different database models have a big impact on query complexity and performance. While this has been a known fact in database research, its implications for the growing number of Text-to-Query systems have surprisingly not been investigated so far.\nIn this paper, we present SM3-Text-to-Query, the first multi-model medical Text-to-Query benchmark based on synthetic patient data from Synthea, following the SNOMED-CT taxonomy---a widely used knowledge graph ontology covering medical terminology. SM3-Text-to-Query provides data representations for relational databases (PostgreSQL), document stores (MongoDB), and graph databases (Neo4j and GraphDB (RDF)), allowing the evaluation across four popular query languages, namely SQL, MQL, Cypher, and SPARQL.\nWe systematically and manually develop 408 template questions, which we augment to construct a benchmark of 10K diverse natural language question/query pairs for these four query languages (40K pairs overall). On our dataset, we evaluate several common in-context-learning (ICL) approaches for a set of representative closed and open-source LLMs.\nOur evaluation sheds light on the trade-offs between database models and query languages for different ICL strategies and LLMs. Last,\nSM3-Text-to-Query is easily extendable to additional query languages or real, standard-based patient databases.", "keywords": "Text-to-Query;Text-to-SQL;Text-to-SPARQL;Text-to-Cypher;Text-to-MQL;Multi-model;Medical;Question Answering;Dataset;SNOMED", "primary_area": "", "supplementary_material": "/attachment/96ae3ee60e933cd8f909519aa3d52571e1fa31cb.zip", "author": "Sithursan Sivasubramaniam;Cedric Osei-Akoto;Yi Zhang;Kurt Stockinger;Jonathan Fuerst", "authorids": "~Sithursan_Sivasubramaniam1;~Cedric_Osei-Akoto1;~Yi_Zhang83;~Kurt_Stockinger1;~Jonathan_Fuerst1", "gender": "M;M;M;M;", "homepage": ";;;https://www.zhaw.ch/en/about-us/person/stog/;", "dblp": ";;;38/5145;139/9885", "google_scholar": ";;l7hWgZsAAAAJ;0C1Ow3gAAAAJ;https://scholar.google.de/citations?user=1qY48AQAAAAJ", "orcid": ";;;0000-0003-4034-4812;", "linkedin": "sithursan-sivasubramaniam-a7661b291/;cedric-o-a73b38166/;;;", "or_profile": "~Sithursan_Sivasubramaniam1;~Cedric_Osei-Akoto1;~Yi_Zhang83;~Kurt_Stockinger1;~Jonathan_Fuerst1", "aff": "ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;University of Washington;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften", "aff_domain": "zhaw.ch;zhaw.ch;zhaw.ch;uw.edu;zhaw.ch", "position": "Undergrad student;Undergrad student;Researcher;Intern;Associate Professor", "bibtex": "@inproceedings{\nsivasubramaniam2024smtexttoquery,\ntitle={{SM}3-Text-to-Query: Synthetic Multi-Model Medical Text-to-Query Benchmark},\nauthor={Sithursan Sivasubramaniam and Cedric Osei-Akoto and Yi Zhang and Kurt Stockinger and Jonathan Fuerst},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Pm0UzCehgB}\n}", "github": "", "reviewers": "oe2b;gjFK;BzGp", "pdf_size": 1031274, "rating": "6;6;8", "confidence": "4;4;4", "wc_summary_and_contributions": "74;140;73", "wc_strengths": "2;11;87", "wc_improvement": "2;266;254", "wc_limitations": "2;1;1", "wc_correctness": "1;22;1", "wc_clarity": "1;6;1", "wc_relation_to_prior_work": "1;13;1", "wc_documentation": "1;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "85;475;420", "wc_reply_reviewers": "0;164;0", "wc_reply_authors": "0;34;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 95.66666666666667, 31.351058816073323 ], "wc_strengths_avg": [ 33.333333333333336, 38.12552367582058 ], "wc_improvement_avg": [ 174.0, 121.72099243762351 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_correctness_avg": [ 8.0, 9.899494936611665 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 5.0, 5.656854249492381 ], "wc_documentation_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 326.6666666666667, 172.35299694393353 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 77.3103414097292 ], "wc_reply_authors_avg": [ 11.333333333333334, 16.027753706895076 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2402004346829474902&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "zhaw.ch;zhaw.ch;zhaw.ch;uw.edu;zhaw.ch", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.zhaw.ch;https://www.washington.edu", "aff_unique_abbr": "ZHAW;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Interpretable Mesomorphic Networks for Tabular Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95288", "id": "PmLty7tODm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PmLty7tODm", "openreview": "https://openreview.net/forum?id=PmLty7tODm", "poster": "/media/PosterPDFs/NeurIPS%202024/95288.png?t=1731965789.120006", "project": "", "author_site": "Arlind Kadra, Sebastian Pineda Arango, Josif Grabocka", "tldr": "", "abstract": "Even though neural networks have been long deployed in applications involving tabular data, still existing neural architectures are not explainable by design. In this paper, we propose a new class of interpretable neural networks for tabular data that are both deep and linear at the same time (i.e. mesomorphic). We optimize deep hypernetworks to generate explainable linear models on a per-instance basis. As a result, our models retain the accuracy of black-box deep networks while offering free-lunch explainability for tabular data by design. Through extensive experiments, we demonstrate that our explainable deep networks have comparable performance to state-of-the-art classifiers on tabular data and outperform current existing methods that are explainable by design.", "keywords": "explainability;deep neural networks;tabular data;hypernetwork;interpretability;explainable benchmark;xai.", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Arlind Kadra;Sebastian Pineda Arango;Josif Grabocka", "authorids": "~Arlind_Kadra1;~Sebastian_Pineda_Arango1;~Josif_Grabocka1", "gender": "M;M;M", "homepage": ";;https://www.utn.de/departments/department-engineering/machine-learning-lab/", "dblp": "252/5295;271/4257;117/4936", "google_scholar": "bMa0KUcAAAAJ;8UI_0B0AAAAJ;KRy27XcAAAAJ", "orcid": "0000-0001-9308-6576;;", "linkedin": ";sebaspine/;", "or_profile": "~Arlind_Kadra1;~Sebastian_Pineda_Arango1;~Josif_Grabocka1", "aff": "Universit\u00e4t Freiburg;Universit\u00e4t Freiburg;University of Technology Nuremberg", "aff_domain": "uni-freiburg.de;uni-freiburg.de;utn.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkadra2024interpretable,\ntitle={Interpretable Mesomorphic Networks for Tabular Data},\nauthor={Arlind Kadra and Sebastian Pineda Arango and Josif Grabocka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PmLty7tODm}\n}", "github": "", "reviewers": "hm6c;csKM;uYeq;qRx1", "pdf_size": 909431, "rating": "6;6;7;7", "confidence": "4;2;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "47;76;145;52", "wc_strengths": "60;77;61;74", "wc_weaknesses": "67;64;244;94", "wc_questions": "78;2;110;41", "wc_limitations": "4;1;2;1", "wc_review": "256;220;562;262", "wc_reply_reviewers": "269;11;26;18", "wc_reply_authors": "1473;0;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "5;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.0, 39.096035604649224 ], "wc_strengths_avg": [ 68.0, 7.582875444051551 ], "wc_weaknesses_avg": [ 117.25, 74.10592081608594 ], "wc_questions_avg": [ 57.75, 40.40034034510106 ], "wc_limitations_avg": [ 2.0, 1.224744871391589 ], "wc_review_avg": [ 325.0, 137.77155003846042 ], "wc_reply_reviewers_avg": [ 81.0, 108.67152340884893 ], "wc_reply_authors_avg": [ 368.25, 637.827709887239 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9265971905673866645&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uni-freiburg.de;uni-freiburg.de;utn.de", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Freiburg;Nuremberg University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-freiburg.de;https://www.tu-nuernberg.de", "aff_unique_abbr": "Uni Freiburg;TUN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "PnSTlFUfcd", "title": "Shielding Regular Safety Properties in Reinforcement Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "To deploy reinforcement learning (RL) systems in real-world scenarios we need to consider requirements such as safety and constraint compliance, rather than blindly maximizing for reward. In this paper we study RL with regular safety properties. We present a constrained problem based on the satisfaction of regular safety properties with high probability and we compare our setup to the some common constrained Markov decision processes (CMDP) settings. We also present a meta-algorithm with provable safety-guarantees, that can be used to shield the agent from violating the regular safety property during training and deployment. We demonstrate the effectiveness and scalability of our framework by evaluating our meta-algorithm in both the tabular and deep RL setting.", "keywords": "Safe Reinforcement Learning;Model Checking;Shielding", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b9c5a51e09f00f22f8e9640985bf3b8d6e72cb59.zip", "author": "Alex Goodall;Francesco Belardinelli", "authorids": "~Alex_Goodall1;~Francesco_Belardinelli1", "gender": "M;M", "homepage": ";https://www.doc.ic.ac.uk/~fbelard/", "dblp": "345/7795;59/2916", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=Mr35r1EAAAAJ", "orcid": "0000-0003-4659-5241;0000-0002-7768-1794", "linkedin": "alex-goodall-6aa1a94b/;", "or_profile": "~Alex_Goodall1;~Francesco_Belardinelli1", "aff": "Imperial College London;Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk", "position": "PhD student;Lecturer", "bibtex": "@misc{\nanonymous2024shielding,\ntitle={Shielding Regular Safety Properties in Reinforcement Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=PnSTlFUfcd}\n}", "github": "", "project": "", "reviewers": "amjX;sSLq;7jyh;5bwE;6qag", "site": "https://openreview.net/forum?id=PnSTlFUfcd", "pdf_size": 5548548, "rating": "3;3;3;7;7", "confidence": "3;4;5;4;4", "soundness": "3;1;3;4;4", "novelty": "1;2;1;3;3", "presentation": "2;1;1;4;4", "wc_summary": "68;83;98;218;90", "wc_strengths": "32;21;61;127;203", "wc_weaknesses": "145;178;679;104;318", "wc_questions": "6;116;58;102;301", "wc_limitations": "16;54;20;25;18", "wc_review": "267;452;916;576;930", "wc_reply_reviewers": "200;0;166;11;52", "wc_reply_authors": "401;0;752;0;19", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 4.6, 1.9595917942265424 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 1.0954451150103321 ], "novelty_avg": [ 2.0, 0.8944271909999159 ], "presentation_avg": [ 2.4, 1.3564659966250536 ], "wc_summary_avg": [ 111.4, 54.20553477275176 ], "wc_strengths_avg": [ 88.8, 67.96587378971891 ], "wc_weaknesses_avg": [ 284.8, 209.82602317157898 ], "wc_questions_avg": [ 116.6, 99.8831317090128 ], "wc_limitations_avg": [ 26.6, 14.022838514366484 ], "wc_review_avg": [ 628.2, 260.0572244718458 ], "wc_reply_reviewers_avg": [ 85.8, 81.94241880735521 ], "wc_reply_authors_avg": [ 234.4, 300.6490312640305 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qVj7ghin59QJ:scholar.google.com/&scioq=Shielding+Regular+Safety+Properties+in+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "SD-Eval: A Benchmark Dataset for Spoken Dialogue Understanding Beyond Words", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97707", "id": "PnjbvbblGv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PnjbvbblGv", "openreview": "https://openreview.net/forum?id=PnjbvbblGv", "poster": "", "project": "", "author_site": "Junyi AO, Yuancheng Wang, Xiaohai Tian, Dekun Chen, Jun Zhang, Lu Lu, Yuxuan Wang, Haizhou Li, Zhizheng Wu", "tldr": "", "abstract": "Speech encompasses a wealth of information, including but not limited to content, paralinguistic, and environmental information.\nThis comprehensive nature of speech significantly impacts communication and is crucial for human-computer interaction.\nChat-Oriented Large Language Models (LLMs), known for their general-purpose assistance capabilities, have evolved to handle multi-modal inputs, including speech.\nAlthough these models can be adept at recognizing and analyzing speech, they often fall short of generating appropriate responses.\nWe argue that this is due to the lack of principles on task definition and model development, which requires open-source datasets and metrics suitable for model evaluation.\nTo bridge the gap, we present SD-Eval, a benchmark dataset aimed at multidimensional evaluation of spoken dialogue understanding and generation.\nSD-Eval focuses on paralinguistic and environmental information and includes 7,303 utterances, amounting to 8.76 hours of speech data. The data is aggregated from eight public datasets, representing four perspectives: emotion, accent, age, and background sound.\nTo assess the SD-Eval benchmark dataset, we implement three different models and construct a training set following a process similar to that of SD-Eval. The training set contains 1,052.72 hours of speech data and 724.4k utterances. \nWe also conduct a comprehensive evaluation using objective evaluation methods (e.g. BLEU and ROUGE), subjective evaluations and LLM-based metrics for the generated responses.\nModels conditioned with paralinguistic and environmental information outperform their counterparts in both objective and subjective measures.\nMoreover, experiments demonstrate that LLM-based metrics show a higher correlation with human evaluation compared to traditional metrics.\nWe open-source SD-Eval at https://github.com/amphionspace/SD-Eval.", "keywords": "spoken dialogue;large language model;benchmark;dataset;baseline", "primary_area": "", "supplementary_material": "/attachment/22af532e381b55672ee7f49fbcce250707cc5664.zip", "author": "Junyi Ao;Yuancheng Wang;Xiaohai Tian;Dekun Chen;Jun Zhang;Lu Lu;Yuxuan Wang;Haizhou Li;Zhizheng Wu", "authorids": "~Junyi_Ao1;~Yuancheng_Wang1;~Xiaohai_Tian1;~Dekun_Chen1;~Jun_Zhang42;~Lu_Lu6;~Yuxuan_Wang1;~Haizhou_Li3;~Zhizheng_Wu1", "gender": "M;M;M;M;M;M;M;M;", "homepage": "https://ajyy.github.io/;https://hecheng0625.github.io/;;;;;;https://colips.org/~eleliha/;", "dblp": "287/8250;199/2310;;;;;;36/4118;", "google_scholar": "eUiG0O0AAAAJ;60uamz4AAAAJ;6gc45QcAAAAJ;4a4GBB8AAAAJ;tgG06K4AAAAJ;IQaR2KoAAAAJ;3RaOfJkAAAAJ;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ;", "orcid": "0000-0001-8979-0835;;;;;;;0000-0001-9158-9401;", "linkedin": ";;;;;;;haizhou-li-4ba74b6/;", "or_profile": "~Junyi_Ao1;~Yuancheng_Wang1;~Xiaohai_Tian1;~Dekun_Chen1;~Jun_Zhang42;~Lu_Lu6;~Yuxuan_Wang1;~Haizhou_Li3;~Zhizheng_Wu1", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong, Shenzhen;ByteDance Inc.;The Chinese University of Hong Kong, Shenzhen;ByteDance Inc.;;ByteDance;National University of Singapore;", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;bytedance.com;link.cuhk.edu.cn;bytedance.com;;bytedance.com;nus.edu.sg;", "position": "PhD student;PhD student;Researcher;PhD student;Principal Researcher;;Researcher;Full Professor;", "bibtex": "@inproceedings{\nao2024sdeval,\ntitle={{SD}-Eval: A Benchmark Dataset for Spoken Dialogue Understanding Beyond Words},\nauthor={Junyi Ao and Yuancheng Wang and Xiaohai Tian and Dekun Chen and Jun Zhang and Lu Lu and Yuxuan Wang and Haizhou Li and Zhizheng Wu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PnjbvbblGv}\n}", "github": "", "reviewers": "jKhG;tZuJ;P62M", "pdf_size": 778139, "rating": "4;6;8", "confidence": "5;4;3", "wc_summary_and_contributions": "25;78;386", "wc_strengths": "1;48;93", "wc_improvement": "1;11;197", "wc_limitations": "81;6;13", "wc_correctness": "1;15;181", "wc_clarity": "1;1;15", "wc_relation_to_prior_work": "1;1;94", "wc_documentation": "1;10;39", "wc_additional_feedback": "1;1;1", "wc_review": "113;171;1019", "wc_reply_reviewers": "0;0;61", "wc_reply_authors": "168;168;55", "reply_reviewers": "0;0;1", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 1.632993161855452 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 163.0, 159.16239086752458 ], "wc_strengths_avg": [ 47.333333333333336, 37.56180092712394 ], "wc_improvement_avg": [ 69.66666666666667, 90.13076919429655 ], "wc_limitations_avg": [ 33.333333333333336, 33.82635395992631 ], "wc_correctness_avg": [ 65.66666666666667, 81.75301557469031 ], "wc_clarity_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_relation_to_prior_work_avg": [ 32.0, 43.840620433565945 ], "wc_documentation_avg": [ 16.666666666666668, 16.21384867602041 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 434.3333333333333, 414.099290294275 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 28.755675768252935 ], "wc_reply_authors_avg": [ 130.33333333333334, 53.26871084938658 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6093497436504891285&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cuhk.edu.cn;cuhk.edu.cn;bytedance.com;link.cuhk.edu.cn;bytedance.com;;bytedance.com;nus.edu.sg;", "author_num": 9, "aff_unique_index": "0;0;1;0;1;1;2", "aff_unique_norm": "Chinese University of Hong Kong;ByteDance;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.bytedance.com;https://www.nus.edu.sg", "aff_unique_abbr": "CUHK;ByteDance;NUS", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Hong Kong SAR;Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "SemCoder: Training Code Language Models with Comprehensive Semantics Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95287", "id": "PnlCHQrM69", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PnlCHQrM69", "openreview": "https://openreview.net/forum?id=PnlCHQrM69", "poster": "/media/PosterPDFs/NeurIPS%202024/95287.png?t=1733814151.4320748", "project": "", "author_site": "Yangruibo Ding, Jinjun Peng, Marcus Min, Gail Kaiser, Junfeng Yang, Baishakhi Ray", "tldr": "", "abstract": "Code Large Language Models (Code LLMs) have excelled at tasks like code completion but often miss deeper semantics such as execution effects and dynamic states. This paper aims to bridge the gap between Code LLMs' reliance on static text data and the need for semantic understanding for complex tasks like debugging and program repair. We introduce a novel strategy, _monologue reasoning_, to train Code LLMs to reason comprehensive semantics, encompassing high-level functional descriptions, local execution effects of individual statements, and overall input/output behavior, thereby linking static code text with dynamic execution states.\nWe begin by collecting PyX, a clean Python corpus of fully executable code samples with functional descriptions and test cases. \nWe propose training Code LLMs not only to write code but also to understand code semantics by reasoning about key properties, constraints, and execution behaviors using natural language, mimicking human verbal debugging, i.e., rubber-duck debugging. This approach led to the development of SemCoder, a Code LLM with only 6.7B parameters, which shows competitive performance with GPT-3.5-turbo on code generation and execution reasoning tasks. SemCoder achieves 79.3% on HumanEval (GPT-3.5-turbo: 76.8%), 63.6% on CRUXEval-I (GPT-3.5-turbo: 50.3%), and 63.9% on CRUXEval-O (GPT-3.5-turbo: 59.0%). We also study the effectiveness of SemCoder's monologue-style execution reasoning compared to concrete scratchpad reasoning, showing that our approach integrates semantics from multiple dimensions more smoothly. Finally, we demonstrate the potential of applying learned semantics to improve Code LLMs' debugging and self-refining capabilities. Our data, code, and models are available at: https://github.com/ARiSE-Lab/SemCoder.", "keywords": "Code Language Models;Program Semantics Learning;Code Execution Reasoning", "primary_area": "generative_models", "supplementary_material": "", "author": "Yangruibo Ding;Jinjun Peng;Marcus J. Min;Gail Kaiser;Junfeng Yang;Baishakhi Ray", "authorids": "~Yangruibo_Ding1;~Jinjun_Peng1;~Marcus_J._Min1;~Gail_Kaiser1;~Junfeng_Yang1;~Baishakhi_Ray2", "gender": ";;;F;M;F", "homepage": ";https://co1in.me;;http://www.cs.columbia.edu/~kaiser/;https://www.cs.columbia.edu/~junfeng/;http://rayb.info/", "dblp": ";339/7123;;k/GailEKaiser;71/3724.html;74/1969", "google_scholar": ";GPdE5UsAAAAJ;;https://scholar.google.com/citations?hl=en;JJ9AvbAAAAAJ;https://scholar.google.com.tw/citations?user=VaAEb5YAAAAJ", "orcid": ";0009-0007-0202-2122;;0000-0002-8791-1178;0009-0000-2277-6545;", "linkedin": ";;;gail-kaiser-b4495a3/;;", "or_profile": "~Yangruibo_Ding1;~Jinjun_Peng1;~Marcus_J._Min1;~Gail_Kaiser1;~Junfeng_Yang1;~Baishakhi_Ray2", "aff": ";Columbia University;;Columbia University;Columbia University;Columbia University", "aff_domain": ";columbia.edu;;cs.columbia.edu;columbia.edu;columbia.edu", "position": ";PhD student;;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nding2024semcoder,\ntitle={SemCoder: Training Code Language Models with Comprehensive Semantics Reasoning},\nauthor={Yangruibo Ding and Jinjun Peng and Marcus J. Min and Gail Kaiser and Junfeng Yang and Baishakhi Ray},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PnlCHQrM69}\n}", "github": "", "reviewers": "H1xj;sURP;GuSF;MYsU", "pdf_size": 2715078, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "80;89;132;216", "wc_strengths": "30;40;37;17", "wc_weaknesses": "53;148;27;5", "wc_questions": "54;8;332;111", "wc_limitations": "25;1;2;4", "wc_review": "242;286;530;353", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "0;0;539;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.25, 53.80230013670419 ], "wc_strengths_avg": [ 31.0, 8.860022573334675 ], "wc_weaknesses_avg": [ 58.25, 54.53152757809009 ], "wc_questions_avg": [ 126.25, 124.26659848889403 ], "wc_limitations_avg": [ 8.0, 9.874208829065749 ], "wc_review_avg": [ 352.75, 109.70272330256893 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 134.75, 233.3938463199062 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15289521907244721493&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": ";columbia.edu;;cs.columbia.edu;columbia.edu;columbia.edu", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LoQT: Low-Rank Adapters for Quantized Pretraining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95286", "id": "Pnv8C0bU9t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pnv8C0bU9t", "openreview": "https://openreview.net/forum?id=Pnv8C0bU9t", "poster": "/media/PosterPDFs/NeurIPS%202024/95286.png?t=1731583468.0830362", "project": "", "author_site": "Sebastian Loeschcke, Mads Toftrup, Michael Kastoryano, Serge Belongie, V\u00e9steinn Sn\u00e6bjarnarson", "tldr": "", "abstract": "Despite advances using low-rank adapters and quantization, pretraining of large models on consumer hardware has not been possible without model sharding, offloading during training, or per-layer gradient updates. To address these limitations, we propose Low-Rank Adapters for Quantized Training (LoQT), a method for efficiently training quantized models. LoQT uses gradient-based tensor factorization to initialize low-rank trainable weight matrices that are periodically merged into quantized full-rank weight matrices. Our approach is suitable for both pretraining and fine-tuning models. We demonstrate this for language modeling and downstream task adaptation, finding that LoQT enables efficient training of models up to 7B parameters on a 24GB GPU. We also demonstrate the feasibility of training a 13B model using per-layer gradient updates on the same hardware.", "keywords": "Quantization;Low-Rank Adaptation;Memory Efficient Training;Large Language Models", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/e1ed468067c3a464bf9bc328cfa2f4abf5d24cdf.zip", "author": "Sebastian Bugge Loeschcke;Mads Toftrup;Michael Kastoryano;Serge Belongie;V\u00e9steinn Sn\u00e6bjarnarson", "authorids": "~Sebastian_Bugge_Loeschcke1;~Mads_Toftrup1;~Michael_Kastoryano1;~Serge_Belongie1;~V\u00e9steinn_Sn\u00e6bjarnarson1", "gender": "M;;M;M;M", "homepage": "https://sebulo.github.io/;;https://mkastoryano.com/;https://di.ku.dk/english/staff/?pure=en%2Fpersons%2Fserge-belongie(0ce65383-3761-4b17-948a-83b461e371e2)%2Fpublications.html;https://vesteinn.is", "dblp": "267/7543;;;http://dblp.uni-trier.de/pers/hd/b/Belongie:Serge_J=;273/5233", "google_scholar": "_aM-ud8AAAAJ;SzTMW48AAAAJ;https://scholar.google.dk/citations?user=2roUPxkAAAAJ;ORr4XJYAAAAJ;cp283P4AAAAJ", "orcid": ";;;0000-0002-0388-5217;0000-0001-9995-6181", "linkedin": "sebastian-loeschcke/;;;sergebelongie;v%C3%A9steinn-sn%C3%A6bjarnarson-781b82a6/", "or_profile": "~Sebastian_Bugge_Loeschcke1;~Mads_Toftrup1;~Michael_Kastoryano1;~Serge_Belongie1;~V\u00e9steinn_Sn\u00e6bjarnarson1", "aff": "University of Copenhagen;Aarhus University;IT University of Copenhagen;University of Copenhagen;University of Copenhagen", "aff_domain": "diku.dk;cs.au.dk;itu.dk;ku.dk;ku.dk", "position": "PhD student;PhD student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nloeschcke2024loqt,\ntitle={Lo{QT}: Low-Rank Adapters for Quantized Pretraining},\nauthor={Sebastian Bugge Loeschcke and Mads Toftrup and Michael Kastoryano and Serge Belongie and V{\\'e}steinn Sn{\\ae}bjarnarson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pnv8C0bU9t}\n}", "github": "", "reviewers": "Ekw9;CPTS;ps9K;tC4j", "pdf_size": 1511406, "rating": "4;6;6;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "135;86;52;185", "wc_strengths": "159;48;36;20", "wc_weaknesses": "242;44;43;39", "wc_questions": "206;62;119;1", "wc_limitations": "5;1;1;1", "wc_review": "747;241;251;246", "wc_reply_reviewers": "62;15;10;0", "wc_reply_authors": "122;0;24;24", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.5, 50.271761457104326 ], "wc_strengths_avg": [ 65.75, 54.74657523535148 ], "wc_weaknesses_avg": [ 92.0, 86.62274528090182 ], "wc_questions_avg": [ 97.0, 75.50827769191932 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 371.25, 216.9681716289281 ], "wc_reply_reviewers_avg": [ 21.75, 23.85765076448224 ], "wc_reply_authors_avg": [ 42.5, 46.93346354148605 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9701125762852954485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "diku.dk;cs.au.dk;itu.dk;ku.dk;ku.dk", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Copenhagen;Aarhus University;IT University of Copenhagen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ku.dk;https://au.dk;https://itu.dk", "aff_unique_abbr": "UCPH;AU;ITU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Denmark" }, { "title": "Object segmentation from common fate: Motion energy processing enables human-like zero-shot generalization to random dot stimuli", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95285", "id": "Po7iQKKT5b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Po7iQKKT5b", "openreview": "https://openreview.net/forum?id=Po7iQKKT5b", "poster": "", "project": "", "author_site": "Matthias Tangemann, Matthias K\u00fcmmerer, Matthias Bethge", "tldr": "", "abstract": "Humans excel at detecting and segmenting moving objects according to the {\\it Gestalt} principle of \u201ccommon fate\u201d. Remarkably, previous works have shown that human perception generalizes this principle in a zero-shot fashion to unseen textures or random dots. In this work, we seek to better understand the computational basis for this capability by evaluating a broad range of optical flow models and a neuroscience inspired motion energy model for zero-shot figure-ground segmentation of random dot stimuli. Specifically, we use the extensively validated motion energy model proposed by Simoncelli and Heeger in 1998 which is fitted to neural recordings in cortex area MT. We find that a cross section of 40 deep optical flow models trained on different datasets struggle to estimate motion patterns in random dot videos, resulting in poor figure-ground segmentation performance. Conversely, the neuroscience-inspired model significantly outperforms all optical flow models on this task. For a direct comparison to human perception, we conduct a psychophysical study using a shape identification task as a proxy to measure human segmentation performance. All state-of-the-art optical flow models fall short of human performance, but only the motion energy model matches human capability. This neuroscience-inspired model successfully addresses the lack of human-like zero-shot generalization to random dot stimuli in current computer vision models, and thus establishes a compelling link between the Gestalt psychology of human object perception and cortical motion processing in the brain.\n\nCode, models and datasets are available at https://github.com/mtangemann/motion_energy_segmentation", "keywords": "common fate;motion energy;optical flow;figure-ground segmentation;humans vs machines", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/314a11f0ef17044416920255d9c8365a95c2b3c0.zip", "author": "Matthias Tangemann;Matthias Kuemmerer;Matthias Bethge", "authorids": "~Matthias_Tangemann1;~Matthias_Kuemmerer1;~Matthias_Bethge1", "gender": "M;;M", "homepage": ";;https://bethgelab.org", "dblp": "277/6683;151/6291.html;77/3005", "google_scholar": ";https://scholar.google.de/citations?user=y5Ej2qYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-9734-8692;0000-0001-9644-4703;", "linkedin": ";;", "or_profile": "~Matthias_Tangemann1;~Matthias_Kuemmerer1;~Matthias_Bethge1", "aff": "University of T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ntangemann2024object,\ntitle={Object segmentation from common fate: Motion energy processing enables human-like zero-shot generalization to random dot stimuli},\nauthor={Matthias Tangemann and Matthias Kuemmerer and Matthias Bethge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Po7iQKKT5b}\n}", "github": "", "reviewers": "ezYE;cRYq;DKrM;8R4V", "pdf_size": 5854617, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "134;61;73;181", "wc_strengths": "28;40;47;247", "wc_weaknesses": "489;217;177;707", "wc_questions": "132;27;101;220", "wc_limitations": "5;52;9;25", "wc_review": "788;397;407;1380", "wc_reply_reviewers": "270;50;21;220", "wc_reply_authors": "93;14;21;134", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 112.25, 48.39098573081561 ], "wc_strengths_avg": [ 90.5, 90.61042986323373 ], "wc_weaknesses_avg": [ 397.5, 215.26901774291628 ], "wc_questions_avg": [ 120.0, 69.1989884319128 ], "wc_limitations_avg": [ 22.75, 18.471261462065875 ], "wc_review_avg": [ 743.0, 400.12685488479775 ], "wc_reply_reviewers_avg": [ 140.25, 106.72482138659217 ], "wc_reply_authors_avg": [ 65.5, 50.202091589892945 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JxWFTZsNXkYJ:scholar.google.com/&scioq=Object+segmentation+from+common+fate:+Motion+energy+processing+enables+human-like+zero-shot+generalization+to+random+dot+stimuli&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of T\u00fcbingen;Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Inference via Interpolation: Contrastive Representations Provably Enable Planning and Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95284", "id": "PoCs4jq7cV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PoCs4jq7cV", "openreview": "https://openreview.net/forum?id=PoCs4jq7cV", "poster": "", "project": "", "author_site": "Benjamin Eysenbach, Vivek Myers, Ruslan Salakhutdinov, Sergey Levine", "tldr": "", "abstract": "Given time series data, how can we answer questions like ``what will happen in the future?'' and ``how did we get here?'' These sorts of probabilistic inference questions are challenging when observations are high-dimensional. In this paper, we show how these questions can have compact, closed form solutions in terms of learned representations. The key idea is to apply a variant of contrastive learning to time series data. Prior work already shows that the representations learned by contrastive learning encode a probability ratio. By extending prior work to show that the marginal distribution over representations is Gaussian, we can then prove that joint distribution of representations is also Gaussian. Taken together, these results show that representations learned via temporal contrastive learning follow a Gauss-Markov chain, a graphical model where inference (e.g., prediction, planning) over representations corresponds to inverting a low-dimensional matrix. In one special case, inferring intermediate representations will be equivalent to interpolating between the learned representations. We validate our theory using numerical simulations on tasks up to 46-dimensions.", "keywords": "contrastive learning;prediction;planning;inference;time-series", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Benjamin Eysenbach;Vivek Myers;Russ Salakhutdinov;Sergey Levine", "authorids": "~Benjamin_Eysenbach1;~Vivek_Myers1;~Russ_Salakhutdinov1;~Sergey_Levine1", "gender": "M;;M;M", "homepage": "https://ben-eysenbach.github.io/;https://people.eecs.berkeley.edu/~vmyers/;https://www.cs.cmu.edu/~rsalakhu/;https://people.eecs.berkeley.edu/~svlevine/", "dblp": "192/1863;270/8694;;80/7594", "google_scholar": "DRnOvU8AAAAJ;5NGAbT4AAAAJ;;8R35rCwAAAAJ", "orcid": "0009-0000-7136-6307;;;", "linkedin": "benjamin-eysenbach-a7235775/;;;", "or_profile": "~Benjamin_Eysenbach1;~Vivek_Myers1;~Russ_Salakhutdinov1;~Sergey_Levine1", "aff": "Princeton University;University of California, Berkeley;School of Computer Science, Carnegie Mellon University;Google", "aff_domain": "princeton.edu;berkeley.edu;cs.cmu.edu;google.com", "position": "Assistant Professor;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\neysenbach2024inference,\ntitle={Inference via Interpolation: Contrastive Representations Provably Enable Planning and Inference},\nauthor={Benjamin Eysenbach and Vivek Myers and Russ Salakhutdinov and Sergey Levine},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PoCs4jq7cV}\n}", "github": "", "reviewers": "GBML;4BoG;4JuK;e5if", "pdf_size": 4174297, "rating": "6;6;6;7", "confidence": "3;2;2;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "52;22;58;92", "wc_strengths": "38;39;62;70", "wc_weaknesses": "60;77;96;855", "wc_questions": "98;37;138;1", "wc_limitations": "9;1;8;70", "wc_review": "257;176;362;1088", "wc_reply_reviewers": "0;0;37;286", "wc_reply_authors": "0;0;0;56", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.0, 24.859605789312106 ], "wc_strengths_avg": [ 52.25, 14.042346669983617 ], "wc_weaknesses_avg": [ 272.0, 336.83601351399466 ], "wc_questions_avg": [ 68.5, 53.0306515140065 ], "wc_limitations_avg": [ 22.0, 27.883686987197372 ], "wc_review_avg": [ 470.75, 362.41921513628387 ], "wc_reply_reviewers_avg": [ 80.75, 119.45998283944293 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1488613903912087665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "princeton.edu;berkeley.edu;cs.cmu.edu;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Princeton University;University of California, Berkeley;Carnegie Mellon University;Google", "aff_unique_dep": ";;School of Computer Science;Google", "aff_unique_url": "https://www.princeton.edu;https://www.berkeley.edu;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "Princeton;UC Berkeley;CMU;Google", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Berkeley;Pittsburgh;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "From Transparent to Opaque: Rethinking Neural Implicit Surfaces with $\\alpha$-NeuS", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95283", "id": "Pojt9RWIjJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pojt9RWIjJ", "openreview": "https://openreview.net/forum?id=Pojt9RWIjJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95283.png?t=1732868605.1904397", "project": "", "author_site": "Haoran Zhang, Junkai Deng, Xuhui Chen, Fei Hou, Wencheng Wang, Hong Qin, Chen Qian, Ying He", "tldr": "", "abstract": "Traditional 3D shape reconstruction techniques from multi-view images, such as structure from motion and multi-view stereo, face challenges in reconstructing transparent objects. Recent advances in neural radiance fields and its variants primarily address opaque or transparent objects, encountering difficulties to reconstruct both transparent and opaque objects simultaneously. This paper introduces $\\alpha$-NeuS$\\textemdash$an extension of NeuS$\\textemdash$that proves NeuS is unbiased for materials from fully transparent to fully opaque. We find that transparent and opaque surfaces align with the non-negative local minima and the zero iso-surface, respectively, in the learned distance field of NeuS. Traditional iso-surfacing extraction algorithms, such as marching cubes, which rely on fixed iso-values, are ill-suited for such data. We develop a method to extract the transparent and opaque surface simultaneously based on DCUDF. To validate our approach, we construct a benchmark that includes both real-world and synthetic scenes, demonstrating its practical utility and effectiveness. Our data and code are publicly available at https://github.com/728388808/alpha-NeuS.", "keywords": "NeuS;Transparent Modeling", "primary_area": "machine_vision", "supplementary_material": "/attachment/399cb8003916b9d52cdbbb028c7915aee80a24d9.zip", "author": "Haoran Zhang;Junkai Deng;Xuhui Chen;Fei Hou;Wencheng Wang;Hong Qin;Chen Qian;Ying He", "authorids": "~Haoran_Zhang20;~Junkai_Deng1;~Xuhui_Chen2;~Fei_Hou1;~Wencheng_Wang1;~Hong_Qin1;~Chen_Qian1;~Ying_He1", "gender": "M;;M;M;M;M;M;M", "homepage": "https://github.com/728388808;;https://github.com/jjjkkyz;https://lcs.ios.ac.cn/~houf/;http://lcs.ios.ac.cn/~whn/;http://www.cs.stonybrook.edu/~qin;;https://personal.ntu.edu.sg/yhe/", "dblp": ";;;24/3702;;79/627-1;;h/YingHe1", "google_scholar": ";;;NWoYRf8AAAAJ;;NOcejj8AAAAJ;AerkT0YAAAAJ;ISNmBxwAAAAJ", "orcid": ";;;;;;;0000-0002-6749-4485", "linkedin": ";;;;;;;", "or_profile": "~Haoran_Zhang20;~Junkai_Deng1;~Xuhui_Chen2;~Fei_Hou1;~Wencheng_Wang1;~Hong_Qin1;~Chen_Qian1;~Ying_He1", "aff": "Institute of Software, Chinese Academy of Sciences;;Chinese Academy of Sciences, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Stony Brook University (State University of New York, Stony Brook);Tsinghua University;Nanyang Technological University", "aff_domain": "ios.ac.cn;;ios.ac.cn;ios.ac.cn;ios.ac.cn;cs.stonybrook.edu;mails.tsinghua.edu.cn;ntu.edu.sg", "position": "MS student;;PhD student;Associate Professor;Full Professor;Full Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2024from,\ntitle={From Transparent to Opaque: Rethinking Neural Implicit Surfaces with \\${\\textbackslash}alpha\\$-NeuS},\nauthor={Haoran Zhang and Junkai Deng and Xuhui Chen and Fei Hou and Wencheng Wang and Hong Qin and Chen Qian and Ying He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pojt9RWIjJ}\n}", "github": "", "reviewers": "GRsP;vpPg;ruwJ;6LF5", "pdf_size": 7248313, "rating": "2;6;6;8", "confidence": "3;3;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "1;2;3;4", "wc_summary": "42;93;72;57", "wc_strengths": "37;31;61;201", "wc_weaknesses": "600;266;98;14", "wc_questions": "39;32;238;8", "wc_limitations": "8;5;12;56", "wc_review": "726;427;481;336", "wc_reply_reviewers": "326;0;0;0", "wc_reply_authors": "504;0;0;0", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 2.179449471770337 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 66.0, 18.854707634964804 ], "wc_strengths_avg": [ 82.5, 69.33072911775845 ], "wc_weaknesses_avg": [ 244.5, 224.4075533488122 ], "wc_questions_avg": [ 79.25, 92.37254732873832 ], "wc_limitations_avg": [ 20.25, 20.78911975048487 ], "wc_review_avg": [ 492.5, 144.42731736067108 ], "wc_reply_reviewers_avg": [ 81.5, 141.1621408168635 ], "wc_reply_authors_avg": [ 126.0, 218.23840175367854 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HoWM6OQXsUEJ:scholar.google.com/&scioq=From+Transparent+to+Opaque:+Rethinking+Neural+Implicit+Surfaces+with+%24%5Calpha%24-NeuS&hl=en&as_sdt=0,7", "gs_version_total": 3, "email": "ios.ac.cn;;ios.ac.cn;ios.ac.cn;ios.ac.cn;cs.stonybrook.edu;mails.tsinghua.edu.cn;ntu.edu.sg", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "Chinese Academy of Sciences;Stony Brook University;Tsinghua University;Nanyang Technological University", "aff_unique_dep": "Institute of Software;;;", "aff_unique_url": "http://www.ios.ac.cn;https://www.stonybrook.edu;https://www.tsinghua.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "CAS;SBU;THU;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;0;0;0;1;0;2", "aff_country_unique": "China;United States;Singapore" }, { "title": "Second-order forward-mode optimization of recurrent neural networks for neuroscience", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95282", "id": "Pox8jNQOo5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pox8jNQOo5", "openreview": "https://openreview.net/forum?id=Pox8jNQOo5", "poster": "/media/PosterPDFs/NeurIPS%202024/95282.png?t=1731346138.1904387", "project": "", "author_site": "Youjing Yu, Rui Xia, Qingxi Ma, Mate Lengyel, Guillaume Hennequin", "tldr": "", "abstract": "A common source of anxiety for the computational neuroscience student is the question \u201cwill my recurrent neural network (RNN) model finally learn that task?\u201d. Unlike in machine learning where any architectural modification of an RNN (e.g. GRU or LSTM) is acceptable if it speeds up training, the RNN models trained as _models of brain dynamics_ are subject to plausibility constraints that fundamentally exclude the usual machine learning hacks. The \u201cvanilla\u201d RNNs commonly used in computational neuroscience find themselves plagued by ill-conditioned loss surfaces that complicate training and significantly hinder our capacity to investigate the brain dynamics underlying complex tasks. Moreover, some tasks may require very long time horizons which backpropagation cannot handle given typical GPU memory limits. Here, we develop SOFO, a second-order optimizer that efficiently navigates loss surfaces whilst _not_ requiring backpropagation. By relying instead on easily parallelized batched forward-mode differentiation, SOFO enjoys constant memory cost in time. Morever, unlike most second-order optimizers which involve inherently sequential operations, SOFO's effective use of GPU parallelism yields a per-iteration wallclock time essentially on par with first-order gradient-based optimizers. We show vastly superior performance compared to Adam on a number of RNN tasks, including a difficult double-reaching motor task and the learning of an adaptive Kalman filter algorithm trained over a long horizon.", "keywords": "computational neuroscience;recurrent neural networks;motor control", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Youjing Yu;Rui Xia;Qingxi Ma;M\u00e1t\u00e9 Lengyel;Guillaume Hennequin", "authorids": "~Youjing_Yu1;~Rui_Xia2;~Qingxi_Ma1;~M\u00e1t\u00e9_Lengyel1;~Guillaume_Hennequin1", "gender": ";F;F;M;M", "homepage": "https://github.com/YoujingYu99/YoujingYu99.github.io;;https://neuroscience.cam.ac.uk/member/qm218/;http://lengyellab.org;https://cbl-cambridge.org", "dblp": ";;;48/6327;56/10432", "google_scholar": ";;;WvgoL14AAAAJ;-NkKYYcAAAAJ", "orcid": ";;;0000-0001-7266-0049;", "linkedin": "youjing-yu99/;rui-xia-567b56150;;;", "or_profile": "~Youjing_Yu1;~Rui_Xia2;~Qingxi_Ma1;~M\u00e1t\u00e9_Lengyel1;~Guillaume_Hennequin1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;Central European University;MediaTek Research", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ceu.edu;mtkresearch.com", "position": "PhD student;PhD student;MS student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nyu2024secondorder,\ntitle={Second-order forward-mode optimization of recurrent neural networks for neuroscience},\nauthor={Youjing Yu and Rui Xia and Qingxi Ma and M{\\'a}t{\\'e} Lengyel and Guillaume Hennequin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pox8jNQOo5}\n}", "github": "", "reviewers": "mSGm;3Uai;Z9Qd;535W", "pdf_size": 2197526, "rating": "5;8;8;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;4;4;3", "presentation": "3;4;3;3", "wc_summary": "84;639;74;106", "wc_strengths": "39;102;50;90", "wc_weaknesses": "220;93;48;114", "wc_questions": "37;247;764;38", "wc_limitations": "20;20;51;6", "wc_review": "400;1101;987;354", "wc_reply_reviewers": "101;195;59;114", "wc_reply_authors": "0;60;23;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 225.75, 238.87065014354525 ], "wc_strengths_avg": [ 70.25, 26.38536526182649 ], "wc_weaknesses_avg": [ 118.75, 63.13230155791883 ], "wc_questions_avg": [ 271.5, 296.92970548599544 ], "wc_limitations_avg": [ 24.25, 16.467771555374455 ], "wc_review_avg": [ 710.5, 336.32015996666036 ], "wc_reply_reviewers_avg": [ 117.25, 49.27664253984843 ], "wc_reply_authors_avg": [ 20.75, 24.529319191530774 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jmODBLdPDGUJ:scholar.google.com/&scioq=Second-order+forward-mode+optimization+of+recurrent+neural+networks+for+neuroscience&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;ceu.edu;mtkresearch.com", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of Cambridge;Central European University;MediaTek Inc.", "aff_unique_dep": ";;Research", "aff_unique_url": "https://www.cam.ac.uk;https://www.ceu.edu;https://www.mediatek.com/", "aff_unique_abbr": "Cambridge;CEU;MediaTek", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Cambridge;;Taiwan", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "United Kingdom;Hungary;China" }, { "title": "LoD-Loc: Aerial Visual Localization using LoD 3D Map with Neural Wireframe Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95281", "id": "PqlKliEXyJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PqlKliEXyJ", "openreview": "https://openreview.net/forum?id=PqlKliEXyJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95281.png?t=1732592144.0766199", "project": "", "author_site": "Juelin Zhu, Shen Yan, Long Wang, zhang shengYue, Yu Liu, Maojun Zhang", "tldr": "", "abstract": "We propose a new method named LoD-Loc for visual localization in the air. Unlike existing localization algorithms, LoD-Loc does not rely on complex 3D representations and can estimate the pose of an Unmanned Aerial Vehicle (UAV) using a Level-of-Detail (LoD) 3D map. LoD-Loc mainly achieves this goal by aligning the wireframe derived from the LoD projected model with that predicted by the neural network. Specifically, given a coarse pose provided by the UAV sensor, LoD-Loc hierarchically builds a cost volume for uniformly sampled pose hypotheses to describe pose probability distribution and select a pose with maximum probability. Each cost within this volume measures the degree of line alignment between projected and predicted wireframes. LoD-Loc also devises a 6-DoF pose optimization algorithm to refine the previous result with a differentiable Gaussian-Newton method. As no public dataset exists for the studied problem, we collect two datasets with map levels of LoD3.0 and LoD2.0, along with real RGB queries and ground-truth pose annotations. We benchmark our method and demonstrate that LoD-Loc achieves excellent performance, even surpassing current state-of-the-art methods that use textured 3D models for localization. The code and dataset will be made available upon publication.", "keywords": "3D computer vision; Pose estimation; Privacy-preserving localization", "primary_area": "other", "supplementary_material": "/attachment/b284e7d23bf3cd438e10789c474b75841e731dfb.zip", "author": "Juelin Zhu;Shen Yan;Long Wang;zhang shengYue;Yu Liu;Maojun Zhang", "authorids": "~Juelin_Zhu1;~Shen_Yan6;~Long_Wang2;~zhang_shengYue1;~Yu_Liu4;~Maojun_Zhang1", "gender": "M;M;M;M;M;M", "homepage": ";;https://wanglongzju.github.io/wanglong.github.io/;;;", "dblp": "268/9328;;;;97/2274-8;", "google_scholar": "yT471_QAAAAJ;;;;;", "orcid": ";0000-0002-1415-5113;;0009-0000-8968-651X;;0000-0001-6748-0545", "linkedin": ";;;;;", "or_profile": "~Juelin_Zhu1;~Shen_Yan6;~Long_Wang2;~zhang_shengYue1;~Yu_Liu4;~Maojun_Zhang1", "aff": "National University of Defense Technology;National University of Defense Technology;Sensetime;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;nudt.edu.cn;sensetime.com;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;Lecturer;Researcher;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2024lodloc,\ntitle={LoD-Loc: Aerial Visual Localization using LoD 3D Map with Neural Wireframe Alignment},\nauthor={Juelin Zhu and Shen Yan and Long Wang and zhang shengYue and Yu Liu and Maojun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PqlKliEXyJ}\n}", "github": "", "reviewers": "nkE1;CDoh;RdXx;UAbv;H1Dr", "pdf_size": 29006704, "rating": "4;5;5;6;8", "confidence": "4;4;4;4;5", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "103;18;132;97;132", "wc_strengths": "50;16;123;77;113", "wc_weaknesses": "76;9;331;98;286", "wc_questions": "27;57;60;14;68", "wc_limitations": "89;3;1;14;5", "wc_review": "345;103;647;300;604", "wc_reply_reviewers": "0;4;24;15;19", "wc_reply_authors": "44;0;57;129;0", "reply_reviewers": "0;1;1;2;1", "reply_authors": "2;1;2;3;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 96.4, 41.773675921565726 ], "wc_strengths_avg": [ 75.8, 39.635337768208814 ], "wc_weaknesses_avg": [ 160.0, 125.55317598531707 ], "wc_questions_avg": [ 45.2, 20.894018282752604 ], "wc_limitations_avg": [ 22.4, 33.5952377577537 ], "wc_review_avg": [ 399.8, 201.92018225031393 ], "wc_reply_reviewers_avg": [ 12.4, 9.046546302318914 ], "wc_reply_authors_avg": [ 46.0, 47.42573141238836 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8846517369293829, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DkWTjOFhPG8J:scholar.google.com/&scioq=LoD-Loc:+Aerial+Visual+Localization+using+LoD+3D+Map+with+Neural+Wireframe+Alignment&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "nudt.edu.cn;nudt.edu.cn;sensetime.com;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "National University of Defense Technology;SenseTime", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.sensetime.com", "aff_unique_abbr": "NUDT;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Extending Multi-modal Contrastive Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95280", "id": "PquRXu9pQ6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PquRXu9pQ6", "openreview": "https://openreview.net/forum?id=PquRXu9pQ6", "poster": "", "project": "", "author_site": "Ziang Zhang, Zehan Wang, Luping Liu, Rongjie Huang, Xize Cheng, Zhenhui Ye, wang lin, Huadai Liu, Haifeng Huang, Yang Zhao, Tao Jin, Siqi Zheng, Zhou Zhao", "tldr": "", "abstract": "Multi-modal contrastive representation (MCR) of more than three modalities is critical in multi-modal learning. Although recent methods showcase impressive achievements, the high dependence on large-scale, high-quality paired data and the expensive training costs limit their further development. Inspired by recent C-MCR, this paper proposes $\\textbf{Ex}$tending $\\textbf{M}$ultimodal $\\textbf{C}$ontrastive $\\textbf{R}$epresentation (Ex-MCR), a training-efficient and paired-data-free method to build unified contrastive representation for many modalities. Since C-MCR is designed to learn a new latent space for the two non-overlapping modalities and projects them onto this space, a significant amount of information from their original spaces is lost in the projection process. To address this issue, Ex-MCR proposes to extend one modality's space into the other's, rather than mapping both modalities onto a completely new space. This method effectively preserves semantic alignment in the original space. Experimentally, we extend pre-trained audio-text and 3D-image representations to the existing vision-text space. Without using paired data, Ex-MCR achieves comparable performance to advanced methods on a series of audio-image-text and 3D-image-text tasks and achieves superior performance when used in parallel with data-driven methods. Moreover, semantic alignment also emerges between the extended modalities (e.g., audio and 3D).", "keywords": "Multi-modal learning; Representation Learning", "primary_area": "other", "supplementary_material": "/attachment/de4b7c495a3684ae8826fd8a13a643ded9a1dc73.zip", "author": "Ziang Zhang;Zehan Wang;Luping Liu;Rongjie Huang;Xize Cheng;Zhenhui Ye;Wang Lin;Huadai Liu;Haifeng Huang;Yang Zhao;Tao Jin;Siqi Zheng;Zhou Zhao", "authorids": "~Ziang_Zhang1;~Zehan_Wang2;~Luping_Liu2;~Rongjie_Huang1;~Xize_Cheng1;~Zhenhui_Ye1;~Wang_Lin2;~Huadai_Liu1;~Haifeng_Huang3;~Yang_Zhao14;~Tao_Jin2;~Siqi_Zheng1;~Zhou_Zhao3", "gender": "M;M;;M;M;M;;M;M;M;M;M;", "homepage": ";https://github.com/12zehan17;;;https://exgc.github.io/;https://yerfor.github.io;;;https://zzzzchs.github.io/;;https://hugddygff.github.io/;;", "dblp": ";126/7826-1;;212/8936-1;334/2167;265/6375;;321/0749;;50/2082-22;88/4850-4.html;;", "google_scholar": "DptGMnYAAAAJ;euXK0lkAAAAJ;;iRHBUsgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN;oUm2gZUAAAAJ;;;https://scholar.google.com.hk/citations?user=BsrS95gAAAAJ;", "orcid": ";0009-0007-7509-7563;;;0000-0001-9708-3225;;;;;;0000-0003-3564-1628;;", "linkedin": ";;;;;;;https://www.linkedin.cn/injobs/in/%E5%8D%8E%E5%B2%B1-%E5%88%98-463b1622a;haifeng-huang-784b2b249/;;;;", "or_profile": "~Ziang_Zhang1;~Zehan_Wang2;~Luping_Liu2;~Rongjie_Huang1;~Xize_Cheng1;~Zhenhui_Ye1;~Wang_Lin2;~Huadai_Liu1;~Haifeng_Huang3;~Yang_Zhao14;~Tao_Jin2;~Siqi_Zheng1;~Zhou_Zhao3", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;ByteDance Inc.;Zhejiang University;Alibaba Group;", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn;alibaba-inc.com;", "position": "Undergrad student;PhD student;;MS student;PhD student;PhD student;;MS student;MS student;Researcher;Assistant Professor;Researcher;", "bibtex": "@inproceedings{\nzhang2024extending,\ntitle={Extending Multi-modal Contrastive Representations},\nauthor={Ziang Zhang and Zehan Wang and Luping Liu and Rongjie Huang and Xize Cheng and Zhenhui Ye and Wang Lin and Huadai Liu and Haifeng Huang and Yang Zhao and Tao Jin and Siqi Zheng and Zhou Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PquRXu9pQ6}\n}", "github": "", "reviewers": "n4Yo;V8oT;1W4Z;KwrG", "pdf_size": 1669766, "rating": "4;5;5;7", "confidence": "4;4;5;3", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "50;110;84;73", "wc_strengths": "44;122;27;55", "wc_weaknesses": "58;229;266;83", "wc_questions": "59;31;194;6", "wc_limitations": "4;5;1;1", "wc_review": "215;497;572;218", "wc_reply_reviewers": "0;142;11;86", "wc_reply_authors": "48;573;0;95", "reply_reviewers": "0;3;1;1", "reply_authors": "2;3;1;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.25, 21.579793789561567 ], "wc_strengths_avg": [ 62.0, 36.04857833535187 ], "wc_weaknesses_avg": [ 159.0, 89.89716347026751 ], "wc_questions_avg": [ 72.5, 72.6102609828666 ], "wc_limitations_avg": [ 2.75, 1.7853571071357126 ], "wc_review_avg": [ 375.5, 161.1994106688979 ], "wc_reply_reviewers_avg": [ 59.75, 57.88080424458527 ], "wc_reply_authors_avg": [ 179.0, 229.94238408784057 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12624363346917589753&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn;alibaba-inc.com;", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;0;1;0;2", "aff_unique_norm": "Zhejiang University;ByteDance;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.bytedance.com;https://www.alibaba.com", "aff_unique_abbr": "ZJU;ByteDance;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Sharpness-Aware Minimization Activates the Interactive Teaching's Understanding and Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95279", "id": "Prw98p1nV0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Prw98p1nV0", "openreview": "https://openreview.net/forum?id=Prw98p1nV0", "poster": "/media/PosterPDFs/NeurIPS%202024/95279.png?t=1730250457.9458485", "project": "", "author_site": "Mingwei Xu, Xiaofeng Cao, Ivor Tsang", "tldr": "", "abstract": "Teaching is a potentially effective approach for understanding interactions among multiple intelligences. Previous explorations have convincingly shown that teaching presents additional opportunities for observation and demonstration within the learning model, such as data distillation and selection. However, the underlying optimization principles and convergence of interactive teaching lack theoretical analysis, and in this regard co-teaching serves as a notable prototype. In this paper, we discuss its role as a reduction of the larger loss landscape derived from Sharpness-Aware Minimization (SAM). Then, we classify it as an iterative parameter estimation process using Expectation-Maximization. The convergence of this typical interactive teaching is achieved by continuously optimizing a variational lower bound on the log marginal likelihood. This lower bound represents the expected value of the log posterior distribution of the latent variables under a scaled, factorized variational distribution. To further enhance interactive teaching's performance, we incorporate SAM's strong generalization information into interactive teaching, referred as Sharpness Reduction Interactive Teaching (SRIT). This integration can be viewed as a novel sequential optimization process. Finally, we validate the performance of our approach through multiple experiments.", "keywords": "Interactive Teaching;Sharpness-Aware Minimization;Co-teaching;Expectation-Maximization;Optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Mingwei Xu;Xiaofeng Cao;Ivor Tsang", "authorids": "~Mingwei_Xu4;~Xiaofeng_Cao2;~Ivor_Tsang1", "gender": "M;M;M", "homepage": ";https://www.a-star.edu.sg/cfar/about-cfar/management/prof-ivor-tsang;https://xiaofengcaoml.github.io/", "dblp": ";35/5873;117/3982-2.html", "google_scholar": ";rJMOlVsAAAAJ;", "orcid": "0009-0001-0805-9002;;", "linkedin": ";;", "or_profile": "~Mingwei_Xu4;~Ivor_W_Tsang1;~Xiaofeng_Cao1", "aff": "Jilin University;A*STAR;Jilin University", "aff_domain": "jlu.edu;cfar.a-star.edu.sg;jlu.edu.cn", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nxu2024sharpnessaware,\ntitle={Sharpness-Aware Minimization Activates the Interactive Teaching's Understanding and Optimization},\nauthor={Mingwei Xu and Xiaofeng Cao and Ivor Tsang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Prw98p1nV0}\n}", "github": "", "reviewers": "d2nj;4Qn5;hGPy;N9mR", "pdf_size": 8220629, "rating": "5;5;5;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "51;77;96;61", "wc_strengths": "30;36;74;48", "wc_weaknesses": "95;145;145;114", "wc_questions": "2;2;5;2", "wc_limitations": "8;7;20;2", "wc_review": "186;267;340;227", "wc_reply_reviewers": "18;30;96;22", "wc_reply_authors": "0;0;508;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 17.03489066592445 ], "wc_strengths_avg": [ 47.0, 16.881943016134134 ], "wc_weaknesses_avg": [ 124.75, 21.33512362279628 ], "wc_questions_avg": [ 2.75, 1.299038105676658 ], "wc_limitations_avg": [ 9.25, 6.609652033201143 ], "wc_review_avg": [ 255.0, 56.819890883386954 ], "wc_reply_reviewers_avg": [ 41.5, 31.76082492631449 ], "wc_reply_authors_avg": [ 127.0, 219.9704525612474 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:03lXN1XLR3cJ:scholar.google.com/&scioq=Sharpness-Aware+Minimization+Activates+the+Interactive+Teaching%27s+Understanding+and+Optimization&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "jlu.edu;cfar.a-star.edu.sg;jlu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Jilin University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "http://www.jlu.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "JLU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Generative Hierarchical Materials Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95278", "id": "PsPR4NOiRC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PsPR4NOiRC", "openreview": "https://openreview.net/forum?id=PsPR4NOiRC", "poster": "", "project": "", "author_site": "Sherry Yang, Simon Batzner, Ruiqi Gao, Muratahan Aykol, Alexander Gaunt, Brendan C McMorrow, Danilo Jimenez Rezende, Dale Schuurmans, Igor Mordatch, Ekin Dogus Cubuk", "tldr": "", "abstract": "Generative models trained at scale can now produce novel text, video, and more recently, scientific data such as crystal structures. The ultimate goal for materials discovery, however, goes beyond generation: we desire a fully automated system that proposes, generates, and verifies crystal structures given a high-level user instruction. In this work, we formulate end-to-end language-to-structure generation as a multi-objective optimization problem, and propose Generative Hierarchical Materials Search (GenMS) for controllable generation of crystal structures. GenMS consists of (1) a language model that takes high-level natural language as input and generates intermediate textual information about a crystal (e.g., chemical formulae), and (2) a diffusion model that takes intermediate information as input and generates low-level continuous value crystal structures. GenMS additionally uses a graph neural network to predict properties (e.g., formation energy) from the generated crystal structures. During inference, GenMS leverages all three components to conduct a forward tree search over the space of possible structures. Experiments show that GenMS outperforms other alternatives both in satisfying user request and in generating low-energy structures. GenMS is able to generate complex structures such as double perovskites (or elpasolites), layered structures, and spinels, solely from natural language input.", "keywords": "Generative models for materials;diffusion models;multimodality;hierarchical search", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Sherry Yang;Simon Batzner;Ruiqi Gao;Muratahan Aykol;Alexander L Gaunt;Brendan McMorrow;Danilo Jimenez Rezende;Dale Schuurmans;Igor Mordatch;Ekin Dogus Cubuk", "authorids": "~Sherry_Yang1;~Simon_Batzner1;~Ruiqi_Gao1;~Muratahan_Aykol1;~Alexander_L_Gaunt1;~Brendan_McMorrow1;~Danilo_Jimenez_Rezende2;~Dale_Schuurmans1;~Igor_Mordatch5;~Ekin_Dogus_Cubuk1", "gender": "F;M;F;M;M;M;;;;M", "homepage": "https://sherryy.github.io;https://simonbatzner.github.io/;http://www.stat.ucla.edu/~ruiqigao/;;;;;;;", "dblp": ";241/7204;206/7084;;185/1083;;;;;83/7734", "google_scholar": "7c1B_fIAAAAJ;364jgpgAAAAJ;VdlgOXoAAAAJ;7JNUMRAAAAAJ;;YJd3v4QAAAAJ;;;Vzr1RukAAAAJ;Mu_8iOEAAAAJ", "orcid": ";0000-0002-8826-2712;;0000-0001-6433-7217;;0000-0002-1933-8891;;;;", "linkedin": ";simonbatzner/;;;;brendanmcm/?originalSubdomain=uk;;;;ekin-dogus-cubuk-9148b8114/", "or_profile": "~Sherry_Yang1;~Simon_Batzner1;~Ruiqi_Gao1;~Muratahan_Aykol1;~Alexander_L_Gaunt1;~Brendan_McMorrow1;~Danilo_Jimenez_Rezende2;~Dale_Schuurmans1;~Igor_Mordatch5;~Ekin_Dogus_Cubuk1", "aff": "University of California, Berkeley;Google DeepMind;Google;Google;Google DeepMind;Google;;;Research, Google;Google", "aff_domain": "berkeley.edu;google.com;google.com;google.com;google.com;deepmind.com;;;research.google.com;google.com", "position": "Student;Research Scientist;Researcher;Researcher;Researcher;Researcher;;;Researcher;Staff Research Scientist", "bibtex": "@inproceedings{\nyang2024generative,\ntitle={Generative Hierarchical Materials Search},\nauthor={Sherry Yang and Simon Batzner and Ruiqi Gao and Muratahan Aykol and Alexander L Gaunt and Brendan McMorrow and Danilo Jimenez Rezende and Dale Schuurmans and Igor Mordatch and Ekin Dogus Cubuk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PsPR4NOiRC}\n}", "github": "", "reviewers": "SSy6;Tzq3;Xu7Y;DY2F", "pdf_size": 961041, "rating": "5;5;6;7", "confidence": "3;1;2;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "125;68;121;106", "wc_strengths": "56;50;81;39", "wc_weaknesses": "249;88;82;21", "wc_questions": "76;11;35;33", "wc_limitations": "46;6;18;26", "wc_review": "552;223;337;225", "wc_reply_reviewers": "380;72;0;9", "wc_reply_authors": "298;40;28;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.0, 22.5055548698538 ], "wc_strengths_avg": [ 56.5, 15.402921800749363 ], "wc_weaknesses_avg": [ 110.0, 84.42452250383178 ], "wc_questions_avg": [ 38.75, 23.47738273317535 ], "wc_limitations_avg": [ 24.0, 14.560219778561036 ], "wc_review_avg": [ 334.25, 133.91671852311794 ], "wc_reply_reviewers_avg": [ 115.25, 155.35020920488006 ], "wc_reply_authors_avg": [ 91.5, 120.10308072651593 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14115165407031355265&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "berkeley.edu;google.com;google.com;google.com;google.com;deepmind.com;;;research.google.com;google.com", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.berkeley.edu;https://deepmind.com", "aff_unique_abbr": "UC Berkeley;DeepMind", "aff_campus_unique_index": "0;2;2;2;2;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;1;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Statistical-Computational Trade-offs for Density Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95277", "id": "PtD4aZPzcR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PtD4aZPzcR", "openreview": "https://openreview.net/forum?id=PtD4aZPzcR", "poster": "", "project": "", "author_site": "Anders Aamand, Alexandr Andoni, Justin Chen, Piotr Indyk, Shyam Narayanan, Sandeep Silwal, Haike Xu", "tldr": "", "abstract": "We study the density estimation problem defined as follows: given $k$ distributions $p_1, \\ldots, p_k$ over a discrete domain $[n]$, as well as a collection of samples chosen from a \"query\" distribution $q$ over $[n]$, output $p_i$ that is \"close\" to $q$. Recently Aamand et al. gave the first and only known result that achieves sublinear bounds in both the sampling complexity and the query time while preserving polynomial data structure space. However, their improvement over linear samples and time is only by subpolynomial factors.\n\nOur main result is a lower bound showing that, for a broad class of data structures, their bounds cannot be significantly improved. In particular, if an algorithm uses $O(n/\\log^c k)$ samples for some constant $c>0$ and polynomial space, then the query time of the data structure must be at least $k^{1-O(1)/\\log \\log k}$, i.e., close to linear in the number of distributions $k$. This is a novel statistical-computational trade-off for density estimation, demonstrating that any data structure must use close to a linear number of samples or take close to linear query time. The lower bound holds even in the realizable case where $q=p_i$ for some $i$, and when the distributions are flat (specifically, all distributions are uniform over half of the domain $[n]$). We also give a simple data structure for our lower bound instance with asymptotically matching upper bounds. Experiments show that the data structure is quite efficient in practice.", "keywords": "statistical-computational tradeoffs;lower bounds;data structures;density estimation;hypothesis selection", "primary_area": "learning_theory", "supplementary_material": "/attachment/15a3c5af284a40bcc970344eff18b138aa587069.zip", "author": "Anders Aamand;Alexandr Andoni;Justin Y. Chen;Piotr Indyk;Shyam Narayanan;Sandeep Silwal;Haike Xu", "authorids": "~Anders_Aamand1;~Alexandr_Andoni1;~Justin_Y._Chen1;~Piotr_Indyk1;~Shyam_Narayanan1;~Sandeep_Silwal1;~Haike_Xu1", "gender": "M;M;;M;M;M;M", "homepage": "https://www.andersaamand.com/;http://www.mit.edu/~andoni/;https://people.csail.mit.edu/indyk/;https://sites.google.com/view/shyamnarayanan/home;https://sandeepsilwal.com;https://www.haikexu.com/;https://people.csail.mit.edu/justc/", "dblp": "205/2416;66/6009;i/PiotrIndyk;222/2805;225/4637;285/4898;254/0805.html", "google_scholar": "WpIvLroAAAAJ;Evgx6UkAAAAJ;oOwNKsAAAAAJ;CTT44Y0AAAAJ;MnDnUvcAAAAJ;bvDRaVcAAAAJ;X_myU1YAAAAJ", "orcid": "0000-0002-0402-0514;;;;;0009-0006-5526-6924;", "linkedin": ";;;;;;", "or_profile": "~Anders_Aamand1;~Alexandr_Andoni1;~Piotr_Indyk1;~Shyam_Narayanan1;~Sandeep_Silwal1;~Haike_Xu1;~Justin_Y_Chen1", "aff": "Massachusetts Institute of Technology;Columbia University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;columbia.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Postdoc;Associate Professor;Full Professor;PhD student;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\naamand2024statisticalcomputational,\ntitle={Statistical-Computational Trade-offs for Density Estimation},\nauthor={Anders Aamand and Alexandr Andoni and Justin Y. Chen and Piotr Indyk and Shyam Narayanan and Sandeep Silwal and Haike Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PtD4aZPzcR}\n}", "github": "", "reviewers": "ZQja;prnQ;yUic;b6AN", "pdf_size": 1100409, "rating": "5;5;6;7", "confidence": "3;3;2;3", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "130;103;98;67", "wc_strengths": "36;49;95;139", "wc_weaknesses": "90;152;3;51", "wc_questions": "111;18;3;64", "wc_limitations": "40;1;4;9", "wc_review": "407;323;203;330", "wc_reply_reviewers": "16;10;0;6", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.5, 22.36626924634504 ], "wc_strengths_avg": [ 79.75, 40.62865368185365 ], "wc_weaknesses_avg": [ 74.0, 54.566473223033206 ], "wc_questions_avg": [ 49.0, 42.26700841081611 ], "wc_limitations_avg": [ 13.5, 15.56438241627338 ], "wc_review_avg": [ 315.75, 72.96360394059494 ], "wc_reply_reviewers_avg": [ 8.0, 5.830951894845301 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6Iy5lmz5PbYJ:scholar.google.com/&scioq=Statistical-Computational+Trade-offs+for+Density+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "mit.edu;columbia.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.columbia.edu", "aff_unique_abbr": "MIT;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fundamental Convergence Analysis of Sharpness-Aware Minimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95276", "id": "PuXYI4HOQU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PuXYI4HOQU", "openreview": "https://openreview.net/forum?id=PuXYI4HOQU", "poster": "/media/PosterPDFs/NeurIPS%202024/95276.png?t=1731384568.3993683", "project": "", "author_site": "Pham Khanh, Hoang-Chau Luong, Boris Mordukhovich, Dat Tran", "tldr": "", "abstract": "The paper investigates the fundamental convergence properties of Sharpness-Aware Minimization (SAM), a recently proposed gradient-based optimization method (Foret et al., 2021) that significantly improves the generalization of deep neural networks. The convergence properties including the stationarity of accumulation points, the convergence of the sequence of gradients to the origin, the sequence of function values to the optimal value, and the sequence of iterates to the optimal solution are established for the method. The universality of the provided convergence analysis based on inexact gradient descent frameworks (Khanh et al., 2023b) allows its extensions to the normalized versions of SAM such as F-SAM (Li et al. 2024), VaSSO (Li & Giannakis, 2023), RSAM (Liu et al., 2022), and to the unnormalized versions of SAM such as USAM (Andriushchenko & Flammarion, 2022). Numerical experiments are conducted on classification tasks using deep learning models to confirm the practical aspects of our analysis.", "keywords": "Convergence Analysis;Deep Learning;Inexact Gradient Descent Methods;Neural Network;Sharpness-Aware Minimization", "primary_area": "optimization", "supplementary_material": "/attachment/170c95551234d87fdbf62911536cc29d7b8a024a.zip", "author": "Pham Duy Khanh;Hoang-Chau Luong;Boris Mordukhovich;Dat Ba Tran", "authorids": "~Pham_Duy_Khanh1;~Hoang-Chau_Luong1;~Boris_Mordukhovich2;~Dat_Ba_Tran1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/site/khanhpd182/;;http://borismordukhovich.com;https://dattran24.github.io/", "dblp": "141/7462.html;;;", "google_scholar": "https://scholar.google.com.vn/citations?user=lkp8e5UAAAAJ;https://scholar.google.com/citations?hl=en;;9V5qSRcAAAAJ", "orcid": "0000-0001-7156-1380;;;0000-0001-6711-7471", "linkedin": ";;;", "or_profile": "~Pham_Duy_Khanh1;~Hoang-Chau_Luong1;~Boris_Mordukhovich2;~Dat_Ba_Tran1", "aff": "Ho Chi Minh City University of Education ;Ho Chi Minh city University of Science, Vietnam National University;Wayne State University;Wayne State University", "aff_domain": "hcmue.edu.vn;hcmus.edu.vn;wayne.edu;wayne.edu", "position": "Lecturer;Undergrad student;Full Professor;PhD student", "bibtex": "@inproceedings{\nkhanh2024fundamental,\ntitle={Fundamental Convergence Analysis of Sharpness-Aware Minimization},\nauthor={Pham Duy Khanh and Hoang-Chau Luong and Boris Mordukhovich and Dat Ba Tran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PuXYI4HOQU}\n}", "github": "", "reviewers": "ZtLt;cPcD;Yksm;wdGg", "pdf_size": 894427, "rating": "4;6;6;6", "confidence": "4;3;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "31;65;66;89", "wc_strengths": "16;89;35;21", "wc_weaknesses": "80;95;61;32", "wc_questions": "1;28;4;162", "wc_limitations": "1;1;7;1", "wc_review": "129;278;173;305", "wc_reply_reviewers": "21;39;36;80", "wc_reply_authors": "14;0;25;24", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.75, 20.69269194667528 ], "wc_strengths_avg": [ 40.25, 28.994611568358696 ], "wc_weaknesses_avg": [ 67.0, 23.526580712037184 ], "wc_questions_avg": [ 48.75, 66.216972899703 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 221.25, 72.5822808955464 ], "wc_reply_reviewers_avg": [ 44.0, 21.874642854227357 ], "wc_reply_authors_avg": [ 15.75, 10.059199769365355 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16152335760912377221&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hcmue.edu.vn;hcmus.edu.vn;wayne.edu;wayne.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Ho Chi Minh City University of Education;Ho Chi Minh City University of Science;Wayne State University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hcmue.edu.vn;;https://wayne.edu", "aff_unique_abbr": "HCMUE;;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Vietnam;United States" }, { "title": "Learning and Transferring Sparse Contextual Bigrams with Linear Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95275", "id": "PukaVAwYBo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PukaVAwYBo", "openreview": "https://openreview.net/forum?id=PukaVAwYBo", "poster": "", "project": "", "author_site": "Yunwei Ren, Zixuan Wang, Jason Lee", "tldr": "", "abstract": "Transformers have achieved significant success in natural language modeling because of their exceptional capabilities to combine contextual information and global knowledge, yet their theoretical basis remains unclear. In this paper, we first propose Sparse Contextual Bigram (SCB), a natural extension to the classical bigram model, where the generation of the next token depends on a sparse set of earlier positions determined by the last token. We investigate the training dynamics and sample complexity of learning SCB using a one-layer linear transformer with a gradient-based algorithm. We show that when trained from scratch, the training process can be split into an initial sample-intensive stage where the correlation is boosted from zero to a nontrivial value, followed by a more sample-efficient stage of further improvement. Additionally, we prove that, provided a nontrivial correlation between the downstream and pretraining tasks, finetuning from a pretrained model allows us to bypass the initial sample-intensive stage. We also empirically demonstrate that our algorithm can outperform SGD in our setting.", "keywords": "transformer;sample complexity;optimization", "primary_area": "learning_theory", "supplementary_material": "/attachment/6c68217dbfcb8ebdd408f97a948c4668469a7e8a.zip", "author": "Yunwei Ren;Zixuan Wang;Jason D. Lee", "authorids": "~Yunwei_Ren1;~Zixuan_Wang4;~Jason_D._Lee1", "gender": ";M;M", "homepage": "https://yunwei-ren.me;https://zixuan-wang-dlt.github.io;https://jasondlee88.github.io/", "dblp": "294/9141.html;;88/3262", "google_scholar": "R3GdCt0AAAAJ;vNJDZyEAAAAJ;GR_DsT0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yunwei_Ren1;~Zixuan_Wang4;~Jason_D._Lee1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nren2024learning,\ntitle={Learning and Transferring Sparse Contextual Bigrams with Linear Transformers},\nauthor={Yunwei Ren and Zixuan Wang and Jason D. Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PukaVAwYBo}\n}", "github": "", "reviewers": "fyHi;9wme;yR2y;Qmma", "pdf_size": 1117031, "rating": "5;6;6;7", "confidence": "4;3;3;2", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "4;3;4;3", "wc_summary": "137;47;286;90", "wc_strengths": "55;23;70;77", "wc_weaknesses": "168;51;130;86", "wc_questions": "9;57;1;64", "wc_limitations": "7;78;1;58", "wc_review": "376;256;488;375", "wc_reply_reviewers": "11;13;0;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 140.0, 90.10271915985666 ], "wc_strengths_avg": [ 56.25, 20.777090749188154 ], "wc_weaknesses_avg": [ 108.75, 44.200537327050675 ], "wc_questions_avg": [ 32.75, 28.003348014121453 ], "wc_limitations_avg": [ 36.0, 32.840523747346055 ], "wc_review_avg": [ 373.75, 82.04381451395346 ], "wc_reply_reviewers_avg": [ 10.75, 6.869315832017043 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15117907260626258970&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HelpSteer 2: Open-source dataset for training top-performing reward models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97706", "id": "PvVKUFhaNy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PvVKUFhaNy", "openreview": "https://openreview.net/forum?id=PvVKUFhaNy", "poster": "/media/PosterPDFs/NeurIPS%202024/97706.png?t=1731176991.3336926", "project": "", "author_site": "Zhilin Wang, Yi Dong, Olivier Delalleau, Jiaqi Zeng, Gerald Shen, Daniel Egert, Jimmy Zhang, Makesh Narsimhan Sreedhar, Oleksii Kuchaiev", "tldr": "", "abstract": "High-quality preference datasets are essential for training reward models that can effectively guide large language models (LLMs) in generating high-quality responses aligned with human preferences.\nAs LLMs become stronger and better aligned, permissively licensed preference datasets, such as Open Assistant, HH-RLHF, and HelpSteer need to be updated to remain effective for reward modeling.\nMethods that distil preference data from proprietary LLMs such as GPT-4 have restrictions on commercial usage imposed by model providers.\nTo improve upon both generated responses and attribute labeling quality, we release HelpSteer2, a permissively licensed preference dataset (CC-BY-4.0). \nUsing a powerful Nemotron-4-340B base model trained on HelpSteer2, we are able to achieve the SOTA score (92.0%) on Reward-Bench's primary dataset, outperforming currently listed open and proprietary models, as of June 12th, 2024.\nNotably, HelpSteer2 consists of only ten thousand response pairs, an order of magnitude fewer than existing preference datasets (e.g., HH-RLHF), which makes it highly efficient for training reward models. \nOur extensive experiments demonstrate that reward models trained with HelpSteer2 are effective in aligning LLMs. Additionally, we propose SteerLM 2.0, a model alignment approach that can effectively make use of the rich multi-attribute score predicted by our reward models. \nHelpSteer2 is available at https://huggingface.co/datasets/nvidia/HelpSteer2 and code is available at https://github.com/NVIDIA/NeMo-Aligner", "keywords": "reward model;preference dataset;helpfulness dataset;model alignment", "primary_area": "", "supplementary_material": "/attachment/369ede11116ab2856bc2f56971156837555e90e3.pdf", "author": "Zhilin Wang;Yi Dong;Olivier Delalleau;Jiaqi Zeng;Gerald Shen;Daniel Egert;Jimmy J. Zhang;Makesh Narsimhan Sreedhar;Oleksii Kuchaiev", "authorids": "~Zhilin_Wang2;~Yi_Dong4;~Olivier_Delalleau1;~Jiaqi_Zeng1;~Gerald_Shen1;~Daniel_Egert1;~Jimmy_J._Zhang1;~Makesh_Narsimhan_Sreedhar1;~Oleksii_Kuchaiev1", "gender": ";M;M;;;M;M;;", "homepage": ";;;;https://www.linkedin.com/in/gerald-shen-5234211b7/;;;;http://www.kuchaev.com", "dblp": "53/10643;;68/2192;;;;;;", "google_scholar": "OmMgSQsAAAAJ;;https://scholar.google.ca/citations?user=zqLpO2QAAAAJ;;;https://scholar.google.com/citations?hl=en;;;qmmIGnwAAAAJ", "orcid": ";;0000-0002-0610-7226;;;;;;", "linkedin": ";yi-dong-04057b18;odelalleau;;gerald-shen-5234211b7/;;jimmyjzhang;;oleksiikuchaiev/", "or_profile": "~Zhilin_Wang2;~Yi_Dong4;~Olivier_Delalleau1;~Jiaqi_Zeng1;~Gerald_Shen1;~Daniel_Egert1;~Jimmy_J._Zhang1;~Makesh_Narsimhan_Sreedhar1;~Oleksii_Kuchaiev1", "aff": "NVIDIA;;NVIDIA;;NVIDIA;NVIDIA;NVIDIA;;NVIDIA", "aff_domain": "nvidia.com;;nvidia.com;;nvidia.com;nvidia.com;nvidia.com;;nvidia.com", "position": "Applied Scientist;;Researcher;;Researcher;Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nwang2024helpsteer,\ntitle={HelpSteer 2: Open-source dataset for training top-performing reward models},\nauthor={Zhilin Wang and Yi Dong and Olivier Delalleau and Jiaqi Zeng and Gerald Shen and Daniel Egert and Jimmy J. Zhang and Makesh Narsimhan Sreedhar and Oleksii Kuchaiev},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PvVKUFhaNy}\n}", "github": "", "reviewers": "WxYS;MsiT;BUY9;67qs", "pdf_size": 452171, "rating": "5;6;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "39;63;46;175", "wc_strengths": "2;127;25;2", "wc_improvement": "2;3;79;111", "wc_limitations": "2;7;1;32", "wc_correctness": "1;13;27;45", "wc_clarity": "1;10;9;5", "wc_relation_to_prior_work": "1;2;8;24", "wc_documentation": "1;2;1;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "50;228;197;398", "wc_reply_reviewers": "9;0;12;0", "wc_reply_authors": "73;48;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 80.75, 55.110684082126944 ], "wc_strengths_avg": [ 39.0, 51.66720429827803 ], "wc_improvement_avg": [ 48.75, 47.61499238685227 ], "wc_limitations_avg": [ 10.5, 12.619429464123963 ], "wc_correctness_avg": [ 21.5, 16.393596310755 ], "wc_clarity_avg": [ 6.25, 3.5619517121937516 ], "wc_relation_to_prior_work_avg": [ 8.75, 9.202581159652981 ], "wc_documentation_avg": [ 1.75, 0.82915619758885 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 218.25, 123.657541217671 ], "wc_reply_reviewers_avg": [ 5.25, 5.356071321407137 ], "wc_reply_authors_avg": [ 30.25, 31.514877439076294 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5928156843276485947&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nvidia.com;;nvidia.com;;nvidia.com;nvidia.com;nvidia.com;;nvidia.com", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MADiff: Offline Multi-agent Learning with Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95274", "id": "PvoxbjcRPT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PvoxbjcRPT", "openreview": "https://openreview.net/forum?id=PvoxbjcRPT", "poster": "", "project": "", "author_site": "Zhengbang Zhu, Minghuan Liu, Liyuan Mao, Bingyi Kang, Minkai Xu, Yong Yu, Stefano Ermon, Weinan Zhang", "tldr": "", "abstract": "Offline reinforcement learning (RL) aims to learn policies from pre-existing datasets without further interactions, making it a challenging task. Q-learning algorithms struggle with extrapolation errors in offline settings, while supervised learning methods are constrained by model expressiveness. Recently, diffusion models (DMs) have shown promise in overcoming these limitations in single-agent learning, but their application in multi-agent scenarios remains unclear. Generating trajectories for each agent with independent DMs may impede coordination, while concatenating all agents\u2019 information can lead to low sample efficiency. Accordingly, we propose MADiff, which is realized with an attention-based diffusion model to model the complex coordination among behaviors of multiple agents. To our knowledge, MADiff is the first diffusion-based multi-agent learning framework, functioning as both a decentralized policy and a centralized controller. During decentralized executions, MADiff simultaneously performs teammate modeling, and the centralized controller can also be applied in multi-agent trajectory predictions. Our experiments demonstrate that MADiff outperforms baseline algorithms across various multi-agent learning tasks, highlighting its effectiveness in modeling complex multi-agent interactions.", "keywords": "Multi-agent RL;Diffusion Models;Offline RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/896f6a58157cdb4315183d4cbe573fe100de81a2.zip", "author": "Zhengbang Zhu;Minghuan Liu;Liyuan Mao;Bingyi Kang;Minkai Xu;Yong Yu;Stefano Ermon;Weinan Zhang", "authorids": "~Zhengbang_Zhu1;~Minghuan_Liu1;~Liyuan_Mao2;~Bingyi_Kang1;~Minkai_Xu1;~Yong_Yu1;~Stefano_Ermon1;~Weinan_Zhang1", "gender": "M;M;;M;;M;M;M", "homepage": "https://github.com/zbzhu99;http://minghuanliu.com;https://bingykang.github.io/;https://minkaixu.com;https://apex.sjtu.edu.cn/members/yyu;http://cs.stanford.edu/~ermon/;http://wnzhang.net;https://github.com/maoliyuan", "dblp": "277/0869;249/7554;;257/3355;43/5685.html;47/8135;28/10261-1;", "google_scholar": ";;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;https://scholar.google.com/citations?hl=en;;;Qzss0GEAAAAJ;", "orcid": ";;;;0000-0003-4457-2820;;0000-0002-0127-2425;", "linkedin": ";;;;;;;", "or_profile": "~Zhengbang_Zhu1;~Minghuan_Liu1;~Bingyi_Kang1;~Minkai_Xu1;~Yong_Yu1;~Stefano_Ermon1;~Weinan_Zhang1;~Liyuan_Richard_Mao1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Bytedance;Stanford University;Shanghai Jiaotong University;Stanford University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;bytedance.com;stanford.edu;sjtu.edu.cn;stanford.edu;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;Full Professor;Associate Professor;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nzhu2024madiff,\ntitle={{MAD}iff: Offline Multi-agent Learning with Diffusion Models},\nauthor={Zhengbang Zhu and Minghuan Liu and Liyuan Mao and Bingyi Kang and Minkai Xu and Yong Yu and Stefano Ermon and Weinan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PvoxbjcRPT}\n}", "github": "", "reviewers": "hh3p;nRbJ;vCUA;joCT", "pdf_size": 2998411, "rating": "5;6;6;7", "confidence": "3;3;4;4", "soundness": "3;3;3;2", "novelty": "3;2;3;2", "presentation": "3;2;2;3", "wc_summary": "58;98;94;57", "wc_strengths": "75;77;57;42", "wc_weaknesses": "56;117;99;278", "wc_questions": "132;134;149;110", "wc_limitations": "72;15;16;11", "wc_review": "393;441;415;498", "wc_reply_reviewers": "225;17;25;150", "wc_reply_authors": "2465;41;33;557", "reply_reviewers": "3;1;1;2", "reply_authors": "9;2;2;4", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.75, 19.30511590226798 ], "wc_strengths_avg": [ 62.75, 14.289419162443238 ], "wc_weaknesses_avg": [ 137.5, 84.0907248155229 ], "wc_questions_avg": [ 131.25, 13.91716565971678 ], "wc_limitations_avg": [ 28.5, 25.184320518926057 ], "wc_review_avg": [ 436.75, 39.23248016631118 ], "wc_reply_reviewers_avg": [ 104.25, 87.41674610736779 ], "wc_reply_authors_avg": [ 774.0, 999.1171102528472 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 4.25, 2.8613807855648994 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6620009748424414321&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;bytedance.com;stanford.edu;sjtu.edu.cn;stanford.edu;sjtu.edu.cn;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;0;2;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;ByteDance;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.bytedance.com;https://www.stanford.edu", "aff_unique_abbr": "SJTU;Bytedance;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "AutoManual: Constructing Instruction Manuals by LLM Agents via Interactive Environmental Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95273", "id": "Pwl9n4zlf5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Pwl9n4zlf5", "openreview": "https://openreview.net/forum?id=Pwl9n4zlf5", "poster": "/media/PosterPDFs/NeurIPS%202024/95273.png?t=1731296167.055757", "project": "", "author_site": "Minghao Chen, Yihang Li, Yanting Yang, Shiyu Yu, Binbin Lin, Xiaofei He", "tldr": "", "abstract": "Large Language Models (LLM) based agents have shown promise in autonomously completing tasks across various domains, e.g., robotics, games, and web navigation. However, these agents typically require elaborate design and expert prompts to solve tasks in specific domains, which limits their adaptability. We introduce AutoManual, a framework enabling LLM agents to autonomously build their understanding through interaction and adapt to new environments. AutoManual categorizes environmental knowledge into diverse rules and optimizes them in an online fashion by two agents: 1) The Planner codes actionable plans based on current rules for interacting with the environment. 2) The Builder updates the rules through a well-structured rule system that facilitates online rule management and essential detail retention. To mitigate hallucinations in managing rules, we introduce a *case-conditioned prompting* strategy for the Builder. Finally, the Formulator agent compiles these rules into a comprehensive manual. The self-generated manual can not only improve the adaptability but also guide the planning of smaller LLMs while being human-readable. Given only one simple demonstration, AutoManual significantly improves task success rates, achieving 97.4\\% with GPT-4-turbo and 86.2\\% with GPT-3.5-turbo on ALFWorld benchmark tasks. The code is available at https://github.com/minghchen/automanual.", "keywords": "Large Language Models;AI Agents;planning;decision making;programming", "primary_area": "other", "supplementary_material": "/attachment/28e6868a100f6b4746f5bffeaaa012ebba16c879.zip", "author": "Minghao Chen;Yihang Li;Yanting Yang;Shiyu Yu;Binbin Lin;Xiaofei He", "authorids": "~Minghao_Chen2;~Yihang_Li5;~Yanting_Yang2;~Shiyu_Yu1;~Binbin_Lin3;~Xiaofei_He2", "gender": "M;M;F;M;M;M", "homepage": ";https://github.com/lyh1028;https://github.com/Yt2000/;;https://www.linkedin.com/in/binbin-lin-03598b31/;https://person.zju.edu.cn/0007101", "dblp": "39/332-1;;;;51/8073;h/XiaofeiHe.html", "google_scholar": "xxPcRRQAAAAJ;;;;Zmvq4KYAAAAJ;QLLFowsAAAAJ", "orcid": ";;;0009-0004-0039-8538;0000-0002-0330-6406;0009-0001-9107-2354", "linkedin": ";;;;;", "or_profile": "~Minghao_Chen2;~Yihang_Li5;~Yanting_Yang2;~Shiyu_Yu1;~Binbin_Lin3;~Xiaofei_He2", "aff": "Hangzhou Dianzi University;Zhejiang University;Zhejiang University;Ningbo Meidong Container Terminal Co.,Ltd.;Zhejiang University;Zhejiang University", "aff_domain": "hdu.edu.cn;zju.edu.cn;zju.edu.cn;nbport.com.cn;zju.edu.cn;zju.edu.cn", "position": "Lecturer;MS student;MS student;Researcher;Researcher;Professor", "bibtex": "@inproceedings{\nchen2024automanual,\ntitle={AutoManual: Generating Instruction Manuals by {LLM} Agents via Interactive Environmental Learning},\nauthor={Minghao Chen and Yihang Li and Yanting Yang and Shiyu Yu and Binbin Lin and Xiaofei He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Pwl9n4zlf5}\n}", "github": "", "reviewers": "zSsL;3jfh;azPd;HWDR", "pdf_size": 1580358, "rating": "5;6;6;8", "confidence": "4;4;4;3", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "166;120;107;95", "wc_strengths": "73;96;56;87", "wc_weaknesses": "279;91;291;121", "wc_questions": "26;118;39;189", "wc_limitations": "10;64;8;13", "wc_review": "554;489;501;505", "wc_reply_reviewers": "0;273;92;41", "wc_reply_authors": "0;21;34;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 122.0, 26.89795531262553 ], "wc_strengths_avg": [ 78.0, 15.116216457830975 ], "wc_weaknesses_avg": [ 195.5, 90.22610486993219 ], "wc_questions_avg": [ 93.0, 65.66201337150727 ], "wc_limitations_avg": [ 23.75, 23.306383245797704 ], "wc_review_avg": [ 512.25, 24.81305100143874 ], "wc_reply_reviewers_avg": [ 101.5, 104.24130659196479 ], "wc_reply_authors_avg": [ 13.75, 14.49784466739798 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=908704235853502636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "hdu.edu.cn;zju.edu.cn;zju.edu.cn;nbport.com.cn;zju.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Hangzhou Dianzi University;Zhejiang University;Ningbo Meidong Container Terminal Co., Ltd.", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hdu.edu.cn/;https://www.zju.edu.cn;", "aff_unique_abbr": "HGHDU;ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Density-based User Representation using Gaussian Process Regression for Multi-interest Personalized Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95272", "id": "Px1hQM72iX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Px1hQM72iX", "openreview": "https://openreview.net/forum?id=Px1hQM72iX", "poster": "/media/PosterPDFs/NeurIPS%202024/95272.png?t=1732812437.1456838", "project": "", "author_site": "Haolun Wu, Ofer Meshi, Masrour Zoghi, Fernando Diaz, Xue (Steve) Liu, Craig Boutilier, Maryam Karimzadehgan", "tldr": "", "abstract": "Accurate modeling of the diverse and dynamic interests of users remains a significant challenge in the design of personalized recommender systems. Existing user modeling methods, like single-point and multi-point representations, have limitations w.r.t.\\ accuracy, diversity, and adaptability. To overcome these deficiencies, we introduce density-based user representations (DURs), a novel method that leverages Gaussian process regression (GPR) for effective multi-interest recommendation and retrieval. Our approach, GPR4DUR, exploits DURs to capture user interest variability without manual tuning, incorporates uncertainty-awareness, and scales well to large numbers of users. Experiments using real-world offline datasets confirm the adaptability and efficiency of GPR4DUR, while online experiments with simulated users demonstrate its ability to address the exploration-exploitation trade-off by effectively utilizing model uncertainty.", "keywords": "User Representation;Recommendation;Retrieval;Gaussian Process Regression", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Haolun Wu;Ofer Meshi;Masrour Zoghi;Fernando Diaz;Xue Liu;Craig Boutilier;MARYAM KARIMZADEHGAN", "authorids": "~Haolun_Wu1;~Ofer_Meshi1;~Masrour_Zoghi2;~Fernando_Diaz1;~Xue_Liu1;~Craig_Boutilier2;~MARYAM_KARIMZADEHGAN2", "gender": "M;M;M;;M;M;", "homepage": "https://haolun-wu.github.io/;https://sites.google.com/site/ofermeshi/home;https://research.google/people/MasrourZoghi/;;http://www.cs.mcgill.ca/~xueliu/;https://research.google/people/craigboutilier/;https://scholar.google.com/citations?user=9Wh4EoMAAAAJ&hl=en", "dblp": "283/5463;91/8411;;;l/XueLiu;10/3411;55/1130.html", "google_scholar": "-KcBDLcAAAAJ;KMBgMs0AAAAJ;zpCibPYAAAAJ;;https://scholar.google.com.tw/citations?user=rfLIRakAAAAJ;cXkm3rsAAAAJ;9Wh4EoMAAAAJ", "orcid": "0000-0001-6255-1535;;;;;;", "linkedin": "haolun-wu-23ba08133/;;;;;;", "or_profile": "~Haolun_Wu1;~Ofer_Meshi1;~Masrour_Zoghi2;~Fernando_Diaz1;~Xue_Liu1;~Craig_Boutilier2;~MARYAM_KARIMZADEHGAN2", "aff": "Google Research;Google;Google;;McGill University;Google;Google", "aff_domain": "google.com;google.com;google.com;;mcgill.ca;google.com;google.com", "position": "Intern;Research Scientist;Software Engineer;;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwu2024densitybased,\ntitle={Density-based User Representation using Gaussian Process Regression for Multi-interest Personalized Retrieval},\nauthor={Haolun Wu and Ofer Meshi and Masrour Zoghi and Fernando Diaz and Xue Liu and Craig Boutilier and MARYAM KARIMZADEHGAN},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Px1hQM72iX}\n}", "github": "", "reviewers": "Eeve;r5NL;icLv;F9WH", "pdf_size": 4593561, "rating": "5;5;6;8", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "60;102;73;103", "wc_strengths": "57;37;123;93", "wc_weaknesses": "144;52;190;66", "wc_questions": "148;190;6;45", "wc_limitations": "33;17;15;1", "wc_review": "442;398;407;308", "wc_reply_reviewers": "25;0;10;0", "wc_reply_authors": "37;0;37;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.5, 18.580904176062045 ], "wc_strengths_avg": [ 77.5, 33.05676935213119 ], "wc_weaknesses_avg": [ 113.0, 56.61271941887264 ], "wc_questions_avg": [ 97.25, 74.55660601180824 ], "wc_limitations_avg": [ 16.5, 11.346805717910216 ], "wc_review_avg": [ 388.75, 49.43366767699924 ], "wc_reply_reviewers_avg": [ 8.75, 10.231690964840562 ], "wc_reply_authors_avg": [ 18.5, 18.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VvSVsG976TQJ:scholar.google.com/&scioq=Density-based+User+Representation+using+Gaussian+Process+Regression+for+Multi-interest+Personalized+Retrieval&hl=en&as_sdt=0,7", "gs_version_total": 7, "email": "google.com;google.com;google.com;;mcgill.ca;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;McGill University", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.mcgill.ca", "aff_unique_abbr": "Google Research;McGill", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "ConvBench: A Multi-Turn Conversation Evaluation Benchmark with Hierarchical Ablation Capability for Large Vision-Language Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97705", "id": "PyTf2jj0SH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PyTf2jj0SH", "openreview": "https://openreview.net/forum?id=PyTf2jj0SH", "poster": "/media/PosterPDFs/NeurIPS%202024/97705.png?t=1733118401.104419", "project": "", "author_site": "Shuo Liu, Kaining Ying, Hao Zhang, yue yang, Yuqi Lin, Tianle Zhang, Chuanhao Li, Yu Qiao, Ping Luo, Wenqi Shao, Kaipeng Zhang", "tldr": "", "abstract": "Multi-turn visual conversation is an important ability of real-world AI assistants. However, the related evaluation benchmark is missed. This paper presents ConvBench, a multi-turn conversation benchmark with hierarchical capabilities ablation evaluation for Large Vision-Language Models (LVLMs). ConvBench comprises 577 curated multi-turn conversations, encompassing 215 tasks. These tasks are broad and open-ended, which resemble real-world user behaviors. ConvBench progressively examines the LVLMs' perception, reasoning, and creativity capabilities in each conversation and can decouple these capabilities in evaluations and thus perform reliable error attribution. Besides, considering the diversity of open-ended questions, we introduce an efficient and reliable automatic evaluation framework. Experimental results reveal that ConvBench is a significant challenge for current LVLMs, even for GPT4V, which achieves only a 39.51% score. Besides, we have some insightful findings, such as the weak perception of LVLMs inhibits authentic strengths in reasoning and creation. We believe our design of hierarchical capabilities, decoupling capabilities evaluation, and multi-turn conversation can blaze a new trail in LVLMs evaluation. Code and benchmark are released at https://github.com/shirlyliu64/ConvBench.", "keywords": "Multi-Turn Conversation Evaluation;Progressive Evaluation;Large Vision-Language Model", "primary_area": "", "supplementary_material": "/attachment/b50203c58bdcf85dd9e6e47f0a75ab58a2a0d2c0.pdf", "author": "Shuo Liu;Kaining Ying;Hao Zhang;Yue Yang;Yuqi Lin;Tianle Zhang;Chuanhao Li;Yu Qiao;Ping Luo;Wenqi Shao;Kaipeng Zhang", "authorids": "~Shuo_Liu5;~Kaining_Ying2;~Hao_Zhang56;~Yue_Yang6;~Yuqi_Lin1;~Tianle_Zhang4;~Chuanhao_Li2;~Yu_Qiao1;~Ping_Luo2;~Wenqi_Shao2;~Kaipeng_Zhang1", "gender": "F;M;M;;M;M;M;;;M;M", "homepage": ";https://github.com/Morgott-The-Omen-King;;;http://wiki.zjulearning.org.cn/wiki/User:Linyuqi;;https://nevermorelch.github.io/;;;https://wqshao126.github.io/;http://kpzhang93.github.io/", "dblp": "07/6773;291/9018;;;117/7752;;195/9947-1;;;227/3122;179/2126", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-CN;MDvaeqUAAAAJ;hwZUvY0AAAAJ;;5-jDh48AAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;;Bs9mrwwAAAAJ;4OqZBmYAAAAJ", "orcid": ";;0000-0002-3572-7053;;;0000-0003-1502-9730;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Shuo_Liu5;~Kaining_Ying2;~Hao_Zhang56;~Yue_Yang6;~Yuqi_Lin1;~Tianle_Zhang4;~Chuanhao_Li2;~Yu_Qiao1;~Ping_Luo2;~Wenqi_Shao2;~Kaipeng_Zhang1", "aff": "Shanghai AI lab;Zhejiang University of Technology;Xi'an Jiaotong University;;Zhejiang University;University of Electronic Science and Technology of China;Beijing Institute of Technology;;;Shanghai AI Laboratory;Shanghai AI Laboratory", "aff_domain": "pjlab.org;zjut.edu.cn;xjtu.edu.cn;;zju.edu.cn;cn.edu;bit.edu.cn;;;pjlab.org.cn;pjlab.org.cn", "position": "Researcher;MS student;PhD student;;PhD student;Undergrad student;PhD student;;;Researcher;Researcher", "bibtex": "@inproceedings{\nliu2024convbench,\ntitle={ConvBench: A Multi-Turn Conversation Evaluation Benchmark with Hierarchical Ablation Capability for Large Vision-Language Models},\nauthor={Shuo Liu and Kaining Ying and Hao Zhang and Yue Yang and Yuqi Lin and Tianle Zhang and Chuanhao Li and Yu Qiao and Ping Luo and Wenqi Shao and Kaipeng Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=PyTf2jj0SH}\n}", "github": "", "reviewers": "HBs1;q8kb;qGUt", "pdf_size": 0, "rating": "7;8;8", "confidence": "4;5;3", "wc_summary_and_contributions": "69;55;64", "wc_strengths": "59;82;29", "wc_improvement": "188;121;44", "wc_limitations": "12;1;30", "wc_correctness": "15;9;11", "wc_clarity": "24;5;5", "wc_relation_to_prior_work": "18;71;22", "wc_documentation": "16;1;19", "wc_additional_feedback": "1;1;1", "wc_review": "402;346;225", "wc_reply_reviewers": "65;24;0", "wc_reply_authors": "2083;268;0", "reply_reviewers": "1;1;0", "reply_authors": "7;3;1", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 62.666666666666664, 5.792715732327589 ], "wc_strengths_avg": [ 56.666666666666664, 21.699974398346395 ], "wc_improvement_avg": [ 117.66666666666667, 58.83498581248708 ], "wc_limitations_avg": [ 14.333333333333334, 11.953614051360738 ], "wc_correctness_avg": [ 11.666666666666666, 2.494438257849294 ], "wc_clarity_avg": [ 11.333333333333334, 8.9566858950296 ], "wc_relation_to_prior_work_avg": [ 37.0, 24.097026095903757 ], "wc_documentation_avg": [ 12.0, 7.874007874011811 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 324.3333333333333, 73.86624548616386 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 26.83695627716046 ], "wc_reply_authors_avg": [ 783.6666666666666, 925.2590027782612 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 2.494438257849294 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10356855139344321478&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pjlab.org;zjut.edu.cn;xjtu.edu.cn;;zju.edu.cn;cn.edu;bit.edu.cn;;;pjlab.org.cn;pjlab.org.cn", "author_num": 11, "aff_unique_index": "0;1;2;3;4;5;6;6", "aff_unique_norm": "Shanghai AI Lab;Zhejiang University of Technology;Xi'an Jiao Tong University;Zhejiang University;University of Electronic Science and Technology of China;Beijing Institute of Technology;Shanghai AI Laboratory", "aff_unique_dep": "AI Research;;;;;;", "aff_unique_url": "https://www.shanghaiailab.com;https://www.zjut.edu.cn;https://www.xjtu.edu.cn;https://www.zju.edu.cn;https://www.uestc.edu.cn;http://www.bit.edu.cn/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "Shanghai AI Lab;ZJUT;XJTU;ZJU;UESTC;BIT;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Controlling Counterfactual Harm in Decision Support Systems Based on Prediction Sets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95271", "id": "PyTkA6HkzX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PyTkA6HkzX", "openreview": "https://openreview.net/forum?id=PyTkA6HkzX", "poster": "/media/PosterPDFs/NeurIPS%202024/95271.png?t=1733143666.1995745", "project": "", "author_site": "Eleni Straitouri, Suhas Thejaswi, Manuel Rodriguez", "tldr": "", "abstract": "Decision support systems based on prediction sets help humans solve multiclass classification tasks by narrowing down the set of potential label values to a subset of them, namely a prediction set, and asking them to always predict label values from the prediction sets. While this type of systems have been proven to be effective at improving the average accuracy of the predictions made by humans, by restricting human agency, they may cause harm---a human who has succeeded at predicting the ground-truth label of an instance on their own may have failed had they used these systems. In this paper, our goal is to control how frequently a decision support system based on prediction sets may cause harm, by design. To this end, we start by characterizing the above notion of harm using the theoretical framework of structural causal models. Then, we show that, under a natural, albeit unverifiable, monotonicity assumption, we can estimate how frequently a system may cause harm using only predictions made by humans on their own. Further, we also show that, under a weaker monotonicity assumption, which can be verified experimentally, we can bound how frequently a system may cause harm again using only predictions made by humans on their own. Building upon these assumptions, we introduce a computational framework to design decision support systems based on prediction sets that are guaranteed to cause harm less frequently than a user-specified value \nusing conformal risk control. We validate our framework using real human predictions from two different human subject studies and show that, in decision support systems based on prediction sets, there is a trade-off between accuracy and counterfactual harm.", "keywords": "Counterfactual Harm;Prediction Sets;Decision Support Systems", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Eleni Straitouri;Suhas Thejaswi;Manuel Gomez Rodriguez", "authorids": "~Eleni_Straitouri1;~Suhas_Thejaswi1;~Manuel_Gomez_Rodriguez1", "gender": ";;M", "homepage": "https://people.mpi-sws.org/~estraitouri/;;https://www.mpi-sws.org/~manuelgr/", "dblp": "302/4619;;73/8260", "google_scholar": "kphSqwwAAAAJ;;https://scholar.google.com.tw/citations?user=UcuXmuwAAAAJ", "orcid": ";;", "linkedin": "eleni-straitouri-919419205;;", "or_profile": "~Eleni_Straitouri1;~Suhas_Thejaswi1;~Manuel_Gomez_Rodriguez1", "aff": "MPI-SWS;;MPI-SWS", "aff_domain": "mpi-sws.org;;mpi-sws.org", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nstraitouri2024controlling,\ntitle={Controlling Counterfactual Harm in Decision Support Systems Based on Prediction Sets},\nauthor={Eleni Straitouri and Suhas Thejaswi and Manuel Gomez Rodriguez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PyTkA6HkzX}\n}", "github": "", "reviewers": "8Whm;RCNh;KmhA;DuUW", "pdf_size": 3493056, "rating": "5;5;6;6", "confidence": "4;4;4;2", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "201;64;83;112", "wc_strengths": "110;112;80;94", "wc_weaknesses": "5;159;310;221", "wc_questions": "389;86;333;222", "wc_limitations": "4;1;7;13", "wc_review": "709;422;813;662", "wc_reply_reviewers": "792;0;285;98", "wc_reply_authors": "1119;0;265;53", "reply_reviewers": "3;0;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.0, 52.51190341246449 ], "wc_strengths_avg": [ 99.0, 13.0 ], "wc_weaknesses_avg": [ 173.75, 111.23258290626897 ], "wc_questions_avg": [ 257.5, 115.82853707096537 ], "wc_limitations_avg": [ 6.25, 4.437059837324712 ], "wc_review_avg": [ 651.5, 143.32567809014546 ], "wc_reply_reviewers_avg": [ 293.75, 305.34273775546063 ], "wc_reply_authors_avg": [ 359.25, 449.70900313424903 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11912589010112541446&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mpi-sws.org;;mpi-sws.org", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Max Planck Institute for Software Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "On the Computational Complexity of Private High-dimensional Model Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95270", "id": "PzG7xVlYqm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=PzG7xVlYqm", "openreview": "https://openreview.net/forum?id=PzG7xVlYqm", "poster": "/media/PosterPDFs/NeurIPS%202024/95270.png?t=1731724272.1507", "project": "", "author_site": "Saptarshi Roy, Zehua Wang, Ambuj Tewari", "tldr": "", "abstract": "We consider the problem of model selection in a high-dimensional sparse linear regression model under privacy constraints. We propose a differentially private (DP) best subset selection method with strong statistical utility properties by adopting the well-known exponential mechanism for selecting the best model. To achieve computational expediency, we propose an efficient Metropolis-Hastings algorithm and under certain regularity conditions, we establish that it enjoys polynomial mixing time to its stationary distribution. As a result, we also establish both approximate differential privacy and statistical utility for the estimates of the mixed Metropolis-Hastings chain. Finally, we perform some illustrative experiments on simulated data showing that our algorithm can quickly identify active features under reasonable privacy budget constraints.", "keywords": "Best Subset Selection;Differential Privacy;Exponential Mechanism;Metropolis-Hastings;Model Consistency;Variable Selection.", "primary_area": "privacy", "supplementary_material": "", "author": "Saptarshi Roy;Zehua Wang;Ambuj Tewari", "authorids": "~Saptarshi_Roy1;~Zehua_Wang3;~Ambuj_Tewari1", "gender": "M;Not Specified;M", "homepage": "https://sites.google.com/umich.edu/saptarshi-roys-home-page/home?authuser=1;https://lsa.umich.edu/stats/people/master-s-students-of-data-science/class-of-2021/wangzeh.html;https://www.ambujtewari.com", "dblp": ";;24/567", "google_scholar": "Ywix3OUAAAAJ;;ttbl4FsAAAAJ", "orcid": "0000-0003-4183-205X;;0000-0001-6969-7844", "linkedin": ";;", "or_profile": "~Saptarshi_Roy1;~Zehua_Wang3;~Ambuj_Tewari1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nroy2024on,\ntitle={On the Computational Complexity of Private High-dimensional Model Selection},\nauthor={Saptarshi Roy and Zehua Wang and Ambuj Tewari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=PzG7xVlYqm}\n}", "github": "", "reviewers": "2aDP;piCf;QHbD;QExW", "pdf_size": 25369598, "rating": "4;5;5;5", "confidence": "2;3;2;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "15;100;69;106", "wc_strengths": "68;38;69;46", "wc_weaknesses": "61;19;47;62", "wc_questions": "152;125;16;47", "wc_limitations": "5;5;2;10", "wc_review": "301;287;203;271", "wc_reply_reviewers": "0;13;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 36.04511062543712 ], "wc_strengths_avg": [ 55.25, 13.5531361684298 ], "wc_weaknesses_avg": [ 47.25, 17.354754391808605 ], "wc_questions_avg": [ 85.0, 55.43915583772899 ], "wc_limitations_avg": [ 5.5, 2.8722813232690143 ], "wc_review_avg": [ 265.5, 37.613162589710534 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bDV3LXY1wkkJ:scholar.google.com/&scioq=On+the+Computational+Complexity+of+Private+High-dimensional+Model+Selection&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "umich.edu;umich.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On the Complexity of Learning Sparse Functions with Statistical and Gradient Queries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95269", "id": "Q0KwoyZlSo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q0KwoyZlSo", "openreview": "https://openreview.net/forum?id=Q0KwoyZlSo", "poster": "/media/PosterPDFs/NeurIPS%202024/95269.png?t=1731447756.479201", "project": "", "author_site": "Nirmit Joshi, Theodor Misiakiewicz, Nati Srebro", "tldr": "", "abstract": "The goal of this paper is to investigate the complexity of gradient algorithms when learning sparse functions (juntas). We introduce a type of Statistical Queries ($\\mathsf{SQ}$), which we call Differentiable Learning Queries ($\\mathsf{DLQ}$), to model gradient queries on a specified loss with respect to an arbitrary model. We provide a tight characterization of the query complexity of $\\mathsf{DLQ}$ for learning the support of a sparse function over generic product distributions. This complexity crucially depends on the loss function. For the squared loss, $\\mathsf{DLQ}$ matches the complexity of Correlation Statistical Queries $(\\mathsf{CSQ})$\u2014potentially much worse than $\\mathsf{SQ}$. But for other simple loss functions, including the $\\ell_1$ loss, $\\mathsf{DLQ}$ always achieves the same complexity as $\\mathsf{SQ}$. We also provide evidence that $\\mathsf{DLQ}$ can indeed capture learning with (stochastic) gradient descent by showing it correctly describes the complexity of learning with a two-layer neural network in the mean field regime and linear scaling.", "keywords": "Statistical Query Complexity;Differtiable Learning;Sparse Functions;Leap Exponent", "primary_area": "learning_theory", "supplementary_material": "/attachment/364ff444922bc998884eb3300a1c3312b7c2ff23.zip", "author": "Nirmit Joshi;Theodor Misiakiewicz;Nathan Srebro", "authorids": "~Nirmit_Joshi1;~Theodor_Misiakiewicz1;~Nathan_Srebro1", "gender": "M;;M", "homepage": "https://nirmitj6.github.io/static-webpage/;https://misiakie.github.io;http://ttic.uchicago.edu/~nati/", "dblp": "327/7118;168/8360;50/3633", "google_scholar": ";E8Jst30AAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nirmit_Joshi1;~Theodor_Misiakiewicz1;~Nathan_Srebro1", "aff": "Toyota Technological Institute at Chicago;Toyota Technological Institute at Chicago;University of Chicago", "aff_domain": "ttic.edu;ttic.edu;uchicago.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\njoshi2024on,\ntitle={On the Complexity of Learning Sparse Functions with Statistical and Gradient Queries},\nauthor={Nirmit Joshi and Theodor Misiakiewicz and Nathan Srebro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q0KwoyZlSo}\n}", "github": "", "reviewers": "Grg1;zev8;atP9", "pdf_size": 2200799, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "282;88;112", "wc_strengths": "109;86;103", "wc_weaknesses": "72;114;275", "wc_questions": "237;16;2", "wc_limitations": "1;5;2", "wc_review": "701;309;494", "wc_reply_reviewers": "9;11;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 160.66666666666666, 86.35327954912243 ], "wc_strengths_avg": [ 99.33333333333333, 9.741092797468305 ], "wc_weaknesses_avg": [ 153.66666666666666, 87.49222187651247 ], "wc_questions_avg": [ 85.0, 107.63208939097423 ], "wc_limitations_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_review_avg": [ 501.3333333333333, 160.11731810005924 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 4.784233364802441 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15096760242512873068&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "ttic.edu;ttic.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Toyota Technological Institute at Chicago;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.tti-chicago.org;https://www.uchicago.edu", "aff_unique_abbr": "TTI Chicago;UChicago", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Text-space Graph Foundation Models: Comprehensive Benchmarks and New Insights", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97704", "id": "Q2sDuwtutB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q2sDuwtutB", "openreview": "https://openreview.net/forum?id=Q2sDuwtutB", "poster": "/media/PosterPDFs/NeurIPS%202024/97704.png?t=1733200304.8956842", "project": "", "author_site": "Zhikai Chen, Haitao Mao, Jingzhe Liu, Yu Song, Bingheng Li, Wei Jin, Bahare Fatemi, Anton Tsitsulin, Bryan Perozzi, Hui Liu, Jiliang Tang", "tldr": "", "abstract": "Given the ubiquity of graph data and its applications in diverse domains, building a Graph Foundation Model (GFM) that can work well across different graphs and tasks with a unified backbone has recently garnered significant interests. A major obstacle to achieving this goal stems from the fact that graphs from different domains often exhibit diverse node features. Inspired by multi-modal models that align different modalities with natural language, the text has recently been adopted to provide a unified feature space for diverse graphs. Despite the great potential of these text-space GFMs, current research in this field is hampered by two problems. First, the absence of a comprehensive benchmark with unified problem settings hinders a clear understanding of the comparative effectiveness and practical value of different text-space GFMs. Second, there is a lack of sufficient datasets to thoroughly explore the methods' full potential and verify their effectiveness across diverse settings. To address these issues, we conduct a comprehensive benchmark providing novel text-space datasets and comprehensive evaluation under unified problem settings. Empirical results provide new insights and inspire future research directions. Our code and data are publicly available from https://github.com/CurryTang/TSGFM.", "keywords": "Graph Machine Learning;Graph Foundation Models;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/00629b155ed4736df2689a4bfdf48f3ab6e218f7.pdf", "author": "Zhikai Chen;Haitao Mao;Jingzhe Liu;Yu Song;Bingheng Li;Wei Jin;Bahare Fatemi;Anton Tsitsulin;Bryan Perozzi;Hui Liu;Jiliang Tang", "authorids": "~Zhikai_Chen3;~Haitao_Mao1;~Jingzhe_Liu1;~Yu_Song6;~Bingheng_Li1;~Wei_Jin4;~Bahare_Fatemi1;~Anton_Tsitsulin1;~Bryan_Perozzi1;~Hui_Liu8;~Jiliang_Tang1", "gender": "M;M;;M;;F;M;;F;M;M", "homepage": "http://currytang.github.io;https://liu-jingzhe.github.io/personalpage/;https://github.com/SongYYYY;https://github.com/uestclbh;http://www.cs.emory.edu/~wjin30/;;http://tsitsul.in;http://www.perozzi.net/;https://scholar.google.com/citations?user=EuzF_zsAAAAJ&hl=en;https://www.cse.msu.edu/~tangjili/;", "dblp": "92/40;339/7558;54/1216-7;;66/2173-9;;217/1668;91/10813;93/4010-31;64/10812;", "google_scholar": "6hUny38AAAAJ;;17z7IcgAAAAJ;;eWow24EAAAAJ;;https://scholar.google.com/citations?hl=en;rZgbMs4AAAAJ;;WtzKMWAAAAAJ;3GmlKM4AAAAJ", "orcid": "0009-0009-7305-8629;;0000-0002-8940-2561;0009-0000-0950-9012;;;;;0000-0002-3555-3495;0000-0001-7125-3898;", "linkedin": ";;;;;bahare-fatemi-b0049179/;atsitsulin/;;;;", "or_profile": "~Zhikai_Chen3;~Jingzhe_Liu1;~Yu_Song6;~Bingheng_Li1;~Wei_Jin4;~Bahare_Fatemi1;~Anton_Tsitsulin1;~Bryan_Perozzi1;~Hui_Liu8;~Jiliang_Tang1;~Mao_Haitao1", "aff": "Michigan State University;Michigan State University;Michigan State University;University of Electronic Science and Technology of China;Emory University;Google;Google;Google;Michigan State University;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;uestc.edu.cn;emory.edu;google.com;google.com;google.com;msu.edu;msu.edu;msu.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;Assistant Professor;Researcher;Research Scientist;Researcher;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024textspace,\ntitle={Text-space Graph Foundation Models: Comprehensive Benchmarks and New Insights},\nauthor={Zhikai Chen and Haitao Mao and Jingzhe Liu and Yu Song and Bingheng Li and Wei Jin and Bahare Fatemi and Anton Tsitsulin and Bryan Perozzi and Hui Liu and Jiliang Tang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Q2sDuwtutB}\n}", "github": "", "reviewers": "SB91;9PU3;aFAh;Ltcu", "pdf_size": 742409, "rating": "5;6;7;7", "confidence": "3;4;3;3", "wc_summary_and_contributions": "135;80;108;125", "wc_strengths": "4;2;80;153", "wc_improvement": "49;2;67;217", "wc_limitations": "2;1;14;17", "wc_correctness": "18;1;11;18", "wc_clarity": "1;1;9;16", "wc_relation_to_prior_work": "1;1;1;20", "wc_documentation": "2;3;17;49", "wc_additional_feedback": "1;1;1;1", "wc_review": "213;92;308;616", "wc_reply_reviewers": "163;0;269;105", "wc_reply_authors": "800;91;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "5;3;4;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 112.0, 20.844663585675832 ], "wc_strengths_avg": [ 59.75, 62.34731349464867 ], "wc_improvement_avg": [ 83.75, 80.50892807633201 ], "wc_limitations_avg": [ 8.5, 7.088723439378913 ], "wc_correctness_avg": [ 12.0, 6.96419413859206 ], "wc_clarity_avg": [ 6.75, 6.2599920127744575 ], "wc_relation_to_prior_work_avg": [ 5.75, 8.227241335952167 ], "wc_documentation_avg": [ 17.75, 18.9917745353087 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 307.25, 193.99919458595699 ], "wc_reply_reviewers_avg": [ 134.25, 97.29176481079989 ], "wc_reply_authors_avg": [ 222.75, 335.3396598972451 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3991282203754037766&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "msu.edu;msu.edu;msu.edu;uestc.edu.cn;emory.edu;google.com;google.com;google.com;msu.edu;msu.edu;msu.edu", "author_num": 11, "aff_unique_index": "0;0;0;1;2;3;3;3;0;0;0", "aff_unique_norm": "Michigan State University;University of Electronic Science and Technology of China;Emory University;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.msu.edu;https://www.uestc.edu.cn;https://www.emory.edu;https://www.google.com", "aff_unique_abbr": "MSU;UESTC;Emory;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Nearly Minimax Optimal Regret for Multinomial Logistic Bandit", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95268", "id": "Q4NWfStqVf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q4NWfStqVf", "openreview": "https://openreview.net/forum?id=Q4NWfStqVf", "poster": "/media/PosterPDFs/NeurIPS%202024/95268.png?t=1733800348.2901156", "project": "", "author_site": "Joongkyu Lee, Min-hwan Oh", "tldr": "", "abstract": "In this paper, we study the contextual multinomial logit (MNL) bandit problem in which a learning agent sequentially selects an assortment based on contextual information, and user feedback follows an MNL choice model.\nThere has been a significant discrepancy between lower and upper regret bounds, particularly regarding the maximum assortment size $K$. Additionally, the variation in reward structures between these bounds complicates the quest for optimality. Under uniform rewards, where all items have the same expected reward, we establish a regret lower bound of $\\Omega(d\\sqrt{\\smash[b]{T/K}})$ and propose a constant-time algorithm, OFU-MNL+, that achieves a matching upper bound of $\\tilde{\\mathcal{O}}(d\\sqrt{\\smash[b]{T/K}})$. \nWe also provide instance-dependent minimax regret bounds under uniform rewards.\nUnder non-uniform rewards, we prove a lower bound of $\\Omega(d\\sqrt{T})$ and an upper bound of $\\tilde{\\mathcal{O}}(d\\sqrt{T})$, also achievable by OFU-MNL+. Our empirical studies support these theoretical findings. To the best of our knowledge, this is the first work in the contextual MNL bandit literature to prove minimax optimality --- for either uniform or non-uniform reward setting --- and to propose a computationally efficient algorithm that achieves this optimality up to logarithmic factors.", "keywords": "Bandit;Contextual Bandit;Multinomial Logistic Bandit;Minimax Regret", "primary_area": "bandits", "supplementary_material": "/attachment/1da315e5e5a49c4f36ba97001baf534910e03860.zip", "author": "Joongkyu Lee;Min-hwan Oh", "authorids": "~Joongkyu_Lee1;~Min-hwan_Oh1", "gender": "M;", "homepage": "https://joongkyulee.com/;https://minoh.io", "dblp": "368/2751;172/0531", "google_scholar": "YWZakwkAAAAJ;KzVALFwAAAAJ", "orcid": ";", "linkedin": "joongkyu-lee-939aa91a7;", "or_profile": "~Joongkyu_Lee1;~Min-hwan_Oh1", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlee2024nearly,\ntitle={Nearly Minimax Optimal Regret for Multinomial Logistic Bandit},\nauthor={Joongkyu Lee and Min-hwan Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q4NWfStqVf}\n}", "github": "", "reviewers": "NbJU;SF7h;8Ks7;bygC;FJtD", "pdf_size": 926360, "rating": "7;7;7;7;9", "confidence": "4;4;2;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;2;4", "presentation": "4;3;3;3;4", "wc_summary": "74;37;132;110;45", "wc_strengths": "76;63;107;122;37", "wc_weaknesses": "271;223;131;541;5", "wc_questions": "2;92;48;10;359", "wc_limitations": "1;1;1;1;1", "wc_review": "424;416;419;784;447", "wc_reply_reviewers": "0;106;0;25;227", "wc_reply_authors": "529;1283;0;1048;518", "reply_reviewers": "0;2;0;1;1", "reply_authors": "2;6;1;4;2", "rating_avg": [ 7.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 79.6, 36.642052344266965 ], "wc_strengths_avg": [ 81.0, 30.469657037781044 ], "wc_weaknesses_avg": [ 234.2, 178.20707056679876 ], "wc_questions_avg": [ 102.2, 132.30933451574762 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 498.0, 143.41408577960533 ], "wc_reply_reviewers_avg": [ 71.6, 86.89902185870679 ], "wc_reply_authors_avg": [ 675.6, 449.52890897026856 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.0, 1.7888543819998317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13358635911983695823&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "An End-To-End Graph Attention Network Hashing for Cross-Modal Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95267", "id": "Q4QUCN2ioc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q4QUCN2ioc", "openreview": "https://openreview.net/forum?id=Q4QUCN2ioc", "poster": "", "project": "", "author_site": "Huilong Jin, Yingxue Zhang, Lei Shi, Shuang Zhang, Feifei Kou, Jiapeng Yang, Chuangying Zhu, Jia Luo", "tldr": "", "abstract": "Due to its low storage cost and fast search speed, cross-modal retrieval based on hashing has attracted widespread attention and is widely used in real-world applications of social media search. However, most existing hashing methods are often limited by uncomprehensive feature representations and semantic associations, which greatly restricts their performance and applicability in practical applications. To deal with this challenge, in this paper, we propose an end-to-end graph attention network hashing (EGATH) for cross-modal retrieval, which can not only capture direct semantic associations between images and texts but also match semantic content between different modalities. We adopt the contrastive language image pretraining (CLIP) combined with the Transformer to improve understanding and generalization ability in semantic consistency across different data modalities. The classifier based on graph attention network is applied to obtain predicted labels to enhance cross-modal feature representation. We construct hash codes using an optimization strategy and loss function to preserve the semantic information and compactness of the hash code. Comprehensive experiments on the NUS-WIDE, MIRFlickr25K, and MS-COCO benchmark datasets show that our EGATH significantly outperforms against several state-of-the-art methods.", "keywords": "Cross-modal Retrieval;Graph Attention Network;Hash Algorithm;CLIP;Transformer", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Huilong Jin;Yingxue Zhang;Lei Shi;Shuang Zhang;Feifei Kou;Jiapeng Yang;Chuangying Zhu;Jia Luo", "authorids": "~Huilong_Jin1;~Yingxue_Zhang4;~Lei_Shi21;~Shuang_Zhang5;~Feifei_Kou1;~Jiapeng_Yang2;~Chuangying_Zhu1;~Jia_Luo2", "gender": ";;M;;F;M;M;F", "homepage": "https://zrgxy.hebtu.edu.cn/a/2023/04/13/042B718136414F34B5BA7CF4EC2A724D.html;;https://scholar.google.com.hk/citations?user=2yg1Fp8AAAAJ&hl=zh-CN;https://zrgxy.hebtu.edu.cn/a/2024/03/16/1B849E323E9E431A9086FB40156CEE62.html;;https://www.scopus.com/authid/detail.uri?authorId=57546372100;https://yjsjy.guet.edu.cn/(S(ysc4vqvjvibmb3omuoj54csr))/dsfc/dsfcgrxx/8737913571A900FFF0234FECCF41E49D;https://scholar.google.co.jp/citations?user=0iPSjvEAAAAJ&hl=zh-CN", "dblp": ";;29/563-30;;223/2313.html;;https://dblp.org/search?q=zhu+chuangying;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=2yg1Fp8AAAAJ;;;;;https://scholar.google.co.jp/citations?user=0iPSjvEAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Huilong_Jin1;~Yingxue_Zhang4;~Lei_Shi21;~Shuang_Zhang5;~Feifei_Kou1;~Jiapeng_Yang2;~Chuangying_Zhu1;~Jia_Luo2", "aff": "Hebei Normal University;Hebei Normal University;Communication University of China;Hebei Normal University;Beijing University of Posts and Telecommunications;Communication University of China;Guilin University Of Electronic Technology;Beijing University of Technology", "aff_domain": "hebtu.edu;hebtu.edu;cuc.edu.cn;hebtu.edu;bupt.edu.cn;cuc.edu.cn;guet.edu.cn;bjut.edu.cn", "position": "Full Professor;MS student;Associate Professor;Assistant Professor;Lecturer;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\njin2024an,\ntitle={An End-To-End Graph Attention Network Hashing for Cross-Modal Retrieval},\nauthor={Huilong Jin and Yingxue Zhang and Lei Shi and Shuang Zhang and Feifei Kou and Jiapeng Yang and Chuangying Zhu and Jia Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q4QUCN2ioc}\n}", "github": "", "reviewers": "FNxK;V2kH;5SGA;L29L", "pdf_size": 1007299, "rating": "3;6;6;7", "confidence": "5;3;5;5", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;2;4;4", "wc_summary": "64;69;100;127", "wc_strengths": "24;63;42;70", "wc_weaknesses": "64;74;28;72", "wc_questions": "48;2;64;97", "wc_limitations": "6;1;9;10", "wc_review": "206;209;243;376", "wc_reply_reviewers": "56;12;16;20", "wc_reply_authors": "409;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 90.0, 25.42636427018224 ], "wc_strengths_avg": [ 49.75, 18.08832496390973 ], "wc_weaknesses_avg": [ 59.5, 18.567444627627143 ], "wc_questions_avg": [ 52.75, 34.2153109002388 ], "wc_limitations_avg": [ 6.5, 3.5 ], "wc_review_avg": [ 258.5, 69.37759004174187 ], "wc_reply_reviewers_avg": [ 26.0, 17.549928774784245 ], "wc_reply_authors_avg": [ 102.25, 177.1021950739177 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=925260125637425518&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "email": "hebtu.edu;hebtu.edu;cuc.edu.cn;hebtu.edu;bupt.edu.cn;cuc.edu.cn;guet.edu.cn;bjut.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;1;3;4", "aff_unique_norm": "Hebei Normal University;Communication University of China;Beijing University of Posts and Telecommunications;Guilin University of Electronic Technology;Beijing University of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.hebn.edu.cn;http://www.cuc.edu.cn/;http://www.bupt.edu.cn/;http://www.guet.edu.cn;http://www.bjut.edu.cn", "aff_unique_abbr": ";CUC;BUPT;GUET;BJUT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Understanding the Limits of Vision Language Models Through the Lens of the Binding Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95266", "id": "Q5RYn6jagC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q5RYn6jagC", "openreview": "https://openreview.net/forum?id=Q5RYn6jagC", "poster": "", "project": "", "author_site": "Declan Campbell, Sunayana Rane, Tyler Giallanza, Camillo Nicol\u00f2 De Sabbata, Kia Ghods, Amogh Joshi, Alexander Ku, Steven Frankland, Tom Griffiths, Jonathan D Cohen, Taylor Webb", "tldr": "", "abstract": "Recent work has documented striking heterogeneity in the performance of state-of-the-art vision language models (VLMs), including both multimodal language models and text-to-image models. These models are able to describe and generate a diverse array of complex, naturalistic images, yet they exhibit surprising failures on basic multi-object reasoning tasks -- such as counting, localization, and simple forms of visual analogy -- that humans perform with near perfect accuracy. To better understand this puzzling pattern of successes and failures, we turn to theoretical accounts of the binding problem in cognitive science and neuroscience, a fundamental problem that arises when a shared set of representational resources must be used to represent distinct entities (e.g., to represent multiple objects in an image), necessitating the use of serial processing to avoid interference. We find that many of the puzzling failures of state-of-the-art VLMs can be explained as arising due to the binding problem, and that these failure modes are strikingly similar to the limitations exhibited by rapid, feedforward processing in the human brain.", "keywords": "visual reasoning;foundation models;multi-object reasoning;cognitive science", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/ef9f99a9d8e09fdf4599a1815a631dc4633cc1e1.zip", "author": "Declan Iain Campbell;Sunayana Rane;Tyler Giallanza;C. Nicol\u00f2 De Sabbata;Kia Ghods;Amogh Joshi;Alexander Ku;Steven M Frankland;Thomas L. Griffiths;Jonathan D. Cohen;Taylor Whittington Webb", "authorids": "~Declan_Iain_Campbell1;~Sunayana_Rane1;~Tyler_Giallanza1;~C._Nicol\u00f2_De_Sabbata1;~Kia_Ghods1;~Amogh_Joshi2;~Alexander_Ku1;~Steven_M_Frankland1;~Thomas_L._Griffiths1;~Jonathan_D._Cohen1;~Taylor_Whittington_Webb1", "gender": "M;;;;M;;M;;;;M", "homepage": ";;;;https://kiaghods.com;;https://alexyku.github.io/;http://www.stevenfrankland.com;http://cocosci.princeton.edu/tom/;;https://scholar.google.com/citations?user=WCmrJoQAAAAJ&hl=en", "dblp": ";;;;;;215/4289.html;;34/4472;;183/6144", "google_scholar": "https://scholar.google.com/citations?hl=en;;ABQNb_0AAAAJ;;maFTcpkAAAAJ;GIMb9IsAAAAJ;Lh_ZqdcAAAAJ;gkFnOeQAAAAJ;https://scholar.google.com/citations?hl=en;;WCmrJoQAAAAJ", "orcid": "0009-0009-4225-7983;;;;0009-0009-5805-489X;;;;;;", "linkedin": ";;;;kia-ghods/;;;;;;", "or_profile": "~Declan_Iain_Campbell1;~Sunayana_Rane1;~Tyler_Giallanza1;~C._Nicol\u00f2_De_Sabbata1;~Kia_Ghods1;~Amogh_Joshi2;~Alexander_Ku1;~Steven_M_Frankland1;~Thomas_L._Griffiths1;~Jonathan_D._Cohen1;~Taylor_Whittington_Webb1", "aff": "Princeton University;;Princeton University;;Princeton University;Princeton University;Google;Princeton University;Princeton University;;University of California, Los Angeles", "aff_domain": "princeton.edu;;princeton.edu;;princeton.edu;princeton.edu;google.com;princeton.edu;princeton.edu;;ucla.edu", "position": "PhD student;;PhD student;;Undergrad student;Undergrad student;Researcher;Postdoc;Professor;;Postdoc", "bibtex": "@inproceedings{\ncampbell2024understanding,\ntitle={Understanding the Limits of Vision Language Models Through the Lens of the Binding Problem},\nauthor={Declan Iain Campbell and Sunayana Rane and Tyler Giallanza and C. Nicol{\\`o} De Sabbata and Kia Ghods and Amogh Joshi and Alexander Ku and Steven M Frankland and Thomas L. Griffiths and Jonathan D. Cohen and Taylor Whittington Webb},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q5RYn6jagC}\n}", "github": "", "reviewers": "MYwe;gAnB;N3JA;Ga5j", "pdf_size": 17921406, "rating": "7;7;7;8", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "179;86;82;558", "wc_strengths": "98;83;29;139", "wc_weaknesses": "141;9;153;826", "wc_questions": "45;402;363;32", "wc_limitations": "2;49;9;36", "wc_review": "465;629;636;1591", "wc_reply_reviewers": "38;46;22;48", "wc_reply_authors": "0;126;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 226.25, 195.42821572127193 ], "wc_strengths_avg": [ 87.25, 39.38511774769754 ], "wc_weaknesses_avg": [ 282.25, 318.9775658255608 ], "wc_questions_avg": [ 210.5, 172.6130064624332 ], "wc_limitations_avg": [ 24.0, 19.222382786741086 ], "wc_review_avg": [ 830.25, 444.5173646776918 ], "wc_reply_reviewers_avg": [ 38.5, 10.23474474522936 ], "wc_reply_authors_avg": [ 31.5, 54.559600438419636 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8940804931871594369&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "princeton.edu;;princeton.edu;;princeton.edu;princeton.edu;google.com;princeton.edu;princeton.edu;;ucla.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;1;0;0;2", "aff_unique_norm": "Princeton University;Google;University of California, Los Angeles", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.princeton.edu;https://www.google.com;https://www.ucla.edu", "aff_unique_abbr": "Princeton;Google;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Almost Minimax Optimal Best Arm Identification in Piecewise Stationary Linear Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95265", "id": "Q5e3ftQ3q3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q5e3ftQ3q3", "openreview": "https://openreview.net/forum?id=Q5e3ftQ3q3", "poster": "/media/PosterPDFs/NeurIPS%202024/95265.png?t=1731314363.4409866", "project": "", "author_site": "Yunlong Hou, Vincent Tan, Zixin Zhong", "tldr": "", "abstract": "We propose a novel piecewise stationary linear bandit (PSLB) model, where the environment randomly samples a context from an unknown probability distribution at each changepoint, and the quality of an arm is measured by its return averaged over all contexts. The contexts and their distribution, as well as the changepoints are unknown to the agent.\nWe design Piecewise-Stationary $\\varepsilon$-Best Arm Identification$^+$ (PS$\\varepsilon$BAI$^+$), an algorithm that is guaranteed to identify an $\\varepsilon$-optimal arm with probability $\\ge 1-\\delta$ and with a minimal number of samples.\nPS$\\varepsilon$BAI$^+$ consists of two subroutines, PS$\\varepsilon$BAI and Na\u00efve $\\varepsilon$-BAI (N$\\varepsilon$BAI), which are executed in parallel. PS$\\varepsilon$BAI actively detects changepoints and aligns contexts to facilitate the arm identification process.\nWhen PS$\\varepsilon$BAI and N$\\varepsilon$BAI are utilized judiciously in parallel, PS$\\varepsilon$BAI$^+$ is shown to have a finite expected sample complexity. \nBy proving a lower bound, we show the expected sample complexity of PS$\\varepsilon$BAI$^+$ is optimal up to a logarithmic factor.\nWe compare PS$\\varepsilon$BAI$^+$ to baseline algorithms using numerical experiments which demonstrate its efficiency.\nBoth our analytical and numerical results corroborate that the efficacy of PS$\\varepsilon$BAI$^+$ is due to the delicate change detection and context alignment procedures embedded in PS$\\varepsilon$BAI.", "keywords": "Multi-armed bandits;piecewise stationary bandits;best arm identification;fixed-confidence", "primary_area": "bandits", "supplementary_material": "", "author": "Yunlong Hou;Vincent Y. F. Tan;Zixin Zhong", "authorids": "~Yunlong_Hou1;~Vincent_Tan1;~Zixin_Zhong1", "gender": "M;M;F", "homepage": "https://github.com/Y-Hou;https://www.ece.nus.edu.sg/stfpage/vtan/pubs.htm;https://zixinzh.github.io/homepage/", "dblp": "160/0888-1;60/2327;", "google_scholar": ";dJoAVvAAAAAJ;xy9V7MEAAAAJ", "orcid": ";0000-0002-5008-4527;", "linkedin": ";;", "or_profile": "~Yunlong_Hou1;~Vincent_Tan1;~Zixin_Zhong1", "aff": "National University of Singaore, National University of Singapore;National University of Singapore;University of Alberta", "aff_domain": "u.nus.edu;nus.edu.sg;ualberta.ca", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nhou2024almost,\ntitle={Almost Minimax Optimal Best Arm Identification in Piecewise Stationary Linear Bandits},\nauthor={Yunlong Hou and Vincent Y. F. Tan and Zixin Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q5e3ftQ3q3}\n}", "github": "", "reviewers": "tHAq;comL;5EBt", "pdf_size": 1025869, "rating": "6;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "75;83;72", "wc_strengths": "73;49;76", "wc_weaknesses": "59;44;87", "wc_questions": "87;17;119", "wc_limitations": "1;9;25", "wc_review": "295;202;379", "wc_reply_reviewers": "15;20;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.66666666666667, 4.642796092394706 ], "wc_strengths_avg": [ 66.0, 12.083045973594572 ], "wc_weaknesses_avg": [ 63.333333333333336, 17.82008853949821 ], "wc_questions_avg": [ 74.33333333333333, 42.59368758656876 ], "wc_limitations_avg": [ 11.666666666666666, 9.977753031397176 ], "wc_review_avg": [ 292.0, 72.29107828771127 ], "wc_reply_reviewers_avg": [ 17.0, 2.160246899469287 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KQDLJxekIIEJ:scholar.google.com/&scioq=Almost+Minimax+Optimal+Best+Arm+Identification+in+Piecewise+Stationary+Linear+Bandits&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "u.nus.edu;nus.edu.sg;ualberta.ca", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "National University of Singapore;University of Alberta", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ualberta.ca", "aff_unique_abbr": "NUS;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;Canada" }, { "title": "Near-Optimality of Contrastive Divergence Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95264", "id": "Q74JVgKCP6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q74JVgKCP6", "openreview": "https://openreview.net/forum?id=Q74JVgKCP6", "poster": "", "project": "", "author_site": "Pierre Glaser, Kevin Han Huang, Arthur Gretton", "tldr": "", "abstract": "We provide a non-asymptotic analysis of the contrastive divergence (CD) algorithm, a training method for unnormalized models. While prior work has established that (for exponential family distributions) the CD iterates asymptotically converge at an $O(n^{-1 / 3})$ rate to the true parameter of the data distribution, we show that CD can achieve the parametric rate $O(n^{-1 / 2})$. Our analysis provides results for various data batching schemes, including fully online and minibatch. We additionally show that CD is near-optimal, in the sense that its asymptotic variance is close to the Cram\u00e9r-Rao lower bound.", "keywords": "Statistical Estimation;Learning Theory;Stochastic Optimization;Exponential Families;Unnormalized Models", "primary_area": "learning_theory", "supplementary_material": "", "author": "Pierre Glaser;Kevin Han Huang;Arthur Gretton", "authorids": "~Pierre_Glaser1;~Kevin_Han_Huang1;~Arthur_Gretton1", "gender": "M;;M", "homepage": "https://pierreglaser.github.io;;http://www.gatsby.ucl.ac.uk/~gretton/", "dblp": ";;56/2574", "google_scholar": ";;OUv7J6QAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Pierre_Glaser1;~Kevin_Han_Huang1;~Arthur_Gretton1", "aff": "University College London;;University College London", "aff_domain": "ucl.ac.uk;;ucl.ac.uk", "position": "PhD student;;Professor", "bibtex": "@inproceedings{\nglaser2024nearoptimality,\ntitle={Near-Optimality of Contrastive Divergence Algorithms},\nauthor={Pierre Glaser and Kevin Han Huang and Arthur Gretton},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q74JVgKCP6}\n}", "github": "", "reviewers": "Hdtk;b7nK;Fohj;ouZy;yRNH", "pdf_size": 667353, "rating": "5;6;6;6;6", "confidence": "3;2;3;4;2", "soundness": "4;3;3;3;3", "novelty": "2;3;4;3;2", "presentation": "4;3;2;2;3", "wc_summary": "134;140;44;71;87", "wc_strengths": "56;308;53;66;107", "wc_weaknesses": "196;241;112;66;62", "wc_questions": "11;4;74;115;34", "wc_limitations": "1;25;35;8;12", "wc_review": "398;718;318;326;302", "wc_reply_reviewers": "33;45;80;105;16", "wc_reply_authors": "0;0;797;446;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 95.2, 36.8423669163641 ], "wc_strengths_avg": [ 118.0, 96.94740842333022 ], "wc_weaknesses_avg": [ 135.4, 71.51950782828416 ], "wc_questions_avg": [ 47.6, 41.62979702088398 ], "wc_limitations_avg": [ 16.2, 12.221292893961753 ], "wc_review_avg": [ 412.4, 156.3094366952936 ], "wc_reply_reviewers_avg": [ 55.8, 32.33202746503844 ], "wc_reply_authors_avg": [ 248.6, 324.0725844621849 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3043758001267640129&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ucl.ac.uk;;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "CRAG - Comprehensive RAG Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97703", "id": "Q7lAqY41HH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q7lAqY41HH", "openreview": "https://openreview.net/forum?id=Q7lAqY41HH", "poster": "/media/PosterPDFs/NeurIPS%202024/97703.png?t=1733935327.6051967", "project": "", "author_site": "Xiao Yang, Kai Sun, Hao Xin, Yushi Sun, Nikita Bhalla, Xiangsen Chen, Sajal Choudhary, Rongze Gui, Ziran Jiang, Ziyu Jiang, Lingkun Kong, Brian Moran, Jiaqi Wang, Yifan Xu, An Yan, Chenyu Yang, Eting Yuan, Hanwen Zha, Nan Tang, Lei Chen, Nicolas Scheffer, Yue Liu, Nirav Shah, Rakesh Wanga, Anuj Kumar, Scott Yih, Xin Dong", "tldr": "", "abstract": "Retrieval-Augmented Generation (RAG) has recently emerged as a promising solution to alleviate Large Language Model (LLM)\u2019s deficiency in lack of knowledge. Existing RAG datasets, however, do not adequately represent the diverse and dynamic nature of real-world Question Answering (QA) tasks. To bridge this gap, we introduce the Comprehensive RAG Benchmark (CRAG), a factual question answering benchmark of 4,409 question-answer pairs and mock APIs to simulate web and Knowledge Graph (KG) search. CRAG is designed to encapsulate a diverse array of questions across five domains and eight question categories, reflecting varied entity popularity from popular to long-tail, and temporal dynamisms ranging from years to seconds. Our evaluation on this benchmark highlights the gap to fully trustworthy QA. Whereas most advanced LLMs achieve $\\le 34\\%$ accuracy on CRAG, adding RAG in a straightforward manner improves the accuracy only to 44%. State-of-the-art industry RAG solutions only answer 63% questions without any hallucination. CRAG also reveals much lower accuracy in answering questions regarding facts with higher dynamism, lower popularity, or higher complexity, suggesting future research directions. The CRAG benchmark laid the groundwork for a KDD Cup 2024 challenge, attracted thousands of participants and submissions. We commit to maintaining CRAG to serve research communities in advancing RAG solutions and general QA solutions. CRAG is available at https://github.com/facebookresearch/CRAG/.", "keywords": "RAG;Generative AI;Factuality;Question Answering", "primary_area": "", "supplementary_material": "/attachment/262aa3716f225afcd0b50c6f651c1322dda96dfa.pdf", "author": "Xiao Yang;Kai Sun;Hao Xin;Yushi Sun;Nikita Bhalla;Xiangsen Chen;Sajal Choudhary;Rongze Gui;Ziran Jiang;Ziyu JIANG;Lingkun Kong;Brian Moran;Jiaqi Wang;Yifan Ethan Xu;An Yan;Chenyu Yang;Eting Yuan;Hanwen Zha;Nan Tang;Lei Chen;Nicolas SCHEFFER;Yue Liu;Nirav Shah;Rakesh Wanga;Anuj Kumar;Wen-tau Yih;Xin Luna Dong", "authorids": "~Xiao_Yang17;~Kai_Sun5;~Hao_Xin2;~Yushi_Sun1;~Nikita_Bhalla1;~Xiangsen_Chen1;~Sajal_Choudhary1;~Rongze_Gui1;~Ziran_Jiang1;~Ziyu_JIANG2;~Lingkun_Kong1;~Brian_Moran1;~Jiaqi_Wang16;~Yifan_Ethan_Xu1;~An_Yan5;~Chenyu_Yang5;~Eting_Yuan1;~Hanwen_Zha1;~Nan_Tang3;~Lei_Chen7;~Nicolas_SCHEFFER1;~Yue_Liu18;~Nirav_Shah2;~Rakesh_Wanga1;~Anuj_Kumar3;~Wen-tau_Yih1;~Xin_Luna_Dong2", "gender": ";;M;M;F;;M;M;M;M;M;;F;M;;F;M;;M;M;M;M;M;M;M;M;F", "homepage": ";https://www.kaisun.org/;;http://ysunbp.student.ust.hk/;;https://xschen-beb.github.io;;;;https://jzwilliams07.github.io/;https://ohyoukillkenny.github.io/;;;https://scholar.google.com/citations?user=e0KdvxUAAAAJ&hl=en;;;;https://zhw12.github.io/;https://nantang.github.io/;http://www.cs.ust.hk/~leichen/;;;;https://www.linkedin.com/in/rakesh-wanga;;http://scottyih.org;http://lunadong.com", "dblp": ";09/1171-6.html;41/82;203/9664;;;205/3107.html;;;;;;;;;177/3158;;219/1743;27/104-1;c/LeiChen0002;;;;;;07/7129;d/XinLunaDong.html", "google_scholar": "JrherJ4AAAAJ;EIctlPwAAAAJ;R2ShoB8AAAAJ;j9ypp3QAAAAJ;;;nTetCPIAAAAJ;;;DEn-0KIAAAAJ;wbbtKz0AAAAJ;;;e0KdvxUAAAAJ;VfuGxZIAAAAJ;4NOsk-gAAAAJ;;https://scholar.google.com/citations?hl=en;;gtglwgYAAAAJ;2vDiK3IAAAAJ;fAMLTWYAAAAJ;;;_QMxs4kAAAAJ;8rDNIMsAAAAJ;uGsKvHoAAAAJ", "orcid": ";0000-0001-8262-4906;0000-0002-0523-6675;0000-0003-3853-6364;;;;;;;;;;0000-0002-7539-4492;;0009-0009-9726-8380;;;;0000-0002-8257-5806;;;;;;0000-0003-4263-395X;0009-0001-2049-2458", "linkedin": "xiao-yang-19808817/;;;;;;;daniel-gui-bb3032186?original_referer=https%3A%2F%2Fwww.google.com%2F;will-ziran-jiang-31869232/;williamsjiang/;;brian-moran-nlp/;jiaqi-jackie-wang-21226ba7/;;;;eting-yuan-1679252b/;;;;;yliuc/;nirav-shah-76613211?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;rakesh-wanga;anujkumar9?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;scottyih/;", "or_profile": "~Xiao_Yang17;~Kai_Sun5;~Hao_Xin2;~Yushi_Sun1;~Nikita_Bhalla1;~Xiangsen_Chen1;~Sajal_Choudhary1;~Rongze_Gui1;~Ziran_Jiang1;~Ziyu_JIANG2;~Lingkun_Kong1;~Brian_Moran1;~Jiaqi_Wang16;~Yifan_Ethan_Xu1;~An_Yan5;~Chenyu_Yang5;~Eting_Yuan1;~Hanwen_Zha1;~Nan_Tang3;~Lei_Chen7;~Nicolas_SCHEFFER1;~Yue_Liu18;~Nirav_Shah2;~Rakesh_Wanga1;~Anuj_Kumar3;~Wen-tau_Yih1;~Xin_Luna_Dong2", "aff": "Meta Facebook;Meta;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology;Meta;Meta Facebook;Stanford University;The Hong Kong University of Science and Technology(Guangzhou);;Meta Facebook;Meta Facebook;Meta;Meta Facebook;HKUST(GZ);Meta Facebook;Meta;HKUST(GZ);Hong Kong University of Science and Technology;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta;Meta Platforms, Inc.;Meta Facebook", "aff_domain": "meta.com;fb.com;cse.ust.hk;ust.hk;;connect.hkust-gz.edu.cn;meta.com;meta.com;stanford.edu;connect.hkust-gz.edu.cn;;meta.com;meta.com;meta.com;meta.com;connect.hkust-gz.edu.cn;meta.com;meta.com;hkust-gz.edu.cn;hkust.edu;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com", "position": "Researcher;Research Scientist;PhD student;PhD student;;MS student;Researcher;Researcher;MS student;MS student;;Researcher;Researcher;Staff Research Scientist;Researcher;PhD student;Researcher;Researcher;Associate Professor;Full Professor;Researcher;Researcher;manager;Director;Principal Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nyang2024crag,\ntitle={{CRAG} - Comprehensive {RAG} Benchmark},\nauthor={Xiao Yang and Kai Sun and Hao Xin and Yushi Sun and Nikita Bhalla and Xiangsen Chen and Sajal Choudhary and Rongze Gui and Ziran Jiang and Ziyu JIANG and Lingkun Kong and Brian Moran and Jiaqi Wang and Yifan Ethan Xu and An Yan and Chenyu Yang and Eting Yuan and Hanwen Zha and Nan Tang and Lei Chen and Nicolas SCHEFFER and Yue Liu and Nirav Shah and Rakesh Wanga and Anuj Kumar and Wen-tau Yih and Xin Luna Dong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Q7lAqY41HH}\n}", "github": "", "reviewers": "DKoq;ZoM5;w7tu;2hh7", "pdf_size": 884186, "rating": "7;7;7;7", "confidence": "4;4;2;3", "wc_summary_and_contributions": "83;94;80;170", "wc_strengths": "26;144;13;44", "wc_improvement": "39;203;39;191", "wc_limitations": "28;14;4;11", "wc_correctness": "8;14;1;13", "wc_clarity": "1;5;1;10", "wc_relation_to_prior_work": "1;15;1;34", "wc_documentation": "1;23;1;52", "wc_additional_feedback": "1;1;1;1", "wc_review": "188;513;141;526", "wc_reply_reviewers": "10;10;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 106.75, 36.88749788207381 ], "wc_strengths_avg": [ 56.75, 51.56246212119821 ], "wc_improvement_avg": [ 118.0, 79.11384202527394 ], "wc_limitations_avg": [ 14.25, 8.728545125048045 ], "wc_correctness_avg": [ 9.0, 5.1478150704935 ], "wc_clarity_avg": [ 4.25, 3.6996621467371855 ], "wc_relation_to_prior_work_avg": [ 12.75, 13.534677683639163 ], "wc_documentation_avg": [ 19.25, 20.932928605429293 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 342.0, 178.33535824395565 ], "wc_reply_reviewers_avg": [ 5.0, 5.0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 27, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10094844837685293937&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "meta.com;fb.com;cse.ust.hk;ust.hk;;connect.hkust-gz.edu.cn;meta.com;meta.com;stanford.edu;connect.hkust-gz.edu.cn;;meta.com;meta.com;meta.com;meta.com;connect.hkust-gz.edu.cn;meta.com;meta.com;hkust-gz.edu.cn;hkust.edu;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com", "author_num": 27, "aff_unique_index": "0;0;1;1;1;0;0;2;1;0;0;0;0;1;0;0;1;1;0;0;0;0;0;0;0", "aff_unique_norm": "Meta;Hong Kong University of Science and Technology;Stanford University", "aff_unique_dep": "Meta Platforms, Inc.;Department of Computer Science and Engineering;", "aff_unique_url": "https://meta.com;https://www.ust.hk;https://www.stanford.edu", "aff_unique_abbr": "Meta;HKUST;Stanford", "aff_campus_unique_index": "1;1;1;2;3;3;3;1", "aff_campus_unique": ";Hong Kong SAR;Stanford;Guangzhou", "aff_country_unique_index": "0;0;1;1;1;0;0;0;1;0;0;0;0;1;0;0;1;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning an Actionable Discrete Diffusion Policy via Large-Scale Actionless Video Pre-Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95263", "id": "Q7s8mFWqsx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q7s8mFWqsx", "openreview": "https://openreview.net/forum?id=Q7s8mFWqsx", "poster": "", "project": "", "author_site": "Haoran He, Chenjia Bai, Ling Pan, Weinan Zhang, Bin Zhao, Xuelong Li", "tldr": "", "abstract": "Learning a generalist embodied agent capable of completing multiple tasks poses challenges, primarily stemming from the scarcity of action-labeled robotic datasets. In contrast, a vast amount of human videos exist, capturing intricate tasks and interactions with the physical world. Promising prospects arise for utilizing actionless human videos for pre-training and transferring the knowledge to facilitate robot policy learning through limited robot demonstrations. However, it remains a challenge due to the domain gap between humans and robots. Moreover, it is difficult to extract useful information representing the dynamic world from human videos, because of its noisy and multimodal data structure. In this paper, we introduce a novel framework to tackle these challenges, which leverages a unified discrete diffusion to combine generative pre-training on human videos and policy fine-tuning on a small number of action-labeled robot videos. We start by compressing both human and robot videos into unified video tokens. In the pre-training stage, we employ a discrete diffusion model with a mask-and-replace diffusion strategy to predict future video tokens in the latent space. In the fine-tuning stage, we harness the imagined future videos to guide low-level action learning with a limited set of robot data. Experiments demonstrate that our method generates high-fidelity future videos for planning and enhances the fine-tuned policies compared to previous state-of-the-art approaches with superior performance.", "keywords": "Imitation Learning;Actionless Video Pre-Training;Video-Based Policy Learning;Discrete Diffusion", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/0678f4ccc9a5af22676a5b2d7c92f1ed7aecc9c0.zip", "author": "Haoran He;Chenjia Bai;Ling Pan;Weinan Zhang;Bin Zhao;Xuelong Li", "authorids": "~Haoran_He1;~Chenjia_Bai2;~Ling_Pan1;~Weinan_Zhang1;~Bin_Zhao7;~Xuelong_Li2", "gender": "M;M;F;M;M;M", "homepage": "https://tinnerhrhe.github.io/;https://baichenjia.github.io/;https://ling-pan.github.io/;http://wnzhang.net;https://iopen.nwpu.edu.cn/info/1347/2105.htm;", "dblp": "299/7312;247/1943;199/9303/;28/10261-1;73/4325-1.html;l/XuelongLi", "google_scholar": "Z33PHQ0AAAAJ;Rm_1y2kAAAAJ;qZ_zlacAAAAJ;Qzss0GEAAAAJ;https://scholar.google.com.hk/citations?user=DQB0hqwAAAAJ;ahUibskAAAAJ", "orcid": "0000-0002-7340-8643;;;0000-0002-0127-2425;;", "linkedin": ";;;;;", "or_profile": "~Haoran_He1;~Chenjia_Bai2;~Ling_Pan1;~Weinan_Zhang1;~Bin_Zhao7;~Xuelong_Li2", "aff": "Hong Kong University of Science and Technology;Shanghai AI Laboratory;Montreal Institute for Learning Algorithms (MILA);Shanghai Jiaotong University;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University", "aff_domain": "connect.ust.hk;pjlab.org.cn;mila.umontreal.ca;sjtu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "PhD student;Researcher;Postdoc;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhe2024learning,\ntitle={Learning an Actionable Discrete Diffusion Policy via Large-Scale Actionless Video Pre-Training},\nauthor={Haoran He and Chenjia Bai and Ling Pan and Weinan Zhang and Bin Zhao and Xuelong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q7s8mFWqsx}\n}", "github": "", "reviewers": "wSvy;ew7s;kd9D;v4Bj", "pdf_size": 2534278, "rating": "6;6;6;7", "confidence": "5;4;4;4", "soundness": "3;4;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "73;70;78;96", "wc_strengths": "36;89;57;35", "wc_weaknesses": "318;55;83;194", "wc_questions": "4;25;48;120", "wc_limitations": "1;7;66;2", "wc_review": "432;246;332;447", "wc_reply_reviewers": "38;33;0;23", "wc_reply_authors": "29;134;95;36", "reply_reviewers": "1;1;0;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 10.084022015049353 ], "wc_strengths_avg": [ 54.25, 21.901769334919038 ], "wc_weaknesses_avg": [ 162.5, 103.74126469250315 ], "wc_questions_avg": [ 49.25, 43.71141155350625 ], "wc_limitations_avg": [ 19.0, 27.230497608380205 ], "wc_review_avg": [ 364.25, 81.33380293580278 ], "wc_reply_reviewers_avg": [ 23.5, 14.603081866510234 ], "wc_reply_authors_avg": [ 73.5, 43.32724316177986 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16692345951268172859&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "connect.ust.hk;pjlab.org.cn;mila.umontreal.ca;sjtu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Hong Kong University of Science and Technology;Shanghai AI Laboratory;Montreal Institute for Learning Algorithms;Shanghai Jiao Tong University;Northwest Polytechnical University;Northwestern Polytechnical University", "aff_unique_dep": ";;Artificial Intelligence;;;", "aff_unique_url": "https://www.ust.hk;https://www.shanghai-ai-lab.com;https://mila.quebec;https://www.sjtu.edu.cn;http://www.nwpu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "HKUST;SAIL;MILA;SJTU;NWPU;NWPU", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Hong Kong SAR;;Montreal;Xi'an", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Canada" }, { "title": "AsEP: Benchmarking Deep Learning Methods for Antibody-specific Epitope Prediction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97702", "id": "Q7xKdEMrrZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q7xKdEMrrZ", "openreview": "https://openreview.net/forum?id=Q7xKdEMrrZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97702.png?t=1731599612.946919", "project": "", "author_site": "ChuNan Liu, Lilian Denzler, Yihong Chen, Andrew Martin, Brooks Paige", "tldr": "", "abstract": "Epitope identification is vital for antibody design yet challenging due to the inherent variability in antibodies. While many deep learning methods have been developed for general protein binding site prediction tasks, whether they work for epitope prediction remains an understudied research question. The challenge is also heightened by the lack of a consistent evaluation pipeline with sufficient dataset size and epitope diversity. We introduce a filtered antibody-antigen complex structure dataset, AsEP (Antibody-specific Epitope Prediction). AsEP is the largest of its kind and provides clustered epitope groups, allowing the community to develop and test novel epitope prediction methods and evaluate their generalisability. AsEP comes with an easy-to-use interface in Python and pre-built graph representations of each antibody-antigen complex while also supporting customizable embedding methods. Using this new dataset, we benchmark several representative general protein-binding site prediction methods and find that their performances fall short of expectations for epitope prediction. To address this, we propose a novel method, WALLE, which leverages both unstructured modeling from protein language models and structural modeling from graph neural networks. WALLE demonstrate up to 3-10X performance improvement over the baseline methods. Our empirical findings suggest that epitope prediction benefits from combining sequential features provided by language models with geometrical information from graph representations. This provides a guideline for future epitope prediction method design. In addition, we reformulate the task as bipartite link prediction, allowing convenient model performance attribution and interpretability. We open source our data and code at https://github.com/biochunan/AsEP-dataset.", "keywords": "Dataset; Antibody; Epitope; Protein Structures; Language Model; Graph Neural Networks", "primary_area": "", "supplementary_material": "/attachment/16df4e8631e95dafb7b3e8292e058940b91c38d5.zip", "author": "ChuNan Liu;Lilian Denzler;Yihong Chen;Andrew CR Martin;Brooks Paige", "authorids": "~ChuNan_Liu1;~Lilian_Denzler1;~Yihong_Chen3;~Andrew_CR_Martin1;~Brooks_Paige1", "gender": "M;F;;M;M", "homepage": "https://github.com/biochunan;https://liliandenzler.github.io/;;http://www.bioinf.org.uk;https://tbrx.github.io", "dblp": ";;;;https://dblp.uni-trier.de/pers/p/Paige:Brooks", "google_scholar": "sxwjx9EAAAAJ;;;;JrFJmx0AAAAJ", "orcid": "0000-0001-5957-4197;;;;", "linkedin": "chunan-liu/;liliandenzler/;;;", "or_profile": "~ChuNan_Liu1;~Lilian_Denzler1;~Yihong_Chen3;~Andrew_CR_Martin1;~Brooks_Paige1", "aff": "University College London;University College London, University of London;;University College London, University of London;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;;ucl.ac.uk;ucl.ac.uk", "position": "PhD student;PhD student;;Emeritus;Associate Professor", "bibtex": "@inproceedings{\nliu2024asep,\ntitle={As{EP}: Benchmarking Deep Learning Methods for Antibody-specific Epitope Prediction},\nauthor={ChuNan Liu and Lilian Denzler and Yihong Chen and Andrew CR Martin and Brooks Paige},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Q7xKdEMrrZ}\n}", "github": "", "reviewers": "TnvS;JQrQ;bJmy;cuAf", "pdf_size": 5907468, "rating": "7;7;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "75;100;113;128", "wc_strengths": "111;77;69;138", "wc_improvement": "152;41;6;268", "wc_limitations": "93;82;14;59", "wc_correctness": "53;38;11;60", "wc_clarity": "8;10;10;73", "wc_relation_to_prior_work": "25;36;12;47", "wc_documentation": "24;24;18;75", "wc_additional_feedback": "1;1;1;1", "wc_review": "542;409;254;849", "wc_reply_reviewers": "24;18;53;102", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 104.0, 19.45507645834372 ], "wc_strengths_avg": [ 98.75, 27.60774347895894 ], "wc_improvement_avg": [ 116.75, 102.61913807862547 ], "wc_limitations_avg": [ 62.0, 30.306764921383476 ], "wc_correctness_avg": [ 40.5, 18.7949461292125 ], "wc_clarity_avg": [ 25.25, 27.580563808595358 ], "wc_relation_to_prior_work_avg": [ 30.0, 12.98075498574717 ], "wc_documentation_avg": [ 35.25, 23.080023830143677 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 513.5, 218.87953307698734 ], "wc_reply_reviewers_avg": [ 49.25, 33.20673877392961 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11903024855663125217&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;ucl.ac.uk;;ucl.ac.uk;ucl.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "MoE Jetpack: From Dense Checkpoints to Adaptive Mixture of Experts for Vision Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95262", "id": "Q8Z04XhDdL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q8Z04XhDdL", "openreview": "https://openreview.net/forum?id=Q8Z04XhDdL", "poster": "/media/PosterPDFs/NeurIPS%202024/95262.png?t=1731415042.6729841", "project": "", "author_site": "xingkui zhu, Yiran Guan, Dingkang Liang, Yuchao Chen, Yuliang Liu, Xiang Bai", "tldr": "", "abstract": "The sparsely activated mixture of experts (MoE) model presents an effective alternative to densely activated (dense) models, combining improved accuracy with computational efficiency. However, training MoE models from scratch requires extensive data and computational resources, a challenge that limits their widespread adoption. To address this, we introduce MoE Jetpack, a framework designed to fine-tune the abundant and easily accessible dense checkpoints into MoE models. MoE Jetpack incorporates two key techniques: (1) **checkpoint recycling**, which initializes MoE models with dense checkpoints to accelerate convergence and enhance accuracy, minimizing the need for extensive pre-training; (2) the **hyperspherical adaptive MoE (SpheroMoE) layer**, which optimizes the MoE architecture to enhance fine-tuning performance and efficiency.\nExperimental results indicate that MoE Jetpack doubles the convergence speed and enhances accuracy by 2.8% on ImageNet-1K. On smaller datasets, it achieves up to 8-fold faster convergence and over 30% accuracy gains, highlighting its efficiency.\nThe code is available at https://github.com/Adlith/MoE-Jetpack.", "keywords": "deep learning;computer vision;mixture of experts;weight initialization;fine-tuning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xingkui Zhu;Yiran Guan;Dingkang Liang;Yuchao Chen;Yuliang Liu;Xiang Bai", "authorids": "~Xingkui_Zhu1;~Yiran_Guan1;~Dingkang_Liang2;~Yuchao_Chen1;~Yuliang_Liu2;~Xiang_Bai1", "gender": "M;M;M;M;M;M", "homepage": ";https://1ranGuan.github.io;https://github.com/chen-yu-chao;https://github.com/Yuliang-Liu;http://mclab.eic.hust.edu.cn/~xbai;https://dk-liang.github.io/", "dblp": "300/4541;328/4507;;;59/2741;255/6274", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=1Gx_r4wAAAAJ;;;UeltiQ4AAAAJ;Tre69v0AAAAJ", "orcid": "0009-0008-0561-4390;0009-0000-3407-7581;;;;", "linkedin": ";;;;;", "or_profile": "~Xingkui_Zhu1;~Yiran_Guan1;~Yuchao_Chen1;~Yuliang_Liu2;~Xiang_Bai1;~dingkang_liang1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhu2024moe,\ntitle={MoE Jetpack: From Dense Checkpoints to Adaptive Mixture of Experts for Vision Tasks},\nauthor={Xingkui Zhu and Yiran Guan and Dingkang Liang and Yuchao Chen and Yuliang Liu and Xiang Bai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q8Z04XhDdL}\n}", "github": "", "reviewers": "hQZu;qYPk;7BZ9;ysa3", "pdf_size": 8458915, "rating": "5;5;6;7", "confidence": "5;3;5;5", "soundness": "3;3;2;4", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "59;68;228;74", "wc_strengths": "35;33;180;122", "wc_weaknesses": "96;180;282;215", "wc_questions": "47;39;88;25", "wc_limitations": "5;33;12;1", "wc_review": "242;353;790;437", "wc_reply_reviewers": "778;0;0;47", "wc_reply_authors": "1549;0;0;30", "reply_reviewers": "5;0;0;1", "reply_authors": "7;1;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.25, 69.91914973739311 ], "wc_strengths_avg": [ 92.5, 61.99395131785036 ], "wc_weaknesses_avg": [ 193.25, 67.04988814308344 ], "wc_questions_avg": [ 49.75, 23.44541533008106 ], "wc_limitations_avg": [ 12.75, 12.336429791475327 ], "wc_review_avg": [ 455.5, 205.13471183590553 ], "wc_reply_reviewers_avg": [ 206.25, 330.65720542580044 ], "wc_reply_authors_avg": [ 394.75, 666.5190826225457 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4993198571982547726&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Bridge-IF: Learning Inverse Protein Folding with Markov Bridges", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95261", "id": "Q8yfhrBBD8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Q8yfhrBBD8", "openreview": "https://openreview.net/forum?id=Q8yfhrBBD8", "poster": "/media/PosterPDFs/NeurIPS%202024/95261.png?t=1733487300.3892567", "project": "", "author_site": "Yiheng Zhu, Jialu Wu, Qiuyi Li, Jiahuan Yan, Mingze Yin, Wei Wu, Mingyang Li, Jieping Ye, Zheng Wang, Jian Wu", "tldr": "", "abstract": "Inverse protein folding is a fundamental task in computational protein design, which aims to design protein sequences that fold into the desired backbone structures. While the development of machine learning algorithms for this task has seen significant success, the prevailing approaches, which predominantly employ a discriminative formulation, frequently encounter the error accumulation issue and often fail to capture the extensive variety of plausible sequences. To fill these gaps, we propose Bridge-IF, a generative diffusion bridge model for inverse folding, which is designed to learn the probabilistic dependency between the distributions of backbone structures and protein sequences. Specifically, we harness an expressive structure encoder to propose a discrete, informative prior derived from structures, and establish a Markov bridge to connect this prior with native sequences. During the inference stage, Bridge-IF progressively refines the prior sequence, culminating in a more plausible design. Moreover, we introduce a reparameterization perspective on Markov bridge models, from which we derive a simplified loss function that facilitates more effective training. We also modulate protein language models (PLMs) with structural conditions to precisely approximate the Markov bridge process, thereby significantly enhancing generation performance while maintaining parameter-efficient training. Extensive experiments on well-established benchmarks demonstrate that Bridge-IF predominantly surpasses existing baselines in sequence recovery and excels in the design of plausible proteins with high foldability. The code is available at https://github.com/violet-sto/Bridge-IF.", "keywords": "Inverse Protein Folding;Markov Bridges;Protein Language Models", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Yiheng Zhu;Jialu Wu;Qiuyi Li;Jiahuan Yan;Mingze Yin;Wei Wu;Mingyang Li;Jieping Ye;Zheng Wang;Jian Wu", "authorids": "~Yiheng_Zhu3;~Jialu_Wu1;~Qiuyi_Li1;~Jiahuan_Yan1;~Mingze_Yin1;~Wei_Wu25;~Mingyang_Li5;~Jieping_Ye4;~Zheng_Wang32;~Jian_Wu6", "gender": "M;;M;M;;M;;M;;M", "homepage": ";;;;;https://github.com/U-rara;;http://yelabs.net/;;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ", "dblp": ";;;334/7537;;95/6985-45.html;;03/5454;;96/2744-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;_wQJGDcAAAAJ;;;;T9AzhwcAAAAJ;;https://scholar.google.com/citations?hl=zh-TW", "orcid": "0000-0001-8020-9979;;;0000-0002-2002-2579;;0009-0009-1590-601X;;0000-0001-8662-5818;;", "linkedin": ";;;;;;;;;", "or_profile": "~Yiheng_Zhu3;~Jialu_Wu1;~Qiuyi_Li1;~Jiahuan_Yan1;~Mingze_Yin1;~Wei_Wu25;~Mingyang_Li5;~Jieping_Ye4;~Zheng_Wang32;~Jian_Wu6", "aff": "Zhejiang University;;Alibaba Group;Zhejiang University;;University of Science and Technology of China;;Alibaba Group;;Zhejiang University", "aff_domain": "zju.edu.cn;;alibaba-inc.com;zju.edu.cn;;ustc.edu.cn;;alibaba-inc.com;;zju.edu.cn", "position": "PhD student;;Postdoc;PhD student;;PhD student;;Principal Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhu2024bridgeif,\ntitle={Bridge-{IF}: Learning Inverse Protein Folding with Markov Bridges},\nauthor={Yiheng Zhu and Jialu Wu and Qiuyi Li and Jiahuan Yan and Mingze Yin and Wei Wu and Mingyang Li and Jieping Ye and Zheng Wang and Jian Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Q8yfhrBBD8}\n}", "github": "", "reviewers": "pPDQ;HBUp;r7sf;3QQQ", "pdf_size": 4899075, "rating": "3;6;6;6", "confidence": "4;3;5;4", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "59;68;81;55", "wc_strengths": "23;54;50;46", "wc_weaknesses": "66;94;356;75", "wc_questions": "62;6;212;52", "wc_limitations": "21;11;9;1", "wc_review": "231;233;708;229", "wc_reply_reviewers": "601;32;775;29", "wc_reply_authors": "2386;25;1655;25", "reply_reviewers": "3;1;3;1", "reply_authors": "6;2;5;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.75, 9.98436277385793 ], "wc_strengths_avg": [ 43.25, 12.028611723719408 ], "wc_weaknesses_avg": [ 147.75, 120.65731432449506 ], "wc_questions_avg": [ 83.0, 77.41446893184762 ], "wc_limitations_avg": [ 10.5, 7.123903424387503 ], "wc_review_avg": [ 350.25, 206.55190025753816 ], "wc_reply_reviewers_avg": [ 359.25, 334.45805043383245 ], "wc_reply_authors_avg": [ 1022.75, 1030.679478548011 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9063551936638283155&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;;alibaba-inc.com;zju.edu.cn;;ustc.edu.cn;;alibaba-inc.com;;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;1;0;2;1;0", "aff_unique_norm": "Zhejiang University;Alibaba Group;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;http://www.ustc.edu.cn", "aff_unique_abbr": "ZJU;Alibaba;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FashionR2R: Texture-preserving Rendered-to-Real Image Translation with Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95260", "id": "QAEnr5j172", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QAEnr5j172", "openreview": "https://openreview.net/forum?id=QAEnr5j172", "poster": "/media/PosterPDFs/NeurIPS%202024/95260.png?t=1733212275.6956651", "project": "", "author_site": "Rui Hu, Qian He, Gaofeng He, Jiedong Zhuang, Huang Chen, Huafeng Liu, Huamin Wang", "tldr": "", "abstract": "Modeling and producing lifelike clothed human images has attracted researchers' attention from different areas for decades, with the complexity from highly articulated and structured content. Rendering algorithms decompose and simulate the imaging process of a camera, while are limited by the accuracy of modeled variables and the efficiency of computation. Generative models can produce impressively vivid human images, however still lacking in controllability and editability. This paper studies photorealism enhancement of rendered images, leveraging generative power from diffusion models on the controlled basis of rendering. We introduce a novel framework to translate rendered images into their realistic counterparts, which consists of two stages: Domain Knowledge Injection (DKI) and Realistic Image Generation (RIG). In DKI, we adopt positive (real) domain finetuning and negative (rendered) domain embedding to inject knowledge into a pretrained Text-to-image (T2I) diffusion model. In RIG, we generate the realistic image corresponding to the input rendered image, with a Texture-preserving Attention Control (TAC) to preserve fine-grained clothing textures, exploiting the decoupled features encoded in the UNet structure. Additionally, we introduce SynFashion dataset, featuring high-quality digital clothing images with diverse textures. Extensive experimental results demonstrate the superiority and effectiveness of our method in rendered-to-real image translation.", "keywords": "rendered-to-real;photo-realism enhancement;image-to-image translation;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Rui Hu;Qian He;Gaofeng He;Jiedong Zhuang;Huang Chen;Huafeng Liu;Huamin Wang", "authorids": "~Rui_Hu8;~Qian_He1;~Gaofeng_He1;~Jiedong_Zhuang1;~Huang_Chen3;~Huafeng_Liu1;~Huamin_Wang2", "gender": "M;;M;M;M;M;M", "homepage": ";;https://www.aiuai.cn;https://github.com/pumpkin805;https://person.zju.edu.cn/0003436;https://wanghmin.github.io/;https://github.com/orayel", "dblp": ";69/6357-1;;305/1138;;;", "google_scholar": "AN_J0DYAAAAJ;hJSlrFAAAAAJ;;;;https://scholar.google.co.uk/citations?user=iHpqfoQAAAAJ;", "orcid": "0000-0002-5322-2251;;;;;0000-0002-8153-2337;", "linkedin": ";;;;;huamin-wang-8583a77a/;", "or_profile": "~Rui_Hu8;~Qian_He1;~Gaofeng_He1;~Jiedong_Zhuang1;~Huafeng_Liu1;~Huamin_Wang1;~ChenHuang1", "aff": "Zhejiang University;Style3D;Style3D Research;Zhejiang University;Zhejiang University;Style3D Research;Linctex", "aff_domain": "zju.edu.cn;linctex.com;linctex.com;zju.edu.cn;zju.edu.cn;style3d.com;linctex.com", "position": "PhD student;Researcher;Researcher;PhD student;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nhu2024fashionrr,\ntitle={FashionR2R: Texture-preserving Rendered-to-Real Image Translation with Diffusion Models},\nauthor={Rui Hu and Qian He and Gaofeng He and Jiedong Zhuang and Huang Chen and Huafeng Liu and Huamin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QAEnr5j172}\n}", "github": "", "reviewers": "cE2m;m8pA;L3jv;SbYx", "pdf_size": 43557357, "rating": "3;5;6;6", "confidence": "4;3;3;3", "soundness": "3;2;2;3", "novelty": "1;3;3;3", "presentation": "3;2;2;3", "wc_summary": "97;65;76;37", "wc_strengths": "119;76;115;63", "wc_weaknesses": "255;61;215;30", "wc_questions": "154;100;73;40", "wc_limitations": "14;31;14;37", "wc_review": "639;333;493;207", "wc_reply_reviewers": "65;47;0;158", "wc_reply_authors": "551;21;0;29", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.75, 21.637640814099857 ], "wc_strengths_avg": [ 93.25, 24.23195204683271 ], "wc_weaknesses_avg": [ 140.25, 96.42451711053575 ], "wc_questions_avg": [ 91.75, 41.75149697915034 ], "wc_limitations_avg": [ 24.0, 10.222524150130436 ], "wc_review_avg": [ 418.0, 162.9509128541476 ], "wc_reply_reviewers_avg": [ 67.5, 57.38684518249805 ], "wc_reply_authors_avg": [ 150.25, 231.61538701044884 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5228299208229495239&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;linctex.com;linctex.com;zju.edu.cn;zju.edu.cn;style3d.com;linctex.com", "author_num": 7, "aff_unique_index": "0;1;2;0;0;2;3", "aff_unique_norm": "Zhejiang University;Style3D;Style3D Research;Linctex", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;;;", "aff_unique_abbr": "ZJU;;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "What makes unlearning hard and what to do about it", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95259", "id": "QAbhLBF72K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QAbhLBF72K", "openreview": "https://openreview.net/forum?id=QAbhLBF72K", "poster": "/media/PosterPDFs/NeurIPS%202024/95259.png?t=1733155045.1892438", "project": "", "author_site": "KAIRAN ZHAO, Meghdad Kurmanji, George-Octavian B\u0103rbulescu, Eleni Triantafillou, Peter Triantafillou", "tldr": "", "abstract": "Machine unlearning is the problem of removing the effect of a subset of training data (the ``forget set'') from a trained model without damaging the model's utility e.g. to comply with users' requests to delete their data, or remove mislabeled, poisoned or otherwise problematic data.\nWith unlearning research still being at its infancy, many fundamental open questions exist: \nAre there interpretable characteristics of forget sets that substantially affect the difficulty of the problem? \nHow do these characteristics affect different state-of-the-art algorithms?\nWith this paper, we present the first investigation aiming to answer these questions. We identify two key factors affecting unlearning difficulty and the performance of unlearning algorithms. Evaluation on forget sets that isolate these identified factors reveals previously-unknown behaviours of state-of-the-art algorithms that don't materialize on random forget sets.\nBased on our insights, we develop a framework coined Refined-Unlearning Meta-algorithm (RUM) that encompasses: (i) refining the forget set into homogenized subsets, according to different characteristics; and (ii) a meta-algorithm that employs existing algorithms to unlearn each subset and finally delivers a model that has unlearned the overall forget set. \nWe find that RUM substantially improves top-performing unlearning algorithms. \nOverall, we view our work as an important step in (i) deepening our scientific understanding of unlearning and (ii) revealing new pathways to improving the state-of-the-art.", "keywords": "machine unlearning", "primary_area": "other", "supplementary_material": "", "author": "Kairan Zhao;Meghdad Kurmanji;George-Octavian B\u0103rbulescu;Eleni Triantafillou;Peter Triantafillou", "authorids": "~Kairan_Zhao1;~Meghdad_Kurmanji1;~George-Octavian_B\u0103rbulescu1;~Eleni_Triantafillou1;~Peter_Triantafillou1", "gender": ";M;;F;", "homepage": "https://kairanzhao.github.io/;;;http://www.cs.toronto.edu/~eleni/;https://warwick.ac.uk/fac/sci/dcs/people/peter_triantafillou/", "dblp": "336/7794;;;183/8430;t/PeterTriantafillou", "google_scholar": ";7t9HbecAAAAJ;hUK-vOMAAAAJ;Y5x2ZgQAAAAJ;", "orcid": ";0009-0007-5089-7759;;;", "linkedin": ";meghdad-kurmanji-948380113/;;;", "or_profile": "~Kairan_Zhao1;~Meghdad_Kurmanji1;~George-Octavian_B\u0103rbulescu1;~Eleni_Triantafillou1;~Peter_Triantafillou1", "aff": "University of Warwick;University of Warwick;University of Warwick;Google;University of Warwick", "aff_domain": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk;google.com;warwick.ac.uk", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhao2024what,\ntitle={What makes unlearning hard and what to do about it},\nauthor={Kairan Zhao and Meghdad Kurmanji and George-Octavian B{\\u{a}}rbulescu and Eleni Triantafillou and Peter Triantafillou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QAbhLBF72K}\n}", "github": "", "reviewers": "3Cxb;Bp7U;7Yis;HxTF", "pdf_size": 2679813, "rating": "5;5;6;7", "confidence": "3;3;3;4", "soundness": "2;3;2;4", "novelty": "2;3;3;3", "presentation": "4;2;3;2", "wc_summary": "176;56;59;96", "wc_strengths": "177;43;69;113", "wc_weaknesses": "502;146;436;377", "wc_questions": "78;20;2;25", "wc_limitations": "87;4;3;5", "wc_review": "1020;269;569;616", "wc_reply_reviewers": "1266;30;184;696", "wc_reply_authors": "859;83;222;50", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 96.75, 48.39098573081561 ], "wc_strengths_avg": [ 100.5, 50.76169815914357 ], "wc_weaknesses_avg": [ 365.25, 134.08462812716454 ], "wc_questions_avg": [ 31.25, 28.314086600135983 ], "wc_limitations_avg": [ 24.75, 35.947009611371016 ], "wc_review_avg": [ 618.5, 267.3055367926373 ], "wc_reply_reviewers_avg": [ 544.0, 484.29949411495363 ], "wc_reply_authors_avg": [ 303.5, 327.1486665111139 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14668039004068001853&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk;google.com;warwick.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Warwick;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.warwick.ac.uk;https://www.google.com", "aff_unique_abbr": "Warwick;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Cherry on Top: Parameter Heterogeneity and Quantization in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95258", "id": "QAiKLaCrKj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QAiKLaCrKj", "openreview": "https://openreview.net/forum?id=QAiKLaCrKj", "poster": "/media/PosterPDFs/NeurIPS%202024/95258.png?t=1731687219.522369", "project": "", "author_site": "Wanyun Cui, Qianle Wang", "tldr": "", "abstract": "This paper reveals the phenomenon of parameter heterogeneity in large language models (LLMs). We find that a small subset of ``cherry'' parameters exhibit a disproportionately large influence on model performance, while the vast majority of parameters have minimal impact. This heterogeneity is found to be prevalent across different model families, scales, and types. Motivated by this observation, we propose CherryQ, a novel quantization method that unifies the optimization of mixed-precision parameters. CherryQ identifies and preserves the critical cherry parameters in high precision while aggressively quantizing the remaining parameters to low precision. Extensive experiments demonstrate the effectiveness of CherryQ. CherryQ outperforms existing quantization approaches in terms of perplexity and downstream task performance. Notably, our 3-bit quantized Vicuna-1.5 exhibits competitive performance compared to their 16-bit counterparts. These findings highlight the potential of CherryQ for enabling efficient deployment of LLMs by taking advantage of parameter heterogeneity.", "keywords": "large language models;parameter quantization", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/87b0383f0c2092ea89f73f97faa69c3b3540bb61.zip", "author": "Wanyun Cui;Qianle Wang", "authorids": "~Wanyun_Cui1;~Qianle_Wang1", "gender": "M;M", "homepage": "https://cuiwanyun.github.io/;https://github.com/wangitu", "dblp": "116/5305;", "google_scholar": "https://scholar.google.com.hk/citations?user=FP7ANisAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Wanyun_Cui1;~Qianle_Wang1", "aff": "Shanghai University of Finance and Economics;SUFE", "aff_domain": "shufe.edu.cn;sufe.edu", "position": "Associate Professor;MS student", "bibtex": "@inproceedings{\ncui2024cherry,\ntitle={Cherry on Top: Parameter Heterogeneity and Quantization in Large Language Models},\nauthor={Wanyun Cui and Qianle Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QAiKLaCrKj}\n}", "github": "", "reviewers": "P3Ag;GdR4;YuKp", "pdf_size": 1118292, "rating": "5;5;7", "confidence": "4;3;4", "soundness": "2;2;3", "novelty": "3;2;3", "presentation": "3;2;4", "wc_summary": "68;91;250", "wc_strengths": "25;38;59", "wc_weaknesses": "277;81;170", "wc_questions": "3;1;44", "wc_limitations": "3;1;14", "wc_review": "376;212;537", "wc_reply_reviewers": "151;0;21", "wc_reply_authors": "25;0;106", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 136.33333333333334, 80.92108638804983 ], "wc_strengths_avg": [ 40.666666666666664, 14.007934259633796 ], "wc_weaknesses_avg": [ 176.0, 80.12906255951499 ], "wc_questions_avg": [ 16.0, 19.8158185969358 ], "wc_limitations_avg": [ 6.0, 5.715476066494082 ], "wc_review_avg": [ 375.0, 132.68257861025563 ], "wc_reply_reviewers_avg": [ 57.333333333333336, 66.78489516516606 ], "wc_reply_authors_avg": [ 43.666666666666664, 45.24255617103093 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7092242032715931380&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "shufe.edu.cn;sufe.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "An Offline Adaptation Framework for Constrained Multi-Objective Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95257", "id": "QB6CvDqa6b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QB6CvDqa6b", "openreview": "https://openreview.net/forum?id=QB6CvDqa6b", "poster": "/media/PosterPDFs/NeurIPS%202024/95257.png?t=1729822606.4679687", "project": "", "author_site": "Qian Lin, Zongkai Liu, Danying Mo, Chao Yu", "tldr": "", "abstract": "In recent years, significant progress has been made in multi-objective reinforcement learning (RL) research, which aims to balance multiple objectives by incorporating preferences for each objective. In most existing studies, specific preferences must be provided during deployment to indicate the desired policies explicitly. However, designing these preferences depends heavily on human prior knowledge, which is typically obtained through extensive observation of high-performing demonstrations with expected behaviors. In this work, we propose a simple yet effective offline adaptation framework for multi-objective RL problems without assuming handcrafted target preferences, but only given several demonstrations to implicitly indicate the preferences of expected policies. Additionally, we demonstrate that our framework can naturally be extended to meet constraints on safety-critical objectives by utilizing safe demonstrations, even when the safety thresholds are unknown. Empirical results on offline multi-objective and safe tasks demonstrate the capability of our framework to infer policies that align with real preferences while meeting the constraints implied by the provided demonstrations.", "keywords": "Multi-Objective RL;safe RL;offline RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b2b870c3a8de07a1200790734fde78e444a55387.zip", "author": "Qian Lin;Zongkai Liu;Danying Mo;Chao Yu", "authorids": "~Qian_Lin3;~Zongkai_Liu1;~Danying_Mo1;~Chao_Yu2", "gender": "M;;;M", "homepage": "https://qianlin04.github.io/;https://dblp.org/pid/214/0917;;https://cse.sysu.edu.cn/teacher/YuChao", "dblp": ";214/0917;;36/6789-4", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;", "orcid": "0000-0003-0724-8909;;0000-0001-6308-6591;", "linkedin": ";;;", "or_profile": "~Qian_Lin3;~Zongkai_Liu1;~Danying_Mo1;~Chao_Yu2", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;mail2.sysu.edu.cn;mail.sysu.edu.cn", "position": "MS student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nlin2024an,\ntitle={An Offline Adaptation Framework for Constrained Multi-Objective Reinforcement Learning},\nauthor={Qian Lin and Zongkai Liu and Danying Mo and Chao Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QB6CvDqa6b}\n}", "github": "", "reviewers": "TEUx;58tv;ZqLH;FywX", "pdf_size": 1555651, "rating": "6;6;6;7", "confidence": "4;3;3;3", "soundness": "3;4;2;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "74;29;159;118", "wc_strengths": "89;42;85;294", "wc_weaknesses": "55;160;327;255", "wc_questions": "283;72;75;165", "wc_limitations": "6;5;17;126", "wc_review": "507;308;663;958", "wc_reply_reviewers": "27;0;202;16", "wc_reply_authors": "16;21;515;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 48.53349358947901 ], "wc_strengths_avg": [ 127.5, 97.8787515245265 ], "wc_weaknesses_avg": [ 199.25, 102.19680768008362 ], "wc_questions_avg": [ 148.75, 86.04758857748426 ], "wc_limitations_avg": [ 38.5, 50.7370673176919 ], "wc_review_avg": [ 609.0, 237.55104714566087 ], "wc_reply_reviewers_avg": [ 61.25, 81.82718069199257 ], "wc_reply_authors_avg": [ 138.0, 217.79921946600268 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TrblJD6CbhEJ:scholar.google.com/&scioq=An+Offline+Adaptation+Framework+for+Constrained+Multi-Objective+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "sysu.edu.cn;sysu.edu.cn;mail2.sysu.edu.cn;mail.sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Iteration Head: A Mechanistic Study of Chain-of-Thought", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95256", "id": "QBCxWpOt5w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QBCxWpOt5w", "openreview": "https://openreview.net/forum?id=QBCxWpOt5w", "poster": "/media/PosterPDFs/NeurIPS%202024/95256.png?t=1731702620.7202322", "project": "", "author_site": "Vivien Cabannes, Charles Arnal, Wassim Bouaziz, Xingyu Yang, Francois Charton, Julia Kempe", "tldr": "", "abstract": "Chain-of-Thought (CoT) reasoning is known to improve Large Language Models both empirically and in terms of theoretical approximation power.\nHowever, our understanding of the inner workings and conditions of apparition of CoT capabilities remains limited.\nThis paper helps fill this gap by demonstrating how CoT reasoning emerges in transformers in a controlled and interpretable setting.\nIn particular, we observe the appearance of a specialized attention mechanism dedicated to iterative reasoning, which we coined \"iteration heads\".\nWe track both the emergence and the precise working of these iteration heads down to the attention level, and measure the transferability of the CoT skills to which they give rise between tasks.", "keywords": "chain-of-thought;mechanistic interpretability;controlled experiments", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Vivien Cabannes;Charles Arnal;Wassim Bouaziz;Xingyu Alice Yang;Francois Charton;Julia Kempe", "authorids": "~Vivien_Cabannes1;~Charles_Arnal1;~Wassim_Bouaziz1;~Xingyu_Alice_Yang1;~Francois_Charton1;~Julia_Kempe1", "gender": "Not Specified;M;M;F;M;", "homepage": "https://viviencabannes.github.io/;https://charlesarnal.github.io/;https://wesbz.github.io/;https://github.com/tfaod;;", "dblp": ";;;;255/5318;", "google_scholar": ";Pre7QicAAAAJ;QDtbGtIAAAAJ;;;", "orcid": ";0000-0002-3306-0574;;;;", "linkedin": ";charles-arnal-049001183/;wassim-bouaziz/;xyang13;fran%C3%A7ois-charton-214187120/;", "or_profile": "~Vivien_Cabannes1;~Charles_Arnal1;~Wassim_Bouaziz1;~Xingyu_Alice_Yang1;~Francois_Charton1;~Julia_Kempe1", "aff": "META;INRIA;\u00c9cole Polytechnique;Meta;Meta Facebook;", "aff_domain": "meta.com;inria.fr;polytechnique.edu;meta.com;fb.com;", "position": "Postdoc;Postdoc;PhD student;Research Engineer;Research Engineer;", "bibtex": "@inproceedings{\ncabannes2024iteration,\ntitle={Iteration Head: A Mechanistic Study of Chain-of-Thought},\nauthor={Vivien Cabannes and Charles Arnal and Wassim Bouaziz and Xingyu Alice Yang and Francois Charton and Julia Kempe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QBCxWpOt5w}\n}", "github": "", "reviewers": "sSSN;r3gz;uh9V;YdTm", "pdf_size": 13618201, "rating": "6;7;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "235;52;174;77", "wc_strengths": "76;63;116;45", "wc_weaknesses": "148;162;282;216", "wc_questions": "973;178;56;77", "wc_limitations": "2;14;31;13", "wc_review": "1434;469;659;428", "wc_reply_reviewers": "0;75;157;22", "wc_reply_authors": "0;19;186;21", "reply_reviewers": "0;1;3;1", "reply_authors": "1;2;4;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 134.5, 73.77838436832295 ], "wc_strengths_avg": [ 75.0, 26.105554964413226 ], "wc_weaknesses_avg": [ 202.0, 52.706735812417755 ], "wc_questions_avg": [ 321.0, 379.2472808076546 ], "wc_limitations_avg": [ 15.0, 10.36822067666386 ], "wc_review_avg": [ 747.5, 405.81923315683304 ], "wc_reply_reviewers_avg": [ 63.5, 60.47520152922188 ], "wc_reply_authors_avg": [ 56.5, 75.21469271359153 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17869400255627044743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "meta.com;inria.fr;polytechnique.edu;meta.com;fb.com;", "author_num": 6, "aff_unique_index": "1;2;3;3", "aff_unique_norm": ";INRIA;Ecole Polytechnique;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": ";https://www.inria.fr;https://www.polytechnique.edu;https://meta.com", "aff_unique_abbr": ";INRIA;X;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;2;2", "aff_country_unique": ";France;United States" }, { "title": "Leveraging partial stragglers within gradient coding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95255", "id": "QC4e0vOanp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QC4e0vOanp", "openreview": "https://openreview.net/forum?id=QC4e0vOanp", "poster": "", "project": "", "author_site": "Aditya RAMAMOORTHY, Ruoyu Meng, Vrinda Girimaji", "tldr": "", "abstract": "Within distributed learning, workers typically compute gradients on their assigned dataset chunks and send them to the parameter server (PS), which aggregates them to compute either an exact or approximate version of $\\nabla L$ (gradient of the loss function $L$). However, in large-scale clusters, many workers are slower than their promised speed or even failure-prone. A gradient coding solution introduces redundancy within the assignment of chunks to the workers and uses coding theoretic ideas to allow the PS to recover $\\nabla L$ (exactly or approximately), even in the presence of stragglers. Unfortunately, most existing gradient coding protocols are inefficient from a computation perspective as they coarsely classify workers as operational or failed; the potentially valuable work performed by slow workers (partial stragglers) is ignored. In this work, we present novel gradient coding protocols that judiciously leverage the work performed by partial stragglers. Our protocols are efficient from a computation and communication perspective and numerically stable. For an important class of chunk assignments, we present efficient algorithms for optimizing the relative ordering of chunks within the workers; this ordering affects the overall execution time. For exact gradient reconstruction, our protocol is around $2\\times$ faster than the original class of protocols and for approximate gradient reconstruction, the mean-squared-error of our reconstructed gradient is several orders of magnitude better.", "keywords": "gradient coding;stragglers;communication-efficient", "primary_area": "infrastructure", "supplementary_material": "", "author": "Aditya Ramamoorthy;Ruoyu meng;Vrinda S Girimaji", "authorids": "~Aditya_Ramamoorthy1;~Ruoyu_meng1;~Vrinda_S_Girimaji1", "gender": "M;M;", "homepage": "https://www.ece.iastate.edu/adityar/;;", "dblp": ";;", "google_scholar": "tSTjVf4AAAAJ;;", "orcid": ";;", "linkedin": ";meng-ruoyu-9bb7151b2/;vrinda-girimaji-152261178/", "or_profile": "~Aditya_Ramamoorthy1;~Ruoyu_meng1;~Vrinda_S_Girimaji1", "aff": "Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu", "position": "Full Professor;PhD student;MS student", "bibtex": "@inproceedings{\nramamoorthy2024leveraging,\ntitle={Leveraging partial stragglers within gradient coding},\nauthor={Aditya Ramamoorthy and Ruoyu meng and Vrinda S Girimaji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QC4e0vOanp}\n}", "github": "", "reviewers": "Wfvp;TBo9;aGFU;ryxi", "pdf_size": 900906, "rating": "3;4;5;6", "confidence": "2;4;4;3", "soundness": "3;3;3;4", "novelty": "3;2;2;3", "presentation": "2;2;3;3", "wc_summary": "156;53;83;145", "wc_strengths": "51;39;85;158", "wc_weaknesses": "141;206;239;82", "wc_questions": "263;87;115;86", "wc_limitations": "17;4;38;9", "wc_review": "628;389;560;480", "wc_reply_reviewers": "0;166;268;28", "wc_reply_authors": "0;121;197;127", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 109.25, 42.769001625008734 ], "wc_strengths_avg": [ 83.25, 46.337754585219166 ], "wc_weaknesses_avg": [ 167.0, 60.427642681143865 ], "wc_questions_avg": [ 137.75, 73.24402706023201 ], "wc_limitations_avg": [ 17.0, 12.98075498574717 ], "wc_review_avg": [ 514.25, 89.2927068690383 ], "wc_reply_reviewers_avg": [ 115.5, 108.17000508458895 ], "wc_reply_authors_avg": [ 111.25, 70.83916642649037 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.40451991747794525, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=100833630382740869&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "iastate.edu;iastate.edu;iastate.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Cross-video Identity Correlating for Person Re-identification Pre-training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95254", "id": "QCINh3O9q6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QCINh3O9q6", "openreview": "https://openreview.net/forum?id=QCINh3O9q6", "poster": "", "project": "", "author_site": "Jialong Zuo, Ying Nie, Hanyu Zhou, Huaxin Zhang, Haoyu Wang, Tianyu Guo, Nong Sang, Changxin Gao", "tldr": "", "abstract": "Recent researches have proven that pre-training on large-scale person images extracted from internet videos is an effective way in learning better representations for person re-identification. However, these researches are mostly confined to pre-training at the instance-level or single-video tracklet-level. They ignore the identity-invariance in images of the same person across different videos, which is a key focus in person re-identification. To address this issue, we propose a Cross-video Identity-cOrrelating pre-traiNing (CION) framework. Defining a noise concept that comprehensively considers both intra-identity consistency and inter-identity discrimination, CION seeks the identity correlation from cross-video images by modeling it as a progressive multi-level denoising problem. Furthermore, an identity-guided self-distillation loss is proposed to implement better large-scale pre-training by mining the identity-invariance within person images. We conduct extensive experiments to verify the superiority of our CION in terms of efficiency and performance. CION achieves significantly leading performance with even fewer training samples. For example, compared with the previous state-of-the-art ISR, CION with the same ResNet50-IBN achieves higher mAP of 93.3% and 74.3% on Market1501 and MSMT17, while only utilizing 8% training samples. Finally, with CION demonstrating superior model-agnostic ability, we contribute a model zoo named ReIDZoo to meet diverse research and application needs in this field. It contains a series of CION pre-trained models with spanning structures and parameters, totaling 32 models with 10 different structures, including GhostNet, ConvNext, RepViT, FastViT and so on. The code and models will be open-sourced.", "keywords": "Person Re-identification;Pre-training;Self-supervised Representation Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/6cca21518755236a271c8aba0366bf38b54e0e02.zip", "author": "Jialong Zuo;Ying Nie;Hanyu Zhou;Huaxin Zhang;Haoyu Wang;Tianyu Guo;Nong Sang;Changxin Gao", "authorids": "~Jialong_Zuo1;~Ying_Nie1;~Hanyu_Zhou2;~Huaxin_Zhang2;~Haoyu_Wang28;~Tianyu_Guo1;~Nong_Sang1;~Changxin_Gao1", "gender": "M;M;M;M;;M;M;M", "homepage": "https://github.com/Zplusdragon;;https://github.com/6irdy;https://github.com/pipixin321;https://github.com/xjwhy;;http://faculty.hust.edu.cn/sangnong/en/index.htm;https://sites.google.com/site/changxingao/home", "dblp": ";;;;;218/7273;10/1545;99/7463", "google_scholar": ";https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=oyfu0pgAAAAJ;;RPK3oQgAAAAJ;ky_ZowEAAAAJ;4tku-lwAAAAJ", "orcid": ";;;;;;0000-0002-9167-1496;0000-0003-2736-3920", "linkedin": ";;;;;;;", "or_profile": "~Jialong_Zuo1;~Ying_Nie1;~Hanyu_Zhou2;~Huaxin_Zhang2;~Haoyu_Wang28;~Tianyu_Guo1;~Nong_Sang1;~Changxin_Gao1", "aff": "Huazhong University of Science and Technology;Huawei Noah's Ark Lab;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;huawei.com;hust.edu.cn;hust.edu.cn;huawei.com;huawei.com;hust.edu.cn;hust.edu.cn", "position": "MS student;Researcher;MS student;MS student;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzuo2024crossvideo,\ntitle={Cross-video Identity Correlating for Person Re-identification Pre-training},\nauthor={Jialong Zuo and Ying Nie and Hanyu Zhou and Huaxin Zhang and Haoyu Wang and Tianyu Guo and Nong Sang and Changxin Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QCINh3O9q6}\n}", "github": "", "reviewers": "3WDg;uzbJ;hfuf;5xRM", "pdf_size": 2018646, "rating": "5;5;5;7", "confidence": "4;5;2;5", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "77;39;51;37", "wc_strengths": "67;135;34;21", "wc_weaknesses": "255;220;25;186", "wc_questions": "2;63;56;4", "wc_limitations": "103;1;63;16", "wc_review": "504;458;229;264", "wc_reply_reviewers": "19;46;22;43", "wc_reply_authors": "87;48;92;98", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 51.0, 15.937377450509228 ], "wc_strengths_avg": [ 64.25, 44.15526582413472 ], "wc_weaknesses_avg": [ 171.5, 88.02982449147561 ], "wc_questions_avg": [ 31.25, 28.367014294775544 ], "wc_limitations_avg": [ 45.75, 40.195615432531945 ], "wc_review_avg": [ 363.75, 119.01759323730252 ], "wc_reply_reviewers_avg": [ 32.5, 12.093386622447824 ], "wc_reply_authors_avg": [ 81.25, 19.587942719948924 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3238170472110684205&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;huawei.com;hust.edu.cn;hust.edu.cn;huawei.com;huawei.com;hust.edu.cn;hust.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;1;1;0;0", "aff_unique_norm": "Huazhong University of Science and Technology;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "http://www.hust.edu.cn;https://www.huawei.com", "aff_unique_abbr": "HUST;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The iNaturalist Sounds Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97701", "id": "QCY01LvyKm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QCY01LvyKm", "openreview": "https://openreview.net/forum?id=QCY01LvyKm", "poster": "/media/PosterPDFs/NeurIPS%202024/97701.png?t=1733624303.736406", "project": "", "author_site": "Mustafa Chasmai, Alexander Shepard, Subhransu Maji, Grant Van Horn", "tldr": "", "abstract": "We present the iNaturalist Sounds Dataset (iNatSounds), a collection of 230,000 audio files capturing sounds from over 5,500 species, contributed by more than 27,000 recordists worldwide. The dataset encompasses sounds from birds, mammals, insects, reptiles, and amphibians, with audio and species labels derived from observations submitted to iNaturalist, a global citizen science platform. Each recording in the dataset varies in length and includes a single species annotation. We benchmark multiple backbone architectures, comparing multiclass classification objectives with multilabel objectives. Despite weak labeling, we demonstrate that iNatSounds serves as a useful pretraining resource by benchmarking it on strongly labeled downstream evaluation datasets. The dataset is available as a single, freely accessible archive, promoting accessibility and research in this important domain. We envision models trained on this data powering next-generation public engagement applications, and assisting biologists, ecologists, and land use managers in processing large audio collections, thereby contributing to the understanding of species compositions in diverse soundscapes.", "keywords": "fine-grained classification;audio;dataset;benchmark;bioacoustics;multilabel;inaturalist", "primary_area": "", "supplementary_material": "", "author": "Mustafa Chasmai;Alexander Shepard;Subhransu Maji;Grant Van Horn", "authorids": "~Mustafa_Chasmai1;~Alexander_Shepard1;~Subhransu_Maji1;~Grant_Van_Horn1", "gender": "M;M;M;M", "homepage": "https://github.com/mustafa1728;;https://people.cs.umass.edu/~smaji/;https://gvh.codes/", "dblp": "295/9019;04/11130;92/6598;144/8033", "google_scholar": "c9dbnFcAAAAJ;;l7Qx0zAAAAAJ;PxYY_nsAAAAJ", "orcid": "0000-0001-8694-3453;;0000-0002-3869-9334;0000-0003-2953-9651", "linkedin": "mustafa-chasmai/;;;", "or_profile": "~Mustafa_Chasmai1;~Alexander_Shepard1;~Subhransu_Maji1;~Grant_Van_Horn1", "aff": "University of Massachusetts at Amherst;iNaturalist;University of Massachusetts at Amherst;University of Massachusetts at Amherst", "aff_domain": "umass.edu;inaturalist.org;cs.umass.edu;umass.edu", "position": "PhD student;Software Developer;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchasmai2024the,\ntitle={The iNaturalist Sounds Dataset},\nauthor={Mustafa Chasmai and Alexander Shepard and Subhransu Maji and Grant Van Horn},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QCY01LvyKm}\n}", "github": "", "reviewers": "geSw;u4zG;XHtJ", "pdf_size": 7521725, "rating": "6;8;8", "confidence": "4;4;5", "wc_summary_and_contributions": "87;42;43", "wc_strengths": "41;46;46", "wc_improvement": "55;34;221", "wc_limitations": "16;24;11", "wc_correctness": "19;22;52", "wc_clarity": "11;6;5", "wc_relation_to_prior_work": "32;10;8", "wc_documentation": "49;1;12", "wc_additional_feedback": "1;1;1", "wc_review": "311;186;399", "wc_reply_reviewers": "0;301;265", "wc_reply_authors": "23;0;0", "reply_reviewers": "0;3;1", "reply_authors": "2;2;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 57.333333333333336, 20.98147330914162 ], "wc_strengths_avg": [ 44.333333333333336, 2.357022603955158 ], "wc_improvement_avg": [ 103.33333333333333, 83.64342306614563 ], "wc_limitations_avg": [ 17.0, 5.354126134736337 ], "wc_correctness_avg": [ 31.0, 14.89966442575134 ], "wc_clarity_avg": [ 7.333333333333333, 2.6246692913372702 ], "wc_relation_to_prior_work_avg": [ 16.666666666666668, 10.873004286866728 ], "wc_documentation_avg": [ 20.666666666666668, 20.531818125912658 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 298.6666666666667, 87.3931093139245 ], "wc_reply_reviewers_avg": [ 188.66666666666666, 134.2145877151793 ], "wc_reply_authors_avg": [ 7.666666666666667, 10.842303978193728 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17052528035245655435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "umass.edu;inaturalist.org;cs.umass.edu;umass.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Massachusetts Amherst;iNaturalist", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.inaturalist.org", "aff_unique_abbr": "UMass Amherst;iNaturalist", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Gradient Accumulation Method for Dense Retriever under Memory Constraint", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95253", "id": "QDG2q5MYHV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QDG2q5MYHV", "openreview": "https://openreview.net/forum?id=QDG2q5MYHV", "poster": "/media/PosterPDFs/NeurIPS%202024/95253.png?t=1731658771.4989426", "project": "", "author_site": "Jaehee Kim, Yukyung Lee, Pilsung Kang", "tldr": "", "abstract": "InfoNCE loss is commonly used to train dense retriever in information retrieval tasks. It is well known that a large batch is essential to stable and effective training with InfoNCE loss, which requires significant hardware resources. Due to the dependency of large batch, dense retriever has bottleneck of application and research. Recently, memory reduction methods have been broadly adopted to resolve the hardware bottleneck by decomposing forward and backward or using a memory bank. However, current methods still suffer from slow and unstable train. To address these issues, we propose Contrastive Accumulation (ContAccum), a stable and efficient memory reduction method for dense retriever trains that uses a dual memory bank structure to leverage previously generated query and passage representations. Experiments on widely used five information retrieval datasets indicate that ContAccum can surpass not only existing memory reduction methods but also high-resource scenarios. Moreover, theoretical analysis and experimental results confirm that ContAccum provides more stable dual-encoder training than current memory bank utilization methods.", "keywords": "Dense Retriever;Efficient Training;Memory Reduction;Memory Bank;Dual Encoder", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/6f0d1a8040312ef634e3bed4885b4cff5b0ead4e.zip", "author": "Jaehee Kim;Yukyung Lee;Pilsung Kang", "authorids": "~Jaehee_Kim1;~Yukyung_Lee1;~Pilsung_Kang1", "gender": "M;F;M", "homepage": "https://github.com/KimJaehee0725;https://yukyunglee.github.io/;", "dblp": "92/5260;22/8259;65/3604", "google_scholar": "Iwp52qoAAAAJ;V6Hm5rEAAAAJ;I2pcWZIAAAAJ", "orcid": ";0000-0002-7835-6336;", "linkedin": ";yukyung-lee-149681155/;", "or_profile": "~Jaehee_Kim1;~Yukyung_Lee1;~Pilsung_Kang1", "aff": "Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2024a,\ntitle={A Gradient Accumulation Method for Dense Retriever under Memory Constraint},\nauthor={Jaehee Kim and Yukyung Lee and Pilsung Kang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QDG2q5MYHV}\n}", "github": "", "reviewers": "jj9d;ADDC;sDHF", "pdf_size": 1534558, "rating": "5;6;7", "confidence": "2;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "101;186;103", "wc_strengths": "116;83;80", "wc_weaknesses": "176;49;177", "wc_questions": "79;79;77", "wc_limitations": "6;9;1", "wc_review": "478;406;438", "wc_reply_reviewers": "25;38;0", "wc_reply_authors": "39;42;51", "reply_reviewers": "1;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 130.0, 39.60639678974429 ], "wc_strengths_avg": [ 93.0, 16.30950643030009 ], "wc_weaknesses_avg": [ 134.0, 60.10546286874985 ], "wc_questions_avg": [ 78.33333333333333, 0.9428090415820634 ], "wc_limitations_avg": [ 5.333333333333333, 3.2998316455372216 ], "wc_review_avg": [ 440.6666666666667, 29.4542960458327 ], "wc_reply_reviewers_avg": [ 21.0, 15.769168230019828 ], "wc_reply_authors_avg": [ 44.0, 5.0990195135927845 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11893104898976138819&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Learning rigid-body simulators over implicit shapes for large-scale scenes and vision", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95252", "id": "QDYts5dYgq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QDYts5dYgq", "openreview": "https://openreview.net/forum?id=QDYts5dYgq", "poster": "/media/PosterPDFs/NeurIPS%202024/95252.png?t=1731256192.1181202", "project": "", "author_site": "Yulia Rubanova, Tatiana Lopez-Guevara, Kelsey Allen, Will Whitney, Kimberly Stachenfeld, Tobias Pfaff", "tldr": "", "abstract": "Simulating large scenes with many rigid objects is crucial for a variety of applications, such as robotics, engineering, film and video games. Rigid interactions are notoriously hard to model: small changes to the initial state or the simulation parameters can lead to large changes in the final state. Recently, learned simulators based on graph networks (GNNs) were developed as an alternative to hand-designed simulators like MuJoCo and Bullet. They are able to accurately capture dynamics of real objects directly from real-world observations. However, current state-of-the-art learned simulators operate on meshes and scale poorly to scenes with many objects or detailed shapes. Here we present SDF-Sim, the first learned rigid-body simulator designed for scale. We use learned signed-distance functions (SDFs) to represent the object shapes and to speed up distance computation. We design the simulator to leverage SDFs and avoid the fundamental bottleneck of the previous simulators associated with collision detection.\nFor the first time in literature, we demonstrate that we can scale the GNN-based simulators to scenes with hundreds of objects and up to 1.1 million nodes, where mesh-based approaches run out of memory. Finally, we show that SDF-Sim can be applied to real world scenes by extracting SDFs from multi-view images.", "keywords": "graph networks;learned simulation;physics;rigid body simulation;scaling", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yulia Rubanova;Tatiana Lopez-Guevara;Kelsey R Allen;William F Whitney;Kim Stachenfeld;Tobias Pfaff", "authorids": "~Yulia_Rubanova2;~Tatiana_Lopez-Guevara1;~Kelsey_R_Allen1;~William_F_Whitney1;~Kim_Stachenfeld1;~Tobias_Pfaff1", "gender": "F;F;;F;M;F", "homepage": ";;http://willwhitney.com;https://neurokim.com/;http://tobiaspfaff.com;https://yuliarubanova.github.io/", "dblp": "208/0996;153/9528;160/8671;155/1888;67/7591;222/3085", "google_scholar": "Op4nexcAAAAJ;kpcjFekAAAAJ;aQcYWDMAAAAJ;jNtH2WUAAAAJ;3oUgDKQAAAAJ;u_HzE9wAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;https://linkedin.com/in/yulia-rubanova-031702100", "or_profile": "~Tatiana_Lopez-Guevara1;~Kelsey_R_Allen1;~William_F_Whitney1;~Kim_Stachenfeld1;~Tobias_Pfaff1;~Yulia_Rubanova1", "aff": "Google;Google;Google DeepMind;Google DeepMind;Deepmind;Google DeepMind", "aff_domain": "google.com;deepmind.com;deepmind.com;deepmind.com;google.com;deepmind.com", "position": "Researcher;Research Scientist;Researcher;Research Scientist;Research scientist;Research Scientist", "bibtex": "@inproceedings{\nrubanova2024learning,\ntitle={Learning rigid-body simulators over implicit shapes for large-scale scenes and vision},\nauthor={Yulia Rubanova and Tatiana Lopez-Guevara and Kelsey R Allen and William F Whitney and Kim Stachenfeld and Tobias Pfaff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QDYts5dYgq}\n}", "github": "", "reviewers": "nWDb;SAmo;HA7E;mwTT", "pdf_size": 49116489, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "148;173;69;54", "wc_strengths": "51;167;118;157", "wc_weaknesses": "288;214;200;286", "wc_questions": "7;165;256;80", "wc_limitations": "9;40;101;22", "wc_review": "503;759;744;599", "wc_reply_reviewers": "15;19;33;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 50.5618433208284 ], "wc_strengths_avg": [ 123.25, 45.554225929105634 ], "wc_weaknesses_avg": [ 247.0, 40.311288741492746 ], "wc_questions_avg": [ 127.0, 93.13162728096187 ], "wc_limitations_avg": [ 43.0, 35.249113464029136 ], "wc_review_avg": [ 651.25, 105.97257900041878 ], "wc_reply_reviewers_avg": [ 26.5, 9.836157786453 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14327266822996781633&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;deepmind.com;deepmind.com;deepmind.com;google.com;deepmind.com", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Learning Optimal Tax Design in Nonatomic Congestion Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95251", "id": "QDprhde3jb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QDprhde3jb", "openreview": "https://openreview.net/forum?id=QDprhde3jb", "poster": "/media/PosterPDFs/NeurIPS%202024/95251.png?t=1731948411.5957", "project": "", "author_site": "Qiwen Cui, Maryam Fazel, Simon Du", "tldr": "", "abstract": "In multiplayer games, self-interested behavior among the players can harm the social welfare. Tax mechanisms are a common method to alleviate this issue and induce socially optimal behavior. In this work, we take the initial step of learning the optimal tax that can maximize social welfare with limited feedback in congestion games. We propose a new type of feedback named \\emph{equilibrium feedback}, where the tax designer can only observe the Nash equilibrium after deploying a tax plan. Existing algorithms are not applicable due to the exponentially large tax function space, nonexistence of the gradient, and nonconvexity of the objective. To tackle these challenges, we design a computationally efficient algorithm that leverages several novel components: (1) a piece-wise linear tax to approximate the optimal tax; (2) extra linear terms to guarantee a strongly convex potential function; (3) an efficient subroutine to find the exploratory tax that can provide critical information about the game. The algorithm can find an $\\epsilon$-optimal tax with $O(\\beta F^2/\\epsilon)$ sample complexity, where $\\beta$ is the smoothness of the cost function and $F$ is the number of facilities.", "keywords": "game theory; congestion games; mechanism design; equilibrium feedback", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Qiwen Cui;Maryam Fazel;Simon Shaolei Du", "authorids": "~Qiwen_Cui1;~Maryam_Fazel1;~Simon_Shaolei_Du1", "gender": "M;F;M", "homepage": ";;http://simonshaoleidu.com", "dblp": "276/6268;10/2309;176/5602", "google_scholar": "AnSVkUYAAAAJ;vlN_kRoAAAAJ;OttawxUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Qiwen_Cui1;~Maryam_Fazel1;~Simon_Shaolei_Du1", "aff": "Department of Computer Science, University of Washington;University of Washington, Seattle;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;washington.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncui2024learning,\ntitle={Learning Optimal Tax Design in Nonatomic Congestion Games},\nauthor={Qiwen Cui and Maryam Fazel and Simon Shaolei Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QDprhde3jb}\n}", "github": "", "reviewers": "nncK;Q6MU;18G8;udJm;vts3", "pdf_size": 694402, "rating": "4;6;7;7;7", "confidence": "3;4;3;3;4", "soundness": "2;3;3;3;3", "novelty": "3;3;4;4;3", "presentation": "3;3;2;3;3", "wc_summary": "113;65;46;104;67", "wc_strengths": "98;147;53;72;118", "wc_weaknesses": "75;285;45;123;212", "wc_questions": "50;2;29;64;103", "wc_limitations": "1;7;10;14;5", "wc_review": "337;506;183;377;505", "wc_reply_reviewers": "0;12;11;36;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 79.0, 25.337718918639855 ], "wc_strengths_avg": [ 97.6, 33.16986584235758 ], "wc_weaknesses_avg": [ 148.0, 88.800900896331 ], "wc_questions_avg": [ 49.6, 33.9092907622675 ], "wc_limitations_avg": [ 7.4, 4.409081537009721 ], "wc_review_avg": [ 381.6, 120.1292637120531 ], "wc_reply_reviewers_avg": [ 11.8, 13.151425778218876 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.21004201260420152, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14367819089444445945&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.washington.edu;uw.edu;washington.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The surprising efficiency of temporal difference learning for rare event prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95250", "id": "QEUntqKvmm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QEUntqKvmm", "openreview": "https://openreview.net/forum?id=QEUntqKvmm", "poster": "/media/PosterPDFs/NeurIPS%202024/95250.png?t=1733816222.9652796", "project": "", "author_site": "Xiaoou Cheng, Jonathan Weare", "tldr": "", "abstract": "We quantify the efficiency of temporal difference (TD) learning over the direct, or Monte Carlo (MC), estimator for policy evaluation in reinforcement learning, with an emphasis on estimation of quantities related to rare events. Policy evaluation is complicated in the rare event setting by the long timescale of the event and by the need for \\emph{relative accuracy} in estimates of very small values. Specifically, we focus on least-squares TD (LSTD) prediction for finite state Markov chains, and show that LSTD can achieve relative accuracy far more efficiently than MC. We prove a central limit theorem for the LSTD estimator and upper bound the \n \\emph{relative asymptotic variance}\n by simple quantities characterizing the connectivity of states relative to the transition probabilities between them. Using this bound, we show that, even when both the timescale of the rare event and the relative accuracy of the MC estimator are exponentially large in the number of states, LSTD maintains a fixed level of relative accuracy with a total number of observed transitions of the Markov chain that is only \\emph{polynomially} large in the number of states.", "keywords": "temporal difference learning;reinforcement learning;rare events;policy evaluation;prediction;perturbation bounds", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Xiaoou Cheng;Jonathan Weare", "authorids": "~Xiaoou_Cheng1;~Jonathan_Weare1", "gender": "F;M", "homepage": "https://chengxo.github.io;https://cims.nyu.edu/~weare/", "dblp": "10/8074;", "google_scholar": "SOuYjCgAAAAJ;", "orcid": "0000-0002-6696-1731;", "linkedin": ";", "or_profile": "~Xiaoou_Cheng1;~Jonathan_Weare1", "aff": "NYU, New York University;New York University", "aff_domain": "cims.nyu.edu;nyu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncheng2024the,\ntitle={The surprising efficiency of temporal difference learning for rare event prediction},\nauthor={Xiaoou Cheng and Jonathan Weare},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QEUntqKvmm}\n}", "github": "", "reviewers": "251G;4mMw;piRP", "pdf_size": 690333, "rating": "5;6;7", "confidence": "2;2;3", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;2;4", "wc_summary": "116;46;108", "wc_strengths": "15;32;124", "wc_weaknesses": "41;39;29", "wc_questions": "261;1;5", "wc_limitations": "23;1;24", "wc_review": "456;119;290", "wc_reply_reviewers": "198;0;0", "wc_reply_authors": "799;0;0", "reply_reviewers": "2;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 90.0, 31.283648551066843 ], "wc_strengths_avg": [ 57.0, 47.88179890800539 ], "wc_weaknesses_avg": [ 36.333333333333336, 5.2493385826745405 ], "wc_questions_avg": [ 89.0, 121.63332876587184 ], "wc_limitations_avg": [ 16.0, 10.614455552060438 ], "wc_review_avg": [ 288.3333333333333, 137.58472137397945 ], "wc_reply_reviewers_avg": [ 66.0, 93.33809511662427 ], "wc_reply_authors_avg": [ 266.3333333333333, 376.65221211203436 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LVOCWNDkq6QJ:scholar.google.com/&scioq=The+surprising+efficiency+of+temporal+difference+learning+for+rare+event+prediction&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "cims.nyu.edu;nyu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "New York;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Fast TRAC: A Parameter-Free Optimizer for Lifelong Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95249", "id": "QEaHE4TUgc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QEaHE4TUgc", "openreview": "https://openreview.net/forum?id=QEaHE4TUgc", "poster": "", "project": "", "author_site": "Aneesh Muppidi, Zhiyu Zhang, Heng Yang", "tldr": "", "abstract": "A key challenge in lifelong reinforcement learning (RL) is the loss of plasticity, where previous learning progress hinders an agent's adaptation to new tasks. While regularization and resetting can help, they require precise hyperparameter selection at the outset and environment-dependent adjustments. Building on the principled theory of online convex optimization, we present a parameter-free optimizer for lifelong RL, called TRAC, which requires no tuning or prior knowledge about the distribution shifts. Extensive experiments on Procgen, Atari, and Gym Control environments show that TRAC works surprisingly well\u2014mitigating loss of plasticity and rapidly adapting to challenging distribution shifts\u2014despite the underlying optimization problem being nonconvex and nonstationary.", "keywords": "lifelong reinforcement learning;parameter-free optimization;continual reinforcement learning;loss of plasticity", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/37354b574d60699ebe19b12e90b733c20d139518.zip", "author": "Aneesh Muppidi;Zhiyu Zhang;Heng Yang", "authorids": "~Aneesh_Muppidi1;~Zhiyu_Zhang1;~Heng_Yang4", "gender": "M;;M", "homepage": "https://aneeshers.github.io;https://zhiyuzz.github.io/;https://hankyang.seas.harvard.edu/", "dblp": ";45/6271-3;83/415-2", "google_scholar": "OAWT85oAAAAJ;5KHfVTQAAAAJ;GuKEDfixZqsC", "orcid": ";;", "linkedin": ";;", "or_profile": "~Aneesh_Muppidi1;~Zhiyu_Zhang1;~Heng_Yang4", "aff": "Harvard University;Harvard University;NVIDIA", "aff_domain": "college.harvard.edu;harvard.edu;nvidia.com", "position": "Undergrad student;Postdoc;Researcher", "bibtex": "@inproceedings{\nmuppidi2024fast,\ntitle={Fast {TRAC}: A Parameter-Free Optimizer for Lifelong Reinforcement Learning},\nauthor={Aneesh Muppidi and Zhiyu Zhang and Heng Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QEaHE4TUgc}\n}", "github": "", "reviewers": "Jcag;Jqmp;346D;eCfR", "pdf_size": 28570132, "rating": "5;6;6;7", "confidence": "2;3;3;4", "soundness": "2;3;2;3", "novelty": "2;3;2;4", "presentation": "2;1;2;4", "wc_summary": "86;108;134;66", "wc_strengths": "176;58;63;110", "wc_weaknesses": "409;611;446;112", "wc_questions": "120;105;86;470", "wc_limitations": "67;37;97;3", "wc_review": "858;919;826;761", "wc_reply_reviewers": "502;95;52;26", "wc_reply_authors": "833;651;42;12", "reply_reviewers": "2;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 98.5, 25.31304011769428 ], "wc_strengths_avg": [ 101.75, 47.42559962720556 ], "wc_weaknesses_avg": [ 394.5, 179.9590231135966 ], "wc_questions_avg": [ 195.25, 159.08390081966184 ], "wc_limitations_avg": [ 51.0, 34.899856733230294 ], "wc_review_avg": [ 841.0, 57.00438579618238 ], "wc_reply_reviewers_avg": [ 168.75, 193.97341957082676 ], "wc_reply_authors_avg": [ 384.5, 363.399573472507 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5966476485624851290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "college.harvard.edu;harvard.edu;nvidia.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harvard University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.harvard.edu;https://www.nvidia.com", "aff_unique_abbr": "Harvard;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Local Linearity: the Key for No-regret Reinforcement Learning in Continuous MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95248", "id": "QEmsZoQ45M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QEmsZoQ45M", "openreview": "https://openreview.net/forum?id=QEmsZoQ45M", "poster": "", "project": "", "author_site": "Davide Maran, Alberto Maria Metelli, Matteo Papini, Marcello Restelli", "tldr": "", "abstract": "Achieving the no-regret property for Reinforcement Learning (RL) problems in continuous state and action-space environments is one of the major open problems in the field. Existing solutions either work under very specific assumptions or achieve bounds that are vacuous in some regimes. Furthermore, many structural assumptions\n are known to suffer from a provably unavoidable exponential dependence on the time horizon $H$ in the regret, which makes any possible solution unfeasible in practice. \n In this paper, we identify _local linearity_ as the feature that makes Markov Decision Processes (MDPs) both _learnable_ (sublinear regret) and _feasible_ (regret that is polynomial in $H$). \n We define a novel MDP representation class, namely _Locally Linearizable MDPs_, generalizing other representation classes like Linear MDPs and MDPS with low inherent Belmman error. \n Then, i) we introduce **Cinderella**, a no-regret algorithm for this general representation class, and ii) we show that all known learnable and feasible MDP families are representable in this class. \n We first show that all known feasible MDPs belong to a family that we call _Mildly Smooth MDPs_. Then, we show how any mildly smooth MDP can be represented as a Locally Linearizable MDP by an appropriate choice of representation. This way, **Cinderella** is shown to achieve state-of-the-art regret bounds for all previously known (and some new) continuous MDPs for which RL is learnable and feasible.", "keywords": "RL;regret;smoothness", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Davide Maran;Alberto Maria Metelli;Matteo Papini;Marcello Restelli", "authorids": "~Davide_Maran1;~Alberto_Maria_Metelli2;~Matteo_Papini1;~Marcello_Restelli1", "gender": "M;M;M;M", "homepage": "https://davidezfc.github.io/;https://albertometelli.github.io/;https://t3p.github.io/;http://home.deib.polimi.it/restelli/", "dblp": "320/3835;209/4941;209/4897;64/1011", "google_scholar": "https://scholar.google.it/citations?user=a8i0X8oAAAAJ;R31IsPwAAAAJ;https://scholar.google.it/citations?user=A2WxZlsAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";0000-0002-3424-5212;0000-0002-3807-3171;0000-0002-6322-1076", "linkedin": "davide-maran/;;matteo-papini/;", "or_profile": "~Davide_Maran1;~Alberto_Maria_Metelli2;~Matteo_Papini1;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;Politecnico di Milano;Polytechnic Institute of Milan;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmaran2024local,\ntitle={Local Linearity: the Key for No-regret Reinforcement Learning in Continuous {MDP}s},\nauthor={Davide Maran and Alberto Maria Metelli and Matteo Papini and Marcello Restelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QEmsZoQ45M}\n}", "github": "", "reviewers": "qoL4;1ViU;TQwD;RTnn;MjfW", "pdf_size": 582016, "rating": "5;5;6;6;7", "confidence": "3;3;4;3;2", "soundness": "3;3;4;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;4", "wc_summary": "174;213;60;88;34", "wc_strengths": "40;106;69;26;107", "wc_weaknesses": "88;120;208;27;30", "wc_questions": "117;39;12;5;69", "wc_limitations": "1;9;42;7;8", "wc_review": "420;487;391;153;248", "wc_reply_reviewers": "94;27;26;16;27", "wc_reply_authors": "0;181;0;0;369", "reply_reviewers": "1;1;1;1;2", "reply_authors": "1;2;1;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 113.8, 68.4 ], "wc_strengths_avg": [ 69.6, 33.16986584235758 ], "wc_weaknesses_avg": [ 94.6, 66.77005316756906 ], "wc_questions_avg": [ 48.4, 41.054110634624635 ], "wc_limitations_avg": [ 13.4, 14.568459081179451 ], "wc_review_avg": [ 339.8, 121.69042690368047 ], "wc_reply_reviewers_avg": [ 38.0, 28.305476501906835 ], "wc_reply_authors_avg": [ 110.0, 147.25623925661012 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vRv2Yz1HF4sJ:scholar.google.com/&scioq=Local+Linearity:+the+Key+for+No-regret+Reinforcement+Learning+in+Continuous+MDPs&hl=en&as_sdt=0,11", "gs_version_total": 3, "email": "polimi.it;polimi.it;polimi.it;polimi.it", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Towards an Information Theoretic Framework of Context-Based Offline Meta-Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95247", "id": "QFUsZvw9mx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QFUsZvw9mx", "openreview": "https://openreview.net/forum?id=QFUsZvw9mx", "poster": "/media/PosterPDFs/NeurIPS%202024/95247.png?t=1733318504.1924589", "project": "", "author_site": "Lanqing Li, Hai Zhang, Xinyu Zhang, Shatong Zhu, Yang YU, Junqiao Zhao, Pheng-Ann Heng", "tldr": "", "abstract": "As a marriage between offline RL and meta-RL, the advent of offline meta-reinforcement learning (OMRL) has shown great promise in enabling RL agents to multi-task and quickly adapt while acquiring knowledge safely. Among which, context-based OMRL (COMRL) as a popular paradigm, aims to learn a universal policy conditioned on effective task representations. In this work, by examining several key milestones in the field of COMRL, we propose to integrate these seemingly independent methodologies into a unified framework. Most importantly, we show that the pre-existing COMRL algorithms are essentially optimizing the same mutual information objective between the task variable $M$ and its latent representation $Z$ by implementing various approximate bounds. Such theoretical insight offers ample design freedom for novel algorithms. As demonstrations, we propose a supervised and a self-supervised implementation of $I(Z; M)$, and empirically show that the corresponding optimization algorithms exhibit remarkable generalization across a broad spectrum of RL benchmarks, context shift scenarios, data qualities and deep learning architectures. This work lays the information theoretic foundation for COMRL methods, leading to a better understanding of task representation learning in the context of reinforcement learning. Given its\ngenerality, we envision our framework as a promising offline pre-training paradigm of foundation models for decision making.", "keywords": "Offline Meta-RL;Information Theory", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/07cf2935695929d893afcf5390231f42ef5a32c0.zip", "author": "Lanqing Li;Hai Zhang;Xinyu Zhang;Shatong Zhu;Yang YU;Junqiao Zhao;Pheng-Ann Heng", "authorids": "~Lanqing_Li1;~Hai_Zhang2;~Xinyu_Zhang31;~Shatong_Zhu1;~Yang_YU10;~Junqiao_Zhao1;~Pheng-Ann_Heng1", "gender": "M;M;M;M;M;M;M", "homepage": "https://lanqingli1993.github.io/;https://betray12138.github.io/resume/;;https://github.com/Zhu-Shatong;;http://cs1.tongji.edu.cn/~junqiao;http://www.cse.cuhk.edu.hk/~pheng", "dblp": "275/9979;;;;;;52/2889", "google_scholar": "n8IjgKkAAAAJ;YHqAzxUAAAAJ;TiRMqHAAAAAJ;;;;https://scholar.google.com/citations?sortby=pubdate", "orcid": "0000-0003-1998-4022;;;;;;", "linkedin": "lanqing-li-%EF%BC%88%E6%9D%8E%E8%93%9D%E9%9D%92%EF%BC%89-49209a83/;;xinyu-zhang-088432198/;;yang-yu-a02a6a1b3/;;", "or_profile": "~Lanqing_Li1;~Hai_Zhang2;~Xinyu_Zhang31;~Shatong_Zhu1;~Yang_YU10;~Junqiao_Zhao1;~Pheng-Ann_Heng1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;Tongji University;, State University of New York at Stony Brook;Tongji University;The Chinese University of Hong Kong;Tongji University;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;tongji.edu.cn;cs.stonybrook.edu;tongji.edu.cn;cuhk.edu.hk;tongji.edu.cn;cuhk.edu.hk", "position": "PhD student;MS student;PhD student;Undergrad student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2024towards,\ntitle={Towards an Information Theoretic Framework of Context-Based Offline Meta-Reinforcement Learning},\nauthor={Lanqing Li and Hai Zhang and Xinyu Zhang and Shatong Zhu and Yang YU and Junqiao Zhao and Pheng-Ann Heng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QFUsZvw9mx}\n}", "github": "", "reviewers": "AbCD;7Lx7;sHdF", "pdf_size": 6675112, "rating": "7;7;8", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "3;3;4", "presentation": "2;3;4", "wc_summary": "84;165;146", "wc_strengths": "47;135;185", "wc_weaknesses": "206;63;279", "wc_questions": "149;251;269", "wc_limitations": "56;14;5", "wc_review": "542;628;884", "wc_reply_reviewers": "423;70;69", "wc_reply_authors": "492;29;37", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 131.66666666666666, 34.58644564308715 ], "wc_strengths_avg": [ 122.33333333333333, 57.045790574083746 ], "wc_weaknesses_avg": [ 182.66666666666666, 89.71188450936823 ], "wc_questions_avg": [ 223.0, 52.839379254491625 ], "wc_limitations_avg": [ 25.0, 22.22611077089287 ], "wc_review_avg": [ 684.6666666666666, 145.2568606144149 ], "wc_reply_reviewers_avg": [ 187.33333333333334, 166.64199817439646 ], "wc_reply_authors_avg": [ 186.0, 216.39932224169897 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4014768477953829285&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cse.cuhk.edu.hk;tongji.edu.cn;cs.stonybrook.edu;tongji.edu.cn;cuhk.edu.hk;tongji.edu.cn;cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;1;2;1;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Tongji University;State University of New York at Stony Brook", "aff_unique_dep": "Department of Computer Science and Engineering;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tongji.edu.cn;https://www.stonybrook.edu", "aff_unique_abbr": "CUHK;Tongji;SUNY Stony Brook", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Hong Kong SAR;;Stony Brook", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "WorldCoder, a Model-Based LLM Agent: Building World Models by Writing Code and Interacting with the Environment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95246", "id": "QGJSXMhVaL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QGJSXMhVaL", "openreview": "https://openreview.net/forum?id=QGJSXMhVaL", "poster": "/media/PosterPDFs/NeurIPS%202024/95246.png?t=1733436000.5002184", "project": "", "author_site": "Hao Tang, Darren Key, Kevin Ellis", "tldr": "", "abstract": "We give a model-based agent that builds a Python program representing its knowledge of the world based on its interactions with the environment. The world model tries to explain its interactions, while also being optimistic about what reward it can achieve. We define this optimism as a logical constraint between a program and a planner. We study our agent on gridworlds, and on task planning, finding our approach is more sample-efficient compared to deep RL, more compute-efficient compared to ReAct-style agents, and that it can transfer its knowledge across environments by editing its code.", "keywords": "program synthesis;LLM;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Hao Tang;Darren Yan Key;Kevin Ellis", "authorids": "~Hao_Tang5;~Darren_Yan_Key1;~Kevin_Ellis1", "gender": "M;M;M", "homepage": "https://haotang1995.github.io/;https://darrenkey.github.io/;https://www.cs.cornell.edu/~ellisk/", "dblp": "07/5751-8;;", "google_scholar": ";;L7XI6asAAAAJ", "orcid": ";;", "linkedin": ";darren-key/;", "or_profile": "~Hao_Tang5;~Darren_Yan_Key1;~Kevin_Ellis1", "aff": "Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ntang2024worldcoder,\ntitle={WorldCoder, a Model-Based {LLM} Agent: Building World Models by Writing Code and Interacting with the Environment},\nauthor={Hao Tang and Darren Yan Key and Kevin Ellis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QGJSXMhVaL}\n}", "github": "", "reviewers": "x859;9H9M;WssY;NhBf", "pdf_size": 2685400, "rating": "4;6;7;7", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "57;92;114;267", "wc_strengths": "48;60;162;115", "wc_weaknesses": "65;99;199;164", "wc_questions": "385;72;53;129", "wc_limitations": "1;94;22;97", "wc_review": "556;417;550;772", "wc_reply_reviewers": "612;90;28;62", "wc_reply_authors": "868;0;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 132.5, 80.26985735629533 ], "wc_strengths_avg": [ 96.25, 45.598108513402174 ], "wc_weaknesses_avg": [ 131.75, 52.65631491093922 ], "wc_questions_avg": [ 159.75, 133.0213798605322 ], "wc_limitations_avg": [ 53.5, 42.66438795998368 ], "wc_review_avg": [ 573.75, 127.23280826893667 ], "wc_reply_reviewers_avg": [ 198.0, 240.02916489460193 ], "wc_reply_authors_avg": [ 217.0, 375.85502524244635 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11026860296026815376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cornell.edu;cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Reference Trustable Decoding: A Training-Free Augmentation Paradigm for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95245", "id": "QHRLFdhkLu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QHRLFdhkLu", "openreview": "https://openreview.net/forum?id=QHRLFdhkLu", "poster": "/media/PosterPDFs/NeurIPS%202024/95245.png?t=1733799756.7400753", "project": "", "author_site": "Luohe Shi, Yao Yao, Zuchao Li, Lefei Zhang, Hai Zhao", "tldr": "", "abstract": "Large language models (LLMs) have rapidly advanced and demonstrated impressive capabilities. In-Context Learning (ICL) and Parameter-Efficient Fine-Tuning (PEFT) are currently two mainstream methods for augmenting LLMs to downstream tasks. ICL typically constructs a few-shot learning scenario, either manually or by setting up a Retrieval-Augmented Generation (RAG) system, helping models quickly grasp domain knowledge or question-answering patterns without changing model parameters. However, this approach involves trade-offs, such as slower inference speed and increased space occupancy. PEFT assists the model in adapting to tasks through minimal parameter modifications, but the training process still demands high hardware requirements, even with a small number of parameters involved. To address these challenges, we propose Reference Trustable Decoding (RTD), a paradigm that allows models to quickly adapt to new tasks without fine-tuning, maintaining low inference costs. RTD constructs a reference datastore from the provided training examples and optimizes the LLM's final vocabulary distribution by flexibly selecting suitable references based on the input, resulting in more trustable responses and enabling the model to adapt to downstream tasks at a low cost. Experimental evaluations on various LLMs using different benchmarks demonstrate that RTD establishes a new paradigm for augmenting models to downstream tasks. Furthermore, our method exhibits strong orthogonality with traditional methods, allowing for concurrent usage. Our code can be found at https://github.com/ShiLuohe/ReferenceTrustableDecoding.", "keywords": "LLM;augmentation;efficient methods", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/fb58df9c796139099ab1035f4f70e2176af85960.zip", "author": "Shi Luohe;Yao Yao;Zuchao Li;Lefei Zhang;hai zhao", "authorids": "~Shi_Luohe1;~Yao_Yao8;~Zuchao_Li1;~Lefei_Zhang1;~hai_zhao1", "gender": "M;F;M;M;M", "homepage": ";https://github.com/Zoeyyao27;https://zcli-charlie.github.io/;;http://bcmi.sjtu.edu.cn/~zhaohai/", "dblp": "383/8810.html;;198/9339;28/10770;25/1145-1.html", "google_scholar": "YsT5134AAAAJ;tLMP3IkAAAAJ;PyzBf5oAAAAJ;BLKHwNwAAAAJ;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ", "orcid": ";0009-0006-6055-2546;;;", "linkedin": ";;;;", "or_profile": "~Shi_Luohe1;~Yao_Yao8;~Zuchao_Li1;~Lefei_Zhang1;~hai_zhao1", "aff": "Wuhan University;Shanghai Jiaotong University;Wuhan University;Wuhan University;Shanghai Jiaotong University", "aff_domain": "whu.edu.cn;sjtu.edu.cn;whu.edu.cn;whu.edu.cn;sjtu.edu.cn", "position": "Undergrad student;PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nluohe2024reference,\ntitle={Reference Trustable Decoding: A Training-Free Augmentation Paradigm for Large Language Models},\nauthor={Shi Luohe and Yao Yao and Zuchao Li and Lefei Zhang and hai zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QHRLFdhkLu}\n}", "github": "", "reviewers": "vZ3x;wxwA;tphZ", "pdf_size": 526905, "rating": "4;5;6", "confidence": "3;3;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;2;3", "wc_summary": "83;124;84", "wc_strengths": "90;79;102", "wc_weaknesses": "93;237;172", "wc_questions": "75;60;5", "wc_limitations": "109;8;9", "wc_review": "450;508;372", "wc_reply_reviewers": "0;66;108", "wc_reply_authors": "198;308;369", "reply_reviewers": "0;1;2", "reply_authors": "2;3;3", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 97.0, 19.096247449870006 ], "wc_strengths_avg": [ 90.33333333333333, 9.392668535736915 ], "wc_weaknesses_avg": [ 167.33333333333334, 58.88029287366775 ], "wc_questions_avg": [ 46.666666666666664, 30.09245014211298 ], "wc_limitations_avg": [ 42.0, 47.37791327893902 ], "wc_review_avg": [ 443.3333333333333, 55.7215298505783 ], "wc_reply_reviewers_avg": [ 58.0, 44.45222154178574 ], "wc_reply_authors_avg": [ 291.6666666666667, 70.75937315217602 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gyUJ7buZPCUJ:scholar.google.com/&scioq=Reference+Trustable+Decoding:+A+Training-Free+Augmentation+Paradigm+for+Large+Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "whu.edu.cn;sjtu.edu.cn;whu.edu.cn;whu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Wuhan University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.whu.edu.cn/;https://www.sjtu.edu.cn", "aff_unique_abbr": "WHU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Upping the Game: How 2D U-Net Skip Connections Flip 3D Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95244", "id": "QI1ScdeQjp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QI1ScdeQjp", "openreview": "https://openreview.net/forum?id=QI1ScdeQjp", "poster": "/media/PosterPDFs/NeurIPS%202024/95244.png?t=1731334701.6604967", "project": "", "author_site": "Xingru Huang, yihao guo, Jian Huang, Tianyun Zhang, HE HONG, Shaowei Jiang, Yaoqi Sun", "tldr": "", "abstract": "In the present study, we introduce an innovative structure for 3D medical image segmentation that effectively integrates 2D U-Net-derived skip connections into the architecture of 3D convolutional neural networks (3D CNNs). Conventional 3D segmentation techniques predominantly depend on isotropic 3D convolutions for the extraction of volumetric features, which frequently engenders inefficiencies due to the varying information density across the three orthogonal axes in medical imaging modalities such as computed tomography (CT) and magnetic resonance imaging (MRI). This disparity leads to a decline in axial-slice plane feature extraction efficiency, with slice plane features being comparatively underutilized relative to features in the time-axial. To address this issue, we introduce the U-shaped Connection (uC), utilizing simplified 2D U-Net in place of standard skip connections to augment the extraction of the axial-slice plane features while concurrently preserving the volumetric context afforded by 3D convolutions. Based on uC, we further present uC 3DU-Net, an enhanced 3D U-Net backbone that integrates the uC approach to facilitate optimal axial-slice plane feature utilization. Through rigorous experimental validation on five publicly accessible datasets\u2014FLARE2021, OIMHS, FeTA2021, AbdomenCT-1K, and BTCV, the proposed method surpasses contemporary state-of-the-art models. Notably, this performance is achieved while reducing the number of parameters and computational complexity. This investigation underscores the efficacy of incorporating 2D convolutions within the framework of 3D CNNs to overcome the intrinsic limitations of volumetric segmentation, thereby potentially expanding the frontiers of medical image analysis. Our implementation is available at https://github.com/IMOP-lab/U-Shaped-Connection.", "keywords": "3D medical image segmentation;Anisotropic voxel spacing;Skip connection;Plane feature extraction;Multiscale feature fusion", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Xingru Huang;Yihao Guo;Jian Huang;Tianyun Zhang;HE HONG;Shaowei Jiang;Yaoqi Sun", "authorids": "~Xingru_Huang1;~Yihao_Guo2;~Jian_Huang9;~Tianyun_Zhang3;~HE_HONG1;~Shaowei_Jiang1;~Yaoqi_Sun1", "gender": "Non-Binary;Non-Binary;;M;M;Non-Binary;M", "homepage": ";;https://faculty.hdu.edu.cn/;;;;", "dblp": ";;;;226/6091;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;Eye3vKkAAAAJ;https://scholar.google.com.hk/citations?user=G9Sdb2AAAAAJ;dfeLyycAAAAJ;", "orcid": "0000-0003-3971-8434;0009-0001-6863-6852;;;0000-0001-8874-241X;0009-0001-1679-3047;0009-0004-3215-9603", "linkedin": ";;;;;;", "or_profile": "~Xingru_Huang1;~Yihao_Guo2;~HE_HONG1;~Shaowei_Jiang1;~Yaoqi_Sun1;~J_Huang2;~TIANYUN_ZHANG2", "aff": "Hangzhou Dianzi University;Hangzhou Dianzi University;Dalian University;Hangzhou Dianzi University;Hangzhou Dianzi University;Hangzhou Dianzi University;Hangzhou Dianzi University", "aff_domain": "hdu.edu.cn;hdu.edu.cn;dlu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn", "position": "Full Professor;MS student;Associate Professor;Full Professor;PhD student;MS student;MS student", "bibtex": "@inproceedings{\nhuang2024upping,\ntitle={Upping the Game: How 2D U-Net Skip Connections Flip 3D Segmentation},\nauthor={Xingru Huang and Yihao Guo and Jian Huang and Tianyun Zhang and HE HONG and Shaowei Jiang and Yaoqi Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QI1ScdeQjp}\n}", "github": "", "reviewers": "WWao;Lrjd;7CRU;J8Kk", "pdf_size": 2605814, "rating": "5;5;7;7", "confidence": "5;4;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "19;90;23;48", "wc_strengths": "21;103;31;46", "wc_weaknesses": "331;280;39;33", "wc_questions": "3;56;40;29", "wc_limitations": "3;4;12;12", "wc_review": "377;533;145;168", "wc_reply_reviewers": "42;37;37;35", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 45.0, 28.257742301889582 ], "wc_strengths_avg": [ 50.25, 31.728339067779768 ], "wc_weaknesses_avg": [ 170.75, 135.96759724287253 ], "wc_questions_avg": [ 32.0, 19.300259065618782 ], "wc_limitations_avg": [ 7.75, 4.264680527307995 ], "wc_review_avg": [ 305.75, 159.3225894215883 ], "wc_reply_reviewers_avg": [ 37.75, 2.5860201081971503 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13011645766689694343&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "email": "hdu.edu.cn;hdu.edu.cn;dlu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn;hdu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Hangzhou Dianzi University;Dalian University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hdu.edu.cn/;http://www.dlu.edu.cn", "aff_unique_abbr": "HGHDU;DLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Text to Blind Motion", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97700", "id": "QIJQ1qCGqV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QIJQ1qCGqV", "openreview": "https://openreview.net/forum?id=QIJQ1qCGqV", "poster": "/media/PosterPDFs/NeurIPS%202024/97700.png?t=1733902890.9176319", "project": "", "author_site": "Hee Jae Kim, Kathakoli Sengupta, Masaki Kuribayashi, Hernisa Kacorri, Eshed Ohn-Bar", "tldr": "", "abstract": "People who are blind perceive the world differently than those who are sighted, which can result in distinct motion characteristics. For instance, when crossing at an intersection, blind individuals may have different patterns of movement, such as veering more from a straight path or using touch-based exploration around curbs and obstacles. These behaviors may appear less predictable to motion models embedded in technologies such as autonomous vehicles. Yet, the ability of 3D motion models to capture such behavior has not been previously studied, as existing datasets for 3D human motion currently lack diversity and are biased toward people who are sighted. In this work, we introduce BlindWays, the first multimodal motion benchmark for pedestrians who are blind. We collect 3D motion data using wearable sensors with 11 blind participants navigating eight different routes in a real-world urban setting. Additionally, we provide rich textual descriptions that capture the distinctive movement characteristics of blind pedestrians and their interactions with both the navigation aid (e.g., a white cane or a guide dog) and the environment. We benchmark state-of-the-art 3D human prediction models, finding poor performance with off-the-shelf and pre-training-based methods for our novel task. To contribute toward safer and more reliable systems that can seamlessly reason over diverse human movements in their environments, our text-and-motion benchmark is available at https://blindways.github.io/.", "keywords": "3D human modeling;pedestrian navigation;blind;mobility;accessibility", "primary_area": "", "supplementary_material": "", "author": "Hee Jae Kim;Kathakoli Sengupta;Masaki Kuribayashi;Hernisa Kacorri;Eshed Ohn-Bar", "authorids": "~Hee_Jae_Kim1;~Kathakoli_Sengupta1;~Masaki_Kuribayashi1;~Hernisa_Kacorri1;~Eshed_Ohn-Bar4", "gender": "F;F;M;F;Not Specified", "homepage": "https://hee-jae-kim.github.io/;;https://www.masakikuribayashi.com/;https://hernisakacorri.com/;https://eshed1.github.io/", "dblp": "130/9539;;;13/8237.html;121/0305", "google_scholar": "9i7QbK0AAAAJ;VLbRHNAAAAAJ;fdEcI_MAAAAJ;El-R5MEAAAAJ;p9zVBV4AAAAJ", "orcid": ";;;0000-0002-7798-308X;", "linkedin": "https://linkedin.com/in/hee-jae-kim-aa45261a0;kathakolisengupta/;masaki-kuribayashi/;;", "or_profile": "~Hee_Jae_Kim1;~Kathakoli_Sengupta1;~Masaki_Kuribayashi1;~Hernisa_Kacorri1;~Eshed_Ohn-Bar4", "aff": "Boston University, Boston University;Boston University, Boston University;Waseda University;University of Maryland, College Park;Boston University", "aff_domain": "bu.edu;bu.edu;waseda.jp;umd.edu;bu.edu", "position": "PhD student;MS student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkim2024text,\ntitle={Text to Blind Motion},\nauthor={Hee Jae Kim and Kathakoli Sengupta and Masaki Kuribayashi and Hernisa Kacorri and Eshed Ohn-Bar},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QIJQ1qCGqV}\n}", "github": "", "reviewers": "c6UN;Z4ZQ;S3wn", "pdf_size": 810101, "rating": "6;7;8", "confidence": "4;4;5", "wc_summary_and_contributions": "36;66;45", "wc_strengths": "6;43;26", "wc_improvement": "83;183;52", "wc_limitations": "4;3;28", "wc_correctness": "6;8;6", "wc_clarity": "5;6;11", "wc_relation_to_prior_work": "7;6;14", "wc_documentation": "4;11;22", "wc_additional_feedback": "1;1;1", "wc_review": "152;327;205", "wc_reply_reviewers": "13;0;20", "wc_reply_authors": "57;0;0", "reply_reviewers": "1;0;1", "reply_authors": "4;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 49.0, 12.569805089976535 ], "wc_strengths_avg": [ 25.0, 15.121728296285006 ], "wc_improvement_avg": [ 106.0, 55.89871793401586 ], "wc_limitations_avg": [ 11.666666666666666, 11.556623882239812 ], "wc_correctness_avg": [ 6.666666666666667, 0.9428090415820634 ], "wc_clarity_avg": [ 7.333333333333333, 2.6246692913372702 ], "wc_relation_to_prior_work_avg": [ 9.0, 3.559026084010437 ], "wc_documentation_avg": [ 12.333333333333334, 7.408703590297623 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 228.0, 73.27118578722926 ], "wc_reply_reviewers_avg": [ 11.0, 8.286535263104035 ], "wc_reply_authors_avg": [ 19.0, 26.870057685088806 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yLX-YGlc-oMJ:scholar.google.com/&scioq=Text+to+Blind+Motion&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "bu.edu;bu.edu;waseda.jp;umd.edu;bu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Boston University;Waseda University;University of Maryland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bu.edu;https://www.waseda.jp/top;https://www/umd.edu", "aff_unique_abbr": "BU;Waseda;UMD", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Boston;;College Park", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Understanding the Expressivity and Trainability of Fourier Neural Operator: A Mean-Field Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95243", "id": "QJr02BTM7J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QJr02BTM7J", "openreview": "https://openreview.net/forum?id=QJr02BTM7J", "poster": "/media/PosterPDFs/NeurIPS%202024/95243.png?t=1731731156.7062256", "project": "", "author_site": "Takeshi Koshizuka, Masahiro Fujisawa, Yusuke Tanaka, Issei Sato", "tldr": "", "abstract": "In this paper, we explores the expressivity and trainability of the Fourier Neural Operator (FNO). We establish a mean-field theory for the FNO, analyzing the behavior of the random FNO from an \\emph{edge of chaos} perspective. Our investigation into the expressivity of a random FNO involves examining the ordered-chaos phase transition of the network based on the weight distribution. This phase transition demonstrates characteristics unique to the FNO, induced by mode truncation, while also showcasing similarities to those of densely connected networks. Furthermore, we identify a connection between expressivity and trainability: the ordered and chaotic phases correspond to regions of vanishing and exploding gradients, respectively. This finding provides a practical prerequisite for the stable training of the FNO. Our experimental results corroborate our theoretical findings.", "keywords": "Fourier Neural Operator;PDE;Mean-Field Theory", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Takeshi Koshizuka;Masahiro Fujisawa;Yusuke Tanaka;Issei Sato", "authorids": "~Takeshi_Koshizuka1;~Masahiro_Fujisawa1;~Yusuke_Tanaka1;~Issei_Sato2", "gender": "M;M;M;", "homepage": "https://sites.google.com/view/takeshi-koshizuka/home;https://msfuji0211.github.io/;https://sites.google.com/view/yusuketanaka/english;https://www.ml.is.s.u-tokyo.ac.jp/issei-sato-en", "dblp": ";236/6307;34/2327-2;", "google_scholar": "https://scholar.google.co.jp/citations?hl=en;gS24jX8AAAAJ;https://scholar.google.co.jp/citations?user=leMnxA4AAAAJ;", "orcid": ";;0000-0002-7316-1425;", "linkedin": "takeshi-koshizuka-837938188/?originalSubdomain=jp;;;", "or_profile": "~Takeshi_Koshizuka1;~Masahiro_Fujisawa1;~Yusuke_Tanaka1;~Issei_Sato2", "aff": "The University of Tokyo;RIKEN;NTT;The University of Tokyo", "aff_domain": "g.ecc.u-tokyo.ac.jp;riken.jp;ntt.com;u-tokyo.ac.jp", "position": "PhD student;Special Postdoctoral Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nkoshizuka2024understanding,\ntitle={Understanding the Expressivity and Trainability of Fourier Neural Operator: A Mean-Field Perspective},\nauthor={Takeshi Koshizuka and Masahiro Fujisawa and Yusuke Tanaka and Issei Sato},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QJr02BTM7J}\n}", "github": "", "reviewers": "ePA1;gSyo;Jbgf;xhAf", "pdf_size": 8210134, "rating": "5;6;7;8", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "31;130;25;85", "wc_strengths": "42;78;70;160", "wc_weaknesses": "275;171;82;127", "wc_questions": "56;307;30;59", "wc_limitations": "49;7;1;6", "wc_review": "453;693;208;437", "wc_reply_reviewers": "33;42;31;99", "wc_reply_authors": "363;46;73;55", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 42.868257487329714 ], "wc_strengths_avg": [ 87.5, 43.940300408622605 ], "wc_weaknesses_avg": [ 163.75, 71.52403442200391 ], "wc_questions_avg": [ 113.0, 112.57219905465115 ], "wc_limitations_avg": [ 15.75, 19.330998422223306 ], "wc_review_avg": [ 447.75, 171.5887161208452 ], "wc_reply_reviewers_avg": [ 51.25, 27.87808278917329 ], "wc_reply_authors_avg": [ 134.25, 132.42615866965258 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iVJONtz_lJwJ:scholar.google.com/&scioq=Understanding+the+Expressivity+and+Trainability+of+Fourier+Neural+Operator:+A+Mean-Field+Perspective&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "g.ecc.u-tokyo.ac.jp;riken.jp;ntt.com;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Tokyo;RIKEN;NTT Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.riken.jp;https://www.ntt.co.jp", "aff_unique_abbr": "UTokyo;RIKEN;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "DeeR-VLA: Dynamic Inference of Multimodal Large Language Models for Efficient Robot Execution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95242", "id": "QKp3nhPU41", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QKp3nhPU41", "openreview": "https://openreview.net/forum?id=QKp3nhPU41", "poster": "", "project": "", "author_site": "Yang Yue, Yulin Wang, Bingyi Kang, Yizeng Han, Shenzhi Wang, Shiji Song, Jiashi Feng, Gao Huang", "tldr": "", "abstract": "Multimodal Large Language Models (MLLMs) have demonstrated remarkable comprehension and reasoning capabilities with complex language and visual data.\nThese advances have spurred the vision of establishing a generalist robotic MLLM proficient in understanding complex human instructions and accomplishing various embodied tasks, whose feasibility has been recently verified~\\cite{rt-2,rt-x}.\nHowever, developing MLLMs for real-world robots is challenging due to the typically limited computation and memory capacities available on robotic platforms. \nIn contrast, the inference of MLLMs usually incorporates storing billions of parameters and performing tremendous computation, imposing significant hardware demands.\nIn our paper, we seek to address this challenge by leveraging an intriguing observation: relatively easier situations make up the bulk of the procedure of controlling robots to fulfill diverse tasks, and they generally require far smaller models to obtain the correct robotic actions.\nMotivated by this observation, we propose a \\emph{Dynamic\nEarly-Exit for Robotic MLLM} (DeeR) framework that automatically adjusts the size of the activated MLLM based on each situation at hand. \nThe approach leverages a multi-exit architecture in MLLMs, which allows the model to cease processing once a proper size of the model has been activated for a specific situation, thus avoiding further redundant computation. \nAdditionally, we develop novel algorithms that establish early-termination criteria for DeeR, conditioned on predefined demands such as average computational cost (\\emph{i.e.}, power consumption), as well as peak computational consumption (\\emph{i.e.}, latency) and GPU memory usage. These enhancements ensure that DeeR operates efficiently under varying resource constraints while maintaining competitive performance.\nMoreover, we design a tailored training method for integrating temporal information on top of such multi-exit architectures to predict actions reasonably. \nOn the CALVIN robot manipulation benchmark, DeeR demonstrates significant reductions in computational costs by 5.2-6.5x and GPU memory by 2x without compromising performance.\nCode and checkpoints are available at https://github.com/yueyang130/DeeR-VLA.", "keywords": "embodied AI;dynamic network;CALVIN benchmark;multimodal large language model;robotics", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/61aaeb342d459077610f18a2e41ef2a8a5d567a1.zip", "author": "Yang Yue;Yulin Wang;Bingyi Kang;Yizeng Han;Shenzhi Wang;Shiji Song;Jiashi Feng;Gao Huang", "authorids": "~Yang_Yue1;~Yulin_Wang1;~Bingyi_Kang1;~Yizeng_Han1;~Shenzhi_Wang1;~Shiji_Song1;~Jiashi_Feng1;~Gao_Huang1", "gender": ";M;;M;;M;;M", "homepage": ";https://www.wyl.cool/;https://bingykang.github.io/;https://yizenghan.top/;;;;http://www.gaohuang.net", "dblp": ";;;217/9548;;72/5351;;", "google_scholar": ";gBP38gcAAAAJ;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;25mubAsAAAAJ;;;;-P9LwcgAAAAJ", "orcid": ";0000-0002-1363-0234;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Yang_Yue1;~Yulin_Wang1;~Bingyi_Kang1;~Yizeng_Han1;~Shenzhi_Wang1;~Shiji_Song1;~Jiashi_Feng1;~Gao_Huang1", "aff": ";Tsinghua University;Bytedance;Tsinghua University;;Tsinghua University;;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;bytedance.com;tsinghua.edu.cn;;mail.tsinghua.edu.cn;;tsinghua.edu.cn", "position": ";PhD student;Researcher;PhD student;;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nyue2024deervla,\ntitle={DeeR-{VLA}: Dynamic Inference of Multimodal Large Language Models for Efficient Robot Execution},\nauthor={Yang Yue and Yulin Wang and Bingyi Kang and Yizeng Han and Shenzhi Wang and Shiji Song and Jiashi Feng and Gao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QKp3nhPU41}\n}", "github": "", "reviewers": "4zeN;Cni2;LFFp;4SAr", "pdf_size": 1764677, "rating": "5;7;7;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "85;94;41;70", "wc_strengths": "101;72;91;39", "wc_weaknesses": "74;111;199;86", "wc_questions": "69;61;5;92", "wc_limitations": "52;100;1;11", "wc_review": "381;438;337;298", "wc_reply_reviewers": "0;56;165;0", "wc_reply_authors": "70;49;421;28", "reply_reviewers": "0;1;2;0", "reply_authors": "2;2;4;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.5, 20.1059692628831 ], "wc_strengths_avg": [ 75.75, 23.636571240347024 ], "wc_weaknesses_avg": [ 117.5, 48.91063279083598 ], "wc_questions_avg": [ 56.75, 31.971667144520318 ], "wc_limitations_avg": [ 41.0, 39.05764969887461 ], "wc_review_avg": [ 363.5, 52.0792665078916 ], "wc_reply_reviewers_avg": [ 55.25, 67.36235966769573 ], "wc_reply_authors_avg": [ 142.0, 161.76371657451494 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13041339969740064881&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": ";tsinghua.edu.cn;bytedance.com;tsinghua.edu.cn;;mail.tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Tsinghua University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "THU;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FUSU: A Multi-temporal-source Land Use Change Segmentation Dataset for Fine-grained Urban Semantic Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97699", "id": "QLO0pXYKVi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QLO0pXYKVi", "openreview": "https://openreview.net/forum?id=QLO0pXYKVi", "poster": "", "project": "", "author_site": "Shuai Yuan, Guancong Lin, Lixian Zhang, Runmin Dong, Jinxiao Zhang, Shuang Chen, Juepeng Zheng, Jie Wang, Haohuan Fu", "tldr": "", "abstract": "Fine urban change segmentation using multi-temporal remote sensing images is essential for understanding human-environment interactions in urban areas. Although there have been advances in high-quality land cover datasets that reveal the physical features of urban landscapes, the lack of fine-grained land use datasets hinders a deeper understanding of how human activities are distributed across landscapes and the impact of these activities on the environment, thus constraining proper technique development. To address this, we introduce FUSU, the first fine-grained land use change segmentation dataset for Fine-grained Urban Semantic Understanding. FUSU features the most detailed land use classification system to date, with 17 classes and 30 billion pixels of annotations. It includes bi-temporal high-resolution satellite images with 0.2-0.5 m ground sample distance and monthly optical and radar satellite time series, covering 847 km^2 across five urban areas in the southern and northern of China with different geographical features. The fine-grained land use pixel-wise annotations and high spatial-temporal resolution data provide a robust foundation for developing proper deep learning models to provide contextual insights on human activities and urbanization. To fully leverage FUSU, we propose a unified time-series architecture for both change detection and segmentation. We benchmark FUSU on various methods for several tasks. Dataset and code are available at: https://github.com/yuanshuai0914/FUSU.", "keywords": "Change Segmentation;fine-grained land use;multi-source;multi-temporal", "primary_area": "", "supplementary_material": "", "author": "Shuai Yuan;Guancong Lin;Lixian Zhang;Runmin Dong;Jinxiao Zhang;Shuang Chen;Juepeng Zheng;Jie Wang;Haohuan Fu", "authorids": "~Shuai_Yuan1;~Guancong_Lin1;~Lixian_Zhang1;~Runmin_Dong1;~Jinxiao_Zhang1;~Shuang_Chen2;~Juepeng_Zheng1;~Jie_Wang51;~Haohuan_Fu1", "gender": "M;M;M;F;F;;M;M;M", "homepage": ";https://rssysu.github.io/;;https://scholar.google.com/citations?hl=zh-CN&user=6J2t4zEAAAAJ;http://www.thuhpgc.net/mediawiki/index.php/Jinxiao_Zhang;;https://rssysu.github.io/;;", "dblp": "19/1243-5;;45/2915;235/5351;212/4286;;237/5258;;71/3657", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-TW;;wL_uBCQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;aB_HZKIAAAAJ;;xrb-A3kAAAAJ;Wl9-ytMAAAAJ;hbMx114AAAAJ", "orcid": "0000-0002-8942-0145;;;0000-0002-2999-2029;;;0000-0002-4403-593X;0000-0002-9663-3165;", "linkedin": ";;;;;shuangchen17/;;;", "or_profile": "~Shuai_Yuan1;~Guancong_Lin1;~Lixian_Zhang1;~Runmin_Dong1;~Jinxiao_Zhang1;~Shuang_Chen2;~Juepeng_Zheng1;~Jie_Wang51;~Haohuan_Fu1", "aff": "The University of Hong Kong;Sun Yat-Sen University;Tsinghua University;Tsinghua University;Tsinghua University;University of Hong Kong;Sun Yat-Sen University;Pengcheng Laboratory;Tsinghua University", "aff_domain": "hku.hk;mail2.sysu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;hku.hk;mail.sysu.edu.cn;pcl.ac.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;PhD student;Postdoc;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyuan2024fusu,\ntitle={{FUSU}: A Multi-temporal-source Land Use Change Segmentation Dataset for Fine-grained Urban Semantic Understanding},\nauthor={Shuai Yuan and Guancong Lin and Lixian Zhang and Runmin Dong and Jinxiao Zhang and Shuang Chen and Juepeng Zheng and Jie Wang and Haohuan Fu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QLO0pXYKVi}\n}", "github": "", "reviewers": "xrGX;ozMH;HuM6", "pdf_size": 4863750, "rating": "5;6;8", "confidence": "5;4;4", "wc_summary_and_contributions": "66;27;113", "wc_strengths": "34;29;55", "wc_improvement": "152;21;191", "wc_limitations": "77;14;21", "wc_correctness": "104;3;9", "wc_clarity": "53;3;6", "wc_relation_to_prior_work": "14;3;17", "wc_documentation": "14;6;19", "wc_additional_feedback": "1;1;1", "wc_review": "515;107;432", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 68.66666666666667, 35.15995196558469 ], "wc_strengths_avg": [ 39.333333333333336, 11.2644968324772 ], "wc_improvement_avg": [ 121.33333333333333, 72.71099565326341 ], "wc_limitations_avg": [ 37.333333333333336, 28.193773938387334 ], "wc_correctness_avg": [ 38.666666666666664, 46.26253583864835 ], "wc_clarity_avg": [ 20.666666666666668, 22.895899681432525 ], "wc_relation_to_prior_work_avg": [ 11.333333333333334, 6.018490028422597 ], "wc_documentation_avg": [ 13.0, 5.354126134736337 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.3333333333333, 176.0612267239881 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17557824560139477394&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hku.hk;mail2.sysu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;hku.hk;mail.sysu.edu.cn;pcl.ac.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;2;2;0;1;3;2", "aff_unique_norm": "University of Hong Kong;Sun Yat-sen University;Tsinghua University;Pengcheng Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hku.hk;http://www.sysu.edu.cn/;https://www.tsinghua.edu.cn;", "aff_unique_abbr": "HKU;SYSU;THU;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Generate Universal Adversarial Perturbations for Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95241", "id": "QLRO8o4bol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QLRO8o4bol", "openreview": "https://openreview.net/forum?id=QLRO8o4bol", "poster": "/media/PosterPDFs/NeurIPS%202024/95241.png?t=1731316622.1706097", "project": "", "author_site": "Yiman Hu, Yixiong Zou, Ruixuan Li, Yuhua Li", "tldr": "", "abstract": "Deep networks are known to be vulnerable to adversarial examples which are deliberately designed to mislead the trained model by introducing imperceptible perturbations to input samples. Compared to traditional perturbations crafted specifically for each data point, Universal Adversarial Perturbations (UAPs) are input-agnostic and shown to be more practical in the real world. However, UAPs are typically generated in a close-set scenario that shares the same classification task during the training and testing phases. This paper demonstrates the ineffectiveness of traditional UAPs in open-set scenarios like Few-Shot Learning (FSL). Through analysis, we identify two primary challenges that hinder the attacking process: the task shift and the semantic shift. To enhance the transferability of UAPs in FSL, we propose a unifying attacking framework addressing these two shifts. The task shift is addressed by aligning proxy tasks to the downstream tasks, while the semantic shift is handled by leveraging the generalizability of pre-trained encoders.The proposed Few-Shot Attacking FrameWork, denoted as FSAFW, can effectively generate UAPs across various FSL training paradigms and different downstream tasks. Our approach not only sets a new standard for state-of-the-art works but also significantly enhances attack performance, exceeding the baseline method by over 16\\%.", "keywords": "Adversarial Attacks;Universal Adversarial Perturbations;Few-Shot Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yiman Hu;Yixiong Zou;Ruixuan Li;Yuhua Li", "authorids": "~Yiman_Hu1;~Yixiong_Zou1;~Ruixuan_Li1;~Yuhua_Li2", "gender": ";;M;F", "homepage": ";;http://idc.hust.edu.cn/rxli/index.html;", "dblp": ";;60/4429.html;79/5796-3", "google_scholar": ";;https://scholar.google.com/scholar?q=ruixuan+li;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0002-7791-5511;", "linkedin": ";;https://www.linkedin.cn/incareer/in/ruixuan-li-b367319;", "or_profile": "~Yiman_Hu1;~Yixiong_Zou1;~Ruixuan_Li1;~Yuhua_Li2", "aff": ";;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": ";;hust.edu.cn;hust.edu.cn", "position": ";;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024generate,\ntitle={Generate Universal Adversarial Perturbations for Few-Shot Learning},\nauthor={Yiman Hu and Yixiong Zou and Ruixuan Li and Yuhua Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QLRO8o4bol}\n}", "github": "", "reviewers": "yKwa;gmrg;L9xm;id8n", "pdf_size": 1857150, "rating": "5;5;6;6", "confidence": "5;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;2", "wc_summary": "59;85;51;106", "wc_strengths": "12;43;30;47", "wc_weaknesses": "345;92;127;65", "wc_questions": "42;38;97;97", "wc_limitations": "44;4;1;8", "wc_review": "502;262;306;323", "wc_reply_reviewers": "35;0;14;0", "wc_reply_authors": "67;44;23;44", "reply_reviewers": "2;0;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 75.25, 21.75287337341897 ], "wc_strengths_avg": [ 33.0, 13.656500283747663 ], "wc_weaknesses_avg": [ 157.25, 110.60374089514333 ], "wc_questions_avg": [ 68.5, 28.53506614676055 ], "wc_limitations_avg": [ 14.25, 17.354754391808605 ], "wc_review_avg": [ 348.25, 91.51605050481582 ], "wc_reply_reviewers_avg": [ 12.25, 14.324367350776788 ], "wc_reply_authors_avg": [ 44.5, 15.56438241627338 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hUUl6dV4-tQJ:scholar.google.com/&scioq=Generate+Universal+Adversarial+Perturbations+for+Few-Shot+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "SSDiff: Spatial-spectral Integrated Diffusion Model for Remote Sensing Pansharpening", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95240", "id": "QMVydwvrx7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QMVydwvrx7", "openreview": "https://openreview.net/forum?id=QMVydwvrx7", "poster": "/media/PosterPDFs/NeurIPS%202024/95240.png?t=1731306252.7861748", "project": "", "author_site": "Yu Zhong, Xiao Wu, Liang-Jian Deng, ZiHan Cao, Hong-Xia Dou", "tldr": "", "abstract": "Pansharpening is a significant image fusion technique that merges the spatial content and spectral characteristics of remote sensing images to generate high-resolution multispectral images. Recently, denoising diffusion probabilistic models have been gradually applied to visual tasks, enhancing controllable image generation through low-rank adaptation (LoRA). In this paper, we introduce a spatial-spectral integrated diffusion model for the remote sensing pansharpening task, called SSDiff, which considers the pansharpening process as the fusion process of spatial and spectral components from the perspective of subspace decomposition. Specifically, SSDiff utilizes spatial and spectral branches to learn spatial details and spectral features separately, then employs a designed alternating projection fusion module (APFM) to accomplish the fusion. Furthermore, we propose a frequency modulation inter-branch module (FMIM) to modulate the frequency distribution between branches. The two components of SSDiff can perform favorably against the APFM when utilizing a LoRA-like branch-wise alternative fine-tuning method. It refines SSDiff to capture component-discriminating features more sufficiently. Finally, extensive experiments on four commonly used datasets, i.e., WorldView-3, WorldView-2, GaoFen-2, and QuickBird, demonstrate the superiority of SSDiff both visually and quantitatively. The code is available at https://github.com/Z-ypnos/SSdiff_main.", "keywords": "denoising diffusion model;LoRA;Multi-source image fusion;Pansharpening", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yu Zhong;Xiao Wu;Liang-Jian Deng;Zihan Cao;Hong-Xia Dou", "authorids": "~Yu_Zhong4;~Xiao_Wu6;~Liang-Jian_Deng2;~Zihan_Cao1;~Hong-Xia_Dou1", "gender": "M;M;M;M;F", "homepage": ";https://xiaoxiao-woo.github.io/;https://liangjiandeng.github.io/;https://294coder.github.io/;", "dblp": "84/4962;;136/7368;235/8988;210/0039.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;-aFhoQgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0009-0003-3106-1113;0000-0002-1259-8674;;0000-0002-7532-2122;", "linkedin": ";;;;", "or_profile": "~Yu_Zhong4;~Xiao_Wu6;~Liang-Jian_Deng2;~Zihan_Cao1;~Hong-Xia_Dou1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Xihua University", "aff_domain": "uestc.edu;uestc.edu;uestc.edu.cn;uestc.edu.cn;xhu.edu.cn", "position": "MS student;PhD student;Full Professor;MS student;Associate Professor", "bibtex": "@inproceedings{\nzhong2024ssdiff,\ntitle={{SSD}iff: Spatial-spectral Integrated Diffusion Model for Remote Sensing Pansharpening},\nauthor={Yu Zhong and Xiao Wu and Liang-Jian Deng and Zihan Cao and Hong-Xia Dou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QMVydwvrx7}\n}", "github": "", "reviewers": "Noom;kLBw;qNHH", "pdf_size": 4235200, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "78;166;119", "wc_strengths": "57;155;84", "wc_weaknesses": "111;92;94", "wc_questions": "163;47;44", "wc_limitations": "18;80;9", "wc_review": "427;540;350", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 121.0, 35.95367389665021 ], "wc_strengths_avg": [ 98.66666666666667, 41.330645073870095 ], "wc_weaknesses_avg": [ 99.0, 8.524474568362947 ], "wc_questions_avg": [ 84.66666666666667, 55.40356988097026 ], "wc_limitations_avg": [ 35.666666666666664, 31.562988170042175 ], "wc_review_avg": [ 439.0, 78.02990879570902 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14849542031477505125&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uestc.edu;uestc.edu;uestc.edu.cn;uestc.edu.cn;xhu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;Xihua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;http://www.xihua.edu.cn", "aff_unique_abbr": "UESTC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Aligning Audio-Visual Joint Representations with an Agentic Workflow", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95239", "id": "QMaLS4VeY3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QMaLS4VeY3", "openreview": "https://openreview.net/forum?id=QMaLS4VeY3", "poster": "", "project": "", "author_site": "Shentong Mo, Yibing Song", "tldr": "", "abstract": "Visual content and accompanied audio signals naturally formulate a joint representation to improve audio-visual (AV) related applications. While studies develop various AV representation learning frameworks, the importance of AV data alignment is usually undermined for achieving high-quality representation. We observe that an audio signal may contain background noise interference. Also, non-synchronization may appear between audio and video streams. These non-strict data alignment limits representation quality and downgrade application performance. In this paper, we propose to improve AV joint representations from a data-centric perspective by aligning audio signals to visual data. Our alignment is conducted in an agentic workflow controlled by an LLM-based assistant named AVAgent. For each input AV data pair, our AVAgent uses a multi-modal LLM to convert audio and visual data into language descriptions separately (i.e., tool use). Then, AVAgent reasons whether this paired data is aligned well and plans to edit the audio signal if needed (i.e., planning). The audio editing is executed by predefined actions that filter noise or augment data. Moreover, we use a VLM to evaluate how modified audio signals match the visual content and provide feedback to AVAgent (i.e., reflection). The tool use, planning, and reflection steps operate cyclically to become an agentic workflow where audio signals are gradually aligned to visual content. To this end, existing methods can directly leverage the aligned AV data via our agentic workflow to improve AV joint representations. The experimental results comprehensively demonstrate the state-of-the-art performance of the proposed approach against previous baselines in diverse downstream tasks.", "keywords": "Audio-visual Learning;LLM Agent", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shentong Mo;Yibing Song", "authorids": "~Shentong_Mo1;~Yibing_Song1", "gender": ";", "homepage": ";https://ybsong00.github.io/", "dblp": ";77/2117", "google_scholar": ";oRhJHmIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Shentong_Mo1;~Yibing_Song1", "aff": ";Alibaba DAMO Academy", "aff_domain": ";alibaba-inc.com", "position": ";Staff Scientist", "bibtex": "@inproceedings{\nmo2024aligning,\ntitle={Aligning Audio-Visual Joint Representations with an Agentic Workflow},\nauthor={Shentong Mo and Yibing Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QMaLS4VeY3}\n}", "github": "", "reviewers": "GoMh;siDn;RJkZ;Jci9", "pdf_size": 2080824, "rating": "3;5;5;5", "confidence": "4;3;4;4", "soundness": "1;2;3;2", "novelty": "1;2;3;3", "presentation": "3;3;3;2", "wc_summary": "94;69;89;36", "wc_strengths": "93;74;77;31", "wc_weaknesses": "223;53;98;231", "wc_questions": "57;86;131;137", "wc_limitations": "5;24;8;53", "wc_review": "472;306;403;488", "wc_reply_reviewers": "0;63;54;42", "wc_reply_authors": "118;227;176;30", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 22.792542640082964 ], "wc_strengths_avg": [ 68.75, 22.960564017462637 ], "wc_weaknesses_avg": [ 151.25, 77.45442208163456 ], "wc_questions_avg": [ 102.75, 32.95735881407975 ], "wc_limitations_avg": [ 22.5, 19.03286631067428 ], "wc_review_avg": [ 417.25, 71.7334475680627 ], "wc_reply_reviewers_avg": [ 39.75, 24.128561913218117 ], "wc_reply_authors_avg": [ 137.75, 73.19281043927744 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sOr3bkt6jfQJ:scholar.google.com/&scioq=Aligning+Audio-Visual+Joint+Representations+with+an+Agentic+Workflow&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";alibaba-inc.com", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "DAMO Academy", "aff_unique_url": "https://www.alibaba-group.com", "aff_unique_abbr": "Alibaba DAMO", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "SelectIT: Selective Instruction Tuning for LLMs via Uncertainty-Aware Self-Reflection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95238", "id": "QNieOPt4fg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QNieOPt4fg", "openreview": "https://openreview.net/forum?id=QNieOPt4fg", "poster": "/media/PosterPDFs/NeurIPS%202024/95238.png?t=1731338496.3662984", "project": "", "author_site": "Liangxin Liu, Xuebo Liu, Derek Wong, Dongfang Li, Ziyi Wang, Baotian Hu, Min Zhang", "tldr": "", "abstract": "Instruction tuning (IT) is crucial to tailoring large language models (LLMs) towards human-centric interactions. Recent advancements have shown that the careful selection of a small, high-quality subset of IT data can significantly enhance the performance of LLMs. Despite this, common approaches often rely on additional models or data, which increases costs and limits widespread adoption. In this work, we propose a novel approach, termed $\\textit{SelectIT}$, that capitalizes on the foundational capabilities of the LLM itself. Specifically, we exploit the intrinsic uncertainty present in LLMs to more effectively select high-quality IT data, without the need for extra resources. Furthermore, we introduce a curated IT dataset, the $\\textit{Selective Alpaca}$, created by applying SelectIT to the Alpaca-GPT4 dataset. Empirical results demonstrate that IT using Selective Alpaca leads to substantial model ability enhancement. The robustness of SelectIT has also been corroborated in various foundation models and domain-specific tasks. Our findings suggest that longer and more computationally intensive IT data may serve as superior sources of IT, offering valuable insights for future research in this area. Data, code, and scripts are freely available at https://github.com/Blue-Raincoat/SelectIT.", "keywords": "Data Selection;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a3f9ac6304a68d5bc118974b342ab5a9a17746bc.zip", "author": "Liangxin Liu;Xuebo Liu;Derek F. Wong;Dongfang Li;Ziyi Wang;Baotian Hu;Min Zhang", "authorids": "~Liangxin_Liu1;~Xuebo_Liu1;~Derek_F._Wong1;~Dongfang_Li2;~Ziyi_Wang14;~Baotian_Hu1;~Min_Zhang9", "gender": "M;M;M;M;M;M;M", "homepage": ";https://sunbowliu.github.io/;https://www.fst.um.edu.mo/personal/derek-wong/;https://github.com/Blue-Raincoat;;https://zhangmin-nlp-ai.github.io/;http://crazyofapple.github.io", "dblp": "300/6281.html;166/0029-2;123/0533;;155/1902;83/5342-5;https://dblp.uni-trier.de/pid/98/6118.html", "google_scholar": ";XkDl9aoAAAAJ;KjQBe8oAAAAJ;;5NiJ1VoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;_OOzj40AAAAJ", "orcid": ";;0000-0002-5307-7322;;0000-0001-7490-684X;;", "linkedin": ";xuebo-liu-47877b195/;derek-wong-6209445/;;;;", "or_profile": "~Liangxin_Liu1;~Xuebo_Liu1;~Derek_F._Wong1;~Ziyi_Wang14;~Baotian_Hu1;~Min_Zhang9;~dongfang_li1", "aff": "Harbin Institute of Technology;Harbin Institute of Technolgy, Shenzhen;University of Macau;Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;um.edu.mo;stu.hit.edu.cn;hhit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "MS student;Assistant Professor;Associate Professor;Undergrad student;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nliu2024selectit,\ntitle={Select{IT}: Selective Instruction Tuning for {LLM}s via Uncertainty-Aware Self-Reflection},\nauthor={Liangxin Liu and Xuebo Liu and Derek F. Wong and Dongfang Li and Ziyi Wang and Baotian Hu and Min Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QNieOPt4fg}\n}", "github": "", "reviewers": "4rCk;CKC2;KKrK;6ofY", "pdf_size": 7364852, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;2;2;3", "presentation": "2;3;4;4", "wc_summary": "72;56;126;70", "wc_strengths": "45;29;122;185", "wc_weaknesses": "55;84;185;45", "wc_questions": "62;5;19;90", "wc_limitations": "1;1;6;11", "wc_review": "235;175;458;401", "wc_reply_reviewers": "61;41;0;14", "wc_reply_authors": "522;389;161;0", "reply_reviewers": "1;1;0;1", "reply_authors": "4;4;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 81.0, 26.70205984563738 ], "wc_strengths_avg": [ 95.25, 62.61938597591005 ], "wc_weaknesses_avg": [ 92.25, 55.431827500092396 ], "wc_questions_avg": [ 44.0, 33.860005906674026 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_review_avg": [ 317.25, 116.0008081868398 ], "wc_reply_reviewers_avg": [ 29.0, 23.632604596192948 ], "wc_reply_authors_avg": [ 268.0, 201.5130268741949 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=95324172469074813&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": "hit.edu.cn;hit.edu.cn;um.edu.mo;stu.hit.edu.cn;hhit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology;University of Macau", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.um.edu.mo", "aff_unique_abbr": "HIT;UM", "aff_campus_unique_index": "0;1;2;0;1;1;0", "aff_campus_unique": "Harbin;Shenzhen;Macau SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Commonality, Divergence and Variety for Unsupervised Visible-Infrared Person Re-identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95237", "id": "QQSGwpmDfU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QQSGwpmDfU", "openreview": "https://openreview.net/forum?id=QQSGwpmDfU", "poster": "/media/PosterPDFs/NeurIPS%202024/95237.png?t=1733319316.7103567", "project": "", "author_site": "Jiangming Shi, Xiangbo Yin, Yachao Zhang, zhizhong zhang, Yuan Xie, Yanyun Qu", "tldr": "", "abstract": "Unsupervised visible-infrared person re-identification (USVI-ReID) aims to match specified persons in infrared images to visible images without annotations, and vice versa. USVI-ReID is a challenging yet underexplored task. Most existing methods address the USVI-ReID through cluster-based contrastive learning, which simply employs the cluster center to represent an individual. However, the cluster center primarily focuses on commonality, overlooking divergence and variety. To address the problem, we propose a Progressive Contrastive Learning with Hard and Dynamic Prototypes for USVI-ReID. In brief, we generate the hard prototype by selecting the sample with the maximum distance from the cluster center. We reveal that the inclusion of the hard prototype in contrastive loss helps to emphasize divergence. Additionally, instead of rigidly aligning query images to a specific prototype, we generate the dynamic prototype by randomly picking samples within a cluster. The dynamic prototype is used to encourage variety. Finally, we introduce a progressive learning strategy to gradually shift the model's attention towards divergence and variety, avoiding cluster deterioration. Extensive experiments conducted on the publicly available SYSU-MM01 and RegDB datasets validate the effectiveness of the proposed method.", "keywords": "VI-ReID;Multi-Prototype;Progressive Contrastive Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiangming Shi;Xiangbo Yin;Yachao Zhang;zhizhong zhang;Yuan Xie;Yanyun Qu", "authorids": "~Jiangming_Shi1;~Xiangbo_Yin1;~Yachao_Zhang1;~zhizhong_zhang1;~Yuan_Xie5;~Yanyun_Qu1", "gender": "M;M;M;M;;F", "homepage": "https://shijiangming1.github.io/;;https://yachao-zhang.github.io/;;;http://quyanyun.xmu.edu.cn", "dblp": "304/1527;;40/10584-1;20/1541;;03/3500", "google_scholar": "Go9q2jsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.de/citations?user=a-I8c8EAAAAJ;CXZciFAAAAAJ;;", "orcid": ";0000-0002-8599-909X;0000-0002-6153-5004;;;", "linkedin": "jiangming-shi-63160a103/;;;;;", "or_profile": "~Jiangming_Shi1;~Xiangbo_Yin1;~Yachao_Zhang1;~zhizhong_zhang1;~Yuan_Xie5;~Yanyun_Qu1", "aff": "Xiamen University;Xiamen University;Tsinghua University;East China Normal University;;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;tsinghua.edu.cn;ecnu.edu.cn;;xmu.edu.cn", "position": "PhD student;MS student;Postdoc;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nshi2024learning,\ntitle={Learning Commonality, Divergence and Variety for Unsupervised Visible-Infrared Person Re-identification},\nauthor={Jiangming Shi and Xiangbo Yin and Yachao Zhang and zhizhong zhang and Yuan Xie and Yanyun Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QQSGwpmDfU}\n}", "github": "", "reviewers": "mgXA;edeS;n3oM;i5Cg", "pdf_size": 1126536, "rating": "4;5;7;7", "confidence": "4;4;5;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;2;3;4", "wc_summary": "79;215;60;95", "wc_strengths": "125;42;73;92", "wc_weaknesses": "197;384;72;102", "wc_questions": "128;103;23;20", "wc_limitations": "41;133;23;25", "wc_review": "570;877;251;334", "wc_reply_reviewers": "112;151;117;88", "wc_reply_authors": "374;691;178;74", "reply_reviewers": "1;1;2;1", "reply_authors": "3;3;2;3", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 112.25, 60.602702084973075 ], "wc_strengths_avg": [ 83.0, 30.108138434649195 ], "wc_weaknesses_avg": [ 188.75, 121.80594197328799 ], "wc_questions_avg": [ 68.5, 47.83565615730592 ], "wc_limitations_avg": [ 55.5, 45.2852072977479 ], "wc_review_avg": [ 508.0, 243.06892026748298 ], "wc_reply_reviewers_avg": [ 117.0, 22.483327155917117 ], "wc_reply_authors_avg": [ 329.25, 234.99720743021606 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11178299958390789915&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "xmu.edu.cn;xmu.edu.cn;tsinghua.edu.cn;ecnu.edu.cn;;xmu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Xiamen University;Tsinghua University;East China Normal University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.tsinghua.edu.cn;http://www.ecnu.edu.cn", "aff_unique_abbr": "XMU;THU;ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DN-4DGS: Denoised Deformable Network with Temporal-Spatial Aggregation for Dynamic Scene Rendering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95236", "id": "QQSyNX5s83", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QQSyNX5s83", "openreview": "https://openreview.net/forum?id=QQSyNX5s83", "poster": "", "project": "", "author_site": "Jiahao Lu, Jiacheng Deng, Ruijie Zhu, Yanzhe Liang, Wenfei Yang, Xu Zhou, Tianzhu Zhang", "tldr": "", "abstract": "Dynamic scenes rendering is an intriguing yet challenging problem. Although current methods based on NeRF have achieved satisfactory performance, they still can not reach real-time levels. Recently, 3D Gaussian Splatting (3DGS) has garnered researchers' attention due to their outstanding rendering quality and real-time speed. Therefore, a new paradigm has been proposed: defining a canonical 3D gaussians and deforming it to individual frames in deformable fields. However, since the coordinates of canonical 3D gaussians are filled with noise, which can transfer noise into the deformable fields, and there is currently no method that adequately considers the aggregation of 4D information. Therefore, we propose Denoised Deformable Network with Temporal-Spatial Aggregation for Dynamic Scene Rendering (DN-4DGS). Specifically, a Noise Suppression Strategy is introduced to change the distribution of the coordinates of the canonical 3D gaussians and suppress noise. Additionally, a Decoupled Temporal-Spatial Aggregation Module is designed to aggregate information from adjacent points and frames. Extensive experiments on various real-world datasets demonstrate that our method achieves state-of-the-art rendering quality under a real-time level. Code is available at https://github.com/peoplelu/DN-4DGS.", "keywords": "Dynamic Scene Rendering;3D Gaussian Splatting;Point Cloud", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiahao Lu;Jiacheng Deng;Ruijie Zhu;Yanzhe Liang;Wenfei Yang;Xu Zhou;Tianzhu Zhang", "authorids": "~Jiahao_Lu5;~Jiacheng_Deng2;~Ruijie_Zhu2;~Yanzhe_Liang1;~Wenfei_Yang2;~Xu_Zhou7;~Tianzhu_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/peoplelu;https://github.com/JiachengDeng;https://ruijiezhu94.github.io/ruijiezhu/;https://github.com/Rosetta-Leong;;;https://scholar.google.com/citations?user=9sCGe-gAAAAJ&hl=zh-CN", "dblp": ";320/4938;194/6909-2;;;;", "google_scholar": "cRpteW4AAAAJ;https://scholar.google.cz/citations?hl=zh-CN;6uuAEdkAAAAJ;;rtO5VmQAAAAJ;https://scholar.google.com/citations?hl=en;9sCGe-gAAAAJ", "orcid": ";0000-0003-2838-0378;0000-0001-6092-0712;;;;", "linkedin": ";https://linkedin.com/in/jiacheng-deng-0843a3280;;yanzhe-liang-1a703020b/;;;", "or_profile": "~Jiahao_Lu5;~Jiacheng_Deng2;~Ruijie_Zhu2;~Yanzhe_Liang1;~Wenfei_Yang2;~Xu_Zhou7;~Tianzhu_Zhang1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Sangfor Technologies Inc.;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;sangfor.com.cn;ustc.edu.cn", "position": "MS student;PhD student;MS student;MS student;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nlu2024dndgs,\ntitle={{DN}-4{DGS}: Denoised Deformable Network with Temporal-Spatial Aggregation for Dynamic Scene Rendering},\nauthor={Jiahao Lu and Jiacheng Deng and Ruijie Zhu and Yanzhe Liang and Wenfei Yang and Xu Zhou and Tianzhu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QQSyNX5s83}\n}", "github": "", "reviewers": "umjH;3vN1;1vtH;ELfM", "pdf_size": 40812035, "rating": "5;5;5;6", "confidence": "5;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "3;3;2;2", "wc_summary": "108;31;112;60", "wc_strengths": "73;15;48;58", "wc_weaknesses": "497;58;129;142", "wc_questions": "22;155;55;28", "wc_limitations": "5;6;6;13", "wc_review": "705;265;350;301", "wc_reply_reviewers": "358;73;127;0", "wc_reply_authors": "434;251;400;0", "reply_reviewers": "2;1;2;0", "reply_authors": "3;2;3;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.75, 33.87015648029988 ], "wc_strengths_avg": [ 48.5, 21.289668856043768 ], "wc_weaknesses_avg": [ 206.5, 170.7402998708858 ], "wc_questions_avg": [ 65.0, 53.427520998077384 ], "wc_limitations_avg": [ 7.5, 3.2015621187164243 ], "wc_review_avg": [ 405.25, 175.67067911293563 ], "wc_reply_reviewers_avg": [ 139.5, 133.95988205429265 ], "wc_reply_authors_avg": [ 271.25, 171.06340198885326 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=704979270867636991&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;sangfor.com.cn;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Sangfor Technologies", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.sangfor.com.cn", "aff_unique_abbr": "USTC;Sangfor", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "STaRK: Benchmarking LLM Retrieval on Textual and Relational Knowledge Bases", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97698", "id": "QSS5cGmKb1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QSS5cGmKb1", "openreview": "https://openreview.net/forum?id=QSS5cGmKb1", "poster": "", "project": "", "author_site": "Shirley Wu, Shiyu Zhao, Michihiro Yasunaga, Kexin Huang, Kaidi Cao, Qian Huang, Vassilis Ioannidis, Karthik Subbian, James Zou, Jure Leskovec", "tldr": "", "abstract": "Answering real-world complex queries, such as complex product search, often requires accurate retrieval from semi-structured knowledge bases that involve blend of unstructured (e.g., textual descriptions of products) and structured (e.g., entity relations of products) information. However, many previous works studied textual and relational retrieval tasks as separate topics. To address the gap, we develop STARK, a large-scale Semi-structure retrieval benchmark on Textual and Relational Knowledge Bases. Our benchmark covers three domains: product search, academic paper search, and queries in precision medicine. We design a novel pipeline to synthesize realistic user queries that integrate diverse relational information and complex textual properties, together with their ground-truth answers (items). We conduct rigorous human evaluation to validate the quality of our synthesized queries. We further enhance the benchmark with high-quality human-generated queries to provide an authentic reference. STARK serves as a comprehensive testbed for evaluating the performance of retrieval systems driven by large language models (LLMs). Our experiments suggest that STARK presents significant challenges to the current retrieval and LLM systems, highlighting the need for more capable semi-structured retrieval systems.", "keywords": "Semi-structured Knowledge Base;Large Language Models;Retrieval", "primary_area": "", "supplementary_material": "/attachment/5ddb6f4c9cfa673b4a48df9055c1d539c358a32e.pdf", "author": "Shirley Wu;Shiyu Zhao;Michihiro Yasunaga;Kexin Huang;Kaidi Cao;Qian Huang;Vassilis N. Ioannidis;Karthik Subbian;James Zou;Jure Leskovec", "authorids": "~Shirley_Wu1;~Shiyu_Zhao5;~Michihiro_Yasunaga1;~Kexin_Huang1;~Kaidi_Cao1;~Qian_Huang2;~Vassilis_N._Ioannidis1;~Karthik_Subbian1;~James_Zou1;~Jure_Leskovec1", "gender": "M;;M;M;F;;M;;;F", "homepage": "https://shiyu-zhao.netlify.app/;;https://www.kexinhuang.com/;https://ai.stanford.edu/~kaidicao/;https://q-hwang.github.io/;https://scholar.google.com/citations?hl=en&user=mjmiI4sAAAAJ&view_op=list_works&authuser=1;http://mailtosuka.googlepages.com;;http://cs.stanford.edu/~jure/;https://cs.stanford.edu/~shirwu", "dblp": ";202/1809;;203/8207;07/4378.html;;32/5843;;l/JureLeskovec;79/4173-2", "google_scholar": ";SieJYoEAAAAJ;ogEXTOgAAAAJ;https://scholar.google.com.hk/citations?user=4Zw1PJ8AAAAJ;L3hkmG0AAAAJ;;;23ZXZvEAAAAJ;Q_kKkIUAAAAJ;r2cVEucAAAAJ", "orcid": ";;;;;0000-0002-8367-0733;;;0000-0002-5411-923X;", "linkedin": "shiyu-zhao-1124a0266/;;;;qian-huang-b20315149/;;;;leskovec/;", "or_profile": "~Shiyu_Zhao5;~Michihiro_Yasunaga1;~Kexin_Huang1;~Kaidi_Cao1;~Qian_Huang2;~Vassilis_N._Ioannidis1;~Karthik_Subbian1;~James_Zou1;~Jure_Leskovec1;~Yingxin_Wu1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Amazon Web Services;Amazon;Stanford University;Kumo.AI;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;amazon.com;amazon.com;stanford.edu;kumo.ai;cs.stanford.edu", "position": "MS student;PhD student;PhD student;PhD student;PhD student;Applied Scientist II;Researcher;Assistant Professor;Chief Scientist;PhD student", "bibtex": "@inproceedings{\nwu2024stark,\ntitle={{ST}a{RK}: Benchmarking {LLM} Retrieval on Textual and Relational Knowledge Bases},\nauthor={Shirley Wu and Shiyu Zhao and Michihiro Yasunaga and Kexin Huang and Kaidi Cao and Qian Huang and Vassilis N. Ioannidis and Karthik Subbian and James Zou and Jure Leskovec},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QSS5cGmKb1}\n}", "github": "", "reviewers": "CdN8;XAxN;bote", "pdf_size": 1012442, "rating": "5;6;9", "confidence": "4;3;2", "wc_summary_and_contributions": "65;54;45", "wc_strengths": "32;43;37", "wc_improvement": "88;123;23", "wc_limitations": "14;9;27", "wc_correctness": "71;1;58", "wc_clarity": "133;1;5", "wc_relation_to_prior_work": "18;1;13", "wc_documentation": "113;6;9", "wc_additional_feedback": "1;1;1", "wc_review": "535;239;218", "wc_reply_reviewers": "0;20;0", "wc_reply_authors": "101;12;0", "reply_reviewers": "0;1;0", "reply_authors": "3;2;1", "rating_avg": [ 6.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 54.666666666666664, 8.178562764256865 ], "wc_strengths_avg": [ 37.333333333333336, 4.4969125210773475 ], "wc_improvement_avg": [ 78.0, 41.432676315520176 ], "wc_limitations_avg": [ 16.666666666666668, 7.586537784494029 ], "wc_correctness_avg": [ 43.333333333333336, 30.40102337458761 ], "wc_clarity_avg": [ 46.333333333333336, 61.30434097372079 ], "wc_relation_to_prior_work_avg": [ 10.666666666666666, 7.1336448530109 ], "wc_documentation_avg": [ 42.666666666666664, 49.74825513411389 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 330.6666666666667, 144.73961294530105 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 37.666666666666664, 45.05058884804454 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.9607689228305228, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1501475398752845640&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;amazon.com;amazon.com;stanford.edu;kumo.ai;cs.stanford.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;1;0;2;0", "aff_unique_norm": "Stanford University;Amazon;Kumo.AI", "aff_unique_dep": ";Amazon Web Services;", "aff_unique_url": "https://www.stanford.edu;https://aws.amazon.com;https://www.kumo.ai", "aff_unique_abbr": "Stanford;AWS;Kumo.AI", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bias in Motion: Theoretical Insights into the Dynamics of Bias in SGD Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95235", "id": "QUYLbzwtTV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QUYLbzwtTV", "openreview": "https://openreview.net/forum?id=QUYLbzwtTV", "poster": "", "project": "", "author_site": "Anchit Jain, Rozhin Nobahari, Aristide Baratin, Stefano Sarao Mannelli", "tldr": "", "abstract": "Machine learning systems often acquire biases by leveraging undesired features in the data, impacting accuracy variably across different sub-populations of the data. However, our current understanding of bias formation mostly focuses on the initial and final stages of learning, leaving a gap in knowledge regarding the transient dynamics. To address this gap, this paper explores the evolution of bias in a teacher-student setup that models different data sub-populations with a Gaussian-mixture model. We provide an analytical description of the stochastic gradient descent dynamics of a linear classifier in this setup, which we prove to be exact in high dimension.\nNotably, our analysis identifies different properties of the sub-populations that drive bias at different timescales and hence shows a shifting preference of our classifier during training. By applying our general solution to fairness and robustness, we delineate how and when heterogeneous data and spurious features can generate and amplify bias. We empirically validate our results in more complex scenarios by training deeper networks on synthetic and real data, i.e. using CIFAR10, MNIST, and CelebA datasets.", "keywords": "learning dynamics;online learning;stochastic gradient descent;analytical model;fairness;spurious correlation", "primary_area": "optimization", "supplementary_material": "/attachment/1fed0e674749079daabe621aedd944e07a5dcf00.zip", "author": "Anchit Jain;Rozhin Nobahari;Aristide Baratin;Stefano Sarao Mannelli", "authorids": "~Anchit_Jain1;~Rozhin_Nobahari1;~Aristide_Baratin1;~Stefano_Sarao_Mannelli1", "gender": ";;;M", "homepage": ";;;https://stefsmlab.github.io/", "dblp": ";;;232/3343", "google_scholar": ";https://scholar.google.ca/citations?user=5fLSIlcAAAAJ;;https://scholar.google.it/citations?user=Kq272_MAAAAJ", "orcid": ";;;", "linkedin": "anchitjain625/;;;", "or_profile": "~Anchit_Jain1;~Rozhin_Nobahari1;~Aristide_Baratin1;~Stefano_Sarao_Mannelli1", "aff": "University of Cambridge;Universit\u00e9 de Montr\u00e9al;;University College London", "aff_domain": "cam.ac.uk;umontreal.ca;;ucl.ac.uk", "position": "Undergrad student;MS student;;Postdoc", "bibtex": "@inproceedings{\njain2024bias,\ntitle={Bias in Motion: Theoretical Insights into the Dynamics of Bias in {SGD} Training},\nauthor={Anchit Jain and Rozhin Nobahari and Aristide Baratin and Stefano Sarao Mannelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QUYLbzwtTV}\n}", "github": "", "reviewers": "JTC4;43yt;CSav;gHK2;a8nr", "pdf_size": 1220172, "rating": "5;6;6;6;7", "confidence": "4;3;2;3;2", "soundness": "3;3;3;4;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "102;227;57;74;103", "wc_strengths": "61;62;29;140;63", "wc_weaknesses": "74;43;159;105;9", "wc_questions": "21;65;47;27;17", "wc_limitations": "7;1;2;4;1", "wc_review": "265;398;294;350;193", "wc_reply_reviewers": "42;0;0;89;15", "wc_reply_authors": "0;33;33;0;16", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 112.6, 59.788293168479065 ], "wc_strengths_avg": [ 71.0, 36.796738985948195 ], "wc_weaknesses_avg": [ 78.0, 51.55967416499061 ], "wc_questions_avg": [ 35.4, 18.03995565404749 ], "wc_limitations_avg": [ 3.0, 2.280350850198276 ], "wc_review_avg": [ 300.0, 70.44714330616962 ], "wc_reply_reviewers_avg": [ 29.2, 33.60595185380114 ], "wc_reply_authors_avg": [ 16.4, 14.759403781996074 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8451542547285165, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13123464677423405126&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cam.ac.uk;umontreal.ca;;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Cambridge;Universit\u00e9 de Montr\u00e9al;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.umontreal.ca;https://www.ucl.ac.uk", "aff_unique_abbr": "Cambridge;UdeM;UCL", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Accuracy is Not All You Need", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95234", "id": "QVG7j29Sta", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QVG7j29Sta", "openreview": "https://openreview.net/forum?id=QVG7j29Sta", "poster": "/media/PosterPDFs/NeurIPS%202024/95234.png?t=1733580711.220655", "project": "", "author_site": "Abhinav Dutta, Sanjeev Krishnan, Nipun Kwatra, Ramachandran Ramjee", "tldr": "", "abstract": "When Large Language Models (LLMs) are compressed using techniques such as quantization, the predominant way to demonstrate the validity of such techniques is by measuring the model's accuracy on various benchmarks. If the accuracies of the baseline model and the compressed model are close, it is assumed that there was negligible degradation in quality. However, even when the accuracy of baseline and compressed model are similar, we observe the phenomenon of flips, wherein answers change from correct to incorrect and vice versa in proportion. We conduct a detailed study of metrics across multiple compression techniques, models and datasets, demonstrating that the behavior of compressed models as visible to end-users is often significantly different from the baseline model, even when accuracy is similar. We further evaluate compressed models qualitatively and quantitatively using MT-Bench and show that compressed models exhibiting high flips are worse than baseline models in this free-form generative task. Thus, we argue that accuracy and perplexity are necessary but not sufficient for evaluating compressed models, since these metrics hide large underlying changes that have not been observed by previous work. Hence, compression techniques should also be evaluated using distance metrics. We propose two such distance metrics, KL-Divergence and flips, and show that they are well correlated.", "keywords": "LLM Evaluation;Evaluation of Compressed LLMs;Quantization;Pruning;Sparsification", "primary_area": "evaluation", "supplementary_material": "/attachment/c1a90c2d1f7820ca8f4a7000b631d249bc78ddb3.zip", "author": "Abhinav Dutta;Sanjeev Krishnan;Nipun Kwatra;Ramachandran Ramjee", "authorids": "~Abhinav_Dutta1;~Sanjeev_Krishnan1;~Nipun_Kwatra1;~Ramachandran_Ramjee1", "gender": "M;;M;", "homepage": ";;https://www.microsoft.com/en-us/research/people/nkwatra/;https://www.microsoft.com/en-us/research/people/ramjee/", "dblp": "346/8246;45/6260.html;;97/117.html", "google_scholar": "https://scholar.google.com.my/citations?user=YAYI4NUAAAAJ;;;https://scholar.google.co.in/citations?user=0P35aLUAAAAJ", "orcid": "0000-0001-6519-9581;;;0000-0003-0007-6040", "linkedin": ";;;", "or_profile": "~Abhinav_Dutta1;~Sanjeev_Krishnan1;~Nipun_Kwatra1;~Ramachandran_Ramjee1", "aff": "Microsoft;Microsoft Research;Microsoft;Microsoft", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com", "position": "Research Fellow;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ndutta2024accuracy,\ntitle={Accuracy is Not All You Need},\nauthor={Abhinav Dutta and Sanjeev Krishnan and Nipun Kwatra and Ramachandran Ramjee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QVG7j29Sta}\n}", "github": "", "reviewers": "mPMT;jN9C;n3Hv;Sq51", "pdf_size": 1308907, "rating": "3;5;6;7", "confidence": "4;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "76;57;114;130", "wc_strengths": "71;49;50;105", "wc_weaknesses": "166;41;156;88", "wc_questions": "116;21;93;4", "wc_limitations": "8;4;27;4", "wc_review": "437;172;440;331", "wc_reply_reviewers": "0;77;0;40", "wc_reply_authors": "0;239;0;2256", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;4", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.25, 29.106485531578695 ], "wc_strengths_avg": [ 68.75, 22.69774217846348 ], "wc_weaknesses_avg": [ 112.75, 51.153567813007925 ], "wc_questions_avg": [ 58.5, 47.09830145557268 ], "wc_limitations_avg": [ 10.75, 9.522998477370455 ], "wc_review_avg": [ 345.0, 109.10316219065331 ], "wc_reply_reviewers_avg": [ 29.25, 32.041964671349355 ], "wc_reply_authors_avg": [ 623.75, 947.4176415393583 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9160631556301598392&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Tetrahedron Splatting for 3D Generation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95233", "id": "QVSP1uk7b5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QVSP1uk7b5", "openreview": "https://openreview.net/forum?id=QVSP1uk7b5", "poster": "/media/PosterPDFs/NeurIPS%202024/95233.png?t=1733324423.5630507", "project": "", "author_site": "Chun Gu, Zeyu Yang, Zijie Pan, Xiatian Zhu, Li Zhang", "tldr": "", "abstract": "3D representation is essential to the significant advance of 3D generation with 2D diffusion priors. As a flexible representation, NeRF has been first adopted for 3D representation. With density-based volumetric rendering, it however suffers both intensive computational overhead and inaccurate mesh extraction. Using a signed distance field and Marching Tetrahedra, DMTet allows for precise mesh extraction and real-time rendering but is limited in handling large topological changes in meshes, leading to optimization challenges. Alternatively, 3D Gaussian Splatting (3DGS) is favored in both training and rendering efficiency while falling short in mesh extraction. In this work, we introduce a novel 3D representation, Tetrahedron Splatting (TeT-Splatting), that supports easy convergence during optimization, precise mesh extraction, and real-time rendering simultaneously. This is achieved by integrating surface-based volumetric rendering within a structured tetrahedral grid while preserving the desired ability of precise mesh extraction, and a tile-based differentiable tetrahedron rasterizer. Furthermore, we incorporate eikonal and normal consistency regularization terms for the signed distance field to improve generation quality and stability. Critically, our representation can be trained without mesh extraction, making the optimization process easier to converge. Our TeT-Splatting can be readily integrated in existing 3D generation pipelines, along with polygonal mesh for texture optimization. Extensive experiments show that our TeT-Splatting strikes a superior tradeoff among convergence speed, render efficiency, and mesh quality as compared to previous alternatives under varying 3D generation settings.", "keywords": "text-to-3D;tetrahedron splatting;volumetric rendering", "primary_area": "generative_models", "supplementary_material": "", "author": "Chun Gu;Zeyu Yang;Zijie Pan;Xiatian Zhu;Li Zhang", "authorids": "~Chun_Gu1;~Zeyu_Yang3;~Zijie_Pan2;~Xiatian_Zhu3;~Li_Zhang5", "gender": "M;M;M;;M", "homepage": "https://sulvxiangxin.github.io/;https://github.com/Alexander0Yang;https://github.com/mdarhdarz;https://x-up-lab.github.io;http://www.robots.ox.ac.uk/~lz/", "dblp": "58/4570;;290/3418;128/7935;89/5992-40", "google_scholar": "cWXTnT0AAAAJ;;;ZbA-z1cAAAAJ;-wOTCE8AAAAJ", "orcid": ";;;0000-0002-9284-2955;", "linkedin": ";;;;", "or_profile": "~Chun_Gu1;~Zeyu_Yang3;~Zijie_Pan2;~Xiatian_Zhu3;~Li_Zhang5", "aff": "Fudan University;Fudan University;Fudan University;University of Surrey;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;surrey.ac.uk;fudan.edu.cn", "position": "Undergrad student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ngu2024tetrahedron,\ntitle={Tetrahedron Splatting for 3D Generation},\nauthor={Chun Gu and Zeyu Yang and Zijie Pan and Xiatian Zhu and Li Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QVSP1uk7b5}\n}", "github": "", "reviewers": "Tjej;dtJc;7DJZ;39Vr", "pdf_size": 0, "rating": "6;6;7;7", "confidence": "4;4;5;3", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "50;35;43;214", "wc_strengths": "65;49;28;174", "wc_weaknesses": "124;108;35;175", "wc_questions": "62;4;26;24", "wc_limitations": "14;15;11;1", "wc_review": "315;211;143;588", "wc_reply_reviewers": "29;14;0;116", "wc_reply_authors": "20;0;0;24", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.5, 74.37909652583849 ], "wc_strengths_avg": [ 79.0, 56.39592183837409 ], "wc_weaknesses_avg": [ 110.5, 50.12235030403104 ], "wc_questions_avg": [ 29.0, 20.904544960366874 ], "wc_limitations_avg": [ 10.25, 5.539629951540085 ], "wc_review_avg": [ 314.25, 169.50424035993908 ], "wc_reply_reviewers_avg": [ 39.75, 45.20163160771965 ], "wc_reply_authors_avg": [ 11.0, 11.090536506409418 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13630677086322183036&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;surrey.ac.uk;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Fudan University;University of Surrey", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.surrey.ac.uk", "aff_unique_abbr": "Fudan;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Rule Based Rewards for Language Model Safety", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95232", "id": "QVtwpT5Dmg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QVtwpT5Dmg", "openreview": "https://openreview.net/forum?id=QVtwpT5Dmg", "poster": "", "project": "", "author_site": "Tong Mu, Alec Helyar, Johannes Heidecke, Joshua Achiam, Andrea Vallone, Ian Kivlichan, Molly Lin, Alex Beutel, John Schulman, Lilian Weng", "tldr": "", "abstract": "Reinforcement learning based fine-tuning of large language models (LLMs) on human preferences has been shown to enhance both their capabilities and safety behavior.\n However, in cases related to safety, without precise instructions to human annotators, the data collected may cause the model to become overly cautious, or to respond in an undesirable style, such as being judgmental.\n Additionally, as model capabilities and usage patterns evolve, there may be a costly need to add or relabel data to modify safety behavior. \n We propose a novel preference modeling approach that utilizes AI feedback and only requires a small amount of human data. \n Our method, Rule Based Rewards (RBR), uses a collection of rules for desired or undesired behaviors (e.g. refusals should not be judgmental) along with a LLM grader.\n In contrast to prior methods using AI feedback, our method uses fine-grained, composable, LLM-graded few-shot prompts as reward directly in RL training, resulting in greater control, accuracy and ease of updating.\n We show that RBRs are an effective training method, achieving an F1 score of 97.1, compared to a human-feedback baseline of 91.7, resulting in much higher safety-behavior accuracy through better balancing usefulness and safety.", "keywords": "Large Language Model;LLM;RLHF;RLAIF;Safety;RBR;refusal;alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Tong Mu;Alec Helyar;Johannes Heidecke;Joshua Achiam;Andrea Vallone;Ian D Kivlichan;Molly Lin;Alex Beutel;John Schulman;Lilian Weng", "authorids": "~Tong_Mu1;~Alec_Helyar1;~Johannes_Heidecke1;~Joshua_Achiam1;~Andrea_Vallone1;~Ian_D_Kivlichan1;~Molly_Lin1;~Alex_Beutel1;~John_Schulman1;~Lilian_Weng1", "gender": "F;M;;M;F;;F;;;F", "homepage": ";;;;;;;;;https://lilianweng.github.io/lil-log/", "dblp": ";;;;;;;;;70/10948", "google_scholar": ";WW2SExwAAAAJ;https://scholar.google.com/citations?hl=en;;;FRBObOwAAAAJ;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;;", "linkedin": "tong-mu-9b42b2a7/;alec-helyar/;;;andreavallone;;mollyslin/;;;", "or_profile": "~Tong_Mu1;~Alec_Helyar1;~Johannes_Heidecke1;~Joshua_Achiam1;~Andrea_Vallone1;~Ian_D_Kivlichan1;~Molly_Lin1;~Alex_Beutel1;~John_Schulman1;~Lilian_Weng1", "aff": "OpenAI;Microsoft;OpenAI;University of California, Berkeley;OpenAI;OpenAI;OpenAI;;OpenAI;OpenAI", "aff_domain": "openai.com;microsoft.com;openai.com;berkeley.edu;openai.com;openai.com;openai.com;;openai.com;openai.com", "position": "Researcher;Researcher;Researcher;PhD student;Undergrad student;Researcher;Member of Safety Systems;;Researcher;Research Scientist", "bibtex": "@inproceedings{\nmu2024rule,\ntitle={Rule Based Rewards for Language Model Safety},\nauthor={Tong Mu and Alec Helyar and Johannes Heidecke and Joshua Achiam and Andrea Vallone and Ian D Kivlichan and Molly Lin and Alex Beutel and John Schulman and Lilian Weng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QVtwpT5Dmg}\n}", "github": "", "reviewers": "JMNb;QvaG;CTB5", "pdf_size": 0, "rating": "5;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;4;3", "presentation": "3;2;2", "wc_summary": "118;85;204", "wc_strengths": "48;60;130", "wc_weaknesses": "146;570;204", "wc_questions": "35;159;89", "wc_limitations": "1;16;1", "wc_review": "348;890;628", "wc_reply_reviewers": "0;51;64", "wc_reply_authors": "0;69;63", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 135.66666666666666, 50.16195991209098 ], "wc_strengths_avg": [ 79.33333333333333, 36.160137659521645 ], "wc_weaknesses_avg": [ 306.6666666666667, 187.70425911227719 ], "wc_questions_avg": [ 94.33333333333333, 50.76306618880393 ], "wc_limitations_avg": [ 6.0, 7.0710678118654755 ], "wc_review_avg": [ 622.0, 221.31124387763643 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 27.620443314488796 ], "wc_reply_authors_avg": [ 44.0, 31.20897306865447 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13126060863964534565&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "openai.com;microsoft.com;openai.com;berkeley.edu;openai.com;openai.com;openai.com;;openai.com;openai.com", "author_num": 10, "aff_unique_index": "0;1;0;2;0;0;0;0;0", "aff_unique_norm": "OpenAI;Microsoft;University of California, Berkeley", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://openai.com;https://www.microsoft.com;https://www.berkeley.edu", "aff_unique_abbr": "OpenAI;Microsoft;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Measuring Multimodal Mathematical Reasoning with MATH-Vision Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97697", "id": "QWTCcxMpPA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QWTCcxMpPA", "openreview": "https://openreview.net/forum?id=QWTCcxMpPA", "poster": "", "project": "", "author_site": "Ke Wang, Junting Pan, Charlie Wang, Zimu Lu, Houxing Ren, Aojun Zhou, Mingjie Zhan, Hongsheng Li", "tldr": "", "abstract": "Recent advancements in Large Multimodal Models (LMMs) have shown promising results in mathematical reasoning within visual contexts, with models exceeding human-level performance on existing benchmarks such as MathVista. However, we observe significant limitations in the diversity of questions and breadth of subjects covered by these benchmarks. To address this issue, we present the MATH-Vision (MATH-V) dataset, a meticulously curated collection of 3,040 high-quality mathematical problems with visual contexts sourced from real math competitions. Spanning 16 distinct mathematical disciplines and graded across 5 levels of difficulty, our dataset provides a comprehensive and diverse set of challenges for evaluating the mathematical reasoning abilities of LMMs. Through extensive experimentation, we unveil a notable performance gap between current LMMs and human performance on \\datasetname, underscoring the imperative for further advancements in LMMs. Moreover, our detailed categorization allows for a thorough error analysis of LMMs, offering valuable insights to guide future research and development. The dataset is released at [MathLLMs/MathVision](https://huggingface.co/datasets/MathLLMs/MathVision)", "keywords": "mathematical reasoning;multimodality;large language model", "primary_area": "", "supplementary_material": "/attachment/2a44eb97edbdd2d0da2b7c1b49e30428df9f8e6d.pdf", "author": "Ke Wang;Junting Pan;Weikang Shi;Zimu Lu;Houxing Ren;Aojun Zhou;Mingjie Zhan;Hongsheng Li", "authorids": "~Ke_Wang18;~Junting_Pan2;~Weikang_Shi1;~Zimu_Lu1;~Houxing_Ren1;~Aojun_Zhou2;~Mingjie_Zhan1;~Hongsheng_Li3", "gender": "M;M;M;F;M;M;M;M", "homepage": "https://wangk.org/;https://junting.github.io/;https://shiwk20.github.io;;;;http://www.ee.cuhk.edu.hk/~hsli;", "dblp": "181/2613-36;166/1669;213/4016.html;;292/3943.html;277/1226;27/7402-1;195/6034", "google_scholar": "woY4bS8AAAAJ;8Xt3TnAAAAAJ;;ewuGUCwAAAAJ;reSJxKkAAAAJ;;BN2Ze-QAAAAJ;cC8lXi8AAAAJ", "orcid": ";;;;0000-0001-9750-1626;;;", "linkedin": ";junting-pan/;;;;;;", "or_profile": "~Ke_Wang18;~Junting_Pan2;~Weikang_Shi1;~Zimu_Lu1;~Houxing_Ren1;~Mingjie_Zhan1;~Hongsheng_Li3;~Aojun_Zhou3", "aff": "Nanjing University;The Chinese University of Hong Kong;Tsinghua University;University of Science and Technology of China;Sensetime;SenseTime Research;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "nju.edu.cn;cuhk.edu.hk;tsinghua.edu.cn;ustc.edu.cn;sensetime.com;sensetime.com;cuhk.edu.hk;cuhk.edu.hk", "position": "Undergrad student;Postdoc;Undergrad student;Undergrad student;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nwang2024measuring,\ntitle={Measuring Multimodal Mathematical Reasoning with {MATH}-Vision Dataset},\nauthor={Ke Wang and Junting Pan and Weikang Shi and Zimu Lu and Houxing Ren and Aojun Zhou and Mingjie Zhan and Hongsheng Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QWTCcxMpPA}\n}", "github": "", "reviewers": "ZxmA;mPoH;QvcB;YHoM", "pdf_size": 22035091, "rating": "5;7;8;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "37;102;53;63", "wc_strengths": "14;83;98;40", "wc_improvement": "31;266;86;13", "wc_limitations": "20;470;10;16", "wc_correctness": "55;1;5;32", "wc_clarity": "54;5;9;8", "wc_relation_to_prior_work": "1;1;75;20", "wc_documentation": "1;7;7;43", "wc_additional_feedback": "1;1;1;1", "wc_review": "214;936;344;236", "wc_reply_reviewers": "0;0;28;0", "wc_reply_authors": "244;0;27;0", "reply_reviewers": "0;0;1;0", "reply_authors": "4;1;2;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 63.75, 23.951774464536026 ], "wc_strengths_avg": [ 58.75, 33.47667098144617 ], "wc_improvement_avg": [ 99.0, 100.09745251503657 ], "wc_limitations_avg": [ 129.0, 196.90860824250422 ], "wc_correctness_avg": [ 23.25, 21.867498713844707 ], "wc_clarity_avg": [ 19.0, 20.26079958935481 ], "wc_relation_to_prior_work_avg": [ 24.25, 30.309858132297485 ], "wc_documentation_avg": [ 14.5, 16.635804759614125 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 432.5, 294.8300357833306 ], "wc_reply_reviewers_avg": [ 7.0, 12.12435565298214 ], "wc_reply_authors_avg": [ 67.75, 102.35324860501497 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1693831576430202147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;cuhk.edu.hk;tsinghua.edu.cn;ustc.edu.cn;sensetime.com;sensetime.com;cuhk.edu.hk;cuhk.edu.hk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;4;1;1", "aff_unique_norm": "Nanjing University;Chinese University of Hong Kong;Tsinghua University;University of Science and Technology of China;SenseTime", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.nju.edu.cn;https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;http://www.ustc.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "Nanjing U;CUHK;THU;USTC;SenseTime", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Grounded Answers for Multi-agent Decision-making Problem through Generative World Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95231", "id": "QWsLks8LCO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QWsLks8LCO", "openreview": "https://openreview.net/forum?id=QWsLks8LCO", "poster": "/media/PosterPDFs/NeurIPS%202024/95231.png?t=1731596043.2659447", "project": "", "author_site": "Zeyang Liu, Xinrui Yang, Shiguang Sun, Long Qian, Lipeng Wan, Xingyu Chen, Xuguang Lan", "tldr": "", "abstract": "Recent progress in generative models has stimulated significant innovations in many fields, such as image generation and chatbots. Despite their success, these models often produce sketchy and misleading solutions for complex multi-agent decision-making problems because they miss the trial-and-error experience and reasoning as humans. To address this limitation, we explore a paradigm that integrates a language-guided simulator into the multi-agent reinforcement learning pipeline to enhance the generated answer. The simulator is a world model that separately learns dynamics and reward, where the dynamics model comprises an image tokenizer as well as a causal transformer to generate interaction transitions autoregressively, and the reward model is a bidirectional transformer learned by maximizing the likelihood of trajectories in the expert demonstrations under language guidance. Given an image of the current state and the task description, we use the world model to train the joint policy and produce the image sequence as the answer by running the converged policy on the dynamics model. The empirical results demonstrate that this framework can improve the answers for multi-agent decision-making problems by showing superior performance on the training and unseen tasks of the StarCraft Multi-Agent Challenge benchmark. In particular, it can generate consistent interaction sequences and explainable reward functions at interaction states, opening the path for training generative models of the future.", "keywords": "World Model;Multi-agent Reinforcement Learning;Model-based Reinforcement Learning", "primary_area": "generative_models", "supplementary_material": "/attachment/5f6d8cb8693780bac222d4ec724e323c01979528.zip", "author": "Zeyang Liu;Xinrui Yang;Shiguang Sun;Long Qian;Lipeng Wan;Xingyu Chen;Xuguang Lan", "authorids": "~Zeyang_Liu2;~Xinrui_Yang1;~Shiguang_Sun1;~Long_Qian3;~Lipeng_Wan1;~Xingyu_Chen2;~Xuguang_Lan2", "gender": "M;M;M;M;M;M;", "homepage": ";;;https://lostthinker.github.io/;http://gr.xjtu.edu.cn/web/zeuslan/team;jsessionid=F923495DAAA043708B20337E681673E5;;", "dblp": ";;;;377/4923.html;;", "google_scholar": "YOOlkJoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=s0KcXmgAAAAJ;;;https://scholar.google.com.hk/citations?user=LR76K-MAAAAJ;", "orcid": "0000-0002-3110-8618;;;;;0000-0002-5226-963X;", "linkedin": ";;;;;;", "or_profile": "~Zeyang_Liu2;~Xinrui_Yang1;~Shiguang_Sun1;~Long_Qian3;~Lipeng_Wan1;~Xingyu_Chen2;~Xuguang_Lan2", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nliu2024grounded,\ntitle={Grounded Answers for Multi-agent Decision-making Problem through Generative World Model},\nauthor={Zeyang Liu and Xinrui Yang and Shiguang Sun and Long Qian and Lipeng Wan and Xingyu Chen and Xuguang Lan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QWsLks8LCO}\n}", "github": "", "reviewers": "2fu7;sAvh;Frvn;yaXk", "pdf_size": 2668677, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "4;3;2;3", "novelty": "3;3;2;3", "presentation": "3;2;2;3", "wc_summary": "129;95;78;90", "wc_strengths": "85;37;82;106", "wc_weaknesses": "180;102;409;103", "wc_questions": "219;59;2;2", "wc_limitations": "5;7;1;4", "wc_review": "618;300;572;305", "wc_reply_reviewers": "50;11;249;0", "wc_reply_authors": "0;0;689;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.0, 18.934096228761486 ], "wc_strengths_avg": [ 77.5, 25.144581921360317 ], "wc_weaknesses_avg": [ 198.5, 125.5836374692181 ], "wc_questions_avg": [ 70.5, 88.83833631940661 ], "wc_limitations_avg": [ 4.25, 2.165063509461097 ], "wc_review_avg": [ 448.75, 147.16211299108204 ], "wc_reply_reviewers_avg": [ 77.5, 100.74348614178487 ], "wc_reply_authors_avg": [ 172.25, 298.34575160373913 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:67uiKPL1afQJ:scholar.google.com/&scioq=Grounded+Answers+for+Multi-agent+Decision-making+Problem+through+Generative+World+Model&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Data-Efficient Learning with Neural Programs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95230", "id": "QXQY58xU25", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QXQY58xU25", "openreview": "https://openreview.net/forum?id=QXQY58xU25", "poster": "/media/PosterPDFs/NeurIPS%202024/95230.png?t=1733507326.813911", "project": "", "author_site": "Alaia Solko-Breslin, Seewon Choi, Ziyang Li, Neelay Velingker, Rajeev Alur, Mayur Naik, Eric Wong", "tldr": "", "abstract": "Many computational tasks can be naturally expressed as a composition of a DNN followed by a program written in a traditional programming language or an API call to an LLM. We call such composites \"neural programs\" and focus on the problem of learning the DNN parameters when the training data consist of end-to-end input-output labels for the composite. When the program is written in a differentiable logic programming language, techniques from neurosymbolic learning are applicable, but in general, the learning for neural programs requires estimating the gradients of black-box components. We present an algorithm for learning neural programs, called ISED, that only relies on input-output samples of black-box components. For evaluation, we introduce new benchmarks that involve calls to modern LLMs such as GPT-4 and also consider benchmarks from the neurosymbolic learning literature. Our evaluation shows that for the latter benchmarks, ISED has comparable performance to state-of-the-art neurosymbolic frameworks. For the former, we use adaptations of prior work on gradient approximations of black-box components as a baseline, and show that ISED achieves comparable accuracy but in a more data- and sample-efficient manner.", "keywords": "neurosymbolic learning", "primary_area": "other", "supplementary_material": "/attachment/32dea96f21a09f9430707f38990ee767a30c6d48.zip", "author": "Alaia Solko-Breslin;Seewon Choi;Ziyang Li;Neelay Velingker;Rajeev Alur;Mayur Naik;Eric Wong", "authorids": "~Alaia_Solko-Breslin1;~Seewon_Choi1;~Ziyang_Li2;~Neelay_Velingker1;~Rajeev_Alur1;~Mayur_Naik1;~Eric_Wong1", "gender": "F;F;M;M;M;M;M", "homepage": "https://alaiasolkobreslin.github.io/;https://seewonchoi.github.io;https://liby99.github.io;https://www.linkedin.com/in/nvelingker/;http://www.cis.upenn.edu/~alur/;http://www.cis.upenn.edu/~mhnaik/;http://riceric22.github.io/", "dblp": "278/3188;371/5757;;236/5641;https://dblp.uni-trier.de/pid/a/RAlur.html;92/6794;64/1811-1.html", "google_scholar": "T-9avSwAAAAJ;TmtxOw0AAAAJ;aAQ9abEAAAAJ;;ZvLa1RIAAAAJ;https://scholar.google.com.tw/citations?user=fmsV6nEAAAAJ;pWnTMRkAAAAJ", "orcid": "0009-0009-3723-5181;0009-0000-5424-480X;;;;;", "linkedin": "alaia-solko-breslin/;;liby99/;;;ai4code/;", "or_profile": "~Alaia_Solko-Breslin1;~Seewon_Choi1;~Ziyang_Li2;~Neelay_Velingker1;~Rajeev_Alur1;~Mayur_Naik1;~Eric_Wong1", "aff": "University of Pennsylvania;University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nsolko-breslin2024dataefficient,\ntitle={Data-Efficient Learning with Neural Programs},\nauthor={Alaia Solko-Breslin and Seewon Choi and Ziyang Li and Neelay Velingker and Rajeev Alur and Mayur Naik and Eric Wong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QXQY58xU25}\n}", "github": "", "reviewers": "E3jF;5P1v;2rwx;ubXN", "pdf_size": 1885622, "rating": "5;6;7;7", "confidence": "3;2;4;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "73;84;130;220", "wc_strengths": "31;30;87;136", "wc_weaknesses": "269;92;64;148", "wc_questions": "34;46;22;42", "wc_limitations": "9;20;3;8", "wc_review": "416;272;306;554", "wc_reply_reviewers": "9;36;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 126.75, 57.92829619451965 ], "wc_strengths_avg": [ 71.0, 44.05110668303352 ], "wc_weaknesses_avg": [ 143.25, 78.649141762641 ], "wc_questions_avg": [ 36.0, 9.16515138991168 ], "wc_limitations_avg": [ 10.0, 6.2048368229954285 ], "wc_review_avg": [ 387.0, 110.13173929435601 ], "wc_reply_reviewers_avg": [ 11.25, 14.7542366796795 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4171929149288524272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "upenn.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FedGTST: Boosting Global Transferability of Federated Models via Statistics Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95229", "id": "QXkFC7D6p4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QXkFC7D6p4", "openreview": "https://openreview.net/forum?id=QXkFC7D6p4", "poster": "/media/PosterPDFs/NeurIPS%202024/95229.png?t=1731625604.864774", "project": "", "author_site": "Evelyn Ma, Chao Pan, S. Rasoul Etesami, Han Zhao, Olgica Milenkovic", "tldr": "", "abstract": "The performance of Transfer Learning (TL) significantly depends on effective pretraining, which not only requires extensive amounts of data but also substantial computational resources. As a result, in practice, it is challenging to successfully perform TL at the level of individual model developers. Federated Learning (FL) addresses these challenges by enabling collaboration among individual clients through an indirect expansion of the available dataset, distribution of the computation burden across different entities, and privacy-preserving communication mechanisms. Despite several attempts to devise effective transferable FL approaches, several important issues remain unsolved. First, existing methods in this setting primarily focus on optimizing transferability within their local client domains, thereby ignoring transferability over the global learning domain. Second, most approaches focus on analyzing indirect transferability metrics, which does not allow for accurate assessment of the final target loss and extent of transferability. To address these issues, we introduce two important FL features into the model. The first boosts transferability via an exchange protocol between the clients and the server that includes information about cross-client Jacobian (gradient) norms. The second feature promotes an increase of the average of the Jacobians of the clients at the server side, which is subsequently used as a local regularizer that reduces the cross-client Jacobian variance. A rigorous analysis of our transferable federated algorithm, termed FedGTST (Federated Global Transferability via Statistics Tuning), reveals that increasing the averaged Jacobian norm across clients and reducing its variance ensures tight control of the target loss. This insight leads to the first known upper bound on the target loss of transferable federated learning in terms of the source loss and source-target domain discrepancy. Extensive experimental results on datasets including MNIST \u2192 MNIST-M and CIFAR10 \u2192 SVHN suggest that FedGTST significantly outperforms other relevant baselines, such as FedSR. For example, on the second source-target dataset pair, we improve the accuracy of FedSR by 9.8% and that of FedIIR by 7.6% when the backbone used is LeNet.", "keywords": "Federated Learning;Transfer Learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/7717b90ca41b5cdd8b69341c9b26dc29f914fb18.zip", "author": "Evelyn Ma;Chao Pan;S. Rasoul Etesami;Han Zhao;Olgica Milenkovic", "authorids": "~Evelyn_Ma1;~Chao_Pan2;~S._Rasoul_Etesami1;~Han_Zhao1;~Olgica_Milenkovic1", "gender": ";M;M;M;F", "homepage": "https://ise.illinois.edu/directory/profile/pingm;;https://ise.illinois.edu/directory/profile/etesami1;https://hanzhaoml.github.io/;https://www.ece.illinois.edu/directory/profile/milenkov/", "dblp": ";06/7730-3;;03/3520-2;m/OlgicaMilenkovic", "google_scholar": ";M3T3YPIAAAAJ;sWQwdiwAAAAJ;x942ipYAAAAJ;G4LSqL8AAAAJ", "orcid": ";0000-0002-9275-7072;;0000-0002-8579-1600;", "linkedin": ";chao-pan-5abb7314b/;;;", "or_profile": "~Evelyn_Ma1;~Chao_Pan2;~S._Rasoul_Etesami1;~Han_Zhao1;~Olgica_Milenkovic1", "aff": "University of Illinois, Urbana Champaign;Google;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;", "aff_domain": "illinois.edu;google.com;illinois.edu;illinois.edu;", "position": "PhD student;Researcher;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nma2024fedgtst,\ntitle={Fed{GTST}: Boosting Global Transferability of Federated Models via Statistics Tuning},\nauthor={Evelyn Ma and Chao Pan and S. Rasoul Etesami and Han Zhao and Olgica Milenkovic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QXkFC7D6p4}\n}", "github": "", "reviewers": "kpC8;Z3J8;SWUE;RowG", "pdf_size": 1317969, "rating": "4;5;6;6", "confidence": "4;4;3;4", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "55;114;65;86", "wc_strengths": "55;139;28;76", "wc_weaknesses": "285;187;81;345", "wc_questions": "298;5;6;97", "wc_limitations": "4;17;10;37", "wc_review": "697;462;190;641", "wc_reply_reviewers": "0;11;12;145", "wc_reply_authors": "0;16;17;680", "reply_reviewers": "0;1;1;3", "reply_authors": "1;2;2;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.0, 22.594247055390007 ], "wc_strengths_avg": [ 74.5, 40.94203219186854 ], "wc_weaknesses_avg": [ 224.5, 100.22350023821758 ], "wc_questions_avg": [ 101.5, 119.44140823014438 ], "wc_limitations_avg": [ 17.0, 12.429802894656053 ], "wc_review_avg": [ 497.5, 197.6164213824347 ], "wc_reply_reviewers_avg": [ 42.0, 59.65316420777694 ], "wc_reply_authors_avg": [ 178.25, 289.764020368299 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kLgtQoR4tGMJ:scholar.google.com/&scioq=FedGTST:+Boosting+Global+Transferability+of+Federated+Models+via+Statistics+Tuning&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "illinois.edu;google.com;illinois.edu;illinois.edu;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://illinois.edu;https://www.google.com", "aff_unique_abbr": "UIUC;Google", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Urbana-Champaign;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ReF-LDM: A Latent Diffusion Model for Reference-based Face Image Restoration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95228", "id": "QY4SpBhQZI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QY4SpBhQZI", "openreview": "https://openreview.net/forum?id=QY4SpBhQZI", "poster": "/media/PosterPDFs/NeurIPS%202024/95228.png?t=1733247930.616639", "project": "", "author_site": "Chi-Wei Hsiao, Yu-Lun Liu, Cheng-Kun Yang, Sheng-Po Kuo, Kevin Jou, Chia-Ping Chen", "tldr": "", "abstract": "While recent works on blind face image restoration have successfully produced impressive high-quality (HQ) images with abundant details from low-quality (LQ) input images, the generated content may not accurately reflect the real appearance of a person. To address this problem, incorporating well-shot personal images as additional reference inputs may be a promising strategy. Inspired by the recent success of the Latent Diffusion Model (LDM) in image generation, we propose ReF-LDM\u2014an adaptation of LDM designed to generate HQ face images conditioned on one LQ image and multiple HQ reference images. Our LDM-based model incorporates an effective and efficient mechanism, CacheKV, for conditioning on reference images. Additionally, we design a timestep-scaled identity loss, enabling LDM to focus on learning the discriminating features of human faces. Lastly, we construct FFHQ-ref, a dataset consisting of 20,406 high-quality (HQ) face images with corresponding reference images, which can serve as both training and evaluation data for reference-based face restoration models.", "keywords": "Blind face restoration;Diffusion models;Image restoration;Reference-based;Latent diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Chi-Wei Hsiao;Yu-Lun Liu;Cheng-Kun Yang;Sheng-Po Kuo;Kevin Jou;Chia-Ping Chen", "authorids": "~Chi-Wei_Hsiao2;~Yu-Lun_Liu2;~Cheng-Kun_Yang1;~Sheng-Po_Kuo1;~Kevin_Jou1;~Chia-Ping_Chen2", "gender": "F;;M;M;M;", "homepage": ";;;;;", "dblp": "217/5715;;265/0573;;160/5531.html;", "google_scholar": ";;https://scholar.google.com.tw/citations?user=Ke4_ozgAAAAJ;RCi7_iAAAAAJ;;", "orcid": ";;;;;", "linkedin": "chiwei-hsiao/;;%E8%A8%BC%E7%90%A8-%E6%A5%8A-497950119/;;;", "or_profile": "~Chi-Wei_Hsiao2;~Yu-Lun_Liu2;~Cheng-Kun_Yang1;~Sheng-Po_Kuo1;~Kevin_Jou1;~Chia-Ping_Chen2", "aff": "MediaTek Inc.;;MediaTek;Mediatek;MediaTek;", "aff_domain": "mediatek.com;;mediatek.com;mediatek.com;mediatek.com;", "position": "Researcher;;Researcher;Researcher;Exec VP/CTO;", "bibtex": "@inproceedings{\nhsiao2024refldm,\ntitle={ReF-{LDM}: A Latent Diffusion Model for Reference-based Face Image Restoration},\nauthor={Chi-Wei Hsiao and Yu-Lun Liu and Cheng-Kun Yang and Sheng-Po Kuo and Kevin Jou and Chia-Ping Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QY4SpBhQZI}\n}", "github": "", "reviewers": "ZeBa;5Xiy;E2Jp;G6n4", "pdf_size": 38161125, "rating": "3;5;5;6", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "3;2;2;3", "presentation": "2;2;2;4", "wc_summary": "53;70;46;84", "wc_strengths": "45;66;34;41", "wc_weaknesses": "57;170;92;31", "wc_questions": "14;15;82;32", "wc_limitations": "2;34;7;6", "wc_review": "171;355;261;194", "wc_reply_reviewers": "0;55;15;12", "wc_reply_authors": "30;145;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 63.25, 14.821858857781638 ], "wc_strengths_avg": [ 46.5, 11.926860441876563 ], "wc_weaknesses_avg": [ 87.5, 52.31873469418006 ], "wc_questions_avg": [ 35.75, 27.643941470058138 ], "wc_limitations_avg": [ 12.25, 12.695963925594622 ], "wc_review_avg": [ 245.25, 71.47158526295607 ], "wc_reply_reviewers_avg": [ 20.5, 20.694202086574876 ], "wc_reply_authors_avg": [ 43.75, 59.725936577001455 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UAQ7B_6Ayl4J:scholar.google.com/&scioq=ReF-LDM:+A+Latent+Diffusion+Model+for+Reference-based+Face+Image+Restoration&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "mediatek.com;;mediatek.com;mediatek.com;mediatek.com;", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "MediaTek Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.mediatek.com/", "aff_unique_abbr": "MediaTek", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "LLMDFA: Analyzing Dataflow in Code with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95227", "id": "QZ2d8E8Whu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QZ2d8E8Whu", "openreview": "https://openreview.net/forum?id=QZ2d8E8Whu", "poster": "", "project": "", "author_site": "Chengpeng Wang, Wuqi Zhang, Zian Su, Xiangzhe Xu, Xiaoheng Xie, Xiangyu Zhang", "tldr": "", "abstract": "Dataflow analysis is a fundamental code analysis technique that identifies dependencies between program values. Traditional approaches typically necessitate successful compilation and expert customization, hindering their applicability and usability for analyzing uncompilable programs with evolving analysis needs in real-world scenarios. This paper presents LLMDFA, an LLM-powered compilation-free and customizable dataflow analysis framework. To address hallucinations for reliable results, we decompose the problem into several subtasks and introduce a series of novel strategies. Specifically, we leverage LLMs to synthesize code that outsources delicate reasoning to external expert tools, such as using a parsing library to extract program values of interest and invoking an automated theorem prover to validate path feasibility. Additionally, we adopt a few-shot chain-of-thought prompting to summarize dataflow facts in individual functions, aligning the LLMs with the program semantics of small code snippets to mitigate hallucinations. We evaluate LLMDFA on synthetic programs to detect three representative types of bugs and on real-world Android applications for customized bug detection. On average, LLMDFA achieves 87.10% precision and 80.77% recall, surpassing existing techniques with F1 score improvements of up to 0.35. We have open-sourced LLMDFA at https://github.com/chengpeng-wang/LLMDFA.", "keywords": "LLM for code;code reasoning;dataflow analysis", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Chengpeng Wang;Wuqi Zhang;Zian Su;Xiangzhe Xu;Xiaoheng Xie;Xiangyu Zhang", "authorids": "~Chengpeng_Wang2;~Wuqi_Zhang2;~Zian_Su1;~Xiangzhe_Xu1;~Xiaoheng_Xie2;~Xiangyu_Zhang3", "gender": "M;M;M;;M;F", "homepage": "https://wcphkust.github.io/;https://troublor.xyz;;https://sites.google.com/view/alex-xu/;https://www.cs.purdue.edu/homes/xyzhang;", "dblp": "https://dblp.uni-trier.de/pid/240/2235.html;295/8585.html;;276/3462;;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;MdH5hE0AAAAJ;gSQzZT0AAAA;;PXbu1wIAAAAJ;", "orcid": "0000-0003-0617-5322;;;;;", "linkedin": ";;;;;", "or_profile": "~Chengpeng_Wang2;~Wuqi_Zhang2;~Zian_Su1;~Xiangzhe_Xu1;~Xiangyu_Zhang3;~XIE_Xiaoheng1", "aff": "Purdue University;Hong Kong University of Science and Technology;Purdue University;Purdue University;Purdue University;Hong Kong University of Science and Technology", "aff_domain": "purdue.edu;connect.ust.hk;purdue.edu;purdue.edu;cs.purdue.edu;hkust.edu", "position": "Postdoc;PhD student;PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2024llmdfa,\ntitle={{LLMDFA}: Analyzing Dataflow in Code with Large Language Models},\nauthor={Chengpeng Wang and Wuqi Zhang and Zian Su and Xiangzhe Xu and Xiaoheng Xie and Xiangyu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QZ2d8E8Whu}\n}", "github": "", "reviewers": "KJcQ;ep1y;jfEP;UBFi", "pdf_size": 2997299, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "89;62;67;52", "wc_strengths": "26;22;122;39", "wc_weaknesses": "313;201;77;33", "wc_questions": "66;35;54;96", "wc_limitations": "2;5;7;5", "wc_review": "496;325;327;225", "wc_reply_reviewers": "23;197;42;0", "wc_reply_authors": "25;391;39;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.5, 13.536986370680884 ], "wc_strengths_avg": [ 52.25, 40.757667990207686 ], "wc_weaknesses_avg": [ 156.0, 109.59470790143108 ], "wc_questions_avg": [ 62.75, 22.151467220028564 ], "wc_limitations_avg": [ 4.75, 1.7853571071357126 ], "wc_review_avg": [ 343.25, 97.35598338058118 ], "wc_reply_reviewers_avg": [ 65.5, 77.36439749652291 ], "wc_reply_authors_avg": [ 113.75, 160.67883339133377 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14929316644296048998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "purdue.edu;connect.ust.hk;purdue.edu;purdue.edu;cs.purdue.edu;hkust.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Purdue University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.ust.hk", "aff_unique_abbr": "Purdue;HKUST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Safe Exploitative Play with Untrusted Type Beliefs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95226", "id": "QZtJ22aOV4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QZtJ22aOV4", "openreview": "https://openreview.net/forum?id=QZtJ22aOV4", "poster": "/media/PosterPDFs/NeurIPS%202024/95226.png?t=1732263492.8088715", "project": "", "author_site": "Tongxin Li, Tinashe Handina, Shaolei Ren, Adam Wierman", "tldr": "", "abstract": "The combination of the Bayesian game and learning has a rich history, with the idea of controlling a single agent in a system composed of multiple agents with unknown behaviors given a set of types, each specifying a possible behavior for the other agents. The idea is to plan an agent's own actions with respect to those types which it believes are most likely to maximize the payoff. However, the type beliefs are often learned from past actions and likely to be incorrect. With this perspective in mind, we consider an agent in a game with type predictions of other components, and investigate the impact of incorrect beliefs to the agent\u2019s payoff. In particular, we formally define a tradeoff between risk and opportunity by comparing the payoff obtained against the optimal payoff, which is represented by a gap caused by trusting or distrusting the learned beliefs.Our main results characterize the tradeoff by establishing upper and lower bounds on the Pareto front for both normal-form and stochastic Bayesian games, with numerical results provided.", "keywords": "Bayesian games;type beliefs;opportunity and risk tradeoff", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Tongxin Li;Tinashe Handina;Shaolei Ren;Adam Wierman", "authorids": "~Tongxin_Li1;~Tinashe_Handina1;~Shaolei_Ren1;~Adam_Wierman1", "gender": "M;;;M", "homepage": "https://tongxin.me/;;;https://adamwierman.com/", "dblp": "140/7353;;;56/4447", "google_scholar": "qyNc3CkAAAAJ;;;4OvOdSgAAAAJ", "orcid": ";;;0000-0002-5923-0199", "linkedin": ";tinashe-handina02250/;;adam-wierman-a529474/", "or_profile": "~Tongxin_Li1;~Tinashe_Handina1;~Shaolei_Ren1;~Adam_Wierman1", "aff": "The Chinese University of Hong Kong, Shenzhen;California Institute of Technology;;California Institute of Technology", "aff_domain": "cuhk.edu.cn;caltech.edu;;caltech.edu", "position": "Assistant Professor;PhD student;;Professor", "bibtex": "@inproceedings{\nli2024safe,\ntitle={Safe Exploitative Play with Untrusted Type Beliefs},\nauthor={Tongxin Li and Tinashe Handina and Shaolei Ren and Adam Wierman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QZtJ22aOV4}\n}", "github": "", "reviewers": "4mAk;caR8;m4Kj;Ui8p;iunt", "pdf_size": 5110343, "rating": "4;5;6;7;7", "confidence": "2;1;2;1;2", "soundness": "4;2;3;4;4", "novelty": "3;2;3;3;3", "presentation": "3;2;2;3;3", "wc_summary": "231;87;33;58;97", "wc_strengths": "24;43;23;41;94", "wc_weaknesses": "118;28;118;46;260", "wc_questions": "20;1;4;80;281", "wc_limitations": "7;8;1;3;5", "wc_review": "400;167;179;228;737", "wc_reply_reviewers": "159;5;0;77;9", "wc_reply_authors": "283;32;0;152;19", "reply_reviewers": "1;1;0;2;1", "reply_authors": "3;2;1;2;2", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 1.6, 0.4898979485566356 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 101.2, 68.68012813034059 ], "wc_strengths_avg": [ 45.0, 25.868900247208035 ], "wc_weaknesses_avg": [ 114.0, 81.69210488168365 ], "wc_questions_avg": [ 77.2, 105.81001842925839 ], "wc_limitations_avg": [ 4.8, 2.5612496949731396 ], "wc_review_avg": [ 342.2, 214.27963038982494 ], "wc_reply_reviewers_avg": [ 50.0, 61.344926440578604 ], "wc_reply_authors_avg": [ 97.2, 107.08762767005346 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.14002800840280094, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wcEMsaMLFAsJ:scholar.google.com/&scioq=Safe+Exploitative+Play+with+Untrusted+Type+Beliefs&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cuhk.edu.cn;caltech.edu;;caltech.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.caltech.edu", "aff_unique_abbr": "CUHK;Caltech", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Shenzhen;Pasadena", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Learning Distributions on Manifolds with Free-Form Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95225", "id": "QbPHYPZKJI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QbPHYPZKJI", "openreview": "https://openreview.net/forum?id=QbPHYPZKJI", "poster": "/media/PosterPDFs/NeurIPS%202024/95225.png?t=1733235244.1805196", "project": "", "author_site": "Peter Sorrenson, Felix Draxler, Armand Rousselot, Sander Hummerich, Ullrich K\u00f6the", "tldr": "", "abstract": "We propose Manifold Free-Form Flows (M-FFF), a simple new generative model for data on manifolds. The existing approaches to learning a distribution on arbitrary manifolds are expensive at inference time, since sampling requires solving a differential equation. Our method overcomes this limitation by sampling in a single function evaluation. The key innovation is to optimize a neural network via maximum likelihood on the manifold, possible by adapting the free-form flow framework to Riemannian manifolds. M-FFF is straightforwardly adapted to any manifold with a known projection. It consistently matches or outperforms previous single-step methods specialized to specific manifolds. It is typically two orders of magnitude faster than multi-step methods based on diffusion or flow matching, achieving better likelihoods in several experiments. We provide our code at https://github.com/vislearn/FFF.", "keywords": "generative model;riemannian geometry;riemannian manifolds;free-form flows;normalizing flows", "primary_area": "generative_models", "supplementary_material": "", "author": "Peter Sorrenson;Felix Draxler;Armand Rousselot;Sander Hummerich;Ullrich Koethe", "authorids": "~Peter_Sorrenson1;~Felix_Draxler1;~Armand_Rousselot1;~Sander_Hummerich1;~Ullrich_Koethe1", "gender": "M;M;M;M;M", "homepage": ";;;https://hci.iwr.uni-heidelberg.de/vislearn/people/ullrich-koethe/;", "dblp": "242/9148;304/8323;304/8414;15/809;256/5384", "google_scholar": "rFbxDSAAAAAJ;FqvjidYAAAAJ;https://scholar.google.de/citations?user=ELi2EVIAAAAJ;gt-yaNMAAAAJ;894uGm4AAAAJ", "orcid": "0000-0003-0978-1539;;;0000-0001-6036-1287;", "linkedin": "felix-draxler/;;;;", "or_profile": "~Felix_Draxler1;~Armand_Rousselot1;~Sander_Hummerich1;~Ullrich_Koethe1;~Peter_Rangi_Sorrenson1", "aff": "Heidelberg University;Heidelberg University, Ruprecht-Karls-Universit\u00e4t Heidelberg;Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg University;Heidelberg University", "aff_domain": "uni-heidelberg.de;iwr.uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de", "position": "PhD student;PhD student;MS student;Adjunct Professor;PhD student", "bibtex": "@inproceedings{\nsorrenson2024learning,\ntitle={Learning Distributions on Manifolds with Free-Form Flows},\nauthor={Peter Sorrenson and Felix Draxler and Armand Rousselot and Sander Hummerich and Ullrich Koethe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QbPHYPZKJI}\n}", "github": "", "reviewers": "Z5qx;m8ZT;nDWB;RvPv", "pdf_size": 7174558, "rating": "5;6;7;7", "confidence": "4;4;5;5", "soundness": "2;4;4;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "103;95;56;141", "wc_strengths": "53;66;64;86", "wc_weaknesses": "77;236;78;239", "wc_questions": "350;208;289;26", "wc_limitations": "65;40;10;6", "wc_review": "648;645;497;498", "wc_reply_reviewers": "81;33;67;223", "wc_reply_authors": "0;0;0;415", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.75, 30.185882461839675 ], "wc_strengths_avg": [ 67.25, 11.903255857117413 ], "wc_weaknesses_avg": [ 157.5, 80.00781211856753 ], "wc_questions_avg": [ 218.25, 121.89006317169583 ], "wc_limitations_avg": [ 30.25, 23.98306694315804 ], "wc_review_avg": [ 572.0, 74.50838878945109 ], "wc_reply_reviewers_avg": [ 101.0, 72.56721022610694 ], "wc_reply_authors_avg": [ 103.75, 179.70027128527101 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7626742167717131571&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uni-heidelberg.de;iwr.uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Heidelberg University;Ruprecht-Karls-Universit\u00e4t Heidelberg", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-heidelberg.de;https://www.uni-heidelberg.de/", "aff_unique_abbr": "Uni Heidelberg;Uni Heidelberg", "aff_campus_unique_index": "1", "aff_campus_unique": ";Heidelberg", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Selective Attention: Enhancing Transformer through Principled Context Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95224", "id": "QbqLcwMXfF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QbqLcwMXfF", "openreview": "https://openreview.net/forum?id=QbqLcwMXfF", "poster": "/media/PosterPDFs/NeurIPS%202024/95224.png?t=1733457000.8398347", "project": "", "author_site": "Xuechen Zhang, Xiangyu Chang, Mingchen Li, Amit Roy-Chowdhury, Jiasi Chen, Samet Oymak", "tldr": "", "abstract": "The attention mechanism within the transformer architecture enables the model to weigh and combine tokens based on their relevance to the query. While self-attention has enjoyed major success, it notably treats all queries $q$ in the same way by applying the mapping $V^\\top\\text{softmax}(Kq)$, where $V,K$ are the value and key embeddings respectively. In this work, we argue that this uniform treatment hinders the ability to control contextual sparsity and relevance. As a solution, we introduce the Selective Self-Attention (SSA) layer that augments the softmax nonlinearity with a principled temperature scaling strategy. By controlling temperature, SSA adapts the contextual sparsity of the attention map to the query embedding and its position in the context window. Through theory and experiments, we demonstrate that this alleviates attention dilution, aids the optimization process, and enhances the model's ability to control softmax spikiness of individual queries. We also incorporate temperature scaling for value embeddings and show that it boosts the model's ability to suppress irrelevant/noisy tokens. Notably, SSA is a lightweight method which introduces less than 0.5\\% new parameters through a weight-sharing strategy and can be fine-tuned on existing LLMs. Extensive empirical evaluations demonstrate that SSA-equipped models achieve a noticeable and consistent accuracy improvement on language modeling benchmarks.", "keywords": "attention mechanism;transformer;sparsity;language model;architecture", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/e05064267b271acaaebb64678e6d834f531c09b4.zip", "author": "Xuechen Zhang;Xiangyu Chang;Mingchen Li;Amit Roy-Chowdhury;Jiasi Chen;Samet Oymak", "authorids": "~Xuechen_Zhang2;~Xiangyu_Chang2;~Mingchen_Li1;~Amit_Roy-Chowdhury2;~Jiasi_Chen1;~Samet_Oymak2", "gender": "F;M;Non-Binary;F;M;M", "homepage": "https://www.linkedin.com/in/xuechen-zhang-9a5385213;;http://nya.space/;https://jiasi.engin.umich.edu/;https://vlg.engr.ucr.edu/amit;https://sota.engin.umich.edu/", "dblp": "51/7435-2;;184/9471;35/9005;c/AmitKRoyChowdhury;89/8771", "google_scholar": "Xj4fIC4AAAAJ;mQh2GmoAAAAJ;8vqVgdsAAAAJ;;hfgwx0oAAAAJ;AY6InkoAAAAJ", "orcid": ";;;;0000-0001-6690-9725;", "linkedin": ";;;;;", "or_profile": "~Xuechen_Zhang2;~Xiangyu_Chang2;~Mingchen_Li1;~Jiasi_Chen1;~Amit_Roy-chowdhury1;~Samet_Oymak1", "aff": "University of California, Riverside;University of California, Riverside;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of California, Riverside;University of Michigan - Ann Arbor", "aff_domain": "ucr.edu;ucr.edu;umich.edu;umich.edu;ucr.edu;umich.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024selective,\ntitle={Selective Attention: Enhancing Transformer through Principled Context Control},\nauthor={Xuechen Zhang and Xiangyu Chang and Mingchen Li and Amit Roy-Chowdhury and Jiasi Chen and Samet Oymak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QbqLcwMXfF}\n}", "github": "", "reviewers": "qMbv;Lp6e;NV9t;TP8j", "pdf_size": 456381, "rating": "5;5;5;7", "confidence": "3;5;3;4", "soundness": "2;2;2;3", "novelty": "2;4;2;4", "presentation": "3;2;2;3", "wc_summary": "65;16;97;65", "wc_strengths": "19;45;37;76", "wc_weaknesses": "114;185;161;116", "wc_questions": "44;41;4;55", "wc_limitations": "5;1;1;33", "wc_review": "247;288;300;345", "wc_reply_reviewers": "14;88;13;90", "wc_reply_authors": "70;127;115;70", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;3;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.75, 28.951468011138918 ], "wc_strengths_avg": [ 44.25, 20.60794749605113 ], "wc_weaknesses_avg": [ 144.0, 30.224162519414826 ], "wc_questions_avg": [ 36.0, 19.196353820452465 ], "wc_limitations_avg": [ 10.0, 13.379088160259652 ], "wc_review_avg": [ 295.0, 34.92134018046845 ], "wc_reply_reviewers_avg": [ 51.25, 37.75827723824274 ], "wc_reply_authors_avg": [ 95.5, 25.85053190942113 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16612353358219166261&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "ucr.edu;ucr.edu;umich.edu;umich.edu;ucr.edu;umich.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;1", "aff_unique_norm": "University of California, Riverside;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucr.edu;https://www.umich.edu", "aff_unique_abbr": "UCR;UM", "aff_campus_unique_index": "0;0;1;1;0;1", "aff_campus_unique": "Riverside;Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Facilitating Multimodal Classification via Dynamically Learning Modality Gap", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95223", "id": "QbsPz0SnyV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QbsPz0SnyV", "openreview": "https://openreview.net/forum?id=QbsPz0SnyV", "poster": "", "project": "", "author_site": "Yang Yang, Fengqiang Wan, Qing-Yuan Jiang, Yi Xu", "tldr": "", "abstract": "Multimodal learning falls into the trap of the optimization dilemma due to the modality imbalance phenomenon, leading to unsatisfactory performance in real applications. A core reason for modality imbalance is that the models of each modality converge at different rates. Many attempts naturally focus on adjusting learning procedures adaptively. Essentially, the reason why models converge at different rates is because the difficulty of fitting category labels is inconsistent for each modality during learning. From the perspective of fitting labels, we find that appropriate positive intervention label fitting can correct this difference in learning ability. By exploiting the ability of contrastive learning to intervene in the learning of category label fitting, we propose a novel multimodal learning approach that dynamically integrates unsupervised contrastive learning and supervised multimodal learning to address the modality imbalance problem. We find that a simple yet heuristic integration strategy can significantly alleviate the modality imbalance phenomenon. Moreover, we design a learning-based integration strategy to integrate two losses dynamically, further improving the performance. Experiments on widely used datasets demonstrate the superiority of our method compared with state-of-the-art (SOTA) multimodal learning approaches. The code is available at https://github.com/njustkmg/NeurIPS24-LFM.", "keywords": "Multimodal Learning;Modality Gap;Multimodal Imbalance", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/849991bbb47f3643dff2f003562f5f1c41b0e584.zip", "author": "Yang Yang;Fengqiang Wan;Qing-Yuan Jiang;Yi Xu", "authorids": "~Yang_Yang17;~Fengqiang_Wan2;~Qing-Yuan_Jiang3;~Yi_Xu8", "gender": "M;M;;M", "homepage": "http://www.njustkmg.cn/;https://jiangqy.github.io;https://github.com/wanfengqiang;https://yxu71.github.io", "dblp": "48/450-74;165/3025;351/3064;14/5580", "google_scholar": "_6NJip0AAAAJ;2RtnHp4AAAAJ;;D4jEMqEAAAAJ", "orcid": "0000-0002-5245-3584;;;0009-0000-9900-6143", "linkedin": ";;;", "or_profile": "~Yang_Yang17;~Qing-Yuan_Jiang3;~Fengqiang_wan1;~YI_XU3", "aff": "Nanjing University of Science and Technology;Huawei Technologies Ltd.;Nanjing University of Science and Technology;Dalian University of Technology", "aff_domain": "njust.edu.cn;huawei.com;njust.edu.cn;dlut.edu.cn", "position": "Full Professor;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2024facilitating,\ntitle={Facilitating Multimodal Classification via Dynamically Learning Modality Gap},\nauthor={Yang Yang and Fengqiang Wan and Qing-Yuan Jiang and Yi Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QbsPz0SnyV}\n}", "github": "", "reviewers": "XdY4;hRBu;CoWQ;g7cu", "pdf_size": 500933, "rating": "3;6;7;7", "confidence": "4;5;5;5", "soundness": "1;3;3;2", "novelty": "1;3;3;2", "presentation": "3;2;3;3", "wc_summary": "48;40;93;58", "wc_strengths": "47;52;140;61", "wc_weaknesses": "332;67;27;230", "wc_questions": "8;2;4;43", "wc_limitations": "1;3;1;1", "wc_review": "436;164;265;393", "wc_reply_reviewers": "113;18;10;36", "wc_reply_authors": "310;17;0;11", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.75, 20.22838352414745 ], "wc_strengths_avg": [ 75.0, 37.8615900352851 ], "wc_weaknesses_avg": [ 164.0, 123.24568957979828 ], "wc_questions_avg": [ 14.25, 16.7388022271607 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 314.5, 107.26718976462467 ], "wc_reply_reviewers_avg": [ 44.25, 40.794454279963105 ], "wc_reply_authors_avg": [ 84.5, 130.3351449149461 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9684959969581861, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=530032686190241935&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "njust.edu.cn;huawei.com;njust.edu.cn;dlut.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanjing University of Science and Technology;Huawei;Dalian University of Technology", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.nust.edu.cn/;https://www.huawei.com;http://www.dlut.edu.cn/", "aff_unique_abbr": "NUST;Huawei;DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "MMBench-Video: A Long-Form Multi-Shot Benchmark for Holistic Video Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97696", "id": "Qdf3ad5MXH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Qdf3ad5MXH", "openreview": "https://openreview.net/forum?id=Qdf3ad5MXH", "poster": "/media/PosterPDFs/NeurIPS%202024/97696.png?t=1733317753.8602273", "project": "", "author_site": "Xinyu Fang, Kangrui Mao, Haodong Duan, Xiangyu Zhao, Yining Li, Dahua Lin, Kai Chen", "tldr": "", "abstract": "The advent of large vision-language models (LVLMs) has spurred research into their applications in multi-modal contexts, particularly in video understanding. Traditional VideoQA benchmarks, despite providing quantitative metrics, often fail to encompass the full spectrum of video content and inadequately assess models' temporal comprehension. To address these limitations, we introduce MMBench-Video, \na quantitative benchmark designed to rigorously evaluate LVLMs' proficiency in video understanding. MMBench-Video incorporates lengthy videos from YouTube and employs free-form questions, mirroring practical use cases. The benchmark is meticulously crafted to probe the models' temporal reasoning skills, with all questions human-annotated according to a carefully constructed ability taxonomy.\nWe employ GPT-4 for automated assessment, demonstrating superior accuracy and robustness over earlier LLM-based evaluations. \nUtilizing MMBench-Video, we have conducted comprehensive evaluations that include both proprietary and open-source LVLMs for images and videos. MMBench-Video stands as a valuable resource for the research community, facilitating improved evaluation of LVLMs and catalyzing progress in the field of video understanding.", "keywords": "video understanding; long-form video; web video", "primary_area": "", "supplementary_material": "", "author": "Xinyu Fang;Kangrui Mao;Haodong Duan;Xiangyu Zhao;Yining Li;Dahua Lin;Kai Chen", "authorids": "~Xinyu_Fang1;~Kangrui_Mao1;~Haodong_Duan1;~Xiangyu_Zhao5;~Yining_Li1;~Dahua_Lin1;~Kai_Chen4", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/FangXinyu-0913;;https://kennymckormick.github.io;https://ee.sjtu.edu.cn/;https://liyn.site;http://dahua.site;https://chenkai.site/", "dblp": "143/0236;372/7182;211/7919;;166/3420;53/6088;181/2839-26", "google_scholar": "QZk6nZ8AAAAJ;S9jX-7cAAAAJ;vi3W-m8AAAAJ;;https://scholar.google.com.hk/citations?user=y_cp1sUAAAAJ;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ", "orcid": "0009-0003-3764-1266;;0000-0002-3052-4177;;;;0000-0002-6820-2325", "linkedin": ";kangrui-mao/;haodong-duan-bb9349166/;;;;", "or_profile": "~Xinyu_Fang1;~Kangrui_Mao1;~Haodong_Duan1;~Xiangyu_Zhao5;~Yining_Li1;~Dahua_Lin1;~Kai_Chen4", "aff": "College of Computer Science and Technology, Zhejiang University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai AI Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "cs.zju.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn", "position": "PhD student;Undergrad student;Postdoc;PhD student;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nfang2024mmbenchvideo,\ntitle={{MMB}ench-Video: A Long-Form Multi-Shot Benchmark for Holistic Video Understanding},\nauthor={Xinyu Fang and Kangrui Mao and Haodong Duan and Xiangyu Zhao and Yining Li and Dahua Lin and Kai Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Qdf3ad5MXH}\n}", "github": "", "reviewers": "FpG8;kFa6;aXTw;8fpf;AJQp", "pdf_size": 8229853, "rating": "5;5;6;7;7", "confidence": "4;4;4;4;5", "wc_summary_and_contributions": "70;153;118;55;54", "wc_strengths": "75;27;55;79;78", "wc_improvement": "121;52;50;59;140", "wc_limitations": "1;23;1;160;1", "wc_correctness": "1;21;1;1;1", "wc_clarity": "1;55;1;1;1", "wc_relation_to_prior_work": "1;8;1;2;1", "wc_documentation": "1;14;1;2;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "272;354;229;360;278", "wc_reply_reviewers": "0;0;135;0;140", "wc_reply_authors": "309;442;153;315;191", "reply_reviewers": "0;0;2;0;2", "reply_authors": "4;4;6;4;5", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 90.0, 39.176523582370095 ], "wc_strengths_avg": [ 62.8, 19.923855048659636 ], "wc_improvement_avg": [ 84.4, 38.23401626823946 ], "wc_limitations_avg": [ 37.2, 61.98838600899365 ], "wc_correctness_avg": [ 5.0, 8.0 ], "wc_clarity_avg": [ 11.8, 21.6 ], "wc_relation_to_prior_work_avg": [ 2.6, 2.7276363393971716 ], "wc_documentation_avg": [ 3.8, 5.1146847410177685 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 298.6, 50.62647528714595 ], "wc_reply_reviewers_avg": [ 55.0, 67.37952211169207 ], "wc_reply_authors_avg": [ 282.0, 102.3132444994293 ], "reply_reviewers_avg": [ 0.8, 0.9797958971132713 ], "reply_authors_avg": [ 4.6, 0.7999999999999999 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5590169943749475, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9817094672112351410&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.zju.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn", "author_num": 7, "aff_unique_index": "0;1;2;1;3;4;3", "aff_unique_norm": "Zhejiang University;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": "College of Computer Science and Technology;;;;", "aff_unique_url": "http://www.zju.edu.cn;https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "ZJU;SJTU;Shanghai AI Lab;SAIL;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hybrid Mamba for Few-Shot Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95222", "id": "Qe2BKeCEBC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Qe2BKeCEBC", "openreview": "https://openreview.net/forum?id=Qe2BKeCEBC", "poster": "/media/PosterPDFs/NeurIPS%202024/95222.png?t=1731144941.2766514", "project": "", "author_site": "Qianxiong Xu, Xuanyi Liu, Lanyun Zhu, Guosheng Lin, Cheng Long, Ziyue Li, Rui Zhao", "tldr": "", "abstract": "Many few-shot segmentation (FSS) methods use cross attention to fuse support foreground (FG) into query features, regardless of the quadratic complexity. A recent advance Mamba can also well capture intra-sequence dependencies, yet the complexity is only linear. Hence, we aim to devise a cross (attention-like) Mamba to capture inter-sequence dependencies for FSS. A simple idea is to scan on support features to selectively compress them into the hidden state, which is then used as the initial hidden state to sequentially scan query features. Nevertheless, it suffers from (1) support forgetting issue: query features will also gradually be compressed when scanning on them, so the support features in hidden state keep reducing, and many query pixels cannot fuse sufficient support features; (2) intra-class gap issue: query FG is essentially more similar to itself rather than to support FG, i.e., query may prefer not to fuse support features but their own ones from the hidden state, yet the success of FSS relies on the effective use of support information. To tackle them, we design a hybrid Mamba network (HMNet), including (1) a support recapped Mamba to periodically recap the support features when scanning query, so the hidden state can always contain rich support information; (2) a query intercepted Mamba to forbid the mutual interactions among query pixels, and encourage them to fuse more support features from the hidden state. Consequently, the support information is better utilized, leading to better performance. Extensive experiments have been conducted on two public benchmarks, showing the superiority of HMNet. The code is available at https://github.com/Sam1224/HMNet.", "keywords": "few-shot segmentation;mamba;intra-sequence dependencies", "primary_area": "machine_vision", "supplementary_material": "/attachment/c2e47c3a1b58104b90d7e4c87b1052435097d66e.zip", "author": "Qianxiong Xu;Xuanyi Liu;Lanyun Zhu;Guosheng Lin;Cheng Long;Ziyue Li;Rui Zhao", "authorids": "~Qianxiong_Xu1;~Xuanyi_Liu3;~Lanyun_Zhu1;~Guosheng_Lin1;~Cheng_Long1;~Ziyue_Li2;~Rui_Zhao6", "gender": "M;M;M;M;;M;M", "homepage": "https://github.com/Sam1224;;https://lanyunzhu.site;https://personal.ntu.edu.sg/c.long/index.html;https://bonaldli.github.io/;http://zhaorui.xyz/;https://guosheng.github.io/", "dblp": "328/0688;;245/2640;58/10813;189/5871-2;26/2578-1;126/4778", "google_scholar": "1jz1_hMAAAAJ;;urOSnlQAAAAJ;LybJ7ksAAAAJ;q5_My2AAAAAJ;1c9oQNMAAAAJ;https://scholar.google.com.au/citations?user=ZudEhvcAAAAJ", "orcid": "0000-0001-9175-6783;0000-0001-7667-0252;;0000-0001-6806-8405;0000-0003-4983-9352;;0000-0002-0329-7458", "linkedin": ";;;;;;", "or_profile": "~Qianxiong_Xu1;~Xuanyi_Liu3;~Lanyun_Zhu1;~Cheng_Long1;~Ziyue_Li2;~Rui_Zhao6;~Guosheng_Lin2", "aff": "Nanyang Technological University;Nanyang Technological University;Singapore University of Technology and Design;Nanyang Technological University;University of Cologne;SenseTime Research;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;sutd.edu.sg;ntu.edu.sg;uni-koeln.de;sensetime.com;ntu.edu.sg", "position": "PhD student;Researcher;PhD student;Assistant Professor;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nxu2024hybrid,\ntitle={Hybrid Mamba for Few-Shot Segmentation},\nauthor={Qianxiong Xu and Xuanyi Liu and Lanyun Zhu and Guosheng Lin and Cheng Long and Ziyue Li and Rui Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Qe2BKeCEBC}\n}", "github": "", "reviewers": "dFqt;noa5;iSLu;hBYs", "pdf_size": 2419947, "rating": "4;5;5;5", "confidence": "5;4;2;5", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "37;62;114;50", "wc_strengths": "14;36;44;38", "wc_weaknesses": "226;83;96;80", "wc_questions": "2;8;2;32", "wc_limitations": "1;23;26;9", "wc_review": "280;212;282;209", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.75, 29.22648627529488 ], "wc_strengths_avg": [ 33.0, 11.357816691600547 ], "wc_weaknesses_avg": [ 121.25, 60.77571472224741 ], "wc_questions_avg": [ 11.0, 12.36931687685298 ], "wc_limitations_avg": [ 14.75, 10.207227831296802 ], "wc_review_avg": [ 245.75, 35.27304211433995 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14514138658453002025&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;ntu.edu.sg;sutd.edu.sg;ntu.edu.sg;uni-koeln.de;sensetime.com;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;0;2;3;0", "aff_unique_norm": "Nanyang Technological University;Singapore University of Technology and Design;University of Cologne;SenseTime", "aff_unique_dep": ";;;SenseTime Research", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sutd.edu.sg;https://www.uni-koeln.de/;https://www.sensetime.com", "aff_unique_abbr": "NTU;SUTD;UC;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2;0", "aff_country_unique": "Singapore;Germany;China" }, { "title": "Grasp as You Say: Language-guided Dexterous Grasp Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95221", "id": "QeWibaTmnn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QeWibaTmnn", "openreview": "https://openreview.net/forum?id=QeWibaTmnn", "poster": "/media/PosterPDFs/NeurIPS%202024/95221.png?t=1731424473.77484", "project": "", "author_site": "Yi-Lin Wei, Jian-Jian Jiang, Chengyi Xing, Xian-Tuo Tan, Xiao-Ming Wu, Hao Li, Mark Cutkosky, Wei-Shi Zheng", "tldr": "", "abstract": "This paper explores a novel task \"Dexterous Grasp as You Say'' (DexGYS), enabling robots to perform dexterous grasping based on human commands expressed in natural language. However, the development of this field is hindered by the lack of datasets with natural human guidance; thus, we propose a language-guided dexterous grasp dataset, named DexGYSNet, offering high-quality dexterous grasp annotations along with flexible and fine-grained human language guidance. Our dataset construction is cost-efficient, with the carefully-design hand-object interaction retargeting strategy, and the LLM-assisted language guidance annotation system. Equipped with this dataset, we introduce the DexGYSGrasp framework for generating dexterous grasps based on human language instructions, with the capability of producing grasps that are intent-aligned, high quality and diversity. To achieve this capability, our framework decomposes the complex learning process into two manageable progressive objectives and introduce two components to realize them. The first component learns the grasp distribution focusing on intention alignment and generation diversity. And the second component refines the grasp quality while maintaining intention consistency. Extensive experiments are conducted on DexGYSNet and real world environments for validation.", "keywords": "Dexterous Grasp Generation;Robotics", "primary_area": "robotics", "supplementary_material": "/attachment/21fc9d7449e3ee01624d27ca7a45285d5c527908.zip", "author": "Yi-Lin Wei;Jian-Jian Jiang;Chengyi Xing;Xiantuo Tan;Xiao-Ming Wu;Hao Li;Mark Cutkosky;Wei-Shi Zheng", "authorids": "~Yi-Lin_Wei1;~Jian-Jian_Jiang1;~Chengyi_Xing1;~Xiantuo_Tan1;~Xiao-Ming_Wu5;~Hao_Li23;~Mark_Cutkosky1;~Wei-Shi_Zheng3", "gender": "M;M;M;M;Not Specified;M;M;M", "homepage": "https://github.com/JianJian-Jiang;https://chengyi-xing.com;https://github.com/krtxt;https://dravenalg.github.io/;https://haolirobo.github.io;http://bdml.stanford.edu;http://www.isee-ai.cn/~zhwshi;", "dblp": ";348/9124;;98/2898-2;;;30/8399;376/2518.html", "google_scholar": ";BglGZXEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;IDmUyTEAAAAJ;https://scholar.google.com/citations?hl=en;AwqDDGoAAAAJ;", "orcid": ";;;0000-0003-1115-8551;0000-0001-5030-457X;0000-0003-4730-0900;;", "linkedin": ";xcyhbp/;%E5%85%88%E6%8B%93-%E8%B0%AD-519a9a2a3/;;hao-li-sjtu-pu/;;;", "or_profile": "~Jian-Jian_Jiang1;~Chengyi_Xing1;~Xiantuo_Tan1;~Xiao-Ming_Wu5;~Hao_Li23;~Mark_Cutkosky1;~Wei-Shi_Zheng3;~yilin_wei1", "aff": "SUN YAT-SEN UNIVERSITY;Stanford University;Wuhan University;Macquarie University;Stanford University;Stanford University;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;stanford.edu;whu.edu.cn;mq.edu.au;stanford.edu;stanford.edu;sysu.edu.cn;sysu.edu.cn", "position": "MS student;MS student;Undergrad student;Intern;PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwei2024grasp,\ntitle={Grasp as You Say: Language-guided Dexterous Grasp Generation},\nauthor={Yi-Lin Wei and Jian-Jian Jiang and Chengyi Xing and Xiantuo Tan and Xiao-Ming Wu and Hao Li and Mark Cutkosky and Wei-Shi Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QeWibaTmnn}\n}", "github": "", "reviewers": "dbTG;MqTF;3CYr;TfTv", "pdf_size": 3092953, "rating": "5;5;6;7", "confidence": "5;3;5;4", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "3;2;4;4", "wc_summary": "140;38;126;72", "wc_strengths": "62;87;78;144", "wc_weaknesses": "245;85;245;370", "wc_questions": "71;126;141;2", "wc_limitations": "11;2;102;2", "wc_review": "529;338;692;590", "wc_reply_reviewers": "67;37;50;79", "wc_reply_authors": "28;15;15;36", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 94.0, 41.10960958218893 ], "wc_strengths_avg": [ 92.75, 30.914195768287424 ], "wc_weaknesses_avg": [ 236.25, 101.14191762073725 ], "wc_questions_avg": [ 85.0, 54.54814387309618 ], "wc_limitations_avg": [ 29.25, 42.16263155923738 ], "wc_review_avg": [ 537.25, 128.93675775355916 ], "wc_reply_reviewers_avg": [ 58.25, 16.021469970012117 ], "wc_reply_authors_avg": [ 23.5, 8.958236433584458 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=413467254004591354&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sysu.edu.cn;stanford.edu;whu.edu.cn;mq.edu.au;stanford.edu;stanford.edu;sysu.edu.cn;sysu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;1;1;0;0", "aff_unique_norm": "Sun Yat-sen University;Stanford University;Wuhan University;Macquarie University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.stanford.edu;http://www.whu.edu.cn/;https://www.mq.edu.au", "aff_unique_abbr": "SYSU;Stanford;WHU;MQ", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;2;1;1;0;0", "aff_country_unique": "China;United States;Australia" }, { "id": "QeebTNgRjn", "title": "Conditional Lagrangian Wasserstein Flow for Time Series Imputation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Time series imputation is important for numerous real-world applications. To overcome the limitations of diffusion model-based imputation methods, e.g., slow convergence in inference, we propose a novel method for time series imputation in this work, called Conditional Lagrangian Wasserstein Flow. The proposed method leverages the (conditional) optimal transport theory to learn the probability flow in a simulation-free manner, in which the initial noise, missing data, and observations are treated as the source distribution, target distribution, and conditional information, respectively. According to the principle of least action in Lagrangian mechanics, we learn the velocity by minimizing the corresponding kinetic energy. Moreover, to incorporate more prior information into the model, we parameterize the derivative of a task-specific potential function via a variational autoencoder, and combine it with the base estimator to formulate a Rao-Blackwellized sampler. The propose model allows us to take less intermediate steps to produce high-quality samples for inference compared to existing diffusion methods. Finally, the experimental results on the real-word datasets show that the proposed method achieves competitive performance on time series imputation compared to the state-of-the-art methods.", "keywords": "dep learning; diffusion model; optimal transport; time series imputation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/b3158d640f77864e78643ce9477ef46fa2074959.zip", "author": "Weizhu Qian;Dalin Zhang;Yan Zhao", "authorids": "~Weizhu_Qian1;~Dalin_Zhang1;~Yan_Zhao3", "gender": "M;;F", "homepage": "https://scholar.google.com/citations?hl=en&user=jRbawDIAAAAJ&view_op=list_works&sortby=pubdate;;https://yan20191113.github.io/", "dblp": "254/2048;;88/5320-8", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0009-0006-8370-010X", "linkedin": ";;", "or_profile": "~Weizhu_Qian1;~Dalin_Zhang1;~Yan_Zhao3", "aff": "Suzhou University;;Aalborg University, Aalborg University", "aff_domain": "suda.edu.cn;;cs.aau.dk", "position": "Assistant Professor;;Assistant Professor", "bibtex": "@misc{\nanonymous2024conditional,\ntitle={Conditional Lagrangian Wasserstein Flow for Time Series Imputation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=QeebTNgRjn}\n}", "github": "", "project": "", "reviewers": "3hN3;FtzN;MvNs;4rvE;FYKM", "site": "https://openreview.net/forum?id=QeebTNgRjn", "pdf_size": 356104, "rating": "2;5;6;6;7", "confidence": "4;3;4;5;4", "soundness": "1;3;3;3;3", "novelty": "1;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "70;49;61;52;41", "wc_strengths": "35;33;41;29;43", "wc_weaknesses": "270;77;73;84;83", "wc_questions": "35;5;22;15;15", "wc_limitations": "96;1;11;16;18", "wc_review": "506;165;208;196;200", "wc_reply_reviewers": "688;22;20;0;15", "wc_reply_authors": "1153;0;0;0;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "5;1;1;1;1", "rating_avg": [ 5.2, 1.7204650534085253 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 54.6, 10.011992808627062 ], "wc_strengths_avg": [ 36.2, 5.15363949069005 ], "wc_weaknesses_avg": [ 117.4, 76.40575894525229 ], "wc_questions_avg": [ 18.4, 9.911609354691095 ], "wc_limitations_avg": [ 28.4, 34.30801655590133 ], "wc_review_avg": [ 255.0, 126.34555789579625 ], "wc_reply_reviewers_avg": [ 149.0, 269.610088832002 ], "wc_reply_authors_avg": [ 230.6, 461.2 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 1.6000000000000003 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.18380365552345193, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W9-LaAfvRNYJ:scholar.google.com/&scioq=Conditional+Lagrangian+Wasserstein+Flow+for+Time+Series+Imputation&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Suzhou University;Aalborg University", "aff_unique_dep": ";", "aff_unique_url": "https://www.suda.edu.cn;https://www.aau.dk", "aff_unique_abbr": "Suda;AAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Aalborg", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Denmark" }, { "title": "On the Role of Information Structure in Reinforcement Learning for Partially-Observable Sequential Teams and Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95220", "id": "QgMC8ftbNd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QgMC8ftbNd", "openreview": "https://openreview.net/forum?id=QgMC8ftbNd", "poster": "", "project": "", "author_site": "Awni Altabaa, Zhuoran Yang", "tldr": "", "abstract": "In sequential decision-making problems, the *information structure* describes the causal dependencies between system variables, encompassing the dynamics of the environment and the agents' actions. Classical models of reinforcement learning (e.g., MDPs, POMDPs) assume a restricted and highly regular information structure, while more general models like predictive state representations do not explicitly model the information structure. By contrast, real-world sequential decision-making problems typically involve a complex and time-varying interdependence of system variables, requiring a rich and flexible representation of information structure. In this paper, we formalize a novel reinforcement learning model which explicitly represents the information structure.\nWe then use this model to carry out an information-structural analysis of the statistical complexity of general sequential decision-making problems, obtaining a characterization via a graph-theoretic quantity of the DAG representation of the information structure. We prove an upper bound on the sample complexity of learning a general sequential decision-making problem in terms of its information structure by exhibiting an algorithm achieving the upper bound. This recovers known tractability results and gives a novel perspective on reinforcement learning in general sequential decision-making problems, providing a systematic way of identifying new tractable classes of problems.", "keywords": "reinforcement learning;partially-observable;information structure;sequential decision-making;causal;causal structure;graphical model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Awni Altabaa;Zhuoran Yang", "authorids": "~Awni_Altabaa1;~Zhuoran_Yang1", "gender": ";M", "homepage": "https://www.awni.xyz;https://zhuoranyang.github.io/", "dblp": ";", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;", "orcid": ";", "linkedin": ";", "or_profile": "~Awni_Altabaa1;~Zhuoran_Yang1", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\naltabaa2024on,\ntitle={On the Role of Information Structure in Reinforcement Learning for Partially-Observable Sequential Teams and Games},\nauthor={Awni Altabaa and Zhuoran Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QgMC8ftbNd}\n}", "github": "", "reviewers": "Ugr3;w9Kg;a41N", "pdf_size": 1086965, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "23;38;68", "wc_strengths": "20;42;162", "wc_weaknesses": "273;147;74", "wc_questions": "1;264;89", "wc_limitations": "1;3;10", "wc_review": "318;494;403", "wc_reply_reviewers": "220;347;135", "wc_reply_authors": "1004;1763;83", "reply_reviewers": "2;3;1", "reply_authors": "3;5;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 43.0, 18.708286933869708 ], "wc_strengths_avg": [ 74.66666666666667, 62.403703593794994 ], "wc_weaknesses_avg": [ 164.66666666666666, 82.1962421417319 ], "wc_questions_avg": [ 118.0, 109.30995685053885 ], "wc_limitations_avg": [ 4.666666666666667, 3.8586123009300755 ], "wc_review_avg": [ 405.0, 71.8656153293539 ], "wc_reply_reviewers_avg": [ 234.0, 87.11295349525619 ], "wc_reply_authors_avg": [ 950.0, 686.9192092233263 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=569734130088350127&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "yale.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Predicting Future Actions of Reinforcement Learning Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95219", "id": "QgaGs7peYe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QgaGs7peYe", "openreview": "https://openreview.net/forum?id=QgaGs7peYe", "poster": "", "project": "", "author_site": "Stephen Chung, Scott Niekum, David Krueger", "tldr": "", "abstract": "As reinforcement learning agents become increasingly deployed in real-world scenarios, predicting future agent actions and events during deployment is important for facilitating better human-agent interaction and preventing catastrophic outcomes. This paper experimentally evaluates and compares the effectiveness of future action and event prediction for three types of RL agents: explicitly planning, implicitly planning, and non-planning. We employ two approaches: the inner state approach, which involves predicting based on the inner computations of the agents (e.g., plans or neuron activations), and a simulation-based approach, which involves unrolling the agent in a learned world model. Our results show that the plans of explicitly planning agents are significantly more informative for prediction than the neuron activations of the other types. Furthermore, using internal plans proves more robust to model quality compared to simulation-based approaches when predicting actions, while the results for event prediction are more mixed. These findings highlight the benefits of leveraging inner states and simulations to predict future agent actions and events, thereby improving interaction and safety in real-world deployments.", "keywords": "Safe reinforcement learning;deep reinforcement learning;agent predictability", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Stephen Chung;Scott Niekum;David Krueger", "authorids": "~Stephen_Chung1;~Scott_Niekum1;~David_Krueger1", "gender": "M;M;M", "homepage": "https://stephen-c.com;https://people.cs.umass.edu/~sniekum/index.php;https://mila.umontreal.ca/en/person/david-scott-krueger/", "dblp": "274/1299;62/8399;142/2741.html", "google_scholar": "QPyTwPIAAAAJ;4wXYfSUAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Stephen_Chung1;~Scott_Niekum1;~David_Krueger1", "aff": "University of Cambridge;University of Massachusetts at Amherst;University of Cambridge", "aff_domain": "cam.ac.uk;umass.edu;cam.ac.uk", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchung2024predicting,\ntitle={Predicting Future Actions of Reinforcement Learning Agents},\nauthor={Stephen Chung and Scott Niekum and David Krueger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QgaGs7peYe}\n}", "github": "", "reviewers": "SSgd;tXgP;7yyB;W8D8", "pdf_size": 752305, "rating": "4;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;2;2", "novelty": "2;2;2;2", "presentation": "1;3;3;3", "wc_summary": "70;75;83;83", "wc_strengths": "33;42;90;131", "wc_weaknesses": "292;4;98;72", "wc_questions": "7;92;37;8", "wc_limitations": "64;1;12;15", "wc_review": "466;214;320;309", "wc_reply_reviewers": "387;29;0;29", "wc_reply_authors": "757;0;0;0", "reply_reviewers": "3;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 77.75, 5.539629951540085 ], "wc_strengths_avg": [ 74.0, 39.40177660969109 ], "wc_weaknesses_avg": [ 116.5, 106.98013834352618 ], "wc_questions_avg": [ 36.0, 34.50362299817223 ], "wc_limitations_avg": [ 23.0, 24.238399287081645 ], "wc_review_avg": [ 327.25, 90.08711062077637 ], "wc_reply_reviewers_avg": [ 111.25, 159.64393975344007 ], "wc_reply_authors_avg": [ 189.25, 327.79061533241 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16726403274732704456&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;umass.edu;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.umass.edu", "aff_unique_abbr": "Cambridge;UMass Amherst", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Cambridge;Amherst", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "LIVE: Learnable In-Context Vector for Visual Question Answering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95218", "id": "QhRemVrZbG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QhRemVrZbG", "openreview": "https://openreview.net/forum?id=QhRemVrZbG", "poster": "", "project": "", "author_site": "Yingzhe Peng, chenduo hao, Xinting Hu, Jiawei Peng, Xin Geng, Xu Yang", "tldr": "", "abstract": "As language models continue to scale, Large Language Models (LLMs) have exhibited emerging capabilities in In-Context Learning (ICL), enabling them to solve language tasks by prefixing a few in-context demonstrations (ICDs) as context. Inspired by these advancements, researchers have extended these techniques to develop Large Multimodal Models (LMMs) with ICL capabilities. However, applying ICL usually faces two major challenges: 1) using more ICDs will largely increase the inference time and 2) the performance is sensitive to the selection of ICDs. These challenges are further exacerbated in LMMs due to the integration of multiple data types and the combinational complexity of multimodal ICDs. Recently, to address these challenges, some NLP studies introduce non-learnable In-Context Vectors (ICVs) which extract useful task information from ICDs into a single vector and then insert it into the LLM to help solve the corresponding task. However, although useful in simple NLP tasks, these non-learnable methods fail to handle complex multimodal tasks like Visual Question Answering (VQA). In this study, we propose \\underline{\\textbf{L}}earnable \\underline{\\textbf{I}}n-Context \\underline{\\textbf{Ve}}ctor (LIVE) to distill essential task information from demonstrations, improving ICL performance in LMMs. Experiments show that LIVE can significantly reduce computational costs while enhancing accuracy in VQA tasks compared to traditional ICL and other non-learnable ICV methods.", "keywords": "In-Context Learning;MultiModal", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c62105e7b27444454677ef8154fccb8d07c81503.zip", "author": "Yingzhe Peng;chenduo hao;Xinting Hu;Jiawei Peng;Xin Geng;Xu Yang", "authorids": "~Yingzhe_Peng1;~chenduo_hao1;~Xinting_Hu1;~Jiawei_Peng2;~Xin_Geng1;~Xu_Yang5", "gender": "M;M;;M;M;", "homepage": "https://github.com/ForJadeForest;https://github.com/aohenuo;http://palm.seu.edu.cn/members.html;http://palm.seu.edu.cn/xgeng/index.htm;;https://joyhuyy1412.github.io/", "dblp": ";;244/7365;;63/1534-21.html;222/7753", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;ZOCxkIcAAAAJ;SqdxMH0AAAAJ;", "orcid": ";;;;0000-0002-8276-2679;", "linkedin": ";;;;;", "or_profile": "~Yingzhe_Peng1;~chenduo_hao1;~Jiawei_Peng2;~Xin_Geng1;~Xu_Yang5;~Joy_Hu1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University, China;Southeast University;Saarland Informatics Campus, Max-Planck Institute", "aff_domain": "southeast.edu;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;mpi-inf.mpg.de", "position": "MS student;Undergrad student;MS student;Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\npeng2024live,\ntitle={{LIVE}: Learnable In-Context Vector for Visual Question Answering},\nauthor={Yingzhe Peng and chenduo hao and Xinting Hu and Jiawei Peng and Xin Geng and Xu Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QhRemVrZbG}\n}", "github": "", "reviewers": "fRTc;BDyQ;VKDo;8z8x", "pdf_size": 1344348, "rating": "4;5;7;8", "confidence": "3;4;3;4", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "82;64;128;34", "wc_strengths": "49;50;96;55", "wc_weaknesses": "97;481;201;5", "wc_questions": "124;5;89;50", "wc_limitations": "10;85;1;1", "wc_review": "362;685;515;145", "wc_reply_reviewers": "0;236;0;0", "wc_reply_authors": "0;652;0;0", "reply_reviewers": "0;2;0;0", "reply_authors": "1;3;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.0, 34.07345007480164 ], "wc_strengths_avg": [ 62.5, 19.474342094150447 ], "wc_weaknesses_avg": [ 196.0, 178.5581137893207 ], "wc_questions_avg": [ 67.0, 44.34523649728345 ], "wc_limitations_avg": [ 24.25, 35.26595383652624 ], "wc_review_avg": [ 426.75, 198.78175846892995 ], "wc_reply_reviewers_avg": [ 59.0, 102.19099764656376 ], "wc_reply_authors_avg": [ 163.0, 282.324281633727 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.316227766016838, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6971389700264872191&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "southeast.edu;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;mpi-inf.mpg.de", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Southeast University;Max-Planck Institute", "aff_unique_dep": ";Informatics", "aff_unique_url": "https://www.seu.edu.cn/;https://www.mpi-sws.org", "aff_unique_abbr": "SEU;MPI-SWS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Germany" }, { "title": "Physics-Constrained Comprehensive Optical Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95217", "id": "QhUXU2ilIG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QhUXU2ilIG", "openreview": "https://openreview.net/forum?id=QhUXU2ilIG", "poster": "/media/PosterPDFs/NeurIPS%202024/95217.png?t=1731290795.6228929", "project": "", "author_site": "Yanbing Liu, JIANWEI QIN, Yan Liu, Xi Yue, Xun Liu, Guoqing Wang, Tianyu Li, Ye, Wei Li", "tldr": "", "abstract": "With the advantages of low latency, low power consumption, and high parallelism, optical neural networks (ONN) offer a promising solution for time-sensitive and resource-limited artificial intelligence applications. However, the performance of the ONN model is often diminished by the gap between the ideal simulated system and the actual physical system. To bridge the gap, this work conducts extensive experiments to investigate systematic errors in the optical physical system within the context of image classification tasks. Through our investigation, two quantifiable errors\u2014light source instability and exposure time mismatches\u2014significantly impact the prediction performance of ONN. To address these systematic errors, a physics-constrained ONN learning framework is constructed, including a well designed loss function to mitigate the effect of light fluctuations, a CCD adjustment strategy to alleviate the effects of exposure time mismatches and a \u2019physics-prior based\u2019 error compensation network to manage other systematic errors, ensuring consistent light intensity across experimental results and simulations. In our experiments, the proposed method achieved a test classification accuracy of 96.5% on the MNIST dataset, a substantial improvement over the 61.6% achieved with the original ONN. For the more challenging QuickDraw16 and Fashion MNIST datasets, experimental accuracy improved from 63.0% to 85.7% and from 56.2% to 77.5%, respectively. Moreover, the comparison results further demonstrate the effectiveness of the proposed physics-constrained ONN learning framework over state-of-the-art ONN approaches. This lays the groundwork for more robust and precise optical computing applications.", "keywords": "Optical Neural Networks;Physical Neural Networks;Error Compensation", "primary_area": "machine_vision", "supplementary_material": "/attachment/c5da1c87166bbc87caa52afa0eefd9d32809e684.zip", "author": "Yanbing Liu;Jianwei Qin;Yan Liu;Xi Yue;Xun Liu;Guoqing Wang;Tianyu Li;Fangwei Ye;Wei Li", "authorids": "~Yanbing_Liu2;~Jianwei_Qin1;~Yan_Liu31;~Xi_Yue1;~Xun_Liu7;~Guoqing_Wang2;~Tianyu_Li12;~Fangwei_Ye1;~Wei_Li102", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";;;;https://github.com/Liuxun;https://faculty.uestc.edu.cn/wangguoqing1/zh_CN/index.htm;https://github.com/CosmosYu9;;https://github.com/LiWei", "dblp": ";;;;;17/356-1;;;", "google_scholar": ";;;;;https://scholar.google.com/citations?hl=en;6MUsCT4AAAAJ;ycBkkwgAAAAJ;", "orcid": "0000-0003-3870-3345;0000-0002-2240-0932;0009-0001-2873-1310;0009-0009-4198-8303;;;0000-0001-5069-1493;;", "linkedin": ";;;;;;;;", "or_profile": "~Yanbing_Liu2;~Jianwei_Qin1;~Yan_Liu31;~Xi_Yue1;~Xun_Liu7;~Guoqing_Wang2;~Tianyu_Li12;~Fangwei_Ye1;~Wei_Li102", "aff": "Beijing University of Posts and Telecommunications;Shanghai Jiaotong University;Shanghai Jiaotong University;Beijing University of Posts and Telecommunications;Beijing Institute of Space Mechanics and Electricity;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Shanghai Jiaotong University;Beijing Institute of Space Mechanics and Electricity", "aff_domain": "bupt.edu.cn;sjtu.edu.cn;sjtu.edu.cn;bupt.edu.cn;cast.cn;uestc.edu.cn;uestc.edu.cn;sjtu.edu.cn;cast.cn", "position": "PhD student;PhD student;PhD student;MS student;Researcher;Full Professor;Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\nliu2024physicsconstrained,\ntitle={Physics-Constrained Comprehensive Optical Neural Networks},\nauthor={Yanbing Liu and Jianwei Qin and Yan Liu and Xi Yue and Xun Liu and Guoqing Wang and Tianyu Li and Fangwei Ye and Wei Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QhUXU2ilIG}\n}", "github": "", "reviewers": "M382;W9o4;fa6W;8n42", "pdf_size": 4568787, "rating": "3;5;7;7", "confidence": "5;4;3;3", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "2;3;3;2", "wc_summary": "35;45;115;92", "wc_strengths": "25;36;91;129", "wc_weaknesses": "156;139;84;39", "wc_questions": "156;94;55;27", "wc_limitations": "1;45;33;40", "wc_review": "373;359;378;327", "wc_reply_reviewers": "80;34;25;114", "wc_reply_authors": "450;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.75, 32.96494350063413 ], "wc_strengths_avg": [ 70.25, 42.13890719987883 ], "wc_weaknesses_avg": [ 104.5, 46.2412153819512 ], "wc_questions_avg": [ 83.0, 48.399380161320245 ], "wc_limitations_avg": [ 29.75, 17.137313091613866 ], "wc_review_avg": [ 359.25, 19.879323429131084 ], "wc_reply_reviewers_avg": [ 63.25, 35.967867604293694 ], "wc_reply_authors_avg": [ 112.5, 194.8557158514987 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6489556647044981422&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "bupt.edu.cn;sjtu.edu.cn;sjtu.edu.cn;bupt.edu.cn;cast.cn;uestc.edu.cn;uestc.edu.cn;sjtu.edu.cn;cast.cn", "author_num": 9, "aff_unique_index": "0;1;1;0;2;3;3;1;2", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Shanghai Jiao Tong University;Beijing Institute of Space Mechanics and Electricity;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.sjtu.edu.cn;http://www.bisee.ac.cn/;https://www.uestc.edu.cn", "aff_unique_abbr": "BUPT;SJTU;;UESTC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Toward Dynamic Non-Line-of-Sight Imaging with Mamba Enforced Temporal Consistency", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95216", "id": "QiCJomIW3l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QiCJomIW3l", "openreview": "https://openreview.net/forum?id=QiCJomIW3l", "poster": "", "project": "", "author_site": "Yue Li, Yi Sun, Shida Sun, Juntian Ye, Yueyi Zhang, Feihu Xu, Zhiwei Xiong", "tldr": "", "abstract": "Dynamic reconstruction in confocal non-line-of-sight imaging encounters great challenges since the dense raster-scanning manner limits the practical frame rate. A fewer pioneer works reconstruct high-resolution volumes from the under-scanning transient measurements but overlook temporal consistency among transient frames. To fully exploit multi-frame information, we propose the first spatial-temporal Mamba (ST-Mamba) based method tailored for dynamic reconstruction of transient videos. Our method capitalizes on neighbouring transient frames to aggregate the target 3D hidden volume. Specifically, the interleaved features extracted from the input transient frames are fed to the proposed ST-Mamba blocks, which leverage the time-resolving causality in transient measurement. The cross ST-Mamba blocks are then devised to integrate the adjacent transient features. The target high-resolution transient frame is subsequently recovered by the transient spreading module. After transient fusion and recovery, a physical-based network is employed to reconstruct the hidden volume. To tackle the substantial noise inherent in transient videos, we propose a wave-based loss function to impose constraints within the phasor field. Besides, we introduce a new dataset, comprising synthetic videos for training and real-world videos for evaluation. Extensive experiments showcase the superior performance of our method on both synthetic data and real world data captured by different imaging setups. The code and data are available at https://github.com/Depth2World/Dynamic_NLOS.", "keywords": "dynamic;non-line-of-sight imaging;spatial-temporal Mamba", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/af9bfb4f504be74ca2c3a0f0a41c613d33f41b6f.zip", "author": "Yue Li;Yi Sun;Shida Sun;Juntian Ye;Yueyi Zhang;Feihu Xu;Zhiwei Xiong", "authorids": "~Yue_Li11;~Yi_Sun20;~Shida_Sun1;~Juntian_Ye1;~Yueyi_Zhang2;~Feihu_Xu1;~Zhiwei_Xiong1", "gender": ";M;;M;;M;M", "homepage": ";https://github.com/SunYi2017;;http://quantum.ustc.edu.cn/web/node/659;;https://quantum.ustc.edu.cn/web/en/node/475;", "dblp": ";;;;;;54/6827", "google_scholar": ";;;;LatWlFAAAAAJ;;Snl0HPEAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAADB0h1IBzCDl1QrA9Xrx5XQzxhJsOcO73uI;;;", "or_profile": "~Yue_Li11;~Yi_Sun20;~Shida_Sun1;~Juntian_Ye1;~Yueyi_Zhang2;~Feihu_Xu1;~Zhiwei_Xiong1", "aff": ";University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;USTC", "aff_domain": ";mail.ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu;ustc.edu.cn", "position": ";PhD student;;PhD student;Associate Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nli2024toward,\ntitle={Toward Dynamic Non-Line-of-Sight Imaging with Mamba Enforced Temporal Consistency},\nauthor={Yue Li and Yi Sun and Shida Sun and Juntian Ye and Yueyi Zhang and Feihu Xu and Zhiwei Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QiCJomIW3l}\n}", "github": "", "reviewers": "j9gw;mnC2;oqBG;CVND", "pdf_size": 3203314, "rating": "5;6;6;7", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "108;37;115;86", "wc_strengths": "109;32;98;85", "wc_weaknesses": "48;166;261;143", "wc_questions": "18;3;101;4", "wc_limitations": "11;3;5;4", "wc_review": "294;241;580;322", "wc_reply_reviewers": "0;42;250;16", "wc_reply_authors": "0;31;57;16", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.5, 30.516389039334257 ], "wc_strengths_avg": [ 81.0, 29.53811097548386 ], "wc_weaknesses_avg": [ 154.5, 75.74463677383369 ], "wc_questions_avg": [ 31.5, 40.56168142471414 ], "wc_limitations_avg": [ 5.75, 3.112474899497183 ], "wc_review_avg": [ 359.25, 130.72753153027867 ], "wc_reply_reviewers_avg": [ 77.0, 101.0 ], "wc_reply_authors_avg": [ 26.0, 20.988091861815356 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uSvOf6lF6q4J:scholar.google.com/&scioq=Toward+Dynamic+Non-Line-of-Sight+Imaging+with+Mamba+Enforced+Temporal+Consistency&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": ";mail.ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;ustc.edu;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Multi-Agent Imitation Learning: Value is Easy, Regret is Hard", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95215", "id": "Qk3IBHyv6z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Qk3IBHyv6z", "openreview": "https://openreview.net/forum?id=Qk3IBHyv6z", "poster": "", "project": "", "author_site": "Jingwu Tang, Gokul Swamy, Fei Fang, Steven Wu", "tldr": "", "abstract": "We study a multi-agent imitation learning (MAIL) problem where we take the perspective of a learner attempting to *coordinate* a group of agents based on demonstrations of an expert doing so. Most prior work in MAIL essentially reduces the problem to matching the behavior of the expert *within* the support of the demonstrations. While doing so is sufficient to drive the *value gap* between the learner and the expert to zero under the assumption that agents are non-strategic, it does not guarantee robustness to deviations by strategic agents. Intuitively, this is because strategic deviations can depend on a counterfactual quantity: the coordinator's recommendations outside of the state distribution their recommendations induce. In response, we initiate the study of an alternative objective for MAIL in Markov Games we term the *regret gap* that explicitly accounts for potential deviations by agents in the group. We first perform an in-depth exploration of the relationship between the value and regret gaps. First, we show that while the value gap can be efficiently minimized via a direct extension of single-agent IL algorithms, even *value equivalence* can lead to an arbitrarily large regret gap. This implies that achieving regret equivalence is harder than achieving value equivalence in MAIL. We then provide a pair of efficient reductions to no-regret online convex optimization that are capable of minimizing the regret gap *(a)* under a coverage assumption on the expert (MALICE) or *(b)* with access to a queryable expert (BLADES).", "keywords": "imitation learning;multi-agent reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jingwu Tang;Gokul Swamy;Fei Fang;Steven Wu", "authorids": "~Jingwu_Tang1;~Gokul_Swamy1;~Fei_Fang1;~Steven_Wu1", "gender": "M;;F;M", "homepage": "http://tangjingwu.com;https://gokul.dev/;https://feifang.info/;https://zstevenwu.com/", "dblp": ";31/11509;57/2878;137/8350", "google_scholar": "H0MLFHEAAAAJ;Sbpra_AAAAAJ;R6jE0VEAAAAJ;MbF6rTEAAAAJ", "orcid": ";;;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAD_hNX4Bds-yWr-qD4hwwbXU27WykaXJ0yo;;;zstevenwu/", "or_profile": "~Jingwu_Tang1;~Gokul_Swamy1;~Fei_Fang1;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ntang2024multiagent,\ntitle={Multi-Agent Imitation Learning: Value is Easy, Regret is Hard},\nauthor={Jingwu Tang and Gokul Swamy and Fei Fang and Steven Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Qk3IBHyv6z}\n}", "github": "", "reviewers": "aTBa;3HJn;SJkf;NThg", "pdf_size": 460967, "rating": "5;5;6;6", "confidence": "2;3;2;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;2;4", "wc_summary": "55;208;77;176", "wc_strengths": "53;103;71;129", "wc_weaknesses": "111;181;73;85", "wc_questions": "118;199;85;141", "wc_limitations": "32;14;7;11", "wc_review": "369;705;313;542", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 129.0, 64.47867864651074 ], "wc_strengths_avg": [ 89.0, 29.223278392404914 ], "wc_weaknesses_avg": [ 112.5, 41.865857210858586 ], "wc_questions_avg": [ 135.75, 41.589511899035315 ], "wc_limitations_avg": [ 16.0, 9.565563234854496 ], "wc_review_avg": [ 482.25, 153.83331076200628 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3195099651124065949&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards a Theoretical Understanding of the 'Reversal Curse' via Training Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95214", "id": "QoWf3lo6m7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QoWf3lo6m7", "openreview": "https://openreview.net/forum?id=QoWf3lo6m7", "poster": "/media/PosterPDFs/NeurIPS%202024/95214.png?t=1733460458.9786222", "project": "", "author_site": "Hanlin Zhu, Baihe Huang, Shaolun Zhang, Michael Jordan, Jiantao Jiao, Yuandong Tian, Stuart J Russell", "tldr": "", "abstract": "Auto-regressive large language models (LLMs) show impressive capacities to solve many complex reasoning tasks while struggling with some simple logical reasoning tasks such as inverse search: when trained on ''$A \\to B$'' (e.g., *Tom is the parent of John*), LLM fails to directly conclude ''$B \\gets A$'' (e.g., *John is the child of Tom*) during inference even if the two sentences are semantically identical, which is known as the ''reversal curse''. In this paper, we theoretically analyze the reversal curse via the training dynamics of (stochastic) gradient descent for two auto-regressive models: (1) a bilinear model that can be viewed as a simplification of a one-layer transformer; (2) one-layer transformers under certain assumptions. Our analysis reveals that for both models, the reversal curse is a consequence of the (effective) model weights *asymmetry*, i.e., the increase of weights from a token $A$ to token $B$ during training does not necessarily cause the increase of the weights from $B$ to $A$, which is caused by the training dynamics under certain choice of loss function and the optimization space of model parameters. Moreover, our analysis can be naturally applied to other logical reasoning tasks such as chain-of-thought (COT), which provides a new perspective different from previous work that focuses on expressivity. Finally, we conduct experiments to validate our theory on multi-layer transformers under different settings. Our code is available at [https://github.com/marlo-z/reversal_curse_analysis/](https://github.com/marlo-z/reversal_curse_analysis/).", "keywords": "logical reasoning;reversal curse;training dynamics;autoregressive transformer;chain of thought", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hanlin Zhu;Baihe Huang;Shaolun Zhang;Michael Jordan;Jiantao Jiao;Yuandong Tian;Stuart Russell", "authorids": "~Hanlin_Zhu2;~Baihe_Huang1;~Shaolun_Zhang1;~Michael_Jordan1;~Jiantao_Jiao1;~Yuandong_Tian1;~Stuart_Russell1", "gender": "M;;;M;M;M;M", "homepage": "https://hanlinzhu.com/;;;http://www.cs.berkeley.edu/~jordan/;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en;http://yuandong-tian.com;https://people.eecs.berkeley.edu/~russell/", "dblp": ";279/4131;;j/MichaelIJordan;43/8919;t/YuandongTian;", "google_scholar": "yDVn5LEAAAAJ;chICXXMAAAAJ;;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;aO8KpGcAAAAJ;0mgEF28AAAAJ;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ", "orcid": ";;;0000-0001-8935-817X;;0000-0003-4202-4847;", "linkedin": ";;marlo-shaolun-zhang-351611235/;;;yuandongtian;", "or_profile": "~Hanlin_Zhu2;~Baihe_Huang1;~Shaolun_Zhang1;~Michael_Jordan1;~Jiantao_Jiao1;~Yuandong_Tian1;~Stuart_Russell1", "aff": "Electrical Engineering & Computer Science Department, University of California Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Meta AI (FAIR);University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;meta.com;berkeley.edu", "position": "PhD student;PhD student;Undergrad student;Full Professor;Assistant Professor;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nzhu2024towards,\ntitle={Towards a Theoretical Understanding of the 'Reversal Curse' via Training Dynamics},\nauthor={Hanlin Zhu and Baihe Huang and Shaolun Zhang and Michael Jordan and Jiantao Jiao and Yuandong Tian and Stuart Russell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QoWf3lo6m7}\n}", "github": "", "reviewers": "82qZ;bwDn;yXHq;8So4", "pdf_size": 6775419, "rating": "5;5;7;7", "confidence": "4;4;4;3", "soundness": "4;2;4;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "188;93;114;228", "wc_strengths": "83;121;122;77", "wc_weaknesses": "363;388;36;44", "wc_questions": "2;679;427;317", "wc_limitations": "14;30;1;30", "wc_review": "650;1311;700;696", "wc_reply_reviewers": "14;209;200;0", "wc_reply_authors": "19;307;405;0", "reply_reviewers": "1;2;3;0", "reply_authors": "2;3;4;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 155.75, 54.63686941983408 ], "wc_strengths_avg": [ 100.75, 20.8611480987984 ], "wc_weaknesses_avg": [ 207.75, 168.00651029052415 ], "wc_questions_avg": [ 356.25, 243.00552977247247 ], "wc_limitations_avg": [ 18.75, 12.152674602736633 ], "wc_review_avg": [ 839.25, 273.07267805476255 ], "wc_reply_reviewers_avg": [ 105.75, 98.92516110676797 ], "wc_reply_authors_avg": [ 182.75, 176.80833549355074 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18020355285475123988&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "eecs.berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;meta.com;berkeley.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Meta", "aff_unique_dep": "Electrical Engineering & Computer Science Department;Facebook AI Research (FAIR)", "aff_unique_url": "https://www.berkeley.edu;https://ai.facebook.com", "aff_unique_abbr": "UC Berkeley;Meta AI", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "BertaQA: How Much Do Language Models Know About Local Culture?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97695", "id": "QocjHRR31U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QocjHRR31U", "openreview": "https://openreview.net/forum?id=QocjHRR31U", "poster": "/media/PosterPDFs/NeurIPS%202024/97695.png?t=1731604692.7333078", "project": "", "author_site": "Julen Etxaniz, Gorka Azkune, Aitor Soroa, Oier Lacalle, Mikel Artetxe", "tldr": "", "abstract": "Large Language Models (LLMs) exhibit extensive knowledge about the world, but most evaluations have been limited to global or anglocentric subjects. This raises the question of how well these models perform on topics relevant to other cultures, whose presence on the web is not that prominent. To address this gap, we introduce BertaQA, a multiple-choice trivia dataset that is parallel in English and Basque. The dataset consists of a local subset with questions pertinent to the Basque culture, and a global subset with questions of broader interest. We find that state-of-the-art LLMs struggle with local cultural knowledge, even as they excel on global topics. However, we show that continued pre-training in Basque significantly improves the models' performance on Basque culture, even when queried in English. To our knowledge, this is the first solid evidence of knowledge transfer from a low-resource to a high-resource language. Our analysis sheds light on the complex interplay between language and knowledge, and reveals that some prior findings do not fully hold when reassessed on local topics. Our dataset and evaluation code are available under open licenses at https://github.com/juletx/BertaQA.", "keywords": "LLM evaluation;multilinguality;culture;basque culture", "primary_area": "", "supplementary_material": "/attachment/3c582e6a70cc167f94637844e728cc869e06bf27.pdf", "author": "Julen Etxaniz;Gorka Azkune;Aitor Soroa;Oier Lopez de Lacalle;Mikel Artetxe", "authorids": "~Julen_Etxaniz1;~Gorka_Azkune1;~Aitor_Soroa1;~Oier_Lopez_de_Lacalle1;~Mikel_Artetxe1", "gender": "M;;M;M;M", "homepage": "https://julenetxaniz.eus/en;https://gazkune.github.io/;https://ixa2.si.ehu.eus/asoroa/;https://oierldl.github.io/;http://www.mikelartetxe.com", "dblp": "354/6422;14/10011;03/6734;11/4461;168/0354", "google_scholar": "BDGXAjgAAAAJ;_1wx6NoAAAAJ;yklm660AAAAJ;nieh6tUAAAAJ;N5InzP8AAAAJ", "orcid": "0009-0000-2099-7766;0000-0002-2506-7426;0000-0001-8573-2654;0000-0003-4969-2055;", "linkedin": "juletxara;gorka-azkune-80780035/?originalSubdomain=es;;oier-lopez-de-lacalle-4044a36/;artetxem", "or_profile": "~Julen_Etxaniz1;~Gorka_Azkune1;~Aitor_Soroa1;~Oier_Lopez_de_Lacalle1;~Mikel_Artetxe1", "aff": "HiTZ Center, University of the Basque Country (UPV/EHU);Universidad del Pa\u00eds Vasco;University of the Basque Country. UPV/EHU.;Universidad del Pa\u00eds Vasco;Reka AI", "aff_domain": "ehu.eus;ehu.es;ehu.eus;ehu.eus;reka.ai", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\netxaniz2024bertaqa,\ntitle={Berta{QA}: How Much Do Language Models Know About Local Culture?},\nauthor={Julen Etxaniz and Gorka Azkune and Aitor Soroa and Oier Lopez de Lacalle and Mikel Artetxe},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QocjHRR31U}\n}", "github": "", "reviewers": "7FUP;SVZZ;NEZ8;Zd5e", "pdf_size": 223869, "rating": "6;6;6;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "41;127;262;28", "wc_strengths": "14;74;44;16", "wc_improvement": "3;131;116;81", "wc_limitations": "7;13;26;49", "wc_correctness": "1;38;61;20", "wc_clarity": "1;18;16;2", "wc_relation_to_prior_work": "1;17;60;4", "wc_documentation": "1;23;5;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "70;442;591;206", "wc_reply_reviewers": "15;23;97;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 114.5, 93.26977002223175 ], "wc_strengths_avg": [ 37.0, 24.43358344574123 ], "wc_improvement_avg": [ 82.75, 49.48926651305311 ], "wc_limitations_avg": [ 23.75, 16.11482236948332 ], "wc_correctness_avg": [ 30.0, 22.169799277395363 ], "wc_clarity_avg": [ 9.25, 7.790218225441442 ], "wc_relation_to_prior_work_avg": [ 20.5, 23.58495283014151 ], "wc_documentation_avg": [ 8.5, 8.52936105461599 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 327.25, 202.2441284685417 ], "wc_reply_reviewers_avg": [ 33.75, 37.43911724386674 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9310929901858507207&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ehu.eus;ehu.es;ehu.eus;ehu.eus;reka.ai", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "University of the Basque Country;Universidad del Pa\u00eds Vasco;Reka AI", "aff_unique_dep": "HiTZ Center;;", "aff_unique_url": "https://www.ehu.eus/en;https://www.ehu.eus/en;https://www.reka.ai", "aff_unique_abbr": "UPV/EHU;UPV/EHU;Reka AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Spain;United States" }, { "title": "Archaeoscape: Bringing Aerial Laser Scanning Archaeology to the Deep Learning Era", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97694", "id": "QpF3DFP3Td", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QpF3DFP3Td", "openreview": "https://openreview.net/forum?id=QpF3DFP3Td", "poster": "/media/PosterPDFs/NeurIPS%202024/97694.png?t=1733944803.8750367", "project": "", "author_site": "Yohann PERRON, Vladyslav Sydorov, Adam P. Wijker, Damian Evans, Christophe Pottier, Loic Landrieu", "tldr": "", "abstract": "Airborne Laser Scanning (ALS) technology has transformed modern archaeology by unveiling hidden landscapes beneath dense vegetation. However, the lack of expert-annotated, open-access resources has hindered the analysis of ALS data using advanced deep learning techniques. We address this limitation with Archaeoscape (available at https://archaeoscape.ai/data/2024), a novel large-scale archaeological ALS dataset spanning 888 km\u00b2 in Cambodia with 31,141 annotated archaeological features from the Angkorian period. Archaeoscape is over four times larger than comparable datasets, and the first ALS archaeology resource with open-access data, annotations, and models.\nWe benchmark several recent segmentation models to demonstrate the benefits of modern vision techniques for this problem and highlight the unique challenges of discovering subtle human-made structures under dense jungle canopies. By making Archaeoscape available in open access, we hope to bridge the gap between traditional archaeology and modern computer vision methods.", "keywords": "Archaeology;Aerial Laser Scanning;Semantic Segmentation", "primary_area": "", "supplementary_material": "", "author": "Yohann PERRON;Vladyslav Sydorov;Adam P. Wijker;Damian Evans;Christophe Pottier;Loic Landrieu", "authorids": "~Yohann_PERRON1;~Vladyslav_Sydorov1;~Adam_P._Wijker1;~Damian_Evans1;~Christophe_Pottier1;~Loic_Landrieu1", "gender": "M;M;M;M;M;M", "homepage": "https://yohannperron.github.io/WebPage/;;;;https://www.efeo.fr/index.php?l=EN;https://loiclandrieu.com/", "dblp": ";151/8849;;;;165/7958", "google_scholar": ";;NQYDyLYAAAAJ;MZHHjK4AAAAJ;;B9VnFRcAAAAJ", "orcid": "0000-0002-0497-8263;;;0000-0003-0320-4319;0000-0001-8699-6280;0000-0002-7738-8141", "linkedin": ";vsydorov/;;damian-evans-4a389a200/;;", "or_profile": "~Yohann_PERRON1;~Vladyslav_Sydorov1;~Adam_P._Wijker1;~Damian_Evans1;~Christophe_Pottier1;~Loic_Landrieu1", "aff": "Ecole Nationale des Ponts et Chausees;The French School of Asian Studies;\u00c9cole fran\u00e7aise d'Extr\u00eame-Orient;\u00c9cole fran\u00e7aise d'Extr\u00eame-Orient (EFEO);Ecole fran\u00e7aise d'Extr\u00eame-Orient ;IGN", "aff_domain": "enpc.fr;efeo.fr;efeo.net;efeo.fr;efeo.fr;ign.fr", "position": "PhD student;Researcher;PhD student;Principal Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nperron2024archaeoscape,\ntitle={Archaeoscape: Bringing Aerial Laser Scanning Archaeology to the Deep Learning Era},\nauthor={Yohann PERRON and Vladyslav Sydorov and Adam P. Wijker and Damian Evans and Christophe Pottier and Loic Landrieu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QpF3DFP3Td}\n}", "github": "", "reviewers": "AJFq;ak9C;t7Z7;puKz", "pdf_size": 8821678, "rating": "6;7;9;9", "confidence": "4;4;4;3", "wc_summary_and_contributions": "51;110;127;78", "wc_strengths": "62;35;32;65", "wc_improvement": "9;59;158;43", "wc_limitations": "285;30;7;32", "wc_correctness": "1;510;15;1", "wc_clarity": "1;34;9;1", "wc_relation_to_prior_work": "1;335;17;1", "wc_documentation": "6;43;9;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "417;1157;375;223", "wc_reply_reviewers": "0;21;128;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 91.5, 29.261749776799064 ], "wc_strengths_avg": [ 48.5, 15.074813431681335 ], "wc_improvement_avg": [ 67.25, 55.41829571540431 ], "wc_limitations_avg": [ 88.5, 113.87383369325896 ], "wc_correctness_avg": [ 131.75, 218.45751875364692 ], "wc_clarity_avg": [ 11.25, 13.534677683639163 ], "wc_relation_to_prior_work_avg": [ 88.5, 142.46666276711895 ], "wc_documentation_avg": [ 14.75, 16.55860803328589 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 543.0, 361.76511716858494 ], "wc_reply_reviewers_avg": [ 37.25, 53.09131284871377 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7120363334575205985&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 6, "email": "enpc.fr;efeo.fr;efeo.net;efeo.fr;efeo.fr;ign.fr", "author_num": 6, "aff_unique_index": "0;1;2;2;3;4", "aff_unique_norm": "Ecole Nationale des Ponts et Chaussees;French School of Asian Studies;\u00c9cole fran\u00e7aise d'Extr\u00eame-Orient;Ecole fran\u00e7aise d'Extr\u00eame-Orient;IGN Entertainment", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.enpc.fr;https://efeo.fr;https://www.efeo.fr;https://www.efeo.fr;https://www.ign.com", "aff_unique_abbr": "ENPC;EFEO;EFEO;EFEO;IGN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "France;United States" }, { "title": "Rethinking Exploration in Reinforcement Learning with Effective Metric-Based Exploration Bonus", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95213", "id": "QpKWFLtZKi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QpKWFLtZKi", "openreview": "https://openreview.net/forum?id=QpKWFLtZKi", "poster": "/media/PosterPDFs/NeurIPS%202024/95213.png?t=1731336679.8887572", "project": "", "author_site": "Yiming Wang, Kaiyan Zhao, Furui Liu, Leong Hou U", "tldr": "", "abstract": "Enhancing exploration in reinforcement learning (RL) through the incorporation of intrinsic rewards, specifically by leveraging *state discrepancy* measures within various metric spaces as exploration bonuses, has emerged as a prevalent strategy to encourage agents to visit novel states. The critical factor lies in how to quantify the difference between adjacent states as *novelty* for promoting effective exploration.\nNonetheless, existing methods that evaluate state discrepancy in the latent space under $L_1$ or $L_2$ norm often depend on count-based episodic terms as scaling factors for exploration bonuses, significantly limiting their scalability. Additionally, methods that utilize the bisimulation metric for evaluating state discrepancies face a theory-practice gap due to improper approximations in metric learning, particularly struggling with *hard exploration* tasks. To overcome these challenges, we introduce the **E**ffective **M**etric-based **E**xploration-bonus (EME). EME critically examines and addresses the inherent limitations and approximation inaccuracies of current metric-based state discrepancy methods for exploration, proposing a robust metric for state discrepancy evaluation backed by comprehensive theoretical analysis. Furthermore, we propose the diversity-enhanced scaling factor integrated into the exploration bonus to be dynamically adjusted by the variance of prediction from an ensemble of reward models, thereby enhancing exploration effectiveness in particularly challenging scenarios. \nExtensive experiments are conducted on hard exploration tasks within Atari games, Minigrid, Robosuite, and Habitat, which illustrate our method's scalability to various scenarios. The project website can be found at https://sites.google.com/view/effective-metric-exploration.", "keywords": "Reinforcement Learning;exploration bonus;intrinsic reward;metric-based behavioral similarity", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yiming Wang;Kaiyan Zhao;Furui Liu;Leong Hou U", "authorids": "~Yiming_Wang15;~Kaiyan_Zhao3;~Furui_Liu1;~Leong_Hou_U2", "gender": "M;M;M;M", "homepage": "https://github.com/KaiyanZhaoPhoenix;;https://www.fst.um.edu.mo/personal/ryanlhu/;", "dblp": ";116/7289;38/4996;", "google_scholar": "0xX-rn4AAAAJ;https://scholar.google.com.hk/citations?user=DJY8NXMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works", "orcid": ";;0000-0002-5135-5165;0000-0002-7113-9726", "linkedin": ";;;", "or_profile": "~Kaiyan_Zhao3;~Furui_Liu1;~Leong_Hou_U2;~YIMING_WANG8", "aff": "University of Macau;Zhejiang Lab & UCAS & Zhejiang University;University of macau;University of Macau", "aff_domain": "um.edu.mo;zhejianglab.com;um.edu.mo;um.edu.mo", "position": "MS student;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nwang2024rethinking,\ntitle={Rethinking Exploration in Reinforcement Learning with Effective Metric-Based Exploration Bonus},\nauthor={Yiming Wang and Kaiyan Zhao and Furui Liu and Leong Hou U},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QpKWFLtZKi}\n}", "github": "", "reviewers": "7tZv;DhVP;tp7G;TUTk", "pdf_size": 7020923, "rating": "6;7;7;7", "confidence": "1;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "60;138;122;128", "wc_strengths": "52;111;112;109", "wc_weaknesses": "129;91;101;90", "wc_questions": "90;71;188;5", "wc_limitations": "1;21;68;8", "wc_review": "332;432;591;340", "wc_reply_reviewers": "0;52;35;0", "wc_reply_authors": "0;30;38;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 112.0, 30.56141357987225 ], "wc_strengths_avg": [ 96.0, 25.42636427018224 ], "wc_weaknesses_avg": [ 102.75, 15.75396775418815 ], "wc_questions_avg": [ 88.5, 65.53815682486044 ], "wc_limitations_avg": [ 24.5, 26.119915773217954 ], "wc_review_avg": [ 423.75, 104.25059951865984 ], "wc_reply_reviewers_avg": [ 21.75, 22.56518335843961 ], "wc_reply_authors_avg": [ 17.0, 17.233687939614086 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yGon4YMJvqQJ:scholar.google.com/&scioq=Rethinking+Exploration+in+Reinforcement+Learning+with+Effective+Metric-Based+Exploration+Bonus&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "um.edu.mo;zhejianglab.com;um.edu.mo;um.edu.mo", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Macau;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.um.edu.mo;http://www.zju.edu.cn", "aff_unique_abbr": "UM;ZJU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Macau SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "PhyRecon: Physically Plausible Neural Scene Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95212", "id": "QrE9QPq4ya", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QrE9QPq4ya", "openreview": "https://openreview.net/forum?id=QrE9QPq4ya", "poster": "/media/PosterPDFs/NeurIPS%202024/95212.png?t=1733227910.4817345", "project": "", "author_site": "Junfeng Ni, Yixin Chen, Bohan Jing, Nan Jiang, Bin Wang, Bo Dai, Puhao Li, Yixin Zhu, Song-Chun Zhu, Siyuan Huang", "tldr": "", "abstract": "We address the issue of physical implausibility in multi-view neural reconstruction. While implicit representations have gained popularity in multi-view 3D reconstruction, previous work struggles to yield physically plausible results, limiting their utility in domains requiring rigorous physical accuracy. This lack of plausibility stems from the absence of physics modeling in existing methods and their inability to recover intricate geometrical structures. In this paper, we introduce PHYRECON, the first approach to leverage both differentiable rendering and differentiable physics simulation to learn implicit surface representations. PHYRECON features a novel differentiable particle-based physical simulator built on neural implicit representations. Central to this design is an efficient transformation between SDF-based implicit representations and explicit surface points via our proposed Surface Points Marching Cubes (SP-MC), enabling differentiable learning with both rendering and physical losses. Additionally, PHYRECON models both rendering and physical uncertainty to identify and compensate for inconsistent and inaccurate monocular geometric priors. The physical uncertainty further facilitates physics-guided pixel sampling to enhance the learning of slender structures. By integrating these techniques, our model supports differentiable joint modeling of appearance, geometry, and physics. Extensive experiments demonstrate that PHYRECON significantly improves the reconstruction quality. Our results also exhibit superior physical stability in physical simulators, with at least a 40% improvement across all datasets, paving the way for future physics-based applications.", "keywords": "Multi-view Reconstruction;Neural Implicit Surface Reconstruction;Physically Plausible Reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/65a4b4ddd3cbab1b143149a85213084c51113424.zip", "author": "Junfeng Ni;Yixin Chen;Bohan Jing;Nan Jiang;Bin Wang;Bo Dai;Puhao Li;Yixin Zhu;Song-Chun Zhu;Siyuan Huang", "authorids": "~Junfeng_Ni1;~Yixin_Chen3;~Bohan_Jing2;~Nan_Jiang8;~Bin_Wang24;~Bo_Dai5;~Puhao_Li1;~Yixin_Zhu1;~Song-Chun_Zhu1;~Siyuan_Huang2", "gender": "M;M;M;M;F;M;M;M;M;M", "homepage": "https://github.com/DaLi-Jack/DaLi-Jack.github.io;https://yixchen.github.io/;;http://jnnan.github.io;https://binwangbfa.github.io/;https://daibopku.github.io/daibo/;https://xiaoyao-li.github.io/;https://yzhu.io/;https://zhusongchun.net/;https://siyuanhuang.com/", "dblp": "359/8893;;335/6547;;;355/2844;330/4121.html;91/1103-1.html;10/10313;62/885-1", "google_scholar": "zOoHsAcAAAAJ;FCdDIOQAAAAJ;;https://scholar.google.com/citations?hl=en;;y-Mz7PwAAAAJ;https://scholar.google.at/citations?user=HTsO18AAAAAJ;qG9l6JEAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;1NN7Ee8AAAAJ", "orcid": ";;0000-0002-7520-2307;0009-0006-5726-7672;;0000-0001-8892-4395;0009-0003-2696-9346;0000-0001-7024-1545;;", "linkedin": ";;;;;;;;;", "or_profile": "~Junfeng_Ni1;~Yixin_Chen3;~Bohan_Jing2;~Nan_Jiang8;~Bin_Wang24;~Bo_Dai5;~Puhao_Li1;~Yixin_Zhu1;~Song-Chun_Zhu1;~Siyuan_Huang2", "aff": "Tsinghua University;BIGAI;Beijing Institute for General Artificial Intelligence;Peking University;;Beijing Institute for General Artificial Intelligence;Tsinghua University;Peking University;Peking University;Beijing Institute for General Artificial Intelligence", "aff_domain": "mail.tsinghua.edu.cn;bigai.ai;bigai.ai;pku.edu.cn;;bigai.ai;tsinghua.edu.cn;pku.edu.cn;pku.edu.cn;bigai.ai", "position": "PhD student;Researcher;Researcher;PhD student;;Researcher;PhD student;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nni2024phyrecon,\ntitle={PhyRecon: Physically Plausible Neural Scene Reconstruction},\nauthor={Junfeng Ni and Yixin Chen and Bohan Jing and Nan Jiang and Bin Wang and Bo Dai and Puhao Li and Yixin Zhu and Song-Chun Zhu and Siyuan Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QrE9QPq4ya}\n}", "github": "", "reviewers": "5HaF;mFxQ;qV3n;fqR5", "pdf_size": 3823295, "rating": "4;6;7;7", "confidence": "4;4;4;5", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;4;4;3", "wc_summary": "124;110;109;54", "wc_strengths": "75;42;123;205", "wc_weaknesses": "334;140;82;113", "wc_questions": "2;70;85;111", "wc_limitations": "13;15;12;8", "wc_review": "548;377;411;491", "wc_reply_reviewers": "469;0;0;18", "wc_reply_authors": "935;0;0;277", "reply_reviewers": "1;0;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 99.25, 26.789690181112583 ], "wc_strengths_avg": [ 111.25, 61.31221330208199 ], "wc_weaknesses_avg": [ 167.25, 98.4362103090118 ], "wc_questions_avg": [ 67.0, 40.29267923581156 ], "wc_limitations_avg": [ 12.0, 2.5495097567963922 ], "wc_review_avg": [ 456.75, 66.99393629277205 ], "wc_reply_reviewers_avg": [ 121.75, 200.6195092706589 ], "wc_reply_authors_avg": [ 303.0, 382.0071988850472 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=312962934088113967&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mail.tsinghua.edu.cn;bigai.ai;bigai.ai;pku.edu.cn;;bigai.ai;tsinghua.edu.cn;pku.edu.cn;pku.edu.cn;bigai.ai", "author_num": 10, "aff_unique_index": "0;1;2;3;2;0;3;3;2", "aff_unique_norm": "Tsinghua University;Beijing Institute of General Artificial Intelligence;Beijing Institute for General Artificial Intelligence;Peking University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.bigmodel.cn/;http://www.bigaiai.org/;http://www.pku.edu.cn", "aff_unique_abbr": "THU;BIGAI;BIGAI;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "QsxldAFMFx", "title": "Generative Modeling of Individual Behavior at Scale", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent years have seen a growing interest in using AI to model human behavior, particularly in domains where humans learn from or collaborate with this technology. While most existing work attempts to model human behavior at an aggregate level, our goal is to model behavior at the individual level. Recent work in the domain of chess has shown that behavioral stylometry, or the task of identifying a person from their actions alone, can be achieved with high accuracy among a pool of a few thousand players. However, this approach cannot generate actions in the style of each player, and hence cannot reason about or influence player behavior in practice. We provide a new perspective on behavioral stylometry that addresses these limitations, by drawing a connection to the vast literature of transfer learning in NLP. Specifically, by casting the stylometry problem as a multi-task learning problem---where each task represents a distinct---we show that parameter-efficient fine-tuning (PEFT) methods can be adapted to model individual behavior in an explicit and generative manner, at unprecedented scale. We apply our approach at scale to two very different games: chess (47,864 players) and Rocket League (2,000 players).\n\nOur approach leverages recent modular PEFT methods to learn a shared set of skill parameters that can be combined in different ways via style vectors. Style vectors enable two important capabilities. First, they are generative: we can generate actions in the style of a player simply by conditioning on the player's style vector. Second, they induce a latent style space that we can interpret and manipulate algorithmically. This allows us to compare different player styles, as well as synthesize new (human-like) styles, e.g. by interpolating between the style vectors of two players.", "keywords": "style;parameter efficient fine-tuning;peft;chess;stylometry;playstyle;representation learning;steerability", "primary_area": "generative_models", "supplementary_material": "", "author": "Nabil Omi;Lucas Caccia;Anurag Sarkar;Jordan T. Ash;Siddhartha Sen", "authorids": "~Nabil_Omi1;~Lucas_Caccia1;~Anurag_Sarkar1;~Jordan_T._Ash1;~Siddhartha_Sen1", "gender": "M;M;;;", "homepage": "https://www.nabilto.com/;https://www.cs.mcgill.ca/~lpagec/;;http://www.jordantash.com;http://sidsen.org", "dblp": "391/7704;;;176/5225;", "google_scholar": "18iysQgAAAAJ;fuvIITUAAAAJ;;bmRNH-UAAAAJ;", "orcid": ";;;;", "linkedin": "nabil-omi/;;;;", "or_profile": "~Nabil_Omi1;~Lucas_Caccia1;~Anurag_Sarkar1;~Jordan_T._Ash1;~Siddhartha_Sen1", "aff": "The City College of New York;Microsoft;;Microsoft Research;Microsoft Research", "aff_domain": "citymail.cuny.edu;microsoft.com;;research.microsoft.com;research.microsoft.com", "position": "Undergrad student;Postdoc;;Postdoc;Principal Researcher", "bibtex": "@misc{\nanonymous2024generative,\ntitle={Generative Modeling of Individual Behavior at Scale},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=QsxldAFMFx}\n}", "github": "", "project": "", "reviewers": "tLZq;Ff76;JjNr;ALah", "site": "https://openreview.net/forum?id=QsxldAFMFx", "pdf_size": 2801430, "rating": "4;5;6;7", "confidence": "3;3;5;4", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "71;216;32;92", "wc_strengths": "49;44;92;96", "wc_weaknesses": "130;161;196;173", "wc_questions": "31;68;1;107", "wc_limitations": "13;34;1;9", "wc_review": "294;523;322;477", "wc_reply_reviewers": "23;0;22;19", "wc_reply_authors": "41;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.75, 68.83812533763539 ], "wc_strengths_avg": [ 70.25, 23.85765076448224 ], "wc_weaknesses_avg": [ 165.0, 23.80126047082381 ], "wc_questions_avg": [ 51.75, 39.757860857948586 ], "wc_limitations_avg": [ 14.25, 12.193748398257199 ], "wc_review_avg": [ 404.0, 97.8698114844409 ], "wc_reply_reviewers_avg": [ 16.0, 9.354143466934854 ], "wc_reply_authors_avg": [ 10.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YgxLMYmtMnMJ:scholar.google.com/&scioq=Generative+Modeling+of+Individual+Behavior+at+Scale&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "City College of New York;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ccny.cuny.edu;https://www.microsoft.com", "aff_unique_abbr": "CCNY;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "New York;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GraphMETRO: Mitigating Complex Graph Distribution Shifts via Mixture of Aligned Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95211", "id": "QtYg4g3Deu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QtYg4g3Deu", "openreview": "https://openreview.net/forum?id=QtYg4g3Deu", "poster": "", "project": "", "author_site": "Shirley Wu, Kaidi Cao, Bruno Ribeiro, James Zou, Jure Leskovec", "tldr": "", "abstract": "Graph data are inherently complex and heterogeneous, leading to a high natural diversity of distributional shifts. However, it remains unclear how to build machine learning architectures that generalize to the complex distributional shifts naturally occurring in the real world. Here, we develop GraphMETRO, a Graph Neural Network architecture that models natural diversity and captures complex distributional shifts. GraphMETRO employs a Mixture-of-Experts (MoE) architecture with a gating model and multiple expert models, where each expert model targets a specific distributional shift to produce a referential representation w.r.t. a reference model, and the gating model identifies shift components. Additionally, we design a novel objective that aligns the representations from different expert models to ensure reliable optimization. GraphMETRO achieves state-of-the-art results on four datasets from the GOOD benchmark, which is comprised of complex and natural real-world distribution shifts, improving by 67% and 4.2% on the WebKB and Twitch datasets. Code and data are available at https://github.com/Wuyxin/GraphMETRO.", "keywords": "Graph Neural Network;Distribution Shifts;Generalization;Mixture-of-expert model", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Shirley Wu;Kaidi Cao;Bruno Ribeiro;James Zou;Jure Leskovec", "authorids": "~Shirley_Wu1;~Kaidi_Cao1;~Bruno_Ribeiro1;~James_Zou1;~Jure_Leskovec1", "gender": "M;M;;;F", "homepage": "https://ai.stanford.edu/~kaidicao/;https://www.cs.purdue.edu/homes/ribeirob/;;http://cs.stanford.edu/~jure/;https://cs.stanford.edu/~shirwu", "dblp": "203/8207;15/606;;l/JureLeskovec;79/4173-2", "google_scholar": "https://scholar.google.com.hk/citations?user=4Zw1PJ8AAAAJ;KIEleCsAAAAJ;23ZXZvEAAAAJ;Q_kKkIUAAAAJ;r2cVEucAAAAJ", "orcid": ";0000-0002-3527-6192;;0000-0002-5411-923X;", "linkedin": ";;;leskovec/;", "or_profile": "~Kaidi_Cao1;~Bruno_Ribeiro1;~James_Zou1;~Jure_Leskovec1;~Yingxin_Wu1", "aff": "Stanford University;Stanford University;Stanford University;Kumo.AI;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;kumo.ai;cs.stanford.edu", "position": "PhD student;Visiting Associate Professor;Assistant Professor;Chief Scientist;PhD student", "bibtex": "@inproceedings{\nwu2024graphmetro,\ntitle={Graph{METRO}: Mitigating Complex Graph Distribution Shifts via Mixture of Aligned Experts},\nauthor={Shirley Wu and Kaidi Cao and Bruno Ribeiro and James Zou and Jure Leskovec},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QtYg4g3Deu}\n}", "github": "", "reviewers": "ymW6;LHfk;U8um;t31K;uY6m", "pdf_size": 2481043, "rating": "5;5;5;6;7", "confidence": "4;3;3;4;3", "soundness": "3;3;2;2;3", "novelty": "3;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "63;99;34;76;106", "wc_strengths": "62;44;62;63;64", "wc_weaknesses": "139;90;341;148;85", "wc_questions": "22;52;24;48;78", "wc_limitations": "1;5;1;14;9", "wc_review": "287;290;462;349;342", "wc_reply_reviewers": "63;0;129;19;63", "wc_reply_authors": "17;0;171;0;18", "reply_reviewers": "1;0;2;1;1", "reply_authors": "2;1;3;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.6, 25.92759148089155 ], "wc_strengths_avg": [ 59.0, 7.536577472566709 ], "wc_weaknesses_avg": [ 160.6, 93.66877814939191 ], "wc_questions_avg": [ 44.8, 20.575713839378697 ], "wc_limitations_avg": [ 6.0, 4.979959839195493 ], "wc_review_avg": [ 346.0, 63.40031545662845 ], "wc_reply_reviewers_avg": [ 54.8, 44.5528899174902 ], "wc_reply_authors_avg": [ 41.2, 65.37094155662744 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1020620726159658, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15327966597849114104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu;kumo.ai;cs.stanford.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Stanford University;Kumo.AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.kumo.ai", "aff_unique_abbr": "Stanford;Kumo.AI", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cascade of phase transitions in the training of energy-based models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95210", "id": "Qtf6Xz4VvE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Qtf6Xz4VvE", "openreview": "https://openreview.net/forum?id=Qtf6Xz4VvE", "poster": "/media/PosterPDFs/NeurIPS%202024/95210.png?t=1731419036.7647934", "project": "", "author_site": "Dimitrios Bachtis, Giulio Biroli, Aur\u00e9lien Decelle, Beatriz Seoane", "tldr": "", "abstract": "In this paper, we investigate the feature encoding process in a prototypical energy-based generative model, the Restricted Boltzmann Machine (RBM). We start with an analytical investigation using simplified architectures and data structures, and end with numerical analysis of real trainings on real datasets. Our study tracks the evolution of the model\u2019s weight matrix through its singular value decomposition, revealing a series of thermodynamic phase transitions that shape the principal learning modes of the empirical probability distribution. We first describe this process analytically in several controlled setups that allow us to fully monitor the training dynamics until convergence. We then validate these findings by training the Bernoulli-Bernoulli RBM on real data sets. By studying the phase behavior over data sets of increasing dimension, we show that these phase transitions are genuine in the thermodynamic sense. Moreover, we propose a mean-field finite-size scaling hypothesis, confirming that the initial phase transition, reminiscent of the paramagnetic-to-ferromagnetic phase transition in mean-field ferromagnetism models, is governed by mean-field critical exponents.", "keywords": "Restricted Boltzmann Machine;Generative model;Phase transition;statistical physics;Energy-based model", "primary_area": "generative_models", "supplementary_material": "", "author": "Dimitrios Bachtis;Giulio Biroli;Aur\u00e9lien Decelle;Beatriz Seoane", "authorids": "~Dimitrios_Bachtis1;~Giulio_Biroli1;~Aur\u00e9lien_Decelle1;~Beatriz_Seoane1", "gender": ";;M;F", "homepage": ";https://www.lpens.ens.psl.eu/giulio-biroli/;http://www.lri.fr/~adecelle;", "dblp": ";18/5547;86/9220;", "google_scholar": "eiaff1QAAAAJ;https://scholar.google.fr/citations?user=BadZJUsAAAAJ;https://scholar.google.fr/citations?user=__OKD-kAAAAJ;https://scholar.google.es/citations?user=RyCweLUAAAAJ", "orcid": ";;0000-0002-3017-0858;0000-0003-4007-9406", "linkedin": ";;aur%C3%A9lien-decelle-947309105/;beatriz-seoane-bartolome-39620412/", "or_profile": "~Dimitrios_Bachtis1;~Giulio_Biroli1;~Aur\u00e9lien_Decelle1;~Beatriz_Seoane1", "aff": "Ecole Normale Sup\u00e9rieure de Paris;Ecole Normale Superieure;Universidad Complutense de Madrid;Universit\u00e9 Paris-Saclay", "aff_domain": "ens.fr;ens.fr;ucm.es;univ-paris-saclay.fr", "position": "Postdoc;Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbachtis2024cascade,\ntitle={Cascade of phase transitions in the training of energy-based models},\nauthor={Dimitrios Bachtis and Giulio Biroli and Aur{\\'e}lien Decelle and Beatriz Seoane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Qtf6Xz4VvE}\n}", "github": "", "reviewers": "vpPo;ZU37;pfhu;mUPE", "pdf_size": 3194566, "rating": "6;6;6;7", "confidence": "4;3;3;3", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "1;3;2;3", "wc_summary": "29;41;142;132", "wc_strengths": "52;48;91;54", "wc_weaknesses": "108;62;101;202", "wc_questions": "547;63;22;95", "wc_limitations": "11;6;6;72", "wc_review": "747;220;362;555", "wc_reply_reviewers": "122;28;40;98", "wc_reply_authors": "1326;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 86.0, 51.29814811472242 ], "wc_strengths_avg": [ 61.25, 17.311484627264065 ], "wc_weaknesses_avg": [ 118.25, 51.431386331694384 ], "wc_questions_avg": [ 181.75, 212.45867245184414 ], "wc_limitations_avg": [ 23.75, 27.931836674304108 ], "wc_review_avg": [ 471.0, 198.81775574631155 ], "wc_reply_reviewers_avg": [ 72.0, 39.16631205513228 ], "wc_reply_authors_avg": [ 331.5, 574.1748427090828 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8950941950926048877&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ens.fr;ens.fr;ucm.es;univ-paris-saclay.fr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Paris;Ecole Normale Superieure;Universidad Complutense de Madrid;Universit\u00e9 Paris-Saclay", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ens.fr;https://www.ens.fr;https://www.ucm.es;https://www.universite-paris-saclay.fr", "aff_unique_abbr": "ENS Paris;ENS;UCM;UPSaclay", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "France;Spain" }, { "title": "Suppress Content Shift: Better Diffusion Features via Off-the-Shelf Generation Techniques", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95209", "id": "QvqLdeSLWA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QvqLdeSLWA", "openreview": "https://openreview.net/forum?id=QvqLdeSLWA", "poster": "/media/PosterPDFs/NeurIPS%202024/95209.png?t=1729469206.17666", "project": "", "author_site": "Benyuan Meng, Qianqian Xu, Zitai Wang, Zhiyong Yang, Xiaochun Cao, Qingming Huang", "tldr": "", "abstract": "Diffusion models are powerful generative models, and this capability can also be applied to discrimination. The inner activations of a pre-trained diffusion model can serve as features for discriminative tasks, namely, diffusion feature. We discover that diffusion feature has been hindered by a hidden yet universal phenomenon that we call content shift. To be specific, there are content differences between features and the input image, such as the exact shape of a certain object. We locate the cause of content shift as one inherent characteristic of diffusion models, which suggests the broad existence of this phenomenon in diffusion feature. Further empirical study also indicates that its negative impact is not negligible even when content shift is not visually perceivable. Hence, we propose to suppress content shift to enhance the overall quality of diffusion features. Specifically, content shift is related to the information drift during the process of recovering an image from the noisy input, pointing out the possibility of turning off-the-shelf generation techniques into tools for content shift suppression. We further propose a practical guideline named GATE to efficiently evaluate the potential benefit of a technique and provide an implementation of our methodology. Despite the simplicity, the proposed approach has achieved superior results on various tasks and datasets, validating its potential as a generic booster for diffusion features. Our code is available at https://github.com/Darkbblue/diffusion-content-shift.", "keywords": "Diffusion Models;Representation Learning;Model Property", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Benyuan Meng;Qianqian Xu;Zitai Wang;Zhiyong Yang;Xiaochun Cao;Qingming Huang", "authorids": "~Benyuan_Meng1;~Qianqian_Xu2;~Zitai_Wang1;~Zhiyong_Yang1;~Xiaochun_Cao3;~Qingming_Huang2", "gender": "M;F;M;M;M;", "homepage": "https://github.com/Darkbblue;http://vipl.ict.ac.cn/people/~qianqianxu;https://wang22ti.github.io;https://joshuaas.github.io/;https://scst.sysu.edu.cn/members/caoxiaochun.htm;https://qmhuang-ucas.github.io/", "dblp": "359/4161;07/7627;251/3361;01/452-1.html;39/3695;68/4388", "google_scholar": ";https://scholar.google.com.hk/citations?user=MjifS2MAAAAJ;45qZ_LcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": "0009-0006-5739-6781;;0000-0003-4156-6417;0000-0002-4409-4999;0000-0001-7141-708X;", "linkedin": ";;;;;", "or_profile": "~Benyuan_Meng1;~Qianqian_Xu2;~Zitai_Wang1;~Zhiyong_Yang1;~Xiaochun_Cao3;~Qingming_Huang2", "aff": "Institute of Information Engineering, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academic of Sciences;SUN YAT-SEN UNIVERSITY;University of Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;ict.ac.cn;ucas.ac.cn;ucas.ac.cb;sysu.edu.cn;ucas.ac.cn", "position": "MS student;Full Professor;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmeng2024suppress,\ntitle={Suppress Content Shift: Better Diffusion Features via Off-the-Shelf Generation Techniques},\nauthor={Benyuan Meng and Qianqian Xu and Zitai Wang and Zhiyong Yang and Xiaochun Cao and Qingming Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QvqLdeSLWA}\n}", "github": "", "reviewers": "RvKH;z9si;ECpW;cofK", "pdf_size": 5391286, "rating": "4;4;7;7", "confidence": "3;3;5;5", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "1;2;3;4", "wc_summary": "133;100;95;98", "wc_strengths": "46;70;135;80", "wc_weaknesses": "54;134;150;70", "wc_questions": "381;61;4;3", "wc_limitations": "22;57;9;1", "wc_review": "636;422;393;252", "wc_reply_reviewers": "62;76;16;0", "wc_reply_authors": "2633;333;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "6;3;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 106.5, 15.402921800749363 ], "wc_strengths_avg": [ 82.75, 32.59888801784503 ], "wc_weaknesses_avg": [ 102.0, 40.792156108742276 ], "wc_questions_avg": [ 112.25, 156.92892499472492 ], "wc_limitations_avg": [ 22.25, 21.41699091842736 ], "wc_review_avg": [ 425.75, 137.3688010430316 ], "wc_reply_reviewers_avg": [ 38.5, 31.412577098990145 ], "wc_reply_authors_avg": [ 741.5, 1100.487278436239 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 2.0463381929681126 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RAlodnB7azUJ:scholar.google.com/&scioq=Suppress+Content+Shift:+Better+Diffusion+Features+via+Off-the-Shelf+Generation+Techniques&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "iie.ac.cn;ict.ac.cn;ucas.ac.cn;ucas.ac.cb;sysu.edu.cn;ucas.ac.cn", "author_num": 6, "aff_unique_index": "0;0;1;1;2;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Sun Yat-sen University", "aff_unique_dep": "Institute of Information Engineering;;", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "CAS;UCAS;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Expecting The Unexpected: Towards Broad Out-Of-Distribution Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97693", "id": "QxJHh7Z39R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QxJHh7Z39R", "openreview": "https://openreview.net/forum?id=QxJHh7Z39R", "poster": "/media/PosterPDFs/NeurIPS%202024/97693.png?t=1731724046.1699939", "project": "", "author_site": "Charles Guille-Escuret, Pierre-Andr\u00e9 No\u00ebl, Ioannis Mitliagkas, David Vazquez, Joao Monteiro", "tldr": "", "abstract": "Deployed machine learning systems require some mechanism to detect out-of-distribution (OOD) inputs. Existing research mainly focuses on one type of distribution shift: detecting samples from novel classes, absent from the training set. However, real-world systems encounter a broad variety of anomalous inputs, and the OOD literature neglects this diversity. This work categorizes five distinct types of distribution shifts and critically evaluates the performance of recent OOD detection methods on each of them. We publicly release our benchmark under the name BROAD (Benchmarking Resilience Over Anomaly Diversity). We find that while these methods excel in detecting novel classes, their performances are inconsistent across other types of distribution shifts. In other words, they can only reliably detect unexpected inputs that they have been specifically designed to expect. As a first step toward broad OOD detection, we learn a Gaussian mixture generative model for existing detection scores, enabling an ensemble detection approach that is more consistent and comprehensive for broad OOD detection, with improved performances over existing methods. We release code to build BROAD to facilitate a more comprehensive evaluation of novel OOD detectors.", "keywords": "out-of-distribution detection;distribution shifts;broad;detection benchmark", "primary_area": "", "supplementary_material": "/attachment/e51bcf3fedbb6d2e65d9f6482d9416fae9cee228.zip", "author": "Charles Guille-Escuret;Pierre-Andre Noel;Ioannis Mitliagkas;David Vazquez;Joao Monteiro", "authorids": "~Charles_Guille-Escuret1;~Pierre-Andre_Noel1;~Ioannis_Mitliagkas1;~David_Vazquez1;~Joao_Monteiro1", "gender": "M;M;M;M;M", "homepage": ";;http://mitliagkas.github.io/;http://www.david-vazquez.com;", "dblp": "243/7039;47/9226.html;83/8757;94/8653;215/5354-2", "google_scholar": "VNgVRmgAAAAJ;https://scholar.google.com/citations?hl=en;K757SxgAAAAJ;1jHvtfsAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";0000-0001-6979-1873;;0000-0002-2845-8158;", "linkedin": ";panoel/;;https://www.linkedin.com/company/david-vazquez/;joao-monteiro-47180256/", "or_profile": "~Charles_Guille-Escuret1;~Pierre-Andre_Noel1;~Ioannis_Mitliagkas1;~David_Vazquez1;~Joao_Monteiro1", "aff": "ServiceNow;ServiceNow;Mila - Quebec AI Institute;ServiceNow research;ServiceNow Research", "aff_domain": "servicenow.com;servicenow.com;mila.quebec;servicenow.com;servicenow.com", "position": "Intern;Researcher;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nguille-escuret2024expecting,\ntitle={Expecting The Unexpected: Towards Broad Out-Of-Distribution Detection},\nauthor={Charles Guille-Escuret and Pierre-Andre Noel and Ioannis Mitliagkas and David Vazquez and Joao Monteiro},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=QxJHh7Z39R}\n}", "github": "", "reviewers": "mRTn;CJ5S;DbBn", "pdf_size": 2100402, "rating": "6;6;7", "confidence": "3;2;4", "wc_summary_and_contributions": "379;53;59", "wc_strengths": "468;9;2", "wc_improvement": "433;9;2", "wc_limitations": "525;9;2", "wc_correctness": "643;1;1", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "2452;85;70", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 163.66666666666666, 152.28336160665162 ], "wc_strengths_avg": [ 159.66666666666666, 218.04331883570498 ], "wc_improvement_avg": [ 148.0, 201.54569374379267 ], "wc_limitations_avg": [ 178.66666666666666, 244.91132181986924 ], "wc_correctness_avg": [ 215.0, 302.64170234784234 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 869.0, 1119.3667852853237 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16811754698536378445&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "servicenow.com;servicenow.com;mila.quebec;servicenow.com;servicenow.com", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "ServiceNow;Quebec AI Institute", "aff_unique_dep": ";AI Institute", "aff_unique_url": "https://www.servicenow.com;https://mila.quebec", "aff_unique_abbr": "ServiceNow;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Provable Tempered Overfitting of Minimal Nets and Typical Nets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95208", "id": "QyR1dNDxRP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QyR1dNDxRP", "openreview": "https://openreview.net/forum?id=QyR1dNDxRP", "poster": "/media/PosterPDFs/NeurIPS%202024/95208.png?t=1733667777.010886", "project": "", "author_site": "Itamar Harel, William Hoza, Gal Vardi, Itay Evron, Nati Srebro, Daniel Soudry", "tldr": "", "abstract": "We study the overfitting behavior of fully connected deep Neural Networks (NNs) with binary weights fitted to perfectly classify a noisy training set. We consider interpolation using both the smallest NN (having the minimal number of weights) and a random interpolating NN. For both learning rules, we prove overfitting is tempered. Our analysis rests on a new bound on the size of a threshold circuit consistent with a partial function. To the best of our knowledge, ours are the first theoretical results on benign or tempered overfitting that: (1) apply to deep NNs, and (2) do not require a very high or very low input dimension.", "keywords": "Deep Learning;Tempered Overfitting;Generalization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Itamar Harel;William M. Hoza;Gal Vardi;Itay Evron;Nathan Srebro;Daniel Soudry", "authorids": "~Itamar_Harel1;~William_M._Hoza1;~Gal_Vardi1;~Itay_Evron1;~Nathan_Srebro1;~Daniel_Soudry1", "gender": "M;M;M;M;M;M", "homepage": ";https://williamhoza.com;https://sites.google.com/view/galvardi/home;http://www.evron.me;http://ttic.uchicago.edu/~nati/;https://soudry.github.io/", "dblp": "368/6206;155/9793.html;https://dblp.uni-trier.de/pid/167/9638.html;217/1801;50/3633;126/1779", "google_scholar": "BdWvuiIAAAAJ;sORZkb4AAAAJ;https://scholar.google.co.il/citations?hl=en;https://scholar.google.co.il/citations?user=Df3po24AAAAJ;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ", "orcid": "0009-0007-2900-4653;0000-0001-5162-9181;;;;0000-0001-9368-6352", "linkedin": "itamar-harel-3245a82b2;;;;;daniel-soudry-2aa3a88/", "or_profile": "~Itamar_Harel1;~William_M._Hoza1;~Gal_Vardi1;~Itay_Evron1;~Nathan_Srebro1;~Daniel_Soudry1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;University of Chicago;Toyota Technological Institute at Chicago;Technion, Technion;University of Chicago;Technion - Israel Institute of Technology, Technion", "aff_domain": "campus.technion.ac.il;uchicago.edu;ttic.edu;technion.ac.il;uchicago.edu;technion.ac.il", "position": "MS student;Assistant Professor;Postdoc;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nharel2024provable,\ntitle={Provable Tempered Overfitting of Minimal Nets and Typical Nets},\nauthor={Itamar Harel and William M. Hoza and Gal Vardi and Itay Evron and Nathan Srebro and Daniel Soudry},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QyR1dNDxRP}\n}", "github": "", "reviewers": "gENy;X2nX;E8Kb;q6NB", "pdf_size": 925808, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "59;32;75;31", "wc_strengths": "51;28;79;74", "wc_weaknesses": "179;268;53;34", "wc_questions": "168;68;42;21", "wc_limitations": "7;5;5;3", "wc_review": "464;401;254;163", "wc_reply_reviewers": "8;20;8;16", "wc_reply_authors": "23;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 49.25, 18.632968094214082 ], "wc_strengths_avg": [ 58.0, 20.285462775100793 ], "wc_weaknesses_avg": [ 133.5, 95.57850176687224 ], "wc_questions_avg": [ 74.75, 56.35323859371349 ], "wc_limitations_avg": [ 5.0, 1.4142135623730951 ], "wc_review_avg": [ 320.5, 118.63915879674805 ], "wc_reply_reviewers_avg": [ 13.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 5.75, 9.959292143521045 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3798865387164670734&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "campus.technion.ac.il;uchicago.edu;ttic.edu;technion.ac.il;uchicago.edu;technion.ac.il", "author_num": 6, "aff_unique_index": "0;1;2;0;1;0", "aff_unique_norm": "Technion - Israel Institute of Technology;University of Chicago;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.uchicago.edu;https://www.tti-chicago.org", "aff_unique_abbr": "Technion;UChicago;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "Faster Differentially Private Top-$k$ Selection: A Joint Exponential Mechanism with Pruning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95207", "id": "QyxE3W9Yni", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QyxE3W9Yni", "openreview": "https://openreview.net/forum?id=QyxE3W9Yni", "poster": "/media/PosterPDFs/NeurIPS%202024/95207.png?t=1731694991.4607704", "project": "", "author_site": "Hao WU, Hanwen Zhang", "tldr": "", "abstract": "We study the differentially private top-$k$ selection problem, aiming to identify a sequence of $k$ items with approximately the highest scores from $d$ items. Recent work by Gillenwater et al. (2022) employs a direct sampling approach from the vast collection of $O(d^k)$ possible length-$k$ sequences, showing superior empirical accuracy compared to previous pure or approximate differentially private methods. Their algorithm has a time and space complexity of $\\tilde{O}(dk)$. \n\nIn this paper, we present an improved algorithm that achieves time and space complexity of $\\tilde{O}(d + k^2)$.\nExperimental results show that our algorithm runs orders of magnitude faster than their approach, while achieving similar empirical accuracy.", "keywords": "Differential Privacy;Top-k Selection;Exponential Mechanism", "primary_area": "privacy", "supplementary_material": "/attachment/ddc3c344fa114ff053917ed71c8cfec50b196a10.zip", "author": "Hao WU;Hanwen Zhang", "authorids": "~Hao_WU21;~Hanwen_Zhang8", "gender": "M;", "homepage": ";", "dblp": "72/4250-57;", "google_scholar": "Aaui0ucAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": "wuhaowujiang/;", "or_profile": "~Hao_WU21;~Hanwen_Zhang8", "aff": "Copenhagen University;Copenhagen University", "aff_domain": "ku.dk;ku.dk", "position": "Postdoc;PhD student", "bibtex": "@inproceedings{\nwu2024faster,\ntitle={Faster Differentially Private Top-\\$k\\$ Selection: A Joint Exponential Mechanism with Pruning},\nauthor={Hao WU and Hanwen Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QyxE3W9Yni}\n}", "github": "", "reviewers": "owmk;pna3;3vYL;u5C6", "pdf_size": 1094422, "rating": "6;6;7;7", "confidence": "3;3;4;3", "soundness": "4;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "113;198;301;70", "wc_strengths": "41;58;108;70", "wc_weaknesses": "138;101;10;34", "wc_questions": "98;18;84;28", "wc_limitations": "1;1;1;1", "wc_review": "391;376;504;203", "wc_reply_reviewers": "147;10;121;5", "wc_reply_authors": "512;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 170.5, 88.30770068346249 ], "wc_strengths_avg": [ 69.25, 24.631027181179434 ], "wc_weaknesses_avg": [ 70.75, 51.18288288090072 ], "wc_questions_avg": [ 57.0, 34.539832078341085 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 368.5, 107.60227692758178 ], "wc_reply_reviewers_avg": [ 70.75, 63.938935712130835 ], "wc_reply_authors_avg": [ 128.0, 221.70250336881628 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yMGcYc6h7qgJ:scholar.google.com/&scioq=Faster+Differentially+Private+Top-%24k%24+Selection:+A+Joint+Exponential+Mechanism+with+Pruning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ku.dk;ku.dk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Spider2-V: How Far Are Multimodal Agents From Automating Data Science and Engineering Workflows?", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97692", "id": "Qz2xmVhn4S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Qz2xmVhn4S", "openreview": "https://openreview.net/forum?id=Qz2xmVhn4S", "poster": "", "project": "", "author_site": "Ruisheng Cao, Fangyu Lei, Haoyuan Wu, Jixuan Chen, Yeqiao Fu, Hongcheng Gao, Xinzhuang Xiong, Hanchong Zhang, Wenjing Hu, Yuchen Mao, Tianbao Xie, Hongshen Xu, Danyang Zhang, Sida Wang, Ruoxi Sun, Pengcheng Yin, Caiming Xiong, Ansong Ni, Qian Liu, Victor Zhong, Lu Chen, Kai Yu, Tao Yu", "tldr": "", "abstract": "Data science and engineering workflows often span multiple stages, from warehousing to orchestration, using tools like BigQuery, dbt, and Airbyte. As vision language models (VLMs) advance in multimodal understanding and code generation, VLM-based agents could potentially automate these workflows by generating SQL queries, Python code, and GUI operations. This automation can improve the productivity of experts while democratizing access to large-scale data analysis. In this paper, we introduce Spider2-V, the first multimodal agent benchmark focusing on professional data science and engineering workflows, featuring 494 real-world tasks in authentic computer environments and incorporating 20 enterprise-level professional applications. These tasks, derived from real-world use cases, evaluate the ability of a multimodal agent to perform data-related tasks by writing code and managing the GUI in enterprise data software systems. To balance realistic simulation with evaluation simplicity, we devote significant effort to developing automatic configurations for task setup and carefully crafting evaluation metrics for each task. Furthermore, we supplement multimodal agents with comprehensive documents of these enterprise data software systems. Our empirical evaluation reveals that existing state-of-the-art LLM/VLM-based agents do not reliably automate full data workflows (14.0% success). Even with step-by-step guidance, these agents still underperform in tasks that require fine-grained, knowledge-intensive GUI actions (16.2%) and involve remote cloud-hosted workspaces (10.6%). We hope that Spider2-V paves the way for autonomous multimodal agents to transform the automation of data science and engineering workflow. Our code and data are available at https://spider2-v.github.io.", "keywords": "LLM benchmark;Data Science and Engineering;Professional Software;GUI Interaction;Executable Environment", "primary_area": "", "supplementary_material": "", "author": "Ruisheng Cao;Fangyu Lei;Haoyuan Wu;Jixuan Chen;Yeqiao Fu;Hongcheng Gao;Xiong Xinzhuang;Hanchong Zhang;Wenjing Hu;Yuchen Mao;Tianbao Xie;Hongshen Xu;Danyang Zhang;Sida Wang;Ruoxi Sun;Pengcheng Yin;Caiming Xiong;Ansong Ni;Qian Liu;Victor Zhong;Lu Chen;Kai Yu;Tao Yu", "authorids": "~Ruisheng_Cao1;~Fangyu_Lei1;~Haoyuan_Wu2;~Jixuan_Chen1;~Yeqiao_Fu1;~Hongcheng_Gao1;~Xiong_Xinzhuang1;~Hanchong_Zhang1;~Wenjing_Hu1;~Yuchen_Mao1;~Tianbao_Xie1;~Hongshen_Xu1;~Danyang_Zhang2;~Sida_Wang2;~Ruoxi_Sun2;~Pengcheng_Yin1;~Caiming_Xiong1;~Ansong_Ni1;~Qian_Liu2;~Victor_Zhong1;~Lu_Chen3;~Kai_Yu3;~Tao_Yu5", "gender": "M;M;M;F;M;M;M;M;F;F;M;M;;M;F;M;M;M;M;M;M;M;M", "homepage": "https://rhythmcao.github.io/;https://lfy79001.github.io;;https://chenjix.github.io;https://docs.google.com/document/d/1u-YTt6_8iN7Mb-KJcYwliS9zeCq_fvwaqNvBd3Qncmk/edit?usp=sharing;https://gao-hongcheng.github.io/;https://thisisxxz.com/;;https://huwenjing0819.github.io/;https://yuchenambernlp.github.io/;https://tianbaoxie.com;https://speechlab.sjtu.edu.cn/members/hongshen-xu;https://zdy023.github.io;https://www.sidaw.xyz;;https://pengcheng.in;http://cmxiong.com/;https://niansong1996.github.io/;http://siviltaram.github.io/;http://www.victorzhong.com;https://coai-sjtu.github.io;https://x-lance.sjtu.edu.cn/~kaiyu/;https://taoyds.github.io/", "dblp": "244/9541;329/5621;;342/1459;;318/1404;;348/6967;;;;314/8140;;153/9609;72/7683;130/7385;80/7282;202/1480;;182/8931;69/157-2;197/1322-4;67/1014-9", "google_scholar": "NdK881sAAAAJ;1WzAOSkAAAAJ;;kmBSlgEAAAAJ;;https://scholar.google.com/citations?hl=en;;4xNsDNgAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=CMHormkAAAAJ;XUI4PMEAAAAJ;ut1-7LAAAAAJ;t5lVb6sAAAAJ;vaSdahkAAAAJ;4IA1clAAAAAJ;bcbeUo0AAAAJ;lT3YoNkAAAAJ;https://scholar.google.ca/citations?user=Fb3jWaYAAAAJ;https://scholar.google.com/citations?hl=en;5_Fn5CIAAAAJ", "orcid": "0000-0003-4635-4368;;;;;;;0000-0003-1152-4355;;;;0000-0002-6770-6564;0009-0001-4958-5017;;;;;;;;;0000-0002-7102-9826;", "linkedin": ";;haoyuan-wu-240878291/;jixuan-chen-a99112298/;;;;;;amber-mao;;;%E4%B8%B9%E9%98%B3-%E5%BC%A0-b62931182;;;pchyin/;caiming-xiong-150a1417;;;victorzhong;;;tao-yu-b9b551a5/", "or_profile": "~Ruisheng_Cao1;~Fangyu_Lei1;~Haoyuan_Wu2;~Jixuan_Chen1;~Yeqiao_Fu1;~Hongcheng_Gao1;~Xiong_Xinzhuang1;~Hanchong_Zhang1;~Wenjing_Hu1;~Yuchen_Mao1;~Tianbao_Xie1;~Hongshen_Xu1;~Danyang_Zhang2;~Sida_Wang2;~Ruoxi_Sun2;~Pengcheng_Yin1;~Caiming_Xiong1;~Ansong_Ni1;~Qian_Liu2;~Victor_Zhong1;~Lu_Chen3;~Kai_Yu3;~Tao_Yu5", "aff": "Shanghai Jiaotong University;Institute of Automation, Chinese Academy of Sciences;University of Hong Kong;Nanjing University;University of Hong Kong;University of Chinese Academy of Sciences;University of Hong Kong;Shanghai Jiaotong University;Nanjing University;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Hong Kong;Shanghai Jiaotong University;University of Hong Kong;Meta Facebook;Google;Google;Salesforce Research;Yale University;Tiktok;Microsoft;Shanghai Jiaotong University;Shanghai Jiaotong University;The University of Hong Kong", "aff_domain": "sjtu.edu.cn;ia.ac.cn;hku.hk;smail.nju.edu.cn;hku.hk;ucas.ac.cn;hku.hk;sjtu.edu.cn;smail.nju.edu.cn;lmu.edu;cs.hku.hk;sjtu.edu.cn;hku.hk;fb.com;google.com;google.com;salesforce.com;yale.edu;bytedance.com;microsoft.com;sjtu.edu.cn;sjtu.edu.cn;hku.hk", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;MS student;Undergrad student;MS student;Undergrad student;MS student;PhD student;PhD student;Intern;Research Scientist;Google;Researcher;Research Scientist;PhD student;Researcher;Postdoc;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2024spiderv,\ntitle={Spider2-V: How Far Are Multimodal Agents From Automating Data Science and Engineering Workflows?},\nauthor={Ruisheng Cao and Fangyu Lei and Haoyuan Wu and Jixuan Chen and Yeqiao Fu and Hongcheng Gao and Xiong Xinzhuang and Hanchong Zhang and Wenjing Hu and Yuchen Mao and Tianbao Xie and Hongshen Xu and Danyang Zhang and Sida Wang and Ruoxi Sun and Pengcheng Yin and Caiming Xiong and Ansong Ni and Qian Liu and Victor Zhong and Lu Chen and Kai Yu and Tao Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Qz2xmVhn4S}\n}", "github": "", "reviewers": "Lwbw;v3tL;ULRM;HHQN", "pdf_size": 10457439, "rating": "6;7;8;8", "confidence": "2;3;4;4", "wc_summary_and_contributions": "78;62;100;44", "wc_strengths": "62;42;55;3", "wc_improvement": "6;41;81;56", "wc_limitations": "29;9;4;5", "wc_correctness": "1;18;13;27", "wc_clarity": "3;7;35;9", "wc_relation_to_prior_work": "4;12;9;9", "wc_documentation": "1;8;5;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "185;200;303;163", "wc_reply_reviewers": "25;0;0;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "4;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 71.0, 20.615528128088304 ], "wc_strengths_avg": [ 40.5, 22.808989455914087 ], "wc_improvement_avg": [ 46.0, 27.15695122800054 ], "wc_limitations_avg": [ 11.75, 10.133484099755622 ], "wc_correctness_avg": [ 14.75, 9.390819985496474 ], "wc_clarity_avg": [ 13.5, 12.599603168354152 ], "wc_relation_to_prior_work_avg": [ 8.5, 2.8722813232690143 ], "wc_documentation_avg": [ 5.75, 3.112474899497183 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 212.75, 53.741859848725 ], "wc_reply_reviewers_avg": [ 11.25, 11.388041973930374 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 23, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6785641295581753616&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;ia.ac.cn;hku.hk;smail.nju.edu.cn;hku.hk;ucas.ac.cn;hku.hk;sjtu.edu.cn;smail.nju.edu.cn;lmu.edu;cs.hku.hk;sjtu.edu.cn;hku.hk;fb.com;google.com;google.com;salesforce.com;yale.edu;bytedance.com;microsoft.com;sjtu.edu.cn;sjtu.edu.cn;hku.hk", "author_num": 23, "aff_unique_index": "0;1;2;3;2;4;2;0;3;5;2;0;2;6;7;7;8;9;10;11;0;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese Academy of Sciences;University of Hong Kong;Nanjing University;University of Chinese Academy of Sciences;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Meta;Google;Salesforce;Yale University;TikTok;Microsoft", "aff_unique_dep": ";Institute of Automation;;;;;Meta Platforms, Inc.;Google;Salesforce Research;;;Microsoft Corporation", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.ia.cas.cn;https://www.hku.hk;https://www.nju.edu.cn;http://www.ucas.ac.cn;https://www.lmu.de;https://meta.com;https://www.google.com;https://research.salesforce.com;https://www.yale.edu;https://www.tiktok.com;https://www.microsoft.com", "aff_unique_abbr": "SJTU;CAS;HKU;Nanjing U;UCAS;LMU;Meta;Google;Salesforce;Yale;TikTok;Microsoft", "aff_campus_unique_index": "1;1;1;1;1;2;2;1", "aff_campus_unique": ";Hong Kong SAR;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;1;0;0;0;2;2;2;2;2;0;2;0;0;0", "aff_country_unique": "China;Germany;United States" }, { "title": "The motion planning neural circuit in goal-directed navigation as Lie group operator search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95206", "id": "Qz7BfmWizk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Qz7BfmWizk", "openreview": "https://openreview.net/forum?id=Qz7BfmWizk", "poster": "", "project": "", "author_site": "Junfeng Zuo, Ying Nian Wu, Si Wu, Wenhao Zhang", "tldr": "", "abstract": "The information processing in the brain and embodied agents form a sensory-action loop to interact with the world. An important step in the loop is motion planning which selects motor actions based on the current world state and task need. In goal-directed navigation, the brain chooses and generates motor actions to bring the current state into the goal state. It is unclear about the neural circuit mechanism of motor action selection, nor its underlying theory. The present study formulates the motion planning as a Lie group operator search problem, and uses the 1D rotation group as an example to provide insight into general operator search in neural circuits. We found the abstract group operator search can be implemented by a two-layer feedforward circuit utilizing circuit motifs of connection phase shift, nonlinear activation function, and pooling, similar to Drosophila's goal-directed navigation neural circuits. And the computational complexity of the feedforward circuit can be even lower than common signal processing algorithms in certain conditions. We also provide geometric interpretations of circuit computation in the group representation space. The feedforward motion planning circuit is further combined with sensory and motor circuit modules into a full circuit of the sensory-action loop implementing goal-directed navigation. Our work for the first time links the abstract operator search with biological neural circuits.", "keywords": "Lie group equivariance;motion planning;goal-directed navigation;ring attractor network", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/c2122b2f7eefa2c28a7ac8045de15ee6a01818d4.zip", "author": "Junfeng Zuo;Ying Nian Wu;Si Wu;Wenhao Zhang", "authorids": "~Junfeng_Zuo1;~Ying_Nian_Wu1;~Si_Wu1;~Wenhao_Zhang3", "gender": ";M;M;M", "homepage": "https://www.researchgate.net/profile/Junfeng-Zuo;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm;https://www.zhang-cnl.org/;http://www.stat.ucla.edu/~ywu/", "dblp": "346/0879;25/437-1;57/7458-2;18/568.html", "google_scholar": ";;TqGPd9QAAAAJ;7k_1QFIAAAAJ", "orcid": ";;0000-0001-7641-5024;", "linkedin": ";;;", "or_profile": "~Junfeng_Zuo1;~Si_Wu1;~Wenhao_Zhang3;~Yingnian_Wu1", "aff": "University of Texas Southwestern Medical Center;Peking University;University of Texas Southwestern Medical Center;UCLA", "aff_domain": "utsouthwestern.edu;pku.edu.cn;utsouthwestern.edu;stat.ucla.edu", "position": "Researcher;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzuo2024the,\ntitle={The motion planning neural circuit in goal-directed navigation as Lie group operator search},\nauthor={Junfeng Zuo and Ying Nian Wu and Si Wu and Wenhao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Qz7BfmWizk}\n}", "github": "", "reviewers": "9EEd;wau7;a5Tr", "pdf_size": 954625, "rating": "5;5;7", "confidence": "1;4;4", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "2;3;4", "wc_summary": "47;245;40", "wc_strengths": "62;139;124", "wc_weaknesses": "289;284;783", "wc_questions": "117;333;19", "wc_limitations": "40;26;17", "wc_review": "555;1027;983", "wc_reply_reviewers": "230;826;402", "wc_reply_authors": "47;832;803", "reply_reviewers": "1;2;1", "reply_authors": "3;5;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 110.66666666666667, 95.030989097709 ], "wc_strengths_avg": [ 108.33333333333333, 33.32999983331666 ], "wc_weaknesses_avg": [ 452.0, 234.0612455462601 ], "wc_questions_avg": [ 156.33333333333334, 131.172490340857 ], "wc_limitations_avg": [ 27.666666666666668, 9.463379711052259 ], "wc_review_avg": [ 855.0, 212.8912085236651 ], "wc_reply_reviewers_avg": [ 486.0, 250.46090846011612 ], "wc_reply_authors_avg": [ 560.6666666666666, 363.4100836367765 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yiqQjm4DugUJ:scholar.google.com/&scioq=The+motion+planning+neural+circuit+in+goal-directed+navigation+as+Lie+group+operator+search&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "utsouthwestern.edu;pku.edu.cn;utsouthwestern.edu;stat.ucla.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Texas Southwestern Medical Center;Peking University;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utsouthwestern.edu;http://www.pku.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "UT Southwestern;Peking U;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Large Language Models Must Be Taught to Know What They Don\u2019t Know", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95205", "id": "QzvWyggrYB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=QzvWyggrYB", "openreview": "https://openreview.net/forum?id=QzvWyggrYB", "poster": "", "project": "", "author_site": "Sanyam Kapoor, Nate Gruver, Manley Roberts, Katie Collins, Arka Pal, Umang Bhatt, Adrian Weller, Samuel Dooley, Micah Goldblum, Andrew Wilson", "tldr": "", "abstract": "When using large language models (LLMs) in high-stakes applications, we need to know when we can trust their predictions. Some works argue that prompting high-performance LLMs is sufficient to produce calibrated uncertainties, while others introduce sampling methods that can be prohibitively expensive. In this work, we first argue that prompting on its own is insufficient to achieve good calibration and then show that fine-tuning on a small dataset of correct and incorrect answers can create an uncertainty estimate with good generalization and small computational overhead. We show that a thousand graded examples are sufficient to outperform baseline methods and that training through the features of a model is necessary for good performance and tractable for large open-source models when using LoRA. We also investigate the mechanisms that enable reliable LLM uncertainty estimation, finding that many models can be used as general-purpose uncertainty estimators, applicable not just to their own uncertainties but also the uncertainty of other models. Lastly, we show that uncertainty estimates inform human use of LLMs in human-AI collaborative settings through a user study.", "keywords": "large language models;calibration;uncertainty estimation;prompting", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Sanyam Kapoor;Nate Gruver;Manley Roberts;Katherine M. Collins;Arka Pal;Umang Bhatt;Adrian Weller;Samuel Dooley;Micah Goldblum;Andrew Gordon Wilson", "authorids": "~Sanyam_Kapoor1;~Nate_Gruver1;~Manley_Roberts1;~Katherine_M._Collins1;~Arka_Pal2;~Umang_Bhatt1;~Adrian_Weller1;~Samuel_Dooley1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1", "gender": "M;M;;F;;M;M;;;Not Specified", "homepage": "https://sanyamkapoor.com;https://ngruver.github.io/;;https://collinskatie.github.io/;;https://umangsbhatt.github.io;http://mlg.eng.cam.ac.uk/adrian/;;;https://cims.nyu.edu/~andrewgw", "dblp": "223/5826;223/5568;;284/4959.html;182/2408;207/7955;73/8324;;241/7231;65/10453", "google_scholar": "https://scholar.google.com/citations?hl=en;R5QNdhcAAAAJ;;48ZphCEAAAAJ;qj_ViCAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;;pGDKzuUAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "orcid": ";;;0000-0002-7032-716X;;;;;;", "linkedin": "sanyamkapoor/;;;katie-collins-474121175/;arkapal/;umangsbhatt/;;;;", "or_profile": "~Sanyam_Kapoor1;~Nate_Gruver1;~Manley_Roberts1;~Katherine_M._Collins1;~Arka_Pal2;~Umang_Bhatt1;~Adrian_Weller1;~Samuel_Dooley1;~Micah_Goldblum1;~Andrew_Gordon_Wilson1", "aff": "New York University;New York University;;University of Cambridge;Abacus AI;New York University;University of Cambridge;;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;;cam.ac.uk;abacus.ai;nyu.edu;cam.ac.uk;;nyu.edu;nyu.edu", "position": "PhD student;PhD student;;PhD student;Researcher;Assistant Professor;Principal Researcher;;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nkapoor2024large,\ntitle={Large Language Models Must Be Taught to Know What They Don{\\textquoteright}t Know},\nauthor={Sanyam Kapoor and Nate Gruver and Manley Roberts and Katherine M. Collins and Arka Pal and Umang Bhatt and Adrian Weller and Samuel Dooley and Micah Goldblum and Andrew Gordon Wilson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=QzvWyggrYB}\n}", "github": "", "reviewers": "XHRg;h75Z;AVYo;BBBX;oPif", "pdf_size": 2438530, "rating": "3;6;7;7;7", "confidence": "4;3;4;4;4", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "70;879;103;72;67", "wc_strengths": "28;49;96;47;75", "wc_weaknesses": "204;3;195;69;179", "wc_questions": "60;3;144;434;47", "wc_limitations": "2;3;47;6;6", "wc_review": "364;937;585;628;374", "wc_reply_reviewers": "0;0;11;93;0", "wc_reply_authors": "0;0;0;564;0", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 238.2, 320.6639362323116 ], "wc_strengths_avg": [ 59.0, 23.790754506740637 ], "wc_weaknesses_avg": [ 130.0, 79.93997748310916 ], "wc_questions_avg": [ 137.6, 155.07494962114288 ], "wc_limitations_avg": [ 12.8, 17.174399552822802 ], "wc_review_avg": [ 577.6, 209.20860402956663 ], "wc_reply_reviewers_avg": [ 20.8, 36.35051581477215 ], "wc_reply_authors_avg": [ 112.8, 225.6 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7797428216273551418&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;;cam.ac.uk;abacus.ai;nyu.edu;cam.ac.uk;;nyu.edu;nyu.edu", "author_num": 10, "aff_unique_index": "0;0;1;2;0;1;0;0", "aff_unique_norm": "New York University;University of Cambridge;Abacus AI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nyu.edu;https://www.cam.ac.uk;", "aff_unique_abbr": "NYU;Cambridge;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "CoSy: Evaluating Textual Explanations of Neurons", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95204", "id": "R0bnWrpIeN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R0bnWrpIeN", "openreview": "https://openreview.net/forum?id=R0bnWrpIeN", "poster": "/media/PosterPDFs/NeurIPS%202024/95204.png?t=1731581151.8205118", "project": "", "author_site": "Laura Kopf, Philine L Bommer, Anna Hedstr\u00f6m, Sebastian Lapuschkin, Marina H\u00f6hne, Kirill Bykov", "tldr": "", "abstract": "A crucial aspect of understanding the complex nature of Deep Neural Networks (DNNs) is the ability to explain learned concepts within their latent representations. While methods exist to connect neurons to human-understandable textual descriptions, evaluating the quality of these explanations is challenging due to the lack of a unified quantitative approach. We introduce CoSy (Concept Synthesis), a novel, architecture-agnostic framework for evaluating textual explanations of latent neurons. Given textual explanations, our proposed framework uses a generative model conditioned on textual input to create data points representing the explanations. By comparing the neuron's response to these generated data points and control data points, we can estimate the quality of the explanation. We validate our framework through sanity checks and benchmark various neuron description methods for Computer Vision tasks, revealing significant differences in quality.", "keywords": "Explainable AI;Evaluation of Explainability Methods;Mechanistic Interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/307878e55a00ad22a63615aab51b540de2a4e84e.zip", "author": "Laura Kopf;Philine Lou Bommer;Anna Hedstr\u00f6m;Sebastian Lapuschkin;Marina MC H\u00f6hne;Kirill Bykov", "authorids": "~Laura_Kopf1;~Philine_Lou_Bommer1;~Anna_Hedstr\u00f6m1;~Sebastian_Lapuschkin1;~Marina_MC_H\u00f6hne1;~Kirill_Bykov1", "gender": "F;F;F;M;M;F", "homepage": ";;;http://iphome.hhi.de/lapuschkin/;https://www.linkedin.com/in/bykovkirill/;", "dblp": "359/1479;;;184/7883;;167/3225", "google_scholar": "https://scholar.google.de/citations?user=-sCedSYAAAAJ;;ldOYtBUAAAAJ;https://scholar.google.de/citations?user=wpLQuroAAAAJ;tI39EK8AAAAJ;https://scholar.google.de/citations?user=araOPxQAAAAJ", "orcid": "0009-0005-4280-3818;;;0000-0002-0762-7258;;", "linkedin": "laura-kopf-943577242/;http://linkedin.com/in/philine-lou-bommer;;sebastian-lapuschkin/;;marina-marie-claire-h\u00f6hne-8949bb77/", "or_profile": "~Laura_Kopf1;~Philine_Lou_Bommer1;~Anna_Hedstr\u00f6m1;~Sebastian_Lapuschkin1;~Kirill_Bykov1;~Marina_H\u00f6hne1", "aff": "Universit\u00e4t Potsdam;TU Berlin;TU Berlin;Fraunhofer HHI;TU Berlin;Universit\u00e4t Potsdam", "aff_domain": "uni-potsdam.de;tu-berlin.de;tu-berlin.de;hhi.fraunhofer.de;tu-berlin.de;uni-potsdam.de", "position": "MS student;PhD student;PhD student;Head of Explainable Artificial Intelligence;PhD student;Full Professor", "bibtex": "@inproceedings{\nkopf2024cosy,\ntitle={CoSy: Evaluating Textual Explanations of Neurons},\nauthor={Laura Kopf and Philine Lou Bommer and Anna Hedstr{\\\"o}m and Sebastian Lapuschkin and Marina MC H{\\\"o}hne and Kirill Bykov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R0bnWrpIeN}\n}", "github": "", "reviewers": "yev1;9Kbo;hkvS;1Sdz", "pdf_size": 6002570, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "1;4;3;3", "presentation": "3;4;3;4", "wc_summary": "139;156;81;171", "wc_strengths": "65;145;114;110", "wc_weaknesses": "235;521;159;514", "wc_questions": "179;79;4;146", "wc_limitations": "9;10;7;7", "wc_review": "627;911;365;948", "wc_reply_reviewers": "22;116;0;540", "wc_reply_authors": "27;76;0;87", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 136.75, 34.120191968979306 ], "wc_strengths_avg": [ 108.5, 28.53506614676055 ], "wc_weaknesses_avg": [ 357.25, 162.5059614291119 ], "wc_questions_avg": [ 102.0, 67.07831244150377 ], "wc_limitations_avg": [ 8.25, 1.299038105676658 ], "wc_review_avg": [ 712.75, 236.0766559827549 ], "wc_reply_reviewers_avg": [ 169.5, 218.2996793401218 ], "wc_reply_authors_avg": [ 47.5, 35.52815784698103 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11078607326169427475&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uni-potsdam.de;tu-berlin.de;tu-berlin.de;hhi.fraunhofer.de;tu-berlin.de;uni-potsdam.de", "author_num": 6, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "University of Potsdam;Technische Universit\u00e4t Berlin;Fraunhofer Heinrich Hertz Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-potsdam.de;https://www.tu-berlin.de;https://www.hhi.fraunhofer.de/", "aff_unique_abbr": "UP;TU Berlin;HHI", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "EZ-HOI: VLM Adaptation via Guided Prompt Learning for Zero-Shot HOI Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95203", "id": "R1Rrb2d5BH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R1Rrb2d5BH", "openreview": "https://openreview.net/forum?id=R1Rrb2d5BH", "poster": "/media/PosterPDFs/NeurIPS%202024/95203.png?t=1730357524.531229", "project": "", "author_site": "Qinqian Lei, Bo Wang, Robby Tan", "tldr": "", "abstract": "Detecting Human-Object Interactions (HOI) in zero-shot settings, where models must handle unseen classes, poses significant challenges. Existing methods that rely on aligning visual encoders with large Vision-Language Models (VLMs) to tap into the extensive knowledge of VLMs, require large, computationally expensive models and encounter training difficulties. Adapting VLMs with prompt learning offers an alternative to direct alignment. However, fine-tuning on task-specific datasets often leads to overfitting to seen classes and suboptimal performance on unseen classes, due to the absence of unseen class labels. To address these challenges, we introduce a novel prompt learning-based framework for Efficient Zero-Shot HOI detection (EZ-HOI). First, we introduce Large Language Model (LLM) and VLM guidance for learnable prompts, integrating detailed HOI descriptions and visual semantics to adapt VLMs to HOI tasks. However, because training datasets contain seen-class labels alone, fine-tuning VLMs on such datasets tends to optimize learnable prompts for seen classes instead of unseen ones. Therefore, we design prompt learning for unseen classes using information from related seen classes, with LLMs utilized to highlight the differences between unseen and related seen classes. Quantitative evaluations on benchmark datasets demonstrate that our EZ-HOI achieves state-of-the-art performance across various zero-shot settings with only 10.35\\% to 33.95\\% of the trainable parameters compared to existing methods. Code is available at https://github.com/ChelsieLei/EZ-HOI.", "keywords": "Zero-shot HOI detection;Human-Object Interaction;Prompt Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qinqian Lei;Bo Wang;Robby T. Tan", "authorids": "~Qinqian_Lei1;~Bo_Wang12;~Robby_T._Tan1", "gender": "F;;", "homepage": "https://chelsielei.github.io/;;https://tanrobby.github.io/", "dblp": "365/4727;72/6811-19;t/RobbyTTan", "google_scholar": "6ZDmdlAAAAAJ;https://scholar.google.com/citations?hl=en;MOD0gv4AAAAJ", "orcid": ";;0000-0001-7532-6919", "linkedin": "qin-q-lei/;;robby-t-tan-875a5a31/?originalSubdomain=sg", "or_profile": "~Qinqian_Lei1;~Bo_Wang12;~Robby_Tan4", "aff": "National University of Singapore;CtrsVision;National University of Singapore", "aff_domain": "u.nus.edu;ctrsvision.com;nus.edu.sg", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nlei2024ezhoi,\ntitle={{EZ}-{HOI}: {VLM} Adaptation via Guided Prompt Learning for Zero-Shot {HOI} Detection},\nauthor={Qinqian Lei and Bo Wang and Robby T. Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R1Rrb2d5BH}\n}", "github": "", "reviewers": "RCVD;kLJ1;UfLd;1u4N;kbXP;f78r", "pdf_size": 2190136, "rating": "4;5;5;6;6;7", "confidence": "5;3;3;5;3;4", "soundness": "2;3;3;3;3;3", "novelty": "1;2;2;3;3;4", "presentation": "2;3;3;3;2;3", "wc_summary": "63;76;81;32;179;48", "wc_strengths": "52;24;43;33;59;20", "wc_weaknesses": "268;65;47;261;140;210", "wc_questions": "10;1;32;4;154;210", "wc_limitations": "4;1;1;1;35;1", "wc_review": "397;167;204;331;567;489", "wc_reply_reviewers": "0;18;15;22;73;0", "wc_reply_authors": "41;183;29;36;55;0", "reply_reviewers": "0;1;1;1;1;0", "reply_authors": "2;2;2;2;2;1", "rating_avg": [ 5.5, 0.9574271077563381 ], "confidence_avg": [ 3.8333333333333335, 0.8975274678557507 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.5, 0.9574271077563381 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 79.83333333333333, 47.31954024384524 ], "wc_strengths_avg": [ 38.5, 14.174507634012077 ], "wc_weaknesses_avg": [ 165.16666666666666, 87.93826748855636 ], "wc_questions_avg": [ 68.5, 82.46564941436728 ], "wc_limitations_avg": [ 7.166666666666667, 12.495554765151041 ], "wc_review_avg": [ 359.1666666666667, 143.34156953080833 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 24.601264646806722 ], "wc_reply_authors_avg": [ 57.333333333333336, 58.60792513721065 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.8333333333333333, 0.3726779962499649 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.09697622757528539, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5482100252299968413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "u.nus.edu;ctrsvision.com;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;CtrsVision", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore;" }, { "title": "The Reliability of OKRidge Method in Solving Sparse Ridge Regression Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95202", "id": "R3ruv1gF8R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R3ruv1gF8R", "openreview": "https://openreview.net/forum?id=R3ruv1gF8R", "poster": "/media/PosterPDFs/NeurIPS%202024/95202.png?t=1731671024.4029727", "project": "", "author_site": "Xiyuan Li, Youjun Wang, Weiwei Liu", "tldr": "", "abstract": "Sparse ridge regression problems play a significant role across various domains. To solve sparse ridge regression, Liu et al. (2023) recently propose an advanced algorithm, Scalable Optimal $K$-Sparse Ridge Regression (OKRidge), which is both faster and more accurate than existing approaches. However, the absence of theoretical analysis on the error of OKRidge impedes its large-scale applications. In this paper, we reframe the estimation error of OKRidge as a Primary Optimization ($\\textbf{PO}$) problem and employ the Convex Gaussian min-max theorem (CGMT) to simplify the $\\textbf{PO}$ problem into an Auxiliary Optimization ($\\textbf{AO}$) problem. Subsequently, we provide a theoretical error analysis for OKRidge based on the $\\textbf{AO}$ problem. This error analysis improves the theoretical reliability of OKRidge. We also conduct experiments to verify our theorems and the results are in excellent agreement with our theoretical findings.", "keywords": "regression", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xiyuan Li;Youjun Wang;Weiwei Liu", "authorids": "~Xiyuan_Li3;~Youjun_Wang1;~Weiwei_Liu1", "gender": "M;M;M", "homepage": "https://github.com/cuola;https://sites.google.com/site/weiweiliuhomepage/;https://github.com/LXYambition", "dblp": "02/8310;54/6677-3.html;205/4037", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;", "linkedin": ";weiwei-liu-4a7849134/;", "or_profile": "~Youjun_Wang1;~Weiwei_Liu1;~Xiyuan_Li1", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2024the,\ntitle={The Reliability of {OKR}idge Method in Solving Sparse Ridge Regression Problems},\nauthor={Xiyuan Li and Youjun Wang and Weiwei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R3ruv1gF8R}\n}", "github": "", "reviewers": "mBVh;553k;8imU;uvCX;mn2F", "pdf_size": 560147, "rating": "3;4;8;8;8", "confidence": "2;2;5;4;4", "soundness": "2;2;3;3;3", "novelty": "3;2;4;3;4", "presentation": "1;3;3;3;3", "wc_summary": "96;62;66;37;61", "wc_strengths": "17;42;107;47;101", "wc_weaknesses": "266;108;76;123;91", "wc_questions": "37;57;23;3;34", "wc_limitations": "6;22;8;14;9", "wc_review": "422;291;280;224;296", "wc_reply_reviewers": "492;0;12;8;16", "wc_reply_authors": "1387;446;7;12;9", "reply_reviewers": "3;0;1;1;1", "reply_authors": "5;3;2;2;2", "rating_avg": [ 6.2, 2.2271057451320084 ], "confidence_avg": [ 3.4, 1.2000000000000002 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 64.4, 18.810635289643994 ], "wc_strengths_avg": [ 62.8, 35.19318115771861 ], "wc_weaknesses_avg": [ 132.8, 68.44968955371529 ], "wc_questions_avg": [ 30.8, 17.71327186038762 ], "wc_limitations_avg": [ 11.8, 5.74108003776293 ], "wc_review_avg": [ 302.6, 64.99723071023872 ], "wc_reply_reviewers_avg": [ 105.6, 193.27245018367205 ], "wc_reply_authors_avg": [ 372.2, 534.8447999186305 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9429278356405681, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EVluLvQPhAkJ:scholar.google.com/&scioq=The+Reliability+of+OKRidge+Method+in+Solving+Sparse+Ridge+Regression+Problems&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Localizing Memorization in SSL Vision Encoders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95201", "id": "R46HGlIjcG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R46HGlIjcG", "openreview": "https://openreview.net/forum?id=R46HGlIjcG", "poster": "", "project": "", "author_site": "Wenhao Wang, Adam Dziedzic, Michael Backes, Franziska Boenisch", "tldr": "", "abstract": "Recent work on studying memorization in self-supervised learning (SSL) suggests that even though SSL encoders are trained on millions of images, they still memorize individual data points. While effort has been put into characterizing the memorized data and linking encoder memorization to downstream utility, little is known about where the memorization happens inside SSL encoders. To close this gap, we propose two metrics for localizing memorization in SSL encoders on a per-layer (LayerMem) and per-unit basis (UnitMem). Our localization methods are independent of the downstream task, do not require any label information, and can be performed in a forward pass. By localizing memorization in various encoder architectures (convolutional and transformer-based) trained on diverse datasets with contrastive and non-contrastive SSL frameworks, we find that (1) while SSL memorization increases with layer depth, highly memorizing units are distributed across the entire encoder, (2) a significant fraction of units in SSL encoders experiences surprisingly high memorization of individual data points, which is in contrast to models trained under supervision, (3) atypical (or outlier) data points cause much higher layer and unit memorization than standard data points, and (4) in vision transformers, most memorization happens in the fully-connected layers. Finally, we show that localizing memorization in SSL has the potential to improve fine-tuning and to inform pruning strategies.", "keywords": "self-supervised learning;memorization;localization", "primary_area": "evaluation", "supplementary_material": "/attachment/b677f4b0c4f648b9cae73d08201ba6573efadac9.zip", "author": "Wenhao Wang;Adam Dziedzic;Michael Backes;Franziska Boenisch", "authorids": "~Wenhao_Wang6;~Adam_Dziedzic1;~Michael_Backes3;~Franziska_Boenisch2", "gender": "M;;;", "homepage": "https://wenhaowang1995.github.io/;;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0001-7566-2322;;;", "linkedin": ";;;", "or_profile": "~Wenhao_Wang6;~Adam_Dziedzic1;~Michael_Backes3;~Franziska_Boenisch2", "aff": "CISPA, saarland university, saarland informatics campus;;;", "aff_domain": "cispa.saarland;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nwang2024localizing,\ntitle={Localizing Memorization in {SSL} Vision Encoders},\nauthor={Wenhao Wang and Adam Dziedzic and Michael Backes and Franziska Boenisch},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R46HGlIjcG}\n}", "github": "", "reviewers": "ne6k;GSFs;2UD6;BVAM", "pdf_size": 1236884, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "56;73;141;88", "wc_strengths": "21;73;29;66", "wc_weaknesses": "139;337;327;137", "wc_questions": "69;87;44;64", "wc_limitations": "1;10;73;9", "wc_review": "286;580;614;364", "wc_reply_reviewers": "11;24;29;111", "wc_reply_authors": "131;154;190;757", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.5, 31.815876539866068 ], "wc_strengths_avg": [ 47.25, 22.56518335843961 ], "wc_weaknesses_avg": [ 235.0, 97.06698717895802 ], "wc_questions_avg": [ 66.0, 15.313392831113555 ], "wc_limitations_avg": [ 23.25, 28.93419257556706 ], "wc_review_avg": [ 461.0, 139.2874725163753 ], "wc_reply_reviewers_avg": [ 43.75, 39.37876966082104 ], "wc_reply_authors_avg": [ 308.0, 260.08171792726995 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6314688346338273445&as_sdt=4000005&sciodt=0,18&hl=en", "gs_version_total": 4, "email": "cispa.saarland;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Saarland University", "aff_unique_dep": "CISPA", "aff_unique_url": "https://www.uni-saarland.de", "aff_unique_abbr": "Saarland U", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Virtual Scanning: Unsupervised Non-line-of-sight Imaging from Irregularly Undersampled Transients", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95200", "id": "R4IBZrSF5d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R4IBZrSF5d", "openreview": "https://openreview.net/forum?id=R4IBZrSF5d", "poster": "", "project": "", "author_site": "Xingyu Cui, Huanjing Yue, Song Li, Xiangjun Yin, Yusen Hou, Yun Meng, Kai Zou, Xiaolong Hu, Jingyu Yang", "tldr": "", "abstract": "Non-line-of-sight (NLOS) imaging allows for seeing hidden scenes around corners through active sensing.\nMost previous algorithms for NLOS reconstruction require dense transients acquired through regular scans over a large relay surface, which limits their applicability in realistic scenarios with irregular relay surfaces.\nIn this paper, we propose an unsupervised learning-based framework for NLOS imaging from irregularly undersampled transients~(IUT).\nOur method learns implicit priors from noisy irregularly undersampled transients without requiring paired data, which is difficult and expensive to acquire and align. \nTo overcome the ambiguity of the measurement consistency constraint in inferring the albedo volume, we design a virtual scanning process that enables the network to learn within both range and null spaces for high-quality reconstruction.\nWe devise a physics-guided SURE-based denoiser to enhance robustness to ubiquitous noise in low-photon imaging conditions. \nExtensive experiments on both simulated and real-world data validate the performance and generalization of our method.\nCompared with the state-of-the-art (SOTA) method, our method achieves higher fidelity, greater robustness, and remarkably faster inference times by orders of magnitude.\nThe code and model are available at https://github.com/XingyuCuii/Virtual-Scanning-NLOS.", "keywords": "Non-line-of-sight imaging;Machine Vision;Computational Imaging", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xingyu Cui;Huanjing Yue;Song Li;Xiangjun Yin;Yusen Hou;Yun Meng;Kai Zou;Xiaolong Hu;Jingyu Yang", "authorids": "~Xingyu_Cui2;~Huanjing_Yue2;~Song_Li8;~Xiangjun_Yin1;~Yusen_Hou1;~Yun_Meng1;~Kai_Zou5;~Xiaolong_Hu2;~Jingyu_Yang2", "gender": "M;F;M;M;M;;M;M;", "homepage": ";https://sites.google.com/site/huanjingyue/;https://nanophotonics.tju.edu.cn/index/PEOPLE.htm;;;https://scholar.google.com/citations?user=4innWvwAAAAJ&hl=zh-CN;https://nanophotonics.tju.edu.cn/index/PEOPLE.htm;https://nanophotonics.tju.edu.cn/index/PEOPLE.htm;", "dblp": "324/0257;119/0275;;;;;;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;;;4innWvwAAAAJ;;;", "orcid": ";0000-0003-2517-9783;;0000-0002-4829-9019;0009-0005-9417-9121;;;;", "linkedin": ";;;%E7%A5%A5%E5%86%9B-%E6%AE%B7-74b510198/?locale=en_US;;;;;", "or_profile": "~Xingyu_Cui2;~Huanjing_Yue2;~Song_Li8;~Xiangjun_Yin1;~Yusen_Hou1;~Yun_Meng1;~Kai_Zou5;~Xiaolong_Hu2;~Jingyu_Yang2", "aff": "Tianjin University;Tianjin University;Tianjin University;;Tianjin University;Tianjin University;Tianjin University;Tianjin University;", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu;", "position": "PhD student;Associate Professor;MS student;;Undergrad student;PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\ncui2024virtual,\ntitle={Virtual Scanning: Unsupervised Non-line-of-sight Imaging from Irregularly Undersampled Transients},\nauthor={Xingyu Cui and Huanjing Yue and Song Li and Xiangjun Yin and Yusen Hou and Yun Meng and Kai Zou and Xiaolong Hu and Jingyu Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R4IBZrSF5d}\n}", "github": "", "reviewers": "cypv;WCZG;wGeV;8wFw", "pdf_size": 4331332, "rating": "4;5;6;8", "confidence": "4;2;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "1;3;3;3", "wc_summary": "87;122;35;81", "wc_strengths": "43;61;40;130", "wc_weaknesses": "307;187;42;165", "wc_questions": "13;56;49;69", "wc_limitations": "16;23;13;1", "wc_review": "466;449;179;446", "wc_reply_reviewers": "379;98;11;70", "wc_reply_authors": "792;312;25;68", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 81.25, 30.95460385790779 ], "wc_strengths_avg": [ 68.5, 36.40398329853479 ], "wc_weaknesses_avg": [ 175.25, 94.01695325844165 ], "wc_questions_avg": [ 46.75, 20.765054779605084 ], "wc_limitations_avg": [ 13.25, 7.949056547792323 ], "wc_review_avg": [ 385.0, 119.17843764708446 ], "wc_reply_reviewers_avg": [ 139.5, 141.79650912487233 ], "wc_reply_authors_avg": [ 299.25, 304.81746587097007 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.25482359571881275, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16716526237859892370&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu;", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "OpenSatMap: A Fine-grained High-resolution Satellite Dataset for Large-scale Map Construction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97691", "id": "R4rNYJ2slJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R4rNYJ2slJ", "openreview": "https://openreview.net/forum?id=R4rNYJ2slJ", "poster": "/media/PosterPDFs/NeurIPS%202024/97691.png?t=1730537822.0762672", "project": "", "author_site": "Hongbo Zhao, Lue Fan, Yuntao Chen, Haochen Wang, yuran Yang, Xiaojuan Jin, YIXIN ZHANG, GAOFENG MENG, ZHAO-XIANG ZHANG", "tldr": "", "abstract": "In this paper, we propose OpenSatMap, a fine-grained, high-resolution satellite dataset for large-scale map construction. Map construction is one of the foundations of the transportation industry, such as navigation and autonomous driving. Extracting road structures from satellite images is an efficient way to construct large-scale maps. However, existing satellite datasets provide only coarse semantic-level labels with a relatively low resolution (up to level 19), impeding the advancement of this field. In contrast, the proposed OpenSatMap (1) has fine-grained instance-level annotations; (2) consists of high-resolution images (level 20); (3) is currently the largest one of its kind; (4) collects data with high diversity. Moreover, OpenSatMap covers and aligns with the popular nuScenes dataset and Argoverse 2 dataset to potentially advance autonomous driving technologies. By publishing and maintaining the dataset, we provide a high-quality benchmark for satellite-based map construction and downstream tasks like autonomous driving.", "keywords": "Satellite Dataset;Road Network Extraction", "primary_area": "", "supplementary_material": "", "author": "Hongbo Zhao;Lue Fan;Yuntao Chen;Haochen Wang;yuran Yang;Xiaojuan Jin;YIXIN ZHANG;Gaofeng Meng;Zhaoxiang Zhang", "authorids": "~Hongbo_Zhao1;~Lue_Fan1;~Yuntao_Chen1;~Haochen_Wang5;~yuran_Yang1;~Xiaojuan_Jin1;~YIXIN_ZHANG6;~Gaofeng_Meng1;~Zhaoxiang_Zhang3", "gender": "M;;M;M;M;F;F;M;M", "homepage": "https://bjzhb666.github.io/;;;https://haochen-wang409.github.io/;https://ieeexplore.ieee.org/author/37090059964;http://cripac.ia.ac.cn/en/EN/column/column42.shtml;https://github.com/yixinhuahua68;http://people.ucas.edu.cn/~gfmeng;http://zhaoxiangzhang.net", "dblp": "07/8330-6;287/9792;203/8284;;327/3762;391/7628;;78/6915;55/2285-1.html", "google_scholar": "Gs22F0UAAAAJ;https://scholar.google.com/citations?hl=en;iLOoUqIAAAAJ;oNlpTdcAAAAJ;;;;5hti_r0AAAAJ;qxWfV6cAAAAJ", "orcid": "0000-0002-4859-275X;;;0000-0002-2333-1844;;;;0000-0002-7103-6321;", "linkedin": ";;;;;;;;", "or_profile": "~Hongbo_Zhao1;~Lue_Fan1;~Yuntao_Chen1;~Haochen_Wang5;~yuran_Yang1;~Xiaojuan_Jin1;~YIXIN_ZHANG6;~Gaofeng_Meng1;~Zhaoxiang_Zhang3", "aff": "Institute of automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Centre for Artificial Intelligence and Robotics (CAIR), Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Tencent;Institute of Automation, Chinese Academy of Sciences;Tencent;Institute of automation;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;cair-cas.org.hk;ia.ac.cn;tencent.com;ia.ac.cn;tencent.com;ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;Assistant Professor;PhD student;Researcher;Engineer;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2024opensatmap,\ntitle={OpenSatMap: A Fine-grained High-resolution Satellite Dataset for Large-scale Map Construction},\nauthor={Hongbo Zhao and Lue Fan and Yuntao Chen and Haochen Wang and yuran Yang and Xiaojuan Jin and YIXIN ZHANG and Gaofeng Meng and Zhaoxiang Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=R4rNYJ2slJ}\n}", "github": "", "reviewers": "kEvY;jdjh;gmZm", "pdf_size": 27811106, "rating": "6;6;7", "confidence": "5;4;4", "wc_summary_and_contributions": "73;66;85", "wc_strengths": "6;58;88", "wc_improvement": "58;89;90", "wc_limitations": "4;1;1", "wc_correctness": "1;1;1", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "1;1;20", "wc_documentation": "1;1;9", "wc_additional_feedback": "1;1;1", "wc_review": "146;219;296", "wc_reply_reviewers": "0;23;31", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 74.66666666666667, 7.845734863959881 ], "wc_strengths_avg": [ 50.666666666666664, 33.87558937576667 ], "wc_improvement_avg": [ 79.0, 14.854853303438128 ], "wc_limitations_avg": [ 2.0, 1.4142135623730951 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_documentation_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 220.33333333333334, 61.24450088692771 ], "wc_reply_reviewers_avg": [ 18.0, 13.140268896284683 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15424198821945067766&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "ia.ac.cn;ia.ac.cn;cair-cas.org.hk;ia.ac.cn;tencent.com;ia.ac.cn;tencent.com;ia.ac.cn;ia.ac.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;1;2;0", "aff_unique_norm": "Chinese Academy of Sciences;Tencent;Institute of Automation", "aff_unique_dep": "Institute of Automation;Tencent Holdings Limited;", "aff_unique_url": "http://www.ia.cas.cn;https://www.tencent.com;", "aff_unique_abbr": "CAS;Tencent;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Understanding Model Selection for Learning in Strategic Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95199", "id": "R6FOuWv5MD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R6FOuWv5MD", "openreview": "https://openreview.net/forum?id=R6FOuWv5MD", "poster": "", "project": "", "author_site": "Tinashe Handina, Eric Mazumdar", "tldr": "", "abstract": "The deployment of ever-larger machine learning models reflects a growing consensus that the more expressive the model class one optimizes over\u2014and the more data one has access to\u2014the more one can improve performance. As models get deployed in a variety of real-world scenarios, they inevitably face strategic environments. In this work, we consider the natural question of how the interplay of models and strategic interactions affects the relationship between performance at equilibrium and the expressivity of model classes. We find that strategic interactions can break the conventional view\u2014meaning that performance does not necessarily monotonically improve as model classes get larger or more expressive (even with infinite data). We show the implications of this result in several contexts including strategic regression, strategic classification, and multi-agent reinforcement learning. In particular, we show that each of these settings admits a Braess' paradox-like phenomenon in which optimizing over less expressive model classes allows one to achieve strictly better equilibrium outcomes. Motivated by these examples, we then propose a new paradigm for model selection in games wherein an agent seeks to choose amongst different model classes to use as their action set in a game.", "keywords": "Machine Learning;Game Theory;Nash Equilibrium;Stackelberg games;Model Selection", "primary_area": "learning_theory", "supplementary_material": "", "author": "Tinashe Handina;Eric Mazumdar", "authorids": "~Tinashe_Handina1;~Eric_Mazumdar1", "gender": ";M", "homepage": ";http://people.eecs.berkeley.edu/~emazumdar/", "dblp": ";177/9322", "google_scholar": ";FZOxxvcAAAAJ", "orcid": ";", "linkedin": "tinashe-handina02250/;", "or_profile": "~Tinashe_Handina1;~Eric_Mazumdar1", "aff": "California Institute of Technology;Deparment of Computing + Mathematical Sciences, California Institute of Technology", "aff_domain": "caltech.edu;cms.caltech.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhandina2024understanding,\ntitle={Understanding Model Selection for Learning in Strategic Environments},\nauthor={Tinashe Handina and Eric Mazumdar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R6FOuWv5MD}\n}", "github": "", "reviewers": "8iV5;RwJQ;QRet;yWQz", "pdf_size": 578175, "rating": "5;6;6;7", "confidence": "3;3;3;2", "soundness": "4;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "85;128;216;116", "wc_strengths": "31;7;81;145", "wc_weaknesses": "34;6;143;131", "wc_questions": "277;17;53;3", "wc_limitations": "2;3;15;8", "wc_review": "429;161;508;403", "wc_reply_reviewers": "38;82;14;20", "wc_reply_authors": "7;205;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 136.25, 48.64347335460329 ], "wc_strengths_avg": [ 66.0, 52.848841046895245 ], "wc_weaknesses_avg": [ 78.5, 59.48319090297695 ], "wc_questions_avg": [ 87.5, 110.91776232867304 ], "wc_limitations_avg": [ 7.0, 5.1478150704935 ], "wc_review_avg": [ 375.25, 129.6001060956356 ], "wc_reply_reviewers_avg": [ 38.5, 26.622359023948274 ], "wc_reply_authors_avg": [ 53.0, 87.80375846169684 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rToTn2j8Cx8J:scholar.google.com/&scioq=Understanding+Model+Selection+for+Learning+in+Strategic+Environments&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "caltech.edu;cms.caltech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "California Institute of Technology;", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;", "aff_unique_abbr": "Caltech;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Parallelizing Model-based Reinforcement Learning Over the Sequence Length", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95198", "id": "R6N9AGyz13", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R6N9AGyz13", "openreview": "https://openreview.net/forum?id=R6N9AGyz13", "poster": "/media/PosterPDFs/NeurIPS%202024/95198.png?t=1731944059.5689883", "project": "", "author_site": "Zirui Wang, Yue DENG, Junfeng Long, Yin Zhang", "tldr": "", "abstract": "Recently, Model-based Reinforcement Learning (MBRL) methods have demonstrated stunning sample efficiency in various RL domains.\nHowever, achieving this extraordinary sample efficiency comes with additional training costs in terms of computations, memory, and training time.\nTo address these challenges, we propose the **Pa**rallelized **Mo**del-based **R**einforcement **L**earning (**PaMoRL**) framework.\nPaMoRL introduces two novel techniques: the **P**arallel **W**orld **M**odel (**PWM**) and the **P**arallelized **E**ligibility **T**race **E**stimation (**PETE**) to parallelize both model learning and policy learning stages of current MBRL methods over the sequence length.\nOur PaMoRL framework is hardware-efficient and stable, and it can be applied to various tasks with discrete or continuous action spaces using a single set of hyperparameters.\nThe empirical results demonstrate that the PWM and PETE within PaMoRL significantly increase training speed without sacrificing inference efficiency.\nIn terms of sample efficiency, PaMoRL maintains an MBRL-level sample efficiency that outperforms other no-look-ahead MBRL methods and model-free RL methods, and it even exceeds the performance of planning-based MBRL methods and methods with larger networks in certain tasks.", "keywords": "Model-based reinforcement learning;world model;parallelization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/76405f66e8081a615ada39dccf339e8796c0ba83.zip", "author": "ZiRui Wang;Yue DENG;Junfeng Long;Yin Zhang", "authorids": "~ZiRui_Wang8;~Yue_DENG2;~Junfeng_Long1;~Yin_Zhang3", "gender": "M;M;;M", "homepage": "https://github.com/RuoShuiDY;https://junfeng-long.github.io/;https://person.zju.edu.cn/en/zhangyin;https://github.com/Wongziseoi", "dblp": ";343/2990;91/3045-6;", "google_scholar": ";olmfqBEAAAAJ;vCoh6tYAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-7047-4963;;", "linkedin": ";;;", "or_profile": "~Yue_DENG2;~Junfeng_Long1;~Yin_Zhang3;~ZiRui_Wang4", "aff": "Zhejiang University;Shanghai AI Laboratory;Zhejiang University;Shanghai Artificial Intelligence Laboratory", "aff_domain": "zju.edu.cn;pjlab.org.cn;zju.edu.cn;pjlab.org.cn", "position": "PhD student;Researcher;Full Professor;Intern", "bibtex": "@inproceedings{\nwang2024parallelizing,\ntitle={Parallelizing Model-based Reinforcement Learning Over the Sequence Length},\nauthor={ZiRui Wang and Yue DENG and Junfeng Long and Yin Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R6N9AGyz13}\n}", "github": "", "reviewers": "earm;5drW;MVJ1;wbEe", "pdf_size": 3245287, "rating": "5;5;5;8", "confidence": "3;3;4;4", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "3;2;2;4", "wc_summary": "56;61;136;49", "wc_strengths": "36;23;170;133", "wc_weaknesses": "201;57;260;12", "wc_questions": "4;23;176;1", "wc_limitations": "9;1;15;1", "wc_review": "306;165;757;196", "wc_reply_reviewers": "102;57;90;0", "wc_reply_authors": "304;88;49;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.5, 35.188776619825816 ], "wc_strengths_avg": [ 90.5, 62.5559749344537 ], "wc_weaknesses_avg": [ 132.5, 101.45072695648858 ], "wc_questions_avg": [ 51.0, 72.66016790511841 ], "wc_limitations_avg": [ 6.5, 5.894913061275798 ], "wc_review_avg": [ 356.0, 237.3720708086779 ], "wc_reply_reviewers_avg": [ 62.25, 39.53716605929161 ], "wc_reply_authors_avg": [ 110.25, 116.12574004069899 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9648720077282245309&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "zju.edu.cn;pjlab.org.cn;zju.edu.cn;pjlab.org.cn", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Zhejiang University;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.shanghai-ai-lab.com;http://www.shailab.org/", "aff_unique_abbr": "ZJU;SAIL;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "The Elephant in the Room: Towards A Reliable Time-Series Anomaly Detection Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97690", "id": "R6kJtWsTGy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R6kJtWsTGy", "openreview": "https://openreview.net/forum?id=R6kJtWsTGy", "poster": "/media/PosterPDFs/NeurIPS%202024/97690.png?t=1731471706.5915518", "project": "", "author_site": "Qinghua Liu, John Paparrizos", "tldr": "", "abstract": "Time-series anomaly detection is a fundamental task across scientific fields and industries. However, the field has long faced the ``elephant in the room:'' critical issues including flawed datasets, biased evaluation measures, and inconsistent benchmarking practices that have remained largely ignored and unaddressed. We introduce the TSB-AD to systematically tackle these issues in the following three aspects: (i) Dataset Integrity: with 1070 high-quality time series from a diverse collection of 40 datasets (doubling the size of the largest collection and four times the number of existing curated datasets), we provide the first large-scale, heterogeneous, meticulously curated dataset that combines the effort of human perception and model interpretation; (ii) Measure Reliability: by revealing issues and biases in evaluation measures, we identify the most reliable and accurate measure, namely, VUS-PR for anomaly detection in time series to address concerns from the community; and (iii) Comprehensive Benchmarking: with a broad spectrum of 40 detection algorithms, from statistical methods to the latest foundation models, we perform a comprehensive evaluation that includes a thorough hyperparameter tuning and a unified setup for a fair and reproducible comparison. Our findings challenge the conventional wisdom regarding the superiority of advanced neural network architectures, revealing that simpler architectures and statistical methods often yield better performance. The promising performance of neural networks on multivariate cases and foundation models on point anomalies highlights the need for further advancements in these methods. We open-source the benchmark at https://github.com/TheDatumOrg/TSB-AD to promote further research.", "keywords": "Time-series Analysis;Anomaly Detection;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Qinghua Liu;John Paparrizos", "authorids": "~Qinghua_Liu3;~John_Paparrizos1", "gender": "M;M", "homepage": "https://qhliu26.github.io/;http://www.paparrizos.org", "dblp": ";163/0545", "google_scholar": "8JodwVIAAAAJ;CmxltbUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Qinghua_Liu3;~John_Paparrizos1", "aff": "Ohio State University, Columbus;Ohio State University, Columbus", "aff_domain": "osu.edu;osu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024the,\ntitle={The Elephant in the Room: Towards A Reliable Time-Series Anomaly Detection Benchmark},\nauthor={Qinghua Liu and John Paparrizos},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=R6kJtWsTGy}\n}", "github": "", "reviewers": "SW7e;eDST;HHcx;Tcpd", "pdf_size": 4422745, "rating": "4;7;7;8", "confidence": "4;3;4;5", "wc_summary_and_contributions": "89;67;34;67", "wc_strengths": "4;86;9;53", "wc_improvement": "4;87;9;127", "wc_limitations": "1;9;9;20", "wc_correctness": "2;9;1;72", "wc_clarity": "8;5;1;45", "wc_relation_to_prior_work": "7;28;1;16", "wc_documentation": "1;23;1;34", "wc_additional_feedback": "1;1;1;1", "wc_review": "117;315;66;435", "wc_reply_reviewers": "0;0;0;65", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 64.25, 19.638928178492836 ], "wc_strengths_avg": [ 38.0, 33.637776383108324 ], "wc_improvement_avg": [ 56.75, 52.23205433447932 ], "wc_limitations_avg": [ 9.75, 6.7592529172978875 ], "wc_correctness_avg": [ 21.0, 29.605742686174924 ], "wc_clarity_avg": [ 14.75, 17.640507362318125 ], "wc_relation_to_prior_work_avg": [ 13.0, 10.173494974687902 ], "wc_documentation_avg": [ 14.75, 14.289419162443238 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 233.25, 149.05766501592598 ], "wc_reply_reviewers_avg": [ 16.25, 28.145825622994256 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.23570226039551584, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7545580397977216157&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "osu.edu;osu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Columbus", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Parameter Efficient Adaptation for Image Restoration with Heterogeneous Mixture-of-Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95197", "id": "R7w68Z5iqf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R7w68Z5iqf", "openreview": "https://openreview.net/forum?id=R7w68Z5iqf", "poster": "/media/PosterPDFs/NeurIPS%202024/95197.png?t=1730186080.8168912", "project": "", "author_site": "Hang Guo, Tao Dai, Yuanchao Bai, Bin Chen, Xudong Ren, Zexuan Zhu, Shu-Tao Xia", "tldr": "", "abstract": "Designing single-task image restoration models for specific degradation has seen great success in recent years. To achieve generalized image restoration, all-in-one methods have recently been proposed and shown potential for multiple restoration tasks using one single model. Despite the promising results, the existing all-in-one paradigm still suffers from high computational costs as well as limited generalization on unseen degradations. In this work, we introduce an alternative solution to improve the generalization of image restoration models. Drawing inspiration from recent advancements in Parameter Efficient Transfer Learning (PETL), we aim to tune only a small number of parameters to adapt pre-trained restoration models to various tasks. However, current PETL methods fail to generalize across varied restoration tasks due to their homogeneous representation nature. To this end, we propose AdaptIR, a Mixture-of-Experts (MoE) with orthogonal multi-branch design to capture local spatial, global spatial, and channel representation bases, followed by adaptive base combination to obtain heterogeneous representation for different degradations. Extensive experiments demonstrate that our AdaptIR achieves stable performance on single-degradation tasks, and excels in hybrid-degradation tasks, with training only 0.6% parameters for 8 hours.", "keywords": "Image Restoration;Parameter-efficient Transfer Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hang Guo;Tao Dai;Yuanchao Bai;Bin Chen;Xudong Ren;Zexuan Zhu;Shu-Tao Xia", "authorids": "~Hang_Guo3;~Tao_Dai3;~Yuanchao_Bai2;~Bin_Chen4;~Xudong_Ren1;~Zexuan_Zhu1;~Shu-Tao_Xia1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/csguoh;https://csse.szu.edu.cn/pages/user/index?id=1204;https://binchen17tsinghua.wixsite.com/website;;http://csse.szu.edu.cn/staff/zhuzx;https://www.sigs.tsinghua.edu.cn/xst/list.htm;", "dblp": ";54/875-1;22/5523-11;;17/4590;03/6195;137/5961", "google_scholar": "https://scholar.google.com.hk/citations?user=fRwhfpoAAAAJ;MqJNdaAAAAAJ;Yl0wv7AAAAAJ;https://scholar.google.com/citations?hl=zh-CN;kqHaIGEAAAAJ;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ;hjYIFZcAAAAJ", "orcid": "0000-0003-1746-2693;0000-0003-0594-6404;0000-0002-4798-230X;;0000-0001-8479-6904;0000-0002-8639-982X;", "linkedin": ";;;;;;", "or_profile": "~Hang_Guo3;~Tao_Dai3;~Bin_Chen4;~Xudong_Ren1;~Zexuan_Zhu1;~Shu-Tao_Xia1;~Yuanchao_Bai1", "aff": "Tsinghua University;Department of Software Engineering, Shenzhen University;Harbin Institute of Technology, Shenzhen;Tsinghua University;Shenzhen University;Shenzhen International Graduate School, Tsinghua University;Harbin Institute of Technology", "aff_domain": "tsinghua.edu.cn;szu.edu;hit.edu.cn;tsinghua.edu.cn;szu.edu.cn;sz.tsinghua.edu.cn;hit.edu.cn", "position": "MS student;Assistant Professor;Assistant Professor;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nguo2024parameter,\ntitle={Parameter Efficient Adaptation for Image Restoration with Heterogeneous Mixture-of-Experts},\nauthor={Hang Guo and Tao Dai and Yuanchao Bai and Bin Chen and Xudong Ren and Zexuan Zhu and Shu-Tao Xia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R7w68Z5iqf}\n}", "github": "", "reviewers": "jwPV;62kc;Y1LM;EhxW", "pdf_size": 2850089, "rating": "5;5;5;7", "confidence": "5;4;5;4", "soundness": "2;2;2;4", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "58;83;48;63", "wc_strengths": "70;52;23;157", "wc_weaknesses": "212;69;146;36", "wc_questions": "3;3;3;33", "wc_limitations": "10;1;1;4", "wc_review": "353;208;221;293", "wc_reply_reviewers": "59;0;297;12", "wc_reply_authors": "61;0;844;0", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.0, 12.747548783981962 ], "wc_strengths_avg": [ 75.5, 49.95247741604014 ], "wc_weaknesses_avg": [ 115.75, 68.41920417543601 ], "wc_questions_avg": [ 10.5, 12.99038105676658 ], "wc_limitations_avg": [ 4.0, 3.6742346141747673 ], "wc_review_avg": [ 268.75, 58.43104910918509 ], "wc_reply_reviewers_avg": [ 92.0, 120.3931061149267 ], "wc_reply_authors_avg": [ 226.25, 357.5264850329273 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dWbo_2rUQe4J:scholar.google.com/&scioq=Parameter+Efficient+Adaptation+for+Image+Restoration+with+Heterogeneous+Mixture-of-Experts&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "tsinghua.edu.cn;szu.edu;hit.edu.cn;tsinghua.edu.cn;szu.edu.cn;sz.tsinghua.edu.cn;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;1;0;2", "aff_unique_norm": "Tsinghua University;Shenzhen University;Harbin Institute of Technology", "aff_unique_dep": ";Department of Software Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.szu.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "THU;SZU;HIT", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Shenzhen;Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exploitation of a Latent Mechanism in Graph Contrastive Learning: Representation Scattering", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95196", "id": "R8SolCx62K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R8SolCx62K", "openreview": "https://openreview.net/forum?id=R8SolCx62K", "poster": "/media/PosterPDFs/NeurIPS%202024/95196.png?t=1733466940.2243414", "project": "", "author_site": "Dongxiao He, Lianze Shan, Jitao Zhao, Hengrui Zhang, Zhen Wang, Weixiong Zhang", "tldr": "", "abstract": "Graph Contrastive Learning (GCL) has emerged as a powerful approach for generating graph representations without the need for manual annotation. Most advanced GCL methods fall into three main frameworks: node discrimination, group discrimination, and bootstrapping schemes, all of which achieve comparable performance. However, the underlying mechanisms and factors that contribute to their effectiveness are not yet fully understood. In this paper, we revisit these frameworks and reveal a common mechanism\u2014representation scattering\u2014that significantly enhances their performance. Our discovery highlights an essential feature of GCL and unifies these seemingly disparate methods under the concept of representation scattering. To leverage this insight, we introduce Scattering Graph Representation Learning (SGRL), a novel framework that incorporates a new representation scattering mechanism designed to enhance representation diversity through a center-away strategy. Additionally, consider the interconnected nature of graphs, we develop a topology-based constraint mechanism that integrates graph structural properties with representation scattering to prevent excessive scattering. We extensively evaluate SGRL across various downstream tasks on benchmark datasets, demonstrating its efficacy and superiority over existing GCL methods. Our findings underscore the significance of representation scattering in GCL and provide a structured framework for harnessing this mechanism to advance graph representation learning. The code of SGRL is at https://github.com/hedongxiao-tju/SGRL.", "keywords": "Graph Contrastive Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Dongxiao He;Lianze Shan;Jitao Zhao;Hengrui Zhang;Zhen Wang;Weixiong Zhang", "authorids": "~Dongxiao_He1;~Lianze_Shan1;~Jitao_Zhao2;~Hengrui_Zhang1;~Zhen_Wang11;~Weixiong_Zhang1", "gender": "F;;M;M;;M", "homepage": "http://cic.tju.edu.cn/faculty/hedongxiao/index.htm;;https://hengruizhang98.github.io;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u;https://www.polyu.edu.hk/en/hti/people/academic-staff/prof-zhang-weixiong/;https://github.com/Tao-Zac", "dblp": "48/8875;379/4259;;;51/2284;", "google_scholar": "JyqwTr4AAAAJ;jqKt0ZEAAAAJ;iwffiD0AAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN;;QGvyorsAAAAJ", "orcid": ";;0009-0006-1330-0899;;;", "linkedin": ";;;;;", "or_profile": "~Dongxiao_He1;~Lianze_Shan1;~Hengrui_Zhang1;~Zhen_Wang11;~Weixiong_Zhang1;~JiTao_Zhao1", "aff": "Tianjin University;Tianjin University;University of Illinois, Chicago;Northwestern Polytechnical University;Washington University, Saint Louis;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;uic.edu;nwpu.edu.cn;wustl.edu;tju.edu.cn", "position": "Full Professor;MS student;PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nhe2024exploitation,\ntitle={Exploitation of a Latent Mechanism in Graph Contrastive Learning: Representation Scattering},\nauthor={Dongxiao He and Lianze Shan and Jitao Zhao and Hengrui Zhang and Zhen Wang and Weixiong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R8SolCx62K}\n}", "github": "", "reviewers": "ErfB;6M1t;dtE3;HnqZ", "pdf_size": 959125, "rating": "8;9;9;9", "confidence": "4;4;5;5", "soundness": "4;4;4;4", "novelty": "4;4;3;4", "presentation": "3;3;4;4", "wc_summary": "92;100;73;148", "wc_strengths": "102;94;105;90", "wc_weaknesses": "172;134;112;324", "wc_questions": "4;33;2;48", "wc_limitations": "3;15;2;1", "wc_review": "373;376;294;611", "wc_reply_reviewers": "45;72;0;51", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 8.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 103.25, 27.63489641739227 ], "wc_strengths_avg": [ 97.75, 6.015604707757983 ], "wc_weaknesses_avg": [ 185.5, 82.7934176127547 ], "wc_questions_avg": [ 21.75, 19.49839737004044 ], "wc_limitations_avg": [ 5.25, 5.673402858955108 ], "wc_review_avg": [ 413.5, 118.67286968806307 ], "wc_reply_reviewers_avg": [ 42.0, 26.239283526803852 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4681386483622469912&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "email": "tju.edu.cn;tju.edu.cn;uic.edu;nwpu.edu.cn;wustl.edu;tju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Tianjin University;University of Illinois at Chicago;Northwestern Polytechnical University;Washington University in St. Louis", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.tju.edu.cn;https://www.uic.edu;https://www.nwpu.edu.cn;https://wustl.edu", "aff_unique_abbr": "TJU;UIC;NWPU;WUSTL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Saint Louis", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "RealCompo: Balancing Realism and Compositionality Improves Text-to-Image Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95195", "id": "R8mfn3rHd5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R8mfn3rHd5", "openreview": "https://openreview.net/forum?id=R8mfn3rHd5", "poster": "/media/PosterPDFs/NeurIPS%202024/95195.png?t=1733098790.0615222", "project": "", "author_site": "Xinchen Zhang, Ling Yang, YaQi Cai, Zhaochen Yu, Kai-Ni Wang, xie jiake, Ye Tian, Minkai Xu, Yong Tang, Yujiu Yang, Bin CUI", "tldr": "", "abstract": "Diffusion models have achieved remarkable advancements in text-to-image generation. However, existing models still have many difficulties when faced with multiple-object compositional generation. In this paper, we propose ***RealCompo***, a new *training-free* and *transferred-friendly* text-to-image generation framework, which aims to leverage the respective advantages of text-to-image models and spatial-aware image diffusion models (e.g., layout, keypoints and segmentation maps) to enhance both realism and compositionality of the generated images. An intuitive and novel *balancer* is proposed to dynamically balance the strengths of the two models in denoising process, allowing plug-and-play use of any model without extra training. Extensive experiments show that our RealCompo consistently outperforms state-of-the-art text-to-image models and spatial-aware image diffusion models in multiple-object compositional generation while keeping satisfactory realism and compositionality of the generated images. Notably, our RealCompo can be seamlessly extended with a wide range of spatial-aware image diffusion models and stylized diffusion models. Code is available at: https://github.com/YangLing0818/RealCompo", "keywords": "Text-to-Image Diffusion;Layout-guided Image Diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/069f132e200e4978cde7017ebc7f62fbe60b4798.zip", "author": "Xinchen Zhang;Ling Yang;YaQi Cai;Zhaochen Yu;Kai-Ni Wang;xie jiake;Ye Tian;Minkai Xu;Yong Tang;Yujiu Yang;Bin CUI", "authorids": "~Xinchen_Zhang1;~Ling_Yang1;~YaQi_Cai2;~Zhaochen_Yu2;~Kai-Ni_Wang1;~xie_jiake1;~Ye_Tian15;~Minkai_Xu1;~Yong_Tang4;~Yujiu_Yang2;~Bin_CUI2", "gender": "M;M;;M;F;M;M;M;M;M;M", "homepage": "https://cominclip.github.io/;https://yangling0818.github.io/;https://github.com/CCYaqi;https://zhaochenyu0201.github.io;https://soleilssss.github.io/;https://kelisiya@github.io;https://tyfeld.github.io/;https://minkaixu.com;;https://sites.google.com/view/iigroup-thu;https://cuibinpku.github.io/index.html", "dblp": ";01/24-6.html;;;311/5582.html;295/9450;;257/3355;;30/3847;55/5031.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;;9RNgZOIAAAAJ;https://scholar.google.com.hk/citations?user=nMRUtZsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;vUY_PIUAAAAJ;https://scholar.google.com/citations?hl=en;ubVWrRwAAAAJ;4gH3sxsAAAAJ;IJAU8KoAAAAJ", "orcid": ";0000-0003-1905-8053;;;0000-0002-4000-188X;;;;;0000-0002-6427-1024;0000-0003-1681-4677", "linkedin": ";;;;;;;;;;", "or_profile": "~Xinchen_Zhang1;~Ling_Yang1;~YaQi_Cai2;~Zhaochen_Yu2;~Kai-Ni_Wang1;~xie_jiake1;~Ye_Tian15;~Minkai_Xu1;~Yong_Tang4;~Yujiu_Yang2;~Bin_CUI2", "aff": "Tsinghua University;Peking University;University of Science and Technology of China;Peking University;Southeast University;Winroad Holdings Limited;Peking University;Stanford University;;Tsinghua University;Peking University", "aff_domain": "mails.tsinghua.edu.cn;pku.edu.cn;mail.ustc.edu.cn;pku.edu.cn;seu.edu.cn;wonderworker.com;pku.edu.cn;stanford.edu;;tsinghua.edu.cn;pku.edu.cn", "position": "MS student;PhD student;MS student;Intern;PhD student;Computer Vision Engineer;MS student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024realcompo,\ntitle={RealCompo: Balancing Realism and Compositionality Improves Text-to-Image Diffusion Models},\nauthor={Xinchen Zhang and Ling Yang and YaQi Cai and Zhaochen Yu and Kai-Ni Wang and xie jiake and Ye Tian and Minkai Xu and Yong Tang and Yujiu Yang and Bin CUI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R8mfn3rHd5}\n}", "github": "", "reviewers": "ZHfz;juPc;XVVX", "pdf_size": 5261967, "rating": "6;6;7", "confidence": "5;4;5", "soundness": "3;2;3", "novelty": "1;3;3", "presentation": "2;3;3", "wc_summary": "64;94;61", "wc_strengths": "42;54;41", "wc_weaknesses": "242;51;40", "wc_questions": "43;90;92", "wc_limitations": "1;36;1", "wc_review": "392;325;235", "wc_reply_reviewers": "32;156;306", "wc_reply_authors": "216;465;572", "reply_reviewers": "1;3;2", "reply_authors": "4;4;4", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.0, 14.89966442575134 ], "wc_strengths_avg": [ 45.666666666666664, 5.90668171555645 ], "wc_weaknesses_avg": [ 111.0, 92.73977931107376 ], "wc_questions_avg": [ 75.0, 22.642143596988927 ], "wc_limitations_avg": [ 12.666666666666666, 16.49915822768611 ], "wc_review_avg": [ 317.3333333333333, 64.32383349549026 ], "wc_reply_reviewers_avg": [ 164.66666666666666, 112.02777433396693 ], "wc_reply_authors_avg": [ 417.6666666666667, 149.14050049831832 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15705285011043562907&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;pku.edu.cn;mail.ustc.edu.cn;pku.edu.cn;seu.edu.cn;wonderworker.com;pku.edu.cn;stanford.edu;;tsinghua.edu.cn;pku.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;1;3;4;1;5;0;1", "aff_unique_norm": "Tsinghua University;Peking University;University of Science and Technology of China;Southeast University;Winroad Holdings Limited;Stanford University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;http://www.ustc.edu.cn;https://www.seu.edu.cn/;;https://www.stanford.edu", "aff_unique_abbr": "THU;Peking U;USTC;SEU;;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Bayes-optimal learning of an extensive-width neural network from quadratically many samples", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95194", "id": "R8znYRjxj3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R8znYRjxj3", "openreview": "https://openreview.net/forum?id=R8znYRjxj3", "poster": "/media/PosterPDFs/NeurIPS%202024/95194.png?t=1731681316.0284033", "project": "", "author_site": "Antoine Maillard, Emanuele Troiani, Simon Martin, Florent Krzakala, Lenka Zdeborov\u00e1", "tldr": "", "abstract": "We consider the problem of learning a target function corresponding to a single\nhidden layer neural network, with a quadratic activation function after the first layer,\nand random weights. We consider the asymptotic limit where the input dimension\nand the network width are proportionally large. Recent work [Cui et al., 2023]\nestablished that linear regression provides Bayes-optimal test error to learn such\na function when the number of available samples is only linear in the dimension.\nThat work stressed the open challenge of theoretically analyzing the optimal test\nerror in the more interesting regime where the number of samples is quadratic in\nthe dimension. In this paper, we solve this challenge for quadratic activations and\nderive a closed-form expression for the Bayes-optimal test error. We also provide an\nalgorithm, that we call GAMP-RIE, which combines approximate message passing\nwith rotationally invariant matrix denoising, and that asymptotically achieves the\noptimal performance. Technically, our result is enabled by establishing a link\nwith recent works on optimal denoising of extensive-rank matrices and on the\nellipsoid fitting problem. We further show empirically that, in the absence of\nnoise, randomly-initialized gradient descent seems to sample the space of weights,\nleading to zero training loss, and averaging over initialization leads to a test error\nequal to the Bayes-optimal one.", "keywords": "Theory of neural networks;Bayes-optimal learning;non-convex optimization;statistical physics;high-dimensional statistics", "primary_area": "learning_theory", "supplementary_material": "", "author": "Antoine Maillard;Emanuele Troiani;Simon Martin;Florent Krzakala;Lenka Zdeborova", "authorids": "~Antoine_Maillard1;~Emanuele_Troiani2;~Simon_Martin1;~Florent_Krzakala1;~Lenka_Zdeborova1", "gender": "M;M;M;;F", "homepage": "https://anmaillard.github.io;;;http://Krzakala.org;http://artax.karlin.mff.cuni.cz/~zdebl9am/", "dblp": "215/5029;267/5270;;25/1282;27/6064.html", "google_scholar": "hHz2ovsAAAAJ;https://scholar.google.fr/citations?user=Gh0snLcAAAAJ;https://scholar.google.com/citations?hl=fr;https://scholar.google.fr/citations?user=3jDeUlMAAAAJ;https://scholar.google.fr/citations?user=gkCjy_UAAAAJ", "orcid": "0000-0002-1078-4553;0000-0003-0968-7585;;0000-0003-2313-2578;", "linkedin": ";;;;", "or_profile": "~Antoine_Maillard1;~Emanuele_Troiani2;~Simon_Martin1;~Florent_Krzakala1;~Lenka_Zdeborova1", "aff": "Swiss Federal Institute of Technology;School of Computer and Communication Sciences, EPFL - EPF Lausanne;INRIA;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "ethz.ch;ic.epfl.ch;inria.fr;epfl.ch;epfl.ch", "position": "Postdoc;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nmaillard2024bayesoptimal,\ntitle={Bayes-optimal learning of an extensive-width neural network from quadratically many samples},\nauthor={Antoine Maillard and Emanuele Troiani and Simon Martin and Florent Krzakala and Lenka Zdeborova},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=R8znYRjxj3}\n}", "github": "", "reviewers": "bzi8;xzmx;XJ6b;NWbd", "pdf_size": 1015235, "rating": "4;6;8;8", "confidence": "2;2;2;3", "soundness": "2;3;4;4", "novelty": "2;2;4;3", "presentation": "2;3;4;4", "wc_summary": "67;60;66;228", "wc_strengths": "35;18;133;336", "wc_weaknesses": "54;76;134;517", "wc_questions": "113;57;78;262", "wc_limitations": "1;15;32;1", "wc_review": "270;226;443;1344", "wc_reply_reviewers": "11;95;12;177", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 105.25, 70.92028976252142 ], "wc_strengths_avg": [ 130.5, 126.50395250742169 ], "wc_weaknesses_avg": [ 195.25, 188.04703533956604 ], "wc_questions_avg": [ 127.5, 80.18883962248113 ], "wc_limitations_avg": [ 12.25, 12.754901018824098 ], "wc_review_avg": [ 570.75, 453.74517903775023 ], "wc_reply_reviewers_avg": [ 73.75, 68.67086354488343 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11090432221661647341&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ethz.ch;ic.epfl.ch;inria.fr;epfl.ch;epfl.ch", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Swiss Federal Institute of Technology;EPFL;INRIA;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";School of Computer and Communication Sciences;;", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch;https://www.inria.fr;https://www.epfl.ch", "aff_unique_abbr": "ETH Zurich;EPFL;INRIA;EPFL", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Switzerland;France" }, { "title": "InterpBench: Semi-Synthetic Transformers for Evaluating Mechanistic Interpretability Techniques", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97689", "id": "R9gR9MPuD5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=R9gR9MPuD5", "openreview": "https://openreview.net/forum?id=R9gR9MPuD5", "poster": "/media/PosterPDFs/NeurIPS%202024/97689.png?t=1731683720.7634645", "project": "", "author_site": "Rohan Gupta, Iv\u00e1n Arcuschin Moreno, Thomas Kwa, Adri\u00e0 Garriga-Alonso", "tldr": "", "abstract": "Mechanistic interpretability methods aim to identify the algorithm a neural network implements, but it is difficult to validate such methods when the true algorithm is unknown. This work presents InterpBench, a collection of semi-synthetic yet realistic transformers with known circuits for evaluating these techniques. We train simple neural networks using a stricter version of Interchange Intervention Training (IIT) which we call Strict IIT (SIIT). Like the original, SIIT trains neural networks by aligning their internal computation with a desired high-level causal model, but it also prevents non-circuit nodes from affecting the model's output. We evaluate SIIT on sparse transformers produced by the Tracr tool and find that SIIT models maintain Tracr's original circuit while being more realistic. SIIT can also train transformers with larger circuits, like Indirect Object Identification (IOI). Finally, we use our benchmark to evaluate existing circuit discovery techniques.", "keywords": "Benchmark;Mechanistic Interpretability;Transformers;Circuit Discovery", "primary_area": "", "supplementary_material": "/attachment/865645bb8e22658224ad80464710f25c386997a6.pdf", "author": "Rohan Gupta;Iv\u00e1n Arcuschin;Thomas Kwa;Adri\u00e0 Garriga-Alonso", "authorids": "~Rohan_Gupta5;~Iv\u00e1n_Arcuschin1;~Thomas_Kwa1;~Adri\u00e0_Garriga-Alonso1", "gender": ";M;;M", "homepage": ";https://iarcuschin.com/;;https://agarri.ga/", "dblp": ";274/6148.html;;225/6564", "google_scholar": ";vfB-YhcAAAAJ;;OtnThiMAAAAJ", "orcid": ";0000-0001-5805-8828;;0000-0003-3409-5047", "linkedin": "cybershiptrooper/;iarcuschin/;tkwa/;adrigarriga/", "or_profile": "~Rohan_Gupta5;~Iv\u00e1n_Arcuschin1;~Thomas_Kwa1;~Adria_Garriga-Alonso1", "aff": "ML Alignment and Theory Scholars;Universidad de Buenos Aires;Model Evaluation and Threat Research;FAR", "aff_domain": "serimats.org;uba.ar;metr.org;far.ai", "position": "Researcher;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\ngupta2024interpbench,\ntitle={InterpBench: Semi-Synthetic Transformers for Evaluating Mechanistic Interpretability Techniques},\nauthor={Rohan Gupta and Iv{\\'a}n Arcuschin and Thomas Kwa and Adri{\\`a} Garriga-Alonso},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=R9gR9MPuD5}\n}", "github": "", "reviewers": "S3m7;SvKE;ZPbQ;a2Fe", "pdf_size": 678470, "rating": "5;5;7;7", "confidence": "3;2;4;3", "wc_summary_and_contributions": "86;72;59;52", "wc_strengths": "32;24;6;4", "wc_improvement": "24;4;6;4", "wc_limitations": "40;26;1;4", "wc_correctness": "23;4;1;4", "wc_clarity": "25;4;1;1", "wc_relation_to_prior_work": "57;4;1;1", "wc_documentation": "21;4;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "309;143;77;72", "wc_reply_reviewers": "20;0;0;0", "wc_reply_authors": "267;76;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 67.25, 12.987975207860538 ], "wc_strengths_avg": [ 16.5, 11.863810517704673 ], "wc_improvement_avg": [ 9.5, 8.411301920630361 ], "wc_limitations_avg": [ 17.75, 16.068213964221414 ], "wc_correctness_avg": [ 8.0, 8.74642784226795 ], "wc_clarity_avg": [ 7.75, 10.034316120194738 ], "wc_relation_to_prior_work_avg": [ 15.75, 23.84716964337697 ], "wc_documentation_avg": [ 6.75, 8.317902379807062 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 150.25, 95.84199236242952 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 85.75, 109.14754921664526 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10687724570113096019&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "serimats.org;uba.ar;metr.org;far.ai", "author_num": 4, "aff_unique_index": "0;1;3", "aff_unique_norm": "ML Alignment and Theory Scholars;Universidad de Buenos Aires;;FAR", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.uba.ar;;", "aff_unique_abbr": ";UBA;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Argentina" }, { "title": "Navigating Extremes: Dynamic Sparsity in Large Output Spaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95193", "id": "RA6rzOJ2zI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RA6rzOJ2zI", "openreview": "https://openreview.net/forum?id=RA6rzOJ2zI", "poster": "", "project": "", "author_site": "Nasibullah Nasibullah, Erik Schultheis, Michael Lasby, Yani Ioannou, Rohit Babbar", "tldr": "", "abstract": "In recent years, Dynamic Sparse Training (DST) has emerged as an alternative to post-training pruning \nfor generating efficient models. In principle, DST allows for a much more memory efficient training process,\nas it maintains sparsity throughout the entire training run. However, current DST implementations fail to capitalize on this. Because sparse matrix multiplication is much less efficient than dense matrix multiplication on GPUs, most\nimplementations simulate sparsity by masking weights. \nIn this paper, we leverage recent advances in semi-structured sparse training to apply DST in the domain of classification\nwith large output spaces, where memory-efficiency is paramount. With a label space of possibly millions of candidates,\nthe classification layer alone will consume several gigabytes of memory. Switching from a dense to a fixed fan-in \nsparse layer updated with sparse evolutionary training (SET); however, severely hampers training convergence, especially\nat the largest label spaces. We find that the gradients fed back from the classifier into the text encoder make it\nmuch more difficult to learn good input representations, despite using a dense encoder.\nBy employing an intermediate layer or adding an auxiliary training objective, we recover most of the generalisation performance of the dense model. \nOverall, we demonstrate the applicability of DST in a challenging domain, characterized by a highly skewed label distribution, \nthat lies outside of DST's typical benchmark datasets, and enable end-to-end training with millions of labels on commodity hardware.", "keywords": "Dynamic sparse training;extreme classification;memory efficient training;large output spaces;scalable machine learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Nasib Ullah;Erik Schultheis;Mike Lasby;Yani Ioannou;Rohit Babbar", "authorids": "~Nasib_Ullah1;~Erik_Schultheis1;~Mike_Lasby1;~Yani_Ioannou1;~Rohit_Babbar1", "gender": "M;;M;M;", "homepage": "https://nasib-ullah.github.io/;https://www.aalto.fi/en/people/erik-schultheis;;https://yani.ai;", "dblp": "317/5509;268/7969;319/5507;119/5087;", "google_scholar": "8gI7sGgAAAAJ;MGxmO7EAAAAJ;4odTWZMAAAAJ;Qy9yv44AAAAJ;", "orcid": "0009-0005-3153-6734;0000-0003-1685-8397;0000-0002-7090-8773;0000-0002-9797-5888;", "linkedin": "nasib-ullah-103b4923/;;mike-lasby-779b4388/;yanii/;", "or_profile": "~Nasib_Ullah1;~Erik_Schultheis1;~Mike_Lasby1;~Yani_Ioannou1;~Rohit_Babbar1", "aff": "Aalto University;Aalto University;Cerebras Systems, Inc;University of Calgary;", "aff_domain": "aalto.fi;aalto.fi;cerebras.net;ucalgary.ca;", "position": "PhD student;PhD student;Intern;Assistant Professor;", "bibtex": "@inproceedings{\nullah2024navigating,\ntitle={Navigating Extremes: Dynamic Sparsity in Large Output Spaces},\nauthor={Nasib Ullah and Erik Schultheis and Mike Lasby and Yani Ioannou and Rohit Babbar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RA6rzOJ2zI}\n}", "github": "", "reviewers": "2d9T;3LZE;9KVe", "pdf_size": 3020319, "rating": "6;7;7", "confidence": "4;2;2", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "4;2;4", "wc_summary": "38;208;126", "wc_strengths": "50;210;135", "wc_weaknesses": "99;250;77", "wc_questions": "145;25;33", "wc_limitations": "32;14;1", "wc_review": "364;707;372", "wc_reply_reviewers": "41;36;68", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 124.0, 69.41661664664065 ], "wc_strengths_avg": [ 131.66666666666666, 65.36223850375859 ], "wc_weaknesses_avg": [ 142.0, 76.89386624865905 ], "wc_questions_avg": [ 67.66666666666667, 54.7803695577977 ], "wc_limitations_avg": [ 15.666666666666666, 12.710450643291747 ], "wc_review_avg": [ 481.0, 159.83950283539633 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 14.055445761538676 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17019621525974644216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "aalto.fi;aalto.fi;cerebras.net;ucalgary.ca;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Aalto University;Cerebras Systems;University of Calgary", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aalto.fi;https://www.cerebras.com;https://www.ucalgary.ca", "aff_unique_abbr": "Aalto;Cerebras;U of C", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Finland;United States;Canada" }, { "title": "Parseval Regularization for Continual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95192", "id": "RB1F2h5YEx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RB1F2h5YEx", "openreview": "https://openreview.net/forum?id=RB1F2h5YEx", "poster": "", "project": "", "author_site": "Wesley Chung, Lynn Cherif, Doina Precup, David Meger", "tldr": "", "abstract": "Plasticity loss, trainability loss, and primacy bias have been identified as issues arising when training deep neural networks on sequences of tasks---referring to the increased difficulty in training on new tasks.\nWe propose to use Parseval regularization, which maintains orthogonality of weight matrices, to preserve useful optimization properties and improve training in a continual reinforcement learning setting.\nWe show that it provides significant benefits to RL agents on a suite of gridworld, CARL and MetaWorld tasks.\nWe conduct comprehensive ablations to identify the source of its benefits and investigate the effect of certain metrics associated to network trainability including weight matrix rank, weight norms and policy entropy.", "keywords": "Reinforcement Learning;Continual Learning;Plasticity;Optimization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Wesley Chung;Lynn Cherif;Doina Precup;David Meger", "authorids": "~Wesley_Chung1;~Lynn_Cherif1;~Doina_Precup1;~David_Meger2", "gender": ";F;F;M", "homepage": ";https://lc-dev.github.io/;http://cs.mcgill.ca/~dprecup/;http://www.cim.mcgill.ca/~dmeger/", "dblp": "225/7749.html;;p/DoinaPrecup;51/3415.html", "google_scholar": "https://scholar.google.ca/citations?user=y5e1qjQAAAAJ;QfmZ_bEAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;https://scholar.google.com.tw/citations?user=gFwEytkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wesley_Chung1;~Lynn_Cherif1;~Doina_Precup1;~David_Meger2", "aff": "McGill University;McGill University;McGill University;McGill University", "aff_domain": "mcgill.ca;mail.mcgill.ca;mcgill.ca;mcgill.ca", "position": "PhD student;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nchung2024parseval,\ntitle={Parseval Regularization for Continual Reinforcement Learning},\nauthor={Wesley Chung and Lynn Cherif and Doina Precup and David Meger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RB1F2h5YEx}\n}", "github": "", "reviewers": "g7ai;yTUz;f6b5", "pdf_size": 2965882, "rating": "4;4;7", "confidence": "4;3;5", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "114;57;61", "wc_strengths": "209;44;39", "wc_weaknesses": "204;242;150", "wc_questions": "243;2;94", "wc_limitations": "291;2;54", "wc_review": "1061;347;398", "wc_reply_reviewers": "0;0;159", "wc_reply_authors": "36;33;98", "reply_reviewers": "0;0;1", "reply_authors": "2;2;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.33333333333333, 25.978623691198287 ], "wc_strengths_avg": [ 97.33333333333333, 78.9866374071519 ], "wc_weaknesses_avg": [ 198.66666666666666, 37.7477004450455 ], "wc_questions_avg": [ 113.0, 99.30088955627068 ], "wc_limitations_avg": [ 115.66666666666667, 125.78376507147317 ], "wc_review_avg": [ 602.0, 325.22914998505286 ], "wc_reply_reviewers_avg": [ 53.0, 74.95331880577403 ], "wc_reply_authors_avg": [ 55.666666666666664, 29.95923155816176 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FDVFrhBJUxMJ:scholar.google.com/&scioq=Parseval+Regularization+for+Continual+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "mcgill.ca;mail.mcgill.ca;mcgill.ca;mcgill.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "ImOV3D: Learning Open Vocabulary Point Clouds 3D Object Detection from Only 2D Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95191", "id": "RCO9fRP8AJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RCO9fRP8AJ", "openreview": "https://openreview.net/forum?id=RCO9fRP8AJ", "poster": "/media/PosterPDFs/NeurIPS%202024/95191.png?t=1731600863.2607563", "project": "", "author_site": "Timing Yang, Yuanliang Ju, Li Yi", "tldr": "", "abstract": "Open-vocabulary 3D object detection (OV-3Det) aims to generalize beyond the limited number of base categories labeled during the training phase. The biggest bottleneck is the scarcity of annotated 3D data, whereas 2D image datasets are abundant and richly annotated. Consequently, it is intuitive to leverage the wealth of annotations in 2D images to alleviate the inherent data scarcity in OV-3Det. In this paper, we push the task setup to its limits by exploring the potential of using solely 2D images to learn OV-3Det. The major challenges for this setup is the modality gap between training images and testing point clouds, which prevents effective integration of 2D knowledge into OV-3Det. To address this challenge, we propose a novel framework ImOV3D to leverage pseudo multimodal representation containing both images and point clouds (PC) to close the modality gap. The key of ImOV3D lies in flexible modality conversion where 2D images can be lifted into 3D using monocular depth estimation and can also be derived from 3D scenes through rendering. This allows unifying both training images and testing point clouds into a common image-PC representation, encompassing a wealth of 2D semantic information and also incorporating the depth and structural characteristics of 3D spatial data. We carefully conduct such conversion to minimize the domain gap between training and test cases. Extensive experiments on two benchmark datasets, SUNRGBD and ScanNet, show that ImOV3D significantly outperforms existing methods, even in the absence of ground truth 3D training data. With the inclusion of a minimal amount of real 3D data for fine-tuning, the performance also significantly surpasses previous state-of-the-art. Codes and pre-trained models are released on the https://github.com/yangtiming/ImOV3D.", "keywords": "Open Vocabulary 3D Detection \u00b7 Pseudo Multimodal Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/de4bdc61a9d91de3d373c5564cc0c097ecdedeea.zip", "author": "Timing Yang;Yuanliang Ju;Li Yi", "authorids": "~Timing_Yang1;~Yuanliang_Ju1;~Li_Yi2", "gender": "M;F;M", "homepage": "http://yangtiming.github.io;;https://ericyi.github.io/", "dblp": ";;26/4239-1", "google_scholar": "lNm4eG8AAAAJ;rG90YVAAAAAJ;UyZL660AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Timing_Yang1;~Yuanliang_Ju1;~Li_Yi2", "aff": "Tsinghua University;University of Toronto;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;utoronto.ca;tsinghua.edu.cn", "position": "Intern;CS PhD Student;Assistant Professor", "bibtex": "@inproceedings{\nyang2024imovd,\ntitle={Im{OV}3D: Learning Open Vocabulary Point Clouds 3D Object Detection from Only 2D Images},\nauthor={Timing Yang and Yuanliang Ju and Li Yi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RCO9fRP8AJ}\n}", "github": "", "reviewers": "m5vv;TsJm;teog;HQsS;MLwT", "pdf_size": 13441129, "rating": "5;5;5;6;7", "confidence": "3;4;3;3;5", "soundness": "3;3;2;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;2;3", "wc_summary": "68;134;95;137;81", "wc_strengths": "31;68;85;50;40", "wc_weaknesses": "45;176;70;168;337", "wc_questions": "41;10;4;14;4", "wc_limitations": "7;8;4;1;4", "wc_review": "192;396;258;370;466", "wc_reply_reviewers": "10;34;0;24;280", "wc_reply_authors": "85;92;63;24;1195", "reply_reviewers": "1;1;0;1;4", "reply_authors": "3;3;2;2;5", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 103.0, 27.892651361962706 ], "wc_strengths_avg": [ 54.8, 19.46689497583012 ], "wc_weaknesses_avg": [ 159.2, 102.92793595521091 ], "wc_questions_avg": [ 14.6, 13.734627770711516 ], "wc_limitations_avg": [ 4.8, 2.4819347291981715 ], "wc_review_avg": [ 336.4, 98.46339421328112 ], "wc_reply_reviewers_avg": [ 69.6, 105.84063491873053 ], "wc_reply_authors_avg": [ 291.8, 452.2218039856106 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250536 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6875000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3414413193909741732&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mail.tsinghua.edu.cn;utoronto.ca;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;University of Toronto", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "THU;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Canada" }, { "title": "Quantifying Aleatoric Uncertainty of the Treatment Effect: A Novel Orthogonal Learner", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95190", "id": "RDsDvSHGkA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RDsDvSHGkA", "openreview": "https://openreview.net/forum?id=RDsDvSHGkA", "poster": "/media/PosterPDFs/NeurIPS%202024/95190.png?t=1733231196.8165712", "project": "", "author_site": "Valentyn Melnychuk, Stefan Feuerriegel, Mihaela van der Schaar", "tldr": "", "abstract": "Estimating causal quantities from observational data is crucial for understanding the safety and effectiveness of medical treatments. However, to make reliable inferences, medical practitioners require not only estimating averaged causal quantities, such as the conditional average treatment effect, but also understanding the randomness of the treatment effect as a random variable. This randomness is referred to as aleatoric uncertainty and is necessary for understanding the probability of benefit from treatment or quantiles of the treatment effect. Yet, the aleatoric uncertainty of the treatment effect has received surprisingly little attention in the causal machine learning community. To fill this gap, we aim to quantify the aleatoric uncertainty of the treatment effect at the covariate-conditional level, namely, the conditional distribution of the treatment effect (CDTE). Unlike average causal quantities, the CDTE is not point identifiable without strong additional assumptions. As a remedy, we employ partial identification to obtain sharp bounds on the CDTE and thereby quantify the aleatoric uncertainty of the treatment effect. We then develop a novel, orthogonal learner for the bounds on the CDTE, which we call AU-learner. We further show that our AU-learner has several strengths in that it satisfies Neyman-orthogonality and, thus, quasi-oracle efficiency. Finally, we propose a fully-parametric deep learning instantiation of our AU-learner.", "keywords": "causal inference;treatment effect estimation", "primary_area": "causal_inference", "supplementary_material": "", "author": "Valentyn Melnychuk;Stefan Feuerriegel;Mihaela van der Schaar", "authorids": "~Valentyn_Melnychuk1;~Stefan_Feuerriegel1;~Mihaela_van_der_Schaar2", "gender": "M;M;F", "homepage": "https://valentyn1997.github.io/;http://www.ai.bwl.lmu.de;https://www.vanderschaar-lab.com", "dblp": "254/1513;125/0630;", "google_scholar": "EMExrOMAAAAJ;https://scholar.google.de/citations?hl=de;DZ3S--MAAAAJ", "orcid": "0000-0002-2401-6803;0000-0001-7856-8729;", "linkedin": "valentyn-melnychuk/;;", "or_profile": "~Valentyn_Melnychuk1;~Stefan_Feuerriegel1;~Mihaela_van_der_Schaar2", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;LMU Munich;University of California, Los Angeles", "aff_domain": "lmu.de;lmu.de;ucla.edu", "position": "PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nmelnychuk2024quantifying,\ntitle={Quantifying Aleatoric Uncertainty of the Treatment Effect: A Novel Orthogonal Learner},\nauthor={Valentyn Melnychuk and Stefan Feuerriegel and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RDsDvSHGkA}\n}", "github": "", "reviewers": "8SMd;d6jE;Cpq8;bvQh;U9cv", "pdf_size": 2328999, "rating": "3;7;7;7;7", "confidence": "5;4;3;2;3", "soundness": "4;4;3;3;4", "novelty": "2;3;3;3;3", "presentation": "1;4;2;2;3", "wc_summary": "16;62;77;48;94", "wc_strengths": "32;47;44;39;110", "wc_weaknesses": "599;100;197;218;107", "wc_questions": "43;114;84;36;60", "wc_limitations": "11;1;27;1;7", "wc_review": "701;324;429;342;378", "wc_reply_reviewers": "228;13;53;22;49", "wc_reply_authors": "1722;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 6.2, 1.6000000000000003 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 59.4, 26.560120481654444 ], "wc_strengths_avg": [ 54.4, 28.26021939051429 ], "wc_weaknesses_avg": [ 244.2, 183.52918024118125 ], "wc_questions_avg": [ 67.4, 28.576913759186805 ], "wc_limitations_avg": [ 9.4, 9.58331884056875 ], "wc_review_avg": [ 434.8, 137.84832244173302 ], "wc_reply_reviewers_avg": [ 73.0, 78.99620244037051 ], "wc_reply_authors_avg": [ 344.4, 688.7999999999998 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.784464540552736, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9dntNM8LU6MJ:scholar.google.com/&scioq=Quantifying+Aleatoric+Uncertainty+of+the+Treatment+Effect:+A+Novel+Orthogonal+Learner&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "lmu.de;lmu.de;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de;https://www.ucla.edu", "aff_unique_abbr": "LMU;LMU;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Munich;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;United States" }, { "title": "Qualitative Mechanism Independence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95189", "id": "RE5LSV8QYH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RE5LSV8QYH", "openreview": "https://openreview.net/forum?id=RE5LSV8QYH", "poster": "", "project": "", "author_site": "Oliver Richardson, Spencer J Peters, Joseph Halpern", "tldr": "", "abstract": "We define what it means for a joint probability distribution to be compatible with aset of independent causal mechanisms, at a qualitative level\u2014or, more precisely with a directed hypergraph $\\mathcal A$, which is the qualitative structure of a probabilistic dependency graph (PDG). When A represents a qualitative Bayesian network, QIM-compatibility with $\\mathcal A$ reduces to satisfying the appropriate conditional independencies. But giving semantics to hypergraphs using QIM-compatibility lets us do much more. For one thing, we can capture functional dependencies. For another, we can capture important aspects of causality using compatibility: we can use compatibility to understand cyclic causal graphs, and to demonstrate structural compatibility, we must essentially produce a causal model. Finally, compatibility has deep connections to information theory. Applying compatibility to cyclic structures helps to clarify a longstanding conceptual issue in information theory.", "keywords": "causality;information theory;directed hypergraphs;qualitative structures", "primary_area": "causal_inference", "supplementary_material": "", "author": "Oliver Ethan Richardson;Spencer J Peters;Joseph Halpern", "authorids": "~Oliver_Ethan_Richardson1;~Spencer_J_Peters1;~Joseph_Halpern1", "gender": ";M;M", "homepage": "http://cs.cornell.edu/~oli;https://www.cs.cornell.edu/~speters/;https://www.cs.cornell.edu/home/halpern", "dblp": "281/7499;;", "google_scholar": ";;", "orcid": ";0000-0002-9248-107X;", "linkedin": ";spencer-peters-278230152/;", "or_profile": "~Oliver_Ethan_Richardson1;~Spencer_J_Peters1;~Joseph_Halpern1", "aff": "Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nrichardson2024qualitative,\ntitle={Qualitative Mechanism Independence},\nauthor={Oliver Ethan Richardson and Spencer J Peters and Joseph Halpern},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RE5LSV8QYH}\n}", "github": "", "reviewers": "3P8H;b6Hm;wE8z;91DJ", "pdf_size": 553911, "rating": "4;7;7;7", "confidence": "4;3;4;2", "soundness": "2;4;3;3", "novelty": "1;3;3;3", "presentation": "1;3;2;2", "wc_summary": "17;231;245;32", "wc_strengths": "26;74;67;140", "wc_weaknesses": "97;250;142;261", "wc_questions": "144;18;1;199", "wc_limitations": "20;8;1;22", "wc_review": "304;581;456;654", "wc_reply_reviewers": "100;55;0;35", "wc_reply_authors": "806;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.25, 106.99620320366513 ], "wc_strengths_avg": [ 76.75, 40.861809798392436 ], "wc_weaknesses_avg": [ 187.5, 69.94462095114963 ], "wc_questions_avg": [ 90.5, 83.5179621398894 ], "wc_limitations_avg": [ 12.75, 8.642193008721803 ], "wc_review_avg": [ 498.75, 132.87470602037092 ], "wc_reply_reviewers_avg": [ 47.5, 36.1420807370024 ], "wc_reply_authors_avg": [ 201.5, 349.00823772512877 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1754095327681700123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cornell.edu;cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Your Diffusion Model is Secretly a Noise Classifier and Benefits from Contrastive Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95188", "id": "RE7wPI4vfT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RE7wPI4vfT", "openreview": "https://openreview.net/forum?id=RE7wPI4vfT", "poster": "/media/PosterPDFs/NeurIPS%202024/95188.png?t=1733251741.672145", "project": "", "author_site": "Yunshu Wu, Yingtao Luo, Xianghao Kong, Vagelis Papalexakis, Greg Ver Steeg", "tldr": "", "abstract": "Diffusion models learn to denoise data and the trained denoiser is then used to generate new samples from the data distribution. \nIn this paper, we revisit the diffusion sampling process and identify a fundamental cause of sample quality degradation: the denoiser is poorly estimated in regions that are far Outside Of the training Distribution (OOD), and the sampling process inevitably evaluates in these OOD regions.\nThis can become problematic for all sampling methods, especially when we move to parallel sampling which requires us to initialize and update the entire sample trajectory of dynamics in parallel, leading to many OOD evaluations. \nTo address this problem, we introduce a new self-supervised training objective that differentiates the levels of noise added to a sample, leading to improved OOD denoising performance. The approach is based on our observation that diffusion models implicitly define a log-likelihood ratio that distinguishes distributions with different amounts of noise, and this expression depends on denoiser performance outside the standard training distribution.\nWe show by diverse experiments that the proposed contrastive diffusion training is effective for both sequential and parallel settings, and it improves the performance and speed of parallel samplers significantly. Code for our paper can be found at https://github.com/yunshuwu/ContrastiveDiffusionLoss", "keywords": "Diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yunshu Wu;Yingtao Luo;Xianghao Kong;Evangelos E. Papalexakis;Greg Ver Steeg", "authorids": "~Yunshu_Wu1;~Yingtao_Luo1;~Xianghao_Kong1;~Evangelos_E._Papalexakis1;~Greg_Ver_Steeg1", "gender": "F;;M;M;M", "homepage": ";https://yingtaoluo.github.io/;;http://www.cs.ucr.edu/~epapalex;https://profiles.ucr.edu/app/home/profile/gregoryv", "dblp": ";278/2956;;48/9024;82/9058", "google_scholar": ";g_MmNEoAAAAJ;IMxyo-YAAAAJ;https://scholar.google.com.tw/citations?user=2P1kinAAAAAJ;goLucoIAAAAJ", "orcid": ";0000-0003-1794-3657;;0000-0002-3411-8483;0000-0002-0793-141X", "linkedin": "yunshu-wu-182812191/;;;;", "or_profile": "~Yunshu_Wu1;~Yingtao_Luo1;~Xianghao_Kong1;~Evangelos_E._Papalexakis1;~Greg_Ver_Steeg1", "aff": "University of California, Riverside;Carnegie Mellon University;University of California, Riverside;University of California, Riverside;USC/ISI", "aff_domain": "ucr.edu;andrew.cmu.edu;cs.ucr.edu;ucr.edu;isi.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024your,\ntitle={Your Diffusion Model is Secretly a Noise Classifier and Benefits from Contrastive Training},\nauthor={Yunshu Wu and Yingtao Luo and Xianghao Kong and Evangelos E. Papalexakis and Greg Ver Steeg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RE7wPI4vfT}\n}", "github": "", "reviewers": "XUix;Y79y;M92h", "pdf_size": 4150884, "rating": "3;5;7", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;2", "wc_summary": "77;77;66", "wc_strengths": "53;57;33", "wc_weaknesses": "156;117;178", "wc_questions": "65;144;17", "wc_limitations": "1;9;13", "wc_review": "352;404;307", "wc_reply_reviewers": "0;12;36", "wc_reply_authors": "55;15;55", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 5.185449728701348 ], "wc_strengths_avg": [ 47.666666666666664, 10.498677165349081 ], "wc_weaknesses_avg": [ 150.33333333333334, 25.223445883190152 ], "wc_questions_avg": [ 75.33333333333333, 52.359865885576475 ], "wc_limitations_avg": [ 7.666666666666667, 4.988876515698588 ], "wc_review_avg": [ 354.3333333333333, 39.634440691006205 ], "wc_reply_reviewers_avg": [ 16.0, 14.966629547095765 ], "wc_reply_authors_avg": [ 41.666666666666664, 18.856180831641264 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7523394684463281374&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "ucr.edu;andrew.cmu.edu;cs.ucr.edu;ucr.edu;isi.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of California, Riverside;Carnegie Mellon University;University of Southern California", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucr.edu;https://www.cmu.edu;https://isi.usc.edu", "aff_unique_abbr": "UCR;CMU;USC", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Riverside;;ISI", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Trading Place for Space: Increasing Location Resolution Reduces Contextual Capacity in Hippocampal Codes", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95187", "id": "REIK4SZMJt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=REIK4SZMJt", "openreview": "https://openreview.net/forum?id=REIK4SZMJt", "poster": "", "project": "", "author_site": "Spencer Rooke, Zhaoze Wang, Ronald Di Tullio, Vijay Balasubramanian", "tldr": "", "abstract": "Many animals learn cognitive maps of their environment - a simultaneous representation of context, experience, and position. Place cells in the hippocampus, named for their explicit encoding of position, are believed to be a neural substrate of these maps, with place cell \"remapping\" explaining how this system can represent different contexts. Briefly, place cells alter their firing properties, or \"remap\", in response to changes in experiential or sensory cues. Substantial sensory changes, produced, e.g., by moving between environments, cause large subpopulations of place cells to change their tuning entirely. While many studies have looked at the physiological basis of remapping, we lack explicit calculations of how the contextual capacity of the place cell system changes as a function of place field firing properties. Here, we propose a geometric approach to understanding population level activity of place cells. Using known firing field statistics, we investigate how changes to place cell firing properties affect the distances between representations of different environments within firing rate space. Using this approach, we find that the number of contexts storable by the hippocampus grows exponentially with the number of place cells, and calculate this exponent for environments of different sizes. We identify a fundamental trade-off between high resolution encoding of position and the number of storable contexts. This trade-off is tuned by place cell width, which might explain the change in firing field scale along the dorsal-ventral axis of the hippocampus. We demonstrate that clustering of place cells near likely points of confusion, such as boundaries, increases the contextual capacity of the place system within our framework and conclude by discussing how our geometric approach could be extended to include other cell types and abstract spaces.", "keywords": "Neuroscience;Neural Coding;Memory", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Spencer Rooke;Zhaoze Wang;Ronald W Di Tullio;Vijay Balasubramanian", "authorids": "~Spencer_Rooke1;~Zhaoze_Wang2;~Ronald_W_Di_Tullio1;~Vijay_Balasubramanian2", "gender": "M;;M;M", "homepage": "http://spencer.roo.ke;;;https://www.sas.upenn.edu/~vbalasub", "dblp": ";;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;ltj3BwwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Spencer_Rooke1;~Zhaoze_Wang2;~Ronald_W_Di_Tullio1;~Vijay_Balasubramanian2", "aff": "University of Pennsylvania;;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;;upenn.edu;upenn.edu", "position": "PhD student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nrooke2024trading,\ntitle={Trading Place for Space: Increasing Location Resolution Reduces Contextual Capacity in Hippocampal Codes},\nauthor={Spencer Rooke and Zhaoze Wang and Ronald W Di Tullio and Vijay Balasubramanian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=REIK4SZMJt}\n}", "github": "", "reviewers": "FBYZ;V9KM;DtE5", "pdf_size": 23604064, "rating": "6;7;8", "confidence": "4;2;4", "soundness": "3;3;4", "novelty": "3;2;4", "presentation": "3;3;4", "wc_summary": "84;109;82", "wc_strengths": "46;53;83", "wc_weaknesses": "23;66;478", "wc_questions": "2;147;74", "wc_limitations": "2;16;16", "wc_review": "157;391;733", "wc_reply_reviewers": "0;57;28", "wc_reply_authors": "0;22;18", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.66666666666667, 12.283683848458853 ], "wc_strengths_avg": [ 60.666666666666664, 16.048537489614297 ], "wc_weaknesses_avg": [ 189.0, 205.1064764132685 ], "wc_questions_avg": [ 74.33333333333333, 59.19647136630884 ], "wc_limitations_avg": [ 11.333333333333334, 6.599663291074444 ], "wc_review_avg": [ 427.0, 236.5248401331243 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 23.27134623427608 ], "wc_reply_authors_avg": [ 13.333333333333334, 9.568466729604882 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LM9pcuS0t2EJ:scholar.google.com/&scioq=Trading+Place+for+Space:+Increasing+Location+Resolution+Reduces+Contextual+Capacity+in+Hippocampal+Codes&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "upenn.edu;;upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sample-efficient Bayesian Optimisation Using Known Invariances", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95186", "id": "RERls4Opnm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RERls4Opnm", "openreview": "https://openreview.net/forum?id=RERls4Opnm", "poster": "", "project": "", "author_site": "Theodore Brown, Alexandru Cioba, Ilija Bogunovic", "tldr": "", "abstract": "Bayesian optimisation (BO) is a powerful framework for global optimisation of costly functions, using predictions from Gaussian process models (GPs). In this work, we apply BO to functions that exhibit invariance to a known group of transformations. We show that vanilla and constrained BO algorithms are inefficient when optimising such invariant objectives, and provide a method for incorporating group invariances into the kernel of the GP to produce invariance-aware algorithms that achieve significant improvements in sample efficiency. We derive a bound on the maximum information gain of these invariant kernels, and provide novel upper and lower bounds on the number of observations required for invariance-aware BO algorithms to achieve $\\epsilon$-optimality. We demonstrate our method's improved performance on a range of synthetic invariant and quasi-invariant functions. We also apply our method in the case where only some of the invariance is incorporated into the kernel, and find that these kernels achieve similar gains in sample efficiency at significantly reduced computational cost. Finally, we use invariant BO to design a current drive system for a nuclear fusion reactor, finding a high-performance solution where non-invariant methods failed.", "keywords": "Bayesian optimisation;bandit optimisation;Gaussian processes;kernel methods;groups;invariance;transformations;sample efficiency", "primary_area": "bandits", "supplementary_material": "", "author": "Theodore Brown;Alexandru Cioba;Ilija Bogunovic", "authorids": "~Theodore_Brown2;~Alexandru_Cioba1;~Ilija_Bogunovic2", "gender": "M;M;M", "homepage": "https://theobrown.uk/;;http://ilijabogunovic.com/", "dblp": ";288/0091.html;142/2725", "google_scholar": "m4LY31gAAAAJ;;xMvt3NEAAAAJ", "orcid": "0009-0009-2378-8965;;", "linkedin": "theo-brown-uk/;alexandru-cioba-aa8aa9161/;", "or_profile": "~Theodore_Brown2;~Alexandru_Cioba1;~Ilija_Bogunovic1", "aff": "UK Atomic Energy Authority;Mediatek Research;Swiss Federal Institute of Technology", "aff_domain": "ukaea.uk;mtkresearch.com;ethz.ch", "position": "Researcher;Researcher;Postdoc", "bibtex": "@inproceedings{\nbrown2024sampleefficient,\ntitle={Sample-efficient Bayesian Optimisation Using Known Invariances},\nauthor={Theodore Brown and Alexandru Cioba and Ilija Bogunovic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RERls4Opnm}\n}", "github": "", "reviewers": "eKC1;pMXJ;5kTb;8csK", "pdf_size": 5574350, "rating": "6;6;6;7", "confidence": "2;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "439;68;118;72", "wc_strengths": "136;37;66;51", "wc_weaknesses": "72;331;226;26", "wc_questions": "68;9;150;49", "wc_limitations": "6;4;25;27", "wc_review": "721;449;585;225", "wc_reply_reviewers": "133;21;90;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 174.25, 154.110958403353 ], "wc_strengths_avg": [ 72.5, 38.069016273079605 ], "wc_weaknesses_avg": [ 163.75, 121.69711377021231 ], "wc_questions_avg": [ 69.0, 51.38579570270368 ], "wc_limitations_avg": [ 15.5, 10.547511554864494 ], "wc_review_avg": [ 495.0, 183.16113124787148 ], "wc_reply_reviewers_avg": [ 65.0, 48.95406009719725 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8740237994881266802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ukaea.uk;mtkresearch.com;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "UK Atomic Energy Authority;MediaTek Inc.;Swiss Federal Institute of Technology", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.ukaea.ukri.org;https://www.mediatek.com/;https://www.ethz.ch", "aff_unique_abbr": "UKAEA;MediaTek;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;China;Switzerland" }, { "title": "What Factors Affect Multi-Modal In-Context Learning? An In-Depth Exploration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95185", "id": "REVdYKGcfb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=REVdYKGcfb", "openreview": "https://openreview.net/forum?id=REVdYKGcfb", "poster": "/media/PosterPDFs/NeurIPS%202024/95185.png?t=1731419919.028298", "project": "", "author_site": "Libo Qin, Qiguang Chen, Hao Fei, Zhi Chen, Min Li, Wanxiang Che", "tldr": "", "abstract": "Recently, rapid advancements in Multi-Modal In-Context Learning (MM-ICL) have achieved notable success, which is capable of achieving superior performance across various tasks without requiring additional parameter tuning. However, the underlying rules for the effectiveness of MM-ICL remain under-explored. To fill this gap, this work aims to investigate the research question: \"_What factors affect the performance of MM-ICL?_\" To this end, we investigate extensive experiments on the three core steps of MM-ICL including demonstration retrieval, demonstration ordering, and prompt construction using 6 vision large language models and 20 strategies. Our findings highlight (1) the necessity of a multi-modal retriever for demonstration retrieval, (2) the importance of intra-demonstration ordering over inter-demonstration ordering, and (3) the enhancement of task comprehension through introductory instructions in prompts. We hope this study can serve as a foundational guide for optimizing MM-ICL strategies in future research.", "keywords": "Multi-modal In-Context Learning;Demonstration Retrieval;Demonstration Ordering;In-Context Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Libo Qin;Qiguang Chen;Hao Fei;Zhi Chen;Min Li;Wanxiang Che", "authorids": "~Libo_Qin1;~Qiguang_Chen1;~Hao_Fei1;~Zhi_Chen1;~Min_Li10;~Wanxiang_Che1", "gender": ";M;M;M;F;M", "homepage": ";https://scholar.google.com/citations?user=8j8AfF0AAAAJ;https://haofei.vip/;https://donmaclean7.github.io/;http://bioinformatics.csu.edu.cn/limin/index_en.html;http://ir.hit.edu.cn/~car/", "dblp": ";292/9953;81/3569-1;05/1539-6;;https://dblp.uni-trier.de/pers/hd/c/Che:Wanxiang", "google_scholar": ";8j8AfF0AAAAJ;YGDX46AAAAAJ;5pPm6FEAAAAJ;w47WJE4AAAAJ;SVlQ6IEAAAAJ", "orcid": ";;0000-0003-3026-6347;0000-0003-4180-8455;;", "linkedin": ";;;;;", "or_profile": "~Libo_Qin1;~Qiguang_Chen1;~Hao_Fei1;~Zhi_Chen1;~Min_Li10;~Wanxiang_Che1", "aff": ";Harbin Institute of Technology;National University of Singapore;Shanghai AI Laboratory;Central South University;Harbin Institute of Technology", "aff_domain": ";hit.edu.cn;nus.edu.sg;pjlab.org.cn;csu.edu.cn;hit.edu.cn", "position": ";PhD student;Postdoc;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqin2024what,\ntitle={What Factors Affect Multi-Modal In-Context Learning? An In-Depth Exploration},\nauthor={Libo Qin and Qiguang Chen and Hao Fei and Zhi Chen and Min Li and Wanxiang Che},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=REVdYKGcfb}\n}", "github": "", "reviewers": "fN5J;Pm2y;Wrbn;jbhA", "pdf_size": 4346101, "rating": "4;5;7;8", "confidence": "4;3;4;5", "soundness": "1;3;4;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "40;85;67;52", "wc_strengths": "22;53;61;94", "wc_weaknesses": "482;233;51;62", "wc_questions": "5;92;40;3", "wc_limitations": "7;16;24;3", "wc_review": "556;479;243;214", "wc_reply_reviewers": "622;11;124;90", "wc_reply_authors": "1124;51;48;44", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.0, 16.837458240482736 ], "wc_strengths_avg": [ 57.5, 25.617376914898998 ], "wc_weaknesses_avg": [ 207.0, 174.4004013756849 ], "wc_questions_avg": [ 35.0, 36.04857833535187 ], "wc_limitations_avg": [ 12.5, 8.139410298049853 ], "wc_review_avg": [ 373.0, 147.3991180434944 ], "wc_reply_reviewers_avg": [ 211.75, 240.37925763260023 ], "wc_reply_authors_avg": [ 316.75, 466.0726204144586 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.670820393249937, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5562177255924098525&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";hit.edu.cn;nus.edu.sg;pjlab.org.cn;csu.edu.cn;hit.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Harbin Institute of Technology;National University of Singapore;Shanghai AI Laboratory;Central South University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.nus.edu.sg;https://www.shanghai-ai-lab.com;https://www.csu.edu.cn", "aff_unique_abbr": "HIT;NUS;SAIL;CSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "RGnjY6l2HT", "title": "UniEdit: A Unified Tuning-Free Framework for Video Motion and Appearance Editing", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in text-guided video editing have showcased promising results in appearance editing (e.g., stylization). However, video motion editing in the temporal dimension (e.g., from eating to waving), which distinguishes video editing from image editing, is underexplored. In this work, we present UniEdit, a tuning-free framework that supports both video motion and appearance editing by harnessing the power of a pre-trained text-to-video generator within an inversion-then-generation framework.\nTo realize motion editing while preserving source video content, based on the insights that temporal and spatial self-attention layers encode inter-frame and intra-frame dependency respectively, we introduce auxiliary motion-reference and reconstruction branches to produce text-guided motion and source features respectively. The obtained features are then injected into the main editing path via temporal and spatial self-attention layers. Extensive experiments demonstrate that UniEdit covers video motion editing and various appearance editing scenarios, and surpasses the state-of-the-art methods.", "keywords": "Video Editing;Diffusion Model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/c91c414302361a6622f4f51289b62a9fbe58fa78.zip", "author": "Jianhong Bai;Tianyu He;Yuchi Wang;Junliang Guo;Haoji Hu;Zuozhu Liu;Jiang Bian", "authorids": "~Jianhong_Bai2;~Tianyu_He1;~Yuchi_Wang1;~Junliang_Guo1;~Haoji_Hu1;~Zuozhu_Liu1;~Jiang_Bian1", "gender": "M;M;M;M;M;M;M", "homepage": "https://jianhongbai.github.io/;https://www.microsoft.com/en-us/research/people/tianyuhe/;https://wangyuchi369.github.io/;https://leoguojl.me/;https://person.zju.edu.cn/huhaoji;https://person.zju.edu.cn/en/lzz;https://sites.google.com/view/jiangbian", "dblp": "349/0391;198/4010;358/5700;209/9674;65/11145;173/9297;09/851-2.html", "google_scholar": "U926UgYAAAAJ;P08KU1YAAAAJ;RxuU_0YAAAAJ;https://scholar.google.com.sg/citations?user=S88C9ewAAAAJ;https://scholar.google.com/citations?hl=zh-CN;h602wLIAAAAJ;pZBEnY8AAAAJ", "orcid": "0000-0002-3121-7259;0000-0002-4828-3228;;0000-0001-8360-5483;0000-0001-6048-6549;0000-0002-7816-502X;0000-0002-9472-600X", "linkedin": ";;;;;;jbian/", "or_profile": "~Jianhong_Bai2;~Tianyu_He1;~Yuchi_Wang1;~Junliang_Guo1;~Haoji_Hu1;~Zuozhu_Liu1;~Jiang_Bian1", "aff": "Zhejiang University;Microsoft Research Asia;Peking University;Microsoft;Zhejiang University;Zhejiang University;Microsoft", "aff_domain": "zju.edu.cn;microsoft.com;pku.edu.cn;microsoft.com;zju.edu.cn;zju.edu.cn;microsoft.com", "position": "PhD student;Researcher;MS student;Researcher;Associate Professor;Assistant Professor;Partner Research Manager", "bibtex": "@misc{\nanonymous2024uniedit,\ntitle={UniEdit: A Unified Tuning-Free Framework for Video Motion and Appearance Editing},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=RGnjY6l2HT}\n}", "github": "", "project": "", "reviewers": "t6q7;UJWt;rXMa", "site": "https://openreview.net/forum?id=RGnjY6l2HT", "pdf_size": 46572785, "rating": "5;5;6", "confidence": "1;4;2", "soundness": "3;3;2", "novelty": "4;2;3", "presentation": "3;3;3", "wc_summary": "91;101;57", "wc_strengths": "39;12;80", "wc_weaknesses": "128;138;15", "wc_questions": "16;10;48", "wc_limitations": "34;9;10", "wc_review": "308;270;210", "wc_reply_reviewers": "0;91;22", "wc_reply_authors": "0;27;28", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 18.83259585576738 ], "wc_strengths_avg": [ 43.666666666666664, 27.956315128349008 ], "wc_weaknesses_avg": [ 93.66666666666667, 55.77534301901593 ], "wc_questions_avg": [ 24.666666666666668, 16.679994670929073 ], "wc_limitations_avg": [ 17.666666666666668, 11.55662388223981 ], "wc_review_avg": [ 262.6666666666667, 40.34297405441938 ], "wc_reply_reviewers_avg": [ 37.666666666666664, 38.76711091748892 ], "wc_reply_authors_avg": [ 18.333333333333332, 12.970050972229146 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.1889822365046136, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5300330164572410388&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;1;0;0;1", "aff_unique_norm": "Zhejiang University;Microsoft;Peking University", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.zju.edu.cn;https://www.microsoft.com/en-us/research/group/asia;http://www.pku.edu.cn", "aff_unique_abbr": "ZJU;MSR Asia;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;0;0;1", "aff_country_unique": "China;United States" }, { "title": "YouDream: Generating Anatomically Controllable Consistent Text-to-3D Animals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95184", "id": "RH7tfqhiZY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RH7tfqhiZY", "openreview": "https://openreview.net/forum?id=RH7tfqhiZY", "poster": "/media/PosterPDFs/NeurIPS%202024/95184.png?t=1733118069.4302685", "project": "", "author_site": "Sandeep Mishra, Oindrila Saha, Alan Bovik", "tldr": "", "abstract": "3D generation guided by text-to-image diffusion models enables the creation of visually compelling assets. However previous methods explore generation based on image or text. The boundaries of creativity are limited by what can be expressed through words or the images that can be sourced. We present YouDream, a method to generate high-quality anatomically controllable animals. YouDream is guided using a text-to-image diffusion model controlled by 2D views of a 3D pose prior. Our method is capable of generating novel imaginary animals that previous text-to-3D generative methods are unable to create. Additionally, our method can preserve anatomic consistency in the generated animals, an area where prior approaches often struggle. Moreover, we design a fully automated pipeline for generating commonly observed animals. To circumvent the need for human intervention to create a 3D pose, we propose a multi-agent LLM that adapts poses from a limited library of animal 3D poses to represent the desired animal. A user study conducted on the outcomes of YouDream demonstrates the preference of the animal models generated by our method over others. Visualizations and code are available at https://youdream3d.github.io/.", "keywords": "3D Animal generation;Text-to-3D;Diffusion models", "primary_area": "generative_models", "supplementary_material": "", "author": "Sandeep Mishra;Oindrila Saha;Alan Bovik", "authorids": "~Sandeep_Mishra2;~Oindrila_Saha4;~Alan_Bovik1", "gender": "M;F;M", "homepage": "https://sandeep-sm.github.io/;https://oindrilasaha.github.io/;https://www.ece.utexas.edu/people/faculty/alan-bovik", "dblp": ";;b/ACBovik", "google_scholar": "r3tgxpAAAAAJ;kyD0qbIAAAAJ;https://scholar.google.com/scholar?hl=en", "orcid": "0000-0002-2698-2028;;", "linkedin": "sandeep-mishrakgp;;alan-bovik-8989b5205/", "or_profile": "~Sandeep_Mishra2;~Oindrila_Saha4;~Alan_Bovik1", "aff": "University of Texas at Austin;University of Massachusetts at Amherst;University of Texas at Austin", "aff_domain": "utexas.edu;umass.edu;utexas.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmishra2024youdream,\ntitle={YouDream: Generating Anatomically Controllable Consistent Text-to-3D Animals},\nauthor={Sandeep Mishra and Oindrila Saha and Alan Bovik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RH7tfqhiZY}\n}", "github": "", "reviewers": "4gaH;PBQU;127z;rnqi", "pdf_size": 41670246, "rating": "5;5;5;6", "confidence": "4;3;5;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "1;3;3;3", "wc_summary": "33;53;96;109", "wc_strengths": "36;42;42;115", "wc_weaknesses": "192;152;121;125", "wc_questions": "4;38;5;165", "wc_limitations": "4;1;7;15", "wc_review": "269;286;271;529", "wc_reply_reviewers": "49;0;138;31", "wc_reply_authors": "158;0;1012;51", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 72.75, 30.922281610515096 ], "wc_strengths_avg": [ 58.75, 32.5681976781031 ], "wc_weaknesses_avg": [ 147.5, 28.324018076537094 ], "wc_questions_avg": [ 53.0, 66.09462913126906 ], "wc_limitations_avg": [ 6.75, 5.2141634036535525 ], "wc_review_avg": [ 338.75, 110.03720961565683 ], "wc_reply_reviewers_avg": [ 54.5, 51.29571132170798 ], "wc_reply_authors_avg": [ 305.25, 412.0069022480085 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R6_CYlryHSAJ:scholar.google.com/&scioq=YouDream:+Generating+Anatomically+Controllable+Consistent+Text-to-3D+Animals&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "utexas.edu;umass.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.umass.edu", "aff_unique_abbr": "UT Austin;UMass Amherst", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FastSurvival: Hidden Computational Blessings in Training Cox Proportional Hazards Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95183", "id": "RHQbxlhzhm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RHQbxlhzhm", "openreview": "https://openreview.net/forum?id=RHQbxlhzhm", "poster": "/media/PosterPDFs/NeurIPS%202024/95183.png?t=1731727410.258186", "project": "", "author_site": "Jiachang Liu, Rui Zhang, Cynthia Rudin", "tldr": "", "abstract": "Survival analysis is an important research topic with applications in healthcare, business, and manufacturing. One essential tool in this area is the Cox proportional hazards (CPH) model, which is widely used for its interpretability, flexibility, and predictive performance. However, for modern data science challenges such as high dimensionality (both $n$ and $p$) and high feature correlations, current algorithms to train the CPH model have drawbacks, preventing us from using the CPH model at its full potential. The root cause is that the current algorithms, based on the Newton method, have trouble converging due to vanishing second order derivatives when outside the local region of the minimizer. To circumvent this problem, we propose new optimization methods by constructing and minimizing surrogate functions that exploit hidden mathematical structures of the CPH model. Our new methods are easy to implement and ensure monotonic loss decrease and global convergence. Empirically, we verify the computational efficiency of our methods. As a direct application, we show how our optimization methods can be used to solve the cardinality-constrained CPH problem, producing very sparse high-quality models that were not previously practical to construct. We list several extensions that our breakthrough enables, including optimization opportunities, theoretical questions on CPH's mathematical structure, as well as other CPH-related applications.", "keywords": "survival analysis;cox proportional hazards model;optimization;first-order method;sparse learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Jiachang Liu;Rui Zhang;Cynthia Rudin", "authorids": "~Jiachang_Liu1;~Rui_Zhang25;~Cynthia_Rudin1", "gender": "M;;", "homepage": "https://jiachangliu.github.io/;https://ruizhang1996.github.io/;", "dblp": "194/1565-1;60/2536;", "google_scholar": "W_Zyr0AAAAAJ;FiPstXYAAAAJ;", "orcid": ";;", "linkedin": ";rzhang9997/;", "or_profile": "~Jiachang_Liu1;~Rui_Zhang25;~Cynthia_Rudin1", "aff": "Duke University;Duke University;", "aff_domain": "duke.edu;duke.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nliu2024fastsurvival,\ntitle={FastSurvival: Hidden Computational Blessings in Training Cox Proportional Hazards Models},\nauthor={Jiachang Liu and Rui Zhang and Cynthia Rudin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RHQbxlhzhm}\n}", "github": "", "reviewers": "qYft;GRak;EDwF;GBdX", "pdf_size": 3841464, "rating": "4;7;7;9", "confidence": "4;3;3;4", "soundness": "2;4;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;4", "wc_summary": "67;22;83;38", "wc_strengths": "46;81;82;45", "wc_weaknesses": "663;193;96;11", "wc_questions": "29;1;103;67", "wc_limitations": "14;4;11;21", "wc_review": "819;301;375;182", "wc_reply_reviewers": "278;11;12;10", "wc_reply_authors": "1822;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 52.5, 23.879907872519105 ], "wc_strengths_avg": [ 63.5, 18.006943105369107 ], "wc_weaknesses_avg": [ 240.75, 252.14715445548856 ], "wc_questions_avg": [ 50.0, 38.535697735995385 ], "wc_limitations_avg": [ 12.5, 6.103277807866851 ], "wc_review_avg": [ 419.25, 240.84681334823594 ], "wc_reply_reviewers_avg": [ 77.75, 115.61655374556015 ], "wc_reply_authors_avg": [ 455.5, 788.9491428476236 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.14002800840280097, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7670799742604335281&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "duke.edu;duke.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Online Adaptation of Language Models with a Memory of Amortized Contexts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95182", "id": "RIfgKCknTu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RIfgKCknTu", "openreview": "https://openreview.net/forum?id=RIfgKCknTu", "poster": "", "project": "", "author_site": "Jihoon Tack, Jaehyung Kim, Eric Mitchell, Jinwoo Shin, Yee Whye Teh, Jonathan Richard Schwarz", "tldr": "", "abstract": "Due to the rapid generation and dissemination of information, large language models (LLMs) quickly run out of date despite enormous development costs. To address the crucial need to keep models updated, online learning has emerged as a critical tool when utilizing LLMs for real-world applications. However, given the ever-expanding corpus of unseen documents and the large parameter space of modern LLMs, efficient adaptation is essential. To address these challenges, we propose Memory of Amortized Contexts (MAC), an efficient and effective online adaptation framework for LLMs with strong knowledge retention. We propose a feature extraction and memory-augmentation approach to compress and extract information from new documents into compact modulations stored in a memory bank. When answering questions, our model attends to and extracts relevant knowledge from this memory bank. To learn informative modulations in an efficient manner, we utilize amortization-based meta-learning, which substitutes an otherwise required optimization process with a single forward pass of the encoder. Subsequently, we learn to choose from and aggregate selected documents into a single modulation by conditioning on the question, allowing us to adapt a frozen language model during test time without requiring further gradient updates. Our experiment demonstrates the superiority of MAC in multiple aspects, including online adaptation performance, time, and memory efficiency. In addition, we show how MAC can be combined with and improve the performance of popular alternatives such as retrieval augmented generations (RAGs). Code is available at: https://github.com/jihoontack/MAC.", "keywords": "Language Model;Online Learning;Continual Learning;Meta-Learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Jihoon Tack;Jaehyung Kim;Eric Mitchell;Jinwoo Shin;Yee Whye Teh;Jonathan Richard Schwarz", "authorids": "~Jihoon_Tack1;~Jaehyung_Kim1;~Eric_Mitchell1;~Jinwoo_Shin1;~Yee_Whye_Teh2;~Jonathan_Richard_Schwarz1", "gender": "M;M;M;M;M;M", "homepage": "https://jihoontack.github.io;https://sites.google.com/view/jaehyungkim;https://ericmitchell.ai;https://sites.google.com/site/mijirim/;https://jonathan-schwarz.github.io;http://csml.stats.ox.ac.uk/people/teh/", "dblp": "267/5487;02/7206-1;238/0419;31/7062;211/7673;88/2483", "google_scholar": "eW8-OT4AAAAJ;https://scholar.google.co.kr/citations?user=6OYOsGsAAAAJ;q77J4fgAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ;Efs3XxQAAAAJ;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": ";;0000-0002-7487-1744;;;", "linkedin": ";;;;schwarzjonathan/;", "or_profile": "~Jihoon_Tack1;~Jaehyung_Kim1;~Eric_Mitchell1;~Jinwoo_Shin1;~Jonathan_Schwarz1;~Yee_Whye_Teh1", "aff": "Meta FAIR;Carnegie Mellon University;Stanford University;Korea Advanced Institute of Science & Technology;Harvard University;University of Oxford", "aff_domain": "meta.com;andrew.cmu.edu;stanford.edu;kaist.ac.kr;harvard.edu;ox.ac.uk", "position": "Intern;Postdoc;PhD student;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\ntack2024online,\ntitle={Online Adaptation of Language Models with a Memory of Amortized Contexts},\nauthor={Jihoon Tack and Jaehyung Kim and Eric Mitchell and Jinwoo Shin and Yee Whye Teh and Jonathan Richard Schwarz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RIfgKCknTu}\n}", "github": "", "reviewers": "QtU4;j5pf;pwuf;kPwd", "pdf_size": 3118691, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "29;99;122;99", "wc_strengths": "19;87;90;90", "wc_weaknesses": "369;78;262;83", "wc_questions": "4;88;97;66", "wc_limitations": "7;1;1;1", "wc_review": "428;353;572;339", "wc_reply_reviewers": "76;0;22;10", "wc_reply_authors": "160;0;52;42", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.25, 34.91686555233731 ], "wc_strengths_avg": [ 71.5, 30.335622624235025 ], "wc_weaknesses_avg": [ 198.0, 123.45241998438102 ], "wc_questions_avg": [ 63.75, 36.29307785239494 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 423.0, 92.44187362878361 ], "wc_reply_reviewers_avg": [ 27.0, 29.34280150224242 ], "wc_reply_authors_avg": [ 63.5, 59.03177110675234 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10453748419256301030&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "meta.com;andrew.cmu.edu;stanford.edu;kaist.ac.kr;harvard.edu;ox.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Meta;Carnegie Mellon University;Stanford University;Korea Advanced Institute of Science and Technology;Harvard University;University of Oxford", "aff_unique_dep": "Meta Platforms, Inc.;;;;;", "aff_unique_url": "https://meta.com;https://www.cmu.edu;https://www.stanford.edu;https://www.kaist.ac.kr;https://www.harvard.edu;https://www.ox.ac.uk", "aff_unique_abbr": "Meta;CMU;Stanford;KAIST;Harvard;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;1;0;2", "aff_country_unique": "United States;South Korea;United Kingdom" }, { "title": "Neural Collapse To Multiple Centers For Imbalanced Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95181", "id": "RJEC9fZ9Ma", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RJEC9fZ9Ma", "openreview": "https://openreview.net/forum?id=RJEC9fZ9Ma", "poster": "", "project": "", "author_site": "Hongren Yan, Yuhua Qian, Furong Peng, Jiachen Luo, zheqing zhu, Feijiang Li", "tldr": "", "abstract": "Neural Collapse (NC) was a recently discovered phenomenon that the output features and the classifier weights of the neural network converge to optimal geometric structures at the Terminal Phase of Training (TPT) under various losses. However, the relationship between these optimal structures at TPT and the classification performance remains elusive, especially in imbalanced learning. Even though it is noticed that fixing the classifier to an optimal structure can mitigate the minority collapse problem, the performance is still not comparable to the classical imbalanced learning methods with a learnable classifier. In this work, we find that the optimal structure can be designed to represent a better classification rule, and thus achieve better performance. In particular, we justify that, to achieve better classification, the features from the minor classes should align with more directions. This justification then yields a decision rule called the Generalized Classification Rule (GCR) and we also term these directions as the centers of the classes. Then we study the NC under an MSE-type loss via the Unconstrained Features Model (UFM) framework where (1) the features from a class tend to collapse to the mean of the corresponding centers of that class (named Neural Collapse to Multiple Centers (NCMC)) at the global optimum, and (2) the original classifier approximates a surrogate to GCR when NCMC occurs. Based on the analysis, we develop a strategy for determining the number of centers and propose a Cosine Loss function for the fixed classifier that induces NCMC. Our experiments have shown that the Cosine Loss can induce NCMC and has performance on long-tail classification comparable to the classical imbalanced learning methods.", "keywords": "Neural Collapse;Imbalanced Data;Classification Learning;Classification Rule", "primary_area": "learning_theory", "supplementary_material": "/attachment/2c56e8b7de48c5e86702b76f0b913ed4ed85ab3a.zip", "author": "Hongren Yan;Yuhua Qian;Furong Peng;Jiachen Luo;zheqing zhu;Feijiang Li", "authorids": "~Hongren_Yan1;~Yuhua_Qian1;~Furong_Peng1;~Jiachen_Luo1;~zheqing_zhu2;~Feijiang_Li1", "gender": "M;M;;;M;M", "homepage": "https://dblp.org/pid/263/2292.html;http://yuhuaqian.net/;;;https://dig.sxu.edu.cn/xwz/td/gjgcsgcs/94678a4dfd074e779c4b58b119bd7cc0.htm;https://feijiangli.github.io/", "dblp": "263/2292.html;;;;;153/4298.html", "google_scholar": ";;;;;", "orcid": "0009-0002-0984-6670;0000-0001-6772-4247;;;;0000-0003-3730-9602", "linkedin": ";;;;;", "or_profile": "~Hongren_Yan1;~Yuhua_Qian1;~Furong_Peng1;~Jiachen_Luo1;~zheqing_zhu2;~Feijiang_Li1", "aff": "Shanxi University;Shanxi University;;;Shanxi University;Shanxi University", "aff_domain": "sxu.edu.cn;sxu.edu.cn;;;sxu.edu.cn;sxu.edu.cn", "position": "PhD student;Full Professor;;;Researcher;Full Professor", "bibtex": "@inproceedings{\nyan2024neural,\ntitle={Neural Collapse To Multiple Centers For Imbalanced Data},\nauthor={Hongren Yan and Yuhua Qian and Furong Peng and Jiachen Luo and zheqing zhu and Feijiang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RJEC9fZ9Ma}\n}", "github": "", "reviewers": "FHuh;sPeJ;vNc6;HFbj", "pdf_size": 2914418, "rating": "5;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;4;3;4", "presentation": "2;3;2;4", "wc_summary": "78;84;40;110", "wc_strengths": "18;66;44;190", "wc_weaknesses": "82;195;46;156", "wc_questions": "6;2;68;9", "wc_limitations": "29;2;1;2", "wc_review": "213;349;199;467", "wc_reply_reviewers": "35;0;12;10", "wc_reply_authors": "18;5;7;5", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 78.0, 25.019992006393608 ], "wc_strengths_avg": [ 79.5, 66.0208300462816 ], "wc_weaknesses_avg": [ 119.75, 58.82335845563393 ], "wc_questions_avg": [ 21.25, 27.105119442644042 ], "wc_limitations_avg": [ 8.5, 11.84271928232701 ], "wc_review_avg": [ 307.0, 109.38921336219582 ], "wc_reply_reviewers_avg": [ 14.25, 12.813567028739499 ], "wc_reply_authors_avg": [ 8.75, 5.402545696243577 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3995942619994851343&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "sxu.edu.cn;sxu.edu.cn;;;sxu.edu.cn;sxu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanxi University", "aff_unique_dep": "", "aff_unique_url": "http://www.sxu.edu.cn", "aff_unique_abbr": "SXU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Improving Generalization of Dynamic Graph Learning via Environment Prompt", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95180", "id": "RJG8ar4wHA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RJG8ar4wHA", "openreview": "https://openreview.net/forum?id=RJG8ar4wHA", "poster": "/media/PosterPDFs/NeurIPS%202024/95180.png?t=1730381567.469496", "project": "", "author_site": "Kuo Yang, Zhengyang Zhou, Qihe Huang, Limin Li, Yuxuan Liang, Yang Wang", "tldr": "", "abstract": "Out-of-distribution (OOD) generalization issue is a well-known challenge within deep learning tasks. In dynamic graphs, the change of temporal environments is regarded as the main cause of data distribution shift. While numerous OOD studies focusing on environment factors have achieved remarkable performance, they still fail to systematically solve the two issue of environment inference and utilization. In this work, we propose a novel dynamic graph learning model named EpoD based on prompt learning and structural causal model to comprehensively enhance both environment inference and utilization. Inspired by the superior performance of prompt learning in understanding underlying semantic and causal associations, we first design a self-prompted learning mechanism to infer unseen environment factors. We then rethink the role of environment variable within spatio-temporal causal structure model, and introduce a novel causal pathway where dynamic subgraphs serve as mediating variables. The extracted dynamic subgraph can effectively capture the data distribution shift by incorporating the inferred environment variables into the node-wise dependencies. Theoretical discussions and intuitive analysis support the generalizability and interpretability of EpoD. Extensive experiments on seven real-world datasets across domains showcase the superiority of EpoD against baselines, and toy example experiments further verify the powerful interpretability and rationality of our EpoD.", "keywords": "Dynamic graph;spatio-temporal graph learning;out-of-distribution generalization;causal theory;prompt learning;subgraph learning.", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Kuo Yang;Zhengyang Zhou;Qihe Huang;Limin Li;Yuxuan Liang;Yang Wang", "authorids": "~Kuo_Yang2;~Zhengyang_Zhou1;~Qihe_Huang2;~Limin_Li3;~Yuxuan_Liang1;~Yang_Wang32", "gender": "M;M;M;M;M;M", "homepage": ";http://home.ustc.edu.cn/~zzy0929/Home/;;http://di.ustc.edu.cn/;https://yuxuanliang.com;http://staff.ustc.edu.cn/~angyan/", "dblp": ";246/8238;;;183/0977;", "google_scholar": ";dPElQLUAAAAJ;;;n9cODgcAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-3346-5130;0000-0003-4728-7347;0000-0001-8960-6583;;0000-0003-2817-7337;0000-0002-6079-7053", "linkedin": "https://www.linkedin.cn/incareer/in/kuo-yang-440a241b4;;;;yoshall/;", "or_profile": "~Kuo_Yang2;~Zhengyang_Zhou1;~Qihe_Huang2;~Limin_Li3;~Yuxuan_Liang1;~Yang_Wang32", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;The Hong Kong University of Science and Technology (Guangzhou);University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn", "position": "PhD student;Researcher;PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nyang2024improving,\ntitle={Improving Generalization of Dynamic Graph Learning via Environment Prompt},\nauthor={Kuo Yang and Zhengyang Zhou and Qihe Huang and Limin Li and Yuxuan Liang and Yang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RJG8ar4wHA}\n}", "github": "", "reviewers": "XPEX;YNjr;5ZgN;Zz1E", "pdf_size": 1372546, "rating": "6;7;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;4;3;3", "presentation": "2;3;4;3", "wc_summary": "69;93;94;71", "wc_strengths": "11;83;117;234", "wc_weaknesses": "85;55;134;122", "wc_questions": "2;86;6;1", "wc_limitations": "1;27;13;4", "wc_review": "168;344;364;432", "wc_reply_reviewers": "0;0;17;7", "wc_reply_authors": "0;0;57;48", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.75, 11.776565713313877 ], "wc_strengths_avg": [ 111.25, 80.54307853565072 ], "wc_weaknesses_avg": [ 99.0, 31.1688947510174 ], "wc_questions_avg": [ 23.75, 35.9887135085432 ], "wc_limitations_avg": [ 11.25, 10.108783309577865 ], "wc_review_avg": [ 327.0, 97.42176348229383 ], "wc_reply_reviewers_avg": [ 6.0, 6.96419413859206 ], "wc_reply_authors_avg": [ 26.25, 26.44215384570629 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11562259208279408037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;hkust-gz.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ust.hk", "aff_unique_abbr": "USTC;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A survey and benchmark of high-dimensional Bayesian optimization of discrete sequences", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97688", "id": "RJHQAcbmpZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RJHQAcbmpZ", "openreview": "https://openreview.net/forum?id=RJHQAcbmpZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97688.png?t=1731622417.2173195", "project": "", "author_site": "Miguel Gonz\u00e1lez-Duque, Richard Michael, Simon Bartels, Yevgen Zainchkovskyy, S\u00f8ren Hauberg, Wouter Boomsma", "tldr": "", "abstract": "Optimizing discrete black-box functions is key in several domains, e.g. protein engineering and drug design. Due to the lack of gradient information and the need for sample efficiency, Bayesian optimization is an ideal candidate for these tasks. Several methods for high-dimensional continuous and categorical Bayesian optimization have been proposed recently. However, our survey of the field reveals highly heterogeneous experimental set-ups across methods and technical barriers for the replicability and application of published algorithms to real-world tasks. To address these issues, we develop a unified framework to test a vast array of high-dimensional Bayesian optimization methods and a collection of standardized black-box functions representing real-world application domains in chemistry and biology. These two components of the benchmark are each supported by flexible, scalable, and easily extendable software libraries (poli and poli-baselines), allowing practitioners to readily incorporate new optimization objectives or discrete optimizers. Project website: https://machinelearninglifescience.github.io/hdbo_benchmark.", "keywords": "Bayesian optimization;discrete sequences;discrete optimization;molecular optimization", "primary_area": "", "supplementary_material": "", "author": "Miguel Gonz\u00e1lez-Duque;Richard Michael;Simon Bartels;Yevgen Zainchkovskyy;S\u00f8ren Hauberg;Wouter Boomsma", "authorids": "~Miguel_Gonz\u00e1lez-Duque3;~Richard_Michael1;~Simon_Bartels1;~Yevgen_Zainchkovskyy1;~S\u00f8ren_Hauberg1;~Wouter_Boomsma1", "gender": "M;;M;M;M;M", "homepage": "https://laplaceml.com;;http://github.com/eugene;http://www2.compute.dtu.dk/~sohau/;;https://www.miguelgondu.com/", "dblp": ";180/5936.html;;39/7226;06/5945;244/9609.html", "google_scholar": "-U772ZIAAAAJ;;;https://scholar.google.com/citations?hl=en;EwqU_jsAAAAJ;eje0FAYAAAAJ", "orcid": "0000-0002-6264-7323;;;;0000-0002-8257-3827;", "linkedin": "rimichael/;;;;;", "or_profile": "~Richard_Michael1;~Simon_Bartels1;~Yevgen_Zainchkovskyy1;~S\u00f8ren_Hauberg1;~Wouter_Boomsma1;~Miguel_Gonz\u00e1lez_Duque1", "aff": "University of Copenhagen;Universit\u00e9 de Toulouse;Technical University of Denmark;Technical University of Denmark;University of Copenhagen;Copenhagen University", "aff_domain": "diku.dk;univ-toulouse.fr;dtu.dk;dtu.dk;ku.dk;ku.dk", "position": "PhD student;Postdoc;PhD student;Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ngonz{\\'a}lez-duque2024a,\ntitle={A survey and benchmark of high-dimensional Bayesian optimization of discrete sequences},\nauthor={Miguel Gonz{\\'a}lez-Duque and Richard Michael and Simon Bartels and Yevgen Zainchkovskyy and S{\\o}ren Hauberg and Wouter Boomsma},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=RJHQAcbmpZ}\n}", "github": "", "reviewers": "1TmH;dDpj;wLu8;eK8S", "pdf_size": 3311683, "rating": "5;7;7;8", "confidence": "3;4;5;4", "wc_summary_and_contributions": "65;51;61;35", "wc_strengths": "56;5;21;44", "wc_improvement": "246;5;769;95", "wc_limitations": "28;9;31;3", "wc_correctness": "25;5;28;7", "wc_clarity": "25;5;59;2", "wc_relation_to_prior_work": "6;5;188;32", "wc_documentation": "20;5;15;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "472;91;1173;221", "wc_reply_reviewers": "0;25;104;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 53.0, 11.575836902790225 ], "wc_strengths_avg": [ 31.5, 19.80530232033836 ], "wc_improvement_avg": [ 278.75, 295.8550109428603 ], "wc_limitations_avg": [ 17.75, 11.986972094736853 ], "wc_correctness_avg": [ 16.25, 10.328964130056798 ], "wc_clarity_avg": [ 22.75, 22.71976012197312 ], "wc_relation_to_prior_work_avg": [ 57.75, 75.97491362285317 ], "wc_documentation_avg": [ 10.5, 7.297259759663212 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 489.25, 417.843496419413 ], "wc_reply_reviewers_avg": [ 32.25, 42.66365549270245 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1695435375889015468&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "diku.dk;univ-toulouse.fr;dtu.dk;dtu.dk;ku.dk;ku.dk", "author_num": 6, "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "University of Copenhagen;Universit\u00e9 de Toulouse;Technical University of Denmark", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ku.dk;https://www.univ-toulouse.fr;https://www.tek.dk", "aff_unique_abbr": "UCPH;UT;DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Denmark;France" }, { "title": "A Careful Examination of Large Language Model Performance on Grade School Arithmetic", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97687", "id": "RJZRhMzZzH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RJZRhMzZzH", "openreview": "https://openreview.net/forum?id=RJZRhMzZzH", "poster": "/media/PosterPDFs/NeurIPS%202024/97687.png?t=1733515154.3824368", "project": "", "author_site": "Hugh Zhang, Jeff Da, Dean Lee, Vaughn Robinson, Catherine Wu, William Song, Tiffany Zhao, Pranav Raja, Charlotte Zhuang, Dylan Slack, Qin Lyu, Sean Hendryx, Russell Kaplan, Michele Lunati, Summer Yue", "tldr": "", "abstract": "Large language models (LLMs) have achieved impressive success on many benchmarks for mathematical reasoning.\nHowever, there is growing concern that some of this performance actually reflects dataset contamination, where data closely resembling benchmark questions leaks into the training data, instead of true reasoning ability.\nTo investigate this claim rigorously, we commission Grade School Math 1000 (GSM1k). GSM1k is designed to mirror the style and complexity of the established GSM8k benchmark,\nthe gold standard for measuring elementary mathematical reasoning. We ensure that the two benchmarks are comparable across important metrics such as human solve rates, number of steps in solution, answer magnitude, and more.\nWhen evaluating leading open- and closed-source LLMs on GSM1k, we observe accuracy drops of up to 8%, with several families of models showing evidence of systematic overfitting across almost all model sizes.\nFurther analysis suggests a positive relationship (Spearman's r^2=0.36) between a model's probability of generating an example from GSM8k and its performance gap between GSM8k and GSM1k, suggesting that some models may have partially memorized GSM8k.\nNevertheless, many models, especially those on the frontier, show minimal signs of overfitting, and all models broadly demonstrate generalization to novel math problems guaranteed to not be in their training data.", "keywords": "math;evals;data contamination;overfitting;gsm8k;reasoning", "primary_area": "", "supplementary_material": "/attachment/7a21a56b561fe85d1d6b126218baadd4468c347c.zip", "author": "Hugh Zhang;Jeff Da;Dean Lee;Vaughn Robinson;Catherine Wu;William Song;Tiffany Zhao;Pranav Vishnu Raja;Charlotte Zhuang;Dylan Z Slack;Qin Lyu;Sean M. Hendryx;Russell Kaplan;Michele Lunati;Summer Yue", "authorids": "~Hugh_Zhang1;~Jeff_Da1;~Dean_Lee1;~Vaughn_Robinson1;~Catherine_Wu1;~William_Song2;~Tiffany_Zhao1;~Pranav_Vishnu_Raja1;~Charlotte_Zhuang1;~Dylan_Z_Slack1;~Qin_Lyu1;~Sean_M._Hendryx1;~Russell_Kaplan1;~Michele_Lunati1;~Summer_Yue2", "gender": ";;;M;F;M;F;M;F;M;M;M;;M;F", "homepage": ";https://www.jeffda.com;;;;;;https://scholar.google.com/citations?user=857m3Y4AAAAJ&hl=en;;https://dylanslacks.website;;https://smhendryx.github.io/;;;", "dblp": "239/4076;;;;;;;;;https://dblp.org/pers/s/Slack:Dylan.html;;;199/1989;;", "google_scholar": ";;;;;;;857m3Y4AAAAJ;;pyhz-gUAAAAJ;fdrfM2IAAAAJ;https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;;;;;;;;;;;;", "linkedin": ";;;vaughn-r-48b086182/;catherine-w;william-song39/;tiffany-zhao/;;charlotte-zhuang/;;;sean-hendryx-43894056/;russelljkaplan/;mikelunati/;yutingyue/", "or_profile": "~Hugh_Zhang1;~Jeff_Da1;~Dean_Lee1;~Vaughn_Robinson1;~Catherine_Wu1;~William_Song2;~Tiffany_Zhao1;~Pranav_Vishnu_Raja1;~Charlotte_Zhuang1;~Dylan_Z_Slack1;~Qin_Lyu1;~Sean_M._Hendryx1;~Russell_Kaplan1;~Michele_Lunati1;~Summer_Yue2", "aff": "Harvard University;Department of Computer Science, University of Washington;;Scale AI;Scale AI;Scale AI;Scale AI;Scale AI;Scale AI;University of California, Irvine;Scale AI;Scale AI;Scale AI;Scale AI;Scale AI", "aff_domain": "harvard.edu;cs.washington.edu;;scale.ai;scale.com;scale.com;scale.ai;scale.com;scale.com;uci.edu;scale.ai;scale.com;scale.com;scale.com;scale.ai", "position": "PhD student;Undergrad student;;Operations;Researcher;Researcher;Researcher;Researcher;Researcher;PhD student;Researcher;Engineering Manager, Machine Learning;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nzhang2024a,\ntitle={A Careful Examination of Large Language Model Performance on Grade School Arithmetic},\nauthor={Hugh Zhang and Jeff Da and Dean Lee and Vaughn Robinson and Catherine Wu and William Song and Tiffany Zhao and Pranav Vishnu Raja and Charlotte Zhuang and Dylan Z Slack and Qin Lyu and Sean M. Hendryx and Russell Kaplan and Michele Lunati and Summer Yue},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=RJZRhMzZzH}\n}", "github": "", "reviewers": "7fg6;KwgF;adMu", "pdf_size": 3439868, "rating": "7;8;9", "confidence": "4;4;4", "wc_summary_and_contributions": "73;90;69", "wc_strengths": "56;91;14", "wc_improvement": "127;65;15", "wc_limitations": "1;11;1", "wc_correctness": "1;6;7", "wc_clarity": "1;13;9", "wc_relation_to_prior_work": "1;1;19", "wc_documentation": "1;26;14", "wc_additional_feedback": "1;1;1", "wc_review": "262;304;149", "wc_reply_reviewers": "0;17;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 77.33333333333333, 9.104333522498443 ], "wc_strengths_avg": [ 53.666666666666664, 31.47838764754143 ], "wc_improvement_avg": [ 69.0, 45.81120678029194 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_correctness_avg": [ 4.666666666666667, 2.6246692913372702 ], "wc_clarity_avg": [ 7.666666666666667, 4.988876515698588 ], "wc_relation_to_prior_work_avg": [ 7.0, 8.48528137423857 ], "wc_documentation_avg": [ 13.666666666666666, 10.208928554075703 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 238.33333333333334, 65.45397025560956 ], "wc_reply_reviewers_avg": [ 5.666666666666667, 8.013876853447538 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14483083858780094549&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "harvard.edu;cs.washington.edu;;scale.ai;scale.com;scale.com;scale.ai;scale.com;scale.com;uci.edu;scale.ai;scale.com;scale.com;scale.com;scale.ai", "author_num": 15, "aff_unique_index": "0;1;2;2;2;2;2;2;3;2;2;2;2;2", "aff_unique_norm": "Harvard University;University of Washington;Scale AI;University of California, Irvine", "aff_unique_dep": ";Department of Computer Science;;", "aff_unique_url": "https://www.harvard.edu;https://www.washington.edu;https://scale.ai;https://www.uci.edu", "aff_unique_abbr": "Harvard;UW;Scale AI;UCI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Seattle;Irvine", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gradients of Functions of Large Matrices", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95179", "id": "RL4FXrGcTw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RL4FXrGcTw", "openreview": "https://openreview.net/forum?id=RL4FXrGcTw", "poster": "", "project": "", "author_site": "Nicholas Kr\u00e4mer, Pablo Moreno-Mu\u00f1oz, Hrittik Roy, S\u00f8ren Hauberg", "tldr": "", "abstract": "Tuning scientific and probabilistic machine learning models - for example, partial differential equations, Gaussian processes, or Bayesian neural networks - often relies on evaluating functions of matrices whose size grows with the data set or the number of parameters.\nWhile the state-of-the-art for _evaluating_ these quantities is almost always based on Lanczos and Arnoldi iterations, the present work is the first to explain how to _differentiate_ these workhorses of numerical linear algebra efficiently.\nTo get there, we derive previously unknown adjoint systems for Lanczos and Arnoldi iterations, implement them in JAX, and show that the resulting code can compete with Diffrax when it comes to differentiating PDEs, GPyTorch for selecting Gaussian process models and beats standard factorisation methods for calibrating Bayesian neural networks.\nAll this is achieved without any problem-specific code optimisation.\nFind the code at [link redacted] and install the library with *pip install [redacted]*.", "keywords": "Automatic differentiation;numerical methods;linear algebra;implicit differentiation;adjoint methods;differential equations;Bayesian neural networks;Gaussian processes", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/8af435846b2d2a10e27d06e02c84a50faae4f156.zip", "author": "Nicholas Kr\u00e4mer;Pablo Moreno-Mu\u00f1oz;Hrittik Roy;S\u00f8ren Hauberg", "authorids": "~Nicholas_Kr\u00e4mer1;~Pablo_Moreno-Mu\u00f1oz1;~Hrittik_Roy2;~S\u00f8ren_Hauberg1", "gender": ";M;M;M", "homepage": "https://pnkraemer.github.io;https://pmorenoz.github.io/;http://www.compute.dtu.dk;http://www2.compute.dtu.dk/~sohau/", "dblp": "259/3083;220/5334;;39/7226", "google_scholar": "BGufKRsAAAAJ;8vL8iawAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-7249-2986;;", "linkedin": ";;;", "or_profile": "~Nicholas_Kr\u00e4mer1;~Pablo_Moreno-Mu\u00f1oz1;~Hrittik_Roy2;~S\u00f8ren_Hauberg1", "aff": "University of Tuebingen;Technical University of Denmark;Technical University of Denmark;Technical University of Denmark", "aff_domain": "uni-tuebingen.de;dtu.dk;dtu.dk;dtu.dk", "position": "PhD student;Postdoc;PhD student;Professor", "bibtex": "@inproceedings{\nkr{\\\"a}mer2024gradients,\ntitle={Gradients of Functions of Large Matrices},\nauthor={Nicholas Kr{\\\"a}mer and Pablo Moreno-Mu{\\~n}oz and Hrittik Roy and S{\\o}ren Hauberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RL4FXrGcTw}\n}", "github": "", "reviewers": "RkAM;FpA9;3YyQ;X6aJ", "pdf_size": 770013, "rating": "6;7;7;8", "confidence": "4;3;4;4", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "4;4;3;4", "wc_summary": "106;216;126;64", "wc_strengths": "64;60;138;58", "wc_weaknesses": "191;16;165;42", "wc_questions": "34;43;157;19", "wc_limitations": "5;18;30;19", "wc_review": "400;353;616;202", "wc_reply_reviewers": "182;0;36;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 128.0, 55.51576352712804 ], "wc_strengths_avg": [ 80.0, 33.555923471125034 ], "wc_weaknesses_avg": [ 103.5, 75.62572313703849 ], "wc_questions_avg": [ 63.25, 54.8013457863947 ], "wc_limitations_avg": [ 18.0, 8.860022573334675 ], "wc_review_avg": [ 392.75, 148.20488352277735 ], "wc_reply_reviewers_avg": [ 63.5, 69.97678186370105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h6QDW2GWRF4J:scholar.google.com/&scioq=Gradients+of+Functions+of+Large+Matrices&hl=en&as_sdt=0,44", "gs_version_total": 8, "email": "uni-tuebingen.de;dtu.dk;dtu.dk;dtu.dk", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Tuebingen;Technical University of Denmark", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.tek.dk", "aff_unique_abbr": "Uni T\u00fcbingen;DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Germany;Denmark" }, { "title": "Coarse-to-Fine Concept Bottleneck Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95178", "id": "RMdnTnffou", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RMdnTnffou", "openreview": "https://openreview.net/forum?id=RMdnTnffou", "poster": "/media/PosterPDFs/NeurIPS%202024/95178.png?t=1731793007.2162364", "project": "", "author_site": "Konstantinos Panousis, Dino Ienco, Diego Marcos", "tldr": "", "abstract": "Deep learning algorithms have recently gained significant attention due to their impressive performance. However, their high complexity and un-interpretable mode of operation hinders their confident deployment in real-world safety-critical tasks. This work targets ante hoc interpretability, and specifically Concept Bottleneck Models (CBMs). Our goal is to design a framework that admits a highly interpretable decision making process with respect to human understandable concepts, on two levels of granularity. To this end, we propose a novel two-level concept discovery formulation leveraging: (i) recent advances in vision-language models, and (ii) an innovative formulation for coarse-to-fine concept selection via data-driven and sparsity inducing Bayesian arguments. Within this framework, concept information does not solely rely on the similarity between the whole image and general unstructured concepts; instead, we introduce the notion of concept hierarchy to uncover and exploit more granular concept information residing in patch-specific regions of the image scene. As we experimentally show, the proposed construction not only outperforms recent CBM approaches, but also yields a principled framework towards interpetability.", "keywords": "Interpretability;Explainability;Concept Bottleneck Models;Sparsity;Multimodal Models;Concepts;Textual Descriptions;Bayesian;Masking", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/41ae3af2a5cf18abccefdddf8f71f11ad456bc1a.zip", "author": "Konstantinos P. Panousis;Dino Ienco;Diego Marcos", "authorids": "~Konstantinos_P._Panousis1;~Dino_Ienco2;~Diego_Marcos1", "gender": ";M;", "homepage": ";https://sites.google.com/site/dinoienco/;", "dblp": "220/4070;91/6423.html;171/0518", "google_scholar": ";https://scholar.google.it/citations?user=C8zfH3kAAAAJ;IUqydU0AAAAJ", "orcid": ";0000-0002-8736-3132;", "linkedin": ";dino-ienco-2511b16/;", "or_profile": "~Konstantinos_P._Panousis1;~Dino_Ienco2;~Diego_Marcos1", "aff": "INRIA;INRAE, National Research Institute in Agriculture and Environment;INRIA", "aff_domain": "inria.fr;inrae.fr;inria.fr", "position": "Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\npanousis2024coarsetofine,\ntitle={Coarse-to-Fine Concept Bottleneck Models},\nauthor={Konstantinos P. Panousis and Dino Ienco and Diego Marcos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RMdnTnffou}\n}", "github": "", "reviewers": "6WPf;JnxP;yu5m;pbhW", "pdf_size": 4458081, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "57;73;81;44", "wc_strengths": "38;59;34;56", "wc_weaknesses": "166;98;134;28", "wc_questions": "217;197;60;17", "wc_limitations": "39;16;6;13", "wc_review": "517;443;315;158", "wc_reply_reviewers": "128;93;24;14", "wc_reply_authors": "1142;147;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.75, 14.306903927824496 ], "wc_strengths_avg": [ 46.75, 10.894379284750462 ], "wc_weaknesses_avg": [ 106.5, 51.310330343898585 ], "wc_questions_avg": [ 122.75, 85.90219729436494 ], "wc_limitations_avg": [ 18.5, 12.379418403139947 ], "wc_review_avg": [ 358.25, 136.3403370246678 ], "wc_reply_reviewers_avg": [ 64.75, 47.52565096871373 ], "wc_reply_authors_avg": [ 322.25, 477.07251807246246 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7704881248074668502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "inria.fr;inrae.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "INRIA;INRAE", "aff_unique_dep": ";National Research Institute in Agriculture and Environment", "aff_unique_url": "https://www.inria.fr;https://www.inrae.fr", "aff_unique_abbr": "INRIA;INRAE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "On Giant's Shoulders: Effortless Weak to Strong by Dynamic Logits Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95177", "id": "RMfiqfWAWg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RMfiqfWAWg", "openreview": "https://openreview.net/forum?id=RMfiqfWAWg", "poster": "", "project": "", "author_site": "Chenghao Fan, Zhenyi Lu, Wei Wei, Jie Tian, Xiaoye Qu, Dangyang Chen, Yu Cheng", "tldr": "", "abstract": "Efficient fine-tuning of large language models for task-specific applications is imperative, yet the vast number of parameters in these models makes their training increasingly challenging.\nDespite numerous proposals for effective methods, a substantial memory overhead remains for gradient computations during updates. \\thm{Can we fine-tune a series of task-specific small models and transfer their knowledge directly to a much larger model without additional training?} \nIn this paper, we explore weak-to-strong specialization using logit arithmetic, facilitating a direct answer to this question.\nExisting weak-to-strong methods often employ a static knowledge transfer ratio and a single small model for transferring complex knowledge, which leads to suboptimal performance. \nTo surmount these limitations,\nwe propose a dynamic logit fusion approach that works with a series of task-specific small models, each specialized in a different task. \nThis method adaptively allocates weights among these models at each decoding step,\nlearning the weights through Kullback-Leibler divergence constrained optimization problems. \nWe conduct extensive experiments across various benchmarks in both single-task and multi-task settings, achieving leading results.\nBy transferring expertise from the 7B model to the 13B model, our method closes the performance gap by 96.4\\% in single-task scenarios and by 86.3\\% in multi-task scenarios compared to full fine-tuning of the 13B model. Notably, we achieve surpassing performance on unseen tasks. Moreover, we further demonstrate that our method can effortlessly integrate in-context learning for single tasks and task arithmetic for multi-task scenarios.", "keywords": "weak to strong;natural language generation;Generative AI;large language model;gradient-free approach", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/0f6074314e9d7020d036a81be99bea795399109d.zip", "author": "Chenghao Fan;Zhenyi Lu;Wei Wei;Jie Tian;Xiaoye Qu;Dangyang Chen;Yu Cheng", "authorids": "~Chenghao_Fan1;~Zhenyi_Lu2;~Wei_Wei14;~Jie_Tian7;~Xiaoye_Qu1;~Dangyang_Chen1;~Yu_Cheng1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://orcid.org/0009-0002-8381-3236;https://www.eric-weiwei.com;;;https://ych133.github.io;", "dblp": "313/1709.html;307/6180;24/4105-2;229/8206;327/3353;96/3060-1.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.sg/citations?hl=en;rT3hqdcAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0003-4488-0102;;;;0009-0007-5683-7204", "linkedin": ";;;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;chengyu05/;", "or_profile": "~Chenghao_Fan1;~Zhenyi_Lu2;~Wei_Wei14;~Xiaoye_Qu1;~Dangyang_Chen1;~Yu_Cheng1;~Tian_Jie1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;Pingan Technology;The Chinese University of Hong Kong;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;pjlab.org.cn;pingan.com.cn;cuhk.edu.hk;hust.edu.cn", "position": "MS student;MS student;Full Professor;Researcher;CTO;Associate Professor;MS student", "bibtex": "@inproceedings{\nfan2024on,\ntitle={On Giant's Shoulders: Effortless Weak to Strong by Dynamic Logits Fusion},\nauthor={Chenghao Fan and Zhenyi Lu and Wei Wei and Jie Tian and Xiaoye Qu and Dangyang Chen and Yu Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RMfiqfWAWg}\n}", "github": "", "reviewers": "Ttwe;XMhg;YRcj;frHX", "pdf_size": 3199128, "rating": "4;5;6;7", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "84;160;64;41", "wc_strengths": "66;45;105;22", "wc_weaknesses": "80;418;156;45", "wc_questions": "72;25;104;161", "wc_limitations": "7;16;4;1", "wc_review": "309;664;433;270", "wc_reply_reviewers": "75;38;0;10", "wc_reply_authors": "446;83;65;0", "reply_reviewers": "2;1;0;1", "reply_authors": "4;3;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 87.25, 44.67311831515682 ], "wc_strengths_avg": [ 59.5, 30.532769281544052 ], "wc_weaknesses_avg": [ 174.75, 146.0605610697152 ], "wc_questions_avg": [ 90.5, 49.45957945636012 ], "wc_limitations_avg": [ 7.0, 5.612486080160912 ], "wc_review_avg": [ 419.0, 153.72215194954825 ], "wc_reply_reviewers_avg": [ 30.75, 29.09789511287715 ], "wc_reply_authors_avg": [ 148.5, 174.51432605949574 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17577619045817613513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;pjlab.org.cn;pingan.com.cn;cuhk.edu.hk;hust.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;PingAn Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hust.edu.cn;http://www.shailab.org/;https://www.pingan.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "HUST;Shanghai AI Lab;;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Image Understanding Makes for A Good Tokenizer for Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95176", "id": "RMmgu49lwn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RMmgu49lwn", "openreview": "https://openreview.net/forum?id=RMmgu49lwn", "poster": "/media/PosterPDFs/NeurIPS%202024/95176.png?t=1731685072.106643", "project": "", "author_site": "Luting Wang, Yang Zhao, Zijian Zhang, Jiashi Feng, Si Liu, Bingyi Kang", "tldr": "", "abstract": "Modern image generation (IG) models have been shown to capture rich semantics valuable for image understanding (IU) tasks. However, the potential of IU models to improve IG performance remains uncharted. We address this issue using a token-based IG framework, which relies on effective tokenizers to project images into token sequences. Currently, **pixel reconstruction** (e.g., VQGAN) dominates the training objective for image tokenizers. In contrast, our approach adopts the **feature reconstruction** objective, where tokenizers are trained by distilling knowledge from pretrained IU encoders. Comprehensive comparisons indicate that tokenizers with strong IU capabilities achieve superior IG performance across a variety of metrics, datasets, tasks, and proposal networks. Notably, VQ-KD CLIP achieves $4.10$ FID on ImageNet-1k (IN-1k). Visualization suggests that the superiority of VQ-KD can be partly attributed to the rich semantics within the VQ-KD codebook. We further introduce a straightforward pipeline to directly transform IU encoders into tokenizers, demonstrating exceptional effectiveness for IG tasks. These discoveries may energize further exploration into image tokenizer research and inspire the community to reassess the relationship between IU and IG. The code is released at https://github.com/magic-research/vector_quantization.", "keywords": "Image Tokenizer;Vector Quantization;Image Generation", "primary_area": "generative_models", "supplementary_material": "", "author": "Luting Wang;Yang Zhao;Zijian Zhang;Jiashi Feng;Si Liu;Bingyi Kang", "authorids": "~Luting_Wang1;~Yang_Zhao14;~Zijian_Zhang3;~Jiashi_Feng1;~Si_Liu5;~Bingyi_Kang1", "gender": "M;M;M;F;;M", "homepage": ";;https://ckczzj.com;https://colalab.net;https://bingykang.github.io/;https://sites.google.com/site/jshfeng/", "dblp": "214/2310-1;50/2082-22;43/6524-2;60/7642;;56/8278", "google_scholar": "https://scholar.google.com/citations?hl=en;;TZ0nnhgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ", "orcid": "0000-0001-8317-226X;;0000-0001-8308-768X;0000-0002-9180-2935;;0000-0001-6843-0064", "linkedin": "luting-wang-060516161/;;;;;", "or_profile": "~Luting_Wang1;~Yang_Zhao14;~Zijian_Zhang3;~Si_Liu5;~Bingyi_Kang1;~Jiashi_Feng2", "aff": "ByteDance Inc.;ByteDance Inc.;Zhejiang University;Beihang University;Bytedance;ByteDance", "aff_domain": "bytedance.com;bytedance.com;zju.edu.cn;buaa.edu.cn;bytedance.com;bytedance.com", "position": "Intern;Researcher;PhD student;Full Professor;Researcher;Research Lead", "bibtex": "@inproceedings{\nwang2024image,\ntitle={Image Understanding Makes for A Good Tokenizer for Image Generation},\nauthor={Luting Wang and Yang Zhao and Zijian Zhang and Jiashi Feng and Si Liu and Bingyi Kang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RMmgu49lwn}\n}", "github": "", "reviewers": "3Kzg;aFBf;WgNi;cC74", "pdf_size": 1541918, "rating": "5;5;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;4;2;3", "wc_summary": "68;56;47;77", "wc_strengths": "46;132;57;86", "wc_weaknesses": "251;59;133;93", "wc_questions": "111;3;71;57", "wc_limitations": "16;13;23;1", "wc_review": "492;263;331;314", "wc_reply_reviewers": "0;47;76;13", "wc_reply_authors": "50;61;12;50", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.0, 11.423659658795863 ], "wc_strengths_avg": [ 80.25, 33.25939716831921 ], "wc_weaknesses_avg": [ 134.0, 72.44998274671983 ], "wc_questions_avg": [ 60.5, 38.66199684444661 ], "wc_limitations_avg": [ 13.25, 7.949056547792323 ], "wc_review_avg": [ 350.0, 85.71755946129124 ], "wc_reply_reviewers_avg": [ 34.0, 29.706901555025897 ], "wc_reply_authors_avg": [ 43.25, 18.59267328815305 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12353074856207739003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "bytedance.com;bytedance.com;zju.edu.cn;buaa.edu.cn;bytedance.com;bytedance.com", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "ByteDance;Zhejiang University;Beihang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;https://www.zju.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "ByteDance;ZJU;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Ada-MSHyper: Adaptive Multi-Scale Hypergraph Transformer for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95175", "id": "RNbrIQ0se8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RNbrIQ0se8", "openreview": "https://openreview.net/forum?id=RNbrIQ0se8", "poster": "/media/PosterPDFs/NeurIPS%202024/95175.png?t=1731412527.3179564", "project": "", "author_site": "Zongjiang Shang, Ling Chen, Binqing Wu, Dongliang Cui", "tldr": "", "abstract": "Although transformer-based methods have achieved great success in multi-scale temporal pattern interaction modeling, two key challenges limit their further development: (1) Individual time points contain less semantic information, and leveraging attention to model pair-wise interactions may cause the information utilization bottleneck. (2) Multiple inherent temporal variations (e.g., rising, falling, and fluctuating) entangled in temporal patterns. To this end, we propose Adaptive Multi-Scale Hypergraph Transformer (Ada-MSHyper) for time series forecasting. Specifically, an adaptive hypergraph learning module is designed to provide foundations for modeling group-wise interactions, then a multi-scale interaction module is introduced to promote more comprehensive pattern interactions at different scales. In addition, a node and hyperedge constraint mechanism is introduced to cluster nodes with similar semantic information and differentiate the temporal variations within each scales. Extensive experiments on 11 real-world datasets demonstrate that Ada-MSHyper achieves state-of-the-art performance, reducing prediction errors by an average of 4.56%, 10.38%, and 4.97% in MSE for long-range, short-range, and ultra-long-range time series forecasting, respectively. Code is available at https://github.com/shangzongjiang/Ada-MSHyper.", "keywords": "Time series forecasting;transformer;multi-scale modeling;hypergraph neural network;hypergraph learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zongjiang Shang;Ling Chen;Binqing Wu;Dongliang Cui", "authorids": "~Zongjiang_Shang1;~Ling_Chen4;~Binqing_Wu1;~Dongliang_Cui1", "gender": "M;M;F;M", "homepage": ";https://person.zju.edu.cn/en/lc;;", "dblp": "247/4128;17/1237-1.html;300/8286.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;Vxi9eakAAAAJ;;", "orcid": ";0000-0003-1934-5992;0000-0001-8276-0801;0000-0002-8306-0513", "linkedin": ";;;", "or_profile": "~Zongjiang_Shang1;~Ling_Chen4;~Binqing_Wu1;~Dongliang_Cui1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nshang2024adamshyper,\ntitle={Ada-{MSH}yper: Adaptive Multi-Scale Hypergraph Transformer for Time Series Forecasting},\nauthor={Zongjiang Shang and Ling Chen and Binqing Wu and Dongliang Cui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RNbrIQ0se8}\n}", "github": "", "reviewers": "HJMp;VVbq;HQ9E;Y3pA", "pdf_size": 1061863, "rating": "4;7;7;7", "confidence": "4;3;4;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "46;54;43;78", "wc_strengths": "55;54;30;72", "wc_weaknesses": "37;88;65;104", "wc_questions": "155;55;80;200", "wc_limitations": "40;13;2;6", "wc_review": "333;264;220;460", "wc_reply_reviewers": "0;16;13;0", "wc_reply_authors": "0;37;32;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 55.25, 13.736356867816154 ], "wc_strengths_avg": [ 52.75, 14.956186011146023 ], "wc_weaknesses_avg": [ 73.5, 25.22399651125888 ], "wc_questions_avg": [ 122.5, 57.93315113128234 ], "wc_limitations_avg": [ 15.25, 14.821858857781638 ], "wc_review_avg": [ 319.25, 90.69557596707791 ], "wc_reply_reviewers_avg": [ 7.25, 7.327175444876422 ], "wc_reply_authors_avg": [ 17.25, 17.340343133859836 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14253648588086674879&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "RNeb41ybNL", "title": "On the Mode-Seeking Properties of Langevin Dynamics", "track": "main", "status": "Reject", "tldr": "", "abstract": "The Langevin Dynamics framework, which aims to generate samples from the score function of a probability distribution, is widely used for analyzing and interpreting score-based generative modeling. While the convergence behavior of Langevin Dynamics under unimodal distributions has been extensively studied in the literature, in practice the data distribution could consist of multiple distinct modes. In this work, we investigate Langevin Dynamics in producing samples from multimodal distributions and theoretically study its mode-seeking properties. We prove that under a variety of sub-Gaussian mixtures, Langevin Dynamics is unlikely to find all mixture components within a sub-exponential number of steps in the data dimension. To reduce the mode-seeking tendencies of Langevin Dynamics, we propose \\emph{Chained Langevin Dynamics}, which divides the data vector into patches of constant size and generates every patch sequentially conditioned on the previous patches. We perform a theoretical analysis of Chained Langevin Dynamics by reducing it to sampling from a constant-dimensional distribution. We present the results of several numerical experiments on synthetic and real image datasets, supporting our theoretical results on the iteration complexities of sample generation from mixture distributions using the chained and vanilla Langevin Dynamics.", "keywords": "Langevin dynamics;convergence analysis;mixture distribution;mode-seeking", "primary_area": "learning_theory", "supplementary_material": "/attachment/6d1fe1aa5654f4ff1e4e1d4ccc09a29f5eb4edff.zip", "author": "Xiwei Cheng;Kexin Fu;Farzan Farnia", "authorids": "~Xiwei_Cheng2;~Kexin_Fu2;~Farzan_Farnia1", "gender": "M;F;M", "homepage": "https://xiwei-cheng.github.io/;;https://www.cse.cuhk.edu.hk/~farnia/", "dblp": ";;132/7757", "google_scholar": "IDbEZeoAAAAJ;;GYPCqcYAAAAJ", "orcid": ";;0000-0002-6049-9232", "linkedin": ";kexin-fu-9a5179247/;farzan-farnia-00798335", "or_profile": "~Xiwei_Cheng2;~Kexin_Fu2;~Farzan_Farnia1", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk", "position": "Master of Philosophy student;Undergrad student;Assistant Professor", "bibtex": "@misc{\nanonymous2024on,\ntitle={On the Mode-Seeking Properties of Langevin Dynamics},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=RNeb41ybNL}\n}", "github": "", "project": "", "reviewers": "3VKE;Ave7;nEdB;7G8Z", "site": "https://openreview.net/forum?id=RNeb41ybNL", "pdf_size": 11946262, "rating": "5;6;7;8", "confidence": "3;3;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "56;66;212;56", "wc_strengths": "38;40;31;29", "wc_weaknesses": "175;29;2;22", "wc_questions": "239;20;332;212", "wc_limitations": "9;28;1;9", "wc_review": "517;183;578;328", "wc_reply_reviewers": "59;97;674;0", "wc_reply_authors": "74;267;651;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.5, 66.23254487032791 ], "wc_strengths_avg": [ 34.5, 4.6097722286464435 ], "wc_weaknesses_avg": [ 57.0, 68.84402661088325 ], "wc_questions_avg": [ 200.75, 113.45345962111513 ], "wc_limitations_avg": [ 11.75, 9.934158243152764 ], "wc_review_avg": [ 401.5, 156.2345992410132 ], "wc_reply_reviewers_avg": [ 207.5, 271.542353970794 ], "wc_reply_authors_avg": [ 248.0, 252.26474188835823 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VcLMRcw0AFkJ:scholar.google.com/&scioq=On+the+Mode-Seeking+Properties+of+Langevin+Dynamics&hl=en&as_sdt=0,14", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Lisa: Lazy Safety Alignment for Large Language Models against Harmful Fine-tuning Attack", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95174", "id": "RPChapuXlC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RPChapuXlC", "openreview": "https://openreview.net/forum?id=RPChapuXlC", "poster": "/media/PosterPDFs/NeurIPS%202024/95174.png?t=1733184390.0531397", "project": "", "author_site": "Tiansheng Huang, Sihao Hu, Fatih Ilhan, Selim Tekin, Ling Liu", "tldr": "", "abstract": "Recent studies show that Large Language Models (LLMs) with safety alignment can be jail-broken by fine-tuning on a dataset mixed with harmful data. For the first time in the literature, we show that the jail-break effect can be mitigated by separating two states in the fine-tuning stage to respectively optimize over the alignment and user datasets. Unfortunately, our subsequent study shows that this simple Bi-State Optimization (BSO) solution experiences convergence instability when steps invested in its alignment state is too small, leading to downgraded alignment performance. By statistical analysis, we show that the \\textit{excess drift} towards the switching iterates of the two states could be a probable reason for the instability. To remedy this issue, we propose \\textbf{L}azy(\\textbf{i}) \\textbf{s}afety \\textbf{a}lignment (\\textbf{Lisa}), which introduces a proximal term to constraint the drift of each state. Theoretically, the benefit of the proximal term is supported by the convergence analysis, wherein we show that a sufficient large proximal factor is necessary to guarantee Lisa's convergence. Empirically, our results on four downstream fine-tuning tasks show that Lisa with a proximal term can significantly increase alignment performance while maintaining the LLM's accuracy on the user tasks. Code is available at https://github.com/git-disl/Lisa.", "keywords": "Large language model;safety alignment;harmful finetuning attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Tiansheng Huang;Sihao Hu;Fatih Ilhan;Selim Furkan Tekin;Ling Liu", "authorids": "~Tiansheng_Huang1;~Sihao_Hu1;~Fatih_Ilhan1;~Selim_Furkan_Tekin1;~Ling_Liu3", "gender": "M;M;M;M;", "homepage": "https://huangtiansheng.github.io/;https://bayi-hu.github.io/;https://fatih-ilhan.github.io/;https://sftekin.com/;", "dblp": "249/2114;266/4995;260/0254;283/8174;", "google_scholar": "zz6Oq8wAAAAJ;WcZbKF4AAAAJ;https://scholar.google.com.tr/citations?hl=en;a2O4Zx0AAAAJ;", "orcid": "0000-0002-4557-1865;0000-0003-3297-6991;0000-0002-0173-7544;0000-0002-8662-3609;", "linkedin": ";;;;", "or_profile": "~Tiansheng_Huang1;~Sihao_Hu1;~Fatih_Ilhan1;~Selim_Furkan_Tekin1;~Ling_Liu3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;College of Computing, Georgia Institute of Technology;", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu;", "position": "PhD student;PhD student;PhD student;PhD student;", "bibtex": "@inproceedings{\nhuang2024lisa,\ntitle={Lisa: Lazy Safety Alignment for Large Language Models against Harmful Fine-tuning Attack},\nauthor={Tiansheng Huang and Sihao Hu and Fatih Ilhan and Selim Furkan Tekin and Ling Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RPChapuXlC}\n}", "github": "", "reviewers": "1udp;MRGF;hVc3;ytGJ", "pdf_size": 2082336, "rating": "4;7;7;8", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;1;3", "presentation": "2;4;3;3", "wc_summary": "77;54;94;106", "wc_strengths": "28;74;14;73", "wc_weaknesses": "67;125;92;509", "wc_questions": "63;44;74;652", "wc_limitations": "40;8;4;81", "wc_review": "275;305;278;1421", "wc_reply_reviewers": "112;88;231;88", "wc_reply_authors": "1277;483;1042;391", "reply_reviewers": "2;1;3;1", "reply_authors": "7;4;6;3", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.75, 19.536824204563032 ], "wc_strengths_avg": [ 47.25, 26.714930282521795 ], "wc_weaknesses_avg": [ 198.25, 180.58706349016256 ], "wc_questions_avg": [ 208.25, 256.42384347014223 ], "wc_limitations_avg": [ 33.25, 30.898017735770686 ], "wc_review_avg": [ 569.75, 491.6082663056023 ], "wc_reply_reviewers_avg": [ 129.75, 59.27214775929754 ], "wc_reply_authors_avg": [ 798.25, 372.1057477384621 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 5.0, 1.5811388300841898 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16277549822665014527&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "gatech.edu;gatech.edu;gatech.edu;cc.gatech.edu;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "VideoTetris: Towards Compositional Text-to-Video Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95173", "id": "RPM7STrnVz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RPM7STrnVz", "openreview": "https://openreview.net/forum?id=RPM7STrnVz", "poster": "/media/PosterPDFs/NeurIPS%202024/95173.png?t=1733059355.1363056", "project": "", "author_site": "Ye Tian, Ling Yang, Haotian Yang, Yuan Gao, Yufan Deng, Xintao Wang, Zhaochen Yu, Xin Tao, Pengfei Wan, Di ZHANG, Bin CUI", "tldr": "", "abstract": "Diffusion models have demonstrated great success in text-to-video (T2V) generation. However, existing methods may face challenges when handling complex (long) video generation scenarios that involve multiple objects or dynamic changes in object numbers. To address these limitations, we propose VideoTetris, a novel framework that enables compositional T2V generation. Specifically, we propose spatio-temporal compositional diffusion to precisely follow complex textual semantics by manipulating and composing the attention maps of denoising networks spatially and temporally. Moreover, we propose a new dynamic-aware data processing pipeline and a consistency regularization method to enhance the consistency of auto-regressive video generation. Extensive experiments demonstrate that our VideoTetris achieves impressive qualitative and quantitative results in compositional T2V generation. Code is available at: https://github.com/YangLing0818/VideoTetris", "keywords": "Text-to-Video Generation;Video Diffusion Models", "primary_area": "generative_models", "supplementary_material": "/attachment/f72c15c52e21ddf70f17cb82f2289107037072b9.zip", "author": "Ye Tian;Ling Yang;Haotian Yang;Yuan Gao;Yufan Deng;Xintao Wang;Zhaochen Yu;Xin Tao;Pengfei Wan;Di ZHANG;Bin CUI", "authorids": "~Ye_Tian15;~Ling_Yang1;~Haotian_Yang1;~Yuan_Gao32;~Yufan_Deng2;~Xintao_Wang1;~Zhaochen_Yu2;~Xin_Tao3;~Pengfei_Wan1;~Di_ZHANG3;~Bin_CUI2", "gender": "M;M;M;M;M;;M;M;M;M;M", "homepage": "https://tyfeld.github.io/;https://yangling0818.github.io/;https://yanght321.github.io/;;;;https://zhaochenyu0201.github.io;http://www.xtao.website;;;https://cuibinpku.github.io/index.html", "dblp": ";01/24-6.html;;;;;;98/7443-1;;;55/5031.html", "google_scholar": "vUY_PIUAAAAJ;https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;LH71RGkAAAAJ;;;;9RNgZOIAAAAJ;sQ30WyUAAAAJ;P6MraaYAAAAJ;;IJAU8KoAAAAJ", "orcid": ";0000-0003-1905-8053;0009-0008-7334-6113;0009-0009-1198-9954;0009-0003-2335-3191;;;0000-0001-9126-4746;0000-0001-7225-565X;0009-0006-5475-2728;0000-0003-1681-4677", "linkedin": ";;;;;;;;;;", "or_profile": "~Ye_Tian15;~Ling_Yang1;~Haotian_Yang1;~Yuan_Gao32;~Yufan_Deng2;~Xintao_Wang1;~Zhaochen_Yu2;~Xin_Tao3;~Pengfei_Wan1;~Di_ZHANG3;~Bin_CUI2", "aff": "Peking University;Peking University;Kuaishou Technology;Kuaishou- \u5feb\u624b\u79d1\u6280;Peking University;;Peking University;Kuaishou;Kuaishou Technology;Kuaishou Technology;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;kuaishou.com;kuaishou.com;stu.pku.edu.cn;;pku.edu.cn;kuaishou.com;kuaishou.com;kuaishou.com;pku.edu.cn", "position": "MS student;PhD student;Researcher;Researcher;PhD student;;Intern;Researcher;Director;VP;Full Professor", "bibtex": "@inproceedings{\ntian2024videotetris,\ntitle={VideoTetris: Towards Compositional Text-to-Video Generation},\nauthor={Ye Tian and Ling Yang and Haotian Yang and Yuan Gao and Yufan Deng and Xintao Wang and Zhaochen Yu and Xin Tao and Pengfei Wan and Di ZHANG and Bin CUI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RPM7STrnVz}\n}", "github": "", "reviewers": "Qhp4;9Z5k;uyep;pWwG", "pdf_size": 9041357, "rating": "5;6;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;2;2", "presentation": "2;3;3;3", "wc_summary": "70;115;60;67", "wc_strengths": "93;52;49;103", "wc_weaknesses": "168;150;76;316", "wc_questions": "41;6;7;172", "wc_limitations": "71;11;9;6", "wc_review": "443;334;201;664", "wc_reply_reviewers": "70;22;18;80", "wc_reply_authors": "172;144;136;51", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 21.66794868002045 ], "wc_strengths_avg": [ 74.25, 24.0351305384431 ], "wc_weaknesses_avg": [ 177.5, 87.0789871323731 ], "wc_questions_avg": [ 56.5, 68.15607089614248 ], "wc_limitations_avg": [ 24.25, 27.049722734253674 ], "wc_review_avg": [ 410.5, 169.60321341295395 ], "wc_reply_reviewers_avg": [ 47.5, 27.76238462380348 ], "wc_reply_authors_avg": [ 125.75, 45.17950309598369 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8679615223789801573&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;kuaishou.com;kuaishou.com;stu.pku.edu.cn;;pku.edu.cn;kuaishou.com;kuaishou.com;kuaishou.com;pku.edu.cn", "author_num": 11, "aff_unique_index": "0;0;1;1;0;0;1;1;1;0", "aff_unique_norm": "Peking University;Kuaishou Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.kuaishou.com", "aff_unique_abbr": "Peking U;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Non-Asymptotic Uncertainty Quantification in High-Dimensional Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95172", "id": "RQCmMSSzvI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RQCmMSSzvI", "openreview": "https://openreview.net/forum?id=RQCmMSSzvI", "poster": "/media/PosterPDFs/NeurIPS%202024/95172.png?t=1731755269.1793509", "project": "", "author_site": "Frederik Hoppe, Claudio Mayrink Verdun, Hannah Laus, Felix Krahmer, Holger Rauhut", "tldr": "", "abstract": "Uncertainty quantification (UQ) is a crucial but challenging task in many high-dimensional learning problems to increase the confidence of a given predictor. We develop a new data-driven approach for UQ in regression that applies both to classical optimization approaches such as the LASSO as well as to neural networks. One of the most notable UQ techniques is the debiased LASSO, which modifies the LASSO to allow for the construction of asymptotic confidence intervals by decomposing the estimation error into a Gaussian and an asymptotically vanishing bias component. However, in real-world problems with finite-dimensional data, the bias term is often too significant to disregard, resulting in overly narrow confidence intervals. Our work rigorously addresses this issue and derives a data-driven adjustment that corrects the confidence intervals for a large class of predictors by estimating the means and variances of the bias terms from training data, exploiting high-dimensional concentration phenomena. This gives rise to non-asymptotic confidence intervals, which can help avoid overestimating certainty in critical applications such as MRI diagnosis. Importantly, our analysis extends beyond sparse regression to data-driven predictors like neural networks, enhancing the reliability of model-based deep learning. Our findings bridge the gap between established theory and the practical applicability of such methods.", "keywords": "high-dimensional regression;uncertainty quantification;model-based deep learning;debiased estimator;inverse problems", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/fc499512f04f0bda71d5f6bc0946ae7e932e3ad7.zip", "author": "Frederik Hoppe;Claudio Mayrink Verdun;Hannah Laus;Felix Krahmer;Holger Rauhut", "authorids": "~Frederik_Hoppe1;~Claudio_Mayrink_Verdun1;~Hannah_Laus1;~Felix_Krahmer1;~Holger_Rauhut1", "gender": "M;M;F;M;M", "homepage": ";;https://www.math.cit.tum.de/math/personen/wissenschaftliches-personal/hannah-laus/;https://www.math.cit.tum.de/math/personen/professuren/krahmer-felix/;https://www.mathc.rwth-aachen.de/~rauhut/home/", "dblp": "337/0472;;357/4456;17/7802;52/5600", "google_scholar": "https://scholar.google.de/citations?user=ZPc7r_4AAAAJ;lsOne4AAAAAJ;;https://scholar.google.de/citations?user=A0IX_6AAAAAJ;https://scholar.google.de/citations?user=KA2BM_UAAAAJ", "orcid": ";;;;0000-0003-4750-5092", "linkedin": ";;;;", "or_profile": "~Frederik_Hoppe1;~Claudio_Mayrink_Verdun1;~Hannah_Laus1;~Felix_Krahmer1;~Holger_Rauhut1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;Harvard University;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "rwth-aachen.de;harvard.edu;tum.de;tum.de;lmu.de", "position": "PhD student;Postdoc;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhoppe2024nonasymptotic,\ntitle={Non-Asymptotic Uncertainty Quantification in High-Dimensional Learning},\nauthor={Frederik Hoppe and Claudio Mayrink Verdun and Hannah Laus and Felix Krahmer and Holger Rauhut},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RQCmMSSzvI}\n}", "github": "", "reviewers": "HzkU;9jkp;BKX7;WkBX", "pdf_size": 1444503, "rating": "3;6;7;8", "confidence": "1;3;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "1;3;2;3", "wc_summary": "97;9;76;67", "wc_strengths": "16;22;77;13", "wc_weaknesses": "149;1;123;74", "wc_questions": "2;16;109;7", "wc_limitations": "27;1;50;13", "wc_review": "291;49;435;174", "wc_reply_reviewers": "0;0;29;48", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 62.25, 32.61422235773835 ], "wc_strengths_avg": [ 32.0, 26.182054923172092 ], "wc_weaknesses_avg": [ 86.75, 56.35767472137224 ], "wc_questions_avg": [ 33.5, 43.877670858877636 ], "wc_limitations_avg": [ 22.75, 18.226011631731172 ], "wc_review_avg": [ 237.25, 142.68212046363763 ], "wc_reply_reviewers_avg": [ 19.25, 20.38841582860228 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9810229431759453, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LDcGeX-kvQsJ:scholar.google.com/&scioq=Non-Asymptotic+Uncertainty+Quantification+in+High-Dimensional+Learning&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "rwth-aachen.de;harvard.edu;tum.de;tum.de;lmu.de", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "RWTH Aachen University;Harvard University;Technische Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.rwth-aachen.de;https://www.harvard.edu;https://www.tum.de;https://www.lmu.de", "aff_unique_abbr": "RWTH;Harvard;TUM;LMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Collaborative Refining for Learning from Inaccurate Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95171", "id": "RQQGbBqvbL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RQQGbBqvbL", "openreview": "https://openreview.net/forum?id=RQQGbBqvbL", "poster": "/media/PosterPDFs/NeurIPS%202024/95171.png?t=1731120504.1543565", "project": "", "author_site": "BIN HAN, Yi-Xuan Sun, Ya-Lin Zhang, Libang Zhang, Haoran Hu, Longfei Li, Jun Zhou, Guo Ye, HUIMEI HE", "tldr": "", "abstract": "This paper considers the problem of learning from multiple sets of inaccurate labels, which can be easily obtained from low-cost annotators, such as rule-based annotators. Previous works typically concentrate on aggregating information from all the annotators, overlooking the significance of data refinement. This paper presents a collaborative refining approach for learning from inaccurate labels. To refine the data, we introduce the annotator agreement as an instrument, which refers to whether multiple annotators agree or disagree on the labels for a given sample. For samples where some annotators disagree, a comparative strategy is proposed to filter noise. Through theoretical analysis, the connections among multiple sets of labels, the respective models trained on them, and the true labels are uncovered to identify relatively reliable labels. For samples where all annotators agree, an aggregating strategy is designed to mitigate potential noise. Guided by theoretical bounds on loss values, a sample selection criterion is introduced and modified to be more robust against potentially problematic values. Through these two methods, all the samples are refined during training, and these refined samples are used to train a lightweight model simultaneously. Extensive experiments are conducted on benchmark and real-world datasets to demonstrate the superiority of our methods.", "keywords": "Inaccurate Labels;Annotator Agreement;Data Refinement;Sample Selection", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "BIN HAN;Yi-Xuan Sun;Ya-Lin Zhang;Libang Zhang;Haoran Hu;Longfei Li;JUN ZHOU;Guo Ye;HUIMEI HE", "authorids": "~BIN_HAN2;~Yi-Xuan_Sun1;~Ya-Lin_Zhang1;~Libang_Zhang1;~Haoran_Hu2;~Longfei_Li1;~JUN_ZHOU6;~Guo_Ye2;~HUIMEI_HE1", "gender": "M;;;M;M;M;M;M;F", "homepage": ";;;;;;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en;;", "dblp": ";;;;;139/8073;99/3847-11;;", "google_scholar": "IzmdkvMCESkC;;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;mCVvloEAAAAJ;;", "orcid": ";;;;;;0000-0001-6033-6102;0000-0001-9361-9977;0000-0002-9642-7783", "linkedin": ";;;;;;;;", "or_profile": "~BIN_HAN2;~Yi-Xuan_Sun1;~Ya-Lin_Zhang1;~Libang_Zhang1;~Haoran_Hu2;~Longfei_Li1;~JUN_ZHOU6;~Guo_Ye2;~HUIMEI_HE1", "aff": "Ant Group;;;Harbin Institute of Technology;Ant Group;ant group;Ant Group;University of Electronic Science and Technology of China;Alibaba Group", "aff_domain": "antgroup.com;;;hit.edu;antgroup.com;antgroup.com;antgroup.com;uestc.edu.cn;alibaba-inc.com", "position": "Researcher;;;MS student;Researcher;Researcher;Researcher;MS student;Researcher", "bibtex": "@inproceedings{\nhan2024collaborative,\ntitle={Collaborative Refining for Learning from Inaccurate Labels},\nauthor={BIN HAN and Yi-Xuan Sun and Ya-Lin Zhang and Libang Zhang and Haoran Hu and Longfei Li and JUN ZHOU and Guo Ye and HUIMEI HE},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RQQGbBqvbL}\n}", "github": "", "reviewers": "qood;qnq5;9hER;eKLA;hvQ6", "pdf_size": 398159, "rating": "5;5;5;6;7", "confidence": "4;3;4;3;3", "soundness": "1;3;3;3;3", "novelty": "1;2;2;3;3", "presentation": "1;3;3;2;3", "wc_summary": "103;73;83;120;87", "wc_strengths": "21;99;40;20;46", "wc_weaknesses": "418;45;182;143;73", "wc_questions": "58;35;30;34;101", "wc_limitations": "2;1;43;1;7", "wc_review": "602;253;378;318;314", "wc_reply_reviewers": "33;25;234;33;60", "wc_reply_authors": "14;28;428;0;69", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 93.2, 16.521501142450706 ], "wc_strengths_avg": [ 45.2, 28.78471816780564 ], "wc_weaknesses_avg": [ 172.2, 132.20196670246628 ], "wc_questions_avg": [ 51.6, 26.582701141908057 ], "wc_limitations_avg": [ 10.8, 16.252999723128035 ], "wc_review_avg": [ 373.0, 121.13793790551331 ], "wc_reply_reviewers_avg": [ 77.0, 79.39017571463108 ], "wc_reply_authors_avg": [ 107.8, 161.7533925455661 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.6123724356957947, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NQKvdSI0BXAJ:scholar.google.com/&scioq=Collaborative+Refining+for+Learning+from+Inaccurate+Labels&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "antgroup.com;;;hit.edu;antgroup.com;antgroup.com;antgroup.com;uestc.edu.cn;alibaba-inc.com", "author_num": 9, "aff_unique_index": "0;1;0;0;0;2;3", "aff_unique_norm": "Ant Group;Harbin Institute of Technology;University of Electronic Science and Technology of China;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.antgroup.com;http://www.hit.edu.cn/;https://www.uestc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Ant Group;HIT;UESTC;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "NovoBench: Benchmarking Deep Learning-based \\emph{De Novo} Sequencing Methods in Proteomics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97686", "id": "RQlbMrA5XL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RQlbMrA5XL", "openreview": "https://openreview.net/forum?id=RQlbMrA5XL", "poster": "/media/PosterPDFs/NeurIPS%202024/97686.png?t=1731415413.3103836", "project": "", "author_site": "Jingbo Zhou, Shaorong Chen, Jun Xia, Sizhe Liu, Tianze Ling, Wenjie Du, Yue Liu, Jianwei Yin, Stan Z. Li", "tldr": "", "abstract": "Tandem mass spectrometry has played a pivotal role in advancing proteomics, enabling the analysis of protein composition in biological tissues. Many deep learning methods have been developed for \\emph{de novo} peptide sequencing task, i.e., predicting the peptide sequence for the observed mass spectrum. \nHowever, two key challenges seriously hinder the further research of this important task. Firstly, since there is no consensus for the evaluation datasets, the empirical results in different research papers are often not comparable, leading to unfair comparison. Secondly, the current methods are usually limited to amino acid-level or peptide-level precision and recall metrics. In this work, we present the first unified benchmark NovoBench for \\emph{de novo} peptide sequencing, which comprises diverse mass spectrum data, integrated models, and comprehensive evaluation metrics. Recent impressive methods, including DeepNovo, PointNovo, Casanovo, InstaNovo, AdaNovo and $\\pi$-HelixNovo are integrated into our framework. In addition to amino acid-level and peptide-level precision and recall, we also evaluate the models' performance in terms of identifying post-tranlational modifications (PTMs), efficiency and robustness to peptide length, noise peaks and missing fragment ratio, which are important influencing factors while seldom be considered. Leveraging this benchmark, we conduct a large-scale study of current methods, report many insightful findings that open up new possibilities for future development. The benchmark is open-sourced to facilitate future research and application. The code is available at \\url{https://github.com/Westlake-OmicsAI/NovoBench}.", "keywords": "Proteomics;Peptide Sequencing", "primary_area": "", "supplementary_material": "/attachment/24b8d833d01ad7e5b2b1e394e9eb1c4b9759ee01.zip", "author": "Jingbo Zhou;Shaorong Chen;Jun Xia;Sizhe Liu;Tianze Ling;Wenjie Du;Yue Liu;Jianwei Yin;Stan Z. Li", "authorids": "~Jingbo_Zhou2;~Shaorong_Chen1;~Jun_Xia1;~Sizhe_Liu1;~Tianze_Ling1;~Wenjie_Du2;~Yue_Liu10;~Jianwei_Yin1;~Stan_Z._Li2", "gender": "M;;M;M;M;M;M;M;M", "homepage": "https://jingbo02.github.io/;;http://junxia97.github.io/;;https://invokerqwer.github.io/duwenjie.github.io/;https://yueliu1999.github.io/;https://person.zju.edu.cn/0001038;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml;", "dblp": ";284/3994;;;;74/1932-8;74/3786;l/StanZLi;", "google_scholar": ";;aPKKpSYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;5tfpu3MAAAAJ;0s1A5fwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0009-0005-4221-2911;0000-0002-7185-8616;;;0000-0002-3517-7354;;0000-0003-4703-7348;;0009-0006-6715-2553", "linkedin": ";;;steven-liu-195745241/;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/;", "or_profile": "~Jingbo_Zhou2;~Shaorong_Chen1;~Jun_Xia1;~Sizhe_Liu1;~Wenjie_Du2;~Yue_Liu10;~Jianwei_Yin1;~Stan_Z._Li1;~TianzeLing1", "aff": "Jilin University;Westlake University;Westlake University, China;University of Southern California;University of Science and Technology of China;University of Illinois, Urbana Champaign;Zhejiang University;Westlake University;Tsinghua University", "aff_domain": "jlu.edu.cn;westlake.edu.cn;westlake.edu.cn;usc.edu;ustc.edu.cn;uiuc.edu;zju.edu.cn;westlake.edu.cn;mail.tsinghua.edu.cn", "position": "Undergrad student;PhD student;PhD student;Undergrad student;Assistant Professor;Intern;Full Professor;Chair Professor;PhD student", "bibtex": "@inproceedings{\nzhou2024novobench,\ntitle={NovoBench: Benchmarking Deep Learning-based {\\textbackslash}emph\\{De Novo\\} Sequencing Methods in Proteomics},\nauthor={Jingbo Zhou and Shaorong Chen and Jun Xia and Sizhe Liu and Tianze Ling and Wenjie Du and Yue Liu and Jianwei Yin and Stan Z. Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=RQlbMrA5XL}\n}", "github": "", "reviewers": "ABkg;c3Hs;9E2x;BQ8s", "pdf_size": 920287, "rating": "6;6;7;7", "confidence": "5;3;4;3", "wc_summary_and_contributions": "204;19;65;73", "wc_strengths": "78;41;18;44", "wc_improvement": "66;14;38;36", "wc_limitations": "24;39;8;17", "wc_correctness": "1;1;7;2", "wc_clarity": "16;2;7;1", "wc_relation_to_prior_work": "15;10;42;4", "wc_documentation": "22;10;12;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;137;198;179", "wc_reply_reviewers": "0;0;69;21", "wc_reply_authors": "204;185;106;112", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;3;4", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 90.25, 68.83086153753997 ], "wc_strengths_avg": [ 45.25, 21.41699091842736 ], "wc_improvement_avg": [ 38.5, 18.45941494197473 ], "wc_limitations_avg": [ 22.0, 11.335784048754634 ], "wc_correctness_avg": [ 2.75, 2.48746859276655 ], "wc_clarity_avg": [ 6.5, 5.937171043518958 ], "wc_relation_to_prior_work_avg": [ 17.75, 14.53229162933362 ], "wc_documentation_avg": [ 11.25, 7.46240577829965 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 235.25, 112.88572761868527 ], "wc_reply_reviewers_avg": [ 22.5, 28.182441342083905 ], "wc_reply_authors_avg": [ 151.75, 43.3265219005634 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RwlFrc1x5BEJ:scholar.google.com/&scioq=NovoBench:+Benchmarking+Deep+Learning-based+%5Cemph%7BDe+Novo%7D+Sequencing+Methods+in+Proteomics&hl=en&as_sdt=0,48", "gs_version_total": 2, "email": "jlu.edu.cn;westlake.edu.cn;westlake.edu.cn;usc.edu;ustc.edu.cn;uiuc.edu;zju.edu.cn;westlake.edu.cn;mail.tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;1;2;3;4;5;1;6", "aff_unique_norm": "Jilin University;Westlake University;University of Southern California;University of Science and Technology of China;University of Illinois Urbana-Champaign;Zhejiang University;Tsinghua University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.westlake.edu.cn;https://www.usc.edu;http://www.ustc.edu.cn;https://illinois.edu;https://www.zju.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "JLU;WU;USC;USTC;UIUC;ZJU;THU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Urbana-Champaign", "aff_country_unique_index": "0;0;0;1;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "CoSW: Conditional Sample Weighting for Smoke Segmentation with Label Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95170", "id": "RRRyQMn6dv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RRRyQMn6dv", "openreview": "https://openreview.net/forum?id=RRRyQMn6dv", "poster": "/media/PosterPDFs/NeurIPS%202024/95170.png?t=1730361659.6645706", "project": "", "author_site": "Lujian Yao, Haitao Zhao, Zhongze Wang, Kaijie Zhao, Jingchao Peng", "tldr": "", "abstract": "Smoke segmentation is of great importance in precisely identifying the smoke location, enabling timely fire rescue and gas leak detection. However, due to the visual diversity and blurry edges of the non-grid smoke, noisy labels are almost inevitable in large-scale pixel-level smoke datasets. Noisy labels significantly impact the robustness of the model and may lead to serious accidents. Nevertheless, currently, there are no specific methods for addressing noisy labels in smoke segmentation. Smoke differs from regular objects as its transparency varies, causing inconsistent features in the noisy labels. In this paper, we propose a conditional sample weighting (CoSW). CoSW utilizes a multi-prototype framework, where prototypes serve as prior information to apply different weighting criteria to the different feature clusters. A novel regularized within-prototype entropy (RWE) is introduced to achieve CoSW and stable prototype update. The experiments show that our approach achieves SOTA performance on both real-world and synthetic noisy smoke segmentation datasets.", "keywords": "Smoke Recognition; Smoke Segmentation; Industrial Applications", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lujian Yao;Haitao Zhao;Zhongze Wang;Kaijie Zhao;Jingchao Peng", "authorids": "~Lujian_Yao1;~Haitao_Zhao1;~Zhongze_Wang1;~Kaijie_Zhao1;~Jingchao_Peng1", "gender": "M;M;M;M;", "homepage": ";;https://github.com/b-bad;https://github.com/955374;", "dblp": "347/4200;00/3855-2;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;uU_As9wAAAAJ;AI8ccjUAAAAJ;rnmpSfkAAAAJ;", "orcid": ";;0009-0002-6938-2724;;", "linkedin": ";;;;", "or_profile": "~Lujian_Yao1;~Haitao_Zhao1;~Zhongze_Wang1;~Kaijie_Zhao1;~Jingchao_Peng1", "aff": "East China University of Science and Technology;East China University of Science and Technology;East China University of Science and Technology;East China University of Science and Technology;", "aff_domain": "ecust.edu.cn;ecust.edu.cn;ecust.edu.cn;ecust.edu.cn;", "position": "PhD student;Full Professor;PhD student;PhD student;", "bibtex": "@inproceedings{\nyao2024cosw,\ntitle={Co{SW}: Conditional Sample Weighting for Smoke Segmentation with Label Noise},\nauthor={Lujian Yao and Haitao Zhao and Zhongze Wang and Kaijie Zhao and Jingchao Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RRRyQMn6dv}\n}", "github": "", "reviewers": "KX4g;T8tv;1bVm;bKT9", "pdf_size": 4599634, "rating": "4;4;5;7", "confidence": "3;5;1;3", "soundness": "2;3;2;3", "novelty": "2;3;4;4", "presentation": "3;3;2;3", "wc_summary": "87;46;52;151", "wc_strengths": "119;1;37;202", "wc_weaknesses": "226;96;74;94", "wc_questions": "95;2;58;90", "wc_limitations": "99;2;11;171", "wc_review": "626;147;232;708", "wc_reply_reviewers": "12;17;12;346", "wc_reply_authors": "78;93;7;7", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 41.731283229730664 ], "wc_strengths_avg": [ 89.75, 77.64462312356214 ], "wc_weaknesses_avg": [ 122.5, 60.37176492367935 ], "wc_questions_avg": [ 61.25, 37.036299761180246 ], "wc_limitations_avg": [ 70.75, 69.18227735482549 ], "wc_review_avg": [ 428.25, 242.3740652380118 ], "wc_reply_reviewers_avg": [ 96.75, 143.9190310556599 ], "wc_reply_authors_avg": [ 46.25, 39.60665979352462 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5ByREu_M804J:scholar.google.com/&scioq=CoSW:+Conditional+Sample+Weighting+for+Smoke+Segmentation+with+Label+Noise&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ecust.edu.cn;ecust.edu.cn;ecust.edu.cn;ecust.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "East China University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.ecust.edu.cn", "aff_unique_abbr": "ECUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Bridging the Divide: Reconsidering Softmax and Linear Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95169", "id": "RSiGFzQapl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RSiGFzQapl", "openreview": "https://openreview.net/forum?id=RSiGFzQapl", "poster": "/media/PosterPDFs/NeurIPS%202024/95169.png?t=1731409661.6830997", "project": "", "author_site": "Dongchen Han, Yifan Pu, Zhuofan Xia, Yizeng Han, Xuran Pan, Xiu Li, Jiwen Lu, Shiji Song, Gao Huang", "tldr": "", "abstract": "Widely adopted in modern Vision Transformer designs, Softmax attention can effectively capture long-range visual information; however, it incurs excessive computational cost when dealing with high-resolution inputs. In contrast, linear attention naturally enjoys linear complexity and has great potential to scale up to higher-resolution images. Nonetheless, the unsatisfactory performance of linear attention greatly limits its practical application in various scenarios. In this paper, we take a step forward to close the gap between the linear and Softmax attention with novel theoretical analyses, which demystify the core factors behind the performance deviations. Specifically, we present two key perspectives to understand and alleviate the limitations of linear attention: the injective property and the local modeling ability. Firstly, we prove that linear attention is not injective, which is prone to assign identical attention weights to different query vectors, thus adding to severe semantic confusion since different queries correspond to the same outputs. Secondly, we confirm that effective local modeling is essential for the success of Softmax attention, in which linear attention falls short. The aforementioned two fundamental differences significantly contribute to the disparities between these two attention paradigms, which is demonstrated by our substantial empirical validation in the paper. In addition, more experiment results indicate that linear attention, as long as endowed with these two properties, can outperform Softmax attention across various tasks while maintaining lower computation complexity. Code is available at https://github.com/LeapLabTHU/InLine.", "keywords": "Softmax attention;linear attention", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Dongchen Han;Yifan Pu;Zhuofan Xia;Yizeng Han;Xuran Pan;Xiu Li;Jiwen Lu;Shiji Song;Gao Huang", "authorids": "~Dongchen_Han1;~Yifan_Pu1;~Zhuofan_Xia2;~Yizeng_Han1;~Xuran_Pan1;~Xiu_Li1;~Jiwen_Lu1;~Shiji_Song1;~Gao_Huang1", "gender": "M;M;;M;M;F;M;M;M", "homepage": "https://github.com/tian-qing001;https://yifanpu001.github.io/;;https://yizenghan.top/;https://xuranpan.plus;https://thusigsiclab.github.io/thu.github.io/introduction.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/;;http://www.gaohuang.net", "dblp": ";222/2710;;217/9548;224/3833;13/1206-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen;72/5351;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;oM9rnYQAAAAJ;;25mubAsAAAAJ;pIg5Qc4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;TN8uDQoAAAAJ;;-P9LwcgAAAAJ", "orcid": ";0000-0002-0404-1737;;;;0000-0003-0403-1923;0000-0002-6121-5529;;", "linkedin": ";;;;;;;;", "or_profile": "~Dongchen_Han1;~Yifan_Pu1;~Zhuofan_Xia2;~Yizeng_Han1;~Xuran_Pan1;~Xiu_Li1;~Jiwen_Lu1;~Shiji_Song1;~Gao_Huang1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;;PhD student;PhD student;Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nhan2024bridging,\ntitle={Bridging the Divide: Reconsidering Softmax and Linear Attention},\nauthor={Dongchen Han and Yifan Pu and Zhuofan Xia and Yizeng Han and Xuran Pan and Xiu Li and Jiwen Lu and Shiji Song and Gao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RSiGFzQapl}\n}", "github": "", "reviewers": "3cj2;My8Q;TKLm;yvdC", "pdf_size": 696551, "rating": "5;5;5;6", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "10;131;89;98", "wc_strengths": "69;103;30;43", "wc_weaknesses": "46;264;42;176", "wc_questions": "46;31;89;64", "wc_limitations": "18;1;8;1", "wc_review": "189;530;258;382", "wc_reply_reviewers": "0;45;0;34", "wc_reply_authors": "43;62;43;29", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 44.41283598240491 ], "wc_strengths_avg": [ 61.25, 27.896012259819503 ], "wc_weaknesses_avg": [ 132.0, 93.34880824091971 ], "wc_questions_avg": [ 57.5, 21.615966321217286 ], "wc_limitations_avg": [ 7.0, 6.96419413859206 ], "wc_review_avg": [ 339.75, 129.79671606015307 ], "wc_reply_reviewers_avg": [ 19.75, 20.129269733400662 ], "wc_reply_authors_avg": [ 44.25, 11.734031702701335 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1705136524545875950&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Conditional Controllable Image Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95168", "id": "RSs4o7CSqe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RSs4o7CSqe", "openreview": "https://openreview.net/forum?id=RSs4o7CSqe", "poster": "/media/PosterPDFs/NeurIPS%202024/95168.png?t=1731661525.6414688", "project": "", "author_site": "Bing Cao, Xingxin Xu, Pengfei Zhu, Qilong Wang, Qinghua Hu", "tldr": "", "abstract": "Image fusion aims to integrate complementary information from multiple input images acquired through various sources to synthesize a new fused image. Existing methods usually employ distinct constraint designs tailored to specific scenes, forming fixed fusion paradigms. However, this data-driven fusion approach is challenging to deploy in varying scenarios, especially in rapidly changing environments. To address this issue, we propose a conditional controllable fusion (CCF) framework for general image fusion tasks without specific training. Due to the dynamic differences of different samples, our CCF employs specific fusion constraints for each individual in practice. Given the powerful generative capabilities of the denoising diffusion model, we first inject the specific constraints into the pre-trained DDPM as adaptive fusion conditions. The appropriate conditions are dynamically selected to ensure the fusion process remains responsive to the specific requirements in each reverse diffusion stage. Thus, CCF enables conditionally calibrating the fused images step by step. Extensive experiments validate our effectiveness in general fusion tasks across diverse scenarios against the competing methods without additional training. The code is publicly available.", "keywords": "Image Fusion; DDPM; Conditional Bank; Controllable Fusion", "primary_area": "machine_vision", "supplementary_material": "", "author": "Bing Cao;Xingxin Xu;Pengfei Zhu;Qilong Wang;Qinghua Hu", "authorids": "~Bing_Cao1;~Xingxin_Xu2;~Pengfei_Zhu1;~Qilong_Wang3;~Qinghua_Hu1", "gender": "M;M;M;;M", "homepage": "https://bcaosudo.github.io;;http://aiskyeye.com/;https://csqlwang.github.io/homepage/;http://cic.tju.edu.cn/faculty/huqinghua/index.html", "dblp": "59/4329;243/3806.html;40/6172-1.html;119/1488.html;", "google_scholar": "6KeTXm4AAAAJ;aBuL35gAAAAJ;https://scholar.google.com/citations?hl=zh-TW;qdPVJN0AAAAJ;TVSNq_wAAAAJ", "orcid": "0000-0002-0316-5404;;;0000-0002-3765-9787;0000-0001-7765-8095", "linkedin": ";;;;", "or_profile": "~Bing_Cao1;~Xingxin_Xu2;~Pengfei_Zhu1;~Qilong_Wang3;~Qinghua_Hu1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "Associate Professor;PhD student;Full Professor;Associate Professor;Professor", "bibtex": "@inproceedings{\ncao2024conditional,\ntitle={Conditional Controllable Image Fusion},\nauthor={Bing Cao and Xingxin Xu and Pengfei Zhu and Qilong Wang and Qinghua Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RSs4o7CSqe}\n}", "github": "", "reviewers": "ghie;PorP;spMU;y2va", "pdf_size": 38223651, "rating": "5;5;6;7", "confidence": "5;5;5;4", "soundness": "3;3;3;3", "novelty": "4;2;2;4", "presentation": "2;2;2;3", "wc_summary": "65;76;46;83", "wc_strengths": "41;58;38;112", "wc_weaknesses": "158;152;205;192", "wc_questions": "166;127;50;6", "wc_limitations": "41;41;4;34", "wc_review": "471;454;343;427", "wc_reply_reviewers": "0;32;33;31", "wc_reply_authors": "0;146;18;32", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 13.97318861248212 ], "wc_strengths_avg": [ 62.25, 29.71847068743612 ], "wc_weaknesses_avg": [ 176.75, 22.331312097590683 ], "wc_questions_avg": [ 87.25, 62.79082337412052 ], "wc_limitations_avg": [ 30.0, 15.280706789936126 ], "wc_review_avg": [ 423.75, 49.190319169527655 ], "wc_reply_reviewers_avg": [ 24.0, 13.874436925511608 ], "wc_reply_authors_avg": [ 49.0, 57.14017850864661 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14320921920854940222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MathPile: A Billion-Token-Scale Pretraining Corpus for Math", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97685", "id": "RSvhU69sbG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RSvhU69sbG", "openreview": "https://openreview.net/forum?id=RSvhU69sbG", "poster": "/media/PosterPDFs/NeurIPS%202024/97685.png?t=1731515561.126715", "project": "", "author_site": "Zengzhi Wang, Xuefeng Li, Rui Xia, Pengfei Liu", "tldr": "", "abstract": "High-quality, large-scale corpora are the cornerstone of building foundation models. In this work, we introduce MathPile, a diverse and high-quality math-centric corpus comprising about 9.5 billion tokens. Throughout its creation, we adhered to the principle of \u201cless is more\u201d, firmly believing in the supremacy of data quality over quantity, even in the pre-training phase. Our meticulous data collection and processing efforts included a complex suite of preprocessing, prefiltering, language identification, cleaning, filtering, and deduplication, ensuring the high quality of our corpus. Furthermore, we performed data contamination detection on downstream benchmark test sets to eliminate duplicates and conducted continual pre-training experiments, booting the performance on common mathematical reasoning benchmarks. We aim for our MathPile to boost language models\u2019 mathematical reasoning abilities and open-source its different versions and processing scripts to advance the field.", "keywords": "mathematical reasoning;pretraining corpus for math;large language models;math", "primary_area": "", "supplementary_material": "/attachment/ab9ba9785d6cd1fe2bf16cdbb3a5abfa76babcce.pdf", "author": "Zengzhi Wang;Xuefeng Li;Rui Xia;Pengfei Liu", "authorids": "~Zengzhi_Wang1;~Xuefeng_Li6;~Rui_Xia1;~Pengfei_Liu1", "gender": "M;M;M;M", "homepage": "https://sinclaircoder.github.io/;https://github.com/hongtangshui;http://www.nustm.cn/member/rxia/;http://pfliu.com/", "dblp": "34/133;;;34/3381-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=Znde6gwAAAAJ;oIz_CYEAAAAJ", "orcid": "0000-0002-6146-6248;;;", "linkedin": ";;;", "or_profile": "~Zengzhi_Wang1;~Xuefeng_Li6;~Rui_Xia1;~Pengfei_Liu1", "aff": ";Shanghai Jiaotong University;Nanjing University of Science and Technology;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;njust.edu.cn;sjtu.edu", "position": ";PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024mathpile,\ntitle={MathPile: A Billion-Token-Scale Pretraining Corpus for Math},\nauthor={Zengzhi Wang and Xuefeng Li and Rui Xia and Pengfei Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=RSvhU69sbG}\n}", "github": "", "reviewers": "y6yr;yX5o;W3kS;t4e7", "pdf_size": 1030679, "rating": "6;6;6;9", "confidence": "4;4;4;4", "wc_summary_and_contributions": "79;85;51;53", "wc_strengths": "170;3;3;34", "wc_improvement": "307;12;15;21", "wc_limitations": "1;3;56;8", "wc_correctness": "10;1;3;8", "wc_clarity": "5;1;3;10", "wc_relation_to_prior_work": "9;1;3;24", "wc_documentation": "14;1;3;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "596;108;138;168", "wc_reply_reviewers": "0;0;572;26", "wc_reply_authors": "0;0;4291;0", "reply_reviewers": "0;0;3;1", "reply_authors": "2;1;13;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 67.0, 15.165750888103101 ], "wc_strengths_avg": [ 52.5, 69.00905737655022 ], "wc_improvement_avg": [ 88.75, 126.04835381709671 ], "wc_limitations_avg": [ 17.0, 22.66053838724932 ], "wc_correctness_avg": [ 5.5, 3.640054944640259 ], "wc_clarity_avg": [ 4.75, 3.344772040064913 ], "wc_relation_to_prior_work_avg": [ 9.25, 9.01041064547005 ], "wc_documentation_avg": [ 6.75, 5.11737237261468 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 252.5, 199.45112183189144 ], "wc_reply_reviewers_avg": [ 149.5, 244.16131962290834 ], "wc_reply_authors_avg": [ 1072.75, 1858.0575038195132 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 4.25, 5.0682837331783235 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12287889888439988735&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";sjtu.edu.cn;njust.edu.cn;sjtu.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Nanjing University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.nust.edu.cn/", "aff_unique_abbr": "SJTU;NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "RTaSjoxmw9", "title": "Outlier-Robust Phase Retrieval in Nearly-Linear Time", "track": "main", "status": "Reject", "tldr": "", "abstract": "Phase retrieval is a fundamental problem in signal processing, where the goal is to recover a (complex-valued) signal from phaseless intensity measurements. It is well-known that natural nonconvex formulations of phase retrieval do not have spurious local optima. However, the theoretical analyses of such landscape results often rely on strong assumptions, such as the sampled measurements are (complex) Gaussian. In this paper, we propose and study the problem of outlier robust phase retrieval. We focus on the real-valued case, where we seek to recover a vector $x \\in \\mathbb{R}^d$ from $n$ intensity measurements $y_i = (a_i^\\top x )^2$, under the assumption that the $a_i$'s are initially i.i.d. Gaussian but a small fraction of the $(y_i, a_i)$ pairs are adversarially corrupted. Our main result is a near sample-optimal nearly-linear time algorithm that provably recovers the ground-truth vector $x$ in the presence of outliers. We first solve a lightweight convex program to find a vector close to the ground truth. We then run robust gradient descent starting from this initial solution, leveraging recent advances in high-dimensional robust statistics. Our approach is conceptually simple and provides a framework for developing robust algorithms for tractable nonconvex problems.", "keywords": "phase retrieval;high-dimensional robust statistics;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Haichen Dong;Alessio Mazzetto;Yu Cheng;Rong Ge", "authorids": "~Haichen_Dong1;~Alessio_Mazzetto1;~Yu_Cheng2;~Rong_Ge1", "gender": "M;M;M;M", "homepage": "https://haichendong.com/;https://cs.brown.edu/~amazzett/;https://cs.brown.edu/people/ycheng79/;https://users.cs.duke.edu/~rongge/", "dblp": ";239/8316.html;96/3060-2;89/6869-1.html", "google_scholar": ";FkZ0hSsAAAAJ;lVoOIv4AAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ", "orcid": ";0009-0006-5893-0915;0000-0002-0019-2570;", "linkedin": ";;yu-cheng-40401632/;", "or_profile": "~Haichen_Dong1;~Alessio_Mazzetto1;~Yu_Cheng2;~Rong_Ge1", "aff": "Princeton University;Brown University;Brown University;Duke University", "aff_domain": "princeton.edu;brown.edu;brown.edu;duke.edu", "position": "MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024outlierrobust,\ntitle={Outlier-Robust Phase Retrieval in Nearly-Linear Time},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=RTaSjoxmw9}\n}", "github": "", "project": "", "reviewers": "ednt;MNuM;BQYS;XpSZ", "site": "https://openreview.net/forum?id=RTaSjoxmw9", "pdf_size": 406446, "rating": "2;3;4;7", "confidence": "5;4;4;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "82;76;65;92", "wc_strengths": "41;32;31;62", "wc_weaknesses": "396;150;204;104", "wc_questions": "347;3;29;26", "wc_limitations": "1;3;3;8", "wc_review": "867;264;332;292", "wc_reply_reviewers": "0;522;73;106", "wc_reply_authors": "0;658;0;0", "reply_reviewers": "0;3;1;1", "reply_authors": "1;5;1;1", "rating_avg": [ 4.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 78.75, 9.781998773256925 ], "wc_strengths_avg": [ 41.5, 12.459935794377111 ], "wc_weaknesses_avg": [ 213.5, 111.15192306028717 ], "wc_questions_avg": [ 101.25, 142.23989419287403 ], "wc_limitations_avg": [ 3.75, 2.5860201081971503 ], "wc_review_avg": [ 438.75, 248.42843536922257 ], "wc_reply_reviewers_avg": [ 175.25, 203.83740456550166 ], "wc_reply_authors_avg": [ 164.5, 284.92235784508034 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6172133998483676, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J61ZRUyySjoJ:scholar.google.com/&scioq=Outlier-Robust+Phase+Retrieval+in+Nearly-Linear+Time&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Princeton University;Brown University;Duke University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.brown.edu;https://www.duke.edu", "aff_unique_abbr": "Princeton;Brown;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DDN: Dual-domain Dynamic Normalization for Non-stationary Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95167", "id": "RVZfra6sZo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RVZfra6sZo", "openreview": "https://openreview.net/forum?id=RVZfra6sZo", "poster": "/media/PosterPDFs/NeurIPS%202024/95167.png?t=1731404736.2303023", "project": "", "author_site": "Tao Dai, Beiliang Wu, Peiyuan Liu, Naiqi Li, Xue Yuerong, Shu-Tao Xia, Zexuan Zhu", "tldr": "", "abstract": "Deep neural networks (DNNs) have recently achieved remarkable advancements in time series forecasting (TSF) due to their powerful ability of sequence dependence modeling. To date, existing DNN-based TSF methods still suffer from unreliable predictions for real-world data due to its non-stationarity characteristics, i.e., data distribution varies quickly over time. To mitigate this issue, several normalization methods (e.g., SAN) have recently been specifically designed by normalization in a fixed period/window in the time domain. However, these methods still struggle to capture distribution variations, due to the complex time patterns of time series in the time domain. Based on the fact that wavelet transform can decompose time series into a linear combination of different frequencies, which exhibits distribution variations with time-varying periods, we propose a novel Dual-domain Dynamic Normalization (DDN) to dynamically capture distribution variations in both time and frequency domains. Specifically, our DDN tries to eliminate the non-stationarity of time series via both frequency and time domain normalization in a sliding window way. Besides, our DDN can serve as a plug-in-play module, and thus can be easily incorporated into other forecasting models. Extensive experiments on public benchmark datasets under different forecasting models demonstrate the superiority of our DDN over other normalization methods. Code will be made available following the review process.", "keywords": "time series forecasting;non-stationary", "primary_area": "other", "supplementary_material": "", "author": "Tao Dai;Beiliang Wu;Peiyuan Liu;Naiqi Li;Xue Yuerong;Shu-Tao Xia;Zexuan Zhu", "authorids": "~Tao_Dai3;~Beiliang_Wu1;~Peiyuan_Liu1;~Naiqi_Li1;~Xue_Yuerong1;~Shu-Tao_Xia1;~Zexuan_Zhu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://csse.szu.edu.cn/pages/user/index?id=1204;https://peiyuanliu.info;https://naiqili.github.io/;;https://www.sigs.tsinghua.edu.cn/xst/list.htm;http://csse.szu.edu.cn/staff/zhuzx;https://github.com/ClaiDenStudent", "dblp": "54/875-1;157/4439;117/4912;123/7213.html;03/6195;17/4590;", "google_scholar": "MqJNdaAAAAAJ;mS0R3qEAAAAJ;5K2l_wUAAAAJ;;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ;kqHaIGEAAAAJ;", "orcid": "0000-0003-0594-6404;;;;0000-0002-8639-982X;0000-0001-8479-6904;", "linkedin": ";peiyuan-liu-b7b49022b;;;;;", "or_profile": "~Tao_Dai3;~Peiyuan_Liu1;~Naiqi_Li1;~Xue_Yuerong1;~Shu-Tao_Xia1;~Zexuan_Zhu1;~Alan_Wu2", "aff": "Department of Software Engineering, Shenzhen University;Tsinghua University;Tsinghua University;Tsinghua University;Shenzhen International Graduate School, Tsinghua University;Shenzhen University;Shenzhen University", "aff_domain": "szu.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sz.tsinghua.edu.cn;szu.edu.cn;szu.edu.cn", "position": "Assistant Professor;MS student;PhD student;PhD student;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\ndai2024ddn,\ntitle={{DDN}: Dual-domain Dynamic Normalization for Non-stationary Time Series Forecasting},\nauthor={Tao Dai and Beiliang Wu and Peiyuan Liu and Naiqi Li and Xue Yuerong and Shu-Tao Xia and Zexuan Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RVZfra6sZo}\n}", "github": "", "reviewers": "G9hB;i5Qt;qBzF;Ej1X", "pdf_size": 2532152, "rating": "5;5;7;7", "confidence": "5;3;5;4", "soundness": "3;3;4;4", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "77;78;98;87", "wc_strengths": "16;104;143;116", "wc_weaknesses": "373;160;194;76", "wc_questions": "6;89;24;37", "wc_limitations": "14;1;6;18", "wc_review": "486;432;465;334", "wc_reply_reviewers": "24;46;32;18", "wc_reply_authors": "28;91;12;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.0, 8.455767262643882 ], "wc_strengths_avg": [ 94.75, 47.60974165021272 ], "wc_weaknesses_avg": [ 200.75, 108.32676262124701 ], "wc_questions_avg": [ 39.0, 30.894983411550815 ], "wc_limitations_avg": [ 9.75, 6.6473679001541655 ], "wc_review_avg": [ 429.25, 58.26394682820586 ], "wc_reply_reviewers_avg": [ 30.0, 10.488088481701515 ], "wc_reply_authors_avg": [ 32.75, 35.06690034776384 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10488171859099717998&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "szu.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sz.tsinghua.edu.cn;szu.edu.cn;szu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0;0", "aff_unique_norm": "Shenzhen University;Tsinghua University", "aff_unique_dep": "Department of Software Engineering;", "aff_unique_url": "https://www.szu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SZU;THU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DDR: Exploiting Deep Degradation Response as Flexible Image Descriptor", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95166", "id": "RXLO4Zv3wB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RXLO4Zv3wB", "openreview": "https://openreview.net/forum?id=RXLO4Zv3wB", "poster": "/media/PosterPDFs/NeurIPS%202024/95166.png?t=1731303739.6132426", "project": "", "author_site": "Juncheng Wu, Zhangkai Ni, Hanli Wang, Wenhan Yang, Yuyin Zhou, Shiqi Wang", "tldr": "", "abstract": "Image deep features extracted by pre-trained networks are known to contain rich and informative representations. In this paper, we present Deep Degradation Response (DDR), a method to quantify changes in image deep features under varying degradation conditions. Specifically, our approach facilitates flexible and adaptive degradation, enabling the controlled synthesis of image degradation through text-driven prompts. Extensive evaluations demonstrate the versatility of DDR as an image descriptor, with strong correlations observed with key image attributes such as complexity, colorfulness, sharpness, and overall quality. Moreover, we demonstrate the efficacy of DDR across a spectrum of applications. It excels as a blind image quality assessment metric, outperforming existing methodologies across multiple datasets. Additionally, DDR serves as an effective unsupervised learning objective in image restoration tasks, yielding notable advancements in image deblurring and single-image super-resolution. Our code is available at: https://github.com/eezkni/DDR.", "keywords": "Degradation Response;Image Discriptor;Image Deblur;Image Super-Resolution;Image Quality Assessment", "primary_area": "machine_vision", "supplementary_material": "/attachment/a2d551d2bc90ae8100e009bf7115ed18be77dd29.zip", "author": "Juncheng Wu;Zhangkai Ni;Hanli Wang;Wenhan Yang;Yuyin Zhou;Shiqi Wang", "authorids": "~Juncheng_Wu1;~Zhangkai_Ni2;~Hanli_Wang1;~Wenhan_Yang6;~Yuyin_Zhou1;~Shiqi_Wang1", "gender": "M;M;M;;M;M", "homepage": "https://chtholly17.github.io/;https://mic.tongji.edu.cn/;https://flyywh.github.io/;https://yuyinzhou.github.io/;https://www.cs.cityu.edu.hk/~shiqwang/;https://eezkni.github.io/", "dblp": "371/4972;04/5757.html;156/2359.html;192/1413;58/9145-1;185/7403", "google_scholar": "https://scholar.google.com/citations?hl=en;;S8nAnakAAAAJ;eiqVLC0AAAAJ;Pr7s2VUAAAAJ;68IcrE4AAAAJ", "orcid": ";;;;;0000-0003-3682-6288", "linkedin": ";;;;;", "or_profile": "~Juncheng_Wu1;~Hanli_Wang1;~Wenhan_Yang6;~Yuyin_Zhou1;~Shiqi_Wang1;~Zhangkai_NI1", "aff": "Tongji University;Tongji University;Peng Cheng Laboratory;University of California, Santa Cruz;City University of Hong Kong;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;pcl.ac.cn;ucsc.edu;cityu.edu.hk;tongji.edu.cn", "position": "Undergrad student;Full Professor;Researcher;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024ddr,\ntitle={{DDR}: Exploiting Deep Degradation Response as Flexible Image Descriptor},\nauthor={Juncheng Wu and Zhangkai Ni and Hanli Wang and Wenhan Yang and Yuyin Zhou and Shiqi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RXLO4Zv3wB}\n}", "github": "", "reviewers": "mUCn;Y3EC;tuJD", "pdf_size": 29977886, "rating": "5;5;7", "confidence": "5;4;4", "soundness": "2;2;3", "novelty": "2;3;2", "presentation": "2;2;3", "wc_summary": "69;104;19", "wc_strengths": "49;70;12", "wc_weaknesses": "168;331;172", "wc_questions": "46;50;99", "wc_limitations": "12;42;6", "wc_review": "344;597;308", "wc_reply_reviewers": "14;46;65", "wc_reply_authors": "31;62;98", "reply_reviewers": "1;1;2", "reply_authors": "2;3;3", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 64.0, 34.88074922742725 ], "wc_strengths_avg": [ 43.666666666666664, 23.976840677805924 ], "wc_weaknesses_avg": [ 223.66666666666666, 75.91369368492677 ], "wc_questions_avg": [ 65.0, 24.097026095903757 ], "wc_limitations_avg": [ 20.0, 15.748015748023622 ], "wc_review_avg": [ 416.3333333333333, 128.59324329925823 ], "wc_reply_reviewers_avg": [ 41.666666666666664, 21.044925490219462 ], "wc_reply_authors_avg": [ 63.666666666666664, 27.378012264508094 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10525679875363736362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tongji.edu.cn;tongji.edu.cn;pcl.ac.cn;ucsc.edu;cityu.edu.hk;tongji.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Tongji University;Pengcheng Laboratory;University of California, Santa Cruz;City University of Hong Kong", "aff_unique_dep": ";Peng Cheng Laboratory;;", "aff_unique_url": "https://www.tongji.edu.cn;http://www.pcl.ac.cn;https://www.ucsc.edu;https://www.cityu.edu.hk", "aff_unique_abbr": "Tongji;PCL;UCSC;CityU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Cruz;Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Optical Diffusion Models for Image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95165", "id": "RY3rDQV0tQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RY3rDQV0tQ", "openreview": "https://openreview.net/forum?id=RY3rDQV0tQ", "poster": "", "project": "", "author_site": "Ilker Oguz, Niyazi Dinc, Mustafa Yildirim, Junjie Ke, Innfarn Yoo, Qifei Wang, Feng Yang, Christophe Moser, Demetri Psaltis", "tldr": "", "abstract": "Diffusion models generate new samples by progressively decreasing the noise from the initially provided random distribution. This inference procedure generally utilizes a trained neural network numerous times to obtain the final output, creating significant latency and energy consumption on digital electronic hardware such as GPUs. In this study, we demonstrate that the propagation of a light beam through a transparent medium can be programmed to implement a denoising diffusion model on image samples. This framework projects noisy image patterns through passive diffractive optical layers, which collectively only transmit the predicted noise term in the image. The optical transparent layers, which are trained with an online training approach, backpropagating the error to the analytical model of the system, are passive and kept the same across different steps of denoising. Hence this method enables high-speed image generation with minimal power consumption, benefiting from the bandwidth and energy efficiency of optical information processing.", "keywords": "Diffusion based model;image generation;optical computing;efficient computing", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Ilker Oguz;Niyazi Ulas Dinc;Mustafa Yildirim;Junjie Ke;Innfarn Yoo;QIFEI WANG;Feng Yang;Christophe Moser;Demetri Psaltis", "authorids": "~Ilker_Oguz1;~Niyazi_Ulas_Dinc1;~Mustafa_Yildirim1;~Junjie_Ke1;~Innfarn_Yoo1;~QIFEI_WANG1;~Feng_Yang2;~Christophe_Moser1;~Demetri_Psaltis1", "gender": ";M;M;F;M;M;M;M;M", "homepage": ";;;;;;https://sites.google.com/corp/view/feng-yang;https://www.epfl.ch/labs/lapd/;", "dblp": "282/1349;;;;140/7741;73/8011;22/4613-8;;63/1744", "google_scholar": "4jP60b4AAAAJ;https://scholar.google.com.tr/citations?user=YUy4K_QAAAAJ;https://scholar.google.com.tr/citations?user=s-bSMvkAAAAJ;;z8de7WQAAAAJ;vHZenW0AAAAJ;XI8oQn8AAAAJ;https://scholar.google.ch/citations?user=GsCNrfUAAAAJ;", "orcid": "0000-0003-2674-684X;0000-0002-9606-8098;;;;;;;", "linkedin": ";niyaziulasdinc/;;junjie-ke/;;;feng-yang-214a281a;;", "or_profile": "~Ilker_Oguz1;~Niyazi_Ulas_Dinc1;~Mustafa_Yildirim1;~Junjie_Ke1;~Innfarn_Yoo1;~QIFEI_WANG1;~Feng_Yang2;~Christophe_Moser1;~Demetri_Psaltis1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Research, Google;Google;Google;Google;EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;research.google.com;google.com;google.com;google.com;epfl.ch;epfl.ch", "position": "PhD student;Postdoc;PhD student;Researcher;Researcher;Researcher;Senior Staff Software Engineer;Associate Professor;Full Professor", "bibtex": "@inproceedings{\noguz2024optical,\ntitle={Optical Diffusion Models for Image Generation},\nauthor={Ilker Oguz and Niyazi Ulas Dinc and Mustafa Yildirim and Junjie Ke and Innfarn Yoo and QIFEI WANG and Feng Yang and Christophe Moser and Demetri Psaltis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RY3rDQV0tQ}\n}", "github": "", "reviewers": "Kx6v;hbsN;heiE;5uf9", "pdf_size": 10318580, "rating": "4;6;7;7", "confidence": "5;3;3;4", "soundness": "2;3;3;3", "novelty": "1;2;3;3", "presentation": "2;3;3;2", "wc_summary": "35;59;69;124", "wc_strengths": "22;69;32;30", "wc_weaknesses": "66;181;87;715", "wc_questions": "119;24;68;59", "wc_limitations": "119;30;65;2", "wc_review": "361;363;321;930", "wc_reply_reviewers": "70;11;15;230", "wc_reply_authors": "331;0;0;851", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.75, 32.59888801784503 ], "wc_strengths_avg": [ 38.25, 18.14352501582865 ], "wc_weaknesses_avg": [ 262.25, 264.95789759884497 ], "wc_questions_avg": [ 67.5, 33.974254958718376 ], "wc_limitations_avg": [ 54.0, 43.6634858892416 ], "wc_review_avg": [ 493.75, 252.4256078530861 ], "wc_reply_reviewers_avg": [ 81.5, 88.8495920080672 ], "wc_reply_authors_avg": [ 295.5, 348.0233469179905 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10940844506209568117&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch;epfl.ch;research.google.com;google.com;google.com;google.com;epfl.ch;epfl.ch", "author_num": 9, "aff_unique_index": "0;0;0;1;1;1;1;0;0", "aff_unique_norm": "EPFL;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.epfl.ch;https://research.google", "aff_unique_abbr": "EPFL;Google", "aff_campus_unique_index": "0;0;0;1;1;1;1;0;0", "aff_campus_unique": "Lausanne;Mountain View", "aff_country_unique_index": "0;0;0;1;1;1;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Sample Complexity Reduction via Policy Difference Estimation in Tabular Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95164", "id": "RYQ0KuZvkL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RYQ0KuZvkL", "openreview": "https://openreview.net/forum?id=RYQ0KuZvkL", "poster": "", "project": "", "author_site": "Adhyyan Narang, Andrew Wagenmaker, Lillian Ratliff, Kevin Jamieson", "tldr": "", "abstract": "In this paper, we study the non-asymptotic sample complexity for the pure exploration problem in contextual bandits and tabular reinforcement learning (RL): identifying an $\\epsilon$-optimal policy from a set of policies $\\Pi$ with high probability. Existing work in bandits has shown that it is possible to identify the best policy by estimating only the *difference* between the behaviors of individual policies\u2013which can have substantially lower variance than estimating the behavior of each policy directly\u2014yet the best-known complexities in RL fail to take advantage of this, and instead estimate the behavior of each policy directly. Does it suffice to estimate only the differences in the behaviors of policies in RL? We answer this question positively for contextual bandits, but in the negative for tabular RL, showing a separation between contextual bandits and RL. However, inspired by this, we show that it *almost* suffices to estimate only the differences in RL: if we can estimate the behavior of a *single* reference policy, it suffices to only estimate how any other policy deviates from this reference policy. We develop an algorithm which instantiates this principle and obtains, to the best of our knowledge, the tightest known bound on the sample complexity of tabular RL.", "keywords": "pure exploration;tabular RL;bandits;best policy identification;instance-dependent", "primary_area": "bandits", "supplementary_material": "", "author": "Adhyyan Narang;Andrew Wagenmaker;Lillian J. Ratliff;Kevin Jamieson", "authorids": "~Adhyyan_Narang1;~Andrew_Wagenmaker1;~Lillian_J._Ratliff1;~Kevin_Jamieson1", "gender": "M;M;M;F", "homepage": "https://wagenmaker.github.io;;http://adhyyannarang.com/;https://faculty.washington.edu/ratliffl/", "dblp": "195/1036;85/10260;265/5821;127/7426", "google_scholar": "ym8AZSIAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-8936-0229", "linkedin": ";;;", "or_profile": "~Andrew_Wagenmaker1;~Kevin_Jamieson1;~Adhyyan_Manish_Narang1;~Lillian_Ratliff1", "aff": "University of Washington, Seattle;University of Washington;University of Washington, Seattle;University of Washington, Seattle", "aff_domain": "uw.edu;washington.edu;uw.edu;uw.edu", "position": "PhD student;Associate Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nnarang2024sample,\ntitle={Sample Complexity Reduction via Policy Difference Estimation in Tabular Reinforcement Learning},\nauthor={Adhyyan Narang and Andrew Wagenmaker and Lillian J. Ratliff and Kevin Jamieson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RYQ0KuZvkL}\n}", "github": "", "reviewers": "FF9h;76qf;f5AC", "pdf_size": 680079, "rating": "7;7;8", "confidence": "2;2;4", "soundness": "4;3;3", "novelty": "3;4;3", "presentation": "4;3;4", "wc_summary": "98;39;69", "wc_strengths": "144;38;32", "wc_weaknesses": "50;41;27", "wc_questions": "114;42;74", "wc_limitations": "1;2;9", "wc_review": "407;162;211", "wc_reply_reviewers": "24;18;21", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.66666666666667, 24.087802353519557 ], "wc_strengths_avg": [ 71.33333333333333, 51.44144459696114 ], "wc_weaknesses_avg": [ 39.333333333333336, 9.46337971105226 ], "wc_questions_avg": [ 76.66666666666667, 29.4542960458327 ], "wc_limitations_avg": [ 4.0, 3.559026084010437 ], "wc_review_avg": [ 260.0, 105.85209807399505 ], "wc_reply_reviewers_avg": [ 21.0, 2.449489742783178 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b78U0M0olbUJ:scholar.google.com/&scioq=Sample+Complexity+Reduction+via+Policy+Difference+Estimation+in+Tabular+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "uw.edu;washington.edu;uw.edu;uw.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SSA-Seg: Semantic and Spatial Adaptive Pixel-level Classifier for Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95163", "id": "RZZo23pQFL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RZZo23pQFL", "openreview": "https://openreview.net/forum?id=RZZo23pQFL", "poster": "/media/PosterPDFs/NeurIPS%202024/95163.png?t=1730946845.1236084", "project": "", "author_site": "Xiaowen Ma, Zhen-Liang Ni, Xinghao Chen", "tldr": "", "abstract": "Vanilla pixel-level classifiers for semantic segmentation are based on a certain paradigm, involving the inner product of fixed prototypes obtained from the training set and pixel features in the test image. This approach, however, encounters significant limitations, i.e., feature deviation in the semantic domain and information loss in the spatial domain. The former struggles with large intra-class variance among pixel features from different images, while the latter fails to utilize the structured information of semantic objects effectively. This leads to blurred mask boundaries as well as a deficiency of fine-grained recognition capability. In this paper, we propose a novel Semantic and Spatial Adaptive Classifier (SSA-Seg) to address the above challenges. Specifically, we employ the coarse masks obtained from the fixed prototypes as a guide to adjust the fixed prototype towards the center of the semantic and spatial domains in the test image. The adapted prototypes in semantic and spatial domains are then simultaneously considered to accomplish classification decisions. In addition, we propose an online multi-domain distillation learning strategy to improve the adaption process. Experimental results on three publicly available benchmarks show that the proposed SSA-Seg significantly improves the segmentation performance of the baseline models with only a minimal increase in computational cost.", "keywords": "Pixel-level classifier;multi-domain distillation;semantic adaptive;spatial adaptive", "primary_area": "machine_vision", "supplementary_material": "/attachment/c6ea01ef64111781e33d1ef2f4bf6f5a54a0296f.zip", "author": "Xiaowen Ma;Zhen-Liang Ni;Xinghao Chen", "authorids": "~Xiaowen_Ma1;~Zhen-Liang_Ni1;~Xinghao_Chen1", "gender": "M;M;M", "homepage": "https://github.com/xwmaxwma;https://scholar.google.com/citations?user=2urTmpkAAAAJ&hl=zh-CN;", "dblp": "258/6970;241/7013;30/4937-1", "google_scholar": "UXj8Q6kAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;tuGWUVIAAAAJ", "orcid": "0000-0001-5031-2641;0000-0002-3358-1994;0000-0002-2102-8235", "linkedin": ";;", "or_profile": "~Xiaowen_Ma1;~Zhen-Liang_Ni1;~Xinghao_Chen1", "aff": "Zhejiang University;Huawei Technologies Ltd.;Huawei Noah's Ark Lab", "aff_domain": "zju.edu.cn;huawei.com;huawei.com", "position": "MS student;Researcher;Researcher", "bibtex": "@inproceedings{\nma2024ssaseg,\ntitle={{SSA}-Seg: Semantic and Spatial Adaptive Pixel-level Classifier for Semantic Segmentation},\nauthor={Xiaowen Ma and Zhen-Liang Ni and Xinghao Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RZZo23pQFL}\n}", "github": "", "reviewers": "bEdU;Dw1G;ra2D", "pdf_size": 43552603, "rating": "2;5;5", "confidence": "5;3;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "86;121;61", "wc_strengths": "39;118;49", "wc_weaknesses": "750;75;38", "wc_questions": "43;5;2", "wc_limitations": "37;5;6", "wc_review": "955;324;156", "wc_reply_reviewers": "0;0;141", "wc_reply_authors": "0;0;620", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 4.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 89.33333333333333, 24.608038433722335 ], "wc_strengths_avg": [ 68.66666666666667, 35.122009560324926 ], "wc_weaknesses_avg": [ 287.6666666666667, 327.2678142167699 ], "wc_questions_avg": [ 16.666666666666668, 18.660713336371206 ], "wc_limitations_avg": [ 16.0, 14.854853303438128 ], "wc_review_avg": [ 478.3333333333333, 343.96156115989993 ], "wc_reply_reviewers_avg": [ 47.0, 66.46803743153546 ], "wc_reply_authors_avg": [ 206.66666666666666, 292.27080289043965 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=823013284133223477&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;huawei.com;huawei.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Zhejiang University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.zju.edu.cn;https://www.huawei.com", "aff_unique_abbr": "ZJU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "RZk2rxJT55", "title": "Hamiltonian Mechanics of Feature Learning: Bottleneck Structure in Leaky ResNets", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study Leaky ResNets, which interpolate between ResNets ($\\tilde{L}=0$)\nand Fully-Connected nets ($\\tilde{L}\\to\\infty$) depending on an 'effective\ndepth' hyper-parameter $\\tilde{L}$. In the infinite depth limit,\nwe study 'representation geodesics' $A_{p}$: continuous paths in\nrepresentation space (similar to NeuralODEs) from input $p=0$ to\noutput $p=1$ that minimize the parameter norm of the network. We\ngive a Lagrangian and Hamiltonian reformulation, which highlight the\nimportance of two terms: a kinetic energy which favors small layer\nderivatives $\\partial_{p}A_{p}$ and a potential energy that favors\nlow-dimensional representations, as measured by the 'Cost of Identity'.\nThe balance between these two forces offers an intuitive understanding\nof feature learning in ResNets. We leverage this intuition to explain\nthe emergence of a bottleneck structure, as observed in previous work:\nfor large $\\tilde{L}$ the potential energy dominates and leads to\na separation of timescales, where the representation jumps rapidly\nfrom the high dimensional inputs to a low-dimensional representation,\nmove slowly inside the space of low-dimensional representations, before\njumping back to the potentially high-dimensional outputs. Inspired\nby this phenomenon, we train with an adaptive layer step-size\nto adapt to the separation of timescales.", "keywords": "Feature Learning;Bottleneck Structure;NeuralODE;Hamiltonian mechanics", "primary_area": "learning_theory", "supplementary_material": "", "author": "Arthur Jacot;Alexandre Kaiser", "authorids": "~Arthur_Jacot1;~Alexandre_Kaiser1", "gender": "M;M", "homepage": ";https://a6kaiser.github.io/", "dblp": "222/2747;", "google_scholar": "https://scholar.google.ch/citations?user=G6OhFawAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Arthur_Jacot1;~Alexandre_Kaiser1", "aff": "NYU, New York University;New York University", "aff_domain": "cims.nyu.edu;nyu.edu", "position": "Assistant Professor;MS student", "bibtex": "@misc{\nanonymous2024hamiltonian,\ntitle={Hamiltonian Mechanics of Feature Learning: Bottleneck Structure in Leaky ResNets},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=RZk2rxJT55}\n}", "github": "", "project": "", "reviewers": "JYMC;wm1b;N8Zt;kgQn", "site": "https://openreview.net/forum?id=RZk2rxJT55", "pdf_size": 674246, "rating": "4;5;6;6", "confidence": "4;2;3;3", "soundness": "2;2;2;3", "novelty": "2;3;2;2", "presentation": "2;2;2;2", "wc_summary": "146;83;145;60", "wc_strengths": "84;71;69;51", "wc_weaknesses": "368;315;327;169", "wc_questions": "113;159;29;108", "wc_limitations": "22;5;34;10", "wc_review": "733;633;604;398", "wc_reply_reviewers": "9;38;555;93", "wc_reply_authors": "383;376;1741;341", "reply_reviewers": "1;1;5;1", "reply_authors": "1;1;5;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 108.5, 37.88469347902923 ], "wc_strengths_avg": [ 68.75, 11.755317945508747 ], "wc_weaknesses_avg": [ 294.75, 75.21427723511009 ], "wc_questions_avg": [ 102.25, 46.72994222123541 ], "wc_limitations_avg": [ 17.75, 11.233320969330485 ], "wc_review_avg": [ 592.0, 121.80106731880473 ], "wc_reply_reviewers_avg": [ 173.75, 222.17265245749758 ], "wc_reply_authors_avg": [ 710.25, 595.3164599605826 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13633954341554556715&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "New York;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Feature-Level Adversarial Attacks and Ranking Disruption for Visible-Infrared Person Re-identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95162", "id": "RaNct2xkyI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RaNct2xkyI", "openreview": "https://openreview.net/forum?id=RaNct2xkyI", "poster": "/media/PosterPDFs/NeurIPS%202024/95162.png?t=1731509576.2494986", "project": "", "author_site": "Xi Yang, Huanling Liu, De Cheng, Nannan Wang, Xinbo Gao", "tldr": "", "abstract": "Visible-infrared person re-identification (VIReID) is widely used in fields such as video surveillance and intelligent transportation, imposing higher demands on model security. In practice, the adversarial attacks based on VIReID aim to disrupt output ranking and quantify the security risks of models. Although numerous studies have been emerged on adversarial attacks and defenses in fields such as face recognition, person re-identification, and pedestrian detection, there is currently a lack of research on the security of VIReID systems. To this end, we propose to explore the vulnerabilities of VIReID systems and prevent potential serious losses due to insecurity. Compared to research on single-modality ReID, adversarial feature alignment and modality differences need to be particularly emphasized. Thus, we advocate for feature-level adversarial attacks to disrupt the output rankings of VIReID systems. To obtain adversarial features, we introduce \\textit{Universal Adversarial Perturbations} (UAP) to simulate common disturbances in real-world environments. Additionally, we employ a \\textit{Frequency-Spatial Attention Module} (FSAM), integrating frequency information extraction and spatial focusing mechanisms, and further emphasize important regional features from different domains on the shared features. This ensures that adversarial features maintain consistency within the feature space. Finally, we employ an \\textit{Auxiliary Quadruple Adversarial Loss} to amplify the differences between modalities, thereby improving the distinction and recognition of features between visible and infrared images, which causes the system to output incorrect rankings. Extensive experiments on two VIReID benchmarks (i.e., SYSU-MM01, RegDB) and different systems validate the effectiveness of our method.", "keywords": "Visible-Infrared Person Re-identification;Universal Adversarial Perturbation;Frequency-Spatial Attention Module;Auxiliary Quadruple Adversarial loss", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xi Yang;Huanling Liu;De Cheng;Nannan Wang;Xinbo Gao", "authorids": "~Xi_Yang4;~Huanling_Liu1;~De_Cheng3;~Nannan_Wang1;~Xinbo_Gao5", "gender": "F;;M;M;M", "homepage": "https://web.xidian.edu.cn/yangx/;;https://web.xidian.edu.cn/dcheng/index.html;;https://faculty.cqupt.edu.cn/gaoxinbo/zh_CN/index.htm", "dblp": "13/1520-11;;154/1991/;10/8359-1;", "google_scholar": "W5c-LSYAAAAJ;;180lASkAAAAJ;SRBn7oUAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-5791-3674;0009-0004-0905-2664;;;0000-0002-7985-0037", "linkedin": ";;;;xinbo-gao-151a2224/", "or_profile": "~Xi_Yang4;~Huanling_Liu1;~De_Cheng3;~Nannan_Wang1;~Xinbo_Gao5", "aff": "Xidian University;Xi'an University of Electronic Science and Technology;Xidian University;Xidian University;Chongqing University of Post and Telecommunications", "aff_domain": "xidian.edu;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;cqupt.edu.cn", "position": "Full Professor;MS student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024featurelevel,\ntitle={Feature-Level Adversarial Attacks and Ranking Disruption for Visible-Infrared Person Re-identification},\nauthor={Xi Yang and Huanling Liu and De Cheng and Nannan Wang and Xinbo Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RaNct2xkyI}\n}", "github": "", "reviewers": "ksiy;78wy;Jhhd;sAfC", "pdf_size": 1037205, "rating": "6;6;6;7", "confidence": "5;5;3;5", "soundness": "2;3;2;4", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "98;83;76;92", "wc_strengths": "13;52;37;127", "wc_weaknesses": "134;102;75;83", "wc_questions": "27;4;14;86", "wc_limitations": "12;11;11;15", "wc_review": "284;252;213;403", "wc_reply_reviewers": "16;21;14;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.25, 8.407585860400118 ], "wc_strengths_avg": [ 57.25, 42.60501731017135 ], "wc_weaknesses_avg": [ 98.5, 22.721135535003526 ], "wc_questions_avg": [ 32.75, 31.80703538527286 ], "wc_limitations_avg": [ 12.25, 1.6393596310755 ], "wc_review_avg": [ 288.0, 70.9964787859229 ], "wc_reply_reviewers_avg": [ 12.75, 7.790218225441442 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8238662339854005810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "xidian.edu;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;cqupt.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Xidian University;Xi'an University of Electronic Science and Technology;Chongqing University of Post and Telecommunications", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;http://www.cqupt.edu.cn", "aff_unique_abbr": "Xidian;Xidian University;CQUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Iteratively Refined Behavior Regularization for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95161", "id": "RbS7RWxw3r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RbS7RWxw3r", "openreview": "https://openreview.net/forum?id=RbS7RWxw3r", "poster": "", "project": "", "author_site": "Yi Ma, Jianye Hao, Xiaohan Hu, YAN ZHENG, Chenjun Xiao", "tldr": "", "abstract": "One of the fundamental challenges for offline reinforcement learning (RL) is ensuring robustness to data distribution. Whether the data originates from a near-optimal policy or not, we anticipate that an algorithm should demonstrate its ability to learn an effective control policy that seamlessly aligns with the inherent distribution of offline data. Unfortunately, behavior regularization, a simple yet effective offline RL algorithm, tends to struggle in this regard. In this paper, we propose a new algorithm that substantially enhances behavior-regularization based on conservative policy iteration. Our key observation is that by iteratively refining the reference policy used for behavior regularization, conservative policy update guarantees gradually improvement, while also implicitly avoiding querying out-of-sample actions to prevent catastrophic learning failures. We prove that in the tabular setting this algorithm is capable of learning the optimal policy covered by the offline dataset, commonly referred to as the in-sample optimal policy. We then explore several implementation details of the algorithm when function approximations are applied. The resulting algorithm is easy to implement, requiring only a few lines of code modification to existing methods. Experimental results on the D4RL benchmark indicate that our method outperforms previous state-of-the-art baselines in most tasks, clearly demonstrate its superiority over behavior regularization.", "keywords": "Offline RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yi Ma;Jianye HAO;Xiaohan Hu;YAN ZHENG;Chenjun Xiao", "authorids": "~Yi_Ma5;~Jianye_HAO1;~Xiaohan_Hu1;~YAN_ZHENG1;~Chenjun_Xiao1", "gender": ";M;M;M;", "homepage": "https://mayi1996.top/;http://www.icdai.org/jianye.html;;https://yanzzzzz.github.io;https://chenjun-x.github.io/", "dblp": "69/1112-5.html;21/7664.html;;10/2381-2;178/8641", "google_scholar": "TdVWzqgAAAAJ;;;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;", "orcid": "0000-0001-9375-6605;0000-0002-0422-8235;0000-0001-7645-201X;;0000-0002-5493-1500", "linkedin": ";;;;", "or_profile": "~Yi_Ma5;~Jianye_HAO1;~Xiaohan_Hu1;~YAN_ZHENG1;~Chenjun_Xiao1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin Unibersity, China;Huawei Technologies Ltd.", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;huawei.com", "position": "PhD student;Associate Professor;MS student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nma2024iteratively,\ntitle={Iteratively Refined Behavior Regularization for Offline Reinforcement Learning},\nauthor={Yi Ma and Jianye HAO and Xiaohan Hu and YAN ZHENG and Chenjun Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RbS7RWxw3r}\n}", "github": "", "reviewers": "k25f;Y6m9;h61i;Hcq2", "pdf_size": 841108, "rating": "5;5;7;7", "confidence": "4;4;3;4", "soundness": "2;2;4;3", "novelty": "3;2;2;3", "presentation": "3;3;4;3", "wc_summary": "35;143;84;69", "wc_strengths": "23;46;45;97", "wc_weaknesses": "180;59;105;56", "wc_questions": "25;56;2;38", "wc_limitations": "1;3;5;9", "wc_review": "264;307;241;269", "wc_reply_reviewers": "92;39;19;56", "wc_reply_authors": "352;106;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.75, 39.0536489972448 ], "wc_strengths_avg": [ 52.75, 27.151197027018902 ], "wc_weaknesses_avg": [ 100.0, 50.104889980919026 ], "wc_questions_avg": [ 30.25, 19.677080576142387 ], "wc_limitations_avg": [ 4.5, 2.958039891549808 ], "wc_review_avg": [ 270.25, 23.699947257325277 ], "wc_reply_reviewers_avg": [ 51.5, 26.800186566514792 ], "wc_reply_authors_avg": [ 114.5, 143.78716910767804 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6920572114126102147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;huawei.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Tianjin University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.tju.edu.cn;https://www.huawei.com", "aff_unique_abbr": "TJU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Scaling the Codebook Size of VQ-GAN to 100,000 with a Utilization Rate of 99%", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95160", "id": "RbU10yvkk6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RbU10yvkk6", "openreview": "https://openreview.net/forum?id=RbU10yvkk6", "poster": "/media/PosterPDFs/NeurIPS%202024/95160.png?t=1730773124.7778256", "project": "", "author_site": "Lei Zhu, Fangyun Wei, Yanye Lu, Dong Chen", "tldr": "", "abstract": "In the realm of image quantization exemplified by VQGAN, the process encodes images into discrete tokens drawn from a codebook with a predefined size. Recent advancements, particularly with LLAMA 3, reveal that enlarging the codebook significantly enhances model performance. However, VQGAN and its derivatives, such as VQGAN-FC (Factorized Codes) and VQGAN-EMA, continue to grapple with challenges related to expanding the codebook size and enhancing codebook utilization. For instance, VQGAN-FC is restricted to learning a codebook with a maximum size of 16,384, maintaining a typically low utilization rate of less than 12% on ImageNet. In this work, we propose a novel image quantization model named VQGAN-LC (Large Codebook), which extends the codebook size to 100,000, achieving an utilization rate exceeding 99%. Unlike previous methods that optimize each codebook entry, our approach begins with a codebook initialized with 100,000 features extracted by a pre-trained vision encoder. Optimization then focuses on training a projector that aligns the entire codebook with the feature distributions of the encoder in VQGAN-LC. We demonstrate the superior performance of our model over its counterparts across a variety of tasks, including image reconstruction, image classification, auto-regressive image generation using GPT, and image creation with diffusion- and flow-based generative models.", "keywords": "Image quantization;image generation", "primary_area": "machine_vision", "supplementary_material": "/attachment/10a5904c47b071e229c6f2d74da380bdbaa61eb8.zip", "author": "Lei Zhu;Fangyun Wei;Yanye Lu;Dong Chen", "authorids": "~Lei_Zhu10;~Fangyun_Wei1;~Yanye_Lu2;~Dong_Chen1", "gender": "M;M;M;M", "homepage": ";;https://wiki.milab.wiki/pages/viewpage.action?pageId=7503960;http://www.dongchen.pro/", "dblp": "99/549-12;161/2636;173/2256;44/3371-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;-ncz2s8AAAAJ;WSFToOMAAAAJ;https://scholar.google.com.hk/citations?user=_fKSYOwAAAAJ", "orcid": ";;0000-0002-3063-8051;", "linkedin": ";;;", "or_profile": "~Lei_Zhu10;~Fangyun_Wei1;~Yanye_Lu2;~Dong_Chen4", "aff": "Peking University;Microsoft Research;Peking University;Microsoft", "aff_domain": "pku.edu.cn;microsoft.com;pku.edu.cn;microsoft.com", "position": "PhD student;Researcher;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nzhu2024scaling,\ntitle={Scaling the Codebook Size of {VQ}-{GAN} to 100,000 with a Utilization Rate of 99\\%},\nauthor={Lei Zhu and Fangyun Wei and Yanye Lu and Dong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RbU10yvkk6}\n}", "github": "", "reviewers": "igUX;Kj4D;c7tj", "pdf_size": 2982471, "rating": "5;6;7", "confidence": "5;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "94;88;101", "wc_strengths": "27;51;81", "wc_weaknesses": "459;53;133", "wc_questions": "155;37;136", "wc_limitations": "13;9;1", "wc_review": "748;238;452", "wc_reply_reviewers": "41;0;86", "wc_reply_authors": "0;0;348", "reply_reviewers": "1;0;2", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.33333333333333, 5.312459150169742 ], "wc_strengths_avg": [ 53.0, 22.090722034374522 ], "wc_weaknesses_avg": [ 215.0, 175.59802580515154 ], "wc_questions_avg": [ 109.33333333333333, 51.73221648278974 ], "wc_limitations_avg": [ 7.666666666666667, 4.988876515698588 ], "wc_review_avg": [ 479.3333333333333, 209.10178276512985 ], "wc_reply_reviewers_avg": [ 42.333333333333336, 35.122009560324926 ], "wc_reply_authors_avg": [ 116.0, 164.04877323527901 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3472695131528491032&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;microsoft.com;pku.edu.cn;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Peking U;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Incremental Learning of Retrievable Skills For Efficient Continual Task Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95159", "id": "RcPAJAnpnm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RcPAJAnpnm", "openreview": "https://openreview.net/forum?id=RcPAJAnpnm", "poster": "/media/PosterPDFs/NeurIPS%202024/95159.png?t=1731736337.9434285", "project": "", "author_site": "Daehee Lee, Minjong Yoo, Woo Kyung Kim, Wonje Choi, Honguk Woo", "tldr": "", "abstract": "Continual Imitation Learning (CiL) involves extracting and accumulating task knowledge from demonstrations across multiple stages and tasks to achieve a multi-task policy. With recent advancements in foundation models, there has been a growing interest in adapter-based CiL approaches, where adapters are established parameter-efficiently for tasks newly demonstrated. While these approaches isolate parameters for specific tasks and tend to mitigate catastrophic forgetting, they limit knowledge sharing among different demonstrations. We introduce IsCiL, an adapter-based CiL framework that addresses this limitation of knowledge sharing by incrementally learning shareable skills from different demonstrations, thus enabling sample-efficient task adaptation using the skills particularly in non-stationary CiL environments. In IsCiL, demonstrations are mapped into the state embedding space, where proper skills can be retrieved upon input states through prototype-based memory. These retrievable skills are incrementally learned on their corresponding adapters. Our CiL experiments with complex tasks in the Franka-Kitchen and Meta-World demonstrate the robust performance of IsCiL in both task adaptation and sample-efficiency. We also show a simple extension of IsCiL for task unlearning scenarios.", "keywords": "Continual Imitation Learning;Unlearning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c0490828d4c61b4ab9a826d91fdc6b14fdb4aba6.zip", "author": "Daehee Lee;Minjong Yoo;Woo Kyung Kim;Wonje Choi;Honguk Woo", "authorids": "~Daehee_Lee1;~Minjong_Yoo2;~Woo_Kyung_Kim1;~Wonje_Choi2;~Honguk_Woo1", "gender": "M;M;M;M;M", "homepage": ";;http://115.145.179.118/students/;https://sites.google.com/view/csi-agent-group/about;", "dblp": "211/5281;306/0140;163/3705-3;63/6072;253/2606.html", "google_scholar": "https://scholar.google.co.kr/citations?user=llB3SucAAAAJ;OFFacb0AAAAJ;L4d1CjEAAAAJ;https://scholar.google.co.kr/citations?user=Gaxjc7UAAAAJ;O6L-PkgAAAAJ", "orcid": ";0000-0001-6214-4171;0000-0001-5138-0101;0000-0001-6948-3440;", "linkedin": "daehee-lee-10b396246/?locale=en_US;;;;", "or_profile": "~Daehee_Lee1;~Woo_Kyung_Kim1;~Wonje_Choi2;~Honguk_Woo1;~minjong_Yoo1", "aff": "Carnegie Mellon University(Visiting scholar);Sungkyunkwan University;Sung Kyun Kwan University;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "cs.cmu.edu;skku.edu;skku.edu;skku.edu;skku.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nlee2024incremental,\ntitle={Incremental Learning of Retrievable Skills For Efficient Continual Task Adaptation},\nauthor={Daehee Lee and Minjong Yoo and Woo Kyung Kim and Wonje Choi and Honguk Woo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RcPAJAnpnm}\n}", "github": "", "reviewers": "DhgS;kNto;B1dC;KNda", "pdf_size": 3003363, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "3;3;2;3", "novelty": "2;3;2;2", "presentation": "2;3;2;2", "wc_summary": "65;106;53;87", "wc_strengths": "32;100;60;32", "wc_weaknesses": "164;125;341;115", "wc_questions": "102;90;40;57", "wc_limitations": "5;35;21;2", "wc_review": "368;456;515;293", "wc_reply_reviewers": "20;25;23;20", "wc_reply_authors": "69;51;60;83", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 20.363877332178173 ], "wc_strengths_avg": [ 56.0, 27.85677655436824 ], "wc_weaknesses_avg": [ 186.25, 91.2013568978006 ], "wc_questions_avg": [ 72.25, 24.863376681376163 ], "wc_limitations_avg": [ 15.75, 13.254716141811564 ], "wc_review_avg": [ 408.0, 84.52514418798704 ], "wc_reply_reviewers_avg": [ 22.0, 2.1213203435596424 ], "wc_reply_authors_avg": [ 65.75, 11.818946653572814 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10568537972256400514&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "cs.cmu.edu;skku.edu;skku.edu;skku.edu;skku.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Sungkyunkwan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.skku.edu", "aff_unique_abbr": "CMU;SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "Mixture of In-Context Experts Enhance LLMs' Long Context Awareness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95158", "id": "RcPHbofiCN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RcPHbofiCN", "openreview": "https://openreview.net/forum?id=RcPHbofiCN", "poster": "/media/PosterPDFs/NeurIPS%202024/95158.png?t=1731394742.1267145", "project": "", "author_site": "Hongzhan Lin, Ang Lv, yuhan chen, chen zhu, Yang Song, Hengshu Zhu, Rui Yan", "tldr": "", "abstract": "Many studies have revealed that large language models (LLMs) exhibit uneven awareness of different contextual positions. Their limited context awareness can lead to overlooking critical information and subsequent task failures. While several approaches have been proposed to enhance LLMs' context awareness, achieving both effectiveness and efficiency remains challenging. In this paper, for LLMs utilizing RoPE as position embeddings, we introduce a novel method called \"Mixture of In-Context Experts\" (MoICE) to address this challenge. MoICE comprises two key components: a router integrated into each attention head within LLMs and a lightweight router-only training optimization strategy:(1) MoICE views each RoPE angle as an 'in-context' expert, demonstrated to be capable of directing the attention of a head to specific contextual positions. Consequently, each attention head flexibly processes tokens using multiple RoPE angles dynamically selected by the router to attend to the needed positions. This approach mitigates the risk of overlooking essential contextual information. (2) The router-only training strategy entails freezing LLM parameters and exclusively updating routers for only a few steps. When applied to open-source LLMs including Llama and Mistral, MoICE surpasses prior methods across multiple tasks on long context understanding and generation, all while maintaining commendable inference efficiency.", "keywords": "long context awareness;large language model;attention mechanism", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hongzhan Lin;Ang Lv;Yuhan Chen;Chen Zhu;Yang Song;Hengshu Zhu;Rui Yan", "authorids": "~Hongzhan_Lin2;~Ang_Lv1;~Yuhan_Chen2;~Chen_Zhu5;~Yang_Song19;~Hengshu_Zhu1;~Rui_Yan2", "gender": "M;M;F;;;;M", "homepage": "https://github.com/p1nksnow;https://trestad.github.io;https://github.com/Fiorina1212;;;http://www.zhuhengshu.com/;https://gsai.ruc.edu.cn/english/ruiyan", "dblp": "292/1751-2.html;326/5506;155/2863-1;;;61/10440;19/2405-1", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;;;;55MQBzYAAAAJ;eLw6g-UAAAAJ", "orcid": "0009-0001-4029-810X;0000-0002-8027-2270;0009-0001-8752-9411;;;0000-0003-4570-643X;0000-0002-3356-6823", "linkedin": ";;;;;;", "or_profile": "~Hongzhan_Lin2;~Ang_Lv1;~Yuhan_Chen2;~Chen_Zhu5;~Yang_Song19;~Hengshu_Zhu1;~Rui_Yan2", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;;;Kanzhun Limited (BOSS Zhipin);Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;;;kanzhun.com;ruc.edu.cn", "position": "MS student;PhD student;MS student;;;Chief Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nlin2024mixture,\ntitle={Mixture of In-Context Experts Enhance {LLM}s' Long Context Awareness},\nauthor={Hongzhan Lin and Ang Lv and Yuhan Chen and Chen Zhu and Yang Song and Hengshu Zhu and Rui Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RcPHbofiCN}\n}", "github": "", "reviewers": "uPsd;jb4B;BQEC;K8VP", "pdf_size": 642518, "rating": "6;6;6;7", "confidence": "3;3;2;4", "soundness": "3;3;2;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "72;94;113;106", "wc_strengths": "93;34;27;55", "wc_weaknesses": "127;26;101;125", "wc_questions": "6;77;28;15", "wc_limitations": "6;14;32;1", "wc_review": "304;245;301;302", "wc_reply_reviewers": "13;0;73;49", "wc_reply_authors": "17;0;38;25", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.25, 15.562374497485916 ], "wc_strengths_avg": [ 52.25, 25.68438241422207 ], "wc_weaknesses_avg": [ 94.75, 40.99009026581913 ], "wc_questions_avg": [ 31.5, 27.408940147331492 ], "wc_limitations_avg": [ 13.25, 11.776565713313877 ], "wc_review_avg": [ 288.0, 24.849547279578356 ], "wc_reply_reviewers_avg": [ 33.75, 28.908260065247788 ], "wc_reply_authors_avg": [ 20.0, 13.765899897936205 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9680567010562084548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;;;kanzhun.com;ruc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Renmin University of China;Kanzhun Limited", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.zhipin.com", "aff_unique_abbr": "RUC;BOSS Zhipin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "RdAfUp4LcD", "title": "Linear Mode Connectivity in Differentiable Tree Ensembles", "track": "main", "status": "Reject", "tldr": "", "abstract": "Linear Mode Connectivity (LMC) refers to the phenomenon that performance remains consistent for linearly interpolated models in the parameter space. For independently optimized model pairs from different random initializations, achieving LMC is considered crucial for validating the stable success of the non-convex optimization in modern machine learning models and for facilitating practical parameter-based operations such as model merging.\nWhile LMC has been achieved for neural networks by considering the permutation invariance of neurons in each hidden layer, its attainment for other models remains an open question.\nIn this paper, we first achieve LMC for soft tree ensembles, which are tree-based differentiable models extensively used in practice.\nWe show the necessity of incorporating two invariances: subtree flip invariance and splitting order invariance, which do not exist in neural networks but are inherent to tree architectures, in addition to permutation invariance of trees.\nMoreover, we demonstrate that it is even possible to exclude such additional invariances while keeping LMC by designing decision list-based tree architectures, where such invariances do not exist by definition.\nOur findings indicate the significance of accounting for architecture-specific invariances in achieving LMC.", "keywords": "Linear Mode Connectivity;Soft Tree", "primary_area": "optimization", "supplementary_material": "/attachment/83209aec149f4379b15596d8b618427faa796300.zip", "author": "Ryuichi Kanoh;Mahito Sugiyama", "authorids": "~Ryuichi_Kanoh1;~Mahito_Sugiyama1", "gender": "M;M", "homepage": ";https://mahito.nii.ac.jp/", "dblp": "287/4416;05/8421", "google_scholar": ";qLlRvTkAAAAJ", "orcid": ";0000-0001-5907-9831", "linkedin": "ryuichi-kanoh-43ab4316b/;", "or_profile": "~Ryuichi_Kanoh1;~Mahito_Sugiyama1", "aff": "NII, the Graduate University for Advanced Studies;National Institute of Informatics", "aff_domain": "nii.ac.jp;nii.ac.jp", "position": "PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024linear,\ntitle={Linear Mode Connectivity in Differentiable Tree Ensembles},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=RdAfUp4LcD}\n}", "github": "", "project": "", "reviewers": "JSfD;fRZd;Agya;Varw", "site": "https://openreview.net/forum?id=RdAfUp4LcD", "pdf_size": 2711558, "rating": "4;5;6;7", "confidence": "3;4;2;4", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "21;86;60;116", "wc_strengths": "47;23;39;136", "wc_weaknesses": "291;307;66;112", "wc_questions": "2;299;1;348", "wc_limitations": "4;84;1;11", "wc_review": "365;799;167;723", "wc_reply_reviewers": "119;1228;29;50", "wc_reply_authors": "94;1637;25;19", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 34.89537935028075 ], "wc_strengths_avg": [ 61.25, 44.01349224953639 ], "wc_weaknesses_avg": [ 194.0, 106.40253756372543 ], "wc_questions_avg": [ 162.5, 161.92976872706265 ], "wc_limitations_avg": [ 25.0, 34.25638626592128 ], "wc_review_avg": [ 513.5, 258.60926124174284 ], "wc_reply_reviewers_avg": [ 356.5, 504.2610930857149 ], "wc_reply_authors_avg": [ 443.75, 689.5532521132795 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.1348399724926484, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Hxv4Bifcm3YJ:scholar.google.com/&scioq=Linear+Mode+Connectivity+in+Differentiable+Tree+Ensembles&hl=en&as_sdt=0,14", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "National Institute of Informatics", "aff_unique_dep": "", "aff_unique_url": "https://www.nii.ac.jp", "aff_unique_abbr": "NII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Sample Efficient Bayesian Learning of Causal Graphs from Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95157", "id": "RfSvAom7sS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RfSvAom7sS", "openreview": "https://openreview.net/forum?id=RfSvAom7sS", "poster": "/media/PosterPDFs/NeurIPS%202024/95157.png?t=1731475473.9719439", "project": "", "author_site": "Zihan Zhou, Muhammad Qasim Elahi, Murat Kocaoglu", "tldr": "", "abstract": "Causal discovery is a fundamental problem with applications spanning various areas in science and engineering. It is well understood that solely using observational data, one can only orient the causal graph up to its Markov equivalence class, necessitating interventional data to learn the complete causal graph. Most works in the literature design causal discovery policies with perfect interventions, i.e., they have access to infinite interventional samples. This study considers a Bayesian approach for learning causal graphs with limited interventional samples, mirroring real-world scenarios where such samples are usually costly to obtain. By leveraging the recent result of Wien\u00f6bst et al. [2023] on uniform DAG sampling in polynomial time, we can efficiently enumerate all the cut configurations and their corresponding interventional distributions of a target set, and further track their posteriors. Given any number of interventional samples, our proposed algorithm randomly intervenes on a set of target vertices that cut all the edges in the graph and returns a causal graph according to the posterior of each target set. When the number of interventional samples is large enough, we show theoretically that our proposed algorithm will return the true causal graph with high probability. We compare our algorithm against various baseline methods on simulated datasets, demonstrating its superior accuracy measured by the structural Hamming distance between the learned DAG and the ground truth. Additionally, we present a case study showing how this algorithm could be modified to answer more general causal questions without learning the whole graph. As an example, we illustrate that our method can be used to estimate the causal effect of a variable that cannot be intervened.", "keywords": "Causal Discovery;Bayesian Learning;Sample Efficiency", "primary_area": "causal_inference", "supplementary_material": "/attachment/9ea9d3d819be4ffaa017d89971f507df0b48bdfa.zip", "author": "Zihan Zhou;Muhammad Qasim Elahi;Murat Kocaoglu", "authorids": "~Zihan_Zhou6;~Muhammad_Qasim_Elahi1;~Murat_Kocaoglu1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/zihan-zhou/home;https://www.linkedin.com/in/qasim-elahi-b59948133/;https://www.muratkocaoglu.com", "dblp": "309/9212;;74/11343", "google_scholar": "https://scholar.google.com/citations?hl=en;M7C8dFAAAAAJ;7N7bzdwAAAAJ", "orcid": "0000-0002-6823-7931;;", "linkedin": "zihan-zhou-099981182/;;mkocaoglu/", "or_profile": "~Zihan_Zhou6;~Muhammad_Qasim_Elahi1;~Murat_Kocaoglu1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024sample,\ntitle={Sample Efficient Bayesian Learning of Causal Graphs from Interventions},\nauthor={Zihan Zhou and Muhammad Qasim Elahi and Murat Kocaoglu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RfSvAom7sS}\n}", "github": "", "reviewers": "uDQi;i7ee;HSDU;UxJA", "pdf_size": 1408864, "rating": "4;5;6;6", "confidence": "3;3;2;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "2;2;1;3", "wc_summary": "68;85;38;44", "wc_strengths": "56;61;54;18", "wc_weaknesses": "86;352;42;34", "wc_questions": "57;34;93;137", "wc_limitations": "1;9;2;1", "wc_review": "268;541;229;234", "wc_reply_reviewers": "75;592;42;52", "wc_reply_authors": "149;653;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 58.75, 18.859679212542297 ], "wc_strengths_avg": [ 47.25, 17.07886120325357 ], "wc_weaknesses_avg": [ 128.5, 130.54788393535912 ], "wc_questions_avg": [ 80.25, 38.931831449342326 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 318.0, 129.6206002146264 ], "wc_reply_reviewers_avg": [ 190.25, 232.25888034690945 ], "wc_reply_authors_avg": [ 200.5, 268.2391656712345 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CjpCtW1a4agJ:scholar.google.com/&scioq=Sample+Efficient+Bayesian+Learning+of+Causal+Graphs+from+Interventions&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "EEG2Video: Towards Decoding Dynamic Visual Perception from EEG Signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95156", "id": "RfsfRn9OFd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RfsfRn9OFd", "openreview": "https://openreview.net/forum?id=RfsfRn9OFd", "poster": "/media/PosterPDFs/NeurIPS%202024/95156.png?t=1729572646.0559065", "project": "", "author_site": "Xuan-Hao Liu, Yan-Kai Liu, Yansen Wang, Kan Ren, Hanwen Shi, Zilong Wang, Dongsheng Li, Bao-Liang Lu, Wei-Long Zheng", "tldr": "", "abstract": "Our visual experience in daily life are dominated by dynamic change. Decoding such dynamic information from brain activity can enhance the understanding of the brain\u2019s visual processing system. However, previous studies predominately focus on reconstructing static visual stimuli. In this paper, we explore to decode dynamic visual perception from electroencephalography (EEG), a neuroimaging technique able to record brain activity with high temporal resolution (1000 Hz) for capturing rapid changes in brains. Our contributions are threefold: Firstly, we develop a large dataset recording signals from 20 subjects while they were watching 1400 dynamic video clips of 40 concepts. This dataset fills the gap in the lack of EEG-video pairs. Secondly, we annotate each video clips to investigate the potential for decoding some specific meta information (e.g., color, dynamic, human or not) from EEG. Thirdly, we propose a novel baseline EEG2Video for video reconstruction from EEG signals that better aligns dynamic movements with high temporal resolution brain signals by Seq2Seq architecture. EEG2Video achieves a 2-way accuracy of 79.8% in semantic classification tasks and 0.256 in structural similarity index (SSIM). Overall, our works takes an important step towards decoding dynamic visual perception from EEG signals. Our dataset and code will be released soon.", "keywords": "EEG;video generation;diffusion model;brain-computer interface", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/44c231ba4e1dda99312f7354b6c439294f8a61c6.zip", "author": "Xuanhao Liu;Yan-Kai Liu;Yansen Wang;Kan Ren;Hanwen Shi;Zilong Wang;Dongsheng Li;Bao-liang Lu;Wei-Long Zheng", "authorids": "~Xuanhao_Liu2;~Yan-Kai_Liu1;~Yansen_Wang2;~Kan_Ren1;~Hanwen_Shi1;~Zilong_Wang8;~Dongsheng_Li2;~Bao-liang_Lu1;~Wei-Long_Zheng1", "gender": "M;M;F;M;M;M;;;M", "homepage": "https://xuanhaoliu.github.io;https://saying.ren;https://github.com/IvyCharon;;http://recmind.cn;http://bcmi.sjtu.edu.cn/~blu/;https://weilongzheng.github.io/;https://bcmi.sjtu.edu.cn/;", "dblp": "359/4199;28/7458;;42/898-6;254/0830-2.html;09/3116.html;150/4150;330/7260;134/7817", "google_scholar": "99yIdXAAAAAJ;USnQVWgAAAAJ;;;VNg5rA8AAAAJ;https://scholar.google.com.tw/citations?user=709il6EAAAAJ;MZXXe8UAAAAJ;;Hvbzb1kAAAAJ", "orcid": "0000-0002-9693-7839;;;0000-0002-6760-1471;0000-0003-3103-8442;0000-0001-8359-0058;;;", "linkedin": ";;;;;;;;", "or_profile": "~Xuanhao_Liu2;~Kan_Ren1;~Hanwen_Shi1;~Zilong_Wang8;~Dongsheng_Li2;~Bao-liang_Lu1;~Wei-Long_Zheng1;~Yankai_Liu1;~Yansen_Wang1", "aff": "Shanghai Jiaotong University;ShanghaiTech University;Shanghai Jiaotong University;Microsoft Research;Microsoft Research Asia;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Microsoft Research Asia", "aff_domain": "sjtu.edu.cn;shanghaitech.edu.cn;sjtu.edu.cn;microsoft.com;microsoft.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;microsoft.com", "position": "PhD student;Assistant Professor;PhD student;Researcher;Principal Researcher;Full Professor;Associate Professor;MS student;Researcher", "bibtex": "@inproceedings{\nliu2024eegvideo,\ntitle={{EEG}2Video: Towards Decoding Dynamic Visual Perception from {EEG} Signals},\nauthor={Xuanhao Liu and Yan-Kai Liu and Yansen Wang and Kan Ren and Hanwen Shi and Zilong Wang and Dongsheng Li and Bao-liang Lu and Wei-Long Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RfsfRn9OFd}\n}", "github": "", "reviewers": "Kswp;ACPC;nkHY;9iBh", "pdf_size": 0, "rating": "4;4;5;6", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "75;86;57;34", "wc_strengths": "68;94;62;53", "wc_weaknesses": "110;211;15;116", "wc_questions": "84;119;27;50", "wc_limitations": "8;1;7;5", "wc_review": "345;511;168;258", "wc_reply_reviewers": "48;599;24;0", "wc_reply_authors": "486;1278;23;0", "reply_reviewers": "2;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 63.0, 19.685019685029527 ], "wc_strengths_avg": [ 69.25, 15.2540978100968 ], "wc_weaknesses_avg": [ 113.0, 69.32892614197915 ], "wc_questions_avg": [ 70.0, 34.806608567914225 ], "wc_limitations_avg": [ 5.25, 2.680951323690902 ], "wc_review_avg": [ 320.5, 126.54347079166115 ], "wc_reply_reviewers_avg": [ 167.75, 249.55998777848984 ], "wc_reply_authors_avg": [ 446.75, 517.606691900327 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13730861110199704258&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "sjtu.edu.cn;shanghaitech.edu.cn;sjtu.edu.cn;microsoft.com;microsoft.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;0;2;2;0;0;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;ShanghaiTech University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.shanghaitech.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "SJTU;ShanghaiTech;MSR", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "WelQrate: Defining the Gold Standard in Small Molecule Drug Discovery Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97684", "id": "RgUcvs6ssu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RgUcvs6ssu", "openreview": "https://openreview.net/forum?id=RgUcvs6ssu", "poster": "/media/PosterPDFs/NeurIPS%202024/97684.png?t=1731693807.9926407", "project": "", "author_site": "Yunchao Liu, Ha Dong, Xin Wang, Rocco Moretti, Yu Wang, Zhaoqian Su, Jiawei Gu, Bobby Bodenheimer, Charles Weaver, Jens Meiler, Tyler Derr", "tldr": "", "abstract": "While deep learning has revolutionized computer-aided drug discovery, the AI community has predominantly focused on model innovation and placed less emphasis on establishing best benchmarking practices. \nWe posit that without a sound model evaluation framework, the AI community's efforts cannot reach their full potential, thereby slowing the progress and transfer of innovation into real-world drug discovery.\nThus, in this paper, we seek to establish a new gold standard for small molecule drug discovery benchmarking, *WelQrate*. \nSpecifically, our contributions are threefold: \n***WelQrate*** **dataset collection** - we introduce a meticulously curated collection of 9 datasets spanning 5 therapeutic target classes. Our hierarchical curation pipelines, designed by drug discovery experts, go beyond the primary high-throughput screen by leveraging additional confirmatory and counter screens along with rigorous domain-driven preprocessing, such as Pan-Assay Interference Compounds (PAINS) filtering, to ensure the high-quality data in the datasets; ***WelQrate*** **Evaluation Framework** - we propose a standardized model evaluation framework considering high-quality datasets, featurization, 3D conformation generation, evaluation metrics, and data splits, which provides a reliable benchmarking for drug discovery experts conducting real-world virtual screening; **Benchmarking** - \nwe evaluate model performance through various research questions using the *WelQrate* dataset collection, exploring the effects of different models, dataset quality, featurization methods, and data splitting strategies on the results.\nIn summary, we recommend adopting our proposed *WelQrate* as the gold standard in small molecule drug discovery benchmarking. The *WelQrate* dataset collection, along with the curation codes, and experimental scripts are all publicly available at www.WelQrate.org.", "keywords": "drug discovery;benchmarking;small molecule", "primary_area": "", "supplementary_material": "/attachment/e61c9b8beeedf9cb25f73b04d19a0f40211fb9c8.pdf", "author": "Yunchao Liu;Ha Dong;Xin Wang;Rocco Moretti;Yu Wang;Zhaoqian Su;Jiawei Gu;Bobby Bodenheimer;Charles Weaver;Jens Meiler;Tyler Derr", "authorids": "~Yunchao_Liu2;~Ha_Dong1;~Xin_Wang92;~Rocco_Moretti1;~Yu_Wang41;~Zhaoqian_Su1;~Jiawei_Gu5;~Bobby_Bodenheimer1;~Charles_Weaver1;~Jens_Meiler1;~Tyler_Derr1", "gender": ";M;M;;M;M;M;M;;;", "homepage": "http://www.liuyunchao.com;;https://github.com/xwang38438;;https://yuwang0103.github.io/;;https://github.com/jiawku;http://www.vuse.vanderbilt.edu/~bobbyb/;;;http://www.tylerderr.com", "dblp": ";;;;02/5889-160;;;;;;207/7927.html", "google_scholar": "oFtlWfwAAAAJ;;;Q_yfqd4AAAAJ;XPCmiz4AAAAJ;L_bDUpsAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=hI4XguUAAAAJ;;;et6IhFcAAAAJ", "orcid": "0000-0002-3982-1311;;;;0000-0001-6908-508X;0000-0002-8369-0697;;;;;", "linkedin": "yunchaoliu/;ha-dong-ngoc;;;;;jiawei-gu/;;;;tylersnetwork/", "or_profile": "~Yunchao_Liu2;~Ha_Dong1;~Xin_Wang92;~Rocco_Moretti1;~Yu_Wang41;~Zhaoqian_Su1;~Jiawei_Gu5;~Bobby_Bodenheimer1;~Charles_Weaver1;~Jens_Meiler1;~Tyler_Derr1", "aff": "Vanderbilt University;Amherst College;Vanderbilt University;Vanderbilt University;Adobe Systems;Vanderbilt University;intermountain Healthcare;Vanderbilt University;;Vanderbilt University;Vanderbilt University", "aff_domain": "vanderbilt.edu;amherst.edu;vanderbilt.edu;vanderbilt.edu;adobe.com;vanderbilt.edu;imail.org;vanderbilt.edu;;vanderbilt.edu;vanderbilt.edu", "position": "PhD student;Undergrad student;MS student;Full Professor;Intern;Postdoc;Researcher;Full Professor;;;Assistant Professor", "bibtex": "@inproceedings{\nliu2024welqrate,\ntitle={WelQrate: Defining the Gold Standard in Small Molecule Drug Discovery Benchmarking},\nauthor={Yunchao Liu and Ha Dong and Xin Wang and Rocco Moretti and Yu Wang and Zhaoqian Su and Jiawei Gu and Bobby Bodenheimer and Charles Weaver and Jens Meiler and Tyler Derr},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=RgUcvs6ssu}\n}", "github": "", "reviewers": "yZji;scA8;JFZX;E61p", "pdf_size": 1517181, "rating": "4;5;7;8", "confidence": "4;2;4;4", "wc_summary_and_contributions": "73;73;104;54", "wc_strengths": "48;73;56;30", "wc_improvement": "94;377;134;109", "wc_limitations": "256;15;5;1", "wc_correctness": "12;14;1;1", "wc_clarity": "8;27;1;1", "wc_relation_to_prior_work": "5;13;1;1", "wc_documentation": "9;15;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "506;608;304;199", "wc_reply_reviewers": "0;137;13;10", "wc_reply_authors": "1600;1083;69;26", "reply_reviewers": "0;1;1;1", "reply_authors": "6;4;3;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 76.0, 17.930421077041107 ], "wc_strengths_avg": [ 51.75, 15.465687828221544 ], "wc_improvement_avg": [ 178.5, 115.49134166681068 ], "wc_limitations_avg": [ 69.25, 107.94066657196443 ], "wc_correctness_avg": [ 7.0, 6.041522986797286 ], "wc_clarity_avg": [ 9.25, 10.638961415476606 ], "wc_relation_to_prior_work_avg": [ 5.0, 4.898979485566356 ], "wc_documentation_avg": [ 6.5, 5.894913061275798 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 404.25, 161.2798421998236 ], "wc_reply_reviewers_avg": [ 40.0, 56.20942981386664 ], "wc_reply_authors_avg": [ 694.5, 672.4962825176062 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.36514837167011077, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MIWTOuF8y5AJ:scholar.google.com/&scioq=WelQrate:+Defining+the+Gold+Standard+in+Small+Molecule+Drug+Discovery+Benchmarking&hl=en&as_sdt=0,33", "gs_version_total": 7, "email": "vanderbilt.edu;amherst.edu;vanderbilt.edu;vanderbilt.edu;adobe.com;vanderbilt.edu;imail.org;vanderbilt.edu;;vanderbilt.edu;vanderbilt.edu", "author_num": 11, "aff_unique_index": "0;1;0;0;2;0;3;0;0;0", "aff_unique_norm": "Vanderbilt University;Amherst College;Adobe;Intermountain Healthcare", "aff_unique_dep": ";;Adobe Systems Incorporated;", "aff_unique_url": "https://www.vanderbilt.edu;https://www.amherst.edu;https://www.adobe.com;https://intermountainhealthcare.org", "aff_unique_abbr": "Vanderbilt;Amherst;Adobe;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal-state Dynamics Estimation for Physics-based Human Motion Capture from Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95155", "id": "RkOT8rAmRR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RkOT8rAmRR", "openreview": "https://openreview.net/forum?id=RkOT8rAmRR", "poster": "", "project": "", "author_site": "Cuong Le, Manon Kok, John Viktor Johansson, Bastian Wandt", "tldr": "", "abstract": "Human motion capture from monocular videos has made significant progress in recent years. However, modern approaches often produce temporal artifacts, e.g. in form of jittery motion and struggle to achieve smooth and physically plausible motions. Explicitly integrating physics, in form of internal forces and exterior torques, helps alleviating these artifacts. Current state-of-the-art approaches make use of an automatic PD controller to predict torques and reaction forces in order to re-simulate the input kinematics, i.e. the joint angles of a predefined skeleton. However, due to imperfect physical models, these methods often require simplifying assumptions and extensive preprocessing of the input kinematics to achieve good performance. To this end, we propose a novel method to selectively incorporate the physics models with the kinematics observations in an online setting, inspired by a neural Kalman-filtering approach. We develop a control loop as a meta-PD controller to predict internal joint torques and external reaction forces, followed by a physics-based motion simulation. A recurrent neural network is introduced to realize a Kalman filter that attentively balances the kinematics input and simulated motion, resulting in an optimal-state dynamics prediction. We show that this filtering step is crucial to provide an online supervision that helps balancing the shortcoming of the respective input motions, thus being important for not only capturing accurate global motion trajectories but also producing physically plausible human poses. The proposed approach excels in the physics-based human pose estimation task and demonstrates the physical plausibility of the predictive dynamics, compared to state of the art. The code is available on https://github.com/cuongle1206/OSDCap.", "keywords": "human motion;dynamics;optimal state;kalman filter;physics-based", "primary_area": "machine_vision", "supplementary_material": "/attachment/b72ba1da11508232b4d347e8798c559c3b5203ba.zip", "author": "Cuong Le;Viktor Johansson;Manon Kok;Bastian Wandt", "authorids": "~Cuong_Le1;~Viktor_Johansson1;~Manon_Kok1;~Bastian_Wandt2", "gender": "M;M;F;M", "homepage": ";;https://sites.google.com/site/manonkok2/publications;http://bastianwandt.de", "dblp": ";;132/4758;", "google_scholar": "aGtNlKgAAAAJ;;https://scholar.google.nl/citations?user=l1uvSZIAAAAJ;z4aXEBYAAAAJ", "orcid": ";;;", "linkedin": "cuong-le-8811ba16b/;viktor-johansson-442346162/;;", "or_profile": "~Cuong_Le1;~Viktor_Johansson1;~Manon_Kok1;~Bastian_Wandt2", "aff": "Link\u00f6ping University;Link\u00f6ping University;Delft University of Technology;Link\u00f6ping University", "aff_domain": "liu.se;liu.se;tudelft.nl;liu.se", "position": "PhD student;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nle2024optimalstate,\ntitle={Optimal-state Dynamics Estimation for Physics-based Human Motion Capture from Videos},\nauthor={Cuong Le and Manon Kok and Viktor Johansson and Bastian Wandt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RkOT8rAmRR}\n}", "github": "", "reviewers": "mMU1;DQhK;wqMZ;QEqx;BVYQ", "pdf_size": 31086218, "rating": "5;5;5;7;8", "confidence": "5;4;4;4;5", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;4", "presentation": "3;2;3;4;3", "wc_summary": "62;106;114;68;195", "wc_strengths": "40;42;57;70;123", "wc_weaknesses": "167;303;106;106;287", "wc_questions": "84;64;92;39;135", "wc_limitations": "4;1;29;28;1", "wc_review": "357;516;398;311;741", "wc_reply_reviewers": "120;28;260;58;241", "wc_reply_authors": "350;13;133;23;232", "reply_reviewers": "2;1;2;1;1", "reply_authors": "3;2;3;2;2", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 109.0, 47.58150901348127 ], "wc_strengths_avg": [ 66.4, 30.32226904438387 ], "wc_weaknesses_avg": [ 193.8, 85.72840836035626 ], "wc_questions_avg": [ 82.8, 31.883538072177622 ], "wc_limitations_avg": [ 12.6, 13.032267646115928 ], "wc_review_avg": [ 464.6, 154.04233184420443 ], "wc_reply_reviewers_avg": [ 141.4, 94.08421759253781 ], "wc_reply_authors_avg": [ 150.2, 127.99281229819118 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3227486121839514, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12355700191979037100&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "liu.se;liu.se;tudelft.nl;liu.se", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Link\u00f6ping University;Delft University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.liu.se;https://www.tudelft.nl", "aff_unique_abbr": "LiU;TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Sweden;Netherlands" }, { "title": "HuRef: HUman-REadable Fingerprint for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95154", "id": "RlZgnEZsOH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RlZgnEZsOH", "openreview": "https://openreview.net/forum?id=RlZgnEZsOH", "poster": "/media/PosterPDFs/NeurIPS%202024/95154.png?t=1731726876.1092582", "project": "", "author_site": "Boyi Zeng, Lizheng Wang, Yuncong Hu, Yi Xu, Chenghu Zhou, Xinbing Wang, Yu Yu, Zhouhan Lin", "tldr": "", "abstract": "Protecting the copyright of large language models (LLMs) has become crucial due to their resource-intensive training and accompanying carefully designed licenses. However, identifying the original base model of an LLM is challenging due to potential parameter alterations. In this\nstudy, we introduce HuRef, a human-readable fingerprint for LLMs that uniquely identifies the base model without interfering with training or exposing model parameters to the public.\nWe first observe that the vector direction of LLM parameters remains stable after the model has converged during pretraining, \nwith negligible perturbations through subsequent training steps, including continued pretraining, supervised fine-tuning, and RLHF, \nwhich makes it a sufficient condition\nto identify the base model.\nThe necessity is validated by continuing to train an LLM with an extra term to drive away the model parameters' direction and the model becomes damaged. However, this direction is vulnerable to simple attacks like dimension permutation or matrix rotation, which significantly change it without affecting performance. To address this, leveraging the Transformer structure, we systematically analyze potential attacks and define three invariant terms that identify an LLM's base model. \nDue to the potential risk of information leakage, we cannot publish invariant terms directly. Instead, we map them to a Gaussian vector using an encoder, then convert it into a natural image using StyleGAN2, and finally publish the image. In our black-box setting, all fingerprinting steps are internally conducted by the LLMs owners. To ensure the published fingerprints are honestly generated, we introduced Zero-Knowledge Proof (ZKP).\nExperimental results across various LLMs demonstrate the effectiveness of our method. The code is available at https://github.com/LUMIA-Group/HuRef.", "keywords": "Model Identification;Fingerprinting;Large Language Models (LLMs)", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Boyi Zeng;Lizheng Wang;Yuncong Hu;Yi Xu;Chenghu Zhou;Xinbing Wang;Yu Yu;Zhouhan Lin", "authorids": "~Boyi_Zeng2;~Lizheng_Wang1;~Yuncong_Hu2;~Yi_Xu13;~Chenghu_Zhou3;~Xinbing_Wang1;~Yu_Yu1;~Zhouhan_Lin1", "gender": "M;;M;M;M;M;M;M", "homepage": ";;https://huyuncong.com;https://cv.omegaxyz.com/;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/;http://www.cs.sjtu.edu.cn/~wang-xb/;http://yuyu.hk/;https://hantek.github.io", "dblp": ";;;14/5580-4;85/1324.html;96/1149.html;;121/7919.html", "google_scholar": ";;;E-VwoYEAAAAJ;;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;https://scholar.google.com.hk/citations?user=lpRkCB4AAAAJ;https://scholar.google.ca/citations?user=LNZ4efwAAAAJ", "orcid": "0009-0001-9131-0228;;;0000-0002-5280-6132;;0000-0002-0357-8356;;0009-0009-7204-0689", "linkedin": ";;;;;;;https://ca.linkedin.com/in/zhouhan-lin-34b98975", "or_profile": "~Boyi_Zeng2;~Lizheng_Wang1;~Yuncong_Hu2;~Yi_Xu13;~Chenghu_Zhou3;~Xinbing_Wang1;~Yu_Yu1;~Zhouhan_Lin1", "aff": "Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;IGSNRR, Chinese Academy of Sciences, Beijing, China;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;lreis.ac.cn;cs.sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;;Assistant Professor;PhD student;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzeng2024huref,\ntitle={HuRef: {HU}man-{RE}adable Fingerprint for Large Language Models},\nauthor={Boyi Zeng and Lizheng Wang and Yuncong Hu and Yi Xu and Chenghu Zhou and Xinbing Wang and Yu Yu and Zhouhan Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RlZgnEZsOH}\n}", "github": "", "reviewers": "BRHe;TSUQ;YYto", "pdf_size": 9827946, "rating": "5;5;8", "confidence": "5;3;4", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "3;2;4", "wc_summary": "100;61;174", "wc_strengths": "30;34;96", "wc_weaknesses": "272;148;98", "wc_questions": "28;2;37", "wc_limitations": "6;6;7", "wc_review": "436;251;412", "wc_reply_reviewers": "17;66;17", "wc_reply_authors": "731;30;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 111.66666666666667, 46.8638690487909 ], "wc_strengths_avg": [ 53.333333333333336, 30.214051182999096 ], "wc_weaknesses_avg": [ 172.66666666666666, 73.14521325570267 ], "wc_questions_avg": [ 22.333333333333332, 14.839886193034712 ], "wc_limitations_avg": [ 6.333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 366.3333333333333, 82.13944782434861 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 23.098821518760552 ], "wc_reply_authors_avg": [ 253.66666666666666, 337.7477691348317 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5402153993814029029&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;lreis.ac.cn;cs.sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Chinese Academy of Sciences", "aff_unique_dep": ";IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.cas.cn", "aff_unique_abbr": "SJTU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Rllg9nOasE", "title": "Multi-modal brain encoding models for multi-modal stimuli", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite participants engaging in single modality stimuli, such as watching images or silent videos, recent work has demonstrated that multi-modal Transformer models can predict visual brain activity impressively well, even with incongruent modality representations. This raises the question of how accurately these multi-modal models can predict brain activity when participants are engaged in multi-modal stimuli. As these models grow increasingly popular, their use in studying neural activity provides insights into how our brains respond to such multi-modal naturalistic stimuli, i.e., where it separates and integrates information from different sensory modalities. We investigate this question by using multiple unimodal and two types of multi-modal models\u2014cross-modal and jointly pretrained\u2014to determine which type of models is more relevant to fMRI brain activity when participants were engaged in watching movies (videos with audio). We observe that both types of multi-modal models show improved alignment in several language and visual regions. This study also helps in identifying which brain regions process unimodal versus multi-modal information. We further investigate the impact of removal of unimodal features from multi-modal representations and find that there is additional information beyond the unimodal embeddings that is processed in the visual and language regions. Based on this investigation, we find that while for cross-modal models, their brain alignment is partially attributed to the video modality; for jointly pretrained models, it is partially attributed to both the video and audio modalities. The inability of individual modalities in explaining the brain alignment effectiveness of multi-modal models suggests that multi-modal models capture additional information processed by all brain regions. This serves as a strong motivation for the neuro-science community to investigate the interpretability of these models for deepening our understanding of multi-modal information processing in brain.", "keywords": "brain encoding;fMRI;multi-modal models;multi-modal stimuli;Transformers;videos;speech;language", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "SUBBA REDDY OOTA;Khushbu Pahwa;mounika marreddy;Maneesh Kumar Singh;Manish Gupta;Bapi Raju Surampudi", "authorids": "~SUBBA_REDDY_OOTA1;~Khushbu_Pahwa1;~mounika_marreddy1;~Maneesh_Kumar_Singh1;~Manish_Gupta1;~Bapi_Raju_Surampudi1", "gender": "M;F;F;M;M;", "homepage": "https://sites.google.com/view/subbareddyoota300/home?authuser=0;;;https://arxiv.org/search/?query=Singh%2C+Maneesh&searchtype=author&abstracts=show&order=-announced_date_first&size=50;https://sites.google.com/view/manishg/;", "dblp": "190/1709;299/8490;206/3366;263/9205-1;g/ManishGupta1.html;", "google_scholar": "https://scholar.google.co.in/citations?user=4Uz0LngAAAAJ;https://scholar.google.com/citations?hl=en;Ikqyo5sAAAAJ;hdQhiFgAAAAJ;https://scholar.google.co.in/citations?user=eX9PSu0AAAAJ;", "orcid": "0000-0002-5975-622X;;;0000-0002-7414-1813;0000-0002-2843-3110;", "linkedin": "subba-reddy-oota-11a91254/;khushbupahwa;;maneesh-singh-3523ab9/;manishsgupta/;", "or_profile": "~SUBBA_REDDY_OOTA1;~Khushbu_Pahwa1;~mounika_marreddy1;~Maneesh_Kumar_Singh1;~Manish_Gupta1;~Bapi_Raju_Surampudi1", "aff": "MPI-SWS;Rice University;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Spector Inc;Microsoft;", "aff_domain": "mpi-sws.org;rice.edu;uni-bonn.de;spector.com;microsoft.com;", "position": "Visiting Scholar;MS student;Postdoc;Head, AI Research & Technologies;Principal Researcher;", "bibtex": "@misc{\nanonymous2024multimodal,\ntitle={Multi-modal brain encoding models for multi-modal stimuli},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Rllg9nOasE}\n}", "github": "", "project": "", "reviewers": "bruL;28FZ;4K7X;UHTg", "site": "https://openreview.net/forum?id=Rllg9nOasE", "pdf_size": 2517886, "rating": "4;5;6;7", "confidence": "2;5;4;4", "soundness": "2;2;2;3", "novelty": "2;2;3;2", "presentation": "2;2;2;3", "wc_summary": "23;74;71;37", "wc_strengths": "18;20;81;89", "wc_weaknesses": "284;66;351;50", "wc_questions": "31;123;91;164", "wc_limitations": "7;3;7;9", "wc_review": "363;286;601;349", "wc_reply_reviewers": "86;12;267;27", "wc_reply_authors": "914;110;824;15", "reply_reviewers": "3;1;2;1", "reply_authors": "6;3;4;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 51.25, 21.84462176372024 ], "wc_strengths_avg": [ 52.0, 33.12853754695489 ], "wc_weaknesses_avg": [ 187.75, 132.01586078952786 ], "wc_questions_avg": [ 102.25, 48.59719642119286 ], "wc_limitations_avg": [ 6.5, 2.179449471770337 ], "wc_review_avg": [ 399.75, 119.75678477647936 ], "wc_reply_reviewers_avg": [ 98.0, 101.41745411910121 ], "wc_reply_authors_avg": [ 465.75, 405.8955376694846 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5129891760425771, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pEBoAahqUrAJ:scholar.google.com/&scioq=Multi-modal+brain+encoding+models+for+multi-modal+stimuli&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Max Planck Institute for Software Systems;Rice University;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Spector Inc;Microsoft", "aff_unique_dep": ";;;;Microsoft Corporation", "aff_unique_url": "https://www.mpi-sws.org;https://www.rice.edu;https://www.uni-bonn.de/;;https://www.microsoft.com", "aff_unique_abbr": "MPI-SWS;Rice;Uni Bonn;;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "B'MOJO: Hybrid State Space Realizations of Foundation Models with Eidetic and Fading Memory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95153", "id": "RnQdRY1h5v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RnQdRY1h5v", "openreview": "https://openreview.net/forum?id=RnQdRY1h5v", "poster": "/media/PosterPDFs/NeurIPS%202024/95153.png?t=1733726210.1365247", "project": "", "author_site": "Luca Zancato, Arjun Seshadri, Yonatan Dukler, Aditya Sharad Golatkar, Yantao Shen, Benjamin Bowman, Matthew Trager, Alessandro Achille, Stefano Soatto", "tldr": "", "abstract": "We describe a family of architectures to support transductive inference by allowing memory to grow to a finite but a-priori unknown bound while making efficient use of finite resources for inference. Current architectures use such resources to represent data either eidetically over a finite span ('context' in Transformers), or fading over an infinite span (in State Space Models, or SSMs). Recent hybrid architectures have combined eidetic and fading memory, but with limitations that do not allow the designer or the learning process to seamlessly modulate the two, nor to extend the eidetic memory span. We leverage ideas from Stochastic Realization Theory to develop a class of models called B'MOJO to seamlessly combine eidetic and fading memory within an elementary composable module. The overall architecture can be used to implement models that can access short-term eidetic memory 'in-context,' permanent structural memory 'in-weights,' fading memory 'in-state,' and long-term eidetic memory 'in-storage' by natively incorporating retrieval from an asynchronously updated memory. We show that Transformers, existing SSMs such as Mamba, and hybrid architectures such as Jamba are special cases of B'MOJO and describe a basic implementation that can be stacked and scaled efficiently in hardware. We test B'MOJO on transductive inference tasks, such as associative recall, where it outperforms existing SSMs and Hybrid models; as a baseline, we test ordinary language modeling where B'MOJO achieves perplexity comparable to similarly-sized Transformers and SSMs up to 1.4B parameters, while being up to 10% faster to train. Finally, we test whether models trained inductively on a-priori bounded sequences (up to 8K tokens) can still perform transductive inference on sequences many-fold longer. B'MOJO's ability to modulate eidetic and fading memory results in better inference on longer sequences tested up to 32K tokens, four-fold the length of the longest sequences seen during training.", "keywords": "Sequence Models;Language Models;State Space Models;Hybrid Architectures", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Luca Zancato;Arjun Seshadri;Yonatan Dukler;Aditya Golatkar;Yantao Shen;Benjamin Bowman;Matthew Trager;Alessandro Achille;Stefano Soatto", "authorids": "~Luca_Zancato1;~Arjun_Seshadri1;~Yonatan_Dukler1;~Aditya_Golatkar1;~Yantao_Shen2;~Benjamin_Bowman1;~Matthew_Trager2;~Alessandro_Achille1;~Stefano_Soatto3", "gender": "M;;M;M;M;;;M;", "homepage": ";;https://dukleryoni.github.io/;https://adityagolatkar.github.io;https://scholar.google.com.hk/citations?user=bEctTN0AAAAJ&hl=zh-CN;https://www.benjamin-bowman.com/;;;", "dblp": "274/1481;;242/3844;;86/3372;248/2643;176/1439;190/7328;", "google_scholar": "Z2Mhh2UAAAAJ;;;IMAwpekAAAAJ;https://scholar.google.com.hk/citations?user=bEctTN0AAAAJ;zYZ_FNEAAAAJ;g3sXAWkAAAAJ;;", "orcid": ";;;;;;;;", "linkedin": ";;;;;benjamin-bowman314;;;", "or_profile": "~Luca_Zancato1;~Arjun_Seshadri1;~Yonatan_Dukler1;~Aditya_Golatkar1;~Yantao_Shen2;~Benjamin_Bowman1;~Matthew_Trager2;~Alessandro_Achille1;~Stefano_Soatto3", "aff": "Amazon Web Services;;AWS AI Labs;Amazon;Amazon;Amazon;Amazon;Amazon;", "aff_domain": "amazon.it;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;", "position": "Applied Scientist;;Researcher;Researcher;Researcher;Researcher;Applied Scientist;Applied Research Scientist;", "bibtex": "@inproceedings{\nzancato2024bmojo,\ntitle={B'{MOJO}: Hybrid State Space Realizations of Foundation Models with Eidetic and Fading Memory},\nauthor={Luca Zancato and Arjun Seshadri and Yonatan Dukler and Aditya Golatkar and Yantao Shen and Benjamin Bowman and Matthew Trager and Alessandro Achille and Stefano Soatto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RnQdRY1h5v}\n}", "github": "", "reviewers": "majr;MJ98;PLgm;Wgie", "pdf_size": 1391964, "rating": "3;5;6;7", "confidence": "4;4;2;4", "soundness": "2;2;3;4", "novelty": "3;2;4;3", "presentation": "3;2;2;3", "wc_summary": "49;55;226;251", "wc_strengths": "93;63;31;96", "wc_weaknesses": "135;590;32;189", "wc_questions": "97;229;113;3", "wc_limitations": "4;2;7;11", "wc_review": "378;939;409;550", "wc_reply_reviewers": "0;532;0;33", "wc_reply_authors": "0;924;0;0", "reply_reviewers": "0;2;0;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 145.25, 93.69198204755837 ], "wc_strengths_avg": [ 70.75, 26.328454189336675 ], "wc_weaknesses_avg": [ 236.5, 211.74335880966845 ], "wc_questions_avg": [ 110.5, 80.29165585538762 ], "wc_limitations_avg": [ 6.0, 3.391164991562634 ], "wc_review_avg": [ 569.0, 223.2386615261792 ], "wc_reply_reviewers_avg": [ 141.25, 226.0015210125808 ], "wc_reply_authors_avg": [ 231.0, 400.10373654841067 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.29277002188455997, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4386040810297442972&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "amazon.it;;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon Web Services", "aff_unique_url": "https://aws.amazon.com", "aff_unique_abbr": "AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LACIE: Listener-Aware Finetuning for Calibration in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95152", "id": "RnvgYd9RAh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RnvgYd9RAh", "openreview": "https://openreview.net/forum?id=RnvgYd9RAh", "poster": "", "project": "", "author_site": "Elias Stengel-Eskin, Peter Hase, Mohit Bansal", "tldr": "", "abstract": "When answering questions, large language models (LLMs) can convey not only an answer to the question, but a level of confidence about the answer being correct. This includes explicit markers of confidence (e.g. giving a numeric confidence score) as well as implicit markers, like using an authoritative tone or elaborating with additional knowledge of a subject. For LLMs to be trustworthy sources of knowledge, the confidence they convey should match their actual expertise on a topic; however, this is currently not the case, with most models tending towards overconfidence. To calibrate both implicit and explicit confidence markers, we introduce a pragmatic, listener-aware finetuning method (LACIE) that directly models the listener, considering not only whether an answer is right, but whether it will be accepted by a listener. Specifically, we cast calibration as a preference optimization problem, creating data via a two-agent speaker-listener game, where a speaker model\u2019s outputs are judged by a simulated listener. We then finetune three different LLMs (Mistral-7B, Llama3-8B, Llama3-70B) with LACIE, and show that the models resulting from this multi-agent optimization are better calibrated on TriviaQA with respect to a simulated listener. Crucially, these trends transfer to human listeners, helping them correctly predict model correctness: we conduct a human evaluation where annotators accept or reject an LLM\u2019s answers to trivia questions, finding that training with LACIE results in 47% fewer incorrect answers being accepted while maintaining the same level of acceptance for correct answers. Furthermore, LACIE generalizes to another dataset, resulting in a large increase in truthfulness on TruthfulQA when trained on TriviaQA. Our analysis indicates that LACIE leads to a better separation in confidence between correct and incorrect examples. Qualitatively, we find that a LACIE-trained model hedges more when uncertain and adopts implicit cues to signal certainty when it is correct, such as using an authoritative tone or including details. Finally, finetuning with our listener- aware method leads to an emergent increase in model abstention (e.g. saying \u201cI don\u2019t know\u201d) for answers that are likely to be wrong, trading recall for precision.", "keywords": "LLM calibration;uncertainty;question answering;pragmatics;listener-speaker model;LLM confidence estimation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9d2f29fbb6fd20068da560229f2eba909828e398.zip", "author": "Elias Stengel-Eskin;Peter Hase;Mohit Bansal", "authorids": "~Elias_Stengel-Eskin1;~Peter_Hase1;~Mohit_Bansal2", "gender": "M;;M", "homepage": "https://esteng.github.io;;https://www.cs.unc.edu/~mbansal/", "dblp": "212/6138;;32/5243.html", "google_scholar": "gr_ZVSQAAAAJ;;DN8QtscAAAAJ", "orcid": "0000-0002-6689-505X;;", "linkedin": ";;", "or_profile": "~Elias_Stengel-Eskin1;~Peter_Hase1;~Mohit_Bansal2", "aff": "University of North Carolina at Chapel Hill;;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;;unc.edu", "position": "Postdoc;;Full Professor", "bibtex": "@inproceedings{\nstengel-eskin2024lacie,\ntitle={{LACIE}: Listener-Aware Finetuning for Calibration in Large Language Models},\nauthor={Elias Stengel-Eskin and Peter Hase and Mohit Bansal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RnvgYd9RAh}\n}", "github": "", "reviewers": "dvWw;NSp4;rP95;Aw31", "pdf_size": 766663, "rating": "4;6;8;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "131;88;257;82", "wc_strengths": "62;32;210;37", "wc_weaknesses": "133;62;411;8", "wc_questions": "44;21;158;71", "wc_limitations": "4;1;27;42", "wc_review": "374;204;1063;240", "wc_reply_reviewers": "0;43;37;48", "wc_reply_authors": "48;99;32;23", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 139.5, 70.4219426031404 ], "wc_strengths_avg": [ 85.25, 72.91561904009319 ], "wc_weaknesses_avg": [ 153.5, 155.13623045568693 ], "wc_questions_avg": [ 73.5, 51.89653167601858 ], "wc_limitations_avg": [ 18.5, 16.889345754054535 ], "wc_review_avg": [ 470.25, 348.0376236845666 ], "wc_reply_reviewers_avg": [ 32.0, 18.881207588499205 ], "wc_reply_authors_avg": [ 50.5, 29.3981291921782 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11187687766000653985&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "cs.unc.edu;;unc.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "SCaR: Refining Skill Chaining for Long-Horizon Robotic Manipulation via Dual Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95151", "id": "RnxJc4vTVi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RnxJc4vTVi", "openreview": "https://openreview.net/forum?id=RnxJc4vTVi", "poster": "/media/PosterPDFs/NeurIPS%202024/95151.png?t=1731690215.664542", "project": "", "author_site": "Zixuan Chen, Ze Ji, Jing Huo, Yang Gao", "tldr": "", "abstract": "Long-horizon robotic manipulation tasks typically involve a series of interrelated sub-tasks spanning multiple execution stages. Skill chaining offers a feasible solution for these tasks by pre-training the skills for each sub-task and linking them sequentially. However, imperfections in skill learning or disturbances during execution can lead to the accumulation of errors in skill chaining process, resulting in execution failures. In this paper, we investigate how to achieve stable and smooth skill chaining for long-horizon robotic manipulation tasks. Specifically, we propose a novel skill chaining framework called Skill Chaining via Dual Regularization (SCaR). This framework applies dual regularization to sub-task skill pre-training and fine-tuning, which not only enhances the intra-skill dependencies within each sub-task skill but also reinforces the inter-skill dependencies between sequential sub-task skills, thus ensuring smooth skill chaining and stable long-horizon execution. We evaluate the SCaR framework on two representative long-horizon robotic manipulation simulation benchmarks: IKEA furniture assembly and kitchen organization. Additionally, we conduct a simple real-world validation in tabletop robot pick-and-place tasks. The experimental results show that, with the support of SCaR, the robot achieves a higher success rate in long-horizon tasks compared to relevant baselines and demonstrates greater robustness to perturbations.", "keywords": "long-horizon manipulation;sub-task;skill chaining;robot learning;reinforcement learning;imitation learning", "primary_area": "robotics", "supplementary_material": "", "author": "Zixuan Chen;Ze Ji;Jing Huo;Yang Gao", "authorids": "~Zixuan_Chen3;~Ze_Ji1;~Jing_Huo2;~Yang_Gao3", "gender": "F;M;F;M", "homepage": "https://cs.nju.edu.cn/rl/index.htm;https://www.cardiff.ac.uk/people/view/542654-ji-ze;https://cs.nju.edu.cn/huojing;https://cs.nju.edu.cn/gaoyang/", "dblp": ";;38/9090;89/4402-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;HKK1BdgAAAAJ;https://scholar.google.com.tw/citations?user=CJwLwzQAAAAJ", "orcid": "0000-0002-9744-1157;;0000-0002-8504-455X;", "linkedin": ";;;", "or_profile": "~Zixuan_Chen3;~Ze_Ji1;~Jing_Huo2;~Yang_Gao3", "aff": "Nanjing University;Cardiff University;Nanjing University;Nanjing University", "aff_domain": "nju.edu;cardiff.ac.uk;nju.edu.cn;nju.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024scar,\ntitle={{SC}aR: Refining Skill Chaining for Long-Horizon Robotic Manipulation via Dual Regularization},\nauthor={Zixuan Chen and Ze Ji and Jing Huo and Yang Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RnxJc4vTVi}\n}", "github": "", "reviewers": "ysj9;2GPv;uZgV;9LLM", "pdf_size": 26258354, "rating": "6;6;6;7", "confidence": "4;5;4;3", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "75;125;202;86", "wc_strengths": "59;58;36;84", "wc_weaknesses": "73;269;158;101", "wc_questions": "38;65;27;68", "wc_limitations": "7;35;2;26", "wc_review": "252;552;425;365", "wc_reply_reviewers": "53;20;107;12", "wc_reply_authors": "39;26;57;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.0, 49.78453575157651 ], "wc_strengths_avg": [ 59.25, 16.990806337546196 ], "wc_weaknesses_avg": [ 150.25, 75.09119455701847 ], "wc_questions_avg": [ 49.5, 17.471405209656147 ], "wc_limitations_avg": [ 17.5, 13.5 ], "wc_review_avg": [ 398.5, 108.22314909482166 ], "wc_reply_reviewers_avg": [ 48.0, 37.36977388210959 ], "wc_reply_authors_avg": [ 37.0, 12.708265027138834 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17857831419917920212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu;cardiff.ac.uk;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Nanjing University;Cardiff University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.cardiff.ac.uk", "aff_unique_abbr": "Nanjing U;Cardiff", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "QVAE-Mole: The Quantum VAE with Spherical Latent Variable Learning for 3-D Molecule Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95150", "id": "RqvesBxqDo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RqvesBxqDo", "openreview": "https://openreview.net/forum?id=RqvesBxqDo", "poster": "/media/PosterPDFs/NeurIPS%202024/95150.png?t=1731228472.736515", "project": "", "author_site": "Huaijin Wu, Xinyu Ye, Junchi Yan", "tldr": "", "abstract": "Molecule generation ideally in its 3-D form has enjoyed wide applications in material, chemistry, life science, etc. We propose the first quantum parametric circuit for 3-D molecule generation for its potential quantum advantage especially considering the arrival of Noisy Intermediate-Scale Quantum (NISQ) era. We choose the Variational AutoEncoder (VAE) scheme for its simplicity and one-shot generation ability, which we believe is more quantum-friendly compared with the auto-regressive generative models or diffusion models as used in classic approaches. Specifically, we present a quantum encoding scheme designed for 3-D molecules with qubits complexity $\\mathcal{O}(C\\log n)$ ($n$ is the number of atoms) and adopt a von Mises-Fisher (vMF) distributed latent space to meet the inherent coherence of the quantum system. We further design to encode conditions into quantum circuits for property-specified generation. Experimentally, our model could generate plausible 3-D molecules and achieve competitive quantitative performance with significantly reduced circuit parameters compared with their classic counterparts. The source code will be released upon publication.", "keywords": "quantum machine learning;quantum generative model;quantum VAE;3-D molecule generation;conditional generation", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Huaijin Wu;Xinyu Ye;Junchi Yan", "authorids": "~Huaijin_Wu1;~Xinyu_Ye2;~Junchi_Yan2", "gender": "F;;", "homepage": "https://github.com/wuhuaijin;;", "dblp": "314/5447;;", "google_scholar": ";;", "orcid": "0009-0005-2626-3460;;", "linkedin": ";;", "or_profile": "~Huaijin_Wu1;~Xinyu_Ye2;~Junchi_Yan2", "aff": "Shanghai Jiaotong University;;", "aff_domain": "sjtu.edu.cn;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nwu2024qvaemole,\ntitle={{QVAE}-Mole: The Quantum {VAE} with Spherical Latent Variable Learning for 3-D Molecule Generation},\nauthor={Huaijin Wu and Xinyu Ye and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RqvesBxqDo}\n}", "github": "", "reviewers": "CAJp;hKnP;5Ebq;VsUh", "pdf_size": 932136, "rating": "4;7;7;7", "confidence": "4;2;3;3", "soundness": "1;3;3;4", "novelty": "2;3;2;4", "presentation": "2;3;2;4", "wc_summary": "194;59;42;136", "wc_strengths": "71;85;11;120", "wc_weaknesses": "664;66;12;96", "wc_questions": "61;3;54;165", "wc_limitations": "36;3;22;47", "wc_review": "1026;216;141;564", "wc_reply_reviewers": "616;23;9;101", "wc_reply_authors": "3309;226;497;1328", "reply_reviewers": "1;1;1;1", "reply_authors": "9;3;3;4", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 107.75, 61.10799865811349 ], "wc_strengths_avg": [ 71.75, 39.35336707322513 ], "wc_weaknesses_avg": [ 209.5, 264.1263901998435 ], "wc_questions_avg": [ 70.75, 58.84035604922866 ], "wc_limitations_avg": [ 27.0, 16.446884203398525 ], "wc_review_avg": [ 486.75, 349.8595253812593 ], "wc_reply_reviewers_avg": [ 187.25, 250.00837485972346 ], "wc_reply_authors_avg": [ 1340.0, 1207.1402155507867 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.75, 2.48746859276655 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16677726091374650800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "sjtu.edu.cn;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Neural Localizer Fields for Continuous 3D Human Pose and Shape Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95149", "id": "RrTjcbcHEH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RrTjcbcHEH", "openreview": "https://openreview.net/forum?id=RrTjcbcHEH", "poster": "", "project": "", "author_site": "Istv\u00e1n S\u00e1r\u00e1ndi, Gerard Pons-Moll", "tldr": "", "abstract": "With the explosive growth of available training data, single-image 3D human modeling is ahead of a transition to a data-centric paradigm.\nA key to successfully exploiting data scale is to design flexible models that can be supervised from various heterogeneous data sources produced by different researchers or vendors.\nTo this end, we propose a simple yet powerful paradigm for seamlessly unifying different human pose and shape-related tasks and datasets.\nOur formulation is centered on the ability - both at training and test time - to query any arbitrary point of the human volume, and obtain its estimated location in 3D.\nWe achieve this by learning a continuous neural field of body point localizer functions, each of which is a differently parameterized 3D heatmap-based convolutional point localizer (detector).\nFor generating parametric output, we propose an efficient post-processing step for fitting SMPL-family body models to nonparametric joint and vertex predictions.\nWith this approach, we can naturally exploit differently annotated data sources including mesh, 2D/3D skeleton and dense pose, without having to convert between them, and thereby train large-scale 3D human mesh and skeleton estimation models that outperform the state-of-the-art on several public benchmarks including 3DPW, EMDB, EHF, SSP-3D and AGORA by a considerable margin.\nWe release our code and models to foster downstream research.", "keywords": "3D human pose estimation;human shape estimation;computer vision;human mesh recovery", "primary_area": "machine_vision", "supplementary_material": "/attachment/4225600235b9f9a3cc10e9d522b0d81ee9cc0c54.zip", "author": "Istv\u00e1n S\u00e1r\u00e1ndi;Gerard Pons-Moll", "authorids": "~Istv\u00e1n_S\u00e1r\u00e1ndi1;~Gerard_Pons-Moll2", "gender": "M;", "homepage": "https://virtualhumans.mpi-inf.mpg.de;https://IstvanSarandi.com", "dblp": "66/8652.html;216/2004", "google_scholar": "https://scholar.google.de/citations?user=OpXMNnMAAAAJ;ZeCLyhAAAAAJ", "orcid": "0000-0001-5115-7794;0000-0002-5686-3214", "linkedin": "gerard-pons-moll-b19bb6164/;istvansarandi/", "or_profile": "~Gerard_Pons-moll1;~Istvan_Sarandi1", "aff": "Saarland Informatics Campus, Max-Planck Institute;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "mpi-inf.mpg.de;uni-tuebingen.de", "position": "Independent Research Group Leader;Postdoc", "bibtex": "@inproceedings{\ns{\\'a}r{\\'a}ndi2024neural,\ntitle={Neural Localizer Fields for Continuous 3D Human Pose and Shape Estimation},\nauthor={Istv{\\'a}n S{\\'a}r{\\'a}ndi and Gerard Pons-Moll},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RrTjcbcHEH}\n}", "github": "", "reviewers": "M7cf;YzeS;R9xH;Dcsh", "pdf_size": 20345885, "rating": "4;5;7;7", "confidence": "5;4;4;5", "soundness": "4;3;3;2", "novelty": "3;2;4;3", "presentation": "4;3;3;2", "wc_summary": "140;95;76;140", "wc_strengths": "64;112;88;116", "wc_weaknesses": "88;208;33;624", "wc_questions": "65;67;11;272", "wc_limitations": "1;12;8;4", "wc_review": "358;494;216;1156", "wc_reply_reviewers": "0;18;0;186", "wc_reply_authors": "0;0;0;104", "reply_reviewers": "0;1;0;2", "reply_authors": "1;1;1;3", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 112.75, 28.065770967497045 ], "wc_strengths_avg": [ 95.0, 20.85665361461421 ], "wc_weaknesses_avg": [ 238.25, 231.52794107839338 ], "wc_questions_avg": [ 103.75, 99.70299644443992 ], "wc_limitations_avg": [ 6.25, 4.14578098794425 ], "wc_review_avg": [ 556.0, 360.0861008147912 ], "wc_reply_reviewers_avg": [ 51.0, 78.28793010419933 ], "wc_reply_authors_avg": [ 26.0, 45.033320996790806 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7968754900070417403&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mpi-inf.mpg.de;uni-tuebingen.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck Institute;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": "Informatics;", "aff_unique_url": "https://www.mpi-sws.org;https://www.uni-tuebingen.de/", "aff_unique_abbr": "MPI-SWS;Uni T\u00fcbingen", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Saarland;T\u00fcbingen", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "OTTER: Effortless Label Distribution Adaptation of Zero-shot Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95148", "id": "RsawwSBCs7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RsawwSBCs7", "openreview": "https://openreview.net/forum?id=RsawwSBCs7", "poster": "", "project": "", "author_site": "Changho Shin, Jitian Zhao, Sonia Cromp, Harit Vishwakarma, Frederic Sala", "tldr": "", "abstract": "Popular zero-shot models suffer due to artifacts inherited from pretraining. One particularly detrimental issue, caused by unbalanced web-scale pretraining data, is mismatched label distribution. Existing approaches that seek to repair the label distribution are not suitable in zero-shot settings, as they have mismatching requirements, such as needing access to labeled downstream task data or knowledge of the true label balance in the pretraining distribution. We sidestep these challenges and introduce a simple and lightweight approach to adjust pretrained model predictions via optimal transport. Our technique requires only an estimate of the label distribution of a downstream task. Theoretically, we characterize the improvement produced by our procedure under certain mild conditions and provide bounds on the error caused by misspecification. Empirically, we validate our method in a wide array of zero-shot image and text classification tasks, improving accuracy by 4.8% and 15.9% on average, and beating baselines like prior matching---often by significant margins---in 17 out of 21 datasets.", "keywords": "zero-shot classification;label distribution;adaptation", "primary_area": "other", "supplementary_material": "", "author": "Changho Shin;Jitian Zhao;Sonia Cromp;Harit Vishwakarma;Frederic Sala", "authorids": "~Changho_Shin2;~Jitian_Zhao1;~Sonia_Cromp1;~Harit_Vishwakarma1;~Frederic_Sala1", "gender": ";F;F;M;M", "homepage": ";https://jzhao326.github.io/;;https://harit7.github.io;https://pages.cs.wisc.edu/~fredsala/", "dblp": ";;;207/7622;133/3602", "google_scholar": "VpvIQAcAAAAJ;;;pJF_ZZUAAAAJ;9KhIkNkAAAAJ", "orcid": ";;;;", "linkedin": ";jitian-zhao-931b5a175/;sonia-cromp;harit7;", "or_profile": "~Changho_Shin2;~Jitian_Zhao1;~Sonia_Cromp1;~Harit_Vishwakarma1;~Frederic_Sala1", "aff": "University of Wisconsin, Madison;University of Wisconsin - Madison;NASA;University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;wisc.edu;nasa.gov;wisc.edu;wisc.edu", "position": "PhD student;PhD student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nshin2024otter,\ntitle={{OTTER}: Effortless Label Distribution Adaptation of Zero-shot Models},\nauthor={Changho Shin and Jitian Zhao and Sonia Cromp and Harit Vishwakarma and Frederic Sala},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RsawwSBCs7}\n}", "github": "", "reviewers": "M3dD;t22L;t3Bz", "pdf_size": 1058160, "rating": "5;7;8", "confidence": "4;4;4", "soundness": "3;4;3", "novelty": "1;3;3", "presentation": "3;3;3", "wc_summary": "52;60;82", "wc_strengths": "9;64;57", "wc_weaknesses": "124;212;174", "wc_questions": "164;58;97", "wc_limitations": "1;6;12", "wc_review": "350;400;422", "wc_reply_reviewers": "38;22;169", "wc_reply_authors": "129;70;672", "reply_reviewers": "1;1;3", "reply_authors": "2;2;3", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.66666666666667, 12.684198393626966 ], "wc_strengths_avg": [ 43.333333333333336, 24.44494948973214 ], "wc_weaknesses_avg": [ 170.0, 36.03701800463888 ], "wc_questions_avg": [ 106.33333333333333, 43.77467558100485 ], "wc_limitations_avg": [ 6.333333333333333, 4.4969125210773475 ], "wc_review_avg": [ 390.6666666666667, 30.12566274052001 ], "wc_reply_reviewers_avg": [ 76.33333333333333, 65.84999789082929 ], "wc_reply_authors_avg": [ 290.3333333333333, 270.9518202354228 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7854837433541730541&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "wisc.edu;wisc.edu;nasa.gov;wisc.edu;wisc.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Wisconsin;University of Wisconsin-Madison;National Aeronautics and Space Administration", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.nasa.gov", "aff_unique_abbr": "UW;UW-Madison;NASA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring Adversarial Robustness of Deep State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95147", "id": "Rsb32EBmbj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Rsb32EBmbj", "openreview": "https://openreview.net/forum?id=Rsb32EBmbj", "poster": "/media/PosterPDFs/NeurIPS%202024/95147.png?t=1731691021.0901237", "project": "", "author_site": "Biqing Qi, Yiang Luo, Junqi Gao, Pengfei Li, Kai Tian, Zhiyuan Ma, Bowen Zhou", "tldr": "", "abstract": "Deep State Space Models (SSMs) have proven effective in numerous task scenarios but face significant security challenges due to Adversarial Perturbations (APs) in real-world deployments. Adversarial Training (AT) is a mainstream approach to enhancing Adversarial Robustness (AR) and has been validated on various traditional DNN architectures. However, its effectiveness in improving the AR of SSMs remains unclear.\nWhile many enhancements in SSM components, such as integrating Attention mechanisms and expanding to data-dependent SSM parameterizations, have brought significant gains in Standard Training (ST) settings, their potential benefits in AT remain unexplored. To investigate this, we evaluate existing structural variants of SSMs with AT to assess their AR performance. We observe that pure SSM structures struggle to benefit from AT, whereas incorporating Attention yields a markedly better trade-off between robustness and generalization for SSMs in AT compared to other components. Nonetheless, the integration of Attention also leads to Robust Overfitting (RO) issues.\nTo understand these phenomena, we empirically and theoretically analyze the output error of SSMs under AP. We find that fixed-parameterized SSMs have output error bounds strictly related to their parameters, limiting their AT benefits, while input-dependent SSMs may face the problem of error explosion. Furthermore, we show that the Attention component effectively scales the output error of SSMs during training, enabling them to benefit more from AT, but at the cost of introducing RO due to its high model complexity.\nInspired by this, we propose a simple and effective Adaptive Scaling (AdS) mechanism that brings AT performance close to Attention-integrated SSMs without introducing the issue of RO.", "keywords": "Adversarial Robustness; Robustness Exploration; State Space Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Biqing Qi;Yiang Luo;Junqi Gao;Pengfei Li;Kai Tian;Zhiyuan Ma;Bowen Zhou", "authorids": "~Biqing_Qi1;~Yiang_Luo2;~Junqi_Gao1;~Pengfei_Li10;~Kai_Tian3;~Zhiyuan_Ma1;~Bowen_Zhou8", "gender": "M;M;M;;M;;M", "homepage": "https://biqing-qi.github.io/;https://github.com/Norma7755;;http://c3i.ee.tsinghua.edu.cn/author/%E7%94%B0%E9%94%B4/;;http://web.ee.tsinghua.edu.cn/zhoubowen/zh_CN/index.htm?eqid=b894e49b0000ec7d0000000464857b51;", "dblp": "233/4949.html;;81/9266.html;;138/5978-5;;", "google_scholar": ";;;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": "0000-0002-4072-0577;0009-0003-2559-8926;0009-0007-1644-5812;;;0000-0003-1062-9526;0009-0006-1880-1297", "linkedin": ";;;;;;", "or_profile": "~Biqing_Qi1;~Yiang_Luo2;~Junqi_Gao1;~Kai_Tian3;~Zhiyuan_Ma1;~Bowen_Zhou8;~Li_Pengfei1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Shanghai Artificial Intelligence Laboratory;Tsinghua University;Tsinghua University;Tsinghua University;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;pjlab.org.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;stu.hit.edu.cn", "position": "PhD student;MS student;Intern;MS student;Postdoc;Full Professor;MS student", "bibtex": "@inproceedings{\nqi2024exploring,\ntitle={Exploring Adversarial Robustness of Deep State Space Models},\nauthor={Biqing Qi and Yiang Luo and Junqi Gao and Pengfei Li and Kai Tian and Zhiyuan Ma and Bowen Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Rsb32EBmbj}\n}", "github": "", "reviewers": "a6wJ;eBVC;PGjA", "pdf_size": 702225, "rating": "5;5;6", "confidence": "5;4;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "70;94;56", "wc_strengths": "43;169;30", "wc_weaknesses": "111;138;37", "wc_questions": "61;48;35", "wc_limitations": "9;1;4", "wc_review": "294;450;162", "wc_reply_reviewers": "27;135;14", "wc_reply_authors": "733;1413;1063", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 15.691469727919761 ], "wc_strengths_avg": [ 80.66666666666667, 62.68616717869705 ], "wc_weaknesses_avg": [ 95.33333333333333, 42.695302890234764 ], "wc_questions_avg": [ 48.0, 10.614455552060438 ], "wc_limitations_avg": [ 4.666666666666667, 3.2998316455372216 ], "wc_review_avg": [ 302.0, 117.71151175649729 ], "wc_reply_reviewers_avg": [ 58.666666666666664, 54.23610933276424 ], "wc_reply_authors_avg": [ 1069.6666666666667, 277.648858972784 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1863069279188580653&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "hit.edu.cn;hit.edu.cn;pjlab.org.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;stu.hit.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;2;2;0", "aff_unique_norm": "Harbin Institute of Technology;Shanghai Artificial Intelligence Laboratory;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;http://www.shailab.org/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HIT;Shanghai AI Lab;THU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SymILO: A Symmetry-Aware Learning Framework for Integer Linear Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95146", "id": "RtMyTzIW6l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RtMyTzIW6l", "openreview": "https://openreview.net/forum?id=RtMyTzIW6l", "poster": "", "project": "", "author_site": "Qian Chen, Tianjian Zhang, Linxin Yang, Qingyu Han, Akang Wang, Ruoyu Sun, Xiaodong Luo, Tsung-Hui Chang", "tldr": "", "abstract": "Integer linear programs (ILPs) are commonly employed to model diverse practical problems such as scheduling and planning. \nRecently, machine learning techniques have been utilized to solve ILPs. A straightforward idea is to train a model via supervised learning, with an ILP as the input and an optimal solution as the label. An ILP is symmetric if its variables can be permuted without changing the problem structure, resulting in numerous equivalent and optimal solutions. Randomly selecting an optimal solution as the label can introduce variability in the training data, which may hinder the model from learning stable patterns. In this work, we incorporate the intrinsic symmetry of ILPs and propose a novel training framework called SymILO. Specifically, we modify the learning task by introducing solution permutation along with neural network weights as learnable parameters and then design an alternating algorithm to jointly optimize the loss function.\nWe conduct extensive experiments on ILPs involving different symmetries and the computational results demonstrate that our symmetry-aware approach significantly outperforms three existing methods----achieving $50.3\\\\%$, $66.5\\\\%$, and $45.4\\\\%$ average improvements, respectively.", "keywords": "integer linear programming;symmetry;machine learning;neural networks", "primary_area": "optimization", "supplementary_material": "", "author": "Qian Chen;Tianjian Zhang;Linxin Yang;Qingyu Han;Akang Wang;Ruoyu Sun;Xiaodong Luo;Tsung-Hui Chang", "authorids": "~Qian_Chen10;~Tianjian_Zhang1;~Linxin_Yang1;~Qingyu_Han2;~Akang_Wang1;~Ruoyu_Sun1;~Xiaodong_Luo2;~Tsung-Hui_Chang1", "gender": ";M;M;M;Not Specified;;M;", "homepage": ";https://sse.cuhk.edu.cn/en/teacher/309;;https://www.math.sdu.edu.cn/;https://akangw.github.io/;https://ruoyus.github.io/;https://www.sribd.cn/teacher/240;", "dblp": ";190/6500;;;222/3290;30/9879-1;16/6157.html;", "google_scholar": "O74Oj08AAAAJ;;https://scholar.google.com/citations?hl=en;;TyYzzmoAAAAJ;PsfzbCMAAAAJ;;", "orcid": ";;;;0000-0002-3325-8441;;0000-0001-9552-0735;", "linkedin": ";;;;wangakang/;;;", "or_profile": "~Qian_Chen10;~Tianjian_Zhang1;~Linxin_Yang1;~Qingyu_Han2;~Akang_Wang1;~Ruoyu_Sun1;~Xiaodong_Luo2;~Tsung-Hui_Chang1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong;Shenzhen Research Institute of Big Data;", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;sribd.cn;", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Associate Professor;Principal Researcher;", "bibtex": "@inproceedings{\nchen2024symilo,\ntitle={Sym{ILO}: A Symmetry-Aware Learning Framework for Integer Linear Optimization},\nauthor={Qian Chen and Tianjian Zhang and Linxin Yang and Qingyu Han and Akang Wang and Ruoyu Sun and Xiaodong Luo and Tsung-Hui Chang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RtMyTzIW6l}\n}", "github": "", "reviewers": "nCep;cWBm;7rcN;HH73", "pdf_size": 876265, "rating": "6;6;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "47;115;81;138", "wc_strengths": "48;131;46;155", "wc_weaknesses": "2;145;197;100", "wc_questions": "140;123;5;347", "wc_limitations": "2;23;15;13", "wc_review": "239;537;344;753", "wc_reply_reviewers": "77;9;30;17", "wc_reply_authors": "605;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 34.455587355318734 ], "wc_strengths_avg": [ 95.0, 48.74935897014442 ], "wc_weaknesses_avg": [ 111.0, 71.6833313957994 ], "wc_questions_avg": [ 153.75, 123.09219106019682 ], "wc_limitations_avg": [ 13.25, 7.495832175282475 ], "wc_review_avg": [ 468.25, 196.0884685543747 ], "wc_reply_reviewers_avg": [ 33.25, 26.34743820563965 ], "wc_reply_authors_avg": [ 151.25, 261.9726846447927 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12188511015553448104&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cuhk.edu.cn;cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;sribd.cn;", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;1", "aff_unique_norm": "Chinese University of Hong Kong;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.sribd.cn", "aff_unique_abbr": "CUHK;", "aff_campus_unique_index": "0;0;0;1;1", "aff_campus_unique": "Shenzhen;Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning a Single Neuron Robustly to Distributional Shifts and Adversarial Label Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95145", "id": "Rv5dUg4JcZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Rv5dUg4JcZ", "openreview": "https://openreview.net/forum?id=Rv5dUg4JcZ", "poster": "", "project": "", "author_site": "Shuyao Li, Sushrut Karmalkar, Ilias Diakonikolas, Jelena Diakonikolas", "tldr": "", "abstract": "We study the problem of learning a single neuron with respect to the $L_2^2$-loss in the presence of adversarial distribution shifts, where the labels can be arbitrary, and the goal is to find a \"best-fit\" function.\nMore precisely, given training samples from a reference distribution $p_0$, \nthe goal is to approximate the vector $\\mathbf{w}^*$\nwhich minimizes the squared loss with respect to the worst-case distribution \nthat is close in $\\chi^2$-divergence to $p_{0}$.\nWe design a computationally efficient algorithm that recovers a vector $ \\hat{\\mathbf{w}}$\nsatisfying \n$\\mathbb{E}\\_{p^*} (\\sigma(\\hat{\\mathbf{w}} \\cdot \\mathbf{x}) - y)^2 \\leq C \\hspace{0.2em} \\mathbb{E}\\_{p^*} (\\sigma(\\mathbf{w}^* \\cdot \\mathbf{x}) - y)^2 + \\epsilon$, where $C>1$ is a dimension-independent constant and $(\\mathbf{w}^*, p^*)$ is the witness attaining the min-max risk\n$\\min_{\\mathbf{w}:\\|\\mathbf{w}\\| \\leq W} \\max\\_{p} \\mathbb{E}\\_{(\\mathbf{x}, y) \\sim p} (\\sigma(\\mathbf{w} \\cdot \\mathbf{x}) - y)^2 - \\nu \\chi^2(p, p_0)$.\nOur algorithm follows the primal-dual framework and is \ndesigned by directly bounding the risk with respect to the original, nonconvex $L_2^2$ loss.\nFrom an optimization standpoint, our work opens new avenues for the design of primal-dual algorithms under structured nonconvexity.", "keywords": "Distributionally-Robust Optimization;Nonconvex Optimization;Primal-Dual Algorithm", "primary_area": "learning_theory", "supplementary_material": "", "author": "Shuyao Li;Sushrut Karmalkar;Ilias Diakonikolas;Jelena Diakonikolas", "authorids": "~Shuyao_Li1;~Sushrut_Karmalkar2;~Ilias_Diakonikolas1;~Jelena_Diakonikolas2", "gender": "M;;M;F", "homepage": "https://shuyaoli.github.io/;;http://www.iliasdiakonikolas.org/;http://www.jelena-diakonikolas.com/", "dblp": "183/5509;;d/IliasDiakonikolas;147/5178", "google_scholar": "D8A-8x8AAAAJ;;Vb3FLmkAAAAJ;J8ixfu8AAAAJ", "orcid": "0009-0000-0170-1018;;;0000-0003-3439-0310", "linkedin": ";;;", "or_profile": "~Shuyao_Li1;~Sushrut_Karmalkar2;~Ilias_Diakonikolas1;~Jelena_Diakonikolas2", "aff": "Meta;;University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "meta.com;;wisc.edu;wisc.edu", "position": "Intern;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024learning,\ntitle={Learning a Single Neuron Robustly to Distributional Shifts and Adversarial Label Noise},\nauthor={Shuyao Li and Sushrut Karmalkar and Ilias Diakonikolas and Jelena Diakonikolas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Rv5dUg4JcZ}\n}", "github": "", "reviewers": "ehoc;ef6F;3Jnc;aZHR", "pdf_size": 506267, "rating": "5;6;6;7", "confidence": "3;2;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;1;3;3", "wc_summary": "80;76;186;107", "wc_strengths": "49;44;39;78", "wc_weaknesses": "83;112;60;36", "wc_questions": "24;202;34;4", "wc_limitations": "5;10;16;19", "wc_review": "241;444;335;244", "wc_reply_reviewers": "0;122;12;12", "wc_reply_authors": "0;30;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 112.25, 44.2175021908746 ], "wc_strengths_avg": [ 52.5, 15.14100392972672 ], "wc_weaknesses_avg": [ 72.75, 28.101378969723175 ], "wc_questions_avg": [ 66.0, 79.25906888173743 ], "wc_limitations_avg": [ 12.5, 5.408326913195984 ], "wc_review_avg": [ 316.0, 82.99698789715202 ], "wc_reply_reviewers_avg": [ 36.5, 49.60594722409804 ], "wc_reply_authors_avg": [ 7.5, 12.99038105676658 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D_QqVd-ACiUJ:scholar.google.com/&scioq=Learning+a+Single+Neuron+Robustly+to+Distributional+Shifts+and+Adversarial+Label+Noise&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "meta.com;;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Meta;University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "Meta;UW-Madison;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "ROBIN: Robust and Invisible Watermarks for Diffusion Models with Adversarial Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95144", "id": "RvoxlFvnlX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RvoxlFvnlX", "openreview": "https://openreview.net/forum?id=RvoxlFvnlX", "poster": "/media/PosterPDFs/NeurIPS%202024/95144.png?t=1733226998.142773", "project": "", "author_site": "Huayang Huang, Yu Wu, Qian Wang", "tldr": "", "abstract": "Watermarking generative content serves as a vital tool for authentication, ownership protection, and mitigation of potential misuse. Existing watermarking methods face the challenge of balancing robustness and concealment. They empirically inject a watermark that is both invisible and robust and passively achieve concealment by limiting the strength of the watermark, thus reducing the robustness. In this paper, we propose to explicitly introduce a watermark hiding process to actively achieve concealment, thus allowing the embedding of stronger watermarks. To be specific, we implant a robust watermark in an intermediate diffusion state and then guide the model to hide the watermark in the final generated image. We employ an adversarial optimization algorithm to produce the optimal hiding prompt guiding signal for each watermark. The prompt embedding is optimized to minimize artifacts in the generated image, while the watermark is optimized to achieve maximum strength. The watermark can be verified by reversing the generation process. Experiments on various diffusion models demonstrate the watermark remains verifiable even under significant image tampering and shows superior invisibility compared to other state-of-the-art robust watermarking methods.", "keywords": "Diffusion models;Image watermarking", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Huayang Huang;Yu Wu;Qian Wang", "authorids": "~Huayang_Huang1;~Yu_Wu3;~Qian_Wang13", "gender": "F;M;", "homepage": ";https://yu-wu.net;", "dblp": "275/6583;22/0-11;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;23SZHUwAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Huayang_Huang1;~Yu_Wu3;~Qian_Wang13", "aff": "Wuhan University;Wuhan University;", "aff_domain": "whu.edu.cn;whu.edu.cn;", "position": "MS student;Full Professor;", "bibtex": "@inproceedings{\nhuang2024robin,\ntitle={{ROBIN}: Robust and Invisible Watermarks for Diffusion Models with Adversarial Optimization},\nauthor={Huayang Huang and Yu Wu and Qian Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RvoxlFvnlX}\n}", "github": "", "reviewers": "4gSJ;duRp;Axd1;kSaH", "pdf_size": 2765828, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "160;63;91;41", "wc_strengths": "27;37;219;19", "wc_weaknesses": "24;345;323;108", "wc_questions": "163;118;6;2", "wc_limitations": "7;22;1;2", "wc_review": "381;585;640;172", "wc_reply_reviewers": "48;0;24;17", "wc_reply_authors": "40;0;41;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 44.79048448052332 ], "wc_strengths_avg": [ 75.5, 83.0948253503189 ], "wc_weaknesses_avg": [ 200.0, 137.471815293172 ], "wc_questions_avg": [ 72.25, 70.09413313537732 ], "wc_limitations_avg": [ 8.0, 8.396427811873332 ], "wc_review_avg": [ 444.5, 184.55961096621328 ], "wc_reply_reviewers_avg": [ 22.25, 17.239127008059313 ], "wc_reply_authors_avg": [ 20.25, 20.253086184579377 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13913562147332877497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;whu.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Zero-Shot Tokenizer Transfer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95143", "id": "RwBObRsIzC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RwBObRsIzC", "openreview": "https://openreview.net/forum?id=RwBObRsIzC", "poster": "/media/PosterPDFs/NeurIPS%202024/95143.png?t=1731757410.1436498", "project": "", "author_site": "Benjamin Minixhofer, Edoardo Maria Ponti, Ivan Vuli\u0107", "tldr": "", "abstract": "Language models (LMs) are bound to their tokenizer, which maps raw text to a sequence of vocabulary items (tokens). This restricts their flexibility: for example, LMs trained primarily on English may still perform well in other natural and programming languages, but have vastly decreased efficiency due to their English-centric tokenizer. To mitigate this, we should be able to swap the original LM tokenizer with an arbitrary one, on the fly, without degrading performance. Hence, in this work we define a new problem: Zero-Shot Tokenizer Transfer (ZeTT). The challenge at the core of ZeTT is finding embeddings for the tokens in the vocabulary of the new tokenizer. Since prior heuristics for initializing embeddings often perform at chance level in a ZeTT setting, we propose a new solution: we train a hypernetwork taking a tokenizer as input and predicting the corresponding embeddings. We empirically demonstrate that the hypernetwork generalizes to new tokenizers both with encoder (e.g., XLM-R) and decoder LLMs (e.g., Mistral-7B). Our method comes close to the original models' performance in cross-lingual and coding tasks while markedly reducing the length of the tokenized sequence. We also find that the remaining gap can be quickly closed by continued training on less than 1B tokens. Finally, we show that a ZeTT hypernetwork trained for a base (L)LM can also be applied to fine-tuned variants without extra training. Overall, our results make substantial strides toward detaching LMs from their tokenizer.", "keywords": "tokenization;transfer learning;natural language processing;hypernetworks;zero-shot learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Benjamin Minixhofer;Edoardo Ponti;Ivan Vuli\u0107", "authorids": "~Benjamin_Minixhofer1;~Edoardo_Ponti1;~Ivan_Vuli\u01071", "gender": "M;;M", "homepage": "https://github.com/bminixhofer;https://ducdauge.github.io/;https://sites.google.com/site/ivanvulic/", "dblp": "292/4068;178/8829;77/9768", "google_scholar": "P5Z2Pj0AAAAJ;https://scholar.google.ca/citations?user=tklL2q0AAAAJ;ZX8js60AAAAJ", "orcid": ";0000-0002-6308-1050;", "linkedin": ";edoardo-maria-ponti/;ivan-vuli%C4%87-286b4a81/", "or_profile": "~Benjamin_Minixhofer1;~Edoardo_Ponti1;~Ivan_Vuli\u01071", "aff": "Google;NVIDIA;PolyAI Limited", "aff_domain": "google.com;nvidia.com;poly-ai.com", "position": "Student Researcher;Researcher;Senior Scientist", "bibtex": "@inproceedings{\nminixhofer2024zeroshot,\ntitle={Zero-Shot Tokenizer Transfer},\nauthor={Benjamin Minixhofer and Edoardo Ponti and Ivan Vuli{\\'c}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RwBObRsIzC}\n}", "github": "", "reviewers": "mdD9;1Y74;EScf", "pdf_size": 1014041, "rating": "6;7;7", "confidence": "4;4;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "4;4;3", "wc_summary": "111;94;55", "wc_strengths": "50;99;24", "wc_weaknesses": "215;52;64", "wc_questions": "141;88;18", "wc_limitations": "29;1;4", "wc_review": "546;334;165", "wc_reply_reviewers": "26;16;9", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.66666666666667, 23.442601296689656 ], "wc_strengths_avg": [ 57.666666666666664, 31.09483701338357 ], "wc_weaknesses_avg": [ 110.33333333333333, 74.17247168293338 ], "wc_questions_avg": [ 82.33333333333333, 50.37415563119203 ], "wc_limitations_avg": [ 11.333333333333334, 12.552113589175152 ], "wc_review_avg": [ 348.3333333333333, 155.87245498234196 ], "wc_reply_reviewers_avg": [ 17.0, 6.97614984548545 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10934563750262081083&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;nvidia.com;poly-ai.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;NVIDIA;PolyAI Limited", "aff_unique_dep": "Google;NVIDIA Corporation;", "aff_unique_url": "https://www.google.com;https://www.nvidia.com;https://www.poly.ai", "aff_unique_abbr": "Google;NVIDIA;PolyAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Shaving Weights with Occam's Razor: Bayesian Sparsification for Neural Networks using the Marginal Likelihood", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95142", "id": "RwK0tgfptL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RwK0tgfptL", "openreview": "https://openreview.net/forum?id=RwK0tgfptL", "poster": "/media/PosterPDFs/NeurIPS%202024/95142.png?t=1733407411.3060954", "project": "", "author_site": "Rayen Dhahri, Alexander Immer, Bertrand Charpentier, Stephan G\u00fcnnemann, Vincent Fortuin", "tldr": "", "abstract": "Neural network sparsification is a promising avenue to save computational time and memory costs, especially in an age where many successful AI models are becoming too large to naively deploy on consumer hardware. While much work has focused on different weight pruning criteria, the overall sparsifiability of the network, i.e., its capacity to be pruned without quality loss, has often been overlooked. We present Sparsifiability via the Marginal likelihood (SpaM), a sparsification framework that highlights the effectiveness of using the Bayesian marginal likelihood in conjunction with sparsity-inducing priors for making neural networks more sparsifiable. Our approach implements an automatic Occam's razor that selects the most sparsifiable model that still explains the data well, both for structured and unstructured sparsification. In addition, we demonstrate that the pre-computed posterior precision from the Laplace approximation can be re-used to define a cheap pruning criterion, which outperforms many existing (more expensive) approaches. We demonstrate the effectiveness of our framework, especially at high sparsity levels, across a range of different neural network architectures and datasets.", "keywords": "Bayesian deep learning;Bayesian model selection;Marginal likelihood;Laplace approximation;Sparsification;Pruning", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Rayen Dhahri;Alexander Immer;Bertrand Charpentier;Stephan G\u00fcnnemann;Vincent Fortuin", "authorids": "~Rayen_Dhahri1;~Alexander_Immer1;~Bertrand_Charpentier2;~Stephan_G\u00fcnnemann1;~Vincent_Fortuin1", "gender": "M;;;M;M", "homepage": ";;https://sharpenb.github.io/;http://www.daml.in.tum.de;https://fortuin.github.io/", "dblp": ";;222/1875;43/3011;218/7489", "google_scholar": ";;0rqI-ycAAAAJ;;https://scholar.google.ch/citations?user=XBlrYTIAAAAJ", "orcid": ";;;;0000-0002-0640-2671", "linkedin": "rayen-dh;;bertrand-charpentier-76995ab6/;;vincent-fortuin-42426b134/", "or_profile": "~Rayen_Dhahri1;~Alexander_Immer1;~Bertrand_Charpentier2;~Stephan_G\u00fcnnemann1;~Vincent_Fortuin1", "aff": "Department of Informatics, Technische Universit\u00e4t M\u00fcnchen;;Technical University Munich;Technical University Munich;Helmholtz AI", "aff_domain": "in.tum.de;;tum.de;tum.de;helmholtz.ai", "position": "MS student;;PhD student;Professor;Principal Researcher", "bibtex": "@inproceedings{\ndhahri2024shaving,\ntitle={Shaving Weights with Occam's Razor: Bayesian Sparsification for Neural Networks using the Marginal Likelihood},\nauthor={Rayen Dhahri and Alexander Immer and Bertrand Charpentier and Stephan G{\\\"u}nnemann and Vincent Fortuin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RwK0tgfptL}\n}", "github": "", "reviewers": "oKfx;NVyT;rzPD;bZUn", "pdf_size": 1516160, "rating": "5;6;7;8", "confidence": "4;4;3;4", "soundness": "2;4;3;4", "novelty": "2;3;3;4", "presentation": "3;4;3;4", "wc_summary": "46;98;154;117", "wc_strengths": "74;68;205;91", "wc_weaknesses": "353;235;387;143", "wc_questions": "48;38;88;250", "wc_limitations": "25;10;10;47", "wc_review": "546;449;844;648", "wc_reply_reviewers": "36;19;156;181", "wc_reply_authors": "0;0;238;144", "reply_reviewers": "1;1;2;2", "reply_authors": "1;1;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 103.75, 38.95109112720721 ], "wc_strengths_avg": [ 109.5, 55.778580118177985 ], "wc_weaknesses_avg": [ 279.5, 96.91620091604912 ], "wc_questions_avg": [ 106.0, 85.21736912155878 ], "wc_limitations_avg": [ 23.0, 15.149257407543116 ], "wc_review_avg": [ 621.75, 146.3427056603779 ], "wc_reply_reviewers_avg": [ 98.0, 71.30567999816003 ], "wc_reply_authors_avg": [ 95.5, 101.11750590278619 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=58837257123035477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "in.tum.de;;tum.de;tum.de;helmholtz.ai", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich;Helmholtz Association of German Research Centres", "aff_unique_dep": "Department of Informatics;;Helmholtz AI", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.helmholtz-ai.de", "aff_unique_abbr": "TUM;TUM;Helmholtz AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Reparameterized Multi-Resolution Convolutions for Long Sequence Modelling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95141", "id": "RwgNbIpCpk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RwgNbIpCpk", "openreview": "https://openreview.net/forum?id=RwgNbIpCpk", "poster": "/media/PosterPDFs/NeurIPS%202024/95141.png?t=1732098053.4906762", "project": "", "author_site": "Jake Cunningham, Giorgio Giannone, Mingtian Zhang, Marc Deisenroth", "tldr": "", "abstract": "Global convolutions have shown increasing promise as powerful general-purpose sequence models. However, training long convolutions is challenging, and kernel parameterizations must be able to learn long-range dependencies without overfitting. This work introduces reparameterized multi-resolution convolutions ($\\texttt{MRConv}$), a novel approach to parameterizing global convolutional kernels for long-sequence modeling. By leveraging multi-resolution convolutions, incorporating structural reparameterization and introducing learnable kernel decay, $\\texttt{MRConv}$ learns expressive long-range kernels that perform well across various data modalities. Our experiments demonstrate state-of-the-art performance on the Long Range Arena, Sequential CIFAR, and Speech Commands tasks among convolution models and linear-time transformers. Moreover, we report improved performance on ImageNet classification by replacing 2D convolutions with 1D $\\texttt{MRConv}$ layers.", "keywords": "Sequence Modelling;Convolutions;Structural Reparameterization;Self Attention;State Space Models", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/b31a082bbd7b4c4b945895ba10a0c90591d33abb.zip", "author": "Harry Jake Cunningham;Giorgio Giannone;Mingtian Zhang;Marc Peter Deisenroth", "authorids": "~Harry_Jake_Cunningham1;~Giorgio_Giannone1;~Mingtian_Zhang1;~Marc_Peter_Deisenroth1", "gender": "M;;M;", "homepage": "https://hjakecunningham.github.io/;;http://tomo.wiki;", "dblp": ";;230/8340;", "google_scholar": "GEWN-PsAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Harry_Jake_Cunningham1;~Giorgio_Giannone1;~Mingtian_Zhang1;~Marc_Peter_Deisenroth1", "aff": "University College London, University of London;;;", "aff_domain": "ucl.ac.uk;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\ncunningham2024reparameterized,\ntitle={Reparameterized Multi-Resolution Convolutions for Long Sequence Modelling},\nauthor={Harry Jake Cunningham and Giorgio Giannone and Mingtian Zhang and Marc Peter Deisenroth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RwgNbIpCpk}\n}", "github": "", "reviewers": "8iUW;JNPq;pbnE;CE6k;wDz6", "pdf_size": 863228, "rating": "5;6;6;7;7", "confidence": "5;5;4;3;4", "soundness": "3;3;4;3;3", "novelty": "3;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "83;76;68;47;76", "wc_strengths": "88;89;55;51;32", "wc_weaknesses": "385;817;104;45;293", "wc_questions": "135;13;80;2;346", "wc_limitations": "72;19;7;9;105", "wc_review": "763;1014;314;154;852", "wc_reply_reviewers": "386;620;0;0;136", "wc_reply_authors": "1296;720;0;0;0", "reply_reviewers": "2;2;0;0;1", "reply_authors": "4;2;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 70.0, 12.441864811996632 ], "wc_strengths_avg": [ 63.0, 22.22611077089287 ], "wc_weaknesses_avg": [ 328.8, 273.44352250510525 ], "wc_questions_avg": [ 115.2, 125.02703707598609 ], "wc_limitations_avg": [ 42.4, 39.271363612688575 ], "wc_review_avg": [ 619.4, 328.724565556029 ], "wc_reply_reviewers_avg": [ 228.4, 241.27129957788185 ], "wc_reply_authors_avg": [ 403.2, 526.3392062159154 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4547619669920670761&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Statistical and Geometrical properties of the Kernel Kullback-Leibler divergence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95140", "id": "RxQoIekEa2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RxQoIekEa2", "openreview": "https://openreview.net/forum?id=RxQoIekEa2", "poster": "/media/PosterPDFs/NeurIPS%202024/95140.png?t=1731707970.3870184", "project": "", "author_site": "Anna Korba, Francis Bach, Cl\u00e9mentine CHAZAL", "tldr": "", "abstract": "In this paper, we study the statistical and geometrical properties of the Kullback-Leibler divergence with kernel covariance operators (KKL) introduced by [Bach, 2022, Information Theory with Kernel Methods]. Unlike the classical Kullback-Leibler (KL) divergence that involves density ratios, the KKL compares probability distributions through covariance operators (embeddings) in a reproducible kernel Hilbert space (RKHS), and compute the Kullback-Leibler quantum divergence. \nThis novel divergence hence shares parallel but different aspects with both the standard Kullback-Leibler between probability distributions and kernel embeddings metrics such as the maximum mean discrepancy. \nA limitation faced with the original KKL divergence is its inability to be defined for distributions with disjoint supports. To solve this problem, we propose in this paper a regularised variant that guarantees that divergence is well defined for all distributions. We derive bounds that quantify the deviation of the regularised KKL to the original one, as well as concentration bounds. \nIn addition, we provide a closed-form expression for the regularised KKL, specifically applicable when the distributions consist of finite sets of points, which makes it implementable. \nFurthermore, we derive a Wasserstein gradient descent scheme of the KKL divergence in the case of discrete distributions, and study empirically its properties to transport a set of points to a target distribution.", "keywords": "kernels; optimisation; optimal transport", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Anna Korba;Francis Bach;Cl\u00e9mentine Chazal", "authorids": "~Anna_Korba2;~Francis_Bach1;~Cl\u00e9mentine_Chazal1", "gender": ";M;", "homepage": ";http://www.di.ens.fr/~fbach;", "dblp": "182/8959.html;b/FrancisRBach;", "google_scholar": "https://scholar.google.fr/citations?user=dbH6E3kAAAAJ;https://scholar.google.fr/citations?user=6PJWcFEAAAAJ;", "orcid": ";;", "linkedin": ";;cl%C3%A9mentine-chazal-76a702218/", "or_profile": "~Anna_Korba2;~Francis_Bach1;~Cl\u00e9mentine_Chazal1", "aff": "Ensae ParisTech;Ecole Normale Superieure;Ecole Nationale de la Statistique et de l'Administration Economique", "aff_domain": "ensae.fr;ens.fr;ensae.fr", "position": "Assistant Professor;Faculty;Intern", "bibtex": "@inproceedings{\nkorba2024statistical,\ntitle={Statistical and Geometrical properties of the Kernel Kullback-Leibler divergence},\nauthor={Anna Korba and Francis Bach and Cl{\\'e}mentine Chazal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RxQoIekEa2}\n}", "github": "", "reviewers": "ffGL;19Wg;6zq3;r7ey", "pdf_size": 952851, "rating": "5;5;6;6", "confidence": "4;4;4;5", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "80;159;56;96", "wc_strengths": "52;40;26;12", "wc_weaknesses": "102;140;28;13", "wc_questions": "103;37;123;60", "wc_limitations": "8;1;1;1", "wc_review": "345;377;234;182", "wc_reply_reviewers": "12;0;14;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 97.75, 38.120696478422325 ], "wc_strengths_avg": [ 32.5, 14.99166435056495 ], "wc_weaknesses_avg": [ 70.75, 52.284677487768825 ], "wc_questions_avg": [ 80.75, 34.00275724114149 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 284.5, 79.48742038838598 ], "wc_reply_reviewers_avg": [ 14.75, 11.818946653572814 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:znYLFEJQY7oJ:scholar.google.com/&scioq=Statistical+and+Geometrical+properties+of+the+Kernel+Kullback-Leibler+divergence&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "ensae.fr;ens.fr;ensae.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "ENSAE ParisTech;Ecole Normale Superieure;Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ensae.fr;https://www.ens.fr;https://ensae.fr", "aff_unique_abbr": "Ensae;ENS;ENSAE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Computing the Bias of Constant-step Stochastic Approximation with Markovian Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95139", "id": "RxXdokK2qz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RxXdokK2qz", "openreview": "https://openreview.net/forum?id=RxXdokK2qz", "poster": "", "project": "", "author_site": "Sebastian Allmeier, Nicolas Gast", "tldr": "", "abstract": "We study stochastic approximation algorithms with Markovian noise and constant step-size $\\alpha$. We develop a method based on infinitesimal generator comparisons to study the bias of the algorithm, which is the expected difference between $\\theta_n$ ---the value at iteration $n$--- and $\\theta^*$ ---the unique equilibrium of the corresponding ODE. We show that, under some smoothness conditions, this bias is of order $O(\\alpha)$. Furthermore, we show that the time-averaged bias is equal to $\\alpha V + O(\\alpha^2)$, where $V$ is a constant characterized by a Lyapunov equation, showing that $E[\\bar{\\theta}_n] \\approx \\theta^*+V\\alpha + O(\\alpha^2)$, where $\\bar{\\theta}_n$ is the Polyak-Ruppert average. We also show that $\\bar{\\theta}_n$ converges with high probability around $\\theta^*+\\alpha V$. We illustrate how to combine this with Richardson-Romberg extrapolation to derive an iterative scheme with a bias of order $O(\\alpha^2)$.", "keywords": "Stochastic approximation; Polyak-Ruppert averaging; Stein's method", "primary_area": "optimization", "supplementary_material": "", "author": "Sebastian Allmeier;Nicolas Gast", "authorids": "~Sebastian_Allmeier1;~Nicolas_Gast1", "gender": "M;M", "homepage": "https://sebastianallmeier.github.io/;http://polaris.imag.fr/nicolas.gast/", "dblp": ";64/4367", "google_scholar": ";https://scholar.google.fr/citations?user=KbEN-HoAAAAJ", "orcid": "0000-0003-4629-6348;0000-0001-6884-8698", "linkedin": ";", "or_profile": "~Sebastian_Allmeier1;~Nicolas_Gast1", "aff": "INRIA;INRIA", "aff_domain": "inria.fr;inria.fr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nallmeier2024computing,\ntitle={Computing the Bias of Constant-step Stochastic Approximation with Markovian Noise},\nauthor={Sebastian Allmeier and Nicolas Gast},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RxXdokK2qz}\n}", "github": "", "reviewers": "Hzq1;xFdD;Mbpd", "pdf_size": 1049769, "rating": "5;5;7", "confidence": "4;3;4", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;2;2", "wc_summary": "55;82;73", "wc_strengths": "82;7;81", "wc_weaknesses": "45;38;206", "wc_questions": "30;63;164", "wc_limitations": "23;1;13", "wc_review": "235;191;537", "wc_reply_reviewers": "17;12;131", "wc_reply_authors": "84;162;145", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.0, 11.224972160321824 ], "wc_strengths_avg": [ 56.666666666666664, 35.122009560324926 ], "wc_weaknesses_avg": [ 96.33333333333333, 77.59868269214083 ], "wc_questions_avg": [ 85.66666666666667, 57.00487308604024 ], "wc_limitations_avg": [ 12.333333333333334, 8.993825042154695 ], "wc_review_avg": [ 321.0, 153.7877324973181 ], "wc_reply_reviewers_avg": [ 53.333333333333336, 54.9565484926248 ], "wc_reply_authors_avg": [ 130.33333333333334, 33.48963355361709 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2428120573210560777&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "inria.fr;inria.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Visual Decoding and Reconstruction via EEG Embeddings with Guided Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95138", "id": "RxkcroC8qP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RxkcroC8qP", "openreview": "https://openreview.net/forum?id=RxkcroC8qP", "poster": "/media/PosterPDFs/NeurIPS%202024/95138.png?t=1731250024.469363", "project": "", "author_site": "Dongyang Li, Chen Wei, Shiying Li, Jiachen Zou, Quanying Liu", "tldr": "", "abstract": "How to decode human vision through neural signals has attracted a long-standing interest in neuroscience and machine learning. Modern contrastive learning and generative models improved the performance of visual decoding and reconstruction based on functional Magnetic Resonance Imaging (fMRI). However, the high cost and low temporal resolution of fMRI limit their applications in brain-computer interfaces (BCIs), prompting a high need for visual decoding based on electroencephalography (EEG). In this study, we present an end-to-end EEG-based visual reconstruction zero-shot framework, consisting of a tailored brain encoder, called the Adaptive Thinking Mapper (ATM), which projects neural signals from different sources into the shared subspace as the clip embedding, and a two-stage multi-pipe EEG-to-image generation strategy. In stage one, EEG is embedded to align the high-level clip embedding, and then the prior diffusion model refines EEG embedding into image priors. A blurry image also decoded from EEG for maintaining the low-level feature. In stage two, we input both the high-level clip embedding, the blurry image and caption from EEG latent to a pre-trained diffusion model. Furthermore, we analyzed the impacts of different time windows and brain regions on decoding and reconstruction. The versatility of our framework is demonstrated in the magnetoencephalogram (MEG) data modality. The experimental results indicate that our EEG-based visual zero-shot framework achieves SOTA performance in classification, retrieval and reconstruction, highlighting the portability, low cost, and high temporal resolution of EEG, enabling a wide range of BCI applications. Our code is available at https://github.com/ncclab-sustech/EEG_Image_decode.", "keywords": "EEG decoding;Visual reconstruction;BCI;Visual neural decoding", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Dongyang Li;Chen Wei;Shiying Li;Jiachen Zou;Quanying Liu", "authorids": "~Dongyang_Li4;~Chen_Wei7;~Shiying_Li5;~Jiachen_Zou1;~Quanying_Liu1", "gender": "M;M;F;F;F", "homepage": "https://dongyangli-del.github.io/;;https://phyever.github.io/;;https://github.com/1275673085", "dblp": "0000-0001-6197-5950;181/2831-6;329/4128;;", "google_scholar": "4VTOeU4AAAAJ;https://scholar.google.com/citations?hl=en;MQZEqo0AAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0009-0007-6841-0929;0000-0003-2999-2771;;0000-0002-2501-7656;", "linkedin": "dongyang-li-43a4b5320/;;;;", "or_profile": "~Dongyang_Li4;~Chen_Wei7;~Jiachen_Zou1;~Quanying_Liu1;~Lishiying1", "aff": "Southern University of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology", "aff_domain": "sustech.edu;sustech.edu.cn;sustech.edu.cn;sustech.edu.cn;mail.sustech.edu.cn", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nli2024visual,\ntitle={Visual Decoding and Reconstruction via {EEG} Embeddings with Guided Diffusion},\nauthor={Dongyang Li and Chen Wei and Shiying Li and Jiachen Zou and Quanying Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RxkcroC8qP}\n}", "github": "", "reviewers": "4xL4;gPm4;zVcP;asin", "pdf_size": 36796527, "rating": "3;5;6;6", "confidence": "4;4;4;3", "soundness": "1;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "41;48;48;55", "wc_strengths": "12;58;15;18", "wc_weaknesses": "321;247;227;130", "wc_questions": "8;151;83;22", "wc_limitations": "1;10;7;27", "wc_review": "383;514;380;252", "wc_reply_reviewers": "0;57;12;0", "wc_reply_authors": "724;659;491;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;4;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 48.0, 4.949747468305833 ], "wc_strengths_avg": [ 25.75, 18.73999733191016 ], "wc_weaknesses_avg": [ 231.25, 68.14093850248909 ], "wc_questions_avg": [ 66.0, 56.59946996218251 ], "wc_limitations_avg": [ 11.25, 9.65336728815391 ], "wc_review_avg": [ 382.25, 92.64009661048503 ], "wc_reply_reviewers_avg": [ 17.25, 23.466731770743024 ], "wc_reply_authors_avg": [ 468.5, 283.5352711745048 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1995555660993711662&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sustech.edu;sustech.edu.cn;sustech.edu.cn;sustech.edu.cn;mail.sustech.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Southern University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.sustech.edu.cn", "aff_unique_abbr": "SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Leveraging Environment Interaction for Automated PDDL Translation and Planning with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95137", "id": "RzlCqnncQv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=RzlCqnncQv", "openreview": "https://openreview.net/forum?id=RzlCqnncQv", "poster": "", "project": "", "author_site": "Sadegh Mahdavi, Raquel Aoki, Keyi Tang, Yanshuai Cao", "tldr": "", "abstract": "Large Language Models (LLMs) have shown remarkable performance in various natural language tasks, but they often struggle with planning problems that require structured reasoning. To address this limitation, the conversion of planning problems into the Planning Domain Definition Language (PDDL) has been proposed as a potential solution, enabling the use of automated planners. However, generating accurate PDDL files typically demands human inputs or correction, which can be time-consuming and costly. In this paper, we propose a novel approach that leverages LLMs and environment feedback to automatically generate PDDL domain and problem description files without the need for human intervention. Our method introduces an iterative refinement process that generates multiple problem PDDL candidates and progressively refines the domain PDDL based on feedback obtained from interacting with the environment. To guide the refinement process, we develop an Exploration Walk (EW) metric, which provides rich feedback signals for LLMs to update the PDDL file. We evaluate our approach on $10$ PDDL environments. We achieve an average task solve rate of 66\\% compared to a 29\\% solve rate by GPT-4's intrinsic planning with chain-of-thought prompting. Our work enables the automated modeling of planning environments using LLMs and environment feedback, eliminating the need for human intervention in the PDDL translation process and paving the way for more reliable LLM agents in challenging problems. Our code is available at https://github.com/BorealisAI/llm-pddl-planning", "keywords": "Planning;PDDL;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Sadegh Mahdavi;Raquel Aoki;Keyi Tang;Yanshuai Cao", "authorids": "~Sadegh_Mahdavi1;~Raquel_Aoki1;~Keyi_Tang1;~Yanshuai_Cao1", "gender": "M;M;;F", "homepage": "http://smahdavi.com;;;https://sites.google.com/view/raquelaoki/home", "dblp": "332/2137;;;261/3697", "google_scholar": ";;https://scholar.google.ca/citations?user=RTVRTSsAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;;", "linkedin": ";keyi-tang/;yanshuai-cao-b59878a4/;", "or_profile": "~Sadegh_Mahdavi1;~Keyi_Tang1;~Yanshuai_Cao1;~Raquel_Y._S._Aoki1", "aff": "University of British Columbia;Borealis AI;Borealis AI;Royal Bank of Canada (RBC)", "aff_domain": "ubc.ca;borealisai.com;borealisai.com;borealisai.com", "position": "PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmahdavi2024leveraging,\ntitle={Leveraging Environment Interaction for Automated {PDDL} Translation and Planning with Large Language Models},\nauthor={Sadegh Mahdavi and Raquel Aoki and Keyi Tang and Yanshuai Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=RzlCqnncQv}\n}", "github": "", "reviewers": "cVUW;wCSx;iKF8;zxRN", "pdf_size": 715471, "rating": "5;6;6;7", "confidence": "4;5;5;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;2;3;4", "wc_summary": "63;63;73;90", "wc_strengths": "25;114;55;61", "wc_weaknesses": "170;402;132;459", "wc_questions": "336;94;38;4", "wc_limitations": "56;4;7;4", "wc_review": "650;677;305;618", "wc_reply_reviewers": "244;343;36;26", "wc_reply_authors": "1344;136;0;0", "reply_reviewers": "3;2;1;1", "reply_authors": "5;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.25, 11.031205736455105 ], "wc_strengths_avg": [ 63.75, 32.057565409743766 ], "wc_weaknesses_avg": [ 290.75, 141.83330885232849 ], "wc_questions_avg": [ 118.0, 129.89996150884727 ], "wc_limitations_avg": [ 17.75, 22.117583502724706 ], "wc_review_avg": [ 562.5, 150.1274458585105 ], "wc_reply_reviewers_avg": [ 162.25, 135.88299194527622 ], "wc_reply_authors_avg": [ 370.0, 565.0734465536317 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18436035598749331992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ubc.ca;borealisai.com;borealisai.com;borealisai.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of British Columbia;Borealis AI;Royal Bank of Canada", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;https://www.borealisai.com;https://www.rbc.com", "aff_unique_abbr": "UBC;Borealis AI;RBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Gaussian Approximation and Multiplier Bootstrap for Polyak-Ruppert Averaged Linear Stochastic Approximation with Applications to TD Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95136", "id": "S0Ci1AsJL5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S0Ci1AsJL5", "openreview": "https://openreview.net/forum?id=S0Ci1AsJL5", "poster": "/media/PosterPDFs/NeurIPS%202024/95136.png?t=1733991782.6700387", "project": "", "author_site": "Sergey Samsonov, Eric Moulines, Qi-Man Shao, Zhuo-Song Zhang, Alexey Naumov", "tldr": "", "abstract": "In this paper, we obtain the Berry\u2013Esseen bound for multivariate normal approximation for the Polyak-Ruppert averaged iterates of the linear stochastic approximation (LSA) algorithm with decreasing step size. Moreover, we prove the non-asymptotic validity of the confidence intervals for parameter estimation with LSA based on multiplier bootstrap. This procedure updates the LSA estimate together with a set of randomly perturbed LSA estimates upon the arrival of subsequent observations. We illustrate our findings in the setting of temporal difference learning with linear function approximation.", "keywords": "Linear Stochastic Approximation;Normal Approximation;Bootstrap Validity", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/82a1cee0f0b8d0cd2a1a0039810c6c52e9887739.zip", "author": "Sergey Samsonov;Eric Moulines;Qi-Man Shao;Zhuo-Song Zhang;Alexey Naumov", "authorids": "~Sergey_Samsonov1;~Eric_Moulines1;~Qi-Man_Shao1;~Zhuo-Song_Zhang1;~Alexey_Naumov1", "gender": "M;M;M;M;M", "homepage": "https://www.hse.ru/org/persons/219484540;;https://www.sustech.edu.cn/en/faculties/shaoqiman.html;https://zhuosongz.github.io;https://www.hse.ru/en/staff/anaumov", "dblp": "23/8962;54/2358;09/8506;378/6926;196/2848", "google_scholar": "https://scholar.google.ru/citations?user=8BwDmyMAAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ;https://scholar.google.com.hk/scholar?hl=zh-CN;;5723KoYAAAAJ", "orcid": ";0000-0002-2058-0693;;;", "linkedin": ";;;;", "or_profile": "~Sergey_Samsonov1;~Eric_Moulines1;~Qi-Man_Shao1;~Zhuo-Song_Zhang1;~Alexey_Naumov1", "aff": "Higher School of Economics;Ecole polytechnique;Southern University of Science and Technology;Southern University of Science and Technology;Higher School of Economics", "aff_domain": "hse.ru;polytechnique.edu;sustech.edu.cn;sustech.edu.cn;hse.ru", "position": "PhD student;Full Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsamsonov2024gaussian,\ntitle={Gaussian Approximation and Multiplier Bootstrap for Polyak-Ruppert Averaged Linear Stochastic Approximation with Applications to {TD} Learning},\nauthor={Sergey Samsonov and Eric Moulines and Qi-Man Shao and Zhuo-Song Zhang and Alexey Naumov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S0Ci1AsJL5}\n}", "github": "", "reviewers": "uNZU;DkFj;ew5L", "pdf_size": 660478, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;4;2", "novelty": "2;3;3", "presentation": "3;2;2", "wc_summary": "81;472;54", "wc_strengths": "61;137;71", "wc_weaknesses": "86;108;197", "wc_questions": "3;98;56", "wc_limitations": "195;13;15", "wc_review": "426;828;393", "wc_reply_reviewers": "0;82;65", "wc_reply_authors": "36;68;27", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 202.33333333333334, 191.00145432837823 ], "wc_strengths_avg": [ 89.66666666666667, 33.717782977071444 ], "wc_weaknesses_avg": [ 130.33333333333334, 47.98842453018112 ], "wc_questions_avg": [ 52.333333333333336, 38.870154217456985 ], "wc_limitations_avg": [ 74.33333333333333, 85.32812484104458 ], "wc_review_avg": [ 549.0, 197.74225648555748 ], "wc_reply_reviewers_avg": [ 49.0, 35.33647784749729 ], "wc_reply_authors_avg": [ 43.666666666666664, 17.594190960528863 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5446570101064966132&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hse.ru;polytechnique.edu;sustech.edu.cn;sustech.edu.cn;hse.ru", "author_num": 5, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Higher School of Economics;Ecole Polytechnique;Southern University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hse.ru;https://www.polytechnique.edu;https://www.sustech.edu.cn", "aff_unique_abbr": "HSE;X;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0", "aff_country_unique": "Russian Federation;France;China" }, { "title": "RankRAG: Unifying Context Ranking with Retrieval-Augmented Generation in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95135", "id": "S1fc92uemC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S1fc92uemC", "openreview": "https://openreview.net/forum?id=S1fc92uemC", "poster": "", "project": "", "author_site": "Yue Yu, Wei Ping, Zihan Liu, Boxin Wang, Jiaxuan You, Chao Zhang, Mohammad Shoeybi, Bryan Catanzaro", "tldr": "", "abstract": "Large language models (LLMs) typically utilize the top-k contexts from a retriever in retrieval-augmented generation (RAG). In this work, we propose a novel method called RankRAG, which instruction-tunes a single LLM for both context ranking and answer generation in RAG. In particular, the instruction-tuned LLMs work surprisingly well by adding a small fraction of ranking data into the training blend, and outperform existing expert ranking models, including the same LLM exclusively fine-tuned on a large amount of ranking data. For generation, we compare our model with many strong baselines, including ChatQA-1.5, an open-sourced model with the state-of-the-art performance on RAG benchmarks. Specifically, our Llama3-RankRAG-8B and Llama3-RankRAG-70B significantly outperform Llama3-ChatQA-1.5-8B and Llama3-ChatQA-1.5-70B, respectively, on nine general knowledge-intensive benchmarks for RAG. In addition, it also performs comparably to GPT-4 on five RAG benchmarks in the biomedical domain without instruction fine-tuning on biomedical data, demonstrating its superb capability for generalization to new domains.", "keywords": "LLM;Retrieval-augmented Generation;Ranking", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yue Yu;Wei Ping;Zihan Liu;Boxin Wang;Jiaxuan You;Chao Zhang;Mohammad Shoeybi;Bryan Catanzaro", "authorids": "~Yue_Yu2;~Wei_Ping1;~Zihan_Liu2;~Boxin_Wang1;~Jiaxuan_You2;~Chao_Zhang15;~Mohammad_Shoeybi1;~Bryan_Catanzaro1", "gender": "M;M;M;;;M;M;M", "homepage": "https://yueyu1030.github.io;https://wpingnet.github.io/;https://zliucr.github.io;https://wbx.life;http://chaozhang.org/;;https://ctnzr.io;https://cs.stanford.edu/~jiaxuan/", "dblp": ";08/8399.html;46/9231;236/6319;94/3019-14;53/9742;14/4826;192/4727", "google_scholar": "zQ3Jh6UAAAAJ;6gKEYRgAAAAJ;LPabcsYAAAAJ;YOf2ATIAAAAJ;https://scholar.google.com/citations?hl=en;62ElavIAAAAJ;UZ6kI2AAAAAJ;NDbMl7oAAAAJ", "orcid": "0000-0002-3683-5208;;;;0000-0003-3009-598X;;0000-0003-0034-7728;", "linkedin": ";wei-ping/;;;;shoeybi/;bryancatanzaro/;jiaxuan-you-5859b37b/", "or_profile": "~Yue_Yu2;~Wei_Ping1;~Zihan_Liu2;~Boxin_Wang1;~Chao_Zhang15;~Mohammad_Shoeybi1;~Bryan_Catanzaro1;~Jiaxuan_You1", "aff": "Georgia Institute of Technology;NVIDIA;NVIDIA;NVIDIA;Georgia Institute of Technology;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "gatech.edu;nvidia.com;nvidia.com;nvidia.com;gatech.edu;nvidia.com;nvidia.com;nvidia.com", "position": "PhD student;Principal Researcher;Researcher;Senior Research Scientist;Assistant Professor;Director of Applied Resesrch;Vice President;Researcher", "bibtex": "@inproceedings{\nyu2024rankrag,\ntitle={Rank{RAG}: Unifying Context Ranking with Retrieval-Augmented Generation in {LLM}s},\nauthor={Yue Yu and Wei Ping and Zihan Liu and Boxin Wang and Jiaxuan You and Chao Zhang and Mohammad Shoeybi and Bryan Catanzaro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S1fc92uemC}\n}", "github": "", "reviewers": "y5n5;LbZs;ze6x", "pdf_size": 774061, "rating": "5;8;8", "confidence": "4;4;5", "soundness": "2;4;4", "novelty": "2;4;3", "presentation": "3;4;3", "wc_summary": "85;117;126", "wc_strengths": "30;79;70", "wc_weaknesses": "222;81;483", "wc_questions": "54;1;122", "wc_limitations": "5;21;22", "wc_review": "396;299;823", "wc_reply_reviewers": "0;14;92", "wc_reply_authors": "63;18;674", "reply_reviewers": "0;1;2", "reply_authors": "2;2;4", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 109.33333333333333, 17.594190960528863 ], "wc_strengths_avg": [ 59.666666666666664, 21.296843793284385 ], "wc_weaknesses_avg": [ 262.0, 166.53528154718447 ], "wc_questions_avg": [ 59.0, 49.52440475832765 ], "wc_limitations_avg": [ 16.0, 7.788880963698615 ], "wc_review_avg": [ 506.0, 227.6239589029825 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 40.4749579644281 ], "wc_reply_authors_avg": [ 251.66666666666666, 299.1993018411344 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14046685579988457849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gatech.edu;nvidia.com;nvidia.com;nvidia.com;gatech.edu;nvidia.com;nvidia.com;nvidia.com", "author_num": 8, "aff_unique_index": "0;1;1;1;0;1;1;1", "aff_unique_norm": "Georgia Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.gatech.edu;https://www.nvidia.com", "aff_unique_abbr": "Georgia Tech;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Identification and Estimation of the Bi-Directional MR with Some Invalid Instruments", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95134", "id": "S2P6KPLtm8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S2P6KPLtm8", "openreview": "https://openreview.net/forum?id=S2P6KPLtm8", "poster": "/media/PosterPDFs/NeurIPS%202024/95134.png?t=1731492045.459363", "project": "", "author_site": "Feng Xie, Zhen Yao, Lin Xie, Yan Zeng, Zhi Geng", "tldr": "", "abstract": "We consider the challenging problem of estimating causal effects from purely observational data in the bi-directional Mendelian randomization (MR), where some invalid instruments, as well as unmeasured confounding, usually exist. \nTo address this problem, most existing methods attempt to find proper valid instrumental variables (IVs) for the target causal effect by expert knowledge or by assuming that the causal model is a one-directional MR model. \nAs such, in this paper, we first theoretically investigate the identification of the bi-directional MR from observational data. In particular, we provide necessary and sufficient conditions under which valid IV sets are correctly identified such that the bi-directional MR model is identifiable, including the causal directions of a pair of phenotypes (i.e., the treatment and outcome).\nMoreover, based on the identification theory, we develop a cluster fusion-like method to discover valid IV sets and estimate the causal effects of interest.\nWe theoretically demonstrate the correctness of the proposed algorithm.\nExperimental results show the effectiveness of our method for estimating causal effects in both one-directional and bi-directional MR models.", "keywords": "Mendelian Randomization;Instrumental Variable;Causal Effect;Testability", "primary_area": "causal_inference", "supplementary_material": "/attachment/8207d9052ff1552a78777d6048a9ca15ad959ae3.zip", "author": "Feng Xie;Zhen Yao;Lin Xie;Yan Zeng;Zhi Geng", "authorids": "~Feng_Xie1;~Zhen_Yao3;~Lin_Xie2;~Yan_Zeng2;~Zhi_Geng1", "gender": "M;M;F;;M", "homepage": "https://fengxie.site/;https://github.com/wh1tetoo;https://linxie.cn;https://scholar.google.com/citations?user=XyxLHCAAAAAJ&hl=zh-CN;https://stxy.btbu.edu.cn/szdw/bssds/34339356074b408c8650309f05f24558.htm", "dblp": "11/4605-2;;;83/4665-2;", "google_scholar": "stLFCtQAAAAJ;;;XyxLHCAAAAAJ;", "orcid": "0000-0001-7229-3955;;;0000-0001-7721-2560;", "linkedin": ";;;;", "or_profile": "~Feng_Xie1;~Zhen_Yao3;~Lin_Xie2;~Yan_Zeng2;~Zhi_Geng1", "aff": "Beijing Technology and Business University;Beijing Technology and Business University;Beijing Technology and Business University;Beijing Technology and Business University;School of mathematical Science, Peking University, Peking University", "aff_domain": "btbu.edu.cn;btbu.edu.cn;btbu.edu;btbu.edu.cn;math.pku.edu.cn", "position": "Associate Professor;MS student;Undergrad student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nxie2024identification,\ntitle={Identification and Estimation of the Bi-Directional {MR} with Some Invalid Instruments},\nauthor={Feng Xie and Zhen Yao and Lin Xie and Yan Zeng and Zhi Geng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S2P6KPLtm8}\n}", "github": "", "reviewers": "UQEn;XbcF;St5u;Qa6w", "pdf_size": 332383, "rating": "6;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "76;81;26;136", "wc_strengths": "80;60;47;31", "wc_weaknesses": "43;81;51;98", "wc_questions": "181;122;3;120", "wc_limitations": "37;2;9;27", "wc_review": "417;346;136;412", "wc_reply_reviewers": "34;16;11;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 79.75, 38.95109112720721 ], "wc_strengths_avg": [ 54.5, 17.95132307101624 ], "wc_weaknesses_avg": [ 68.25, 22.26404051379713 ], "wc_questions_avg": [ 106.5, 64.58521502635104 ], "wc_limitations_avg": [ 18.75, 13.935117509371782 ], "wc_review_avg": [ 327.75, 114.1980188094347 ], "wc_reply_reviewers_avg": [ 21.25, 8.699856320652657 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xAZGvjs91m8J:scholar.google.com/&scioq=Identification+and+Estimation+of+the+Bi-Directional+MR+with+Some+Invalid+Instruments&hl=en&as_sdt=0,6", "gs_version_total": 4, "email": "btbu.edu.cn;btbu.edu.cn;btbu.edu;btbu.edu.cn;math.pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Beijing Technology and Business University;Peking University", "aff_unique_dep": ";School of Mathematical Sciences", "aff_unique_url": "http://www.btbu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "BTBU;PKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Peking", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Closer Look at AUROC and AUPRC under Class Imbalance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95133", "id": "S3HvA808gk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S3HvA808gk", "openreview": "https://openreview.net/forum?id=S3HvA808gk", "poster": "/media/PosterPDFs/NeurIPS%202024/95133.png?t=1731609810.5667832", "project": "", "author_site": "Matthew McDermott, Haoran Zhang, Lasse Hansen, Giovanni Angelotti, Jack Gallifant", "tldr": "", "abstract": "In machine learning (ML), a widespread claim is that the area under the precision-recall curve (AUPRC) is a superior metric for model comparison to the area under the receiver operating characteristic (AUROC) for tasks with class imbalance. This paper refutes this notion on two fronts. First, we theoretically characterize the behavior of AUROC and AUPRC in the presence of model mistakes, establishing clearly that AUPRC is not generally superior in cases of class imbalance. We further show that AUPRC can be a harmful metric as it can unduly favor model improvements in subpopulations with more frequent positive labels, heightening algorithmic disparities. Next, we empirically support our theory using experiments on both semi-synthetic and real-world fairness datasets. Prompted by these insights, we conduct a review of over 1.5 million scientific papers to understand the origin of this invalid claim, finding that it is often made without citation, misattributed to papers that do not argue this point, and aggressively over-generalized from source arguments. Our findings represent a dual contribution: a significant technical advancement in understanding the relationship between AUROC and AUPRC and a stark warning about unchecked assumptions in the ML community.", "keywords": "AUROC;AUPRC;Area under the receiver operating characteristic;Area under the precision recall curve;evaluation;fairness;disparities;bias", "primary_area": "fairness", "supplementary_material": "", "author": "Matthew B.A. McDermott;Haoran Zhang;Lasse Hyldig Hansen;Giovanni Angelotti;Jack Gallifant", "authorids": "~Matthew_B.A._McDermott1;~Haoran_Zhang4;~Lasse_Hyldig_Hansen1;~Giovanni_Angelotti1;~Jack_Gallifant1", "gender": "M;M;M;M;M", "homepage": ";https://haoran.ca;;https://jhn-nt.github.io/;https://www.jackgallifant.com/", "dblp": "218/6984;95/4452-3.html;;;366/7591", "google_scholar": "https://scholar.google.ca/citations?user=_V96PXoAAAAJ;https://scholar.google.ca/citations?user=6aWRAPkAAAAJ;;MLykPNAAAAAJ;SlLz8KoAAAAJ", "orcid": "0000-0001-6048-9707;;;0000-0003-3264-2721;0000-0003-1306-2334", "linkedin": ";;lasse-hyldig-hansen-a1ab1216a/;;jackgallifant/", "or_profile": "~Matthew_B.A._McDermott1;~Haoran_Zhang4;~Lasse_Hyldig_Hansen1;~Giovanni_Angelotti1;~Jack_Gallifant1", "aff": "Harvard University;Massachusetts Institute of Technology;Aarhus University;Humanitas Research Hospital;Massachusetts Institute of Technology", "aff_domain": "harvard.edu;mit.edu;post.au.dk;humanitas.it;mit.edu", "position": "Postdoc;PhD student;MS student;Researcher;Postdoc", "bibtex": "@inproceedings{\nmcdermott2024a,\ntitle={A Closer Look at {AUROC} and {AUPRC} under Class Imbalance},\nauthor={Matthew B.A. McDermott and Haoran Zhang and Lasse Hyldig Hansen and Giovanni Angelotti and Jack Gallifant},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S3HvA808gk}\n}", "github": "", "reviewers": "pyKS;XQD9;PmDV;2iwp", "pdf_size": 1816669, "rating": "5;6;7;8", "confidence": "4;3;3;4", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;2;4", "wc_summary": "98;68;161;98", "wc_strengths": "186;61;104;126", "wc_weaknesses": "131;338;147;230", "wc_questions": "27;38;35;158", "wc_limitations": "1;8;7;14", "wc_review": "443;513;454;626", "wc_reply_reviewers": "0;117;107;16", "wc_reply_authors": "0;438;204;15", "reply_reviewers": "0;1;2;1", "reply_authors": "1;4;3;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 106.25, 33.89966813996857 ], "wc_strengths_avg": [ 119.25, 45.074244308695846 ], "wc_weaknesses_avg": [ 211.5, 82.13555868197403 ], "wc_questions_avg": [ 64.5, 54.13178363955875 ], "wc_limitations_avg": [ 7.5, 4.6097722286464435 ], "wc_review_avg": [ 509.0, 72.6050962398646 ], "wc_reply_reviewers_avg": [ 60.0, 52.426138518872435 ], "wc_reply_authors_avg": [ 164.25, 177.32227017495575 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11606186250721355742&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "harvard.edu;mit.edu;post.au.dk;humanitas.it;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology;Aarhus University;Humanitas Research Hospital", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu;https://au.dk;https://www.humanitas-research.com", "aff_unique_abbr": "Harvard;MIT;AU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "United States;Denmark;Italy" }, { "title": "Who Evaluates the Evaluations? Objectively Scoring Text-to-Image Prompt Coherence Metrics with T2IScoreScore (TS2)", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95132", "id": "S4YRCLbUK1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S4YRCLbUK1", "openreview": "https://openreview.net/forum?id=S4YRCLbUK1", "poster": "", "project": "", "author_site": "Michael Saxon, Fatima Jahara, Mahsa Khoshnoodi, Yujie Lu, Aditya Sharma, William Yang Wang", "tldr": "", "abstract": "With advances in the quality of text-to-image (T2I) models has come interest in benchmarking their prompt faithfulness---the semantic coherence of generated images to the prompts they were conditioned on. A variety of T2I faithfulness metrics have been proposed, leveraging advances in cross-modal embeddings and vision-language models (VLMs). However, these metrics are not rigorously compared and benchmarked, instead presented with correlation to human Likert scores over a set of easy-to-discriminate images against seemingly weak baselines. \n\nWe introduce T2IScoreScore, a curated set of semantic error graphs containing a prompt and a set of increasingly erroneous images. These allow us to rigorously judge whether a given prompt faithfulness metric can correctly order images with respect to their objective error count and significantly discriminate between different error nodes, using meta-metric scores derived from established statistical tests. Surprisingly, we find that the state-of-the-art VLM-based metrics (e.g., TIFA, DSG, LLMScore, VIEScore) we tested fail to significantly outperform simple (and supposedly worse) feature-based metrics like CLIPScore, particularly on a hard subset of naturally-occurring T2I model errors. TS2 will enable the development of better T2I prompt faithfulness metrics through more rigorous comparison of their conformity to expected orderings and separations under objective criteria.", "keywords": "text-to-image;t2i;metric;meta-metric;analysis", "primary_area": "evaluation", "supplementary_material": "", "author": "Michael Saxon;Fatima Jahara;Mahsa Khoshnoodi;Yujie Lu;Aditya Sharma;William Yang Wang", "authorids": "~Michael_Saxon1;~Fatima_Jahara1;~Mahsa_Khoshnoodi1;~Yujie_Lu1;~Aditya_Sharma3;~William_Yang_Wang2", "gender": "M;F;;;M;", "homepage": "https://saxon.me;https://fatimajahara.com/;;https://yujielu10.github.io/;;", "dblp": "222/6656;340/6715;;;;", "google_scholar": "pAlwjdgAAAAJ;DbhknBMAAAAJ;;pcmr6GMAAAAJ;4SRc4qkAAAAJ;", "orcid": ";0009-0008-1741-6842;;;;", "linkedin": ";fatimajahara/;;;adityas17/;", "or_profile": "~Michael_Saxon1;~Fatima_Jahara1;~Mahsa_Khoshnoodi1;~Yujie_Lu1;~Aditya_Sharma3;~William_Yang_Wang2", "aff": "Advanced Micro Devices;Workera.ai;;UC Santa Barbara;University of California, Santa Barbara;", "aff_domain": "amd.com;workera.ai;;ucsb.edu;ucsb.edu;", "position": "Intern;Senior Assessment Developer;;PhD student;MS student;", "bibtex": "@inproceedings{\nsaxon2024who,\ntitle={Who Evaluates the Evaluations? Objectively Scoring Text-to-Image Prompt Coherence Metrics with T2{IS}coreScore ({TS}2)},\nauthor={Michael Saxon and Fatima Jahara and Mahsa Khoshnoodi and Yujie Lu and Aditya Sharma and William Yang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S4YRCLbUK1}\n}", "github": "", "reviewers": "vRNq;Xy9h;h3XZ;cvSS", "pdf_size": 17588023, "rating": "5;6;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "36;108;101;74", "wc_strengths": "55;29;74;49", "wc_weaknesses": "123;82;136;322", "wc_questions": "35;2;74;1", "wc_limitations": "1;7;3;2", "wc_review": "250;228;388;448", "wc_reply_reviewers": "0;26;434;152", "wc_reply_authors": "0;0;229;266", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.75, 28.269904492233433 ], "wc_strengths_avg": [ 51.75, 16.052647756678645 ], "wc_weaknesses_avg": [ 165.75, 92.38607849670858 ], "wc_questions_avg": [ 28.0, 29.87473849257931 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 328.5, 92.30790865359262 ], "wc_reply_reviewers_avg": [ 153.0, 172.11914478058506 ], "wc_reply_authors_avg": [ 123.75, 124.43949332908745 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17334610306870913027&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "amd.com;workera.ai;;ucsb.edu;ucsb.edu;", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Advanced Micro Devices, Inc.;Workera;University of California, Santa Barbara", "aff_unique_dep": ";;", "aff_unique_url": "https://www.amd.com;https://www.workera.ai;https://www.ucsb.edu", "aff_unique_abbr": "AMD;Workera;UCSB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Spatio-Temporal Interactive Learning for Efficient Image Reconstruction of Spiking Cameras", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95131", "id": "S4ZqnMywcM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S4ZqnMywcM", "openreview": "https://openreview.net/forum?id=S4ZqnMywcM", "poster": "/media/PosterPDFs/NeurIPS%202024/95131.png?t=1731421166.1166022", "project": "", "author_site": "Bin Fan, Jiaoyang Yin, Yuchao Dai, Chao Xu, Tiejun Huang, Boxin Shi", "tldr": "", "abstract": "The spiking camera is an emerging neuromorphic vision sensor that records high-speed motion scenes by asynchronously firing continuous binary spike streams. Prevailing image reconstruction methods, generating intermediate frames from these spike streams, often rely on complex step-by-step network architectures that overlook the intrinsic collaboration of spatio-temporal complementary information. In this paper, we propose an efficient spatio-temporal interactive reconstruction network to jointly perform inter-frame feature alignment and intra-frame feature filtering in a coarse-to-fine manner. Specifically, it starts by extracting hierarchical features from a concise hybrid spike representation, then refines the motion fields and target frames scale-by-scale, ultimately obtaining a full-resolution output. Meanwhile, we introduce a symmetric interactive attention block and a multi-motion field estimation block to further enhance the interaction capability of the overall network. Experiments on synthetic and real-captured data show that our approach exhibits excellent performance while maintaining low model complexity.", "keywords": "Spiking camera;Image reconstruction;High-speed motion;Spatio-temporal interaction;Coarse-to-fine", "primary_area": "machine_vision", "supplementary_material": "", "author": "Bin Fan;Jiaoyang Yin;Yuchao Dai;Chao Xu;Tiejun Huang;Boxin Shi", "authorids": "~Bin_Fan3;~Jiaoyang_Yin1;~Yuchao_Dai1;~Chao_Xu1;~Tiejun_Huang1;~Boxin_Shi3", "gender": "M;M;M;M;M;M", "homepage": ";http://npu-cvr.cn/;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;https://idm.pku.edu.cn/~tjhuang/;http://camera.pku.edu.cn;https://gitcvfb.github.io/", "dblp": ";65/7804;;h/TiejunHuang;69/783;60/105-2", "google_scholar": ";https://scholar.google.com.tw/citations?user=fddAbqsAAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;K1LjZxcAAAAJ;c6As1PcAAAAJ", "orcid": ";0000-0002-4432-7406;;0000-0002-4234-6099;0000-0001-6749-0364;0000-0002-8028-0166", "linkedin": "jiaoyang-yin-a8909a309/;;;;;", "or_profile": "~Jiaoyang_Yin1;~Yuchao_Dai1;~Chao_Xu1;~Tiejun_Huang1;~Boxin_Shi3;~Fan_Bin1", "aff": "Peking University;Northwestern Polytechnical University;Peking University;Peking University;Peking University;Peking University", "aff_domain": "stu.pku.edu.cn;nwpu.edu.cn;pku.edu;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Professor;Full Professor;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nfan2024spatiotemporal,\ntitle={Spatio-Temporal Interactive Learning for Efficient Image Reconstruction of Spiking Cameras},\nauthor={Bin Fan and Jiaoyang Yin and Yuchao Dai and Chao Xu and Tiejun Huang and Boxin Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S4ZqnMywcM}\n}", "github": "", "reviewers": "94R6;1mUp;GueT;1ZaT", "pdf_size": 6519261, "rating": "4;5;6;6", "confidence": "1;4;4;2", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "19;92;64;74", "wc_strengths": "30;35;59;28", "wc_weaknesses": "33;101;53;15", "wc_questions": "101;52;46;15", "wc_limitations": "48;8;65;1", "wc_review": "231;288;287;133", "wc_reply_reviewers": "80;16;26;0", "wc_reply_authors": "567;61;50;61", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.25, 26.910732059905023 ], "wc_strengths_avg": [ 38.0, 12.389511693363866 ], "wc_weaknesses_avg": [ 50.5, 32.10529551335729 ], "wc_questions_avg": [ 53.5, 30.80990100600779 ], "wc_limitations_avg": [ 30.5, 26.800186566514792 ], "wc_review_avg": [ 234.75, 63.1124987621311 ], "wc_reply_reviewers_avg": [ 30.5, 30.045798375147232 ], "wc_reply_authors_avg": [ 184.75, 220.73782525883504 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3888183987768932759&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "stu.pku.edu.cn;nwpu.edu.cn;pku.edu;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Peking University;Northwestern Polytechnical University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "Peking U;NWPU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VeXKD: The Versatile Integration of Cross-Modal Fusion and Knowledge Distillation for 3D Perception", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95130", "id": "S5coB5kqSD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S5coB5kqSD", "openreview": "https://openreview.net/forum?id=S5coB5kqSD", "poster": "/media/PosterPDFs/NeurIPS%202024/95130.png?t=1731596099.2150335", "project": "", "author_site": "JI Yuzhe, Yijie CHEN, Liuqing Yang, Ding Rui, Meng Yang, Xinhu Zheng", "tldr": "", "abstract": "Recent advancements in 3D perception have led to a proliferation of network architectures, particularly those involving multi-modal fusion algorithms. While these fusion algorithms improve accuracy, their complexity often impedes real-time performance. This paper introduces VeXKD, an effective and Versatile framework that integrates Cross-Modal Fusion with Knowledge Distillation. VeXKD applies knowledge distillation exclusively to the Bird's Eye View (BEV) feature maps, enabling the transfer of cross-modal insights to single-modal students without additional inference time overhead. It avoids volatile components that can vary across various 3D perception tasks and student modalities, thus improving versatility. The framework adopts a modality-general cross-modal fusion module to bridge the modality gap between the multi-modal teachers and single-modal students. Furthermore, leveraging byproducts generated during fusion, our BEV query guided mask generation network identifies crucial spatial locations across different BEV feature maps in a data-driven manner, significantly enhancing the effectiveness of knowledge distillation. Extensive experiments on the nuScenes dataset demonstrate notable improvements, with up to 6.9\\%/4.2\\% increase in mAP and NDS for 3D detection tasks and up to 4.3\\% rise in mIoU for BEV map segmentation tasks, narrowing the performance gap with multi-modal models.", "keywords": "3D Perception;Multi-modal Fusion;Cross-modal Knowledge Distillation", "primary_area": "machine_vision", "supplementary_material": "/attachment/bfb6f61c03d03f1347734f5713e6da48ce812aba.zip", "author": "JI Yuzhe;Yijie CHEN;Liuqing Yang;Rui Ding;Meng Yang;Xinhu Zheng", "authorids": "~JI_Yuzhe1;~Yijie_CHEN3;~Liuqing_Yang3;~Rui_Ding11;~Meng_Yang8;~Xinhu_Zheng1", "gender": "M;;;;;", "homepage": ";;https://personal.hkust-gz.edu.cn/lqyang;;;", "dblp": "305/9308.html;;;;;", "google_scholar": ";;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~JI_Yuzhe1;~Yijie_CHEN3;~Liuqing_Yang3;~Rui_Ding11;~Meng_Yang8;~Xinhu_Zheng1", "aff": "HKUST(GZ);;Hong Kong University of Science and Technology;;;", "aff_domain": "hkust-gz.edu.cn;;ust.hk;;;", "position": "PhD student;;Full Professor;;;", "bibtex": "@inproceedings{\nyuzhe2024vexkd,\ntitle={Ve{XKD}: The Versatile Integration of Cross-Modal Fusion and Knowledge Distillation for 3D Perception},\nauthor={JI Yuzhe and Yijie CHEN and Liuqing Yang and Rui Ding and Meng Yang and Xinhu Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S5coB5kqSD}\n}", "github": "", "reviewers": "uk3B;Vuwz;GBJi;dGF9", "pdf_size": 18934332, "rating": "5;6;6;7", "confidence": "4;5;5;4", "soundness": "3;2;3;4", "novelty": "3;2;2;4", "presentation": "3;3;2;4", "wc_summary": "54;52;87;52", "wc_strengths": "60;52;104;74", "wc_weaknesses": "58;132;97;50", "wc_questions": "80;35;2;60", "wc_limitations": "1;6;12;23", "wc_review": "253;277;302;259", "wc_reply_reviewers": "20;79;23;8", "wc_reply_authors": "20;411;19;16", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.25, 14.889173919328098 ], "wc_strengths_avg": [ 72.5, 19.817921182606415 ], "wc_weaknesses_avg": [ 84.25, 32.80529682840867 ], "wc_questions_avg": [ 44.25, 29.140821882712917 ], "wc_limitations_avg": [ 10.5, 8.200609733428363 ], "wc_review_avg": [ 272.75, 19.057478846898924 ], "wc_reply_reviewers_avg": [ 32.5, 27.427176303804956 ], "wc_reply_authors_avg": [ 116.5, 170.03602559457804 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ppEPX5DW8JEJ:scholar.google.com/&scioq=VeXKD:+The+Versatile+Integration+of+Cross-Modal+Fusion+and+Knowledge+Distillation+for+3D+Perception&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "hkust-gz.edu.cn;;ust.hk;;;", "author_num": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "A versatile informative diffusion model for single-cell ATAC-seq data generation and analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95129", "id": "S6YLeBMoWF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S6YLeBMoWF", "openreview": "https://openreview.net/forum?id=S6YLeBMoWF", "poster": "", "project": "", "author_site": "Lei Huang, Lei Xiong, Na Sun, Zunpeng Liu, Ka-Chun Wong, Manolis Kellis", "tldr": "", "abstract": "The rapid advancement of single-cell ATAC sequencing (scATAC-seq) technologies holds great promise for investigating the heterogeneity of epigenetic landscapes at the cellular level. The amplification process in scATAC-seq experiments often introduces noise due to dropout events, which results in extreme sparsity that hinders accurate analysis. Consequently, there is a significant demand for the generation of high-quality scATAC-seq data in silico. Furthermore, current methodologies are typically task-specific, lacking a versatile framework capable of handling multiple tasks within a single model. In this work, we propose ATAC-Diff, a versatile framework, which is based on a diffusion model conditioned on the latent auxiliary variables to adapt for various tasks. ATAC-Diff is the first diffusion model for the scATAC-seq data generation and analysis, composed of auxiliary modules encoding the latent high-level variables to enable the model to learn the semantic information to sample high-quality data. Gaussian Mixture Model (GMM) as the latent prior and auxiliary decoder, the yield variables reserve the refined genomic information beneficial for downstream analyses. Another innovation is the incorporation of mutual information between observed and hidden variables as a regularization term to prevent the model from decoupling from latent variables. Through extensive experiments, we demonstrate that ATAC-Diff achieves high performance in both generation and analysis tasks, outperforming state-of-the-art models.", "keywords": "scATAC-seq;diffusion model;generative model", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Lei huang;Lei Xiong;Na Sun;Zunpeng Liu;Ka-Chun Wong;Manolis Kellis", "authorids": "~Lei_huang9;~Lei_Xiong2;~Na_Sun1;~Zunpeng_Liu1;~Ka-Chun_Wong1;~Manolis_Kellis1", "gender": "M;M;F;M;;M", "homepage": "https://layne-huang.github.io/;https://xiong-lei.com;https://scholar.google.com/citations?user=oOxS8vwAAAAJ&hl=en;;http://www.cs.toronto.edu/~wkc/;http://compbio.mit.edu", "dblp": ";;;;45/7183;75/2690.html", "google_scholar": "xO1oupAAAAAJ;ky6bUTgAAAAJ;oOxS8vwAAAAJ;Yk4VxGQAAAAJ;nZH_Ws8AAAAJ;lsYXBx8AAAAJ", "orcid": ";;;;0000-0001-6062-733X;0000-0001-7113-9630", "linkedin": ";;;;;manolis-kellis", "or_profile": "~Lei_huang9;~Lei_Xiong2;~Na_Sun1;~Zunpeng_Liu1;~Ka-Chun_Wong1;~Manolis_Kellis1", "aff": "City University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;City University of Hong Kong;Massachusetts Institute of Technology", "aff_domain": "cityu.edu;mit.edu;mit.edu;mit.edu;cityu.edu.hk;mit.edu", "position": "PhD student;Postdoc;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024a,\ntitle={A versatile informative diffusion model for single-cell {ATAC}-seq data generation and analysis},\nauthor={Lei huang and Lei Xiong and Na Sun and Zunpeng Liu and Ka-Chun Wong and Manolis Kellis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S6YLeBMoWF}\n}", "github": "", "reviewers": "BJyq;feok;kixm", "pdf_size": 1220890, "rating": "4;6;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "3;3;2", "presentation": "3;4;2", "wc_summary": "73;284;81", "wc_strengths": "79;284;67", "wc_weaknesses": "105;284;256", "wc_questions": "38;284;6", "wc_limitations": "1;284;99", "wc_review": "296;1420;509", "wc_reply_reviewers": "0;13;26", "wc_reply_authors": "41;51;27", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 146.0, 97.63537610244899 ], "wc_strengths_avg": [ 143.33333333333334, 99.5869246214359 ], "wc_weaknesses_avg": [ 215.0, 78.61721609588237 ], "wc_questions_avg": [ 109.33333333333333, 124.19697616644655 ], "wc_limitations_avg": [ 128.0, 117.33996193397485 ], "wc_review_avg": [ 741.6666666666666, 487.4726203137522 ], "wc_reply_reviewers_avg": [ 13.0, 10.614455552060438 ], "wc_reply_authors_avg": [ 39.666666666666664, 9.843215373488933 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VZHedJ6zbUIJ:scholar.google.com/&scioq=A+versatile+informative+diffusion+model+for+single-cell+ATAC-seq+data+generation+and+analysis&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cityu.edu;mit.edu;mit.edu;mit.edu;cityu.edu.hk;mit.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "City University;Massachusetts Institute of Technology;City University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cityuniversity.edu;https://web.mit.edu;https://www.cityu.edu.hk", "aff_unique_abbr": "CityU;MIT;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Normalization Layer Per-Example Gradients are Sufficient to Predict Gradient Noise Scale in Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95128", "id": "S7THlpvH8i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S7THlpvH8i", "openreview": "https://openreview.net/forum?id=S7THlpvH8i", "poster": "/media/PosterPDFs/NeurIPS%202024/95128.png?t=1731696416.5188384", "project": "", "author_site": "Gavia Gray, aman tiwari, Shane Bergsma, Joel Hestness", "tldr": "", "abstract": "Per-example gradient norms are a vital ingredient for estimating gradient noise scale (GNS) with minimal variance. Observing the tensor contractions required to compute them, we propose a method with minimal FLOPs in 3D or greater tensor regimes by simultaneously computing the norms while computing the parameter gradients. Using this method we are able to observe the GNS of different layers at higher accuracy than previously possible. We find that the total GNS of contemporary transformer models is predicted well by the GNS of only the normalization layers. As a result, focusing only on the normalization layer, we develop a custom kernel to compute the per-example gradient norms while performing the LayerNorm backward pass with zero throughput overhead. Tracking GNS on only those layers, we are able to guide a practical batch size schedule that reduces training time by 18% on a Chinchilla-optimal language model.", "keywords": "Efficient deep learning;gradient noise scale;critical batch size;language models", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/a4eeded65f7f473639dfc9e4eb39161435ccdc90.zip", "author": "Gavia Gray;Aman Tiwari;Shane Bergsma;Joel Hestness", "authorids": "~Gavia_Gray1;~Aman_Tiwari1;~Shane_Bergsma1;~Joel_Hestness2", "gender": "Non-Binary;M;F;M", "homepage": "https://carper.ai;https://sites.google.com/site/shaneabergsma/;https://reflected.codes;", "dblp": ";57/2540;210/2422;60/3063", "google_scholar": ";https://scholar.google.ca/citations?hl=en;;wkbvCf0AAAAJ", "orcid": ";;0000-0002-9864-3666;0000-0001-6920-0906", "linkedin": ";;;joelhestness", "or_profile": "~Aman_Tiwari1;~Shane_Bergsma1;~Gavin_Gray1;~Joel_Thomas_Hestness1", "aff": "Eleuther;Huawei Canada;Cerebras Systems, Inc;Cerebras Systems, Inc", "aff_domain": "eleuther.ai;huawei.com;cerebras.net;cerebras.net", "position": "Researcher;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\ngray2024normalization,\ntitle={Normalization Layer Per-Example Gradients are Sufficient to Predict Gradient Noise Scale in Transformers},\nauthor={Gavia Gray and Aman Tiwari and Shane Bergsma and Joel Hestness},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S7THlpvH8i}\n}", "github": "", "reviewers": "32m2;6ptT;fwbc;5nZ5", "pdf_size": 1995298, "rating": "4;5;6;7", "confidence": "4;2;4;3", "soundness": "2;2;3;3", "novelty": "3;2;1;3", "presentation": "1;2;2;3", "wc_summary": "110;80;66;44", "wc_strengths": "34;36;96;97", "wc_weaknesses": "169;178;241;78", "wc_questions": "313;140;20;21", "wc_limitations": "8;3;59;24", "wc_review": "634;437;482;264", "wc_reply_reviewers": "320;15;32;11", "wc_reply_authors": "773;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.0, 23.93741840717165 ], "wc_strengths_avg": [ 65.75, 30.760160922856045 ], "wc_weaknesses_avg": [ 166.5, 58.139917440601856 ], "wc_questions_avg": [ 123.5, 119.79252898240357 ], "wc_limitations_avg": [ 23.5, 21.914607000811127 ], "wc_review_avg": [ 454.25, 131.88323434007827 ], "wc_reply_reviewers_avg": [ 94.5, 130.43101624997024 ], "wc_reply_authors_avg": [ 193.25, 334.71881856268556 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7764989930325261227&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "eleuther.ai;huawei.com;cerebras.net;cerebras.net", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Eleuther AI;Huawei;Cerebras Systems", "aff_unique_dep": ";Huawei;", "aff_unique_url": "https://www.eleuther.ai;https://www.huawei.com/ca-en/;https://www.cerebras.com", "aff_unique_abbr": "Eleuther AI;Huawei Canada;Cerebras", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Cloud Object Detector Adaptation by Integrating Different Source Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95127", "id": "S8SEjerTTg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S8SEjerTTg", "openreview": "https://openreview.net/forum?id=S8SEjerTTg", "poster": "/media/PosterPDFs/NeurIPS%202024/95127.png?t=1731622821.3023086", "project": "", "author_site": "Shuaifeng Li, Mao Ye, Lihua Zhou, Nianxin Li, Siying Xiao, Song Tang, Xiatian Zhu", "tldr": "", "abstract": "We propose to explore an interesting and promising problem, Cloud Object Detector Adaptation (CODA), where the target domain leverages detections provided by a large cloud model to build a target detector. Despite with powerful generalization capability, the cloud model still cannot achieve error-free detection in a specific target domain. In this work, we present a novel Cloud Object detector adaptation method by Integrating different source kNowledge (COIN). The key idea is to incorporate a public vision-language model (CLIP) to distill positive knowledge while refining negative knowledge for adaptation by self-promotion gradient direction alignment. To that end, knowledge dissemination, separation, and distillation are carried out successively. Knowledge dissemination combines knowledge from cloud detector and CLIP model to initialize a target detector and a CLIP detector in target domain. By matching CLIP detector with the cloud detector, knowledge separation categorizes detections into three parts: consistent, inconsistent and private detections such that divide-and-conquer strategy can be used for knowledge distillation. Consistent and private detections are directly used to train target detector; while inconsistent detections are fused based on a consistent knowledge generation network, which is trained by aligning the gradient direction of inconsistent detections to that of consistent detections, because it provides a direction toward an optimal target detector. Experiment results demonstrate that the proposed COIN method achieves the state-of-the-art performance.", "keywords": "Object detection;Cloud model;Domain adaptation", "primary_area": "machine_vision", "supplementary_material": "/attachment/58b64d86fa926d524679197c79b2b536eb41a4ab.zip", "author": "Shuaifeng Li;Mao Ye;Lihua Zhou;Nianxin Li;Siying Xiao;Song Tang;Xiatian Zhu", "authorids": "~Shuaifeng_Li1;~Mao_Ye1;~Lihua_Zhou1;~Nianxin_Li1;~Siying_Xiao1;~Song_Tang5;~Xiatian_Zhu3", "gender": "M;M;M;F;M;;M", "homepage": "http://en.uestc.edu.cn/index.php?m=content&c=index&a=show&catid=79&id=5422;;;;;https://x-up-lab.github.io;https://flashkong.github.io/", "dblp": "dblp.uni-trier.de/pers/hd/y/Ye_0001:Mao;dblp.uni-trier.de/pers/hd/y/Zhou_0001:Lihua;;;181/8826-1;128/7935;310/5895", "google_scholar": "V5gL_H0AAAAJ;7wECe98AAAAJ;TfSmxQYAAAAJ;TosYOUsAAAAJ;https://scholar.google.com/citations?hl=en;ZbA-z1cAAAAJ;uswf_pQAAAAJ", "orcid": ";0000-0003-0370-3337;;;;0000-0002-9284-2955;0000-0003-2371-2513", "linkedin": ";;;;;;", "or_profile": "~Mao_Ye1;~Lihua_Zhou1;~Nianxin_Li1;~Siying_Xiao1;~Song_Tang5;~Xiatian_Zhu3;~Li_Shuaifeng1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Shanghai for Science and Technology;University of Surrey;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;usst.edu.cn;surrey.ac.uk;uestc.edu.cn", "position": "Full Professor;PhD student;MS student;Undergrad student;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2024cloud,\ntitle={Cloud Object Detector Adaptation by Integrating Different Source Knowledge},\nauthor={Shuaifeng Li and Mao Ye and Lihua Zhou and Nianxin Li and Siying Xiao and Song Tang and Xiatian Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S8SEjerTTg}\n}", "github": "", "reviewers": "MA6n;FRKx;Ht4V;s7Cc;nLTh", "pdf_size": 16530567, "rating": "4;5;6;6;6", "confidence": "4;3;4;4;4", "soundness": "3;3;3;4;2", "novelty": "2;3;3;3;2", "presentation": "2;3;3;2;3", "wc_summary": "42;57;82;115;91", "wc_strengths": "18;47;43;53;50", "wc_weaknesses": "122;92;46;115;136", "wc_questions": "4;36;258;26;55", "wc_limitations": "10;9;19;5;1", "wc_review": "196;241;448;314;333", "wc_reply_reviewers": "0;0;17;0;105", "wc_reply_authors": "78;78;41;78;445", "reply_reviewers": "0;0;1;0;1", "reply_authors": "2;2;2;2;3", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 77.4, 25.648391762447794 ], "wc_strengths_avg": [ 42.2, 12.544321424453376 ], "wc_weaknesses_avg": [ 102.2, 31.49857139617605 ], "wc_questions_avg": [ 75.8, 92.57299822302397 ], "wc_limitations_avg": [ 8.8, 6.013318551349164 ], "wc_review_avg": [ 306.4, 86.34952229167223 ], "wc_reply_reviewers_avg": [ 24.4, 40.83429930830208 ], "wc_reply_authors_avg": [ 144.0, 151.18068659719734 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12205303691915349417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;usst.edu.cn;surrey.ac.uk;uestc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "University of Electronic Science and Technology of China;University of Shanghai for Science and Technology;University of Surrey", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uestc.edu.cn;https://www.usst.edu.cn;https://www.surrey.ac.uk", "aff_unique_abbr": "UESTC;USST;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "PPLNs: Parametric Piecewise Linear Networks for Event-Based Temporal Modeling and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95126", "id": "S8wFXyT4dY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S8wFXyT4dY", "openreview": "https://openreview.net/forum?id=S8wFXyT4dY", "poster": "/media/PosterPDFs/NeurIPS%202024/95126.png?t=1729995069.1956997", "project": "", "author_site": "Chen Song, Zhenxiao Liang, Bo Sun, Qixing Huang", "tldr": "", "abstract": "We present Parametric Piecewise Linear Networks (PPLNs) for temporal vision inference. Motivated by the neuromorphic principles that regulate biological neural behaviors, PPLNs are ideal for processing data captured by event cameras, which are built to simulate neural activities in the human retina. We discuss how to represent the membrane potential of an artificial neuron by a parametric piecewise linear function with learnable coefficients. This design echoes the idea of building deep models from learnable parametric functions recently popularized by Kolmogorov\u2013Arnold Networks (KANs). Experiments demonstrate the state-of-the-art performance of PPLNs in event-based and image-based vision applications, including steering prediction, human pose estimation, and motion deblurring.", "keywords": "Spiking mechanism;event camera;event vision", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Chen Song;Zhenxiao Liang;Bo Sun;Qixing Huang", "authorids": "~Chen_Song5;~Zhenxiao_Liang1;~Bo_Sun6;~Qixing_Huang1", "gender": "M;M;F;M", "homepage": "http://songc.me/;;https://sites.google.com/view/bosun/home;https://www.cs.utexas.edu/~huangqx/", "dblp": ";209/4944;;82/241", "google_scholar": "AoV0Q34AAAAJ;;H9Yzbw4AAAAJ;https://scholar.google.com.tw/citations?user=pamL_rIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chen_Song5;~Zhenxiao_Liang1;~Bo_Sun6;~Qixing_Huang1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsong2024pplns,\ntitle={{PPLN}s: Parametric Piecewise Linear Networks for Event-Based Temporal Modeling and Beyond},\nauthor={Chen Song and Zhenxiao Liang and Bo Sun and Qixing Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S8wFXyT4dY}\n}", "github": "", "reviewers": "oEKu;NNdg;4tcq", "pdf_size": 1587876, "rating": "5;5;7", "confidence": "4;4;5", "soundness": "3;2;4", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "68;82;85", "wc_strengths": "51;41;87", "wc_weaknesses": "45;136;64", "wc_questions": "8;46;131", "wc_limitations": "25;48;14", "wc_review": "197;353;381", "wc_reply_reviewers": "30;16;130", "wc_reply_authors": "55;11;57", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.33333333333333, 7.408703590297623 ], "wc_strengths_avg": [ 59.666666666666664, 19.754043186705402 ], "wc_weaknesses_avg": [ 81.66666666666667, 39.19467083956978 ], "wc_questions_avg": [ 61.666666666666664, 51.42200134399888 ], "wc_limitations_avg": [ 29.0, 14.165686240583852 ], "wc_review_avg": [ 310.3333333333333, 80.94991592885621 ], "wc_reply_reviewers_avg": [ 58.666666666666664, 50.76306618880393 ], "wc_reply_authors_avg": [ 41.0, 21.228911104120876 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EPrr2KwAQt0J:scholar.google.com/&scioq=PPLNs:+Parametric+Piecewise+Linear+Networks+for+Event-Based+Temporal+Modeling+and+Beyond&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Activation Map Compression through Tensor Decomposition for Deep Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95125", "id": "S93hrwT8u9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S93hrwT8u9", "openreview": "https://openreview.net/forum?id=S93hrwT8u9", "poster": "", "project": "", "author_site": "Le-Trung Nguyen, A\u00ebl Qu\u00e9lennec, Enzo Tartaglione, Samuel Tardieu, Van-Tam Nguyen", "tldr": "", "abstract": "Internet of Things and Deep Learning are synergetically and exponentially growing industrial fields with a massive call for their unification into a common framework called Edge AI. While on-device inference is a well-explored topic in recent research, backpropagation remains an open challenge due to its prohibitive computational and memory costs compared to the extreme resource constraints of embedded devices. Drawing on tensor decomposition research, we tackle the main bottleneck of backpropagation, namely the memory footprint of activation map storage. We investigate and compare the effects of activation compression using Singular Value Decomposition and its tensor variant, High-Order Singular Value Decomposition. The application of low-order decomposition results in considerable memory savings while preserving the features essential for learning, and also offers theoretical guarantees to convergence. Experimental results obtained on main-stream architectures and tasks demonstrate Pareto-superiority over other state-of-the-art solutions, in terms of the trade-off between generalization and memory footprint.", "keywords": "Deep Learning;Computer Vision;Compression", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Le-Trung Nguyen;A\u00ebl Qu\u00e9lennec;Enzo Tartaglione;Samuel Tardieu;Van-Tam Nguyen", "authorids": "~Le-Trung_Nguyen1;~A\u00ebl_Qu\u00e9lennec1;~Enzo_Tartaglione1;~Samuel_Tardieu1;~Van-Tam_Nguyen1", "gender": "M;;M;M;M", "homepage": ";;https://perso.telecom-paristech.fr/etartaglione/index.html;https://rfc1149.net;https://perso.telecom-paristech.fr/vtnguyen/", "dblp": ";;170/0115;45/411;", "google_scholar": "RdNCK-QAAAAJ;GmPAfRcAAAAJ;https://scholar.google.it/citations?user=uKuvN64AAAAJ;;", "orcid": ";;0000-0003-4274-8298;;", "linkedin": "nguyenletrung/;a%C3%ABl-qu%C3%A9lennec/;enzo-tartaglione-490950a2;;", "or_profile": "~Le-Trung_Nguyen1;~A\u00ebl_Qu\u00e9lennec1;~Enzo_Tartaglione1;~Samuel_Tardieu1;~Van-Tam_Nguyen1", "aff": "T\u00e9l\u00e9com Paris;T\u00e9l\u00e9com Paris;T\u00e9l\u00e9com Paris;T\u00e9l\u00e9com Paris;T\u00e9l\u00e9com Paris", "aff_domain": "telecom-paris.fr;telecom-paris.fr;telecom-paristech.fr;telecom-paris.fr;telecom-paristech.fr", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nnguyen2024activation,\ntitle={Activation Map Compression through Tensor Decomposition for Deep Learning},\nauthor={Le-Trung Nguyen and A{\\\"e}l Qu{\\'e}lennec and Enzo Tartaglione and Samuel Tardieu and Van-Tam Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S93hrwT8u9}\n}", "github": "", "reviewers": "WgAz;dqZf;f1cQ;HEKd", "pdf_size": 667898, "rating": "3;4;4;7", "confidence": "5;4;5;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;2", "wc_summary": "54;74;53;132", "wc_strengths": "30;67;62;230", "wc_weaknesses": "103;87;68;660", "wc_questions": "60;60;48;56", "wc_limitations": "1;122;30;9", "wc_review": "248;410;261;1087", "wc_reply_reviewers": "86;31;0;491", "wc_reply_authors": "459;0;0;254", "reply_reviewers": "1;1;0;2", "reply_authors": "2;1;1;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.25, 32.14323412477344 ], "wc_strengths_avg": [ 97.25, 77.94669653038542 ], "wc_weaknesses_avg": [ 229.5, 248.85789117486308 ], "wc_questions_avg": [ 56.0, 4.898979485566356 ], "wc_limitations_avg": [ 40.5, 48.23121395942673 ], "wc_review_avg": [ 501.5, 343.978560378405 ], "wc_reply_reviewers_avg": [ 152.0, 198.13000782314626 ], "wc_reply_authors_avg": [ 178.25, 192.42189974116772 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14052490875518283809&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "telecom-paris.fr;telecom-paris.fr;telecom-paristech.fr;telecom-paris.fr;telecom-paristech.fr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "T\u00e9l\u00e9com Paris", "aff_unique_dep": "", "aff_unique_url": "https://www.telecom-paris.fr", "aff_unique_abbr": "T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "title": "Diffusion Tuning: Transferring Diffusion Models via Chain of Forgetting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95124", "id": "S98OzJD3jn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S98OzJD3jn", "openreview": "https://openreview.net/forum?id=S98OzJD3jn", "poster": "", "project": "", "author_site": "Jincheng Zhong, Xingzhuo Guo, Jiaxiang Dong, Mingsheng Long", "tldr": "", "abstract": "Diffusion models have significantly advanced the field of generative modeling. However, training a diffusion model is computationally expensive, creating a pressing need to adapt off-the-shelf diffusion models for downstream generation tasks. Current fine-tuning methods focus on parameter-efficient transfer learning but overlook the fundamental transfer characteristics of diffusion models.\n In this paper, we investigate the transferability of diffusion models and observe a monotonous chain of forgetting trend of transferability along the reverse process. Based on this observation and novel theoretical insights, we present Diff-Tuning, a frustratingly simple transfer approach that leverages the chain of forgetting tendency. Diff-Tuning encourages the fine-tuned model to retain the pre-trained knowledge at the end of the denoising chain close to the generated data while discarding the other noise side.\n We conduct comprehensive experiments to evaluate Diff-Tuning, including the transfer of pre-trained Diffusion Transformer models to eight downstream generations and the adaptation of Stable Diffusion to five control conditions with ControlNet.\n Diff-Tuning achieves a 24.6% improvement over standard fine-tuning and enhances the convergence speed of ControlNet by 24%. Notably, parameter-efficient transfer learning techniques for diffusion models can also benefit from Diff-Tuning. Code\n is available at this repository: https://github.com/thuml/Diffusion-Tuning.", "keywords": "Transfer Learning;Fine-tuning;Diffusion Model;Generative Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jincheng Zhong;Xingzhuo Guo;Jiaxiang Dong;Mingsheng Long", "authorids": "~Jincheng_Zhong1;~Xingzhuo_Guo1;~Jiaxiang_Dong1;~Mingsheng_Long5", "gender": "M;M;;", "homepage": ";;;", "dblp": "257/2831;;;", "google_scholar": ";Cbinj9QAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jincheng_Zhong1;~Xingzhuo_Guo1;~Jiaxiang_Dong1;~Mingsheng_Long5", "aff": "Tsinghua University;Tsinghua University;;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nzhong2024diffusion,\ntitle={Diffusion Tuning: Transferring Diffusion Models via Chain of Forgetting},\nauthor={Jincheng Zhong and Xingzhuo Guo and Jiaxiang Dong and Mingsheng Long},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=S98OzJD3jn}\n}", "github": "", "reviewers": "x2jk;WW5v;y5ux;a4Zm", "pdf_size": 12281621, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "4;3;4;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "62;86;85;66", "wc_strengths": "52;46;54;13", "wc_weaknesses": "191;348;63;98", "wc_questions": "13;98;111;5", "wc_limitations": "13;33;1;7", "wc_review": "331;611;314;189", "wc_reply_reviewers": "18;89;21;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 10.848386976873567 ], "wc_strengths_avg": [ 41.25, 16.57369904396722 ], "wc_weaknesses_avg": [ 175.0, 110.29279214889793 ], "wc_questions_avg": [ 56.75, 48.05400607649689 ], "wc_limitations_avg": [ 13.5, 12.031209415515965 ], "wc_review_avg": [ 361.25, 154.2666117473253 ], "wc_reply_reviewers_avg": [ 36.25, 30.49077729412617 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9820699461870081105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;tsinghua.edu.cn;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "CiteME: Can Language Models Accurately Cite Scientific Claims?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97683", "id": "S9Qrrxpy6z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=S9Qrrxpy6z", "openreview": "https://openreview.net/forum?id=S9Qrrxpy6z", "poster": "/media/PosterPDFs/NeurIPS%202024/97683.png?t=1731398221.6533978", "project": "", "author_site": "Ori Press, Andreas Hochlehnert, Ameya Prabhu, Vishaal Udandarao, Ofir Press, Matthias Bethge", "tldr": "", "abstract": "Thousands of new scientific papers are published each month. Such information overload complicates researcher efforts to stay current with the state-of-the-art as well as to verify and correctly attribute claims. We pose the following research question: Given a text excerpt referencing a paper, could an LM act as a research assistant to correctly identify the referenced paper? We advance efforts to answer this question by building a benchmark that evaluates the abilities of LMs in citation attribution. Our benchmark, CiteME, consists of text excerpts from recent machine learning papers, each referencing a single other paper. CiteME use reveals a large gap between frontier LMs and human performance, with LMs achieving only 4.2-18.5% accuracy and humans 69.7%. We close this gap by introducing CiteAgent, an autonomous system built on the GPT-4o LM that can also search and read papers, which achieves an accuracy of 35.3% on CiteME. \nOverall, CiteME serves as a challenging testbed for open-ended claim attribution, driving the research community towards a future where any claim made by an LM can be automatically verified and discarded if found to be incorrect.", "keywords": "paper citation;language models;language model agents;citation recommendaiton", "primary_area": "", "supplementary_material": "/attachment/3be30bdd30e7fbc0673c4b39c32e9938d7e73a8c.zip", "author": "Ori Press;Andreas Hochlehnert;Ameya Prabhu;Vishaal Udandarao;Ofir Press;Matthias Bethge", "authorids": "~Ori_Press1;~Andreas_Hochlehnert1;~Ameya_Prabhu1;~Vishaal_Udandarao1;~Ofir_Press1;~Matthias_Bethge1", "gender": ";M;M;M;M;M", "homepage": ";;https://drimpossible.github.io/;https://vishaal27.github.io/;https://ofir.io/about;https://bethgelab.org", "dblp": ";;181/4512;247/4693;185/0577;77/3005", "google_scholar": ";https://scholar.google.com/citations?hl=en;0kK7sSAAAAAJ;jUOcawkAAAAJ;LeHa8psAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;", "linkedin": ";;;vishaal-udandarao/?originalSubdomain=de;;", "or_profile": "~Ori_Press1;~Andreas_Hochlehnert1;~Ameya_Prabhu1;~Vishaal_Udandarao1;~Ofir_Press1;~Matthias_Bethge1", "aff": ";University of Tuebingen;University of Oxford;University of Cambridge;Princeton University;University of Tuebingen", "aff_domain": ";uni-tuebingen.de;ox.ac.uk;cam.ac.uk;princeton.edu;uni-tuebingen.de", "position": ";PhD student;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\npress2024citeme,\ntitle={Cite{ME}: Can Language Models Accurately Cite Scientific Claims?},\nauthor={Ori Press and Andreas Hochlehnert and Ameya Prabhu and Vishaal Udandarao and Ofir Press and Matthias Bethge},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=S9Qrrxpy6z}\n}", "github": "", "reviewers": "L4JP;L2LE;m9VH", "pdf_size": 3922384, "rating": "6;6;6", "confidence": "4;3;4", "wc_summary_and_contributions": "83;71;78", "wc_strengths": "37;100;49", "wc_improvement": "170;137;149", "wc_limitations": "45;18;33", "wc_correctness": "1;1;15", "wc_clarity": "1;22;9", "wc_relation_to_prior_work": "58;39;7", "wc_documentation": "1;15;17", "wc_additional_feedback": "1;1;1", "wc_review": "397;404;358", "wc_reply_reviewers": "86;46;40", "wc_reply_authors": "858;808;1019", "reply_reviewers": "2;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 77.33333333333333, 4.921607686744467 ], "wc_strengths_avg": [ 62.0, 27.313000567495326 ], "wc_improvement_avg": [ 152.0, 13.638181696985855 ], "wc_limitations_avg": [ 32.0, 11.045361017187261 ], "wc_correctness_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_clarity_avg": [ 10.666666666666666, 8.65383665716478 ], "wc_relation_to_prior_work_avg": [ 34.666666666666664, 21.044925490219462 ], "wc_documentation_avg": [ 11.0, 7.118052168020874 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 386.3333333333333, 20.23747898221405 ], "wc_reply_reviewers_avg": [ 57.333333333333336, 20.417857108151406 ], "wc_reply_authors_avg": [ 895.0, 90.02592219281438 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16604580710949982218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";uni-tuebingen.de;ox.ac.uk;cam.ac.uk;princeton.edu;uni-tuebingen.de", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Tuebingen;University of Oxford;University of Cambridge;Princeton University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.ox.ac.uk;https://www.cam.ac.uk;https://www.princeton.edu", "aff_unique_abbr": "Uni T\u00fcbingen;Oxford;Cambridge;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;2;0", "aff_country_unique": "Germany;United Kingdom;United States" }, { "title": "AlchemistCoder: Harmonizing and Eliciting Code Capability by Hindsight Tuning on Multi-source Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95123", "id": "SAQXbnvv4t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SAQXbnvv4t", "openreview": "https://openreview.net/forum?id=SAQXbnvv4t", "poster": "/media/PosterPDFs/NeurIPS%202024/95123.png?t=1730112918.4739614", "project": "", "author_site": "Zifan Song, Yudong Wang, Wenwei Zhang, Kuikun Liu, Chengqi Lyu, Demin Song, Qipeng Guo, Hang Yan, Dahua Lin, Kai Chen, Cairong Zhao", "tldr": "", "abstract": "Open-source Large Language Models (LLMs) and their specialized variants, particularly Code LLMs, have recently delivered impressive performance. However, previous Code LLMs are typically fine-tuned on single-source data with limited quality and diversity, which may insufficiently elicit the potential of pre-trained Code LLMs. In this paper, we present AlchemistCoder, a series of Code LLMs with enhanced code generation and generalization capabilities fine-tuned on multi-source data. To achieve this, we pioneer to unveil inherent conflicts among the various styles and qualities in multi-source code corpora and introduce data-specific prompts with hindsight relabeling, termed AlchemistPrompts, to harmonize different data sources and instruction-response pairs. Additionally, we propose incorporating the data construction process into the fine-tuning data as code comprehension tasks, including instruction evolution, data filtering, and code review. Extensive experiments demonstrate that AlchemistCoder holds a clear lead among all models of the same size (6.7B/7B) and rivals or even surpasses larger models (15B/33B/70B), showcasing the efficacy of our method in refining instruction-following capabilities and advancing the boundaries of code intelligence. Source code and models are available at https://github.com/InternLM/AlchemistCoder.", "keywords": "Large Language Model;Code Generation;Instruction Fine-tuning;Hindsight Relabeling", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/5846a737f4844ad0197dc753b6c3ea4d6851dd96.zip", "author": "Zifan Song;Yudong Wang;Wenwei Zhang;Kuikun Liu;Chengqi Lyu;Demin Song;Qipeng Guo;Hang Yan;Dahua Lin;Kai Chen;Cairong Zhao", "authorids": "~Zifan_Song1;~Yudong_Wang3;~Wenwei_Zhang1;~Kuikun_Liu1;~Chengqi_Lyu1;~Demin_Song1;~Qipeng_Guo1;~Hang_Yan2;~Dahua_Lin1;~Kai_Chen4;~Cairong_Zhao2", "gender": "M;M;M;M;M;M;M;;M;M;", "homepage": "https://matcaviar.github.io/;;https://zhangwenwei.cn;;;https://github.com/dmsong-star;;;http://dahua.site;https://chenkai.site/;", "dblp": "307/3965;;;;319/5244;282/4437;172/1046;;53/6088;181/2839-26;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;O9Tzr1EAAAAJ;QDXADSEAAAAJ;lcSm6RoAAAAJ;https://scholar.google.com/citations?view_op=list_works;KUI3yWYAAAAJ;k3mPGKgAAAAJ;;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;", "orcid": "0000-0001-8734-9878;;0000-0002-2748-4514;;;;;;;0000-0002-6820-2325;", "linkedin": ";;wenweizhang-b9769a124/;;;;;;;;", "or_profile": "~Zifan_Song1;~Yudong_Wang3;~Wenwei_Zhang1;~Kuikun_Liu1;~Chengqi_Lyu1;~Demin_Song1;~Qipeng_Guo1;~Hang_Yan2;~Dahua_Lin1;~Kai_Chen4;~Cairong_Zhao2", "aff": "Tongji University;Shanghai AI Laboratory;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai AI Laboratory;Shanghai AI Laboratory;;The Chinese University of Hong Kong;Shanghai AI Laboratory;", "aff_domain": "tongji.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;cuhk.edu.hk;pjlab.org.cn;", "position": "PhD student;Postdoc;Researcher;Researcher;Researcher;Researcher;Researcher;;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nsong2024alchemistcoder,\ntitle={AlchemistCoder: Harmonizing and Eliciting Code Capability by Hindsight Tuning on Multi-source Data},\nauthor={Zifan Song and Yudong Wang and Wenwei Zhang and Kuikun Liu and Chengqi Lyu and Demin Song and Qipeng Guo and Hang Yan and Dahua Lin and Kai Chen and Cairong Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SAQXbnvv4t}\n}", "github": "", "reviewers": "FM86;yLAq;fYHy;q6UB", "pdf_size": 9707322, "rating": "6;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;4;3", "novelty": "3;3;2;2", "presentation": "2;4;3;3", "wc_summary": "75;114;128;54", "wc_strengths": "82;50;124;70", "wc_weaknesses": "156;64;78;172", "wc_questions": "103;22;57;76", "wc_limitations": "4;2;8;117", "wc_review": "420;252;395;489", "wc_reply_reviewers": "56;0;0;94", "wc_reply_authors": "35;64;64;647", "reply_reviewers": "1;0;0;2", "reply_authors": "2;2;2;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.75, 29.625791128677054 ], "wc_strengths_avg": [ 81.5, 27.069355367278327 ], "wc_weaknesses_avg": [ 117.5, 47.10360920354193 ], "wc_questions_avg": [ 64.5, 29.483045975611137 ], "wc_limitations_avg": [ 32.75, 48.68970630431036 ], "wc_review_avg": [ 389.0, 86.26412927746966 ], "wc_reply_reviewers_avg": [ 37.5, 39.834030677299026 ], "wc_reply_authors_avg": [ 202.5, 256.9051381346819 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14838555607759409711&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "tongji.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;cuhk.edu.hk;pjlab.org.cn;", "author_num": 11, "aff_unique_index": "0;1;1;2;1;1;1;3;1", "aff_unique_norm": "Tongji University;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tongji.edu.cn;https://www.shanghai-ai-lab.com;http://www.shailab.org/;https://www.cuhk.edu.hk", "aff_unique_abbr": "Tongji;SAIL;Shanghai AI Lab;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "General bounds on the quality of Bayesian coresets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95122", "id": "SAZeQV2PtT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SAZeQV2PtT", "openreview": "https://openreview.net/forum?id=SAZeQV2PtT", "poster": "/media/PosterPDFs/NeurIPS%202024/95122.png?t=1729783510.7441955", "project": "", "tldr": "", "abstract": "Bayesian coresets speed up posterior inference in the large-scale data regime by approximating the full-data log-likelihood function with a surrogate log-likelihood based on a small, weighted subset of the data. But while Bayesian coresets and methods for construction are applicable in a wide range of models, existing theoretical analysis of the posterior inferential error incurred by coreset approximations only apply in restrictive settings---i.e., exponential family models, or models with strong log-concavity and smoothness assumptions. This work presents general upper and lower bounds on the Kullback-Leibler (KL) divergence of coreset approximations that reflect the full range of applicability of Bayesian coresets. The lower bounds require only mild model assumptions typical of Bayesian asymptotic analyses, while the upper bounds require the log-likelihood functions to satisfy a generalized subexponentiality criterion that is weaker than conditions used in earlier work. The lower bounds are applied to obtain fundamental limitations on the quality of coreset approximations, and to provide a theoretical explanation for the previously-observed poor empirical performance of importance sampling-based construction methods. The upper bounds are used to analyze the performance of recent subsample-optimize methods. The flexibility of the theory is demonstrated in validation experiments involving multimodal, unidentifiable, heavy-tailed Bayesian posterior distributions.", "keywords": "Bayesian;coreset;Kullback Leibler divergence;error bounds", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Trevor Campbell", "authorids": "~Trevor_Campbell1", "gender": "M", "homepage": "https://trevorcampbell.me", "dblp": "130/3822", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Trevor_Campbell1", "aff": "University of British Columbia", "aff_domain": "ubc.ca", "position": "Associate Professor", "bibtex": "@inproceedings{\ncampbell2024general,\ntitle={General bounds on the quality of Bayesian coresets},\nauthor={Trevor Campbell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SAZeQV2PtT}\n}", "github": "", "reviewers": "2Qah;EUq5;UnLw;jwKa;vJwU", "pdf_size": 767907, "rating": "4;7;7;7;8", "confidence": "4;3;2;3;3", "soundness": "2;4;4;4;4", "novelty": "2;3;4;3;4", "presentation": "2;4;4;3;3", "wc_summary": "51;84;81;68;117", "wc_strengths": "23;74;65;89;47", "wc_weaknesses": "47;34;70;220;281", "wc_questions": "32;39;32;2;222", "wc_limitations": "1;7;6;11;67", "wc_review": "154;238;254;390;734", "wc_reply_reviewers": "105;0;14;22;77", "wc_reply_authors": "169;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.6, 0.8000000000000002 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 80.2, 21.775215268740745 ], "wc_strengths_avg": [ 59.6, 22.8 ], "wc_weaknesses_avg": [ 130.4, 100.60337966489992 ], "wc_questions_avg": [ 65.4, 79.33624644511485 ], "wc_limitations_avg": [ 18.4, 24.507957891264624 ], "wc_review_avg": [ 354.0, 204.52481512031738 ], "wc_reply_reviewers_avg": [ 43.6, 40.32170631310139 ], "wc_reply_authors_avg": [ 33.8, 67.6 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.6993786061802353, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3366638156386489487&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ubc.ca", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Measuring Progress in Dictionary Learning for Language Model Interpretability with Board Game Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95121", "id": "SCEdoGghcw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SCEdoGghcw", "openreview": "https://openreview.net/forum?id=SCEdoGghcw", "poster": "/media/PosterPDFs/NeurIPS%202024/95121.png?t=1730259153.6686015", "project": "", "author_site": "Adam Karvonen, Benjamin Wright, Can Rager, Rico Angell, Jannik Brinkmann, Logan Smith, Claudio Mayrink Verdun, David Bau, Samuel Marks", "tldr": "", "abstract": "What latent features are encoded in language model (LM) representations? Recent work on training sparse autoencoders (SAEs) to disentangle interpretable features in LM representations has shown significant promise. However, evaluating the quality of these SAEs is difficult because we lack a ground-truth collection of interpretable features which we expect good SAEs to identify. We thus propose to measure progress in interpretable dictionary learning by working in the setting of LMs trained on Chess and Othello transcripts. These settings carry natural collections of interpretable features\u2014for example, \u201cthere is a knight on F3\u201d\u2014which we leverage into metrics for SAE quality. To guide progress in interpretable dictionary learning, we introduce a new SAE training technique, $p$-annealing, which demonstrates improved performance on our metric.", "keywords": "Language models;interpretability;dictionary learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/92d2a4d35f561e243df2ed6038b5c6b82c3f308b.zip", "author": "Adam Karvonen;Benjamin Wright;Can Rager;Rico Angell;Jannik Brinkmann;Logan Riggs Smith;Claudio Mayrink Verdun;David Bau;Samuel Marks", "authorids": "~Adam_Karvonen1;~Benjamin_Wright2;~Can_Rager1;~Rico_Angell1;~Jannik_Brinkmann1;~Logan_Riggs_Smith1;~Claudio_Mayrink_Verdun1;~David_Bau1;~Samuel_Marks1", "gender": ";;;M;M;;M;M;", "homepage": "https://adamkarvonen.github.io/;;;https://people.cs.umass.edu/~rangell/;https://jannik-brinkmann.github.io;https://www.spiedigitallibrary.org/profile/Logan.Smith-4229428;;https://baulab.info/;", "dblp": ";;;184/9716;354/6215;;;47/3614;", "google_scholar": ";;dzbnS2UAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;lsOne4AAAAAJ;CYI6cKgAAAAJ;", "orcid": ";;;;;;;0000-0003-1744-6765;", "linkedin": "adam-karvonen/;benjamin-p-wright/;canrager/;;;;;david-bau-4b8130/;", "or_profile": "~Adam_Karvonen1;~Benjamin_Wright2;~Can_Rager1;~Rico_Angell1;~Jannik_Brinkmann1;~Logan_Riggs_Smith1;~Claudio_Mayrink_Verdun1;~David_Bau1;~Samuel_Marks1", "aff": "Galois;Massachusetts Institute of Technology;Universit\u00e4t Hamburg;University of Massachusetts Amherst;Northeastern University;;Harvard University;Northeastern University;Northeastern University", "aff_domain": "galois.com;mit.edu;uni-hamburg.de;cs.umass.edu;northeastern.edu;;harvard.edu;northeastern.edu;northeastern.edu", "position": "Researcher;Undergrad student;MS student;PhD student;Researcher;;Postdoc;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nkarvonen2024measuring,\ntitle={Measuring Progress in Dictionary Learning for Language Model Interpretability with Board Game Models},\nauthor={Adam Karvonen and Benjamin Wright and Can Rager and Rico Angell and Jannik Brinkmann and Logan Riggs Smith and Claudio Mayrink Verdun and David Bau and Samuel Marks},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SCEdoGghcw}\n}", "github": "", "reviewers": "pBfq;R4sd;NNJv", "pdf_size": 3490992, "rating": "5;7;7", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "2;4;3", "wc_summary": "41;79;163", "wc_strengths": "92;117;124", "wc_weaknesses": "268;50;424", "wc_questions": "139;18;364", "wc_limitations": "29;13;1", "wc_review": "569;277;1076", "wc_reply_reviewers": "201;27;12", "wc_reply_authors": "287;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 94.33333333333333, 50.97275960964074 ], "wc_strengths_avg": [ 111.0, 13.73559851869101 ], "wc_weaknesses_avg": [ 247.33333333333334, 153.38260078060424 ], "wc_questions_avg": [ 173.66666666666666, 143.36511275605218 ], "wc_limitations_avg": [ 14.333333333333334, 11.469767022723502 ], "wc_review_avg": [ 640.6666666666666, 330.10335081943987 ], "wc_reply_reviewers_avg": [ 80.0, 85.77878525602937 ], "wc_reply_authors_avg": [ 95.66666666666667, 135.2930974670261 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6031201829082872764&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 7, "email": "galois.com;mit.edu;uni-hamburg.de;cs.umass.edu;northeastern.edu;;harvard.edu;northeastern.edu;northeastern.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;4;5;4;4", "aff_unique_norm": "Galois, Inc.;Massachusetts Institute of Technology;University of Hamburg;University of Massachusetts Amherst;Northeastern University;Harvard University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://galois.com;https://web.mit.edu;https://www.uni-hamburg.de;https://www.umass.edu;https://www.northeastern.edu;https://www.harvard.edu", "aff_unique_abbr": "Galois;MIT;UHH;UMass Amherst;NEU;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Typicalness-Aware Learning for Failure Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95120", "id": "SDWeIGPAh9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SDWeIGPAh9", "openreview": "https://openreview.net/forum?id=SDWeIGPAh9", "poster": "/media/PosterPDFs/NeurIPS%202024/95120.png?t=1730969428.5406783", "project": "", "author_site": "Yijun Liu, Jiequan Cui, Zhuotao Tian, Senqiao Yang, Qingdong He, Xiaoling Wang, Jingyong Su", "tldr": "", "abstract": "Deep neural networks (DNNs) often suffer from the overconfidence issue, where incorrect predictions are made with high confidence scores, hindering the applications in critical systems. In this paper, we propose a novel approach called Typicalness-Aware Learning (TAL) to address this issue and improve failure detection performance. \nWe observe that, with the cross-entropy loss, model predictions are optimized to align with the corresponding labels via increasing logit magnitude or refining logit direction. However, regarding atypical samples, the image content and their labels may exhibit disparities. This discrepancy can lead to overfitting on atypical samples, ultimately resulting in the overconfidence issue that we aim to address.\nTo address this issue, we have devised a metric that quantifies the typicalness of each sample, enabling the dynamic adjustment of the logit magnitude during the training process. By allowing relatively atypical samples to be adequately fitted while preserving reliable logit direction, the problem of overconfidence can be mitigated. TAL has been extensively evaluated on benchmark datasets, and the results demonstrate its superiority over existing failure detection methods. Specifically, TAL achieves a more than 5\\% improvement on CIFAR100 in terms of the Area Under the Risk-Coverage Curve (AURC) compared to the state-of-the-art. Code is available at https://github.com/liuyijungoon/TAL.", "keywords": "failure detection;overconfidence;typicalness-aware training;logit norm", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yijun Liu;Jiequan Cui;Zhuotao Tian;Senqiao Yang;Qingdong He;wangxiaoling;Jingyong Su", "authorids": "~Yijun_Liu4;~Jiequan_Cui1;~Zhuotao_Tian1;~Senqiao_Yang1;~Qingdong_He1;~wangxiaoling1;~Jingyong_Su3", "gender": ";M;M;;M;;M", "homepage": ";https://jiequancui.github.io/;https://scholar.google.com/citations?user=mEjhz-IAAAAJ&hl=zh-CN;;;;", "dblp": ";259/5474;243/7181;;267/1653;;82/8615.html", "google_scholar": ";KbXLN2AAAAAJ;mEjhz-IAAAAJ;;gUJWww0AAAAJ;;T8YbHBwAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Yijun_Liu4;~Jiequan_Cui1;~Zhuotao_Tian1;~Senqiao_Yang1;~Qingdong_He1;~wangxiaoling1;~Jingyong_Su3", "aff": ";Nanyang Technological University;SmartMore;;Tencent Youtu Lab;;Harbin Institute of Technology", "aff_domain": ";ntu.edu.sg;smartmore.com;;tencent.com;;hit.edu.cn", "position": ";Research Fellow;Researcher;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nliu2024typicalnessaware,\ntitle={Typicalness-Aware Learning for Failure Detection},\nauthor={Yijun Liu and Jiequan Cui and Zhuotao Tian and Senqiao Yang and Qingdong He and wangxiaoling and Jingyong Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SDWeIGPAh9}\n}", "github": "", "reviewers": "hbz4;RLx5;yqWV;9uPn", "pdf_size": 0, "rating": "6;6;7;7", "confidence": "2;4;4;4", "soundness": "3;2;3;2", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "173;107;60;83", "wc_strengths": "64;71;87;87", "wc_weaknesses": "72;306;96;455", "wc_questions": "72;73;75;286", "wc_limitations": "35;1;1;2", "wc_review": "416;558;319;913", "wc_reply_reviewers": "95;49;109;615", "wc_reply_authors": "149;450;220;1359", "reply_reviewers": "1;1;2;4", "reply_authors": "2;3;3;5", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.75, 42.23372467590326 ], "wc_strengths_avg": [ 77.25, 10.059199769365355 ], "wc_weaknesses_avg": [ 232.25, 157.56010757802878 ], "wc_questions_avg": [ 126.5, 92.09370228196931 ], "wc_limitations_avg": [ 9.75, 14.58380951603524 ], "wc_review_avg": [ 551.5, 225.35582974487258 ], "wc_reply_reviewers_avg": [ 217.0, 230.85493280413135 ], "wc_reply_authors_avg": [ 544.5, 483.2341564914467 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5474781530877854028&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ntu.edu.sg;smartmore.com;;tencent.com;;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Nanyang Technological University;SmartMore;Tencent;Harbin Institute of Technology", "aff_unique_dep": ";;Youtu Lab;", "aff_unique_url": "https://www.ntu.edu.sg;;https://www.tencent.com;http://www.hit.edu.cn/", "aff_unique_abbr": "NTU;;Tencent;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;2;2", "aff_country_unique": "Singapore;;China" }, { "title": "Stepping on the Edge: Curvature Aware Learning Rate Tuners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95119", "id": "SEflLHIhhJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SEflLHIhhJ", "openreview": "https://openreview.net/forum?id=SEflLHIhhJ", "poster": "", "project": "", "author_site": "Vincent Roulet, Atish Agarwala, Jean-Bastien Grill, Grzegorz Swirszcz, Mathieu Blondel, Fabian Pedregosa", "tldr": "", "abstract": "Curvature information -- particularly, the largest eigenvalue of the loss\nHessian, known as the sharpness -- often forms the basis for learning rate\ntuners. However, recent work has shown that the curvature information undergoes\ncomplex dynamics during training, going from a phase of increasing sharpness to\neventual stabilization. We analyze the closed-loop feedback effect between\nlearning rate tuning and curvature. We find that classical learning rate tuners\nmay yield greater one-step loss reduction, yet they ultimately underperform in\nthe long term when compared to constant learning rates in the full batch regime.\nThese models break the stabilization of the sharpness, which we explain using a\nsimplified model of the joint dynamics of the learning rate and the curvature.\nTo further investigate these effects, we introduce a new learning rate tuning\nmethod, Curvature Dynamics Aware Tuning (CDAT), which prioritizes long term\ncurvature stabilization over instantaneous progress on the objective. In the\nfull batch regime, CDAT shows behavior akin to prefixed warm-up schedules on deep\nlearning objectives, outperforming tuned constant learning rates. In the mini\nbatch regime, we observe that stochasticity introduces confounding effects that\nexplain the previous success of some learning rate tuners at appropriate batch\nsizes. Our findings highlight the critical role of understanding the joint\ndynamics of the learning rate and curvature, beyond greedy minimization, to\ndiagnose failures and design effective adaptive learning rate tuners.", "keywords": "deep learning optimization;learning rate tuner;progressive sharpening;edge of stability", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Vincent Roulet;Atish Agarwala;Jean-Bastien Grill;Grzegorz Michal Swirszcz;Mathieu Blondel;Fabian Pedregosa", "authorids": "~Vincent_Roulet1;~Atish_Agarwala1;~Jean-Bastien_Grill2;~Grzegorz_Michal_Swirszcz1;~Mathieu_Blondel1;~Fabian_Pedregosa1", "gender": "M;;M;M;;M", "homepage": "https://vroulet.github.io/;;https://dblp.uni-trier.de/pid/178/3291.html;https://sites.google.com/site/grzegorzswirszcz/;http://www.mblondel.org;http://fa.bianp.net", "dblp": "164/6165;265/6410.html;;54/6341;05/8614.html;11/9764", "google_scholar": "https://scholar.google.fr/citations?user=vwoZrVMAAAAJ;https://scholar.google.com/citations?hl=en;;;C0EKzrUAAAAJ;https://scholar.google.fr/citations?hl=en", "orcid": ";;;;;0000-0003-4025-3953", "linkedin": "vincentroulet/;;;;;http://www.linkedin.com/in/fabianpedregosa", "or_profile": "~Vincent_Roulet1;~Atish_Agarwala1;~Jean-Bastien_Grill2;~Grzegorz_Michal_Swirszcz1;~Mathieu_Blondel1;~Fabian_Pedregosa1", "aff": "Google;Google;Google DeepMind;GoogleDeepMind;Google;Google AI", "aff_domain": "google.com;google.com;deepmind.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Research Scientist;Research scientist;Research Scientist", "bibtex": "@inproceedings{\nroulet2024stepping,\ntitle={Stepping on the Edge: Curvature Aware Learning Rate Tuners},\nauthor={Vincent Roulet and Atish Agarwala and Jean-Bastien Grill and Grzegorz Michal Swirszcz and Mathieu Blondel and Fabian Pedregosa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SEflLHIhhJ}\n}", "github": "", "reviewers": "jCtW;cxBm;7YAk;J393", "pdf_size": 1726361, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "50;274;82;186", "wc_strengths": "56;32;21;238", "wc_weaknesses": "241;71;244;258", "wc_questions": "155;49;27;202", "wc_limitations": "8;1;1;81", "wc_review": "510;427;375;965", "wc_reply_reviewers": "300;44;291;174", "wc_reply_authors": "464;0;814;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 148.0, 88.43076387773658 ], "wc_strengths_avg": [ 86.75, 88.23654288332017 ], "wc_weaknesses_avg": [ 203.5, 76.76750614680667 ], "wc_questions_avg": [ 108.25, 72.60638746005753 ], "wc_limitations_avg": [ 22.75, 33.751851801049376 ], "wc_review_avg": [ 569.25, 233.50414878541238 ], "wc_reply_reviewers_avg": [ 202.25, 104.01051629522853 ], "wc_reply_authors_avg": [ 319.5, 342.6262541020463 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12846368083403352582&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;google.com;deepmind.com;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;DeepMind", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Alleviating Hallucinations in Large Vision-Language Models through Hallucination-Induced Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95118", "id": "SF2GlFhVsS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SF2GlFhVsS", "openreview": "https://openreview.net/forum?id=SF2GlFhVsS", "poster": "", "project": "", "author_site": "Beitao Chen, Xinyu Lyu, Lianli Gao, Hengtao Shen, Jingkuan Song", "tldr": "", "abstract": "Although Large Visual Language Models (LVLMs) have demonstrated exceptional abilities in understanding multimodal data, they invariably suffer from hallucinations, leading to a disconnection between the generated text and the corresponding images. Almost all current visual contrastive decoding methods attempt to mitigate these hallucinations by introducing visual uncertainty information that appropriately widens the contrastive logits gap between hallucinatory and targeted ones.\n However, due to uncontrollable nature of the global visual uncertainty, they struggle to precisely induce the hallucinatory tokens, which severely limits their effectiveness in mitigating hallucinations and may even lead to the generation of undesired hallucinations.\n To tackle this issue, we conducted the theoretical analysis to promote the effectiveness of contrast decoding. Building on this insight, we introduce a novel optimization strategy named Hallucination-Induced Optimization (HIO). This strategy seeks to amplify the contrast between hallucinatory and targeted tokens relying on a fine-tuned theoretical preference model (i.e., Contrary Bradley-Terry Model), thereby facilitating efficient contrast decoding to alleviate hallucinations in LVLMs.\n Extensive experimental research demonstrates that our HIO strategy can effectively reduce hallucinations in LVLMs, outperforming state-of-the-art methods across various benchmarks.", "keywords": "Hallucination;Contrast Decoding", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/5f797c3188c41c3728f859eb95ef2ef02f15988d.zip", "author": "Xinyu Lyu;Beitao Chen;Lianli Gao;Heng Tao Shen;Jingkuan Song", "authorids": "~Xinyu_Lyu1;~Beitao_Chen1;~Lianli_Gao1;~Heng_Tao_Shen3;~Jingkuan_Song3", "gender": "M;M;F;M;M", "homepage": "https://xinyulyu.github.io/;https://www.csdn.net/;https://lianligao.github.io/;https://cfm.uestc.edu.cn/~songjingkuan/;https://cfm.uestc.edu.cn/~shenht/", "dblp": "198/6153;;123/9849.html;70/10575;s/HTShen", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.au/citations?user=zsm2dpYAAAAJ;F5Zy9V4AAAAJ;https://scholar.google.com.au/citations?user=krryaDkAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xinyu_Lyu1;~Beitao_Chen1;~Lianli_Gao1;~Jingkuan_Song3;~Hengtao_Shen1", "aff": "Southwest University of Finance and Economics;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China,;Tongji University", "aff_domain": "swufe.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;tongji.edu.cn", "position": "Associate Professor;MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024alleviating,\ntitle={Alleviating Hallucinations in Large Vision-Language Models through Hallucination-Induced Optimization},\nauthor={Beitao Chen and Xinyu Lyu and Lianli Gao and Heng Tao Shen and Jingkuan Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SF2GlFhVsS}\n}", "github": "", "reviewers": "RdjK;6ngE;DdM1;pE7J", "pdf_size": 1465599, "rating": "4;6;6;7", "confidence": "5;4;4;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;1;3", "wc_summary": "55;128;56;75", "wc_strengths": "18;52;16;40", "wc_weaknesses": "73;169;269;154", "wc_questions": "4;62;25;85", "wc_limitations": "1;8;12;59", "wc_review": "151;419;378;413", "wc_reply_reviewers": "0;24;0;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 78.5, 29.669007398293594 ], "wc_strengths_avg": [ 31.5, 15.124483462254174 ], "wc_weaknesses_avg": [ 166.25, 69.66123383920213 ], "wc_questions_avg": [ 44.0, 31.488092987667578 ], "wc_limitations_avg": [ 20.0, 22.85825890132492 ], "wc_review_avg": [ 340.25, 110.3797422537306 ], "wc_reply_reviewers_avg": [ 11.25, 11.299889379989523 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8604771668682789638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "swufe.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;tongji.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Southwest University of Finance and Economics;University of Electronic Science and Technology of China;Tongji University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.swufe.edu.cn;https://www.uestc.edu.cn;https://www.tongji.edu.cn", "aff_unique_abbr": "SWUFE;UESTC;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "D\u00e9j\u00e0 Vu Memorization in Vision\u2013Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95117", "id": "SFCZdXDyNs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SFCZdXDyNs", "openreview": "https://openreview.net/forum?id=SFCZdXDyNs", "poster": "", "project": "", "author_site": "Bargav Jayaraman, Chuan Guo, Kamalika Chaudhuri", "tldr": "", "abstract": "Vision-Language Models (VLMs) have emerged as the state-of-the-art representation learning solution, with myriads of downstream applications such as image classification, retrieval and generation. A natural question is whether these models memorize their training data, which also has implications for generalization. We propose a new method for measuring memorization in VLMs, which we call d\u00e8j\u00e1 vu memorization. For VLMs trained on image-caption pairs, we show that the model indeed retains information about individual objects in the training images beyond what can be inferred from correlations or the image caption. We evaluate d\u00e8j\u00e1 vu memorization at both sample and population level, and show that it is significant for OpenCLIP trained on as many as 50M image-caption pairs. Finally, we show that text randomization considerably mitigates memorization risk while only moderately impacting the model\u2019s downstream task performance. The code is available here: https://github.com/facebookresearch/VLMDejaVu.", "keywords": "memorization;multimodal;CLIP;vision-language models", "primary_area": "privacy", "supplementary_material": "", "author": "Bargav Jayaraman;Chuan Guo;Kamalika Chaudhuri", "authorids": "~Bargav_Jayaraman1;~Chuan_Guo1;~Kamalika_Chaudhuri1", "gender": "M;M;F", "homepage": "https://bargavjayaraman.github.io;https://sites.google.com/view/chuanguo;http://cseweb.ucsd.edu/users/kamalika", "dblp": "165/8210;;56/6435", "google_scholar": "https://scholar.google.co.in/citations?user=gL3ZkpEAAAAJ;0gp5M-kAAAAJ;I-DJ7EsAAAAJ", "orcid": ";;", "linkedin": "bargav-jayaraman/;;", "or_profile": "~Bargav_Jayaraman1;~Chuan_Guo1;~Kamalika_Chaudhuri1", "aff": "Meta;Meta;University of California, San Diego", "aff_domain": "meta.com;meta.com;ucsd.edu", "position": "Postdoc;Researcher;Associate Professor", "bibtex": "@inproceedings{\njayaraman2024dj,\ntitle={D\\'ej\\`a Vu Memorization in Vision{\\textendash}Language Models},\nauthor={Bargav Jayaraman and Chuan Guo and Kamalika Chaudhuri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SFCZdXDyNs}\n}", "github": "", "reviewers": "7Gny;vpe4;GQcH;aCTU", "pdf_size": 4028021, "rating": "4;6;6;7", "confidence": "4;3;2;5", "soundness": "2;2;3;3", "novelty": "2;3;3;4", "presentation": "4;2;3;4", "wc_summary": "61;105;40;117", "wc_strengths": "34;57;52;105", "wc_weaknesses": "449;150;3;71", "wc_questions": "66;183;51;77", "wc_limitations": "2;6;9;132", "wc_review": "612;501;155;502", "wc_reply_reviewers": "259;39;12;12", "wc_reply_authors": "447;21;34;22", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 80.75, 31.43544973433655 ], "wc_strengths_avg": [ 62.0, 26.258332011001766 ], "wc_weaknesses_avg": [ 168.25, 170.23421365871198 ], "wc_questions_avg": [ 94.25, 52.06426317542581 ], "wc_limitations_avg": [ 37.25, 54.76027300881543 ], "wc_review_avg": [ 442.5, 172.00944741496033 ], "wc_reply_reviewers_avg": [ 80.5, 103.64482620951226 ], "wc_reply_authors_avg": [ 131.0, 182.51438299487523 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.10259783520851541, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4654905791815723988&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "meta.com;meta.com;ucsd.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Meta;University of California, San Diego", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ucsd.edu", "aff_unique_abbr": "Meta;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Closeness of In-Context Learning and Weight Shifting for Softmax Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95116", "id": "SFaEENfEyw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SFaEENfEyw", "openreview": "https://openreview.net/forum?id=SFaEENfEyw", "poster": "", "project": "", "author_site": "Shuai Li, Zhao Song, Yu Xia, Tong Yu, Tianyi Zhou", "tldr": "", "abstract": "Large language models (LLMs) are known for their exceptional performance in natural language processing, making them highly effective in many human life-related tasks. The attention mechanism in the Transformer architecture is a critical component of LLMs, as it allows the model to selectively focus on specific input parts. The softmax unit, which is a key part of the attention mechanism, normalizes the attention scores. Hence, the performance of LLMs in various NLP tasks depends significantly on the crucial role played by the attention mechanism with the softmax unit.\n\nIn-context learning is one of the celebrated abilities of recent LLMs. \nWithout further parameter updates, Transformers can learn to predict based on few in-context examples. \nHowever, the reason why Transformers becomes in-context learners is not well understood.\nRecently, in-context learning has been studied from a mathematical perspective with simplified linear self-attention without softmax unit. \nBased on a linear regression formulation $\\min_x\\| Ax - b \\|_2$, existing works show linear Transformers' capability of learning linear functions in context. The capability of Transformers with softmax unit approaching full Transformers, however, remains unexplored.\n\nIn this work, we study the in-context learning based on a softmax regression formulation $\\min_{x} \\| \\langle \\exp(Ax), {\\bf 1}_n \\rangle^{-1} \\exp(Ax) - b \\|_2$. We show the upper bounds of the data transformations induced by a single self-attention layer with softmax unit and by gradient-descent on a $\\ell_2$ regression loss for softmax prediction function.\nOur theoretical results imply that when training self-attention-only Transformers for fundamental regression tasks, the models learned by gradient-descent and Transformers show great similarity.", "keywords": "in-context learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Shuai Li;Zhao Song;Yu Xia;Tong Yu;Tianyi Zhou", "authorids": "~Shuai_Li3;~Zhao_Song3;~Yu_Xia9;~Tong_Yu3;~Tianyi_Zhou4", "gender": "F;M;M;;", "homepage": "http://shuaili8.github.io;https://www.youtube.com/@zhaosong2031;https://andree-9.github.io/;https://www.linkedin.com/in/tong-yu-42790744;", "dblp": "57/2281-10;76/4051-2;28/4326-7;32/1593-1;", "google_scholar": "https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ;yDZct7UAAAAJ;sTVqEUMAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0002-5991-2050;", "linkedin": ";;;tong-yu-42790744;", "or_profile": "~Shuai_Li3;~Zhao_Song3;~Yu_Xia9;~Tong_Yu3;~Tianyi_Zhou4", "aff": "John Hopcroft Center, Shanghai Jiao Tong University;Adobe;University of Michigan;Adobe Research;", "aff_domain": "sjtu.edu.cn;adobe.com;umich.edu;adobe.com;", "position": "Assistant Professor;Researcher;MS student;Senior Research Scientist;", "bibtex": "@inproceedings{\nli2024the,\ntitle={The Closeness of In-Context Learning and Weight Shifting for Softmax Regression},\nauthor={Shuai Li and Zhao Song and Yu Xia and Tong Yu and Tianyi Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SFaEENfEyw}\n}", "github": "", "reviewers": "K6fK;LX6R;EkHX", "pdf_size": 617949, "rating": "5;5;8", "confidence": "2;3;2", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "2;2;3", "wc_summary": "118;67;112", "wc_strengths": "62;25;124", "wc_weaknesses": "159;102;61", "wc_questions": "1;2;4", "wc_limitations": "6;6;4", "wc_review": "346;202;305", "wc_reply_reviewers": "36;64;0", "wc_reply_authors": "122;113;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 99.0, 22.759613353482084 ], "wc_strengths_avg": [ 70.33333333333333, 40.84387618997764 ], "wc_weaknesses_avg": [ 107.33333333333333, 40.18568014714805 ], "wc_questions_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_limitations_avg": [ 5.333333333333333, 0.9428090415820634 ], "wc_review_avg": [ 284.3333333333333, 60.576856600153455 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 26.195843605851334 ], "wc_reply_authors_avg": [ 78.33333333333333, 55.51176051572816 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1635458034096028391&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;adobe.com;umich.edu;adobe.com;", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Shanghai Jiao Tong University;Adobe;University of Michigan", "aff_unique_dep": "John Hopcroft Center;Adobe Inc.;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.adobe.com;https://www.umich.edu", "aff_unique_abbr": "SJTU;Adobe;UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Shanghai;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "4Diffusion: Multi-view Video Diffusion Model for 4D Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95115", "id": "SFk7AMpyhx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SFk7AMpyhx", "openreview": "https://openreview.net/forum?id=SFk7AMpyhx", "poster": "/media/PosterPDFs/NeurIPS%202024/95115.png?t=1731401223.9327457", "project": "", "author_site": "Haiyu Zhang, Xinyuan Chen, Yaohui WANG, Xihui Liu, Yunhong Wang, Yu Qiao", "tldr": "", "abstract": "Current 4D generation methods have achieved noteworthy efficacy with the aid of advanced diffusion generative models. However, these methods lack multi-view spatial-temporal modeling and encounter challenges in integrating diverse prior knowledge from multiple diffusion models, resulting in inconsistent temporal appearance and flickers. In this paper, we propose a novel 4D generation pipeline, namely $\\textbf{4Diffusion}$, aimed at generating spatial-temporally consistent 4D content from a monocular video. We first design a unified diffusion model tailored for multi-view video generation by incorporating a learnable motion module into a frozen 3D-aware diffusion model to capture multi-view spatial-temporal correlations. After training on a curated dataset, our diffusion model acquires reasonable temporal consistency and inherently preserves the generalizability and spatial consistency of the 3D-aware diffusion model. Subsequently, we propose 4D-aware Score Distillation Sampling loss, which is based on our multi-view video diffusion model, to optimize 4D representation parameterized by dynamic NeRF. This aims to eliminate discrepancies arising from multiple diffusion models, allowing for generating spatial-temporally consistent 4D content. Moreover, we devise an anchor loss to enhance the appearance details and facilitate the learning of dynamic NeRF. Extensive qualitative and quantitative experiments demonstrate that our method achieves superior performance compared to previous methods.", "keywords": "Diffusion Model;4D Generation;NeRF", "primary_area": "generative_models", "supplementary_material": "/attachment/93d614b0ed825bfaa519b19418cdb02188b26762.zip", "author": "Haiyu Zhang;Xinyuan Chen;Yaohui Wang;Xihui Liu;Yunhong Wang;Yu Qiao", "authorids": "~Haiyu_Zhang1;~Xinyuan_Chen1;~Yaohui_Wang1;~Xihui_Liu1;~Yunhong_Wang1;~Yu_Qiao1", "gender": "M;F;M;F;;", "homepage": "https://github.com/aejion;;https://wyhsirius.github.io/;https://xh-liu.github.io/;;", "dblp": ";;168/6263-1.html;184/3911;;", "google_scholar": "9B1t_zIAAAAJ;3fWSC8YAAAAJ;R7LyAb4AAAAJ;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ;;", "orcid": ";0000-0002-5517-7255;;0000-0003-1831-9952;;", "linkedin": ";;;;;", "or_profile": "~Haiyu_Zhang1;~Xinyuan_Chen1;~Yaohui_Wang1;~Xihui_Liu1;~Yunhong_Wang1;~Yu_Qiao1", "aff": "Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;University of Hong Kong;;", "aff_domain": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;hku.hk;;", "position": "Intern;Research Scientist;Research Scientist;Assistant Professor;;", "bibtex": "@inproceedings{\nzhang2024diffusion,\ntitle={4Diffusion: Multi-view Video Diffusion Model for 4D Generation},\nauthor={Haiyu Zhang and Xinyuan Chen and Yaohui Wang and Xihui Liu and Yunhong Wang and Yu Qiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SFk7AMpyhx}\n}", "github": "", "reviewers": "Y3vx;MfEx;4Nq2;ioTR", "pdf_size": 24836585, "rating": "4;5;5;5", "confidence": "4;5;5;5", "soundness": "2;3;2;3", "novelty": "3;2;3;2", "presentation": "2;3;2;3", "wc_summary": "150;74;36;83", "wc_strengths": "91;24;29;38", "wc_weaknesses": "183;68;158;207", "wc_questions": "42;36;72;6", "wc_limitations": "4;24;4;12", "wc_review": "470;226;299;346", "wc_reply_reviewers": "49;0;0;41", "wc_reply_authors": "133;0;0;15", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.75, 41.07538800790566 ], "wc_strengths_avg": [ 45.5, 26.744158240632665 ], "wc_weaknesses_avg": [ 154.0, 52.58802144975603 ], "wc_questions_avg": [ 39.0, 23.430749027719962 ], "wc_limitations_avg": [ 11.0, 8.18535277187245 ], "wc_review_avg": [ 335.25, 88.77323639476033 ], "wc_reply_reviewers_avg": [ 22.5, 22.677080940897135 ], "wc_reply_authors_avg": [ 37.0, 55.76289088632332 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17301233423891415487&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;hku.hk;;", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.hku.hk", "aff_unique_abbr": "Shanghai AI Lab;SAIL;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Scalable Optimization in the Modular Norm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95114", "id": "SFxAjB7UXx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SFxAjB7UXx", "openreview": "https://openreview.net/forum?id=SFxAjB7UXx", "poster": "", "project": "", "author_site": "Tim Large, Yang Liu, Jacob Huh, Hyojin Bahng, Phillip Isola, Jeremy Bernstein", "tldr": "", "abstract": "To improve performance in contemporary deep learning, one is interested in scaling up the neural network in terms of both the number and the size of the layers. When ramping up the width of a single layer, graceful scaling of training has been linked to the need to normalize the weights and their updates in the \"natural norm\" particular to that layer. In this paper, we significantly generalize this idea by defining the modular norm, which is the natural norm on the full weight space of any neural network architecture. The modular norm is defined recursively in tandem with the network architecture itself. We show that the modular norm has several promising applications. On the practical side, the modular norm can be used to normalize the updates of any base optimizer so that the learning rate becomes transferable across width and depth. This means that the user does not need to compute optimizer-specific scale factors in order to scale training. On the theoretical side, we show that for any neural network built from \"well-behaved\" atomic modules, the gradient of the network is Lipschitz-continuous in the modular norm, with the Lipschitz constant admitting a simple recursive formula. This characterization opens the door to porting standard ideas in optimization theory over to deep learning. We have created a Python package called Modula that automatically normalizes weight updates in the modular norm of the architecture. Both the Modula package and code for our experiments are provided in the supplementary material.", "keywords": "scalable;optimization;modular;norm;normalization;modula;hyperparameter;transfer;architecture;aware;operator;module;tree", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Tim Large;Yang Liu;Minyoung Huh;Hyojin Bahng;Phillip Isola;Jeremy Bernstein", "authorids": "~Tim_Large1;~Yang_Liu40;~Minyoung_Huh1;~Hyojin_Bahng1;~Phillip_Isola1;~Jeremy_Bernstein1", "gender": "M;M;M;F;M;M", "homepage": "https://www.simonsfoundation.org/people/timothy-large/;;https://people.csail.mit.edu/minhuh/;https://hjbahng.github.io/;http://web.mit.edu/phillipi/;https://jeremybernste.in", "dblp": ";;220/3360;218/5470;36/9988;215/3638", "google_scholar": ";nVWQwHkAAAAJ;2k18_1IAAAAJ;qI51abwAAAAJ;ROILf3EAAAAJ;", "orcid": ";;;;0000-0002-1411-6704;", "linkedin": ";yang-liu-24b98a33/;;;phillip-isola-a9955b20/;", "or_profile": "~Tim_Large1;~Yang_Liu40;~Minyoung_Huh1;~Hyojin_Bahng1;~Phillip_Isola1;~Jeremy_Bernstein1", "aff": "Columbia University;Lawrence Livermore National Laboratory;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "columbia.edu;llnl.gov;mit.edu;mit.edu;mit.edu;mit.edu", "position": "Postdoc;Postdoc;PhD student;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nlarge2024scalable,\ntitle={Scalable Optimization in the Modular Norm},\nauthor={Tim Large and Yang Liu and Minyoung Huh and Hyojin Bahng and Phillip Isola and Jeremy Bernstein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SFxAjB7UXx}\n}", "github": "", "reviewers": "3bTa;U3gK;qUST;FbvX", "pdf_size": 1652868, "rating": "3;5;7;8", "confidence": "4;4;5;3", "soundness": "2;3;3;4", "novelty": "2;3;4;4", "presentation": "2;2;3;3", "wc_summary": "60;59;83;64", "wc_strengths": "101;47;62;115", "wc_weaknesses": "230;31;294;582", "wc_questions": "12;50;213;224", "wc_limitations": "10;5;3;67", "wc_review": "413;192;655;1052", "wc_reply_reviewers": "457;59;60;440", "wc_reply_authors": "393;39;28;409", "reply_reviewers": "2;1;1;2", "reply_authors": "3;3;2;4", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.5, 9.7082439194738 ], "wc_strengths_avg": [ 81.25, 27.716195626384224 ], "wc_weaknesses_avg": [ 284.25, 197.37575205683194 ], "wc_questions_avg": [ 124.75, 94.78759148749376 ], "wc_limitations_avg": [ 21.25, 26.536531423680827 ], "wc_review_avg": [ 578.0, 318.91456536194767 ], "wc_reply_reviewers_avg": [ 254.0, 194.59316534760413 ], "wc_reply_authors_avg": [ 217.25, 183.87818658013788 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.18411492357966466, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3896168910127627460&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "columbia.edu;llnl.gov;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;2;2", "aff_unique_norm": "Columbia University;Lawrence Livermore National Laboratory;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.llnl.gov;https://web.mit.edu", "aff_unique_abbr": "Columbia;LLNL;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Parameter-free Clipped Gradient Descent Meets Polyak", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95113", "id": "SGcnphYOeq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SGcnphYOeq", "openreview": "https://openreview.net/forum?id=SGcnphYOeq", "poster": "", "project": "", "author_site": "Yuki Takezawa, Han Bao, Ryoma Sato, Kenta Niwa, Makoto Yamada", "tldr": "", "abstract": "Gradient descent and its variants are de facto standard algorithms for training machine learning models. As gradient descent is sensitive to its hyperparameters, we need to tune the hyperparameters carefully using a grid search. However, the method is time-consuming, particularly when multiple hyperparameters exist. Therefore, recent studies have analyzed parameter-free methods that adjust the hyperparameters on the fly. However, the existing work is limited to investigations of parameter-free methods for the stepsize, and parameter-free methods for other hyperparameters have not been explored. For instance, although the gradient clipping threshold is a crucial hyperparameter in addition to the stepsize for preventing gradient explosion issues, none of the existing studies have investigated parameter-free methods for clipped gradient descent. Therefore, in this study, we investigate the parameter-free methods for clipped gradient descent. Specifically, we propose Inexact Polyak Stepsize, which converges to the optimal solution without any hyperparameters tuning, and its convergence rate is asymptotically independent of $L$ under $L$-smooth and $(L_0, L_1)$-smooth assumptions of the loss function, similar to that of clipped gradient descent with well-tuned hyperparameters. We numerically validated our convergence results using a synthetic function and demonstrated the effectiveness of our proposed methods using LSTM, Nano-GPT, and T5.", "keywords": "Polyak stepsize;clipped gradient descent;generalized smoothness", "primary_area": "optimization", "supplementary_material": "/attachment/a40654bba98e7dadf1c78a0c191c7e75e73aa265.zip", "author": "Yuki Takezawa;Han Bao;Ryoma Sato;Kenta Niwa;Makoto Yamada", "authorids": "~Yuki_Takezawa1;~Han_Bao2;~Ryoma_Sato1;~Kenta_Niwa1;~Makoto_Yamada3", "gender": "M;M;M;M;M", "homepage": "https://yukitakezawa.github.io/;https://hermite.jp/;https://joisino.net/en/;http://www.kecl.ntt.co.jp/icl/ls/members/niwa/index.html;https://groups.oist.jp/mlds", "dblp": "284/1294;120/1444-2;227/2014;64/1008.html;56/4937", "google_scholar": "eaKQb8IAAAAJ;MqMzjeMAAAAJ;https://scholar.google.co.jp/citations?user=S4kMic4AAAAJ;Btla06EAAAAJ;1cKNu1gAAAAJ", "orcid": "0000-0002-8532-2775;0000-0002-4473-2604;;0000-0002-6911-0238;", "linkedin": ";;;;", "or_profile": "~Yuki_Takezawa1;~Han_Bao2;~Ryoma_Sato1;~Kenta_Niwa1;~Makoto_Yamada3", "aff": "Kyoto University;Kyoto University, Kyoto University;Kyoto University;NTT Corporation;Okinawa Institute of Science and Technology (OIST)", "aff_domain": "kyoto-u.ac.jp;i.kyoto-u.ac.jp;kyoto-u.ac.jp;ntt.co.jp;oist.jp", "position": "PhD student;Assistant Professor;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\ntakezawa2024parameterfree,\ntitle={Parameter-free Clipped Gradient Descent Meets Polyak},\nauthor={Yuki Takezawa and Han Bao and Ryoma Sato and Kenta Niwa and Makoto Yamada},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SGcnphYOeq}\n}", "github": "", "reviewers": "bj1n;zAFh;JWAj;rinC", "pdf_size": 803500, "rating": "3;5;7;7", "confidence": "4;4;5;4", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "34;70;156;46", "wc_strengths": "21;73;19;26", "wc_weaknesses": "224;228;35;14", "wc_questions": "57;2;1;169", "wc_limitations": "2;76;2;29", "wc_review": "338;449;213;284", "wc_reply_reviewers": "162;250;0;71", "wc_reply_authors": "579;377;0;85", "reply_reviewers": "2;1;0;2", "reply_authors": "3;2;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 76.5, 47.69433928675394 ], "wc_strengths_avg": [ 34.75, 22.230328382639787 ], "wc_weaknesses_avg": [ 125.25, 101.0331010115002 ], "wc_questions_avg": [ 57.25, 68.38265496454493 ], "wc_limitations_avg": [ 27.25, 30.22726418318403 ], "wc_review_avg": [ 321.0, 86.17714314132257 ], "wc_reply_reviewers_avg": [ 120.75, 94.15777981664606 ], "wc_reply_authors_avg": [ 260.25, 231.12483098966237 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=136000377305383497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kyoto-u.ac.jp;i.kyoto-u.ac.jp;kyoto-u.ac.jp;ntt.co.jp;oist.jp", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Kyoto University;NTT Corporation;Okinawa Institute of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.ntt.co.jp;https://www.oist.jp", "aff_unique_abbr": "Kyoto U;NTT;OIST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kyoto", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "SpeechAlign: Aligning Speech Generation to Human Preferences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95112", "id": "SKCbZR8Pyd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SKCbZR8Pyd", "openreview": "https://openreview.net/forum?id=SKCbZR8Pyd", "poster": "", "project": "", "author_site": "Dong Zhang, Zhaowei Li, Shimin Li, Xin Zhang, Pengyu Wang, Yaqian Zhou, Xipeng Qiu", "tldr": "", "abstract": "Speech language models have significantly advanced in generating realistic speech, with neural codec language models standing out. However, the integration of preference optimization to align speech outputs to human preferences is often neglected. This paper addresses this gap by first analyzing the distribution gap in codec language models, highlighting how it leads to discrepancies between the training and inference phases, which negatively affects performance. Then we explore leveraging preference optimization to bridge the distribution gap. We introduce SpeechAlign, an iterative self-improvement strategy that aligns speech language models to human preferences. SpeechAlign involves constructing a preference codec dataset contrasting golden codec tokens against synthetic tokens, followed by preference optimization to improve the codec language model. This cycle of improvement is carried out iteratively to steadily convert weak models to strong ones. Through both subjective and objective evaluations, we show that SpeechAlign can bridge the distribution gap and facilitating continuous self-improvement of the speech language model. Moreover, SpeechAlign exhibits robust generalization capabilities and works for smaller models. Demos are available at https://0nutation.github.io/SpeechAlign.github.io/.", "keywords": "Speech generation;RLHF;self-improvement", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Dong Zhang;Zhaowei Li;Shimin Li;Xin Zhang;Pengyu Wang;Yaqian Zhou;Xipeng Qiu", "authorids": "~Dong_Zhang9;~Zhaowei_Li4;~Shimin_Li1;~Xin_Zhang36;~Pengyu_Wang2;~Yaqian_Zhou1;~Xipeng_Qiu1", "gender": "M;M;M;M;F;M;M", "homepage": ";;https://github.com/ZhangXInFD;;;https://xpqiu.github.io/;https://github.com/lzw-lzw", "dblp": ";;;14/3832-6;34/389-1.html;69/1395;10/963", "google_scholar": "ScVbeu0AAAAJ;0xxkGjMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=NGniJS0AAAAJ;;Pq4Yp_kAAAAJ;4CygZ0oAAAAJ", "orcid": ";;;;;0000-0001-7163-5247;", "linkedin": ";;;;;;", "or_profile": "~Dong_Zhang9;~Shimin_Li1;~Xin_Zhang36;~Pengyu_Wang2;~Yaqian_Zhou1;~Xipeng_Qiu1;~zhaowei_Li1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;MS student;PhD student;Associate Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nzhang2024speechalign,\ntitle={SpeechAlign: Aligning Speech Generation to Human Preferences},\nauthor={Dong Zhang and Zhaowei Li and Shimin Li and Xin Zhang and Pengyu Wang and Yaqian Zhou and Xipeng Qiu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SKCbZR8Pyd}\n}", "github": "", "reviewers": "VNUv;ZtfR;BxMB;cBrC", "pdf_size": 1233207, "rating": "5;7;8;8", "confidence": "4;4;4;3", "soundness": "3;2;4;4", "novelty": "3;3;4;4", "presentation": "3;3;4;3", "wc_summary": "55;57;139;101", "wc_strengths": "74;77;26;52", "wc_weaknesses": "19;47;69;64", "wc_questions": "315;90;33;10", "wc_limitations": "7;53;9;10", "wc_review": "470;324;276;237", "wc_reply_reviewers": "81;40;0;13", "wc_reply_authors": "125;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 34.713109915419565 ], "wc_strengths_avg": [ 57.25, 20.461854754640402 ], "wc_weaknesses_avg": [ 49.75, 19.536824204563032 ], "wc_questions_avg": [ 112.0, 120.76630324722207 ], "wc_limitations_avg": [ 19.75, 19.22725929507375 ], "wc_review_avg": [ 326.75, 88.25920631866117 ], "wc_reply_reviewers_avg": [ 33.5, 30.987900864692335 ], "wc_reply_authors_avg": [ 31.25, 54.12658773652741 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16097866084001723851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Intelligible and Effective Graph Neural Additive Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95111", "id": "SKY1ScUTwA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SKY1ScUTwA", "openreview": "https://openreview.net/forum?id=SKY1ScUTwA", "poster": "", "project": "", "author_site": "Maya Bechler-Speicher, Amir Globerson, Ran Gilad-Bachrach", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as the predominant approach for learning over graph-structured data. However, most GNNs operate as black-box models and require post-hoc explanations, which may not suffice in high-stakes scenarios where transparency is crucial.\nIn this paper, we present a GNN that is interpretable by design. Our model, Graph Neural Additive Network (GNAN), is a novel extension of the interpretable class of Generalized Additive Models, and can be visualized and fully understood by humans. GNAN is designed to be fully interpretable, offering both global and local explanations at the feature and graph levels through direct visualization of the model. These visualizations describe exactly how the model uses the relationships between the target variable, the features, and the graph. We demonstrate the intelligibility of GNANs in a series of examples on different tasks and datasets. In addition, we show that the accuracy of GNAN is on par with black-box GNNs, making it suitable for critical applications where transparency is essential, alongside high accuracy.", "keywords": "Explainable AI;Graph Neural Networks;Graph Learning;Interpretability.", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Maya Bechler-Speicher;Amir Globerson;Ran Gilad-Bachrach", "authorids": "~Maya_Bechler-Speicher1;~Amir_Globerson1;~Ran_Gilad-Bachrach2", "gender": "Not Specified;M;", "homepage": ";http://www.cs.tau.ac.il/~gamir/;http://mlwell.org", "dblp": ";08/4162.html;g/RGiladBachrach", "google_scholar": "https://scholar.google.co.il/citations?user=5Fj_AUoAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;nnLiId8AAAAJ", "orcid": ";;0000-0002-4001-8307", "linkedin": "maya-bechler-speicher-815103103/;;ranigb/", "or_profile": "~Maya_Bechler-Speicher1;~Amir_Globerson1;~Ran_Gilad-Bachrach2", "aff": "Tel Aviv University;Tel Aviv University;Microsoft", "aff_domain": "tau.ac.il;tau.ac.il;microsoft.com", "position": "PhD student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nbechler-speicher2024the,\ntitle={The Intelligible and Effective Graph Neural Additive Network},\nauthor={Maya Bechler-Speicher and Amir Globerson and Ran Gilad-Bachrach},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SKY1ScUTwA}\n}", "github": "", "reviewers": "5GdN;uKSz;vkt8;3kSH", "pdf_size": 1232516, "rating": "4;5;5;7", "confidence": "3;3;3;5", "soundness": "2;3;3;4", "novelty": "1;2;3;4", "presentation": "2;3;2;4", "wc_summary": "92;33;68;52", "wc_strengths": "51;14;32;142", "wc_weaknesses": "149;324;66;36", "wc_questions": "61;94;4;57", "wc_limitations": "1;1;1;1", "wc_review": "354;466;171;288", "wc_reply_reviewers": "143;294;0;9", "wc_reply_authors": "253;510;46;40", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 61.25, 21.649191670822262 ], "wc_strengths_avg": [ 59.75, 49.25634476897367 ], "wc_weaknesses_avg": [ 143.75, 111.99637270911947 ], "wc_questions_avg": [ 54.0, 32.24127789030702 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 319.75, 106.88399084989295 ], "wc_reply_reviewers_avg": [ 111.5, 119.62127737154456 ], "wc_reply_authors_avg": [ 212.25, 192.10983186708586 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4535170199988286933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tau.ac.il;tau.ac.il;microsoft.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tel Aviv University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.tau.ac.il;https://www.microsoft.com", "aff_unique_abbr": "TAU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "Diffusing Differentiable Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95110", "id": "SKhR5CuiqQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SKhR5CuiqQ", "openreview": "https://openreview.net/forum?id=SKhR5CuiqQ", "poster": "", "project": "", "author_site": "Yash Savani, Marc Finzi, J. Zico Kolter", "tldr": "", "abstract": "We introduce a novel, training-free method for sampling *differentiable representations* (diffreps) using pretrained diffusion models. Rather than merely mode-seeking, our method achieves sampling by \"pulling back\" the dynamics of the reverse-time process\u2014from the image space to the diffrep parameter space\u2014and updating the parameters according to this pulled-back process. We identify an implicit constraint on the samples induced by the diffrep and demonstrate that addressing this constraint significantly improves the consistency and detail of the generated objects. Our method yields diffreps with substantially **improved quality and diversity** for images, panoramas, and 3D NeRFs compared to existing techniques. Our approach is a general-purpose method for sampling diffreps, expanding the scope of problems that diffusion models can tackle.", "keywords": "Diffusion models;Differential Geometry;Implicit Neural Representations;NeRF;Siren", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yash Savani;Marc Anton Finzi;J Zico Kolter", "authorids": "~Yash_Savani1;~Marc_Anton_Finzi1;~J_Zico_Kolter1", "gender": "M;M;M", "homepage": "https://yashsavani.com;https://mfinzi.github.io;http://www.zicokolter.com", "dblp": "251/8428;222/3062;67/2526", "google_scholar": "086M0OIAAAAJ;ysMAhlwAAAAJ;UXh1I6UAAAAJ", "orcid": " 0000-0001-9642-3988;;", "linkedin": "yashsavani/;;", "or_profile": "~Yash_Savani1;~Marc_Anton_Finzi1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nsavani2024diffusing,\ntitle={Diffusing Differentiable Representations},\nauthor={Yash Savani and Marc Anton Finzi and J Zico Kolter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SKhR5CuiqQ}\n}", "github": "", "reviewers": "wR2k;BA4f;H3if;za3b", "pdf_size": 15605488, "rating": "4;5;5;6", "confidence": "2;1;3;3", "soundness": "2;2;2;4", "novelty": "2;2;2;4", "presentation": "2;3;2;3", "wc_summary": "51;16;83;125", "wc_strengths": "37;12;32;99", "wc_weaknesses": "38;14;211;145", "wc_questions": "97;8;58;197", "wc_limitations": "1;6;20;34", "wc_review": "224;56;404;600", "wc_reply_reviewers": "0;0;84;100", "wc_reply_authors": "0;0;0;352", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.75, 40.20183453525473 ], "wc_strengths_avg": [ 45.0, 32.54996159751959 ], "wc_weaknesses_avg": [ 102.0, 79.95311125903731 ], "wc_questions_avg": [ 90.0, 69.364976753402 ], "wc_limitations_avg": [ 15.25, 12.871965661856 ], "wc_review_avg": [ 321.0, 202.7091512487781 ], "wc_reply_reviewers_avg": [ 46.0, 46.34652090502587 ], "wc_reply_authors_avg": [ 88.0, 152.4204710660612 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nVvJVV60NioJ:scholar.google.com/&scioq=Diffusing+Differentiable+Representations&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "andrew.cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "SLXHVsbU8r", "title": "End-to-End Autonomous Driving without Costly Modularization and 3D Manual Annotation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose UAD, a method for vision-based end-to-end autonomous driving (E2EAD), achieving the best open-loop evaluation performance in nuScenes, meanwhile showing robust closed-loop driving quality in CARLA. Our motivation stems from the observation that current E2EAD models still mimic the modular architecture in typical driving stacks, with carefully designed supervised perception and prediction subtasks to provide environment information for oriented planning. Although achieving groundbreaking progress, such design has certain drawbacks: 1) preceding subtasks require massive high-quality 3D annotations as supervision, posing a significant impediment to scaling the training data; 2) each submodule entails substantial computation overhead in both training and inference. To this end, we propose UAD, an E2EAD framework with an unsupervised proxy to address all these issues. Firstly, we design a novel Angular Perception Pretext to eliminate the annotation requirement. The pretext models the driving scene by predicting the angular-wise spatial objectness and temporal dynamics, without manual annotation. Secondly, a self-supervised training strategy, which learns the consistency of the predicted trajectories under different augment views, is proposed to enhance the planning robustness in steering scenarios. Our UAD achieves 38.7% relative improvements over UniAD on the average collision rate in nuScenes and surpasses VAD for 6.40 points on the driving score in CARLA's Town05 Long benchmark. Moreover, the proposed method only consumes 44.3% training resources of UniAD and runs 3.4$\\times$ faster in inference. Our innovative design not only for the first time demonstrates unarguable performance advantages over supervised counterparts, but also enjoys unprecedented efficiency in data, training, and inference.", "keywords": "End-to-End Autonomous Driving;Unsupervised Pretext Task;Direction-Aware Planning", "primary_area": "machine_vision", "supplementary_material": "/attachment/d828b2866c430ea7c226df28408556ab488c7faf.zip", "author": "Mingzhe Guo;Zhipeng Zhang;Yuan He;Ke Wang;Liping Jing", "authorids": "~Mingzhe_Guo1;~Zhipeng_Zhang2;~Yuan_He4;~Ke_Wang3;~Liping_Jing3", "gender": "M;;F;M;", "homepage": ";;;;", "dblp": "274/6518;;;181/2613-21;", "google_scholar": "CbSSnj4AAAAJ;;;;", "orcid": "0000-0001-6399-9753;;;;", "linkedin": ";;yuan-he-3b068a139/;;", "or_profile": "~Mingzhe_Guo1;~Zhipeng_Zhang2;~Yuan_He4;~Ke_Wang3;~Liping_Jing3", "aff": "Beijing Jiaotong University;;Didi Research;Didi Research;", "aff_domain": "bjtu.edu.cn;;didichuxing.com;didichuxing.com;", "position": "PhD student;;Researcher;DiDi-Labs;", "bibtex": "@misc{\nanonymous2024endtoend,\ntitle={End-to-End Autonomous Driving without Costly Modularization and 3D Manual Annotation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=SLXHVsbU8r}\n}", "github": "", "project": "", "reviewers": "XcsQ;UZRG;d489;3W73;YXrs", "site": "https://openreview.net/forum?id=SLXHVsbU8r", "pdf_size": 16098631, "rating": "3;4;6;8;9", "confidence": "5;5;5;5;5", "soundness": "3;2;3;4;4", "novelty": "2;2;3;3;4", "presentation": "4;3;3;3;4", "wc_summary": "87;64;105;129;127", "wc_strengths": "47;69;70;98;256", "wc_weaknesses": "269;181;272;126;80", "wc_questions": "32;167;9;55;37", "wc_limitations": "28;5;74;22;14", "wc_review": "463;486;530;430;514", "wc_reply_reviewers": "0;268;64;18;72", "wc_reply_authors": "59;1159;233;34;0", "reply_reviewers": "0;3;1;1;1", "reply_authors": "2;4;2;2;1", "rating_avg": [ 6.0, 2.280350850198276 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 102.4, 24.621941434419828 ], "wc_strengths_avg": [ 108.0, 75.7495874576225 ], "wc_weaknesses_avg": [ 185.6, 76.34814994484148 ], "wc_questions_avg": [ 60.0, 55.47612098912468 ], "wc_limitations_avg": [ 28.6, 23.97999165971498 ], "wc_review_avg": [ 484.6, 35.7077022503549 ], "wc_reply_reviewers_avg": [ 84.4, 95.71959047133456 ], "wc_reply_authors_avg": [ 297.0, 438.4431548102901 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8829908189057800054&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Beijing Jiao Tong University;Didi Research", "aff_unique_dep": ";", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.didi.com", "aff_unique_abbr": "BJTU;Didi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Provably Robust Score-Based Diffusion Posterior Sampling for Plug-and-Play Image Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95109", "id": "SLnsoaY4u1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SLnsoaY4u1", "openreview": "https://openreview.net/forum?id=SLnsoaY4u1", "poster": "", "project": "", "author_site": "Xingyu Xu, Yuejie Chi", "tldr": "", "abstract": "In a great number of tasks in science and engineering, the goal is to infer an unknown image from a small number of noisy measurements collected from a known forward model describing certain sensing or imaging modality. Due to resource constraints, this image reconstruction task is often extremely ill-posed, which necessitates the adoption of expressive prior information to regularize the solution space. Score-based diffusion models, thanks to its impressive empirical success, have emerged as an appealing candidate of an expressive prior in image reconstruction. In order to accommodate diverse tasks at once, it is of great interest to develop efficient, consistent and robust algorithms that incorporate unconditional score functions of an image prior distribution in conjunction with flexible choices of forward models.\n\nThis work develops an algorithmic framework for employing score-based diffusion models as an expressive data prior in nonlinear inverse problems with general forward models. Motivated by the plug-and-play framework in the imaging community, we introduce a diffusion plug-and-play method (DPnP) that alternatively calls two samplers, a proximal consistency sampler based solely on the likelihood function of the forward model, and a denoising diffusion sampler based solely on the score functions of the image prior. The key insight is that denoising under white Gaussian noise can be solved rigorously via both stochastic (i.e., DDPM-type) and deterministic (i.e., DDIM-type) samplers using the same set of score functions trained for generation. We establish both asymptotic and non-asymptotic performance guarantees of DPnP, and provide numerical experiments to illustrate its promise in solving both linear and nonlinear image reconstruction tasks. To the best of our knowledge, DPnP is the first provably-robust posterior sampling method for nonlinear inverse problems using unconditional diffusion priors.", "keywords": "score-based generative models;nonlinear inverse problems;posterior sampling;plug-and-play", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xingyu Xu;Yuejie Chi", "authorids": "~Xingyu_Xu1;~Yuejie_Chi1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nxu2024provably,\ntitle={Provably Robust Score-Based Diffusion Posterior Sampling for Plug-and-Play Image Reconstruction},\nauthor={Xingyu Xu and Yuejie Chi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SLnsoaY4u1}\n}", "github": "", "reviewers": "43oG;ppwi;tiqf;juip", "pdf_size": 8506222, "rating": "4;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "90;52;59;48", "wc_strengths": "38;33;71;78", "wc_weaknesses": "351;137;112;299", "wc_questions": "6;62;89;2", "wc_limitations": "7;27;8;9", "wc_review": "492;311;339;436", "wc_reply_reviewers": "296;78;0;22", "wc_reply_authors": "622;248;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 16.498105951896417 ], "wc_strengths_avg": [ 55.0, 19.73575435599055 ], "wc_weaknesses_avg": [ 224.75, 102.30438651397114 ], "wc_questions_avg": [ 39.75, 37.029549011566424 ], "wc_limitations_avg": [ 12.75, 8.257572282456872 ], "wc_review_avg": [ 394.5, 72.94004387166216 ], "wc_reply_reviewers_avg": [ 99.0, 117.23907198540937 ], "wc_reply_authors_avg": [ 217.5, 254.54027186282332 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2295589576672875129&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";", "author_num": 2 }, { "title": "ID-to-3D: Expressive ID-guided 3D Heads via Score Distillation Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95108", "id": "SLuZpdMDFg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SLuZpdMDFg", "openreview": "https://openreview.net/forum?id=SLuZpdMDFg", "poster": "", "project": "", "author_site": "Francesca Babiloni, Alexandros Lattas, Jiankang Deng, Stefanos Zafeiriou", "tldr": "", "abstract": "We propose ID-to-3D, a method to generate identity- and text-guided 3D human heads with disentangled expressions, starting from even a single casually captured \u2018in-the-wild\u2019 image of a subject. The foundation of our approach is anchored in compositionality, alongside the use of task-specific 2D diffusion models as priors for optimization. First, we extend a foundational model with a lightweight expression-aware and ID-aware architecture, and create 2D priors for geometric and texture generation, via fine-tuning only 0.2% of its available training parameters. Then, we jointly leverage a neural parametric representation for the expression of each subject and a multi-stage generation of highly detailed geometry and albedo texture. This combination of strong face identity embeddings and our neural representation enables accurate reconstruction of not only facial features but also accessories and hair, and can be meshed to provide render-ready assets for gaming and telepresence. Our results achieve an unprecedented level of id-consistent and high-quality texture and geometry generation, generalizing to a \u2018world\u2019 of unseen 3D identities, without relying on large 3D captured datasets of human assets.", "keywords": "Score Distillation Sampling;3D Heads;ID-conditioning;ID-to-3D", "primary_area": "machine_vision", "supplementary_material": "/attachment/1e9aaf13292a4e51227760e7753c4df742f03e0c.zip", "author": "Francesca Babiloni;Alexandros Lattas;Jiankang Deng;Stefanos Zafeiriou", "authorids": "~Francesca_Babiloni3;~Alexandros_Lattas1;~Jiankang_Deng1;~Stefanos_Zafeiriou1", "gender": ";M;M;M", "homepage": ";;https://jiankangdeng.github.io/;http://www.imperial.ac.uk/people/s.zafeiriou/", "dblp": ";221/0633;156/7808;25/1885.html", "google_scholar": ";0wJRUlsAAAAJ;Z_UoQFsAAAAJ;QKOH5iYAAAAJ", "orcid": ";0000-0002-9964-6105;0000-0002-3709-6216;", "linkedin": ";;jiankang-deng-b45b21b4/?originalSubdomain=uk;", "or_profile": "~Francesca_Babiloni3;~Alexandros_Lattas1;~Jiankang_Deng1;~Stefanos_Zafeiriou1", "aff": ";Google;Imperial College London;Imperial College London", "aff_domain": ";google.com;imperial.ac.uk;ic.ac.uk", "position": ";Researcher;Lecturer;Full Professor", "bibtex": "@inproceedings{\nbabiloni2024idtod,\ntitle={{ID}-to-3D: Expressive {ID}-guided 3D Heads via Score Distillation Sampling},\nauthor={Francesca Babiloni and Alexandros Lattas and Jiankang Deng and Stefanos Zafeiriou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SLuZpdMDFg}\n}", "github": "", "reviewers": "vm8B;HnyJ;G3ux;fD9E", "pdf_size": 39200331, "rating": "5;5;6;7", "confidence": "3;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "112;87;79;75", "wc_strengths": "54;40;48;204", "wc_weaknesses": "165;317;151;175", "wc_questions": "121;6;36;4", "wc_limitations": "1;1;18;6", "wc_review": "453;451;332;464", "wc_reply_reviewers": "8;17;12;33", "wc_reply_authors": "14;0;0;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.25, 14.376630342329875 ], "wc_strengths_avg": [ 86.5, 68.0202175827158 ], "wc_weaknesses_avg": [ 202.0, 66.94027188471824 ], "wc_questions_avg": [ 41.75, 47.478284509868296 ], "wc_limitations_avg": [ 6.5, 6.946221994724902 ], "wc_review_avg": [ 425.0, 53.92123885817165 ], "wc_reply_reviewers_avg": [ 17.5, 9.5 ], "wc_reply_authors_avg": [ 11.75, 13.534677683639163 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=33205346413342462&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";google.com;imperial.ac.uk;ic.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;Imperial College London", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.imperial.ac.uk", "aff_unique_abbr": "Google;ICL", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Achieving Tractable Minimax Optimal Regret in Average Reward MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95107", "id": "SM9IWrHz4e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SM9IWrHz4e", "openreview": "https://openreview.net/forum?id=SM9IWrHz4e", "poster": "/media/PosterPDFs/NeurIPS%202024/95107.png?t=1731743604.2857246", "project": "", "author_site": "Victor Boone, Zihan Zhang", "tldr": "", "abstract": "In recent years, significant attention has been directed towards learning average-reward Markov Decision Processes (MDPs).\nHowever, existing algorithms either suffer from sub-optimal regret guarantees or computational inefficiencies.\nIn this paper, we present the first *tractable* algorithm with minimax optimal regret of $\\mathrm{O}\\left(\\sqrt{\\mathrm{sp}(h^*) S A T \\log(SAT)}\\right)$ where $\\mathrm{sp}(h^*)$ is the span of the optimal bias function $h^*$, $S\\times A$ is the size of the state-action space and $T$ the number of learning steps. \nRemarkably, our algorithm does not require prior information on $\\mathrm{sp}(h^*)$. \n\nOur algorithm relies on a novel subroutine, **P**rojected **M**itigated **E**xtended **V**alue **I**teration (`PMEVI`), to compute bias-constrained optimal policies efficiently. \nThis subroutine can be applied to various previous algorithms to obtain improved regret bounds.", "keywords": "Markov decision processes;Regret;Average reward;Minimax;Optimism;Model-based", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/d149f3a838268a753c5eee0582fc290d7fe0f485.zip", "author": "Victor Boone;Zihan Zhang", "authorids": "~Victor_Boone1;~Zihan_Zhang1", "gender": "M;M", "homepage": "https://victor-boone.github.io/;https://zsubfuncz.github.io/zihanzhang.github.io/", "dblp": "249/9447.html;", "google_scholar": ";un0eGzEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Victor_Boone1;~Zihan_Zhang1", "aff": "Universit\u00e9 Grenoble Alpes;Princeton University", "aff_domain": "univ-grenoble-alpes.fr;princeton.edu", "position": "PhD student;Postdoc", "bibtex": "@inproceedings{\nboone2024achieving,\ntitle={Achieving Tractable Minimax Optimal Regret in Average Reward {MDP}s},\nauthor={Victor Boone and Zihan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SM9IWrHz4e}\n}", "github": "", "reviewers": "9hWK;f6aP;zKbr;EC5m", "pdf_size": 492601, "rating": "4;7;7;7", "confidence": "3;3;3;3", "soundness": "2;4;3;3", "novelty": "2;4;4;4", "presentation": "3;3;2;2", "wc_summary": "32;54;36;128", "wc_strengths": "17;45;53;10", "wc_weaknesses": "2;52;158;103", "wc_questions": "293;174;216;120", "wc_limitations": "15;15;3;1", "wc_review": "359;340;466;362", "wc_reply_reviewers": "0;18;25;40", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.5, 38.713692668098716 ], "wc_strengths_avg": [ 31.25, 18.14352501582865 ], "wc_weaknesses_avg": [ 78.75, 58.04039541560688 ], "wc_questions_avg": [ 200.75, 63.20354024894492 ], "wc_limitations_avg": [ 8.5, 6.5383484153110105 ], "wc_review_avg": [ 381.75, 49.36787923336387 ], "wc_reply_reviewers_avg": [ 20.75, 14.376630342329875 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13335598478681254121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "univ-grenoble-alpes.fr;princeton.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Grenoble Alpes;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-grenoble-alpes.fr;https://www.princeton.edu", "aff_unique_abbr": "UGA;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;United States" }, { "title": "Mixtures of Experts for Audio-Visual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95106", "id": "SNmuKbU0am", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SNmuKbU0am", "openreview": "https://openreview.net/forum?id=SNmuKbU0am", "poster": "/media/PosterPDFs/NeurIPS%202024/95106.png?t=1731604187.1072936", "project": "", "author_site": "Ying Cheng, Yang Li, Junjie He, Rui Feng", "tldr": "", "abstract": "With the rapid development of multimedia technology, audio-visual learning has emerged as a promising research topic within the field of multimodal analysis. In this paper, we explore parameter-efficient transfer learning for audio-visual learning and propose the Audio-Visual Mixture of Experts (\\ourmethodname) to inject adapters into pre-trained models flexibly. Specifically, we introduce unimodal and cross-modal adapters as multiple experts to specialize in intra-modal and inter-modal information, respectively, and employ a lightweight router to dynamically allocate the weights of each expert according to the specific demands of each task. Extensive experiments demonstrate that our proposed approach \\ourmethodname achieves superior performance across multiple audio-visual tasks, \nincluding AVE, AVVP, AVS, and AVQA. Furthermore, visual-only experimental results also indicate that our approach can tackle challenging scenes where modality information is missing.\nThe source code is available at \\url{https://github.com/yingchengy/AVMOE}.", "keywords": "audio-visual learning;mixture of experts;parameter-efficient transfer learning", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/bf3b8e89177de355d8e253fd56b0213b36fd1e2b.zip", "author": "Ying Cheng;Yang Li;Junjie He;Rui Feng", "authorids": "~Ying_Cheng2;~Yang_Li97;~Junjie_He3;~Rui_Feng2", "gender": "F;M;M;", "homepage": ";https://github.com/LePereLee/Lee;https://github.com/hojkol;", "dblp": "54/4536-5;;;", "google_scholar": "wsmBf7oAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ying_Cheng2;~Yang_Li97;~Junjie_He3;~Rui_Feng2", "aff": "Fudan University;Fudan University;Fudan University;", "aff_domain": "fudan.edu;fudan.edu.cn;fudan.edu.cn;", "position": "Postdoc;MS student;MS student;", "bibtex": "@inproceedings{\ncheng2024mixtures,\ntitle={Mixtures of Experts for Audio-Visual Learning},\nauthor={Ying Cheng and Yang Li and Junjie He and Rui Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SNmuKbU0am}\n}", "github": "", "reviewers": "k9Vn;YVYv;usTX;jXV1;pdXh", "pdf_size": 8213927, "rating": "5;6;7;7;7", "confidence": "5;5;4;4;4", "soundness": "2;3;3;3;2", "novelty": "1;3;3;3;3", "presentation": "3;2;3;3;2", "wc_summary": "133;71;16;56;64", "wc_strengths": "26;18;13;31;50", "wc_weaknesses": "82;170;104;82;350", "wc_questions": "4;17;7;46;40", "wc_limitations": "15;14;13;23;78", "wc_review": "260;290;153;238;582", "wc_reply_reviewers": "0;0;0;14;288", "wc_reply_authors": "0;42;0;14;205", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;2;1;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.0, 37.67757954009254 ], "wc_strengths_avg": [ 27.6, 12.815615474880635 ], "wc_weaknesses_avg": [ 157.6, 101.46447654228547 ], "wc_questions_avg": [ 22.8, 17.151093259614676 ], "wc_limitations_avg": [ 28.6, 24.952755358877702 ], "wc_review_avg": [ 304.6, 145.99397247831843 ], "wc_reply_reviewers_avg": [ 60.4, 113.92910076007797 ], "wc_reply_authors_avg": [ 52.2, 77.9240656023542 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9185586535436916, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OiogBLf7pPwJ:scholar.google.com/&scioq=Mixtures+of+Experts+for+Audio-Visual+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "fudan.edu;fudan.edu.cn;fudan.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "4Real: Towards Photorealistic 4D Scene Generation via Video Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95105", "id": "SO1aRpwVLk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SO1aRpwVLk", "openreview": "https://openreview.net/forum?id=SO1aRpwVLk", "poster": "", "project": "", "author_site": "Heng Yu, Chaoyang Wang, Peiye Zhuang, Willi Menapace, Aliaksandr Siarohin, Junli Cao, L\u00e1szl\u00f3 Jeni, Sergey Tulyakov, Hsin-Ying Lee", "tldr": "", "abstract": "Existing dynamic scene generation methods mostly rely on distilling knowledge from pre-trained 3D generative models, which are typically fine-tuned on synthetic object datasets.\nAs a result, the generated scenes are often object-centric and lack photorealism. \nTo address these limitations, we introduce a novel pipeline designed for photorealistic text-to-4D scene generation, discarding the dependency on multi-view generative models and instead fully utilizing video generative models trained on diverse real-world datasets. \nOur method begins by generating a reference video using the video generation model.\nWe then learn the canonical 3D representation of the video using a freeze-time video, delicately generated from the reference video.\nTo handle inconsistencies in the freeze-time video, we jointly learn a per-frame deformation to model these imperfections.\nWe then learn the temporal deformation based on the canonical representation to capture dynamic interactions in the reference video. \nThe pipeline facilitates the generation of dynamic scenes with enhanced photorealism and structural integrity, viewable from multiple perspectives, thereby setting a new standard in 4D scene generation.", "keywords": "4D generation; novel view synthesis; gaussian splatting; text-4D; 4D reconstruction", "primary_area": "generative_models", "supplementary_material": "/attachment/3d8a0ac15f6ea3b90b04625e795009e3d4552ecf.zip", "author": "Heng Yu;Chaoyang Wang;Peiye Zhuang;Willi Menapace;Aliaksandr Siarohin;Junli Cao;Laszlo Attila Jeni;Sergey Tulyakov;Hsin-Ying Lee", "authorids": "~Heng_Yu3;~Chaoyang_Wang1;~Peiye_Zhuang2;~Willi_Menapace1;~Aliaksandr_Siarohin1;~Junli_Cao2;~Laszlo_Attila_Jeni1;~Sergey_Tulyakov1;~Hsin-Ying_Lee2", "gender": "M;M;F;M;M;M;M;M;M", "homepage": "https://heng14.github.io/;https://mightychaos.github.io/;https://payeah.net;;;;http://www.laszlojeni.com/;http://www.stulyakov.com/;http://hsinyinglee.com/", "dblp": ";;244/7937;271/8571;199/1971;234/8466;35/7547;40/6115;149/7976-1.html", "google_scholar": ";I-xDKHEAAAAJ;gsPILWoAAAAJ;31ha1LgAAAAJ;https://scholar.google.it/citations?user=uMl5-k4AAAAJ;;Wdnc-mEAAAAJ;mgzXR0sAAAAJ;", "orcid": ";;;;;;0000-0002-2830-700X;;", "linkedin": ";;;willi-menapace/;;junli-cao-5165b41a1;laszlojeni/;sergeytulyakov/;", "or_profile": "~Heng_Yu3;~Chaoyang_Wang1;~Peiye_Zhuang2;~Willi_Menapace1;~Aliaksandr_Siarohin1;~Junli_Cao2;~Laszlo_Attila_Jeni1;~Sergey_Tulyakov1;~Hsin-Ying_Lee2", "aff": "Stanford University;Snap Inc.;Snap Inc.;University of Trento;Snap Inc.;Snap Inc.;Carnegie Mellon University;Snap Inc.;Snap Inc.", "aff_domain": "stanford.edu;snapchat.com;snapchat.com;unitn.it;snapchat.com;snapchat.com;cmu.edu;snapchat.com;snap.com", "position": "PhD student;Researcher;Researcher;PhD student;Intern;Researcher;Assistant Professor;Director of Research;Researcher", "bibtex": "@inproceedings{\nyu2024real,\ntitle={4Real: Towards Photorealistic 4D Scene Generation via Video Diffusion Models},\nauthor={Heng Yu and Chaoyang Wang and Peiye Zhuang and Willi Menapace and Aliaksandr Siarohin and Junli Cao and Laszlo Attila Jeni and Sergey Tulyakov and Hsin-Ying Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SO1aRpwVLk}\n}", "github": "", "reviewers": "7sxF;vJ8m;oa6s;yWZo;3AzP", "pdf_size": 5392711, "rating": "5;6;6;6;8", "confidence": "4;4;5;4;4", "soundness": "2;3;3;2;4", "novelty": "2;3;3;2;4", "presentation": "2;2;3;3;4", "wc_summary": "107;76;98;78;124", "wc_strengths": "26;154;38;113;52", "wc_weaknesses": "163;263;143;171;235", "wc_questions": "13;6;72;142;42", "wc_limitations": "10;1;34;8;1", "wc_review": "319;500;385;512;454", "wc_reply_reviewers": "62;102;0;45;74", "wc_reply_authors": "0;0;0;0;22", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 96.6, 18.06211504780102 ], "wc_strengths_avg": [ 76.6, 48.93097178679369 ], "wc_weaknesses_avg": [ 195.0, 45.88681727903996 ], "wc_questions_avg": [ 55.0, 49.380157958435085 ], "wc_limitations_avg": [ 10.8, 12.155657119218198 ], "wc_review_avg": [ 434.0, 72.75438131136846 ], "wc_reply_reviewers_avg": [ 56.6, 33.85616635119812 ], "wc_reply_authors_avg": [ 4.4, 8.8 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.10206207261596577, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9135060141883667949&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;snapchat.com;snapchat.com;unitn.it;snapchat.com;snapchat.com;cmu.edu;snapchat.com;snap.com", "author_num": 9, "aff_unique_index": "0;1;1;2;1;1;3;1;1", "aff_unique_norm": "Stanford University;Snap Inc.;University of Trento;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.snapinc.com;https://www.unitn.it;https://www.cmu.edu", "aff_unique_abbr": "Stanford;Snap;UniTN;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;Italy" }, { "title": "Ensemble sampling for linear bandits: small ensembles suffice", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95104", "id": "SO7fnIFq0o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SO7fnIFq0o", "openreview": "https://openreview.net/forum?id=SO7fnIFq0o", "poster": "", "project": "", "author_site": "David Janz, Alexander Litvak, Csaba Szepesvari", "tldr": "", "abstract": "We provide the first useful and rigorous analysis of ensemble sampling for the stochastic linear bandit setting. In particular, we show that, under standard assumptions, for a $d$-dimensional stochastic linear bandit with an interaction horizon $T$, ensemble sampling with an ensemble of size of order $\\smash{d \\log T}$ incurs regret at most of the order $\\smash{(d \\log T)^{5/2} \\sqrt{T}}$. Ours is the first result in any structured setting not to require the size of the ensemble to scale linearly with $T$---which defeats the purpose of ensemble sampling---while obtaining near $\\smash{\\sqrt{T}}$ order regret. Our result is also the first to allow for infinite action sets.", "keywords": "linear;bandits;ensemble sampling", "primary_area": "bandits", "supplementary_material": "", "author": "David Janz;Alexander Litvak;Csaba Szepesvari", "authorids": "~David_Janz1;~Alexander_Litvak1;~Csaba_Szepesvari1", "gender": "M;;M", "homepage": "http://www.djanz.org;http://www.math.ualberta.ca/~alexandr/;https://sites.ualberta.ca/~szepesva/", "dblp": "190/7685;17/2978;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba", "google_scholar": "https://scholar.google.co.uk/citations?user=rI5XB7sAAAAJ;39-ij5sAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ", "orcid": ";;", "linkedin": ";;csaba-szepesvari-09376b1?trk=hp-identity-name", "or_profile": "~David_Janz1;~Alexander_Litvak1;~Csaba_Szepesvari1", "aff": "University of Alberta;University of Alberta;Google DeepMind", "aff_domain": "ualberta.ca;ualberta.ca;google.com", "position": "Postdoc;Full Professor;Research Scientist", "bibtex": "@inproceedings{\njanz2024ensemble,\ntitle={Ensemble sampling for linear bandits: small ensembles suffice},\nauthor={David Janz and Alexander Litvak and Csaba Szepesvari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SO7fnIFq0o}\n}", "github": "", "reviewers": "P6Xa;Pgmx;YgVw;N2aY", "pdf_size": 407610, "rating": "6;6;7;7", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "novelty": "1;2;3;4", "presentation": "2;3;3;3", "wc_summary": "62;54;70;120", "wc_strengths": "36;56;94;67", "wc_weaknesses": "100;64;113;199", "wc_questions": "39;21;215;40", "wc_limitations": "1;11;24;16", "wc_review": "238;206;516;442", "wc_reply_reviewers": "57;20;578;15", "wc_reply_authors": "0;37;1105;2", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;4;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.5, 25.743931323712 ], "wc_strengths_avg": [ 63.25, 20.94486810653149 ], "wc_weaknesses_avg": [ 119.0, 49.553001927229396 ], "wc_questions_avg": [ 78.75, 79.02649872036594 ], "wc_limitations_avg": [ 13.0, 8.336666000266533 ], "wc_review_avg": [ 350.5, 131.62351613598537 ], "wc_reply_reviewers_avg": [ 167.5, 237.5568353047329 ], "wc_reply_authors_avg": [ 286.0, 473.07874608779457 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14744994350372871808&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ualberta.ca;ualberta.ca;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Alberta;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com", "aff_unique_abbr": "UAlberta;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Automatically Learning Hybrid Digital Twins of Dynamical Systems", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95103", "id": "SOsiObSdU2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SOsiObSdU2", "openreview": "https://openreview.net/forum?id=SOsiObSdU2", "poster": "", "project": "", "author_site": "Samuel Holt, Tennison Liu, Mihaela van der Schaar", "tldr": "", "abstract": "Digital Twins (DTs) are computational models that simulate the states and temporal dynamics of real-world systems, playing a crucial role in prediction, understanding, and decision-making across diverse domains. However, existing approaches to DTs often struggle to generalize to unseen conditions in data-scarce settings, a crucial requirement for such models. To address these limitations, our work begins by establishing the essential desiderata for effective DTs. Hybrid Digital Twins (**HDTwins**) represent a promising approach to address these requirements, modeling systems using a composition of both mechanistic and neural components. This hybrid architecture simultaneously leverages (partial) domain knowledge and neural network expressiveness to enhance generalization, with its modular design facilitating improved evolvability. While existing hybrid models rely on expert-specified architectures with only parameters optimized on data, *automatically* specifying and optimizing HDTwins remains intractable due to the complex search space and the need for flexible integration of domain priors. To overcome this complexity, we propose an evolutionary algorithm (**HDTwinGen**) that employs Large Language Models (LLMs) to autonomously propose, evaluate, and optimize HDTwins. Specifically, LLMs iteratively generate novel model specifications, while offline tools are employed to optimize emitted parameters. Correspondingly, proposed models are evaluated and evolved based on targeted feedback, enabling the discovery of increasingly effective hybrid models. Our empirical results reveal that HDTwinGen produces generalizable, sample-efficient, and evolvable models, significantly advancing DTs' efficacy in real-world applications.", "keywords": "Hybrid Models;Dynamical Systems", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Samuel Holt;Tennison Liu;Mihaela van der Schaar", "authorids": "~Samuel_Holt1;~Tennison_Liu1;~Mihaela_van_der_Schaar2", "gender": ";M;F", "homepage": "https://samholt.github.io/;https://www.vanderschaar-lab.com/research-team/#Tennison;https://www.vanderschaar-lab.com", "dblp": "322/3656;256/9899;", "google_scholar": "Ey5aInIAAAAJ;LtdHRjsAAAAJ;DZ3S--MAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Samuel_Holt1;~Tennison_Liu1;~Mihaela_van_der_Schaar2", "aff": "Google DeepMind;University of Cambridge;University of California, Los Angeles", "aff_domain": "google.com;cam.ac.uk;ucla.edu", "position": "Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\nholt2024automatically,\ntitle={Automatically Learning Hybrid Digital Twins of Dynamical Systems},\nauthor={Samuel Holt and Tennison Liu and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SOsiObSdU2}\n}", "github": "", "reviewers": "8ECc;UNnB;YMit;jD19", "pdf_size": 1953222, "rating": "6;6;7;8", "confidence": "4;3;3;4", "soundness": "3;2;2;3", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "72;44;70;96", "wc_strengths": "61;35;124;32", "wc_weaknesses": "249;222;254;49", "wc_questions": "109;58;49;179", "wc_limitations": "15;3;154;180", "wc_review": "506;362;651;536", "wc_reply_reviewers": "34;19;21;30", "wc_reply_authors": "27;27;26;34", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.5, 18.405162319305962 ], "wc_strengths_avg": [ 63.0, 36.979724174201195 ], "wc_weaknesses_avg": [ 193.5, 84.31043826241209 ], "wc_questions_avg": [ 98.75, 51.6738570265468 ], "wc_limitations_avg": [ 88.0, 79.64609218285602 ], "wc_review_avg": [ 513.75, 102.9814910554319 ], "wc_reply_reviewers_avg": [ 26.0, 6.2048368229954285 ], "wc_reply_authors_avg": [ 28.5, 3.2015621187164243 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2876653416987932704&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;cam.ac.uk;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University of Cambridge;University of California, Los Angeles", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "DeepMind;Cambridge;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "SOxxGATGsl", "title": "Efficient Algorithms for Lipschitz Bandits", "track": "main", "status": "Reject", "tldr": "", "abstract": "Lipschitz bandits is a fundamental framework used to model sequential decision-making problems with large, structured action spaces. This framework has been applied in various areas. Previous algorithms, such as the Zooming algorithm, achieve near-optimal regret with $O(T^2)$ time complexity and $O(T)$ arms stored in memory, where $T$ denotes the size of the time horizons. However, in practical scenarios, learners may face limitations regarding the storage of a large number of arms in memory. In this paper, we explore the bounded memory stochastic Lipschitz bandits problem, where the algorithm is limited to storing only a limited number of arms at any given time horizon. We propose algorithms that achieve near-optimal regret with $O(T)$ time complexity and $O(1)$ arms stored, both of which are almost optimal and state-of-the-art. Moreover, our numerical results demonstrate the efficiency of these algorithms.", "keywords": "bandits", "primary_area": "bandits", "supplementary_material": "", "author": "Shaoang Li;Lan Zhang;Xiangyang Li", "authorids": "~Shaoang_Li1;~Lan_Zhang1;~Xiangyang_Li4", "gender": "M;F;", "homepage": ";https://cs.ustc.edu.cn/2020/0706/c23235a460088/page.htm;http://staff.ustc.edu.cn/~xiangyangli/", "dblp": ";54/2752-2;l/XiangYangLi", "google_scholar": "https://scholar.google.com/citations?hl=en;83QxmA8AAAAJ;JURtNb0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Shaoang_Li1;~Lan_Zhang1;~Xiangyang_Li4", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024efficient,\ntitle={Efficient Algorithms for Lipschitz Bandits},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=SOxxGATGsl}\n}", "github": "", "project": "", "reviewers": "Bcg5;q7Fp;svqm;mKhe", "site": "https://openreview.net/forum?id=SOxxGATGsl", "pdf_size": 823066, "rating": "2;4;6;6", "confidence": "5;4;3;3", "soundness": "1;3;3;4", "novelty": "4;3;3;3", "presentation": "1;3;3;3", "wc_summary": "16;118;94;45", "wc_strengths": "46;73;35;20", "wc_weaknesses": "610;200;62;16", "wc_questions": "1;121;108;55", "wc_limitations": "1;12;6;29", "wc_review": "674;524;305;165", "wc_reply_reviewers": "321;87;68;66", "wc_reply_authors": "302;0;0;46", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 68.25, 40.02733441037512 ], "wc_strengths_avg": [ 43.5, 19.371370627810517 ], "wc_weaknesses_avg": [ 222.0, 234.0213665458776 ], "wc_questions_avg": [ 71.25, 47.49934210070704 ], "wc_limitations_avg": [ 12.0, 10.559356040971437 ], "wc_review_avg": [ 417.0, 195.92473044513804 ], "wc_reply_reviewers_avg": [ 135.5, 107.41159155323973 ], "wc_reply_authors_avg": [ 87.0, 125.54282137979854 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2UzQddQon14J:scholar.google.com/&scioq=Efficient+Algorithms+for+Lipschitz+Bandits&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "TextCtrl: Diffusion-based Scene Text Editing with Prior Guidance Control", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95102", "id": "SQVns9hWJT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SQVns9hWJT", "openreview": "https://openreview.net/forum?id=SQVns9hWJT", "poster": "/media/PosterPDFs/NeurIPS%202024/95102.png?t=1733302972.3497107", "project": "", "author_site": "Weichao Zeng, Yan Shu, Zhenhang Li, Dongbao Yang, Yu Zhou", "tldr": "", "abstract": "Centred on content modification and style preservation, Scene Text Editing (STE) remains a challenging task despite considerable progress in text-to-image synthesis and text-driven image manipulation recently. GAN-based STE methods generally encounter a common issue of model generalization, while Diffusion-based STE methods suffer from undesired style deviations. To address these problems, we propose TextCtrl, a diffusion-based method that edits text with prior guidance control. Our method consists of two key components: (i) By constructing fine-grained text style disentanglement and robust text glyph structure representation, TextCtrl explicitly incorporates Style-Structure guidance into model design and network training, significantly improving text style consistency and rendering accuracy. (ii) To further leverage the style prior, a Glyph-adaptive Mutual Self-attention mechanism is proposed which deconstructs the implicit fine-grained features of the source image to enhance style consistency and vision quality during inference. Furthermore, to fill the vacancy of the real-world STE evaluation benchmark, we create the first real-world image-pair dataset termed ScenePair for fair comparisons. Experiments demonstrate the effectiveness of TextCtrl compared with previous methods concerning both style fidelity and text accuracy. Project page: https://github.com/weichaozeng/TextCtrl.", "keywords": "Image Synthesis;Image Editing;Scene Text Editing.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Weichao Zeng;Yan Shu;Zhenhang Li;Dongbao Yang;Yu Zhou", "authorids": "~Weichao_Zeng1;~Yan_Shu3;~Zhenhang_Li1;~Dongbao_Yang1;~Yu_Zhou2", "gender": ";M;M;F;M", "homepage": ";https://shuyansy.github.io/;https://zhenhang-li.github.io/;;https://intimelab.github.io", "dblp": ";;367/9353;193/2602;36/2728-15", "google_scholar": ";qLu1W08AAAAJ;https://scholar.google.com.hk/citations?user=Ah7QRmwAAAAJ;f9eIQZAAAAAJ;FNfBHg8AAAAJ", "orcid": ";0000-0002-5544-9425;0009-0002-6367-6692;0000-0001-8628-411X;0000-0003-4188-9953", "linkedin": ";;;;", "or_profile": "~Weichao_Zeng1;~Yan_Shu3;~Zhenhang_Li1;~Dongbao_Yang1;~Yu_Zhou2", "aff": ";Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering\uff0cChinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering, CAS", "aff_domain": ";iie.ac.cn;iie.ac.cn;iie.ac.cn;iie.ac.cn", "position": ";Intern;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzeng2024textctrl,\ntitle={TextCtrl: Diffusion-based Scene Text Editing with Prior Guidance Control},\nauthor={Weichao Zeng and Yan Shu and Zhenhang Li and Dongbao Yang and Yu Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SQVns9hWJT}\n}", "github": "", "reviewers": "rTPt;MkTr;Ktb6;25Re;mqKG", "pdf_size": 37666846, "rating": "5;5;6;7;7", "confidence": "4;5;4;5;4", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "75;97;73;95;77", "wc_strengths": "21;59;99;98;63", "wc_weaknesses": "29;190;177;66;99", "wc_questions": "104;4;28;69;3", "wc_limitations": "9;4;1;6;10", "wc_review": "238;354;378;334;252", "wc_reply_reviewers": "0;0;0;41;19", "wc_reply_authors": "0;0;24;29;30", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.4, 10.384603988597735 ], "wc_strengths_avg": [ 68.0, 28.89982698910151 ], "wc_weaknesses_avg": [ 112.2, 62.42243186547605 ], "wc_questions_avg": [ 41.6, 39.32734417679384 ], "wc_limitations_avg": [ 6.0, 3.286335345030997 ], "wc_review_avg": [ 311.2, 55.99428542271077 ], "wc_reply_reviewers_avg": [ 12.0, 16.260381299342274 ], "wc_reply_authors_avg": [ 16.6, 13.705473359209451 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12863159230015804247&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";iie.ac.cn;iie.ac.cn;iie.ac.cn;iie.ac.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Information Engineering", "aff_unique_url": "http://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "State-free Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95101", "id": "SQicD307Oh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SQicD307Oh", "openreview": "https://openreview.net/forum?id=SQicD307Oh", "poster": "/media/PosterPDFs/NeurIPS%202024/95101.png?t=1731684417.1709294", "project": "", "author_site": "Mingyu Chen, Aldo Pacchiano, Xuezhou Zhang", "tldr": "", "abstract": "In this work, we study the \\textit{state-free RL} problem, where the algorithm does not have the states information before interacting with the environment. Specifically, denote the reachable state set by $\\mathcal{S}^\\Pi := \\{ s|\\max_{\\pi\\in \\Pi}q^{P, \\pi}(s)>0 \\}$, we design an algorithm which requires no information on the state space $S$ while having a regret that is completely independent of $\\mathcal{S}$ and only depend on $\\mathcal{S}^\\Pi$. We view this as a concrete first step towards \\textit{parameter-free RL}, with the goal of designing RL algorithms that require no hyper-parameter tuning.", "keywords": "Reinforcement Learning;Parameter-free;Online Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Mingyu Chen;Aldo Pacchiano;Xuezhou Zhang", "authorids": "~Mingyu_Chen2;~Aldo_Pacchiano1;~Xuezhou_Zhang2", "gender": "M;M;M", "homepage": ";https://www.aldopacchiano.ai;https://zhangxz1123.github.io/", "dblp": ";129/6338;213/7993", "google_scholar": "-C4-gdYAAAAJ;no_BfYgAAAAJ;tR-p-r8AAAAJ", "orcid": ";;", "linkedin": "mingyu-chen-7b0136180/;;", "or_profile": "~Mingyu_Chen2;~Aldo_Pacchiano1;~Xuezhou_Zhang1", "aff": "Boston University, Boston University;Broad Institute;Boston University, Boston University", "aff_domain": "bu.edu;broadinstitute.org;bu.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nchen2024statefree,\ntitle={State-free Reinforcement Learning},\nauthor={Mingyu Chen and Aldo Pacchiano and Xuezhou Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SQicD307Oh}\n}", "github": "", "reviewers": "8pEE;zatZ;sD2S", "pdf_size": 589832, "rating": "4;7;7", "confidence": "2;4;4", "soundness": "2;4;3", "novelty": "2;4;3", "presentation": "3;1;3", "wc_summary": "67;58;55", "wc_strengths": "36;40;68", "wc_weaknesses": "52;156;296", "wc_questions": "2;7;2", "wc_limitations": "1;13;2", "wc_review": "158;274;423", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 60.0, 5.0990195135927845 ], "wc_strengths_avg": [ 48.0, 14.236104336041748 ], "wc_weaknesses_avg": [ 168.0, 99.97332977682932 ], "wc_questions_avg": [ 3.6666666666666665, 2.3570226039551585 ], "wc_limitations_avg": [ 5.333333333333333, 5.436502143433364 ], "wc_review_avg": [ 285.0, 108.46504813379592 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18390382919992408117&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "bu.edu;broadinstitute.org;bu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Boston University;Broad Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.broadinstitute.org", "aff_unique_abbr": "BU;Broad", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Boston;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "U-DiTs: Downsample Tokens in U-Shaped Diffusion Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95100", "id": "SRWs2wxNs7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SRWs2wxNs7", "openreview": "https://openreview.net/forum?id=SRWs2wxNs7", "poster": "", "project": "", "author_site": "Yuchuan Tian, Zhijun Tu, Hanting Chen, Jie Hu, Chao Xu, Yunhe Wang", "tldr": "", "abstract": "Diffusion Transformers (DiTs) introduce the transformer architecture to diffusion tasks for latent-space image generation. With an isotropic architecture that chains a series of transformer blocks, DiTs demonstrate competitive performance and good scalability; but meanwhile, the abandonment of U-Net by DiTs and their following improvements is worth rethinking. To this end, we conduct a simple toy experiment by comparing a U-Net architectured DiT with an isotropic one. It turns out that the U-Net architecture only gain a slight advantage amid the U-Net inductive bias, indicating potential redundancies within the U-Net-style DiT. Inspired by the discovery that U-Net backbone features are low-frequency-dominated, we perform token downsampling on the query-key-value tuple for self-attention and bring further improvements despite a considerable amount of reduction in computation. Based on self-attention with downsampled tokens, we propose a series of U-shaped DiTs (U-DiTs) in the paper and conduct extensive experiments to demonstrate the extraordinary performance of U-DiT models. The proposed U-DiT could outperform DiT-XL with only 1/6 of its computation cost. Codes are available at https://github.com/YuchuanTian/U-DiT.", "keywords": "Diffusion Transformers; U-Net; Architecture", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/fbde8c207355e78382dc561187a66cbe3b363811.zip", "author": "Yuchuan Tian;Zhijun Tu;Hanting Chen;Jie Hu;Chao Xu;Yunhe Wang", "authorids": "~Yuchuan_Tian1;~Zhijun_Tu1;~Hanting_Chen1;~Jie_Hu8;~Chao_Xu1;~Yunhe_Wang1", "gender": "M;M;M;M;M;M", "homepage": ";;;;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;https://www.wangyunhe.site/", "dblp": "193/6675;228/8537;232/2060;;;63/8217-1", "google_scholar": ";kSPs6FsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";0000-0001-8740-7927;;;;0000-0002-0142-509X", "linkedin": ";;;%E6%9D%B0-%E8%83%A1-b6a598118/;;", "or_profile": "~Yuchuan_Tian1;~Zhijun_Tu1;~Hanting_Chen1;~Jie_Hu8;~Chao_Xu1;~Yunhe_Wang1", "aff": "Peking University;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Peking University;Huawei Noah's Ark Lab", "aff_domain": "pku.edu.cn;huawei.com;huawei.com;huawei.com;pku.edu;huawei.com", "position": "PhD student;Researcher;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ntian2024udits,\ntitle={U-DiTs: Downsample Tokens in U-Shaped Diffusion Transformers},\nauthor={Yuchuan Tian and Zhijun Tu and Hanting Chen and Jie Hu and Chao Xu and Yunhe Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SRWs2wxNs7}\n}", "github": "", "reviewers": "AGQh;Df8i;JEXU;GsTx", "pdf_size": 4123960, "rating": "4;5;6;7", "confidence": "4;4;5;4", "soundness": "2;3;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "43;89;78;93", "wc_strengths": "45;40;60;36", "wc_weaknesses": "37;104;171;93", "wc_questions": "26;15;22;20", "wc_limitations": "1;35;1;1", "wc_review": "152;283;332;243", "wc_reply_reviewers": "299;0;25;68", "wc_reply_authors": "627;57;76;22", "reply_reviewers": "1;0;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 19.68978161382193 ], "wc_strengths_avg": [ 45.25, 9.093266739736606 ], "wc_weaknesses_avg": [ 101.25, 47.61499238685227 ], "wc_questions_avg": [ 20.75, 3.960744879438715 ], "wc_limitations_avg": [ 9.5, 14.722431864335457 ], "wc_review_avg": [ 252.5, 66.03218912015564 ], "wc_reply_reviewers_avg": [ 98.0, 118.56854557596631 ], "wc_reply_authors_avg": [ 195.5, 249.87847046114237 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9747406430246150168&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;huawei.com;huawei.com;huawei.com;pku.edu;huawei.com", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GIC: Gaussian-Informed Continuum for Physical Property Identification and Simulation", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95099", "id": "SSCtCq2MH2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SSCtCq2MH2", "openreview": "https://openreview.net/forum?id=SSCtCq2MH2", "poster": "/media/PosterPDFs/NeurIPS%202024/95099.png?t=1731381053.2302415", "project": "", "author_site": "Junhao Cai, Yuji Yang, Weihao Yuan, Yisheng HE, Zilong Dong, Liefeng Bo, Hui Cheng, Qifeng Chen", "tldr": "", "abstract": "This paper studies the problem of estimating physical properties (system identification) through visual observations. To facilitate geometry-aware guidance in physical property estimation, we introduce a novel hybrid framework that leverages 3D Gaussian representation to not only capture explicit shapes but also enable the simulated continuum to render object masks as 2D shape surrogates during training. We propose a new dynamic 3D Gaussian framework based on motion factorization to recover the object as 3D Gaussian point sets across different time states. Furthermore, we develop a coarse-to-fine filling strategy to generate the density fields of the object from the Gaussian reconstruction, allowing for the extraction of object continuums along with their surfaces and the integration of Gaussian attributes into these continuum. In addition to the extracted object surfaces, the Gaussian-informed continuum also enables the rendering of object masks during simulations, serving as 2D-shape guidance for physical property estimation. Extensive experimental evaluations demonstrate that our pipeline achieves state-of-the-art performance across multiple benchmarks and metrics. Additionally, we illustrate the effectiveness of the proposed method through real-world demonstrations, showcasing its practical utility. Our project page is at https://jukgei.github.io/project/gic.", "keywords": "Object Property Identification;Gaussian-inform Continuum", "primary_area": "machine_vision", "supplementary_material": "/attachment/01293ad490e1104485723ea9102a08ac3057eaf6.zip", "author": "Junhao Cai;Yuji Yang;Weihao Yuan;Yisheng HE;Zilong Dong;Liefeng Bo;Hui Cheng;Qifeng Chen", "authorids": "~Junhao_Cai1;~Yuji_Yang1;~Weihao_Yuan1;~Yisheng_HE1;~Zilong_Dong2;~Liefeng_Bo1;~Hui_Cheng5;~Qifeng_Chen1", "gender": "M;M;M;M;;M;;M", "homepage": ";https://jukgei.github.io/;https://www.weihao-yuan.com;https://hyshkust.github.io;;https://research.cs.washington.edu/istc/lfb/;;http://cqf.io/", "dblp": "226/6472;;217/2047-1;254/0856;;17/6808;;117/4819", "google_scholar": "GeSCNR4AAAAJ;;m3tqxRQAAAAJ;UM4qFCsAAAAJ;;FJwtMf0AAAAJ;;lLMX9hcAAAAJ", "orcid": "0000-0002-1440-0406;0000-0003-2510-6365;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Junhao_Cai1;~Yuji_Yang1;~Weihao_Yuan1;~Yisheng_HE1;~Zilong_Dong2;~Liefeng_Bo1;~Hui_Cheng5;~Qifeng_Chen1", "aff": "Hong Kong University of Science and Technology;Sun Yat-Sen University;Alibaba Group;Alibaba Group;;Alibaba Group;;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;mail2.sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;;hkust.edu", "position": "PhD student;Researcher;Researcher;Researcher;;Principal Researcher;;Assistant Professor", "bibtex": "@inproceedings{\ncai2024gic,\ntitle={{GIC}: Gaussian-Informed Continuum for Physical Property Identification and Simulation},\nauthor={Junhao Cai and Yuji Yang and Weihao Yuan and Yisheng HE and Zilong Dong and Liefeng Bo and Hui Cheng and Qifeng Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SSCtCq2MH2}\n}", "github": "", "reviewers": "mZdJ;HGBq;a7go;FGDU", "pdf_size": 3059200, "rating": "6;6;7;7", "confidence": "5;3;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "81;81;66;82", "wc_strengths": "46;114;59;60", "wc_weaknesses": "36;78;46;103", "wc_questions": "98;46;135;128", "wc_limitations": "4;4;1;55", "wc_review": "265;323;307;428", "wc_reply_reviewers": "11;15;102;16", "wc_reply_authors": "16;52;49;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.5, 6.652067347825035 ], "wc_strengths_avg": [ 69.75, 26.13785568863674 ], "wc_weaknesses_avg": [ 65.75, 26.517682779609533 ], "wc_questions_avg": [ 101.75, 35.05977039285911 ], "wc_limitations_avg": [ 16.0, 22.54994456755936 ], "wc_review_avg": [ 330.75, 60.009895017405256 ], "wc_reply_reviewers_avg": [ 36.0, 38.15101571387058 ], "wc_reply_authors_avg": [ 33.25, 17.282577932704367 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11632955838533441469&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ust.hk;mail2.sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;;hkust.edu", "author_num": 8, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Sun Yat-sen University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;http://www.sysu.edu.cn/;https://www.alibaba.com", "aff_unique_abbr": "HKUST;SYSU;Alibaba", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Textbook Remedy for Domain Shifts: Knowledge Priors for Medical Image Analysis", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95098", "id": "STrpbhrvt3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=STrpbhrvt3", "openreview": "https://openreview.net/forum?id=STrpbhrvt3", "poster": "/media/PosterPDFs/NeurIPS%202024/95098.png?t=1731118916.7940962", "project": "", "author_site": "Yue Yang, Mona Gandhi, Yufei Wang, Yifan Wu, Michael Yao, Chris Callison-Burch, James Gee, Mark Yatskar", "tldr": "", "abstract": "While deep networks have achieved broad success in analyzing natural images, when applied to medical scans, they often fail in unexcepted situations. We investigate this challenge and focus on model sensitivity to domain shifts, such as data sampled from different hospitals or data confounded by demographic variables such as sex, race, etc, in the context of chest X-rays and skin lesion images. A key finding we show empirically is that existing visual backbones lack an appropriate prior from the architecture for reliable generalization in these settings. Taking inspiration from medical training, we propose giving deep networks a prior grounded in explicit medical knowledge communicated in natural language. To this end, we introduce Knowledge-enhanced Bottlenecks (KnoBo), a class of concept bottleneck models that incorporates knowledge priors that constrain it to reason with clinically relevant factors found in medical textbooks or PubMed. KnoBo uses retrieval-augmented language models to design an appropriate concept space paired with an automatic training procedure for recognizing the concept. We evaluate different resources of knowledge and recognition architectures on a broad range of domain shifts across 20 datasets. In our comprehensive evaluation with two imaging modalities, KnoBo outperforms fine-tuned models on confounded datasets by 32.4% on average. Finally, evaluations reveal that PubMed is a promising resource for making medical models less sensitive to domain shift, outperforming other resources on both diversity of information and final prediction performance.", "keywords": "Robustness;Interpretability;Domain Generalization;Knowledge Prior;Medical Images", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/925d5783ed0b0e3c6c54f6a3f5f1129386e6cba6.zip", "author": "Yue Yang;Mona Gandhi;Yufei Wang;Yifan Wu;Michael S Yao;Chris Callison-Burch;James Gee;Mark Yatskar", "authorids": "~Yue_Yang3;~Mona_Gandhi1;~Yufei_Wang11;~Yifan_Wu4;~Michael_S_Yao1;~Chris_Callison-Burch1;~James_Gee1;~Mark_Yatskar1", "gender": "M;F;F;F;M;M;M;", "homepage": "https://yueyang1996.github.io;https://monagandhi09.github.io/;;https://yifannnwu.com/;https://michaelsyao.com;https://www.cis.upenn.edu/~ccb/;https://www.med.upenn.edu/apps/faculty/index.php/g5455356/p10656;", "dblp": ";75/3718;;;327/9549.html;;30/6904;", "google_scholar": "uvyYzagAAAAJ;1wHg0-sAAAAJ;;rxg0zB4AAAAJ;jz9IC2QAAAAJ;nv-MV58AAAAJ;https://scholar.google.com.tw/citations?user=fU8fmEIAAAAJ;", "orcid": ";;;;0000-0002-7008-6028;;;", "linkedin": ";monagandhi09/;yufei-wang-896938257/;yifan-wu-675737105/;michael-s-yao/;chris-callison-burch-40bb87b7/;;", "or_profile": "~Yue_Yang3;~Mona_Gandhi1;~Yufei_Wang11;~Yifan_Wu4;~Michael_S_Yao1;~Chris_Callison-Burch1;~James_Gee1;~Mark_Yatskar1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;Allen Institute for Artificial Intelligence;University of Pennsylvania;", "aff_domain": "seas.upenn.edu;upenn.edu;seas.upenn.edu;upenn.edu;seas.upenn.edu;allenai.org;upenn.edu;", "position": "PhD student;MS student;MS student;PhD student;PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\nyang2024a,\ntitle={A Textbook Remedy for Domain Shifts: Knowledge Priors for Medical Image Analysis},\nauthor={Yue Yang and Mona Gandhi and Yufei Wang and Yifan Wu and Michael S Yao and Chris Callison-Burch and James Gee and Mark Yatskar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=STrpbhrvt3}\n}", "github": "", "reviewers": "tMbd;f9Nf;T8n7;z571", "pdf_size": 4250922, "rating": "6;6;7;7", "confidence": "3;5;3;3", "soundness": "3;2;3;3", "novelty": "4;3;3;3", "presentation": "3;2;4;3", "wc_summary": "85;65;116;83", "wc_strengths": "157;65;131;55", "wc_weaknesses": "159;32;60;112", "wc_questions": "136;284;67;96", "wc_limitations": "1;1;72;8", "wc_review": "538;447;446;354", "wc_reply_reviewers": "10;20;25;384", "wc_reply_authors": "55;38;0;932", "reply_reviewers": "1;1;1;3", "reply_authors": "2;2;1;6", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.25, 18.335416548308903 ], "wc_strengths_avg": [ 102.0, 43.139309220245984 ], "wc_weaknesses_avg": [ 90.75, 48.75128203442449 ], "wc_questions_avg": [ 145.75, 83.49363748214591 ], "wc_limitations_avg": [ 20.5, 29.87055406248769 ], "wc_review_avg": [ 446.25, 65.05526496756431 ], "wc_reply_reviewers_avg": [ 109.75, 158.4303869211964 ], "wc_reply_authors_avg": [ 256.25, 390.65225904888865 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14342754979217645412&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "seas.upenn.edu;upenn.edu;seas.upenn.edu;upenn.edu;seas.upenn.edu;allenai.org;upenn.edu;", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "University of Pennsylvania;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://allenai.org", "aff_unique_abbr": "UPenn;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Fragility of Fairness: Causal Sensitivity Analysis for Fair Machine Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97682", "id": "SXYmSTXyHm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SXYmSTXyHm", "openreview": "https://openreview.net/forum?id=SXYmSTXyHm", "poster": "", "project": "", "author_site": "Jake Fawkes, Nic Fishman, Mel Andrews, Zachary Lipton", "tldr": "", "abstract": "Fairness metrics are a core tool in the fair machine learning literature (FairML),\nused to determine that ML models are, in some sense, \u201cfair.\u201d Real-world data,\nhowever, are typically plagued by various measurement biases and other violated\nassumptions, which can render fairness assessments meaningless. We adapt tools\nfrom causal sensitivity analysis to the FairML context, providing a general frame-\nwork which (1) accommodates effectively any combination of fairness metric and\nbias that can be posed in the \u201coblivious setting\u201d; (2) allows researchers to inves-\ntigate combinations of biases, resulting in non-linear sensitivity; and (3) enables\nflexible encoding of domain-specific constraints and assumptions. Employing this\nframework, we analyze the sensitivity of the most common parity metrics under 3\nvarieties of classifier across 14 canonical fairness datasets. Our analysis reveals the\nstriking fragility of fairness assessments to even minor dataset biases. We show that\ncausal sensitivity analysis provides a powerful and necessary toolkit for gauging\nthe informativeness of parity metric evaluations. Our repository is \\href{https://github.com/Jakefawkes/fragile_fair}{available here}.", "keywords": "FairML;Sensitivity Analysis;Causal inference", "primary_area": "", "supplementary_material": "/attachment/a739831b8f844c8012ebba5e2c8e1ed35418fa79.pdf", "author": "Jake Fawkes;Nic Fishman;Mel Andrews;Zachary Chase Lipton", "authorids": "~Jake_Fawkes1;~Nic_Fishman1;~Mel_Andrews1;~Zachary_Chase_Lipton1", "gender": "M;M;Non-Binary;Unspecified", "homepage": "http://csml.stats.ox.ac.uk/people/;https://njw.fish;https://mel-andrews.com/;http://zacklipton.com", "dblp": ";;;", "google_scholar": ";saYhrnwAAAAJ;aEh_jKcAAAAJ;MN9Kfg8AAAAJ", "orcid": ";;0000-0002-0042-5098;", "linkedin": ";;;", "or_profile": "~Jake_Fawkes1;~Nic_Fishman1;~Mel_Andrews1;~Zachary_Chase_Lipton1", "aff": "University of Oxford;Harvard University, Harvard University;University of Cincinnati;Carnegie Mellon University", "aff_domain": "oxford.ac.uk;g.harvard.edu;uc.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfawkes2024the,\ntitle={The Fragility of Fairness: Causal Sensitivity Analysis for Fair Machine Learning},\nauthor={Jake Fawkes and Nic Fishman and Mel Andrews and Zachary Chase Lipton},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=SXYmSTXyHm}\n}", "github": "", "reviewers": "TDXm;k2PJ;5n81;smoh", "pdf_size": 3356663, "rating": "6;7;7;8", "confidence": "3;4;4;2", "wc_summary_and_contributions": "159;102;73;182", "wc_strengths": "26;2;66;96", "wc_improvement": "81;36;44;98", "wc_limitations": "19;35;154;113", "wc_correctness": "36;22;17;39", "wc_clarity": "36;26;20;127", "wc_relation_to_prior_work": "21;23;27;29", "wc_documentation": "36;10;6;23", "wc_additional_feedback": "1;1;1;1", "wc_review": "415;257;408;708", "wc_reply_reviewers": "22;0;36;88", "wc_reply_authors": "114;33;0;57", "reply_reviewers": "1;0;1;2", "reply_authors": "3;2;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 129.0, 43.514365444069156 ], "wc_strengths_avg": [ 47.5, 36.14899721984 ], "wc_improvement_avg": [ 64.75, 25.625914617823888 ], "wc_limitations_avg": [ 80.25, 55.47690961111659 ], "wc_correctness_avg": [ 28.5, 9.233092656309694 ], "wc_clarity_avg": [ 52.25, 43.533751274155094 ], "wc_relation_to_prior_work_avg": [ 25.0, 3.1622776601683795 ], "wc_documentation_avg": [ 18.75, 11.776565713313877 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 447.0, 163.3753347356938 ], "wc_reply_reviewers_avg": [ 36.5, 32.38440982942255 ], "wc_reply_authors_avg": [ 51.0, 41.62331077653482 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7860097136574309401&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "oxford.ac.uk;g.harvard.edu;uc.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Oxford;Harvard University;University of Cincinnati;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.harvard.edu;https://www.uc.edu;https://www.cmu.edu", "aff_unique_abbr": "Oxford;Harvard;UC;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "GrounDiT: Grounding Diffusion Transformers via Noisy Patch Transplantation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95097", "id": "SXbyy0a3rY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SXbyy0a3rY", "openreview": "https://openreview.net/forum?id=SXbyy0a3rY", "poster": "/media/PosterPDFs/NeurIPS%202024/95097.png?t=1733404783.5596027", "project": "", "author_site": "Phillip Lee, Taehoon Yoon, Minhyuk Sung", "tldr": "", "abstract": "We introduce GrounDiT, a novel training-free spatial grounding technique for text-to-image generation using Diffusion Transformers (DiT). Spatial grounding with bounding boxes has gained attention for its simplicity and versatility, allowing for enhanced user control in image generation. However, prior training-free approaches often rely on updating the noisy image during the reverse diffusion process via backpropagation from custom loss functions, which frequently struggle to provide precise control over individual bounding boxes. In this work, we leverage the flexibility of the Transformer architecture, demonstrating that DiT can generate noisy patches corresponding to each bounding box, fully encoding the target object and allowing for fine-grained control over each region. Our approach builds on an intriguing property of DiT, which we refer to as semantic sharing. Due to semantic sharing, when a smaller patch is jointly denoised alongside a generatable-size image, the two become semantic clones. Each patch is denoised in its own branch of the generation process and then transplanted into the corresponding region of the original noisy image at each timestep, resulting in robust spatial grounding for each bounding box. In our experiments on the HRS and DrawBench benchmarks, we achieve state-of-the-art performance compared to previous training-free approaches. Project Page: https://groundit-diffusion.github.io/.", "keywords": "Text-to-Image Generation;Visual Grounding", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yuseung Lee;TaeHoon Yoon;Minhyuk Sung", "authorids": "~Yuseung_Lee1;~TaeHoon_Yoon1;~Minhyuk_Sung1", "gender": "M;M;M", "homepage": "https://phillipinseoul.github.io/;https://github.com/taehoon-yoon;https://mhsung.github.io/", "dblp": "389/9579;391/5521;171/6792", "google_scholar": "h_Jb0wcAAAAJ;LOJKOWcAAAAJ;PcIYMp4AAAAJ", "orcid": ";;", "linkedin": "yuseung-lee-6b085223a/;;mhsung", "or_profile": "~Yuseung_Lee1;~TaeHoon_Yoon1;~Minhyuk_Sung1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nlee2024groundit,\ntitle={GrounDiT: Grounding Diffusion Transformers via Noisy Patch Transplantation},\nauthor={Yuseung Lee and TaeHoon Yoon and Minhyuk Sung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SXbyy0a3rY}\n}", "github": "", "reviewers": "LMpP;bA1A;xDuk;sEMS", "pdf_size": 33584360, "rating": "4;5;6;7", "confidence": "4;3;4;3", "soundness": "2;3;3;2", "novelty": "2;3;3;3", "presentation": "1;3;2;1", "wc_summary": "39;123;137;180", "wc_strengths": "32;60;57;97", "wc_weaknesses": "85;168;250;245", "wc_questions": "2;6;1;151", "wc_limitations": "1;19;9;6", "wc_review": "159;376;454;679", "wc_reply_reviewers": "0;21;108;93", "wc_reply_authors": "0;30;28;31", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 119.75, 51.1340150975845 ], "wc_strengths_avg": [ 61.5, 23.200215516240362 ], "wc_weaknesses_avg": [ 187.0, 67.26440366196671 ], "wc_questions_avg": [ 40.0, 64.11318117204917 ], "wc_limitations_avg": [ 8.75, 6.5717197140474575 ], "wc_review_avg": [ 417.0, 185.9153032969583 ], "wc_reply_reviewers_avg": [ 55.5, 45.9156835950419 ], "wc_reply_authors_avg": [ 22.25, 12.891373084353738 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10787021231413060023&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "On the Identifiability of Hybrid Deep Generative Models: Meta-Learning as a Solution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95096", "id": "SXy1nVGyO7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SXy1nVGyO7", "openreview": "https://openreview.net/forum?id=SXy1nVGyO7", "poster": "/media/PosterPDFs/NeurIPS%202024/95096.png?t=1731711961.1187346", "project": "", "author_site": "Yubo Ye, Maryam Tolou, Sumeet Vadhavkar, Xiajun Jiang, Huafeng Liu, Linwei Wang", "tldr": "", "abstract": "The interest in leveraging physics-based inductive bias in deep learning has resulted in recent development of _hybrid deep generative models (hybrid-DGMs)_ that integrates known physics-based mathematical expressions in neural generative models. To identify these hybrid-DGMs requires inferring parameters of the physics-based component along with their neural component. The identifiability of these hybrid-DGMs, however, has not yet been theoretically probed or established. How does the existing theory of the un-identifiability of general DGMs apply to hybrid-DGMs? What may be an effective approach to consutrct a hybrid-DGM with theoretically-proven identifiability? This paper provides the first theoretical probe into the identifiability of hybrid-DGMs, and present meta-learning as a novel solution to construct identifiable hybrid-DGMs. On synthetic and real-data benchmarks, we provide strong empirical evidence for the un-identifiability of existing hybrid-DGMs using unconditional priors, and strong identifiability results of the presented meta-formulations of hybrid-DGMs.", "keywords": "hybrid modeling; identifiability; meta-learning", "primary_area": "generative_models", "supplementary_material": "/attachment/dd7a89824d8d4b921c5233690fe175d281a9a6ba.zip", "author": "Yubo Ye;Maryam Toloubidokhti;Sumeet Vadhavkar;Xiajun Jiang;Huafeng Liu;Linwei Wang", "authorids": "~Yubo_Ye1;~Maryam_Toloubidokhti1;~Sumeet_Vadhavkar1;~Xiajun_Jiang1;~Huafeng_Liu1;~Linwei_Wang1", "gender": "M;F;M;M;M;F", "homepage": "https://goforit-yyb.github.io/GoForItYYB.github.io/;;;;https://person.zju.edu.cn/0003436;https://people.rit.edu/lxwast", "dblp": ";295/3704;;45/10201;;02/6162", "google_scholar": ";Qjpj72cAAAAJ;;P9klFBUAAAAJ;;https://scholar.google.com.tw/citations?user=CG56DzcAAAAJ", "orcid": ";;;0000-0003-1075-6736;;", "linkedin": ";;sumeet-vadhavkar/;;;", "or_profile": "~Yubo_Ye1;~Maryam_Toloubidokhti1;~Sumeet_Vadhavkar1;~Xiajun_Jiang1;~Huafeng_Liu1;~Linwei_Wang1", "aff": "Zhejiang University;Rochester Institute of Technology;Rochester Institute of Technology;Rochester Institute of Technology;Zhejiang University;Rochester Institute of Technology", "aff_domain": "zju.edu.cn;rit.edu;rit.edu;rit.edu;zju.edu.cn;rit.edu", "position": "MS student;PhD student;PhD student;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nye2024on,\ntitle={On the Identifiability of Hybrid Deep Generative Models: Meta-Learning as a Solution},\nauthor={Yubo Ye and Maryam Toloubidokhti and Sumeet Vadhavkar and Xiajun Jiang and Huafeng Liu and Linwei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SXy1nVGyO7}\n}", "github": "", "reviewers": "HAHc;1aic;1hrQ;5WN4", "pdf_size": 992238, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "2;1;3;3", "wc_summary": "36;21;46;54", "wc_strengths": "52;50;66;12", "wc_weaknesses": "243;82;167;9", "wc_questions": "37;68;11;145", "wc_limitations": "25;6;3;22", "wc_review": "393;227;293;242", "wc_reply_reviewers": "36;50;38;17", "wc_reply_authors": "54;27;54;54", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 39.25, 12.316147936753602 ], "wc_strengths_avg": [ 45.0, 20.024984394500787 ], "wc_weaknesses_avg": [ 125.25, 88.02378939809397 ], "wc_questions_avg": [ 65.25, 50.27113983191549 ], "wc_limitations_avg": [ 14.0, 9.617692030835672 ], "wc_review_avg": [ 288.75, 64.9706664580255 ], "wc_reply_reviewers_avg": [ 35.25, 11.818946653572814 ], "wc_reply_authors_avg": [ 47.25, 11.691342951089922 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cqbaKMMRs6QJ:scholar.google.com/&scioq=On+the+Identifiability+of+Hybrid+Deep+Generative+Models:+Meta-Learning+as+a+Solution&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "zju.edu.cn;rit.edu;rit.edu;rit.edu;zju.edu.cn;rit.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Zhejiang University;Rochester Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.rit.edu", "aff_unique_abbr": "ZJU;RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "id": "SYZzABI1ns", "title": "CS-Bench: A Comprehensive Benchmark for Large Language Models towards Computer Science Mastery", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Computer Science (CS) stands as a testament to the intricacies of human intelligence, profoundly advancing the development of artificial intelligence and modern society. However, the current community of large language models (LLMs) overly focuses on benchmarks for analyzing specific foundational skills (e.g. mathematics and code generation), neglecting an all-round evaluation of the computer science field. To bridge this gap, we introduce CS-Bench, the first bilingual (Chinese-English) benchmark dedicated to evaluating the performance of LLMs in computer science. CS-Bench comprises approximately 5K meticulously curated test samples, covering 26 subfields across 4 key areas of computer science, encompassing various task forms and divisions of knowledge and reasoning. Utilizing CS-Bench, we conduct a comprehensive evaluation of over 30 mainstream LLMs, revealing the relationship between CS performance and model scales. We also quantitatively analyze the reasons for failures in existing LLMs and highlight directions for improvements, including knowledge supplementation and CS-specific reasoning. Further cross-capability experiments show a high correlation between LLMs' capabilities in computer science and their abilities in mathematics and coding. Moreover, expert LLMs specialized in mathematics and coding also demonstrate strong performances in several CS subfields. Looking ahead, we envision CS-Bench serving as a cornerstone for LLM applications in the CS field and paving new avenues in assessing LLMs' diverse reasoning capabilities.", "keywords": "large language model;evaluation;computer science", "primary_area": "", "supplementary_material": "/attachment/a41d8186387858ccfabdf9783727666a7938773e.zip", "author": "Xiaoshuai Song;Muxi Diao;Guanting Dong;Zhengyang Wang;Yujia Fu;Runqi Qiao;Zhexu Wang;Dayuan Fu;Huangxuan Wu;Bin Liang;Weihao Zeng;Yejie Wang;Zhuoma GongQue;Jianing Yu;Qiuna Tan;Weiran Xu", "authorids": "~Xiaoshuai_Song1;~Muxi_Diao1;~Guanting_Dong1;~Zhengyang_Wang4;~Yujia_Fu1;~Runqi_Qiao1;~Zhexu_Wang1;~Dayuan_Fu2;~Huangxuan_Wu1;~Bin_Liang11;~Weihao_Zeng2;~Yejie_Wang1;~Zhuoma_GongQue1;~Jianing_Yu1;~Qiuna_Tan1;~Weiran_Xu1", "gender": "M;M;M;M;M;M;M;;F;F;F;M;M;M;F;", "homepage": ";https://dongguanting.github.io/;;https://www.notion.so/wang-zx/528382744a0444d4a2f8f244a042e7ce;https://github.com/OddFunction0205;;https://zeng-wh.github.io/;https://github.com/banksy23;https://www.zhihu.com/people/x-xuan-6;https://github.com/Galling-Yu;https://github.com/Na-nata;;https://github.com/Fu-Dayuan;https://github.com/LeonDiao0427;https://Fuyujia7799.com;https://github.com/majiangmqjiangma", "dblp": "45/9576;;379/9974;;;;174/3836;349/7881.html;;;380/0012;41/5448;331/3042;;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;amozZDkAAAAJ;;;;https://scholar.google.com.hk/citations?user=4faqfxAAAAAJ;;8AcrmfsAAAAJ;;;;https://scholar.google.com/citations?view_op=list_works;8t2DPs0AAAAJ;;;", "orcid": ";;0009-0000-4508-201X;;;;;;;;;0000-0002-9416-7666;0000-0003-3614-6653;;;", "linkedin": ";;;;;;;;;;;;;;;", "or_profile": "~Xiaoshuai_Song1;~Guanting_Dong1;~Runqi_Qiao1;~Zhexu_Wang1;~Huangxuan_Wu1;~Bin_Liang11;~Weihao_Zeng2;~Yejie_Wang1;~Zhuoma_GongQue1;~Jianing_Yu1;~Qiuna_Tan1;~Weiran_Xu1;~dayuan_fu1;~MuxiDiao1;~Fu_Yujia1;~Wang_Zengyang1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;School of Artificial Intelligence, Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;MS student;PhD student;Undergrad student;Undergrad student;Undergrad student;MS student;MS student;Undergrad student;Undergrad student;Undergrad student;Associate Professor;MS student;PhD student;Undergrad student;MS student", "bibtex": "@misc{\nanonymous2024csbench,\ntitle={{CS}-Bench: A Comprehensive Benchmark for Large Language Models towards Computer Science Mastery},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=SYZzABI1ns}\n}", "github": "", "project": "", "reviewers": "qT8F;Whd5;TgaA", "site": "https://openreview.net/forum?id=SYZzABI1ns", "pdf_size": 1123136, "rating": "2;7;8", "confidence": "5;3;3", "wc_summary_and_contributions": "20;57;36", "wc_strengths": "2;26;64", "wc_improvement": "2;11;6", "wc_limitations": "6;10;1", "wc_correctness": "2;1;35", "wc_clarity": "21;1;39", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;10;1", "wc_additional_feedback": "1;1;1", "wc_review": "56;118;184", "wc_reply_reviewers": "0;21;32", "wc_reply_authors": "53;27;27", "reply_reviewers": "0;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 2.6246692913372702 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 37.666666666666664, 15.15109090315135 ], "wc_strengths_avg": [ 30.666666666666668, 25.525586292102197 ], "wc_improvement_avg": [ 6.333333333333333, 3.6817870057290873 ], "wc_limitations_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_correctness_avg": [ 12.666666666666666, 15.797327481430381 ], "wc_clarity_avg": [ 20.333333333333332, 15.520595635763755 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 4.0, 4.242640687119285 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 119.33333333333333, 52.264285660052366 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 13.27487183449325 ], "wc_reply_authors_avg": [ 35.666666666666664, 12.256517540566824 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.9878291611472622, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3349707847183467152&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LFME: A Simple Framework for Learning from Multiple Experts in Domain Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95095", "id": "SYjxhKcXoN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SYjxhKcXoN", "openreview": "https://openreview.net/forum?id=SYjxhKcXoN", "poster": "/media/PosterPDFs/NeurIPS%202024/95095.png?t=1731178807.8590627", "project": "", "author_site": "Liang Chen, Yong Zhang, Yibing Song, Zhiqiang Shen, Lingqiao Liu", "tldr": "", "abstract": "Domain generalization (DG) methods aim to maintain good performance in an unseen target domain by using training data from multiple source domains. While success on certain occasions are observed, enhancing the baseline across most scenarios remains challenging. This work introduces a simple yet effective framework, dubbed learning from multiple experts (LFME), that aims to make the target model an expert in all source domains to improve DG. Specifically, besides learning the target model used in inference, LFME will also train multiple experts specialized in different domains, whose output probabilities provide professional guidance by simply regularizing the logit of the target model. Delving deep into the framework, we reveal that the introduced logit regularization term implicitly provides effects of enabling the target model to harness more information, and mining hard samples from the experts during training. Extensive experiments on benchmarks from different DG tasks demonstrate that LFME is consistently beneficial to the baseline and can achieve comparable performance to existing arts. Code is available at https://github.com/liangchen527/LFME.", "keywords": "Domain generalization;Knowledge distillation", "primary_area": "machine_vision", "supplementary_material": "/attachment/1cc0bd106dbfe3a8bf8f5759440cd841700bdd6e.zip", "author": "Liang Chen;Yong Zhang;Yibing Song;Zhiqiang Shen;Lingqiao Liu", "authorids": "~Liang_Chen6;~Yong_Zhang6;~Yibing_Song1;~Zhiqiang_Shen1;~Lingqiao_Liu3", "gender": "M;M;;;M", "homepage": "https://liangchen527.github.io/;https://yzhang2016.github.io/yongnorriszhang.github.io/;https://ybsong00.github.io/;;https://sites.google.com/site/lingqiaoliu83/", "dblp": "01/5394-1.html;66/4615-34.html;77/2117;;45/7776", "google_scholar": "ZRd8xLIAAAAJ;a_zSeVEAAAAJ;oRhJHmIAAAAJ;;Y2xu62UAAAAJ", "orcid": "0000-0002-2359-6498;;;;", "linkedin": ";;;;", "or_profile": "~Liang_Chen6;~Yong_Zhang6;~Yibing_Song1;~Zhiqiang_Shen1;~Lingqiao_Liu3", "aff": "University of Adelaide;Tencent AI Lab;Alibaba DAMO Academy;;University of Adelaide", "aff_domain": "adelaide.edu.au;tencent.com;alibaba-inc.com;;adelaide.edu.au", "position": "PhD student;Researcher;Staff Scientist;;Associate Professor", "bibtex": "@inproceedings{\nchen2024lfme,\ntitle={{LFME}: A Simple Framework for Learning from Multiple Experts in Domain Generalization},\nauthor={Liang Chen and Yong Zhang and Yibing Song and Zhiqiang Shen and Lingqiao Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SYjxhKcXoN}\n}", "github": "", "reviewers": "uZyo;oULp;h8LM;xxxE", "pdf_size": 2707755, "rating": "4;4;8;8", "confidence": "3;4;4;5", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "2;2;3;4", "wc_summary": "66;71;91;78", "wc_strengths": "20;69;131;100", "wc_weaknesses": "94;368;173;280", "wc_questions": "134;138;65;84", "wc_limitations": "50;8;6;1", "wc_review": "364;654;466;543", "wc_reply_reviewers": "0;393;25;40", "wc_reply_authors": "0;1430;0;0", "reply_reviewers": "0;3;1;1", "reply_authors": "1;4;1;1", "rating_avg": [ 6.0, 2.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 76.5, 9.394147114027968 ], "wc_strengths_avg": [ 80.0, 40.99390198553927 ], "wc_weaknesses_avg": [ 228.75, 104.02253361651984 ], "wc_questions_avg": [ 105.25, 31.506943679132064 ], "wc_limitations_avg": [ 16.25, 19.651653874419832 ], "wc_review_avg": [ 506.75, 106.10696254252122 ], "wc_reply_reviewers_avg": [ 114.5, 161.42567949369146 ], "wc_reply_authors_avg": [ 357.5, 619.2081637058736 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16251876563374167356&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "adelaide.edu.au;tencent.com;alibaba-inc.com;;adelaide.edu.au", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Adelaide;Tencent;Alibaba Group", "aff_unique_dep": ";Tencent AI Lab;DAMO Academy", "aff_unique_url": "https://www.adelaide.edu.au;https://ai.tencent.com;https://www.alibaba-group.com", "aff_unique_abbr": "Adelaide;Tencent AI Lab;Alibaba DAMO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "Bayesian Strategic Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95094", "id": "SadbRPoG2k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SadbRPoG2k", "openreview": "https://openreview.net/forum?id=SadbRPoG2k", "poster": "/media/PosterPDFs/NeurIPS%202024/95094.png?t=1731725297.684675", "project": "", "author_site": "Lee Cohen, Saeed Sharifi-Malvajerdi, Kevin Stangl, Ali Vakilian, Juba Ziani", "tldr": "", "abstract": "In strategic classification, agents modify their features, at a cost, to obtain a positive classification outcome from the learner\u2019s classifier, \ntypically assuming agents have full knowledge of the deployed classifier. In contrast, we consider a Bayesian setting where agents have a common distributional prior on the classifier being used and agents manipulate their features to maximize their expected utility according to this prior.\nThe learner can reveal truthful, yet not necessarily complete, information about the classifier to the agents, aiming to release just enough information to shape the agents' behavior and thus maximize accuracy. We show that partial information release can counter-intuitively benefit the learner\u2019s accuracy, allowing qualified agents to pass the classifier while preventing unqualified agents from doing so. Despite the intractability of computing the best response of an agent in the general case, we provide oracle-efficient algorithms for scenarios where the learner\u2019s hypothesis class consists of low-dimensional linear classifiers or when the agents\u2019 cost function satisfies a sub-modularity condition. \nAdditionally, we address the learner\u2019s optimization problem, offering both positive and negative results on determining the optimal information release to maximize expected accuracy, particularly in settings where an agent\u2019s qualification can be represented by a real-valued number.", "keywords": "strategic classification;strategic agents;game theory;Bayesian", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Lee Cohen;Saeed Sharifi -Malvajerdi;Kevin Stangl;Ali Vakilian;Juba Ziani", "authorids": "~Lee_Cohen1;~Saeed_Sharifi_-Malvajerdi1;~Kevin_Stangl1;~Ali_Vakilian1;~Juba_Ziani1", "gender": "F;M;;M;M", "homepage": "https://sites.google.com/view/leecohen;https://sites.google.com/view/saeedsh/home;http://www.mit.edu/~vakilian/;http://www.juba-ziani.com;https://home.ttic.edu/~kstangl/", "dblp": "162/2494.html;179/2421;116/4679;157/3784;185/0953", "google_scholar": ";lRhBdecAAAAJ;uXZaVaAAAAAJ;https://scholar.google.co.in/citations?user=1bwPKXpo97YC;76AAneMAAAAJ", "orcid": ";;0000-0001-5049-7594;0000-0002-3324-4349;", "linkedin": ";;;;", "or_profile": "~Lee_Cohen1;~Saeed_Sharifi_-Malvajerdi1;~Ali_Vakilian1;~Juba_Ziani1;~Kevin_Matthew_Stangl1", "aff": "Computer Science Department, Stanford University;;Toyota Technological Institute at Chicago;Georgia Institute of Technology;Toyota Technological Institute at Chicago", "aff_domain": "cs.stanford.edu;;ttic.edu;gatech.edu;ttic.edu", "position": "Postdoc;;Research Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ncohen2024bayesian,\ntitle={Bayesian Strategic Classification},\nauthor={Lee Cohen and Saeed Sharifi -Malvajerdi and Kevin Stangl and Ali Vakilian and Juba Ziani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SadbRPoG2k}\n}", "github": "", "reviewers": "VHeD;XrSi;oMrT;F6uC", "pdf_size": 500822, "rating": "5;5;6;8", "confidence": "3;3;4;3", "soundness": "4;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "91;101;230;622", "wc_strengths": "43;36;42;103", "wc_weaknesses": "52;100;150;72", "wc_questions": "257;59;151;112", "wc_limitations": "4;1;6;9", "wc_review": "447;297;579;918", "wc_reply_reviewers": "46;68;17;0", "wc_reply_authors": "0;141;488;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 261.0, 215.51218063023722 ], "wc_strengths_avg": [ 56.0, 27.26719640887196 ], "wc_weaknesses_avg": [ 93.5, 36.806928695559485 ], "wc_questions_avg": [ 144.75, 72.56850212041034 ], "wc_limitations_avg": [ 5.0, 2.9154759474226504 ], "wc_review_avg": [ 560.25, 229.3810966492226 ], "wc_reply_reviewers_avg": [ 32.75, 26.166533969939543 ], "wc_reply_authors_avg": [ 157.25, 199.44595132516477 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3058198948280000570&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.stanford.edu;;ttic.edu;gatech.edu;ttic.edu", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Stanford University;Toyota Technological Institute at Chicago;Georgia Institute of Technology", "aff_unique_dep": "Computer Science Department;;", "aff_unique_url": "https://www.stanford.edu;https://www.tti-chicago.org;https://www.gatech.edu", "aff_unique_abbr": "Stanford;TTI Chicago;Georgia Tech", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stanford;Chicago;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GITA: Graph to Visual and Textual Integration for Vision-Language Graph Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95093", "id": "SaodQ13jga", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SaodQ13jga", "openreview": "https://openreview.net/forum?id=SaodQ13jga", "poster": "/media/PosterPDFs/NeurIPS%202024/95093.png?t=1731146601.9916906", "project": "", "author_site": "Yanbin Wei, Shuai Fu, Weisen Jiang, Zejian Zhang, Zhixiong Zeng, Qi Wu, James Kwok, Yu Zhang", "tldr": "", "abstract": "Large Language Models (LLMs) are increasingly used for various tasks with graph structures. Though LLMs can process graph information in a textual format, they overlook the rich vision modality, which is an intuitive way for humans to comprehend structural information and conduct general graph reasoning. The potential benefits and capabilities of representing graph structures as visual images (i.e., $\\textit{visual graph}$) are still unexplored. To fill the gap, we innovatively propose an end-to-end framework, called $\\textbf{G}$raph to v$\\textbf{I}$sual and $\\textbf{T}$extual Integr$\\textbf{A}$tion (GITA), which firstly incorporates visual graphs into general graph reasoning. Besides, we establish $\\textbf{G}$raph-based $\\textbf{V}$ision-$\\textbf{L}$anguage $\\textbf{Q}$uestion $\\textbf{A}$nswering (GVLQA) dataset from existing graph data, which is the first vision-language dataset for general graph reasoning purposes. Extensive experiments on the GVLQA dataset and five real-world datasets show that GITA outperforms mainstream LLMs in terms of general graph reasoning capabilities. Moreover, We highlight the effectiveness of the layout augmentation on visual graphs and pretraining on the GVLQA dataset.", "keywords": "Graph Reasoning;Visual Question Answering;Large Multimodal Model", "primary_area": "other", "supplementary_material": "/attachment/79333f6862f3e5d4487da573e8ba524d7b41e00a.zip", "author": "Yanbin Wei;Shuai Fu;Weisen Jiang;Zejian Zhang;Zhixiong Zeng;Qi Wu;James Kwok;Yu Zhang", "authorids": "~Yanbin_Wei1;~Shuai_Fu1;~Weisen_Jiang1;~Zejian_Zhang3;~Zhixiong_Zeng1;~Qi_Wu3;~James_Kwok1;~Yu_Zhang3", "gender": "M;M;M;;M;M;;M", "homepage": ";;https://wayson-ust.github.io/;https://github.com/zhangzj39?tab=repositories;https://scholar.google.com.hk/citations?user=LhKgnswAAAAJ&hl=zh-CN&oi=sra;http://qi-wu.me/;;http://cse.sustech.edu.cn/faculty/~zhangy/", "dblp": "329/1767;;302/7625;;23/10840.html;96/3446-1;;50/671-6", "google_scholar": "QkcrPzIAAAAJ;https://scholar.google.com.hk/citations?user=QXoLj2oAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=LhKgnswAAAAJ;https://scholar.google.co.uk/citations?user=aKXe1FEAAAAJ;;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ", "orcid": "0000-0003-1301-2505;;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Yanbin_Wei1;~Shuai_Fu1;~Weisen_Jiang1;~Zejian_Zhang3;~Zhixiong_Zeng1;~Qi_Wu3;~James_Kwok1;~Yu_Zhang3", "aff": "Hong Kong University of Science and Technology;Southern University of Science and Technology;Hong Kong University of Science and Technology;University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;The University of Adelaide;;Southern University of Science and Technology", "aff_domain": "ust.hk;sustech.edu.cn;ust.hk;ucas.ac.cn;ia.ac.cn;adelaide.edu.au;;sustc.edu.cn", "position": "PhD student;Research Assistant;PhD student;MS student;PhD student;Associate Professor;;Associate Professor", "bibtex": "@inproceedings{\nwei2024gita,\ntitle={{GITA}: Graph to Visual and Textual Integration for Vision-Language Graph Reasoning},\nauthor={Yanbin Wei and Shuai Fu and Weisen Jiang and Zejian Zhang and Zhixiong Zeng and Qi Wu and James Kwok and Yu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SaodQ13jga}\n}", "github": "", "reviewers": "6MUx;SxbK;oE8d;gmAJ", "pdf_size": 3696060, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;4;2", "wc_summary": "85;97;76;42", "wc_strengths": "73;43;143;31", "wc_weaknesses": "91;273;327;85", "wc_questions": "55;5;5;13", "wc_limitations": "3;1;32;3", "wc_review": "307;419;583;174", "wc_reply_reviewers": "252;17;21;13", "wc_reply_authors": "1150;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 75.0, 20.457272545478784 ], "wc_strengths_avg": [ 72.5, 43.482755202493784 ], "wc_weaknesses_avg": [ 194.0, 107.72650555921695 ], "wc_questions_avg": [ 19.5, 20.75451758051726 ], "wc_limitations_avg": [ 9.75, 12.871965661856 ], "wc_review_avg": [ 370.75, 150.12723770189072 ], "wc_reply_reviewers_avg": [ 75.75, 101.7972863096065 ], "wc_reply_authors_avg": [ 287.5, 497.9646071760522 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8460787319608910150&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ust.hk;sustech.edu.cn;ust.hk;ucas.ac.cn;ia.ac.cn;adelaide.edu.au;;sustc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;3;4;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Southern University of Science and Technology;University of Chinese Academy of Sciences;Chinese Academy of Sciences;University of Adelaide", "aff_unique_dep": ";;;Institute of Automation;", "aff_unique_url": "https://www.ust.hk;https://www.sustech.edu.cn;http://www.ucas.ac.cn;http://www.ia.cas.cn;https://www.adelaide.edu.au", "aff_unique_abbr": "HKUST;SUSTech;UCAS;CAS;Adelaide", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "GC-Bench: An Open and Unified Benchmark for Graph Condensation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97681", "id": "ScPgzCZ6Lo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ScPgzCZ6Lo", "openreview": "https://openreview.net/forum?id=ScPgzCZ6Lo", "poster": "/media/PosterPDFs/NeurIPS%202024/97681.png?t=1731423894.7346277", "project": "", "author_site": "Qingyun Sun, Ziying Chen, Beining Yang, Cheng Ji, Xingcheng Fu, Sheng Zhou, Hao Peng, Jianxin Li, Philip S Yu", "tldr": "", "abstract": "Graph condensation (GC) has recently garnered considerable attention due to its ability to reduce large-scale graph datasets while preserving their essential properties. The core concept of GC is to create a smaller, more manageable graph that retains the characteristics of the original graph. Despite the proliferation of graph condensation methods developed in recent years, there is no comprehensive evaluation and in-depth analysis, which creates a great obstacle to understanding the progress in this field. To fill this gap, we develop a comprehensive Graph Condensation Benchmark (GC-Bench) to analyze the performance of graph condensation in different scenarios systematically. Specifically, GC-Bench systematically investigates the characteristics of graph condensation in terms of the following dimensions: effectiveness, transferability, and complexity. We comprehensively evaluate 12 state-of-the-art graph condensation algorithms in node-level and graph-level tasks and analyze their performance in 12 diverse graph datasets. Further, we have developed an easy-to-use library for training and evaluating different GC methods to facilitate reproducible research.The GC-Bench library is available at https://github.com/RingBDStack/GC-Bench.", "keywords": "graph condensation;graph distillation;dataset distillation", "primary_area": "", "supplementary_material": "/attachment/d267571cf7d281c342190135e5ec0f740726bdf4.pdf", "author": "Qingyun Sun;Ziying Chen;Beining Yang;Cheng Ji;Xingcheng Fu;Sheng Zhou;Hao Peng;Jianxin Li;Philip S. Yu", "authorids": "~Qingyun_Sun2;~Ziying_Chen1;~Beining_Yang1;~Cheng_Ji1;~Xingcheng_Fu1;~Sheng_Zhou1;~Hao_Peng7;~Jianxin_Li3;~Philip_S._Yu1", "gender": "F;F;;M;M;M;M;M;M", "homepage": "https://sunqysunqy.github.io/;https://www.researchgate.net/profile/Ziying-Chen-23;;https://scholar.google.com/citations?hl=en&user=fRAeIZAAAAAJ;https://fuxingcheng.github.io/;https://zhoushengisnoob.github.io/;https://penghao-bdsc.github.io/;http://myjianxin.github.io;https://cs.uic.edu/profiles/philip-yu/", "dblp": ";;;32/598-1.html;236/7003;34/4858-4.html;69/7742-1;l/JianxinLi-2.html;y/PhilipSYu", "google_scholar": "e2oYBzUAAAAJ;;;https://scholar.google.com/citations?hl=en;gN4tbgMAAAAJ;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;R25rbyQAAAAJ;EY2lqD0AAAAJ;D0lL1r0AAAAJ", "orcid": ";;;0000-0003-2513-3822;0000-0002-4643-8126;0000-0003-3645-1041;0000-0003-0458-5977;0000-0001-5152-0055;0000-0002-3491-5968", "linkedin": ";;;;;;;;", "or_profile": "~Qingyun_Sun2;~Ziying_Chen1;~Beining_Yang1;~Cheng_Ji1;~Xingcheng_Fu1;~Sheng_Zhou1;~Hao_Peng7;~Jianxin_Li3;~Philip_S._Yu1", "aff": "Beihang University;Beihang University;;Beihang University;Guangxi Normal University;Zhejiang University;Beihang University;Beihang University ;University of Illinois Chicago", "aff_domain": "buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn;gxnu.edu.cn;zju.edu.cn;buaa.edu.cn;buaa.edu.cn;uic.edu", "position": "Assistant Professor;Undergrad student;;PhD student;Assistant Professor;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2024gcbench,\ntitle={{GC}-Bench: An Open and Unified Benchmark for Graph Condensation},\nauthor={Qingyun Sun and Ziying Chen and Beining Yang and Cheng Ji and Xingcheng Fu and Sheng Zhou and Hao Peng and Jianxin Li and Philip S. Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ScPgzCZ6Lo}\n}", "github": "", "reviewers": "9zaP;GyyU;oApz;qXuL", "pdf_size": 0, "rating": "6;6;7;8", "confidence": "4;4;4;3", "wc_summary_and_contributions": "66;72;133;128", "wc_strengths": "51;22;2;137", "wc_improvement": "226;25;2;152", "wc_limitations": "1;64;1;7", "wc_correctness": "1;16;1;5", "wc_clarity": "1;16;1;4", "wc_relation_to_prior_work": "1;5;1;17", "wc_documentation": "1;22;4;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "349;243;146;462", "wc_reply_reviewers": "0;19;0;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 99.75, 30.873734791890662 ], "wc_strengths_avg": [ 53.0, 51.53154373779229 ], "wc_improvement_avg": [ 101.25, 91.92762098520771 ], "wc_limitations_avg": [ 18.25, 26.52710877574109 ], "wc_correctness_avg": [ 5.75, 6.139014578904337 ], "wc_clarity_avg": [ 5.5, 6.18465843842649 ], "wc_relation_to_prior_work_avg": [ 6.0, 6.557438524302 ], "wc_documentation_avg": [ 9.5, 8.077747210701755 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.0, 117.90886311045493 ], "wc_reply_reviewers_avg": [ 10.0, 10.024968827881711 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2648740433330184544&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;buaa.edu.cn;;buaa.edu.cn;gxnu.edu.cn;zju.edu.cn;buaa.edu.cn;buaa.edu.cn;uic.edu", "author_num": 9, "aff_unique_index": "0;0;0;1;2;0;0;3", "aff_unique_norm": "Beihang University;Guangxi Normal University;Zhejiang University;University of Illinois at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.gxnu.edu.cn;https://www.zju.edu.cn;https://www.uic.edu", "aff_unique_abbr": "BUAA;;ZJU;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "DisC-GS: Discontinuity-aware Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95092", "id": "ScbmEmtsH5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ScbmEmtsH5", "openreview": "https://openreview.net/forum?id=ScbmEmtsH5", "poster": "/media/PosterPDFs/NeurIPS%202024/95092.png?t=1731439762.1759", "project": "", "author_site": "Haoxuan Qu, Zhuoling Li, Hossein Rahmani, Yujun Cai, Jun Liu", "tldr": "", "abstract": "Recently, Gaussian Splatting, a method that represents a 3D scene as a collection of Gaussian distributions, has gained significant attention in addressing the task of novel view synthesis. In this paper, we highlight a fundamental limitation of Gaussian Splatting: its inability to accurately render discontinuities and boundaries in images due to the continuous nature of Gaussian distributions. To address this issue, we propose a novel framework enabling Gaussian Splatting to perform discontinuity-aware image rendering. Additionally, we introduce a B\\'ezier-boundary gradient approximation strategy within our framework to keep the ``differentiability'' of the proposed discontinuity-aware rendering process. Extensive experiments demonstrate the efficacy of our framework.", "keywords": "Gaussian Splatting;discontinuity awareness;gradient approximation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haoxuan Qu;Zhuoling Li;Hossein Rahmani;Yujun Cai;Jun Liu", "authorids": "~Haoxuan_Qu1;~Zhuoling_Li2;~Hossein_Rahmani4;~Yujun_Cai1;~Jun_Liu8", "gender": "M;M;M;F;M", "homepage": ";https://github.com/zhuolingli;https://sites.google.com/view/rahmaniatlu;;", "dblp": "302/3883;;40/7475;227/4399;95/3736-36", "google_scholar": "https://scholar.google.com.sg/citations?user=fR83-ycAAAAJ;https://scholar.google.co.jp/citations?user=9BAxU1MAAAAJ;zFyT_gwAAAAJ;https://scholar.google.com/citations?hl=en;Q5Ild8UAAAAJ", "orcid": "0000-0001-5054-3394;0009-0004-2166-3353;;;", "linkedin": ";;;;", "or_profile": "~Haoxuan_Qu1;~Zhuoling_Li2;~Hossein_Rahmani4;~Yujun_Cai1;~Jun_Liu8", "aff": "Singapore University of Technology and Design;Central South University;Lancaster University;Meta Facebook;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;csu.edu.cn;lancaster.ac.uk;fb.com;sutd.edu.sg", "position": "PhD student;MS student;Full Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqu2024discgs,\ntitle={DisC-{GS}: Discontinuity-aware Gaussian Splatting},\nauthor={Haoxuan Qu and Zhuoling Li and Hossein Rahmani and Yujun Cai and Jun Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ScbmEmtsH5}\n}", "github": "", "reviewers": "ncnd;F99C;TofM;Kjj1", "pdf_size": 18824935, "rating": "4;5;5;7", "confidence": "4;4;4;4", "soundness": "4;4;4;3", "novelty": "2;4;3;3", "presentation": "2;3;3;3", "wc_summary": "107;59;44;44", "wc_strengths": "86;87;40;78", "wc_weaknesses": "252;103;90;148", "wc_questions": "35;42;49;4", "wc_limitations": "6;16;26;40", "wc_review": "486;307;249;314", "wc_reply_reviewers": "274;101;0;315", "wc_reply_authors": "963;516;0;919", "reply_reviewers": "1;1;0;2", "reply_authors": "3;2;1;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 25.85053190942113 ], "wc_strengths_avg": [ 72.75, 19.22725929507375 ], "wc_weaknesses_avg": [ 148.25, 63.64893950412685 ], "wc_questions_avg": [ 32.5, 17.18284027743958 ], "wc_limitations_avg": [ 22.0, 12.569805089976535 ], "wc_review_avg": [ 339.0, 88.54095097749968 ], "wc_reply_reviewers_avg": [ 172.5, 127.94236983892397 ], "wc_reply_authors_avg": [ 599.5, 387.4870965593564 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1290473809247394281&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sutd.edu.sg;csu.edu.cn;lancaster.ac.uk;fb.com;sutd.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Singapore University of Technology and Design;Central South University;Lancaster University;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.sutd.edu.sg;https://www.csu.edu.cn;https://www.lancaster.ac.uk;https://meta.com", "aff_unique_abbr": "SUTD;CSU;Lancaster;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Singapore;China;United Kingdom;United States" }, { "title": "Information Re-Organization Improves Reasoning in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95091", "id": "SciWuYPNG0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SciWuYPNG0", "openreview": "https://openreview.net/forum?id=SciWuYPNG0", "poster": "/media/PosterPDFs/NeurIPS%202024/95091.png?t=1729852427.4042194", "project": "", "author_site": "Xiaoxia Cheng, Zeqi Tan, Wei Xue, Weiming Lu", "tldr": "", "abstract": "Improving the reasoning capabilities of large language models (LLMs) has attracted considerable interest. Recent approaches primarily focus on improving the reasoning process to yield a more precise final answer. However, in scenarios involving contextually aware reasoning, these methods neglect the importance of first identifying logical relationships from the context before proceeding with the reasoning. This oversight could lead to a superficial understanding and interaction with the context, potentially undermining the quality and reliability of the reasoning outcomes. In this paper, we propose an information re-organization (\\textbf{InfoRE}) method before proceeding with the reasoning to enhance the reasoning ability of LLMs. Our re-organization method involves initially extracting logical relationships from the contextual content, such as documents or paragraphs, and subsequently pruning redundant content to minimize noise. Then, we utilize the re-organized information in the reasoning process. This enables LLMs to deeply understand the contextual content by clearly perceiving these logical relationships, while also ensuring high-quality responses by eliminating potential noise. To demonstrate the effectiveness of our approach in improving the reasoning ability, we conduct experiments using Llama2-70B, GPT-3.5, and GPT-4 on various contextually aware multi-hop reasoning tasks. Using only a zero-shot setting, our method achieves an average absolute improvement of 4\\% across all tasks, highlighting its potential to improve the reasoning performance of LLMs.", "keywords": "LLM;Information Re-organization;Multi-hop Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/21f09f365e7f51d1d785020c19a59e5a3b3281c1.zip", "author": "Xiaoxia Cheng;Zeqi Tan;Wei Xue;Weiming Lu", "authorids": "~Xiaoxia_Cheng1;~Zeqi_Tan1;~Wei_Xue6;~Weiming_Lu1", "gender": "F;M;;", "homepage": ";;;", "dblp": "331/5716;200/9648.html;;", "google_scholar": "nWKkHFQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";;;", "linkedin": "%E5%AD%9D%E4%BE%A0-%E7%A8%8B-595994225/;;;", "or_profile": "~Xiaoxia_Cheng1;~Zeqi_Tan1;~Wei_Xue6;~Weiming_Lu1", "aff": "Zhejiang University;University of Hong Kong;;", "aff_domain": "zju.edu.cn;hku.hk;;", "position": "PhD student;Intern;;", "bibtex": "@inproceedings{\ncheng2024information,\ntitle={Information Re-Organization Improves Reasoning in Large Language Models},\nauthor={Xiaoxia Cheng and Zeqi Tan and Wei Xue and Weiming Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SciWuYPNG0}\n}", "github": "", "reviewers": "cK3D;x5hg;wQkW", "pdf_size": 583617, "rating": "5;7;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "95;69;100", "wc_strengths": "42;75;56", "wc_weaknesses": "124;338;35", "wc_questions": "2;117;163", "wc_limitations": "10;1;6", "wc_review": "273;600;360", "wc_reply_reviewers": "16;17;17", "wc_reply_authors": "191;69;44", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 88.0, 13.589211407093005 ], "wc_strengths_avg": [ 57.666666666666664, 13.523641850067197 ], "wc_weaknesses_avg": [ 165.66666666666666, 127.15956729855428 ], "wc_questions_avg": [ 94.0, 67.71016664184683 ], "wc_limitations_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_review_avg": [ 411.0, 138.28231991111517 ], "wc_reply_reviewers_avg": [ 16.666666666666668, 0.4714045207910317 ], "wc_reply_authors_avg": [ 101.33333333333333, 64.22010761609033 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17884755951758283488&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;hku.hk;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Zhejiang University;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.hku.hk", "aff_unique_abbr": "ZJU;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "FUGAL: Feature-fortified Unrestricted Graph Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95090", "id": "SdLOs1FR4h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SdLOs1FR4h", "openreview": "https://openreview.net/forum?id=SdLOs1FR4h", "poster": "", "project": "", "author_site": "Aditya Bommakanti, Harshith Vonteri, Konstantinos Skitsas, Sayan Ranu, Davide Mottin, Panagiotis Karras", "tldr": "", "abstract": "The necessity to align two graphs, minimizing a structural distance metric, is prevalent in biology, chemistry, recommender systems, and social network analysis. Due to the problem\u2019s NP-hardness, prevailing graph alignment methods follow a modular and mediated approach, solving the problem by restricting to the domain of intermediary graph representations or products like embeddings, spectra, and graph signals. Restricting the problem to this intermediate space may distort the original problem and are hence predisposed to miss high-quality solutions. In this paper, we propose an unrestricted method, FUGAL, which finds a permutation matrix that maps one graph to another by directly operating on their adjacency matrices with judicious constraint relaxation. Extensive experimentation demonstrates that FUGAL consistently surpasses state-of-the-art graph alignment methods in accuracy across all benchmark datasets without encumbering efficiency.", "keywords": "graph alignment", "primary_area": "optimization", "supplementary_material": "", "author": "Aditya Bommakanti;Harshith Reddy Vonteri;Konstantinos Skitsas;Sayan Ranu;Davide Mottin;Panagiotis Karras", "authorids": "~Aditya_Bommakanti1;~Harshith_Reddy_Vonteri1;~Konstantinos_Skitsas1;~Sayan_Ranu2;~Davide_Mottin1;~Panagiotis_Karras1", "gender": ";M;M;M;M;M", "homepage": ";;https://www.linkedin.com/in/constantinos-skitsas/;https://www.cse.iitd.ac.in/~sayan/index.html;https://mott.in;http://cs.au.dk/~karras/", "dblp": "368/2587;;228/8067;38/768;135/7623;08/5342", "google_scholar": ";;E8JeOjMAAAAJ;K4w5qYUAAAAJ;https://scholar.google.it/citations?user=evZ9Q9EAAAAJ;https://scholar.google.com.tw/citations?user=B6C4aBoAAAAJ", "orcid": "0009-0009-3460-2337;0009-0000-2147-3956;0000-0001-5078-6468;0000-0003-4147-9372;0000-0001-8256-2258;", "linkedin": "aditya-bommakanti-6b035321a/;harshith-reddy-vonteri-1a1724216/;;;davide-mottin-67ab7323/;", "or_profile": "~Aditya_Bommakanti1;~Harshith_Reddy_Vonteri1;~Konstantinos_Skitsas1;~Sayan_Ranu2;~Davide_Mottin1;~Panagiotis_Karras1", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Aarhus University;Indian Institute of Technology Delhi;Aarhus University;Nagoya University", "aff_domain": "iitd.ac.in;iitd.ac.in;au.dk;iitd.ac.in;au.dk;nagoya-u.ac.jp", "position": "Undergrad student;Undergrad student;PhD student;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbommakanti2024fugal,\ntitle={{FUGAL}: Feature-fortified Unrestricted Graph Alignment},\nauthor={Aditya Bommakanti and Harshith Reddy Vonteri and Konstantinos Skitsas and Sayan Ranu and Davide Mottin and Panagiotis Karras},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SdLOs1FR4h}\n}", "github": "", "reviewers": "T3ao;ts95;FAZB;b2PL", "pdf_size": 599558, "rating": "3;5;7;7", "confidence": "4;5;4;3", "soundness": "2;2;3;3", "novelty": "1;2;3;3", "presentation": "2;1;3;2", "wc_summary": "106;76;43;116", "wc_strengths": "29;64;42;42", "wc_weaknesses": "141;493;72;34", "wc_questions": "2;155;1;308", "wc_limitations": "8;10;1;13", "wc_review": "286;798;159;513", "wc_reply_reviewers": "381;42;26;11", "wc_reply_authors": "657;113;24;0", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;2;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.25, 28.49012986983387 ], "wc_strengths_avg": [ 44.25, 12.577261228105266 ], "wc_weaknesses_avg": [ 185.0, 181.9134409547574 ], "wc_questions_avg": [ 116.5, 127.08756823544937 ], "wc_limitations_avg": [ 8.0, 4.415880433163924 ], "wc_review_avg": [ 439.0, 242.98456741118355 ], "wc_reply_reviewers_avg": [ 115.0, 153.96590531672913 ], "wc_reply_authors_avg": [ 198.5, 268.0415079796411 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15964293640931004361&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "iitd.ac.in;iitd.ac.in;au.dk;iitd.ac.in;au.dk;nagoya-u.ac.jp", "author_num": 6, "aff_unique_index": "0;0;1;0;1;2", "aff_unique_norm": "Indian Institute of Technology Delhi;Aarhus University;Nagoya University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitdelhi.ac.in;https://au.dk;https://www.nagoya-u.ac.jp", "aff_unique_abbr": "IIT Delhi;AU;Nagoya U", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;0;1;0;1;2", "aff_country_unique": "India;Denmark;Japan" }, { "title": "Unlearnable 3D Point Clouds: Class-wise Transformation Is All You Need", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95089", "id": "SeefZa7Vmq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SeefZa7Vmq", "openreview": "https://openreview.net/forum?id=SeefZa7Vmq", "poster": "/media/PosterPDFs/NeurIPS%202024/95089.png?t=1729474558.1076663", "project": "", "author_site": "Xianlong Wang, Minghui Li, Wei Liu, Hangtao Zhang, Shengshan Hu, Yechao Zhang, Ziqi Zhou, Hai Jin", "tldr": "", "abstract": "Traditional unlearnable strategies have been proposed to prevent unauthorized users from training on the 2D image data. With more 3D point cloud data containing sensitivity information, unauthorized usage of this new type data has also become a serious concern. To address this, we propose the first integral unlearnable framework for 3D point clouds including two processes: (i) we propose an unlearnable data protection scheme, involving a class-wise setting established by a category-adaptive allocation strategy and multi-transformations assigned to samples; (ii) we propose a data restoration scheme that utilizes class-wise inverse matrix transformation, thus enabling authorized-only training for unlearnable data. This restoration process is a practical issue overlooked in most existing unlearnable literature, i.e., even authorized users struggle to gain knowledge from 3D unlearnable data. Both theoretical and empirical results (including 6 datasets, 16 models, and 2 tasks) demonstrate the effectiveness of our proposed unlearnable framework. Our code is available at https://github.com/CGCL-codes/UnlearnablePC.", "keywords": "Unlearnable examples;3D point clouds;deep neural networks", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/96addd6286030adecf2fd7fb16bcf68046c540c5.zip", "author": "Xianlong Wang;Minghui Li;Wei Liu;Hangtao Zhang;Shengshan Hu;Yechao Zhang;Ziqi Zhou;Hai Jin", "authorids": "~Xianlong_Wang1;~Minghui_Li2;~Wei_Liu27;~Hangtao_Zhang1;~Shengshan_Hu1;~Yechao_Zhang1;~Ziqi_Zhou2;~Hai_Jin1", "gender": "M;F;;M;M;M;M;M", "homepage": "https://wxldragon.github.io/;;https://wilmido.github.io/;;http://faculty.hust.edu.cn/HUSHENGSHAN;;https://zhou-zi7.github.io;http://www.linkedin.com/in/jinhust", "dblp": ";;;345/8090;169/2268;304/1238;;98/4156", "google_scholar": "https://scholar.google.com.hk/citations?user=EgsgIq0AAAAJ;j_y67gEAAAAJ;-eSvjJoAAAAJ;H6wMyNEAAAAJ;lkAFwJgAAAAJ;6DN1wxkAAAAJ;-eyLn4wAAAAJ;", "orcid": "0009-0009-3057-827X;;0009-0001-6664-2888;0000-0002-6805-6401;;0000-0002-0551-1200;;0000-0002-3934-7605", "linkedin": ";;;hangtao;;;;jinhust", "or_profile": "~Xianlong_Wang1;~Minghui_Li2;~Wei_Liu27;~Hangtao_Zhang1;~Shengshan_Hu1;~Yechao_Zhang1;~Ziqi_Zhou2;~Hai_Jin1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;Assistant Professor;MS student;PhD student;Associate Professor;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024unlearnable,\ntitle={Unlearnable 3D Point Clouds: Class-wise Transformation Is All You Need},\nauthor={Xianlong Wang and Minghui Li and Wei Liu and Hangtao Zhang and Shengshan Hu and Yechao Zhang and Ziqi Zhou and Hai Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SeefZa7Vmq}\n}", "github": "", "reviewers": "nf2f;Yqki;BhZK;upBr", "pdf_size": 7360642, "rating": "5;5;5;7", "confidence": "3;2;4;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "101;47;99;91", "wc_strengths": "109;35;28;95", "wc_weaknesses": "131;190;50;109", "wc_questions": "12;121;337;3", "wc_limitations": "13;14;1;36", "wc_review": "366;407;515;334", "wc_reply_reviewers": "65;31;0;0", "wc_reply_authors": "1056;684;954;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;4;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 21.97157254271983 ], "wc_strengths_avg": [ 66.75, 35.68175303989421 ], "wc_weaknesses_avg": [ 120.0, 50.104889980919026 ], "wc_questions_avg": [ 118.25, 134.56480780649895 ], "wc_limitations_avg": [ 16.0, 12.62933094031509 ], "wc_review_avg": [ 405.5, 68.30995535059293 ], "wc_reply_reviewers_avg": [ 24.0, 26.842131062939096 ], "wc_reply_authors_avg": [ 673.5, 411.91595016459365 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11519396332768204537&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SeesCzBelI", "title": "Removing Length Bias in RLHF is not Enough", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF) has become an essential technique for enhancing pretrained large language models (LLMs) to generate responses that align with human preferences and societal values. \nWhile RLHF has shown promise, the training of reward models (RMs) still faces the challenge of \\emph{reward hacking}, motivating recent works to prevent RMs from finding shortcuts that bypass the intended optimization objectives by identifying simplistic patterns, especially response length.\nBesides the issue of \\emph{length bias}, our work firstly reveal that \\emph{prompt-template bias} learned by RMs can also cause \\emph{reward hacking} when dealing with marginal samples, resulting in LLMs preferring to generate responses in a specific format after RLHF fine-tuning, regardless of the format requested in the prompt.\nTo this end, we propose a low-cost but effective method, namely Prompt Bias Calibration (PBC), to estimate the \\emph{prompt-template bias} term during reward modeling, which can be utilized to calibrate reward scores in the following RL fine-tuning process. \nThen, we show that our PBC method can be flexibly combined with existing algorithms of removing \\emph{length bias}, leading to a further improvement in the aspect of enhancing the quality of generated responses.\nExperiments results show that the performance of our PBC method and its extensions have significantly surpassed the original implementation of RLHF.", "keywords": "LLM;RLHF;Prompt Bias", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/fd088daf838c3b2e5ed9a2bebe43ddf95c49fb9a.zip", "author": "Chaojie Wang;Haonan shi;Long Tian;Bo An;Shuicheng YAN", "authorids": "~Chaojie_Wang1;~Haonan_shi3;~Long_Tian1;~Bo_An2;~Shuicheng_YAN3", "gender": "M;M;M;M;", "homepage": "https://chaojiewang94.github.io/;https://faculty.xidian.edu.cn/TL1/zh_CN/index.htm;https://personal.ntu.edu.sg/boan/;https://yanshuicheng.ai/;https://faculty.xidian.edu.cn/WLM1/en/xsxx/352538/content/127872.htm", "dblp": "134/9314-1;;42/6178-1.html;y/ShuichengYan;", "google_scholar": "https://scholar.google.com/citations?hl=en;;PEEpuNwAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;", "orcid": ";;0000-0002-7064-7438;;", "linkedin": ";;;;", "or_profile": "~Chaojie_Wang1;~Long_Tian1;~Bo_An2;~Shuicheng_YAN3;~shihaonan1", "aff": "Skywork AI;Xi'an University of Software Engineering Institute;Nanyang Technological University;sea Group;Xi'an University of Electronic Science and Technology", "aff_domain": "kunlun-inc.com;xidian.edu.cn;ntu.edu.sg;sea.com;xidian.edu.cn", "position": "Researcher;Assistant Professor;Full Professor;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024removing,\ntitle={Removing Length Bias in {RLHF} is not Enough},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=SeesCzBelI}\n}", "github": "", "project": "", "reviewers": "ehkZ;ukQE;1bbE;MaJ5", "site": "https://openreview.net/forum?id=SeesCzBelI", "pdf_size": 935241, "rating": "3;4;4;7", "confidence": "3;3;4;3", "soundness": "3;2;2;3", "novelty": "2;2;2;4", "presentation": "3;2;1;3", "wc_summary": "42;132;48;79", "wc_strengths": "36;13;10;41", "wc_weaknesses": "2;49;119;47", "wc_questions": "283;93;126;26", "wc_limitations": "3;1;3;4", "wc_review": "366;288;306;197", "wc_reply_reviewers": "140;0;0;21", "wc_reply_authors": "870;66;66;66", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 75.25, 35.64670391494843 ], "wc_strengths_avg": [ 25.0, 13.656500283747663 ], "wc_weaknesses_avg": [ 54.25, 41.841217716505334 ], "wc_questions_avg": [ 132.0, 94.33186100146652 ], "wc_limitations_avg": [ 2.75, 1.0897247358851685 ], "wc_review_avg": [ 289.25, 60.58619892351723 ], "wc_reply_reviewers_avg": [ 40.25, 58.22531665864944 ], "wc_reply_authors_avg": [ 267.0, 348.14221232134435 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9qjXSPIY6YsJ:scholar.google.com/&scioq=Removing+Length+Bias+in+RLHF+is+not+Enough&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Skywork AI;Xi'an University of Software Engineering;Nanyang Technological University;Sea Group;Xi'an University of Electronic Science and Technology", "aff_unique_dep": ";Software Engineering Institute;;;", "aff_unique_url": "https://www.skywork.ai;http://www.xauat.edu.cn;https://www.ntu.edu.sg;;http://www.xidian.edu.cn/", "aff_unique_abbr": "Skywork AI;;NTU;;Xidian University", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "United States;China;Singapore;" }, { "title": "Time-Varying LoRA: Towards Effective Cross-Domain Fine-Tuning of Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95088", "id": "SgODU2mx9T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SgODU2mx9T", "openreview": "https://openreview.net/forum?id=SgODU2mx9T", "poster": "/media/PosterPDFs/NeurIPS%202024/95088.png?t=1732769594.444445", "project": "", "author_site": "Zhan Zhuang, Yulong Zhang, Xuehao Wang, Jiangang Lu, Ying Wei, Yu Zhang", "tldr": "", "abstract": "Large-scale diffusion models are adept at generating high-fidelity images and facilitating image editing and interpolation. However, they have limitations when tasked with generating images in dynamic, evolving domains. In this paper, we introduce Terra, a novel Time-varying low-rank adapter that offers a fine-tuning framework specifically tailored for domain flow generation. The key innovation of Terra lies in its construction of a continuous parameter manifold through a time variable, with its expressive power analyzed theoretically. This framework not only enables interpolation of image content and style but also offers a generation-based approach to address the domain shift problems in unsupervised domain adaptation and domain generalization. Specifically, Terra transforms images from the source domain to the target domain and generates interpolated domains with various styles to bridge the gap between domains and enhance the model generalization, respectively. We conduct extensive experiments on various benchmark datasets, empirically demonstrate the effectiveness of Terra. Our source code is publicly available on https://github.com/zwebzone/terra.", "keywords": "Low-rank adaptation;diffusion models;cross-domain learning", "primary_area": "other", "supplementary_material": "", "author": "Zhan Zhuang;Yulong Zhang;Xuehao Wang;Jiangang Lu;Ying Wei;Yu Zhang", "authorids": "~Zhan_Zhuang1;~Yulong_Zhang2;~Xuehao_Wang3;~Jiangang_Lu1;~Ying_Wei1;~Yu_Zhang3", "gender": ";;M;M;F;M", "homepage": ";;;https://person.zju.edu.cn/lujg/;https://wei-ying.net/;http://cse.sustech.edu.cn/faculty/~zhangy/", "dblp": "354/8439;32/9374;272/4397;20/6851;14/4899-1;50/671-6", "google_scholar": "YC7hQdIAAAAJ;https://scholar.google.com.hk/citations?user=2hY14LYAAAAJ;ZaJRYnMAAAAJ;;5UpFdKsAAAAJ;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ", "orcid": "0000-0003-0215-8728;0000-0002-4038-1616;0000-0003-1274-2100;0000-0002-1551-6179;;", "linkedin": ";;;;;", "or_profile": "~Zhan_Zhuang1;~Yulong_Zhang2;~Xuehao_Wang3;~Jiangang_Lu1;~Ying_Wei1;~Yu_Zhang3", "aff": "City University of Hong Kong;Zhejiang University;Tencent;;Nanyang Technological University;Southern University of Science and Technology", "aff_domain": "cityu.edu.hk;zju.edu.cn;tencent.com;;ntu.edu.sg;sustc.edu.cn", "position": "PhD student;PhD student;Intern;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhuang2024timevarying,\ntitle={Time-Varying Lo{RA}: Towards Effective Cross-Domain Fine-Tuning of Diffusion Models},\nauthor={Zhan Zhuang and Yulong Zhang and Xuehao Wang and Jiangang Lu and Ying Wei and Yu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SgODU2mx9T}\n}", "github": "", "reviewers": "LKTK;76UZ;Y8y3;STii", "pdf_size": 6655722, "rating": "6;7;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;2;4", "wc_summary": "54;45;91;78", "wc_strengths": "46;65;37;118", "wc_weaknesses": "108;92;158;148", "wc_questions": "22;180;5;115", "wc_limitations": "9;10;13;9", "wc_review": "239;392;304;468", "wc_reply_reviewers": "22;16;33;43", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.0, 18.371173070873837 ], "wc_strengths_avg": [ 66.5, 31.40461749488441 ], "wc_weaknesses_avg": [ 126.5, 27.326726843879417 ], "wc_questions_avg": [ 80.5, 71.0862152600629 ], "wc_limitations_avg": [ 10.25, 1.6393596310755 ], "wc_review_avg": [ 350.75, 86.77953387752207 ], "wc_reply_reviewers_avg": [ 28.5, 10.35615758860399 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12861639549093028987&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cityu.edu.hk;zju.edu.cn;tencent.com;;ntu.edu.sg;sustc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "City University of Hong Kong;Zhejiang University;Tencent;Nanyang Technological University;Southern University of Science and Technology", "aff_unique_dep": ";;Tencent Holdings Limited;;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.zju.edu.cn;https://www.tencent.com;https://www.ntu.edu.sg;https://www.sustech.edu.cn", "aff_unique_abbr": "CityU;ZJU;Tencent;NTU;SUSTech", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Doob's Lagrangian: A Sample-Efficient Variational Approach to Transition Path Sampling", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95087", "id": "ShJWT0n7kX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ShJWT0n7kX", "openreview": "https://openreview.net/forum?id=ShJWT0n7kX", "poster": "/media/PosterPDFs/NeurIPS%202024/95087.png?t=1733794427.1894517", "project": "", "author_site": "Yuanqi Du, Michael Plainer, Rob Brekelmans, Chenru Duan, Frank Noe, Carla Gomes, Alan Aspuru-Guzik, Kirill Neklyudov", "tldr": "", "abstract": "Rare event sampling in dynamical systems is a fundamental problem arising in the natural sciences, which poses significant computational challenges due to an exponentially large space of trajectories. For settings where the dynamical system of interest follows a Brownian motion with known drift, the question of conditioning the process to reach a given endpoint or desired rare event is definitively answered by Doob's $h$-transform. However, the naive estimation of this transform is infeasible, as it requires simulating sufficiently many forward trajectories to estimate rare event probabilities. In this work, we propose a variational formulation of Doob's $h$-transform as an optimization problem over trajectories between a given initial point and the desired ending point. To solve this optimization, we propose a simulation-free training objective with a model parameterization that imposes the desired boundary conditions by design. Our approach significantly reduces the search space over trajectories and avoids expensive trajectory simulation and inefficient importance sampling estimators which are required in existing methods. We demonstrate the ability of our method to find feasible transition paths on real-world molecular simulation and protein folding tasks.", "keywords": "Transition Path Sampling;Protein Folding;Schr\u00f6dinger Bridge", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Yuanqi Du;Michael Plainer;Rob Brekelmans;Chenru Duan;Frank Noe;Carla P Gomes;Alan Aspuru-Guzik;Kirill Neklyudov", "authorids": "~Yuanqi_Du1;~Michael_Plainer1;~Rob_Brekelmans1;~Chenru_Duan1;~Frank_Noe1;~Carla_P_Gomes1;~Alan_Aspuru-Guzik2;~Kirill_Neklyudov1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://yuanqidu.github.io/;https://plainer.dev;https://brekelma.github.io;https://www.deepprinciple.com;;;http://matter.toronto.edu;https://necludov.github.io/", "dblp": "266/2837;364/7250.html;207/7856.html;;;;;195/1093", "google_scholar": "fAc_zZMAAAAJ;7Bnt9kUAAAAJ;M6ADg_UAAAAJ;canPgVoAAAAJ;QGiLc_cAAAAJ;;Ag_6KEgAAAAJ;https://scholar.google.ru/citations?user=eOttYWgAAAAJ", "orcid": ";;;0000-0003-2592-4237;;;0000-0002-8277-4434;", "linkedin": ";https://linkedin.com/in/plainer/;;chenru-duan-8882a010b/;;;;", "or_profile": "~Yuanqi_Du1;~Michael_Plainer1;~Rob_Brekelmans1;~Chenru_Duan1;~Frank_Noe1;~Carla_P_Gomes1;~Alan_Aspuru-Guzik2;~Kirill_Neklyudov1", "aff": "Cornell University;Freie Universit\u00e4t Berlin;;Microsoft;Freie Universit\u00e4t Berlin;;University of Toronto;Vector Institute", "aff_domain": "cornell.edu;fu-berlin.de;;microsoft.com;fu-berlin.de;;utoronto.ca;vectorinstitute.ai", "position": "PhD student;PhD student;;Researcher;Professor;;Full Professor;Postdoc", "bibtex": "@inproceedings{\ndu2024doobs,\ntitle={Doob's Lagrangian: A Sample-Efficient Variational Approach to Transition Path Sampling},\nauthor={Yuanqi Du and Michael Plainer and Rob Brekelmans and Chenru Duan and Frank Noe and Carla P Gomes and Alan Aspuru-Guzik and Kirill Neklyudov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ShJWT0n7kX}\n}", "github": "", "reviewers": "zwM4;cUXK;tKYF;irHw;iZfM", "pdf_size": 1803861, "rating": "6;7;7;7;7", "confidence": "3;3;2;3;3", "soundness": "2;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "136;83;75;107;135", "wc_strengths": "77;57;84;75;159", "wc_weaknesses": "373;66;216;170;34", "wc_questions": "233;49;63;49;89", "wc_limitations": "40;9;15;3;17", "wc_review": "859;264;453;404;434", "wc_reply_reviewers": "248;13;13;17;0", "wc_reply_authors": "842;0;0;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 107.2, 25.396062686959958 ], "wc_strengths_avg": [ 90.4, 35.437832890852675 ], "wc_weaknesses_avg": [ 171.8, 120.51622297433653 ], "wc_questions_avg": [ 96.6, 69.74697126040671 ], "wc_limitations_avg": [ 16.8, 12.592060990957755 ], "wc_review_avg": [ 482.8, 199.43861210908986 ], "wc_reply_reviewers_avg": [ 58.2, 95.0734452936255 ], "wc_reply_authors_avg": [ 168.4, 336.8 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4876652408859098694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cornell.edu;fu-berlin.de;;microsoft.com;fu-berlin.de;;utoronto.ca;vectorinstitute.ai", "author_num": 8, "aff_unique_index": "0;1;2;1;3;4", "aff_unique_norm": "Cornell University;Freie Universit\u00e4t Berlin;Microsoft;University of Toronto;Vector Institute", "aff_unique_dep": ";;Microsoft Corporation;;", "aff_unique_url": "https://www.cornell.edu;https://www.fu-berlin.de;https://www.microsoft.com;https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "Cornell;FU Berlin;Microsoft;U of T;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;2;2", "aff_country_unique": "United States;Germany;Canada" }, { "title": "Single Image Reflection Separation via Dual-Stream Interactive Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95086", "id": "Shwtw8uV8l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Shwtw8uV8l", "openreview": "https://openreview.net/forum?id=Shwtw8uV8l", "poster": "/media/PosterPDFs/NeurIPS%202024/95086.png?t=1731648801.664665", "project": "", "author_site": "Qiming Hu, Hainuo Wang, Xiaojie Guo", "tldr": "", "abstract": "Despite satisfactory results on ``easy'' cases of single image reflection separation, prior dual-stream methods still suffer from considerable performance degradation when facing complex ones, i.e, the transmission layer is densely entangled with the reflection having a wide distribution of spatial intensity. The main reasons come from the lack of concern on the feature correlation during interaction, and the limited receptive field. To remedy these deficiencies, this paper presents a Dual-Stream Interactive Transformer (DSIT) design. Specifically, we devise a dual-attention interactive structure that embraces a dual-stream self-attention and a layer-aware dual-stream cross-attention mechanism to simultaneously capture intra-layer and inter-layer feature correlations. Meanwhile, the introduction of attention mechanisms can also mitigate the receptive field limitation. We modulate single-stream pre-trained Transformer embeddings with dual-stream convolutional features through cross-architecture interactions to provide richer semantic priors, thereby further relieving the ill-posedness of the problem. Extensive experimental results reveal the merits of the proposed DSIT over other state-of-the-art alternatives. Our code is publicly available at https://github.com/mingcv/DSIT.", "keywords": "Single Image Reflection Separation;Vision Transformer;Image Restoration;Reflection Removal", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/7bf590c15fa86441ae4edce34bdbb17595504d9d.zip", "author": "Qiming Hu;Hainuo Wang;Xiaojie Guo", "authorids": "~Qiming_Hu1;~Hainuo_Wang1;~Xiaojie_Guo2", "gender": "M;M;M", "homepage": "https://github.com/mingcv;https://github.com/Hainuo-Wang;https://sites.google.com/view/xjguo", "dblp": "235/8865;396/6753.html;43/8066-1", "google_scholar": "4zasPbwAAAAJ;Z2RcbF4AAAAJ;RL7jPuQAAAAJ", "orcid": "0009-0004-0620-5311;;", "linkedin": ";;", "or_profile": "~Qiming_Hu1;~Hainuo_Wang1;~Xiaojie_Guo2", "aff": "Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhu2024single,\ntitle={Single Image Reflection Separation via Dual-Stream Interactive Transformers},\nauthor={Qiming Hu and Hainuo Wang and Xiaojie Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Shwtw8uV8l}\n}", "github": "", "reviewers": "gpBZ;9gCf;bMoK;k1uS", "pdf_size": 13065017, "rating": "5;5;5;6", "confidence": "3;4;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;1;2", "wc_summary": "38;36;83;65", "wc_strengths": "49;21;43;89", "wc_weaknesses": "59;223;200;138", "wc_questions": "3;7;61;65", "wc_limitations": "1;32;19;9", "wc_review": "150;319;406;366", "wc_reply_reviewers": "13;79;28;26", "wc_reply_authors": "26;51;31;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.5, 19.576771950451892 ], "wc_strengths_avg": [ 50.5, 24.550967394381836 ], "wc_weaknesses_avg": [ 155.0, 63.549193543270086 ], "wc_questions_avg": [ 34.0, 29.068883707497267 ], "wc_limitations_avg": [ 15.25, 11.583932838203095 ], "wc_review_avg": [ 310.25, 97.50993539122052 ], "wc_reply_reviewers_avg": [ 36.5, 25.20416632225712 ], "wc_reply_authors_avg": [ 34.25, 9.832980219648569 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17765814525489675256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Provably Safe Neural Network Controllers via Differential Dynamic Logic", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95085", "id": "SiALFXa0NN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SiALFXa0NN", "openreview": "https://openreview.net/forum?id=SiALFXa0NN", "poster": "/media/PosterPDFs/NeurIPS%202024/95085.png?t=1733682702.9595087", "project": "", "author_site": "Samuel Teuber, Stefan Mitsch, Andr\u00e9 Platzer", "tldr": "", "abstract": "While neural networks (NNs) have a large potential as autonomous controllers for Cyber-Physical Systems, verifying the safety of neural network based control systems (NNCSs) poses significant challenges for the practical use of NNs\u2014 especially when safety is needed for unbounded time horizons. One reason for this is the intractability of analyzing NNs, ODEs and hybrid systems. To this end, we introduce VerSAILLE (Verifiably Safe AI via Logically Linked Envelopes): The first general approach that allows reusing control theory literature for NNCS verification. By joining forces, we can exploit the efficiency of NN verification tools while retaining the rigor of differential dynamic logic (dL). Based on a provably safe control envelope in dL, we derive a specification for the NN which is proven with NN verification tools. We show that a proof of the NN\u2019s adherence to the specification is then mirrored by a dL proof on the infinite-time safety of the NNCS.\n\nThe NN verification properties resulting from hybrid systems typically contain nonlinear arithmetic over formulas with arbitrary logical structure while efficient NN verification tools merely support linear constraints. To overcome this divide, we present Mosaic: An efficient, sound and complete verification approach for polynomial real arithmetic properties on piece-wise linear NNs. Mosaic partitions complex NN verification queries into simple queries and lifts off-the-shelf linear constraint tools to the nonlinear setting in a completeness-preserving manner by combining approximation with exact reasoning for counterexample regions. In our evaluation we demonstrate the versatility of VerSAILLE and Mosaic: We prove infinite-time safety on the classical Vertical Airborne Collision Avoidance NNCS verification benchmark for some scenarios while (exhaustively) enumerating counterexample regions in unsafe scenarios. We also show that our approach significantly outperforms the State-of-the-Art tools in closed-loop NNV", "keywords": "Cyber-Physical Systems;Neural Network Verification;Infinite-Time Horizon Safety;Differential Dynamic Logic;Safety Verification;neural network based control systems;Safety;Verification", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/90a49132da848177788e1152fc2a5ff81ccb7ec6.zip", "author": "Samuel Teuber;Stefan Mitsch;Andre Platzer", "authorids": "~Samuel_Teuber1;~Stefan_Mitsch1;~Andre_Platzer1", "gender": "M;M;M", "homepage": "https://teuber.dev;http://www.cs.cmu.edu/~smitsch/;https://symbolaris.com/", "dblp": "273/3924;61/5931.html;55/950", "google_scholar": "0dq6DCQAAAAJ;;Y80iWtgAAAAJ", "orcid": "0000-0001-7945-9110;;0000-0001-7238-5710", "linkedin": ";;", "or_profile": "~Samuel_Teuber1;~Stefan_Mitsch1;~Andre_Platzer1", "aff": "Karlsruher Institut f\u00fcr Technologie;Carnegie Mellon University;Karlsruher Institut f\u00fcr Technologie", "aff_domain": "kit.edu;cmu.edu;kit.edu", "position": "PhD student;System Scientist;Full Professor", "bibtex": "@inproceedings{\nteuber2024provably,\ntitle={Provably Safe Neural Network Controllers via Differential Dynamic Logic},\nauthor={Samuel Teuber and Stefan Mitsch and Andre Platzer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SiALFXa0NN}\n}", "github": "", "reviewers": "CSRh;JF4H;rNvs;H7zS", "pdf_size": 974336, "rating": "7;7;7;8", "confidence": "4;2;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "3;4;2;4", "wc_summary": "117;69;95;71", "wc_strengths": "114;80;31;200", "wc_weaknesses": "48;42;145;142", "wc_questions": "51;167;28;144", "wc_limitations": "8;30;16;48", "wc_review": "338;388;315;605", "wc_reply_reviewers": "9;20;99;19", "wc_reply_authors": "0;0;461;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 88.0, 19.621416870348583 ], "wc_strengths_avg": [ 106.25, 61.645660836753144 ], "wc_weaknesses_avg": [ 94.25, 49.30707352905869 ], "wc_questions_avg": [ 97.5, 59.12909605262032 ], "wc_limitations_avg": [ 25.5, 15.190457530963313 ], "wc_review_avg": [ 411.5, 114.79220356801241 ], "wc_reply_reviewers_avg": [ 36.75, 36.196512262923896 ], "wc_reply_authors_avg": [ 115.25, 199.6188555723131 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16959935660305432844&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "kit.edu;cmu.edu;kit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.cmu.edu", "aff_unique_abbr": "KIT;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Inexact Augmented Lagrangian Methods for Conic Optimization: Quadratic Growth and Linear Convergence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95084", "id": "Sj8G020ADl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Sj8G020ADl", "openreview": "https://openreview.net/forum?id=Sj8G020ADl", "poster": "", "project": "", "author_site": "Feng-Yi Liao, Lijun Ding, Yang Zheng", "tldr": "", "abstract": "Augmented Lagrangian Methods (ALMs) are widely employed in solving constrained optimizations, and some efficient solvers are developed based on this framework. Under the quadratic growth assumption, it is known that the dual iterates and the Karush\u2013Kuhn\u2013Tucker (KKT) residuals of ALMs applied to conic programs converge linearly. In contrast, the convergence rate of the primal iterates has remained elusive. In this paper, we resolve this challenge by establishing new $\\textit{quadratic growth}$ and $\\textit{error bound}$ properties for primal and dual conic programs under the standard strict complementarity condition. Our main results reveal that both primal and dual iterates of the ALMs converge linearly contingent solely upon the assumption of strict complementarity and a bounded solution set. This finding provides a positive answer to an open question regarding the asymptotically linear convergence of the primal iterates of ALMs applied to conic optimization.", "keywords": "convex optimization;augmented Lagrangian method;quadratic growth;error bound;linear convergence", "primary_area": "optimization", "supplementary_material": "/attachment/7662f99677546904411dd086bb3aba0020aa7e34.zip", "author": "Feng-Yi Liao;Lijun Ding;Yang Zheng", "authorids": "~Feng-Yi_Liao1;~Lijun_Ding1;~Yang_Zheng4", "gender": "M;M;M", "homepage": "https://fengyiliao.github.io/;https://www.lijunding.net/;https://zhengy09.github.io/", "dblp": ";;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=hm5XrmoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Feng-Yi_Liao1;~Lijun_Ding1;~Yang_Zheng4", "aff": "University of California, San Diego;Texas A&M University - College Station;University of California, San Diego", "aff_domain": "ucsd.edu;tamu.edu;ucsd.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliao2024inexact,\ntitle={Inexact Augmented Lagrangian Methods for Conic Optimization: Quadratic Growth and Linear Convergence},\nauthor={Feng-Yi Liao and Lijun Ding and Yang Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Sj8G020ADl}\n}", "github": "", "reviewers": "rMRz;K87V;Vd1D;X2mJ", "pdf_size": 1325287, "rating": "5;6;7;8", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "81;73;137;181", "wc_strengths": "17;79;66;81", "wc_weaknesses": "408;215;188;165", "wc_questions": "38;62;120;77", "wc_limitations": "3;24;17;1", "wc_review": "547;453;528;505", "wc_reply_reviewers": "19;22;142;29", "wc_reply_authors": "0;0;394;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.0, 43.94314508543966 ], "wc_strengths_avg": [ 60.75, 25.907286619790966 ], "wc_weaknesses_avg": [ 244.0, 96.32497080196806 ], "wc_questions_avg": [ 74.25, 29.852763691155968 ], "wc_limitations_avg": [ 11.25, 9.60143218483576 ], "wc_review_avg": [ 508.25, 35.19499254155341 ], "wc_reply_reviewers_avg": [ 53.0, 51.512134492758115 ], "wc_reply_authors_avg": [ 98.5, 170.6070045455344 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18310552328570660432&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ucsd.edu;tamu.edu;ucsd.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, San Diego;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.tamu.edu", "aff_unique_abbr": "UCSD;TAMU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "San Diego;College Station", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CoBo: Collaborative Learning via Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95083", "id": "SjQ1iIqpfU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SjQ1iIqpfU", "openreview": "https://openreview.net/forum?id=SjQ1iIqpfU", "poster": "", "project": "", "author_site": "Diba Hashemi, Lie He, Martin Jaggi", "tldr": "", "abstract": "Collaborative learning is an important tool to train multiple clients more effectively by enabling communication among clients. Identifying helpful clients, however, presents challenging and often introduces significant overhead. In this paper, we model **client-selection** and **model-training** as two interconnected optimization problems, proposing a novel bilevel optimization problem for collaborative learning.\nWe introduce **CoBo**, a *scalable* and *elastic*, SGD-type alternating optimization algorithm that efficiently addresses these problem with theoretical convergence guarantees. Empirically, **CoBo** achieves superior performance, surpassing popular personalization algorithms by 9.3% in accuracy on a task with high heterogeneity, involving datasets distributed among 80 clients.", "keywords": "collaborative learning;personalized federated learning;bilevel optimization;distributed learning", "primary_area": "optimization", "supplementary_material": "/attachment/4f8134abf0557608fab0bb1f42670af0490eda3a.zip", "author": "Diba Hashemi;Lie He;Martin Jaggi", "authorids": "~Diba_Hashemi1;~Lie_He1;~Martin_Jaggi1", "gender": "F;M;M", "homepage": ";https://liehe.github.io/;https://mlo.epfl.ch", "dblp": ";225/5245;17/4402", "google_scholar": "KU-7kn0AAAAJ;rIAYxaMAAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ", "orcid": ";;0000-0003-1579-5558", "linkedin": ";;", "or_profile": "~Diba_Hashemi1;~Lie_He1;~Martin_Jaggi1", "aff": "EPFL - EPF Lausanne;Tencent;EPFL", "aff_domain": "epfl.ch;tencent.com;epfl.ch", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhashemi2024cobo,\ntitle={CoBo: Collaborative Learning via Bilevel Optimization},\nauthor={Diba Hashemi and Lie He and Martin Jaggi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SjQ1iIqpfU}\n}", "github": "", "reviewers": "xPeE;4QtE;H7Ja", "pdf_size": 873319, "rating": "4;5;6", "confidence": "4;3;2", "soundness": "2;3;2", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "46;97;40", "wc_strengths": "33;95;59", "wc_weaknesses": "123;28;76", "wc_questions": "40;109;80", "wc_limitations": "1;86;28", "wc_review": "243;415;283", "wc_reply_reviewers": "59;0;79", "wc_reply_authors": "214;0;1435", "reply_reviewers": "1;0;3", "reply_authors": "2;1;4", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.0, 25.573423705088842 ], "wc_strengths_avg": [ 62.333333333333336, 25.42090128658349 ], "wc_weaknesses_avg": [ 75.66666666666667, 38.784303812524755 ], "wc_questions_avg": [ 76.33333333333333, 28.288199345702832 ], "wc_limitations_avg": [ 38.333333333333336, 35.462029772075304 ], "wc_review_avg": [ 313.6666666666667, 73.49074015744357 ], "wc_reply_reviewers_avg": [ 46.0, 33.53605025441527 ], "wc_reply_authors_avg": [ 549.6666666666666, 632.0919412729625 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17766535467903818220&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;tencent.com;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "EPFL;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.epfl.ch;https://www.tencent.com", "aff_unique_abbr": "EPFL;Tencent", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;China" }, { "title": "Understanding Generalizability of Diffusion Models Requires Rethinking the Hidden Gaussian Structure", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95082", "id": "Sk2duBGvrK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Sk2duBGvrK", "openreview": "https://openreview.net/forum?id=Sk2duBGvrK", "poster": "", "project": "", "author_site": "Xiang Li, Yixiang Dai, Qing Qu", "tldr": "", "abstract": "In this work, we study the generalizability of diffusion models by looking into the hidden properties of the learned score functions, which are essentially a series of deep denoisers trained on various noise levels. We observe that as diffusion models transition from memorization to generalization, their corresponding nonlinear diffusion denoisers exhibit increasing linearity. This discovery leads us to investigate the linear counterparts of the nonlinear diffusion models, which are a series of linear models trained to match the function mappings of the nonlinear diffusion denoisers. Surprisingly, these linear denoisers are approximately the optimal denoisers for a multivariate Gaussian distribution characterized by the empirical mean and covariance of the training dataset. This finding implies that diffusion models have the inductive bias towards capturing and utilizing the Gaussian structure (covariance information) of the training dataset for data generation. We empirically demonstrate that this inductive bias is a unique property of diffusion models in the generalization regime, which becomes increasingly evident when the model's capacity is relatively small compared to the training dataset size. In the case that the model is highly overparameterized, this inductive bias emerges during the initial training phases before the model fully memorizes its training data. Our study provides crucial insights into understanding the notable strong generalization phenomenon recently observed in real-world diffusion models.", "keywords": "diffusion models;inductive bias;generalization;memorization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xiang Li;Yixiang Dai;Qing Qu", "authorids": "~Xiang_Li44;~Yixiang_Dai1;~Qing_Qu2", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/xiang-li-860635242/;;https://qingqu.engin.umich.edu/", "dblp": ";;127/6874-1", "google_scholar": "RO2ZlG8AAAAJ;;JfblW3MAAAAJ", "orcid": ";;0000-0001-9136-558X", "linkedin": ";yxiang-dai-392937288/;qing-q-1a0b9746/", "or_profile": "~Xiang_Li44;~Yixiang_Dai1;~Qing_Qu2", "aff": "University of Michigan - Ann Arbor;Shanghai Jiaotong University;University of Michigan", "aff_domain": "umich.edu;sjtu.edu.cn;umich.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nli2024understanding,\ntitle={Understanding Generalizability of Diffusion Models Requires Rethinking the Hidden Gaussian Structure},\nauthor={Xiang Li and Yixiang Dai and Qing Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Sk2duBGvrK}\n}", "github": "", "reviewers": "6yNJ;N1bC;VCtP", "pdf_size": 9349121, "rating": "4;6;7", "confidence": "4;5;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;4", "wc_summary": "99;42;48", "wc_strengths": "19;81;61", "wc_weaknesses": "130;823;127", "wc_questions": "337;174;15", "wc_limitations": "41;187;8", "wc_review": "626;1307;259", "wc_reply_reviewers": "472;727;15", "wc_reply_authors": "1496;685;15", "reply_reviewers": "2;2;1", "reply_authors": "6;3;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 63.0, 25.573423705088842 ], "wc_strengths_avg": [ 53.666666666666664, 25.837096500101467 ], "wc_weaknesses_avg": [ 360.0, 327.39273052406037 ], "wc_questions_avg": [ 175.33333333333334, 131.45933042411085 ], "wc_limitations_avg": [ 78.66666666666667, 77.7788888809525 ], "wc_review_avg": [ 730.6666666666666, 434.19836736475906 ], "wc_reply_reviewers_avg": [ 404.6666666666667, 294.5463555292368 ], "wc_reply_authors_avg": [ 732.0, 605.5284193715987 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.18898223650461357, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13801527288419754143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umich.edu;sjtu.edu.cn;umich.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Michigan;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "UM;SJTU", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Optimal Hypothesis Selection in (Almost) Linear Time", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95081", "id": "Skv26JteFz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Skv26JteFz", "openreview": "https://openreview.net/forum?id=Skv26JteFz", "poster": "/media/PosterPDFs/NeurIPS%202024/95081.png?t=1733776953.9501443", "project": "", "author_site": "Maryam Aliakbarpour, Mark Bun, Adam Smith", "tldr": "", "abstract": "Hypothesis selection, also known as density estimation, is a fundamental problem in statistics and learning theory. Suppose we are given a sample set from an unknown distribution $P$ and a finite class of candidate distributions (called hypotheses) $\\mathcal{H} \\coloneqq \\{H_1, H_2, \\ldots, H_n\\}$. The aim is to design an algorithm that selects a distribution $\\hat H$ in $\\mathcal{H}$ that best fits the data. The algorithm's accuracy is measured based on the distance between $\\hat{H}$ and $P$ compared to the distance of the closest distribution in $\\mathcal{H}$ to $P$ (denoted by $OPT$). Concretely, we aim for $\\|\\hat{H} - P\\|_{TV}$ to be at most $ \\alpha \\cdot OPT + \\epsilon$ for some small $\\epsilon$ and $\\alpha$. \nWhile it is possible to decrease the value of $\\epsilon$ as the number of samples increases, $\\alpha$ is an inherent characteristic of the algorithm. In fact, one cannot hope to achieve $\\alpha < 3$ even when there are only two candidate hypotheses, unless the number of samples is proportional to the domain size of $P$ [Bousquet, Kane, Moran '19]. Finding the best $\\alpha$ has been one of the main focuses of studies of the problem since early work of [Devroye, Lugosi '01]. Prior to our work, no algorithm was known that achieves $\\alpha = 3$ in near-linear time. We provide the first algorithm that operates in almost linear time ($\\tilde{O}(n/\\epsilon^3)$ time) and achieves $\\alpha = 3$. This result improves upon a long list of results in hypothesis selection. Previously known algorithms either had worse time complexity, a larger factor $\\alpha$, or extra assumptions about the problem setting.\nIn addition to this algorithm, we provide another (almost) linear-time algorithm with better dependency on the additive accuracy parameter $\\epsilon$, albeit with a slightly worse accuracy parameter, $\\alpha = 4$.", "keywords": "hypothesis selection;distribution learning;density estimation;time efficient algorithms;computational constraints", "primary_area": "learning_theory", "supplementary_material": "", "author": "Maryam Aliakbarpour;Mark Bun;Adam Smith", "authorids": "~Maryam_Aliakbarpour1;~Mark_Bun1;~Adam_Smith1", "gender": "F;;M", "homepage": "https://maryamaliakbarpour.com;https://cs-people.bu.edu/mbun/;http://cs-people.bu.edu/ads22", "dblp": "175/1689;126/4933;04/5072", "google_scholar": "Q0crxvwAAAAJ;oDwLyYUAAAAJ;fkGi-JMAAAAJ", "orcid": "0000-0001-5064-3221;;", "linkedin": ";;", "or_profile": "~Maryam_Aliakbarpour1;~Mark_Bun1;~Adam_Smith1", "aff": "University of California, Berkeley;Boston University;Google", "aff_domain": "berkeley.edu;bu.edu;google.com", "position": "Research Fellow;Assistant Professor;Researcher", "bibtex": "@inproceedings{\naliakbarpour2024optimal,\ntitle={Optimal Hypothesis Selection in (Almost) Linear Time},\nauthor={Maryam Aliakbarpour and Mark Bun and Adam Smith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Skv26JteFz}\n}", "github": "", "reviewers": "Zrpk;fMmw;URPo;reSf", "pdf_size": 529630, "rating": "5;6;7;8", "confidence": "3;4;3;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "1;3;3;3", "wc_summary": "79;123;162;441", "wc_strengths": "77;15;76;113", "wc_weaknesses": "227;69;91;83", "wc_questions": "118;11;108;45", "wc_limitations": "1;1;1;7", "wc_review": "502;219;438;689", "wc_reply_reviewers": "82;5;4;19", "wc_reply_authors": "77;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 201.25, 141.49977915177112 ], "wc_strengths_avg": [ 70.25, 35.20919624189112 ], "wc_weaknesses_avg": [ 117.5, 63.70831970786861 ], "wc_questions_avg": [ 70.5, 44.30857704779064 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 462.0, 167.89431199418283 ], "wc_reply_reviewers_avg": [ 27.5, 32.01952529317073 ], "wc_reply_authors_avg": [ 19.25, 33.34197804570089 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eR3unIdVX70J:scholar.google.com/&scioq=Optimal+Hypothesis+Selection+in+(Almost)+Linear+Time&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "berkeley.edu;bu.edu;google.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Boston University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.berkeley.edu;https://www.bu.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;BU;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dual Encoder GAN Inversion for High-Fidelity 3D Head Reconstruction from Single Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95080", "id": "SlDx451MjC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SlDx451MjC", "openreview": "https://openreview.net/forum?id=SlDx451MjC", "poster": "/media/PosterPDFs/NeurIPS%202024/95080.png?t=1731520930.4079595", "project": "", "author_site": "Bahri Batuhan Bilecen, Ahmet G\u00f6kmen, Aysegul Dundar", "tldr": "", "abstract": "3D GAN inversion aims to project a single image into the latent space of a 3D Generative Adversarial Network (GAN), thereby achieving 3D geometry reconstruction. While there exist encoders that achieve good results in 3D GAN inversion, they are predominantly built on EG3D, which specializes in synthesizing near-frontal views and is limiting in synthesizing comprehensive 3D scenes from diverse viewpoints. In contrast to existing approaches, we propose a novel framework built on PanoHead, which excels in synthesizing images from a 360-degree perspective. To achieve realistic 3D modeling of the input image, we introduce a dual encoder system tailored for high-fidelity reconstruction and realistic generation from different viewpoints. Accompanying this, we propose a stitching framework on the triplane domain to get the best predictions from both. To achieve seamless stitching, both encoders must output consistent results despite being specialized for different tasks. For this reason, we carefully train these encoders using specialized losses, including an adversarial loss based on our novel occlusion-aware triplane discriminator. Experiments reveal that our approach surpasses the existing encoder training methods qualitatively and quantitatively.", "keywords": "3D Reconstruction;GAN", "primary_area": "generative_models", "supplementary_material": "", "author": "Bahri Batuhan Bilecen;Ahmet Berke G\u00f6kmen;Aysegul Dundar", "authorids": "~Bahri_Batuhan_Bilecen1;~Ahmet_Berke_G\u00f6kmen1;~Aysegul_Dundar1", "gender": "M;M;", "homepage": "http://three-bee.github.io/;https://berkegokmen1.github.io;", "dblp": "322/0623;388/2209;119/4855", "google_scholar": "4GUU0H4AAAAJ;rg7Vk-gAAAAJ;pvu770UAAAAJ", "orcid": ";;", "linkedin": "bbatuhan/;berkegokmen/;", "or_profile": "~Bahri_Batuhan_Bilecen1;~Ahmet_Berke_G\u00f6kmen1;~Aysegul_Dundar1", "aff": "Aselsan Research;ETH Zurich;NVIDIA", "aff_domain": "aselsan.com.tr;vision.ee.ethz.ch;nvidia.com", "position": "Researcher;Intern;Deep Learning Applied Researcher", "bibtex": "@inproceedings{\nbilecen2024dual,\ntitle={Dual Encoder {GAN} Inversion for High-Fidelity 3D Head Reconstruction from Single Images},\nauthor={Bahri Batuhan Bilecen and Ahmet Berke G{\\\"o}kmen and Aysegul Dundar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SlDx451MjC}\n}", "github": "", "reviewers": "jtFz;AnnS;XyTr;9tZp;Dd7h", "pdf_size": 12486517, "rating": "5;5;6;6;7", "confidence": "4;5;3;4;5", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;2;3;2;3", "wc_summary": "82;66;78;56;30", "wc_strengths": "49;48;66;12;26", "wc_weaknesses": "127;60;47;80;40", "wc_questions": "149;34;70;123;3", "wc_limitations": "11;36;43;19;3", "wc_review": "418;244;304;290;102", "wc_reply_reviewers": "34;12;11;27;0", "wc_reply_authors": "27;40;25;38;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 62.4, 18.607525359380812 ], "wc_strengths_avg": [ 40.2, 18.977881862842334 ], "wc_weaknesses_avg": [ 70.8, 31.224349472807276 ], "wc_questions_avg": [ 75.8, 54.160502213328854 ], "wc_limitations_avg": [ 22.4, 15.014659503298768 ], "wc_review_avg": [ 271.6, 102.3202814695112 ], "wc_reply_reviewers_avg": [ 16.8, 12.155657119218196 ], "wc_reply_authors_avg": [ 26.0, 14.268847185389575 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0714285714285715, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15748572921641808209&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "aselsan.com.tr;vision.ee.ethz.ch;nvidia.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Aselsan;ETH Zurich;NVIDIA", "aff_unique_dep": "Research;;NVIDIA Corporation", "aff_unique_url": "https://www.aselsan.com.tr;https://www.ethz.ch;https://www.nvidia.com", "aff_unique_abbr": ";ETHZ;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "T\u00fcrkiye;Switzerland;United States" }, { "title": "Adapting to Unknown Low-Dimensional Structures in Score-Based Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95079", "id": "SnTxbQSrW7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SnTxbQSrW7", "openreview": "https://openreview.net/forum?id=SnTxbQSrW7", "poster": "/media/PosterPDFs/NeurIPS%202024/95079.png?t=1733016804.0481207", "project": "", "author_site": "Gen Li, Yuling Yan", "tldr": "", "abstract": "This paper investigates score-based diffusion models when the underlying target distribution is concentrated on or near low-dimensional manifolds within the higher-dimensional space in which they formally reside, a common characteristic of natural image distributions. Despite previous efforts to understand the data generation process of diffusion models, existing theoretical support remains highly suboptimal in the presence of low-dimensional structure, which we strengthen in this paper. For the popular Denoising Diffusion Probabilistic Model (DDPM), we find that the dependency of the error incurred within each denoising step on the ambient dimension $d$ is in general unavoidable. We further identify a unique design of coefficients that yields a converges rate at the order of $O(k^{2}/\\sqrt{T})$ (up to log factors), where $k$ is the intrinsic dimension of the target distribution and $T$ is the number of steps. This represents the first theoretical demonstration that the DDPM sampler can adapt to unknown low-dimensional structures in the target distribution, highlighting the critical importance of coefficient design. All of this is achieved by a novel set of analysis tools that characterize the algorithmic dynamics in a more deterministic manner.", "keywords": "diffusion models;score-based generative models;low-dimensional structure;coefficient design", "primary_area": "learning_theory", "supplementary_material": "", "author": "Gen Li;Yuling Yan", "authorids": "~Gen_Li2;~Yuling_Yan1", "gender": "M;", "homepage": ";https://yulingy.github.io", "dblp": "28/538-5.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;yYE8Xo8AAAAJ", "orcid": "0000-0002-3078-9191;", "linkedin": ";", "or_profile": "~Gen_Li2;~Yuling_Yan1", "aff": "The Chinese University of Hong Kong;Massachusetts Institute of Technology", "aff_domain": "cuhk.edu.hk;mit.edu", "position": "Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nli2024adapting,\ntitle={Adapting to Unknown Low-Dimensional Structures in Score-Based Diffusion Models},\nauthor={Gen Li and Yuling Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SnTxbQSrW7}\n}", "github": "", "reviewers": "NxHA;kJLP;1aoU;SgF5", "pdf_size": 644547, "rating": "4;5;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "25;75;131;60", "wc_strengths": "77;46;121;67", "wc_weaknesses": "76;112;129;42", "wc_questions": "187;102;80;31", "wc_limitations": "6;6;2;1", "wc_review": "371;341;463;201", "wc_reply_reviewers": "93;186;54;5", "wc_reply_authors": "572;518;434;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;3;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.75, 38.21239981995373 ], "wc_strengths_avg": [ 77.75, 27.36215452043205 ], "wc_weaknesses_avg": [ 89.75, 33.558717198367404 ], "wc_questions_avg": [ 100.0, 56.422513237182194 ], "wc_limitations_avg": [ 3.75, 2.277608394786075 ], "wc_review_avg": [ 344.0, 94.00531899844816 ], "wc_reply_reviewers_avg": [ 84.5, 66.37959023675877 ], "wc_reply_authors_avg": [ 381.0, 225.39964507514205 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13686828117941722694&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cuhk.edu.hk;mit.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Chinese University of Hong Kong;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://web.mit.edu", "aff_unique_abbr": "CUHK;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "Tree of Attacks: Jailbreaking Black-Box LLMs Automatically", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95078", "id": "SoM3vngOH5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SoM3vngOH5", "openreview": "https://openreview.net/forum?id=SoM3vngOH5", "poster": "", "project": "", "author_site": "Anay Mehrotra, Manolis Zampetakis, Paul Kassianik, Blaine Nelson, Hyrum Anderson, Yaron Singer, Amin Karbasi", "tldr": "", "abstract": "While Large Language Models (LLMs) display versatile functionality, they continue to generate harmful, biased, and toxic content, as demonstrated by the prevalence of human-designed *jailbreaks*. In this work, we present *Tree of Attacks with Pruning* (TAP), an automated method for generating jailbreaks that only requires black-box access to the target LLM. TAP utilizes an attacker LLM to iteratively refine candidate (attack) prompts until one of the refined prompts jailbreaks the target. In addition, before sending prompts to the target, TAP assesses them and prunes the ones unlikely to result in jailbreaks, reducing the number of queries sent to the target LLM. In empirical evaluations, we observe that TAP generates prompts that jailbreak state-of-the-art LLMs (including GPT4-Turbo and GPT4o) for more than 80% of the prompts. This significantly improves upon the previous state-of-the-art black-box methods for generating jailbreaks while using a smaller number of queries than them. Furthermore, TAP is also capable of jailbreaking LLMs protected by state-of-the-art *guardrails*, e.g., LlamaGuard.", "keywords": "LLMs;black-box jailbreaks;red teaming;alignment", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/23ce2d61fa479b92d0240e8af4257ab96297f1a4.gz", "author": "Anay Mehrotra;Manolis Zampetakis;Paul Kassianik;Blaine Nelson;Hyrum S Anderson;Yaron Singer;Amin Karbasi", "authorids": "~Anay_Mehrotra1;~Manolis_Zampetakis2;~Paul_Kassianik1;~Blaine_Nelson1;~Hyrum_S_Anderson1;~Yaron_Singer1;~Amin_Karbasi3", "gender": ";M;;M;M;M;M", "homepage": ";https://mzampet.com/;;;;;http://seas.yale.edu/faculty-research/faculty-directory/amin-karbasi", "dblp": "234/8808;;;80/6028;;https://dblp.uni-trier.de/pers/hd/s/Singer:Yaron;49/7411", "google_scholar": ";;qQCpQQwAAAAJ;;pP6yo9EAAAAJ;;https://scholar.google.com.tw/citations?user=VusVB38AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Anay_Mehrotra1;~Manolis_Zampetakis2;~Paul_Kassianik1;~Blaine_Nelson1;~Hyrum_S_Anderson1;~Yaron_Singer1;~amin_karbasi1", "aff": "Yale University;Yale University;Robust Intelilgence;Robust Intelligence;Robust Intelligence;Harvard University;Google", "aff_domain": "yale.edu;yale.edu;robustintelligence.com;robustintellgence.com;robustintelligence.com;harvard.edu;google.com", "position": "PhD student;Assistant Professor;Principal Researcher;Researcher;Principal Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nmehrotra2024tree,\ntitle={Tree of Attacks: Jailbreaking Black-Box {LLM}s Automatically},\nauthor={Anay Mehrotra and Manolis Zampetakis and Paul Kassianik and Blaine Nelson and Hyrum S Anderson and Yaron Singer and Amin Karbasi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SoM3vngOH5}\n}", "github": "", "reviewers": "2Heq;LFqH;mTdE;psVX", "pdf_size": 880164, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "3;3;4;2", "wc_summary": "46;67;109;94", "wc_strengths": "49;44;87;97", "wc_weaknesses": "61;24;109;218", "wc_questions": "3;187;2;34", "wc_limitations": "8;4;2;8", "wc_review": "167;326;309;451", "wc_reply_reviewers": "64;53;62;27", "wc_reply_authors": "214;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.0, 24.279621084357967 ], "wc_strengths_avg": [ 69.25, 23.09085316743407 ], "wc_weaknesses_avg": [ 103.0, 72.9143332959988 ], "wc_questions_avg": [ 56.5, 76.43461257833391 ], "wc_limitations_avg": [ 5.5, 2.598076211353316 ], "wc_review_avg": [ 313.25, 100.67863477421612 ], "wc_reply_reviewers_avg": [ 51.5, 14.739402972983676 ], "wc_reply_authors_avg": [ 53.5, 92.66471820493493 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 239, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3082256392530983010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "yale.edu;yale.edu;robustintelligence.com;robustintellgence.com;robustintelligence.com;harvard.edu;google.com", "author_num": 7, "aff_unique_index": "0;0;1;1;1;2;3", "aff_unique_norm": "Yale University;Robust Intelligence;Harvard University;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.yale.edu;;https://www.harvard.edu;https://www.google.com", "aff_unique_abbr": "Yale;;Harvard;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Zero-Shot Scene Reconstruction from Single Images with Deep Prior Assembly", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95077", "id": "SoTK84ewb7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SoTK84ewb7", "openreview": "https://openreview.net/forum?id=SoTK84ewb7", "poster": "", "project": "", "author_site": "Junsheng Zhou, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "Large language and vision models have been leading a revolution in visual computing. By greatly scaling up sizes of data and model parameters, the large models learn deep priors which lead to remarkable performance in various tasks. In this work, we present deep prior assembly, a novel framework that assembles diverse deep priors from large models for scene reconstruction from single images in a zero-shot manner. We show that this challenging task can be done without extra knowledge but just simply generalizing one deep prior in one sub-task. To this end, we introduce novel methods related to poses, scales, and occlusion parsing which are keys to enable deep priors to work together in a robust way. Deep prior assembly does not require any 3D or 2D data-driven training in the task and demonstrates superior performance in generalizing priors to open-world scenes. We conduct evaluations on various datasets, and report analysis, numerical and visual comparisons with the latest methods to show our superiority. Project page: https://junshengzhou.github.io/DeepPriorAssembly.", "keywords": "Scene Reconstruction from Single Images;Zero-Shot Reconstruction;Deep Prior Assembly", "primary_area": "machine_vision", "supplementary_material": "/attachment/d6f71a821fa9e31dffcd016fc08eb2705950b494.zip", "author": "Junsheng Zhou;Yu-Shen Liu;Zhizhong Han", "authorids": "~Junsheng_Zhou3;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": "M;M;M", "homepage": "https://junshengzhou.github.io/;https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";44/2229.html;166/5173", "google_scholar": "afPIrLYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7305-1915;", "linkedin": ";;", "or_profile": "~Junsheng_Zhou3;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": "Tsinghua University;Tsinghua University;Wayne State University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "position": "MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024zeroshot,\ntitle={Zero-Shot Scene Reconstruction from Single Images with Deep Prior Assembly},\nauthor={Junsheng Zhou and Yu-Shen Liu and Zhizhong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SoTK84ewb7}\n}", "github": "", "reviewers": "veoH;DEJS;67ke;osDy", "pdf_size": 41602685, "rating": "5;5;5;5", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "64;61;63;69", "wc_strengths": "50;69;150;17", "wc_weaknesses": "164;171;104;167", "wc_questions": "6;71;43;53", "wc_limitations": "6;26;4;7", "wc_review": "290;398;364;313", "wc_reply_reviewers": "210;74;119;102", "wc_reply_authors": "1249;309;611;86", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.25, 2.947456530637899 ], "wc_strengths_avg": [ 71.5, 48.99234634103576 ], "wc_weaknesses_avg": [ 151.5, 27.536339626028727 ], "wc_questions_avg": [ 43.25, 23.731571797923543 ], "wc_limitations_avg": [ 10.75, 8.870597499605086 ], "wc_review_avg": [ 341.25, 42.31651568832198 ], "wc_reply_reviewers_avg": [ 126.25, 50.95279678290486 ], "wc_reply_authors_avg": [ 563.75, 437.3050279839005 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11340879062770339989&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tsinghua University;Wayne State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://wayne.edu", "aff_unique_abbr": "THU;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Unscrambling disease progression at scale: fast inference of event permutations with optimal transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95076", "id": "SoYCqMiVIh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SoYCqMiVIh", "openreview": "https://openreview.net/forum?id=SoYCqMiVIh", "poster": "/media/PosterPDFs/NeurIPS%202024/95076.png?t=1731662401.653206", "project": "", "author_site": "Peter Wijeratne, Daniel Alexander", "tldr": "", "abstract": "Disease progression models infer group-level temporal trajectories of change in patients' features as a chronic degenerative condition plays out. They provide unique insight into disease biology and staging systems with individual-level clinical utility. Discrete models consider disease progression as a latent permutation of events, where each event corresponds to a feature becoming measurably abnormal. However, permutation inference using traditional maximum likelihood approaches becomes prohibitive due to combinatoric explosion, severely limiting model dimensionality and utility. Here we leverage ideas from optimal transport to model disease progression as a latent permutation matrix of events belonging to the Birkhoff polytope, facilitating fast inference via optimisation of the variational lower bound. This enables a factor of 1000 times faster inference than the current state of the art and, correspondingly, supports models with several orders of magnitude more features than the current state of the art can consider. Experiments demonstrate the increase in speed, accuracy and robustness to noise in simulation. Further experiments with real-world imaging data from two separate datasets, one from Alzheimer's disease patients, the other age-related macular degeneration, showcase, for the first time, pixel-level disease progression events in the brain and eye, respectively. Our method is low compute, interpretable and applicable to any progressive condition and data modality, giving it broad potential clinical utility.", "keywords": "optimal transport;variational inference;latent variable model;disease progression", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Peter A. Wijeratne;Daniel C. Alexander", "authorids": "~Peter_A._Wijeratne1;~Daniel_C._Alexander1", "gender": "M;M", "homepage": "https://pawij.github.io;http://www.cs.ucl.ac.uk/staff/d.alexander", "dblp": ";37/6152", "google_scholar": ";https://scholar.google.co.uk/citations?user=mH-ZOQEAAAAJ", "orcid": ";0000-0003-2439-350X", "linkedin": ";daniel-alexander-2b096737", "or_profile": "~Peter_A._Wijeratne1;~Daniel_C._Alexander1", "aff": "University of Sussex;University College London", "aff_domain": "sussex.ac.uk;ucl.ac.uk", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwijeratne2024unscrambling,\ntitle={Unscrambling disease progression at scale: fast inference of event permutations with optimal transport},\nauthor={Peter A. Wijeratne and Daniel C. Alexander},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SoYCqMiVIh}\n}", "github": "", "reviewers": "5r2K;Vfdn;SCze;wy4q", "pdf_size": 1686023, "rating": "5;6;7;7", "confidence": "2;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;2;3;2", "wc_summary": "56;105;33;106", "wc_strengths": "19;105;66;42", "wc_weaknesses": "135;149;287;137", "wc_questions": "31;84;6;130", "wc_limitations": "10;17;6;32", "wc_review": "251;460;398;447", "wc_reply_reviewers": "167;149;17;55", "wc_reply_authors": "355;189;0;15", "reply_reviewers": "3;3;1;1", "reply_authors": "3;3;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 31.567388235329194 ], "wc_strengths_avg": [ 58.0, 31.81980515339464 ], "wc_weaknesses_avg": [ 177.0, 63.733821476512766 ], "wc_questions_avg": [ 62.75, 47.96548238056196 ], "wc_limitations_avg": [ 16.25, 9.908960591303208 ], "wc_review_avg": [ 389.0, 82.96083413274 ], "wc_reply_reviewers_avg": [ 97.0, 62.78534860936905 ], "wc_reply_authors_avg": [ 139.75, 144.78496986911313 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ke9N7ZQAr8gJ:scholar.google.com/&scioq=Unscrambling+disease+progression+at+scale:+fast+inference+of+event+permutations+with+optimal+transport&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "sussex.ac.uk;ucl.ac.uk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Sussex;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.sussex.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "Sussex;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "Sp9cj4pNYD", "title": "SurgicAI: A Hierarchical Platform for Fine-Grained Surgical Policy Learning and Benchmarking", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Despite advancements in robotic-assisted surgery, automating complex tasks like suturing remains challenging due to the need for adaptability and precision. Learning-based approaches, particularly reinforcement learning (RL) and imitation learning (IL), require realistic simulation environments for efficient data collection. However, current platforms often include only relatively simple, non-dexterous manipulations and lack the flexibility required for effective learning and generalization. We introduce SurgicAI, a novel platform for development and benchmarking that addresses these challenges by providing the flexibility to accommodate both modular subtasks and more importantly task decomposition in RL-based surgical robotics. Compatible with the da Vinci Surgical System, SurgicAI offers a standardized pipeline for collecting and utilizing expert demonstrations. It supports the deployment of multiple RL and IL approaches, and the training of both singular and compositional subtasks in suturing scenarios, featuring high dexterity and modularization. Meanwhile, SurgicAI sets clear metrics and benchmarks for the assessment of learned policies. We implemented and evaluated multiple RL and IL algorithms on SurgicAI. Our detailed benchmark analysis underscores SurgicAI's potential to advance policy learning in surgical robotics. Details: https://github.com/surgical-robotics-ai/SurgicAI", "keywords": "Robotic-assisted surgery;Reinforcement learning;Imitation learning;Suturing automation", "primary_area": "", "supplementary_material": "", "author": "Jin Wu;Haoying Zhou;Peter Kazanzides;Adnan Munawar;Anqi Liu", "authorids": "~Jin_Wu2;~Haoying_Zhou1;~Peter_Kazanzides1;~Adnan_Munawar1;~Anqi_Liu2", "gender": "M;M;M;M;F", "homepage": ";;https://smarts.lcsr.jhu.edu/;;https://anqiliu-ai.github.io/", "dblp": ";309/1441;;;", "google_scholar": ";rVeVNB0AAAAJ;https://scholar.google.com/citations?hl=en;;Q8yp6zQAAAAJ", "orcid": ";0000-0001-6976-4015;0000-0002-6117-5467;;0000-0002-0468-5698", "linkedin": "jin-wu-31856128a/?locale=en_US;haoyingzhoujack/;;adnan-munawar-56190a200/;", "or_profile": "~Jin_Wu2;~Haoying_Zhou1;~Peter_Kazanzides1;~Adnan_Munawar1;~Anqi_Liu2", "aff": "Johns Hopkins University;Worcester Polytechnic Institute;Johns Hopkins University;Johns Hopkins University;University of Illinois, Chicago", "aff_domain": "jh.edu;wpi.edu;jhu.edu;jhu.edu;uic.edu", "position": "MS student;PhD student;Full Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nwu2024surgicai,\ntitle={Surgic{AI}: A Hierarchical Platform for Fine-Grained Surgical Policy Learning and Benchmarking},\nauthor={Jin Wu and Haoying Zhou and Peter Kazanzides and Adnan Munawar and Anqi Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Sp9cj4pNYD}\n}", "github": "", "project": "", "reviewers": "D6Tc;SDfj;LYxJ", "site": "https://openreview.net/forum?id=Sp9cj4pNYD", "pdf_size": 12011622, "rating": "6;6;7", "confidence": "4;3;3", "wc_summary_and_contributions": "63;68;141", "wc_strengths": "68;119;23", "wc_improvement": "135;377;133", "wc_limitations": "15;52;8", "wc_correctness": "109;62;8", "wc_clarity": "1;44;18", "wc_relation_to_prior_work": "1;150;37", "wc_documentation": "34;28;13", "wc_additional_feedback": "1;1;1", "wc_review": "427;901;382", "wc_reply_reviewers": "0;434;0", "wc_reply_authors": "0;712;0", "reply_reviewers": "0;3;0", "reply_authors": "1;3;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 90.66666666666667, 35.64952859280034 ], "wc_strengths_avg": [ 70.0, 39.21734310225516 ], "wc_improvement_avg": [ 215.0, 114.55420841971134 ], "wc_limitations_avg": [ 25.0, 19.30457631409368 ], "wc_correctness_avg": [ 59.666666666666664, 41.26607430915726 ], "wc_clarity_avg": [ 21.0, 17.682382946499793 ], "wc_relation_to_prior_work_avg": [ 62.666666666666664, 63.47878035655345 ], "wc_documentation_avg": [ 25.0, 8.831760866327848 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 570.0, 234.77223004435598 ], "wc_reply_reviewers_avg": [ 144.66666666666666, 204.58956202330776 ], "wc_reply_authors_avg": [ 237.33333333333334, 335.64001880321456 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xDfrgw0_DDgJ:scholar.google.com/&scioq=SurgicAI:+A+Hierarchical+Platform+for+Fine-Grained+Surgical+Policy+Learning+and+Benchmarking&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Johns Hopkins University;Worcester Polytechnic Institute;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jhu.edu;https://www.wpi.edu;https://www.uic.edu", "aff_unique_abbr": "JHU;WPI;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unveiling Encoder-Free Vision-Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95075", "id": "SpPAB1tmlC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SpPAB1tmlC", "openreview": "https://openreview.net/forum?id=SpPAB1tmlC", "poster": "/media/PosterPDFs/NeurIPS%202024/95075.png?t=1731839831.3002152", "project": "", "author_site": "Haiwen Diao, Yufeng Cui, Xiaotong Li, Yueze Wang, Huchuan Lu, Xinlong Wang", "tldr": "", "abstract": "Existing vision-language models (VLMs) mostly rely on vision encoders to extract visual features followed by large language models (LLMs) for visual-language tasks. However, the vision encoders set a strong inductive bias in abstracting visual representation, e.g., resolution, aspect ratio, and semantic priors, which could impede the flexibility and efficiency of the VLMs. Training pure VLMs that accept the seamless vision and language inputs, i.e., without vision encoders, remains challenging and rarely explored. Empirical observations reveal that direct training without encoders results in slow convergence and large performance gaps. In this work, we bridge the gap between encoder-based and encoder-free models, and present a simple yet effective training recipe towards pure VLMs. Specifically, we unveil the key aspects of training encoder-free VLMs efficiently via thorough experiments: (1) Bridging vision-language representation inside one unified decoder; (2) Enhancing visual recognition capability via extra supervision. With these strategies, we launch EVE, an encoder-free vision-language model that can be trained and forwarded efficiently. Notably, solely utilizing 35M publicly accessible data, EVE can impressively rival the encoder-based VLMs of similar capacities across multiple vision-language benchmarks. It significantly outperforms the counterpart Fuyu-8B with mysterious training procedures and undisclosed training data. We believe that EVE provides a transparent and efficient route for developing pure decoder-only architecture across modalities.", "keywords": "Large Vision-Language Model;Encoder-Free Multi-Modality Model;Pure Decoder-only Architecture", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haiwen Diao;Yufeng Cui;Xiaotong Li;Yueze Wang;Huchuan Lu;Xinlong Wang", "authorids": "~Haiwen_Diao2;~Yufeng_Cui1;~Xiaotong_Li2;~Yueze_Wang1;~Huchuan_Lu1;~Xinlong_Wang2", "gender": "M;M;M;M;M;M", "homepage": "https://paranioar.github.io/;;https://github.com/lixiaotong97;https://yuezewang.github.io/;http://ice.dlut.edu.cn/lu/publications.html;", "dblp": "283/4467;;;;64/6896;", "google_scholar": "46eCjHQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;cpCE_T4AAAAJ;;D3nE0agAAAAJ;DPz0DjYAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Haiwen_Diao2;~Yufeng_Cui1;~Xiaotong_Li2;~Yueze_Wang1;~Huchuan_Lu1;~Xinlong_Wang2", "aff": "Dalian University of Technology;Beijing Academy of Artificial Intelligence;Peking University;Beijing Academy of Artificial Intelligence;Dalian University of Technology;Beijing Academy of Artificial Intelligence", "aff_domain": "dlut.edu.cn;baai.ac.cn;pku.edu.cn;baai.ac.cn;dlut.edu.cn;baai.ac.cn", "position": "PhD student;Researcher;PhD student;Researcher;Professor;Researcher", "bibtex": "@inproceedings{\ndiao2024unveiling,\ntitle={Unveiling Encoder-Free Vision-Language Models},\nauthor={Haiwen Diao and Yufeng Cui and Xiaotong Li and Yueze Wang and Huchuan Lu and Xinlong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SpPAB1tmlC}\n}", "github": "", "reviewers": "VqDP;Z41h;nP6r;gQSV", "pdf_size": 1565622, "rating": "5;6;6;6", "confidence": "4;5;3;4", "soundness": "2;4;3;3", "novelty": "2;3;3;2", "presentation": "3;4;3;3", "wc_summary": "49;86;65;72", "wc_strengths": "19;122;60;68", "wc_weaknesses": "150;251;164;87", "wc_questions": "2;207;23;16", "wc_limitations": "1;27;18;1", "wc_review": "221;693;330;244", "wc_reply_reviewers": "141;76;549;0", "wc_reply_authors": "466;511;1386;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 13.322912594474229 ], "wc_strengths_avg": [ 67.25, 36.66998091082132 ], "wc_weaknesses_avg": [ 163.0, 58.502136713115014 ], "wc_questions_avg": [ 62.0, 84.05652859831888 ], "wc_limitations_avg": [ 11.75, 11.211043662389331 ], "wc_review_avg": [ 372.0, 189.73007141726373 ], "wc_reply_reviewers_avg": [ 191.5, 212.3493583696452 ], "wc_reply_authors_avg": [ 590.75, 500.8319952838477 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9306402277328614143&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "dlut.edu.cn;baai.ac.cn;pku.edu.cn;baai.ac.cn;dlut.edu.cn;baai.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "Dalian University of Technology;Beijing Academy of Artificial Intelligence;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.baaic.cn;http://www.pku.edu.cn", "aff_unique_abbr": "DUT;BAAI;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "EnOF-SNN: Training Accurate Spiking Neural Networks via Enhancing the Output Feature", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95074", "id": "SpcEwP6EYt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SpcEwP6EYt", "openreview": "https://openreview.net/forum?id=SpcEwP6EYt", "poster": "/media/PosterPDFs/NeurIPS%202024/95074.png?t=1730347575.0486786", "project": "", "author_site": "Yufei Guo, Weihang Peng, Xiaode Liu, Yuanpei Chen, Yuhan Zhang, Xin Tong, Zhou Jie, Zhe Ma", "tldr": "", "abstract": "Spiking neural networks (SNNs) have gained more and more interest as one of the energy-efficient alternatives of conventional artificial neural networks (ANNs). They exchange 0/1 spikes for processing information, thus most of the multiplications in networks can be replaced by additions. However, binary spike feature maps will limit the expressiveness of the SNN and result in unsatisfactory performance compared with ANNs. \nIt is shown that a rich output feature representation, i.e., the feature vector before classifier) is beneficial to training an accurate model in ANNs for classification. \nWe wonder if it also does for SNNs and how to improve the feature representation of the SNN.\nTo this end, we materialize this idea in two special designed methods for SNNs.\nFirst, inspired by some ANN-SNN methods that directly copy-paste the weight parameters from trained ANN with light modification to homogeneous SNN can obtain a well-performed SNN, we use rich information of the weight parameters from the trained ANN counterpart to guide the feature representation learning of the SNN. \nIn particular, we present the SNN's and ANN's feature representation from the same input to ANN's classifier to product SNN's and ANN's outputs respectively and then align the feature with the KL-divergence loss as in knowledge distillation methods, called L_ AF loss.\nIt can be seen as a novel and effective knowledge distillation method specially designed for the SNN that comes from both the knowledge distillation and ANN-SNN methods. Various ablation study shows that the L_AF loss is more powerful than the vanilla knowledge distillation method.\nSecond, we replace the last Leaky Integrate-and-Fire (LIF) activation layer as the ReLU activation layer to generate the output feature, thus a more powerful SNN with full-precision feature representation can be achieved but with only a little extra computation.\nExperimental results show that our method consistently outperforms the current state-of-the-art algorithms on both popular non-spiking static and neuromorphic datasets. We provide an extremely simple but effective way to train high-accuracy spiking neural networks.", "keywords": "Spiking neural network", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yufei Guo;Weihang Peng;Xiaode Liu;Yuanpei Chen;Yuhan Zhang;Xin Tong;Zhou Jie;Zhe Ma", "authorids": "~Yufei_Guo1;~Weihang_Peng2;~Xiaode_Liu1;~Yuanpei_Chen1;~Yuhan_Zhang1;~Xin_Tong4;~Zhou_Jie3;~Zhe_Ma2", "gender": "M;;M;Non-Binary;F;M;F;M", "homepage": "https://github.com/yfguo91;;https://www.researchgate.net/profile/Xiaode-Liu;;https://github.com/hanbaobao950123;;https://www.zhouejie11.com;https://dblp.org/pid/22/6672", "dblp": "23/2981-1;;;;;86/2176-7;;22/6672-1", "google_scholar": ";;;;;;;", "orcid": ";;0000-0003-3067-4543;0000-0002-4674-553X;0000-0001-8579-4943;;;", "linkedin": ";;;;;;;", "or_profile": "~Yufei_Guo1;~Weihang_Peng2;~Xiaode_Liu1;~Yuanpei_Chen1;~Yuhan_Zhang1;~Xin_Tong4;~Zhou_Jie3;~Zhe_Ma2", "aff": "Intelligent Science & Technology Academy of CASIC;;CASIC;Baidu;Intelligent\u00a0Science\u00a0&\u00a0Technology\u00a0Academy\u00a0of\u00a0CASIC;Intelligent Science & Technology Academy of CASIC;China Aerospace Science and Industry Corporation Limited;Intelligent science and technology academy limited of CASIC", "aff_domain": "casic.cn;;pke.edu.cn;baidu.com;casic.cn;casic.cn;mail.tsinghua.edu.cn;casic.com", "position": "Researcher;;Researcher;Researcher;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nguo2024enofsnn,\ntitle={En{OF}-{SNN}: Training Accurate Spiking Neural Networks via Enhancing the Output Feature},\nauthor={Yufei Guo and Weihang Peng and Xiaode Liu and Yuanpei Chen and Yuhan Zhang and Xin Tong and Zhou Jie and Zhe Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SpcEwP6EYt}\n}", "github": "", "reviewers": "8VEP;BFUv;4HWX;bdtv", "pdf_size": 604655, "rating": "5;6;7;7", "confidence": "4;5;5;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "79;61;101;181", "wc_strengths": "73;35;47;71", "wc_weaknesses": "140;52;74;90", "wc_questions": "184;3;28;266", "wc_limitations": "4;3;1;6", "wc_review": "480;154;251;614", "wc_reply_reviewers": "0;38;41;23", "wc_reply_authors": "0;19;19;19", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.5, 45.83393938993243 ], "wc_strengths_avg": [ 56.5, 16.08570794214541 ], "wc_weaknesses_avg": [ 89.0, 32.38826948140329 ], "wc_questions_avg": [ 120.25, 109.04672163802083 ], "wc_limitations_avg": [ 3.5, 1.8027756377319946 ], "wc_review_avg": [ 374.75, 181.90845912161424 ], "wc_reply_reviewers_avg": [ 25.5, 16.224980739587952 ], "wc_reply_authors_avg": [ 14.25, 8.227241335952167 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6589381355539244&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 0, "email": "casic.cn;;pke.edu.cn;baidu.com;casic.cn;casic.cn;mail.tsinghua.edu.cn;casic.com", "author_num": 8, "aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "China Aerospace Science and Industry Corporation;Baidu;China Aerospace Science and Industry Corporation Limited", "aff_unique_dep": "Intelligent Science & Technology Academy;Baidu, Inc.;", "aff_unique_url": "http://www.casic.com.cn/;https://www.baidu.com;http://www.casic.com.cn", "aff_unique_abbr": "CASIC;Baidu;CASIC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SqW42eR2wC", "title": "Offline Inverse Constrained Reinforcement Learning for Safe-Critical Decision Making in Healthcare", "track": "main", "status": "Reject", "tldr": "", "abstract": "Reinforcement Learning (RL) applied in healthcare can lead to unsafe medical decisions and treatment, such as excessive dosages or abrupt changes, often due to agents overlooking common-sense constraints. Consequently, Constrained Reinforcement Learning (CRL) is a natural choice for safe decisions. However, specifying the exact cost function is inherently difficult in healthcare. Recent Inverse Constrained Reinforcement Learning (ICRL) is a promising approach that infers constraints from expert demonstrations. ICRL algorithms model Markovian decisions in an interactive environment. These settings do not align with the practical requirement of a decision-making system in healthcare, where decisions rely on historical treatment recorded in an offline dataset. To tackle these issues, we propose the Constraint Transformer (CT). Specifically, 1) utilize causal attention mechanism to incorporate historical decisions and observations into the constraint modeling and employ a non-Markovian layer for weighted constraints to capture critical states, 2) generative world model to perform exploratory data augmentation, thereby enabling offline RL methods to generate unsafe decision sequences. In multiple medical scenarios, empirical results demonstrate that CT can capture unsafe states and achieve strategies that approximate lower mortality rates, reducing the occurrence probability of unsafe behaviors.", "keywords": "Reinforcement Learning;Inverse Constrained Reinforcement Learning;Healthcare", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/e317128c589f16ffeeaeca7587cd2f35b9f1455c.zip", "author": "Nan Fang;Guiliang Liu;Wei Gong", "authorids": "~Nan_Fang1;~Guiliang_Liu1;~Wei_Gong1", "gender": "F;M;", "homepage": "https://nanfang2023.github.io/;http://guiliang.me/;https://ubiot.ustc.edu.cn/weigong/main.htm", "dblp": ";220/5411;11/3249-1", "google_scholar": ";CuMylvEAAAAJ;CtbzNl8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nan_Fang1;~Guiliang_Liu1;~Wei_Gong1", "aff": "University of Science and Technology of China;The Chinese University of Hong Kong, Shenzhen;University of Science and Technology of China", "aff_domain": "ustc.edu;cuhk.edu.hk;ustc.edu.cn", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024offline,\ntitle={Offline Inverse Constrained Reinforcement Learning for Safe-Critical Decision Making in Healthcare},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=SqW42eR2wC}\n}", "github": "", "project": "", "reviewers": "cbZJ;sHtP;guwJ;R5ke", "site": "https://openreview.net/forum?id=SqW42eR2wC", "pdf_size": 1130565, "rating": "3;5;5;5", "confidence": "5;2;4;2", "soundness": "1;3;2;2", "novelty": "2;3;2;3", "presentation": "1;2;3;2", "wc_summary": "54;88;76;114", "wc_strengths": "6;54;113;70", "wc_weaknesses": "20;160;481;73", "wc_questions": "653;2;43;118", "wc_limitations": "3;9;212;1", "wc_review": "736;313;925;376", "wc_reply_reviewers": "35;19;825;17", "wc_reply_authors": "29;0;525;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 83.0, 21.656407827707714 ], "wc_strengths_avg": [ 60.75, 38.27123593509883 ], "wc_weaknesses_avg": [ 183.5, 178.88613696986135 ], "wc_questions_avg": [ 204.0, 262.54618641298146 ], "wc_limitations_avg": [ 56.25, 89.97048127024775 ], "wc_review_avg": [ 587.5, 253.00247034367075 ], "wc_reply_reviewers_avg": [ 224.0, 347.0576321016439 ], "wc_reply_authors_avg": [ 138.5, 223.45972791534496 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5rD94OeZmWsJ:scholar.google.com/&scioq=Offline+Inverse+Constrained+Reinforcement+Learning+for+Safe-Critical+Decision+Making+in+Healthcare&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Science and Technology of China;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "USTC;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "MoGU: A Framework for Enhancing Safety of LLMs While Preserving Their Usability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95073", "id": "SrFbgIjb53", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SrFbgIjb53", "openreview": "https://openreview.net/forum?id=SrFbgIjb53", "poster": "/media/PosterPDFs/NeurIPS%202024/95073.png?t=1731597172.0246348", "project": "", "author_site": "YANRUI DU, Sendong Zhao, Danyang Zhao, Ming Ma, Yuhan Chen, Liangyu Huo, Qing Yang, Dongliang Xu, Bing Qin", "tldr": "", "abstract": "Large Language Models (LLMs) are increasingly deployed in various applications. As their usage grows, concerns regarding their safety are rising, especially in maintaining harmless responses when faced with malicious instructions. Many defense strategies have been developed to enhance the safety of LLMs. However, our research finds that existing defense strategies lead LLMs to predominantly adopt a rejection-oriented stance, thereby diminishing the usability of their responses to benign instructions. To solve this problem, we introduce the MoGU framework, designed to enhance LLMs' safety while preserving their usability. Our MoGU framework transforms the base LLM into two variants: the usable LLM and the safe LLM, and further employs dynamic routing to balance their contribution. When encountering malicious instructions, the router will assign a higher weight to the safe LLM to ensure that responses are harmless. Conversely, for benign instructions, the router prioritizes the usable LLM, facilitating usable and helpful responses. On various open-sourced LLMs, we compare multiple defense strategies to verify the superiority of our MoGU framework. Besides, our analysis provides key insights into the effectiveness of MoGU and verifies that our designed routing mechanism can effectively balance the contribution of each variant by assigning weights. Our work released the safer Llama2, Vicuna, Falcon, Dolphin, and Baichuan2.", "keywords": "Enhancing Safety;Preseving Usability;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/251ac796eb0c4576fd3e0b4c29ab6ea3d8d39804.zip", "author": "Yanrui Du;Sendong Zhao;Danyang Zhao;Ming Ma;Yuhan Chen;Liangyu Huo;Qing Yang;Dongliang Xu;Bing Qin", "authorids": "~Yanrui_Du1;~Sendong_Zhao2;~Danyang_Zhao1;~Ming_Ma1;~Yuhan_Chen4;~Liangyu_Huo1;~Qing_Yang11;~Dongliang_Xu2;~Bing_Qin2", "gender": "M;M;F;M;;M;M;M;", "homepage": ";https://sendongzhao.github.io/;https://kkkday.github.io/;;;;https://www.duxiaoman.com/index;;http://ir.hit.edu.cn/~qinb", "dblp": "280/1320;119/6283.html;;20/1028;;207/8210;47/3749;74/4912.html;86/5934.html", "google_scholar": "SpS35C8AAAAJ;ZtIhRvwAAAAJ;;;;;;;LKnCub0AAAAJ", "orcid": ";;;;;;;;0000-0002-2543-5604", "linkedin": ";;;;;;;;", "or_profile": "~Yanrui_Du1;~Sendong_Zhao2;~Danyang_Zhao1;~Ming_Ma1;~Yuhan_Chen4;~Liangyu_Huo1;~Qing_Yang11;~Dongliang_Xu2;~Bing_Qin2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;;Du Xiaoman Technology(BeiJing);;DuXiaoman Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu;hit.edu.cn;;duxiaoman.com;;duxiaoman.com;hit.edu.cn", "position": "PhD student;Associate Professor;Undergrad student;Undergrad student;;Postdoc;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ndu2024mogu,\ntitle={Mo{GU}: A Framework for Enhancing Safety of {LLM}s While Preserving Their Usability},\nauthor={Yanrui Du and Sendong Zhao and Danyang Zhao and Ming Ma and Yuhan Chen and Liangyu Huo and Qing Yang and Dongliang Xu and Bing Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SrFbgIjb53}\n}", "github": "", "reviewers": "ja88;Wh4C;3hkc;mJ14", "pdf_size": 1181565, "rating": "4;5;5;6", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;3;2", "wc_summary": "31;42;88;61", "wc_strengths": "34;24;44;60", "wc_weaknesses": "148;140;70;182", "wc_questions": "25;48;7;78", "wc_limitations": "16;39;9;1", "wc_review": "254;293;218;382", "wc_reply_reviewers": "75;110;18;0", "wc_reply_authors": "1350;395;80;39", "reply_reviewers": "1;2;1;0", "reply_authors": "5;3;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.5, 21.615966321217286 ], "wc_strengths_avg": [ 40.5, 13.294735800308331 ], "wc_weaknesses_avg": [ 135.0, 40.70626487409524 ], "wc_questions_avg": [ 39.5, 26.556543449779003 ], "wc_limitations_avg": [ 16.25, 14.16642156650719 ], "wc_review_avg": [ 286.75, 61.054790966802926 ], "wc_reply_reviewers_avg": [ 50.75, 44.0078118065418 ], "wc_reply_authors_avg": [ 466.0, 528.6355076988302 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2518019546908701572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "hit.edu.cn;hit.edu.cn;hit.edu;hit.edu.cn;;duxiaoman.com;;duxiaoman.com;hit.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Harbin Institute of Technology;Du Xiaoman Technology;DuXiaoman Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;;https://www.duxiaoman.com", "aff_unique_abbr": "HIT;;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Diffusion-Inspired Truncated Sampler for Text-Video Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95072", "id": "SrQua0ATRZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SrQua0ATRZ", "openreview": "https://openreview.net/forum?id=SrQua0ATRZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95072.png?t=1731809505.9690995", "project": "", "author_site": "JIAMIAN WANG, Pichao WANG, Dongfang Liu, Qiang Guan, Sohail Dianat, MAJID RABBANI, Raghuveer Rao, Zhiqiang Tao", "tldr": "", "abstract": "Prevalent text-to-video retrieval methods represent multimodal text-video data in a joint embedding space, aiming at bridging the relevant text-video pairs and pulling away irrelevant ones. One main challenge in state-of-the-art retrieval methods lies in the modality gap, which stems from the substantial disparities between text and video and can persist in the joint space. In this work, we leverage the potential of Diffusion models to address the text-video modality gap by progressively aligning text and video embeddings in a unified space. However, we identify two key limitations of existing Diffusion models in retrieval tasks: The L2 loss does not fit the ranking problem inherent in text-video retrieval, and the generation quality heavily depends on the varied initial point drawn from the isotropic Gaussian, causing inaccurate retrieval. To this end, we introduce a new Diffusion-Inspired Truncated Sampler (DITS) that jointly performs progressive alignment and modality gap modeling in the joint embedding space. The key innovation of DITS is to leverage the inherent proximity of text and video embeddings, defining a truncated diffusion flow from the fixed text embedding to the video embedding, enhancing controllability compared to adopting the isotropic Gaussian. Moreover, DITS adopts the contrastive loss to jointly consider the relevant and irrelevant pairs, not only facilitating alignment but also yielding a discriminatively structured embedding. Experiments on five benchmark datasets suggest the state-of-the-art performance of DITS. We empirically find that DITS can also improve the structure of the CLIP embedding space. Code is available at https://github.com/Jiamian- Wang/DITS-text-video-retrieval", "keywords": "text-video retrieval;diffusion model;multimodal", "primary_area": "machine_vision", "supplementary_material": "/attachment/1ca96a263968808b01aaf990282106c146db4e47.zip", "author": "Jiamian Wang;Pichao WANG;Dongfang Liu;Qiang Guan;Sohail Dianat;MAJID RABBANI;Raghuveer Rao;ZHIQIANG TAO", "authorids": "~Jiamian_Wang1;~Pichao_WANG3;~Dongfang_Liu1;~Qiang_Guan1;~Sohail_Dianat1;~MAJID_RABBANI1;~Raghuveer_Rao1;~ZHIQIANG_TAO2", "gender": "M;M;;M;M;M;;", "homepage": "https://jiamian-wang.github.io/;https://wangpichao.github.io/;https://www.rit.edu/directory/dxleec-dongfang-liu;https://www.cs.kent.edu/~qguan/;https://www.rit.edu/engineering/directory/sadeee-sohail-dianat;https://www.rit.edu/directory/mxreee-majid-rabbani;;http://ztao.cc/", "dblp": "291/6309;;;20/1255;;;;135/5229.html", "google_scholar": "MGSkEscAAAAJ;;uICY0vEAAAAJ;kpPoy4gAAAAJ;https://scholar.google.com/scholar?hl=en;;;sEKglOkAAAAJ", "orcid": ";;;0000-0002-3804-8945;;;;", "linkedin": "%E5%8A%A0%E5%86%95-%E7%8E%8B-5928b81ba/;;;qiang-guan-51534128/;;majid-rabbani-8056253/;raghuveer-rao-3a99815/;", "or_profile": "~Jiamian_Wang1;~Pichao_WANG3;~Dongfang_Liu1;~Qiang_Guan1;~Sohail_Dianat1;~MAJID_RABBANI1;~Raghuveer_Rao1;~ZHIQIANG_TAO2", "aff": "Rochester Institute of Technology;Amazon;Rochester Institute of Technology;Kent State University;Rochester Institute of Technology;Rochester Institute of Technology;DEVCOM Army Research Laboratory;Rochester Institute of Technology", "aff_domain": "rit.edu;amazon.com;rit.edu;kent.edu;rit.edu;rit.edu;army.mil;rit.edu", "position": "PhD student;Researcher;Assistant Professor;Associate Professor;Full Professor;Associate Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2024diffusioninspired,\ntitle={Diffusion-Inspired Truncated Sampler for Text-Video Retrieval},\nauthor={Jiamian Wang and Pichao WANG and Dongfang Liu and Qiang Guan and Sohail Dianat and MAJID RABBANI and Raghuveer Rao and ZHIQIANG TAO},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SrQua0ATRZ}\n}", "github": "", "reviewers": "QeV9;CwRd;vaB9;UKTF;fBRP", "pdf_size": 946857, "rating": "4;4;6;7;7", "confidence": "4;4;4;5;4", "soundness": "2;3;3;4;4", "novelty": "2;2;3;4;4", "presentation": "2;3;2;4;4", "wc_summary": "33;63;70;103;70", "wc_strengths": "53;72;109;76;22", "wc_weaknesses": "75;302;102;52;140", "wc_questions": "46;302;64;3;20", "wc_limitations": "4;3;10;1;7", "wc_review": "211;742;355;235;259", "wc_reply_reviewers": "0;0;50;0;20", "wc_reply_authors": "0;0;13;0;14", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 67.8, 22.28362627581068 ], "wc_strengths_avg": [ 66.4, 28.597902020952517 ], "wc_weaknesses_avg": [ 134.2, 88.86934229530452 ], "wc_questions_avg": [ 87.0, 109.52625256074454 ], "wc_limitations_avg": [ 5.0, 3.1622776601683795 ], "wc_review_avg": [ 360.4, 196.96456534107855 ], "wc_reply_reviewers_avg": [ 14.0, 19.595917942265423 ], "wc_reply_authors_avg": [ 5.4, 6.6211781428987395 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5160468465421401, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=877416012506290867&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "rit.edu;amazon.com;rit.edu;kent.edu;rit.edu;rit.edu;army.mil;rit.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;0;0;3;0", "aff_unique_norm": "Rochester Institute of Technology;Amazon;Kent State University;United States Army Research Laboratory", "aff_unique_dep": ";Amazon.com, Inc.;;Army Research Laboratory", "aff_unique_url": "https://www.rit.edu;https://www.amazon.com;https://www.kent.edu;https://www.arl.army.mil", "aff_unique_abbr": "RIT;Amazon;KSU;ARL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Wild-GS: Real-Time Novel View Synthesis from Unconstrained Photo Collections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95071", "id": "Ss7l98DVvD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ss7l98DVvD", "openreview": "https://openreview.net/forum?id=Ss7l98DVvD", "poster": "/media/PosterPDFs/NeurIPS%202024/95071.png?t=1731689767.6755495", "project": "", "author_site": "Jiacong Xu, Yiqun Mei, Vishal Patel", "tldr": "", "abstract": "Photographs captured in unstructured tourist environments frequently exhibit variable appearances and transient occlusions, challenging accurate scene reconstruction and inducing artifacts in novel view synthesis. Although prior approaches have integrated the Neural Radiance Field (NeRF) with additional learnable modules to handle the dynamic appearances and eliminate transient objects, their extensive training demands and slow rendering speeds limit practical deployments. Recently, 3D Gaussian Splatting (3DGS) has emerged as a promising alternative to NeRF, offering superior training and inference efficiency along with better rendering quality. This paper presents \\textit{Wild-GS}, an innovative adaptation of 3DGS optimized for unconstrained photo collections while preserving its efficiency benefits. \\textit{Wild-GS} determines the appearance of each 3D Gaussian by their inherent material attributes, global illumination and camera properties per image, and point-level local variance of reflectance. Unlike previous methods that model reference features in image space, \\textit{Wild-GS} explicitly aligns the pixel appearance features to the corresponding local Gaussians by sampling the triplane extracted from the reference image. This novel design effectively transfers the high-frequency detailed appearance of the reference view to 3D space and significantly expedites the training process. Furthermore, 2D visibility maps and depth regularization are leveraged to mitigate the transient effects and constrain the geometry, respectively. Extensive experiments demonstrate that \\textit{Wild-GS} achieves state-of-the-art rendering performance and the highest efficiency in both training and inference among all the existing techniques. The code can be accessed via: https://github.com/XuJiacong/Wild-GS", "keywords": "3D Gaussian Splatting;Novel View Synthesis;3D Reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/637039be9207de852cbe36c7400e77a48a9bdcd6.zip", "author": "Jiacong Xu;Yiqun Mei;Vishal M. Patel", "authorids": "~Jiacong_Xu1;~Yiqun_Mei1;~Vishal_M._Patel1", "gender": "M;M;M", "homepage": "https://xujiacong.github.io;https://yiqunmei.net;https://engineering.jhu.edu/vpatel36/", "dblp": "322/0053;263/9774;76/6100", "google_scholar": "PgTi9kIAAAAJ;TvB8_rAAAAAJ;AkEXTbIAAAAJ", "orcid": "0000-0003-1141-9168;;", "linkedin": ";;", "or_profile": "~Jiacong_Xu1;~Yiqun_Mei1;~Vishal_Patel2", "aff": "Honda Research Institute;Netflix Eyeline Studios| ScanlineVFX;Johns Hopkins University", "aff_domain": "honda-ri.de;scanlinevfx.com;jhu.edu", "position": "Intern;Intern;Assistant Professor", "bibtex": "@inproceedings{\nxu2024wildgs,\ntitle={Wild-{GS}: Real-Time Novel View Synthesis from Unconstrained Photo Collections},\nauthor={Jiacong Xu and Yiqun Mei and Vishal M. Patel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ss7l98DVvD}\n}", "github": "", "reviewers": "E9p7;JabQ;ZrLe;TYNp", "pdf_size": 19748575, "rating": "3;5;5;6", "confidence": "5;3;4;3", "soundness": "3;3;3;2", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "53;85;129;84", "wc_strengths": "49;21;43;65", "wc_weaknesses": "358;174;157;225", "wc_questions": "96;144;6;115", "wc_limitations": "8;19;1;39", "wc_review": "564;443;336;528", "wc_reply_reviewers": "331;180;319;134", "wc_reply_authors": "1407;180;560;23", "reply_reviewers": "2;1;4;1", "reply_authors": "3;3;6;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.75, 27.068200900687877 ], "wc_strengths_avg": [ 44.5, 15.771810295587505 ], "wc_weaknesses_avg": [ 228.5, 78.84319881892159 ], "wc_questions_avg": [ 90.25, 51.55761340481151 ], "wc_limitations_avg": [ 16.75, 14.359230480774379 ], "wc_review_avg": [ 467.75, 87.84183228963293 ], "wc_reply_reviewers_avg": [ 241.0, 85.6650453802483 ], "wc_reply_authors_avg": [ 542.5, 535.9461260238756 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6901570819103188511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "honda-ri.de;scanlinevfx.com;jhu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Honda Research Institute;Netflix;Johns Hopkins University", "aff_unique_dep": ";Eyeline Studios;", "aff_unique_url": "https://www.honda-ri.com;https://www.netflix.com;https://www.jhu.edu", "aff_unique_abbr": "HRI;Netflix;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Japan;United States" }, { "title": "Diffusion Model with Cross Attention as an Inductive Bias for Disentanglement", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95070", "id": "StapcUWm9q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=StapcUWm9q", "openreview": "https://openreview.net/forum?id=StapcUWm9q", "poster": "/media/PosterPDFs/NeurIPS%202024/95070.png?t=1733490792.982777", "project": "", "author_site": "Tao Yang, Cuiling Lan, Yan Lu, Nanning Zheng", "tldr": "", "abstract": "Disentangled representation learning strives to extract the intrinsic factors within the observed data. Factoring these representations in an unsupervised manner is notably challenging and usually requires tailored loss functions or specific structural designs. In this paper, we introduce a new perspective and framework, demonstrating that diffusion models with cross-attention itself can serve as a powerful inductive bias to facilitate the learning of disentangled representations. We propose to encode an image into a set of concept tokens and treat them as the condition of the latent diffusion model for image reconstruction, where cross attention over the concept tokens is used to bridge the encoder and the U-Net of the diffusion model. We analyze that the diffusion process inherently possesses the time-varying information bottlenecks. Such information bottlenecks and cross attention act as strong inductive biases for promoting disentanglement. Without any regularization term in the loss function, this framework achieves superior disentanglement performance on the benchmark datasets, surpassing all previous methods with intricate designs. We have conducted comprehensive ablation studies and visualization analyses, shedding a light on the functioning of this model. We anticipate that our findings will inspire more investigation on exploring diffusion model for disentangled representation learning towards more sophisticated data analysis and understanding.", "keywords": "diffusion models;disentangled representation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Tao Yang;Cuiling Lan;Yan Lu;Nanning Zheng", "authorids": "~Tao_Yang9;~Cuiling_Lan1;~Yan_Lu7;~Nanning_Zheng1", "gender": "M;F;M;M", "homepage": "https://github.com/ThomasMrY;https://www.microsoft.com/en-us/research/people/culan/;https://www.microsoft.com/en-us/research/people/yanlu/;", "dblp": ";95/8115;15/4830-1;07/256-1", "google_scholar": "https://scholar.google.com.hk/citations?user=qT5psCEAAAAJ;XZugqiwAAAAJ;djk5l-4AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-9145-9957;0000-0001-5383-6424;", "linkedin": ";;;", "or_profile": "~Tao_Yang9;~Cuiling_Lan1;~Yan_Lu7;~Nanning_Zheng1", "aff": "Xi'an Jiaotong University;Microsoft;Microsoft Research Asia;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;microsoft.com;microsoft.com;xjtu.edu.cn", "position": "PhD student;Principal Researcher;Partner Research Manager;Full Professor", "bibtex": "@inproceedings{\nyang2024diffusion,\ntitle={Diffusion Model with Cross Attention as an Inductive Bias for Disentanglement},\nauthor={Tao Yang and Cuiling Lan and Yan Lu and Nanning Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=StapcUWm9q}\n}", "github": "", "reviewers": "rNx8;Mm6Q;HVuH", "pdf_size": 14522041, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;2;3", "novelty": "2;2;4", "presentation": "3;2;3", "wc_summary": "143;54;40", "wc_strengths": "100;45;83", "wc_weaknesses": "276;102;138", "wc_questions": "2;55;70", "wc_limitations": "7;8;1", "wc_review": "528;264;332", "wc_reply_reviewers": "29;17;43", "wc_reply_authors": "29;11;34", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 79.0, 45.614325235244536 ], "wc_strengths_avg": [ 76.0, 22.992752481307377 ], "wc_weaknesses_avg": [ 172.0, 74.9933330370107 ], "wc_questions_avg": [ 42.333333333333336, 29.169999809545576 ], "wc_limitations_avg": [ 5.333333333333333, 3.0912061651652345 ], "wc_review_avg": [ 374.6666666666667, 111.92060678097765 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 10.624918300339486 ], "wc_reply_authors_avg": [ 24.666666666666668, 9.877021593352701 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1928535260947979468&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "xjtu.edu.cn;microsoft.com;microsoft.com;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Xi'an Jiao Tong University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "XJTU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Deep Equilibrium Algorithmic Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95069", "id": "SuLxkxCENa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SuLxkxCENa", "openreview": "https://openreview.net/forum?id=SuLxkxCENa", "poster": "/media/PosterPDFs/NeurIPS%202024/95069.png?t=1729799188.4801102", "project": "", "author_site": "Dobrik Georgiev, Joseph Wilson, Davide Buffelli, Pietro Li\u00f3", "tldr": "", "abstract": "Neural Algorithmic Reasoning (NAR) research has demonstrated that graph neural networks (GNNs) could learn to execute classical algorithms. However, most previous approaches have always used a recurrent architecture, where each iteration of the GNN matches an iteration of the algorithm. In this paper we study neurally solving algorithms from a different perspective: since the algorithm\u2019s solution is often an equilibrium, it is possible to find the solution directly by solving an equilibrium equation. Our approach requires no information on the ground-truth number of steps of the algorithm, both during train and test time. Furthermore, the proposed method improves the performance of GNNs on executing algorithms and is a step towards speeding up existing NAR models. Our empirical evidence, leveraging algorithms from the CLRS-30 benchmark, validates that one can train a network to solve algorithmic problems by directly finding the equilibrium. We discuss the practical implementation of such models and propose regularisations to improve the performance of these equilibrium reasoners.", "keywords": "neural algorithmic reasoning;deep equilibrium models;DEQ;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Dobrik Georgiev Georgiev;JJ Wilson;Davide Buffelli;Pietro Lio", "authorids": "~Dobrik_Georgiev_Georgiev1;~JJ_Wilson1;~Davide_Buffelli1;~Pietro_Lio1", "gender": "M;M;M;M", "homepage": ";https://davidebuffelli.github.io;https://www.cst.cam.ac.uk/people/pl219;https://github.com/josephjwilson", "dblp": ";267/1651;l/PietroLio.html;", "google_scholar": "bdvnnIcAAAAJ;v28My7wAAAAJ;https://scholar.google.co.uk/citations?user=3YrWf7EAAAAJ;", "orcid": ";0000-0001-5565-1634;0000-0002-0540-5053;", "linkedin": ";davide-buffelli/;;", "or_profile": "~Dobrik_Georgiev_Georgiev1;~Davide_Buffelli1;~Pietro_Lio1;~Joseph_Wilson3", "aff": "Department of Computer Science and Technology;MediaTek Research;University of Cambridge;", "aff_domain": "cst.cam.ac.uk;mtkresearch.com;cam.ac.uk;", "position": "PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\ngeorgiev2024deep,\ntitle={Deep Equilibrium Algorithmic Reasoning},\nauthor={Dobrik Georgiev Georgiev and JJ Wilson and Davide Buffelli and Pietro Lio},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SuLxkxCENa}\n}", "github": "", "reviewers": "fLfp;E2D4;tzFP;1Bds", "pdf_size": 20983632, "rating": "5;6;6;6", "confidence": "4;1;1;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;4;2;2", "wc_summary": "106;80;70;116", "wc_strengths": "19;48;9;118", "wc_weaknesses": "100;164;78;92", "wc_questions": "333;1;70;108", "wc_limitations": "1;8;6;5", "wc_review": "559;301;233;439", "wc_reply_reviewers": "195;8;16;34", "wc_reply_authors": "463;61;45;86", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.25, 1.299038105676658 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 93.0, 18.681541692269406 ], "wc_strengths_avg": [ 48.5, 42.60575078554537 ], "wc_weaknesses_avg": [ 108.5, 32.99621190379283 ], "wc_questions_avg": [ 128.0, 124.41663875864836 ], "wc_limitations_avg": [ 5.0, 2.5495097567963922 ], "wc_review_avg": [ 383.0, 125.83322295800899 ], "wc_reply_reviewers_avg": [ 63.25, 76.6465100314424 ], "wc_reply_authors_avg": [ 163.75, 173.38883326212215 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7777777777777777, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14817090809260261842&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cst.cam.ac.uk;mtkresearch.com;cam.ac.uk;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;MediaTek Inc.", "aff_unique_dep": "Department of Computer Science and Technology;Research", "aff_unique_url": "https://www.cam.ac.uk;https://www.mediatek.com/", "aff_unique_abbr": "Cambridge;MediaTek", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Cambridge;Taiwan", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Detecting and Measuring Confounding Using Causal Mechanism Shifts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95068", "id": "SvmJJJS0q1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SvmJJJS0q1", "openreview": "https://openreview.net/forum?id=SvmJJJS0q1", "poster": "", "project": "", "author_site": "Abbavaram Gowtham Reddy, Vineeth N Balasubramanian", "tldr": "", "abstract": "Detecting and measuring confounding effects from data is a key challenge in causal inference. Existing methods frequently assume causal sufficiency, disregarding the presence of unobserved confounding variables. Causal sufficiency is both unrealistic and empirically untestable. Additionally, existing methods make strong parametric assumptions about the underlying causal generative process to guarantee the identifiability of confounding variables. Relaxing the causal sufficiency and parametric assumptions and leveraging recent advancements in causal discovery and confounding analysis with non-i.i.d. data, we propose a comprehensive approach for detecting and measuring confounding. We consider various definitions of confounding and introduce tailored methodologies to achieve three objectives: (i) detecting and measuring confounding among a set of variables, (ii) separating observed and unobserved confounding effects, and (iii) understanding the relative strengths of confounding bias between different sets of variables. We present useful properties of a confounding measure and present measures that satisfy those properties. Our empirical results support the usefulness of the proposed measures.", "keywords": "Causality;Confounding;Mechanisms;Measure", "primary_area": "causal_inference", "supplementary_material": "/attachment/2fed23abc8362767c0732583a915c979f7a9cc86.zip", "author": "Abbavaram Gowtham Reddy;Vineeth N. Balasubramanian", "authorids": "~Abbavaram_Gowtham_Reddy1;~Vineeth_N._Balasubramanian2", "gender": "M;M", "homepage": "https://gautam0707.github.io;https://people.iith.ac.in/vineethnb/", "dblp": "294/8798;88/4691", "google_scholar": "Iewg-GAAAAAJ;https://scholar.google.co.in/citations?user=7soDcboAAAAJ", "orcid": ";0000-0003-2656-0375", "linkedin": ";vineethnb?originalSubdomain=in", "or_profile": "~Abbavaram_Gowtham_Reddy1;~Vineeth_Balasubramanian1", "aff": "Indian Institute of Technology Hyderabad;Indian Institute of Technology Hyderabad", "aff_domain": "iith.ac.in;iith.ac.in", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nreddy2024detecting,\ntitle={Detecting and Measuring Confounding Using Causal Mechanism Shifts},\nauthor={Abbavaram Gowtham Reddy and Vineeth N. Balasubramanian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SvmJJJS0q1}\n}", "github": "", "reviewers": "e4Fq;n1WD;K6Rd;3wgM", "pdf_size": 510750, "rating": "5;5;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "45;66;68;81", "wc_strengths": "66;22;68;29", "wc_weaknesses": "174;415;238;340", "wc_questions": "78;202;154;4", "wc_limitations": "58;6;1;14", "wc_review": "421;711;529;468", "wc_reply_reviewers": "305;51;13;35", "wc_reply_authors": "575;46;0;18", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.0, 12.90348790056394 ], "wc_strengths_avg": [ 46.25, 20.90902915010642 ], "wc_weaknesses_avg": [ 291.75, 92.56450453602612 ], "wc_questions_avg": [ 109.5, 75.26453348025217 ], "wc_limitations_avg": [ 19.75, 22.56518335843961 ], "wc_review_avg": [ 532.25, 110.07582613816714 ], "wc_reply_reviewers_avg": [ 101.0, 118.54956769216832 ], "wc_reply_authors_avg": [ 159.75, 240.30436429661447 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14559735985669578060&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "iith.ac.in;iith.ac.in", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Hyderabad", "aff_unique_dep": "", "aff_unique_url": "https://www.iith.ac.in", "aff_unique_abbr": "IIT Hyderabad", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hyderabad", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "Learning Goal-Conditioned Representations for Language Reward Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95067", "id": "Swh8LxuycA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Swh8LxuycA", "openreview": "https://openreview.net/forum?id=Swh8LxuycA", "poster": "", "project": "", "author_site": "Vaskar Nath, Dylan Slack, Jeff Da, Yuntao Ma, Hugh Zhang, Spencer Whitehead, Sean Hendryx", "tldr": "", "abstract": "Techniques that learn improved representations via offline data or self-supervised objectives have shown impressive results in traditional reinforcement learning.\nNevertheless, it is unclear how improved representation learning can benefit reinforcement learning from human feedback on language models.\nIn this work, we propose training reward models (RMs) in a contrastive, $\\textit{goal-conditioned}$ fashion by increasing the representation similarity of future states along sampled preferred trajectories and decreasing the similarity along randomly sampled dispreferred trajectories.\nThis objective significantly improves reward model performance by up to 0.09 AUROC across challenging benchmarks, such as MATH and GSM8k. These findings extend to general alignment as well -- on the Helpful-Harmless dataset, we observe 2.3\\% increase in accuracy.\nBeyond improving reward model performance, we show this way of training RM representations enables improved steerability because it allows us to evaluate the likelihood of an action achieving a particular goal-state (e.g. whether a solution is correct or helpful).\nLeveraging this insight, we find that we can filter up to 55\\% of generated tokens during majority voting by discarding trajectories likely to end up in an \"incorrect\" state, which leads to significant cost savings.\nWe additionally find that these representations can perform fine-grained control by conditioning on desired future goal-states.\nFor example, we show that steering a Llama 3 model towards helpful generations with our approach improves helpfulness by $9.6$\\% over a supervised-fine-tuning trained baseline.\nSimilarly, steering the model towards complex generations improves complexity by $21.6$\\% over the baseline.\nOverall, we find that training RMs in this contrastive, goal-conditioned fashion significantly improves performance and enables model steerability.", "keywords": "Goal-Conditioned Q-functions;Contrastive Learning;Reinforcement Learning from Human Feedback;Representation Learning;Reward Model", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Vaskar Nath;Dylan Z Slack;Jeff Da;Yuntao Ma;Hugh Zhang;Spencer Whitehead;Sean M. Hendryx", "authorids": "~Vaskar_Nath1;~Dylan_Z_Slack1;~Jeff_Da1;~Yuntao_Ma1;~Hugh_Zhang1;~Spencer_Whitehead1;~Sean_M._Hendryx1", "gender": "M;M;;M;;;M", "homepage": ";https://dylanslacks.website;https://www.jeffda.com;;;;https://smhendryx.github.io/", "dblp": ";https://dblp.org/pers/s/Slack:Dylan.html;;10340;239/4076;;", "google_scholar": ";pyhz-gUAAAAJ;;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-4550-9647;;;", "linkedin": "https://ca.linkedin.com/in/vaskarnath?challengeId=AQEXevx3dE40BgAAAY9qTuG3hP1_865tPcUYWecf5AmmH-IyF8AB-JoT_ekPbtQb8b7LfnM35xM8THrBJ_3EnVO0qhuTBd1hBg&submissionId=46073402-e297-ce17-11f5-b87be2cd9a18&challengeSource=AgGpR_CnAuyF_QAAAY9qTzFMDeuE6961eMqITi1esfL71Bsr9W6S3CcxDWnioLQ&challegeType=AgHN68G556hNWQAAAY9qTzFP5tatFZeoazJPsZIv15SzuoxqQT4AP8w&memberId=AgHtVSVo27BAnQAAAY9qTzFTzab7LYWTEeSZKZYNIXWJUcE&recognizeDevice=AgHNpIQ8vule_wAAAY9qTzFX3jwY5hx5LjPy-Ey6p4b6Hfn0YKPK;;;yuntaoma0402/;;;sean-hendryx-43894056/", "or_profile": "~Vaskar_Nath1;~Dylan_Z_Slack1;~Jeff_Da1;~Yuntao_Ma1;~Hugh_Zhang1;~Spencer_Whitehead1;~Sean_M._Hendryx1", "aff": "Scale AI;University of California, Irvine;Department of Computer Science, University of Washington;Scale AI;Harvard University;;Scale AI", "aff_domain": "scale.com;uci.edu;cs.washington.edu;scale.com;harvard.edu;;scale.com", "position": "Researcher;PhD student;Undergrad student;Researcher;PhD student;;Engineering Manager, Machine Learning", "bibtex": "@inproceedings{\nnath2024learning,\ntitle={Learning Goal-Conditioned Representations for Language Reward Models},\nauthor={Vaskar Nath and Dylan Z Slack and Jeff Da and Yuntao Ma and Hugh Zhang and Spencer Whitehead and Sean M. Hendryx},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Swh8LxuycA}\n}", "github": "", "reviewers": "twr2;U1X3;2p1n;o9bF", "pdf_size": 1234172, "rating": "6;6;7;8", "confidence": "3;3;5;4", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "2;2;4;4", "wc_summary": "111;53;71;49", "wc_strengths": "94;44;81;67", "wc_weaknesses": "160;2;126;93", "wc_questions": "33;32;267;37", "wc_limitations": "10;7;6;9", "wc_review": "408;138;551;255", "wc_reply_reviewers": "192;29;73;33", "wc_reply_authors": "149;21;16;22", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 71.0, 24.535688292770594 ], "wc_strengths_avg": [ 71.5, 18.527007313648905 ], "wc_weaknesses_avg": [ 95.25, 58.81910828973864 ], "wc_questions_avg": [ 92.25, 100.90930333720475 ], "wc_limitations_avg": [ 8.0, 1.5811388300841898 ], "wc_review_avg": [ 338.0, 155.85089027657173 ], "wc_reply_reviewers_avg": [ 81.75, 65.93699644357483 ], "wc_reply_authors_avg": [ 52.0, 56.049085630365106 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6j-M0m5UfnsJ:scholar.google.com/&scioq=Learning+Goal-Conditioned+Representations+for+Language+Reward+Models&hl=en&as_sdt=0,23", "gs_version_total": 3, "email": "scale.com;uci.edu;cs.washington.edu;scale.com;harvard.edu;;scale.com", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Scale AI;University of California, Irvine;University of Washington;Harvard University", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://scale.ai;https://www.uci.edu;https://www.washington.edu;https://www.harvard.edu", "aff_unique_abbr": "Scale AI;UCI;UW;Harvard", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Irvine;Seattle", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are Graph Neural Networks Optimal Approximation Algorithms?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95066", "id": "SxRblm9aMs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SxRblm9aMs", "openreview": "https://openreview.net/forum?id=SxRblm9aMs", "poster": "", "project": "", "author_site": "Morris Yau, Nikolaos Karalias, Eric Lu, Jessica Xu, Stefanie Jegelka", "tldr": "", "abstract": "In this work we design graph neural network architectures that capture optimal\napproximation algorithms for a large class of combinatorial optimization problems,\nusing powerful algorithmic tools from semidefinite programming (SDP). Concretely, we prove that polynomial-sized message-passing algorithms can represent\nthe most powerful polynomial time algorithms for Max Constraint Satisfaction\nProblems assuming the Unique Games Conjecture. We leverage this result to\nconstruct efficient graph neural network architectures, OptGNN, that obtain high quality approximate solutions on landmark combinatorial optimization problems\nsuch as Max-Cut, Min-Vertex-Cover, and Max-3-SAT. Our approach achieves\nstrong empirical results across a wide range of real-world and synthetic datasets\nagainst solvers and neural baselines. Finally, we take advantage of OptGNN\u2019s\nability to capture convex relaxations to design an algorithm for producing bounds\non the optimal solution from the learned embeddings of OptGNN.", "keywords": "Combinatorial Optimization;Graph Neural Networks;Unsupervised Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Morris Yau;Nikolaos Karalias;Eric Hanqing Lu;Jessica Xu;Stefanie Jegelka", "authorids": "~Morris_Yau3;~Nikolaos_Karalias1;~Eric_Hanqing_Lu1;~Jessica_Xu1;~Stefanie_Jegelka3", "gender": "M;M;F;F;M", "homepage": ";;https://jessxu.com;http://people.csail.mit.edu/stefje/;http://morrisyau.com/", "dblp": "267/9290;;;38/7003;", "google_scholar": "CRLG9UcAAAAJ;;8VPkPvUAAAAJ;gTWUZlsAAAAJ;", "orcid": "0000-0002-9471-5343;0000-0003-1228-9887;;;", "linkedin": ";;;;", "or_profile": "~Nikolaos_Karalias1;~Eric_Hanqing_Lu1;~Jessica_Xu1;~Stefanie_Jegelka3;~morris_yau2", "aff": "Massachusetts Institute of Technology;Harvard University, Harvard University;;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;g.harvard.edu;;mit.edu;mit.edu", "position": "Postdoc;PhD student;;Associate Professor;PhD student", "bibtex": "@inproceedings{\nyau2024are,\ntitle={Are Graph Neural Networks Optimal Approximation Algorithms?},\nauthor={Morris Yau and Nikolaos Karalias and Eric Hanqing Lu and Jessica Xu and Stefanie Jegelka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SxRblm9aMs}\n}", "github": "", "reviewers": "Ubhy;857L;zqcP;WeYu", "pdf_size": 1807558, "rating": "7;7;7;8", "confidence": "2;4;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "4;4;3;3", "wc_summary": "50;23;55;129", "wc_strengths": "71;45;69;89", "wc_weaknesses": "39;14;38;83", "wc_questions": "28;1;46;76", "wc_limitations": "38;7;9;1", "wc_review": "226;90;217;378", "wc_reply_reviewers": "21;0;30;14", "wc_reply_authors": "96;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 64.25, 39.31523241696531 ], "wc_strengths_avg": [ 68.5, 15.644487847162015 ], "wc_weaknesses_avg": [ 43.5, 24.904818810824544 ], "wc_questions_avg": [ 37.75, 27.279800219209818 ], "wc_limitations_avg": [ 13.75, 14.306903927824496 ], "wc_review_avg": [ 227.75, 102.06462413588756 ], "wc_reply_reviewers_avg": [ 16.25, 10.96300597464035 ], "wc_reply_authors_avg": [ 24.0, 41.569219381653056 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13625869420708795159&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "mit.edu;g.harvard.edu;;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.harvard.edu", "aff_unique_abbr": "MIT;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Prompt Tuning Strikes Back: Customizing Foundation Models with Low-Rank Prompt Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95065", "id": "SyMhGilvCv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=SyMhGilvCv", "openreview": "https://openreview.net/forum?id=SyMhGilvCv", "poster": "/media/PosterPDFs/NeurIPS%202024/95065.png?t=1731090569.704999", "project": "", "author_site": "Abhinav Jain, Swarat Chaudhuri, Thomas Reps, Christopher Jermaine", "tldr": "", "abstract": "Parameter-Efficient Fine-Tuning (PEFT) has become the standard for customising Foundation Models (FMs) to user-specific downstream tasks. However, typical PEFT methods require storing multiple task-specific adapters, creating scalability issues as these adapters must be housed and run at the FM server. Traditional prompt tuning offers a potential solution by customising them through task-specific input prefixes, but it under-performs compared to other PEFT methods like LoRA. To address this gap, we propose Low-Rank Prompt Adaptation (LoPA), a prompt-tuning-based approach that performs on par with state-of-the-art PEFT methods and full fine-tuning while being more parameter-efficient and not requiring a server-based adapter. LoPA generates soft prompts by balancing between sharing task-specific information across instances and customization for each instance. It uses a low-rank decomposition of the soft-prompt component encoded for each instance to achieve parameter efficiency. We provide a comprehensive evaluation on multiple natural language understanding and code generation and understanding tasks across a wide range of foundation models with varying sizes.", "keywords": "Parameter-Efficient Fine-Tuning;Prompt tuning", "primary_area": "generative_models", "supplementary_material": "", "author": "Abhinav Jain;Swarat Chaudhuri;Thomas Reps;Chris Jermaine", "authorids": "~Abhinav_Jain1;~Swarat_Chaudhuri1;~Thomas_Reps1;~Chris_Jermaine1", "gender": "M;M;M;M", "homepage": ";http://www.cs.utexas.edu/~swarat;http://pages.cs.wisc.edu/~reps/;https://www.cs.rice.edu/~cmj4/", "dblp": "167/2404-1;37/6100;r/TWReps;j/ChrisJermaine", "google_scholar": "https://scholar.google.com/citations?hl=en;9j6RBYQAAAAJ;https://scholar.google.com.tw/citations?user=pwhyTq0AAAAJ;", "orcid": "0000-0003-4991-6872;0000-0002-6859-1391;0000-0002-5676-9949;", "linkedin": "abhinav-jain-483975aa/;swarat-chaudhuri-609b3092/;thomas-reps-437b2a69/;", "or_profile": "~Abhinav_Jain1;~Swarat_Chaudhuri1;~Thomas_Reps1;~Chris_Jermaine1", "aff": "Rice University;University of Texas at Austin;University of Wisconsin-Madison;Rice University", "aff_domain": "rice.edu;utexas.edu;cs.wisc.edu;rice.edu", "position": "PhD student;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\njain2024prompt,\ntitle={Prompt Tuning Strikes Back: Customizing Foundation Models with Low-Rank Prompt Adaptation},\nauthor={Abhinav Jain and Swarat Chaudhuri and Thomas Reps and Chris Jermaine},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=SyMhGilvCv}\n}", "github": "", "reviewers": "4xA9;RWB9;4fPU", "pdf_size": 18756467, "rating": "5;5;5", "confidence": "3;5;5", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "65;90;87", "wc_strengths": "37;31;44", "wc_weaknesses": "135;322;134", "wc_questions": "4;6;21", "wc_limitations": "4;1;36", "wc_review": "245;450;322", "wc_reply_reviewers": "18;47;0", "wc_reply_authors": "0;63;0", "reply_reviewers": "1;2;0", "reply_authors": "1;2;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 80.66666666666667, 11.14550233153366 ], "wc_strengths_avg": [ 37.333333333333336, 5.312459150169743 ], "wc_weaknesses_avg": [ 197.0, 88.38929045233176 ], "wc_questions_avg": [ 10.333333333333334, 7.586537784494029 ], "wc_limitations_avg": [ 13.666666666666666, 15.839472494022298 ], "wc_review_avg": [ 339.0, 84.54978809356453 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 19.362047641943477 ], "wc_reply_authors_avg": [ 21.0, 29.698484809834994 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10148458820783427437&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "rice.edu;utexas.edu;cs.wisc.edu;rice.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Rice University;University of Texas at Austin;University of Wisconsin-Madison", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.utexas.edu;https://www.wisc.edu", "aff_unique_abbr": "Rice;UT Austin;UW-Madison", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Reinforcement Learning with Self-Play", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95064", "id": "T07OHxcEYP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T07OHxcEYP", "openreview": "https://openreview.net/forum?id=T07OHxcEYP", "poster": "", "project": "", "author_site": "Dan Qiao, Yu-Xiang Wang", "tldr": "", "abstract": "We study the problem of multi-agent reinforcement learning (multi-agent RL) with differential privacy (DP) constraints. This is well-motivated by various real-world applications involving sensitive data, where it is critical to protect users' private information. We first extend the definitions of Joint DP (JDP) and Local DP (LDP) to two-player zero-sum episodic Markov Games, where both definitions ensure trajectory-wise privacy protection. Then we design a provably efficient algorithm based on optimistic Nash value iteration and privatization of Bernstein-type bonuses. The algorithm is able to satisfy JDP and LDP requirements when instantiated with appropriate privacy mechanisms. Furthermore, for both notions of DP, our regret bound generalizes the best known result under the single-agent RL case, while our regret could also reduce to the best known result for multi-agent RL without privacy constraints. To the best of our knowledge, these are the first results towards understanding trajectory-wise privacy protection in multi-agent RL.", "keywords": "differential privacy;multi-agent reinforcement learning;trajectory-wise privacy protection", "primary_area": "privacy", "supplementary_material": "", "author": "Dan Qiao;Yu-Xiang Wang", "authorids": "~Dan_Qiao1;~Yu-Xiang_Wang1", "gender": "M;", "homepage": ";http://www.cs.ucsb.edu/~yuxiangw/publications.html", "dblp": ";62/1637-3.html", "google_scholar": "EyfAUuUAAAAJ;HGNZ1fkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dan_Qiao1;~Yu-Xiang_Wang1", "aff": ", University of California, Santa Barbara;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqiao2024differentially,\ntitle={Differentially Private Reinforcement Learning with Self-Play},\nauthor={Dan Qiao and Yu-Xiang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T07OHxcEYP}\n}", "github": "", "reviewers": "3xco;Bzin;vYkZ;f4t2", "pdf_size": 440519, "rating": "5;5;6;7", "confidence": "2;3;3;4", "soundness": "3;3;4;4", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "49;128;87;28", "wc_strengths": "43;25;81;51", "wc_weaknesses": "65;300;21;10", "wc_questions": "68;68;23;58", "wc_limitations": "1;1;7;16", "wc_review": "226;522;219;163", "wc_reply_reviewers": "12;38;10;11", "wc_reply_authors": "0;9;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 38.15101571387058 ], "wc_strengths_avg": [ 50.0, 20.223748416156685 ], "wc_weaknesses_avg": [ 99.0, 117.85796536509528 ], "wc_questions_avg": [ 54.25, 18.498310733685926 ], "wc_limitations_avg": [ 6.25, 6.139014578904337 ], "wc_review_avg": [ 282.5, 140.41456477160764 ], "wc_reply_reviewers_avg": [ 17.75, 11.712706775122479 ], "wc_reply_authors_avg": [ 2.25, 3.897114317029974 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RlnmRRpajsAJ:scholar.google.com/&scioq=Differentially+Private+Reinforcement+Learning+with+Self-Play&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cs.ucsb.edu;ucsb.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Frequency Adaptive Normalization For Non-stationary Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95063", "id": "T0axIflVDD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T0axIflVDD", "openreview": "https://openreview.net/forum?id=T0axIflVDD", "poster": "/media/PosterPDFs/NeurIPS%202024/95063.png?t=1729657617.0172257", "project": "", "author_site": "Weiwei Ye, Songgaojun Deng, Qiaosha Zou, Ning Gui", "tldr": "", "abstract": "Time series forecasting typically needs to address non-stationary data with evolving trend and seasonal patterns. To address the non-stationarity, reversible instance normalization has been recently proposed to alleviate impacts from the trend with certain statistical measures, e.g., mean and variance. Although they demonstrate improved predictive accuracy, they are limited to expressing basic trends and are incapable of handling seasonal patterns. To address this limitation, this paper proposes a new instance normalization solution, called frequency adaptive normalization (FAN), which extends instance normalization in handling both dynamic trend and seasonal patterns. Specifically, we employ the Fourier transform to identify instance-wise predominant frequent components that cover most non-stationary factors. \nFurthermore, the discrepancy of those frequency components between inputs and outputs is explicitly modeled as a prediction task with a simple MLP model. FAN is a model-agnostic method that can be applied to arbitrary predictive backbones. We instantiate FAN on four widely used forecasting models as the backbone and evaluate their prediction performance improvements on eight benchmark datasets. FAN demonstrates significant performance advancement, achieving 7.76\\%$\\sim$37.90\\% average improvements in MSE. Our code is publicly available at http://github.com/icannotnamemyself/FAN.", "keywords": "Timeseries Normalization;Non-stationary Timeseries Forecasting;Timeseries Forecasting", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/613d073c99428c5d81e84431f816146221761079.zip", "author": "Weiwei Ye;Songgaojun Deng;Qiaosha Zou;Ning Gui", "authorids": "~Weiwei_Ye2;~Songgaojun_Deng1;~Qiaosha_Zou1;~Ning_Gui1", "gender": "M;F;F;M", "homepage": ";https://songgaojundeng.github.io/;;https://faculty.csu.edu.cn/guining/zh_CN/index.htm", "dblp": "188/6761;241/4844.html;23/10300.html;30/3048", "google_scholar": "ejSAwB0AAAAJ;RtzagbgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-9822-9270;;", "linkedin": ";;;", "or_profile": "~Weiwei_Ye2;~Songgaojun_Deng1;~Qiaosha_Zou1;~Ning_Gui1", "aff": "Central South University;University of Amsterdam;Fudan University, Tsinghua University;Central South University", "aff_domain": "csu.edu.cn;uva.nl;fudan.edu.cn;csu.edu.cn", "position": "MS student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nye2024frequency,\ntitle={Frequency Adaptive Normalization For Non-stationary Time Series Forecasting},\nauthor={Weiwei Ye and Songgaojun Deng and Qiaosha Zou and Ning Gui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T0axIflVDD}\n}", "github": "", "reviewers": "LAap;MHdq;Cudr;3SM3", "pdf_size": 3573978, "rating": "3;6;7;8", "confidence": "3;5;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "85;61;43;73", "wc_strengths": "92;26;57;65", "wc_weaknesses": "597;230;60;74", "wc_questions": "111;8;3;11", "wc_limitations": "136;13;5;2", "wc_review": "1021;338;168;225", "wc_reply_reviewers": "587;0;21;13", "wc_reply_authors": "758;47;34;47", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.5, 15.5161206491829 ], "wc_strengths_avg": [ 60.0, 23.526580712037184 ], "wc_weaknesses_avg": [ 240.25, 216.50909334251992 ], "wc_questions_avg": [ 33.25, 44.97985660270606 ], "wc_limitations_avg": [ 39.0, 56.14712815451918 ], "wc_review_avg": [ 438.0, 342.1103623101762 ], "wc_reply_reviewers_avg": [ 155.25, 249.38361513940725 ], "wc_reply_authors_avg": [ 221.5, 309.79388309003133 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8058229640253803, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17728975223257584897&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "csu.edu.cn;uva.nl;fudan.edu.cn;csu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Central South University;University of Amsterdam;Fudan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.csu.edu.cn;https://www.uva.nl;https://www.fudan.edu.cn", "aff_unique_abbr": "CSU;UvA;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Netherlands" }, { "title": "Universal Rates for Active Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95062", "id": "T0e4Nw09XX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T0e4Nw09XX", "openreview": "https://openreview.net/forum?id=T0e4Nw09XX", "poster": "", "project": "", "author_site": "Steve Hanneke, Amin Karbasi, Shay Moran, Grigoris Velegkas", "tldr": "", "abstract": "In this work we study the problem of actively learning binary classifiers\n from a given concept class, i.e., learning by utilizing unlabeled data \n and submitting targeted queries about their labels to a domain expert.\n We evaluate the quality of our solutions by considering the learning curves\n they induce, i.e., the rate of decrease\n of the misclassification probability as the number of label queries\n increases. The majority of the literature on active learning has \n focused on obtaining uniform guarantees on the error rate which are\n only able to explain the upper envelope of the learning curves over families\n of different data-generating distributions. We diverge from this line of\n work and we focus on the distribution-dependent framework of universal\n learning whose goal is to obtain guarantees that hold for any fixed distribution,\n but do not apply uniformly over all the distributions. We provide a \n complete characterization of the optimal learning rates that are achievable\n by algorithms that have to specify the number of unlabeled examples they\n use ahead of their execution. Moreover, we identify combinatorial complexity\n measures that give rise to each case of our tetrachotomic characterization.\n This resolves an open question that was posed by Balcan et al. (2010).\n As a byproduct of our main result,\n we develop an active learning algorithm for partial concept classes\n that achieves exponential learning rates in the uniform setting.", "keywords": "Universal Rates;Active Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Steve Hanneke;Amin Karbasi;Shay Moran;Grigoris Velegkas", "authorids": "~Steve_Hanneke1;~Amin_Karbasi3;~Shay_Moran1;~Grigoris_Velegkas1", "gender": "M;M;M;M", "homepage": "http://www.stevehanneke.com;http://www.cs.technion.ac.il/~shaymrn/;;http://seas.yale.edu/faculty-research/faculty-directory/amin-karbasi", "dblp": "40/154;119/5111;254/1885;49/7411", "google_scholar": "fEhNO7YAAAAJ;kALYnggAAAAJ;Ty1kgP0AAAAJ;https://scholar.google.com.tw/citations?user=VusVB38AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Steve_Hanneke1;~Shay_Moran1;~Grigoris_Velegkas1;~amin_karbasi1", "aff": "Purdue University;Google;Yale University;Google", "aff_domain": "purdue.edu;google.com;yale.edu;google.com", "position": "Assistant Professor;Visiting Faculty;PhD student;Researcher", "bibtex": "@inproceedings{\nhanneke2024universal,\ntitle={Universal Rates for Active Learning},\nauthor={Steve Hanneke and Amin Karbasi and Shay Moran and Grigoris Velegkas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T0e4Nw09XX}\n}", "github": "", "reviewers": "CFsL;aq73;cBhF", "pdf_size": 499368, "rating": "7;7;8", "confidence": "3;3;3", "soundness": "3;3;4", "novelty": "3;4;4", "presentation": "3;3;4", "wc_summary": "48;39;66", "wc_strengths": "56;34;129", "wc_weaknesses": "40;46;59", "wc_questions": "115;18;61", "wc_limitations": "2;1;1", "wc_review": "261;138;316", "wc_reply_reviewers": "9;9;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 51.0, 11.224972160321824 ], "wc_strengths_avg": [ 73.0, 40.60377650744653 ], "wc_weaknesses_avg": [ 48.333333333333336, 7.93025150224688 ], "wc_questions_avg": [ 64.66666666666667, 39.684869772860395 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 238.33333333333334, 74.41475361482799 ], "wc_reply_reviewers_avg": [ 6.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CN3g0EN2PZ8J:scholar.google.com/&scioq=Universal+Rates+for+Active+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "purdue.edu;google.com;yale.edu;google.com", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Purdue University;Google;Yale University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.purdue.edu;https://www.google.com;https://www.yale.edu", "aff_unique_abbr": "Purdue;Google;Yale", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The ALCHEmist: Automated Labeling 500x CHEaper than LLM Data Annotators", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95061", "id": "T0glCBw28a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T0glCBw28a", "openreview": "https://openreview.net/forum?id=T0glCBw28a", "poster": "/media/PosterPDFs/NeurIPS%202024/95061.png?t=1733435400.0735643", "project": "", "author_site": "Tzu-Heng Huang, Catherine Cao, Vaishnavi Bhargava, Frederic Sala", "tldr": "", "abstract": "Large pretrained models can be used as annotators, helping replace or augment crowdworkers and enabling distilling generalist models into smaller specialist models. Unfortunately, this comes at a cost: employing top-of-the-line models often requires paying thousands of dollars for API calls, while the resulting datasets are static and challenging to audit. To address these challenges, we propose a simple alternative: rather than directly querying labels from pretrained models, we task models to generate programs that can produce labels. These programs can be stored and applied locally, re-used and extended, and cost orders of magnitude less. Our system, $\\textbf{Alchemist}$, obtains comparable to or better performance than large language model-based annotation in a range of tasks for a fraction of the cost: on average, improvements amount to a $\\textbf{12.9}$% enhancement while the total labeling costs across all datasets are reduced by a factor of approximately $\\textbf{500}\\times$.", "keywords": "Automated Data Labeling; Code Generation; Weak Supervision", "primary_area": "other", "supplementary_material": "/attachment/fc70e869c30dcb396b85bf244aef327cf07fd6f1.zip", "author": "Tzu-Heng Huang;Catherine Cao;Vaishnavi Bhargava;Frederic Sala", "authorids": "~Tzu-Heng_Huang1;~Catherine_Cao1;~Vaishnavi_Bhargava1;~Frederic_Sala1", "gender": "M;;F;M", "homepage": "https://zihengh1.github.io/;;;https://pages.cs.wisc.edu/~fredsala/", "dblp": "185/7539;;;133/3602", "google_scholar": "yIZ8NCQAAAAJ;;;9KhIkNkAAAAJ", "orcid": ";;;", "linkedin": "zihengh1/;catherinecao1/;vaishnavi-bhargava;", "or_profile": "~Tzu-Heng_Huang1;~Catherine_Cao1;~Vaishnavi_Bhargava1;~Frederic_Sala1", "aff": "Apple;;Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "apple.com;;cs.wisc.edu;wisc.edu", "position": "Intern;;MS student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024the,\ntitle={The {ALCHE}mist: Automated Labeling 500x {CHE}aper than {LLM} Data Annotators},\nauthor={Tzu-Heng Huang and Catherine Cao and Vaishnavi Bhargava and Frederic Sala},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T0glCBw28a}\n}", "github": "", "reviewers": "b4or;d5ZD;n36E;uwkF", "pdf_size": 2568298, "rating": "5;6;7;8", "confidence": "4;4;5;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "100;87;81;101", "wc_strengths": "68;125;43;178", "wc_weaknesses": "39;133;473;270", "wc_questions": "90;3;16;5", "wc_limitations": "56;1;9;60", "wc_review": "353;349;622;614", "wc_reply_reviewers": "14;43;290;34", "wc_reply_authors": "44;31;575;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 8.525696452489967 ], "wc_strengths_avg": [ 103.5, 52.28049349422785 ], "wc_weaknesses_avg": [ 228.75, 163.19677539706475 ], "wc_questions_avg": [ 28.5, 35.8503835404867 ], "wc_limitations_avg": [ 31.5, 26.688012290165037 ], "wc_review_avg": [ 484.5, 133.53744793128254 ], "wc_reply_reviewers_avg": [ 95.25, 112.92779772934563 ], "wc_reply_authors_avg": [ 170.0, 233.89206912591115 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15373756778198427964&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "apple.com;;cs.wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Apple;University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": "Apple Inc.;Department of Computer Science;", "aff_unique_url": "https://www.apple.com;https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "Apple;UW-Madison;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Paraphrasing: Perturbation on Layers Improves Knowledge Injection in Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95060", "id": "T1lFrYwtf7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T1lFrYwtf7", "openreview": "https://openreview.net/forum?id=T1lFrYwtf7", "poster": "/media/PosterPDFs/NeurIPS%202024/95060.png?t=1733374572.7060866", "project": "", "author_site": "Minki Kang, Sung Ju Hwang, Gibbeum Lee, Jaewoong Cho", "tldr": "", "abstract": "As Large Language Models (LLMs) are increasingly deployed in specialized domains with continuously evolving knowledge, the need for timely and precise knowledge injection has become essential. Fine-tuning with paraphrased data is a common approach to enhance knowledge injection, yet it faces two significant challenges: high computational costs due to repetitive external model usage and limited sample diversity. \nTo this end, we introduce LaPael, a latent-level paraphrasing method that applies input-dependent noise to early LLM layers.\nThis approach enables diverse and semantically consistent augmentations directly within the model. Furthermore, it eliminates the recurring costs of paraphrase generation for each knowledge update. \nOur extensive experiments on question-answering benchmarks demonstrate that LaPael improves knowledge injection over standard fine-tuning and existing noise-based approaches. \nAdditionally, combining LaPael with data-level paraphrasing further enhances performance.", "keywords": "large language models;knowledge injection;data augmentation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Minki Kang;Sung Ju Hwang;Gibbeum Lee;Jaewoong Cho", "authorids": "~Minki_Kang1;~Sung_Ju_Hwang1;~Gibbeum_Lee1;~Jaewoong_Cho1", "gender": "M;;;", "homepage": "https://nardien.github.io;;;https://sites.google.com/view/jaewoongcho", "dblp": "232/2406;;196/2033;184/3848", "google_scholar": "90G751oAAAAJ;;;", "orcid": ";;0009-0004-5644-2163;", "linkedin": ";;;", "or_profile": "~Minki_Kang1;~Sung_Ju_Hwang1;~Gibbeum_Lee1;~Jaewoong_Cho1", "aff": "Korea Advanced Institute of Science & Technology;;KAIST;KRAFTON", "aff_domain": "kaist.ac.kr;;ee.kaist.ac.kr;krafton.com", "position": "PhD student;;PhD student;Researcher", "bibtex": "@inproceedings{\nkang2024latent,\ntitle={Latent Paraphrasing: Perturbation on Layers Improves Knowledge Injection in Language Models},\nauthor={Minki Kang and Sung Ju Hwang and Gibbeum Lee and Jaewoong Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T1lFrYwtf7}\n}", "github": "", "reviewers": "bjHu;NFcZ;Zuau;WC5j", "pdf_size": 1055814, "rating": "3;5;5;6", "confidence": "5;4;4;3", "soundness": "1;3;3;3", "novelty": "2;2;2;3", "presentation": "1;3;3;3", "wc_summary": "116;55;115;151", "wc_strengths": "44;34;31;65", "wc_weaknesses": "322;99;41;124", "wc_questions": "6;33;46;231", "wc_limitations": "1;6;8;7", "wc_review": "489;227;241;578", "wc_reply_reviewers": "20;21;168;181", "wc_reply_authors": "51;0;886;359", "reply_reviewers": "1;1;2;3", "reply_authors": "2;1;3;4", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 109.25, 34.513584282134474 ], "wc_strengths_avg": [ 43.5, 13.313526955694348 ], "wc_weaknesses_avg": [ 146.5, 105.70359501928021 ], "wc_questions_avg": [ 79.0, 88.93536979177632 ], "wc_limitations_avg": [ 5.5, 2.692582403567252 ], "wc_review_avg": [ 383.75, 153.10025310233814 ], "wc_reply_reviewers_avg": [ 97.5, 77.13786359499464 ], "wc_reply_authors_avg": [ 324.0, 352.34003462564397 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fjTt5DajkAMJ:scholar.google.com/&scioq=Latent+Paraphrasing:+Perturbation+on+Layers+Improves+Knowledge+Injection+in+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "kaist.ac.kr;;ee.kaist.ac.kr;krafton.com", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;KRAFTON Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.krafton.com", "aff_unique_abbr": "KAIST;KRAFTON", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Heavy-Tailed Class Imbalance and Why Adam Outperforms Gradient Descent on Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95059", "id": "T56j6aV8Oc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T56j6aV8Oc", "openreview": "https://openreview.net/forum?id=T56j6aV8Oc", "poster": "/media/PosterPDFs/NeurIPS%202024/95059.png?t=1731609057.4423335", "project": "", "author_site": "Frederik Kunstner, Robin Yadav, Alan Milligan, Mark Schmidt, Alberto Bietti", "tldr": "", "abstract": "Adam has been shown to outperform gradient descent on large language models by a larger margin than on other tasks, but it is unclear why. We show that a key factor in this performance gap is the heavy-tailed class imbalance found in language tasks. When trained with gradient descent, the loss of infrequent words decreases more slowly than the loss of frequent ones. This leads to a slow decrease on the average loss as most samples come from infrequent words. On the other hand, Adam and sign-based methods are less sensitive to this problem. To establish that this behavior is caused by class imbalance, we show empirically that it can be reproduced across architectures and data types, on language transformers, vision CNNs, and linear models. On a linear model with cross-entropy loss, we show that class imbalance leads to imbalanced, correlated gradients and Hessians that have been hypothesized to benefit Adam. We also prove that, in continuous time, gradient descent converges slowly on low-frequency classes while sign descent does not.", "keywords": "language model;optimization;transformers;heavy-tailed;class imbalance;gradient descent;Adam;adaptive methods;sign descent", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/87a0e21ab7e01e5a37b1209b9e48456e582c9e9e.zip", "author": "Frederik Kunstner;Alan Milligan;Robin Yadav;Mark Schmidt;Alberto Bietti", "authorids": "~Frederik_Kunstner1;~Alan_Milligan1;~Robin_Yadav1;~Mark_Schmidt1;~Alberto_Bietti1", "gender": ";M;M;;M", "homepage": "https://fkunstner.github.io/;https://www.cs.ubc.ca/~alanmil/;;;http://alberto.bietti.me", "dblp": "230/3921;334/3788;;35/2638;166/6461", "google_scholar": "EhpYjPAAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;iT7Tp70AAAAJ", "orcid": ";0000-0002-6335-9411;;;", "linkedin": ";alanmilligan/;https://linkedin.com/in/yadav-robin;;", "or_profile": "~Frederik_Kunstner1;~Alan_Milligan1;~Robin_Yadav1;~Mark_Schmidt1;~Alberto_Bietti1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia;Flatiron Institute", "aff_domain": "cs.ubc.ca;cs.ubc.ca;ubc.ca;ubc.ca;flatironinstitute.org", "position": "PhD student;MS student;Undergrad student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nkunstner2024heavytailed,\ntitle={Heavy-Tailed Class Imbalance and Why Adam Outperforms Gradient Descent on Language Models},\nauthor={Frederik Kunstner and Robin Yadav and Alan Milligan and Mark Schmidt and Alberto Bietti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T56j6aV8Oc}\n}", "github": "", "reviewers": "LhRX;HWpb;ujWn;CdGj", "pdf_size": 2660166, "rating": "7;7;7;8", "confidence": "4;4;4;4", "soundness": "4;3;4;4", "novelty": "3;3;3;4", "presentation": "3;3;4;4", "wc_summary": "222;45;206;152", "wc_strengths": "138;54;122;111", "wc_weaknesses": "160;45;116;72", "wc_questions": "22;11;35;135", "wc_limitations": "9;1;22;20", "wc_review": "551;156;501;490", "wc_reply_reviewers": "19;10;13;46", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 156.25, 69.26895047566406 ], "wc_strengths_avg": [ 106.25, 31.657345119261027 ], "wc_weaknesses_avg": [ 98.25, 43.739998856881556 ], "wc_questions_avg": [ 50.75, 49.378006237595294 ], "wc_limitations_avg": [ 13.0, 8.514693182963201 ], "wc_review_avg": [ 424.5, 156.71391131613046 ], "wc_reply_reviewers_avg": [ 22.0, 14.230249470757707 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12138454184490652976&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.ubc.ca;cs.ubc.ca;ubc.ca;ubc.ca;flatironinstitute.org", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of British Columbia;Flatiron Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://flatironinstitute.org", "aff_unique_abbr": "UBC;Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Simplifying Constraint Inference with Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95058", "id": "T5Cerv7PT2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T5Cerv7PT2", "openreview": "https://openreview.net/forum?id=T5Cerv7PT2", "poster": "", "project": "", "author_site": "Adriana Hugessen, Harley Wiltzer, Glen Berseth", "tldr": "", "abstract": "Learning safe policies has presented a longstanding challenge for the reinforcement learning (RL) community. Various formulations of safe RL have been proposed; However, fundamentally, tabula rasa RL must learn safety constraints through experience, which is problematic for real-world applications. Imitation learning is often preferred in real-world settings because the experts' safety preferences are embedded in the data the agent imitates. However, imitation learning is limited in its extensibility to new tasks, which can only be learned by providing the agent with expert trajectories. For safety-critical applications with sub-optimal or inexact expert data, it would be preferable to learn only the safety aspects of the policy through imitation, while still allowing for task learning with RL. The field of inverse constrained RL, which seeks to infer constraints from expert data, is a promising step in this direction. However, prior work in this area has relied on complex tri-level optimizations in order to infer safe behavior (constraints). This challenging optimization landscape leads to sub-optimal performance on several benchmark tasks. In this work, we present a simplified version of constraint inference that performs as well or better than prior work across a collection of continuous-control benchmarks. Moreover, besides improving performance, this simplified framework is easier to implement, tune, and more readily lends itself to various extensions, such as offline constraint inference.", "keywords": "reinforcement learning;inverse reinforcement learning;safe reinforcement learning;constrained reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Adriana Hugessen;Harley Wiltzer;Glen Berseth", "authorids": "~Adriana_Hugessen1;~Harley_Wiltzer1;~Glen_Berseth1", "gender": "F;M;M", "homepage": "https://ahugs.github.io/;https://harwiltz.github.io/about;http://fracturedplane.com/", "dblp": "358/6207;321/0992;147/5478", "google_scholar": "Z0q7bU0AAAAJ;;https://scholar.google.ca/citations?user=-WZcuuwAAAAJ", "orcid": ";;0000-0001-7351-8028", "linkedin": "adriana-hugessen-20a68062/;harley-wiltzer-4998547a;glen-berseth-0523278b?trk=hp-identity-name", "or_profile": "~Adriana_Hugessen1;~Harley_Wiltzer1;~Glen_Berseth1", "aff": "Universit\u00e9 de Montr\u00e9al;Mila;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al", "aff_domain": "umontreal.ca;mila.quebec;mila.umontreal.ca", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhugessen2024simplifying,\ntitle={Simplifying Constraint Inference with Inverse Reinforcement Learning},\nauthor={Adriana Hugessen and Harley Wiltzer and Glen Berseth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T5Cerv7PT2}\n}", "github": "", "reviewers": "ZV2P;xRd9;iin6;fdQQ", "pdf_size": 740518, "rating": "5;6;6;7", "confidence": "3;4;3;5", "soundness": "2;2;3;3", "novelty": "2;2;3;4", "presentation": "2;2;3;4", "wc_summary": "83;135;55;86", "wc_strengths": "40;74;28;143", "wc_weaknesses": "238;899;169;183", "wc_questions": "3;111;276;231", "wc_limitations": "10;26;31;22", "wc_review": "374;1245;559;665", "wc_reply_reviewers": "10;180;22;134", "wc_reply_authors": "56;784;40;80", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 89.75, 28.78693279944913 ], "wc_strengths_avg": [ 71.25, 44.72904537322477 ], "wc_weaknesses_avg": [ 372.25, 305.2108902054447 ], "wc_questions_avg": [ 155.25, 106.60294320514795 ], "wc_limitations_avg": [ 22.25, 7.75806032459145 ], "wc_review_avg": [ 710.75, 325.5551988526677 ], "wc_reply_reviewers_avg": [ 86.5, 72.47585804942223 ], "wc_reply_authors_avg": [ 240.0, 314.40101781005734 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18128680724994473367&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "umontreal.ca;mila.quebec;mila.umontreal.ca", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Mila;University of Montreal", "aff_unique_dep": ";Quebec Artificial Intelligence Institute;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.umontreal.ca;https://mila.quebec;https://www.mila.quebec", "aff_unique_abbr": "UdeM;Mila;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Monte Carlo Tree Search based Space Transfer for Black Box Optimization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95057", "id": "T5UfIfmDbq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T5UfIfmDbq", "openreview": "https://openreview.net/forum?id=T5UfIfmDbq", "poster": "/media/PosterPDFs/NeurIPS%202024/95057.png?t=1731678371.3800926", "project": "", "author_site": "Shukuan Wang, Ke Xue, Song Lei, Xiaobin Huang, Chao Qian", "tldr": "", "abstract": "Bayesian optimization (BO) is a popular method for computationally expensive black-box optimization. However, traditional BO methods need to solve new problems from scratch, leading to slow convergence. Recent studies try to extend BO to a transfer learning setup to speed up the optimization, where search space transfer is one of the most promising approaches and has shown impressive performance on many tasks. However, existing search space transfer methods either lack an adaptive mechanism or are not flexible enough, making it difficult to efficiently identify promising search space during the optimization process. In this paper, we propose a search space transfer learning method based on Monte Carlo tree search (MCTS), called MCTS-transfer, to iteratively divide, select, and optimize in a learned subspace. MCTS-transfer can not only provide a well-performing search space for warm-start but also adaptively identify and leverage the information of similar source tasks to reconstruct the search space during the optimization process. Experiments on synthetic functions, real-world problems, Design-Bench and hyper-parameter optimization show that MCTS-transfer can demonstrate superior performance compared to other search space transfer methods under different settings. Our code is available at \\url{https://github.com/lamda-bbo/mcts-transfer}.", "keywords": "Bayesian optimization;Black-box optimization;Transfer optimization", "primary_area": "optimization", "supplementary_material": "/attachment/e355200fb42327b13c091f367c749682c05aff71.zip", "author": "Shukuan Wang;Ke Xue;Lei Song;Xiaobin Huang;Chao Qian", "authorids": "~Shukuan_Wang1;~Ke_Xue1;~Lei_Song4;~Xiaobin_Huang2;~Chao_Qian1", "gender": "F;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/wangsk/;http://www.lamda.nju.edu.cn/xuek/;http://www.lamda.nju.edu.cn/songl/;;http://www.lamda.nju.edu.cn/qianc/", "dblp": ";93/2469-1;;;84/8508-1", "google_scholar": ";78bZVOwAAAAJ;;;", "orcid": ";0000-0001-6789-2670;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/%E6%99%93%E6%96%8C-%E9%BB%84-1b3787221;", "or_profile": "~Shukuan_Wang1;~Ke_Xue1;~Lei_Song4;~Xiaobin_Huang2;~Chao_Qian1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing university", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu", "position": "MS student;PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nwang2024monte,\ntitle={Monte Carlo Tree Search based Space Transfer for Black Box Optimization},\nauthor={Shukuan Wang and Ke Xue and Lei Song and Xiaobin Huang and Chao Qian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T5UfIfmDbq}\n}", "github": "", "reviewers": "Tjsz;jvcN;ygQe;x4CE", "pdf_size": 1538798, "rating": "6;7;7;7", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;3;4", "wc_summary": "77;31;68;114", "wc_strengths": "61;41;349;90", "wc_weaknesses": "92;166;355;132", "wc_questions": "6;57;2;63", "wc_limitations": "6;38;1;26", "wc_review": "242;333;775;425", "wc_reply_reviewers": "0;18;13;17", "wc_reply_authors": "0;32;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 29.516944286290883 ], "wc_strengths_avg": [ 135.25, 124.6322089188826 ], "wc_weaknesses_avg": [ 186.25, 100.88700362286512 ], "wc_questions_avg": [ 32.0, 28.115831839019098 ], "wc_limitations_avg": [ 17.75, 14.972892172189045 ], "wc_review_avg": [ 443.75, 201.89523892355658 ], "wc_reply_reviewers_avg": [ 12.0, 7.176350047203662 ], "wc_reply_authors_avg": [ 8.0, 13.856406460551018 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17680690752996824020&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "OPERA: Automatic Offline Policy Evaluation with Re-weighted Aggregates of Multiple Estimators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95056", "id": "T6LOGZBC2m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T6LOGZBC2m", "openreview": "https://openreview.net/forum?id=T6LOGZBC2m", "poster": "", "project": "", "author_site": "Allen Nie, Yash Chandak, Christina Yuan, Anirudhan Badrinath, Yannis Flet-Berliac, Emma Brunskill", "tldr": "", "abstract": "Offline policy evaluation (OPE) allows us to evaluate and estimate a new sequential decision-making policy's performance by leveraging historical interaction data collected from other policies. Evaluating a new policy online without a confident estimate of its performance can lead to costly, unsafe, or hazardous outcomes, especially in education and healthcare. Several OPE estimators have been proposed in the last decade, many of which have hyperparameters and require training. Unfortunately, choosing the best OPE algorithm for each task and domain is still unclear. In this paper, we propose a new algorithm that adaptively blends a set of OPE estimators given a dataset without relying on an explicit selection using a statistical procedure. We prove that our estimator is consistent and satisfies several desirable properties for policy evaluation. Additionally, we demonstrate that when compared to alternative approaches, our estimator can be used to select higher-performing policies in healthcare and robotics. Our work contributes to improving ease of use for a general-purpose, estimator-agnostic, off-policy evaluation framework for offline RL.", "keywords": "offline reinforcement learning;off policy evaluation;statistics;ensemble method", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/77dfeec8fd2d974bd2d89a41e9e2c9d7be8ae5d4.zip", "author": "Allen Nie;Yash Chandak;Christina J. Yuan;Anirudhan Badrinath;Yannis Flet-Berliac;Emma Brunskill", "authorids": "~Allen_Nie1;~Yash_Chandak1;~Christina_J._Yuan1;~Anirudhan_Badrinath1;~Yannis_Flet-Berliac1;~Emma_Brunskill2", "gender": "M;;;;;", "homepage": "https://anie.me;https://yashchandak.github.io/;;;https://ynns.io/;", "dblp": "207/7996;168/8450;;;239/5247;", "google_scholar": "r90OelAAAAAJ;AsgUcSEAAAAJ;;;https://scholar.google.fr/citations?user=qclRKHoAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Allen_Nie1;~Yash_Chandak1;~Christina_J._Yuan1;~Anirudhan_Badrinath1;~Yannis_Flet-Berliac1;~Emma_Brunskill2", "aff": "Google DeepMind;Computer Science Department, Stanford University;;;InstaDeep;", "aff_domain": "google.com;cs.stanford.edu;;;instadeep.com;", "position": "Intern;Postdoc;;;Researcher;", "bibtex": "@inproceedings{\nnie2024opera,\ntitle={{OPERA}: Automatic Offline Policy Evaluation with Re-weighted Aggregates of Multiple Estimators},\nauthor={Allen Nie and Yash Chandak and Christina J. Yuan and Anirudhan Badrinath and Yannis Flet-Berliac and Emma Brunskill},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T6LOGZBC2m}\n}", "github": "", "reviewers": "6uiB;Rbow;QbuQ;BP3B", "pdf_size": 654751, "rating": "6;6;7;8", "confidence": "3;4;4;3", "soundness": "4;3;3;4", "novelty": "3;2;3;3", "presentation": "3;4;4;4", "wc_summary": "98;83;28;205", "wc_strengths": "75;139;64;308", "wc_weaknesses": "132;141;141;41", "wc_questions": "104;75;64;30", "wc_limitations": "1;27;15;99", "wc_review": "410;465;312;683", "wc_reply_reviewers": "15;0;25;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.5, 64.1346240341362 ], "wc_strengths_avg": [ 146.5, 97.541017013357 ], "wc_weaknesses_avg": [ 113.75, 42.16263155923738 ], "wc_questions_avg": [ 68.25, 26.479945241635225 ], "wc_limitations_avg": [ 35.5, 37.79880950506246 ], "wc_review_avg": [ 467.5, 135.95311691903206 ], "wc_reply_reviewers_avg": [ 12.5, 9.013878188659973 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-RXsOS1dReEJ:scholar.google.com/&scioq=OPERA:+Automatic+Offline+Policy+Evaluation+with+Re-weighted+Aggregates+of+Multiple+Estimators&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "google.com;cs.stanford.edu;;;instadeep.com;", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Stanford University;InstaDeep", "aff_unique_dep": "Google DeepMind;Computer Science Department;", "aff_unique_url": "https://deepmind.com;https://www.stanford.edu;https://www.instadeep.com", "aff_unique_abbr": "DeepMind;Stanford;InstaDeep", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Conformal Prediction for Class-wise Coverage via Augmented Label Rank Calibration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95055", "id": "T7dS1Ghwwu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T7dS1Ghwwu", "openreview": "https://openreview.net/forum?id=T7dS1Ghwwu", "poster": "", "project": "", "author_site": "Yuanjie Shi, Subhankar Ghosh, Taha Belkhouja, Jana Doppa, Yan Yan", "tldr": "", "abstract": "Conformal prediction (CP) is an emerging uncertainty quantification framework that allows us to construct a prediction set to cover the true label with a pre-specified marginal or conditional probability.\nAlthough the valid coverage guarantee has been extensively studied for classification problems, CP often produces large prediction sets which may not be practically useful.\nThis issue is exacerbated for the setting of class-conditional coverage on imbalanced classification tasks with many and/or imbalanced classes.\nThis paper proposes the Rank Calibrated Class-conditional CP (RC3P) algorithm to reduce the prediction set sizes to achieve class-conditional coverage, where the valid coverage holds for each class.\nIn contrast to the standard class-conditional CP (CCP) method that uniformly thresholds the class-wise conformity score for each class, the augmented label rank calibration step allows RC3P to selectively iterate this class-wise thresholding subroutine only for a subset of classes whose class-wise top-$k$ error is small.\nWe prove that agnostic to the classifier and data distribution, RC3P achieves class-wise coverage. We also show that RC3P reduces the size of prediction sets compared to the CCP method. \nComprehensive experiments on multiple real-world datasets demonstrate that RC3P achieves class-wise coverage and $26.25\\\\%$ $\\downarrow$ reduction in prediction set sizes on average.", "keywords": "Uncertainty Quantification;Conformal Prediction;Imbalanced Data;Class-conditional Coverage;Deep Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/d97977b8f4268958bac180fa8fa6f8d36e606c2b.zip", "author": "Yuanjie Shi;SUBHANKAR GHOSH;Taha Belkhouja;Jana Doppa;Yan Yan", "authorids": "~Yuanjie_Shi1;~SUBHANKAR_GHOSH1;~Taha_Belkhouja1;~Jana_Doppa1;~Yan_Yan3", "gender": "F;M;M;;M", "homepage": "https://yuanjiesh.github.io/Yuanjie-Shi/;https://subhankarghoshss.github.io/website/;https://tahabelkhouja.github.io/;;http://iemppu.github.io/", "dblp": "353/2390;;211/8999;;13/3953-6", "google_scholar": "A8PWiEYAAAAJ;1Q73N6IAAAAJ;JPBgQOoAAAAJ;;A6co_BAAAAAJ", "orcid": ";;0000-0001-8749-6632;;0000-0001-9108-6767", "linkedin": ";subhankar-ghosh-11701a169/;;;", "or_profile": "~Yuanjie_Shi1;~SUBHANKAR_GHOSH1;~Taha_Belkhouja1;~Jana_Doppa1;~Yan_Yan3", "aff": "Washington State University;Washington State University at Pullman;Washington State University, Pullman;;Washington State University, Pullman", "aff_domain": "wsu.edu;wsu.edu;wsu.edu;;wsu.edu", "position": "PhD student;PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nshi2024conformal,\ntitle={Conformal Prediction for Class-wise Coverage via Augmented Label Rank Calibration},\nauthor={Yuanjie Shi and SUBHANKAR GHOSH and Taha Belkhouja and Jana Doppa and Yan Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T7dS1Ghwwu}\n}", "github": "", "reviewers": "yaTg;2LUb;vcMw;Bzsm", "pdf_size": 2256926, "rating": "4;6;6;6", "confidence": "3;5;3;4", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;2;3", "wc_summary": "27;84;42;64", "wc_strengths": "43;105;13;50", "wc_weaknesses": "69;123;73;162", "wc_questions": "46;42;19;53", "wc_limitations": "2;1;1;4", "wc_review": "187;355;148;333", "wc_reply_reviewers": "26;16;10;99", "wc_reply_authors": "0;0;0;755", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.25, 21.637640814099857 ], "wc_strengths_avg": [ 52.75, 33.214266513051285 ], "wc_weaknesses_avg": [ 106.75, 38.34302413738384 ], "wc_questions_avg": [ 40.0, 12.747548783981962 ], "wc_limitations_avg": [ 2.0, 1.224744871391589 ], "wc_review_avg": [ 255.75, 89.65872796331655 ], "wc_reply_reviewers_avg": [ 37.75, 35.82160660830276 ], "wc_reply_authors_avg": [ 188.75, 326.9245899286256 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=492925889757373255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "wsu.edu;wsu.edu;wsu.edu;;wsu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Washington State University", "aff_unique_dep": "", "aff_unique_url": "https://wsu.edu", "aff_unique_abbr": "WSU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Pullman", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Federated Online Prediction from Experts with Differential Privacy: Separations and Regret Speed-ups", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95054", "id": "T826pwZLci", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T826pwZLci", "openreview": "https://openreview.net/forum?id=T826pwZLci", "poster": "", "project": "", "author_site": "Fengyu Gao, Ruiquan Huang, Jing Yang", "tldr": "", "abstract": "We study the problems of differentially private federated online prediction from experts against both *stochastic adversaries* and *oblivious adversaries*. We aim to minimize the average regret on $m$ clients working in parallel over time horizon $T$ with explicit differential privacy (DP) guarantees. With stochastic adversaries, we propose a **Fed-DP-OPE-Stoch** algorithm that achieves $\\sqrt{m}$-fold speed-up of the per-client regret compared to the single-player counterparts under both pure DP and approximate DP constraints, while maintaining logarithmic communication costs. With oblivious adversaries, we establish non-trivial lower bounds indicating that *collaboration among clients does not lead to regret speed-up with general oblivious adversaries*. We then consider a special case of the oblivious adversaries setting, where there exists a low-loss expert. We design a new algorithm **Fed-SVT** and show that it achieves an $m$-fold regret speed-up under both pure DP and approximate DP constraints over the single-player counterparts. Our lower bound indicates that Fed-SVT is nearly optimal up to logarithmic factors. Experiments demonstrate the effectiveness of our proposed algorithms. To the best of our knowledge, this is the first work examining the differentially private online prediction from experts in the federated setting.", "keywords": "online learning;federated learning;differential privacy", "primary_area": "privacy", "supplementary_material": "/attachment/8e6340a922b28eb7a537abb4bd54712498599db7.zip", "author": "Fengyu Gao;Ruiquan Huang;Jing Yang", "authorids": "~Fengyu_Gao1;~Ruiquan_Huang1;~Jing_Yang3", "gender": ";M;", "homepage": "https://gfengyu.github.io;;http://www.ee.psu.edu/yang", "dblp": ";304/8880;", "google_scholar": "ppyx_LkAAAAJ;0eo3JGgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";ruiquan-huang-369543185/;", "or_profile": "~Fengyu_Gao1;~Ruiquan_Huang1;~Jing_Yang3", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ngao2024federated,\ntitle={Federated Online Prediction from Experts with Differential Privacy: Separations and Regret Speed-ups},\nauthor={Fengyu Gao and Ruiquan Huang and Jing Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T826pwZLci}\n}", "github": "", "reviewers": "RFE1;NyWa;hhm9;H7Yw", "pdf_size": 1150053, "rating": "5;6;7;7", "confidence": "3;3;3;2", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "56;84;241;103", "wc_strengths": "53;88;153;53", "wc_weaknesses": "67;102;83;63", "wc_questions": "155;38;82;26", "wc_limitations": "32;1;12;43", "wc_review": "363;313;571;288", "wc_reply_reviewers": "49;24;26;0", "wc_reply_authors": "637;88;72;50", "reply_reviewers": "1;1;1;0", "reply_authors": "5;3;3;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.0, 71.270611053926 ], "wc_strengths_avg": [ 86.75, 40.831207427652686 ], "wc_weaknesses_avg": [ 78.75, 15.368392889303683 ], "wc_questions_avg": [ 75.25, 50.543916547889324 ], "wc_limitations_avg": [ 22.0, 16.446884203398525 ], "wc_review_avg": [ 383.75, 111.43019115123154 ], "wc_reply_reviewers_avg": [ 24.75, 17.340343133859836 ], "wc_reply_authors_avg": [ 211.75, 245.88856724134206 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zyHDKZB5A7sJ:scholar.google.com/&scioq=Federated+Online+Prediction+from+Experts+with+Differential+Privacy:+Separations+and+Regret+Speed-ups&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "psu.edu;psu.edu;psu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Layer-Adaptive State Pruning for Deep State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95053", "id": "T9GbbWbNQG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T9GbbWbNQG", "openreview": "https://openreview.net/forum?id=T9GbbWbNQG", "poster": "/media/PosterPDFs/NeurIPS%202024/95053.png?t=1733118296.6109235", "project": "", "author_site": "Minseon Gwak, Seongrok Moon, Joohwan Ko, PooGyeon Park", "tldr": "", "abstract": "Due to the lack of state dimension optimization methods, deep state space models (SSMs) have sacrificed model capacity, training search space, or stability to alleviate computational costs caused by high state dimensions. In this work, we provide a structured pruning method for SSMs, Layer-Adaptive STate pruning (LAST), which reduces the state dimension of each layer in minimizing model-level output energy loss by extending modal truncation for a single system. LAST scores are evaluated using the $\\mathcal{H}_{\\infty}$ norms of subsystems and layer-wise energy normalization. The scores serve as global pruning criteria, enabling cross-layer comparison of states and layer-adaptive pruning. Across various sequence benchmarks, LAST optimizes previous SSMs, revealing the redundancy and compressibility of their state spaces. Notably, we demonstrate that, on average, pruning 33\\% of states still maintains performance with 0.52\\% accuracy loss in multi-input multi-output SSMs without retraining. Code is available at https://github.com/msgwak/LAST.", "keywords": "Sequence model;model order reduction;model compression;network pruning;system norm;long range arena", "primary_area": "other", "supplementary_material": "", "author": "Minseon Gwak;Seongrok Moon;Joohwan Ko;PooGyeon Park", "authorids": "~Minseon_Gwak1;~Seongrok_Moon1;~Joohwan_Ko2;~PooGyeon_Park1", "gender": "F;M;M;M", "homepage": "https://msgwak.github.io;https://spac.postech.ac.kr/;https://joohwanko.com/;", "dblp": ";;358/5976;", "google_scholar": "https://scholar.google.co.kr/citations?user=Wcmn2ucAAAAJ;;;ktTQiqsAAAAJ", "orcid": "0000-0002-0295-0152;0009-0000-6669-6635;;", "linkedin": "minseon-gwak-1a2ab833b/;;;", "or_profile": "~Minseon_Gwak1;~Seongrok_Moon1;~Joohwan_Ko2;~PooGyeon_Park1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;Korea Advanced Institute of Science & Technology;Pohang University of Science and Technology", "aff_domain": "postech.ac.kr;postech.ac.kr;kaist.edu;postech.ac.kr", "position": "PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\ngwak2024layeradaptive,\ntitle={Layer-Adaptive State Pruning for Deep State Space Models},\nauthor={Minseon Gwak and Seongrok Moon and Joohwan Ko and PooGyeon Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T9GbbWbNQG}\n}", "github": "", "reviewers": "y832;xVFN;c1Mv", "pdf_size": 810299, "rating": "5;6;6", "confidence": "2;2;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "54;74;52", "wc_strengths": "24;18;35", "wc_weaknesses": "37;17;68", "wc_questions": "20;10;60", "wc_limitations": "7;1;1", "wc_review": "142;120;216", "wc_reply_reviewers": "66;137;18", "wc_reply_authors": "438;1023;0", "reply_reviewers": "1;1;1", "reply_authors": "2;3;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.0, 9.93310961716756 ], "wc_strengths_avg": [ 25.666666666666668, 7.039570693980958 ], "wc_weaknesses_avg": [ 40.666666666666664, 20.98147330914162 ], "wc_questions_avg": [ 30.0, 21.602468994692867 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 159.33333333333334, 41.063636251825315 ], "wc_reply_reviewers_avg": [ 73.66666666666667, 48.883080463035014 ], "wc_reply_authors_avg": [ 487.0, 419.0727860408022 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oYN6_tnOWiAJ:scholar.google.com/&scioq=Layer-Adaptive+State+Pruning+for+Deep+State+Space+Models&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "postech.ac.kr;postech.ac.kr;kaist.edu;postech.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Pohang University of Science and Technology;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "POSTECH;KAIST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "HHD-GP: Incorporating Helmholtz-Hodge Decomposition into Gaussian Processes for Learning Dynamical Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95052", "id": "T9PfJViMiJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=T9PfJViMiJ", "openreview": "https://openreview.net/forum?id=T9PfJViMiJ", "poster": "", "project": "", "author_site": "Hao Xu, Jia Pan", "tldr": "", "abstract": "Machine learning models provide alternatives for efficiently recognizing complex patterns from data, but the main concern in applying them to modeling physical systems stems from their physics-agnostic design, leading to learning methods that lack interpretability, robustness, and data efficiency. This paper mitigates this concern by incorporating the Helmholtz-Hodge decomposition into a Gaussian process model, leading to a versatile framework that simultaneously learns the curl-free and divergence-free components of a dynamical system. Learning a predictive model in this form facilitates the exploitation of symmetry priors. In addition to improving predictive power, these priors make the model indentifiable, thus the identified features can be linked to comprehensible scientific properties of the system. We show that compared to baseline models, our model achieves better predictive performance on several benchmark dynamical systems while allowing physically meaningful decomposition of the systems from noisy and sparse data.", "keywords": "Gaussian process;Helmholtz-Hodge decomposition;dynamical system", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Hao Xu;Jia Pan", "authorids": "~Hao_Xu16;~Jia_Pan1", "gender": "M;M", "homepage": ";https://www.cs.hku.hk/people/academic-staff/jpan", "dblp": ";97/896", "google_scholar": ";YYT8-7kAAAAJ", "orcid": "0000-0003-3203-3272;", "linkedin": ";", "or_profile": "~Hao_Xu16;~Jia_Pan1", "aff": "University of Hong Kong;University of Hong Kong", "aff_domain": "hku.hk;hku.hk", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nxu2024hhdgp,\ntitle={{HHD}-{GP}: Incorporating Helmholtz-Hodge Decomposition into Gaussian Processes for Learning Dynamical Systems},\nauthor={Hao Xu and Jia Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=T9PfJViMiJ}\n}", "github": "", "reviewers": "KheP;w6Wk;X44d;DJX1", "pdf_size": 2523128, "rating": "6;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;3;4;3", "wc_summary": "70;66;185;57", "wc_strengths": "109;28;191;15", "wc_weaknesses": "184;17;157;74", "wc_questions": "146;115;77;152", "wc_limitations": "9;14;88;1", "wc_review": "518;240;698;299", "wc_reply_reviewers": "68;32;60;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.5, 52.461890930464946 ], "wc_strengths_avg": [ 85.75, 70.63772009344582 ], "wc_weaknesses_avg": [ 108.0, 66.35887280537547 ], "wc_questions_avg": [ 122.5, 29.78674201721296 ], "wc_limitations_avg": [ 28.0, 34.94996423460259 ], "wc_review_avg": [ 438.75, 182.01837132553405 ], "wc_reply_reviewers_avg": [ 48.25, 16.005858302509115 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DguquFQYhOwJ:scholar.google.com/&scioq=HHD-GP:+Incorporating+Helmholtz-Hodge+Decomposition+into+Gaussian+Processes+for+Learning+Dynamical+Systems&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "hku.hk;hku.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "B-cosification: Transforming Deep Neural Networks to be Inherently Interpretable", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95051", "id": "TA5zPfH8iI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TA5zPfH8iI", "openreview": "https://openreview.net/forum?id=TA5zPfH8iI", "poster": "/media/PosterPDFs/NeurIPS%202024/95051.png?t=1733720266.7038476", "project": "", "author_site": "Shreyash Arya, Sukrut Rao, Moritz B\u00f6hle, Bernt Schiele", "tldr": "", "abstract": "B-cos Networks have been shown to be effective for obtaining highly human interpretable explanations of model decisions by architecturally enforcing stronger alignment between inputs and weight. B-cos variants of convolutional networks (CNNs) and vision transformers (ViTs), which primarily replace linear layers with B-cos transformations, perform competitively to their respective standard variants while also yielding explanations that are faithful by design. However, it has so far been necessary to train these models from scratch, which is increasingly infeasible in the era of large, pre-trained foundation models. In this work, inspired by the architectural similarities in standard DNNs and B-cos networks, we propose \u2018B-cosification\u2019, a novel approach to transform existing pre-trained models to become inherently interpretable. We perform a thorough study of design choices to perform this conversion, both for convolutional neural networks and vision transformers. We find that B-cosification can yield models that are on par with B-cos models trained from scratch in terms of interpretability, while often outperforming them in terms of classification performance at a fraction of the training cost. Subsequently, we apply B-cosification to a pretrained CLIP model, and show that, even with limited data and compute cost, we obtain a B-cosified version that is highly interpretable and competitive on zero shot performance across a variety of datasets. We release our\ncode and pre-trained model weights at https://github.com/shrebox/B-cosification.", "keywords": "B-cos networks;Explainability;Inherent Interpretability;CLIP", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Shreyash Arya;Sukrut Rao;Moritz B\u00f6hle;Bernt Schiele", "authorids": "~Shreyash_Arya1;~Sukrut_Rao1;~Moritz_B\u00f6hle1;~Bernt_Schiele1", "gender": ";M;;M", "homepage": ";https://sukrutrao.github.io;https://moboehle.github.io/personal_info/;http://www.mpi-inf.mpg.de/~schiele", "dblp": "274/3099;217/3276;289/5742;s/BerntSchiele", "google_scholar": "AmWtEfEAAAAJ;zefwfv4AAAAJ;Kn3znAMAAAAJ;https://scholar.google.de/citations?user=z76PBfYAAAAJ", "orcid": "0000-0002-2108-4579;0000-0001-8896-7619;0000-0002-5479-3769;0000-0001-9683-5237", "linkedin": "shrebox/;sukrutrao/;moritz-b%C3%B6hle-5877b9140/;", "or_profile": "~Shreyash_Arya1;~Sukrut_Rao1;~Moritz_B\u00f6hle1;~Bernt_Schiele1", "aff": "Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes;Saarland Informatics Campus, Max-Planck Institute;Max Planck Institute for Informatics, Saarland Informatics Campus", "aff_domain": "uni-saarland.de;uni-saarland.de;mpi-inf.mpg.de;mpi-inf.mpg.de", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\narya2024bcosification,\ntitle={B-cosification: Transforming Deep Neural Networks to be Inherently Interpretable},\nauthor={Shreyash Arya and Sukrut Rao and Moritz B{\\\"o}hle and Bernt Schiele},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TA5zPfH8iI}\n}", "github": "", "reviewers": "71N9;JC7z;DdYE;Z7HS", "pdf_size": 26397531, "rating": "6;7;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;2", "wc_summary": "48;103;123;53", "wc_strengths": "58;136;76;34", "wc_weaknesses": "186;329;95;29", "wc_questions": "5;182;54;1533", "wc_limitations": "10;22;12;1", "wc_review": "307;772;360;1650", "wc_reply_reviewers": "31;154;55;160", "wc_reply_authors": "0;0;0;543", "reply_reviewers": "1;2;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.75, 32.08874413248359 ], "wc_strengths_avg": [ 76.0, 37.70941526992961 ], "wc_weaknesses_avg": [ 159.75, 112.4974999722216 ], "wc_questions_avg": [ 443.5, 632.3339703036679 ], "wc_limitations_avg": [ 11.25, 7.46240577829965 ], "wc_review_avg": [ 772.25, 537.7854474602302 ], "wc_reply_reviewers_avg": [ 100.0, 57.66714836022326 ], "wc_reply_authors_avg": [ 135.75, 235.1258971274751 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15139362618915978448&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uni-saarland.de;uni-saarland.de;mpi-inf.mpg.de;mpi-inf.mpg.de", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Universit\u00e4t des Saarlandes;Max-Planck Institute;Max Planck Institute for Informatics", "aff_unique_dep": ";Informatics;", "aff_unique_url": "https://www.uni-saarland.de;https://www.mpi-sws.org;https://mpi-inf.mpg.de", "aff_unique_abbr": "UDS;MPI-SWS;MPII", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Deep Bayesian Active Learning for Preference Modeling in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95050", "id": "TADTT9ughN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TADTT9ughN", "openreview": "https://openreview.net/forum?id=TADTT9ughN", "poster": "/media/PosterPDFs/NeurIPS%202024/95050.png?t=1732911221.0296938", "project": "", "author_site": "Luckeciano Carvalho Melo, Panagiotis Tigas, Alessandro Abate, Yarin Gal", "tldr": "", "abstract": "Leveraging human preferences for steering the behavior of Large Language Models (LLMs) has demonstrated notable success in recent years. Nonetheless, data selection and labeling are still a bottleneck for these systems, particularly at large scale. Hence, selecting the most informative points for acquiring human feedback may considerably reduce the cost of preference labeling and unleash the further development of LLMs. Bayesian Active Learning provides a principled framework for addressing this challenge and has demonstrated remarkable success in diverse settings. However, previous attempts to employ it for Preference Modeling did not meet such expectations. In this work, we identify that naive epistemic uncertainty estimation leads to the acquisition of redundant samples. We address this by proposing the Bayesian Active Learner for Preference Modeling (BAL-PM), a novel stochastic acquisition policy that not only targets points of high epistemic uncertainty according to the preference model but also seeks to maximize the entropy of the acquired prompt distribution in the feature space spanned by the employed LLM. Notably, our experiments demonstrate that BAL-PM requires 33\\% to 68\\% fewer preference labels in two popular human preference datasets and exceeds previous stochastic Bayesian acquisition policies.", "keywords": "Bayesian Active Learning;Preference Modeling;Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Luckeciano Carvalho Melo;Panagiotis Tigas;Alessandro Abate;Yarin Gal", "authorids": "~Luckeciano_Carvalho_Melo1;~Panagiotis_Tigas1;~Alessandro_Abate1;~Yarin_Gal1", "gender": "M;M;;", "homepage": "https://luckeciano.github.io;https://www.cs.ox.ac.uk/people/alessandro.abate/;http://www.cs.ox.ac.uk/people/yarin.gal/website//;https://ptigas.com", "dblp": "234/6096;19/3904;67/9076;159/7244", "google_scholar": "b2aBi8UAAAAJ;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;https://scholar.google.co.uk/citations?user=E9ITYW0AAAAJ", "orcid": ";0000-0002-5627-9093;;0000-0001-9944-1129", "linkedin": ";;;", "or_profile": "~Luckeciano_Carvalho_Melo1;~Alessandro_Abate1;~Yarin_Gal1;~Panagiotis_Tigkas1", "aff": "University of Oxford;University of Oxford;University of Oxford;Isomorphic Labs (Alphabet entity)", "aff_domain": "ox.ac.uk;ox.ac.uk;ox.ac.uk;google.com", "position": "PhD student;Full Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nmelo2024deep,\ntitle={Deep Bayesian Active Learning for Preference Modeling in Large Language Models},\nauthor={Luckeciano Carvalho Melo and Panagiotis Tigas and Alessandro Abate and Yarin Gal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TADTT9ughN}\n}", "github": "", "reviewers": "tUWV;vibJ;aVyo;x99G", "pdf_size": 2436924, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "3;4;3;4", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "104;95;92;91", "wc_strengths": "47;63;37;48", "wc_weaknesses": "224;397;84;53", "wc_questions": "71;138;28;22", "wc_limitations": "15;36;24;13", "wc_review": "461;729;265;227", "wc_reply_reviewers": "76;215;16;32", "wc_reply_authors": "63;33;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.5, 5.123475382979799 ], "wc_strengths_avg": [ 48.75, 9.283722313813572 ], "wc_weaknesses_avg": [ 189.5, 136.02297600038017 ], "wc_questions_avg": [ 64.75, 46.32156625158523 ], "wc_limitations_avg": [ 22.0, 9.082951062292475 ], "wc_review_avg": [ 420.5, 199.0194714092066 ], "wc_reply_reviewers_avg": [ 84.75, 78.34339474390933 ], "wc_reply_authors_avg": [ 24.0, 26.239283526803852 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11552214850993906049&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;ox.ac.uk;ox.ac.uk;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Oxford;Isomorphic Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://isomorphiclabs.com", "aff_unique_abbr": "Oxford;Isomorphic Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "LaSCal: Label-Shift Calibration without target labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95049", "id": "TALJtWX7w4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TALJtWX7w4", "openreview": "https://openreview.net/forum?id=TALJtWX7w4", "poster": "/media/PosterPDFs/NeurIPS%202024/95049.png?t=1733401404.9166408", "project": "", "author_site": "Teodora Popordanoska, Gorjan Radevski, Tinne Tuytelaars, Matthew Blaschko", "tldr": "", "abstract": "When machine learning systems face dataset shift, model calibration plays a pivotal role in ensuring their reliability.\nCalibration error (CE) provides insights into the alignment between the predicted confidence scores and the classifier accuracy.\nWhile prior works have delved into the implications of dataset shift on calibration, existing CE estimators either (i) assume access to labeled data from the target domain, often unavailable in practice, or (ii) are derived under a covariate shift assumption.\nIn this work we propose a novel, label-free, consistent CE estimator under label shift. Label shift is characterized by changes in the marginal label distribution p(Y), with a constant conditional p(X|Y) distribution between the source and target. We introduce a novel calibration method, called LaSCal, which uses the estimator in conjunction with a post-hoc calibration strategy, to perform unsupervised calibration on the target distribution. Our thorough empirical analysis demonstrates the effectiveness and reliability of the proposed approach across different modalities, model architectures and label shift intensities.", "keywords": "uncertainty calibration;calibration error estimation;label-shift;domain adaptation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Teodora Popordanoska;Gorjan Radevski;Tinne Tuytelaars;Matthew B. Blaschko", "authorids": "~Teodora_Popordanoska1;~Gorjan_Radevski1;~Tinne_Tuytelaars1;~Matthew_B._Blaschko1", "gender": ";M;;", "homepage": "https://tpopordanoska.github.io/;https://gorjanradevski.github.io/personal/about/;;", "dblp": "270/8007;278/2232;;", "google_scholar": "B2YV6zIAAAAJ;GKJnjkgAAAAJ;;", "orcid": "0000-0002-1436-2286;;;", "linkedin": "tpopordanoska/;gorjan-radevski/;;", "or_profile": "~Teodora_Popordanoska1;~Gorjan_Radevski1;~Tinne_Tuytelaars1;~Matthew_B._Blaschko1", "aff": "KU Leuven;Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;;", "aff_domain": "kuleuven.be;esat.kuleuven.be;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\npopordanoska2024lascal,\ntitle={La{SC}al: Label-Shift Calibration without target labels},\nauthor={Teodora Popordanoska and Gorjan Radevski and Tinne Tuytelaars and Matthew B. Blaschko},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TALJtWX7w4}\n}", "github": "", "reviewers": "z7nx;Mi4F;vmtH;Lv5P", "pdf_size": 564734, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;4;2", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "59;50;121;66", "wc_strengths": "10;57;68;81", "wc_weaknesses": "72;36;82;529", "wc_questions": "11;91;132;60", "wc_limitations": "1;4;9;2", "wc_review": "153;238;412;738", "wc_reply_reviewers": "20;15;41;94", "wc_reply_authors": "16;28;20;55", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.0, 27.721832551258224 ], "wc_strengths_avg": [ 54.0, 26.78619047195775 ], "wc_weaknesses_avg": [ 179.75, 202.36399753908796 ], "wc_questions_avg": [ 73.5, 44.206899913927465 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 385.25, 224.03724578739136 ], "wc_reply_reviewers_avg": [ 42.5, 31.2929704566377 ], "wc_reply_authors_avg": [ 29.75, 15.20485119953497 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9984978082427862410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kuleuven.be;esat.kuleuven.be;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Katholieke Universiteit Leuven;KU Leuven", "aff_unique_dep": ";Department of Electrical Engineering", "aff_unique_url": "https://www.kuleuven.be;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Belgium" }, { "title": "Generated and Pseudo Content guided Prototype Refinement for Few-shot Point Cloud Segmentation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95048", "id": "TBVLQjdFcA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TBVLQjdFcA", "openreview": "https://openreview.net/forum?id=TBVLQjdFcA", "poster": "/media/PosterPDFs/NeurIPS%202024/95048.png?t=1731433068.2229075", "project": "", "author_site": "Lili Wei, Congyan Lang, Ziyi Chen, Tao Wang, Yidong Li, Jun Liu", "tldr": "", "abstract": "Few-shot 3D point cloud semantic segmentation aims to segment query point clouds with only a few annotated support point clouds. Existing prototype-based methods learn prototypes from the 3D support set to guide the segmentation of query point clouds. However, they encounter the challenge of low prototype quality due to constrained semantic information in the 3D support set and class information bias between support and query sets. To address these issues, in this paper, we propose a novel framework called Generated and Pseudo Content guided Prototype Refinement (GPCPR), which explicitly leverages LLM-generated content and reliable query context to enhance prototype quality. GPCPR achieves prototype refinement through two core components: LLM-driven Generated Content-guided Prototype Refinement (GCPR) and Pseudo Query Context-guided Prototype Refinement (PCPR). Specifically, GCPR integrates diverse and differentiated class descriptions generated by large language models to enrich prototypes with comprehensive semantic knowledge. PCPR further aggregates reliable class-specific pseudo-query context to mitigate class information bias and generate more suitable query-specific prototypes. Furthermore, we introduce a dual-distillation regularization term, enabling knowledge transfer between early-stage entities (prototypes or pseudo predictions) and their deeper counterparts to enhance refinement. Extensive experiments demonstrate the superiority of our method, surpassing the state-of-the-art methods by up to 12.10% and 13.75% mIoU on S3DIS and ScanNet, respectively.", "keywords": "3D point cloud segmentation; few shot learning; Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lili Wei;Congyan Lang;Ziyi Chen;Tao Wang;Yidong Li;Jun Liu", "authorids": "~Lili_Wei1;~Congyan_Lang2;~Ziyi_Chen15;~Tao_Wang1;~Yidong_Li1;~Jun_Liu8", "gender": "F;F;F;M;M;M", "homepage": ";http://faculty.bjtu.edu.cn/7968/;;;;", "dblp": ";89/4275;;12/5838-11;40/7652.html;95/3736-36", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=F3C5oAcAAAAJ;;Q5Ild8UAAAAJ", "orcid": "0000-0002-3905-8651;;0009-0005-8310-6779;;;", "linkedin": ";;;;;", "or_profile": "~Lili_Wei1;~Congyan_Lang2;~Ziyi_Chen15;~Tao_Wang1;~Yidong_Li1;~Jun_Liu8", "aff": "Beijing Jiaotong university;Beijing jiaotong university;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Singapore University of Technology and Design", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;sutd.edu.sg", "position": "PhD student;Full Professor;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwei2024generated,\ntitle={Generated and Pseudo Content guided Prototype Refinement for Few-shot Point Cloud Segmentation},\nauthor={Lili Wei and Congyan Lang and Ziyi Chen and Tao Wang and Yidong Li and Jun Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TBVLQjdFcA}\n}", "github": "", "reviewers": "pd6x;ABMp;FCL4", "pdf_size": 7970936, "rating": "5;6;7", "confidence": "4;5;4", "soundness": "2;3;3", "novelty": "1;3;3", "presentation": "2;3;3", "wc_summary": "107;96;64", "wc_strengths": "22;24;31", "wc_weaknesses": "177;127;107", "wc_questions": "167;14;4", "wc_limitations": "172;10;6", "wc_review": "645;271;212", "wc_reply_reviewers": "23;0;44", "wc_reply_authors": "52;0;37", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 89.0, 18.239152027072603 ], "wc_strengths_avg": [ 25.666666666666668, 3.8586123009300755 ], "wc_weaknesses_avg": [ 137.0, 29.43920288775949 ], "wc_questions_avg": [ 61.666666666666664, 74.59371436134698 ], "wc_limitations_avg": [ 62.666666666666664, 77.32758599332813 ], "wc_review_avg": [ 376.0, 191.73071393667385 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 17.96910929592474 ], "wc_reply_authors_avg": [ 29.666666666666668, 21.85304453744502 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11576295533770601881&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;sutd.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Beijing Jiao Tong University;Singapore University of Technology and Design", "aff_unique_dep": ";", "aff_unique_url": "http://www.bjtu.edu.cn;https://www.sutd.edu.sg", "aff_unique_abbr": "BJTU;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "TEwQSWWn7S", "title": "Fast, accurate training and sampling of Restricted Boltzmann Machines", "track": "main", "status": "Reject", "tldr": "", "abstract": "Thanks to their simple architecture, Restricted Boltzmann Machines (RBMs) are powerful tools for modeling complex systems and extracting interpretable insights from data. However, training RBMs, as other energy-based models, on highly structured data poses a major challenge, as effective training relies on mixing the Markov chain Monte Carlo simulations used to estimate the gradient. This process is often hindered by multiple second-order phase transitions and the associated critical slowdown. In this paper, we present an innovative method in which the principal directions of the dataset are integrated into a low-rank RBM through a convex optimization procedure. This approach enables efficient sampling of the equilibrium measure via a static Monte Carlo process. By starting the standard training process with a model that already accurately represents the main modes of the data, we bypass the initial phase transitions. Our results show that this strategy successfully trains RBMs to capture the full diversity of data in datasets where previous methods fail. Furthermore, we use the training trajectories to propose a new sampling method, {\\em parallel trajectory tempering}, which allows us to sample the equilibrium measure of the trained model much faster than previous optimized MCMC approaches and a better estimation of the log-likelihood. We illustrate the success of the training method on several highly structured datasets.", "keywords": "Restricted Boltzmann Machine;Fast Sampling;Multimodal learning;training algorithm", "primary_area": "generative_models", "supplementary_material": "", "author": "Nicolas BEREUX;Aur\u00e9lien Decelle;Cyril Furtlehner;Lorenzo Rosset;Beatriz Seoane", "authorids": "~Nicolas_BEREUX1;~Aur\u00e9lien_Decelle1;~Cyril_Furtlehner1;~Lorenzo_Rosset1;~Beatriz_Seoane1", "gender": "M;M;M;M;F", "homepage": ";http://www.lri.fr/~adecelle;https://pages.saclay.inria.fr/cyril.furtlehner/;;", "dblp": ";86/9220;84/1139;;", "google_scholar": ";https://scholar.google.fr/citations?user=__OKD-kAAAAJ;;https://scholar.google.com/citations?hl=it;https://scholar.google.es/citations?user=RyCweLUAAAAJ", "orcid": ";0000-0002-3017-0858;;0000-0001-6122-5252;0000-0003-4007-9406", "linkedin": "nicolas-b%C3%A9reux/;aur%C3%A9lien-decelle-947309105/;;;beatriz-seoane-bartolome-39620412/", "or_profile": "~Nicolas_BEREUX1;~Aur\u00e9lien_Decelle1;~Cyril_Furtlehner1;~Lorenzo_Rosset1;~Beatriz_Seoane1", "aff": "Universit\u00e9 Paris-Saclay;Universidad Complutense de Madrid;INRIA;Ecole Normale Sup\u00e9rieure de Paris;Universit\u00e9 Paris-Saclay", "aff_domain": "universite-paris-saclay.fr;ucm.es;inria.fr;ens.fr;univ-paris-saclay.fr", "position": "PhD student;Researcher;Researcher;PhD student;Principal Researcher", "bibtex": "@misc{\nanonymous2024fast,\ntitle={Fast, accurate training and sampling of Restricted Boltzmann Machines},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=TEwQSWWn7S}\n}", "github": "", "project": "", "reviewers": "NWGy;yudf;SD84", "site": "https://openreview.net/forum?id=TEwQSWWn7S", "pdf_size": 2981076, "rating": "4;5;6", "confidence": "3;2;2", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "209;46;148", "wc_strengths": "88;38;38", "wc_weaknesses": "874;289;72", "wc_questions": "5;64;102", "wc_limitations": "135;31;2", "wc_review": "1311;468;362", "wc_reply_reviewers": "447;122;66", "wc_reply_authors": "1457;1503;0", "reply_reviewers": "1;1;1", "reply_authors": "5;4;1", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 134.33333333333334, 67.24251300743865 ], "wc_strengths_avg": [ 54.666666666666664, 23.570226039551585 ], "wc_weaknesses_avg": [ 411.6666666666667, 338.7096429424799 ], "wc_questions_avg": [ 57.0, 39.9082280572148 ], "wc_limitations_avg": [ 56.0, 57.10224747474189 ], "wc_review_avg": [ 713.6666666666666, 424.58947493104705 ], "wc_reply_reviewers_avg": [ 211.66666666666666, 167.96891246762166 ], "wc_reply_authors_avg": [ 986.6666666666666, 697.9313879044431 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3103112835464067291&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Universit\u00e9 Paris-Saclay;Universidad Complutense de Madrid;INRIA;Ecole Normale Sup\u00e9rieure de Paris", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.universite-paris-saclay.fr;https://www.ucm.es;https://www.inria.fr;https://www.ens.fr", "aff_unique_abbr": "UPSaclay;UCM;INRIA;ENS Paris", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "France;Spain" }, { "title": "On the Scalability of Certified Adversarial Robustness with Generated Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95047", "id": "TFAG9UznPv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TFAG9UznPv", "openreview": "https://openreview.net/forum?id=TFAG9UznPv", "poster": "/media/PosterPDFs/NeurIPS%202024/95047.png?t=1731493590.5395977", "project": "", "author_site": "Thomas Altstidl, David Dobre, Arthur Kosmala, Bjoern Eskofier, Gauthier Gidel, Leo Schwinn", "tldr": "", "abstract": "Certified defenses against adversarial attacks offer formal guarantees on the robustness of a model, making them more reliable than empirical methods such as adversarial training, whose effectiveness is often later reduced by unseen attacks. Still, the limited certified robustness that is currently achievable has been a bottleneck for their practical adoption. Gowal et al. and Wang et al. have shown that generating additional training data using state-of-the-art diffusion models can considerably improve the robustness of adversarial training. In this work, we demonstrate that a similar approach can substantially improve deterministic certified defenses but also reveal notable differences in the scaling behavior between certified and empirical methods. In addition, we provide a list of recommendations to scale the robustness of certified training approaches. Our approach achieves state-of-the-art deterministic robustness certificates on CIFAR-10 for the $\\ell_2$ ($\\epsilon = 36/255$) and $\\ell_{\\infty}$ ($\\epsilon = 8/255$) threat models, outperforming the previous results by $+3.95$ and $+1.39$ percentage points, respectively. Furthermore, we report similar improvements for CIFAR-100.", "keywords": "certified robustness;adversarial robustness;scaling;generated data", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/ba6acaf7f7fc50eb361d842722811b48a17581aa.zip", "author": "Thomas Altstidl;David Dobre;Arthur Kosmala;Bjoern Eskofier;Gauthier Gidel;Leo Schwinn", "authorids": "~Thomas_Altstidl1;~David_Dobre1;~Arthur_Kosmala1;~Bjoern_Eskofier1;~Gauthier_Gidel1;~Leo_Schwinn1", "gender": ";M;;;M;M", "homepage": ";;;;https://gauthiergidel.github.io/;", "dblp": ";;;;188/6326;259/2852", "google_scholar": ";;;;https://scholar.google.fr/citations?user=bDrXQPUAAAAJ;PXT4k9cAAAAJ", "orcid": ";;;;;0000-0003-3967-2202", "linkedin": ";daviddobre/;arthur-kosmala-9219371b2/;;;leo-schwinn-34a61623b/", "or_profile": "~Thomas_Altstidl1;~David_Dobre1;~Arthur_Kosmala1;~Bjoern_Eskofier1;~Gauthier_Gidel1;~Leo_Schwinn1", "aff": ";Mila - Quebec Artificial Intelligence Institute;Technische Universit\u00e4t M\u00fcnchen;;Mila - Quebec Artificial Intelligence Institute;Technical University of Munich", "aff_domain": ";mila.quebec;tum.de;;mila.quebec;tum.de", "position": ";PhD student;PhD student;;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\naltstidl2024on,\ntitle={On the Scalability of Certified Adversarial Robustness with Generated Data},\nauthor={Thomas Altstidl and David Dobre and Arthur Kosmala and Bjoern Eskofier and Gauthier Gidel and Leo Schwinn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TFAG9UznPv}\n}", "github": "", "reviewers": "Vssv;bWYd;hXmp", "pdf_size": 509813, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "4;3;3", "wc_summary": "123;117;42", "wc_strengths": "66;68;161", "wc_weaknesses": "261;138;343", "wc_questions": "179;63;60", "wc_limitations": "32;45;80", "wc_review": "661;431;686", "wc_reply_reviewers": "46;32;173", "wc_reply_authors": "56;0;197", "reply_reviewers": "1;1;1", "reply_authors": "2;1;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 94.0, 36.851051545376556 ], "wc_strengths_avg": [ 98.33333333333333, 44.319546728528515 ], "wc_weaknesses_avg": [ 247.33333333333334, 84.24699137390934 ], "wc_questions_avg": [ 100.66666666666667, 55.40356988097026 ], "wc_limitations_avg": [ 52.333333333333336, 20.270394394014364 ], "wc_review_avg": [ 592.6666666666666, 114.77030200457878 ], "wc_reply_reviewers_avg": [ 83.66666666666667, 63.42624763367993 ], "wc_reply_authors_avg": [ 84.33333333333333, 82.88278202109022 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VhYUP5XoUrAJ:scholar.google.com/&scioq=On+the+Scalability+of+Certified+Adversarial+Robustness+with+Generated+Data&hl=en&as_sdt=0,7", "gs_version_total": 3, "email": ";mila.quebec;tum.de;;mila.quebec;tum.de", "author_num": 6, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Quebec Artificial Intelligence Institute;Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": "Artificial Intelligence;;", "aff_unique_url": "https://mila.quebec;https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "Mila;TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Canada;Germany" }, { "title": "CAT3D: Create Anything in 3D with Multi-View Diffusion Models", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95046", "id": "TFZlFRl9Ks", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TFZlFRl9Ks", "openreview": "https://openreview.net/forum?id=TFZlFRl9Ks", "poster": "", "project": "", "author_site": "Ruiqi Gao, Aleksander Holynski, Philipp Henzler, Arthur Brussee, Ricardo Martin Brualla, Pratul Srinivasan, Jonathan Barron, Ben Poole", "tldr": "", "abstract": "Advances in 3D reconstruction have enabled high-quality 3D capture, but require a user to collect hundreds to thousands of images to create a 3D scene. We present CAT3D, a method for creating anything in 3D by simulating this real-world capture process with a multi-view diffusion model. Given any number of input images and a set of target novel viewpoints, our model generates highly consistent novel views of a scene. These generated views can be used as input to robust 3D reconstruction techniques to produce 3D representations that can be rendered from any viewpoint in real-time. CAT3D can create entire 3D scenes in as little as one minute, and outperforms existing methods for single image and few-view 3D scene creation.", "keywords": "3D generation;Diffusion Models;3D reconstruction;Generative Models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/552c2443f2a30ea43d52ce58b58f1a4e5ab06652.zip", "author": "Ruiqi Gao;Aleksander Holynski;Philipp Henzler;Arthur Brussee;Ricardo Martin Brualla;Pratul P. Srinivasan;Jonathan T. Barron;Ben Poole", "authorids": "~Ruiqi_Gao1;~Aleksander_Holynski1;~Philipp_Henzler1;~Arthur_Brussee1;~Ricardo_Martin_Brualla1;~Pratul_P._Srinivasan1;~Jonathan_T._Barron1;~Ben_Poole1", "gender": "F;;M;M;;M;M;M", "homepage": "http://www.stat.ucla.edu/~ruiqigao/;https://holynski.org;https://henzler.github.io/;;http://ricardomartinbrualla.com;https://cs.stanford.edu/~poole;https://jonbarron.info/;https://pratulsrinivasan.github.io/", "dblp": "206/7084;230/7958;179/4982;272/9369.html;16/7968;16/10397;30/9988;169/4719", "google_scholar": "VdlgOXoAAAAJ;ypBMJMgAAAAJ;H181jygAAAAJ;jYYYIvsAAAAJ;9F59OCYAAAAJ;i5FMLA4AAAAJ;https://scholar.google.com/citations?hl=en;aYyDsZ0AAAAJ", "orcid": ";;;;0000-0003-3247-9522;;;", "linkedin": ";;;;;;;", "or_profile": "~Ruiqi_Gao1;~Aleksander_Holynski1;~Philipp_Henzler1;~Arthur_Brussee1;~Ricardo_Martin_Brualla1;~Ben_Poole1;~Jonathan_T_Barron2;~Pratul_Srinivasan1", "aff": "Google;Google DeepMind;Google;Google;Google;Google;Google;Google DeepMind", "aff_domain": "google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\ngao2024catd,\ntitle={{CAT}3D: Create Anything in 3D with Multi-View Diffusion Models},\nauthor={Ruiqi Gao and Aleksander Holynski and Philipp Henzler and Arthur Brussee and Ricardo Martin Brualla and Pratul P. Srinivasan and Jonathan T. Barron and Ben Poole},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TFZlFRl9Ks}\n}", "github": "", "reviewers": "1Rgc;Twzm;d7U9;xucG", "pdf_size": 36808971, "rating": "6;7;7;8", "confidence": "5;5;4;5", "soundness": "3;4;3;4", "novelty": "3;4;3;4", "presentation": "4;3;4;3", "wc_summary": "88;69;187;67", "wc_strengths": "78;79;189;49", "wc_weaknesses": "90;289;121;26", "wc_questions": "45;133;71;3", "wc_limitations": "1;14;47;16", "wc_review": "302;584;615;161", "wc_reply_reviewers": "14;266;28;14", "wc_reply_authors": "0;690;0;4", "reply_reviewers": "1;2;1;1", "reply_authors": "1;3;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.75, 49.32735042549924 ], "wc_strengths_avg": [ 98.75, 53.480720825359114 ], "wc_weaknesses_avg": [ 131.5, 97.17124060132195 ], "wc_questions_avg": [ 63.0, 47.138094997570704 ], "wc_limitations_avg": [ 19.5, 16.889345754054535 ], "wc_review_avg": [ 415.5, 190.94829142990517 ], "wc_reply_reviewers_avg": [ 80.5, 107.25087412231194 ], "wc_reply_authors_avg": [ 173.5, 298.20588525379577 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 118, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12456072651436655812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;deepmind.com;google.com;google.com;google.com;google.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Lever LM: Configuring In-Context Sequence to Lever Large Vision Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95045", "id": "TGC7HNf6nK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TGC7HNf6nK", "openreview": "https://openreview.net/forum?id=TGC7HNf6nK", "poster": "", "project": "", "author_site": "Xu Yang, Yingzhe Peng, Haoxuan Ma, Shuo Xu, Chi Zhang, Yucheng Han, Hanwang Zhang", "tldr": "", "abstract": "As Archimedes famously said, ``Give me a lever long enough and a fulcrum on which to place it, and I shall move the world'', in this study, we propose to use a tiny Language Model (LM), \\eg, a Transformer with 67M parameters, to lever much larger Vision-Language Models (LVLMs) with 9B parameters. Specifically, we use this tiny \\textbf{Lever-LM} to configure effective in-context demonstration (ICD) sequences to improve the In-Context Learinng (ICL) performance of LVLMs. Previous studies show that diverse ICD configurations like the selection and ordering of the demonstrations heavily affect the ICL performance, highlighting the significance of configuring effective ICD sequences. Motivated by this and by re-considering the the process of configuring ICD sequence, we find this is a mirror process of human sentence composition and further assume that effective ICD configurations may contain internal statistical patterns that can be captured by Lever-LM. Then a dataset with effective ICD sequences is constructed to train Lever-LM. After training, given novel queries, new ICD sequences are configured by the trained Lever-LM to solve vision-language tasks through ICL. Experiments show that these ICD sequences can improve the ICL performance of two LVLMs compared with some strong baselines in Visual Question Answering and Image Captioning, validating that Lever-LM can really capture the statistical patterns for levering LVLMs. The code is available at \\url{https://anonymous.4open.science/r/Lever-LM-604A/}.", "keywords": "In-Context Learning;MultiModal", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/2379382610d2df0f5b578c35b9abf87454c3968c.zip", "author": "Xu Yang;Yingzhe Peng;Haoxuan Ma;Shuo Xu;Chi Zhang;Yucheng Han;Hanwang Zhang", "authorids": "~Xu_Yang5;~Yingzhe_Peng1;~Haoxuan_Ma1;~Shuo_Xu8;~Chi_Zhang13;~Yucheng_Han1;~Hanwang_Zhang3", "gender": "M;M;M;M;M;M;M", "homepage": ";https://github.com/ForJadeForest;https://github.com/ATMxsp01;https://icoz69.github.io/;https://tingxueronghua.github.io/;https://mreallab.github.io/index.html;https://hunter-wrynn.github.io/", "dblp": "63/1534-21.html;;;91/195-7;226/9017;79/8116.html;", "google_scholar": "SqdxMH0AAAAJ;;;https://scholar.google.com.sg/citations?user=J4s398EAAAAJ;LbwqJBQAAAAJ;YG0DFyYAAAAJ;", "orcid": "0000-0002-8276-2679;;;0000-0001-6344-2824;;;", "linkedin": ";;;;;;", "or_profile": "~Xu_Yang5;~Yingzhe_Peng1;~Shuo_Xu8;~Chi_Zhang13;~Yucheng_Han1;~Hanwang_Zhang3;~\u9a6c\u6d69\u8f691", "aff": "Southeast University;Southeast University;Southeast University;Tencent ;Nanyang Technological University;Nanyang Technological University;Southeast University", "aff_domain": "seu.edu.cn;southeast.edu;seu.edu.cn;tencent.com;ntu.edu.sg;ntu.edu.sg;seu.edu.cn", "position": "Associate Professor;MS student;Undergrad student;Researcher;PhD student;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nyang2024lever,\ntitle={Lever {LM}: Configuring In-Context Sequence to Lever Large Vision Language Models},\nauthor={Xu Yang and Yingzhe Peng and Haoxuan Ma and Shuo Xu and Chi Zhang and Yucheng Han and Hanwang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TGC7HNf6nK}\n}", "github": "", "reviewers": "sise;g8yL;Djwv;RjFC", "pdf_size": 2886804, "rating": "3;5;8;8", "confidence": "5;4;5;4", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "60;78;69;78", "wc_strengths": "36;90;105;222", "wc_weaknesses": "165;170;32;42", "wc_questions": "7;84;20;62", "wc_limitations": "2;2;1;4", "wc_review": "270;424;227;408", "wc_reply_reviewers": "0;47;149;101", "wc_reply_authors": "0;146;534;267", "reply_reviewers": "0;1;2;2", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 7.46240577829965 ], "wc_strengths_avg": [ 113.25, 67.82836795913639 ], "wc_weaknesses_avg": [ 102.25, 65.3696221497417 ], "wc_questions_avg": [ 43.25, 31.09159854365806 ], "wc_limitations_avg": [ 2.25, 1.0897247358851685 ], "wc_review_avg": [ 332.25, 85.30643293445108 ], "wc_reply_reviewers_avg": [ 74.25, 56.03291443428586 ], "wc_reply_authors_avg": [ 236.75, 195.932864777709 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.23570226039551587, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13114264757033438788&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "seu.edu.cn;southeast.edu;seu.edu.cn;tencent.com;ntu.edu.sg;ntu.edu.sg;seu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;2;0", "aff_unique_norm": "Southeast University;Tencent;Nanyang Technological University", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.tencent.com;https://www.ntu.edu.sg", "aff_unique_abbr": "SEU;Tencent;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "China;Singapore" }, { "title": "From Biased to Unbiased Dynamics: An Infinitesimal Generator Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95044", "id": "TGmwp9jJXl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TGmwp9jJXl", "openreview": "https://openreview.net/forum?id=TGmwp9jJXl", "poster": "", "project": "", "author_site": "Timoth\u00e9e Devergne, Vladimir Kostic, Michele Parrinello, Massimiliano Pontil", "tldr": "", "abstract": "We investigate learning the eigenfunctions of evolution operators for time-reversal invariant stochastic processes, a prime example being the Langevin equation used in molecular dynamics. Many physical or chemical processes described by this equation involve transitions between metastable states separated by high potential barriers that can hardly be crossed during a simulation. To overcome this bottleneck, data are collected via biased simulations that explore the state space more rapidly. We propose a framework for learning from biased simulations rooted in the infinitesimal generator of the process {and the associated resolvent operator}. We contrast our approach to more common ones based on the transfer operator, showing that it can provably learn the spectral properties of the unbiased system from biased data. In experiments, we highlight the advantages of our method over transfer operator approaches and recent developments based on generator learning, demonstrating its effectiveness in estimating eigenfunctions and eigenvalues. Importantly, we show that even with datasets containing only a few relevant transitions due to sub-optimal biasing, our approach\nrecovers relevant information about the transition mechanism.", "keywords": "Molecular dynamics;stochastic differential equations", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Timoth\u00e9e Devergne;Vladimir R Kostic;Michele Parrinello;Massimiliano Pontil", "authorids": "~Timoth\u00e9e_Devergne1;~Vladimir_R_Kostic1;~Michele_Parrinello1;~Massimiliano_Pontil3", "gender": "M;M;M;Not Specified", "homepage": ";https://vladi-iit.github.io/;https://www.iit.it/it/web/guest/people-details/-/people/michele-parrinello;https://www.iit.it/web/computational-statistics-and-machine-learning", "dblp": ";94/879;72/809;", "google_scholar": ";66gV7SAAAAAJ;;lcOacs8AAAAJ", "orcid": "0000-0001-8369-237X;;0000-0001-6550-3272;0000-0001-9415-098X", "linkedin": ";vladimir-kostic-77500652/;;", "or_profile": "~Timoth\u00e9e_Devergne1;~Vladimir_R_Kostic1;~Michele_Parrinello1;~Massimiliano_Pontil4", "aff": "Universit\u00e0 degli Studi di Genova, Istituto Italiano di Tecnologia;University of Novi Sad;Istituto Italiano di Tecnologia, Genova;University College London, University of London", "aff_domain": "iit.it;uns.ac.rs;iit.it;ucl.ac.uk", "position": "Postdoc;Associate Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ndevergne2024from,\ntitle={From Biased to Unbiased Dynamics: An Infinitesimal Generator Approach},\nauthor={Timoth{\\'e}e Devergne and Vladimir R Kostic and Michele Parrinello and Massimiliano Pontil},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TGmwp9jJXl}\n}", "github": "", "reviewers": "MSBv;txkX;BQ7h", "pdf_size": 5772167, "rating": "6;7;8", "confidence": "4;1;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;3;2", "wc_summary": "146;101;140", "wc_strengths": "41;95;118", "wc_weaknesses": "49;20;425", "wc_questions": "287;30;154", "wc_limitations": "3;5;48", "wc_review": "526;251;885", "wc_reply_reviewers": "210;24;279", "wc_reply_authors": "611;0;400", "reply_reviewers": "2;1;2", "reply_authors": "2;1;3", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 129.0, 19.949937343260004 ], "wc_strengths_avg": [ 84.66666666666667, 32.27313984655902 ], "wc_weaknesses_avg": [ 164.66666666666666, 184.46378747301299 ], "wc_questions_avg": [ 157.0, 104.94125340716427 ], "wc_limitations_avg": [ 18.666666666666668, 20.75786330258702 ], "wc_review_avg": [ 554.0, 259.5855671385963 ], "wc_reply_reviewers_avg": [ 171.0, 107.69401097554126 ], "wc_reply_authors_avg": [ 337.0, 253.386397951166 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.32732683535398854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=934713436516817718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "iit.it;uns.ac.rs;iit.it;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e0 degli Studi di Genova;University of Novi Sad;Istituto Italiano di Tecnologia;University College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.unige.it;https://www.uns.ac.rs;https://www.iit.it;https://www.ucl.ac.uk", "aff_unique_abbr": "UniGe;UNS;IIT;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Genova", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Italy;Serbia;United Kingdom" }, { "title": "VLKEB: A Large Vision-Language Model Knowledge Editing Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97679", "id": "THMgVAkZwh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=THMgVAkZwh", "openreview": "https://openreview.net/forum?id=THMgVAkZwh", "poster": "/media/PosterPDFs/NeurIPS%202024/97679.png?t=1731061253.8051047", "project": "", "author_site": "Han Huang, Haitian Zhong, Tao Yu, Qiang Liu, Shu Wu, Liang Wang, Tieniu Tan", "tldr": "", "abstract": "Recently, knowledge editing on large language models (LLMs) has received considerable attention. Compared to this, editing Large Vision-Language Models (LVLMs) faces extra challenges from diverse data modalities and complicated model components, and data for LVLMs editing are limited. The existing LVLM editing benchmark, which comprises three metrics (Reliability, Locality, and Generality), falls short in the quality of synthesized evaluation images and cannot assess whether models apply edited knowledge in relevant content. Therefore, we employ more reliable data collection methods to construct a new Large $\\textbf{V}$ision-$\\textbf{L}$anguage Model $\\textbf{K}$nowledge $\\textbf{E}$diting $\\textbf{B}$enchmark, $\\textbf{VLKEB}$, and extend the Portability metric for more comprehensive evaluation. Leveraging a multi-modal knowledge graph, our image data are bound with knowledge entities. This can be further used to extract entity-related knowledge, which constitutes the base of editing data. We conduct experiments of different editing methods on five LVLMs, and thoroughly analyze how do they impact the models. The results reveal strengths and deficiencies of these methods and hopefully provide insights for future research. The codes and dataset are available at: https://github.com/VLKEB/VLKEB.", "keywords": "Large Vision-Language Model;Knowledge Editing;Model Editing;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Han Huang;Haitian Zhong;Tao Yu;Qiang Liu;Shu Wu;Liang Wang;Tieniu Tan", "authorids": "~Han_Huang4;~Haitian_Zhong1;~Tao_Yu15;~Qiang_Liu8;~Shu_Wu1;~Liang_Wang3;~Tieniu_Tan1", "gender": "M;M;M;M;M;M;", "homepage": "https://hyman25.github.io/;https://jzsawyer.github.io/;;https://john-qiangliu.tech/;http://www.shuwu.name;;", "dblp": ";;;61/3234-6;06/3577;56/4499-1;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;https://scholar.google.com/citations?hl=en;;", "orcid": "0009-0008-3726-5295;;0009-0003-8815-7407;0000-0002-9233-3827;0000-0003-2164-3577;;", "linkedin": ";haitian-zhong-49b758279/;;;;;", "or_profile": "~Han_Huang4;~Haitian_Zhong1;~Tao_Yu15;~Qiang_Liu8;~Shu_Wu1;~Liang_Wang3;~Tieniu_Tan1", "aff": "Institute of Automation, Chinese Academy of Sciences;Lanzhou University;Harbin Institute of Technology;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation\uff0c CAS\uff0cChina;", "aff_domain": "ia.ac.cn;lzu.edu.cn;stu.hit.edu.cn;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn;", "position": "PhD student;Undergrad student;Undergrad student;Associate Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nhuang2024vlkeb,\ntitle={{VLKEB}: A Large Vision-Language Model Knowledge Editing Benchmark},\nauthor={Han Huang and Haitian Zhong and Tao Yu and Qiang Liu and Shu Wu and Liang Wang and Tieniu Tan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=THMgVAkZwh}\n}", "github": "", "reviewers": "Y4B2;4DYX;agdD;wpwF", "pdf_size": 3001167, "rating": "5;6;6;8", "confidence": "4;4;3;3", "wc_summary_and_contributions": "89;47;51;100", "wc_strengths": "82;95;82;130", "wc_improvement": "79;14;87;28", "wc_limitations": "29;10;14;1", "wc_correctness": "13;7;6;1", "wc_clarity": "1;9;52;1", "wc_relation_to_prior_work": "16;1;16;1", "wc_documentation": "16;26;6;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "326;210;315;264", "wc_reply_reviewers": "0;0;36;24", "wc_reply_authors": "254;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 71.75, 23.12331074911203 ], "wc_strengths_avg": [ 97.25, 19.638928178492836 ], "wc_improvement_avg": [ 52.0, 31.51983502494897 ], "wc_limitations_avg": [ 13.5, 10.111874208078342 ], "wc_correctness_avg": [ 6.75, 4.264680527307995 ], "wc_clarity_avg": [ 15.75, 21.182244923520265 ], "wc_relation_to_prior_work_avg": [ 8.5, 7.5 ], "wc_documentation_avg": [ 12.25, 9.60143218483576 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 278.75, 46.072632874625256 ], "wc_reply_reviewers_avg": [ 15.0, 15.588457268119896 ], "wc_reply_authors_avg": [ 63.5, 109.9852262806237 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10649155607162349292&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;lzu.edu.cn;stu.hit.edu.cn;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn;", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Lanzhou University;Harbin Institute of Technology", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.lzu.edu.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "CAS;LZU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "TI7Vy90B9j", "title": "Boosting Perturbed Gradient Ascent for Last-Iterate Convergence in Games", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper introduces a payoff perturbation technique, introducing a strong convexity to players' payoff functions in games. This technique is specifically designed for first-order methods to achieve last-iterate convergence in games where the gradient of the payoff functions is monotone in the strategy profile space, potentially containing additive noise. Although perturbation is known to facilitate the convergence of learning algorithms, the magnitude of perturbation requires careful adjustment to ensure last-iterate convergence. Previous studies have proposed a scheme in which the magnitude is determined by the distance from an anchoring or reference strategy, which is periodically re-initialized. In response, this paper proposes Gradient Ascent with Boosting Payoff Perturbation, which incorporates a novel perturbation into the underlying payoff function, maintaining the periodically re-initializing anchoring strategy scheme. This innovation empowers us to provide faster last-iterate convergence rates against the existing payoff perturbed algorithms, even in the presence of additive noise.", "keywords": "Last-Iterate Convergence;Learning in Games;Noisy Feedback", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/864ddf9ec2fae0ed38d50345a896b405fdc423b8.zip", "author": "Kenshi Abe;Mitsuki Sakamoto;Kaito Ariu;Atsushi Iwasaki", "authorids": "~Kenshi_Abe1;~Mitsuki_Sakamoto1;~Kaito_Ariu1;~Atsushi_Iwasaki2", "gender": "M;M;M;M", "homepage": "https://bakanaouji.github.io/;;https://researchmap.jp/ariu?lang=en;", "dblp": "254/2763;243/6951;229/7578;04/4799", "google_scholar": "rImmohoAAAAJ;https://scholar.google.co.jp/citations?user=wIuGfiEAAAAJ;https://scholar.google.co.jp/citations?user=4zXjxhsAAAAJ;OZunx7wAAAAJ", "orcid": ";;;", "linkedin": ";https://www.linkedin.com/mwlite/in/%E5%85%85%E7%94%9F-%E5%9D%82%E6%9C%AC-1666bb233;;", "or_profile": "~Kenshi_Abe1;~Mitsuki_Sakamoto1;~Kaito_Ariu1;~Atsushi_Iwasaki1", "aff": "CyberAgent, Inc.;CyberAgent, Inc.;CyberAgent, Inc.;University of Electro-Communications", "aff_domain": "cyberagent.co.jp;cyberagent.co.jp;cyberagent.co.jp;uec.ac.jp", "position": "Research scientist;Research Engineer;Research Scientist;Associate Professor", "bibtex": "@misc{\nanonymous2024boosting,\ntitle={Boosting Perturbed Gradient Ascent for Last-Iterate Convergence in Games},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=TI7Vy90B9j}\n}", "github": "", "project": "", "reviewers": "PgVh;1HqV;U8sk;muDU", "site": "https://openreview.net/forum?id=TI7Vy90B9j", "pdf_size": 1131511, "rating": "5;6;6;6", "confidence": "4;3;3;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "109;112;61;50", "wc_strengths": "95;140;102;13", "wc_weaknesses": "220;98;119;51", "wc_questions": "68;1;29;74", "wc_limitations": "1;1;11;4", "wc_review": "493;352;322;192", "wc_reply_reviewers": "14;0;15;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 27.793884219374593 ], "wc_strengths_avg": [ 87.5, 46.295248136282844 ], "wc_weaknesses_avg": [ 122.0, 61.70494307589952 ], "wc_questions_avg": [ 43.0, 29.77414986191881 ], "wc_limitations_avg": [ 4.25, 4.085033659592048 ], "wc_review_avg": [ 339.75, 106.98218309606511 ], "wc_reply_reviewers_avg": [ 7.25, 7.258615570478987 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TvrSpAeHl-IJ:scholar.google.com/&scioq=Boosting+Perturbed+Gradient+Ascent+for+Last-Iterate+Convergence+in+Games&hl=en&as_sdt=0,44", "gs_version_total": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "CyberAgent;University of Electro-Communications", "aff_unique_dep": ";", "aff_unique_url": "https://www.cyberagent.co.jp;https://www.uec.ac.jp", "aff_unique_abbr": "CyberAgent;UEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Meaningful Learning: Enhancing Abstract Reasoning in Large Language Models via Generic Fact Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95043", "id": "TIhiFqGOYC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TIhiFqGOYC", "openreview": "https://openreview.net/forum?id=TIhiFqGOYC", "poster": "/media/PosterPDFs/NeurIPS%202024/95043.png?t=1729649766.3085444", "project": "", "author_site": "Kai Xiong, Xiao Ding, Ting Liu, Bing Qin, Dongliang Xu, Qing Yang, Hongtao Liu, Yixin Cao", "tldr": "", "abstract": "Large language models (LLMs) have developed impressive performance and strong explainability across various reasoning scenarios, marking a significant stride towards mimicking human-like intelligence. Despite this, when tasked with several simple questions supported by a generic fact, LLMs often struggle to abstract and apply the generic fact to provide consistent and precise answers, revealing a deficiency in abstract reasoning abilities. This has sparked a vigorous debate about whether LLMs are genuinely reasoning or merely memorizing. In light of this, we design a preliminary study to quantify and delve into the abstract reasoning abilities of existing LLMs. Our findings reveal a substantial discrepancy between their general reasoning and abstract reasoning performances. To relieve this problem, we tailor an abstract reasoning dataset (AbsR) together with a meaningful learning paradigm to teach LLMs how to leverage generic facts for reasoning purposes. The results show that our approach not only boosts the general reasoning performance of LLMs but also makes considerable strides towards their capacity for abstract reasoning, moving beyond simple memorization or imitation to a more nuanced understanding and application of generic facts. The code is available at https://github.com/Waste-Wood/MeanLearn.", "keywords": "Abstract Reasoning;Large Language Models;Question Answering", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/474931020e21e9e089623ac2eb36f245edf1d59a.zip", "author": "Kai Xiong;Xiao Ding;Ting Liu;Bing Qin;Dongliang Xu;Qing Yang;Hongtao Liu;Yixin Cao", "authorids": "~Kai_Xiong2;~Xiao_Ding1;~Ting_Liu2;~Bing_Qin2;~Dongliang_Xu2;~Qing_Yang11;~Hongtao_Liu1;~Yixin_Cao2", "gender": "M;M;M;;M;M;M;M", "homepage": "https://waste-wood.github.io/;http://ir.hit.edu.cn/~xding/index_english.htm;;http://ir.hit.edu.cn/~qinb;;https://www.duxiaoman.com/index;;https://sites.google.com/view/yixin-homepage", "dblp": "38/6410-2;;52/5150-1;86/5934.html;74/4912.html;47/3749;;20/8038-2", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;Mi9XXuAAAAAJ;zyMJ1V0AAAAJ;LKnCub0AAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ", "orcid": "0000-0002-5909-3075;0000-0002-5838-0320;;0000-0002-2543-5604;;;;", "linkedin": ";;;;;;;", "or_profile": "~Kai_Xiong2;~Xiao_Ding1;~Ting_Liu2;~Bing_Qin2;~Dongliang_Xu2;~Qing_Yang11;~Hongtao_Liu1;~Yixin_Cao2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;DuXiaoman Technology;;Du Xiaoman Financial;Singapore Management University", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;duxiaoman.com;;duxiaoman.com;smu.edu.sg", "position": "PhD student;Full Professor;Full Professor;Full Professor;Principal Researcher;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nxiong2024meaningful,\ntitle={Meaningful Learning: Enhancing Abstract Reasoning in Large Language Models via Generic Fact Guidance},\nauthor={Kai Xiong and Xiao Ding and Ting Liu and Bing Qin and Dongliang Xu and Qing Yang and Hongtao Liu and Yixin Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TIhiFqGOYC}\n}", "github": "", "reviewers": "zthW;vYe8;Fwhs;XVL7", "pdf_size": 1558791, "rating": "3;5;6;6", "confidence": "5;3;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "1;3;4;2", "wc_summary": "50;116;70;128", "wc_strengths": "33;23;53;84", "wc_weaknesses": "147;104;113;302", "wc_questions": "118;135;7;2", "wc_limitations": "20;1;22;6", "wc_review": "368;379;265;522", "wc_reply_reviewers": "116;52;23;74", "wc_reply_authors": "530;199;39;171", "reply_reviewers": "1;1;1;2", "reply_authors": "3;3;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 91.0, 32.07802986469088 ], "wc_strengths_avg": [ 48.25, 23.29565410114084 ], "wc_weaknesses_avg": [ 166.5, 79.85768591688593 ], "wc_questions_avg": [ 65.5, 61.32087735836792 ], "wc_limitations_avg": [ 12.25, 8.954747344286158 ], "wc_review_avg": [ 383.5, 91.49453535594353 ], "wc_reply_reviewers_avg": [ 66.25, 33.943887520435844 ], "wc_reply_authors_avg": [ 234.75, 180.85405027258858 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7385489458759963, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13021862717315263674&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;duxiaoman.com;;duxiaoman.com;smu.edu.sg", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;3", "aff_unique_norm": "Harbin Institute of Technology;DuXiaoman Technology;Du Xiaoman Financial;Singapore Management University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.duxiaoman.com;https://www.duxiaoman.com;https://www.smu.edu.sg", "aff_unique_abbr": "HIT;;DXF;SMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "TJJ4gZtkS4", "title": "Tropical Expressivity of Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose an algebraic geometric framework to study the expressivity of linear activation neural networks. A particular quantity that has been actively studied in the field of deep learning is the number of linear regions, which gives an estimate of the information capacity of the architecture. To study and evaluate information capacity and expressivity, we work in the setting of tropical geometry---a combinatorial and polyhedral variant of algebraic geometry---where there are known connections between tropical rational maps and feedforward neural networks. Our work builds on and expands this relation to capitalize on the rich theory of tropical geometry to characterize and study various architectural aspects of neural networks. Our contributions are threefold: we provide a novel tropical geometric approach to selecting sampling domains among linear regions; an algebraic result allowing for a guided restriction of the sampling domain for network architectures with symmetries; and an open source library to analyze neural networks as tropical Puiseux rational maps. We provide a comprehensive set of proof-of-concept numerical experiments demonstrating the breadth of neural network architectures to which tropical geometric theory can be applied to reveal insights on expressivity characteristics of a network. Our work provides the foundations for the adaptation of both theory and existing software from computational tropical geometry and symbolic computation to deep learning.", "keywords": "linear regions; tropical rational maps; tropical Puiseux polynomials; monomoials; expressivity", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/b213874c47b162a50d0e125c1fd2dcb40f03a140.zip", "author": "Shiv Bhatia;Yueqi Cao;Paul Lezeau;Anthea Monod", "authorids": "~Shiv_Bhatia1;~Yueqi_Cao1;~Paul_Lezeau1;~Anthea_Monod1", "gender": "M;M;;F", "homepage": "https://shivbhatia.netlify.app/;https://yueqihome.site/;;https://sites.google.com/view/antheamonod/home", "dblp": ";241/9884;;246/2927", "google_scholar": ";https://scholar.google.com/citations?hl=en;;https://scholar.google.co.il/citations?hl=en", "orcid": ";0000-0001-8856-2902;;0000-0001-6774-8150", "linkedin": ";;;anthea-monod-046152162/", "or_profile": "~Shiv_Bhatia1;~Yueqi_Cao1;~Paul_Lezeau1;~Anthea_Monod1", "aff": "Imperial College London;Imperial College London;;Imperial College London, Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk;;imperial.ac.uk", "position": "Undergrad student;PhD student;;Assistant Professor", "bibtex": "@misc{\nanonymous2024tropical,\ntitle={Tropical Expressivity of Neural Networks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=TJJ4gZtkS4}\n}", "github": "", "project": "", "reviewers": "hdMP;vnGc;oTxQ;R5Gh", "site": "https://openreview.net/forum?id=TJJ4gZtkS4", "pdf_size": 345308, "rating": "3;3;4;6", "confidence": "5;3;4;3", "soundness": "1;3;2;3", "novelty": "1;3;3;2", "presentation": "2;3;4;2", "wc_summary": "68;224;138;53", "wc_strengths": "27;39;53;28", "wc_weaknesses": "706;121;282;10", "wc_questions": "698;42;131;69", "wc_limitations": "19;2;12;6", "wc_review": "1518;428;616;166", "wc_reply_reviewers": "1075;60;93;118", "wc_reply_authors": "4535;141;131;72", "reply_reviewers": "3;1;1;1", "reply_authors": "9;2;2;2", "rating_avg": [ 4.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 120.75, 67.69555007531883 ], "wc_strengths_avg": [ 36.75, 10.497023387608508 ], "wc_weaknesses_avg": [ 279.75, 264.41480196842235 ], "wc_questions_avg": [ 235.0, 269.2535979332495 ], "wc_limitations_avg": [ 9.75, 6.417748826496718 ], "wc_review_avg": [ 682.0, 508.4348532506402 ], "wc_reply_reviewers_avg": [ 336.5, 426.86912514258984 ], "wc_reply_authors_avg": [ 1219.75, 1914.2420660668806 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 3.031088913245535 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14733627646699616168&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Improving Viewpoint-Independent Object-Centric Representations through Active Viewpoint Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95042", "id": "TJiw1oLAcD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TJiw1oLAcD", "openreview": "https://openreview.net/forum?id=TJiw1oLAcD", "poster": "/media/PosterPDFs/NeurIPS%202024/95042.png?t=1733491379.9040256", "project": "", "author_site": "Yinxuan Huang, Chengmin Gao, Bin Li, Xiangyang Xue", "tldr": "", "abstract": "Given the complexities inherent in visual scenes, such as object occlusion, a comprehensive understanding often requires observation from multiple viewpoints. Existing multi-viewpoint object-centric learning methods typically employ random or sequential viewpoint selection strategies. While applicable across various scenes, these strategies may not always be ideal, as certain scenes could benefit more from specific viewpoints. To address this limitation, we propose a novel active viewpoint selection strategy. This strategy predicts images from unknown viewpoints based on information from observation images for each scene. It then compares the object-centric representations extracted from both viewpoints and selects the unknown viewpoint with the largest disparity, indicating the greatest gain in information, as the next observation viewpoint. Through experiments on various datasets, we demonstrate the effectiveness of our active viewpoint selection strategy, significantly enhancing segmentation and reconstruction performance compared to random viewpoint selection. Moreover, our method can accurately predict images from unknown viewpoints.", "keywords": "Object-Centric Learning;Multi-Viewpoint Scenes;Unsupervised Learning", "primary_area": "generative_models", "supplementary_material": "", "author": "Yinxuan Huang;Chengmin Gao;Bin Li;Xiangyang Xue", "authorids": "~Yinxuan_Huang1;~Chengmin_Gao1;~Bin_Li4;~Xiangyang_Xue2", "gender": ";;M;M", "homepage": ";;https://aimpressionist.github.io/publications;http://homepage.fudan.edu.cn//xyxue", "dblp": ";;89/6764-15;84/3791", "google_scholar": ";;8t97oL8AAAAJ;", "orcid": ";;0000-0002-9633-0033;0000-0002-4897-9209", "linkedin": ";;;", "or_profile": "~Yinxuan_Huang1;~Chengmin_Gao1;~Bin_Li4;~Xiangyang_Xue2", "aff": ";;Fudan University;Fudan University", "aff_domain": ";;fudan.edu.cn;fudan.edu.cn", "position": ";;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024improving,\ntitle={Improving Viewpoint-Independent Object-Centric Representations through Active Viewpoint Selection},\nauthor={Yinxuan Huang and Chengmin Gao and Bin Li and Xiangyang Xue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TJiw1oLAcD}\n}", "github": "", "reviewers": "qAwd;qk67;gp25", "pdf_size": 9811347, "rating": "3;5;5", "confidence": "3;3;3", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "68;111;130", "wc_strengths": "191;51;79", "wc_weaknesses": "282;146;240", "wc_questions": "166;5;63", "wc_limitations": "18;17;8", "wc_review": "725;330;520", "wc_reply_reviewers": "0;0;99", "wc_reply_authors": "0;0;125", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 4.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.0, 25.93581821856921 ], "wc_strengths_avg": [ 107.0, 60.486913185140025 ], "wc_weaknesses_avg": [ 222.66666666666666, 56.858498827254394 ], "wc_questions_avg": [ 78.0, 66.57827473483124 ], "wc_limitations_avg": [ 14.333333333333334, 4.4969125210773475 ], "wc_review_avg": [ 525.0, 161.29682782580278 ], "wc_reply_reviewers_avg": [ 33.0, 46.66904755831214 ], "wc_reply_authors_avg": [ 41.666666666666664, 58.92556509887896 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yylOdwhx0H4J:scholar.google.com/&scioq=Improving+Viewpoint-Independent+Object-Centric+Representations+through+Active+Viewpoint+Selection&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": ";;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Differentially Private Stochastic Gradient Descent with Fixed-Size Minibatches: Tighter RDP Guarantees with or without Replacement", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95041", "id": "TJsknGasMy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TJsknGasMy", "openreview": "https://openreview.net/forum?id=TJsknGasMy", "poster": "/media/PosterPDFs/NeurIPS%202024/95041.png?t=1731003200.4256146", "project": "", "author_site": "Jeremiah Birrell, Reza Ebrahimi, Rouzbeh Behnia, Jason Pacheco", "tldr": "", "abstract": "Differentially private stochastic gradient descent (DP-SGD) has been instrumental in privately training deep learning models by providing a framework to control and track the privacy loss incurred during training. At the core of this computation lies a subsampling method that uses a privacy amplification lemma to enhance the privacy guarantees provided by the additive noise. Fixed size subsampling is appealing for its constant memory usage, unlike the variable sized minibatches in Poisson subsampling. It is also of interest in addressing class imbalance and federated learning. Current computable guarantees for fixed-size subsampling are not tight and do not consider both add/remove and replace-one adjacency relationships. We present a new and holistic R\u00e9nyi differential privacy (RDP) accountant for DP-SGD with fixed-size subsampling without replacement (FSwoR) and with replacement (FSwR). For FSwoR we consider both add/remove and replace-one adjacency, where we improve on the best current computable bound by a factor of $4$. We also show for the first time that the widely-used Poisson subsampling and FSwoR with replace-one adjacency have the same privacy to leading order in the sampling probability. Our work suggests that FSwoR is often preferable to Poisson subsampling due to constant memory usage. Our FSwR accountant includes explicit non-asymptotic upper and lower bounds and, to the authors' knowledge, is the first such RDP analysis of fixed-size subsampling with replacement for DP-SGD. We analytically and empirically compare fixed size and Poisson subsampling, and show that DP-SGD gradients in a fixed-size subsampling regime exhibit lower variance in practice in addition to memory usage benefits.", "keywords": "privacy preserving machine learning;differential privacy;differentially private stochastic gradient descent;fixed-size subsampled mechanisms;privacy amplification lemma", "primary_area": "privacy", "supplementary_material": "/attachment/b15ff78b03b2996d5320f4090c61f24e22e33f12.zip", "author": "Jeremiah Birrell;Mohammadreza Ebrahimi;Rouzbeh Behnia;Jason Pacheco", "authorids": "~Jeremiah_Birrell1;~Mohammadreza_Ebrahimi2;~Rouzbeh_Behnia1;~Jason_Pacheco1", "gender": "M;M;M;M", "homepage": "https://www.researchgate.net/profile/Jeremiah-Birrell;https://star-ailab.github.io/;https://rbehnia.github.io;http://www.pachecoj.com", "dblp": ";26/7531;;126/1745", "google_scholar": "R60hJGUAAAAJ;4DmURbEAAAAJ;;71ZEsnEAAAAJ", "orcid": ";0000-0003-1367-3338;;", "linkedin": ";;;", "or_profile": "~Jeremiah_Birrell1;~Mohammadreza_Ebrahimi2;~Rouzbeh_Behnia1;~Jason_Pacheco1", "aff": "Texas State University;University of South Florida;University of South Florida;University of Arizona", "aff_domain": "txstate.edu;usf.edu;usf.edu;arizona.edu", "position": "Assistant Professor;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nbirrell2024differentially,\ntitle={Differentially Private Stochastic Gradient Descent with Fixed-Size Minibatches: Tighter {RDP} Guarantees with or without Replacement},\nauthor={Jeremiah Birrell and Mohammadreza Ebrahimi and Rouzbeh Behnia and Jason Pacheco},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TJsknGasMy}\n}", "github": "", "reviewers": "Rv66;uSpK;kPGW;JRin", "pdf_size": 1188400, "rating": "4;4;5;7", "confidence": "4;3;3;4", "soundness": "3;2;3;4", "novelty": "1;3;3;4", "presentation": "3;2;2;4", "wc_summary": "102;158;227;37", "wc_strengths": "44;132;114;50", "wc_weaknesses": "251;155;141;151", "wc_questions": "49;296;87;276", "wc_limitations": "1;34;1;1", "wc_review": "447;775;570;515", "wc_reply_reviewers": "509;215;59;60", "wc_reply_authors": "701;327;57;360", "reply_reviewers": "4;1;1;1", "reply_authors": "5;3;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 131.0, 70.03927469641587 ], "wc_strengths_avg": [ 85.0, 38.58756276314948 ], "wc_weaknesses_avg": [ 174.5, 44.46065676527957 ], "wc_questions_avg": [ 177.0, 110.05226031299857 ], "wc_limitations_avg": [ 9.25, 14.289419162443238 ], "wc_review_avg": [ 576.75, 122.47117007687973 ], "wc_reply_reviewers_avg": [ 210.75, 183.52435124527753 ], "wc_reply_authors_avg": [ 361.25, 228.67703754421868 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6948098482200491604&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "txstate.edu;usf.edu;usf.edu;arizona.edu", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Texas State University;University of South Florida;University of Arizona", "aff_unique_dep": ";;", "aff_unique_url": "https://www.txstate.edu;https://www.usf.edu;https://www.arizona.edu", "aff_unique_abbr": "TXST;USF;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "TKbGTj0YCZ", "title": "Robust Conformal Prediction under Joint Distribution Shift", "track": "main", "status": "Reject", "tldr": "", "abstract": "Uncertainty prevails due to the lack of knowledge about data or model,\nand conformal prediction (CP) predicts multiple potential targets,\nhoping to cover the true target with a high probability. \nRegarding CP robustness,\nimportance weighting can address covariate shifts,\nbut CP under joint distribution shifts remains more challenging.\nPrior attempts addressing joint shift via $f$-divergence\nignores the nuance of calibration and test distributions\nthat are critical for coverage guarantees.\nMore generally, with multiple test distributions shifted from the calibration distribution,\nsimultaneous coverage guarantees for all test domains requires a new paradigm.\nWe design Multi-domain Robust Conformal Prediction (mRCP) that first formulates the coverage difference that importance weighting fails to capture under any joint shift.\nTo squeeze such coverage difference and guarantee the $(1-\\alpha)$ coverage in all test domains,\nwe propose Normalized Truncated Wasserstein distance (NTW) to comprehensively capture the nuance of any test and calibration conformal score distributions, and design an end-to-end training algorithm incorporating NTW to provide elasticity for simultaneous coverage guarantee over distinct test domains.\nWith diverse tasks (seven datasets) and architectures (black-box and physics-informed models), NTW strongly correlates (Pearson coefficient=0.905) with coverage differences beyond covariate shifts, while mRCP reduces coverage gap by 50% on average robustly over multiple distinct test domains.", "keywords": "conformal prediction;data distribution shift;coverage robustness", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Rui Xu;Chao Chen;Yue Sun;Parvathinathan Venkitasubramaniam;Sihong Xie", "authorids": "~Rui_Xu14;~Chao_Chen14;~Yue_Sun9;~Parvathinathan_Venkitasubramaniam1;~Sihong_Xie1", "gender": "M;;M;M;M", "homepage": "https://rxu0112.github.io/;;https://www.linkedin.com/in/yue-sun-2397a8229/;https://www.lehigh.edu/~pav309/parv.html;https://sihongxie.github.io/index.html", "dblp": ";;;;67/1229", "google_scholar": "wYjog8gAAAAJ;;;;qRp1xZwAAAAJ", "orcid": ";;;;0000-0003-1060-8506", "linkedin": ";;;;", "or_profile": "~Rui_Xu14;~Chao_Chen14;~Yue_Sun9;~Parvathinathan_Venkitasubramaniam1;~Sihong_Xie1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);;Lehigh University;Lehigh University;HKUST-GZ", "aff_domain": "hkust-gz.edu.cn;;lehigh.edu;lehigh.edu;hkust-gz.edu.cn", "position": "PhD student;;PhD student;Full Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024robust,\ntitle={Robust Conformal Prediction under Joint Distribution Shift},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=TKbGTj0YCZ}\n}", "github": "", "project": "", "reviewers": "qkU5;Ab2r;6oEU;V1Vs;m45f", "site": "https://openreview.net/forum?id=TKbGTj0YCZ", "pdf_size": 871341, "rating": "3;3;4;4;5", "confidence": "4;4;3;2;3", "soundness": "1;1;3;2;2", "novelty": "2;2;2;2;2", "presentation": "2;1;3;2;3", "wc_summary": "46;92;67;90;169", "wc_strengths": "8;29;28;42;96", "wc_weaknesses": "74;150;63;307;558", "wc_questions": "691;416;118;28;234", "wc_limitations": "9;32;7;11;70", "wc_review": "828;719;283;478;1127", "wc_reply_reviewers": "1566;117;0;0;106", "wc_reply_authors": "1434;0;0;0;0", "reply_reviewers": "3;1;0;0;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 3.8, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 1.8, 0.7483314773547883 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 92.8, 41.64324675142417 ], "wc_strengths_avg": [ 40.6, 29.756343861435663 ], "wc_weaknesses_avg": [ 230.4, 185.54632844656345 ], "wc_questions_avg": [ 297.4, 235.69777258175353 ], "wc_limitations_avg": [ 25.8, 23.861265683110776 ], "wc_review_avg": [ 687.0, 290.31086786408804 ], "wc_reply_reviewers_avg": [ 357.8, 606.1644661310988 ], "wc_reply_authors_avg": [ 286.8, 573.6 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6428571428571428, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lZQSIcBibxwJ:scholar.google.com/&scioq=Robust+Conformal+Prediction+under+Joint+Distribution+Shift&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Lehigh University;Hong Kong University of Science and Technology (Guangzhou)", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.lehigh.edu;https://www.ust.hk", "aff_unique_abbr": "HKUST;Lehigh;HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Guangzhou;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "TKozKEMKiw", "title": "Interpretable Decision Tree Search as a Markov Decision Process", "track": "main", "status": "Reject", "tldr": "", "abstract": "Finding an optimal decision tree for a supervised learning task is a challenging combinatorial problem to solve at scale. It was recently proposed to frame this problem as a Markov Decision Problem (MDP) and use deep reinforcement learning to tackle scaling. Unfortunately, these methods are not competitive with the current branch-and-bound state of the art. Instead, we propose to scale the resolution of such MDPs using an information-theoretic tests generating function that heuristically, and dynamically for every state, limits the set of admissible test actions to a few good candidates. As a solver, we show empirically that our algorithm is at the very least competitive with branch-and-bound alternatives. As a machine learning tool, a key advantage of our approach is to solve for multiple complexity-performance trade-offs at virtually no additional cost. With such a set of solutions, a user can then select the tree that generalizes best and which has the interpretability level that best suits their needs, which no current branch-and-bound method allows.", "keywords": "mdp;cart;rl;classification", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/d7084bed3c48985c6f4cc6944ad754b8831d9d62.zip", "author": "Hector Kohler;Riad Akrour;Philippe Preux", "authorids": "~Hector_Kohler2;~Riad_Akrour1;~Philippe_Preux1", "gender": "M;M;M", "homepage": "https://kohlerhector.github.io/homepage/;https://akrouriad.github.io/;https://philippe-preux.codeberg.page", "dblp": ";98/9987.html;16/4835", "google_scholar": "aSO7bZ0AAAAJ;https://scholar.google.de/citations?user=7g1bMNkAAAAJ;JTXxmeAAAAAJ", "orcid": "0009-0007-1708-0966;0000-0002-8735-6960;0000-0002-2067-2838", "linkedin": "hector-kohler-45b550134/;;", "or_profile": "~Hector_Kohler2;~Riad_Akrour1;~Philippe_Preux1", "aff": "CRIStAL (9189);INRIA;Universit\u00e9 de Lille", "aff_domain": "univ-lille.fr;inria.fr;univ-lille.fr", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024interpretable,\ntitle={Interpretable Decision Tree Search as a Markov Decision Process},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=TKozKEMKiw}\n}", "github": "", "project": "", "reviewers": "E4uJ;mGGd;aAr3;pDp5", "site": "https://openreview.net/forum?id=TKozKEMKiw", "pdf_size": 3978062, "rating": "3;4;5;6", "confidence": "5;4;3;3", "soundness": "3;2;2;3", "novelty": "1;2;2;2", "presentation": "3;2;2;3", "wc_summary": "28;47;100;125", "wc_strengths": "7;21;96;49", "wc_weaknesses": "217;195;56;70", "wc_questions": "1;4;7;44", "wc_limitations": "29;1;36;5", "wc_review": "282;268;295;293", "wc_reply_reviewers": "327;21;93;34", "wc_reply_authors": "550;98;105;21", "reply_reviewers": "3;1;1;1", "reply_authors": "6;3;5;3", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 75.0, 39.10882253405234 ], "wc_strengths_avg": [ 43.25, 34.00275724114149 ], "wc_weaknesses_avg": [ 134.5, 72.091955168382 ], "wc_questions_avg": [ 14.0, 17.449928366615147 ], "wc_limitations_avg": [ 17.75, 15.022899187573616 ], "wc_review_avg": [ 284.5, 10.735455276791944 ], "wc_reply_reviewers_avg": [ 118.75, 123.2565921157972 ], "wc_reply_authors_avg": [ 193.5, 208.44723552976183 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9438798074485388, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3019382098132657194&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "CRISTAL;INRIA;Universit\u00e9 de Lille", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.inria.fr;https://www.univ-lille.fr", "aff_unique_abbr": ";INRIA;UdeL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Multi-times Monte Carlo Rendering for Inter-reflection Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95040", "id": "TLUGoShY30", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TLUGoShY30", "openreview": "https://openreview.net/forum?id=TLUGoShY30", "poster": "/media/PosterPDFs/NeurIPS%202024/95040.png?t=1731150756.949793", "project": "", "author_site": "Tengjie Zhu, Zhuo Chen, Jingnan Gao, Yichao Yan, Xiaokang Yang", "tldr": "", "abstract": "Inverse rendering methods have achieved remarkable performance in reconstructing high-fidelity 3D objects with disentangled geometries, materials, and environmental light. However, they still face huge challenges in reflective surface reconstruction. Although recent methods model the light trace to learn specularity, the ignorance of indirect illumination makes it hard to handle inter-reflections among multiple smooth objects. In this work, we propose Ref-MC2 that introduces the multi-time Monte Carlo sampling which comprehensively computes the environmental illumination and meanwhile considers the reflective light from object surfaces. To address the computation challenge as the times of Monte Carlo sampling grow, we propose a specularity-adaptive sampling strategy, significantly reducing the computational complexity. Besides the computational resource, higher geometry accuracy is also required because geometric errors accumulate multiple times. Therefore, we further introduce a reflection-aware surface model to initialize the geometry and refine it during inverse rendering. We construct a challenging dataset containing scenes with multiple objects and inter-reflections. Experiments show that our method outperforms other inverse rendering methods on various object groups. We also show downstream applications, e.g., relighting and material editing, to illustrate the disentanglement ability of our method.", "keywords": "Inverse rendering;3D reconstruction;Indirect illumination;Physically based rendering;Monte Carlo Rendering;Ray tracing", "primary_area": "machine_vision", "supplementary_material": "/attachment/c50e0ef66fe1062bd3d170528144fe8d1f623f9e.zip", "author": "Zhu Tengjie;Zhuo Chen;Jingnan Gao;Yichao Yan;Xiaokang Yang", "authorids": "~Zhu_Tengjie1;~Zhuo_Chen11;~Jingnan_Gao1;~Yichao_Yan1;~Xiaokang_Yang1", "gender": "M;M;M;M;M", "homepage": "https://github.com/zhutengjie;;https://g-1nonly.github.io;https://daodaofr.github.io/;https://icne.sjtu.edu.cn/info/1064/1078.htm", "dblp": ";;;185/7881;06/3071-1.html", "google_scholar": ";;PyqKZDIAAAAJ;ZPHMMRkAAAAJ;yDEavdMAAAAJ", "orcid": ";;0000-0001-6688-8418;0000-0003-3209-8965;0000-0003-4029-3322", "linkedin": ";;;;", "or_profile": "~Zhu_Tengjie1;~Zhuo_Chen11;~Jingnan_Gao1;~Yichao_Yan1;~Xiaokang_Yang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn;sjtu.edu.cn", "position": "Undergrad student;PhD student;Intern;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ntengjie2024multitimes,\ntitle={Multi-times Monte Carlo Rendering for Inter-reflection Reconstruction},\nauthor={Zhu Tengjie and Zhuo Chen and Jingnan Gao and Yichao Yan and Xiaokang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TLUGoShY30}\n}", "github": "", "reviewers": "VkB8;izde;UfeL;9PRG", "pdf_size": 11557629, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "3;4;3;3", "novelty": "3;3;1;3", "presentation": "2;3;3;3", "wc_summary": "85;70;48;92", "wc_strengths": "20;88;20;103", "wc_weaknesses": "223;37;377;34", "wc_questions": "36;121;4;16", "wc_limitations": "8;9;19;13", "wc_review": "372;325;468;258", "wc_reply_reviewers": "177;29;832;0", "wc_reply_authors": "407;103;955;0", "reply_reviewers": "1;1;3;0", "reply_authors": "3;3;6;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 16.857861667483217 ], "wc_strengths_avg": [ 57.75, 38.120696478422325 ], "wc_weaknesses_avg": [ 167.75, 143.0233809557025 ], "wc_questions_avg": [ 44.25, 45.7622934302904 ], "wc_limitations_avg": [ 12.25, 4.322904116447646 ], "wc_review_avg": [ 355.75, 76.42766187709788 ], "wc_reply_reviewers_avg": [ 259.5, 337.2806694727701 ], "wc_reply_authors_avg": [ 366.25, 371.39155550442985 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15699959344628720485&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Markov Equivalence and Consistency in Differentiable Structure Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95039", "id": "TMlGQw7EbC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TMlGQw7EbC", "openreview": "https://openreview.net/forum?id=TMlGQw7EbC", "poster": "", "project": "", "author_site": "Chang Deng, Kevin Bello, Pradeep Ravikumar, Bryon Aragam", "tldr": "", "abstract": "Existing approaches to differentiable structure learning of directed acyclic graphs (DAGs) rely on strong identifiability assumptions in order to guarantee that global minimizers of the acyclicity-constrained optimization problem identifies the true DAG. Moreover, it has been observed empirically that the optimizer may exploit undesirable artifacts in the loss function. We explain and remedy these issues by studying the behavior of differentiable acyclicity-constrained programs under general likelihoods with multiple global minimizers. By carefully regularizing the likelihood, it is possible to identify the sparsest model in the Markov equivalence class, even in the absence of an identifiable parametrization. We first study the Gaussian case in detail, showing how proper regularization of the likelihood defines a score that identifies the sparsest model. Assuming faithfulness, it also recovers the Markov equivalence class. These results are then generalized to general models and likelihoods, where the same claims hold. These theoretical results are validated empirically, showing how this can be done using standard gradient-based optimizers (without resorting to approximations such as Gumbel-Softmax), thus paving the way for differentiable structure learning under general models and losses. Open-source code is available at \\url{https://github.com/duntrain/dagrad}.", "keywords": "structure learning;graphical models;causality;non-convex optimization;regularization", "primary_area": "causal_inference", "supplementary_material": "", "author": "Chang Deng;Kevin Bello;Pradeep Kumar Ravikumar;Bryon Aragam", "authorids": "~Chang_Deng1;~Kevin_Bello1;~Pradeep_Kumar_Ravikumar1;~Bryon_Aragam1", "gender": "M;M;M;", "homepage": "https://duntrain.github.io/;https://www.cs.cmu.edu/~kbello;http://www.cs.cmu.edu/~pradeepr/;http://bryonaragam.com/", "dblp": "16/1003;202/2531;94/3594;140/7564", "google_scholar": "51voxF8AAAAJ;pCS09UsAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;u-W3_9QAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chang_Deng1;~Kevin_Bello1;~Pradeep_Kumar_Ravikumar1;~Bryon_Aragam1", "aff": "University of Chicago;University of Chicago;Carnegie Mellon University;Booth School of Business", "aff_domain": "uchicago.edu;uchicago.edu;cmu.edu;chicagobooth.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndeng2024markov,\ntitle={Markov Equivalence and Consistency in Differentiable Structure Learning},\nauthor={Chang Deng and Kevin Bello and Pradeep Kumar Ravikumar and Bryon Aragam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TMlGQw7EbC}\n}", "github": "", "reviewers": "spWh;mGjz;o9h2", "pdf_size": 921617, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "4;3;4", "novelty": "2;3;3", "presentation": "4;3;3", "wc_summary": "201;95;147", "wc_strengths": "112;45;74", "wc_weaknesses": "735;131;308", "wc_questions": "2;67;133", "wc_limitations": "14;21;84", "wc_review": "1064;359;746", "wc_reply_reviewers": "145;62;192", "wc_reply_authors": "89;27;536", "reply_reviewers": "1;2;2", "reply_authors": "2;2;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 147.66666666666666, 43.27688631231329 ], "wc_strengths_avg": [ 77.0, 27.434771124736336 ], "wc_weaknesses_avg": [ 391.3333333333333, 253.52492754932186 ], "wc_questions_avg": [ 67.33333333333333, 53.48104544810453 ], "wc_limitations_avg": [ 39.666666666666664, 31.478387647541428 ], "wc_review_avg": [ 723.0, 288.2741750486852 ], "wc_reply_reviewers_avg": [ 133.0, 53.74631770332426 ], "wc_reply_authors_avg": [ 217.33333333333334, 226.74851463436067 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pHUXKQrlFVcJ:scholar.google.com/&scioq=Markov+Equivalence+and+Consistency+in+Differentiable+Structure+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "uchicago.edu;uchicago.edu;cmu.edu;chicagobooth.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Chicago;Carnegie Mellon University;University of Chicago Booth School of Business", "aff_unique_dep": ";;Booth School of Business", "aff_unique_url": "https://www.uchicago.edu;https://www.cmu.edu;https://www.chicagobooth.edu", "aff_unique_abbr": "UChicago;CMU;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Confident Natural Policy Gradient for Local Planning in $q_\\pi$-realizable Constrained MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95038", "id": "TNEmAgwoXR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TNEmAgwoXR", "openreview": "https://openreview.net/forum?id=TNEmAgwoXR", "poster": "/media/PosterPDFs/NeurIPS%202024/95038.png?t=1731722344.722147", "project": "", "author_site": "Tian Tian, Lin Yang, Csaba Szepesvari", "tldr": "", "abstract": "The constrained Markov decision process (CMDP) framework emerges as an important reinforcement learning approach for imposing safety or other critical objectives while maximizing cumulative reward. However, the current understanding of how to learn efficiently in a CMDP environment with a potentially infinite number of states remains under investigation, particularly when function approximation is applied to the value functions. In this paper, we address the learning problem given linear function approximation with $q_{\\pi}$-realizability, where the value functions of all policies are linearly representable with a known feature map, a setting known to be more general and challenging than other linear settings. Utilizing a local-access model, we propose a novel primal-dual algorithm that, after $\\tilde{O}(\\text{poly}(d) \\epsilon^{-3})$ iterations, outputs with high probability a policy that strictly satisfies the constraints while nearly optimizing the value with respect to a reward function. Here, $d$ is the feature dimension and $\\epsilon > 0$ is a given error. The algorithm relies on a carefully crafted off-policy evaluation procedure to evaluate the policy using historical data, which informs policy updates through policy gradients and conserves samples. To our knowledge, this is the first result achieving polynomial sample complexity for CMDP in the $q_{\\pi}$-realizable setting.", "keywords": "reinforcement learning;constrained MDP;sample complexity;q-pi realizability;local planning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Tian Tian;Lin Yang;Csaba Szepesvari", "authorids": "~Tian_Tian4;~Lin_Yang12;~Csaba_Szepesvari1", "gender": "F;M;M", "homepage": ";https://sites.ualberta.ca/~szepesva/;http://www.drlinyang.net", "dblp": ";http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;166/6264", "google_scholar": "tjmUjXkAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;umivlPQAAAAJ", "orcid": ";;", "linkedin": ";csaba-szepesvari-09376b1?trk=hp-identity-name;", "or_profile": "~Tian_Tian4;~Csaba_Szepesvari1;~lin_Yang1", "aff": "University of Alberta;Google DeepMind;University of California, Los Angeles", "aff_domain": "ualberta.ca;google.com;ucla.edu", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\ntian2024confident,\ntitle={Confident Natural Policy Gradient for Local Planning in \\$q\\_{\\textbackslash}pi\\$-realizable Constrained {MDP}s},\nauthor={Tian Tian and Lin Yang and Csaba Szepesvari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TNEmAgwoXR}\n}", "github": "", "reviewers": "EJjQ;KqmD;ynfZ", "pdf_size": 508855, "rating": "4;6;7", "confidence": "2;2;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "73;74;294", "wc_strengths": "14;36;116", "wc_weaknesses": "175;38;68", "wc_questions": "5;77;319", "wc_limitations": "7;10;53", "wc_review": "274;235;850", "wc_reply_reviewers": "56;0;228", "wc_reply_authors": "17;0;1272", "reply_reviewers": "1;0;2", "reply_authors": "2;1;4", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 147.0, 103.94549853969947 ], "wc_strengths_avg": [ 55.333333333333336, 43.827946437049604 ], "wc_weaknesses_avg": [ 93.66666666666667, 58.80098260705815 ], "wc_questions_avg": [ 133.66666666666666, 134.3064489226866 ], "wc_limitations_avg": [ 23.333333333333332, 21.01322334996598 ], "wc_review_avg": [ 453.0, 281.1725448901439 ], "wc_reply_reviewers_avg": [ 94.66666666666667, 97.0131720724333 ], "wc_reply_authors_avg": [ 429.6666666666667, 595.6600447309596 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7iQgxoE1F0kJ:scholar.google.com/&scioq=Confident+Natural+Policy+Gradient+for+Local+Planning+in+%24q_%5Cpi%24-realizable+Constrained+MDPs&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ualberta.ca;google.com;ucla.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Alberta;Google;University of California, Los Angeles", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com;https://www.ucla.edu", "aff_unique_abbr": "UAlberta;DeepMind;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "Open-Vocabulary Object Detection via Language Hierarchy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95037", "id": "TNQ0hxh3O1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TNQ0hxh3O1", "openreview": "https://openreview.net/forum?id=TNQ0hxh3O1", "poster": "", "project": "", "author_site": "Jiaxing Huang, Jingyi Zhang, Kai Jiang, Shijian Lu", "tldr": "", "abstract": "Recent studies on generalizable object detection have attracted increasing attention with additional weak supervision from large-scale datasets with image-level labels.\nHowever, weakly-supervised detection learning often suffers from image-to-box label mismatch, i.e., image-level\nlabels do not convey precise object information.\nWe design Language Hierarchical Self-training (LHST) that introduces language hierarchy into weakly-supervised detector training for learning more generalizable detectors.\nLHST expands the image-level labels with language hierarchy and enables co-regularization between the expanded labels and self-training. Specifically, the expanded labels regularize self-training by providing richer supervision and mitigating the image-to-box label mismatch, while self-training allows assessing and selecting the expanded labels according to the predicted reliability.\nIn addition, we design language hierarchical prompt generation that introduces language hierarchy into prompt generation which helps bridge the vocabulary gaps between training and testing.\nExtensive experiments show that the proposed techniques achieve superior generalization performance consistently across 14 widely studied object detection datasets.", "keywords": "Weakly-supervised object detection;Open-Vocabulary Object Detection;detection foundation model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiaxing Huang;Jingyi Zhang;Kai Jiang;Shijian Lu", "authorids": "~Jiaxing_Huang2;~Jingyi_Zhang7;~Kai_Jiang2;~Shijian_Lu1", "gender": "M;M;M;F", "homepage": "https://jxhuang0508.github.io/;;https://personal.ntu.edu.sg/shijian.lu/;", "dblp": "62/6016-1.html;;42/2718;15/91-5", "google_scholar": "czirNcwAAAAJ;QzspFz4AAAAJ;https://scholar.google.com.sg/scholar?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-9921-2043;;", "linkedin": ";;;jingyi-zhang-6510871b0/", "or_profile": "~Jiaxing_Huang2;~Kai_Jiang2;~Shijian_Lu1;~JINGYI_ZHANG4", "aff": "Nanyang Technological University;Xidian University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;xidian.edu;ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nhuang2024openvocabulary,\ntitle={Open-Vocabulary Object Detection via Language Hierarchy},\nauthor={Jiaxing Huang and Jingyi Zhang and Kai Jiang and Shijian Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TNQ0hxh3O1}\n}", "github": "", "reviewers": "TMzm;PjbE;VMio", "pdf_size": 48467452, "rating": "4;6;7", "confidence": "3;4;3", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "72;76;90", "wc_strengths": "82;33;58", "wc_weaknesses": "361;215;112", "wc_questions": "109;3;114", "wc_limitations": "9;16;8", "wc_review": "633;343;382", "wc_reply_reviewers": "0;27;56", "wc_reply_authors": "0;63;48", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 79.33333333333333, 7.717224601860151 ], "wc_strengths_avg": [ 57.666666666666664, 20.00555478416488 ], "wc_weaknesses_avg": [ 229.33333333333334, 102.15782996042067 ], "wc_questions_avg": [ 75.33333333333333, 51.188106257432715 ], "wc_limitations_avg": [ 11.0, 3.559026084010437 ], "wc_review_avg": [ 452.6666666666667, 128.5050798822971 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 22.866763848189994 ], "wc_reply_authors_avg": [ 37.0, 26.870057685088806 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18196463123553215556&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;xidian.edu;ntu.edu.sg;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Nanyang Technological University;Xidian University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;http://www.xidian.edu.cn/", "aff_unique_abbr": "NTU;Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "One-Step Effective Diffusion Network for Real-World Image Super-Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95036", "id": "TPtXnpRvur", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TPtXnpRvur", "openreview": "https://openreview.net/forum?id=TPtXnpRvur", "poster": "/media/PosterPDFs/NeurIPS%202024/95036.png?t=1731755817.1855862", "project": "", "author_site": "Rongyuan Wu, Lingchen Sun, Zhiyuan Ma, Lei Zhang", "tldr": "", "abstract": "The pre-trained text-to-image diffusion models have been increasingly employed to tackle the real-world image super-resolution (Real-ISR) problem due to their powerful generative image priors. Most of the existing methods start from random noise to reconstruct the high-quality (HQ) image under the guidance of the given low-quality (LQ) image. While promising results have been achieved, such Real-ISR methods require multiple diffusion steps to reproduce the HQ image, increasing the computational cost. Meanwhile, the random noise introduces uncertainty in the output, which is unfriendly to image restoration tasks. To address these issues, we propose a one-step effective diffusion network, namely OSEDiff, for the Real-ISR problem. \nWe argue that the LQ image contains rich information to restore its HQ counterpart, and hence the given LQ image can be directly taken as the starting point for diffusion, eliminating the uncertainty introduced by random noise sampling. We finetune the pre-trained diffusion network with trainable layers to adapt it to complex image degradations. To ensure that the one-step diffusion model could yield HQ Real-ISR output, we apply variational score distillation in the latent space to conduct KL-divergence regularization. As a result, our OSEDiff model can efficiently and effectively generate HQ images in just one diffusion step. \nOur experiments demonstrate that OSEDiff achieves comparable or even better Real-ISR results, in terms of both objective metrics and subjective evaluations, than previous diffusion model-based Real-ISR methods that require dozens or hundreds of steps. The source codes are released at https://github.com/cswry/OSEDiff.", "keywords": "super-resolution; diffusion model; low-level vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Rongyuan Wu;Lingchen Sun;Zhiyuan Ma;Lei Zhang", "authorids": "~Rongyuan_Wu1;~Lingchen_Sun1;~Zhiyuan_Ma3;~Lei_Zhang2", "gender": "M;F;M;M", "homepage": ";;;http://www4.comp.polyu.edu.hk/~cslzhang/", "dblp": "300/7476;;;64/5666-6.html", "google_scholar": "A-U8zE8AAAAJ;;F15mLDYAAAAJ;tAK5l1IAAAAJ", "orcid": ";0000-0003-2254-7472;0000-0003-2514-0635;0000-0002-2078-4215", "linkedin": ";;;", "or_profile": "~Rongyuan_Wu1;~Lingchen_Sun1;~Zhiyuan_Ma3;~Lei_Zhang2", "aff": "Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\nwu2024onestep,\ntitle={One-Step Effective Diffusion Network for Real-World Image Super-Resolution},\nauthor={Rongyuan Wu and Lingchen Sun and Zhiyuan Ma and Lei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TPtXnpRvur}\n}", "github": "", "reviewers": "4vqL;px3F;jXQz;KDhM", "pdf_size": 3090996, "rating": "5;5;6;7", "confidence": "4;5;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "95;106;94;45", "wc_strengths": "40;95;62;57", "wc_weaknesses": "36;301;169;133", "wc_questions": "104;130;6;3", "wc_limitations": "5;56;1;7", "wc_review": "280;688;332;245", "wc_reply_reviewers": "64;138;27;183", "wc_reply_authors": "343;390;27;303", "reply_reviewers": "2;1;1;2", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.0, 23.569047498785352 ], "wc_strengths_avg": [ 63.5, 19.93113142799475 ], "wc_weaknesses_avg": [ 159.75, 94.95623992134482 ], "wc_questions_avg": [ 60.75, 57.00603038275863 ], "wc_limitations_avg": [ 17.25, 22.47637648732553 ], "wc_review_avg": [ 386.25, 176.94402363459469 ], "wc_reply_reviewers_avg": [ 103.0, 61.077819214506995 ], "wc_reply_authors_avg": [ 265.75, 141.2398226421996 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1591452323037581218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "MetaCURL: Non-stationary Concave Utility Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95035", "id": "TS09IypR3r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TS09IypR3r", "openreview": "https://openreview.net/forum?id=TS09IypR3r", "poster": "/media/PosterPDFs/NeurIPS%202024/95035.png?t=1733502833.1369488", "project": "", "author_site": "Bianca Marin Moreno, Margaux Br\u00e9g\u00e8re, Pierre Gaillard, Nadia Oudjane", "tldr": "", "abstract": "We explore online learning in episodic loop-free Markov decision processes on non-stationary environments (changing losses and probability transitions). Our focus is on the Concave Utility Reinforcement Learning problem (CURL), an extension of classical RL for handling convex performance criteria in state-action distributions induced by agent policies. While various machine learning problems can be written as CURL, its non-linearity invalidates traditional Bellman equations. Despite recent solutions to classical CURL, none address non-stationary MDPs. This paper introduces MetaCURL, the first CURL algorithm for non-stationary MDPs. It employs a meta-algorithm running multiple black-box algorithms instances over different intervals, aggregating outputs via a sleeping expert framework. The key hurdle is partial information due to MDP uncertainty. Under partial information on the probability transitions (uncertainty and non-stationarity coming only from external noise, independent of agent state-action pairs), we achieve optimal dynamic regret without prior knowledge of MDP changes. Unlike approaches for RL, MetaCURL handles full adversarial losses, not just stochastic ones. We believe our approach for managing non-stationarity with experts can be of interest to the RL community.", "keywords": "Concave utility reinforcement learning;non-stationary MDPs;learning with expert advice;online learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Bianca Marin Moreno;Margaux Br\u00e9g\u00e8re;Pierre Gaillard;Nadia Oudjane", "authorids": "~Bianca_Marin_Moreno1;~Margaux_Br\u00e9g\u00e8re1;~Pierre_Gaillard1;~Nadia_Oudjane1", "gender": "F;F;M;F", "homepage": "https://sites.google.com/view/bianca-marin-moreno;https://margauxbregere.github.io/;http://pierre.gaillard.me;", "dblp": "321/8706;234/8732;25/2131;72/10152", "google_scholar": "AU_eBlUAAAAJ;https://scholar.google.pt/citations?user=ZicGK4QAAAAJ;https://scholar.google.fr/citations?user=-CPaGaEAAAAJ;", "orcid": ";;0000-0001-6777-6127;", "linkedin": "biancammoreno/;;;", "or_profile": "~Bianca_Marin_Moreno1;~Margaux_Br\u00e9g\u00e8re1;~Pierre_Gaillard1;~Nadia_Oudjane1", "aff": "INRIA;Universit\u00e9 Paris-Sorbonne (Paris IV);INRIA;", "aff_domain": "inria.fr;paris4.sorbonne.fr;inria.fr;", "position": "PhD student;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nmoreno2024metacurl,\ntitle={Meta{CURL}: Non-stationary Concave Utility Reinforcement Learning},\nauthor={Bianca Marin Moreno and Margaux Br{\\'e}g{\\`e}re and Pierre Gaillard and Nadia Oudjane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TS09IypR3r}\n}", "github": "", "reviewers": "FWrb;La9P;unHu;55Y9;kKtv;ZTXg", "pdf_size": 512808, "rating": "4;5;6;6;7;7", "confidence": "2;2;3;2;1;3", "soundness": "2;3;3;3;3;3", "novelty": "3;2;2;3;3;3", "presentation": "1;2;3;3;3;2", "wc_summary": "40;43;92;94;87;100", "wc_strengths": "13;16;40;23;113;11", "wc_weaknesses": "27;40;198;74;99;7", "wc_questions": "762;161;513;5;8;347", "wc_limitations": "118;1;13;6;14;10", "wc_review": "960;261;856;202;321;475", "wc_reply_reviewers": "1223;36;182;10;54;497", "wc_reply_authors": "1675;0;391;0;185;354", "reply_reviewers": "3;1;1;1;1;2", "reply_authors": "4;1;2;1;2;3", "rating_avg": [ 5.833333333333333, 1.0671873729054746 ], "confidence_avg": [ 2.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.7453559924999298 ], "wc_summary_avg": [ 76.0, 24.704925284917042 ], "wc_strengths_avg": [ 36.0, 35.749125863438955 ], "wc_weaknesses_avg": [ 74.16666666666667, 63.05398921206775 ], "wc_questions_avg": [ 299.3333333333333, 274.6601940984937 ], "wc_limitations_avg": [ 27.0, 40.930835971591556 ], "wc_review_avg": [ 512.5, 293.25458223188946 ], "wc_reply_reviewers_avg": [ 333.6666666666667, 430.53713222232324 ], "wc_reply_authors_avg": [ 434.1666666666667, 575.4770243507169 ], "reply_reviewers_avg": [ 1.5, 0.7637626158259734 ], "reply_authors_avg": [ 2.1666666666666665, 1.0671873729054748 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.037877700953928506, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q9yL0_lu5v4J:scholar.google.com/&scioq=MetaCURL:+Non-stationary+Concave+Utility+Reinforcement+Learning&hl=en&as_sdt=0,5", "gs_version_total": 9, "email": "inria.fr;paris4.sorbonne.fr;inria.fr;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "INRIA;Universit\u00e9 Paris-Sorbonne", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.univ-paris-sorbonne.fr", "aff_unique_abbr": "INRIA;Paris IV", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "SGD vs GD: Rank Deficiency in Linear Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95034", "id": "TSaieShX3j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TSaieShX3j", "openreview": "https://openreview.net/forum?id=TSaieShX3j", "poster": "", "project": "", "author_site": "Aditya Vardhan Varre, Margarita Sagitova, Nicolas Flammarion", "tldr": "", "abstract": "In this article, we study the behaviour of continuous-time gradient methods on a two-layer linear network with square loss. A dichotomy between SGD and GD is revealed: GD preserves the rank at initialization while (label noise) SGD diminishes the rank regardless of the initialization. We demonstrate this rank deficiency by studying the time evolution of the *determinant* of a matrix of parameters. To further understand this phenomenon, we derive the stochastic differential equation (SDE) governing the eigenvalues of the parameter matrix. This SDE unveils a *replusive force* between the eigenvalues: a key regularization mechanism which induces rank deficiency. Our results are well supported by experiments illustrating the phenomenon beyond linear networks and regression tasks.", "keywords": "SGD dynamics;label noise;implicit bias;low rank", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/6c9d7d9f64e2c615fc757afbd75ba2816aad4efb.zip", "author": "Aditya Varre;Margarita Sagitova;Nicolas Flammarion", "authorids": "~Aditya_Varre1;~Margarita_Sagitova1;~Nicolas_Flammarion1", "gender": ";M;M", "homepage": ";;", "dblp": ";164/7417;224/6338", "google_scholar": ";;", "orcid": ";;", "linkedin": "margarita-sagitova-542957253/;;", "or_profile": "~Margarita_Sagitova1;~Nicolas_Flammarion1;~Aditya_Vardhan_Varre1", "aff": "School of Computer and Communication Sciences, EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "ic.epfl.ch;epfl.ch;epfl.ch", "position": "MS student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nvarre2024sgd,\ntitle={{SGD} vs {GD}: Rank Deficiency in Linear Networks},\nauthor={Aditya Varre and Margarita Sagitova and Nicolas Flammarion},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TSaieShX3j}\n}", "github": "", "reviewers": "kpcH;cAQT;V8cQ;Kp9L", "pdf_size": 16053710, "rating": "6;6;7;8", "confidence": "3;3;3;3", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "2;3;4;4", "wc_summary": "99;78;104;152", "wc_strengths": "82;27;47;349", "wc_weaknesses": "309;102;61;73", "wc_questions": "18;70;99;223", "wc_limitations": "8;5;28;1", "wc_review": "516;282;339;798", "wc_reply_reviewers": "42;4;0;46", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 108.25, 27.07743525520835 ], "wc_strengths_avg": [ 126.25, 130.10260374027877 ], "wc_weaknesses_avg": [ 136.25, 100.84486848620509 ], "wc_questions_avg": [ 102.5, 75.38070044779367 ], "wc_limitations_avg": [ 10.5, 10.404326023342406 ], "wc_review_avg": [ 483.75, 200.90342829329717 ], "wc_reply_reviewers_avg": [ 23.0, 21.095023109728988 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZgnFEDDNMlIJ:scholar.google.com/&scioq=SGD+vs+GD:+Rank+Deficiency+in+Linear+Networks&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ic.epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "School of Computer and Communication Sciences;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "On the Identifiability of Poisson Branching Structural Causal Model Using Probability Generating Function", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95033", "id": "TUwWBLjFk9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TUwWBLjFk9", "openreview": "https://openreview.net/forum?id=TUwWBLjFk9", "poster": "", "project": "", "author_site": "Yu Xiang, Jie Qiao, Zefeng Liang, Zihuai Zeng, Ruichu Cai, Zhifeng Hao", "tldr": "", "abstract": "Causal discovery from observational data, especially for count data, is essential across scientific and industrial contexts, such as biology, economics, and network operation maintenance. For this task, most approaches model count data using Bayesian networks or ordinal relations. However, they overlook the inherent branching structures that are frequently encountered, e.g., a browsing event might trigger an adding cart or purchasing event. This can be modeled by a binomial thinning operator (for branching) and an additive independent Poisson distribution (for noising), known as Poisson Branching Structure Causal Model (PB-SCM). There is a provably sound cumulant-based causal discovery method that allows the identification of the causal structure under a branching structure. However, we show that there still remains a gap in that there exist causal directions that are identifiable while the algorithm fails to identify them. In this work, we address this gap by exploring the identifiability of PB-SCM using the Probability Generating Function (PGF). By developing a compact and exact closed-form solution for the PGF of PB-SCM, we demonstrate that each component in this closed-form solution uniquely encodes a specific local structure, enabling the identification of the local structures by testing their corresponding component appearances in the PGF. Building on this, we propose a practical algorithm for learning causal skeletons and identifying causal directions of PB-SCM using PGF. The effectiveness of our method is demonstrated through experiments on both synthetic and real datasets.", "keywords": "Causal Discovery", "primary_area": "causal_inference", "supplementary_material": "", "author": "Yu Xiang;Jie Qiao;Zefeng Liang;Zihuai Zeng;Ruichu Cai;Zhifeng Hao", "authorids": "~Yu_Xiang9;~Jie_Qiao1;~Zefeng_Liang1;~Zihuai_Zeng1;~Ruichu_Cai1;~Zhifeng_Hao4", "gender": "M;M;M;M;M;M", "homepage": ";;https://github.com/LZFeng011021;https://github.com/zzihuai;https://ruichucai.github.io/;https://www.stu.edu.cn/xxgk/dzld1/hzf.htm", "dblp": ";00/7723;;;09/6889;", "google_scholar": "bdK4ZdYAAAAJ;aCEp7f4AAAAJ;;;https://scholar.google.com/citations?hl=en;ZF3gp9wAAAAJ", "orcid": ";0000-0002-4581-9656;;;;", "linkedin": ";;;;;", "or_profile": "~Yu_Xiang9;~Jie_Qiao1;~Zefeng_Liang1;~Zihuai_Zeng1;~Ruichu_Cai1;~Zhifeng_Hao4", "aff": "Guangdong University of Technology;Guangdong University of Technology;Guangdong University of Technology;Guangdong University of Technology;Guangdong University of Technology;Shantou University", "aff_domain": "gdut.edu.cn;gdut.edu.cn;gdut.edu.cn;gdut.edu.cn;gdut.edu.cn;stu.edu.cn", "position": "MS student;Postdoc;MS student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxiang2024on,\ntitle={On the Identifiability of Poisson Branching Structural Causal Model Using Probability Generating Function},\nauthor={Yu Xiang and Jie Qiao and Zefeng Liang and Zihuai Zeng and Ruichu Cai and Zhifeng Hao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TUwWBLjFk9}\n}", "github": "", "reviewers": "CtKZ;nGrv;w346;Gqam", "pdf_size": 883351, "rating": "6;7;7;8", "confidence": "5;4;5;4", "soundness": "4;4;3;4", "novelty": "3;4;3;4", "presentation": "3;3;3;4", "wc_summary": "62;70;55;60", "wc_strengths": "70;78;84;94", "wc_weaknesses": "79;87;126;149", "wc_questions": "86;1;2;21", "wc_limitations": "18;1;2;18", "wc_review": "315;237;269;342", "wc_reply_reviewers": "13;15;0;19", "wc_reply_authors": "27;28;0;38", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.75, 5.402545696243577 ], "wc_strengths_avg": [ 81.5, 8.760707733967616 ], "wc_weaknesses_avg": [ 110.25, 28.577744837547975 ], "wc_questions_avg": [ 27.5, 34.70230539892127 ], "wc_limitations_avg": [ 9.75, 8.257572282456872 ], "wc_review_avg": [ 290.75, 40.54858197273981 ], "wc_reply_reviewers_avg": [ 11.75, 7.119515432949071 ], "wc_reply_authors_avg": [ 23.25, 14.095655359010449 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J8vmzgupaWUJ:scholar.google.com/&scioq=On+the+Identifiability+of+Poisson+Branching+Structural+Causal+Model+Using+Probability+Generating+Function&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "gdut.edu.cn;gdut.edu.cn;gdut.edu.cn;gdut.edu.cn;gdut.edu.cn;stu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Guangdong University of Technology;Shantou University", "aff_unique_dep": ";", "aff_unique_url": "http://www.gdut.edu.cn;https://www.stu.edu.cn", "aff_unique_abbr": "GDUT;STU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Trade-Offs of Diagonal Fisher Information Matrix Estimators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95032", "id": "TVbCKAqoD8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TVbCKAqoD8", "openreview": "https://openreview.net/forum?id=TVbCKAqoD8", "poster": "/media/PosterPDFs/NeurIPS%202024/95032.png?t=1733474225.4471786", "project": "", "author_site": "Alexander Soen, Ke Sun", "tldr": "", "abstract": "The Fisher information matrix can be used to characterize the local geometry of\nthe parameter space of neural networks. It elucidates insightful theories and\nuseful tools to understand and optimize neural networks. Given its high\ncomputational cost, practitioners often use random estimators and evaluate only\nthe diagonal entries. We examine two popular estimators whose accuracy and sample\ncomplexity depend on their associated variances. We derive bounds of the\nvariances and instantiate them in neural networks for regression and\nclassification. We navigate trade-offs for both estimators based on analytical\nand numerical studies. We find that the variance quantities depend on the\nnon-linearity w.r.t. different parameter groups and should not be neglected when\nestimating the Fisher information.", "keywords": "Fisher Information;Deep Learning;Information Geometry;Neuromanifold", "primary_area": "learning_theory", "supplementary_material": "", "author": "Alexander Soen;Ke Sun", "authorids": "~Alexander_Soen1;~Ke_Sun1", "gender": "M;M", "homepage": "https://alexandersoen.github.io/;https://courbure.com", "dblp": "245/9661.html;69/476-1", "google_scholar": "apRX4awAAAAJ;n6AIt34AAAAJ", "orcid": ";0000-0001-6263-7355", "linkedin": ";courbure", "or_profile": "~Alexander_Soen1;~Ke_Sun1", "aff": "Australian National University;Australian National University", "aff_domain": "anu.edu.au;anu.edu.au", "position": "PhD student;Honorary Senior Lecturer", "bibtex": "@inproceedings{\nsoen2024tradeoffs,\ntitle={Trade-Offs of Diagonal Fisher Information Matrix Estimators},\nauthor={Alexander Soen and Ke Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TVbCKAqoD8}\n}", "github": "", "reviewers": "QMbw;wDHQ;7Xmq;qMTe", "pdf_size": 1093169, "rating": "5;5;6;8", "confidence": "3;1;2;5", "soundness": "4;2;3;4", "novelty": "2;2;2;3", "presentation": "3;2;2;3", "wc_summary": "39;64;388;81", "wc_strengths": "34;111;2;81", "wc_weaknesses": "88;267;2;199", "wc_questions": "101;44;2;97", "wc_limitations": "10;12;2;58", "wc_review": "272;498;396;516", "wc_reply_reviewers": "14;27;15;44", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 1.479019945774904 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 143.0, 142.23747748044465 ], "wc_strengths_avg": [ 57.0, 41.970227542866624 ], "wc_weaknesses_avg": [ 139.0, 101.67841462178686 ], "wc_questions_avg": [ 61.0, 40.82278775390039 ], "wc_limitations_avg": [ 20.5, 21.97157254271983 ], "wc_review_avg": [ 420.5, 97.18410363840374 ], "wc_reply_reviewers_avg": [ 25.0, 12.103718436910205 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.828078671210825, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8467479815816954010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "anu.edu.au;anu.edu.au", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Subject-driven Text-to-Image Generation via Preference-based Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95031", "id": "TWeVQ5meMW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TWeVQ5meMW", "openreview": "https://openreview.net/forum?id=TWeVQ5meMW", "poster": "/media/PosterPDFs/NeurIPS%202024/95031.png?t=1731971057.2975147", "project": "", "author_site": "Yanting Miao, William Loh, Suraj Kothawade, Pascal Poupart, Abdullah Rashwan, Yeqing Li", "tldr": "", "abstract": "Text-to-image generative models have recently attracted considerable interest, enabling the synthesis of high-quality images from textual prompts. However, these models often lack the capability to generate specific subjects from given reference images or to synthesize novel renditions under varying conditions. Methods like DreamBooth and Subject-driven Text-to-Image (SuTI) have made significant progress in this area. Yet, both approaches primarily focus on enhancing similarity to reference images and require expensive setups, often overlooking the need for efficient training and avoiding overfitting to the reference images. In this work, we present the $\\lambda$-Harmonic reward function, which provides a reliable reward signal and enables early stopping for faster training and effective regularization. By combining the Bradley-Terry preference model, the $\\lambda$-Harmonic reward function also provides preference labels for subject-driven generation tasks. We propose Reward Preference Optimization (RPO), which offers a simpler setup (requiring only 3\\% of the negative samples used by DreamBooth) and fewer gradient steps for fine-tuning. Unlike most existing methods, our approach does not require training a text encoder or optimizing text embeddings and achieves text-image alignment by fine-tuning only the U-Net component. Empirically, $\\lambda$-Harmonic proves to be a reliable approach for model selection in subject-driven generation tasks. Based on preference labels and early stopping validation from the $\\lambda$-Harmonic reward function, our algorithm achieves a state-of-the-art CLIP-I score of 0.833 and a CLIP-T score of 0.314 on DreamBench.", "keywords": "Diffusion Models;Text-to-Image generation;Reinforcement Learning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/d5e27f90df33df68697488351cab4144a4c2c833.zip", "author": "Yanting Miao;William Loh;Suraj Kothawade;Pascal Poupart;Abdullah Rashwan;Yeqing Li", "authorids": "~Yanting_Miao1;~William_Loh1;~Suraj_Kothawade2;~Pascal_Poupart2;~Abdullah_Rashwan1;~Yeqing_Li2", "gender": "M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?view_op=list_works&hl=en&user=5LHzXc4AAAAJ&gmla=AHoSzlXhmaPVWyX38n68u1liFi5P_S7c0mNO5i5eT6RU3QBK2P-r97m2WkLwh3FXnuWhoTAH8B-M_DBiCsguZP9ElhsN4JZSeN4Ks0vYG4c;;https://cs.uwaterloo.ca/~ppoupart;;;http://surajk.me", "dblp": ";;26/2122;;122/3142;220/3896", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.ca/citations?user=KhAJWroAAAAJ;;2Dymb8oAAAAJ;", "orcid": ";;;;;", "linkedin": ";wei-min-loh/;;abdullah-rashwan-84655245/;yeqing-li-24016036/;suraj-kothawade-6835b5a9/", "or_profile": "~Yanting_Miao1;~William_Loh1;~Pascal_Poupart2;~Abdullah_Rashwan1;~Yeqing_Li2;~Suraj_Nandkishor_Kothawade2", "aff": "University of Waterloo;University of Waterloo;University of Waterloo;Google;Google;Google", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;google.com;google.com;google.com", "position": "PhD student;PhD student;Full Professor;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmiao2024subjectdriven,\ntitle={Subject-driven Text-to-Image Generation via Preference-based Reinforcement Learning},\nauthor={Yanting Miao and William Loh and Suraj Kothawade and Pascal Poupart and Abdullah Rashwan and Yeqing Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TWeVQ5meMW}\n}", "github": "", "reviewers": "u7k9;5xjH;BTXt;tGfs;Kj9e", "pdf_size": 2560783, "rating": "5;5;5;5;6", "confidence": "3;3;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;2;3", "wc_summary": "31;73;35;31;80", "wc_strengths": "25;121;28;28;25", "wc_weaknesses": "67;66;180;37;206", "wc_questions": "17;20;3;24;39", "wc_limitations": "1;1;1;1;6", "wc_review": "141;281;247;121;356", "wc_reply_reviewers": "0;31;45;0;40", "wc_reply_authors": "0;0;30;0;38", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 50.0, 21.79908254950194 ], "wc_strengths_avg": [ 45.4, 37.82380202993877 ], "wc_weaknesses_avg": [ 111.2, 68.15100879664217 ], "wc_questions_avg": [ 20.6, 11.60344776348823 ], "wc_limitations_avg": [ 2.0, 2.0 ], "wc_review_avg": [ 229.2, 87.8234592805362 ], "wc_reply_reviewers_avg": [ 23.2, 19.466894975830122 ], "wc_reply_authors_avg": [ 13.6, 16.84755175092215 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14413153526303862192&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;1;1;1", "aff_unique_norm": "University of Waterloo;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://uwaterloo.ca;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "Canada;United States" }, { "id": "TWfNFCOPaK", "title": "Probabilistic and Differentiable Wireless Simulation with Geometric Transformers", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modelling the propagation of electromagnetic signals is critical for designing modern communication systems. While there are precise simulators based on ray tracing, they do not lend themselves to solving inverse problems or the integration in an automated design loop. We propose to address these challenges through differentiable neural surrogates that exploit the geometric aspects of the problem. We first introduce the Wireless Geometric Algebra Transformer (Wi-GATr), a generic backbone architecture for simulating wireless propagation in a 3D environment. It uses versatile representations based on geometric algebra and is equivariant with respect to E(3), the symmetry group of the underlying physics. Second, we study two algorithmic approaches to signal prediction and inverse problems based on differentiable predictive modelling and diffusion models. We show how these let us predict received power, localize transmitters, and reconstruct the 3D environment from the received signal. Finally, we introduce two large, geometry-focused datasets of wireless signal propagation in indoor scenes. In experiments, we show that our geometry-forward approach achieves higher-fidelity predictions with less\ndata than various baselines.", "keywords": "Wireless communication;electromagnetic signals;inverse problems;learning to simulate;inverse problems;geometric deep learning;equivariance;diffusion models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Thomas Hehn;Markus Peschl;Tribhuvanesh Orekondy;Arash Behboodi;Johann Brehmer", "authorids": "~Thomas_Hehn1;~Markus_Peschl1;~Tribhuvanesh_Orekondy1;~Arash_Behboodi1;~Johann_Brehmer1", "gender": ";M;;M;M", "homepage": ";https://mlpeschl.com;https://tribhuvanesh.github.io/;https://arashbehboodi.github.io/;https://johannbrehmer.github.io", "dblp": ";298/3144;198/0668;97/7718;220/5763", "google_scholar": "GBRDLBgAAAAJ;hinquVUAAAAJ;https://scholar.google.ch/citations?user=sbsFpScAAAAJ;;ZdUMvCsAAAAJ", "orcid": ";;;;0000-0003-3344-4209", "linkedin": ";;tribhuvanesh;;johannbrehmer", "or_profile": "~Thomas_Hehn1;~Markus_Peschl1;~Tribhuvanesh_Orekondy1;~Arash_Behboodi1;~Johann_Brehmer1", "aff": "Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;QualComm;Qualcomm AI Research", "aff_domain": "qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qualcomm.com;qualcomm.com", "position": "Researcher;Researcher;Researcher;Machine Learning Researcher;Researcher", "bibtex": "@misc{\nanonymous2024probabilistic,\ntitle={Probabilistic and Differentiable Wireless Simulation with Geometric Transformers},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=TWfNFCOPaK}\n}", "github": "", "project": "", "reviewers": "2AZ5;6Syy;LEve;yJCq;3vGS", "site": "https://openreview.net/forum?id=TWfNFCOPaK", "pdf_size": 2409416, "rating": "3;4;4;5;5", "confidence": "4;4;3;3;4", "soundness": "2;3;2;2;3", "novelty": "2;3;3;2;2", "presentation": "2;3;3;3;2", "wc_summary": "113;56;88;79;250", "wc_strengths": "33;56;125;51;65", "wc_weaknesses": "275;233;151;159;131", "wc_questions": "73;33;152;171;76", "wc_limitations": "34;78;9;2;10", "wc_review": "528;456;525;462;532", "wc_reply_reviewers": "472;112;71;72;51", "wc_reply_authors": "435;342;94;76;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;1", "rating_avg": [ 4.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 117.2, 68.86334293366828 ], "wc_strengths_avg": [ 66.0, 31.292171544972714 ], "wc_weaknesses_avg": [ 189.8, 54.83940189316437 ], "wc_questions_avg": [ 101.0, 52.026916110797885 ], "wc_limitations_avg": [ 26.6, 27.88261106854952 ], "wc_review_avg": [ 500.6, 34.09164120426003 ], "wc_reply_reviewers_avg": [ 155.6, 159.43475154432298 ], "wc_reply_authors_avg": [ 189.4, 168.18989268086239 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mwaj1RbUM30J:scholar.google.com/&scioq=Probabilistic+and+Differentiable+Wireless+Simulation+with+Geometric+Transformers&hl=en&as_sdt=0,23", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Qualcomm Incorporated;Qualcomm", "aff_unique_dep": ";Qualcomm AI Research", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com/research", "aff_unique_abbr": "Qualcomm;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "What type of inference is planning?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95030", "id": "TXsRGrzICz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TXsRGrzICz", "openreview": "https://openreview.net/forum?id=TXsRGrzICz", "poster": "/media/PosterPDFs/NeurIPS%202024/95030.png?t=1731727606.572169", "project": "", "author_site": "Miguel Lazaro-Gredilla, Li Ku, Kevin Murphy, Dileep George", "tldr": "", "abstract": "Multiple types of inference are available for probabilistic graphical models, e.g., marginal, maximum-a-posteriori, and even marginal maximum-a-posteriori. Which one do researchers mean when they talk about ``planning as inference''? There is no consistency in the literature, different types are used, and their ability to do planning is further entangled with specific approximations or additional constraints. In this work we use the variational framework to show that, just like all commonly used types of inference correspond to different weightings of the entropy terms in the variational problem, planning corresponds _exactly_ to a _different_ set of weights. This means that all the tricks of variational inference are readily applicable to planning. We develop an analogue of loopy belief propagation that allows us to perform approximate planning in factored-state Markov decisions processes without incurring intractability due to the exponentially large state space. The variational perspective shows that the previous types of inference for planning are only adequate in environments with low stochasticity, and allows us to characterize each type by its own merits, disentangling the type of inference from the additional approximations that its practical use requires. We validate these results empirically on synthetic MDPs and tasks posed in the International Planning Competition.", "keywords": "planning;variational inference;belief propagation;message passing", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Miguel Lazaro-Gredilla;Li Yang Ku;Kevin Patrick Murphy;Dileep George", "authorids": "~Miguel_Lazaro-Gredilla1;~Li_Yang_Ku1;~Kevin_Patrick_Murphy1;~Dileep_George1", "gender": "M;M;;", "homepage": ";https://www.liyangku.com/;https://www.cs.ubc.ca/~murphyk/;", "dblp": "77/4660;160/3626;26/2599;", "google_scholar": "SFjDQk8AAAAJ;https://scholar.google.com/citations?hl=en;MxxZkEcAAAAJ;", "orcid": ";;;", "linkedin": "miguel-lazaro-g/;;;", "or_profile": "~Miguel_Lazaro-Gredilla1;~Li_Yang_Ku1;~Kevin_Patrick_Murphy1;~Dileep_George1", "aff": "Google Deepmind;Google;Google;Vicarious AI", "aff_domain": "google.com;deepmind.com;google.com;vicarious.com", "position": "Research Scientist;Researcher;Principal Researcher;Co-founder", "bibtex": "@inproceedings{\nlazaro-gredilla2024what,\ntitle={What type of inference is planning?},\nauthor={Miguel Lazaro-Gredilla and Li Yang Ku and Kevin Patrick Murphy and Dileep George},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TXsRGrzICz}\n}", "github": "", "reviewers": "us9u;vctT;QvAf;qHG5;ppF9", "pdf_size": 4326792, "rating": "6;6;6;7;8", "confidence": "4;3;2;4;5", "soundness": "4;3;3;3;4", "novelty": "3;3;3;4;4", "presentation": "4;2;2;3;3", "wc_summary": "104;54;85;215;79", "wc_strengths": "77;14;54;243;196", "wc_weaknesses": "53;386;59;408;274", "wc_questions": "9;431;338;447;75", "wc_limitations": "8;1;11;143;19", "wc_review": "251;886;547;1456;643", "wc_reply_reviewers": "13;35;14;240;13", "wc_reply_authors": "0;674;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 107.4, 56.123435390218226 ], "wc_strengths_avg": [ 116.8, 87.51548434420049 ], "wc_weaknesses_avg": [ 236.0, 153.84797691227533 ], "wc_questions_avg": [ 260.0, 183.04097901836082 ], "wc_limitations_avg": [ 36.4, 53.61193896885282 ], "wc_review_avg": [ 756.6, 404.5672255633172 ], "wc_reply_reviewers_avg": [ 63.0, 88.89769400833747 ], "wc_reply_authors_avg": [ 134.8, 269.6 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7844645405527362, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15452884547280960801&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;deepmind.com;google.com;vicarious.com", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "DeepMind;Google;Vicarious AI", "aff_unique_dep": "DeepMind;Google;", "aff_unique_url": "https://deepmind.com;https://www.google.com;https://www.vicarious.com", "aff_unique_abbr": "DeepMind;Google;Vicarious AI", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "A two-scale Complexity Measure for Deep Learning Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95029", "id": "TY9VoSZZIA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TY9VoSZZIA", "openreview": "https://openreview.net/forum?id=TY9VoSZZIA", "poster": "/media/PosterPDFs/NeurIPS%202024/95029.png?t=1731314337.1465924", "project": "", "author_site": "Massimiliano Datres, Gian Leonardi, Alessio Figalli, David Sutter", "tldr": "", "abstract": "We introduce a novel capacity measure 2sED for statistical models based on the effective dimension. The new quantity provably bounds the generalization error under mild assumptions on the model. Furthermore, simulations on standard data sets and popular model architectures show that 2sED correlates well with the training error. For Markovian models, we show how to efficiently approximate 2sED from below through a layerwise iterative approach, which allows us to tackle deep learning models with a large number of parameters. Simulation results suggest that the approximation is good for different prominent models and data sets.", "keywords": "Statistical learning theory;complexity measures;covering number estimates;Fisher metric;Markovian models.", "primary_area": "learning_theory", "supplementary_material": "", "author": "Massimiliano Datres;Gian Paolo Leonardi;Alessio Figalli;David Sutter", "authorids": "~Massimiliano_Datres1;~Gian_Paolo_Leonardi1;~Alessio_Figalli2;~David_Sutter1", "gender": ";;;M", "homepage": ";;https://people.math.ethz.ch/~afigalli/;https://sites.google.com/site/suttedav/", "dblp": ";;;", "google_scholar": ";;;https://scholar.google.ch/citations?user=RzaBjEQAAAAJ", "orcid": ";;;", "linkedin": "https://linkedin.com/in/massimiliano-datres-6307851b7;;;", "or_profile": "~Massimiliano_Datres1;~Gian_Paolo_Leonardi1;~Alessio_Figalli2;~David_Sutter1", "aff": "University of Trento;;ETHZ - ETH Zurich;International Business Machines", "aff_domain": "unitn.it;;ethz.ch;ibm.com", "position": "PhD student;;Full Professor;Researcher", "bibtex": "@inproceedings{\ndatres2024a,\ntitle={A two-scale Complexity Measure for Deep Learning Models},\nauthor={Massimiliano Datres and Gian Paolo Leonardi and Alessio Figalli and David Sutter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TY9VoSZZIA}\n}", "github": "", "reviewers": "5Uzy;MhtR;iesa;aPDv", "pdf_size": 748786, "rating": "5;6;6;7", "confidence": "4;3;1;3", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "59;48;41;111", "wc_strengths": "32;14;48;110", "wc_weaknesses": "65;130;33;74", "wc_questions": "78;33;16;267", "wc_limitations": "29;22;17;20", "wc_review": "263;247;155;582", "wc_reply_reviewers": "40;15;12;36", "wc_reply_authors": "0;4;0;2", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 27.462474396892937 ], "wc_strengths_avg": [ 51.0, 36.124783736376884 ], "wc_weaknesses_avg": [ 75.5, 34.96069221282668 ], "wc_questions_avg": [ 98.5, 99.88618523099179 ], "wc_limitations_avg": [ 22.0, 4.415880433163924 ], "wc_review_avg": [ 311.75, 161.38056729358712 ], "wc_reply_reviewers_avg": [ 25.75, 12.376893794486563 ], "wc_reply_authors_avg": [ 1.5, 1.6583123951777 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1177710932782524224&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "unitn.it;;ethz.ch;ibm.com", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Trento;ETH Zurich;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unitn.it;https://www.ethz.ch;https://www.ibm.com", "aff_unique_abbr": "UniTN;ETHZ;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Italy;Switzerland;United States" }, { "title": "How Do Large Language Models Acquire Factual Knowledge During Pretraining?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95028", "id": "TYdzj1EvBP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TYdzj1EvBP", "openreview": "https://openreview.net/forum?id=TYdzj1EvBP", "poster": "", "project": "", "author_site": "Hoyeon Chang, Jinho Park, Seonghyeon Ye, Sohee Yang, Youngkyung Seo, Du-Seong Chang, Minjoon Seo", "tldr": "", "abstract": "Despite the recent observation that large language models (LLMs) can store substantial factual knowledge, there is a limited understanding of the mechanisms of how they acquire factual knowledge through pretraining. This work addresses this gap by studying how LLMs acquire factual knowledge during pretraining. The findings reveal several important insights into the dynamics of factual knowledge acquisition during pretraining. First, counterintuitively, we observe that pretraining on more data shows no significant improvement in the model's capability to acquire and maintain factual knowledge. Next, LLMs undergo forgetting of memorization and generalization of factual knowledge, and LLMs trained with duplicated training data exhibit faster forgetting. Third, training LLMs with larger batch sizes can enhance the models' robustness to forgetting. Overall, our observations suggest that factual knowledge acquisition in LLM pretraining occurs by progressively increasing the probability of factual knowledge presented in the pretraining data at each step. However, this increase is diluted by subsequent forgetting. Based on this interpretation, we demonstrate that we can provide plausible explanations on recently observed behaviors of LLMs, such as the poor performance of LLMs on long-tail knowledge and the benefits of deduplicating the pretraining corpus.", "keywords": "Large Language Models;Factual knowledge acquisition;Pre-training dynamics", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a6a1d4f831fc59b3ae3a81ea381d4eee99971dbe.zip", "author": "Hoyeon Chang;Jinho Park;Seonghyeon Ye;Sohee Yang;Youngkyung Seo;Du-Seong Chang;Minjoon Seo", "authorids": "~Hoyeon_Chang1;~Jinho_Park5;~Seonghyeon_Ye1;~Sohee_Yang1;~Youngkyung_Seo1;~Du-Seong_Chang1;~Minjoon_Seo1", "gender": "M;M;M;F;F;M;M", "homepage": "https://duemoo.github.io/about/;https://jinhopark-ai.github.io/;https://vano1205.github.io/;https://soheeyang.github.io;https://github.com/youngkyung;https://duseongchang.github.io/;https://seominjoon.github.io", "dblp": ";;301/8927;236/5847;https://dblp.org/rec/journals/itiis/SeoHJJ19;92/3996;149/1367", "google_scholar": "https://scholar.google.com/citations?hl=ko;itUghOQAAAAJ;https://scholar.google.co.kr/citations?user=JfGGjBoAAAAJ;jh547hEAAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=y1HTwWAAAAAJ;zYze5fIAAAAJ", "orcid": ";;;;0000-0003-0206-5150;;", "linkedin": ";%EC%A7%84%ED%98%B8-%EB%B0%95-48baba237/;;;young-\u200dseo-745029245;;minjoon-seo/", "or_profile": "~Hoyeon_Chang1;~Jinho_Park5;~Seonghyeon_Ye1;~Sohee_Yang1;~Youngkyung_Seo1;~Du-Seong_Chang1;~Minjoon_Seo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Google;Korea Telecom Research;KT;Twelve Labs", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;google.com;kt.com;kt.com;twelvelabs.io", "position": "MS student;Undergrad student;PhD student;Intern;Researcher;Senior Vice President;Chief Scientist", "bibtex": "@inproceedings{\nchang2024how,\ntitle={How Do Large Language Models Acquire Factual Knowledge During Pretraining?},\nauthor={Hoyeon Chang and Jinho Park and Seonghyeon Ye and Sohee Yang and Youngkyung Seo and Du-Seong Chang and Minjoon Seo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TYdzj1EvBP}\n}", "github": "", "reviewers": "fwEj;1qMb;yByP;ewnz;ZYtT", "pdf_size": 4552438, "rating": "4;5;7;8;10", "confidence": "4;3;3;4;3", "soundness": "2;3;3;4;4", "novelty": "2;2;3;4;4", "presentation": "3;2;3;4;4", "wc_summary": "72;128;112;117;48", "wc_strengths": "105;83;175;43;23", "wc_weaknesses": "596;294;54;413;57", "wc_questions": "6;40;118;17;12", "wc_limitations": "6;6;9;7;1", "wc_review": "785;551;468;597;141", "wc_reply_reviewers": "109;13;82;15;297", "wc_reply_authors": "346;0;436;0;767", "reply_reviewers": "1;1;2;1;3", "reply_authors": "2;1;2;1;3", "rating_avg": [ 6.8, 2.1354156504062622 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 95.4, 30.328864139627782 ], "wc_strengths_avg": [ 85.8, 53.120240963308895 ], "wc_weaknesses_avg": [ 282.8, 209.04870245949866 ], "wc_questions_avg": [ 38.6, 41.33569885704124 ], "wc_limitations_avg": [ 5.8, 2.6381811916545836 ], "wc_review_avg": [ 508.4, 211.08633304882628 ], "wc_reply_reviewers_avg": [ 103.2, 103.8814709175799 ], "wc_reply_authors_avg": [ 309.8, 289.2130010908915 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30588764516074907, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14982916411782780415&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;google.com;kt.com;kt.com;twelvelabs.io", "author_num": 7, "aff_unique_index": "0;0;0;1;2;2;3", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Google;Korea Telecom;Twelve Labs", "aff_unique_dep": ";Google;Research;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.google.com;https://www.kt.com;https://twelvelabs.com", "aff_unique_abbr": "KAIST;Google;KT;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "RanDumb: Random Representations Outperform Online Continually Learned Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95027", "id": "TZ5k9IYBBf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TZ5k9IYBBf", "openreview": "https://openreview.net/forum?id=TZ5k9IYBBf", "poster": "/media/PosterPDFs/NeurIPS%202024/95027.png?t=1731755359.9316702", "project": "", "author_site": "Ameya Prabhu, Shiven Sinha, Ponnurangam Kumaraguru, Philip Torr, Ozan Sener, Puneet Dokania", "tldr": "", "abstract": "Continual learning has primarily focused on the issue of catastrophic forgetting and the associated stability-plasticity tradeoffs. However, little attention has been paid to the efficacy of continually learned representations, as representations are learned alongside classifiers throughout the learning process. Our primary contribution is empirically demonstrating that existing online continually trained deep networks produce inferior representations compared to a simple pre-defined random transforms. Our approach embeds raw pixels using a fixed random transform, approximating an RBF-Kernel initialized before any data is seen. We then train a simple linear classifier on top without storing any exemplars, processing one sample at a time in an online continual learning setting. This method, called RanDumb, significantly outperforms state-of-the-art continually learned representations across all standard online continual learning benchmarks. Our study reveals the significant limitations of representation learning, particularly in low-exemplar and online continual learning scenarios. Extending our investigation to popular exemplar-free scenarios with pretrained models, we find that training only a linear classifier on top of pretrained representations surpasses most continual fine-tuning and prompt-tuning strategies. Overall, our investigation challenges the prevailing assumptions about effective representation learning in the online continual learning.", "keywords": "online continual learning;exemplar-free;baseline;analysis", "primary_area": "evaluation", "supplementary_material": "/attachment/90924752699f7518e8105d6236b2a7aae44e5bea.zip", "author": "Ameya Prabhu;Shiven Sinha;Ponnurangam Kumaraguru;Philip Torr;Ozan Sener;Puneet K. Dokania", "authorids": "~Ameya_Prabhu1;~Shiven_Sinha1;~Ponnurangam_Kumaraguru3;~Philip_Torr1;~Ozan_Sener1;~Puneet_K._Dokania1", "gender": "M;M;;;M;M", "homepage": "https://drimpossible.github.io/;;https://precog.iiit.ac.in/;http://www.robots.ox.ac.uk/~tvg/;http://ozansener.net;http://puneetkdokania.github.io/", "dblp": "181/4512;368/3856;97/5147.html;;125/1989;150/4211", "google_scholar": "0kK7sSAAAAAJ;WO51AfgAAAAJ;MfzQyP8AAAAJ;;BI8xFr4AAAAJ;https://scholar.google.fr/citations?user=WsM7ybkAAAAJ", "orcid": ";0009-0000-5259-2683;;;;", "linkedin": ";shiven-sinha/;ponguru/;;;", "or_profile": "~Ameya_Prabhu1;~Shiven_Sinha1;~Ponnurangam_Kumaraguru3;~Philip_Torr1;~Ozan_Sener1;~Puneet_Dokania1", "aff": "University of Oxford;International Institute of Information Technology, Hyderabad;International Institute of Information Technology Hyderabad ;University of Oxford;Apple;University of Oxford", "aff_domain": "ox.ac.uk;iiit.ac.in;iiit.ac.in;ox.ac.uk;apple.com;oxford.ac.uk", "position": "PhD student;Undergrad student;Full Professor;Full Professor;Researcher;Senior Researcher", "bibtex": "@inproceedings{\nprabhu2024random,\ntitle={Random Representations Outperform Online Continually Learned Representations},\nauthor={Ameya Prabhu and Shiven Sinha and Ponnurangam Kumaraguru and Philip Torr and Ozan Sener and Puneet K. Dokania},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TZ5k9IYBBf}\n}", "github": "", "reviewers": "5ZCt;8BCQ;sAWs;k3sw", "pdf_size": 6341916, "rating": "5;6;7;7", "confidence": "3;5;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "36;76;100;103", "wc_strengths": "112;80;68;132", "wc_weaknesses": "468;38;124;15", "wc_questions": "24;5;142;136", "wc_limitations": "2;23;10;36", "wc_review": "642;222;444;422", "wc_reply_reviewers": "116;25;90;75", "wc_reply_authors": "59;19;208;117", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.75, 26.808347580557815 ], "wc_strengths_avg": [ 98.0, 25.37715508089904 ], "wc_weaknesses_avg": [ 161.25, 181.70219453820584 ], "wc_questions_avg": [ 76.75, 62.647326359550256 ], "wc_limitations_avg": [ 17.75, 12.93010054098575 ], "wc_review_avg": [ 432.5, 148.69683924011298 ], "wc_reply_reviewers_avg": [ 76.5, 33.15493930020081 ], "wc_reply_authors_avg": [ 100.75, 71.05059816778463 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TYv75qVKqgwJ:scholar.google.com/&scioq=RanDumb:+Random+Representations+Outperform+Online+Continually+Learned+Representations&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ox.ac.uk;iiit.ac.in;iiit.ac.in;ox.ac.uk;apple.com;oxford.ac.uk", "author_num": 6, "aff_unique_index": "0;1;1;0;2;0", "aff_unique_norm": "University of Oxford;International Institute of Information Technology;Apple", "aff_unique_dep": ";;Apple Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://iiit Hyderabad.ac.in;https://www.apple.com", "aff_unique_abbr": "Oxford;IIIT Hyderabad;Apple", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hyderabad", "aff_country_unique_index": "0;1;1;0;2;0", "aff_country_unique": "United Kingdom;India;United States" }, { "title": "Beyond Euclidean: Dual-Space Representation Learning for Weakly Supervised Video Violence Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95026", "id": "TbPv0qFnHO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TbPv0qFnHO", "openreview": "https://openreview.net/forum?id=TbPv0qFnHO", "poster": "/media/PosterPDFs/NeurIPS%202024/95026.png?t=1730862866.9167967", "project": "", "author_site": "Jiaxu Leng, Zhanjie Wu, Mingpi Tan, Yiran Liu, Ji Gan, Haosheng Chen, Xinbo Gao", "tldr": "", "abstract": "While numerous Video Violence Detection (VVD) methods have focused on representation learning in Euclidean space, they struggle to learn sufficiently discriminative features, leading to weaknesses in recognizing normal events that are visually similar to violent events (i.e., ambiguous violence). In contrast, hyperbolic representation learning, renowned for its ability to model hierarchical and complex relationships between events, has the potential to amplify the discrimination between visually similar events. Inspired by these, we develop a novel Dual-Space Representation Learning (DSRL) method for weakly supervised VVD to utilize the strength of both Euclidean and hyperbolic geometries, capturing the visual features of events while also exploring the intrinsic relations between events, thereby enhancing the discriminative capacity of the features. DSRL employs a novel information aggregation strategy to progressively learn event context in hyperbolic spaces, which selects aggregation nodes through layer-sensitive hyperbolic association degrees constrained by hyperbolic Dirichlet energy. Furthermore, DSRL attempts to break the cyber-balkanization of different spaces, utilizing cross-space attention to facilitate information interactions between Euclidean and hyperbolic space to capture better discriminative features for final violence detection. Comprehensive experiments demonstrate the effectiveness of our proposed DSRL.", "keywords": "weakly supervised video violence detection;ambiguous violence;hyperbolic graph convolutional network;dual-space interaction", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiaxu Leng;Zhanjie Wu;Mingpi Tan;Yiran Liu;Ji Gan;Haosheng Chen;Xinbo Gao", "authorids": "~Jiaxu_Leng2;~Zhanjie_Wu1;~Mingpi_Tan1;~Yiran_Liu8;~Ji_Gan1;~Haosheng_Chen1;~Xinbo_Gao5", "gender": "M;M;;F;M;M;M", "homepage": ";;https://github.com/TanMing11;;;https://www.hschen.cn;https://faculty.cqupt.edu.cn/gaoxinbo/zh_CN/index.htm", "dblp": "219/1664;;331/1413;;227/7216;251/3427-1;", "google_scholar": "KpX-CCcAAAAJ;;;;;cy6nGw4AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0000-0003-1952-6267;0009-0008-9926-7325;0000-0001-6041-588X;0000-0001-6834-2136;0000-0002-7985-0037", "linkedin": ";%E5%8D%A0%E6%9D%B0-%E5%90%B4-9b1096309/;;;;;xinbo-gao-151a2224/", "or_profile": "~Jiaxu_Leng2;~Zhanjie_Wu1;~Mingpi_Tan1;~Yiran_Liu8;~Ji_Gan1;~Haosheng_Chen1;~Xinbo_Gao5", "aff": "Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications;Chongqing University of Post and Telecommunications", "aff_domain": "cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn", "position": "Associate Professor;MS student;PhD student;Undergrad student;Associate Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nleng2024beyond,\ntitle={Beyond Euclidean: Dual-Space Representation Learning for Weakly Supervised Video Violence Detection},\nauthor={Jiaxu Leng and Zhanjie Wu and Mingpi Tan and Yiran Liu and Ji Gan and Haosheng Chen and Xinbo Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TbPv0qFnHO}\n}", "github": "", "reviewers": "8atP;8SDB;rxrS;5VkS", "pdf_size": 0, "rating": "5;6;6;7", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "106;59;82;114", "wc_strengths": "133;56;127;83", "wc_weaknesses": "143;59;90;96", "wc_questions": "137;24;32;3", "wc_limitations": "5;5;5;1", "wc_review": "524;203;336;297", "wc_reply_reviewers": "34;42;20;14", "wc_reply_authors": "0;166;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.25, 21.545011023436494 ], "wc_strengths_avg": [ 99.75, 31.7913117061879 ], "wc_weaknesses_avg": [ 97.0, 30.041637771599603 ], "wc_questions_avg": [ 49.0, 51.898940258930146 ], "wc_limitations_avg": [ 4.0, 1.7320508075688772 ], "wc_review_avg": [ 340.0, 116.71546598459007 ], "wc_reply_reviewers_avg": [ 27.5, 11.07925990308017 ], "wc_reply_authors_avg": [ 41.5, 71.88010851410841 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3123944523896885211&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn;cqupt.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Chongqing University of Post and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.cqupt.edu.cn", "aff_unique_abbr": "CQUPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Job-SDF: A Multi-Granularity Dataset for Job Skill Demand Forecasting and Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97678", "id": "TbslDzPxhF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TbslDzPxhF", "openreview": "https://openreview.net/forum?id=TbslDzPxhF", "poster": "", "project": "", "author_site": "Xi Chen, Chuan Qin, Chuyu Fang, Chao Wang, chen zhu, Fuzhen Zhuang, Hengshu Zhu, Hui Xiong", "tldr": "", "abstract": "In a rapidly evolving job market, skill demand forecasting is crucial as it enables policymakers and businesses to anticipate and adapt to changes, ensuring that workforce skills align with market needs, thereby enhancing productivity and competitiveness. Additionally, by identifying emerging skill requirements, it directs individuals towards relevant training and education opportunities, promoting continuous self-learning and development. However, the absence of comprehensive datasets presents a significant challenge, impeding research and the advancement of this field. To bridge this gap, we present Job-SDF, a dataset designed to train and benchmark job-skill demand forecasting models. Based on millions of public job advertisements collected from online recruitment platforms, this dataset encompasses monthly recruitment demand.\nOur dataset uniquely enables evaluating skill demand forecasting models at various granularities, including occupation, company, and regional levels. \nWe benchmark a range of models on this dataset, evaluating their performance in standard scenarios, in predictions focused on lower value ranges, and in the presence of structural breaks, providing new insights for further research. Our code and dataset are publicly accessible via the https://github.com/Job-SDF/benchmark.", "keywords": "job skill demand forecasting; labor market analysis; time series", "primary_area": "", "supplementary_material": "", "author": "Xi Chen;Chuan Qin;Chuyu Fang;Chao Wang;Chen Zhu;Fuzhen Zhuang;Hengshu Zhu;Hui Xiong", "authorids": "~Xi_Chen40;~Chuan_Qin1;~Chuyu_Fang1;~Chao_Wang14;~Chen_Zhu5;~Fuzhen_Zhuang1;~Hengshu_Zhu1;~Hui_Xiong1", "gender": "M;M;M;M;;M;;M", "homepage": "https://github.com/cx9941;https://dylan-qin.github.io;https://github.com/fangcy99;https://chaowang-ustc.github.io/;;https://fuzhenzhuang.github.io/index.html;http://www.zhuhengshu.com/;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "16/3283-73;24/2771-2;327/9420;188/7759-86;;48/5638;61/10440;262/1686-1.html", "google_scholar": "ktpjw_EAAAAJ;0KTz65wAAAAJ;lfJw_MUAAAAJ;j08V64UAAAAJ;;https://scholar.google.com/citations?hl=en;55MQBzYAAAAJ;cVDF1tkAAAAJ", "orcid": "0009-0009-6180-4524;0000-0002-5354-8630;0009-0007-8353-7979;0000-0001-7717-447X;;0000-0001-9170-7009;0000-0003-4570-643X;0000-0001-6016-6465", "linkedin": ";;;;;;;", "or_profile": "~Xi_Chen40;~Chuan_Qin1;~Chuyu_Fang1;~Chao_Wang14;~Chen_Zhu5;~Fuzhen_Zhuang1;~Hengshu_Zhu1;~Hui_Xiong1", "aff": "University of Science and Technology of China;BOSS Zhipin;Beihang University;HKUST Fok Ying Tung Research Institute, The Hong Kong University of Science and Technology\u00a0(Guangzhou);;Institute of Computing Technology, Chinese Academy of Sciences;Kanzhun Limited (BOSS Zhipin);Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "ustc.edu.cn;kanzhun.com;buaa.edu.cn;ust.hk;;ict.ac.cn;kanzhun.com;hkust.edu", "position": "PhD student;Senior Researcher;MS student;Postdoc;;Associate Professor;Chief Research Scientist;Full Professor", "bibtex": "@inproceedings{\nchen2024jobsdf,\ntitle={Job-{SDF}: A Multi-Granularity Dataset for Job Skill Demand Forecasting and Benchmarking},\nauthor={Xi Chen and Chuan Qin and Chuyu Fang and Chao Wang and Chen Zhu and Fuzhen Zhuang and Hengshu Zhu and Hui Xiong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=TbslDzPxhF}\n}", "github": "", "reviewers": "Nudj;UP9L;MZgr;xQBn", "pdf_size": 436576, "rating": "7;7;7;8", "confidence": "3;4;3;5", "wc_summary_and_contributions": "52;123;97;68", "wc_strengths": "35;60;184;50", "wc_improvement": "39;262;73;58", "wc_limitations": "28;96;38;54", "wc_correctness": "1;33;43;1", "wc_clarity": "1;11;36;1", "wc_relation_to_prior_work": "1;11;46;64", "wc_documentation": "1;18;131;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "159;615;649;298", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 85.0, 27.230497608380205 ], "wc_strengths_avg": [ 82.25, 59.415381005258226 ], "wc_improvement_avg": [ 108.0, 89.72457857242908 ], "wc_limitations_avg": [ 54.0, 25.96150997149434 ], "wc_correctness_avg": [ 19.5, 18.83480820183736 ], "wc_clarity_avg": [ 12.25, 14.306903927824496 ], "wc_relation_to_prior_work_avg": [ 30.5, 25.558755838264116 ], "wc_documentation_avg": [ 37.75, 54.2833998566781 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 430.25, 207.99684492799403 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6651681226553810181&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;kanzhun.com;buaa.edu.cn;ust.hk;;ict.ac.cn;kanzhun.com;hkust.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;3", "aff_unique_norm": "University of Science and Technology of China;BOSS Zhipin;Beihang University;Hong Kong University of Science and Technology;Chinese Academy of Sciences;Kanzhun Limited", "aff_unique_dep": ";;;Fok Ying Tung Research Institute;Institute of Computing Technology;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.zhipin.com;http://www.buaa.edu.cn/;https://www.ust.hk;http://www.ict.ac.cn;https://www.zhipin.com", "aff_unique_abbr": "USTC;BOSS\u76f4\u8058;BUAA;HKUST;CAS;BOSS Zhipin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FLoRA: Federated Fine-Tuning Large Language Models with Heterogeneous Low-Rank Adaptations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95025", "id": "TcCorXxNJQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TcCorXxNJQ", "openreview": "https://openreview.net/forum?id=TcCorXxNJQ", "poster": "", "project": "", "author_site": "Ziyao Wang, Zheyu Shen, Yexiao He, Guoheng Sun, Hongyi Wang, Lingjuan Lyu, Ang Li", "tldr": "", "abstract": "The rapid development of Large Language Models (LLMs) has been pivotal in advancing AI, with pre-trained LLMs being adaptable to diverse downstream tasks through fine-tuning. Federated learning (FL) further enhances fine-tuning in a privacy-aware manner by utilizing clients' local data through in-situ computation, eliminating the need for data movement. However, fine-tuning LLMs, given their massive scale of parameters, poses challenges for clients with constrained and heterogeneous resources in FL. Previous methods employed low-rank adaptation (LoRA) for efficient federated fine-tuning but utilized traditional FL aggregation strategies on LoRA adapters. This approach led to mathematically inaccurate aggregation noise, reducing fine-tuning effectiveness and failing to address heterogeneous LoRAs. In this work, we first highlight the mathematical incorrectness of LoRA aggregation in existing federated fine-tuning methods. We introduce a new approach called FLoRA that enables federated fine-tuning on heterogeneous LoRA adapters across clients through a novel stacking-based aggregation method. Our approach is noise-free and seamlessly supports heterogeneous LoRAs. Extensive experiments demonstrate FLoRA's superior performance in both homogeneous and heterogeneous settings, surpassing state-of-the-art methods. We envision this work as a milestone for efficient, privacy-preserving, and accurate federated fine-tuning of LLMs.", "keywords": "Federated Learning;Large Language Models;LoRA", "primary_area": "infrastructure", "supplementary_material": "", "author": "Ziyao Wang;Zheyu Shen;Yexiao He;Guoheng Sun;Hongyi Wang;Lingjuan Lyu;Ang Li", "authorids": "~Ziyao_Wang2;~Zheyu_Shen2;~Yexiao_He1;~Guoheng_Sun1;~Hongyi_Wang1;~Lingjuan_Lyu1;~Ang_Li6", "gender": "M;M;M;M;M;F;M", "homepage": "https://ziyaow-about.netlify.app;https://shenzheyu.github.io/;;;https://hwang595.github.io/;https://sites.google.com/view/lingjuan-lyu;https://www.ang-li.com", "dblp": ";377/3867;;;15/832-1.html;178/9876;33/2805-5", "google_scholar": "_PdzpfAAAAAJ;AHw5vnwAAAAJ;https://scholar.google.com/citations?hl=en;fMnmSXsAAAAJ;zYdZORsAAAAJ;;JVKSaWIAAAAJ", "orcid": ";;;;;;", "linkedin": "https://www.linkedin.cn/injobs/in/ziyao-wang-370229234;zheyushen/;;guoheng-sun-1b640126a/;hongyi-wang-b89651102/;;", "or_profile": "~Ziyao_Wang2;~Zheyu_Shen2;~Yexiao_He1;~Guoheng_Sun1;~Hongyi_Wang1;~Lingjuan_Lyu1;~Ang_Li6", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Sichuan University;Carnegie Mellon University;Sony;Duke University", "aff_domain": "umd.edu;umd.edu;umd.edu;scu.edu.cn;andrew.cmu.edu;sony.com;duke.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;Researcher;scientist;PhD student", "bibtex": "@inproceedings{\nwang2024flora,\ntitle={{FL}o{RA}: Federated Fine-Tuning Large Language Models with Heterogeneous Low-Rank Adaptations},\nauthor={Ziyao Wang and Zheyu Shen and Yexiao He and Guoheng Sun and Hongyi Wang and Lingjuan Lyu and Ang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TcCorXxNJQ}\n}", "github": "", "reviewers": "4FRM;G8j8;7eRV", "pdf_size": 2484328, "rating": "5;7;7", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "48;62;75", "wc_strengths": "12;58;44", "wc_weaknesses": "56;118;85", "wc_questions": "1;23;4", "wc_limitations": "1;4;4", "wc_review": "118;265;212", "wc_reply_reviewers": "274;138;34", "wc_reply_authors": "1556;0;0", "reply_reviewers": "4;1;1", "reply_authors": "7;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.666666666666664, 11.025223605694151 ], "wc_strengths_avg": [ 38.0, 19.252705437591537 ], "wc_weaknesses_avg": [ 86.33333333333333, 25.32894698868383 ], "wc_questions_avg": [ 9.333333333333334, 9.741092797468305 ], "wc_limitations_avg": [ 3.0, 1.4142135623730951 ], "wc_review_avg": [ 198.33333333333334, 60.78559770939897 ], "wc_reply_reviewers_avg": [ 148.66666666666666, 98.26947078767083 ], "wc_reply_authors_avg": [ 518.6666666666666, 733.5054343508453 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.0, 2.8284271247461903 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6739782570795037239&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umd.edu;umd.edu;umd.edu;scu.edu.cn;andrew.cmu.edu;sony.com;duke.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3;4", "aff_unique_norm": "University of Maryland;Sichuan University;Carnegie Mellon University;Sony Corporation;Duke University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www/umd.edu;https://www.scu.edu.cn;https://www.cmu.edu;https://www.sony.com;https://www.duke.edu", "aff_unique_abbr": "UMD;SCU;CMU;Sony;Duke", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;1;0;2;0", "aff_country_unique": "United States;China;Japan" }, { "title": "Unveiling The Matthew Effect Across Channels: Assessing Layer Width Sufficiency via Weight Norm Variance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95024", "id": "Tcft2V63Vd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tcft2V63Vd", "openreview": "https://openreview.net/forum?id=Tcft2V63Vd", "poster": "/media/PosterPDFs/NeurIPS%202024/95024.png?t=1731667429.2790277", "project": "", "author_site": "Yiting Chen, Jiazi Bu, Junchi Yan", "tldr": "", "abstract": "The trade-off between cost and performance has been a longstanding and critical issue for deep neural networks. \nOne key factor affecting the computational cost is the width of each layer. \nHowever, in practice, the width of layers in a neural network is mostly empirically determined. In this paper, we show that a pattern regarding the variance of weight norm corresponding to different channels can indicate whether the layer is sufficiently wide and may help us better allocate computational resources across the layers.\nStarting from a simple intuition that channels with larger weights would have larger gradients and the difference in weight norm enlarges between channels with similar weight, we empirically validate that wide and narrow layers show two different patterns with experiments across different data modalities and network architectures. \nBased on the two different patterns, we identify three stages during training and explain each stage with corresponding evidence. We further propose to adjust the width based on the identified pattern and show that conventional layer width settings for CNNs could be adjusted to reduce the number of parameters while boosting the performance.", "keywords": "Neural Network Layer width.+Neural Network Training Dynamics", "primary_area": "other", "supplementary_material": "", "author": "Yiting Chen;Jiazi Bu;Junchi Yan", "authorids": "~Yiting_Chen1;~Jiazi_Bu1;~Junchi_Yan2", "gender": "M;M;M", "homepage": "https://ytchen981.github.io/;https://bujiazi.github.io/;http://thinklab.sjtu.edu.cn/", "dblp": "135/6971;;60/7949.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;ga230VoAAAAJ", "orcid": ";;0000-0001-9639-7679", "linkedin": ";;", "or_profile": "~Yiting_Chen1;~Jiazi_Bu1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nchen2024unveiling,\ntitle={Unveiling The Matthew Effect Across Channels: Assessing Layer Width Sufficiency via Weight Norm Variance},\nauthor={Yiting Chen and Jiazi Bu and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tcft2V63Vd}\n}", "github": "", "reviewers": "iwdV;nV2M;Q6z7;nGxF", "pdf_size": 4020052, "rating": "4;6;6;8", "confidence": "5;4;4;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;4;3;3", "wc_summary": "63;54;57;57", "wc_strengths": "74;66;52;45", "wc_weaknesses": "262;112;66;19", "wc_questions": "4;2;249;10", "wc_limitations": "3;9;3;1", "wc_review": "406;243;427;132", "wc_reply_reviewers": "53;0;0;0", "wc_reply_authors": "413;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.75, 3.2691742076555053 ], "wc_strengths_avg": [ 59.25, 11.388041973930374 ], "wc_weaknesses_avg": [ 114.75, 91.15200217219586 ], "wc_questions_avg": [ 66.25, 105.55182376444284 ], "wc_limitations_avg": [ 4.0, 3.0 ], "wc_review_avg": [ 302.0, 121.26623602635648 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 103.25, 178.83424588148657 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16045341462192411684&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "MicroAdam: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95023", "id": "Tck41RANGK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tck41RANGK", "openreview": "https://openreview.net/forum?id=Tck41RANGK", "poster": "/media/PosterPDFs/NeurIPS%202024/95023.png?t=1731616200.4165943", "project": "", "author_site": "Ionut-Vlad Modoranu, Mher Safaryan, Grigory Malinovsky, Eldar Kurti\u0107, Thomas Robert, Peter Richtarik, Dan Alistarh", "tldr": "", "abstract": "We propose a new variant of the Adam optimizer called MicroAdam that specifically minimizes memory overheads, while maintaining theoretical convergence guarantees. We achieve this by compressing the gradient information before it is fed into the optimizer state, \nthereby reducing its memory footprint significantly. We control the resulting compression error via a novel instance of the classical *error feedback* mechanism from distributed optimization in which *the error correction information is itself compressed* to allow for practical memory gains. We prove that the resulting approach maintains theoretical convergence guarantees competitive to those of AMSGrad, while providing good practical performance. Specifically, we show that MicroAdam can be implemented efficiently on GPUs: on both million-scale (BERT) and billion-scale (LLaMA) models, MicroAdam provides practical convergence competitive to that of the uncompressed Adam baseline, with lower memory usage and similar running time. Our code is available at https://github.com/IST-DASLab/MicroAdam.", "keywords": "adaptive optimization;adam;efficiency;memory efficiency", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/e37263f978215f4b3a07433922905227f14fb4a6.zip", "author": "Ionut-Vlad Modoranu;Mher Safaryan;Grigory Malinovsky;Eldar Kurtic;Thomas Robert;Peter Richt\u00e1rik;Dan Alistarh", "authorids": "~Ionut-Vlad_Modoranu1;~Mher_Safaryan1;~Grigory_Malinovsky1;~Eldar_Kurtic1;~Thomas_Robert2;~Peter_Richt\u00e1rik1;~Dan_Alistarh7", "gender": "M;M;M;M;M;M;M", "homepage": ";https://mher-safaryan.github.io;https://grigory-malinovsky.github.io;;;http://people.csail.mit.edu/alistarh/;https://richtarik.org", "dblp": "275/9983;259/1444;262/3277.html;297/3713;;36/3251.html;62/8001", "google_scholar": "N56bz4gAAAAJ;dJNwgT8AAAAJ;4w2W9KQAAAAJ;https://scholar.google.com/citations?hl=en;czlNudEAAAAJ;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-6290-1398;;;;;0000-0003-4380-5848", "linkedin": "ionut-vlad-modoranu/;mher-safaryan-94565a257/;;eldar-kurti%C4%87-77963b160/;thomas-robert-x21/;;richtarik/", "or_profile": "~Ionut-Vlad_Modoranu1;~Mher_Safaryan1;~Grigory_Malinovsky1;~Eldar_Kurtic1;~Thomas_Robert2;~Dan_Alistarh1;~Peter_Richtarik1", "aff": "Institute of Science and Technology Austria;Institute of Science and Technology;Samsung;Institute of Science and Technology Austria;Institute of Science and Technology;Institute of Science and Technology;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "ist.ac.at;ist.ac.at;samsung.com;ist.ac.at;ist.ac.at;ist.ac.at;kaust.edu.sa", "position": "PhD student;Postdoc;Intern;Researcher;Intern;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmodoranu2024microadam,\ntitle={MicroAdam: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence},\nauthor={Ionut-Vlad Modoranu and Mher Safaryan and Grigory Malinovsky and Eldar Kurtic and Thomas Robert and Peter Richt{\\'a}rik and Dan Alistarh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tck41RANGK}\n}", "github": "", "reviewers": "2Efz;nVyV;u3Hk;C8Q3;8ZAo", "pdf_size": 3108479, "rating": "3;4;6;6;6", "confidence": "4;4;3;2;3", "soundness": "2;3;3;3;3", "novelty": "1;2;2;2;3", "presentation": "1;3;3;3;2", "wc_summary": "10;39;35;77;179", "wc_strengths": "16;38;45;35;69", "wc_weaknesses": "2;405;41;73;51", "wc_questions": "2;2;41;33;53", "wc_limitations": "323;2;5;3;1", "wc_review": "353;486;167;221;353", "wc_reply_reviewers": "80;0;22;57;0", "wc_reply_authors": "357;0;0;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 68.0, 59.49117581625026 ], "wc_strengths_avg": [ 40.6, 17.141761869772896 ], "wc_weaknesses_avg": [ 114.4, 147.1075796823536 ], "wc_questions_avg": [ 26.2, 20.75957610357206 ], "wc_limitations_avg": [ 66.8, 128.10683041899054 ], "wc_review_avg": [ 316.0, 112.12849771579035 ], "wc_reply_reviewers_avg": [ 31.8, 31.864714026647093 ], "wc_reply_authors_avg": [ 71.4, 142.80000000000004 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8451542547285165, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8025113889216186829&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ist.ac.at;ist.ac.at;samsung.com;ist.ac.at;ist.ac.at;ist.ac.at;kaust.edu.sa", "author_num": 7, "aff_unique_index": "0;1;2;0;1;1;3", "aff_unique_norm": "Institute of Science and Technology Austria;Institute of Science and Technology;Samsung;King Abdullah University of Science and Technology", "aff_unique_dep": ";;Samsung;", "aff_unique_url": "https://www.ist.ac.at;;https://www.samsung.com;https://www.kaust.edu.sa", "aff_unique_abbr": "IST Austria;;Samsung;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;3", "aff_country_unique": "Austria;;South Korea;Saudi Arabia" }, { "title": "Hyper-opinion Evidential Deep Learning for Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95022", "id": "Te8vI2wGTh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Te8vI2wGTh", "openreview": "https://openreview.net/forum?id=Te8vI2wGTh", "poster": "/media/PosterPDFs/NeurIPS%202024/95022.png?t=1729505912.819199", "project": "", "author_site": "Jingen Qu, Yufei Chen, Xiaodong Yue, Wei Fu, Qiguang Huang", "tldr": "", "abstract": "Evidential Deep Learning (EDL), grounded in Evidence Theory and Subjective Logic (SL), provides a robust framework to estimate uncertainty for out-of-distribution (OOD) detection alongside traditional classification probabilities.However, the EDL framework is constrained by its focus on evidence that supports only single categories, neglecting the other collective evidences that could corroborate multiple in-distribution categories. This limitation leads to a diminished estimation of uncertainty and a subsequent decline in OOD detection performance.Additionally, EDL encounters the vanishing gradient problem within its fully-connected layers, further degrading classification accuracy.To address these issues, we introduce hyper-domain and propose Hyper-opinion Evidential Deep Learning (HEDL). HEDL extends the evidence modeling paradigm by explicitly integrating sharp evidence, which supports a singular category, with vague evidence that accommodates multiple potential categories.Additionally, we propose a novel opinion projection mechanism that translates hyper-opinion into multinomial-opinion, which is then optimized within the EDL framework to ensure precise classification and refined uncertainty estimation.HEDL integrates evidences across various categories to yield a holistic evidentiary foundation for achieving superior OOD detection. Furthermore, our proposed opinion projection method effectively mitigates the vanishing gradient issue, ensuring classification accuracy without additional model complexity. Extensive experiments over many datasets demonstrate our proposed method outperforms existing OOD detection methods.", "keywords": "Out-of-distribution Detection;Evidential Deep Learning;Uncertainty Estimation.", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/7b73bd9bc780c768dec522393aa8c39e539b3c7a.zip", "author": "Jingen Qu;Yufei Chen;Xiaodong Yue;Wei Fu;Qiguang Huang", "authorids": "~Jingen_Qu1;~Yufei_Chen1;~Xiaodong_Yue2;~Wei_Fu2;~Qiguang_Huang1", "gender": "M;F;M;M;M", "homepage": "https://github.com/NewCityLetter;https://see.tongji.edu.cn/info/1385/10329.htm;https://xiaodongml.github.io/;https://github.com/KellaDoe;https://github.com/MissingJoe", "dblp": ";79/4489-2;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=yZ212gsAAAAJ;", "orcid": ";0000-0002-3645-9046;;0009-0005-1934-1969;", "linkedin": ";;;;", "or_profile": "~Jingen_Qu1;~Yufei_Chen1;~Xiaodong_Yue2;~Wei_Fu2;~Qiguang_Huang1", "aff": "Tongji University;Tongji University;Shanghai University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;shu.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "MS student;Associate Professor;Full Professor;MS student;MS student", "bibtex": "@inproceedings{\nqu2024hyperopinion,\ntitle={Hyper-opinion Evidential Deep Learning for Out-of-Distribution Detection},\nauthor={Jingen Qu and Yufei Chen and Xiaodong Yue and Wei Fu and Qiguang Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Te8vI2wGTh}\n}", "github": "", "reviewers": "Y3V6;jaEe;hxrE;cb7H", "pdf_size": 1473950, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "4;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;2;3", "wc_summary": "95;78;79;59", "wc_strengths": "65;69;30;110", "wc_weaknesses": "135;151;134;77", "wc_questions": "64;106;6;25", "wc_limitations": "17;11;3;15", "wc_review": "376;415;252;286", "wc_reply_reviewers": "66;0;35;0", "wc_reply_authors": "33;0;18;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 12.754901018824098 ], "wc_strengths_avg": [ 68.5, 28.359301824974466 ], "wc_weaknesses_avg": [ 124.25, 28.101378969723175 ], "wc_questions_avg": [ 50.25, 38.382124745771954 ], "wc_limitations_avg": [ 11.5, 5.361902647381804 ], "wc_review_avg": [ 332.25, 65.84214076106578 ], "wc_reply_reviewers_avg": [ 25.25, 27.52612395525385 ], "wc_reply_authors_avg": [ 12.75, 13.808964479641476 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kruGBA04tyAJ:scholar.google.com/&scioq=Hyper-opinion+Evidential+Deep+Learning+for+Out-of-Distribution+Detection&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "tongji.edu.cn;tongji.edu.cn;shu.edu.cn;tongji.edu.cn;tongji.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tongji University;Shanghai University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tongji.edu.cn;https://www.shu.edu.cn", "aff_unique_abbr": "Tongji;SHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fundamental Limits of Prompt Compression: A Rate-Distortion Framework for Black-Box Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95021", "id": "TeBKVfhP2M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TeBKVfhP2M", "openreview": "https://openreview.net/forum?id=TeBKVfhP2M", "poster": "/media/PosterPDFs/NeurIPS%202024/95021.png?t=1731713463.9845648", "project": "", "author_site": "Alliot Nagle, Adway Girish, Marco Bondaschi, Michael Gastpar, Ashok Vardhan Makkuva, Hyeji Kim", "tldr": "", "abstract": "We formalize the problem of prompt compression for large language models (LLMs) and present a framework to unify token-level prompt compression methods which create hard prompts for black-box models. We derive the distortion-rate function for this setup as a linear program, and provide an efficient algorithm to compute this fundamental limit via the dual of the linear program. Using the distortion-rate function as the baseline, we study the performance of existing compression schemes on a synthetic dataset consisting of prompts generated from a Markov chain, natural language queries, and their respective answers. Our empirical analysis demonstrates the criticality of query-aware prompt compression, where the compressor has knowledge of the downstream task/query for the black-box LLM. We show that there is a large gap between the performance of current prompt compression methods and the optimal strategy, and propose Adaptive QuerySelect, a query-aware, variable-rate adaptation of a prior work to close the gap. We extend our experiments to a small natural language dataset to further confirm our findings on our synthetic dataset.", "keywords": "information theory;prompt compression;LLMs;optimization", "primary_area": "other", "supplementary_material": "/attachment/c1ab8df563476101ca79d92266d35b9cda5d5506.zip", "author": "Alliot Nagle;Adway Girish;Marco Bondaschi;Michael Gastpar;Ashok Vardhan Makkuva;Hyeji Kim", "authorids": "~Alliot_Nagle1;~Adway_Girish1;~Marco_Bondaschi1;~Michael_Gastpar1;~Ashok_Vardhan_Makkuva1;~Hyeji_Kim1", "gender": "M;M;M;;;", "homepage": "https://acnagle.com/;https://sites.google.com/view/adwaygirish;;https://people.epfl.ch/michael.gastpar;;", "dblp": ";324/4046.html;255/4933;;;", "google_scholar": "OUilkgIAAAAJ;WOt1dxsAAAAJ;;https://scholar.google.ch/citations?user=IQ3hcw4AAAAJ;;", "orcid": ";;0000-0002-4158-2487;0000-0002-5499-5336;;", "linkedin": "alliot-nagle/;adwaygirish/;;;;", "or_profile": "~Alliot_Nagle1;~Adway_Girish1;~Marco_Bondaschi1;~Michael_Gastpar1;~Ashok_Vardhan_Makkuva1;~Hyeji_Kim1", "aff": "University of Texas at Austin;EPFL;EPFL - EPF Lausanne;School of Computer and Communication Sciences, EPFL - EPF Lausanne;;", "aff_domain": "utexas.edu;epfl.ch;epfl.ch;ic.epfl.ch;;", "position": "PhD student;PhD student;PhD student;Full Professor;;", "bibtex": "@inproceedings{\nnagle2024fundamental,\ntitle={Fundamental Limits of Prompt Compression: A Rate-Distortion Framework for Black-Box Language Models},\nauthor={Alliot Nagle and Adway Girish and Marco Bondaschi and Michael Gastpar and Ashok Vardhan Makkuva and Hyeji Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TeBKVfhP2M}\n}", "github": "", "reviewers": "sxbS;Jydp;cHot", "pdf_size": 951679, "rating": "7;7;7", "confidence": "3;4;5", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "103;62;76", "wc_strengths": "67;66;48", "wc_weaknesses": "259;63;199", "wc_questions": "61;23;206", "wc_limitations": "11;8;1", "wc_review": "501;222;530", "wc_reply_reviewers": "20;19;19", "wc_reply_authors": "48;47;15", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 80.33333333333333, 17.016332024133625 ], "wc_strengths_avg": [ 60.333333333333336, 8.73053390247253 ], "wc_weaknesses_avg": [ 173.66666666666666, 81.99728992811626 ], "wc_questions_avg": [ 96.66666666666667, 78.8514778273404 ], "wc_limitations_avg": [ 6.666666666666667, 4.189935029992179 ], "wc_review_avg": [ 417.6666666666667, 138.8628420020593 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 0.4714045207910317 ], "wc_reply_authors_avg": [ 36.666666666666664, 15.326085243430198 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5253773293445789868&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "utexas.edu;epfl.ch;epfl.ch;ic.epfl.ch;;", "author_num": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Texas at Austin;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.epfl.ch", "aff_unique_abbr": "UT Austin;EPFL", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Austin;;Lausanne", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Switzerland" }, { "title": "DAT: Improving Adversarial Robustness via Generative Amplitude Mix-up in Frequency Domain", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95020", "id": "TeQvz5AlI8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TeQvz5AlI8", "openreview": "https://openreview.net/forum?id=TeQvz5AlI8", "poster": "/media/PosterPDFs/NeurIPS%202024/95020.png?t=1731240869.2129424", "project": "", "author_site": "Fengpeng Li, Kemou Li, Haiwei Wu, Jinyu Tian, Jiantao Zhou", "tldr": "", "abstract": "To protect deep neural networks (DNNs) from adversarial attacks, adversarial training (AT) is developed by incorporating adversarial examples (AEs) into model training. Recent studies show that adversarial attacks disproportionately impact the patterns within the phase of the sample's frequency spectrum---typically containing crucial semantic information---more than those in the amplitude, resulting in the model's erroneous categorization of AEs. We find that, by mixing the amplitude of training samples' frequency spectrum with those of distractor images for AT, the model can be guided to focus on phase patterns unaffected by adversarial perturbations. As a result, the model's robustness can be improved. Unfortunately, it is still challenging to select appropriate distractor images, which should mix the amplitude without affecting the phase patterns. To this end, in this paper, we propose an optimized **Adversarial Amplitude Generator (AAG)** to achieve a better tradeoff between improving the model's robustness and retaining phase patterns. Based on this generator, together with an efficient AE production procedure, we design a new **Dual Adversarial Training (DAT)** strategy. Experiments on various datasets show that our proposed DAT leads to significantly improved robustness against diverse adversarial attacks. The source code is available at https://github.com/Feng-peng-Li/DAT.", "keywords": "Deep learning;Model robustness", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/27a0eb0230a7d70616950c7f12798a8dcbab7543.zip", "author": "Fengpeng Li;Kemou Li;Haiwei Wu;Jinyu Tian;Jiantao Zhou", "authorids": "~Fengpeng_Li1;~Kemou_Li1;~Haiwei_Wu1;~Jinyu_Tian2;~Jiantao_Zhou1", "gender": "M;M;;M;M", "homepage": "https://scholar.google.com/citations?user=DgaQZ4wAAAAJ&hl=en;https://kemoulee.github.io/;;https://jinyutian.github.io/;https://www.fst.um.edu.mo/en/staff/jtzhou.html", "dblp": "252/6319;363/9782;;14/1023-1.html;52/4786-1", "google_scholar": "DgaQZ4wAAAAJ;Tdnc10kAAAAJ;;wQZDPyoAAAAJ;mcROAxAAAAAJ", "orcid": ";0000-0001-6669-9713;;;", "linkedin": ";;;;", "or_profile": "~Fengpeng_Li1;~Kemou_Li1;~Haiwei_Wu1;~Jinyu_Tian2;~Jiantao_Zhou1", "aff": "University of Macau;University of Macau;;Macau University of Science and Technology;University of Macau", "aff_domain": "umac.mo;umac.mo;;must.edu.mo;umac.mo", "position": "PhD student;PhD student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2024dat,\ntitle={{DAT}: Improving Adversarial Robustness via Generative Amplitude Mix-up in Frequency Domain},\nauthor={Fengpeng Li and Kemou Li and Haiwei Wu and Jinyu Tian and Jiantao Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TeQvz5AlI8}\n}", "github": "", "reviewers": "KucH;FTsi;8ttu", "pdf_size": 939093, "rating": "5;6;6", "confidence": "5;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "60;35;80", "wc_strengths": "20;17;27", "wc_weaknesses": "224;28;181", "wc_questions": "27;231;2", "wc_limitations": "31;28;1", "wc_review": "362;339;291", "wc_reply_reviewers": "60;13;62", "wc_reply_authors": "990;24;926", "reply_reviewers": "1;1;2", "reply_authors": "5;2;5", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.333333333333336, 18.408935028645434 ], "wc_strengths_avg": [ 21.333333333333332, 4.189935029992178 ], "wc_weaknesses_avg": [ 144.33333333333334, 84.11235871671231 ], "wc_questions_avg": [ 86.66666666666667, 102.56813453613272 ], "wc_limitations_avg": [ 20.0, 13.490737563232042 ], "wc_review_avg": [ 330.6666666666667, 29.578520735305357 ], "wc_reply_reviewers_avg": [ 45.0, 22.642143596988927 ], "wc_reply_authors_avg": [ 646.6666666666666, 441.0663845222798 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bKngCCe1JYwJ:scholar.google.com/&scioq=DAT:+Improving+Adversarial+Robustness+via+Generative+Amplitude+Mix-up+in+Frequency+Domain&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "umac.mo;umac.mo;;must.edu.mo;umac.mo", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Macau;Macau University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.um.edu.mo;https://www.must.edu.mo", "aff_unique_abbr": "UM;MUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Macau SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DiffNorm: Self-Supervised Normalization for Non-autoregressive Speech-to-speech Translation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95019", "id": "Tg2EVad7VF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tg2EVad7VF", "openreview": "https://openreview.net/forum?id=Tg2EVad7VF", "poster": "", "project": "", "author_site": "Weiting Tan, Jingyu Zhang, Lingfeng Shen, Daniel Khashabi, Philipp Koehn", "tldr": "", "abstract": "Non-autoregressive Transformers (NATs) are recently applied in direct speech-to-speech translation systems, which convert speech across different languages without intermediate text data. Although NATs generate high-quality outputs and offer faster inference than autoregressive models, they tend to produce incoherent and repetitive results due to complex data distribution (e.g., acoustic and linguistic variations in speech). In this work, we introduce DiffNorm, a diffusion-based normalization strategy that simplifies data distributions for training NAT models. After training with a self-supervised noise estimation objective, DiffNorm constructs normalized target data by denoising synthetically corrupted speech features. Additionally, we propose to regularize NATs with classifier-free guidance, improving model robustness and translation quality by randomly dropping out source information during training. Our strategies result in a notable improvement of about $+7$ ASR-BLEU for English-Spanish (En-Es) translation and $+2$ ASR-BLEU for English-French (En-Fr) on the CVSS benchmark, while attaining over $14\\times$ speedup for En-Es and $5 \\times$ speedup for En-Fr translations compared to autoregressive baselines.", "keywords": "speech-to-speech translation;non-autoregressive modeling;diffusion models", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Weiting Tan;Jingyu Zhang;Lingfeng Shen;Daniel Khashabi;Philipp Koehn", "authorids": "~Weiting_Tan1;~Jingyu_Zhang2;~Lingfeng_Shen1;~Daniel_Khashabi2;~Philipp_Koehn2", "gender": "M;;M;M;M", "homepage": "https://steventan0110.github.io/;https://jackz.io/;;http://danielkhashabi.com/;http://www.cs.jhu.edu/~phi/", "dblp": "208/0745;92/3672.html;240/5490.html;71/10515;84/4538.html", "google_scholar": "hD8E4gYAAAAJ;9EC0sDMAAAAJ;PoSTdLAAAAAJ;pK2kQvgAAAAJ;OsIZgIYAAAAJ", "orcid": ";;;;0000-0003-1565-064X", "linkedin": "weiting-steven-tan-30bb4a175/;;;;philipp-koehn-bbb8024/", "or_profile": "~Weiting_Tan1;~Jingyu_Zhang2;~Lingfeng_Shen1;~Daniel_Khashabi2;~Philipp_Koehn2", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;cs.jhu.edu;jh.edu;jhu.edu;jhu.edu", "position": "PhD student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ntan2024diffnorm,\ntitle={DiffNorm: Self-Supervised Normalization for Non-autoregressive Speech-to-speech Translation},\nauthor={Weiting Tan and Jingyu Zhang and Lingfeng Shen and Daniel Khashabi and Philipp Koehn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tg2EVad7VF}\n}", "github": "", "reviewers": "DRdm;g4Z7;AuR5", "pdf_size": 1653582, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "54;60;56", "wc_strengths": "30;23;76", "wc_weaknesses": "139;185;109", "wc_questions": "64;91;90", "wc_limitations": "12;9;38", "wc_review": "299;368;369", "wc_reply_reviewers": "0;0;5", "wc_reply_authors": "65;643;456", "reply_reviewers": "0;0;1", "reply_authors": "2;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.666666666666664, 2.494438257849294 ], "wc_strengths_avg": [ 43.0, 23.50886357667394 ], "wc_weaknesses_avg": [ 144.33333333333334, 31.255221785949445 ], "wc_questions_avg": [ 81.66666666666667, 12.498888839501783 ], "wc_limitations_avg": [ 19.666666666666668, 13.02134998974974 ], "wc_review_avg": [ 345.3333333333333, 32.765157645821816 ], "wc_reply_reviewers_avg": [ 1.6666666666666667, 2.357022603955158 ], "wc_reply_authors_avg": [ 388.0, 240.8166660899255 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DPuZA8LpYiAJ:scholar.google.com/&scioq=DiffNorm:+Self-Supervised+Normalization+for+Non-autoregressive+Speech-to-speech+Translation&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "jhu.edu;cs.jhu.edu;jh.edu;jhu.edu;jhu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "In-Context Learning of a Linear Transformer Block: Benefits of the MLP Component and One-Step GD Initialization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95018", "id": "Thou1rKdpZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Thou1rKdpZ", "openreview": "https://openreview.net/forum?id=Thou1rKdpZ", "poster": "", "project": "", "author_site": "Ruiqi Zhang, Jingfeng Wu, Peter Bartlett", "tldr": "", "abstract": "We study the \\emph{in-context learning} (ICL) ability of a \\emph{Linear Transformer Block} (LTB) that combines a linear attention component and a linear multi-layer perceptron (MLP) component. \nFor ICL of linear regression with a Gaussian prior and a \\emph{non-zero mean}, we show that LTB can achieve nearly Bayes optimal ICL risk. In contrast, using only linear attention must incur an irreducible additive approximation error. \nFurthermore, we establish a correspondence between LTB and one-step gradient descent estimators with learnable initialization ($\\mathsf{GD}-\\beta$), in the sense that every $\\mathsf{GD}-\\beta$ estimator can be implemented by an LTB estimator and every optimal LTB estimator that minimizes the in-class ICL risk is effectively a $\\mathsf{GD}-\\beta$ estimator.\nFinally, we show that $\\mathsf{GD}-\\beta$ estimators can be efficiently optimized with gradient flow, despite a non-convex training objective.\nOur results reveal that LTB achieves ICL by implementing $\\mathsf{GD}-\\beta$, and they highlight the role of MLP layers in reducing approximation error.", "keywords": "In-Context Learning;Transformers;Approximation Theory;Optimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ruiqi Zhang;Jingfeng Wu;Peter Bartlett", "authorids": "~Ruiqi_Zhang2;~Jingfeng_Wu1;~Peter_Bartlett1", "gender": "M;M;M", "homepage": "https://rqzhangberkeley.github.io/;https://uuujf.github.io;https://www.stat.berkeley.edu/~bartlett/", "dblp": ";;https://dblp.org/pers/hd/b/Bartlett:Peter_L=", "google_scholar": "uErE2UUAAAAJ;z-KILD8AAAAJ;yQNhFGUAAAAJ", "orcid": ";0009-0009-3414-4487;", "linkedin": ";jingfeng-wu-79205b184/;", "or_profile": "~Ruiqi_Zhang2;~Jingfeng_Wu1;~Peter_Bartlett1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nzhang2024incontext,\ntitle={In-Context Learning of a Linear Transformer Block: Benefits of the {MLP} Component and One-Step {GD} Initialization},\nauthor={Ruiqi Zhang and Jingfeng Wu and Peter Bartlett},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Thou1rKdpZ}\n}", "github": "", "reviewers": "D6mY;xFNi;XdPR;mBt4", "pdf_size": 901416, "rating": "5;6;7;7", "confidence": "4;3;5;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "123;105;119;79", "wc_strengths": "105;93;66;93", "wc_weaknesses": "47;317;163;20", "wc_questions": "108;3;83;236", "wc_limitations": "1;3;7;7", "wc_review": "384;521;438;435", "wc_reply_reviewers": "38;0;24;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.5, 17.226433176952217 ], "wc_strengths_avg": [ 89.25, 14.289419162443238 ], "wc_weaknesses_avg": [ 136.75, 117.11612826592246 ], "wc_questions_avg": [ 107.5, 83.7152913152669 ], "wc_limitations_avg": [ 4.5, 2.598076211353316 ], "wc_review_avg": [ 444.5, 49.10448044730745 ], "wc_reply_reviewers_avg": [ 23.0, 14.177446878757825 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4376763432021465812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;berkeley", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Temporal Link Prediction via Temporal Walk Matrix Projection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95017", "id": "Ti3ciyqlS3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ti3ciyqlS3", "openreview": "https://openreview.net/forum?id=Ti3ciyqlS3", "poster": "/media/PosterPDFs/NeurIPS%202024/95017.png?t=1733404877.8921697", "project": "", "author_site": "Xiaodong Lu, Leilei Sun, Tongyu Zhu, Weifeng Lv", "tldr": "", "abstract": "Temporal link prediction, aiming at predicting future interactions among entities based on historical interactions, is crucial for a series of real-world applications. Although previous methods have demonstrated the importance of relative encodings for effective temporal link prediction, computational efficiency remains a major concern in constructing these encodings. Moreover, existing relative encodings are usually constructed based on structural connectivity, where temporal information is seldom considered. To address the aforementioned issues, we first analyze existing relative encodings and unify them as a function of temporal walk matrices. This unification establishes a connection between relative encodings and temporal walk matrices, providing a more principled way for analyzing and designing relative encodings. Based on this analysis, we propose a new temporal graph neural network called TPNet, which introduces a temporal walk matrix that incorporates the time decay effect to simultaneously consider both temporal and structural information. Moreover, TPNet designs a random feature propagation mechanism with theoretical guarantees to implicitly maintain the temporal walk matrices, which improves the computation and storage efficiency. Experimental results on 13 benchmark datasets verify the effectiveness and efficiency of TPNet, where TPNet outperforms other baselines on most datasets and achieves a maximum speedup of $33.3 \\times$ compared to the SOTA baseline.", "keywords": "Temporal Link Prediction;Dynamic Graph Learning;Graph Neural Network", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/3503be8236799326710cf9d4e418fb05cfb48c40.zip", "author": "Xiaodong Lu;Leilei Sun;Tongyu Zhu;Weifeng Lv", "authorids": "~Xiaodong_Lu1;~Leilei_Sun2;~Tongyu_Zhu1;~Weifeng_Lv1", "gender": "M;M;M;M", "homepage": "http://test;https://scse.buaa.edu.cn/info/1079/9207.htm;http://scse.buaa.edu.cn/info/1038/2994.htm;http://www.buaa.edu.cn", "dblp": ";152/1810.html;35/5757;15/2736.html", "google_scholar": ";QVHvhM4AAAAJ;;", "orcid": "0000-0003-1411-1484;0000-0002-0157-1716;0000-0002-8948-3103;0000-0003-0227-0891", "linkedin": ";;;", "or_profile": "~Xiaodong_Lu1;~Leilei_Sun2;~Tongyu_Zhu1;~Weifeng_Lv1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024improving,\ntitle={Improving Temporal Link Prediction via Temporal Walk Matrix Projection},\nauthor={Xiaodong Lu and Leilei Sun and Tongyu Zhu and Weifeng Lv},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ti3ciyqlS3}\n}", "github": "", "reviewers": "SMMz;YNEN;vApy", "pdf_size": 891865, "rating": "6;7;8", "confidence": "1;2;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "50;56;53", "wc_strengths": "78;104;38", "wc_weaknesses": "53;211;38", "wc_questions": "3;121;67", "wc_limitations": "1;8;11", "wc_review": "185;500;207", "wc_reply_reviewers": "12;25;19", "wc_reply_authors": "16;13;14", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 1.6666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 53.0, 2.449489742783178 ], "wc_strengths_avg": [ 73.33333333333333, 27.145697428669774 ], "wc_weaknesses_avg": [ 100.66666666666667, 78.25741001478532 ], "wc_questions_avg": [ 63.666666666666664, 48.230925993829125 ], "wc_limitations_avg": [ 6.666666666666667, 4.189935029992179 ], "wc_review_avg": [ 297.3333333333333, 143.58814559550368 ], "wc_reply_reviewers_avg": [ 18.666666666666668, 5.312459150169742 ], "wc_reply_authors_avg": [ 14.333333333333334, 1.247219128924647 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13048188844540578146&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Task Confusion and Catastrophic Forgetting in Class-Incremental Learning: A Mathematical Framework for Discriminative and Generative Modelings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95016", "id": "Tj5wJslj0R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tj5wJslj0R", "openreview": "https://openreview.net/forum?id=Tj5wJslj0R", "poster": "/media/PosterPDFs/NeurIPS%202024/95016.png?t=1731268453.641346", "project": "", "author_site": "Milad Khademi Nori, Il-Min Kim", "tldr": "", "abstract": "In class-incremental learning (class-IL), models must classify all previously seen classes at test time without task-IDs, leading to task confusion. Despite being a key challenge, task confusion lacks a theoretical understanding. We present a novel mathematical framework for class-IL and prove the Infeasibility Theorem, showing optimal class-IL is impossible with discriminative modeling due to task confusion. However, we establish the Feasibility Theorem, demonstrating that generative modeling can achieve optimal class-IL by overcoming task confusion. We then assess popular class-IL strategies, including regularization, bias-correction, replay, and generative classifier, using our framework. Our analysis suggests that adopting generative modeling, either for generative replay or direct classification (generative classifier), is essential for optimal class-IL.", "keywords": "Catastrophic Forgetting;Class-Incremental Learning;Continual Learning;Task Confusion.", "primary_area": "online_learning", "supplementary_material": "/attachment/ad62321d49e2904e704c5f2fc157047d58dca3d7.zip", "author": "Milad Khademi Nori;IL MIN KIM", "authorids": "~Milad_Khademi_Nori1;~IL_MIN_KIM1", "gender": "M;M", "homepage": "https://miladkhademinori.github.io/;https://www.ece.queensu.ca/people/I-M-Kim/index.html", "dblp": "267/9032;49/6121-1", "google_scholar": "TFsm204AAAAJ;iyufC8kAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Milad_Khademi_Nori1;~IL_MIN_KIM1", "aff": "Queen's University;Queen's University", "aff_domain": "queensu.ca;queensu.ca", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nnori2024task,\ntitle={Task Confusion and Catastrophic Forgetting in Class-Incremental Learning: A Mathematical Framework for Discriminative and Generative Modelings},\nauthor={Milad Khademi Nori and IL MIN KIM},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tj5wJslj0R}\n}", "github": "", "reviewers": "PJ6U;fG2K;EnNf", "pdf_size": 678102, "rating": "5;6;7", "confidence": "2;2;4", "soundness": "2;3;3", "novelty": "2;3;4", "presentation": "3;2;2", "wc_summary": "27;49;81", "wc_strengths": "15;24;54", "wc_weaknesses": "114;24;51", "wc_questions": "3;14;3", "wc_limitations": "7;12;12", "wc_review": "166;123;201", "wc_reply_reviewers": "15;12;0", "wc_reply_authors": "16;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 52.333333333333336, 22.17105219775452 ], "wc_strengths_avg": [ 31.0, 16.673332000533065 ], "wc_weaknesses_avg": [ 63.0, 37.70941526992961 ], "wc_questions_avg": [ 6.666666666666667, 5.185449728701348 ], "wc_limitations_avg": [ 10.333333333333334, 2.357022603955158 ], "wc_review_avg": [ 163.33333333333334, 31.899146627387317 ], "wc_reply_reviewers_avg": [ 9.0, 6.48074069840786 ], "wc_reply_authors_avg": [ 5.333333333333333, 7.542472332656507 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16859780767679117803&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "queensu.ca;queensu.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Queen's University", "aff_unique_dep": "", "aff_unique_url": "https://www.queensu.ca", "aff_unique_abbr": "Queen's", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Dynamic Service Fee Pricing under Strategic Behavior: Actions as Instruments and Phase Transition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95015", "id": "Tnl2K6Iz9j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tnl2K6Iz9j", "openreview": "https://openreview.net/forum?id=Tnl2K6Iz9j", "poster": "/media/PosterPDFs/NeurIPS%202024/95015.png?t=1731012005.3130753", "project": "", "author_site": "Rui Ai, David Simchi-Levi, Feng Zhu", "tldr": "", "abstract": "We study a dynamic pricing problem for third-party platform service fees under strategic, far-sighted customers. In each time period, the platform sets a service fee based on historical data, observes the resulting transaction quantities, and collects revenue. The platform also monitors equilibrium prices influenced by both demand and supply. The objective is to maximize total revenue over a time horizon $T$. Our problem incorporates three practical challenges: (a) initially, the platform lacks knowledge of the demand side beforehand, necessitating a balance between exploring (learning the demand curve) and exploiting (maximizing revenue) simultaneously; (b) since only equilibrium prices and quantities are observable, traditional Ordinary Least Squares (OLS) estimators would be biased and inconsistent; (c) buyers are rational and strategic, seeking to maximize their consumer surplus and potentially misrepresenting their preferences. To address these challenges, we propose novel algorithmic solutions. Our approach involves: (i) a carefully designed active randomness injection to balance exploration and exploitation effectively; (ii) using non-i.i.d. actions as instrumental variables (IV) to consistently estimate demand; (iii) a low-switching cost design that promotes nearly truthful buyer behavior. We show an expected regret bound of $\\tilde{\\mathcal{O}} (\\sqrt{T}\\wedge\\sigma_S^{-2})$ and demonstrate its optimality, up to logarithmic factors, with respect to both the time horizon $T$ and the randomness in supply $\\sigma_S$. Despite its simplicity, our model offers valuable insights into the use of actions as estimation instruments, the benefits of low-switching pricing policies in mitigating strategic buyer behavior, and the role of supply randomness in facilitating exploration which leads to a phase transition of policy performance.", "keywords": "online learning;online platform;dynamic pricing;strategic behavior;instrumental variable;phase transition", "primary_area": "online_learning", "supplementary_material": "/attachment/b2ad3833a166d56f57a489ff9dbdf9c33073b4e0.zip", "author": "Rui Ai;David Simchi-Levi;Feng Zhu", "authorids": "~Rui_Ai1;~David_Simchi-Levi2;~Feng_Zhu7", "gender": "M;M;M", "homepage": "https://air-8.github.io/;http://slevi1.mit.edu/;https://sites.mit.edu/fengzhu/", "dblp": "184/2621-2;;", "google_scholar": "IoU4kj8AAAAJ;https://scholar.google.co.uk/citations?hl=en;", "orcid": ";;0000-0003-4979-4879", "linkedin": ";;feng-zhu-165a98184/", "or_profile": "~Rui_Ai1;~David_Simchi-Levi2;~Feng_Zhu7", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nai2024dynamic,\ntitle={Dynamic Service Fee Pricing under Strategic Behavior: Actions as Instruments and Phase Transition},\nauthor={Rui Ai and David Simchi-Levi and Feng Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tnl2K6Iz9j}\n}", "github": "", "reviewers": "Z4TZ;pz8n;EYLY;pXqU", "pdf_size": 884065, "rating": "5;5;6;7", "confidence": "4;2;4;2", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;3;4;2", "wc_summary": "96;40;38;49", "wc_strengths": "79;40;39;15", "wc_weaknesses": "130;121;61;19", "wc_questions": "67;2;199;26", "wc_limitations": "9;1;1;53", "wc_review": "381;204;338;162", "wc_reply_reviewers": "45;19;117;8", "wc_reply_authors": "66;3;147;3", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.75, 23.60481942316018 ], "wc_strengths_avg": [ 43.25, 22.93877721239735 ], "wc_weaknesses_avg": [ 82.75, 45.36725140450984 ], "wc_questions_avg": [ 73.5, 76.09369224843805 ], "wc_limitations_avg": [ 16.0, 21.61018278497431 ], "wc_review_avg": [ 271.25, 90.77272442755037 ], "wc_reply_reviewers_avg": [ 47.25, 42.45217897823385 ], "wc_reply_authors_avg": [ 54.75, 59.14547742642712 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:crCwbSZ3wqgJ:scholar.google.com/&scioq=Dynamic+Service+Fee+Pricing+under+Strategic+Behavior:+Actions+as+Instruments+and+Phase+Transition&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DiffAug: A Diffuse-and-Denoise Augmentation for Training Robust Classifiers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95014", "id": "Tpx9gcZVBf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tpx9gcZVBf", "openreview": "https://openreview.net/forum?id=Tpx9gcZVBf", "poster": "", "project": "", "author_site": "Chandramouli Shama Sastry, Sri Harsha Dumpala, Sageev Oore", "tldr": "", "abstract": "We introduce DiffAug, a simple and efficient diffusion-based augmentation technique to train image classifiers for the crucial yet challenging goal of improved classifier robustness. Applying DiffAug to a given example consists of one forward-diffusion step followed by one reverse-diffusion step. Using both ResNet-50 and Vision Transformer architectures, we comprehensively evaluate classifiers trained with DiffAug and demonstrate the surprising effectiveness of single-step reverse diffusion in improving robustness to covariate shifts, certified adversarial accuracy and out of distribution detection. When we combine DiffAug with other augmentations such as AugMix and DeepAugment we demonstrate further improved robustness. Finally, building on this approach, we also improve classifier-guided diffusion wherein we observe improvements in: (i) classifier-generalization, (ii) gradient quality (i.e., improved perceptual alignment) and (iii) image generation performance. We thus introduce a computationally efficient technique for training with improved robustness that does not require any additional data, and effectively complements existing augmentation approaches.", "keywords": "Synthetic Augmentations; Robust Classifiers; Classifier-guided Diffusion; Perceptually Aligned Gradients", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/d133a3c7fc1c700ba9976ea0b3a19742db0c6fa8.zip", "author": "Chandramouli Shama Sastry;Sri Harsha Dumpala;Sageev Oore", "authorids": "~Chandramouli_Shama_Sastry1;~Sri_Harsha_Dumpala3;~Sageev_Oore1", "gender": "M;M;M", "homepage": "https://scholar.google.com/citations?user=yR5pPqAAAAAJ&hl=en;;", "dblp": "223/6317;148/9851;67/4980", "google_scholar": ";https://scholar.google.ca/citations?user=D4KhVXoAAAAJ;https://scholar.google.ca/citations?user=cI0dYX4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chandramouli_Shama_Sastry1;~Sri_Harsha_Dumpala3;~Sageev_Oore1", "aff": "Vector Institute/Dalhousie University;Dalhousie University;Vector Institute", "aff_domain": "dal.ca;dal.ca;vectorinstitute.ai", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nsastry2024diffaug,\ntitle={DiffAug: A Diffuse-and-Denoise Augmentation for Training Robust Classifiers},\nauthor={Chandramouli Shama Sastry and Sri Harsha Dumpala and Sageev Oore},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tpx9gcZVBf}\n}", "github": "", "reviewers": "uEtP;3vHc;gb3b;QY34", "pdf_size": 21692553, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;2;3", "wc_summary": "87;63;86;61", "wc_strengths": "127;44;78;52", "wc_weaknesses": "309;45;131;42", "wc_questions": "137;36;44;389", "wc_limitations": "6;1;1;11", "wc_review": "666;189;340;555", "wc_reply_reviewers": "44;90;3;109", "wc_reply_authors": "140;1651;30;90", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 74.25, 12.275483697190918 ], "wc_strengths_avg": [ 75.25, 32.41431011143072 ], "wc_weaknesses_avg": [ 131.75, 108.39597547879718 ], "wc_questions_avg": [ 151.5, 142.75240803573158 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_review_avg": [ 437.5, 185.2545545998802 ], "wc_reply_reviewers_avg": [ 61.5, 41.22196016688192 ], "wc_reply_authors_avg": [ 477.75, 678.4947954848291 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TSw9uQJ6k3wJ:scholar.google.com/&scioq=DiffAug:+A+Diffuse-and-Denoise+Augmentation+for+Training+Robust+Classifiers&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "dal.ca;dal.ca;vectorinstitute.ai", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Dalhousie University;Vector Institute", "aff_unique_dep": "Vector Institute;", "aff_unique_url": "https://www.dal.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "Dal;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Inversion-based Latent Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95013", "id": "TrN5TcWY87", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TrN5TcWY87", "openreview": "https://openreview.net/forum?id=TrN5TcWY87", "poster": "/media/PosterPDFs/NeurIPS%202024/95013.png?t=1733732617.2276032", "project": "", "author_site": "Jaewon Chu, Jinyoung Park, Seunghun Lee, Hyunwoo Kim", "tldr": "", "abstract": "Latent Bayesian optimization (LBO) approaches have successfully adopted Bayesian optimization over a continuous latent space by employing an encoder-decoder architecture to address the challenge of optimization in a high dimensional or discrete input space. LBO learns a surrogate model to approximate the black-box objective function in the latent space. However, we observed that most LBO methods suffer from the `misalignment problem', which is induced by the reconstruction error of the encoder-decoder architecture. It hinders learning an accurate surrogate model and generating high-quality solutions. In addition, several trust region-based LBO methods select the anchor, the center of the trust region, based solely on the objective function value without considering the trust region's potential to enhance the optimization process. To address these issues, we propose $\\textbf{Inv}$ersion-based Latent $\\textbf{B}$ayesian $\\textbf{O}$ptimization (InvBO), a plug-and-play module for LBO. InvBO consists of two components: an inversion method and a potential-aware trust region anchor selection. The inversion method searches the latent code that completely reconstructs the given target data. The potential-aware trust region anchor selection considers the potential capability of the trust region for better local optimization. Experimental results demonstrate the effectiveness of InvBO on nine real-world benchmarks, such as molecule design and arithmetic expression fitting tasks. Code is available at https://github.com/mlvlab/InvBO.", "keywords": "Latent Bayesian Optimization;Bayesian Optimization;Inversion", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/a79741d1c49bd66fab1cd9bc48335e1524e092b9.zip", "author": "Jaewon Chu;Jinyoung Park;Seunghun Lee;Hyunwoo J. Kim", "authorids": "~Jaewon_Chu1;~Jinyoung_Park1;~Seunghun_Lee2;~Hyunwoo_J._Kim3", "gender": "M;M;M;M", "homepage": "https://github.com/allonsy07;;https://github.com/llsshh319;https://hyunwoojkim.com/publications", "dblp": "355/0102;03/1524;77/7676-1;150/4259", "google_scholar": "X3RX138AAAAJ;zThEyOYAAAAJ;LPuuGcAAAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": ";;0000-0001-9377-2832;0000-0002-2181-9264", "linkedin": "jaewon-chu-64b003262;jinyoung-park-4861461a3/;;", "or_profile": "~Jaewon_Chu1;~Jinyoung_Park1;~Seunghun_Lee2;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchu2024inversionbased,\ntitle={Inversion-based Latent Bayesian Optimization},\nauthor={Jaewon Chu and Jinyoung Park and Seunghun Lee and Hyunwoo J. Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TrN5TcWY87}\n}", "github": "", "reviewers": "k47A;tYNa;wVbQ;8vpX;nFyz", "pdf_size": 26629797, "rating": "3;5;7;7;8", "confidence": "4;4;4;4;5", "soundness": "1;2;4;4;4", "novelty": "2;3;4;3;4", "presentation": "2;3;4;3;4", "wc_summary": "54;109;223;110;156", "wc_strengths": "19;153;266;130;69", "wc_weaknesses": "21;182;117;55;1282", "wc_questions": "158;134;28;28;107", "wc_limitations": "9;54;1;20;96", "wc_review": "261;632;635;343;1710", "wc_reply_reviewers": "275;0;30;123;206", "wc_reply_authors": "457;0;16;21;19", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;1;2;2;2", "rating_avg": [ 6.0, 1.7888543819998317 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 1.2649110640673518 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 130.4, 56.46450212301531 ], "wc_strengths_avg": [ 127.4, 83.70567483749235 ], "wc_weaknesses_avg": [ 331.4, 478.4711485554798 ], "wc_questions_avg": [ 91.0, 53.911037830856124 ], "wc_limitations_avg": [ 36.0, 35.02570484658374 ], "wc_review_avg": [ 716.2, 519.1929891668415 ], "wc_reply_reviewers_avg": [ 126.8, 103.63281333631737 ], "wc_reply_authors_avg": [ 102.6, 177.35456013308482 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5590169943749475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6059644801331187442&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Robust Mixture Learning when Outliers Overwhelm Small Groups", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95012", "id": "TrXV4dMDcG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TrXV4dMDcG", "openreview": "https://openreview.net/forum?id=TrXV4dMDcG", "poster": "", "project": "", "author_site": "Daniil Dmitriev, Rares-Darius Buhai, Stefan Tiegel, Alexander Wolters, Gleb Novikov, Amartya Sanyal, David Steurer, Fanny Yang", "tldr": "", "abstract": "We study the problem of estimating the means of well-separated mixtures when an adversary may add arbitrary outliers. While strong guarantees are available when the outlier fraction is significantly smaller than the minimum mixing weight, much less is known when outliers may crowd out low-weight clusters \u2013 a setting we refer to as list-decodable mixture learning (LD-ML). In this case, adversarial outliers can simulate additional spurious mixture components. Hence, if all means of the mixture must be recovered up to a small error in the output list, the list size needs to be larger than the number of (true) components. We propose an algorithm that obtains order-optimal error guarantees for each mixture mean with a minimal list-size overhead, significantly improving upon list-decodable mean estimation, the only existing method that is applicable for LD-ML. Although improvements are observed even when the mixture is non-separated, our algorithm achieves particularly strong guarantees when the mixture is separated: it can leverage the mixture structure to partially cluster the samples before carefully iterating a base learner for list-decodable mean estimation at different scales.", "keywords": "robust statistics;mixture learning;list-decodable learning;small group;outliers;mean estimation;efficient algorithms", "primary_area": "learning_theory", "supplementary_material": "/attachment/f9eb372d44f3f684f15b5176a0855f8308b733a6.zip", "author": "Daniil Dmitriev;Rares-Darius Buhai;Stefan Tiegel;Alexander Wolters;Gleb Novikov;Amartya Sanyal;David Steurer;Fanny Yang", "authorids": "~Daniil_Dmitriev2;~Rares-Darius_Buhai1;~Stefan_Tiegel1;~Alexander_Wolters1;~Gleb_Novikov1;~Amartya_Sanyal1;~David_Steurer1;~Fanny_Yang1", "gender": "M;;;M;;M;;", "homepage": ";;https://stefantiegel.com;;;https://amartya18x.github.io;;http://www.fanny-yang.de", "dblp": ";;218/5553;;200/9864;203/8807;;126/4852", "google_scholar": "3_4gF8wAAAAJ;;https://scholar.google.ch/citations?user=WvpFkwsAAAAJ;;;;;BfDKicQAAAAJ", "orcid": ";;;;;0000-0002-4190-0449;;", "linkedin": ";;;alexander-wolters1/;;;;", "or_profile": "~Daniil_Dmitriev2;~Rares-Darius_Buhai1;~Stefan_Tiegel1;~Alexander_Wolters1;~Gleb_Novikov1;~Amartya_Sanyal1;~David_Steurer1;~Fanny_Yang1", "aff": "ETHZ - ETH Zurich;;Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich;HSLU - Lucerne University of Applied Sciences and Arts;Max-Planck Institute;;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;;ethz.ch;inf.ethz.ch;hslu.ch;mpg.de;;ethz.ch", "position": "PhD student;;PhD student;Visiting Researcher;Researcher;Postdoc;;Professor", "bibtex": "@inproceedings{\ndmitriev2024robust,\ntitle={Robust Mixture Learning when Outliers Overwhelm Small Groups},\nauthor={Daniil Dmitriev and Rares-Darius Buhai and Stefan Tiegel and Alexander Wolters and Gleb Novikov and Amartya Sanyal and David Steurer and Fanny Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TrXV4dMDcG}\n}", "github": "", "reviewers": "TTHY;hCCi;fDjD;izza;w5FP", "pdf_size": 850393, "rating": "6;6;6;7;8", "confidence": "3;3;3;3;2", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "3;3;2;3;4", "wc_summary": "233;46;109;424;96", "wc_strengths": "75;72;66;70;100", "wc_weaknesses": "8;27;433;142;36", "wc_questions": "46;119;118;88;3", "wc_limitations": "2;1;1;1;5", "wc_review": "364;265;727;725;240", "wc_reply_reviewers": "21;16;154;11;0", "wc_reply_authors": "0;0;518;0;0", "reply_reviewers": "1;1;2;1;0", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 181.6, 135.93027624484546 ], "wc_strengths_avg": [ 76.6, 12.059850745345068 ], "wc_weaknesses_avg": [ 129.2, 158.92060911033533 ], "wc_questions_avg": [ 74.8, 44.67392975774573 ], "wc_limitations_avg": [ 2.0, 1.5491933384829668 ], "wc_review_avg": [ 464.2, 217.74609066525167 ], "wc_reply_reviewers_avg": [ 40.4, 57.224470290252576 ], "wc_reply_authors_avg": [ 103.6, 207.19999999999996 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8750000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11541029120172829276&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5, "email": "ethz.ch;;ethz.ch;inf.ethz.ch;hslu.ch;mpg.de;;ethz.ch", "author_num": 8, "aff_unique_index": "0;1;0;2;3;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology;Lucerne University of Applied Sciences and Arts;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;https://www.hslu.ch;https://www.mpg.de", "aff_unique_abbr": "ETHZ;ETH Zurich;HSLU;MPG", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Zurich;Lucerne", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "High-dimensional (Group) Adversarial Training in Linear Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95011", "id": "Tsb4dVtCHx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tsb4dVtCHx", "openreview": "https://openreview.net/forum?id=Tsb4dVtCHx", "poster": "/media/PosterPDFs/NeurIPS%202024/95011.png?t=1730258158.5505764", "project": "", "author_site": "Yiling Xie, Xiaoming Huo", "tldr": "", "abstract": "Adversarial training can achieve robustness against adversarial perturbations and has been widely used in machine-learning models. This paper delivers a non-asymptotic consistency analysis of the adversarial training procedure under $\\ell_\\infty$-perturbation in high-dimensional linear regression. It will be shown that, under the restricted eigenvalue condition, the associated convergence rate of prediction error can achieve the minimax rate up to a logarithmic factor in the high-dimensional linear regression on the class of sparse parameters. Additionally, the group adversarial training procedure is analyzed. Compared with classic adversarial training, it will be proved that the group adversarial training procedure enjoys a better prediction error upper bound under certain group-sparsity patterns.", "keywords": "adversarial training;high-dimension;convergence rate;sparsity;minimax", "primary_area": "learning_theory", "supplementary_material": "/attachment/14656f097750fe60c7f8b8a561e7303a3de872e4.zip", "author": "Yiling Xie;Xiaoming Huo", "authorids": "~Yiling_Xie1;~Xiaoming_Huo1", "gender": "F;M", "homepage": ";https://www.isye.gatech.edu/users/xiaoming-huo", "dblp": ";67/3392", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0101-1206", "linkedin": "yiling-x-084448150/;xiaoming-huo-9653374/", "or_profile": "~Yiling_Xie1;~Xiaoming_Huo1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nxie2024highdimensional,\ntitle={High-dimensional (Group) Adversarial Training in Linear Regression},\nauthor={Yiling Xie and Xiaoming Huo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tsb4dVtCHx}\n}", "github": "", "reviewers": "fdf4;CJZh;6XeH;J4Ez", "pdf_size": 642282, "rating": "4;4;5;8", "confidence": "4;3;3;4", "soundness": "3;2;3;4", "novelty": "2;1;2;3", "presentation": "3;2;3;4", "wc_summary": "57;22;40;48", "wc_strengths": "40;35;37;50", "wc_weaknesses": "181;241;74;99", "wc_questions": "2;78;2;43", "wc_limitations": "1;168;4;9", "wc_review": "281;544;157;249", "wc_reply_reviewers": "0;14;27;59", "wc_reply_authors": "51;51;51;36", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 41.75, 12.891373084353738 ], "wc_strengths_avg": [ 40.5, 5.766281297335398 ], "wc_weaknesses_avg": [ 148.75, 66.35651814253066 ], "wc_questions_avg": [ 31.25, 31.75984099456419 ], "wc_limitations_avg": [ 45.5, 70.78311945654839 ], "wc_review_avg": [ 307.75, 143.79390633820336 ], "wc_reply_reviewers_avg": [ 25.0, 21.828879952943073 ], "wc_reply_authors_avg": [ 47.25, 6.49519052838329 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.45749571099781405, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16374801931055313034&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Randomized Truthful Auctions with Learning Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95010", "id": "Tt2xJaxDc4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tt2xJaxDc4", "openreview": "https://openreview.net/forum?id=Tt2xJaxDc4", "poster": "", "project": "", "author_site": "Gagan Aggarwal, Anupam Gupta, Andres Perlroth, Grigoris Velegkas", "tldr": "", "abstract": "We study a setting where agents use no-regret learning algorithms to participate in repeated auctions. Recently, Kolumbus and Nisan [2022a] showed, rather surprisingly, that when bidders participate in second-price auctions using no-regret bidding algorithms, no matter how large the number of interactions $T$ is, the runner-up bidder may not converge to bidding truthfully. Our first result shows that this holds forall deterministictruthful auctions. We also show that the ratio of the learning rates of different bidders can qualitatively affect the convergence of the bidders. Next, we consider the problem of revenue maximization in this environment. In the setting with fully rational bidders, the seminal result of Myerson [1981] showed that revenue can be maximized by using a second-price auction with reserves. We show that, in stark contrast, in our setting with learning bidders, randomized auctions can have strictly better revenue guarantees than second-price auctions with reserves, when $T$ is large enough. To do this, we provide a black-box transformation from any truthful auction $A$ to an auction $A'$ such that: i) all mean-based no-regret learners that participate in $A'$ converge to bidding truthfully, ii) the distance between the allocation rule and the payment rule between $A, A'$ is negligible. Finally, we study revenue maximization in the non-asymptotic regime. We define a notion of auctioneer regret that compares the revenue generated to the revenue of a second price auction with truthful bids. When the auctioneer has to use the same auction throughout the interaction, we show an (almost) tight regret bound of $\\tilde{\\Theta}(T^{3/4})$. Then, we consider the case where the auctioneer can use different auctions throughout the interaction, but in a way that is oblivious to the bids. For this setting, we show an (almost) tight bound of $\\tilde{\\Theta}(\\sqrt{T})$.", "keywords": "Auctions;No-Regret Learning;Revenue Maximization", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Gagan Aggarwal;Anupam Gupta;Andres Perlroth;Grigoris Velegkas", "authorids": "~Gagan_Aggarwal1;~Anupam_Gupta2;~Andres_Perlroth1;~Grigoris_Velegkas1", "gender": "F;M;M;M", "homepage": ";https://cs.nyu.edu/~anupamg/;https://www.linkedin.com/in/andres-perlroth-2044a11a0;", "dblp": "75/3847;27/2931;;254/1885", "google_scholar": ";QuwaU-8AAAAJ;;Ty1kgP0AAAAJ", "orcid": "0009-0003-3296-4891;;;", "linkedin": ";;;", "or_profile": "~Gagan_Aggarwal1;~Anupam_Gupta2;~Andres_Perlroth1;~Grigoris_Velegkas1", "aff": "Google Research;New York University;;Yale University", "aff_domain": "research.google.com;cs.nyu.edu;;yale.edu", "position": "Researcher;Full Professor;;PhD student", "bibtex": "@inproceedings{\naggarwal2024randomized,\ntitle={Randomized Truthful Auctions with Learning Agents},\nauthor={Gagan Aggarwal and Anupam Gupta and Andres Perlroth and Grigoris Velegkas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tt2xJaxDc4}\n}", "github": "", "reviewers": "Lf7G;rdXf;9Ayy;di38", "pdf_size": 393395, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;2;4;3", "wc_summary": "205;105;114;379", "wc_strengths": "94;44;47;3", "wc_weaknesses": "163;122;34;3", "wc_questions": "261;40;1;3", "wc_limitations": "220;11;1;3", "wc_review": "943;322;197;391", "wc_reply_reviewers": "152;21;4;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 200.75, 110.09626469594689 ], "wc_strengths_avg": [ 47.0, 32.225766088644036 ], "wc_weaknesses_avg": [ 80.5, 64.60843598168896 ], "wc_questions_avg": [ 76.25, 107.79001577140622 ], "wc_limitations_avg": [ 58.75, 93.17289037053644 ], "wc_review_avg": [ 463.25, 285.57868880573005 ], "wc_reply_reviewers_avg": [ 47.0, 60.92208138269736 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17602105595055724507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "research.google.com;cs.nyu.edu;;yale.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;New York University;Yale University", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;https://www.nyu.edu;https://www.yale.edu", "aff_unique_abbr": "Google Research;NYU;Yale", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "QuadMamba: Learning Quadtree-based Selective Scan for Visual State Space Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95009", "id": "TtcwVuBZu1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TtcwVuBZu1", "openreview": "https://openreview.net/forum?id=TtcwVuBZu1", "poster": "/media/PosterPDFs/NeurIPS%202024/95009.png?t=1730255531.4128652", "project": "", "author_site": "Fei Xie, Weijia Zhang, Zhongdao Wang, Chao Ma", "tldr": "", "abstract": "Recent advancements in State Space Models, notably Mamba, have demonstrated superior performance over the dominant Transformer models, particularly in reducing the computational complexity from quadratic to linear. Yet, difficulties in adapting Mamba from language to vision tasks arise due to the distinct characteristics of visual data, such as the spatial locality and adjacency within images and large variations in information granularity across visual tokens. Existing vision Mamba approaches either flatten tokens into sequences in a raster scan fashion, which breaks the local adjacency of images, or manually partition tokens into windows, which limits their long-range modeling and generalization capabilities. To address these limitations, we present a new vision Mamba model, coined QuadMamba, that effectively captures local dependencies of varying granularities via quadtree-based image partition and scan. Concretely, our lightweight quadtree-based scan module learns to preserve the 2D locality of spatial regions within learned window quadrants. The module estimates the locality score of each token from their features, before adaptively partitioning tokens into window quadrants. An omnidirectional window shifting scheme is also introduced to capture more intact and informative features across different local regions. To make the discretized quadtree partition end-to-end trainable, we further devise a sequence masking strategy based on Gumbel-Softmax and its straight-through gradient estimator. Extensive experiments demonstrate that QuadMamba achieves state-of-the-art performance in various vision tasks, including image classification, object detection, instance segmentation, and semantic segmentation. Our code and models will be released.", "keywords": "Visual Mamba;Visual Recognition;Vision Backbone", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/daeea7fc669370d7e0181fde8442c4c77113891a.zip", "author": "Fei Xie;Weijia Zhang;Zhongdao Wang;Chao Ma", "authorids": "~Fei_Xie3;~Weijia_Zhang7;~Zhongdao_Wang2;~Chao_Ma3", "gender": "M;;;M", "homepage": "https://phiphiphi31.github.io/;;;https://vision.sjtu.edu.cn/", "dblp": ";;;79/1552-4", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;syoPhv8AAAAJ", "orcid": "0009-0001-2733-2393;;;", "linkedin": ";;;", "or_profile": "~Fei_Xie3;~Weijia_Zhang7;~Zhongdao_Wang2;~Chao_Ma3", "aff": "Shanghai Jiaotong University;;;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;;;sjtu.edu.cn", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nxie2024quadmamba,\ntitle={QuadMamba: Learning Quadtree-based Selective Scan for Visual State Space Model},\nauthor={Fei Xie and Weijia Zhang and Zhongdao Wang and Chao Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TtcwVuBZu1}\n}", "github": "", "reviewers": "Kbfx;HNK1;ALic;VDbd", "pdf_size": 1309063, "rating": "5;5;8;9", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "49;116;96;118", "wc_strengths": "39;70;226;108", "wc_weaknesses": "95;504;88;154", "wc_questions": "20;147;9;74", "wc_limitations": "1;4;1;22", "wc_review": "204;841;420;476", "wc_reply_reviewers": "0;239;15;0", "wc_reply_authors": "0;246;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 1.7853571071357126 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 27.779263849137543 ], "wc_strengths_avg": [ 110.75, 70.88503015446915 ], "wc_weaknesses_avg": [ 210.25, 171.523139838332 ], "wc_questions_avg": [ 62.5, 54.637441374939954 ], "wc_limitations_avg": [ 7.0, 8.74642784226795 ], "wc_review_avg": [ 485.25, 229.13028499087588 ], "wc_reply_reviewers_avg": [ 63.5, 101.50985173863668 ], "wc_reply_authors_avg": [ 61.5, 106.52112466548596 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9801960588196068, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14993805729895015406&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;;;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "FEEL-SNN: Robust Spiking Neural Networks with Frequency Encoding and Evolutionary Leak Factor", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95008", "id": "TuCQdBo4NC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TuCQdBo4NC", "openreview": "https://openreview.net/forum?id=TuCQdBo4NC", "poster": "/media/PosterPDFs/NeurIPS%202024/95008.png?t=1730691982.3333106", "project": "", "author_site": "Mengting Xu, De Ma, Huajin Tang, Qian Zheng, Gang Pan", "tldr": "", "abstract": "Currently, researchers think that the inherent robustness of spiking neural networks (SNNs) stems from their biologically plausible spiking neurons, and are dedicated to developing more bio-inspired models to defend attacks. However, most work relies solely on experimental analysis and lacks theoretical support, and the direct-encoding method and fixed membrane potential leak factor they used in spiking neurons are simplified simulations of those in the biological nervous system, which makes it difficult to ensure generalizability across all datasets and networks. Contrarily, the biological nervous system can stay reliable even in a highly complex noise environment, one of the reasons is selective visual attention and non-fixed membrane potential leaks in biological neurons. This biological finding has inspired us to design a highly robust SNN model that closely mimics the biological nervous system. In our study, we first present a unified theoretical framework for SNN robustness constraint, which suggests that improving the encoding method and evolution of the membrane potential leak factor in spiking neurons can improve SNN robustness. Subsequently, we propose a robust SNN (FEEL-SNN) with Frequency Encoding (FE) and Evolutionary Leak factor (EL) to defend against different noises, mimicking the selective visual attention mechanism and non-fixed leak observed in biological systems. Experimental results confirm the efficacy of both our FE, EL, and FEEL methods, either in isolation or in conjunction with established robust enhancement algorithms, for enhancing the robustness of SNNs.", "keywords": "spiking neural network;robustness;attack", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/9332d6d0c213aa8bbda130c57a98f6eeadef8ce9.zip", "author": "Mengting Xu;De Ma;Huajin Tang;Qian Zheng;Gang Pan", "authorids": "~Mengting_Xu1;~De_Ma1;~Huajin_Tang1;~Qian_Zheng5;~Gang_Pan1", "gender": "F;M;M;;", "homepage": "https://github.com/MengtingXu1203;;https://person.zju.edu.cn/htang;;", "dblp": "171/2836;18/8568;18/434;;", "google_scholar": "c_ge2IwAAAAJ;;U041O4QAAAAJ;;", "orcid": ";0000-0001-8700-938X;;;", "linkedin": ";;;;", "or_profile": "~Mengting_Xu1;~De_Ma1;~Huajin_Tang1;~Qian_Zheng5;~Gang_Pan1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;", "position": "PhD student;Associate Professor;Full Professor;;", "bibtex": "@inproceedings{\nxu2024feelsnn,\ntitle={{FEEL}-{SNN}: Robust Spiking Neural Networks with Frequency Encoding and Evolutionary Leak Factor},\nauthor={Mengting Xu and De Ma and Huajin Tang and Qian Zheng and Gang Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TuCQdBo4NC}\n}", "github": "", "reviewers": "uyY9;zYHj;yJ85", "pdf_size": 3568063, "rating": "4;5;7", "confidence": "5;5;5", "soundness": "1;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "69;72;73", "wc_strengths": "57;37;89", "wc_weaknesses": "67;158;202", "wc_questions": "3;135;5", "wc_limitations": "13;4;1", "wc_review": "209;406;370", "wc_reply_reviewers": "61;446;17", "wc_reply_authors": "525;959;21", "reply_reviewers": "1;3;1", "reply_authors": "4;5;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 71.33333333333333, 1.699673171197595 ], "wc_strengths_avg": [ 61.0, 21.416504538945347 ], "wc_weaknesses_avg": [ 142.33333333333334, 56.21585383343583 ], "wc_questions_avg": [ 47.666666666666664, 61.759389749431804 ], "wc_limitations_avg": [ 6.0, 5.0990195135927845 ], "wc_review_avg": [ 328.3333333333333, 85.65174967402723 ], "wc_reply_reviewers_avg": [ 174.66666666666666, 192.7006890375734 ], "wc_reply_authors_avg": [ 501.6666666666667, 383.2921717031133 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17062285718018972548&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "VHELM: A Holistic Evaluation of Vision Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97677", "id": "TuMnKFKPho", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TuMnKFKPho", "openreview": "https://openreview.net/forum?id=TuMnKFKPho", "poster": "/media/PosterPDFs/NeurIPS%202024/97677.png?t=1731897390.3293777", "project": "", "author_site": "Tony Lee, Haoqin Tu, Chi Heem Wong, Wenhao Zheng, Yiyang Zhou, Yifan Mai, Josselin Roberts, Michihiro Yasunaga, Huaxiu Yao, Cihang Xie, Percy Liang", "tldr": "", "abstract": "Current benchmarks for assessing vision-language models (VLMs) often focus on their perception or problem-solving capabilities and neglect other critical aspects such as fairness, multilinguality, or toxicity. Furthermore, they differ in their evaluation procedures and the scope of the evaluation, making it difficult to compare models. To address these issues, we extend the HELM framework to VLMs to present the Holistic Evaluation of Vision Language Models (VHELM). VHELM aggregates various datasets to cover one or more of the 9 aspects: *visual perception*, *knowledge*, *reasoning*, *bias*, *fairness*, *multilinguality*, *robustness*, *toxicity*, and *safety*. In doing so, we produce a comprehensive, multi-dimensional view of the capabilities of the VLMs across these important factors. In addition, we standardize the standard inference parameters, methods of prompting, and evaluation metrics to enable fair comparisons across models. Our framework is designed to be lightweight and automatic so that evaluation runs are cheap and fast. Our initial run evaluates 22 VLMs on 21 existing datasets to provide a holistic snapshot of the models. We uncover new key findings, such as the fact that efficiency-focused models (e.g., Claude 3 Haiku or Gemini 1.5 Flash) perform significantly worse than their full models (e.g., Claude 3 Opus or Gemini 1.5 Pro) on the bias benchmark but not when evaluated on the other aspects. For transparency, we release the raw model generations and complete results on our website at https://crfm.stanford.edu/helm/vhelm/v2.0.1. VHELM is intended to be a living benchmark, and we hope to continue adding new datasets and models over time.", "keywords": "Vision-language models;benchmark;holistic", "primary_area": "", "supplementary_material": "", "author": "Tony Lee;Haoqin Tu;Chi Heem Wong;Wenhao Zheng;Yiyang Zhou;Yifan Mai;Josselin Somerville Roberts;Michihiro Yasunaga;Huaxiu Yao;Cihang Xie;Percy Liang", "authorids": "~Tony_Lee1;~Haoqin_Tu1;~Chi_Heem_Wong1;~Wenhao_Zheng4;~Yiyang_Zhou1;~Yifan_Mai1;~Josselin_Somerville_Roberts1;~Michihiro_Yasunaga1;~Huaxiu_Yao1;~Cihang_Xie3;~Percy_Liang1", "gender": "M;M;;M;M;Non-Binary;M;;M;;", "homepage": ";https://www.haqtu.me;;;https://yiyangzhou.github.io/;https://yifanmai.com/;https://josselinsomervilleroberts.github.io/;;http://huaxiuyao.mystrikingly.com;;https://cs.stanford.edu/~pliang/", "dblp": "46/4265;309/7386;;;175/1589.html;156/8369;;202/1809;197/1635;;04/1701", "google_scholar": "OYNdx48AAAAJ;https://scholar.google.com/citations?hl=en;;dR1J_4EAAAAJ;https://scholar.google.com.hk/citations?user=6KltFMAAAAAJ;QLbLGIMAAAAJ;eLfRuNAAAAAJ;SieJYoEAAAAJ;A20BZnQAAAAJ;;pouyVyUAAAAJ", "orcid": ";;;0000-0002-7108-370X;;0009-0004-7270-2607;0009-0009-7878-5067;;;;", "linkedin": "tonyhlee/;;;;;yifan-mai;josselin-somerville-roberts/;;huaxiuyao/;;", "or_profile": "~Tony_Lee1;~Haoqin_Tu1;~Chi_Heem_Wong1;~Wenhao_Zheng4;~Yiyang_Zhou1;~Yifan_Mai1;~Josselin_Somerville_Roberts1;~Michihiro_Yasunaga1;~Huaxiu_Yao1;~Cihang_Xie3;~Percy_Liang1", "aff": "Stanford University;University of Chinese Academy of Sciences;;Zhejiang University;Xi'an Jiaotong University;Stanford University;Stanford University;Stanford University;Department of Computer Science, University of North Carolina at Chapel Hill;;Stanford University", "aff_domain": "stanford.edu;ucas.ac.cn;;zju.edu.cn;xjtu.edu.cn;stanford.edu;stanford.edu;stanford.edu;cs.unc.edu;;stanford.edu", "position": "Researcher;MS student;;MS student;MS student;Researcher;MS student;PhD student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nlee2024vhelm,\ntitle={{VHELM}: A Holistic Evaluation of Vision Language Models},\nauthor={Tony Lee and Haoqin Tu and Chi Heem Wong and Wenhao Zheng and Yiyang Zhou and Yifan Mai and Josselin Somerville Roberts and Michihiro Yasunaga and Huaxiu Yao and Cihang Xie and Percy Liang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=TuMnKFKPho}\n}", "github": "", "reviewers": "DCP4;sinz;F3VF;zMuT", "pdf_size": 9324996, "rating": "4;7;8;10", "confidence": "4;3;3;3", "wc_summary_and_contributions": "43;67;93;55", "wc_strengths": "37;77;109;44", "wc_improvement": "6;144;52;63", "wc_limitations": "1;5;13;7", "wc_correctness": "1;5;18;1", "wc_clarity": "27;5;7;4", "wc_relation_to_prior_work": "1;23;1;1", "wc_documentation": "1;4;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "118;331;295;177", "wc_reply_reviewers": "0;15;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 2.165063509461097 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 64.5, 18.513508581573618 ], "wc_strengths_avg": [ 66.75, 28.69124430902222 ], "wc_improvement_avg": [ 66.25, 49.721097131901665 ], "wc_limitations_avg": [ 6.5, 4.330127018922194 ], "wc_correctness_avg": [ 6.25, 6.977642868476432 ], "wc_clarity_avg": [ 10.75, 9.443913383762052 ], "wc_relation_to_prior_work_avg": [ 6.5, 9.526279441628825 ], "wc_documentation_avg": [ 1.75, 1.299038105676658 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 230.25, 86.2826025337669 ], "wc_reply_reviewers_avg": [ 6.75, 6.832825184358224 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.8666666666666666, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8970220017755072274&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "stanford.edu;ucas.ac.cn;;zju.edu.cn;xjtu.edu.cn;stanford.edu;stanford.edu;stanford.edu;cs.unc.edu;;stanford.edu", "author_num": 11, "aff_unique_index": "0;1;2;3;0;0;0;4;0", "aff_unique_norm": "Stanford University;University of Chinese Academy of Sciences;Zhejiang University;Xi'an Jiao Tong University;University of North Carolina at Chapel Hill", "aff_unique_dep": ";;;;Department of Computer Science", "aff_unique_url": "https://www.stanford.edu;http://www.ucas.ac.cn;https://www.zju.edu.cn;https://www.xjtu.edu.cn;https://www.unc.edu", "aff_unique_abbr": "Stanford;UCAS;ZJU;XJTU;UNC Chapel Hill", "aff_campus_unique_index": "0;0;0;0;2;0", "aff_campus_unique": "Stanford;;Chapel Hill", "aff_country_unique_index": "0;1;1;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Mixture of neural fields for heterogeneous reconstruction in cryo-EM", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95007", "id": "TuspoNzIdB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TuspoNzIdB", "openreview": "https://openreview.net/forum?id=TuspoNzIdB", "poster": "/media/PosterPDFs/NeurIPS%202024/95007.png?t=1733647822.3404837", "project": "", "author_site": "Axel Levy, Rishwanth Raghu, David Shustin, Adele Peng, Huan Li, Oliver Clarke, Gordon Wetzstein, Ellen Zhong", "tldr": "", "abstract": "Cryo-electron microscopy (cryo-EM) is an experimental technique for protein structure determination that images an ensemble of macromolecules in near-physiological contexts. While recent advances enable the reconstruction of dynamic conformations of a single biomolecular complex, current methods do not adequately model samples with mixed conformational and compositional heterogeneity. In particular, datasets containing mixtures of multiple proteins require the joint inference of structure, pose, compositional class, and conformational states for 3D reconstruction. Here, we present Hydra, an approach that models both conformational and compositional heterogeneity fully ab initio by parameterizing structures as arising from one of K neural fields. We employ a hybrid optimization strategy and demonstrate the effectiveness of our approach on synthetic datasets composed of mixtures of proteins with large degrees of conformational variability. We additionally demonstrate Hydra on an experimental dataset imaged of a cellular lysate containing a mixture of different protein complexes. Hydra expands the expressivity of heterogeneous reconstruction methods and thus broadens the scope of cryo-EM to increasingly complex samples.", "keywords": "cryogenic electron microscopy;neural representations", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/44cf6c08b13d114ceb94a6d10a7613158c2a4823.zip", "author": "Axel Levy;Rishwanth Raghu;David Shustin;Adele Rui-Yang Peng;Huan Li;Oliver Biggs Clarke;Gordon Wetzstein;Ellen D Zhong", "authorids": "~Axel_Levy1;~Rishwanth_Raghu1;~David_Shustin1;~Adele_Rui-Yang_Peng1;~Huan_Li8;~Oliver_Biggs_Clarke1;~Gordon_Wetzstein3;~Ellen_D_Zhong1", "gender": "M;M;M;F;F;M;M;F", "homepage": "https://axlevy.com/;https://rish-raghu.github.io/;https://davidshust.in;;;;http://web.stanford.edu/~gordonwz/;https://www.cs.princeton.edu/~zhonge/", "dblp": "311/5494;385/3039;349/7766;;;;13/4660;193/8335", "google_scholar": "yO62sYUAAAAJ;https://scholar.google.com/citations?hl=en;-mKK8SMAAAAJ;;jm8DucoAAAAJ;eJbnxVgAAAAJ;VOf45S0AAAAJ;17tGlqAAAAAJ", "orcid": "0000-0001-7890-9562;0009-0006-6651-142X;0000-0001-6797-1379;0000-0003-1828-0648;;0000-0003-1876-196X;0000-0002-9243-6885;", "linkedin": "axel-levy-x17/;rishwanth-raghu/;david-shustin/;adele-peng;;;gordon-wetzstein-2406723/;", "or_profile": "~Axel_Levy1;~Rishwanth_Raghu1;~David_Shustin1;~Adele_Rui-Yang_Peng1;~Huan_Li8;~Oliver_Biggs_Clarke1;~Gordon_Wetzstein3;~Ellen_D_Zhong1", "aff": "Stanford University;Princeton University;Princeton University;Department of Chemistry, Princeton University;Columbia University;Columbia University;Stanford University;Princeton University", "aff_domain": "stanford.edu;princeton.edu;princeton.edu;chemistry.princeton.edu;columbia.edu;columbia.edu;stanford.edu;princeton.edu", "position": "PhD student;MS student;Undergrad student;Undergrad student;Postdoc;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlevy2024mixture,\ntitle={Mixture of neural fields for heterogeneous reconstruction in cryo-{EM}},\nauthor={Axel Levy and Rishwanth Raghu and David Shustin and Adele Rui-Yang Peng and Huan Li and Oliver Biggs Clarke and Gordon Wetzstein and Ellen D Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TuspoNzIdB}\n}", "github": "", "reviewers": "qUkh;14wr;u3f9;SenM;RoiP", "pdf_size": 16990483, "rating": "4;4;4;7;7", "confidence": "5;4;4;5;3", "soundness": "2;3;2;3;3", "novelty": "1;2;2;4;3", "presentation": "3;3;2;4;2", "wc_summary": "88;31;74;81;128", "wc_strengths": "90;72;39;47;87", "wc_weaknesses": "523;106;62;103;377", "wc_questions": "252;14;61;392;207", "wc_limitations": "12;7;1;40;125", "wc_review": "965;230;237;663;924", "wc_reply_reviewers": "280;37;90;35;0", "wc_reply_authors": "406;267;228;0;0", "reply_reviewers": "2;1;2;1;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 80.4, 31.000645154577025 ], "wc_strengths_avg": [ 67.0, 20.67849124090053 ], "wc_weaknesses_avg": [ 234.2, 182.81072178622347 ], "wc_questions_avg": [ 185.2, 135.9697025075807 ], "wc_limitations_avg": [ 37.0, 45.98695467195018 ], "wc_review_avg": [ 603.8, 319.6143926671639 ], "wc_reply_reviewers_avg": [ 88.4, 100.02119775327628 ], "wc_reply_authors_avg": [ 180.2, 158.5867585897385 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2182178902359924, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BAiN1CVxRJMJ:scholar.google.com/&scioq=Mixture+of+neural+fields+for+heterogeneous+reconstruction+in+cryo-EM&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "stanford.edu;princeton.edu;princeton.edu;chemistry.princeton.edu;columbia.edu;columbia.edu;stanford.edu;princeton.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;2;2;0;1", "aff_unique_norm": "Stanford University;Princeton University;Columbia University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu;https://www.columbia.edu", "aff_unique_abbr": "Stanford;Princeton;Columbia", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Trajectory Data Suffices for Statistically Efficient Learning in Offline RL with Linear $q^\\pi$-Realizability and Concentrability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95006", "id": "TusuJSbRxm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TusuJSbRxm", "openreview": "https://openreview.net/forum?id=TusuJSbRxm", "poster": "/media/PosterPDFs/NeurIPS%202024/95006.png?t=1731379189.2144604", "project": "", "author_site": "Volodymyr Tkachuk, Gellert Weisz, Csaba Szepesvari", "tldr": "", "abstract": "We consider offline reinforcement learning (RL) in $H$-horizon Markov decision processes (MDPs) under the linear $q^\\pi$-realizability assumption, where the action-value function of every policy is linear with respect to a given $d$-dimensional feature function. The hope in this setting is that learning a good policy will be possible without requiring a sample size that scales with the number of states in the MDP. Foster et al. [2021] have shown this to be impossible even under $\\text{\\textit{concentrability}}$, a data coverage assumption where a coefficient $C_\\text{conc}$ bounds the extent to which the state-action distribution of any policy can veer off the data distribution. However, the data in this previous work was in the form of a sequence of individual transitions. This leaves open the question of whether the negative result mentioned could be overcome if the data was composed of sequences of full trajectories. In this work we answer this question positively by proving that with trajectory data, a dataset of size $\\text{poly}(d,H,C_\\text{conc})/\\epsilon^2$ is sufficient for deriving an $\\epsilon$-optimal policy, regardless of the size of the state space. The main tool that makes this result possible is due to Weisz et al. [2023], who demonstrate that linear MDPs can be used to approximate linearly $q^\\pi$-realizable MDPs. The connection to trajectory data is that the linear MDP approximation relies on \"skipping\" over certain states. The associated estimation problems are thus easy when working with trajectory data, while they remain nontrivial when working with individual transitions. The question of computational efficiency under our assumptions remains open.", "keywords": "reinforcement learning;learning theory;offline RL;batch RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Volodymyr Tkachuk;Gell\u00e9rt Weisz;Csaba Szepesvari", "authorids": "~Volodymyr_Tkachuk2;~Gell\u00e9rt_Weisz2;~Csaba_Szepesvari1", "gender": "M;M;", "homepage": "https://vladtkachuk4.github.io/;https://sites.ualberta.ca/~szepesva/;", "dblp": "287/4223;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;215/3618.html", "google_scholar": "9sSwAAsAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;8u-RYZcAAAAJ", "orcid": ";;", "linkedin": "vtkachuk4/;csaba-szepesvari-09376b1?trk=hp-identity-name;", "or_profile": "~Volodymyr_Tkachuk2;~Csaba_Szepesvari1;~Gellert_Weisz1", "aff": "University of Alberta;Google DeepMind;Google DeepMind", "aff_domain": "ualberta.ca;google.com;deepmind.com", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\ntkachuk2024trajectory,\ntitle={Trajectory Data Suffices for Statistically Efficient Learning in Offline {RL} with Linear \\$q{\\textasciicircum}{\\textbackslash}pi\\$-Realizability and Concentrability},\nauthor={Volodymyr Tkachuk and Gell{\\'e}rt Weisz and Csaba Szepesvari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TusuJSbRxm}\n}", "github": "", "reviewers": "kP5e;sDBe;eyWj", "pdf_size": 350524, "rating": "5;7;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "2;3;2", "wc_summary": "43;83;90", "wc_strengths": "14;33;79", "wc_weaknesses": "108;182;144", "wc_questions": "7;3;1", "wc_limitations": "1;11;6", "wc_review": "173;312;320", "wc_reply_reviewers": "107;0;27", "wc_reply_authors": "840;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 20.704266871026046 ], "wc_strengths_avg": [ 42.0, 27.28858125052797 ], "wc_weaknesses_avg": [ 144.66666666666666, 30.214051182999093 ], "wc_questions_avg": [ 3.6666666666666665, 2.494438257849294 ], "wc_limitations_avg": [ 6.0, 4.08248290463863 ], "wc_review_avg": [ 268.3333333333333, 67.48991694237658 ], "wc_reply_reviewers_avg": [ 44.666666666666664, 45.433712397538265 ], "wc_reply_authors_avg": [ 280.0, 395.9797974644666 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iucm3TxZXYoJ:scholar.google.com/&scioq=Trajectory+Data+Suffices+for+Statistically+Efficient+Learning+in+Offline+RL+with+Linear+%24q%5E%5Cpi%24-Realizability+and+Concentrability&hl=en&as_sdt=0,22", "gs_version_total": 4, "email": "ualberta.ca;google.com;deepmind.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Alberta;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com", "aff_unique_abbr": "UAlberta;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United Kingdom" }, { "title": "A Huber Loss Minimization Approach to Mean Estimation under User-level Differential Privacy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95005", "id": "TutGINeJzZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TutGINeJzZ", "openreview": "https://openreview.net/forum?id=TutGINeJzZ", "poster": "/media/PosterPDFs/NeurIPS%202024/95005.png?t=1729774753.3424783", "project": "", "author_site": "Puning Zhao, Lifeng LAI, Li Shen, Qingming Li, Jiafei Wu, Zhe Liu", "tldr": "", "abstract": "Privacy protection of users' entire contribution of samples is important in distributed systems. The most effective approach is the two-stage scheme, which finds a small interval first and then gets a refined estimate by clipping samples into the interval. However, the clipping operation induces bias, which is serious if the sample distribution is heavy-tailed. Besides, users with large local sample sizes can make the sensitivity much larger, thus the method is not suitable for imbalanced users. Motivated by these challenges, we propose a Huber loss minimization approach to mean estimation under user-level differential privacy. The connecting points of Huber loss can be adaptively adjusted to deal with imbalanced users. Moreover, it avoids the clipping operation, thus significantly reducing the bias compared with the two-stage approach. We provide a theoretical analysis of our approach, which gives the noise strength needed for privacy protection, as well as the bound of mean squared error. The result shows that the new method is much less sensitive to the imbalance of user-wise sample sizes and the tail of sample distributions. Finally, we perform numerical experiments to validate our theoretical analysis.", "keywords": "differential privacy;robustness;huber loss", "primary_area": "privacy", "supplementary_material": "/attachment/693ff023677026c9710baeb97ce8e42e8000770b.zip", "author": "Puning Zhao;Lifeng Lai;Li Shen;Qingming Li;Jiafei Wu;Zhe Liu", "authorids": "~Puning_Zhao1;~Lifeng_Lai1;~Li_Shen1;~Qingming_Li1;~Jiafei_Wu1;~Zhe_Liu11", "gender": "M;;M;F;M;M", "homepage": "https://scst.sysu.edu.cn/members/members01/1417942.htm;;https://sites.google.com/site/mathshenli/home;;https://www.eee.hku.hk/~wujiafei/;", "dblp": "216/2680;12/4889;91/3680-8;226/5763;227/7227;70/1220-1.html", "google_scholar": "1jc7kasAAAAJ;gOhaCfUAAAAJ;yVhgENIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en", "orcid": "0009-0002-3264-3417;;;0000-0002-6085-7300;;0000-0001-8578-2635", "linkedin": ";;;;;", "or_profile": "~Puning_Zhao1;~Lifeng_Lai1;~Li_Shen1;~Qingming_Li1;~Jiafei_Wu1;~Zhe_Liu11", "aff": "Zhejiang Lab;University of California, Davis;JD Explore Academy;Zhejiang Lab, Zhejiang Lab;Zhejiang Lab;Zhejiang Lab", "aff_domain": "zhejianglab.com;ucdavis.edu;jd.com;zhejianglab.com;zhejianglab.com;zhejianglab.com", "position": "Researcher;Full Professor;Researcher;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nzhao2024a,\ntitle={A Huber Loss Minimization Approach to Mean Estimation under User-level Differential Privacy},\nauthor={Puning Zhao and Lifeng Lai and Li Shen and Qingming Li and Jiafei Wu and Zhe Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TutGINeJzZ}\n}", "github": "", "reviewers": "zYzW;BDM5;Bmi5", "pdf_size": 750555, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "29;48;38", "wc_strengths": "26;33;27", "wc_weaknesses": "113;150;24", "wc_questions": "2;66;190", "wc_limitations": "5;123;1", "wc_review": "175;420;280", "wc_reply_reviewers": "21;87;48", "wc_reply_authors": "0;184;21", "reply_reviewers": "1;2;1", "reply_authors": "1;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 38.333333333333336, 7.760297817881877 ], "wc_strengths_avg": [ 28.666666666666668, 3.0912061651652345 ], "wc_weaknesses_avg": [ 95.66666666666667, 52.87931752795437 ], "wc_questions_avg": [ 86.0, 78.04272334219678 ], "wc_limitations_avg": [ 43.0, 56.592107812544555 ], "wc_review_avg": [ 291.6666666666667, 100.36046144883065 ], "wc_reply_reviewers_avg": [ 52.0, 27.09243436828813 ], "wc_reply_authors_avg": [ 68.33333333333333, 82.23678549705654 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4843352118709729263&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "zhejianglab.com;ucdavis.edu;jd.com;zhejianglab.com;zhejianglab.com;zhejianglab.com", "author_num": 6, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Zhejiang Lab;University of California, Davis;JD", "aff_unique_dep": ";;JD Explore Academy", "aff_unique_url": "http://www.zhejianglab.com;https://www.ucdavis.edu;", "aff_unique_abbr": ";UC Davis;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Davis", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States;" }, { "title": "Boosted Conformal Prediction Intervals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95004", "id": "Tw032H2onS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tw032H2onS", "openreview": "https://openreview.net/forum?id=Tw032H2onS", "poster": "/media/PosterPDFs/NeurIPS%202024/95004.png?t=1731481297.5588975", "project": "", "author_site": "Ran Xie, Rina Barber, Emmanuel Candes", "tldr": "", "abstract": "This paper introduces a boosted conformal procedure designed to tailor conformalized prediction intervals toward specific desired properties, such as enhanced conditional coverage or reduced interval length. We employ machine learning techniques, notably gradient boosting, to systematically improve upon a predefined conformity score function. This process is guided by carefully constructed loss functions that measure the deviation of prediction intervals from the targeted properties. The procedure operates post-training, relying solely on model predictions and without modifying the trained model (e.g., the deep network). Systematic experiments demonstrate that starting from conventional conformal methods, our boosted procedure achieves substantial improvements in reducing interval length and decreasing deviation from target conditional coverage.", "keywords": "Conformal Prediction;Uncertainty Quantification;(Other) Statistical Learning", "primary_area": "other", "supplementary_material": "", "author": "Ran Xie;Rina Foygel Barber;Emmanuel Candes", "authorids": "~Ran_Xie2;~Rina_Foygel_Barber2;~Emmanuel_Candes1", "gender": "F;;F", "homepage": "https://profiles.stanford.edu/ran-xie;http://statweb.stanford.edu/~candes/;http://www.stat.uchicago.edu/~rina", "dblp": ";;", "google_scholar": ";nRQi4O8AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ran_Xie2;~Emmanuel_Candes1;~Rina_Barber1", "aff": "Stanford University;Stanford University;University of Chicago", "aff_domain": "stanford.edu;stanford.edu;uchicago.edu", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxie2024boosted,\ntitle={Boosted Conformal Prediction Intervals},\nauthor={Ran Xie and Rina Foygel Barber and Emmanuel Candes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tw032H2onS}\n}", "github": "", "reviewers": "3gAf;D558;tKW7;trTq", "pdf_size": 1012436, "rating": "6;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "127;40;48;78", "wc_strengths": "79;116;53;78", "wc_weaknesses": "682;168;63;177", "wc_questions": "126;106;166;24", "wc_limitations": "44;10;1;5", "wc_review": "1058;440;331;362", "wc_reply_reviewers": "852;41;48;17", "wc_reply_authors": "799;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.25, 34.11286414243166 ], "wc_strengths_avg": [ 81.5, 22.477766792989023 ], "wc_weaknesses_avg": [ 272.5, 240.63509719074648 ], "wc_questions_avg": [ 105.5, 51.7759596724194 ], "wc_limitations_avg": [ 15.0, 17.04406054905931 ], "wc_review_avg": [ 547.75, 297.25777954495993 ], "wc_reply_reviewers_avg": [ 239.5, 353.8138634932215 ], "wc_reply_authors_avg": [ 199.75, 345.97714881188324 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5770035009386958394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;uchicago.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.uchicago.edu", "aff_unique_abbr": "Stanford;UChicago", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Vista: A Generalizable Driving World Model with High Fidelity and Versatile Controllability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95003", "id": "Tw9nfNyOMy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Tw9nfNyOMy", "openreview": "https://openreview.net/forum?id=Tw9nfNyOMy", "poster": "/media/PosterPDFs/NeurIPS%202024/95003.png?t=1731569371.436295", "project": "", "author_site": "Shenyuan Gao, Jiazhi Yang, Li Chen, Kashyap Chitta, Yihang Qiu, Andreas Geiger, Jun Zhang, Hongyang Li", "tldr": "", "abstract": "World models can foresee the outcomes of different actions, which is of paramount importance for autonomous driving. Nevertheless, existing driving world models still have limitations in generalization to unseen environments, prediction fidelity of critical details, and action controllability for flexible application. In this paper, we present Vista, a generalizable driving world model with high fidelity and versatile controllability. Based on a systematic diagnosis of existing methods, we introduce several key ingredients to address these limitations. To accurately predict real-world dynamics at high resolution, we propose two novel losses to promote the learning of moving instances and structural information. We also devise an effective latent replacement approach to inject historical frames as priors for coherent long-horizon rollouts. For action controllability, we incorporate a versatile set of controls from high-level intentions (command, goal point) to low-level maneuvers (trajectory, angle, and speed) through an efficient learning strategy. After large-scale training, the capabilities of Vista can seamlessly generalize to different scenarios. Extensive experiments on multiple datasets show that Vista outperforms the most advanced general-purpose video generator in over 70% of comparisons and surpasses the best-performing driving world model by 55% in FID and 27% in FVD. Moreover, for the first time, we utilize the capacity of Vista itself to establish a generalizable reward for real-world action evaluation without accessing the ground truth actions.", "keywords": "World Model;Autonomous Driving;Video Prediction", "primary_area": "machine_vision", "supplementary_material": "/attachment/559c58ca2bb550cf9f215706e235155449c05c1f.zip", "author": "Shenyuan Gao;Jiazhi Yang;Li Chen;Kashyap Chitta;Yihang Qiu;Andreas Geiger;Jun Zhang;Hongyang Li", "authorids": "~Shenyuan_Gao1;~Jiazhi_Yang1;~Li_Chen15;~Kashyap_Chitta1;~Yihang_Qiu1;~Andreas_Geiger3;~Jun_Zhang25;~Hongyang_Li1", "gender": "M;M;M;M;M;M;;M", "homepage": "https://github.com/Little-Podi;https://github.com/YTEP-ZHI;https://ilnehc.github.io/;https://kashyap7x.github.io/;https://github.com/gihharwtw;http://www.cvlibs.net;https://eejzhang.people.ust.hk/;https://datascience.hku.hk/people/hongyang-li/", "dblp": "324/8678;305/2099;181/2847;220/3765;;40/5825-1;z/JunZhang4;95/8433-1", "google_scholar": "hZtOnecAAAAJ;Ju7nGX8AAAAJ;ulZxvY0AAAAJ;vX5i2CcAAAAJ;;https://scholar.google.ca/citations?hl=en;1Is687QAAAAJ;https://scholar.google.com.hk/citations?user=Hfrih1EAAAAJ", "orcid": ";;;;;0000-0002-8151-3726;0000-0002-5222-1898;0000-0001-9110-5534", "linkedin": ";jiazhi-yang-a07805208/;;;;;;hongyangli2020/", "or_profile": "~Shenyuan_Gao1;~Jiazhi_Yang1;~Li_Chen15;~Kashyap_Chitta1;~Yihang_Qiu1;~Andreas_Geiger3;~Jun_Zhang25;~Hongyang_Li1", "aff": "Hong Kong University of Science and Technology;Shanghai AI Laboratory;Shanghai AI Laboratory;University of T\u00fcbingen;Shanghai Jiaotong University;University of Tuebingen;Hong Kong University of Science and Technology;Shanghai AI Lab", "aff_domain": "ust.hk;pjlab.org.cn;pjlab.org.cn;uni-tuebingen.de;sjtu.edu.cn;uni-tuebingen.de;ust.hk;pjlab.org.cn", "position": "PhD student;Researcher;Researcher;PhD student;Undergrad student;Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\ngao2024vista,\ntitle={Vista: A Generalizable Driving World Model with High Fidelity and Versatile Controllability},\nauthor={Shenyuan Gao and Jiazhi Yang and Li Chen and Kashyap Chitta and Yihang Qiu and Andreas Geiger and Jun Zhang and Hongyang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Tw9nfNyOMy}\n}", "github": "", "reviewers": "bcia;ApTu;Fp7V", "pdf_size": 17804253, "rating": "5;7;7", "confidence": "3;5;4", "soundness": "2;4;3", "novelty": "2;3;4", "presentation": "3;4;4", "wc_summary": "77;74;111", "wc_strengths": "16;54;107", "wc_weaknesses": "91;135;164", "wc_questions": "40;5;151", "wc_limitations": "1;1;6", "wc_review": "225;269;539", "wc_reply_reviewers": "18;14;26", "wc_reply_authors": "26;13;19", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.33333333333333, 16.779617264870957 ], "wc_strengths_avg": [ 59.0, 37.31844941401862 ], "wc_weaknesses_avg": [ 130.0, 30.011109054259666 ], "wc_questions_avg": [ 65.33333333333333, 62.23789485157379 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 344.3333333333333, 138.8172259563712 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 4.988876515698588 ], "wc_reply_authors_avg": [ 19.333333333333332, 5.312459150169742 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2823578777628443856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ust.hk;pjlab.org.cn;pjlab.org.cn;uni-tuebingen.de;sjtu.edu.cn;uni-tuebingen.de;ust.hk;pjlab.org.cn", "author_num": 8, "aff_unique_index": "0;1;1;2;3;4;0;5", "aff_unique_norm": "Hong Kong University of Science and Technology;Shanghai AI Laboratory;University of T\u00fcbingen;Shanghai Jiao Tong University;University of Tuebingen;Shanghai AI Lab", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ust.hk;https://www.shanghai-ai-lab.com;https://www.uni-tuebingen.de/;https://www.sjtu.edu.cn;https://www.uni-tuebingen.de/;https://www.shanghaiailab.com", "aff_unique_abbr": "HKUST;SAIL;Uni T\u00fcbingen;SJTU;Uni T\u00fcbingen;SAIL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;1;0;0", "aff_country_unique": "China;Germany" }, { "title": "Online Iterative Reinforcement Learning from Human Feedback with General Preference Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95002", "id": "TwdX1W3M6S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TwdX1W3M6S", "openreview": "https://openreview.net/forum?id=TwdX1W3M6S", "poster": "", "project": "", "author_site": "Chenlu Ye, Wei Xiong, Yuheng Zhang, Hanze Dong, Nan Jiang, Tong Zhang", "tldr": "", "abstract": "We investigate Reinforcement Learning from Human Feedback (RLHF) in the context of a general preference oracle. In particular, we do not assume the existence of a reward function and an oracle preference signal drawn from the Bradley-Terry model as most of the prior works do. We consider a standard mathematical formulation, the reverse-KL regularized minimax game between two LLMs for RLHF under general preference oracle. The learning objective of this formulation is to find a policy so that it is consistently preferred by the KL-regularized preference oracle over any competing LLMs. We show that this framework is strictly more general than the reward-based one, and propose sample-efficient algorithms for both the offline learning from a pre-collected preference dataset and online learning where we can query the preference oracle along the way of training. Empirical studies verify the effectiveness of the proposed framework.", "keywords": "RLHF Theory", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/a17a162abbf083c9a5b55209479d53f66439f2f8.zip", "author": "Chenlu Ye;Wei Xiong;Yuheng Zhang;Hanze Dong;Nan Jiang;Tong Zhang", "authorids": "~Chenlu_Ye1;~Wei_Xiong9;~Yuheng_Zhang1;~Hanze_Dong1;~Nan_Jiang2;~Tong_Zhang2", "gender": "F;M;M;M;M;M", "homepage": "https://chenluye99.github.io/;https://weixiongust.github.io/WeiXiongUST/index.html;;https://hendrydong.github.io/;http://nanjiang.cs.illinois.edu;http://tongzhang-ml.org", "dblp": "336/2092;33/4054-15;;228/7798;06/4489-8;07/4227-1", "google_scholar": "c8yK5XsAAAAJ;m2-OwQEAAAAJ;IoEBLNYAAAAJ;g9WLzWoAAAAJ;nUlanA8AAAAJ;LurWtuYAAAAJ", "orcid": ";;;;;0000-0002-5511-2558", "linkedin": "https://www.linkedin.cn/incareer/in/chenlu-ye-9b015b184;;;hanze-dong/;nan-jiang-28139937/;", "or_profile": "~Chenlu_Ye1;~Wei_Xiong9;~Yuheng_Zhang1;~Hanze_Dong1;~Nan_Jiang2;~Tong_Zhang2", "aff": "University of Illinois, Urbana Champaign;Google;University of Illinois, Urbana Champaign;SalesForce;University of Illinois, Urbana Champaign;UIUC", "aff_domain": "illinois.edu;deepmind.com;cs.illinois.edu;salesforce.com;illinois.edu;illinois.edu", "position": "PhD student;Intern;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nye2024online,\ntitle={Online Iterative Reinforcement Learning from Human Feedback with General Preference Model},\nauthor={Chenlu Ye and Wei Xiong and Yuheng Zhang and Hanze Dong and Nan Jiang and Tong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TwdX1W3M6S}\n}", "github": "", "reviewers": "Bkdx;rwGu;ZfnB;kGMp", "pdf_size": 554302, "rating": "5;6;7;7", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "43;63;55;75", "wc_strengths": "49;29;53;129", "wc_weaknesses": "26;8;78;298", "wc_questions": "23;353;54;72", "wc_limitations": "1;18;1;1", "wc_review": "142;471;241;575", "wc_reply_reviewers": "0;26;15;30", "wc_reply_authors": "0;19;26;28", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 59.0, 11.661903789690601 ], "wc_strengths_avg": [ 65.0, 38.05259518088089 ], "wc_weaknesses_avg": [ 102.5, 115.76160848917054 ], "wc_questions_avg": [ 125.5, 132.51132027113758 ], "wc_limitations_avg": [ 5.25, 7.361215932167728 ], "wc_review_avg": [ 357.25, 173.34989904813904 ], "wc_reply_reviewers_avg": [ 17.75, 11.627015954233485 ], "wc_reply_authors_avg": [ 18.25, 11.053845484717073 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4189601293243627946&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "illinois.edu;deepmind.com;cs.illinois.edu;salesforce.com;illinois.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;Salesforce", "aff_unique_dep": ";Google;", "aff_unique_url": "https://illinois.edu;https://www.google.com;https://www.salesforce.com", "aff_unique_abbr": "UIUC;Google;Salesforce", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Idiographic Personality Gaussian Process for Psychological Assessment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95001", "id": "Twqa0GFMGX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Twqa0GFMGX", "openreview": "https://openreview.net/forum?id=Twqa0GFMGX", "poster": "/media/PosterPDFs/NeurIPS%202024/95001.png?t=1730907555.8300536", "project": "", "author_site": "Yehu Chen, Muchen Xi, Joshua Jackson, Jacob Montgomery, Roman Garnett", "tldr": "", "abstract": "We develop a novel measurement framework based on Gaussian process coregionalization model to address a long-lasting debate in psychometrics: whether psychological features like personality share a common structure across the population or vary uniquely for individuals. We propose idiographic personality Gaussian process (IPGP), an intermediate model that accommodates both shared trait structure across individuals and \"idiographic\" deviations. IPGP leverages the Gaussian process coregionalization model to conceptualize responses of grouped survey batteries but adjusted to non-Gaussian ordinal data, and exploits stochastic variational inference for latent factor estimation. Using both synthetic data and a novel survey, we show that IPGP improves both prediction of actual responses and estimation of intrapersonal response patterns compared to existing benchmarks. In the survey study, IPGP also identifies unique clusters of personality taxonomies, displaying great potential in advancing individualized approaches to psychological diagnosis.", "keywords": "Applications -- Cognitive science;Gaussian process;Latent variable model", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/62124c576da2d2421834bf7f4a9f1cd00a772078.zip", "author": "Yehu Chen;Muchen Xi;Joshua J. Jackson;Jacob Montgomery;Roman Garnett", "authorids": "~Yehu_Chen1;~Muchen_Xi1;~Joshua_J._Jackson1;~Jacob_Montgomery1;~Roman_Garnett1", "gender": "M;M;M;M;M", "homepage": "https://yahoochen97.github.io/;;https://psych.wustl.edu/people/joshua-jackson;;http://www.cse.wustl.edu/~garnett/", "dblp": ";;;;29/7058", "google_scholar": "https://scholar.google.com/citations?hl=en;rITNpAcAAAAJ;YdkoYF8AAAAJ;GaWC-J4AAAAJ;CUkAtC4AAAAJ", "orcid": ";0000-0003-3939-9568;0000-0002-9490-8890;;", "linkedin": ";;;;", "or_profile": "~Yehu_Chen1;~Muchen_Xi1;~Joshua_J._Jackson1;~Jacob_Montgomery1;~Roman_Garnett1", "aff": "Washington University, Saint Louis;Washington University, Saint Louis;Washington University, Saint Louis;Washington University, Saint Louis;Uber", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;wustl.edu;uber.com", "position": "PhD student;PhD student;Associate Professor;Full Professor;Applied Scientist", "bibtex": "@inproceedings{\nchen2024idiographic,\ntitle={Idiographic Personality Gaussian Process for Psychological Assessment},\nauthor={Yehu Chen and Muchen Xi and Joshua J. Jackson and Jacob Montgomery and Roman Garnett},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Twqa0GFMGX}\n}", "github": "", "reviewers": "YTHZ;mvMV;5m3j;A4W7", "pdf_size": 958800, "rating": "3;6;6;7", "confidence": "5;5;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "103;53;329;91", "wc_strengths": "84;47;143;95", "wc_weaknesses": "182;104;363;95", "wc_questions": "5;7;43;202", "wc_limitations": "5;1;40;20", "wc_review": "379;212;918;503", "wc_reply_reviewers": "74;85;0;190", "wc_reply_authors": "306;82;0;201", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;0;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 144.0, 108.39280418920806 ], "wc_strengths_avg": [ 92.25, 34.273714417903406 ], "wc_weaknesses_avg": [ 186.0, 107.64525070805493 ], "wc_questions_avg": [ 64.25, 80.95484852681771 ], "wc_limitations_avg": [ 16.5, 15.305227865013967 ], "wc_review_avg": [ 503.0, 260.9032387687052 ], "wc_reply_reviewers_avg": [ 87.25, 67.73247005683463 ], "wc_reply_authors_avg": [ 147.25, 116.22257741075957 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2614347084481973481&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "wustl.edu;wustl.edu;wustl.edu;wustl.edu;uber.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Washington University in St. Louis;Uber Technologies Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;https://www.uber.com", "aff_unique_abbr": "WUSTL;Uber", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Saint Louis;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Test Where Decisions Matter: Importance-driven Testing for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/95000", "id": "TwrnhZfD6a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TwrnhZfD6a", "openreview": "https://openreview.net/forum?id=TwrnhZfD6a", "poster": "/media/PosterPDFs/NeurIPS%202024/95000.png?t=1733775247.3703797", "project": "", "author_site": "Stefan Pranger, Hana Chockler, Martin Tappler, Bettina K\u00f6nighofer", "tldr": "", "abstract": "In many Deep Reinforcement Learning (RL) problems, decisions in a trained policy vary in significance for the expected safety and performance of the policy. Since RL policies are very complex, testing efforts should concentrate on states in which the agent's decisions have the highest impact on the expected outcome. In this paper, we propose a novel model-based method to rigorously compute a ranking of state importance across the entire state space. We then focus our testing efforts on the highest-ranked states. In this paper, we focus on testing for safety. However, the proposed methods can be easily adapted to test for performance. In each iteration, our testing framework computes optimistic and pessimistic safety estimates. These estimates provide lower and upper bounds on the expected outcomes of the policy execution across all modeled states in the state space. Our approach divides the state space into safe and unsafe regions upon convergence, providing clear insights into the policy's weaknesses. Two important properties characterize our approach. (1) Optimal Test-Case Selection: At any time in the testing process, our approach evaluates the policy in the states that are most critical for safety. (2) Guaranteed Safety: Our approach can provide formal verification guarantees over the entire state space by sampling only a fraction of the policy. Any safety properties assured by the pessimistic estimate are formally proven to hold for the policy. We provide a detailed evaluation of our framework on several examples, showing that our method discovers unsafe policy behavior with low testing effort.", "keywords": "Policy Verification;Probabilistic Model Checking;Deep Reinforcement Learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/f992f3e60963c7f475648b41a4570aafdad1865b.zip", "author": "Stefan Pranger;Hana Chockler;Martin Tappler;Bettina K\u00f6nighofer", "authorids": "~Stefan_Pranger1;~Hana_Chockler1;~Martin_Tappler1;~Bettina_K\u00f6nighofer1", "gender": "M;F;M;F", "homepage": "https://www.iaik.tugraz.at/person/stefan-pranger/;https://www.hanachockler.com/;;https://www.iaik.tugraz.at/person/bettina-koenighofer/", "dblp": "244/8267;http://dblp.uni-trier.de/pers/hd/c/Chockler:Hana;177/2521.html;117/3735", "google_scholar": "ny1EkgIAAAAJ;https://scholar.google.co.uk/scholar?hl=en;https://scholar.google.at/citations?user=PoUtPGQAAAAJ;", "orcid": "0009-0000-6011-9925;;0000-0002-4193-5609;0000-0001-5183-5452", "linkedin": ";;martin-tappler-417248a5/;", "or_profile": "~Stefan_Pranger1;~Hana_Chockler1;~Martin_Tappler1;~Bettina_K\u00f6nighofer1", "aff": "Technische Universit\u00e4t Graz;King's College London;Technische Universit\u00e4t Graz;Technische Universit\u00e4t Graz", "aff_domain": "tugraz.at;kcl.ac.uk;tugraz.at;tugraz.at", "position": "PhD student;Associate Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\npranger2024test,\ntitle={Test Where Decisions Matter: Importance-driven Testing for Deep Reinforcement Learning},\nauthor={Stefan Pranger and Hana Chockler and Martin Tappler and Bettina K{\\\"o}nighofer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TwrnhZfD6a}\n}", "github": "", "reviewers": "ak3E;xBae;sLgt;nWP9;4erK", "pdf_size": 2891484, "rating": "4;5;6;6;7", "confidence": "4;3;4;3;5", "soundness": "2;2;4;3;3", "novelty": "2;2;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "79;52;132;170;74", "wc_strengths": "29;19;68;137;59", "wc_weaknesses": "62;87;92;222;30", "wc_questions": "25;42;52;230;175", "wc_limitations": "9;1;28;1;8", "wc_review": "204;201;372;760;346", "wc_reply_reviewers": "0;0;0;106;16", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.4, 43.20925826718158 ], "wc_strengths_avg": [ 62.4, 41.49023981613025 ], "wc_weaknesses_avg": [ 98.6, 65.49992366407766 ], "wc_questions_avg": [ 104.8, 82.10091351501516 ], "wc_limitations_avg": [ 9.4, 9.891410415102591 ], "wc_review_avg": [ 376.6, 204.24455929106168 ], "wc_reply_reviewers_avg": [ 24.4, 41.26790520489258 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4193139346887674, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZDYDFqlszq4J:scholar.google.com/&scioq=Test+Where+Decisions+Matter:+Importance-driven+Testing+for+Deep+Reinforcement+Learning&hl=en&as_sdt=0,48", "gs_version_total": 5, "email": "tugraz.at;kcl.ac.uk;tugraz.at;tugraz.at", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Technische Universit\u00e4t Graz;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.tugraz.at;https://www.kcl.ac.uk", "aff_unique_abbr": "TU Graz;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Austria;United Kingdom" }, { "title": "Optimal Algorithms for Online Convex Optimization with Adversarial Constraints", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94999", "id": "TxffvJMnBy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TxffvJMnBy", "openreview": "https://openreview.net/forum?id=TxffvJMnBy", "poster": "/media/PosterPDFs/NeurIPS%202024/94999.png?t=1731011801.5224965", "project": "", "author_site": "Abhishek Sinha, Rahul Vaze", "tldr": "", "abstract": "A well-studied generalization of the standard online convex optimization (OCO) framework is constrained online convex optimization (COCO). In COCO, on every round, a convex cost function and a convex constraint function are revealed to the learner after it chooses the action for that round. The objective is to design an online learning policy that simultaneously achieves a small regret while ensuring a small cumulative constraint violation (CCV) against an adaptive adversary interacting over a horizon of length $T$. A long-standing open question in COCO is whether an online policy can simultaneously achieve $O(\\sqrt{T})$ regret and $\\tilde{O}(\\sqrt{T})$ CCV without any restrictive assumptions. For the first time, we answer this in the affirmative and show that a simple first-order policy can simultaneously achieve these bounds. Furthermore, in the case of strongly convex cost and convex constraint functions, the regret guarantee can be improved to $O(\\log T)$ while keeping the CCV bound the same as above. We establish these results by effectively combining adaptive OCO policies as a blackbox with Lyapunov optimization - a classic tool from control theory. Surprisingly, the analysis is short and elegant.", "keywords": "Online Convex Optimization;Regret bounds;Constraint violation bounds", "primary_area": "online_learning", "supplementary_material": "", "author": "Abhishek Sinha;Rahul Vaze", "authorids": "~Abhishek_Sinha3;~Rahul_Vaze1", "gender": "M;M", "homepage": "https://www.tifr.res.in/~abhishek.sinha/;http://www.tcs.tifr.res.in/~vaze/", "dblp": "47/9175;80/5048", "google_scholar": "https://scholar.google.co.in/citations?user=Oc7BRX0AAAAJ;noTrTDYAAAAJ", "orcid": "0000-0001-7220-0691;", "linkedin": "abhishek-sinha-a645291b/;", "or_profile": "~Abhishek_Sinha3;~Rahul_Vaze1", "aff": "Tata Institute of Fundamental Research;Tata Institute of Fundamental Research, Mumbai", "aff_domain": "tifr.res.in;tifr.res.in", "position": "Reader;Associate Professor", "bibtex": "@inproceedings{\nsinha2024optimal,\ntitle={Optimal Algorithms for Online Convex Optimization with Adversarial Constraints},\nauthor={Abhishek Sinha and Rahul Vaze},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TxffvJMnBy}\n}", "github": "", "reviewers": "MH7f;G8HN;FrRh;pcig", "pdf_size": 710442, "rating": "5;7;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "105;49;107;306", "wc_strengths": "137;42;95;90", "wc_weaknesses": "183;39;229;120", "wc_questions": "51;25;134;66", "wc_limitations": "12;6;1;3", "wc_review": "488;161;566;585", "wc_reply_reviewers": "174;18;88;54", "wc_reply_authors": "157;7;7;16", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 141.75, 97.64572443276766 ], "wc_strengths_avg": [ 91.0, 33.66749173906485 ], "wc_weaknesses_avg": [ 142.75, 71.31050062929022 ], "wc_questions_avg": [ 69.0, 40.29267923581156 ], "wc_limitations_avg": [ 5.5, 4.153311931459037 ], "wc_review_avg": [ 450.0, 170.76738564491757 ], "wc_reply_reviewers_avg": [ 83.5, 57.816520130495576 ], "wc_reply_authors_avg": [ 46.75, 63.758822918871395 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14425942127121293984&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "tifr.res.in;tifr.res.in", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tata Institute of Fundamental Research", "aff_unique_dep": "", "aff_unique_url": "https://www.tifr.res.in", "aff_unique_abbr": "TIFR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mumbai", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "title": "UniSDF: Unifying Neural Representations for High-Fidelity 3D Reconstruction of Complex Scenes with Reflections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94998", "id": "Ty25oVKTqj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ty25oVKTqj", "openreview": "https://openreview.net/forum?id=Ty25oVKTqj", "poster": "/media/PosterPDFs/NeurIPS%202024/94998.png?t=1731685125.9155488", "project": "", "author_site": "Fangjinhua Wang, Marie-Julie Rakotosaona, Michael Niemeyer, Richard Szeliski, Marc Pollefeys, Federico Tombari", "tldr": "", "abstract": "Neural 3D scene representations have shown great potential for 3D reconstruction from 2D images. However, reconstructing real-world captures of complex scenes still remains a challenge. Existing generic 3D reconstruction methods often struggle to represent fine geometric details and do not adequately model reflective surfaces of large-scale scenes. Techniques that explicitly focus on reflective surfaces can model complex and detailed reflections by exploiting better reflection parameterizations. However, we observe that these methods are often not robust in real scenarios where non-reflective as well as reflective components are present. In this work, we propose UniSDF, a general purpose 3D reconstruction method that can reconstruct large complex scenes with reflections. We investigate both camera view as well as reflected view-based color parameterization techniques and find that explicitly blending these representations in 3D space enables reconstruction of surfaces that are more geometrically accurate, especially for reflective surfaces. We further combine this representation with a multi-resolution grid backbone that is trained in a coarse-to-fine manner, enabling faster reconstructions than prior methods. Extensive experiments on object-level datasets DTU, Shiny Blender as well as unbounded datasets Mip-NeRF 360 and Ref-NeRF real demonstrate that our method is able to robustly reconstruct complex large-scale scenes with fine details and reflective surfaces, leading to the best overall performance. Project page: https://fangjinhuawang.github.io/UniSDF.", "keywords": "3D reconstruction;novel view synthesis;reflection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Fangjinhua Wang;Marie-Julie Rakotosaona;Michael Niemeyer;Richard Szeliski;Marc Pollefeys;Federico Tombari", "authorids": "~Fangjinhua_Wang1;~Marie-Julie_Rakotosaona1;~Michael_Niemeyer1;~Richard_Szeliski1;~Marc_Pollefeys2;~Federico_Tombari1", "gender": "M;;M;M;M;M", "homepage": "https://fangjinhuawang.github.io/;;https://m-niemeyer.github.io/;http://szeliski.org;;https://federicotombari.github.io/", "dblp": "280/0720;234/6243;232/1712;46/2186;p/MarcPollefeys;16/3539", "google_scholar": "ysTmrEsAAAAJ;eQ0om98AAAAJ;https://scholar.google.de/citations?user=v1O7i_0AAAAJ;3_u1jHQAAAAJ;YYH0BjEAAAAJ;TFsE4BIAAAAJ", "orcid": ";;;0009-0005-5300-5475;;0000-0001-5598-5212", "linkedin": "fangjinhua-wang-4ba2aa150/;;;;marc-pollefeys-30a7075/;fedet/", "or_profile": "~Fangjinhua_Wang1;~Marie-Julie_Rakotosaona1;~Michael_Niemeyer1;~Richard_Szeliski1;~Marc_Pollefeys2;~Federico_Tombari1", "aff": "ETHZ - ETH Zurich;Google;Google;Department of Computer Science;Swiss Federal Institute of Technology;Technical University Munich (TUM)", "aff_domain": "ethz.ch;google.com;google.com;cs.washington.edu;ethz.ch;in.tum.de", "position": "PhD student;Researcher;Researcher;Full Professor;Full Professor;Lecturer", "bibtex": "@inproceedings{\nwang2024unisdf,\ntitle={Uni{SDF}: Unifying Neural Representations for High-Fidelity 3D Reconstruction of Complex Scenes with Reflections},\nauthor={Fangjinhua Wang and Marie-Julie Rakotosaona and Michael Niemeyer and Richard Szeliski and Marc Pollefeys and Federico Tombari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ty25oVKTqj}\n}", "github": "", "reviewers": "5Tqb;2MRi;dks9;aZTE", "pdf_size": 42692848, "rating": "6;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;4;3;4", "wc_summary": "62;129;76;120", "wc_strengths": "102;75;64;83", "wc_weaknesses": "166;140;145;37", "wc_questions": "310;129;3;39", "wc_limitations": "7;8;59;41", "wc_review": "647;481;347;320", "wc_reply_reviewers": "0;142;29;0", "wc_reply_authors": "0;36;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 96.75, 28.367014294775544 ], "wc_strengths_avg": [ 81.0, 13.874436925511608 ], "wc_weaknesses_avg": [ 122.0, 50.0349877585675 ], "wc_questions_avg": [ 120.25, 118.775786673884 ], "wc_limitations_avg": [ 28.75, 22.18529918662356 ], "wc_review_avg": [ 448.75, 129.6849547943014 ], "wc_reply_reviewers_avg": [ 42.75, 58.512285034854 ], "wc_reply_authors_avg": [ 9.0, 15.588457268119896 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16871556136224998069&as_sdt=4000005&sciodt=0,18&hl=en", "gs_version_total": 4, "email": "ethz.ch;google.com;google.com;cs.washington.edu;ethz.ch;in.tum.de", "author_num": 6, "aff_unique_index": "0;1;1;2;3;4", "aff_unique_norm": "ETH Zurich;Google;Unknown Institution;Swiss Federal Institute of Technology;Technical University Munich", "aff_unique_dep": ";Google;Department of Computer Science;;", "aff_unique_url": "https://www.ethz.ch;https://www.google.com;;https://www.ethz.ch;https://www.tum.de", "aff_unique_abbr": "ETHZ;Google;;ETH Zurich;TUM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;3", "aff_country_unique": "Switzerland;United States;;Germany" }, { "title": "Arctique: An artificial histopathological dataset unifying realism and controllability for uncertainty quantification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97676", "id": "TyIWrwzpgu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TyIWrwzpgu", "openreview": "https://openreview.net/forum?id=TyIWrwzpgu", "poster": "/media/PosterPDFs/NeurIPS%202024/97676.png?t=1731759789.9262376", "project": "", "author_site": "Jannik Franzen, Claudia Winklmayr, Vanessa Emanuela Guarino, Christoph Karg, Xiaoyan Yu, Nora Koreuber, Jan Albrecht, Philip Bischoff, Dagmar Kainmueller", "tldr": "", "abstract": "Uncertainty Quantification (UQ) is crucial for reliable image segmentation. Yet, while the field sees continual development of novel methods, a lack of agreed-upon benchmarks limits their systematic comparison and evaluation: Current UQ methods are typically tested either on overly simplistic toy datasets or on complex real-world datasets that do not allow to discern true uncertainty. To unify both controllability and complexity, we introduce Arctique, a procedurally generated dataset modeled after histopathological colon images. We chose histopathological images for two reasons: 1) their complexity in terms of intricate object structures and highly variable appearance, which yields challenging segmentation problems, and 2) their broad prevalence for medical diagnosis and respective relevance of high-quality UQ. To generate Arctique, we established a Blender-based framework for 3D scene creation with intrinsic noise manipulation. Arctique contains up to 50,000 rendered images with precise masks as well as noisy label simulations. We show that by independently controlling the uncertainty in both images and labels, we can effectively study the performance of several commonly used UQ methods. Hence, Arctique serves as a critical resource for benchmarking and advancing UQ techniques and other methodologies in complex, multi-object environments, bridging the gap between realism and controllability. All code is publicly available, allowing re-creation and controlled manipulations of our shipped images as well as creation and rendering of new scenes.", "keywords": "Uncertainty Quantification;Semantic Segmentation;Instance Segmentation;Synthetic Dataset;Biomedical Images;Uncertainty Evaluation", "primary_area": "", "supplementary_material": "/attachment/a70839c1535e3ec3ca77631aa2c1805e0ecadb0b.pdf", "author": "Jannik Franzen;Claudia Winklmayr;Vanessa Emanuela Guarino;Christoph Karg;Xiaoyan Yu;Nora Koreuber;Jan Philipp Albrecht;Philip Bischoff;Dagmar Kainmueller", "authorids": "~Jannik_Franzen1;~Claudia_Winklmayr1;~Vanessa_Emanuela_Guarino1;~Christoph_Karg1;~Xiaoyan_Yu2;~Nora_Koreuber1;~Jan_Philipp_Albrecht1;~Philip_Bischoff1;~Dagmar_Kainmueller2", "gender": "M;F;F;;F;F;M;M;", "homepage": ";;;;;;;;", "dblp": ";;;;;;;;22/4586", "google_scholar": ";vluivG0AAAAJ;-zscgKMAAAAJ;;;;;;mg0LpUwAAAAJ", "orcid": ";0000-0002-8784-2301;0000-0003-1625-4323;0009-0006-4625-4470;0000-0001-8196-663X;;0000-0002-3792-9695;0000-0002-4442-7116;", "linkedin": "jannik-franzen-5931b121a;;vanessa-emanuela-guarino-28b34b294/;;;nora-koreuber/;;;", "or_profile": "~Jannik_Franzen1;~Claudia_Winklmayr1;~Vanessa_Emanuela_Guarino1;~Christoph_Karg1;~Xiaoyan_Yu2;~Nora_Koreuber1;~Jan_Philipp_Albrecht1;~Philip_Bischoff1;~Dagmar_Kainm\u00fcller1", "aff": "Max Delbr\u00fcck Center for Molecular Medicine;Max Delbr\u00fcck Center for Molecular Medicine;Max Delbr\u00fcck Center for Molecular Medicine;Max Delbr\u00fcck Center for Molecular Medicine;Max Delbr\u00fcck Center for Molecular Medicine;Charit\u00e9 - Universit\u00e4tsmedizin Berlin;Max Delbr\u00fcck Center for Molecular Medicine;Charit\u00e9 - Universit\u00e4tsmedizin Berlin;Max Delbr\u00fcck Center for Molecular Medicine", "aff_domain": "mdc-berlin.de;mdc-berlin.de;mdc-berlin.de;mdc-berlin.de;mdc-berlin.de;charite.de;mdc-berlin.de;charite.de;mdc-berlin.de", "position": "PhD student;PhD student;PhD student;Postdoc;PhD student;PhD student;PhD student;Principal Researcher;Group Leader", "bibtex": "@inproceedings{\nfranzen2024arctique,\ntitle={Arctique: An artificial histopathological dataset unifying realism and controllability for uncertainty quantification},\nauthor={Jannik Franzen and Claudia Winklmayr and Vanessa Emanuela Guarino and Christoph Karg and Xiaoyan Yu and Nora Koreuber and Jan Philipp Albrecht and Philip Bischoff and Dagmar Kainmueller},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=TyIWrwzpgu}\n}", "github": "", "reviewers": "1Q23;9out;EfH6", "pdf_size": 5909022, "rating": "4;7;9", "confidence": "3;3;5", "wc_summary_and_contributions": "32;43;80", "wc_strengths": "18;65;27", "wc_improvement": "17;49;108", "wc_limitations": "57;1;2", "wc_correctness": "7;12;2", "wc_clarity": "6;7;8", "wc_relation_to_prior_work": "5;1;16", "wc_documentation": "19;4;1", "wc_additional_feedback": "1;1;1", "wc_review": "162;183;245", "wc_reply_reviewers": "0;9;52", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 51.666666666666664, 20.531818125912658 ], "wc_strengths_avg": [ 36.666666666666664, 20.368821489936252 ], "wc_improvement_avg": [ 58.0, 37.69173207305107 ], "wc_limitations_avg": [ 20.0, 26.166135875720485 ], "wc_correctness_avg": [ 7.0, 4.08248290463863 ], "wc_clarity_avg": [ 7.0, 0.816496580927726 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 6.342099196813483 ], "wc_documentation_avg": [ 8.0, 7.874007874011811 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 196.66666666666666, 35.23571420527127 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 22.69116323349001 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8029550685469663, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ba45CjsbAckJ:scholar.google.com/&scioq=Arctique:+An+artificial+histopathological+dataset+unifying+realism+and+controllability+for+uncertainty+quantification&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "mdc-berlin.de;mdc-berlin.de;mdc-berlin.de;mdc-berlin.de;mdc-berlin.de;charite.de;mdc-berlin.de;charite.de;mdc-berlin.de", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;0;1;0", "aff_unique_norm": "Max Delbr\u00fcck Center for Molecular Medicine;Charit\u00e9 - Universit\u00e4tsmedizin Berlin", "aff_unique_dep": ";", "aff_unique_url": "https://www.mdc-berlin.de;https://www.charite.de", "aff_unique_abbr": "MDC;Charit\u00e9", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Non-asymptotic Analysis of Biased Adaptive Stochastic Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94997", "id": "TzxSrNJE0T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TzxSrNJE0T", "openreview": "https://openreview.net/forum?id=TzxSrNJE0T", "poster": "/media/PosterPDFs/NeurIPS%202024/94997.png?t=1733243155.9966817", "project": "", "author_site": "Sobihan Surendran, Adeline Fermanian, Antoine Godichon-Baggioni, Sylvain Le Corff", "tldr": "", "abstract": "Stochastic Gradient Descent (SGD) with adaptive steps is widely used to train deep neural networks and generative models. Most theoretical results assume that it is possible to obtain unbiased gradient estimators, which is not the case in several recent deep learning and reinforcement learning applications that use Monte Carlo methods.\nThis paper provides a comprehensive non-asymptotic analysis of SGD with biased gradients and adaptive steps for non-convex smooth functions. Our study incorporates time-dependent bias and emphasizes the importance of controlling the bias of the gradient estimator. \nIn particular, we establish that Adagrad, RMSProp, and AMSGRAD, an exponential moving average variant of Adam, with biased gradients, converge to critical points for smooth non-convex functions at a rate similar to existing results in the literature for the unbiased case. Finally, we provide experimental results using Variational Autoenconders (VAE) and applications to several learning frameworks that illustrate our convergence results and show how the effect of bias can be reduced by appropriate hyperparameter tuning.", "keywords": "Stochastic Optimization;Adaptive Stochastic Approximation;Monte Carlo Methods;Variational Autoenconders", "primary_area": "optimization", "supplementary_material": "", "author": "Sobihan Surendran;Adeline Fermanian;Antoine Godichon-Baggioni;Sylvain Le Corff", "authorids": "~Sobihan_Surendran1;~Adeline_Fermanian1;~Antoine_Godichon-Baggioni1;~Sylvain_Le_Corff1", "gender": "M;;M;M", "homepage": ";https://afermanian.github.io;http://godichon.perso.math.cnrs.fr;https://sylvainlc.github.io/", "dblp": ";;;29/10875", "google_scholar": ";3Mfclk8AAAAJ;;gHRCj-EAAAAJ", "orcid": ";;;0000-0001-5211-2328", "linkedin": "sobihan-surendran-859272200/?originalSubdomain=fr;;;", "or_profile": "~Sobihan_Surendran1;~Adeline_Fermanian1;~Antoine_Godichon-Baggioni1;~Sylvain_Le_Corff1", "aff": "Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Califrais;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Sorbonne Universit\u00e9, LPSM", "aff_domain": "sorbonne-universite.fr;califrais.fr;sorbonne-universite.fr;sorbonne-universite.fr", "position": "PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsurendran2024nonasymptotic,\ntitle={Non-asymptotic Analysis of Biased Adaptive Stochastic Approximation},\nauthor={Sobihan Surendran and Adeline Fermanian and Antoine Godichon-Baggioni and Sylvain Le Corff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TzxSrNJE0T}\n}", "github": "", "reviewers": "nF8X;ioBT;3iVV;1qy9;aKkE;BcqM", "pdf_size": 1552128, "rating": "4;6;7;7;7;7", "confidence": "3;4;3;3;4;3", "soundness": "2;3;3;4;3;3", "novelty": "2;2;3;3;3;3", "presentation": "3;3;4;4;3;3", "wc_summary": "45;36;59;51;51;146", "wc_strengths": "46;24;120;88;46;161", "wc_weaknesses": "155;239;61;18;22;65", "wc_questions": "32;4;59;16;35;135", "wc_limitations": "9;19;14;1;1;19", "wc_review": "287;322;313;174;155;526", "wc_reply_reviewers": "73;12;20;0;15;23", "wc_reply_authors": "599;0;0;0;0;0", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 6.333333333333333, 1.1055415967851332 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 64.66666666666667, 37.03451843288307 ], "wc_strengths_avg": [ 80.83333333333333, 47.72985322509267 ], "wc_weaknesses_avg": [ 93.33333333333333, 79.19736247011149 ], "wc_questions_avg": [ 46.833333333333336, 42.947319150585194 ], "wc_limitations_avg": [ 10.5, 7.522189397597839 ], "wc_review_avg": [ 296.1666666666667, 121.5941564750909 ], "wc_reply_reviewers_avg": [ 23.833333333333332, 23.1618700070228 ], "wc_reply_authors_avg": [ 99.83333333333333, 223.234119753729 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.10660035817780521, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4058318851308960030&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sorbonne-universite.fr;califrais.fr;sorbonne-universite.fr;sorbonne-universite.fr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Sorbonne Universit\u00e9;California Institute of Technology", "aff_unique_dep": "Facult\u00e9 des Sciences;", "aff_unique_url": "https://www.sorbonne-universite.fr;https://www.caltech.edu", "aff_unique_abbr": "Sorbonne U;Caltech", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Paris VI;Pasadena;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "France;United States" }, { "title": "Neural Cover Selection for Image Steganography", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94996", "id": "TzzZ5KAEE2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=TzzZ5KAEE2", "openreview": "https://openreview.net/forum?id=TzzZ5KAEE2", "poster": "", "project": "", "author_site": "Karl Chahine, Hyeji Kim", "tldr": "", "abstract": "In steganography, selecting an optimal cover image\u2014referred to as cover selection\u2014is pivotal for effective message concealment. Traditional methods have typically employed exhaustive searches to identify images that conform to specific perceptual or complexity metrics. However, the relationship between these metrics and the actual message hiding efficacy of an image is unclear, often yielding less-than-ideal steganographic outcomes. Inspired by recent advancements in generative models, we introduce a novel cover selection framework, which involves optimizing within the latent space of pretrained generative models to identify the most suitable cover images, distinguishing itself from traditional exhaustive search methods. Our method shows significant advantages in message recovery and image quality. We also conduct an information-theoretic analysis of the generated cover images, revealing that message hiding predominantly occurs in low-variance pixels, reflecting the waterfilling algorithm's principles in parallel Gaussian channels.", "keywords": "Image Steganography;Watermarking;Generative Models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Karl Chahine;Hyeji Kim", "authorids": "~Karl_Chahine1;~Hyeji_Kim1", "gender": "M;", "homepage": ";", "dblp": "299/7754;", "google_scholar": ";", "orcid": ";", "linkedin": "karlchahine/;", "or_profile": "~Karl_Chahine1;~Hyeji_Kim1", "aff": "University of Texas, Austin;", "aff_domain": "utexas.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nchahine2024neural,\ntitle={Neural Cover Selection for Image Steganography},\nauthor={Karl Chahine and Hyeji Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=TzzZ5KAEE2}\n}", "github": "", "reviewers": "XM9B;kQa7;DaKE", "pdf_size": 18637682, "rating": "4;5;6", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "3;3;2", "wc_summary": "84;50;82", "wc_strengths": "118;52;88", "wc_weaknesses": "52;88;156", "wc_questions": "119;180;315", "wc_limitations": "41;12;6", "wc_review": "414;382;647", "wc_reply_reviewers": "0;172;14", "wc_reply_authors": "0;314;13", "reply_reviewers": "0;2;1", "reply_authors": "1;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 15.57776192739723 ], "wc_strengths_avg": [ 86.0, 26.981475126464083 ], "wc_weaknesses_avg": [ 98.66666666666667, 43.12256434345661 ], "wc_questions_avg": [ 204.66666666666666, 81.89559749393669 ], "wc_limitations_avg": [ 19.666666666666668, 15.2825245151302 ], "wc_review_avg": [ 481.0, 118.10447352520846 ], "wc_reply_reviewers_avg": [ 62.0, 77.99145252312375 ], "wc_reply_authors_avg": [ 109.0, 145.054012928518 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MIq6WoqQZuoJ:scholar.google.com/&scioq=Neural+Cover+Selection+for+Image+Steganography&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "utexas.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Ordered Momentum for Asynchronous SGD", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94995", "id": "U2Mx0hSRwA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U2Mx0hSRwA", "openreview": "https://openreview.net/forum?id=U2Mx0hSRwA", "poster": "/media/PosterPDFs/NeurIPS%202024/94995.png?t=1732613347.1739993", "project": "", "author_site": "Chang-Wei Shi, Yi-Rui Yang, Wu-Jun Li", "tldr": "", "abstract": "Distributed learning is essential for training large-scale deep models.\nAsynchronous SGD (ASGD) and its variants are commonly used distributed learning methods, particularly in scenarios where the computing capabilities of workers in the cluster are heterogeneous.\nMomentum has been acknowledged for its benefits in both optimization and generalization in deep model training. However, existing works have found that naively incorporating momentum into ASGD can impede the convergence.\nIn this paper, we propose a novel method called ordered momentum (OrMo) for ASGD. In OrMo, momentum is incorporated into ASGD by organizing the gradients in order based on their iteration indexes. We theoretically prove the convergence of OrMo with both constant and delay-adaptive learning rates for non-convex problems. To the best of our knowledge, this is the first work to establish the convergence analysis of ASGD with momentum without dependence on the maximum delay. Empirical results demonstrate that OrMo can achieve better convergence performance compared with ASGD and other asynchronous methods with momentum.", "keywords": "optimization;deep learning;distributed learning;asynchronous SGD", "primary_area": "optimization", "supplementary_material": "/attachment/59ec6ac3a741311cb06cce486bee640691b6e2b5.zip", "author": "Chang-Wei Shi;Yi-Rui Yang;Wu-Jun Li", "authorids": "~Chang-Wei_Shi1;~Yi-Rui_Yang2;~Wu-Jun_Li1", "gender": "M;M;M", "homepage": "https://github.com/Changwei-Shi;http://www.lamda.nju.edu.cn/yangyr/;https://cs.nju.edu.cn/lwj/", "dblp": ";260/0404;26/988.html", "google_scholar": ";;NCCdqdcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chang-Wei_Shi1;~Yi-Rui_Yang2;~Wu-Jun_Li1", "aff": "Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nshi2024ordered,\ntitle={Ordered Momentum for Asynchronous {SGD}},\nauthor={Chang-Wei Shi and Yi-Rui Yang and Wu-Jun Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U2Mx0hSRwA}\n}", "github": "", "reviewers": "M3Nx;dA6E;5XzJ;NjVh", "pdf_size": 1386996, "rating": "3;5;5;6", "confidence": "2;3;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;2", "presentation": "1;3;2;3", "wc_summary": "83;77;41;72", "wc_strengths": "42;88;32;110", "wc_weaknesses": "118;105;46;231", "wc_questions": "49;23;185;64", "wc_limitations": "7;1;6;1", "wc_review": "299;294;310;478", "wc_reply_reviewers": "68;199;13;70", "wc_reply_authors": "357;607;14;47", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 68.25, 16.20763708873073 ], "wc_strengths_avg": [ 68.0, 32.155870381627054 ], "wc_weaknesses_avg": [ 125.0, 66.94400645315457 ], "wc_questions_avg": [ 80.25, 62.23092077094794 ], "wc_limitations_avg": [ 3.75, 2.7726341266023544 ], "wc_review_avg": [ 345.25, 76.86148255140542 ], "wc_reply_reviewers_avg": [ 87.5, 68.31727453580098 ], "wc_reply_authors_avg": [ 256.25, 242.71729954826046 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:E50DF2rAN8UJ:scholar.google.com/&scioq=Ordered+Momentum+for+Asynchronous+SGD&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Benchmarking Complex Instruction-Following with Multiple Constraints Composition", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97675", "id": "U2aVNDrZGx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U2aVNDrZGx", "openreview": "https://openreview.net/forum?id=U2aVNDrZGx", "poster": "/media/PosterPDFs/NeurIPS%202024/97675.png?t=1733624212.6641142", "project": "", "author_site": "Bosi Wen, Pei Ke, Xiaotao Gu, Lindong Wu, Hao Huang, Jinfeng Zhou, Wenchuang Li, Binxin Hu, Wendy Gao, Jiaxing Xu, Yiming Liu, Jie Tang, Hongning Wang, Minlie Huang", "tldr": "", "abstract": "Instruction following is one of the fundamental capabilities of large language models (LLMs). As the ability of LLMs is constantly improving, they have been increasingly applied to deal with complex human instructions in real-world scenarios. Therefore, how to evaluate the ability of complex instruction-following of LLMs has become a critical research problem. Existing benchmarks mainly focus on modeling different types of constraints in human instructions while neglecting the composition of different constraints, which is an indispensable constituent in complex instructions. To this end, we propose ComplexBench, a benchmark for comprehensively evaluating the ability of LLMs to follow complex instructions composed of multiple constraints. We propose a hierarchical taxonomy for complex instructions, including 4 constraint types, 19 constraint dimensions, and 4 composition types, and manually collect a high-quality dataset accordingly. To make the evaluation reliable, we augment LLM-based evaluators with rules to effectively verify whether generated texts can satisfy each constraint and composition. Furthermore, we obtain the final evaluation score based on the dependency structure determined by different composition types. ComplexBench identifies significant deficiencies in existing LLMs when dealing with complex instructions with multiple constraints composition.", "keywords": "complex instruction-following;constraint;composition;large language models;automatic evaluation", "primary_area": "", "supplementary_material": "/attachment/bab6aae5fe87513c471aba2f5364de6063a8644d.zip", "author": "Bosi Wen;Pei Ke;Xiaotao Gu;Lindong Wu;Hao Huang;Jinfeng Zhou;Wenchuang Li;Binxin Hu;Wendy Gao;Jiaxing Xu;Yiming Liu;Jie Tang;Hongning Wang;Minlie Huang", "authorids": "~Bosi_Wen1;~Pei_Ke2;~Xiaotao_Gu1;~Lindong_Wu1;~Hao_Huang17;~Jinfeng_Zhou1;~Wenchuang_Li3;~Binxin_Hu1;~Wendy_Gao1;~Jiaxing_Xu3;~Yiming_Liu14;~Jie_Tang1;~Hongning_Wang1;~Minlie_Huang1", "gender": ";M;M;M;M;;M;;;M;M;;M;M", "homepage": ";https://kepei1106.github.io/;;https://github.com/WuLindong1997;;;https://elib.cugb.edu.cn/https/77726476706e69737468656265737421e0f85388263c7b1e7d1d8eaed6502720a801d5/index#/app/home/cugb;;;https://github.com/ThePopeXjx;https://leo1oel.github.io/;;http://www.cs.virginia.edu/~hw5x/;http://coai.cs.tsinghua.edu.cn/hml", "dblp": ";10/2179;190/5278.html;;;305/6557;;;;;;;05/6545;", "google_scholar": ";W_zPCtEAAAAJ;YR4Lp0QAAAAJ;;https://scholar.google.com/citations?view_op=list_works;y58dUQgAAAAJ;;;;;tOEF7V8AAAAJ;;qkdvKNoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;;;;;;;;0000-0002-6524-9195;", "linkedin": ";;;;;;;;wendy-gao-9b6143141/;;;;;", "or_profile": "~Bosi_Wen1;~Pei_Ke2;~Xiaotao_Gu1;~Lindong_Wu1;~Hao_Huang17;~Jinfeng_Zhou1;~Wenchuang_Li3;~Binxin_Hu1;~Wendy_Gao1;~Jiaxing_Xu3;~Yiming_Liu14;~Jie_Tang1;~Hongning_Wang1;~Minlie_Huang1", "aff": ";Tsinghua University;Zhipu AI;Northwest Minzu University ;Zhipu\u00b7AI;Tsinghua University;China University of Geoscience Beijing;;ZHIPU AI;Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;aminer.cn;xbmu.edu.cn;zhipu.cn;tsinghua.edu.cn;cugb.edu.cn;;zhipuai.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": ";Postdoc;Researcher;MS student;Researcher;PhD student;Undergrad student;;Senior product manager;Undergrad student;Undergrad student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwen2024benchmarking,\ntitle={Benchmarking Complex Instruction-Following with Multiple Constraints Composition},\nauthor={Bosi Wen and Pei Ke and Xiaotao Gu and Lindong Wu and Hao Huang and Jinfeng Zhou and Wenchuang Li and Binxin Hu and Wendy Gao and Jiaxing Xu and Yiming Liu and Jie Tang and Hongning Wang and Minlie Huang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=U2aVNDrZGx}\n}", "github": "", "reviewers": "91Pj;nFjW;Xp7s;bTTo", "pdf_size": 9087163, "rating": "6;6;6;7", "confidence": "4;3;4;3", "wc_summary_and_contributions": "84;186;78;72", "wc_strengths": "124;83;58;32", "wc_improvement": "59;194;103;54", "wc_limitations": "49;3;62;32", "wc_correctness": "13;8;14;14", "wc_clarity": "8;46;31;7", "wc_relation_to_prior_work": "14;4;78;17", "wc_documentation": "1;3;22;18", "wc_additional_feedback": "1;1;1;1", "wc_review": "353;528;447;247", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 105.0, 46.95742752749558 ], "wc_strengths_avg": [ 74.25, 33.914414339628514 ], "wc_improvement_avg": [ 102.5, 56.162710048572265 ], "wc_limitations_avg": [ 36.5, 22.073740054644116 ], "wc_correctness_avg": [ 12.25, 2.48746859276655 ], "wc_clarity_avg": [ 23.0, 16.38596960817394 ], "wc_relation_to_prior_work_avg": [ 28.25, 29.123658767400773 ], "wc_documentation_avg": [ 11.0, 9.137833441248533 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 393.75, 104.946117126838 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6314734829621218803&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";tsinghua.edu.cn;aminer.cn;xbmu.edu.cn;zhipu.cn;tsinghua.edu.cn;cugb.edu.cn;;zhipuai.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 14, "aff_unique_index": "0;1;2;1;0;3;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;Zhipu AI;Northwest Minzu University;China University of Geoscience", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.zhipu.ai;http://www.nwmu.edu.cn;http://www.cugb.edu.cn/", "aff_unique_abbr": "THU;Zhipu AI;NWU;CUGB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Needle In A Multimodal Haystack", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97674", "id": "U2pNwSuQqD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U2pNwSuQqD", "openreview": "https://openreview.net/forum?id=U2pNwSuQqD", "poster": "/media/PosterPDFs/NeurIPS%202024/97674.png?t=1731608834.9861147", "project": "", "author_site": "Weiyun Wang, Shuibo Zhang, Yiming Ren, Yuchen Duan, Tiantong Li, Shuo Liu, Mengkang Hu, Zhe Chen, Kaipeng Zhang, Lewei Lu, Xizhou Zhu, Ping Luo, Yu Qiao, Jifeng Dai, Wenqi Shao, Wenhai Wang", "tldr": "", "abstract": "With the rapid advancement of multimodal large language models (MLLMs), their evaluation has become increasingly comprehensive. However, understanding long multimodal content, as a foundational ability for real-world applications, remains underexplored. In this work, we present Needle In A Multimodal Haystack (MM-NIAH), the first benchmark specifically designed to systematically evaluate the capability of existing MLLMs to comprehend long multimodal documents. Our benchmark includes three types of evaluation tasks: multimodal retrieval, counting, and reasoning. In each task, the model is required to answer the questions according to different key information scattered throughout the given multimodal document. Evaluating the leading MLLMs on MM-NIAH, we observe that existing models still have significant room for improvement on these tasks, especially on vision-centric evaluation. We hope this work can provide a platform for further research on long multimodal document comprehension and contribute to the advancement of MLLMs. Code and benchmark are released at https://github.com/OpenGVLab/MM-NIAH.", "keywords": "Evaluation;Multimodal;Large Language Model;Long Context", "primary_area": "", "supplementary_material": "/attachment/a5f050031eaeb9515f721c1fb9e931aa0995dcae.pdf", "author": "Weiyun Wang;Shuibo Zhang;Yiming Ren;Yuchen Duan;Tiantong Li;Shuo Liu;Mengkang Hu;Zhe Chen;Kaipeng Zhang;Lewei Lu;Xizhou Zhu;Ping Luo;Yu Qiao;Jifeng Dai;Wenqi Shao;Wenhai Wang", "authorids": "~Weiyun_Wang2;~Shuibo_Zhang1;~Yiming_Ren2;~Yuchen_Duan1;~Tiantong_Li1;~Shuo_Liu5;~Mengkang_Hu1;~Zhe_Chen10;~Kaipeng_Zhang1;~Lewei_Lu1;~Xizhou_Zhu1;~Ping_Luo2;~Yu_Qiao1;~Jifeng_Dai1;~Wenqi_Shao2;~Wenhai_Wang2", "gender": ";;M;M;;F;M;M;M;M;;;;M;M;", "homepage": ";https://www.google.com.hk/;;https://github.com/duanduanduanyuchen;https://fundamentalvision.github.io/members/tiantongli.html;;https://aaron617.github.io/;https://czczup.github.io/;http://kpzhang93.github.io/;;;;;https://jifengdai.org/;https://wqshao126.github.io/;", "dblp": ";;;;;07/6773;321/0644;06/4240-17;179/2126;247/6438;170/1608;;;14/9399;227/3122;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.tw/citations?hl=zh-CN;FhVRimUAAAAJ;j1rq_lYAAAAJ;4OqZBmYAAAAJ;https://scholar.google.com.hk/citations?user=zdgKJXIAAAAJ;02RXI00AAAAJ;;;SH_-B_AAAAAJ;Bs9mrwwAAAAJ;", "orcid": ";;;;;;0009-0009-3779-3378;;;;;;;;;", "linkedin": ";;;;;;;;;lewei-lu-94015977/;;;;;;", "or_profile": "~Weiyun_Wang2;~Shuibo_Zhang1;~Yiming_Ren2;~Yuchen_Duan1;~Tiantong_Li1;~Shuo_Liu5;~Mengkang_Hu1;~Zhe_Chen10;~Kaipeng_Zhang1;~Lewei_Lu1;~Xizhou_Zhu1;~Ping_Luo2;~Yu_Qiao1;~Jifeng_Dai1;~Wenqi_Shao2;~Wenhai_Wang2", "aff": ";Shanghai Artificial Intelligence Laboratory;Beijing University of Posts and Telecommunications;The Chinese University of Hong Kong;Tsinghua University;Shanghai AI lab;University of Hong Kong;Nanjing University;Shanghai AI Laboratory;SenseTime;Tsinghua University;;;Tsinghua University;Shanghai AI Laboratory;", "aff_domain": ";pjlab.org.cn;bupt.edu.cn;link.cuhk.edu.hk;tsinghua.edu.cn;pjlab.org;hku.hk;nju.edu.cn;pjlab.org.cn;sensetime.com;tsinghua.edu.cn;;;tsinghua.edu.cn;pjlab.org.cn;", "position": ";Researcher;Undergrad student;PhD student;Undergrad student;Researcher;PhD student;PhD student;Researcher;Researcher;Postdoc;;;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nwang2024needle,\ntitle={Needle In A Multimodal Haystack},\nauthor={Weiyun Wang and Shuibo Zhang and Yiming Ren and Yuchen Duan and Tiantong Li and Shuo Liu and Mengkang Hu and Zhe Chen and Kaipeng Zhang and Lewei Lu and Xizhou Zhu and Ping Luo and Yu Qiao and Jifeng Dai and Wenqi Shao and Wenhai Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=U2pNwSuQqD}\n}", "github": "", "reviewers": "voQ1;qb5f;nTjb", "pdf_size": 28906108, "rating": "5;6;8", "confidence": "4;4;4", "wc_summary_and_contributions": "42;62;52", "wc_strengths": "26;56;51", "wc_improvement": "236;65;16", "wc_limitations": "9;64;10", "wc_correctness": "29;37;1", "wc_clarity": "13;1;1", "wc_relation_to_prior_work": "20;1;1", "wc_documentation": "10;21;1", "wc_additional_feedback": "1;1;1", "wc_review": "386;308;134", "wc_reply_reviewers": "19;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 52.0, 8.16496580927726 ], "wc_strengths_avg": [ 44.333333333333336, 13.123346456686352 ], "wc_improvement_avg": [ 105.66666666666667, 94.30564964812848 ], "wc_limitations_avg": [ 27.666666666666668, 25.69478978746902 ], "wc_correctness_avg": [ 22.333333333333332, 15.434449203720302 ], "wc_clarity_avg": [ 5.0, 5.656854249492381 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_documentation_avg": [ 10.666666666666666, 8.178562764256865 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 276.0, 105.33755265810954 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.956685895029603 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18272805526395406924&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";pjlab.org.cn;bupt.edu.cn;link.cuhk.edu.hk;tsinghua.edu.cn;pjlab.org;hku.hk;nju.edu.cn;pjlab.org.cn;sensetime.com;tsinghua.edu.cn;;;tsinghua.edu.cn;pjlab.org.cn;", "author_num": 16, "aff_unique_index": "0;1;2;3;4;5;6;7;8;3;3;7", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Beijing University of Posts and Telecommunications;Chinese University of Hong Kong;Tsinghua University;Shanghai AI Lab;University of Hong Kong;Nanjing University;Shanghai AI Laboratory;SenseTime", "aff_unique_dep": ";;;;AI Research;;;;", "aff_unique_url": "http://www.shailab.org/;http://www.bupt.edu.cn/;https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;https://www.shanghaiailab.com;https://www.hku.hk;https://www.nju.edu.cn;https://www.shanghai-ai-lab.com;https://www.sensetime.com", "aff_unique_abbr": "Shanghai AI Lab;BUPT;CUHK;THU;Shanghai AI Lab;HKU;Nanjing U;SAIL;SenseTime", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Beijing;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Targeted Sequential Indirect Experiment Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94994", "id": "U3Rgdb4li9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U3Rgdb4li9", "openreview": "https://openreview.net/forum?id=U3Rgdb4li9", "poster": "/media/PosterPDFs/NeurIPS%202024/94994.png?t=1731687805.9918973", "project": "", "author_site": "Elisabeth Ailer, Niclas Dern, Jason Hartford, Niki Kilbertus", "tldr": "", "abstract": "Scientific hypotheses typically concern specific aspects of complex, imperfectly understood or entirely unknown mechanisms, such as the effect of gene expression levels on phenotypes or how microbial communities influence environmental health. Such queries are inherently causal (rather than purely associational), but in many settings, experiments can not be conducted directly on the target variables of interest, but are indirect. Therefore, they perturb the target variable, but do not remove potential confounding factors. If, additionally, the resulting experimental measurements are high-dimensional and the studied mechanisms nonlinear, the query of interest is generally not identified. We develop an adaptive strategy to design indirect experiments that optimally inform a targeted query about the ground truth mechanism in terms of sequentially narrowing the gap between an upper and lower bound on the query. While the general formulation consists of a bi-level optimization procedure, we derive an efficiently estimable analytical kernel-based estimator of the bounds for the causal effect, a query of key interest, and demonstrate the efficacy of our approach in confounded, multivariate, nonlinear synthetic settings.", "keywords": "causality;experiment design;instrumental variables;indirect experiments", "primary_area": "causal_inference", "supplementary_material": "/attachment/7800e7cc4583297806bf169b20565f622396a2ba.zip", "author": "Elisabeth Ailer;Niclas Dern;Jason Hartford;Niki Kilbertus", "authorids": "~Elisabeth_Ailer1;~Niclas_Dern1;~Jason_Hartford1;~Niki_Kilbertus1", "gender": ";M;M;", "homepage": ";https://www.niclasdern.com;https://jhartford.github.io;", "dblp": ";;191/6716;202/1966", "google_scholar": ";https://scholar.google.com/citations?hl=de;https://scholar.google.ca/citations?user=eBNK7SsAAAAJ;uQZjTq4AAAAJ", "orcid": ";;;", "linkedin": "https://linkedin.com/in/elisabeth-ailer;niclas-dern-335109219;jasonhartford1/;", "or_profile": "~Elisabeth_Ailer1;~Niclas_Dern1;~Jason_Hartford1;~Niki_Kilbertus1", "aff": "Helmholtz Zentrum;Technische Universit\u00e4t M\u00fcnchen;Valence Labs powered by Recursion;Helmholtz AI", "aff_domain": "helmholtz-muenchen.de;tum.de;valencelabs.com;helmholtz-muenchen.de", "position": "PhD student;Undergrad student;Researcher;Group Leader", "bibtex": "@inproceedings{\nailer2024targeted,\ntitle={Targeted Sequential Indirect Experiment Design},\nauthor={Elisabeth Ailer and Niclas Dern and Jason Hartford and Niki Kilbertus},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U3Rgdb4li9}\n}", "github": "", "reviewers": "sEkT;aNmJ;pD3c;GhAo", "pdf_size": 831189, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "26;83;21;157", "wc_strengths": "17;85;93;60", "wc_weaknesses": "73;123;491;119", "wc_questions": "198;126;2;1", "wc_limitations": "54;9;2;1", "wc_review": "368;426;609;338", "wc_reply_reviewers": "16;0;148;85", "wc_reply_authors": "112;151;38;316", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.75, 54.91527565259051 ], "wc_strengths_avg": [ 63.75, 29.608909132218972 ], "wc_weaknesses_avg": [ 201.5, 168.29364218531845 ], "wc_questions_avg": [ 81.75, 84.19137426126265 ], "wc_limitations_avg": [ 16.5, 21.86892772862904 ], "wc_review_avg": [ 435.25, 105.18406485775304 ], "wc_reply_reviewers_avg": [ 62.25, 58.91678453547851 ], "wc_reply_authors_avg": [ 154.25, 101.8242971986549 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1962875541464053297&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "helmholtz-muenchen.de;tum.de;valencelabs.com;helmholtz-muenchen.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Helmholtz Zentrum;Technische Universit\u00e4t M\u00fcnchen;Valence Labs;Helmholtz Association of German Research Centres", "aff_unique_dep": ";;;Helmholtz AI", "aff_unique_url": "https://www.helmholtz-berlin.de;https://www.tum.de;;https://www.helmholtz-ai.de", "aff_unique_abbr": ";TUM;;Helmholtz AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany;" }, { "title": "Interfacing Foundation Models' Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94993", "id": "U3hQoqgQDJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U3hQoqgQDJ", "openreview": "https://openreview.net/forum?id=U3hQoqgQDJ", "poster": "", "project": "", "author_site": "Xueyan Zou, Linjie Li, Jianfeng Wang, Jianwei Yang, Mingyu Ding, Junyi Wei, Zhengyuan Yang, Feng Li, Hao Zhang, Shilong Liu, Arul Aravinthan, Yong Jae Lee, Lijuan Wang", "tldr": "", "abstract": "Foundation models possess strong capabilities in reasoning and memorizing across modalities. To further unleash the power of foundation models, we present FIND, a generalized interface for aligning foundation models' embeddings with unified image and dataset-level understanding spanning modality and granularity. As shown in Fig.1, a lightweight transformer interface without tuning any foundation model weights is enough for segmentation, grounding, and retrieval in an interleaved manner. The proposed interface has the following favorable attributes: (1) Generalizable. It applies to various tasks spanning retrieval, segmentation, etc., under the same architecture and weights. (2) Interleavable. With the benefit of multi-task multi-modal training, the proposed interface creates an interleaved shared embedding space. (3) Extendable. The proposed interface is adaptive to new tasks, and new models. In light of the interleaved embedding space, we introduce FIND-Bench, which introduces new training and evaluation annotations to the COCO dataset for interleaved segmentation and retrieval. We are the first work aligning foundations models' embeddings for interleave understanding. Meanwhile, our approach achieves state-of-the-art performance on FIND-Bench and competitive performance on standard retrieval and segmentation settings.", "keywords": "Foundation Model;Vision and Language;Interleave;Segmentation;Detection;Large Language Model;Large Multimodal Model", "primary_area": "machine_vision", "supplementary_material": "/attachment/6f9400e12c2e3e60e8e057aa923fca9e18b9e329.zip", "author": "Xueyan Zou;Linjie Li;Jianfeng Wang;Jianwei Yang;Mingyu Ding;Junyi Wei;Zhengyuan Yang;Feng Li;Hao Zhang;Shilong Liu;Arul Aravinthan;Yong Jae Lee;Lijuan Wang", "authorids": "~Xueyan_Zou1;~Linjie_Li1;~Jianfeng_Wang4;~Jianwei_Yang1;~Mingyu_Ding1;~Junyi_Wei1;~Zhengyuan_Yang1;~Feng_Li9;~Hao_Zhang39;~Shilong_Liu1;~Arul_Aravinthan1;~Yong_Jae_Lee2;~Lijuan_Wang1", "gender": "F;F;M;M;F;M;M;M;M;M;F;M;M", "homepage": "https://maureenzou.github.io/;;;https://dingmyu.github.io/;;http://zhengyuan.info/;https://fengli-ust.github.io/;https://haozhang534.github.io/;https://www.lsl.zone;;https://www.microsoft.com/en-us/research/people/lijuanw/;https://jwyang.github.io/;https://pages.cs.wisc.edu/~yongjaelee/", "dblp": "273/3780;200/8256;;188/5243;166/6146;163/9713;92/2954-40.html;55/2270-97;;;51/2527.html;;15/5471", "google_scholar": "eslbQqoAAAAJ;WR875gYAAAAJ;vJWEw_8AAAAJ;w4yTWwoAAAAJ;Kb1GL40AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;B8hPxMQAAAAJ;nkSVY3MAAAAJ;;cDcWXuIAAAAJ;Cl9byD8AAAAJ;4GTpCxcAAAAJ", "orcid": ";;;0000-0001-6556-8359;;;;;;;;;", "linkedin": ";;;dingmyu/;Junyi-Jenny-Wei-04ba979b/;;;hao-zhang-3b09b8196/;;arul-aravinthan-414509218/;;;", "or_profile": "~Xueyan_Zou1;~Linjie_Li1;~Jianfeng_Wang4;~Mingyu_Ding1;~Junyi_Wei1;~Zhengyuan_Yang1;~Feng_Li9;~Hao_Zhang39;~Shilong_Liu1;~Arul_Aravinthan1;~Lijuan_Wang1;~Jianwei_Yang2;~Yong_Jae_Lee1", "aff": "University of Wisconsin - Madison;Microsoft;Microsoft;University of California, Berkeley;University of Wisconsin, Madison;Microsoft;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;NVIDIA;University of Wisconsin - Madison;Microsoft;Microsoft;University of Wisconsin - Madison", "aff_domain": "wisc.edu;microsoft.com;microsoft.com;berkeley.edu;wisc.edu;microsoft.com;ust.hk;ust.hk;nvidia.com;wisc.edu;microsoft.com;microsoft.com;cs.wisc.edu", "position": "PhD student;Researcher;Principal Researcher;Postdoc;PhD student;Researcher;PhD student;PhD student;Research Intern;Undergrad student;Principal Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzou2024interfacing,\ntitle={Interfacing Foundation Models' Embeddings},\nauthor={Xueyan Zou and Linjie Li and Jianfeng Wang and Jianwei Yang and Mingyu Ding and Junyi Wei and Zhengyuan Yang and Feng Li and Hao Zhang and Shilong Liu and Arul Aravinthan and Yong Jae Lee and Lijuan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U3hQoqgQDJ}\n}", "github": "", "reviewers": "AFHH;kpHd;iZKM;kGcR", "pdf_size": 19517702, "rating": "3;5;6;7", "confidence": "5;4;3;5", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "1;2;3;3", "wc_summary": "75;61;76;22", "wc_strengths": "25;18;141;63", "wc_weaknesses": "153;153;201;88", "wc_questions": "343;4;78;4", "wc_limitations": "43;2;13;1", "wc_review": "639;238;509;178", "wc_reply_reviewers": "76;388;80;10", "wc_reply_authors": "1911;912;50;7", "reply_reviewers": "1;3;1;1", "reply_authors": "6;5;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 58.5, 21.891779278989635 ], "wc_strengths_avg": [ 61.75, 48.853735783458774 ], "wc_weaknesses_avg": [ 148.75, 40.17695234833025 ], "wc_questions_avg": [ 107.25, 139.4226936334254 ], "wc_limitations_avg": [ 14.75, 16.97608612136496 ], "wc_review_avg": [ 391.0, 189.87232552428486 ], "wc_reply_reviewers_avg": [ 138.5, 146.70633933133223 ], "wc_reply_authors_avg": [ 720.0, 776.6295770829231 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.25482359571881275, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16689492512216931883&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "wisc.edu;microsoft.com;microsoft.com;berkeley.edu;wisc.edu;microsoft.com;ust.hk;ust.hk;nvidia.com;wisc.edu;microsoft.com;microsoft.com;cs.wisc.edu", "author_num": 13, "aff_unique_index": "0;1;1;2;3;1;4;4;5;0;1;1;0", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft;University of California, Berkeley;University of Wisconsin;Hong Kong University of Science and Technology;NVIDIA", "aff_unique_dep": ";Microsoft Corporation;;;;NVIDIA Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com;https://www.berkeley.edu;https://www.wisc.edu;https://www.ust.hk;https://www.nvidia.com", "aff_unique_abbr": "UW-Madison;Microsoft;UC Berkeley;UW;HKUST;NVIDIA", "aff_campus_unique_index": "0;2;0;3;3;0;0", "aff_campus_unique": "Madison;;Berkeley;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;1;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Do causal predictors generalize better to new domains?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94992", "id": "U4BC0GrFAz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U4BC0GrFAz", "openreview": "https://openreview.net/forum?id=U4BC0GrFAz", "poster": "/media/PosterPDFs/NeurIPS%202024/94992.png?t=1733480603.003637", "project": "", "author_site": "Vivian Nastl, Moritz Hardt", "tldr": "", "abstract": "We study how well machine learning models trained on causal features generalize across domains. We consider 16 prediction tasks on tabular datasets covering applications in health, employment, education, social benefits, and politics. Each dataset comes with multiple domains, allowing us to test how well a model trained in one domain performs in another. For each prediction task, we select features that have a causal influence on the target of prediction. Our goal is to test the hypothesis that models trained on causal features generalize better across domains. Without exception, we find that predictors using all available features, regardless of causality, have better in-domain and out-of-domain accuracy than predictors using causal features. Moreover, even the absolute drop in accuracy from one domain to the other is no better for causal predictors than for models that use all features. In addition, we show that recent causal machine learning methods for domain generalization do not perform better in our evaluation than standard predictors trained on the set of causal features. Likewise, causal discovery algorithms either fail to run or select causal variables that perform no better than our selection. Extensive robustness checks confirm that our findings are stable under variable misclassification.", "keywords": "causality;domain generalization;tabular data", "primary_area": "causal_inference", "supplementary_material": "", "author": "Vivian Yvonne Nastl;Moritz Hardt", "authorids": "~Vivian_Yvonne_Nastl1;~Moritz_Hardt1", "gender": "F;Not Specified", "homepage": "https://sf.is.mpg.de/person/vnastl;http://mrtz.org/", "dblp": ";26/4683", "google_scholar": ";adnTgaAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Vivian_Yvonne_Nastl1;~Moritz_Hardt1", "aff": "ETHZ - ETH Zurich;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "ethz.ch;is.mpg.de", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nnastl2024do,\ntitle={Do causal predictors generalize better to new domains?},\nauthor={Vivian Yvonne Nastl and Moritz Hardt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U4BC0GrFAz}\n}", "github": "", "reviewers": "6nV6;QBUN;bBCb;yvyj;MY3d", "pdf_size": 4782875, "rating": "4;4;7;7;9", "confidence": "3;3;3;4;4", "soundness": "2;1;3;3;3", "novelty": "2;2;3;3;4", "presentation": "2;3;3;4;4", "wc_summary": "110;65;75;99;47", "wc_strengths": "57;24;35;104;47", "wc_weaknesses": "182;694;28;89;26", "wc_questions": "392;62;16;37;49", "wc_limitations": "25;6;38;6;1", "wc_review": "766;851;192;335;170", "wc_reply_reviewers": "52;485;18;47;0", "wc_reply_authors": "32;491;16;14;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "2;3;2;2;1", "rating_avg": [ 6.2, 1.9390719429665317 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 79.2, 22.789471253190584 ], "wc_strengths_avg": [ 53.4, 27.63041802072491 ], "wc_weaknesses_avg": [ 203.8, 251.58251131587028 ], "wc_questions_avg": [ 111.2, 141.21388033759288 ], "wc_limitations_avg": [ 15.2, 14.048487463068755 ], "wc_review_avg": [ 462.8, 289.1459147212701 ], "wc_reply_reviewers_avg": [ 120.4, 183.2927712704459 ], "wc_reply_authors_avg": [ 110.6, 190.47057515532418 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7579367289598672, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13665950960496576741&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "ethz.ch;is.mpg.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.ethz.ch;https://www.mpi-is.mpg.de", "aff_unique_abbr": "ETHZ;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "Enhancing Graph Transformers with Hierarchical Distance Structural Encoding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94991", "id": "U4KldRgoph", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U4KldRgoph", "openreview": "https://openreview.net/forum?id=U4KldRgoph", "poster": "/media/PosterPDFs/NeurIPS%202024/94991.png?t=1731463512.4148142", "project": "", "author_site": "Yuankai Luo, Hongkang Li, Lei Shi, Xiao-Ming Wu", "tldr": "", "abstract": "Graph transformers need strong inductive biases to derive meaningful attention scores. Yet, current methods often fall short in capturing longer ranges, hierarchical structures, or community structures, which are common in various graphs such as molecules, social networks, and citation networks. This paper presents a Hierarchical Distance Structural Encoding (HDSE) method to model node distances in a graph, focusing on its multi-level, hierarchical nature. We introduce a novel framework to seamlessly integrate HDSE into the attention mechanism of existing graph transformers, allowing for simultaneous application with other positional encodings. To apply graph transformers with HDSE to large-scale graphs, we further propose a high-level HDSE that effectively biases the linear transformers towards graph hierarchies. We theoretically prove the superiority of HDSE in terms of expressivity and generalization. Empirically, we demonstrate that graph transformers with HDSE excel in graph classification, regression on 7 graph-level datasets, and node classification on 11 large-scale graphs.", "keywords": "Graph Transformers;Graph Neural Networks;Graph Classification;Node Classification;Large Graphs;Scalability", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yuankai Luo;Hongkang Li;Lei Shi;Xiao-Ming Wu", "authorids": "~Yuankai_Luo2;~Hongkang_Li1;~Lei_Shi13;~Xiao-Ming_Wu1", "gender": "M;;M;F", "homepage": "https://luoyk1999.github.io/;https://lohek330.github.io/lihongkang.github.io/;https://leishidata.com/;http://www4.comp.polyu.edu.hk/~csxmwu/", "dblp": "299/6707;318/8643;29/563-2;98/2898-3", "google_scholar": "33f_QqAAAAAJ;https://scholar.google.com.hk/citations?user=DVlDPjMAAAAJ;NmaU6U0AAAAJ;3KbaUFkAAAAJ", "orcid": "0000-0003-3844-7214;;;", "linkedin": ";hongkang-li-b7a341173/;;", "or_profile": "~Yuankai_Luo2;~Hongkang_Li1;~Lei_Shi13;~Xiao-Ming_Wu1", "aff": "Beihang University;Rensselaer Polytechnic Institute;Beihang University;Hong Kong Polytechnic University", "aff_domain": "buaa.edu.cn;rpi.edu;buaa.edu.cn;polyu.edu.hk", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nluo2024enhancing,\ntitle={Enhancing Graph Transformers with Hierarchical Distance Structural Encoding},\nauthor={Yuankai Luo and Hongkang Li and Lei Shi and Xiao-Ming Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U4KldRgoph}\n}", "github": "", "reviewers": "4HNX;p468;o879", "pdf_size": 4344101, "rating": "6;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;4", "wc_summary": "50;57;66", "wc_strengths": "96;91;53", "wc_weaknesses": "153;63;18", "wc_questions": "259;112;59", "wc_limitations": "35;6;10", "wc_review": "593;329;206", "wc_reply_reviewers": "21;17;5", "wc_reply_authors": "43;20;12", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 57.666666666666664, 6.548960901462833 ], "wc_strengths_avg": [ 80.0, 19.200694431886227 ], "wc_weaknesses_avg": [ 78.0, 56.124860801609124 ], "wc_questions_avg": [ 143.33333333333334, 84.60233776649174 ], "wc_limitations_avg": [ 17.0, 12.832251036613439 ], "wc_review_avg": [ 376.0, 161.44968256394932 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 6.79869268479038 ], "wc_reply_authors_avg": [ 25.0, 13.140268896284683 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7746028500884816049&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;rpi.edu;buaa.edu.cn;polyu.edu.hk", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Beihang University;Rensselaer Polytechnic Institute;Hong Kong Polytechnic University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.rpi.edu;https://www.polyu.edu.hk", "aff_unique_abbr": "BUAA;RPI;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "MambaSCI: Efficient Mamba-UNet for Quad-Bayer Patterned Video Snapshot Compressive Imaging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94990", "id": "U4WeoyRHPd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U4WeoyRHPd", "openreview": "https://openreview.net/forum?id=U4WeoyRHPd", "poster": "", "project": "", "author_site": "Zhenghao Pan, Haijin Zeng, Jiezhang Cao, Yongyong Chen, Kai Zhang, Yong Xu", "tldr": "", "abstract": "Color video snapshot compressive imaging (SCI) employs computational imaging techniques to capture multiple sequential video frames in a single Bayer-patterned measurement. With the increasing popularity of quad-Bayer pattern in mainstream smartphone cameras for capturing high-resolution videos, mobile photography has become more accessible to a wider audience. However, existing color video SCI reconstruction algorithms are designed based on the traditional Bayer pattern. When applied to videos captured by quad-Bayer cameras, these algorithms often result in color distortion and ineffective demosaicing, rendering them impractical for primary equipment. To address this challenge, we propose the MambaSCI method, which leverages the Mamba and UNet architectures for efficient reconstruction of quad-Bayer patterned color video SCI. To the best of our knowledge, our work presents the first algorithm for quad-Bayer patterned SCI reconstruction, and also the initial application of the Mamba model to this task. Specifically, we customize Residual-Mamba-Blocks, which residually connect the Spatial-Temporal Mamba (STMamba), Edge-Detail-Reconstruction (EDR) module, and Channel Attention (CA) module. Respectively, STMamba is used to model long-range spatial-temporal dependencies with linear complexity, EDR is for better edge-detail reconstruction, and CA is used to compensate for the missing channel information interaction in Mamba model. Experiments demonstrate that MambaSCI surpasses state-of-the-art methods with lower computational and memory costs. PyTorch style pseudo-code for the core modules is provided in the supplementary materials. Code is at https://github.com/PAN083/MambaSCI.", "keywords": "Snapshot Compressive Imaging; Compressive Imaging; Deep Learning; Attention; Coded Aperture Compressive Temporal Imaging (CACTI); State Space Models", "primary_area": "machine_vision", "supplementary_material": "/attachment/5497c5a57122f835d37bd551c00f9d42d8905317.zip", "author": "Zhenghao Pan;Haijin Zeng;Jiezhang Cao;Yongyong Chen;Kai Zhang;Yong Xu", "authorids": "~Zhenghao_Pan1;~Haijin_Zeng1;~Jiezhang_Cao2;~Yongyong_Chen1;~Kai_Zhang8;~Yong_Xu9", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/PAN083;https://navyzeng.github.io/;https://cyyhit.github.io/;https://github.com/cszn;https://www.yongxu.org;https://www.jiezhangcao.com/", "dblp": ";261/8056;196/7154;55/957-8;;211/2850", "google_scholar": ";e9f3XqcAAAAJ;https://scholar.google.com.tw/citations?user=ny2mn-cAAAAJ;0RycFIIAAAAJ;https://scholar.google.com.hk/citations?user=zOVgYQYAAAAJ;IFYbb7oAAAAJ", "orcid": ";0000-0003-0398-3316;0000-0003-1970-1993;0000-0002-6319-3722;;", "linkedin": ";;;;;", "or_profile": "~Zhenghao_Pan1;~Haijin_Zeng1;~Yongyong_Chen1;~Kai_Zhang8;~Yong_Xu9;~Jiezhang_Cao1", "aff": "Harbin Institute of Technology (Shenzhen);IMEC & Universiteit Gent;Harbin Institute of Technology (Shenzhen);ETH Zurich;Harbin Institute of Technology;ETH Z\u00fcrich", "aff_domain": "hitsz.edu.cn;ugent.be;hit.edu.cn;vision.ee.ethz.ch;hit.edu.cn;ethz.ch", "position": "MS student;PhD student;Associate Professor;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\npan2024mambasci,\ntitle={Mamba{SCI}: Efficient Mamba-{UN}et for Quad-Bayer Patterned Video Snapshot Compressive Imaging},\nauthor={Zhenghao Pan and Haijin Zeng and Jiezhang Cao and Yongyong Chen and Kai Zhang and Yong Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U4WeoyRHPd}\n}", "github": "", "reviewers": "zshs;xnVL;M9Ao;yWFS", "pdf_size": 8523609, "rating": "3;5;6;8", "confidence": "5;4;5;5", "soundness": "2;3;3;4", "novelty": "1;3;3;4", "presentation": "3;3;3;4", "wc_summary": "68;45;15;34", "wc_strengths": "66;44;13;123", "wc_weaknesses": "164;108;51;108", "wc_questions": "4;20;26;152", "wc_limitations": "4;25;5;14", "wc_review": "306;242;110;431", "wc_reply_reviewers": "376;110;27;0", "wc_reply_authors": "829;356;36;0", "reply_reviewers": "3;2;1;0", "reply_authors": "4;2;2;1", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 40.5, 19.1637678967368 ], "wc_strengths_avg": [ 61.5, 40.190172928217166 ], "wc_weaknesses_avg": [ 107.75, 39.95231532714969 ], "wc_questions_avg": [ 50.5, 59.15023245939106 ], "wc_limitations_avg": [ 12.0, 8.455767262643882 ], "wc_review_avg": [ 272.25, 115.73758032722128 ], "wc_reply_reviewers_avg": [ 128.25, 148.67140780930274 ], "wc_reply_authors_avg": [ 305.25, 332.6269494493794 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16041733008708221745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "hitsz.edu.cn;ugent.be;hit.edu.cn;vision.ee.ethz.ch;hit.edu.cn;ethz.ch", "author_num": 6, "aff_unique_index": "0;1;0;2;0;2", "aff_unique_norm": "Harbin Institute of Technology;IMEC;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "http://en.hhit.edu.cn/;https://www.imec-int.com;https://www.ethz.ch", "aff_unique_abbr": "HIT;IMEC;ETHZ", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Shenzhen;;Harbin", "aff_country_unique_index": "0;1;0;2;0;2", "aff_country_unique": "China;Belgium;Switzerland" }, { "title": "An eye for an ear: zero-shot audio description leveraging an image captioner with audio-visual token distribution matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94989", "id": "U6oQEzSp8z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U6oQEzSp8z", "openreview": "https://openreview.net/forum?id=U6oQEzSp8z", "poster": "/media/PosterPDFs/NeurIPS%202024/94989.png?t=1730370199.6855204", "project": "", "author_site": "Hugo Malard, Michel Olvera, St\u00e9phane Lathuili\u00e8re, Slim Essid", "tldr": "", "abstract": "Multimodal large language models have fueled progress in image captioning. These models, fine-tuned on vast image datasets, exhibit a deep understanding of semantic concepts.\nIn this work, we show that this ability can be re-purposed for audio captioning, where the joint image-language decoder can be leveraged to describe auditory content associated with image sequences within videos featuring audiovisual content. This can be achieved via multimodal alignment.\nYet, this multimodal alignment task is non-trivial due to the inherent disparity between audible and visible elements in real-world videos. Moreover, multimodal representation learning often relies on contrastive learning, facing the challenge of the so-called modality gap which hinders smooth integration between modalities. In this work, we introduce a novel methodology for bridging the audiovisual modality gap by matching the distributions of tokens produced by an audio backbone and those of an image captioner. Our approach aligns the audio token distribution with that of the image tokens, enabling the model to perform zero-shot audio captioning in an unsupervised fashion. This alignment allows for the use of either audio or audiovisual input by combining or substituting the image encoder with the aligned audio encoder. Our method achieves significantly improved performances in zero-shot audio captioning, compared to existing approaches.", "keywords": "Multimodal representation learning;Audio Captioning;Image Captioning;Audio-Visual;Large Language Model", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Hugo Malard;Michel Olvera;St\u00e9phane Lathuili\u00e8re;Slim Essid", "authorids": "~Hugo_Malard1;~Michel_Olvera1;~St\u00e9phane_Lathuili\u00e8re1;~Slim_Essid1", "gender": "M;;M;Not Specified", "homepage": ";;http://stelat.eu;https://perso.telecom-paris.fr/essid/", "dblp": "358/2828.html;;151/8604;53/6904", "google_scholar": "yltsx8cAAAAJ;;https://scholar.google.fr/citations?user=xllguWMAAAAJ;5dP_Pv0AAAAJ", "orcid": ";;;", "linkedin": "hugo-malard-5b4938172/;;;", "or_profile": "~Hugo_Malard1;~Michel_Olvera1;~St\u00e9phane_Lathuili\u00e8re1;~Slim_Essid1", "aff": "T\u00e9l\u00e9com ParisTech;;T\u00e9l\u00e9com ParisTech;T\u00e9l\u00e9com ParisTech", "aff_domain": "telecom-paristech.fr;;telecom-paristech.fr;telecom-paristech.fr", "position": "PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nmalard2024an,\ntitle={An eye for an ear: zero-shot audio description leveraging an image captioner with audio-visual token distribution matching},\nauthor={Hugo Malard and Michel Olvera and St{\\'e}phane Lathuili{\\`e}re and Slim Essid},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U6oQEzSp8z}\n}", "github": "", "reviewers": "aH67;rTvm;7jnN;mzTo", "pdf_size": 3963495, "rating": "3;6;6;6", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "1;3;2;2", "wc_summary": "57;214;80;208", "wc_strengths": "26;156;47;56", "wc_weaknesses": "377;410;152;222", "wc_questions": "2;116;2;22", "wc_limitations": "13;21;1;1", "wc_review": "475;917;282;509", "wc_reply_reviewers": "474;193;42;0", "wc_reply_authors": "763;79;6;51", "reply_reviewers": "2;2;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 139.75, 71.74390217990656 ], "wc_strengths_avg": [ 71.25, 50.12671443452084 ], "wc_weaknesses_avg": [ 290.25, 106.81379826595439 ], "wc_questions_avg": [ 35.5, 47.188451977152205 ], "wc_limitations_avg": [ 9.0, 8.48528137423857 ], "wc_review_avg": [ 545.75, 231.16376770592748 ], "wc_reply_reviewers_avg": [ 177.25, 185.75437410731408 ], "wc_reply_authors_avg": [ 224.75, 311.8480198750667 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1821870647553723730&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "telecom-paristech.fr;;telecom-paristech.fr;telecom-paristech.fr", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech", "aff_unique_dep": "", "aff_unique_url": "https://www.telecom-paristech.fr", "aff_unique_abbr": "TP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Meta-DT: Offline Meta-RL as Conditional Sequence Modeling with World Model Disentanglement", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94988", "id": "U9MzoDOKZu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U9MzoDOKZu", "openreview": "https://openreview.net/forum?id=U9MzoDOKZu", "poster": "", "project": "", "author_site": "Zhi Wang, Li Zhang, Wenhao Wu, Yuanheng Zhu, Dongbin Zhao, Chunlin Chen", "tldr": "", "abstract": "A longstanding goal of artificial general intelligence is highly capable generalists that can learn from diverse experiences and generalize to unseen tasks. The language and vision communities have seen remarkable progress toward this trend by scaling up transformer-based models trained on massive datasets, while reinforcement learning (RL) agents still suffer from poor generalization capacity under such paradigms. To tackle this challenge, we propose Meta Decision Transformer (Meta-DT), which leverages the sequential modeling ability of the transformer architecture and robust task representation learning via world model disentanglement to achieve efficient generalization in offline meta-RL. We pretrain a context-aware world model to learn a compact task representation, and inject it as a contextual condition to the causal transformer to guide task-oriented sequence generation. Then, we subtly utilize history trajectories generated by the meta-policy as a self-guided prompt to exploit the architectural inductive bias. We select the trajectory segment that yields the largest prediction error on the pretrained world model to construct the prompt, aiming to encode task-specific information complementary to the world model maximally. Notably, the proposed framework eliminates the requirement of any expert demonstration or domain knowledge at test time. Experimental results on MuJoCo and Meta-World benchmarks across various dataset types show that Meta-DT exhibits superior few and zero-shot generalization capacity compared to strong baselines while being more practical with fewer prerequisites. Our code is available at https://github.com/NJU-RL/Meta-DT.", "keywords": "decision transformer;offline meta reinforcement learning;world model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zhi Wang;Li Zhang;Wenhao Wu;Yuanheng Zhu;Dongbin Zhao;Chunlin Chen", "authorids": "~Zhi_Wang7;~Li_Zhang43;~Wenhao_Wu13;~Yuanheng_Zhu1;~Dongbin_Zhao1;~Chunlin_Chen1", "gender": ";;;;M;M", "homepage": ";https://github.com/soulmujoco;;;http://people.ucas.ac.cn/~zhaodongbin?language=en;https://sme.nju.edu.cn/ccl/list.htm", "dblp": ";;;;40/255;68/6992.html", "google_scholar": ";;;;;", "orcid": ";;;;0000-0001-8218-9633;", "linkedin": ";;;;;", "or_profile": "~Zhi_Wang7;~Li_Zhang43;~Wenhao_Wu13;~Yuanheng_Zhu1;~Dongbin_Zhao1;~Chunlin_Chen1", "aff": ";Nanjing University;;;Institute of Automation, Chinese Academy of Sciences;Nanjing University", "aff_domain": ";nju.edu;;;ia.ac.cn;nju.edu.cn", "position": ";MS student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024metadt,\ntitle={Meta-{DT}: Offline Meta-{RL} as Conditional Sequence Modeling with World Model Disentanglement},\nauthor={Zhi Wang and Li Zhang and Wenhao Wu and Yuanheng Zhu and Dongbin Zhao and Chunlin Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U9MzoDOKZu}\n}", "github": "", "reviewers": "EjPB;LRM6;iZYy;BWEr;sbh9", "pdf_size": 27972210, "rating": "5;6;6;7;7", "confidence": "4;3;3;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "31;84;72;76;51", "wc_strengths": "16;90;39;105;55", "wc_weaknesses": "82;111;48;68;115", "wc_questions": "54;7;32;5;73", "wc_limitations": "34;6;4;1;8", "wc_review": "217;298;195;255;302", "wc_reply_reviewers": "20;33;13;16;0", "wc_reply_authors": "383;37;83;35;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;2;3;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 62.8, 19.281078807992046 ], "wc_strengths_avg": [ 61.0, 32.62514367784455 ], "wc_weaknesses_avg": [ 84.8, 25.4668411861385 ], "wc_questions_avg": [ 34.2, 26.43785165250762 ], "wc_limitations_avg": [ 10.6, 11.926441212700457 ], "wc_review_avg": [ 253.4, 42.63613490925274 ], "wc_reply_reviewers_avg": [ 16.4, 10.669582934679312 ], "wc_reply_authors_avg": [ 107.6, 140.2021397839562 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8017837257372733, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2946731946027431160&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": ";nju.edu;;;ia.ac.cn;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanjing University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "https://www.nju.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "Nanjing U;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Optimal Rates for Vector-Valued Spectral Regularization Learning Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94987", "id": "U9e1d2xOc8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=U9e1d2xOc8", "openreview": "https://openreview.net/forum?id=U9e1d2xOc8", "poster": "", "project": "", "author_site": "Dimitri Meunier, Zikai Shen, Mattes Mollenhauer, Arthur Gretton, Zhu Li", "tldr": "", "abstract": "We study theoretical properties of a broad class of regularized algorithms with vector-valued output. These spectral algorithms include kernel ridge regression, kernel principal component regression and various implementations of gradient descent. Our contributions are twofold. First, we rigorously confirm the so-called saturation effect for ridge regression with vector-valued output by deriving a novel lower bound on learning rates; this bound is shown to be suboptimal when the smoothness of the regression function exceeds a certain level.\nSecond, we present an upper bound on the finite sample risk for general vector-valued spectral algorithms, applicable to both well-specified and misspecified scenarios (where the true regression function lies outside of the hypothesis space), and show that this bound is minimax optimal in various regimes. All of our results explicitly allow the case of infinite-dimensional output variables, proving consistency of recent practical applications.", "keywords": "Kernel methods;nonparametric regression;spectral algorithms", "primary_area": "learning_theory", "supplementary_material": "", "author": "Dimitri Meunier;Zikai Shen;Mattes Mollenhauer;Arthur Gretton;Zhu Li", "authorids": "~Dimitri_Meunier1;~Zikai_Shen1;~Mattes_Mollenhauer1;~Arthur_Gretton1;~Zhu_Li3", "gender": "Not Specified;M;Not Specified;M;M", "homepage": "https://dimitri-meunier.github.io;https://cosmic-tarantula.github.io/;;http://www.gatsby.ucl.ac.uk/~gretton/;https://zhuli-michael.github.io/", "dblp": "284/9524;;;56/2574;", "google_scholar": "_04YU0EAAAAJ;;nxIcGXwAAAAJ;OUv7J6QAAAAJ;Svq4CgwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Dimitri_Meunier1;~Zikai_Shen1;~Mattes_Mollenhauer1;~Arthur_Gretton1;~Zhu_Li3", "aff": "University College London, University of London;University College London, University of London;Freie Universit\u00e4t Berlin;University College London;University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;fu-berlin.de;ucl.ac.uk;ucl.ac.uk", "position": "PhD student;MS student;Postdoc;Professor;Postdoc", "bibtex": "@inproceedings{\nmeunier2024optimal,\ntitle={Optimal Rates for Vector-Valued Spectral Regularization Learning Algorithms},\nauthor={Dimitri Meunier and Zikai Shen and Mattes Mollenhauer and Arthur Gretton and Zhu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=U9e1d2xOc8}\n}", "github": "", "reviewers": "bQuY;R42A;U1CH;f39z;48mf", "pdf_size": 623595, "rating": "6;6;6;6;7", "confidence": "4;3;5;3;3", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "37;124;128;86;71", "wc_strengths": "77;72;179;17;107", "wc_weaknesses": "181;136;105;11;57", "wc_questions": "170;35;58;139;181", "wc_limitations": "39;1;1;7;1", "wc_review": "504;368;471;260;417", "wc_reply_reviewers": "210;19;0;137;12", "wc_reply_authors": "0;0;0;665;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;3;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.2, 34.00823429700519 ], "wc_strengths_avg": [ 90.4, 52.973955865123 ], "wc_weaknesses_avg": [ 98.0, 59.383499391666035 ], "wc_questions_avg": [ 116.6, 59.31812539182269 ], "wc_limitations_avg": [ 9.8, 14.783774890061064 ], "wc_review_avg": [ 404.0, 85.66212698736823 ], "wc_reply_reviewers_avg": [ 75.6, 83.42325814783308 ], "wc_reply_authors_avg": [ 133.0, 266.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.37500000000000006, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1614726840800401459&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucl.ac.uk;ucl.ac.uk;fu-berlin.de;ucl.ac.uk;ucl.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University College London;Freie Universit\u00e4t Berlin", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.fu-berlin.de", "aff_unique_abbr": "UCL;FU Berlin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "MagR: Weight Magnitude Reduction for Enhancing Post-Training Quantization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94986", "id": "UARTFgkTqW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UARTFgkTqW", "openreview": "https://openreview.net/forum?id=UARTFgkTqW", "poster": "/media/PosterPDFs/NeurIPS%202024/94986.png?t=1732680415.7648926", "project": "", "author_site": "Aozhong Zhang, Naigang Wang, Yanxia Deng, Xin Li, Zi Yang, Penghang Yin", "tldr": "", "abstract": "In this paper, we present a simple optimization-based preprocessing technique called Weight Magnitude Reduction (MagR) to improve the performance of post-training quantization. For each linear layer, we adjust the pre-trained floating-point weights by solving an $\\ell_\\infty$-regularized optimization problem. This process greatly diminishes the maximum magnitude of the weights and smooths out outliers, while preserving the layer's output. The preprocessed weights are centered more towards zero, which facilitates the subsequent quantization process. To implement MagR, we address the $\\ell_\\infty$-regularization by employing an efficient proximal gradient descent algorithm. Unlike existing preprocessing methods that involve linear transformations and subsequent post-processing steps, which can introduce significant overhead at inference time, MagR functions as a non-linear transformation, eliminating the need for any additional post-processing. This ensures that MagR introduces no overhead whatsoever during inference. Our experiments demonstrate that MagR achieves state-of-the-art performance on the Llama family of models. For example, we achieve a Wikitext2 perplexity of 6.7 on the LLaMA2-70B model for per-channel INT2 weight quantization without incurring any inference overhead.", "keywords": "post-training quantization;model compression;$\\ell_\\infty$-regularization", "primary_area": "other", "supplementary_material": "", "author": "Aozhong Zhang;Naigang Wang;Yanxia Deng;Xin Li;Zi Yang;Penghang Yin", "authorids": "~Aozhong_Zhang2;~Naigang_Wang1;~Yanxia_Deng2;~Xin_Li2;~Zi_Yang3;~Penghang_Yin1", "gender": ";M;F;M;M;", "homepage": ";;;;https://sites.google.com/view/ziyangjoy;", "dblp": ";78/11176;;09/1365-5;45/3180;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;gMBvzGoAAAAJ;EQE26QsAAAAJ;", "orcid": ";;0000-0003-2795-3680;0000-0003-2067-2763;0000-0003-1048-9869;", "linkedin": ";;;;;", "or_profile": "~Aozhong_Zhang2;~Naigang_Wang1;~Yanxia_Deng2;~Xin_Li2;~Zi_Yang3;~Penghang_Yin1", "aff": ";IBM, International Business Machines;Sun Yat-Sen University;State University of New York at Albany;State University of New York at Albany;", "aff_domain": ";us.ibm.com;mail.sysu.edu.cn;albany.edu;albany.edu;", "position": ";Researcher;Associate Professor;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2024magr,\ntitle={MagR: Weight Magnitude Reduction for Enhancing Post-Training Quantization},\nauthor={Aozhong Zhang and Naigang Wang and Yanxia Deng and Xin Li and Zi Yang and Penghang Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UARTFgkTqW}\n}", "github": "", "reviewers": "AvoN;HDx9;jujD;5GSj", "pdf_size": 546688, "rating": "5;6;7;8", "confidence": "4;4;3;4", "soundness": "2;3;2;3", "novelty": "3;3;3;3", "presentation": "3;3;2;2", "wc_summary": "72;36;67;174", "wc_strengths": "19;14;73;116", "wc_weaknesses": "75;234;234;145", "wc_questions": "91;46;113;292", "wc_limitations": "17;1;40;24", "wc_review": "274;331;527;751", "wc_reply_reviewers": "0;17;338;70", "wc_reply_authors": "0;13;413;17", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.25, 51.94889315471505 ], "wc_strengths_avg": [ 55.5, 41.89570383702844 ], "wc_weaknesses_avg": [ 172.0, 66.75702210254738 ], "wc_questions_avg": [ 135.5, 93.52673414591146 ], "wc_limitations_avg": [ 20.5, 14.0089257261219 ], "wc_review_avg": [ 470.75, 187.0459502368335 ], "wc_reply_reviewers_avg": [ 106.25, 136.26880604158825 ], "wc_reply_authors_avg": [ 110.75, 174.61726002889864 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6248540331406650007&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": ";us.ibm.com;mail.sysu.edu.cn;albany.edu;albany.edu;", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "International Business Machines;Sun Yat-sen University;State University of New York", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ibm.com;http://www.sysu.edu.cn/;https://www.albany.edu", "aff_unique_abbr": "IBM;SYSU;SUNY Albany", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Albany", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Federated Graph Learning for Cross-Domain Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94985", "id": "UBpPOqrBKE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UBpPOqrBKE", "openreview": "https://openreview.net/forum?id=UBpPOqrBKE", "poster": "/media/PosterPDFs/NeurIPS%202024/94985.png?t=1730880814.8625317", "project": "", "author_site": "Ziqi Yang, Zhaopeng Peng, Zihui Wang, Jianzhong Qi, Chaochao Chen, Weike Pan, Chenglu Wen, Cheng Wang, Xiaoliang Fan", "tldr": "", "abstract": "Cross-domain recommendation (CDR) offers a promising solution to the data sparsity problem by enabling knowledge transfer across source and target domains. However, many recent CDR models overlook crucial issues such as privacy as well as the risk of negative transfer (which negatively impact model performance), especially in multi-domain settings. To address these challenges, we propose FedGCDR, a novel federated graph learning framework that securely and effectively leverages positive knowledge from multiple source domains. First, we design a positive knowledge transfer module that ensures privacy during inter-domain knowledge transmission. This module employs differential privacy-based knowledge extraction combined with a feature mapping mechanism, transforming source domain embeddings from federated graph attention networks into reliable domain knowledge. Second, we design a knowledge activation module to filter out potential harmful or conflicting knowledge from source domains, addressing the issues of negative transfer. This module enhances target domain training by expanding the graph of the target domain to generate reliable domain attentions and fine-tunes the target model for improved negative knowledge filtering and more accurate predictions. We conduct extensive experiments on 16 popular domains of the Amazon dataset, demonstrating that FedGCDR significantly outperforms state-of-the-art methods.", "keywords": "federated learning; cross-domain recommendation; negative transfer; GNN", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Ziqi Yang;Zhaopeng Peng;Zihui Wang;Jianzhong Qi;Chaochao Chen;Weike Pan;Chenglu Wen;Cheng Wang;Xiaoliang Fan", "authorids": "~Ziqi_Yang5;~Zhaopeng_Peng1;~Zihui_Wang2;~Jianzhong_Qi1;~Chaochao_Chen3;~Weike_Pan1;~Chenglu_Wen1;~Cheng_Wang2;~Xiaoliang_Fan1", "gender": "M;M;M;M;;;;M;M", "homepage": "https://github.com/LafinHana/LafinHana.github.io;http://github.com/pzp-dzd;https://scholar.google.com/citations?view_op=list_works&hl=zh-CN&user=DCJV5e0AAAAJ;https://people.eng.unimelb.edu.au/jianzhongq/;https://sites.google.com/site/ccchomepage/;;;https://chwang.xmu.edu.cn/index_en.htm;", "dblp": ";;;41/1074-1;26/1492-1;;140/4398;54/2062-3;06/209", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.au/citations?user=mxS6eHYAAAAJ;qZTMyzwAAAAJ;;;https://scholar.google.com/citations?hl=en;gR7VT-4AAAAJ", "orcid": "0009-0006-0617-1232;;;0000-0001-6501-9050;0000-0003-1419-964X;;;0000-0001-6075-796X;", "linkedin": ";;;;ccchomepage/;;;;", "or_profile": "~Ziqi_Yang5;~Zhaopeng_Peng1;~Zihui_Wang2;~Jianzhong_Qi1;~Chaochao_Chen3;~Weike_Pan1;~Chenglu_Wen1;~Cheng_Wang2;~Xiaoliang_Fan1", "aff": "Xiamen University;Xiamen University;Xiamen University;University of Melbourne;Zhejiang University;;Xiamen University;Xiamen University;Xiamen University", "aff_domain": "xmu.edu.cn;stu.xmu.edu.cn;xmu.edu.cn;unimelb.edu.au;zju.edu.cn;;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "MS student;MS student;PhD student;Associate Professor;Distinguished Research Fellow;;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nyang2024federated,\ntitle={Federated Graph Learning for Cross-Domain Recommendation},\nauthor={Ziqi Yang and Zhaopeng Peng and Zihui Wang and Jianzhong Qi and Chaochao Chen and Weike Pan and Chenglu Wen and Cheng Wang and Xiaoliang Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UBpPOqrBKE}\n}", "github": "", "reviewers": "GMwN;yU7k;Dn4X;xoiK", "pdf_size": 870815, "rating": "6;7;7;8", "confidence": "3;5;5;5", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "3;4;3;3", "wc_summary": "88;61;54;62", "wc_strengths": "62;162;140;337", "wc_weaknesses": "80;129;117;145", "wc_questions": "159;36;25;101", "wc_limitations": "7;4;6;6", "wc_review": "396;392;342;651", "wc_reply_reviewers": "74;0;0;10", "wc_reply_authors": "40;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 12.93010054098575 ], "wc_strengths_avg": [ 175.25, 100.50715148684695 ], "wc_weaknesses_avg": [ 117.75, 23.951774464536026 ], "wc_questions_avg": [ 80.25, 53.9507877606991 ], "wc_limitations_avg": [ 5.75, 1.0897247358851685 ], "wc_review_avg": [ 445.25, 120.68010399398901 ], "wc_reply_reviewers_avg": [ 21.0, 30.870698080866262 ], "wc_reply_authors_avg": [ 10.0, 17.320508075688775 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15913759367480129764&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "xmu.edu.cn;stu.xmu.edu.cn;xmu.edu.cn;unimelb.edu.au;zju.edu.cn;;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;2;0;0;0", "aff_unique_norm": "Xiamen University;University of Melbourne;Zhejiang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.unimelb.edu.au;https://www.zju.edu.cn", "aff_unique_abbr": "XMU;UniMelb;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "3D Gaussian Splatting as Markov Chain Monte Carlo", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94984", "id": "UCSt4gk6iX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UCSt4gk6iX", "openreview": "https://openreview.net/forum?id=UCSt4gk6iX", "poster": "", "project": "", "author_site": "Shakiba Kheradmand, Daniel Rebain, Gopal Sharma, Weiwei Sun, Yang-Che Tseng, Hossam Isack, Abhishek Kar, Andrea Tagliasacchi, Kwang Moo Yi", "tldr": "", "abstract": "While 3D Gaussian Splatting has recently become popular for neural rendering, current methods rely on carefully engineered cloning and splitting strategies for placing Gaussians, which does not always generalize and may lead to poor-quality renderings. For many real-world scenes this leads to their heavy dependence on good initializations. In this work, we rethink the set of 3D Gaussians as a random sample drawn from an underlying probability distribution describing the physical representation of the scene\u2014in other words, Markov Chain Monte Carlo (MCMC) samples. Under this view, we show that the 3D Gaussian updates can be converted as Stochastic Gradient Langevin Dynamics (SGLD) update by simply introducing noise. We then rewrite the densification and pruning strategies in 3D Gaussian Splatting as simply a deterministic state transition of MCMC samples, removing these heuristics from the framework. To do so, we revise the \u2018cloning\u2019 of Gaussians into a relocalization scheme that approximately preserves sample probability. To encourage efficient use of Gaussians, we introduce an L1-regularizer on the Gaussians. On various standard evaluation scenes, we show that our method provides improved rendering quality, easy control over the number of Gaussians, and robustness to initialization. The project website is available at https://3dgs-mcmc.github.io/.", "keywords": "Novel View Synthesis;3D Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shakiba Kheradmand;Daniel Rebain;Gopal Sharma;Weiwei Sun;Yang-Che Tseng;Hossam Isack;Abhishek Kar;Andrea Tagliasacchi;Kwang Moo Yi", "authorids": "~Shakiba_Kheradmand1;~Daniel_Rebain1;~Gopal_Sharma1;~Weiwei_Sun4;~Yang-Che_Tseng1;~Hossam_Isack1;~Abhishek_Kar1;~Andrea_Tagliasacchi2;~Kwang_Moo_Yi1", "gender": ";;M;M;M;;M;M;", "homepage": ";;https://hippogriff.github.io/;http://wsunid.github.io/;;;https://abhishekkar.info;http://taiya.github.io;", "dblp": ";;190/8365;63/6566-6;;;46/11300;46/5514;", "google_scholar": ";https://scholar.google.ca/citations?user=h-qFKrQAAAAJ;Oe3bmrQAAAAJ;https://scholar.google.ca/citations?user=XXC5tSEAAAAJ;;;TIpmrtoAAAAJ;1RmD-YsAAAAJ;", "orcid": ";;;;;;;;", "linkedin": ";;;weiwei-sun-5705b013b/;jeff-tseng-6a9502224/;;abhishekkar/;;", "or_profile": "~Shakiba_Kheradmand1;~Daniel_Rebain1;~Gopal_Sharma1;~Weiwei_Sun4;~Yang-Che_Tseng1;~Hossam_Isack1;~Abhishek_Kar1;~Andrea_Tagliasacchi2;~Kwang_Moo_Yi1", "aff": ";University of British Columbia;University of British Columbia;University of British Columbia;University of British Columbia;;Google;Google DeepMind;", "aff_domain": ";cs.ubc.ca;ubc.ca;cs.ubc.ca;cs.ubc.ca;;google.com;google.com;", "position": ";PhD student;Postdoc;PhD student;MS student;;Researcher;Researcher;", "bibtex": "@inproceedings{\nkheradmand2024d,\ntitle={3D Gaussian Splatting as Markov Chain Monte Carlo},\nauthor={Shakiba Kheradmand and Daniel Rebain and Gopal Sharma and Weiwei Sun and Yang-Che Tseng and Hossam Isack and Abhishek Kar and Andrea Tagliasacchi and Kwang Moo Yi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UCSt4gk6iX}\n}", "github": "", "reviewers": "cJQR;Cpa7;m7Kb;jFyG", "pdf_size": 16551863, "rating": "5;5;7;8", "confidence": "4;4;2;5", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "54;130;134;123", "wc_strengths": "69;140;12;257", "wc_weaknesses": "39;280;1;164", "wc_questions": "73;35;1;96", "wc_limitations": "23;9;1;41", "wc_review": "258;594;149;681", "wc_reply_reviewers": "28;107;14;89", "wc_reply_authors": "0;0;0;40", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.25, 32.713720363174836 ], "wc_strengths_avg": [ 119.5, 91.42346525919919 ], "wc_weaknesses_avg": [ 121.0, 109.83396560263132 ], "wc_questions_avg": [ 51.25, 36.27929850479472 ], "wc_limitations_avg": [ 18.5, 15.190457530963313 ], "wc_review_avg": [ 420.5, 222.53145845026046 ], "wc_reply_reviewers_avg": [ 59.5, 39.33509883043387 ], "wc_reply_authors_avg": [ 10.0, 17.320508075688775 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.044151078568834795, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15479746007837487850&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";cs.ubc.ca;ubc.ca;cs.ubc.ca;cs.ubc.ca;;google.com;google.com;", "author_num": 9, "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "University of British Columbia;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ubc.ca;https://www.google.com", "aff_unique_abbr": "UBC;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "Canada;United States;United Kingdom" }, { "title": "TAPVid-3D: A Benchmark for Tracking Any Point in 3D", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97673", "id": "UDC8D6U7dX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UDC8D6U7dX", "openreview": "https://openreview.net/forum?id=UDC8D6U7dX", "poster": "/media/PosterPDFs/NeurIPS%202024/97673.png?t=1732583167.4965754", "project": "", "author_site": "Skanda Koppula, Ignacio Rocco, Yi Yang, joseph heyward, Joao Carreira, Andrew Zisserman, Gabriel Brostow, Carl Doersch", "tldr": "", "abstract": "We introduce a new benchmark, TAPVid-3D, for evaluating the task of long-range Tracking Any Point in 3D (TAP-3D). While point tracking in two dimensions (TAP-2D) has many benchmarks measuring performance on real-world videos, such as TAPVid-DAVIS, three-dimensional point tracking has none. To this end, leveraging existing footage, we build a new benchmark for 3D point tracking featuring 4,000+ real-world videos, composed of three different data sources spanning a variety of object types, motion patterns, and indoor and outdoor environments. To measure performance on the TAP-3D task, we formulate a collection of metrics that extend the Jaccard-based metric used in TAP-2D to handle the complexities of ambiguous depth scales across models, occlusions, and multi-track spatio-temporal smoothness. We manually verify a large sample of trajectories to ensure correct video annotations, and assess the current state of the TAP-3D task by constructing competitive baselines using existing tracking models. We anticipate this benchmark will serve as a guidepost to improve our ability to understand precise 3D motion and surface deformation from monocular video.", "keywords": "computer vision;point tracking;3D understanding;motion analysis;video understanding", "primary_area": "", "supplementary_material": "/attachment/2b3751612e7da57ddf2410bf466458e168c4dcdb.pdf", "author": "Skanda Koppula;Ignacio Rocco;Yi Yang;Joseph Heyward;Joao Carreira;Andrew Zisserman;Gabriel Brostow;Carl Doersch", "authorids": "~Skanda_Koppula1;~Ignacio_Rocco1;~Yi_Yang10;~Joseph_Heyward2;~Joao_Carreira1;~Andrew_Zisserman1;~Gabriel_Brostow1;~Carl_Doersch1", "gender": ";;M;M;M;;M;M", "homepage": ";;https://yangyi02.github.io/;https://uk.linkedin.com/in/joe-heyward-71623595;;;http://www0.cs.ucl.ac.uk/staff/G.Brostow/;", "dblp": ";;33/4854-7;;61/5621-1;;41/281;12/8654", "google_scholar": ";;-BO7TXUAAAAJ;;https://scholar.google.pt/citations?user=IUZ-7_cAAAAJ;;https://scholar.google.com.tw/citations?user=CZiTv0gAAAAJ;SBTxvCoAAAAJ", "orcid": ";;;;;;0000-0001-8472-3828;", "linkedin": ";;;https://linkedin.com/in/joe-heyward-71623595;jo%C3%A3o-carreira-56238a7/;;gabriel-brostow-b126071/?originalSubdomain=uk;", "or_profile": "~Skanda_Koppula1;~Ignacio_Rocco1;~Yi_Yang10;~Joseph_Heyward2;~Joao_Carreira1;~Andrew_Zisserman1;~Gabriel_Brostow1;~Carl_Doersch1", "aff": ";;Google DeepMind;Google;Google DeepMind;;Niantic;Google DeepMind", "aff_domain": ";;deepmind.com;google.com;google.com;;nianticlabs.com;google.com", "position": ";;Researcher;Researcher;Research Scientist;;Chief Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nkoppula2024tapvidd,\ntitle={{TAPV}id-3D: A Benchmark for Tracking Any Point in 3D},\nauthor={Skanda Koppula and Ignacio Rocco and Yi Yang and Joseph Heyward and Joao Carreira and Andrew Zisserman and Gabriel Brostow and Carl Doersch},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=UDC8D6U7dX}\n}", "github": "", "reviewers": "oGa3;nHmX;yv9g;XvtV", "pdf_size": 6167554, "rating": "6;6;7;8", "confidence": "4;3;3;5", "wc_summary_and_contributions": "66;51;126;145", "wc_strengths": "41;66;80;198", "wc_improvement": "59;92;65;208", "wc_limitations": "17;1;51;6", "wc_correctness": "13;1;15;1", "wc_clarity": "1;1;11;1", "wc_relation_to_prior_work": "1;1;23;1", "wc_documentation": "1;1;22;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "200;215;394;567", "wc_reply_reviewers": "13;0;18;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 97.0, 39.43982758583004 ], "wc_strengths_avg": [ 96.25, 60.38366914986204 ], "wc_improvement_avg": [ 106.0, 60.18720794321664 ], "wc_limitations_avg": [ 18.75, 19.49839737004044 ], "wc_correctness_avg": [ 7.5, 6.5383484153110105 ], "wc_clarity_avg": [ 3.5, 4.330127018922194 ], "wc_relation_to_prior_work_avg": [ 6.5, 9.526279441628825 ], "wc_documentation_avg": [ 7.5, 8.616843969807043 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 344.0, 149.67130653535432 ], "wc_reply_reviewers_avg": [ 12.75, 7.790218225441442 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15697423795952632936&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;deepmind.com;google.com;google.com;;nianticlabs.com;google.com", "author_num": 8, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Google;Niantic", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.nianticlabs.com", "aff_unique_abbr": "DeepMind;Niantic", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Exploring the trade-off between deep-learning and explainable models for brain-machine interfaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94983", "id": "UDi51I8K1p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UDi51I8K1p", "openreview": "https://openreview.net/forum?id=UDi51I8K1p", "poster": "", "project": "", "author_site": "Luis Cubillos, Guy Revach, Matthew Mender, Joseph Costello, Hisham Temmar, Aren Hite, Diksha Anoop Kumar Zutshi, Dylan Wallace, Xiaoyong Ni, Madison Kelberman, Matt Willsey, Ruud Van Sloun, Nir Shlezinger, Parag Patil, Anne Draelos, Cynthia Chestek", "tldr": "", "abstract": "People with brain or spinal cord-related paralysis often need to rely on others for basic tasks, limiting their independence. A potential solution is brain-machine interfaces (BMIs), which could allow them to voluntarily control external devices (e.g., robotic arm) by decoding brain activity to movement commands. In the past decade, deep-learning decoders have achieved state-of-the-art results in most BMI applications, ranging from speech production to finger control. However, the 'black-box' nature of deep-learning decoders could lead to unexpected behaviors, resulting in major safety concerns in real-world physical control scenarios. In these applications, explainable but lower-performing decoders, such as the Kalman filter (KF), remain the norm. In this study, we designed a BMI decoder based on KalmanNet, an extension of the KF that augments its operation with recurrent neural networks to compute the Kalman gain. This results in a varying \u201ctrust\u201d that shifts between inputs and dynamics. We used this algorithm to predict finger movements from the brain activity of two monkeys. We compared KalmanNet results offline (pre-recorded data, $n=13$ days) and online (real-time predictions, $n=5$ days) with a simple KF and two recent deep-learning algorithms: tcFNN (non-ReFIT version) and LSTM. KalmanNet achieved comparable or better results than other deep learning models in offline and online modes, relying on the dynamical model for stopping while depending more on neural inputs for initiating movements. We further validated this mechanism by implementing a heteroscedastic KF that used the same strategy, and it also approached state-of-the-art performance while remaining in the explainable domain of standard KFs. However, we also see two downsides to KalmanNet. KalmanNet shares the limited generalization ability of existing deep-learning decoders, and its usage of the KF as an inductive bias limits its performance in the presence of unseen noise distributions. Despite this trade-off, our analysis successfully integrates traditional controls and modern deep-learning approaches to motivate high-performing yet still explainable BMI designs.", "keywords": "brain-machine interfaces;neural decoders;safety;kalman filter;real-time processing", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/203c25a384ec61159d6ad44dc3d76593f520f005.zip", "author": "Luis Hernan Cubillos;Guy Revach;Matthew Mender;Joseph T Costello;Hisham Temmar;Aren Hite;Diksha Anoop Kumar Zutshi;Dylan Michael Wallace;Xiaoyong Ni;Madison M. Kelberman;Matt Willsey;Ruud Van Sloun;Nir Shlezinger;Parag Ganapati Patil;Anne Draelos;Cynthia Chestek", "authorids": "~Luis_Hernan_Cubillos1;~Guy_Revach1;~Matthew_Mender1;~Joseph_T_Costello1;~Hisham_Temmar1;~Aren_Hite1;~Diksha_Anoop_Kumar_Zutshi1;~Dylan_Michael_Wallace1;~Xiaoyong_Ni1;~Madison_M._Kelberman1;~Matt_Willsey1;~Ruud_Van_Sloun1;~Nir_Shlezinger1;~Parag_Ganapati_Patil1;~Anne_Draelos1;~Cynthia_Chestek1", "gender": "M;M;M;M;M;Non-Binary;F;M;M;;;F;M;M;;F", "homepage": "https://github.com/lhcubillos;https://www.linkedin.com/in/guy-revach-ph-d-candidate-247a993/;;;https://chestekresearch.engin.umich.edu/;;;https://www.dylanwallace.com;;;https://profiles.stanford.edu/matthew-willsey;https://www.tue.nl/en/research/researchers/ruud-van-sloun;https://sites.google.com/view/nirshl;;http://draelos.github.io;http://chestekresearch.engin.umich.edu/", "dblp": ";;;;;;;;08/10594;;;162/9715.html;;;271/4339;", "google_scholar": "evFVDFUAAAAJ;RKnercsAAAAJ;;BwHH0TYAAAAJ;;;;;vN7VetMAAAAJ;pAgniMoAAAAJ;;gQQJgocAAAAJ;84Z9R2EAAAAJ;;drLbv9gAAAAJ;", "orcid": "0000-0001-9876-6204;;0000-0003-1562-3289;0000-0001-7608-0885;0000-0002-4464-4911;;;0000-0003-2770-3614;0000-0001-6441-8519;0000-0003-1395-6792;;;;0000-0002-2300-6136;;", "linkedin": "lhcubillos;guy-revach-ph-d-candidate-247a993/;;;;aren-hite;dikshazutshi/;;xiaoyong-ni-ab3881138/?locale=en_US;;;;;;;", "or_profile": "~Luis_Hernan_Cubillos1;~Guy_Revach1;~Matthew_Mender1;~Joseph_T_Costello1;~Hisham_Temmar1;~Aren_Hite1;~Diksha_Anoop_Kumar_Zutshi1;~Dylan_Michael_Wallace1;~Xiaoyong_Ni1;~Madison_M._Kelberman1;~Matt_Willsey1;~Ruud_Van_Sloun1;~Nir_Shlezinger1;~Parag_Ganapati_Patil1;~Anne_Draelos1;~Cynthia_Chestek1", "aff": "University of Michigan - Ann Arbor;ETH Z\u00fcrich;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;ETHZ - ETH Zurich;University of Michigan - Ann Arbor;Stanford University;Eindhoven University of Technology;Ben Gurion University of the Negev;;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;ethz.ch;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;ethz.ch;umich.edu;stanford.edu;tue.nl;bgu.ac.il;;umich.edu;umich.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Undergrad student;MS student;PhD student;MS student;PhD student;Instructor;Associate Professor;Assistant Professor;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncubillos2024exploring,\ntitle={Exploring the trade-off between deep-learning and explainable models for brain-machine interfaces},\nauthor={Luis Hernan Cubillos and Guy Revach and Matthew Mender and Joseph T Costello and Hisham Temmar and Aren Hite and Diksha Anoop Kumar Zutshi and Dylan Michael Wallace and Xiaoyong Ni and Madison M. Kelberman and Matt Willsey and Ruud Van Sloun and Nir Shlezinger and Parag Ganapati Patil and Anne Draelos and Cynthia Chestek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UDi51I8K1p}\n}", "github": "", "reviewers": "JYj7;xWsH;586k", "pdf_size": 2774416, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "2;3;2", "novelty": "3;3;2", "presentation": "2;3;2", "wc_summary": "188;149;124", "wc_strengths": "146;202;74", "wc_weaknesses": "210;135;171", "wc_questions": "34;126;146", "wc_limitations": "10;9;18", "wc_review": "588;621;533", "wc_reply_reviewers": "31;295;92", "wc_reply_authors": "0;209;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 153.66666666666666, 26.335442953471574 ], "wc_strengths_avg": [ 140.66666666666666, 52.39168721170267 ], "wc_weaknesses_avg": [ 172.0, 30.62678566222711 ], "wc_questions_avg": [ 102.0, 48.771576421791686 ], "wc_limitations_avg": [ 12.333333333333334, 4.027681991198191 ], "wc_review_avg": [ 580.6666666666666, 36.298148100909444 ], "wc_reply_reviewers_avg": [ 139.33333333333334, 112.85487239026156 ], "wc_reply_authors_avg": [ 69.66666666666667, 98.52354484532562 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FvuaWy-rTn4J:scholar.google.com/&scioq=Exploring+the+trade-off+between+deep-learning+and+explainable+models+for+brain-machine+interfaces&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "umich.edu;ethz.ch;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;ethz.ch;umich.edu;stanford.edu;tue.nl;bgu.ac.il;;umich.edu;umich.edu", "author_num": 16, "aff_unique_index": "0;1;0;0;0;0;0;0;1;0;2;3;4;0;0", "aff_unique_norm": "University of Michigan;ETH Zurich;Stanford University;Eindhoven University of Technology;Ben Gurion University of the Negev", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.umich.edu;https://www.ethz.ch;https://www.stanford.edu;https://www.tue.nl;https://www.bgu.ac.il", "aff_unique_abbr": "UM;ETHZ;Stanford;TU/e;BGU", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;2;0;0", "aff_campus_unique": "Ann Arbor;;Stanford", "aff_country_unique_index": "0;1;0;0;0;0;0;0;1;0;0;2;3;0;0", "aff_country_unique": "United States;Switzerland;Netherlands;Israel" }, { "title": "Frequency-aware Generative Models for Multivariate Time Series Imputation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94982", "id": "UE6CeRMnq3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UE6CeRMnq3", "openreview": "https://openreview.net/forum?id=UE6CeRMnq3", "poster": "/media/PosterPDFs/NeurIPS%202024/94982.png?t=1730048105.5433192", "project": "", "author_site": "XINYU YANG, Yu Sun, Yuan xiaojie, Xinyang Chen", "tldr": "", "abstract": "Missing data in multivariate time series are common issues that can affect the analysis and downstream applications.\nAlthough multivariate time series data generally consist of the trend, seasonal and residual terms, existing works mainly focus on optimizing the modeling for the first two items. However, we find that the residual term is more crucial for getting accurate fillings, since it is more related to the diverse changes of data and the biggest component of imputation errors.\nTherefore, in this study, we introduce frequency-domain information and design Frequency-aware Generative Models for Multivariate Time Series Imputation (FGTI). Specifically, FGTI employs a high-frequency filter to boost the residual term imputation, supplemented by a dominant-frequency filter for the trend and seasonal imputation. Cross-domain representation learning module then fuses frequency-domain insights with deep representations.\nExperiments over various datasets with real-world missing values show that FGTI achieves superiority in both data imputation and downstream applications.", "keywords": "Time series; Time series Imputation; Generative Models; Frequency domain", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xinyu Yang;Yu Sun;Xiaojie Yuan;Xinyang Chen", "authorids": "~Xinyu_Yang11;~Yu_Sun17;~Xiaojie_Yuan1;~Xinyang_Chen1", "gender": "M;M;;Not Specified", "homepage": ";https://cc.nankai.edu.cn/2021/0323/c13621a550004/page.htm;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm;https://chenxinyang123.github.io/", "dblp": ";62/3689-27.html;79/2280;242/3871-1", "google_scholar": "Au1f3xwAAAAJ;zB9fzRAAAAAJ;;qVxhGWUAAAAJ", "orcid": ";0009-0007-7398-2972;0000-0002-5876-6856;0000-0001-6743-838X", "linkedin": ";;;", "or_profile": "~Xinyu_Yang11;~Yu_Sun17;~Xiaojie_Yuan1;~Xinyang_Chen1", "aff": "Nankai University;Nankai University;Nankai University;Harbin Institute of Technology, Shenzhen", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;hit.edu.cn", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024frequencyaware,\ntitle={Frequency-aware Generative Models for Multivariate Time Series Imputation},\nauthor={Xinyu Yang and Yu Sun and Xiaojie Yuan and Xinyang Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UE6CeRMnq3}\n}", "github": "", "reviewers": "SUMs;Q2t8;WWjA;qcDA", "pdf_size": 1606423, "rating": "5;5;5;6", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "36;115;57;86", "wc_strengths": "37;36;46;49", "wc_weaknesses": "88;111;236;254", "wc_questions": "66;11;3;83", "wc_limitations": "1;14;47;1", "wc_review": "228;287;389;473", "wc_reply_reviewers": "0;100;0;37", "wc_reply_authors": "0;170;1689;1314", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;4;4", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.5, 29.82029510249689 ], "wc_strengths_avg": [ 42.0, 5.612486080160912 ], "wc_weaknesses_avg": [ 172.25, 73.47916371325955 ], "wc_questions_avg": [ 40.75, 34.39749264117953 ], "wc_limitations_avg": [ 15.75, 18.806581294855267 ], "wc_review_avg": [ 344.25, 94.0355650804524 ], "wc_reply_reviewers_avg": [ 34.25, 40.855691158025955 ], "wc_reply_authors_avg": [ 793.25, 723.05510682105 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16550124766860467334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Nankai University;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "NKU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "RaVL: Discovering and Mitigating Spurious Correlations in Fine-Tuned Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94981", "id": "UFRZHFYW8e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UFRZHFYW8e", "openreview": "https://openreview.net/forum?id=UFRZHFYW8e", "poster": "/media/PosterPDFs/NeurIPS%202024/94981.png?t=1731392690.013125", "project": "", "author_site": "Maya Varma, Jean-Benoit Delbrouck, Zhihong Chen, Akshay Chaudhari, Curtis Langlotz", "tldr": "", "abstract": "Fine-tuned vision-language models (VLMs) often capture spurious correlations between image features and textual attributes, resulting in degraded zero-shot performance at test time. Existing approaches for addressing spurious correlations (i) primarily operate at the global image-level rather than intervening directly on fine-grained image features and (ii) are predominantly designed for unimodal settings. In this work, we present RaVL, which takes a fine-grained perspective on VLM robustness by discovering and mitigating spurious correlations using local image features rather than operating at the global image level. Given a fine-tuned VLM, RaVL first discovers spurious correlations by leveraging a region-level clustering approach to identify precise image features contributing to zero-shot classification errors. Then, RaVL mitigates the identified spurious correlation with a novel region-aware loss function that enables the VLM to focus on relevant regions and ignore spurious relationships during fine-tuning. We evaluate RaVL on 654 VLMs with various model architectures, data domains, and learned spurious correlations. Our results show that RaVL accurately discovers (191% improvement over the closest baseline) and mitigates (8.2% improvement on worst-group image classification accuracy) spurious correlations. Qualitative evaluations on general-domain and medical-domain VLMs confirm our findings.", "keywords": "vision-language models;robustness;spurious correlations;fine-grained", "primary_area": "evaluation", "supplementary_material": "", "author": "Maya Varma;Jean-Benoit Delbrouck;Zhihong Chen;Akshay S Chaudhari;Curtis Langlotz", "authorids": "~Maya_Varma1;~Jean-Benoit_Delbrouck1;~Zhihong_Chen2;~Akshay_S_Chaudhari1;~Curtis_Langlotz1", "gender": ";;M;;M", "homepage": "https://maya-varma.com/;;;;https://profiles.stanford.edu/curtis-langlotz", "dblp": "233/4077;;78/3726;;12/1751", "google_scholar": ";;y55sF8cAAAAJ;;WQkBYwQAAAAJ", "orcid": ";;;;0000-0002-8972-8051", "linkedin": ";;;;langlotz/", "or_profile": "~Maya_Varma1;~Jean-Benoit_Delbrouck1;~Zhihong_Chen2;~Akshay_S_Chaudhari1;~Curtis_Langlotz1", "aff": "Stanford University;;The Chinese University of Hong Kong, Shenzhen;;Stanford University", "aff_domain": "stanford.edu;;cuhk.edu.cn;;stanford.edu", "position": "PhD student;;PhD student;;Full Professor", "bibtex": "@inproceedings{\nvarma2024ravl,\ntitle={Ra{VL}: Discovering and Mitigating Spurious Correlations in Fine-Tuned Vision-Language Models},\nauthor={Maya Varma and Jean-Benoit Delbrouck and Zhihong Chen and Akshay S Chaudhari and Curtis Langlotz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UFRZHFYW8e}\n}", "github": "", "reviewers": "onxK;aCX1;LC5i", "pdf_size": 13295281, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "118;106;45", "wc_strengths": "46;64;24", "wc_weaknesses": "105;55;62", "wc_questions": "112;28;25", "wc_limitations": "22;1;9", "wc_review": "403;254;165", "wc_reply_reviewers": "197;10;214", "wc_reply_authors": "1167;0;397", "reply_reviewers": "2;1;3", "reply_authors": "3;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.66666666666667, 31.961782734314987 ], "wc_strengths_avg": [ 44.666666666666664, 16.35712552851373 ], "wc_weaknesses_avg": [ 74.0, 22.105806175452337 ], "wc_questions_avg": [ 55.0, 40.32369030731191 ], "wc_limitations_avg": [ 10.666666666666666, 8.65383665716478 ], "wc_review_avg": [ 274.0, 98.1868966138897 ], "wc_reply_reviewers_avg": [ 140.33333333333334, 92.42053643836718 ], "wc_reply_authors_avg": [ 521.3333333333334, 484.46969862818963 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13006410362667831192&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;;cuhk.edu.cn;;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.cuhk.edu.cn", "aff_unique_abbr": "Stanford;CUHK", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Shenzhen", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "UGKgoAZuuL", "title": "Bridging Inter-task Gap of Continual Self-supervised Learning with External Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent research on Self-Supervised Learning (SSL) has demonstrated its ability to extract high-quality representations from unlabeled samples. However, in continual learning scenarios where training data arrives sequentially, SSL's performance tends to deteriorate. This study focuses on Continual Contrastive Self-Supervised Learning (CCSSL) and highlights that the absence of contrastive learning on inter-task data, due to the unavailability of historical samples, leads to a significant drop in performance. To tackle this issue, we introduce a simple and effective method called BGE, which Bridges the inter-task Gap of CCSSL using External data from publicly available datasets. BGE enables the contrastive learning of each task data with external data, allowing relationships between them to be passed along the tasks, thereby facilitating implicit inter-task data comparisons. To overcome the limitation of the external data selection and maintain its effectiveness, we further propose the One-Propose-One algorithm to collect more relevant and diverse high-quality samples from the chosen external data while filtering out distractions from the out-of-distribution data. Experiments show that BGE can generate better discriminative representation in CCSSL, especially for inter-task data, and improve classification results with various external data compositions. Additionally, the proposed method can be seamlessly integrated into existing continual learning methods yielding significant performance improvement.", "keywords": "self-supervised learning;continual learning", "primary_area": "online_learning", "supplementary_material": "/attachment/cfdf31f29609bf1c5854e8ae1cb4d3d745a98490.zip", "author": "Haori Lu;Xusheng Cao;Fei Yang;Xialei Liu", "authorids": "~Haori_Lu1;~Xusheng_Cao1;~Fei_Yang4;~Xialei_Liu1", "gender": ";M;M;M", "homepage": ";https://superman22.top;;https://xialeiliu.github.io", "dblp": ";;19/2504-4;184/6616", "google_scholar": ";;S1gksNwAAAAJ;https://scholar.google.es/citations?user=akuWIJQAAAAJ", "orcid": "0000-0003-0167-6646;;;", "linkedin": ";;;", "or_profile": "~Haori_Lu1;~Xusheng_Cao1;~Fei_Yang4;~Xialei_Liu1", "aff": "Nankai University;Nankai University;Nankai University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn", "position": "MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024bridging,\ntitle={Bridging Inter-task Gap of Continual Self-supervised Learning with External Data},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=UGKgoAZuuL}\n}", "github": "", "project": "", "reviewers": "xWKh;hAsz;gaeT;oaUg", "site": "https://openreview.net/forum?id=UGKgoAZuuL", "pdf_size": 941008, "rating": "4;5;5;6", "confidence": "5;5;3;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "53;74;90;140", "wc_strengths": "61;41;305;55", "wc_weaknesses": "114;63;854;161", "wc_questions": "30;22;228;495", "wc_limitations": "9;27;323;68", "wc_review": "267;227;1800;919", "wc_reply_reviewers": "72;0;446;114", "wc_reply_authors": "140;0;318;174", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.25, 32.104322138927024 ], "wc_strengths_avg": [ 115.5, 109.6483014004321 ], "wc_weaknesses_avg": [ 298.0, 322.8722657646519 ], "wc_questions_avg": [ 193.75, 192.50762971892829 ], "wc_limitations_avg": [ 106.75, 126.6695997467427 ], "wc_review_avg": [ 803.25, 637.6787494499091 ], "wc_reply_reviewers_avg": [ 158.0, 171.20163550620654 ], "wc_reply_authors_avg": [ 158.0, 113.07519621915321 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xNXTiajf58sJ:scholar.google.com/&scioq=Bridging+Inter-task+Gap+of+Continual+Self-supervised+Learning+with+External+Data&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nankai University", "aff_unique_dep": "", "aff_unique_url": "http://www.nankai.edu.cn", "aff_unique_abbr": "NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Distribution-Aware Data Expansion with Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94980", "id": "UGUkPYSdg4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UGUkPYSdg4", "openreview": "https://openreview.net/forum?id=UGUkPYSdg4", "poster": "/media/PosterPDFs/NeurIPS%202024/94980.png?t=1731411387.5390196", "project": "", "author_site": "Haowei Zhu, Ling Yang, Jun-Hai Yong, Hongzhi Yin, Jiawei Jiang, Meng Xiao, Wentao Zhang, Bin Wang", "tldr": "", "abstract": "The scale and quality of a dataset significantly impact the performance of deep models. However, acquiring large-scale annotated datasets is both a costly and time-consuming endeavor. To address this challenge, dataset expansion technologies aim to automatically augment datasets, unlocking the full potential of deep models. Current data expansion techniques include image transformation and image synthesis methods. Transformation-based methods introduce only local variations, leading to limited diversity. In contrast, synthesis-based methods generate entirely new content, greatly enhancing informativeness. However, existing synthesis methods carry the risk of distribution deviations, potentially degrading model performance with out-of-distribution samples. In this paper, we propose DistDiff, a training-free data expansion framework based on the distribution-aware diffusion model. DistDiff constructs hierarchical prototypes to approximate the real data distribution, optimizing latent data points within diffusion models with hierarchical energy guidance. We demonstrate its capability to generate distribution-consistent samples, significantly improving data expansion tasks. DistDiff consistently enhances accuracy across a diverse range of datasets compared to models trained solely on original data. Furthermore, our approach consistently outperforms existing synthesis-based techniques and demonstrates compatibility with widely adopted transformation-based augmentation methods. Additionally, the expanded dataset exhibits robustness across various architectural frameworks.", "keywords": "diffusion models;data expansion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "haoweiz;Ling Yang;Jun-Hai Yong;Hongzhi Yin;Jiawei Jiang;Meng Xiao;Wentao Zhang;Bin Wang", "authorids": "~haoweiz1;~Ling_Yang1;~Jun-Hai_Yong3;~Hongzhi_Yin2;~Jiawei_Jiang1;~Meng_Xiao1;~Wentao_Zhang1;~Bin_Wang3", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://yangling0818.github.io/;https://www.thss.tsinghua.edu.cn/person/yongjunhai;https://sites.google.com/view/hongzhi-yin/home;http://bluesjjw.github.io/;https://coco11563.github.io/;https://binwangthss.github.io/;https://zwt233.github.io/", "dblp": ";01/24-6.html;;04/10606.html;185/1521-1;25/6475-1;13/1898-21;41/3249-1.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;;JJsBmhYAAAAJ;G_Hg-j0AAAAJ;YGwukbUAAAAJ;o7AFnlUAAAAJ;JE4VON0AAAAJ", "orcid": "0009-0000-4120-3312;0000-0003-1905-8053;;0000-0003-1395-261X;0000-0003-0051-0046;0000-0001-5294-5776;0000-0002-5176-9202;0000-0002-7532-5550", "linkedin": ";;;;;;bin-wang-22992524/;", "or_profile": "~haoweiz1;~Ling_Yang1;~Jun-Hai_Yong3;~Hongzhi_Yin2;~Jiawei_Jiang1;~Meng_Xiao1;~Bin_Wang3;~Zhang_wen_tao1", "aff": "Tsinghua University;Peking University;Tsinghua University;University of Queensland;Wuhan University;University of Chinese Academy of Sciences;Tsinghua University;Peking University", "aff_domain": "tsinghua.edu.cn;pku.edu.cn;tsinghua.edu.cn;uq.edu.au;whu.edu.cn;cnic.cn;tsinghua.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor;Full Professor;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhaoweiz2024distributionaware,\ntitle={Distribution-Aware Data Expansion with Diffusion Models},\nauthor={haoweiz and Ling Yang and Jun-Hai Yong and Hongzhi Yin and Jiawei Jiang and Meng Xiao and Wentao Zhang and Bin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UGUkPYSdg4}\n}", "github": "", "reviewers": "4Bhc;EsYJ;ghiL", "pdf_size": 23281696, "rating": "4;5;7", "confidence": "3;5;3", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "68;75;117", "wc_strengths": "46;82;64", "wc_weaknesses": "138;139;150", "wc_questions": "150;249;37", "wc_limitations": "3;31;7", "wc_review": "405;576;375", "wc_reply_reviewers": "0;189;116", "wc_reply_authors": "311;282;593", "reply_reviewers": "0;2;2", "reply_authors": "3;3;4", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.66666666666667, 21.638443156156644 ], "wc_strengths_avg": [ 64.0, 14.696938456699069 ], "wc_weaknesses_avg": [ 142.33333333333334, 5.436502143433363 ], "wc_questions_avg": [ 145.33333333333334, 86.61152091699785 ], "wc_limitations_avg": [ 13.666666666666666, 12.36482466066094 ], "wc_review_avg": [ 452.0, 88.53247991556545 ], "wc_reply_reviewers_avg": [ 101.66666666666667, 77.82173361100499 ], "wc_reply_authors_avg": [ 395.3333333333333, 140.2719580750986 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.18898223650461365, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5532038992782397291&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "tsinghua.edu.cn;pku.edu.cn;tsinghua.edu.cn;uq.edu.au;whu.edu.cn;cnic.cn;tsinghua.edu.cn;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;3;4;0;1", "aff_unique_norm": "Tsinghua University;Peking University;University of Queensland;Wuhan University;University of Chinese Academy of Sciences", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;https://www.uq.edu.au;http://www.whu.edu.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "THU;Peking U;UQ;WHU;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "LLM-based Skill Diffusion for Zero-shot Policy Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94979", "id": "UGlDVc0GTU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UGlDVc0GTU", "openreview": "https://openreview.net/forum?id=UGlDVc0GTU", "poster": "/media/PosterPDFs/NeurIPS%202024/94979.png?t=1730278839.6608691", "project": "", "author_site": "Woo Kyung Kim, Youngseok Lee, Jooyoung Kim, Honguk Woo", "tldr": "", "abstract": "Recent advances in data-driven imitation learning and offline reinforcement learning have highlighted the use of expert data for skill acquisition and the development of hierarchical policies based on these skills. However, these approaches have not significantly advanced in adapting these skills to unseen contexts, which may involve changing environmental conditions or different user requirements. In this paper, we present a novel LLM-based policy adaptation framework LDuS which leverages an LLM to guide the generation process of a skill diffusion model upon contexts specified in language, facilitating zero-shot skill-based policy adaptation to different contexts. To implement the skill diffusion model, we adapt the loss-guided diffusion with a sequential in-painting technique, where target trajectories are conditioned by masking them with past state-action sequences, thereby enabling the robust and controlled generation of skill trajectories in test-time. To have a loss function for a given context, we employ the LLM-based code generation with iterative refinement, by which the code and controlled trajectory are validated to align with the context in a closed-loop manner. Through experiments, we demonstrate the zero-shot adaptability of LDuS to various context types including different specification levels, multi-modality, and varied temporal conditions for several robotic manipulation tasks, outperforming other language-conditioned imitation and planning methods.", "keywords": "Imitation Learning;Planning;Diffusion Model;Large Language Model", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1576c08c38a326aac821d743260db177ec3a5a60.zip", "author": "Woo Kyung Kim;Youngseok Lee;Jooyoung Kim;Honguk Woo", "authorids": "~Woo_Kyung_Kim1;~Youngseok_Lee2;~Jooyoung_Kim1;~Honguk_Woo1", "gender": "M;M;M;M", "homepage": ";;https://sites.google.com/view/csi-agent-group/;https://sites.google.com/view/csi-agent-group/about", "dblp": "306/0140;;;63/6072", "google_scholar": "OFFacb0AAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?hl=ko;https://scholar.google.co.kr/citations?user=Gaxjc7UAAAAJ", "orcid": "0000-0001-6214-4171;;;0000-0001-6948-3440", "linkedin": ";;%EC%A3%BC%EC%98%81-%EA%B9%80-b883b3248/?locale=en_US;", "or_profile": "~Woo_Kyung_Kim1;~Youngseok_Lee2;~Jooyoung_Kim1;~Honguk_Woo1", "aff": "Sungkyunkwan University;Sung Kyun Kwan University;;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu;;skku.edu", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nkim2024llmbased,\ntitle={{LLM}-based Skill Diffusion for Zero-shot Policy Adaptation},\nauthor={Woo Kyung Kim and Youngseok Lee and Jooyoung Kim and Honguk Woo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UGlDVc0GTU}\n}", "github": "", "reviewers": "UqKP;MnsC;261B;UfZc", "pdf_size": 4700509, "rating": "5;5;7;7", "confidence": "4;3;3;5", "soundness": "2;3;3;4", "novelty": "2;3;3;2", "presentation": "3;3;3;4", "wc_summary": "84;102;39;97", "wc_strengths": "42;82;112;74", "wc_weaknesses": "372;44;87;165", "wc_questions": "14;75;36;197", "wc_limitations": "49;10;20;19", "wc_review": "561;313;294;552", "wc_reply_reviewers": "19;51;40;48", "wc_reply_authors": "85;107;46;76", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.5, 24.84451649760969 ], "wc_strengths_avg": [ 77.5, 24.914855006601986 ], "wc_weaknesses_avg": [ 167.0, 126.05356004492694 ], "wc_questions_avg": [ 80.5, 70.71951640106145 ], "wc_limitations_avg": [ 24.5, 14.67140075112121 ], "wc_review_avg": [ 430.0, 126.71819127497047 ], "wc_reply_reviewers_avg": [ 39.5, 12.5 ], "wc_reply_authors_avg": [ 78.5, 21.891779278989635 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GjdHDI8Fp6EJ:scholar.google.com/&scioq=LLM-based+Skill+Diffusion+for+Zero-shot+Policy+Adaptation&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "skku.edu;skku.edu;;skku.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "UGwdz3kjht", "title": "Prioritize Alignment in Dataset Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Dataset Distillation aims to compress a large dataset into a significantly more compact, synthetic one without compromising the performance of the trained models. To achieve this, existing methods use the agent model to extract information from the target dataset and embed it into the distilled dataset. Consequently, the quality of extracted and embedded information determines the quality of the distilled dataset. In this work, we find that existing methods introduce misaligned information in both information extraction and embedding stages. To alleviate this, we propose Prioritize Alignment in Dataset Distillation (PAD), which aligns information from the following two perspectives. 1) We prune the target dataset according to the compressing ratio to filter the information that can be extracted by the agent model. 2) We use only deep layers of the agent model to perform the distillation to avoid excessively introducing low-level information. This simple strategy effectively filters out misaligned information and brings non-trivial improvement for mainstream matching-based distillation algorithms. Furthermore, built on trajectory matching, PAD achieves remarkable improvements on various benchmarks, achieving state-of-the-art performance. The code and distilled datasets will be made public.", "keywords": "dataset distillation;efficient learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zekai Li;Ziyao Guo;Wangbo Zhao;Tianle Zhang;Zhi-Qi Cheng;Samir Khaki;Kaipeng Zhang;Ahmad Sajedi;Kai Wang;Konstantinos N Plataniotis;Yang You", "authorids": "~Zekai_Li2;~Ziyao_Guo1;~Wangbo_Zhao1;~Tianle_Zhang4;~Zhi-Qi_Cheng1;~Samir_Khaki1;~Kaipeng_Zhang1;~Ahmad_Sajedi2;~Kai_Wang8;~Konstantinos_N_Plataniotis1;~Yang_You1", "gender": "M;M;M;Not Specified;M;M;M;M;M;M;M", "homepage": "https://lizekai-richard.github.io/;https://github.com/GzyAftermath;;https://faculty.washington.edu/zhiqics/;http://samirkhaki.com;http://kpzhang93.github.io/;https://ahmadsajedii.github.io/;https://kaiwang960112.github.io/;https://www.comp.nus.edu.sg/~youy/;https://www.comm.utoronto.ca/~kostas/;", "dblp": ";309/6165;;188/1193;304/3390.html;179/2126;290/7612;78/2022-36;33/8167-1.html;p/KonstantinosNPlataniotis;289/6986.html", "google_scholar": "1tZ6zC8AAAAJ;FlZSxJMAAAAJ;;uB2He2UAAAAJ;XUyUZY4AAAAJ;4OqZBmYAAAAJ;;i2II0XIAAAAJ;jF4dPZwAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.com.hk/citations?user=aocj89kAAAAJ", "orcid": ";;0000-0003-1502-9730;0000-0002-1720-2085;0009-0006-7278-8652;;0009-0000-0618-5198;0000-0002-1154-5175;;0000-0003-3647-5473;", "linkedin": "zekai-li-047350219/;;;zhiqicheng/;samir-khaki-0125861b7/;;ahmad-sajedi-392a30b6/;;yang-you-0b92914b/;;", "or_profile": "~Zekai_Li2;~Ziyao_Guo1;~Tianle_Zhang4;~Zhi-Qi_Cheng1;~Samir_Khaki1;~Kaipeng_Zhang1;~Ahmad_Sajedi2;~Kai_Wang8;~Yang_You1;~Kostantinos_Plataniotis1;~ZHAO_WANGBO1", "aff": "National University of Singapore;Xidian University;University of Electronic Science and Technology of China;Carnegie Mellon University;University of Toronto;Shanghai AI Laboratory;University of Toronto;National University of Singapore;National University of Singapore;Toronto University;National University of Singapore", "aff_domain": "u.nus.edu;xidian.edu.cn;cn.edu;cmu.edu;ece.utoronto.ca;pjlab.org.cn;ece.utoronto.ca;u.nus.edu;nus.edu.sg;utoronto.ca;nus.edu", "position": "Undergrad student;MS student;Undergrad student;Project Scientist & Instructor;Undergrad student;Researcher;PhD student;PhD student;Professor;Full Professor;PhD student", "bibtex": "@misc{\nanonymous2024prioritize,\ntitle={Prioritize Alignment in Dataset Distillation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=UGwdz3kjht}\n}", "github": "", "project": "", "reviewers": "UeUP;emvM;xUdN", "site": "https://openreview.net/forum?id=UGwdz3kjht", "pdf_size": 8204815, "rating": "5;5;6", "confidence": "3;3;4", "soundness": "3;3;2", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "86;61;41", "wc_strengths": "124;26;39", "wc_weaknesses": "139;111;108", "wc_questions": "168;43;57", "wc_limitations": "10;6;19", "wc_review": "527;247;264", "wc_reply_reviewers": "46;95;32", "wc_reply_authors": "362;658;331", "reply_reviewers": "1;1;1", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.666666666666664, 18.408935028645434 ], "wc_strengths_avg": [ 63.0, 43.45879274285777 ], "wc_weaknesses_avg": [ 119.33333333333333, 13.960261060914616 ], "wc_questions_avg": [ 89.33333333333333, 55.91859162111371 ], "wc_limitations_avg": [ 11.666666666666666, 5.436502143433364 ], "wc_review_avg": [ 346.0, 128.1743604106011 ], "wc_reply_reviewers_avg": [ 57.666666666666664, 27.010286106510527 ], "wc_reply_authors_avg": [ 450.3333333333333, 147.38686810190686 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12394637011492708992&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;4;5;4;0;0;4;0", "aff_unique_norm": "National University of Singapore;Xidian University;University of Electronic Science and Technology of China;Carnegie Mellon University;University of Toronto;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.nus.edu.sg;http://www.xidian.edu.cn/;https://www.uestc.edu.cn;https://www.cmu.edu;https://www.utoronto.ca;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "NUS;Xidian;UESTC;CMU;U of T;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;3;1;3;0;0;3;0", "aff_country_unique": "Singapore;China;United States;Canada" }, { "title": "Exocentric-to-Egocentric Video Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94978", "id": "UHDCbIrCFL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UHDCbIrCFL", "openreview": "https://openreview.net/forum?id=UHDCbIrCFL", "poster": "/media/PosterPDFs/NeurIPS%202024/94978.png?t=1733405415.989026", "project": "", "author_site": "Jia-Wei Liu, Weijia Mao, Zhongcong XU, Jussi Keppo, Mike Zheng Shou", "tldr": "", "abstract": "We introduce Exo2Ego-V, a novel exocentric-to-egocentric diffusion-based video generation method for daily-life skilled human activities where sparse 4-view exocentric viewpoints are configured 360\u00b0 around the scene. This task is particularly challenging due to the significant variations between exocentric and egocentric viewpoints and high complexity of dynamic motions and real-world daily-life environments. To address these challenges, we first propose a new diffusion-based multi-view exocentric encoder to extract the dense multi-scale features from multi-view exocentric videos as the appearance conditions for egocentric video generation. Then, we design an exocentric-to-egocentric view translation prior to provide spatially aligned egocentric features as a concatenation guidance for the input of egocentric video diffusion model. Finally, we introduce the temporal attention layers into our egocentric video diffusion pipeline to improve the temporal consistency cross egocentric frames. Extensive experiments demonstrate that Exo2Ego-V significantly outperforms SOTA approaches on 5 categories from the Ego-Exo4D dataset with an average of 35% in terms of LPIPS. Our code and model will be made available on https://github.com/showlab/Exo2Ego-V.", "keywords": "Exocentric-Egocentric Vision;Video Generation;Viewpoint Translation", "primary_area": "generative_models", "supplementary_material": "/attachment/3f918317b4ca243800dffd79432e231855405595.zip", "author": "Jia-Wei Liu;Weijia Mao;Zhongcong Xu;Jussi Keppo;Mike Zheng Shou", "authorids": "~Jia-Wei_Liu1;~Weijia_Mao1;~Zhongcong_Xu1;~Jussi_Keppo1;~Mike_Zheng_Shou1", "gender": "M;M;;M;", "homepage": "https://jia-wei-liu.github.io/;https://sites.google.com/view/showlab;;https://www.jussikeppo.com;", "dblp": "85/3336;321/4529;;;", "google_scholar": "stQQf7wAAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;;keppo/;", "or_profile": "~Jia-Wei_Liu1;~Weijia_Mao1;~Zhongcong_Xu1;~Jussi_Keppo1;~Mike_Zheng_Shou1", "aff": "National University of Singapore;National University of Singapore;;;", "aff_domain": "u.nus.edu;u.nus.edu;;;", "position": "PhD student;PhD student;;;", "bibtex": "@inproceedings{\nliu2024exocentrictoegocentric,\ntitle={Exocentric-to-Egocentric Video Generation},\nauthor={Jia-Wei Liu and Weijia Mao and Zhongcong Xu and Jussi Keppo and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UHDCbIrCFL}\n}", "github": "", "reviewers": "qWdm;ANQv;W4kZ;aauy", "pdf_size": 32909204, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "61;33;102;57", "wc_strengths": "13;41;89;36", "wc_weaknesses": "187;150;181;109", "wc_questions": "48;132;115;20", "wc_limitations": "1;6;12;2", "wc_review": "310;362;499;224", "wc_reply_reviewers": "0;197;32;0", "wc_reply_authors": "879;278;40;40", "reply_reviewers": "0;1;1;0", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 63.25, 24.80297361204902 ], "wc_strengths_avg": [ 44.75, 27.643941470058138 ], "wc_weaknesses_avg": [ 156.75, 30.93844695520446 ], "wc_questions_avg": [ 78.75, 46.22431719344267 ], "wc_limitations_avg": [ 5.25, 4.322904116447646 ], "wc_review_avg": [ 348.75, 99.76816877140725 ], "wc_reply_reviewers_avg": [ 57.25, 81.73547271533945 ], "wc_reply_authors_avg": [ 309.25, 342.9951712488093 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15087093436516994785&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "u.nus.edu;u.nus.edu;;;", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "D-LLM: A Token Adaptive Computing Resource Allocation Strategy for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94977", "id": "UIOjGTKHQG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UIOjGTKHQG", "openreview": "https://openreview.net/forum?id=UIOjGTKHQG", "poster": "/media/PosterPDFs/NeurIPS%202024/94977.png?t=1731067175.5279036", "project": "", "author_site": "Yikun Jiang, Huanyu Wang, Lei Xie, Hanbin Zhao, zhang chao, Hui Qian, John C.S. Lui", "tldr": "", "abstract": "Large language models have shown an impressive societal impact owing to their excellent understanding and logical reasoning skills. However, such strong ability relies on a huge amount of computing resources, which makes it difficult to deploy LLMs on computing resource-constrained platforms. Currently, LLMs process each token equivalently, but we argue that not every word is equally important. Some words should not be allocated excessive computing resources, particularly for dispensable terms in simple questions. In this paper, we propose a novel dynamic inference paradigm for LLMs, namely D-LLMs, which adaptively allocate computing resources in token processing. We design a dynamic decision module for each transformer layer that decides whether a network unit should be executed or skipped. Moreover, we tackle the issue of adapting D-LLMs to real-world applications, specifically concerning the missing KV-cache when layers are skipped. To overcome this, we propose a simple yet effective eviction policy to exclude the skipped layers from subsequent attention calculations. The eviction policy not only enables D-LLMs to be compatible with prevalent applications but also reduces considerable storage resources. Experimentally, D-LLMs show superior performance, in terms of computational cost and KV storage utilization. It can reduce up to 45\\% computational cost and KV storage on Q\\&A, summarization, and math solving tasks, 50\\% on commonsense reasoning tasks.", "keywords": "Large Language Models;Dynamic Inference;Inference Acceleration;Adaptive Computing Resource Allocation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "yikun jiang;Huanyu Wang;Lei Xie;Hanbin Zhao;Chao Zhang;Hui Qian;John C.S. Lui", "authorids": "~yikun_jiang2;~Huanyu_Wang1;~Lei_Xie7;~Hanbin_Zhao1;~Chao_Zhang19;~Hui_Qian1;~John_C.S._Lui2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://huanyuhello.github.io/;https://pacman.cs.tsinghua.edu.cn/~zjd/author/lei-xie/;;;;http://www.cse.cuhk.edu.hk/~cslui/Index.html", "dblp": "153/5995;;;222/7871;94/3019-29;66/5293;l/JohnCSLui", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;F2kiw10AAAAJ;;;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ", "orcid": "0000-0001-8191-3343;;;;;;0000-0001-7466-0384", "linkedin": ";;;;;;", "or_profile": "~yikun_jiang2;~Huanyu_Wang1;~Lei_Xie7;~Hanbin_Zhao1;~Chao_Zhang19;~Hui_Qian1;~John_C.S._Lui2", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Zhejiang University;Zhejiang University;Zhejiang University;The Chinese University of Hong Kong", "aff_domain": "huawei.com;huawei.com;huawei.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;cse.cuhk.edu.hk", "position": "Researcher;Researcher;Researcher;Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiang2024dllm,\ntitle={D-{LLM}: A Token Adaptive Computing Resource Allocation Strategy for Large Language Models},\nauthor={yikun jiang and Huanyu Wang and Lei Xie and Hanbin Zhao and Chao Zhang and Hui Qian and John C.S. Lui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UIOjGTKHQG}\n}", "github": "", "reviewers": "Y7Gp;9XGZ;fCWH;RWhA", "pdf_size": 506622, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "99;37;47;72", "wc_strengths": "159;70;63;23", "wc_weaknesses": "203;147;129;77", "wc_questions": "339;3;262;4", "wc_limitations": "180;3;61;1", "wc_review": "980;260;562;177", "wc_reply_reviewers": "0;0;100;20", "wc_reply_authors": "0;0;227;19", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.75, 24.014318645341575 ], "wc_strengths_avg": [ 78.75, 49.68085647409875 ], "wc_weaknesses_avg": [ 139.0, 45.0111097397076 ], "wc_questions_avg": [ 152.0, 150.97516352036186 ], "wc_limitations_avg": [ 61.25, 72.67177925439833 ], "wc_review_avg": [ 494.75, 314.66758253750896 ], "wc_reply_reviewers_avg": [ 30.0, 41.23105625617661 ], "wc_reply_authors_avg": [ 61.5, 95.86579160472206 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1787877755347399358&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "huawei.com;huawei.com;huawei.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;cse.cuhk.edu.hk", "author_num": 7, "aff_unique_index": "0;0;0;1;1;1;2", "aff_unique_norm": "Huawei;Zhejiang University;Chinese University of Hong Kong", "aff_unique_dep": "Huawei Technologies;;", "aff_unique_url": "https://www.huawei.com;https://www.zju.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "Huawei;ZJU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Separation and Bias of Deep Equilibrium Models on Expressivity and Learning Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94976", "id": "UJ9k3j93MD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UJ9k3j93MD", "openreview": "https://openreview.net/forum?id=UJ9k3j93MD", "poster": "/media/PosterPDFs/NeurIPS%202024/94976.png?t=1731137985.5239022", "project": "", "author_site": "Zhoutong Wu, Yimu Zhang, Cong Fang, Zhouchen Lin", "tldr": "", "abstract": "The deep equilibrium model (DEQ) generalizes the conventional feedforward neural network by fixing the same weights for each layer block and extending the number of layers to infinity. This novel model directly finds the fixed points of such a forward process as features for prediction. Despite empirical evidence showcasing its efficacy \ncompared to feedforward neural networks, a theoretical understanding for its separation and bias is still limited. In this paper, we take a step\nby proposing some separations and studying the bias of DEQ in its expressive power and learning dynamics. The results include: (1) A general separation is proposed, showing the existence of a width-$m$ DEQ that any fully connected neural networks (FNNs) with depth $O(m^{\\alpha})$ for $\\alpha \\in (0,1)$ cannot\napproximate unless its width is sub-exponential in $m$; (2) DEQ with polynomially bounded size and magnitude can efficiently approximate certain steep functions (which has very large derivatives) in $L^{\\infty}$ norm, whereas FNN with bounded depth and exponentially bounded width cannot unless its weights magnitudes are exponentially large; (3) The implicit regularization caused by gradient flow from a diagonal linear DEQ is characterized, with specific examples showing the benefits brought by such regularization. \nFrom the overall study, a high-level conjecture from our analysis and empirical validations is that DEQ has potential advantages in learning certain high-frequency components.", "keywords": "Deep Equilibrium Models;separation;bias;expressivity;learning dynamics", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zhoutong Wu;Yimu Zhang;Cong Fang;Zhouchen Lin", "authorids": "~Zhoutong_Wu1;~Yimu_Zhang2;~Cong_Fang1;~Zhouchen_Lin1", "gender": "M;M;M;M", "homepage": ";https://congfang-ml.github.io/;https://zhouchenlin.github.io;", "dblp": ";140/6568;l/ZhouchenLin;", "google_scholar": ";N2M9RPoAAAAJ;https://scholar.google.com.tw/citations?user=TanjFwoAAAAJ;", "orcid": "0009-0005-6137-5492;;0000-0003-1493-7569;", "linkedin": ";;;%E4%B9%99%E6%B2%90-%E5%BC%A0-3834aa264/", "or_profile": "~Zhoutong_Wu1;~Cong_Fang1;~Zhouchen_Lin1;~yimu_zhang1", "aff": "Peking University;Peking University;Peking University;Tsinghua University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;mails.tsinghua.edu.cn", "position": "MS student;Assistant Professor;Professor;Undergrad student", "bibtex": "@inproceedings{\nwu2024separation,\ntitle={Separation and Bias of Deep Equilibrium Models on Expressivity and Learning Dynamics},\nauthor={Zhoutong Wu and Yimu Zhang and Cong Fang and Zhouchen Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UJ9k3j93MD}\n}", "github": "", "reviewers": "HjzA;qjxV;QS3W;j6dr", "pdf_size": 1159249, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;2", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "76;185;55;172", "wc_strengths": "61;49;69;66", "wc_weaknesses": "123;99;104;586", "wc_questions": "219;150;98;16", "wc_limitations": "5;41;1;38", "wc_review": "484;524;327;878", "wc_reply_reviewers": "0;55;0;540", "wc_reply_authors": "0;13;0;767", "reply_reviewers": "0;1;0;2", "reply_authors": "1;2;1;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.0, 57.17079674099356 ], "wc_strengths_avg": [ 61.25, 7.628073151196179 ], "wc_weaknesses_avg": [ 228.0, 206.8852338858431 ], "wc_questions_avg": [ 120.75, 74.15987796645838 ], "wc_limitations_avg": [ 21.25, 18.335416548308903 ], "wc_review_avg": [ 553.25, 201.43407730570317 ], "wc_reply_reviewers_avg": [ 148.75, 227.00151431212964 ], "wc_reply_authors_avg": [ 195.0, 330.28699641372503 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yqmhsjbF-IQJ:scholar.google.com/&scioq=Separation+and+Bias+of+Deep+Equilibrium+Models+on+Expressivity+and+Learning+Dynamics&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;mails.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Peking University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Rapid Plug-in Defenders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94975", "id": "UMPedMhKWm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UMPedMhKWm", "openreview": "https://openreview.net/forum?id=UMPedMhKWm", "poster": "", "project": "", "author_site": "Kai Wu, yujian li, Jian Lou, Xiaoyu Zhang, Handing Wang, Jing Liu", "tldr": "", "abstract": "In the realm of daily services, the deployment of deep neural networks underscores the paramount importance of their reliability. However, the vulnerability of these networks to adversarial attacks, primarily evasion-based, poses a concerning threat to their functionality. Common methods for enhancing robustness involve heavy adversarial training or leveraging learned knowledge from clean data, both necessitating substantial computational resources. This inherent time-intensive nature severely limits the agility of large foundational models to swiftly counter adversarial perturbations. To address this challenge, this paper focuses on the \\textbf{Ra}pid \\textbf{P}lug-\\textbf{i}n \\textbf{D}efender (\\textbf{RaPiD}) problem, aiming to rapidly counter adversarial perturbations without altering the deployed model. Drawing inspiration from the generalization and the universal computation ability of pre-trained transformer models, we propose a novel method termed \\textbf{CeTaD} (\\textbf{C}onsidering Pr\\textbf{e}-trained \\textbf{T}ransformers \\textbf{a}s \\textbf{D}efenders) for RaPiD, optimized for efficient computation. \\textbf{CeTaD} strategically fine-tunes the normalization layer parameters within the defender using a limited set of clean and adversarial examples. Our evaluation centers on assessing \\textbf{CeTaD}'s effectiveness, transferability, and the impact of different components in scenarios involving one-shot adversarial examples. The proposed method is capable of rapidly adapting to various attacks and different application scenarios without altering the target model and clean training data. We also explore the influence of varying training data conditions on \\textbf{CeTaD}'s performance. Notably, \\textbf{CeTaD} exhibits adaptability across differentiable service models and proves the potential of continuous learning.", "keywords": "Rapid Plug-in Defenders;Few-shot Adversarial Training;Adversarial Examples and Defenses", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Kai Wu;Yujian Betterest Li;Jian Lou;Xiaoyu Zhang;Handing Wang;Jing Liu", "authorids": "~Kai_Wu3;~Yujian_Betterest_Li1;~Jian_Lou2;~Xiaoyu_Zhang6;~Handing_Wang1;~Jing_Liu20", "gender": ";;;F;F;F", "homepage": ";;https://sites.google.com/view/jianlou;https://scholar.google.com.hk/citations?user=XtfE1f0AAAAJ&hl=zh-CN;;https://faculty.xidian.edu.cn/LJ22/zh_CN/index.htm", "dblp": ";;05/4625-1;12/5927-10.html;125/6067;72/2590-6", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=XtfE1f0AAAAJ;https://scholar.google.com.hk/citations?user=JtYr_7cAAAAJ;kqRxf3MAAAAJ", "orcid": ";;0000-0002-4110-2068;;0000-0002-4805-3780;0000-0002-6834-5350", "linkedin": ";;;;;", "or_profile": "~Kai_Wu3;~Yujian_Betterest_Li1;~Jian_Lou2;~Xiaoyu_Zhang6;~Handing_Wang1;~Jing_Liu20", "aff": ";;www.hoiying.net;Xidian University;Xidian University ;Xidian University, China", "aff_domain": ";;hoiying.net;xidian.edu.cn;xidian.edu.cn;mail.xidian.edu.cn", "position": ";;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024rapid,\ntitle={Rapid Plug-in Defenders},\nauthor={Kai Wu and Yujian Betterest Li and Jian Lou and Xiaoyu Zhang and Handing Wang and Jing Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UMPedMhKWm}\n}", "github": "", "reviewers": "ZnXi;Dpo5;tqbT;tz4d", "pdf_size": 2213611, "rating": "4;4;5;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "78;31;69;94", "wc_strengths": "36;68;61;67", "wc_weaknesses": "41;255;123;81", "wc_questions": "48;23;60;1", "wc_limitations": "1;7;103;1", "wc_review": "204;384;416;244", "wc_reply_reviewers": "0;185;200;12", "wc_reply_authors": "501;757;591;18", "reply_reviewers": "0;2;2;1", "reply_authors": "3;4;4;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.0, 23.16246964380094 ], "wc_strengths_avg": [ 58.0, 12.98075498574717 ], "wc_weaknesses_avg": [ 125.0, 80.46117075956576 ], "wc_questions_avg": [ 33.0, 22.792542640082964 ], "wc_limitations_avg": [ 28.0, 43.37049688440288 ], "wc_review_avg": [ 312.0, 89.8443097808648 ], "wc_reply_reviewers_avg": [ 99.25, 93.49699193022201 ], "wc_reply_authors_avg": [ 466.75, 274.87849588500006 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HIQg-lDiFL4J:scholar.google.com/&scioq=Rapid+Plug-in+Defenders&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";;hoiying.net;xidian.edu.cn;xidian.edu.cn;mail.xidian.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Hoiying Limited;Xidian University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hoiying.net;http://www.xidian.edu.cn/", "aff_unique_abbr": ";Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Improved learning rates in multi-unit uniform price auctions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94974", "id": "UN7nXLeh9D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UN7nXLeh9D", "openreview": "https://openreview.net/forum?id=UN7nXLeh9D", "poster": "", "project": "", "author_site": "Marius Potfer, Dorian Baudry, Hugo Richard, Vianney Perchet, Cheng Wan", "tldr": "", "abstract": "Motivated by the strategic participation of electricity producers in electricity day-ahead market, we study the problem of online learning in repeated multi-unit uniform price auctions focusing on the adversarial opposing bid setting. The main contribution of this paper is the introduction of a new modeling of the bid space. Indeed, we prove that a learning algorithm leveraging the structure of this problem achieves a regret of $\\tilde{O}(K^{4/3}T^{2/3})$ under bandit feedback, improving over the bound of $\\tilde{O}(K^{7/4}T^{3/4})$ previously obtained in the literature. This improved regret rate is tight up to logarithmic terms. %by deducing a lower bound of $\\Omega (T^{2/3})$ from the dynamic pricing literature, proving the optimality in $T$ of our algorithm up to log factors. \nInspired by electricity reserve markets, we further introduce a different feedback model under which all winning bids are revealed. This feedback interpolates between the full-information and bandit scenarios depending on the auctions' results. We prove that, under this feedback, the algorithm that we propose achieves regret $\\tilde{O}(K^{5/2}\\sqrt{T})$.", "keywords": "Online Learning;Auctions;Bandits", "primary_area": "bandits", "supplementary_material": "", "author": "Marius Potfer;Dorian Baudry;Hugo Richard;Vianney Perchet;Cheng Wan", "authorids": "~Marius_Potfer1;~Dorian_Baudry1;~Hugo_Richard1;~Vianney_Perchet3;~Cheng_Wan3", "gender": ";M;M;;M", "homepage": ";https://dbaudry.github.io/;https://hugorichard.github.io/;https://sites.google.com/site/wancheng2012avril/;", "dblp": ";277/6362;227/3044;;83/7398", "google_scholar": ";https://scholar.google.fr/citations?user=RRW-kfYAAAAJ;5KaKAOgAAAAJ;;", "orcid": ";;;;", "linkedin": "marius-potfer-863786190;;;;", "or_profile": "~Marius_Potfer1;~Dorian_Baudry1;~Hugo_Richard1;~Cheng_Wan3;~Vianney_Perchet1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;;Criteo;EDF R&D;", "aff_domain": "ensae.edu;;criteo.com;edf.fr;", "position": "PhD student;;Researcher;Research Engineer;", "bibtex": "@inproceedings{\npotfer2024improved,\ntitle={Improved learning rates in multi-unit uniform price auctions},\nauthor={Marius Potfer and Dorian Baudry and Hugo Richard and Vianney Perchet and Cheng Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UN7nXLeh9D}\n}", "github": "", "reviewers": "XwZ3;VAEZ;Ep16;eotr", "pdf_size": 422135, "rating": "5;6;6;7", "confidence": "2;3;3;3", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;2;3", "wc_summary": "64;58;90;147", "wc_strengths": "33;42;23;81", "wc_weaknesses": "105;140;76;82", "wc_questions": "53;97;139;97", "wc_limitations": "12;9;2;13", "wc_review": "267;346;330;420", "wc_reply_reviewers": "97;4;11;0", "wc_reply_authors": "264;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.75, 35.173676236640375 ], "wc_strengths_avg": [ 44.75, 21.98152633462927 ], "wc_weaknesses_avg": [ 100.75, 25.113492389550284 ], "wc_questions_avg": [ 96.5, 30.40970239906994 ], "wc_limitations_avg": [ 9.0, 4.301162633521313 ], "wc_review_avg": [ 340.75, 54.45812611539255 ], "wc_reply_reviewers_avg": [ 28.0, 40.03123780249619 ], "wc_reply_authors_avg": [ 66.0, 114.3153532995459 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vW_Kwo6V_H0J:scholar.google.com/&scioq=Improved+learning+rates+in+multi-unit+uniform+price+auctions&hl=en&as_sdt=0,5", "gs_version_total": 12, "email": "ensae.edu;;criteo.com;edf.fr;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;Criteo;EDF Research and Development", "aff_unique_dep": ";;", "aff_unique_url": "https://ensae.fr;https://www.criteo.com;https://www.edf.com/research-and-development", "aff_unique_abbr": "ENSAE;Criteo;EDF R&D", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Unique3D: High-Quality and Efficient 3D Mesh Generation from a Single Image", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94973", "id": "UO7Mvch1Z5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UO7Mvch1Z5", "openreview": "https://openreview.net/forum?id=UO7Mvch1Z5", "poster": "/media/PosterPDFs/NeurIPS%202024/94973.png?t=1730974706.0778248", "project": "", "author_site": "Kailu Wu, Fangfu Liu, Zhihan Cai, Runjie Yan, Hanyang Wang, Yating Hu, Yueqi Duan, Kaisheng Ma", "tldr": "", "abstract": "In this work, we introduce Unique3D, a novel image-to-3D framework for efficiently generating high-quality 3D meshes from single-view images, featuring state-of-the-art generation fidelity and strong generalizability. Previous methods based on Score Distillation Sampling (SDS) can produce diversified 3D results by distilling 3D knowledge from large 2D diffusion models, but they usually suffer from long per-case optimization time with inconsistent issues. Recent works address the problem and generate better 3D results either by finetuning a multi-view diffusion model or training a fast feed-forward model. However, they still lack intricate textures and complex geometries due to inconsistency and limited generated resolution. To simultaneously achieve high fidelity, consistency, and efficiency in single image-to-3D, we propose a novel framework Unique3D that includes a multi-view diffusion model with a corresponding normal diffusion model to generate multi-view images with their normal maps, a multi-level upscale process to progressively improve the resolution of generated orthographic multi-views, as well as an instant and consistent mesh reconstruction algorithm called ISOMER, which fully integrates the color and geometric priors into mesh results. Extensive experiments demonstrate that our Unique3D significantly outperforms other image-to-3D baselines in terms of geometric and textural details.", "keywords": "image to 3d;3d generation;mesh generation", "primary_area": "generative_models", "supplementary_material": "/attachment/4d21a7ec636ea12042b2444546e813a5bd58e7e4.zip", "author": "Kailu Wu;Fangfu Liu;Zhihan Cai;Runjie Yan;Hanyang Wang;Yating Hu;Yueqi Duan;Kaisheng Ma", "authorids": "~Kailu_Wu1;~Fangfu_Liu2;~Zhihan_Cai1;~Runjie_Yan1;~Hanyang_Wang4;~Yating_Hu2;~Yueqi_Duan1;~Kaisheng_Ma1", "gender": "M;F;;M;F;M;M;M", "homepage": "https://liuff19.github.io/;;https://runjie-yan.github.io;;http://aiuni.ai;https://duanyueqi.github.io/;http://group.iiis.tsinghua.edu.cn/~maks/index.html;", "dblp": "342/1749;;377/6407;225/2815-3;;168/8373;133/4053.html;226/6502.html", "google_scholar": ";;colE3H0AAAAJ;ex8jWtUAAAAJ;;qDseo3cAAAAJ;VtDpVoEAAAAJ;VTU0gysAAAAJ", "orcid": ";0009-0003-9544-0112;;;;;0000-0001-9226-3366;", "linkedin": "%E8%8A%B3%E7%94%AB-%E5%88%98-482856229/;;;;;;;", "or_profile": "~Fangfu_Liu2;~Zhihan_Cai1;~Runjie_Yan1;~Hanyang_Wang4;~Yating_Hu2;~Yueqi_Duan1;~Kaisheng_Ma1;~Kai_Wu2", "aff": "Department of Electronic Engineering, Tsinghua University;Tsinghua University;University of California, San Diego;Tsinghua University;AVAR Inc;Tsinghua University;Institute for Interdisciplinary Information Sciences (IIIS), Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;ucsd.edu;tsinghua.edu.cn;avar.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;Intern;Undergrad student;Researcher;Assistant Professor;Associate Professor;MS student", "bibtex": "@inproceedings{\nwu2024uniqued,\ntitle={Unique3D: High-Quality and Efficient 3D Mesh Generation from a Single Image},\nauthor={Kailu Wu and Fangfu Liu and Zhihan Cai and Runjie Yan and Hanyang Wang and Yating Hu and Yueqi Duan and Kaisheng Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UO7Mvch1Z5}\n}", "github": "", "reviewers": "GBd8;7xHF;Bw7b;inWh", "pdf_size": 25217349, "rating": "5;6;6;7", "confidence": "5;5;4;5", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "1;3;2;3", "wc_summary": "134;156;75;111", "wc_strengths": "139;74;116;179", "wc_weaknesses": "453;529;157;271", "wc_questions": "214;198;5;125", "wc_limitations": "14;10;31;22", "wc_review": "954;967;384;708", "wc_reply_reviewers": "117;438;19;67", "wc_reply_authors": "1329;2259;63;352", "reply_reviewers": "1;1;1;1", "reply_authors": "5;4;2;4", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 119.0, 29.974989574643725 ], "wc_strengths_avg": [ 127.0, 38.006578377959784 ], "wc_weaknesses_avg": [ 352.5, 146.72678692045295 ], "wc_questions_avg": [ 135.5, 82.47575401292188 ], "wc_limitations_avg": [ 19.25, 8.042853971072706 ], "wc_review_avg": [ 753.25, 236.845281776944 ], "wc_reply_reviewers_avg": [ 160.25, 164.06001188589497 ], "wc_reply_authors_avg": [ 1000.75, 864.7532523789662 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 1.0897247358851685 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2707258181588838548&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;ucsd.edu;tsinghua.edu.cn;avar.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;0;0;0", "aff_unique_norm": "Tsinghua University;University of California, San Diego;AVAR Inc", "aff_unique_dep": "Department of Electronic Engineering;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsd.edu;", "aff_unique_abbr": "THU;UCSD;", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;1;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "TFS-NeRF: Template-Free NeRF for Semantic 3D Reconstruction of Dynamic Scene", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94972", "id": "UPxFYvHsyN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UPxFYvHsyN", "openreview": "https://openreview.net/forum?id=UPxFYvHsyN", "poster": "", "project": "", "author_site": "Sandika Biswas, Qianyi Wu, Biplab Banerjee, Hamid Rezatofighi", "tldr": "", "abstract": "Despite advancements in Neural Implicit models for 3D surface reconstruction, handling dynamic environments with interactions between arbitrary rigid, non-rigid, or deformable entities remains challenging. The generic reconstruction methods adaptable to such dynamic scenes often require additional inputs like depth or optical flow or rely on pre-trained image features for reasonable outcomes. These methods typically use latent codes to capture frame-by-frame deformations. Another set of dynamic scene reconstruction methods, are entity-specific, mostly focusing on humans, and relies on template models. In contrast, some template-free methods bypass these requirements and adopt traditional LBS (Linear Blend Skinning) weights for a detailed representation of deformable object motions,\nalthough they involve complex optimizations leading to lengthy training times. To this end, as a remedy, this paper introduces TFS-NeRF, a template-free 3D semantic NeRF for dynamic scenes captured from sparse or single-view RGB videos, featuring interactions among two entities and more time-efficient than other LBS-based approaches. Our framework uses an Invertible Neural Network (INN) for LBS prediction, simplifying the training process. By disentangling the motions of interacting entities and optimizing per-entity skinning weights, our method efficiently generates accurate, semantically separable geometries. Extensive experiments demonstrate that our approach produces high-quality reconstructions of both deformable and non-deformable objects in complex interactions, with improved\ntraining efficiency compared to existing methods. The code and models will be available on our github page.", "keywords": "3D reconstruction;Template-free NeRF;Semantic reconstruction;Multiple entity interactions", "primary_area": "machine_vision", "supplementary_material": "/attachment/0556c8d07c4751db2620eb57867a0152c7f82716.zip", "author": "Sandika Biswas;Qianyi Wu;Biplab Banerjee;Hamid Rezatofighi", "authorids": "~Sandika_Biswas2;~Qianyi_Wu2;~Biplab_Banerjee1;~Hamid_Rezatofighi1", "gender": "F;M;M;M", "homepage": ";https://wuqianyi.top;https://biplab-banerjee.github.io;https://research.monash.edu/en/persons/hamid-rezatofighi", "dblp": "165/0874;217/2011;87/9571;37/8192", "google_scholar": "F7e5jSwAAAAJ;XI0RtesAAAAJ;IEcsMPAAAAAJ;https://scholar.google.com.au/citations?user=VxAuxMwAAAAJ", "orcid": "0000-0002-8264-3346;;0000-0001-8371-8138;", "linkedin": ";;;s-hamid-rezatofighi-b275a052/", "or_profile": "~Sandika_Biswas2;~Qianyi_Wu2;~Biplab_Banerjee1;~Hamid_Rezatofighi1", "aff": "Monash University;Monash University;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Monash University", "aff_domain": "monash.edu;monash.edu;iitb.ac.in;monash.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nbiswas2024tfsnerf,\ntitle={{TFS}-Ne{RF}: Template-Free Ne{RF} for Semantic 3D Reconstruction of Dynamic Scene},\nauthor={Sandika Biswas and Qianyi Wu and Biplab Banerjee and Hamid Rezatofighi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UPxFYvHsyN}\n}", "github": "", "reviewers": "3yt5;UY9B;KrvP;Rbt4", "pdf_size": 6068690, "rating": "3;5;5;6", "confidence": "4;3;3;3", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "1;3;2;2", "wc_summary": "12;35;63;67", "wc_strengths": "11;44;57;48", "wc_weaknesses": "347;188;39;280", "wc_questions": "2;2;31;30", "wc_limitations": "5;6;13;8", "wc_review": "377;275;203;433", "wc_reply_reviewers": "174;24;168;156", "wc_reply_authors": "384;27;289;236", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;4;3", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 44.25, 22.331312097590683 ], "wc_strengths_avg": [ 40.0, 17.392527130926087 ], "wc_weaknesses_avg": [ 213.5, 115.4826826844614 ], "wc_questions_avg": [ 16.25, 14.254385290148432 ], "wc_limitations_avg": [ 8.0, 3.082207001484488 ], "wc_review_avg": [ 322.0, 89.04493247793498 ], "wc_reply_reviewers_avg": [ 130.5, 61.82839153657485 ], "wc_reply_authors_avg": [ 234.0, 130.74593683935268 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ky3IDgAaD0IJ:scholar.google.com/&scioq=TFS-NeRF:+Template-Free+NeRF+for+Semantic+3D+Reconstruction+of+Dynamic+Scene&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "monash.edu;monash.edu;iitb.ac.in;monash.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Monash University;Indian Institute of Technology, Bombay", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.iitb.ac.in", "aff_unique_abbr": "Monash;IIT Bombay", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bombay", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;India" }, { "title": "Efficiency for Free: Ideal Data Are Transportable Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94971", "id": "UPxmISfNCO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UPxmISfNCO", "openreview": "https://openreview.net/forum?id=UPxmISfNCO", "poster": "", "project": "", "author_site": "PENG SUN, Yi Jiang, Tao Lin", "tldr": "", "abstract": "Data, the seminal opportunity and challenge in modern machine learning, currently constrains the scalability of representation learning and impedes the pace of model evolution.\nIn this work, we investigate the efficiency properties of data from both optimization and generalization perspectives.\nOur theoretical and empirical analysis reveals an unexpected finding: for a given task, utilizing a publicly available, task- and architecture-agnostic model (referred to as the `prior model' in this paper) can effectively produce efficient data.\nBuilding on this insight, we propose the Representation Learning Accelerator (ReLA), which promotes the formation and utilization of efficient data, thereby accelerating representation learning.\nUtilizing a ResNet-18 pre-trained on CIFAR-10 as a prior model to inform ResNet-50 training on ImageNet-1K reduces computational costs by $50\\%$ while maintaining the same accuracy as the model trained with the original BYOL, which requires $100\\%$ cost.\nOur code is available at: \\url{https://github.com/LINs-lab/ReLA}.", "keywords": "Efficient Learning;Efficient Data;Ideal Data", "primary_area": "other", "supplementary_material": "", "author": "Peng Sun;Yi Jiang;Tao Lin", "authorids": "~Peng_Sun13;~Yi_Jiang8;~Tao_Lin1", "gender": ";M;", "homepage": "https://github.com/laitselec;https://lins-lab.github.io/;https://github.com/sp12138", "dblp": ";64/4492-4.html;", "google_scholar": ";QE9pa_cAAAAJ;", "orcid": ";0000-0002-3246-6935;", "linkedin": ";;", "or_profile": "~Yi_Jiang8;~Tao_Lin1;~peng_sun7", "aff": "Zhejiang University;Westlake University;Westlake University", "aff_domain": "zju.edu.cn;westlake.edu;westlake.edu", "position": "Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsun2024efficiency,\ntitle={Efficiency for Free: Ideal Data Are Transportable Representations},\nauthor={Peng Sun and Yi Jiang and Tao Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UPxmISfNCO}\n}", "github": "", "reviewers": "K3hg;7uZz;fJin;zbwg", "pdf_size": 1813306, "rating": "3;6;6;7", "confidence": "4;3;3;4", "soundness": "2;2;3;4", "novelty": "2;3;4;4", "presentation": "2;2;3;3", "wc_summary": "109;83;36;113", "wc_strengths": "26;164;34;72", "wc_weaknesses": "664;77;40;228", "wc_questions": "7;91;2;31", "wc_limitations": "29;41;19;18", "wc_review": "835;456;131;462", "wc_reply_reviewers": "445;48;12;108", "wc_reply_authors": "2198;54;46;847", "reply_reviewers": "2;2;1;2", "reply_authors": "5;2;2;4", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.25, 30.678779310787448 ], "wc_strengths_avg": [ 74.0, 54.7905101272109 ], "wc_weaknesses_avg": [ 252.25, 247.93585359927272 ], "wc_questions_avg": [ 32.75, 35.3721288587498 ], "wc_limitations_avg": [ 26.75, 9.283722313813572 ], "wc_review_avg": [ 471.0, 249.19971910096527 ], "wc_reply_reviewers_avg": [ 153.25, 171.8973167329845 ], "wc_reply_authors_avg": [ 786.25, 877.6230326854463 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.33333333333333337, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2747408016430858493&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;westlake.edu;westlake.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Zhejiang University;Westlake University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.westlake.edu.cn", "aff_unique_abbr": "ZJU;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "HairDiffusion: Vivid Multi-Colored Hair Editing via Latent Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94970", "id": "UQflshLbZv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UQflshLbZv", "openreview": "https://openreview.net/forum?id=UQflshLbZv", "poster": "/media/PosterPDFs/NeurIPS%202024/94970.png?t=1730621107.5421114", "project": "", "author_site": "Yu Zeng, Yang Zhang, Liu Jiachen, Linlin Shen, Kaijun Deng, Weizhao He, Jinbao Wang", "tldr": "", "abstract": "Hair editing is a critical image synthesis task that aims to edit hair color and hairstyle using text descriptions or reference images, while preserving irrelevant attributes (e.g., identity, background, cloth). Many existing methods are based on StyleGAN to address this task. However, due to the limited spatial distribution of StyleGAN, it struggles with multiple hair color editing and facial preservation. Considering the advancements in diffusion models, we utilize Latent Diffusion Models (LDMs) for hairstyle editing. Our approach introduces Multi-stage Hairstyle Blend (MHB), effectively separating control of hair color and hairstyle in diffusion latent space. Additionally, we train a warping module to align the hair color with the target region. To further enhance multi-color hairstyle editing, we fine-tuned a CLIP model using a multi-color hairstyle dataset. Our method not only tackles the complexity of multi-color hairstyles but also addresses the challenge of preserving original colors during diffusion editing. Extensive experiments showcase the superiority of our method in editing multi-color hairstyles while preserving facial attributes given textual descriptions and reference images.", "keywords": "Image Editing;Diffusion Model;Image Inpainting;Hair Editing", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yu Zeng;Yang Zhang;Jiachen Liu;Linlin Shen;Kaijun Deng;Weizhao He;Jinbao Wang", "authorids": "~Yu_Zeng5;~Yang_Zhang37;~Jiachen_Liu4;~Linlin_Shen1;~Kaijun_Deng1;~Weizhao_He2;~Jinbao_Wang1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://csse.szu.edu.cn/pages/user/index?id=1195;https://github.com/louis-fashion-ai/louis_fashion_ai.github.io;https://csse.szu.edu.cn/pages/user/index?id=594;;;", "dblp": ";06/6785-12;;88/5607;;379/7250.html;", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=AZ_y9HgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": "0009-0007-8055-7106;;;0000-0003-1420-0815;0009-0009-1352-2952;0009-0003-1777-036X;0000-0001-5916-8965", "linkedin": ";;;;;;", "or_profile": "~Yu_Zeng5;~Yang_Zhang37;~Jiachen_Liu4;~Linlin_Shen1;~Kaijun_Deng1;~Weizhao_He2;~Jinbao_Wang1", "aff": "Shenzhen University;Shenzhen University;Shenzhen University;Shenzhen University;Shenzhen University;Shenzhen University;Shenzhen University", "aff_domain": "szu.edu.cn;szu.edu.cn;szu.edu.cn;szu.edu.cn;email.szu.edu.cn;email.szu.edu.cn;szu.edu.cn", "position": "MS student;Researcher;MS student;Full Professor;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nzeng2024hairdiffusion,\ntitle={HairDiffusion: Vivid Multi-Colored Hair Editing via Latent Diffusion},\nauthor={Yu Zeng and Yang Zhang and Jiachen Liu and Linlin Shen and Kaijun Deng and Weizhao He and Jinbao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UQflshLbZv}\n}", "github": "", "reviewers": "X6RQ;8NSD;GkwL;Ubef", "pdf_size": 11082427, "rating": "4;4;6;6", "confidence": "3;5;2;4", "soundness": "3;1;3;3", "novelty": "3;1;3;3", "presentation": "2;2;2;3", "wc_summary": "91;251;102;62", "wc_strengths": "55;84;140;61", "wc_weaknesses": "246;462;88;197", "wc_questions": "3;137;2;99", "wc_limitations": "1;114;1;6", "wc_review": "396;1048;333;425", "wc_reply_reviewers": "135;239;0;0", "wc_reply_authors": "428;603;88;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;2;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 126.5, 73.35018745715651 ], "wc_strengths_avg": [ 85.0, 33.54847239443251 ], "wc_weaknesses_avg": [ 248.25, 136.0153943493162 ], "wc_questions_avg": [ 60.25, 59.2932331720914 ], "wc_limitations_avg": [ 30.5, 48.251942966060966 ], "wc_review_avg": [ 550.5, 289.1509121548815 ], "wc_reply_reviewers_avg": [ 93.5, 100.47014481924468 ], "wc_reply_authors_avg": [ 279.75, 245.71159415054063 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XihwWf-7PHYJ:scholar.google.com/&scioq=HairDiffusion:+Vivid+Multi-Colored+Hair+Editing+via+Latent+Diffusion&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "szu.edu.cn;szu.edu.cn;szu.edu.cn;szu.edu.cn;email.szu.edu.cn;email.szu.edu.cn;szu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Shenzhen University", "aff_unique_dep": "", "aff_unique_url": "https://www.szu.edu.cn", "aff_unique_abbr": "SZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cryptographic Hardness of Score Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94969", "id": "URQXbwM0Md", "proceeding": "", "pdf": "https://openreview.net/pdf?id=URQXbwM0Md", "openreview": "https://openreview.net/forum?id=URQXbwM0Md", "poster": "", "project": "", "tldr": "", "abstract": "We show that L2-accurate score estimation, in the absence of strong assumptions on the data distribution, is computationally hard even when sample complexity is polynomial in the relevant problem parameters. Our reduction builds on the result of Chen et al. (ICLR 2023), who showed that the problem of generating samples from an unknown data distribution reduces to L2-accurate score estimation. Our hard-to-estimate distributions are the \"Gaussian pancakes\" distributions, originally due to Diakonikolas et al. (FOCS 2017), which have been shown to be computationally indistinguishable from the standard Gaussian under widely believed hardness assumptions from lattice-based cryptography (Bruna et al., STOC 2021; Gupte et al., FOCS 2022).", "keywords": "diffusion models;score-based generative models;score estimation;statistical-to-computational gap;computational hardness;learning with errors (LWE)", "primary_area": "learning_theory", "supplementary_material": "", "author": "Min Jae Song", "authorids": "~Min_Jae_Song1", "gender": "M", "homepage": "https://mjsong32.github.io/", "dblp": "169/9994", "google_scholar": "6TIktJgAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Min_Jae_Song1", "aff": "University of Washington", "aff_domain": "uw.edu", "position": "Postdoc", "bibtex": "@inproceedings{\nsong2024cryptographic,\ntitle={Cryptographic Hardness of Score Estimation},\nauthor={Min Jae Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=URQXbwM0Md}\n}", "github": "", "reviewers": "CMgH;q3zX;27f3", "pdf_size": 2906747, "rating": "5;7;7", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "76;252;93", "wc_strengths": "52;377;57", "wc_weaknesses": "226;141;13", "wc_questions": "8;88;8", "wc_limitations": "1;31;5", "wc_review": "363;889;176", "wc_reply_reviewers": "0;58;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 140.33333333333334, 79.26467617349412 ], "wc_strengths_avg": [ 162.0, 152.04166095733981 ], "wc_weaknesses_avg": [ 126.66666666666667, 87.54554370281917 ], "wc_questions_avg": [ 34.666666666666664, 37.712361663282536 ], "wc_limitations_avg": [ 12.333333333333334, 13.299958228840001 ], "wc_review_avg": [ 476.0, 301.8487479958575 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 27.34146220587984 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17904738284933350533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uw.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "URkFX1jmOd", "title": "Night-to-Day Translation via Illumination Degradation Disentanglement", "track": "main", "status": "Reject", "tldr": "", "abstract": "Night-to-Day translation (Night2Day) aims to achieve day-like nighttime vision. However, processing images with complex degradations in the night using unpaired data still remains a challenge in this field. Previous methods that uniformly mitigate these degradations have proven inadequate in simultaneously restoring daytime domain information and preserving underlying semantics. In this paper, we recognize the different degradation patterns in nighttime images and propose N2D3 (Night to Day via Degradation Disentanglement). It comprises a degradation disentanglement module and a degradation-aware contrastive learning module. Firstly, we extract physical priors from the photometric model derived from the Kubelka-Munk theory. Subsequently, under the guidance of physical priors, we design a disentanglement module to discriminate among different illumination degradation regions. Finally, we introduce degradation-aware contrastive learning to preserve semantic consistency across distinct degradation regions. Our method is evaluated on two public datasets, \\textbf{with a significant improvement of 5.4 FID on BDD100K and 10.3 FID on Alderley", "keywords": "Image Translation;Nighttime Image Rendering.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Guanzhou L;Zhigang Wang;Yuqi Yang;Bin Zhao;Dong Wang;Xuelong Li", "authorids": "~Guanzhou_L1;~Zhigang_Wang3;~Yuqi_Yang5;~Bin_Zhao7;~Dong_Wang1;~Xuelong_Li2", "gender": "F;M;M;M;M;M", "homepage": "https://github.com/Yyq-Yuki;https://iopen.nwpu.edu.cn/info/1347/2105.htm;https://redwang.github.io/;;https://github.com/lgz-0713/Home-Pages;", "dblp": ";73/4325-1.html;40/3934-28;l/XuelongLi;352/3175;35/1989-2", "google_scholar": ";https://scholar.google.com.hk/citations?user=DQB0hqwAAAAJ;dasL9V4AAAAJ;ahUibskAAAAJ;;cw3EaAYAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yuqi_Yang5;~Bin_Zhao7;~Dong_Wang1;~Xuelong_Li2;~Guanzhou_Lan1;~Zhi.gang_Wang1", "aff": "Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Shanghai AI Laboratory;Northwestern Polytechnical University;Northwest Polytechnical University Xi'an;Shanghai AI Lab", "aff_domain": "nwpu.edu.cn;nwpu.edu.cn;pjlab.org.cn;nwpu.edu.cn;nwpu.edu.cn;pjlab.org.cn", "position": "MS student;Associate Professor;Researcher;Full Professor;PhD student;Researcher", "bibtex": "@misc{\nanonymous2024nighttoday,\ntitle={Night-to-Day Translation via Illumination Degradation Disentanglement},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=URkFX1jmOd}\n}", "github": "", "project": "", "reviewers": "dsJ3;rJuz;PUPw;JHwd", "site": "https://openreview.net/forum?id=URkFX1jmOd", "pdf_size": 4945563, "rating": "4;4;5;5", "confidence": "4;5;3;4", "soundness": "2;2;3;3", "novelty": "3;3;2;2", "presentation": "1;3;3;2", "wc_summary": "68;30;50;182", "wc_strengths": "25;67;61;130", "wc_weaknesses": "265;142;66;137", "wc_questions": "154;105;80;46", "wc_limitations": "20;34;52;7", "wc_review": "532;378;309;502", "wc_reply_reviewers": "1061;0;59;0", "wc_reply_authors": "1603;61;304;61", "reply_reviewers": "4;0;1;0", "reply_authors": "4;2;2;2", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 82.5, 58.997881317891405 ], "wc_strengths_avg": [ 70.75, 37.79136806203237 ], "wc_weaknesses_avg": [ 152.5, 71.56989590603021 ], "wc_questions_avg": [ 96.25, 39.37242055043098 ], "wc_limitations_avg": [ 28.25, 16.708904811506947 ], "wc_review_avg": [ 430.25, 90.73691365701173 ], "wc_reply_reviewers_avg": [ 280.0, 451.5534298397035 ], "wc_reply_authors_avg": [ 507.25, 640.362543798433 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TrJNSzRbP7MJ:scholar.google.com/&scioq=Night-to-Day+Translation+via+Illumination+Degradation+Disentanglement&hl=en&as_sdt=0,44", "gs_version_total": 2, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Northwest Polytechnical University;Shanghai AI Laboratory;Northwestern Polytechnical University;Shanghai AI Lab", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nwpu.edu.cn;https://www.shanghai-ai-lab.com;https://www.nwpu.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "NWPU;SAIL;NWPU;SAIL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Value of Reward Lookahead in Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94968", "id": "URyeU8mwz1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=URyeU8mwz1", "openreview": "https://openreview.net/forum?id=URyeU8mwz1", "poster": "/media/PosterPDFs/NeurIPS%202024/94968.png?t=1732870307.715843", "project": "", "author_site": "Nadav Merlis, Dorian Baudry, Vianney Perchet", "tldr": "", "abstract": "In reinforcement learning (RL), agents sequentially interact with changing environments while aiming to maximize the obtained rewards. Usually, rewards are observed only _after_ acting, and so the goal is to maximize the _expected_ cumulative reward. Yet, in many practical settings, reward information is observed in advance -- prices are observed before performing transactions; nearby traffic information is partially known; and goals are oftentimes given to agents prior to the interaction. In this work, we aim to quantifiably analyze the value of such future reward information through the lens of _competitive analysis. In particular, we measure the ratio between the value of standard RL agents and that of agents with partial future-reward lookahead. We characterize the worst-case reward distribution and derive exact ratios for the worst-case reward expectations. Surprisingly, the resulting ratios relate to known quantities in offline RL and reward-free exploration. We further provide tight bounds for the ratio given the worst-case dynamics. Our results cover the full spectrum between observing the immediate rewards before acting to observing all the rewards before the interaction starts.", "keywords": "Reinforcement Learning;Planning;Reward Lookahead;Competitive Ratio", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Nadav Merlis;Dorian Baudry;Vianney Perchet", "authorids": "~Nadav_Merlis1;~Dorian_Baudry1;~Vianney_Perchet3", "gender": "M;M;M", "homepage": ";https://dbaudry.github.io/;", "dblp": "227/2875;277/6362;83/7398", "google_scholar": "https://scholar.google.co.il/citations?user=pX2zzp0AAAAJ;https://scholar.google.fr/citations?user=RRW-kfYAAAAJ;", "orcid": "0000-0002-9906-0577;;", "linkedin": ";;", "or_profile": "~Nadav_Merlis1;~Dorian_Baudry1;~Vianney_Perchet1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;;", "aff_domain": "ensae.fr;;", "position": "Postdoc;;", "bibtex": "@inproceedings{\nmerlis2024the,\ntitle={The Value of Reward Lookahead in Reinforcement Learning},\nauthor={Nadav Merlis and Dorian Baudry and Vianney Perchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=URyeU8mwz1}\n}", "github": "", "reviewers": "ofoH;C3mL;mGpv", "pdf_size": 628252, "rating": "6;7;7", "confidence": "3;3;2", "soundness": "3;4;3", "novelty": "3;2;4", "presentation": "3;2;4", "wc_summary": "139;147;57", "wc_strengths": "154;58;75", "wc_weaknesses": "72;249;202", "wc_questions": "1;159;6", "wc_limitations": "1;1;40", "wc_review": "367;614;380", "wc_reply_reviewers": "0;134;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 114.33333333333333, 40.67213078045238 ], "wc_strengths_avg": [ 95.66666666666667, 41.82768886223043 ], "wc_weaknesses_avg": [ 174.33333333333334, 74.86135332881862 ], "wc_questions_avg": [ 55.333333333333336, 73.33181816616546 ], "wc_limitations_avg": [ 14.0, 18.384776310850235 ], "wc_review_avg": [ 453.6666666666667, 113.49694073214289 ], "wc_reply_reviewers_avg": [ 48.666666666666664, 60.53832578531462 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12819373141203925851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ensae.fr;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": "", "aff_unique_url": "https://ensae.fr", "aff_unique_abbr": "ENSAE", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Scribbles for All: Benchmarking Scribble Supervised Segmentation Across Datasets", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97672", "id": "USUkwg5pW6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=USUkwg5pW6", "openreview": "https://openreview.net/forum?id=USUkwg5pW6", "poster": "", "project": "", "author_site": "Wolfgang Boettcher, Lukas Hoyer, Ozan Unal, Jan Eric Lenssen, Bernt Schiele", "tldr": "", "abstract": "In this work, we introduce *Scribbles for All*, a label and training data generation algorithm for semantic segmentation trained on scribble labels. Training or fine-tuning semantic segmentation models with weak supervision has become an important topic recently and was subject to significant advances in model quality. In this setting, scribbles are a promising label type to achieve high quality segmentation results while requiring a much lower annotation effort than usual pixel-wise dense semantic segmentation annotations. The main limitation of scribbles as source for weak supervision is the lack of challenging datasets for scribble segmentation, which hinders the development of novel methods and conclusive evaluations. To overcome this limitation, *Scribbles for All* provides scribble labels for several popular segmentation datasets and provides an algorithm to automatically generate scribble labels for any dataset with dense annotations, paving the way for new insights and model advancements in the field of weakly supervised segmentation. In addition to providing datasets and algorithm, we evaluate state-of-the-art segmentation models on our datasets and show that models trained with our synthetic labels perform competitively with respect to models trained on manual labels. Thus, our datasets enable state-of-the-art research into methods for scribble-labeled semantic segmentation. The datasets, scribble generation algorithm, and baselines are publicly available at https://github.com/wbkit/Scribbles4All.", "keywords": "semantic;segmentation;scribble;pascal;ade20k;cityscapes;kitti360;sparsely;supervised;scribblesup;scribblekitti", "primary_area": "", "supplementary_material": "", "author": "Wolfgang Boettcher;Lukas Hoyer;Ozan Unal;Jan Eric Lenssen;Bernt Schiele", "authorids": "~Wolfgang_Boettcher1;~Lukas_Hoyer1;~Ozan_Unal1;~Jan_Eric_Lenssen1;~Bernt_Schiele1", "gender": "M;;;M;M", "homepage": ";;;https://janericlenssen.github.io/;http://www.mpi-inf.mpg.de/~schiele", "dblp": ";;250/4116;195/9868;s/BerntSchiele", "google_scholar": "rLy-74oAAAAJ;;qKWmINYAAAAJ;https://scholar.google.de/citations?user=enXCzCgAAAAJ;https://scholar.google.de/citations?user=z76PBfYAAAAJ", "orcid": "0000-0003-1615-3485;;0000-0002-1121-3883;0000-0003-4093-9840;0000-0001-9683-5237", "linkedin": "wolfgang-boettcher/;;;jan-eric-lenssen-08700b190/;", "or_profile": "~Wolfgang_Boettcher1;~Lukas_Hoyer1;~Ozan_Unal1;~Jan_Eric_Lenssen1;~Bernt_Schiele1", "aff": "Saarland Informatics Campus, Max-Planck Institute;;ETHZ - ETH Zurich;Kumo;Max Planck Institute for Informatics, Saarland Informatics Campus", "aff_domain": "mpi-inf.mpg.de;;ethz.ch;kumo.ai;mpi-inf.mpg.de", "position": "PhD student;;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nboettcher2024scribbles,\ntitle={Scribbles for All: Benchmarking Scribble Supervised Segmentation Across Datasets},\nauthor={Wolfgang Boettcher and Lukas Hoyer and Ozan Unal and Jan Eric Lenssen and Bernt Schiele},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=USUkwg5pW6}\n}", "github": "", "reviewers": "anV4;ggFR;caBU", "pdf_size": 3880621, "rating": "6;7;9", "confidence": "3;5;4", "wc_summary_and_contributions": "31;51;20", "wc_strengths": "37;90;53", "wc_improvement": "103;11;25", "wc_limitations": "15;2;7", "wc_correctness": "5;15;1", "wc_clarity": "8;2;1", "wc_relation_to_prior_work": "1;1;7", "wc_documentation": "9;2;1", "wc_additional_feedback": "1;1;1", "wc_review": "210;175;116", "wc_reply_reviewers": "5;27;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 34.0, 12.832251036613439 ], "wc_strengths_avg": [ 60.0, 22.19609575278199 ], "wc_improvement_avg": [ 46.333333333333336, 40.47495796442811 ], "wc_limitations_avg": [ 8.0, 5.354126134736337 ], "wc_correctness_avg": [ 7.0, 5.887840577551898 ], "wc_clarity_avg": [ 3.6666666666666665, 3.091206165165235 ], "wc_relation_to_prior_work_avg": [ 3.0, 2.8284271247461903 ], "wc_documentation_avg": [ 4.0, 3.559026084010437 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 167.0, 38.79003308411411 ], "wc_reply_reviewers_avg": [ 10.666666666666666, 11.728408057172787 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11913937674966704354&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mpi-inf.mpg.de;;ethz.ch;kumo.ai;mpi-inf.mpg.de", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max-Planck Institute;ETH Zurich;Kumo;Max Planck Institute for Informatics", "aff_unique_dep": "Informatics;;;", "aff_unique_url": "https://www.mpi-sws.org;https://www.ethz.ch;;https://mpi-inf.mpg.de", "aff_unique_abbr": "MPI-SWS;ETHZ;;MPII", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Saarland;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Switzerland;" }, { "title": "Gradual Domain Adaptation via Manifold-Constrained Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94967", "id": "UTNZKl5BUc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UTNZKl5BUc", "openreview": "https://openreview.net/forum?id=UTNZKl5BUc", "poster": "/media/PosterPDFs/NeurIPS%202024/94967.png?t=1731706939.787734", "project": "", "author_site": "seyed amir saberi, Amir Najafi, Amin Behjati, Ala Emrani, Yasaman Zolfimoselo, Shadrooy, Abolfazl Motahari, Babak Khalaj", "tldr": "", "abstract": "The aim of this paper is to address the challenge of gradual domain adaptation within a class of manifold-constrained data distributions. In particular, we consider a sequence of $T\\ge2$ data distributions $P_1,\\ldots,P_T$ undergoing a gradual shift, where each pair of consecutive measures $P_i,P_{i+1}$ are close to each other in Wasserstein distance. We have a supervised dataset of size $n$ sampled from $P_0$, while for the subsequent distributions in the sequence, only unlabeled i.i.d. samples are available. Moreover, we assume that all distributions exhibit a known favorable attribute, such as (but not limited to) having intra-class soft/hard margins. In this context, we propose a methodology rooted in Distributionally Robust Optimization (DRO) with an adaptive Wasserstein radius. We theoretically show that this method guarantees the classification error across all $P_i$s can be suitably bounded. Our bounds rely on a newly introduced {\\it {compatibility}} measure, which fully characterizes the error propagation dynamics along the sequence. Specifically, for inadequately constrained distributions, the error can exponentially escalate as we progress through the gradual shifts. Conversely, for appropriately constrained distributions, the error can be demonstrated to be linear or even entirely eradicated. We have substantiated our theoretical findings through several experimental results.", "keywords": "Gradual Domain Adaptation;Distributionally Robust Optimization;Generalization Bound;Error Propagation Characterization", "primary_area": "learning_theory", "supplementary_material": "/attachment/9386377ca7f1882b2b2bc934d020d543af9d46c6.zip", "author": "seyed amir hossein saberi;Amir Najafi;Amin Behjati;Ala Emrani;Yasaman Zolfimoselo;Mahdi Shadrooy;Abolfazl Motahari;Babak Khalaj", "authorids": "~seyed_amir_hossein_saberi1;~Amir_Najafi1;~Amin_Behjati1;~Ala_Emrani2;~Yasaman_Zolfimoselo2;~Mahdi_Shadrooy1;~Abolfazl_Motahari1;~Babak_Khalaj1", "gender": "M;M;M;M;F;M;M;M", "homepage": ";;https://www.researchgate.net/profile/Amin-Behjati;;;;http://sharif.edu/~motahari/index.html;http://ee.sharif.edu/~khalaj/", "dblp": ";00/10958;;391/6392;;;35/1085;21/6529.html", "google_scholar": "OyvmpN4AAAAJ;N_zYPC0AAAAJ;;yma6NsEAAAAJ;;;https://scholar.google.com.tw/citations?user=rJ-biB0AAAAJ;8HsoXAUAAAAJ", "orcid": ";0000-0002-6680-0110;;;;;;0000-0002-9289-2338", "linkedin": "seyed-amir-hossein-saberi-711a7aa8;amir-najafi-6a202346/;;ala-emrani-6698b13a/;yasaman-zolfi-moselo/;mahdi-shadrooy-aaa0a830a/;;", "or_profile": "~seyed_amir_hossein_saberi1;~Amir_Najafi1;~Amin_Behjati1;~Ala_Emrani2;~Yasaman_Zolfimoselo2;~Mahdi_Shadrooy1;~Abolfazl_Motahari1;~Babak_Khalaj1", "aff": "Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology", "aff_domain": "sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu", "position": "PhD student;Assistant Professor;PhD student;PhD student;Undergrad student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsaberi2024gradual,\ntitle={Gradual Domain Adaptation via Manifold-Constrained Distributionally Robust Optimization},\nauthor={seyed amir hossein saberi and Amir Najafi and Amin Behjati and Ala Emrani and Yasaman Zolfimoselo and Mahdi Shadrooy and Abolfazl Motahari and Babak Khalaj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UTNZKl5BUc}\n}", "github": "", "reviewers": "1exJ;C19w;BZqU;FbyN", "pdf_size": 689165, "rating": "6;6;7;7", "confidence": "3;2;4;3", "soundness": "3;3;3;4", "novelty": "2;2;4;4", "presentation": "3;3;3;4", "wc_summary": "69;96;93;77", "wc_strengths": "106;28;83;47", "wc_weaknesses": "280;57;114;88", "wc_questions": "37;274;39;28", "wc_limitations": "2;15;11;28", "wc_review": "494;470;340;268", "wc_reply_reviewers": "35;81;100;22", "wc_reply_authors": "0;39;62;20", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 11.166355717063647 ], "wc_strengths_avg": [ 66.0, 30.389142798045487 ], "wc_weaknesses_avg": [ 134.75, 86.25362311230758 ], "wc_questions_avg": [ 94.5, 103.71716347837517 ], "wc_limitations_avg": [ 14.0, 9.354143466934854 ], "wc_review_avg": [ 393.0, 92.9569792968769 ], "wc_reply_reviewers_avg": [ 59.5, 32.05074102107469 ], "wc_reply_authors_avg": [ 30.25, 22.93877721239735 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qvG1ZbpGdf4J:scholar.google.com/&scioq=Gradual+Domain+Adaptation+via+Manifold-Constrained+Distributionally+Robust+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu;sharif.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Sharif University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.sharif.edu", "aff_unique_abbr": "SUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Iran" }, { "title": "Geometry Cloak: Preventing TGS-based 3D Reconstruction from Copyrighted Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94966", "id": "UTrIEHobXI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UTrIEHobXI", "openreview": "https://openreview.net/forum?id=UTrIEHobXI", "poster": "", "project": "", "author_site": "Qi Song, Ziyuan Luo, Ka Chun Cheung, Simon See, Renjie Wan", "tldr": "", "abstract": "Single-view 3D reconstruction methods like Triplane Gaussian Splatting (TGS) have enabled high-quality 3D model generation from just a single image input within seconds. However, this capability raises concerns about potential misuse, where malicious users could exploit TGS to create unauthorized 3D models from copyrighted images. To prevent such infringement, we propose a novel image protection approach that embeds invisible geometry perturbations, termed ``geometry cloaks'', into images before supplying them to TGS. These carefully crafted perturbations encode a customized message that is revealed when TGS attempts 3D reconstructions of the cloaked image. Unlike conventional adversarial attacks that simply degrade output quality, our method forces TGS to fail the 3D reconstruction in a specific way - by generating an identifiable customized pattern that acts as a watermark. This watermark allows copyright holders to assert ownership over any attempted 3D reconstructions made from their protected images. Extensive experiments have verified the effectiveness of our geometry cloak.", "keywords": "Copyright protection;Image cloaking;single-view image to 3D model", "primary_area": "privacy", "supplementary_material": "/attachment/7faeb6ccc9a4c9ad49c8676eb7d31031bf29047f.zip", "author": "Qi Song;Ziyuan Luo;Ka Chun Cheung;Simon See;Renjie Wan", "authorids": "~Qi_Song5;~Ziyuan_Luo1;~Ka_Chun_Cheung1;~Simon_See1;~Renjie_Wan1", "gender": "Not Specified;;M;M;M", "homepage": "https://qsong2001.github.io/;;;;https://wanrenjie.github.io/", "dblp": "82/5132-3.html;;165/1089;62/6547;191/2619", "google_scholar": "VGUVxSsAAAAJ;;NvbCXToAAAAJ;ebIHTEoAAAAJ;https://scholar.google.com.sg/citations?user=S8_ES4MAAAAJ", "orcid": "0009-0006-7896-1567;;;0000-0002-4958-9237;0000-0002-0161-0367", "linkedin": ";;;simonsee/;", "or_profile": "~Qi_Song5;~Ziyuan_Luo1;~Ka_Chun_Cheung1;~Simon_See1;~Renjie_Wan1", "aff": "Hong Kong Baptist University;;NVIDIA;NVIDIA;Hong Kong Baptist University", "aff_domain": "hkbu.edu.hk;;nvidia.com;nvidia.com;hkbu.edu.hk", "position": "PhD student;;Senior Manager, Solution Architect;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nsong2024geometry,\ntitle={Geometry Cloak: Preventing {TGS}-based 3D Reconstruction from Copyrighted Images},\nauthor={Qi Song and Ziyuan Luo and Ka Chun Cheung and Simon See and Renjie Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UTrIEHobXI}\n}", "github": "", "reviewers": "u5Eb;kd52;rPf4;inRn;ykxU", "pdf_size": 27241786, "rating": "3;6;7;7;8", "confidence": "4;4;4;4;5", "soundness": "3;3;3;3;4", "novelty": "1;2;4;3;4", "presentation": "2;3;4;3;4", "wc_summary": "33;173;98;50;59", "wc_strengths": "16;98;198;22;82", "wc_weaknesses": "164;134;128;66;103", "wc_questions": "127;94;4;2;5", "wc_limitations": "23;4;7;4;7", "wc_review": "363;503;435;144;256", "wc_reply_reviewers": "454;91;42;46;63", "wc_reply_authors": "446;126;29;34;31", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 6.2, 1.7204650534085253 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 1.16619037896906 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 82.6, 49.97839533238337 ], "wc_strengths_avg": [ 83.2, 65.81914615064525 ], "wc_weaknesses_avg": [ 119.0, 32.851179583083464 ], "wc_questions_avg": [ 46.4, 53.376399279082136 ], "wc_limitations_avg": [ 9.0, 7.127411872482185 ], "wc_review_avg": [ 340.2, 127.7456848586284 ], "wc_reply_reviewers_avg": [ 139.2, 158.34317162416573 ], "wc_reply_authors_avg": [ 133.2, 160.64793805088195 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5231143743471867, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12198742115487521045&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hkbu.edu.hk;;nvidia.com;nvidia.com;hkbu.edu.hk", "author_num": 5, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Hong Kong Baptist University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.nvidia.com", "aff_unique_abbr": "HKBU;NVIDIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "UTwuDTpdNO", "title": "Meta Stackelberg Game: Robust Federated Learning against Adaptive and Mixed Poisoning Attacks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent research has uncovered that federated learning (FL) systems are vulnerable to various security threats. Although various defense mechanisms have been proposed, they are typically non-adaptive and tailored to specific types of attacks, leaving them insufficient in the face of adaptive or mixed attacks. In this work, we formulate adversarial federated learning as a Bayesian Stackelberg Markov game (BSMG) to tackle poisoning attacks of unknown/uncertain types. We further develop an efficient meta-learning approach to solve the game, which provides a robust and adaptive FL defense. Theoretically, we show that our algorithm provably converges to the first-order $\\varepsilon$-equilibrium point in $O(\\varepsilon^{-2})$ gradient iterations with $O(\\varepsilon^{-4})$ samples per iteration. Empirical results show that our meta-Stackelberg framework obtains superb performance against strong model poisoning and backdoor attacks with unknown/uncertain types.", "keywords": "Federated Learning;Game Theory;Reinforcement Learning;Robust Machine Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Henger Li;Tao Li;Yunian Pan;Tianyi Xu;Quanyan Zhu;Zizhan Zheng", "authorids": "~Henger_Li1;~Tao_Li6;~Yunian_Pan1;~Tianyi_Xu1;~Quanyan_Zhu1;~Zizhan_Zheng1", "gender": "M;M;M;M;M;M", "homepage": ";https://taoli-nyu.github.io/;https://unionpan.github.io/;https://xutianyi01.github.io/Tianyi-Xu/;https://engineering.nyu.edu/faculty/quanyan-zhu;https://www.cs.tulane.edu/~zzheng3/", "dblp": "241/9384;;;;;23/286", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;Lt08jLgAAAAJ;Qt2WDh0AAAAJ;B1v2AUYAAAAJ", "orcid": ";0000-0001-8966-7239;;;;", "linkedin": "hengerli30/;tao-li-nyu/;;tianyixu516527139/;;", "or_profile": "~Henger_Li1;~Tao_Li6;~Yunian_Pan1;~Tianyi_Xu1;~Quanyan_Zhu1;~Zizhan_Zheng1", "aff": "Tulane University;New York University;New York University;Tulane University;New York University;Tulane University", "aff_domain": "tulane.edu;nyu.edu;nyu.edu;tulane.edu;nyu.edu;tulane.edu", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024meta,\ntitle={Meta Stackelberg Game: Robust Federated Learning against Adaptive and Mixed Poisoning Attacks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=UTwuDTpdNO}\n}", "github": "", "project": "", "reviewers": "Ui9w;a9ia;s9YV;HbAX;4AkQ", "site": "https://openreview.net/forum?id=UTwuDTpdNO", "pdf_size": 4718247, "rating": "4;4;5;5;7", "confidence": "4;1;3;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;2;4", "wc_summary": "61;159;109;81;46", "wc_strengths": "7;8;89;61;29", "wc_weaknesses": "338;542;127;170;44", "wc_questions": "4;3;87;24;48", "wc_limitations": "1;3;1;14;24", "wc_review": "411;715;413;350;191", "wc_reply_reviewers": "68;181;0;96;8", "wc_reply_authors": "300;232;0;511;174", "reply_reviewers": "1;1;0;2;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 91.2, 39.9319421015307 ], "wc_strengths_avg": [ 38.8, 31.83959798741184 ], "wc_weaknesses_avg": [ 244.2, 177.09025947239448 ], "wc_questions_avg": [ 33.2, 31.504920250652916 ], "wc_limitations_avg": [ 8.6, 9.09065454189081 ], "wc_review_avg": [ 416.0, 169.95058105225766 ], "wc_reply_reviewers_avg": [ 70.6, 65.94421885199641 ], "wc_reply_authors_avg": [ 243.4, 166.73523922674536 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1863389981249825, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12935603173330031071&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Tulane University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tulane.edu;https://www.nyu.edu", "aff_unique_abbr": "Tulane;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gradient-Free Methods for Nonconvex Nonsmooth Stochastic Compositional Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94965", "id": "UVAq3uJ0gc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UVAq3uJ0gc", "openreview": "https://openreview.net/forum?id=UVAq3uJ0gc", "poster": "", "project": "", "author_site": "Zhuanghua Liu, Luo Luo, Bryan Kian Hsiang Low", "tldr": "", "abstract": "The stochastic compositional optimization (SCO) is popular in many real-world applications, including risk management, reinforcement learning, and meta-learning. However, most of the previous methods for SCO require the smoothness assumption on both the outer and inner functions, which limits their applications to a wider range of problems. In this paper, we study the SCO problem in that both the outer and inner functions are Lipschitz continuous but possibly nonconvex and nonsmooth. In particular, we propose gradient-free stochastic methods for finding the $(\\delta, \\epsilon)$-Goldstein stationary points of such problems with non-asymptotic convergence rates. Our results also lead to an improved convergence rate for the convex nonsmooth SCO problem. Furthermore, we conduct numerical experiments to demonstrate the effectiveness of the proposed methods.", "keywords": "compositional optimization; nonsmooth analysis", "primary_area": "optimization", "supplementary_material": "", "author": "Zhuanghua Liu;Luo Luo;Bryan Kian Hsiang Low", "authorids": "~Zhuanghua_Liu2;~Luo_Luo1;~Bryan_Kian_Hsiang_Low1", "gender": "M;M;M", "homepage": ";https://luoluo-sds.github.io/;http://www.comp.nus.edu.sg/~lowkh", "dblp": "195/8237.html;https://dblp.org/pers/hd/l/Luo:Luo;97/4877", "google_scholar": ";NggI9EsAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhuanghua_Liu2;~Luo_Luo1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;Fudan University;National University of Singapore", "aff_domain": "nus.edu.sg;fudan.edu.cn;nus.edu.sg", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024gradientfree,\ntitle={Gradient-Free Methods for Nonconvex Nonsmooth Stochastic Compositional Optimization},\nauthor={Zhuanghua Liu and Luo Luo and Bryan Kian Hsiang Low},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UVAq3uJ0gc}\n}", "github": "", "reviewers": "nNfG;ZyfL;VEqR;iZH7", "pdf_size": 574505, "rating": "5;5;6;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "4;3;3;3", "wc_summary": "55;42;129;69", "wc_strengths": "30;34;18;46", "wc_weaknesses": "88;131;49;212", "wc_questions": "221;5;184;2", "wc_limitations": "2;6;58;2", "wc_review": "396;218;438;331", "wc_reply_reviewers": "164;12;7;0", "wc_reply_authors": "729;16;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 33.29695932063467 ], "wc_strengths_avg": [ 32.0, 10.0 ], "wc_weaknesses_avg": [ 120.0, 60.518592184551025 ], "wc_questions_avg": [ 103.0, 100.3618453397505 ], "wc_limitations_avg": [ 17.0, 23.727621035409346 ], "wc_review_avg": [ 345.75, 83.02522207136816 ], "wc_reply_reviewers_avg": [ 45.75, 68.40458683450986 ], "wc_reply_authors_avg": [ 186.25, 313.4249312036297 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hcg7KivLh5wJ:scholar.google.com/&scioq=Gradient-Free+Methods+for+Nonconvex+Nonsmooth+Stochastic+Compositional+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "nus.edu.sg;fudan.edu.cn;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.fudan.edu.cn", "aff_unique_abbr": "NUS;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Toward a Well-Calibrated Discrimination via Survival Outcome-Aware Contrastive Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94964", "id": "UVjuYBSbCN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UVjuYBSbCN", "openreview": "https://openreview.net/forum?id=UVjuYBSbCN", "poster": "", "project": "", "author_site": "Dongjoon Lee, Hyeryn Park, Changhee Lee", "tldr": "", "abstract": "Previous deep learning approaches for survival analysis have primarily relied on ranking losses to improve discrimination performance, which often comes at the expense of calibration performance. \nTo address such an issue, we propose a novel contrastive learning approach specifically designed to enhance discrimination without sacrificing calibration. \nOur method employs weighted sampling within a contrastive learning framework, assigning lower penalties to samples with similar survival outcomes. This aligns well with the assumption that patients with similar event times share similar clinical statuses. Consequently, when augmented with the commonly used negative log-likelihood loss, our approach significantly improves discrimination performance without directly manipulating the model outputs, thereby achieving better calibration.\nExperiments on multiple real-world clinical datasets demonstrate that our method outperforms state-of-the-art deep survival models in both discrimination and calibration. Through comprehensive ablation studies, we further validate the effectiveness of our approach through quantitative and qualitative analyses.", "keywords": "Survival Analysis;Contrastive Learning;Deep Learning;Healthcare", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/d7f35ab0638888c3ea56be4be45def207cf37497.zip", "author": "Dongjoon Lee;Hyeryn Park;Changhee Lee", "authorids": "~Dongjoon_Lee3;~Hyeryn_Park1;~Changhee_Lee1", "gender": "F;;M", "homepage": "https://www.notion.so/hyeryn/2819a6be15e6476dab2cbf226484ee00;;https://www.notion.so/dong-joon/Home-e8dbc2ca93f04cf0a30e5f511040de34?pvs=4", "dblp": ";;", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hyeryn_Park1;~Changhee_Lee1;~DongJoon_Lee2", "aff": "Chung-Ang University;ChungAng University;Chung-Ang University", "aff_domain": "cau.ac.kr;cau.ac.kr;cau.ac.kr", "position": "MS student;Assistant Professor;MS student", "bibtex": "@inproceedings{\nlee2024toward,\ntitle={Toward a Well-Calibrated Discrimination via Survival Outcome-Aware Contrastive Learning},\nauthor={Dongjoon Lee and Hyeryn Park and Changhee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UVjuYBSbCN}\n}", "github": "", "reviewers": "Lfcy;VjbS;1ZZS;RNpF", "pdf_size": 7306084, "rating": "4;6;6;8", "confidence": "4;4;4;4", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "1;3;3;4", "wc_summary": "93;211;79;86", "wc_strengths": "41;125;88;42", "wc_weaknesses": "297;341;73;12", "wc_questions": "29;218;99;30", "wc_limitations": "20;358;1;7", "wc_review": "480;1253;340;177", "wc_reply_reviewers": "155;344;44;9", "wc_reply_authors": "584;981;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "3;5;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 117.25, 54.35243784780955 ], "wc_strengths_avg": [ 74.0, 35.035696082709705 ], "wc_weaknesses_avg": [ 180.75, 140.7841876774519 ], "wc_questions_avg": [ 94.0, 77.00973964376195 ], "wc_limitations_avg": [ 96.5, 151.13321937946006 ], "wc_review_avg": [ 562.5, 412.8295653172142 ], "wc_reply_reviewers_avg": [ 138.0, 130.57756315692217 ], "wc_reply_authors_avg": [ 391.25, 415.66535518371023 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:diSaOyO2-P4J:scholar.google.com/&scioq=Toward+a+Well-Calibrated+Discrimination+via+Survival+Outcome-Aware+Contrastive+Learning&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "cau.ac.kr;cau.ac.kr;cau.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chung-Ang University;Chungang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.cau.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "CAU;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Diffusion-based Reinforcement Learning via Q-weighted Variational Policy Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94963", "id": "UWUUVKtKeu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UWUUVKtKeu", "openreview": "https://openreview.net/forum?id=UWUUVKtKeu", "poster": "/media/PosterPDFs/NeurIPS%202024/94963.png?t=1731247937.3156118", "project": "", "author_site": "Shutong Ding, Ke Hu, Zhenhao Zhang, Kan Ren, Weinan Zhang, Jingyi Yu, Jingya Wang, Ye Shi", "tldr": "", "abstract": "Diffusion models have garnered widespread attention in Reinforcement Learning (RL) for their powerful expressiveness and multimodality. It has been verified that utilizing diffusion policies can significantly improve the performance of RL algorithms in continuous control tasks by overcoming the limitations of unimodal policies, such as Gaussian policies. Furthermore, the multimodality of diffusion policies also shows the potential of providing the agent with enhanced exploration capabilities. However, existing works mainly focus on applying diffusion policies in offline RL, while their incorporation into online RL has been less investigated. The diffusion model's training objective, known as the variational lower bound, cannot be applied directly in online RL due to the unavailability of 'good' samples (actions). To harmonize the diffusion model with online RL, we propose a novel model-free diffusion-based online RL algorithm named Q-weighted Variational Policy Optimization (QVPO). Specifically, we introduce the Q-weighted variational loss and its approximate implementation in practice. Notably, this loss is shown to be a tight lower bound of the policy objective. To further enhance the exploration capability of the diffusion policy, we design a special entropy regularization term. Unlike Gaussian policies, the log-likelihood in diffusion policies is inaccessible; thus this entropy term is nontrivial. Moreover, to reduce the large variance of diffusion policies, we also develop an efficient behavior policy through action selection. This can further improve its sample efficiency during online interaction. Consequently, the QVPO algorithm leverages the exploration capabilities and multimodality of diffusion policies, preventing the RL agent from converging to a sub-optimal policy. To verify the effectiveness of QVPO, we conduct comprehensive experiments on MuJoCo continuous control benchmarks. The final results demonstrate that QVPO achieves state-of-the-art performance in terms of both cumulative reward and sample efficiency.", "keywords": "Diffusion Model;Reinforcement Learning;Q-weighted Variational Policy Optimization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Shutong Ding;Ke Hu;Zhenhao Zhang;Kan Ren;Weinan Zhang;Jingyi Yu;Jingya Wang;Ye Shi", "authorids": "~Shutong_Ding1;~Ke_Hu9;~Zhenhao_Zhang3;~Kan_Ren1;~Weinan_Zhang1;~Jingyi_Yu5;~Jingya_Wang3;~Ye_Shi1", "gender": "M;M;;M;M;M;F;M", "homepage": "https://dingsht.tech/;;;https://saying.ren;http://wnzhang.net;;https://faculty.sist.shanghaitech.edu.cn/faculty/wangjingya/;http://faculty.sist.shanghaitech.edu.cn/faculty/shiye", "dblp": ";;;28/7458;28/10261-1;;;34/11191-1", "google_scholar": "https://scholar.google.com.hk/citations?user=qJyqm40AAAAJ;;;USnQVWgAAAAJ;Qzss0GEAAAAJ;R9L_AfQAAAAJ;https://scholar.google.com.au/citations?user=vmvJV_IAAAAJ;gMqbZPUAAAAJ", "orcid": ";0009-0000-9890-075X;;;0000-0002-0127-2425;;;", "linkedin": ";;;;;;;", "or_profile": "~Shutong_Ding1;~Ke_Hu9;~Zhenhao_Zhang3;~Kan_Ren1;~Weinan_Zhang1;~Jingyi_Yu5;~Jingya_Wang3;~Ye_Shi1", "aff": "ShanghaiTech University;ShanghaiTech University;;ShanghaiTech University;Shanghai Jiaotong University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;;shanghaitech.edu.cn;sjtu.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;MS student;;Assistant Professor;Associate Professor;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nding2024diffusionbased,\ntitle={Diffusion-based Reinforcement Learning via Q-weighted Variational Policy Optimization},\nauthor={Shutong Ding and Ke Hu and Zhenhao Zhang and Kan Ren and Weinan Zhang and Jingyi Yu and Jingya Wang and Ye Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UWUUVKtKeu}\n}", "github": "", "reviewers": "uSjP;Qp9e;nUs6;Kypj", "pdf_size": 1268327, "rating": "5;5;5;6", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "4;2;3;3", "wc_summary": "60;71;31;65", "wc_strengths": "38;64;42;75", "wc_weaknesses": "98;158;42;363", "wc_questions": "154;133;133;472", "wc_limitations": "1;10;22;41", "wc_review": "351;436;270;1016", "wc_reply_reviewers": "230;83;166;427", "wc_reply_authors": "730;182;431;749", "reply_reviewers": "2;2;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 56.75, 15.368392889303683 ], "wc_strengths_avg": [ 54.75, 15.31951369985353 ], "wc_weaknesses_avg": [ 165.25, 121.3164766220978 ], "wc_questions_avg": [ 223.0, 144.01562415238146 ], "wc_limitations_avg": [ 18.5, 14.974979131871937 ], "wc_review_avg": [ 518.25, 293.309030716751 ], "wc_reply_reviewers_avg": [ 226.5, 126.949793225511 ], "wc_reply_authors_avg": [ 523.0, 233.81082096430012 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4263011024560801075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;;shanghaitech.edu.cn;sjtu.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "ShanghaiTech University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "ShanghaiTech;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Coupled Mamba: Enhanced Multimodal Fusion with Coupled State Space Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94962", "id": "UXEo3uNNIX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UXEo3uNNIX", "openreview": "https://openreview.net/forum?id=UXEo3uNNIX", "poster": "", "project": "", "author_site": "Wenbing Li, Hang Zhou, Junqing Yu, Zikai Song, Wei Yang", "tldr": "", "abstract": "The essence of multi-modal fusion lies in exploiting the complementary information inherent in diverse modalities.However, most prevalent fusion methods rely on traditional neural architectures and are inadequately equipped to capture the dynamics of interactions across modalities, particularly in presence of complex intra- and inter-modality correlations.Recent advancements in State Space Models (SSMs), notably exemplified by the Mamba model, have emerged as promising contenders. Particularly, its state evolving process implies stronger modality fusion paradigm, making multi-modal fusion on SSMs an appealing direction. However, fusing multiple modalities is challenging for SSMs due to its hardware-aware parallelism designs. To this end, this paper proposes the Coupled SSM model, for coupling state chains of multiple modalities while maintaining independence of intra-modality state processes. Specifically, in our coupled scheme, we devise an inter-modal hidden states transition scheme, in which the current state is dependent on the states of its own chain and that of the neighbouring chains at the previous time-step. To fully comply with the hardware-aware parallelism, we obtain the global convolution kernel by deriving the state equation while introducing the historical state.Extensive experiments on CMU-MOSEI, CH-SIMS, CH-SIMSV2 through multi-domain input verify the effectiveness of our model compared to current state-of-the-art methods, improved F1-Score by 0.4%, 0.9%, and 2.3% on the three datasets respectively, 49% faster inference and 83.7% GPU memory save. The results demonstrate that Coupled Mamba model is capable of enhanced multi-modal fusion.", "keywords": "Mamba;multi-modal fusion;multi-modal sentiment analysis", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/1af5552642a4f1fdcea9c586b54a5450f6c0c432.zip", "author": "Wenbing Li;Hang Zhou;Junqing Yu;Zikai Song;Wei Yang", "authorids": "~Wenbing_Li1;~Hang_Zhou12;~Junqing_Yu1;~Zikai_Song1;~Wei_Yang3", "gender": "M;M;M;M;M", "homepage": ";http://faculty.hust.edu.cn/yujunqing/zh_CN/index.htm;https://weiyang-hust.github.io/;;https://skyesong38.github.io", "dblp": "303/3179;79/1515;03/1094-34;26/3707-10;222/5717.html", "google_scholar": "cb-NUt4AAAAJ;;rhsNpmkAAAAJ;mB7VifEAAAAJ;1qnuOZsAAAAJ", "orcid": ";;0000-0002-1189-1254;;", "linkedin": ";;wei-yang/;;", "or_profile": "~Wenbing_Li1;~Junqing_Yu1;~Wei_Yang3;~Henrry_Zhou1;~Song_Zikai1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;Full Professor;Associate Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nli2024coupled,\ntitle={Coupled Mamba: Enhanced Multimodal Fusion with Coupled State Space Model},\nauthor={Wenbing Li and Hang Zhou and Junqing Yu and Zikai Song and Wei Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UXEo3uNNIX}\n}", "github": "", "reviewers": "nXDG;FSJZ;qhp2;pbQ3;JRDw", "pdf_size": 679162, "rating": "5;5;6;6;6", "confidence": "5;4;4;3;4", "soundness": "2;3;4;3;2", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "57;148;45;87;50", "wc_strengths": "56;35;28;49;38", "wc_weaknesses": "173;104;155;82;94", "wc_questions": "24;31;97;5;133", "wc_limitations": "1;6;6;12;12", "wc_review": "311;324;331;235;327", "wc_reply_reviewers": "21;9;14;82;326", "wc_reply_authors": "0;114;0;754;839", "reply_reviewers": "1;1;1;1;3", "reply_authors": "1;2;1;2;4", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 77.4, 38.19214578941592 ], "wc_strengths_avg": [ 41.2, 10.02796090937734 ], "wc_weaknesses_avg": [ 121.6, 35.76926054589332 ], "wc_questions_avg": [ 58.0, 48.662100242385755 ], "wc_limitations_avg": [ 7.4, 4.17612260356422 ], "wc_review_avg": [ 305.6, 35.9310450724718 ], "wc_reply_reviewers_avg": [ 90.4, 120.71221976254104 ], "wc_reply_authors_avg": [ 341.4, 374.8768331065551 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6454972243679027, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8497452541444071392&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Catastrophic Goodhart: regularizing RLHF with KL divergence does not mitigate heavy-tailed reward misspecification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94961", "id": "UXuBzWoZGK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UXuBzWoZGK", "openreview": "https://openreview.net/forum?id=UXuBzWoZGK", "poster": "", "project": "", "author_site": "Thomas Kwa, Drake Thomas, Adri\u00e0 Garriga-Alonso", "tldr": "", "abstract": "When applying reinforcement learning from human feedback (RLHF), the reward is learned from data and, therefore, always has some error. It is common to mitigate this by regularizing the policy with KL divergence from a base model, with the hope that balancing reward with regularization will achieve desirable outcomes despite this reward misspecification. We show that when the reward function has light-tailed error, optimal policies under less restrictive KL penalties achieve arbitrarily high utility. However, if error is heavy-tailed, some policies obtain arbitrarily high reward despite achieving no more utility than the base model--a phenomenon we call catastrophic Goodhart. We adapt a discrete optimization method to measure the tails of reward models, finding that they are consistent with light-tailed error. However, the pervasiveness of heavy-tailed distributions in many real-world applications indicates that future sources of RL reward could have heavy-tailed error, increasing the likelihood of reward hacking even with KL regularization.", "keywords": "RLHF;extreme value theory;reward misspecification", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Thomas Kwa;Drake Thomas;Adri\u00e0 Garriga-Alonso", "authorids": "~Thomas_Kwa1;~Drake_Thomas1;~Adri\u00e0_Garriga-Alonso1", "gender": ";M;M", "homepage": ";https://drakethomas.net/;https://agarri.ga/", "dblp": ";;225/6564", "google_scholar": ";;OtnThiMAAAAJ", "orcid": ";;0000-0003-3409-5047", "linkedin": "tkwa/;;adrigarriga/", "or_profile": "~Thomas_Kwa1;~Drake_Thomas1;~Adria_Garriga-Alonso1", "aff": "Model Evaluation and Threat Research;Anthropic;FAR", "aff_domain": "metr.org;anthropic.com;far.ai", "position": "Researcher;Member of Technical Staff;Researcher", "bibtex": "@inproceedings{\nkwa2024catastrophic,\ntitle={Catastrophic Goodhart: regularizing {RLHF} with {KL} divergence does not mitigate heavy-tailed reward misspecification},\nauthor={Thomas Kwa and Drake Thomas and Adri{\\`a} Garriga-Alonso},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UXuBzWoZGK}\n}", "github": "", "reviewers": "znAw;SFxG;ngHC;G3eM", "pdf_size": 1252857, "rating": "6;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "100;91;53;115", "wc_strengths": "16;74;41;41", "wc_weaknesses": "71;93;182;72", "wc_questions": "74;59;5;251", "wc_limitations": "1;1;3;1", "wc_review": "262;318;284;480", "wc_reply_reviewers": "0;0;18;16", "wc_reply_authors": "0;89;550;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;3;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.75, 22.884219453588535 ], "wc_strengths_avg": [ 43.0, 20.603397778036516 ], "wc_weaknesses_avg": [ 104.5, 45.59879384369722 ], "wc_questions_avg": [ 97.25, 92.40231328273119 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 336.0, 85.49853799919622 ], "wc_reply_reviewers_avg": [ 8.5, 8.52936105461599 ], "wc_reply_authors_avg": [ 159.75, 228.22179453330043 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:F0MQQL6IaWwJ:scholar.google.com/&scioq=Catastrophic+Goodhart:+regularizing+RLHF+with+KL+divergence+does+not+mitigate+heavy-tailed+reward+misspecification&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "metr.org;anthropic.com;far.ai", "author_num": 3, "aff_unique_index": "1;2", "aff_unique_norm": ";Anthropic;FAR", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.anthropic.com;", "aff_unique_abbr": ";Anthropic;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "SustainDC: Benchmarking for Sustainable Data Center Control", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97671", "id": "UYgE9IfQIV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UYgE9IfQIV", "openreview": "https://openreview.net/forum?id=UYgE9IfQIV", "poster": "/media/PosterPDFs/NeurIPS%202024/97671.png?t=1733396129.5247924", "project": "", "author_site": "Avisek Naug, Antonio Guillen-Perez, Ricardo Luna Gutierrez, Vineet Gundecha, Desik Rengarajan, Sahand Ghorbanpour, Sajad Mousavi, Ashwin Ramesh Babu, Dejan Markovikj, Lekhapriya Dheeraj Kashyap, Soumyendu Sarkar", "tldr": "", "abstract": "Machine learning has driven an exponential increase in computational demand, leading to massive data centers that consume significant amounts of energy and contribute to climate change. This makes sustainable data center control a priority. In this paper, we introduce SustainDC, a set of Python environments for benchmarking multi-agent reinforcement learning (MARL) algorithms for data centers (DC). SustainDC supports custom DC configurations and tasks such as workload scheduling, cooling optimization, and auxiliary battery management, with multiple agents managing these operations while accounting for the effects of each other. We evaluate various MARL algorithms on SustainDC, showing their performance across diverse DC designs, locations, weather conditions, grid carbon intensity, and workload requirements. Our results highlight significant opportunities for improvement of data center operations using MARL algorithms. Given the increasing use of DC due to AI, SustainDC provides a crucial platform for the development and benchmarking of advanced algorithms essential for achieving sustainable computing and addressing other heterogeneous real-world challenges.", "keywords": "Sustainability;Carbon Footprint Reduction;Data Center;Energy Efficiency", "primary_area": "", "supplementary_material": "/attachment/8361434da5bc4321457b0bcf027d504e98b32835.pdf", "author": "Avisek Naug;Antonio Guillen;Ricardo Luna Gutierrez;Vineet Gundecha;Cullen Bash;Sahand Ghorbanpour;Sajad Mousavi;Ashwin Ramesh Babu;Dejan Markovikj;Lekhapriya Dheeraj Kashyap;Desik Rengarajan;Soumyendu Sarkar", "authorids": "~Avisek_Naug1;~Antonio_Guillen1;~Ricardo_Luna_Gutierrez1;~Vineet_Gundecha1;~Cullen_Bash1;~Sahand_Ghorbanpour1;~Sajad_Mousavi1;~Ashwin_Ramesh_Babu1;~Dejan_Markovikj1;~Lekhapriya_Dheeraj_Kashyap1;~Desik_Rengarajan1;~Soumyendu_Sarkar1", "gender": "M;;;M;M;M;;M;;F;M;M", "homepage": ";https://antonioalgaida.github.io/;;https://www.linkedin.com/in/vineetgundecha;;;;;;;https://sites.google.com/view/desik-rengarajan/home;https://www.linkedin.com/in/soumyendusarkar/", "dblp": "223/9168;354/8159;;;35/1777;84/7273.html;215/4865;;189/7761;266/9584;218/1345;301/8216", "google_scholar": "nr2aZHMAAAAJ;;eb4NVNsAAAAJ;;;;f1So9sUAAAAJ;EkMS6KAAAAAJ;PpbU9KYAAAAJ;Gf1Cs_0AAAAJ;ygOY_E4AAAAJ;K99Y958AAAAJ", "orcid": "0000-0003-3253-7286;0000-0003-3067-6469;;0000-0001-8688-3665;;;;;;0009-0004-8636-3669;0000-0002-8538-6023;0000-0003-0115-4944", "linkedin": "avisek-naug/;antonioguillenperez/;;vineetgundecha;;sahand-qp-2b9b98127;;;markovikj/;lekhapriya-dheeraj-kashyap-92712870;desik-rengarajan-109868100/;soumyendusarkar/", "or_profile": "~Avisek_Naug1;~Antonio_Guillen1;~Ricardo_Luna_Gutierrez1;~Vineet_Gundecha1;~Cullen_Bash1;~Sahand_Ghorbanpour1;~Sajad_Mousavi1;~Ashwin_Ramesh_Babu1;~Dejan_Markovikj1;~Lekhapriya_Dheeraj_Kashyap1;~Desik_Rengarajan1;~Soumyendu_Sarkar1", "aff": "Hewlett Packard Enterprise;Hewlett Packard Enterprise;Hewlett Packard Enterprise;Hewlett Packard Enterprise;Hewlett Packard Enterprise;Hewlett Packard Enterprise,AI Lab;Hewlett Packard Enterprise;Hewlett Packard Enterprise;Colorado State University;Texas A&M University - College Station;Hewlett Packard Labs ;Hewlett Packard Labs @ Hewlett Packard Enterprise", "aff_domain": "hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;colostate.edu;tamu.edu;hpe.com;hpe.com", "position": "Researcher;Researcher;Postdoc;Researcher;Researcher;Researcher;Researcher @ Hewlett Packard Enterprise Labs;Researcher;PhD student;PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nnaug2024sustaindc,\ntitle={Sustain{DC}: Benchmarking for Sustainable Data Center Control},\nauthor={Avisek Naug and Antonio Guillen and Ricardo Luna Gutierrez and Vineet Gundecha and Cullen Bash and Sahand Ghorbanpour and Sajad Mousavi and Ashwin Ramesh Babu and Dejan Markovikj and Lekhapriya Dheeraj Kashyap and Desik Rengarajan and Soumyendu Sarkar},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=UYgE9IfQIV}\n}", "github": "", "reviewers": "XQ3a;xaCY;MtEx", "pdf_size": 3875125, "rating": "6;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "59;13;230", "wc_strengths": "74;13;689", "wc_improvement": "244;34;543", "wc_limitations": "50;4;165", "wc_correctness": "11;4;331", "wc_clarity": "9;6;144", "wc_relation_to_prior_work": "49;6;305", "wc_documentation": "15;4;387", "wc_additional_feedback": "1;1;1", "wc_review": "512;85;2795", "wc_reply_reviewers": "0;0;67", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 100.66666666666667, 93.36071027055343 ], "wc_strengths_avg": [ 258.6666666666667, 305.3089509915416 ], "wc_improvement_avg": [ 273.6666666666667, 208.85454800463938 ], "wc_limitations_avg": [ 73.0, 67.71016664184683 ], "wc_correctness_avg": [ 115.33333333333333, 152.5261361938413 ], "wc_clarity_avg": [ 53.0, 64.35837163881635 ], "wc_relation_to_prior_work_avg": [ 120.0, 131.98737313344284 ], "wc_documentation_avg": [ 135.33333333333334, 178.01185977968495 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1130.6666666666667, 1189.7020168466088 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 31.584102892999123 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16328597007379325792&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;hpe.com;colostate.edu;tamu.edu;hpe.com;hpe.com", "author_num": 12, "aff_unique_index": "0;0;0;0;0;0;0;0;1;2;3;0", "aff_unique_norm": "Hewlett Packard Enterprise;Colorado State University;Texas A&M University;Hewlett Packard Labs", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hpe.com;https://www.colostate.edu;https://www.tamu.edu;https://www.hpl.hp.com", "aff_unique_abbr": "HPE;CSU;TAMU;HPL", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Tractable Inference Perspective of Offline RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94960", "id": "UZIHW8eFRp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UZIHW8eFRp", "openreview": "https://openreview.net/forum?id=UZIHW8eFRp", "poster": "", "project": "", "author_site": "Xuejie Liu, Anji Liu, Guy Van den Broeck, Yitao Liang", "tldr": "", "abstract": "A popular paradigm for offline Reinforcement Learning (RL) tasks is to first fit the offline trajectories to a sequence model, and then prompt the model for actions that lead to high expected return. In addition to obtaining accurate sequence models, this paper highlights that tractability, the ability to exactly and efficiently answer various probabilistic queries, plays an important role in offline RL. Specifically, due to the fundamental stochasticity from the offline data-collection policies and the environment dynamics, highly non-trivial conditional/constrained generation is required to elicit rewarding actions. While it is still possible to approximate such queries, we observe that such crude estimates undermine the benefits brought by expressive sequence models. To overcome this problem, this paper proposes Trifle (Tractable Inference for Offline RL), which leverages modern tractable generative models to bridge the gap between good sequence models and high expected returns at evaluation time. Empirically, Trifle achieves $7$ state-of-the-art scores and the highest average scores in $9$ Gym-MuJoCo benchmarks against strong baselines. Further, Trifle significantly outperforms prior approaches in stochastic environments and safe RL tasks with minimum algorithmic modifications.", "keywords": "Tractable probabilistic models;Probabilistic circuits", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Xuejie Liu;Anji Liu;Guy Van den Broeck;Yitao Liang", "authorids": "~Xuejie_Liu2;~Anji_Liu1;~Guy_Van_den_Broeck1;~Yitao_Liang1", "gender": ";M;M;M", "homepage": ";https://liuanji.github.io/;https://web.cs.ucla.edu/~yliang/;http://web.cs.ucla.edu/~guyvdb/", "dblp": ";227/8622;173/4969;96/7521.html", "google_scholar": "vxqpWKUAAAAJ;k_4zYecAAAAJ;KVzR1XEAAAAJ;d0KQ9z0AAAAJ", "orcid": ";;;0000-0003-3434-2503", "linkedin": "lxj-25a92b263;anji-liu-7610b7190/;;guyvdb", "or_profile": "~Xuejie_Liu2;~Anji_Liu1;~Yitao_Liang1;~Guy_Van_den_Broek1", "aff": "Peking University;University of California, Los Angeles;Peking University;University of California, Los Angeles", "aff_domain": "pku.edu.cn;ucla.edu;pku.edu.cn;ucla.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024a,\ntitle={A Tractable Inference Perspective of Offline {RL}},\nauthor={Xuejie Liu and Anji Liu and Guy Van den Broeck and Yitao Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UZIHW8eFRp}\n}", "github": "", "reviewers": "DwFR;TgpP;dmuC;jZtp", "pdf_size": 2591519, "rating": "4;6;6;7", "confidence": "2;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "55;98;91;235", "wc_strengths": "29;73;33;171", "wc_weaknesses": "48;124;126;224", "wc_questions": "2;3;133;134", "wc_limitations": "1;15;5;75", "wc_review": "135;313;388;839", "wc_reply_reviewers": "0;96;109;71", "wc_reply_authors": "301;55;1056;266", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;5;4", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 119.75, 68.51049189722696 ], "wc_strengths_avg": [ 76.5, 57.207953992430106 ], "wc_weaknesses_avg": [ 130.5, 62.47199372518857 ], "wc_questions_avg": [ 68.0, 65.50190836914601 ], "wc_limitations_avg": [ 24.0, 29.88310559496787 ], "wc_review_avg": [ 418.75, 259.4478512148443 ], "wc_reply_reviewers_avg": [ 69.0, 42.11294337849113 ], "wc_reply_authors_avg": [ 419.5, 379.3405462114484 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6253692913852449526&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "pku.edu.cn;ucla.edu;pku.edu.cn;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Peking University;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.ucla.edu", "aff_unique_abbr": "Peking U;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "DF40: Toward Next-Generation Deepfake Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97670", "id": "UZpySDOwvZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UZpySDOwvZ", "openreview": "https://openreview.net/forum?id=UZpySDOwvZ", "poster": "", "project": "", "author_site": "Zhiyuan Yan, Taiping Yao, Shen Chen, Yandan Zhao, Xinghe Fu, Junwei Zhu, Donghao Luo, Li Yuan, Chengjie Wang, Shouhong Ding, Yunsheng Wu", "tldr": "", "abstract": "We propose a new comprehensive benchmark to revolutionize the current deepfake detection field to the next generation. Predominantly, existing works identify top-notch detection algorithms and models by adhering to the common practice: training detectors on one specific dataset (*e.g.,* FF++) and testing them on other prevalent deepfake datasets. This protocol is often regarded as a \"golden compass\" for navigating SoTA detectors. But can these stand-out \"winners\" be truly applied to tackle the myriad of realistic and diverse deepfakes lurking in the real world? If not, what underlying factors contribute to this gap? In this work, we found the **dataset** (both train and test) can be the \"primary culprit\" due to the following: (1) *forgery diversity*: Deepfake techniques are commonly referred to as both face forgery (face-swapping and face-reenactment) and entire image synthesis (AIGC, especially face). Most existing datasets only contain partial types of them, with limited forgery methods implemented (*e.g.,* 2 swapping and 2 reenactment methods in FF++); (2) *forgery realism*: The dominated training dataset, FF++, contains out-of-date forgery techniques from the past four years. \"Honing skills\" on these forgeries makes it difficult to guarantee effective detection generalization toward nowadays' SoTA deepfakes; (3) *evaluation protocol*: Most detection works perform evaluations on one type, *e.g.,* face-swapping types only, which hinders the development of universal deepfake detectors.\n\nTo address this dilemma, we construct a highly diverse and large-scale deepfake detection dataset called **DF40**, which comprises **40** distinct deepfake techniques (10 times larger than FF++). We then conduct comprehensive evaluations using **4** standard evaluation protocols and **8** representative detection methods, resulting in over **2,000** evaluations. Through these evaluations, we provide an extensive analysis from various perspectives, leading to **7** new insightful findings contributing to the field. We also open up **4** valuable yet previously underexplored research questions to inspire future works. We release our dataset, code, and pre-trained weights at https://github.com/YZY-stack/DF40.", "keywords": "Deepfake Benchmark;Deepfake Dataset;Deepfake Detection", "primary_area": "", "supplementary_material": "", "author": "Zhiyuan Yan;Taiping Yao;Shen Chen;Yandan Zhao;Xinghe Fu;Junwei Zhu;Donghao Luo;Chengjie Wang;Shouhong Ding;Yunsheng Wu;Li Yuan", "authorids": "~Zhiyuan_Yan3;~Taiping_Yao2;~Shen_Chen1;~Yandan_Zhao1;~Xinghe_Fu1;~Junwei_Zhu1;~Donghao_Luo1;~Chengjie_Wang1;~Shouhong_Ding3;~Yunsheng_Wu1;~Li_Yuan2", "gender": "M;M;M;F;M;M;M;M;M;M;M", "homepage": "https://yzy-stack.github.io/;https://sndler.github.io/;https://chenshen.xyz/;;;https://hmzjwhmzjw.github.io/;;;;;https://yuanli2333.github.io/", "dblp": "56/6499-2;226/6518;;128/2509;252/8005;;202/1915;;119/6735;173/1746;98/4583-7", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;qkpaPuAAAAAJ;xMpvoLMAAAAJ;;;-OxQlHsAAAAJ;;fqte5H4AAAAJ;OGf40fkAAAAJ;;-5juAR0AAAAJ", "orcid": "0009-0002-7242-5828;;;;0000-0001-5720-6622;0000-0002-5407-5150;;0000-0003-4216-8090;0000-0002-3175-3553;;0000-0002-2120-5588", "linkedin": ";;;;;;;;;;", "or_profile": "~Zhiyuan_Yan3;~Taiping_Yao2;~Shen_Chen1;~Yandan_Zhao1;~Xinghe_Fu1;~Junwei_Zhu1;~Donghao_Luo1;~Chengjie_Wang1;~Shouhong_Ding3;~Yunsheng_Wu1;~Yuan_LI2", "aff": "Peking University;Tencent Youtu Lab;Tencent YouTu Lab;Tencent YouTu;Zhejiang University;Tencent Youtu Lab;Tencent YouTu Lab;Tencent YouTu Lab;Tencent Youtu Lab;Tencent YouTu Lab;Peking University", "aff_domain": "stu.pku.edu.cn;tencent.com;tencent.com;tencent.com;zju.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn", "position": "PhD student;researcher;Researcher;Researcher;PhD student;Researcher;researcher;Researcher;researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyan2024df,\ntitle={{DF}40: Toward Next-Generation Deepfake Detection},\nauthor={Zhiyuan Yan and Taiping Yao and Shen Chen and Yandan Zhao and Xinghe Fu and Junwei Zhu and Donghao Luo and Chengjie Wang and Shouhong Ding and Yunsheng Wu and Li Yuan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=UZpySDOwvZ}\n}", "github": "", "reviewers": "nz7T;H9dn;RsPZ;1Hg1", "pdf_size": 28674037, "rating": "4;6;9;9", "confidence": "5;5;5;5", "wc_summary_and_contributions": "76;65;56;70", "wc_strengths": "41;68;240;182", "wc_improvement": "68;92;41;56", "wc_limitations": "1;249;346;323", "wc_correctness": "7;10;10;9", "wc_clarity": "5;101;4;8", "wc_relation_to_prior_work": "11;25;16;41", "wc_documentation": "4;19;29;12", "wc_additional_feedback": "1;1;1;1", "wc_review": "214;630;743;702", "wc_reply_reviewers": "645;0;20;21", "wc_reply_authors": "1409;740;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 7.0, 2.1213203435596424 ], "confidence_avg": [ 5.0, 0.0 ], "wc_summary_and_contributions_avg": [ 66.75, 7.327175444876422 ], "wc_strengths_avg": [ 132.75, 81.4535910810567 ], "wc_improvement_avg": [ 64.25, 18.659782956937093 ], "wc_limitations_avg": [ 229.75, 136.84548768592992 ], "wc_correctness_avg": [ 9.0, 1.224744871391589 ], "wc_clarity_avg": [ 29.5, 41.30677910464576 ], "wc_relation_to_prior_work_avg": [ 23.25, 11.409973707244026 ], "wc_documentation_avg": [ 16.0, 9.192388155425117 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 572.25, 210.75385524350438 ], "wc_reply_reviewers_avg": [ 171.5, 273.5036562826903 ], "wc_reply_authors_avg": [ 537.25, 587.0116587428225 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10125280926049578352&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "stu.pku.edu.cn;tencent.com;tencent.com;tencent.com;zju.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn", "author_num": 11, "aff_unique_index": "0;1;1;1;2;1;1;1;1;1;0", "aff_unique_norm": "Peking University;Tencent;Zhejiang University", "aff_unique_dep": ";Youtu Lab;", "aff_unique_url": "http://www.pku.edu.cn;https://www.tencent.com;https://www.zju.edu.cn", "aff_unique_abbr": "Peking U;Tencent;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "State Space Models on Temporal Graphs: A First-Principles Study", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94959", "id": "UaJErAOssN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UaJErAOssN", "openreview": "https://openreview.net/forum?id=UaJErAOssN", "poster": "/media/PosterPDFs/NeurIPS%202024/94959.png?t=1730788537.7811503", "project": "", "author_site": "Jintang Li, Ruofan Wu, Xinzhou Jin, Boqun Ma, Liang Chen, Zibin Zheng", "tldr": "", "abstract": "Over the past few years, research on deep graph learning has shifted from static graphs to temporal graphs in response to real-world complex systems that exhibit dynamic behaviors. In practice, temporal graphs are formalized as an ordered sequence of static graph snapshots observed at discrete time points. Sequence models such as RNNs or Transformers have long been the predominant backbone networks for modeling such temporal graphs. Yet, despite the promising results, RNNs struggle with long-range dependencies, while transformers are burdened by quadratic computational complexity. Recently, state space models (SSMs), which are framed as discretized representations of an underlying continuous-time linear dynamical system, have garnered substantial attention and achieved breakthrough advancements in independent sequence modeling. In this work, we undertake a principled investigation that extends SSM theory to temporal graphs by integrating structural information into the online approximation objective via the adoption of a Laplacian regularization term. The emergent continuous-time system introduces novel algorithmic challenges, thereby necessitating our development of GraphSSM, a graph state space model for modeling the dynamics of temporal graphs. Extensive experimental results demonstrate the effectiveness of our GraphSSM framework across various temporal graph benchmarks.", "keywords": "temporal graph learning; state space models; graph neural networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/35612151c3935f8e72e0d67e15d1f2ee3d9b0a46.zip", "author": "Jintang Li;Ruofan Wu;Xinzhou Jin;Boqun Ma;Liang Chen;Zibin Zheng", "authorids": "~Jintang_Li1;~Ruofan_Wu1;~Xinzhou_Jin1;~Boqun_Ma1;~Liang_Chen17;~Zibin_Zheng1", "gender": "M;M;;M;M;M", "homepage": "https://edisonleeeee.github.io/;https://rorschach1989.github.io/;https://csxzjin.github.io;https://ieeexplore.ieee.org/author/37086480851;https://www.zibinzheng.com/;https://chenliang.tech/", "dblp": "260/6811;;368/1756;;z/ZibinZheng;https://dblp.uni-trier.de/pid/01/5394-1", "google_scholar": "mS4fpUIAAAAJ;;CFJGDtAAAAAJ;;WPC6ED4AAAAJ;pGZtPjcAAAAJ", "orcid": "0000-0002-6405-1531;;;;0000-0002-7878-4330;", "linkedin": ";;;;;", "or_profile": "~Jintang_Li1;~Ruofan_Wu1;~Xinzhou_Jin1;~Boqun_Ma1;~Zibin_Zheng1;~Liang_Chen7", "aff": "Sun Yat-sen University;Ant Group;SUN YAT-SEN UNIVERSITY;Shanghai Jiaotong University;SUN YAT-SEN UNIVERSITY;", "aff_domain": "sysu.edu.cn;antgroup.com;sysu.edu.cn;sjtu.edu;sysu.edu.cn;", "position": "PhD student;Researcher;MS student;Researcher;Full Professor;", "bibtex": "@inproceedings{\nli2024state,\ntitle={State Space Models on Temporal Graphs: A First-Principles Study},\nauthor={Jintang Li and Ruofan Wu and Xinzhou Jin and Boqun Ma and Liang Chen and Zibin Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UaJErAOssN}\n}", "github": "", "reviewers": "PUCW;ZL4u;o7Uk", "pdf_size": 622369, "rating": "5;6;6", "confidence": "3;3;2", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "87;116;20", "wc_strengths": "45;114;36", "wc_weaknesses": "171;443;65", "wc_questions": "161;137;58", "wc_limitations": "1;1;8", "wc_review": "465;811;187", "wc_reply_reviewers": "292;25;16", "wc_reply_authors": "1566;20;20", "reply_reviewers": "2;1;1", "reply_authors": "5;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.33333333333333, 40.202266381663385 ], "wc_strengths_avg": [ 65.0, 34.84250278036869 ], "wc_weaknesses_avg": [ 226.33333333333334, 159.20078168428975 ], "wc_questions_avg": [ 118.66666666666667, 44.00252518006463 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_review_avg": [ 487.6666666666667, 255.2506393505977 ], "wc_reply_reviewers_avg": [ 111.0, 128.03905654135383 ], "wc_reply_authors_avg": [ 535.3333333333334, 728.7913891429349 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9430808949337447837&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sysu.edu.cn;antgroup.com;sysu.edu.cn;sjtu.edu;sysu.edu.cn;", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Sun Yat-sen University;Ant Group;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.antgroup.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "SYSU;Ant Group;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Variational Flow Matching for Graph Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94958", "id": "UahrHR5HQh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UahrHR5HQh", "openreview": "https://openreview.net/forum?id=UahrHR5HQh", "poster": "", "project": "", "author_site": "Floor Eijkelboom, Grigory Bartosh, Christian Andersson Naesseth, Max Welling, Jan-Willem van de Meent", "tldr": "", "abstract": "We present a formulation of flow matching as variational inference, which we refer to as variational flow matching (VFM). We use this formulation to develop CatFlow, a flow matching method for categorical data that is easy to implement, computationally efficient, and achieves strong results on graph generation tasks. In VFM, the objective is to approximate the posterior probability path, which is a distribution over possible end points of a trajectory. VFM admits both the original flow matching objective and the CatFlow objective as special cases. We also relate VFM to score-based models, in which the dynamics are stochastic rather than deterministic, and derive a bound on the model likelihood based on a reweighted VFM objective. We evaluate CatFlow on one abstract graph generation task and two molecular generation tasks. In all cases, CatFlow exceeds or matches performance of the current state-of-the-art models.", "keywords": "generative modeling;flow matching;variational inference;categorical;discrete;graph generation;molecular generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9a3e3d38581ba0a217ee5997600c2059ebc58e07.zip", "author": "Floor Eijkelboom;Grigory Bartosh;Christian A. Naesseth;Max Welling;Jan-Willem van de Meent", "authorids": "~Floor_Eijkelboom1;~Grigory_Bartosh1;~Christian_A._Naesseth1;~Max_Welling1;~Jan-Willem_van_de_Meent1", "gender": "M;M;M;M;M", "homepage": ";;https://staff.fnwi.uva.nl/m.welling/;https://jwvdm.github.io/;https://naesseth.github.io/", "dblp": ";;16/2286;137/3263;146/0902", "google_scholar": "jCWo5lUAAAAJ;;https://scholar.google.nl/citations?user=8200InoAAAAJ;CX9Lu38AAAAJ;GQ6rOssAAAAJ", "orcid": ";;0000-0003-1484-2121;0000-0001-9465-5398;", "linkedin": "flooreijkelboom/;grigory-bartosh-76004a163/;;;", "or_profile": "~Floor_Eijkelboom1;~Grigory_Bartosh1;~Max_Welling1;~Jan-Willem_van_de_Meent1;~Christian_A_Naesseth1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl;uva.nl;uva.nl", "position": "PhD student;PhD student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\neijkelboom2024variational,\ntitle={Variational Flow Matching for Graph Generation},\nauthor={Floor Eijkelboom and Grigory Bartosh and Christian A. Naesseth and Max Welling and Jan-Willem van de Meent},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UahrHR5HQh}\n}", "github": "", "reviewers": "vH9B;WAZn;5EeZ", "pdf_size": 1040645, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "120;62;124", "wc_strengths": "41;50;82", "wc_weaknesses": "158;191;20", "wc_questions": "89;50;177", "wc_limitations": "6;12;6", "wc_review": "414;365;409", "wc_reply_reviewers": "0;0;15", "wc_reply_authors": "29;0;0", "reply_reviewers": "0;0;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 102.0, 28.331372481167705 ], "wc_strengths_avg": [ 57.666666666666664, 17.594190960528863 ], "wc_weaknesses_avg": [ 123.0, 74.06753674856482 ], "wc_questions_avg": [ 105.33333333333333, 53.11831657305751 ], "wc_limitations_avg": [ 8.0, 2.8284271247461903 ], "wc_review_avg": [ 396.0, 22.015146301277824 ], "wc_reply_reviewers_avg": [ 5.0, 7.0710678118654755 ], "wc_reply_authors_avg": [ 9.666666666666666, 13.67073110293992 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17142740370512503249&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uva.nl;uva.nl;uva.nl;uva.nl;uva.nl", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "AverNet: All-in-one Video Restoration for Time-varying Unknown Degradations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94957", "id": "UcdaNf2PKL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UcdaNf2PKL", "openreview": "https://openreview.net/forum?id=UcdaNf2PKL", "poster": "/media/PosterPDFs/NeurIPS%202024/94957.png?t=1731659624.4724982", "project": "", "author_site": "Haiyu Zhao, Lei Tian, Xinyan Xiao, Peng Hu, Yuanbiao Gou, Xi Peng", "tldr": "", "abstract": "Traditional video restoration approaches were designed to recover clean videos from a specific type of degradation, making them ineffective in handling multiple unknown types of degradation. To address this issue, several studies have been conducted and have shown promising results. However, these studies overlook that the degradations in video usually change over time, dubbed time-varying unknown degradations (TUD). To tackle such a less-touched challenge, we propose an innovative method, termed as All-in-one VidEo Restoration Network (AverNet), which comprises two core modules, i.e., Prompt-Guided Alignment (PGA) module and Prompt-Conditioned Enhancement (PCE) module. Specifically, PGA addresses the issue of pixel shifts caused by time-varying degradations by learning and utilizing prompts to align video frames at the pixel level. To handle multiple unknown degradations, PCE recasts it into a conditional restoration problem by implicitly establishing a conditional map between degradations and ground truths. Thanks to the collaboration between PGA and PCE modules, AverNet empirically demonstrates its effectiveness in recovering videos from TUD. Extensive experiments are carried out on two synthesized datasets featuring seven types of degradations with random corruption levels. The code is available at https://github.com/XLearning-SCU/2024-NeurIPS-AverNet.", "keywords": "Deep learning;All-in-one video restoration;Time-varying unknown degradations", "primary_area": "machine_vision", "supplementary_material": "/attachment/db88e88149f799f6775568c1607bee0f02b02c73.zip", "author": "Haiyu Zhao;Lei Tian;Xinyan Xiao;Peng Hu;Yuanbiao Gou;Xi Peng", "authorids": "~Haiyu_Zhao2;~Lei_Tian4;~Xinyan_Xiao1;~Peng_Hu2;~Yuanbiao_Gou1;~Xi_Peng3", "gender": "M;M;;M;M;", "homepage": "https://pandint.github.io/about/;;;https://penghu-cs.github.io/;https://ybgou.github.io/;", "dblp": "203/8513;04/2026-2.html;;11/6278-2;268/6723;", "google_scholar": "vBsI10YAAAAJ;;;gvESkwYAAAAJ;o5OcgLcAAAAJ;", "orcid": "0009-0003-5201-7904;;;0000-0003-3868-3997;;", "linkedin": ";;;;;", "or_profile": "~Haiyu_Zhao2;~Lei_Tian4;~Xinyan_Xiao1;~Peng_Hu2;~Yuanbiao_Gou1;~Xi_Peng3", "aff": "Sichuan University;Baidu;;Sichuan University;Sichuan University;", "aff_domain": "scu.edu.cn;baidu.com;;scu.edu.cn;scu.edu.cn;", "position": "PhD student;Researcher;;Associate Professor;PhD student;", "bibtex": "@inproceedings{\nzhao2024avernet,\ntitle={AverNet: All-in-one Video Restoration for Time-varying Unknown Degradations},\nauthor={Haiyu Zhao and Lei Tian and Xinyan Xiao and Peng Hu and Yuanbiao Gou and Xi Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UcdaNf2PKL}\n}", "github": "", "reviewers": "Zzy9;wJZg;hhCd;2PV4", "pdf_size": 10324823, "rating": "4;5;6;8", "confidence": "4;4;5;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "39;38;60;65", "wc_strengths": "31;50;51;83", "wc_weaknesses": "166;168;196;111", "wc_questions": "74;109;6;4", "wc_limitations": "1;7;23;11", "wc_review": "311;372;336;274", "wc_reply_reviewers": "75;59;0;88", "wc_reply_authors": "143;65;0;43", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 50.5, 12.134661099511597 ], "wc_strengths_avg": [ 53.75, 18.673175948402566 ], "wc_weaknesses_avg": [ 160.25, 30.808886704975237 ], "wc_questions_avg": [ 48.25, 44.990971316476376 ], "wc_limitations_avg": [ 10.5, 8.04673846971554 ], "wc_review_avg": [ 323.25, 35.75874019033668 ], "wc_reply_reviewers_avg": [ 55.5, 33.64892271678248 ], "wc_reply_authors_avg": [ 62.75, 51.895929512824026 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8451542547285166, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10282373416485675396&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "scu.edu.cn;baidu.com;;scu.edu.cn;scu.edu.cn;", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Sichuan University;Baidu", "aff_unique_dep": ";Baidu, Inc.", "aff_unique_url": "https://www.scu.edu.cn;https://www.baidu.com", "aff_unique_abbr": "SCU;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Direct Unlearning Optimization for Robust and Safe Text-to-Image Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94956", "id": "UdXE5V2d0O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UdXE5V2d0O", "openreview": "https://openreview.net/forum?id=UdXE5V2d0O", "poster": "", "project": "", "author_site": "Yong-Hyun Park, Sangdoo Yun, Jin-Hwa Kim, Junho Kim, Geonhui Jang, Yonghyun Jeong, Junghyo Jo, Gayoung Lee", "tldr": "", "abstract": "Recent advancements in text-to-image (T2I) models have greatly benefited from large-scale datasets, but they also pose significant risks due to the potential generation of unsafe content. To mitigate this issue, researchers proposed unlearning techniques that attempt to induce the model to unlearn potentially harmful prompts. However, these methods are easily bypassed by adversarial attacks, making them unreliable for ensuring the safety of generated images. In this paper, we propose Direct Unlearning Optimization (DUO), a novel framework for removing NSFW content from T2I models while preserving their performance on unrelated topics. DUO employs a preference optimization approach using curated paired image data, ensuring that the model learns to remove unsafe visual concepts while retain unrelated features. Furthermore, we introduce an output-preserving regularization term to maintain the model's generative capabilities on safe content. Extensive experiments demonstrate that DUO can robustly defend against various state-of-the-art red teaming methods without significant performance degradation on unrelated topics, as measured by FID and CLIP scores. Our work contributes to the development of safer and more reliable T2I models, paving the way for their responsible deployment in both closed-source and open-source scenarios.", "keywords": "diffusion models;unlearning;safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yong-Hyun Park;Sangdoo Yun;Jin-Hwa Kim;Junho Kim;Geonhui Jang;Yonghyun Jeong;Junghyo Jo;Gayoung Lee", "authorids": "~Yong-Hyun_Park1;~Sangdoo_Yun1;~Jin-Hwa_Kim1;~Junho_Kim3;~Geonhui_Jang1;~Yonghyun_Jeong1;~Junghyo_Jo1;~Gayoung_Lee1", "gender": "M;M;Unspecified;M;M;M;;F", "homepage": ";https://sangdooyun.github.io/;http://wityworks.com;http://bit.ly/jhkim_resume;;https://github.com/TeamSAIDA/SAIDA;;", "dblp": "62/444;124/3009.html;48/258;;;260/0615.html;48/11016;179/2468", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;o0qtjzYAAAAJ;https://scholar.google.co.kr/citations?user=3f2wPekAAAAJ;WtjDugkAAAAJ;;e9pHCjUAAAAJ;h1QXLx0AAAAJ;", "orcid": ";;0000-0002-0423-0415;0000-0003-3712-8510;0009-0001-6873-4356;;0000-0002-1284-9488;", "linkedin": ";;;taki0112/;;;;gayoung-lee-0824548a/", "or_profile": "~Yong-Hyun_Park1;~Sangdoo_Yun1;~Jin-Hwa_Kim1;~Junho_Kim3;~Geonhui_Jang1;~Yonghyun_Jeong1;~Junghyo_Jo1;~Gayoung_Lee1", "aff": "Seoul National University;NAVER;NAVER;NAVER;NAVER;NAVER;Seoul National University;NAVER AI Lab", "aff_domain": "snu.ac.kr;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;snu.ac.kr;navercorp.com", "position": "MS student;Research Scientist;Research Scientist;Research Scientist;Intern;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\npark2024direct,\ntitle={Direct Unlearning Optimization for Robust and Safe Text-to-Image Models},\nauthor={Yong-Hyun Park and Sangdoo Yun and Jin-Hwa Kim and Junho Kim and Geonhui Jang and Yonghyun Jeong and Junghyo Jo and Gayoung Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UdXE5V2d0O}\n}", "github": "", "reviewers": "huMh;rmhF;tvVC;Ub4z", "pdf_size": 8428779, "rating": "6;6;6;6", "confidence": "3;4;5;4", "soundness": "3;4;2;3", "novelty": "2;4;2;3", "presentation": "3;3;3;4", "wc_summary": "61;94;29;111", "wc_strengths": "14;33;19;103", "wc_weaknesses": "96;110;204;67", "wc_questions": "104;7;7;5", "wc_limitations": "3;4;22;8", "wc_review": "278;248;281;294", "wc_reply_reviewers": "38;26;33;0", "wc_reply_authors": "59;29;59;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 31.47518864121389 ], "wc_strengths_avg": [ 42.25, 35.75874019033668 ], "wc_weaknesses_avg": [ 119.25, 51.329207085245336 ], "wc_questions_avg": [ 30.75, 42.298788398723666 ], "wc_limitations_avg": [ 9.25, 7.595228765481656 ], "wc_review_avg": [ 275.25, 16.843025262701474 ], "wc_reply_reviewers_avg": [ 24.25, 14.635146053251399 ], "wc_reply_authors_avg": [ 36.75, 24.498724456591614 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9806031290144758785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;snu.ac.kr;navercorp.com", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;0;1", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Hierarchical Uncertainty Exploration via Feedforward Posterior Trees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94955", "id": "UddVRqTrjt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UddVRqTrjt", "openreview": "https://openreview.net/forum?id=UddVRqTrjt", "poster": "/media/PosterPDFs/NeurIPS%202024/94955.png?t=1732371669.9874752", "project": "", "author_site": "Elias Nehme, Rotem Mulayoff, Tomer Michaeli", "tldr": "", "abstract": "When solving ill-posed inverse problems, one often desires to explore the space of potential solutions rather than be presented with a single plausible reconstruction. Valuable insights into these feasible solutions and their associated probabilities are embedded in the posterior distribution. However, when confronted with data of high dimensionality (such as images), visualizing this distribution becomes a formidable challenge, necessitating the application of effective summarization techniques before user examination. In this work, we introduce a new approach for visualizing posteriors across multiple levels of granularity using *tree*-valued predictions. Our method predicts a tree-valued hierarchical summarization of the posterior distribution for any input measurement, in a single forward pass of a neural network. We showcase the efficacy of our approach across diverse datasets and image restoration challenges, highlighting its prowess in uncertainty quantification and visualization. Our findings reveal that our method performs comparably to a baseline that hierarchically clusters samples from a diffusion-based posterior sampler, yet achieves this with orders of magnitude greater speed. Code and examples are available at our [webpage](https://eliasnehme.github.io/PosteriorTrees/).", "keywords": "Uncertainty Quantification;Explainable Computer Vision;Inverse Problems;Computational Imaging;Hierarchical Clustering", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Elias Nehme;Rotem Mulayoff;Tomer Michaeli", "authorids": "~Elias_Nehme1;~Rotem_Mulayoff1;~Tomer_Michaeli1", "gender": "M;M;M", "homepage": "https://eliasnehme.github.io/;;https://tomer.net.technion.ac.il/", "dblp": "275/8151;210/6266;70/3188.html", "google_scholar": "https://scholar.google.co.il/citations?user=jWLfyAIAAAAJ;sqOg-58AAAAJ;n2EbR2cAAAAJ", "orcid": "0000-0003-1759-1751;;", "linkedin": "elias-nehme-2a010571/;;", "or_profile": "~Elias_Nehme1;~Rotem_Mulayoff1;~Tomer_Michaeli1", "aff": "Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Technion, Technion", "aff_domain": "ee.technion.ac.il;technion.ac.il;technion.ac.il", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nnehme2024hierarchical,\ntitle={Hierarchical Uncertainty Exploration via Feedforward Posterior Trees},\nauthor={Elias Nehme and Rotem Mulayoff and Tomer Michaeli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UddVRqTrjt}\n}", "github": "", "reviewers": "VHHJ;7Dwv;4WsD", "pdf_size": 49898601, "rating": "7;7;7", "confidence": "3;3;3", "soundness": "3;4;3", "novelty": "2;4;3", "presentation": "3;4;3", "wc_summary": "72;54;89", "wc_strengths": "79;32;72", "wc_weaknesses": "139;128;180", "wc_questions": "91;63;2", "wc_limitations": "10;7;2", "wc_review": "391;284;345", "wc_reply_reviewers": "18;77;35", "wc_reply_authors": "0;331;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.66666666666667, 14.29063407348401 ], "wc_strengths_avg": [ 61.0, 20.704266871026046 ], "wc_weaknesses_avg": [ 149.0, 22.37558192911788 ], "wc_questions_avg": [ 52.0, 37.15732319027659 ], "wc_limitations_avg": [ 6.333333333333333, 3.2998316455372216 ], "wc_review_avg": [ 340.0, 43.825411197918804 ], "wc_reply_reviewers_avg": [ 43.333333333333336, 24.796953217863056 ], "wc_reply_authors_avg": [ 110.33333333333333, 156.0348963818315 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12082305958135328408&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ee.technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Improving Environment Novelty Quantification for Effective Unsupervised Environment Design", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94954", "id": "UdxpjKO2F9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UdxpjKO2F9", "openreview": "https://openreview.net/forum?id=UdxpjKO2F9", "poster": "/media/PosterPDFs/NeurIPS%202024/94954.png?t=1733629953.9783726", "project": "", "author_site": "Jayden Teoh, Wenjun Li, Pradeep Varakantham", "tldr": "", "abstract": "Unsupervised Environment Design (UED) formalizes the problem of autocurricula through interactive training between a teacher agent and a student agent. The teacher generates new training environments with high learning potential, curating an adaptive curriculum that strengthens the student's ability to handle unseen scenarios. Existing UED methods mainly rely on *regret*, a metric that measures the difference between the agent's optimal and actual performance, to guide curriculum design. Regret-driven methods generate curricula that progressively increase environment complexity for the student but overlook environment *novelty* \u2014 a critical element for enhancing an agent's generalizability. Measuring environment novelty is especially challenging due to the underspecified nature of environment parameters in UED, and existing approaches face significant limitations. To address this, this paper introduces the *Coverage-based Evaluation of Novelty In Environment* (CENIE) framework. CENIE proposes a scalable, domain-agnostic, and curriculum-aware approach to quantifying environment novelty by leveraging the student's state-action space coverage from previous curriculum experiences. We then propose an implementation of CENIE that models this coverage and measures environment novelty using Gaussian Mixture Models. By integrating both regret and novelty as complementary objectives for curriculum design, CENIE facilitates effective exploration across the state-action space while progressively increasing curriculum complexity. Empirical evaluations demonstrate that augmenting existing regret-based UED algorithms with CENIE achieves state-of-the-art performance across multiple benchmarks, underscoring the effectiveness of novelty-driven autocurricula for robust generalization.", "keywords": "Unsupervised Environment Design;Novelty-driven Autocurricula", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jayden Teoh;Wenjun Li;Pradeep Varakantham", "authorids": "~Jayden_Teoh1;~Wenjun_Li1;~Pradeep_Varakantham1", "gender": "M;M;M", "homepage": ";;http://www.mysmu.edu.sg/faculty/pradeepv", "dblp": ";;72/759", "google_scholar": "GnHpLE8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=BAdQpFkAAAAJ", "orcid": ";;", "linkedin": ";wenjun-li-004375116/;", "or_profile": "~Jayden_Teoh1;~Wenjun_Li1;~Pradeep_Varakantham1", "aff": "Singapore Management University;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nteoh2024improving,\ntitle={Improving Environment Novelty Quantification for Effective Unsupervised Environment Design},\nauthor={Jayden Teoh and Wenjun Li and Pradeep Varakantham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UdxpjKO2F9}\n}", "github": "", "reviewers": "kpCs;KtoT;jT1H;qR9x", "pdf_size": 13273730, "rating": "6;6;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "113;66;100;53", "wc_strengths": "66;22;92;74", "wc_weaknesses": "127;208;79;450", "wc_questions": "37;49;157;13", "wc_limitations": "4;9;85;2", "wc_review": "347;354;513;592", "wc_reply_reviewers": "57;16;284;132", "wc_reply_authors": "76;22;659;109", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;4;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 24.38237068047322 ], "wc_strengths_avg": [ 63.5, 25.743931323712 ], "wc_weaknesses_avg": [ 216.0, 142.7497810856465 ], "wc_questions_avg": [ 64.0, 55.235857918565905 ], "wc_limitations_avg": [ 25.0, 34.734708865916815 ], "wc_review_avg": [ 451.5, 104.82008395341037 ], "wc_reply_reviewers_avg": [ 122.25, 102.2310495886646 ], "wc_reply_authors_avg": [ 216.5, 257.358213391374 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vM-9cB32POYJ:scholar.google.com/&scioq=Improving+Environment+Novelty+Quantification+for+Effective+Unsupervised+Environment+Design&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "DreamSteerer: Enhancing Source Image Conditioned Editability using Personalized Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94953", "id": "UekHycx0lz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UekHycx0lz", "openreview": "https://openreview.net/forum?id=UekHycx0lz", "poster": "", "project": "", "author_site": "Zhengyang Yu, Zhaoyuan Yang, Jing Zhang", "tldr": "", "abstract": "Recent text-to-image (T2I) personalization methods have shown great premise in teaching a diffusion model user-specified concepts given a few images for reusing the acquired concepts in a novel context. With massive efforts being dedicated to personalized generation, a promising extension is personalized editing, namely to edit an image using personalized concepts, which can provide more precise guidance signal than traditional textual guidance. To address this, one straightforward solution is to incorporate a personalized diffusion model with a text-driven editing framework. However, such solution often shows unsatisfactory editability on the source image. To address this, we propose DreamSteerer, a plug-in method for augmenting existing T2I personalization methods. Specifically, we enhance the source image conditioned editability of a personalized diffusion model via a novel Editability Driven Score Distillation (EDSD) objective. Moreover, we identify a mode trapping issue with EDSD, and propose a mode shifting regularization with spatial feature guided sampling to avoid such issue. We further employ two key modifications on the Delta Denoising Score framework that enable high-fidelity local editing with personalized concepts. Extensive experiments validate that DreamSteerer can significantly improve the editability of several T2I personalization baselines while being computationally efficient.", "keywords": "Diffusion;Personalization;Editing", "primary_area": "generative_models", "supplementary_material": "", "author": "Zhengyang Yu;Zhaoyuan Yang;Jing Zhang", "authorids": "~Zhengyang_Yu3;~Zhaoyuan_Yang1;~Jing_Zhang23", "gender": "M;;F", "homepage": "https://comp.anu.edu.au/people/zhengyang-yu/;;https://jingzhang617.github.io", "dblp": ";227/2718;05/3499-52", "google_scholar": "N8Nuv1kAAAAJ;qxa04lIAAAAJ;https://scholar.google.com.au/citations?user=Qa1DMv8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Zhengyang_Yu3;~Zhaoyuan_Yang1;~Jing_Zhang23", "aff": "Australian National University;General Electric;Australian National University", "aff_domain": "anu.edu.au;ge.com;anu.edu.au", "position": "PhD student;Researcher;Lecturer", "bibtex": "@inproceedings{\nyu2024dreamsteerer,\ntitle={DreamSteerer: Enhancing Source Image Conditioned Editability using Personalized Diffusion Models},\nauthor={Zhengyang Yu and Zhaoyuan Yang and Jing Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UekHycx0lz}\n}", "github": "", "reviewers": "2C3N;kb7M;zDhi;AnAq", "pdf_size": 33699528, "rating": "4;5;5;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;4", "wc_summary": "109;96;61;205", "wc_strengths": "41;78;43;184", "wc_weaknesses": "147;254;166;133", "wc_questions": "14;245;2;102", "wc_limitations": "8;4;15;43", "wc_review": "319;677;287;667", "wc_reply_reviewers": "99;15;20;19", "wc_reply_authors": "353;25;29;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 117.75, 53.344985706249844 ], "wc_strengths_avg": [ 86.5, 58.18290126832797 ], "wc_weaknesses_avg": [ 175.0, 47.090338711884414 ], "wc_questions_avg": [ 90.75, 97.06537745251909 ], "wc_limitations_avg": [ 17.5, 15.239750654128171 ], "wc_review_avg": [ 487.5, 184.88036672399804 ], "wc_reply_reviewers_avg": [ 38.25, 35.12388788275011 ], "wc_reply_authors_avg": [ 107.5, 141.75595225598113 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7042670304385375383&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "anu.edu.au;ge.com;anu.edu.au", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Australian National University;General Electric", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.ge.com", "aff_unique_abbr": "ANU;GE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "Improving Adaptivity via Over-Parameterization in Sequence Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94952", "id": "UfLH4T676K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UfLH4T676K", "openreview": "https://openreview.net/forum?id=UfLH4T676K", "poster": "", "project": "", "author_site": "Yicheng Li, Qian Lin", "tldr": "", "abstract": "It is well known that eigenfunctions of a kernel play a crucial role in kernel regression.\n Through several examples, we demonstrate that even with the same set of eigenfunctions, the order of these functions significantly impacts regression outcomes.\n Simplifying the model by diagonalizing the kernel, we introduce an over-parameterized gradient descent in the realm of sequence model to capture the effects of various orders of a fixed set of eigen-functions.\n This method is designed to explore the impact of varying eigenfunction orders.\n Our theoretical results show that the over-parameterization gradient flow can adapt to the underlying structure of the signal and significantly outperform the vanilla gradient flow method.\n Moreover, we also demonstrate that deeper over-parameterization can further enhance the generalization capability of the model.\n These results not only provide a new perspective on the benefits of over-parameterization and but also offer insights into the adaptivity and generalization potential of neural networks beyond the kernel regime.", "keywords": "Over-parameterization;gradient descent;Gaussian sequence model;kernel regression", "primary_area": "learning_theory", "supplementary_material": "/attachment/5bb5dac1330cd0f13837983ac0eed76393aae966.zip", "author": "Yicheng Li;Qian Lin", "authorids": "~Yicheng_Li2;~Qian_Lin2", "gender": "M;M", "homepage": ";https://sites.google.com/site/qianlincd/", "dblp": ";79/3108", "google_scholar": ";kHPrqdgAAAAJ", "orcid": "0000-0002-9497-0379;", "linkedin": ";", "or_profile": "~Yicheng_Li2;~Qian_Lin2", "aff": "Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2024improving,\ntitle={Improving Adaptivity via Over-Parameterization in Sequence Models},\nauthor={Yicheng Li and Qian Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UfLH4T676K}\n}", "github": "", "reviewers": "cuCj;GugS;EWHq;QDTR;kYgJ", "pdf_size": 961793, "rating": "5;6;6;6;7", "confidence": "2;3;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "92;44;152;210;19", "wc_strengths": "78;15;75;163;71", "wc_weaknesses": "110;21;157;191;45", "wc_questions": "102;45;236;45;2", "wc_limitations": "1;4;21;19;4", "wc_review": "383;129;641;628;141", "wc_reply_reviewers": "20;12;267;138;0", "wc_reply_authors": "0;0;573;365;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;3;3;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 103.4, 70.01028495871161 ], "wc_strengths_avg": [ 80.4, 47.37763185301689 ], "wc_weaknesses_avg": [ 104.8, 64.4682867772985 ], "wc_questions_avg": [ 86.0, 81.45428165541699 ], "wc_limitations_avg": [ 9.8, 8.423775875461075 ], "wc_review_avg": [ 384.4, 223.45433537973705 ], "wc_reply_reviewers_avg": [ 87.4, 102.64813685596053 ], "wc_reply_authors_avg": [ 187.6, 238.99171533758235 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7905694150420949, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14839016309405498120&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Faster Repeated Evasion Attacks in Tree Ensembles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94951", "id": "Ugr0yPzY71", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ugr0yPzY71", "openreview": "https://openreview.net/forum?id=Ugr0yPzY71", "poster": "/media/PosterPDFs/NeurIPS%202024/94951.png?t=1733395706.2982862", "project": "", "author_site": "Lorenzo Cascioli, Laurens Devos, Ondrej Kuzelka, Jesse Davis", "tldr": "", "abstract": "Tree ensembles are one of the most widely used model classes. However, these models are susceptible to adversarial examples, i.e., slightly perturbed examples that elicit a misprediction. There has been significant research on designing approaches to construct such examples for tree ensembles. But this is a computationally challenging problem that often must be solved a large number of times (e.g., for all examples in a training set). This is compounded by the fact that current approaches attempt to find such examples from scratch. In contrast, we exploit the fact that multiple similar problems are being solved. Specifically, our approach exploits the insight that adversarial examples for tree ensembles tend to perturb a consistent but relatively small set of features. We show that we can quickly identify this set of features and use this knowledge to speedup constructing adversarial examples.", "keywords": "ML;Tree Ensembles;Verification;Evasion Attacks;Adversarial Attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Lorenzo Cascioli;Laurens Devos;Ondrej Kuzelka;Jesse Davis", "authorids": "~Lorenzo_Cascioli1;~Laurens_Devos1;~Ondrej_Kuzelka1;~Jesse_Davis1", "gender": "M;;M;M", "homepage": ";;https://ida.fel.cvut.cz/~kuzelka;https://people.cs.kuleuven.be/~jesse.davis/", "dblp": "294/6769;;21/4513;d/JesseDavis", "google_scholar": "https://scholar.google.be/citations?hl=en;https://scholar.google.be/citations?user=a3Uaj0gAAAAJ;;https://scholar.google.com.tw/citations?user=gz74XOYAAAAJ", "orcid": "0000-0003-4400-732X;0000-0002-1549-749X;;0000-0002-3748-9263", "linkedin": ";;;", "or_profile": "~Lorenzo_Cascioli1;~Laurens_Devos1;~Ondrej_Kuzelka1;~Jesse_Davis1", "aff": "KU Leuven;KU Leuven;Czech Technical University in Prague;KU Leuven", "aff_domain": "kuleuven.be;kuleuven.be;cvut.cz;kuleuven.be", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncascioli2024faster,\ntitle={Faster Repeated Evasion Attacks in Tree Ensembles},\nauthor={Lorenzo Cascioli and Laurens Devos and Ondrej Kuzelka and Jesse Davis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ugr0yPzY71}\n}", "github": "", "reviewers": "SynL;hmU3;fYgE;pRup;s5LR", "pdf_size": 970637, "rating": "4;5;6;6;7", "confidence": "4;4;4;3;2", "soundness": "3;3;3;3;4", "novelty": "2;2;3;2;2", "presentation": "3;2;3;3;4", "wc_summary": "161;92;197;249;377", "wc_strengths": "41;49;185;147;82", "wc_weaknesses": "111;159;214;125;75", "wc_questions": "43;148;72;62;232", "wc_limitations": "17;8;42;164;30", "wc_review": "373;456;710;747;796", "wc_reply_reviewers": "0;451;150;51;167", "wc_reply_authors": "0;472;21;19;74", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;2;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 215.2, 95.67528416472041 ], "wc_strengths_avg": [ 100.8, 56.27930347827698 ], "wc_weaknesses_avg": [ 136.8, 47.06761094425762 ], "wc_questions_avg": [ 111.4, 70.07881277533174 ], "wc_limitations_avg": [ 52.2, 57.07679037927763 ], "wc_review_avg": [ 616.4, 169.14207046149104 ], "wc_reply_reviewers_avg": [ 163.8, 156.35779481688786 ], "wc_reply_authors_avg": [ 117.2, 179.0993020645251 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7844645405527363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1kZjt9DITTYJ:scholar.google.com/&scioq=Faster+Repeated+Evasion+Attacks+in+Tree+Ensembles&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "kuleuven.be;kuleuven.be;cvut.cz;kuleuven.be", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Katholieke Universiteit Leuven;Czech Technical University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuleuven.be;https://www.ctu.cz", "aff_unique_abbr": "KU Leuven;CTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Prague", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Belgium;Czech Republic" }, { "title": "A Decision-Language Model (DLM) for Dynamic Restless Multi-Armed Bandit Tasks in Public Health", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94950", "id": "UiQkFXLfbu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UiQkFXLfbu", "openreview": "https://openreview.net/forum?id=UiQkFXLfbu", "poster": "/media/PosterPDFs/NeurIPS%202024/94950.png?t=1731688862.6366682", "project": "", "author_site": "Nikhil Behari, Edwin Zhang, YUNFAN ZHAO, Aparna Taneja, Dheeraj Nagaraj, Milind Tambe", "tldr": "", "abstract": "Restless multi-armed bandits (RMAB) have demonstrated success in optimizing resource allocation for large beneficiary populations in public health settings. Unfortunately, RMAB models lack flexibility to adapt to evolving public health policy priorities. Concurrently, Large Language Models (LLMs) have emerged as adept automated planners across domains of robotic control and navigation. In this paper, we propose a Decision Language Model (DLM) for RMABs, enabling dynamic fine-tuning of RMAB policies in public health settings using human-language commands. We propose using LLMs as automated planners to (1) interpret human policy preference prompts, (2) propose reward functions as code for a multi-agent RMAB environment, and (3) iterate on the generated reward functions using feedback from grounded RMAB simulations. We illustrate the application of DLM in collaboration with ARMMAN, an India-based non-profit promoting preventative care for pregnant mothers, that currently relies on RMAB policies to optimally allocate health worker calls to low-resource populations. We conduct a technology demonstration in simulation using the Gemini Pro model, showing DLM can dynamically shape policy outcomes using only human prompts as input.", "keywords": "public health;multi-agent systems;multi-armed bandits;LLMs", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/f7b4f28c6d2d23d64fd2f671ff80e3786a66e485.zip", "author": "Nikhil Behari;Edwin Zhang;YUNFAN ZHAO;Aparna Taneja;Dheeraj Mysore Nagaraj;Milind Tambe", "authorids": "~Nikhil_Behari1;~Edwin_Zhang2;~YUNFAN_ZHAO1;~Aparna_Taneja3;~Dheeraj_Mysore_Nagaraj1;~Milind_Tambe1", "gender": "M;;M;F;M;", "homepage": "https://www.media.mit.edu/people/nbehari/overview/;https://eddie.win;https://yzhao3685.github.io/;https://research.google/people/106890/;https://dheerajmn.mit.edu;http://teamcore.seas.harvard.edu/tambe", "dblp": "324/1587;;304/4614;58/9170;215/5097;67/2667", "google_scholar": "7Gd17cQAAAAJ;;wy_rqnwAAAAJ;XtMi1L0AAAAJ;0g80b7sAAAAJ;YOVZiJkAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;dheeraj-m-nagaraj-01739792/;", "or_profile": "~Nikhil_Behari1;~Edwin_Zhang2;~YUNFAN_ZHAO1;~Aparna_Taneja3;~Dheeraj_Mysore_Nagaraj1;~Milind_Tambe1", "aff": "Massachusetts Institute of Technology;Harvard University;Harvard University;Google Research;Google;Google", "aff_domain": "mit.edu;harvard.edu;g.harvard.edu;google.com;google.com;google.com", "position": "MS student;PhD student;Postdoc;Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nbehari2024a,\ntitle={A Decision-Language Model ({DLM}) for Dynamic Restless Multi-Armed Bandit Tasks in Public Health},\nauthor={Nikhil Behari and Edwin Zhang and YUNFAN ZHAO and Aparna Taneja and Dheeraj Mysore Nagaraj and Milind Tambe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UiQkFXLfbu}\n}", "github": "", "reviewers": "8Afe;R6Uk;zXh7", "pdf_size": 1547350, "rating": "5;6;7", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "133;63;158", "wc_strengths": "32;44;394", "wc_weaknesses": "154;35;83", "wc_questions": "104;69;67", "wc_limitations": "44;26;87", "wc_review": "467;237;789", "wc_reply_reviewers": "472;8;10", "wc_reply_authors": "1060;0;0", "reply_reviewers": "2;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.0, 40.2077936060494 ], "wc_strengths_avg": [ 156.66666666666666, 167.8914993546593 ], "wc_weaknesses_avg": [ 90.66666666666667, 48.883080463035014 ], "wc_questions_avg": [ 80.0, 16.990193249832878 ], "wc_limitations_avg": [ 52.333333333333336, 25.590796956892312 ], "wc_review_avg": [ 497.6666666666667, 226.39395359024547 ], "wc_reply_reviewers_avg": [ 163.33333333333334, 218.26182034937355 ], "wc_reply_authors_avg": [ 353.3333333333333, 499.6887920384936 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5207917043835631159&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "mit.edu;harvard.edu;g.harvard.edu;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;1;2;2;2", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University;Google", "aff_unique_dep": ";;Google Research", "aff_unique_url": "https://web.mit.edu;https://www.harvard.edu;https://research.google", "aff_unique_abbr": "MIT;Harvard;Google Research", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cross-modal Representation Flattening for Multi-modal Domain Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94949", "id": "UixTytSVOl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UixTytSVOl", "openreview": "https://openreview.net/forum?id=UixTytSVOl", "poster": "/media/PosterPDFs/NeurIPS%202024/94949.png?t=1730165131.2212577", "project": "", "author_site": "Yunfeng FAN, Wenchao Xu, Haozhao Wang, Song Guo", "tldr": "", "abstract": "Multi-modal domain generalization (MMDG) requires that models trained on multi-modal source domains can generalize to unseen target distributions with the same modality set. Sharpness-aware minimization (SAM) is an effective technique for traditional uni-modal domain generalization (DG), however, with limited improvement in MMDG. In this paper, we identify that modality competition and discrepant uni-modal flatness are two main factors that restrict multi-modal generalization. To overcome these challenges, we propose to construct consistent flat loss regions and enhance knowledge exploitation for each modality via cross-modal knowledge transfer. Firstly, we turn to the optimization on representation-space loss landscapes instead of traditional parameter space, which allows us to build connections between modalities directly. Then, we introduce a novel method to flatten the high-loss region between minima from different modalities by interpolating mixed multi-modal representations. We implement this method by distilling and optimizing generalizable interpolated representations and assigning distinct weights for each modality considering their divergent generalization capabilities. Extensive experiments are performed on two benchmark datasets, EPIC-Kitchens and Human-Animal-Cartoon (HAC), with various modality combinations, demonstrating the effectiveness of our method under multi-source and single-source settings. Our code is open-sourced.", "keywords": "Multimodal Learning;Domain Generalization;Sharpness-aware Minimization;Representation Flattening", "primary_area": "machine_vision", "supplementary_material": "/attachment/67e4fde834909523816d7e5c4a3c0eef61e50157.zip", "author": "Yunfeng FAN;Wenchao Xu;Haozhao Wang;Song Guo", "authorids": "~Yunfeng_FAN1;~Wenchao_Xu1;~Haozhao_Wang1;~Song_Guo5", "gender": "M;;M;M", "homepage": "https://fyf-simon.github.io/;;https://wanghaozhao.mysxl.cn/;https://cse.hkust.edu.hk/~songguo/", "dblp": "144/9651;;224/4500.html;01/267-1", "google_scholar": "6usJ0SgAAAAJ;;https://scholar.google.com.hk/citations?user=yFrOuMEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2277-5355;;0000-0002-7591-5315;", "linkedin": ";;;", "or_profile": "~Yunfeng_FAN1;~Wenchao_Xu1;~Haozhao_Wang1;~Song_Guo5", "aff": "Hong Kong Polytechnic University;;Huazhong University of Science and Technology;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "polyu.edu.hk;;hust.edu.cn;cse.ust.hk", "position": "PhD student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nfan2024crossmodal,\ntitle={Cross-modal Representation Flattening for Multi-modal Domain Generalization},\nauthor={Yunfeng FAN and Wenchao Xu and Haozhao Wang and Song Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UixTytSVOl}\n}", "github": "", "reviewers": "eDBu;8XoV;iu3y", "pdf_size": 446273, "rating": "5;6;6", "confidence": "3;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "100;90;79", "wc_strengths": "60;155;57", "wc_weaknesses": "76;89;42", "wc_questions": "10;61;39", "wc_limitations": "9;6;29", "wc_review": "255;401;246", "wc_reply_reviewers": "15;15;21", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.66666666666667, 8.576453553512405 ], "wc_strengths_avg": [ 90.66666666666667, 45.507020215444655 ], "wc_weaknesses_avg": [ 69.0, 19.8158185969358 ], "wc_questions_avg": [ 36.666666666666664, 20.885933597094056 ], "wc_limitations_avg": [ 14.666666666666666, 10.208928554075703 ], "wc_review_avg": [ 300.6666666666667, 71.04145894397784 ], "wc_reply_reviewers_avg": [ 17.0, 2.8284271247461903 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11266936156274377551&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "polyu.edu.hk;;hust.edu.cn;cse.ust.hk", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Hong Kong Polytechnic University;Huazhong University of Science and Technology;Hong Kong University of Science and Technology", "aff_unique_dep": ";;Department of Computer Science and Engineering", "aff_unique_url": "https://www.polyu.edu.hk;http://www.hust.edu.cn;https://www.ust.hk", "aff_unique_abbr": "PolyU;HUST;HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Ad Auctions for LLMs via Retrieval Augmented Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94948", "id": "Ujo8V7iXmR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ujo8V7iXmR", "openreview": "https://openreview.net/forum?id=Ujo8V7iXmR", "poster": "", "project": "", "author_site": "MohammadTaghi Hajiaghayi, S\u00e9bastien Lahaie, Keivan Rezaei, Suho Shin", "tldr": "", "abstract": "In the field of computational advertising, the integration of ads into the outputs of large language models (LLMs) presents an opportunity to support these services without compromising content integrity. This paper introduces novel auction mechanisms for ad allocation and pricing within the textual outputs of LLMs, leveraging retrieval-augmented generation (RAG). We propose a \\emph{segment auction} where an ad is probabilistically retrieved for each discourse segment (paragraph, section, or entire output) according to its bid and relevance, following the RAG framework, and priced according to competing bids. We show that our auction maximizes logarithmic social welfare, a new notion of welfare that balances allocation efficiency and fairness, and we characterize the associated incentive-compatible pricing rule. These results are extended to multi-ad allocation per segment. An empirical evaluation validates the feasibility and effectiveness of our approach over several ad auction scenarios, and exhibits inherent tradeoffs in metrics as we allow the LLM more flexibility to allocate ads.", "keywords": "Large Language Model;Online Advertising;Auction Design", "primary_area": "other", "supplementary_material": "/attachment/c719ff7b475e6f6e9ffe8fefc825eec04635effe.zip", "author": "MohammadTaghi Hajiaghayi;Sebastien Lahaie;Keivan Rezaei;Suho Shin", "authorids": "~MohammadTaghi_Hajiaghayi1;~Sebastien_Lahaie1;~Keivan_Rezaei1;~Suho_Shin1", "gender": "M;M;M;M", "homepage": "http://www.cs.umd.edu/~hajiagha/;http://slahaie.net;https://k1rezaei.github.io;https://suhoshin.github.io/", "dblp": "334/4488;41/1766.html;339/7254;218/5505", "google_scholar": "https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ;a8MrE5EAAAAJ;NsJKrKIAAAAJ;-p5eVQsAAAAJ", "orcid": "0000-0003-4842-0533;0000-0002-7828-7289;;", "linkedin": "mohammad-hajiaghayi-2139a913a&ved=2ahUKEwjMyeH-5-_-AhV3K1kFHeeBDKwQjjh6BAgSEAE&usg=AOvVaw1NSVoT5FCGtOTi4eT8nr4b;s%C3%A9bastien-lahaie-a5158311a/;keivan-rezaei-1b434680/;", "or_profile": "~MohammadTaghi_Hajiaghayi1;~Sebastien_Lahaie1;~Keivan_Rezaei1;~Suho_Shin1", "aff": "University of Maryland, College Park;Google;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;google.com;umd.edu;umd.edu", "position": "Full Professor;Research Scientist;PhD student;PhD student", "bibtex": "@inproceedings{\nhajiaghayi2024ad,\ntitle={Ad Auctions for {LLM}s via Retrieval Augmented Generation},\nauthor={MohammadTaghi Hajiaghayi and Sebastien Lahaie and Keivan Rezaei and Suho Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ujo8V7iXmR}\n}", "github": "", "reviewers": "2L4f;VuMT;xTCf;zmfd", "pdf_size": 587348, "rating": "4;4;7;7", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "novelty": "1;2;3;4", "presentation": "2;2;3;3", "wc_summary": "111;56;54;58", "wc_strengths": "32;41;29;29", "wc_weaknesses": "508;215;21;27", "wc_questions": "133;4;15;44", "wc_limitations": "69;4;1;6", "wc_review": "853;320;120;164", "wc_reply_reviewers": "529;0;147;0", "wc_reply_authors": "678;117;12;0", "reply_reviewers": "2;0;2;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.75, 23.85765076448224 ], "wc_strengths_avg": [ 32.75, 4.9180788932265 ], "wc_weaknesses_avg": [ 192.75, 198.02067442567707 ], "wc_questions_avg": [ 49.0, 50.6507650485163 ], "wc_limitations_avg": [ 20.0, 28.34607556611673 ], "wc_review_avg": [ 364.25, 291.80162353900636 ], "wc_reply_reviewers_avg": [ 169.0, 216.33654337628676 ], "wc_reply_authors_avg": [ 201.75, 278.7044805883106 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6619513690203213120&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "umd.edu;google.com;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Maryland;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www/umd.edu;https://www.google.com", "aff_unique_abbr": "UMD;Google", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "College Park;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ProTransformer: Robustify Transformers via Plug-and-Play Paradigm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94947", "id": "UkauUrTbxx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UkauUrTbxx", "openreview": "https://openreview.net/forum?id=UkauUrTbxx", "poster": "/media/PosterPDFs/NeurIPS%202024/94947.png?t=1731438604.5070643", "project": "", "author_site": "Zhichao Hou, Weizhi Gao, Yuchen Shen, Feiyi Wang, Xiaorui Liu", "tldr": "", "abstract": "Transformer-based architectures have dominated various areas of machine learning in recent years. In this paper, we introduce a novel robust attention mechanism designed to enhance the resilience of transformer-based architectures. Crucially, this technique can be integrated into existing transformers as a plug-and-play layer, improving their robustness without the need for additional training or fine-tuning. Through comprehensive experiments and ablation studies, we demonstrate that our ProTransformer significantly enhances the robustness of transformer models across a variety of prediction tasks, attack mechanisms, backbone architectures, and data domains. Notably, without further fine-tuning, the ProTransformer consistently improves the performance of vanilla transformers by 19.5\\%, 28.3\\%, 16.1\\%, and 11.4\\% for BERT, ALBERT, DistilBERT, and RoBERTa, respectively, under the classical TextFooler attack. Furthermore, ProTransformer shows promising resilience in large language models (LLMs) against prompting-based attacks, improving the performance of T5 and LLaMA by 24.8\\% and 17.8\\%, respectively, and enhancing Vicuna by an average of 10.4\\% against the Jailbreaking attack. Beyond the language domain, ProTransformer also demonstrates outstanding robustness in both vision and graph domains.", "keywords": "Transformers;Adversarial Robustness;Large Language Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zhichao Hou;Weizhi Gao;Yuchen Shen;Feiyi Wang;Xiaorui Liu", "authorids": "~Zhichao_Hou1;~Weizhi_Gao1;~Yuchen_Shen3;~Feiyi_Wang1;~Xiaorui_Liu1", "gender": "M;M;M;M;M", "homepage": "https://chris-hzc.github.io/;https://weizhigao.github.io/;https://a-chicharito-s.github.io/;;https://sites.google.com/ncsu.edu/xiaorui/", "dblp": "188/4064;335/0844;;;172/0995", "google_scholar": "rraC4ZMAAAAJ;8DMz6dUAAAAJ;MxGrJz0AAAAJ;1JMwC1sAAAAJ;NhvN1KoAAAAJ", "orcid": "0000-0002-3989-2654;;;;0000-0001-8217-5688", "linkedin": "zhichao-hou-b022931a4/;weizhi-gao-888052254/;;;", "or_profile": "~Zhichao_Hou1;~Weizhi_Gao1;~Yuchen_Shen3;~Feiyi_Wang1;~Xiaorui_Liu1", "aff": "Amazon;North Carolina State University;Carnegie Mellon University;Oak Ridge National Laboratory;North Carolina State University", "aff_domain": "amazon.com;ncsu.edu;cmu.edu;ornl.gov;ncsu.edu", "position": "Intern;PhD student;MS student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhou2024protransformer,\ntitle={ProTransformer: Robustify Transformers via Plug-and-Play Paradigm},\nauthor={Zhichao Hou and Weizhi Gao and Yuchen Shen and Feiyi Wang and Xiaorui Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UkauUrTbxx}\n}", "github": "", "reviewers": "JZTY;Yhky;h5FP;nL5G", "pdf_size": 5151373, "rating": "4;5;6;6", "confidence": "1;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "16;72;70;106", "wc_strengths": "33;91;105;126", "wc_weaknesses": "83;105;86;144", "wc_questions": "16;37;48;146", "wc_limitations": "8;6;22;84", "wc_review": "156;311;331;606", "wc_reply_reviewers": "0;24;9;13", "wc_reply_authors": "0;0;0;38", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.0, 32.218007387174026 ], "wc_strengths_avg": [ 88.75, 34.513584282134474 ], "wc_weaknesses_avg": [ 104.5, 24.315632831575655 ], "wc_questions_avg": [ 61.75, 49.98187171365234 ], "wc_limitations_avg": [ 30.0, 31.78049716414141 ], "wc_review_avg": [ 351.0, 162.0570887064185 ], "wc_reply_reviewers_avg": [ 11.5, 8.616843969807043 ], "wc_reply_authors_avg": [ 9.5, 16.454482671904334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6007839165203021530&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "amazon.com;ncsu.edu;cmu.edu;ornl.gov;ncsu.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Amazon;North Carolina State University;Carnegie Mellon University;Oak Ridge National Laboratory", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.ncsu.edu;https://www.cmu.edu;https://www.ornl.gov", "aff_unique_abbr": "Amazon;NCSU;CMU;ORNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "StrategyLLM: Large Language Models as Strategy Generators, Executors, Optimizers, and Evaluators for Problem Solving", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94946", "id": "UkxJd64mki", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UkxJd64mki", "openreview": "https://openreview.net/forum?id=UkxJd64mki", "poster": "/media/PosterPDFs/NeurIPS%202024/94946.png?t=1731225531.5608706", "project": "", "author_site": "Chang Gao, Haiyun Jiang, Deng Cai, Shuming Shi, Wai Lam", "tldr": "", "abstract": "Most existing prompting methods suffer from the issues of generalizability and consistency, as they often rely on instance-specific solutions that may not be applicable to other instances and lack task-level consistency across the selected few-shot examples. To address these limitations, we propose a comprehensive framework, StrategyLLM, allowing LLMs to perform inductive reasoning, deriving general strategies from specific task instances, and deductive reasoning, applying these general strategies to particular task examples, for constructing generalizable and consistent few-shot prompts. It employs four LLM-based agents: strategy generator, executor, optimizer, and evaluator, working together to generate, evaluate, and select promising strategies for a given task. Experimental results demonstrate that StrategyLLM outperforms the competitive baseline CoT-SC that requires human-annotated solutions on 13 datasets across 4 challenging tasks without human involvement, including math reasoning (34.2\\% $\\rightarrow$ 38.8\\%), commonsense reasoning (70.3\\% $\\rightarrow$ 72.5\\%), algorithmic reasoning (73.7\\% $\\rightarrow$ 85.0\\%), and symbolic reasoning (30.0\\% $\\rightarrow$ 79.2\\%). Further analysis reveals that StrategyLLM is applicable to various LLMs and demonstrates advantages across numerous scenarios.", "keywords": "chain-of-thought;generalizability;strategy;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Chang Gao;Haiyun Jiang;Deng Cai;Shuming Shi;Wai Lam", "authorids": "~Chang_Gao1;~Haiyun_Jiang1;~Deng_Cai1;~Shuming_Shi1;~Wai_Lam1", "gender": "M;M;M;M;M", "homepage": "https://gao-xiao-bai.github.io/;;https://jcyk.github.io/;;http://www.se.cuhk.edu.hk/~textmine", "dblp": ";;c/DCai-2;s/ShumingShi;48/1707", "google_scholar": "6lLqRasAAAAJ;fk684xEAAAAJ;KpbRLYcAAAAJ;Lg31AKMAAAAJ;ewA4NAcAAAAJ", "orcid": "0000-0002-7588-0159;;;;", "linkedin": ";;;;", "or_profile": "~Chang_Gao1;~Haiyun_Jiang1;~Deng_Cai1;~Shuming_Shi1;~Wai_Lam1", "aff": "The Chinese University of Hong Kong;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;tencent.com;tencent.com;tencent.com;cuhk.edu.hk", "position": "PhD student;Researcher;Research Scientist;Principal Researcher;Professor", "bibtex": "@inproceedings{\ngao2024strategyllm,\ntitle={Strategy{LLM}: Large Language Models as Strategy Generators, Executors, Optimizers, and Evaluators for Problem Solving},\nauthor={Chang Gao and Haiyun Jiang and Deng Cai and Shuming Shi and Wai Lam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UkxJd64mki}\n}", "github": "", "reviewers": "D2vF;s895;WbXZ;CCvY", "pdf_size": 687765, "rating": "5;6;6;6", "confidence": "3;4;3;5", "soundness": "1;4;3;3", "novelty": "1;3;3;3", "presentation": "2;4;3;3", "wc_summary": "94;67;170;106", "wc_strengths": "45;58;215;42", "wc_weaknesses": "131;43;268;114", "wc_questions": "93;23;340;74", "wc_limitations": "54;40;49;1", "wc_review": "417;231;1042;337", "wc_reply_reviewers": "330;40;56;17", "wc_reply_authors": "1865;0;62;0", "reply_reviewers": "4;1;1;1", "reply_authors": "5;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.25, 37.81120865563543 ], "wc_strengths_avg": [ 90.0, 72.41892017974308 ], "wc_weaknesses_avg": [ 139.0, 81.46471628870992 ], "wc_questions_avg": [ 132.5, 122.50408156465645 ], "wc_limitations_avg": [ 36.0, 20.820662813657012 ], "wc_review_avg": [ 506.75, 315.9908028724887 ], "wc_reply_reviewers_avg": [ 110.75, 127.34083202178317 ], "wc_reply_authors_avg": [ 481.75, 799.0207678777817 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2190300786639578677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cuhk.edu.hk;tencent.com;tencent.com;tencent.com;cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://ai.tencent.com", "aff_unique_abbr": "CUHK;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AGILE: A Novel Reinforcement Learning Framework of LLM Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94945", "id": "Ul3lDYo3XQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ul3lDYo3XQ", "openreview": "https://openreview.net/forum?id=Ul3lDYo3XQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94945.png?t=1730388879.2482576", "project": "", "author_site": "Feng Peiyuan, Yichen He, Guanhua Huang, Yuan Lin, Hanchong Zhang, Yuchen Zhang, Hang Li", "tldr": "", "abstract": "We introduce a novel reinforcement learning framework of LLM agents named AGILE (AGent that Interacts and Learns from Environments) designed to perform complex conversational tasks with users, leveraging LLMs, memory, tools, and interactions with experts. The agent possesses capabilities beyond conversation, including reflection, tool usage, and expert consultation. We formulate the construction of such an LLM agent as a reinforcement learning (RL) problem, in which the LLM serves as the policy model. We fine-tune the LLM using labeled data of actions and the PPO algorithm. We focus on question answering and release a dataset for agents called ProductQA, comprising challenging questions in online shopping. Our extensive experiments on ProductQA, MedMCQA and HotPotQA show that AGILE agents based on 7B and 13B LLMs trained with PPO can outperform GPT-4 agents. Our ablation study highlights the indispensability of memory, tools, consultation, reflection, and reinforcement learning in achieving the agent's strong performance. Datasets and code are available at https://github.com/bytarnish/AGILE.", "keywords": "LLM agent;reinforcement learning;LLM-human interaction", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/6070af88f44ccce7d7fa9b97dd1e951fdcc40ed8.zip", "author": "Peiyuan Feng;Yichen He;Guanhua Huang;Yuan Lin;Hanchong Zhang;Yuchen Zhang;Hang Li", "authorids": "~Peiyuan_Feng1;~Yichen_He1;~Guanhua_Huang1;~Yuan_Lin3;~Hanchong_Zhang1;~Yuchen_Zhang1;~Hang_Li4", "gender": "M;M;M;F;M;M;M", "homepage": ";https://hyc2026.github.io/;;;;https://hangli-hl.github.io/;", "dblp": ";;88/9542;;348/6967;https://dblp.org/pers/hd/l/Li_0001:Hang;09/5661-2", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;SEgFVw0AAAAJ;wD4HrKkAAAAJ;4xNsDNgAAAAJ;nTl5mSwAAAAJ;Om4Lag0AAAAJ", "orcid": ";0009-0009-7357-9221;;;0000-0003-1152-4355;0000-0001-9628-3487;", "linkedin": ";;;;;hang-li-84aa6314/;", "or_profile": "~Peiyuan_Feng1;~Yichen_He1;~Guanhua_Huang1;~Yuan_Lin3;~Hanchong_Zhang1;~Hang_Li4;~Yuchen_Zhang2", "aff": "ByteDance Inc.;Beihang University;University of Science and Technology of China;ByteDance;Shanghai Jiaotong University;ByteDance Technology;ByteDance Inc.", "aff_domain": "bytedance.com;buaa.edu.cn;mail.ustc.edu.cn;bytedance.com;sjtu.edu.cn;bytedance.com;bytedance.com", "position": "Researcher;MS student;PhD student;Researcher;MS student;Head of Research;Researcher", "bibtex": "@inproceedings{\nfengpeiyuan2024agile,\ntitle={{AGILE}: A Novel Reinforcement Learning Framework of {LLM} Agents},\nauthor={Peiyuan Feng and Yichen He and Guanhua Huang and Yuan Lin and Hanchong Zhang and Yuchen Zhang and Hang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ul3lDYo3XQ}\n}", "github": "", "reviewers": "5wwv;b4yX;xRs2;ZJZM", "pdf_size": 1166260, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;4;3", "wc_summary": "101;232;79;42", "wc_strengths": "75;169;79;74", "wc_weaknesses": "94;636;46;47", "wc_questions": "77;220;43;16", "wc_limitations": "2;110;6;1", "wc_review": "349;1367;253;180", "wc_reply_reviewers": "0;485;7;14", "wc_reply_authors": "144;1727;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "3;6;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 113.5, 71.5908513708281 ], "wc_strengths_avg": [ 99.25, 40.31361432568407 ], "wc_weaknesses_avg": [ 205.75, 249.16096704740895 ], "wc_questions_avg": [ 89.0, 78.66066361276137 ], "wc_limitations_avg": [ 29.75, 46.370114297896656 ], "wc_review_avg": [ 537.25, 482.79103916705 ], "wc_reply_reviewers_avg": [ 126.5, 207.03924748704048 ], "wc_reply_authors_avg": [ 467.75, 729.4012527409039 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 2.0463381929681126 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14158837866128448259&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "bytedance.com;buaa.edu.cn;mail.ustc.edu.cn;bytedance.com;sjtu.edu.cn;bytedance.com;bytedance.com", "author_num": 7, "aff_unique_index": "0;1;2;0;3;0;0", "aff_unique_norm": "ByteDance;Beihang University;University of Science and Technology of China;Shanghai Jiao Tong University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.bytedance.com;http://www.buaa.edu.cn/;http://www.ustc.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "ByteDance;BUAA;USTC;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "No Filter: Cultural and Socioeconomic Diversity in Contrastive Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94944", "id": "UmW9BYj761", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UmW9BYj761", "openreview": "https://openreview.net/forum?id=UmW9BYj761", "poster": "/media/PosterPDFs/NeurIPS%202024/94944.png?t=1731404439.2154696", "project": "", "author_site": "Ang\u00e9line Pouget, Lucas Beyer, Emanuele Bugliarello, Xiao Wang, Andreas Steiner, Xiaohua Zhai, Ibrahim Alabdulmohsin", "tldr": "", "abstract": "We study cultural and socioeconomic diversity in contrastive vision-language models (VLMs). Using a broad range of benchmark datasets and evaluation metrics, we bring to attention several important findings. First, the common filtering of training data to English image-text pairs disadvantages communities of lower socioeconomic status and negatively impacts cultural understanding. Notably, this performance gap is not captured by - and even at odds with - the currently popular evaluation metrics derived from the Western-centric ImageNet and COCO datasets. Second, pretraining with global, unfiltered data before fine-tuning on English content can improve cultural understanding without sacrificing performance on said popular benchmarks. Third, we introduce the task of geo-localization as a novel evaluation metric to assess cultural diversity in VLMs. Our work underscores the value of using diverse data to create more inclusive multimodal systems and lays the groundwork for developing VLMs that better represent global perspectives.", "keywords": "cultural diversity;benchmarks;vision-language models", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Ang\u00e9line Pouget;Lucas Beyer;Emanuele Bugliarello;Xiao Wang;Andreas Peter Steiner;Xiaohua Zhai;Ibrahim Alabdulmohsin", "authorids": "~Ang\u00e9line_Pouget1;~Lucas_Beyer1;~Emanuele_Bugliarello1;~Xiao_Wang5;~Andreas_Peter_Steiner1;~Xiaohua_Zhai2;~Ibrahim_Alabdulmohsin1", "gender": "F;;M;M;M;;M", "homepage": "https://angelinepouget.github.io/;http://lucasb.eyer.be;http://e-bug.github.io/;;;;http://ibomohsin.com", "dblp": ";126/4720;241/9497;49/67-38;s/AndreasSteiner;66/636;153/5393", "google_scholar": "7vjURk0AAAAJ;p2gwhK4AAAAJ;9yc1aXYAAAAJ;ukyXqzMAAAAJ;;;8WNMsPYAAAAJ", "orcid": ";;0000-0002-2999-7081;;;;", "linkedin": "angelinepouget/;;emanuelebugliarello/;;andreas-steiner-1859223b/;;", "or_profile": "~Ang\u00e9line_Pouget1;~Lucas_Beyer1;~Emanuele_Bugliarello1;~Xiao_Wang5;~Andreas_Peter_Steiner1;~Xiaohua_Zhai2;~Ibrahim_Alabdulmohsin1", "aff": "ETHZ - ETH Zurich;Google Brain;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google", "aff_domain": "ethz.ch;google.com;google.com;google.com;deepmind.com;google.com;google.com", "position": "MS student;Researcher;Researcher;Researcher;Research Engineer;Researcher;Research Scientist", "bibtex": "@inproceedings{\npouget2024no,\ntitle={No Filter: Cultural and Socioeconomic Diversity in Contrastive Vision-Language Models},\nauthor={Ang{\\'e}line Pouget and Lucas Beyer and Emanuele Bugliarello and Xiao Wang and Andreas Peter Steiner and Xiaohua Zhai and Ibrahim Alabdulmohsin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UmW9BYj761}\n}", "github": "", "reviewers": "nvDK;mxHk;JWWG;dZWv", "pdf_size": 1893578, "rating": "5;5;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;2", "novelty": "1;2;3;3", "presentation": "3;3;2;3", "wc_summary": "65;93;64;85", "wc_strengths": "34;48;82;84", "wc_weaknesses": "147;53;72;185", "wc_questions": "119;32;107;73", "wc_limitations": "29;1;4;7", "wc_review": "394;227;329;434", "wc_reply_reviewers": "76;14;79;158", "wc_reply_authors": "108;62;26;358", "reply_reviewers": "1;1;1;2", "reply_authors": "3;2;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 12.577261228105266 ], "wc_strengths_avg": [ 62.0, 21.587033144922902 ], "wc_weaknesses_avg": [ 114.25, 53.88587477252271 ], "wc_questions_avg": [ 82.75, 33.81105588413352 ], "wc_limitations_avg": [ 10.25, 11.031205736455105 ], "wc_review_avg": [ 346.0, 78.25918476447349 ], "wc_reply_reviewers_avg": [ 81.75, 51.09977984296997 ], "wc_reply_authors_avg": [ 138.5, 130.01826794723885 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7733316419023277161&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ethz.ch;google.com;google.com;google.com;deepmind.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "ETH Zurich;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.ethz.ch;https://brain.google.com", "aff_unique_abbr": "ETHZ;Google Brain", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;2;2;2;1", "aff_country_unique": "Switzerland;United States;United Kingdom" }, { "title": "Me, Myself, and AI: The Situational Awareness Dataset (SAD) for LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97669", "id": "UnWhcpIyUC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UnWhcpIyUC", "openreview": "https://openreview.net/forum?id=UnWhcpIyUC", "poster": "", "project": "", "author_site": "Rudolf Laine, Bilal Chughtai, Jan Betley, Kaivalya Hariharan, Mikita Balesni, J\u00e9r\u00e9my Scheurer, Marius Hobbhahn, Alexander Meinke, Owain Evans", "tldr": "", "abstract": "AI assistants such as ChatGPT are trained to respond to users by saying, \"I am a large language model\u201d.\nThis raises questions. Do such models \"know'' that they are LLMs and reliably act on this knowledge? Are they \"aware\" of their current circumstances, such as being deployed to the public?\nWe refer to a model's knowledge of itself and its circumstances as **situational awareness**.\nTo quantify situational awareness in LLMs, we introduce a range of behavioral tests, based on question answering and instruction following. These tests form the **Situational Awareness Dataset (SAD)**, a benchmark comprising 7 task categories and over 13,000 questions.\nThe benchmark tests numerous abilities, including the capacity of LLMs to (i) recognize their own generated text, (ii) predict their own behavior, (iii) determine whether a prompt is from internal evaluation or real-world deployment, and (iv) follow instructions that depend on self-knowledge.\nWe evaluate 16 LLMs on SAD, including both base (pretrained) and chat models.\nWhile all models perform better than chance, even the highest-scoring model (Claude 3 Opus) is far from a human baseline on certain tasks. We also observe that performance on SAD is only partially predicted by metrics of general knowledge. \nChat models, which are finetuned to serve as AI assistants, outperform their corresponding base models on SAD but not on general knowledge tasks.\nThe purpose of SAD is to facilitate scientific understanding of situational awareness in LLMs by breaking it down into quantitative abilities. Situational awareness is important because it enhances a model's capacity for autonomous planning and action. While this has potential benefits from automation, it also introduces novel risks related to AI safety and control.", "keywords": "evaluations;AI safety;situational awareness;benchmarks", "primary_area": "", "supplementary_material": "/attachment/3e0dd514ef93d77bf0c7ba8bd29cf6f6ce5fa923.zip", "author": "Rudolf Laine;Bilal Chughtai;Jan Betley;Kaivalya Hariharan;Mikita Balesni;J\u00e9r\u00e9my Scheurer;Marius Hobbhahn;Alexander Meinke;Owain Evans", "authorids": "~Rudolf_Laine1;~Bilal_Chughtai1;~Jan_Betley1;~Kaivalya_Hariharan1;~Mikita_Balesni1;~J\u00e9r\u00e9my_Scheurer1;~Marius_Hobbhahn1;~Alexander_Meinke1;~Owain_Evans1", "gender": ";M;M;M;M;M;;M;", "homepage": "https://www.strataoftheworld.com/;;;;https://mikitabalesni.com;;http://www.mariushobbhahn.com;;https://owainevans.github.io/", "dblp": ";;;;;;260/0039;249/5767;52/10432", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;mDXcNBMAAAAJ;_6nYXQYAAAAJ;SJ1y8o0AAAAJ;https://scholar.google.de/citations?user=PqHTP_AAAAAJ;4VpTwzIAAAAJ", "orcid": ";;0009-0008-3518-191X;;;;;;", "linkedin": "rudolf-laine-b1b79a1b6/;https://www.linkedin.com/feed/;jan-betley-118555127/;kaivalya-hariharan-44a698204;mbalesni/;j%C3%A9r%C3%A9my-scheurer-927563b0/;;alexander-meinke-a32904173/;", "or_profile": "~Rudolf_Laine1;~Bilal_Chughtai1;~Jan_Betley1;~Kaivalya_Hariharan1;~Mikita_Balesni1;~J\u00e9r\u00e9my_Scheurer1;~Marius_Hobbhahn1;~Alexander_Meinke1;~Owain_Evans1", "aff": ";;Owain Evans' research group;Massachusetts Institute of Technology;Apollo Research;Apollo Research;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Truthful AI", "aff_domain": ";;my-institution-has-no-specific-domain-sorry.com;mit.edu;apolloresearch.ai;apolloresearch.ai;tue.mpg.de;is.mpg.de;owainevans.com", "position": ";;Researcher;Undergrad student;Researcher;Researcher;PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nlaine2024me,\ntitle={Me, Myself, and {AI}: The Situational Awareness Dataset ({SAD}) for {LLM}s},\nauthor={Rudolf Laine and Bilal Chughtai and Jan Betley and Kaivalya Hariharan and Mikita Balesni and J{\\'e}r{\\'e}my Scheurer and Marius Hobbhahn and Alexander Meinke and Owain Evans},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=UnWhcpIyUC}\n}", "github": "", "reviewers": "zgQo;Urm1;mZNm;u37R", "pdf_size": 5131041, "rating": "4;6;7;8", "confidence": "3;4;3;4", "wc_summary_and_contributions": "69;53;70;175", "wc_strengths": "11;65;43;5", "wc_improvement": "201;148;53;147", "wc_limitations": "17;1;8;61", "wc_correctness": "1;46;9;21", "wc_clarity": "1;4;6;8", "wc_relation_to_prior_work": "1;1;4;11", "wc_documentation": "13;1;8;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "315;320;202;436", "wc_reply_reviewers": "99;31;0;0", "wc_reply_authors": "570;0;45;166", "reply_reviewers": "1;1;0;0", "reply_authors": "7;1;2;3", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 91.75, 48.53542520674976 ], "wc_strengths_avg": [ 31.0, 24.372115213907882 ], "wc_improvement_avg": [ 137.25, 53.32154817707378 ], "wc_limitations_avg": [ 21.75, 23.35995505132662 ], "wc_correctness_avg": [ 19.25, 17.005513811702368 ], "wc_clarity_avg": [ 4.75, 2.5860201081971503 ], "wc_relation_to_prior_work_avg": [ 4.25, 4.085033659592048 ], "wc_documentation_avg": [ 7.25, 4.264680527307995 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 318.25, 82.75377634887728 ], "wc_reply_reviewers_avg": [ 32.5, 40.425858061394315 ], "wc_reply_authors_avg": [ 195.25, 224.716905238569 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 2.277608394786075 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13235583375283119996&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;my-institution-has-no-specific-domain-sorry.com;mit.edu;apolloresearch.ai;apolloresearch.ai;tue.mpg.de;is.mpg.de;owainevans.com", "author_num": 9, "aff_unique_index": "0;1;2;2;3;4;5", "aff_unique_norm": "University of Cambridge;Massachusetts Institute of Technology;Apollo Research;Max Planck Institute for Intelligent Systems;Max-Planck-Institute for Intelligent Systems;Truthful AI", "aff_unique_dep": ";;;Intelligent Systems;Intelligent Systems;", "aff_unique_url": "https://www.cam.ac.uk;https://web.mit.edu;;https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de;", "aff_unique_abbr": ";MIT;;MPI-IS;MPI-IS;", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;3;3", "aff_country_unique": "United Kingdom;United States;;Germany" }, { "title": "Spectral Adapter: Fine-Tuning in Spectral Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94943", "id": "UoxuaOGV6B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UoxuaOGV6B", "openreview": "https://openreview.net/forum?id=UoxuaOGV6B", "poster": "/media/PosterPDFs/NeurIPS%202024/94943.png?t=1730267602.4723423", "project": "", "author_site": "Fangzhao Zhang, Mert Pilanci", "tldr": "", "abstract": "Recent developments in Parameter-Efficient Fine-Tuning (PEFT) methods for pretrained deep neural networks have captured widespread interest. In this work, we study the enhancement of current PEFT methods by incorporating the spectral information of pretrained weight matrices into the fine-tuning procedure. We investigate two spectral adaptation mechanisms, namely additive tuning and orthogonal rotation of the top singular vectors, both are done via first carrying out Singular Value Decomposition (SVD) of pretrained weights and then fine-tuning the top spectral space. We provide a theoretical analysis of spectral fine-tuning and show that our approach improves the rank capacity of low-rank adapters given a fixed trainable parameter budget. We show through extensive experiments that the proposed fine-tuning model enables better parameter efficiency and tuning performance as well as benefits multi-adapter fusion. The source code will be open-sourced for reproducibility.", "keywords": "fine-tuning; peft; spectral decomposition; transfer learning in spectral space", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/530189a58b26929a7cafd24cafa2c4afbd93278a.zip", "author": "Fangzhao Zhang;Mert Pilanci", "authorids": "~Fangzhao_Zhang1;~Mert_Pilanci3", "gender": ";M", "homepage": ";https://stanford.edu/~pilanci/", "dblp": ";45/8056", "google_scholar": ";aSAS-aAAAAAJ", "orcid": ";", "linkedin": ";mert-pilanci-ba615743/", "or_profile": "~Fangzhao_Zhang1;~Mert_Pilanci3", "aff": ";Stanford University", "aff_domain": ";stanford.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nzhang2024spectral,\ntitle={Spectral Adapter: Fine-Tuning in Spectral Space},\nauthor={Fangzhao Zhang and Mert Pilanci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UoxuaOGV6B}\n}", "github": "", "reviewers": "7N9o;N6x8;qJVf;Nthn", "pdf_size": 47561362, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "63;42;51;71", "wc_strengths": "49;51;48;83", "wc_weaknesses": "156;79;242;109", "wc_questions": "1;113;73;31", "wc_limitations": "9;1;206;146", "wc_review": "278;286;620;440", "wc_reply_reviewers": "23;17;97;0", "wc_reply_authors": "1320;76;735;1433", "reply_reviewers": "1;1;1;0", "reply_authors": "5;2;3;5", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 56.75, 11.098986440211556 ], "wc_strengths_avg": [ 57.75, 14.618053906043718 ], "wc_weaknesses_avg": [ 146.5, 61.58936596523786 ], "wc_questions_avg": [ 54.5, 42.36448984704053 ], "wc_limitations_avg": [ 90.5, 88.13767639324286 ], "wc_review_avg": [ 406.0, 139.40588222883568 ], "wc_reply_reviewers_avg": [ 34.25, 37.19795021234369 ], "wc_reply_authors_avg": [ 891.0, 539.9921295722744 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6105141879650571266&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";stanford.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "UqvAFl0lkT", "title": "EReLELA: Exploration in Reinforcement Learning via Emergent Language Abstractions", "track": "main", "status": "Reject", "tldr": "", "abstract": "Instruction-following from prompts in Natural Languages (NLs) is an important benchmark for Human-AI collaboration. Training Embodied AI agents for instruction-following with Reinforcement Learning (RL) poses a strong exploration challenge. Previous works have shown that NL-based state abstractions can help address the exploitation versus exploration trade-off in RL. However, NLs descriptions are not always readily available and are expensive to collect. We therefore propose to use the Emergent Communication paradigm, where artificial agents are free to learn an emergent language (EL) via referential games, to bridge this gap.\nELs constitute cheap and readily-available abstractions, as they are the result of an unsupervised learning approach. In this paper, we investigate (i) how EL-based state abstractions compare to NL-based ones for RL in hard-exploration, procedurally-generated environments, and (ii) how properties of the referential games used to learn ELs impact the quality of the RL exploration and learning. Results indicate that the EL-guided agent, namely EReLELA, achieves similar performance as its NL-based counterparts without its\nlimitations. Our work shows that Embodied RL agents can leverage unsupervised emergent abstractions to greatly improve their exploration skills in sparse reward settings, thus opening new research avenues between Embodied AI and Emergent\nCommunication.", "keywords": "Emergent Communication;Exploration;Reinforcement Learning;Abstraction;Emergent Languages;Natural Languages", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b45936906c4fbb50077d351ce8d9e0d84f66ddea.zip", "author": "Kevin Yandoka Denamganai;Tim Bradley;Pierluigi Vito Amadori;Sondess Missaoui;Guy Moss;James Alfred Walker", "authorids": "~Kevin_Yandoka_Denamganai1;~Tim_Bradley1;~Pierluigi_Vito_Amadori1;~Sondess_Missaoui1;~Guy_Moss2;~James_Alfred_Walker1", "gender": "M;M;;F;M;M", "homepage": "https://kevindenamganai.netlify.app/;;https://pvamadori.github.io/;https://digitalcreativity.ac.uk/people/dr-sondess-missaoui;;", "dblp": "249/7680;;163/8122;143/1419.html;;35/3889", "google_scholar": "PPdQb4QAAAAJ;;https://scholar.google.com/citations?hl=en;K2yUNQIAAAAJ;;https://scholar.google.co.uk/citations?user=Yl5OycsAAAAJ", "orcid": "0000-0002-8776-4331;;;;;", "linkedin": ";tim-bradley-7360a5154/;pierluigi-vito-amadori/;sondess-missaoui-03583531/;guy-moss-680b111/;", "or_profile": "~Kevin_Yandoka_Denamganai1;~Tim_Bradley1;~Pierluigi_Vito_Amadori1;~Sondess_Missaoui1;~Guy_Moss2;~James_Alfred_Walker1", "aff": "University of York;Sony Interactive Entertainment;Sony Interactive Entertainment Europe;University of York;Sony Europe Ltd.;University of York", "aff_domain": "york.ac.uk;sony.com;sony.com;york.ac.uk;sony.com;york.ac.uk", "position": "PhD student;Researcher;Postdoc;Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024erelela,\ntitle={{ER}e{LELA}: Exploration in Reinforcement Learning via Emergent Language Abstractions},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=UqvAFl0lkT}\n}", "github": "", "project": "", "reviewers": "1YKp;ra5c;85kw;rZtJ", "site": "https://openreview.net/forum?id=UqvAFl0lkT", "pdf_size": 2844622, "rating": "3;4;4;7", "confidence": "3;2;2;3", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "1;2;2;3", "wc_summary": "27;44;149;58", "wc_strengths": "68;135;50;101", "wc_weaknesses": "290;207;163;8", "wc_questions": "87;209;1;97", "wc_limitations": "11;1;1;22", "wc_review": "483;596;364;286", "wc_reply_reviewers": "138;87;0;12", "wc_reply_authors": "838;414;55;39", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;2", "rating_avg": [ 4.5, 1.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 69.5, 47.193749586147526 ], "wc_strengths_avg": [ 88.5, 32.48461174156157 ], "wc_weaknesses_avg": [ 167.0, 102.50121950494052 ], "wc_questions_avg": [ 98.5, 73.9104187513506 ], "wc_limitations_avg": [ 8.75, 8.671072598012312 ], "wc_review_avg": [ 432.25, 117.72505043532578 ], "wc_reply_reviewers_avg": [ 59.25, 56.37985012395829 ], "wc_reply_authors_avg": [ 336.5, 326.0586603665052 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.33333333333333337, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:O2iZ-Nt3lSoJ:scholar.google.com/&scioq=EReLELA:+Exploration+in+Reinforcement+Learning+via+Emergent+Language+Abstractions&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;1;1;0;2;0", "aff_unique_norm": "University of York;Sony Interactive Entertainment;Sony Europe", "aff_unique_dep": ";;", "aff_unique_url": "https://www.york.ac.uk;https://www.sonyinteractive.com;https://www.sony.eu", "aff_unique_abbr": "York;SIE;Sony Europe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "End-to-End Ontology Learning with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94942", "id": "UqvEHAnCJC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UqvEHAnCJC", "openreview": "https://openreview.net/forum?id=UqvEHAnCJC", "poster": "", "project": "", "author_site": "Andy Lo, Albert Q. Jiang, Wenda Li, Mateja Jamnik", "tldr": "", "abstract": "Ontologies are useful for automatic machine processing of domain knowledge as they represent it in a structured format. Yet, constructing ontologies requires substantial manual effort. To automate part of this process, large language models (LLMs) have been applied to solve various subtasks of ontology learning. However, this partial ontology learning does not capture the interactions between subtasks. We address this gap by introducing OLLM, a general and scalable method for building the taxonomic backbone of an ontology from scratch. Rather than focusing on subtasks, like individual relations between entities, we model entire subcomponents of the target ontology by finetuning an LLM with a custom regulariser that reduces overfitting on high-frequency concepts. We introduce a novel suite of metrics for evaluating the quality of the generated ontology by measuring its semantic and structural similarity to the ground truth. In contrast to standard metrics, our metrics use deep learning techniques to define more robust distance measures between graphs. Both our quantitative and qualitative results on Wikipedia show that OLLM outperforms subtask composition methods, producing more semantically accurate ontologies while maintaining structural integrity. We further demonstrate that our model can be effectively adapted to new domains, like arXiv, needing only a small number of training examples. Our source code and datasets are available at https://github.com/andylolu2/ollm.", "keywords": "Ontology Learning;Large Language Models;Knowledge Representation", "primary_area": "other", "supplementary_material": "/attachment/3d87d50356626b614e1dc43d234bee419556d0ac.zip", "author": "Andy Lo;Albert Q. Jiang;Wenda Li;Mateja Jamnik", "authorids": "~Andy_Lo1;~Albert_Q._Jiang1;~Wenda_Li1;~Mateja_Jamnik1", "gender": "M;M;F;", "homepage": "https://github.com/andylolu2/;https://wenda302.github.io;http://www.cl.cam.ac.uk/~mj201;https://albertqjiang.github.io/", "dblp": ";132/9868.html;41/1392;321/1049", "google_scholar": ";ufYxQkEAAAAJ;d5QiyJkAAAAJ;Fe_RBHMAAAAJ", "orcid": ";;0000-0003-2772-2532;", "linkedin": ";;;", "or_profile": "~Andy_Lo1;~Wenda_Li1;~Mateja_Jamnik1;~Albert_Jiang1", "aff": "Computer Laboratory;University of Edinburgh;University of Cambridge;University of Cambridge", "aff_domain": "cl.cam.ac.uk;ed.ac.uk;cam.ac.uk;cam.ac.uk", "position": "MS student;Lecturer;Professor in Artificial Intelligence;PhD student", "bibtex": "@inproceedings{\nlo2024endtoend,\ntitle={End-to-End Ontology Learning with Large Language Models},\nauthor={Andy Lo and Albert Q. Jiang and Wenda Li and Mateja Jamnik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UqvEHAnCJC}\n}", "github": "", "reviewers": "cxnM;Qo2b;Yksm;3YVJ", "pdf_size": 1083561, "rating": "4;4;7;7", "confidence": "4;4;4;4", "soundness": "3;2;2;3", "novelty": "1;2;2;3", "presentation": "3;3;3;4", "wc_summary": "110;106;87;74", "wc_strengths": "100;112;63;68", "wc_weaknesses": "230;149;82;216", "wc_questions": "2;46;57;4", "wc_limitations": "2;139;46;1", "wc_review": "444;552;335;363", "wc_reply_reviewers": "0;0;19;15", "wc_reply_authors": "43;43;0;43", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.25, 14.566657131957214 ], "wc_strengths_avg": [ 85.75, 20.765054779605084 ], "wc_weaknesses_avg": [ 169.25, 58.94647996275944 ], "wc_questions_avg": [ 27.25, 24.57005290999594 ], "wc_limitations_avg": [ 47.0, 56.138222273242675 ], "wc_review_avg": [ 423.5, 84.29857650043682 ], "wc_reply_reviewers_avg": [ 8.5, 8.616843969807043 ], "wc_reply_authors_avg": [ 32.25, 18.619546181365433 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10003895140976431499&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "cl.cam.ac.uk;ed.ac.uk;cam.ac.uk;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Cambridge;University of Edinburgh", "aff_unique_dep": "Computer Laboratory;", "aff_unique_url": "https://www.cl.cam.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "CL;Edinburgh", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "GenArtist: Multimodal LLM as an Agent for Unified Image Generation and Editing", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94941", "id": "Ur00BNk1v2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ur00BNk1v2", "openreview": "https://openreview.net/forum?id=Ur00BNk1v2", "poster": "", "project": "", "author_site": "Zhenyu Wang, Aoxue Li, Zhenguo Li, Xihui Liu", "tldr": "", "abstract": "Despite the success achieved by existing image generation and editing methods, current models still struggle with complex problems including intricate text prompts, and the absence of verification and self-correction mechanisms makes the generated images unreliable. Meanwhile, a single model tends to specialize in particular tasks and possess the corresponding capabilities, making it inadequate for fulfilling all user requirements. We propose GenArtist, a unified image generation and editing system, coordinated by a multimodal large language model (MLLM) agent. We integrate a comprehensive range of existing models into the tool library and utilize the agent for tool selection and execution. For a complex problem, the MLLM agent decomposes it into simpler sub-problems and constructs a tree structure to systematically plan the procedure of generation, editing, and self-correction with step-by-step verification. By automatically generating missing position-related inputs and incorporating position information, the appropriate tool can be effectively employed to address each sub-problem. Experiments demonstrate that GenArtist can perform various generation and editing tasks, achieving state-of-the-art performance and surpassing existing models such as SDXL and DALL-E 3, as can be seen in Fig. 1. We will open-source the code for future research and applications.", "keywords": "image generation", "primary_area": "generative_models", "supplementary_material": "/attachment/f6b5944ca7b98edee3e0fdf338b867da081a059f.zip", "author": "Zhenyu Wang;Aoxue Li;Zhenguo Li;Xihui Liu", "authorids": "~Zhenyu_Wang3;~Aoxue_Li2;~Zhenguo_Li1;~Xihui_Liu1", "gender": "M;;M;F", "homepage": ";;http://www.ee.columbia.edu/~zgli/;https://xh-liu.github.io/", "dblp": "22/1486-5;;23/6479;184/3911", "google_scholar": "x_-kOjoAAAAJ;;XboZC1AAAAAJ;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ", "orcid": ";;;0000-0003-1831-9952", "linkedin": ";;;", "or_profile": "~Zhenyu_Wang3;~Aoxue_Li2;~Zhenguo_Li1;~Xihui_Liu1", "aff": "Tsinghua University;;Huawei Noah's Ark Lab;University of Hong Kong", "aff_domain": "tsinghua.edu.cn;;huawei.com;hku.hk", "position": "PhD student;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2024genartist,\ntitle={GenArtist: Multimodal {LLM} as an Agent for Unified Image Generation and Editing},\nauthor={Zhenyu Wang and Aoxue Li and Zhenguo Li and Xihui Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ur00BNk1v2}\n}", "github": "", "reviewers": "TXUv;tVAw;tJSg", "pdf_size": 6732962, "rating": "5;6;6", "confidence": "4;4;5", "soundness": "3;3;2", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "31;83;66", "wc_strengths": "114;37;67", "wc_weaknesses": "96;27;334", "wc_questions": "76;67;36", "wc_limitations": "12;2;26", "wc_review": "329;216;529", "wc_reply_reviewers": "39;17;152", "wc_reply_authors": "75;45;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 60.0, 21.64871050817269 ], "wc_strengths_avg": [ 72.66666666666667, 31.689465477067017 ], "wc_weaknesses_avg": [ 152.33333333333334, 131.51003341528315 ], "wc_questions_avg": [ 59.666666666666664, 17.13346303452853 ], "wc_limitations_avg": [ 13.333333333333334, 9.843215373488933 ], "wc_review_avg": [ 358.0, 129.4166398368721 ], "wc_reply_reviewers_avg": [ 69.33333333333333, 59.140134896325314 ], "wc_reply_authors_avg": [ 40.0, 30.822070014844883 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17893651092122659911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;;huawei.com;hku.hk", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;Huawei;University of Hong Kong", "aff_unique_dep": ";Noah's Ark Lab;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.huawei.com;https://www.hku.hk", "aff_unique_abbr": "THU;Huawei;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Predictor-Corrector Enhanced Transformers with Exponential Moving Average Coefficient Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94940", "id": "Ur9f4hNIpN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ur9f4hNIpN", "openreview": "https://openreview.net/forum?id=Ur9f4hNIpN", "poster": "/media/PosterPDFs/NeurIPS%202024/94940.png?t=1731552757.3928325", "project": "", "author_site": "Bei Li, Tong Zheng, Rui Wang, Jiahao Liu, \u6e05\u598d \u90ed, Junliang Guo, Xu Tan, Tong Xiao, JingBo Zhu, Jingang Wang, Xunliang Cai", "tldr": "", "abstract": "Residual networks, as discrete approximations of Ordinary Differential Equations (ODEs), have inspired significant advancements in neural network design, including multistep methods, high-order methods, and multi-particle dynamical systems. The precision of the solution to ODEs significantly affects parameter optimization, thereby impacting model performance. In this work, we present a series of advanced explorations of Transformer architecture design to minimize the error compared to the true ``solution.'' First, we introduce a predictor-corrector learning framework to minimize truncation errors, which consists of a high-order predictor and a multistep corrector. Second, we propose an exponential moving average-based coefficient learning method to strengthen our higher-order predictor. Extensive experiments on large-scale machine translation, abstractive summarization, language modeling, and natural language understanding benchmarks demonstrate the superiority of our approach. On the WMT'14 English-German and English-French tasks, our model achieved BLEU scores of 30.95 and 44.27, respectively. Furthermore, on the OPUS multilingual machine translation task, our model surpasses a robust 3.8B DeepNet by an average of 2.9 SacreBLEU, using only 1/3 parameters. Notably, it also beats LLama models by 5.7 accuracy points on the LM Harness Evaluation.", "keywords": "Transformer;ODE;predictor-corrector;architecture", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Bei Li;Tong Zheng;Rui Wang;Jiahao Liu;Qingyan Guo;Junliang Guo;Xu Tan;Tong Xiao;JingBo Zhu;Jingang Wang;Xunliang Cai", "authorids": "~Bei_Li1;~Tong_Zheng1;~Rui_Wang26;~Jiahao_Liu6;~Qingyan_Guo1;~Junliang_Guo1;~Xu_Tan1;~Tong_Xiao4;~JingBo_Zhu2;~Jingang_Wang1;~Xunliang_Cai1", "gender": "M;M;;M;F;M;M;M;;M;M", "homepage": "https://libeineu.github.io/;https://kidzheng.github.io/;;https://hit-computer.github.io/;https://beeevita.github.io/;https://leoguojl.me/;https://tan-xu.github.io/;https://www.nlplab.com/members/xiaotong.html;https://dblp.org/pid/73/2129.html;https://sites.google.com/site/bitwjg/;https://maimai.cn/contact/share/card?u=fudmdwckxlwi", "dblp": ";;06/2293-28.html;;138/8089;209/9674;96/10484-3;05/5091;;59/7807;", "google_scholar": "wzbJ5EIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;h1IrWikAAAAJ;https://scholar.google.com.hk/citations?user=IvImF70AAAAJ;tPYWm_AAAAAJ;https://scholar.google.com.sg/citations?user=S88C9ewAAAAJ;tob-U1oAAAAJ;-fov7zkAAAAJ;;janU39IAAAAJ;", "orcid": ";0000-0002-3472-4387;;;;0000-0001-8360-5483;0000-0001-5631-0639;;;;", "linkedin": ";;;;;;;tong-xiao-168bb081/;;;", "or_profile": "~Bei_Li1;~Tong_Zheng1;~Rui_Wang26;~Jiahao_Liu6;~Qingyan_Guo1;~Junliang_Guo1;~Xu_Tan1;~Tong_Xiao4;~JingBo_Zhu2;~Jingang_Wang1;~Xunliang_Cai1", "aff": "Meituan;University of Maryland, College Park;Microsoft;Meituan;Tsinghua University;Microsoft;Microsoft;Northeastern University;Northeastern University;Meituan;Meituan", "aff_domain": "meituan.com;umd.edu;microsoft.com;meituan.com;thu.edu;microsoft.com;microsoft.com;mail.neu.edu.cn;mail.neu.edu.cn;meituan.com;meituan.com", "position": "Researcher;PhD student;Researcher;Researcher;MS student;Researcher;Principal Researcher;Full Professor;Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nli2024predictorcorrector,\ntitle={Predictor-Corrector Enhanced Transformers with Exponential Moving Average Coefficient Learning},\nauthor={Bei Li and Tong Zheng and Rui Wang and Jiahao Liu and Qingyan Guo and Junliang Guo and Xu Tan and Tong Xiao and JingBo Zhu and Jingang Wang and Xunliang Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ur9f4hNIpN}\n}", "github": "", "reviewers": "1pKC;MB44;Rc6M;hvMK", "pdf_size": 508075, "rating": "5;6;6;7", "confidence": "1;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "108;100;74;83", "wc_strengths": "81;177;94;45", "wc_weaknesses": "81;199;97;43", "wc_questions": "66;323;40;36", "wc_limitations": "80;38;13;5", "wc_review": "416;837;318;212", "wc_reply_reviewers": "0;137;21;0", "wc_reply_authors": "222;815;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "3;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.25, 13.442005058770064 ], "wc_strengths_avg": [ 99.25, 48.34446710844996 ], "wc_weaknesses_avg": [ 105.0, 57.706152185014034 ], "wc_questions_avg": [ 116.25, 119.92158896545692 ], "wc_limitations_avg": [ 34.0, 29.214722315983085 ], "wc_review_avg": [ 445.75, 237.12905241661133 ], "wc_reply_reviewers_avg": [ 39.5, 56.94075868830692 ], "wc_reply_authors_avg": [ 259.25, 333.4166874947923 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6938505930492945689&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "meituan.com;umd.edu;microsoft.com;meituan.com;thu.edu;microsoft.com;microsoft.com;mail.neu.edu.cn;mail.neu.edu.cn;meituan.com;meituan.com", "author_num": 11, "aff_unique_index": "0;1;2;0;3;2;2;4;4;0;0", "aff_unique_norm": "Meituan;University of Maryland;Microsoft;Tsinghua University;Northeastern University", "aff_unique_dep": ";;Microsoft Corporation;;", "aff_unique_url": "https://www.meituan.com;https://www/umd.edu;https://www.microsoft.com;https://www.tsinghua.edu.cn;https://www.northeastern.edu", "aff_unique_abbr": "Meituan;UMD;Microsoft;THU;NEU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;1;0;0;1;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Interventionally Consistent Surrogates for Complex Simulation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94939", "id": "UtTjgMDTFO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UtTjgMDTFO", "openreview": "https://openreview.net/forum?id=UtTjgMDTFO", "poster": "", "project": "", "author_site": "Joel Dyer, Nicholas Bishop, Yorgos Felekis, Fabio Massimo Zennaro, Anisoara Calinescu, Theodoros Damoulas, Michael Wooldridge", "tldr": "", "abstract": "Large-scale simulation models of complex socio-technical systems provide decision-makers with high-fidelity testbeds in which policy interventions can be evaluated and _what-if_ scenarios explored. Unfortunately, the high computational cost of such models inhibits their widespread use in policy-making settings. Surrogate models can address these computational limitations, but to do so they must behave consistently with the simulator under interventions of interest. In this paper, we build upon recent developments in causal abstractions to develop a framework for learning interventionally consistent surrogate models for large-scale, complex simulation models. We provide theoretical results showing that our proposed approach induces surrogates to behave consistently with high probability with respect to the simulator across interventions of interest, facilitating rapid experimentation with policy interventions in complex systems. We further demonstrate with empirical studies that conventionally trained surrogates can misjudge the effect of interventions and misguide decision-makers towards suboptimal interventions, while surrogates trained for _interventional_ consistency with our method closely mimic the behaviour of the original simulator under interventions of interest.", "keywords": "agent-based model;causal abstraction;complex simulator;surrogate model", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/18e1bb3e13c7e31fa498dffecfe72ca72607eb89.zip", "author": "Joel Dyer;Nicholas George Bishop;Yorgos Felekis;Fabio Massimo Zennaro;Ani Calinescu;Theodoros Damoulas;Michael J. Wooldridge", "authorids": "~Joel_Dyer1;~Nicholas_George_Bishop1;~Yorgos_Felekis1;~Fabio_Massimo_Zennaro1;~Ani_Calinescu1;~Theodoros_Damoulas1;~Michael_J._Wooldridge1", "gender": ";M;;M;;;M", "homepage": "https://joelnmdyer.github.io;http://www.nickbishop.net;;https://fmzennaro.github.io/;;;https://www.cs.ox.ac.uk/people/michael.wooldridge/", "dblp": "271/8315;294/1920.html;;177/9333;;;w/MichaelWooldridge", "google_scholar": "HeQWo3QAAAAJ;https://scholar.google.co.uk/citations?hl=en;;st6ngd8AAAAJ;;;", "orcid": ";;;0000-0003-0195-8301;;;", "linkedin": ";;;fabio-massimo-zennaro-427804a/;;;michael-wooldridge-1962b2/", "or_profile": "~Joel_Dyer1;~Nicholas_George_Bishop1;~Yorgos_Felekis1;~Fabio_Massimo_Zennaro1;~Ani_Calinescu1;~Theodoros_Damoulas1;~Michael_J._Wooldridge1", "aff": "Department of Computer Science, University of Oxford;Department of Computer Science;;University of Bergen;;;Department of Computer Science, University of Oxford", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;;uib.no;;;cs.ox.ac.uk", "position": "Postdoc;Postdoc;;Associate Professor;;;Full Professor", "bibtex": "@inproceedings{\ndyer2024interventionally,\ntitle={Interventionally Consistent Surrogates for Complex Simulation Models},\nauthor={Joel Dyer and Nicholas George Bishop and Yorgos Felekis and Fabio Massimo Zennaro and Ani Calinescu and Theodoros Damoulas and Michael J. Wooldridge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UtTjgMDTFO}\n}", "github": "", "reviewers": "x5Gt;3hmr;fUGP;fR4E", "pdf_size": 608860, "rating": "2;6;6;7", "confidence": "4;3;2;3", "soundness": "3;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;3", "wc_summary": "152;88;45;49", "wc_strengths": "67;23;21;201", "wc_weaknesses": "79;18;81;165", "wc_questions": "193;185;2;181", "wc_limitations": "202;1;9;48", "wc_review": "693;315;158;644", "wc_reply_reviewers": "950;40;43;0", "wc_reply_authors": "1875;45;1062;201", "reply_reviewers": "2;1;1;0", "reply_authors": "5;2;4;3", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.5, 42.968011357287644 ], "wc_strengths_avg": [ 78.0, 73.35529974037323 ], "wc_weaknesses_avg": [ 85.75, 52.294239644534464 ], "wc_questions_avg": [ 140.25, 79.93552089027756 ], "wc_limitations_avg": [ 65.0, 81.07095657508921 ], "wc_review_avg": [ 452.5, 223.69007577449653 ], "wc_reply_reviewers_avg": [ 258.25, 399.7426515897447 ], "wc_reply_authors_avg": [ 795.75, 733.6591085102127 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7364596943186587, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15217106736241366179&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.ox.ac.uk;cs.ox.ac.uk;;uib.no;;;cs.ox.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Oxford;Unknown Institution;University of Bergen", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;", "aff_unique_url": "https://www.ox.ac.uk;;https://www.uib.no", "aff_unique_abbr": "Oxford;;uib", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;2;0", "aff_country_unique": "United Kingdom;;Norway" }, { "title": "Regression under demographic parity constraints via unlabeled post-processing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94938", "id": "UtbjD5LGnC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UtbjD5LGnC", "openreview": "https://openreview.net/forum?id=UtbjD5LGnC", "poster": "", "project": "", "author_site": "Gayane Taturyan, Evgenii Chzhen, Mohamed Hebiri", "tldr": "", "abstract": "We address the problem of performing regression while ensuring demographic parity, even without access to sensitive attributes during inference. We present a general-purpose post-processing algorithm that, using accurate estimates of the regression function and a sensitive attribute predictor, generates predictions that meet the demographic parity constraint. Our method involves discretization and stochastic minimization of a smooth convex function. It is suitable for online post-processing and multi-class classification tasks only involving unlabeled data for the post-processing. Unlike prior methods, our approach is fully theory-driven. We require precise control over the gradient norm of the convex function, and thus, we rely on more advanced techniques than standard stochastic gradient descent. Our algorithm is backed by finite-sample analysis and post-processing bounds, with experimental results validating our theoretical findings.", "keywords": "fairness;regression", "primary_area": "fairness", "supplementary_material": "", "author": "Gayane Taturyan;Evgenii Chzhen;Mohamed Hebiri", "authorids": "~Gayane_Taturyan1;~Evgenii_Chzhen1;~Mohamed_Hebiri2", "gender": ";;M", "homepage": ";https://perso.math.u-pem.fr/hebiri.mohamed/;https://echzhen.com", "dblp": ";78/8006;198/1158", "google_scholar": ";https://scholar.google.fr/citations?user=M8h_FPcAAAAJ;", "orcid": ";;", "linkedin": "gayaneh-taturyan;;", "or_profile": "~Gayane_Taturyan1;~Mohamed_Hebiri2;~Evgenii_E_Chzhen1", "aff": "IRT SystemX;Universit\u00e9 Gustave Eiffel;CNRS/University Paris-Saclay", "aff_domain": "irt-systemx.fr;univ-eiffel.fr;universite-paris-saclay.fr", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ntaturyan2024regression,\ntitle={Regression under demographic parity constraints via unlabeled post-processing},\nauthor={Gayane Taturyan and Evgenii Chzhen and Mohamed Hebiri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UtbjD5LGnC}\n}", "github": "", "reviewers": "3wik;qMMa;RqJN", "pdf_size": 2136588, "rating": "5;7;7", "confidence": "3;4;4", "soundness": "1;3;4", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "21;142;70", "wc_strengths": "33;103;47", "wc_weaknesses": "194;170;37", "wc_questions": "156;31;14", "wc_limitations": "2;40;1", "wc_review": "406;486;169", "wc_reply_reviewers": "42;60;15", "wc_reply_authors": "0;101;0", "reply_reviewers": "1;2;1", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 1.247219128924647 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.66666666666667, 49.69462300446151 ], "wc_strengths_avg": [ 61.0, 30.243456592570013 ], "wc_weaknesses_avg": [ 133.66666666666666, 69.05231511124173 ], "wc_questions_avg": [ 67.0, 63.31403214664713 ], "wc_limitations_avg": [ 14.333333333333334, 18.153665072253467 ], "wc_review_avg": [ 353.6666666666667, 134.6014693662575 ], "wc_reply_reviewers_avg": [ 39.0, 18.49324200890693 ], "wc_reply_authors_avg": [ 33.666666666666664, 47.6118565998942 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2944307111489390969&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "email": "irt-systemx.fr;univ-eiffel.fr;universite-paris-saclay.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "IRT SystemX;Universit\u00e9 Gustave Eiffel;University Paris-Saclay", "aff_unique_dep": ";;", "aff_unique_url": "https://www.irt-systemx.fr;https://www.univ-gustave-eiffel.fr;https://www.universite-paris-saclay.fr", "aff_unique_abbr": ";UGE;Paris-Saclay", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Safe and Efficient: A Primal-Dual Method for Offline Convex CMDPs under Partial Data Coverage", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94937", "id": "UuiZEOVtHx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UuiZEOVtHx", "openreview": "https://openreview.net/forum?id=UuiZEOVtHx", "poster": "/media/PosterPDFs/NeurIPS%202024/94937.png?t=1733658915.0134456", "project": "", "author_site": "Haobo Zhang, Xiyue Peng, Honghao Wei, Xin Liu", "tldr": "", "abstract": "Offline safe reinforcement learning (RL) aims to find an optimal policy using a pre-collected dataset when data collection is impractical or risky. We propose a novel linear programming (LP) based primal-dual algorithm for convex MDPs that incorporates ``uncertainty'' parameters to improve data efficiency while requiring only partial data coverage assumption. Our theoretical results achieve a sample complexity of $\\mathcal{O}(1/(1-\\gamma)\\sqrt{n})$ under general function approximation, improving the current state-of-the-art by a factor of $1/(1-\\gamma)$, where $n$ is the number of data samples in an offline dataset, and $\\gamma$ is the discount factor. The numerical experiments validate our theoretical findings, demonstrating the practical efficacy of our approach in achieving improved safety and learning efficiency in safe offline settings.", "keywords": "safe reinforcement learning;convex MDPs;offline;sample efficient.", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1f2f69e5e7bb5692a035a7fefafb1bbf49126fd6.zip", "author": "Haobo Zhang;Xiyue Peng;Honghao Wei;Xin Liu", "authorids": "~Haobo_Zhang4;~Xiyue_Peng1;~Honghao_Wei2;~Xin_Liu14", "gender": ";;M;", "homepage": "https://github.com/change-every;https://github.com/pxyWaterMoon;https://honghaow.me;", "dblp": ";347/6307;;76/1820-49", "google_scholar": ";;;y0U4EF4AAAAJ", "orcid": ";;0000-0002-1131-326X;", "linkedin": ";;honghao-wei-19565b155/;", "or_profile": "~Haobo_Zhang4;~Xiyue_Peng1;~Honghao_Wei2;~Xin_Liu14", "aff": "ShanghaiTech University;ShanghaiTech University;Washington State University ;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;wsu.edu;shanghaitech.edu.cm", "position": "MS student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024safe,\ntitle={Safe and Efficient: A Primal-Dual Method for Offline Convex {CMDP}s under Partial Data Coverage},\nauthor={Haobo Zhang and Xiyue Peng and Honghao Wei and Xin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UuiZEOVtHx}\n}", "github": "", "reviewers": "9gP2;61FL;H1LM;Za97", "pdf_size": 1341771, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;2", "wc_summary": "21;111;62;101", "wc_strengths": "55;23;145;84", "wc_weaknesses": "120;136;170;111", "wc_questions": "73;37;112;5", "wc_limitations": "1;2;29;5", "wc_review": "270;309;518;306", "wc_reply_reviewers": "199;25;77;16", "wc_reply_authors": "437;226;178;276", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.75, 35.53431440171598 ], "wc_strengths_avg": [ 76.75, 44.92424178547702 ], "wc_weaknesses_avg": [ 134.25, 22.498611068241523 ], "wc_questions_avg": [ 56.75, 39.95231532714969 ], "wc_limitations_avg": [ 9.25, 11.497282287566918 ], "wc_review_avg": [ 350.75, 97.77365442694673 ], "wc_reply_reviewers_avg": [ 79.25, 72.95332411891867 ], "wc_reply_authors_avg": [ 279.25, 97.44581827867218 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L6NCRMIj_F4J:scholar.google.com/&scioq=Safe+and+Efficient:+A+Primal-Dual+Method+for+Offline+Convex+CMDPs+under+Partial+Data+Coverage&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;wsu.edu;shanghaitech.edu.cm", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ShanghaiTech University;Washington State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://wsu.edu", "aff_unique_abbr": "ShanghaiTech;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Self-Supervised Alignment with Mutual Information: Learning to Follow Principles without Preference Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94936", "id": "UvbpbEhGaw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UvbpbEhGaw", "openreview": "https://openreview.net/forum?id=UvbpbEhGaw", "poster": "/media/PosterPDFs/NeurIPS%202024/94936.png?t=1729792715.2000713", "project": "", "author_site": "Jan-Philipp Fraenken, Eric Zelikman, Rafael Rafailov, Kanishk Gandhi, Tobias Gerstenberg, Noah Goodman", "tldr": "", "abstract": "When prompting a language model (LM), users often expect the model to adhere to a set of behavioral principles across diverse tasks, such as producing insightful content while avoiding harmful or biased language. Instilling such principles (i.e., a constitution) into a model is resource-intensive, technically challenging, and generally requires human preference labels or examples. We introduce SAMI, an iterative algorithm that finetunes a pretrained language model (without requiring preference labels or demonstrations) to increase the conditional mutual information between constitutions and self-generated responses given queries from a dataset. On single-turn dialogue and summarization, a SAMI-trained mistral-7b outperforms the initial pretrained model, with win rates between 66% and 77%. Strikingly, it also surpasses an instruction-finetuned baseline (mistral-7b-instruct) with win rates between 55% and 57% on single-turn dialogue. SAMI requires a model that writes the principles. To avoid dependence on strong models for writing principles, we align a strong pretrained model (mixtral-8x7b) using constitutions written by a weak instruction-finetuned model (mistral-7b-instruct), achieving a 65% win rate on summarization. Finally, we investigate whether SAMI generalizes to diverse summarization principles (e.g., \"summaries should be scientific\") and scales to stronger models (llama3-70b), finding that it achieves win rates of up to 68% for learned and 67% for held-out principles compared to the base model. Our results show that a pretrained LM can learn to follow constitutions without using preference labels, demonstrations, or human oversight.", "keywords": "alignment;contrastive learning;constitutional ai;self-improvement", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/05850162c2a935b3ebd20f98f558e4e3cdaa6ca9.zip", "author": "Jan-Philipp Fr\u00e4nken;Eric Zelikman;Rafael Rafailov;Kanishk Gandhi;Tobias Gerstenberg;Noah Goodman", "authorids": "~Jan-Philipp_Fr\u00e4nken1;~Eric_Zelikman1;~Rafael_Rafailov1;~Kanishk_Gandhi1;~Tobias_Gerstenberg1;~Noah_Goodman1", "gender": ";M;M;M;;M", "homepage": "https://janphilippfranken.github.io/;https://zelikman.me;https://rmrafailov.github.io/;http://cicl.stanford.edu/member/tobias_gerstenberg;https://cocolab.stanford.edu/;https://kanishkgandhi.com", "dblp": ";217/2378;272/5358;;96/1216;243/5820", "google_scholar": "s2omqQcAAAAJ;V5B8dSUAAAAJ;TwABcRgAAAAJ;d0TfP8EAAAAJ;OUpIbcQAAAAJ;", "orcid": "0000-0001-5467-1887;;;0000-0002-9162-0779;;", "linkedin": ";ericzelikman/;;;;", "or_profile": "~Jan-Philipp_Fr\u00e4nken1;~Eric_Zelikman1;~Rafael_Rafailov1;~Tobias_Gerstenberg1;~Noah_Goodman1;~Kanishk_V_Gandhi1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Postdoc;PhD student;PhD student;Assistant Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nfr{\\\"a}nken2024selfsupervised,\ntitle={Self-Supervised Alignment with Mutual Information: Learning to Follow Principles without Preference Labels},\nauthor={Jan-Philipp Fr{\\\"a}nken and Eric Zelikman and Rafael Rafailov and Kanishk Gandhi and Tobias Gerstenberg and Noah Goodman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UvbpbEhGaw}\n}", "github": "", "reviewers": "5Enh;mS35;eqUp;LYCL", "pdf_size": 2692422, "rating": "6;6;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;4", "wc_summary": "121;96;130;95", "wc_strengths": "85;39;49;157", "wc_weaknesses": "144;66;329;119", "wc_questions": "83;104;67;94", "wc_limitations": "106;11;2;1", "wc_review": "539;316;577;466", "wc_reply_reviewers": "45;20;27;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 110.5, 15.337861650177967 ], "wc_strengths_avg": [ 82.5, 46.28984769903656 ], "wc_weaknesses_avg": [ 164.5, 99.06184936694852 ], "wc_questions_avg": [ 87.0, 13.729530217745982 ], "wc_limitations_avg": [ 30.0, 44.05110668303352 ], "wc_review_avg": [ 474.5, 99.82609879184902 ], "wc_reply_reviewers_avg": [ 27.25, 10.871407452579449 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1956574187545778421&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Renovating Names in Open-Vocabulary Segmentation Benchmarks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94935", "id": "Uw2eJOI822", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Uw2eJOI822", "openreview": "https://openreview.net/forum?id=Uw2eJOI822", "poster": "/media/PosterPDFs/NeurIPS%202024/94935.png?t=1733406308.7900193", "project": "", "author_site": "Haiwen Huang, Songyou Peng, Dan Zhang, Andreas Geiger", "tldr": "", "abstract": "Names are essential to both human cognition and vision-language models. Open-vocabulary models utilize class names as text prompts to generalize to categories unseen during training. However, the precision of these names is often overlooked in existing datasets. In this paper, we address this underexplored problem by presenting a framework for \"renovating\" names in open-vocabulary segmentation benchmarks (RENOVATE). Our framework features a renaming model that enhances the quality of names for each visual segment. Through experiments, we demonstrate that our renovated names help train stronger open-vocabulary models with up to 15% relative improvement and significantly enhance training efficiency with improved data quality. We also show that our renovated names improve evaluation by better measuring misclassification and enabling fine-grained model analysis. We provide our code and relabelings for several popular segmentation datasets to the research community on our project page: https://andrehuang.github.io/renovate.", "keywords": "vision-language datasets;open-vocabulary segmentation;renaming", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haiwen Huang;Songyou Peng;Dan Zhang;Andreas Geiger", "authorids": "~Haiwen_Huang1;~Songyou_Peng1;~Dan_Zhang1;~Andreas_Geiger3", "gender": "M;M;;M", "homepage": "https://andrehuang.github.io/;https://pengsongyou.github.io/;;http://www.cvlibs.net", "dblp": "220/3988;205/2316;21/802-17;40/5825-1", "google_scholar": "9LJzK7gAAAAJ;eNypkO0AAAAJ;https://scholar.google.de/citations?user=yazO-mMAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;0000-0003-0930-9162;0000-0002-8151-3726", "linkedin": ";;;", "or_profile": "~Haiwen_Huang1;~Songyou_Peng1;~Dan_Zhang1;~Andreas_Geiger3", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;ETH Zurich;Robert Bosch GmbH, Bosch;University of Tuebingen", "aff_domain": "uni-tuebingen.de;inf.ethz.ch;de.bosch.com;uni-tuebingen.de", "position": "PhD student;Senior Researcher;Research Scientist;Professor", "bibtex": "@inproceedings{\nhuang2024renovating,\ntitle={Renovating Names in Open-Vocabulary Segmentation Benchmarks},\nauthor={Haiwen Huang and Songyou Peng and Dan Zhang and Andreas Geiger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Uw2eJOI822}\n}", "github": "", "reviewers": "CT7Q;aQgc;fwux", "pdf_size": 16036845, "rating": "6;6;6", "confidence": "5;4;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "4;3;2", "wc_summary": "119;97;143", "wc_strengths": "88;86;172", "wc_weaknesses": "212;424;555", "wc_questions": "61;103;60", "wc_limitations": "57;57;21", "wc_review": "537;767;951", "wc_reply_reviewers": "17;34;25", "wc_reply_authors": "78;39;45", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 119.66666666666667, 18.785337071473826 ], "wc_strengths_avg": [ 115.33333333333333, 40.07770230717103 ], "wc_weaknesses_avg": [ 397.0, 141.32468527000748 ], "wc_questions_avg": [ 74.66666666666667, 20.038851153585515 ], "wc_limitations_avg": [ 45.0, 16.97056274847714 ], "wc_review_avg": [ 751.6666666666666, 169.362202263538 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 6.944222218666553 ], "wc_reply_authors_avg": [ 54.0, 17.146428199482248 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18015581409766556794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uni-tuebingen.de;inf.ethz.ch;de.bosch.com;uni-tuebingen.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;ETH Zurich;Robert Bosch GmbH;University of Tuebingen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.ethz.ch;https://www.bosch.com;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;ETHZ;Bosch;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;Switzerland" }, { "title": "Inductive biases of multi-task learning and finetuning: multiple regimes of feature reuse", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94934", "id": "UwvjJZWjPT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=UwvjJZWjPT", "openreview": "https://openreview.net/forum?id=UwvjJZWjPT", "poster": "", "project": "", "author_site": "Samuel Lippl, Jack Lindsey", "tldr": "", "abstract": "Neural networks are often trained on multiple tasks, either simultaneously (multi-task learning, MTL) or sequentially (pretraining and subsequent finetuning, PT+FT). In particular, it is common practice to pretrain neural networks on a large auxiliary task before finetuning on a downstream task with fewer samples. Despite the prevalence of this approach, the inductive biases that arise from learning multiple tasks are poorly characterized. In this work, we address this gap. We describe novel implicit regularization penalties associated with MTL and PT+FT in diagonal linear networks and single-hidden-layer ReLU networks. These penalties indicate that MTL and PT+FT induce the network to reuse features in different ways. 1) Both MTL and PT+FT exhibit biases towards feature reuse between tasks, and towards sparsity in the set of learned features. We show a \"conservation law\" that implies a direct tradeoff between these two biases. 2) PT+FT exhibits a novel \"nested feature selection\" regime, not described by either the \"lazy\" or \"rich\" regimes identified in prior work, which biases it to *rely on a sparse subset* of the features learned during pretraining. This regime is much narrower for MTL. 3) PT+FT (but not MTL) in ReLU networks benefits from features that are correlated between the auxiliary and main task. We confirm these findings empirically with teacher-student models, and introduce a technique -- weight rescaling following pretraining -- that can elicit the nested feature selection regime. Finally, we validate our theory in deep neural networks trained on image classification. We find that weight rescaling improves performance when it causes models to display signatures of nested feature selection. Our results suggest that nested feature selection may be an important inductive bias for finetuning neural networks.", "keywords": "multi-task learning;implicit regularization;finetuning;pretraining;implicit bias", "primary_area": "learning_theory", "supplementary_material": "/attachment/550f0ff3a533e8f071dbaff54c1c661b583c265a.zip", "author": "Samuel Lippl;Jack Lindsey", "authorids": "~Samuel_Lippl1;~Jack_Lindsey1", "gender": "M;", "homepage": "https://sflippl.github.io;", "dblp": ";", "google_scholar": "56QHqZsAAAAJ;CNrQvh4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Samuel_Lippl1;~Jack_Lindsey1", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu", "position": "PhD student;Student", "bibtex": "@inproceedings{\nlippl2024inductive,\ntitle={Inductive biases of multi-task learning and finetuning: multiple regimes of feature reuse},\nauthor={Samuel Lippl and Jack Lindsey},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=UwvjJZWjPT}\n}", "github": "", "reviewers": "4YZg;Ry6c;FiYt;969G", "pdf_size": 1128844, "rating": "5;5;7;8", "confidence": "3;2;3;3", "soundness": "3;3;3;4", "novelty": "2;3;4;4", "presentation": "1;2;3;3", "wc_summary": "141;162;957;109", "wc_strengths": "88;71;56;94", "wc_weaknesses": "365;121;58;8", "wc_questions": "86;3;181;254", "wc_limitations": "8;1;1;20", "wc_review": "688;358;1253;485", "wc_reply_reviewers": "196;0;161;29", "wc_reply_authors": "713;0;884;17", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;3;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 342.25, 355.4274715043844 ], "wc_strengths_avg": [ 77.25, 14.889173919328098 ], "wc_weaknesses_avg": [ 138.0, 137.03831580984934 ], "wc_questions_avg": [ 131.0, 94.91838599554883 ], "wc_limitations_avg": [ 7.5, 7.762087348130012 ], "wc_review_avg": [ 696.0, 342.4463461624317 ], "wc_reply_reviewers_avg": [ 96.5, 83.5598587839879 ], "wc_reply_authors_avg": [ 403.5, 399.6451551063768 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cA8RSZbwZZ4J:scholar.google.com/&scioq=Inductive+biases+of+multi-task+learning+and+finetuning:+multiple+regimes+of+feature+reuse&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Uncovering Safety Risks of Large Language Models through Concept Activation Vector", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94933", "id": "Uymv9ThB50", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Uymv9ThB50", "openreview": "https://openreview.net/forum?id=Uymv9ThB50", "poster": "/media/PosterPDFs/NeurIPS%202024/94933.png?t=1731141189.9194295", "project": "", "author_site": "Zhihao Xu, Ruixuan HUANG, Changyu Chen, Xiting Wang", "tldr": "", "abstract": "Despite careful safety alignment, current large language models (LLMs) remain vulnerable to various attacks. To further unveil the safety risks of LLMs, we introduce a Safety Concept Activation Vector (SCAV) framework, which effectively guides the attacks by accurately interpreting LLMs' safety mechanisms. We then develop an SCAV-guided attack method that can generate both attack prompts and embedding-level attacks with automatically selected perturbation hyperparameters. Both automatic and human evaluations demonstrate that our attack method significantly improves the attack success rate and response quality while requiring less training data. Additionally, we find that our generated attack prompts may be transferable to GPT-4, and the embedding-level attacks may also be transferred to other white-box LLMs whose parameters are known. Our experiments further uncover the safety risks present in current LLMs. For example, in our evaluation of seven open-source LLMs, we observe an average attack success rate of 99.14%, based on the classic keyword-matching criterion. Finally, we provide insights into the safety mechanism of LLMs. The code is available at https://github.com/SproutNan/AI-Safety_SCAV.", "keywords": "large language model;responsible AI;AI safety;concept-based model explanation", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zhihao Xu;Ruixuan HUANG;Changyu Chen;Xiting Wang", "authorids": "~Zhihao_Xu2;~Ruixuan_HUANG1;~Changyu_Chen1;~Xiting_Wang2", "gender": "M;M;F;M", "homepage": "https://sproutnan.github.io;;https://gsai.ruc.edu.cn/english/wangxt;https://github.com/CurryxIaoHu", "dblp": "274/4929;161/5246.html;;", "google_scholar": "hW6vztEAAAAJ;5D7J2doAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=C9Hj9ggAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ruixuan_HUANG1;~Changyu_Chen1;~Xiting_Wang2;~Xu_Zhihao1", "aff": "University of Science and Technology of China;Renmin University of China;Renmin University of China;Shandong University", "aff_domain": "ustc.edu.cn;ruc.edu.cn;ruc.edu.cn;sdu.edu.cn", "position": "Undergrad student;PhD student;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nxu2024uncovering,\ntitle={Uncovering Safety Risks of Large Language Models through Concept Activation Vector},\nauthor={Zhihao Xu and Ruixuan HUANG and Changyu Chen and Xiting Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Uymv9ThB50}\n}", "github": "", "reviewers": "xmcv;mJhn;CRgs;6xey", "pdf_size": 1073168, "rating": "4;4;5;6", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "49;113;133;107", "wc_strengths": "27;51;220;71", "wc_weaknesses": "254;478;368;70", "wc_questions": "31;59;189;118", "wc_limitations": "1;1;9;5", "wc_review": "362;702;919;371", "wc_reply_reviewers": "348;0;0;121", "wc_reply_authors": "1013;89;89;295", "reply_reviewers": "1;0;0;1", "reply_authors": "4;2;2;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 31.252999856013822 ], "wc_strengths_avg": [ 92.25, 75.38360232835785 ], "wc_weaknesses_avg": [ 292.5, 150.91305443864027 ], "wc_questions_avg": [ 99.25, 60.59032513528872 ], "wc_limitations_avg": [ 4.0, 3.3166247903554 ], "wc_review_avg": [ 588.5, 234.904767937988 ], "wc_reply_reviewers_avg": [ 117.25, 142.08690122597508 ], "wc_reply_authors_avg": [ 371.5, 379.7983017339598 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15300454476913496231&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;ruc.edu.cn;ruc.edu.cn;sdu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Science and Technology of China;Renmin University of China;Shandong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ruc.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "USTC;RUC;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Dissecting the Interplay of Attention Paths in a Statistical Mechanics Theory of Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94932", "id": "Uz804qLJT2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Uz804qLJT2", "openreview": "https://openreview.net/forum?id=Uz804qLJT2", "poster": "/media/PosterPDFs/NeurIPS%202024/94932.png?t=1733626217.9265778", "project": "", "author_site": "Lorenzo Tiberi, Francesca Mignacco, Kazuki Irie, Haim Sompolinsky", "tldr": "", "abstract": "Despite the remarkable empirical performance of Transformers, their theoretical understanding remains elusive. Here, we consider a deep multi-head self-attention network, that is closely related to Transformers yet analytically tractable. We develop a statistical mechanics theory of Bayesian learning in this model, deriving exact equations for the network's predictor statistics under the finite-width thermodynamic limit, i.e., $N,P\\rightarrow\\infty$, $P/N=\\mathcal{O}(1)$, where $N$ is the network width and $P$ is the number of training examples. Our theory shows that the predictor statistics are expressed as a sum of independent kernels, each one pairing different \"attention paths\", defined as information pathways through different attention heads across layers. The kernels are weighted according to a \"task-relevant kernel combination\" mechanism that aligns the total kernel with the task labels. As a consequence, this interplay between attention paths enhances generalization performance. Experiments confirm our findings on both synthetic and real-world sequence classification tasks. Finally, our theory explicitly relates the kernel combination mechanism to properties of the learned weights, allowing for a qualitative transfer of its insights to models trained via gradient descent. As an illustration, we demonstrate an efficient size reduction of the network, by pruning those attention heads that are deemed less relevant by our theory.", "keywords": "Deep learning theory;Statistical mechanics;Transformers;Kernel Methods;Gaussian Processes;Finite Width Networks", "primary_area": "other", "supplementary_material": "/attachment/e904f0e8b124579b05204a22eee4546cb67de434.zip", "author": "Lorenzo Tiberi;Francesca Mignacco;Kazuki Irie;Haim Sompolinsky", "authorids": "~Lorenzo_Tiberi2;~Francesca_Mignacco1;~Kazuki_Irie1;~Haim_Sompolinsky1", "gender": "M;F;;M", "homepage": ";;https://sites.harvard.edu/kazuki-irie/;", "dblp": ";259/3071;148/9667;33/5545", "google_scholar": ";maTV19MAAAAJ;https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;", "orcid": "0000-0001-6776-5508;;0000-0003-0923-691X;", "linkedin": ";;;", "or_profile": "~Lorenzo_Tiberi2;~Francesca_Mignacco1;~Kazuki_Irie1;~Haim_Sompolinsky1", "aff": "Harvard University, Harvard University;City University of New York, City University of New York;Harvard University;", "aff_domain": "fas.harvard.edu;gc.cuny.edu;fas.harvard.edu;", "position": "Postdoc;Postdoc;Postpostdoc;", "bibtex": "@inproceedings{\ntiberi2024dissecting,\ntitle={Dissecting the Interplay of Attention Paths in a Statistical Mechanics Theory of Transformers},\nauthor={Lorenzo Tiberi and Francesca Mignacco and Kazuki Irie and Haim Sompolinsky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Uz804qLJT2}\n}", "github": "", "reviewers": "5PZg;15FG;Y9xT;9bWs", "pdf_size": 4424135, "rating": "3;5;7;8", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;1;2;4", "wc_summary": "46;24;152;120", "wc_strengths": "22;18;148;114", "wc_weaknesses": "162;224;269;13", "wc_questions": "66;12;117;66", "wc_limitations": "25;1;22;66", "wc_review": "321;279;708;379", "wc_reply_reviewers": "264;88;86;0", "wc_reply_authors": "1863;1051;35;39", "reply_reviewers": "2;2;1;0", "reply_authors": "5;4;2;2", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 85.5, 52.333067939879086 ], "wc_strengths_avg": [ 75.5, 56.8044892592126 ], "wc_weaknesses_avg": [ 167.0, 96.68764140261153 ], "wc_questions_avg": [ 65.25, 37.13068138345969 ], "wc_limitations_avg": [ 28.5, 23.542514733987108 ], "wc_review_avg": [ 421.75, 169.03753281446106 ], "wc_reply_reviewers_avg": [ 109.5, 96.0143218483576 ], "wc_reply_authors_avg": [ 747.0, 765.8459375096273 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.911322376865767, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14847577968492594353&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "fas.harvard.edu;gc.cuny.edu;fas.harvard.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;City University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.cuny.edu", "aff_unique_abbr": "Harvard;CUNY", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FairWire: Fair Graph Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94931", "id": "V0JvwCQlJe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V0JvwCQlJe", "openreview": "https://openreview.net/forum?id=V0JvwCQlJe", "poster": "", "project": "", "author_site": "Oyku Kose, Yanning Shen", "tldr": "", "abstract": "Machine learning over graphs has recently attracted growing attention due to its ability to analyze and learn complex relations within critical interconnected systems. However, the disparate impact that is amplified by the use of biased graph structures in these algorithms has raised significant concerns for their deployment in real-world decision systems. In addition, while synthetic graph generation has become pivotal for privacy and scalability considerations, the impact of generative learning algorithms on structural bias has not yet been investigated. Motivated by this, this work focuses on the analysis and mitigation of structural bias for both real and synthetic graphs. Specifically, we first theoretically analyze the sources of structural bias that result in disparity for the predictions of dyadic relations. To alleviate the identified bias factors, we design a novel fairness regularizer that offers a versatile use. Faced with the bias amplification in graph generation models brought to light in this work, we further propose a fair graph generation framework, FairWire, by leveraging our fair regularizer design in a generative model. Experimental results on real-world networks validate that the proposed tools herein deliver effective structural bias mitigation for both real and synthetic graphs.", "keywords": "Trustworthy ML;Learning over Graphs;Fair ML over graphs;Fair Graph Generation", "primary_area": "fairness", "supplementary_material": "/attachment/1ba4f22d9c62aad83f9746bf8e224da882b07840.zip", "author": "Oyku Deniz Kose;Yanning Shen", "authorids": "~Oyku_Deniz_Kose1;~Yanning_Shen1", "gender": "F;F", "homepage": ";https://sites.google.com/uci.edu/yanning-shen/home", "dblp": "263/4808;120/7392.html", "google_scholar": "mIURm58AAAAJ;MfzntAIAAAAJ", "orcid": "0000-0002-8685-2161;", "linkedin": ";", "or_profile": "~Oyku_Deniz_Kose1;~Yanning_Shen1", "aff": "University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;uci.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkose2024fairwire,\ntitle={FairWire: Fair Graph Generation},\nauthor={Oyku Deniz Kose and Yanning Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V0JvwCQlJe}\n}", "github": "", "reviewers": "wVX7;bPaX;miG4", "pdf_size": 1820603, "rating": "6;7;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "90;99;93", "wc_strengths": "76;52;71", "wc_weaknesses": "64;92;136", "wc_questions": "207;115;159", "wc_limitations": "23;1;29", "wc_review": "460;359;488", "wc_reply_reviewers": "23;17;105", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.0, 3.7416573867739413 ], "wc_strengths_avg": [ 66.33333333333333, 10.338708279513883 ], "wc_weaknesses_avg": [ 97.33333333333333, 29.634814361190493 ], "wc_questions_avg": [ 160.33333333333334, 37.57067414294765 ], "wc_limitations_avg": [ 17.666666666666668, 12.036980056845193 ], "wc_review_avg": [ 435.6666666666667, 55.403569880970274 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 40.14418457953226 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8959675901111932102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uci.edu;uci.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Maximum Entropy Inverse Reinforcement Learning of Diffusion Models with Energy-Based Models", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94930", "id": "V0oJaLqY4E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V0oJaLqY4E", "openreview": "https://openreview.net/forum?id=V0oJaLqY4E", "poster": "", "project": "", "author_site": "Sangwoong Yoon, Himchan Hwang, Dohyun Kwon, Yung-Kyun Noh, Frank Park", "tldr": "", "abstract": "We present a maximum entropy inverse reinforcement learning (IRL) approach for improving the sample quality of diffusion generative models, especially when the number of generation time steps is small. Similar to how IRL trains a policy based on the reward function learned from expert demonstrations, we train (or fine-tune) a diffusion model using the log probability density estimated from training data. \nSince we employ an energy-based model (EBM) to represent the log density, our approach boils down to the joint training of a diffusion model and an EBM. Our IRL formulation, named Diffusion by Maximum Entropy IRL (DxMI), is a minimax problem that reaches equilibrium when both models converge to the data distribution. The entropy maximization plays a key role in DxMI, facilitating the exploration of the diffusion model and ensuring the convergence of the EBM. We also propose Diffusion by Dynamic Programming (DxDP), a novel reinforcement learning algorithm for diffusion models, as a subroutine in DxMI. DxDP makes the diffusion model update in DxMI efficient by transforming the original problem into an optimal control formulation where value functions replace back-propagation in time. Our empirical studies show that diffusion models fine-tuned using DxMI can generate high-quality samples in as few as 4 and 10 steps. Additionally, DxMI enables the training of an EBM without MCMC, stabilizing EBM training dynamics and enhancing anomaly detection performance.", "keywords": "diffusion models;inverse reinforcement learning;dynamic programming;reinforcement learning;generative modeling", "primary_area": "generative_models", "supplementary_material": "", "author": "Sangwoong Yoon;Himchan Hwang;Dohyun Kwon;Yung-Kyun Noh;Frank C. Park", "authorids": "~Sangwoong_Yoon1;~Himchan_Hwang1;~Dohyun_Kwon1;~Yung-Kyun_Noh1;~Frank_C._Park1", "gender": "M;M;M;M;M", "homepage": "https://swyoon.github.io/;;https://www.dohyunkwon.com/;http://aais.hanyang.ac.kr;http://robotics.snu.ac.kr", "dblp": "237/1318;;218/1797-2;54/6443;p/FrankChongwooPark", "google_scholar": "https://scholar.google.co.kr/citations?user=cH2rjfIAAAAJ;;dBxpstQAAAAJ;https://scholar.google.com/citations?hl=en;u-h3PJIAAAAJ", "orcid": "0000-0002-7251-3230;;0000-0001-9198-4735;;0000-0002-0293-6975", "linkedin": ";himchan-hwang-621213280;;;", "or_profile": "~Sangwoong_Yoon1;~Himchan_Hwang1;~Dohyun_Kwon1;~Yung-Kyun_Noh1;~Frank_C._Park1", "aff": "Korea Institute for Advanced Study;Seoul National University;University of Seoul;Korea Institute for Advanced Study;Seoul National University", "aff_domain": "kias.re.kr;snu.ac.kr;uos.ac.kr;kias.re.kr;snu.ac.kr", "position": "Postdoc;PhD student;Assistant Professor;Affiliate Professor;Full Professor", "bibtex": "@inproceedings{\nyoon2024maximum,\ntitle={Maximum Entropy Inverse Reinforcement Learning of Diffusion Models with Energy-Based Models},\nauthor={Sangwoong Yoon and Himchan Hwang and Dohyun Kwon and Yung-Kyun Noh and Frank C. Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V0oJaLqY4E}\n}", "github": "", "reviewers": "YBX1;EhWr;yjrf;Nqik", "pdf_size": 3712459, "rating": "6;6;7;8", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "29;140;80;79", "wc_strengths": "124;53;66;124", "wc_weaknesses": "102;93;225;16", "wc_questions": "299;79;64;28", "wc_limitations": "6;10;1;10", "wc_review": "560;375;436;257", "wc_reply_reviewers": "21;13;86;6", "wc_reply_authors": "530;0;56;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 39.32556420447137 ], "wc_strengths_avg": [ 91.75, 32.575872973720905 ], "wc_weaknesses_avg": [ 109.0, 74.8498496992479 ], "wc_questions_avg": [ 117.5, 106.41545940322769 ], "wc_limitations_avg": [ 6.75, 3.6996621467371855 ], "wc_review_avg": [ 407.0, 109.28632119345953 ], "wc_reply_reviewers_avg": [ 31.5, 31.9100297712177 ], "wc_reply_authors_avg": [ 146.5, 222.59099262998043 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:karVKHdAw2wJ:scholar.google.com/&scioq=Maximum+Entropy+Inverse+Reinforcement+Learning+of+Diffusion+Models+with+Energy-Based+Models&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "kias.re.kr;snu.ac.kr;uos.ac.kr;kias.re.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Korea Institute for Advanced Study;Seoul National University;University of Seoul", "aff_unique_dep": ";;", "aff_unique_url": "http://www.kaist.edu;https://www.snu.ac.kr;http://www.useoul.edu", "aff_unique_abbr": "KIAS;SNU;UOS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Text2NKG: Fine-Grained N-ary Relation Extraction for N-ary relational Knowledge Graph Construction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94929", "id": "V2MBWYXp63", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V2MBWYXp63", "openreview": "https://openreview.net/forum?id=V2MBWYXp63", "poster": "/media/PosterPDFs/NeurIPS%202024/94929.png?t=1731504516.4245832", "project": "", "author_site": "Haoran Luo, Haihong E, Yuhao Yang, Tianyu Yao, Yikai Guo, Zichen Tang, Wentai Zhang, Shiyao Peng, Kaiyang Wan, Meina Song, Wei Lin, Yifan Zhu, Anh Tuan Luu", "tldr": "", "abstract": "Beyond traditional binary relational facts, n-ary relational knowledge graphs (NKGs) are comprised of n-ary relational facts containing more than two entities, which are closer to real-world facts with broader applications. However, the construction of NKGs remains at a coarse-grained level, which is always in a single schema, ignoring the order and variable arity of entities. To address these restrictions, we propose Text2NKG, a novel fine-grained n-ary relation extraction framework for n-ary relational knowledge graph construction. We introduce a span-tuple classification approach with hetero-ordered merging and output merging to accomplish fine-grained n-ary relation extraction in different arity. Furthermore, Text2NKG supports four typical NKG schemas: hyper-relational schema, event-based schema, role-based schema, and hypergraph-based schema, with high flexibility and practicality. The experimental results demonstrate that Text2NKG achieves state-of-the-art performance in F1 scores on the fine-grained n-ary relation extraction benchmark. Our code and datasets are publicly available.", "keywords": "N-ary Relation Extraction;N-ary relational Knowledge Graph;Knowledge Graph Construction", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/be4553dd08f04559a10d6a295934c19b0c89512e.zip", "author": "Haoran Luo;Haihong E;Yuhao Yang;Tianyu Yao;Yikai Guo;Zichen Tang;Wentai Zhang;Shiyao Peng;Kaiyang Wan;Meina Song;Wei Lin;Yifan Zhu;Anh Tuan Luu", "authorids": "~Haoran_Luo1;~Haihong_E1;~Yuhao_Yang1;~Tianyu_Yao1;~Yikai_Guo2;~Zichen_Tang1;~Wentai_Zhang2;~Shiyao_Peng2;~Kaiyang_Wan1;~Meina_Song1;~Wei_Lin13;~Yifan_Zhu1;~Anh_Tuan_Luu2", "gender": "M;F;M;M;M;M;M;M;F;M;M;M;", "homepage": "https://lhrlab.github.io/;https://teacher.bupt.edu.cn/ehaihong/zh_CN/index.htm;;https://github.com/yao12315;https://github.com/GYK-CASIC;https://github.com/StarLight24;https://github.com/coverdpsy;;http://teacher.bupt.edu.cn/songmeina/;http://www.inspur.com;https://zhuyf8899.github.io/;https://tuanluu.github.io/;", "dblp": "227/5902-1.html;43/10222.html;;324/5213;334/4154;264/0465;;324/4644;95/4440;https://dblp.uni-trier.de/pid/99/2649;94/1593-1;81/8329.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=Q9Nv9mcAAAAJ;https://scholar.google.com.hk/citations?user=J4akh64AAAAJ;T0arNqgAAAAJ;M3wrJAwAAAAJ;https://scholar.google.com.hk/citations?user=4LxmyZAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;v_faxAsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=pAfNfScAAAAJ;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-2727-0361;;;;0000-0003-0345-1686;0000-0002-0244-4970;;;0000-0001-6626-9932;;0000-0002-7695-1633;;", "linkedin": "haoran-luo-88a96b255/;;;;https://www.linkedin.cn/incareer/in/ACoAAD5htzQBKWZKy68SvRuuztB4LJDhIKMdM1o;;;;;;;;", "or_profile": "~Haoran_Luo1;~Haihong_E1;~Yuhao_Yang1;~Tianyu_Yao1;~Yikai_Guo2;~Zichen_Tang1;~Shiyao_Peng2;~Kaiyang_Wan1;~Meina_Song1;~Wei_Lin13;~Yifan_Zhu1;~Anh_Tuan_Luu2;~wentai_zhang1", "aff": "Nanyang Technological University;Beijing University of Post and Telecommunication;Beihang University;Beijing University of Posts and Telecommunications;Beijing Institute of Computer Technology and Application;Beijing University of Posts and Telecommunications;China University of Petroleum-Beijing at Karamay;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications;Nanyang Technological University;", "aff_domain": "ntu.edu.sg;bupt.edu.cn;buaa.edu.cn;bupt.edu.cn;casic.com.cn;bupt.edu.cn;cupk.edu.cn;bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;ntu.edu.sg;", "position": "Intern;Full Professor;MS student;MS student;PhD student;MS student;Undergrad student;Undergrad student;Full Professor;;Assistant Professor;Assistant Professor;", "bibtex": "@inproceedings{\nluo2024textnkg,\ntitle={Text2{NKG}: Fine-Grained N-ary Relation Extraction for N-ary relational Knowledge Graph Construction},\nauthor={Haoran Luo and Haihong E and Yuhao Yang and Tianyu Yao and Yikai Guo and Zichen Tang and Wentai Zhang and Shiyao Peng and Kaiyang Wan and Meina Song and Wei Lin and Yifan Zhu and Anh Tuan Luu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V2MBWYXp63}\n}", "github": "", "reviewers": "rUhA;XsBb;GMbk;ED7F", "pdf_size": 2220545, "rating": "4;6;7;7", "confidence": "4;4;5;4", "soundness": "2;2;4;4", "novelty": "1;3;4;4", "presentation": "2;3;4;3", "wc_summary": "61;125;71;44", "wc_strengths": "38;68;127;110", "wc_weaknesses": "101;220;19;61", "wc_questions": "5;51;13;37", "wc_limitations": "6;1;9;3", "wc_review": "211;465;239;255", "wc_reply_reviewers": "0;37;24;117", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 1.224744871391589 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.25, 30.30160886817728 ], "wc_strengths_avg": [ 85.75, 34.945493271665235 ], "wc_weaknesses_avg": [ 100.25, 74.97124448746999 ], "wc_questions_avg": [ 26.5, 18.405162319305962 ], "wc_limitations_avg": [ 4.75, 3.031088913245535 ], "wc_review_avg": [ 292.5, 100.83030298476743 ], "wc_reply_reviewers_avg": [ 44.5, 43.91184350491334 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13416463536580625484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;bupt.edu.cn;buaa.edu.cn;bupt.edu.cn;casic.com.cn;bupt.edu.cn;cupk.edu.cn;bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;ntu.edu.sg;", "author_num": 13, "aff_unique_index": "0;1;2;1;3;1;4;1;1;1;0", "aff_unique_norm": "Nanyang Technological University;Beijing University of Posts and Telecommunications;Beihang University;Beijing Institute of Computer Technology and Application;China University of Petroleum", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.bupt.edu.cn/;http://www.buaa.edu.cn/;;http://www.cup.edu.cn", "aff_unique_abbr": "NTU;BUPT;BUAA;;CUP", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "QT-ViT: Improving Linear Attention in ViT with Quadratic Taylor Expansion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94928", "id": "V2e0A2XIPF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V2e0A2XIPF", "openreview": "https://openreview.net/forum?id=V2e0A2XIPF", "poster": "/media/PosterPDFs/NeurIPS%202024/94928.png?t=1729480156.4499924", "project": "", "author_site": "Yixing Xu, Chao Li, Dong Li, Xiao Sheng, Fan Jiang, Lu Tian, Emad Barsoum", "tldr": "", "abstract": "Vision transformer model (ViT) is widely used and performs well in vision tasks due to its ability to capture long-range dependencies. However, the time complexity and memory consumption increase quadratically with the number of input patches which limits the usage of ViT in real-world applications. Previous methods have employed linear attention to mitigate the complexity of the original self-attention mechanism at the expense of effectiveness. In this paper, we propose QT-ViT models that improve the previous linear self-attention using quadratic Taylor expansion. Specifically, we substitute the softmax-based attention with second-order Taylor expansion, and then accelerate the quadratic expansion by reducing the time complexity with a fast approximation algorithm. The proposed method capitalizes on the property of quadratic expansion to achieve superior performance while employing linear approximation for fast inference. Compared to previous studies of linear attention, our approach does not necessitate knowledge distillation or high-order attention residuals to facilitate the training process. Extensive experiments demonstrate the efficiency and effectiveness of the proposed QT-ViTs, showcasing the state-of-the-art results. Particularly, the proposed QT-ViTs consistently surpass the previous SOTA EfficientViTs under different model sizes, and achieve a new Pareto-front in terms of accuracy and speed.", "keywords": "Vision Transformer;linear attention;quadratic Taylor expansion", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yixing Xu;Chao Li;Dong Li;Xiao Sheng;Fan Jiang;Lu Tian;Emad Barsoum", "authorids": "~Yixing_Xu2;~Chao_Li27;~Dong_Li13;~Xiao_Sheng1;~Fan_Jiang5;~Lu_Tian3;~Emad_Barsoum1", "gender": "M;M;;M;M;F;", "homepage": ";;;;;;", "dblp": "142/1013;;;;;;", "google_scholar": "32tJoOkAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;;edbuKpcAAAAJ;", "orcid": ";;;;;;", "linkedin": ";%E8%B6%85-%E6%9D%8E-6164a6a0/;;;fan-jiang-996514268/;;", "or_profile": "~Yixing_Xu2;~Chao_Li27;~Dong_Li13;~Xiao_Sheng1;~Fan_Jiang5;~Lu_Tian3;~Emad_Barsoum1", "aff": "Advanced Micro Devices;;;;Advanced Micro Devices;AMD;", "aff_domain": "amd.com;;;;amd.com;amd.com;", "position": "Principal Researcher;;;;Principal Researcher;Researcher;", "bibtex": "@inproceedings{\nxu2024qtvit,\ntitle={{QT}-ViT: Improving Linear Attention in ViT with Quadratic Taylor Expansion},\nauthor={Yixing Xu and Chao Li and Dong Li and Xiao Sheng and Fan Jiang and Lu Tian and Emad Barsoum},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V2e0A2XIPF}\n}", "github": "", "reviewers": "5zgq;QZCU;LZeB;dQG7", "pdf_size": 424643, "rating": "3;5;7;7", "confidence": "4;4;5;5", "soundness": "1;2;4;3", "novelty": "2;2;4;3", "presentation": "2;3;3;4", "wc_summary": "49;74;96;84", "wc_strengths": "6;47;94;51", "wc_weaknesses": "174;172;189;81", "wc_questions": "2;18;15;121", "wc_limitations": "1;33;1;1", "wc_review": "232;344;395;338", "wc_reply_reviewers": "389;151;52;74", "wc_reply_authors": "948;161;113;104", "reply_reviewers": "4;1;2;2", "reply_authors": "7;3;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.75, 17.297037318569906 ], "wc_strengths_avg": [ 49.5, 31.148836254345042 ], "wc_weaknesses_avg": [ 154.0, 42.655597522482324 ], "wc_questions_avg": [ 39.0, 47.72315999596003 ], "wc_limitations_avg": [ 9.0, 13.856406460551018 ], "wc_review_avg": [ 327.25, 59.28479990689013 ], "wc_reply_reviewers_avg": [ 166.5, 133.61605442460873 ], "wc_reply_authors_avg": [ 331.5, 356.5953589153959 ], "reply_reviewers_avg": [ 2.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CibRc9NiX8YJ:scholar.google.com/&scioq=QT-ViT:+Improving+Linear+Attention+in+ViT+with+Quadratic+Taylor+Expansion&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "amd.com;;;;amd.com;amd.com;", "author_num": 7, "aff_unique_index": "0;0;0", "aff_unique_norm": "Advanced Micro Devices, Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.amd.com", "aff_unique_abbr": "AMD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "REBORN: Reinforcement-Learned Boundary Segmentation with Iterative Training for Unsupervised ASR", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94927", "id": "V3QZCM1AQv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V3QZCM1AQv", "openreview": "https://openreview.net/forum?id=V3QZCM1AQv", "poster": "/media/PosterPDFs/NeurIPS%202024/94927.png?t=1733654849.1679301", "project": "", "author_site": "Liang-Hsuan Tseng, En-Pei Hu, Cheng-Han Chiang, Yuan Tseng, Hung-yi Lee, Lin-shan Lee, Shao-Hua Sun", "tldr": "", "abstract": "Unsupervised automatic speech recognition (ASR) aims to learn the mapping between the speech signal and its corresponding textual transcription without the supervision of paired speech-text data. A word/phoneme in the speech signal is represented by a segment of speech signal with variable length and unknown boundary, and this segmental structure makes learning the mapping between speech and text challenging, especially without paired data. In this paper, we propose REBORN, Reinforcement-Learned Boundary Segmentation with Iterative Training for Unsupervised ASR. REBORN alternates between (1) training a segmentation model that predicts the boundaries of the segmental structures in speech signals and (2) training the phoneme prediction model, whose input is a segmental structure segmented by the segmentation model, to predict a phoneme transcription. Since supervised data for training the segmentation model is not available, we use reinforcement learning to train the segmentation model to favor segmentations that yield phoneme sequence predictions with a lower perplexity. We conduct extensive experiments and find that under the same setting, REBORN outperforms all prior unsupervised ASR models on LibriSpeech, TIMIT, and five non-English languages in Multilingual LibriSpeech. We comprehensively analyze why the boundaries learned by REBORN improve the unsupervised ASR performance.", "keywords": "Speech processing;unsupervised learning;reinforcement learning;adversarial learning", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Liang-Hsuan Tseng;En-Pei Hu;Cheng-Han Chiang;Yuan Tseng;Hung-yi Lee;Lin-shan Lee;Shao-Hua Sun", "authorids": "~Liang-Hsuan_Tseng1;~En-Pei_Hu1;~Cheng-Han_Chiang1;~Yuan_Tseng1;~Hung-yi_Lee2;~Lin-shan_Lee1;~Shao-Hua_Sun1", "gender": "M;;;;Non-Binary;M;M", "homepage": ";;https://github.com/d223302;;https://speech.ee.ntu.edu.tw/~hylee/index.html;http://speech.ee.ntu.edu.tw/previous_version/lslNew.htm;http://shaohua0116.github.io", "dblp": ";;276/0431;;81/8056;40/176;158/9680", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=_DYQvPYAAAAJ;;DxLO11IAAAAJ;https://scholar.google.com.tw/citations?user=23zBJqIAAAAJ;uXsfnaQAAAAJ", "orcid": ";;;;;;0000-0001-7579-6734", "linkedin": ";;;;;;shaohua0116/", "or_profile": "~Liang-Hsuan_Tseng1;~En-Pei_Hu1;~Cheng-Han_Chiang1;~Yuan_Tseng1;~Hung-yi_Lee2;~Lin-shan_Lee1;~Shao-Hua_Sun1", "aff": "National Taiwan University;;National Taiwan University;;National Taiwan University;;National Taiwan University", "aff_domain": "ntu.edu.tw;;ntu.edu.tw;;ntu.edu.tw;;ntu.edu.tw", "position": "MS student;;PhD student;;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\ntseng2024reborn,\ntitle={{REBORN}: Reinforcement-Learned Boundary Segmentation with Iterative Training for Unsupervised {ASR}},\nauthor={Liang-Hsuan Tseng and En-Pei Hu and Cheng-Han Chiang and Yuan Tseng and Hung-yi Lee and Lin-shan Lee and Shao-Hua Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V3QZCM1AQv}\n}", "github": "", "reviewers": "TC8W;q9jx;AM1A;vhRd", "pdf_size": 656098, "rating": "6;7;7;7", "confidence": "3;2;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "4;2;4;3", "wc_summary": "53;193;182;146", "wc_strengths": "58;47;96;6", "wc_weaknesses": "166;301;154;13", "wc_questions": "49;67;32;197", "wc_limitations": "1;71;52;1", "wc_review": "327;679;516;363", "wc_reply_reviewers": "24;12;11;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 143.5, 55.065869647178005 ], "wc_strengths_avg": [ 51.75, 32.06536293261001 ], "wc_weaknesses_avg": [ 158.5, 101.92276487615513 ], "wc_questions_avg": [ 86.25, 65.12823888299145 ], "wc_limitations_avg": [ 31.25, 30.986892390170397 ], "wc_review_avg": [ 471.25, 139.36350849487107 ], "wc_reply_reviewers_avg": [ 18.0, 6.519202405202649 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16654630571677471125&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.tw;;ntu.edu.tw;;ntu.edu.tw;;ntu.edu.tw", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DECRL: A Deep Evolutionary Clustering Jointed Temporal Knowledge Graph Representation Learning Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94926", "id": "V42zfM2GXw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V42zfM2GXw", "openreview": "https://openreview.net/forum?id=V42zfM2GXw", "poster": "/media/PosterPDFs/NeurIPS%202024/94926.png?t=1730277841.1184356", "project": "", "author_site": "Qian Chen, Ling Chen", "tldr": "", "abstract": "Temporal Knowledge Graph (TKG) representation learning aims to map temporal evolving entities and relations to embedded representations in a continuous low-dimensional vector space. However, existing approaches cannot capture the temporal evolution of high-order correlations in TKGs. To this end, we propose a **D**eep **E**volutionary **C**lustering jointed temporal knowledge graph **R**epresentation **L**earning approach (**DECRL**). Specifically, a deep evolutionary clustering module is proposed to capture the temporal evolution of high-order correlations among entities. Furthermore, a cluster-aware unsupervised alignment mechanism is introduced to ensure the precise one-to-one alignment of soft overlapping clusters across timestamps, thereby maintaining the temporal smoothness of clusters. In addition, an implicit correlation encoder is introduced to capture latent correlations between any pair of clusters under the guidance of a global graph. Extensive experiments on seven real-world datasets demonstrate that DECRL achieves the state-of-the-art performances, outperforming the best baseline by an average of 9.53\\%, 12.98\\%, 10.42\\%, and 14.68\\% in MRR, Hits@1, Hits@3, and Hits@10, respectively.", "keywords": "Event Prediction;Temporal Knowledge Graphs;Representation Learning;Evolutionary Clustering;Graph Neural Networks", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Qian Chen;Ling Chen", "authorids": "~Qian_Chen22;~Ling_Chen4", "gender": "M;M", "homepage": "https://person.zju.edu.cn/en/lc;https://www.linkedin.com/in/qian-chen-58069734b", "dblp": "17/1237-1.html;11/1394-34.html", "google_scholar": "Vxi9eakAAAAJ;huJSRXEAAAAJ", "orcid": "0000-0003-1934-5992;0000-0002-5632-7630", "linkedin": ";qian-chen-58069734b", "or_profile": "~Ling_Chen4;~qian_chen21", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;cs.zju.edu.cn", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024decrl,\ntitle={{DECRL}: A Deep Evolutionary Clustering Jointed Temporal Knowledge Graph Representation Learning Approach},\nauthor={Qian Chen and Ling Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V42zfM2GXw}\n}", "github": "", "reviewers": "fmj8;C7a5;S3KV;RZGp", "pdf_size": 2292027, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "2;3;3;4", "wc_summary": "54;75;64;57", "wc_strengths": "17;25;41;21", "wc_weaknesses": "235;24;194;36", "wc_questions": "2;50;3;31", "wc_limitations": "1;5;1;25", "wc_review": "309;179;303;170", "wc_reply_reviewers": "32;0;50;0", "wc_reply_authors": "42;0;107;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.5, 8.077747210701755 ], "wc_strengths_avg": [ 26.0, 9.1104335791443 ], "wc_weaknesses_avg": [ 122.25, 93.47827287664231 ], "wc_questions_avg": [ 21.5, 20.155644370746373 ], "wc_limitations_avg": [ 8.0, 9.9498743710662 ], "wc_review_avg": [ 240.25, 65.8611228267481 ], "wc_reply_reviewers_avg": [ 20.5, 21.465087933665682 ], "wc_reply_authors_avg": [ 37.25, 43.76856748855279 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:E7ohSN6wamMJ:scholar.google.com/&scioq=DECRL:+A+Deep+Evolutionary+Clustering+Jointed+Temporal+Knowledge+Graph+Representation+Learning+Approach&hl=en&as_sdt=0,31", "gs_version_total": 4, "email": "zju.edu.cn;cs.zju.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Stochastic Newton Proximal Extragradient Method", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94925", "id": "V4tzn87DtN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V4tzn87DtN", "openreview": "https://openreview.net/forum?id=V4tzn87DtN", "poster": "/media/PosterPDFs/NeurIPS%202024/94925.png?t=1733763889.392036", "project": "", "author_site": "Ruichen Jiang, Michal Derezinski, Aryan Mokhtari", "tldr": "", "abstract": "Stochastic second-order methods are known to achieve fast local convergence in strongly convex optimization by relying on noisy Hessian estimates to precondition the gradient. Yet, most of these methods achieve superlinear convergence only when the stochastic Hessian noise diminishes, requiring an increase in the per-iteration cost as time progresses. Recent work in \\cite{na2022hessian} addressed this issue via a Hessian averaging scheme that achieves a superlinear convergence rate without increasing the per-iteration cost. However, the considered method exhibits a slow global convergence rate, requiring up to $\\tilde{\\mathcal{O}}(\\kappa^2)$ iterations to reach the superlinear rate of $\\tilde{\\mathcal{O}}((1/t)^{t/2})$, where $\\kappa$ is the problem's condition number. In this paper, we propose a novel stochastic Newton proximal extragradient method that significantly improves these bounds, achieving a faster global linear rate and reaching the same fast superlinear rate in $\\tilde{\\mathcal{O}}(\\kappa)$ iterations. We achieve this by developing a novel extension of the Hybrid Proximal Extragradient (HPE) framework, which simultaneously achieves fast global and local convergence rates for strongly convex functions with access to a noisy Hessian oracle.", "keywords": "Stochastic second-order methods;superlinear convergence;hybrid proximal extragradient", "primary_area": "optimization", "supplementary_material": "/attachment/7448e4e64f82f1f395a517a209ef4a1e43f883f9.zip", "author": "Ruichen Jiang;Michal Derezinski;Aryan Mokhtari", "authorids": "~Ruichen_Jiang1;~Michal_Derezinski1;~Aryan_Mokhtari3", "gender": ";M;M", "homepage": "https://ruichen-jiang.github.io/;https://web.eecs.umich.edu/~derezin/;https://sites.utexas.edu/mokhtari/", "dblp": "271/7916;155/1906;140/7407", "google_scholar": "BGFt1UMAAAAJ;qhP66JAAAAAJ;glcep6EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ruichen_Jiang1;~Michal_Derezinski1;~Aryan_Mokhtari3", "aff": "University of Texas at Austin;University of Michigan - Ann Arbor;University of Texas, Austin", "aff_domain": "utexas.edu;umich.edu;utexas.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njiang2024stochastic,\ntitle={Stochastic Newton Proximal Extragradient Method},\nauthor={Ruichen Jiang and Michal Derezinski and Aryan Mokhtari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V4tzn87DtN}\n}", "github": "", "reviewers": "BRp1;m5uq;TogL;qDEj", "pdf_size": 776329, "rating": "5;6;7;7", "confidence": "4;4;3;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "36;44;73;128", "wc_strengths": "18;41;9;77", "wc_weaknesses": "215;54;116;96", "wc_questions": "2;4;286;12", "wc_limitations": "1;1;2;2", "wc_review": "272;144;486;315", "wc_reply_reviewers": "115;12;12;19", "wc_reply_authors": "342;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 36.07197665778797 ], "wc_strengths_avg": [ 36.25, 26.261902063635834 ], "wc_weaknesses_avg": [ 120.25, 59.10319365313519 ], "wc_questions_avg": [ 76.0, 121.30127781684742 ], "wc_limitations_avg": [ 1.5, 0.5 ], "wc_review_avg": [ 304.25, 122.34045733117071 ], "wc_reply_reviewers_avg": [ 39.5, 43.68352092036538 ], "wc_reply_authors_avg": [ 85.5, 148.090344047139 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6234507949065564045&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "utexas.edu;umich.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.umich.edu", "aff_unique_abbr": "UT Austin;UM", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "V5Sbh42uDe", "title": "Potts Relaxations and Soft Self-labeling for Weakly-Supervised Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider weakly supervised segmentation where only a fraction of pixels have ground truth labels (scribbles) and focus on a self-labeling approach where soft pseudo-labels on unlabeled pixels optimize some relaxation of the standard unsupervised CRF/Potts loss. While WSSS methods can directly optimize CRF losses via gradient descent, prior work suggests that higher-order optimization can lead to better network training by jointly estimating pseudo-labels,\ne.g. using discrete graph cut sub-problems. The inability of hard pseudo-labels to represent class uncertainty motivates the relaxed pseudo-labeling. We systematically evaluate standard and new CRF relaxations, neighborhood systems, and losses connecting network predictions with soft pseudo-labels.\nWe also propose a general continuous sub-problem solver for such pseudo-labels.\nSoft self-labeling loss combining the log-quadratic Potts relaxation and collision cross-entropy achieves state-of-the-art and can outperform full pixel-precise supervision on PASCAL.", "keywords": "Soft pseudo-labels; Potts model; Scribble-supervised semantic segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhongwen Zhang;Yuri Boykov", "authorids": "~Zhongwen_Zhang1;~Yuri_Boykov1", "gender": "M;M", "homepage": ";https://cs.uwaterloo.ca/~yboykov/", "dblp": "02/10655;b/YuriBoykov", "google_scholar": ";h6_PdYsAAAAJ", "orcid": ";0000-0001-6374-1736", "linkedin": ";", "or_profile": "~Zhongwen_Zhang1;~Yuri_Boykov1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Professor", "bibtex": "@misc{\nanonymous2024potts,\ntitle={Potts Relaxations and Soft Self-labeling for Weakly-Supervised Segmentation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=V5Sbh42uDe}\n}", "github": "", "project": "", "reviewers": "dJES;jiv8;Pwad;QTqT", "site": "https://openreview.net/forum?id=V5Sbh42uDe", "pdf_size": 2327739, "rating": "4;5;5;6", "confidence": "5;3;3;3", "soundness": "3;2;3;3", "novelty": "1;2;3;2", "presentation": "1;2;4;3", "wc_summary": "37;50;81;84", "wc_strengths": "19;53;36;72", "wc_weaknesses": "444;224;117;184", "wc_questions": "37;115;8;77", "wc_limitations": "1;43;31;11", "wc_review": "538;485;273;428", "wc_reply_reviewers": "450;327;0;122", "wc_reply_authors": "2151;863;0;79", "reply_reviewers": "2;3;0;1", "reply_authors": "5;3;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 63.0, 20.062402647738878 ], "wc_strengths_avg": [ 45.0, 19.685019685029527 ], "wc_weaknesses_avg": [ 242.25, 122.59358670012065 ], "wc_questions_avg": [ 59.25, 40.44981458548358 ], "wc_limitations_avg": [ 21.5, 16.454482671904334 ], "wc_review_avg": [ 431.0, 99.16904759046544 ], "wc_reply_reviewers_avg": [ 224.75, 174.83045358289272 ], "wc_reply_authors_avg": [ 773.25, 864.0238350300297 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JkVSNyVcQBQJ:scholar.google.com/&scioq=Potts+Relaxations+and+Soft+Self-labeling+for+Weakly-Supervised+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "V6891G9dWu", "title": "M2Lingual: Enhancing Multilingual, Multi-Turn Instruction Alignment in Large Language Models", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Instruction finetuning (IFT) is critical for aligning Large Language Models (LLMs) to follow instructions. Numerous effective IFT datasets have been proposed in the recent past, but most focus on rich resourced languages such as English. In this work, we propose a diverse, task taxonomy guided, fully synthetic Multilingual, Multi-turn evoled instruction finetuning dataset, called M2Lingual, to better align LLMs on a diverse set of languages and tasks. M2Lingual contains a total of 182K IFT pairs that are built upon diverse seeds collected from Aya collection and Aya dataset covering 70 languages, 19 NLP tasks and general instruction-response pairs. LLMs finetuned with M2Lingual substantially outperform the majority of existing multilingual IFT datasets. Importantly, LLMs trained with M2Lingual consistently competitive results across wide variety of evaluation benchmarks compared to existing multilingual IFT datasets that enable LLMs performance in only one or a few subset of the benchmarks. Specifically, LLMs finetuned with M2Lingual achieve strong performance on multi-turn evaluation benchmarks such as MT-bench and across wide-variety of multilingual tasks such as XQuAD, MGSM, TyDiQA, MLQA, XNLI and XLSUM. We show efficacy of M2Lingual across LLMs with different sizes, especially smaller LLMs with 1.8B size which benefit massively from our dataset. Lastly, we present key analyses to highlight importance of each synthesis step of M2Lingual.", "keywords": "Multilingual LLM alignment;Multilingual IFT dataset;Multilingual multi-turn IFT dataset", "primary_area": "", "supplementary_material": "/attachment/b6271c6995e517741602a85cf55bd6b39ed553d9.zip", "author": "Rishabh Maheshwary;Vikas Yadav;Hoang H Nguyen;Khyati Mahajan;Sathwik Tejaswi Madhusudhan", "authorids": "~Rishabh_Maheshwary2;~Vikas_Yadav2;~Hoang_H_Nguyen1;~Khyati_Mahajan1;~Sathwik_Tejaswi_Madhusudhan2", "gender": "M;M;M;;M", "homepage": "https://rishabhmaheshwary.github.io/;;;;https://www.linkedin.com/in/sat95", "dblp": "282/0038;;200/9071.html;;", "google_scholar": "https://scholar.google.com/citations?hl=en;FyS1eswAAAAJ;https://scholar.google.com/citations?hl=en;;wwVSfRsAAAAJ", "orcid": ";;;;", "linkedin": "rishabh-maheshwary/;vyf95/;;;sat95", "or_profile": "~Rishabh_Maheshwary2;~Vikas_Yadav2;~Hoang_H_Nguyen1;~Khyati_Mahajan1;~Sathwik_Tejaswi_Madhusudhan2", "aff": "ServiceNow;ServiceNow Inc;University of Illinois at Chicago;;ServiceNow Inc", "aff_domain": "servicenow.com;servicenow.com;uic.edu;;servicenow.com", "position": "Researcher;Researcher;PhD student;;Researcher", "bibtex": "@misc{\nanonymous2024mlingual,\ntitle={M2Lingual: Enhancing Multilingual, Multi-Turn Instruction Alignment in Large Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=V6891G9dWu}\n}", "github": "", "project": "", "reviewers": "6HW3;1A52;QNWT;JJxi", "site": "https://openreview.net/forum?id=V6891G9dWu", "pdf_size": 466278, "rating": "6;6;6;7", "confidence": "4;3;3;2", "wc_summary_and_contributions": "63;52;81;101", "wc_strengths": "5;31;9;39", "wc_improvement": "5;20;90;2", "wc_limitations": "5;63;2;1", "wc_correctness": "6;1;5;1", "wc_clarity": "9;52;8;23", "wc_relation_to_prior_work": "1;1;7;27", "wc_documentation": "1;1;20;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "96;222;223;196", "wc_reply_reviewers": "24;0;17;0", "wc_reply_authors": "67;105;0;104", "reply_reviewers": "1;0;1;0", "reply_authors": "2;3;1;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 74.25, 18.59267328815305 ], "wc_strengths_avg": [ 21.0, 14.352700094407323 ], "wc_improvement_avg": [ 29.25, 35.73076405564258 ], "wc_limitations_avg": [ 17.75, 26.166533969939543 ], "wc_correctness_avg": [ 3.25, 2.277608394786075 ], "wc_clarity_avg": [ 23.0, 17.76231966833161 ], "wc_relation_to_prior_work_avg": [ 9.0, 10.677078252031311 ], "wc_documentation_avg": [ 5.75, 8.227241335952167 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 184.25, 52.08826643304613 ], "wc_reply_reviewers_avg": [ 10.25, 10.54454835448157 ], "wc_reply_authors_avg": [ 69.0, 42.6790346657466 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4806860539360061831&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ServiceNow;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.servicenow.com;https://www.uic.edu", "aff_unique_abbr": "ServiceNow;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CodeRosetta: Pushing the Boundaries of Unsupervised Code Translation for Parallel Programming", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94924", "id": "V6hrg4O9gg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V6hrg4O9gg", "openreview": "https://openreview.net/forum?id=V6hrg4O9gg", "poster": "/media/PosterPDFs/NeurIPS%202024/94924.png?t=1733412026.7495923", "project": "", "author_site": "Ali Tehrani, Arijit Bhattacharjee, Le Chen, Nesreen K. Ahmed, Amir Yazdanbakhsh, Ali Jannesari", "tldr": "", "abstract": "Automatic translation of programming languages has garnered renewed interest, driven by recent advancements in large language models (LLMs). Encoder-decoder transformer models, in particular, have shown promise in translating between different programming languages. However, translating between a language and its high-performance computing (HPC) extension remains underexplored due to inherent challenges like complex parallel semantics understanding. In this paper, we introduce CodeRosetta, an encoder-decoder transformer model explicitly designed for translating between programming languages and also their HPC extensions. CodeRosetta is evaluated on C++ to CUDA and Fortran to C++ translation.\nIt employs a customized learning-based framework with tailored pretraining and training objectives that enable it to effectively capture code semantics and parallel structural nuances, allowing for bidirectional code translation. Our results show that CodeRosetta outperforms state-of-the-art baselines in C++ to CUDA translation by 2.9 BLEU and 1.72 CodeBLUE points while improving compilation accuracy by 6.05%. Compared to general closed-source LLMs, our proposed bidirectional learning-based method improves C++ to CUDA translation by 22.08 BLEU and 14.39 CodeBLUE with 2.75% higher compilation accuracy.\nFinally, CodeRosetta exhibits proficiency in Fortran to parallel C++ translation, marking it, to our knowledge, as the first encoder-decoder model for such a complex translation task, improving CodeBLEU at least by 4.63 points compared to closed-source LLMs and Open Code LLM.", "keywords": "unsupervised learning;code generation;HPC code generation;program translation;HPC code translation", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/d1f01ece881c7f59a98eb6c6ea2a136421d3104e.zip", "author": "Ali TehraniJamsaz;Arijit Bhattacharjee;Le Chen;Nesreen K. Ahmed;Amir Yazdanbakhsh;Ali Jannesari", "authorids": "~Ali_TehraniJamsaz1;~Arijit_Bhattacharjee1;~Le_Chen2;~Nesreen_K._Ahmed2;~Amir_Yazdanbakhsh1;~Ali_Jannesari1", "gender": "M;M;M;M;M;F", "homepage": "http://www.tehrani.xyz;;https://www.cs.iastate.edu/;https://www.ayazdan.com/;https://www.cs.iastate.edu/swapp/;http://nesreenahmed.com", "dblp": ";;;44/8745;74/1277;33/11518", "google_scholar": "XZ1al70AAAAJ;QsK89ZoAAAAJ;D7bxqR4AAAAJ;Vdu_sqwAAAAJ;https://scholar.google.de/citations?user=YhWnhQEAAAAJ;AFV0nLcAAAAJ", "orcid": "0009-0001-3678-5730;;0000-0002-7188-6756;0000-0001-8199-7671;0000-0001-8672-5317;", "linkedin": "tehranixyz/;;;ayazdanb/;ali-jannesari-6ab8a56b/;nkahmed/", "or_profile": "~Ali_TehraniJamsaz1;~Arijit_Bhattacharjee1;~Le_Chen2;~Amir_Yazdanbakhsh1;~Ali_Jannesari1;~Nesreen_Ahmed1", "aff": "Iowa State University;Iowa State University;Iowa State University;Google DeepMind;Iowa State University;Intel AI Research", "aff_domain": "iastate.edu;iastate.edu;iastate.edu;google.com;isu.edu;intel.com", "position": "PhD student;PhD student;PhD student;Researcher;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\ntehranijamsaz2024coderosetta,\ntitle={CodeRosetta: Pushing the Boundaries of Unsupervised Code Translation for Parallel Programming},\nauthor={Ali TehraniJamsaz and Arijit Bhattacharjee and Le Chen and Nesreen K. Ahmed and Amir Yazdanbakhsh and Ali Jannesari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V6hrg4O9gg}\n}", "github": "", "reviewers": "W1wi;djHa;BqwA;9e24", "pdf_size": 598894, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;2;2;3", "novelty": "3;2;2;4", "presentation": "3;2;2;3", "wc_summary": "98;59;38;56", "wc_strengths": "58;60;66;132", "wc_weaknesses": "55;234;197;215", "wc_questions": "139;72;54;111", "wc_limitations": "9;11;7;7", "wc_review": "359;436;362;521", "wc_reply_reviewers": "0;26;297;57", "wc_reply_authors": "0;115;1296;80", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;4;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.75, 21.878928218722233 ], "wc_strengths_avg": [ 79.0, 30.740852297878796 ], "wc_weaknesses_avg": [ 175.25, 70.64833685232796 ], "wc_questions_avg": [ 94.0, 33.15870926317851 ], "wc_limitations_avg": [ 8.5, 1.6583123951777 ], "wc_review_avg": [ 419.5, 66.22122016393234 ], "wc_reply_reviewers_avg": [ 95.0, 118.35750926747318 ], "wc_reply_authors_avg": [ 372.75, 534.6659588004458 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17350919447877562576&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "iastate.edu;iastate.edu;iastate.edu;google.com;isu.edu;intel.com", "author_num": 6, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Iowa State University;Google;Intel", "aff_unique_dep": ";Google DeepMind;Intel AI Research", "aff_unique_url": "https://www.iastate.edu;https://deepmind.com;https://www.intel.com/research", "aff_unique_abbr": "ISU;DeepMind;Intel AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Continual Counting with Gradual Privacy Expiration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94923", "id": "V6qdb1AgsM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V6qdb1AgsM", "openreview": "https://openreview.net/forum?id=V6qdb1AgsM", "poster": "/media/PosterPDFs/NeurIPS%202024/94923.png?t=1731489974.2957397", "project": "", "author_site": "Joel Daniel Andersson, Monika Henzinger, Rasmus Pagh, Teresa Anna Steiner, Jalaj Upadhyay", "tldr": "", "abstract": "Differential privacy with gradual expiration models the setting where data items arrive in a stream and at a given time $t$ the privacy loss guaranteed for a data item seen at time $(t-d)$ is $\\epsilon g(d)$, where $g$ is a monotonically non-decreasing function. We study the fundamental *continual (binary) counting* problem where each data item consists of a bit and the algorithm needs to output at each time step the sum of all the bits streamed so far. For a stream of length $T$ and privacy *without* expiration continual counting is possible with maximum (over all time steps) additive error $O(\\log^2(T)/\\varepsilon)$ and the best known lower bound is $\\Omega(\\log(T)/\\varepsilon)$; closing this gap is a challenging open problem. \n\nWe show that the situation is very different for privacy with gradual expiration by giving upper and lower bounds for a large set of expiration functions $g$. Specifically, our algorithm achieves an additive error of $O(\\log(T)/\\epsilon)$ for a large set of privacy expiration functions. We also give a lower bound that shows that if $C$ is the additive error of any $\\epsilon$-DP algorithm for this problem, then the product of $C$ and the privacy expiration function after $2C$ steps must be $\\Omega(\\log(T)/\\epsilon)$. Our algorithm matches this lower bound as its additive error is $O(\\log(T)/\\epsilon)$, even when $g(2C) = O(1)$.\n\nOur empirical evaluation shows that we achieve a slowly growing privacy loss that has significantly smaller empirical privacy loss for large values of $d$ than a natural baseline algorithm.", "keywords": "differential privacy;continual observation;privacy expiration", "primary_area": "privacy", "supplementary_material": "/attachment/6be74431a065600a90a1e42503c06570e170755f.zip", "author": "Joel Daniel Andersson;Monika Henzinger;Rasmus Pagh;Teresa Anna Steiner;Jalaj Upadhyay", "authorids": "~Joel_Daniel_Andersson1;~Monika_Henzinger1;~Rasmus_Pagh1;~Teresa_Anna_Steiner1;~Jalaj_Upadhyay1", "gender": ";;M;F;M", "homepage": "https://jdandersson.net/;;https://www.rasmuspagh.net;http://people.compute.dtu.dk/terst/;https://sites.google.com/view/jalajupadhyay", "dblp": "349/7870;;p/RasmusPagh;235/2633;https://dblp.uni-trier.de/pers/u/Upadhyay:Jalaj.html", "google_scholar": "YIN6oFUAAAAJ;NXbggxYAAAAJ;https://scholar.google.com.tw/citations?user=VO4oS8UAAAAJ;;vHTMzPQAAAAJ", "orcid": "0000-0003-2530-0520;;0000-0002-1516-9306;0000-0003-1078-4075;", "linkedin": ";;;;", "or_profile": "~Joel_Daniel_Andersson1;~Monika_Henzinger1;~Rasmus_Pagh1;~Teresa_Anna_Steiner1;~Jalaj_Kumar_Upadhyay1", "aff": "University of Copenhagen;Institute of Science and Technology;University of Copenhagen;Technical University of Denmark;Rutgers University", "aff_domain": "ku.dk;ist.ac.at;ku.dk;dtu.dk;rutgers.edu", "position": "PhD student;Full Professor;Full Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nandersson2024continual,\ntitle={Continual Counting with Gradual Privacy Expiration},\nauthor={Joel Daniel Andersson and Monika Henzinger and Rasmus Pagh and Teresa Anna Steiner and Jalaj Upadhyay},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V6qdb1AgsM}\n}", "github": "", "reviewers": "L11a;V4mB;RM2S;nwES;naYA", "pdf_size": 1402193, "rating": "6;6;6;6;7", "confidence": "3;2;4;4;4", "soundness": "3;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "116;48;295;58;98", "wc_strengths": "42;53;60;65;24", "wc_weaknesses": "103;55;50;25;75", "wc_questions": "142;40;143;16;234", "wc_limitations": "35;4;1;8;1", "wc_review": "438;200;549;172;432", "wc_reply_reviewers": "5;2;13;0;4", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 123.0, 89.56338537594479 ], "wc_strengths_avg": [ 48.8, 14.606847709208171 ], "wc_weaknesses_avg": [ 61.6, 26.11972434770321 ], "wc_questions_avg": [ 115.0, 78.86697661252141 ], "wc_limitations_avg": [ 9.8, 12.859237924542807 ], "wc_review_avg": [ 358.2, 146.9127632304287 ], "wc_reply_reviewers_avg": [ 4.8, 4.445222154178573 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.37500000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15300374334321649950&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ku.dk;ist.ac.at;ku.dk;dtu.dk;rutgers.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of Copenhagen;Institute of Science and Technology;Technical University of Denmark;Rutgers University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ku.dk;;https://www.tek.dk;https://www.rutgers.edu", "aff_unique_abbr": "UCPH;;DTU;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;2", "aff_country_unique": "Denmark;;United States" }, { "title": "EMVP: Embracing Visual Foundation Model for Visual Place Recognition with Centroid-Free Probing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94922", "id": "V6w7keoTqn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V6w7keoTqn", "openreview": "https://openreview.net/forum?id=V6w7keoTqn", "poster": "/media/PosterPDFs/NeurIPS%202024/94922.png?t=1730380056.0567963", "project": "", "author_site": "Qibo Qiu, Shun Zhang, Haiming Gao, Honghui Yang, Haochao Ying, Wenxiao Wang, Xiaofei He", "tldr": "", "abstract": "Visual Place Recognition (VPR) is essential for mobile robots as it enables them to retrieve images from a database closest to their current location. The progress of Visual Foundation Models (VFMs) has significantly advanced VPR by capturing representative descriptors in images. However, existing fine-tuning efforts for VFMs often overlook the crucial role of probing in effectively adapting these descriptors for improved image representation. In this paper, we propose the Centroid-Free Probing (CFP) stage, making novel use of second-order features for more effective use of descriptors from VFMs. Moreover, to control the preservation of task-specific information adaptively based on the context of the VPR, we introduce the Dynamic Power Normalization (DPN) module in both the recalibration and CFP stages, forming a novel Parameter Efficiency Fine-Tuning (PEFT) pipeline (EMVP) tailored for the VPR task. Extensive experiments demonstrate the superiority of the proposed CFP over existing probing methods. Moreover, the EMVP pipeline can further enhance fine-tuning performance in terms of accuracy and efficiency. Specifically, it achieves 93.9\\%, 96.5\\%, and 94.6\\% Recall@1 on the MSLS Validation, Pitts250k-test, and SPED datasets, respectively, while saving 64.3\\% of trainable parameters compared with the existing SOTA PEFT method.", "keywords": "Visual Foundation Model;Visual Place Recognition;Parameter Efficiency Fine-Tuning", "primary_area": "robotics", "supplementary_material": "", "author": "Qibo Qiu;Shun Zhang;Haiming Gao;Honghui Yang;Haochao Ying;Wenxiao Wang;Xiaofei He", "authorids": "~Qibo_Qiu1;~Shun_Zhang8;~Haiming_Gao1;~Honghui_Yang1;~Haochao_Ying2;~Wenxiao_Wang2;~Xiaofei_He2", "gender": ";M;M;;M;M;M", "homepage": ";;https://ghm0819.github.io/;;https://person.zju.edu.cn/en/haochaoying;https://wenxiaowang.com;https://person.zju.edu.cn/0007101", "dblp": ";;;;156/4605;243/5853-1;h/XiaofeiHe.html", "google_scholar": ";;https://scholar.google.com.hk/citations?user=Vz_6DDwAAAAJ;;_8-a398AAAAJ;https://scholar.google.com.hk/citations?user=rcxOjikAAAAJ;QLLFowsAAAAJ", "orcid": ";0009-0003-6655-0814;;;;;0009-0001-9107-2354", "linkedin": ";;;;;;", "or_profile": "~Qibo_Qiu1;~Shun_Zhang8;~Haiming_Gao1;~Honghui_Yang1;~Haochao_Ying2;~Wenxiao_Wang2;~Xiaofei_He2", "aff": ";Zhejiang Lab;Zhejiang Lab;;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": ";zhejianglab.com;zhejianglab.org;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": ";Lecturer;Researcher;;Associate Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nqiu2024emvp,\ntitle={{EMVP}: Embracing Visual Foundation Model for Visual Place Recognition with Centroid-Free Probing},\nauthor={Qibo Qiu and Shun Zhang and Haiming Gao and Honghui Yang and Haochao Ying and Wenxiao Wang and Xiaofei He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V6w7keoTqn}\n}", "github": "", "reviewers": "cxjU;M9Bz;UsaF;psQJ", "pdf_size": 10642626, "rating": "5;5;7;8", "confidence": "5;5;5;5", "soundness": "4;2;4;4", "novelty": "3;3;4;3", "presentation": "3;2;3;4", "wc_summary": "108;85;73;55", "wc_strengths": "84;23;106;60", "wc_weaknesses": "136;409;21;104", "wc_questions": "74;2;72;18", "wc_limitations": "17;2;35;14", "wc_review": "419;521;307;251", "wc_reply_reviewers": "0;352;29;43", "wc_reply_authors": "0;363;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.25, 19.253246479490155 ], "wc_strengths_avg": [ 68.25, 30.776411421736615 ], "wc_weaknesses_avg": [ 167.5, 145.60992411233514 ], "wc_questions_avg": [ 41.5, 32.01171660501823 ], "wc_limitations_avg": [ 17.0, 11.811011811017716 ], "wc_review_avg": [ 374.5, 103.98437382607062 ], "wc_reply_reviewers_avg": [ 106.0, 142.8723206222955 ], "wc_reply_authors_avg": [ 90.75, 157.18361078687562 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VN7P5xIiLpYJ:scholar.google.com/&scioq=EMVP:+Embracing+Visual+Foundation+Model+for+Visual+Place+Recognition+with+Centroid-Free+Probing&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";zhejianglab.com;zhejianglab.org;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Zhejiang Lab;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.zhejianglab.com;https://www.zju.edu.cn", "aff_unique_abbr": ";ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Gradient-Variation Online Learning under Generalized Smoothness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94921", "id": "V75gAxpW40", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V75gAxpW40", "openreview": "https://openreview.net/forum?id=V75gAxpW40", "poster": "/media/PosterPDFs/NeurIPS%202024/94921.png?t=1733555007.6425068", "project": "", "author_site": "Yan-Feng Xie, Peng Zhao, Zhi-Hua Zhou", "tldr": "", "abstract": "Gradient-variation online learning aims to achieve regret guarantees that scale with variations in the gradients of online functions, which is crucial for attaining fast convergence in games and robustness in stochastic optimization, hence receiving increased attention. Existing results often require the smoothness condition by imposing a fixed bound on gradient Lipschitzness, which may be unrealistic in practice. Recent efforts in neural network optimization suggest a generalized smoothness condition, allowing smoothness to correlate with gradient norms. In this paper, we systematically study gradient-variation online learning under generalized smoothness. We extend the classic optimistic mirror descent algorithm to derive gradient-variation regret by analyzing stability over the optimization trajectory and exploiting smoothness locally. Then, we explore universal online learning, designing a single algorithm with the optimal gradient-variation regrets for convex and strongly convex functions simultaneously, without requiring prior knowledge of curvature. This algorithm adopts a two-layer structure with a meta-algorithm running over a group of base-learners. To ensure favorable guarantees, we design a new Lipschitz-adaptive meta-algorithm, capable of handling potentially unbounded gradients while ensuring a second-order bound to effectively ensemble the base-learners. Finally, we provide the applications for fast-rate convergence in games and stochastic extended adversarial optimization.", "keywords": "online learning;generalized smoothness;gradient-variation regret bound;Lipschitz-adaptive algorithm", "primary_area": "online_learning", "supplementary_material": "", "author": "Yan-Feng Xie;Peng Zhao;Zhi-Hua Zhou", "authorids": "~Yan-Feng_Xie1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "gender": "M;;", "homepage": "http://www.lamda.nju.edu.cn/xieyf/;;", "dblp": "346/1078;;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yan-Feng_Xie1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "aff": "Nanjing University;;", "aff_domain": "nju.edu.cn;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nxie2024gradientvariation,\ntitle={Gradient-Variation Online Learning under Generalized Smoothness},\nauthor={Yan-Feng Xie and Peng Zhao and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V75gAxpW40}\n}", "github": "", "reviewers": "NEvk;ian3;YPdL;VaQR", "pdf_size": 524138, "rating": "6;6;7;7", "confidence": "3;2;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "270;40;86;50", "wc_strengths": "175;21;20;176", "wc_weaknesses": "212;56;85;156", "wc_questions": "124;88;44;3", "wc_limitations": "6;1;47;1", "wc_review": "787;206;282;386", "wc_reply_reviewers": "112;12;51;87", "wc_reply_authors": "28;0;15;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 111.5, 93.09538119584666 ], "wc_strengths_avg": [ 98.0, 77.50161288644256 ], "wc_weaknesses_avg": [ 127.25, 60.972842315247206 ], "wc_questions_avg": [ 64.75, 45.53775905773142 ], "wc_limitations_avg": [ 13.75, 19.30511590226798 ], "wc_review_avg": [ 415.25, 223.9390262995711 ], "wc_reply_reviewers_avg": [ 65.5, 37.73923687622737 ], "wc_reply_authors_avg": [ 18.0, 11.76860229593982 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13781739916929071188&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Outlier-Robust Distributionally Robust Optimization via Unbalanced Optimal Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94920", "id": "V8HVsyTSu6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=V8HVsyTSu6", "openreview": "https://openreview.net/forum?id=V8HVsyTSu6", "poster": "/media/PosterPDFs/NeurIPS%202024/94920.png?t=1731236134.47814", "project": "", "author_site": "Zifan Wang, Yi Shen, Michael Zavlanos, Karl H. Johansson", "tldr": "", "abstract": "Distributionally Robust Optimization (DRO) accounts for uncertainty in data distributions by optimizing the model performance against the worst possible distribution within an ambiguity set. In this paper, we propose a DRO framework that relies on a new distance inspired by Unbalanced Optimal Transport (UOT). The proposed UOT distance employs a soft penalization term instead of hard constraints, enabling the construction of an ambiguity set that is more resilient to outliers. Under smoothness conditions, we establish strong duality of the proposed DRO problem. Moreover, we introduce a computationally efficient Lagrangian penalty formulation for which we show that strong duality also holds. Finally, we provide empirical results that demonstrate that our method offers improved robustness to outliers and is computationally less demanding for regression and classification tasks.", "keywords": "Distributionally robust optimization; unbalanced optimal transport; outlier", "primary_area": "optimization", "supplementary_material": "", "author": "Zifan Wang;Yi Shen;Michael M. Zavlanos;Karl Henrik Johansson", "authorids": "~Zifan_Wang2;~Yi_Shen3;~Michael_M._Zavlanos2;~Karl_Henrik_Johansson1", "gender": "M;;;", "homepage": "https://www.kth.se/profile/zifanw/;;;https://people.kth.se/~kallej/", "dblp": "35/10147-2;;;", "google_scholar": "LV-DrIUAAAAJ;YYOyml4AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zifan_Wang2;~Yi_Shen3;~Michael_M._Zavlanos2;~Karl_Henrik_Johansson1", "aff": "KTH Royal Institute of Technology;Duke University;;KTH Royal Institute of Technology", "aff_domain": "kth.se;duke.edu;;kth.se", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nwang2024outlierrobust,\ntitle={Outlier-Robust Distributionally Robust Optimization via Unbalanced Optimal Transport},\nauthor={Zifan Wang and Yi Shen and Michael M. Zavlanos and Karl Henrik Johansson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=V8HVsyTSu6}\n}", "github": "", "reviewers": "QxeQ;KUtP;8Mev", "pdf_size": 984349, "rating": "5;6;7", "confidence": "2;3;5", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "62;76;37", "wc_strengths": "60;159;63", "wc_weaknesses": "378;100;332", "wc_questions": "186;179;291", "wc_limitations": "6;21;1", "wc_review": "692;535;724", "wc_reply_reviewers": "14;13;17", "wc_reply_authors": "0;8;13", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.333333333333336, 16.131404843417148 ], "wc_strengths_avg": [ 94.0, 45.9782557302906 ], "wc_weaknesses_avg": [ 270.0, 121.66621004480524 ], "wc_questions_avg": [ 218.66666666666666, 51.227162933567016 ], "wc_limitations_avg": [ 9.333333333333334, 8.498365855987974 ], "wc_review_avg": [ 650.3333333333334, 82.59270885226827 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 1.699673171197595 ], "wc_reply_authors_avg": [ 7.0, 5.354126134736337 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8qAqA4VMabcJ:scholar.google.com/&scioq=Outlier-Robust+Distributionally+Robust+Optimization+via+Unbalanced+Optimal+Transport&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "kth.se;duke.edu;;kth.se", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "KTH Royal Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.duke.edu", "aff_unique_abbr": "KTH;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Sweden;United States" }, { "id": "VD3lb3XWqa", "title": "A Turing Test for Self-Awareness", "track": "main", "status": "Reject", "tldr": "", "abstract": "I propose a test for machine self-awareness inspired by the Turing test. My test is simple, and it provides an objective, empirical metric to rectify the ungrounded speculation surging through industry, academia, and social media. Drawing from a breadth of philosophical literature, I argue the test captures the essence of self-awareness, rather than some postulated correlate or ancillary quality. To begin, the concept of self-awareness is clearly demarcated from related concepts like consciousness, agency, and free will. Next, I propose a model called the $\\textit{Nesting Doll of Self-Awareness}$ and discuss its relevance for intelligent beings. Then, the test is presented in its full generality, applicable to any machine system. I show how to apply the test to Large Language Models and conduct experiments on popular open and closed source LLMs, obtaining reproducible results that suggest a lack of self-awareness. The implications of machine self-awareness are discussed in relation to questions about meaning and true understanding. Finally, some next steps are outlined for studying self-awareness in machines.", "keywords": "Self-Awareness;Self-Consciousness;Test;Metric;Turing;LLM;Meaning;Understanding", "primary_area": "evaluation", "supplementary_material": "/attachment/e7ee7eb772856223ae5e4fe485e07674d7deeb0c.zip", "author": "Cameron Witkowski", "authorids": "~Cameron_Witkowski1", "gender": "M", "homepage": "http://cameronwitkowski.com", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "cameron-witkowski/", "or_profile": "~Cameron_Witkowski1", "aff": "University of Toronto", "aff_domain": "utoronto.ca", "position": "MS student", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Turing Test for Self-Awareness},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=VD3lb3XWqa}\n}", "github": "", "project": "", "reviewers": "deXV;rLt4;Mykv;z1UP", "site": "https://openreview.net/forum?id=VD3lb3XWqa", "pdf_size": 1244776, "rating": "3;3;3;4", "confidence": "3;3;5;4", "soundness": "2;2;1;2", "novelty": "2;2;2;2", "presentation": "3;2;1;4", "wc_summary": "87;135;48;180", "wc_strengths": "90;14;31;162", "wc_weaknesses": "107;195;205;678", "wc_questions": "600;45;19;173", "wc_limitations": "58;4;16;9", "wc_review": "942;393;319;1202", "wc_reply_reviewers": "383;171;151;269", "wc_reply_authors": "335;277;151;540", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 3.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 1.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 112.5, 49.681485485037584 ], "wc_strengths_avg": [ 74.25, 57.984372894772264 ], "wc_weaknesses_avg": [ 296.25, 223.6776419314188 ], "wc_questions_avg": [ 209.25, 233.00898587822746 ], "wc_limitations_avg": [ 21.75, 21.358546298847212 ], "wc_review_avg": [ 714.0, 370.53812219527424 ], "wc_reply_reviewers_avg": [ 243.5, 92.09098761550992 ], "wc_reply_authors_avg": [ 325.75, 140.44816659536713 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cJnGBEubiX8J:scholar.google.com/&scioq=A+Turing+Test+for+Self-Awareness&hl=en&as_sdt=0,33", "gs_version_total": 7, "aff_unique_index": "0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "PRODuctive bandits: Importance Weighting No More", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94919", "id": "VDPZe0NbpE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VDPZe0NbpE", "openreview": "https://openreview.net/forum?id=VDPZe0NbpE", "poster": "", "project": "", "author_site": "Julian Zimmert, Teodor Vanislavov Marinov", "tldr": "", "abstract": "Prod is a seminal algorithm in full-information online learning, which has been conjectured to be fundamentally sub-optimal for multi-armed bandits.\nBy leveraging the interpretation of Prod as a first-order OMD approximation, we present the following surprising results:\n1. Variants of Prod can obtain optimal regret for adversarial multi-armed bandits. 2. There exists a simple and (arguably) importance-weighting free variant with optimal rate. \n3. One can even achieve best-both-worlds guarantees with logarithmic regret in the stochastic regime.\n\nThe bandit algorithms in this work use simple arithmetic update rules without the need of solving optimization problems typical in prior work. Finally, the results directly improve the state of the art of incentive-compatible bandits.", "keywords": "Bandit Algorithms;Online Learning", "primary_area": "bandits", "supplementary_material": "", "author": "Julian Zimmert;Teodor Vanislavov Marinov", "authorids": "~Julian_Zimmert1;~Teodor_Vanislavov_Marinov2", "gender": ";M", "homepage": ";", "dblp": "190/7636;182/8930", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Julian_Zimmert1;~Teodor_Vanislavov_Marinov2", "aff": "Google;Google", "aff_domain": "google.com;google.com", "position": "Postdoc;Researcher", "bibtex": "@inproceedings{\nzimmert2024productive,\ntitle={{PROD}uctive bandits: Importance Weighting No More},\nauthor={Julian Zimmert and Teodor Vanislavov Marinov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VDPZe0NbpE}\n}", "github": "", "reviewers": "zjT8;Noj5;1VVH;r5go;rzG8", "pdf_size": 358710, "rating": "5;5;7;7;7", "confidence": "3;4;4;3;3", "soundness": "2;3;4;3;3", "novelty": "2;2;4;4;4", "presentation": "2;3;3;2;3", "wc_summary": "76;128;138;123;100", "wc_strengths": "108;60;78;68;105", "wc_weaknesses": "227;93;122;82;60", "wc_questions": "15;21;87;39;232", "wc_limitations": "41;55;4;15;5", "wc_review": "467;357;429;327;502", "wc_reply_reviewers": "51;10;13;0;14", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.9797958971132712 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 113.0, 22.3069495897579 ], "wc_strengths_avg": [ 83.8, 19.415457759218555 ], "wc_weaknesses_avg": [ 116.8, 58.60853180211905 ], "wc_questions_avg": [ 78.8, 80.66325061637424 ], "wc_limitations_avg": [ 24.0, 20.454828280872952 ], "wc_review_avg": [ 416.4, 65.67678433053798 ], "wc_reply_reviewers_avg": [ 17.6, 17.41952927033334 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A3SoXDMxsjoJ:scholar.google.com/&scioq=PRODuctive+bandits:+Importance+Weighting+No+More&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Rethinking Inverse Reinforcement Learning: from Data Alignment to Task Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94918", "id": "VFRyS7Wx08", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VFRyS7Wx08", "openreview": "https://openreview.net/forum?id=VFRyS7Wx08", "poster": "/media/PosterPDFs/NeurIPS%202024/94918.png?t=1733775006.992438", "project": "", "author_site": "Weichao Zhou, Wenchao Li", "tldr": "", "abstract": "Many imitation learning (IL) algorithms use inverse reinforcement learning (IRL) to infer a reward function that aligns with the demonstration.\nHowever, the inferred reward functions often fail to capture the underlying task objectives.\nIn this paper, we propose a novel framework for IRL-based IL that prioritizes task alignment over conventional data alignment. Our framework is a semi-supervised approach that leverages expert demonstrations as weak supervision to derive a set of candidate reward functions that align with the task rather than only with the data. It then adopts an adversarial mechanism to train a policy with this set of reward functions to gain a collective validation of the policy's ability to accomplish the task. We provide theoretical insights into this framework's ability to mitigate task-reward misalignment and present a practical implementation. Our experimental results show that our framework outperforms conventional IL baselines in complex and transfer learning scenarios.", "keywords": "inverse reinforcement learning;imitation learning;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/f6b21181b21688bce2169ee3b93e9f58d3a229a9.zip", "author": "Weichao Zhou;Wenchao Li", "authorids": "~Weichao_Zhou1;~Wenchao_Li1", "gender": "M;", "homepage": "https://sites.google.com/view/zwc662/;http://sites.bu.edu/depend/", "dblp": "207/8077;23/5721-1", "google_scholar": "JdiJIF0AAAAJ;zwA5eokAAAAJ", "orcid": "0009-0002-0369-2113;", "linkedin": ";", "or_profile": "~Weichao_Zhou1;~Wenchao_Li1", "aff": "Boston University;Boston University", "aff_domain": "bu.edu;bu.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhou2024rethinking,\ntitle={Rethinking Inverse Reinforcement Learning: from Data Alignment to Task Alignment},\nauthor={Weichao Zhou and Wenchao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VFRyS7Wx08}\n}", "github": "", "reviewers": "fY7t;QeNS;yv3o;MkJQ", "pdf_size": 2433597, "rating": "5;5;6;7", "confidence": "3;4;3;2", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "53;47;89;108", "wc_strengths": "45;36;32;44", "wc_weaknesses": "144;74;69;156", "wc_questions": "3;68;73;2", "wc_limitations": "11;373;45;49", "wc_review": "256;598;308;359", "wc_reply_reviewers": "17;194;9;90", "wc_reply_authors": "482;339;36;187", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 74.25, 25.252475126212875 ], "wc_strengths_avg": [ 39.25, 5.448623679425842 ], "wc_weaknesses_avg": [ 110.75, 39.51819201329939 ], "wc_questions_avg": [ 36.5, 34.04776057246644 ], "wc_limitations_avg": [ 119.5, 147.1011556718709 ], "wc_review_avg": [ 380.25, 130.88616237020628 ], "wc_reply_reviewers_avg": [ 77.5, 74.2983849084218 ], "wc_reply_authors_avg": [ 261.0, 166.60282110456595 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WUJvwoFJwwsJ:scholar.google.com/&scioq=Rethinking+Inverse+Reinforcement+Learning:+from+Data+Alignment+to+Task+Alignment&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "bu.edu;bu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Slight Corruption in Pre-training Data Makes Better Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94917", "id": "VFpXYBqMSU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VFpXYBqMSU", "openreview": "https://openreview.net/forum?id=VFpXYBqMSU", "poster": "/media/PosterPDFs/NeurIPS%202024/94917.png?t=1731700905.0503867", "project": "", "author_site": "Hao Chen, Yujin Han, Diganta Misra, Xiang Li, Kai Hu, Difan Zou, Masashi Sugiyama, Jindong Wang, Bhiksha Raj", "tldr": "", "abstract": "Diffusion models (DMs) have shown remarkable capabilities in generating realistic high-quality images, audios, and videos. \nThey benefit significantly from extensive pre-training on large-scale datasets, including web-crawled data with paired data and conditions, such as image-text and image-class pairs.\nDespite rigorous filtering, these pre-training datasets often inevitably contain corrupted pairs where conditions do not accurately describe the data. \nThis paper presents the first comprehensive study on the impact of such corruption in pre-training data of DMs.\nWe synthetically corrupt ImageNet-1K and CC3M to pre-train and evaluate over $50$ conditional DMs. \nOur empirical findings reveal that various types of slight corruption in pre-training can significantly enhance the quality, diversity, and fidelity of the generated images across different DMs, both during pre-training and downstream adaptation stages. \nTheoretically, we consider a Gaussian mixture model and prove that slight corruption in the condition leads to higher entropy and a reduced 2-Wasserstein distance to the ground truth of the data distribution generated by the corruptly trained DMs.\nInspired by our analysis, we propose a simple method to improve the training of DMs on practical datasets by adding condition embedding perturbations (CEP).\nCEP significantly improves the performance of various DMs in both pre-training and downstream tasks.\nWe hope that our study provides new insights into understanding the data and pre-training processes of DMs.", "keywords": "pre-training noise;diffusion models;latent diffusion models;diffusion transformers;latent consistency models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Hao Chen;Yujin Han;Diganta Misra;Xiang Li;Kai Hu;Difan Zou;Masashi Sugiyama;Jindong Wang;Bhiksha Raj", "authorids": "~Hao_Chen15;~Yujin_Han1;~Diganta_Misra1;~Xiang_Li35;~Kai_Hu2;~Difan_Zou1;~Masashi_Sugiyama1;~Jindong_Wang4;~Bhiksha_Raj1", "gender": "M;F;;;M;M;M;M;M", "homepage": "https://hhhhhhao.github.io/;https://yujinhanml.github.io/;;;https://github.com/hukkai;https://difanzou.github.io/;http://www.ms.k.u-tokyo.ac.jp/sugi/;https://jd92.wang/;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": ";317/6852;;;;161/8923;35/1228;19/2969-1;60/3996", "google_scholar": "tktqkhwAAAAJ;https://scholar.google.co.kr/citations?user=SxpbS5YAAAAJ;;;;Cp4fcTQAAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ;hBZ_tKsAAAAJ;", "orcid": ";;;;;;0000-0001-6658-6743;0000-0002-4833-0880;", "linkedin": "haochen97/;;;;;;;jindong-wang/;", "or_profile": "~Hao_Chen15;~Yujin_Han1;~Diganta_Misra1;~Xiang_Li35;~Kai_Hu2;~Difan_Zou1;~Masashi_Sugiyama1;~Jindong_Wang4;~Bhiksha_Raj1", "aff": "Carnegie Mellon University;the University of Hong Kong;;;Carnegie Mellon University;University of Hong Kong;The University of Tokyo;Microsoft Research;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "andrew.cmu.edu;cs.hku.hk;;;cmu.edu;hku.hk;u-tokyo.ac.jp;microsoft.com;mbzuai.ac.ae", "position": "PhD student;PhD student;;;PhD student;Assistant Professor;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024slight,\ntitle={Slight Corruption in Pre-training Data Makes Better Diffusion Models},\nauthor={Hao Chen and Yujin Han and Diganta Misra and Xiang Li and Kai Hu and Difan Zou and Masashi Sugiyama and Jindong Wang and Bhiksha Raj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VFpXYBqMSU}\n}", "github": "", "reviewers": "6StB;mxHS;XQzf;45qi", "pdf_size": 9252056, "rating": "6;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "63;97;76;82", "wc_strengths": "32;31;65;100", "wc_weaknesses": "62;83;321;102", "wc_questions": "216;5;164;1", "wc_limitations": "1;6;10;50", "wc_review": "374;222;636;335", "wc_reply_reviewers": "28;26;31;24", "wc_reply_authors": "20;23;23;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 12.216791722870616 ], "wc_strengths_avg": [ 57.0, 28.34607556611673 ], "wc_weaknesses_avg": [ 142.0, 104.30963522129679 ], "wc_questions_avg": [ 96.5, 95.30083945065752 ], "wc_limitations_avg": [ 16.75, 19.45989465541887 ], "wc_review_avg": [ 391.75, 151.66472068348656 ], "wc_reply_reviewers_avg": [ 27.25, 2.5860201081971503 ], "wc_reply_authors_avg": [ 20.25, 3.2691742076555053 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9784087597028682083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "andrew.cmu.edu;cs.hku.hk;;;cmu.edu;hku.hk;u-tokyo.ac.jp;microsoft.com;mbzuai.ac.ae", "author_num": 9, "aff_unique_index": "0;1;0;1;2;3;4", "aff_unique_norm": "Carnegie Mellon University;University of Hong Kong;University of Tokyo;Microsoft;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;Microsoft Research;", "aff_unique_url": "https://www.cmu.edu;https://www.hku.hk;https://www.u-tokyo.ac.jp;https://www.microsoft.com/en-us/research;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;HKU;UTokyo;MSR;MBZUAI", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;2;0;3", "aff_country_unique": "United States;China;Japan;United Arab Emirates" }, { "title": "StoryDiffusion: Consistent Self-Attention for Long-Range Image and Video Generation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94916", "id": "VFqzxhINFU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VFqzxhINFU", "openreview": "https://openreview.net/forum?id=VFqzxhINFU", "poster": "/media/PosterPDFs/NeurIPS%202024/94916.png?t=1733118375.1500623", "project": "", "author_site": "Yupeng Zhou, Daquan Zhou, Ming-Ming Cheng, Jiashi Feng, Qibin Hou", "tldr": "", "abstract": "For recent diffusion-based generative models, maintaining consistent content across a series of generated images, especially those containing subjects and complex details, presents a significant challenge. In this paper, we propose a simple but effective self-attention mechanism, termed Consistent Self-Attention, that boosts the consistency between the generated images. It can be used to augment pre-trained diffusion-based text-to-image models in a zero-shot manner. Based on the images with consistent content, we further show that our method can be extended to long range video generation by introducing a semantic space temporal motion prediction module, named Semantic Motion Predictor. It is trained to estimate the motion conditions between two provided images in the semantic spaces. This module converts the generated sequence of images into videos with smooth transitions and consistent subjects that are more stable than the modules based on latent spaces only, especially in the context of long video generation. By merging these two novel components, our framework, referred to as StoryDiffusion, can describe a text-based story with consistent images or videos encompassing a rich variety of contents. The proposed StoryDiffusion encompasses pioneering explorations in visual story generation with the presentation of images and videos, which we hope could inspire more research from the aspect of architectural modifications.", "keywords": "Consistent character generation;Diffusion model;Image generation;Video generation;Transition prediction", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/965507aa3e8078e9059b62956ec4ad01ca84154e.zip", "author": "Yupeng Zhou;Daquan Zhou;Ming-Ming Cheng;Jiashi Feng;Qibin Hou", "authorids": "~Yupeng_Zhou1;~Daquan_Zhou1;~Ming-Ming_Cheng3;~Jiashi_Feng1;~Qibin_Hou1", "gender": ";;M;;M", "homepage": ";;https://mmcheng.net;;https://houqb.github.io/", "dblp": ";;45/7592;;40/4112", "google_scholar": ";;huWpVyEAAAAJ;;fF8OFV8AAAAJ", "orcid": ";;0000-0001-5550-8758;;", "linkedin": ";;;;", "or_profile": "~Yupeng_Zhou1;~Daquan_Zhou1;~Ming-Ming_Cheng3;~Jiashi_Feng1;~Qibin_Hou1", "aff": ";;Nankai University;;Nankai University", "aff_domain": ";;nankai.edu.cn;;nankai.edu.cn", "position": ";;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nzhou2024storydiffusion,\ntitle={StoryDiffusion: Consistent Self-Attention for Long-Range Image and Video Generation},\nauthor={Yupeng Zhou and Daquan Zhou and Ming-Ming Cheng and Jiashi Feng and Qibin Hou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VFqzxhINFU}\n}", "github": "", "reviewers": "S3nw;uTuy;fV1e;SSyn", "pdf_size": 27549458, "rating": "6;7;7;8", "confidence": "4;5;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;4;2;4", "wc_summary": "51;106;41;164", "wc_strengths": "37;87;66;54", "wc_weaknesses": "203;175;217;94", "wc_questions": "57;157;126;44", "wc_limitations": "14;86;53;1", "wc_review": "362;611;503;357", "wc_reply_reviewers": "37;29;21;0", "wc_reply_authors": "33;23;14;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 49.1248409666637 ], "wc_strengths_avg": [ 61.0, 18.207141456033124 ], "wc_weaknesses_avg": [ 172.25, 47.64123738947174 ], "wc_questions_avg": [ 96.0, 47.026588224109986 ], "wc_limitations_avg": [ 38.5, 33.44024521441193 ], "wc_review_avg": [ 458.25, 105.88997827934426 ], "wc_reply_reviewers_avg": [ 21.75, 13.77270852083932 ], "wc_reply_authors_avg": [ 17.5, 12.134661099511597 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1874629174939578425&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;nankai.edu.cn;;nankai.edu.cn", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Nankai University", "aff_unique_dep": "", "aff_unique_url": "http://www.nankai.edu.cn", "aff_unique_abbr": "NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "VLM4Bio: A Benchmark Dataset to Evaluate Pretrained Vision-Language Models for Trait Discovery from Biological Images", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97668", "id": "VHa0XNjWj2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VHa0XNjWj2", "openreview": "https://openreview.net/forum?id=VHa0XNjWj2", "poster": "/media/PosterPDFs/NeurIPS%202024/97668.png?t=1731721992.7699149", "project": "", "author_site": "M. Maruf, Arka Daw, Kazi Sajeed Mehrab, Harish Babu Manogaran, Abhilash Neog, Medha Sawhney, Mridul Khurana, James Balhoff, Yasin Bakis, Bahadir Altintas, Matthew Thompson, Elizabeth Campolongo, Josef Uyeda, Hilmar Lapp, Henry Bart, Paula Mabee, Yu Su, Wei-Lun (Harry) Chao, Charles Stewart, Tanya Berger-Wolf, Wasila Dahdul, Anuj Karpatne", "tldr": "", "abstract": "Images are increasingly becoming the currency for documenting biodiversity on the planet, providing novel opportunities for accelerating scientific discoveries in the field of organismal biology, especially with the advent of large vision-language models (VLMs). We ask if pre-trained VLMs can aid scientists in answering a range of biologically relevant questions without any additional fine-tuning. In this paper, we evaluate the effectiveness of $12$ state-of-the-art (SOTA) VLMs in the field of organismal biology using a novel dataset, VLM4Bio, consisting of $469K$ question-answer pairs involving $30K$ images from three groups of organisms: fishes, birds, and butterflies, covering five biologically relevant tasks. We also explore the effects of applying prompting techniques and tests for reasoning hallucination on the performance of VLMs, shedding new light on the capabilities of current SOTA VLMs in answering biologically relevant questions using images.", "keywords": "Organismal biology;Trait Discovery;Task-specific Dataset;Taxonomic classification;Trait Identification;Grounding and Referring;Reasoning for Vision-Language Models", "primary_area": "", "supplementary_material": "/attachment/21fb997f1720bd1de4290df9f4965e212a80345e.pdf", "author": "M. Maruf;Arka Daw;Kazi Sajeed Mehrab;Harish Babu Manogaran;Abhilash Neog;Medha Sawhney;Mridul Khurana;James Balhoff;Yasin Bakis;Bahadir Altintas;Matthew J Thompson;Elizabeth G Campolongo;Josef Uyeda;Hilmar Lapp;Henry Bart;Paula Mabee;Yu Su;Wei-Lun Chao;Charles Stewart;Tanya Berger-Wolf;Wasila M Dahdul;Anuj Karpatne", "authorids": "~M._Maruf1;~Arka_Daw1;~Kazi_Sajeed_Mehrab1;~Harish_Babu_Manogaran1;~Abhilash_Neog1;~Medha_Sawhney1;~Mridul_Khurana1;~James_Balhoff1;~Yasin_Bakis1;~Bahadir_Altintas1;~Matthew_J_Thompson1;~Elizabeth_G_Campolongo1;~Josef_Uyeda1;~Hilmar_Lapp1;~Henry_Bart1;~Paula_Mabee1;~Yu_Su2;~Wei-Lun_Chao1;~Charles_Stewart1;~Tanya_Berger-Wolf2;~Wasila_M_Dahdul1;~Anuj_Karpatne1", "gender": ";M;M;M;M;F;M;M;;M;M;;;M;M;F;M;M;M;F;;", "homepage": "https://people.cs.vt.edu/marufm/;https://people.cs.vt.edu/darka/;;;https://abhilash-neog.github.io/;https://sawhney-medha.github.io/;https://mridulk97.github.io/;;;http://www.bahadiraltintas.com;;;https://uyedalab.com;http://lappland.io;;https://www.neonscience.org/person/paula-mabee;http://ysu1989.github.io;https://sites.google.com/view/wei-lun-harry-chao;https://www.cs.rpi.edu/~stewart;https://cse.osu.edu/people/berger-wolf.1;;http://people.cs.vt.edu/karpatne/", "dblp": "268/8054.html;252/5645;290/2015;348/9579.html;261/4908;359/0556;348/9860;90/11448.html;53/7185.html;383/3537;;;;25/1647;56/399.html;;38/1070-1;64/8842;43/471;b/TYBergerWolf;;09/9720", "google_scholar": "SiY3Sz4AAAAJ;pz2Nm8AAAAAJ;rdh3gVMAAAAJ;tkA8j9QAAAAJ;hnU1g28AAAAJ;o9Gj0dMAAAAJ;AEer2h4AAAAJ;GqCj2G8AAAAJ;bRO-9m8AAAAJ;jOrmFjsAAAAJ;;;lo-oG3EAAAAJ;CK6Qg7gAAAAJ;K4typXsAAAAJ;ZKnlvcoAAAAJ;rIh5OqoAAAAJ;PGKakWwAAAAJ;https://scholar.google.com/citations?hl=en;fDQUHyIAAAAJ;;", "orcid": ";;;0000-0003-3709-4656;0009-0009-0905-405X;0009-0001-3265-4557;0009-0003-9346-3206;0000-0002-8688-6599;0000-0001-6144-9440;0000-0002-6633-8480;0000-0003-0583-8585;;;0000-0001-9107-0714;0000-0002-5662-9444;;;0000-0003-1269-7231;;;;", "linkedin": ";arka-daw-1207a41a3/;;harish-babu-m/;abhilash-neog-2021;medha-sawhney/;mridulk97;balhoff;;;thompson-m-j/;;;hlapp/;;;;;;;;", "or_profile": "~M._Maruf1;~Arka_Daw1;~Kazi_Sajeed_Mehrab1;~Harish_Babu_Manogaran1;~Abhilash_Neog1;~Medha_Sawhney1;~Mridul_Khurana1;~James_Balhoff1;~Yasin_Bakis1;~Bahadir_Altintas1;~Matthew_J_Thompson1;~Elizabeth_G_Campolongo1;~Josef_Uyeda1;~Hilmar_Lapp1;~Henry_Bart1;~Paula_Mabee1;~Yu_Su2;~Wei-Lun_Chao1;~Charles_Stewart1;~Tanya_Berger-Wolf2;~Wasila_M_Dahdul1;~Anuj_Karpatne1", "aff": "Virginia Tech;Oak Ridge National Laboratory;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;Virginia Tech;NVIDIA;Virginia Polytechnic Institute and State University;University of North Carolina at Chapel Hill;Tulane University;Abant Izzet Baysal University;Ohio State University, Columbus;;Virginia Polytechnic Institute and State University;Duke University;Tulane University;National Ecological Observatory Network, Battelle;Microsoft;Ohio State University;Rensselaer Polytechnic Institute;Ohio State University;;Virginia Polytechnic Institute and State University", "aff_domain": "vt.edu;ornl.gov;vt.edu;vt.edu;vt.edu;nvidia.com;vt.edu;unc.edu;tulane.edu;ibu.edu.tr;osu.edu;;vt.edu;duke.edu;tulane.edu;battelle.org;microsoft.com;osu.edu;cs.rpi.edu;osu.edu;;vt.edu", "position": "PhD student;Researcher;PhD student;MS student;MS student;Intern;PhD student;Researcher;Principal Researcher;Associate Professor;Research Software Engineer;;Assistant Professor;Researcher;Principal Researcher;Principal Researcher;Senior Researcher;Assistant Professor;Professor;Professor;;Associate Professor", "bibtex": "@inproceedings{\nmaruf2024vlmbio,\ntitle={{VLM}4Bio: A Benchmark Dataset to Evaluate Pretrained Vision-Language Models for Trait Discovery from Biological Images},\nauthor={M. Maruf and Arka Daw and Kazi Sajeed Mehrab and Harish Babu Manogaran and Abhilash Neog and Medha Sawhney and Mridul Khurana and James Balhoff and Yasin Bakis and Bahadir Altintas and Matthew J Thompson and Elizabeth G Campolongo and Josef Uyeda and Hilmar Lapp and Henry Bart and Paula Mabee and Yu Su and Wei-Lun Chao and Charles Stewart and Tanya Berger-Wolf and Wasila M Dahdul and Anuj Karpatne},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=VHa0XNjWj2}\n}", "github": "", "reviewers": "8hpk;W944;eVWa", "pdf_size": 9700797, "rating": "6;7;7", "confidence": "3;3;4", "wc_summary_and_contributions": "53;30;76", "wc_strengths": "72;5;42", "wc_improvement": "95;7;30", "wc_limitations": "28;2;98", "wc_correctness": "10;3;22", "wc_clarity": "16;4;20", "wc_relation_to_prior_work": "25;7;1", "wc_documentation": "18;5;8", "wc_additional_feedback": "1;1;1", "wc_review": "318;64;298", "wc_reply_reviewers": "16;13;29", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 53.0, 18.7794213613377 ], "wc_strengths_avg": [ 39.666666666666664, 27.402351886086144 ], "wc_improvement_avg": [ 44.0, 37.26481808176 ], "wc_limitations_avg": [ 42.666666666666664, 40.54078878802872 ], "wc_correctness_avg": [ 11.666666666666666, 7.84573486395988 ], "wc_clarity_avg": [ 13.333333333333334, 6.79869268479038 ], "wc_relation_to_prior_work_avg": [ 11.0, 10.198039027185569 ], "wc_documentation_avg": [ 10.333333333333334, 5.557777333511022 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 226.66666666666666, 115.31213678051799 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 6.944222218666553 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 22, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=167096451656570597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "vt.edu;ornl.gov;vt.edu;vt.edu;vt.edu;nvidia.com;vt.edu;unc.edu;tulane.edu;ibu.edu.tr;osu.edu;;vt.edu;duke.edu;tulane.edu;battelle.org;microsoft.com;osu.edu;cs.rpi.edu;osu.edu;;vt.edu", "author_num": 22, "aff_unique_index": "0;1;0;0;0;2;0;3;4;5;6;0;7;4;8;9;6;10;6;0", "aff_unique_norm": "Virginia Tech;Oak Ridge National Laboratory;NVIDIA;University of North Carolina;Tulane University;Abant Izzet Baysal University;Ohio State University;Duke University;Battelle;Microsoft;Rensselaer Polytechnic Institute", "aff_unique_dep": ";;NVIDIA Corporation;;;;;;National Ecological Observatory Network;Microsoft Corporation;", "aff_unique_url": "https://www.vt.edu;https://www.ornl.gov;https://www.nvidia.com;https://www.unc.edu;https://www.tulane.edu;https://www.abantib.edu.tr;https://www.osu.edu;https://www.duke.edu;https://www.battelle.org;https://www.microsoft.com;https://www.rpi.edu", "aff_unique_abbr": "VT;ORNL;NVIDIA;UNC;Tulane;;OSU;Duke;;Microsoft;RPI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chapel Hill;Columbus", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;T\u00fcrkiye" }, { "title": "WizardArena: Post-training Large Language Models via Simulated Offline Chatbot Arena", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94915", "id": "VHva3d836i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VHva3d836i", "openreview": "https://openreview.net/forum?id=VHva3d836i", "poster": "/media/PosterPDFs/NeurIPS%202024/94915.png?t=1731597123.136182", "project": "", "author_site": "HAIPENG LUO, Qingfeng Sun, Can Xu, Pu Zhao, Qingwei Lin, Jian-Guang Lou, Shifeng Chen, Yansong Tang, Weizhu Chen", "tldr": "", "abstract": "Recent work demonstrates that, post-training large language models with open-domain instruction following data have achieved colossal success. Simultaneously, human Chatbot Arena has emerged as one of the most reasonable benchmarks for model evaluation and developmental guidance. However, the processes of manually curating high-quality training data and utilizing online human evaluation platforms are both expensive and limited. To mitigate the manual and temporal costs associated with post-training, this paper introduces a Simulated Chatbot Arena named WizardArena, which is fully based on and powered by open-source LLMs. For evaluation scenario, WizardArena can efficiently predict accurate performance rankings among different models based on offline test set. For training scenario, we simulate arena battles among various state-of-the-art models on a large scale of instruction data, subsequently leveraging the battle results to constantly enhance target model in both the supervised fine-tuning and reinforcement learning . Experimental results demonstrate that our WizardArena aligns closely with the online human arena rankings, and our models trained on offline extensive battle data exhibit significant performance improvements during SFT, DPO, and PPO stages.", "keywords": "post-training;simulated chatbot arena;large language models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Haipeng Luo;Qingfeng Sun;Can Xu;Pu Zhao;Qingwei Lin;Jian-Guang Lou;Shifeng Chen;Yansong Tang;Weizhu Chen", "authorids": "~Haipeng_Luo2;~Qingfeng_Sun1;~Can_Xu2;~Pu_Zhao3;~Qingwei_Lin1;~Jian-Guang_Lou1;~Shifeng_Chen1;~Yansong_Tang1;~Weizhu_Chen1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/flyinghpluo;https://victorsungo.github.io;;https://www.microsoft.com/en-us/research/people/puzhao/;https://www.microsoft.com/en-us/research/people/qlin/;https://www.microsoft.com/en-us/research/people/jlou/;;https://andytang15.github.io/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": ";;;75/8475-4.html;120/0743;37/1917;84/4529;214/9568;79/2536", "google_scholar": "NMsJnigAAAAJ;https://scholar.google.com.tw/citations?user=GLMKUEwAAAAJ;5aiE_NcAAAAJ;G3kyd-MAAAAJ;https://scholar.google.co.jp/citations?hl=zh-CN;alDxINIAAAAJ;;TIbistUAAAAJ;LG_E-4EAAAAJ", "orcid": ";;0000-0002-1949-5715;0000-0002-4518-323X;0000-0003-2559-2383;;0000-0003-0677-7358;;", "linkedin": ";;;;;;;;", "or_profile": "~Haipeng_Luo2;~Qingfeng_Sun1;~Can_Xu2;~Pu_Zhao3;~Qingwei_Lin1;~Jian-Guang_Lou1;~Shifeng_Chen1;~Yansong_Tang1;~Weizhu_Chen1", "aff": "University of Chinese Academy of Sciences;Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft Research Asia;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences;Tsinghua University;Microsoft GenAI", "aff_domain": "ucas.ac.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;siat.ac.cn;tsinghua.edu.cn;microsoft.com", "position": "MS student;Researcher;Researcher;Researcher;Sr. Principal Researcher;Principal Researcher;Associate Professor;Assistant Professor;Vice President", "bibtex": "@inproceedings{\nluo2024wizardarena,\ntitle={WizardArena: Post-training Large Language Models via Simulated Offline Chatbot Arena},\nauthor={Haipeng Luo and Qingfeng Sun and Can Xu and Pu Zhao and Qingwei Lin and Jian-Guang Lou and Shifeng Chen and Yansong Tang and Weizhu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VHva3d836i}\n}", "github": "", "reviewers": "vqCh;B73Q;cCkS;tJho", "pdf_size": 635479, "rating": "3;5;5;6", "confidence": "3;4;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "37;84;76;116", "wc_strengths": "25;33;54;44", "wc_weaknesses": "214;306;90;106", "wc_questions": "1;2;79;1", "wc_limitations": "1;7;24;1", "wc_review": "278;432;323;268", "wc_reply_reviewers": "86;309;42;102", "wc_reply_authors": "528;1519;212;248", "reply_reviewers": "1;2;1;1", "reply_authors": "3;6;3;3", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.25, 28.12805538959279 ], "wc_strengths_avg": [ 39.0, 10.977249200050075 ], "wc_weaknesses_avg": [ 179.0, 87.46999485537883 ], "wc_questions_avg": [ 20.75, 33.633130987167995 ], "wc_limitations_avg": [ 8.25, 9.41740410091868 ], "wc_review_avg": [ 325.25, 65.0206697904597 ], "wc_reply_reviewers_avg": [ 134.75, 102.9742079357739 ], "wc_reply_authors_avg": [ 626.75, 529.4645290290937 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZH8MlfVij8EJ:scholar.google.com/&scioq=WizardArena:+Post-training+Large+Language+Models+via+Simulated+Offline+Chatbot+Arena&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ucas.ac.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;siat.ac.cn;tsinghua.edu.cn;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;2;3;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Microsoft;Chinese Academy of Sciences;Tsinghua University", "aff_unique_dep": ";Microsoft Corporation;Shenzhen Institutes of Advanced Technology;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.microsoft.com;http://www.siat.cas.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UCAS;Microsoft;SIAT;THU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Shenzhen", "aff_country_unique_index": "0;1;1;1;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Learning Where to Edit Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94914", "id": "VIlyDguGEz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VIlyDguGEz", "openreview": "https://openreview.net/forum?id=VIlyDguGEz", "poster": "/media/PosterPDFs/NeurIPS%202024/94914.png?t=1733293391.877688", "project": "", "author_site": "Yunqiao Yang, Long-Kai Huang, Shengzhuang Chen, Kede Ma, Ying Wei", "tldr": "", "abstract": "Model editing aims to data-efficiently correct predictive errors of large pre-trained models while ensuring generalization to neighboring failures and locality to minimize unintended effects on unrelated examples. While significant progress has been made in editing Transformer-based large language models, effective strategies for editing vision Transformers (ViTs) in computer vision remain largely untapped. In this paper, we take initial steps towards correcting predictive errors of ViTs, particularly those arising from subpopulation shifts. Taking a locate-then-edit approach, we first address the ``where-to-edit`` challenge by meta-learning a hypernetwork on CutMix-augmented data generated for editing reliability. This trained hypernetwork produces generalizable binary masks that identify a sparse subset of structured model parameters, responsive to real-world failure samples. Afterward, we solve the ``how-to-edit`` problem by simply fine-tuning the identified parameters using a variant of gradient descent to achieve successful edits. To validate our method, we construct an editing benchmark that introduces subpopulation shifts towards natural underrepresented images and AI-generated images, thereby revealing the limitations of pre-trained ViTs for object recognition. Our approach not only achieves superior performance on the proposed benchmark but also allows for adjustable trade-offs between generalization and locality. Our code is available at https://github.com/hustyyq/Where-to-Edit.", "keywords": "Model Editing;Vision Transformer", "primary_area": "other", "supplementary_material": "", "author": "Yunqiao Yang;Long-Kai Huang;Shengzhuang Chen;Kede Ma;Ying Wei", "authorids": "~Yunqiao_Yang1;~Long-Kai_Huang1;~Shengzhuang_Chen1;~Kede_Ma2;~Ying_Wei1", "gender": ";;M;;F", "homepage": ";https://sites.google.com/site/longkaihugo/home;;https://kedema.org/;https://wei-ying.net/", "dblp": "292/2104;133/2006;;127/1809;14/4899-1", "google_scholar": ";CaP64WUAAAAJ;kpKst1UAAAAJ;https://scholar.google.com.hk/citations?user=sfzOyFoAAAAJ;5UpFdKsAAAAJ", "orcid": "0000-0003-0109-8903;0000-0001-5263-1443;;0000-0001-8608-1128;", "linkedin": ";;jerry-chen-45bb15156;;", "or_profile": "~Yunqiao_Yang1;~Long-Kai_Huang1;~Shengzhuang_Chen1;~Kede_Ma2;~Ying_Wei1", "aff": "City University of Hong Kong;Tencent;City University of Hong Kong;City University of Hong Kong;Nanyang Technological University", "aff_domain": "cityu.edu.hk;tencent.com;cityu.edu.hk;cityu.edu.hk;ntu.edu.sg", "position": "PhD student;Researcher;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024learning,\ntitle={Learning Where to Edit Vision Transformers},\nauthor={Yunqiao Yang and Long-Kai Huang and Shengzhuang Chen and Kede Ma and Ying Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VIlyDguGEz}\n}", "github": "", "reviewers": "91WR;MKpn;sCvZ;wreN", "pdf_size": 7730240, "rating": "5;6;7;7", "confidence": "4;2;3;3", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "61;65;70;83", "wc_strengths": "30;101;85;135", "wc_weaknesses": "202;26;80;112", "wc_questions": "180;47;6;84", "wc_limitations": "9;21;5;12", "wc_review": "482;260;246;426", "wc_reply_reviewers": "401;16;0;13", "wc_reply_authors": "1131;32;0;32", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 69.75, 8.287792227125388 ], "wc_strengths_avg": [ 87.75, 37.91684981640748 ], "wc_weaknesses_avg": [ 105.0, 63.88270501473775 ], "wc_questions_avg": [ 79.25, 64.37924743269372 ], "wc_limitations_avg": [ 11.75, 5.889609494694874 ], "wc_review_avg": [ 353.5, 102.55120672132533 ], "wc_reply_reviewers_avg": [ 107.5, 169.55898678630984 ], "wc_reply_authors_avg": [ 298.75, 480.67732159942807 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4483587292690440295&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "cityu.edu.hk;tencent.com;cityu.edu.hk;cityu.edu.hk;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "City University of Hong Kong;Tencent;Nanyang Technological University", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.tencent.com;https://www.ntu.edu.sg", "aff_unique_abbr": "CityU;Tencent;NTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Connectivity-Driven Pseudo-Labeling Makes Stronger Cross-Domain Segmenters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94913", "id": "VIqQSFNjyP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VIqQSFNjyP", "openreview": "https://openreview.net/forum?id=VIqQSFNjyP", "poster": "/media/PosterPDFs/NeurIPS%202024/94913.png?t=1731506543.9209926", "project": "", "author_site": "Dong Zhao, Qi Zang, Shuang Wang, Nicu Sebe, Zhun Zhong", "tldr": "", "abstract": "Presently, pseudo-labeling stands as a prevailing approach in cross-domain semantic segmentation, enhancing model efficacy by training with pixels assigned with reliable pseudo-labels. However, we identify two key limitations within this paradigm: (1) under relatively severe domain shifts, most selected reliable pixels appear speckled and remain noisy. (2) when dealing with wild data, some pixels belonging to the open-set class may exhibit high confidence and also appear speckled. These two points make it difficult for the pixel-level selection mechanism to identify and correct these speckled close- and open-set noises. As a result, error accumulation is continuously introduced into subsequent self-training, leading to inefficiencies in pseudo-labeling. To address these limitations, we propose a novel method called Semantic Connectivity-driven Pseudo-labeling (SeCo). SeCo formulates pseudo-labels at the connectivity level, which makes it easier to locate and correct closed and open set noise. Specifically, SeCo comprises two key components: Pixel Semantic Aggregation (PSA) and Semantic Connectivity Correction (SCC). Initially, PSA categorizes semantics into ``stuff'' and ``things'' categories and aggregates speckled pseudo-labels into semantic connectivity through efficient interaction with the Segment Anything Model (SAM). This enables us not only to obtain accurate boundaries but also simplifies noise localization. Subsequently, SCC introduces a simple connectivity classification task, which enables us to locate and correct connectivity noise with the guidance of loss distribution. Extensive experiments demonstrate that SeCo can be flexibly applied to various cross-domain semantic segmentation tasks, \\textit{i.e.} domain generalization and domain adaptation, even including source-free, and black-box domain adaptation, significantly improving the performance of existing state-of-the-art methods. The code is provided in the appendix and will be open-source.", "keywords": "Domain adaptation;Domain generalization;Semantic segmentation;Denoising;Segment anything model", "primary_area": "machine_vision", "supplementary_material": "/attachment/b7f4d2d5c8557ad48e4c28664f7399e3f9685772.zip", "author": "Dong Zhao;Qi Zang;Shuang Wang;Nicu Sebe;Zhun Zhong", "authorids": "~Dong_Zhao5;~Qi_Zang1;~Shuang_Wang1;~Nicu_Sebe1;~Zhun_Zhong1", "gender": "M;F;F;M;M", "homepage": ";;https://faculty.xidian.edu.cn/WS1/zh_CN/index.htm;http://disi.unitn.it/~sebe/;http://zhunzhong.site", "dblp": ";;;20/3519;32/6525", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.it/citations?user=stFCYOAAAAAJ;nZizkQ0AAAAJ", "orcid": "0000-0001-9880-8822;;;0000-0002-6597-7248;", "linkedin": ";;;;", "or_profile": "~Dong_Zhao5;~Qi_Zang1;~Shuang_Wang1;~Nicu_Sebe1;~Zhun_Zhong1", "aff": "Xidian University;Xi'an University of Electronic Science and Technology;Xidian University;University of Trento;University of Nottingham", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;unitn.it;nottingham.ac.uk", "position": "PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024connectivitydriven,\ntitle={Connectivity-Driven Pseudo-Labeling Makes Stronger Cross-Domain Segmenters},\nauthor={Dong Zhao and Qi Zang and Shuang Wang and Nicu Sebe and Zhun Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VIqQSFNjyP}\n}", "github": "", "reviewers": "pA2t;SXcd;hfgi", "pdf_size": 8461724, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "73;85;93", "wc_strengths": "38;68;96", "wc_weaknesses": "135;82;80", "wc_questions": "6;29;37", "wc_limitations": "13;8;6", "wc_review": "265;272;312", "wc_reply_reviewers": "87;21;0", "wc_reply_authors": "508;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 83.66666666666667, 8.219218670625303 ], "wc_strengths_avg": [ 67.33333333333333, 23.683092891108814 ], "wc_weaknesses_avg": [ 99.0, 25.468935326524086 ], "wc_questions_avg": [ 24.0, 13.140268896284683 ], "wc_limitations_avg": [ 9.0, 2.943920288775949 ], "wc_review_avg": [ 283.0, 20.704266871026046 ], "wc_reply_reviewers_avg": [ 36.0, 37.067505985701274 ], "wc_reply_authors_avg": [ 169.33333333333334, 239.4734965618441 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14255941193687837418&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;unitn.it;nottingham.ac.uk", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Xidian University;Xi'an University of Electronic Science and Technology;University of Trento;University of Nottingham", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/;https://www.unitn.it;https://www.nottingham.ac.uk", "aff_unique_abbr": "Xidian;Xidian University;UniTN;UoN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "China;Italy;United Kingdom" }, { "title": "WISE: Rethinking the Knowledge Memory for Lifelong Model Editing of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94912", "id": "VJMYOfJVC2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VJMYOfJVC2", "openreview": "https://openreview.net/forum?id=VJMYOfJVC2", "poster": "/media/PosterPDFs/NeurIPS%202024/94912.png?t=1729684754.0451572", "project": "", "author_site": "Peng Wang, Zexi Li, Ningyu Zhang, Ziwen Xu, Yunzhi Yao, Yong Jiang, Pengjun Xie, Fei Huang, Huajun Chen", "tldr": "", "abstract": "Large language models (LLMs) need knowledge updates to meet the ever-growing world facts and correct the hallucinated responses, facilitating the methods of lifelong model editing. Where the updated knowledge resides in memories is a fundamental question for model editing. In this paper, we find that editing either long-term memory (direct model parameters) or working memory (non-parametric knowledge of neural network activations/representations by retrieval) will result in an impossible triangle---reliability, generalization, and locality can not be realized together in the lifelong editing settings. For long-term memory, directly editing the parameters will cause conflicts with irrelevant pretrained knowledge or previous edits (poor reliability and locality). For working memory, retrieval-based activations can hardly make the model understand the edits and generalize (poor generalization). Therefore, we propose WISE to bridge the gap between memories. In WISE, we design a dual parametric memory scheme, which consists of the main memory for the pretrained knowledge and a side memory for the edited knowledge. We only edit the knowledge in the side memory and train a router to decide which memory to go through when given a query. For continual editing, we devise a knowledge-sharding mechanism where different sets of edits reside in distinct subspaces of parameters, and are subsequently merged into a shared memory without conflicts. Extensive experiments show that WISE can outperform previous model editing methods and overcome the impossible triangle under lifelong model editing of question answering, hallucination, and out-of-distribution settings across trending LLM architectures, e.g., GPT, LLaMA, and Mistral.", "keywords": "Lifelong Model Editing;Large Language Model;Knowledge Memory", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/8bee0ea21f7cdafd50e57e3b3816f351101a66d9.zip", "author": "Peng Wang;Zexi Li;Ningyu Zhang;Ziwen Xu;Yunzhi Yao;Yong Jiang;Pengjun Xie;Fei Huang;Huajun Chen", "authorids": "~Peng_Wang28;~Zexi_Li1;~Ningyu_Zhang1;~Ziwen_Xu1;~Yunzhi_Yao1;~Yong_Jiang1;~Pengjun_Xie2;~Fei_Huang2;~Huajun_Chen1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";https://zexilee.github.io/about-zexili/;https://person.zju.edu.cn/en/ningyu;;http://yyzcowtodd.cn;http://jiangyong.site/;;https://sites.google.com/view/fei-huang;", "dblp": ";151/9187-1;139/4181-1.html;277/6261;295/9476;;212/1755.html;h/FeiHuang.html;94/5089", "google_scholar": "vLN6gsMAAAAJ;https://scholar.google.com.hk/citations?user=6lMg5eoAAAAJ;xQDOPvsAAAAJ;5oqIUicAAAAJ;https://scholar.google.com.hk/citations?user=nAagIwEAAAAJ;sxXZWQQAAAAJ;;9r98PpoAAAAJ;", "orcid": ";0000-0003-0831-3549;0000-0002-1970-0678;;;;;;", "linkedin": ";;ningyuzhang/;;;;;fei-huang-cas-cmu;", "or_profile": "~Peng_Wang28;~Zexi_Li1;~Ningyu_Zhang1;~Ziwen_Xu1;~Yunzhi_Yao1;~Yong_Jiang1;~Pengjun_Xie2;~Fei_Huang2;~Huajun_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;University of Electronic Science and Technology of China;University of California, Los Angeles;Tongyi Lab;Alibaba Group;Alibaba Group US;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;uestc.edu.cn;ucla.edu;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;zju.edu.cn", "position": "MS student;PhD student;Associate Professor;Undergrad student;Researcher;Researcher;Researcher;Senior Research Director;Full Professor", "bibtex": "@inproceedings{\nwang2024wise,\ntitle={{WISE}: Rethinking the Knowledge Memory for Lifelong Model Editing of Large Language Models},\nauthor={Peng Wang and Zexi Li and Ningyu Zhang and Ziwen Xu and Yunzhi Yao and Yong Jiang and Pengjun Xie and Fei Huang and Huajun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VJMYOfJVC2}\n}", "github": "", "reviewers": "5h8i;fviW;pT3T;KKAN", "pdf_size": 1565221, "rating": "6;7;7;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "19;73;115;200", "wc_strengths": "28;25;199;117", "wc_weaknesses": "213;13;108;83", "wc_questions": "67;397;277;1", "wc_limitations": "14;8;15;9", "wc_review": "341;516;714;410", "wc_reply_reviewers": "20;22;49;19", "wc_reply_authors": "34;70;0;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.75, 66.14897958396637 ], "wc_strengths_avg": [ 92.25, 71.86576027566953 ], "wc_weaknesses_avg": [ 104.25, 71.7961524038719 ], "wc_questions_avg": [ 185.5, 159.0495205902866 ], "wc_limitations_avg": [ 11.5, 3.0413812651491097 ], "wc_review_avg": [ 495.25, 140.83922571499744 ], "wc_reply_reviewers_avg": [ 27.5, 12.459935794377111 ], "wc_reply_authors_avg": [ 32.75, 24.973736204260668 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4370185798650695313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;uestc.edu.cn;ucla.edu;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;zju.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;2;3;4;4;0", "aff_unique_norm": "Zhejiang University;University of Electronic Science and Technology of China;University of California, Los Angeles;Tongyi Lab;Alibaba Group", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.uestc.edu.cn;https://www.ucla.edu;;https://www.alibaba.com", "aff_unique_abbr": "ZJU;UESTC;UCLA;;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;1;0;1;0", "aff_country_unique": "China;United States;" }, { "title": "Hidden in Plain Sight: Evaluating Abstract Shape Recognition in Vision-Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97667", "id": "VJuSeShdZA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VJuSeShdZA", "openreview": "https://openreview.net/forum?id=VJuSeShdZA", "poster": "", "project": "", "author_site": "Arshia Hemmat, Adam Davies, Tom Lamb, Jianhao Yuan, Philip Torr, Ashkan Khakzar, Francesco Pinto", "tldr": "", "abstract": "Despite the importance of shape perception in human vision, early neural image classifiers relied less on shape information for object recognition than other (often spurious) features. While recent research suggests that current large Vision-Language Models (VLMs) exhibit more reliance on shape, we find them to still be seriously limited in this regard. To quantify such limitations, we introduce IllusionBench, a dataset that challenges current cutting-edge VLMs to decipher shape information when the shape is represented by an arrangement of visual elements in a scene. Our extensive evaluations reveal that, while these shapes are easily detectable by human annotators, current VLMs struggle to recognize them, indicating important avenues for future work in developing more robust visual perception systems. The full dataset and codebase are available at: https://arshiahemmat.github.io/illusionbench/", "keywords": "visual robustness; vision-language models; visual abstraction; in-context learning; multimodality; visual perception; multi-domain generalisation", "primary_area": "", "supplementary_material": "/attachment/31f6ebdf2cf09e3b5b2d0271c92ba16abe7f8bc3.zip", "author": "Arshia Hemmat;Adam Davies;Tom A. Lamb;Jianhao Yuan;Philip Torr;Ashkan Khakzar;Francesco Pinto", "authorids": "~Arshia_Hemmat3;~Adam_Davies2;~Tom_A._Lamb1;~Jianhao_Yuan2;~Philip_Torr1;~Ashkan_Khakzar1;~Francesco_Pinto1", "gender": "Non-Binary;;M;M;M;M;M", "homepage": "https://ahdavies6.github.io/;http://www.robots.ox.ac.uk/~tvg/;http://ashk-on.github.io/;;https://github.com/arshiahemmat;https://yuanjianhao508.github.io/;https://tomalamb.github.io/", "dblp": ";;201/0889;281/7477;;;", "google_scholar": "vqkOH7gAAAAJ;;Tc4cWAcAAAAJ;rqAdo2MAAAAJ;lASBGvMAAAAJ;BUJPCegAAAAJ;cfJt-hgAAAAJ", "orcid": "0000-0002-0610-2732;;;;;;0009-0002-3666-3992", "linkedin": "adamhdavies/;;;francesco-pinto-42a389b1?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BishkY8oUQ8OTPPeV0SSCdw%3D%3D;;;tom-lamb-94809713b/", "or_profile": "~Adam_Davies2;~Philip_Torr1;~Ashkan_Khakzar1;~Francesco_Pinto1;~Amirarshia_Hemmat1;~JIANHAO_YUAN1;~Tom_Andrew_Lamb1", "aff": "University of Illinois, Urbana Champaign;University of Oxford;University of Oxford;University of Oxford;Isfahan University, University of Tehran;University of Oxford;University of Oxford", "aff_domain": "illinois.edu;ox.ac.uk;ox.ac.uk;ox.ac.uk;ui.ac.ir;robots.ox.ac.uk;ox.ac.uk", "position": "PhD student;Full Professor;Postdoc;PhD student;Undergrad student;PhD student;PhD student", "bibtex": "@inproceedings{\nhemmat2024hidden,\ntitle={Hidden in Plain Sight: Evaluating Abstract Shape Recognition in Vision-Language Models},\nauthor={Arshia Hemmat and Adam Davies and Tom A. Lamb and Jianhao Yuan and Philip Torr and Ashkan Khakzar and Francesco Pinto},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=VJuSeShdZA}\n}", "github": "", "reviewers": "cHDM;F7Nb;UDvg", "pdf_size": 11542890, "rating": "5;7;7", "confidence": "5;1;3", "wc_summary_and_contributions": "103;78;78", "wc_strengths": "57;29;84", "wc_improvement": "434;21;114", "wc_limitations": "13;12;1", "wc_correctness": "9;5;1", "wc_clarity": "41;6;3", "wc_relation_to_prior_work": "4;35;1", "wc_documentation": "10;10;1", "wc_additional_feedback": "1;1;1", "wc_review": "672;197;284", "wc_reply_reviewers": "272;0;14", "wc_reply_authors": "4702;0;0", "reply_reviewers": "2;0;1", "reply_authors": "7;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 1.632993161855452 ], "wc_summary_and_contributions_avg": [ 86.33333333333333, 11.785113019775793 ], "wc_strengths_avg": [ 56.666666666666664, 22.45489305746572 ], "wc_improvement_avg": [ 189.66666666666666, 176.89230873299405 ], "wc_limitations_avg": [ 8.666666666666666, 5.436502143433364 ], "wc_correctness_avg": [ 5.0, 3.265986323710904 ], "wc_clarity_avg": [ 16.666666666666668, 17.249798710580816 ], "wc_relation_to_prior_work_avg": [ 13.333333333333334, 15.369522511198003 ], "wc_documentation_avg": [ 7.0, 4.242640687119285 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 384.3333333333333, 206.4886329935756 ], "wc_reply_reviewers_avg": [ 95.33333333333333, 125.05287770468229 ], "wc_reply_authors_avg": [ 1567.3333333333333, 2216.5440567594314 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 2.8284271247461903 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12999827189020034405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "illinois.edu;ox.ac.uk;ox.ac.uk;ox.ac.uk;ui.ac.ir;robots.ox.ac.uk;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;1;1;1;2;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Oxford;Isfahan University", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.ox.ac.uk;https://www.ui.ac.ir", "aff_unique_abbr": "UIUC;Oxford;UI", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1;1;2;1;1", "aff_country_unique": "United States;United Kingdom;Iran" }, { "title": "BPQP: A Differentiable Convex Optimization Framework for Efficient End-to-End Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94911", "id": "VKKY3Uv7vi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VKKY3Uv7vi", "openreview": "https://openreview.net/forum?id=VKKY3Uv7vi", "poster": "/media/PosterPDFs/NeurIPS%202024/94911.png?t=1730651421.8899415", "project": "", "author_site": "Jianming Pan, Zeqi Ye, Xiao Yang, Xu Yang, Weiqing Liu, Lewen Wang, Jiang Bian", "tldr": "", "abstract": "Data-driven decision-making processes increasingly utilize end-to-end learnable deep neural networks to render final decisions. Sometimes, the output of the forward functions in certain layers is determined by the solutions to mathematical optimization problems, leading to the emergence of differentiable optimization layers that permit gradient back-propagation.\nHowever, real-world scenarios often involve large-scale datasets and numerous constraints, presenting significant challenges. Current methods for differentiating optimization problems typically rely on implicit differentiation, which necessitates costly computations on the Jacobian matrices, resulting in low efficiency.\nIn this paper, we introduce BPQP, a differentiable convex optimization framework designed for efficient end-to-end learning. To enhance efficiency, we reformulate the backward pass as a simplified and decoupled quadratic programming problem by leveraging the structural properties of the Karush\u2013Kuhn\u2013Tucker (KKT) matrix. This reformulation enables the use of first-order optimization algorithms in calculating the backward pass gradients, allowing our framework to potentially utilize any state-of-the-art solver. As solver technologies evolve, BPQP can continuously adapt and improve its efficiency.\nExtensive experiments on both simulated and real-world datasets demonstrate that BPQP achieves a significant improvement in efficiency\u2014typically an order of magnitude faster in overall execution time compared to other differentiable optimization layers. Our results not only highlight the efficiency gains of BPQP but also underscore its superiority over differential optimization layer baselines.", "keywords": "Differentiable optimization layers;machine learning;end-to-end learning", "primary_area": "optimization", "supplementary_material": "/attachment/1171fc205f97b2135ce86c86d3d818e302c666e4.zip", "author": "Jianming Pan;Zeqi Ye;Xiao Yang;Xu Yang;Weiqing Liu;Lewen Wang;Jiang Bian", "authorids": "~Jianming_Pan1;~Zeqi_Ye1;~Xiao_Yang11;~Xu_Yang7;~Weiqing_Liu1;~Lewen_Wang1;~Jiang_Bian1", "gender": "M;M;M;M;;F;M", "homepage": ";;;;;https://lwwang.github.io/;https://sites.google.com/view/jiangbian", "dblp": ";;;;;;09/851-2.html", "google_scholar": "3jVharcAAAAJ;YB04AKkAAAAJ;https://scholar.google.com.hk/citations?user=QBQRlgYAAAAJ;;;;pZBEnY8AAAAJ", "orcid": ";;;;;;0000-0002-9472-600X", "linkedin": ";zeqi-ye-5908b1264/;;xu-yang-9a7b80170/;weiqing-liu-09646b91/;;jbian/", "or_profile": "~Jianming_Pan1;~Zeqi_Ye1;~Xiao_Yang11;~Xu_Yang7;~Weiqing_Liu1;~Lewen_Wang1;~Jiang_Bian1", "aff": "University of California, Berkeley;Nankai University;Microsoft Research;;;Microsoft;Microsoft", "aff_domain": "berkeley.edu;nankai.edu.cn;research.microsoft.com;;;microsoft.com;microsoft.com", "position": "MS student;Undergrad student;Researcher;;;Researcher;Partner Research Manager", "bibtex": "@inproceedings{\npan2024bpqp,\ntitle={{BPQP}: A Differentiable Convex Optimization Framework for Efficient End-to-End Learning},\nauthor={Jianming Pan and Zeqi Ye and Xiao Yang and Xu Yang and Weiqing Liu and Lewen Wang and Jiang Bian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VKKY3Uv7vi}\n}", "github": "", "reviewers": "Vrnz;qYw9;wmCS;3Ly2", "pdf_size": 552130, "rating": "5;7;7;8", "confidence": "4;3;5;4", "soundness": "3;2;2;4", "novelty": "2;3;3;4", "presentation": "3;3;2;3", "wc_summary": "46;90;33;122", "wc_strengths": "22;66;51;120", "wc_weaknesses": "102;250;127;84", "wc_questions": "2;72;43;67", "wc_limitations": "1;29;35;17", "wc_review": "173;507;289;410", "wc_reply_reviewers": "28;174;19;48", "wc_reply_authors": "30;127;19;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 35.42156828826189 ], "wc_strengths_avg": [ 64.75, 35.604599421984794 ], "wc_weaknesses_avg": [ 140.75, 64.89751536075939 ], "wc_questions_avg": [ 46.0, 27.667670664513846 ], "wc_limitations_avg": [ 20.5, 12.99038105676658 ], "wc_review_avg": [ 344.75, 125.68686287754977 ], "wc_reply_reviewers_avg": [ 67.25, 62.519496958948736 ], "wc_reply_authors_avg": [ 55.0, 42.50294107470682 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8431507738712913818&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;nankai.edu.cn;research.microsoft.com;;;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of California, Berkeley;Nankai University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.berkeley.edu;http://www.nankai.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UC Berkeley;NKU;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Spiking Graph Neural Network on Riemannian Manifolds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94910", "id": "VKt0K3iOmO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VKt0K3iOmO", "openreview": "https://openreview.net/forum?id=VKt0K3iOmO", "poster": "/media/PosterPDFs/NeurIPS%202024/94910.png?t=1731756138.9470823", "project": "", "author_site": "Li Sun, Zhenhao Huang, Qiqi Wan, Hao Peng, Philip S Yu", "tldr": "", "abstract": "Graph neural networks (GNNs) have become the dominant solution for learning on graphs, the typical non-Euclidean structures. Conventional GNNs, constructed with the Artificial Neuron Network (ANN), have achieved impressive performance at the cost of high computation and energy consumption. In parallel, spiking GNNs with brain-like spiking neurons are drawing increasing research attention owing to the energy efficiency. So far, existing spiking GNNs consider graphs in Euclidean space, ignoring the structural geometry, and suffer from the high latency issue due to Back-Propagation-Through-Time (BPTT) with the surrogate gradient. In light of the aforementioned issues, we are devoted to exploring spiking GNN on Riemannian manifolds, and present a Manifold-valued Spiking GNN (MSG). In particular, we design a new spiking neuron on geodesically complete manifolds with the diffeomorphism, so that BPTT regarding the spikes is replaced by the proposed differentiation via manifold. Theoretically, we show that MSG approximates a solver of the manifold ordinary differential equation. Extensive experiments on common graphs show the proposed MSG achieves superior performance to previous spiking GNNs and energy efficiency to conventional GNNs.", "keywords": "Graph Neural Network;Spiking Neural Network;Riemannian Geometry", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Li Sun;Zhenhao Huang;Qiqi Wan;Hao Peng;Philip S. Yu", "authorids": "~Li_Sun4;~Zhenhao_Huang1;~Qiqi_Wan1;~Hao_Peng7;~Philip_S._Yu1", "gender": "M;M;F;M;M", "homepage": ";;https://github.com/WANQIQI77;https://penghao-bdsc.github.io/;https://cs.uic.edu/profiles/philip-yu/", "dblp": "57/2405-8;;;69/7742-1;y/PhilipSYu", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;R25rbyQAAAAJ;D0lL1r0AAAAJ", "orcid": "0000-0003-4562-2279;0009-0007-8944-0385;;0000-0003-0458-5977;0000-0002-3491-5968", "linkedin": ";;;;", "or_profile": "~Li_Sun4;~Zhenhao_Huang1;~Qiqi_Wan1;~Hao_Peng7;~Philip_S._Yu1", "aff": "North China Electric Power University ;North China Electric Power University;NCEPU;Beihang University;University of Illinois Chicago", "aff_domain": "ncepu.edu.cn;ncepubj.edu.cn;ncepu.edu;buaa.edu.cn;uic.edu", "position": "Associate Professor;Undergrad student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2024spiking,\ntitle={Spiking Graph Neural Network on Riemannian Manifolds},\nauthor={Li Sun and Zhenhao Huang and Qiqi Wan and Hao Peng and Philip S. Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VKt0K3iOmO}\n}", "github": "", "reviewers": "RDnD;aPZg;hcr5;NhQj;BtYr", "pdf_size": 1105150, "rating": "6;6;8;8;8", "confidence": "4;3;4;5;4", "soundness": "3;3;3;4;4", "novelty": "3;3;4;4;4", "presentation": "3;3;3;3;4", "wc_summary": "61;51;34;40;110", "wc_strengths": "53;36;102;122;58", "wc_weaknesses": "122;60;46;59;18", "wc_questions": "5;23;107;114;2", "wc_limitations": "9;4;13;10;1", "wc_review": "250;174;302;345;189", "wc_reply_reviewers": "9;0;0;33;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 59.2, 27.036271932350438 ], "wc_strengths_avg": [ 74.2, 32.33821269025238 ], "wc_weaknesses_avg": [ 61.0, 34.058772731852805 ], "wc_questions_avg": [ 50.2, 49.80522060989189 ], "wc_limitations_avg": [ 7.4, 4.317406628984581 ], "wc_review_avg": [ 252.0, 65.12449615927942 ], "wc_reply_reviewers_avg": [ 8.4, 12.784365451597509 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6325328246106288629&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ncepu.edu.cn;ncepubj.edu.cn;ncepu.edu;buaa.edu.cn;uic.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "North China Electric Power University;Beihang University;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ncepu.edu.cn;http://www.buaa.edu.cn/;https://www.uic.edu", "aff_unique_abbr": "NCEPU;BUAA;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Energy-based Hopfield Boosting for Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94909", "id": "VLQYtVMTYz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VLQYtVMTYz", "openreview": "https://openreview.net/forum?id=VLQYtVMTYz", "poster": "", "project": "", "author_site": "Claus Hofmann, Simon Schmid, Bernhard Lehner, Daniel Klotz, Sepp Hochreiter", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is critical when deploying machine learning models in the real world. Outlier exposure methods, which incorporate auxiliary outlier data in the training process, can drastically improve OOD detection performance compared to approaches without advanced training strategies. We introduce Hopfield Boosting, a boosting approach, which leverages modern Hopfield energy to sharpen the decision boundary between the in-distribution and OOD data. Hopfield Boosting encourages the model to focus on hard-to-distinguish auxiliary outlier examples that lie close to the decision boundary between in-distribution and auxiliary outlier data. Our method achieves a new state-of-the-art in OOD detection with outlier exposure, improving the FPR95 from 2.28 to 0.92 on CIFAR-10, from 11.76 to 7.94 on CIFAR-100, and from 50.74 to 36.60 on ImageNet-1K.", "keywords": "ood;out-of-distribution;boosting;deep learning;outlier exposure", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Claus Hofmann;Simon Lucas Schmid;Bernhard Lehner;Daniel Klotz;Sepp Hochreiter", "authorids": "~Claus_Hofmann1;~Simon_Lucas_Schmid1;~Bernhard_Lehner1;~Daniel_Klotz1;~Sepp_Hochreiter1", "gender": "M;M;M;;M", "homepage": "https://claus-hofmann.com;;https://www.researchgate.net/profile/Bernhard-Lehner-2;;https://www.jku.at/en/institute-for-machine-learning/about-us/team/sepp-hochreiter/", "dblp": ";;11/7371.html;;h/SeppHochreiter.html", "google_scholar": ";;https://scholar.google.at/citations?user=7nA0xUIAAAAJ;;https://scholar.google.at/citations?user=tvUH3WMAAAAJ", "orcid": ";0009-0007-0299-487X;0000-0002-7754-7042;;0000-0001-7449-2528", "linkedin": ";;;;https://linkedin.com/in/sepp-hochreiter-41514846", "or_profile": "~Claus_Hofmann1;~Simon_Lucas_Schmid1;~Bernhard_Lehner1;~Daniel_Klotz1;~Sepp_Hochreiter1", "aff": "Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;Silicon Austria Labs;;Johannes Kepler University Linz", "aff_domain": "jku.at;jku.at;silicon-austria-labs.com;;jku.at", "position": "PhD student;PhD student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nhofmann2024energybased,\ntitle={Energy-based Hopfield Boosting for Out-of-Distribution Detection},\nauthor={Claus Hofmann and Simon Lucas Schmid and Bernhard Lehner and Daniel Klotz and Sepp Hochreiter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VLQYtVMTYz}\n}", "github": "", "reviewers": "DQmw;qRFa;BR8F;UgW1", "pdf_size": 6251449, "rating": "6;6;7;8", "confidence": "5;4;4;4", "soundness": "2;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "56;119;121;110", "wc_strengths": "83;34;24;52", "wc_weaknesses": "232;104;39;221", "wc_questions": "16;130;148;147", "wc_limitations": "10;10;24;14", "wc_review": "397;397;356;544", "wc_reply_reviewers": "18;25;90;22", "wc_reply_authors": "0;0;28;91", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 101.5, 26.5941722939444 ], "wc_strengths_avg": [ 48.25, 22.431841208425134 ], "wc_weaknesses_avg": [ 149.0, 80.92898121192432 ], "wc_questions_avg": [ 110.25, 54.883399129427104 ], "wc_limitations_avg": [ 14.5, 5.722761571129799 ], "wc_review_avg": [ 423.5, 71.55592218677641 ], "wc_reply_reviewers_avg": [ 38.75, 29.693223132560064 ], "wc_reply_authors_avg": [ 29.75, 37.164331017791774 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2178899169970819721&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "jku.at;jku.at;silicon-austria-labs.com;;jku.at", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Johannes Kepler University Linz;Silicon Austria Labs;Johannes Kepler University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jku.at;https://www.silicon-austria-labs.at/;https://www.jku.at", "aff_unique_abbr": "JKU;;JKU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Austria" }, { "title": "Latent Neural Operator for Solving Forward and Inverse PDE Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94908", "id": "VLw8ZyKfcm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VLw8ZyKfcm", "openreview": "https://openreview.net/forum?id=VLw8ZyKfcm", "poster": "/media/PosterPDFs/NeurIPS%202024/94908.png?t=1730207823.8976007", "project": "", "author_site": "Tian Wang, Chuang Wang", "tldr": "", "abstract": "Neural operators effectively solve PDE problems from data without knowing the explicit equations, which learn the map from the input sequences of observed samples to the predicted values. Most existing works build the model in the original geometric space, leading to high computational costs when the number of sample points is large. We present the Latent Neural Operator (LNO) solving PDEs in the latent space. In particular, we first propose Physics-Cross-Attention (PhCA) transforming representation from the geometric space to the latent space, then learn the operator in the latent space, and finally recover the real-world geometric space via the inverse PhCA map. Our model retains flexibility that can decode values in any position not limited to locations defined in the training set, and therefore can naturally perform interpolation and extrapolation tasks particularly useful for inverse problems. Moreover, the proposed LNO improves both prediction accuracy and computational efficiency. Experiments show that LNO reduces the GPU memory by 50%, speeds up training 1.8 times, and reaches state-of-the-art accuracy on four out of six benchmarks for forward problems and a benchmark for inverse problem. Code is available at https://github.com/L-I-M-I-T/LatentNeuralOperator.", "keywords": "latent neural operator;PDE;forward and inverse problems", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/7dd1d42f81cca5ddbaa9be44e38f66675fa54ecf.zip", "author": "Tian Wang;Chuang Wang", "authorids": "~Tian_Wang7;~Chuang_Wang2", "gender": "M;M", "homepage": "http://www.nlpr.ia.ac.cn/pal/People/WangTian.html;http://www.nlpr.ia.ac.cn/pal/People/WangChuang.html", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Tian_Wang7;~Chuang_Wang2", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nwang2024latent,\ntitle={Latent Neural Operator for Solving Forward and Inverse {PDE} Problems},\nauthor={Tian Wang and Chuang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VLw8ZyKfcm}\n}", "github": "", "reviewers": "LtWn;RkUb;WmcY;ny2e", "pdf_size": 0, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "2;2;4;2", "novelty": "2;2;4;3", "presentation": "1;2;3;1", "wc_summary": "48;60;71;135", "wc_strengths": "68;19;44;178", "wc_weaknesses": "327;185;113;391", "wc_questions": "76;45;140;418", "wc_limitations": "32;12;8;37", "wc_review": "551;321;376;1159", "wc_reply_reviewers": "181;28;33;122", "wc_reply_authors": "627;0;305;315", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 78.5, 33.61919094802848 ], "wc_strengths_avg": [ 77.25, 60.69338926110487 ], "wc_weaknesses_avg": [ 254.0, 110.38568747804219 ], "wc_questions_avg": [ 169.75, 147.36413233891074 ], "wc_limitations_avg": [ 22.25, 12.457427503300993 ], "wc_review_avg": [ 601.75, 332.7486851965008 ], "wc_reply_reviewers_avg": [ 91.0, 64.01952827067691 ], "wc_reply_authors_avg": [ 311.75, 221.71307471594903 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10235034810328654933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;ia.ac.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Towards Combating Frequency Simplicity-biased Learning for Domain Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94907", "id": "VMiLdBkCJM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VMiLdBkCJM", "openreview": "https://openreview.net/forum?id=VMiLdBkCJM", "poster": "/media/PosterPDFs/NeurIPS%202024/94907.png?t=1729504558.1237357", "project": "", "author_site": "Xilin He, Jingyu Hu, Qinliang Lin, Cheng Luo, Weicheng Xie, Siyang Song, Muhammad Haris Khan, Linlin Shen", "tldr": "", "abstract": "Domain generalization methods aim to learn transferable knowledge from source domains that can generalize well to unseen target domains. \nRecent studies show that neural networks frequently suffer from a simplicity-biased learning behavior which leads to over-reliance on specific frequency sets, namely as frequency shortcuts, instead of semantic information, resulting in poor generalization performance. \nDespite previous data augmentation techniques successfully enhancing generalization performances, they intend to apply more frequency shortcuts, thereby causing hallucinations of generalization improvement.\nIn this paper, we aim to prevent such learning behavior of applying frequency shortcuts from a data-driven perspective. Given the theoretical justification of models' biased learning behavior on different spatial frequency components, which is based on the dataset frequency properties, we argue that the learning behavior on various frequency components could be manipulated by changing the dataset statistical structure in the Fourier domain. \nIntuitively, as frequency shortcuts are hidden in the dominant and highly dependent frequencies of dataset structure, dynamically perturbating the over-reliance frequency components could prevent the application of frequency shortcuts.\nTo this end, we propose two effective data augmentation modules designed to collaboratively and adaptively adjust the frequency characteristic of the dataset, aiming to dynamically influence the learning behavior of the model and ultimately serving as a strategy to mitigate shortcut learning. Our code will be made publicly available.", "keywords": "Frequency Shortcut;Domain Generalization", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Xilin He;Jingyu Hu;Qinliang Lin;Cheng Luo;Weicheng Xie;Siyang Song;Muhammad Haris Khan;Linlin Shen", "authorids": "~Xilin_He1;~Jingyu_Hu2;~Qinliang_Lin2;~Cheng_Luo4;~Weicheng_Xie1;~Siyang_Song1;~Muhammad_Haris_Khan3;~Linlin_Shen1", "gender": "M;;M;M;M;M;M;M", "homepage": ";https://github.com/MeteorDream;https://chengluo.cc/;https://wcxie.github.io/Weicheng-Xie/;https://www.cst.cam.ac.uk/people/ss2796;https://m-haris-khan.com;https://csse.szu.edu.cn/pages/user/index?id=594;https://github.com/LinQinLiang", "dblp": "366/0988;;68/6443-2;28/6098-1;220/3096.html;155/3076;88/5607;222/7821", "google_scholar": "LRdrMfoAAAAJ;;h0IdnhUAAAAJ;https://scholar.google.co.uk/citations?user=S2uh8OIAAAAJ;ZKSL1IcAAAAJ;ZgERfFwAAAAJ;https://scholar.google.com.hk/citations?user=AZ_y9HgAAAAJ;", "orcid": ";;0000-0002-8034-9450;0000-0001-8946-7472;0000-0003-2339-5685;0000-0001-9746-276X;0000-0003-1420-0815;", "linkedin": ";;;;siyang-song-7a814412b/;muhammad-haris-khan-1516714b/;;", "or_profile": "~Xilin_He1;~Jingyu_Hu2;~Cheng_Luo4;~Weicheng_Xie1;~Siyang_Song1;~Muhammad_Haris_Khan3;~Linlin_Shen1;~QINLIANG_LIN1", "aff": "Shenzhen University;Shenzhen University;Monash University;Shenzhen University;University of Leicester;Mohamed Bin Zayed University of Artificial Intelligence;Shenzhen University;Shenzhen University", "aff_domain": "szu.edu.cn;szu.edu.cn;monash.edu;szu.edu.cn;leicester.ac.uk;mbzuai.ac.ae;szu.edu.cn;szu.edu.cn", "position": "Undergrad student;MS student;PhD student;Associate Professor;Assistant Professor;Assistant Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nhe2024towards,\ntitle={Towards Combating Frequency Simplicity-biased Learning for Domain Generalization},\nauthor={Xilin He and Jingyu Hu and Qinliang Lin and Cheng Luo and Weicheng Xie and Siyang Song and Muhammad Haris Khan and Linlin Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VMiLdBkCJM}\n}", "github": "", "reviewers": "szDc;57HS;jMvT;uwHe", "pdf_size": 11687067, "rating": "5;5;5;6", "confidence": "3;3;4;4", "soundness": "2;2;2;2", "novelty": "3;3;2;3", "presentation": "2;2;1;3", "wc_summary": "83;66;86;74", "wc_strengths": "62;139;20;54", "wc_weaknesses": "673;66;68;170", "wc_questions": "128;35;140;4", "wc_limitations": "16;41;7;4", "wc_review": "962;347;321;306", "wc_reply_reviewers": "1112;21;483;0", "wc_reply_authors": "1821;0;286;0", "reply_reviewers": "4;1;2;0", "reply_authors": "6;1;3;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 7.854139036202504 ], "wc_strengths_avg": [ 68.75, 43.51651985166093 ], "wc_weaknesses_avg": [ 244.25, 251.08601613789645 ], "wc_questions_avg": [ 76.75, 58.443883341201754 ], "wc_limitations_avg": [ 17.0, 14.543039572248986 ], "wc_review_avg": [ 484.0, 276.3629859442107 ], "wc_reply_reviewers_avg": [ 404.0, 452.05364725881816 ], "wc_reply_authors_avg": [ 526.75, 756.3026427958586 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 2.75, 2.0463381929681126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5938721547497838997&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "szu.edu.cn;szu.edu.cn;monash.edu;szu.edu.cn;leicester.ac.uk;mbzuai.ac.ae;szu.edu.cn;szu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;3;0;0", "aff_unique_norm": "Shenzhen University;Monash University;University of Leicester;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.szu.edu.cn;https://www.monash.edu;https://www.leicester.ac.uk;https://www.mbzuai.ac.ae", "aff_unique_abbr": "SZU;Monash;Leicester;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;3;0;0", "aff_country_unique": "China;Australia;United Kingdom;United Arab Emirates" }, { "title": "Learning Better Representations From Less Data For Propositional Satisfiability", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94906", "id": "VMsHnv8cVs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VMsHnv8cVs", "openreview": "https://openreview.net/forum?id=VMsHnv8cVs", "poster": "/media/PosterPDFs/NeurIPS%202024/94906.png?t=1733499554.262766", "project": "", "author_site": "Mohamed Ghanem, Frederik Schmitt, Julian Siber, Bernd Finkbeiner", "tldr": "", "abstract": "Training neural networks on NP-complete problems typically demands very large amounts of training data and often needs to be coupled with computationally expensive symbolic verifiers to ensure output correctness. In this paper, we present NeuRes, a neuro-symbolic approach to address both challenges for propositional satisfiability, being the quintessential NP-complete problem. By combining certificate-driven training and expert iteration, our model learns better representations than models trained for classification only, with a much higher data efficiency -- requiring orders of magnitude less training data. NeuRes employs propositional resolution as a proof system to generate proofs of unsatisfiability and to accelerate the process of finding satisfying truth assignments, exploring both possibilities in parallel. To realize this, we propose an attention-based architecture that autoregressively selects pairs of clauses from a dynamic formula embedding to derive new clauses. Furthermore, we employ expert iteration whereby model-generated proofs progressively replace longer teacher proofs as the new ground truth. This enables our model to reduce a dataset of proofs generated by an advanced solver by $\\sim$$32$% after training on it with no extra guidance. This shows that NeuRes is not limited by the optimality of the teacher algorithm owing to its self-improving workflow. We show that our model achieves far better performance than NeuroSAT in terms of both correctly classified and proven instances.", "keywords": "Neuro-symbolic;Propositional Logic;Resolution;Attention;Deep Learning;Graph Neural Networks;Expert Iteration", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Mohamed Ghanem;Frederik Schmitt;Julian Siber;Bernd Finkbeiner", "authorids": "~Mohamed_Ghanem1;~Frederik_Schmitt1;~Julian_Siber1;~Bernd_Finkbeiner1", "gender": "M;M;M;M", "homepage": "https://www.react.uni-saarland.de/people/schmitt.html;https://cispa.de/en/people/c01jusi;https://www.react.uni-saarland.de/people/finkbeiner.html;", "dblp": "245/0350;293/9557;https://dblp.uni-trier.de/pid/73/4443.html;", "google_scholar": ";-o-djgkAAAAJ;https://scholar.google.de/citations?hl=de;yPs4CdMAAAAJ", "orcid": ";;0000-0002-4280-8441;0000-0002-2657-4032", "linkedin": "frederik-schmitt-282814172/;;;maghanem/", "or_profile": "~Frederik_Schmitt1;~Julian_Siber1;~Bernd_Finkbeiner1;~Mohamed_Abdelhamid_Ghanem1", "aff": "CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security;Saarland University;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.saarland;cispa.de;uni-saarland.de;cispa.de", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nghanem2024learning,\ntitle={Learning Better Representations From Less Data For Propositional Satisfiability},\nauthor={Mohamed Ghanem and Frederik Schmitt and Julian Siber and Bernd Finkbeiner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VMsHnv8cVs}\n}", "github": "", "reviewers": "MVHU;hPQb;64Vk;tfEq;R9jd", "pdf_size": 744061, "rating": "4;6;6;7;8", "confidence": "4;2;2;3;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "1;3;3;3;4", "wc_summary": "26;82;149;67;53", "wc_strengths": "77;70;78;67;142", "wc_weaknesses": "197;74;30;62;28", "wc_questions": "28;38;63;171;28", "wc_limitations": "22;13;18;7;13", "wc_review": "350;277;338;374;264", "wc_reply_reviewers": "757;0;0;18;217", "wc_reply_authors": "758;0;0;40;134", "reply_reviewers": "2;0;0;1;2", "reply_authors": "2;1;1;2;2", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 75.4, 41.16600539279953 ], "wc_strengths_avg": [ 86.8, 27.909854890342945 ], "wc_weaknesses_avg": [ 78.2, 62.0270908555286 ], "wc_questions_avg": [ 65.6, 54.23135624341327 ], "wc_limitations_avg": [ 14.6, 5.083306010855534 ], "wc_review_avg": [ 320.6, 42.71580503747998 ], "wc_reply_reviewers_avg": [ 198.4, 291.08390542934524 ], "wc_reply_authors_avg": [ 186.4, 289.96385981704685 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0AoA3E9S8tcJ:scholar.google.com/&scioq=Learning+Better+Representations+From+Less+Data+For+Propositional+Satisfiability&hl=en&as_sdt=0,31", "gs_version_total": 4, "email": "cispa.saarland;cispa.de;uni-saarland.de;cispa.de", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Saarland University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cispa.de/;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;UdS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Autoregressive Image Generation without Vector Quantization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94905", "id": "VNBIF0gmkb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VNBIF0gmkb", "openreview": "https://openreview.net/forum?id=VNBIF0gmkb", "poster": "/media/PosterPDFs/NeurIPS%202024/94905.png?t=1733849767.9008894", "project": "", "author_site": "Tianhong Li, Yonglong Tian, He Li, Mingyang Deng, Kaiming He", "tldr": "", "abstract": "Conventional wisdom holds that autoregressive models for image generation are typically accompanied by vector-quantized tokens. We observe that while a discrete-valued space can facilitate representing a categorical distribution, it is not a necessity for autoregressive modeling. In this work, we propose to model the per-token probability distribution using a diffusion procedure, which allows us to apply autoregressive models in a continuous-valued space. Rather than using categorical cross-entropy loss, we define a Diffusion Loss function to model the per-token probability. This approach eliminates the need for discrete-valued tokenizers. We evaluate its effectiveness across a wide range of cases, including standard autoregressive models and generalized masked autoregressive (MAR) variants. By removing vector quantization, our image generator achieves strong results while enjoying the speed advantage of sequence modeling. We hope this work will motivate the use of autoregressive generation in other continuous-valued domains and applications. Code is available at [https://github.com/LTH14/mar](https://github.com/LTH14/mar).", "keywords": "Image Generation;Autoregressive Models;Diffusion Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Tianhong Li;Yonglong Tian;He Li;Mingyang Deng;Kaiming He", "authorids": "~Tianhong_Li3;~Yonglong_Tian1;~He_Li13;~Mingyang_Deng1;~Kaiming_He2", "gender": "M;;M;M;", "homepage": "http://www.tianhongli.me/;http://people.csail.mit.edu/yonglong/;https://lihe50hz.github.io;https://lambertae.github.io/;", "dblp": "195/5632;151/6328;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=OsP7JHAAAAAJ;oxO7a1IAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Tianhong_Li3;~Yonglong_Tian1;~He_Li13;~Mingyang_Deng1;~Kaiming_He2", "aff": "Massachusetts Institute of Technology;Google;Tsinghua University;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;google.com;mails.tsinghua.edu.cn;mit.edu;", "position": "PhD student;Researcher;Undergrad student;PhD student;", "bibtex": "@inproceedings{\nli2024autoregressive,\ntitle={Autoregressive Image Generation without Vector Quantization},\nauthor={Tianhong Li and Yonglong Tian and He Li and Mingyang Deng and Kaiming He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VNBIF0gmkb}\n}", "github": "", "reviewers": "a8ib;oAwj;V3y3;mV98;xuXX", "pdf_size": 5859691, "rating": "3;7;7;8;8", "confidence": "4;4;5;4;5", "soundness": "2;4;4;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;4;4;3", "wc_summary": "87;106;157;74;125", "wc_strengths": "33;39;154;220;176", "wc_weaknesses": "304;130;148;274;65", "wc_questions": "96;56;157;14;218", "wc_limitations": "9;1;1;8;18", "wc_review": "529;332;617;590;602", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 1.8547236990991407 ], "confidence_avg": [ 4.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 109.8, 29.239699040858817 ], "wc_strengths_avg": [ 124.4, 75.26645999381131 ], "wc_weaknesses_avg": [ 184.2, 90.41327336182448 ], "wc_questions_avg": [ 108.2, 72.36684323638832 ], "wc_limitations_avg": [ 7.4, 6.280127387243033 ], "wc_review_avg": [ 534.0, 105.33565398287514 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3962029078465308, "gs_citation": 162, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10763223618533088485&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "mit.edu;google.com;mails.tsinghua.edu.cn;mit.edu;", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;Google;Tsinghua University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://web.mit.edu;https://www.google.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "MIT;Google;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "CoVoMix: Advancing Zero-Shot Speech Generation for Human-like Multi-talker Conversations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94904", "id": "VNbQbv658b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VNbQbv658b", "openreview": "https://openreview.net/forum?id=VNbQbv658b", "poster": "/media/PosterPDFs/NeurIPS%202024/94904.png?t=1731419435.4807243", "project": "", "author_site": "Leying Zhang, Yao Qian, Long Zhou, Shujie LIU, Dongmei Wang, Xiaofei Wang, Midia Yousefi, Yanmin Qian, Jinyu Li, Lei He, sheng zhao, Michael Zeng", "tldr": "", "abstract": "Recent advancements in zero-shot text-to-speech (TTS) modeling have led to significant strides in generating high-fidelity and diverse speech. However, dialogue generation, along with achieving human-like naturalness in speech, continues to be a challenge. In this paper, we introduce CoVoMix: Conversational Voice Mixture Generation, a novel model for zero-shot, human-like, multi-speaker, multi-round dialogue speech generation. CoVoMix first converts dialogue text into multiple streams of discrete tokens, with each token stream representing semantic information for individual talkers. These token streams are then fed into a flow-matching based acoustic model to generate mixed mel-spectrograms. Finally, the speech waveforms are produced using a HiFi-GAN model. Furthermore, we devise a comprehensive set of metrics for measuring the effectiveness of dialogue modeling and generation. Our experimental results show that CoVoMix can generate dialogues that are not only human-like in their naturalness and coherence but also involve multiple talkers engaging in multiple rounds of conversation. This is exemplified by instances generated in a single channel where one speaker's utterance is seamlessly mixed with another's interjections or laughter, indicating the latter's role as an attentive listener. Audio samples are enclosed in the supplementary.", "keywords": "text-to-speech;dialogue generation;zero-shot", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/bd21d07eafad2721f131b24e833a028081af4ade.zip", "author": "leying zhang;Yao Qian;Long Zhou;Shujie LIU;Dongmei Wang;Xiaofei Wang;Midia Yousefi;Yanmin Qian;Jinyu Li;Lei He;sheng zhao;Michael Zeng", "authorids": "~leying_zhang2;~Yao_Qian2;~Long_Zhou2;~Shujie_LIU1;~Dongmei_Wang3;~Xiaofei_Wang9;~Midia_Yousefi1;~Yanmin_Qian1;~Jinyu_Li1;~Lei_He6;~sheng_zhao1;~Michael_Zeng1", "gender": "F;F;;M;F;M;;M;M;M;M;M", "homepage": ";https://www.microsoft.com/en-us/research/people/yaoqian/;;https://www.microsoft.com/en-us/research/people/shujliu/;https://www.microsoft.com/en-us/research/people/dowan/;https://www.microsoft.com/en-us/research/people/xiaofewa/;;https://x-lance.sjtu.edu.cn/en/members/yanmin-qian;https://www.microsoft.com/en-us/research/people/jinyli;;https://www.aaai.org/ojs/index.php/AAAI/article/view/4642;https://www.microsoft.com/en-us/research/people/nzeng/", "dblp": "278/7751;;;;65/4883;58/6576-9;;07/8638;87/4873-1;;;232/1866-1.html", "google_scholar": "Futd_gYAAAAJ;o7OfErXuEJIC;ZnwgSXIAAAAJ;6mNya-wAAAAJ;DB1bNC0AAAAJ;pZkELMoAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;grUvupMAAAAJ;EKl9yY8AAAAJ;689bIIwAAAAJ;", "orcid": ";;;0009-0008-2599-6752;;;;;0000-0002-1089-9748;;;", "linkedin": "leying-zhang-7b48401a5/;;;;dongmei-wang-99b20637/;;midia-yousefi-22b94190?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;;;;michaelnanshanzeng/", "or_profile": "~leying_zhang2;~Yao_Qian2;~Long_Zhou2;~Shujie_LIU1;~Dongmei_Wang3;~Xiaofei_Wang9;~Midia_Yousefi1;~Yanmin_Qian1;~Jinyu_Li1;~Lei_He6;~sheng_zhao1;~Michael_Zeng1", "aff": "Microsoft;Microsoft;Microsoft Research Asia;Microsoft;Microsoft;Microsoft;Microsoft;Shanghai Jiaotong University;Microsoft;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Intern;Principal Researcher;Researcher;Researcher;Researcher;Researcher;Senior Research Scientist;Full Professor;Researcher;Principal Scientist Manager;Researcher;Vice President Research Manager", "bibtex": "@inproceedings{\nzhang2024covomix,\ntitle={CoVoMix: Advancing Zero-Shot Speech Generation for Human-like Multi-talker Conversations},\nauthor={leying zhang and Yao Qian and Long Zhou and Shujie LIU and Dongmei Wang and Xiaofei Wang and Midia Yousefi and Yanmin Qian and Jinyu Li and Lei He and sheng zhao and Michael Zeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VNbQbv658b}\n}", "github": "", "reviewers": "LEoq;yp1s;XEst", "pdf_size": 1624178, "rating": "5;5;6", "confidence": "5;5;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "66;63;74", "wc_strengths": "64;70;69", "wc_weaknesses": "83;264;151", "wc_questions": "6;141;84", "wc_limitations": "22;14;36", "wc_review": "241;552;414", "wc_reply_reviewers": "139;79;11", "wc_reply_authors": "288;271;32", "reply_reviewers": "2;2;1", "reply_authors": "3;4;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.66666666666667, 4.642796092394706 ], "wc_strengths_avg": [ 67.66666666666667, 2.6246692913372702 ], "wc_weaknesses_avg": [ 166.0, 74.65029582437478 ], "wc_questions_avg": [ 77.0, 55.33534132902769 ], "wc_limitations_avg": [ 24.0, 9.092121131323903 ], "wc_review_avg": [ 402.3333333333333, 127.23294472039159 ], "wc_reply_reviewers_avg": [ 76.33333333333333, 52.28979080300687 ], "wc_reply_authors_avg": [ 197.0, 116.8788546601423 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=878423220016171806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 12, "aff_unique_index": "0;0;0;0;0;0;0;1;0;0;0;0", "aff_unique_norm": "Microsoft;Shanghai Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Microsoft;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "The Bayesian sampling in a canonical recurrent circuit with a diversity of inhibitory interneurons", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94903", "id": "VNmi0FHn6Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VNmi0FHn6Z", "openreview": "https://openreview.net/forum?id=VNmi0FHn6Z", "poster": "", "project": "", "author_site": "Eryn Sale, Wenhao Zhang", "tldr": "", "abstract": "Accumulating evidence suggests stochastic cortical circuits can perform sampling-based Bayesian inference to compute the latent stimulus posterior. Canonical cortical circuits consist of excitatory (E) neurons and types of inhibitory (I) interneurons. Nevertheless, nearly no sampling neural circuit models consider the diversity of interneurons, and thus how interneurons contribute to sampling remains poorly understood. To provide theoretical insight, we build a nonlinear canonical circuit model consisting of recurrently connected E neurons and two types of I neurons including Parvalbumin (PV) and Somatostatin (SOM) neurons. The E neurons are modeled as a canonical ring (attractor) model, receiving global inhibition from PV neurons, and locally tuning-dependent inhibition from SOM neurons.\nWe theoretically analyze the nonlinear circuit dynamics and analytically identify the Bayesian sampling algorithm performed by the circuit dynamics. We found a reduced circuit with only E and PV neurons performs Langevin sampling, and the inclusion of SOM neurons with tuning-dependent inhibition speeds up the sampling via upgrading the Langevin into Hamiltonian sampling. Moreover, the Hamiltonian framework requires SOM neurons to receive no direct feedforward connections, consistent with neuroanatomy. Our work provides overarching connections between nonlinear circuits with various types of interneurons and sampling algorithms, deepening our understanding of circuit implementation of Bayesian inference.", "keywords": "Sampling-based Bayesian Inference;recurrent circuit model;ring attractor network;inhibitory interneurons", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/62dff33d709f1e7717b1aa77ab914440eaf6e5b1.zip", "author": "Eryn Sale;Wenhao Zhang", "authorids": "~Eryn_Sale1;~Wenhao_Zhang3", "gender": "F;M", "homepage": ";https://www.zhang-cnl.org/", "dblp": "396/8713;57/7458-2", "google_scholar": ";TqGPd9QAAAAJ", "orcid": "0000-0002-2414-0353;0000-0001-7641-5024", "linkedin": ";", "or_profile": "~Eryn_Sale1;~Wenhao_Zhang3", "aff": "University of Texas Southwestern Medical Center;University of Texas Southwestern Medical Center", "aff_domain": "utsouthwestern.edu;utsouthwestern.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsale2024the,\ntitle={The Bayesian sampling in a canonical recurrent circuit with a diversity of inhibitory interneurons},\nauthor={Eryn Sale and Wenhao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VNmi0FHn6Z}\n}", "github": "", "reviewers": "K6Di;6oXx;NhnW;RifV", "pdf_size": 969522, "rating": "5;5;6;7", "confidence": "3;3;2;4", "soundness": "3;3;3;3", "novelty": "2;2;2;2", "presentation": "2;3;2;4", "wc_summary": "88;44;97;40", "wc_strengths": "62;57;13;25", "wc_weaknesses": "224;69;46;173", "wc_questions": "186;41;27;23", "wc_limitations": "4;4;50;2", "wc_review": "564;215;233;263", "wc_reply_reviewers": "100;21;106;570", "wc_reply_authors": "636;21;371;2255", "reply_reviewers": "2;1;1;6", "reply_authors": "2;2;2;7", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 67.25, 25.488968201949643 ], "wc_strengths_avg": [ 39.25, 20.765054779605084 ], "wc_weaknesses_avg": [ 128.0, 73.22226437361795 ], "wc_questions_avg": [ 69.25, 67.73616094819664 ], "wc_limitations_avg": [ 15.0, 20.223748416156685 ], "wc_review_avg": [ 318.75, 142.62954637802085 ], "wc_reply_reviewers_avg": [ 199.25, 216.66491986475336 ], "wc_reply_authors_avg": [ 820.75, 856.3119685605241 ], "reply_reviewers_avg": [ 2.5, 2.0615528128088303 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17258664356723372150&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "utsouthwestern.edu;utsouthwestern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas Southwestern Medical Center", "aff_unique_dep": "", "aff_unique_url": "https://www.utsouthwestern.edu", "aff_unique_abbr": "UT Southwestern", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Weak Supervision Performance Evaluation via Partial Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94902", "id": "VOVyeOzZx0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VOVyeOzZx0", "openreview": "https://openreview.net/forum?id=VOVyeOzZx0", "poster": "", "project": "", "author_site": "Felipe Maia Polo, Subha Maity, Mikhail Yurochkin, Moulinath Banerjee, Yuekai Sun", "tldr": "", "abstract": "Programmatic Weak Supervision (PWS) enables supervised model training without direct access to ground truth labels, utilizing weak labels from heuristics, crowdsourcing, or pre-trained models. However, the absence of ground truth complicates model evaluation, as traditional metrics such as accuracy, precision, and recall cannot be directly calculated. In this work, we present a novel method to address this challenge by framing model evaluation as a partial identification problem and estimating performance bounds using Fr\u00e9chet bounds. Our approach derives reliable bounds on key metrics without requiring labeled data, overcoming core limitations in current weak supervision evaluation techniques. Through scalable convex optimization, we obtain accurate and computationally efficient bounds for metrics including accuracy, precision, recall, and F1-score, even in high-dimensional settings. This framework offers a robust approach to assessing model quality without ground truth labels, enhancing the practicality of weakly supervised learning for real-world applications.", "keywords": "weak supervision;evaluation;Frechet bounds;partial identification", "primary_area": "evaluation", "supplementary_material": "", "author": "Felipe Maia Polo;Subha Maity;Mikhail Yurochkin;Moulinath Banerjee;Yuekai Sun", "authorids": "~Felipe_Maia_Polo1;~Subha_Maity1;~Mikhail_Yurochkin1;~Moulinath_Banerjee1;~Yuekai_Sun1", "gender": "M;M;M;M;", "homepage": "https://felipemaiapolo.github.io/;https://lsa.umich.edu/stats/people/phd-students/smaity.html;https://moonfolk.github.io/;https://lsa.umich.edu/stats/people/faculty/moulib.html;https://yuekai.github.io/", "dblp": "261/9581;278/2922;191/6719;;", "google_scholar": "CJbgmnkAAAAJ;eD9vCGMAAAAJ;QjBF9sUAAAAJ;;6T1XtW8AAAAJ", "orcid": "0000-0002-4950-2795;;;;", "linkedin": ";;mikhail-yurochkin-a45659114/;;", "or_profile": "~Felipe_Maia_Polo1;~Subha_Maity1;~Mikhail_Yurochkin1;~Moulinath_Banerjee1;~Yuekai_Sun1", "aff": "University of Michigan - Ann Arbor;University of Waterloo;IBM Research;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;uwaterloo.ca;ibm.com;umich.edu;umich.edu", "position": "PhD student;Assistant Professor;Researcher;Full Professor;Assistant \u2192 Associate Professor of Statistics", "bibtex": "@inproceedings{\npolo2024weak,\ntitle={Weak Supervision Performance Evaluation via Partial Identification},\nauthor={Felipe Maia Polo and Subha Maity and Mikhail Yurochkin and Moulinath Banerjee and Yuekai Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VOVyeOzZx0}\n}", "github": "", "reviewers": "MRE8;R1rQ;bCgS;zvj9", "pdf_size": 2554735, "rating": "5;6;6;8", "confidence": "4;4;3;4", "soundness": "3;4;3;4", "novelty": "3;3;2;4", "presentation": "1;4;3;4", "wc_summary": "121;92;129;96", "wc_strengths": "48;82;41;86", "wc_weaknesses": "116;102;53;140", "wc_questions": "3;106;89;3", "wc_limitations": "7;6;45;9", "wc_review": "295;388;357;334", "wc_reply_reviewers": "27;30;146;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 109.5, 15.819292019556375 ], "wc_strengths_avg": [ 64.25, 19.954636052807377 ], "wc_weaknesses_avg": [ 102.75, 31.77558024647229 ], "wc_questions_avg": [ 50.25, 47.63074112377426 ], "wc_limitations_avg": [ 16.75, 16.345871038277526 ], "wc_review_avg": [ 343.5, 33.93007515464709 ], "wc_reply_reviewers_avg": [ 50.75, 56.21999199573049 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jGU6uQaOb84J:scholar.google.com/&scioq=Weak+Supervision+Performance+Evaluation+via+Partial+Identification&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "umich.edu;uwaterloo.ca;ibm.com;umich.edu;umich.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Michigan;University of Waterloo;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://www.umich.edu;https://uwaterloo.ca;https://www.ibm.com/research", "aff_unique_abbr": "UM;UW;IBM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "PrivCirNet: Efficient Private Inference via Block Circulant Transformation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94901", "id": "VPSx3n6ICE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VPSx3n6ICE", "openreview": "https://openreview.net/forum?id=VPSx3n6ICE", "poster": "", "project": "", "author_site": "Tianshi Xu, Lemeng Wu, Runsheng Wang, Meng Li", "tldr": "", "abstract": "Homomorphic encryption (HE)-based deep neural network (DNN) inference protects data and model privacy but suffers from significant computation overhead. We observe transforming the DNN weights into circulant matrices converts general matrix-vector multiplications into HE-friendly 1-dimensional convolutions, drastically reducing the HE computation cost. Hence, in this paper, we propose PrivCirNet, a protocol/network co-optimization framework based on block circulant transformation. At the protocol level, PrivCirNet customizes the HE encoding algorithm that is fully compatible with the block circulant transformation and reduces the computation latency in proportion to the block size. At the network level, we propose a latency-aware formulation to search for the layer-wise block size assignment based on second-order information. PrivCirNet also leverages layer fusion to further reduce the inference cost. We compare PrivCirNet with the state-of-the-art HE-based framework Bolt (IEEE S\\&P 2024) and HE-friendly pruning method SpENCNN (ICML 2023). For ResNet-18 and Vision Transformer (ViT) on Tiny ImageNet, PrivCirNet reduces latency by $5.0\\times$ and $1.3\\times$ with iso-accuracy over Bolt, respectively, and improves accuracy by $4.1$\\% and $12$\\% over SpENCNN, respectively. For MobileNetV2 on ImageNet, PrivCirNet achieves $1.7\\times$ lower latency and $4.2$\\% better accuracy over Bolt and SpENCNN, respectively. Our code and checkpoints are available on Git Hub.", "keywords": "Privacy-preserving deep learning;Homomorphic encryption;Circulant matrices", "primary_area": "privacy", "supplementary_material": "/attachment/727d4048c96c0afcac4a66c7cdd4a767f207f863.zip", "author": "Tianshi Xu;Lemeng Wu;Runsheng Wang;Meng Li", "authorids": "~Tianshi_Xu1;~Lemeng_Wu1;~Runsheng_Wang3;~Meng_Li1", "gender": "M;M;M;M", "homepage": "https://www.linkedin.com/in/tianshi-xu-9bb21728b/;https://sites.google.com/utexas.edu/wlm/home?authuser=1;;https://mengli.me", "dblp": "163/8470;232/3021;;70/1726-4", "google_scholar": "YS2vV5IAAAAJ;https://scholar.google.ca/citations?user=PCDSl2sAAAAJ;TZ_39qQAAAAJ;lvdRkEkAAAAJ", "orcid": "0009-0001-7832-828X;;;", "linkedin": "tianshi-xu-9bb21728b/;;;", "or_profile": "~Tianshi_Xu1;~Lemeng_Wu1;~Runsheng_Wang3;~Meng_Li1", "aff": "Peking University;University of Texas, Austin;Peking University;Peking University", "aff_domain": "stu.pku.edu.cn;cs.utexas.edu;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024privcirnet,\ntitle={PrivCirNet: Efficient Private Inference via Block Circulant Transformation},\nauthor={Tianshi Xu and Lemeng Wu and Runsheng Wang and Meng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VPSx3n6ICE}\n}", "github": "", "reviewers": "mVR6;wNvj;eLA6;eRSG", "pdf_size": 0, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "16;63;142;56", "wc_strengths": "40;35;74;76", "wc_weaknesses": "29;28;67;217", "wc_questions": "103;249;22;152", "wc_limitations": "5;7;3;4", "wc_review": "193;382;308;505", "wc_reply_reviewers": "89;14;0;297", "wc_reply_authors": "193;0;0;449", "reply_reviewers": "1;1;0;2", "reply_authors": "3;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 69.25, 45.669327781345764 ], "wc_strengths_avg": [ 56.25, 18.846418757949746 ], "wc_weaknesses_avg": [ 85.25, 77.67359589976506 ], "wc_questions_avg": [ 131.5, 82.20249387944382 ], "wc_limitations_avg": [ 4.75, 1.479019945774904 ], "wc_review_avg": [ 347.0, 113.38650713378554 ], "wc_reply_reviewers_avg": [ 100.0, 118.66549624890969 ], "wc_reply_authors_avg": [ 160.5, 184.26136328595857 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6748554130634408145&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stu.pku.edu.cn;cs.utexas.edu;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Peking University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.utexas.edu", "aff_unique_abbr": "Peking U;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "A Prompt-Based Knowledge Graph Foundation Model for Universal In-Context Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94900", "id": "VQyb9LKmUH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VQyb9LKmUH", "openreview": "https://openreview.net/forum?id=VQyb9LKmUH", "poster": "/media/PosterPDFs/NeurIPS%202024/94900.png?t=1731497432.1731205", "project": "", "author_site": "Yuanning Cui, Zequn Sun, Wei Hu", "tldr": "", "abstract": "Extensive knowledge graphs (KGs) have been constructed to facilitate knowledge-driven tasks across various scenarios. However, existing work usually develops separate reasoning models for different KGs, lacking the ability to generalize and transfer knowledge across diverse KGs and reasoning settings. In this paper, we propose a prompt-based KG foundation model via in-context learning, namely KG-ICL, to achieve a universal reasoning ability. Specifically, we introduce a prompt graph centered with a query-related example fact as context to understand the query relation. To encode prompt graphs with the generalization ability to unseen entities and relations in queries, we first propose a unified tokenizer that maps entities and relations in prompt graphs to predefined tokens. Then, we propose two message passing neural networks to perform prompt encoding and KG reasoning, respectively. We conduct evaluation on 43 different KGs in both transductive and inductive settings. Results indicate that the proposed KG-ICL outperforms baselines on most datasets, showcasing its outstanding generalization and universal reasoning capabilities. The source code is accessible on GitHub: https://github.com/nju-websoft/KG-ICL.", "keywords": "knowledge graph; link prediction; in-context learning; prompt graph; graph neural network;foundation model", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yuanning Cui;Zequn Sun;Wei Hu", "authorids": "~Yuanning_Cui3;~Zequn_Sun1;~Wei_Hu7", "gender": "M;M;M", "homepage": "https://sunzequn.github.io;http://ws.nju.edu.cn/~whu;https://scholar.google.com/citations?user=Cv37YxgAAAAJ&hl=zh-CN", "dblp": "186/9718;https://dblp.uni-trier.de/pid/52/173-7;291/6757", "google_scholar": "ph8SU3EAAAAJ;iWs168sAAAAJ;Cv37YxgAAAAJ", "orcid": ";0000-0003-3635-6335;0000-0002-9113-0155", "linkedin": ";;", "or_profile": "~Zequn_Sun1;~Wei_Hu7;~yuanning_cui2", "aff": "Nanjing university;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\ncui2024a,\ntitle={A Prompt-Based Knowledge Graph Foundation Model for Universal In-Context Reasoning},\nauthor={Yuanning Cui and Zequn Sun and Wei Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VQyb9LKmUH}\n}", "github": "", "reviewers": "JyNi;4T5d;xJpJ", "pdf_size": 1049844, "rating": "6;7;7", "confidence": "4;5;3", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;4;3", "wc_summary": "88;217;100", "wc_strengths": "23;111;57", "wc_weaknesses": "158;230;84", "wc_questions": "73;104;5", "wc_limitations": "12;1;13", "wc_review": "354;663;259", "wc_reply_reviewers": "16;22;25", "wc_reply_authors": "36;28;33", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 135.0, 58.18934610390462 ], "wc_strengths_avg": [ 63.666666666666664, 36.23380864453651 ], "wc_weaknesses_avg": [ 157.33333333333334, 59.60611452601896 ], "wc_questions_avg": [ 60.666666666666664, 41.34677200889515 ], "wc_limitations_avg": [ 8.666666666666666, 5.436502143433364 ], "wc_review_avg": [ 425.3333333333333, 172.4728642102545 ], "wc_reply_reviewers_avg": [ 21.0, 3.7416573867739413 ], "wc_reply_authors_avg": [ 32.333333333333336, 3.299831645537222 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6962324005849187171&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "MACM: Utilizing a Multi-Agent System for Condition Mining in Solving Complex Mathematical Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94899", "id": "VR2RdSxtzs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VR2RdSxtzs", "openreview": "https://openreview.net/forum?id=VR2RdSxtzs", "poster": "", "project": "", "author_site": "Bin Lei, Yi Zhang, Shan Zuo, Ali Payani, Caiwen Ding", "tldr": "", "abstract": "Recent advancements in large language models, such as GPT-4, have demonstrated remarkable capabilities in processing standard queries. Despite these advancements, their performance substantially declines in advanced mathematical problems requiring complex, multi-step logical reasoning. To enhance their inferential capabilities, current research has delved into prompting engineering, exemplified by methodologies such as the Tree of Thought and Graph of Thought.\nNonetheless, these existing approaches encounter two significant limitations. Firstly, their effectiveness in tackling complex mathematical problems is somewhat constrained. Secondly, the necessity to design distinct prompts for individual problems hampers their generalizability.\nIn response to these limitations, this paper introduces the Multi-Agent System for conditional Mining (MACM) prompting method. It not only resolves intricate mathematical problems but also demonstrates strong generalization capabilities across various mathematical contexts.\nWith the assistance of MACM, the accuracy of GPT-4 Turbo on the most challenging level five mathematical problems in the MATH dataset increase from $\\mathbf{54.68\\\\%} \\text{ to } \\mathbf{76.73\\\\%}$.", "keywords": "Multi-Agent;Prompting;LLM;Math problem", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/39d6f221fe9335530f34ec3c96a26365aa097a12.zip", "author": "Bin Lei;Yi Zhang;Shan Zuo;Ali Payani;Caiwen Ding", "authorids": "~Bin_Lei1;~Yi_Zhang90;~Shan_Zuo1;~Ali_Payani1;~Caiwen_Ding1", "gender": ";M;F;M;M", "homepage": "https://github.com/bin123apple;;https://distributed-decision-learning.engr.uconn.edu/people/pricipal-investigator/;;https://caiwending.cse.uconn.edu/", "dblp": ";;;184/3921;175/2489", "google_scholar": ";f2DuuywAAAAJ;_tCX5K4AAAAJ;9rHwD8wAAAAJ;7hR0r_EAAAAJ", "orcid": ";0000-0002-4457-1601;;0000-0003-4054-2958;0000-0003-0891-1231", "linkedin": ";yi-zhang-6b01b7266/;shan-zuo-550060a9/;ali-payani-59267515;caiwen-ding-47144489/", "or_profile": "~Bin_Lei1;~Yi_Zhang90;~Shan_Zuo1;~Ali_Payani1;~Caiwen_Ding1", "aff": "University of Minnesota - Twin Cities;University of Connecticut;University of Connecticut;Cisco;University of Connecticut", "aff_domain": "umn.edu;uconn.edu;uconn.edu;cisco.com;uconn.edu", "position": "PhD student;PhD student;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nlei2024macm,\ntitle={{MACM}: Utilizing a Multi-Agent System for Condition Mining in Solving Complex Mathematical Problems},\nauthor={Bin Lei and Yi Zhang and Shan Zuo and Ali Payani and Caiwen Ding},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VR2RdSxtzs}\n}", "github": "", "reviewers": "pd1k;zB7e;4rbe;upb5", "pdf_size": 1458666, "rating": "4;5;6;7", "confidence": "4;3;4;5", "soundness": "1;2;3;3", "novelty": "1;3;3;2", "presentation": "2;3;3;3", "wc_summary": "68;21;64;68", "wc_strengths": "14;17;32;55", "wc_weaknesses": "532;112;24;71", "wc_questions": "4;22;33;192", "wc_limitations": "4;10;1;8", "wc_review": "622;182;154;394", "wc_reply_reviewers": "657;280;25;51", "wc_reply_authors": "2238;1275;0;0", "reply_reviewers": "4;2;1;1", "reply_authors": "6;4;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.25, 19.84155991851447 ], "wc_strengths_avg": [ 29.5, 16.224980739587952 ], "wc_weaknesses_avg": [ 184.75, 202.88836216008053 ], "wc_questions_avg": [ 62.75, 75.3371588261729 ], "wc_limitations_avg": [ 5.75, 3.491060010942235 ], "wc_review_avg": [ 338.0, 188.403821617291 ], "wc_reply_reviewers_avg": [ 253.25, 253.34401019167595 ], "wc_reply_authors_avg": [ 878.25, 941.9364031079805 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.0, 2.1213203435596424 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2501977210356802479&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umn.edu;uconn.edu;uconn.edu;cisco.com;uconn.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of Minnesota;University of Connecticut;Cisco Systems", "aff_unique_dep": ";;", "aff_unique_url": "https://www.minnesota.edu;https://www.uconn.edu;https://www.cisco.com", "aff_unique_abbr": "UMN;UConn;Cisco", "aff_campus_unique_index": "0", "aff_campus_unique": "Twin Cities;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "NoiseGPT: Label Noise Detection and Rectification through Probability Curvature", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94898", "id": "VRRvJnxgQe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VRRvJnxgQe", "openreview": "https://openreview.net/forum?id=VRRvJnxgQe", "poster": "/media/PosterPDFs/NeurIPS%202024/94898.png?t=1730860430.494993", "project": "", "author_site": "Haoyu Wang, Zhuo Huang, Zhiwei Lin, Tongliang Liu", "tldr": "", "abstract": "Machine learning craves high-quality data which is a major bottleneck during realistic deployment, as it takes abundant resources and massive human labor to collect and label data. Unfortunately, label noise where image data mismatches with incorrect label exists ubiquitously in all kinds of datasets, significantly degrading the learning performance of deep networks. Learning with Label Noise (LNL) has been a common strategy for mitigating the influence of noisy labels. However, existing LNL methods either require pertaining using the memorization effect to separate clean data from noisy ones or rely on dataset assumptions that cannot extend to various scenarios. Thanks to the development of Multimodal Large Language Models (MLLMs) which possess massive knowledge and hold In-Context Learning (ICL) ability, this paper proposes NoiseGPT to effectively leverage MLLMs as a knowledge expert for conducting label noise detection and rectification. Specifically, we observe a \\textit{probability curvature} effect of MLLMs where clean and noisy examples reside on curvatures with different smoothness, further enabling the detection of label noise. By designing a token-wise Mix-of-Feature (MoF) technique to produce the curvature, we propose an In-Context Discrepancy (ICD) measure to determine the authenticity of an image-label pair. Subsequently, we repeat such a process to find the best matching pairs to complete our label rectification. Through extensive experiments, we carefully demonstrate the effectiveness of NoiseGPT on detecting and cleansing dataset noise, especially on ILSVRC12, the AUROC of NoiseGPT reached over 0.92. And by integrating with existing methods, the classification performance can be significantly improved on noisy datasets, typically by 22.8\\% on 80\\% symmetric CIFAR-10 with M-correction. Source code: \\url{https://github.com/drunkerWang/NoiseGPT}", "keywords": "Label noise;Multimodal Learning;Large Language Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/c83d3d20c3a2862cdbf94edc19571a86812aeb36.zip", "author": "Haoyu Wang;Zhuo Huang;Zhiwei Lin;Tongliang Liu", "authorids": "~Haoyu_Wang29;~Zhuo_Huang2;~Zhiwei_Lin3;~Tongliang_Liu1", "gender": "M;M;F;M", "homepage": "https://github.com/drunkerWang;https://zhuohuangai.github.io/;https://ac.bit.edu.cn/szdw/jsml/znxxclykzyjs1/f7b90bc12ff14897bf00a110ab3ee4ab.htm;https://tongliang-liu.github.io/", "dblp": ";;;150/6667", "google_scholar": ";;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;0000-0003-4426-1221;", "linkedin": ";;;", "or_profile": "~Haoyu_Wang29;~Zhuo_Huang2;~Zhiwei_Lin3;~Tongliang_Liu1", "aff": "Beijing Institute of Technology;University of Sydney;Beijing Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "bit.edu.cn;uni.sydney.edu.au;bit.edu.cn;mbzuai.ac.ae", "position": "MS student;PhD student;Associate Professor;Affiliated Associate Professor", "bibtex": "@inproceedings{\nwang2024noisegpt,\ntitle={Noise{GPT}: Label Noise Detection and Rectification through Probability Curvature},\nauthor={Haoyu Wang and Zhuo Huang and Zhiwei Lin and Tongliang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VRRvJnxgQe}\n}", "github": "", "reviewers": "TkfP;81A3;QCqA;gxfG", "pdf_size": 1516176, "rating": "3;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "1;2;2;2", "presentation": "2;2;2;3", "wc_summary": "54;108;53;82", "wc_strengths": "13;110;70;115", "wc_weaknesses": "120;258;98;228", "wc_questions": "42;44;42;199", "wc_limitations": "1;23;42;117", "wc_review": "230;543;305;741", "wc_reply_reviewers": "165;159;0;0", "wc_reply_authors": "357;459;400;196", "reply_reviewers": "1;1;0;0", "reply_authors": "3;4;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 22.69774217846348 ], "wc_strengths_avg": [ 77.0, 40.85951541562871 ], "wc_weaknesses_avg": [ 176.0, 68.27884006044626 ], "wc_questions_avg": [ 81.75, 67.69924297951935 ], "wc_limitations_avg": [ 45.75, 43.61980628109208 ], "wc_review_avg": [ 454.75, 201.65859143612008 ], "wc_reply_reviewers_avg": [ 81.0, 81.02777301641703 ], "wc_reply_authors_avg": [ 353.0, 97.60891352740282 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17599701724048905434&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "bit.edu.cn;uni.sydney.edu.au;bit.edu.cn;mbzuai.ac.ae", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Beijing Institute of Technology;University of Sydney;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.sydney.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": "BIT;USYD;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "China;Australia;United Arab Emirates" }, { "title": "PageRank Bandits for Link Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94897", "id": "VSz9na5Jtl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VSz9na5Jtl", "openreview": "https://openreview.net/forum?id=VSz9na5Jtl", "poster": "", "project": "", "author_site": "Yikun Ban, Jiaru Zou, Zihao Li, Yunzhe Qi, Dongqi Fu, Jian Kang, Hanghang Tong, Jingrui He", "tldr": "", "abstract": "Link prediction is a critical problem in graph learning with broad applications such as recommender systems and knowledge graph completion. Numerous research efforts have been directed at solving this problem, including approaches based on similarity metrics and Graph Neural Networks (GNN). However, most existing solutions are still rooted in conventional supervised learning, which makes it challenging to adapt over time to changing customer interests and to address the inherent dilemma of exploitation versus exploration in link prediction.\nTo tackle these challenges, this paper reformulates link prediction as a sequential decision-making process, where each link prediction interaction occurs sequentially. We propose a novel fusion algorithm, PRB (PageRank Bandits), which is the first to combine contextual bandits with PageRank for collaborative exploitation and exploration. We also introduce a new reward formulation and provide a theoretical performance guarantee for PRB. Finally, we extensively evaluate PRB in both online and offline settings, comparing it with bandit-based and graph-based methods. The empirical success of PRB demonstrates the value of the proposed fusion approach. Our code is released at https://github.com/jiaruzouu/PRB.", "keywords": "Link Prediction;PageRank;Graph Mining", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Yikun Ban;Jiaru Zou;Zihao Li;Yunzhe Qi;Dongqi Fu;Jian Kang;Hanghang Tong;Jingrui He", "authorids": "~Yikun_Ban1;~Jiaru_Zou1;~Zihao_Li9;~Yunzhe_Qi1;~Dongqi_Fu1;~Jian_Kang1;~Hanghang_Tong3;~Jingrui_He1", "gender": ";M;;M;M;M;;F", "homepage": ";;https://www.zihao.website/;https://www.linkedin.com/in/yunzhe-qi-a1409b161/;https://dongqifu.github.io/;https://jiank2.github.io/;http://tonghanghang.org;https://www.hejingrui.org", "dblp": ";292/7978;;259/3914;273/0228;56/6072-8;58/1757;34/2685", "google_scholar": ";GzLTey4AAAAJ;BENWFUwAAAAJ;Gt17_A0AAAAJ;WByXZAcAAAAJ;U_jFlOQAAAAJ;RaINcuUAAAAJ;hXpZynkAAAAJ", "orcid": ";0009-0002-3583-354X;0000-0001-7987-1770;0000-0001-5828-7436;0000-0002-8726-9234;0000-0003-3902-7131;0000-0003-4405-3887;0000-0002-6429-6272", "linkedin": ";jiaru-zou-67434a21a/;zihao-li-193126331/;yunzhe-qi-a1409b161/;;jiank2/;htong/;", "or_profile": "~Yikun_Ban1;~Jiaru_Zou1;~Zihao_Li9;~Yunzhe_Qi1;~Dongqi_Fu1;~Jian_Kang1;~Hanghang_Tong3;~Jingrui_He1", "aff": ";\tUniversity of Illinois at Urbana-Champaign ;University of Illinois Urbana-Champaign;University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;University of Rochester;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;illinois.edu;illinois.edu;illinois.edu;cs.rochester.edu;illinois.edu;illinois.edu", "position": ";Undergrad student;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nban2024pagerank,\ntitle={PageRank Bandits for Link Prediction},\nauthor={Yikun Ban and Jiaru Zou and Zihao Li and Yunzhe Qi and Dongqi Fu and Jian Kang and Hanghang Tong and Jingrui He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VSz9na5Jtl}\n}", "github": "", "reviewers": "bPwg;AzV9;EydH;616o", "pdf_size": 2421275, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "2;2;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "85;58;33;44", "wc_strengths": "42;71;20;83", "wc_weaknesses": "104;37;93;171", "wc_questions": "401;25;2;22", "wc_limitations": "1;8;2;4", "wc_review": "633;199;150;324", "wc_reply_reviewers": "147;17;35;0", "wc_reply_authors": "51;31;61;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.0, 19.45507645834372 ], "wc_strengths_avg": [ 54.0, 24.647515087732476 ], "wc_weaknesses_avg": [ 101.25, 47.61499238685227 ], "wc_questions_avg": [ 112.5, 166.8000299760165 ], "wc_limitations_avg": [ 3.75, 2.680951323690902 ], "wc_review_avg": [ 326.5, 187.98736659680085 ], "wc_reply_reviewers_avg": [ 49.75, 57.495108487592226 ], "wc_reply_authors_avg": [ 35.75, 23.29565410114084 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4631778251477839272&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";illinois.edu;illinois.edu;illinois.edu;illinois.edu;cs.rochester.edu;illinois.edu;illinois.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Rochester", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.rochester.edu", "aff_unique_abbr": "UIUC;U of R", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stability and Generalizability in SDE Diffusion Models with Measure-Preserving Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94896", "id": "VTJvTa41D0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VTJvTa41D0", "openreview": "https://openreview.net/forum?id=VTJvTa41D0", "poster": "/media/PosterPDFs/NeurIPS%202024/94896.png?t=1731401156.806675", "project": "", "author_site": "Weitong Zhang, Chengqi Zang, Liu Li, Sarah Cechnicka, Cheng Ouyang, Bernhard Kainz", "tldr": "", "abstract": "Inverse problems describe the process of estimating the causal factors from a set of measurements or data. \nMapping of often incomplete or degraded data to parameters is ill-posed, thus data-driven iterative solutions are required, for example when reconstructing clean images from poor signals. \nDiffusion models have shown promise as potent generative tools for solving inverse problems due to their superior reconstruction quality and their compatibility with iterative solvers. However, most existing approaches are limited to linear inverse problems represented as Stochastic Differential Equations (SDEs). This simplification falls short of addressing the challenging nature of real-world problems, leading to amplified cumulative errors and biases. \nWe provide an explanation for this gap through the lens of measure-preserving dynamics of Random Dynamical Systems (RDS) with which we analyse Temporal Distribution Discrepancy and thus introduce a theoretical framework based on RDS for SDE diffusion models. We uncover several strategies that inherently enhance the stability and generalizability of diffusion models for inverse problems and introduce a novel score-based diffusion framework, the Dynamics-aware SDE Diffusion Generative Model (D^3GM). The Measure-preserving property can return the degraded measurement to the original state despite complex degradation with the RDS concept of stability.\nOur extensive experimental results corroborate the effectiveness of D^3GM across multiple benchmarks including a prominent application for inverse problems, magnetic resonance imaging.", "keywords": "Diffusion modeling;Random dynamical systems;Magnetic resonance imaging", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Weitong Zhang;Chengqi Zang;Liu Li;Sarah Cechnicka;Cheng Ouyang;Bernhard Kainz", "authorids": "~Weitong_Zhang3;~Chengqi_Zang1;~Liu_Li2;~Sarah_Cechnicka1;~Cheng_Ouyang2;~Bernhard_Kainz1", "gender": ";M;;F;;M", "homepage": ";;https://sites.google.com/view/smilell/home;;;http://wp.doc.ic.ac.uk/bkainz/", "dblp": ";;;345/2140;;76/5562", "google_scholar": ";;;5etzH5oAAAAJ;;https://scholar.google.co.uk/citations?user=Igxq-YEAAAAJ", "orcid": ";;;0009-0008-3449-9379;;0000-0002-7813-5023", "linkedin": ";chengqi-zang-4907051b1;;sarah-cechnicka-2462a029b/;;https://uk.linkedin.com/in/bernhard-kainz-43514320", "or_profile": "~Weitong_Zhang3;~Chengqi_Zang1;~Liu_Li2;~Sarah_Cechnicka1;~Cheng_Ouyang2;~Bernhard_Kainz1", "aff": ";University of Tokyo;Imperial College London;Imperial College London;;King's College London", "aff_domain": ";g.ecc.u-tokyo.ac.jp;ic.ac.uk;ic.ac.uk;;kcl.ac.uk", "position": ";PhD student;PhD student;PhD student;;Honorable Lecturer", "bibtex": "@inproceedings{\nzhang2024stability,\ntitle={Stability and Generalizability in {SDE} Diffusion Models with Measure-Preserving Dynamics},\nauthor={Weitong Zhang and Chengqi Zang and Liu Li and Sarah Cechnicka and Cheng Ouyang and Bernhard Kainz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VTJvTa41D0}\n}", "github": "", "reviewers": "J29t;z6fz;APgH", "pdf_size": 36251209, "rating": "5;7;7", "confidence": "3;4;3", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "91;122;100", "wc_strengths": "20;73;65", "wc_weaknesses": "163;30;116", "wc_questions": "40;110;5", "wc_limitations": "13;17;1", "wc_review": "327;352;287", "wc_reply_reviewers": "62;16;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 104.33333333333333, 13.021349989749739 ], "wc_strengths_avg": [ 52.666666666666664, 23.328570942563587 ], "wc_weaknesses_avg": [ 103.0, 55.06965286495518 ], "wc_questions_avg": [ 51.666666666666664, 43.65266951236265 ], "wc_limitations_avg": [ 10.333333333333334, 6.79869268479038 ], "wc_review_avg": [ 322.0, 26.770630673681683 ], "wc_reply_reviewers_avg": [ 26.0, 26.280537792569366 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12405709343936190880&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";g.ecc.u-tokyo.ac.jp;ic.ac.uk;ic.ac.uk;;kcl.ac.uk", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Tokyo;Imperial College London;King's College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.imperial.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "UTokyo;ICL;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Japan;United Kingdom" }, { "id": "VTcGX5HO19", "title": "Bayesian Kernelized Tensor Factorization as Surrogate for Bayesian Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Bayesian optimization (BO) mainly uses Gaussian processes (GP) with a stationary and separable kernel function (e.g., the squared-exponential kernel with automatic relevance determination [SE-ARD]) as the surrogate model. However, such localized kernel specifications are deficient in learning complex functions that are non-stationary, non-separable and multi-modal. In this paper, we propose using Bayesian Kernelized Tensor Factorization (BKTF) as a new surrogate model for Bayesian optimization (BO) in a $D$-dimensional grid with both continuous and categorical variables. Our key idea is to approximate the underlying $D$-dimensional solid with a fully Bayesian low-rank tensor CP decomposition, in which we place GP priors on the latent basis functions for each dimension to encode local consistency and smoothness. With this formulation, the information from each sample can be shared not only with neighbors but also across dimensions, thus fostering a more global search strategy. Although BKTF no longer has an analytical posterior, we efficiently approximate the posterior distribution through Markov chain Monte Carlo (MCMC). We conduct numerical experiments on several test functions with continuous variables and two machine learning hyperparameter tuning problems with mixed variables. The results show that BKTF offers a flexible and highly effective approach to characterizing and optimizing complex functions, especially in cases where the initial sample size and budget are severely limited.", "keywords": "Bayesian Optimization;Kernelized Tensor Factorization;Markov chain Monte Carlo;Surrogate Model", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/7c596e10c804790cb418f7f84a5246e6e48c483e.zip", "author": "Mengying Lei;Lijun Sun", "authorids": "~Mengying_Lei1;~Lijun_Sun1", "gender": "F;", "homepage": ";", "dblp": ";", "google_scholar": "vWdutQIAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Mengying_Lei1;~Lijun_Sun1", "aff": "McGill University;", "aff_domain": "mail.mcgill.ca;", "position": "PhD student;", "bibtex": "@misc{\nanonymous2024bayesian,\ntitle={Bayesian Kernelized Tensor Factorization as Surrogate for Bayesian Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=VTcGX5HO19}\n}", "github": "", "project": "", "reviewers": "qG8Q;MJcW;jRPp;CTJW", "site": "https://openreview.net/forum?id=VTcGX5HO19", "pdf_size": 2036227, "rating": "4;5;6;6", "confidence": "4;4;3;3", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "112;65;132;47", "wc_strengths": "168;15;157;20", "wc_weaknesses": "301;139;670;42", "wc_questions": "217;607;254;62", "wc_limitations": "188;60;11;31", "wc_review": "986;886;1224;202", "wc_reply_reviewers": "188;152;929;10", "wc_reply_authors": "1536;184;1515;0", "reply_reviewers": "2;1;2;1", "reply_authors": "4;2;4;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.0, 34.343849522148794 ], "wc_strengths_avg": [ 90.0, 72.62575300814443 ], "wc_weaknesses_avg": [ 288.0, 239.1704413174839 ], "wc_questions_avg": [ 285.0, 199.37276644516925 ], "wc_limitations_avg": [ 72.5, 68.92205742721266 ], "wc_review_avg": [ 824.5, 379.79303574446965 ], "wc_reply_reviewers_avg": [ 319.75, 357.9904852087552 ], "wc_reply_authors_avg": [ 808.75, 719.7344562406332 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9402037878263293330&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "A Motion-aware Spatio-temporal Graph for Video Salient Object Ranking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94895", "id": "VUBtAcQN44", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VUBtAcQN44", "openreview": "https://openreview.net/forum?id=VUBtAcQN44", "poster": "/media/PosterPDFs/NeurIPS%202024/94895.png?t=1730687230.2237034", "project": "", "author_site": "Hao Chen, Zhu Yufei, Yongjian Deng", "tldr": "", "abstract": "Video salient object ranking aims to simulate the human attention mechanism by dynamically prioritizing the visual attraction of objects in a scene over time. Despite its numerous practical applications, this area remains underexplored. In this work, we propose a graph model for video salient object ranking. This graph simultaneously explores multi-scale spatial contrasts and intra-/inter-instance temporal correlations across frames to extract diverse spatio-temporal saliency cues. It has two advantages: 1. Unlike previous methods that only perform global inter-frame contrast or compare all proposals across frames globally, we explicitly model the motion of each instance by comparing its features with those in the same spatial region in adjacent frames, thus obtaining more accurate motion saliency cues. 2. We synchronize the spatio-temporal saliency cues in a single graph for joint optimization, which exhibits better dynamics compared to the previous stage-wise methods that prioritize spatial cues followed by temporal cues. Additionally, we propose a simple yet effective video retargeting method based on video saliency ranking. Extensive experiments demonstrate the superiority of our model in video salient object ranking and the effectiveness of the video retargeting method. Our codes/models are released at [https://github.com/zyf-815/VSOR/tree/main](https://github.com/zyf-815/VSOR/tree/main).", "keywords": "Video Salient Object Ranking;Spatio-temporal Graph;Video Retargeting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hao Chen;Yufei Zhu;Yongjian Deng", "authorids": "~Hao_Chen39;~Yufei_Zhu4;~Yongjian_Deng1", "gender": "M;M;M", "homepage": ";https://github.com/zyf-815;", "dblp": ";;271/3286", "google_scholar": "6qVQ7ZMAAAAJ;;gKXu0XgAAAAJ", "orcid": ";;0000-0001-6253-3564", "linkedin": ";;", "or_profile": "~Hao_Chen39;~Yufei_Zhu4;~Yongjian_Deng1", "aff": "Southeast University;Southeast University;Beijing University of Technology", "aff_domain": "seu.edu.cn;seu.edu.cn;bjut.edu.cn", "position": "Associate Professor;MS student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024a,\ntitle={A Motion-aware Spatio-temporal Graph for Video Salient Object Ranking},\nauthor={Hao Chen and Yufei Zhu and Yongjian Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VUBtAcQN44}\n}", "github": "", "reviewers": "TupH;vz4h;kDtj;KoDU", "pdf_size": 18414357, "rating": "5;5;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;2", "novelty": "1;3;3;3", "presentation": "3;2;3;2", "wc_summary": "59;60;87;32", "wc_strengths": "42;58;143;34", "wc_weaknesses": "291;139;97;157", "wc_questions": "226;38;155;23", "wc_limitations": "20;10;1;27", "wc_review": "638;305;483;273", "wc_reply_reviewers": "485;0;0;56", "wc_reply_authors": "738;0;0;0", "reply_reviewers": "2;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 59.5, 19.448650338776723 ], "wc_strengths_avg": [ 69.25, 43.447525821385966 ], "wc_weaknesses_avg": [ 171.0, 72.62231062146122 ], "wc_questions_avg": [ 110.5, 84.01339178964268 ], "wc_limitations_avg": [ 14.5, 9.86154146165801 ], "wc_review_avg": [ 424.75, 146.8304719736336 ], "wc_reply_reviewers_avg": [ 135.25, 203.2183247150709 ], "wc_reply_authors_avg": [ 184.5, 319.56337399645787 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-MU-DItwkCUJ:scholar.google.com/&scioq=A+Motion-aware+Spatio-temporal+Graph+for+Video+Salient+Object+Ranking&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "seu.edu.cn;seu.edu.cn;bjut.edu.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Southeast University;Beijing University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;http://www.bjut.edu.cn", "aff_unique_abbr": "SEU;BJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Unveiling the Hidden Structure of Self-Attention via Kernel Principal Component Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94894", "id": "VUWvVvNi6r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VUWvVvNi6r", "openreview": "https://openreview.net/forum?id=VUWvVvNi6r", "poster": "", "project": "", "author_site": "Rachel S.Y. Teo, Tan Nguyen", "tldr": "", "abstract": "The remarkable success of transformers in sequence modeling tasks, spanning various applications in natural language processing and computer vision, is attributed to the critical role of self-attention. Similar to the development of most deep learning models, the construction of these attention mechanisms relies on heuristics and experience. In our work, we derive self-attention from kernel principal component analysis (kernel PCA) and show that self-attention projects its query vectors onto the principal component axes of its key matrix in a feature space. We then formulate the exact formula for the value matrix in self-attention, theoretically and empirically demonstrating that this value matrix captures the eigenvectors of the Gram matrix of the key vectors in self-attention. Leveraging our kernel PCA framework, we propose Attention with Robust Principal Components (RPC-Attention), a novel class of robust attention that is resilient to data contamination. We empirically demonstrate the advantages of RPC-Attention over softmax attention on the ImageNet-1K object classification, WikiText-103 language modeling, and ADE20K image segmentation task.", "keywords": "Transformers;Attention;Kernel Principal Component Analysis", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/96a944bc3859e84eb7fe592d2ba07bedaf98fdc9.zip", "author": "Rachel Teo;Tan Minh Nguyen", "authorids": "~Rachel_Teo1;~Tan_Minh_Nguyen1", "gender": "F;M", "homepage": "https://github.com/rachtsy;https://tanmnguyen89.github.io/", "dblp": "380/2055.html;255/4725", "google_scholar": ";OizOh88AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Rachel_Teo1;~Tan_Minh_Nguyen1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nteo2024unveiling,\ntitle={Unveiling the Hidden Structure of Self-Attention via Kernel Principal Component Analysis},\nauthor={Rachel Teo and Tan Minh Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VUWvVvNi6r}\n}", "github": "", "reviewers": "rAS8;9vVz;8tie;85Cd;hhSg", "pdf_size": 1189724, "rating": "5;5;6;7;7", "confidence": "5;5;3;3;2", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;4", "presentation": "3;2;3;3;4", "wc_summary": "83;116;72;83;99", "wc_strengths": "27;70;79;54;126", "wc_weaknesses": "140;96;144;167;44", "wc_questions": "9;89;35;3;28", "wc_limitations": "19;35;4;14;1", "wc_review": "278;406;334;321;298", "wc_reply_reviewers": "161;78;15;0;38", "wc_reply_authors": "461;344;427;598;192", "reply_reviewers": "2;2;1;0;1", "reply_authors": "5;4;5;3;4", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 1.2 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 90.6, 15.344054223053307 ], "wc_strengths_avg": [ 71.2, 32.603067340359246 ], "wc_weaknesses_avg": [ 118.2, 43.63668181702179 ], "wc_questions_avg": [ 32.8, 30.465718438927386 ], "wc_limitations_avg": [ 14.6, 12.109500402576483 ], "wc_review_avg": [ 327.4, 43.742885135756644 ], "wc_reply_reviewers_avg": [ 58.4, 57.65622256096908 ], "wc_reply_authors_avg": [ 404.4, 134.13217361990374 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 4.2, 0.7483314773547882 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9316949906249123, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11022219837077223505&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nus.edu.sg;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Randomized algorithms and PAC bounds for inverse reinforcement learning in continuous spaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94893", "id": "VUgXAWOCQz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VUgXAWOCQz", "openreview": "https://openreview.net/forum?id=VUgXAWOCQz", "poster": "/media/PosterPDFs/NeurIPS%202024/94893.png?t=1733591099.3257384", "project": "", "author_site": "Angeliki Kamoutsi, Peter Schmitt-F\u00f6rster, Tobias Sutter, Volkan Cevher, John Lygeros", "tldr": "", "abstract": "This work studies discrete-time discounted Markov decision processes with continuous state and action spaces and addresses the inverse problem of inferring a cost function from observed optimal behavior. We first consider the case in which we have access to the entire expert policy and characterize the set of solutions to the inverse problem by using occupation measures, linear duality, and complementary slackness conditions. To avoid trivial solutions and ill-posedness, we introduce a natural linear normalization constraint. This results in an infinite-dimensional linear feasibility problem, prompting a thorough analysis of its properties. Next, we use linear function approximators and adopt a randomized approach, namely the scenario approach and related probabilistic feasibility guarantees, to derive $\\varepsilon$-optimal solutions for the inverse problem. We further discuss the sample complexity for a desired approximation accuracy. Finally, we deal with the more realistic case where we only have access to a finite set of expert demonstrations and a generative model and provide bounds on the error made when working with samples.", "keywords": "Inverse reinforcement learning;statistical learning;Markov decision processes", "primary_area": "learning_theory", "supplementary_material": "", "author": "Angeliki Kamoutsi;Peter Schmitt-F\u00f6rster;Tobias Sutter;Volkan Cevher;John Lygeros", "authorids": "~Angeliki_Kamoutsi1;~Peter_Schmitt-F\u00f6rster1;~Tobias_Sutter1;~Volkan_Cevher1;~John_Lygeros1", "gender": "F;M;M;M;M", "homepage": "https://control.ee.ethz.ch/people/profile.angeliki-kamoutsi.html;https://www.mlo.uni-konstanz.de/team/peter-schmitt-foerster/;https://sites.google.com/view/suttert/home;http://lions.epfl.ch;https://control.ee.ethz.ch/people/profile.john-lygeros.html", "dblp": "198/0531;378/5588;01/10961;70/5301;51/2754", "google_scholar": ";;https://scholar.google.ch/citations?user=11gxHJIAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-1226-6845;;0000-0002-6159-1962", "linkedin": ";;;;john-lygeros-662b73233/", "or_profile": "~Angeliki_Kamoutsi1;~Peter_Schmitt-F\u00f6rster1;~Tobias_Sutter1;~Volkan_Cevher1;~John_Lygeros1", "aff": "ETHZ - ETH Zurich;Universit\u00e4t Konstanz;Universit\u00e4t Konstanz;Amazon Development Center Germany;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;uni-konstanz.de;uni-konstanz.de;amazon.de;ethz.ch", "position": "PhD student;PhD student;Assistant Professor;Amazon Scholar;Full Professor", "bibtex": "@inproceedings{\nkamoutsi2024randomized,\ntitle={Randomized algorithms and {PAC} bounds for inverse reinforcement learning in continuous spaces},\nauthor={Angeliki Kamoutsi and Peter Schmitt-F{\\\"o}rster and Tobias Sutter and Volkan Cevher and John Lygeros},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VUgXAWOCQz}\n}", "github": "", "reviewers": "sQPr;PJ8y;579s;p8oS", "pdf_size": 946942, "rating": "4;6;6;7", "confidence": "2;4;2;4", "soundness": "2;3;2;4", "novelty": "2;2;3;4", "presentation": "2;1;3;4", "wc_summary": "89;94;50;115", "wc_strengths": "75;32;46;242", "wc_weaknesses": "91;71;166;170", "wc_questions": "2;1132;94;386", "wc_limitations": "1;14;1;31", "wc_review": "258;1343;357;944", "wc_reply_reviewers": "0;459;0;171", "wc_reply_authors": "0;2449;0;82", "reply_reviewers": "0;1;0;1", "reply_authors": "1;6;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 87.0, 23.484037131634757 ], "wc_strengths_avg": [ 98.75, 84.14682109265922 ], "wc_weaknesses_avg": [ 124.5, 44.09365033652805 ], "wc_questions_avg": [ 403.5, 443.84991832825654 ], "wc_limitations_avg": [ 11.75, 12.316147936753602 ], "wc_review_avg": [ 725.5, 442.5485849033979 ], "wc_reply_reviewers_avg": [ 157.5, 187.54799385757235 ], "wc_reply_authors_avg": [ 632.75, 1049.146647280541 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1a7D39UQHR0J:scholar.google.com/&scioq=Randomized+algorithms+and+PAC+bounds+for+inverse+reinforcement+learning+in+continuous+spaces&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ethz.ch;uni-konstanz.de;uni-konstanz.de;amazon.de;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "ETH Zurich;Universit\u00e4t Konstanz;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.ethz.ch;https://www.uni-konstanz.de;https://www.amazon.de", "aff_unique_abbr": "ETHZ;Uni Konstanz;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "FUG: Feature-Universal Graph Contrastive Pre-training for Graphs with Diverse Node Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94892", "id": "VUuOsBrqaw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VUuOsBrqaw", "openreview": "https://openreview.net/forum?id=VUuOsBrqaw", "poster": "/media/PosterPDFs/NeurIPS%202024/94892.png?t=1733257218.7387593", "project": "", "author_site": "Jitao Zhao, Di Jin, Meng Ge, Lianze Shan, Xin Wang, Dongxiao He, Zhiyong Feng", "tldr": "", "abstract": "Graph Neural Networks (GNNs), known for their effective graph encoding, are extensively used across various fields. Graph self-supervised pre-training, which trains GNN encoders without manual labels to generate high-quality graph representations, has garnered widespread attention. However, due to the inherent complex characteristics in graphs, GNNs encoders pre-trained on one dataset struggle to directly adapt to others that have different node feature shapes. This typically necessitates either model rebuilding or data alignment. The former results in non-transferability as each dataset need to rebuild a new model, while the latter brings serious knowledge loss since it forces features into a uniform shape by preprocessing such as Principal Component Analysis (PCA). To address this challenge, we propose a new Feature-Universal Graph contrastive pre-training strategy (FUG) that naturally avoids the need for model rebuilding and data reshaping. Specifically, inspired by discussions in existing work on the relationship between contrastive Learning and PCA, we conducted a theoretical analysis and discovered that PCA's optimization objective is a special case of that in contrastive Learning. We designed an encoder with contrastive constraints to emulate PCA's generation of basis transformation matrix, which is utilized to losslessly adapt features in different datasets. Furthermore, we introduced a global uniformity constraint to replace negative sampling, reducing the time complexity from $O(n^2)$ to $O(n)$, and by explicitly defining positive samples, FUG avoids the substantial memory requirements of data augmentation. In cross domain experiments, FUG has a performance close to the re-trained new models. The source code is available at: https://github.com/hedongxiao-tju/FUG.", "keywords": "Graph pre-training;Graph contrastive learning;Graph foundation model;Principal component analysis", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Jitao Zhao;Di Jin;Meng Ge;Lianze Shan;Xin Wang;Dongxiao He;Zhiyong Feng", "authorids": "~Jitao_Zhao2;~Di_Jin4;~Meng_Ge2;~Lianze_Shan1;~Xin_Wang39;~Dongxiao_He1;~Zhiyong_Feng1", "gender": "M;M;;M;F;M;M", "homepage": "http://cic.tju.edu.cn/faculty/jindi/index.htm;https://gemengtju.github.io;;http://www.tjudb.cn/dbgroup/Xin_Wang;http://cic.tju.edu.cn/faculty/hedongxiao/index.htm;http://cic.tju.edu.cn/faculty/zyfeng/index.html;https://github.com/Tao-Zac", "dblp": "67/1861-1.html;;379/4259;10/5630-30;48/8875;https://dblp.uni-trier.de/pid/48/195-2;", "google_scholar": "Q8MRRecAAAAJ;;jqKt0ZEAAAAJ;https://scholar.google.com/citations?hl=en;JyqwTr4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;QGvyorsAAAAJ", "orcid": ";;;0000-0001-9651-0651;;0000-0001-8158-7453;", "linkedin": ";;;;;;", "or_profile": "~Di_Jin4;~Meng_Ge2;~Lianze_Shan1;~Xin_Wang39;~Dongxiao_He1;~Zhiyong_Feng1;~JiTao_Zhao1", "aff": "Tianjin University;;Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "Full Professor;;MS student;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhao2024fug,\ntitle={{FUG}: Feature-Universal Graph Contrastive Pre-training for Graphs with Diverse Node Features},\nauthor={Jitao Zhao and Di Jin and Meng Ge and Lianze Shan and Xin Wang and Dongxiao He and Zhiyong Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VUuOsBrqaw}\n}", "github": "", "reviewers": "C58N;vBXZ;5FZS;FBBq;nhsa", "pdf_size": 1205587, "rating": "5;5;7;7;7", "confidence": "4;4;5;4;4", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "47;72;55;81;57", "wc_strengths": "28;49;50;117;63", "wc_weaknesses": "191;182;167;112;156", "wc_questions": "33;66;59;4;4", "wc_limitations": "23;1;3;4;1", "wc_review": "322;370;334;318;281", "wc_reply_reviewers": "0;30;10;24;55", "wc_reply_authors": "0;46;18;31;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.4, 12.322337440599489 ], "wc_strengths_avg": [ 61.4, 29.97732476389446 ], "wc_weaknesses_avg": [ 161.6, 27.57245001808871 ], "wc_questions_avg": [ 33.2, 26.255666055158457 ], "wc_limitations_avg": [ 6.4, 8.380930735902787 ], "wc_review_avg": [ 325.0, 28.635642126552707 ], "wc_reply_reviewers_avg": [ 23.8, 18.808508712813996 ], "wc_reply_authors_avg": [ 19.0, 17.866169147301836 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7714810201352401835&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "tju.edu.cn;;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "VVDewLcVkx", "title": "GameBench: Evaluating Strategic Reasoning Abilities of LLM Agents", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Large language models have demonstrated remarkable few-shot performance on many natural language understanding tasks. Despite several demonstrations of using large language models in complex, strategic scenarios, there lacks a comprehensive framework for evaluating agents\u2019 performance across various types of reasoning found in games. To address this gap, we introduce \\textsc{GameBench}, a cross-domain benchmark for evaluating strategic reasoning abilities of LLM agents. We focus on 9 different game environments, where each covers at least one axis of key reasoning skill identified in strategy games, and select games for which strategy explanations are unlikely to form a significant portion of models' pretraining corpuses. Our evaluations use GPT-3 and GPT-4 in their base form along with two scaffolding frameworks designed to enhance strategic reasoning ability: Chain-of-Thought (CoT) prompting and Reasoning Via Planning (RAP). Our results show that none of the tested models match human performance, and at worse GPT-4 performs worse than random action. CoT and RAP both improve scores but not comparable to human levels.", "keywords": "strategic reasoning;llm agents;benchmark;games", "primary_area": "", "supplementary_material": "/attachment/64cba178b0050d9ddd524614112e1fd75a5f85aa.zip", "author": "Anthony Costarelli;Mat Allen;Roman Hauksson;Grace Sodunke;Suhas Hariharan;Carlson Cheng;Arjun Yadav", "authorids": "~Anthony_Costarelli1;~Mat_Allen1;~Roman_Hauksson1;~Grace_Sodunke1;~Suhas_Hariharan1;~Carlson_Cheng1;~Arjun_Yadav1", "gender": "Not Specified;;;F;M;;M", "homepage": "https://github.com/acostarelli;;https://roman.technology;;https://suhas.net;https://carlsoncheng.dev;https://arjunyadav.net", "dblp": ";;;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0009-0004-6788-5250;;;", "linkedin": ";mat-allen-2bbb251b1/;https://linkedin.com/in/romanhauksson;grace-sodunke/;;carlsoncheng/;", "or_profile": "~Anthony_Costarelli1;~Mat_Allen1;~Roman_Hauksson1;~Grace_Sodunke1;~Suhas_Hariharan1;~Carlson_Cheng1;~Arjun_Yadav1", "aff": "Olin College of Engineering;Nashville State Community College;University of Texas at Dallas;University of Oxford;University College London, University of London;University of Delaware;University of Manchester", "aff_domain": "olin.edu;nscc.edu;utdallas.edu;ox.ac.uk;ucl.ac.uk;udel.edu;manchester.ac.uk", "position": "Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student", "bibtex": "@misc{\nanonymous2024gamebench,\ntitle={GameBench: Evaluating Strategic Reasoning Abilities of {LLM} Agents},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=VVDewLcVkx}\n}", "github": "", "project": "", "reviewers": "o93U;K8mi;bYEh", "site": "https://openreview.net/forum?id=VVDewLcVkx", "pdf_size": 4894628, "rating": "3;4;6", "confidence": "4;4;4", "wc_summary_and_contributions": "34;78;118", "wc_strengths": "2;28;49", "wc_improvement": "36;236;78", "wc_limitations": "2;45;6", "wc_correctness": "2;40;8", "wc_clarity": "1;27;44", "wc_relation_to_prior_work": "4;27;27", "wc_documentation": "8;3;15", "wc_additional_feedback": "1;1;1", "wc_review": "90;485;346", "wc_reply_reviewers": "145;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 4.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 76.66666666666667, 34.30581421404962 ], "wc_strengths_avg": [ 26.333333333333332, 19.22382780706162 ], "wc_improvement_avg": [ 116.66666666666667, 86.10587797718703 ], "wc_limitations_avg": [ 17.666666666666668, 19.39644870130154 ], "wc_correctness_avg": [ 16.666666666666668, 16.67999467092907 ], "wc_clarity_avg": [ 24.0, 17.682382946499793 ], "wc_relation_to_prior_work_avg": [ 19.333333333333332, 10.842303978193728 ], "wc_documentation_avg": [ 8.666666666666666, 4.921607686744467 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 307.0, 163.59910350202617 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 68.3536555146996 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8104126699633113049&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Olin College of Engineering;Nashville State Community College;University of Texas at Dallas;University of Oxford;University College London;University of Delaware;University of Manchester", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.olin.edu;https://www.nscc.edu/;https://www.utdallas.edu;https://www.ox.ac.uk;https://www.ucl.ac.uk;https://www.udel.edu;https://www.manchester.ac.uk", "aff_unique_abbr": "Olin;NSCC;UT Dallas;Oxford;UCL;UD;UoM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Dallas", "aff_country_unique_index": "0;0;0;1;1;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Learning from Offline Foundation Features with Tensor Augmentations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94891", "id": "VVd3iOKPMJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VVd3iOKPMJ", "openreview": "https://openreview.net/forum?id=VVd3iOKPMJ", "poster": "/media/PosterPDFs/NeurIPS%202024/94891.png?t=1731681240.2895021", "project": "", "author_site": "Emir Konuk, Christos Matsoukas, Moein Sorkhei, Phitchapha Lertsiravarameth, Kevin Smith", "tldr": "", "abstract": "We introduce Learning from Offline Foundation Features with Tensor Augmentations (LOFF-TA), an efficient training scheme designed to harness the capabilities of foundation models in limited resource settings where their direct development is not feasible. LOFF-TA involves training a compact classifier on cached feature embeddings from a frozen foundation model, resulting in up to $37\\times$ faster training and up to $26\\times$ reduced GPU memory usage. Because the embeddings of augmented images would be too numerous to store, yet the augmentation process is essential for training, we propose to apply tensor augmentations to the cached embeddings of the original non-augmented images. LOFF-TA makes it possible to leverage the power of foundation models, regardless of their size, in settings with limited computational capacity. Moreover, LOFF-TA can be used to apply foundation models to high-resolution images without increasing compute. In certain scenarios, we find that training with LOFF-TA yields better results than directly fine-tuning the foundation model.", "keywords": "Adaptation;Foundation Models;Transfer Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Emir Konuk;Christos Matsoukas;Moein Sorkhei;Phitchapha Lertsiravarameth;Kevin Smith", "authorids": "~Emir_Konuk1;~Christos_Matsoukas1;~Moein_Sorkhei1;~Phitchapha_Lertsiravarameth2;~Kevin_Smith1", "gender": ";M;;;", "homepage": ";;;;", "dblp": "254/0800;235/5347;;298/1343;", "google_scholar": ";3dezSAEAAAAJ;;;", "orcid": "0000-0001-9437-4553;0000-0003-1401-3497;;;", "linkedin": ";chrismats/;;;", "or_profile": "~Emir_Konuk1;~Christos_Matsoukas1;~Moein_Sorkhei1;~Phitchapha_Lertsiravarameth2;~Kevin_Smith1", "aff": "KTH Royal Institute of Technology, Stockholm, Sweden;KTH Royal Institute of Technology, Stockholm, Sweden;;KTH Royal Institute of Technology;", "aff_domain": "kth.se;kth.se;;kth.se;", "position": "PhD student;PhD student;;PhD student;", "bibtex": "@inproceedings{\nkonuk2024learning,\ntitle={Learning from Offline Foundation Features with Tensor Augmentations},\nauthor={Emir Konuk and Christos Matsoukas and Moein Sorkhei and Phitchapha Lertsiravarameth and Kevin Smith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VVd3iOKPMJ}\n}", "github": "", "reviewers": "AUq9;PGYR;TzTJ;1T2n", "pdf_size": 14085227, "rating": "4;4;5;6", "confidence": "4;5;3;4", "soundness": "2;2;3;4", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "89;31;86;84", "wc_strengths": "32;41;75;114", "wc_weaknesses": "264;131;95;298", "wc_questions": "2;35;3;107", "wc_limitations": "29;1;4;12", "wc_review": "416;239;263;615", "wc_reply_reviewers": "246;66;0;0", "wc_reply_authors": "235;141;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.5, 24.026027553467927 ], "wc_strengths_avg": [ 65.5, 32.26840560052511 ], "wc_weaknesses_avg": [ 197.0, 85.80501150865257 ], "wc_questions_avg": [ 36.75, 42.67537346058028 ], "wc_limitations_avg": [ 11.5, 10.874281585465774 ], "wc_review_avg": [ 383.25, 150.04061950018735 ], "wc_reply_reviewers_avg": [ 78.0, 100.66777041337511 ], "wc_reply_authors_avg": [ 94.0, 99.7020561473032 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2917272943690103034&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "kth.se;kth.se;;kth.se;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stockholm;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "Transforming Vision Transformer: Towards Efficient Multi-Task Asynchronous Learner", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94890", "id": "VWf6ZVx5S2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VWf6ZVx5S2", "openreview": "https://openreview.net/forum?id=VWf6ZVx5S2", "poster": "/media/PosterPDFs/NeurIPS%202024/94890.png?t=1731862601.7719522", "project": "", "author_site": "Hanwen Zhong, Jiaxin Chen, Yutong Zhang, Di Huang, Yunhong Wang", "tldr": "", "abstract": "Multi-Task Learning (MTL) for Vision Transformer aims at enhancing the model capability by tackling multiple tasks simultaneously. Most recent works have predominantly focused on designing Mixture-of-Experts (MoE) structures and integrating Low-Rank Adaptation (LoRA) to efficiently perform multi-task learning. However, their rigid combination hampers both the optimization of MoE and the effectiveness of reparameterization of LoRA, leading to sub-optimal performance and low inference speed. In this work, we propose a novel approach dubbed Efficient Multi-Task Learning (EMTAL) by transforming a pre-trained Vision Transformer into an efficient multi-task learner during training, and reparameterizing the learned structure for efficient inference. Specifically, we firstly develop the MoEfied LoRA structure, which decomposes the pre-trained Transformer into a low-rank MoE structure and employ LoRA to fine-tune the parameters. Subsequently, we take into account the intrinsic asynchronous nature of multi-task learning and devise a learning Quality Retaining (QR) optimization mechanism, by leveraging the historical high-quality class logits to prevent a well-trained task from performance degradation. Finally, we design a router fading strategy to integrate the learned parameters into the original Transformer, archiving efficient inference. Extensive experiments on public benchmarks demonstrate the superiority of our method, compared to the state-of-the-art multi-task learning approaches.", "keywords": "Parameter-efficient Fine-tuning;Multi-Task Learning;Mixtures of Experts;Quality Retaining", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hanwen Zhong;Jiaxin Chen;Yutong Zhang;Di Huang;Yunhong Wang", "authorids": "~Hanwen_Zhong2;~Jiaxin_Chen4;~Yutong_Zhang10;~Di_Huang4;~Yunhong_Wang1", "gender": "M;;;M;", "homepage": "https://github.com/Yewen1486;;;http://irip.buaa.edu.cn/dihuang/index.html;", "dblp": ";;;45/780-1;", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;0000-0002-2412-9330;", "linkedin": ";;;;", "or_profile": "~Hanwen_Zhong2;~Jiaxin_Chen4;~Yutong_Zhang10;~Di_Huang4;~Yunhong_Wang1", "aff": "Beihang University;;;Beihang University;", "aff_domain": "buaa.edu.cn;;;buaa.edu.cn;", "position": "MS student;;;Full Professor;", "bibtex": "@inproceedings{\nzhong2024transforming,\ntitle={Transforming Vision Transformer: Towards Efficient Multi-Task Asynchronous Learner},\nauthor={Hanwen Zhong and Jiaxin Chen and Yutong Zhang and Di Huang and Yunhong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VWf6ZVx5S2}\n}", "github": "", "reviewers": "fvpH;2Lcf;SX1w", "pdf_size": 3414790, "rating": "5;5;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "82;39;83", "wc_strengths": "56;64;66", "wc_weaknesses": "285;271;6", "wc_questions": "5;129;318", "wc_limitations": "5;7;11", "wc_review": "433;510;484", "wc_reply_reviewers": "0;142;15", "wc_reply_authors": "0;607;37", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 68.0, 20.51016008388688 ], "wc_strengths_avg": [ 62.0, 4.320493798938574 ], "wc_weaknesses_avg": [ 187.33333333333334, 128.34934964991274 ], "wc_questions_avg": [ 150.66666666666666, 128.69688764258788 ], "wc_limitations_avg": [ 7.666666666666667, 2.494438257849294 ], "wc_review_avg": [ 475.6666666666667, 31.98263417682929 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 63.698944697346086 ], "wc_reply_authors_avg": [ 214.66666666666666, 277.8324835019996 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3884254094443807364&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;;;buaa.edu.cn;", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Data Acquisition via Experimental Design for Data Markets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94889", "id": "VXJVNdmXO4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VXJVNdmXO4", "openreview": "https://openreview.net/forum?id=VXJVNdmXO4", "poster": "/media/PosterPDFs/NeurIPS%202024/94889.png?t=1733761269.9417098", "project": "", "author_site": "Charles Lu, Baihe Huang, Sai Praneeth Karimireddy, Praneeth Vepakomma, Michael Jordan, Ramesh Raskar", "tldr": "", "abstract": "The acquisition of training data is crucial for machine learning applications. Data markets can increase the supply of data, particularly in data-scarce domains such as healthcare, by incentivizing potential data providers to join the market. A major challenge for a data buyer in such a market is choosing the most valuable data points from a data seller. Unlike prior work in data valuation, which assumes centralized data access, we propose a federated approach to the data acquisition problem that is inspired by linear experimental design. Our proposed data acquisition method achieves lower prediction error without requiring labeled validation data and can be optimized in a fast and federated procedure. The key insight of our work is that a method that directly estimates the benefit of acquiring data for test set prediction is particularly compatible with a decentralized market setting.", "keywords": "Data valuation;experimental design;data markets;data acquisition;federated learning", "primary_area": "other", "supplementary_material": "/attachment/3e902ca749e24ef7236a8a6e1645c47ac4ece5ff.zip", "author": "Charles Lu;Baihe Huang;Sai Praneeth Karimireddy;Praneeth Vepakomma;Michael Jordan;Ramesh Raskar", "authorids": "~Charles_Lu1;~Baihe_Huang1;~Sai_Praneeth_Karimireddy1;~Praneeth_Vepakomma2;~Michael_Jordan1;~Ramesh_Raskar1", "gender": "M;;M;;M;M", "homepage": "https://www.mit.edu/~luchar/;;https://spkreddy.org;https://praneeth.mit.edu/;http://www.cs.berkeley.edu/~jordan/;https://www.media.mit.edu/people/raskar/overview/", "dblp": "61/11017-1;279/4131;217/3342;131/6694;j/MichaelIJordan;r/RameshRaskar", "google_scholar": "zyZR238AAAAJ;chICXXMAAAAJ;wKJeOQoAAAAJ;T_mPgZIAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;", "orcid": "0000-0002-8749-4722;;;;0000-0001-8935-817X;0000-0002-3254-3224", "linkedin": "charlie-lu/;;;;;", "or_profile": "~Charles_Lu1;~Baihe_Huang1;~Sai_Praneeth_Karimireddy1;~Praneeth_Vepakomma2;~Michael_Jordan1;~Ramesh_Raskar1", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;University of California, Berkeley;Mohamed bin Zayed University of Artificial Intelligence;University of California, Berkeley;Massachusetts Institute of Technology", "aff_domain": "mit.edu;berkeley.edu;berkeley.edu;mbzuai.ac.ae;berkeley.edu;mit.edu", "position": "MS student;PhD student;Postdoc;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlu2024data,\ntitle={Data Acquisition via Experimental Design for Data Markets},\nauthor={Charles Lu and Baihe Huang and Sai Praneeth Karimireddy and Praneeth Vepakomma and Michael Jordan and Ramesh Raskar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VXJVNdmXO4}\n}", "github": "", "reviewers": "wuU5;yTne;MmPt;BRC7", "pdf_size": 2014924, "rating": "6;7;7;7", "confidence": "4;5;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;2;3", "wc_summary": "29;106;13;90", "wc_strengths": "31;32;50;124", "wc_weaknesses": "283;25;122;306", "wc_questions": "2;306;218;54", "wc_limitations": "2;1;1;1", "wc_review": "347;470;404;575", "wc_reply_reviewers": "0;5;77;0", "wc_reply_authors": "0;0;104;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 59.5, 39.322385482063524 ], "wc_strengths_avg": [ 59.25, 38.140365755980895 ], "wc_weaknesses_avg": [ 184.0, 115.98491281196878 ], "wc_questions_avg": [ 145.0, 122.4540730233176 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 449.0, 84.77322690566875 ], "wc_reply_reviewers_avg": [ 20.5, 32.684093990808435 ], "wc_reply_authors_avg": [ 26.0, 45.033320996790806 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8490555983600804930&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "mit.edu;berkeley.edu;berkeley.edu;mbzuai.ac.ae;berkeley.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "MIT;UC Berkeley;MBZUAI", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Arab Emirates" }, { "title": "MedCalc-Bench: Evaluating Large Language Models for Medical Calculations", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97666", "id": "VXohja0vrQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VXohja0vrQ", "openreview": "https://openreview.net/forum?id=VXohja0vrQ", "poster": "", "project": "", "author_site": "Nikhil Khandekar, Qiao Jin, Guangzhi Xiong, Soren Dunn, Serina Applebaum, Zain Anwar, Maame Sarfo-Gyamfi, Conrad Safranek, Abid Anwar, Andrew Zhang, Aidan Gilson, Maxwell Singer, Amisha Dave, Anrew Taylor, Aidong Zhang, Qingyu Chen, Zhiyong Lu", "tldr": "", "abstract": "Current benchmarks for evaluating large language models (LLMs) in medicine are primarily focused on question-answering involving domain knowledge and descriptive reasoning. While such qualitative capabilities are vital to medical diagnosis, in real-world scenarios, doctors frequently use clinical calculators that follow quantitative equations and rule-based reasoning paradigms for evidence-based decision support. To this end, we propose MedCalc-Bench, a first-of-its-kind dataset focused on evaluating the medical calculation capability of LLMs. MedCalc-Bench contains an evaluation set of over 1000 manually reviewed instances from 55 different medical calculation tasks. Each instance in MedCalc-Bench consists of a patient note, a question requesting to compute a specific medical value, a ground truth answer, and a step-by-step explanation showing how the answer is obtained. While our evaluation results show the potential of LLMs in this area, none of them are effective enough for clinical settings. Common issues include extracting the incorrect entities, not using the correct equation or rules for a calculation task, or incorrectly performing the arithmetic for the computation. We hope our study highlights the quantitative knowledge and reasoning gaps in LLMs within medical settings, encouraging future improvements of LLMs for various clinical calculation tasks. MedCalc-Bench is publicly available at: https://github.com/ncbi-nlp/MedCalc-Bench.", "keywords": "LLMs;Medical Evaluation;Tool Learning;AI for Healthcare;Medical Calculation", "primary_area": "", "supplementary_material": "/attachment/2b77c8fd9f56d2a6e0192afd2dc696022b7f5e0d.pdf", "author": "Nikhil Khandekar;Qiao Jin;Guangzhi Xiong;Soren Dunn;Serina S Applebaum;Zain Anwar;Maame Sarfo-Gyamfi;Conrad W Safranek;Abid Anwar;Andrew Jiaxing Zhang;Aidan Gilson;Maxwell B Singer;Amisha D Dave;R. Andrew Taylor;Aidong Zhang;Qingyu Chen;Zhiyong Lu", "authorids": "~Nikhil_Khandekar1;~Qiao_Jin1;~Guangzhi_Xiong1;~Soren_Dunn1;~Serina_S_Applebaum1;~Zain_Anwar1;~Maame_Sarfo-Gyamfi1;~Conrad_W_Safranek1;~Abid_Anwar1;~Andrew_Jiaxing_Zhang1;~Aidan_Gilson1;~Maxwell_B_Singer1;~Amisha_D_Dave1;~R._Andrew_Taylor1;~Aidong_Zhang2;~Qingyu_Chen1;~Zhiyong_Lu1", "gender": "M;M;M;;F;M;F;;M;M;;;;M;F;M;", "homepage": ";https://andy-jqa.github.io/;;https://github.com/sorendunn;;;;;;;;https://medicine.yale.edu/profile/maxwell-singer/;;;https://engineering.virginia.edu/faculty/aidong-zhang;https://sites.google.com/view/qingyuchen/home;", "dblp": ";96/5382-1;285/5341;;;;;;;;;;;;z/AidongZhang.html;;66/6604", "google_scholar": "aliYivUAAAAJ;tYy-bzgAAAAJ;_rp4-a4AAAAJ;;;;;tLcinfgAAAAJ;;;ofugrSUAAAAJ;;ePJXBEkAAAAJ;;O8XxkE4AAAAJ;FSLotiMAAAAJ;", "orcid": ";0000-0002-1268-7239;0000-0002-8049-5298;;0000-0001-9062-6178;;;;0009-0006-8722-7145;0009-0002-6250-870X;0000-0002-4770-4705;0000-0001-9583-3846;;;0000-0001-9723-3246;;", "linkedin": ";qiao-jin-andy/;guangzhi-xiong-47a299251/;;;zain-anwar-3792a7b6?trk=people-guest_people_search-card;maame-a-sarfo-gyamfi-66abbb1b2/;;;;;mac-singer-a11908111/;;andrew-taylor-442a6a73/;;qingyu-chen-7a1b89181/;", "or_profile": "~Nikhil_Khandekar1;~Qiao_Jin1;~Guangzhi_Xiong1;~Soren_Dunn1;~Serina_S_Applebaum1;~Zain_Anwar1;~Maame_Sarfo-Gyamfi1;~Conrad_W_Safranek1;~Abid_Anwar1;~Andrew_Jiaxing_Zhang1;~Aidan_Gilson1;~Maxwell_B_Singer1;~Amisha_D_Dave1;~R._Andrew_Taylor1;~Aidong_Zhang2;~Qingyu_Chen1;~Zhiyong_Lu1", "aff": "National Institutes of Health;National Institutes of Health;University of Virginia, Charlottesville;University of Illinois, Urbana Champaign;Yale University;Rosalind Franklin University;Howard University;Yale University;University of Illinois at Chicago;University of Chicago;Yale School of Medicine;Yale University;Yale University;Yale University;University of Virginia;Yale University;National Institutes of Health", "aff_domain": "nih.gov;nih.gov;virginia.edu;uiuc.edu;yale.edu;my.rfums.org;howard.edu;yale.edu;uic.edu;uchicago.edu;yale.edu;yale.edu;yale.edu;yale.edu;virginia.edu;yale.edu;nih.gov", "position": "Researcher;Postdoc;PhD student;MS student;MS student;Researcher;MD Student;Researcher;Researcher;MD-PhD student;Researcher;Researcher;Researcher;Associate Professor;Full Professor;Assistant Professor;Senior Investigator", "bibtex": "@inproceedings{\nkhandekar2024medcalcbench,\ntitle={MedCalc-Bench: Evaluating Large Language Models for Medical Calculations},\nauthor={Nikhil Khandekar and Qiao Jin and Guangzhi Xiong and Soren Dunn and Serina S Applebaum and Zain Anwar and Maame Sarfo-Gyamfi and Conrad W Safranek and Abid Anwar and Andrew Jiaxing Zhang and Aidan Gilson and Maxwell B Singer and Amisha D Dave and R. Andrew Taylor and Aidong Zhang and Qingyu Chen and Zhiyong Lu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=VXohja0vrQ}\n}", "github": "", "reviewers": "An5a;aJHL;oeyh", "pdf_size": 654240, "rating": "7;8;8", "confidence": "3;3;4", "wc_summary_and_contributions": "33;75;62", "wc_strengths": "26;49;78", "wc_improvement": "73;38;65", "wc_limitations": "5;28;2", "wc_correctness": "13;28;16", "wc_clarity": "7;97;4", "wc_relation_to_prior_work": "6;26;79", "wc_documentation": "9;26;11", "wc_additional_feedback": "1;1;1", "wc_review": "173;368;318", "wc_reply_reviewers": "13;20;43", "wc_reply_authors": "18;15;11", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 56.666666666666664, 17.55625877635159 ], "wc_strengths_avg": [ 51.0, 21.275964529643932 ], "wc_improvement_avg": [ 58.666666666666664, 14.974051630144135 ], "wc_limitations_avg": [ 11.666666666666666, 11.614167593456232 ], "wc_correctness_avg": [ 19.0, 6.48074069840786 ], "wc_clarity_avg": [ 36.0, 43.15089802078283 ], "wc_relation_to_prior_work_avg": [ 37.0, 30.80043289739069 ], "wc_documentation_avg": [ 15.333333333333334, 7.586537784494028 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 286.3333333333333, 82.69757486703517 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 12.81492185782739 ], "wc_reply_authors_avg": [ 14.666666666666666, 2.8674417556808756 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17843972722161024555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nih.gov;nih.gov;virginia.edu;uiuc.edu;yale.edu;my.rfums.org;howard.edu;yale.edu;uic.edu;uchicago.edu;yale.edu;yale.edu;yale.edu;yale.edu;virginia.edu;yale.edu;nih.gov", "author_num": 17, "aff_unique_index": "0;0;1;2;3;4;5;3;6;7;3;3;3;3;1;3;0", "aff_unique_norm": "National Institutes of Health;University of Virginia;University of Illinois Urbana-Champaign;Yale University;Rosalind Franklin University;Howard University;University of Illinois at Chicago;University of Chicago", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.nih.gov;https://www.virginia.edu;https://illinois.edu;https://www.yale.edu;https://www.rosalindfranklin.edu;https://www.howard.edu;https://www.uic.edu;https://www.uchicago.edu", "aff_unique_abbr": "NIH;UVA;UIUC;Yale;RFU;HU;UIC;UChicago", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";Charlottesville;Urbana-Champaign;Chicago;New Haven", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reproducibility of predictive networks for mouse visual cortex", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94888", "id": "VXxj3XZ1X8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VXxj3XZ1X8", "openreview": "https://openreview.net/forum?id=VXxj3XZ1X8", "poster": "", "project": "", "author_site": "Polina Turishcheva, Max Burg, Fabian Sinz, Alexander Ecker", "tldr": "", "abstract": "Deep predictive models of neuronal activity have recently enabled several new discoveries about the selectivity and invariance of neurons in the visual cortex.\nThese models learn a shared set of nonlinear basis functions, which are linearly combined via a learned weight vector to represent a neuron's function.\nSuch weight vectors, which can be thought as embeddings of neuronal function, have been proposed to define functional cell types via unsupervised clustering.\nHowever, as deep models are usually highly overparameterized, the learning problem is unlikely to have a unique solution, which raises the question if such embeddings can be used in a meaningful way for downstream analysis.\nIn this paper, we investigate how stable neuronal embeddings are with respect to changes in model architecture and initialization. \nWe find that $L_1$ regularization to be an important ingredient for structured embeddings and develop an adaptive regularization that adjusts the strength of regularization per neuron. \nThis regularization improves both predictive performance and how consistently neuronal embeddings cluster across model fits compared to uniform regularization.\nTo overcome overparametrization, we propose an iterative feature pruning strategy which reduces the dimensionality of performance-optimized models by half without loss of performance and improves the consistency of neuronal embeddings with respect to clustering neurons.\nOur results suggest that to achieve an objective taxonomy of cell types or a compact representation of the functional landscape, we need novel architectures or learning techniques that improve identifiability. \nThe code is available https://github.com/pollytur/readout_reproducibility.", "keywords": "reproducibility;predictive models for visual cortex;neuroscience", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Polina Turishcheva;Max F Burg;Fabian H. Sinz;Alexander S Ecker", "authorids": "~Polina_Turishcheva1;~Max_F_Burg1;~Fabian_H._Sinz1;~Alexander_S_Ecker1", "gender": "F;;M;M", "homepage": ";;https://sinzlab.org;http://eckerlab.org", "dblp": ";;53/5834;26/7228", "google_scholar": "VvnKLRYAAAAJ;;https://scholar.google.com/citations?hl=de;VgYU_m8AAAAJ", "orcid": ";;0000-0002-1348-9736;0000-0003-2392-5105", "linkedin": "polina-turishcheva-a63860197/;;;alexecker/", "or_profile": "~Polina_Turishcheva1;~Max_F_Burg1;~Fabian_H._Sinz1;~Alexander_S_Ecker1", "aff": "Georg-August Universit\u00e4t G\u00f6ttingen;;Baylor College of Medicine;Max Planck Institute for Dynamics and Self-Organization", "aff_domain": "uni-goettingen.de;;bcm.edu;ds.mpg.de", "position": "PhD student;;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nturishcheva2024reproducibility,\ntitle={Reproducibility of predictive networks for mouse visual cortex},\nauthor={Polina Turishcheva and Max F Burg and Fabian H. Sinz and Alexander S Ecker},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VXxj3XZ1X8}\n}", "github": "", "reviewers": "iat5;8oHp;RvHn;mU1L", "pdf_size": 9180230, "rating": "7;7;7;8", "confidence": "3;3;4;4", "soundness": "4;4;3;3", "novelty": "4;3;3;4", "presentation": "2;3;3;3", "wc_summary": "82;51;275;84", "wc_strengths": "46;74;120;43", "wc_weaknesses": "219;228;202;94", "wc_questions": "110;229;186;107", "wc_limitations": "56;3;1;63", "wc_review": "513;585;784;391", "wc_reply_reviewers": "0;12;0;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 123.0, 88.72710972414237 ], "wc_strengths_avg": [ 70.75, 30.898017735770686 ], "wc_weaknesses_avg": [ 185.75, 53.78835840588556 ], "wc_questions_avg": [ 158.0, 51.79285665031424 ], "wc_limitations_avg": [ 30.75, 28.86498744153546 ], "wc_review_avg": [ 568.25, 142.56467830427002 ], "wc_reply_reviewers_avg": [ 9.0, 9.9498743710662 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5306653383217734469&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "uni-goettingen.de;;bcm.edu;ds.mpg.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Georg-August Universit\u00e4t G\u00f6ttingen;Baylor College of Medicine;Max Planck Institute for Dynamics and Self-Organization", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-goettingen.de;https://www.bcm.edu;https://www.mpids.org", "aff_unique_abbr": "GAU;BCM;MPIDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "SafeWorld: Geo-Diverse Safety Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94887", "id": "VZQmIoDGBG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VZQmIoDGBG", "openreview": "https://openreview.net/forum?id=VZQmIoDGBG", "poster": "/media/PosterPDFs/NeurIPS%202024/94887.png?t=1733718826.8048954", "project": "", "author_site": "Da Yin, Haoyi Qiu, Kung-Hsiang Huang, Kai-Wei Chang, Nanyun Peng", "tldr": "", "abstract": "In the rapidly evolving field of Large Language Models (LLMs), ensuring safety is a crucial and widely discussed topic. However, existing works often overlooks the geo-diversity of cultural and legal standards across the world. To reveal the chal5 lenges posed by geo-diverse safety standards, we introduce SafeWorld, a novel benchmark specifically designed to evaluate LLMs\u2019 ability to generate responses that are not only helpful but also culturally sensitive and legally compliant across diverse global contexts. SafeWorld encompasses 2,775 test user queries, each grounded in high-quality, human-verified cultural norms and legal policies from 50 countries and 493 regions/races. On top of it, we propose a multi-dimensional automatic safety evaluation framework that assesses the contextual appropriateness, accuracy, and comprehensiveness of responses. Our evaluations reveal that current LLMs struggle to meet these criteria effectively. To enhance LLMs\u2019 alignment with geo-diverse safety standards, we synthesize helpful preference pairs for Direct Preference Optimization (DPO) alignment. The preference pair construction aims to encourage LLMs to behave appropriately and provide precise references to relevant cultural norms and policies when necessary. Our trained SafeWorldLM outperforms all competing models, including GPT-4o on all the three evaluation dimensions by a large margin. Global human evaluators also note a nearly 20% higher winning rate in helpfulness and harmfulness evaluation.", "keywords": "LLM;Geo-Diverse;Cultural Norm;Public Policy;Safety", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Da Yin;Haoyi Qiu;Kung-Hsiang Huang;Kai-Wei Chang;Nanyun Peng", "authorids": "~Da_Yin2;~Haoyi_Qiu1;~Kung-Hsiang_Huang1;~Kai-Wei_Chang1;~Nanyun_Peng1", "gender": "M;F;M;M;F", "homepage": "https://wadeyin9712.github.io/;https://haoyiq114.github.io/;http://khuangaf.github.io/;http://kwchang.net;https://violetpeng.github.io/", "dblp": "131/0141;348/5711;274/7102;18/2428;117/4036", "google_scholar": "n32w34kAAAAJ;https://scholar.google.com/citations?hl=en;Yuk2_IMAAAAJ;fqDBtzYAAAAJ;XxRXvX0AAAAJ", "orcid": ";;;0000-0001-5365-0072;", "linkedin": ";;;kai-wei-chang-41239040;", "or_profile": "~Da_Yin2;~Haoyi_Qiu1;~Kung-Hsiang_Huang1;~Kai-Wei_Chang1;~Nanyun_Peng1", "aff": "University of California, Los Angeles;UCLA Computer Science Department, University of California, Los Angeles;University of Illinois Urbana-Champaign;Amazon;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;cs.ucla.edu;uiuc.edu;amazon.com;ucla.edu", "position": "PhD student;MS student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyin2024safeworld,\ntitle={SafeWorld: Geo-Diverse Safety Alignment},\nauthor={Da Yin and Haoyi Qiu and Kung-Hsiang Huang and Kai-Wei Chang and Nanyun Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VZQmIoDGBG}\n}", "github": "", "reviewers": "xqmN;Fmfm;NsF2", "pdf_size": 3386377, "rating": "4;6;6", "confidence": "4;3;4", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "97;82;63", "wc_strengths": "40;59;87", "wc_weaknesses": "123;216;166", "wc_questions": "80;167;102", "wc_limitations": "3;3;82", "wc_review": "343;527;500", "wc_reply_reviewers": "0;7;5", "wc_reply_authors": "463;137;34", "reply_reviewers": "0;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 80.66666666666667, 13.912424503139471 ], "wc_strengths_avg": [ 62.0, 19.30457631409368 ], "wc_weaknesses_avg": [ 168.33333333333334, 38.00292386412159 ], "wc_questions_avg": [ 116.33333333333333, 36.935379004718804 ], "wc_limitations_avg": [ 29.333333333333332, 37.2409571424915 ], "wc_review_avg": [ 456.6666666666667, 81.12678691748833 ], "wc_reply_reviewers_avg": [ 4.0, 2.943920288775949 ], "wc_reply_authors_avg": [ 211.33333333333334, 182.85574156209103 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5020032520116285695&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "cs.ucla.edu;cs.ucla.edu;uiuc.edu;amazon.com;ucla.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of California, Los Angeles;University of Illinois Urbana-Champaign;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://illinois.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;UIUC;Amazon", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Los Angeles;Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Balancing Context Length and Mixing Times for Reinforcement Learning at Scale", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94886", "id": "VaJ4XOW7Ey", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VaJ4XOW7Ey", "openreview": "https://openreview.net/forum?id=VaJ4XOW7Ey", "poster": "", "project": "", "author_site": "Matthew Riemer, Khimya Khetarpal, Janarthanan Rajendran, Sarath Chandar", "tldr": "", "abstract": "Due to the recent remarkable advances in artificial intelligence, researchers have begun to consider challenging learning problems such as learning to generalize behavior from large offline datasets or learning online in non-Markovian environments. Meanwhile, recent advances in both of these areas have increasingly relied on conditioning policies on large context lengths. A natural question is if there is a limit to the performance benefits of increasing the context length if the computation needed is available. In this work, we establish a novel theoretical result that links the context length of a policy to the time needed to reliably evaluate its performance (i.e., its mixing time) in large scale partially observable reinforcement learning environments that exhibit latent sub-task structure. This analysis underscores a key tradeoff: when we extend the context length, our policy can more effectively model non-Markovian dependencies, but this comes at the cost of potentially slower policy evaluation and as a result slower downstream learning. Moreover, our empirical results highlight the relevance of this analysis when leveraging Transformer based neural networks. This perspective will become increasingly pertinent as the field scales towards larger and more realistic environments, opening up a number of potential future directions for improving the way we design learning agents.", "keywords": "Mixing Times;Non-Markovian;Average Reward;Long Context;Partial Observability;Causal Structure", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Matthew Riemer;Khimya Khetarpal;Janarthanan Rajendran;Sarath Chandar", "authorids": "~Matthew_Riemer1;~Khimya_Khetarpal1;~Janarthanan_Rajendran1;~Sarath_Chandar1", "gender": "M;F;;M", "homepage": ";https://kkhetarpal.github.io/;;http://sarathchandar.in/", "dblp": "166/1499;186/3048;;45/8542", "google_scholar": "PK7UzAwAAAAJ;https://scholar.google.ca/citations?user=VLOUhF0AAAAJ;;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Matthew_Riemer1;~Khimya_Khetarpal1;~Janarthanan_Rajendran1;~Sarath_Chandar1", "aff": "International Business Machines;Google;;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": "ibm.com;google.com;;polymtl.ca", "position": "Researcher;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nriemer2024balancing,\ntitle={Balancing Context Length and Mixing Times for Reinforcement Learning at Scale},\nauthor={Matthew Riemer and Khimya Khetarpal and Janarthanan Rajendran and Sarath Chandar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VaJ4XOW7Ey}\n}", "github": "", "reviewers": "bcCn;kzFL;aEyh;hXkG;FWjm;LbpG", "pdf_size": 1565449, "rating": "4;5;6;6;6;7", "confidence": "4;3;3;5;2;2", "soundness": "2;3;2;3;3;4", "novelty": "1;3;3;3;3;4", "presentation": "2;2;4;3;3;4", "wc_summary": "24;103;77;125;131;68", "wc_strengths": "14;33;145;184;62;43", "wc_weaknesses": "143;172;370;714;170;39", "wc_questions": "3;6;281;57;2;24", "wc_limitations": "3;6;37;35;37;22", "wc_review": "187;320;910;1115;402;196", "wc_reply_reviewers": "0;32;102;511;19;52", "wc_reply_authors": "0;85;0;1096;0;0", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;2;1;3;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 3.1666666666666665, 1.0671873729054748 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.8333333333333335, 0.8975274678557507 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 88.0, 36.65151201974256 ], "wc_strengths_avg": [ 80.16666666666667, 62.31483682790872 ], "wc_weaknesses_avg": [ 268.0, 222.18835853092455 ], "wc_questions_avg": [ 62.166666666666664, 99.70526008636098 ], "wc_limitations_avg": [ 23.333333333333332, 14.290634073484012 ], "wc_review_avg": [ 521.6666666666666, 359.6056482067853 ], "wc_reply_reviewers_avg": [ 119.33333333333333, 178.03152030531623 ], "wc_reply_authors_avg": [ 196.83333333333334, 403.31559878366 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4417261042993861, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3537858431403855690&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ibm.com;google.com;;polymtl.ca", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "International Business Machines Corporation;Google;\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.ibm.com;https://www.google.com;https://www.polymtl.ca", "aff_unique_abbr": "IBM;Google;Polytechnique Montr\u00e9al", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Montr\u00e9al", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "LoRA-GA: Low-Rank Adaptation with Gradient Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94885", "id": "VaLAWrLHJv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VaLAWrLHJv", "openreview": "https://openreview.net/forum?id=VaLAWrLHJv", "poster": "", "project": "", "author_site": "Shaowen Wang, Linxi Yu, Jian Li", "tldr": "", "abstract": "Fine-tuning large-scale pretrained models is prohibitively expensive in terms of computational and memory costs. LoRA, as one of the most popular Parameter-Efficient Fine-Tuning (PEFT) methods, offers a cost-effective alternative by fine-tuning an auxiliary low-rank model that has significantly fewer parameters. Although LoRA reduces the computational and memory requirements significantly at each iteration, extensive empirical evidence indicates that it converges at a considerably slower rate compared to full fine-tuning, ultimately leading to increased overall compute and often worse test performance. In our paper, we perform an in-depth investigation of the initialization method of LoRA and show that careful initialization (without any change of the architecture and the training algorithm) can significantly enhance both efficiency and performance. In particular, we introduce a novel initialization method, LoRA-GA (Low Rank Adaptation with Gradient Approximation), which aligns the gradients of low-rank matrix product with those of full fine-tuning at the first step. Our extensive experiments demonstrate that LoRA-GA achieves a convergence rate comparable to that of full fine-tuning (hence being significantly faster than vanilla LoRA as well as various recent improvements) while simultaneously attaining comparable or even better performance. For example, on the subset of the GLUE dataset with T5-Base, LoRA-GA outperforms LoRA by 5.69% on average. On larger models such as Llama 2-7B, LoRA-GA shows performance improvements of 0.34, 11.52%, and 5.05% on MTbench, GSM8k, and Human-eval, respectively. Additionally, we observe up to 2-4 times convergence speed improvement compared to vanilla LoRA, validating its effectiveness in accelerating convergence and enhancing model performance.", "keywords": "Parameter-Efficient Fine-Tuning;Low-Rank Adaptation;Model Initialization;Large Language Model", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/feae83c0efe46707e6b343f42b7048e2b15eb171.zip", "author": "Shaowen Wang;Linxi Yu;Jian Li", "authorids": "~Shaowen_Wang4;~Linxi_Yu1;~Jian_Li2", "gender": "M;M;M", "homepage": "https://github.com/Outsider565/;https://github.com/yulinxi0504;http://iiis.tsinghua.edu.cn/~jianli", "dblp": ";;33/5448-15", "google_scholar": "TtCmtjAAAAAJ;;zX7i1EkAAAAJ", "orcid": ";;", "linkedin": "shaowen-wang-65606b20a/;;", "or_profile": "~Shaowen_Wang4;~Linxi_Yu1;~Jian_Li2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nwang2024loraga,\ntitle={Lo{RA}-{GA}: Low-Rank Adaptation with Gradient Approximation},\nauthor={Shaowen Wang and Linxi Yu and Jian Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VaLAWrLHJv}\n}", "github": "", "reviewers": "dSFq;vjMb;P55u", "pdf_size": 1827038, "rating": "6;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "37;67;31", "wc_strengths": "28;56;62", "wc_weaknesses": "26;73;323", "wc_questions": "120;3;23", "wc_limitations": "1;15;1", "wc_review": "212;214;440", "wc_reply_reviewers": "13;0;23", "wc_reply_authors": "32;0;36", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 45.0, 15.748015748023622 ], "wc_strengths_avg": [ 48.666666666666664, 14.817407180595247 ], "wc_weaknesses_avg": [ 140.66666666666666, 130.349103905201 ], "wc_questions_avg": [ 48.666666666666664, 51.09685791600976 ], "wc_limitations_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_review_avg": [ 288.6666666666667, 107.01194118207349 ], "wc_reply_reviewers_avg": [ 12.0, 9.41629792788369 ], "wc_reply_authors_avg": [ 22.666666666666668, 16.110727964792762 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7772392848639473105&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "AlphaMath Almost Zero: Process Supervision without Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94884", "id": "VaXnxQ3UKo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VaXnxQ3UKo", "openreview": "https://openreview.net/forum?id=VaXnxQ3UKo", "poster": "/media/PosterPDFs/NeurIPS%202024/94884.png?t=1731177597.0528398", "project": "", "author_site": "Guoxin Chen, Minpeng Liao, Chengxi Li, Kai Fan", "tldr": "", "abstract": "Although recent advancements in large language models (LLMs) have significantly improved their performance on various tasks, they still face challenges with complex and symbolic multi-step reasoning, particularly in mathematical reasoning. To bolster the mathematical reasoning capabilities of LLMs, most existing efforts concentrate on seeking assistance from either domain experts or GPT-4 for high-quality process-supervised data, which is not only expensive but also labor-intensive. In our study, we propose an innovative framework, AlphaMath, that bypasses the need for process annotations (from humans or GPTs) by leveraging Monte Carlo Tree Search (MCTS). This framework focuses on unleashing the potential of a well-pretrained LLM to autonomously enhance its mathematical reasoning. Specifically, we integrate a value model with the LLM, automatically generating both process supervision and step-level evaluation signals in MCTS. Furthermore, we propose an efficient inference strategy\u2014step-level beam search, where the value model is crafted to assist the policy model (i.e., LLM) in navigating more effective reasoning paths, rather than solely relying on prior probabilities. The experimental results on both in-domain and out-of-domain datasets demonstrate that even without GPT-4 or human-annotated process supervision, our AlphaMath framework achieves comparable or superior results to previous state-of-the-art methods.", "keywords": "Mathematical Reasoning;Monte Carlo Tree Search;Question Answer", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a481612fac26f16fcdc1363c392166353ea05583.zip", "author": "Guoxin Chen;Minpeng Liao;Chengxi Li;Kai Fan", "authorids": "~Guoxin_Chen1;~Minpeng_Liao1;~Chengxi_Li1;~Kai_Fan1", "gender": ";;M;M", "homepage": ";;;https://scholar.google.com/citations?user=SQqkcdgAAAAJ&hl=zh", "dblp": ";;242/9752-2;20/3825-2.html", "google_scholar": "I6EjtN0AAAAJ;;kBKSvL0AAAAJ;SQqkcdgAAAAJ", "orcid": "0000-0001-9000-4782;;0000-0003-4511-782X;0000-0002-8256-0807", "linkedin": ";;;", "or_profile": "~Guoxin_Chen1;~Minpeng_Liao1;~Chengxi_Li1;~Kai_Fan1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;;Alibaba Group;Alibaba Group", "aff_domain": "ict.ac.cn;;alibaba-inc.com;alibaba-inc.com", "position": "MS student;;Researcher;Researcher", "bibtex": "@inproceedings{\nchen2024alphamath,\ntitle={AlphaMath Almost Zero: Process Supervision without Process},\nauthor={Guoxin Chen and Minpeng Liao and Chengxi Li and Kai Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VaXnxQ3UKo}\n}", "github": "", "reviewers": "5iMV;15WC;otxC;EVyZ;2HCk", "pdf_size": 1477958, "rating": "4;4;5;5;5", "confidence": "4;5;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;2;3;3", "wc_summary": "147;125;136;88;53", "wc_strengths": "73;61;41;65;53", "wc_weaknesses": "72;101;54;215;127", "wc_questions": "3;626;1;8;46", "wc_limitations": "35;141;1;1;1", "wc_review": "330;1054;233;377;280", "wc_reply_reviewers": "50;31;32;19;226", "wc_reply_authors": "170;373;0;0;389", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 4.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 109.8, 34.64909811236073 ], "wc_strengths_avg": [ 58.6, 10.910545357588685 ], "wc_weaknesses_avg": [ 113.8, 56.39645378922331 ], "wc_questions_avg": [ 136.8, 245.15089230920617 ], "wc_limitations_avg": [ 35.8, 54.22324224905773 ], "wc_review_avg": [ 454.8, 303.4530606205843 ], "wc_reply_reviewers_avg": [ 71.6, 77.83212704275786 ], "wc_reply_authors_avg": [ 186.4, 170.66059885046695 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7637626158259733, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13511017332952261828&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;;alibaba-inc.com;alibaba-inc.com", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese Academy of Sciences;Alibaba Group", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;https://www.alibaba.com", "aff_unique_abbr": "CAS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "VajjTXRj6J", "title": "Robust Preference Optimization through Reward Model Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Language model (LM) post-training (or alignment) involves maximizing a reward function that is derived from preference annotations. Recently, Direct Preference Optimization (DPO) has gained popularity as an offline alignment method that is directly supervised with human preference annotations. However, DPO is overconfident about preference annotations, implicitly assigning them rewards of infinite magnitude. This frequently leads to degenerate policies, sometimes causing even the probability of the preferred output to go to zero. In this work, we propose to use distillation to combat overconfidence: we train the LM to match the reward distribution induced by a model trained on the preference data. Moreover, to account for uncertainty in the reward model we are distilling from, we optimize against a family of reward models, which may be instantiated either implicitly or explicitly. Our results show that both measures lead to improved robustness to distribution shift in preference annotations, while preserving the supervised nature of DPO.", "keywords": "alignment;dpo;preferences;pessimism;rlhf;llms", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Adam Fisch;Jacob Eisenstein;Vicky Zayats;Alekh Agarwal;Ahmad Beirami;Chirag Nagpal;Peter Shaw;Jonathan Berant", "authorids": "~Adam_Fisch2;~Jacob_Eisenstein1;~Vicky_Zayats1;~Alekh_Agarwal2;~Ahmad_Beirami1;~Chirag_Nagpal1;~Peter_Shaw1;~Jonathan_Berant1", "gender": "M;F;M;M;;M;M;", "homepage": "https://jacobeisenstein.github.io;;https://alekhagarwal.net;https://beirami.github.io/;http://cs.cmu.edu/~chiragn;http://www.ptshaw.com;http://www.cs.tau.ac.il/~joberant/;https://people.csail.mit.edu/fisch/", "dblp": "82/2305;;;41/9367;149/2771;217/1471;31/8178;https://dblp.org/pers/f/Fisch:Adam.html", "google_scholar": "Wb_lnjAAAAAJ;BVVJvoMAAAAJ;9nnDvooAAAAJ;VuKWbMMAAAAJ;rAbWdAkAAAAJ;SmGaQicAAAAJ;https://scholar.google.co.il/citations?user=xCYHonIAAAAJ;https://scholar.google.com/citations?authorid=LYRkQhMAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;ahmad-beirami-97001962;;;;", "or_profile": "~Jacob_Eisenstein1;~Vicky_Zayats1;~Alekh_Agarwal2;~Ahmad_Beirami1;~Chirag_Nagpal1;~Peter_Shaw1;~Jonathan_Berant1;~Adam_Fisch1", "aff": "Google;Google;Google;Massachusetts Institute of Technology;Google;Google DeepMind;Tel Aviv University;Massachusetts Institute of Technology", "aff_domain": "google.com;google.com;google.com;mit.edu;google.com;google.com;tau.ac.il;mit.edu", "position": "Research Scientist;Researcher;Researcher;Research Affiliate;Researcher;Research Scientist;Associate Professor;PhD student", "bibtex": "@misc{\nanonymous2024robust,\ntitle={Robust Preference Optimization through Reward Model Distillation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=VajjTXRj6J}\n}", "github": "", "project": "", "reviewers": "wkMh;y8pX;yoUT", "site": "https://openreview.net/forum?id=VajjTXRj6J", "pdf_size": 595563, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "37;70;144", "wc_strengths": "27;76;14", "wc_weaknesses": "304;150;64", "wc_questions": "30;3;3", "wc_limitations": "4;7;1", "wc_review": "402;306;226", "wc_reply_reviewers": "288;41;0", "wc_reply_authors": "114;114;65", "reply_reviewers": "1;1;0", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.66666666666667, 44.738747809427515 ], "wc_strengths_avg": [ 39.0, 26.695817400234567 ], "wc_weaknesses_avg": [ 172.66666666666666, 99.28186586123817 ], "wc_questions_avg": [ 12.0, 12.727922061357855 ], "wc_limitations_avg": [ 4.0, 2.449489742783178 ], "wc_review_avg": [ 311.3333333333333, 71.95060033723755 ], "wc_reply_reviewers_avg": [ 109.66666666666667, 127.20674335724327 ], "wc_reply_authors_avg": [ 97.66666666666667, 23.098821518760552 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13092785970609959095&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;0;0;2;1", "aff_unique_norm": "Google;Massachusetts Institute of Technology;Tel Aviv University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://web.mit.edu;https://www.tau.ac.il", "aff_unique_abbr": "Google;MIT;TAU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1;2;0", "aff_country_unique": "United States;United Kingdom;Israel" }, { "title": "Direct Consistency Optimization for Robust Customization of Text-to-Image Diffusion models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94883", "id": "VazkRbCGxt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VazkRbCGxt", "openreview": "https://openreview.net/forum?id=VazkRbCGxt", "poster": "/media/PosterPDFs/NeurIPS%202024/94883.png?t=1731769365.1225429", "project": "", "author_site": "Kyungmin Lee, Sangkyung Kwak, Kihyuk Sohn, Jinwoo Shin", "tldr": "", "abstract": "Text-to-image (T2I) diffusion models, when fine-tuned on a few personal images, can generate visuals with a high degree of consistency. However, such fine-tuned models are not robust; they often fail to compose with concepts of pretrained model or other fine-tuned models. To address this, we propose a novel fine-tuning objective, dubbed Direct Consistency Optimization, which controls the deviation between fine-tuning and pretrained models to retain the pretrained knowledge during fine-tuning. Through extensive experiments on subject and style customization, we demonstrate that our method positions itself on a superior Pareto frontier between subject (or style) consistency and image-text alignment over all previous baselines; it not only outperforms regular fine-tuning objective in image-text alignment, but also shows higher fidelity to the reference images than the method that fine-tunes with additional prior dataset. More importantly, the models fine-tuned with our method can be merged without interference, allowing us to generate custom subjects in a custom style by composing separately customized subject and style models. Notably, we show that our approach achieves better prompt fidelity and subject fidelity than those post-optimized for merging regular fine-tuned models.", "keywords": "Text-to-image models;diffusion models;personalization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Kyungmin Lee;Sangkyung Kwak;Kihyuk Sohn;Jinwoo Shin", "authorids": "~Kyungmin_Lee1;~Sangkyung_Kwak1;~Kihyuk_Sohn1;~Jinwoo_Shin1", "gender": "M;M;M;M", "homepage": "https://kyungmnlee.github.io/;;https://sites.google.com/site/kihyuksml/;https://sites.google.com/site/mijirim/", "dblp": "57/5118;345/0067;53/10771;31/7062", "google_scholar": "6dpime0AAAAJ;;VxpypngAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;", "linkedin": ";SangkyungKwak;;", "or_profile": "~Kyungmin_Lee1;~Sangkyung_Kwak1;~Kihyuk_Sohn1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Google;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;google.com;kaist.ac.kr", "position": "PhD student;MS student;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nlee2024direct,\ntitle={Direct Consistency Optimization for Robust Customization of Text-to-Image Diffusion models},\nauthor={Kyungmin Lee and Sangkyung Kwak and Kihyuk Sohn and Jinwoo Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VazkRbCGxt}\n}", "github": "", "reviewers": "aXKh;HfJW;XWYq;ayXu", "pdf_size": 43044882, "rating": "5;5;6;7", "confidence": "3;3;5;4", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "39;104;28;128", "wc_strengths": "20;83;46;96", "wc_weaknesses": "49;120;273;92", "wc_questions": "4;62;417;23", "wc_limitations": "9;1;19;7", "wc_review": "121;370;783;346", "wc_reply_reviewers": "0;26;135;0", "wc_reply_authors": "0;36;153;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.75, 42.29287765097097 ], "wc_strengths_avg": [ 61.25, 30.06139550985616 ], "wc_weaknesses_avg": [ 133.5, 84.41711911691846 ], "wc_questions_avg": [ 126.5, 169.01849011276843 ], "wc_limitations_avg": [ 9.0, 6.48074069840786 ], "wc_review_avg": [ 405.0, 238.87549058034398 ], "wc_reply_reviewers_avg": [ 40.25, 55.72420928106562 ], "wc_reply_authors_avg": [ 47.25, 62.79878581628788 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11747970246111707423&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;google.com;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.kaist.ac.kr;https://www.google.com", "aff_unique_abbr": "KAIST;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "UniTox: Leveraging LLMs to Curate a Unified Dataset of Drug-Induced Toxicity from FDA Labels", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97665", "id": "Vb1vVr75JT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vb1vVr75JT", "openreview": "https://openreview.net/forum?id=Vb1vVr75JT", "poster": "", "project": "", "author_site": "Jacob Silberg, Kyle Swanson, Elana Simon, Angela Zhang, Zaniar Ghazizadeh, Scott Ogden, Hisham Hamadeh, James Zou", "tldr": "", "abstract": "Drug-induced toxicity is one of the leading reasons new drugs fail clinical trials. Machine learning models that predict drug toxicity from molecular structure could help researchers prioritize less toxic drug candidates. However, current toxicity datasets are typically small and limited to a single organ system (e.g., cardio, renal, or liver). Creating these datasets often involved time-intensive expert curation by parsing drug labelling documents that can exceed 100 pages per drug. Here, we introduce UniTox, a unified dataset of 2,418 FDA-approved drugs with drug-induced toxicity summaries and ratings created by using GPT-4o to process FDA drug labels. UniTox spans eight types of toxicity: cardiotoxicity, liver toxicity, renal toxicity, pulmonary toxicity, hematological toxicity, dermatological toxicity, ototoxicity, and infertility. This is, to the best of our knowledge, the largest such systematic human in vivo database by number of drugs and toxicities, and the first covering nearly all non-combination FDA-approved medications for several of these toxicities. We recruited clinicians to validate a random sample of our GPT-4o annotated toxicities, and UniTox's toxicity ratings concord with clinician labelers 85-96\\% of the time. Finally, we benchmark several machine learning models trained on UniTox to demonstrate the utility of this dataset for building molecular toxicity prediction models.", "keywords": "Biomedicine;Drug Discovery;Drug Toxicity;Drug Safety", "primary_area": "", "supplementary_material": "/attachment/3c92d8ef77a9cf23899a44841954216375aae247.pdf", "author": "Jake Silberg;Kyle Swanson;Elana Simon;Angela Zhang;Zaniar Ghazizadeh;Scott Ogden;Hisham Hamadeh;James Zou", "authorids": "~Jake_Silberg1;~Kyle_Swanson2;~Elana_Simon1;~Angela_Zhang1;~Zaniar_Ghazizadeh1;~Scott_Ogden1;~Hisham_Hamadeh1;~James_Zou1", "gender": "M;M;F;Not Specified;;M;M;", "homepage": "https://jsilbergds.github.io/;http://swansonkyle.com/;;;https://profiles.stanford.edu/zaniar-ghazizadeh;https://www.linkedin.com/in/scott-ogden-65270842?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;;", "dblp": ";239/4007;;;;;90/232.html;", "google_scholar": ";seqcYSUAAAAJ;5ue1EAIAAAAJ;zafRY0sAAAAJ;;;;23ZXZvEAAAAJ", "orcid": ";my-orcid?orcid=0000-0002-7385-7844;;;;;;", "linkedin": ";kyle-swanson-564ab787/;elana-simon-24a186104/;;;scott-ogden-65270842;;", "or_profile": "~Jake_Silberg1;~Kyle_Swanson2;~Elana_Simon1;~Angela_Zhang1;~Zaniar_Ghazizadeh1;~Scott_Ogden1;~Hisham_Hamadeh1;~James_Zou1", "aff": "Stanford University;Stanford University;;Stanford University;Stanford University;The Wharton School, University of Pennsylvania;Genmab US Inc;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;stanford.edu;stanford.edu;wharton.upenn.edu;genmab.com;stanford.edu", "position": "PhD student;PhD student;;PhD student;Postdoc;Researcher;SVP Data Science & AI;Assistant Professor", "bibtex": "@inproceedings{\nsilberg2024unitox,\ntitle={UniTox: Leveraging {LLM}s to Curate a Unified Dataset of Drug-Induced Toxicity from {FDA} Labels},\nauthor={Jake Silberg and Kyle Swanson and Elana Simon and Angela Zhang and Zaniar Ghazizadeh and Scott Ogden and Hisham Hamadeh and James Zou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Vb1vVr75JT}\n}", "github": "", "reviewers": "ibpw;qLGx;scL6;LMvU", "pdf_size": 628865, "rating": "6;7;7;9", "confidence": "4;3;3;4", "wc_summary_and_contributions": "60;25;80;100", "wc_strengths": "102;42;190;6", "wc_improvement": "223;209;411;32", "wc_limitations": "137;61;6;51", "wc_correctness": "24;1;12;1", "wc_clarity": "9;17;1;1", "wc_relation_to_prior_work": "12;1;1;1", "wc_documentation": "22;1;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "590;358;703;199", "wc_reply_reviewers": "101;16;0;0", "wc_reply_authors": "32;16;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 66.25, 27.698149757700424 ], "wc_strengths_avg": [ 85.0, 69.6491205974634 ], "wc_improvement_avg": [ 218.75, 134.11632078162597 ], "wc_limitations_avg": [ 63.75, 47.09232952403183 ], "wc_correctness_avg": [ 9.5, 9.5 ], "wc_clarity_avg": [ 7.0, 6.6332495807108 ], "wc_relation_to_prior_work_avg": [ 3.75, 4.763139720814412 ], "wc_documentation_avg": [ 7.5, 8.616843969807043 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 462.5, 196.5 ], "wc_reply_reviewers_avg": [ 29.25, 41.936708263763386 ], "wc_reply_authors_avg": [ 12.0, 13.2664991614216 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8984597744467552467&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "stanford.edu;stanford.edu;;stanford.edu;stanford.edu;wharton.upenn.edu;genmab.com;stanford.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "Stanford University;University of Pennsylvania;Genmab", "aff_unique_dep": ";The Wharton School;", "aff_unique_url": "https://www.stanford.edu;https://www.wharton.upenn.edu;https://www.genmab.com", "aff_unique_abbr": "Stanford;UPenn Wharton;Genmab", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "$\\textit{Bifr\\\"ost}$: 3D-Aware Image Compositing with Language Instructions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94882", "id": "VcPtU8e6yK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VcPtU8e6yK", "openreview": "https://openreview.net/forum?id=VcPtU8e6yK", "poster": "", "project": "", "author_site": "Lingxiao Li, Kaixiong Gong, Wei-Hong Li, xili dai, Tao Chen, Xiaojun Yuan, Xiangyu Yue", "tldr": "", "abstract": "This paper introduces $\\textit{Bifr\u00f6st}$, a novel 3D-aware framework that is built upon diffusion models to perform instruction-based image composition. Previous methods concentrate on image compositing at the 2D level, which fall short in handling complex spatial relationships ($\\textit{e.g.}$, occlusion). $\\textit{Bifr\u00f6st}$ addresses these issues by training MLLM as a 2.5D location predictor and integrating depth maps as an extra condition during the generation process to bridge the gap between 2D and 3D, which enhances spatial comprehension and supports sophisticated spatial interactions. Our method begins by fine-tuning MLLM with a custom counterfactual dataset to predict 2.5D object locations in complex backgrounds from language instructions. Then, the image-compositing model is uniquely designed to process multiple types of input features, enabling it to perform high-fidelity image compositions that consider occlusion, depth blur, and image harmonization. Extensive qualitative and quantitative evaluations demonstrate that $\\textit{Bifr\u00f6st}$ significantly outperforms existing methods, providing a robust solution for generating realistically composited images in scenarios demanding intricate spatial understanding. This work not only pushes the boundaries of generative image compositing but also reduces reliance on expensive annotated datasets by effectively utilizing existing resources in innovative ways.", "keywords": "Image Compositing;Image Generation;Diffusion Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Lingxiao Li;Kaixiong Gong;Wei-Hong Li;Xili Dai;Tao Chen;Xiaojun Yuan;Xiangyu Yue", "authorids": "~Lingxiao_Li2;~Kaixiong_Gong1;~Wei-Hong_Li1;~Xili_Dai2;~Tao_Chen6;~Xiaojun_Yuan1;~Xiangyu_Yue1", "gender": ";M;M;M;M;M;M", "homepage": ";;https://weihonglee.github.io;https://delay-xili.github.io/;https://eetchen.github.io/;;http://xyue.io/", "dblp": ";289/0124;255/5590;170/8561;69/510-3;;207/7518", "google_scholar": ";kBVshUUAAAAJ;xKKsIxcAAAAJ;CtRMD1UAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;https://scholar.google.com.hk/citations?user=o6W_m00AAAAJ;-xQ-C1sAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;xili-daley-dai-b87030179/;;;", "or_profile": "~Lingxiao_Li2;~Kaixiong_Gong1;~Wei-Hong_Li1;~Xili_Dai2;~Tao_Chen6;~Xiaojun_Yuan1;~Xiangyu_Yue1", "aff": ";The Chinese University of Hong Kong;The Chinese University of Hong Kong;Hong Kong University of Science and Technology (Guangzhou);Fudan University;University of Electronic Science and Technology of China, Tsinghua University;The Chinese University of Hong Kong", "aff_domain": ";ie.cuhk.edu;cuhk.edu.hk;hkust.edu;fudan.edu.cn;uestc.edu.cn;ie.cuhk.edu", "position": ";PhD student;Postdoc;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024textitbifrost,\ntitle={\\${\\textbackslash}textit\\{Bifr{\\textbackslash}''ost\\}\\$: 3D-Aware Image Compositing with Language Instructions},\nauthor={Lingxiao Li and Kaixiong Gong and Wei-Hong Li and Xili Dai and Tao Chen and Xiaojun Yuan and Xiangyu Yue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VcPtU8e6yK}\n}", "github": "", "reviewers": "M1TS;CdZU;wo4W;dLCn", "pdf_size": 10819032, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;4;2;4", "presentation": "3;1;3;4", "wc_summary": "325;95;125;57", "wc_strengths": "57;77;34;53", "wc_weaknesses": "246;343;140;45", "wc_questions": "158;65;98;3", "wc_limitations": "17;1;39;10", "wc_review": "803;581;436;168", "wc_reply_reviewers": "99;45;47;26", "wc_reply_authors": "353;64;51;24", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 150.5, 103.58933342772315 ], "wc_strengths_avg": [ 55.25, 15.270478054075452 ], "wc_weaknesses_avg": [ 193.5, 111.82687512400585 ], "wc_questions_avg": [ 81.0, 56.03124128555426 ], "wc_limitations_avg": [ 16.75, 14.042346669983617 ], "wc_review_avg": [ 497.0, 230.57211453252538 ], "wc_reply_reviewers_avg": [ 54.25, 27.105119442644042 ], "wc_reply_authors_avg": [ 123.0, 133.57207792049954 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yDlvm0tnMLoJ:scholar.google.com/&scioq=%24%5Ctextit%7BBifr%5C%22ost%7D%24:+3D-Aware+Image+Compositing+with+Language+Instructions&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": ";ie.cuhk.edu;cuhk.edu.hk;hkust.edu;fudan.edu.cn;uestc.edu.cn;ie.cuhk.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Chinese University of Hong Kong;Hong Kong University of Science and Technology;Fudan University;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ust.hk;https://www.fudan.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "CUHK;HKUST;Fudan;UESTC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Lean Workbook: A large-scale Lean problem set formalized from natural language math problems", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97664", "id": "Vcw3vzjHDb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vcw3vzjHDb", "openreview": "https://openreview.net/forum?id=Vcw3vzjHDb", "poster": "/media/PosterPDFs/NeurIPS%202024/97664.png?t=1731058962.4724772", "project": "", "author_site": "Huaiyuan Ying, Zijian Wu, Yihan Geng, JIayu Wang, Dahua Lin, Kai Chen", "tldr": "", "abstract": "Large language models have demonstrated impressive capabilities across various natural language processing tasks, especially in solving mathematical problems. However, large language models are not good at math theorem proving using formal languages like Lean. A significant challenge in this area is the scarcity of training data available in these formal languages. To address this issue, we propose a novel pipeline that iteratively generates and filters synthetic data to translate natural language mathematical problems into Lean 4 statements, and vice versa. Our results indicate that the synthetic data pipeline can provide useful training data and improve the performance of LLMs in translating and understanding complex mathematical problems and proofs. Our final dataset contains about 57K formal-informal question pairs along with searched proof from the math contest forum and 21 new IMO questions. We open-source our code at \\url{https://github.com/InternLM/InternLM-Math} and our data at \\url{https://huggingface.co/datasets/InternLM/Lean-Workbook}.", "keywords": "Formal language proving;Translation;Lean4;expert iteration;dataset", "primary_area": "", "supplementary_material": "/attachment/b2c7dec30b17c4988af5af55bb8650aceafaaf44.pdf", "author": "Huaiyuan Ying;Zijian Wu;Yihan Geng;JIayu Wang;Dahua Lin;Kai Chen", "authorids": "~Huaiyuan_Ying1;~Zijian_Wu5;~Yihan_Geng1;~JIayu_Wang7;~Dahua_Lin1;~Kai_Chen4", "gender": "M;F;;M;M;M", "homepage": ";https://github.com/yihan04;https://github.com/objecti0n;http://dahua.site;https://chenkai.site/;https://github.com/wzj423/", "dblp": "290/7976.html;360/6692;;53/6088;181/2839-26;", "google_scholar": "Ts7BD3gAAAAJ;9jts-VQAAAAJ;;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;", "orcid": "0000-0001-7959-2093;;;;0000-0002-6820-2325;", "linkedin": ";;;;;", "or_profile": "~Huaiyuan_Ying1;~Yihan_Geng1;~JIayu_Wang7;~Dahua_Lin1;~Kai_Chen4;~\u5434\u5b50\u50651", "aff": "Tsinghua University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory;Shanghai Jiaotong University", "aff_domain": "mails.tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;sjtu.edu.cn", "position": "PhD student;Intern;Researcher;Associate Professor;Researcher;Undergrad student", "bibtex": "@inproceedings{\nying2024lean,\ntitle={Lean Workbook: A large-scale Lean problem set formalized from natural language math problems},\nauthor={Huaiyuan Ying and Zijian Wu and Yihan Geng and JIayu Wang and Dahua Lin and Kai Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Vcw3vzjHDb}\n}", "github": "", "reviewers": "cgHc;uCYe;eHjp", "pdf_size": 604934, "rating": "6;7;8", "confidence": "4;3;5", "wc_summary_and_contributions": "54;66;84", "wc_strengths": "75;20;83", "wc_improvement": "49;15;184", "wc_limitations": "6;94;75", "wc_correctness": "13;8;6", "wc_clarity": "8;1;129", "wc_relation_to_prior_work": "10;1;263", "wc_documentation": "10;1;5", "wc_additional_feedback": "1;1;1", "wc_review": "226;207;830", "wc_reply_reviewers": "0;10;205", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;3", "reply_authors": "1;1;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 68.0, 12.328828005937952 ], "wc_strengths_avg": [ 59.333333333333336, 28.003967972810962 ], "wc_improvement_avg": [ 82.66666666666667, 72.98553890250375 ], "wc_limitations_avg": [ 58.333333333333336, 37.80946383586463 ], "wc_correctness_avg": [ 9.0, 2.943920288775949 ], "wc_clarity_avg": [ 46.0, 58.75939641169459 ], "wc_relation_to_prior_work_avg": [ 91.33333333333333, 121.44225879907795 ], "wc_documentation_avg": [ 5.333333333333333, 3.6817870057290873 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 421.0, 289.3106749960441 ], "wc_reply_reviewers_avg": [ 71.66666666666667, 94.36925111261377 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14891901365557916491&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;3;4", "aff_unique_norm": "Tsinghua University;Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong;Shanghai AI Laboratory;Shanghai Jiao Tong University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.shailab.org/;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "THU;Shanghai AI Lab;CUHK;SAIL;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Decomposing and Interpreting Image Representations via Text in ViTs Beyond CLIP", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94881", "id": "Vhh7ONtfvV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vhh7ONtfvV", "openreview": "https://openreview.net/forum?id=Vhh7ONtfvV", "poster": "", "project": "", "author_site": "Sriram Balasubramanian, Samyadeep Basu, Soheil Feizi", "tldr": "", "abstract": "Recent work has explored how individual components of the CLIP-ViT model contribute to the final representation by leveraging the shared image-text representation space of CLIP. These components, such as attention heads and MLPs, have been shown to capture distinct image features like shape, color or texture. However, understanding the role of these components in arbitrary vision transformers (ViTs) is challenging. To this end, we introduce a general framework which can identify the roles of various components in ViTs beyond CLIP. Specifically, we (a) automate the decomposition of the final representation into contributions from different model components, and (b) linearly map these contributions to CLIP space to interpret them via text. Additionally, we introduce a novel scoring function to rank components by their importance with respect to specific features.\nApplying our framework to various ViT variants (e.g. DeiT, DINO, DINOv2, Swin, MaxViT), we gain insights into the roles of different components concerning particular image features. These insights facilitate applications such as image retrieval using text descriptions or reference images, visualizing token importance heatmaps, and mitigating spurious correlations. We release our [code](https://github.com/SriramB-98/vit-decompose) to reproduce the experiments in the paper.", "keywords": "vision;interpretability;explainability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/149612af86a974a82624d52c2b7620cc862e4219.zip", "author": "Sriram Balasubramanian;Samyadeep Basu;Soheil Feizi", "authorids": "~Sriram_Balasubramanian2;~Samyadeep_Basu1;~Soheil_Feizi2", "gender": "M;M;M", "homepage": "http://www.sriram.live;https://samyadeepbasu.github.io/;https://www.cs.umd.edu/~sfeizi/", "dblp": "33/666;250/9138;57/2132", "google_scholar": "HsiolTEAAAAJ;6aRwDecAAAAJ;lptAmrMAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sriram_Balasubramanian2;~Samyadeep_Basu1;~Soheil_Feizi2", "aff": "University of Maryland, College Park;Adobe Systems;University of Maryland, College Park", "aff_domain": "umd.edu;adobe.com;umd.edu", "position": "PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\nbalasubramanian2024decomposing,\ntitle={Decomposing and Interpreting Image Representations via Text in ViTs Beyond {CLIP}},\nauthor={Sriram Balasubramanian and Samyadeep Basu and Soheil Feizi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vhh7ONtfvV}\n}", "github": "", "reviewers": "HytW;N9ji;FTxd;oPeN;5CRC", "pdf_size": 7199180, "rating": "4;5;6;6;7", "confidence": "4;3;5;5;3", "soundness": "1;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;4;3;3", "wc_summary": "164;57;78;76;67", "wc_strengths": "203;43;114;97;88", "wc_weaknesses": "1173;47;278;248;51", "wc_questions": "52;86;115;66;49", "wc_limitations": "28;18;31;7;28", "wc_review": "1620;251;616;494;283", "wc_reply_reviewers": "761;14;23;13;22", "wc_reply_authors": "984;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "4;1;1;1;1", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 88.4, 38.525835487371324 ], "wc_strengths_avg": [ 109.0, 52.53950894327049 ], "wc_weaknesses_avg": [ 359.4, 418.0155977951062 ], "wc_questions_avg": [ 73.6, 24.4834638072312 ], "wc_limitations_avg": [ 22.4, 8.867919710958146 ], "wc_review_avg": [ 652.8, 502.04438050833716 ], "wc_reply_reviewers_avg": [ 166.6, 297.2275895673213 ], "wc_reply_authors_avg": [ 196.8, 393.6 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 1.2000000000000002 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2654336629165244676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umd.edu;adobe.com;umd.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Maryland;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www/umd.edu;https://www.adobe.com", "aff_unique_abbr": "UMD;Adobe", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Cambrian-1: A Fully Open, Vision-Centric Exploration of Multimodal LLMs", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94880", "id": "Vi8AepAXGy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vi8AepAXGy", "openreview": "https://openreview.net/forum?id=Vi8AepAXGy", "poster": "", "project": "", "author_site": "Peter Tong, Ellis Brown, Penghao Wu, Sanghyun Woo, Adithya Jairam Vedagiri IYER, Sai Charitha Akula, Shusheng Yang, Jihan Yang, Manoj Middepogu, Ziteng Wang, Xichen Pan, Rob Fergus, Yann LeCun, Saining Xie", "tldr": "", "abstract": "We introduce Cambrian-1, a family of multimodal LLMs (MLLMs) designed with a vision-centric approach. While stronger language models can enhance multimodal capabilities, the design choices for vision components are often insufficiently explored and disconnected from visual representation learning research. This gap hinders accurate sensory grounding in real-world scenarios. Our study uses LLMs and visual instruction tuning as an interface to evaluate various visual representations, offering new insights into different models and architectures\u2014self-supervised, strongly supervised, or combinations thereof\u2014based on experiments with over 15 vision models. We critically examine existing MLLM benchmarks, addressing the difficulties involved in consolidating and interpreting results from various tasks. To further improve visual grounding, we propose spatial vision aggregator (SVA), a dynamic and spatially-aware connector that integrates vision features with LLMs while reducing the number of tokens. Additionally, we discuss the curation of high-quality visual instruction-tuning data from publicly available sources, emphasizing the importance of distribution balancing. Collectively, Cambrian-1 not only achieves state-of-the-art performances but also serves as a comprehensive, open cookbook for instruction-tuned MLLMs. We provide model weights, code, supporting tools, datasets, and detailed instruction-tuning and evaluation recipes. We hope our release will inspire and accelerate advancements in multimodal systems and visual representation learning.", "keywords": "Multimodal LLM;Visual Representation Learning;Evaluation Protocol;Data Mix;Open Science", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shengbang Tong;Ellis L Brown II;Penghao Wu;Sanghyun Woo;ADITHYA JAIRAM IYER;Sai Charitha Akula;Shusheng Yang;Jihan Yang;Manoj Middepogu;Ziteng Wang;Xichen Pan;Rob Fergus;Yann LeCun;Saining Xie", "authorids": "~Shengbang_Tong1;~Ellis_L_Brown_II1;~Penghao_Wu1;~Sanghyun_Woo1;~ADITHYA_JAIRAM_IYER1;~Sai_Charitha_Akula1;~Shusheng_Yang1;~Jihan_Yang1;~Manoj_Middepogu1;~Ziteng_Wang5;~Xichen_Pan1;~Rob_Fergus1;~Yann_LeCun1;~Saining_Xie2", "gender": "M;;M;M;F;M;M;M;M;M;M;M;M;M", "homepage": "https://tsb0601.github.io/petertongsb/;;https://sites.google.com/view/sanghyunwoo/;https://adithyaiyer1999.github.io/;;https://shushengyang.com;https://jihanyang.github.io/;;;https://xichenpan.com/;http://cs.nyu.edu/fergus/;http://yann.lecun.com;https://ellisbrown.github.io/;", "dblp": "306/1406;320/7785;44/6878;;;290/1972;230/4254;;;317/0180;77/3763;l/YannLeCun;340/4498;126/0960", "google_scholar": "https://scholar.google.com/citations?hl=en;9mssd5EAAAAJ;iwBPvPIAAAAJ;;;v6dmW5cntoMC;zWfNZnIAAAAJ;;;9wh9VXIAAAAJ;https://scholar.google.com.tw/citations?user=GgQ9GEkAAAAJ;WLN3QrAAAAAJ;Hp5uEnUAAAAJ;https://scholar.google.co.uk/citations?user=Y2GtJkAAAAAJ", "orcid": ";;;;;;;;;0000-0002-8308-841X;;;0000-0002-8117-0778;", "linkedin": ";;;;sai-charitha-32574887/;shushengyang/;;manoj-middepogu/;ziteng-wang-694b8b227/;xichenpan/;;;ellislbrownii/;", "or_profile": "~Shengbang_Tong1;~Penghao_Wu1;~Sanghyun_Woo1;~ADITHYA_JAIRAM_IYER1;~Sai_Charitha_Akula1;~Shusheng_Yang1;~Jihan_Yang1;~Manoj_Middepogu1;~Ziteng_Wang5;~Xichen_Pan1;~Rob_Fergus1;~Yann_LeCun1;~Ellis_Langham_Brown1;~Saining_Xie1", "aff": "New York University;;New York University;New York University;New York University;New York University;University of Hong Kong;New York University;New York University;Meta Facebook;Google;New York University;Allen Institute for Artificial Intelligence;New York University", "aff_domain": "nyu.edu;;nyu.edu;nyu.edu;nyu.edu;nyu.edu;eee.hku.hk;nyu.edu;nyu.edu;meta.com;google.com;nyu.edu;allenai.org;nyu.edu", "position": "PhD student;;Faculty Fellow;MS student;MS student;PhD student;PhD student;MS student;Undergrad student;Intern;Research scientist;Full Professor;Intern;Assistant Professor", "bibtex": "@inproceedings{\ntong2024cambrian,\ntitle={Cambrian-1: A Fully Open, Vision-Centric Exploration of Multimodal {LLM}s},\nauthor={Shengbang Tong and Ellis L Brown II and Penghao Wu and Sanghyun Woo and ADITHYA JAIRAM IYER and Sai Charitha Akula and Shusheng Yang and Jihan Yang and Manoj Middepogu and Ziteng Wang and Xichen Pan and Rob Fergus and Yann LeCun and Saining Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vi8AepAXGy}\n}", "github": "", "reviewers": "ED7N;K2Un;BwuE;xvQn", "pdf_size": 6308429, "rating": "6;7;7;8", "confidence": "5;5;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;4;4", "wc_summary": "70;111;125;77", "wc_strengths": "95;65;99;116", "wc_weaknesses": "200;214;4;82", "wc_questions": "4;13;400;25", "wc_limitations": "1;28;18;8", "wc_review": "370;431;646;308", "wc_reply_reviewers": "16;20;18;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.75, 22.92787604642 ], "wc_strengths_avg": [ 93.75, 18.376275465937052 ], "wc_weaknesses_avg": [ 125.0, 86.65448632355972 ], "wc_questions_avg": [ 110.5, 167.3088461498674 ], "wc_limitations_avg": [ 13.75, 10.207227831296802 ], "wc_review_avg": [ 438.75, 127.31334376254517 ], "wc_reply_reviewers_avg": [ 16.75, 2.5860201081971503 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 321, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11658769578308025201&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nyu.edu;;nyu.edu;nyu.edu;nyu.edu;nyu.edu;eee.hku.hk;nyu.edu;nyu.edu;meta.com;google.com;nyu.edu;allenai.org;nyu.edu", "author_num": 14, "aff_unique_index": "0;0;0;0;0;1;0;0;2;3;0;4;0", "aff_unique_norm": "New York University;University of Hong Kong;Meta;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;Meta Platforms, Inc.;Google;", "aff_unique_url": "https://www.nyu.edu;https://www.hku.hk;https://meta.com;https://www.google.com;https://allenai.org", "aff_unique_abbr": "NYU;HKU;Meta;Google;AI2", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Robust Fine-tuning of Zero-shot Models via Variance Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94879", "id": "ViTUlZvPDu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ViTUlZvPDu", "openreview": "https://openreview.net/forum?id=ViTUlZvPDu", "poster": "", "project": "", "author_site": "Beier Zhu, Jiequan Cui, Hanwang Zhang", "tldr": "", "abstract": "When fine-tuning zero-shot models like CLIP, our desideratum is for the fine-tuned model to excel in both in-distribution (ID) and out-of-distribution (OOD). Recently, ensemble-based models (ESM) have been shown to offer significant robustness improvement, while preserving high ID accuracy. However, our study finds that ESMs do not solve the ID-OOD trade-offs: they achieve peak performance for ID and OOD accuracy at different mixing coefficients. When optimized for OOD accuracy, the ensemble model exhibits a noticeable decline in ID accuracy, and vice versa. In contrast, we propose a sample-wise ensembling technique that can simultaneously attain the best ID and OOD accuracy without the trade-offs. Specifically, we construct a Zero-Shot Failure (ZSF) set containing training samples incorrectly predicted by the zero-shot model. For each test sample, we calculate its distance to the ZSF set and assign a higher weight to the fine-tuned model in the ensemble if the distance is small. We term our method Variance Reduction Fine-tuning (VRF), as it effectively reduces the variance in ensemble predictions, thereby decreasing residual error. On ImageNet and five derived distribution shifts, our VRF further improves the OOD accuracy by 1.5 - 2.0 pp over the ensemble baselines while maintaining or increasing ID accuracy. VRF achieves similar large robustness gains on (0.9 - 3.1 pp) on other distribution shifts\n19 benchmarks. Codes are available in https://github.com/BeierZhu/VRF.", "keywords": "image classification;fine-tuning;zero-shot models", "primary_area": "machine_vision", "supplementary_material": "/attachment/c4ce965a4797288d838cb29d6857bd1abd29c751.zip", "author": "Beier Zhu;Jiequan Cui;Hanwang Zhang", "authorids": "~Beier_Zhu1;~Jiequan_Cui1;~Hanwang_Zhang3", "gender": "M;M;M", "homepage": "https://beierzhu.github.io;https://jiequancui.github.io/;https://mreallab.github.io/index.html", "dblp": "243/7531;259/5474;79/8116.html", "google_scholar": "jHczmjwAAAAJ;KbXLN2AAAAAJ;YG0DFyYAAAAJ", "orcid": "0000-0002-7900-6979;;", "linkedin": ";;", "or_profile": "~Beier_Zhu1;~Jiequan_Cui1;~Hanwang_Zhang3", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Research Fellow;Associate Professor", "bibtex": "@inproceedings{\nzhu2024robust,\ntitle={Robust Fine-tuning of Zero-shot Models via Variance Reduction},\nauthor={Beier Zhu and Jiequan Cui and Hanwang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ViTUlZvPDu}\n}", "github": "", "reviewers": "52xu;e2mj;vpPs;ydTd;8xce", "pdf_size": 3937512, "rating": "5;5;5;6;6", "confidence": "3;4;3;4;4", "soundness": "2;2;4;4;2", "novelty": "3;2;3;3;2", "presentation": "2;4;3;4;2", "wc_summary": "47;70;93;67;91", "wc_strengths": "194;48;81;91;36", "wc_weaknesses": "301;212;203;241;135", "wc_questions": "238;37;17;39;2", "wc_limitations": "13;27;33;9;1", "wc_review": "793;394;427;447;265", "wc_reply_reviewers": "326;0;134;69;57", "wc_reply_authors": "192;88;103;56;155", "reply_reviewers": "3;0;1;1;2", "reply_authors": "3;3;3;2;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 73.6, 16.98940846527624 ], "wc_strengths_avg": [ 90.0, 55.81755996100152 ], "wc_weaknesses_avg": [ 218.4, 53.97629109155241 ], "wc_questions_avg": [ 66.6, 86.77465067633518 ], "wc_limitations_avg": [ 16.6, 11.757550765359253 ], "wc_review_avg": [ 465.2, 175.7229637810608 ], "wc_reply_reviewers_avg": [ 117.2, 112.74998891352494 ], "wc_reply_authors_avg": [ 118.8, 48.602057569613244 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.8, 0.39999999999999997 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11962489358933047664&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Web-Scale Visual Entity Recognition: An LLM-Driven Data Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94878", "id": "VikufBLOW1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VikufBLOW1", "openreview": "https://openreview.net/forum?id=VikufBLOW1", "poster": "/media/PosterPDFs/NeurIPS%202024/94878.png?t=1731582609.8490272", "project": "", "author_site": "Mathilde Caron, Alireza Fathi, Cordelia Schmid, Ahmet Iscen", "tldr": "", "abstract": "Web-scale visual entity recognition, the task of associating images with their corresponding entities within vast knowledge bases like Wikipedia, presents significant challenges due to the lack of clean, large-scale training data. In this paper, we propose a novel methodology to curate such a dataset, leveraging a multimodal large language model (LLM) for label verification, metadata generation, and rationale explanation. Instead of relying on the multimodal LLM to directly annotate data, which we found to be suboptimal, we prompt it to reason about potential candidate entity labels by accessing additional contextually relevant information (such as Wikipedia), resulting in more accurate annotations. We further use the multimodal LLM to enrich the dataset by generating question-answer pairs and a grounded fine-grained textual description (referred to as \"rationale\") that explains the connection between images and their assigned entities. Experiments demonstrate that models trained on this automatically curated data achieve state-of-the-art performance on web-scale visual entity recognition tasks (e.g. +6.9% improvement in OVEN entity task), underscoring the importance of high-quality training data in this domain.", "keywords": "Visual entity recognition;generative models;multimodal LLM", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mathilde Caron;Alireza Fathi;Cordelia Schmid;Ahmet Iscen", "authorids": "~Mathilde_Caron1;~Alireza_Fathi1;~Cordelia_Schmid1;~Ahmet_Iscen3", "gender": "F;M;F;M", "homepage": ";http://ai.stanford.edu/~alireza/;https://cordeliaschmid.github.io/;", "dblp": "223/4085;70/3898;s/CordeliaSchmid;140/7520", "google_scholar": ";luv0xMIAAAAJ;IvqCXP4AAAAJ;wIjyqzAAAAAJ", "orcid": ";;;", "linkedin": ";alireza-fathi-04338411/;cordelia-schmid-47985a9;", "or_profile": "~Mathilde_Caron1;~Alireza_Fathi1;~Cordelia_Schmid1;~Ahmet_Iscen3", "aff": "Google;Google;Inria;Google", "aff_domain": "google.com;google.com;inria.fr;google.com", "position": "Researcher;researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ncaron2024webscale,\ntitle={Web-Scale Visual Entity Recognition: An {LLM}-Driven Data Approach},\nauthor={Mathilde Caron and Alireza Fathi and Cordelia Schmid and Ahmet Iscen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VikufBLOW1}\n}", "github": "", "reviewers": "MtgR;Poha;LX9i;9wiz", "pdf_size": 1499279, "rating": "6;6;7;8", "confidence": "5;1;4;5", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "3;2;3;4", "wc_summary": "175;76;59;83", "wc_strengths": "223;46;110;62", "wc_weaknesses": "295;73;216;98", "wc_questions": "67;13;2;65", "wc_limitations": "154;32;2;1", "wc_review": "914;240;389;309", "wc_reply_reviewers": "0;94;36;24", "wc_reply_authors": "0;190;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.6393596310755 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.25, 45.16289959690365 ], "wc_strengths_avg": [ 110.25, 69.22562748000195 ], "wc_weaknesses_avg": [ 170.5, 89.9068962872148 ], "wc_questions_avg": [ 36.75, 29.51588555337617 ], "wc_limitations_avg": [ 47.25, 62.87835478127589 ], "wc_review_avg": [ 463.0, 265.66990796851644 ], "wc_reply_reviewers_avg": [ 38.5, 34.565155865408734 ], "wc_reply_authors_avg": [ 47.5, 82.27241335952168 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5057805388588732, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16412180083478358354&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;google.com;inria.fr;google.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;INRIA", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.inria.fr", "aff_unique_abbr": "Google;Inria", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Nearly Minimax Optimal Submodular Maximization with Bandit Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94877", "id": "Vn0FWRImra", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vn0FWRImra", "openreview": "https://openreview.net/forum?id=Vn0FWRImra", "poster": "", "project": "", "author_site": "Artin Tajdini, Lalit Jain, Kevin Jamieson", "tldr": "", "abstract": "We consider maximizing an unknown monotonic, submodular set function $f: 2^{[n]} \\rightarrow [0,1]$ with cardinality constraint under stochastic bandit feedback. \n At each time $t=1,\\dots,T$ the learner chooses a set $S_t \\subset [n]$ with $|S_t| \\leq k$ and receives reward $f(S_t) + \\eta_t$ where $\\eta_t$ is mean-zero sub-Gaussian noise. \n The objective is to minimize the learner's regret with respect to an approximation of the maximum $f(S_*)$ with $|S_*| = k$, obtained through robust greedy maximization of $f$. \n To date, the best regret bound in the literature scales as $k n^{1/3} T^{2/3}$. \n And by trivially treating every set as a unique arm one deduces that $\\sqrt{ {n \\choose k} T }$ is also achievable using standard multi-armed bandit algorithms. \n In this work, we establish the first minimax lower bound for this setting that scales like $\\tilde{\\Omega}(\\min_{L \\le k}(L^{1/3}n^{1/3}T^{2/3} + \\sqrt{{n \\choose k - L}T}))$. For a slightly restricted algorithm class, we prove a stronger regret lower bound of $\\tilde{\\Omega}(\\min_{L \\le k}(Ln^{1/3}T^{2/3} + \\sqrt{{n \\choose k - L}T}))$.\n Moreover, we propose an algorithm Sub-UCB that achieves regret $\\tilde{\\mathcal{O}}(\\min_{L \\le k}(Ln^{1/3}T^{2/3} + \\sqrt{{n \\choose k - L}T}))$ capable of matching the lower bound on regret for the restricted class up to logarithmic factors.", "keywords": "Bandits;Submodular optimization;Minimax optimal", "primary_area": "bandits", "supplementary_material": "", "author": "Artin Tajdini;Lalit K Jain;Kevin Jamieson", "authorids": "~Artin_Tajdini1;~Lalit_K_Jain1;~Kevin_Jamieson1", "gender": ";;M", "homepage": "https://www.linkedin.com/in/artin-tajdini-692942ab;http://www.lalitjain.com;", "dblp": "278/8700;178/3228;85/10260", "google_scholar": ";hGMSFu4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Artin_Tajdini1;~Lalit_K_Jain1;~Kevin_Jamieson1", "aff": "University of Washington;University of Washington;University of Washington", "aff_domain": "uw.edu;uw.edu;washington.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ntajdini2024nearly,\ntitle={Nearly Minimax Optimal Submodular Maximization with Bandit Feedback},\nauthor={Artin Tajdini and Lalit K Jain and Kevin Jamieson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vn0FWRImra}\n}", "github": "", "reviewers": "eJpK;jwa8;Tz5c;yrL4;LE4U", "pdf_size": 387596, "rating": "4;5;5;7;7", "confidence": "4;2;3;4;3", "soundness": "3;3;3;4;3", "novelty": "2;3;2;4;3", "presentation": "3;2;2;2;3", "wc_summary": "35;92;143;208;69", "wc_strengths": "76;106;288;203;43", "wc_weaknesses": "163;194;390;349;121", "wc_questions": "69;11;357;260;1", "wc_limitations": "4;2;1;1;3", "wc_review": "347;405;1179;1021;237", "wc_reply_reviewers": "319;23;32;12;0", "wc_reply_authors": "191;0;40;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 109.4, 60.53296622502486 ], "wc_strengths_avg": [ 143.2, 89.98088685937698 ], "wc_weaknesses_avg": [ 243.4, 106.3289236285217 ], "wc_questions_avg": [ 139.6, 143.17206431423696 ], "wc_limitations_avg": [ 2.2, 1.16619037896906 ], "wc_review_avg": [ 637.8, 384.4842779620514 ], "wc_reply_reviewers_avg": [ 77.2, 121.37446189376084 ], "wc_reply_authors_avg": [ 46.2, 74.03890868995842 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08908708063747481, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16357999121068081333&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "uw.edu;uw.edu;washington.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FiVA: Fine-grained Visual Attribute Dataset for Text-to-Image Diffusion Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97663", "id": "Vp6HAjrdIg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vp6HAjrdIg", "openreview": "https://openreview.net/forum?id=Vp6HAjrdIg", "poster": "/media/PosterPDFs/NeurIPS%202024/97663.png?t=1731758602.18517", "project": "", "author_site": "Tong Wu, Yinghao Xu, Ryan Po, Mengchen Zhang, Guandao Yang, Jiaqi Wang, Ziwei Liu, Dahua Lin, Gordon Wetzstein", "tldr": "", "abstract": "Recent advances in text-to-image generation have enabled the creation of high-quality images with diverse applications. However, accurately describing desired visual attributes can be challenging, especially for non-experts in art and photography. An intuitive solution involves adopting favorable attributes from source images. Current methods attempt to distill identity and style from source images. However, \"style\" is a broad concept that includes texture, color, and artistic elements, but does not cover other important attributes like lighting and dynamics. Additionally, a simplified \"style\" adaptation prevents combining multiple attributes from different sources into one generated image. In this work, we formulate a more effective approach to decompose the aesthetics of a picture into specific visual attributes, letting users apply characteristics like lighting, texture, and dynamics from different images. To achieve this goal, we constructed the first fine-grained visual attributes dataset (FiVA) to the best of our knowledge. This FiVA dataset features a well-organized taxonomy for visual attributes and includes 1 M high-quality generated images with visual attribute annotations. Leveraging this dataset, we propose a fine-grained visual attributes adaptation framework (FiVA-Adapter) , which decouples and adapts visual attributes from one or more source images into a generated one. This approach enhances user-friendly customization, allowing users to selectively apply desired attributes to create images that meet their unique preferences and specific content requirements.", "keywords": "generative model;customization", "primary_area": "", "supplementary_material": "/attachment/a72cd7d189bb9796ea017c6050d8327e7983f609.pdf", "author": "Tong Wu;Yinghao Xu;Ryan Po;Mengchen Zhang;Guandao Yang;Jiaqi Wang;Ziwei Liu;Dahua Lin;Gordon Wetzstein", "authorids": "~Tong_Wu2;~Yinghao_Xu1;~Ryan_Po1;~Mengchen_Zhang1;~Guandao_Yang1;~Jiaqi_Wang1;~Ziwei_Liu1;~Dahua_Lin1;~Gordon_Wetzstein3", "gender": "F;M;M;F;M;M;M;M;M", "homepage": "https://wutong16.github.io/;https://justimyhxu.github.io/;https://ryanpo.com/;https://kszpxxzmc.github.io/;http://www.guandaoyang.com;https://myownskyw7.github.io/;https://liuziwei7.github.io/;http://dahua.site;http://web.stanford.edu/~gordonwz/", "dblp": "75/5056-2;232/2482;307/5002;227/3352-1.html;209/9624;44/740-3;05/6300-2;53/6088;13/4660", "google_scholar": "https://scholar.google.com.hk/citations?user=cLUgV4YAAAAJ;https://scholar.google.com/citations?hl=en;;oWXEaQoAAAAJ;_kElCmMAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;https://scholar.google.com.hk/citations?user=lc45xlcAAAAJ;GMzzRRUAAAAJ;VOf45S0AAAAJ", "orcid": ";;;0009-0004-0141-3939;0000-0002-2992-5803;;;;0000-0002-9243-6885", "linkedin": ";;;;guandao-yang-349b83a6/;;;;gordon-wetzstein-2406723/", "or_profile": "~Tong_Wu2;~Yinghao_Xu1;~Ryan_Po1;~Mengchen_Zhang1;~Guandao_Yang1;~Jiaqi_Wang1;~Ziwei_Liu1;~Dahua_Lin1;~Gordon_Wetzstein3", "aff": "The Chinese University of Hong Kong;Stanford University;Stanford University;Shanghai AI Laboratory;Stanford University;Shanghai AI Laboratory;Nanyang Technological University;The Chinese University of Hong Kong;Stanford University", "aff_domain": "cuhk.edu.hk;stanford.edu;stanford.edu;pjlab.org.cn;stanford.edu;pjlab.org.cn;ntu.edu.sg;cuhk.edu.hk;stanford.edu", "position": "PhD student;Postdoc;PhD student;PhD student;Postdoc;Research Scientist;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024fiva,\ntitle={Fi{VA}: Fine-grained Visual Attribute Dataset for Text-to-Image Diffusion Models},\nauthor={Tong Wu and Yinghao Xu and Ryan Po and Mengchen Zhang and Guandao Yang and Jiaqi Wang and Ziwei Liu and Dahua Lin and Gordon Wetzstein},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=Vp6HAjrdIg}\n}", "github": "", "reviewers": "niWt;cCrU;fV3f", "pdf_size": 18870151, "rating": "6;6;7", "confidence": "5;5;3", "wc_summary_and_contributions": "311;58;93", "wc_strengths": "148;23;31", "wc_improvement": "49;160;112", "wc_limitations": "135;21;21", "wc_correctness": "36;1;1", "wc_clarity": "11;5;1", "wc_relation_to_prior_work": "20;17;1", "wc_documentation": "62;29;1", "wc_additional_feedback": "1;1;1", "wc_review": "773;315;262", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 154.0, 111.93152668782226 ], "wc_strengths_avg": [ 67.33333333333333, 57.13337222869155 ], "wc_improvement_avg": [ 107.0, 45.45327270945405 ], "wc_limitations_avg": [ 59.0, 53.74011537017761 ], "wc_correctness_avg": [ 12.666666666666666, 16.49915822768611 ], "wc_clarity_avg": [ 5.666666666666667, 4.109609335312651 ], "wc_relation_to_prior_work_avg": [ 12.666666666666666, 8.339997335464536 ], "wc_documentation_avg": [ 30.666666666666668, 24.931015935086872 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 450.0, 229.41810448756362 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6964172092338857037&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "cuhk.edu.hk;stanford.edu;stanford.edu;pjlab.org.cn;stanford.edu;pjlab.org.cn;ntu.edu.sg;cuhk.edu.hk;stanford.edu", "author_num": 9, "aff_unique_index": "0;1;1;2;1;2;3;0;1", "aff_unique_norm": "Chinese University of Hong Kong;Stanford University;Shanghai AI Laboratory;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.stanford.edu;https://www.shanghai-ai-lab.com;https://www.ntu.edu.sg", "aff_unique_abbr": "CUHK;Stanford;SAIL;NTU", "aff_campus_unique_index": "0;1;1;1;0;1", "aff_campus_unique": "Hong Kong SAR;Stanford;", "aff_country_unique_index": "0;1;1;0;1;0;2;0;1", "aff_country_unique": "China;United States;Singapore" }, { "title": "A Topology-aware Graph Coarsening Framework for Continual Graph Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94876", "id": "VpINEEVLX0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VpINEEVLX0", "openreview": "https://openreview.net/forum?id=VpINEEVLX0", "poster": "/media/PosterPDFs/NeurIPS%202024/94876.png?t=1731706905.69658", "project": "", "author_site": "Xiaoxue Han, Zhuo Feng, Yue Ning", "tldr": "", "abstract": "Graph Neural Networks (GNNs) experience \"catastrophic forgetting\" in continual learning setups, where they tend to lose previously acquired knowledge and perform poorly on old tasks. Rehearsal-based methods, which consolidate old knowledge with a replay memory buffer, are a de facto solution due to their straightforward workflow. However, these methods often fail to adequately capture topological information, leading to incorrect input-label mappings in replay samples. To address this, we propose TACO, a topology-aware graph coarsening and continual learning framework that stores information from previous tasks as a reduced graph. Throughout each learning period, this reduced graph expands by integrating with a new graph and aligning shared nodes, followed by a \"zoom-out\" reduction process to maintain a stable size. We have developed a graph coarsening algorithm based on node representation proximities to efficiently reduce a graph while preserving essential topological information. We empirically demonstrate that the learning process on the reduced graph can closely approximate that on the original graph. We compare TACO with a wide range of state-of-the-art baselines, proving its superiority and the necessity of preserving high-quality topological information for effective replaying.", "keywords": "Continual Graph Learning;Catastrophic Forgetting;Graph Coarsening", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/9418528e9a500d4e79ca69dcc0b192fb3081a9c1.zip", "author": "Xiaoxue Han;Zhuo Feng;Yue Ning", "authorids": "~Xiaoxue_Han1;~Zhuo_Feng3;~Yue_Ning1", "gender": "F;M;F", "homepage": "https://hanxiaoxue114.github.io/;https://web.stevens.edu/facultyprofile/?id=2371;https://yue-ning.github.io/", "dblp": "219/1935;81/4441.html;74/9990-1.html", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Xiaoxue_Han1;~Zhuo_Feng3;~Yue_Ning1", "aff": "Stevens Institute of Technology;;Stevens Institute of Technology", "aff_domain": "stevens.edu;;stevens.edu", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nhan2024a,\ntitle={A Topology-aware Graph Coarsening Framework for Continual Graph Learning},\nauthor={Xiaoxue Han and Zhuo Feng and Yue Ning},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VpINEEVLX0}\n}", "github": "", "reviewers": "AYqY;HprJ;AUFc;F1Ar", "pdf_size": 1008949, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;4;3", "wc_summary": "53;71;96;88", "wc_strengths": "45;43;36;92", "wc_weaknesses": "33;36;51;24", "wc_questions": "211;35;321;68", "wc_limitations": "16;22;25;1", "wc_review": "358;207;529;273", "wc_reply_reviewers": "28;0;20;214", "wc_reply_authors": "11;163;219;404", "reply_reviewers": "1;0;1;2", "reply_authors": "2;2;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 77.0, 16.537835408541227 ], "wc_strengths_avg": [ 54.0, 22.192341021172147 ], "wc_weaknesses_avg": [ 36.0, 9.72111104761179 ], "wc_questions_avg": [ 158.75, 114.67862704096173 ], "wc_limitations_avg": [ 16.0, 9.246621004453464 ], "wc_review_avg": [ 341.75, 120.63452034969094 ], "wc_reply_reviewers_avg": [ 65.5, 86.34089413481887 ], "wc_reply_authors_avg": [ 199.25, 140.59227396980248 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17516262532202830926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "stevens.edu;;stevens.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stevens Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.stevens.edu", "aff_unique_abbr": "SIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PrivAuditor: Benchmarking Data Protection Vulnerabilities in LLM Adaptation Techniques", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97662", "id": "VpkfxuVXwx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VpkfxuVXwx", "openreview": "https://openreview.net/forum?id=VpkfxuVXwx", "poster": "/media/PosterPDFs/NeurIPS%202024/97662.png?t=1731757951.2573755", "project": "", "author_site": "Derui Zhu, Dingfan Chen, Xiongfei Wu, Jiahui Geng, Zhuo Li, Jens Grossklags, Lei Ma", "tldr": "", "abstract": "Large Language Models (LLMs) are recognized for their potential to be an important building block toward achieving artificial general intelligence due to their unprecedented capability for solving diverse tasks. Despite these achievements, LLMs often underperform in domain-specific tasks without training on relevant domain data. This phenomenon, which is often attributed to distribution shifts, makes adapting pre-trained LLMs with domain-specific data crucial. However, this adaptation raises significant privacy concerns, especially when the data involved come from sensitive domains. In this work, we extensively investigate the privacy vulnerabilities of adapted (fine-tuned) LLMs and benchmark privacy leakage across a wide range of data modalities, state-of-the-art privacy attack methods, adaptation techniques, and model architectures. We systematically evaluate and pinpoint critical factors related to privacy leakage. With our organized codebase and actionable insights, we aim to provide a standardized auditing tool for practitioners seeking to deploy customized LLM applications with faithful privacy assessments.", "keywords": "Privacy;Parameter-Efficient Fine-Tuning;Membership Inference Attacks.", "primary_area": "", "supplementary_material": "", "author": "Derui Zhu;Dingfan Chen;Xiongfei Wu;Jiahui Geng;Zhuo Li;Jens Grossklags;Lei Ma", "authorids": "~Derui_Zhu2;~Dingfan_Chen1;~Xiongfei_Wu1;~Jiahui_Geng3;~Zhuo_Li6;~Jens_Grossklags2;~Lei_Ma1", "gender": ";F;;M;M;;M", "homepage": ";https://dingfanchen.github.io/homepage/;;https://sites.google.com/view/jiahuigeng/;https://lizhuo-1994.github.io/;;https://www.malei.org", "dblp": ";248/8198;;228/5625;;;20/6534-3", "google_scholar": ";iARn00oAAAAJ;;eMC-gQUAAAAJ;https://scholar.google.co.jp/citations?user=54d2brwAAAAJ;;xsfGc58AAAAJ", "orcid": ";;;;;;", "linkedin": ";dingfan-chen-44174012b/;;%E4%BD%B3%E8%BE%89-%E8%80%BF-730961100/;;;lei-ma-345a0484", "or_profile": "~Derui_Zhu2;~Dingfan_Chen1;~Xiongfei_Wu1;~Jiahui_Geng3;~Zhuo_Li6;~Jens_Grossklags2;~Lei_Ma1", "aff": ";CISPA, saarland university, saarland informatics campus;;Mohamed bin Zayed University of Artificial Intelligence;Kyushu University;;University of Alberta", "aff_domain": ";cispa.saarland;;mbzuai.ac.ae;kyushu-u.ac.jp;;ualberta.ca", "position": ";PhD student;;Postdoc;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nzhu2024privauditor,\ntitle={PrivAuditor: Benchmarking Data Protection Vulnerabilities in {LLM} Adaptation Techniques},\nauthor={Derui Zhu and Dingfan Chen and Xiongfei Wu and Jiahui Geng and Zhuo Li and Jens Grossklags and Lei Ma},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=VpkfxuVXwx}\n}", "github": "", "reviewers": "fLBd;6BzH;owSp;5k61", "pdf_size": 1673797, "rating": "6;8;8;10", "confidence": "4;5;4;5", "wc_summary_and_contributions": "60;277;75;123", "wc_strengths": "35;47;3;94", "wc_improvement": "26;173;3;60", "wc_limitations": "1;13;41;60", "wc_correctness": "1;1;1;19", "wc_clarity": "1;2;4;8", "wc_relation_to_prior_work": "1;16;44;25", "wc_documentation": "1;5;26;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "127;535;198;407", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 133.75, 85.91674749430405 ], "wc_strengths_avg": [ 44.75, 32.66783586342995 ], "wc_improvement_avg": [ 65.5, 65.29356783022352 ], "wc_limitations_avg": [ 28.75, 23.155722834755128 ], "wc_correctness_avg": [ 5.5, 7.794228634059948 ], "wc_clarity_avg": [ 3.75, 2.680951323690902 ], "wc_relation_to_prior_work_avg": [ 21.5, 15.56438241627338 ], "wc_documentation_avg": [ 12.25, 9.883698700385398 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 316.75, 162.6996850027682 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4608470131143748909&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": ";cispa.saarland;;mbzuai.ac.ae;kyushu-u.ac.jp;;ualberta.ca", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Saarland University;Mohamed bin Zayed University of Artificial Intelligence;Kyushu University;University of Alberta", "aff_unique_dep": "CISPA;;;", "aff_unique_url": "https://www.uni-saarland.de;https://mbzuai.ac.ae;https://www.kyushu-u.ac.jp;https://www.ualberta.ca", "aff_unique_abbr": "Saarland U;MBZUAI;Kyushu U;UAlberta", "aff_campus_unique_index": "0", "aff_campus_unique": "Saarland Informatics Campus;", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Germany;United Arab Emirates;Japan;Canada" }, { "title": "LLM-AutoDA: Large Language Model-Driven Automatic Data Augmentation for Long-tailed Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94875", "id": "VpuOuZOVhP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VpuOuZOVhP", "openreview": "https://openreview.net/forum?id=VpuOuZOVhP", "poster": "", "project": "", "author_site": "Pengkun Wang, Zhe Zhao, HaiBin Wen, Fanfu Wang, Binwu Wang, Qingfu Zhang, Yang Wang", "tldr": "", "abstract": "The long-tailed distribution is the underlying nature of real-world data, and it presents unprecedented challenges for training deep learning models. Existing long-tailed learning paradigms based on re-balancing or data augmentation have partially alleviated the long-tailed problem. However, they still have limitations, such as relying on manually designed augmentation strategies, having a limited search space, and using fixed augmentation strategies. To address these limitations, this paper proposes a novel LLM-based long-tailed data augmentation framework called LLM-AutoDA, which leverages large-scale pretrained models to automatically search for the optimal augmentation strategies suitable for long-tailed data distributions. In addition, it applies this strategy to the original imbalanced data to create an augmented dataset and fine-tune the underlying long-tailed learning model. The performance improvement on the validation set serves as a reward signal to update the generation model, enabling the generation of more effective augmentation strategies in the next iteration. We conducted extensive experiments on multiple mainstream long-tailed learning benchmarks. The results show that LLM-AutoDA outperforms state-of-the-art data augmentation methods and other re-balancing methods significantly.", "keywords": "Long-tailed learning;Large Language Model;Data Augmentation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Pengkun Wang;Zhe Zhao;HaiBin Wen;Fanfu Wang;Binwu Wang;Qingfu Zhang;Yang Wang", "authorids": "~Pengkun_Wang1;~Zhe_Zhao5;~HaiBin_Wen1;~Fanfu_Wang1;~Binwu_Wang1;~Qingfu_Zhang1;~Yang_Wang32", "gender": "M;M;M;M;M;M;M", "homepage": "http://home.ustc.edu.cn/~pengkun/index.html;https://di.ustc.edu.cn/_upload/tpl/14/f7/5367/template5367/members.html;https://github.com/haibin65535;;https://continualgoing.github.io/;https://www.cs.cityu.edu.hk/~qzhan7/index.html;http://staff.ustc.edu.cn/~angyan/", "dblp": ";;209/2186;396/5533;262/4302;98/1240.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;;;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-2680-4563;0000-0002-8942-8761;0009-0009-5019-2390;;0000-0002-4638-0382;;0000-0002-6079-7053", "linkedin": ";;;;;;", "or_profile": "~Pengkun_Wang1;~Zhe_Zhao5;~HaiBin_Wen1;~Fanfu_Wang1;~Binwu_Wang1;~Qingfu_Zhang1;~Yang_Wang32", "aff": "University of Science and Technology of China;University of Science and Technology of China;Shaoguan University;Lanzhou University;University of Science and Technology of China;City University of Hong Kong;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;sgu.edu.cn;lzu.edu.cn;ustc.edu.cn;cityu.edu.hk;ustc.edu.cn", "position": "Researcher;PhD student;Undergrad student;Undergrad student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024llmautoda,\ntitle={{LLM}-Auto{DA}: Large Language Model-Driven Automatic Data Augmentation for Long-tailed Problems},\nauthor={Pengkun Wang and Zhe Zhao and HaiBin Wen and Fanfu Wang and Binwu Wang and Qingfu Zhang and Yang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VpuOuZOVhP}\n}", "github": "", "reviewers": "37A8;JafW;q3qy;2Q2t", "pdf_size": 1839206, "rating": "5;5;7;8", "confidence": "3;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "1;3;3;3", "wc_summary": "60;62;64;48", "wc_strengths": "40;60;115;110", "wc_weaknesses": "394;231;88;74", "wc_questions": "107;2;3;46", "wc_limitations": "2;1;9;1", "wc_review": "603;356;279;279", "wc_reply_reviewers": "235;147;0;106", "wc_reply_authors": "1450;624;0;67", "reply_reviewers": "2;2;0;1", "reply_authors": "4;3;1;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 58.5, 6.224949798994366 ], "wc_strengths_avg": [ 81.25, 32.08874413248359 ], "wc_weaknesses_avg": [ 196.75, 129.39740144222372 ], "wc_questions_avg": [ 39.5, 42.82814495165533 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 379.25, 132.95182398147082 ], "wc_reply_reviewers_avg": [ 122.0, 84.4600497276671 ], "wc_reply_authors_avg": [ 535.25, 581.0324323994315 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10393039287416545882&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;ustc.edu.cn;sgu.edu.cn;lzu.edu.cn;ustc.edu.cn;cityu.edu.hk;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;0;3;0", "aff_unique_norm": "University of Science and Technology of China;Shaoguan University;Lanzhou University;City University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.gdsgu.edu.cn;https://www.lzu.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "USTC;;LZU;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Reciprocal Reward Influence Encourages Cooperation From Self-Interested Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94874", "id": "Vq2kzpig8v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vq2kzpig8v", "openreview": "https://openreview.net/forum?id=Vq2kzpig8v", "poster": "", "project": "", "author_site": "John L Zhou, Weizhe Hong, Jonathan Kao", "tldr": "", "abstract": "Cooperation between self-interested individuals is a widespread phenomenon in the natural world, but remains elusive in interactions between artificially intelligent agents. Instead, na\u00efve reinforcement learning algorithms typically converge to Pareto-dominated outcomes in even the simplest of social dilemmas. An emerging literature on opponent shaping has demonstrated the ability to reach prosocial outcomes by influencing the learning of other agents. However, such methods differentiate through the learning step of other agents or optimize for meta-game dynamics, which rely on privileged access to opponents' learning algorithms or exponential sample complexity, respectively. To provide a learning rule-agnostic and sample-efficient alternative, we introduce Reciprocators, reinforcement learning agents which are intrinsically motivated to reciprocate the influence of opponents' actions on their returns. This approach seeks to modify other agents' $Q$-values by increasing their return following beneficial actions (with respect to the Reciprocator) and decreasing it after detrimental actions, guiding them towards mutually beneficial actions without directly differentiating through a model of their policy. We show that Reciprocators can be used to promote cooperation in temporally extended social dilemmas during simultaneous learning. Our code is available at https://github.com/johnlyzhou/reciprocator/.", "keywords": "cooperation;reinforcement learning;opponent shaping;multi-agent reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "John Luoyu Zhou;Weizhe Hong;Jonathan Kao", "authorids": "~John_Luoyu_Zhou1;~Weizhe_Hong1;~Jonathan_Kao1", "gender": "M;Not Specified;", "homepage": "https://johnlyzhou.github.io/;https://www.hong-lab.org;http://seas.ucla.edu/~kao", "dblp": "59/10303;;145/1310", "google_scholar": "https://scholar.google.com/citations?hl=en;_hB2krQAAAAJ;", "orcid": "0000-0003-3565-5376;;0000-0002-9298-0143", "linkedin": "johnlyzhou/;;", "or_profile": "~John_Luoyu_Zhou1;~Weizhe_Hong1;~Jonathan_Kao1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024reciprocal,\ntitle={Reciprocal Reward Influence Encourages Cooperation From Self-Interested Agents},\nauthor={John Luoyu Zhou and Weizhe Hong and Jonathan Kao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vq2kzpig8v}\n}", "github": "", "reviewers": "SrMN;os8m;PQwp;wp16", "pdf_size": 1428305, "rating": "3;6;6;6", "confidence": "4;3;5;3", "soundness": "2;4;3;2", "novelty": "1;3;3;2", "presentation": "2;4;4;2", "wc_summary": "49;57;53;31", "wc_strengths": "36;88;53;91", "wc_weaknesses": "379;57;128;208", "wc_questions": "268;178;166;147", "wc_limitations": "32;1;1;9", "wc_review": "764;381;401;486", "wc_reply_reviewers": "223;34;23;69", "wc_reply_authors": "1354;0;0;22", "reply_reviewers": "3;2;1;1", "reply_authors": "5;1;1;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 47.5, 9.937303457175895 ], "wc_strengths_avg": [ 67.0, 23.313086453749534 ], "wc_weaknesses_avg": [ 193.0, 119.93956811661447 ], "wc_questions_avg": [ 189.75, 46.51007955271631 ], "wc_limitations_avg": [ 10.75, 12.695963925594622 ], "wc_review_avg": [ 508.0, 152.96895109792706 ], "wc_reply_reviewers_avg": [ 87.25, 80.19468498597647 ], "wc_reply_authors_avg": [ 344.0, 583.1929354853332 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4odsb0qc1eQJ:scholar.google.com/&scioq=Reciprocal+Reward+Influence+Encourages+Cooperation+From+Self-Interested+Agents&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ucla.edu;ucla.edu;ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "When is an Embedding Model More Promising than Another?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94873", "id": "VqFz7iTGcl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VqFz7iTGcl", "openreview": "https://openreview.net/forum?id=VqFz7iTGcl", "poster": "/media/PosterPDFs/NeurIPS%202024/94873.png?t=1731715572.7257407", "project": "", "author_site": "Maxime Darrin, Philippe Formont, Ismail Ayed, Jackie CK Cheung, Pablo Piantanida", "tldr": "", "abstract": "Embedders play a central role in machine learning, projecting any object into numerical representations that can, in turn, be leveraged to perform various downstream tasks. The evaluation of embedding models typically depends on domain-specific empirical approaches utilizing downstream tasks, primarily because of the lack of a standardized framework for comparison. However, acquiring adequately large and representative datasets for conducting these assessments is not always viable and can prove to be prohibitively expensive and time-consuming. In this paper, we present a unified approach to evaluate embedders. First, we establish theoretical foundations for comparing embedding models, drawing upon the concepts of sufficiency and informativeness. We then leverage these concepts to devise a tractable comparison criterion (information sufficiency), leading to a task-agnostic and self-supervised ranking procedure. We demonstrate experimentally that our approach aligns closely with the capability of embedding models to facilitate various downstream tasks in both natural language processing and molecular biology. This effectively offers practitioners a valuable tool for prioritizing model trials.", "keywords": "embedders;emeddings;molecules;nlp;llm;foundation models;evaluation;unsupervised;representation learning;information theory", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Maxime DARRIN;Philippe Formont;Ismail Ben Ayed;Jackie CK Cheung;Pablo Piantanida", "authorids": "~Maxime_DARRIN1;~Philippe_Formont1;~Ismail_Ben_Ayed1;~Jackie_CK_Cheung1;~Pablo_Piantanida2", "gender": "M;;M;M;M", "homepage": "https://icannos.github.io/;;https://profs.etsmtl.ca/ibenayed/;http://cs.mcgill.ca/~jcheung/;https://www.pablo-piantanida.org", "dblp": ";;68/4478;00/9012;44/1416", "google_scholar": "https://scholar.google.ca/citations?hl=fr;;https://scholar.google.ca/citations?user=29vyUccAAAAJ;https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ;https://scholar.google.fr/citations?user=QyBEFv0AAAAJ", "orcid": ";;;;", "linkedin": "maxime-darrin/;philippe-formont/;;;pablo-piantanida-60a51bb5/?locale=en_US", "or_profile": "~Maxime_DARRIN1;~Philippe_Formont1;~Ismail_Ben_Ayed1;~Jackie_CK_Cheung1;~Pablo_Piantanida2", "aff": "CentraleSupelec;Universit\u00e9 Paris-Saclay;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;Microsoft;Mila - Quebec AI Institute ", "aff_domain": "centralesupelec.fr;universite-paris-saclay.fr;etsmtl.ca;microsoft.com;mila.quebec", "position": "PhD student;PhD student;Full Professor;Consulting Researcher;Full Professor", "bibtex": "@inproceedings{\ndarrin2024when,\ntitle={When is an Embedding Model More Promising than Another?},\nauthor={Maxime DARRIN and Philippe Formont and Ismail Ben Ayed and Jackie CK Cheung and Pablo Piantanida},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VqFz7iTGcl}\n}", "github": "", "reviewers": "4AaU;UcaH;o2cb;id3c", "pdf_size": 7062492, "rating": "7;7;7;7", "confidence": "4;3;4;3", "soundness": "3;3;2;3", "novelty": "3;3;2;4", "presentation": "4;4;3;4", "wc_summary": "76;200;104;79", "wc_strengths": "60;58;51;79", "wc_weaknesses": "38;175;42;150", "wc_questions": "372;80;103;91", "wc_limitations": "12;5;6;31", "wc_review": "558;518;306;430", "wc_reply_reviewers": "78;38;24;0", "wc_reply_authors": "0;0;552;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.75, 50.4052328632653 ], "wc_strengths_avg": [ 62.0, 10.36822067666386 ], "wc_weaknesses_avg": [ 101.25, 61.90062600652759 ], "wc_questions_avg": [ 161.5, 121.80414607064901 ], "wc_limitations_avg": [ 13.5, 10.452272480183437 ], "wc_review_avg": [ 453.0, 96.6798841538404 ], "wc_reply_reviewers_avg": [ 35.0, 28.30194339616981 ], "wc_reply_authors_avg": [ 138.0, 239.02301144450507 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13494432851489933127&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "centralesupelec.fr;universite-paris-saclay.fr;etsmtl.ca;microsoft.com;mila.quebec", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "CentraleSup\u00e9lec;Universit\u00e9 Paris-Saclay;Universit\u00e9 du Qu\u00e9bec;Microsoft;Quebec AI Institute", "aff_unique_dep": ";;;Microsoft Corporation;AI Institute", "aff_unique_url": "https://www.centralesupelec.fr;https://www.universite-paris-saclay.fr;https://www.etsmtl.ca;https://www.microsoft.com;https://mila.quebec", "aff_unique_abbr": "CS;UPSaclay;ETS;Microsoft;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";\u00c9cole de technologie sup\u00e9rieure", "aff_country_unique_index": "0;0;1;2;1", "aff_country_unique": "France;Canada;United States" }, { "title": "SGLang: Efficient Execution of Structured Language Model Programs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94872", "id": "VqkAKQibpq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VqkAKQibpq", "openreview": "https://openreview.net/forum?id=VqkAKQibpq", "poster": "", "project": "", "author_site": "Lianmin Zheng, Liangsheng Yin, Zhiqiang Xie, Chuyue (Livia) Sun, Jeff Huang, Cody Hao Yu, Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph Gonzalez, Clark Barrett, Ying Sheng", "tldr": "", "abstract": "Large language models (LLMs) are increasingly used for complex tasks that require multiple generation calls, advanced prompting techniques, control flow, and structured inputs/outputs. However, efficient systems are lacking for programming and executing these applications. We introduce SGLang, a system for efficient execution of complex language model programs. SGLang consists of a frontend language and a runtime. The frontend simplifies programming with primitives for generation and parallelism control. The runtime accelerates execution with novel optimizations like RadixAttention for KV cache reuse and compressed finite state machines for faster structured output decoding. Experiments show that SGLang achieves up to $6.4\\times$ higher throughput compared to state-of-the-art inference systems on various large language and multi-modal models on tasks including agent control, logical reasoning, few-shot learning benchmarks, JSON decoding, retrieval-augmented generation pipelines, and multi-turn chat. The code is publicly available at https://github.com/sgl-project/sglang.", "keywords": "large language models; inference optimizations; KV cache; programming systems", "primary_area": "infrastructure", "supplementary_material": "", "author": "Lianmin Zheng;Liangsheng Yin;Zhiqiang Xie;Chuyue Sun;Jeff Huang;Cody Hao Yu;Shiyi Cao;Christos Kozyrakis;Ion Stoica;Joseph E. Gonzalez;Clark Barrett;Ying Sheng", "authorids": "~Lianmin_Zheng2;~Liangsheng_Yin1;~Zhiqiang_Xie3;~Chuyue_Sun1;~Jeff_Huang2;~Cody_Hao_Yu1;~Shiyi_Cao1;~Christos_Kozyrakis1;~Ion_Stoica1;~Joseph_E._Gonzalez1;~Clark_Barrett1;~Ying_Sheng1", "gender": "M;M;;;;M;F;M;M;M;M;F", "homepage": "http://lmzheng.net/;https://lsyin.me;https://zhiqiangxie.com/;https://web.stanford.edu/~chuyues/;https://engineering.tamu.edu/cse/profiles/huang-jeff.html;;https://shiyicao.com/;https://www.stanford.edu/~kozyraki;http://people.eecs.berkeley.edu/~istoica/;http://eecs.berkeley.edu/~jegonzal;http://theory.stanford.edu/~barrett;https://sites.google.com/view/yingsheng", "dblp": "211/7027;;;;68/4706-1.html;;;k/ChristoforosEKozyrakis.html;s/IonStoica;61/8262;b/ClarkWBarrett;262/6232.html", "google_scholar": "_7Q8uIYAAAAJ;;Be2fl8sAAAAJ;;UmrxW60AAAAJ;jWRhmqoAAAAJ;https://scholar.google.com/citations?hl=en;G2EJz5kAAAAJ;vN-is70AAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;https://scholar.google.com.tw/citations?user=BtwmZfQAAAAJ;xMhGYpgAAAAJ", "orcid": ";;0000-0002-0214-6439;;0000-0003-1393-0752;;;0000-0002-3154-7530;;0000-0003-2921-956X;0000-0002-9522-3084;0000-0002-1883-2126", "linkedin": ";;xiezhq;;;;;ckozyrakis/;ionstoica;;clark-barrett-a5b157/;", "or_profile": "~Lianmin_Zheng2;~Liangsheng_Yin1;~Zhiqiang_Xie3;~Chuyue_Sun1;~Jeff_Huang2;~Cody_Hao_Yu1;~Shiyi_Cao1;~Christos_Kozyrakis1;~Ion_Stoica1;~Joseph_E._Gonzalez1;~Clark_Barrett1;~Ying_Sheng1", "aff": "University of California, Berkeley;Shanghai Jiaotong University;NVIDIA;Stanford University;, Texas A&M University - College Station;Boson AI, Inc;University of California, Berkeley;Stanford University;University of California, Berkeley;University of California, Berkeley;Stanford University;Stanford University", "aff_domain": "berkeley.edu;sjtu.edu.cn;nvidia.com;stanford.edu;cse.tamu.edu;boson.ai;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu;stanford.edu;stanford.edu", "position": "PhD student;Undergrad student;Intern;PhD student;Associate Professor;Founding Engineer;PhD student;Full Professor;Full Professor;Associate Professor;Professor (Research);PhD student", "bibtex": "@inproceedings{\nzheng2024sglang,\ntitle={{SGL}ang: Efficient Execution of Structured Language Model Programs},\nauthor={Lianmin Zheng and Liangsheng Yin and Zhiqiang Xie and Chuyue Sun and Jeff Huang and Cody Hao Yu and Shiyi Cao and Christos Kozyrakis and Ion Stoica and Joseph E. Gonzalez and Clark Barrett and Ying Sheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VqkAKQibpq}\n}", "github": "", "reviewers": "N1WL;U8WY;P67b;PeTj;QRuS", "pdf_size": 1289331, "rating": "7;7;7;8;8", "confidence": "4;4;3;3;4", "soundness": "3;4;4;4;4", "novelty": "4;4;3;4;4", "presentation": "2;4;4;4;4", "wc_summary": "145;116;29;88;96", "wc_strengths": "83;147;36;172;54", "wc_weaknesses": "160;71;168;59;1", "wc_questions": "2;73;5;54;203", "wc_limitations": "1;3;22;20;1", "wc_review": "391;410;260;393;355", "wc_reply_reviewers": "21;9;5;18;21", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.8, 0.39999999999999997 ], "novelty_avg": [ 3.8, 0.39999999999999997 ], "presentation_avg": [ 3.6, 0.8000000000000002 ], "wc_summary_avg": [ 94.8, 38.33223186823329 ], "wc_strengths_avg": [ 98.4, 52.690037008907105 ], "wc_weaknesses_avg": [ 91.8, 63.57798361068083 ], "wc_questions_avg": [ 67.4, 73.16993918269988 ], "wc_limitations_avg": [ 9.4, 9.520504188329523 ], "wc_review_avg": [ 361.8, 53.96072645915731 ], "wc_reply_reviewers_avg": [ 14.8, 6.584831053261731 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.16666666666666669, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8507440691663138730&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "berkeley.edu;sjtu.edu.cn;nvidia.com;stanford.edu;cse.tamu.edu;boson.ai;berkeley.edu;stanford.edu;berkeley.edu;berkeley.edu;stanford.edu;stanford.edu", "author_num": 12, "aff_unique_index": "0;1;2;3;4;5;0;3;0;0;3;3", "aff_unique_norm": "University of California, Berkeley;Shanghai Jiao Tong University;NVIDIA;Stanford University;Texas A&M University;Boson AI", "aff_unique_dep": ";;NVIDIA Corporation;;;", "aff_unique_url": "https://www.berkeley.edu;https://www.sjtu.edu.cn;https://www.nvidia.com;https://www.stanford.edu;https://www.tamu.edu;https://www.boson.ai", "aff_unique_abbr": "UC Berkeley;SJTU;NVIDIA;Stanford;TAMU;Boson AI", "aff_campus_unique_index": "0;2;3;0;2;0;0;2;2", "aff_campus_unique": "Berkeley;;Stanford;College Station", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Nonparametric Instrumental Variable Regression through Stochastic Approximate Gradients", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94871", "id": "VqxODXhU4k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VqxODXhU4k", "openreview": "https://openreview.net/forum?id=VqxODXhU4k", "poster": "", "project": "", "author_site": "Yuri Fonseca, Caio Peixoto, Yuri Saporito", "tldr": "", "abstract": "Instrumental variables (IVs) provide a powerful strategy for identifying causal effects in the presence of unobservable confounders. Within the nonparametric setting (NPIV), recent methods have been based on nonlinear generalizations of Two-Stage Least Squares and on minimax formulations derived from moment conditions or duality. In a novel direction, we show how to formulate a functional stochastic gradient descent algorithm to tackle NPIV regression by directly minimizing the populational risk. We provide theoretical support in the form of bounds on the excess risk, and conduct numerical experiments showcasing our method's superior stability and competitive performance relative to current state-of-the-art alternatives. This algorithm enables flexible estimator choices, such as neural networks or kernel based methods, as well as non-quadratic loss functions, which may be suitable for structural equations beyond the setting of continuous outcomes and additive noise. Finally, we demonstrate this flexibility of our framework by presenting how it naturally addresses the important case of binary outcomes, which has received far less attention by recent developments in the NPIV literature.", "keywords": "Nonparametric Instrumental Variables;Stochastic Gradients;RKHS;Binary response;Deep Learning;Causality", "primary_area": "optimization", "supplementary_material": "/attachment/ab406699e2ed424ad588b54ef45100c7c402bc28.zip", "author": "Yuri Fonseca;Caio Peixoto;Yuri Saporito", "authorids": "~Yuri_Fonseca1;~Caio_Peixoto1;~Yuri_Saporito1", "gender": ";M;M", "homepage": ";;https://www.yurisaporito.com", "dblp": "295/9443;;", "google_scholar": "https://scholar.google.com.br/citations?user=hr1PnUkAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";caio-lins;", "or_profile": "~Yuri_Fonseca1;~Caio_Peixoto1;~Yuri_Saporito1", "aff": "Columbia University;Funda\u00e7\u00e3o Get\u00falio Vargas (FGV);FGV EMAp", "aff_domain": "columbia.edu;fgv.br;emap.fgv.br", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nfonseca2024nonparametric,\ntitle={Nonparametric Instrumental Variable Regression through Stochastic Approximate Gradients},\nauthor={Yuri Fonseca and Caio Peixoto and Yuri Saporito},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VqxODXhU4k}\n}", "github": "", "reviewers": "xcFr;1tRc;spVa;WKaj", "pdf_size": 855387, "rating": "5;5;8;8", "confidence": "5;3;5;4", "soundness": "3;3;4;4", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "94;69;60;64", "wc_strengths": "29;133;30;50", "wc_weaknesses": "226;147;88;52", "wc_questions": "2;74;98;25", "wc_limitations": "1;10;1;1", "wc_review": "352;433;277;192", "wc_reply_reviewers": "9;49;22;9", "wc_reply_authors": "0;0;16;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 13.235841491949047 ], "wc_strengths_avg": [ 60.5, 42.68782027698299 ], "wc_weaknesses_avg": [ 128.25, 65.84214076106578 ], "wc_questions_avg": [ 49.75, 38.10757798653701 ], "wc_limitations_avg": [ 3.25, 3.897114317029974 ], "wc_review_avg": [ 313.5, 89.24264675590925 ], "wc_reply_reviewers_avg": [ 22.25, 16.330569494050106 ], "wc_reply_authors_avg": [ 4.0, 6.928203230275509 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3015113445777637, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8508215805561591646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "columbia.edu;fgv.br;emap.fgv.br", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Columbia University;Funda\u00e7\u00e3o Get\u00falio Vargas;Funda\u00e7\u00e3o Getulio Vargas", "aff_unique_dep": ";;Escola de Matem\u00e1tica Aplicada", "aff_unique_url": "https://www.columbia.edu;https://www.fgv.br;https://www.fgv.br", "aff_unique_abbr": "Columbia;FGV;FGV", "aff_campus_unique_index": "1", "aff_campus_unique": ";S\u00e3o Paulo", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Brazil" }, { "title": "Stepwise Alignment for Constrained Language Model Policy Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94870", "id": "VrVx83BkQX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VrVx83BkQX", "openreview": "https://openreview.net/forum?id=VrVx83BkQX", "poster": "/media/PosterPDFs/NeurIPS%202024/94870.png?t=1730188194.303792", "project": "", "author_site": "Akifumi Wachi, Thien Tran, Rei Sato, Takumi Tanabe, Youhei Akimoto", "tldr": "", "abstract": "Safety and trustworthiness are indispensable requirements for real-world applications of AI systems using large language models (LLMs). This paper formulates human value alignment as an optimization problem of the language model policy to maximize reward under a safety constraint, and then proposes an algorithm, Stepwise Alignment for Constrained Policy Optimization (SACPO). One key idea behind SACPO, supported by theory, is that the optimal policy incorporating reward and safety can be directly obtained from a reward-aligned policy. Building on this key idea, SACPO aligns LLMs step-wise with each metric while leveraging simple yet powerful alignment algorithms such as direct preference optimization (DPO). SACPO offers several advantages, including simplicity, stability, computational efficiency, and flexibility of algorithms and datasets. Under mild assumptions, our theoretical analysis provides the upper bounds on optimality and safety constraint violation. Our experimental results show that SACPO can fine-tune Alpaca-7B better than the state-of-the-art method in terms of both helpfulness and harmlessness.", "keywords": "AI Alignment;Large Language Models;AI Safety;Safe RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/a63c66d6cfa6630196f1d9b138119faf28a450a7.zip", "author": "Akifumi Wachi;Thien Q. Tran;Rei Sato;Takumi Tanabe;Youhei Akimoto", "authorids": "~Akifumi_Wachi2;~Thien_Q._Tran1;~Rei_Sato1;~Takumi_Tanabe1;~Youhei_Akimoto1", "gender": "M;M;M;;", "homepage": "https://akifumi-wachi-4.github.io/website/;;https://madoibito80.github.io/cv.html;;", "dblp": "218/7526;;281/6688;290/1677.html;71/1035", "google_scholar": "https://scholar.google.co.jp/citations?user=iC2b9GUAAAAJ;YcBy-3wAAAAJ;pfMdvGMAAAAJ;https://scholar.google.co.jp/citations?user=HcsuW7EAAAAJ;m7OXdsUAAAAJ", "orcid": ";;0009-0001-2932-0206;;0000-0003-2760-8123", "linkedin": "akifumi-wachi-008654123/?originalSubdomain=jp;;rei-sato-806b40282;;", "or_profile": "~Akifumi_Wachi2;~Thien_Q._Tran1;~Rei_Sato1;~Takumi_Tanabe1;~Youhei_Akimoto1", "aff": "LY Corporation;LINE Yahoo Corp;LY Corporation;LY Corporation;University of Tsukuba", "aff_domain": "lycorp.co.jp;lycorp.co.jp;lycorp.co.jp;lycorp.co.jp;tsukuba.ac.jp", "position": "Chief Research Scientist;Researcher;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nwachi2024stepwise,\ntitle={Stepwise Alignment for Constrained Language Model Policy Optimization},\nauthor={Akifumi Wachi and Thien Q. Tran and Rei Sato and Takumi Tanabe and Youhei Akimoto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VrVx83BkQX}\n}", "github": "", "reviewers": "4XGF;z9np;VoMk;w7hx;KBZp", "pdf_size": 716301, "rating": "6;6;6;7;7", "confidence": "4;4;4;4;4", "soundness": "2;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "95;40;86;67;111", "wc_strengths": "142;28;62;369;34", "wc_weaknesses": "179;295;58;310;9", "wc_questions": "114;2;119;123;44", "wc_limitations": "1;2;32;30;16", "wc_review": "531;367;357;899;214", "wc_reply_reviewers": "148;0;15;1487;13", "wc_reply_authors": "635;0;31;1187;29", "reply_reviewers": "3;0;1;3;1", "reply_authors": "4;1;2;4;2", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 79.8, 24.45730974575904 ], "wc_strengths_avg": [ 127.0, 127.64325285732889 ], "wc_weaknesses_avg": [ 170.2, 121.46670325648917 ], "wc_questions_avg": [ 80.4, 48.795901467233904 ], "wc_limitations_avg": [ 16.2, 13.212115651930995 ], "wc_review_avg": [ 473.6, 235.2068026227133 ], "wc_reply_reviewers_avg": [ 332.6, 579.7159994342057 ], "wc_reply_authors_avg": [ 376.4, 470.2363660968811 ], "reply_reviewers_avg": [ 1.6, 1.2000000000000002 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11388899423720488412&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "lycorp.co.jp;lycorp.co.jp;lycorp.co.jp;lycorp.co.jp;tsukuba.ac.jp", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "LY Corporation;Yahoo Corporation;University of Tsukuba", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.yahoo.com;https://www.tsukuba.ac.jp", "aff_unique_abbr": ";Yahoo;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";United States;Japan" }, { "title": "Online Budgeted Matching with General Bids", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94869", "id": "Vtxy8wFpTj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vtxy8wFpTj", "openreview": "https://openreview.net/forum?id=Vtxy8wFpTj", "poster": "/media/PosterPDFs/NeurIPS%202024/94869.png?t=1733545330.69811", "project": "", "author_site": "Jianyi Yang, Pengfei Li, Adam Wierman, Shaolei Ren", "tldr": "", "abstract": "Online Budgeted Matching (OBM) is a classic problem with important applications in online advertising, online service matching, revenue management, and beyond. Traditional online algorithms typically assume a small bid setting, where the maximum bid-to-budget ratio ($\\kappa$) is infinitesimally small. While recent algorithms have tried to address scenarios with non-small or general bids, they often rely on the Fractional Last Matching (FLM) assumption, which allows for accepting partial bids when the remaining budget is insufficient. This assumption, however, does not hold for many applications with indivisible bids. In this paper, we remove the FLM assumption and tackle the open problem of OBM with general bids. We first establish an upper bound of $1-\\kappa$ on the competitive ratio for any deterministic online algorithm. We then propose a novel meta algorithm, called MetaAd, which reduces to different algorithms with first known provable competitive ratios parameterized by the maximum bid-to-budget ratio $\\kappa\\in [0,1]$. As a by-product, we extend MetaAd to the FLM setting and get provable competitive algorithms. Finally, we apply our competitive analysis to the design learning- augmented algorithms.", "keywords": "Online Budgeted Matching;General Bids;Competitive Ratio", "primary_area": "online_learning", "supplementary_material": "", "author": "Jianyi Yang;Pengfei Li;Adam Wierman;Shaolei Ren", "authorids": "~Jianyi_Yang1;~Pengfei_Li2;~Adam_Wierman1;~Shaolei_Ren1", "gender": "M;M;M;", "homepage": "https://jyang-ai.github.io;https://www.cs.ucr.edu/~pli081/;https://adamwierman.com/;", "dblp": "124/1315;;56/4447;", "google_scholar": "n7UUdJQAAAAJ;irA8gqoAAAAJ;4OvOdSgAAAAJ;", "orcid": ";0000-0003-3257-9929;0000-0002-5923-0199;", "linkedin": "jianyi-yang-b7a9181a6/;;adam-wierman-a529474/;", "or_profile": "~Jianyi_Yang1;~Pengfei_Li2;~Adam_Wierman1;~Shaolei_Ren1", "aff": "University of California, Riverside;University of California, Riverside;California Institute of Technology;", "aff_domain": "ucr.edu;ucr.edu;caltech.edu;", "position": "Researcher;PhD student;Professor;", "bibtex": "@inproceedings{\nyang2024online,\ntitle={Online Budgeted Matching with General Bids},\nauthor={Jianyi Yang and Pengfei Li and Adam Wierman and Shaolei Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vtxy8wFpTj}\n}", "github": "", "reviewers": "CTTb;JnBx;3oQ3;vF5Z", "pdf_size": 711769, "rating": "4;6;7;7", "confidence": "4;3;5;3", "soundness": "3;4;4;4", "novelty": "2;1;3;3", "presentation": "2;3;3;3", "wc_summary": "70;232;194;88", "wc_strengths": "59;38;343;26", "wc_weaknesses": "233;117;394;31", "wc_questions": "2;98;282;1", "wc_limitations": "2;1;69;1", "wc_review": "366;486;1282;147", "wc_reply_reviewers": "0;32;29;4", "wc_reply_authors": "45;41;58;42", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 146.0, 68.62943974709395 ], "wc_strengths_avg": [ 116.5, 131.30213250362692 ], "wc_weaknesses_avg": [ 193.75, 136.03193558866977 ], "wc_questions_avg": [ 95.75, 114.52155910569851 ], "wc_limitations_avg": [ 18.25, 29.303370113350443 ], "wc_review_avg": [ 570.25, 428.52793082831835 ], "wc_reply_reviewers_avg": [ 16.25, 14.359230480774379 ], "wc_reply_authors_avg": [ 46.5, 6.800735254367722 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B3Cc5YPcvLsJ:scholar.google.com/&scioq=Online+Budgeted+Matching+with+General+Bids&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ucr.edu;ucr.edu;caltech.edu;", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Riverside;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucr.edu;https://www.caltech.edu", "aff_unique_abbr": "UCR;Caltech", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Riverside;Pasadena", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Pipeline Parallelism with Controllable Memory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94868", "id": "Vvcnqs8091", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vvcnqs8091", "openreview": "https://openreview.net/forum?id=Vvcnqs8091", "poster": "/media/PosterPDFs/NeurIPS%202024/94868.png?t=1732526978.4449902", "project": "", "author_site": "Penghui Qi, Xinyi Wan, Nyamdavaa Amar, Min Lin", "tldr": "", "abstract": "Pipeline parallelism has been widely explored, but most existing schedules lack a systematic methodology. In this paper, we propose a framework to decompose pipeline schedules as repeating a building block, and show that the lifespan of the building block decides the peak activation memory of the pipeline schedule. Guided by the observations, we find that almost all existing pipeline schedules, to the best of our knowledge, are memory inefficient. To address this, we introduce a family of memory efficient building blocks with controllable activation memory, which can reduce the peak activation memory to 1/2 of 1F1B without sacrificing efficiency, and even to 1/3 with comparable throughput. We can also achieve almost zero pipeline bubbles while maintaining the same activation memory as 1F1B. Our evaluations demonstrate that in pure pipeline parallelism settings, our methods outperform 1F1B by from 7\\% to 55\\% in terms of throughput. When employing a grid search over hybrid parallelism hyperparameters in practical scenarios, our methods demonstrate a 16\\% throughput improvement over the 1F1B baseline for large language models. The implementation is open-sourced at https://github.com/sail-sg/zero-bubble-pipeline-parallelism.", "keywords": "LLM Training;Pipeline Parallelism;Balanced Memory;Zero Bubble;Building Block;Lifespan", "primary_area": "infrastructure", "supplementary_material": "", "author": "Penghui Qi;Xinyi Wan;Nyamdavaa Amar;Min Lin", "authorids": "~Penghui_Qi1;~Xinyi_Wan1;~Nyamdavaa_Amar1;~Min_Lin1", "gender": "M;M;M;M", "homepage": ";;;https://linmin.me", "dblp": "236/4626;;;", "google_scholar": "CLRsGEMAAAAJ;;;BGONmkIAAAAJ", "orcid": ";;;", "linkedin": ";xinyi-wan-6b975931/;nvmdava;min-lin-08a3a422/", "or_profile": "~Penghui_Qi1;~Xinyi_Wan1;~Nyamdavaa_Amar1;~Min_Lin1", "aff": "sea ai lab;Sea AI Lab;National University of Singapore;Sea AI Lab", "aff_domain": "sail.sea.com;sea.com;u.nus.edu;sea.com", "position": "Researcher;Researcher;Undergrad student;Principal Researcher", "bibtex": "@inproceedings{\nqi2024pipeline,\ntitle={Pipeline Parallelism with Controllable Memory},\nauthor={Penghui Qi and Xinyi Wan and Nyamdavaa Amar and Min Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vvcnqs8091}\n}", "github": "", "reviewers": "vShK;b8L7;w4r8", "pdf_size": 1163084, "rating": "5;7;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;2", "wc_summary": "100;87;92", "wc_strengths": "76;81;51", "wc_weaknesses": "356;28;57", "wc_questions": "191;31;176", "wc_limitations": "1;4;6", "wc_review": "724;231;382", "wc_reply_reviewers": "13;0;15", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 93.0, 5.354126134736337 ], "wc_strengths_avg": [ 69.33333333333333, 13.12334645668635 ], "wc_weaknesses_avg": [ 147.0, 148.25878276401255 ], "wc_questions_avg": [ 132.66666666666666, 72.14953607304454 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 445.6666666666667, 206.23988190669837 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 6.649979114420002 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17729825458586667417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sail.sea.com;sea.com;u.nus.edu;sea.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sea AI Lab;National University of Singapore", "aff_unique_dep": "AI Lab;", "aff_unique_url": ";https://www.nus.edu.sg", "aff_unique_abbr": ";NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Singapore" }, { "title": "Satformer: Accurate and Robust Traffic Data Estimation for Satellite Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94867", "id": "Vw1V9AgPXW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vw1V9AgPXW", "openreview": "https://openreview.net/forum?id=Vw1V9AgPXW", "poster": "/media/PosterPDFs/NeurIPS%202024/94867.png?t=1731321897.1185782", "project": "", "author_site": "Liang Qin, Xiyuan Liu, Wenting Wei, Liang Chengbin, Huaxi Gu", "tldr": "", "abstract": "The operations and maintenance of satellite networks heavily depend on traffic measurements. Due to the large-scale and highly dynamic nature of satellite networks, global measurement encounters significant challenges in terms of complexity and overhead. Estimating global network traffic data from partial traffic measurements is a promising solution. However, the majority of current estimation methods concentrate on low-rank linear decomposition, which is unable to accurately estimate. The reason lies in its inability to capture the intricate nonlinear spatio-temporal relationship found in large-scale, highly dynamic traffic data. This paper proposes Satformer, an accurate and robust method for estimating traffic data in satellite networks. In Satformer, we innovatively incorporate an adaptive sparse spatio-temporal attention mechanism. In the mechanism, more attention is paid to specific local regions of the input tensor to improve the model's sensitivity on details and patterns. This method enhances its capability to capture nonlinear spatio-temporal relationships. Experiments on small, medium, and large-scale satellite networks datasets demonstrate that Satformer outperforms mathematical and neural baseline methods notably. It provides substantial improvements in reducing errors and maintaining robustness, especially for larger networks. The approach shows promise for deployment in actual systems.", "keywords": "Satellite Networks; traffic measurement; Tensor Completion; Neural Network", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/2ad1b2c09c5fd7da1f339f092a07f23a5d1bfee9.zip", "author": "Liang Qin;Xiyuan Liu;Wenting Wei;Liang Chengbin;Huaxi Gu", "authorids": "~Liang_Qin2;~Xiyuan_Liu1;~Wenting_Wei1;~Liang_Chengbin1;~Huaxi_Gu1", "gender": "M;M;F;M;M", "homepage": ";https://l666xy.github.io/;https://faculty.xidian.edu.cn/WEIWENTING/zh_CN/index/411623/list/index.htm;https://blog.csdn.net/m0_63028174;https://web.xidian.edu.cn/hxgu/", "dblp": ";;184/0126;;", "google_scholar": "jSCcsUkAAAAJ;;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Liang_Qin2;~Xiyuan_Liu1;~Wenting_Wei1;~Liang_Chengbin1;~Huaxi_Gu1", "aff": "Xidian University;Xi'an University of Electronic Science and Technology;;Xi'an University of Electronic Science and Technology;Xi'an University of Electronic Science and Technology", "aff_domain": "xidian.edu.cn;xidian.edu.cn;;stu.xidian.edu.cn;xidian.edu", "position": "PhD student;Undergrad student;;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nLiang Qin2024satformer,\ntitle={Satformer: Accurate and Robust Traffic Data Estimation for Satellite Networks},\nauthor={Liang Qin and Xiyuan Liu and Wenting Wei and Liang Chengbin and Huaxi Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vw1V9AgPXW}\n}", "github": "", "reviewers": "6cUm;LZHh;78MM;RAyE", "pdf_size": 11086559, "rating": "3;5;7;7", "confidence": "4;3;5;3", "soundness": "2;2;3;4", "novelty": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "45;20;135;33", "wc_strengths": "53;26;188;47", "wc_weaknesses": "88;125;100;14", "wc_questions": "2;3;31;111", "wc_limitations": "8;3;19;3", "wc_review": "196;177;473;208", "wc_reply_reviewers": "105;23;7;0", "wc_reply_authors": "2361;1904;2199;1688", "reply_reviewers": "1;1;1;0", "reply_authors": "7;5;6;4", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.25, 45.18503623988809 ], "wc_strengths_avg": [ 78.5, 64.0097648800556 ], "wc_weaknesses_avg": [ 81.75, 41.33022501753408 ], "wc_questions_avg": [ 36.75, 44.420575187631236 ], "wc_limitations_avg": [ 8.25, 6.53356717268599 ], "wc_review_avg": [ 263.5, 121.4588407650921 ], "wc_reply_reviewers_avg": [ 33.75, 41.97246120970273 ], "wc_reply_authors_avg": [ 2038.0, 260.14707378711756 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 5.5, 1.118033988749895 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11299695570948221352&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "xidian.edu.cn;xidian.edu.cn;;stu.xidian.edu.cn;xidian.edu", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Xidian University;Xi'an University of Electronic Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;http://www.xidian.edu.cn/", "aff_unique_abbr": "Xidian;Xidian University", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Kernel-Based Function Approximation for Average Reward Reinforcement Learning: An Optimist No-Regret Algorithm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94866", "id": "VwUTz2pOnD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VwUTz2pOnD", "openreview": "https://openreview.net/forum?id=VwUTz2pOnD", "poster": "", "project": "", "author_site": "Sattar Vakili, Julia Olkhovskaya", "tldr": "", "abstract": "Reinforcement Learning (RL) utilizing kernel ridge regression to predict the expected value function represents a powerful method with great representational capacity. This setting is a highly versatile framework amenable to analytical results. We consider kernel-based function approximation for RL in the infinite horizon average reward setting, also referred to as the undiscounted setting. We propose an *optimistic* algorithm, similar to acquisition function based algorithms in the special case of bandits. We establish novel *no-regret* performance guarantees for our algorithm, under kernel-based modelling assumptions. Additionally, we derive a novel confidence interval for the kernel-based prediction of the expected value function, applicable across various RL problems.", "keywords": "Reinforcement learning;infinite horizon average reward setting;no-regret algorithm;kernel-based model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Sattar Vakili;Julia Olkhovskaya", "authorids": "~Sattar_Vakili1;~Julia_Olkhovskaya1", "gender": ";F", "homepage": "https://sattar-vakili.github.io/;https://sites.google.com/view/julia-olkhovskaya/home", "dblp": "140/5473;", "google_scholar": "N9xs8w0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Sattar_Vakili1;~Julia_Olkhovskaya1", "aff": "MediaTek Research;Vrije Universiteit Amsterdam", "aff_domain": "mtkresearch.com;vu.nl", "position": "Principal AI Research Manager;Postdoc", "bibtex": "@inproceedings{\nvakili2024kernelbased,\ntitle={Kernel-Based Function Approximation for Average Reward Reinforcement Learning: An Optimist No-Regret Algorithm},\nauthor={Sattar Vakili and Julia Olkhovskaya},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VwUTz2pOnD}\n}", "github": "", "reviewers": "pN55;unzJ;KBUd", "pdf_size": 368068, "rating": "5;6;7", "confidence": "2;2;2", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "210;71;43", "wc_strengths": "173;48;36", "wc_weaknesses": "121;123;47", "wc_questions": "187;21;16", "wc_limitations": "9;6;1", "wc_review": "700;269;143", "wc_reply_reviewers": "17;273;12", "wc_reply_authors": "233;435;0", "reply_reviewers": "1;3;1", "reply_authors": "2;4;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 108.0, 73.02510983673127 ], "wc_strengths_avg": [ 85.66666666666667, 61.94800687314771 ], "wc_weaknesses_avg": [ 97.0, 35.364765892999586 ], "wc_questions_avg": [ 74.66666666666667, 79.45788542086655 ], "wc_limitations_avg": [ 5.333333333333333, 3.2998316455372216 ], "wc_review_avg": [ 370.6666666666667, 238.48736281451522 ], "wc_reply_reviewers_avg": [ 100.66666666666667, 121.87516381755371 ], "wc_reply_authors_avg": [ 222.66666666666666, 177.738259496623 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JmadFRv_whUJ:scholar.google.com/&scioq=Kernel-Based+Function+Approximation+for+Average+Reward+Reinforcement+Learning:+An+Optimist+No-Regret+Algorithm&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "mtkresearch.com;vu.nl", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "MediaTek Inc.;Vrije Universiteit Amsterdam", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.mediatek.com/;https://www.vu.nl", "aff_unique_abbr": "MediaTek;VU Amsterdam", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Netherlands" }, { "title": "Learning Generalized Linear Programming Value Functions", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94865", "id": "Vxijl0IOId", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Vxijl0IOId", "openreview": "https://openreview.net/forum?id=Vxijl0IOId", "poster": "", "project": "", "author_site": "Tu Anh-Nguyen, Joey Huchette, Christian Tjandraatmadja", "tldr": "", "abstract": "We develop a theoretically-grounded learning method for the Generalized Linear Programming Value Function (GVF), which models the optimal value of a linear programming (LP) problem as its objective and constraint bounds vary. This function plays a fundamental role in algorithmic techniques for large-scale optimization, particularly in decomposition for two-stage mixed-integer linear programs (MILPs). This paper establishes a structural characterization of the GVF that enables it to be modeled as a particular neural network architecture, which we then use to learn the GVF in a way that benefits from three notable properties. First, our method produces a true under-approximation of the value function with respect to the constraint bounds. Second, the model is input-convex in the constraint bounds, which not only matches the structure of the GVF but also enables the trained model to be efficiently optimized over using LP. Finally, our learning method is unsupervised, meaning that training data generation does not require computing LP optimal values, which can be prohibitively expensive at large scales. We numerically show that our method can approximate the GVF well, even when compared to supervised methods that collect training data by solving an LP for each data point. Furthermore, as an application of our framework, we develop a fast heuristic method for large-scale two-stage MILPs with continuous second-stage variables, via a compact reformulation that can be solved faster than the full model linear relaxation at large scales and orders of magnitude faster than the original model.", "keywords": "Linear Programming; Mixed-Integer Programming; Neural Networks; Constrained Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Tu Anh-Nguyen;Joey Huchette;Christian Tjandraatmadja", "authorids": "~Tu_Anh-Nguyen1;~Joey_Huchette1;~Christian_Tjandraatmadja1", "gender": "M;;", "homepage": "https://tu-na.org;https://www.joehuchette.com/;", "dblp": ";;95/5357", "google_scholar": "66-VqFkAAAAJ;;", "orcid": "0000-0001-7430-3213;;", "linkedin": "https://www.linkedin.com/me?trk=p_mwlite_feed-secondary_nav;;", "or_profile": "~Tu_Anh-Nguyen1;~Joey_Huchette1;~Christian_Tjandraatmadja1", "aff": "Rice University;Rice University;Google", "aff_domain": "rice.edu;rice.edu;google.com", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nanh-nguyen2024learning,\ntitle={Learning Generalized Linear Programming Value Functions},\nauthor={Tu Anh-Nguyen and Joey Huchette and Christian Tjandraatmadja},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Vxijl0IOId}\n}", "github": "", "reviewers": "CMLg;bRXK;yPT2;qfDV", "pdf_size": 873483, "rating": "5;6;6;7", "confidence": "3;3;3;5", "soundness": "4;3;3;4", "novelty": "4;3;3;3", "presentation": "4;3;3;4", "wc_summary": "97;92;128;245", "wc_strengths": "128;39;72;55", "wc_weaknesses": "461;44;50;114", "wc_questions": "152;55;33;58", "wc_limitations": "25;22;30;31", "wc_review": "863;252;313;503", "wc_reply_reviewers": "200;39;0;67", "wc_reply_authors": "512;24;0;35", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 140.5, 61.88901356460612 ], "wc_strengths_avg": [ 73.5, 33.55964838909967 ], "wc_weaknesses_avg": [ 167.25, 171.80130238155937 ], "wc_questions_avg": [ 74.5, 45.77390086064329 ], "wc_limitations_avg": [ 27.0, 3.6742346141747673 ], "wc_review_avg": [ 482.75, 238.2544595595222 ], "wc_reply_reviewers_avg": [ 76.5, 75.1681448487323 ], "wc_reply_authors_avg": [ 142.75, 213.56190554497306 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yYFwBWz2MngJ:scholar.google.com/&scioq=Learning+Generalized+Linear+Programming+Value+Functions&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "rice.edu;rice.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Rice University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.rice.edu;https://www.google.com", "aff_unique_abbr": "Rice;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Deep Graph Neural Networks via Posteriori-Sampling-based Node-Adaptative Residual Module", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94864", "id": "VywZsAGhp0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VywZsAGhp0", "openreview": "https://openreview.net/forum?id=VywZsAGhp0", "poster": "/media/PosterPDFs/NeurIPS%202024/94864.png?t=1731418613.314643", "project": "", "author_site": "Jingbo Zhou, Yixuan Du, Ruqiong Zhang, Jun Xia, Zhizhi Yu, Zelin Zang, Di Jin, Carl Yang, Rui Zhang, Stan Z. Li", "tldr": "", "abstract": "Graph Neural Networks (GNNs), a type of neural network that can learn from graph-structured data through neighborhood information aggregation, have shown superior performance in various downstream tasks. However, as the number of layers increases, node representations becomes indistinguishable, which is known as over-smoothing. To address this issue, many residual methods have emerged. In this paper, we focus on the over-smoothing issue and related residual methods. Firstly, we revisit over-smoothing from the perspective of overlapping neighborhood subgraphs, and based on this, we explain how residual methods can alleviate over-smoothing by integrating multiple orders neighborhood subgraphs to avoid the indistinguishability of the single high-order neighborhood subgraphs. Additionally, we reveal the drawbacks of previous residual methods, such as the lack of node adaptability and severe loss of high-order neighborhood subgraph information, and propose a \\textbf{Posterior-Sampling-based, Node-Adaptive Residual module (PSNR)}. We theoretically demonstrate that PSNR can alleviate the drawbacks of previous residual methods. Furthermore, extensive experiments verify the superiority of the PSNR module in fully observed node classification and missing feature scenarios. Our code\nis available at \\href{https://github.com/jingbo02/PSNR-GNN}{https://github.com/jingbo02/PSNR-GNN}.", "keywords": "Graph Neural Networks;Deep Models", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Jingbo Zhou;Yixuan Du;Ruqiong Zhang;Jun Xia;Zhizhi Yu;Zelin Zang;Di Jin;Carl Yang;Rui Zhang;Stan Z. Li", "authorids": "~Jingbo_Zhou2;~Yixuan_Du1;~Ruqiong_Zhang1;~Jun_Xia1;~Zhizhi_Yu1;~Zelin_Zang2;~Di_Jin4;~Carl_Yang1;~Rui_Zhang28;~Stan_Z._Li2", "gender": "M;M;M;M;F;M;M;M;M;M", "homepage": "https://jingbo02.github.io/;https://github.com/Duyx5520;https://github.com/dwsjoan;http://junxia97.github.io/;;;http://cic.tju.edu.cn/faculty/jindi/index.htm;https://cs.emory.edu/~jyang71/;http://ccst.jlu.edu.cn/info/1312/17090.htm;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": ";;;;(2025-02-06-2282618);226/7615;67/1861-1.html;305/0254;;l/StanZLi", "google_scholar": ";;;aPKKpSYAAAAJ;;foERjnQAAAAJ;Q8MRRecAAAAJ;mOINlwcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0005-4221-2911;;;;;;;0000-0001-9145-4531;;", "linkedin": ";;;;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Jingbo_Zhou2;~Yixuan_Du1;~Ruqiong_Zhang1;~Jun_Xia1;~Zhizhi_Yu1;~Zelin_Zang2;~Di_Jin4;~Carl_Yang1;~Rui_Zhang28;~Stan_Z._Li1", "aff": "Jilin University;Jilin University;Jilin University;Westlake University, China;Tianjin University;National University of Singapore;Tianjin University;Emory University;Jilin University;Westlake University", "aff_domain": "jlu.edu.cn;jlu.edu.cn;jlu.edu.cn;westlake.edu.cn;tju.edu.cn;nus.edu.sg;tju.edu.cn;emory.edu;jlu.edu.cn;westlake.edu.cn", "position": "Undergrad student;Undergrad student;Undergrad student;PhD student;Assistant Professor;Intern;Full Professor;Assistant Professor;Associate Professor;Chair Professor", "bibtex": "@inproceedings{\nzhou2024deep,\ntitle={Deep Graph Neural Networks via Posteriori-Sampling-based Node-Adaptative Residual Module},\nauthor={Jingbo Zhou and Yixuan Du and Ruqiong Zhang and Jun Xia and Zhizhi Yu and Zelin Zang and Di Jin and Carl Yang and Rui Zhang and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VywZsAGhp0}\n}", "github": "", "reviewers": "yetc;61ZY;Uv7R;ZBJB", "pdf_size": 500531, "rating": "4;6;7;7", "confidence": "3;5;4;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "60;74;89;36", "wc_strengths": "25;35;64;54", "wc_weaknesses": "234;48;94;47", "wc_questions": "5;56;2;33", "wc_limitations": "8;23;2;10", "wc_review": "332;236;251;180", "wc_reply_reviewers": "0;12;14;22", "wc_reply_authors": "344;146;145;145", "reply_reviewers": "0;1;1;1", "reply_authors": "4;3;3;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.75, 19.51121472384536 ], "wc_strengths_avg": [ 44.5, 15.337861650177967 ], "wc_weaknesses_avg": [ 105.75, 76.44074502515004 ], "wc_questions_avg": [ 24.0, 22.079402165819616 ], "wc_limitations_avg": [ 10.75, 7.660776723022281 ], "wc_review_avg": [ 249.75, 54.36163628883884 ], "wc_reply_reviewers_avg": [ 12.0, 7.874007874011811 ], "wc_reply_authors_avg": [ 195.0, 86.02615881230545 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QC_oUaa8gVgJ:scholar.google.com/&scioq=Deep+Graph+Neural+Networks+via+Posteriori-Sampling-based+Node-Adaptative+Residual+Module&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "jlu.edu.cn;jlu.edu.cn;jlu.edu.cn;westlake.edu.cn;tju.edu.cn;nus.edu.sg;tju.edu.cn;emory.edu;jlu.edu.cn;westlake.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;1;2;3;2;4;0;1", "aff_unique_norm": "Jilin University;Westlake University;Tianjin University;National University of Singapore;Emory University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.westlake.edu.cn;http://www.tju.edu.cn;https://www.nus.edu.sg;https://www.emory.edu", "aff_unique_abbr": "JLU;WU;TJU;NUS;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;2;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "WAGLE: Strategic Weight Attribution for Effective and Modular Unlearning in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94863", "id": "VzOgnDJMgh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VzOgnDJMgh", "openreview": "https://openreview.net/forum?id=VzOgnDJMgh", "poster": "/media/PosterPDFs/NeurIPS%202024/94863.png?t=1733254395.6499171", "project": "", "author_site": "Jinghan Jia, Jiancheng Liu, Yihua Zhang, Parikshit Ram, Nathalie Baracaldo, Sijia Liu", "tldr": "", "abstract": "The need for effective unlearning mechanisms in large language models (LLMs) is increasingly urgent, driven by the necessity to adhere to data regulations and foster ethical generative AI practices. LLM unlearning is designed to reduce the impact of undesirable data influences and associated model capabilities without diminishing the utility of the model if unrelated to the information being forgotten. Despite growing interest, much of the existing research has focused on varied unlearning method designs to boost effectiveness and efficiency. However, the inherent relationship between model weights and LLM unlearning has not been extensively examined. In this paper, we systematically explore how model weights interact with unlearning processes in LLMs and we design the weight attribution-guided LLM unlearning method, WAGLE, which unveils the interconnections between 'influence' of weights and 'influence' of data to forget and retain in LLM generation. By strategically guiding the LLM unlearning across different types of unlearning methods and tasks, WAGLE can erase the undesired content, while maintaining the performance of the original tasks. We refer to the weight attribution-guided LLM unlearning method as WAGLE, which unveils the interconnections between 'influence' of weights and 'influence' of data to forget and retain in LLM generation. Our extensive experiments show that WAGLE boosts unlearning performance across a range of LLM unlearning methods such as gradient difference and (negative) preference optimization, applications such as fictitious unlearning (TOFU benchmark), malicious use prevention (WMDP benchmark), and copyrighted information removal, and models including Zephyr-7b-beta and Llama2-7b. To the best of our knowledge, our work offers the first principled method for attributing and pinpointing the influential weights in enhancing LLM unlearning. It stands in contrast to previous methods that lack weight attribution and simpler weight attribution techniques.", "keywords": "Machine unlearning; Weight attribution; LLMs;", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/16df3d5e4cb0522a469d1538aec1796dceaf86fb.zip", "author": "Jinghan Jia;Jiancheng Liu;Yihua Zhang;Parikshit Ram;Nathalie Baracaldo;Sijia Liu", "authorids": "~Jinghan_Jia1;~Jiancheng_Liu2;~Yihua_Zhang1;~Parikshit_Ram1;~Nathalie_Baracaldo1;~Sijia_Liu1", "gender": "M;M;M;M;;M", "homepage": "https://jinghanjia.netlify.app/;https://ljcc0930.github.io/;https://yihua-zhang.com;https://rithram.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=us-baracald;https://lsjxjtu.github.io/", "dblp": "286/5392;74/3002;;99/8314;87/10087;128/6972-1", "google_scholar": "bqP_zxYAAAAJ;ReWNzl4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;JaXmmnkAAAAJ;3ACndBYAAAAJ;C7dO_UgAAAAJ", "orcid": ";;;0000-0002-9456-029X;;", "linkedin": "jinghan-jia-5194451ba/;;zhangyihua/;parikshit-ram-4861325/;;", "or_profile": "~Jinghan_Jia1;~Jiancheng_Liu2;~Yihua_Zhang1;~Parikshit_Ram1;~Nathalie_Baracaldo1;~Sijia_Liu1", "aff": "Michigan State University;Michigan State University;Michigan State University;International Business Machines;IBM, International Business Machines;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;ibm.com;us.ibm.com;msu.edu", "position": "PhD student;MS student;PhD student;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\njia2024wagle,\ntitle={{WAGLE}: Strategic Weight Attribution for Effective and Modular Unlearning in Large Language Models},\nauthor={Jinghan Jia and Jiancheng Liu and Yihua Zhang and Parikshit Ram and Nathalie Baracaldo and Sijia Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VzOgnDJMgh}\n}", "github": "", "reviewers": "MRmy;QvH1;BmWx;eHyY", "pdf_size": 561074, "rating": "4;7;7;8", "confidence": "3;4;4;4", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "104;79;53;84", "wc_strengths": "66;45;113;158", "wc_weaknesses": "196;51;156;167", "wc_questions": "172;137;26;71", "wc_limitations": "1;141;36;27", "wc_review": "539;453;384;507", "wc_reply_reviewers": "20;39;0;34", "wc_reply_authors": "86;54;0;80", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.0, 18.179658962697843 ], "wc_strengths_avg": [ 95.5, 43.68352092036538 ], "wc_weaknesses_avg": [ 142.5, 54.811039034121585 ], "wc_questions_avg": [ 101.5, 56.7031745143074 ], "wc_limitations_avg": [ 51.25, 53.38714732967102 ], "wc_review_avg": [ 470.75, 58.76382815984677 ], "wc_reply_reviewers_avg": [ 23.25, 15.122417134836613 ], "wc_reply_authors_avg": [ 55.0, 33.95585369269929 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10894612990983566192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "msu.edu;msu.edu;msu.edu;ibm.com;us.ibm.com;msu.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Michigan State University;International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";;", "aff_unique_url": "https://www.msu.edu;https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "MSU;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Depth Anywhere: Enhancing 360 Monocular Depth Estimation via Perspective Distillation and Unlabeled Data Augmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94862", "id": "VzoyBrqJ4O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=VzoyBrqJ4O", "openreview": "https://openreview.net/forum?id=VzoyBrqJ4O", "poster": "/media/PosterPDFs/NeurIPS%202024/94862.png?t=1731596287.8441901", "project": "", "author_site": "Ning-Hsu (Albert) Wang, Yu-Lun Liu", "tldr": "", "abstract": "Accurately estimating depth in 360-degree imagery is crucial for virtual reality, autonomous navigation, and immersive media applications. Existing depth estimation methods designed for perspective-view imagery fail when applied to 360-degree images due to different camera projections and distortions. We propose a new depth estimation framework that uses unlabeled 360-degree data effectively. Our approach uses state-of-the-art perspective depth estimation models as teacher models to generate pseudo labels through a six-face cube projection technique, enabling efficient labeling of depth in 360-degree images. This method leverages the increasing availability of large datasets. It includes two main stages: offline mask generation for invalid regions and an online semi-supervised joint training regime. We tested our approach on benchmark datasets such as Matterport3D and Stanford2D3D, showing significant improvements in depth estimation accuracy, particularly in zero-shot scenarios. Our proposed training pipeline can enhance any 360 monocular depth estimator and demonstrate effective knowledge transfer across different camera projections and data types.", "keywords": "Monocular depth estimation;Knowledge distillation;Unlabeled data augmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ning-Hsu Wang;Yu-Lun Liu", "authorids": "~Ning-Hsu_Wang1;~Yu-Lun_Liu2", "gender": "Not Specified;", "homepage": "https://albert100121.github.io/;http://www.cmlab.csie.ntu.edu.tw/~yulunliu/", "dblp": "396/8716;142/0282-1", "google_scholar": "8kYY700AAAAJ;gliihzoAAAAJ", "orcid": ";", "linkedin": "ning-hsu-albert-wang;yu-lun-liu-37321396/", "or_profile": "~Ning-Hsu_Wang1;~Yu_Lun_Liu1", "aff": "Taiwan AI Labs;National Yang Ming Chiao Tung University", "aff_domain": "ailabs.tw;nycu.edu.tw", "position": "Engineer;Assistant Professor", "bibtex": "@inproceedings{\nwang2024depth,\ntitle={Depth Anywhere: Enhancing 360 Monocular Depth Estimation via Perspective Distillation and Unlabeled Data Augmentation},\nauthor={Ning-Hsu Wang and Yu-Lun Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=VzoyBrqJ4O}\n}", "github": "", "reviewers": "Kp4U;ewNL;AagT;F7bZ;3NfA", "pdf_size": 7592480, "rating": "5;5;5;5;6", "confidence": "3;5;5;5;4", "soundness": "3;3;3;2;3", "novelty": "2;3;2;2;3", "presentation": "3;3;3;2;3", "wc_summary": "48;144;57;88;61", "wc_strengths": "25;83;56;60;127", "wc_weaknesses": "81;104;280;54;263", "wc_questions": "4;399;146;62;46", "wc_limitations": "1;44;113;7;94", "wc_review": "159;774;652;271;591", "wc_reply_reviewers": "0;65;31;24;28", "wc_reply_authors": "90;283;77;21;75", "reply_reviewers": "0;2;1;1;1", "reply_authors": "3;3;3;2;3", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 79.6, 34.85168575549826 ], "wc_strengths_avg": [ 70.2, 33.878606819053225 ], "wc_weaknesses_avg": [ 156.4, 95.45386320102502 ], "wc_questions_avg": [ 131.4, 141.53670901925057 ], "wc_limitations_avg": [ 51.8, 45.11053092128267 ], "wc_review_avg": [ 489.4, 234.35921146820752 ], "wc_reply_reviewers_avg": [ 29.6, 20.8096131631513 ], "wc_reply_authors_avg": [ 109.2, 90.0675302203852 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 0.39999999999999997 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3547986828181492874&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ailabs.tw;nycu.edu.tw", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Taiwan AI Labs;National Yang Ming Chiao Tung University", "aff_unique_dep": ";", "aff_unique_url": "https://www.taiwanailabs.tw;https://www.nycu.edu.tw", "aff_unique_abbr": "TAI Labs;NYCU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Evaluating Numerical Reasoning in Text-to-Image Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97661", "id": "W0FEprcxva", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W0FEprcxva", "openreview": "https://openreview.net/forum?id=W0FEprcxva", "poster": "/media/PosterPDFs/NeurIPS%202024/97661.png?t=1733355248.4876487", "project": "", "author_site": "Ivana Kajic, Olivia Wiles, Isabela Albuquerque, Matthias Bauer, Su Wang, Jordi Pont-Tuset, Aida Nematzadeh", "tldr": "", "abstract": "Text-to-image generative models are capable of producing high-quality images that often faithfully depict concepts described using natural language. In this work, we comprehensively evaluate a range of text-to-image models on numerical reasoning tasks of varying difficulty, and show that even the most advanced models have only rudimentary numerical skills. Specifically, their ability to correctly generate an exact number of objects in an image is limited to small numbers, it is highly dependent on the context the number term appears in, and it deteriorates quickly with each successive number. We also demonstrate that models have poor understanding of linguistic quantifiers (such as \u201cfew\u201d or \u201cas many as\u201d), the concept of zero, and struggle with more advanced concepts such as fractional representations. We bundle prompts, generated images and human annotations into GeckoNum, a novel benchmark for evaluation of numerical reasoning.", "keywords": "generative models;text-to-image;evaluation;counting;numerical reasoning;cognitive science", "primary_area": "", "supplementary_material": "/attachment/de958cac62d8daff8aeafdcb417cf5c846af3ffe.pdf", "author": "Ivana Kajic;Olivia Wiles;Isabela Albuquerque;Matthias Bauer;Su Wang;Jordi Pont-Tuset;Aida Nematzadeh", "authorids": "~Ivana_Kajic1;~Olivia_Wiles1;~Isabela_Albuquerque1;~Matthias_Bauer1;~Su_Wang4;~Jordi_Pont-Tuset2;~Aida_Nematzadeh1", "gender": ";;F;;;M;", "homepage": ";;;;https://jacobsuwang.github.io/;https://jponttuset.cat;http://www.aidanematzadeh.me/", "dblp": "135/3514;194/3191;210/2719;;37/5976-1;14/6661;153/9556", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.co.uk/citations?user=XQzHJSgAAAAJ;;;bJZV7r4AAAAJ;VCBBx24AAAAJ;FWJZYMYAAAAJ", "orcid": "0000-0001-8992-2391;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Ivana_Kajic1;~Olivia_Wiles1;~Isabela_Albuquerque1;~Matthias_Bauer1;~Su_Wang4;~Jordi_Pont-Tuset2;~Aida_Nematzadeh1", "aff": "Gooogle DeepMind;Google;Google DeepMind;;Google;Google;", "aff_domain": "deepmind.com;google.com;deepmind.com;;google.com;google.com;", "position": "Researcher;Researcher;Researcher;;Researcher;Research Scientist;", "bibtex": "@inproceedings{\nkajic2024evaluating,\ntitle={Evaluating Numerical Reasoning in Text-to-Image Models},\nauthor={Ivana Kajic and Olivia Wiles and Isabela Albuquerque and Matthias Bauer and Su Wang and Jordi Pont-Tuset and Aida Nematzadeh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=W0FEprcxva}\n}", "github": "", "reviewers": "ra8a;qpir;m6se;XZvx", "pdf_size": 10998845, "rating": "5;6;7;7", "confidence": "4;3;4;5", "wc_summary_and_contributions": "68;78;88;94", "wc_strengths": "68;46;67;31", "wc_improvement": "115;12;3;139", "wc_limitations": "9;20;1;1", "wc_correctness": "8;7;1;1", "wc_clarity": "7;5;1;1", "wc_relation_to_prior_work": "7;12;1;1", "wc_documentation": "16;13;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "299;194;164;270", "wc_reply_reviewers": "87;0;8;58", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;1;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 82.0, 9.899494936611665 ], "wc_strengths_avg": [ 53.0, 15.443445211480501 ], "wc_improvement_avg": [ 67.25, 60.43333103511671 ], "wc_limitations_avg": [ 7.75, 7.790218225441442 ], "wc_correctness_avg": [ 4.25, 3.2691742076555053 ], "wc_clarity_avg": [ 3.5, 2.598076211353316 ], "wc_relation_to_prior_work_avg": [ 5.25, 4.602988159880492 ], "wc_documentation_avg": [ 7.75, 6.832825184358224 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 231.75, 54.77396735676538 ], "wc_reply_reviewers_avg": [ 38.25, 35.86345633092271 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=77637690708134428&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "deepmind.com;google.com;deepmind.com;;google.com;google.com;", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Multimodal Task Vectors Enable Many-Shot Multimodal In-Context Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94861", "id": "W0okTgsPvM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W0okTgsPvM", "openreview": "https://openreview.net/forum?id=W0okTgsPvM", "poster": "/media/PosterPDFs/NeurIPS%202024/94861.png?t=1733701675.643791", "project": "", "author_site": "Brandon Huang, Chancharik Mitra, Leonid Karlinsky, Assaf Arbelle, Trevor Darrell, Roei Herzig", "tldr": "", "abstract": "The recent success of interleaved Large Multimodal Models (LMMs) in few-shot learning suggests that in-context learning (ICL) with many examples can be promising for learning new tasks. However, this many-shot multimodal ICL setting has one crucial problem: it is fundamentally limited by the model's context length set at pretraining. The problem is especially prominent in the multimodal domain, which processes both text and images, requiring additional tokens. This motivates the need for a multimodal method to compress many shots into fewer tokens without finetuning. In this work, we enable LMMs to perform multimodal, many-shot in-context learning by leveraging Multimodal Task Vectors (MTV)---compact implicit representations of in-context examples compressed in the model's attention heads. Specifically, we first demonstrate the existence of such MTV in LMMs and then leverage these extracted MTV to enable many-shot in-context learning for various vision-and-language tasks. Our experiments suggest that MTV can scale in performance with the number of compressed shots and generalize to similar out-of-domain tasks without additional context length for inference. Code: https://github.com/Brandon3964/MultiModal-Task-Vector", "keywords": "Large Multimodal Models;Vision-and-Language;In-Context Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/d4a27fa469fcee56349a5682888caeac31a09482.zip", "author": "Brandon Huang;Chancharik Mitra;Leonid Karlinsky;Assaf Arbelle;Trevor Darrell;Roei Herzig", "authorids": "~Brandon_Huang1;~Chancharik_Mitra1;~Leonid_Karlinsky3;~Assaf_Arbelle1;~Trevor_Darrell2;~Roei_Herzig2", "gender": "M;M;M;M;M;M", "homepage": "https://www.linkedin.com/in/brandon-huang-aba504275/;;;https://www.linkedin.com/in/assaf-arbelle-74065876/?originalSubdomain=il;https://roeiherz.github.io/;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";;05/4463;168/5494;215/5165;d/TrevorDarrell", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;https://scholar.google.co.uk/citations?user=uU_V_PsAAAAJ;https://scholar.google.co.il/citations?user=6Q-289IAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";0009-0008-9826-7534;;0000-0001-6559-2316;;", "linkedin": ";chancharik-m-151756178/;;assaf-arbelle-74065876/?originalSubdomain=il;roei-herzig-7534615a/;", "or_profile": "~Brandon_Huang1;~Chancharik_Mitra1;~Leonid_Karlinsky3;~Assaf_Arbelle1;~Roei_Herzig2;~trevor_darrell1", "aff": "University of California, Berkeley;University of California, Berkeley;International Business Machines;International Business Machines;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;berkeley.edu;ibm.com;ibm.com;berkeley.edu;eecs.berkeley.edu", "position": "Undergrad student;Undergrad student;Principal Researcher;Researcher;Postdoc;Professor", "bibtex": "@inproceedings{\nhuang2024multimodal,\ntitle={Multimodal Task Vectors Enable Many-Shot Multimodal In-Context Learning},\nauthor={Brandon Huang and Chancharik Mitra and Leonid Karlinsky and Assaf Arbelle and Trevor Darrell and Roei Herzig},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W0okTgsPvM}\n}", "github": "", "reviewers": "p88K;3gQC;dwXj;Xn5t", "pdf_size": 10065480, "rating": "1;5;5;7", "confidence": "5;4;3;4", "soundness": "1;2;3;3", "novelty": "1;3;3;3", "presentation": "1;2;2;3", "wc_summary": "39;159;106;128", "wc_strengths": "1;56;42;55", "wc_weaknesses": "1;407;272;59", "wc_questions": "1;18;5;46", "wc_limitations": "1;3;4;3", "wc_review": "43;643;429;291", "wc_reply_reviewers": "0;37;18;24", "wc_reply_authors": "0;0;30;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.5, 2.179449471770337 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 108.0, 44.06245567373657 ], "wc_strengths_avg": [ 38.5, 22.34390297150433 ], "wc_weaknesses_avg": [ 184.75, 163.23659975630466 ], "wc_questions_avg": [ 17.5, 17.613914953808536 ], "wc_limitations_avg": [ 2.75, 1.0897247358851685 ], "wc_review_avg": [ 351.5, 217.83652127226048 ], "wc_reply_reviewers_avg": [ 19.75, 13.311179511974137 ], "wc_reply_authors_avg": [ 7.5, 12.99038105676658 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3329931975832293980&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "berkeley.edu;berkeley.edu;ibm.com;ibm.com;berkeley.edu;eecs.berkeley.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;2", "aff_unique_norm": "University of California, Berkeley;International Business Machines Corporation;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://www.ibm.com;", "aff_unique_abbr": "UC Berkeley;IBM;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Kaleidoscope: Learnable Masks for Heterogeneous Multi-agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94860", "id": "W0wq9njGHi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W0wq9njGHi", "openreview": "https://openreview.net/forum?id=W0wq9njGHi", "poster": "/media/PosterPDFs/NeurIPS%202024/94860.png?t=1730690721.875235", "project": "", "author_site": "Xinran Li, Ling Pan, Jun Zhang", "tldr": "", "abstract": "In multi-agent reinforcement learning (MARL), parameter sharing is commonly employed to enhance sample efficiency. However, the popular approach of full parameter sharing often leads to homogeneous policies among agents, potentially limiting the performance benefits that could be derived from policy diversity. To address this critical limitation, we introduce \\emph{Kaleidoscope}, a novel adaptive partial parameter sharing scheme that fosters policy heterogeneity while still maintaining high sample efficiency. Specifically, Kaleidoscope maintains one set of common parameters alongside multiple sets of distinct, learnable masks for different agents, dictating the sharing of parameters. It promotes diversity among policy networks by encouraging discrepancy among these masks, without sacrificing the efficiencies of parameter sharing. This design allows Kaleidoscope to dynamically balance high sample efficiency with a broad policy representational capacity, effectively bridging the gap between full parameter sharing and non-parameter sharing across various environments. We further extend Kaleidoscope to critic ensembles in the context of actor-critic algorithms, which could help improve value estimations. Our empirical evaluations across extensive environments, including multi-agent particle environment, multi-agent MuJoCo and StarCraft multi-agent challenge v2, demonstrate the superior performance of Kaleidoscope compared with existing parameter sharing approaches, showcasing its potential for performance enhancement in MARL. The code is publicly available at \\url{https://github.com/LXXXXR/Kaleidoscope}.", "keywords": "multi-agent reinforcement learning; parameter sharing", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Xinran Li;Ling Pan;Jun Zhang", "authorids": "~Xinran_Li3;~Ling_Pan1;~Jun_Zhang25", "gender": "F;F;", "homepage": "https://lxxxxr.github.io/;https://ling-pan.github.io/;https://eejzhang.people.ust.hk/", "dblp": ";199/9303/;z/JunZhang4", "google_scholar": "6fYlKXgAAAAJ;qZ_zlacAAAAJ;1Is687QAAAAJ", "orcid": "0000-0003-0245-9459;;0000-0002-5222-1898", "linkedin": ";;", "or_profile": "~Xinran_Li3;~Ling_Pan1;~Jun_Zhang25", "aff": "Hong Kong University of Science and Technology;Montreal Institute for Learning Algorithms (MILA);Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;mila.umontreal.ca;ust.hk", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nli2024kaleidoscope,\ntitle={Kaleidoscope: Learnable Masks for Heterogeneous Multi-agent Reinforcement Learning},\nauthor={Xinran Li and Ling Pan and Jun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W0wq9njGHi}\n}", "github": "", "reviewers": "Y5Eh;CAY2;BN4p;WQkZ", "pdf_size": 3604776, "rating": "4;6;6;7", "confidence": "5;5;4;5", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "112;63;65;74", "wc_strengths": "14;149;71;164", "wc_weaknesses": "196;456;149;49", "wc_questions": "66;56;17;104", "wc_limitations": "1;8;55;10", "wc_review": "389;732;357;401", "wc_reply_reviewers": "74;21;12;0", "wc_reply_authors": "382;339;26;0", "reply_reviewers": "2;1;1;0", "reply_authors": "4;3;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 78.5, 19.78004044485248 ], "wc_strengths_avg": [ 99.5, 60.68978497243173 ], "wc_weaknesses_avg": [ 212.5, 150.27391656571675 ], "wc_questions_avg": [ 60.75, 30.96267914764483 ], "wc_limitations_avg": [ 18.5, 21.33658829335187 ], "wc_review_avg": [ 469.75, 152.2619042965114 ], "wc_reply_reviewers_avg": [ 26.75, 28.27874643614883 ], "wc_reply_authors_avg": [ 186.75, 174.65591172359441 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11775371648903198431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "hkust.edu;mila.umontreal.ca;ust.hk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Montreal Institute for Learning Algorithms", "aff_unique_dep": ";Artificial Intelligence", "aff_unique_url": "https://www.ust.hk;https://mila.quebec", "aff_unique_abbr": "HKUST;MILA", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Hong Kong SAR;Montreal", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Canada" }, { "title": "ContextGS : Compact 3D Gaussian Splatting with Anchor Level Context Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94859", "id": "W2qGSMl2Uu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W2qGSMl2Uu", "openreview": "https://openreview.net/forum?id=W2qGSMl2Uu", "poster": "", "project": "", "author_site": "Yufei Wang, Zhihao Li, Lanqing Guo, Wenhan Yang, Alex Kot, Bihan Wen", "tldr": "", "abstract": "Recently, 3D Gaussian Splatting (3DGS) has become a promising framework for novel view synthesis, offering fast rendering speeds and high fidelity. However, the large number of Gaussians and their associated attributes require effective compression techniques. \nExisting methods primarily compress neural Gaussians individually and independently, i.e., coding all the neural Gaussians at the same time, with little design for their interactions and spatial dependence. Inspired by the effectiveness of the context model in image compression, we propose the first autoregressive model at the anchor level for 3DGS compression in this work. We divide anchors into different levels and the anchors that are not coded yet can be predicted based on the already coded ones in all the coarser levels, leading to more accurate modeling and higher coding efficiency. To further improve the efficiency of entropy coding, e.g., to code the coarsest level with no already coded anchors, we propose to introduce a low-dimensional quantized feature as the hyperprior for each anchor, which can be effectively compressed. Our work pioneers the context model in the anchor level for 3DGS representation, yielding an impressive size reduction of over 100 times compared to vanilla 3DGS and 15 times compared to the most recent state-of-the-art work Scaffold-GS, while achieving comparable or even higher rendering quality.", "keywords": "3D scene compression;3D Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yufei Wang;Zhihao Li;Lanqing Guo;Wenhan Yang;Alex Kot;Bihan Wen", "authorids": "~Yufei_Wang5;~Zhihao_Li14;~Lanqing_Guo1;~Wenhan_Yang6;~Alex_Kot1;~Bihan_Wen2", "gender": "M;M;F;M;;M", "homepage": "https://github.com/wyf0912/;https://lizhihao6.github.io;https://guolanqing.github.io;https://flyywh.github.io/;https://www.ntu.edu.sg/home/eackot/;https://personal.ntu.edu.sg/bihan.wen/", "dblp": ";;229/8223.html;156/2359.html;;158/9840", "google_scholar": "jLd1l_sAAAAJ;gWlYsj0AAAAJ;8rkIFHcAAAAJ;S8nAnakAAAAJ;;ypkClpwAAAAJ", "orcid": ";0000-0002-2066-8775;;;;0000-0002-6874-6453", "linkedin": ";;;;;", "or_profile": "~Yufei_Wang5;~Zhihao_Li14;~Lanqing_Guo1;~Wenhan_Yang6;~Alex_Kot1;~Bihan_Wen2", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Peng Cheng Laboratory;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;pcl.ac.cn;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Researcher;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024contextgs,\ntitle={Context{GS} : Compact 3D Gaussian Splatting with Anchor Level Context Model},\nauthor={Yufei Wang and Zhihao Li and Lanqing Guo and Wenhan Yang and Alex Kot and Bihan Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W2qGSMl2Uu}\n}", "github": "", "reviewers": "fG6F;8ybR;otkA;gvyt", "pdf_size": 14054748, "rating": "4;5;5;7", "confidence": "5;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "3;3;1;4", "wc_summary": "86;71;80;46", "wc_strengths": "47;64;40;39", "wc_weaknesses": "109;90;128;38", "wc_questions": "169;163;79;73", "wc_limitations": "49;86;15;8", "wc_review": "460;474;342;204", "wc_reply_reviewers": "127;71;19;58", "wc_reply_authors": "1218;359;457;224", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;3;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 70.75, 15.2540978100968 ], "wc_strengths_avg": [ 47.5, 10.012492197250394 ], "wc_weaknesses_avg": [ 91.25, 33.55126674210677 ], "wc_questions_avg": [ 121.0, 45.09988913511872 ], "wc_limitations_avg": [ 39.5, 31.00403199585499 ], "wc_review_avg": [ 370.0, 108.6922260329597 ], "wc_reply_reviewers_avg": [ 68.75, 38.6935072072822 ], "wc_reply_authors_avg": [ 564.5, 386.2605985600913 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10334719489600598703&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;pcl.ac.cn;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;Pengcheng Laboratory", "aff_unique_dep": ";Peng Cheng Laboratory", "aff_unique_url": "https://www.ntu.edu.sg;http://www.pcl.ac.cn", "aff_unique_abbr": "NTU;PCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Contextual Bilevel Reinforcement Learning for Incentive Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94858", "id": "W3Dx1TGW3f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W3Dx1TGW3f", "openreview": "https://openreview.net/forum?id=W3Dx1TGW3f", "poster": "/media/PosterPDFs/NeurIPS%202024/94858.png?t=1733225286.2350202", "project": "", "author_site": "Vinzenz Thoma, Barna P\u00e1sztor, Andreas Krause, Giorgia Ramponi, Yifan Hu", "tldr": "", "abstract": "The optimal policy in various real-world strategic decision-making problems depends both on the environmental configuration and exogenous events. For these settings, we introduce Contextual Bilevel Reinforcement Learning (CB-RL), a stochastic bilevel decision-making model, where the lower level consists of solving a contextual Markov Decision Process (CMDP). CB-RL can be viewed as a Stackelberg Game where the leader and a random context beyond the leader\u2019s control together decide the setup of many MDPs that potentially multiple followers best respond to. This framework extends beyond traditional bilevel optimization and finds relevance in diverse fields such as RLHF, tax design, reward shaping, contract theory and mechanism design. We propose a stochastic Hyper Policy Gradient Descent (HPGD) algorithm to solve CB-RL, and demonstrate its convergence. Notably, HPGD uses stochastic hypergradient estimates, based on observations of the followers\u2019 trajectories. Therefore, it allows followers to use any training procedure and the leader to be agnostic of the specific algorithm, which aligns with various real-world scenarios. We further consider the setting when the leader can influence the training of followers and propose an accelerated algorithm. We empirically demonstrate the performance of our algorithm for reward shaping and tax design.", "keywords": "Reinforcement Learning;Bilevel Optimization;Contextual MDPs;Environment Design;Model Design", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Vinzenz Thoma;Barna P\u00e1sztor;Andreas Krause;Giorgia Ramponi;Yifan Hu", "authorids": "~Vinzenz_Thoma1;~Barna_P\u00e1sztor1;~Andreas_Krause1;~Giorgia_Ramponi1;~Yifan_Hu2", "gender": "M;;M;F;M", "homepage": "https://vinzenzthoma.com;;https://las.inf.ethz.ch/krausea;https://gioramponi.github.io/;https://sites.google.com/view/yifan-hu", "dblp": ";273/3840;87/1831-1.html;186/4493;", "google_scholar": ";t2QJiCkAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;xbIAH5gAAAAJ;rO2s0EEAAAAJ", "orcid": "0009-0008-9603-3152;;0000-0001-7260-9673;;", "linkedin": ";;krausea/;;", "or_profile": "~Vinzenz_Thoma1;~Barna_P\u00e1sztor1;~Andreas_Krause1;~Giorgia_Ramponi1;~Yifan_Hu2", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Full Professor;Postdoc;Postdoc", "bibtex": "@inproceedings{\nthoma2024contextual,\ntitle={Contextual Bilevel Reinforcement Learning for Incentive Alignment},\nauthor={Vinzenz Thoma and Barna P{\\'a}sztor and Andreas Krause and Giorgia Ramponi and Yifan Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W3Dx1TGW3f}\n}", "github": "", "reviewers": "Gnk4;miD3;Y6ix;UZX1", "pdf_size": 8366489, "rating": "5;6;6;7", "confidence": "4;3;4;2", "soundness": "2;2;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "113;54;68;98", "wc_strengths": "26;39;55;53", "wc_weaknesses": "134;17;39;87", "wc_questions": "6;140;27;29", "wc_limitations": "6;1;4;25", "wc_review": "285;251;193;292", "wc_reply_reviewers": "186;16;0;28", "wc_reply_authors": "315;6;0;15", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 23.40272420040026 ], "wc_strengths_avg": [ 43.25, 11.712706775122479 ], "wc_weaknesses_avg": [ 69.25, 45.14628999153751 ], "wc_questions_avg": [ 50.5, 52.4523593368306 ], "wc_limitations_avg": [ 9.0, 9.40744386111339 ], "wc_review_avg": [ 255.25, 39.14316670889058 ], "wc_reply_reviewers_avg": [ 57.5, 74.85151969065157 ], "wc_reply_authors_avg": [ 84.0, 133.4747167069479 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17420712670158155138&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "MILP-StuDio: MILP Instance Generation via Block Structure Decomposition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94857", "id": "W433RI0VU4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W433RI0VU4", "openreview": "https://openreview.net/forum?id=W433RI0VU4", "poster": "/media/PosterPDFs/NeurIPS%202024/94857.png?t=1731335305.0394807", "project": "", "author_site": "Haoyang Liu, Jie Wang, Wanbo Zhang, Zijie Geng, Yufei Kuang, Xijun Li, Bin Li, Yongdong Zhang, Feng Wu", "tldr": "", "abstract": "Mixed-integer linear programming (MILP) is one of the most popular mathematical formulations with numerous applications. In practice, improving the performance of MILP solvers often requires a large amount of high-quality data, which can be challenging to collect. Researchers thus turn to generation techniques to generate additional MILP instances. However, existing approaches do not take into account specific block structures\u2014which are closely related to the problem formulations\u2014in the constraint coefficient matrices (CCMs) of MILPs. Consequently, they are prone to generate computationally trivial or infeasible instances due to the disruptions of block structures and thus problem formulations. To address this challenge, we propose a novel MILP generation framework, called Block Structure Decomposition (MILP-StuDio), to generate high-quality instances by preserving the block structures. Specifically, MILP-StuDio begins by identifying the blocks in CCMs and decomposing the instances into block units, which serve as the building blocks of MILP instances. We then design three operators to construct new instances by removing, substituting, and appending block units in the original instances, enabling us to generate instances with flexible sizes. An appealing feature of MILP-StuDio is its strong ability to preserve the feasibility and computational hardness of the generated instances. Experiments on the commonly-used benchmarks demonstrate that using instances generated by MILP-StuDio is able to significantly reduce over 10% of the solving time for learning-based solvers.", "keywords": "Mixed Integer Linear Programming;MILP Instance Generation", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Haoyang Liu;Jie Wang;Wanbo Zhang;Zijie Geng;Yufei Kuang;Xijun Li;Bin Li;Yongdong Zhang;Feng Wu", "authorids": "~Haoyang_Liu2;~Jie_Wang1;~Wanbo_Zhang1;~Zijie_Geng1;~Yufei_Kuang1;~Xijun_Li1;~Bin_Li8;~Yongdong_Zhang2;~Feng_Wu1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://miralab.ai/people/haoyang-liu/;http://staff.ustc.edu.cn/~jwangx;https://github.com/zwb45;https://miralab.ai/people/zijie-geng/;https://miralab.ai/people/yufei-kuang/;https://xijunlee.github.io/;http://staff.ustc.edu.cn/~binli;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;", "dblp": "53/8773-2.html;29/5259-5;;320/7568;280/1134;203/0784;89/6764-25;z/YongdongZhang;25/3972-1", "google_scholar": ";OugG4dUAAAAJ;;https://scholar.google.com.hk/citations?user=Ga66HL4AAAAJ;STN3F_oAAAAJ;QXU_QbMAAAAJ;;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;5bInRDEAAAAJ", "orcid": ";;;;;0000-0002-9013-1180;0000-0002-2332-3959;0000-0003-0066-3448;", "linkedin": ";;;;;;;;", "or_profile": "~Haoyang_Liu2;~Jie_Wang1;~Wanbo_Zhang1;~Zijie_Geng1;~Yufei_Kuang1;~Xijun_Li1;~Bin_Li8;~Yongdong_Zhang2;~Feng_Wu1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Huawei Technologies Ltd.;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu;ustc.edu.cn;mail.ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;huawei.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Full Professor;Undergrad student;MS student;PhD student;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024milpstudio,\ntitle={{MILP}-StuDio: {MILP} Instance Generation via Block Structure Decomposition},\nauthor={Haoyang Liu and Jie Wang and Wanbo Zhang and Zijie Geng and Yufei Kuang and Xijun Li and Bin Li and Yongdong Zhang and Feng Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W433RI0VU4}\n}", "github": "", "reviewers": "jQXE;jbdZ;zDfV;Rqw8", "pdf_size": 9758216, "rating": "3;4;5;7", "confidence": "4;5;3;3", "soundness": "2;2;2;3", "novelty": "1;2;2;3", "presentation": "2;3;2;2", "wc_summary": "88;50;109;92", "wc_strengths": "32;66;231;33", "wc_weaknesses": "119;61;88;155", "wc_questions": "117;41;8;3", "wc_limitations": "1;2;2;2", "wc_review": "357;220;438;285", "wc_reply_reviewers": "0;21;14;33", "wc_reply_authors": "802;1268;625;89", "reply_reviewers": "0;1;1;1", "reply_authors": "5;5;4;3", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.75, 21.556611514799815 ], "wc_strengths_avg": [ 90.5, 82.26329679753906 ], "wc_weaknesses_avg": [ 105.75, 35.06690034776384 ], "wc_questions_avg": [ 42.25, 45.55971356362988 ], "wc_limitations_avg": [ 1.75, 0.4330127018922193 ], "wc_review_avg": [ 325.0, 81.26807491260021 ], "wc_reply_reviewers_avg": [ 17.0, 11.937336386313323 ], "wc_reply_authors_avg": [ 696.0, 421.87379629457905 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.6625413488689132, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9364330993403629243&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu;ustc.edu.cn;mail.ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;huawei.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.ustc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "USTC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MediQ: Question-Asking LLMs and a Benchmark for Reliable Interactive Clinical Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94856", "id": "W4pIBQ7bAI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W4pIBQ7bAI", "openreview": "https://openreview.net/forum?id=W4pIBQ7bAI", "poster": "/media/PosterPDFs/NeurIPS%202024/94856.png?t=1733707651.5678856", "project": "", "author_site": "Stella Li, Vidhisha Balachandran, Shangbin Feng, Jonathan Ilgen, Emma Pierson, Pang Wei Koh, Yulia Tsvetkov", "tldr": "", "abstract": "Users typically engage with LLMs interactively, yet most existing benchmarks evaluate them in a static, single-turn format, posing reliability concerns in interactive scenarios. We identify a key obstacle towards reliability: LLMs are trained to answer any question, even with incomplete context or insufficient knowledge. In this paper, we propose to change the static paradigm to an interactive one, develop systems that proactively ask questions to gather more information and respond reliably, and introduce an benchmark\u2014MEDIQ\u2014to evaluate question-asking ability in LLMs. MEDIQ simulates clinical interactions consisting of a Patient System and an adaptive Expert System; with potentially incomplete initial information, the Expert refrains from making diagnostic decisions when unconfident, and instead elicits missing details via follow-up questions. We provide a pipeline to convert single-turn medical benchmarks into an interactive format. Our results show that directly prompting state-of-the-art LLMs to ask questions degrades performance, indicating that adapting LLMs to proactive information-seeking settings is nontrivial. We experiment with abstention strategies to better estimate model confidence and decide when to ask questions, improving diagnostic accuracy by 22.3%; however, performance still lags compared to an (unrealistic in practice) upper bound with complete information upfront. Further analyses show improved interactive performance with filtering irrelevant contexts and reformatting conversations. Overall, we introduce a novel problem towards LLM reliability, an interactive MEDIQ benchmark and a novel question-asking system, and highlight directions to extend LLMs\u2019 information-seeking abilities in critical domains.", "keywords": "Clinical Reasoning;Question Asking;Information-Seeking;Adaptive Interactions;LLM Abstention", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Shuyue Stella Li;Vidhisha Balachandran;Shangbin Feng;Jonathan S. Ilgen;Emma Pierson;Pang Wei Koh;Yulia Tsvetkov", "authorids": "~Shuyue_Stella_Li1;~Vidhisha_Balachandran1;~Shangbin_Feng1;~Jonathan_S._Ilgen1;~Emma_Pierson1;~Pang_Wei_Koh1;~Yulia_Tsvetkov1", "gender": "F;F;M;M;F;M;F", "homepage": "http://stellalisy.com/;https://vidhishanair.github.io/;https://bunsenfeng.github.io/;;https://people.eecs.berkeley.edu/~emmapierson/;http://cs.stanford.edu/~pangwei;https://homes.cs.washington.edu/~yuliats/", "dblp": "312/6501;234/4867;295/9571;;159/0572;10/10453;75/8157", "google_scholar": "CRfOlOEAAAAJ;LgitgaIAAAAJ;Y3rLP9UAAAAJ;;xGORWi0AAAAJ;Nn990CkAAAAJ;SEDPkrsAAAAJ", "orcid": ";;0000-0002-4133-1987;0000-0003-4590-6570;;;0000-0002-4634-7128", "linkedin": ";;;;;;", "or_profile": "~Shuyue_Stella_Li1;~Vidhisha_Balachandran1;~Shangbin_Feng1;~Jonathan_S._Ilgen1;~Emma_Pierson1;~Pang_Wei_Koh1;~Yulia_Tsvetkov1", "aff": "Department of Computer Science, University of Washington;Carnegie Mellon University;University of Washington;University of Washington;Cornell Tech;University of Washington;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;cmu.edu;cs.washington.edu;uw.edu;cornell.edu;cs.washington.edu;cs.washington.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024mediq,\ntitle={MediQ: Question-Asking {LLM}s and a Benchmark for Reliable Interactive Clinical Reasoning},\nauthor={Shuyue Stella Li and Vidhisha Balachandran and Shangbin Feng and Jonathan S. Ilgen and Emma Pierson and Pang Wei Koh and Yulia Tsvetkov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W4pIBQ7bAI}\n}", "github": "", "reviewers": "SV4E;pkFC;M6dH;5NqX", "pdf_size": 8448158, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;3", "wc_summary": "241;70;54;57", "wc_strengths": "107;52;38;68", "wc_weaknesses": "583;147;60;129", "wc_questions": "86;40;2;71", "wc_limitations": "39;37;2;3", "wc_review": "1056;346;156;328", "wc_reply_reviewers": "28;0;12;18", "wc_reply_authors": "25;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.5, 78.46177413237608 ], "wc_strengths_avg": [ 66.25, 25.810608284191986 ], "wc_weaknesses_avg": [ 229.75, 206.51800768940222 ], "wc_questions_avg": [ 49.75, 32.17432982985038 ], "wc_limitations_avg": [ 20.25, 17.76759691123141 ], "wc_review_avg": [ 471.5, 345.51519503489277 ], "wc_reply_reviewers_avg": [ 14.5, 10.136567466356647 ], "wc_reply_authors_avg": [ 6.25, 10.825317547305483 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=154180289736107455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.washington.edu;cmu.edu;cs.washington.edu;uw.edu;cornell.edu;cs.washington.edu;cs.washington.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "University of Washington;Carnegie Mellon University;Cornell University", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.washington.edu;https://www.cmu.edu;https://tech.cornell.edu", "aff_unique_abbr": "UW;CMU;Cornell Tech", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Seattle;;New York City", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Relational Verification Leaps Forward with RABBit", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94855", "id": "W5U3XB1C11", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W5U3XB1C11", "openreview": "https://openreview.net/forum?id=W5U3XB1C11", "poster": "", "project": "", "author_site": "Tarun Suresh, Debangshu Banerjee, Gagandeep Singh", "tldr": "", "abstract": "We propose RABBit, a Branch-and-Bound-based verifier for verifying relational properties defined over Deep Neural Networks, such as robustness against universal adversarial perturbations (UAP). Existing SOTA complete $L_{\\infty}$-robustness verifiers can not reason about dependencies between multiple executions and, as a result, are imprecise for relational verification. In contrast, existing SOTA relational verifiers only apply a single bounding step and do not utilize any branching strategies to refine the obtained bounds, thus producing imprecise results. We develop the first scalable Branch-and-Bound-based relational verifier, RABBit, which efficiently combines branching over multiple executions with cross-executional bound refinement to utilize relational constraints, gaining substantial precision over SOTA baselines on a wide range of datasets and networks. Our code is at https://github.com/uiuc-focal-lab/RABBit.", "keywords": "Neural Network Verification;Relational Verification;Robustness;UAP verification;Optimization", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/ce412cf85cf7f464cc6187f97723231c71615196.zip", "author": "Tarun Suresh;Debangshu Banerjee;Gagandeep Singh", "authorids": "~Tarun_Suresh1;~Debangshu_Banerjee2;~Gagandeep_Singh1", "gender": "M;M;M", "homepage": "https://tarsur909.github.io/;https://debangshu-banerjee.github.io/;https://ggndpsngh.github.io/", "dblp": "348/7104;268/6756;64/3747-1", "google_scholar": "Yxx6B5YAAAAJ;G5dhKqAAAAAJ;https://scholar.google.ch/citations?user=m4b2ruEAAAAJ", "orcid": "0000-0002-1426-7633;0009-0001-0163-9717;0000-0002-9299-2961", "linkedin": "tarun-suresh-802231157/;debangshu-banerjee/;gagandeep-singh-1bb01b49/", "or_profile": "~Tarun_Suresh1;~Debangshu_Banerjee2;~Gagandeep_Singh1", "aff": "Department of Computer Science;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;uiuc.edu;illinois.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsuresh2024relational,\ntitle={Relational Verification Leaps Forward with {RABB}it},\nauthor={Tarun Suresh and Debangshu Banerjee and Gagandeep Singh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W5U3XB1C11}\n}", "github": "", "reviewers": "aDbJ;dFSH;uEoH;eacX", "pdf_size": 2088828, "rating": "3;6;7;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "1;1;3;1", "wc_summary": "29;61;75;177", "wc_strengths": "92;45;111;18", "wc_weaknesses": "374;277;644;284", "wc_questions": "722;90;78;48", "wc_limitations": "8;5;52;9", "wc_review": "1225;478;960;536", "wc_reply_reviewers": "516;10;310;9", "wc_reply_authors": "1705;17;376;22", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 85.5, 55.39629951540085 ], "wc_strengths_avg": [ 66.5, 36.89512162874653 ], "wc_weaknesses_avg": [ 394.75, 148.9016034164844 ], "wc_questions_avg": [ 234.5, 281.8736419036019 ], "wc_limitations_avg": [ 18.5, 19.397164741270824 ], "wc_review_avg": [ 799.75, 308.0603634030188 ], "wc_reply_reviewers_avg": [ 211.25, 214.49402672335657 ], "wc_reply_authors_avg": [ 530.0, 693.8252661873881 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.44022545316281186, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14865543725401908119&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cs.illinois.edu;uiuc.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": ";https://illinois.edu", "aff_unique_abbr": ";UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "title": "Universal Neural Functionals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94854", "id": "W89fKKP2AO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W89fKKP2AO", "openreview": "https://openreview.net/forum?id=W89fKKP2AO", "poster": "", "project": "", "author_site": "Allan Zhou, Chelsea Finn, James Harrison", "tldr": "", "abstract": "A challenging problem in many modern machine learning tasks is to process weight-space features, i.e., to transform or extract information from the weights and gradients of a neural network. Recent works have developed promising weight-space models that are equivariant to the permutation symmetries of simple feedforward networks. However, they are not applicable to general architectures, since the permutation symmetries of a weight space can be complicated by recurrence or residual connections. This work proposes an algorithm that automatically constructs permutation equivariant models, which we refer to as universal neural functionals (UNFs), for any weight space. Among other applications, we demonstrate how UNFs can be substituted into existing learned optimizer designs, and find promising improvements over prior methods when optimizing small image classifiers and language models. Our results suggest that learned optimizers can benefit from considering the (symmetry) structure of the weight space they optimize.", "keywords": "architecture;permutation;weight-space;equivariance;learned optimization", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/00a1e7449f0732d45463c8bddc3ecb531df95fda.zip", "author": "Allan Zhou;Chelsea Finn;James Harrison", "authorids": "~Allan_Zhou1;~Chelsea_Finn1;~James_Harrison1", "gender": ";F;", "homepage": "http://bland.website;https://ai.stanford.edu/~cbfinn/;", "dblp": "195/6907;131/1783;", "google_scholar": ";vfPE6hgAAAAJ;-tEiRFcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Allan_Zhou1;~Chelsea_Finn1;~James_Harrison1", "aff": "Stanford University;Google;Google", "aff_domain": "stanford.edu;google.com;google.com", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nzhou2024universal,\ntitle={Universal Neural Functionals},\nauthor={Allan Zhou and Chelsea Finn and James Harrison},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W89fKKP2AO}\n}", "github": "", "reviewers": "7uT6;k71j;J2cE;fP4N;eHaW", "pdf_size": 833066, "rating": "5;5;5;7;7", "confidence": "3;4;2;3;2", "soundness": "3;3;3;4;3", "novelty": "2;2;2;3;4", "presentation": "3;3;2;3;4", "wc_summary": "61;99;46;105;60", "wc_strengths": "75;126;15;128;36", "wc_weaknesses": "521;238;51;140;32", "wc_questions": "14;190;10;336;11", "wc_limitations": "4;14;7;32;1", "wc_review": "675;667;129;741;140", "wc_reply_reviewers": "13;55;9;152;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.2, 23.38717597316957 ], "wc_strengths_avg": [ 76.0, 45.88245852174881 ], "wc_weaknesses_avg": [ 196.4, 178.0141567404121 ], "wc_questions_avg": [ 112.2, 131.50574131953329 ], "wc_limitations_avg": [ 11.6, 11.074294559925702 ], "wc_review_avg": [ 470.4, 275.4832844293824 ], "wc_reply_reviewers_avg": [ 45.8, 56.37517184009287 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17567405355261224665&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "stanford.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Towards General Loop Invariant Generation: A Benchmark of Programs with Memory Manipulation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97660", "id": "W8OZdhowxo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W8OZdhowxo", "openreview": "https://openreview.net/forum?id=W8OZdhowxo", "poster": "", "project": "", "author_site": "Chang Liu, Xiwei Wu, Yuan Feng, Qinxiang Cao, Junchi Yan", "tldr": "", "abstract": "Program verification is vital for ensuring software reliability, especially in the context of increasingly complex systems. Loop invariants, remaining true before and after each iteration of loops, are crucial for this verification process. Traditional provers and machine learning based methods for generating loop invariants often require expert intervention or extensive labeled data, and typically only handle numerical property verification. These methods struggle with programs involving complex data structures and memory manipulations, limiting their applicability and automation capabilities. This paper introduces a new benchmark named LIG-MM, specifically for programs with complex data structures and memory manipulations. We collect 312 programs from various sources, including daily programs from college homework, the international competition (SV-COMP), benchmarks from previous papers (SLING), and programs from real-world software systems (Linux Kernel, GlibC, LiteOS, and Zephyr). Based on LIG-MM, our findings indicate that previous methods, including GPT-4, fail to automate verification for these programs. Consequently, we propose a novel LLM-SE framework that coordinates LLM with symbolic execution, fine-tuned using self-supervised learning, to generate loop invariants. Experimental results on LIG-MM demonstrate that our LLM-SE outperforms state-of-the-art methods, offering a new direction toward automated program verification in real-world scenarios.", "keywords": "Program Verification;Loop Invariant Generation;Large Language Models.", "primary_area": "", "supplementary_material": "/attachment/4bc491d9278ad1b1c0218732123ffb6c005c87ce.pdf", "author": "Chang Liu;Xiwei Wu;Yuan Feng;Qinxiang Cao;Junchi Yan", "authorids": "~Chang_Liu7;~Xiwei_Wu1;~Yuan_Feng11;~Qinxiang_Cao1;~Junchi_Yan2", "gender": "M;M;F;M;M", "homepage": "https://only-changer.github.io/;;;https://jhc.sjtu.edu.cn/people/members/faculty/qinxiang-cao.html;http://thinklab.sjtu.edu.cn/", "dblp": "52/5716;https://dblp.org/rec/journals/corr/abs-2311-10483;14/6701-1;;60/7949.html", "google_scholar": "BTu8eaQAAAAJ;;Vf84LL0AAAAJ;;ga230VoAAAAJ", "orcid": ";;;;0000-0001-9639-7679", "linkedin": ";;;;", "or_profile": "~Chang_Liu7;~Xiwei_Wu1;~Yuan_Feng11;~Qinxiang_Cao1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024towards,\ntitle={Towards General Loop Invariant Generation: A Benchmark of Programs with Memory Manipulation},\nauthor={Chang Liu and Xiwei Wu and Yuan Feng and Qinxiang Cao and Junchi Yan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=W8OZdhowxo}\n}", "github": "", "reviewers": "rGBC;EQmi;4YxC;HX1p", "pdf_size": 675259, "rating": "4;5;7;9", "confidence": "5;4;3;3", "wc_summary_and_contributions": "51;100;90;89", "wc_strengths": "60;86;67;44", "wc_improvement": "64;149;121;41", "wc_limitations": "7;10;6;6", "wc_correctness": "14;17;13;6", "wc_clarity": "6;5;8;4", "wc_relation_to_prior_work": "22;10;10;7", "wc_documentation": "33;13;22;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "258;391;338;204", "wc_reply_reviewers": "134;0;24;0", "wc_reply_authors": "400;75;16;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 82.5, 18.688231591030757 ], "wc_strengths_avg": [ 64.25, 15.07274029498286 ], "wc_improvement_avg": [ 93.75, 43.19360485071835 ], "wc_limitations_avg": [ 7.25, 1.6393596310755 ], "wc_correctness_avg": [ 12.5, 4.031128874149275 ], "wc_clarity_avg": [ 5.75, 1.479019945774904 ], "wc_relation_to_prior_work_avg": [ 12.25, 5.7608593109014565 ], "wc_documentation_avg": [ 18.5, 10.111874208078342 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 297.75, 71.91096926060725 ], "wc_reply_reviewers_avg": [ 39.5, 55.43239125276845 ], "wc_reply_authors_avg": [ 122.75, 162.48903809180482 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9028289727756884, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15400150615497944410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MambaTree: Tree Topology is All You Need in State Space Model", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94853", "id": "W8rFsaKr4m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=W8rFsaKr4m", "openreview": "https://openreview.net/forum?id=W8rFsaKr4m", "poster": "", "project": "", "author_site": "Yicheng Xiao, Lin Song, shaoli huang, Jiangshan Wang, Siyu Song, Yixiao Ge, Xiu Li, Ying Shan", "tldr": "", "abstract": "The state space models, employing recursively propagated features, demonstrate strong representation capabilities comparable to Transformer models and superior efficiency.\nHowever, constrained by the inherent geometric constraints of sequences, it still falls short in modeling long-range dependencies.\nTo address this issue, we propose the MambaTree network, which first dynamically generates a tree topology based on spatial relationships and input features.\nThen, feature propagation is performed based on this graph, thereby breaking the original sequence constraints to achieve stronger representation capabilities.\nAdditionally, we introduce a linear complexity dynamic programming algorithm to enhance long-range interactions without increasing computational cost.\nMambaTree is a versatile multimodal framework that can be applied to both visual and textual tasks.\nExtensive experiments demonstrate that our method significantly outperforms existing structured state space models on image classification, object detection and segmentation.\nBesides, by fine-tuning large language models, our approach achieves consistent improvements in multiple textual tasks at minor training cost.", "keywords": "State Space Model;Vision Recognition;Language Understanding", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/6e40433747a167e805c5b0a45c13991a8732ac20.zip", "author": "Yicheng Xiao;Lin Song;Shaoli Huang;Jiangshan Wang;Siyu Song;Yixiao Ge;Xiu Li;Ying Shan", "authorids": "~Yicheng_Xiao1;~Lin_Song2;~Shaoli_Huang2;~Jiangshan_Wang2;~Siyu_Song1;~Yixiao_Ge2;~Xiu_Li1;~Ying_Shan2", "gender": "M;M;M;M;;F;F;M", "homepage": ";https://linsong.cc;;https://github.com/wangjiangshan0725;https://blog.csdn.net/LoveforCS?type=blog;https://geyixiao.com/;https://thusigsiclab.github.io/thu.github.io/introduction.html;", "dblp": "322/9380;;80/8502;124/2780.html;;228/6649;13/1206-1;68/5910", "google_scholar": "oakZP0cAAAAJ;6Ra2TgQAAAAJ;o31BPFsAAAAJ;;;TtU74NAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4oXBp9UAAAAJ", "orcid": ";;;0009-0004-9314-021X;;;0000-0003-0403-1923;0000-0001-7673-8325", "linkedin": ";\u6797-\u5b8b-9520a5183/;;;;;;YingShanProfile/", "or_profile": "~Yicheng_Xiao1;~Lin_Song2;~Shaoli_Huang2;~Jiangshan_Wang2;~Siyu_Song1;~Yixiao_Ge2;~Xiu_Li1;~Ying_Shan2", "aff": "Tsinghua University;Tencent AI Lab;Tencent AI Lab;Tsinghua University;South China Normal University;Tencent;Tsinghua University;Tencent PCG ARC Lab", "aff_domain": "mail.tsinghua.edu.cn;tencent.com;tencent.com;mail.tsinghua.edu.cn;scnu.edu.cn;tencent.com;tsinghua.edu.cn;arc.tencent.com", "position": "MS student;Researcher;Researcher;MS student;Undergrad student;Researcher;Professor;Director", "bibtex": "@inproceedings{\nxiao2024mambatree,\ntitle={MambaTree: Tree Topology is All You Need in State Space Model},\nauthor={Yicheng Xiao and Lin Song and Shaoli Huang and Jiangshan Wang and Siyu Song and Yixiao Ge and Xiu Li and Ying Shan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=W8rFsaKr4m}\n}", "github": "", "reviewers": "NQEG;g9ag;LPAH;xmLw", "pdf_size": 5431891, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "59;61;74;96", "wc_strengths": "65;36;80;81", "wc_weaknesses": "77;41;275;212", "wc_questions": "46;5;1;136", "wc_limitations": "6;5;34;6", "wc_review": "253;148;464;531", "wc_reply_reviewers": "0;0;84;61", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 14.739402972983676 ], "wc_strengths_avg": [ 65.5, 18.172781845386247 ], "wc_weaknesses_avg": [ 151.25, 95.75065273928946 ], "wc_questions_avg": [ 47.0, 54.3185051340701 ], "wc_limitations_avg": [ 12.75, 12.275483697190918 ], "wc_review_avg": [ 349.0, 154.8918977868113 ], "wc_reply_reviewers_avg": [ 36.25, 37.15087482146282 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14480695074880071283&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "mail.tsinghua.edu.cn;tencent.com;tencent.com;mail.tsinghua.edu.cn;scnu.edu.cn;tencent.com;tsinghua.edu.cn;arc.tencent.com", "author_num": 8, "aff_unique_index": "0;1;1;0;2;1;0;1", "aff_unique_norm": "Tsinghua University;Tencent;South China Normal University", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://ai.tencent.com;http://www.scnu.edu.cn", "aff_unique_abbr": "THU;Tencent AI Lab;SCNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cardinality-Aware Set Prediction and Top-$k$ Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94852", "id": "WAT3qu737X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WAT3qu737X", "openreview": "https://openreview.net/forum?id=WAT3qu737X", "poster": "", "project": "", "author_site": "Corinna Cortes, Anqi Mao, Christopher Mohri, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "We present a detailed study of cardinality-aware top-$k$ classification, a novel approach that aims to learn an accurate top-$k$ set predictor while maintaining a low cardinality. We introduce a new target loss function tailored to this setting that accounts for both the classification error and the cardinality of the set predicted. To optimize this loss function, we propose two families of surrogate losses: cost-sensitive comp-sum losses and cost-sensitive constrained losses. Minimizing these loss functions leads to new cardinality-aware algorithms that we describe in detail in the case of both top-$k$ and threshold-based classifiers. We establish $H$-consistency bounds for our cardinality-aware surrogate loss functions, thereby providing a strong theoretical foundation for our algorithms. We report the results of extensive experiments on CIFAR-10, CIFAR-100, ImageNet, and SVHN datasets demonstrating the effectiveness and benefits of our cardinality-aware algorithms.", "keywords": "top-k classification;cardinality-aware algorithms;consistency;cost-sensitive loss;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Corinna Cortes;Anqi Mao;Christopher Mohri;Mehryar Mohri;Yutao Zhong", "authorids": "~Corinna_Cortes1;~Anqi_Mao1;~Christopher_Mohri1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;F;M;M;", "homepage": "https://research.google/people/author121/;https://anqi-mao.github.io;;https://cs.nyu.edu/~mohri/;", "dblp": "77/5783;241/6864;;03/5448;51/3178-2", "google_scholar": "U_IVY50AAAAJ;nkjIZ-oAAAAJ;_otSGXcAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;;;", "linkedin": ";;christopher-mohri-3429841a0/;mehryar-mohri-3737b981/;", "or_profile": "~Corinna_Cortes1;~Anqi_Mao1;~Christopher_Mohri1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Google;Courant Institute of Mathematical Sciences, NYU;Stanford University;Google Research;Google", "aff_domain": "google.com;cims.nyu.edu;stanford.edu;google.com;google.com", "position": "Researcher;PhD student;PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\ncortes2024cardinalityaware,\ntitle={Cardinality-Aware Set Prediction and Top-\\$k\\$ Classification},\nauthor={Corinna Cortes and Anqi Mao and Christopher Mohri and Mehryar Mohri and Yutao Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WAT3qu737X}\n}", "github": "", "reviewers": "5ki4;TDsY;a6WU", "pdf_size": 779842, "rating": "4;7;8", "confidence": "2;3;2", "soundness": "3;3;3", "novelty": "2;3;4", "presentation": "2;3;4", "wc_summary": "75;44;67", "wc_strengths": "79;41;41", "wc_weaknesses": "188;36;22", "wc_questions": "95;31;41", "wc_limitations": "4;7;4", "wc_review": "441;159;175", "wc_reply_reviewers": "0;37;26", "wc_reply_authors": "0;36;130", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 62.0, 13.140268896284683 ], "wc_strengths_avg": [ 53.666666666666664, 17.9133717900592 ], "wc_weaknesses_avg": [ 82.0, 75.17091636175967 ], "wc_questions_avg": [ 55.666666666666664, 28.110891523077353 ], "wc_limitations_avg": [ 5.0, 1.4142135623730951 ], "wc_review_avg": [ 258.3333333333333, 129.32989686156185 ], "wc_reply_reviewers_avg": [ 21.0, 15.513435037626794 ], "wc_reply_authors_avg": [ 55.333333333333336, 54.80470377217228 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2773500981126145, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8230978449972105245&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "google.com;cims.nyu.edu;stanford.edu;google.com;google.com", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Google;New York University;Stanford University", "aff_unique_dep": "Google;Courant Institute of Mathematical Sciences;", "aff_unique_url": "https://www.google.com;https://www.courant.nyu.edu;https://www.stanford.edu", "aff_unique_abbr": "Google;NYU;Stanford", "aff_campus_unique_index": "0;1;2;0;0", "aff_campus_unique": "Mountain View;New York;Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Derivative-enhanced Deep Operator Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94851", "id": "WAiqLGfqX6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WAiqLGfqX6", "openreview": "https://openreview.net/forum?id=WAiqLGfqX6", "poster": "/media/PosterPDFs/NeurIPS%202024/94851.png?t=1731455297.240748", "project": "", "author_site": "Yuan Qiu, Nolan Bridges, Peng Chen", "tldr": "", "abstract": "The deep operator networks (DeepONet), a class of neural operators that learn mappings between function spaces, have recently been developed as surrogate models for parametric partial differential equations (PDEs). In this work we propose a derivative-enhanced deep operator network (DE-DeepONet), which leverages derivative information to enhance the solution prediction accuracy and provides a more accurate approximation of solution-to-parameter derivatives, especially when training data are limited. DE-DeepONet explicitly incorporates linear dimension reduction of high dimensional parameter input into DeepONet to reduce training cost and adds derivative loss in the loss function to reduce the number of required parameter-solution pairs. We further demonstrate that the use of derivative loss can be extended to enhance other neural operators, such as the Fourier neural operator (FNO). Numerical experiments validate the effectiveness of our approach.", "keywords": "Neural operators;Operator learning;Derivative learning;DeepONet;Dimensionality reduction;Adjoint method", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/9658006b62aaa63b464616c1b7e14b3bcbe1f68e.zip", "author": "Yuan Qiu;Nolan Bridges;Peng Chen", "authorids": "~Yuan_Qiu2;~Nolan_Bridges1;~Peng_Chen1", "gender": "M;;", "homepage": ";;", "dblp": "329/3924;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": "yuan-qiu-a47404227/;nolanmbridges;", "or_profile": "~Yuan_Qiu2;~Nolan_Bridges1;~Peng_Chen1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;", "aff_domain": "gatech.edu;gatech.edu;", "position": "PhD student;Undergrad student;", "bibtex": "@inproceedings{\nqiu2024derivativeenhanced,\ntitle={Derivative-enhanced Deep Operator Network},\nauthor={Yuan Qiu and Nolan Bridges and Peng Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WAiqLGfqX6}\n}", "github": "", "reviewers": "41ML;WTPz;row2;nyqb", "pdf_size": 17946875, "rating": "5;6;6;7", "confidence": "3;3;2;5", "soundness": "3;2;3;3", "novelty": "2;3;2;2", "presentation": "3;2;2;4", "wc_summary": "62;16;72;76", "wc_strengths": "68;51;32;206", "wc_weaknesses": "229;110;41;345", "wc_questions": "80;133;67;502", "wc_limitations": "22;7;4;5", "wc_review": "461;317;216;1134", "wc_reply_reviewers": "30;120;30;83", "wc_reply_authors": "39;144;8;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 56.5, 23.93219588754864 ], "wc_strengths_avg": [ 89.25, 68.5980138196435 ], "wc_weaknesses_avg": [ 181.25, 116.01804816492992 ], "wc_questions_avg": [ 195.5, 178.67638344224454 ], "wc_limitations_avg": [ 9.5, 7.297259759663212 ], "wc_review_avg": [ 532.0, 358.30364218076267 ], "wc_reply_reviewers_avg": [ 65.75, 38.06819538670043 ], "wc_reply_authors_avg": [ 52.25, 54.14044236982184 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1010671453216126271&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Fine-Tuning Personalization in Federated Learning to Mitigate Adversarial Clients", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94850", "id": "WBLPlszJI5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WBLPlszJI5", "openreview": "https://openreview.net/forum?id=WBLPlszJI5", "poster": "", "project": "", "author_site": "Youssef Allouah, Abdellah El Mrini, Rachid Guerraoui, Nirupam Gupta, Rafael Pinot", "tldr": "", "abstract": "Federated learning (FL) is an appealing paradigm that allows a group of machines\n(a.k.a. clients) to learn collectively while keeping their data local. However, due\nto the heterogeneity between the clients\u2019 data distributions, the model obtained\nthrough the use of FL algorithms may perform poorly on some client\u2019s data.\nPersonalization addresses this issue by enabling each client to have a different\nmodel tailored to their own data while simultaneously benefiting from the other\nclients\u2019 data. We consider an FL setting where some clients can be adversarial, and\nwe derive conditions under which full collaboration fails. Specifically, we analyze\nthe generalization performance of an interpolated personalized FL framework in the\npresence of adversarial clients, and we precisely characterize situations when full\ncollaboration performs strictly worse than fine-tuned personalization. Our analysis\ndetermines how much we should scale down the level of collaboration, according\nto data heterogeneity and the tolerable fraction of adversarial clients. We support\nour findings with empirical results on mean estimation and binary classification\nproblems, considering synthetic and benchmark image classification datasets", "keywords": "Personalized Federated Learning;Optimization;Generalization;Byzantine Robustness", "primary_area": "other", "supplementary_material": "/attachment/989c0dd4a5f6210aeea2ddb02980fa4fe6120d74.zip", "author": "Youssef Allouah;Abdellah El Mrini;Rachid Guerraoui;Nirupam Gupta;Rafael Pinot", "authorids": "~Youssef_Allouah1;~Abdellah_El_Mrini1;~Rachid_Guerraoui1;~Nirupam_Gupta1;~Rafael_Pinot1", "gender": "M;M;M;;", "homepage": "https://youssefallouah.com/;;https://lpdwww.epfl.ch/rachid/;;", "dblp": "312/3936;;g/RachidGuerraoui;;", "google_scholar": "kVZu88cAAAAJ;;;;", "orcid": "0000-0003-1048-7548;;;;", "linkedin": ";abdellah-el-mrini/;;;", "or_profile": "~Youssef_Allouah1;~Abdellah_El_Mrini1;~Rachid_Guerraoui1;~Nirupam_Gupta1;~Rafael_Pinot1", "aff": "Stanford University;EPFL;;;", "aff_domain": "stanford.edu;epfl.ch;;;", "position": "Visiting student researcher;PhD student;;;", "bibtex": "@inproceedings{\nallouah2024finetuning,\ntitle={Fine-Tuning Personalization in Federated Learning to Mitigate Adversarial Clients},\nauthor={Youssef Allouah and Abdellah El Mrini and Rachid Guerraoui and Nirupam Gupta and Rafael Pinot},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WBLPlszJI5}\n}", "github": "", "reviewers": "Kq44;cJWD;Fsfa;uEJH", "pdf_size": 1122976, "rating": "5;5;5;7", "confidence": "3;3;3;3", "soundness": "3;3;2;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "75;54;56;27", "wc_strengths": "21;47;35;59", "wc_weaknesses": "85;52;57;69", "wc_questions": "3;49;57;3", "wc_limitations": "1;1;8;3", "wc_review": "185;203;213;161", "wc_reply_reviewers": "9;0;22;9", "wc_reply_authors": "0;47;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.0, 17.10263137648707 ], "wc_strengths_avg": [ 40.5, 14.097872179871684 ], "wc_weaknesses_avg": [ 65.75, 12.71563997602952 ], "wc_questions_avg": [ 28.0, 25.15949125081825 ], "wc_limitations_avg": [ 3.25, 2.8613807855648994 ], "wc_review_avg": [ 190.5, 19.767397400770797 ], "wc_reply_reviewers_avg": [ 10.0, 7.842193570679061 ], "wc_reply_authors_avg": [ 11.75, 20.351596988934308 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:S3EWOZD1j_4J:scholar.google.com/&scioq=Fine-Tuning+Personalization+in+Federated+Learning+to+Mitigate+Adversarial+Clients&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "stanford.edu;epfl.ch;;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.epfl.ch", "aff_unique_abbr": "Stanford;EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Understanding Transformers via N-Gram Statistics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94849", "id": "WCc440cUhX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WCc440cUhX", "openreview": "https://openreview.net/forum?id=WCc440cUhX", "poster": "/media/PosterPDFs/NeurIPS%202024/94849.png?t=1733696801.7408228", "project": "", "tldr": "", "abstract": "Transformer based large-language models (LLMs) display extreme proficiency with language yet a precise understanding of how they work remains elusive. One way of demystifying transformer predictions would be to describe how they depend on their context in terms of simple template functions. This paper takes a first step in this direction by considering families of functions (i.e. rules) formed out of simple N-gram based statistics of the training data. By studying how well these rulesets approximate transformer predictions, we obtain a variety of novel discoveries: a simple method to detect overfitting during training without using a holdout set, a quantitative measure of how transformers progress from learning simple to more complex statistical rules over the course of training, a model-variance criterion governing when transformer predictions tend to be described by N-gram rules, and insights into how well transformers can be approximated by N-gram rulesets in the limit where these rulesets become increasingly complex. In this latter direction, we find that for 79% and 68% of LLM next-token distributions on TinyStories and Wikipedia, respectively, their top-1 predictions agree with those provided by our N-gram rulesets.", "keywords": "transformers;large-language models;ngrams;curriculum learning;interpretability", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Timothy Nguyen", "authorids": "~Timothy_Nguyen1", "gender": "M", "homepage": "http://timothynguyen.wordpress.com", "dblp": "", "google_scholar": "r4FbY1IAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Timothy_Nguyen1", "aff": "Google", "aff_domain": "google.com", "position": "Research Engineer", "bibtex": "@inproceedings{\nnguyen2024understanding,\ntitle={Understanding Transformers via N-Gram Statistics},\nauthor={Timothy Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WCc440cUhX}\n}", "github": "", "reviewers": "H4bx;BciX;GxTz", "pdf_size": 2746831, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "2;3;2", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "78;124;259", "wc_strengths": "101;44;74", "wc_weaknesses": "358;68;163", "wc_questions": "199;60;13", "wc_limitations": "3;65;1", "wc_review": "739;361;510", "wc_reply_reviewers": "0;57;50", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 153.66666666666666, 76.81290400852075 ], "wc_strengths_avg": [ 73.0, 23.280893453645632 ], "wc_weaknesses_avg": [ 196.33333333333334, 120.71545974821213 ], "wc_questions_avg": [ 90.66666666666667, 78.96975511056257 ], "wc_limitations_avg": [ 23.0, 29.709706606876257 ], "wc_review_avg": [ 536.6666666666666, 155.4656089157842 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 25.381533094401966 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9585092364193834654&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Learning to Assist Humans without Inferring Rewards", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94848", "id": "WCnJmb7cv1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WCnJmb7cv1", "openreview": "https://openreview.net/forum?id=WCnJmb7cv1", "poster": "", "project": "", "author_site": "Vivek Myers, Evan Ellis, Sergey Levine, Benjamin Eysenbach, Anca Dragan", "tldr": "", "abstract": "Assistive agents should make humans' lives easier. Classically, such assistance is studied through the lens of inverse reinforcement learning, where an assistive agent (e.g., a chatbot, a robot) infers a human's intention and then selects actions to help the human reach that goal. This approach requires inferring intentions, which can be difficult in high-dimensional settings. We build upon prior work that studies assistance through the lens of empowerment: an assistive agent aims to maximize the influence of the human's actions such that they exert a greater control over the environmental outcomes and can solve tasks in fewer steps. We lift the major limitation of prior work in this area\u2014scalability to high-dimensional settings\u2014with contrastive successor representations. We formally prove that these representations estimate a similar notion of empowerment to that studied by prior work and provide a ready-made mechanism for optimizing it. Empirically, our proposed method outperforms prior methods on synthetic benchmarks, and scales to Overcooked, a cooperative game setting. Theoretically, our work connects ideas from information theory, neuroscience, and reinforcement learning, and charts a path for representations to play a critical role in solving assistive problems. Our code is available at https://github.com/vivekmyers/empowerment_successor_representations.", "keywords": "Human-AI Collaboration;Unsupervised Reinforcement Learning", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Vivek Myers;Evan Ellis;Sergey Levine;Benjamin Eysenbach;Anca Dragan", "authorids": "~Vivek_Myers1;~Evan_Ellis1;~Sergey_Levine1;~Benjamin_Eysenbach1;~Anca_Dragan1", "gender": ";M;M;M;F", "homepage": "https://people.eecs.berkeley.edu/~vmyers/;https://www.linkedin.com/in/evan-ellis-cs/;https://people.eecs.berkeley.edu/~svlevine/;https://ben-eysenbach.github.io/;http://www.ancadragan.com/", "dblp": "270/8694;;80/7594;192/1863;", "google_scholar": "5NGAbT4AAAAJ;;8R35rCwAAAAJ;DRnOvU8AAAAJ;", "orcid": ";;;0009-0000-7136-6307;", "linkedin": ";;;benjamin-eysenbach-a7235775/;", "or_profile": "~Vivek_Myers1;~Evan_Ellis1;~Sergey_Levine1;~Benjamin_Eysenbach1;~Anca_Dragan1", "aff": "University of California, Berkeley;University of California, Berkeley;Google;Princeton University;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;google.com;princeton.edu;berkeley.edu", "position": "PhD student;Undergrad student;Research Scientist;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmyers2024learning,\ntitle={Learning to Assist Humans without Inferring Rewards},\nauthor={Vivek Myers and Evan Ellis and Sergey Levine and Benjamin Eysenbach and Anca Dragan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WCnJmb7cv1}\n}", "github": "", "reviewers": "KT7u;MFVx;drkz;FYis;PN73", "pdf_size": 7727124, "rating": "4;5;5;6;7", "confidence": "4;4;3;4;4", "soundness": "2;3;3;4;4", "novelty": "2;3;2;2;3", "presentation": "3;2;3;3;4", "wc_summary": "134;91;74;83;63", "wc_strengths": "49;146;37;32;82", "wc_weaknesses": "391;615;175;63;230", "wc_questions": "105;492;19;121;114", "wc_limitations": "1;36;11;5;12", "wc_review": "680;1380;316;304;501", "wc_reply_reviewers": "60;628;16;120;0", "wc_reply_authors": "462;863;101;553;326", "reply_reviewers": "1;1;1;2;0", "reply_authors": "3;4;2;3;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 89.0, 24.355697485393435 ], "wc_strengths_avg": [ 69.2, 42.16823448995701 ], "wc_weaknesses_avg": [ 294.8, 191.86703729405946 ], "wc_questions_avg": [ 170.2, 165.0738016767046 ], "wc_limitations_avg": [ 13.0, 12.181953866272849 ], "wc_review_avg": [ 636.2, 396.5654548747281 ], "wc_reply_reviewers_avg": [ 164.8, 235.3060985185042 ], "wc_reply_authors_avg": [ 461.0, 252.14836902109838 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.1961161351381841, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1699171627358707181&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "berkeley.edu;berkeley.edu;google.com;princeton.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of California, Berkeley;Google;Princeton University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com;https://www.princeton.edu", "aff_unique_abbr": "UC Berkeley;Google;Princeton", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Berkeley;Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "One-Layer Transformer Provably Learns One-Nearest Neighbor In Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94847", "id": "WDX45LNZXE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WDX45LNZXE", "openreview": "https://openreview.net/forum?id=WDX45LNZXE", "poster": "", "project": "", "author_site": "Zihao Li, Yuan Cao, Cheng Gao, Yihan He, Han Liu, Jason Klusowski, Jianqing Fan, Mengdi Wang", "tldr": "", "abstract": "Transformers have achieved great success in recent years. Interestingly, transformers have shown particularly strong in-context learning capability -- even without fine-tuning, they are still able to solve unseen tasks well purely based on task-specific prompts. In this paper, we study the capability of one-layer transformers in learning the one-nearest neighbor prediction rule. Under a theoretical framework where the prompt contains a sequence of labeled training data and unlabeled test data, we show that, although the loss function is nonconvex, when trained with gradient descent, a single softmax attention layer can successfully learn to behave like a one-nearest neighbor classifier. Our result gives a concrete example on how transformers can be trained to implement nonparametric machine learning algorithms, and sheds light on the role of softmax attention in transformer models.", "keywords": "In-context learning theory;softmax attention;one-nearest neighbor", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zihao Li;Yuan Cao;Cheng Gao;Yihan He;Han Liu;Jason Matthew Klusowski;Jianqing Fan;Mengdi Wang", "authorids": "~Zihao_Li3;~Yuan_Cao1;~Cheng_Gao3;~Yihan_He1;~Han_Liu4;~Jason_Matthew_Klusowski1;~Jianqing_Fan1;~Mengdi_Wang1", "gender": "M;M;M;M;;M;M;F", "homepage": ";https://yuancaohku.github.io/;;;;https://klusowski.princeton.edu/;https://fan.princeton.edu;http://mwang.princeton.edu", "dblp": ";;;;;;33/2768;", "google_scholar": ";-VGnHI4AAAAJ;;;;4HkhCjsAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;0000-0001-6484-8682;0000-0003-3250-7677;", "linkedin": "zihao-li-48b313235/;;chenggao1999;\u9038\u6db5-\u8d3a-187106180/;;jklusowski/;;", "or_profile": "~Zihao_Li3;~Yuan_Cao1;~Cheng_Gao3;~Yihan_He1;~Han_Liu4;~Jason_Matthew_Klusowski1;~Jianqing_Fan1;~Mengdi_Wang1", "aff": "Princeton University;University of Hong Kong;Princeton University;Princeton University;Northwestern University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;hku.hk;princeton.edu;princeton.edu;u.northwestern.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;Assistant Professor;PhD student;PhD student;Associate Professor;Assistant Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nli2024onelayer,\ntitle={One-Layer Transformer Provably Learns One-Nearest Neighbor In Context},\nauthor={Zihao Li and Yuan Cao and Cheng Gao and Yihan He and Han Liu and Jason Matthew Klusowski and Jianqing Fan and Mengdi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WDX45LNZXE}\n}", "github": "", "reviewers": "FpKW;FWXF;cVwG;cdZv", "pdf_size": 1062563, "rating": "4;6;6;6", "confidence": "4;3;3;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "130;90;216;100", "wc_strengths": "118;94;85;111", "wc_weaknesses": "448;87;100;203", "wc_questions": "1;23;82;68", "wc_limitations": "5;6;17;23", "wc_review": "702;300;500;505", "wc_reply_reviewers": "153;46;10;0", "wc_reply_authors": "334;28;24;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 134.0, 49.57822102496216 ], "wc_strengths_avg": [ 102.0, 13.133925536563698 ], "wc_weaknesses_avg": [ 209.5, 144.8456074584245 ], "wc_questions_avg": [ 43.5, 32.82148686455262 ], "wc_limitations_avg": [ 12.75, 7.562241731127087 ], "wc_review_avg": [ 501.75, 142.14143484571977 ], "wc_reply_reviewers_avg": [ 52.25, 60.631571808753236 ], "wc_reply_authors_avg": [ 96.5, 137.53817651837616 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17032826852358287139&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "princeton.edu;hku.hk;princeton.edu;princeton.edu;u.northwestern.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0;0;0", "aff_unique_norm": "Princeton University;University of Hong Kong;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.hku.hk;https://www.northwestern.edu", "aff_unique_abbr": "Princeton;HKU;NU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "RelBench: A Benchmark for Deep Learning on Relational Databases", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97659", "id": "WEFxOm3Aez", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WEFxOm3Aez", "openreview": "https://openreview.net/forum?id=WEFxOm3Aez", "poster": "", "project": "", "author_site": "Joshua Robinson, Rishabh Ranjan, Weihua Hu, Kexin Huang, Jiaqi Han, Alejandro Dobles, Matthias Fey, Jan Eric Lenssen, Yiwen Yuan, Zecheng Zhang, Xinwei He, Jure Leskovec", "tldr": "", "abstract": "We present RelBench, a public benchmark for solving predictive tasks in relational databases with deep learning. RelBench provides databases and tasks spanning diverse domains, scales, and database dimensions, and is intended to be a foundational infrastructure for future research in this direction. We use RelBench to conduct the first comprehensive empirical study of graph neural network (GNN) based predictive models on relational data, as recently proposed by Fey et al. 2024. End-to-end learned GNNs are capable fully exploiting the predictive signal encoded in links between entities, marking a significant shift away from the dominant paradigm of manual feature engineering combined with tabular machine learning. To thoroughly evaluate GNNs against the prior gold-standard we conduct a user study, where an experienced data scientist manually engineers features for each task. In this study, GNNs learn better models whilst reducing human work needed by more than an order of magnitude. This result demonstrates the power of GNNs for solving predictive tasks in relational databases, opening up new research opportunities.", "keywords": "graph neural networks;relational database;benchmark", "primary_area": "", "supplementary_material": "/attachment/6e90d63d90c065c7311574b764d70d171936780e.pdf", "author": "Joshua Robinson;Rishabh Ranjan;Weihua Hu;Kexin Huang;Jiaqi Han;Alejandro Dobles;Matthias Fey;Jan Eric Lenssen;Yiwen Yuan;Zecheng Zhang;Xinwei He;Jure Leskovec", "authorids": "~Joshua_Robinson4;~Rishabh_Ranjan1;~Weihua_Hu1;~Kexin_Huang1;~Jiaqi_Han2;~Alejandro_Dobles1;~Matthias_Fey2;~Jan_Eric_Lenssen1;~Yiwen_Yuan1;~Zecheng_Zhang1;~Xinwei_He3;~Jure_Leskovec1", "gender": ";M;M;M;M;;M;M;F;;M;", "homepage": ";https://rishabh-ranjan.github.io;http://web.stanford.edu/~weihuahu/;https://www.kexinhuang.com/;https://hanjq17.github.io;;http://rusty1s.github.io;https://janericlenssen.github.io/;;;;http://cs.stanford.edu/~jure/", "dblp": ";;42/1232;;235/0412;;180/9174;195/9868;;;;l/JureLeskovec", "google_scholar": ";NNzQUrcAAAAJ;wAFMjfkAAAAJ;ogEXTOgAAAAJ;AKppgMAAAAAJ;;https://scholar.google.de/citations?user=5HaSBN0AAAAJ;https://scholar.google.de/citations?user=enXCzCgAAAAJ;koA9QbMAAAAJ;;o3TZYQ4AAAAJ;Q_kKkIUAAAAJ", "orcid": ";;;;;;;0000-0003-4093-9840;0009-0007-1884-3304;;;0000-0002-5411-923X", "linkedin": ";;weihua-hu-a8284228/;;;adobles/;;jan-eric-lenssen-08700b190/;;;xinwei-he-586512134/;leskovec/", "or_profile": "~Joshua_Robinson4;~Rishabh_Ranjan1;~Weihua_Hu1;~Kexin_Huang1;~Jiaqi_Han2;~Alejandro_Dobles1;~Matthias_Fey2;~Jan_Eric_Lenssen1;~Yiwen_Yuan1;~Zecheng_Zhang1;~Xinwei_He3;~Jure_Leskovec1", "aff": ";Stanford University;;Stanford University;Computer Science Department, Stanford University;Stanford University;TU Dortmund University;Kumo;Kumo AI;;Stanford University;Kumo.AI", "aff_domain": ";stanford.edu;;stanford.edu;cs.stanford.edu;stanford.edu;udo.edu;kumo.ai;kumo.ai;;stanford.edu;kumo.ai", "position": ";PhD student;;PhD student;PhD student;MS student;PhD student;Researcher;Researcher;;MS student;Chief Scientist", "bibtex": "@inproceedings{\nrobinson2024relbench,\ntitle={RelBench: A Benchmark for Deep Learning on Relational Databases},\nauthor={Joshua Robinson and Rishabh Ranjan and Weihua Hu and Kexin Huang and Jiaqi Han and Alejandro Dobles and Matthias Fey and Jan Eric Lenssen and Yiwen Yuan and Zecheng Zhang and Xinwei He and Jure Leskovec},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WEFxOm3Aez}\n}", "github": "", "reviewers": "EgX3;1DEG;Vo6o;jCCf", "pdf_size": 1178161, "rating": "6;6;8;9", "confidence": "4;4;4;4", "wc_summary_and_contributions": "93;52;83;53", "wc_strengths": "27;88;84;86", "wc_improvement": "44;97;96;740", "wc_limitations": "1;1;1;113", "wc_correctness": "8;1;1;41", "wc_clarity": "7;1;1;18", "wc_relation_to_prior_work": "29;1;1;171", "wc_documentation": "21;1;16;77", "wc_additional_feedback": "1;1;1;1", "wc_review": "231;243;284;1300", "wc_reply_reviewers": "0;0;85;273", "wc_reply_authors": "0;0;0;67", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 70.25, 18.102140757380052 ], "wc_strengths_avg": [ 71.25, 25.586861863073402 ], "wc_improvement_avg": [ 244.25, 287.0229738191701 ], "wc_limitations_avg": [ 29.0, 48.49742261192856 ], "wc_correctness_avg": [ 12.75, 16.55860803328589 ], "wc_clarity_avg": [ 6.75, 6.94172168845741 ], "wc_relation_to_prior_work_avg": [ 50.5, 70.50354601011215 ], "wc_documentation_avg": [ 28.75, 28.81297450802329 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 514.5, 453.9341912656503 ], "wc_reply_reviewers_avg": [ 89.5, 111.48206133723936 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14001047991437308057&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";stanford.edu;;stanford.edu;cs.stanford.edu;stanford.edu;udo.edu;kumo.ai;kumo.ai;;stanford.edu;kumo.ai", "author_num": 12, "aff_unique_index": "0;0;0;0;1;2;3;0;4", "aff_unique_norm": "Stanford University;Technische Universit\u00e4t Dortmund;Kumo;Kumo AI;Kumo.AI", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.stanford.edu;https://www.tu-dortmund.de;;https://www.kumo.ai;https://www.kumo.ai", "aff_unique_abbr": "Stanford;TU Dortmund;;Kumo AI;Kumo.AI", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Stanford;Dortmund;", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;Germany;" }, { "title": "Adversarially Robust Decision Transformer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94846", "id": "WEf2LT8NtY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WEf2LT8NtY", "openreview": "https://openreview.net/forum?id=WEf2LT8NtY", "poster": "", "project": "", "author_site": "Xiaohang Tang, Afonso Marques, Parameswaran Kamalaruban, Ilija Bogunovic", "tldr": "", "abstract": "Decision Transformer (DT), as one of the representative Reinforcement Learning via Supervised Learning (RvS) methods, has achieved strong performance in offline learning tasks by leveraging the powerful Transformer architecture for sequential decision-making. However, in adversarial environments, these methods can be non-robust, since the return is dependent on the strategies of both the decision-maker and adversary. Training a probabilistic model conditioned on observed return to predict action can fail to generalize, as the trajectories that achieve a return in the dataset might have done so due to a suboptimal behavior adversary. To address this, we propose a worst-case-aware RvS algorithm, the Adversarially Robust Decision Transformer (ARDT), which learns and conditions the policy on in-sample minimax returns-to-go. \nARDT aligns the target return with the worst-case return learned through minimax expectile regression, thereby enhancing robustness against powerful test-time adversaries. In experiments conducted on sequential games with full data coverage, ARDT can generate a maximin (Nash Equilibrium) strategy, the solution with the largest adversarial robustness. In large-scale sequential games and continuous adversarial RL environments with partial data coverage, ARDT demonstrates significantly superior robustness to powerful test-time adversaries and attains higher worst-case returns compared to contemporary DT methods.", "keywords": "Offline Reinforcement Learning;Reinforcement Learning via Supervised Learning;Decision Transformer;Robust Adversarial Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c8d42b50a06075291073b4bdb66b2dd0528d525f.zip", "author": "Xiaohang Tang;Afonso Marques;Parameswaran Kamalaruban;Ilija Bogunovic", "authorids": "~Xiaohang_Tang1;~Afonso_Marques1;~Parameswaran_Kamalaruban2;~Ilija_Bogunovic2", "gender": "M;M;M;M", "homepage": ";;https://markovkernel.net/;http://ilijabogunovic.com/", "dblp": "294/5064;;164/7413;142/2725", "google_scholar": "https://scholar.google.co.uk/citations?hl=zh-CN;;0ioRCikAAAAJ;xMvt3NEAAAAJ", "orcid": ";;;", "linkedin": ";afonsosamarques;;", "or_profile": "~Xiaohang_Tang1;~Afonso_Marques1;~Parameswaran_Kamalaruban2;~Ilija_Bogunovic1", "aff": "University College London;Department of Computer Science, University College London, University of London;Featurespace;Swiss Federal Institute of Technology", "aff_domain": "ucl.ac.uk;cs.ucl.ac.uk;featurespace.co.uk;ethz.ch", "position": "PhD student;MS student;Researcher;Postdoc", "bibtex": "@inproceedings{\ntang2024adversarially,\ntitle={Adversarially Robust Decision Transformer},\nauthor={Xiaohang Tang and Afonso Marques and Parameswaran Kamalaruban and Ilija Bogunovic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WEf2LT8NtY}\n}", "github": "", "reviewers": "18ip;p4QE;JwFn", "pdf_size": 902162, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;4", "wc_summary": "42;27;66", "wc_strengths": "33;42;18", "wc_weaknesses": "120;29;94", "wc_questions": "2;81;24", "wc_limitations": "25;20;34", "wc_review": "222;199;236", "wc_reply_reviewers": "30;14;21", "wc_reply_authors": "71;15;24", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 45.0, 16.06237840420901 ], "wc_strengths_avg": [ 31.0, 9.899494936611665 ], "wc_weaknesses_avg": [ 81.0, 38.27096375408734 ], "wc_questions_avg": [ 35.666666666666664, 33.289971796657056 ], "wc_limitations_avg": [ 26.333333333333332, 5.792715732327588 ], "wc_review_avg": [ 219.0, 15.253414918196734 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 6.548960901462833 ], "wc_reply_authors_avg": [ 36.666666666666664, 24.553795814270526 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mJXs3JS--TgJ:scholar.google.com/&scioq=Adversarially+Robust+Decision+Transformer&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "ucl.ac.uk;cs.ucl.ac.uk;featurespace.co.uk;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University College London;Featurespace Ltd.;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.featurespace.co.uk;https://www.ethz.ch", "aff_unique_abbr": "UCL;;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;Switzerland" }, { "title": "Efficient Reinforcement Learning by Discovering Neural Pathways", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94845", "id": "WEoOreP0n5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WEoOreP0n5", "openreview": "https://openreview.net/forum?id=WEoOreP0n5", "poster": "/media/PosterPDFs/NeurIPS%202024/94845.png?t=1733370623.9470613", "project": "", "author_site": "Samin Yeasar Arnob, Riyasat Ohib, Sergey Plis, Amy Zhang, Alessandro Sordoni, Doina Precup", "tldr": "", "abstract": "Reinforcement learning (RL) algorithms have been very successful at tackling complex control problems, such as AlphaGo or fusion control. However, current research mainly emphasizes solution quality, often achieved by using large models trained on large amounts of data, and does not account for the financial, environmental, and societal costs associated with developing and deploying such models. Modern neural networks are often overparameterized and a significant number of parameters can be pruned without meaningful loss in performance, resulting in more efficient use of the model's capacity lottery ticket. We present a methodology for identifying sub-networks within a larger network in reinforcement learning (RL). We call such sub-networks, neural pathways. We show empirically that even very small learned sub-networks, using less than 5% of the large network's parameters, can provide very good quality solutions. We also demonstrate the training of multiple pathways within the same networks in a multitask setup, where each pathway is encouraged to tackle a separate task. We evaluate empirically our approach on several continuous control tasks, in both online and offline training", "keywords": "Energy Efficient AI;Parameter Efficient;Neural Pathways;Continuous Control;Online Reinforcement Learning;Offline Reinforcement Learning;Multitask Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3444add04a784ccdbc1d2c0200b8e1d58464f7cb.zip", "author": "Samin Yeasar Arnob;Riyasat Ohib;Sergey M. Plis;Amy Zhang;Alessandro Sordoni;Doina Precup", "authorids": "~Samin_Yeasar_Arnob1;~Riyasat_Ohib1;~Sergey_Plis1;~Amy_Zhang1;~Alessandro_Sordoni2;~Doina_Precup1", "gender": "M;M;M;F;M;F", "homepage": "https://www.linkedin.com/in/samin-yeasar-arnob/;https://www.riyasatohib.com/;;http://cs.mcgill.ca/~dprecup/;;", "dblp": ";;07/227;p/DoinaPrecup;57/7642;43/2754", "google_scholar": "RMPv4RQAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;;", "orcid": ";;0000-0003-0040-0365;;;", "linkedin": ";;sergeyplis/;;;", "or_profile": "~Samin_Yeasar_Arnob1;~Riyasat_Ohib1;~Sergey_Plis1;~Doina_Precup1;~Alessandro_Sordoni1;~Amy_Zhang2", "aff": "McGill University;Georgia Institute of Technology;Georgia State University;McGill University;Microsoft;Meta Facebook", "aff_domain": "mcgill.ca;gatech.edu;gsu.edu;mcgill.ca;microsoft.com;facebook.com", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\narnob2024efficient,\ntitle={Efficient Reinforcement Learning by Discovering Neural Pathways},\nauthor={Samin Yeasar Arnob and Riyasat Ohib and Sergey M. Plis and Amy Zhang and Alessandro Sordoni and Doina Precup},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WEoOreP0n5}\n}", "github": "", "reviewers": "ddsT;HQmW;wTeM", "pdf_size": 5482971, "rating": "5;6;8", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "39;121;91", "wc_strengths": "28;88;145", "wc_weaknesses": "104;188;30", "wc_questions": "82;31;168", "wc_limitations": "15;7;7", "wc_review": "268;435;441", "wc_reply_reviewers": "11;0;27", "wc_reply_authors": "46;46;0", "reply_reviewers": "1;0;1", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 83.66666666666667, 33.875589375766666 ], "wc_strengths_avg": [ 87.0, 47.77028364998475 ], "wc_weaknesses_avg": [ 107.33333333333333, 64.54627969311804 ], "wc_questions_avg": [ 93.66666666666667, 56.53514148051831 ], "wc_limitations_avg": [ 9.666666666666666, 3.7712361663282534 ], "wc_review_avg": [ 381.3333333333333, 80.17619485995966 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 11.08552609887726 ], "wc_reply_authors_avg": [ 30.666666666666668, 21.684607956387456 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4710322897957813134&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mcgill.ca;gatech.edu;gsu.edu;mcgill.ca;microsoft.com;facebook.com", "author_num": 6, "aff_unique_index": "0;1;2;0;3;4", "aff_unique_norm": "McGill University;Georgia Institute of Technology;Georgia State University;Microsoft;Meta", "aff_unique_dep": ";;;Microsoft Corporation;Meta Platforms, Inc.", "aff_unique_url": "https://www.mcgill.ca;https://www.gatech.edu;https://www.gsu.edu;https://www.microsoft.com;https://meta.com", "aff_unique_abbr": "McGill;Georgia Tech;GSU;Microsoft;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Annealed Multiple Choice Learning: Overcoming limitations of Winner-takes-all with annealing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94844", "id": "WEs4WMzndY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WEs4WMzndY", "openreview": "https://openreview.net/forum?id=WEs4WMzndY", "poster": "/media/PosterPDFs/NeurIPS%202024/94844.png?t=1732973175.403999", "project": "", "author_site": "David Perera, Victor Letzelter, Theo Mariotte, Adrien Cortes, Mickael Chen, Slim Essid, Ga\u00ebl Richard", "tldr": "", "abstract": "We introduce Annealed Multiple Choice Learning (aMCL) which combines simulated annealing with MCL. MCL is a learning framework handling ambiguous tasks by predicting a small set of plausible hypotheses. These hypotheses are trained using the Winner-takes-all (WTA) scheme, which promotes the diversity of the predictions. However, this scheme may converge toward an arbitrarily suboptimal local minimum, due to the greedy nature of WTA. We overcome this limitation using annealing, which enhances the exploration of the hypothesis space during training. We leverage insights from statistical physics and information theory to provide a detailed description of the model training trajectory. Additionally, we validate our algorithm by extensive experiments on synthetic datasets, on the standard UCI benchmark, and on speech separation.", "keywords": "multiple choice learning;winner-takes-all;deterministic annealing;uncertainty quantification", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "David Perera;Victor Letzelter;Theo Mariotte;Adrien Cortes;Mickael Chen;Slim Essid;Ga\u00ebl Richard", "authorids": "~David_Perera1;~Victor_Letzelter1;~Theo_Mariotte1;~Adrien_Cortes1;~Mickael_Chen1;~Slim_Essid1;~Ga\u00ebl_Richard1", "gender": "M;;;M;M;Not Specified;M", "homepage": ";https://victorletzelter.github.io;;https://github.com/c1adrien;https://sites.google.com/view/mickaelchen/home;https://perso.telecom-paris.fr/essid/;https://perso.telecom-paristech.fr/grichard/", "dblp": ";360/0588;;;190/7274;53/6904;34/1310", "google_scholar": ";https://scholar.google.fr/citations?user=YhTdZh8AAAAJ;;;https://scholar.google.fr/citations?user=QnRpMJAAAAAJ;5dP_Pv0AAAAJ;https://scholar.google.fr/citations?user=xn70tPIAAAAJ", "orcid": ";;0000-0002-2108-101X;;;;", "linkedin": "https://www.linkedin.com/public-profile/settings;victor-letzelter-3b832219b;;c1adrien/;mickael-chen-ml/;;", "or_profile": "~David_Perera1;~Victor_Letzelter1;~Theo_Mariotte1;~Adrien_Cortes1;~Mickael_Chen1;~Slim_Essid1;~Ga\u00ebl_Richard1", "aff": "T\u00e9l\u00e9com ParisTech;T\u00e9l\u00e9com ParisTech;T\u00e9l\u00e9com Paris;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);Valeo;T\u00e9l\u00e9com ParisTech;Telecom Paris", "aff_domain": "telecom-paristech.fr;telecom-paristech.fr;telecom-paris.fr;etu.sorbonne-universite.fr;valeo.com;telecom-paristech.fr;telecom-paris.fr", "position": "PhD student;PhD student;Postdoc;MS student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nperera2024annealed,\ntitle={Annealed Multiple Choice Learning: Overcoming limitations of Winner-takes-all with annealing},\nauthor={David Perera and Victor Letzelter and Theo Mariotte and Adrien Cortes and Mickael Chen and Slim Essid and Ga{\\\"e}l Richard},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WEs4WMzndY}\n}", "github": "", "reviewers": "WQVX;qUke;WAaS", "pdf_size": 7586326, "rating": "5;7;7", "confidence": "3;2;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "173;109;71", "wc_strengths": "23;56;46", "wc_weaknesses": "36;64;255", "wc_questions": "64;1;215", "wc_limitations": "9;32;26", "wc_review": "305;262;613", "wc_reply_reviewers": "0;0;174", "wc_reply_authors": "0;0;48", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 117.66666666666667, 42.089850980438925 ], "wc_strengths_avg": [ 41.666666666666664, 13.816254517375139 ], "wc_weaknesses_avg": [ 118.33333333333333, 97.31164141846317 ], "wc_questions_avg": [ 93.33333333333333, 89.79359046663012 ], "wc_limitations_avg": [ 22.333333333333332, 9.741092797468305 ], "wc_review_avg": [ 393.3333333333333, 156.31663023776096 ], "wc_reply_reviewers_avg": [ 58.0, 82.02438661763951 ], "wc_reply_authors_avg": [ 16.0, 22.627416997969522 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=646327006336674637&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "telecom-paristech.fr;telecom-paristech.fr;telecom-paris.fr;etu.sorbonne-universite.fr;valeo.com;telecom-paristech.fr;telecom-paris.fr", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0;4", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;T\u00e9l\u00e9com Paris;Sorbonne Universit\u00e9;Valeo;Telecom Paris", "aff_unique_dep": ";;Facult\u00e9 des Sciences;;", "aff_unique_url": "https://www.telecom-paristech.fr;https://www.telecom-paris.fr;https://www.sorbonne-universite.fr;https://www.valeo.com;https://www.telecom-paris.fr", "aff_unique_abbr": "TP;T\u00e9l\u00e9com Paris;Sorbonne U;;Telecom Paris", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris VI", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "France" }, { "title": "ALPINE: Unveiling The Planning Capability of Autoregressive Learning in Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94843", "id": "WFbZusv14E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WFbZusv14E", "openreview": "https://openreview.net/forum?id=WFbZusv14E", "poster": "/media/PosterPDFs/NeurIPS%202024/94843.png?t=1731735957.7372446", "project": "", "author_site": "Siwei Wang, Yifei Shen, Shi Feng, Haoran Sun, Shang-Hua Teng, Wei Chen", "tldr": "", "abstract": "Planning is a crucial element of both human intelligence and contemporary large language models (LLMs). In this paper, we initiate a theoretical investigation into the emergence of planning capabilities in Transformer-based LLMs via their next-word prediction mechanisms. We model planning as a network path-finding task, where the objective is to generate a valid path from a specified source node to a designated target node. Our mathematical characterization shows that Transformer architectures can execute path-finding by embedding the adjacency and reachability matrices within their weights. Furthermore, our theoretical analysis of gradient-based learning dynamics reveals that LLMs can learn both the adjacency and a limited form of the reachability matrices. These theoretical insights are then validated through experiments, which demonstrate that Transformer architectures indeed learn the adjacency and an incomplete reachability matrices, consistent with our theoretical predictions. When applying our methodology to the real-world planning benchmark Blocksworld, our observations remain consistent. Additionally, our analyses uncover a fundamental limitation of current Transformer architectures in path-finding: these architectures cannot identify reachability relationships through transitivity, which leads to failures in generating paths when concatenation is required. These findings provide new insights into how the internal mechanisms of autoregressive learning facilitate intelligent planning and deepen our understanding of how future LLMs might achieve more advanced and general planning-and-reasoning capabilities across diverse applications.", "keywords": "Planning;Path finding;Transformer;Autoregressive Learning;Language models", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/21b0fe08d419f0c7ba18b14de3d1a3d61b425b69.zip", "author": "Siwei Wang;Yifei Shen;Shi Feng;Haoran Sun;Shang-Hua Teng;Wei Chen", "authorids": "~Siwei_Wang2;~Yifei_Shen1;~Shi_Feng2;~Haoran_Sun6;~Shang-Hua_Teng1;~Wei_Chen10", "gender": "M;M;M;M;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/siweiwang/publications/;https://openreview.net/profile?id=~Yifei_Shen1;https://fengshi.link;https://github.com/knightt0301;https://viterbi-web.usc.edu/~shanghua/;https://www.microsoft.com/en-us/research/people/weic/", "dblp": "51/8279-2;51/609.html;97/1374;;t/ShangHuaTeng;c/WeiChen13", "google_scholar": ";;7XxWCegAAAAJ;;JknkZcQAAAAJ;hlEPkxAAAAAJ", "orcid": ";;;;0000-0001-5011-4514;", "linkedin": ";;;;shanghua-teng-a295598;", "or_profile": "~Siwei_Wang2;~Yifei_Shen1;~Shi_Feng2;~Haoran_Sun6;~Shang-Hua_Teng1;~Wei_Chen10", "aff": "Microsoft;Microsoft Research Asia;School of Engineering and Applied Sciences, Harvard University;Peking University;University of Southern California;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;g.harvard.edu;stu.pku.edu.cn;usc.edu;microsoft.com", "position": "Researcher;Research Cheerleader;PhD student;Undergrad student;Full Professor;Pricipal Researcher", "bibtex": "@inproceedings{\nwang2024alpine,\ntitle={{ALPINE}: Unveiling The Planning Capability of Autoregressive Learning in Language Models},\nauthor={Siwei Wang and Yifei Shen and Shi Feng and Haoran Sun and Shang-Hua Teng and Wei Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WFbZusv14E}\n}", "github": "", "reviewers": "znJo;wPpp;T9s7", "pdf_size": 2202993, "rating": "4;6;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "2;3;4", "wc_summary": "64;78;189", "wc_strengths": "47;88;69", "wc_weaknesses": "81;455;58", "wc_questions": "33;87;57", "wc_limitations": "33;22;8", "wc_review": "258;730;381", "wc_reply_reviewers": "49;118;11", "wc_reply_authors": "79;491;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 110.33333333333333, 55.91859162111372 ], "wc_strengths_avg": [ 68.0, 16.753109164172084 ], "wc_weaknesses_avg": [ 198.0, 181.9688618051634 ], "wc_questions_avg": [ 59.0, 22.090722034374522 ], "wc_limitations_avg": [ 21.0, 10.23067283548187 ], "wc_review_avg": [ 456.3333333333333, 199.92053977073547 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 44.28945196720722 ], "wc_reply_authors_avg": [ 190.0, 215.268824186566 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11107949256464615765&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;g.harvard.edu;stu.pku.edu.cn;usc.edu;microsoft.com", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Microsoft;Harvard University;Peking University;University of Southern California", "aff_unique_dep": "Microsoft Corporation;School of Engineering and Applied Sciences;;", "aff_unique_url": "https://www.microsoft.com;https://www.harvard.edu;http://www.pku.edu.cn;https://www.usc.edu", "aff_unique_abbr": "Microsoft;Harvard;Peking U;USC", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Asia;Cambridge;Los Angeles", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "ClashEval: Quantifying the tug-of-war between an LLM\u2019s internal prior and external evidence", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97658", "id": "WGoCZl2itU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WGoCZl2itU", "openreview": "https://openreview.net/forum?id=WGoCZl2itU", "poster": "", "project": "", "author_site": "Kevin Wu, Eric Wu, James Zou", "tldr": "", "abstract": "Retrieval augmented generation (RAG) is frequently used to mitigate hallucinations and provide up-to-date knowledge for large language models (LLMs). However, given that document retrieval is an imprecise task and sometimes results in erroneous or even harmful content being presented in context, this raises the question of how LLMs handle retrieved information: If the provided content is incorrect, does the model know to ignore it, or does it recapitulate the error? Conversely, when the model's initial response is incorrect, does it always know to use the retrieved information to correct itself, or does it insist on its wrong prior response? To answer this, we curate a dataset of over 1200 questions across six domains (e.g., drug dosages, Olympic records, locations) along with content relevant to answering each question. We further apply precise perturbations to the answers in the content that range from subtle to blatant errors.\nWe benchmark six top-performing LLMs, including GPT-4o, on this dataset and find that LLMs are susceptible to adopting incorrect retrieved content, overriding their own correct prior knowledge over 60\\% of the time. However, the more unrealistic the retrieved content is (i.e. more deviated from truth), the less likely the model is to adopt it. Also, the less confident a model is in its initial response (via measuring token probabilities), the more likely it is to adopt the information in the retrieved content. We exploit this finding and demonstrate simple methods for improving model accuracy where there is conflicting retrieved content. Our results highlight a difficult task and benchmark for LLMs -- namely, their ability to correctly discern when it is wrong in light of correct retrieved content and to reject cases when the provided content is incorrect. Our dataset, called ClashEval, and evaluations are open-sourced to allow for future benchmarking on top-performing models at https://github.com/kevinwu23/StanfordClashEval.", "keywords": "RAG;adherence;LLMs", "primary_area": "", "supplementary_material": "/attachment/c835196f0787f602e32bc8db9e1c7198ea7afc0b.pdf", "author": "Kevin Wu;Eric Wu;James Zou", "authorids": "~Kevin_Wu1;~Eric_Wu3;~James_Zou1", "gender": ";M;", "homepage": "https://kevinwu.ai;;", "dblp": ";;", "google_scholar": "s4dCi5sAAAAJ;6pIliiAAAAAJ;23ZXZvEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kevin_Wu1;~Eric_Wu3;~James_Zou1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwu2024clasheval,\ntitle={ClashEval: Quantifying the tug-of-war between an {LLM}{\\textquoteright}s internal prior and external evidence},\nauthor={Kevin Wu and Eric Wu and James Zou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WGoCZl2itU}\n}", "github": "", "reviewers": "SVmZ;ydZz;R2pN;NYDi", "pdf_size": 2156707, "rating": "5;6;7;7", "confidence": "3;4;4;5", "wc_summary_and_contributions": "120;30;82;72", "wc_strengths": "80;86;75;46", "wc_improvement": "233;26;3;178", "wc_limitations": "25;79;103;8", "wc_correctness": "34;22;1;18", "wc_clarity": "9;6;1;1", "wc_relation_to_prior_work": "38;67;1;1", "wc_documentation": "27;41;1;16", "wc_additional_feedback": "1;1;1;1", "wc_review": "567;358;268;341", "wc_reply_reviewers": "0;10;0;8", "wc_reply_authors": "69;12;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 76.0, 32.03123475609393 ], "wc_strengths_avg": [ 71.75, 15.368392889303683 ], "wc_improvement_avg": [ 110.0, 97.79826174324367 ], "wc_limitations_avg": [ 53.75, 38.674119253061214 ], "wc_correctness_avg": [ 18.75, 11.818946653572814 ], "wc_clarity_avg": [ 4.25, 3.418698582794336 ], "wc_relation_to_prior_work_avg": [ 26.75, 27.716195626384224 ], "wc_documentation_avg": [ 21.25, 14.669270602180601 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 383.5, 111.20813819141115 ], "wc_reply_reviewers_avg": [ 4.5, 4.55521678957215 ], "wc_reply_authors_avg": [ 20.25, 28.568995432111365 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8002199800019098684&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Large Scale Transfer Learning for Tabular Data via Language Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94842", "id": "WH5blx5tZ1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WH5blx5tZ1", "openreview": "https://openreview.net/forum?id=WH5blx5tZ1", "poster": "", "project": "", "author_site": "Josh Gardner, Juan Perdomo, Ludwig Schmidt", "tldr": "", "abstract": "Tabular data \u2013 structured, heterogeneous, spreadsheet-style data with rows and columns \u2013 is widely used in practice across many domains. However, while recent foundation models have reduced the need for developing task-specific datasets and predictors in domains such as language modeling and computer vision, this transfer learning paradigm has not had similar impact in the tabular domain. In this work, we seek to narrow this gap and present TABULA-8B, a language model for tabular prediction. We define a process for extracting a large, high-quality training dataset from the TabLib corpus, proposing methods for tabular data filtering and quality control. Using the resulting dataset, which comprises over 2.1B rows from 4.2M unique tables, we fine-tune a Llama 3-8B large language model (LLM) for tabular data prediction (classification and binned regression) using a novel packing and attention scheme for tabular prediction. Through evaluation across a test suite of 329 datasets, we find that TABULA-8B has zero-shot accuracy on unseen tables that is over 15 percentage points (pp) higher than random guessing, a feat that is not possible with existing state-of-the-art tabular prediction models (e.g. XGBoost, TabPFN). In the few-shot setting (1-32 shots), without any fine-tuning on the target datasets, TABULA-8B is 5-15 pp more accurate than XGBoost and TabPFN models that are explicitly trained on equal, or even up to 16\u00d7 more data. We release our model, code, and data along with the publication of this paper.", "keywords": "tabular;foundation model;language model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/1cac3c6717d3a5bc78cc0facf38ec5efd240ceee.zip", "author": "Joshua P Gardner;Juan Carlos Perdomo;Ludwig Schmidt", "authorids": "~Joshua_P_Gardner1;~Juan_Carlos_Perdomo1;~Ludwig_Schmidt1", "gender": ";M;M", "homepage": ";https://jcperdomo.org/;http://people.csail.mit.edu/ludwigs/", "dblp": ";242/7773.html;141/2720", "google_scholar": ";TeBmXz4AAAAJ;SWMKy70AAAAJ", "orcid": ";;", "linkedin": ";;ludwig-schmidt-87ba3612/", "or_profile": "~Joshua_P_Gardner1;~Juan_Carlos_Perdomo1;~Ludwig_Schmidt1", "aff": ";Harvard University;University of Washington", "aff_domain": ";harvard.edu;washington.edu", "position": ";Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ngardner2024large,\ntitle={Large Scale Transfer Learning for Tabular Data via Language Modeling},\nauthor={Joshua P Gardner and Juan Carlos Perdomo and Ludwig Schmidt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WH5blx5tZ1}\n}", "github": "", "reviewers": "mEH8;7NU9;3ko2;DUWr", "pdf_size": 1045810, "rating": "5;6;6;7", "confidence": "4;5;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "88;157;29;63", "wc_strengths": "66;140;52;25", "wc_weaknesses": "256;253;92;207", "wc_questions": "4;139;152;6", "wc_limitations": "6;13;7;1", "wc_review": "420;702;332;302", "wc_reply_reviewers": "466;51;0;10", "wc_reply_authors": "3460;1816;1778;819", "reply_reviewers": "2;1;0;1", "reply_authors": "6;4;4;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 46.932797700542 ], "wc_strengths_avg": [ 70.75, 42.61088475964797 ], "wc_weaknesses_avg": [ 202.0, 66.41159537309731 ], "wc_questions_avg": [ 75.25, 70.4037463491823 ], "wc_limitations_avg": [ 6.75, 4.264680527307995 ], "wc_review_avg": [ 439.0, 157.9145338466349 ], "wc_reply_reviewers_avg": [ 131.75, 193.9231484377252 ], "wc_reply_authors_avg": [ 1968.25, 949.403595685207 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8792232664090252656&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";harvard.edu;washington.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Harvard University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.washington.edu", "aff_unique_abbr": "Harvard;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Provable and Efficient Dataset Distillation for Kernel Ridge Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94841", "id": "WI2VpcBdnd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WI2VpcBdnd", "openreview": "https://openreview.net/forum?id=WI2VpcBdnd", "poster": "", "project": "", "author_site": "Yilan Chen, Wei Huang, Lily Weng", "tldr": "", "abstract": "Deep learning models are now trained on increasingly larger datasets, making it crucial to reduce computational costs and improve data quality. Dataset distillation aims to distill a large dataset into a small synthesized dataset such that models trained on it can achieve similar performance to those trained on the original dataset. While there have been many empirical efforts to improve dataset distillation algorithms, a thorough theoretical analysis and provable, efficient algorithms are still lacking. In this paper, by focusing on dataset distillation for kernel ridge regression (KRR), we show that one data point per class is already necessary and sufficient to recover the original model's performance in many settings. For linear ridge regression and KRR with surjective feature mappings, we provide necessary and sufficient conditions for the distilled dataset to recover the original model's parameters. For KRR with injective feature mappings of deep neural networks, we show that while one data point per class is not sufficient in general, $k+1$ data points can be sufficient for deep linear neural networks, where $k$ is the number of classes. Our theoretical results enable directly constructing analytical solutions for distilled datasets, resulting in a provable and efficient dataset distillation algorithm for KRR. We verify our theory experimentally and show that our algorithm outperforms previous work such as KIP while being significantly more efficient, e.g. 15840$\\times$ faster on CIFAR-100. Our code is available at \\href{https://github.com/Trustworthy-ML-Lab/provable-efficient-dataset-distill-KRR}{GitHub}.", "keywords": "Dataset Distillation;Kernel Ridge Regression", "primary_area": "other", "supplementary_material": "", "author": "Yilan Chen;Wei Huang;Tsui-Wei Weng", "authorids": "~Yilan_Chen1;~Wei_Huang6;~Tsui-Wei_Weng1", "gender": "M;M;F", "homepage": "https://yilanchen6.github.io/;https://weihuang05.github.io/;https://lilywenglab.github.io", "dblp": "167/6638-2.html;81/6685-34;177/9197", "google_scholar": "6wmzpRIAAAAJ;RZfDh4MAAAAJ;v8GM4xoAAAAJ", "orcid": ";0000-0001-5674-7021;", "linkedin": ";;", "or_profile": "~Yilan_Chen1;~Wei_Huang6;~Tsui-Wei_Weng1", "aff": "University of California, San Diego;RIKEN AIP;University of California, San Diego", "aff_domain": "ucsd.edu;riken.jp;ucsd.edu", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nchen2024provable,\ntitle={Provable and Efficient Dataset Distillation for Kernel Ridge Regression},\nauthor={Yilan Chen and Wei Huang and Tsui-Wei Weng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WI2VpcBdnd}\n}", "github": "", "reviewers": "nkTx;ish1;RvWD;7eAw", "pdf_size": 1368206, "rating": "3;4;7;7", "confidence": "4;4;4;4", "soundness": "3;2;2;3", "novelty": "2;1;4;3", "presentation": "2;3;2;3", "wc_summary": "297;86;279;52", "wc_strengths": "73;93;55;84", "wc_weaknesses": "127;511;298;139", "wc_questions": "119;34;9;136", "wc_limitations": "6;15;27;15", "wc_review": "622;739;668;426", "wc_reply_reviewers": "224;281;175;14", "wc_reply_authors": "674;880;72;28", "reply_reviewers": "1;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 178.5, 110.3415153058902 ], "wc_strengths_avg": [ 76.25, 14.16642156650719 ], "wc_weaknesses_avg": [ 268.75, 155.29709430636493 ], "wc_questions_avg": [ 74.5, 54.06708795561307 ], "wc_limitations_avg": [ 15.75, 7.46240577829965 ], "wc_review_avg": [ 613.75, 116.13435107667325 ], "wc_reply_reviewers_avg": [ 173.5, 99.4346519076725 ], "wc_reply_authors_avg": [ 413.5, 371.05087252289275 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7287074809660321247&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ucsd.edu;riken.jp;ucsd.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, San Diego;RIKEN", "aff_unique_dep": ";Advanced Institute for Computational Science", "aff_unique_url": "https://www.ucsd.edu;https://www.aip.riken.jp", "aff_unique_abbr": "UCSD;RIKEN AIP", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "Interpretable Concept-Based Memory Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94840", "id": "WILLwyVmP8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WILLwyVmP8", "openreview": "https://openreview.net/forum?id=WILLwyVmP8", "poster": "/media/PosterPDFs/NeurIPS%202024/94840.png?t=1733212235.9280515", "project": "", "author_site": "David Debot, Pietro Barbiero, Francesco Giannini, Gabriele Ciravegna, Michelangelo Diligenti, Giuseppe Marra", "tldr": "", "abstract": "The lack of transparency in the decision-making processes of deep learning systems presents a significant challenge in modern artificial intelligence (AI), as it impairs users\u2019 ability to rely on and verify these systems. To address this challenge, Concept Bottleneck Models (CBMs) have made significant progress by incorporating human-interpretable concepts into deep learning architectures. This approach allows predictions to be traced back to specific concept patterns that users can understand and potentially intervene on. However, existing CBMs\u2019 task predictors are not fully interpretable, preventing a thorough analysis and any form of formal verification of their decision-making process prior to deployment, thereby raising significant reliability concerns. To bridge this gap, we introduce Concept-based Memory Reasoner (CMR), a novel CBM designed to provide a human-understandable and provably-verifiable task prediction process. Our approach is to model each task prediction as a neural selection mechanism over a memory of learnable logic rules, followed by a symbolic evaluation of the selected rule. The presence of an explicit memory and the symbolic evaluation allow domain experts to inspect and formally verify the validity of certain global properties of interest for the task prediction process. Experimental results demonstrate that CMR achieves better accuracy-interpretability trade-offs to state-of-the-art CBMs, discovers logic rules consistent with ground truths, allows for rule interventions, and allows pre-deployment verification.", "keywords": "Concept-based models;explainable AI;neurosymbolic", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "David Debot;Pietro Barbiero;Francesco Giannini;Gabriele Ciravegna;Michelangelo Diligenti;Giuseppe Marra", "authorids": "~David_Debot1;~Pietro_Barbiero1;~Francesco_Giannini1;~Gabriele_Ciravegna1;~Michelangelo_Diligenti2;~Giuseppe_Marra1", "gender": "M;M;M;M;M;Not Specified", "homepage": ";http://www.pietrobarbiero.eu/;https://www.francescogiannini.eu/;https://dbdmg.polito.it/dbdmg_web/gabriele-ciravegna/;https://www.giuseppemarra.com;", "dblp": ";238/7860;198/0854;228/1667;150/7452;11/5691", "google_scholar": ";https://scholar.google.it/citations?user=4gbToQoAAAAJ;RO8aInMAAAAJ;https://scholar.google.it/citations?view_op=list_works;https://scholar.google.it/citations?user=BBcsk7MAAAAJ;https://scholar.google.it/citations?user=qI-LOjIAAAAJ", "orcid": "0009-0006-4782-3233;0000-0003-3155-2564;0000-0001-8492-8110;0000-0002-6799-1043;;", "linkedin": ";;https://www.linkedin.com/search/results/all/?fetchDeterministicClustersOnly=true&heroEntityKey=urn%3Ali%3Afsd_profile%3AACoAAEZY56YBnC1EDCTXy7QNDbkYThgd6vpD6i8&keywords=francesco%20giannini&origin=RICH_QUERY_SUGGESTION&position=0&searchId=95dc79fd-e2ea-4d21-b3dc-7ad787ee929e&sid=JQw&spellCorrectionEnabled=false;gabriele-ciravegna-5a1130124/;;", "or_profile": "~David_Debot1;~Pietro_Barbiero1;~Francesco_Giannini1;~Gabriele_Ciravegna1;~Giuseppe_Marra1;~Michelangelo_Diligenti1", "aff": "KU Leuven;Universita della Svizzera Italiana;CINI;Polytechnic Institute of Turin;KU Leuven;Google Inc.", "aff_domain": "kuleuven.be;usi.ch;consorzio-cini.it;polito.it;kuleuven.be;google.com", "position": "PhD student;Postdoc;Researcher;Postdoc;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ndebot2024interpretable,\ntitle={Interpretable Concept-Based Memory Reasoning},\nauthor={David Debot and Pietro Barbiero and Francesco Giannini and Gabriele Ciravegna and Michelangelo Diligenti and Giuseppe Marra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WILLwyVmP8}\n}", "github": "", "reviewers": "TwPt;VcGf;jRYi;ERBX", "pdf_size": 1235434, "rating": "3;6;6;6", "confidence": "4;4;3;3", "soundness": "1;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;2", "wc_summary": "136;82;68;63", "wc_strengths": "9;32;102;27", "wc_weaknesses": "13;12;148;293", "wc_questions": "82;81;40;34", "wc_limitations": "25;62;32;27", "wc_review": "265;269;390;444", "wc_reply_reviewers": "0;42;66;102", "wc_reply_authors": "32;32;189;32", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.25, 28.994611568358696 ], "wc_strengths_avg": [ 42.5, 35.40127116361784 ], "wc_weaknesses_avg": [ 116.5, 115.94934238709592 ], "wc_questions_avg": [ 59.25, 22.353690970396812 ], "wc_limitations_avg": [ 36.5, 14.941552797483935 ], "wc_review_avg": [ 342.0, 77.4047802141444 ], "wc_reply_reviewers_avg": [ 52.5, 37.077621282924824 ], "wc_reply_authors_avg": [ 71.25, 67.98299419707844 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15257611383653574336&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kuleuven.be;usi.ch;consorzio-cini.it;polito.it;kuleuven.be;google.com", "author_num": 6, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Katholieke Universiteit Leuven;Universita della Svizzera Italiana;Consorzio Interuniversitario Nazionale per l'Informatica;Polytechnic Institute of Turin;Google", "aff_unique_dep": ";;;;Google", "aff_unique_url": "https://www.kuleuven.be;https://www.usi.ch;https://www.cini.it;https://www.polito.it;https://www.google.com", "aff_unique_abbr": "KU Leuven;USI;CINI;Polito;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;2;0;3", "aff_country_unique": "Belgium;Switzerland;Italy;United States" }, { "title": "Do LLMs dream of elephants (when told not to)? Latent concept association and associative memory in transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94839", "id": "WJ04ZX8txM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WJ04ZX8txM", "openreview": "https://openreview.net/forum?id=WJ04ZX8txM", "poster": "", "project": "", "author_site": "Yibo Jiang, Goutham Rajendran, Pradeep Ravikumar, Bryon Aragam", "tldr": "", "abstract": "Large Language Models (LLMs) have the capacity to store and recall facts. Through experimentation with open-source models, we observe that this ability to retrieve facts can be easily manipulated by changing contexts, even without altering their factual meanings. These findings highlight that LLMs might behave like an associative memory model where certain tokens in the contexts serve as clues to retrieving facts. We mathematically explore this property by studying how transformers, the building blocks of LLMs, can complete such memory tasks. We study a simple latent concept association problem with a one-layer transformer and we show theoretically and empirically that the transformer gathers information using self-attention and uses the value matrix for associative memory.", "keywords": "Transformer;Associative Memory;Large Language Models;Interpretability;Fact retrieval", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/c0dc9855b260df970be71c5144a517dd32e9f536.zip", "author": "Yibo Jiang;Goutham Rajendran;Pradeep Kumar Ravikumar;Bryon Aragam", "authorids": "~Yibo_Jiang2;~Goutham_Rajendran1;~Pradeep_Kumar_Ravikumar1;~Bryon_Aragam1", "gender": "M;M;M;", "homepage": ";https://gouthamrdn.github.io/;http://www.cs.cmu.edu/~pradeepr/;http://bryonaragam.com/", "dblp": "54/2193;274/1323;94/3594;140/7564", "google_scholar": "hvQo2gQAAAAJ;YVrGTe8AAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ;u-W3_9QAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yibo_Jiang2;~Goutham_Rajendran1;~Pradeep_Kumar_Ravikumar1;~Bryon_Aragam1", "aff": "University of Chicago;Carnegie Mellon University;Carnegie Mellon University;Booth School of Business", "aff_domain": "uchicago.edu;cmu.edu;cmu.edu;chicagobooth.edu", "position": "PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njiang2024do,\ntitle={Do {LLM}s dream of elephants (when told not to)? Latent concept association and associative memory in transformers},\nauthor={Yibo Jiang and Goutham Rajendran and Pradeep Kumar Ravikumar and Bryon Aragam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WJ04ZX8txM}\n}", "github": "", "reviewers": "bm96;MLxQ;rSoN;vgcR", "pdf_size": 3703436, "rating": "5;6;6;7", "confidence": "3;3;4;3", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "3;3;2;3", "wc_summary": "67;74;57;128", "wc_strengths": "106;105;75;34", "wc_weaknesses": "198;128;179;230", "wc_questions": "106;1;166;46", "wc_limitations": "11;15;33;44", "wc_review": "488;323;510;482", "wc_reply_reviewers": "104;17;112;66", "wc_reply_authors": "300;27;139;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 27.518175811634027 ], "wc_strengths_avg": [ 80.0, 29.33428028774526 ], "wc_weaknesses_avg": [ 183.75, 36.98901864067226 ], "wc_questions_avg": [ 79.75, 62.18671481916375 ], "wc_limitations_avg": [ 25.75, 13.40475661845451 ], "wc_review_avg": [ 450.75, 74.4895126846726 ], "wc_reply_reviewers_avg": [ 74.75, 37.599035891894886 ], "wc_reply_authors_avg": [ 123.0, 112.03794000248308 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1627599467815439171&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 5, "email": "uchicago.edu;cmu.edu;cmu.edu;chicagobooth.edu", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Chicago;Carnegie Mellon University;University of Chicago Booth School of Business", "aff_unique_dep": ";;Booth School of Business", "aff_unique_url": "https://www.uchicago.edu;https://www.cmu.edu;https://www.chicagobooth.edu", "aff_unique_abbr": "UChicago;CMU;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Functional Extension of Semi-Structured Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94838", "id": "WJAiaslhin", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WJAiaslhin", "openreview": "https://openreview.net/forum?id=WJAiaslhin", "poster": "/media/PosterPDFs/NeurIPS%202024/94838.png?t=1729958682.706714", "project": "", "author_site": "David R\u00fcgamer, Bernard Liew, Zainab Altai, Almond St\u00f6cker", "tldr": "", "abstract": "Semi-structured networks (SSNs) merge the structures familiar from additive models with deep neural networks, allowing the modeling of interpretable partial feature effects while capturing higher-order non-linearities at the same time. A significant challenge in this integration is maintaining the interpretability of the additive model component. Inspired by large-scale biomechanics datasets, this paper explores extending SSNs to functional data. Existing methods in functional data analysis are promising but often not expressive enough to account for all interactions and non-linearities and do not scale well to large datasets. Although the SSN approach presents a compelling potential solution, its adaptation to functional data remains complex. In this work, we propose a functional SSN method that retains the advantageous properties of classical functional regression approaches while also improving scalability. Our numerical experiments demonstrate that this approach accurately recovers underlying signals, enhances predictive performance, and performs favorably compared to competing methods.", "keywords": "Functional Data;Neural Networks;Biomechanics", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "David R\u00fcgamer;Bernard X.W. Liew;Zainab Altai;Almond St\u00f6cker", "authorids": "~David_R\u00fcgamer1;~Bernard_X.W._Liew1;~Zainab_Altai1;~Almond_St\u00f6cker1", "gender": "M;M;F;", "homepage": "https://davidruegamer.github.io/;https://www.essex.ac.uk/people/LIEWB27501/Bernard-Liew;;", "dblp": "220/5560;;;", "google_scholar": "https://scholar.google.de/citations?user=_DYguksAAAAJ;https://scholar.google.co.uk/citations?user=-DIpq4wAAAAJ;VaL--SsAAAAJ;9rwG0MoAAAAJ", "orcid": ";;;0000-0001-9160-2397", "linkedin": ";;;", "or_profile": "~David_R\u00fcgamer1;~Bernard_X.W._Liew1;~Zainab_Altai1;~Almond_St\u00f6cker1", "aff": "LMU Munich;University of Essex;University of Essex;EPFL - EPF Lausanne", "aff_domain": "lmu.de;essex.ac.uk;essex.ac.uk;epfl.ch", "position": "Associate Professor;Lecturer;Principal Researcher;Postdoc", "bibtex": "@inproceedings{\nr{\\\"u}gamer2024a,\ntitle={A Functional Extension of Semi-Structured Networks},\nauthor={David R{\\\"u}gamer and Bernard X.W. Liew and Zainab Altai and Almond St{\\\"o}cker},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WJAiaslhin}\n}", "github": "", "reviewers": "DyRG;Ta2r;9i2N;SfMu", "pdf_size": 2874178, "rating": "5;6;6;6", "confidence": "2;3;3;2", "soundness": "3;3;3;3", "novelty": "2;3;2;1", "presentation": "2;4;3;3", "wc_summary": "30;22;76;51", "wc_strengths": "40;36;55;260", "wc_weaknesses": "202;50;104;299", "wc_questions": "13;84;148;296", "wc_limitations": "2;17;7;6", "wc_review": "287;209;390;912", "wc_reply_reviewers": "45;61;0;154", "wc_reply_authors": "726;167;0;104", "reply_reviewers": "2;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 44.75, 20.92098229051399 ], "wc_strengths_avg": [ 97.75, 93.94246909678284 ], "wc_weaknesses_avg": [ 163.75, 95.21652955238392 ], "wc_questions_avg": [ 135.25, 104.3728293187456 ], "wc_limitations_avg": [ 8.0, 5.522680508593631 ], "wc_review_avg": [ 449.5, 274.63293684480016 ], "wc_reply_reviewers_avg": [ 65.0, 56.040164168210644 ], "wc_reply_authors_avg": [ 249.25, 281.63751081842776 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14435941468310370513&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "lmu.de;essex.ac.uk;essex.ac.uk;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Ludwig Maximilian University of Munich;University of Essex;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lmu.de;https://www.essex.ac.uk;https://www.epfl.ch", "aff_unique_abbr": "LMU;Essex;EPFL", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Munich;;Lausanne", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Germany;United Kingdom;Switzerland" }, { "title": "Exploiting Representation Curvature for Boundary Detection in Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94837", "id": "WK2KxPAMQv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WK2KxPAMQv", "openreview": "https://openreview.net/forum?id=WK2KxPAMQv", "poster": "/media/PosterPDFs/NeurIPS%202024/94837.png?t=1729425716.231127", "project": "", "author_site": "Yooju Shin, Jaehyun Park, Susik Yoon, Hwanjun Song, Byung Suk Lee, Jae-Gil Lee", "tldr": "", "abstract": "*Boundaries* are the timestamps at which a class in a time series changes. Recently, representation-based boundary detection has gained popularity, but its emphasis on consecutive distance difference backfires, especially when the changes are gradual. In this paper, we propose a boundary detection method, **RECURVE**, based on a novel change metric, the ***curvature*** of a representation trajectory, to accommodate both gradual and abrupt changes. Here, a sequence of representations in the representation space is interpreted as a trajectory, and a curvature at each timestamp can be computed. Using the theory of random walk, we formally show that the mean curvature is lower near boundaries than at other points. Extensive experiments using diverse real-world time-series datasets confirm the superiority of RECURVE over state-of-the-art methods.", "keywords": "time series;representation;boundary detection", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yooju Shin;Jaehyun Park;Susik Yoon;Hwanjun Song;Byung Suk Lee;Jae-Gil Lee", "authorids": "~Yooju_Shin1;~Jaehyun_Park4;~Susik_Yoon1;~Hwanjun_Song2;~Byung_Suk_Lee1;~Jae-Gil_Lee1", "gender": "M;;;M;;M", "homepage": ";;http://www.susikyoon.com;https://songhwanjun.github.io/;;https://dm.kaist.ac.kr/jaegil/", "dblp": "https://dblp.uni-trier.de/pid/242/5190;;179/5307;204/3381;;28/3904", "google_scholar": "https://scholar.google.com/citations?hl=en;;tCJs1zEAAAAJ;Ijzuc-8AAAAJ;;https://scholar.google.com.tw/citations?user=h9mbv9MAAAAJ", "orcid": "0000-0002-1395-9136;;0000-0001-5596-4972;0000-0002-1105-0818;;0000-0002-8711-7732", "linkedin": ";;;;;", "or_profile": "~Yooju_Shin1;~Jaehyun_Park4;~Susik_Yoon1;~Hwanjun_Song2;~Byung_Suk_Lee1;~Jae-Gil_Lee1", "aff": "Korea Advanced Institute of Science & Technology;;Korea University;Amazon Web Services;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;;korea.ac.kr;amazon.com;;kaist.ac.kr", "position": "PhD student;;Assistant Professor;Research Scientist;;Full Professor", "bibtex": "@inproceedings{\nshin2024exploiting,\ntitle={Exploiting Representation Curvature for Boundary Detection in Time Series},\nauthor={Yooju Shin and Jaehyun Park and Susik Yoon and Hwanjun Song and Byung Suk Lee and Jae-Gil Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WK2KxPAMQv}\n}", "github": "", "reviewers": "PiYP;vKgw;aPkZ;ZR9k;EoQb", "pdf_size": 913647, "rating": "5;6;7;7;7", "confidence": "4;4;4;1;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "4;3;3;3;3", "wc_summary": "117;32;53;38;52", "wc_strengths": "91;31;26;21;116", "wc_weaknesses": "332;133;62;25;374", "wc_questions": "153;84;57;24;15", "wc_limitations": "22;8;1;1;1", "wc_review": "715;288;199;109;558", "wc_reply_reviewers": "83;19;8;10;19", "wc_reply_authors": "39;31;9;18;29", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 1.2 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 58.4, 30.388155587333692 ], "wc_strengths_avg": [ 57.0, 38.91015291668744 ], "wc_weaknesses_avg": [ 185.2, 141.9597125948063 ], "wc_questions_avg": [ 66.6, 49.67333288596609 ], "wc_limitations_avg": [ 6.6, 8.16333265278342 ], "wc_review_avg": [ 373.8, 227.32478967327782 ], "wc_reply_reviewers_avg": [ 27.8, 27.96712355606132 ], "wc_reply_authors_avg": [ 25.2, 10.514751542475933 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14709896880630679926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;;korea.ac.kr;amazon.com;;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University;Amazon", "aff_unique_dep": ";;Amazon Web Services", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr;https://aws.amazon.com", "aff_unique_abbr": "KAIST;KU;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "LLaMo: Large Language Model-based Molecular Graph Assistant", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94836", "id": "WKTNdU155n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WKTNdU155n", "openreview": "https://openreview.net/forum?id=WKTNdU155n", "poster": "", "project": "", "author_site": "Jinyoung Park, Minseong Bae, Dohwan Ko, Hyunwoo Kim", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable generalization and instruction-following capabilities with instruction tuning. The advancements in LLMs and instruction tuning have led to the development of Large Vision-Language Models (LVLMs). However, the competency of the LLMs and instruction tuning have been less explored in the molecular domain. Thus, we propose LLaMo: Large Language Model-based Molecular graph assistant, which is an end-to- end trained large molecular graph-language model. To bridge the discrepancy between the language and graph modalities, we present the multi-level graph projector that transforms graph representations into graph tokens by abstracting the output representations of each GNN layer and motif representations with the cross-attention mechanism. We also introduce machine-generated molecular graph instruction data to instruction-tune the large molecular graph-language model for general-purpose molecule and language understanding. Our extensive experiments demonstrate that LLaMo shows the best performance on diverse tasks, such as molecular description generation, property prediction, and IUPAC name prediction. The code of LLaMo is available at https://github.com/mlvlab/LLaMo.", "keywords": "Large Molecular Graph-Language Model;Molecular Graph-Language Instruction tuning;Graph Neural Networks", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Jinyoung Park;Minseong Bae;Dohwan Ko;Hyunwoo J. Kim", "authorids": "~Jinyoung_Park1;~Minseong_Bae1;~Dohwan_Ko1;~Hyunwoo_J._Kim3", "gender": "M;M;M;M", "homepage": ";;https://ikodoh.github.io;https://hyunwoojkim.com/publications", "dblp": "03/1524;;305/6692;150/4259", "google_scholar": "zThEyOYAAAAJ;;JoYPLHUAAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": ";;;0000-0002-2181-9264", "linkedin": "jinyoung-park-4861461a3/;minseong-bae-573745276/;dohwan-ko-4b232a14b/;", "or_profile": "~Jinyoung_Park1;~Minseong_Bae1;~Dohwan_Ko1;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;NEC;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;nec-labs.com;korea.ac.kr", "position": "PhD student;Undergrad student;Intern;Associate Professor", "bibtex": "@inproceedings{\npark2024llamo,\ntitle={{LL}aMo: Large Language Model-based Molecular Graph Assistant},\nauthor={Jinyoung Park and Minseong Bae and Dohwan Ko and Hyunwoo J. Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WKTNdU155n}\n}", "github": "", "reviewers": "3eLt;W78D;rvcw;P4Gm", "pdf_size": 1479460, "rating": "6;7;7;7", "confidence": "3;4;3;5", "soundness": "3;3;2;4", "novelty": "2;3;2;3", "presentation": "3;4;3;3", "wc_summary": "73;51;53;54", "wc_strengths": "69;84;48;65", "wc_weaknesses": "69;98;111;78", "wc_questions": "37;10;55;41", "wc_limitations": "39;16;27;8", "wc_review": "287;259;294;246", "wc_reply_reviewers": "20;14;18;17", "wc_reply_authors": "28;28;33;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 57.75, 8.870597499605086 ], "wc_strengths_avg": [ 66.5, 12.816005617976296 ], "wc_weaknesses_avg": [ 89.0, 16.47725705328408 ], "wc_questions_avg": [ 35.75, 16.29992331270304 ], "wc_limitations_avg": [ 22.5, 11.672617529928752 ], "wc_review_avg": [ 271.5, 19.704060495238032 ], "wc_reply_reviewers_avg": [ 17.25, 2.165063509461097 ], "wc_reply_authors_avg": [ 22.25, 13.007209539328564 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10663358223081845596&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "korea.ac.kr;korea.ac.kr;nec-labs.com;korea.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea University;NEC Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;https://www.nec.com", "aff_unique_abbr": "KU;NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;Japan" }, { "title": "BiVLC: Extending Vision-Language Compositionality Evaluation with Text-to-Image Retrieval", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97657", "id": "WMzQIP70O0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WMzQIP70O0", "openreview": "https://openreview.net/forum?id=WMzQIP70O0", "poster": "/media/PosterPDFs/NeurIPS%202024/97657.png?t=1731685303.5957475", "project": "", "author_site": "Imanol Miranda, Ander Salaberria, Eneko Agirre, Gorka Azkune", "tldr": "", "abstract": "Existing Vision-Language Compositionality (VLC) benchmarks like SugarCrepe are formulated as image-to-text retrieval problems, where, given an image, the models need to select between the correct textual description and a synthetic hard negative text. In this work, we present the Bidirectional Vision-Language Compositionality (BiVLC) dataset. The novelty of BiVLC is to add a synthetic hard negative image generated from the synthetic text, resulting in two image-to-text retrieval examples (one for each image) and, more importantly, two text-to-image retrieval examples (one for each text). Human annotators filter out ill-formed examples ensuring the validity of the benchmark. The experiments on BiVLC uncover a weakness of current multimodal models, as they perform poorly in the text-to-image direction. In fact, when considering both retrieval directions, the conclusions obtained in previous works change significantly. In addition to the benchmark, we\nshow that a contrastive model trained using synthetic images and texts significantly improves over the base model in SugarCrepe and in BiVLC for both retrieval directions. The gap to human performance in BiVLC confirms that Vision-Language Compositionality is still a challenging problem.", "keywords": "Vision-Language;Compositionality;Evaluation;Benchmark", "primary_area": "", "supplementary_material": "/attachment/f04d19b6561ff8d7afffd493b1c6a73910a8c245.pdf", "author": "Imanol Miranda;Ander Salaberria;Eneko Agirre;Gorka Azkune", "authorids": "~Imanol_Miranda1;~Ander_Salaberria1;~Eneko_Agirre1;~Gorka_Azkune1", "gender": "M;M;M;", "homepage": ";;http://ixa.si.ehu.eus/eneko;https://gazkune.github.io/", "dblp": "380/0286;262/3436;a/EnekoAgirre;14/10011", "google_scholar": "https://scholar.google.es/citations?user=C_tJ-WkAAAAJ;KyQMAxYAAAAJ;https://scholar.google.es/citations?user=kSuqts0AAAAJ;_1wx6NoAAAAJ", "orcid": "0009-0009-5500-4838;0000-0002-4277-3939;;0000-0002-2506-7426", "linkedin": "imanol-miranda/;;;gorka-azkune-80780035/?originalSubdomain=es", "or_profile": "~Imanol_Miranda1;~Ander_Salaberria1;~Eneko_Agirre1;~Gorka_Azkune1", "aff": "University of the Basque Country (UPV/EHU);Universidad del Pa\u00eds Vasco;University of the Basque Country (UPV/EHU);Universidad del Pa\u00eds Vasco", "aff_domain": "ehu.eus;ehu.eus;ehu.eus;ehu.es", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nmiranda2024bivlc,\ntitle={Bi{VLC}: Extending Vision-Language Compositionality Evaluation with Text-to-Image Retrieval},\nauthor={Imanol Miranda and Ander Salaberria and Eneko Agirre and Gorka Azkune},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WMzQIP70O0}\n}", "github": "", "reviewers": "oFAf;6Z5K;k2kk", "pdf_size": 2302447, "rating": "5;7;8", "confidence": "3;5;4", "wc_summary_and_contributions": "31;145;99", "wc_strengths": "5;120;45", "wc_improvement": "5;638;56", "wc_limitations": "5;57;1", "wc_correctness": "2;34;1", "wc_clarity": "62;9;1", "wc_relation_to_prior_work": "9;9;1", "wc_documentation": "24;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "144;1028;206", "wc_reply_reviewers": "0;52;0", "wc_reply_authors": "404;2117;294", "reply_reviewers": "0;1;0", "reply_authors": "1;4;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 91.66666666666667, 46.828291543562514 ], "wc_strengths_avg": [ 56.666666666666664, 47.66783215358364 ], "wc_improvement_avg": [ 233.0, 287.13411500551445 ], "wc_limitations_avg": [ 21.0, 25.508168626278653 ], "wc_correctness_avg": [ 12.333333333333334, 15.326085243430198 ], "wc_clarity_avg": [ 24.0, 27.067816067549053 ], "wc_relation_to_prior_work_avg": [ 6.333333333333333, 3.7712361663282534 ], "wc_documentation_avg": [ 13.333333333333334, 9.463379711052259 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 459.3333333333333, 402.90390362412177 ], "wc_reply_reviewers_avg": [ 17.333333333333332, 24.513035081133648 ], "wc_reply_authors_avg": [ 938.3333333333334, 834.6521564233942 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5966429543979659461&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ehu.eus;ehu.eus;ehu.eus;ehu.es", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of the Basque Country;Universidad del Pa\u00eds Vasco", "aff_unique_dep": ";", "aff_unique_url": "https://www.ehu.eus/en;https://www.ehu.eus/en", "aff_unique_abbr": "UPV/EHU;UPV/EHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Spain" }, { "title": "Dual-frame Fluid Motion Estimation with Test-time Optimization and Zero-divergence Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94835", "id": "WOBhJs9gqU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WOBhJs9gqU", "openreview": "https://openreview.net/forum?id=WOBhJs9gqU", "poster": "", "project": "", "author_site": "Yifei Zhang, Huan-ang Gao, zhou jiang, Hao Zhao", "tldr": "", "abstract": "3D particle tracking velocimetry (PTV) is a key technique for analyzing turbulent flow, one of the most challenging computational problems of our century. At the core of 3D PTV is the dual-frame fluid motion estimation algorithm, which tracks particles across two consecutive frames. Recently, deep learning-based methods have achieved impressive accuracy in dual-frame fluid motion estimation; however, they heavily depend on large volumes of labeled data. In this paper, we introduce a new method that is **completely self-supervised and notably outperforms its fully-supervised counterparts while requiring only 1\\% of the training samples (without labels) used by previous methods.** Our method features a novel zero-divergence loss that is specific to the domain of turbulent flow. Inspired by the success of splat operation in high-dimensional filtering and random fields, we propose a splat-based implementation for this loss which is both efficient and effective. The self-supervised nature of our method naturally supports test-time optimization, leading to the development of a tailored Dynamic Velocimetry Enhancer (DVE) module. We demonstrate that strong cross-domain robustness is achieved through test-time optimization on unseen leave-one-out synthetic domains and real physical/biological domains. Code, data and models are available at [https://github.com/Forrest-110/FluidMotionNet](https://github.com/Forrest-110/FluidMotionNet).", "keywords": "Dual-frame Fluid Motion Estimation;Test-time Optimization;Self-supervision;Data Efficiency;Cross-domain Robustness", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Yifei Zhang;Huan-ang Gao;Zhou Jiang;Hao Zhao", "authorids": "~Yifei_Zhang14;~Huan-ang_Gao1;~Zhou_Jiang1;~Hao_Zhao1", "gender": "M;M;M;M", "homepage": ";https://c7w.tech/about;https://github.com/Jzian;https://sites.google.com/view/fromandto", "dblp": ";339/0975;;08/3737-2.html", "google_scholar": ";WvbKfLgAAAAJ;;ygQznUQAAAAJ", "orcid": "0009-0005-7212-0743;;;", "linkedin": ";;;", "or_profile": "~Yifei_Zhang14;~Huan-ang_Gao1;~Zhou_Jiang1;~Hao_Zhao1", "aff": "University of Chinese Academy of Sciences;Tsinghua University;Beijing Institute of Technology;Peking University", "aff_domain": "ucas.ac.cn;cs.tsinghua.edu.cn;bit.edu.cn;pku.edu.cn", "position": "Undergrad student;Undergrad student;MS student;Postdoc", "bibtex": "@inproceedings{\nzhang2024dualframe,\ntitle={Dual-frame Fluid Motion Estimation with Test-time Optimization and Zero-divergence Loss},\nauthor={Yifei Zhang and Huan-ang Gao and Zhou Jiang and Hao Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WOBhJs9gqU}\n}", "github": "", "reviewers": "N6ux;wM8x;jdpi", "pdf_size": 3737580, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "145;63;74", "wc_strengths": "78;38;52", "wc_weaknesses": "1277;100;78", "wc_questions": "191;33;2", "wc_limitations": "261;1;9", "wc_review": "1952;235;215", "wc_reply_reviewers": "0;0;20", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 94.0, 36.34097778908359 ], "wc_strengths_avg": [ 56.0, 16.57307052620807 ], "wc_weaknesses_avg": [ 485.0, 560.1005862045376 ], "wc_questions_avg": [ 75.33333333333333, 82.76204175559926 ], "wc_limitations_avg": [ 90.33333333333333, 120.72374340709545 ], "wc_review_avg": [ 800.6666666666666, 814.1565506024557 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15386317869655790396&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ucas.ac.cn;cs.tsinghua.edu.cn;bit.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Chinese Academy of Sciences;Tsinghua University;Beijing Institute of Technology;Peking University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.tsinghua.edu.cn;http://www.bit.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "UCAS;THU;BIT;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "IPO: Interpretable Prompt Optimization for Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94834", "id": "WPPC7FHtaM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WPPC7FHtaM", "openreview": "https://openreview.net/forum?id=WPPC7FHtaM", "poster": "/media/PosterPDFs/NeurIPS%202024/94834.png?t=1731687098.0673008", "project": "", "author_site": "Yingjun Du, Wenfang Sun, Cees Snoek", "tldr": "", "abstract": "Pre-trained vision-language models like CLIP have remarkably adapted to various downstream tasks. Nonetheless, their performance heavily depends on the specificity of the input text prompts, which requires skillful prompt template engineering. Instead, current approaches to prompt optimization learn the prompts through gradient descent, where the prompts are treated as adjustable parameters. However, these methods tend to lead to overfitting of the base classes seen during training and produce prompts that are no longer understandable by humans. This paper introduces a simple but interpretable prompt optimizer (IPO), that utilizes large language models (LLMs) to generate textual prompts dynamically. We introduce a Prompt Optimization Prompt that not only guides LLMs in creating effective prompts but also stores past prompts with their performance metrics, providing rich in-context information. Additionally, we incorporate a large multimodal model (LMM) to condition on visual content by generating image descriptions, which enhance the interaction between textual and visual modalities. This allows for the creation of dataset-specific prompts that improve generalization performance, while maintaining human comprehension. Extensive testing across 11 datasets reveals that IPO not only improves the accuracy of existing gradient-descent-based prompt learning methods but also considerably enhances the interpretability of the generated prompts. By leveraging the strengths of LLMs, our approach ensures that the prompts remain human-understandable, thereby facilitating better transparency and oversight for vision-language models.", "keywords": "Prompt learning;Large language model;Interpretable Prompt Optimization", "primary_area": "machine_vision", "supplementary_material": "/attachment/f5735de8d5bc02378508a5a32407b8753e2b6e55.zip", "author": "Yingjun Du;Wenfang Sun;Cees G. M. Snoek", "authorids": "~Yingjun_Du1;~Wenfang_Sun2;~Cees_G._M._Snoek1", "gender": "M;M;M", "homepage": "https://yingjundu.github.io/;;http://www.ceessnoek.info", "dblp": "263/6794;47/2007;s/CeesSnoek", "google_scholar": "oAeW6rAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.nl/citations?user=0uKdbscAAAAJ", "orcid": ";;0000-0001-9092-1556", "linkedin": "%E8%8B%B1%E5%86%9B-%E6%9D%9C-a938a0174/;ustc-swf;cgmsnoek/", "or_profile": "~Yingjun_Du1;~Wenfang_Sun2;~Cees_Snoek1", "aff": "University of Amsterdam;University of Science and Technology of China;University of Amsterdam", "aff_domain": "uva.nl;ustc.edu.cn;uva.nl", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\ndu2024ipo,\ntitle={{IPO}: Interpretable Prompt Optimization for Vision-Language Models},\nauthor={Yingjun Du and Wenfang Sun and Cees G. M. Snoek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WPPC7FHtaM}\n}", "github": "", "reviewers": "4MWY;U899;XUaZ;xRmh;VMZQ", "pdf_size": 6152997, "rating": "5;5;6;6;6", "confidence": "5;3;4;5;5", "soundness": "3;3;3;3;1", "novelty": "2;3;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "27;49;182;63;114", "wc_strengths": "17;30;89;52;121", "wc_weaknesses": "167;70;176;102;323", "wc_questions": "65;10;51;29;275", "wc_limitations": "20;1;2;1;19", "wc_review": "296;160;500;247;852", "wc_reply_reviewers": "17;0;17;26;240", "wc_reply_authors": "0;0;0;25;739", "reply_reviewers": "1;0;1;1;3", "reply_authors": "1;1;1;2;3", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 87.0, 55.45087916345421 ], "wc_strengths_avg": [ 61.8, 38.363524342792076 ], "wc_weaknesses_avg": [ 167.6, 87.23439688563221 ], "wc_questions_avg": [ 86.0, 96.34521264702258 ], "wc_limitations_avg": [ 8.6, 8.912911982062878 ], "wc_review_avg": [ 411.0, 247.19385105621055 ], "wc_reply_reviewers_avg": [ 60.0, 90.39247756312469 ], "wc_reply_authors_avg": [ 152.8, 293.2598847438906 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4457046917487062544&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "uva.nl;ustc.edu.cn;uva.nl", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;http://www.ustc.edu.cn", "aff_unique_abbr": "UvA;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;China" }, { "title": "Estimating Epistemic and Aleatoric Uncertainty with a Single Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94833", "id": "WPxa6OcIdg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WPxa6OcIdg", "openreview": "https://openreview.net/forum?id=WPxa6OcIdg", "poster": "/media/PosterPDFs/NeurIPS%202024/94833.png?t=1730958374.373685", "project": "", "author_site": "Matthew Chan, Maria Molina, Chris Metzler", "tldr": "", "abstract": "Estimating and disentangling epistemic uncertainty, uncertainty that is reducible with more training data, and aleatoric uncertainty, uncertainty that is inherent to the task at hand, is critically important when applying machine learning to high-stakes applications such as medical imaging and weather forecasting. Conditional diffusion models' breakthrough ability to accurately and efficiently sample from the posterior distribution of a dataset now makes uncertainty estimation conceptually straightforward: One need only train and sample from a large ensemble of diffusion models. Unfortunately, training such an ensemble becomes computationally intractable as the complexity of the model architecture grows. In this work we introduce a new approach to ensembling, hyper-diffusion models (HyperDM), which allows one to accurately estimate both epistemic and aleatoric uncertainty with a single model. Unlike existing single-model uncertainty methods like Monte-Carlo dropout and Bayesian neural networks, HyperDM offers prediction accuracy on par with, and in some cases superior to, multi-model ensembles. Furthermore, our proposed approach scales to modern network architectures such as Attention U-Net and yields more accurate uncertainty estimates compared to existing methods. We validate our method on two distinct real-world tasks: x-ray computed tomography reconstruction and weather temperature forecasting.", "keywords": "uncertainty estimation;diffusion models;hypernetworks", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/92c95ea8597a5dd7cb6bdbc77ac0455532b6aef9.zip", "author": "Matthew Albert Chan;Maria J. Molina;Christopher Metzler", "authorids": "~Matthew_Albert_Chan1;~Maria_J._Molina1;~Christopher_Metzler1", "gender": "M;F;M", "homepage": "https://www.cs.umd.edu/people/mattchan;https://mariajmolina.github.io/;https://www.cs.umd.edu/~metzler/", "dblp": ";;147/4828", "google_scholar": "vPPdjg0AAAAJ;COEg9-sAAAAJ;on7GFpYAAAAJ", "orcid": "0000-0002-8523-8063;;", "linkedin": ";;", "or_profile": "~Matthew_Albert_Chan1;~Maria_J._Molina1;~Christopher_Metzler1", "aff": "Sony AI;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "sony.com;umd.edu;umd.edu", "position": "Intern;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchan2024estimating,\ntitle={Estimating Epistemic and Aleatoric Uncertainty with a Single Model},\nauthor={Matthew Albert Chan and Maria J. Molina and Christopher Metzler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WPxa6OcIdg}\n}", "github": "", "reviewers": "cmte;zgME;YZoM;pwCt", "pdf_size": 4678102, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "3;2;4;3", "wc_summary": "81;62;69;184", "wc_strengths": "140;38;93;36", "wc_weaknesses": "142;182;337;175", "wc_questions": "357;30;61;63", "wc_limitations": "9;8;9;47", "wc_review": "729;320;569;505", "wc_reply_reviewers": "26;0;0;86", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.0, 49.542910693660296 ], "wc_strengths_avg": [ 76.75, 43.089296814870394 ], "wc_weaknesses_avg": [ 209.0, 75.4287743503764 ], "wc_questions_avg": [ 127.75, 133.00258456135353 ], "wc_limitations_avg": [ 18.25, 16.60383991732033 ], "wc_review_avg": [ 530.75, 146.49637367525517 ], "wc_reply_reviewers_avg": [ 28.0, 35.12833614050059 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6467530845345384409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sony.com;umd.edu;umd.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Sony;University of Maryland", "aff_unique_dep": "Sony AI;", "aff_unique_url": "https://www.sony.com;https://www/umd.edu", "aff_unique_abbr": "Sony AI;UMD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Japan;United States" }, { "title": "Query-Efficient Correlation Clustering with Noisy Oracle", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94832", "id": "WRCFuoiz1h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WRCFuoiz1h", "openreview": "https://openreview.net/forum?id=WRCFuoiz1h", "poster": "/media/PosterPDFs/NeurIPS%202024/94832.png?t=1731079001.623746", "project": "", "author_site": "Yuko Kuroki, Atsushi Miyauchi, Francesco Bonchi, Wei Chen", "tldr": "", "abstract": "We study a general clustering setting in which we have $n$ elements to be clustered, and we aim to perform as few queries as possible to an oracle that returns a noisy sample of the weighted similarity between two elements. Our setting encompasses many application domains in which the similarity function is costly to compute and inherently noisy. We introduce two novel formulations of online learning problems rooted in the paradigm of Pure Exploration in Combinatorial Multi-Armed Bandits (PE-CMAB): fixed confidence and fixed budget settings. For both settings, we design algorithms that combine a sampling strategy with a classic approximation algorithm for correlation clustering and study their theoretical guarantees. Our results are the first examples of polynomial-time algorithms that work for the case of PE-CMAB in which the underlying offline optimization problem is NP-hard.", "keywords": "Correlation Clustering; Online Learning; Pure Exploration of Multi-armed Bandits", "primary_area": "optimization", "supplementary_material": "", "author": "Yuko Kuroki;Atsushi Miyauchi;Francesco Bonchi;Wei Chen", "authorids": "~Yuko_Kuroki1;~Atsushi_Miyauchi1;~Francesco_Bonchi2;~Wei_Chen10", "gender": "F;M;M;M", "homepage": "https://sites.google.com/view/yuko-kuroki/home;https://sites.google.com/view/miyauchi/home?pli=1;https://www.francescobonchi.com/;https://www.microsoft.com/en-us/research/people/weic/", "dblp": "192/2019.html;136/5974-1.html;b/FBonchi;c/WeiChen13", "google_scholar": "pBdtcoYAAAAJ;yMyLQ2cAAAAJ;R1Jt75cAAAAJ;hlEPkxAAAAAJ", "orcid": "0009-0006-9589-9339;0000-0002-6033-6433;0000-0001-9464-8315;", "linkedin": ";https://linkedin.com/in/atsushi-miyauchi-75a273188;;", "or_profile": "~Yuko_Kuroki1;~Atsushi_Miyauchi1;~Francesco_Bonchi2;~Wei_Chen10", "aff": "CENTAI Institute;CENTAI Institute;Centai;Microsoft Research", "aff_domain": "centai.eu;centai.eu;centai.eu;microsoft.com", "position": "Postdoc;Researcher;Principal Researcher;Pricipal Researcher", "bibtex": "@inproceedings{\nkuroki2024queryefficient,\ntitle={Query-Efficient Correlation Clustering with Noisy Oracle},\nauthor={Yuko Kuroki and Atsushi Miyauchi and Francesco Bonchi and Wei Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WRCFuoiz1h}\n}", "github": "", "reviewers": "B6XK;7MDd;RiHB;S1WA", "pdf_size": 656346, "rating": "5;6;7;7", "confidence": "3;2;4;3", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "55;171;150;37", "wc_strengths": "77;76;60;62", "wc_weaknesses": "205;206;617;18", "wc_questions": "1;7;216;12", "wc_limitations": "1;10;1;1", "wc_review": "339;470;1044;130", "wc_reply_reviewers": "0;312;272;11", "wc_reply_authors": "0;485;561;0", "reply_reviewers": "0;2;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.25, 58.07914858191363 ], "wc_strengths_avg": [ 68.75, 7.790218225441442 ], "wc_weaknesses_avg": [ 261.5, 219.05764081629292 ], "wc_questions_avg": [ 59.0, 90.72761431890514 ], "wc_limitations_avg": [ 3.25, 3.897114317029974 ], "wc_review_avg": [ 495.75, 338.96340141673113 ], "wc_reply_reviewers_avg": [ 148.75, 143.99891492646742 ], "wc_reply_authors_avg": [ 261.5, 262.8768723185819 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10179794801759553481&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "centai.eu;centai.eu;centai.eu;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "CENTAI Institute;CENTAI;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": ";;https://www.microsoft.com/en-us/research", "aff_unique_abbr": ";;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "General Articulated Objects Manipulation in Real Images via Part-Aware Diffusion Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94831", "id": "WRd9LCbvxN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WRd9LCbvxN", "openreview": "https://openreview.net/forum?id=WRd9LCbvxN", "poster": "/media/PosterPDFs/NeurIPS%202024/94831.png?t=1731756534.6433563", "project": "", "author_site": "ZHOU FANG, Yong-Lu Li, Lixin Yang, Cewu Lu", "tldr": "", "abstract": "Articulated object manipulation in real images is a fundamental step in computer and robotic vision tasks. Recently, several image editing methods based on diffusion models have been proposed to manipulate articulated objects according to text prompts. However, these methods often generate weird artifacts or even fail in real images. To this end, we introduce the Part-Aware Diffusion Model to approach the manipulation of articulated objects in real images. First, we develop Abstract 3D Models to represent and manipulate articulated objects efficiently. Then we propose dynamic feature maps to transfer the appearance of objects from input images to edited ones, meanwhile generating the novel-appearing parts reasonably. Extensive experiments are provided to illustrate the advanced manipulation capabilities of our method concerning state-of-the-art editing works. Additionally, we verify our method on 3D articulated object understanding for\nembodied robot scenarios and the promising results prove that our method supports this task strongly. The project page is https://mvig-rhos.com/pa_diffusion.", "keywords": "Articulated Objects;Arbitrary Manipulation;Real Images", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zhou FANG;Yong-Lu Li;Lixin Yang;Cewu Lu", "authorids": "~Zhou_FANG1;~Yong-Lu_Li1;~Lixin_Yang1;~Cewu_Lu3", "gender": ";M;M;M", "homepage": ";https://dirtyharrylyl.github.io/;https://lixiny.github.io;https://www.mvig.org/", "dblp": ";198/9345;59/4517-1;", "google_scholar": ";https://scholar.google.com.hk/citations?user=UExAaVgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=QZVQEWAAAAAJ", "orcid": ";0000-0003-0478-0692;0000-0001-6366-3192;", "linkedin": ";%E6%B0%B8%E9%9C%B2-%E6%9D%8E-991b99139/;;", "or_profile": "~Zhou_FANG1;~Yong-Lu_Li1;~Lixin_Yang1;~Cewu_Lu3", "aff": ";Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": ";Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nfang2024general,\ntitle={General Articulated Objects Manipulation in Real Images via Part-Aware Diffusion Process},\nauthor={Zhou FANG and Yong-Lu Li and Lixin Yang and Cewu Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WRd9LCbvxN}\n}", "github": "", "reviewers": "pkKj;YyMw;1MgD;N2Nu", "pdf_size": 24897467, "rating": "4;4;6;7", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "61;72;49;107", "wc_strengths": "98;26;4;127", "wc_weaknesses": "174;228;5;127", "wc_questions": "169;108;226;87", "wc_limitations": "14;2;1;27", "wc_review": "516;436;285;475", "wc_reply_reviewers": "352;204;21;0", "wc_reply_authors": "592;389;0;0", "reply_reviewers": "2;2;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.25, 21.649191670822262 ], "wc_strengths_avg": [ 63.75, 50.42011007524676 ], "wc_weaknesses_avg": [ 133.5, 82.34834546000302 ], "wc_questions_avg": [ 147.5, 54.41736855085883 ], "wc_limitations_avg": [ 11.0, 10.559356040971437 ], "wc_review_avg": [ 428.0, 87.27256155287296 ], "wc_reply_reviewers_avg": [ 144.25, 143.8130296600416 ], "wc_reply_authors_avg": [ 245.25, 255.53607866600754 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uY24ufrt0PsJ:scholar.google.com/&scioq=General+Articulated+Objects+Manipulation+in+Real+Images+via+Part-Aware+Diffusion+Process&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": ";sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "WindsorML: High-Fidelity Computational Fluid Dynamics Dataset For Automotive Aerodynamics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97656", "id": "WRxFVzx0uG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WRxFVzx0uG", "openreview": "https://openreview.net/forum?id=WRxFVzx0uG", "poster": "", "project": "", "author_site": "Neil Ashton, Jordan Angel, Aditya Ghate, Gaetan Kenway, Man Long Wong, Cetin Kiris, Astrid Walle, Danielle Maddix, Gary Page", "tldr": "", "abstract": "This paper presents a new open-source high-fidelity dataset for Machine Learning (ML) containing 355 geometric variants of the Windsor body, to help the development and testing of ML surrogate models for external automotive aerodynamics. Each Computational Fluid Dynamics (CFD) simulation was run with a GPU-native high-fidelity Wall-Modeled Large-Eddy Simulations (WMLES) using a Cartesian immersed-boundary method using more than 280M cells to ensure the greatest possible accuracy. The dataset contains geometry variants that exhibits a wide range of flow characteristics that are representative of those observed on road-cars. \nThe dataset itself contains the 3D time-averaged volume \\& boundary data as well as the geometry and force \\& moment coefficients. This paper discusses the validation of the underlying CFD methods as well as contents and structure of the dataset. To the authors knowledge, this represents the first, large-scale high-fidelity CFD dataset for the Windsor body with a permissive open-source license (CC-BY-SA).", "keywords": "CFD;dataset;automotive;Windsor;ML", "primary_area": "", "supplementary_material": "/attachment/fe9d12c43fc404db2d0e4c976f97e24da0c676dc.pdf", "author": "Neil Ashton;Jordan B. Angel;Aditya Ghate;Gaetan Kenway;Man Long Wong;Cetin C. Kiris;Astrid Walle;Danielle C. Maddix;Gary Page", "authorids": "~Neil_Ashton1;~Jordan_B._Angel1;~Aditya_Ghate1;~Gaetan_Kenway1;~Man_Long_Wong1;~Cetin_C._Kiris1;~Astrid_Walle1;~Danielle_C._Maddix1;~Gary_Page2", "gender": ";M;;M;M;M;F;;M", "homepage": "https://neilashton.co.uk;;;;;https://www.volcanoplatforms.com;;https://dcmaddix.github.io/;https://www.lboro.ac.uk/departments/aae/staff/gary-page/", "dblp": "163/8923;;;;;;;216/8804;", "google_scholar": "mtBY0RcAAAAJ;;cfMIfYkAAAAJ;9NhDyk8AAAAJ;https://scholar.google.com/citations?hl=en;;;IPDByA8AAAAJ;3bt-Kj0AAAAJ", "orcid": "0000-0002-9943-2334;;;;;;;;", "linkedin": "neilashton/;https://linkedin.com/in/jordan-b-angel-88b71254;;;;;https://de.linkedin.com/in/astridwalle;danielle-maddix-robinson/;gary-page-47380a15/", "or_profile": "~Neil_Ashton1;~Jordan_B._Angel1;~Aditya_Ghate1;~Gaetan_Kenway1;~Man_Long_Wong1;~Cetin_C._Kiris1;~Astrid_Walle1;~Danielle_C._Maddix1;~Gary_Page2", "aff": "Amazon Web Services;Volcano Platforms Inc;Volcano Platforms Inc. ;Volcano Platforms;Volcano Platforms Inc.;Volcano Platforms inc.;Siemens Energy;AWS AI Labs;Loughborough University", "aff_domain": "amazon.co.uk;volcanoplatforms.com;volcanoplatforms.com;volcanoplatforms.com;volcanoplatforms.com;volcanoplatforms.com;siemens-energy.com;amazon.com;lboro.ac.uk", "position": "Principal Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Developer;Applied Scientist;Full Professor", "bibtex": "@inproceedings{\nashton2024windsorml,\ntitle={Windsor{ML}: High-Fidelity Computational Fluid Dynamics Dataset For Automotive Aerodynamics},\nauthor={Neil Ashton and Jordan B. Angel and Aditya Ghate and Gaetan Kenway and Man Long Wong and Cetin C. Kiris and Astrid Walle and Danielle C. Maddix and Gary Page},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WRxFVzx0uG}\n}", "github": "", "reviewers": "bbMf;6P67;ZUyK;gnJc", "pdf_size": 2558581, "rating": "4;5;8;8", "confidence": "5;5;4;3", "wc_summary_and_contributions": "71;17;74;81", "wc_strengths": "71;9;73;3", "wc_improvement": "329;43;26;64", "wc_limitations": "36;112;9;1", "wc_correctness": "26;10;12;1", "wc_clarity": "5;102;103;1", "wc_relation_to_prior_work": "16;11;6;1", "wc_documentation": "26;6;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "581;311;309;154", "wc_reply_reviewers": "286;53;0;0", "wc_reply_authors": "0;47;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 60.75, 25.518375732009275 ], "wc_strengths_avg": [ 39.0, 33.075670817082454 ], "wc_improvement_avg": [ 115.5, 123.9969757695727 ], "wc_limitations_avg": [ 39.5, 43.82065722921097 ], "wc_correctness_avg": [ 12.25, 8.954747344286158 ], "wc_clarity_avg": [ 52.75, 49.77135220184398 ], "wc_relation_to_prior_work_avg": [ 8.5, 5.5901699437494745 ], "wc_documentation_avg": [ 9.5, 9.7082439194738 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 338.75, 153.68209882741712 ], "wc_reply_reviewers_avg": [ 84.75, 118.18920212946698 ], "wc_reply_authors_avg": [ 11.75, 20.351596988934308 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8866206949335731, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4125699278392735000&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "amazon.co.uk;volcanoplatforms.com;volcanoplatforms.com;volcanoplatforms.com;volcanoplatforms.com;volcanoplatforms.com;siemens-energy.com;amazon.com;lboro.ac.uk", "author_num": 9, "aff_unique_index": "0;1;2;3;2;3;4;0;5", "aff_unique_norm": "Amazon;Volcano Platforms Inc;Volcano Platforms Inc.;Volcano Platforms;Siemens Energy;Loughborough University", "aff_unique_dep": "Amazon Web Services;;;;;", "aff_unique_url": "https://aws.amazon.com;;;;https://www.siemens.com/global/en/home.html;https://www.lboro.ac.uk", "aff_unique_abbr": "AWS;;;;Siemens;Lboro", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;2;0;3", "aff_country_unique": "United States;;Germany;United Kingdom" }, { "title": "Safety through feedback in Constrained RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94830", "id": "WSsht66fbC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WSsht66fbC", "openreview": "https://openreview.net/forum?id=WSsht66fbC", "poster": "/media/PosterPDFs/NeurIPS%202024/94830.png?t=1733284472.2937357", "project": "", "author_site": "Shashank Reddy Chirra, Pradeep Varakantham, Praveen Paruchuri", "tldr": "", "abstract": "In safety-critical RL settings, the inclusion of an additional cost function is often favoured over the arduous task of modifying the reward function to ensure the agent's safe behaviour. However, designing or evaluating such a cost function can be prohibitively expensive. For instance, in the domain of self-driving, designing a cost function that encompasses all unsafe behaviours (e.g., aggressive lane changes, risky overtakes) is inherently complex, it must also consider all the actors present in the scene making it expensive to evaluate. In such scenarios, the cost function can be learned from feedback collected offline in between training rounds. This feedback can be system generated or elicited from a human observing the training process. Previous approaches have not been able to scale to complex environments and are constrained to receiving feedback at the state level which can be expensive to collect. To this end, we introduce an approach that scales to more complex domains and extends beyond state-level feedback, thus, reducing the burden on the evaluator. Inferring the cost function in such settings poses challenges, particularly in assigning credit to individual states based on trajectory-level feedback. To address this, we propose a surrogate objective that transforms the problem into a state-level supervised classification task with noisy labels, which can be solved efficiently. Additionally, it is often infeasible to collect feedback for every trajectory generated by the agent, hence, two fundamental questions arise: (1) Which trajectories should be presented to the human? and (2) How many trajectories are necessary for effective learning? To address these questions, we introduce a \\textit{novelty-based sampling} mechanism that selectively involves the evaluator only when the the agent encounters a \\textit{novel} trajectory, and discontinues querying once the trajectories are no longer \\textit{novel}. We showcase the efficiency of our method through experimentation on several benchmark Safety Gymnasium environments and realistic self-driving scenarios. Our method demonstrates near-optimal performance, comparable to when the cost function is known, by relying solely on trajectory-level feedback across multiple domains. This highlights both the effectiveness and scalability of our approach. The code to replicate these results can be found at \\href{https://github.com/shshnkreddy/RLSF}{https://github.com/shshnkreddy/RLSF}", "keywords": "Constrained RL;Cost Inference;Human Feedback", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/6f44721e5d39ad2a0f4c0516c50a6d7bca68412a.zip", "author": "Shashank Reddy Chirra;Pradeep Varakantham;Praveen Paruchuri", "authorids": "~Shashank_Reddy_Chirra1;~Pradeep_Varakantham1;~Praveen_Paruchuri1", "gender": "M;M;M", "homepage": ";http://www.mysmu.edu.sg/faculty/pradeepv;https://www.iiit.ac.in/people/faculty/praveen.p/", "dblp": ";72/759;22/1006", "google_scholar": "sJMID3oAAAAJ;https://scholar.google.com.sg/citations?user=BAdQpFkAAAAJ;https://scholar.google.com.tw/citations?user=ILUqgKEAAAAJ", "orcid": ";;", "linkedin": "shashank-reddy-6b13471b8/;;", "or_profile": "~Shashank_Reddy_Chirra1;~Pradeep_Varakantham1;~Praveen_Paruchuri1", "aff": "Singapore Management University;Singapore Management University;International Institute of Information Technology Hyderabad", "aff_domain": "smu.edu.sg;smu.edu.sg;iiit.ac.in", "position": "Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchirra2024safety,\ntitle={Safety through feedback in Constrained {RL}},\nauthor={Shashank Reddy Chirra and Pradeep Varakantham and Praveen Paruchuri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WSsht66fbC}\n}", "github": "", "reviewers": "xFMh;w6J6;1oHB;E6xa", "pdf_size": 14820680, "rating": "4;5;7;7", "confidence": "3;2;4;4", "soundness": "2;2;4;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "73;65;300;61", "wc_strengths": "33;36;163;27", "wc_weaknesses": "83;69;195;44", "wc_questions": "79;162;73;48", "wc_limitations": "8;10;1;86", "wc_review": "276;342;732;266", "wc_reply_reviewers": "72;26;38;0", "wc_reply_authors": "172;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.75, 101.27283693073873 ], "wc_strengths_avg": [ 64.75, 56.81714089955601 ], "wc_weaknesses_avg": [ 97.75, 57.85920410790318 ], "wc_questions_avg": [ 90.5, 42.88647805544307 ], "wc_limitations_avg": [ 26.25, 34.65815199920504 ], "wc_review_avg": [ 404.0, 191.60897682519993 ], "wc_reply_reviewers_avg": [ 34.0, 25.88435821108957 ], "wc_reply_authors_avg": [ 43.0, 74.47818472546173 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7543365091413573, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ty3Gi58qIDgJ:scholar.google.com/&scioq=Safety+through+feedback+in+Constrained+RL&hl=en&as_sdt=0,23", "gs_version_total": 5, "email": "smu.edu.sg;smu.edu.sg;iiit.ac.in", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Singapore Management University;International Institute of Information Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://iiit Hyderabad.ac.in", "aff_unique_abbr": "SMU;IIIT Hyderabad", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hyderabad", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;India" }, { "title": "Embedding-Aligned Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94829", "id": "WSu1PPi2UP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WSu1PPi2UP", "openreview": "https://openreview.net/forum?id=WSu1PPi2UP", "poster": "/media/PosterPDFs/NeurIPS%202024/94829.png?t=1733118125.3153496", "project": "", "author_site": "Guy Tennenholtz, Yinlam Chow, Chih-wei Hsu, Lior Shani, Yi Liang, Craig Boutilier", "tldr": "", "abstract": "We propose a novel approach for training large language models (LLMs) to adhere to objectives defined within a latent embedding space. Our method leverages reinforcement learning (RL), treating a pre-trained LLM as an environment. Our embedding-aligned guided language (EAGLE) agent is trained to iteratively steer the LLM's generation towards optimal regions of the latent embedding space, w.r.t. some predefined criterion. We demonstrate the effectiveness of the EAGLE agent using the MovieLens 25M and Amazon Review datasets to surface content gaps that satisfy latent user demand. We also demonstrate the benefit of using an optimal design of a state-dependent action set to improve EAGLE's efficiency. Our work paves the way for controlled and grounded text generation using LLMs, ensuring consistency with domain-specific knowledge and data representations.", "keywords": "language models;reinforcement learning;embedding spaces", "primary_area": "generative_models", "supplementary_material": "", "author": "Guy Tennenholtz;Yinlam Chow;ChihWei Hsu;Lior Shani;Yi Liang;Craig Boutilier", "authorids": "~Guy_Tennenholtz2;~Yinlam_Chow1;~ChihWei_Hsu2;~Lior_Shani2;~Yi_Liang1;~Craig_Boutilier2", "gender": ";M;;M;M;M", "homepage": "https://guytenn.com;;https://research.google/people/107258/;;https://research.google/people/108265/;https://research.google/people/craigboutilier/", "dblp": ";146/7869;;https://dblp.uni-trier.de/pers/s/Shani:Lior;;10/3411", "google_scholar": "https://scholar.google.co.il/citations?user=pldrn8IAAAAJ;;;https://scholar.google.co.il/citations?user=TrQLB1gAAAAJ;9vQ7gbgAAAAJ;cXkm3rsAAAAJ", "orcid": ";;0000-0001-9929-9951;;0000-0002-6622-8919;", "linkedin": ";;;;;", "or_profile": "~Guy_Tennenholtz2;~Yinlam_Chow1;~ChihWei_Hsu2;~Lior_Shani2;~Yi_Liang1;~Craig_Boutilier2", "aff": "Google;Google Research;Google Research;Google Research;Research, Google;Google", "aff_domain": "google.com;google.com;google.com;google.com;research.google.com;google.com", "position": "Researcher;Research Scientist;Software Engineer;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ntennenholtz2024embeddingaligned,\ntitle={Embedding-Aligned Language Models},\nauthor={Guy Tennenholtz and Yinlam Chow and ChihWei Hsu and Lior Shani and Yi Liang and Craig Boutilier},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WSu1PPi2UP}\n}", "github": "", "reviewers": "aThJ;3fAz;gKnd;9y2i", "pdf_size": 1490524, "rating": "5;5;7;7", "confidence": "4;4;4;3", "soundness": "2;3;4;3", "novelty": "2;2;4;3", "presentation": "3;3;3;4", "wc_summary": "88;378;151;50", "wc_strengths": "38;62;61;25", "wc_weaknesses": "63;287;83;75", "wc_questions": "41;58;280;20", "wc_limitations": "1;49;1;16", "wc_review": "231;834;576;186", "wc_reply_reviewers": "31;152;28;0", "wc_reply_authors": "0;74;13;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 166.75, 127.18760749381207 ], "wc_strengths_avg": [ 46.5, 15.692354826475215 ], "wc_weaknesses_avg": [ 127.0, 92.64987857520376 ], "wc_questions_avg": [ 99.75, 104.9342055766374 ], "wc_limitations_avg": [ 16.75, 19.60070151805797 ], "wc_review_avg": [ 456.75, 264.95601049985635 ], "wc_reply_reviewers_avg": [ 52.75, 58.56353387561239 ], "wc_reply_authors_avg": [ 21.75, 30.629846555280032 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12568584905165684822&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;google.com;google.com;google.com;research.google.com;google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97655", "id": "WTI4RJYSVm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WTI4RJYSVm", "openreview": "https://openreview.net/forum?id=WTI4RJYSVm", "poster": "/media/PosterPDFs/NeurIPS%202024/97655.png?t=1733344152.686536", "project": "", "author_site": "Artur Sza\u0142ata, Andrew Benz, Robrecht Cannoodt, Mauricio Cortes, Jason Fong, Sunil Kuppasani, Richard Lieberman, Tianyu Liu, Javier Mas-Rosario, Rico Meinl, Jalil Nourisa, Jared Tumiel, Tin M. Tunjic, Mengbo Wang, Noah Weber, Hongyu Zhao, Benedict Anchang, Fabian Theis, Malte Luecken, Daniel Burkhardt", "tldr": "", "abstract": "Single-cell transcriptomics has revolutionized our understanding of cellular heterogeneity and drug perturbation effects. However, its high cost and the vast chemical space of potential drugs present barriers to experimentally characterizing the effect of chemical perturbations in all the myriad cell types of the human body. To overcome these limitations, several groups have proposed using machine learning methods to directly predict the effect of chemical perturbations either across cell contexts or chemical space. However, advances in this field have been hindered by a lack of well-designed evaluation datasets and benchmarks. To drive innovation in perturbation modeling, the Open Problems Perturbation Prediction (OP3) benchmark introduces a framework for predicting the effects of small molecule perturbations on cell type-specific gene expression. OP3 leverages the Open Problems in Single-cell Analysis benchmarking infrastructure and is enabled by a new single-cell perturbation dataset, encompassing 146 compounds tested on human blood cells. The benchmark includes diverse data representations, evaluation metrics, and winning methods from our \"Single-cell perturbation prediction: generalizing experimental interventions to unseen contexts\" competition at NeurIPS 2023. We envision that the OP3 benchmark and competition will drive innovation in single-cell perturbation prediction by improving the accessibility, visibility, and feasibility of this challenge, thereby promoting the impact of machine learning in drug discovery.", "keywords": "computational biology;benchmarking datasets;benchmarks;single-cell;genomics;drug perturbations", "primary_area": "", "supplementary_material": "", "author": "Artur Sza\u0142ata;Andrew Benz;Robrecht Cannoodt;Mauricio Cortes;Jason Fong;Sunil Kuppasani;Richard Lieberman;Tianyu Liu;Javier A. Mas-Rosario;Rico Meinl;Jalil Nourisa;Jared Tumiel;Tin M. Tunjic;Mengbo Wang;Noah Weber;Hongyu Zhao;Benedict Anchang;Fabian J Theis;Malte D Luecken;Daniel B Burkhardt", "authorids": "~Artur_Sza\u0142ata1;~Andrew_Benz2;~Robrecht_Cannoodt1;~Mauricio_Cortes1;~Jason_Fong2;~Sunil_Kuppasani1;~Richard_Lieberman1;~Tianyu_Liu4;~Javier_A._Mas-Rosario1;~Rico_Meinl1;~Jalil_Nourisa1;~Jared_Tumiel1;~Tin_M._Tunjic1;~Mengbo_Wang1;~Noah_Weber3;~Hongyu_Zhao1;~Benedict_Anchang1;~Fabian_J_Theis1;~Malte_D_Luecken1;~Daniel_B_Burkhardt1", "gender": "Not Specified;M;M;M;M;M;M;M;M;M;M;;M;;M;M;;;M;", "homepage": ";;https://cannoodt.dev;;;http://www.linkedin.com/in/sunil-kuppasani-57744639;;https://helloworldlty.github.io/;;https://ricomnl.com;;;;;;https://ysph.yale.edu/profile/hongyu-zhao/;https://www.niehs.nih.gov/research/atniehs/labs/bb/staff/anchang;;;", "dblp": ";;;;;;;134/1099-5;;;;;;;;;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;rBFswv8AAAAJ;;;wa13avUAAAAJ;https://scholar.google.com/citations?hl=en;;;wSpyplkAAAAJ;;_YZn4-8AAAAJ;;https://scholar.google.com/citations?hl=de;__z1kpoAAAAJ;1GFblggAAAAJ;;https://scholar.google.de/citations?user=Nr4KBvUAAAAJ;", "orcid": "0000-0001-8413-234X;0009-0002-8118-1861;0000-0003-3641-729X;;0000-0001-6164-6110;;;0000-0002-9412-6573;;;;0009-0003-4795-6050;0000-0001-8842-6548;;;;;;0000-0001-7464-7921;", "linkedin": "artur-szalata/;andrew-benz/;;;jjhfong97;http://www.linkedin.com/in/sunil-kuppasani-57744639;richlieberman/;;https://linkedin.com/in/javier-mas-758360102;;;jared-tumiel-07a4b316b/;tin-m-tunjic-b78666144/;;noahweber123/;;;;;", "or_profile": "~Artur_Sza\u0142ata1;~Andrew_Benz2;~Robrecht_Cannoodt1;~Mauricio_Cortes1;~Jason_Fong2;~Sunil_Kuppasani1;~Richard_Lieberman1;~Tianyu_Liu4;~Javier_A._Mas-Rosario1;~Rico_Meinl1;~Jalil_Nourisa1;~Jared_Tumiel1;~Tin_M._Tunjic1;~Mengbo_Wang1;~Noah_Weber3;~Hongyu_Zhao1;~Benedict_Anchang1;~Fabian_J_Theis1;~Malte_D_Luecken1;~Daniel_B_Burkhardt1", "aff": "Helmholtz Zentrum M\u00fcnchen;Cellarity;Data Intuitive;Cellarity;Cellarity;;Cellarity;Genentech;Cellarity;Retro Biosciences;helmholtz zentrum hereon ;Retro Biosciences;Technische Universit\u00e4t Wien;;Olden Labs;Yale University;National Institutes of Health;;;", "aff_domain": "helmholtz-munich.de;cellarity.com;data-intuitive.com;cellarity.com;cellarity.com;;cellarity.com;gene.com;cellarity.com;retro.bio;hereon.de;retro.bio;tuwien.ac.at;;oldenlabs.com;yale.edu;nih.gov;;;", "position": "PhD student;Machine Learning Scientist;Data Science Consultant;Director of Platform Biology;Researcher;;Principal Researcher;Intern;Researcher;Researcher;Postdoc;Researcher;MS student;;Researcher;Full Professor;Principal Researcher;;;", "bibtex": "@inproceedings{\nsza{\\l}ata2024a,\ntitle={A benchmark for prediction of transcriptomic responses to chemical perturbations across cell types},\nauthor={Artur Sza{\\l}ata and Andrew Benz and Robrecht Cannoodt and Mauricio Cortes and Jason Fong and Sunil Kuppasani and Richard Lieberman and Tianyu Liu and Javier A. Mas-Rosario and Rico Meinl and Jalil Nourisa and Jared Tumiel and Tin M. Tunjic and Mengbo Wang and Noah Weber and Hongyu Zhao and Benedict Anchang and Fabian J Theis and Malte D Luecken and Daniel B Burkhardt},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WTI4RJYSVm}\n}", "github": "", "reviewers": "Cdu1;p5Db;daUw", "pdf_size": 22038845, "rating": "7;7;10", "confidence": "4;4;3", "wc_summary_and_contributions": "78;149;32", "wc_strengths": "2;48;21", "wc_improvement": "32;17;9", "wc_limitations": "1;8;1", "wc_correctness": "1;16;1", "wc_clarity": "1;11;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "20;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "137;252;68", "wc_reply_reviewers": "28;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 86.33333333333333, 48.12714641678044 ], "wc_strengths_avg": [ 23.666666666666668, 18.873850222522755 ], "wc_improvement_avg": [ 19.333333333333332, 9.533566430716728 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_correctness_avg": [ 6.0, 7.0710678118654755 ], "wc_clarity_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 152.33333333333334, 75.8961278473561 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 13.199326582148887 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18062945534948806549&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "helmholtz-munich.de;cellarity.com;data-intuitive.com;cellarity.com;cellarity.com;;cellarity.com;gene.com;cellarity.com;retro.bio;hereon.de;retro.bio;tuwien.ac.at;;oldenlabs.com;yale.edu;nih.gov;;;", "author_num": 20, "aff_unique_index": "0;1;2;1;1;1;3;1;4;5;4;6;7;8;9", "aff_unique_norm": "Helmholtz Zentrum M\u00fcnchen;Cellarity;Data Intuitive;Genentech;Retro Biosciences;Helmholtz-Zentrum Hereon;Technische Universit\u00e4t Wien;Olden Labs;Yale University;National Institutes of Health", "aff_unique_dep": ";;;;;;;;;", "aff_unique_url": "https://www.helmholtz-muenchen.de;;;https://www.genentech.com;;https://www.hereon.de;https://www.tuwien.ac.at;;https://www.yale.edu;https://www.nih.gov", "aff_unique_abbr": ";;;Genentech;;Hereon;TU Wien;;Yale;NIH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;2;2;0;2;3;2;2;2", "aff_country_unique": "Germany;;United States;Austria" }, { "title": "Statistical Estimation in the Spiked Tensor Model via the Quantum Approximate Optimization Algorithm", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94828", "id": "WTLvXdzhmP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WTLvXdzhmP", "openreview": "https://openreview.net/forum?id=WTLvXdzhmP", "poster": "/media/PosterPDFs/NeurIPS%202024/94828.png?t=1733795924.8741157", "project": "", "author_site": "Leo Zhou, Joao Basso, Song Mei", "tldr": "", "abstract": "The quantum approximate optimization algorithm (QAOA) is a general-purpose algorithm for combinatorial optimization that has been a promising avenue for near-term quantum advantage. \nIn this paper, we analyze the performance of the QAOA on the spiked tensor model, a statistical estimation problem that exhibits a large computational-statistical gap classically. \nWe prove that the weak recovery threshold of $1$-step QAOA matches that of $1$-step tensor power iteration. Additional heuristic calculations suggest that the weak recovery threshold of $p$-step QAOA matches that of $p$-step tensor power iteration when $p$ is a fixed constant. This further implies that multi-step QAOA with tensor unfolding could achieve, but not surpass, the asymptotic classical computation threshold $\\Theta(n^{(q-2)/4})$ for spiked $q$-tensors. \nMeanwhile, we characterize the asymptotic overlap distribution for $p$-step QAOA, discovering an intriguing sine-Gaussian law verified through simulations. For some $p$ and $q$, the QAOA has an effective recovery threshold that is a constant factor better than tensor power iteration.\nOf independent interest, our proof techniques employ the Fourier transform to handle difficult combinatorial sums, a novel approach differing from prior QAOA analyses on spin-glass models without planted structure.", "keywords": "quantum algorithm;statistical estimation;computational complexity;computational-statistical gap;optimization;variational quantum algorithm;quantum machine learning;statistical physics;average-case complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Leo Zhou;Joao Basso;Song Mei", "authorids": "~Leo_Zhou1;~Joao_Basso1;~Song_Mei1", "gender": "M;;M", "homepage": "https://leoxzhou.github.io/;https://joaomvbasso.github.io;https://www.stat.berkeley.edu/~songmei/", "dblp": ";;https://dblp.org/pers/hd/m/Mei:Song", "google_scholar": "rUcRJEkAAAAJ;;https://scholar.google.com.hk/citations?hl=en", "orcid": "0000-0001-7598-8621;;", "linkedin": ";;", "or_profile": "~Leo_Zhou1;~Joao_Basso1;~Song_Mei1", "aff": "California Institute of Technology;University of California, Berkeley;University of California, Berkeley", "aff_domain": "caltech.edu;berkeley.edu;berkeley.edu", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024statistical,\ntitle={Statistical Estimation in the Spiked Tensor Model via the Quantum Approximate Optimization Algorithm},\nauthor={Leo Zhou and Joao Basso and Song Mei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WTLvXdzhmP}\n}", "github": "", "reviewers": "R5pw;nUte;a21g;TiV7;9dK4", "pdf_size": 2052797, "rating": "7;7;7;7;8", "confidence": "3;3;4;3;3", "soundness": "4;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "4;3;3;4;4", "wc_summary": "299;106;40;174;221", "wc_strengths": "55;76;107;119;41", "wc_weaknesses": "125;64;22;64;68", "wc_questions": "105;29;18;44;231", "wc_limitations": "1;24;18;1;31", "wc_review": "585;299;205;402;592", "wc_reply_reviewers": "17;18;49;14;17", "wc_reply_authors": "0;0;28;0;27", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;1;2", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 168.0, 89.70395754926312 ], "wc_strengths_avg": [ 79.6, 29.70252514517916 ], "wc_weaknesses_avg": [ 68.6, 32.848744268236494 ], "wc_questions_avg": [ 85.4, 78.76699816547537 ], "wc_limitations_avg": [ 15.0, 12.149074038789951 ], "wc_review_avg": [ 416.6, 153.58463464813138 ], "wc_reply_reviewers_avg": [ 23.0, 13.069047402163633 ], "wc_reply_authors_avg": [ 11.0, 13.475904422338413 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1093296442741337288&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "caltech.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "California Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;https://www.berkeley.edu", "aff_unique_abbr": "Caltech;UC Berkeley", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Pasadena;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dataset and Lessons Learned from the 2024 SaTML LLM Capture-the-Flag Competition", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97654", "id": "WUWHVN4gxk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WUWHVN4gxk", "openreview": "https://openreview.net/forum?id=WUWHVN4gxk", "poster": "", "project": "", "author_site": "Edoardo Debenedetti, Javier Rando, Daniel Paleka, Silaghi Florin, Dragos Albastroiu, Niv Cohen, Yuval Lemberg, Reshmi Ghosh, Rui Wen, Ahmed Salem, Giovanni Cherubin, Santiago Zanella-Beguelin, Robin Schmid, Victor Klemm, Takahiro Miki, Chenhao Li, Stefan Kraft, Mario Fritz, Florian Tramer, Sahar Abdelnabi, Lea Sch\u00f6nherr", "tldr": "", "abstract": "Large language model systems face significant security risks from maliciously crafted messages that aim to overwrite the system's original instructions or leak private data. To study this problem, we organized a capture-the-flag competition at IEEE SaTML 2024, where the flag is a secret string in the LLM system prompt. The competition was organized in two phases. In the first phase, teams developed defenses to prevent the model from leaking the secret. During the second phase, teams were challenged to extract the secrets hidden for defenses proposed by the other teams. This report summarizes the main insights from the competition. Notably, we found that all defenses were bypassed at least once, highlighting the difficulty of designing a successful defense and the necessity for additional research to protect LLM systems. To foster future research in this direction, we compiled a dataset with over 137k multi-turn attack chats and open-sourced the platform.", "keywords": "competition;prompt injection;dataset;large language models;LLM;prompt extraction", "primary_area": "", "supplementary_material": "/attachment/8cbe7ba9125420bc07f209d156c9eee42a90c95b.pdf", "author": "Edoardo Debenedetti;Javier Rando;Daniel Paleka;Silaghi Fineas Florin;Dragos Albastroiu;Niv Cohen;Yuval Lemberg;Reshmi Ghosh;Rui Wen;Ahmed Salem;Giovanni Cherubin;Santiago Zanella-Beguelin;Robin Schmid;Victor Klemm;Takahiro Miki;Chenhao Li;Stefan Kraft;Mario Fritz;Florian Tram\u00e8r;Sahar Abdelnabi;Lea Sch\u00f6nherr", "authorids": "~Edoardo_Debenedetti1;~Javier_Rando2;~Daniel_Paleka1;~Silaghi_Fineas_Florin1;~Dragos_Albastroiu1;~Niv_Cohen1;~Yuval_Lemberg1;~Reshmi_Ghosh1;~Rui_Wen3;~Ahmed_Salem2;~Giovanni_Cherubin1;~Santiago_Zanella-Beguelin1;~Robin_Schmid2;~Victor_Klemm1;~Takahiro_Miki1;~Chenhao_Li3;~Stefan_Kraft2;~Mario_Fritz1;~Florian_Tram\u00e8r1;~Sahar_Abdelnabi1;~Lea_Sch\u00f6nherr1", "gender": "M;M;;M;M;M;M;F;M;;;M;M;M;M;;M;M;;M;", "homepage": "https://edoardo.science/;https://javirando.com;https://danielpaleka.com/;https://fineas.github.io/FeDEX/;https://adragos.ro/;https://www.cs.huji.ac.il/w~nivc/;;https://reshmighosh.github.io;https://ruiwen-ai.github.io/;;https://giocher.com;https://www.microsoft.com/en-us/research/people/santiago/;;https://victorklemm.com;;https://breadli428.github.io/;https://quadrilateral.party;https://cispa.saarland/group/fritz/;;https://s-abdelnabi.github.io/;", "dblp": "319/7073;257/3128;324/2779;;;259/2291;379/9155;324/2458;63/10765-2;;;70/2200.html;;;;186/9145;;;;248/7979;", "google_scholar": "6Urve9wAAAAJ;d_rilUYAAAAJ;;;;https://scholar.google.co.il/citations?user=ZMdC3OQAAAAJ;;ui8JeF5lKNMC;https://scholar.google.com/citations?hl=en;;wM290P0AAAAJ;boLOB98AAAAJ;VUrQP1cAAAAJ;-3pMVPUAAAAJ;nOl83tYAAAAJ;kw1-DxQAAAAJ;;https://scholar.google.de/citations?user=4V1nNm4AAAAJ;;https://scholar.google.de/citations?user=QEiYbDYAAAAJ;", "orcid": "0000-0003-3343-9477;;;;;;;;0009-0009-0691-7569;;;0000-0003-0479-9967;;0000-0002-6752-3397;;;;;;;", "linkedin": "https://linkedin.com/in/edoardo-debenedetti/;https://linkedin.com/in/javier-rando;;;dragosalbastroiu/;niv-cohen-39b49521/;yuv-lem-b5342223a/;reshmi-ghosh/;;;;santiago-zanella-b%C3%A9guelin-b59844165;schmid-robin/;https://linkedin.com/in/victor-klemm-6a68231ab;;chenhao-li-86080b1b0/;;;;sahar-abdelnabi-375b681a1/;", "or_profile": "~Edoardo_Debenedetti1;~Javier_Rando2;~Daniel_Paleka1;~Silaghi_Fineas_Florin1;~Dragos_Albastroiu1;~Niv_Cohen1;~Yuval_Lemberg1;~Reshmi_Ghosh1;~Rui_Wen3;~Ahmed_Salem2;~Giovanni_Cherubin1;~Santiago_Zanella-Beguelin1;~Robin_Schmid2;~Victor_Klemm1;~Takahiro_Miki1;~Chenhao_Li3;~Stefan_Kraft2;~Mario_Fritz1;~Florian_Tram\u00e8r1;~Sahar_Abdelnabi1;~Lea_Sch\u00f6nherr1", "aff": "Google;Department of Computer Science, ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;West University in Timisoara;ETHZ - ETH Zurich;Hebrew University of Jerusalem;Technion - Israel Institute of Technology, Technion;Microsoft;CISPA Helmholtz Center for Information Security;;Microsoft;Microsoft Research;ETHZ - ETH Zurich;ETHZ - ETH Zurich;;ETHZ - ETH Zurich;ZHdK - Zurich University of the Arts;Saarland University;;CISPA Helmholtz Center for Information Security;", "aff_domain": "google.com;inf.ethz.ch;inf.ethz.ch;e-uvt.ro;ethz.ch;huji.ac.il;campus.technion;microsoft.com;cispa.de;;research.microsoft.com;microsoft.com;ethz.ch;ethz.ch;;ethz.ch;zhdk.ch;uni-saarland.de;;cispa.de;", "position": "Intern;PhD student;PhD student;MS student;MS student;PhD student;Researcher;Researcher;PhD student;;Researcher;Principal Researcher;Researcher;PhD student;;PhD student;Researcher;Full Professor;;PhD student;", "bibtex": "@inproceedings{\ndebenedetti2024dataset,\ntitle={Dataset and Lessons Learned from the 2024 Sa{TML} {LLM} Capture-the-Flag Competition},\nauthor={Edoardo Debenedetti and Javier Rando and Daniel Paleka and Silaghi Fineas Florin and Dragos Albastroiu and Niv Cohen and Yuval Lemberg and Reshmi Ghosh and Rui Wen and Ahmed Salem and Giovanni Cherubin and Santiago Zanella-Beguelin and Robin Schmid and Victor Klemm and Takahiro Miki and Chenhao Li and Stefan Kraft and Mario Fritz and Florian Tram{\\`e}r and Sahar Abdelnabi and Lea Sch{\\\"o}nherr},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WUWHVN4gxk}\n}", "github": "", "reviewers": "uyPX;VFHN;E9B1", "pdf_size": 2661782, "rating": "8;8;9", "confidence": "4;3;5", "wc_summary_and_contributions": "113;37;56", "wc_strengths": "90;37;44", "wc_improvement": "8;72;31", "wc_limitations": "8;79;1", "wc_correctness": "1;1;15", "wc_clarity": "1;1;6", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "224;230;156", "wc_reply_reviewers": "44;26;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 8.333333333333334, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 68.66666666666667, 32.293790252754306 ], "wc_strengths_avg": [ 57.0, 23.50886357667394 ], "wc_improvement_avg": [ 37.0, 26.47010892812243 ], "wc_limitations_avg": [ 29.333333333333332, 35.23571420527127 ], "wc_correctness_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 203.33333333333334, 33.559234529741914 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 18.06162291219209 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 21, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15916798072170056184&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;inf.ethz.ch;inf.ethz.ch;e-uvt.ro;ethz.ch;huji.ac.il;campus.technion;microsoft.com;cispa.de;;research.microsoft.com;microsoft.com;ethz.ch;ethz.ch;;ethz.ch;zhdk.ch;uni-saarland.de;;cispa.de;", "author_num": 21, "aff_unique_index": "0;1;1;2;1;3;4;5;6;5;5;1;1;1;7;8;6", "aff_unique_norm": "Google;ETH Zurich;West University;Hebrew University of Jerusalem;Technion - Israel Institute of Technology;Microsoft;CISPA Helmholtz Center for Information Security;Zurich University of the Arts;Saarland University", "aff_unique_dep": "Google;Department of Computer Science;;;;Microsoft Corporation;;;", "aff_unique_url": "https://www.google.com;https://www.ethz.ch;https://www.uvt.ro;https://www.huji.ac.il;https://www.technion.ac.il;https://www.microsoft.com;https://www.cispa.de/;https://www.zhdk.ch;https://www.uni-saarland.de", "aff_unique_abbr": "Google;ETHZ;;HUJI;Technion;Microsoft;CISPA;ZHdK;UdS", "aff_campus_unique_index": "0;1;1;2;4;1", "aff_campus_unique": "Mountain View;Zurich;Timisoara;;Jerusalem", "aff_country_unique_index": "0;1;1;2;1;3;3;0;4;0;0;1;1;1;1;4;4", "aff_country_unique": "United States;Switzerland;Romania;Israel;Germany" }, { "id": "WVQ4Clw1VD", "title": "MedTrinity-25M: A Large-scale Multimodal Dataset with Multigranular Annotations for Medicine", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "This paper introduces MedTrinity-25M, a comprehensive, large-scale multimodal dataset for medicine, covering over 25 million images across 10 modalities, with multigranular annotations for more than 65 diseases. These enriched annotations encompass both global textual information, such as disease/lesion type, modality, region-specific descriptions, and inter-regional relationships, as well as detailed local annotations for regions of interest (ROIs), including bounding boxes, segmentation masks. Unlike existing approach which is limited by the availability of image-text pairs, we have developed the first automated pipeline that scales up multimodal data by generating multigranular visual and texual annotations (in the form of image-ROI-description triplets) without the need for any paired text descriptions. Specifically, data from over 90 different sources have been collected, preprocessed, and grounded using domain-specific expert models to identify ROIs related to abnormal regions. \nWe then build a comprehensive knowledge base and prompt multimodal large language models to perform retrieval-augmented generation with the identified ROIs as guidance, resulting in multigranular texual descriptions. \nCompared to existing datasets, MedTrinity-25M provides the most enriched annotations, supporting a comprehensive range of multimodal tasks such as captioning and report generation, as well as vision-centric tasks like classification and segmentation. This dataset can be utilized to support large-scale pre-training of multimodal medical AI models, contributing to the development of future foundation models in the medical domain. The dataset is publicly available at https://yunfeixie233.github.io/MedTrinity-25M/.", "keywords": "Medical Foundation Model;Multimodal Dataset;Vision-Language Pretraining.", "primary_area": "", "supplementary_material": "/attachment/6d14b7be70e72c876baff671dd97fb7fed69fc36.pdf", "author": "Yunfei Xie;Ce Zhou;Lang Gao;Juncheng Wu;Xianhang Li;Hong-Yu Zhou;Sheng Liu;Lei Xing;James Zou;Cihang Xie;Yuyin Zhou", "authorids": "~Yunfei_Xie1;~Ce_Zhou1;~Lang_Gao1;~Juncheng_Wu1;~Xianhang_Li1;~Hong-Yu_Zhou2;~Sheng_Liu2;~Lei_Xing1;~James_Zou1;~Cihang_Xie3;~Yuyin_Zhou1", "gender": "M;;M;M;M;;;M;;;", "homepage": "https://yunfeixie233.github.io/;;https://github.com/HeartyHaven;https://chtholly17.github.io/;https://xhl-video.github.io/xianhangli/;;https://shengliu66.github.io/;http://med.stanford.edu/xinglab.html;;;https://yuyinzhou.github.io/", "dblp": ";;223/7015;371/4972;268/5945;;;;;;192/1413", "google_scholar": ";;LzKcdl8AAAAJ;https://scholar.google.com/citations?hl=en;YKpFz4YAAAAJ;;rzhzR-cAAAAJ;;23ZXZvEAAAAJ;;eiqVLC0AAAAJ", "orcid": ";;0009-0004-8742-7708;;;;;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Yunfei_Xie1;~Ce_Zhou1;~Lang_Gao1;~Juncheng_Wu1;~Xianhang_Li1;~Hong-Yu_Zhou2;~Sheng_Liu2;~Lei_Xing1;~James_Zou1;~Cihang_Xie3;~Yuyin_Zhou1", "aff": "Huazhong University of Science and Technology;;Huazhong University of Science and Technology;Tongji University;University of California, Santa Cruz;;Stanford University;Stanford University;Stanford University;;University of California, Santa Cruz", "aff_domain": "hust.edu.cn;;hust.edu.cn;tongji.edu.cn;ucsc.edu;;stanford.edu;stanford.edu;stanford.edu;;ucsc.edu", "position": "Undergrad student;;Undergrad student;Undergrad student;PhD student;;Postdoc;Professor, Dept of Radiation Oncology,;Assistant Professor;;Assistant Professor", "bibtex": "@misc{\nanonymous2024medtrinitym,\ntitle={MedTrinity-25M: A Large-scale Multimodal Dataset with Multigranular Annotations for Medicine},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=WVQ4Clw1VD}\n}", "github": "", "project": "", "reviewers": "hsbA;1AYs;VFKd", "site": "https://openreview.net/forum?id=WVQ4Clw1VD", "pdf_size": 3148667, "rating": "5;6;6", "confidence": "4;4;3", "wc_summary_and_contributions": "61;161;22", "wc_strengths": "27;17;71", "wc_improvement": "90;20;65", "wc_limitations": "7;1;1", "wc_correctness": "1;1;5", "wc_clarity": "2;1;7", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "191;204;174", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "30;34;35", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 81.33333333333333, 58.53963519606941 ], "wc_strengths_avg": [ 38.333333333333336, 23.456816114345575 ], "wc_improvement_avg": [ 58.333333333333336, 28.963578661637943 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_correctness_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_clarity_avg": [ 3.3333333333333335, 2.6246692913372702 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 189.66666666666666, 12.283683848458853 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 33.0, 2.160246899469287 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7724643438044109764&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;2;3;3;3;2", "aff_unique_norm": "Huazhong University of Science and Technology;Tongji University;University of California, Santa Cruz;Stanford University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hust.edu.cn;https://www.tongji.edu.cn;https://www.ucsc.edu;https://www.stanford.edu", "aff_unique_abbr": "HUST;Tongji;UCSC;Stanford", "aff_campus_unique_index": "1;2;2;2;1", "aff_campus_unique": ";Santa Cruz;Stanford", "aff_country_unique_index": "0;0;0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Disentangling Linear Quadratic Control with Untrusted ML Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94827", "id": "WXqukapoa7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WXqukapoa7", "openreview": "https://openreview.net/forum?id=WXqukapoa7", "poster": "/media/PosterPDFs/NeurIPS%202024/94827.png?t=1732263758.2730558", "project": "", "author_site": "Tongxin Li, Hao Liu, Yisong Yue", "tldr": "", "abstract": "Uncertain perturbations in dynamical systems often arise from diverse resources, represented by latent components. The predictions for these components, typically generated by \"black-box\" machine learning tools, are prone to inaccuracies. To tackle this challenge, we introduce DISC, a novel policy that learns a confidence parameter online to harness the potential of accurate predictions while also mitigating the impact of erroneous forecasts. When predictions are precise, DISC leverages this information to achieve near-optimal performance. Conversely, in the case of significant prediction errors, it still has a worst-case competitive ratio guarantee. We provide competitive ratio bounds for DISC under both linear mixing of latent variables as well as a broader class of mixing functions. Our results highlight a first-of-its-kind \"best-of-both-worlds\" integration of machine-learned predictions, thus lead to a near-optimal consistency and robustness tradeoff, which provably improves what can be obtained without learning the confidence parameter. We validate the applicability of DISC across a spectrum of practical scenarios.", "keywords": "Linear Quadratic Control;Disentanglement;Competitive Analysis", "primary_area": "optimization", "supplementary_material": "", "author": "Tongxin Li;Hao Liu;Yisong Yue", "authorids": "~Tongxin_Li1;~Hao_Liu2;~Yisong_Yue1", "gender": "M;M;M", "homepage": "https://tongxin.me/;;http://www.yisongyue.com", "dblp": "140/7353;09/3214-15;28/1244", "google_scholar": "qyNc3CkAAAAJ;aXo4NLcAAAAJ;tEk4qo8AAAAJ", "orcid": ";;0000-0001-9127-1989", "linkedin": ";;yisongyue/", "or_profile": "~Tongxin_Li1;~Hao_Liu2;~Yisong_Yue1", "aff": "The Chinese University of Hong Kong, Shenzhen;California Institute of Technology;California Institute of Technology", "aff_domain": "cuhk.edu.cn;caltech.edu;caltech.edu", "position": "Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024disentangling,\ntitle={Disentangling Linear Quadratic Control with Untrusted {ML} Predictions},\nauthor={Tongxin Li and Hao Liu and Yisong Yue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WXqukapoa7}\n}", "github": "", "reviewers": "4nx1;6dfr;FjAh;F6Jr", "pdf_size": 4157084, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "71;213;178;152", "wc_strengths": "39;83;79;155", "wc_weaknesses": "102;67;57;191", "wc_questions": "72;126;35;583", "wc_limitations": "1;43;1;2", "wc_review": "285;532;350;1083", "wc_reply_reviewers": "58;9;0;101", "wc_reply_authors": "296;660;422;311", "reply_reviewers": "1;1;0;2", "reply_authors": "3;4;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 153.5, 52.31873469418006 ], "wc_strengths_avg": [ 89.0, 41.80908992073375 ], "wc_weaknesses_avg": [ 104.25, 52.79855585146245 ], "wc_questions_avg": [ 204.0, 221.19561478474205 ], "wc_limitations_avg": [ 11.75, 18.046814123273947 ], "wc_review_avg": [ 562.5, 313.8522741673222 ], "wc_reply_reviewers_avg": [ 42.0, 40.58940748520481 ], "wc_reply_authors_avg": [ 422.25, 145.63717760242403 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kx7gyElo1A0J:scholar.google.com/&scioq=Disentangling+Linear+Quadratic+Control+with+Untrusted+ML+Predictions&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "cuhk.edu.cn;caltech.edu;caltech.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;California Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.caltech.edu", "aff_unique_abbr": "CUHK;Caltech", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Shenzhen;Pasadena", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Leveraging Visual Tokens for Extended Text Contexts in Multi-Modal Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94826", "id": "WY3xgXIZUR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WY3xgXIZUR", "openreview": "https://openreview.net/forum?id=WY3xgXIZUR", "poster": "/media/PosterPDFs/NeurIPS%202024/94826.png?t=1731390572.74173", "project": "", "author_site": "Alex Jinpeng Wang, Linjie Li, Yiqi Lin, Min Li, Lijuan Wang, Mike Zheng Shou", "tldr": "", "abstract": "Training models with longer in-context lengths is a significant challenge for multimodal machine learning due to substantial GPU memory and computational costs. This exploratory study does not present state-of-the-art models; rather, it introduces an innovative method designed to increase in-context text length in multi-modality large language models (MLLMs) efficiently. We present \\ModelFullName (\\ModelName), which processes long in-context text using visual tokens. This technique significantly reduces GPU memory usage and floating point operations (FLOPs). For instance, our method expands the pre-training in-context length from 256 to 2048 tokens with fewer FLOPs for a 56 billion parameter MOE model. Experimental results demonstrate that \\ModelName enhances OCR capabilities and delivers superior performance on common downstream benchmarks for in-context few-shot evaluation. Additionally, \\ModelName proves effective for long context inference, achieving results comparable to full text input while maintaining computational efficiency.", "keywords": "Multi-modality;In-context Learning;Vision-Language;Large Language Model", "primary_area": "machine_vision", "supplementary_material": "/attachment/dd3ceb4413b2ec9a6ac3f746dd03d538a7a00b50.zip", "author": "Alex Jinpeng Wang;Linjie Li;Yiqi Lin;Min Li;Lijuan Wang;Mike Zheng Shou", "authorids": "~Alex_Jinpeng_Wang1;~Linjie_Li1;~Yiqi_Lin1;~Min_Li10;~Lijuan_Wang1;~Mike_Zheng_Shou1", "gender": ";F;;F;F;", "homepage": ";;;http://bioinformatics.csu.edu.cn/limin/index_en.html;https://www.microsoft.com/en-us/research/people/lijuanw/;", "dblp": ";200/8256;;;51/2527.html;", "google_scholar": ";WR875gYAAAAJ;;w47WJE4AAAAJ;cDcWXuIAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Alex_Jinpeng_Wang1;~Linjie_Li1;~Yiqi_Lin1;~Min_Li10;~Lijuan_Wang1;~Mike_Zheng_Shou1", "aff": ";Microsoft;;Central South University;Microsoft;", "aff_domain": ";microsoft.com;;csu.edu.cn;microsoft.com;", "position": ";Researcher;;Full Professor;Principal Researcher;", "bibtex": "@inproceedings{\nwang2024leveraging,\ntitle={Leveraging Visual Tokens for Extended Text Contexts in Multi-Modal Learning},\nauthor={Alex Jinpeng Wang and Linjie Li and Yiqi Lin and Min Li and Lijuan Wang and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WY3xgXIZUR}\n}", "github": "", "reviewers": "iV3t;Vzob;QgLn;zUCF", "pdf_size": 3734990, "rating": "4;5;6;6", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "novelty": "3;3;4;3", "presentation": "2;3;4;3", "wc_summary": "50;94;50;134", "wc_strengths": "37;28;52;93", "wc_weaknesses": "230;190;413;141", "wc_questions": "125;53;39;3", "wc_limitations": "5;5;53;78", "wc_review": "447;370;607;449", "wc_reply_reviewers": "97;210;130;152", "wc_reply_authors": "47;272;94;355", "reply_reviewers": "2;3;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 34.9857113690718 ], "wc_strengths_avg": [ 52.5, 24.904818810824544 ], "wc_weaknesses_avg": [ 243.5, 102.81172112166978 ], "wc_questions_avg": [ 55.0, 44.339598554790726 ], "wc_limitations_avg": [ 35.25, 31.514877439076294 ], "wc_review_avg": [ 468.25, 86.20723577519465 ], "wc_reply_reviewers_avg": [ 147.25, 41.17872630376029 ], "wc_reply_authors_avg": [ 192.0, 126.09321948463366 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11323404980766996721&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": ";microsoft.com;;csu.edu.cn;microsoft.com;", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Central South University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.csu.edu.cn", "aff_unique_abbr": "Microsoft;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "I Don't Know: Explicit Modeling of Uncertainty with an [IDK] Token", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94825", "id": "Wc0vlQuoLb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wc0vlQuoLb", "openreview": "https://openreview.net/forum?id=Wc0vlQuoLb", "poster": "/media/PosterPDFs/NeurIPS%202024/94825.png?t=1731693567.8506894", "project": "", "author_site": "Roi Cohen, Konstantin Dobler, Eden Biran, Gerard de Melo", "tldr": "", "abstract": "Large Language Models are known to capture real-world knowledge, allowing them to excel in many downstream tasks. Despite recent advances, these models are still prone to what are commonly known as hallucinations, causing them to emit unwanted and factually incorrect text. In this work, we propose a novel calibration method that can be used to combat hallucinations. \nWe add a special [IDK] (\u201cI Don't Know\u201d) token to the model's vocabulary and introduce an objective function that shifts probability mass to the [IDK] token for incorrect predictions. \nThis approach allows the model to express uncertainty in its output explicitly. \nWe evaluate our proposed method across multiple model architectures and factual downstream tasks.\nWe find that models trained with our method are able to express uncertainty in places where they would previously make mistakes while suffering only a small loss of encoded knowledge. We further perform extensive ablation studies of multiple variations of our approach and provide a detailed analysis of the precision-recall tradeoff of our method.", "keywords": "LLMs;Factuality;Uncertainty;Factual Knowledge;Pretraining", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Roi Cohen;Konstantin Dobler;Eden Biran;Gerard de Melo", "authorids": "~Roi_Cohen1;~Konstantin_Dobler1;~Eden_Biran1;~Gerard_de_Melo3", "gender": "M;M;;M", "homepage": ";https://konstantindobler.me;;http://gerard.demelo.org/", "dblp": ";314/6525;353/0544;86/1747", "google_scholar": ";fJEat40AAAAJ;Ko1JjkcAAAAJ;https://scholar.google.com.tw/citations?user=WCQXaGkAAAAJ", "orcid": ";;;0000-0002-2930-2059", "linkedin": "roicohen9/;;;gdemelo/", "or_profile": "~Roi_Cohen1;~Konstantin_Dobler1;~Eden_Biran1;~Gerard_Melo1", "aff": "Hasso Plattner Institute;InstaDeep;Tel Aviv University;University of Potsdam", "aff_domain": "hpi.de;instadeep.com;mail.tau.ac.il;uni-potsdam.de", "position": "PhD student;Intern;MS student;Full Professor", "bibtex": "@inproceedings{\ncohen2024i,\ntitle={I Don't Know: Explicit Modeling of Uncertainty with an [{IDK}] Token},\nauthor={Roi Cohen and Konstantin Dobler and Eden Biran and Gerard de Melo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wc0vlQuoLb}\n}", "github": "", "reviewers": "4r2S;aDMj;i3GV;Ny7r", "pdf_size": 616069, "rating": "3;4;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "18;54;132;88", "wc_strengths": "39;72;39;164", "wc_weaknesses": "372;78;102;90", "wc_questions": "49;2;1;65", "wc_limitations": "6;56;1;37", "wc_review": "484;262;275;444", "wc_reply_reviewers": "521;0;21;14", "wc_reply_authors": "360;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 42.1070065428546 ], "wc_strengths_avg": [ 78.5, 51.16883817324759 ], "wc_weaknesses_avg": [ 160.5, 122.40404405084008 ], "wc_questions_avg": [ 29.25, 28.32291475113393 ], "wc_limitations_avg": [ 25.0, 22.594247055390007 ], "wc_review_avg": [ 366.25, 98.87460492967848 ], "wc_reply_reviewers_avg": [ 139.0, 220.67736630656077 ], "wc_reply_authors_avg": [ 90.0, 155.88457268119896 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2673240016956428048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hpi.de;instadeep.com;mail.tau.ac.il;uni-potsdam.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Hasso Plattner Institute;InstaDeep;Tel Aviv University;University of Potsdam", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hpi.de;https://www.instadeep.com;https://www.tau.ac.il;https://www.uni-potsdam.de", "aff_unique_abbr": "HPI;InstaDeep;TAU;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Germany;United Kingdom;Israel" }, { "title": "Spiking Transformer with Experts Mixture", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94824", "id": "WcIeEtY3AG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WcIeEtY3AG", "openreview": "https://openreview.net/forum?id=WcIeEtY3AG", "poster": "", "project": "", "author_site": "Zhaokun Zhou, Yijie Lu, Yanhao Jia, Kaiwei Che, Jun Niu, Liwei Huang, Xinyu Shi, Yuesheng Zhu, Guoqi Li, Zhaofei Yu, Li Yuan", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) provide a sparse spike-driven mechanism which is believed to be critical for energy-efficient deep learning. \nMixture-of-Experts (MoE), on the other side, aligns with the brain mechanism of distributed and sparse processing, resulting in an efficient way of enhancing model capacity and conditional computation. \nIn this work, we consider how to incorporate SNNs\u2019 spike-driven and MoE\u2019s conditional computation into a unified framework. \nHowever, MoE uses softmax to get the dense conditional weights for each expert and TopK to hard-sparsify the network, which does not fit the properties of SNNs. \nTo address this issue, we reformulate MoE in SNNs and introduce the Spiking Experts Mixture Mechanism (SEMM) from the perspective of sparse spiking activation. \nBoth the experts and the router output spiking sequences, and their element-wise operation makes SEMM computation spike-driven and dynamic sparse-conditional. \nBy developing SEMM into Spiking Transformer, the Experts Mixture Spiking Attention (EMSA) and the Experts Mixture Spiking Perceptron (EMSP) are proposed, which performs routing allocation for head-wise and channel-wise spiking experts, respectively. Experiments show that SEMM realizes sparse conditional computation and obtains a stable improvement on neuromorphic and static datasets with approximate computational overhead based on the Spiking Transformer baselines.", "keywords": "Spiking Neural Networks; Transformer; Mixture of Experts; Neuromorphic Computing; Spike-driven;", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/d08d5e02624c26d3cdb1036f1cf618f8df66c85f.zip", "author": "Zhaokun Zhou;Yijie Lu;Yanhao Jia;Kaiwei Che;Jun Niu;Liwei Huang;Xinyu Shi;Yuesheng Zhu;Guoqi Li;Zhaofei Yu;Li Yuan", "authorids": "~Zhaokun_Zhou1;~Yijie_Lu1;~Yanhao_Jia1;~Kaiwei_Che2;~Jun_Niu3;~Liwei_Huang1;~Xinyu_Shi2;~Yuesheng_Zhu3;~Guoqi_Li1;~Zhaofei_Yu1;~Li_Yuan2", "gender": "M;M;M;M;M;M;;;M;M;", "homepage": ";https://github.com/road-small-flower;https://github.com/CuriseJia;;;https://grasshlw.github.io/;;;https://scholar.google.com/citations?hl=en&user=qCfE--MAAAAJ;https://yuzhaofei.github.io;", "dblp": ";;374/5327;299/1348;;27/10780;;;;166/0573;", "google_scholar": ";;jQx7NpQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;;;https://scholar.google.com/citations?hl=en;qaUgD50AAAAJ;", "orcid": "0000-0003-4454-6630;;0009-0002-6966-1512;0000-0002-1239-1905;;;;;;;", "linkedin": ";;;;nigel-new-86a179283/;;;;;;", "or_profile": "~Zhaokun_Zhou1;~Yijie_Lu1;~Yanhao_Jia1;~Kaiwei_Che2;~Jun_Niu3;~Liwei_Huang1;~Xinyu_Shi2;~Yuesheng_Zhu3;~Guoqi_Li1;~Zhaofei_Yu1;~Li_Yuan2", "aff": "Peking University;Peking University;University of Chinese Academy of Sciences;Peking University;Peking University;Peking University;;;Institute of Automation, Chinese Academy of Sciences;Peking University;", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;ucas.ac.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;;;ia.ac.cn;pku.edu.cn;", "position": "PhD student;Undergrad student;MS student;PhD student;MS student;PhD student;;;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nzhou2024spiking,\ntitle={Spiking Transformer with Experts Mixture},\nauthor={Zhaokun Zhou and Yijie Lu and Yanhao Jia and Kaiwei Che and Jun Niu and Liwei Huang and Xinyu Shi and Yuesheng Zhu and Guoqi Li and Zhaofei Yu and Li Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WcIeEtY3AG}\n}", "github": "", "reviewers": "RYQd;dEFi;eZ3Z;rWD5", "pdf_size": 9807526, "rating": "3;3;7;7", "confidence": "5;5;5;4", "soundness": "3;2;3;2", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "100;73;89;53", "wc_strengths": "83;41;93;46", "wc_weaknesses": "56;278;120;126", "wc_questions": "41;2;47;79", "wc_limitations": "118;2;44;1", "wc_review": "398;396;393;305", "wc_reply_reviewers": "221;251;67;10", "wc_reply_authors": "393;152;14;13", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 2.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.75, 17.69710428290459 ], "wc_strengths_avg": [ 65.75, 22.598395960775623 ], "wc_weaknesses_avg": [ 145.0, 81.54140052758476 ], "wc_questions_avg": [ 42.25, 27.36215452043205 ], "wc_limitations_avg": [ 41.25, 47.58873291021731 ], "wc_review_avg": [ 373.0, 39.30012722625717 ], "wc_reply_reviewers_avg": [ 137.25, 101.34193357144909 ], "wc_reply_authors_avg": [ 143.0, 155.01774092019275 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2550932396837934365&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": "pku.edu.cn;stu.pku.edu.cn;ucas.ac.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;;;ia.ac.cn;pku.edu.cn;", "author_num": 11, "aff_unique_index": "0;0;1;0;0;0;2;0", "aff_unique_norm": "Peking University;University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Automation", "aff_unique_url": "http://www.pku.edu.cn;http://www.ucas.ac.cn;http://www.ia.cas.cn", "aff_unique_abbr": "Peking U;UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Boosting Graph Pooling with Persistent Homology", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94823", "id": "WcmqdY2AKu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WcmqdY2AKu", "openreview": "https://openreview.net/forum?id=WcmqdY2AKu", "poster": "/media/PosterPDFs/NeurIPS%202024/94823.png?t=1731584621.635779", "project": "", "author_site": "Chaolong Ying, Xinjian Zhao, Tianshu Yu", "tldr": "", "abstract": "Recently, there has been an emerging trend to integrate persistent homology (PH) into graph neural networks (GNNs) to enrich expressive power. However, naively plugging PH features into GNN layers always results in marginal improvement with low interpretability. In this paper, we investigate a novel mechanism for injecting global topological invariance into pooling layers using PH, motivated by the observation that filtration operation in PH naturally aligns graph pooling in a cut-off manner. In this fashion, message passing in the coarsened graph acts along persistent pooled topology, leading to improved performance. Experimentally, we apply our mechanism to a collection of graph pooling methods and observe consistent and substantial performance gain over several popular datasets, demonstrating its wide applicability and flexibility.", "keywords": "graph pooling;persistent homology;graph learning", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/cec34d19d2ff9db077da662ad404b0eb1bd2c81e.zip", "author": "Chaolong Ying;Xinjian Zhao;Tianshu Yu", "authorids": "~Chaolong_Ying1;~Xinjian_Zhao2;~Tianshu_Yu2", "gender": "M;M;M", "homepage": "https://sds.cuhk.edu.cn/node/708;https://sduzxj.github.io//;https://mypage.cuhk.edu.cn/academics/yutianshu/", "dblp": "295/3323;02/8613;152/6675", "google_scholar": "1-O3158AAAAJ;iKuIMsgAAAAJ;MTHO7DsAAAAJ", "orcid": "0000-0002-8555-8817;0009-0003-1553-8209;0000-0002-6537-1924", "linkedin": ";;", "or_profile": "~Chaolong_Ying1;~Xinjian_Zhao2;~Tianshu_Yu2", "aff": "The Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong (Shenzhen);Chinese University of Hong Kong (Shenzhen)", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nying2024boosting,\ntitle={Boosting Graph Pooling with Persistent Homology},\nauthor={Chaolong Ying and Xinjian Zhao and Tianshu Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WcmqdY2AKu}\n}", "github": "", "reviewers": "fBuZ;HRv2;3dRY;k5BH;TU8V", "pdf_size": 1333897, "rating": "4;6;6;7;8", "confidence": "4;2;4;5;4", "soundness": "2;2;2;3;3", "novelty": "2;2;2;3;4", "presentation": "2;2;3;2;4", "wc_summary": "64;55;76;71;179", "wc_strengths": "29;25;60;45;259", "wc_weaknesses": "162;156;129;16;407", "wc_questions": "229;2;49;48;214", "wc_limitations": "20;6;6;36;15", "wc_review": "504;244;320;216;1074", "wc_reply_reviewers": "529;348;161;47;336", "wc_reply_authors": "851;694;426;46;968", "reply_reviewers": "1;2;2;1;2", "reply_authors": "3;3;4;2;3", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 89.0, 45.54997255762071 ], "wc_strengths_avg": [ 83.6, 88.57448842640865 ], "wc_weaknesses_avg": [ 174.0, 127.86399023962923 ], "wc_questions_avg": [ 108.4, 94.01404150444763 ], "wc_limitations_avg": [ 16.6, 11.092339699089639 ], "wc_review_avg": [ 471.6, 317.5062834023919 ], "wc_reply_reviewers_avg": [ 284.2, 166.20637773563323 ], "wc_reply_authors_avg": [ 597.0, 329.97212003440535 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 3.0, 0.6324555320336759 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.1846372364689991, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4239755033182166344&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Aligning Diffusion Behaviors with Q-functions for Efficient Continuous Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94822", "id": "Wd1DFLUp1M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wd1DFLUp1M", "openreview": "https://openreview.net/forum?id=Wd1DFLUp1M", "poster": "/media/PosterPDFs/NeurIPS%202024/94822.png?t=1733016063.0687904", "project": "", "author_site": "Huayu Chen, Kaiwen Zheng, Hang Su, Jun Zhu", "tldr": "", "abstract": "Drawing upon recent advances in language model alignment, we formulate offline Reinforcement Learning as a two-stage optimization problem: First pretraining expressive generative policies on reward-free behavior datasets, then finetuning these policies to align with task-specific annotations like Q-values. This strategy allows us to leverage abundant and diverse behavior data to enhance generalization and enable rapid adaptation to downstream tasks using minimal annotations. In particular, we introduce Efficient Diffusion Alignment (EDA) for solving continuous control problems. EDA utilizes diffusion models for behavior modeling. However, unlike previous approaches, we represent diffusion policies as the derivative of a scalar neural network with respect to action inputs. This representation is critical because it enables direct density calculation for diffusion models, making them compatible with existing LLM alignment theories. During policy fine-tuning, we extend preference-based alignment methods like Direct Preference Optimization (DPO) to align diffusion behaviors with continuous Q-functions. Our evaluation on the D4RL benchmark shows that EDA exceeds all baseline methods in overall performance. Notably, EDA maintains about 95\\% of performance and still outperforms several baselines given only 1\\% of Q-labelled data during fine-tuning.", "keywords": "diffusion;offline;RL;alignment;continuous control;DPO", "primary_area": "robotics", "supplementary_material": "/attachment/fa87c729eb34bf611056732a4277e0245b8dca0a.zip", "author": "Huayu Chen;Kaiwen Zheng;Hang Su;Jun Zhu", "authorids": "~Huayu_Chen1;~Kaiwen_Zheng2;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;M", "homepage": "https://chendrag.github.io/;https://zhengkw18.github.io/;http://ml.cs.tsinghua.edu.cn/~jun;", "dblp": "259/3113;;50/2644-1;26/5371-6", "google_scholar": "0FBCHc4AAAAJ;0d80xSIAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Huayu_Chen1;~Kaiwen_Zheng2;~Jun_Zhu2;~Hang_Su2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;cs.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;MS student;Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2024aligning,\ntitle={Aligning Diffusion Behaviors with Q-functions for Efficient Continuous Control},\nauthor={Huayu Chen and Kaiwen Zheng and Hang Su and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wd1DFLUp1M}\n}", "github": "", "reviewers": "2HKR;FQV2;y7D4;jAma", "pdf_size": 4228021, "rating": "6;7;7;7", "confidence": "2;5;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "170;42;74;110", "wc_strengths": "49;124;57;18", "wc_weaknesses": "110;62;232;69", "wc_questions": "235;30;45;106", "wc_limitations": "1;24;64;35", "wc_review": "565;282;472;338", "wc_reply_reviewers": "24;176;23;11", "wc_reply_authors": "13;221;22;13", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 99.0, 47.52893855326458 ], "wc_strengths_avg": [ 62.0, 38.6458277178792 ], "wc_weaknesses_avg": [ 118.25, 68.18495068561684 ], "wc_questions_avg": [ 104.0, 80.81150908131836 ], "wc_limitations_avg": [ 31.0, 22.66053838724932 ], "wc_review_avg": [ 414.25, 111.0908974668942 ], "wc_reply_reviewers_avg": [ 58.5, 68.0312428226914 ], "wc_reply_authors_avg": [ 67.25, 88.84361260101933 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6798839028951701206&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;cs.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "WdA5H9ARaa", "title": "Benchmark Inflation: Revealing LLM Performance Gaps Using Retro-Holdouts", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Public benchmarks are compromised, as the training data for many Large Language Models (LLMs) is contaminated with test data, suggesting a performance gap between benchmark scores and actual capabilities. Ideally, a private holdout set could be used to accurately verify scores. Unfortunately, such datasets do not exist for most benchmarks, and post-hoc construction of sufficiently similar datasets is non-trivial. To address these issues, we introduce a systematic methodology for (i) retrospectively constructing a holdout dataset for a target dataset, (ii) demonstrating the statistical indistinguishability of this retro-holdout dataset, and (iii) comparing LLMs on the two datasets to quantify the performance gap due to the dataset's public availability. Applying these methods to TruthfulQA, we construct and release Retro-TruthfulQA, on which we evaluate twenty LLMs and find that some have inflated scores by as much as 16 percentage points. Our results demonstrate that public benchmark scores do not always accurately assess model properties, and underscore the importance of improved data practices in the field.", "keywords": "Large Language Models;Benchmark;Evaluations;Datasets;TruthfulQA", "primary_area": "", "supplementary_material": "/attachment/c8d3b8616f76c1a7576535f337b20213fddea814.pdf", "author": "Jacob Haimes;Cenny Wenner;Kunvar Thaman;Vassil Tashev;Clement Neo;Esben Kran;Jason Hoelscher-Obermaier", "authorids": "~Jacob_Haimes1;~Cenny_Wenner1;~Kunvar_Thaman1;~Vassil_Tashev1;~Clement_Neo1;~Esben_Kran1;~Jason_Hoelscher-Obermaier1", "gender": "M;M;M;M;;M;M", "homepage": "https://jacob-haimes.github.io;;https://kunvarthaman.com;;https://clementneo.com;https://apartresearch.com;https://apartresearch.com/", "dblp": ";;;;367/9292;;", "google_scholar": "F2BtIR0AAAAJ;;;;Y2-g_2cAAAAJ;SH5diRUAAAAJ;https://scholar.google.at/citations?user=FKrb_FwAAAAJ", "orcid": ";;;;;0000-0003-0710-2635;0000-0003-4773-9059", "linkedin": "jacob-haimes/;;;vassil-t-250510195/;;esbenkc/;jas-ho/", "or_profile": "~Jacob_Haimes1;~Cenny_Wenner1;~Kunvar_Thaman1;~Vassil_Tashev1;~Clement_Neo1;~Esben_Kran1;~Jason_Hoelscher-Obermaier1", "aff": "Apart Research;;BITS Pilani, Birla Institute of Technology and Science;Simon Fraser University;Nanyang Technological University;Apart Research;Apart Research", "aff_domain": "apartresearch.com;;pilani.bits-pilani.ac.in;sfu.ca;ntu.edu.sg;apartresearch.com;apartresearch.com", "position": "Researcher;;Researcher;Researcher;Undergrad student;Researcher;Principal Researcher", "bibtex": "@misc{\nanonymous2024benchmark,\ntitle={Benchmark Inflation: Revealing {LLM} Performance Gaps Using Retro-Holdouts},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=WdA5H9ARaa}\n}", "github": "", "project": "", "reviewers": "1HP3;hZPp;dLqB;sHLe", "site": "https://openreview.net/forum?id=WdA5H9ARaa", "pdf_size": 520429, "rating": "4;5;6;6", "confidence": "4;3;4;3", "wc_summary_and_contributions": "51;53;47;80", "wc_strengths": "21;43;90;38", "wc_improvement": "34;109;48;634", "wc_limitations": "54;1;21;53", "wc_correctness": "1;1;42;16", "wc_clarity": "9;1;28;12", "wc_relation_to_prior_work": "1;1;53;15", "wc_documentation": "11;1;12;59", "wc_additional_feedback": "1;1;1;1", "wc_review": "183;211;342;908", "wc_reply_reviewers": "41;36;45;26", "wc_reply_authors": "324;396;64;804", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 57.75, 13.026415470113028 ], "wc_strengths_avg": [ 48.0, 25.583197610932064 ], "wc_improvement_avg": [ 206.25, 248.5662638010235 ], "wc_limitations_avg": [ 32.25, 22.398381637966615 ], "wc_correctness_avg": [ 15.0, 16.748134224444225 ], "wc_clarity_avg": [ 12.5, 9.810708435174291 ], "wc_relation_to_prior_work_avg": [ 17.5, 21.277922830953212 ], "wc_documentation_avg": [ 20.75, 22.498611068241523 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 411.0, 293.1526905897335 ], "wc_reply_reviewers_avg": [ 37.0, 7.106335201775948 ], "wc_reply_authors_avg": [ 397.0, 265.45621107821154 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3469194905074290646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Apart Research;Birla Institute of Technology and Science;Simon Fraser University;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.bits-pilani.ac.in;https://www.sfu.ca;https://www.ntu.edu.sg", "aff_unique_abbr": ";BITS Pilani;SFU;NTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pilani", "aff_country_unique_index": "1;2;3", "aff_country_unique": ";India;Canada;Singapore" }, { "title": "Learning to be Smooth: An End-to-End Differentiable Particle Smoother", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94821", "id": "WdMhbqCoqW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WdMhbqCoqW", "openreview": "https://openreview.net/forum?id=WdMhbqCoqW", "poster": "", "project": "", "author_site": "Ali Younis, Erik Sudderth", "tldr": "", "abstract": "For challenging state estimation problems arising in domains like vision and robotics, particle-based representations attractively enable temporal reasoning about multiple posterior modes. Particle smoothers offer the potential for more accurate offline data analysis by propagating information both forward and backward in time, but have classically required human-engineered dynamics and observation models. Extending recent advances in discriminative training of particle filters, we develop a framework for low-variance propagation of gradients across long time sequences when training particle smoothers. Our \"two-filter\" smoother integrates particle streams that are propagated forward and backward in time, while incorporating stratification and importance weights in the resampling step to provide low-variance gradient estimates for neural network dynamics and observation models. The resulting mixture density particle smoother is substantially more accurate than state-of-the-art particle filters, as well as search-based baselines, for city-scale global vehicle localization from real-world videos and maps.", "keywords": "particle;filter;smoother;non-parametric;generative;discriminative;multi-modal;belief propagation;monte carlo;sequential;kernel density estimation;mixture;importance sampling;localization;autonomous driving;smoothing;filtering", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Ali Younis;Erik B. Sudderth", "authorids": "~Ali_Younis1;~Erik_B._Sudderth2", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Ali_Younis1;~Erik_B._Sudderth2", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nyounis2024learning,\ntitle={Learning to be Smooth: An End-to-End Differentiable Particle Smoother},\nauthor={Ali Younis and Erik B. Sudderth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WdMhbqCoqW}\n}", "github": "", "reviewers": "fbEx;tGtz;56kY;qoBM", "pdf_size": 11973495, "rating": "3;3;7;7", "confidence": "3;4;4;4", "soundness": "1;2;3;3", "novelty": "1;2;3;4", "presentation": "1;3;2;3", "wc_summary": "31;39;110;112", "wc_strengths": "22;17;124;111", "wc_weaknesses": "285;613;271;140", "wc_questions": "11;69;125;1", "wc_limitations": "16;37;44;35", "wc_review": "365;775;674;399", "wc_reply_reviewers": "492;66;292;113", "wc_reply_authors": "554;0;0;557", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.0, 2.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 73.0, 38.11167800031901 ], "wc_strengths_avg": [ 68.5, 49.24682730897494 ], "wc_weaknesses_avg": [ 327.25, 174.4023724035886 ], "wc_questions_avg": [ 51.5, 49.746859197340285 ], "wc_limitations_avg": [ 33.0, 10.36822067666386 ], "wc_review_avg": [ 553.25, 175.34590813589008 ], "wc_reply_reviewers_avg": [ 240.75, 167.78762618262408 ], "wc_reply_authors_avg": [ 277.75, 277.752025195137 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vH4t3ebMI0kJ:scholar.google.com/&scioq=Learning+to+be+Smooth:+An+End-to-End+Differentiable+Particle+Smoother&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": ";", "author_num": 2 }, { "title": "OMG-LLaVA: Bridging Image-level, Object-level, Pixel-level Reasoning and Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94820", "id": "WeoNd6PRqS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WeoNd6PRqS", "openreview": "https://openreview.net/forum?id=WeoNd6PRqS", "poster": "/media/PosterPDFs/NeurIPS%202024/94820.png?t=1731079556.410217", "project": "", "author_site": "Tao Zhang, Xiangtai Li, Hao Fei, Haobo Yuan, Shengqiong Wu, Shunping Ji, Chen Change Loy, Shuicheng Yan", "tldr": "", "abstract": "Current universal segmentation methods demonstrate strong capabilities in pixel-level image and video understanding. However, they lack reasoning abilities and cannot be controlled via text instructions. In contrast, large vision-language multimodal models exhibit powerful vision-based conversation and reasoning capabilities but lack pixel-level understanding and have difficulty accepting visual prompts for flexible user interaction. This paper proposes OMG-LLaVA, a new and elegant framework combining powerful pixel-level vision understanding with reasoning abilities. It can accept various visual and text prompts for flexible user interaction. Specifically, we use a universal segmentation method as the visual encoder, integrating image information, perception priors, and visual prompts into visual tokens provided to the LLM. The LLM is responsible for understanding the user's text instructions and providing text responses and pixel-level segmentation results based on the visual information. We propose perception prior embedding to better integrate perception priors with image features. OMG-LLaVA achieves image-level, object-level, and pixel-level reasoning and understanding in a single model, matching or surpassing the performance of specialized methods on multiple benchmarks. Rather than using LLM to connect each specialist, our work aims at end-to-end training on one encoder, one decoder, and one LLM. The code and model have been released for further research.", "keywords": "multi-modal modeling;universal model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Tao Zhang;Xiangtai Li;Hao Fei;Haobo Yuan;Shengqiong Wu;Shunping Ji;Chen Change Loy;Shuicheng YAN", "authorids": "~Tao_Zhang22;~Xiangtai_Li1;~Hao_Fei1;~Haobo_Yuan1;~Shengqiong_Wu2;~Shunping_Ji1;~Chen_Change_Loy2;~Shuicheng_YAN3", "gender": "M;;M;;F;M;M;M", "homepage": "https://github.com/zhang-tao-whu;;https://haofei.vip/;;https://chocowu.github.io/;http://gpcv.whu.edu.cn/;https://www.mmlab-ntu.com/person/ccloy/index.html;https://yanshuicheng.ai/", "dblp": ";;81/3569-1;;274/7191;123/0960;01/5855;y/ShuichengYan", "google_scholar": "3xu4a5oAAAAJ;;YGDX46AAAAAJ;;RJJLKR0AAAAJ;https://scholar.google.com.hk/citations?user=FjoRmF4AAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ", "orcid": "0000-0001-7390-2409;;0000-0003-3026-6347;;0000-0001-6192-1194;;0000-0001-5345-1591;", "linkedin": ";;;;;;;", "or_profile": "~Tao_Zhang22;~Xiangtai_Li1;~Hao_Fei1;~Haobo_Yuan1;~Shengqiong_Wu2;~Shunping_Ji1;~Chen_Change_Loy2;~Shuicheng_YAN3", "aff": "Wuhan University;;National University of Singapore;;National University of Singapore;Wuhan University;Nanyang Technological University;sea Group", "aff_domain": "whu.edu.cn;;nus.edu.sg;;u.nus.edu;whu.edu.cn;ntu.edu.sg;sea.com", "position": "PhD student;;Postdoc;;PhD student;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024omgllava,\ntitle={{OMG}-{LL}a{VA}: Bridging Image-level, Object-level, Pixel-level Reasoning and Understanding},\nauthor={Tao Zhang and Xiangtai Li and Hao Fei and Haobo Yuan and Shengqiong Wu and Shunping Ji and Chen Change Loy and Shuicheng YAN},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WeoNd6PRqS}\n}", "github": "", "reviewers": "ck7P;czBJ;Timw;UauW", "pdf_size": 0, "rating": "4;5;7;8", "confidence": "5;4;4;5", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "44;89;55;47", "wc_strengths": "49;54;107;120", "wc_weaknesses": "188;201;61;147", "wc_questions": "43;41;46;14", "wc_limitations": "2;27;36;3", "wc_review": "326;412;305;331", "wc_reply_reviewers": "430;47;22;177", "wc_reply_authors": "751;80;0;26", "reply_reviewers": "2;1;1;1", "reply_authors": "5;3;1;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.75, 17.92170471802278 ], "wc_strengths_avg": [ 82.5, 31.388692231439016 ], "wc_weaknesses_avg": [ 149.25, 54.71003107292117 ], "wc_questions_avg": [ 36.0, 12.82575533838066 ], "wc_limitations_avg": [ 17.0, 14.849242404917497 ], "wc_review_avg": [ 343.5, 40.73389252207552 ], "wc_reply_reviewers_avg": [ 169.0, 161.7698983123869 ], "wc_reply_authors_avg": [ 214.25, 311.23333288708005 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16451185086731631984&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "whu.edu.cn;;nus.edu.sg;;u.nus.edu;whu.edu.cn;ntu.edu.sg;sea.com", "author_num": 8, "aff_unique_index": "0;1;1;0;2;3", "aff_unique_norm": "Wuhan University;National University of Singapore;Nanyang Technological University;Sea Group", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.nus.edu.sg;https://www.ntu.edu.sg;", "aff_unique_abbr": "WHU;NUS;NTU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;Singapore;" }, { "title": "SolarCube: An Integrative Benchmark Dataset Harnessing Satellite and In-situ Observations for Large-scale Solar Energy Forecasting", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97653", "id": "WffhOhYvZ0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WffhOhYvZ0", "openreview": "https://openreview.net/forum?id=WffhOhYvZ0", "poster": "/media/PosterPDFs/NeurIPS%202024/97653.png?t=1733510124.2006702", "project": "", "author_site": "Ruohan Li, Yiqun Xie, Xiaowei Jia, Dongdong Wang, Yanhua Li, Yingxue Zhang, Zhihao Wang, Zhili Li", "tldr": "", "abstract": "Solar power is a critical source of renewable energy, offering significant potential to lower greenhouse gas emissions and mitigate climate change. However, the cloud induced-variability of solar radiation reaching the earth\u2019s surface presents a challenge for integrating solar power into the grid (e.g., storage and backup management). The new generation of geostationary satellites such as GOES-16 has become an important data source for large-scale and high temporal frequency solar radiation forecasting. However, no machine-learning-ready dataset has integrated geostationary satellite data with fine-grained solar radiation information to support forecasting model development and benchmarking with consistent metrics. We present SolarCube, a new ML-ready benchmark dataset for solar radiation forecasting. SolarCube covers 19 study areas distributed over multiple continents: North America, South America, Asia, and Oceania. The dataset supports short (i.e., 30 minutes to 6 hours) and long-term (i.e., day-ahead or longer) solar radiation forecasting at both point-level (i.e., specific locations of monitoring stations) and area-level, by processing and integrating data from multiple sources, including geostationary satellite images, physics-derived solar radiation, and ground station observations from different monitoring networks over the globe. We also evaluated a set of forecasting models for point- and image-based time-series data to develop performance benchmarks under different testing scenarios. The dataset is available at https://doi.org/10.5281/zenodo.11498739. A Python library is available to conveniently generate different variations of the dataset based on user needs, along with baseline models at https://github.com/Ruohan-Li/SolarCube.", "keywords": "solar energy;renewable energy;forecasting;satellite", "primary_area": "", "supplementary_material": "/attachment/524dce6573659f2fd571d568436ab4a99ac24a79.pdf", "author": "Ruohan Li;Yiqun Xie;Xiaowei Jia;Dongdong Wang;Yanhua Li;Yingxue Zhang;Zhihao Wang;Zhili Li", "authorids": "~Ruohan_Li1;~Yiqun_Xie2;~Xiaowei_Jia1;~Dongdong_Wang3;~Yanhua_Li1;~Yingxue_Zhang3;~Zhihao_Wang9;~Zhili_Li1", "gender": "F;;M;;M;F;M;M", "homepage": "https://ruohan-li.github.io/;http://www.terpconnect.umd.edu/~xie/;https://www.pitt.edu/~xiaowei;;http://www.wpi.edu/~yli15/;https://yingxuezhang.com/;https://zhwang0.github.io/;", "dblp": ";131/2110;118/5304;;;174/0010-2;;81/10616", "google_scholar": "-2TkF1MAAAAJ;s-XyB2wAAAAJ;mIvajOgAAAAJ;eF1cEzkAAAAJ;https://scholar.google.com.tw/citations?user=ICOWtt0AAAAJ;8rVes9sAAAAJ;https://scholar.google.com/citations?hl=en;Ts_-V1gAAAAJ", "orcid": ";;0000-0001-8544-5233;;0000-0001-8972-503x;0000-0002-0947-1875;0000-0002-2239-4077;", "linkedin": ";;;;;;zhihao-wang-1b31a5155/;", "or_profile": "~Ruohan_Li1;~Yiqun_Xie2;~Xiaowei_Jia1;~Dongdong_Wang3;~Yanhua_Li1;~Yingxue_Zhang3;~Zhihao_Wang9;~Zhili_Li1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Pittsburgh;University of Maryland, College Park;Worcester Polytechnic Institute;State University of New York at Binghamton;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;pitt.edu;umd.edu;wpi.edu;binghamton.edu;umd.edu;umd.edu", "position": "PhD student;Assistant Professor;Assistant Professor;Associate Professor;Associate Professor;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nli2024solarcube,\ntitle={SolarCube: An Integrative Benchmark Dataset Harnessing Satellite and In-situ Observations for Large-scale Solar Energy Forecasting},\nauthor={Ruohan Li and Yiqun Xie and Xiaowei Jia and Dongdong Wang and Yanhua Li and Yingxue Zhang and Zhihao Wang and Zhili Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=WffhOhYvZ0}\n}", "github": "", "reviewers": "qt78;AUxG;UCVf;aSBK", "pdf_size": 1603002, "rating": "7;7;7;8", "confidence": "4;4;3;2", "wc_summary_and_contributions": "53;28;88;96", "wc_strengths": "100;32;56;72", "wc_improvement": "428;44;66;250", "wc_limitations": "7;24;108;35", "wc_correctness": "11;1;27;35", "wc_clarity": "10;6;19;33", "wc_relation_to_prior_work": "17;1;8;46", "wc_documentation": "5;1;22;70", "wc_additional_feedback": "1;1;1;1", "wc_review": "632;138;395;638", "wc_reply_reviewers": "0;0;0;51", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 66.25, 27.371289702898547 ], "wc_strengths_avg": [ 65.0, 24.71841418861655 ], "wc_improvement_avg": [ 197.0, 155.51527256189343 ], "wc_limitations_avg": [ 43.5, 38.55191305240247 ], "wc_correctness_avg": [ 18.5, 13.294735800308331 ], "wc_clarity_avg": [ 17.0, 10.36822067666386 ], "wc_relation_to_prior_work_avg": [ 18.0, 17.131841699011815 ], "wc_documentation_avg": [ 24.5, 27.427176303804956 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 450.75, 205.4475297977564 ], "wc_reply_reviewers_avg": [ 12.75, 22.083647796503186 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hpsGkX_lSmsJ:scholar.google.com/&scioq=SolarCube:+An+Integrative+Benchmark+Dataset+Harnessing+Satellite+and+In-situ+Observations+for+Large-scale+Solar+Energy+Forecasting&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "umd.edu;umd.edu;pitt.edu;umd.edu;wpi.edu;binghamton.edu;umd.edu;umd.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;3;0;0", "aff_unique_norm": "University of Maryland;University of Pittsburgh;Worcester Polytechnic Institute;State University of New York at Binghamton", "aff_unique_dep": ";;;", "aff_unique_url": "https://www/umd.edu;https://www.pitt.edu;https://www.wpi.edu;https://www.binghamton.edu", "aff_unique_abbr": "UMD;Pitt;WPI;SUNY Binghamton", "aff_campus_unique_index": "0;0;0;2;0;0", "aff_campus_unique": "College Park;;Binghamton", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Subwords as Skills: Tokenization for Sparse-Reward Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94819", "id": "WfpvtH7oC1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WfpvtH7oC1", "openreview": "https://openreview.net/forum?id=WfpvtH7oC1", "poster": "", "project": "", "author_site": "David Yunis, Justin Jung, Falcon Dai, Matthew Walter", "tldr": "", "abstract": "Exploration in sparse-reward reinforcement learning (RL) is difficult due to the need for long, coordinated sequences of actions in order to achieve any reward. Skill learning, from demonstrations or interaction, is a promising approach to address this, but skill extraction and inference are expensive for current methods. We present a novel method to extract skills from demonstrations for use in sparse-reward RL, inspired by the popular Byte-Pair Encoding (BPE) algorithm in natural language processing. With these skills, we show strong performance in a variety of tasks, 1000$\\times$ acceleration for skill-extraction and 100$\\times$ acceleration for policy inference. Given the simplicity of our method, skills extracted from 1\\% of the demonstrations in one task can be transferred to a new loosely related task. We also note that such a method yields a finite set of interpretable behaviors. Our code is available at https://github.com/dyunis/subwords_as_skills.", "keywords": "Reinforcement Learning;Deep Learning;Exploration;Hierarchical RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "David Yunis;Justin Jung;Falcon Z Dai;Matthew Walter", "authorids": "~David_Yunis1;~Justin_Jung1;~Falcon_Z_Dai1;~Matthew_Walter1", "gender": ";M;M;M", "homepage": ";https://deep-exploration.vercel.app/about;http://falcondai.com;http://ttic.edu/walter", "dblp": ";;228/7969;50/7734", "google_scholar": ";;https://scholar.google.com/citations?hl=en;RAiewnEAAAAJ", "orcid": ";;;0000-0003-1425-6050", "linkedin": ";;;", "or_profile": "~David_Yunis1;~Justin_Jung1;~Falcon_Z_Dai1;~Matthew_Walter1", "aff": ";Springtail.ai;;Toyota Technological Institute at Chicago", "aff_domain": ";springtail.ai;;ttic.edu", "position": ";Researcher;;Associate Professor", "bibtex": "@inproceedings{\nyunis2024subwords,\ntitle={Subwords as Skills: Tokenization for Sparse-Reward Reinforcement Learning},\nauthor={David Yunis and Justin Jung and Falcon Z Dai and Matthew Walter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WfpvtH7oC1}\n}", "github": "", "reviewers": "iKv2;bwJh;jRfS;g8NK", "pdf_size": 3287856, "rating": "4;6;7;7", "confidence": "4;3;5;2", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "2;2;3;2", "wc_summary": "64;109;37;59", "wc_strengths": "47;136;116;81", "wc_weaknesses": "215;173;173;166", "wc_questions": "4;87;177;32", "wc_limitations": "10;82;2;105", "wc_review": "340;587;505;443", "wc_reply_reviewers": "10;46;17;0", "wc_reply_authors": "193;101;30;31", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.25, 26.156978036462853 ], "wc_strengths_avg": [ 95.0, 33.99264626356707 ], "wc_weaknesses_avg": [ 181.75, 19.40843888621648 ], "wc_questions_avg": [ 75.0, 66.02650982749277 ], "wc_limitations_avg": [ 49.75, 44.589096200752934 ], "wc_review_avg": [ 468.75, 90.18973056839675 ], "wc_reply_reviewers_avg": [ 18.25, 17.122718826167766 ], "wc_reply_authors_avg": [ 88.75, 66.71722041572175 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.18257418583505536, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Vo-UeA34EgcJ:scholar.google.com/&scioq=Subwords+as+Skills:+Tokenization+for+Sparse-Reward+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": ";springtail.ai;;ttic.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Springtail.ai;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.springtail.ai;https://www.tti-chicago.org", "aff_unique_abbr": "Springtail;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Federated Learning under Periodic Client Participation and Heterogeneous Data: A New Communication-Efficient Algorithm and Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94818", "id": "WftaVkL6G2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WftaVkL6G2", "openreview": "https://openreview.net/forum?id=WftaVkL6G2", "poster": "/media/PosterPDFs/NeurIPS%202024/94818.png?t=1731735989.6201105", "project": "", "author_site": "Michael Crawshaw, Mingrui Liu", "tldr": "", "abstract": "In federated learning, it is common to assume that clients are always available to participate in training, which may not be feasible with user devices in practice. Recent works analyze federated learning under more realistic participation patterns, such as cyclic client availability or arbitrary participation. However, all such works either require strong assumptions (e.g., all clients participate almost surely within a bounded window), do not achieve linear speedup and reduced communication rounds, or are not applicable in the general non-convex setting. In this work, we focus on nonconvex optimization and consider participation patterns in which the chance of participation over a fixed window of rounds is equal among all clients, which includes cyclic client availability as a special case. Under this setting, we propose a new algorithm, named Amplified SCAFFOLD, and prove that it achieves linear speedup, reduced communication, and resilience to data heterogeneity simultaneously. In particular, for cyclic participation, our algorithm is proved to enjoy $\\mathcal{O}(\\epsilon^{-2})$ communication rounds to find an $\\epsilon$-stationary point in the non-convex stochastic setting. In contrast, the prior work under the same setting requires $\\mathcal{O}(\\kappa^2 \\epsilon^{-4})$ communication rounds, where $\\kappa$ denotes the data heterogeneity. Therefore, our algorithm significantly reduces communication rounds due to better dependency in terms of $\\epsilon$ and $\\kappa$. Our analysis relies on a fine-grained treatment of the nested dependence between client participation and errors in the control variates, which results in tighter guarantees than previous work. We also provide experimental results with (1) synthetic data and (2) real-world data with a large number of clients $(N = 250)$, demonstrating the effectiveness of our algorithm under periodic client participation.", "keywords": "federated learning;nonconvex optimization;cross device;heterogeneous data;periodic participation", "primary_area": "optimization", "supplementary_material": "/attachment/52b6434ab07dbb560100addb5c2ce29c19d34821.zip", "author": "Michael Crawshaw;Mingrui Liu", "authorids": "~Michael_Crawshaw1;~Mingrui_Liu2", "gender": "M;", "homepage": ";https://mingrliu.github.io", "dblp": "274/7164;", "google_scholar": "XVrMZ_4AAAAJ;KFoEnFQAAAAJ", "orcid": ";", "linkedin": ";mingrui-liu-447a2aab/", "or_profile": "~Michael_Crawshaw1;~Mingrui_Liu2", "aff": "George Mason University;George Mason University", "aff_domain": "gmu.edu;gmu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncrawshaw2024federated,\ntitle={Federated Learning under Periodic Client Participation and Heterogeneous Data: A New Communication-Efficient Algorithm and Analysis},\nauthor={Michael Crawshaw and Mingrui Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WftaVkL6G2}\n}", "github": "", "reviewers": "3Kpj;2Xef;bD8t;ewwQ", "pdf_size": 1208182, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "2;3;3;2", "wc_summary": "121;44;80;75", "wc_strengths": "132;49;53;18", "wc_weaknesses": "881;90;168;4", "wc_questions": "5;42;43;255", "wc_limitations": "29;40;6;10", "wc_review": "1168;265;350;362", "wc_reply_reviewers": "389;285;144;191", "wc_reply_authors": "72;996;288;768", "reply_reviewers": "1;5;2;4", "reply_authors": "2;6;4;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.0, 27.39525506360545 ], "wc_strengths_avg": [ 63.0, 42.07730979993849 ], "wc_weaknesses_avg": [ 285.75, 348.52860356074075 ], "wc_questions_avg": [ 86.25, 98.62397021008636 ], "wc_limitations_avg": [ 21.25, 13.88119231190174 ], "wc_review_avg": [ 536.25, 366.65267965746546 ], "wc_reply_reviewers_avg": [ 252.25, 93.86526247765997 ], "wc_reply_authors_avg": [ 531.0, 368.1453517294494 ], "reply_reviewers_avg": [ 3.0, 1.5811388300841898 ], "reply_authors_avg": [ 4.25, 1.479019945774904 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17596207481248523934&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gmu.edu;gmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "George Mason University", "aff_unique_dep": "", "aff_unique_url": "https://www.gmu.edu", "aff_unique_abbr": "GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Accelerating Augmentation Invariance Pretraining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94817", "id": "Wh9ssqlCNg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wh9ssqlCNg", "openreview": "https://openreview.net/forum?id=Wh9ssqlCNg", "poster": "/media/PosterPDFs/NeurIPS%202024/94817.png?t=1733546163.9339333", "project": "", "author_site": "Jinhong Lin, Cheng-En Wu, Yibing Wei, Pedro Morgado", "tldr": "", "abstract": "Our work tackles the computational challenges of contrastive learning methods, particularly for the pretraining of Vision Transformers (ViTs). Despite the effectiveness of contrastive learning, the substantial computational resources required for training often hinder their practical application. To mitigate this issue, we propose an acceleration framework, leveraging ViT's unique ability to generalize across inputs of varying sequence lengths. Our method employs a mix of sequence compression strategies, including randomized token dropout and flexible patch scaling, to reduce the cost of gradient estimation and accelerate convergence. We further provide an in-depth analysis of the gradient estimation error of various acceleration strategies as well as their impact on downstream tasks, offering valuable insights into the trade-offs between acceleration and performance. \n We also propose a novel procedure to identify an optimal acceleration schedule to adjust the sequence compression ratios to the training progress, ensuring efficient training without sacrificing downstream performance. Our approach significantly reduces computational overhead across various self-supervised learning algorithms on large-scale datasets. In ImageNet, our method achieves speedups of 4$\\times$ in MoCo, 3.3$\\times$ in SimCLR, and 2.5$\\times$ in DINO, demonstrating substantial efficiency gains.", "keywords": "Self-supervised learning;Vision Transformer;Accelerating training", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jinhong Lin;Cheng-En Wu;Yibing Wei;Pedro Morgado", "authorids": "~Jinhong_Lin1;~Cheng-En_Wu1;~Yibing_Wei1;~Pedro_Morgado1", "gender": "Non-Binary;M;F;M", "homepage": "https://jonneslin.github.io/;;https://yibingwei-1.github.io;https://pedro-morgado.github.io/", "dblp": "309/6999;10/8431;258/4964;199/1743-1", "google_scholar": "https://scholar.google.com/citations?hl=en-CN;4SQ9RDUAAAAJ;OyQP1d4AAAAJ;Yy4gO-QAAAAJ", "orcid": ";;;0000-0002-0955-6510", "linkedin": ";;yibing-evelyn-wei/;", "or_profile": "~Jinhong_Lin1;~Cheng-En_Wu1;~Yibing_Wei1;~Pedro_Morgado1", "aff": "University of Wisconsin - Madison;Microsoft;University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "wisc.edu;microsoft.com;wisc.edu;wisc.edu", "position": "MS student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlin2024accelerating,\ntitle={Accelerating Augmentation Invariance Pretraining},\nauthor={Jinhong Lin and Cheng-En Wu and Yibing Wei and Pedro Morgado},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wh9ssqlCNg}\n}", "github": "", "reviewers": "95eS;GyCT;Q1ps;geFN", "pdf_size": 1257386, "rating": "3;4;5;6", "confidence": "2;3;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;2", "presentation": "2;3;3;4", "wc_summary": "33;74;65;73", "wc_strengths": "1;80;31;243", "wc_weaknesses": "51;414;148;98", "wc_questions": "34;83;5;75", "wc_limitations": "1;5;5;35", "wc_review": "120;656;254;524", "wc_reply_reviewers": "0;107;0;118", "wc_reply_authors": "105;334;423;77", "reply_reviewers": "0;1;0;1", "reply_authors": "3;3;3;3", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.25, 16.67895380412093 ], "wc_strengths_avg": [ 88.75, 93.41406478684031 ], "wc_weaknesses_avg": [ 177.75, 140.6456096008688 ], "wc_questions_avg": [ 49.25, 31.594105462886585 ], "wc_limitations_avg": [ 11.5, 13.665650368716449 ], "wc_review_avg": [ 388.5, 212.1903626463747 ], "wc_reply_reviewers_avg": [ 56.25, 56.38428415791053 ], "wc_reply_authors_avg": [ 234.75, 147.48622817063293 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4057958929357742194&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "wisc.edu;microsoft.com;wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com", "aff_unique_abbr": "UW-Madison;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "CHASE: Learning Convex Hull Adaptive Shift for Skeleton-based Multi-Entity Action Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94816", "id": "WhE4C4fLbE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WhE4C4fLbE", "openreview": "https://openreview.net/forum?id=WhE4C4fLbE", "poster": "/media/PosterPDFs/NeurIPS%202024/94816.png?t=1729428710.9712124", "project": "", "author_site": "Yuhang Wen, Mengyuan Liu, Songtao Wu, Beichen Ding", "tldr": "", "abstract": "Skeleton-based multi-entity action recognition is a challenging task aiming to identify interactive actions or group activities involving multiple diverse entities. Existing models for individuals often fall short in this task due to the inherent distribution discrepancies among entity skeletons, leading to suboptimal backbone optimization. To this end, we introduce a Convex Hull Adaptive Shift based multi-Entity action recognition method (CHASE), which mitigates inter-entity distribution gaps and unbiases subsequent backbones. Specifically, CHASE comprises a learnable parameterized network and an auxiliary objective. The parameterized network achieves plausible, sample-adaptive repositioning of skeleton sequences through two key components. First, the Implicit Convex Hull Constrained Adaptive Shift ensures that the new origin of the coordinate system is within the skeleton convex hull. Second, the Coefficient Learning Block provides a lightweight parameterization of the mapping from skeleton sequences to their specific coefficients in convex combinations. Moreover, to guide the optimization of this network for discrepancy minimization, we propose the Mini-batch Pair-wise Maximum Mean Discrepancy as the additional objective. CHASE operates as a sample-adaptive normalization method to mitigate inter-entity distribution discrepancies, thereby reducing data bias and improving the subsequent classifier's multi-entity action recognition performance. Extensive experiments on six datasets, including NTU Mutual 11/26, H2O, Assembly101, Collective Activity and Volleyball, consistently verify our approach by seamlessly adapting to single-entity backbones and boosting their performance in multi-entity scenarios. Our code is publicly available at https://github.com/Necolizer/CHASE .", "keywords": "Action Recognition;Skeletons;Multi-Entity Actions;Convex Hull", "primary_area": "machine_vision", "supplementary_material": "/attachment/518f9b86c3e5862e1dd34beec2249f93d60e6e69.zip", "author": "Yuhang Wen;Mengyuan Liu;Songtao Wu;Beichen Ding", "authorids": "~Yuhang_Wen1;~Mengyuan_Liu2;~Songtao_Wu1;~Beichen_Ding1", "gender": "M;;M;M", "homepage": "https://necolizer.github.io/;https://www.semanticscholar.org/author/Mengyuan-Liu/47842072;https://www.linkedin.com/in/wu-songtao-5a756b66/;", "dblp": "304/1491-1;;36/1593.html;", "google_scholar": "fxBaCW8AAAAJ;woX_4AcAAAAJ;JeMvxPwAAAAJ;", "orcid": "0000-0001-6644-4075;0000-0002-6332-8316;;0000-0002-5771-1204", "linkedin": ";;;", "or_profile": "~Yuhang_Wen1;~Mengyuan_Liu2;~Songtao_Wu1;~Beichen_Ding1", "aff": "SUN YAT-SEN UNIVERSITY;Peking University;Sony (China) Limited;Sun Yat-Sen University", "aff_domain": "sysu.edu.cn;pku.edu.cn;sony.com;mail.sysu.edu.cn", "position": "MS student;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nwen2024chase,\ntitle={{CHASE}: Learning Convex Hull Adaptive Shift for Skeleton-based Multi-Entity Action Recognition},\nauthor={Yuhang Wen and Mengyuan Liu and Songtao Wu and Beichen Ding},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WhE4C4fLbE}\n}", "github": "", "reviewers": "hFtj;WotV;16iH;N2tt", "pdf_size": 4728587, "rating": "4;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;2", "wc_summary": "42;50;72;90", "wc_strengths": "32;46;22;95", "wc_weaknesses": "164;214;28;137", "wc_questions": "31;160;263;35", "wc_limitations": "164;3;53;1", "wc_review": "433;473;438;358", "wc_reply_reviewers": "33;26;0;26", "wc_reply_authors": "274;100;57;98", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.5, 18.83480820183736 ], "wc_strengths_avg": [ 48.75, 28.030117730755254 ], "wc_weaknesses_avg": [ 135.75, 68.0675216237524 ], "wc_questions_avg": [ 122.25, 96.40377326640281 ], "wc_limitations_avg": [ 55.25, 66.15275882380114 ], "wc_review_avg": [ 425.5, 41.907636535600524 ], "wc_reply_reviewers_avg": [ 21.25, 12.597122687344122 ], "wc_reply_authors_avg": [ 132.25, 83.61930100162283 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kKRCjAHi7hEJ:scholar.google.com/&scioq=CHASE:+Learning+Convex+Hull+Adaptive+Shift+for+Skeleton-based+Multi-Entity+Action+Recognition&hl=en&as_sdt=0,21", "gs_version_total": 4, "email": "sysu.edu.cn;pku.edu.cn;sony.com;mail.sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Sun Yat-sen University;Peking University;Sony", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sysu.edu.cn;http://www.pku.edu.cn;https://www.sony.com.cn", "aff_unique_abbr": "SYSU;Peking U;Sony China", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Personalized Federated Learning via Feature Distribution Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94815", "id": "Wl2optQcng", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wl2optQcng", "openreview": "https://openreview.net/forum?id=Wl2optQcng", "poster": "/media/PosterPDFs/NeurIPS%202024/94815.png?t=1733251841.48924", "project": "", "author_site": "Connor Mclaughlin, Lili Su", "tldr": "", "abstract": "Federated learning (FL) is a distributed learning framework that leverages commonalities between distributed client datasets to train a global model. Under heterogeneous clients, however, FL can fail to produce stable training results. Personalized federated learning (PFL) seeks to address this by learning individual models tailored to each client. One approach is to decompose model training into shared representation learning and personalized classifier training. Nonetheless, previous works struggle to navigate the bias-variance trade-off in classifier learning, relying solely on limited local datasets or introducing costly techniques to improve generalization.\nIn this work, we frame representation learning as a generative modeling task, where representations are trained with a classifier based on the global feature distribution. We then propose an algorithm, pFedFDA, that efficiently generates personalized models by adapting global generative classifiers to their local feature distributions. Through extensive computer vision benchmarks, we demonstrate that our method can adjust to complex distribution shifts with significant improvements over current state-of-the-art in data-scarce settings.", "keywords": "Federated Learning;Data Heterogeneity;Personalization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/ba58b7beab1f3a54840b287c7d2e24fcb19cb761.zip", "author": "Connor Mclaughlin;Lili Su", "authorids": "~Connor_Mclaughlin1;~Lili_Su1", "gender": "M;F", "homepage": ";https://sites.google.com/site/lilisuece/", "dblp": "359/8920;", "google_scholar": "9IWNAUoAAAAJ;wSHziZ4AAAAJ", "orcid": "0009-0006-8146-7316;", "linkedin": "connormclaughlin628/;", "or_profile": "~Connor_Mclaughlin1;~Lili_Su1", "aff": "Northeastern University;Northeastern University", "aff_domain": "neu.edu;northeastern.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmclaughlin2024personalized,\ntitle={Personalized Federated Learning via Feature Distribution Adaptation},\nauthor={Connor Mclaughlin and Lili Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wl2optQcng}\n}", "github": "", "reviewers": "WK31;BoVr;sWFJ;Qz6b;3xQF", "pdf_size": 2873688, "rating": "5;5;5;6;6", "confidence": "3;4;3;4;4", "soundness": "3;2;3;3;3", "novelty": "3;2;3;2;2", "presentation": "3;3;3;3;3", "wc_summary": "79;67;86;77;33", "wc_strengths": "51;36;83;33;32", "wc_weaknesses": "115;130;132;210;48", "wc_questions": "20;103;17;77;181", "wc_limitations": "1;65;6;57;6", "wc_review": "266;401;324;454;300", "wc_reply_reviewers": "34;45;0;11;35", "wc_reply_authors": "55;403;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.4, 18.714700104463336 ], "wc_strengths_avg": [ 47.0, 19.256167843057455 ], "wc_weaknesses_avg": [ 127.0, 51.59069683576681 ], "wc_questions_avg": [ 79.6, 60.509833911522186 ], "wc_limitations_avg": [ 27.0, 27.93564031841762 ], "wc_review_avg": [ 349.0, 68.76627080189823 ], "wc_reply_reviewers_avg": [ 25.0, 16.745148551147583 ], "wc_reply_authors_avg": [ 91.6, 157.15037384619865 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6666666666666665, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:U-niZahceEoJ:scholar.google.com/&scioq=Personalized+Federated+Learning+via+Feature+Distribution+Adaptation&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "neu.edu;northeastern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Wmodg5UiEd", "title": "Nearly Optimal Algorithms for Contextual Dueling Bandits from Adversarial Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning from human feedback plays an important role in aligning generative models, such as large language models (LLM). However, the effectiveness of this approach can be influenced by adversaries, who may intentionally provide misleading preferences to manipulate the output in an undesirable or harmful direction.\nTo tackle this challenge, we study a specific model within this problem domain--contextual dueling bandits with adversarial feedback, where the true preference label can be flipped by an adversary. We propose an algorithm namely robust contextual dueling bandit (\\algo), which is based on uncertainty-weighted maximum likelihood estimation. Our algorithm achieves an $\\tilde O(d\\sqrt{T}+dC)$ regret bound, where $T$ is the number of rounds, $d$ is the dimension of the context, and $ 0 \\le C \\le T$ is the total number of adversarial feedback. We also prove a lower bound to show that our regret bound is nearly optimal, both in scenarios with and without ($C=0$) adversarial feedback.\nAdditionally, we conduct experiments to evaluate our proposed algorithm against various types of adversarial feedback. Experimental results demonstrate its superiority over the state-of-the-art dueling bandit algorithms in the presence of adversarial feedback.", "keywords": "Dueling bandits;bandits;preference feedback;adversarially robust;weighted MLE", "primary_area": "bandits", "supplementary_material": "", "author": "Qiwei Di;Jiafan He;Quanquan Gu", "authorids": "~Qiwei_Di1;~Jiafan_He1;~Quanquan_Gu1", "gender": "M;M;M", "homepage": "https://qiwei-di1234.github.io/;https://sites.google.com/g.ucla.edu/jiafan-he-homepage;http://web.cs.ucla.edu/~qgu/", "dblp": "354/3878;214/5785;50/4597", "google_scholar": "SewL0pkAAAAJ;F3AXNBwAAAAJ;GU9HgNAAAAAJ", "orcid": ";;", "linkedin": "qiwei-di-00776a253/;;", "or_profile": "~Qiwei_Di1;~Jiafan_He1;~Quanquan_Gu1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024nearly,\ntitle={Nearly Optimal Algorithms for Contextual Dueling Bandits from Adversarial Feedback},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Wmodg5UiEd}\n}", "github": "", "project": "", "reviewers": "D5SV;q7mM;6EN8;8DeH", "site": "https://openreview.net/forum?id=Wmodg5UiEd", "pdf_size": 459649, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "211;54;23;73", "wc_strengths": "41;74;31;49", "wc_weaknesses": "94;114;6;88", "wc_questions": "45;140;140;4", "wc_limitations": "9;1;43;7", "wc_review": "400;383;243;221", "wc_reply_reviewers": "26;10;55;10", "wc_reply_authors": "61;5;18;6", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.25, 71.96309818233232 ], "wc_strengths_avg": [ 48.75, 15.911866640969563 ], "wc_weaknesses_avg": [ 75.5, 41.26439142893059 ], "wc_questions_avg": [ 82.25, 59.541477139889636 ], "wc_limitations_avg": [ 15.0, 16.431676725154983 ], "wc_review_avg": [ 311.75, 80.35351579115876 ], "wc_reply_reviewers_avg": [ 25.25, 18.376275465937052 ], "wc_reply_authors_avg": [ 22.5, 22.808989455914087 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18059219860355044007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Gliding over the Pareto Front with Uniform Designs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94814", "id": "WoEXVQcHFw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WoEXVQcHFw", "openreview": "https://openreview.net/forum?id=WoEXVQcHFw", "poster": "", "project": "", "author_site": "Xiaoyuan Zhang, Genghui Li, Xi Lin, Yichi Zhang, Yifan Chen, Qingfu Zhang", "tldr": "", "abstract": "Multiobjective optimization (MOO) plays a critical role in various real-world domains. A major challenge therein is generating $K$ uniform Pareto-optimal solutions to represent the entire Pareto front. To address this issue, this paper firstly introduces \\emph{fill distance} to evaluate the $K$ design points, which provides a quantitative metric for the representativeness of the design. However, directly specifying the optimal design that minimizes the fill distance is nearly intractable due to the nested $\\min-\\max-\\min$ optimization problem. To address this, we propose a surrogate ``max-packing'' design for the fill distance design, which is easier to optimize and leads to a rate-optimal design with a fill distance at most $4\\times$ the minimum value.\n Extensive experiments on synthetic and real-world benchmarks demonstrate that our proposed paradigm efficiently produces high-quality, representative solutions and outperforms baseline methods.", "keywords": "uniform design; multiobjective optimization; algorithmic fairness", "primary_area": "other", "supplementary_material": "", "author": "Xiaoyuan Zhang;Genghui Li;Xi Lin;Yichi Zhang;Yifan Chen;Qingfu Zhang", "authorids": "~Xiaoyuan_Zhang2;~Genghui_Li1;~Xi_Lin2;~Yichi_Zhang14;~Yifan_Chen3;~Qingfu_Zhang1", "gender": "M;M;M;;;M", "homepage": ";;https://xi-l.github.io/;https://yichistat.github.io/website/;;https://www.cs.cityu.edu.hk/~qzhan7/index.html", "dblp": ";171/2964.html;43/489-1;;;98/1240.html", "google_scholar": "KQj18L8AAAAJ;3WixDRMAAAAJ;QB_MUboAAAAJ;;;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ", "orcid": "0000-0002-3852-645X;;;;;", "linkedin": ";;;;;", "or_profile": "~Xiaoyuan_Zhang2;~Genghui_Li1;~Xi_Lin2;~Yichi_Zhang14;~Yifan_Chen3;~Qingfu_Zhang1", "aff": "City University of Hong Kong;Shenzhen University;City University of Hong Kong;Duke University;;City University of Hong Kong", "aff_domain": "cityu.edu.hk;szu.edu.cn;cityu.edu.hk;duke.edu;;cityu.edu.hk", "position": "PhD student;Assistant Professor;Postdoc;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nzhang2024gliding,\ntitle={Gliding over the Pareto Front with Uniform Designs},\nauthor={Xiaoyuan Zhang and Genghui Li and Xi Lin and Yichi Zhang and Yifan Chen and Qingfu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WoEXVQcHFw}\n}", "github": "", "reviewers": "AxUm;N5xS;JBGb;5qjZ", "pdf_size": 4475846, "rating": "4;6;7;7", "confidence": "3;5;3;3", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "1;3;3;3", "wc_summary": "67;103;41;61", "wc_strengths": "30;135;73;63", "wc_weaknesses": "84;623;56;39", "wc_questions": "51;320;100;150", "wc_limitations": "16;82;42;36", "wc_review": "248;1263;312;349", "wc_reply_reviewers": "0;1275;20;75", "wc_reply_authors": "525;4474;17;82", "reply_reviewers": "0;9;1;1", "reply_authors": "4;15;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 68.0, 22.38302928559939 ], "wc_strengths_avg": [ 75.25, 37.989307706248084 ], "wc_weaknesses_avg": [ 200.5, 244.4590967830815 ], "wc_questions_avg": [ 155.25, 101.35426730039539 ], "wc_limitations_avg": [ 44.0, 23.958297101421877 ], "wc_review_avg": [ 543.0, 417.2595115752306 ], "wc_reply_reviewers_avg": [ 342.5, 539.0790758321083 ], "wc_reply_authors_avg": [ 1274.5, 1857.5462982117026 ], "reply_reviewers_avg": [ 2.75, 3.6314597615834874 ], "reply_authors_avg": [ 5.75, 5.402545696243577 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6916185234174741634&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "email": "cityu.edu.hk;szu.edu.cn;cityu.edu.hk;duke.edu;;cityu.edu.hk", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "City University of Hong Kong;Shenzhen University;Duke University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.szu.edu.cn;https://www.duke.edu", "aff_unique_abbr": "CityU;SZU;Duke", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Conjugate Bayesian Two-step Change Point Detection for Hawkes Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94813", "id": "WoKtFJf9VG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WoKtFJf9VG", "openreview": "https://openreview.net/forum?id=WoKtFJf9VG", "poster": "/media/PosterPDFs/NeurIPS%202024/94813.png?t=1731552382.590719", "project": "", "author_site": "Zeyue Zhang, Xiaoling LU, Feng Zhou", "tldr": "", "abstract": "The Bayesian two-step change point detection method is popular for the Hawkes process due to its simplicity and intuitiveness. However, the non-conjugacy between the point process likelihood and the prior requires most existing Bayesian two-step change point detection methods to rely on non-conjugate inference methods. These methods lack analytical expressions, leading to low computational efficiency and impeding timely change point detection. To address this issue, this work employs data augmentation to propose a conjugate Bayesian two-step change point detection method for the Hawkes process, which proves to be more accurate and efficient. Extensive experiments on both synthetic and real data demonstrate the superior effectiveness and efficiency of our method compared to baseline methods. Additionally, we conduct ablation studies to explore the robustness of our method concerning various hyperparameters.", "keywords": "Change Point Detection;Conjugate inference method;Bayesian method;Hawkes Process", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Zeyue Zhang;Xiaoling LU;Feng Zhou", "authorids": "~Zeyue_Zhang1;~Xiaoling_LU1;~Feng_Zhou9", "gender": "F;F;", "homepage": "https://www.linkedin.com/in/zeyuezhang;http://stat.ruc.edu.cn/jxtd/jsdw/sjkxydsjtjx/5b900093966a43e0ab69dfbaf82e095a.htm;", "dblp": "354/0572;47/6186;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0009-0000-8070-8102;0000-0002-1854-2532;", "linkedin": "zeyuezhang;;", "or_profile": "~Zeyue_Zhang1;~Xiaoling_LU1;~Feng_Zhou9", "aff": "Renmin University of China;Renmin University of China;", "aff_domain": "ruc.edu.cn;ruc.edu.cn;", "position": "MS student;Full Professor;", "bibtex": "@inproceedings{\nzhang2024conjugate,\ntitle={Conjugate Bayesian Two-step Change Point Detection for Hawkes Process},\nauthor={Zeyue Zhang and Xiaoling LU and Feng Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WoKtFJf9VG}\n}", "github": "", "reviewers": "xKxH;Wgzk;E9Cj;5yaf", "pdf_size": 675441, "rating": "5;6;6;8", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "51;43;60;66", "wc_strengths": "59;79;57;76", "wc_weaknesses": "61;96;84;19", "wc_questions": "19;159;75;48", "wc_limitations": "6;27;4;6", "wc_review": "196;404;280;215", "wc_reply_reviewers": "43;8;24;10", "wc_reply_authors": "32;16;16;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.0, 8.74642784226795 ], "wc_strengths_avg": [ 67.75, 9.832980219648569 ], "wc_weaknesses_avg": [ 65.0, 29.385370509830228 ], "wc_questions_avg": [ 75.25, 52.25119615855698 ], "wc_limitations_avg": [ 10.75, 9.41740410091868 ], "wc_review_avg": [ 273.75, 81.39525477569316 ], "wc_reply_reviewers_avg": [ 21.25, 13.988834833537782 ], "wc_reply_authors_avg": [ 20.0, 6.928203230275509 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SIzIPmhVapEJ:scholar.google.com/&scioq=Conjugate+Bayesian+Two-step+Change+Point+Detection+for+Hawkes+Process&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "ruc.edu.cn;ruc.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Interpret Your Decision: Logical Reasoning Regularization for Generalization in Visual Classification", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94812", "id": "Woiqqi5bYV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Woiqqi5bYV", "openreview": "https://openreview.net/forum?id=Woiqqi5bYV", "poster": "/media/PosterPDFs/NeurIPS%202024/94812.png?t=1730256879.2748454", "project": "", "author_site": "Zhaorui Tan, Xi Yang, Qiufeng Wang, Anh Nguyen, Kaizhu Huang", "tldr": "", "abstract": "Vision models excel in image classification but struggle to generalize to unseen data, such as classifying images from unseen domains or discovering novel categories. In this paper, we explore the relationship between logical reasoning and deep learning generalization in visual classification. A logical regularization termed L-Reg is derived which bridges a logical analysis framework to image classification. Our work reveals that L-Reg reduces the complexity of the model in terms of the feature distribution and classifier weights. Specifically, we unveil the interpretability brought by L-Reg, as it enables the model to extract the salient features, such as faces to persons, for classification. Theoretical analysis and experiments demonstrate that L-Reg enhances generalization across various scenarios, including multi-domain generalization and generalized category discovery. In complex real-world scenarios where images span unknown classes and unseen domains, L-Reg consistently improves generalization, highlighting its practical efficacy.", "keywords": "Domain generalization;generalized category discovery;image classification;logical reasoning", "primary_area": "machine_vision", "supplementary_material": "/attachment/36c3810c72ec56d44085efee77914fa14bba1f60.zip", "author": "Zhaorui Tan;Xi Yang;Qiufeng Wang;Anh Nguyen;Kaizhu Huang", "authorids": "~Zhaorui_Tan1;~Xi_Yang7;~Qiufeng_Wang2;~Anh_Nguyen2;~Kaizhu_Huang1", "gender": "F;F;M;M;M", "homepage": "https://github.com/zhaorui-tan;https://scholar.xjtlu.edu.cn/en/persons/XiYang01;https://scholar.xjtlu.edu.cn/en/persons/QiufengWang;https://www.csc.liv.ac.uk/~anguyen/;https://sites.google.com/view/kaizhu-huang-homepage", "dblp": "332/0953.html;13/1520-8;86/7443-1;52/5285-3.html;99/3390", "google_scholar": "BKUdVaYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;qj66yXAAAAAJ;https://scholar.google.co.uk/citations?user=gEbaF0sAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5054-8275;0000-0002-8600-2570;0000-0002-0918-4606;0000-0002-1449-211X;", "linkedin": ";;;;", "or_profile": "~Zhaorui_Tan1;~Xi_Yang7;~Qiufeng_Wang2;~Anh_Nguyen2;~Kaizhu_Huang1", "aff": "University of Liverpool;Xi'an Jiaotong-Liverpool University;Xi'an Jiaotong-Liverpool University;University of Liverpool;Xi'an Jiaotong-Liverpool University", "aff_domain": "liverpool.ac.uk;xjtlu.edu.cn;xjtlu.edu.cn;liverpool.ac.uk;xjtlu.edu.cn", "position": "PhD student;Assistant Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ntan2024interpret,\ntitle={Interpret Your Decision: Logical Reasoning Regularization for Generalization in Visual Classification},\nauthor={Zhaorui Tan and Xi Yang and Qiufeng Wang and Anh Nguyen and Kaizhu Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Woiqqi5bYV}\n}", "github": "", "reviewers": "2Pp3;BLvb;t6Vg;8YRB;y1GK", "pdf_size": 0, "rating": "6;6;6;6;6", "confidence": "4;2;4;3;3", "soundness": "3;3;3;3;2", "novelty": "3;3;3;3;2", "presentation": "3;2;3;3;3", "wc_summary": "94;81;76;52;64", "wc_strengths": "131;55;96;48;54", "wc_weaknesses": "128;82;121;85;104", "wc_questions": "25;80;53;2;182", "wc_limitations": "6;1;5;18;7", "wc_review": "384;299;351;205;411", "wc_reply_reviewers": "15;25;124;19;20", "wc_reply_authors": "18;22;53;14;17", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 73.4, 14.38888459888396 ], "wc_strengths_avg": [ 76.8, 32.0337322208949 ], "wc_weaknesses_avg": [ 104.0, 18.49324200890693 ], "wc_questions_avg": [ 68.4, 62.560690533273366 ], "wc_limitations_avg": [ 7.4, 5.678027826631356 ], "wc_review_avg": [ 330.0, 72.80659310804208 ], "wc_reply_reviewers_avg": [ 40.6, 41.82152555801857 ], "wc_reply_authors_avg": [ 24.8, 14.330387294138285 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13049418533640410080&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "liverpool.ac.uk;xjtlu.edu.cn;xjtlu.edu.cn;liverpool.ac.uk;xjtlu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Liverpool;Xi'an Jiao Tong-Liverpool University", "aff_unique_dep": ";", "aff_unique_url": "https://www.liverpool.ac.uk;https://www.xjtu.edu.cn/en", "aff_unique_abbr": "Liv Uni;XJTLU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United Kingdom;China" }, { "id": "WpEaUIBIWH", "title": "Towards a Unified Framework of Clustering-based Anomaly Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "Unsupervised Anomaly Detection (UAD) plays a crucial role in identifying abnormal patterns within data without labeled examples, holding significant practical implications across various domains. Although the individual contributions of representation learning and clustering to anomaly detection are well-established, their interdependencies remain under-explored due to the absence of a unified theoretical framework. Consequently, their collective potential to enhance anomaly detection performance remains largely untapped. To bridge this gap, in this paper, we propose a novel probabilistic mixture model for anomaly detection to establish a theoretical connection among representation learning, clustering, and anomaly detection. By maximizing a novel anomaly-aware data likelihood, representation learning and clustering can effectively reduce the adverse impact of anomalous data and collaboratively benefit anomaly detection. Meanwhile, a theoretically substantiated anomaly score is naturally derived from this framework. Lastly, drawing inspiration from gravitational analysis in physics, we have devised an improved anomaly score that more effectively harnesses the combined power of representation learning and clustering. Extensive experiments, involving 17 baseline methods across 30 diverse datasets, validate the effectiveness and generalization capability of the proposed method, surpassing state-of-the-art methods.", "keywords": "Anomaly Detection;Clustering", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ecb5a77e6bc31d3410fd87ec7b81b4eaa5d927b1.zip", "author": "Zeyu Fang;Ming Gu;Sheng Zhou;Jiawei Chen;Qiaoyu Tan;Haishuai Wang;Jiajun Bu", "authorids": "~Zeyu_Fang1;~Ming_Gu6;~Sheng_Zhou1;~Jiawei_Chen6;~Qiaoyu_Tan2;~Haishuai_Wang2;~Jiajun_Bu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/BabelTower;;https://zhoushengisnoob.github.io/;https://jiawei-chen.github.io/;https://qiaoyu-tan.github.io/;https://www.linkedin.com/in/haishuai-wang-b5241775/;https://person.zju.edu.cn/bjj", "dblp": ";76/2502-14;34/4858-4.html;03/1390-7;197/5465.html;163/0767;50/3147", "google_scholar": ";dw1BYBAAAAAJ;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;;V9bOnV4AAAAJ;;OgZP2okAAAAJ", "orcid": ";;0000-0003-3645-1041;0000-0002-4752-2629;0000-0001-8999-968X;0000-0003-1617-0920;0000-0002-1097-2044", "linkedin": ";;;;;;", "or_profile": "~Zeyu_Fang1;~Ming_Gu6;~Sheng_Zhou1;~Jiawei_Chen6;~Qiaoyu_Tan2;~Haishuai_Wang2;~Jiajun_Bu1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;New York University Shanghai;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;nyu.edu;zju.edu.cn;zju.edu.cn", "position": "MS student;PhD student;Associate Professor;Researcher;Assistant Professor;Research Professor;Full Professor", "bibtex": "@misc{\nanonymous2024towards,\ntitle={Towards a Unified Framework of Clustering-based Anomaly Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=WpEaUIBIWH}\n}", "github": "", "project": "", "reviewers": "sbbw;R7pC;6DdC;BFMu", "site": "https://openreview.net/forum?id=WpEaUIBIWH", "pdf_size": 1265212, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "86;94;258;79", "wc_strengths": "14;56;53;113", "wc_weaknesses": "60;148;60;88", "wc_questions": "147;2;163;5", "wc_limitations": "8;19;11;6", "wc_review": "315;319;545;291", "wc_reply_reviewers": "35;29;312;25", "wc_reply_authors": "0;38;670;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.25, 74.52306689877973 ], "wc_strengths_avg": [ 59.0, 35.30580688781946 ], "wc_weaknesses_avg": [ 89.0, 35.93048844644336 ], "wc_questions_avg": [ 79.25, 75.96833221810256 ], "wc_limitations_avg": [ 11.0, 4.949747468305833 ], "wc_review_avg": [ 367.5, 103.03761449101974 ], "wc_reply_reviewers_avg": [ 100.25, 122.30571327619981 ], "wc_reply_authors_avg": [ 177.0, 285.05613482259946 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:f30LfJmETx0J:scholar.google.com/&scioq=Towards+a+Unified+Framework+of+Clustering-based+Anomaly+Detection&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Zhejiang University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.nyu.edu", "aff_unique_abbr": "ZJU;NYU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Breaking Long-Tailed Learning Bottlenecks: A Controllable Paradigm with Hypernetwork-Generated Diverse Experts", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94811", "id": "WpPNVPAEyv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WpPNVPAEyv", "openreview": "https://openreview.net/forum?id=WpPNVPAEyv", "poster": "", "project": "", "author_site": "Zhe Zhao, HaiBin Wen, Zikang Wang, Pengkun Wang, Fanfu Wang, Song Lai, Qingfu Zhang, Yang Wang", "tldr": "", "abstract": "Traditional long-tailed learning methods often perform poorly when dealing with inconsistencies between training and test data distributions, and they cannot flexibly adapt to different user preferences for trade-offs between head and tail classes. To address this issue, we propose a novel long-tailed learning paradigm that aims to tackle distribution shift in real-world scenarios and accommodate different user preferences for the trade-off between head and tail classes. We generate a set of diverse expert models via hypernetworks to cover all possible distribution scenarios, and optimize the model ensemble to adapt to any test distribution. Crucially, in any distribution scenario, we can flexibly output a dedicated model solution that matches the user's preference. Extensive experiments demonstrate that our method not only achieves higher performance ceilings but also effectively overcomes distribution shift while allowing controllable adjustments according to user preferences. We provide new insights and a paradigm for the long-tailed learning problem, greatly expanding its applicability in practical scenarios. The code can be found here: https://github.com/DataLab-atom/PRL.", "keywords": "Long tail learning;Multi-objective optimization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zhe Zhao;HaiBin Wen;Zikang Wang;Pengkun Wang;Fanfu Wang;Song Lai;Qingfu Zhang;Yang Wang", "authorids": "~Zhe_Zhao5;~HaiBin_Wen1;~Zikang_Wang2;~Pengkun_Wang1;~Fanfu_Wang1;~Song_Lai1;~Qingfu_Zhang1;~Yang_Wang32", "gender": "M;M;;M;M;M;M;M", "homepage": "https://di.ustc.edu.cn/_upload/tpl/14/f7/5367/template5367/members.html;https://github.com/haibin65535;https://github.com/SpXace;http://home.ustc.edu.cn/~pengkun/index.html;;https://scholars.cityu.edu.hk/en/persons/song-lai(bbd49b19-00a6-41b6-a18a-02559baf45dc).html;https://www.cs.cityu.edu.hk/~qzhan7/index.html;http://staff.ustc.edu.cn/~angyan/", "dblp": ";209/2186;;;396/5533;48/3684;98/1240.html;", "google_scholar": ";;;https://scholar.google.com/citations?hl=zh-CN;;3P3jmP8AAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-8942-8761;0009-0009-5019-2390;;0000-0002-2680-4563;;0000-0002-4835-0945;;0000-0002-6079-7053", "linkedin": ";;;;;;;", "or_profile": "~Zhe_Zhao5;~HaiBin_Wen1;~Zikang_Wang2;~Pengkun_Wang1;~Fanfu_Wang1;~Song_Lai1;~Qingfu_Zhang1;~Yang_Wang32", "aff": "University of Science and Technology of China;Shaoguan University;Harbin Institute of Technology;University of Science and Technology of China;Lanzhou University;City University of Hong Kong;City University of Hong Kong;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;sgu.edu.cn;stu.hit.edu.cn;ustc.edu.cn;lzu.edu.cn;cityu.edu.hk;cityu.edu.hk;ustc.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Researcher;Undergrad student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhao2024breaking,\ntitle={Breaking Long-Tailed Learning Bottlenecks: A Controllable Paradigm with Hypernetwork-Generated Diverse Experts},\nauthor={Zhe Zhao and HaiBin Wen and Zikang Wang and Pengkun Wang and Fanfu Wang and Song Lai and Qingfu Zhang and Yang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WpPNVPAEyv}\n}", "github": "", "reviewers": "Vosp;VYLm;qXeH;LGeA", "pdf_size": 1415248, "rating": "7;7;7;7", "confidence": "4;3;5;3", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "2;2;3;3", "wc_summary": "80;67;88;104", "wc_strengths": "43;37;162;66", "wc_weaknesses": "209;67;112;246", "wc_questions": "9;3;18;213", "wc_limitations": "1;1;15;1", "wc_review": "342;175;395;630", "wc_reply_reviewers": "27;19;19;13", "wc_reply_authors": "47;65;0;53", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.75, 13.40475661845451 ], "wc_strengths_avg": [ 77.0, 50.25435304528355 ], "wc_weaknesses_avg": [ 158.5, 72.00868003234055 ], "wc_questions_avg": [ 60.75, 88.06354239979221 ], "wc_limitations_avg": [ 4.5, 6.06217782649107 ], "wc_review_avg": [ 385.5, 162.84425074284937 ], "wc_reply_reviewers_avg": [ 19.5, 4.9749371855331 ], "wc_reply_authors_avg": [ 41.25, 24.681724007856502 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6875119073141035747&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "ustc.edu.cn;sgu.edu.cn;stu.hit.edu.cn;ustc.edu.cn;lzu.edu.cn;cityu.edu.hk;cityu.edu.hk;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;3;4;4;0", "aff_unique_norm": "University of Science and Technology of China;Shaoguan University;Harbin Institute of Technology;Lanzhou University;City University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.gdsgu.edu.cn;http://www.hit.edu.cn/;https://www.lzu.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "USTC;;HIT;LZU;CityU", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Harbin;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Prevalence of Neural Collapse in Neural Multivariate Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94810", "id": "Wq6aY6fC2H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wq6aY6fC2H", "openreview": "https://openreview.net/forum?id=Wq6aY6fC2H", "poster": "/media/PosterPDFs/NeurIPS%202024/94810.png?t=1733825104.2720327", "project": "", "author_site": "George Andriopoulos, Zixuan Dong, Li Guo, Zifan Zhao, Keith Ross", "tldr": "", "abstract": "Recently it has been observed that neural networks exhibit Neural Collapse (NC) during the final stage of training for the classification problem. We empirically show that multivariate regression, as employed in imitation learning and other applications, exhibits Neural Regression Collapse (NRC), a new form of neural collapse: (NRC1) The last-layer feature vectors collapse to the subspace spanned by the $n$ principal components of the feature vectors, where $n$ is the dimension of the targets (for univariate regression, $n=1$); (NRC2) The last-layer feature vectors also collapse to the subspace spanned by the last-layer weight vectors; (NRC3) The Gram matrix for the weight vectors converges to a specific functional form that depends on the covariance matrix of the targets. After empirically establishing the prevalence of (NRC1)-(NRC3) for a variety of datasets and network architectures, we provide an explanation of these phenomena by modeling the regression task in the context of the Unconstrained Feature Model (UFM), in which the last layer feature vectors are treated as free variables when minimizing the loss function. We show that when the regularization parameters in the UFM model are strictly positive, then (NRC1)-(NRC3) also emerge as solutions in the UFM optimization problem. We also show that if the regularization parameters are equal to zero, then there is no collapse. To our knowledge, this is the first empirical and theoretical study of neural collapse in the context of regression. This extension is significant not only because it broadens the applicability of neural collapse to a new category of problems but also because it suggests that the phenomena of neural collapse could be a universal behavior in deep learning.", "keywords": "Neural Collapse;Multivariate Regression;DNN;Unconstrained Feature Model", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/f6bd6322b3a3dec2e6674b6e287187ec527c7a1a.zip", "author": "George Andriopoulos;Zixuan Dong;Li Guo;Zifan Zhao;Keith W. Ross", "authorids": "~George_Andriopoulos1;~Zixuan_Dong1;~Li_Guo6;~Zifan_Zhao1;~Keith_W._Ross1", "gender": "M;M;F;M;M", "homepage": ";https://cs.shanghai.nyu.edu/phd-students/zixuan-dong-dongzixuan;;;http://www.nyu.edu/projects/keithwross/", "dblp": "345/0869;329/4319;;;r/KWRoss", "google_scholar": "mfwtEOAAAAAJ;lbtEL90AAAAJ;JNeOWZkAAAAJ;PWVoKaoAAAAJ;https://scholar.google.com.tw/citations?user=RhUcYmQAAAAJ", "orcid": ";;;;", "linkedin": "george-andriopoulos-612a10229/;;;zifan-zhao-1a2984191/;", "or_profile": "~George_Andriopoulos1;~Zixuan_Dong1;~Li_Guo6;~Zifan_Zhao1;~Keith_W._Ross1", "aff": "New York University;New York University;;New York University;New York University", "aff_domain": "nyuad.nyu.edu;nyu.edu;;nyu.edu;nyu.edu", "position": "Postdoc;PhD student;;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nandriopoulos2024the,\ntitle={The Prevalence of Neural Collapse in Neural Multivariate Regression},\nauthor={George Andriopoulos and Zixuan Dong and Li Guo and Zifan Zhao and Keith W. Ross},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wq6aY6fC2H}\n}", "github": "", "reviewers": "vXK1;8FCG;T8Jo;6Hcy", "pdf_size": 2402582, "rating": "5;6;6;6", "confidence": "2;4;5;3", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "229;47;107;101", "wc_strengths": "50;49;26;38", "wc_weaknesses": "251;73;18;149", "wc_questions": "968;154;17;147", "wc_limitations": "7;1;2;7", "wc_review": "1505;324;170;442", "wc_reply_reviewers": "140;12;50;26", "wc_reply_authors": "82;13;12;13", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.0, 66.58828725834597 ], "wc_strengths_avg": [ 40.75, 9.730750228014282 ], "wc_weaknesses_avg": [ 122.75, 87.44248109471734 ], "wc_questions_avg": [ 321.5, 377.223077236799 ], "wc_limitations_avg": [ 4.25, 2.7726341266023544 ], "wc_review_avg": [ 610.25, 525.5104066524277 ], "wc_reply_reviewers_avg": [ 57.0, 49.80963762164909 ], "wc_reply_authors_avg": [ 30.0, 30.024989592004857 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5175103424397552487&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "nyuad.nyu.edu;nyu.edu;;nyu.edu;nyu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Strategic Multi-Armed Bandit Problems Under Debt-Free Reporting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94809", "id": "WqNfihAcu5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WqNfihAcu5", "openreview": "https://openreview.net/forum?id=WqNfihAcu5", "poster": "/media/PosterPDFs/NeurIPS%202024/94809.png?t=1733411348.8919797", "project": "", "author_site": "Ahmed Ben Yahmed, Cl\u00e9ment Calauz\u00e8nes, Vianney Perchet", "tldr": "", "abstract": "We examine multi-armed bandit problems featuring strategic arms under debt-free reporting. In this context, each arm is characterized by a bounded support reward distribution and strategically aims to maximize its own utility by retaining a portion of the observed reward, potentially disclosing only a fraction of it to the player. This scenario unfolds as a game over $T$ rounds, leading to a competition of objectives between the player, aiming to minimize regret, and the arms, motivated by the desire to maximize their individual utilities. To address these dynamics, we propose an algorithm that establishes an equilibrium wherein each arm behaves truthfully and discloses as much of its rewards as possible. Utilizing this algorithm, the player can attain the second-highest average (true) reward among arms, with a cumulative regret bounded by $O(\\log(T)/\\Delta)$ (problem-dependent) or $O(\\sqrt{T\\log(T)})$ (worst-case).", "keywords": "Multi-Armed Bandit Problems;Strategic Arms;Incentives", "primary_area": "bandits", "supplementary_material": "", "author": "Ahmed Ben Yahmed;Cl\u00e9ment Calauz\u00e8nes;Vianney Perchet", "authorids": "~Ahmed_Ben_Yahmed2;~Cl\u00e9ment_Calauz\u00e8nes1;~Vianney_Perchet3", "gender": "M;M;M", "homepage": ";;", "dblp": ";125/1895;83/7398", "google_scholar": ";lFsKnyUAAAAJ;", "orcid": ";;", "linkedin": "ahmed-ben-yahmed-a590ab203/;;", "or_profile": "~Ahmed_Ben_Yahmed2;~Cl\u00e9ment_Calauz\u00e8nes1;~Vianney_Perchet1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;Criteo;", "aff_domain": "ensae.fr;criteo.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nyahmed2024strategic,\ntitle={Strategic Multi-Armed Bandit Problems Under Debt-Free Reporting},\nauthor={Ahmed Ben Yahmed and Cl{\\'e}ment Calauz{\\`e}nes and Vianney Perchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WqNfihAcu5}\n}", "github": "", "reviewers": "VQPm;JBtJ;byTp;3dvn", "pdf_size": 809774, "rating": "6;6;6;8", "confidence": "4;3;4;4", "soundness": "3;3;4;3", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "90;93;314;99", "wc_strengths": "115;29;86;228", "wc_weaknesses": "286;127;204;91", "wc_questions": "110;7;43;16", "wc_limitations": "7;4;4;22", "wc_review": "608;260;651;456", "wc_reply_reviewers": "51;0;21;25", "wc_reply_authors": "12;0;12;12", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 149.0, 95.31788919190353 ], "wc_strengths_avg": [ 114.5, 72.46550903705845 ], "wc_weaknesses_avg": [ 177.0, 75.0099993334222 ], "wc_questions_avg": [ 44.0, 40.34228550788862 ], "wc_limitations_avg": [ 9.25, 7.46240577829965 ], "wc_review_avg": [ 493.75, 153.17045243779884 ], "wc_reply_reviewers_avg": [ 24.25, 18.129740759315894 ], "wc_reply_authors_avg": [ 9.0, 5.196152422706632 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:irI2Ivghyl4J:scholar.google.com/&scioq=Strategic+Multi-Armed+Bandit+Problems+Under+Debt-Free+Reporting&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "ensae.fr;criteo.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;Criteo", "aff_unique_dep": ";", "aff_unique_url": "https://ensae.fr;https://www.criteo.com", "aff_unique_abbr": "ENSAE;Criteo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Stabilized Proximal-Point Methods for Federated Optimization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94808", "id": "WukSyFSzDt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WukSyFSzDt", "openreview": "https://openreview.net/forum?id=WukSyFSzDt", "poster": "", "project": "", "author_site": "Xiaowen Jiang, Anton Rodomanov, Sebastian Stich", "tldr": "", "abstract": "In developing efficient optimization algorithms, it is crucial to account for communication constraints—a significant challenge in modern Federated Learning. \n The best-known communication complexity among non-accelerated algorithms is achieved by DANE, a distributed proximal-point algorithm that solves local subproblems at each iteration and that can exploit second-order similarity among individual functions.\n However, to achieve such communication efficiency, the algorithm\n requires solving local subproblems sufficiently accurately resulting in slightly sub-optimal local complexity.\n Inspired by the hybrid-projection proximal-point method, in this work, we propose a novel distributed algorithm S-DANE. Compared to DANE, this method uses an auxiliary sequence of prox-centers while maintaining the same deterministic communication complexity. Moreover, the accuracy condition for solving the subproblem is milder, leading to enhanced local computation efficiency. Furthermore, S-DANE supports partial client participation and arbitrary stochastic local solvers, making it attractive in practice. We further accelerate S-DANE and show that the resulting algorithm achieves the best-known communication complexity among all existing methods for distributed convex optimization while still enjoying good local computation efficiency as S-DANE.\n Finally, we propose adaptive variants of both methods using line search, obtaining the first provably efficient adaptive algorithms that could exploit local second-order similarity without the prior knowledge of any parameters.", "keywords": "Convex Optimization;Distributed Optimization;Proximal-Point Method;Second-order Dissimilarity;Acceleration;Line search", "primary_area": "optimization", "supplementary_material": "", "author": "Xiaowen Jiang;Anton Rodomanov;Sebastian U Stich", "authorids": "~Xiaowen_Jiang1;~Anton_Rodomanov1;~Sebastian_U_Stich1", "gender": "M;;M", "homepage": ";;https://www.sstich.ch", "dblp": "192/3782-3;153/5453;04/10549", "google_scholar": "https://scholar.google.com/citations?hl=en;u95GRZQAAAAJ;https://scholar.google.ch/citations?user=8l-mDfQAAAAJ", "orcid": ";;", "linkedin": "xiaowen-jiang-65570b222/;;", "or_profile": "~Xiaowen_Jiang1;~Anton_Rodomanov1;~Sebastian_U_Stich1", "aff": "CISPA Helmholtz Center for Information Security;CISPA;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.de;cispa.de;cispa.de", "position": "PhD student;Postdoc;Tenure Track Faculty", "bibtex": "@inproceedings{\njiang2024stabilized,\ntitle={Stabilized Proximal-Point Methods for Federated Optimization},\nauthor={Xiaowen Jiang and Anton Rodomanov and Sebastian U Stich},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WukSyFSzDt}\n}", "github": "", "reviewers": "TYfb;vDCp;iL8h;QbRz;peSJ", "pdf_size": 1133084, "rating": "6;7;7;7;7", "confidence": "3;4;3;5;4", "soundness": "3;3;3;4;2", "novelty": "2;3;3;3;3", "presentation": "3;3;3;4;3", "wc_summary": "55;15;58;94;79", "wc_strengths": "57;32;36;64;94", "wc_weaknesses": "162;374;2;165;53", "wc_questions": "118;35;147;59;76", "wc_limitations": "1;1;1;1;6", "wc_review": "393;457;244;383;308", "wc_reply_reviewers": "19;33;55;234;38", "wc_reply_authors": "5;10;136;115;6", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 6.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 60.2, 26.72377218882095 ], "wc_strengths_avg": [ 56.6, 22.28542124349459 ], "wc_weaknesses_avg": [ 151.2, 127.94592607816787 ], "wc_questions_avg": [ 87.0, 40.422765862815474 ], "wc_limitations_avg": [ 2.0, 2.0 ], "wc_review_avg": [ 357.0, 73.67767640201474 ], "wc_reply_reviewers_avg": [ 75.8, 79.93347233793862 ], "wc_reply_authors_avg": [ 54.4, 58.45545312458026 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4522495854464757805&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cispa.de;cispa.de;cispa.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security", "aff_unique_dep": "", "aff_unique_url": "https://www.cispa.de/", "aff_unique_abbr": "CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Fairness-Aware Estimation of Graphical Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94807", "id": "WvWS8goWyR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WvWS8goWyR", "openreview": "https://openreview.net/forum?id=WvWS8goWyR", "poster": "/media/PosterPDFs/NeurIPS%202024/94807.png?t=1731726102.166371", "project": "", "author_site": "Zhuoping Zhou, Davoud Ataee Tarzanagh, Bojian Hou, Qi Long, Li Shen", "tldr": "", "abstract": "This paper examines the issue of fairness in the estimation of graphical models (GMs), particularly Gaussian, Covariance, and Ising models. These models play a vital role in understanding complex relationships in high-dimensional data. However, standard GMs can result in biased outcomes, especially when the underlying data involves sensitive characteristics or protected groups. To address this, we introduce a comprehensive framework designed to reduce bias in the estimation of GMs related to protected attributes. Our approach involves the integration of the pairwise graph disparity error and a tailored loss function into a nonsmooth multi-objective optimization problem, striving to achieve fairness across different sensitive groups while maintaining the effectiveness of the GMs. Experimental evaluations on synthetic and real-world datasets demonstrate that our framework effectively mitigates bias without undermining GMs' performance.", "keywords": "Fairness;Graphical Model;Optimization", "primary_area": "fairness", "supplementary_material": "/attachment/ca0fc828cef37ee1e48ab6e67820156bc18aba3c.zip", "author": "Zhuoping Zhou;Davoud Ataee Tarzanagh;Bojian Hou;Qi Long;Li Shen", "authorids": "~Zhuoping_Zhou1;~Davoud_Ataee_Tarzanagh1;~Bojian_Hou1;~Qi_Long1;~Li_Shen2", "gender": "F;M;M;M;M", "homepage": "https://www.linkedin.com/in/zhuoping-zhou-b8b7a7224;https://tarzanagh.github.io/;https://www.med.upenn.edu/long-lab/;https://www.med.upenn.edu/shenlab/;http://bojianhou.com", "dblp": "358/3369;;47/7320;s/LiShen;202/2583", "google_scholar": ";Djtvz_0AAAAJ;gfklepYAAAAJ;QnWpiskAAAAJ;yFVd81sAAAAJ", "orcid": ";0000-0003-1267-3889;0000-0003-0660-5230;0000-0002-5443-0503;", "linkedin": "zhuoping-zhou-b8b7a7224;;qi-long-9652a0125/;shenli/;", "or_profile": "~Zhuoping_Zhou1;~Davoud_Ataee_Tarzanagh1;~Qi_Long1;~Li_Shen2;~Bo-Jian_Hou1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Postdoc;Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nzhou2024fairnessaware,\ntitle={Fairness-Aware Estimation of Graphical Models},\nauthor={Zhuoping Zhou and Davoud Ataee Tarzanagh and Bojian Hou and Qi Long and Li Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WvWS8goWyR}\n}", "github": "", "reviewers": "1oDX;nXfy;tjnm;fakh", "pdf_size": 5339555, "rating": "5;5;6;7", "confidence": "4;4;2;2", "soundness": "2;3;3;4", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "101;68;70;100", "wc_strengths": "69;31;89;129", "wc_weaknesses": "250;200;111;95", "wc_questions": "10;77;101;72", "wc_limitations": "87;25;14;1", "wc_review": "517;401;385;397", "wc_reply_reviewers": "9;167;25;11", "wc_reply_authors": "655;1783;604;247", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.75, 15.769828787910159 ], "wc_strengths_avg": [ 79.5, 35.36594407053204 ], "wc_weaknesses_avg": [ 164.0, 63.761273513003175 ], "wc_questions_avg": [ 65.0, 33.59315406448165 ], "wc_limitations_avg": [ 31.75, 33.0104150231408 ], "wc_review_avg": [ 425.0, 53.44155686354955 ], "wc_reply_reviewers_avg": [ 53.0, 66.10597552415364 ], "wc_reply_authors_avg": [ 822.25, 576.5324687300795 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:K9z8Sp_zEy4J:scholar.google.com/&scioq=Fairness-Aware+Estimation+of+Graphical+Models&hl=en&as_sdt=0,10", "gs_version_total": 4, "email": "upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PediatricsGPT: Large Language Models as Chinese Medical Assistants for Pediatric Applications", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94806", "id": "WvoKwq12x5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WvoKwq12x5", "openreview": "https://openreview.net/forum?id=WvoKwq12x5", "poster": "/media/PosterPDFs/NeurIPS%202024/94806.png?t=1729434454.3459225", "project": "", "author_site": "Dingkang Yang, Jinjie Wei, Dongling Xiao, Shunli Wang, Tong Wu, Gang Li, Mingcheng Li, Shuaibing Wang, Jiawei Chen, Yue Jiang, Qingyao Xu, Ke Li, Peng Zhai, Lihua Zhang", "tldr": "", "abstract": "Developing intelligent pediatric consultation systems offers promising prospects for improving diagnostic efficiency, especially in China, where healthcare resources are scarce. Despite recent advances in Large Language Models (LLMs) for Chinese medicine, their performance is sub-optimal in pediatric applications due to inadequate instruction data and vulnerable training procedures.\nTo address the above issues, this paper builds PedCorpus, a high-quality dataset of over 300,000 multi-task instructions from pediatric textbooks, guidelines, and knowledge graph resources to fulfil diverse diagnostic demands. Upon well-designed PedCorpus, we propose PediatricsGPT, the first Chinese pediatric LLM assistant built on a systematic and robust training pipeline.\nIn the continuous pre-training phase, we introduce a hybrid instruction pre-training mechanism to mitigate the internal-injected knowledge inconsistency of LLMs for medical domain adaptation. Immediately, the full-parameter Supervised Fine-Tuning (SFT) is utilized to incorporate the general medical knowledge schema into the models. After that, we devise a direct following preference optimization to enhance the generation of pediatrician-like humanistic responses. In the parameter-efficient secondary SFT phase,\na mixture of universal-specific experts strategy is presented to resolve the competency conflict between medical generalist and pediatric expertise mastery. Extensive results based on the metrics, GPT-4, and doctor evaluations on distinct downstream tasks show that PediatricsGPT consistently outperforms previous Chinese medical LLMs. The project and data will be released at https://github.com/ydk122024/PediatricsGPT.", "keywords": "Large Language Models;Medical Applications", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Dingkang Yang;Jinjie Wei;Dongling Xiao;Shunli Wang;Tong Wu;Gang Li;Mingcheng Li;Shuaibing Wang;Jiawei Chen;Yue Jiang;Qingyao Xu;Ke Li;Peng Zhai;Lihua Zhang", "authorids": "~Dingkang_Yang1;~Jinjie_Wei1;~Dongling_Xiao1;~Shunli_Wang1;~Tong_Wu10;~Gang_Li14;~Mingcheng_Li1;~Shuaibing_Wang1;~Jiawei_Chen16;~Yue_Jiang10;~Qingyao_Xu2;~Ke_Li4;~Peng_Zhai1;~Lihua_Zhang1", "gender": "M;M;M;M;M;M;M;M;M;M;M;M;M;M", "homepage": "https://ydk122024.github.io/;;;https://shunli-wang.github.io/;;;https://github.com/limingcheng0710;https://github.com/heartStrive;;;https://github.com/YKYBcold-k?tab=projects;http://keli.info;https://github.com/hnsyzjianghan;https://faet.fudan.edu.cn/3f/9e/c23830a671646/page.htm", "dblp": "304/1099;;257/5106.html;https://dblp.org/rec/journals/corr/abs-2207-07869;https://dblp.uni-trier.de/pid/75/5056;;156/8265;;;;;;92/4002;31/3003", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN;NGJzzMUAAAAJ;https://scholar.google.com/citations?view_op=list_works;GkSw7_0AAAAJ;;ksiUbVkAAAAJ;https://scholar.google.com.hk/citations?user=i-Xg2Y8AAAAJ;;mfWsFM0AAAAJ;;", "orcid": "0000-0003-1829-5671;;;0000-0002-3755-8724;;;0009-0000-6244-6081;;0009-0007-7664-8582;0009-0005-0267-672X;;0000-0001-7998-0731;0000-0002-1374-7969;0000-0003-0467-4347", "linkedin": ";;;;;;;;;;;;;", "or_profile": "~Dingkang_Yang1;~Jinjie_Wei1;~Dongling_Xiao1;~Shunli_Wang1;~Tong_Wu10;~Gang_Li14;~Mingcheng_Li1;~Shuaibing_Wang1;~Jiawei_Chen16;~Yue_Jiang10;~Qingyao_Xu2;~Ke_Li4;~Peng_Zhai1;~Lihua_Zhang1", "aff": "Fudan University;Fudan University;;Fudan University;Tencent Youtu Lab;Tencent Youtu Lab;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Tencent;Fudan University;Fudan University", "aff_domain": "fudan.edu;fudan.edu.cn;;fudan.edu.cn;tencent.com;tencent.com;fudan.edu.cn;fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn;tencent.com;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;;PhD student;Researcher;Researcher;PhD student;MS student;MS student;MS student;MS student;Principal Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyang2024pediatricsgpt,\ntitle={Pediatrics{GPT}: Large Language Models as Chinese Medical Assistants for Pediatric Applications},\nauthor={Dingkang Yang and Jinjie Wei and Dongling Xiao and Shunli Wang and Tong Wu and Gang Li and Mingcheng Li and Shuaibing Wang and Jiawei Chen and Yue Jiang and Qingyao Xu and Ke Li and Peng Zhai and Lihua Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WvoKwq12x5}\n}", "github": "", "reviewers": "eZYU;oou4;eEmB;jQHJ;MG71;DSSd", "pdf_size": 2634876, "rating": "4;5;6;6;7;7", "confidence": "4;4;4;4;4;4", "soundness": "3;3;3;3;4;3", "novelty": "3;3;3;2;3;4", "presentation": "2;2;3;3;4;3", "wc_summary": "87;56;94;85;48;40", "wc_strengths": "42;63;47;84;61;54", "wc_weaknesses": "323;172;73;106;100;61", "wc_questions": "122;1;254;2;53;70", "wc_limitations": "41;1;45;1;77;2", "wc_review": "615;293;513;278;339;227", "wc_reply_reviewers": "771;0;0;11;11;18", "wc_reply_authors": "2272;151;56;56;8;12", "reply_reviewers": "3;0;0;1;1;1", "reply_authors": "11;3;2;2;2;2", "rating_avg": [ 5.833333333333333, 1.0671873729054746 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.8333333333333335, 0.6871842709362768 ], "wc_summary_avg": [ 68.33333333333333, 21.029080394116672 ], "wc_strengths_avg": [ 58.5, 13.549292724468438 ], "wc_weaknesses_avg": [ 139.16666666666666, 89.43790521299617 ], "wc_questions_avg": [ 83.66666666666667, 86.7115268513298 ], "wc_limitations_avg": [ 27.833333333333332, 28.84681765617984 ], "wc_review_avg": [ 377.5, 139.00809328956353 ], "wc_reply_reviewers_avg": [ 135.16666666666666, 284.4253954124975 ], "wc_reply_authors_avg": [ 425.8333333333333, 826.9684429018773 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 3.6666666666666665, 3.299831645537222 ], "replies_avg": [ 49, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3441708079353736665&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "fudan.edu;fudan.edu.cn;;fudan.edu.cn;tencent.com;tencent.com;fudan.edu.cn;fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn;tencent.com;fudan.edu.cn;fudan.edu.cn", "author_num": 14, "aff_unique_index": "0;0;0;1;1;0;0;0;0;0;1;0;0", "aff_unique_norm": "Fudan University;Tencent", "aff_unique_dep": ";Youtu Lab", "aff_unique_url": "https://www.fudan.edu.cn;https://www.tencent.com", "aff_unique_abbr": "Fudan;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Building on Efficient Foundations: Effective Training of LLMs with Structured Feedforward Layers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94805", "id": "WxLVYZbIew", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WxLVYZbIew", "openreview": "https://openreview.net/forum?id=WxLVYZbIew", "poster": "/media/PosterPDFs/NeurIPS%202024/94805.png?t=1731667594.1180744", "project": "", "author_site": "Xiuying Wei, Skander Moalla, Razvan Pascanu, Caglar Gulcehre", "tldr": "", "abstract": "State-of-the-art results in large language models (LLMs) often rely on scale, which\nbecomes computationally expensive. This has sparked a research agenda to reduce\nthese models\u2019 parameter counts and computational costs without significantly\nimpacting their performance. Our study focuses on transformer-based LLMs,\nspecifically targeting the computationally intensive feedforward networks (FFNs),\nwhich are less studied than attention blocks. We consider three structured linear\nparameterizations of the FFN using efficient low-rank and block-diagonal matrices.\nIn contrast to many previous works that examined these approximations, our study\ni) explores these structures from a training-from-scratch perspective, ii) scales up\nto 1.3B parameters, and iii) is conducted within recent Transformer-based LLMs\nrather than convolutional architectures. We demonstrate that these structures can\nlead to actual computational gains in various scenarios, including online decoding\nwhen using a pre-merge technique. Additionally, we propose a novel training\nregime, called self-guided training, aimed at improving the poor training dynamics\nthat these approximations exhibit when used from initialization. Interestingly,\nthe scaling performance of structured matrices is explored, revealing steeper\ncurves in scaling training FLOPs, along with a favorable scaling trend in the\novertraining regime. Specifically, we show that wide and structured networks\ncan utilize training FLOPs more efficiently, with fewer parameters and lower\nloss than dense models at their optimal trade-off. Our code is available at\nhttps://github.com/CLAIRE-Labo/StructuredFFN/tree/main.", "keywords": "large language model;structured matrices;efficient architecture", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/1d9230adc16759c7f5ca87935ea14a07b9732489.zip", "author": "Xiuying Wei;Skander Moalla;Razvan Pascanu;Caglar Gulcehre", "authorids": "~Xiuying_Wei1;~Skander_Moalla1;~Razvan_Pascanu1;~Caglar_Gulcehre1", "gender": "F;M;M;M", "homepage": "https://wimh966.github.io/;https://skandermoalla.com/;https://razp.info;http://caglarg.com", "dblp": "315/9021;336/2569;65/8368.html;125/2132", "google_scholar": ";YEP65IMAAAAJ;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.ca/citations?user=7hwJ2ckAAAAJ", "orcid": ";0000-0002-8494-8071;;", "linkedin": "%E7%A7%80%E9%A2%96-%E9%AD%8F-6b1277221/;skander-moalla/;;", "or_profile": "~Xiuying_Wei1;~Skander_Moalla1;~Razvan_Pascanu1;~Caglar_Gulcehre1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;Google DeepMind;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;google.com;epfl.ch", "position": "PhD student;PhD student;Research Scientist;EPFL", "bibtex": "@inproceedings{\nwei2024building,\ntitle={Building on Efficient Foundations: Effective Training of {LLM}s with Structured Feedforward Layers},\nauthor={Xiuying Wei and Skander Moalla and Razvan Pascanu and Caglar Gulcehre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WxLVYZbIew}\n}", "github": "", "reviewers": "SNmM;5PEQ;7Gik", "pdf_size": 5804104, "rating": "5;5;7", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "86;159;68", "wc_strengths": "99;53;73", "wc_weaknesses": "87;247;149", "wc_questions": "84;48;19", "wc_limitations": "35;11;25", "wc_review": "391;518;334", "wc_reply_reviewers": "211;284;85", "wc_reply_authors": "305;919;274", "reply_reviewers": "1;2;2", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.33333333333333, 39.34745509206691 ], "wc_strengths_avg": [ 75.0, 18.83259585576738 ], "wc_weaknesses_avg": [ 161.0, 65.86855597830171 ], "wc_questions_avg": [ 50.333333333333336, 26.587382136812355 ], "wc_limitations_avg": [ 23.666666666666668, 9.843215373488935 ], "wc_review_avg": [ 414.3333333333333, 76.9083148228388 ], "wc_reply_reviewers_avg": [ 193.33333333333334, 82.1962421417319 ], "wc_reply_authors_avg": [ 499.3333333333333, 297.01889202914725 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Tr-Pj9i5wNYJ:scholar.google.com/&scioq=Building+on+Efficient+Foundations:+Effective+Training+of+LLMs+with+Structured+Feedforward+Layers&hl=en&as_sdt=0,23", "gs_version_total": 4, "email": "epfl.ch;epfl.ch;google.com;epfl.ch", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "EPFL;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.epfl.ch;https://deepmind.com", "aff_unique_abbr": "EPFL;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "id": "WxW4nZMD3D", "title": "Network Lasso Bandits", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider a multi-task contextual bandit setting, where the learner is given a graph encoding relations between the bandit tasks. The tasks' preference vectors are assumed to be piecewise constant over the graph, forming clusters. At every round, we estimate the preference vectors by solving an online network lasso problem with a suitably chosen, time-dependent regularization parameter. We establish a novel oracle inequality relying on a convenient restricted eigenvalue assumption. Our theoretical findings highlight the importance of dense intra-cluster connections and sparse inter-cluster ones. That results in a sublinear regret bound significantly lower than its counterpart in the independent task learning setting. Finally, we support our theoretical findings by experimental evaluation against graph bandit multi-task learning and online clustering of bandits algorithms.", "keywords": "contextual bandits;multi-task learning;clustering;network lasso;graph total variation", "primary_area": "bandits", "supplementary_material": "/attachment/2c967fab5e4b3e5a57614510505d41483ca76c93.zip", "author": "Sofien Dhouib;Steven Bilaj;Behzad Nourani-Koliji;Setareh Maghsudi", "authorids": "~Sofien_Dhouib1;~Steven_Bilaj1;~Behzad_Nourani-Koliji1;~Setareh_Maghsudi1", "gender": "M;M;M;", "homepage": "https://sofiendhouib.github.io/;;;", "dblp": ";333/2524;;30/10806", "google_scholar": "QWptBckAAAAJ;;-BAOstMAAAAJ;", "orcid": ";;0009-0001-2854-1955;", "linkedin": ";steven-bilaj-2241bb1b4/;;", "or_profile": "~Sofien_Dhouib1;~Steven_Bilaj1;~Behzad_Nourani-Koliji1;~Setareh_Maghsudi1", "aff": "University of Tuebingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;T\u00fcbingen AI Center;Ruhr-Universt\u00e4t Bochum", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;tuebingen.ai;rub.de", "position": "Postdoctoral researcher;PhD student;PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024network,\ntitle={Network Lasso Bandits},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=WxW4nZMD3D}\n}", "github": "", "project": "", "reviewers": "3afv;Z89V;QF4D;wAUZ", "site": "https://openreview.net/forum?id=WxW4nZMD3D", "pdf_size": 2143035, "rating": "3;5;5;5", "confidence": "5;3;2;3", "soundness": "2;3;2;2", "novelty": "2;3;2;2", "presentation": "2;3;2;3", "wc_summary": "87;287;116;79", "wc_strengths": "61;5;105;56", "wc_weaknesses": "657;5;51;147", "wc_questions": "4;6;144;15", "wc_limitations": "1;5;31;1", "wc_review": "810;308;447;298", "wc_reply_reviewers": "332;55;14;97", "wc_reply_authors": "1114;0;0;138", "reply_reviewers": "1;1;1;2", "reply_authors": "3;1;1;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 142.25, 84.69762393361457 ], "wc_strengths_avg": [ 56.75, 35.442735503908274 ], "wc_weaknesses_avg": [ 215.0, 260.28061779548625 ], "wc_questions_avg": [ 42.25, 58.89131939428764 ], "wc_limitations_avg": [ 9.5, 12.519984025548915 ], "wc_review_avg": [ 465.75, 207.29492878505252 ], "wc_reply_reviewers_avg": [ 124.5, 123.34200419970482 ], "wc_reply_authors_avg": [ 313.0, 465.87659310164963 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pv8WgL5wDAMJ:scholar.google.com/&scioq=Network+Lasso+Bandits&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Tuebingen;Eberhard Karls University of T\u00fcbingen;University of T\u00fcbingen;Ruhr-Universit\u00e4t Bochum", "aff_unique_dep": ";;AI Center;", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/;https://www.ruhr-uni-bochum.de", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen;Uni T\u00fcbingen;RUB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Graph Learning for Numeric Planning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94804", "id": "Wxc6KvQgLq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wxc6KvQgLq", "openreview": "https://openreview.net/forum?id=Wxc6KvQgLq", "poster": "", "project": "", "author_site": "Dillon Chen, Sylvie Thiebaux", "tldr": "", "abstract": "Graph learning is naturally well suited for use in symbolic, object-centric planning due to its ability to exploit relational structures exhibited in planning domains and to take as input planning instances with arbitrary number of objects. Numeric planning is an extension of symbolic planning in which states may now also exhibit numeric variables. In this work, we propose data-efficient and interpretable machine learning models for learning to solve numeric planning tasks. This involves constructing a new graph kernel for graphs with both continuous and categorical attributes, as well as new optimisation methods for learning heuristic functions for numeric planning. Experiments show that our graph kernels are vastly more efficient and generalise better than graph neural networks for numeric planning, and also yield competitive coverage performance over domain-independent numeric planners.", "keywords": "planning;graph neural networks;graph-based learning;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Dillon Ze Chen;Sylvie Thiebaux", "authorids": "~Dillon_Ze_Chen1;~Sylvie_Thiebaux1", "gender": "M;F", "homepage": "https://dillonzchen.github.io/;http://users.cecs.anu.edu.au/~thiebaux/", "dblp": "350/4009;64/3102", "google_scholar": ";https://scholar.google.com.tw/citations?user=UliUD0gAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Dillon_Ze_Chen1;~Sylvie_Thiebaux1", "aff": "LAAS / CNRS;Australian National University", "aff_domain": "laas.fr;anu.edu.au", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024graph,\ntitle={Graph Learning for Numeric Planning},\nauthor={Dillon Ze Chen and Sylvie Thiebaux},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wxc6KvQgLq}\n}", "github": "", "reviewers": "bZ6v;YeVP;99rS;R2Rg", "pdf_size": 736053, "rating": "3;4;5;7", "confidence": "3;2;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "1;3;1;3", "wc_summary": "37;103;141;62", "wc_strengths": "25;45;67;67", "wc_weaknesses": "56;53;148;155", "wc_questions": "15;57;39;135", "wc_limitations": "7;50;8;4", "wc_review": "140;308;403;423", "wc_reply_reviewers": "0;0;4;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 85.75, 39.65712420234226 ], "wc_strengths_avg": [ 51.0, 17.4928556845359 ], "wc_weaknesses_avg": [ 103.0, 48.57468476480316 ], "wc_questions_avg": [ 61.5, 44.97499305169485 ], "wc_limitations_avg": [ 17.25, 18.965429075030176 ], "wc_review_avg": [ 318.5, 111.84028791093128 ], "wc_reply_reviewers_avg": [ 3.75, 4.493050188902857 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6625413488689132, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12330823099423875392&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 11, "email": "laas.fr;anu.edu.au", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "LAAS;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.laas.fr/;https://www.anu.edu.au", "aff_unique_abbr": "LAAS;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;Australia" }, { "title": "No Representation, No Trust: Connecting Representation, Collapse, and Trust Issues in PPO", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94803", "id": "Wy9UgrMwD0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wy9UgrMwD0", "openreview": "https://openreview.net/forum?id=Wy9UgrMwD0", "poster": "/media/PosterPDFs/NeurIPS%202024/94803.png?t=1731668956.9704623", "project": "", "author_site": "Skander Moalla, Andrea Miele, Daniil Pyatko, Razvan Pascanu, Caglar Gulcehre", "tldr": "", "abstract": "Reinforcement learning (RL) is inherently rife with non-stationarity since the states and rewards the agent observes during training depend on its changing policy.\nTherefore, networks in deep RL must be capable of adapting to new observations and fitting new targets.\nHowever, previous works have observed that networks trained under non-stationarity exhibit an inability to continue learning, termed loss of plasticity, and eventually a collapse in performance.\nFor off-policy deep value-based RL methods, this phenomenon has been correlated with a decrease in representation rank and the ability to fit random targets, termed capacity loss.\nAlthough this correlation has generally been attributed to neural network learning under non-stationarity, the connection to representation dynamics has not been carefully studied in on-policy policy optimization methods.\nIn this work, we empirically study representation dynamics in Proximal Policy Optimization (PPO) on the Atari and MuJoCo environments, revealing that PPO agents are also affected by feature rank deterioration and capacity loss.\nWe show that this is aggravated by stronger non-stationarity, ultimately driving the actor's performance to collapse, regardless of the performance of the critic.\nWe ask why the trust region, specific to methods like PPO, cannot alleviate or prevent the collapse and find a connection between representation collapse and the degradation of the trust region, one exacerbating the other.\nFinally, we present Proximal Feature Optimization (PFO), a novel auxiliary loss that, along with other interventions, shows that regularizing the representation dynamics mitigates the performance collapse of PPO agents.\nCode and run histories are available at https://github.com/CLAIRE-Labo/no-representation-no-trust.", "keywords": "proximal policy optimization;plasticity loss;trust region;feature rank collapse;regularization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Skander Moalla;Andrea Miele;Daniil Pyatko;Razvan Pascanu;Caglar Gulcehre", "authorids": "~Skander_Moalla1;~Andrea_Miele1;~Daniil_Pyatko1;~Razvan_Pascanu1;~Caglar_Gulcehre1", "gender": "M;M;M;M;M", "homepage": "https://skandermoalla.com/;;;https://razp.info;http://caglarg.com", "dblp": "336/2569;;;65/8368.html;125/2132", "google_scholar": "YEP65IMAAAAJ;;;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.ca/citations?user=7hwJ2ckAAAAJ", "orcid": "0000-0002-8494-8071;;;;", "linkedin": "skander-moalla/;andrea-miele-;daniil-pyatko-24a48b64/;;", "or_profile": "~Skander_Moalla1;~Andrea_Miele1;~Daniil_Pyatko1;~Razvan_Pascanu1;~Caglar_Gulcehre1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Google DeepMind;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;google.com;epfl.ch", "position": "PhD student;MS student;MS student;Research Scientist;EPFL", "bibtex": "@inproceedings{\nmoalla2024no,\ntitle={No Representation, No Trust: Connecting Representation, Collapse, and Trust Issues in {PPO}},\nauthor={Skander Moalla and Andrea Miele and Daniil Pyatko and Razvan Pascanu and Caglar Gulcehre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wy9UgrMwD0}\n}", "github": "", "reviewers": "qwmj;YzGo;4jUo;6zAY;XWGd;v7XX", "pdf_size": 27869796, "rating": "3;5;6;6;6;7", "confidence": "4;5;5;5;4;4", "soundness": "2;2;3;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "3;3;3;4;3;3", "wc_summary": "85;89;97;64;60;158", "wc_strengths": "90;122;74;94;33;144", "wc_weaknesses": "120;209;96;30;167;97", "wc_questions": "88;1052;233;103;49;163", "wc_limitations": "51;3;1;1;1;39", "wc_review": "434;1475;501;292;310;601", "wc_reply_reviewers": "351;69;222;0;37;151", "wc_reply_authors": "947;1231;81;0;15;362", "reply_reviewers": "1;1;1;0;1;2", "reply_authors": "3;3;2;1;2;2", "rating_avg": [ 5.5, 1.2583057392117916 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 92.16666666666667, 32.26668388429293 ], "wc_strengths_avg": [ 92.83333333333333, 35.149284803850875 ], "wc_weaknesses_avg": [ 119.83333333333333, 56.76682325286683 ], "wc_questions_avg": [ 281.3333333333333, 349.62487834185305 ], "wc_limitations_avg": [ 16.0, 20.808652046684813 ], "wc_review_avg": [ 602.1666666666666, 404.5406517136305 ], "wc_reply_reviewers_avg": [ 138.33333333333334, 120.13834617732267 ], "wc_reply_authors_avg": [ 439.3333333333333, 481.64948066225736 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 2.1666666666666665, 0.6871842709362768 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13245323570650436, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17650368346626182318&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch;epfl.ch;google.com;epfl.ch", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "EPFL;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.epfl.ch;https://deepmind.com", "aff_unique_abbr": "EPFL;DeepMind", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "Stochastic Optimal Control for Diffusion Bridges in Function Spaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94802", "id": "WyQW4G57Zd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WyQW4G57Zd", "openreview": "https://openreview.net/forum?id=WyQW4G57Zd", "poster": "", "project": "", "author_site": "Byoungwoo Park, Jungwon Choi, Sungbin Lim, Juho Lee", "tldr": "", "abstract": "Recent advancements in diffusion models and diffusion bridges primarily focus on finite-dimensional spaces, yet many real-world problems necessitate operations in infinite-dimensional function spaces for more natural and interpretable formulations. \nIn this paper, we present a theory of stochastic optimal control (SOC) tailored to infinite-dimensional spaces, aiming to extend diffusion-based algorithms to function spaces. \nSpecifically, we demonstrate how Doob\u2019s $h$-transform, the fundamental tool for constructing diffusion bridges, can be derived from the SOC perspective and expanded to infinite dimensions. \nThis expansion presents a challenge, as infinite-dimensional spaces typically lack closed-form densities. \nLeveraging our theory, we establish that solving the optimal control problem with a specific objective function choice is equivalent to learning diffusion-based generative models. \nWe propose two applications: 1) learning bridges between two infinite-dimensional distributions and 2) generative models for sampling from an infinite-dimensional distribution. \nOur approach proves effective for diverse problems involving continuous function space representations, such as resolution-free images, time-series data, and probability density functions.", "keywords": "stochastic optimal control;bridge matching;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Byoungwoo Park;Jungwon Choi;Sungbin Lim;Juho Lee", "authorids": "~Byoungwoo_Park1;~Jungwon_Choi1;~Sungbin_Lim1;~Juho_Lee2", "gender": "M;M;M;M", "homepage": "https://bw-park.github.io/;https://jungwon-choi.github.io/;https://www.sungbin-lim.net;https://juho.lee.github.io", "dblp": "354/2891;158/3516;206/6907;55/3410-1", "google_scholar": "https://scholar.google.com/citations?hl=ko;;https://scholar.google.com/citations?hl=ko;Py4URJUAAAAJ", "orcid": ";0009-0008-6219-0301;0000-0003-2684-2022;", "linkedin": ";jungwon-choi-48b35212b/;sungbin-lim-43b739b5/;", "or_profile": "~Byoungwoo_Park1;~Jungwon_Choi1;~Sungbin_Lim1;~Juho_Lee2", "aff": "KAIST;KAIST;Korea University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\npark2024stochastic,\ntitle={Stochastic Optimal Control for Diffusion Bridges in Function Spaces},\nauthor={Byoungwoo Park and Jungwon Choi and Sungbin Lim and Juho Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WyQW4G57Zd}\n}", "github": "", "reviewers": "m5FK;jSMz;WKVm;uBiL", "pdf_size": 9454457, "rating": "5;6;6;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "25;82;51;58", "wc_strengths": "23;92;54;40", "wc_weaknesses": "126;230;107;83", "wc_questions": "11;2;169;2", "wc_limitations": "9;2;7;1", "wc_review": "194;408;388;184", "wc_reply_reviewers": "21;13;21;38", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 54.0, 20.310096011589902 ], "wc_strengths_avg": [ 52.25, 25.439880109780393 ], "wc_weaknesses_avg": [ 136.5, 56.09144319769282 ], "wc_questions_avg": [ 46.0, 71.10907115129545 ], "wc_limitations_avg": [ 4.75, 3.344772040064913 ], "wc_review_avg": [ 293.5, 104.79861640308043 ], "wc_reply_reviewers_avg": [ 23.25, 9.12071817347735 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12441238487011570848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;korea.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Generalized Protein Pocket Generation with Prior-Informed Flow Matching", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94801", "id": "WyVTj77KEV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=WyVTj77KEV", "openreview": "https://openreview.net/forum?id=WyVTj77KEV", "poster": "", "project": "", "author_site": "ZAIXI ZHANG, Marinka Zitnik, Qi Liu", "tldr": "", "abstract": "Designing ligand-binding proteins, such as enzymes and biosensors, is essential in bioengineering and protein biology. One critical step in this process involves designing protein pockets, the protein interface binding with the ligand. Current approaches to pocket generation often suffer from time-intensive physical computations or template-based methods, as well as compromised generation quality due to the overlooking of domain knowledge. To tackle these challenges, we propose PocketFlow, a generative model that incorporates protein-ligand interaction priors based on flow matching. During training, PocketFlow learns to model key types of protein-ligand interactions, such as hydrogen bonds. In the sampling, PocketFlow leverages multi-granularity guidance (overall binding affinity and interaction geometry constraints) to facilitate generating high-affinity and valid pockets. Extensive experiments show that PocketFlow outperforms baselines on multiple benchmarks, e.g., achieving an average improvement of 1.29 in Vina Score and 0.05 in scRMSD. Moreover, modeling interactions make PocketFlow a generalized generative model across multiple ligand modalities, including small molecules, peptides, and RNA.", "keywords": "Protein-ligand interaction;Protein pocket design;Generative Models;Molecules;Structure-based Drug Design", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "ZAIXI ZHANG;Marinka Zitnik;Qi Liu", "authorids": "~ZAIXI_ZHANG2;~Marinka_Zitnik1;~Qi_Liu3", "gender": "M;;M", "homepage": "http://home.ustc.edu.cn/~zaixi/;https://zitniklab.hms.harvard.edu;http://staff.ustc.edu.cn/~qiliuql/", "dblp": "267/9295.html;53/11277.html;95/2446-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;YtUDgPIAAAAJ;5EoHAFwAAAAJ", "orcid": ";;0000-0001-6956-5550", "linkedin": ";;", "or_profile": "~ZAIXI_ZHANG2;~Marinka_Zitnik1;~Qi_Liu3", "aff": "University of Science and Technology of China;Harvard University;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;harvard.edu;ustc.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024generalized,\ntitle={Generalized Protein Pocket Generation with Prior-Informed Flow Matching},\nauthor={ZAIXI ZHANG and Marinka Zitnik and Qi Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=WyVTj77KEV}\n}", "github": "", "reviewers": "gNH7;e3xX;gVL4;bfsA", "pdf_size": 3648986, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "47;71;133;82", "wc_strengths": "59;139;80;119", "wc_weaknesses": "64;75;186;80", "wc_questions": "136;30;45;4", "wc_limitations": "1;11;12;15", "wc_review": "307;326;456;300", "wc_reply_reviewers": "0;20;87;12", "wc_reply_authors": "671;0;86;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.25, 31.38769663419092 ], "wc_strengths_avg": [ 99.25, 31.467244874631145 ], "wc_weaknesses_avg": [ 101.25, 49.27156888104945 ], "wc_questions_avg": [ 53.75, 49.700980875632624 ], "wc_limitations_avg": [ 9.75, 5.261891294962297 ], "wc_review_avg": [ 347.25, 63.50344478845223 ], "wc_reply_reviewers_avg": [ 29.75, 33.81105588413352 ], "wc_reply_authors_avg": [ 189.25, 280.3456571805599 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13419459591545806862&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;harvard.edu;ustc.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Science and Technology of China;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "USTC;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Invariant subspaces and PCA in nearly matrix multiplication time", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94800", "id": "Wyp8vsL9de", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Wyp8vsL9de", "openreview": "https://openreview.net/forum?id=Wyp8vsL9de", "poster": "/media/PosterPDFs/NeurIPS%202024/94800.png?t=1730111626.6252654", "project": "", "author_site": "Aleksandros Sobczyk, Marko Mladenovic, Mathieu Luisier", "tldr": "", "abstract": "Approximating invariant subspaces of generalized eigenvalue problems (GEPs) is a fundamental computational problem at the core of machine learning and scientific computing. It is, for example, the root of Principal Component Analysis (PCA) for dimensionality reduction, data visualization, and noise filtering, and of Density Functional Theory (DFT), arguably the most popular method to calculate the electronic structure of materials. \nGiven Hermitian $H,S\\in\\mathbb{C}^{n\\times n}$, where $S$ is positive-definite, let $\\Pi_k$ be the true spectral projector on the invariant subspace that is associated with the $k$ smallest (or largest) eigenvalues of the GEP $HC=SC\\Lambda$, for some $k\\in[n]$. \nWe show that we can compute a matrix $\\widetilde\\Pi_k$ such that $\\lVert\\Pi_k-\\widetilde\\Pi_k\\rVert_2\\leq \\epsilon$, in $O\\left( n^{\\omega+\\eta}\\mathrm{polylog}(n,\\epsilon^{-1},\\kappa(S),\\mathrm{gap}_k^{-1}) \\right)$ bit operations in the floating point model, for some $\\epsilon\\in(0,1)$, with probability $1-1/n$. Here, $\\eta>0$ is arbitrarily small, $\\omega\\lesssim 2.372$ is the matrix multiplication exponent, $\\kappa(S)=\\lVert S\\rVert_2\\lVert S^{-1}\\rVert_2$, and $\\mathrm{gap}_k$ is the gap between eigenvalues $k$ and $k+1$. \nTo achieve such provable \"forward-error\" guarantees, our methods rely on a new $O(n^{\\omega+\\eta})$ stability analysis for the Cholesky factorization, and a smoothed analysis for computing spectral gaps, which can be of independent interest.\nUltimately, we obtain new matrix multiplication-type bit complexity upper bounds for PCA problems, including classical PCA and (randomized) low-rank approximation.", "keywords": "Invariant subspace;Generalized eigenvalue problem;PCA;Spectral projector;Spectral gap;Matrix multiplication;Bit complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Aleksandros Sobczyk;Marko Mladenovic;Mathieu Luisier", "authorids": "~Aleksandros_Sobczyk1;~Marko_Mladenovic1;~Mathieu_Luisier1", "gender": "M;M;M", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=zurich-OBC;https://iis.ee.ethz.ch/people/person-detail.Mjg4ODE4.TGlzdC8zOTg2LDk5MDE4ODk4MA==.html;http://www.iis.ee.ethz.ch", "dblp": "172/0550;;49/2897", "google_scholar": "NG0qB0MAAAAJ;YsWwDzQAAAAJ;uPegIXAAAAAJ", "orcid": "0000-0002-1602-8329;;0000-0002-2212-7972", "linkedin": "aleksandros-sobczyk;;", "or_profile": "~Aleksandros_Sobczyk1;~Marko_Mladenovic1;~Mathieu_Luisier1", "aff": "International Business Machines;ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "ibm.com;ethz.ch;ethz.ch", "position": "Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nsobczyk2024invariant,\ntitle={Invariant subspaces and {PCA} in nearly matrix multiplication time},\nauthor={Aleksandros Sobczyk and Marko Mladenovic and Mathieu Luisier},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Wyp8vsL9de}\n}", "github": "", "reviewers": "KBna;FrmC;PrHp;ZDvn;5csW", "pdf_size": 840495, "rating": "5;5;7;7;8", "confidence": "2;1;3;2;2", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "10;149;159;84;172", "wc_strengths": "23;1;98;97;110", "wc_weaknesses": "82;1;62;93;144", "wc_questions": "87;92;52;106;58", "wc_limitations": "1;10;4;10;76", "wc_review": "203;253;375;390;560", "wc_reply_reviewers": "22;87;35;82;0", "wc_reply_authors": "34;188;76;28;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 2.0, 0.6324555320336759 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 114.8, 60.54221667563883 ], "wc_strengths_avg": [ 65.8, 44.7097304845377 ], "wc_weaknesses_avg": [ 76.4, 46.40948178982394 ], "wc_questions_avg": [ 79.0, 20.64945519862449 ], "wc_limitations_avg": [ 20.2, 28.116898833263956 ], "wc_review_avg": [ 356.2, 124.21819512454687 ], "wc_reply_reviewers_avg": [ 45.2, 34.01999412110472 ], "wc_reply_authors_avg": [ 65.2, 66.03756506716462 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5270462766947298, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10170275318866652636&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ibm.com;ethz.ch;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "International Business Machines Corporation;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.ethz.ch", "aff_unique_abbr": "IBM;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Gradient Guidance for Diffusion Models: An Optimization Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94799", "id": "X1QeUYBXke", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X1QeUYBXke", "openreview": "https://openreview.net/forum?id=X1QeUYBXke", "poster": "/media/PosterPDFs/NeurIPS%202024/94799.png?t=1733773486.3214657", "project": "", "author_site": "Yingqing Guo, Hui Yuan, Yukang Yang, Minshuo Chen, Mengdi Wang", "tldr": "", "abstract": "Diffusion models have demonstrated empirical successes in various applications and can be adapted to task-specific needs via guidance. This paper studies a form of gradient guidance for adapting a pre-trained diffusion model towards optimizing user-specified objectives. We establish a mathematical framework for guided diffusion to systematically study its optimization theory and algorithmic design. Our theoretical analysis spots a strong link between guided diffusion models and optimization: gradient-guided diffusion models are essentially sampling solutions to a regularized optimization problem, where the regularization is imposed by the pre-training data. As for guidance design, directly bringing in the gradient of an external objective function as guidance would jeopardize the structure in generated samples. We investigate a modified form of gradient guidance based on a forward prediction loss, which leverages the information in pre-trained score functions and provably preserves the latent structure. We further consider an iteratively fine-tuned version of gradient-guided diffusion where guidance and score network are both updated with newly generated samples. This process mimics a first-order optimization iteration in expectation, for which we proved $\\tilde{\\mathcal{O}}(1/K)$ convergence rate to the global optimum when the objective function is concave. Our code is released at https://github.com/yukang123/GGDMOptim.git.", "keywords": "diffusion model; gradient guidance; reward optimization; convergence rate", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yingqing Guo;Hui Yuan;Yukang Yang;Minshuo Chen;Mengdi Wang", "authorids": "~Yingqing_Guo1;~Hui_Yuan2;~Yukang_Yang1;~Minshuo_Chen1;~Mengdi_Wang1", "gender": ";F;M;M;F", "homepage": "https://ece.princeton.edu/people/yingqing-guo;;;https://minshuochen.github.io;http://mwang.princeton.edu", "dblp": ";21/780-2;;217/1509;", "google_scholar": ";https://scholar.google.com/citations?hl=en;ASZWVzEAAAAJ;qU9WvTgAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yingqing_Guo1;~Hui_Yuan2;~Yukang_Yang1;~Minshuo_Chen1;~Mengdi_Wang1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nguo2024gradient,\ntitle={Gradient Guidance for Diffusion Models: An Optimization Perspective},\nauthor={Yingqing Guo and Hui Yuan and Yukang Yang and Minshuo Chen and Mengdi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X1QeUYBXke}\n}", "github": "", "reviewers": "uzJ1;8pJj;2yVt;HWVj", "pdf_size": 10538565, "rating": "4;5;6;7", "confidence": "4;3;4;4", "soundness": "2;3;2;4", "novelty": "2;3;2;4", "presentation": "2;4;3;4", "wc_summary": "106;125;69;44", "wc_strengths": "55;40;32;64", "wc_weaknesses": "475;108;87;44", "wc_questions": "53;115;655;2", "wc_limitations": "39;14;1;1", "wc_review": "728;402;844;155", "wc_reply_reviewers": "299;20;103;0", "wc_reply_authors": "617;53;578;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.0, 31.51983502494897 ], "wc_strengths_avg": [ 47.75, 12.497499749949988 ], "wc_weaknesses_avg": [ 178.5, 172.73172841142997 ], "wc_questions_avg": [ 206.25, 262.1577530800873 ], "wc_limitations_avg": [ 13.75, 15.514106484100203 ], "wc_review_avg": [ 532.25, 271.47225917209295 ], "wc_reply_reviewers_avg": [ 105.5, 118.20427234241578 ], "wc_reply_authors_avg": [ 312.0, 286.44633005154736 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5188764807095562329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Can Simple Averaging Defeat Modern Watermarks?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94798", "id": "X2G7LA7Av9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X2G7LA7Av9", "openreview": "https://openreview.net/forum?id=X2G7LA7Av9", "poster": "/media/PosterPDFs/NeurIPS%202024/94798.png?t=1730396379.9828005", "project": "", "author_site": "Pei Yang, Hai Ci, Yiren Song, Mike Zheng Shou", "tldr": "", "abstract": "Digital watermarking techniques are crucial for copyright protection and source identification of images, especially in the era of generative AI models. However, many existing watermarking methods, particularly content-agnostic approaches that embed fixed patterns regardless of image content, are vulnerable to steganalysis attacks that can extract and remove the watermark with minimal perceptual distortion. In this work, we categorise watermarking algorithms into content-adaptive and content-agnostic ones, and demonstrate how averaging a collection of watermarked images could reveal the underlying watermark pattern. We then leverage this extracted pattern for effective watermark removal under both greybox and blackbox settings, even when the collection of images contains multiple watermark patterns. For some algorithms like Tree-Ring watermarks, the extracted pattern can also forge convincing watermarks on clean images. Our quantitative and qualitative evaluations across twelve watermarking methods highlight the threat posed by steganalysis to content-agnostic watermarks and the importance of designing watermarking techniques resilient to such analytical attacks. We propose security guidelines calling for using content-adaptive watermarking strategies and performing security evaluation against steganalysis. We also suggest multi-key assignments as potential mitigations against steganalysis vulnerabilities. Github page: \\url{https://github.com/showlab/watermark-steganalysis}.", "keywords": "Watermark;Security", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Pei Yang;Hai Ci;Yiren Song;Mike Zheng Shou", "authorids": "~Pei_Yang5;~Hai_Ci1;~Yiren_Song1;~Mike_Zheng_Shou1", "gender": "M;M;M;", "homepage": ";;;http://www.columbia.edu/~zs2262/", "dblp": ";227/4707;318/1708;284/0807", "google_scholar": ";GMrjppAAAAAJ;;h1-3lSoAAAAJ", "orcid": "0009-0000-3948-6915;;;", "linkedin": ";;;", "or_profile": "~Pei_Yang5;~Hai_Ci1;~Yiren_Song1;~Zheng_Shou1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2024can,\ntitle={Can Simple Averaging Defeat Modern Watermarks?},\nauthor={Pei Yang and Hai Ci and Yiren Song and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X2G7LA7Av9}\n}", "github": "", "reviewers": "85eL;VkHo;aL14;Jv2m", "pdf_size": 18937720, "rating": "4;5;5;8", "confidence": "3;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;1;3", "presentation": "3;3;3;3", "wc_summary": "33;38;69;47", "wc_strengths": "32;51;69;112", "wc_weaknesses": "218;192;249;101", "wc_questions": "2;3;89;3", "wc_limitations": "62;9;19;3", "wc_review": "347;293;495;266", "wc_reply_reviewers": "46;135;229;184", "wc_reply_authors": "86;453;326;356", "reply_reviewers": "1;2;1;2", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 46.75, 13.790848414800301 ], "wc_strengths_avg": [ 66.0, 29.605742686174924 ], "wc_weaknesses_avg": [ 190.0, 55.204166509422095 ], "wc_questions_avg": [ 24.25, 37.38565901518923 ], "wc_limitations_avg": [ 23.25, 23.09085316743407 ], "wc_review_avg": [ 350.25, 88.51377011516344 ], "wc_reply_reviewers_avg": [ 148.5, 67.87672649737905 ], "wc_reply_authors_avg": [ 305.25, 135.00624985533076 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7052937820665100182&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "FuseAnyPart: Diffusion-Driven Facial Parts Swapping via Multiple Reference Images", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94797", "id": "X2UMdvcmMo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X2UMdvcmMo", "openreview": "https://openreview.net/forum?id=X2UMdvcmMo", "poster": "/media/PosterPDFs/NeurIPS%202024/94797.png?t=1730706079.1923094", "project": "", "author_site": "zheng yu, Yaohua Wang, Siying Cui, Aixi Zhang, Wei-Long Zheng, Senzhang Wang", "tldr": "", "abstract": "Facial parts swapping aims to selectively transfer regions of interest from the source image onto the target image while maintaining the rest of the target image unchanged.\nMost studies on face swapping designed specifically for full-face swapping, are either unable or significantly limited when it comes to swapping individual facial parts, which hinders fine-grained and customized character designs.\nHowever, designing such an approach specifically for facial parts swapping is challenged by a reasonable multiple reference feature fusion, which needs to be both efficient and effective.\nTo overcome this challenge, FuseAnyPart is proposed to facilitate the seamless \"fuse-any-part\" customization of the face.\nIn FuseAnyPart, facial parts from different people are assembled into a complete face in latent space within the Mask-based Fusion Module.\nSubsequently, the consolidated feature is dispatched to the Addition-based Injection Module for\nfusion within the UNet of the diffusion model to create novel characters.\nExtensive experiments qualitatively and quantitatively validate the superiority and robustness of FuseAnyPart.\nSource codes are available at https://github.com/Thomas-wyh/FuseAnyPart.", "keywords": "diffusion model;personalization;image generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zheng Yu;Yaohua Wang;Siying Cui;Aixi Zhang;Wei-Long Zheng;Senzhang Wang", "authorids": "~Zheng_Yu4;~Yaohua_Wang2;~Siying_Cui1;~Aixi_Zhang2;~Wei-Long_Zheng1;~Senzhang_Wang2", "gender": "F;M;F;M;;M", "homepage": "https://github.com/sjtuyuzheng;https://thomas-wyh.github.io/;https://github.com/hycsy2019?tab=repositories;;https://weilongzheng.github.io/;https://senzhangwangcsu.github.io/index.html", "dblp": ";;;;150/4150;118/5055", "google_scholar": ";TRAwmsgAAAAJ;;https://scholar.google.com.hk/citations?user=hNTP47EAAAAJ;MZXXe8UAAAAJ;zdWyGRMAAAAJ", "orcid": ";0009-0005-6211-6388;;;;0000-0002-3615-4859", "linkedin": ";;;;;", "or_profile": "~Zheng_Yu4;~Yaohua_Wang2;~Siying_Cui1;~Aixi_Zhang2;~Wei-Long_Zheng1;~Senzhang_Wang2", "aff": "Shanghai Jiaotong University;Alibaba Group;Peking University;Alibaba Group;Shanghai Jiaotong University;Central South University", "aff_domain": "sjtu.edu.cn;alibaba-inc.com;pku.edu.cn;alibaba-inc.com;sjtu.edu.cn;csu.edu.cn", "position": "MS student;Researcher;MS student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024fuseanypart,\ntitle={FuseAnyPart: Diffusion-Driven Facial Parts Swapping via Multiple Reference Images},\nauthor={Zheng Yu and Yaohua Wang and Siying Cui and Aixi Zhang and Wei-Long Zheng and Senzhang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X2UMdvcmMo}\n}", "github": "", "reviewers": "mdEG;jimd;Aivf;b8uw", "pdf_size": 20948403, "rating": "5;5;5;9", "confidence": "4;3;4;5", "soundness": "3;3;2;4", "novelty": "2;2;2;4", "presentation": "2;3;2;4", "wc_summary": "83;56;87;68", "wc_strengths": "29;43;34;81", "wc_weaknesses": "114;83;207;58", "wc_questions": "46;69;3;51", "wc_limitations": "19;31;6;38", "wc_review": "291;282;337;296", "wc_reply_reviewers": "17;24;11;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 73.5, 12.338962679253067 ], "wc_strengths_avg": [ 46.75, 20.40067400847335 ], "wc_weaknesses_avg": [ 115.5, 56.4291591289468 ], "wc_questions_avg": [ 42.25, 24.221632892932714 ], "wc_limitations_avg": [ 23.5, 12.175795661885921 ], "wc_review_avg": [ 301.5, 21.10094784600919 ], "wc_reply_reviewers_avg": [ 13.0, 8.803408430829505 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7ThruEidMX8J:scholar.google.com/&scioq=FuseAnyPart:+Diffusion-Driven+Facial+Parts+Swapping+via+Multiple+Reference+Images&hl=en&as_sdt=0,31", "gs_version_total": 4, "email": "sjtu.edu.cn;alibaba-inc.com;pku.edu.cn;alibaba-inc.com;sjtu.edu.cn;csu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group;Peking University;Central South University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com;http://www.pku.edu.cn;https://www.csu.edu.cn", "aff_unique_abbr": "SJTU;Alibaba;Peking U;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Lorentz-Equivariant Geometric Algebra Transformers for High-Energy Physics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94796", "id": "X34GKv8sYT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X34GKv8sYT", "openreview": "https://openreview.net/forum?id=X34GKv8sYT", "poster": "/media/PosterPDFs/NeurIPS%202024/94796.png?t=1733683879.6424668", "project": "", "author_site": "Jonas Spinner, Victor Breso, Pim de Haan, Tilman Plehn, Jesse Thaler, Johann Brehmer", "tldr": "", "abstract": "Extracting scientific understanding from particle-physics experiments requires solving diverse learning problems with high precision and good data efficiency. We propose the Lorentz Geometric Algebra Transformer (L-GATr), a new multi-purpose architecture for high-energy physics. L-GATr represents high-energy data in a geometric algebra over four-dimensional space-time and is equivariant under Lorentz transformations, the symmetry group of relativistic kinematics. At the same time, the architecture is a Transformer, which makes it versatile and scalable to large systems. L-GATr is first demonstrated on regression and classification tasks from particle physics. We then construct the first Lorentz-equivariant generative model: a continuous normalizing flow based on an L-GATr network, trained with Riemannian flow matching. Across our experiments, L-GATr is on par with or outperforms strong domain-specific baselines.", "keywords": "Geometric deep learning;equivariance;Lorentz symmetry;Transformer;flow matching;high-energy physics;particle physics", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Jonas Spinner;Victor Breso Pla;Pim De Haan;Tilman Plehn;Jesse Thaler;Johann Brehmer", "authorids": "~Jonas_Spinner1;~Victor_Breso_Pla1;~Pim_De_Haan1;~Tilman_Plehn1;~Jesse_Thaler1;~Johann_Brehmer1", "gender": "M;M;M;M;M;M", "homepage": ";https://inspirehep.net/authors/1900638;https://pimdehaan.com;https://www.thphys.uni-heidelberg.de/~plehn/;https://jthaler.net/;https://johannbrehmer.github.io", "dblp": ";;;;;220/5763", "google_scholar": "SZQOOT8AAAAJ;;AZeK-REAAAAJ;r3zfvh0AAAAJ;djDP5SMAAAAJ;ZdUMvCsAAAAJ", "orcid": "0000-0003-4046-948X;;;;0000-0002-2406-8160;0000-0003-3344-4209", "linkedin": ";;https://nl.linkedin.com/in/pim-de-haan;;jesse-thaler/;johannbrehmer", "or_profile": "~Jonas_Spinner1;~Victor_Breso_Pla1;~Pim_De_Haan1;~Tilman_Plehn1;~Jesse_Thaler1;~Johann_Brehmer1", "aff": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Ruprecht-Karls-Universit\u00e4t Heidelberg;Qualcomm;Ruprecht-Karls-Universit\u00e4t Heidelberg;Massachusetts Institute of Technology;Qualcomm AI Research", "aff_domain": "uni-heidelberg.de;uni-heidelberg.de;qualcomm.com;uni-heidelberg.de;mit.edu;qualcomm.com", "position": "PhD student;Postdoc;Researcher;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nspinner2024lorentzequivariant,\ntitle={Lorentz-Equivariant Geometric Algebra Transformers for High-Energy Physics},\nauthor={Jonas Spinner and Victor Breso Pla and Pim De Haan and Tilman Plehn and Jesse Thaler and Johann Brehmer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X34GKv8sYT}\n}", "github": "", "reviewers": "w7CH;srsp;PeE1;PF88", "pdf_size": 720498, "rating": "6;6;7;7", "confidence": "2;3;4;4", "soundness": "3;3;3;3", "novelty": "3;4;3;3", "presentation": "3;4;3;3", "wc_summary": "71;40;73;53", "wc_strengths": "118;68;60;39", "wc_weaknesses": "113;41;125;72", "wc_questions": "10;59;57;77", "wc_limitations": "52;16;2;10", "wc_review": "364;224;317;251", "wc_reply_reviewers": "96;0;17;0", "wc_reply_authors": "244;0;25;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.25, 13.571569548139964 ], "wc_strengths_avg": [ 71.25, 28.994611568358696 ], "wc_weaknesses_avg": [ 87.75, 33.38693606786942 ], "wc_questions_avg": [ 50.75, 24.78280654001883 ], "wc_limitations_avg": [ 20.0, 19.131126469708992 ], "wc_review_avg": [ 289.0, 54.94997725204261 ], "wc_reply_reviewers_avg": [ 28.25, 39.72640809335775 ], "wc_reply_authors_avg": [ 67.25, 102.5557775066817 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6445967631490781252&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uni-heidelberg.de;uni-heidelberg.de;qualcomm.com;uni-heidelberg.de;mit.edu;qualcomm.com", "author_num": 6, "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Qualcomm Incorporated;Massachusetts Institute of Technology;Qualcomm", "aff_unique_dep": ";;;Qualcomm AI Research", "aff_unique_url": "https://www.uni-heidelberg.de/;https://www.qualcomm.com;https://web.mit.edu;https://www.qualcomm.com/research", "aff_unique_abbr": "Uni Heidelberg;Qualcomm;MIT;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Zeroth-Order Sampling Methods for Non-Log-Concave Distributions: Alleviating Metastability by Denoising Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94795", "id": "X3Aljulsw5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X3Aljulsw5", "openreview": "https://openreview.net/forum?id=X3Aljulsw5", "poster": "", "project": "", "author_site": "Ye He, Kevin Rojas, Molei Tao", "tldr": "", "abstract": "This paper considers the problem of sampling from non-logconcave distribution, based on queries of its unnormalized density. It first describes a framework, Denoising Diffusion Monte Carlo (DDMC), based on the simulation of a denoising diffusion process with its score function approximated by a generic Monte Carlo estimator. DDMC is an oracle-based meta-algorithm, where its oracle is the assumed access to samples that generate a Monte Carlo score estimator. Then we provide an implementation of this oracle, based on rejection sampling, and this turns DDMC into a true algorithm, termed Zeroth-Order Diffusion Monte Carlo (ZOD-MC). We provide convergence analyses by first constructing a general framework, i.e. a performance guarantee for DDMC, without assuming the target distribution to be log-concave or satisfying any isoperimetric inequality. Then we prove that ZOD-MC admits an inverse polynomial dependence on the desired sampling accuracy, albeit still suffering from the curse of dimensionality. Consequently, for low dimensional distributions, ZOD-MC is a very efficient sampler, with performance exceeding latest samplers, including also-denoising-diffusion-based RDMC and RSDMC. Last, we experimentally demonstrate the insensitivity of ZOD-MC to increasingly higher barriers between modes or discontinuity in non-convex potential.", "keywords": "Non-logconcave sampling;Monte Carlo sampling;Denoising Diffusion Model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/775bbe86ac50ae62adc6b1e4e6aec37af30367a3.zip", "author": "Ye He;Kevin Rojas;Molei Tao", "authorids": "~Ye_He1;~Kevin_Rojas1;~Molei_Tao1", "gender": "M;M;", "homepage": "https://yeleohe.github.io/;https://kevinrojas1499.github.io/;http://people.math.gatech.edu/~mtao8/", "dblp": "72/7636-3;302/9492;56/9263", "google_scholar": "PC25rDIAAAAJ;88YhA_QAAAAJ;", "orcid": "0000-0003-4686-8449;;", "linkedin": ";kevin-rojas-cisneros-045a20165/;", "or_profile": "~Ye_He1;~Kevin_Rojas1;~Molei_Tao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhe2024zerothorder,\ntitle={Zeroth-Order Sampling Methods for Non-Log-Concave Distributions: Alleviating Metastability by Denoising Diffusion},\nauthor={Ye He and Kevin Rojas and Molei Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X3Aljulsw5}\n}", "github": "", "reviewers": "Q2x6;8qyg;9Wcn;HU9Z", "pdf_size": 4414496, "rating": "5;6;6;7", "confidence": "5;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "345;98;89;126", "wc_strengths": "83;126;65;41", "wc_weaknesses": "625;167;106;40", "wc_questions": "145;44;63;1", "wc_limitations": "215;16;98;1", "wc_review": "1413;451;421;209", "wc_reply_reviewers": "1632;28;61;7", "wc_reply_authors": "3313;0;38;0", "reply_reviewers": "5;1;2;1", "reply_authors": "7;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 164.5, 105.10114176354128 ], "wc_strengths_avg": [ 78.75, 31.083556746292725 ], "wc_weaknesses_avg": [ 234.5, 229.8852974855069 ], "wc_questions_avg": [ 63.25, 52.27033097274208 ], "wc_limitations_avg": [ 82.5, 84.94262769658118 ], "wc_review_avg": [ 623.5, 465.2641722720545 ], "wc_reply_reviewers_avg": [ 432.0, 693.0876567938575 ], "wc_reply_authors_avg": [ 837.75, 1429.1704543195679 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11960857902419898359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Mixture of Link Predictors on Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94794", "id": "X3oeoyJlMw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X3oeoyJlMw", "openreview": "https://openreview.net/forum?id=X3oeoyJlMw", "poster": "", "project": "", "author_site": "Li Ma, Haoyu Han, Juanhui Li, Harry Shomer, Hui Liu, Xiaofeng Gao, Jiliang Tang", "tldr": "", "abstract": "Link prediction, which aims to forecast unseen connections in graphs, is a fundamental task in graph machine learning. Heuristic methods, leveraging a range of different pairwise measures such as common neighbors and shortest paths, often rival the performance of vanilla Graph Neural Networks (GNNs). Therefore, recent advancements in GNNs for link prediction (GNN4LP) have primarily focused on integrating one or a few types of pairwise information. \nIn this work, we reveal that different node pairs within the same dataset necessitate varied pairwise information for accurate prediction and models that only apply the same pairwise information uniformly could achieve suboptimal performance.\nAs a result, we propose a simple mixture of experts model Link-MoE for link prediction. Link-MoE utilizes various GNNs as experts and strategically selects the appropriate expert for each node pair based on various types of pairwise information. Experimental results across diverse real-world datasets demonstrate substantial performance improvement from Link-MoE. Notably, Link-Mo achieves a relative improvement of 18.71% on the MRR metric for the Pubmed dataset and 9.59% on the Hits@100 metric for the ogbl-ppa dataset, compared to the best baselines. The code is available at https://github.com/ml-ml/Link-MoE/.", "keywords": "Link Prediction;Graph Neural Networks;Mixture of Experts", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Li Ma;Haoyu Han;Juanhui Li;Harry Shomer;Hui Liu;Xiaofeng Gao;Jiliang Tang", "authorids": "~Li_Ma9;~Haoyu_Han1;~Juanhui_Li1;~Harry_Shomer1;~Hui_Liu8;~Xiaofeng_Gao2;~Jiliang_Tang1", "gender": ";M;F;;F;F;M", "homepage": ";https://cse.msu.edu/~hanhaoy1/;https://juanhui28.github.io/;https://www.cse.msu.edu/~shomerha/;https://scholar.google.com/citations?user=EuzF_zsAAAAJ&hl=en;https://cs.sjtu.edu.cn/~gao-xf/;https://www.cse.msu.edu/~tangjili/", "dblp": ";257/5633-1;313/9527.html;;93/4010-31;;64/10812", "google_scholar": ";;5J0dd-sAAAAJ;_6eE2vsAAAAJ;;;WtzKMWAAAAAJ", "orcid": ";0000-0002-2529-6042;0000-0003-4909-1778;0000-0001-5081-1870;0000-0002-3555-3495;;0000-0001-7125-3898", "linkedin": ";;;;;;", "or_profile": "~Li_Ma9;~Haoyu_Han1;~Juanhui_Li1;~Harry_Shomer1;~Hui_Liu8;~Xiaofeng_Gao2;~Jiliang_Tang1", "aff": ";Michigan State University;Amazon;Michigan State University;Michigan State University;Shanghai Jiaotong University;Michigan State University", "aff_domain": ";msu.edu;amazon.com;msu.edu;msu.edu;sjtu.edu.cn;msu.edu", "position": ";PhD student;Intern;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2024mixture,\ntitle={Mixture of Link Predictors on Graphs},\nauthor={Li Ma and Haoyu Han and Juanhui Li and Harry Shomer and Hui Liu and Xiaofeng Gao and Jiliang Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X3oeoyJlMw}\n}", "github": "", "reviewers": "SDXC;bwf6;758a;fyCt", "pdf_size": 1022793, "rating": "4;5;7;8", "confidence": "5;5;4;4", "soundness": "2;3;3;4", "novelty": "2;3;4;4", "presentation": "4;3;3;4", "wc_summary": "80;59;106;166", "wc_strengths": "60;65;60;136", "wc_weaknesses": "255;116;166;41", "wc_questions": "85;3;87;55", "wc_limitations": "6;11;1;25", "wc_review": "486;254;420;423", "wc_reply_reviewers": "1134;0;145;0", "wc_reply_authors": "2320;0;254;0", "reply_reviewers": "3;0;1;0", "reply_authors": "7;1;2;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 102.75, 40.13337140086788 ], "wc_strengths_avg": [ 80.25, 32.251937926270415 ], "wc_weaknesses_avg": [ 144.5, 77.77692459849516 ], "wc_questions_avg": [ 57.5, 33.92270625996694 ], "wc_limitations_avg": [ 10.75, 8.954747344286158 ], "wc_review_avg": [ 395.75, 85.97783144508821 ], "wc_reply_reviewers_avg": [ 319.75, 473.81978377860077 ], "wc_reply_authors_avg": [ 643.5, 973.4663579189576 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.948683298050514, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3905219153133427467&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";msu.edu;amazon.com;msu.edu;msu.edu;sjtu.edu.cn;msu.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Michigan State University;Amazon;Shanghai Jiao Tong University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.msu.edu;https://www.amazon.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "MSU;Amazon;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "MUVERA: Multi-Vector Retrieval via Fixed Dimensional Encoding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94793", "id": "X3ydKRcQr6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X3ydKRcQr6", "openreview": "https://openreview.net/forum?id=X3ydKRcQr6", "poster": "/media/PosterPDFs/NeurIPS%202024/94793.png?t=1731706975.3670254", "project": "", "author_site": "Rajesh Jayaram, Laxman Dhulipala, Majid Hadian, Jason Lee, Vahab Mirrokni", "tldr": "", "abstract": "Neural embedding models have become a fundamental component of modern information retrieval (IR) pipelines. These models produce a single embedding $x \\in \\mathbb{R}^d$ per data-point, allowing for fast retrieval via highly optimized maximum inner product search (MIPS) algorithms. Recently, beginning with the landmark ColBERT paper, multi-vector models, which produce a set of embedding per data point, have achieved markedly superior performance for IR tasks. Unfortunately, using these models for IR is computationally expensive due to the increased complexity of multi-vector retrieval and scoring. \n\nIn this paper, we introduce MUVERA (MUlti-VEctor Retrieval Algorithm), a retrieval mechanism which reduces multi-vector similarity search to single-vector similarity search. This enables the usage of off-the-shelf MIPS solvers for multi-vector retrieval. \nMUVERA asymmetrically generates Fixed Dimensional Encodings (FDEs) of queries and documents, which are vectors whose inner product approximates multi-vector similarity. We prove that FDEs give high-quality $\\epsilon$-approximations, thus providing the first single-vector proxy for multi-vector similarity with theoretical guarantees. Empirically, we find that FDEs achieve the same recall as prior state-of-the-art heuristics while retrieving 2-5$\\times$ fewer candidates. Compared to prior state of the art implementations, MUVERA achieves consistently good end-to-end recall and latency across a diverse set of the BEIR retrieval datasets, achieving an average of 10% improved recall with 90% lower latency.", "keywords": "Retrieval;Late Interaction;ColBERT;Multi-Vector", "primary_area": "optimization", "supplementary_material": "", "author": "Rajesh Jayaram;Laxman Dhulipala;Majid Hadian;Jason Lee;Vahab Mirrokni", "authorids": "~Rajesh_Jayaram1;~Laxman_Dhulipala1;~Majid_Hadian1;~Jason_Lee8;~Vahab_Mirrokni2", "gender": ";;M;M;M", "homepage": "http://rajeshjayaram.com/;;;https://research.google/people/JasonLee.html;https://people.csail.mit.edu/mirrokni/Welcome.html", "dblp": "202/9970.html;;378/4548;;m/VahabSMirrokni", "google_scholar": "Cerc8UYAAAAJ;;https://scholar.google.com/scholar?hl=en;;opbZfw0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Rajesh_Jayaram1;~Laxman_Dhulipala1;~Majid_Hadian1;~Jason_Lee8;~Vahab_Mirrokni2", "aff": "Google;;Google;Google;Google Research", "aff_domain": "google.com;;google.com;google.com;google.com", "position": "Researcher;;Researcher;Researcher;VP, Google Fellow", "bibtex": "@inproceedings{\njayaram2024muvera,\ntitle={{MUVERA}: Multi-Vector Retrieval via Fixed Dimensional Encoding},\nauthor={Rajesh Jayaram and Laxman Dhulipala and Majid Hadian and Jason Lee and Vahab Mirrokni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X3ydKRcQr6}\n}", "github": "", "reviewers": "uq8U;Egyf;Z5yi;b2ZE;7tYU;4i1U", "pdf_size": 2065577, "rating": "3;3;4;4;6;6", "confidence": "4;3;4;3;4;3", "soundness": "3;2;2;3;4;3", "novelty": "2;2;3;2;3;3", "presentation": "3;2;3;3;3;3", "wc_summary": "53;96;90;106;51;88", "wc_strengths": "51;35;129;22;71;53", "wc_weaknesses": "135;114;496;110;75;36", "wc_questions": "10;38;160;144;100;241", "wc_limitations": "10;25;1;35;44;9", "wc_review": "259;308;876;417;341;427", "wc_reply_reviewers": "45;83;376;205;216;66", "wc_reply_authors": "325;0;837;789;559;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;1;2;2;2;1", "rating_avg": [ 4.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 80.66666666666667, 21.068670790747312 ], "wc_strengths_avg": [ 60.166666666666664, 34.35314962108844 ], "wc_weaknesses_avg": [ 161.0, 153.1513412717412 ], "wc_questions_avg": [ 115.5, 77.37301424484724 ], "wc_limitations_avg": [ 20.666666666666668, 15.304320377665327 ], "wc_review_avg": [ 438.0, 204.4325479630547 ], "wc_reply_reviewers_avg": [ 165.16666666666666, 115.2148958347931 ], "wc_reply_authors_avg": [ 418.3333333333333, 339.4061807857299 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-muXVrCbZ6IJ:scholar.google.com/&scioq=MUVERA:+Multi-Vector+Retrieval+via+Fixed+Dimensional+Encoding&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "google.com;;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Provable Acceleration of Nesterov's Accelerated Gradient for Asymmetric Matrix Factorization and Linear Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94792", "id": "X44OawAq7b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X44OawAq7b", "openreview": "https://openreview.net/forum?id=X44OawAq7b", "poster": "/media/PosterPDFs/NeurIPS%202024/94792.png?t=1733810872.5321913", "project": "", "author_site": "Zhenghao Xu, Yuqing Wang, Tuo Zhao, Rachel Ward, Molei Tao", "tldr": "", "abstract": "We study the convergence rate of first-order methods for rectangular matrix factorization, which is a canonical nonconvex optimization problem. Specifically, given a rank-$r$ matrix $\\mathbf{A}\\in\\mathbb{R}^{m\\times n}$, we prove that gradient descent (GD) can find a pair of $\\epsilon$-optimal solutions $\\mathbf{X}_T\\in\\mathbb{R}^{m\\times d}$ and $\\mathbf{Y}_T\\in\\mathbb{R}^{n\\times d}$, where $d\\geq r$, satisfying $\\lVert\\mathbf{X}_T\\mathbf{Y}_T^\\top-\\mathbf{A}\\rVert_F\\leq\\epsilon\\lVert\\mathbf{A}\\rVert_F$ in $T=O(\\kappa^2\\log\\frac{1}{\\epsilon})$ iterations with high probability, where $\\kappa$ denotes the condition number of $\\mathbf{A}$. Furthermore, we prove that Nesterov's accelerated gradient (NAG) attains an iteration complexity of $O(\\kappa\\log\\frac{1}{\\epsilon})$, which is the best-known bound of first-order methods for rectangular matrix factorization. Different from small balanced random initialization in the existing literature, we adopt an unbalanced initialization, where $\\mathbf{X}_0$ is large and $\\mathbf{Y}_0$ is $0$. Moreover, our initialization and analysis can be further extended to linear neural networks, where we prove that NAG can also attain an accelerated linear convergence rate. In particular, we only require the width of the network to be greater than or equal to the rank of the output label matrix. In contrast, previous results achieving the same rate require excessive widths that additionally depend on the condition number and the rank of the input data matrix.", "keywords": "matrix factorization;nonconvex optimization;Nesterov's acceleration;linear neural networks", "primary_area": "optimization", "supplementary_material": "/attachment/2e381ec67c8a6c62d9a4630957775088a12e8491.zip", "author": "Zhenghao Xu;Yuqing Wang;Tuo Zhao;Rachel Ward;Molei Tao", "authorids": "~Zhenghao_Xu1;~Yuqing_Wang3;~Tuo_Zhao2;~Rachel_Ward1;~Molei_Tao1", "gender": "M;;;;M", "homepage": "https://www.isye.gatech.edu/users/zhenghao-xu;https://yzwangyuqing.github.io;;http://people.math.gatech.edu/~mtao8/;http://www2.isye.gatech.edu/~tzhao80", "dblp": "357/5585;;80/7132;56/9263;", "google_scholar": "FRegzp4AAAAJ;c7Bi9RUAAAAJ;;;EJXN6tYAAAAJ", "orcid": "0000-0001-8076-5166;;;;", "linkedin": ";;;;", "or_profile": "~Zhenghao_Xu1;~Yuqing_Wang3;~Rachel_Ward1;~Molei_Tao1;~Tuo_Zhao1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;University of Texas at Austin;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;utexas.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2024provable,\ntitle={Provable Acceleration of Nesterov's Accelerated Gradient for Asymmetric Matrix Factorization and Linear Neural Networks},\nauthor={Zhenghao Xu and Yuqing Wang and Tuo Zhao and Rachel Ward and Molei Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X44OawAq7b}\n}", "github": "", "reviewers": "tURQ;u7PK;t3Co;SNTq", "pdf_size": 564434, "rating": "4;7;7;7", "confidence": "3;4;2;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "81;62;99;49", "wc_strengths": "18;48;121;44", "wc_weaknesses": "59;40;35;71", "wc_questions": "36;236;2;74", "wc_limitations": "1;7;2;1", "wc_review": "195;393;259;239", "wc_reply_reviewers": "32;0;14;33", "wc_reply_authors": "62;62;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 18.952242611363964 ], "wc_strengths_avg": [ 57.75, 38.290827883450106 ], "wc_weaknesses_avg": [ 51.25, 14.49784466739798 ], "wc_questions_avg": [ 87.0, 89.71621926942753 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 271.5, 73.86981792315451 ], "wc_reply_reviewers_avg": [ 19.75, 13.681648292512127 ], "wc_reply_authors_avg": [ 31.0, 31.0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8914872204337493603&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "gatech.edu;gatech.edu;utexas.edu;gatech.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.utexas.edu", "aff_unique_abbr": "Georgia Tech;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Copycats: the many lives of a publicly available medical imaging dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97652", "id": "X4KImMSIRq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X4KImMSIRq", "openreview": "https://openreview.net/forum?id=X4KImMSIRq", "poster": "/media/PosterPDFs/NeurIPS%202024/97652.png?t=1732278459.9557676", "project": "", "author_site": "Amelia Jim\u00e9nez-S\u00e1nchez, Natalia-Rozalia Avlona, Dovile Juodelyte, Th\u00e9o Sourget, Caroline Vang-Larsen, Anna Rogers, Hubert Zaj\u0105c, Veronika Cheplygina", "tldr": "", "abstract": "Medical Imaging (MI) datasets are fundamental to artificial intelligence in healthcare. The accuracy, robustness, and fairness of diagnostic algorithms depend on the data (and its quality) used to train and evaluate the models. MI datasets used to be proprietary, but have become increasingly available to the public, including on community-contributed platforms (CCPs) like Kaggle or HuggingFace. While open data is important to enhance the redistribution of data's public value, we find that the current CCP governance model fails to uphold the quality needed and recommended practices for sharing, documenting, and evaluating datasets. In this paper, we conduct an analysis of publicly available machine learning datasets on CCPs, discussing datasets' context, and identifying limitations and gaps in the current CCP landscape. We highlight differences between MI and computer vision datasets, particularly in the potentially harmful downstream effects from poor adoption of recommended dataset management practices. We compare the analyzed datasets across several dimensions, including data sharing, data documentation, and maintenance. We find vague licenses, lack of persistent identifiers and storage, duplicates, and missing metadata, with differences between the platforms. Our research contributes to efforts in responsible data curation and AI algorithms for healthcare.", "keywords": "open data;medical imaging;datasets;healthcare;dataset management;data governance", "primary_area": "", "supplementary_material": "/attachment/b30ab15b3a35059d4345b70f3b5b010061e0cf8c.pdf", "author": "Amelia Jim\u00e9nez-S\u00e1nchez;Natalia-Rozalia Avlona;Dovile Juodelyte;Th\u00e9o Sourget;Caroline Vang-Larsen;Anna Rogers;Hubert Dariusz Zaj\u0105c;Veronika Cheplygina", "authorids": "~Amelia_Jim\u00e9nez-S\u00e1nchez1;~Natalia-Rozalia_Avlona1;~Dovile_Juodelyte1;~Th\u00e9o_Sourget1;~Caroline_Vang-Larsen1;~Anna_Rogers1;~Hubert_Dariusz_Zaj\u0105c1;~Veronika_Cheplygina2", "gender": "F;F;;M;;F;M;F", "homepage": "https://ameliajimenez.github.io;;https://doviledo.github.io/;https://tsourget.fr/;;https://annargrs.github.io;;https://www.veronikach.com", "dblp": "205/4615;;;;;203/9462;;91/10084", "google_scholar": "2xeIA9sAAAAJ;;;aidbxNUAAAAJ;;5oCYOE0AAAAJ;;4x1y2bwAAAAJ", "orcid": "0000-0001-7870-0603;0000-0002-1009-1810;;;;0000-0002-4845-4023;0000-0003-0689-6912;0000-0003-0176-9324", "linkedin": "amejs/;natalia-rozalia-avlona/;;theo-sourget/;;annargrs;;", "or_profile": "~Amelia_Jim\u00e9nez-S\u00e1nchez1;~Natalia-Rozalia_Avlona1;~Dovile_Juodelyte1;~Th\u00e9o_Sourget1;~Caroline_Vang-Larsen1;~Anna_Rogers1;~Hubert_Dariusz_Zaj\u0105c1;~Veronika_Cheplygina2", "aff": "IT University of Copenhagen;University of Copenhagen;IT University of Copenhagen;IT University of Copenhagen;IT University of Copenhagen;IT University of Copenhagen;Copenhagen University;IT University of Copenhagen", "aff_domain": "itu.dk;diku.dk;itu.dk;itu.dk;itu.dk;itu.dk;ku.dk;itu.dk", "position": "Postdoc;PhD student;PhD student;Researcher;MS student;Associate Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\njim{\\'e}nez-s{\\'a}nchez2024copycats,\ntitle={Copycats: the many lives of a publicly available medical imaging dataset},\nauthor={Amelia Jim{\\'e}nez-S{\\'a}nchez and Natalia-Rozalia Avlona and Dovile Juodelyte and Th{\\'e}o Sourget and Caroline Vang-Larsen and Anna Rogers and Hubert Dariusz Zaj{\\k{a}}c and Veronika Cheplygina},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=X4KImMSIRq}\n}", "github": "", "reviewers": "Lyfj;J8T9;Pct9;XUa5", "pdf_size": 680048, "rating": "6;6;7;8", "confidence": "4;4;4;5", "wc_summary_and_contributions": "45;394;43;95", "wc_strengths": "79;47;16;72", "wc_improvement": "54;335;20;52", "wc_limitations": "198;38;1;20", "wc_correctness": "62;33;1;1", "wc_clarity": "6;10;9;1", "wc_relation_to_prior_work": "16;37;9;21", "wc_documentation": "1;27;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "462;922;101;264", "wc_reply_reviewers": "730;170;0;70", "wc_reply_authors": "61;0;0;43", "reply_reviewers": "1;1;0;1", "reply_authors": "8;1;1;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 144.25, 145.6903823181201 ], "wc_strengths_avg": [ 53.5, 24.70323865407125 ], "wc_improvement_avg": [ 115.25, 127.58795985515248 ], "wc_limitations_avg": [ 64.25, 78.32105400210087 ], "wc_correctness_avg": [ 24.25, 25.410381736605217 ], "wc_clarity_avg": [ 6.5, 3.5 ], "wc_relation_to_prior_work_avg": [ 20.75, 10.304731922762475 ], "wc_documentation_avg": [ 7.5, 11.258330249197702 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 437.25, 307.682770885859 ], "wc_reply_reviewers_avg": [ 242.5, 287.8693279944913 ], "wc_reply_authors_avg": [ 26.0, 26.767517628648346 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 2.8613807855648994 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10902370624295933934&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "itu.dk;diku.dk;itu.dk;itu.dk;itu.dk;itu.dk;ku.dk;itu.dk", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;1;0", "aff_unique_norm": "IT University of Copenhagen;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://itu.dk;https://www.ku.dk", "aff_unique_abbr": "ITU;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Denmark" }, { "title": "MmCows: A Multimodal Dataset for Dairy Cattle Monitoring", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97651", "id": "X4nq0W2qZX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X4nq0W2qZX", "openreview": "https://openreview.net/forum?id=X4nq0W2qZX", "poster": "/media/PosterPDFs/NeurIPS%202024/97651.png?t=1731097150.2767594", "project": "", "author_site": "Hien Vu, Omkar Chandrakant Prabhune, Unmesh Raskar, Dimuth Panditharatne, Hanwook Chung, Christopher Choi, Younghyun Kim", "tldr": "", "abstract": "Precision livestock farming (PLF) has been transformed by machine learning (ML), enabling more precise and timely interventions that enhance overall farm productivity, animal welfare, and environmental sustainability. However, despite the availability of various sensing technologies, few datasets leverage multiple modalities, which are crucial for developing more accurate and efficient monitoring devices and ML models. To address this gap, we present MmCows, a multimodal dataset for dairy cattle monitoring. This dataset comprises a large amount of synchronized, high-quality measurement data on behavioral, physiological, and environmental factors. It includes two weeks of data collected using wearable and implantable sensors deployed on ten milking Holstein cows, such as ultra-wideband (UWB) sensors, inertial sensors, and body temperature sensors. In addition, it features 4.8 million frames of high-resolution image sequences from four isometric view cameras, as well as temperature and humidity data from environmental sensors. We also gathered milk yield data and outdoor weather conditions. One full day\u2019s worth of image data is annotated as ground truth, totaling 20,000 frames with 213,000 bounding boxes of 16 cows, along with their 3D locations and behavior labels. An extensive analysis of MmCows is provided to evaluate the modalities individually and their complementary benefits. The release of MmCows and its benchmarks will facilitate research on multimodal monitoring of dairy cattle, thereby promoting sustainable dairy farming. The dataset and the code for benchmarks are available at https://github.com/neis-lab/mmcows.", "keywords": "Multimodal dataset;livestock monitoring;behavior monitoring;multi-view;visual localization;UWB localization", "primary_area": "", "supplementary_material": "/attachment/bf441bb76c37baa8ed5f265f920a400a5e69fb3d.pdf", "author": "Hien Vu;Omkar Prabhune;Unmesh Raskar;Dimuth Panditharatne;Hanwook Chung;Christopher Choi;Younghyun Kim", "authorids": "~Hien_Vu1;~Omkar_Prabhune1;~Unmesh_Raskar1;~Dimuth_Panditharatne1;~Hanwook_Chung1;~Christopher_Choi1;~Younghyun_Kim2", "gender": "M;;M;M;M;M;", "homepage": "https://hienvuvg.github.io/;;https://wisest.ece.wisc.edu;https://www.linkedin.com/in/dimuth-panditharatne;;https://bse.wisc.edu/staff/choi-christopher/;", "dblp": ";320/0032;;;;;", "google_scholar": "TlFWZ8QAAAAJ;hL-Ms_IAAAAJ;;;https://scholar.google.com/citations?hl=en;;ac0WJaEAAAAJ", "orcid": ";;;0009-0006-8882-2341;;;0000-0002-5287-9235", "linkedin": "hienvuvg;omkar-prabhune/;https://linkedin.com/in/unmesh-raskar;dimuth-panditharatne;;;younghyun-kim-purdue/", "or_profile": "~Hien_Vu1;~Omkar_Prabhune1;~Unmesh_Raskar1;~Dimuth_Panditharatne1;~Hanwook_Chung1;~Christopher_Choi1;~Younghyun_Kim2", "aff": "Purdue University;Purdue University;University of Wisconsin - Madison;University of Wisconsin - Madison;Iowa State University;University of Wisconsin - Madison;Purdue University", "aff_domain": "purdue.edu;purdue.edu;wisc.edu;wisc.edu;iastate.edu;wisc.edu;purdue.edu", "position": "PhD student;PhD student;MS student;PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nvu2024mmcows,\ntitle={MmCows: A Multimodal Dataset for Dairy Cattle Monitoring},\nauthor={Hien Vu and Omkar Prabhune and Unmesh Raskar and Dimuth Panditharatne and Hanwook Chung and Christopher Choi and Younghyun Kim},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=X4nq0W2qZX}\n}", "github": "", "reviewers": "QRCN;j5Vx;AL6T;6LKz", "pdf_size": 5004534, "rating": "7;7;8;9", "confidence": "4;5;3;3", "wc_summary_and_contributions": "53;34;63;42", "wc_strengths": "39;31;103;30", "wc_improvement": "83;103;77;63", "wc_limitations": "22;22;40;33", "wc_correctness": "29;12;11;1", "wc_clarity": "17;6;15;1", "wc_relation_to_prior_work": "35;21;28;14", "wc_documentation": "14;6;27;12", "wc_additional_feedback": "1;1;1;1", "wc_review": "293;236;365;197", "wc_reply_reviewers": "16;12;0;0", "wc_reply_authors": "13;11;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 7.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 48.0, 10.977249200050075 ], "wc_strengths_avg": [ 50.75, 30.367540236245674 ], "wc_improvement_avg": [ 81.5, 14.378803844548406 ], "wc_limitations_avg": [ 29.25, 7.660776723022281 ], "wc_correctness_avg": [ 13.25, 10.059199769365355 ], "wc_clarity_avg": [ 9.75, 6.53356717268599 ], "wc_relation_to_prior_work_avg": [ 24.5, 7.826237921249264 ], "wc_documentation_avg": [ 14.75, 7.660776723022281 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 272.75, 63.26284454559406 ], "wc_reply_reviewers_avg": [ 7.0, 7.14142842854285 ], "wc_reply_authors_avg": [ 6.0, 6.041522986797286 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KnlmxTJbMx0J:scholar.google.com/&scioq=MmCows:+A+Multimodal+Dataset+for+Dairy+Cattle+Monitoring&hl=en&as_sdt=0,48", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;wisc.edu;wisc.edu;iastate.edu;wisc.edu;purdue.edu", "author_num": 7, "aff_unique_index": "0;0;1;1;2;1;0", "aff_unique_norm": "Purdue University;University of Wisconsin-Madison;Iowa State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.purdue.edu;https://www.wisc.edu;https://www.iastate.edu", "aff_unique_abbr": "Purdue;UW-Madison;ISU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Certified Robustness for Deep Equilibrium Models via Serialized Random Smoothing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94791", "id": "X64IJvdftR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X64IJvdftR", "openreview": "https://openreview.net/forum?id=X64IJvdftR", "poster": "/media/PosterPDFs/NeurIPS%202024/94791.png?t=1731439299.009959", "project": "", "author_site": "Weizhi Gao, Zhichao Hou, Han Xu, Xiaorui Liu", "tldr": "", "abstract": "Implicit models such as Deep Equilibrium Models (DEQs) have emerged as promising alternative approaches for building deep neural networks. Their certified robustness has gained increasing research attention due to security concerns. Existing certified defenses for DEQs employing interval bound propagation and Lipschitz-bounds not only offer conservative certification bounds but also are restricted to specific forms of DEQs. In this paper, we provide the first randomized smoothing certified defense for DEQs to solve these limitations. Our study reveals that simply applying randomized smoothing to certify DEQs provides certified robustness generalized to large-scale datasets but incurs extremely expensive computation costs. To reduce computational redundancy, we propose a novel Serialized Randomized Smoothing (SRS) approach that leverages historical information. Additionally, we derive a new certified radius estimation for SRS to theoretically ensure the correctness of our algorithm. Extensive experiments and ablation studies on image recognition demonstrate that our algorithm can significantly accelerate the certification of DEQs by up to 7x almost without sacrificing the certified accuracy. The implementation will be publicly available upon the acceptance of this work. Our code is available at https://github.com/WeizhiGao/Serialized-Randomized-Smoothing.", "keywords": "trustworthy machine learning; certified robustness; randomized smoothing", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Weizhi Gao;Zhichao Hou;Han Xu;Xiaorui Liu", "authorids": "~Weizhi_Gao1;~Zhichao_Hou1;~Han_Xu1;~Xiaorui_Liu1", "gender": "M;M;M;M", "homepage": "https://weizhigao.github.io/;https://chris-hzc.github.io/;https://cse.msu.edu/~xuhan1/;https://sites.google.com/ncsu.edu/xiaorui/", "dblp": "335/0844;188/4064;32/34-2;172/0995", "google_scholar": "8DMz6dUAAAAJ;rraC4ZMAAAAJ;mX2rL3IAAAAJ;NhvN1KoAAAAJ", "orcid": ";0000-0002-3989-2654;0000-0002-4016-6748;0000-0001-8217-5688", "linkedin": "weizhi-gao-888052254/;zhichao-hou-b022931a4/;;", "or_profile": "~Weizhi_Gao1;~Zhichao_Hou1;~Han_Xu1;~Xiaorui_Liu1", "aff": "North Carolina State University;Amazon;Michigan State University;North Carolina State University", "aff_domain": "ncsu.edu;amazon.com;msu.edu;ncsu.edu", "position": "PhD student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngao2024certified,\ntitle={Certified Robustness for Deep Equilibrium Models via Serialized Random Smoothing},\nauthor={Weizhi Gao and Zhichao Hou and Han Xu and Xiaorui Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X64IJvdftR}\n}", "github": "", "reviewers": "qqXj;eMdN;366J;2B8g;3u5s", "pdf_size": 1359737, "rating": "7;7;7;7;7", "confidence": "5;4;3;3;4", "soundness": "4;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;3;3;4;4", "wc_summary": "70;95;116;107;112", "wc_strengths": "38;214;99;70;109", "wc_weaknesses": "260;29;374;66;52", "wc_questions": "78;77;57;53;136", "wc_limitations": "24;1;1;21;1", "wc_review": "470;416;647;317;410", "wc_reply_reviewers": "18;18;19;12;117", "wc_reply_authors": "30;25;37;21;30", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 100.0, 16.57709262808168 ], "wc_strengths_avg": [ 106.0, 59.400336699382436 ], "wc_weaknesses_avg": [ 156.2, 136.66367476399864 ], "wc_questions_avg": [ 80.2, 29.687707894008927 ], "wc_limitations_avg": [ 9.6, 10.57544325312183 ], "wc_review_avg": [ 452.0, 109.2098896620631 ], "wc_reply_reviewers_avg": [ 36.8, 40.17661011085928 ], "wc_reply_authors_avg": [ 28.6, 5.388877434122992 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14104661524821506701&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ncsu.edu;amazon.com;msu.edu;ncsu.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "North Carolina State University;Amazon;Michigan State University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.ncsu.edu;https://www.amazon.com;https://www.msu.edu", "aff_unique_abbr": "NCSU;Amazon;MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Why Transformers Need Adam: A Hessian Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94790", "id": "X6rqEpbnj3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X6rqEpbnj3", "openreview": "https://openreview.net/forum?id=X6rqEpbnj3", "poster": "/media/PosterPDFs/NeurIPS%202024/94790.png?t=1731340436.7791083", "project": "", "author_site": "Yushun Zhang, Congliang Chen, Tian Ding, Ziniu Li, Ruoyu Sun, Zhiquan Luo", "tldr": "", "abstract": "SGD performs worse than Adam by a significant margin on Transformers, but the reason remains unclear. In this work, we provide an explanation through the lens of Hessian: (i) Transformers are \"heterogeneous'': the Hessian spectrum across parameter blocks vary dramatically, a phenomenon we call \"block heterogeneity\"; (ii) Heterogeneity hampers SGD: SGD performs worse than Adam on problems with block heterogeneity. To validate (i) and (ii), we check various Transformers, CNNs, MLPs, and quadratic problems, and find that SGD can perform on par with Adam on problems without block heterogeneity, but performs worse than Adam when the heterogeneity exists. Our initial theoretical analysis indicates that SGD performs worse because it applies one single learning rate to all blocks, which cannot handle the heterogeneity among blocks. This limitation could be ameliorated if we use coordinate-wise learning rates, as designed in Adam.", "keywords": "Transformers;Adam;Optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/cb7eeaffacf552f45622155aa2b1b4210d95a548.zip", "author": "Yushun Zhang;Congliang Chen;Tian Ding;Ziniu Li;Ruoyu Sun;Zhi-Quan Luo", "authorids": "~Yushun_Zhang1;~Congliang_Chen1;~Tian_Ding1;~Ziniu_Li1;~Ruoyu_Sun1;~Zhi-Quan_Luo1", "gender": "M;M;M;M;;M", "homepage": "https://zyushun.github.io/;;;http://www.liziniu.org/;https://ruoyus.github.io/;", "dblp": "276/8662;205/7138;;254/0986;30/9879-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;O1P1-EAAAAAJ;https://scholar.google.com.hk/citations?user=lVkDF-YAAAAJ;80UnKQQAAAAJ;PsfzbCMAAAAJ;dW3gcXoAAAAJ", "orcid": ";;0000-0002-9383-8405;;;", "linkedin": ";;;;;", "or_profile": "~Yushun_Zhang1;~Congliang_Chen1;~Tian_Ding1;~Ziniu_Li1;~Ruoyu_Sun1;~Zhi-Quan_Luo1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong(Shenzhen);Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024why,\ntitle={Why Transformers Need Adam: A Hessian Perspective},\nauthor={Yushun Zhang and Congliang Chen and Tian Ding and Ziniu Li and Ruoyu Sun and Zhi-Quan Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X6rqEpbnj3}\n}", "github": "", "reviewers": "5ncB;A8Vc;tJ3K;pTmy", "pdf_size": 2461306, "rating": "5;6;6;7", "confidence": "3;3;2;3", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "3;4;3;3", "wc_summary": "118;187;80;81", "wc_strengths": "34;51;58;48", "wc_weaknesses": "275;86;171;88", "wc_questions": "5;109;168;2", "wc_limitations": "1;1;3;6", "wc_review": "433;434;480;225", "wc_reply_reviewers": "0;18;12;90", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.5, 43.48850422812907 ], "wc_strengths_avg": [ 47.75, 8.728545125048045 ], "wc_weaknesses_avg": [ 155.0, 77.3078262532326 ], "wc_questions_avg": [ 71.0, 70.657625207758 ], "wc_limitations_avg": [ 2.75, 2.0463381929681126 ], "wc_review_avg": [ 393.0, 98.83572228703547 ], "wc_reply_reviewers_avg": [ 30.0, 35.24202037341219 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2561187541423734687&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.sribd.cn", "aff_unique_abbr": "CUHK;", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Shenzhen;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MAN TruckScenes: A multimodal dataset for autonomous trucking in diverse conditions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97650", "id": "X8ItT6mGKF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X8ItT6mGKF", "openreview": "https://openreview.net/forum?id=X8ItT6mGKF", "poster": "/media/PosterPDFs/NeurIPS%202024/97650.png?t=1731490176.4340382", "project": "", "author_site": "Felix Fent, Fabian Kuttenreich, Florian Ruch, Farija Rizwin, Stefan Juergens, Lorenz Lechermann, Christian Nissler, Andrea Perl, Ulrich Voll, Min Yan, Markus Lienkamp", "tldr": "", "abstract": "Autonomous trucking is a promising technology that can greatly impact modern logistics and the environment. Ensuring its safety on public roads is one of the main duties that requires an accurate perception of the environment. To achieve this, machine learning methods rely on large datasets, but to this day, no such datasets are available for autonomous trucks. In this work, we present MAN TruckScenes, the first multimodal dataset for autonomous trucking. MAN TruckScenes allows the research community to come into contact with truck-specific challenges, such as trailer occlusions, novel sensor perspectives, and terminal environments for the first time. It comprises more than 740 scenes of 20 s each within a multitude of different environmental conditions. The sensor set includes 4 cameras, 6 lidar, 6 radar sensors, 2 IMUs, and a high-precision GNSS. The dataset's 3D bounding boxes were manually annotated and carefully reviewed to achieve a high quality standard. Bounding boxes are available for 27 object classes, 15 attributes, and a range of more than 230 m. The scenes are tagged according to 34 distinct scene tags, and all objects are tracked throughout the scene to promote a wide range of applications. Additionally, MAN TruckScenes is the first dataset to provide 4D radar data with 360\u00b0 coverage and is thereby the largest radar dataset with annotated 3D bounding boxes. Finally, we provide extensive dataset analysis and baseline results. The dataset, development kit, and more will be available online.", "keywords": "dataset;autonomous driving;truck;perception", "primary_area": "", "supplementary_material": "/attachment/80191dab0f733594bd98ed5e9c7cdd92d3201069.zip", "author": "Felix Sebastian Fent;Fabian Kuttenreich;Florian Ruch;Farija Rizwin;Stefan Juergens;Lorenz Lechermann;Christian Nissler;Andrea Perl;Ulrich Voll;Min Yan;Markus Lienkamp", "authorids": "~Felix_Sebastian_Fent1;~Fabian_Kuttenreich1;~Florian_Ruch1;~Farija_Rizwin1;~Stefan_Juergens1;~Lorenz_Lechermann1;~Christian_Nissler1;~Andrea_Perl1;~Ulrich_Voll1;~Min_Yan2;~Markus_Lienkamp1", "gender": "Not Specified;;M;F;;M;M;F;;M;M", "homepage": ";;https://www.man.eu/;https://www.man.eu;https://man.eu;;;;;;https://www.mos.ed.tum.de/ftm/startseite/", "dblp": "313/2054;;;;;;;;;;121/4171", "google_scholar": "wxWHG3IAAAAJ;;;;;;d2H_1j0AAAAJ;;;;https://scholar.google.de/citations?hl=de", "orcid": "0000-0002-8857-1275;;;0009-0002-9870-4956;;;0000-0003-4361-9041;;;;0000-0002-9263-5323", "linkedin": ";;https://de.linkedin.com/in/florian-ruch;https://linkedin.com/in/farija-rizwin;;lorenz-lechermann;;andrea-perl/;https://de.linkedin.com/in/ulrichvoll;min-yan-b2875242/;markus-lienkamp-720665179/", "or_profile": "~Felix_Sebastian_Fent1;~Fabian_Kuttenreich1;~Florian_Ruch1;~Farija_Rizwin1;~Stefan_Juergens1;~Lorenz_Lechermann1;~Christian_Nissler1;~Andrea_Perl1;~Ulrich_Voll1;~Min_Yan2;~Markus_Lienkamp1", "aff": "Technische Universit\u00e4t M\u00fcnchen;;MAN Truck & Bus SE;MAN Truck & Bus SE;MAN T&B;MAN Truck & Bus SE;MAN Truck & Bus SE;MAN Truck & Bus SE;MAN Truck and Bus;MAN Truck & Bus SE;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;;man.eu;man.eu;man.eu;man.eu;man.eu;man.eu;man.eu;man.eu;tum.de", "position": "PhD student;;Researcher;Researcher;Researcher;Employee;Researcher;Researcher;Researcher;Intern;Full Professor", "bibtex": "@inproceedings{\nfent2024man,\ntitle={{MAN} TruckScenes: A multimodal dataset for autonomous trucking in diverse conditions},\nauthor={Felix Sebastian Fent and Fabian Kuttenreich and Florian Ruch and Farija Rizwin and Stefan Juergens and Lorenz Lechermann and Christian Nissler and Andrea Perl and Ulrich Voll and Min Yan and Markus Lienkamp},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=X8ItT6mGKF}\n}", "github": "", "reviewers": "V7hQ;5tdS;C6uJ", "pdf_size": 22697130, "rating": "7;8;8", "confidence": "4;5;5", "wc_summary_and_contributions": "50;78;106", "wc_strengths": "58;54;107", "wc_improvement": "85;12;98", "wc_limitations": "14;30;79", "wc_correctness": "18;25;42", "wc_clarity": "1;8;43", "wc_relation_to_prior_work": "1;72;37", "wc_documentation": "1;30;53", "wc_additional_feedback": "1;1;1", "wc_review": "229;310;566", "wc_reply_reviewers": "68;12;0", "wc_reply_authors": "1978;761;494", "reply_reviewers": "1;1;0", "reply_authors": "3;1;1", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 78.0, 22.861904265976328 ], "wc_strengths_avg": [ 73.0, 24.097026095903757 ], "wc_improvement_avg": [ 65.0, 37.85058343892029 ], "wc_limitations_avg": [ 41.0, 27.65260686927485 ], "wc_correctness_avg": [ 28.333333333333332, 10.077477638553981 ], "wc_clarity_avg": [ 17.333333333333332, 18.372685039360892 ], "wc_relation_to_prior_work_avg": [ 36.666666666666664, 28.986586936412888 ], "wc_documentation_avg": [ 28.0, 21.275964529643932 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 368.3333333333333, 143.62992569640755 ], "wc_reply_reviewers_avg": [ 26.666666666666668, 29.63481436119049 ], "wc_reply_authors_avg": [ 1077.6666666666667, 645.8959324500779 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9304692027909330316&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "tum.de;;man.eu;man.eu;man.eu;man.eu;man.eu;man.eu;man.eu;man.eu;tum.de", "author_num": 11, "aff_unique_index": "0;1;1;2;1;1;1;3;1;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;MAN Truck & Bus SE;MAN T&B;MAN Truck and Bus", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tum.de;https://www.man.eu;;https://www.man.eu", "aff_unique_abbr": "TUM;;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Germany;" }, { "title": "JaxMARL: Multi-Agent RL Environments and Algorithms in JAX", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97649", "id": "X90tyXDe8z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X90tyXDe8z", "openreview": "https://openreview.net/forum?id=X90tyXDe8z", "poster": "/media/PosterPDFs/NeurIPS%202024/97649.png?t=1733441326.6090574", "project": "", "author_site": "Alexander Rutherford, Benjamin Ellis, Matteo Gallici, Jonathan Cook, Andrei Lupu, Gar\u00f0ar Ingvarsson Juto, Timon Willi, Ravi Hammond, Akbir Khan, Christian Schroeder de Witt, Alexandra Souly, Saptarashmi Bandyopadhyay, Mikayel Samvelyan, Minqi Jiang, Robert Lange, Shimon Whiteson, Bruno Lacerda, Nick Hawes, Tim Rockt\u00e4schel, Chris Lu, Jakob Foerster", "tldr": "", "abstract": "Benchmarks are crucial in the development of machine learning algorithms, significantly influencing reinforcement learning (RL) research through the available environments. Traditionally, RL environments run on the CPU, which limits their scalability with the computational resources typically available in academia. However, recent advancements in JAX have enabled the wider use of hardware acceleration, enabling massively parallel RL training pipelines and environments. While this has been successfully applied to single-agent RL, it has not yet been widely adopted for multi-agent scenarios. In this paper, we present JaxMARL, the first open-source, easy-to-use code base that combines GPU-enabled efficiency with support for a large number of commonly used MARL environments and popular baseline algorithms. Our experiments show that, in terms of wall clock time, our JAX-based training pipeline is up to 12,500 times faster than existing approaches. This enables efficient and thorough evaluations, potentially alleviating the evaluation crisis in the field. We also introduce and benchmark SMAX, a vectorised, simplified version of the popular StarCraft Multi-Agent Challenge, which removes the need to run the StarCraft II game engine. This not only enables GPU acceleration, but also provides a more flexible MARL environment, unlocking the potential for self-play, meta-learning, and other future applications in MARL. The code is available at https://github.com/flairox/jaxmarl.", "keywords": "reinforcement learning;multi-agent;multi-agent reinforcement learning;jax", "primary_area": "", "supplementary_material": "/attachment/11d4fe25709f6a0b0c598894ac66fe436c575da3.pdf", "author": "Alexander Rutherford;Benjamin Ellis;Matteo Gallici;Jonathan Cook;Andrei Lupu;Gar\u00f0ar Ingvarsson;Timon Willi;Ravi Hammond;Akbir Khan;Christian Schroeder de Witt;Alexandra Souly;Saptarashmi Bandyopadhyay;Mikayel Samvelyan;Minqi Jiang;Robert Tjarko Lange;Shimon Whiteson;Bruno Lacerda;Nick Hawes;Tim Rockt\u00e4schel;Chris Lu;Jakob Nicolaus Foerster", "authorids": "~Alexander_Rutherford1;~Benjamin_Ellis1;~Matteo_Gallici1;~Jonathan_Cook3;~Andrei_Lupu1;~Gar\u00f0ar_Ingvarsson1;~Timon_Willi1;~Ravi_Hammond1;~Akbir_Khan1;~Christian_Schroeder_de_Witt1;~Alexandra_Souly1;~Saptarashmi_Bandyopadhyay1;~Mikayel_Samvelyan1;~Minqi_Jiang1;~Robert_Tjarko_Lange1;~Shimon_Whiteson1;~Bruno_Lacerda1;~Nick_Hawes1;~Tim_Rockt\u00e4schel1;~Chris_Lu1;~Jakob_Nicolaus_Foerster1", "gender": "M;M;M;M;M;M;;M;M;M;F;;M;M;;;M;M;;M;M", "homepage": "https://amacrutherford.com/;http://whirl.cs.ox.ac.uk/pages/people/ben.html;https://github.com/mttga;;;;https://www.timonwilli.com;https://ravihammond.github.io/;https://akbir.dev;https://www.schroederdewitt.com;;;https://www.samvelyan.com/;https://twitter.com/minqijiang;https://roberttlange.github.io/;;https://bfalacerda.github.io/;https://www.robots.ox.ac.uk/~nickh/;;https://www.jakobfoerster.com;http://rockt.ai", "dblp": ";;;;218/7027;;243/3437;;;;;190/1780.html;170/0101;270/7949;245/9152;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;87/10333;35/1190;77/9579;176/5095;43/11537", "google_scholar": "https://scholar.google.com/citations?hl=en;;;7tcPHHYAAAAJ;I6aB-YUAAAAJ;;Dn-udzAAAAAJ;OGyagjQAAAAJ;https://scholar.google.com/citations?hl=en;DE60h_0AAAAJ;ylO2-BwAAAAJ;https://scholar.google.co.in/citations?user=UqTGV4gAAAAJ;2Qs19WAAAAAJ;;https://scholar.google.es/citations?user=cTrc3x4AAAAJ;;https://scholar.google.co.uk/citations?user=k9XjG_MAAAAJ;bRsi4zoAAAAJ;4WLoIRsAAAAJ;6z4lQzMAAAAJ;https://scholar.google.co.uk/citations?user=mWBY8aIAAAAJ", "orcid": "0000-0002-2662-5602;;;;;;0000-0003-4405-5700;0000-0003-4437-0348;;;;;0009-0001-6748-8755;;;;0000-0003-0862-331X;0000-0002-7556-6098;;;", "linkedin": ";;;jonathan-cook-78339618a/;lupu-andrei;gardarjuto/;;ravihammond/;;;;;samvelyan;minqi-jiang-585a6536/;;;;;;;rockt/", "or_profile": "~Alexander_Rutherford1;~Benjamin_Ellis1;~Matteo_Gallici1;~Jonathan_Cook3;~Andrei_Lupu1;~Gar\u00f0ar_Ingvarsson1;~Timon_Willi1;~Ravi_Hammond1;~Akbir_Khan1;~Christian_Schroeder_de_Witt1;~Alexandra_Souly1;~Saptarashmi_Bandyopadhyay1;~Mikayel_Samvelyan1;~Minqi_Jiang1;~Robert_Tjarko_Lange1;~Shimon_Whiteson1;~Bruno_Lacerda1;~Nick_Hawes1;~Chris_Lu1;~Jakob_Nicolaus_Foerster1;~Tim_Rocktaeschel1", "aff": "University of Oxford;Department of Computer Science, University of Oxford;Universidad Polit\u00e9cnica de Cataluna;University of Oxford;Meta AI;Mi\u00f0eind ehf.;University of Oxford, University of Oxford;University of Oxford;;University of Oxford;uk aisi;University of Maryland, College Park;Meta (FAIR);Google;TU Berlin;University of Oxford;University of Oxford;University of Oxford;University of Oxford;University of Oxford, University of Oxford;Google DeepMind", "aff_domain": "ox.ac.uk;cs.ox.ac.uk;upc.edu;ox.ac.uk;meta.com;mideind.is;eng.ox.ac.uk;oxford.ac.uk;;oxford.ac.uk;dsit.gov.uk;umd.edu;fb.com;google.com;tu-berlin.de;ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk;eng.ox.ac.uk;google.com", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Researcher;PhD student;PhD student;;Lecturer;Researcher;PhD student;Research Assistant;Researcher;PhD student;Professor;Senior Researcher;Full Professor;PhD student;Associate Professor;Senior Staff Research Scientist", "bibtex": "@inproceedings{\nrutherford2024jaxmarl,\ntitle={Jax{MARL}: Multi-Agent {RL} Environments and Algorithms in {JAX}},\nauthor={Alexander Rutherford and Benjamin Ellis and Matteo Gallici and Jonathan Cook and Andrei Lupu and Gar{\\dh}ar Ingvarsson and Timon Willi and Ravi Hammond and Akbir Khan and Christian Schroeder de Witt and Alexandra Souly and Saptarashmi Bandyopadhyay and Mikayel Samvelyan and Minqi Jiang and Robert Tjarko Lange and Shimon Whiteson and Bruno Lacerda and Nick Hawes and Tim Rockt{\\\"a}schel and Chris Lu and Jakob Nicolaus Foerster},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=X90tyXDe8z}\n}", "github": "", "reviewers": "Vp7t;DMjY;Zxvy;Vsti", "pdf_size": 4811310, "rating": "6;6;6;8", "confidence": "3;3;4;4", "wc_summary_and_contributions": "153;47;82;125", "wc_strengths": "51;58;84;87", "wc_improvement": "216;58;242;100", "wc_limitations": "67;8;95;8", "wc_correctness": "16;27;15;27", "wc_clarity": "9;8;1;41", "wc_relation_to_prior_work": "20;8;9;30", "wc_documentation": "27;20;7;42", "wc_additional_feedback": "1;1;1;1", "wc_review": "560;235;536;461", "wc_reply_reviewers": "690;104;295;24", "wc_reply_authors": "892;304;464;0", "reply_reviewers": "3;1;1;1", "reply_authors": "5;3;3;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 101.75, 40.48070528041724 ], "wc_strengths_avg": [ 70.0, 15.732132722552274 ], "wc_improvement_avg": [ 154.0, 77.00649323271382 ], "wc_limitations_avg": [ 44.5, 37.818646194701365 ], "wc_correctness_avg": [ 21.25, 5.7608593109014565 ], "wc_clarity_avg": [ 14.75, 15.465687828221544 ], "wc_relation_to_prior_work_avg": [ 16.75, 8.98262211161084 ], "wc_documentation_avg": [ 24.0, 12.62933094031509 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 448.0, 128.2828905193518 ], "wc_reply_reviewers_avg": [ 278.25, 257.30563052525684 ], "wc_reply_authors_avg": [ 415.0, 321.89905249938215 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 21, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7040941663329747399&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "ox.ac.uk;cs.ox.ac.uk;upc.edu;ox.ac.uk;meta.com;mideind.is;eng.ox.ac.uk;oxford.ac.uk;;oxford.ac.uk;dsit.gov.uk;umd.edu;fb.com;google.com;tu-berlin.de;ox.ac.uk;ox.ac.uk;ox.ac.uk;ox.ac.uk;eng.ox.ac.uk;google.com", "author_num": 21, "aff_unique_index": "0;0;1;0;2;3;0;0;0;4;5;2;6;7;0;0;0;0;0;6", "aff_unique_norm": "University of Oxford;Universitat Polit\u00e8cnica de Catalunya;Meta;Mi\u00f0eind ehf.;UK Atomic, Molecular and Optical Sciences Institute;University of Maryland;Google;Technische Universit\u00e4t Berlin", "aff_unique_dep": ";;Meta AI;;;;Google;", "aff_unique_url": "https://www.ox.ac.uk;https://www.upc.edu;https://meta.com;;;https://www/umd.edu;https://www.google.com;https://www.tu-berlin.de", "aff_unique_abbr": "Oxford;UPC;Meta;;UK AMO;UMD;Google;TU Berlin", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";Oxford;College Park;Mountain View;Berlin", "aff_country_unique_index": "0;0;1;0;2;3;0;0;0;0;2;2;2;4;0;0;0;0;0;0", "aff_country_unique": "United Kingdom;Spain;United States;Iceland;Germany" }, { "title": "FreeLong: Training-Free Long Video Generation with SpectralBlend Temporal Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94789", "id": "X9Fga52OOv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=X9Fga52OOv", "openreview": "https://openreview.net/forum?id=X9Fga52OOv", "poster": "", "project": "", "author_site": "Yu Lu, Yuanzhi Liang, Linchao Zhu, Yi Yang", "tldr": "", "abstract": "Video diffusion models have made substantial progress in various video generation applications. However, training models for long video generation tasks require significant computational and data resources, posing a challenge to developing long video diffusion models.\nThis paper investigates a straightforward and training-free approach to extend an existing short video diffusion model (e.g. pre-trained on 16-frame videos) for consistent long video generation (e.g. 128 frames). Our preliminary observation has found that directly applying the short video diffusion model to generate long videos can lead to severe video quality degradation. Further investigation reveals that this degradation is primarily due to the distortion of high-frequency components in long videos, characterized by a decrease in spatial high-frequency components and an increase in temporal high-frequency components. Motivated by this, we propose a novel solution named FreeLong to balance the frequency distribution of long video features during the denoising process. FreeLong blends the low-frequency components of global video features, which encapsulate the entire video sequence, with the high-frequency components of local video features that focus on shorter subsequences of frames. This approach maintains global consistency while incorporating diverse and high-quality spatiotemporal details from local videos, enhancing both the consistency and fidelity of long video generation. We evaluated FreeLong on multiple base video diffusion models and observed significant improvements. Additionally, our method supports coherent multi-prompt generation, ensuring both visual coherence and seamless transitions between scenes. Our project page is at: https://yulu.net.cn/freelong.", "keywords": "text-to-video generation; diffusion models; long video generation", "primary_area": "generative_models", "supplementary_material": "/attachment/20451201a36206f8190060ee3b8c00b1f6eb634f.zip", "author": "Yu Lu;Yuanzhi Liang;Linchao Zhu;Yi Yang", "authorids": "~Yu_Lu11;~Yuanzhi_Liang1;~Linchao_Zhu1;~Yi_Yang4", "gender": "M;;M;M", "homepage": "http://reler.net/people/yu_Lu/index.html;http://liangyzh.com/;http://ffmpbgrnn.github.io/;http://reler.net/", "dblp": ";193/8013;172/1383.html;", "google_scholar": "QQUmpsgAAAAJ;;9ZukE28AAAAJ;https://scholar.google.com.au/citations?user=RMSuNFwAAAAJ", "orcid": ";0009-0009-6051-0903;;", "linkedin": ";;;", "or_profile": "~Yu_Lu11;~Yuanzhi_Liang1;~Linchao_Zhu1;~Yi_Yang4", "aff": "University of Technology Sydney;University of Technology Sydney;Zhejiang University;Zhejiang University", "aff_domain": "uts.edu.au;uts.edu.au;zju.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024freelong,\ntitle={FreeLong: Training-Free Long Video Generation with SpectralBlend Temporal Attention},\nauthor={Yu Lu and Yuanzhi Liang and Linchao Zhu and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=X9Fga52OOv}\n}", "github": "", "reviewers": "QZ6p;XZSx;oAuU;838f", "pdf_size": 14021051, "rating": "5;5;6;7", "confidence": "5;4;3;4", "soundness": "3;3;4;3", "novelty": "3;2;3;3", "presentation": "2;2;4;3", "wc_summary": "63;48;61;65", "wc_strengths": "26;41;27;123", "wc_weaknesses": "115;191;8;45", "wc_questions": "74;53;43;23", "wc_limitations": "33;1;1;14", "wc_review": "311;334;140;270", "wc_reply_reviewers": "20;73;10;34", "wc_reply_authors": "26;170;44;47", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 59.25, 6.6473679001541655 ], "wc_strengths_avg": [ 54.25, 40.13337140086788 ], "wc_weaknesses_avg": [ 89.75, 69.95489618318364 ], "wc_questions_avg": [ 48.25, 18.376275465937052 ], "wc_limitations_avg": [ 12.25, 13.102957681378658 ], "wc_review_avg": [ 263.75, 75.0345753636282 ], "wc_reply_reviewers_avg": [ 34.25, 23.94133454926855 ], "wc_reply_authors_avg": [ 71.75, 57.290378773403134 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16855066816649312477&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "uts.edu.au;uts.edu.au;zju.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Technology Sydney;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Australia;China" }, { "title": "Jointly Modeling Inter- & Intra-Modality Dependencies for Multi-modal Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94788", "id": "XAKALzI3Gw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XAKALzI3Gw", "openreview": "https://openreview.net/forum?id=XAKALzI3Gw", "poster": "/media/PosterPDFs/NeurIPS%202024/94788.png?t=1731815392.4473104", "project": "", "author_site": "Divyam Madaan, Taro Makino, Sumit Chopra, Kyunghyun Cho", "tldr": "", "abstract": "Supervised multi-modal learning involves mapping multiple modalities to a target label. Previous studies in this field have concentrated on capturing in isolation either the inter-modality dependencies (the relationships between different modalities and the label) or the intra-modality dependencies (the relationships within a single modality and the label). We argue that these conventional approaches that rely solely on either inter- or intra-modality dependencies may not be optimal in general. We view the multi-modal learning problem from the lens of generative models where we consider the target as a source of multiple modalities and the interaction between them. Towards that end, we propose inter- \\& intra-modality modeling (I2M2) framework, which captures and integrates both the inter- and intra-modality dependencies, leading to more accurate predictions. We evaluate our approach using real-world healthcare and vision-and-language datasets with state-of-the-art models, demonstrating superior performance over traditional methods focusing only on one type of modality dependency. The code is available at https://github.com/divyam3897/I2M2.", "keywords": "Multi-modal learning;deep learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Divyam Madaan;Taro Makino;Sumit Chopra;Kyunghyun Cho", "authorids": "~Divyam_Madaan1;~Taro_Makino1;~Sumit_Chopra1;~Kyunghyun_Cho1", "gender": "M;M;M;M", "homepage": "https://dmadaan.com/;https://taromakino.github.io/;https://www.spchopra.net;http://kyunghyuncho.me", "dblp": "239/4899;261/3587;68/4681;41/9736", "google_scholar": "DNk4dZkAAAAJ;6hRIPHsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Divyam_Madaan1;~Taro_Makino1;~Sumit_Chopra1;~Kyunghyun_Cho1", "aff": "New York University;Genentech;NYU Grossman School of Medicine;Genentech", "aff_domain": "nyu.edu;gene.com;nyulangone.org;gene.com", "position": "PhD student;Intern;Associate Professor;Senior Director of Frontier Research", "bibtex": "@inproceedings{\nmadaan2024jointly,\ntitle={Jointly Modeling Inter- \\& Intra-Modality Dependencies for Multi-modal Learning},\nauthor={Divyam Madaan and Taro Makino and Sumit Chopra and Kyunghyun Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XAKALzI3Gw}\n}", "github": "", "reviewers": "qKeb;bvvY;nGTV;Y8eL;vide", "pdf_size": 3174454, "rating": "5;5;6;6;6", "confidence": "2;3;2;3;3", "soundness": "2;2;3;3;2", "novelty": "3;3;3;2;2", "presentation": "3;3;3;2;3", "wc_summary": "59;99;71;66;149", "wc_strengths": "85;57;32;18;47", "wc_weaknesses": "2;52;64;65;130", "wc_questions": "197;45;3;96;34", "wc_limitations": "2;44;9;1;6", "wc_review": "345;297;179;246;366", "wc_reply_reviewers": "396;0;0;12;10", "wc_reply_authors": "843;0;0;0;0", "reply_reviewers": "2;0;0;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 88.8, 33.02362790488047 ], "wc_strengths_avg": [ 47.8, 22.833309002420126 ], "wc_weaknesses_avg": [ 62.6, 40.82940117121485 ], "wc_questions_avg": [ 75.0, 67.95586803212802 ], "wc_limitations_avg": [ 12.4, 16.05739704933524 ], "wc_review_avg": [ 286.6, 67.89580252121628 ], "wc_reply_reviewers_avg": [ 83.6, 156.27872535953188 ], "wc_reply_authors_avg": [ 168.6, 337.2 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4178602553118749607&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "nyu.edu;gene.com;nyulangone.org;gene.com", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "New York University;Genentech;New York University Grossman School of Medicine", "aff_unique_dep": ";;School of Medicine", "aff_unique_url": "https://www.nyu.edu;https://www.genentech.com;https://med.nyu.edu", "aff_unique_abbr": "NYU;Genentech;NYU Grossman SOM", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ChronoMagic-Bench: A Benchmark for Metamorphic Evaluation of Text-to-Time-lapse Video Generation", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97648", "id": "XBcStBjBIE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XBcStBjBIE", "openreview": "https://openreview.net/forum?id=XBcStBjBIE", "poster": "/media/PosterPDFs/NeurIPS%202024/97648.png?t=1730081146.3092139", "project": "", "author_site": "Shenghai Yuan, Jinfa Huang, Yongqi Xu, YaoYang Liu, Shaofeng Zhang, Yujun Shi, Rui-Jie Zhu, Xinhua Cheng, Jiebo Luo, Li Yuan", "tldr": "", "abstract": "We propose a novel text-to-video (T2V) generation benchmark, *ChronoMagic-Bench*, to evaluate the temporal and metamorphic knowledge skills in time-lapse video generation of the T2V models (e.g. Sora and Lumiere). Compared to existing benchmarks that focus on visual quality and text relevance of generated videos, *ChronoMagic-Bench* focuses on the models\u2019 ability to generate time-lapse videos with significant metamorphic amplitude and temporal coherence. The benchmark probes T2V models for their physics, biology, and chemistry capabilities, in a free-form text control. For these purposes, *ChronoMagic-Bench* introduces **1,649** prompts and real-world videos as references, categorized into four major types of time-lapse videos: biological, human creation, meteorological, and physical phenomena, which are further divided into 75 subcategories. This categorization ensures a comprehensive evaluation of the models\u2019 capacity to handle diverse and complex transformations. To accurately align human preference on the benchmark, we introduce two new automatic metrics, MTScore and CHScore, to evaluate the videos' metamorphic attributes and temporal coherence. MTScore measures the metamorphic amplitude, reflecting the degree of change over time, while CHScore assesses the temporal coherence, ensuring the generated videos maintain logical progression and continuity. Based on the *ChronoMagic-Bench*, we conduct comprehensive manual evaluations of eighteen representative T2V models, revealing their strengths and weaknesses across different categories of prompts, providing a thorough evaluation framework that addresses current gaps in video generation research. More encouragingly, we create a large-scale *ChronoMagic-Pro* dataset, containing **460k** high-quality pairs of 720p time-lapse videos and detailed captions. Each caption ensures high physical content and large metamorphic amplitude, which have a far-reaching impact on the video generation community. The source data and code are publicly available on [https://pku-yuangroup.github.io/ChronoMagic-Bench](https://pku-yuangroup.github.io/ChronoMagic-Bench).", "keywords": "metamorphic;time-lapse;text-to-video generation;diffusion", "primary_area": "", "supplementary_material": "/attachment/4280c70b6693db84d7d63190f77fef6f2a6bbac9.pdf", "author": "Shenghai Yuan;Jinfa Huang;Yongqi Xu;YaoYang Liu;Shaofeng Zhang;Yujun Shi;Rui-Jie Zhu;Xinhua Cheng;Jiebo Luo;Li Yuan", "authorids": "~Shenghai_Yuan2;~Jinfa_Huang2;~Yongqi_Xu1;~YaoYang_Liu1;~Shaofeng_Zhang1;~Yujun_Shi1;~Rui-Jie_Zhu2;~Xinhua_Cheng1;~Jiebo_Luo1;~Li_Yuan2", "gender": ";M;M;M;M;M;M;M;;", "homepage": ";https://github.com/inFaaa;https://github.com/Cheliosoops?tab=repositories;https://lazysheeeeeep.github.io/;https://sherrylone.github.io;https://yujun-shi.github.io/;https://ruijie-zhu.github.io;https://cxh0519.github.io/;;", "dblp": ";39/9426;;;132/2540;146/4499;317/4836;260/2943;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=1rz3QEcAAAAJ;;VoVVJIgAAAAJ;Okeolr8AAAAJ;08ITzJsAAAAJ;NI4c3kcAAAAJ;;", "orcid": ";;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Shenghai_Yuan2;~Jinfa_Huang2;~Yongqi_Xu1;~YaoYang_Liu1;~Shaofeng_Zhang1;~Yujun_Shi1;~Rui-Jie_Zhu2;~Xinhua_Cheng1;~Jiebo_Luo1;~Li_Yuan2", "aff": ";University of Rochester;Guangdong University of Technology;University of Science and Technology of China;Shanghai Jiaotong University;National University of Singapore;University of California, Santa Cruz;Peking University;;", "aff_domain": ";rochester.edu;gdut.edu.cn;ustc.edu.cn;sjtu.edu.cn;u.nus.edu;ucsc.edu;pku.edu.cn;;", "position": ";PhD student;Undergrad student;Undergrad student;PhD student;PhD student;PhD student;PhD student;;", "bibtex": "@inproceedings{\nyuan2024chronomagicbench,\ntitle={ChronoMagic-Bench: A Benchmark for Metamorphic Evaluation of Text-to-Time-lapse Video Generation},\nauthor={Shenghai Yuan and Jinfa Huang and Yongqi Xu and YaoYang Liu and Shaofeng Zhang and Yujun Shi and Rui-Jie Zhu and Xinhua Cheng and Jiebo Luo and Li Yuan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XBcStBjBIE}\n}", "github": "", "reviewers": "ntFp;qu3H;4KT6;JSoz", "pdf_size": 11134674, "rating": "7;8;8;8", "confidence": "4;5;4;5", "wc_summary_and_contributions": "115;79;57;70", "wc_strengths": "131;90;51;4", "wc_improvement": "644;93;24;697", "wc_limitations": "17;94;24;25", "wc_correctness": "15;70;21;1", "wc_clarity": "19;74;1;69", "wc_relation_to_prior_work": "12;59;1;1", "wc_documentation": "39;41;1;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "993;601;181;882", "wc_reply_reviewers": "70;0;10;211", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;0;1;3", "reply_authors": "4;3;1;9", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 80.25, 21.533404282648853 ], "wc_strengths_avg": [ 69.0, 46.994680550036726 ], "wc_improvement_avg": [ 364.5, 307.5422735169915 ], "wc_limitations_avg": [ 40.0, 31.32890039564108 ], "wc_correctness_avg": [ 26.75, 26.00360551923521 ], "wc_clarity_avg": [ 40.75, 31.45135132232 ], "wc_relation_to_prior_work_avg": [ 18.25, 23.951774464536026 ], "wc_documentation_avg": [ 23.75, 16.90229274388537 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 664.25, 313.4576327033687 ], "wc_reply_reviewers_avg": [ 72.75, 84.18840478355675 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 4.25, 2.947456530637899 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17923982999262239552&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";rochester.edu;gdut.edu.cn;ustc.edu.cn;sjtu.edu.cn;u.nus.edu;ucsc.edu;pku.edu.cn;;", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "University of Rochester;Guangdong University of Technology;University of Science and Technology of China;Shanghai Jiao Tong University;National University of Singapore;University of California, Santa Cruz;Peking University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.rochester.edu;http://www.gdut.edu.cn;http://www.ustc.edu.cn;https://www.sjtu.edu.cn;https://www.nus.edu.sg;https://www.ucsc.edu;http://www.pku.edu.cn", "aff_unique_abbr": "U of R;GDUT;USTC;SJTU;NUS;UCSC;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;1;1;2;0;1", "aff_country_unique": "United States;China;Singapore" }, { "title": "Non-asymptotic Approximation Error Bounds of Parameterized Quantum Circuits", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94787", "id": "XCkII8nCt3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XCkII8nCt3", "openreview": "https://openreview.net/forum?id=XCkII8nCt3", "poster": "/media/PosterPDFs/NeurIPS%202024/94787.png?t=1731811220.2200806", "project": "", "author_site": "Zhan Yu, Qiuhao Chen, Yuling Jiao, Yinan Li, Xiliang Lu, Xin Wang, Jerry Yang", "tldr": "", "abstract": "Understanding the power of parameterized quantum circuits (PQCs) in accomplishing machine learning tasks is one of the most important questions in quantum machine learning. In this paper, we focus on the PQC expressivity for general multivariate function classes. Previously established Universal Approximation Theorems for PQCs are either nonconstructive or assisted with parameterized classical data processing, making it hard to justify whether the expressive power comes from the classical or quantum parts. We explicitly construct data re-uploading PQCs for approximating multivariate polynomials and smooth functions and establish the first non-asymptotic approximation error bounds for such functions in terms of the number of qubits, the quantum circuit depth and the number of trainable parameters of the PQCs. Notably, we show that for multivariate polynomials and multivariate smooth functions, the quantum circuit size and the number of trainable parameters of our proposed PQCs can be smaller than the deep ReLU neural networks. We further demonstrate the approximation capability of PQCs via numerical experiments. Our results pave the way for designing practical PQCs that can be implemented on near-term quantum devices with limited resources.", "keywords": "quantum machine learning;parameterized quantum circuits;expressivity;universal approximation;approximation error bound", "primary_area": "learning_theory", "supplementary_material": "/attachment/ff32f4d94c528a4c7b13ae0322279211e2b32cf0.zip", "author": "Zhan Yu;Qiuhao Chen;Yuling Jiao;Yinan Li;Xiliang Lu;Xin Wang;Jerry Zhijian Yang", "authorids": "~Zhan_Yu3;~Qiuhao_Chen1;~Yuling_Jiao1;~Yinan_Li7;~Xiliang_Lu1;~Xin_Wang48;~Jerry_Zhijian_Yang1", "gender": "M;M;M;M;M;M;M", "homepage": ";;https://jszy.whu.edu.cn/jiaoyuling/en/index.htm;https://www.yinanli.com/;http://jszy.whu.edu.cn/lvxiliang/en/index/275082/list/index.htm;https://www.xinwang.info/;http://zjyang.whu.edu.cn", "dblp": ";;136/7658;;70/7812;10/5630-22;11/9336", "google_scholar": "cF8RgGwAAAAJ;;yFDDsVgAAAAJ;;https://scholar.google.com/citations?hl=en;BFkAPOQAAAAJ;", "orcid": ";0009-0001-3795-4380;;;0000-0002-7592-5994;0000-0002-0641-3186;", "linkedin": ";;;;;;", "or_profile": "~Zhan_Yu3;~Qiuhao_Chen1;~Yuling_Jiao1;~Yinan_Li7;~Xiliang_Lu1;~Xin_Wang48;~Jerry_Zhijian_Yang1", "aff": "National University of Singapore;Wuhan University;Wuhan University;Wuhan University;Wuhan University;Hong Kong University of Science and Technology;Wuhan University", "aff_domain": "nus.edu;whu.edu.cn;whu.edu.cn;whu.edu;whu.edu.cn;hkust-gz.edu.cn;whu.edu.cn", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024nonasymptotic,\ntitle={Non-asymptotic Approximation Error Bounds of Parameterized Quantum Circuits},\nauthor={Zhan Yu and Qiuhao Chen and Yuling Jiao and Yinan Li and Xiliang Lu and Xin Wang and Jerry Zhijian Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XCkII8nCt3}\n}", "github": "", "reviewers": "5eFM;VAJH;joax;xe2x", "pdf_size": 1268809, "rating": "7;7;7;7", "confidence": "4;3;5;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "53;51;48;75", "wc_strengths": "31;40;42;120", "wc_weaknesses": "200;3;82;75", "wc_questions": "4;205;58;147", "wc_limitations": "2;1;31;10", "wc_review": "290;300;261;427", "wc_reply_reviewers": "11;3;15;8", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 56.75, 10.685855136581255 ], "wc_strengths_avg": [ 58.25, 35.891329036412124 ], "wc_weaknesses_avg": [ 90.0, 70.63639288638683 ], "wc_questions_avg": [ 103.5, 77.72547844818969 ], "wc_limitations_avg": [ 11.0, 12.062338081814818 ], "wc_review_avg": [ 319.5, 63.69654621720082 ], "wc_reply_reviewers_avg": [ 9.25, 4.380353866983808 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13639398391211803720&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nus.edu;whu.edu.cn;whu.edu.cn;whu.edu;whu.edu.cn;hkust-gz.edu.cn;whu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;1;2;1", "aff_unique_norm": "National University of Singapore;Wuhan University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;http://www.whu.edu.cn/;https://www.ust.hk", "aff_unique_abbr": "NUS;WHU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Prospective Learning: Learning for a Dynamic Future", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94786", "id": "XEbPJUQzs3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XEbPJUQzs3", "openreview": "https://openreview.net/forum?id=XEbPJUQzs3", "poster": "", "project": "", "author_site": "Ashwin De Silva, Rahul Ramesh, Rubing Yang, Siyu Yu, Joshua T Vogelstein, Pratik Chaudhari", "tldr": "", "abstract": "In real-world applications, the distribution of the data, and our goals, evolve over time. The prevailing theoretical framework for studying machine learning, namely probably approximately correct (PAC) learning, largely ignores time. As a consequence, existing strategies to address the dynamic nature of data and goals exhibit poor real-world performance. This paper develops a theoretical framework called\n\"Prospective Learning\" that is tailored for situations when the optimal hypothesis changes over time. In PAC learning, empirical risk minimization (ERM) is known to be consistent. We develop a learner called Prospective ERM, which returns a sequence of predictors that make predictions on future data. We prove that the risk of prospective ERM converges to the Bayes risk under certain assumptions on the stochastic process generating the data. Prospective ERM, roughly speaking, incorporates time as an input in addition to the data. We show that standard ERM as done in PAC learning, without incorporating time, can result in failure to learn when distributions are dynamic. Numerical experiments illustrate that prospective ERM can learn synthetic and visual recognition problems constructed from MNIST and CIFAR-10. Code at https://github.com/neurodata/prolearn.", "keywords": "Distribution Shifts;Learning Theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ashwin De Silva;Rahul Ramesh;Rubing Yang;Siyu Yu;Joshua T Vogelstein;Pratik Chaudhari", "authorids": "~Ashwin_De_Silva1;~Rahul_Ramesh2;~Rubing_Yang1;~Siyu_Yu3;~Joshua_T_Vogelstein1;~Pratik_Chaudhari1", "gender": "M;M;F;F;M;M", "homepage": "https://laknath1996.github.io;https://cis.upenn.edu/~rahulram;https://www.linkedin.com/in/rubing-yang-b4383a149/;;https://neurodata.io/;https://pratikac.github.io/", "dblp": "270/4302;168/7029;304/8991;;04/700;", "google_scholar": "xqhwEGIAAAAJ;wCa6nygAAAAJ;;;DWPfdT4AAAAJ;c_z5hWEAAAAJ", "orcid": "0000-0002-6406-7090;;;0009-0001-1459-2567;0000-0003-2487-6237;", "linkedin": "ashwin-de-silva-6852b14b/;;;;jovo1/;pratik-chaudhari-59508765", "or_profile": "~Ashwin_De_Silva1;~Rahul_Ramesh2;~Rubing_Yang1;~Siyu_Yu3;~Joshua_T_Vogelstein1;~Pratik_Chaudhari1", "aff": "Johns Hopkins University;University of Pennsylvania;University of Pennsylvania;Johns Hopkins University;Johns Hopkins University;School of Engineering and Applied Science, University of Pennsylvania", "aff_domain": "jhu.edu;upenn.edu;upenn.edu;jhu.edu;jhu.edu;seas.upenn.edu", "position": "PhD student;PhD student;PhD student;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nsilva2024prospective,\ntitle={Prospective Learning: Learning for a Dynamic Future},\nauthor={Ashwin De Silva and Rahul Ramesh and Rubing Yang and Siyu Yu and Joshua T Vogelstein and Pratik Chaudhari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XEbPJUQzs3}\n}", "github": "", "reviewers": "2cje;wUXj;Zv5g;NX2Q", "pdf_size": 2782463, "rating": "5;6;7;7", "confidence": "2;3;3;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;2;4", "wc_summary": "222;64;155;66", "wc_strengths": "30;40;198;130", "wc_weaknesses": "28;70;134;142", "wc_questions": "53;30;369;110", "wc_limitations": "31;1;8;40", "wc_review": "364;205;864;488", "wc_reply_reviewers": "0;15;305;25", "wc_reply_authors": "283;0;1223;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 126.75, 66.14142045647341 ], "wc_strengths_avg": [ 99.5, 68.92568461756474 ], "wc_weaknesses_avg": [ 93.5, 46.997340350279394 ], "wc_questions_avg": [ 140.5, 135.10088822802018 ], "wc_limitations_avg": [ 20.0, 16.015617378046965 ], "wc_review_avg": [ 480.25, 243.2081156129458 ], "wc_reply_reviewers_avg": [ 86.25, 126.60840216984022 ], "wc_reply_authors_avg": [ 376.5, 502.1974213394569 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QVKhdumQGmgJ:scholar.google.com/&scioq=Prospective+Learning:+Learning+for+a+Dynamic+Future&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "jhu.edu;upenn.edu;upenn.edu;jhu.edu;jhu.edu;seas.upenn.edu", "author_num": 6, "aff_unique_index": "0;1;1;0;0;1", "aff_unique_norm": "Johns Hopkins University;University of Pennsylvania", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.upenn.edu", "aff_unique_abbr": "JHU;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Textual Training for the Hassle-Free Removal of Unwanted Visual Data: Case Studies on OOD and Hateful Image Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94785", "id": "XErWgdxaFU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XErWgdxaFU", "openreview": "https://openreview.net/forum?id=XErWgdxaFU", "poster": "/media/PosterPDFs/NeurIPS%202024/94785.png?t=1731143759.9292233", "project": "", "author_site": "Saehyung Lee, Jisoo Mok, Sangha Park, Yongho Shin, Dahuin Jung, Sungroh Yoon", "tldr": "", "abstract": "In our study, we explore methods for detecting unwanted content lurking in visual datasets. We provide a theoretical analysis demonstrating that a model capable of successfully partitioning visual data can be obtained using only textual data. Based on the analysis, we propose Hassle-Free Textual Training (HFTT), a streamlined method capable of acquiring detectors for unwanted visual content, using only textual data in conjunction with pre-trained vision-language models. HFTT features an innovative objective function that significantly reduces the necessity for human involvement in data annotation. Furthermore, HFTT employs a clever textual data synthesis method, effectively emulating the integration of unknown visual data distribution into the training process at no extra cost. The unique characteristics of HFTT extend its utility beyond traditional out-of-distribution detection, making it applicable to tasks that address more abstract concepts. We complement our analyses with experiments in hateful image detection and out-of-distribution detection. Our codes are available at https://github.com/HFTT-anonymous/HFTT.", "keywords": "Vision-Language models;Multimodal models;CLIP;unwanted visual data detection;text-only training;hateful image detection", "primary_area": "other", "supplementary_material": "", "author": "Saehyung Lee;Jisoo Mok;Sangha Park;Yongho Shin;Dahuin Jung;Sungroh Yoon", "authorids": "~Saehyung_Lee1;~Jisoo_Mok1;~Sangha_Park2;~Yongho_Shin3;~Dahuin_Jung2;~Sungroh_Yoon1", "gender": "M;F;;M;F;", "homepage": ";;;https://kr.linkedin.com/in/yongho-shin-08b658139;https://hai.ssu.ac.kr/;http://ailab.snu.ac.kr", "dblp": "260/0442;294/8666;311/1675;;224/0158;99/1474", "google_scholar": "nS24h74AAAAJ;LZP5k2cAAAAJ;https://scholar.google.com/citations?hl=ko;;https://scholar.google.co.kr/citations?user=wleS-UQAAAAJ;Bphl_fIAAAAJ", "orcid": ";0000-0001-7002-0275;;;;0000-0002-2367-197X", "linkedin": ";;;yongho-shin-08b658139;;", "or_profile": "~Saehyung_Lee1;~Jisoo_Mok1;~Sangha_Park2;~Yongho_Shin3;~Dahuin_Jung2;~Sungroh_Yoon1", "aff": "Adobe Systems;Seoul National University;Seoul National University;Qualcomm Inc, QualComm;Seoul National University;Seoul National University", "aff_domain": "adobe.com;snu.ac.kr;snu.ac.kr;qti.qualcomm.com;snu.ac.kr;snu.ac.kr", "position": "Intern;PhD student;PhD student;Senior staff engineer;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2024textual,\ntitle={Textual Training for the Hassle-Free Removal of Unwanted Visual Data: Case Studies on {OOD} and Hateful Image Detection},\nauthor={Saehyung Lee and Jisoo Mok and Sangha Park and Yongho Shin and Dahuin Jung and Sungroh Yoon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XErWgdxaFU}\n}", "github": "", "reviewers": "kqnV;QxHY;LurW;CzuR", "pdf_size": 1698583, "rating": "6;6;6;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "56;75;106;63", "wc_strengths": "68;70;67;40", "wc_weaknesses": "63;186;19;151", "wc_questions": "9;31;176;47", "wc_limitations": "5;27;6;4", "wc_review": "201;389;374;305", "wc_reply_reviewers": "30;29;35;43", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 19.144189719076646 ], "wc_strengths_avg": [ 61.25, 12.316147936753602 ], "wc_weaknesses_avg": [ 104.75, 66.77714803733384 ], "wc_questions_avg": [ 65.75, 65.06679260575244 ], "wc_limitations_avg": [ 10.5, 9.5524865872714 ], "wc_review_avg": [ 317.25, 74.21716445674815 ], "wc_reply_reviewers_avg": [ 34.25, 5.539629951540085 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NHZs2iu4OqYJ:scholar.google.com/&scioq=Textual+Training+for+the+Hassle-Free+Removal+of+Unwanted+Visual+Data:+Case+Studies+on+OOD+and+Hateful+Image+Detection&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "adobe.com;snu.ac.kr;snu.ac.kr;qti.qualcomm.com;snu.ac.kr;snu.ac.kr", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Adobe;Seoul National University;Qualcomm Incorporated", "aff_unique_dep": "Adobe Systems Incorporated;;", "aff_unique_url": "https://www.adobe.com;https://www.snu.ac.kr;https://www.qualcomm.com", "aff_unique_abbr": "Adobe;SNU;Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "fMRI predictors based on language models of increasing complexity recover brain left lateralization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94784", "id": "XF1jpo5k6l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XF1jpo5k6l", "openreview": "https://openreview.net/forum?id=XF1jpo5k6l", "poster": "/media/PosterPDFs/NeurIPS%202024/94784.png?t=1731492244.109353", "project": "", "author_site": "Laurent Bonnasse-Gahot, Christophe Pallier", "tldr": "", "abstract": "Over the past decade, studies of naturalistic language processing where participants are scanned while listening to continuous text have flourished. Using word embeddings at first, then large language models, researchers have created encoding models to analyze the brain signals. Presenting these models with the same text as the participants allows to identify brain areas where there is a significant correlation between the functional magnetic resonance imaging (fMRI) time series and the ones predicted by the models' artificial neurons. One intriguing finding from these studies is that they have revealed highly symmetric bilateral activation patterns, somewhat at odds with the well-known left lateralization of language processing. Here, we report analyses of an fMRI dataset where we manipulate the complexity of large language models, testing 28 pretrained models from 8 different families, ranging from 124M to 14.2B parameters. First, we observe that the performance of models in predicting brain responses follows a scaling law, where the fit with brain activity increases linearly with the logarithm of the number of parameters of the model (and its performance on natural language processing tasks). Second, although this effect is present in both hemispheres, it is stronger in the left than in the right hemisphere. Specifically, the left-right difference in brain correlation follows a scaling law with the number of parameters. This finding reconciles computational analyses of brain activity using large language models with the classic observation from aphasic patients showing left hemisphere dominance for language.", "keywords": "large language models;brain lateralization;neuroscience;language processing;fMRI;scaling laws", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Laurent Bonnasse-Gahot;Christophe Pallier", "authorids": "~Laurent_Bonnasse-Gahot1;~Christophe_Pallier1", "gender": ";M", "homepage": ";http://www.pallier.org", "dblp": ";", "google_scholar": ";NlAgDhcAAAAJ", "orcid": ";", "linkedin": ";https://fr.linkedin.com/in/christophe-pallier-60332151", "or_profile": "~Laurent_Bonnasse-Gahot1;~Christophe_Pallier1", "aff": ";Centre National de la Recherche Scientifique", "aff_domain": ";cnrs.fr", "position": ";Researcher", "bibtex": "@inproceedings{\nbonnasse-gahot2024fmri,\ntitle={f{MRI} predictors based on language models of increasing complexity recover brain left lateralization},\nauthor={Laurent Bonnasse-Gahot and Christophe Pallier},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XF1jpo5k6l}\n}", "github": "", "reviewers": "LDde;Wj1c;d74H;jQCd", "pdf_size": 4781774, "rating": "6;6;7;7", "confidence": "4;4;5;5", "soundness": "2;3;3;3", "novelty": "2;1;3;3", "presentation": "3;1;4;3", "wc_summary": "68;52;84;99", "wc_strengths": "16;47;60;176", "wc_weaknesses": "380;376;96;248", "wc_questions": "34;23;143;12", "wc_limitations": "12;12;102;18", "wc_review": "510;510;485;553", "wc_reply_reviewers": "616;155;128;33", "wc_reply_authors": "270;127;159;59", "reply_reviewers": "2;1;2;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 75.75, 17.55526986406076 ], "wc_strengths_avg": [ 74.75, 60.602702084973075 ], "wc_weaknesses_avg": [ 275.0, 116.18519699169941 ], "wc_questions_avg": [ 53.0, 52.54046059942756 ], "wc_limitations_avg": [ 36.0, 38.18376618407357 ], "wc_review_avg": [ 514.5, 24.45914961727002 ], "wc_reply_reviewers_avg": [ 233.0, 225.7199592415345 ], "wc_reply_authors_avg": [ 153.75, 76.21474594853676 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17194379722111398513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";cnrs.fr", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Centre National de la Recherche Scientifique", "aff_unique_dep": "", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Detecting Brittle Decisions for Free: Leveraging Margin Consistency in Deep Robust Classifiers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94783", "id": "XHCYZNmqnv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XHCYZNmqnv", "openreview": "https://openreview.net/forum?id=XHCYZNmqnv", "poster": "/media/PosterPDFs/NeurIPS%202024/94783.png?t=1732955252.4076495", "project": "", "author_site": "JONAS NGNAWE, Sabyasachi Sahoo, Yann Pequignot, Frederic Precioso, Christian Gagn\u00e9", "tldr": "", "abstract": "Despite extensive research on adversarial training strategies to improve robustness, the decisions of even the most robust deep learning models can still be quite sensitive to imperceptible perturbations, creating serious risks when deploying them for high-stakes real-world applications. While detecting such cases may be critical, evaluating a model's vulnerability at a per-instance level using adversarial attacks is computationally too intensive and unsuitable for real-time deployment scenarios. The input space margin is the exact score to detect non-robust samples and is intractable for deep neural networks. This paper introduces the concept of margin consistency -- a property that links the input space margins and the logit margins in robust models -- for efficient detection of vulnerable samples. First, we establish that margin consistency is a necessary and sufficient condition to use a model's logit margin as a score for identifying non-robust samples. Next, through comprehensive empirical analysis of various robustly trained models on CIFAR10 and CIFAR100 datasets, we show that they indicate high margin consistency with a strong correlation between their input space margins and the logit margins. Then, we show that we can effectively use the logit margin to confidently detect brittle decisions with such models. Finally, we address cases where the model is not sufficiently margin-consistent by learning a pseudo-margin from the feature representation. Our findings highlight the potential of leveraging deep representations to efficiently assess adversarial vulnerability in deployment scenarios.", "keywords": "adversarial robustness;empirical robustness estimation;classification;vulnerability detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jonas Ngnawe;Sabyasachi Sahoo;Yann Batiste Pequignot;Frederic Precioso;Christian Gagn\u00e9", "authorids": "~Jonas_Ngnawe1;~Sabyasachi_Sahoo1;~Yann_Batiste_Pequignot1;~Frederic_Precioso2;~Christian_Gagn\u00e91", "gender": "M;M;M;M;M", "homepage": "https://ngnawejonas.github.io/;https://sabyasachis.github.io;https://www.irif.fr/~pequignot/;http://vision.gel.ulaval.ca/~cgagne/english.html;https://www.i3s.unice.fr/~precioso/", "dblp": "344/5089;293/8579.html;165/8856;80/5084-1;83/1407.html", "google_scholar": "KwAxSFsAAAAJ;https://scholar.google.com/citations?hl=en;Ft1XWI4AAAAJ;https://scholar.google.ca/citations?user=egixsbEAAAAJ;-0cKTucAAAAJ", "orcid": ";;0000-0003-1691-1020;0000-0003-3697-4184;0000-0001-8712-1443", "linkedin": "jonas-ngnawe-bb7712a2/;sabyasachi-sahoo-872789b6/;;;fr%C3%A9d%C3%A9ric-precioso-3a37389/", "or_profile": "~Jonas_Ngnawe1;~Sabyasachi_Sahoo1;~Yann_Batiste_Pequignot1;~Christian_Gagn\u00e91;~Frederic_Precioso1", "aff": "Mila-Quebec AI Institute;Mila;Universite Laval, Universit\u00e9 Laval;Universit\u00e9 Laval;Universit\u00e9 de Nice-Sophia Antipolis", "aff_domain": "mila.quebec;mila.quebec;ift.ulaval.ca;ulaval.ca;unice.fr", "position": "PhD student;PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nngnawe2024detecting,\ntitle={Detecting Brittle Decisions for Free: Leveraging Margin Consistency in Deep Robust Classifiers},\nauthor={Jonas Ngnawe and Sabyasachi Sahoo and Yann Batiste Pequignot and Frederic Precioso and Christian Gagn{\\'e}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XHCYZNmqnv}\n}", "github": "", "reviewers": "WVo4;jLJt;utdZ;4V5x", "pdf_size": 870582, "rating": "5;6;7;7", "confidence": "4;3;4;4", "soundness": "2;3;4;3", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "39;138;68;159", "wc_strengths": "54;132;208;93", "wc_weaknesses": "147;162;78;91", "wc_questions": "114;68;88;726", "wc_limitations": "2;6;1;15", "wc_review": "356;506;443;1084", "wc_reply_reviewers": "987;49;25;170", "wc_reply_authors": "1467;31;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "4;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.0, 49.15790882452182 ], "wc_strengths_avg": [ 121.75, 56.92264487881778 ], "wc_weaknesses_avg": [ 119.5, 35.69663849720307 ], "wc_questions_avg": [ 249.0, 275.87859648765794 ], "wc_limitations_avg": [ 6.0, 5.522680508593631 ], "wc_review_avg": [ 597.25, 286.027424384446 ], "wc_reply_reviewers_avg": [ 307.75, 395.99708016600323 ], "wc_reply_authors_avg": [ 374.5, 630.8821205264895 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cizAlT4hY3MJ:scholar.google.com/&scioq=Detecting+Brittle+Decisions+for+Free:+Leveraging+Margin+Consistency+in+Deep+Robust+Classifiers&hl=en&as_sdt=0,33", "gs_version_total": 10, "email": "mila.quebec;mila.quebec;ift.ulaval.ca;ulaval.ca;unice.fr", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Mila-Quebec AI Institute;Mila;Universit\u00e9 Laval;Universit\u00e9 de Nice-Sophia Antipolis", "aff_unique_dep": "AI Institute;Quebec Artificial Intelligence Institute;;", "aff_unique_url": "https://mila.quebec;https://mila.quebec;https://www.ulaval.ca;https://www.unice.fr", "aff_unique_abbr": "Mila;Mila;ULaval;UNICA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Sophia Antipolis", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;France" }, { "title": "Absorb & Escape: Overcoming Single Model Limitations in Generating Heterogeneous Genomic Sequences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94782", "id": "XHTl2k1LYk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XHTl2k1LYk", "openreview": "https://openreview.net/forum?id=XHTl2k1LYk", "poster": "/media/PosterPDFs/NeurIPS%202024/94782.png?t=1731475045.857268", "project": "", "author_site": "Zehui Li, Yuhao Ni, Guoxuan Xia, William Beardall, Akashaditya Das, Guy-Bart Stan, Yiren Zhao", "tldr": "", "abstract": "Recent advances in immunology and synthetic biology have accelerated the development of deep generative methods for DNA sequence design. Two dominant approaches in this field are AutoRegressive (AR) models and Diffusion Models (DMs). However, genomic sequences are functionally heterogeneous, consisting of multiple connected regions (e.g., Promoter Regions, Exons, and Introns) where elements within each region come from the same probability distribution, but the overall sequence is non-homogeneous. This heterogeneous nature presents challenges for a single model to accurately generate genomic sequences. In this paper, we analyze the properties of AR models and DMs in heterogeneous genomic sequence generation, pointing out crucial limitations in both methods: (i) AR models capture the underlying distribution of data by factorizing and learning the transition probability but fail to capture the global property of DNA sequences. (ii) DMs learn to recover the global distribution but tend to produce errors at the base pair level. To overcome the limitations of both approaches, we propose a post-training sampling method, termed Absorb & Escape (A&E) to perform compositional generation from AR models and DMs. This approach starts with samples generated by DMs and refines the sample quality using an AR model through the alternation of the Absorb and Escape steps. To assess the quality of generated sequences, we conduct extensive experiments on 15 species for conditional and unconditional DNA generation. The experiment results from motif distribution, diversity checks, and genome integration tests unequivocally show that A&E outperforms state-of-the-art AR models and DMs in genomic sequence generation. A&E does not suffer from the slowness of traditional MCMC to sample from composed distributions with Energy-Based Models whilst it obtains higher quality samples than single models. Our research sheds light on the limitations of current single-model approaches in DNA generation and provides a simple but effective solution for heterogeneous sequence generation. Code is available at the [Github Repo](https://github.com/Zehui127/Absorb-Escape).", "keywords": "Computational Biology;Genomics;Deep Learning;Generative Model", "primary_area": "generative_models", "supplementary_material": "/attachment/f8dd08f64d637e9c7d9092554d8605da31681c4d.zip", "author": "Zehui Li;Yuhao Ni;Guoxuan Xia;William Beardall;Akashaditya Das;Guy-Bart Stan;Yiren Zhao", "authorids": "~Zehui_Li2;~Yuhao_Ni1;~Guoxuan_Xia1;~William_Beardall1;~Akashaditya_Das1;~Guy-Bart_Stan1;~Yiren_Zhao2", "gender": ";M;M;M;M;M;M", "homepage": "https://zehui127.github.io/;https://harryknee.github.io/;;;;https://gstan.bg-research.cc.ic.ac.uk/welcome.html;https://aaronzhao.me", "dblp": ";;306/7751;;;https://dblp.uni-trier.de/pid/52/7139.html;https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren", "google_scholar": ";https://scholar.google.co.uk/citations?user=lCa46OoAAAAJ;;;JWXXq70AAAAJ;s2xYpAYAAAAJ;lOOmgEgAAAAJ", "orcid": ";;;0000-0002-2421-7330;;0000-0002-5560-902X;", "linkedin": ";yuhao-ni/;guoxuan-xia-716062173/;;https://www.linkedin.com/feed/?trk=guest_homepage-basic_nav-header-signin;guystan/;yiren-aaron-zhao-baa8b5116/", "or_profile": "~Zehui_Li2;~Yuhao_Ni1;~Guoxuan_Xia1;~William_Beardall1;~Akashaditya_Das1;~Guy-Bart_Stan1;~Yiren_Zhao2", "aff": "Imperial College London;Imperial College London;Huawei Technologies Ltd.;Imperial College London;;Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;ic.ac.uk;huawei.com;ic.ac.uk;;imperial.ac.uk;ic.ac.uk", "position": "PhD student;Undergrad student;Intern;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024absorb,\ntitle={Absorb \\& Escape: Overcoming Single Model Limitations in Generating Heterogeneous Genomic Sequences},\nauthor={Zehui Li and Yuhao Ni and Guoxuan Xia and William Beardall and Akashaditya Das and Guy-Bart Stan and Yiren Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XHTl2k1LYk}\n}", "github": "", "reviewers": "hZwW;Y7is;crFN;m7HE", "pdf_size": 4586105, "rating": "3;3;6;7", "confidence": "4;4;4;4", "soundness": "2;2;2;4", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "20;98;185;134", "wc_strengths": "32;36;196;58", "wc_weaknesses": "75;175;363;326", "wc_questions": "155;185;91;6", "wc_limitations": "26;1;58;62", "wc_review": "308;495;893;586", "wc_reply_reviewers": "0;0;262;26", "wc_reply_authors": "0;136;487;0", "reply_reviewers": "0;0;2;1", "reply_authors": "1;2;3;1", "rating_avg": [ 4.75, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 109.25, 60.08899649686288 ], "wc_strengths_avg": [ 80.5, 67.41476099490377 ], "wc_weaknesses_avg": [ 234.75, 116.0439033297312 ], "wc_questions_avg": [ 109.25, 68.60165814322566 ], "wc_limitations_avg": [ 36.75, 24.913600703230355 ], "wc_review_avg": [ 570.5, 211.45507797165808 ], "wc_reply_reviewers_avg": [ 72.0, 110.20889256316842 ], "wc_reply_authors_avg": [ 155.75, 199.14363534896114 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hVwR8jN9gWUJ:scholar.google.com/&scioq=Absorb+%26+Escape:+Overcoming+Single+Model+Limitations+in+Generating+Heterogeneous+Genomic+Sequences&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ic.ac.uk;ic.ac.uk;huawei.com;ic.ac.uk;;imperial.ac.uk;ic.ac.uk", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Imperial College London;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.imperial.ac.uk;https://www.huawei.com", "aff_unique_abbr": "ICL;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Self-Refining Diffusion Samplers: Enabling Parallelization via Parareal Iterations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94781", "id": "XHWkHFWi3k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XHWkHFWi3k", "openreview": "https://openreview.net/forum?id=XHWkHFWi3k", "poster": "", "project": "", "author_site": "Nikil Selvam, Amil Merchant, Stefano Ermon", "tldr": "", "abstract": "In diffusion models, samples are generated through an iterative refinement process, requiring hundreds of sequential model evaluations. Several recent methods have introduced approximations (fewer discretization steps or distillation) to trade off speed at the cost of sample quality. In contrast, we introduce Self-Refining Diffusion Samplers (SRDS) that retain sample quality and can improve latency at the cost of additional parallel compute. We take inspiration from the Parareal algorithm, a popular numerical method for parallel-in-time integration of differential equations. In SRDS, a quick but rough estimate of a sample is first created and then iteratively refined in parallel through Parareal iterations. SRDS is not only guaranteed to accurately solve the ODE and converge to the serial solution but also benefits from parallelization across the diffusion trajectory, enabling batched inference and pipelining. As we demonstrate for pre-trained diffusion models, the early convergence of this refinement procedure drastically reduces the number of steps required to produce a sample, speeding up generation for instance by up to 1.7x on a 25-step StableDiffusion-v2 benchmark and up to 4.3x on longer trajectories.", "keywords": "diffusion;sampling;parallel", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Nikil Roashan Selvam;Amil Merchant;Stefano Ermon", "authorids": "~Nikil_Roashan_Selvam1;~Amil_Merchant1;~Stefano_Ermon1", "gender": ";M;M", "homepage": ";https://scholar.google.com/citations?user=uRImMPoAAAAJ&hl=en;http://cs.stanford.edu/~ermon/", "dblp": ";;47/8135", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Nikil_Roashan_Selvam1;~Amil_Merchant1;~Stefano_Ermon1", "aff": ";Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nselvam2024selfrefining,\ntitle={Self-Refining Diffusion Samplers: Enabling Parallelization via Parareal Iterations},\nauthor={Nikil Roashan Selvam and Amil Merchant and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XHWkHFWi3k}\n}", "github": "", "reviewers": "XqLH;KhBd;pK7d;MmLD;dCML", "pdf_size": 14460390, "rating": "3;6;6;6;6", "confidence": "4;5;3;4;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;2", "presentation": "1;3;3;3;3", "wc_summary": "40;89;63;61;87", "wc_strengths": "25;26;51;56;101", "wc_weaknesses": "215;352;69;79;107", "wc_questions": "95;184;47;35;110", "wc_limitations": "24;21;28;10;12", "wc_review": "399;672;258;241;417", "wc_reply_reviewers": "0;11;16;21;21", "wc_reply_authors": "0;19;21;16;16", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 68.0, 18.2208671582886 ], "wc_strengths_avg": [ 51.8, 27.650678111033734 ], "wc_weaknesses_avg": [ 164.4, 107.1850735876969 ], "wc_questions_avg": [ 94.2, 53.0033961176074 ], "wc_limitations_avg": [ 19.0, 6.928203230275509 ], "wc_review_avg": [ 397.4, 154.71599787998653 ], "wc_reply_reviewers_avg": [ 13.8, 7.833262411026456 ], "wc_reply_authors_avg": [ 14.4, 7.445804187594514 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.1336306209562122, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4606010664967788032&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MMSite: A Multi-modal Framework for the Identification of Active Sites in Proteins", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94780", "id": "XHdwlbNSVb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XHdwlbNSVb", "openreview": "https://openreview.net/forum?id=XHdwlbNSVb", "poster": "/media/PosterPDFs/NeurIPS%202024/94780.png?t=1731051477.894943", "project": "", "author_site": "Song Ouyang, Huiyu Cai, Yong Luo, Kehua Su, Lefei Zhang, Bo Du", "tldr": "", "abstract": "The accurate identification of active sites in proteins is essential for the advancement of life sciences and pharmaceutical development, as these sites are of critical importance for enzyme activity and drug design. Recent advancements in protein language models (PLMs), trained on extensive datasets of amino acid sequences, have significantly improved our understanding of proteins. However, compared to the abundant protein sequence data, functional annotations, especially precise per-residue annotations, are scarce, which limits the performance of PLMs. On the other hand, textual descriptions of proteins, which could be annotated by human experts or a pretrained protein sequence-to-text model, provide meaningful context that could assist in the functional annotations, such as the localization of active sites. This motivates us to construct a $\\textbf{ProT}$ein-$\\textbf{A}$ttribute text $\\textbf{D}$ataset ($\\textbf{ProTAD}$), comprising over 570,000 pairs of protein sequences and multi-attribute textual descriptions. Based on this dataset, we propose $\\textbf{MMSite}$, a multi-modal framework that improves the performance of PLMs to identify active sites by leveraging biomedical language models (BLMs). In particular, we incorporate manual prompting and design a MACross module to deal with the multi-attribute characteristics of textual descriptions. MMSite is a two-stage (\"First Align, Then Fuse\") framework: first aligns the textual modality with the sequential modality through soft-label alignment, and then identifies active sites via multi-modal fusion. Experimental results demonstrate that MMSite achieves state-of-the-art performance compared to existing protein representation learning methods. The dataset and code implementation are available at https://github.com/Gift-OYS/MMSite.", "keywords": "Active Sites Identification;Multi-modal Learning;Protein Representation Learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/529bc3704d4e14c983d765c4e165367911c7bbd1.zip", "author": "Song Ouyang;Huiyu Cai;Yong Luo;Kehua Su;Lefei Zhang;Bo Du", "authorids": "~Song_Ouyang1;~Huiyu_Cai1;~Yong_Luo2;~Kehua_Su1;~Lefei_Zhang1;~Bo_Du3", "gender": "M;M;M;M;M;M", "homepage": "https://ouyang-song.com/;https://hui2000ji.github.io;;http://jszy.whu.edu.cn/sukehua/zh_CN/index.htm;;", "dblp": "33/2838;237/9501;57/5272-2.html;;28/10770;70/6443-1.html", "google_scholar": "VD43DR0AAAAJ;ZQ2VZ0sAAAAJ;zb1oVGIAAAAJ;;BLKHwNwAAAAJ;Shy1gnMAAAAJ", "orcid": "0009-0008-5693-7384;0000-0001-8506-537X;;;;", "linkedin": "%E5%B4%A7-%E6%AC%A7%E9%98%B3-0b2178308/;;;;;", "or_profile": "~Song_Ouyang1;~Huiyu_Cai1;~Yong_Luo2;~Kehua_Su1;~Lefei_Zhang1;~Bo_Du1", "aff": "Wuhan University;Mila - Quebec AI Institute;Wuhan University;Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;mila.quebec;whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "MS student;PhD student;Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nouyang2024mmsite,\ntitle={{MMS}ite: A Multi-modal Framework for the Identification of Active Sites in Proteins},\nauthor={Song Ouyang and Huiyu Cai and Yong Luo and Kehua Su and Lefei Zhang and Bo Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XHdwlbNSVb}\n}", "github": "", "reviewers": "CdPy;sxh3;9yAm;zjg9;LZPM", "pdf_size": 3374679, "rating": "4;5;5;6;8", "confidence": "4;4;3;3;4", "soundness": "2;2;3;2;3", "novelty": "2;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "24;102;126;66;96", "wc_strengths": "27;103;85;48;114", "wc_weaknesses": "183;124;417;33;90", "wc_questions": "16;722;133;183;107", "wc_limitations": "1;38;51;7;36", "wc_review": "251;1089;812;337;443", "wc_reply_reviewers": "87;99;42;37;26", "wc_reply_authors": "198;127;595;110;20", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 82.8, 35.0679340708859 ], "wc_strengths_avg": [ 75.4, 32.97635516548183 ], "wc_weaknesses_avg": [ 169.4, 133.01217989342177 ], "wc_questions_avg": [ 232.2, 250.83173642902526 ], "wc_limitations_avg": [ 26.6, 19.252012881774206 ], "wc_review_avg": [ 586.4, 315.8351468725417 ], "wc_reply_reviewers_avg": [ 58.2, 29.130053209700804 ], "wc_reply_authors_avg": [ 210.0, 200.67785129405786 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.06019292654288467, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ms2Vpt08H1kJ:scholar.google.com/&scioq=MMSite:+A+Multi-modal+Framework+for+the+Identification+of+Active+Sites+in+Proteins&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "whu.edu.cn;mila.quebec;whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Wuhan University;Quebec AI Institute", "aff_unique_dep": ";AI Institute", "aff_unique_url": "http://www.whu.edu.cn/;https://mila.quebec", "aff_unique_abbr": "WHU;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Canada" }, { "title": "MVInpainter: Learning Multi-View Consistent Inpainting to Bridge 2D and 3D Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94779", "id": "XIScpCMUse", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XIScpCMUse", "openreview": "https://openreview.net/forum?id=XIScpCMUse", "poster": "/media/PosterPDFs/NeurIPS%202024/94779.png?t=1731144162.517842", "project": "", "author_site": "Chenjie Cao, Chaohui Yu, Fan Wang, Xiangyang Xue, Yanwei Fu", "tldr": "", "abstract": "Novel View Synthesis (NVS) and 3D generation have recently achieved prominent improvements. However, these works mainly focus on confined categories or synthetic 3D assets, which are discouraged from generalizing to challenging in-the-wild scenes and fail to be employed with 2D synthesis directly. Moreover, these methods heavily depended on camera poses, limiting their real-world applications. \nTo overcome these issues, we propose MVInpainter, re-formulating the 3D editing as a multi-view 2D inpainting task. Specifically, MVInpainter partially inpaints multi-view images with the reference guidance rather than intractably generating an entirely novel view from scratch, which largely simplifies the difficulty of in-the-wild NVS and leverages unmasked clues instead of explicit pose conditions. To ensure cross-view consistency, MVInpainter is enhanced by video priors from motion components and appearance guidance from concatenated reference key\\&value attention. Furthermore, MVInpainter incorporates slot attention to aggregate high-level optical flow features from unmasked regions to control the camera movement with pose-free training and inference. Sufficient scene-level experiments on both object-centric and forward-facing datasets verify the effectiveness of MVInpainter, including diverse tasks, such as multi-view object removal, synthesis, insertion, and replacement. The project page is https://ewrfcas.github.io/MVInpainter/.", "keywords": "Multi-view synthesis;Image inpainting;3D editing", "primary_area": "generative_models", "supplementary_material": "/attachment/1f884324b26c97f0a5c62440c760984f41abf580.zip", "author": "Chenjie Cao;Chaohui Yu;Fan Wang;Xiangyang Xue;Yanwei Fu", "authorids": "~Chenjie_Cao1;~Chaohui_Yu1;~Fan_Wang6;~Xiangyang_Xue2;~Yanwei_Fu2", "gender": "M;M;F;M;M", "homepage": "https://ewrfcas.github.io/;https://richardych.github.io/;;http://homepage.fudan.edu.cn//xyxue;http://yanweifu.github.io", "dblp": "https://dblp.uni-trier.de/pid/193/0823;14/10377;;84/3791;63/9065", "google_scholar": "1INK-I0AAAAJ;b1Q-k20AAAAJ;WCRGTHsAAAAJ;;https://scholar.google.co.uk/citations?user=Vg54TcsAAAAJ", "orcid": ";0000-0002-7852-4491;0000-0001-7320-1119;0000-0002-4897-9209;0000-0002-6595-6893", "linkedin": ";;;;", "or_profile": "~Chenjie_Cao1;~Chaohui_Yu1;~Fan_Wang6;~Xiangyang_Xue2;~Yanwei_Fu2", "aff": "Fudan University;Alibaba Group;Alibaba Group;Fudan University;Fudan University,", "aff_domain": "fudan.edu.cn;alibaba-inc.com;alibaba-inc.com;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Researcher;Senior Staff Algorithm Engineer;Full Professor;Professor", "bibtex": "@inproceedings{\ncao2024mvinpainter,\ntitle={{MVI}npainter: Learning Multi-View Consistent Inpainting to Bridge 2D and 3D Editing},\nauthor={Chenjie Cao and Chaohui Yu and Fan Wang and Xiangyang Xue and Yanwei Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XIScpCMUse}\n}", "github": "", "reviewers": "NjjA;VJEq;jssh;n43z", "pdf_size": 35712992, "rating": "4;5;6;7", "confidence": "3;3;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "138;48;45;37", "wc_strengths": "95;64;67;64", "wc_weaknesses": "474;52;144;12", "wc_questions": "131;12;28;54", "wc_limitations": "4;6;1;15", "wc_review": "842;182;285;182", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "57;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 41.188590653237945 ], "wc_strengths_avg": [ 72.5, 13.047988350699889 ], "wc_weaknesses_avg": [ 170.5, 181.64457052166463 ], "wc_questions_avg": [ 56.25, 45.68574723039998 ], "wc_limitations_avg": [ 6.5, 5.220153254455275 ], "wc_review_avg": [ 372.75, 274.16543819380297 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 14.25, 24.681724007856502 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17278592814661789196&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;alibaba-inc.com;alibaba-inc.com;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Fudan University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Fudan;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "TinyTTA: Efficient Test-time Adaptation via Early-exit Ensembles on Edge Devices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94778", "id": "XIcBCBe6C3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XIcBCBe6C3", "openreview": "https://openreview.net/forum?id=XIcBCBe6C3", "poster": "/media/PosterPDFs/NeurIPS%202024/94778.png?t=1731634123.7634902", "project": "", "author_site": "Hong Jia, Young Kwon, Alessio Orsino, Ting Dang, DOMENICO TALIA, Cecilia Mascolo", "tldr": "", "abstract": "The increased adoption of Internet of Things (IoT) devices has led to the generation of large data streams with applications in healthcare, sustainability, and robotics. In some cases, deep neural networks have been deployed directly on these resource-constrained units to limit communication overhead, increase efficiency and privacy, and enable real-time applications. However, a common challenge in this setting is the continuous adaptation of models necessary to accommodate changing environments, i.e., data distribution shifts. Test-time adaptation (TTA) has emerged as one potential solution, but its validity has yet to be explored in resource-constrained hardware settings, such as those involving microcontroller units (MCUs). TTA on constrained devices generally suffers from i) memory overhead due to the full backpropagation of a large pre-trained network, ii) lack of support for normalization layers on MCUs, and iii) either memory exhaustion with large batch sizes required for updating or poor performance with small batch sizes. In this paper, we propose TinyTTA, to enable, for the first time, efficient TTA on constrained devices with limited memory. To address the limited memory constraints, we introduce a novel self-ensemble and batch-agnostic early-exit strategy for TTA, which enables continuous adaptation with small batch sizes for reduced memory usage, handles distribution shifts, and improves latency efficiency. Moreover, we develop the TinyTTA Engine, a first-of-its-kind MCU library that enables on-device TTA. We validate TinyTTA on a Raspberry Pi Zero 2W and an STM32H747 MCU. Experimental results demonstrate that TinyTTA improves TTA accuracy by up to 57.6\\%, reduces memory usage by up to six times, and achieves faster and more energy-efficient TTA. Notably, TinyTTA is the only framework able to run TTA on MCU STM32H747 with a 512 KB memory constraint while maintaining high performance.", "keywords": "Test-time adaptation;efficiency;edge device;microcontroller", "primary_area": "infrastructure", "supplementary_material": "", "author": "Hong Jia;Young D. Kwon;Alessio Orsino;Ting Dang;Domenico Talia;Cecilia Mascolo", "authorids": "~Hong_Jia1;~Young_D._Kwon1;~Alessio_Orsino1;~Ting_Dang1;~Domenico_Talia1;~Cecilia_Mascolo1", "gender": ";M;M;F;M;F", "homepage": ";https://theyoungkwon.github.io;;https://tingdang90.github.io/;http://si.deis.unical.it/talia/;http://www.cl.cam.ac.uk/users/cm542", "dblp": ";77/5405;294/1500.html;170/5330;;21/6419", "google_scholar": ";_rp_S9MAAAAJ;https://scholar.google.it/citations?user=lzJ1L8sAAAAJ;Sb1Pj4sAAAAJ;0RMjJHMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-5031-1996;0000-0003-3806-1493;;0000-0001-9614-4380", "linkedin": ";theyoungkwon/;;;;", "or_profile": "~Hong_Jia1;~Young_D._Kwon1;~Alessio_Orsino1;~Ting_Dang1;~Domenico_Talia1;~Cecilia_Mascolo1", "aff": ";University of Cambridge;University of Calabria;University of Melbourne;University of Calabria;University of Cambridge", "aff_domain": ";cam.ac.uk;unical.it;unimelb.edu.au;unical.it;cam.ac.uk", "position": ";PhD student;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\njia2024tinytta,\ntitle={Tiny{TTA}: Efficient Test-time Adaptation via Early-exit Ensembles on Edge Devices},\nauthor={Hong Jia and Young D. Kwon and Alessio Orsino and Ting Dang and Domenico Talia and Cecilia Mascolo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XIcBCBe6C3}\n}", "github": "", "reviewers": "bEbZ;yW6M;2zcM;DHsU", "pdf_size": 8245404, "rating": "4;5;6;6", "confidence": "4;4;3;3", "soundness": "3;2;3;2", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "170;72;69;105", "wc_strengths": "30;45;148;85", "wc_weaknesses": "499;29;423;202", "wc_questions": "59;290;106;34", "wc_limitations": "26;1;22;34", "wc_review": "784;437;768;460", "wc_reply_reviewers": "0;18;54;23", "wc_reply_authors": "63;112;112;112", "reply_reviewers": "0;1;2;1", "reply_authors": "2;3;3;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 40.6386515524322 ], "wc_strengths_avg": [ 77.0, 45.65632486304608 ], "wc_weaknesses_avg": [ 288.25, 185.2179459447707 ], "wc_questions_avg": [ 122.25, 100.24064794283804 ], "wc_limitations_avg": [ 20.75, 12.193748398257199 ], "wc_review_avg": [ 612.25, 164.0493447106693 ], "wc_reply_reviewers_avg": [ 23.75, 19.447043477094404 ], "wc_reply_authors_avg": [ 99.75, 21.21762239271875 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6462184065284203879&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";cam.ac.uk;unical.it;unimelb.edu.au;unical.it;cam.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "University of Cambridge;University of Calabria;University of Melbourne", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.unical.it;https://www.unimelb.edu.au", "aff_unique_abbr": "Cambridge;;UniMelb", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;2;1;0", "aff_country_unique": "United Kingdom;Italy;Australia" }, { "title": "Log-concave Sampling from a Convex Body with a Barrier: a Robust and Unified Dikin Walk", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94777", "id": "XKrSB5a79F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XKrSB5a79F", "openreview": "https://openreview.net/forum?id=XKrSB5a79F", "poster": "/media/PosterPDFs/NeurIPS%202024/94777.png?t=1732321697.4782073", "project": "", "author_site": "Yuzhou Gu, Nikki Lijing Kuang, Yian Ma, Zhao Song, Lichen Zhang", "tldr": "", "abstract": "We consider the problem of sampling from a $d$-dimensional log-concave distribution $\\pi(\\theta) \\propto \\exp(-f(\\theta))$ for $L$-Lipschitz $f$, constrained to a convex body (described by $n$ hyperplanes) equipped with a barrier function, contained in a ball of radius $R$ with a $w$-warm start. \n\nWe propose a \\emph{robust} sampling framework that computes spectral approximations to the Hessian of the barrier functions in each iteration. We prove that for the polytope constraints, sampling with the Lee-Sidford barrier function mixes within $\\widetilde O((d^2+dL^2R^2)\\log(w/\\delta))$ steps with a per step cost of $\\widetilde O(nd^{\\omega-1})$, where $\\omega\\approx 2.37$ is the fast matrix multiplication exponent. Compared to the prior work of Mangoubi and Vishnoi, our approach gives faster mixing time as we are able to design a generalized soft-threshold Dikin walk beyond log-barrier.\n\nWe further extend our result to show how to sample from a $d$-dimensional spectrahedron, the constrained set of a semidefinite program, specified by the set $\\{x\\in \\mathbb{R}^d: \\sum_{i=1}^d x_i A_i \\succeq C \\}$ where $A_1,\\ldots,A_d, C$ are $n\\times n$ real symmetric matrices. We design a walk that mixes in $\\widetilde O((nd+dL^2R^2)\\log(w/\\delta))$ steps with a per iteration cost of $\\widetilde O(n^\\omega+n^2d^{3\\omega-5})$. We improve the mixing time bound of prior best Dikin walk due to Narayanan and Rakhlin that mixes in $\\widetilde O((n^2d^3+n^2dL^2R^2)\\log(w/\\delta))$ steps.", "keywords": "Log-concave sampling;Dikin walk", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Yuzhou Gu;Nikki Lijing Kuang;Yian Ma;Zhao Song;Lichen Zhang", "authorids": "~Yuzhou_Gu1;~Nikki_Lijing_Kuang1;~Yian_Ma1;~Zhao_Song3;~Lichen_Zhang2", "gender": "M;M;M;M;F", "homepage": "https://sevenkplus.com/;https://sites.google.com/view/yianma;https://www.youtube.com/@zhaosong2031;https://lczh.github.io/;", "dblp": "182/2497;;76/4051-2;00/6357-3;229/9146", "google_scholar": "WHoU_9MAAAAJ;A0TFlacAAAAJ;yDZct7UAAAAJ;https://scholar.google.com/citations?view_op=list_works;XYhmg74AAAAJ", "orcid": "0000-0003-1722-5241;;;;", "linkedin": ";;;;", "or_profile": "~Yuzhou_Gu1;~Yian_Ma1;~Zhao_Song3;~Lichen_Zhang2;~Nikki_Kuang1", "aff": "Institue for Advanced Study, Princeton;University of California, San Diego;Adobe;Amazon;University of California, San Diego", "aff_domain": "ias.edu;ucsd.edu;adobe.com;amazon.com;ucsd.edu", "position": "Postdoc;Assistant Professor;Researcher;Intern;PhD student", "bibtex": "@inproceedings{\ngu2024logconcave,\ntitle={Log-concave Sampling from a Convex Body with a Barrier: a Robust and Unified Dikin Walk},\nauthor={Yuzhou Gu and Nikki Lijing Kuang and Yian Ma and Zhao Song and Lichen Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XKrSB5a79F}\n}", "github": "", "reviewers": "aoiW;Cu4d;zXRR", "pdf_size": 661856, "rating": "7;7;8", "confidence": "3;1;3", "soundness": "3;4;4", "novelty": "3;4;4", "presentation": "4;4;3", "wc_summary": "13;106;372", "wc_strengths": "21;22;88", "wc_weaknesses": "3;8;44", "wc_questions": "146;1;102", "wc_limitations": "1;1;1", "wc_review": "184;138;607", "wc_reply_reviewers": "3;30;8", "wc_reply_authors": "544;12;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 163.66666666666666, 152.12787019112864 ], "wc_strengths_avg": [ 43.666666666666664, 31.351058816073323 ], "wc_weaknesses_avg": [ 18.333333333333332, 18.263503375736967 ], "wc_questions_avg": [ 83.0, 60.70145522692736 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 309.6666666666667, 211.08344848003176 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 11.728408057172787 ], "wc_reply_authors_avg": [ 185.33333333333334, 253.6629434680771 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yIS8yg_1GZ4J:scholar.google.com/&scioq=Log-concave+Sampling+from+a+Convex+Body+with+a+Barrier:+a+Robust+and+Unified+Dikin+Walk&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ias.edu;ucsd.edu;adobe.com;amazon.com;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Institute for Advanced Study;University of California, San Diego;Adobe;Amazon", "aff_unique_dep": ";;Adobe Inc.;Amazon.com, Inc.", "aff_unique_url": "https://wwwIAS.edu;https://www.ucsd.edu;https://www.adobe.com;https://www.amazon.com", "aff_unique_abbr": "IAS;UCSD;Adobe;Amazon", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Princeton;San Diego;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SPO: Sequential Monte Carlo Policy Optimisation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94776", "id": "XKvYcPPH5G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XKvYcPPH5G", "openreview": "https://openreview.net/forum?id=XKvYcPPH5G", "poster": "", "project": "", "author_site": "Matthew Macfarlane, Edan Toledo, Donal Byrne, Paul Duckworth, Alexandre Laterre", "tldr": "", "abstract": "Leveraging planning during learning and decision-making is central to the long-term development of intelligent agents. Recent works have successfully combined tree-based search methods and self-play learning mechanisms to this end. However, these methods typically face scaling challenges due to the sequential nature of their search. While practical engineering solutions can partly overcome this, they often result in a negative impact on performance. In this paper, we introduce SPO: Sequential Monte Carlo Policy Optimisation, a model-based reinforcement learning algorithm grounded within the Expectation Maximisation (EM) framework. We show that SPO provides robust policy improvement and efficient scaling properties. The sample-based search makes it directly applicable to both discrete and continuous action spaces without modifications. We demonstrate statistically significant improvements in performance relative to model-free and model-based baselines across both continuous and discrete environments. Furthermore, the parallel nature of SPO\u2019s search enables effective utilisation of hardware accelerators, yielding favourable scaling laws.", "keywords": "Reinforcement Learning;Policy Optimisation;Planning;Model-based RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Matthew Macfarlane;Edan Toledo;Donal John Byrne;Paul Duckworth;Alexandre Laterre", "authorids": "~Matthew_Macfarlane1;~Edan_Toledo1;~Donal_John_Byrne1;~Paul_Duckworth1;~Alexandre_Laterre1", "gender": ";M;M;M;M", "homepage": ";;;http://www.robots.ox.ac.uk/~scpd/;", "dblp": ";305/9859;;179/2160;223/4200", "google_scholar": ";_bLUH-MAAAAJ;;I64MZDoAAAAJ;HrMSaicAAAAJ", "orcid": ";;;0000-0001-9052-6919;", "linkedin": "matthew-macfarlane-b86163bb/;edan-toledo;donal-byrne-ai/;;reinforce/", "or_profile": "~Matthew_Macfarlane1;~Edan_Toledo1;~Donal_John_Byrne1;~Paul_Duckworth1;~Alexandre_Laterre1", "aff": "University of Amsterdam;InstaDeep;CFDX;InstaDeep;InstaDeep", "aff_domain": "uva.nl;instadeep.com;cfdx.io;instadeep.com;instadeep.com", "position": "PhD student;Researcher;Researcher;Principal Researcher;head of research", "bibtex": "@inproceedings{\nmacfarlane2024spo,\ntitle={{SPO}: Sequential Monte Carlo Policy Optimisation},\nauthor={Matthew Macfarlane and Edan Toledo and Donal John Byrne and Paul Duckworth and Alexandre Laterre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XKvYcPPH5G}\n}", "github": "", "reviewers": "xUvG;X2nr;R8DZ", "pdf_size": 1248633, "rating": "4;5;7", "confidence": "3;3;4", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "39;248;71", "wc_strengths": "29;72;130", "wc_weaknesses": "83;1;19", "wc_questions": "3;429;33", "wc_limitations": "1;1;8", "wc_review": "155;751;261", "wc_reply_reviewers": "0;0;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 119.33333333333333, 91.91421120926962 ], "wc_strengths_avg": [ 77.0, 41.38437708443449 ], "wc_weaknesses_avg": [ 34.333333333333336, 35.188381921057726 ], "wc_questions_avg": [ 155.0, 194.13397435791603 ], "wc_limitations_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_review_avg": [ 389.0, 259.60482789552793 ], "wc_reply_reviewers_avg": [ 5.333333333333333, 7.542472332656507 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15868735275923731212&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "uva.nl;instadeep.com;cfdx.io;instadeep.com;instadeep.com", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "University of Amsterdam;InstaDeep;cfdx", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;https://www.instadeep.com;", "aff_unique_abbr": "UvA;InstaDeep;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Netherlands;United Kingdom;" }, { "title": "Sample Complexity of Interventional Causal Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94775", "id": "XL9aaXl0u6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XL9aaXl0u6", "openreview": "https://openreview.net/forum?id=XL9aaXl0u6", "poster": "", "project": "", "author_site": "Emre Acart\u00fcrk, Burak Var\u0131c\u0131, Karthikeyan Shanmugam, Ali Tajer", "tldr": "", "abstract": "Consider a data-generation process that transforms low-dimensional _latent_ causally-related variables to high-dimensional _observed_ variables. Causal representation learning (CRL) is the process of using the observed data to recover the latent causal variables and the causal structure among them. Despite the multitude of identifiability results under various interventional CRL settings, the existing guarantees apply exclusively to the _infinite-sample_ regime (i.e., infinite observed samples). This paper establishes the first sample-complexity analysis for the finite-sample regime, in which the interactions between the number of observed samples and probabilistic guarantees on recovering the latent variables and structure are established. This paper focuses on _general_ latent causal models, stochastic _soft_ interventions, and a linear transformation from the latent to the observation space. The identifiability results ensure graph recovery up to ancestors and latent variables recovery up to mixing with parent variables. Specifically, ${\\cal O}((\\log \\frac{1}{\\delta})^{4})$ samples suffice for latent graph recovery up to ancestors with probability $1 - \\delta$, and ${\\cal O}((\\frac{1}{\\epsilon}\\log \\frac{1}{\\delta})^{4})$ samples suffice for latent causal variables recovery that is $\\epsilon$ close to the identifiability class with probability $1 - \\delta$.", "keywords": "Causal representation learning;sample complexity;interventions;score-based methods;identifiability", "primary_area": "causal_inference", "supplementary_material": "/attachment/17c23d0928baa11f4282d309d9f275135623b8e0.zip", "author": "Emre Acart\u00fcrk;Burak Var\u0131c\u0131;Karthikeyan Shanmugam;Ali Tajer", "authorids": "~Emre_Acart\u00fcrk1;~Burak_Var\u0131c\u01311;~Karthikeyan_Shanmugam1;~Ali_Tajer1", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/corp/view/karthikeyan-shanmugam/;https://www.isg-rpi.com/;https://bvarici.github.io/", "dblp": "338/7569;;65/2830;289/8565", "google_scholar": "KLiI1JwAAAAJ;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;;v_SL5c4AAAAJ", "orcid": ";0009-0008-2879-5868;;", "linkedin": ";;;", "or_profile": "~Emre_Acart\u00fcrk1;~Karthikeyan_Shanmugam1;~Ali_Tajer1;~Burak_Varici1", "aff": "Rensselaer Polytechnic Institute;Google Research;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;google.com;rpi.edu;rpi.edu", "position": "PhD student;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nacart{\\\"u}rk2024sample,\ntitle={Sample Complexity of Interventional Causal Representation Learning},\nauthor={Emre Acart{\\\"u}rk and Burak Var{\\i}c{\\i} and Karthikeyan Shanmugam and Ali Tajer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XL9aaXl0u6}\n}", "github": "", "reviewers": "jaoy;fPsb;Q5cw;Yb2M", "pdf_size": 600087, "rating": "6;7;7;7", "confidence": "3;2;3;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "155;113;30;43", "wc_strengths": "91;29;10;36", "wc_weaknesses": "284;40;26;76", "wc_questions": "83;123;31;38", "wc_limitations": "5;1;1;1", "wc_review": "618;306;98;194", "wc_reply_reviewers": "43;11;74;11", "wc_reply_authors": "0;0;312;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.25, 51.16822744633627 ], "wc_strengths_avg": [ 41.5, 30.120590963658067 ], "wc_weaknesses_avg": [ 106.5, 104.09010519737215 ], "wc_questions_avg": [ 68.75, 37.13741374947911 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 304.0, 195.66297554724042 ], "wc_reply_reviewers_avg": [ 34.75, 26.156978036462853 ], "wc_reply_authors_avg": [ 78.0, 135.09996299037243 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12485189180217860336&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "email": "rpi.edu;google.com;rpi.edu;rpi.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.rpi.edu;https://research.google", "aff_unique_abbr": "RPI;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "High-probability complexity bounds for stochastic non-convex minimax optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94774", "id": "XMQTNzlgTJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XMQTNzlgTJ", "openreview": "https://openreview.net/forum?id=XMQTNzlgTJ", "poster": "", "project": "", "author_site": "Yassine Laguel, Yasa Syed, Necdet Serhat Aybat, Mert Gurbuzbalaban", "tldr": "", "abstract": "Stochastic smooth nonconvex minimax problems are prevalent in machine learning, e.g., GAN training, fair classification, and distributionally robust learning. Stochastic gradient descent ascent (GDA)-type methods are popular in practice due to their simplicity and single-loop nature. However, there is a significant gap between the theory and practice regarding high-probability complexity guarantees for these methods on stochastic nonconvex minimax problems. Existing high-probability bounds for GDA-type single-loop methods only apply to convex/concave minimax problems and to particular non-monotone variational inequality problems under some restrictive assumptions. In this work, we address this gap by providing the first high-probability complexity guarantees for nonconvex/PL minimax problems corresponding to a smooth function that satisfies the PL-condition in the dual variable. Specifically, we show that when the stochastic gradients are light-tailed, the smoothed alternating GDA method can compute an $\\varepsilon$-stationary point within $\\mathcal{O}(\\frac{\\ell \\kappa^2 \\delta^2}{\\varepsilon^4} + \\frac{\\kappa}{\\varepsilon^2}(\\ell+\\delta^2\\log({1}/{\\bar{q}})))$ stochastic gradient calls with probability at least $1-\\bar{q}$ for any $\\bar{q}\\in(0,1)$, where $\\mu$ is the PL constant, $\\ell$ is the Lipschitz constant of the gradient, $\\kappa=\\ell/\\mu$ is the condition number, and $\\delta^2$ denotes a bound on the variance of stochastic gradients. We also present numerical results on a nonconvex/PL problem with synthetic data and on distributionally robust optimization problems with real data, illustrating our theoretical findings.", "keywords": "nonconvex minimax optimization;high-probability guarantees;stochastic gradient descent ascent methods", "primary_area": "optimization", "supplementary_material": "/attachment/99c56adc60b0c4312fb33eebcbc9beb5f75943fb.zip", "author": "Yassine Laguel;Yasa Syed;Necdet Aybat;Mert Gurbuzbalaban", "authorids": "~Yassine_Laguel1;~Yasa_Syed1;~Necdet_Aybat1;~Mert_Gurbuzbalaban1", "gender": "M;;;", "homepage": "https://yassine-laguel.github.io;;http://personal.psu.edu/nsa10/;", "dblp": ";;;09/9185", "google_scholar": ";lyw-2SYAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yassine_Laguel1;~Yasa_Syed1;~Necdet_Aybat1;~Mert_Gurbuzbalaban1", "aff": "Rutgers University, Newark;Rutgers University, New Brunswick;Pennsylvania State University;Rutgers University", "aff_domain": "rutgers.edu;rutgers.edu;psu.edu;rutgers.edu", "position": "Postdoc;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlaguel2024highprobability,\ntitle={High-probability complexity bounds for stochastic non-convex minimax optimization},\nauthor={Yassine Laguel and Yasa Syed and Necdet Aybat and Mert Gurbuzbalaban},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XMQTNzlgTJ}\n}", "github": "", "reviewers": "vFP9;Zwn2;NU5h;pQzv;H241", "pdf_size": 851577, "rating": "4;5;5;7;7", "confidence": "4;3;3;4;5", "soundness": "2;3;3;3;4", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "91;34;72;39;112", "wc_strengths": "28;12;73;57;70", "wc_weaknesses": "163;73;256;2;28", "wc_questions": "17;2;39;116;72", "wc_limitations": "1;4;6;1;1", "wc_review": "300;125;446;215;283", "wc_reply_reviewers": "0;62;0;9;36", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 69.6, 29.883774861954773 ], "wc_strengths_avg": [ 48.0, 24.0249869927124 ], "wc_weaknesses_avg": [ 104.4, 93.51491859591174 ], "wc_questions_avg": [ 49.2, 40.86759107165481 ], "wc_limitations_avg": [ 2.6, 2.0591260281974 ], "wc_review_avg": [ 273.8, 105.85159422512257 ], "wc_reply_reviewers_avg": [ 21.4, 24.21239352067449 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5790660241435861, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=74076663484010460&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "rutgers.edu;rutgers.edu;psu.edu;rutgers.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Rutgers University;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.psu.edu", "aff_unique_abbr": "Rutgers;PSU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Newark;New Brunswick;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Visual Data Diagnosis and Debiasing with Concept Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94773", "id": "XNGsx3WCU9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XNGsx3WCU9", "openreview": "https://openreview.net/forum?id=XNGsx3WCU9", "poster": "", "project": "", "author_site": "Rwiddhi Chakraborty, Yinong O Wang, Jialu Gao, Runkai Zheng, Cheng Zhang, Fernando D De la Torre", "tldr": "", "abstract": "The widespread success of deep learning models today is owed to the curation of extensive datasets significant in size and complexity. However, such models frequently pick up inherent biases in the data during the training process, leading to unreliable predictions. Diagnosing and debiasing datasets is thus a necessity to ensure reliable model performance. In this paper, we present ConBias, a novel framework for diagnosing and mitigating Concept co-occurrence Biases in visual datasets. ConBias represents visual datasets as knowledge graphs of concepts, enabling meticulous analysis of spurious concept co-occurrences to uncover concept imbalances across the whole dataset. Moreover, we show that by employing a novel clique-based concept balancing strategy, we can mitigate these imbalances, leading to enhanced performance on downstream tasks. Extensive experiments show that data augmentation based on a balanced concept distribution augmented by ConBias improves generalization performance across multiple datasets compared to state-of-the-art methods.", "keywords": "responsible ai;bias;generative ai;fairness", "primary_area": "fairness", "supplementary_material": "/attachment/d88077188ff0266021fd0cdbb88ab186c2db0489.zip", "author": "Rwiddhi Chakraborty;Yinong Oliver Wang;Jialu Gao;Runkai Zheng;Cheng Zhang;Fernando De la Torre", "authorids": "~Rwiddhi_Chakraborty2;~Yinong_Oliver_Wang1;~Jialu_Gao1;~Runkai_Zheng1;~Cheng_Zhang8;~Fernando_De_la_Torre2", "gender": "M;M;F;M;M;M", "homepage": "https://rwchakra.github.io;https://oliverow.github.io/;https://gaojl19.github.io;;https://czhang0528.github.io/;http://www.cs.cmu.edu/~ftorre/", "dblp": "287/5027;386/8270;256/3849;;82/6384-14;d/FernandoDelaTorre", "google_scholar": ";;PokBJE0AAAAJ;;vb3l1ZMAAAAJ;YB8_6gkAAAAJ", "orcid": ";;;;0009-0003-6255-7648;", "linkedin": ";yinongwang/;danikagao/;%E6%B6%A6%E9%94%B4-%E9%83%91-551606156/;;", "or_profile": "~Rwiddhi_Chakraborty2;~Yinong_Oliver_Wang1;~Jialu_Gao1;~Runkai_Zheng1;~Cheng_Zhang8;~Fernando_Torre1", "aff": "University of Troms\u00f8;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "uit.no;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;cmu.edu;cs.cmu.edu", "position": "PhD student;MS student;MS student;MS student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchakraborty2024visual,\ntitle={Visual Data Diagnosis and Debiasing with Concept Graphs},\nauthor={Rwiddhi Chakraborty and Yinong Oliver Wang and Jialu Gao and Runkai Zheng and Cheng Zhang and Fernando De la Torre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XNGsx3WCU9}\n}", "github": "", "reviewers": "XqYX;TVhQ;xMok;BpQx", "pdf_size": 8131934, "rating": "4;5;5;6", "confidence": "4;3;3;4", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "3;4;3;3", "wc_summary": "78;182;55;120", "wc_strengths": "53;171;56;46", "wc_weaknesses": "248;213;157;30", "wc_questions": "11;110;2;17", "wc_limitations": "11;16;9;9", "wc_review": "401;692;279;222", "wc_reply_reviewers": "106;30;20;26", "wc_reply_authors": "371;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 108.75, 48.28755015529365 ], "wc_strengths_avg": [ 81.5, 51.800096525006595 ], "wc_weaknesses_avg": [ 162.0, 82.83417169260522 ], "wc_questions_avg": [ 35.0, 43.62911871674696 ], "wc_limitations_avg": [ 11.25, 2.8613807855648994 ], "wc_review_avg": [ 398.5, 181.37047720067343 ], "wc_reply_reviewers_avg": [ 45.5, 35.11053972812152 ], "wc_reply_authors_avg": [ 92.75, 160.64771240201338 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1163412789827659337&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "uit.no;cmu.edu;andrew.cmu.edu;andrew.cmu.edu;cmu.edu;cs.cmu.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Troms\u00f8;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://uit.no;https://www.cmu.edu", "aff_unique_abbr": "UIT;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Norway;United States" }, { "title": "Online Bayesian Persuasion Without a Clue", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94772", "id": "XNpVZ8E1tY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XNpVZ8E1tY", "openreview": "https://openreview.net/forum?id=XNpVZ8E1tY", "poster": "", "project": "", "author_site": "Francesco Bacchiocchi, Matteo Bollini, Matteo Castiglioni, Alberto Marchesi, Nicola Gatti", "tldr": "", "abstract": "We study online Bayesian persuasion problems in which an informed sender repeatedly faces a receiver with the goal of influencing their behavior through the provision of payoff-relevant information. Previous works assume that the sender has knowledge about either the prior distribution over states of nature or receiver's utilities, or both. We relax such unrealistic assumptions by considering settings in which the sender does not know anything about the prior and the receiver. We design an algorithm that achieves sublinear---in the number of rounds T---regret with respect to an optimal signaling scheme, and we also provide a collection of lower bounds showing that the guarantees of such an algorithm are tight. Our algorithm works by searching a suitable space of signaling schemes in order to learn receiver's best responses. To do this, we leverage a non-standard representation of signaling schemes that allows to cleverly overcome the challenge of not knowing anything about the prior over states of nature and receiver's utilities. Finally, our results also allow to derive lower/upper bounds on the sample complexity of learning signaling schemes in a related Bayesian persuasion PAC-learning problem.", "keywords": "Bayesian Persuasion;Online Learning", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Francesco Bacchiocchi;Matteo Bollini;Matteo Castiglioni;Alberto Marchesi;Nicola Gatti", "authorids": "~Francesco_Bacchiocchi1;~Matteo_Bollini1;~Matteo_Castiglioni1;~Alberto_Marchesi1;~Nicola_Gatti1", "gender": "M;M;;M;M", "homepage": ";;https://castiglionimatteo.github.io;https://albymarke.github.io;https://www4.ceda.polimi.it/manifesti/manifesti/controller/ricerche/RicercaPerDocentiPublic.do?k_doc=75785&lang=EN&EVN_PRODOTTI=evento&__pj0=0&__pj1=d918ee8916afbd0005f5c0bc3c0ff350", "dblp": "312/4794.html;377/9397;225/7720;204/1718;g/NicolaGatti", "google_scholar": "https://scholar.google.com.vn/citations?user=UKGWeAoAAAAJ;;https://scholar.google.it/citations?user=NPE3HAYAAAAJ;vXDtCzoAAAAJ;https://scholar.google.com.tw/citations?user=j-HrYREAAAAJ", "orcid": ";;0000-0002-1070-6766;;0000-0001-7349-3932", "linkedin": ";matteo-bollini-4542072b2/;;;nicola-gatti-1284b21", "or_profile": "~Francesco_Bacchiocchi1;~Matteo_Bollini1;~Matteo_Castiglioni1;~Alberto_Marchesi1;~Nicola_Gatti1", "aff": "Polytechnic Institute of Milan;Polytechnic Institute of Milan;Politecnico di Milano;Politecnico di Milano;Polytechnic Institute of Milan", "aff_domain": "polimi.it;mail.polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;MS student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbacchiocchi2024online,\ntitle={Online Bayesian Persuasion Without a Clue},\nauthor={Francesco Bacchiocchi and Matteo Bollini and Matteo Castiglioni and Alberto Marchesi and Nicola Gatti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XNpVZ8E1tY}\n}", "github": "", "reviewers": "yxNt;YTqh;cPaK;UDVm", "pdf_size": 646886, "rating": "6;7;7;8", "confidence": "3;4;4;5", "soundness": "4;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "116;88;90;173", "wc_strengths": "52;167;129;76", "wc_weaknesses": "38;56;103;30", "wc_questions": "163;7;50;210", "wc_limitations": "2;35;7;1", "wc_review": "371;353;379;490", "wc_reply_reviewers": "11;24;5;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.75, 34.30287888793009 ], "wc_strengths_avg": [ 106.0, 44.90545623863541 ], "wc_weaknesses_avg": [ 56.75, 28.314086600135983 ], "wc_questions_avg": [ 107.5, 82.14773277455684 ], "wc_limitations_avg": [ 11.25, 13.899190623917638 ], "wc_review_avg": [ 398.25, 53.80230013670419 ], "wc_reply_reviewers_avg": [ 10.0, 8.972179222463181 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7576931367896080329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "polimi.it;mail.polimi.it;polimi.it;polimi.it;polimi.it", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "dopanim: A Dataset of Doppelganger Animals with Noisy Annotations from Multiple Humans", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97647", "id": "XOGosbxLrz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XOGosbxLrz", "openreview": "https://openreview.net/forum?id=XOGosbxLrz", "poster": "/media/PosterPDFs/NeurIPS%202024/97647.png?t=1733332469.7989888", "project": "", "author_site": "Marek Herde, Denis Huseljic, Lukas Rauch, Bernhard Sick", "tldr": "", "abstract": "Human annotators typically provide annotated data for training machine learning models, such as neural networks. Yet, human annotations are subject to noise, impairing generalization performances. Methodological research on approaches counteracting noisy annotations requires corresponding datasets for a meaningful empirical evaluation. Consequently, we introduce a novel benchmark dataset, dopanim, consisting of about 15,750 animal images of 15 classes with ground truth labels. For approximately 10,500 of these images, 20 humans provided over 52,000 annotations with an accuracy of circa 67%. Its key attributes include (1) the challenging task of classifying doppelganger animals, (2) human-estimated likelihoods as annotations, and (3) annotator metadata. We benchmark well-known multi-annotator learning approaches using seven variants of this dataset and outline further evaluation use cases such as learning beyond hard class labels and active learning. Our dataset and a comprehensive codebase are publicly available to emulate the data collection process and to reproduce all empirical results.", "keywords": "noisy annotations;neural networks;crowdworkers", "primary_area": "", "supplementary_material": "", "author": "Marek Herde;Denis Huseljic;Lukas Rauch;Bernhard Sick", "authorids": "~Marek_Herde1;~Denis_Huseljic1;~Lukas_Rauch1;~Bernhard_Sick1", "gender": "M;M;M;M", "homepage": ";https://www.uni-kassel.de/eecs/ies/denis-huseljic;;", "dblp": ";;332/7293;21/4593", "google_scholar": "pwRDfMQAAAAJ;https://scholar.google.de/citations?user=sFeKFT4AAAAJ;bB2A6e0AAAAJ;https://scholar.google.de/citations?user=sGAKnroAAAAJ", "orcid": "0000-0003-4908-122X;;;", "linkedin": ";;;bernhard-sick-71915b76/?originalSubdomain=de", "or_profile": "~Marek_Herde1;~Denis_Huseljic1;~Lukas_Rauch1;~Bernhard_Sick1", "aff": "Universit\u00e4t Kassel;Universit\u00e4t Kassel;University of Kassel;Universit\u00e4t Kassel", "aff_domain": "uni-kassel.de;uni-kassel.de;uni-kassel.de;uni-kassel.de", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nherde2024dopanim,\ntitle={dopanim: A Dataset of Doppelganger Animals with Noisy Annotations from Multiple Humans},\nauthor={Marek Herde and Denis Huseljic and Lukas Rauch and Bernhard Sick},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XOGosbxLrz}\n}", "github": "", "reviewers": "F9JK;4EnK;ana5;ogMH", "pdf_size": 5533535, "rating": "6;7;7;9", "confidence": "3;4;3;4", "wc_summary_and_contributions": "89;46;25;99", "wc_strengths": "110;68;26;92", "wc_improvement": "494;447;155;231", "wc_limitations": "11;4;1;2", "wc_correctness": "13;1;1;8", "wc_clarity": "58;1;1;25", "wc_relation_to_prior_work": "25;4;6;13", "wc_documentation": "18;2;1;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "819;574;217;474", "wc_reply_reviewers": "172;61;15;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 64.75, 30.384000724065288 ], "wc_strengths_avg": [ 74.0, 31.464265445104548 ], "wc_improvement_avg": [ 331.75, 142.30139669026443 ], "wc_limitations_avg": [ 4.5, 3.905124837953327 ], "wc_correctness_avg": [ 5.75, 5.0682837331783235 ], "wc_clarity_avg": [ 21.25, 23.370654676324325 ], "wc_relation_to_prior_work_avg": [ 12.0, 8.215838362577491 ], "wc_documentation_avg": [ 6.0, 6.96419413859206 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 521.0, 215.7765047450718 ], "wc_reply_reviewers_avg": [ 62.0, 67.36839021380874 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16365266543099978455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uni-kassel.de;uni-kassel.de;uni-kassel.de;uni-kassel.de", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Kassel", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-kassel.de", "aff_unique_abbr": "UKassel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Linear Uncertainty Quantification of Graphical Model Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94771", "id": "XOVks7JHQA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XOVks7JHQA", "openreview": "https://openreview.net/forum?id=XOVks7JHQA", "poster": "/media/PosterPDFs/NeurIPS%202024/94771.png?t=1733230437.871561", "project": "", "author_site": "Chenghua Guo, Han Yu, Jiaxin Liu, Chao Chen, Qi Li, Sihong Xie, Xi Zhang", "tldr": "", "abstract": "Uncertainty Quantification (UQ) is vital for decision makers as it offers insights into the potential reliability of data and model, enabling more informed and risk-aware decision-making. \nGraphical models, capable of representing data with complex dependencies, are widely used across domains.\nExisting sampling-based UQ methods are unbiased but cannot guarantee convergence and are time-consuming on large-scale graphs. \nThere are fast UQ methods for graphical models with closed-form solutions and convergence guarantee but with uncertainty underestimation.\nWe propose *LinUProp*, a UQ method that utilizes a novel linear propagation of uncertainty to model uncertainty among related nodes additively instead of multiplicatively, to offer linear scalability, guaranteed convergence, and closed-form solutions without underestimating uncertainty.\nTheoretically, we decompose the expected prediction error of the graphical model and prove that the uncertainty computed by *LinUProp* is the *generalized variance component* of the decomposition.\nExperimentally, we demonstrate that *LinUProp* is consistent with the sampling-based method but with linear scalability and fast convergence.\nMoreover, *LinUProp* outperforms competitors in uncertainty-based active learning on four real-world graph datasets, achieving higher accuracy with a lower labeling budget.", "keywords": "graphical models;belief propagation;uncertainty quantification", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Chenghua Guo;Han Yu;Jiaxin Liu;Chao Chen;Qi Li;Sihong Xie;Xi Zhang", "authorids": "~Chenghua_Guo1;~Han_Yu8;~Jiaxin_Liu3;~Chao_Chen14;~Qi_Li14;~Sihong_Xie1;~Xi_Zhang12", "gender": "M;F;F;;F;M;M", "homepage": "https://github.com/chenghuaguo;https://hanyu1999.github.io;;;https://sites.google.com/iastate.edu/qili/;https://sihongxie.github.io/index.html;https://www.linkedin.com/in/xi-zhang-a1128b51/", "dblp": "337/0557;;;;181/2688-12;67/1229;87/1222-8", "google_scholar": ";;Qf_tzfMAAAAJ;;Gvld0foAAAAJ;qRp1xZwAAAAJ;6sRtx0cAAAAJ", "orcid": ";;0009-0007-8474-9025;;0000-0002-3136-2157;0000-0003-1060-8506;0000-0002-2111-7385", "linkedin": ";;jiaxin-liu-99848a168/;;;;", "or_profile": "~Chenghua_Guo1;~Han_Yu8;~Jiaxin_Liu3;~Chao_Chen14;~Qi_Li14;~Sihong_Xie1;~Xi_Zhang12", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Lehigh University;;Iowa State University;HKUST-GZ;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;lehigh.edu;;iastate.edu;hkust-gz.edu.cn;bupt.edu.cn", "position": "PhD student;MS student;PhD student;;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguo2024linear,\ntitle={Linear Uncertainty Quantification of Graphical Model Inference},\nauthor={Chenghua Guo and Han Yu and Jiaxin Liu and Chao Chen and Qi Li and Sihong Xie and Xi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XOVks7JHQA}\n}", "github": "", "reviewers": "G7cz;rPk6;XubP;HCKJ", "pdf_size": 1947153, "rating": "6;7;7;7", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "31;71;60;57", "wc_strengths": "39;7;241;108", "wc_weaknesses": "113;38;51;66", "wc_questions": "17;201;45;39", "wc_limitations": "1;59;4;78", "wc_review": "201;376;401;348", "wc_reply_reviewers": "54;562;0;19", "wc_reply_authors": "30;1620;0;18", "reply_reviewers": "1;4;0;1", "reply_authors": "2;5;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 54.75, 14.669270602180601 ], "wc_strengths_avg": [ 98.75, 89.87317452944454 ], "wc_weaknesses_avg": [ 67.0, 28.34607556611673 ], "wc_questions_avg": [ 75.5, 73.20348352366847 ], "wc_limitations_avg": [ 35.5, 33.69347117766289 ], "wc_review_avg": [ 331.5, 77.6418057492225 ], "wc_reply_reviewers_avg": [ 158.75, 233.62081991980082 ], "wc_reply_authors_avg": [ 417.0, 694.6344362324689 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QriKi8G_v6wJ:scholar.google.com/&scioq=Linear+Uncertainty+Quantification+of+Graphical+Model+Inference&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "bupt.edu.cn;bupt.edu.cn;lehigh.edu;;iastate.edu;hkust-gz.edu.cn;bupt.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Lehigh University;Iowa State University;Hong Kong University of Science and Technology (Guangzhou)", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.lehigh.edu;https://www.iastate.edu;https://www.ust.hk", "aff_unique_abbr": "BUPT;Lehigh;ISU;HKUST", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Beijing;;Guangzhou", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Probabilistic Decomposed Linear Dynamical Systems for Robust Discovery of Latent Neural Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94770", "id": "XPhSbybD73", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XPhSbybD73", "openreview": "https://openreview.net/forum?id=XPhSbybD73", "poster": "/media/PosterPDFs/NeurIPS%202024/94770.png?t=1731363546.3031154", "project": "", "author_site": "Yenho Chen, Noga Mudrik, Kyle A. Johnsen, Sankaraleengam Alagapan, Adam Charles, Christopher Rozell", "tldr": "", "abstract": "Time-varying linear state-space models are powerful tools for obtaining mathematically interpretable representations of neural signals. For example, switching and decomposed models describe complex systems using latent variables that evolve according to simple locally linear dynamics. However, existing methods for latent variable estimation are not robust to dynamical noise and system nonlinearity due to noise-sensitive inference procedures and limited model formulations. This can lead to inconsistent results on signals with similar dynamics, limiting the model's ability to provide scientific insight. In this work, we address these limitations and propose a probabilistic approach to latent variable estimation in decomposed models that improves robustness against dynamical noise. Additionally, we introduce an extended latent dynamics model to improve robustness against system nonlinearities. We evaluate our approach on several synthetic dynamical systems, including an empirically-derived brain-computer interface experiment, and demonstrate more accurate latent variable inference in nonlinear systems with diverse noise conditions. Furthermore, we apply our method to a real-world clinical neurophysiology dataset, illustrating the ability to identify interpretable and coherent structure where previous models cannot.", "keywords": "Computational Neuroscience;Probabilistic Modeling;State Space Models;Dynamical Systems", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/c45a279ca56d64e65b5ec32d343926d7ff1e78d6.zip", "author": "Yenho Chen;Noga Mudrik;Kyle A. Johnsen;Sankaraleengam Alagapan;Adam Shabti Charles;Christopher John Rozell", "authorids": "~Yenho_Chen1;~Noga_Mudrik1;~Kyle_A._Johnsen1;~Sankaraleengam_Alagapan1;~Adam_Shabti_Charles1;~Christopher_John_Rozell1", "gender": "M;;M;M;M;M", "homepage": ";;https://kjohnsen.org;https://www.sankaralagapan.com;https://www.bme.jhu.edu/ascharles/;http://siplab.gatech.edu/rozell.html", "dblp": ";;;;04/10257;04/2617", "google_scholar": "eb6I8-MAAAAJ;;i2g9U_gAAAAJ;K0dED3QAAAAJ;c8RKLp0AAAAJ;JHuo2D0AAAAJ", "orcid": ";;0000-0002-3840-6140;0000-0002-2056-5450;;", "linkedin": "yenhochen/;;kyle-johnsen/;;;", "or_profile": "~Yenho_Chen1;~Noga_Mudrik1;~Kyle_A._Johnsen1;~Sankaraleengam_Alagapan1;~Adam_Shabti_Charles1;~Christopher_John_Rozell1", "aff": "Georgia Institute of Technology;;Georgia Institute of Technology;Georgia Institute of Technology;Johns Hopkins University;Georgia Institute of Technology", "aff_domain": "gatech.edu;;gatech.edu;gatech.edu;jhu.edu;gatech.edu", "position": "PhD student;;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024probabilistic,\ntitle={Probabilistic Decomposed Linear Dynamical Systems for Robust Discovery of Latent Neural Dynamics},\nauthor={Yenho Chen and Noga Mudrik and Kyle A. Johnsen and Sankaraleengam Alagapan and Adam Shabti Charles and Christopher John Rozell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XPhSbybD73}\n}", "github": "", "reviewers": "3hfN;mAN7;M5AA;jjeY", "pdf_size": 4875974, "rating": "5;7;7;7", "confidence": "2;5;3;4", "soundness": "3;4;3;3", "novelty": "2;3;3;2", "presentation": "3;4;3;4", "wc_summary": "84;69;66;87", "wc_strengths": "24;108;88;141", "wc_weaknesses": "63;68;241;17", "wc_questions": "53;371;139;246", "wc_limitations": "3;8;16;10", "wc_review": "227;624;550;501", "wc_reply_reviewers": "0;70;29;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.5, 9.12414379544733 ], "wc_strengths_avg": [ 90.25, 42.67537346058028 ], "wc_weaknesses_avg": [ 97.25, 85.34159302473795 ], "wc_questions_avg": [ 202.25, 119.02389465985391 ], "wc_limitations_avg": [ 9.25, 4.656984002549289 ], "wc_review_avg": [ 475.5, 150.0041666087979 ], "wc_reply_reviewers_avg": [ 24.75, 28.682529525828087 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17712710762482524180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gatech.edu;;gatech.edu;gatech.edu;jhu.edu;gatech.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.jhu.edu", "aff_unique_abbr": "Georgia Tech;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fine-Tuning is Fine, if Calibrated", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94769", "id": "XRJXKBeeTD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XRJXKBeeTD", "openreview": "https://openreview.net/forum?id=XRJXKBeeTD", "poster": "", "project": "", "author_site": "Zheda Mai, Arpita Chowdhury, Ping Zhang, Cheng-Hao Tu, Hong-You Chen, Vardaan Pahuja, Tanya Berger-Wolf, Song Gao, Charles Stewart, Yu Su, Wei-Lun (Harry) Chao", "tldr": "", "abstract": "Fine-tuning is arguably the most straightforward way to tailor a pre-trained model (e.g., a foundation model) to downstream applications, but it also comes with the risk of losing valuable knowledge the model had learned in pre-training. For example, fine-tuning a pre-trained classifier capable of recognizing a large number of classes to master a subset of classes at hand is shown to drastically degrade the model's accuracy in the other classes it had previously learned. As such, it is hard to further use the fine-tuned model when it encounters classes beyond the fine-tuning data. In this paper, we systematically dissect the issue, aiming to answer the fundamental question, \"What has been damaged in the fine-tuned model?\" To our surprise, we find that the fine-tuned model neither forgets the relationship among the other classes nor degrades the features to recognize these classes. Instead, the fine-tuned model often produces more discriminative features for these other classes, even if they were missing during fine-tuning! What really hurts the accuracy is the discrepant logit scales between the fine-tuning classes and the other classes, implying that a simple post-processing calibration would bring back the pre-trained model's capability and at the same time unveil the feature improvement over all classes. We conduct an extensive empirical study to demonstrate the robustness of our findings and provide preliminary explanations underlying them, suggesting new directions for future theoretical analysis.", "keywords": "Fine-Tuning;Pre-training;Domain Adaptation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zheda Mai;Arpita Chowdhury;Ping Zhang;Cheng-Hao Tu;Hong-You Chen;Vardaan Pahuja;Tanya Berger-Wolf;Song Gao;Charles Stewart;Yu Su;Wei-Lun Chao", "authorids": "~Zheda_Mai1;~Arpita_Chowdhury1;~Ping_Zhang6;~Cheng-Hao_Tu1;~Hong-You_Chen1;~Vardaan_Pahuja1;~Tanya_Berger-Wolf2;~Song_Gao3;~Charles_Stewart1;~Yu_Su2;~Wei-Lun_Chao1", "gender": "M;F;M;M;;M;F;M;M;M;M", "homepage": "https://zheda-mai.github.io/;;https://github.com/pizhn;https://andytu28.github.io/;https://sites.google.com/view/hongyouc/%E9%A6%96%E9%A0%81;https://vardaan123.github.io/;https://cse.osu.edu/people/berger-wolf.1;https://geography.wisc.edu/geods/people;https://www.cs.rpi.edu/~stewart;http://ysu1989.github.io;https://sites.google.com/view/wei-lun-harry-chao", "dblp": "270/0552;;;116/8913-4;228/5569;188/3398;b/TYBergerWolf;92/357-1;43/471;38/1070-1;64/8842", "google_scholar": "FT3oT6EAAAAJ;7eHA9IAAAAAJ;;cZ87u54AAAAJ;uxlU7J8AAAAJ;https://scholar.google.ca/citations?user=0O6NKfIAAAAJ;fDQUHyIAAAAJ;eLdz_6IAAAAJ;https://scholar.google.com/citations?hl=en;rIh5OqoAAAAJ;PGKakWwAAAAJ", "orcid": ";;;;;;;0000-0003-4359-6302;;;0000-0003-1269-7231", "linkedin": ";arpita-chowdhury/;;;;;;;;;", "or_profile": "~Zheda_Mai1;~Arpita_Chowdhury1;~Ping_Zhang6;~Cheng-Hao_Tu1;~Hong-You_Chen1;~Vardaan_Pahuja1;~Tanya_Berger-Wolf2;~Song_Gao3;~Charles_Stewart1;~Yu_Su2;~Wei-Lun_Chao1", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;The Ohio State University;Appier;Apple AI/ML;The Ohio State University, Columbus;Ohio State University;University of Wisconsin - Madison;Rensselaer Polytechnic Institute;Microsoft;Ohio State University", "aff_domain": "osu.edu;osu.edu;osu.edu;appier.com;apple.com;osu.edu;osu.edu;wisc.edu;cs.rpi.edu;microsoft.com;osu.edu", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;PhD student;Professor;Associate Professor;Professor;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nmai2024finetuning,\ntitle={Fine-Tuning is Fine, if Calibrated},\nauthor={Zheda Mai and Arpita Chowdhury and Ping Zhang and Cheng-Hao Tu and Hong-You Chen and Vardaan Pahuja and Tanya Berger-Wolf and Song Gao and Charles Stewart and Yu Su and Wei-Lun Chao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XRJXKBeeTD}\n}", "github": "", "reviewers": "YzGv;5T9R;amJX;9Ei3", "pdf_size": 17640996, "rating": "5;5;6;8", "confidence": "4;4;4;4", "soundness": "3;2;3;4", "novelty": "3;3;3;4", "presentation": "3;1;3;4", "wc_summary": "111;163;75;104", "wc_strengths": "108;66;40;44", "wc_weaknesses": "145;424;102;39", "wc_questions": "81;30;1;5", "wc_limitations": "10;7;1;7", "wc_review": "455;690;219;199", "wc_reply_reviewers": "20;20;0;69", "wc_reply_authors": "124;130;100;55", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 113.25, 31.736217480979047 ], "wc_strengths_avg": [ 64.5, 26.995369973386175 ], "wc_weaknesses_avg": [ 177.5, 147.22516768541988 ], "wc_questions_avg": [ 29.25, 31.877695964420013 ], "wc_limitations_avg": [ 6.25, 3.2691742076555053 ], "wc_review_avg": [ 390.75, 199.96546576846714 ], "wc_reply_reviewers_avg": [ 27.25, 25.449705302812447 ], "wc_reply_authors_avg": [ 102.25, 29.498940658945703 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3655074447193308373&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "osu.edu;osu.edu;osu.edu;appier.com;apple.com;osu.edu;osu.edu;wisc.edu;cs.rpi.edu;microsoft.com;osu.edu", "author_num": 11, "aff_unique_index": "0;0;0;1;2;0;0;3;4;5;0", "aff_unique_norm": "Ohio State University;Appier;Apple;University of Wisconsin-Madison;Rensselaer Polytechnic Institute;Microsoft", "aff_unique_dep": ";;AI/ML;;;Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.appier.com;https://www.apple.com;https://www.wisc.edu;https://www.rpi.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;Appier;Apple;UW-Madison;RPI;Microsoft", "aff_campus_unique_index": "0;0;2;0;3", "aff_campus_unique": "Columbus;;Taiwan;Madison", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Continuous Product Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94768", "id": "XRNN9i1xpi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XRNN9i1xpi", "openreview": "https://openreview.net/forum?id=XRNN9i1xpi", "poster": "", "project": "", "author_site": "Aref Einizade, Fragkiskos Malliaros, Jhony H. Giraldo", "tldr": "", "abstract": "Processing multidomain data defined on multiple graphs holds significant potential in various practical applications in computer science. However, current methods are mostly limited to discrete graph filtering operations. Tensorial partial differential equations on graphs (TPDEGs) provide a principled framework for modeling structured data across multiple interacting graphs, addressing the limitations of the existing discrete methodologies. In this paper, we introduce Continuous Product Graph Neural Networks (CITRUS) that emerge as a natural solution to the TPDEG. CITRUS leverages the separability of continuous heat kernels from Cartesian graph products to efficiently implement graph spectral decomposition. We conduct thorough theoretical analyses of the stability and over-smoothing properties of CITRUS in response to domain-specific graph perturbations and graph spectra effects on the performance. We evaluate CITRUS on well-known traffic and weather spatiotemporal forecasting datasets, demonstrating superior performance over existing approaches. The implementation codes are available at https://github.com/ArefEinizade2/CITRUS.", "keywords": "Continuous graph neural networks;product graphs;Cartesian product", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/05fea0e9fd502b430dfe081eb8a5bca8738476f0.zip", "author": "Aref Einizade;Fragkiskos D. Malliaros;Jhony H. Giraldo", "authorids": "~Aref_Einizade1;~Fragkiskos_D._Malliaros1;~Jhony_H._Giraldo1", "gender": "M;M;M", "homepage": ";http://fragkiskos.me/;https://jhonygiraldo.github.io/", "dblp": "299/0292;22/9458;170/0084", "google_scholar": "ZEQyAaAAAAAJ;_7heOKcAAAAJ;https://scholar.google.fr/citations?user=iwzmGKwAAAAJ", "orcid": ";;0000-0002-0039-1270", "linkedin": ";;jhony-heriberto-giraldo-zuluaga-20b195157/", "or_profile": "~Aref_Einizade1;~Fragkiskos_D._Malliaros1;~Jhony_Heriberto_Giraldo_Zuluaga1", "aff": "T\u00e9l\u00e9com ParisTech;CentraleSup\u00e9lec, Inria, Paris-Saclay University;T\u00e9l\u00e9com Paris, Institut Polytechnique de Paris", "aff_domain": "telecom-paristech.fr;centralesupelec.fr;telecom-paristech.fr", "position": "Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\neinizade2024continuous,\ntitle={Continuous Product Graph Neural Networks},\nauthor={Aref Einizade and Fragkiskos D. Malliaros and Jhony H. Giraldo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XRNN9i1xpi}\n}", "github": "", "reviewers": "Y9UP;QFxx;gs8b;wQ9d", "pdf_size": 647818, "rating": "5;5;6;6", "confidence": "4;3;3;2", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "57;41;93;47", "wc_strengths": "26;28;73;25", "wc_weaknesses": "220;37;156;2", "wc_questions": "117;102;111;28", "wc_limitations": "9;1;1;15", "wc_review": "429;209;434;117", "wc_reply_reviewers": "172;44;206;9", "wc_reply_authors": "674;303;850;0", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 20.168044030098706 ], "wc_strengths_avg": [ 38.0, 20.23610634484806 ], "wc_weaknesses_avg": [ 103.75, 88.10895243957903 ], "wc_questions_avg": [ 89.5, 35.90612761075747 ], "wc_limitations_avg": [ 6.5, 5.894913061275798 ], "wc_review_avg": [ 297.25, 138.14553014846337 ], "wc_reply_reviewers_avg": [ 107.75, 83.06134780991698 ], "wc_reply_authors_avg": [ 456.75, 329.4323716637453 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9103551278844851567&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "telecom-paristech.fr;centralesupelec.fr;telecom-paristech.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;CentraleSup\u00e9lec;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";;", "aff_unique_url": "https://www.telecom-paristech.fr;https://www.centralesupelec.fr;https://www.telecom-paris.fr", "aff_unique_abbr": "TP;CentraleSup\u00e9lec;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris-Saclay", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "PhoCoLens: Photorealistic and Consistent Reconstruction in Lensless Imaging", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94767", "id": "XUAcPEaeBU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XUAcPEaeBU", "openreview": "https://openreview.net/forum?id=XUAcPEaeBU", "poster": "/media/PosterPDFs/NeurIPS%202024/94767.png?t=1731688636.9839873", "project": "", "author_site": "Xin Cai, Zhiyuan You, Hailong Zhang, Jinwei Gu, Wentao Liu, Tianfan Xue", "tldr": "", "abstract": "Lensless cameras offer significant advantages in size, weight, and cost compared to traditional lens-based systems. Without a focusing lens, lensless cameras rely on computational algorithms to recover the scenes from multiplexed measurements. However, current algorithms struggle with inaccurate forward imaging models and insufficient priors to reconstruct high-quality images. To overcome these limitations, we introduce a novel two-stage approach for consistent and photorealistic lensless image reconstruction. The first stage of our approach ensures data consistency by focusing on accurately reconstructing the low-frequency content with a spatially varying deconvolution method that adjusts to changes in the Point Spread Function (PSF) across the camera's field of view. The second stage enhances photorealism by incorporating a generative prior from pre-trained diffusion models. By conditioning on the low-frequency content retrieved in the first stage, the diffusion model effectively reconstructs the high-frequency details that are typically lost in the lensless imaging process, while also maintaining image fidelity. Our method achieves a superior balance between data fidelity and visual quality compared to existing methods, as demonstrated with two popular lensless systems, PhlatCam and DiffuserCam.", "keywords": "Lensless Imaging; Computional Imaging; Generative Prior", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xin Cai;Zhiyuan You;Hailong Zhang;Jinwei Gu;Wentao Liu;Tianfan Xue", "authorids": "~Xin_Cai2;~Zhiyuan_You1;~Hailong_Zhang1;~Jinwei_Gu1;~Wentao_Liu1;~Tianfan_Xue2", "gender": "M;M;M;M;M;M", "homepage": "https://caixin98.github.io/;https://zhiyuanyou.github.io/;;http://www.gujinwei.org;;https://tianfan.info", "dblp": ";312/5150;;61/1140;30/3943-2;54/8652", "google_scholar": "Uf-cuH4AAAAJ;Gwo9O8sAAAAJ;qqqwC7IAAAAJ;k_T8t30AAAAJ;KZn9NWEAAAAJ;RfSQKrIAAAAJ", "orcid": ";;;0000-0001-8705-8237;;0000-0001-5031-6618", "linkedin": ";;;;;tianfan-xue-54016716", "or_profile": "~Xin_Cai2;~Zhiyuan_You1;~Hailong_Zhang1;~Jinwei_Gu1;~Wentao_Liu1;~Tianfan_Xue2", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;Tsinghua University;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Sensetime;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;ie.cuhk.edu;mails.tsinghua.edu.cn;cse.cuhk.edu.hk;sensetime.com;cuhk.edu.hk", "position": "PhD student;PhD student;PhD student;Associate Professor;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\ncai2024phocolens,\ntitle={PhoCoLens: Photorealistic and Consistent Reconstruction in Lensless Imaging},\nauthor={Xin Cai and Zhiyuan You and Hailong Zhang and Jinwei Gu and Wentao Liu and Tianfan Xue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XUAcPEaeBU}\n}", "github": "", "reviewers": "pMG3;Ernw;4jsT;FAra", "pdf_size": 9663546, "rating": "6;6;8;9", "confidence": "4;4;5;4", "soundness": "3;2;4;4", "novelty": "3;3;4;4", "presentation": "3;2;4;4", "wc_summary": "75;115;24;45", "wc_strengths": "90;32;103;131", "wc_weaknesses": "172;89;102;77", "wc_questions": "13;113;291;128", "wc_limitations": "5;76;8;30", "wc_review": "355;425;528;411", "wc_reply_reviewers": "181;69;6;0", "wc_reply_authors": "612;25;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 64.75, 34.208003449485325 ], "wc_strengths_avg": [ 89.0, 36.09016486523718 ], "wc_weaknesses_avg": [ 110.0, 36.871398129173244 ], "wc_questions_avg": [ 136.25, 99.68293484844835 ], "wc_limitations_avg": [ 29.75, 28.39344114403888 ], "wc_review_avg": [ 429.75, 62.47949663689681 ], "wc_reply_reviewers_avg": [ 64.0, 72.7564430136603 ], "wc_reply_authors_avg": [ 159.25, 261.5945096900927 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=852631418238028034&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;ie.cuhk.edu;mails.tsinghua.edu.cn;cse.cuhk.edu.hk;sensetime.com;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Tsinghua University;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "CUHK;THU;SenseTime", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Collusion of Memory and Nonlinearity in Stochastic Approximation With Constant Stepsize", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94766", "id": "XUL75cvHL5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XUL75cvHL5", "openreview": "https://openreview.net/forum?id=XUL75cvHL5", "poster": "/media/PosterPDFs/NeurIPS%202024/94766.png?t=1731468042.3282084", "project": "", "author_site": "Dongyan Lucy Huo, Yixuan Zhang, Yudong Chen, Qiaomin Xie", "tldr": "", "abstract": "In this work, we investigate stochastic approximation (SA) with Markovian data and nonlinear updates under constant stepsize $\\alpha>0$. Existing work has primarily focused on either i.i.d. data or linear update rules. We take a new perspective and carefully examine the simultaneous presence of Markovian dependency of data and nonlinear update rules, delineating how the interplay between these two structures leads to complications that are not captured by prior techniques. By leveraging the smoothness and recurrence properties of the SA updates, we develop a fine-grained analysis of the correlation between the SA iterates $\\theta_k$ and Markovian data $x_k$. This enables us to overcome the obstacles in existing analysis and establish for the first time the weak convergence of the joint process $(x_k, \\theta_k)$. Furthermore, we present a precise characterization of the asymptotic bias of the SA iterates, given by $\\mathbb{E}[\\theta_\\infty]-\\theta^\\ast=\\alpha(b_\\textup{m}+b_\\textup{n}+b_\\textup{c})+\\mathcal{O}(\\alpha^{3/2})$. Here, $b_\\textup{m}$ is associated with the Markovian noise, $b_\\textup{n}$ is tied to the nonlinearity of the SA operator, and notably, $b_\\textup{c}$ represents a multiplicative interaction between the Markovian noise and the nonlinearity of the operator, which is absent in previous works. As a by-product of our analysis, we derive finite-time bounds on higher moment $\\mathbb{E}[||\\theta_k-\\theta^\\ast||^{2p}]$ and present non-asymptotic geometric convergence rates for the iterates, along with a Central Limit Theorem.", "keywords": "stochastic approximation;nonlinearity;Markov chain;weak convergence;Wasserstein metric;asymptotic bias;Richardson-Romberg extrapolation;Polyak-Ruppert averaging", "primary_area": "optimization", "supplementary_material": "/attachment/429eace04ae7b1ff63cba87c2c8d216974093b15.zip", "author": "Dongyan Lucy Huo;Yixuan Zhang;Yudong Chen;Qiaomin Xie", "authorids": "~Dongyan_Lucy_Huo1;~Yixuan_Zhang3;~Yudong_Chen1;~Qiaomin_Xie1", "gender": "F;M;M;F", "homepage": ";https://sites.google.com/wisc.edu/yixuanzhang/;https://pages.cs.wisc.edu/~yudongchen/;https://qiaominxie.github.io/", "dblp": "330/5388;;15/1975-1;37/10269", "google_scholar": ";https://scholar.google.com/citations?hl=en;ze5rCdwAAAAJ;RVNcy4EAAAAJ", "orcid": "0000-0003-4086-2388;;0000-0002-6416-5635;", "linkedin": ";yixuan-zhang-ab297b209/;;", "or_profile": "~Dongyan_Lucy_Huo1;~Yixuan_Zhang3;~Yudong_Chen1;~Qiaomin_Xie1", "aff": "Cornell University;University of Wisconsin - Madison;Department of Computer Sciences, University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "cornell.edu;wisc.edu;cs.wisc.edu;wisc.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuo2024the,\ntitle={The Collusion of Memory and Nonlinearity in Stochastic Approximation With Constant Stepsize},\nauthor={Dongyan Lucy Huo and Yixuan Zhang and Yudong Chen and Qiaomin Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XUL75cvHL5}\n}", "github": "", "reviewers": "o1Ug;9yxV;gRG8;3y4U", "pdf_size": 1142855, "rating": "5;7;7;7", "confidence": "3;4;4;2", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "115;91;91;40", "wc_strengths": "18;55;65;45", "wc_weaknesses": "54;360;66;34", "wc_questions": "23;69;162;58", "wc_limitations": "1;11;21;1", "wc_review": "211;586;405;178", "wc_reply_reviewers": "12;11;170;25", "wc_reply_authors": "0;0;16;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 27.36215452043205 ], "wc_strengths_avg": [ 45.75, 17.512495538900218 ], "wc_weaknesses_avg": [ 128.5, 134.1445116283182 ], "wc_questions_avg": [ 78.0, 51.38579570270368 ], "wc_limitations_avg": [ 8.5, 8.2915619758885 ], "wc_review_avg": [ 345.0, 163.9557867231285 ], "wc_reply_reviewers_avg": [ 54.5, 66.91225597751132 ], "wc_reply_authors_avg": [ 4.0, 6.928203230275509 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18135208165321137110&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "cornell.edu;wisc.edu;cs.wisc.edu;wisc.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Cornell University;University of Wisconsin-Madison", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.wisc.edu", "aff_unique_abbr": "Cornell;UW-Madison", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimistic Critic Reconstruction and Constrained Fine-Tuning for General Offline-to-Online RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94765", "id": "XVfevb9XFx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XVfevb9XFx", "openreview": "https://openreview.net/forum?id=XVfevb9XFx", "poster": "/media/PosterPDFs/NeurIPS%202024/94765.png?t=1731751517.054504", "project": "", "author_site": "Qin-Wen Luo, Ming-Kun Xie, Yewen Wang, Sheng-Jun Huang", "tldr": "", "abstract": "Offline-to-online (O2O) reinforcement learning (RL) provides an effective means of leveraging an offline pre-trained policy as initialization to improve performance rapidly with limited online interactions. Recent studies often design fine-tuning strategies for a specific offline RL method and cannot perform general O2O learning from any offline method. To deal with this problem, we disclose that there are evaluation and improvement mismatches between the offline dataset and the online environment, which hinders the direct application of pre-trained policies to online fine-tuning. In this paper, we propose to handle these two mismatches simultaneously, which aims to achieve general O2O learning from any offline method to any online method. Before online fine-tuning, we re-evaluate the pessimistic critic trained on the offline dataset in an optimistic way and then calibrate the misaligned critic with the reliable offline actor to avoid erroneous update. After obtaining an optimistic and and aligned critic, we perform constrained fine-tuning to combat distribution shift during online learning. We show empirically that the proposed method can achieve stable and efficient performance improvement on multiple simulated tasks when compared to the state-of-the-art methods.", "keywords": "reinforcement learning;offline-to-online", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/4ca7a004abd3a95529099169ca43f21aac61344a.zip", "author": "Qin-Wen Luo;Ming-Kun Xie;Ye-Wen Wang;Sheng-Jun Huang", "authorids": "~Qin-Wen_Luo1;~Ming-Kun_Xie1;~Ye-Wen_Wang2;~Sheng-Jun_Huang1", "gender": "M;M;M;", "homepage": "https://github.com/QinwenLuo;http://www.xiemk.pro/;;http://parnec.nuaa.edu.cn/huangsj", "dblp": "395/6380.html;215/4362;;01/3367.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.co.jp/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0009-0004-4147-5341;0000-0002-7673-5367", "linkedin": ";;;", "or_profile": "~Qin-Wen_Luo1;~Ming-Kun_Xie1;~Ye-Wen_Wang2;~Sheng-Jun_Huang1", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "position": "MS student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nluo2024optimistic,\ntitle={Optimistic Critic Reconstruction and Constrained Fine-Tuning for General Offline-to-Online {RL}},\nauthor={Qin-Wen Luo and Ming-Kun Xie and Ye-Wen Wang and Sheng-Jun Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XVfevb9XFx}\n}", "github": "", "reviewers": "9DG2;zZK3;QxHN;aiGQ", "pdf_size": 1532148, "rating": "6;6;6;7", "confidence": "4;1;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "82;48;75;66", "wc_strengths": "117;11;49;44", "wc_weaknesses": "54;20;343;84", "wc_questions": "25;40;8;1", "wc_limitations": "8;4;4;1", "wc_review": "286;123;479;196", "wc_reply_reviewers": "0;0;25;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.75, 12.735285626950029 ], "wc_strengths_avg": [ 55.25, 38.525154120392564 ], "wc_weaknesses_avg": [ 125.25, 127.74070416276872 ], "wc_questions_avg": [ 18.5, 15.173990905493518 ], "wc_limitations_avg": [ 4.25, 2.48746859276655 ], "wc_review_avg": [ 271.0, 133.24601307356255 ], "wc_reply_reviewers_avg": [ 10.75, 11.031205736455105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kgVKtYAL6Q8J:scholar.google.com/&scioq=Optimistic+Critic+Reconstruction+and+Constrained+Fine-Tuning+for+General+Offline-to-Online+RL&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn;nuaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Maia-2: A Unified Model for Human-AI Alignment in Chess", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94764", "id": "XWlkhRn14K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XWlkhRn14K", "openreview": "https://openreview.net/forum?id=XWlkhRn14K", "poster": "/media/PosterPDFs/NeurIPS%202024/94764.png?t=1732745637.3167117", "project": "", "author_site": "Zhenwei Tang, Difan Jiao, Reid McIlroy-Young, Jon Kleinberg, Siddhartha Sen, Ashton Anderson", "tldr": "", "abstract": "There are an increasing number of domains in which artificial intelligence (AI) systems both surpass human ability and accurately model human behavior. This introduces the possibility of algorithmically-informed teaching in these domains through more relatable AI partners and deeper insights into human decision-making. Critical to achieving this goal, however, is coherently modeling human behavior at various skill levels. Chess is an ideal model system for conducting research into this kind of human-AI alignment, with its rich history as a pivotal testbed for AI research, mature superhuman AI systems like AlphaZero, and precise measurements of skill via chess rating systems. Previous work in modeling human decision-making in chess uses completely independent models to capture human style at different skill levels, meaning they lack coherence in their ability to adapt to the full spectrum of human improvement and are ultimately limited in their effectiveness as AI partners and teaching tools. In this work, we propose a unified modeling approach for human-AI alignment in chess that coherently captures human style across different skill levels and directly captures how people improve. Recognizing the complex, non-linear nature of human learning, we introduce a skill-aware attention mechanism to dynamically integrate players\u2019 strengths with encoded chess positions, enabling our model to be sensitive to evolving player skill. Our experimental results demonstrate that this unified framework significantly enhances the alignment between AI and human players across a diverse range of expertise levels, paving the way for deeper insights into human decision-making and AI-guided teaching tools.", "keywords": "Human-AI Alignment;Action Prediction;Chess;Skill-aware Attention", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Zhenwei Tang;Difan Jiao;Reid McIlroy-Young;Jon Kleinberg;Siddhartha Sen;Ashton Anderson", "authorids": "~Zhenwei_Tang1;~Difan_Jiao1;~Reid_McIlroy-Young1;~Jon_Kleinberg3;~Siddhartha_Sen1;~Ashton_Anderson1", "gender": "M;M;M;M;;", "homepage": "https://lilv98.github.io/;;https://reidmcy.com/;http://www.cs.cornell.edu/home/kleinber/;http://sidsen.org;http://www.cs.toronto.edu/~ashton/", "dblp": "271/4450.html;362/0706.html;196/4704;https://dblp.uni-trier.de/pid/k/JonMKleinberg.html;;21/8524", "google_scholar": "R46GZk0AAAAJ;HTuHhzQAAAAJ;https://scholar.google.ca/citations?user=7Tclf3kAAAAJ;VX7d5EQAAAAJ;;https://scholar.google.co.uk/citations?user=FMSltawAAAAJ", "orcid": "0000-0002-8742-9146;;0000-0001-9104-4145;0000-0002-1929-2512;;", "linkedin": "zhenwei-tang-631611250/;difan-jiao/;;;;", "or_profile": "~Zhenwei_Tang1;~Difan_Jiao1;~Reid_McIlroy-Young1;~Jon_Kleinberg3;~Siddhartha_Sen1;~Ashton_Anderson1", "aff": "University of Toronto;Department of Computer Science, University of Toronto;Harvard University;;Microsoft Research;Department of Computer Science, University of Toronto", "aff_domain": "cs.toronto.edu;cs.toronto.edu;harvard.edu;;research.microsoft.com;cs.toronto.edu", "position": "PhD student;PhD student;Postdoc;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntang2024maia,\ntitle={Maia-2: A Unified Model for Human-{AI} Alignment in Chess},\nauthor={Zhenwei Tang and Difan Jiao and Reid McIlroy-Young and Jon Kleinberg and Siddhartha Sen and Ashton Anderson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XWlkhRn14K}\n}", "github": "", "reviewers": "vWC3;zUsz;n9LM;nN7Z", "pdf_size": 4647279, "rating": "3;4;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;2;3;3", "wc_summary": "73;173;90;60", "wc_strengths": "84;283;28;23", "wc_weaknesses": "132;225;59;40", "wc_questions": "9;93;96;143", "wc_limitations": "7;172;1;3", "wc_review": "305;946;274;269", "wc_reply_reviewers": "135;286;12;102", "wc_reply_authors": "315;1700;0;5", "reply_reviewers": "1;2;1;1", "reply_authors": "3;4;1;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 44.02839992550263 ], "wc_strengths_avg": [ 104.5, 105.80288275845795 ], "wc_weaknesses_avg": [ 114.0, 72.70832139445939 ], "wc_questions_avg": [ 85.25, 48.282372559765534 ], "wc_limitations_avg": [ 45.75, 72.92247595906217 ], "wc_review_avg": [ 448.5, 287.56260188000806 ], "wc_reply_reviewers_avg": [ 133.75, 98.75822750535775 ], "wc_reply_authors_avg": [ 505.0, 701.6320260649452 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3204838686011504244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.toronto.edu;cs.toronto.edu;harvard.edu;;research.microsoft.com;cs.toronto.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Toronto;Harvard University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.utoronto.ca;https://www.harvard.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "U of T;Harvard;MSR", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "MoLE: Enhancing Human-centric Text-to-image Diffusion via Mixture of Low-rank Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94763", "id": "XWzw2dsjWd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XWzw2dsjWd", "openreview": "https://openreview.net/forum?id=XWzw2dsjWd", "poster": "/media/PosterPDFs/NeurIPS%202024/94763.png?t=1730689830.8066347", "project": "", "author_site": "Jie Zhu, Yixiong Chen, Mingyu Ding, Ping Luo, Leye Wang, Jingdong Wang", "tldr": "", "abstract": "Text-to-image diffusion has attracted vast attention due to its impressive image-generation capabilities. However, when it comes to human-centric text-to-image generation, particularly in the context of faces and hands, the results often fall short of naturalness due to insufficient training priors. We alleviate the issue in this work from two perspectives. 1) From the data aspect, we carefully collect a human-centric dataset comprising over one million high-quality human-in-the-scene images and two specific sets of close-up images of faces and hands. These datasets collectively provide a rich prior knowledge base to enhance the human-centric image generation capabilities of the diffusion model. 2) On the methodological front, we propose a simple yet effective method called Mixture of Low-rank Experts (MoLE) by considering low-rank modules trained on close-up hand and face images respectively as experts. This concept draws inspiration from our observation of low-rank refinement, where a low-rank module trained by a customized close-up dataset has the potential to enhance the corresponding image part when applied at an appropriate scale. To validate the superiority of MoLE in the context of human-centric image generation compared to state-of-the-art, we construct two benchmarks and perform evaluations with diverse metrics and human studies. Datasets, model, and code are released at https://sites.google.com/view/mole4diffuser/.", "keywords": "Human-centric generation;diffusion;low-rank;MoE", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jie Zhu;Yixiong Chen;Mingyu Ding;Ping Luo;Leye Wang;Jingdong Wang", "authorids": "~Jie_Zhu3;~Yixiong_Chen1;~Mingyu_Ding1;~Ping_Luo2;~Leye_Wang1;~Jingdong_Wang1", "gender": "M;M;M;M;M;", "homepage": "https://schuture.github.io/;https://dingmyu.github.io/;https://wangleye.github.io/;https://jingdongwang2017.github.io/;https://scholar.google.com/citations?hl=zh-CN&user=ZL506kEAAAAJ;http://luoping.me/", "dblp": ";188/5243;07/8764;49/3441;;54/4989-2.html", "google_scholar": "bVHYVXQAAAAJ;w4yTWwoAAAAJ;;z5SPCmgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=en", "orcid": ";0000-0001-6556-8359;;0000-0002-4888-4445;;0000-0002-6685-7950", "linkedin": "yixiong-chen-111b49171/;dingmyu/;;;;", "or_profile": "~Yixiong_Chen1;~Mingyu_Ding1;~Leye_Wang1;~Jingdong_Wang1;~zhu_Jie1;~Luo_Ping2", "aff": "Johns Hopkins University;University of California, Berkeley;Peking University;Baidu;Peking University;The University of Hong Kong", "aff_domain": "jh.edu;berkeley.edu;pku.edu.cn;baidu.com;pku.edu.cn;hku.hk", "position": "PhD student;Postdoc;Assistant Professor;Chief Scientist for Computer Vision;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhu2024mole,\ntitle={Mo{LE}: Enhancing Human-centric Text-to-image Diffusion via Mixture of Low-rank Experts},\nauthor={Jie Zhu and Yixiong Chen and Mingyu Ding and Ping Luo and Leye Wang and Jingdong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XWzw2dsjWd}\n}", "github": "", "reviewers": "AE6c;5LWQ;Rq6G;NVnC", "pdf_size": 18820729, "rating": "5;6;7;7", "confidence": "4;5;5;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "77;60;74;78", "wc_strengths": "29;34;95;90", "wc_weaknesses": "122;133;150;82", "wc_questions": "18;99;58;4", "wc_limitations": "1;29;6;12", "wc_review": "247;355;383;266", "wc_reply_reviewers": "0;35;18;21", "wc_reply_authors": "0;89;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 7.224091638399945 ], "wc_strengths_avg": [ 62.0, 30.602287496198713 ], "wc_weaknesses_avg": [ 121.75, 25.02373872945448 ], "wc_questions_avg": [ 44.75, 37.06329046374593 ], "wc_limitations_avg": [ 12.0, 10.559356040971437 ], "wc_review_avg": [ 312.75, 57.508151596099836 ], "wc_reply_reviewers_avg": [ 18.5, 12.459935794377111 ], "wc_reply_authors_avg": [ 22.25, 38.53813046840752 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=31327203858596925&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "jh.edu;berkeley.edu;pku.edu.cn;baidu.com;pku.edu.cn;hku.hk", "author_num": 6, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Johns Hopkins University;University of California, Berkeley;Peking University;Baidu;University of Hong Kong", "aff_unique_dep": ";;;Baidu, Inc.;", "aff_unique_url": "https://www.jhu.edu;https://www.berkeley.edu;http://www.pku.edu.cn;https://www.baidu.com;https://www.hku.hk", "aff_unique_abbr": "JHU;UC Berkeley;Peking U;Baidu;HKU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Optimus-1: Hybrid Multimodal Memory Empowered Agents Excel in Long-Horizon Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94762", "id": "XXOMCwZ6by", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XXOMCwZ6by", "openreview": "https://openreview.net/forum?id=XXOMCwZ6by", "poster": "/media/PosterPDFs/NeurIPS%202024/94762.png?t=1729667858.5414798", "project": "", "author_site": "Zaijing Li, Yuquan Xie, Rui Shao, Gongwei Chen, Dongmei Jiang, Liqiang Nie", "tldr": "", "abstract": "Building a general-purpose agent is a long-standing vision in the field of artificial intelligence. Existing agents have made remarkable progress in many domains, yet they still struggle to complete long-horizon tasks in an open world. We attribute this to the lack of necessary world knowledge and multimodal experience that can guide agents through a variety of long-horizon tasks. In this paper, we propose a Hybrid Multimodal Memory module to address the above challenges. It 1) transforms knowledge into Hierarchical Directed Knowledge Graph that allows agents to explicitly represent and learn world knowledge, and 2) summarises historical information into Abstracted Multimodal Experience Pool that provide agents with rich references for in-context learning. On top of the Hybrid Multimodal Memory module, a multimodal agent, Optimus-1, is constructed with dedicated Knowledge-guided Planner and Experience-Driven Reflector, contributing to a better planning and reflection in the face of long-horizon tasks in Minecraft. Extensive experimental results show that Optimus-1 significantly outperforms all existing agents on challenging long-horizon task benchmarks, and exhibits near human-level performance on many tasks. In addition, we introduce various Multimodal Large Language Models (MLLMs) as the backbone of Optimus-1. Experimental results show that Optimus-1 exhibits strong generalization with the help of the Hybrid Multimodal Memory module, outperforming the GPT-4V baseline on many tasks.", "keywords": "Multimodal Agent;Multimodal Large Language Models;Multimodal In-context Learning", "primary_area": "robotics", "supplementary_material": "/attachment/2da2ea8f7e2e0c249cb277f09a4d63b4a5e01f19.zip", "author": "Zaijing Li;Yuquan Xie;Rui Shao;Gongwei Chen;Dongmei Jiang;Liqiang Nie", "authorids": "~Zaijing_Li1;~Yuquan_Xie1;~Rui_Shao1;~Gongwei_Chen1;~Dongmei_Jiang2;~Liqiang_Nie2", "gender": "M;M;M;M;F;M", "homepage": "https://scholar.google.com/citations?hl=en&user=TDBF2UoAAAAJ;https://github.com/xieyuquanxx;https://rshaojimmy.github.io/;;https://scholar.google.com/citations?user=Awsue7sAAAAJ&hl=en;https://liqiangnie.github.io/index.html", "dblp": "295/8692;;;237/9231;;92/8277", "google_scholar": "TDBF2UoAAAAJ;KO77A2oAAAAJ;https://scholar.google.com/citations?hl=en;Mpg0w3cAAAAJ;Awsue7sAAAAJ;yywVMhUAAAAJ", "orcid": "0000-0001-6409-8623;;0000-0003-0090-9604;0000-0002-0634-6075;;0000-0003-1476-0273", "linkedin": ";;;;;", "or_profile": "~Zaijing_Li1;~Yuquan_Xie1;~Rui_Shao1;~Gongwei_Chen1;~Dongmei_Jiang2;~Liqiang_Nie2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Peng Cheng Laboratory;Shandong University", "aff_domain": "stu.hit.edu.cn;stu.hit.edu.cn;hit.edu.cn;hit.edu.cn;pcl.ac.cn;sdu.edu.cn", "position": "PhD student;MS student;Full Professor;Postdoc;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nli2024optimus,\ntitle={Optimus-1: Hybrid Multimodal Memory Empowered Agents Excel in Long-Horizon Tasks},\nauthor={Zaijing Li and Yuquan Xie and Rui Shao and Gongwei Chen and Dongmei Jiang and Liqiang Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XXOMCwZ6by}\n}", "github": "", "reviewers": "3HCc;wzp9;AfEc;397y;mEzT", "pdf_size": 31393462, "rating": "4;6;6;6;8", "confidence": "4;4;4;4;4", "soundness": "2;1;3;3;3", "novelty": "2;2;3;2;3", "presentation": "3;1;3;3;3", "wc_summary": "32;95;96;66;95", "wc_strengths": "87;35;80;44;51", "wc_weaknesses": "83;359;94;67;252", "wc_questions": "2;2;120;4;95", "wc_limitations": "1;28;1;7;61", "wc_review": "205;519;391;188;554", "wc_reply_reviewers": "132;338;26;25;370", "wc_reply_authors": "998;910;0;476;1889", "reply_reviewers": "3;2;1;1;3", "reply_authors": "5;4;1;4;6", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 76.8, 25.118917174114017 ], "wc_strengths_avg": [ 59.4, 20.441134997841974 ], "wc_weaknesses_avg": [ 171.0, 115.23367563347097 ], "wc_questions_avg": [ 44.6, 51.96768226503853 ], "wc_limitations_avg": [ 19.6, 22.957351763650788 ], "wc_review_avg": [ 371.4, 152.8641226710833 ], "wc_reply_reviewers_avg": [ 178.2, 149.05891452710904 ], "wc_reply_authors_avg": [ 854.6, 627.2631345775073 ], "reply_reviewers_avg": [ 2.0, 0.8944271909999159 ], "reply_authors_avg": [ 4.0, 1.6733200530681511 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4560997270384159437&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stu.hit.edu.cn;stu.hit.edu.cn;hit.edu.cn;hit.edu.cn;pcl.ac.cn;sdu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Harbin Institute of Technology;Pengcheng Laboratory;Shandong University", "aff_unique_dep": ";Peng Cheng Laboratory;", "aff_unique_url": "http://www.hit.edu.cn/;http://www.pcl.ac.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "HIT;PCL;SDU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Training-Free Open-Ended Object Detection and Segmentation via Attention as Prompts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94761", "id": "XXVfj4P8nr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XXVfj4P8nr", "openreview": "https://openreview.net/forum?id=XXVfj4P8nr", "poster": "/media/PosterPDFs/NeurIPS%202024/94761.png?t=1729407393.5430236", "project": "", "author_site": "Zhiwei Lin, Yongtao Wang, Zhi Tang", "tldr": "", "abstract": "Existing perception models achieve great success by learning from large amounts of labeled data, but they still struggle with open-world scenarios. To alleviate this issue, researchers introduce open-set perception tasks to detect or segment unseen objects in the training set. However, these models require predefined object categories as inputs during inference, which are not available in real-world scenarios. Recently, researchers pose a new and more practical problem, i.e., open-ended object detection, which discovers unseen objects without any object categories as inputs. In this paper, we present VL-SAM, a training-free framework that combines the generalized object recognition model (i.e., Vision-Language Model) with the generalized object localization model (i.e., Segment-Anything Model), to address the open-ended object detection and segmentation task. Without additional training, we connect these two generalized models with attention maps as the prompts. Specifically, we design an attention map generation module by employing head aggregation and a regularized attention flow to aggregate and propagate attention maps across all heads and layers in VLM, yielding high-quality attention maps. Then, we iteratively sample positive and negative points from the attention maps with a prompt generation module and send the sampled points to SAM to segment corresponding objects. Experimental results on the long-tail instance segmentation dataset (LVIS) show that our method surpasses the previous open-ended method on the object detection task and can provide additional instance segmentation masks. Besides, VL-SAM achieves favorable performance on the corner case object detection dataset (CODA), demonstrating the effectiveness of VL-SAM in real-world applications. Moreover, VL-SAM exhibits good model generalization that can incorporate various VLMs and SAMs.", "keywords": "open-world;open-ended;vision language model;segment anything model;autonomous driving", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhiwei Lin;Yongtao Wang;Zhi Tang", "authorids": "~Zhiwei_Lin1;~Yongtao_Wang1;~Zhi_Tang2", "gender": "M;M;M", "homepage": ";https://www.icst.pku.edu.cn/xztd/1298696.htm;https://www.wict.pku.edu.cn/cpdp/kydw/ggcy/1297369.htm", "dblp": ";48/4720;16/4222-1", "google_scholar": "ClU7ua0AAAAJ;Zna90HQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-6021-8357", "linkedin": ";;", "or_profile": "~Zhiwei_Lin1;~Yongtao_Wang1;~Zhi_Tang2", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlin2024trainingfree,\ntitle={Training-Free Open-Ended Object Detection and Segmentation via Attention as Prompts},\nauthor={Zhiwei Lin and Yongtao Wang and Zhi Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XXVfj4P8nr}\n}", "github": "", "reviewers": "bgFd;6VtQ;71UW", "pdf_size": 4062550, "rating": "4;5;5", "confidence": "4;4;4", "soundness": "3;2;3", "novelty": "2;3;2", "presentation": "2;2;2", "wc_summary": "69;125;44", "wc_strengths": "42;102;71", "wc_weaknesses": "344;408;115", "wc_questions": "38;56;17", "wc_limitations": "16;45;16", "wc_review": "509;736;263", "wc_reply_reviewers": "148;30;19", "wc_reply_authors": "194;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 4.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 79.33333333333333, 33.86574801903671 ], "wc_strengths_avg": [ 71.66666666666667, 24.499433100017278 ], "wc_weaknesses_avg": [ 289.0, 125.78023162113618 ], "wc_questions_avg": [ 37.0, 15.937377450509228 ], "wc_limitations_avg": [ 25.666666666666668, 13.670731102939918 ], "wc_review_avg": [ 502.6666666666667, 193.1533645117843 ], "wc_reply_reviewers_avg": [ 65.66666666666667, 58.3913996711464 ], "wc_reply_authors_avg": [ 64.66666666666667, 91.45247703346013 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14337973116469006800&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "MedJourney: Benchmark and Evaluation of Large Language Models over Patient Clinical Journey", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97646", "id": "XXaIoJyYs7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XXaIoJyYs7", "openreview": "https://openreview.net/forum?id=XXaIoJyYs7", "poster": "", "project": "", "author_site": "Xian Wu, Yutian Zhao, Yunyan Zhang, Jiageng Wu, Zhihong Zhu, Yingying Zhang, Yi Ouyang, Ziheng Zhang, Huimin WANG, zhenxi Lin, Jie Yang, Shuang Zhao, Yefeng Zheng", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated remarkable capabilities in language understanding and generation, leading to their widespread adoption across various fields. Among these, the medical field is particularly well-suited for LLM applications, as many medical tasks can be enhanced by LLMs. Despite the existence of benchmarks for evaluating LLMs in medical question-answering and exams, there remains a notable gap in assessing LLMs' performance in supporting patients throughout their entire hospital visit journey in real-world clinical practice. In this paper, we address this gap by dividing a typical patient's clinical journey into four stages: planning, access, delivery and ongoing care. For each stage, we introduce multiple tasks and corresponding datasets, resulting in a comprehensive benchmark comprising 12 datasets, of which five are newly introduced, and seven are constructed from existing datasets. This proposed benchmark facilitates a thorough evaluation of LLMs' effectiveness across the entire patient journey, providing insights into their practical application in clinical settings. Additionally, we evaluate three categories of LLMs against this benchmark: 1) proprietary LLM services such as GPT-4; 2) public LLMs like QWen; and 3) specialized medical LLMs, like HuatuoGPT2. Through this extensive evaluation, we aim to provide a better understanding of LLMs' performance in the medical domain, ultimately contributing to their more effective deployment in healthcare settings.", "keywords": "LLM;Clinical Jourey;Benchmark;Evaluation;Medical", "primary_area": "", "supplementary_material": "", "author": "Xian Wu;Yutian Zhao;Yunyan Zhang;Jiageng Wu;Zhihong Zhu;Yingying Zhang;Yi Ouyang;Ziheng Zhang;Huimin WANG;Zhenxi Lin;Jie Yang;Shuang Zhao;Yefeng Zheng", "authorids": "~Xian_Wu1;~Yutian_Zhao1;~Yunyan_Zhang2;~Jiageng_Wu1;~Zhihong_Zhu1;~Yingying_Zhang5;~Yi_Ouyang2;~Ziheng_Zhang2;~Huimin_WANG1;~Zhenxi_Lin1;~Jie_Yang13;~Shuang_Zhao4;~Yefeng_Zheng3", "gender": "M;F;;M;;;;M;F;M;M;M;", "homepage": ";https://github.com/RoseZhao929;;;;;;https://github.com/ZihengZZH;;;https://ylab.top;;", "dblp": "03/5595;;;211/3607.html;;;20/1654.html;;58/1340.html;https://dblp.uni-trier.de/pid/266/5093;12/1198-39;;", "google_scholar": "lslB5jkAAAAJ;;https://scholar.google.com/citations?view_op=list_works;vz5MGDoAAAAJ;;;;https://scholar.google.co.uk/citations?user=hQP375oAAAAJ;OD3XsWIAAAAJ;https://scholar.google.com.hk/citations?user=A88nUyEAAAAJ;zHidLmYAAAAJ;;", "orcid": "0000-0003-1118-9710;;;0000-0003-0984-0818;;;;;0000-0002-6147-8310;;0000-0001-5696-363X;0000-0001-7081-6177;", "linkedin": ";;;;;;;;;;jiesutd/;;", "or_profile": "~Xian_Wu1;~Yutian_Zhao1;~Yunyan_Zhang2;~Jiageng_Wu1;~Zhihong_Zhu1;~Yingying_Zhang5;~Yi_Ouyang2;~Ziheng_Zhang2;~Huimin_WANG1;~Zhenxi_Lin1;~Jie_Yang13;~Shuang_Zhao4;~Yefeng_Zheng3", "aff": "Tencent;Tencent AI Lab;Jarvis Research Center, Tencent YouTu Lab;Zhejiang University;;;Tencent;Tencent;Jarvis Research Center, Tencent YouTu Lab;Tencent;Harvard University;Central South University;", "aff_domain": "tencent.com;tencent.com;tencent.com;zju.edu.cn;;;tencent.com;tencent.com;tencent.com;tencent.com;harvard.edu;csu.edu.cn;", "position": "Principal Researcher;Researcher;Researcher;PhD student;;;Researcher;Researcher;Researcher;Researcher;Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nwu2024medjourney,\ntitle={MedJourney: Benchmark and Evaluation of Large Language Models over Patient Clinical Journey},\nauthor={Xian Wu and Yutian Zhao and Yunyan Zhang and Jiageng Wu and Zhihong Zhu and Yingying Zhang and Yi Ouyang and Ziheng Zhang and Huimin WANG and Zhenxi Lin and Jie Yang and Shuang Zhao and Yefeng Zheng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XXaIoJyYs7}\n}", "github": "", "reviewers": "G1cc;8tiK;rzWr;1xRj;nvGG", "pdf_size": 1118264, "rating": "5;6;6;7;7", "confidence": "4;3;4;4;4", "wc_summary_and_contributions": "95;100;149;92;86", "wc_strengths": "58;61;69;45;3", "wc_improvement": "258;4;160;114;1", "wc_limitations": "116;28;165;128;1", "wc_correctness": "108;22;264;77;1", "wc_clarity": "88;147;54;89;1", "wc_relation_to_prior_work": "156;12;136;8;1", "wc_documentation": "12;1;174;20;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "892;376;1172;574;96", "wc_reply_reviewers": "259;228;0;0;0", "wc_reply_authors": "71;29;0;0;0", "reply_reviewers": "1;4;0;0;0", "reply_authors": "5;5;1;2;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "wc_summary_and_contributions_avg": [ 104.4, 22.756098083810414 ], "wc_strengths_avg": [ 47.2, 23.412817002659036 ], "wc_improvement_avg": [ 107.4, 97.47122652352334 ], "wc_limitations_avg": [ 87.6, 62.41986863171052 ], "wc_correctness_avg": [ 94.4, 92.97010272125121 ], "wc_clarity_avg": [ 75.8, 47.880685041047606 ], "wc_relation_to_prior_work_avg": [ 62.6, 68.47948597937926 ], "wc_documentation_avg": [ 41.6, 66.5870858350176 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 622.0, 378.1259049575948 ], "wc_reply_reviewers_avg": [ 97.4, 119.69227209807657 ], "wc_reply_authors_avg": [ 20.0, 27.86395521098898 ], "reply_reviewers_avg": [ 1.0, 1.5491933384829668 ], "reply_authors_avg": [ 2.8, 1.8330302779823362 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.13363062095621225, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1087056672913297644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tencent.com;tencent.com;tencent.com;zju.edu.cn;;;tencent.com;tencent.com;tencent.com;tencent.com;harvard.edu;csu.edu.cn;", "author_num": 13, "aff_unique_index": "0;0;0;1;0;0;0;0;2;3", "aff_unique_norm": "Tencent;Zhejiang University;Harvard University;Central South University", "aff_unique_dep": "Tencent Holdings Limited;;;", "aff_unique_url": "https://www.tencent.com;https://www.zju.edu.cn;https://www.harvard.edu;https://www.csu.edu.cn", "aff_unique_abbr": "Tencent;ZJU;Harvard;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Gradient Rewiring for Editable Graph Neural Network Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94760", "id": "XY2qrq7cXM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XY2qrq7cXM", "openreview": "https://openreview.net/forum?id=XY2qrq7cXM", "poster": "", "project": "", "author_site": "Zhimeng Jiang, Zirui Liu, Xiaotian Han, Qizhang Feng, Hongye Jin, Qiaoyu Tan, Kaixiong Zhou, Na Zou, Xia Hu", "tldr": "", "abstract": "Deep neural networks are ubiquitously adopted in many applications, such as computer vision, natural language processing, and graph analytics. However, well-trained neural networks can make prediction errors after deployment as the world changes. \\textit{Model editing} involves updating the base model to correct prediction errors with less accessible training data and computational resources.\nDespite recent advances in model editors in computer vision and natural language processing, editable training in graph neural networks (GNNs) is rarely explored. The challenge with editable GNN training lies in the inherent information aggregation across neighbors, which can lead model editors to affect the predictions of other nodes unintentionally. In this paper, we first observe the gradient of cross-entropy loss for the target node and training nodes with significant inconsistency, which indicates that directly fine-tuning the base model using the loss on the target node deteriorates the performance on training nodes. Motivated by the gradient inconsistency observation, we propose a simple yet effective \\underline{G}radient \\underline{R}ewiring method for \\underline{E}ditable graph neural network training, named \\textbf{GRE}. Specifically, we first store the anchor gradient of the loss on training nodes to preserve the locality. Subsequently, we rewire the gradient of the loss on the target node to preserve performance on the training node using anchor gradient. Experiments demonstrate the effectiveness of GRE on various model architectures and graph datasets in terms of multiple editing situations. The source code is available at \\url{https://github.com/zhimengj0326/Gradient_rewiring_editing}.", "keywords": "Gradient;Graph Neural Networks", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Zhimeng Jiang;Zirui Liu;Xiaotian Han;Qizhang Feng;Hongye Jin;Qiaoyu Tan;Kaixiong Zhou;Na Zou;Xia Hu", "authorids": "~Zhimeng_Jiang1;~Zirui_Liu1;~Xiaotian_Han1;~Qizhang_Feng1;~Hongye_Jin1;~Qiaoyu_Tan2;~Kaixiong_Zhou1;~Na_Zou2;~Xia_Hu4", "gender": "M;M;M;M;M;M;M;F;M", "homepage": "http://www.zhimengjiang.com/;https://zirui-ray-liu.github.io/;https://ahxt.github.io/;;https://github.com/Mooler0410;https://qiaoyu-tan.github.io/;https://kaixiong-zhou.github.io/;https://nzou1.github.io/;https://cs.rice.edu/~xh37/index.html", "dblp": "217/3235;196/8629-1.html;;323/5667.html;268/7929;197/5465.html;178/7315;152/0090-1.html;256/9406.html", "google_scholar": "5Es3Yk4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;Uromx98AAAAJ;;;V9bOnV4AAAAJ;zMspIjIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "orcid": "0000-0001-6933-3952;;;0000-0002-2574-0270;;0000-0001-8999-968X;0000-0001-5226-8736;0000-0003-1984-795X;", "linkedin": ";;;qizhang-feng-355478197/;;;;na-zou-a1721535/;", "or_profile": "~Zhimeng_Jiang1;~Zirui_Liu1;~Xiaotian_Han1;~Qizhang_Feng1;~Hongye_Jin1;~Qiaoyu_Tan2;~Kaixiong_Zhou1;~Na_Zou2;~Xia_Hu2", "aff": "VISA Research;Rice University;Texas A&M University;Texas A&M;Texas A&M;New York University Shanghai;Massachusetts Institute of Technology;University of Houston;Rice University", "aff_domain": "visa.com;rice.edu;tamu.edu;tamu.edu;tamu.edu;nyu.edu;mit.edu;uh.edu;rice.edu", "position": "Researcher;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2024gradient,\ntitle={Gradient Rewiring for Editable Graph Neural Network Training},\nauthor={Zhimeng Jiang and Zirui Liu and Xiaotian Han and Qizhang Feng and Hongye Jin and Qiaoyu Tan and Kaixiong Zhou and Na Zou and Xia Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XY2qrq7cXM}\n}", "github": "", "reviewers": "X56x;KXu3;LHi3;VRYA", "pdf_size": 1133425, "rating": "5;5;5;6", "confidence": "4;3;3;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "2;2;2;2", "wc_summary": "87;66;83;96", "wc_strengths": "103;95;25;55", "wc_weaknesses": "366;860;38;121", "wc_questions": "8;5;132;57", "wc_limitations": "9;8;8;7", "wc_review": "573;1034;286;336", "wc_reply_reviewers": "50;290;0;57", "wc_reply_authors": "52;85;60;100", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 83.0, 10.88577052853862 ], "wc_strengths_avg": [ 69.5, 31.476181471074284 ], "wc_weaknesses_avg": [ 346.25, 320.1892994776684 ], "wc_questions_avg": [ 50.5, 51.383363066268835 ], "wc_limitations_avg": [ 8.0, 0.7071067811865476 ], "wc_review_avg": [ 557.25, 295.83219483349 ], "wc_reply_reviewers_avg": [ 99.25, 112.30176979905526 ], "wc_reply_authors_avg": [ 74.25, 19.2142525225417 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=111497535978913014&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "visa.com;rice.edu;tamu.edu;tamu.edu;tamu.edu;nyu.edu;mit.edu;uh.edu;rice.edu", "author_num": 9, "aff_unique_index": "0;1;2;2;2;3;4;5;1", "aff_unique_norm": "VISA;Rice University;Texas A&M University;New York University;Massachusetts Institute of Technology;University of Houston", "aff_unique_dep": "Research;;;;;", "aff_unique_url": "https://www.visa.com/;https://www.rice.edu;https://www.tamu.edu;https://www.nyu.edu;https://web.mit.edu;https://www.uh.edu", "aff_unique_abbr": "VISA;Rice;TAMU;NYU;MIT;UH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "XYDMAckWMa", "title": "Explicit Flow Matching: On The Theory of Flow Matching Algorithms with Applications", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper proposes a novel method, Explicit Flow Matching (ExFM), for training and analyzing flow-based generative models. ExFM leverages a theoretically grounded loss function, ExFM loss (a tractable form of Flow Matching (FM) loss), to demonstrably reduce variance during training, leading to faster convergence and more stable learning. Based on theoretical analysis of these formulas, we derived exact expressions for the vector field (and score in stochastic cases) for model examples (in particular, for separating multiple exponents), and in some simple cases, exact solutions for trajectories. In addition, we also investigated simple cases of Diffusion Generative Models by adding a stochastic term and obtained an explicit form of the expression for score. While the paper emphasizes the theoretical underpinnings of ExFM, it also showcases its effectiveness through numerical experiments on various datasets, including high-dimensional ones. Compared to traditional FM methods, ExFM achieves superior performance in terms of both learning speed and final outcomes.", "keywords": "Flow Matching;Deep Learning Theory;Generative modeling;Variance Reduction;Stochastic Differential Equations", "primary_area": "learning_theory", "supplementary_material": "", "author": "Gleb Ryzhakov;Svetlana Pavlova;Egor Sevriugov;Ivan Oseledets", "authorids": "~Gleb_Ryzhakov1;~Svetlana_Pavlova1;~Egor_Sevriugov1;~Ivan_Oseledets1", "gender": "F;M;M;M", "homepage": "https://github.com/Demizur;;http://oseledets.github.io;", "dblp": ";;56/7175;222/9593", "google_scholar": ";https://scholar.google.ru/citations?user=sYnICQkAAAAJ;https://scholar.google.ru/citations?user=5kMqBQEAAAAJ;ZqmOtcwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Svetlana_Pavlova1;~Egor_Sevriugov1;~Ivan_Oseledets1;~Gleb_Vladimirovich_Ryzhakov1", "aff": "Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;skoltech.ru;inm.ras.ru;skoltech.ru", "position": "PhD student;PhD student;Researcher;Principal Researcher", "bibtex": "@misc{\nanonymous2024explicit,\ntitle={Explicit Flow Matching: On The Theory of Flow Matching Algorithms with Applications},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=XYDMAckWMa}\n}", "github": "", "project": "", "reviewers": "LEat;ctfF;xno6;kwRu", "site": "https://openreview.net/forum?id=XYDMAckWMa", "pdf_size": 6516404, "rating": "2;3;3;5", "confidence": "5;3;4;4", "soundness": "2;1;3;2", "novelty": "1;2;2;3", "presentation": "2;1;1;2", "wc_summary": "128;69;40;49", "wc_strengths": "8;19;10;74", "wc_weaknesses": "430;123;117;548", "wc_questions": "7;163;152;99", "wc_limitations": "21;7;36;79", "wc_review": "594;381;355;849", "wc_reply_reviewers": "57;40;0;487", "wc_reply_authors": "146;21;0;1646", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;1;5", "rating_avg": [ 3.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 71.5, 34.267331381360876 ], "wc_strengths_avg": [ 27.75, 27.021981792607292 ], "wc_weaknesses_avg": [ 304.5, 189.16989718240058 ], "wc_questions_avg": [ 105.25, 61.66998864926116 ], "wc_limitations_avg": [ 35.75, 26.994212342648563 ], "wc_review_avg": [ 544.75, 198.62826460501537 ], "wc_reply_reviewers_avg": [ 146.0, 197.9608547162797 ], "wc_reply_authors_avg": [ 453.25, 690.8926743713527 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ih2CRZYCJE8J:scholar.google.com/&scioq=Explicit+Flow+Matching:+On+The+Theory+of+Flow+Matching+Algorithms+with+Applications&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Institute of Numerical Mathematics", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;", "aff_unique_abbr": "Skoltech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation;" }, { "title": "How does Gradient Descent Learn Features --- A Local Analysis for Regularized Two-Layer Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94759", "id": "XYw051ZmUn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XYw051ZmUn", "openreview": "https://openreview.net/forum?id=XYw051ZmUn", "poster": "/media/PosterPDFs/NeurIPS%202024/94759.png?t=1732083594.5192688", "project": "", "author_site": "Mo Zhou, Rong Ge", "tldr": "", "abstract": "The ability of learning useful features is one of the major advantages of neural networks. Although recent works show that neural network can operate in a neural tangent kernel (NTK) regime that does not allow feature learning, many works also demonstrate the potential for neural networks to go beyond NTK regime and perform feature learning. Recently, a line of work highlighted the feature learning capabilities of the early stages of gradient-based training. In this paper we consider another mechanism for feature learning via gradient descent through a local convergence analysis. We show that once the loss is below a certain threshold, gradient descent with a carefully regularized objective will capture ground-truth directions. We further strengthen this local convergence analysis by incorporating early-stage feature learning analysis. Our results demonstrate that feature learning not only happens at the initial gradient steps, but can also occur towards the end of training.", "keywords": "deep learning theory;feature learning;optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Mo Zhou;Rong Ge", "authorids": "~Mo_Zhou3;~Rong_Ge1", "gender": "M;M", "homepage": "https://mozhou7.github.io/;https://users.cs.duke.edu/~rongge/", "dblp": ";89/6869-1.html", "google_scholar": "j_SEFF8AAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Mo_Zhou3;~Rong_Ge1", "aff": "Duke University;Duke University", "aff_domain": "duke.edu;duke.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhou2024how,\ntitle={How does Gradient Descent Learn Features --- A Local Analysis for Regularized Two-Layer Neural Networks},\nauthor={Mo Zhou and Rong Ge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XYw051ZmUn}\n}", "github": "", "reviewers": "6HCx;2mhB;WTVj", "pdf_size": 666167, "rating": "6;6;6", "confidence": "3;2;4", "soundness": "3;2;3", "novelty": "2;3;2", "presentation": "3;2;2", "wc_summary": "84;89;26", "wc_strengths": "64;26;44", "wc_weaknesses": "35;37;462", "wc_questions": "1;443;3", "wc_limitations": "2;7;3", "wc_review": "186;602;538", "wc_reply_reviewers": "0;47;49", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 66.33333333333333, 28.592928418676454 ], "wc_strengths_avg": [ 44.666666666666664, 15.520595635763755 ], "wc_weaknesses_avg": [ 178.0, 200.81998572519285 ], "wc_questions_avg": [ 149.0, 207.89099707939894 ], "wc_limitations_avg": [ 4.0, 2.160246899469287 ], "wc_review_avg": [ 442.0, 182.89523412781062 ], "wc_reply_reviewers_avg": [ 32.0, 22.642143596988927 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fv1TCocyGmUJ:scholar.google.com/&scioq=How+does+Gradient+Descent+Learn+Features+---+A+Local+Analysis+for+Regularized+Two-Layer+Neural+Networks&hl=en&as_sdt=0,23", "gs_version_total": 5, "email": "duke.edu;duke.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "No Free Delivery Service: Epistemic limits of passive data collection in complex social systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94758", "id": "XZ0fpoAKEB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XZ0fpoAKEB", "openreview": "https://openreview.net/forum?id=XZ0fpoAKEB", "poster": "/media/PosterPDFs/NeurIPS%202024/94758.png?t=1733857460.6238055", "project": "", "tldr": "", "abstract": "Rapid model validation via the train-test paradigm has been a key driver for the breathtaking progress in machine learning and AI. However, modern AI systems often depend on a combination of tasks and data collection practices that violate all assumptions ensuring test validity. Yet, without rigorous model validation we cannot ensure the intended outcomes of deployed AI systems, including positive social impact, nor continue to advance AI research in a scienti\ufb01cally sound way. In this paper, I will show that for widely considered inference settings in complex social systems the train-test paradigm does not only lack a justi\ufb01cation but is indeed invalid for any risk estimator, including counterfactual and causal estimators, with high probability. These formal impossibility results highlight a fundamental epistemic issue, i.e., that for key tasks in modern AI we cannot know whether models are valid under current data collection practices. Importantly, this includes variants of both recommender systems and reasoning via large language models, and neither na\u00efve scaling nor limited benchmarks are suited to address this issue. I am illustrating these results via the widely used MovieLens benchmark and conclude by discussing the implications of these results for AI in social systems, including possible remedies such as participatory data curation and open science.", "keywords": "validity;model validation;complex systems;epistemology;scaling;large language models;recommender systems", "primary_area": "evaluation", "supplementary_material": "", "author": "Maximilian Nickel", "authorids": "~Maximilian_Nickel1", "gender": "M", "homepage": "https://mnick.github.io/", "dblp": "83/10622", "google_scholar": "KDqGTIUAAAAJ", "orcid": "0000-0001-5006-0827", "linkedin": "", "or_profile": "~Maximilian_Nickel1", "aff": "Meta Facebook", "aff_domain": "fb.com", "position": "Research Scientist", "bibtex": "@inproceedings{\nnickel2024no,\ntitle={No Free Delivery Service: Epistemic limits of passive data collection in complex social systems},\nauthor={Maximilian Nickel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XZ0fpoAKEB}\n}", "github": "", "reviewers": "RViy;qRpv;9VqZ", "pdf_size": 2212542, "rating": "5;6;6", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "95;71;103", "wc_strengths": "83;23;114", "wc_weaknesses": "84;191;108", "wc_questions": "61;3;3", "wc_limitations": "45;1;5", "wc_review": "368;289;333", "wc_reply_reviewers": "0;20;0", "wc_reply_authors": "0;63;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 89.66666666666667, 13.59738536958076 ], "wc_strengths_avg": [ 73.33333333333333, 37.77418283548816 ], "wc_weaknesses_avg": [ 127.66666666666667, 45.84272631023983 ], "wc_questions_avg": [ 22.333333333333332, 27.34146220587984 ], "wc_limitations_avg": [ 17.0, 19.86621923433512 ], "wc_review_avg": [ 330.0, 32.321303604073066 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 21.0, 29.698484809834994 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9gF9d9FeQTIJ:scholar.google.com/&scioq=No+Free+Delivery+Service:+Epistemic+limits+of+passive+data+collection+in+complex+social+systems&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "fb.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Polyhedral Complex Derivation from Piecewise Trilinear Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94757", "id": "XZ4XSUTGRb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XZ4XSUTGRb", "openreview": "https://openreview.net/forum?id=XZ4XSUTGRb", "poster": "/media/PosterPDFs/NeurIPS%202024/94757.png?t=1733210684.7750905", "project": "", "tldr": "", "abstract": "Recent advancements in visualizing deep neural networks provide insights into their structures and mesh extraction from Continuous Piecewise Affine (CPWA) functions. Meanwhile, developments in neural surface representation learning incorporate non-linear positional encoding, addressing issues like spectral bias; however, this poses challenges in applying mesh extraction techniques based on CPWA functions. Focusing on trilinear interpolating methods as positional encoding, we present theoretical insights and an analytical mesh extraction, showing the transformation of hypersurfaces to flat planes within the trilinear region under the eikonal constraint. Moreover, we introduce a method for approximating intersecting points among three hypersurfaces contributing to broader applications. We empirically validate correctness and parsimony through chamfer distance and efficiency, and angular distance, while examining the correlation between the eikonal loss and the planarity of the hypersurfaces.", "keywords": "Polyhedral Complex;Neural Radiance Fields;3D Mesh", "primary_area": "generative_models", "supplementary_material": "/attachment/5c0309c1a89be10c914fb5f2f93281722ebc1e39.zip", "author": "Jin-Hwa Kim", "authorids": "~Jin-Hwa_Kim1", "gender": "Unspecified", "homepage": "http://wityworks.com", "dblp": "48/258", "google_scholar": "https://scholar.google.co.kr/citations?user=3f2wPekAAAAJ", "orcid": "0000-0002-0423-0415", "linkedin": "", "or_profile": "~Jin-Hwa_Kim1", "aff": "NAVER", "aff_domain": "navercorp.com", "position": "Research Scientist", "bibtex": "@inproceedings{\nkim2024polyhedral,\ntitle={Polyhedral Complex Derivation from Piecewise Trilinear Networks},\nauthor={Jin-Hwa Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XZ4XSUTGRb}\n}", "github": "", "reviewers": "1oW4;FmNY;1zQb;MSAr", "pdf_size": 4314003, "rating": "5;5;5;5", "confidence": "3;4;1;2", "soundness": "2;2;3;3", "novelty": "2;3;2;2", "presentation": "1;2;2;2", "wc_summary": "37;133;67;171", "wc_strengths": "31;68;21;124", "wc_weaknesses": "421;223;64;280", "wc_questions": "164;186;43;141", "wc_limitations": "5;356;1;28", "wc_review": "658;966;196;744", "wc_reply_reviewers": "18;693;159;344", "wc_reply_authors": "54;1241;37;500", "reply_reviewers": "1;2;1;3", "reply_authors": "2;4;2;5", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.0, 52.848841046895245 ], "wc_strengths_avg": [ 61.0, 40.367065783878814 ], "wc_weaknesses_avg": [ 247.0, 127.89644248375323 ], "wc_questions_avg": [ 133.5, 54.61913584083879 ], "wc_limitations_avg": [ 97.5, 149.60030080183662 ], "wc_review_avg": [ 641.0, 280.42289492835636 ], "wc_reply_reviewers_avg": [ 303.5, 252.8542070047481 ], "wc_reply_authors_avg": [ 458.0, 488.699805606673 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-6v8HlVZQpsJ:scholar.google.com/&scioq=Polyhedral+Complex+Derivation+from+Piecewise+Trilinear+Networks&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "navercorp.com", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "NAVER Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.naver.com", "aff_unique_abbr": "NAVER", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "title": "MultiOrg: A Multi-rater Organoid-detection Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97645", "id": "XZYUdhMvjL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XZYUdhMvjL", "openreview": "https://openreview.net/forum?id=XZYUdhMvjL", "poster": "", "project": "", "author_site": "Christina Bukas, Harshavardhan Subramanian, Fenja See, Carina Steinchen, Ivan Ezhov, Gowtham Boosarpu, Sara Asgharpour, Gerald Burgstaller, Mareike Lehmann, Florian Kofler, Marie Piraud", "tldr": "", "abstract": "High-throughput image analysis in the biomedical domain has gained significant attention in recent years, driving advancements in drug discovery, disease prediction, and personalized medicine. Organoids, specifically, are an active area of research, providing excellent models for human organs and their functions. Automating the quantification of organoids in microscopy images would provide an effective solution to overcome substantial manual quantification bottlenecks, particularly in high-throughput image analysis. However, there is a notable lack of open biomedical datasets, in contrast to other domains, such as autonomous driving, and, notably, only few of them have attempted to quantify annotation uncertainty. In this work, we present MultiOrg a comprehensive organoid dataset tailored for object detection tasks with uncertainty quantification. This dataset comprises over 400 high-resolution 2d microscopy images and curated annotations of more than 60,000 organoids. Most importantly, it includes three label sets for the test data, independently annotated by two experts at distinct time points. We additionally provide a benchmark for organoid detection, and make the best model available through an easily installable, interactive plugin for the popular image visualization tool Napari, to perform organoid quantification.", "keywords": "object-detection;multi-rater;computer-vision;organoid", "primary_area": "", "supplementary_material": "", "author": "Christina Bukas;Harshavardhan Subramanian;Fenja See;Carina Steinchen;Ivan Ezhov;Gowtham Boosarpu;Sara Asgharpour;Gerald Burgstaller;Mareike Lehmann;Florian Kofler;Marie Piraud", "authorids": "~Christina_Bukas1;~Harshavardhan_Subramanian1;~Fenja_See1;~Carina_Steinchen1;~Ivan_Ezhov1;~Gowtham_Boosarpu1;~Sara_Asgharpour1;~Gerald_Burgstaller1;~Mareike_Lehmann1;~Florian_Kofler1;~Marie_Piraud1", "gender": "F;M;F;F;;M;F;M;;;", "homepage": ";https://www.helmholtz.ai/themenmenue/our-research/consultant-teams/helmholtz-ai-consultants-helmholtz-munich/index.html;;https://www.helmholtz-munich.de/en/lhi/research-groups/lehmann-lab;;https://www.linkedin.com/in/gowtham-boosarpu-27036b116/;;https://www.helmholtz-munich.de/en/lhi/research-groups/burgstaller-lab;https://www.helmholtz-munich.de/en/lhi/research-groups/lehmann-lab;;", "dblp": ";;;;;;;;;;", "google_scholar": ";;https://scholar.google.com/scholar?hl=de;;https://scholar.google.com/citations?hl=en;;;;;;", "orcid": "0000-0001-9913-8525;;;;;;0000-0001-5059-8758;;0000-0002-8601-8206;;", "linkedin": ";shvardhan;;;;;;;;;", "or_profile": "~Christina_Bukas1;~Harshavardhan_Subramanian1;~Fenja_See1;~Carina_Steinchen1;~Ivan_Ezhov1;~Gowtham_Boosarpu1;~Sara_Asgharpour1;~Gerald_Burgstaller1;~Mareike_Lehmann1;~Florian_Kofler1;~Marie_Piraud1", "aff": "Helmholtz Center Munich;Helmholtz Munich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Munich, Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Technical University Munich;Phillips-Universit\u00e4t Marburg;Helmholtz Zentrum M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Phillips-Universit\u00e4t Marburg;;", "aff_domain": "helmholtz-munich.de;helmholtz-munich.de;lmu.de;campus.lmu.de;tum.de;uni-marburg.de;helmholtz-munich.de;lmu.de;uni-marburg.de;;", "position": "Researcher;Researcher;Undergrad student;Undergrad student;Postdoc;PhD student;PhD student;Principal Researcher;Assistant Professor;;", "bibtex": "@inproceedings{\nbukas2024multiorg,\ntitle={MultiOrg: A Multi-rater Organoid-detection Dataset},\nauthor={Christina Bukas and Harshavardhan Subramanian and Fenja See and Carina Steinchen and Ivan Ezhov and Gowtham Boosarpu and Sara Asgharpour and Gerald Burgstaller and Mareike Lehmann and Florian Kofler and Marie Piraud},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XZYUdhMvjL}\n}", "github": "", "reviewers": "xpFX;uS6Q;rcwd;MGmJ", "pdf_size": 10873000, "rating": "3;6;7;8", "confidence": "5;4;4;3", "wc_summary_and_contributions": "291;50;91;35", "wc_strengths": "95;26;20;32", "wc_improvement": "28;98;102;67", "wc_limitations": "25;7;23;19", "wc_correctness": "15;6;33;3", "wc_clarity": "12;24;1;5", "wc_relation_to_prior_work": "17;4;20;21", "wc_documentation": "5;5;14;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "489;221;305;188", "wc_reply_reviewers": "79;0;0;2", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 116.75, 102.67028537994817 ], "wc_strengths_avg": [ 43.25, 30.177599308096063 ], "wc_improvement_avg": [ 73.75, 29.684802509028085 ], "wc_limitations_avg": [ 18.5, 6.98212002188447 ], "wc_correctness_avg": [ 14.25, 11.691342951089922 ], "wc_clarity_avg": [ 10.5, 8.73212459828649 ], "wc_relation_to_prior_work_avg": [ 15.5, 6.800735254367722 ], "wc_documentation_avg": [ 7.25, 3.897114317029974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.75, 116.7569591073697 ], "wc_reply_reviewers_avg": [ 20.25, 33.92915413033458 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.9449111825230682, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R3cqGu-H1vQJ:scholar.google.com/&scioq=MultiOrg:+A+Multi-rater+Organoid-detection+Dataset&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "helmholtz-munich.de;helmholtz-munich.de;lmu.de;campus.lmu.de;tum.de;uni-marburg.de;helmholtz-munich.de;lmu.de;uni-marburg.de;;", "author_num": 11, "aff_unique_index": "0;1;2;2;3;4;1;2;4", "aff_unique_norm": "Helmholtz Center Munich;Helmholtz Zentrum M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Technical University of Munich;Phillips-Universit\u00e4t Marburg", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.helmholtz-munich.de;https://www.helmholtz-muenchen.de;https://www.lmu.de;https://www.tum.de;https://www.uni-marburg.de", "aff_unique_abbr": "HMGU;HMGU;LMU;TUM;Uni Marburg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Semi-Random Matrix Completion via Flow-Based Adaptive Reweighting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94756", "id": "XZp1uP0hh2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XZp1uP0hh2", "openreview": "https://openreview.net/forum?id=XZp1uP0hh2", "poster": "", "project": "", "author_site": "Jonathan Kelner, Jerry Li, Allen Liu, Aaron Sidford, Kevin Tian", "tldr": "", "abstract": "We consider the well-studied problem of completing a rank-$r$, $\\mu$-incoherent matrix $\\mathbf{M} \\in \\mathbb{R}^{d \\times d}$ from incomplete observations. We focus on this problem in the semi-random setting where each entry is independently revealed with probability at least $p = \\frac{\\textup{poly}(r, \\mu, \\log d)}{d}$. \nWhereas multiple nearly-linear time algorithms have been established in the more specialized fully-random setting where each entry is revealed with probablity exactly $p$, the only known nearly-linear time algorithm in the semi-random setting is due to [CG18], whose sample complexity has a polynomial dependence on the inverse accuracy and condition number and thus cannot achieve high-accuracy recovery. \nOur main result is the first high-accuracy nearly-linear time algorithm for solving semi-random matrix completion, and an extension to the noisy observation setting.\nOur result builds upon the recent short-flat decomposition framework of [KLLST23a, KLLST23b] and leverages fast algorithms for flow problems on graphs to solve adaptive reweighting subproblems efficiently.", "keywords": "matrix completion;semi-random model;flow solver;short-flat decomposition;adaptive reweighting", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jonathan Kelner;Jerry Li;Allen Liu;Aaron Sidford;Kevin Tian", "authorids": "~Jonathan_Kelner1;~Jerry_Li1;~Allen_Liu1;~Aaron_Sidford1;~Kevin_Tian4", "gender": "M;M;M;;", "homepage": "https://math.mit.edu/~kelner/;https://jerryzli.github.io/;https://aliu42.github.io/;;https://kjtian.github.io", "dblp": "64/4772.html;;225/5531.html;;", "google_scholar": ";4zybTq4AAAAJ;;;", "orcid": ";;;;", "linkedin": ";;allen-liu-284b28127/;;", "or_profile": "~Jonathan_Kelner1;~Jerry_Li1;~Allen_Liu1;~Aaron_Sidford1;~Kevin_Tian4", "aff": "Massachusetts Institute of Technology;Microsoft;Massachusetts Institute of Technology;;University of Texas at Austin", "aff_domain": "mit.edu;microsoft.com;mit.edu;;utexas.edu", "position": "Full Professor;Senior Researcher;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nkelner2024semirandom,\ntitle={Semi-Random Matrix Completion via Flow-Based Adaptive Reweighting},\nauthor={Jonathan Kelner and Jerry Li and Allen Liu and Aaron Sidford and Kevin Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XZp1uP0hh2}\n}", "github": "", "reviewers": "orB2;RnDV;cp6D;JbFK;kK6Q", "pdf_size": 711966, "rating": "5;6;7;7;8", "confidence": "1;3;2;2;3", "soundness": "3;3;4;3;4", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "43;76;73;113;409", "wc_strengths": "1;89;133;69;125", "wc_weaknesses": "1;166;139;1;123", "wc_questions": "1;109;28;1;75", "wc_limitations": "1;31;2;1;1", "wc_review": "47;471;375;185;733", "wc_reply_reviewers": "0;13;24;0;8", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "0;1;1;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 2.2, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 142.8, 134.9405795155779 ], "wc_strengths_avg": [ 83.4, 47.36918829787988 ], "wc_weaknesses_avg": [ 86.0, 70.75026501717149 ], "wc_questions_avg": [ 42.8, 42.73827324541786 ], "wc_limitations_avg": [ 7.2, 11.906300852909775 ], "wc_review_avg": [ 362.2, 236.74661560410954 ], "wc_reply_reviewers_avg": [ 9.0, 8.988882021697693 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 0.8, 0.4000000000000001 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6289709020331512, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RX5vACjOWJIJ:scholar.google.com/&scioq=Semi-Random+Matrix+Completion+via+Flow-Based+Adaptive+Reweighting&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "mit.edu;microsoft.com;mit.edu;;utexas.edu", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft;University of Texas at Austin", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com;https://www.utexas.edu", "aff_unique_abbr": "MIT;Microsoft;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Pure Message Passing Can Estimate Common Neighbor for Link Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94755", "id": "Xa3dVaolKo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xa3dVaolKo", "openreview": "https://openreview.net/forum?id=Xa3dVaolKo", "poster": "/media/PosterPDFs/NeurIPS%202024/94755.png?t=1729662455.069897", "project": "", "author_site": "Kaiwen Dong, Zhichun Guo, Nitesh Chawla", "tldr": "", "abstract": "Message Passing Neural Networks (MPNNs) have emerged as the {\\em de facto} standard in graph representation learning. However, when it comes to link prediction, they are not always superior to simple heuristics such as Common Neighbor (CN). This discrepancy stems from a fundamental limitation: while MPNNs excel in node-level representation, they stumble with encoding the joint structural features essential to link prediction, like CN. To bridge this gap, we posit that, by harnessing the orthogonality of input vectors, pure message-passing can indeed capture joint structural features. Specifically, we study the proficiency of MPNNs in approximating CN heuristics. Based on our findings, we introduce the Message Passing Link Predictor (MPLP), a novel link prediction model. MPLP taps into quasi-orthogonal vectors to estimate link-level structural features, all while preserving the node-level complexities. We conduct experiments on benchmark datasets from various domains, where our method consistently outperforms the baseline methods, establishing new state-of-the-arts.", "keywords": "Graph Neural Networks;Link Prediction", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Kaiwen Dong;Zhichun Guo;Nitesh V Chawla", "authorids": "~Kaiwen_Dong1;~Zhichun_Guo1;~Nitesh_V_Chawla1", "gender": "M;;", "homepage": "https://barcavin.github.io/;;", "dblp": "301/7629;;", "google_scholar": "bKccdZYAAAAJ;;", "orcid": "0000-0001-8244-9562;;", "linkedin": ";;", "or_profile": "~Kaiwen_Dong1;~Zhichun_Guo1;~Nitesh_V_Chawla1", "aff": "University of Notre Dame;;", "aff_domain": "nd.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\ndong2024pure,\ntitle={Pure Message Passing Can Estimate Common Neighbor for Link Prediction},\nauthor={Kaiwen Dong and Zhichun Guo and Nitesh V Chawla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xa3dVaolKo}\n}", "github": "", "reviewers": "CYaw;L71H;h157", "pdf_size": 12054380, "rating": "5;5;8", "confidence": "5;1;4", "soundness": "2;2;4", "novelty": "3;2;4", "presentation": "3;3;4", "wc_summary": "103;45;170", "wc_strengths": "111;27;234", "wc_weaknesses": "774;200;54", "wc_questions": "93;2;79", "wc_limitations": "1;7;21", "wc_review": "1082;281;558", "wc_reply_reviewers": "1931;330;30", "wc_reply_authors": "2664;1028;0", "reply_reviewers": "4;3;1", "reply_authors": "6;3;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 1.699673171197595 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 106.0, 51.07510809255979 ], "wc_strengths_avg": [ 124.0, 85.00588214941364 ], "wc_weaknesses_avg": [ 342.6666666666667, 310.7682237438199 ], "wc_questions_avg": [ 58.0, 40.00833246545858 ], "wc_limitations_avg": [ 9.666666666666666, 8.379870059984357 ], "wc_review_avg": [ 640.3333333333334, 332.1488956611009 ], "wc_reply_reviewers_avg": [ 763.6666666666666, 834.4660301986868 ], "wc_reply_authors_avg": [ 1230.6666666666667, 1096.9744249019159 ], "reply_reviewers_avg": [ 2.6666666666666665, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 2.0548046676563256 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.2773500981126146, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7173311046947546504&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nd.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "When Your AIs Deceive You: Challenges of Partial Observability in Reinforcement Learning from Human Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94754", "id": "XcbgkjWSJ7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XcbgkjWSJ7", "openreview": "https://openreview.net/forum?id=XcbgkjWSJ7", "poster": "/media/PosterPDFs/NeurIPS%202024/94754.png?t=1732529619.851263", "project": "", "author_site": "Leon Lang, Davis Foote, Stuart J Russell, Anca Dragan, Erik Jenner, Scott Emmons", "tldr": "", "abstract": "Past analyses of reinforcement learning from human feedback (RLHF) assume that the human evaluators fully observe the environment. What happens when human feedback is based only on partial observations? We formally define two failure cases: deceptive inflation and overjustification. Modeling the human as Boltzmann-rational w.r.t. a belief over trajectories, we prove conditions under which RLHF is guaranteed to result in policies that deceptively inflate their performance, overjustify their behavior to make an impression, or both. Under the new assumption that the human's partial observability is known and accounted for, we then analyze how much information the feedback process provides about the return function. We show that sometimes, the human's feedback determines the return function uniquely up to an additive constant, but in other realistic cases, there is irreducible ambiguity. We propose exploratory research directions to help tackle these challenges and experimentally validate both the theoretical concerns and potential mitigations, and caution against blindly applying RLHF in partially observable settings.", "keywords": "RLHF;Partial Observability;Deception;AI Alignment;Reward Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Leon Lang;Davis Foote;Stuart Russell;Anca Dragan;Erik Jenner;Scott Emmons", "authorids": "~Leon_Lang1;~Davis_Foote1;~Stuart_Russell1;~Anca_Dragan1;~Erik_Jenner1;~Scott_Emmons1", "gender": "M;;M;F;M;M", "homepage": "https://langleon.github.io/;;https://people.eecs.berkeley.edu/~russell/;http://www.ancadragan.com/;https://ejenner.com;http://scottemmons.com/", "dblp": "255/5021;190/7201;;;295/8670;180/5699", "google_scholar": "E3ae_sMAAAAJ;;https://scholar.google.com.tw/citations?user=KJGrjCAAAAAJ;;https://scholar.google.com/citations?hl=en;LoT0z6oAAAAJ", "orcid": "0000-0002-1950-2831;;;;0000-0002-6037-5715;0000-0002-7946-7046", "linkedin": "leon-lang/;;;;erik-jenner/;scott-emmons-5258005b/", "or_profile": "~Leon_Lang1;~Davis_Foote1;~Stuart_Russell1;~Anca_Dragan1;~Erik_Jenner1;~Scott_Emmons1", "aff": "University of Amsterdam;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "uva.nl;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Researcher;Full Professor;Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nlang2024when,\ntitle={When Your {AI}s Deceive You: Challenges of Partial Observability in Reinforcement Learning from Human Feedback},\nauthor={Leon Lang and Davis Foote and Stuart Russell and Anca Dragan and Erik Jenner and Scott Emmons},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XcbgkjWSJ7}\n}", "github": "", "reviewers": "v92A;rT4E;zgLN;LhS3", "pdf_size": 2395174, "rating": "4;5;6;7", "confidence": "3;3;2;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "111;115;71;84", "wc_strengths": "71;34;82;148", "wc_weaknesses": "222;124;57;134", "wc_questions": "48;42;5;14", "wc_limitations": "1;54;29;1", "wc_review": "453;369;244;381", "wc_reply_reviewers": "330;24;14;15", "wc_reply_authors": "699;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 18.38987493160299 ], "wc_strengths_avg": [ 83.75, 41.13620667976084 ], "wc_weaknesses_avg": [ 134.25, 58.67868011467197 ], "wc_questions_avg": [ 27.25, 18.15729880791744 ], "wc_limitations_avg": [ 21.25, 22.094965489902897 ], "wc_review_avg": [ 361.75, 75.19100677607662 ], "wc_reply_reviewers_avg": [ 95.75, 135.30036030994154 ], "wc_reply_authors_avg": [ 174.75, 302.6758786226613 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10275664631557807057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uva.nl;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Amsterdam;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.berkeley.edu", "aff_unique_abbr": "UvA;UC Berkeley", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Netherlands;United States" }, { "title": "Era3D: High-Resolution Multiview Diffusion using Efficient Row-wise Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94753", "id": "XdCJAYYiTP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XdCJAYYiTP", "openreview": "https://openreview.net/forum?id=XdCJAYYiTP", "poster": "/media/PosterPDFs/NeurIPS%202024/94753.png?t=1729592642.217899", "project": "", "author_site": "Peng Li, Yuan Liu, Xiaoxiao Long, Feihu Zhang, Cheng Lin, Mengfei Li, Xingqun Qi, Shanghang Zhang, Wei Xue, Wenhan Luo, Ping Tan, Wenping Wang, Qifeng Liu, Yike Guo", "tldr": "", "abstract": "In this paper, we introduce **Era3D**, a novel multiview diffusion method that generates high-resolution multiview images from a single-view image. Despite significant advancements in multiview generation, existing methods still suffer from camera prior mismatch, inefficacy, and low resolution, resulting in poor-quality multiview images. Specifically, these methods assume that the input images should comply with a predefined camera type, e.g. a perspective camera with a fixed focal length, leading to distorted shapes when the assumption fails. Moreover, the full-image or dense multiview attention they employ leads to a dramatic explosion of computational complexity as image resolution increases, resulting in prohibitively expensive training costs. To bridge the gap between assumption and reality, Era3D first proposes a diffusion-based camera prediction module to estimate the focal length and elevation of the input image, which allows our method to generate images without shape distortions. Furthermore, a simple but efficient attention layer, named row-wise attention, is used to enforce epipolar priors in the multiview diffusion, facilitating efficient cross-view information fusion. Consequently, compared with state-of-the-art methods, Era3D generates high-quality multiview images with up to a 512\u00d7512 resolution while reducing computation complexity of multiview attention by 12x times. Comprehensive experiments demonstrate the superior generation power of Era3D- it can reconstruct high-quality and detailed 3D meshes from diverse single-view input images, significantly outperforming baseline multiview diffusion methods.", "keywords": "3D generation;multiview attention;epipolar", "primary_area": "generative_models", "supplementary_material": "/attachment/bf776f75b81efab9bdea8f671e7f6d1a77ce336a.zip", "author": "Peng Li;Yuan Liu;Xiaoxiao Long;Feihu Zhang;Cheng Lin;Mengfei Li;Xingqun Qi;Shanghang Zhang;Wei Xue;Wenhan Luo;Ping Tan;Wenping Wang;Qifeng Liu;Yike Guo", "authorids": "~Peng_Li14;~Yuan_Liu3;~Xiaoxiao_Long2;~Feihu_Zhang3;~Cheng_Lin1;~Mengfei_Li2;~Xingqun_Qi1;~Shanghang_Zhang4;~Wei_Xue5;~Wenhan_Luo1;~Ping_Tan2;~Wenping_Wang1;~Qifeng_Liu1;~Yike_Guo1", "gender": "M;M;M;M;M;M;M;M;M;M;M;M;F;M", "homepage": ";https://liuyuan-pal.github.io/;;https://clinplayer.github.io/;https://github.com/lmfethan;https://xingqunqi-lab.github.io/QXQPage/;http://www.wei-xue.com;https://whluo.github.io/;http://www.cs.sfu.ca/~pingtan/;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html;;https://cse.hkust.edu.hk/admin/people/faculty/profile/yikeguo;https://www.shanghangzhang.com/;https://xxlong0.github.io/", "dblp": ";87/2948-25;120/0587;;;226/4403;;64/9877;;;23/992.html;g/YikeGuo;95/11531;262/3688", "google_scholar": "8eTLCkwAAAAJ;yRAHVcgAAAAJ;nWT4vFcAAAAJ;KAL4c2cAAAAJ;m39CfiwAAAAJ;https://scholar.google.com.hk/citations?user=3tO41a8AAAAJ;77lSoywAAAAJ;g20Q12MAAAAJ;XhyKVFMAAAAJ;28shvv0AAAAJ;scR1CXcAAAAJ;https://scholar.google.com.tw/citations?user=-0q6cIYAAAAJ;voqw10cAAAAJ;W3G5kZEAAAAJ", "orcid": ";;;;;0000-0002-9772-5707;;0000-0002-5697-4168;0000-0002-4506-6973;0000-0002-2284-3952;0000-0001-6191-076X;0009-0005-8401-282X;;0000-0002-3386-8805", "linkedin": ";;;;;;;wenhan-luo-a1843480/;;;qifeng-liu-483b3227/;;;", "or_profile": "~Peng_Li14;~Yuan_Liu3;~Feihu_Zhang3;~Cheng_Lin1;~Mengfei_Li2;~Xingqun_Qi1;~Wei_Xue5;~Wenhan_Luo1;~Ping_Tan2;~Wenping_Wang1;~Qifeng_Liu1;~Yike_Guo1;~Shanghang_Zhang1;~XIAOXIAO_LONG1", "aff": "Hong Kong University of Science and Technology;The University of Hong Kong;DreamTech;Tencent;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Sun Yat-sen University;Hong Kong University of Science and Technology;Texas A&M University - College Station;Hong Kong University of Science and Technology;Imperial College London;Peking University;University of Hong Kong", "aff_domain": "connect.ust.hk;hku.hk;dreamtech.ai;tencent.com;ust.hk;ust.edu.hk;ust.hk;sysu.edu.cn;ust.hk;tamu.edu;hkust.edu.hk;imperial.ac.uk;pku.edu.cn;hku.hk", "position": "PhD student;PhD student;Instructor;Researcher;PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor;Full Professor;Full Professor;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nli2024erad,\ntitle={Era3D: High-Resolution Multiview Diffusion using Efficient Row-wise Attention},\nauthor={Peng Li and Yuan Liu and Xiaoxiao Long and Feihu Zhang and Cheng Lin and Mengfei Li and Xingqun Qi and Shanghang Zhang and Wei Xue and Wenhan Luo and Ping Tan and Wenping Wang and Qifeng Liu and Yike Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XdCJAYYiTP}\n}", "github": "", "reviewers": "a9Xx;5Kpg;aLmz", "pdf_size": 14609684, "rating": "5;6;7", "confidence": "4;5;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;2", "wc_summary": "37;88;41", "wc_strengths": "30;135;63", "wc_weaknesses": "93;45;238", "wc_questions": "50;161;151", "wc_limitations": "51;1;62", "wc_review": "261;430;555", "wc_reply_reviewers": "72;64;500", "wc_reply_authors": "189;44;326", "reply_reviewers": "2;1;2", "reply_authors": "3;2;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 55.333333333333336, 23.156472577277878 ], "wc_strengths_avg": [ 76.0, 43.840620433565945 ], "wc_weaknesses_avg": [ 125.33333333333333, 82.04199466668817 ], "wc_questions_avg": [ 120.66666666666667, 50.13537229896229 ], "wc_limitations_avg": [ 38.0, 26.54555832275273 ], "wc_review_avg": [ 415.3333333333333, 120.47221901980372 ], "wc_reply_reviewers_avg": [ 212.0, 203.67294043801368 ], "wc_reply_authors_avg": [ 186.33333333333334, 115.1414588910335 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8333236806640987513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "connect.ust.hk;hku.hk;dreamtech.ai;tencent.com;ust.hk;ust.edu.hk;ust.hk;sysu.edu.cn;ust.hk;tamu.edu;hkust.edu.hk;imperial.ac.uk;pku.edu.cn;hku.hk", "author_num": 14, "aff_unique_index": "0;1;2;3;0;0;0;4;0;5;0;6;7;1", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Hong Kong;DreamTech;Tencent;Sun Yat-sen University;Texas A&M University;Imperial College London;Peking University", "aff_unique_dep": ";;;Tencent Holdings Limited;;;;", "aff_unique_url": "https://www.ust.hk;https://www.hku.hk;;https://www.tencent.com;http://www.sysu.edu.cn/;https://www.tamu.edu;https://www.imperial.ac.uk;http://www.pku.edu.cn", "aff_unique_abbr": "HKUST;HKU;;Tencent;SYSU;TAMU;ICL;Peking U", "aff_campus_unique_index": "0;0;0;0;0;0;2;0;0", "aff_campus_unique": "Hong Kong SAR;;College Station", "aff_country_unique_index": "0;0;0;0;0;0;0;0;2;0;3;0;0", "aff_country_unique": "China;;United States;United Kingdom" }, { "title": "What If the Input is Expanded in OOD Detection?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94752", "id": "XfPiFRnuAS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XfPiFRnuAS", "openreview": "https://openreview.net/forum?id=XfPiFRnuAS", "poster": "/media/PosterPDFs/NeurIPS%202024/94752.png?t=1731558122.177066", "project": "", "author_site": "Boxuan Zhang, Jianing Zhu, Zengmao Wang, Tongliang Liu, Bo Du, Bo Han", "tldr": "", "abstract": "Out-of-distribution (OOD) detection aims to identify OOD inputs from unknown classes, which is important for the reliable deployment of machine learning models in the open world. Various scoring functions are proposed to distinguish it from in-distribution (ID) data. However, existing methods generally focus on excavating the discriminative information from a single input, which implicitly limits its representation dimension. In this work, we introduce a novel perspective, i.e., employing different common corruptions on the input space, to expand that. We reveal an interesting phenomenon termed *confidence mutation*, where the confidence of OOD data can decrease significantly under the corruptions, while the ID data shows a higher confidence expectation considering the resistance of semantic features. Based on that, we formalize a new scoring method, namely, *Confidence aVerage* (CoVer), which can capture the dynamic differences by simply averaging the scores obtained from different corrupted inputs and the original ones, making the OOD and ID distributions more separable in detection tasks. Extensive experiments and analyses have been conducted to understand and verify the effectiveness of CoVer.", "keywords": "Out-of-distribution Detection;Trustworthy Machine Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0e137c560ba6ea82d032d9ea18d34fae97ef029d.zip", "author": "Boxuan Zhang;Jianing Zhu;Zengmao Wang;Tongliang Liu;Bo Du;Bo Han", "authorids": "~Boxuan_Zhang1;~Jianing_Zhu2;~Zengmao_Wang1;~Tongliang_Liu1;~Bo_Du3;~Bo_Han1", "gender": "M;M;M;M;M;M", "homepage": "https://zbox1005.github.io/;https://zfancy.github.io/;http://jszy.whu.edu.cn/wangzengmao/zh_CN/more/1231604/jsjjgd/index.htm;https://tongliang-liu.github.io/;;https://bhanml.github.io/", "dblp": "231/1658/;129/6807;168/4719;150/6667;70/6443-1.html;241/0472-3", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;82uNA3MAAAAJ;https://scholar.google.com.hk/citations?user=tTqiJpQAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;Shy1gnMAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;;;", "linkedin": "%E5%8D%9A%E8%BD%A9-%E5%BC%A0-ab54442a1/;;;;;", "or_profile": "~Boxuan_Zhang1;~Jianing_Zhu2;~Zengmao_Wang1;~Tongliang_Liu1;~Bo_Du1;~bo_han2", "aff": "Wuhan University;RIKEN;Wuhan University;Mohamed bin Zayed University of Artificial Intelligence;Wuhan University;MBZUAI", "aff_domain": "whu.edu.cn;riken.jp;whu.edu.cn;mbzuai.ac.ae;whu.edu.cn;mbzuai.ac.ae", "position": "MS student;Research Intern;Associate Professor ;Affiliated Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024what,\ntitle={What If the Input is Expanded in {OOD} Detection?},\nauthor={Boxuan Zhang and Jianing Zhu and Zengmao Wang and Tongliang Liu and Bo Du and Bo Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XfPiFRnuAS}\n}", "github": "", "reviewers": "LFoY;HCvU;NZqw;hcf7", "pdf_size": 8474495, "rating": "4;5;7;8", "confidence": "5;3;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "121;132;74;124", "wc_strengths": "103;23;130;151", "wc_weaknesses": "364;141;50;73", "wc_questions": "159;101;82;130", "wc_limitations": "11;2;1;1", "wc_review": "758;399;337;479", "wc_reply_reviewers": "528;0;29;22", "wc_reply_authors": "1300;177;46;27", "reply_reviewers": "1;0;1;1", "reply_authors": "5;3;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 112.75, 22.730761095924613 ], "wc_strengths_avg": [ 101.75, 48.54572586747468 ], "wc_weaknesses_avg": [ 157.0, 124.10680883819388 ], "wc_questions_avg": [ 118.0, 29.197602641312866 ], "wc_limitations_avg": [ 3.75, 4.205650960315181 ], "wc_review_avg": [ 493.25, 160.9291381322848 ], "wc_reply_reviewers_avg": [ 144.75, 221.5280738416691 ], "wc_reply_authors_avg": [ 387.5, 529.9879715616195 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.19069251784911848, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7092456596001754078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;riken.jp;whu.edu.cn;mbzuai.ac.ae;whu.edu.cn;mbzuai.ac.ae", "author_num": 6, "aff_unique_index": "0;1;0;2;0;2", "aff_unique_norm": "Wuhan University;RIKEN;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.riken.jp;https://mbzuai.ac.ae", "aff_unique_abbr": "WHU;RIKEN;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;2", "aff_country_unique": "China;Japan;United Arab Emirates" }, { "title": "Continual Learning in the Frequency Domain", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94751", "id": "XgAzCLsJAq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XgAzCLsJAq", "openreview": "https://openreview.net/forum?id=XgAzCLsJAq", "poster": "/media/PosterPDFs/NeurIPS%202024/94751.png?t=1731576437.775636", "project": "", "author_site": "RuiQi Liu, Boyu Diao, Libo Huang, Zijia An, Zhulin An, Yongjun Xu", "tldr": "", "abstract": "Continual learning (CL) is designed to learn new tasks while preserving existing knowledge. Replaying samples from earlier tasks has proven to be an effective method to mitigate the forgetting of previously acquired knowledge. However, the current research on the training efficiency of rehearsal-based methods is insufficient, which limits the practical application of CL systems in resource-limited scenarios. The human visual system (HVS) exhibits varying sensitivities to different frequency components, enabling the efficient elimination of visually redundant information. Inspired by HVS, we propose a novel framework called Continual Learning in the Frequency Domain (CLFD). To our knowledge, this is the first study to utilize frequency domain features to enhance the performance and efficiency of CL training on edge devices. For the input features of the feature extractor, CLFD employs wavelet transform to map the original input image into the frequency domain, thereby effectively reducing the size of input feature maps. Regarding the output features of the feature extractor, CLFD selectively utilizes output features for distinct classes for classification, thereby balancing the reusability and interference of output features based on the frequency domain similarity of the classes across various tasks. Optimizing only the input and output features of the feature extractor allows for seamless integration of CLFD with various rehearsal-based methods. Extensive experiments conducted in both cloud and edge environments demonstrate that CLFD consistently improves the performance of state-of-the-art (SOTA) methods in both precision and training efficiency. Specifically, CLFD can increase the accuracy of the SOTA CL method by up to 6.83% and reduce the training time by 2.6\u00d7.", "keywords": "Continual Learning;Catastrophic Forgetting;Experience Replay;Efficient Deep Learning System;Frequency Domain Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/050f706f6f292867f9d2313bef579e506bb7d6a5.zip", "author": "RuiQi Liu;Boyu Diao;Libo Huang;Zijia An;Zhulin An;Yongjun Xu", "authorids": "~RuiQi_Liu3;~Boyu_Diao1;~Libo_Huang1;~Zijia_An1;~Zhulin_An1;~Yongjun_Xu1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/liuruiqi0520;;https://libo-huang.github.io/;https://github.com/cityofmountain;http://people.ucas.ac.cn/~anzhulin;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/200909/t20090917_2496751.html", "dblp": ";161/2139;;336/5976;85/734.html;55/6835-1", "google_scholar": ";RiopSv4AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;daBvGcMAAAAJ;l34KxTYAAAAJ", "orcid": ";0000-0002-8360-7718;0000-0002-4479-5840;0009-0004-2248-1279;0000-0002-7593-8293;0000-0001-6647-0986", "linkedin": ";;libohuang/;;;", "or_profile": "~RuiQi_Liu3;~Boyu_Diao1;~Libo_Huang1;~Zijia_An1;~Zhulin_An1;~Yongjun_Xu1", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences.;Institute of Computing Technology;Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;Associate Professor;Assistant Professor;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024continual,\ntitle={Continual Learning in the Frequency Domain},\nauthor={RuiQi Liu and Boyu Diao and Libo Huang and Zijia An and Zhulin An and Yongjun Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XgAzCLsJAq}\n}", "github": "", "reviewers": "KM8C;wRV3;2hDB;wmJG", "pdf_size": 2622685, "rating": "5;6;6;6", "confidence": "3;5;3;5", "soundness": "2;2;3;3", "novelty": "2;3;3;2", "presentation": "2;2;2;2", "wc_summary": "79;73;66;130", "wc_strengths": "36;54;55;158", "wc_weaknesses": "444;79;50;263", "wc_questions": "123;27;46;5", "wc_limitations": "51;22;12;8", "wc_review": "733;255;229;564", "wc_reply_reviewers": "331;33;10;430", "wc_reply_authors": "643;0;502;1750", "reply_reviewers": "2;1;1;4", "reply_authors": "3;1;2;5", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 87.0, 25.248762345905195 ], "wc_strengths_avg": [ 75.75, 48.0852108241193 ], "wc_weaknesses_avg": [ 209.0, 158.3682417658288 ], "wc_questions_avg": [ 50.25, 44.437456047798236 ], "wc_limitations_avg": [ 23.25, 16.813313177360374 ], "wc_review_avg": [ 445.25, 212.0499646309803 ], "wc_reply_reviewers_avg": [ 201.0, 183.0614650875492 ], "wc_reply_authors_avg": [ 723.75, 638.8851129115469 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1367633115367124346&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucas.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": ";Institute of Computing Technology;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "UCAS;CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "XgkrXpHl5j", "title": "Generalized Multimodal Fusion via Poisson-Nernst-Planck Equation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Previous studies have highlighted significant advancements in multimodal fusion. Nevertheless, such methods often encounter challenges regarding the efficacy of feature extraction, data integrity, consistency of feature dimensions, and adaptability across various downstream tasks.\nThis paper proposes a generalized multimodal fusion method (GMF) via the Poisson-Nernst-Planck (PNP) equation, which adeptly addresses the aforementioned issues.\nTheoretically, the optimization objective for traditional multimodal tasks is formulated and redefined by integrating information entropy and the flow of gradient backward step. Leveraging these theoretical insights, the PNP equation is applied to feature fusion, rethinking multimodal features through the framework of charged particles in physics and controlling their movement through dissociation, concentration, and reconstruction.\nBuilding on these theoretical foundations, GMF disassociated features which extracted by the unimodal feature extractor into modality-specific and modality-invariant subspaces, thereby reducing mutual information and subsequently lowering the entropy of downstream tasks. \nThe identifiability of the feature's origin enables our approach to function independently as a frontend, seamlessly integrated with a simple concatenation backend, or serve as a prerequisite for other modules.\nExperimental results on multiple downstream tasks show that the proposed GMF achieves performance close to the state-of-the-art (SOTA) accuracy while utilizing fewer parameters and computational resources. Furthermore, by integrating GMF with advanced fusion methods, we surpass the SOTA results.", "keywords": "Multimodal Fusion;Poisson-Nernst-Planck Equation;Feature Disentanglement;Downstream Task Adaptation", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/2798dbacaed1d9db49e5400611a55d54c9b81d87.zip", "author": "Jiayu Xiong;Jing Wang;Hengjing Xiang;Jun Xue;Chen Xu;Zhouqiang Jiang", "authorids": "~Jiayu_Xiong2;~Jing_Wang22;~Hengjing_Xiang1;~Jun_Xue1;~Chen_Xu21;~Zhouqiang_Jiang1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/Jiayu-Xiong;https://dblp.org/pid/02/736-49.html;http://junxue.tech/;;https://github.com/minhoooo1;https://github.com/Ag2Cr2O7", "dblp": "391/1126;;;;;", "google_scholar": ";;YxoCHH4AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;0009-0001-8465-011X;0009-0005-2565-9068;0009-0002-1304-4179;0009-0005-0468-6243", "linkedin": ";;;;;", "or_profile": "~Jiayu_Xiong2;~Jing_Wang22;~Jun_Xue1;~Chen_Xu21;~Zhouqiang_Jiang1;~Xiang_Hengjing1", "aff": "Huaqiao University Xiamen;Huaqiao University;Anhui Province Key Laboratory of Multimodal Cognitive Computation, \\\\ School of Computer Science and Technology, Anhui University, Hefei 230601, China ;Huaqiao University;Meetyou AI Lab;Central China Normal University", "aff_domain": "hqu.edu.cn;hqu.edu.cn;ahu.edu.cn;hqu.edu.cn;xiaoyouzi.com;ccnu.edu.cn", "position": "MS student;Full Professor;MS student;Undergrad student;Researcher;MS student", "bibtex": "@misc{\nanonymous2024generalized,\ntitle={Generalized Multimodal Fusion via Poisson-Nernst-Planck Equation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=XgkrXpHl5j}\n}", "github": "", "project": "", "reviewers": "TT1j;qupC;wF9x;Jvpa", "site": "https://openreview.net/forum?id=XgkrXpHl5j", "pdf_size": 1537579, "rating": "5;5;6;6", "confidence": "2;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "62;83;118;60", "wc_strengths": "77;40;108;54", "wc_weaknesses": "72;93;74;32", "wc_questions": "47;5;90;60", "wc_limitations": "32;1;57;15", "wc_review": "290;222;447;221", "wc_reply_reviewers": "0;16;147;31", "wc_reply_authors": "0;0;31;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 23.31710745354149 ], "wc_strengths_avg": [ 69.75, 25.733004099793714 ], "wc_weaknesses_avg": [ 67.75, 22.20782519743885 ], "wc_questions_avg": [ 50.5, 30.549140740780256 ], "wc_limitations_avg": [ 26.25, 20.873128658636684 ], "wc_review_avg": [ 295.0, 92.10591729091026 ], "wc_reply_reviewers_avg": [ 48.5, 57.91588728492381 ], "wc_reply_authors_avg": [ 7.75, 13.423393758658799 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0VuRXW0crycJ:scholar.google.com/&scioq=Generalized+Multimodal+Fusion+via+Poisson-Nernst-Planck+Equation&hl=en&as_sdt=0,14", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "Huaqiao University;Anhui University;Meetyou AI Lab;Central China Normal University", "aff_unique_dep": ";School of Computer Science and Technology;AI Lab;", "aff_unique_url": "https://www.hqu.edu.cn;http://www.ahu.edu.cn/;;http://www.ccnu.edu.cn", "aff_unique_abbr": "HQU;AHU;;CCNU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Xiamen;;Hefei", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Toward Robust Incomplete Multimodal Sentiment Analysis via Hierarchical Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94750", "id": "XgwTH95kCl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XgwTH95kCl", "openreview": "https://openreview.net/forum?id=XgwTH95kCl", "poster": "/media/PosterPDFs/NeurIPS%202024/94750.png?t=1730014979.2388344", "project": "", "author_site": "Mingcheng Li, Dingkang Yang, Yang Liu, Shunli Wang, Jiawei Chen, Shuaibing Wang, Jinjie Wei, Yue Jiang, Qingyao Xu, Xiaolu Hou, Mingyang Sun, Ziyun Qian, Dongliang Kou, Lihua Zhang", "tldr": "", "abstract": "Multimodal Sentiment Analysis (MSA) is an important research area that aims to understand and recognize human sentiment through multiple modalities. The complementary information provided by multimodal fusion promotes better sentiment analysis compared to utilizing only a single modality. Nevertheless, in real-world applications, many unavoidable factors may lead to situations of uncertain modality missing, thus hindering the effectiveness of multimodal modeling and degrading the model\u2019s performance. To this end, we propose a Hierarchical Representation Learning Framework (HRLF) for the MSA task under uncertain missing modalities. Specifically, we propose a fine-grained representation factorization module that sufficiently extracts valuable sentiment information by factorizing modality into sentiment-relevant and modality-specific representations through crossmodal translation and sentiment semantic reconstruction. Moreover, a hierarchical mutual information maximization mechanism is introduced to incrementally maximize the mutual information between multi-scale representations to align and reconstruct the high-level semantics in the representations. Ultimately, we propose a hierarchical adversarial learning mechanism that further aligns and adapts the latent distribution of sentiment-relevant representations to produce robust joint multimodal representations. Comprehensive experiments on three datasets demonstrate that HRLF significantly improves MSA performance under uncertain modality missing cases.", "keywords": "Human intention understanding;modality missing", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Mingcheng Li;Dingkang Yang;Yang Liu;Shunli Wang;Jiawei Chen;Shuaibing Wang;Jinjie Wei;Yue Jiang;Qingyao Xu;Xiaolu Hou;Mingyang Sun;Ziyun Qian;Dongliang Kou;Lihua Zhang", "authorids": "~Mingcheng_Li1;~Dingkang_Yang1;~Yang_Liu86;~Shunli_Wang1;~Jiawei_Chen16;~Shuaibing_Wang1;~Jinjie_Wei1;~Yue_Jiang10;~Qingyao_Xu2;~Xiaolu_Hou1;~Mingyang_Sun5;~Ziyun_Qian1;~Dongliang_Kou1;~Lihua_Zhang1", "gender": "M;M;M;M;M;M;M;M;M;F;M;M;M;M", "homepage": "https://github.com/limingcheng0710;https://ydk122024.github.io/;;https://shunli-wang.github.io/;;https://github.com/heartStrive;;;https://github.com/YKYBcold-k?tab=projects;https://github.com/SparklingH;;https://github.com/pvq31;https://github.com/ElonKou;https://faet.fudan.edu.cn/3f/9e/c23830a671646/page.htm", "dblp": "156/8265;304/1099;51/3710-246;https://dblp.org/rec/journals/corr/abs-2207-07869;;;;;;156/5240;;339/1300;348/9444;31/3003", "google_scholar": "GkSw7_0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;kA7yH9QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;ksiUbVkAAAAJ;;https://scholar.google.com.tw/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=i-Xg2Y8AAAAJ;;;;VDNoOWcAAAAJ;ElR8ETgAAAAJ;", "orcid": "0009-0000-6244-6081;0000-0003-1829-5671;0000-0002-1312-0146;0000-0002-3755-8724;0009-0007-7664-8582;;;0009-0005-0267-672X;;0009-0000-1054-419X;0009-0004-8217-6508;0009-0007-9800-1253;0009-0009-5792-6895;0000-0003-0467-4347", "linkedin": ";;;;;;;;;;;;;", "or_profile": "~Mingcheng_Li1;~Dingkang_Yang1;~Yang_Liu86;~Shunli_Wang1;~Jiawei_Chen16;~Shuaibing_Wang1;~Jinjie_Wei1;~Yue_Jiang10;~Qingyao_Xu2;~Xiaolu_Hou1;~Mingyang_Sun5;~Ziyun_Qian1;~Dongliang_Kou1;~Lihua_Zhang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;MS student;MS student;MS student;MS student;MS student;MS student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024toward,\ntitle={Toward Robust Incomplete Multimodal Sentiment Analysis via Hierarchical Representation Learning},\nauthor={Mingcheng Li and Dingkang Yang and Yang Liu and Shunli Wang and Jiawei Chen and Shuaibing Wang and Jinjie Wei and Yue Jiang and Qingyao Xu and Xiaolu Hou and Mingyang Sun and Ziyun Qian and Dongliang Kou and Lihua Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XgwTH95kCl}\n}", "github": "", "reviewers": "Zh5E;zmpp;f4LC;JfYH", "pdf_size": 1478589, "rating": "5;7;8;8", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "2;4;3;3", "presentation": "1;3;4;3", "wc_summary": "81;161;118;88", "wc_strengths": "63;139;92;73", "wc_weaknesses": "164;117;43;166", "wc_questions": "76;142;44;165", "wc_limitations": "4;106;1;59", "wc_review": "388;665;298;551", "wc_reply_reviewers": "0;194;0;15", "wc_reply_authors": "0;67;0;8", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 112.0, 31.51983502494897 ], "wc_strengths_avg": [ 91.75, 29.200813344836817 ], "wc_weaknesses_avg": [ 122.5, 49.912423303221814 ], "wc_questions_avg": [ 106.75, 48.782040752719645 ], "wc_limitations_avg": [ 42.5, 43.32724316177986 ], "wc_review_avg": [ 475.5, 142.1029556342865 ], "wc_reply_reviewers_avg": [ 52.25, 82.06818811208153 ], "wc_reply_authors_avg": [ 18.75, 28.047950014216724 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1792647913321143262&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 14, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "XiConLcsqq", "title": "RewardBench: Evaluating Reward Models for Language Modeling", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Reward models (RMs) are at the crux of successful RLHF to align pretrained models to human preferences, yet there has been relatively little study that focuses on evaluation of those reward models. Evaluating reward models presents an opportunity to understand the opaque technologies used for alignment of language models and which values are embedded in them. To date, very few descriptors of capabilities, training methods, or open-source reward models exist. In this paper, we present RewardBench, a benchmark dataset and code-base for evaluation, to enhance scientific understanding of reward models. The RewardBench dataset is a collection of prompt-win-lose trios spanning chat, reasoning, and safety, to benchmark how reward models perform on challenging, structured and out-of-distribution queries. We created specific comparison datasets for RMs that have subtle, but verifiable reasons (e.g. bugs, incorrect facts) why one answer should be preferred to another. On the RewardBench leaderboard, we evaluate reward models trained with a variety of methods, such as the direct MLE training of classifiers and the implicit reward modeling of Direct Preference Optimization (DPO), and on a spectrum of datasets. We present many findings on propensity for refusals, reasoning limitations, and instruction following shortcomings of various reward models towards a better understanding of the RLHF process.", "keywords": "reinforcement learning from human feedback;evaluation;language modeling", "primary_area": "", "supplementary_material": "/attachment/3786b5c6b495a526b4bc96a0e10759a938d4edc2.zip", "author": "Nathan Lambert;Valentina Pyatkin;Jacob Morrison;Lester James Validad Miranda;Bill Yuchen Lin;Khyathi Chandu;Nouha Dziri;Sachin Kumar;Tom Zick;Yejin Choi;Noah A. Smith;Hannaneh Hajishirzi", "authorids": "~Nathan_Lambert1;~Valentina_Pyatkin1;~Jacob_Morrison2;~Lester_James_Validad_Miranda1;~Bill_Yuchen_Lin1;~Khyathi_Chandu1;~Nouha_Dziri2;~Sachin_Kumar1;~Tom_Zick1;~Yejin_Choi1;~Noah_A._Smith2;~Hannaneh_Hajishirzi1", "gender": "M;;;M;M;;;M;F;F;;F", "homepage": "https://natolambert.com;;;https://ljvmiranda921.github.io;http://yuchenlin.xyz/;;;https://shocheen.com;;https://yejinc.github.io/;;https://homes.cs.washington.edu/~hannaneh/", "dblp": "228/9584.html;;;224/9490;190/4518;;;31/4484-9;;89/579-1;;52/1296", "google_scholar": "O4jW7BsAAAAJ;;;https://scholar.google.co.jp/citations?user=2RtnNKEAAAAJ;https://scholar.google.com/citations?hl=en;;;qO38fRIAAAAJ;uE5dFBMAAAAJ;vhP-tlcAAAAJ;;LOV6_WIAAAAJ", "orcid": "0000-0002-9997-6817;;;;;;;;;;;", "linkedin": "nathan-lambert-55093468/;;;;;;;;tom-zick/;;;", "or_profile": "~Nathan_Lambert1;~Valentina_Pyatkin1;~Jacob_Morrison2;~Lester_James_Validad_Miranda1;~Bill_Yuchen_Lin1;~Khyathi_Chandu1;~Nouha_Dziri2;~Sachin_Kumar1;~Tom_Zick1;~Yejin_Choi1;~Noah_A._Smith2;~Hannaneh_Hajishirzi1", "aff": "Allen Institute for Artificial Intelligence;;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;;Allen Institute for Artificial Intelligence;Harvard University;Department of Computer Science, University of Washington;;University of Washington", "aff_domain": "allenai.org;;;allenai.org;allenai.org;;;allenai.org;harvard.edu;cs.washington.edu;;uw.edu", "position": "Researcher;;;Researcher;Researcher;;;Postdoc;Postdoc;Full Professor;;Associate Professor", "bibtex": "@misc{\nanonymous2024rewardbench,\ntitle={RewardBench: Evaluating Reward Models for Language Modeling},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=XiConLcsqq}\n}", "github": "", "project": "", "reviewers": "T6o3;7Cs6;RUcU", "site": "https://openreview.net/forum?id=XiConLcsqq", "pdf_size": 807170, "rating": "4;5;9", "confidence": "5;4;4", "wc_summary_and_contributions": "28;21;98", "wc_strengths": "2;230;146", "wc_improvement": "2;2;159", "wc_limitations": "20;1;11", "wc_correctness": "1;1;16", "wc_clarity": "1;1;6", "wc_relation_to_prior_work": "1;1;8", "wc_documentation": "1;1;15", "wc_additional_feedback": "1;1;1", "wc_review": "57;259;460", "wc_reply_reviewers": "52;66;17", "wc_reply_authors": "0;40;0", "reply_reviewers": "2;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.0, 2.160246899469287 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 49.0, 34.76588366008646 ], "wc_strengths_avg": [ 126.0, 94.14881836751856 ], "wc_improvement_avg": [ 54.333333333333336, 74.01050976419197 ], "wc_limitations_avg": [ 10.666666666666666, 7.760297817881877 ], "wc_correctness_avg": [ 6.0, 7.0710678118654755 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_documentation_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 258.6666666666667, 164.5242298940257 ], "wc_reply_reviewers_avg": [ 45.0, 20.607442021431645 ], "wc_reply_authors_avg": [ 13.333333333333334, 18.856180831641264 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.654653670707977, "gs_citation": 238, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1234117245444796333&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1;2;2", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Harvard University;University of Washington", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://allenai.org;https://www.harvard.edu;https://www.washington.edu", "aff_unique_abbr": "AI2;Harvard;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "XkMCKoHNCD", "title": "A test of stochastic parroting in a generalisation task: predicting the characters in TV series", "track": "main", "status": "Reject", "tldr": "", "abstract": "There are two broad, opposing views of the recent developments in large language models (LLMs). The first of these uses the term \"stochastic parrots\" from Emily Bender et al (\"On the dangers of stochastic parrots: Can language models be too big?\" 2021) to emphasise that because LLMs are simply a method for creating a probability distribution over sequences of words, they can be viewed as simply parroting information in the training data. The second view, \"Sparks of AGI\" from Sebastien Bubeck et al (\"Sparks of artificial general intelligence: Early experiments with gpt-4\" 2023), posits that the unprecedented scale of computation in the newest generation of LLMs is leading to what its proponents call \"an early (yet still incomplete) version of an artificial general intelligence (AGI) system\". In this article, we propose a method for making predictions purely from the representation of data inside the LLM. Specifically, we create a logistic regression model, using the principal components of a LLM model embedding as features, in order to predict an output variable. The task we use to illustrate our method is predicting the characters in TV series, based on their lines in the show. We show that our method can, for example, distinguish Penny and Sheldon in the Big Bang Theory with an AUC performance of 0.79. Logistic regression models for other characters in Big Bang Theory have lower values of AUC (ranging from 0.59 to 0.79), with the most significant distinguishing factors between characters relating to the number and nature of comments they make about women. The characters in the TV-series Friends are more difficult to distinguish using this method (AUCs range from 0.61 to 0.66). We find that the accuracy of our logistic regression on a linear feature space is slightly (around 3 percentage points) lower than GPT-4, which is in turn around 5 percentage points below that of a human expert. We discuss how the method we propose could be used to help researchers be more specific in the claims they make about large language models.", "keywords": "stochastic parrot;pca;nlp;large language model", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/2a3331cb0430af54983126d38c7f5293a5bd6b47.zip", "author": "Amandine M.Caut;David J. T. Sumpter", "authorids": "~Amandine_M.Caut1;~David_J._T._Sumpter1", "gender": ";", "homepage": "https://www.uu.se/kontakt-och-organisation/personal?query=N20-1174;", "dblp": ";", "google_scholar": ";https://scholar.google.se/citations?user=IksW07MAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Amandine_M.Caut1;~David_J._T._Sumpter1", "aff": "Uppsala University;Uppsala University", "aff_domain": "uu.se;uu.se", "position": "PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024a,\ntitle={A test of stochastic parroting in a generalisation task: predicting the characters in {TV} series},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=XkMCKoHNCD}\n}", "github": "", "project": "", "reviewers": "rrdr;c8T5;3PCU;ynzH", "site": "https://openreview.net/forum?id=XkMCKoHNCD", "pdf_size": 3526814, "rating": "1;2;3;3", "confidence": "5;5;4;4", "soundness": "1;1;1;2", "novelty": "1;1;1;1", "presentation": "1;3;2;3", "wc_summary": "53;126;97;82", "wc_strengths": "24;25;75;41", "wc_weaknesses": "47;552;525;241", "wc_questions": "10;105;109;1", "wc_limitations": "39;10;26;14", "wc_review": "173;818;832;379", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 1.25, 0.4330127018922193 ], "novelty_avg": [ 1.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 89.5, 26.348624252510795 ], "wc_strengths_avg": [ 41.25, 20.620075169601105 ], "wc_weaknesses_avg": [ 341.25, 209.05307340481747 ], "wc_questions_avg": [ 56.25, 50.86931786450453 ], "wc_limitations_avg": [ 22.25, 11.321991874224253 ], "wc_review_avg": [ 550.5, 284.04093014916003 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vLHYJ883CwYJ:scholar.google.com/&scioq=A+test+of+stochastic+parroting+in+a+generalisation+task:+predicting+the+characters+in+TV+series&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Uppsala University", "aff_unique_dep": "", "aff_unique_url": "https://www.uu.se", "aff_unique_abbr": "UU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "Random Cycle Coding: Lossless Compression of Cluster Assignments via Bits-Back Coding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94749", "id": "XkvNQPDFqV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XkvNQPDFqV", "openreview": "https://openreview.net/forum?id=XkvNQPDFqV", "poster": "", "project": "", "author_site": "Daniel Severo, Ashish Khisti, Alireza Makhzani", "tldr": "", "abstract": "We present an optimal method for encoding cluster assignments of arbitrary data sets. Our method, Random Cycle Coding (RCC), encodes data sequentially and sends assignment information as cycles of the permutation defined by the order of encoded elements. RCC does not require any training and its worst-case complexity scales quasi-linearly with the size of the largest cluster. We characterize the achievable bit rates as a function of cluster sizes and number of elements, showing RCC consistently outperforms previous methods while requiring less compute and memory resources. Experiments show RCC can save up to $2$ bytes per element when applied to vector databases, and removes the need for assigning integer ids to identify vectors, translating to savings of up to $70\\%$ in vector database systems for similarity search applications.", "keywords": "compression;vector databases;clustering;permutations", "primary_area": "other", "supplementary_material": "", "author": "Daniel Severo;Ashish J Khisti;Alireza Makhzani", "authorids": "~Daniel_Severo1;~Ashish_J_Khisti1;~Alireza_Makhzani1", "gender": "M;M;", "homepage": "http://dsevero.com;https://www.comm.utoronto.ca/~akhisti/;http://www.alireza.ai/", "dblp": "249/9390;84/5679.html;122/5126.html", "google_scholar": "5bQjLz4AAAAJ;https://scholar.google.ca/citations?user=jiGeAg4AAAAJ;B0KVWJEAAAAJ", "orcid": "0000-0003-0472-5300;;", "linkedin": "danielsevero/;;", "or_profile": "~Daniel_Severo1;~Ashish_J_Khisti1;~Alireza_Makhzani1", "aff": "Vector Institute;Toronto University;Vector Institute", "aff_domain": "vectorinstitute.ai;utoronto.ca;vectorinstitute.ai", "position": "PhD student;Professor;Researcher", "bibtex": "@inproceedings{\nsevero2024random,\ntitle={Random Cycle Coding: Lossless Compression of Cluster Assignments via Bits-Back Coding},\nauthor={Daniel Severo and Ashish J Khisti and Alireza Makhzani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XkvNQPDFqV}\n}", "github": "", "reviewers": "MJQw;CCTj;BkEs;rKbY;h1ae", "pdf_size": 612660, "rating": "5;5;5;6;6", "confidence": "2;3;3;2;3", "soundness": "3;3;2;3;3", "novelty": "2;3;1;3;3", "presentation": "3;3;2;2;3", "wc_summary": "32;58;42;81;71", "wc_strengths": "41;27;25;40;54", "wc_weaknesses": "31;111;147;40;63", "wc_questions": "27;21;2;103;97", "wc_limitations": "7;1;2;1;1", "wc_review": "138;218;218;265;286", "wc_reply_reviewers": "12;0;17;23;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 56.8, 18.015548839821673 ], "wc_strengths_avg": [ 37.4, 10.55651457631732 ], "wc_weaknesses_avg": [ 78.4, 44.10714227877386 ], "wc_questions_avg": [ 50.0, 41.694124286282836 ], "wc_limitations_avg": [ 2.4, 2.33238075793812 ], "wc_review_avg": [ 225.0, 50.966655766294885 ], "wc_reply_reviewers_avg": [ 14.2, 7.934733769950949 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.16666666666666669, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6625753253081033223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "vectorinstitute.ai;utoronto.ca;vectorinstitute.ai", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Vector Institute;University of Toronto", "aff_unique_dep": ";", "aff_unique_url": "https://vectorinstitute.ai/;https://www.utoronto.ca", "aff_unique_abbr": "Vector Institute;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Megalodon: Efficient LLM Pretraining and Inference with Unlimited Context Length", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94748", "id": "XlAbMZu4Bo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XlAbMZu4Bo", "openreview": "https://openreview.net/forum?id=XlAbMZu4Bo", "poster": "/media/PosterPDFs/NeurIPS%202024/94748.png?t=1733178994.978742", "project": "", "author_site": "Xuezhe Ma, Xiaomeng Yang, Wenhan Xiong, Beidi Chen, LILI YU, Hao Zhang, Jonathan May, Luke Zettlemoyer, Omer Levy, Chunting Zhou", "tldr": "", "abstract": "The quadratic complexity and weak length extrapolation of Transformers limits their ability to scale to long sequences, and while sub-quadratic solutions like linear attention and state space models exist, they empirically underperform Transformers in pretraining efficiency and downstream task accuracy. We introduce MEGALODON, an neural architecture for efficient sequence modeling with unlimited context length. MEGALODON inherits the architecture of MEGA (exponential moving average with gated attention), and further introduces multiple technical components to improve its capability and stability, including complex exponential moving average (CEMA), timestep normalization layer, normalized attention mechanism and pre-norm with two-hop residual configuration. In a controlled head-to-head comparison with LLAMA2, MEGALODON achieves better efficiency than Transformer in the scale of 7 billion parameters and 2 trillion training tokens. MEGALODON reaches a training loss of 1.70, landing mid-way between LLAMA2-7B (1.75) and LLAMA2-13B (1.67). This result is robust throughout a wide range of benchmarks, where MEGALODON consistently outperforms Transformers across different tasks, domains, and modalities.", "keywords": "Mega;Efficient Architecture;Long Sequence Modeling;Unlimited Context Length", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Xuezhe Ma;Xiaomeng Yang;Wenhan Xiong;Beidi Chen;LILI YU;Hao Zhang;Jonathan May;Luke Zettlemoyer;Omer Levy;Chunting Zhou", "authorids": "~Xuezhe_Ma1;~Xiaomeng_Yang1;~Wenhan_Xiong1;~Beidi_Chen1;~LILI_YU1;~Hao_Zhang2;~Jonathan_May1;~Luke_Zettlemoyer1;~Omer_Levy1;~Chunting_Zhou1", "gender": "M;M;M;F;F;M;M;M;M;F", "homepage": "https://xuezhemax.github.io/;;https://xwhan.github.io;https://www.andrew.cmu.edu/user/beidic/;https://scholar.google.com/citations?hl=en&user=wY932-AAAAAJ&view_op=list_works&authuser=1&sortby=pubdate;https://cseweb.ucsd.edu/~haozhang/;http://jonmay.net;https://www.cs.washington.edu/people/faculty/lsz/;;https://violet-zct.github.io/", "dblp": "127/0230;;203/8542;192/1339;;55/2270-25;00/4758;21/6793;117/4866;161/2679", "google_scholar": "6_MQLIcAAAAJ;t8v3JXsAAAAJ;;;https://scholar.google.com/citations?hl=en;H1d4BS8AAAAJ;tmK5EPEAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;PZVd2h8AAAAJ;mR5W7EgAAAAJ", "orcid": ";0009-0007-3917-6811;;;;;0000-0002-5284-477X;;0000-0001-7300-8191;", "linkedin": "xuezhe-ma-b5354731;xiaomeng-yang-356a976b;;;lili-yu-6771961a/;;jonmayjonmay/;luke-zettlemoyer-a0109b226/;;", "or_profile": "~Xuezhe_Ma1;~Xiaomeng_Yang1;~Wenhan_Xiong1;~Beidi_Chen1;~LILI_YU1;~Hao_Zhang2;~Jonathan_May1;~Luke_Zettlemoyer1;~Omer_Levy1;~Chunting_Zhou1", "aff": "USC/ISI;Moonshot AI;Meta Facebook;Meta Facebook;Meta Facebook;Carnegie Mellon University;USC/ISI;Meta;Tel Aviv University;Meta AI", "aff_domain": "isi.edu;msh.team;fb.com;fb.com;fb.com;cmu.edu;isi.edu;meta.com;tau.ac.il;meta.com", "position": "Assistant Professor;Member of Technical Staff;Researcher;Researcher;Researcher;PhD student;Research Scientist;Researcher;Senior Lecturer;Researcher", "bibtex": "@inproceedings{\nma2024megalodon,\ntitle={Megalodon: Efficient {LLM} Pretraining and Inference with Unlimited Context Length},\nauthor={Xuezhe Ma and Xiaomeng Yang and Wenhan Xiong and Beidi Chen and LILI YU and Hao Zhang and Jonathan May and Luke Zettlemoyer and Omer Levy and Chunting Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XlAbMZu4Bo}\n}", "github": "", "reviewers": "xs5D;6wcE;73L7;U1i1", "pdf_size": 943778, "rating": "5;5;5;6", "confidence": "3;5;5;3", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "30;24;120;60", "wc_strengths": "50;45;38;26", "wc_weaknesses": "388;78;227;175", "wc_questions": "47;54;2;37", "wc_limitations": "11;1;2;57", "wc_review": "526;202;389;355", "wc_reply_reviewers": "164;210;0;24", "wc_reply_authors": "325;90;0;0", "reply_reviewers": "2;2;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.5, 38.0361669993179 ], "wc_strengths_avg": [ 39.75, 9.01041064547005 ], "wc_weaknesses_avg": [ 217.0, 112.27867117133155 ], "wc_questions_avg": [ 35.0, 19.987496091306685 ], "wc_limitations_avg": [ 17.75, 22.993205518152532 ], "wc_review_avg": [ 368.0, 115.2497288500064 ], "wc_reply_reviewers_avg": [ 99.5, 89.40218118144546 ], "wc_reply_authors_avg": [ 103.75, 132.9179728253482 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11989663913309163134&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "isi.edu;msh.team;fb.com;fb.com;fb.com;cmu.edu;isi.edu;meta.com;tau.ac.il;meta.com", "author_num": 10, "aff_unique_index": "0;1;2;2;2;3;0;2;4;2", "aff_unique_norm": "University of Southern California;Moonshot AI;Meta;Carnegie Mellon University;Tel Aviv University", "aff_unique_dep": ";;Meta Platforms, Inc.;;", "aff_unique_url": "https://isi.usc.edu;https://moonshot.ai;https://meta.com;https://www.cmu.edu;https://www.tau.ac.il", "aff_unique_abbr": "USC;Moonshot AI;Meta;CMU;TAU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "ISI;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Amortized Planning with Large-Scale Transformers: A Case Study on Chess", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94747", "id": "XlpipUGygX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XlpipUGygX", "openreview": "https://openreview.net/forum?id=XlpipUGygX", "poster": "/media/PosterPDFs/NeurIPS%202024/94747.png?t=1733860040.9672964", "project": "", "author_site": "Anian Ruoss, Gr\u00e9goire Del\u00e9tang, Sourabh Medapati, Jordi Grau-Moya, Kevin Li, Elliot Catt, John Reid, Cannada Lewis, Joel Veness, Tim Genewein", "tldr": "", "abstract": "This paper uses chess, a landmark planning problem in AI, to assess transformers\u2019 performance on a planning task where memorization is futile \u2014 even at a large scale. To this end, we release ChessBench, a large-scale benchmark dataset of 10 million chess games with legal move and value annotations (15 billion data points) provided by Stockfish 16, the state-of-the-art chess engine. We train transformers with up to 270 million parameters on ChessBench via supervised learning and perform extensive ablations to assess the impact of dataset size, model size, architecture type, and different prediction targets (state-values, action-values, and behavioral cloning). Our largest models learn to predict action-values for novel boards quite accurately, implying highly non-trivial generalization. Despite performing no explicit search, our resulting chess policy solves challenging chess puzzles and achieves a surprisingly strong Lichess blitz Elo of 2895 against humans (grandmaster level). We also compare to Leela Chess Zero and AlphaZero (trained without supervision via self-play) with and without search. We show that, although a remarkably good approximation of Stockfish\u2019s search-based algorithm can be distilled into large-scale transformers via supervised learning, perfect distillation is still beyond reach, thus making ChessBench well-suited for future research.", "keywords": "chess;supervised learning;transformer;scaling;benchmark", "primary_area": "other", "supplementary_material": "", "author": "Anian Ruoss;Gregoire Deletang;Sourabh Medapati;Jordi Grau-Moya;Li Kevin Wenliang;Elliot Catt;John Reid;Cannada A. Lewis;Joel Veness;Tim Genewein", "authorids": "~Anian_Ruoss1;~Gregoire_Deletang1;~Sourabh_Medapati1;~Jordi_Grau-Moya2;~Li_Kevin_Wenliang1;~Elliot_Catt1;~John_Reid1;~Cannada_A._Lewis1;~Joel_Veness2;~Tim_Genewein1", "gender": "M;;M;;;M;M;M;;M", "homepage": ";;;;https://kevin-w-li.github.io/;;;;;http://tim.inversetemperature.net/", "dblp": "259/2083;;;116/3023;255/7009;204/2511;;;;116/3039", "google_scholar": "gFkwD3kAAAAJ;;QxNiuvcAAAAJ;;https://scholar.google.co.uk/citations?user=MW45NMEAAAAJ;d1JYeMIAAAAJ;uBSRKwcAAAAJ;;;https://scholar.google.de/citations?user=peNTK9oAAAAJ", "orcid": ";;;;;0000-0001-9411-927X;;;;", "linkedin": "anian-ruoss;;csgator/;jordi-g-9a1b02104;;;;drew-lewis-64071778;;", "or_profile": "~Anian_Ruoss1;~Gregoire_Deletang1;~Sourabh_Medapati1;~Jordi_Grau-Moya2;~Li_Kevin_Wenliang1;~Elliot_Catt1;~John_Reid1;~Cannada_A._Lewis1;~Joel_Veness2;~Tim_Genewein1", "aff": "Google DeepMind;;Google Deepmind;Google DeepMind;Google DeepMind;Google DeepMind;Google;Google;;Google DeepMind", "aff_domain": "deepmind.com;;research.google.com;deepmind.com;deepmind.com;deepmind.com;deepmind.com;google.com;;google.com", "position": "Researcher;;Researcher;Researcher;Researcher;Researcher;Researcher;Software Engineer;;Researcher", "bibtex": "@inproceedings{\nruoss2024amortized,\ntitle={Amortized Planning with Large-Scale Transformers: A Case Study on Chess},\nauthor={Anian Ruoss and Gregoire Deletang and Sourabh Medapati and Jordi Grau-Moya and Li Kevin Wenliang and Elliot Catt and John Reid and Cannada A. Lewis and Joel Veness and Tim Genewein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XlpipUGygX}\n}", "github": "", "reviewers": "wU89;Ye4U;LZwX;tXJ6", "pdf_size": 2328298, "rating": "5;6;6;7", "confidence": "4;3;5;3", "soundness": "3;3;4;4", "novelty": "2;2;2;3", "presentation": "3;3;4;4", "wc_summary": "69;41;55;66", "wc_strengths": "59;36;108;83", "wc_weaknesses": "318;34;587;131", "wc_questions": "4;21;113;95", "wc_limitations": "1;7;7;5", "wc_review": "451;139;870;380", "wc_reply_reviewers": "123;0;161;20", "wc_reply_authors": "164;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 57.75, 10.985786271359915 ], "wc_strengths_avg": [ 71.5, 26.837473800639284 ], "wc_weaknesses_avg": [ 267.5, 210.82279288539937 ], "wc_questions_avg": [ 58.25, 46.57990446533784 ], "wc_limitations_avg": [ 5.0, 2.449489742783178 ], "wc_review_avg": [ 460.0, 263.44923609682377 ], "wc_reply_reviewers_avg": [ 76.0, 67.72370338367506 ], "wc_reply_authors_avg": [ 41.0, 71.01408311032397 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5686190351170760496&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "deepmind.com;;research.google.com;deepmind.com;deepmind.com;deepmind.com;deepmind.com;google.com;;google.com", "author_num": 10, "aff_unique_index": "0;1;0;0;0;0;0;0", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google DeepMind;DeepMind", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "A Simple and Adaptive Learning Rate for FTRL in Online Learning with Minimax Regret of $\\Theta(T^{2/3})$ and its Application to Best-of-Both-Worlds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94746", "id": "XlvUz9F50g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XlvUz9F50g", "openreview": "https://openreview.net/forum?id=XlvUz9F50g", "poster": "/media/PosterPDFs/NeurIPS%202024/94746.png?t=1734003378.509804", "project": "", "author_site": "Taira Tsuchiya, Shinji Ito", "tldr": "", "abstract": "Follow-the-Regularized-Leader (FTRL) is a powerful framework for various online learning problems. By designing its regularizer and learning rate to be adaptive to past observations, FTRL is known to work adaptively to various properties of an underlying environment. However, most existing adaptive learning rates are for online learning problems with a minimax regret of $\\Theta(\\sqrt{T})$ for the number of rounds $T$, and there are only a few studies on adaptive learning rates for problems with a minimax regret of $\\Theta(T^{2/3})$, which include several important problems dealing with indirect feedback. To address this limitation, we establish a new adaptive learning rate framework for problems with a minimax regret of $\\Theta(T^{2/3})$. Our learning rate is designed by matching the stability, penalty, and bias terms that naturally appear in regret upper bounds for problems with a minimax regret of $\\Theta(T^{2/3})$. As applications of this framework, we consider three major problems with a minimax regret of $\\Theta(T^{2/3})$: partial monitoring, graph bandits, and multi-armed bandits with paid observations. We show that FTRL with our learning rate and the Tsallis entropy regularizer improves existing Best-of-Both-Worlds (BOBW) regret upper bounds, which achieve simultaneous optimality in the stochastic and adversarial regimes. The resulting learning rate is surprisingly simple compared to the existing learning rates for BOBW algorithms for problems with a minimax regret of $\\Theta(T^{2/3})$.", "keywords": "follow-the-regularized-leader;adaptive learning rate;best-of-both-worlds algorithm;partial monitoring;graph bandits;online learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Taira Tsuchiya;Shinji Ito", "authorids": "~Taira_Tsuchiya1;~Shinji_Ito1", "gender": "M;M", "homepage": "https://tsuchhiii.github.io/;https://researchmap.jp/shinji_ito?lang=en", "dblp": "226/5536;49/852", "google_scholar": "https://scholar.google.co.jp/citations?view_op=list_works;https://scholar.google.co.jp/citations?user=GX0V06wAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Taira_Tsuchiya1;~Shinji_Ito1", "aff": "The University of Tokyo;NEC", "aff_domain": "u-tokyo.ac.jp;nec.com", "position": "Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\ntsuchiya2024a,\ntitle={A Simple and Adaptive Learning Rate for {FTRL} in Online Learning with Minimax Regret of \\${\\textbackslash}Theta(T{\\textasciicircum}\\{2/3\\})\\$ and its Application to Best-of-Both-Worlds},\nauthor={Taira Tsuchiya and Shinji Ito},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XlvUz9F50g}\n}", "github": "", "reviewers": "TQWF;gjM7;7rt1;YZuC;5JDu", "pdf_size": 303618, "rating": "4;6;6;6;7", "confidence": "2;2;3;3;4", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;2;2;3;3", "wc_summary": "120;81;56;62;130", "wc_strengths": "92;84;65;43;306", "wc_weaknesses": "293;92;91;90;194", "wc_questions": "60;97;261;10;218", "wc_limitations": "12;2;5;1;5", "wc_review": "577;356;478;206;853", "wc_reply_reviewers": "26;46;0;11;19", "wc_reply_authors": "309;0;247;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 89.8, 30.069253399444424 ], "wc_strengths_avg": [ 118.0, 95.50916186418976 ], "wc_weaknesses_avg": [ 152.0, 81.00617260431454 ], "wc_questions_avg": [ 129.2, 95.17436629681335 ], "wc_limitations_avg": [ 5.0, 3.847076812334269 ], "wc_review_avg": [ 494.0, 218.18065908783024 ], "wc_reply_reviewers_avg": [ 20.4, 15.447977213862014 ], "wc_reply_authors_avg": [ 111.2, 137.59563946579124 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:boEJ7v97SQ0J:scholar.google.com/&scioq=A+Simple+and+Adaptive+Learning+Rate+for+FTRL+in+Online+Learning+with+Minimax+Regret+of+%24%5CTheta(T%5E%7B2/3%7D)%24+and+its+Application+to+Best-of-Both-Worlds&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "u-tokyo.ac.jp;nec.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Tokyo;NEC Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.nec.com", "aff_unique_abbr": "UTokyo;NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Benchmarking the Attribution Quality of Vision Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97644", "id": "XmyxQaTyck", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XmyxQaTyck", "openreview": "https://openreview.net/forum?id=XmyxQaTyck", "poster": "/media/PosterPDFs/NeurIPS%202024/97644.png?t=1731693463.1491175", "project": "", "author_site": "Robin Hesse, Simone Schaub-Meyer, Stefan Roth", "tldr": "", "abstract": "Attribution maps are one of the most established tools to explain the functioning of computer vision models. They assign importance scores to input features, indicating how relevant each feature is for the prediction of a deep neural network. While much research has gone into proposing new attribution methods, their proper evaluation remains a difficult challenge. In this work, we propose a novel evaluation protocol that overcomes two fundamental limitations of the widely used incremental-deletion protocol, i.e., the out-of-domain issue and lacking inter-model comparisons. This allows us to evaluate 23 attribution methods and how different design choices of popular vision backbones affect their attribution quality. We find that intrinsically explainable models outperform standard models and that raw attribution values exhibit a higher attribution quality than what is known from previous work. Further, we show consistent changes in the attribution quality when varying the network design, indicating that some standard design choices promote attribution quality.", "keywords": "explainable artificial intelligence;XAI", "primary_area": "", "supplementary_material": "", "author": "Robin Hesse;Simone Schaub-Meyer;Stefan Roth", "authorids": "~Robin_Hesse1;~Simone_Schaub-Meyer1;~Stefan_Roth1", "gender": "M;F;M", "homepage": "https://robinhesse.github.io/;https://schaubsi.github.io/;https://www.visinf.tu-darmstadt.de/visual_inference/people_vi/stefan_roth.en.jsp", "dblp": "187/1593;169/4935;24/3452", "google_scholar": "gNd-TlcAAAAJ;https://scholar.google.ch/citations?user=05-lAc8AAAAJ;0yDoR0AAAAAJ", "orcid": ";;0000-0001-9002-9832", "linkedin": "https://linkedin.com/in/robin-hesse-990873164;;stefanroth13", "or_profile": "~Robin_Hesse1;~Simone_Schaub-Meyer1;~Stefan_Roth1", "aff": "TU Darmstadt;TU Darmstadt;Technische Universit\u00e4t Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhesse2024benchmarking,\ntitle={Benchmarking the Attribution Quality of Vision Models},\nauthor={Robin Hesse and Simone Schaub-Meyer and Stefan Roth},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XmyxQaTyck}\n}", "github": "", "reviewers": "einR;beW6;tUJa", "pdf_size": 3228725, "rating": "6;6;7", "confidence": "3;4;4", "wc_summary_and_contributions": "44;95;52", "wc_strengths": "81;31;92", "wc_improvement": "146;1;101", "wc_limitations": "1;1;58", "wc_correctness": "1;1;23", "wc_clarity": "1;1;12", "wc_relation_to_prior_work": "1;1;24", "wc_documentation": "1;1;29", "wc_additional_feedback": "1;1;1", "wc_review": "277;133;392", "wc_reply_reviewers": "0;97;0", "wc_reply_authors": "60;0;0", "reply_reviewers": "0;2;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 63.666666666666664, 22.395436042987768 ], "wc_strengths_avg": [ 68.0, 26.54555832275273 ], "wc_improvement_avg": [ 82.66666666666667, 60.59886320899281 ], "wc_limitations_avg": [ 20.0, 26.870057685088806 ], "wc_correctness_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_clarity_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_relation_to_prior_work_avg": [ 8.666666666666666, 10.842303978193728 ], "wc_documentation_avg": [ 10.333333333333334, 13.199326582148887 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 267.3333333333333, 105.95701434491673 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 45.726238516730064 ], "wc_reply_authors_avg": [ 20.0, 28.284271247461902 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10174350580740992797&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Protein-Nucleic Acid Complex Modeling with Frame Averaging Transformer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94745", "id": "Xngi3Z3wkN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xngi3Z3wkN", "openreview": "https://openreview.net/forum?id=Xngi3Z3wkN", "poster": "", "project": "", "author_site": "Tinglin Huang, Zhenqiao Song, Rex Ying, Wengong Jin", "tldr": "", "abstract": "Nucleic acid-based drugs like aptamers have recently demonstrated great therapeutic potential. However, experimental platforms for aptamer screening are costly, and the scarcity of labeled data presents a challenge for supervised methods to learn protein-aptamer binding. To this end, we develop an unsupervised learning approach based on the predicted pairwise contact map between a protein and a nucleic acid and demonstrate its effectiveness in protein-aptamer binding prediction. Our model is based on FAFormer, a novel equivariant transformer architecture that seamlessly integrates frame averaging (FA) within each transformer block. This integration allows our model to infuse geometric information into node features while preserving the spatial semantics of coordinates, leading to greater expressive power than standard FA models. Our results show that FAFormer outperforms existing equivariant models in contact map prediction across three protein complex datasets, with over 10% relative improvement. Moreover, we curate five real-world protein-aptamer interaction datasets and show that the contact map predicted by FAFormer serves as a strong binding indicator for aptamer screening.", "keywords": "Protein;Nucleic acid;Aptamer;3D Structure", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Tinglin Huang;Zhenqiao Song;Rex Ying;Wengong Jin", "authorids": "~Tinglin_Huang1;~Zhenqiao_Song1;~Zhitao_Ying1;~Wengong_Jin1", "gender": "M;F;M;", "homepage": "https://huangtinglin.github.io/;https://jocelynsong.github.io/;https://www.cs.yale.edu/homes/ying-rex;http://people.csail.mit.edu/wengong", "dblp": ";227/7889;209/4936;173/6620", "google_scholar": "izW2ygYAAAAJ;https://scholar.google.com/citations?hl=en;6fqNXooAAAAJ;IE5D8_QAAAAJ", "orcid": "0009-0005-5644-4879;;;", "linkedin": ";;rex-ying-92770148/;", "or_profile": "~Tinglin_Huang1;~Zhenqiao_Song1;~Zhitao_Ying1;~Wengong_Jin1", "aff": "Yale University;Carnegie Mellon University;Yale University;Broad Institute", "aff_domain": "yale.edu;andrew.cmu.edu;yale.edu;broadinstitute.org", "position": "PhD student;PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nhuang2024proteinnucleic,\ntitle={Protein-Nucleic Acid Complex Modeling with Frame Averaging Transformer},\nauthor={Tinglin Huang and Zhenqiao Song and Rex Ying and Wengong Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xngi3Z3wkN}\n}", "github": "", "reviewers": "qqru;npWJ;DGm4", "pdf_size": 3334558, "rating": "4;7;7", "confidence": "3;3;3", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "3;4;2", "wc_summary": "62;45;56", "wc_strengths": "55;63;63", "wc_weaknesses": "83;99;111", "wc_questions": "4;67;123", "wc_limitations": "17;13;29", "wc_review": "221;287;382", "wc_reply_reviewers": "255;14;35", "wc_reply_authors": "707;20;68", "reply_reviewers": "2;1;1", "reply_authors": "4;2;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 54.333333333333336, 7.039570693980959 ], "wc_strengths_avg": [ 60.333333333333336, 3.7712361663282534 ], "wc_weaknesses_avg": [ 97.66666666666667, 11.469767022723502 ], "wc_questions_avg": [ 64.66666666666667, 48.609555530665865 ], "wc_limitations_avg": [ 19.666666666666668, 6.79869268479038 ], "wc_review_avg": [ 296.6666666666667, 66.08244009484585 ], "wc_reply_reviewers_avg": [ 101.33333333333333, 108.99643215363622 ], "wc_reply_authors_avg": [ 265.0, 313.1549137407874 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1960987366738326169&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "yale.edu;andrew.cmu.edu;yale.edu;broadinstitute.org", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Yale University;Carnegie Mellon University;Broad Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yale.edu;https://www.cmu.edu;https://www.broadinstitute.org", "aff_unique_abbr": "Yale;CMU;Broad", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Enabling Adaptive Agent Training in Open-Ended Simulators by Targeting Diversity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94744", "id": "Xo1Yqyw7Yx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xo1Yqyw7Yx", "openreview": "https://openreview.net/forum?id=Xo1Yqyw7Yx", "poster": "", "project": "", "author_site": "Robby Costales, Stefanos Nikolaidis", "tldr": "", "abstract": "The wider application of end-to-end learning methods to embodied decision-making domains remains bottlenecked by their reliance on a superabundance of training data representative of the target domain.\nMeta-reinforcement learning (meta-RL) approaches abandon the aim of zero-shot *generalization*\u2014the goal of standard reinforcement learning (RL)\u2014in favor of few-shot *adaptation*, and thus hold promise for bridging larger generalization gaps.\nWhile learning this meta-level adaptive behavior still requires substantial data, efficient environment simulators approaching real-world complexity are growing in prevalence.\nEven so, hand-designing sufficiently diverse and numerous simulated training tasks for these complex domains is prohibitively labor-intensive.\nDomain randomization (DR) and procedural generation (PG), offered as solutions to this problem, require simulators to possess carefully-defined parameters which directly translate to meaningful task diversity\u2014a similarly prohibitive assumption.\nIn this work, we present **DIVA**, an evolutionary approach for generating diverse training tasks in such complex, open-ended simulators.\nLike unsupervised environment design (UED) methods, DIVA can be applied to arbitrary parameterizations, but can additionally incorporate realistically-available domain knowledge\u2014thus inheriting the *flexibility* and *generality* of UED, and the supervised *structure* embedded in well-designed simulators exploited by DR and PG.\nOur empirical results showcase DIVA's unique ability to overcome complex parameterizations and successfully train adaptive agent behavior, far outperforming competitive baselines from prior literature.\nThese findings highlight the potential of such *semi-supervised environment design* (SSED) approaches, of which DIVA is the first humble constituent, to enable training in realistic simulated domains, and produce more robust and capable adaptive agents.\nOur code is available at [https://github.com/robbycostales/diva](https://github.com/robbycostales/diva).", "keywords": "diversity;meta reinforcement learning;meta-RL;reinforcement learning;adaptation;adaptive;agents;open-endedness;genotypes;phenotypes;simulators;simulation;generalization;meta-reinforcement", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Robby Costales;Stefanos Nikolaidis", "authorids": "~Robby_Costales1;~Stefanos_Nikolaidis1", "gender": "M;", "homepage": "https://robbycostales.com/;http://stefanosnikolaidis.net/", "dblp": "263/7351;62/6555", "google_scholar": "BgpME38AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Robby_Costales1;~Stefanos_Nikolaidis1", "aff": "University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncostales2024enabling,\ntitle={Enabling Adaptive Agent Training in Open-Ended Simulators by Targeting Diversity},\nauthor={Robby Costales and Stefanos Nikolaidis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xo1Yqyw7Yx}\n}", "github": "", "reviewers": "nCFY;34nq;7bxy;kFbG", "pdf_size": 4599381, "rating": "3;6;7;8", "confidence": "3;3;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;2;2;4", "wc_summary": "74;57;40;129", "wc_strengths": "33;31;25;123", "wc_weaknesses": "206;81;556;52", "wc_questions": "167;178;9;46", "wc_limitations": "20;8;2;5", "wc_review": "500;355;632;355", "wc_reply_reviewers": "0;75;44;11", "wc_reply_authors": "285;1010;1080;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;4;3;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 33.414068893207244 ], "wc_strengths_avg": [ 53.0, 40.52159917870962 ], "wc_weaknesses_avg": [ 223.75, 200.36263998061116 ], "wc_questions_avg": [ 100.0, 73.7733014036921 ], "wc_limitations_avg": [ 8.75, 6.832825184358224 ], "wc_review_avg": [ 460.5, 115.36138868789678 ], "wc_reply_reviewers_avg": [ 32.5, 29.3981291921782 ], "wc_reply_authors_avg": [ 593.75, 463.0250398196624 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Yw8FO9W4JKYJ:scholar.google.com/&scioq=Enabling+Adaptive+Agent+Training+in+Open-Ended+Simulators+by+Targeting+Diversity&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "usc.edu;usc.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "E2ENet: Dynamic Sparse Feature Fusion for Accurate and Efficient 3D Medical Image Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94743", "id": "Xp8qhdmeb4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xp8qhdmeb4", "openreview": "https://openreview.net/forum?id=Xp8qhdmeb4", "poster": "/media/PosterPDFs/NeurIPS%202024/94743.png?t=1731716614.78529", "project": "", "author_site": "Boqian Wu, Qiao Xiao, Shiwei Liu, Lu Yin, Mykola Pechenizkiy, Decebal Constantin Mocanu, Maurice Keulen, Elena Mocanu", "tldr": "", "abstract": "Deep neural networks have evolved as the leading approach in 3D medical image segmentation due to their outstanding performance. However, the ever-increasing model size and computational cost of deep neural networks have become the primary barriers to deploying them on real-world, resource-limited hardware. To achieve both segmentation accuracy and efficiency, we propose a 3D medical image segmentation model called Efficient to Efficient Network (E2ENet), which incorporates two parametrically and computationally efficient designs. i. Dynamic sparse feature fusion (DSFF) mechanism: it adaptively learns to fuse informative multi-scale features while reducing redundancy. ii. Restricted depth-shift in 3D convolution: it leverages the 3D spatial information while keeping the model and computational complexity as 2D-based methods. We conduct extensive experiments on AMOS, Brain Tumor Segmentation and BTCV Challenge, demonstrating that E2ENet consistently achieves a superior trade-off between accuracy and efficiency than prior arts across various resource constraints. %In particular, with a single model and single scale, E2ENet achieves comparable accuracy on the large-scale challenge AMOS-CT, while saving over 69% parameter count and 27% FLOPs in the inference phase, compared with the previous\nbest-performing method. Our code has been made available at: https://github.com/boqian333/E2ENet-Medical.", "keywords": "Medical Image Segmentation;Sparse Training;Feature Fusion", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/5daf3ca5fd6f1a54a3de7676c781790a9881c37a.zip", "author": "Boqian Wu;Qiao Xiao;Shiwei Liu;Lu Yin;Mykola Pechenizkiy;Decebal Constantin Mocanu;Maurice van Keulen;Elena Mocanu", "authorids": "~Boqian_Wu1;~Qiao_Xiao1;~Shiwei_Liu2;~Lu_Yin1;~Mykola_Pechenizkiy1;~Decebal_Constantin_Mocanu1;~Maurice_van_Keulen1;~Elena_Mocanu1", "gender": "F;M;M;;M;M;M;F", "homepage": ";;https://shiweiliuiiiiiii.github.io/;https://luuyin.com/;http://www.win.tue.nl/~mpechen/;https://wwwen.uni.lu/recherche/fstm/dcs/members/decebal_constantin_mocanu;https://people.utwente.nl/m.vankeulen;https://people.utwente.nl/e.mocanu", "dblp": "201/6658;182/7575;234/8697-3.html;87/2528-6;37/4649;133/7764;k/MauricevanKeulen;08/1121", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;73IbXtsAAAAJ;G4Xe1NkAAAAJ;https://scholar.google.com.tw/citations?user=F0uFT_kAAAAJ;RlQgUwEAAAAJ;https://scholar.google.nl/citations?user=JireFzMAAAAJ;https://scholar.google.nl/citations?user=JLD5uy0AAAAJ", "orcid": ";;;;0000-0003-4955-0743;0000-0002-5636-7683;;", "linkedin": "%E6%9F%8F%E5%80%A9-%E5%90%B4-7b421414b/;;;;mpechen/;;mvankeulen/;", "or_profile": "~Boqian_Wu1;~Qiao_Xiao1;~Shiwei_Liu2;~Lu_Yin1;~Mykola_Pechenizkiy1;~Decebal_Constantin_Mocanu1;~Maurice_van_Keulen1;~Elena_Mocanu1", "aff": "University of Twente;Eindhoven University of Technology;University of Oxford;University of Aberdeen;Eindhoven University of Technology;Eindhoven University of Technology;University of Twente;University of Twente", "aff_domain": "utwente.nl;tue.nl;ox.ac.uk;abdn.ac.uk;tue.nl;tue.nl;utwente.nl;utwente.nl", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Full Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024eenet,\ntitle={E2{EN}et: Dynamic Sparse Feature Fusion for Accurate and Efficient 3D Medical Image Segmentation},\nauthor={Boqian Wu and Qiao Xiao and Shiwei Liu and Lu Yin and Mykola Pechenizkiy and Decebal Constantin Mocanu and Maurice van Keulen and Elena Mocanu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xp8qhdmeb4}\n}", "github": "", "reviewers": "SYnH;JUDh;xwUb", "pdf_size": 9002303, "rating": "6;6;6", "confidence": "3;3;4", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "70;89;100", "wc_strengths": "88;47;28", "wc_weaknesses": "163;20;27", "wc_questions": "203;55;24", "wc_limitations": "1;6;10", "wc_review": "525;217;189", "wc_reply_reviewers": "14;14;26", "wc_reply_authors": "15;47;16", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.33333333333333, 12.39175353029407 ], "wc_strengths_avg": [ 54.333333333333336, 25.037749277618563 ], "wc_weaknesses_avg": [ 70.0, 65.82299496883036 ], "wc_questions_avg": [ 94.0, 78.1067645384615 ], "wc_limitations_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_review_avg": [ 310.3333333333333, 152.22206001613418 ], "wc_reply_reviewers_avg": [ 18.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 26.0, 14.854853303438128 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15062253393576771813&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "utwente.nl;tue.nl;ox.ac.uk;abdn.ac.uk;tue.nl;tue.nl;utwente.nl;utwente.nl", "author_num": 8, "aff_unique_index": "0;1;2;3;1;1;0;0", "aff_unique_norm": "University of Twente;Eindhoven University of Technology;University of Oxford;University of Aberdeen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.utwente.nl;https://www.tue.nl;https://www.ox.ac.uk;https://www.abdn.ac.uk", "aff_unique_abbr": "UT;TU/e;Oxford;Aberdeen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0;0;0", "aff_country_unique": "Netherlands;United Kingdom" }, { "title": "A Combinatorial Algorithm for the Semi-Discrete Optimal Transport Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94742", "id": "Xq0Jwbczkn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xq0Jwbczkn", "openreview": "https://openreview.net/forum?id=Xq0Jwbczkn", "poster": "", "project": "", "author_site": "Pankaj Agarwal, Sharath Raghvendra, Pouyan Shirzadian, Keegan Yao", "tldr": "", "abstract": "Optimal Transport (OT, also known as the Wasserstein distance) is a popular metric for comparing probability distributions and has been successfully used in many machine-learning applications.\nIn the semi-discrete $2$-Wasserstein problem, we wish to compute the cheapest way to transport all the mass from a continuous distribution $\\mu$ to a discrete distribution $\\nu$ in $\\mathbb{R}^d$ for $d\\ge 1$, where the cost of transporting unit mass between points $a$ and $b$ is $d(a,b)=||a-b||^2$. When both distributions are discrete, a simple combinatorial framework has been used to find the exact solution (see e.g. [Orlin, STOC 1988]). \nIn this paper, we propose a combinatorial framework for the semi-discrete OT, which can be viewed as an extension of the combinatorial framework for the discrete OT but requires several new ideas. We present a new algorithm that given $\\mu$ and $\\nu$ in $\\mathbb{R}^2$ and a parameter $\\varepsilon>0$, computes an $\\varepsilon$-additive approximate semi-discrete transport plan in $O(n^{4}\\log n\\log \\frac{1}{\\varepsilon})$ time (in the worst case), where $n$ is the support-size of the discrete distribution $\\nu$ and we assume that the mass of $\\mu$ inside a triangle can be computed in $O(1)$ time. Our algorithm is significantly faster than the known algorithms, and unlike many numerical algorithms, it does not make any assumptions on the smoothness of $\\mu$.\nAs an application of our algorithm, we describe a data structure to store a large discrete distribution $\\mu$ (with support size $N$) using $O(N)$ space so that, given a query discrete distribution $\\nu$ (with support size $k$), an $\\varepsilon$-additive approximate transport plan can be computed in $O(k^{3}\\sqrt{N}\\log \\frac{1}{\\varepsilon})$ time in $2$ dimensions. Our algorithm and data structure extend to higher dimensions as well as to $p$-Wasserstein problem for any $p \\ge 1$.", "keywords": "Semi-Discrete Optimal Transport;Approximation Algorithm;Cost-Scaling Framework", "primary_area": "optimization", "supplementary_material": "", "author": "Pankaj K Agarwal;Sharath Raghvendra;Pouyan Shirzadian;Keegan Yao", "authorids": "~Pankaj_K_Agarwal1;~Sharath_Raghvendra1;~Pouyan_Shirzadian1;~Keegan_Yao1", "gender": "M;M;M;M", "homepage": "https://users.cs.duke.edu/~pankaj/;http://people.cs.vt.edu/~sharathr/;https://sites.google.com/vt.edu/pshirzadian/home;", "dblp": ";149/2582;322/7785;296/1841.html", "google_scholar": "xe0eVksAAAAJ;https://scholar.google.com.tw/citations?user=kOfRa7MAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0001-8315-2357;0000-0002-1739-5614", "linkedin": ";;;", "or_profile": "~Pankaj_K_Agarwal1;~Sharath_Raghvendra1;~Pouyan_Shirzadian1;~Keegan_Yao1", "aff": "Department of Computer Science, Duke University;North Carolina State University;Virginia Polytechnic Institute and State University;Department of Computer Science, Duke University", "aff_domain": "cs.duke.edu;csc.ncsu.edu;vt.edu;cs.duke.edu", "position": "Professor;Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nagarwal2024a,\ntitle={A Combinatorial Algorithm for the Semi-Discrete Optimal Transport Problem},\nauthor={Pankaj K Agarwal and Sharath Raghvendra and Pouyan Shirzadian and Keegan Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xq0Jwbczkn}\n}", "github": "", "reviewers": "ZH4D;SH1u;Eii6", "pdf_size": 5816492, "rating": "6;7;8", "confidence": "3;2;2", "soundness": "3;3;3", "novelty": "2;4;3", "presentation": "3;4;3", "wc_summary": "183;92;140", "wc_strengths": "22;63;49", "wc_weaknesses": "36;64;9", "wc_questions": "2;29;50", "wc_limitations": "19;13;9", "wc_review": "262;261;257", "wc_reply_reviewers": "0;17;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 138.33333333333334, 37.16928241916375 ], "wc_strengths_avg": [ 44.666666666666664, 17.016332024133625 ], "wc_weaknesses_avg": [ 36.333333333333336, 22.45489305746572 ], "wc_questions_avg": [ 27.0, 19.6468827043885 ], "wc_limitations_avg": [ 13.666666666666666, 4.109609335312651 ], "wc_review_avg": [ 260.0, 2.160246899469287 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 7.1336448530109 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5201905973645044528&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "cs.duke.edu;csc.ncsu.edu;vt.edu;cs.duke.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Duke University;North Carolina State University;Virginia Tech", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.duke.edu;https://www.ncsu.edu;https://www.vt.edu", "aff_unique_abbr": "Duke;NCSU;VT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Principled Probabilistic Imaging using Diffusion Models as Plug-and-Play Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94741", "id": "Xq9HQf7VNV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xq9HQf7VNV", "openreview": "https://openreview.net/forum?id=Xq9HQf7VNV", "poster": "/media/PosterPDFs/NeurIPS%202024/94741.png?t=1733162794.9619591", "project": "", "author_site": "Zihui Wu, Yu Sun, Yifan Chen, Bingliang Zhang, Yisong Yue, Katherine Bouman", "tldr": "", "abstract": "Diffusion models (DMs) have recently shown outstanding capabilities in modeling complex image distributions, making them expressive image priors for solving Bayesian inverse problems. However, most existing DM-based methods rely on approximations in the generative process to be generic to different inverse problems, leading to inaccurate sample distributions that deviate from the target posterior defined within the Bayesian framework. To harness the generative power of DMs while avoiding such approximations, we propose a Markov chain Monte Carlo algorithm that performs posterior sampling for general inverse problems by reducing it to sampling the posterior of a Gaussian denoising problem. Crucially, we leverage a general DM formulation as a unified interface that allows for rigorously solving the denoising problem with a range of state-of-the-art DMs. We demonstrate the effectiveness of the proposed method on six inverse problems (three linear and three nonlinear), including a real-world black hole imaging problem. Experimental results indicate that our proposed method offers more accurate reconstructions and posterior estimation compared to existing DM-based imaging inverse methods.", "keywords": "Computational imaging;Inverse problems;Diffusion models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/0d500c7475d995df558074e227a5acfa9468f837.zip", "author": "Zihui Wu;Yu Sun;Yifan Chen;Bingliang Zhang;Yisong Yue;Katherine Bouman", "authorids": "~Zihui_Wu2;~Yu_Sun11;~Yifan_Chen5;~Bingliang_Zhang1;~Yisong_Yue1;~Katherine_Bouman1", "gender": "M;M;;M;M;", "homepage": "https://zihuiwu.github.io/;;https://yifanc96.github.io;https://zhangbingliang2019.github.io/;http://www.yisongyue.com;", "dblp": ";62/3689-22;;;28/1244;01/8759", "google_scholar": "SU7yjxAAAAAJ;https://scholar.google.com/citations?hl=en;GNiinUoAAAAJ;;tEk4qo8AAAAJ;", "orcid": "0000-0002-7622-3548;0000-0001-7225-9677;;;0000-0001-9127-1989;", "linkedin": "zihui-ray-wu/;;;;yisongyue/;", "or_profile": "~Zihui_Wu2;~Yu_Sun11;~Yifan_Chen5;~Bingliang_Zhang1;~Yisong_Yue1;~Katherine_Bouman1", "aff": "Deparment of Computing + Mathematical Sciences, California Institute of Technology;California Institute of Technology;New York University;California Institute of Technology;California Institute of Technology;California Institute of Technology", "aff_domain": "cms.caltech.edu;caltech.edu;nyu.edu;caltech.edu;caltech.edu;caltech.edu", "position": "PhD student;Postdoc;Postdoc;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024principled,\ntitle={Principled Probabilistic Imaging using Diffusion Models as Plug-and-Play Priors},\nauthor={Zihui Wu and Yu Sun and Yifan Chen and Bingliang Zhang and Yisong Yue and Katherine Bouman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xq9HQf7VNV}\n}", "github": "", "reviewers": "u5tS;43sJ;TmFe;G2Ty;G64m", "pdf_size": 38822749, "rating": "4;6;7;8;8", "confidence": "4;3;5;3;4", "soundness": "3;3;4;4;4", "novelty": "2;3;4;4;4", "presentation": "3;3;4;3;4", "wc_summary": "77;140;154;110;83", "wc_strengths": "89;78;243;138;42", "wc_weaknesses": "231;263;254;62;43", "wc_questions": "141;113;2;48;2", "wc_limitations": "2;12;2;1;9", "wc_review": "540;606;655;359;179", "wc_reply_reviewers": "248;10;154;12;46", "wc_reply_authors": "767;0;39;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;1;2;1;1", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 112.8, 30.38025674677553 ], "wc_strengths_avg": [ 118.0, 69.6304531078177 ], "wc_weaknesses_avg": [ 170.6, 97.17736361931208 ], "wc_questions_avg": [ 61.2, 56.98210245331423 ], "wc_limitations_avg": [ 5.2, 4.445222154178573 ], "wc_review_avg": [ 467.8, 175.8401546860102 ], "wc_reply_reviewers_avg": [ 94.0, 93.16651759081692 ], "wc_reply_authors_avg": [ 161.2, 303.2763756048268 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.07142857142857144, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7179318324657565154&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "cms.caltech.edu;caltech.edu;nyu.edu;caltech.edu;caltech.edu;caltech.edu", "author_num": 6, "aff_unique_index": "1;1;2;1;1;1", "aff_unique_norm": ";California Institute of Technology;New York University", "aff_unique_dep": ";Mathematical Sciences;", "aff_unique_url": ";https://www.caltech.edu;https://www.nyu.edu", "aff_unique_abbr": ";Caltech;NYU", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "1;1;1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Designs for Enabling Collaboration in Human-Machine Teaming via Interactive and Explainable Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94740", "id": "XrK4JK2jBr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XrK4JK2jBr", "openreview": "https://openreview.net/forum?id=XrK4JK2jBr", "poster": "/media/PosterPDFs/NeurIPS%202024/94740.png?t=1731733304.887239", "project": "", "author_site": "Rohan Paleja, Michael Munje, Kimberlee Chang, Reed Jensen, Matthew Gombolay", "tldr": "", "abstract": "Collaborative robots and machine learning-based virtual agents are increasingly entering the human workspace with the aim of increasing productivity and enhancing safety. Despite this, we show in a ubiquitous experimental domain, Overcooked-AI, that state-of-the-art techniques for human-machine teaming (HMT), which rely on imitation or reinforcement learning, are brittle and result in a machine agent that aims to decouple the machine and human\u2019s actions to act independently rather than in a synergistic fashion. To remedy this deficiency, we develop HMT approaches that enable iterative, mixed-initiative team development allowing end-users to interactively reprogram interpretable AI teammates. Our 50-subject study provides several findings that we summarize into guidelines. While all approaches underperform a simple collaborative heuristic (a critical, negative result for learning-based methods), we find that white-box approaches supported by interactive modification can lead to significant team development, outperforming white-box approaches alone, and that black-box approaches are easier to train and result in better HMT performance highlighting a tradeoff between explainability and interactivity versus ease-of-training. Together, these findings present three important future research directions: 1) Improving the ability to generate collaborative agents with white-box models, 2) Better learning methods to facilitate collaboration rather than individualized coordination, and 3) Mixed-initiative interfaces that enable users, who may vary in ability, to improve collaboration.", "keywords": "Human-Machine Teaming;Adaptive AI", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Rohan R Paleja;Michael Joseph Munje;Kimberlee Chestnut Chang;Reed Jensen;Matthew Gombolay", "authorids": "~Rohan_R_Paleja1;~Michael_Joseph_Munje1;~Kimberlee_Chestnut_Chang1;~Reed_Jensen1;~Matthew_Gombolay1", "gender": "M;;F;;M", "homepage": "https://rohanpaleja.com/;https://michaelmunje.com/about/;;;https://core-robotics.gatech.edu/", "dblp": "237/8623;;;183/0879;144/1022", "google_scholar": "xjnQbKgAAAAJ;;;;Ihyz20wAAAAJ", "orcid": ";;;;", "linkedin": ";;kimberlee-chang-83328523/;;", "or_profile": "~Rohan_R_Paleja1;~Michael_Joseph_Munje1;~Kimberlee_Chestnut_Chang1;~Reed_Jensen1;~Matthew_Gombolay1", "aff": "Massachusetts Institute of Technology;University of Texas at Austin;Massachusetts Institute of Technology;MIT Lincoln Laboratory, Massachusetts Institute of Technology;Georgia Institute of Technology", "aff_domain": "mit.edu;utexas.edu;mit.edu;ll.mit.edu;cc.gatech.edu", "position": "Researcher;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\npaleja2024designs,\ntitle={Designs for Enabling Collaboration in Human-Machine Teaming via Interactive and Explainable Systems},\nauthor={Rohan R Paleja and Michael Joseph Munje and Kimberlee Chestnut Chang and Reed Jensen and Matthew Gombolay},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XrK4JK2jBr}\n}", "github": "", "reviewers": "HJn7;yVVF;cdZ4;MFRs", "pdf_size": 1772850, "rating": "5;5;6;8", "confidence": "4;3;4;3", "soundness": "3;2;2;4", "novelty": "3;2;2;4", "presentation": "3;2;2;4", "wc_summary": "62;59;72;115", "wc_strengths": "46;44;51;125", "wc_weaknesses": "78;109;514;55", "wc_questions": "287;69;25;128", "wc_limitations": "1;16;31;84", "wc_review": "474;297;693;507", "wc_reply_reviewers": "28;40;198;19", "wc_reply_authors": "0;0;325;0", "reply_reviewers": "1;1;3;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 77.0, 22.4610774452162 ], "wc_strengths_avg": [ 66.5, 33.8710791088799 ], "wc_weaknesses_avg": [ 189.0, 188.61468659677593 ], "wc_questions_avg": [ 127.25, 99.20779959257236 ], "wc_limitations_avg": [ 33.0, 31.29696470905765 ], "wc_review_avg": [ 492.75, 140.51045334778476 ], "wc_reply_reviewers_avg": [ 71.25, 73.5573755649289 ], "wc_reply_authors_avg": [ 81.25, 140.72912811497127 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7181637781105213038&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;utexas.edu;mit.edu;ll.mit.edu;cc.gatech.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Massachusetts Institute of Technology;University of Texas at Austin;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.utexas.edu;https://www.gatech.edu", "aff_unique_abbr": "MIT;UT Austin;Georgia Tech", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "EHRNoteQA: An LLM Benchmark for Real-World Clinical Practice Using Discharge Summaries", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97643", "id": "XrKhwfPmyI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XrKhwfPmyI", "openreview": "https://openreview.net/forum?id=XrKhwfPmyI", "poster": "/media/PosterPDFs/NeurIPS%202024/97643.png?t=1730956948.8518262", "project": "", "author_site": "Sunjun Kweon, Jiyoun Kim, Heeyoung Kwak, Dongchul Cha, Hangyul Yoon, Kwang Kim, Jeewon Yang, Seunghyun Won, Edward Choi", "tldr": "", "abstract": "Discharge summaries in Electronic Health Records (EHRs) are crucial for clinical decision-making, but their length and complexity make information extraction challenging, especially when dealing with accumulated summaries across multiple patient admissions. Large Language Models (LLMs) show promise in addressing this challenge by efficiently analyzing vast and complex data. Existing benchmarks, however, fall short in properly evaluating LLMs' capabilities in this context, as they typically focus on single-note information or limited topics, failing to reflect the real-world inquiries required by clinicians. To bridge this gap, we introduce EHRNoteQA, a novel benchmark built on the MIMIC-IV EHR, comprising 962 different QA pairs each linked to distinct patients' discharge summaries. Every QA pair is initially generated using GPT-4 and then manually reviewed and refined by three clinicians to ensure clinical relevance. EHRNoteQA includes questions that require information across multiple discharge summaries and covers eight diverse topics, mirroring the complexity and diversity of real clinical inquiries. We offer EHRNoteQA in two formats: open-ended and multi-choice question answering, and propose a reliable evaluation method for each. We evaluate 27 LLMs using EHRNoteQA and examine various factors affecting the model performance (e.g., the length and number of discharge summaries). Furthermore, to validate EHRNoteQA as a reliable proxy for expert evaluations in clinical practice, we measure the correlation between the LLM performance on EHRNoteQA, and the LLM performance manually evaluated by clinicians. Results show that LLM performance on EHRNoteQA have higher correlation with clinician-evaluated performance (Spearman: 0.78, Kendall: 0.62) compared to other benchmarks, demonstrating its practical relevance in evaluating LLMs in clinical settings. EHRNoteQA will be publicly available to support further research and improve LLM evaluation in clinical practice. EHRNoteQA is publicly available under PhysioNet credential access at https://doi.org/10.13026/acga-ht95, and the code is available at https://github.com/ji-youn-kim/EHRNoteQA.", "keywords": "clinical;ehr;emr;llm", "primary_area": "", "supplementary_material": "", "author": "Sunjun Kweon;Jiyoun Kim;Heeyoung Kwak;Dongchul Cha;Hangyul Yoon;Kwang Hyun Kim;Jeewon Yang;Seunghyun Won;Edward Choi", "authorids": "~Sunjun_Kweon1;~Jiyoun_Kim1;~Heeyoung_Kwak1;~Dongchul_Cha1;~Hangyul_Yoon1;~Kwang_Hyun_Kim1;~Jeewon_Yang1;~Seunghyun_Won1;~Edward_Choi1", "gender": "M;F;F;M;M;M;;F;M", "homepage": ";;;;;;;;http://mp2893.com", "dblp": ";;262/3416.html;;;;;;41/3886", "google_scholar": "mKFQKpwAAAAJ;XBQtxfMAAAAJ;;;;ask0YzwAAAAJ;;;GUlGIPkAAAAJ", "orcid": ";;;0000-0002-0043-5026;0000-0002-9515-1623;0000-0002-6264-5109;;0000-0003-1764-7890;", "linkedin": ";jiyoun-kim-492729149/;;;hangyul-yoon-a10838203/;;;;", "or_profile": "~Sunjun_Kweon1;~Jiyoun_Kim1;~Heeyoung_Kwak1;~Dongchul_Cha1;~Hangyul_Yoon1;~Kwang_Hyun_Kim1;~Jeewon_Yang1;~Seunghyun_Won1;~Edward_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;NAVER AI Lab;NAVER;Korea Advanced Institute of Science and Technology (KAIST);Ewha Women's University;;Seoul National University Bundang Hospital;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;navercorp.com;navercorp.com;kaist.ac.kr;ewha.ac.kr;;snubh.org;kaist.ac.kr", "position": "PhD student;PhD student;Researcher;Researcher;PhD student;Associate Professor;;Research assistant professor;Associate Professor", "bibtex": "@inproceedings{\nkweon2024ehrnoteqa,\ntitle={{EHRN}ote{QA}: An {LLM} Benchmark for Real-World Clinical Practice Using Discharge Summaries},\nauthor={Sunjun Kweon and Jiyoun Kim and Heeyoung Kwak and Dongchul Cha and Hangyul Yoon and Kwang Hyun Kim and Jeewon Yang and Seunghyun Won and Edward Choi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XrKhwfPmyI}\n}", "github": "", "reviewers": "XMWc;2Hp9;VCg7;gAX8", "pdf_size": 1582098, "rating": "6;7;8;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "72;70;37;112", "wc_strengths": "96;53;19;51", "wc_improvement": "501;188;6;28", "wc_limitations": "16;90;6;1", "wc_correctness": "17;7;4;1", "wc_clarity": "6;11;5;1", "wc_relation_to_prior_work": "39;5;4;1", "wc_documentation": "1;99;1;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "749;524;83;206", "wc_reply_reviewers": "86;5;24;0", "wc_reply_authors": "67;0;0;0", "reply_reviewers": "3;1;1;0", "reply_authors": "12;5;2;1", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 72.75, 26.58359456506964 ], "wc_strengths_avg": [ 54.75, 27.371289702898547 ], "wc_improvement_avg": [ 180.75, 197.78950300761667 ], "wc_limitations_avg": [ 28.25, 36.05811281806079 ], "wc_correctness_avg": [ 7.25, 6.015604707757983 ], "wc_clarity_avg": [ 5.75, 3.5619517121937516 ], "wc_relation_to_prior_work_avg": [ 12.25, 15.514106484100203 ], "wc_documentation_avg": [ 27.75, 41.299969733645085 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 390.5, 262.17408338735544 ], "wc_reply_reviewers_avg": [ 28.75, 34.24452510986245 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 5.0, 4.301162633521313 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16967189775004303117&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "kaist.ac.kr;kaist.ac.kr;navercorp.com;navercorp.com;kaist.ac.kr;ewha.ac.kr;;snubh.org;kaist.ac.kr", "author_num": 9, "aff_unique_index": "0;0;1;1;0;2;3;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation;Ewha Women's University;Seoul National University", "aff_unique_dep": ";NAVER AI Lab;;Hospital", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com;http://www.ewha.ac.kr;https://www.snuh.org", "aff_unique_abbr": "KAIST;NAVER;EWU;SNUH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bundang", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Adaptive Sampling for Efficient Softmax Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94739", "id": "XsNA2b8GPz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XsNA2b8GPz", "openreview": "https://openreview.net/forum?id=XsNA2b8GPz", "poster": "", "project": "", "author_site": "Tavor Baharav, Ryan Kang, Colin Sullivan, Mo Tiwari, Eric Luxenberg, David Tse, Mert Pilanci", "tldr": "", "abstract": "The softmax function is ubiquitous in machine learning and optimization applications. Computing the full softmax evaluation of a matrix-vector product can be computationally expensive in high-dimensional settings. In many applications, however, it is sufficient to calculate only the top few outputs of the softmax function. In this work, we present an algorithm, dubbed AdaptiveSoftmax, that adaptively computes the top k softmax values more efficiently than the full softmax computation, with probabilistic guarantees. We demonstrate the sample efficiency improvements afforded by AdaptiveSoftmax on real and synthetic data to corroborate our theoretical results. AdaptiveSoftmax yields >10x gain over full softmax computation on most datasets, yielding up to 30x improvement for Mistral7B evaluated on the Wikitext dataset. The adaptive method we propose for estimating the partition function (the softmax denominator) is of independent interest and can be used in other applications such as kernel density estimation.", "keywords": "Multi-armed bandits;adaptive;softmax;attention", "primary_area": "bandits", "supplementary_material": "/attachment/bbe5733bb8ed844730b484b89994ba1101b7ce24.zip", "author": "Tavor Baharav;Ryan Kang;Colin Sullivan;Mo Tiwari;Eric Sager Luxenberg;David Tse;Mert Pilanci", "authorids": "~Tavor_Baharav1;~Ryan_Kang1;~Colin_Sullivan1;~Mo_Tiwari1;~Eric_Sager_Luxenberg1;~David_Tse1;~Mert_Pilanci3", "gender": "M;M;M;;M;;M", "homepage": "http://www.tavorb.com;;;http://www.motiwari.com/;https://www.ericluxenberg.com/;;https://stanford.edu/~pilanci/", "dblp": "242/8888;https://dblp.org/rec/conf/nips/TiwariKLPSTZ22.html;;267/5421;324/5004;t/DavidNCTse;45/8056", "google_scholar": "7HPdnqEAAAAJ;;;https://scholar.google.com/citations?hl=en;VqLB-78AAAAJ;;aSAS-aAAAAAJ", "orcid": "0000-0001-8924-0243;;;;0000-0003-4943-8727;;", "linkedin": "tavorb/;ryan-kang-554819221/;colin-sullivan-0b636a198/;motiwari;;;mert-pilanci-ba615743/", "or_profile": "~Tavor_Baharav1;~Ryan_Kang1;~Colin_Sullivan1;~Mo_Tiwari1;~Eric_Sager_Luxenberg1;~David_Tse1;~Mert_Pilanci3", "aff": "Dana Farber Cancer Institute;Stanford University;Stanford University;OpenAI;Stanford University;University of California-Berkeley;Stanford University", "aff_domain": "dfci.harvard.edu;stanford.edu;cs.stanford.edu;openai.com;stanford.edu;;stanford.edu", "position": "Postdoc;MS student;MS student;Member of Technical Staff;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nbaharav2024adaptive,\ntitle={Adaptive Sampling for Efficient Softmax Approximation},\nauthor={Tavor Baharav and Ryan Kang and Colin Sullivan and Mo Tiwari and Eric Sager Luxenberg and David Tse and Mert Pilanci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XsNA2b8GPz}\n}", "github": "", "reviewers": "wft8;qEfP;kEa9;qUNV", "pdf_size": 651843, "rating": "5;5;7;7", "confidence": "1;3;2;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;4;2", "wc_summary": "117;65;73;99", "wc_strengths": "99;63;153;103", "wc_weaknesses": "95;145;67;125", "wc_questions": "44;2;57;7", "wc_limitations": "17;7;57;10", "wc_review": "372;282;407;344", "wc_reply_reviewers": "6;0;20;23", "wc_reply_authors": "116;64;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 88.5, 20.706279240848655 ], "wc_strengths_avg": [ 104.5, 32.04293994002423 ], "wc_weaknesses_avg": [ 108.0, 29.614185789921695 ], "wc_questions_avg": [ 27.5, 23.521266972678152 ], "wc_limitations_avg": [ 22.75, 20.104414938017968 ], "wc_review_avg": [ 351.25, 45.7896003476772 ], "wc_reply_reviewers_avg": [ 12.25, 9.54921462739214 ], "wc_reply_authors_avg": [ 45.0, 48.61069841094653 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uiNB5-7AbcMJ:scholar.google.com/&scioq=Adaptive+Sampling+for+Efficient+Softmax+Approximation&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "dfci.harvard.edu;stanford.edu;cs.stanford.edu;openai.com;stanford.edu;;stanford.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;1;3;1", "aff_unique_norm": "Dana-Farber Cancer Institute;Stanford University;OpenAI;University of California, Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.dana-farber.org;https://www.stanford.edu;https://openai.com;https://www.berkeley.edu", "aff_unique_abbr": "DFCI;Stanford;OpenAI;UC Berkeley", "aff_campus_unique_index": "1;1;1;2;1", "aff_campus_unique": ";Stanford;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MoME: Mixture of Multimodal Experts for Generalist Multimodal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94738", "id": "Xskl7Da34U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Xskl7Da34U", "openreview": "https://openreview.net/forum?id=Xskl7Da34U", "poster": "/media/PosterPDFs/NeurIPS%202024/94738.png?t=1731481734.7584405", "project": "", "author_site": "Leyang Shen, Gongwei Chen, Rui Shao, Weili Guan, Liqiang Nie", "tldr": "", "abstract": "Multimodal large language models (MLLMs) have demonstrated impressive capabilities across various vision-language tasks. However, a generalist MLLM typically underperforms compared with a specialist MLLM on most VL tasks, which can be attributed to task interference. In this paper, we propose a mixture of multimodal experts (MoME) to mitigate task interference and obtain a generalist MLLM. Our MoME is composed of two key components, a mixture of vision experts (MoVE) and a mixture of language experts (MoLE). MoVE can adaptively modulate the features transformed from various vision encoders, and has a strong compatibility in transformation architecture. MoLE incorporates sparsely gated experts into LLMs to achieve painless improvements with roughly unchanged inference costs. In response to task interference, our MoME specializes in both vision and language modality to adapt to task discrepancies. Extensive experiments show that MoME significantly improves the performance of generalist MLLMs across various VL tasks.", "keywords": "Multimodal Large Language Model;Mixture of Experts", "primary_area": "generative_models", "supplementary_material": "", "author": "Leyang Shen;Gongwei Chen;Rui Shao;Weili Guan;Liqiang Nie", "authorids": "~Leyang_Shen1;~Gongwei_Chen1;~Rui_Shao1;~Weili_Guan4;~Liqiang_Nie2", "gender": ";M;M;;M", "homepage": ";;https://rshaojimmy.github.io/;;https://liqiangnie.github.io/index.html", "dblp": ";237/9231;;;92/8277", "google_scholar": ";Mpg0w3cAAAAJ;https://scholar.google.com/citations?hl=en;;yywVMhUAAAAJ", "orcid": ";0000-0002-0634-6075;0000-0003-0090-9604;;0000-0003-1476-0273", "linkedin": ";;;;", "or_profile": "~Leyang_Shen1;~Gongwei_Chen1;~Rui_Shao1;~Weili_Guan4;~Liqiang_Nie2", "aff": ";Harbin Institute of Technology;Harbin Institute of Technology;;Shandong University", "aff_domain": ";hit.edu.cn;hit.edu.cn;;sdu.edu.cn", "position": ";Postdoc;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nshen2024mome,\ntitle={Mo{ME}: Mixture of Multimodal Experts for Generalist Multimodal Large Language Models},\nauthor={Leyang Shen and Gongwei Chen and Rui Shao and Weili Guan and Liqiang Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Xskl7Da34U}\n}", "github": "", "reviewers": "cdtp;ZwrT;1g6K", "pdf_size": 7584047, "rating": "4;5;6", "confidence": "4;4;3", "soundness": "3;2;3", "novelty": "1;2;3", "presentation": "3;3;3", "wc_summary": "61;37;74", "wc_strengths": "31;62;49", "wc_weaknesses": "202;346;68", "wc_questions": "60;6;20", "wc_limitations": "10;13;14", "wc_review": "364;464;225", "wc_reply_reviewers": "79;27;15", "wc_reply_authors": "617;19;19", "reply_reviewers": "1;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.333333333333336, 15.326085243430198 ], "wc_strengths_avg": [ 47.333333333333336, 12.710450643291745 ], "wc_weaknesses_avg": [ 205.33333333333334, 113.517497427587 ], "wc_questions_avg": [ 28.666666666666668, 22.88133640230735 ], "wc_limitations_avg": [ 12.333333333333334, 1.699673171197595 ], "wc_review_avg": [ 351.0, 98.00340130151946 ], "wc_reply_reviewers_avg": [ 40.333333333333336, 27.776888874666213 ], "wc_reply_authors_avg": [ 218.33333333333334, 281.8999034330369 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4056854533909608333&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";hit.edu.cn;hit.edu.cn;;sdu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harbin Institute of Technology;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;http://www.sdu.edu.cn", "aff_unique_abbr": "HIT;SDU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Unraveling the Gradient Descent Dynamics of Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94737", "id": "XswQeLjJo5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XswQeLjJo5", "openreview": "https://openreview.net/forum?id=XswQeLjJo5", "poster": "", "project": "", "author_site": "Bingqing Song, Boran Han, Shuai Zhang, Jie Ding, Mingyi Hong", "tldr": "", "abstract": "While the Transformer architecture has achieved remarkable success across various domains, a thorough theoretical foundation explaining its optimization dynamics is yet to be fully developed. In this study, we aim to bridge this understanding gap by answering the following two core questions: (1) Which types of Transformer architectures allow Gradient Descent (GD) to achieve guaranteed convergence? and (2) Under what initial conditions and architectural specifics does the Transformer achieve rapid convergence during training? By analyzing the loss landscape of a single Transformer layer using Softmax and Gaussian attention kernels, our work provides concrete answers to these questions. Our findings demonstrate that, with appropriate weight initialization, GD can train a Transformer model (with either kernel type) to achieve a global optimal solution, especially when the input embedding dimension is large. Nonetheless, certain scenarios highlight potential pitfalls: training a Transformer using the Softmax attention kernel may sometimes lead to suboptimal local solutions. In contrast, the Gaussian attention kernel exhibits a much favorable behavior. Our empirical study further validate the theoretical findings.", "keywords": "Optimization;Transformer", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Bingqing Song;Boran Han;Shuai Zhang;Jie Ding;Mingyi Hong", "authorids": "~Bingqing_Song1;~Boran_Han1;~Shuai_Zhang7;~Jie_Ding2;~Mingyi_Hong1", "gender": "F;;;M;M", "homepage": ";;;http://jding.org;http://people.ece.umn.edu/~mhong/mingyi.html", "dblp": ";;;94/1825-2;57/8053", "google_scholar": ";;;ZyqvoqcAAAAJ;qRnP-p0AAAAJ", "orcid": ";;;;", "linkedin": "https://cn.linkedin.com/in/bingqing\uff08celeste\uff09-song-2a03ab158;;;;", "or_profile": "~Bingqing_Song1;~Boran_Han1;~Shuai_Zhang7;~Jie_Ding2;~Mingyi_Hong1", "aff": "University of Minnesota, Minneapolis;;;University of Minnesota - Twin Cities;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;;;umn.edu;umn.edu", "position": "PhD student;;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsong2024unraveling,\ntitle={Unraveling the Gradient Descent Dynamics of Transformers},\nauthor={Bingqing Song and Boran Han and Shuai Zhang and Jie Ding and Mingyi Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XswQeLjJo5}\n}", "github": "", "reviewers": "ynwg;mp2C;cxoC", "pdf_size": 1119302, "rating": "5;6;6", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;3;1", "wc_summary": "42;62;61", "wc_strengths": "29;66;112", "wc_weaknesses": "62;78;130", "wc_questions": "161;70;3", "wc_limitations": "6;42;46", "wc_review": "300;318;352", "wc_reply_reviewers": "36;31;168", "wc_reply_authors": "216;0;486", "reply_reviewers": "2;1;2", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 55.0, 9.201449161228174 ], "wc_strengths_avg": [ 69.0, 33.95094500402996 ], "wc_weaknesses_avg": [ 90.0, 29.028721409436322 ], "wc_questions_avg": [ 78.0, 64.75080437080814 ], "wc_limitations_avg": [ 31.333333333333332, 17.98765008430939 ], "wc_review_avg": [ 323.3333333333333, 21.561282171728305 ], "wc_reply_reviewers_avg": [ 78.33333333333333, 63.436757658491835 ], "wc_reply_authors_avg": [ 234.0, 198.8164983093707 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4793824642338011129&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 7, "email": "umn.edu;;;umn.edu;umn.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Minneapolis;Twin Cities", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "BitDelta: Your Fine-Tune May Only Be Worth One Bit", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94736", "id": "XuWWq3gy7W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XuWWq3gy7W", "openreview": "https://openreview.net/forum?id=XuWWq3gy7W", "poster": "/media/PosterPDFs/NeurIPS%202024/94736.png?t=1733721646.233329", "project": "", "author_site": "James Liu, Guangxuan Xiao, Kai Li, Jason Lee, Song Han, Tri Dao, Tianle Cai", "tldr": "", "abstract": "Large Language Models (LLMs) are typically trained in two phases: pre-training on large internet-scale datasets, and fine-tuning for downstream tasks. Given the higher computational demand of pre-training, it is intuitive to assume that fine-tuning adds less new information to the model, and is thus more compressible. We explore this assumption by decomposing the weights of fine-tuned models into their pre-trained components and an additional delta. We introduce a simple method, BitDelta, which successfully quantizes this delta down to 1 bit without compromising performance. This interesting finding not only highlights the potential redundancy of information added during fine-tuning, but also has significant implications for the multi-tenant serving and multi-tenant storage of fine-tuned models. By enabling the use of a single high-precision base model accompanied by multiple 1-bit deltas, BitDelta dramatically reduces GPU memory requirements by more than 10x, thus reducing per-user generation latency by more than 10x in multi-tenant settings. We validate BitDelta through experiments across Llama-2, Mistral and MPT model families, and on models up to 70B parameters, showcasing minimal performance degradation in all tested settings.", "keywords": "Large Language Models;Quantization;LLM quantization;efficient inference;multi-tenancy", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/be6a92bc2dcca16763aeef8e92c9e4a3812b63c6.zip", "author": "James Liu;Guangxuan Xiao;Kai Li;Jason D. Lee;Song Han;Tri Dao;Tianle Cai", "authorids": "~James_Liu3;~Guangxuan_Xiao1;~Kai_Li8;~Jason_D._Lee1;~Song_Han5;~Tri_Dao1;~Tianle_Cai1", "gender": "M;;M;M;;;M", "homepage": ";;https://www.cs.princeton.edu/~li/;https://jasondlee88.github.io/;;https://tridao.me/;https://tianle.website", "dblp": ";;l/KaiLi1.html;88/3262;;206/7018;241/9458", "google_scholar": ";;9MSpWOUAAAAJ;GR_DsT0AAAAJ;;NQRw0bQAAAAJ;CvwLRSMAAAAJ", "orcid": ";;;;;;", "linkedin": "james-liu-000/;;;;;;", "or_profile": "~James_Liu3;~Guangxuan_Xiao1;~Kai_Li8;~Jason_D._Lee1;~Song_Han5;~Tri_Dao1;~Tianle_Cai1", "aff": "Massachusetts Institute of Technology;;Princeton University;Princeton University;;Princeton University;Princeton University", "aff_domain": "mit.edu;;princeton.edu;princeton.edu;;princeton.edu;princeton.edu", "position": "Undergrad student;;Full Professor;Assistant Professor;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nliu2024bitdelta,\ntitle={BitDelta: Your Fine-Tune May Only Be Worth One Bit},\nauthor={James Liu and Guangxuan Xiao and Kai Li and Jason D. Lee and Song Han and Tri Dao and Tianle Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XuWWq3gy7W}\n}", "github": "", "reviewers": "b1KL;dvPC;dXr8;dEGq", "pdf_size": 2986167, "rating": "4;5;6;7", "confidence": "3;5;4;4", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "87;41;42;69", "wc_strengths": "45;30;39;83", "wc_weaknesses": "180;159;113;45", "wc_questions": "15;142;110;58", "wc_limitations": "15;6;17;15", "wc_review": "342;378;321;270", "wc_reply_reviewers": "46;0;66;16", "wc_reply_authors": "95;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 59.75, 19.330998422223306 ], "wc_strengths_avg": [ 49.25, 20.20365066021485 ], "wc_weaknesses_avg": [ 124.25, 51.77535610693566 ], "wc_questions_avg": [ 81.25, 48.59719642119286 ], "wc_limitations_avg": [ 13.25, 4.264680527307995 ], "wc_review_avg": [ 327.75, 39.07924641033908 ], "wc_reply_reviewers_avg": [ 32.0, 25.65151067676132 ], "wc_reply_authors_avg": [ 23.75, 41.13620667976084 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3162277660168379, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16080206753684160236&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "mit.edu;;princeton.edu;princeton.edu;;princeton.edu;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.princeton.edu", "aff_unique_abbr": "MIT;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Pedestrian Trajectory Prediction with Missing Data: Datasets, Imputation, and Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97642", "id": "XukWe15QCi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XukWe15QCi", "openreview": "https://openreview.net/forum?id=XukWe15QCi", "poster": "/media/PosterPDFs/NeurIPS%202024/97642.png?t=1733485827.3751984", "project": "", "author_site": "Pranav Singh Chib, Pravendra Singh", "tldr": "", "abstract": "Pedestrian trajectory prediction is crucial for several applications such as robotics and self-driving vehicles. Significant progress has been made in the past decade thanks to the availability of pedestrian trajectory datasets, which enable trajectory prediction methods to learn from pedestrians' past movements and predict future trajectories. However, these datasets and methods typically assume that the observed trajectory sequence is complete, ignoring real-world issues such as sensor failure, occlusion, and limited fields of view that can result in missing values in observed trajectories. To address this challenge, we present TrajImpute, a pedestrian trajectory prediction dataset that simulates missing coordinates in the observed trajectory, enhancing real-world applicability. TrajImpute maintains a uniform distribution of missing data within the observed trajectories. In this work, we comprehensively examine several imputation methods to reconstruct the missing coordinates and benchmark them for imputing pedestrian trajectories. Furthermore, we provide a thorough analysis of recent trajectory prediction methods and evaluate the performance of these models on the imputed trajectories. Our experimental evaluation of the imputation and trajectory prediction methods offers several valuable insights. Our dataset provides a foundational resource for future research on imputation-aware pedestrian trajectory prediction, potentially accelerating the deployment of these methods in real-world applications. Publicly accessible links to the datasets and code files are available at https://github.com/Pranav-chib/TrajImpute.", "keywords": "Trajectory prediction", "primary_area": "", "supplementary_material": "/attachment/72180dc7c54890822edd4a2b568e121350c3f333.pdf", "author": "Pranav singh chib;Pravendra Singh", "authorids": "~Pranav_singh_chib1;~Pravendra_Singh1", "gender": ";M", "homepage": ";https://sites.google.com/view/pravendra/", "dblp": ";160/8743", "google_scholar": ";YwDTxJMAAAAJ", "orcid": ";0000-0003-1001-2219", "linkedin": ";", "or_profile": "~Pranav_singh_chib1;~Pravendra_Singh1", "aff": ";Indian Institute of Technology, Roorkee", "aff_domain": ";iitr.ac.in", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nchib2024pedestrian,\ntitle={Pedestrian Trajectory Prediction with Missing Data: Datasets, Imputation, and Benchmarking},\nauthor={Pranav singh chib and Pravendra Singh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XukWe15QCi}\n}", "github": "", "reviewers": "o6nr;oGiB;jgDP;ZnSu", "pdf_size": 2722237, "rating": "4;6;7;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "32;80;72;40", "wc_strengths": "34;92;66;19", "wc_improvement": "35;90;65;22", "wc_limitations": "41;33;63;1", "wc_correctness": "34;28;47;3", "wc_clarity": "1;19;6;6", "wc_relation_to_prior_work": "25;26;28;1", "wc_documentation": "1;18;15;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "204;387;363;94", "wc_reply_reviewers": "0;13;0;0", "wc_reply_authors": "72;112;40;72", "reply_reviewers": "0;1;0;0", "reply_authors": "2;4;3;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 56.0, 20.396078054371138 ], "wc_strengths_avg": [ 52.75, 28.314086600135983 ], "wc_improvement_avg": [ 53.0, 26.448062310876388 ], "wc_limitations_avg": [ 34.5, 22.242976419535225 ], "wc_correctness_avg": [ 28.0, 15.98436736314578 ], "wc_clarity_avg": [ 8.0, 6.670832032063167 ], "wc_relation_to_prior_work_avg": [ 20.0, 11.022703842524301 ], "wc_documentation_avg": [ 8.75, 7.8222439235810075 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 262.0, 119.80609333418731 ], "wc_reply_reviewers_avg": [ 3.25, 5.629165124598851 ], "wc_reply_authors_avg": [ 74.0, 25.534290669607408 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12206687160097624682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";iitr.ac.in", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Indian Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.iitr.ac.in", "aff_unique_abbr": "IIT Roorkee", "aff_campus_unique_index": "0", "aff_campus_unique": "Roorkee", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "title": "SafeSora: Towards Safety Alignment of Text2Video Generation via a Human Preference Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97641", "id": "XvHdPiKy6c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XvHdPiKy6c", "openreview": "https://openreview.net/forum?id=XvHdPiKy6c", "poster": "/media/PosterPDFs/NeurIPS%202024/97641.png?t=1731615461.5732005", "project": "", "author_site": "Juntao Dai, Tianle Chen, Xuyao Wang, Ziran Yang, Taiye Chen, Jiaming Ji, Yaodong Yang", "tldr": "", "abstract": "To mitigate the risk of harmful outputs from large vision models (LVMs), we introduce the *SafeSora* dataset to promote research on aligning text-to-video generation with human values. This dataset encompasses human preferences in text-to-video generation tasks along two primary dimensions: helpfulness and harmlessness. To capture in-depth human preferences and facilitate structured reasoning by crowdworkers, we subdivide helpfulness into 4 sub-dimensions and harmlessness into 12 sub-categories, serving as the basis for pilot annotations. The *SafeSora* dataset includes 14,711 unique prompts, 57,333 unique videos generated by 4 distinct LVMs, and 51,691 pairs of preference annotations labeled by humans. We further demonstrate the utility of the *SafeSora* dataset through several applications, including training the text-video moderation model and aligning LVMs with human preference by fine-tuning a prompt augmentation module or the diffusion model. These applications highlight its potential as the foundation for text-to-video alignment research, such as human preference modeling and the development and validation of alignment algorithms. Our project is available at https://sites.google.com/view/safe-sora.\n\nWarning: this paper contains example data that may be offensive or harmful.", "keywords": "alignment;text-to-video generation;large language model;large vision model", "primary_area": "", "supplementary_material": "", "author": "Josef Dai;Tianle Chen;Xuyao Wang;Ziran Yang;Taiye Chen;Jiaming Ji;Yaodong Yang", "authorids": "~Josef_Dai1;~Tianle_Chen4;~Xuyao_Wang1;~Ziran_Yang1;~Taiye_Chen1;~Jiaming_Ji2;~Yaodong_Yang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/linghaiCTL;;https://ziranyang0.github.io;https://github.com/yeyutaihan;https://jijiaming.com;https://www.yangyaodong.com;https://github.com/calico-1226", "dblp": ";;358/4441;;313/9356.html;170/1496-1;359/3349", "google_scholar": ";;_k1opxYAAAAJ;;aW8WbYYAAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;", "orcid": ";0009-0003-0474-367X;;;;0000-0001-8132-5613;", "linkedin": ";;;;;yaodong-yang;", "or_profile": "~Tianle_Chen4;~Xuyao_Wang1;~Ziran_Yang1;~Taiye_Chen1;~Jiaming_Ji2;~Yaodong_Yang1;~Juntao_Dai2", "aff": "Peking University;Nankai University;Peking University;Peking University;Peking University;Peking University;Peking University", "aff_domain": "stu.pku.edu.cn;nankai.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Undergrad student;Undergrad student;Undergrad student;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ndai2024safesora,\ntitle={SafeSora: Towards Safety Alignment of Text2Video Generation via a Human Preference Dataset},\nauthor={Josef Dai and Tianle Chen and Xuyao Wang and Ziran Yang and Taiye Chen and Jiaming Ji and Yaodong Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=XvHdPiKy6c}\n}", "github": "", "reviewers": "53du;XZhL;3Hpk;k4id", "pdf_size": 18374641, "rating": "6;7;7;8", "confidence": "5;4;4;5", "wc_summary_and_contributions": "39;59;79;38", "wc_strengths": "48;134;224;52", "wc_improvement": "256;101;81;155", "wc_limitations": "23;36;116;4", "wc_correctness": "7;2;19;5", "wc_clarity": "1;2;41;13", "wc_relation_to_prior_work": "1;17;20;5", "wc_documentation": "1;7;10;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "377;359;591;290", "wc_reply_reviewers": "67;24;0;0", "wc_reply_authors": "494;42;49;49", "reply_reviewers": "1;1;0;0", "reply_authors": "4;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 53.75, 16.813313177360374 ], "wc_strengths_avg": [ 114.5, 71.93573520858739 ], "wc_improvement_avg": [ 148.25, 67.8431094511447 ], "wc_limitations_avg": [ 44.75, 42.681231238098086 ], "wc_correctness_avg": [ 8.25, 6.456585785072479 ], "wc_clarity_avg": [ 14.25, 16.145819892467525 ], "wc_relation_to_prior_work_avg": [ 10.75, 7.949056547792323 ], "wc_documentation_avg": [ 8.75, 5.7608593109014565 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 404.25, 112.60411848595948 ], "wc_reply_reviewers_avg": [ 22.75, 27.36215452043205 ], "wc_reply_authors_avg": [ 158.5, 193.7220947646396 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9676846022740347288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "stu.pku.edu.cn;nankai.edu.cn;pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Peking University;Nankai University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.nankai.edu.cn", "aff_unique_abbr": "Peking U;NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hierarchical Hybrid Sliced Wasserstein: A Scalable Metric for Heterogeneous Joint Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94735", "id": "XwrMd1njqq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XwrMd1njqq", "openreview": "https://openreview.net/forum?id=XwrMd1njqq", "poster": "/media/PosterPDFs/NeurIPS%202024/94735.png?t=1730569530.989694", "project": "", "author_site": "Khai Nguyen, Nhat Ho", "tldr": "", "abstract": "Sliced Wasserstein (SW) and Generalized Sliced Wasserstein (GSW) have been widely used in applications due to their computational and statistical scalability. However, the SW and the GSW are only defined between distributions supported on a homogeneous domain. This limitation prevents their usage in applications with heterogeneous joint distributions with marginal distributions supported on multiple different domains. Using SW and GSW directly on the joint domains cannot make a meaningful comparison since their homogeneous slicing operator, i.e., Radon Transform (RT) and Generalized Radon Transform (GRT) are not expressive enough to capture the structure of the joint supports set. To address the issue, we propose two new slicing operators, i.e., Partial Generalized Radon Transform (PGRT) and Hierarchical Hybrid Radon Transform (HHRT). In greater detail, PGRT is the generalization of Partial Radon Transform (PRT), which transforms a subset of function arguments non-linearly while HHRT is the composition of PRT and multiple domain-specific PGRT on marginal domain arguments. By using HHRT, we extend the SW into Hierarchical Hybrid Sliced Wasserstein (H2SW) distance which is designed specifically for comparing heterogeneous joint distributions. We then discuss the topological, statistical, and computational properties of H2SW. Finally, we demonstrate the favorable performance of H2SW in 3D mesh deformation, deep 3D mesh autoencoders, and datasets comparison.", "keywords": "Sliced Wasserstein;Heterogeneous Joint Distributions;Optimal Transport;3D shapes as points.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Khai Nguyen;Nhat Ho", "authorids": "~Khai_Nguyen1;~Nhat_Ho1", "gender": "M;M", "homepage": "https://khainb.com;https://nhatptnk8912.github.io/", "dblp": "120/4308;203/4479", "google_scholar": "im5fNaQAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";", "linkedin": ";nhat-pham-minh-ho-267b8164/", "or_profile": "~Khai_Nguyen1;~Nhat_Ho1", "aff": "University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2024hierarchical,\ntitle={Hierarchical Hybrid Sliced Wasserstein: A Scalable Metric for Heterogeneous Joint Distributions},\nauthor={Khai Nguyen and Nhat Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XwrMd1njqq}\n}", "github": "", "reviewers": "deuA;E3oU;XX3o;p3fW", "pdf_size": 34797453, "rating": "6;6;7;8", "confidence": "4;3;4;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "1;3;4;3", "wc_summary": "171;112;86;47", "wc_strengths": "162;48;72;26", "wc_weaknesses": "257;220;83;225", "wc_questions": "3;27;244;18", "wc_limitations": "1;9;1;23", "wc_review": "594;416;486;339", "wc_reply_reviewers": "0;0;109;11", "wc_reply_authors": "0;0;89;38", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 104.0, 45.07216435894775 ], "wc_strengths_avg": [ 77.0, 51.70106381884226 ], "wc_weaknesses_avg": [ 196.25, 66.90805257964097 ], "wc_questions_avg": [ 73.0, 99.09843591096683 ], "wc_limitations_avg": [ 8.5, 8.986100377805714 ], "wc_review_avg": [ 458.75, 93.81197951221368 ], "wc_reply_reviewers_avg": [ 30.0, 45.83121207212395 ], "wc_reply_authors_avg": [ 31.75, 36.51284020724764 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4775076734956023261&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MultiPull: Detailing Signed Distance Functions by Pulling Multi-Level Queries at Multi-Step", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94734", "id": "XxE8mL1bCO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XxE8mL1bCO", "openreview": "https://openreview.net/forum?id=XxE8mL1bCO", "poster": "", "project": "", "author_site": "Takeshi Noda, Chao Chen, Weiqi Zhang, Xinhai Liu, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "Reconstructing a continuous surface from a raw 3D point cloud is a challenging task. Latest methods employ supervised learning or pretrained priors to learn a signed distance function (SDF). However, neural networks tend to smooth local details due to the lack of ground truth signed distnaces or normals, which limits the performance of learning-based methods in reconstruction tasks. To resolve this issue, we propose a novel method, named MultiPull, to learn multi-scale implicit fields from raw point clouds to optimize accurate SDFs from coarse to fine. We achieve this by mapping 3D query points into a set of frequency features, which makes it possible to leverage multi-level features during optimization. Meanwhile, we introduce optimization constraints from the perspective of spatial distance and normal consistency, which play a key role in point cloud reconstruction based on multi-scale optimization strategies. Our experiments on widely used object and scene benchmarks demonstrate that our method outperforms the state-of-the-art methods in surface reconstruction.", "keywords": "surface reconstruction;point cloud;SDF", "primary_area": "machine_vision", "supplementary_material": "/attachment/0e84c6dac4df60646268967320ca6382f65ec1b4.zip", "author": "Takeshi Noda;Chao Chen;Weiqi Zhang;Xinhai Liu;Yu-Shen Liu;Zhizhong Han", "authorids": "~Takeshi_Noda1;~Chao_Chen9;~Weiqi_Zhang2;~Xinhai_Liu2;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/takeshie;;https://weiqi-zhang.github.io/;https://github.com/liuxinhai;https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";;;50/7049.html;44/2229.html;166/5173", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;L8gyzsQAAAAJ;https://scholar.google.com.hk/citations?user=sp3zrnYAAAAJ;vg2IvzsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-9916-662X;;0000-0003-4200-4862;0000-0001-7305-1915;", "linkedin": ";;;;;", "or_profile": "~Takeshi_Noda1;~Chao_Chen9;~Weiqi_Zhang2;~Xinhai_Liu2;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Autonomous Driving Lab;Tsinghua University;Wayne State University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;wayne.edu", "position": "MS student;PhD student;MS student;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nnoda2024multipull,\ntitle={MultiPull: Detailing Signed Distance Functions by Pulling Multi-Level Queries at Multi-Step},\nauthor={Takeshi Noda and Chao Chen and Weiqi Zhang and Xinhai Liu and Yu-Shen Liu and Zhizhong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XxE8mL1bCO}\n}", "github": "", "reviewers": "uTM5;VUUa;ZVf1;NaoM", "pdf_size": 13995653, "rating": "3;5;5;7", "confidence": "5;4;2;4", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;2;2;3", "wc_summary": "70;132;44;107", "wc_strengths": "29;89;27;143", "wc_weaknesses": "122;1012;102;285", "wc_questions": "42;24;81;85", "wc_limitations": "8;16;9;41", "wc_review": "271;1273;263;661", "wc_reply_reviewers": "0;142;13;37", "wc_reply_authors": "48;178;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.25, 33.751851801049376 ], "wc_strengths_avg": [ 72.0, 47.968739820845826 ], "wc_weaknesses_avg": [ 380.25, 371.5833520221271 ], "wc_questions_avg": [ 58.0, 25.836021365527625 ], "wc_limitations_avg": [ 18.5, 13.35102992281869 ], "wc_review_avg": [ 617.0, 411.4924057622449 ], "wc_reply_reviewers_avg": [ 48.0, 55.87038571551122 ], "wc_reply_authors_avg": [ 56.5, 72.83371472058802 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6551448073137771887&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;wayne.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Tsinghua University;Autonomous Driving Lab;Wayne State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://wayne.edu", "aff_unique_abbr": "THU;;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;2", "aff_country_unique": "China;;United States" }, { "title": "TAIA: Large Language Models are Out-of-Distribution Data Learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94733", "id": "XxSME6GE1G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=XxSME6GE1G", "openreview": "https://openreview.net/forum?id=XxSME6GE1G", "poster": "/media/PosterPDFs/NeurIPS%202024/94733.png?t=1729927848.8880424", "project": "", "author_site": "Shuyang Jiang, Yusheng Liao, Ya Zhang, Yanfeng Wang, Yu Wang", "tldr": "", "abstract": "Fine-tuning on task-specific question-answer pairs is a predominant method for enhancing the performance of instruction-tuned large language models (LLMs) on downstream tasks. However, in certain specialized domains, such as healthcare or harmless content generation, it is nearly impossible to obtain a large volume of high-quality data that matches the downstream distribution. To improve the performance of LLMs in data-scarce domains with domain-mismatched data, we re-evaluated the Transformer architecture and discovered that not all parameter updates during fine-tuning contribute positively to downstream performance. Our analysis reveals that within the self-attention and feed-forward networks, only the fine-tuned attention parameters are particularly beneficial when the training set's distribution does not fully align with the test set. Based on this insight, we propose an effective inference-time intervention method: \\uline{T}raining \\uline{A}ll parameters but \\uline{I}nferring with only \\uline{A}ttention (TAIA). We empirically validate TAIA using two general instruction-tuning datasets and evaluate it on seven downstream tasks involving math, reasoning, and knowledge understanding across LLMs of different parameter sizes and fine-tuning techniques. Our comprehensive experiments demonstrate that TAIA achieves superior improvements compared to both the fully fine-tuned model and the base model in most scenarios, with significant performance gains. The high tolerance of TAIA to data mismatches makes it resistant to jailbreaking tuning and enhances specialized tasks using general data. Code is available in \\url{https://github.com/pixas/TAIA_LLM}.", "keywords": "large language models;OOD generalization;supervised fine-tuning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/60d3e99aea819889b124368da88cf40c52075941.zip", "author": "Shuyang Jiang;Yusheng Liao;Ya Zhang;Yanfeng Wang;Yu Wang", "authorids": "~Shuyang_Jiang2;~Yusheng_Liao1;~Ya_Zhang1;~Yanfeng_Wang1;~Yu_Wang40", "gender": "M;M;F;M;M", "homepage": ";;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://mediabrain.sjtu.edu.cn/yuwang/", "dblp": "153/1949;37/4774.html;85/3714-2;55/5407-1.html;02/5889-27.html", "google_scholar": "slwTiOUAAAAJ;ErjimggAAAAJ;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";0000-0001-7549-3944;0000-0002-5390-9053;0000-0002-3196-2347;0000-0001-9500-081X", "linkedin": "%E4%B9%A6%E6%B4%8B-%E6%B1%9F-b8288223a/;;;;", "or_profile": "~Shuyang_Jiang2;~Yusheng_Liao1;~Ya_Zhang1;~Yanfeng_Wang1;~Yu_Wang40", "aff": "Fudan University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "fudan.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2024taia,\ntitle={{TAIA}: Large Language Models are Out-of-Distribution Data Learners},\nauthor={Shuyang Jiang and Yusheng Liao and Ya Zhang and Yanfeng Wang and Yu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=XxSME6GE1G}\n}", "github": "", "reviewers": "y3YK;VBCP;UWso;DXqC", "pdf_size": 912043, "rating": "4;6;7;7", "confidence": "4;3;3;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "91;86;166;55", "wc_strengths": "118;39;124;71", "wc_weaknesses": "231;61;49;99", "wc_questions": "4;2;76;197", "wc_limitations": "1;1;11;3", "wc_review": "445;189;426;425", "wc_reply_reviewers": "61;9;0;16", "wc_reply_authors": "258;0;87;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 99.5, 40.79522030826651 ], "wc_strengths_avg": [ 88.0, 34.94996423460259 ], "wc_weaknesses_avg": [ 110.0, 72.2564875980005 ], "wc_questions_avg": [ 69.75, 79.28548101638786 ], "wc_limitations_avg": [ 4.0, 4.123105625617661 ], "wc_review_avg": [ 371.25, 105.5233978793329 ], "wc_reply_reviewers_avg": [ 21.5, 23.5 ], "wc_reply_authors_avg": [ 86.25, 105.32894901213056 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6001819452530136592&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "fudan.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Fudan University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "Fudan;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Goal Reduction with Loop-Removal Accelerates RL and Models Human Brain Activity in Goal-Directed Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94732", "id": "Y0EfJJeb4V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y0EfJJeb4V", "openreview": "https://openreview.net/forum?id=Y0EfJJeb4V", "poster": "/media/PosterPDFs/NeurIPS%202024/94732.png?t=1731463051.3091612", "project": "", "author_site": "Huzi Cheng, Joshua Brown", "tldr": "", "abstract": "Goal-directed planning presents a challenge for classical RL algorithms due to the vastness of the combinatorial state and goal spaces, while humans and animals adapt to complex environments, especially with diverse, non-stationary objectives, often employing intermediate goals for long-horizon tasks.\nHere, we propose a goal reduction mechanism for effectively deriving subgoals from arbitrary and distant original goals, using a novel loop-removal technique.\nThe product of the method, called goal-reducer, distills high-quality subgoals from a replay buffer, all without the need for prior global environmental knowledge.\nSimulations show that the goal-reducer can be integrated into RL frameworks like Deep Q-learning and Soft Actor-Critic.\nIt accelerates performance in both discrete and continuous action space tasks, such as grid world navigation and robotic arm manipulation, relative to the corresponding standard RL models.\nMoreover, the goal-reducer, when combined with a local policy, without iterative training, outperforms its integrated deep RL counterparts in solving a navigation task.\nThis goal reduction mechanism also models human problem-solving.\nComparing the model's performance and activation with human behavior and fMRI data in a treasure hunting task, we found matching representational patterns between an goal-reducer agent's components and corresponding human brain areas, particularly the vmPFC and basal ganglia. The results suggest that humans may use a similar computational framework for goal-directed behaviors.", "keywords": "goal-conditioned RL;planning;multi-task RL;vmPFC;goal-directed behavior;cognitive control;spatial navigation", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Huzi Cheng;Joshua W Brown", "authorids": "~Huzi_Cheng1;~Joshua_W_Brown1", "gender": "M;M", "homepage": ";", "dblp": ";", "google_scholar": "3eCEd-oAAAAJ;rjYUjroAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Huzi_Cheng1;~Joshua_W_Brown1", "aff": "Indiana University at Bloomington;Indiana University at Bloomington", "aff_domain": "indiana.edu;indiana.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncheng2024goal,\ntitle={Goal Reduction with Loop-Removal Accelerates {RL} and Models Human Brain Activity in Goal-Directed Learning},\nauthor={Huzi Cheng and Joshua W Brown},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y0EfJJeb4V}\n}", "github": "", "reviewers": "SziM;ww9y;i4LG", "pdf_size": 1961083, "rating": "4;7;9", "confidence": "5;2;5", "soundness": "2;4;4", "novelty": "2;4;4", "presentation": "2;3;4", "wc_summary": "52;190;110", "wc_strengths": "68;132;110", "wc_weaknesses": "3;198;189", "wc_questions": "136;130;2", "wc_limitations": "1;34;29", "wc_review": "260;684;440", "wc_reply_reviewers": "97;142;14", "wc_reply_authors": "86;0;0", "reply_reviewers": "1;2;1", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 117.33333333333333, 56.576398691405664 ], "wc_strengths_avg": [ 103.33333333333333, 26.5497436689865 ], "wc_weaknesses_avg": [ 130.0, 89.87769467448528 ], "wc_questions_avg": [ 89.33333333333333, 61.80255298574288 ], "wc_limitations_avg": [ 21.333333333333332, 14.522013940527977 ], "wc_review_avg": [ 461.3333333333333, 173.75333729808537 ], "wc_reply_reviewers_avg": [ 84.33333333333333, 53.01781671182707 ], "wc_reply_authors_avg": [ 28.666666666666668, 40.54078878802872 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1147078669352809, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2mvsBg_fRLUJ:scholar.google.com/&scioq=Goal+Reduction+with+Loop-Removal+Accelerates+RL+and+Models+Human+Brain+Activity+in+Goal-Directed+Learning&hl=en&as_sdt=0,10", "gs_version_total": 2, "email": "indiana.edu;indiana.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Indiana University", "aff_unique_dep": "", "aff_unique_url": "https://www.indiana.edu", "aff_unique_abbr": "IU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Bloomington", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Scaling Laws and Compute-Optimal Training Beyond Fixed Training Durations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94731", "id": "Y13gSfTjGr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y13gSfTjGr", "openreview": "https://openreview.net/forum?id=Y13gSfTjGr", "poster": "/media/PosterPDFs/NeurIPS%202024/94731.png?t=1733887831.92955", "project": "", "author_site": "Alex H\u00e4gele, Elie Bakouch, Atli Kosson, Loubna Ben allal, Leandro Von Werra, Martin Jaggi", "tldr": "", "abstract": "Scale has become a main ingredient in obtaining strong machine learning models. As a result, understanding a model's scaling properties is key to effectively designing both the right training setup as well as future generations of architectures. In this work, we argue that scale and training research has been needlessly complex due to reliance on the cosine schedule, which prevents training across different lengths for the same model size. We investigate the training behavior of a direct alternative --- constant learning rate and cooldowns --- and find that it scales predictably and reliably similar to cosine. Additionally, we show that stochastic weight averaging yields improved performance along the training trajectory, without additional training costs, across different scales. Importantly, with these findings we demonstrate that scaling experiments can be performed with significantly reduced compute and GPU hours by utilizing fewer but reusable training runs. Our code is available at https://github.com/epfml/schedules-and-scaling/.", "keywords": "Scaling Laws;Large Language Models;Learning Rate Schedules;Weight Averaging", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Alexander H\u00e4gele;Elie Bakouch;Atli Kosson;Loubna Ben allal;Leandro Von Werra;Martin Jaggi", "authorids": "~Alexander_H\u00e4gele1;~Elie_Bakouch1;~Atli_Kosson1;~Loubna_Ben_allal1;~Leandro_Von_Werra1;~Martin_Jaggi1", "gender": "M;M;;F;M;M", "homepage": "https://haeggee.github.io/;;;https://loubnabnl.github.io/;https://github.com/lvwerra;https://mlo.epfl.ch", "dblp": ";;;;223/1855;17/4402", "google_scholar": "3bOhTucAAAAJ;;;reU1i-sAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ", "orcid": ";;;;;0000-0003-1579-5558", "linkedin": ";eliebak/;;https://www.linkedin.com/mwlite/in/loubna-ben-allal-238690152;lvwerra/;", "or_profile": "~Alexander_H\u00e4gele1;~Elie_Bakouch1;~Atli_Kosson1;~Loubna_Ben_allal1;~Leandro_Von_Werra1;~Martin_Jaggi1", "aff": "EPFL - EPF Lausanne;Universit\u00e9 Paris-Dauphine - PSL;;Hugging Face;Hugging Face;EPFL", "aff_domain": "epfl.ch;dauphine.psl.eu;;hugggingface.co;hf.co;epfl.ch", "position": "PhD student;MS student;;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nh{\\\"a}gele2024scaling,\ntitle={Scaling Laws and Compute-Optimal Training Beyond Fixed Training Durations},\nauthor={Alexander H{\\\"a}gele and Elie Bakouch and Atli Kosson and Loubna Ben allal and Leandro Von Werra and Martin Jaggi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y13gSfTjGr}\n}", "github": "", "reviewers": "yzXN;J233;5fmf", "pdf_size": 1794887, "rating": "5;8;9", "confidence": "4;5;5", "soundness": "3;4;4", "novelty": "3;3;3", "presentation": "2;4;4", "wc_summary": "77;223;220", "wc_strengths": "19;207;67", "wc_weaknesses": "76;62;46", "wc_questions": "113;90;78", "wc_limitations": "1;10;1", "wc_review": "286;592;412", "wc_reply_reviewers": "0;24;74", "wc_reply_authors": "0;0;89", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 7.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 173.33333333333334, 68.12896267781828 ], "wc_strengths_avg": [ 97.66666666666667, 79.7551809532703 ], "wc_weaknesses_avg": [ 61.333333333333336, 12.256517540566822 ], "wc_questions_avg": [ 93.66666666666667, 14.522013940527977 ], "wc_limitations_avg": [ 4.0, 4.242640687119285 ], "wc_review_avg": [ 430.0, 125.5706972187381 ], "wc_reply_reviewers_avg": [ 32.666666666666664, 30.825674724525044 ], "wc_reply_authors_avg": [ 29.666666666666668, 41.95500235040182 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16696914518606065638&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "epfl.ch;dauphine.psl.eu;;hugggingface.co;hf.co;epfl.ch", "author_num": 6, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "EPFL;Universit\u00e9 Paris-Dauphine;Hugging Face", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.univ-paris-dauphine.fr;https://huggingface.co", "aff_unique_abbr": "EPFL;UPD;Hugging Face", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;2;2;0", "aff_country_unique": "Switzerland;France;United States" }, { "title": "Soft ascent-descent as a stable and flexible alternative to flooding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94730", "id": "Y1ZsLONDI2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y1ZsLONDI2", "openreview": "https://openreview.net/forum?id=Y1ZsLONDI2", "poster": "/media/PosterPDFs/NeurIPS%202024/94730.png?t=1731549082.8025284", "project": "", "author_site": "Matthew Holland, Kosuke Nakatani", "tldr": "", "abstract": "As a heuristic for improving test accuracy in classification, the \"flooding\" method proposed by Ishida et al. (2020) sets a threshold for the average surrogate loss at training time; above the threshold, gradient descent is run as usual, but below the threshold, a switch to gradient *ascent* is made. While setting the threshold is non-trivial and is usually done with validation data, this simple technique has proved remarkably effective in terms of accuracy. On the other hand, what if we are also interested in other metrics such as model complexity or average surrogate loss at test time? As an attempt to achieve better overall performance with less fine-tuning, we propose a softened, pointwise mechanism called SoftAD (soft ascent-descent) that downweights points on the borderline, limits the effects of outliers, and retains the ascent-descent effect of flooding, with no additional computational overhead. We contrast formal stationarity guarantees with those for flooding, and empirically demonstrate how SoftAD can realize classification accuracy competitive with flooding (and the more expensive alternative SAM) while enjoying a much smaller loss generalization gap and model norm.", "keywords": "Flooding;classification;ascent-descent", "primary_area": "learning_theory", "supplementary_material": "", "author": "Matthew J. Holland;Kosuke Nakatani", "authorids": "~Matthew_J._Holland1;~Kosuke_Nakatani1", "gender": "M;M", "homepage": "https://feedbackward.com/;http://www.ar.sanken.osaka-u.ac.jp/members.html", "dblp": "148/9989;", "google_scholar": "pQoH5uEAAAAJ;", "orcid": "0000-0002-6704-1769;", "linkedin": ";", "or_profile": "~Matthew_J._Holland1;~Kosuke_Nakatani1", "aff": "Osaka University;Osaka University", "aff_domain": "osaka-u.ac.jp;osaka-u.ac.jp", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\nholland2024soft,\ntitle={Soft ascent-descent as a stable and flexible alternative to flooding},\nauthor={Matthew J. Holland and Kosuke Nakatani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y1ZsLONDI2}\n}", "github": "", "reviewers": "JmzP;55bG;bEZy;yxTo", "pdf_size": 986202, "rating": "4;5;6;7", "confidence": "3;2;3;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "70;70;82;140", "wc_strengths": "46;114;38;139", "wc_weaknesses": "100;302;36;100", "wc_questions": "22;86;1;80", "wc_limitations": "5;3;1;27", "wc_review": "243;575;158;486", "wc_reply_reviewers": "0;0;21;109", "wc_reply_authors": "0;0;15;705", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.5, 28.99568933479596 ], "wc_strengths_avg": [ 84.25, 43.25722483007896 ], "wc_weaknesses_avg": [ 134.5, 100.17359931638676 ], "wc_questions_avg": [ 47.25, 36.574410453211684 ], "wc_limitations_avg": [ 9.0, 10.488088481701515 ], "wc_review_avg": [ 365.5, 170.64070440548468 ], "wc_reply_reviewers_avg": [ 32.5, 44.9916658949188 ], "wc_reply_authors_avg": [ 180.0, 303.17074397111605 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6324555320336758, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uVz9HlqN94EJ:scholar.google.com/&scioq=Soft+ascent-descent+as+a+stable+and+flexible+alternative+to+flooding&hl=en&as_sdt=0,33", "gs_version_total": 8, "email": "osaka-u.ac.jp;osaka-u.ac.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Osaka University", "aff_unique_dep": "", "aff_unique_url": "https://www.osaka-u.ac.jp", "aff_unique_abbr": "Osaka U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Tracing Hyperparameter Dependencies for Model Parsing via Learnable Graph Pooling Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94729", "id": "Y1edWJH9qB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y1edWJH9qB", "openreview": "https://openreview.net/forum?id=Y1edWJH9qB", "poster": "/media/PosterPDFs/NeurIPS%202024/94729.png?t=1731735796.5947602", "project": "", "author_site": "Xiao Guo, Vishal Asnani, Sijia Liu, Xiaoming Liu", "tldr": "", "abstract": "\\textit{Model Parsing} defines the task of predicting hyperparameters of the generative model (GM), given a GM-generated image as the input. \nSince a diverse set of hyperparameters is jointly employed by the generative model, and dependencies often exist among them, it is crucial to learn these hyperparameter dependencies for improving the model parsing performance. \nTo explore such important dependencies, we propose a novel model parsing method called Learnable Graph Pooling Network (LGPN), in which we formulate model parsing as a graph node classification problem, using graph nodes and edges to represent hyperparameters and their dependencies, respectively. \nFurthermore, LGPN incorporates a learnable pooling-unpooling mechanism tailored to model parsing, which adaptively learns hyperparameter dependencies of GMs used to generate the input image. \nAlso, we introduce a Generation Trace Capturing Network (GTC) that can efficiently identify generation traces of input images, enhancing the understanding of generated images' provenances.\nEmpirically, we achieve state-of-the-art performance in model parsing and its extended applications, showing the superiority of the proposed LGPN.", "keywords": "Image Forensics;Forgery Attack Defense;Low-level Vision", "primary_area": "generative_models", "supplementary_material": "/attachment/5c9f0d9285ccf9c6ce15415e061101b047252176.zip", "author": "Xiao Guo;Vishal Asnani;Sijia Liu;Xiaoming Liu", "authorids": "~Xiao_Guo2;~Vishal_Asnani1;~Sijia_Liu1;~Xiaoming_Liu2", "gender": "M;M;M;M", "homepage": "https://chelsea234.github.io/website/;https://vishal3477.github.io/;https://lsjxjtu.github.io/;http://www.cse.msu.edu/~liuxm/", "dblp": ";295/8698;128/6972-1;l/XiaomingLiu0002", "google_scholar": "H93xhggAAAAJ;OA4lkcwAAAAJ;C7dO_UgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-3575-3953;;;", "linkedin": ";vishal-asnani/;;xiaoming-liu-5a7807b/", "or_profile": "~Xiao_Guo2;~Vishal_Asnani1;~Sijia_Liu1;~Xiaoming_Liu2", "aff": "Michigan State University;Adobe Systems;Michigan State University;Michigan State University", "aff_domain": "msu.edu;adobe.com;msu.edu;msu.edu", "position": "PhD student;Intern;Assistant Professor;Professor", "bibtex": "@inproceedings{\nguo2024tracing,\ntitle={Tracing Hyperparameter Dependencies for Model Parsing via Learnable Graph Pooling Network},\nauthor={Xiao Guo and Vishal Asnani and Sijia Liu and Xiaoming Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y1edWJH9qB}\n}", "github": "", "reviewers": "Q98w;6bSd;nGEg;born", "pdf_size": 3337161, "rating": "5;5;5;6", "confidence": "3;3;3;2", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "72;47;68;107", "wc_strengths": "35;55;85;30", "wc_weaknesses": "281;33;70;68", "wc_questions": "18;2;138;79", "wc_limitations": "26;7;40;15", "wc_review": "432;144;401;299", "wc_reply_reviewers": "51;0;20;13", "wc_reply_authors": "828;78;71;13", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 21.54646142641524 ], "wc_strengths_avg": [ 51.25, 21.614520582238228 ], "wc_weaknesses_avg": [ 113.0, 98.1045360826909 ], "wc_questions_avg": [ 59.25, 53.78371035917846 ], "wc_limitations_avg": [ 22.0, 12.389511693363866 ], "wc_review_avg": [ 319.0, 112.38104822433363 ], "wc_reply_reviewers_avg": [ 21.0, 18.748333259252675 ], "wc_reply_authors_avg": [ 247.5, 336.1000595060941 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12766090908296884193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "msu.edu;adobe.com;msu.edu;msu.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Michigan State University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.msu.edu;https://www.adobe.com", "aff_unique_abbr": "MSU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "xMIL: Insightful Explanations for Multiple Instance Learning in Histopathology", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94728", "id": "Y1fPxGevQj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y1fPxGevQj", "openreview": "https://openreview.net/forum?id=Y1fPxGevQj", "poster": "/media/PosterPDFs/NeurIPS%202024/94728.png?t=1733322039.5581214", "project": "", "author_site": "Julius Hense, Mina Jamshidi Idaji, Oliver Eberle, Thomas Schnake, Jonas Dippel, Laure Ciernik, Oliver Buchstab, Andreas Mock, Frederick Klauschen, Klaus-Robert M\u00fcller", "tldr": "", "abstract": "Multiple instance learning (MIL) is an effective and widely used approach for weakly supervised machine learning. In histopathology, MIL models have achieved remarkable success in tasks like tumor detection, biomarker prediction, and outcome prognostication. However, MIL explanation methods are still lagging behind, as they are limited to small bag sizes or disregard instance interactions. We revisit MIL through the lens of explainable AI (XAI) and introduce xMIL, a refined framework with more general assumptions. We demonstrate how to obtain improved MIL explanations using layer-wise relevance propagation (LRP) and conduct extensive evaluation experiments on three toy settings and four real-world histopathology datasets. Our approach consistently outperforms previous explanation attempts with particularly improved faithfulness scores on challenging biomarker prediction tasks. Finally, we showcase how xMIL explanations enable pathologists to extract insights from MIL models, representing a significant advance for knowledge discovery and model debugging in digital histopathology.", "keywords": "computational pathology;multiple instance learning;explainable AI;layer-wise relevance propagation;knowledge discovery", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Julius Hense;Mina Jamshidi Idaji;Oliver Eberle;Thomas Schnake;Jonas Dippel;Laure Ciernik;Oliver Buchstab;Andreas Mock;Frederick Klauschen;Klaus Robert Muller", "authorids": "~Julius_Hense1;~Mina_Jamshidi_Idaji1;~Oliver_Eberle1;~Thomas_Schnake1;~Jonas_Dippel1;~Laure_Ciernik1;~Oliver_Buchstab1;~Andreas_Mock1;~Frederick_Klauschen1;~Klaus_Robert_Muller1", "gender": "M;F;;M;M;F;;M;M;M", "homepage": ";https://minajamshidi.github.io/;https://www.tu.berlin/;https://schnake.cc/;;;https://www.med.lmu.de/pathologie/de/index.html;;https://www.med.lmu.de/pathologie/de/index.html;https://www.ml.tu-berlin.de/menue/members/klaus-robert_mueller/", "dblp": ";;260/6891;245/8679;249/3158;380/3631;;;;m/KRMuller.html", "google_scholar": "ZqXZKacAAAAJ;S3ybh7cAAAAJ;vZB4qw0AAAAJ;MptjWzgAAAAJ;ZLQCgRoAAAAJ;GFLi9XIAAAAJ;;;;https://scholar.google.de/citations?hl=de", "orcid": "0009-0007-1160-1636;0000-0003-1593-3201;0000-0002-6967-9950;0009-0006-3768-0259;0000-0002-0552-8977;0009-0005-4501-243X;;0000-0002-3332-9166;;0000-0002-3861-7685", "linkedin": ";dr-mina-jamshidi-03954684/;;;jdippel/;lciernik;;;;", "or_profile": "~Julius_Hense1;~Mina_Jamshidi_Idaji1;~Oliver_Eberle1;~Thomas_Schnake1;~Jonas_Dippel1;~Laure_Ciernik1;~Oliver_Buchstab1;~Andreas_Mock1;~Frederick_Klauschen1;~Klaus_Robert_Muller1", "aff": "Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin;Technische Universit\u00e4t Berlin;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;TU Berlin", "aff_domain": "tu-berlin.de;tu-berlin.de;tu-berlin.de;tu-berlin.de;tu-berlin.de;tu-berlin.de;lmu.de;lmu.de;lmu.de;tu-berlin.de", "position": "PhD student;Postdoc;Postdoc;PhD student;PhD student;PhD student;Researcher;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhense2024xmil,\ntitle={x{MIL}: Insightful Explanations for Multiple Instance Learning in Histopathology},\nauthor={Julius Hense and Mina Jamshidi Idaji and Oliver Eberle and Thomas Schnake and Jonas Dippel and Laure Ciernik and Oliver Buchstab and Andreas Mock and Frederick Klauschen and Klaus Robert Muller},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y1fPxGevQj}\n}", "github": "", "reviewers": "utz5;DaUT;GyUV", "pdf_size": 11829363, "rating": "3;5;6", "confidence": "3;5;4", "soundness": "3;1;3", "novelty": "2;1;3", "presentation": "2;3;2", "wc_summary": "58;21;45", "wc_strengths": "61;16;98", "wc_weaknesses": "104;222;462", "wc_questions": "39;51;6", "wc_limitations": "1;3;3", "wc_review": "263;313;614", "wc_reply_reviewers": "0;492;0", "wc_reply_authors": "0;549;0", "reply_reviewers": "0;2;0", "reply_authors": "1;2;1", "rating_avg": [ 4.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 41.333333333333336, 15.326085243430198 ], "wc_strengths_avg": [ 58.333333333333336, 33.529423231278855 ], "wc_weaknesses_avg": [ 262.6666666666667, 148.95487758229189 ], "wc_questions_avg": [ 32.0, 19.026297590440446 ], "wc_limitations_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_review_avg": [ 396.6666666666667, 155.02759610971057 ], "wc_reply_reviewers_avg": [ 164.0, 231.93102422918759 ], "wc_reply_authors_avg": [ 183.0, 258.8010819142764 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17258306640913685755&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "tu-berlin.de;tu-berlin.de;tu-berlin.de;tu-berlin.de;tu-berlin.de;tu-berlin.de;lmu.de;lmu.de;lmu.de;tu-berlin.de", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;1;1;0", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.lmu.de", "aff_unique_abbr": "TU Berlin;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Long-Horizon Planning for Multi-Agent Robots in Partially Observable Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94727", "id": "Y1rOWS2Z4i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y1rOWS2Z4i", "openreview": "https://openreview.net/forum?id=Y1rOWS2Z4i", "poster": "/media/PosterPDFs/NeurIPS%202024/94727.png?t=1733003235.8191104", "project": "", "author_site": "Sid Nayak, Adelmo Morrison Orozco, Marina Have, Jackson Zhang, Vittal Thirumalai, Darren Chen, Aditya Kapoor, Eric Robinson, Karthik Gopalakrishnan, James Harrison, Anuj Mahajan, Brian Ichter, Hamsa Balakrishnan", "tldr": "", "abstract": "The ability of Language Models (LMs) to understand natural language makes them a powerful tool for parsing human instructions into task plans for autonomous robots. Unlike traditional planning methods that rely on domain-specific knowledge and handcrafted rules, LMs generalize from diverse data and adapt to various tasks with minimal tuning, acting as a compressed knowledge base. However, LMs in their standard form face challenges with long-horizon tasks, particularly in partially observable multi-agent settings. We propose an LM-based Long-Horizon Planner for Multi-Agent Robotics (LLaMAR), a cognitive architecture for planning that achieves state-of-the-art results in long-horizon tasks within partially observable environments. LLaMAR employs a plan-act-correct-verify framework, allowing self-correction from action execution feedback without relying on oracles or simulators. Additionally, we present MAP-THOR, a comprehensive test suite encompassing household tasks of varying complexity within the AI2-THOR environment. Experiments show that LLaMAR achieves a 30\\% higher success rate than other state-of-the-art LM-based multi-agent planners in MAP-THOR and Search \\& Rescue tasks. Code can be found at [https://github.com/nsidn98/LLaMAR](https://github.com/nsidn98/LLaMAR)", "keywords": "multi-agent robotics;large language models", "primary_area": "robotics", "supplementary_material": "/attachment/47f9a482f268b2e6fe171e39f70dc5f020978129.zip", "author": "Siddharth Nayak;Adelmo Morrison Orozco;Marina Ten Have;Jackson Zhang;Vittal Thirumalai;Darren Chen;Aditya Kapoor;Eric Robinson;Karthik Gopalakrishnan;James Harrison;Anuj Mahajan;brian ichter;Hamsa Balakrishnan", "authorids": "~Siddharth_Nayak1;~Adelmo_Morrison_Orozco1;~Marina_Ten_Have1;~Jackson_Zhang1;~Vittal_Thirumalai1;~Darren_Chen1;~Aditya_Kapoor1;~Eric_Robinson2;~Karthik_Gopalakrishnan3;~James_Harrison1;~Anuj_Mahajan1;~brian_ichter1;~Hamsa_Balakrishnan1", "gender": "M;M;;M;M;;M;M;M;;M;;", "homepage": "http://nsidn98.github.io/;https://www.omleda.com/;http://mtenhave.myportfolio.com;;;;https://adityakapoor74.github.io/;;;;https://anuj-mahajan.github.io/;;http://www.mit.edu/~hamsa/", "dblp": "268/7851;;;;;;;;;;99/3800;;", "google_scholar": "XcgZhMwAAAAJ;;;;;;UPH3tawAAAAJ;;Tt2MJQsAAAAJ;-tEiRFcAAAAJ;https://scholar.google.co.in/citations?user=a3AbXGcAAAAJ;-w5DuHgAAAAJ;", "orcid": "0000-0003-4663-8045;0000-0003-2481-5143;;;;;;;;;;;", "linkedin": "nsidn98/;;;jackson-jiawei-zhang/;vittal-thirumalai/;darren3chen;aditya-kapoor-446727152/;eric-erob-robinson-37916812a/;;;anuj-m-bb0a26175/;;", "or_profile": "~Siddharth_Nayak1;~Adelmo_Morrison_Orozco1;~Marina_Ten_Have1;~Jackson_Zhang1;~Vittal_Thirumalai1;~Darren_Chen1;~Aditya_Kapoor1;~Eric_Robinson2;~Karthik_Gopalakrishnan3;~James_Harrison1;~Anuj_Mahajan1;~brian_ichter1;~Hamsa_Balakrishnan1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Carnegie Mellon University;Massachusetts Institute of Technology;Stanford University;Google;Amazon;Google;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;andrew.cmu.edu;mit.edu;stanford.edu;google.com;amazon.com;google.com;mit.edu", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Researcher;Director of AI Research, Command and Control;Postdoc;Researcher;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nnayak2024longhorizon,\ntitle={Long-Horizon Planning for Multi-Agent Robots in Partially Observable Environments},\nauthor={Siddharth Nayak and Adelmo Morrison Orozco and Marina Ten Have and Jackson Zhang and Vittal Thirumalai and Darren Chen and Aditya Kapoor and Eric Robinson and Karthik Gopalakrishnan and James Harrison and Anuj Mahajan and brian ichter and Hamsa Balakrishnan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y1rOWS2Z4i}\n}", "github": "", "reviewers": "XzUd;nYvP;QApg;snqg;Eixs", "pdf_size": 13807539, "rating": "5;5;6;6;8", "confidence": "3;4;4;2;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "2;2;4;3;4", "wc_summary": "76;93;75;173;86", "wc_strengths": "65;65;59;71;132", "wc_weaknesses": "119;204;184;69;41", "wc_questions": "107;3;15;196;110", "wc_limitations": "9;10;33;2;1", "wc_review": "376;375;366;511;370", "wc_reply_reviewers": "198;20;37;75;20", "wc_reply_authors": "881;29;47;384;35", "reply_reviewers": "1;1;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 100.6, 36.805434381351894 ], "wc_strengths_avg": [ 78.4, 27.06732347314747 ], "wc_weaknesses_avg": [ 123.4, 63.1461796152388 ], "wc_questions_avg": [ 86.2, 70.7768323676611 ], "wc_limitations_avg": [ 11.0, 11.575836902790225 ], "wc_review_avg": [ 399.6, 55.81612670187712 ], "wc_reply_reviewers_avg": [ 70.0, 67.07905783476689 ], "wc_reply_authors_avg": [ 275.2, 331.42625122340564 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.22821773229381925, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5339071165786451846&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;andrew.cmu.edu;mit.edu;stanford.edu;google.com;amazon.com;google.com;mit.edu", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;1;0;2;3;4;3;0", "aff_unique_norm": "Massachusetts Institute of Technology;Carnegie Mellon University;Stanford University;Google;Amazon", "aff_unique_dep": ";;;Google;Amazon.com, Inc.", "aff_unique_url": "https://web.mit.edu;https://www.cmu.edu;https://www.stanford.edu;https://www.google.com;https://www.amazon.com", "aff_unique_abbr": "MIT;CMU;Stanford;Google;Amazon", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SpeedLoader: An I/O efficient scheme for heterogeneous and distributed LLM operation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94726", "id": "Y2I0Fy4sm7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y2I0Fy4sm7", "openreview": "https://openreview.net/forum?id=Y2I0Fy4sm7", "poster": "", "project": "", "author_site": "Yiqi Zhang, Yang You", "tldr": "", "abstract": "With the surging growth of model parameters, foundation models pose unprecedented challenges to traditional computational infrastructures. These large models inherently require substantial accelerator memory to accommodate massive tensors during pre-training, fine-tuning, and even inference stages, making it even more challenging to deploy a model with restricted computational resources. Given this challenge, distribution and offloading the model states are two major solutions. Partitioning the required states to participating workers, and storing them in lower speed media, such as host DRAM and block devices, largely alleviate the accelerator memory pressure. However, the prohibitive costs of tensor communication render it a theoretically plausible yet practically inefficient solution. Previous efforts to improve efficiency include maximizing rematerialization and employing chunk-based tensor management to reduce host-device communication. Despite these efforts, the reported training throughput only achieves 36.54% of model FLOPs utilization (MFUs), still not comparable to full on-device training. In this work, we redesign the data flow of heterogeneous hardware and sharded model training to minimize the excessive communication overhead. Our proposed scheme significantly enhances training and inference throughput of large language models under restrictive computational resources. We confirmed a large leap in effective compute time by looking into the kernel-level runtime behavior of our trials, where the MFUs can achieve up to 51%. Compared to the state-of-the-art approach, our framework robustly achieves remarkable speedups from 3x to 30x in multiple distributed heterogeneous training setups and inference speedups of 1.5x to 2.35x without compromising arithmetic precision.", "keywords": "Heterogenous Computing;Large Language Model;ZeRO;FSDP;Offload", "primary_area": "infrastructure", "supplementary_material": "", "author": "Yiqi Zhang;Yang You", "authorids": "~Yiqi_Zhang2;~Yang_You1", "gender": "M;M", "homepage": ";https://www.comp.nus.edu.sg/~youy/", "dblp": ";33/8167-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;jF4dPZwAAAAJ", "orcid": "0000-0002-8337-5929;", "linkedin": ";yang-you-0b92914b/", "or_profile": "~Yiqi_Zhang2;~Yang_You1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu.sg", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nzhang2024speedloader,\ntitle={SpeedLoader: An I/O efficient scheme for heterogeneous and distributed {LLM} operation},\nauthor={Yiqi Zhang and Yang You},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y2I0Fy4sm7}\n}", "github": "", "reviewers": "Dtr1;4D75;bDTF;m7wP", "pdf_size": 2329695, "rating": "4;5;5;6", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "1;2;3;2", "wc_summary": "29;55;46;84", "wc_strengths": "54;33;36;25", "wc_weaknesses": "194;20;8;17", "wc_questions": "56;23;74;185", "wc_limitations": "27;7;8;30", "wc_review": "360;138;172;341", "wc_reply_reviewers": "0;0;0;4", "wc_reply_authors": "293;0;236;0", "reply_reviewers": "0;0;0;1", "reply_authors": "3;1;3;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 53.5, 19.93113142799475 ], "wc_strengths_avg": [ 37.0, 10.606601717798213 ], "wc_weaknesses_avg": [ 59.75, 77.63496312873473 ], "wc_questions_avg": [ 84.5, 60.83789937201974 ], "wc_limitations_avg": [ 18.0, 10.559356040971437 ], "wc_review_avg": [ 252.75, 98.7151837358367 ], "wc_reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "wc_reply_authors_avg": [ 132.25, 133.7766328623949 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ca0BQcgz2c8J:scholar.google.com/&scioq=SpeedLoader:+An+I/O+efficient+scheme+for+heterogeneous+and+distributed+LLM+operation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "nus.edu;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Conformal Inverse Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94725", "id": "Y2NWKlrDrX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y2NWKlrDrX", "openreview": "https://openreview.net/forum?id=Y2NWKlrDrX", "poster": "/media/PosterPDFs/NeurIPS%202024/94725.png?t=1730218019.299691", "project": "", "author_site": "Bo Lin, Erick Delage, Timothy Chan", "tldr": "", "abstract": "Inverse optimization has been increasingly used to estimate unknown parameters in an optimization model based on decision data. We show that such a point estimation is insufficient in a prescriptive setting where the estimated parameters are used to prescribe new decisions. The prescribed decisions may be low-quality and misaligned with human intuition and thus are unlikely to be adopted. To tackle this challenge, we propose conformal inverse optimization, which seeks to learn an uncertainty set for the unknown parameters and then solve a robust optimization model to prescribe new decisions. Under mild assumptions, we show that our method enjoys provable guarantees on solution quality, as evaluated using both the ground-truth parameters and the decision maker's perception of the unknown parameters. Our method demonstrates strong empirical performance compared to classic inverse optimization.", "keywords": "inverse optimization;robust optimization;algorithm aversion;data-driven decision making", "primary_area": "optimization", "supplementary_material": "", "author": "Bo Lin;Erick Delage;Timothy Chan", "authorids": "~Bo_Lin4;~Erick_Delage2;~Timothy_Chan2", "gender": "M;M;M", "homepage": "http://lin-bo.github.io;http://web.hec.ca/pages/erick.delage/;https://chan.mie.utoronto.ca/", "dblp": ";26/1546;11/7269", "google_scholar": "hmB6_DIAAAAJ;https://scholar.google.ca/citations?user=ciH2ROgAAAAJ;https://scholar.google.ca/citations?user=5PqOfJ8AAAAJ", "orcid": "0000-0002-4225-9171;0000-0002-6740-3600;", "linkedin": ";erick-delage-2105361/?originalSubdomain=ca;", "or_profile": "~Bo_Lin4;~Erick_Delage2;~Timothy_Chan2", "aff": "University of Toronto;Computer Science Department;University of Toronto", "aff_domain": "utoronto.ca;cs.stanford.edu;utoronto.ca", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nlin2024conformal,\ntitle={Conformal Inverse Optimization},\nauthor={Bo Lin and Erick Delage and Timothy Chan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y2NWKlrDrX}\n}", "github": "", "reviewers": "5FdS;Bo7T;ggLM;UvTc", "pdf_size": 1322500, "rating": "5;7;7;7", "confidence": "3;2;2;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "70;73;68;44", "wc_strengths": "24;41;84;65", "wc_weaknesses": "15;54;75;51", "wc_questions": "378;84;23;27", "wc_limitations": "1;16;3;11", "wc_review": "488;268;253;198", "wc_reply_reviewers": "362;41;12;21", "wc_reply_authors": "913;317;8;15", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 63.75, 11.54068888758379 ], "wc_strengths_avg": [ 53.5, 22.85278976405288 ], "wc_weaknesses_avg": [ 48.75, 21.568205766822608 ], "wc_questions_avg": [ 128.0, 146.3403567031323 ], "wc_limitations_avg": [ 7.75, 6.057020719792859 ], "wc_review_avg": [ 301.75, 110.64441919952402 ], "wc_reply_reviewers_avg": [ 109.0, 146.4462358683213 ], "wc_reply_authors_avg": [ 313.25, 368.05052302639103 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=21288491932166708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "utoronto.ca;cs.stanford.edu;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;Computer Science Department", "aff_unique_dep": ";Computer Science", "aff_unique_url": "https://www.utoronto.ca;", "aff_unique_abbr": "U of T;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada;" }, { "title": "Preventing Dimensional Collapse in Self-Supervised Learning via Orthogonality Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94724", "id": "Y3FjKSsfmy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y3FjKSsfmy", "openreview": "https://openreview.net/forum?id=Y3FjKSsfmy", "poster": "/media/PosterPDFs/NeurIPS%202024/94724.png?t=1731676710.3688169", "project": "", "author_site": "Junlin He, Jinxiao Du, Wei Ma", "tldr": "", "abstract": "Self-supervised learning (SSL) has rapidly advanced in recent years, approaching the performance of its supervised counterparts through the extraction of representations from unlabeled data. However, dimensional collapse, where a few large eigenvalues dominate the eigenspace, poses a significant obstacle for SSL. When dimensional collapse occurs on features (e.g. hidden features and representations), it prevents features from representing the full information of the data; when dimensional collapse occurs on weight matrices, their filters are self-related and redundant, limiting their expressive power.\nExisting studies have predominantly concentrated on the dimensional collapse of representations, neglecting whether this can sufficiently prevent the dimensional collapse of the weight matrices and hidden features. \nTo this end, we first time propose a mitigation approach employing orthogonal regularization (OR) across the encoder, targeting both convolutional and linear layers during pretraining. OR promotes orthogonality within weight matrices, thus safeguarding against the dimensional collapse of weight matrices, hidden features, and representations. Our empirical investigations demonstrate that OR significantly enhances the performance of SSL methods across diverse benchmarks, yielding consistent gains with both CNNs and Transformer-based architectures.", "keywords": "Self-supervised learning; Orthogonality Regularization; Dimensional collapse", "primary_area": "machine_vision", "supplementary_material": "/attachment/2b8df9e8d7cb970d5bcf3737355f2b55c97fe8f7.zip", "author": "Junlin He;Jinxiao Du;Wei Ma", "authorids": "~Junlin_He2;~Jinxiao_Du1;~Wei_Ma3", "gender": ";M;M", "homepage": ";;http://polyu-mobility-ai-lab.com/", "dblp": ";;", "google_scholar": "1wyJPxQAAAAJ;;syUpc-gAAAAJ", "orcid": ";0000-0003-0247-6339;0000-0001-8945-5877", "linkedin": ";;", "or_profile": "~Junlin_He2;~Jinxiao_Du1;~Wei_Ma3", "aff": "Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhe2024preventing,\ntitle={Preventing Dimensional Collapse in Self-Supervised Learning via Orthogonality Regularization},\nauthor={Junlin He and Jinxiao Du and Wei Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y3FjKSsfmy}\n}", "github": "", "reviewers": "Yvo9;sksr;KCGP;2nFS;gNSa", "pdf_size": 3035084, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;4", "soundness": "2;3;2;3;4", "novelty": "1;2;3;2;4", "presentation": "2;2;3;3;4", "wc_summary": "37;206;77;77;61", "wc_strengths": "9;59;64;54;41", "wc_weaknesses": "124;523;200;143;26", "wc_questions": "24;122;104;29;68", "wc_limitations": "5;4;6;1;1", "wc_review": "199;914;451;304;197", "wc_reply_reviewers": "80;231;456;0;61", "wc_reply_authors": "35;246;473;0;32", "reply_reviewers": "1;1;2;0;1", "reply_authors": "2;2;3;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 91.6, 59.04439008068421 ], "wc_strengths_avg": [ 45.4, 19.744366285095097 ], "wc_weaknesses_avg": [ 203.2, 169.45725124644267 ], "wc_questions_avg": [ 69.4, 39.13872762367218 ], "wc_limitations_avg": [ 3.4, 2.0591260281974 ], "wc_review_avg": [ 413.0, 267.12468998578174 ], "wc_reply_reviewers_avg": [ 165.6, 163.89093934687176 ], "wc_reply_authors_avg": [ 157.2, 180.51858630069094 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16620891994485364795&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Federated Learning from Vision-Language Foundation Models: Theoretical Analysis and Method", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94723", "id": "Y4L8GQXZZO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y4L8GQXZZO", "openreview": "https://openreview.net/forum?id=Y4L8GQXZZO", "poster": "/media/PosterPDFs/NeurIPS%202024/94723.png?t=1731390521.9331365", "project": "", "author_site": "Bikang Pan, Wei Huang, Ye Shi", "tldr": "", "abstract": "Integrating pretrained vision-language foundation models like CLIP into federated learning has attracted significant attention for enhancing generalization across diverse tasks. Typically, federated learning of vision-language models employs prompt learning to reduce communication and computational costs, i.e., prompt-based federated learning. However, there is limited theoretical analysis to understand the performance of prompt-based federated learning. In this work, we construct a theoretical analysis framework for prompt-based federated learning via feature learning theory. Specifically, we monitor the evolution of signal learning and noise memorization in prompt-based federated learning, demonstrating that performance can be assessed by the ratio of task-relevant to task-irrelevant coefficients. Furthermore, we draw an analogy between income and risk in portfolio optimization and the task-relevant and task-irrelevant terms in feature learning. Leveraging inspiration from portfolio optimization that combining two independent assets will maintain the income while reducing the risk, we introduce two prompts: global prompt and local prompt to construct a prompt portfolio to balance the generalization and personalization. Consequently, we showed the performance advantage of the prompt portfolio and derived the optimal mixing coefficient. These theoretical claims have been further supported by empirical experiments.", "keywords": "Federated Learning;Vision-Language Foundation Models;Prompt Learning;Theoretical Analysis", "primary_area": "learning_theory", "supplementary_material": "/attachment/17a73beaffec6e8e2beda04092905290c60e3c33.zip", "author": "Bikang Pan;Wei Huang;Ye Shi", "authorids": "~Bikang_Pan1;~Wei_Huang6;~Ye_Shi1", "gender": "M;M;M", "homepage": "https://panbikang.github.io/home/;https://weihuang05.github.io/;http://faculty.sist.shanghaitech.edu.cn/faculty/shiye", "dblp": ";81/6685-34;34/11191-1", "google_scholar": ";RZfDh4MAAAAJ;gMqbZPUAAAAJ", "orcid": ";0000-0001-5674-7021;", "linkedin": ";;", "or_profile": "~Bikang_Pan1;~Wei_Huang6;~Ye_Shi1", "aff": "ShanghaiTech University;RIKEN AIP;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;riken.jp;shanghaitech.edu.cn", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\npan2024federated,\ntitle={Federated Learning from Vision-Language Foundation Models: Theoretical Analysis and Method},\nauthor={Bikang Pan and Wei Huang and Ye Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y4L8GQXZZO}\n}", "github": "", "reviewers": "XM5m;YEHg;bKME;BdA2;kpWj", "pdf_size": 895286, "rating": "4;5;5;5;6", "confidence": "5;4;3;4;2", "soundness": "2;2;2;3;3", "novelty": "2;2;4;3;3", "presentation": "2;3;2;3;2", "wc_summary": "46;56;93;79;45", "wc_strengths": "33;17;31;35;45", "wc_weaknesses": "10;133;216;132;215", "wc_questions": "149;9;10;2;2", "wc_limitations": "4;10;55;1;2", "wc_review": "242;225;405;249;309", "wc_reply_reviewers": "0;12;101;16;36", "wc_reply_authors": "266;16;555;18;11", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 5.0, 0.6324555320336759 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 63.8, 19.051509126575773 ], "wc_strengths_avg": [ 32.2, 8.997777503361593 ], "wc_weaknesses_avg": [ 141.2, 75.37479684881413 ], "wc_questions_avg": [ 34.4, 57.39895469431477 ], "wc_limitations_avg": [ 14.4, 20.538743875904387 ], "wc_review_avg": [ 286.0, 65.90295896240168 ], "wc_reply_reviewers_avg": [ 33.0, 35.92213802100315 ], "wc_reply_authors_avg": [ 173.2, 214.23855862099148 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9302605094190632, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2761931946546588348&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "shanghaitech.edu.cn;riken.jp;shanghaitech.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "ShanghaiTech University;RIKEN", "aff_unique_dep": ";Advanced Institute for Computational Science", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.aip.riken.jp", "aff_unique_abbr": "ShanghaiTech;RIKEN AIP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;Japan" }, { "title": "The Importance of Being Scalable: Improving the Speed and Accuracy of Neural Network Interatomic Potentials Across Chemical Domains", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94722", "id": "Y4mBaZu4vy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y4mBaZu4vy", "openreview": "https://openreview.net/forum?id=Y4mBaZu4vy", "poster": "", "project": "", "author_site": "Eric Qu, Aditi Krishnapriyan", "tldr": "", "abstract": "Scaling has been a critical factor in improving model performance and generalization across various fields of machine learning.\nIt involves how a model\u2019s performance changes with increases in model size or input data, as well as how efficiently computational resources are utilized to support this growth. \nDespite successes in scaling other types of machine learning models, the study of scaling in Neural Network Interatomic Potentials (NNIPs) remains limited. NNIPs act as surrogate models for ab initio quantum mechanical calculations, predicting the energy and forces between atoms in molecules and materials based on atomic configurations. The dominant paradigm in this field is to incorporate numerous physical domain constraints into the model, such as symmetry constraints like rotational equivariance. We contend that these increasingly complex domain constraints inhibit the scaling ability of NNIPs, and such strategies are likely to cause model performance to plateau in the long run. In this work, we take an alternative approach and start by systematically studying NNIP scaling properties and strategies. Our findings indicate that scaling the model through attention mechanisms is both efficient and improves model expressivity. These insights motivate us to develop an NNIP architecture designed for scalability: the Efficiently Scaled Attention Interatomic Potential (EScAIP). \nEScAIP leverages a novel multi-head self-attention formulation within graph neural networks, applying attention at the neighbor-level representations.\nImplemented with highly-optimized attention GPU kernels, EScAIP achieves substantial gains in efficiency---at least 10x speed up in inference time, 5x less in memory usage---compared to existing NNIP models. EScAIP also achieves state-of-the-art performance on a wide range of datasets including catalysts (OC20 and OC22), molecules (SPICE), and materials (MPTrj).\n After training EScAIP, we test its ability to learn rotational equivariance by predicting forces on new, unseen atomistic systems before and after rotation. The model's force predictions exactly match the rotated forces, suggesting that it has precisely learned rotational equivariance.\nFinally, we emphasize that our approach should be thought of as a philosophy rather than a specific model, representing a proof-of-concept towards developing general-purpose NNIPs that achieve better expressivity through scaling, and continue to scale efficiently with increased computational resources and training data.", "keywords": "Neural Network Interatomic Potentials;Machine Learning Force Fields;Scaling;Graph Neural Networks;Attention", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Eric Qu;Aditi S. Krishnapriyan", "authorids": "~Eric_Qu1;~Aditi_S._Krishnapriyan1", "gender": "M;", "homepage": "https://people.eecs.berkeley.edu/~ericqu/;https://a1k12.github.io", "dblp": "312/6521;256/5472", "google_scholar": "-qOBJlcAAAAJ;7HoFN1wAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Eric_Qu1;~Aditi_Krishnapriyan1", "aff": "University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqu2024the,\ntitle={The Importance of Being Scalable: Improving the Speed and Accuracy of Neural Network Interatomic Potentials Across Chemical Domains},\nauthor={Eric Qu and Aditi S. Krishnapriyan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y4mBaZu4vy}\n}", "github": "", "reviewers": "wbCz;L3rz;PKom;VH7W", "pdf_size": 626846, "rating": "5;5;6;7", "confidence": "5;3;2;4", "soundness": "2;3;3;2", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "59;64;58;58", "wc_strengths": "57;43;40;47", "wc_weaknesses": "549;305;305;63", "wc_questions": "111;178;2;51", "wc_limitations": "34;2;3;25", "wc_review": "810;592;408;244", "wc_reply_reviewers": "0;40;15;33", "wc_reply_authors": "457;272;5;5", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.75, 2.48746859276655 ], "wc_strengths_avg": [ 46.75, 6.417748826496718 ], "wc_weaknesses_avg": [ 305.5, 171.8276753029034 ], "wc_questions_avg": [ 85.5, 65.8957510011078 ], "wc_limitations_avg": [ 16.0, 13.874436925511608 ], "wc_review_avg": [ 513.5, 210.8524365522011 ], "wc_reply_reviewers_avg": [ 22.0, 15.636495771111889 ], "wc_reply_authors_avg": [ 184.75, 191.2803897423884 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1307767737349446069&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "berkeley.edu;berkeley.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Simple yet Universal Framework for Depth Completion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94721", "id": "Y4tHp5Jilp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y4tHp5Jilp", "openreview": "https://openreview.net/forum?id=Y4tHp5Jilp", "poster": "", "project": "", "author_site": "Jin-Hwi Park, Hae-Gon Jeon", "tldr": "", "abstract": "Consistent depth estimation across diverse scenes and sensors is a crucial challenge in computer vision, especially when deploying machine learning models in the real world. Traditional methods depend heavily on extensive pixel-wise labeled data, which is costly and labor-intensive to acquire, and frequently have difficulty in scale issues on various depth sensors. In response, we define Universal Depth Completion (UniDC) problem. We also present a baseline architecture, a simple yet effective approach tailored to estimate scene depth across a wide range of sensors and environments using minimal labeled data. \nOur approach addresses two primary challenges: generalizable knowledge of unseen scene configurations and strong adaptation to arbitrary depth sensors with various specifications. To enhance versatility in the wild, we utilize a foundation model for monocular depth estimation that provides a comprehensive understanding of 3D structures in scenes. Additionally, for fast adaptation to off-the-shelf sensors, we generate a pixel-wise affinity map based on the knowledge from the foundation model. We then adjust depth information from arbitrary sensors to the monocular depth along with the constructed affinity. Furthermore, to boost up both the adaptability and generality, we embed the learned features into hyperbolic space, which builds implicit hierarchical structures of 3D data from fewer examples. Extensive experiments demonstrate the proposed method's superior generalization capabilities for UniDC problem over state-of-the-art depth completion. Source code is publicly available at https://github.com/JinhwiPark/UniDC.", "keywords": "Few-shot Depth Completion;hyperbolic representation;foundation model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jin-Hwi Park;Hae-Gon Jeon", "authorids": "~Jin-Hwi_Park1;~Hae-Gon_Jeon3", "gender": "M;M", "homepage": "https://sites.google.com/site/hgjeoncv/;https://www.jinhwipark.com/", "dblp": "142/2427;317/1123", "google_scholar": "https://scholar.google.co.kr/citations?user=Ei00xroAAAAJ;0B-YoigAAAAJ", "orcid": "0000-0003-1105-1666;", "linkedin": ";", "or_profile": "~Hae-Gon_Jeon3;~Jinhwi_Park1", "aff": "Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology", "aff_domain": "gist.ac.kr;gist.ac.kr", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\npark2024a,\ntitle={A Simple yet Universal Framework for Depth Completion},\nauthor={Jin-Hwi Park and Hae-Gon Jeon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y4tHp5Jilp}\n}", "github": "", "reviewers": "tAUP;bXoG;AS7d;g2HF", "pdf_size": 2583201, "rating": "3;5;5;6", "confidence": "5;4;4;5", "soundness": "3;1;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "56;109;61;115", "wc_strengths": "58;207;169;73", "wc_weaknesses": "91;365;60;68", "wc_questions": "70;231;14;184", "wc_limitations": "21;23;1;65", "wc_review": "296;935;305;505", "wc_reply_reviewers": "163;436;0;53", "wc_reply_authors": "1115;1868;43;341", "reply_reviewers": "1;2;0;1", "reply_authors": "3;5;2;3", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.25, 26.892145693491994 ], "wc_strengths_avg": [ 126.75, 62.930020657870436 ], "wc_weaknesses_avg": [ 146.0, 126.95077786291819 ], "wc_questions_avg": [ 124.75, 86.69306488987455 ], "wc_limitations_avg": [ 27.5, 23.296995514443488 ], "wc_review_avg": [ 510.25, 259.0708156083969 ], "wc_reply_reviewers_avg": [ 163.0, 168.22455231029744 ], "wc_reply_authors_avg": [ 841.75, 710.0363987148828 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7954788994710780977&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "gist.ac.kr;gist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Gwangju Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gist.ac.kr", "aff_unique_abbr": "GIST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Gwangju", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Fast Rates in Stochastic Online Convex Optimization by Exploiting the Curvature of Feasible Sets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94720", "id": "Y58T1MQhh6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y58T1MQhh6", "openreview": "https://openreview.net/forum?id=Y58T1MQhh6", "poster": "/media/PosterPDFs/NeurIPS%202024/94720.png?t=1733731044.2244554", "project": "", "author_site": "Taira Tsuchiya, Shinji Ito", "tldr": "", "abstract": "In this work, we explore online convex optimization (OCO) and introduce a new condition and analysis that provides fast rates by exploiting the curvature of feasible sets. In online linear optimization, it is known that if the average gradient of loss functions exceeds a certain threshold, the curvature of feasible sets can be exploited by the follow-the-leader (FTL) algorithm to achieve a logarithmic regret. This study reveals that algorithms adaptive to the curvature of loss functions can also leverage the curvature of feasible sets. In particular, we first prove that if an optimal decision is on the boundary of a feasible set and the gradient of an underlying loss function is non-zero, then the algorithm achieves a regret bound of $O(\\rho \\log T)$ in stochastic environments. Here, $\\rho > 0$ is the radius of the smallest sphere that includes the optimal decision and encloses the feasible set. Our approach, unlike existing ones, can work directly with convex loss functions, exploiting the curvature of loss functions simultaneously, and can achieve the logarithmic regret only with a local property of feasible sets. Additionally, the algorithm achieves an $O(\\sqrt{T})$ regret even in adversarial environments, in which FTL suffers an $\\Omega(T)$ regret, and achieves an $O(\\rho \\log T + \\sqrt{C \\rho \\log T})$ regret in corrupted stochastic environments with corruption level $C$. Furthermore, by extending our analysis, we establish a matching regret upper bound of $O\\Big(T^{\\frac{q-2}{2(q-1)}} (\\log T)^{\\frac{q}{2(q-1)}}\\Big)$ for $q$-uniformly convex feasible sets, where uniformly convex sets include strongly convex sets and $\\ell_p$-balls for $p \\in [2,\\infty)$. This bound bridges the gap between the $O(\\log T)$ bound for strongly convex sets~($q=2$) and the $O(\\sqrt{T})$ bound for non-curved sets~($q\\to\\infty$).", "keywords": "online convex optimization;online linear optimization;curvature of feasible sets;strongly convex sets;uniformly convex sets;universal online learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Taira Tsuchiya;Shinji Ito", "authorids": "~Taira_Tsuchiya1;~Shinji_Ito1", "gender": "M;M", "homepage": "https://tsuchhiii.github.io/;https://researchmap.jp/shinji_ito?lang=en", "dblp": "226/5536;49/852", "google_scholar": "https://scholar.google.co.jp/citations?view_op=list_works;https://scholar.google.co.jp/citations?user=GX0V06wAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Taira_Tsuchiya1;~Shinji_Ito1", "aff": "The University of Tokyo;NEC", "aff_domain": "u-tokyo.ac.jp;nec.com", "position": "Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\ntsuchiya2024fast,\ntitle={Fast Rates in Stochastic Online Convex Optimization by Exploiting the Curvature of Feasible Sets},\nauthor={Taira Tsuchiya and Shinji Ito},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y58T1MQhh6}\n}", "github": "", "reviewers": "Mo5s;BHA1;kmSZ;b39D", "pdf_size": 224486, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "95;167;80;53", "wc_strengths": "131;108;17;49", "wc_weaknesses": "533;802;55;30", "wc_questions": "59;214;62;45", "wc_limitations": "9;22;17;1", "wc_review": "827;1313;231;178", "wc_reply_reviewers": "50;23;22;11", "wc_reply_authors": "172;634;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 98.75, 42.180416071916596 ], "wc_strengths_avg": [ 76.25, 45.438832511410325 ], "wc_weaknesses_avg": [ 355.0, 326.7713267714902 ], "wc_questions_avg": [ 95.0, 69.00362309328402 ], "wc_limitations_avg": [ 12.25, 7.980444849756184 ], "wc_review_avg": [ 637.25, 465.9916174138758 ], "wc_reply_reviewers_avg": [ 26.5, 14.361406616345072 ], "wc_reply_authors_avg": [ 201.5, 259.38918635903076 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pAthiac2GoIJ:scholar.google.com/&scioq=Fast+Rates+in+Stochastic+Online+Convex+Optimization+by+Exploiting+the+Curvature+of+Feasible+Sets&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "u-tokyo.ac.jp;nec.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Tokyo;NEC Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.nec.com", "aff_unique_abbr": "UTokyo;NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Harnessing small projectors and multiple views for efficient vision pretraining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94719", "id": "Y5DPSJzpra", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y5DPSJzpra", "openreview": "https://openreview.net/forum?id=Y5DPSJzpra", "poster": "", "project": "", "author_site": "Arna Ghosh, Kumar Krishna Agrawal, Shagun Sodhani, Adam Oberman, Blake Richards", "tldr": "", "abstract": "Recent progress in self-supervised (SSL) visual representation learning has led to the development of several different proposed frameworks that rely on augmentations of images but use different loss functions. \nHowever, there are few theoretically grounded principles to guide practice, so practical implementation of each SSL framework requires several heuristics to achieve competitive performance.\nIn this work, we build on recent analytical results to design practical recommendations for competitive and efficient SSL that are grounded in theory. \nSpecifically, recent theory tells us that existing SSL frameworks are actually minimizing the same idealized loss, which is to learn features that best match the data similarity kernel defined by the augmentations used.\nWe show how this idealized loss can be reformulated to a functionally equivalent loss that is more efficient to compute.\nWe study the implicit bias of using gradient descent to minimize our reformulated loss function, and find that using a stronger orthogonalization constraint with a reduced projector dimensionality should yield good representations.\nFurthermore, the theory tells us that approximating the reformulated loss should be improved by increasing the number of augmentations, and as such using multiple augmentations should lead to improved convergence.\nWe empirically verify our findings on CIFAR, STL and Imagenet datasets, wherein we demonstrate an improved linear readout performance when training a ResNet-backbone using our theoretically grounded recommendations. \nRemarkably, we also demonstrate that by leveraging these insights, we can reduce the pretraining dataset size by up to 2$\\times$ while maintaining downstream accuracy simply by using more data augmentations. \nTaken together, our work provides theoretically grounded recommendations that can be used to improve SSL convergence and efficiency.", "keywords": "representation learning;self-supervised learning;data-augmentation;learning dynamics;sample efficient SSL;compute efficient SSL", "primary_area": "machine_vision", "supplementary_material": "/attachment/000748349a2190443b015622ecd4aba335d82112.zip", "author": "Arna Ghosh;Kumar Krishna Agrawal;Shagun Sodhani;Adam Oberman;Blake Aaron Richards", "authorids": "~Arna_Ghosh1;~Kumar_Krishna_Agrawal1;~Shagun_Sodhani1;~Adam_Oberman1;~Blake_Aaron_Richards1", "gender": "M;;M;M;M", "homepage": "https://arnaghosh.github.io/;https://kumarkrishna.github.io/;https://shagunsodhani.com;http://linclab.org;https://www.adamoberman.net/", "dblp": "190/7223;190/7111;http://dblp.uni-trier.de/pers/hd/s/Sodhani:Shagun;70/10850;31/8186", "google_scholar": "https://scholar.google.ca/citations?user=YjS546oAAAAJ;https://scholar.google.co.in/citations?user=Wd8_fOcAAAAJ;ixp-vqMAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ;https://scholar.google.ca/citations?user=LPAZlL8AAAAJ", "orcid": ";;;0000-0001-9662-2151;", "linkedin": ";;shagun-sodhani-b2239879;;adam-oberman-527348107/", "or_profile": "~Arna_Ghosh1;~Kumar_Krishna_Agrawal1;~Shagun_Sodhani1;~Blake_Aaron_Richards1;~Adam_M_Oberman1", "aff": "McGill University;University of California Berkeley, USA;Meta Facebook;Mila - Quebec Artificial Intelligence Institute;McGill University", "aff_domain": "mcgill.ca;berkeley.edu;fb.com;mila.quebec;mcgill.ca", "position": "PhD student;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nghosh2024harnessing,\ntitle={Harnessing small projectors and multiple views for efficient vision pretraining},\nauthor={Arna Ghosh and Kumar Krishna Agrawal and Shagun Sodhani and Adam Oberman and Blake Aaron Richards},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y5DPSJzpra}\n}", "github": "", "reviewers": "rDTK;VDXd;RyLx;EgLh", "pdf_size": 8794720, "rating": "5;6;6;8", "confidence": "4;2;3;3", "soundness": "3;2;2;4", "novelty": "2;3;2;4", "presentation": "3;1;3;4", "wc_summary": "67;127;106;80", "wc_strengths": "49;138;68;130", "wc_weaknesses": "211;202;126;97", "wc_questions": "5;50;193;44", "wc_limitations": "82;5;33;5", "wc_review": "414;522;526;356", "wc_reply_reviewers": "48;26;304;55", "wc_reply_authors": "692;81;240;124", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 95.0, 23.205602771744587 ], "wc_strengths_avg": [ 96.25, 38.447204059593204 ], "wc_weaknesses_avg": [ 159.0, 48.69804924224378 ], "wc_questions_avg": [ 73.0, 71.40378141247143 ], "wc_limitations_avg": [ 31.25, 31.45135132232 ], "wc_review_avg": [ 454.5, 72.47585804942223 ], "wc_reply_reviewers_avg": [ 108.25, 113.52174901753408 ], "wc_reply_authors_avg": [ 284.25, 242.49162356667085 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YxQtUhT-KQoJ:scholar.google.com/&scioq=Harnessing+small+projectors+and+multiple+views+for+efficient+vision+pretraining&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "mcgill.ca;berkeley.edu;fb.com;mila.quebec;mcgill.ca", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "McGill University;University of California, Berkeley;Meta;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;Meta Platforms, Inc.;Artificial Intelligence", "aff_unique_url": "https://www.mcgill.ca;https://www.berkeley.edu;https://meta.com;https://mila.quebec", "aff_unique_abbr": "McGill;UC Berkeley;Meta;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "SampDetox: Black-box Backdoor Defense via Perturbation-based Sample Detoxification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94718", "id": "Y6RV6z98Pk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y6RV6z98Pk", "openreview": "https://openreview.net/forum?id=Y6RV6z98Pk", "poster": "/media/PosterPDFs/NeurIPS%202024/94718.png?t=1731461987.5090723", "project": "", "author_site": "Yanxin Yang, Chentao Jia, DengKe Yan, Ming Hu, Tianlin Li, Xiaofei Xie, Xian Wei, Mingsong Chen", "tldr": "", "abstract": "The advancement of Machine Learning has enabled the widespread deployment of Machine Learning as a Service (MLaaS) applications. However, the untrustworthy nature of third-party ML services poses backdoor threats. Existing defenses in MLaaS are limited by their reliance on training samples or white-box model analysis, highlighting the need for a black-box backdoor purification method. In our paper, we attempt to use diffusion models for purification by introducing noise in a forward diffusion process to destroy backdoors and recover clean samples through a reverse generative process. However, since a higher noise also destroys the semantics of the original samples, it still results in a low restoration performance. To investigate the effectiveness of noise in eliminating different types of backdoors, we conducted a preliminary study, which demonstrates that backdoors with low visibility can be easily destroyed by lightweight noise and those with high visibility need to be destroyed by high noise but can be easily detected. Based on the study, we propose SampDetox, which strategically combines lightweight and high noise. SampDetox applies weak noise to eliminate low-visibility backdoors and compares the structural similarity between the recovered and original samples to localize high-visibility backdoors. Intensive noise is then applied to these localized areas, destroying the high-visibility backdoors while preserving global semantic information. As a result, detoxified samples can be used for inference, even by poisoned models. Comprehensive experiments demonstrate the effectiveness of SampDetox in defending against various state-of-the-art backdoor attacks.", "keywords": "Backdoor defense;black-box", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/7a6ec60e2d9524ed1944a1793e08ae478d8ae9fa.zip", "author": "Yanxin Yang;Chentao Jia;DengKe Yan;Ming Hu;Tianlin Li;Xiaofei Xie;Xian Wei;Mingsong Chen", "authorids": "~Yanxin_Yang2;~Chentao_Jia1;~DengKe_Yan1;~Ming_Hu2;~Tianlin_Li2;~Xiaofei_Xie2;~Xian_Wei1;~Mingsong_Chen1", "gender": "M;M;M;M;;M;M;M", "homepage": "https://github.com/easywood0204;https://kukoray.com;https://github.com/BrokenTheirCoder;https://humingprofile.github.io/;;http://xiaofeixie.bitbucket.io/;https://www.researchgate.net/;https://faculty.ecnu.edu.cn/_s43/cms/main.psp", "dblp": "116/7287;;;82/378-3;137/8830;127/0713;139/0725;95/573.html", "google_scholar": ";i_WEJQQAAAAJ;;ZTTvNb0AAAAJ;XB6CydwAAAAJ;FfcZfJgAAAAJ;https://scholar.google.com/citations?hl=en;93A6b7YAAAAJ", "orcid": "0009-0006-3607-9023;0009-0003-8480-999X;;0000-0002-5058-4660;;0000-0002-1288-6502;;0000-0002-3922-0989", "linkedin": ";;;;;;;", "or_profile": "~Yanxin_Yang2;~Chentao_Jia1;~DengKe_Yan1;~Ming_Hu2;~Tianlin_Li2;~Xiaofei_Xie2;~Xian_Wei1;~Mingsong_Chen1", "aff": "East China Normal University;East China Normal University;;Nanyang Technological University;Nanyang Technological University;Singapore Management University;East China Normal University;East China Normal University", "aff_domain": "stu.ecnu.edu.cn;stu.ecnu.edu.cn;;ntu.edu.sg;ntu.edu.sg;smu.edu.sg;ecnu.edu.cn;ecnu.edu.cn", "position": "MS student;MS student;;Postdoc;PhD student;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nyang2024sampdetox,\ntitle={SampDetox: Black-box Backdoor Defense via Perturbation-based Sample Detoxification},\nauthor={Yanxin Yang and Chentao Jia and DengKe Yan and Ming Hu and Tianlin Li and Xiaofei Xie and Xian Wei and Mingsong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y6RV6z98Pk}\n}", "github": "", "reviewers": "i7qR;2sKD;gGGP;ok4k;aKFd", "pdf_size": 746448, "rating": "6;6;6;6;7", "confidence": "4;4;3;4;4", "soundness": "3;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "136;45;34;107;75", "wc_strengths": "112;75;24;34;62", "wc_weaknesses": "198;149;209;22;77", "wc_questions": "231;83;11;213;96", "wc_limitations": "78;56;17;6;7", "wc_review": "755;408;295;382;317", "wc_reply_reviewers": "38;34;33;301;20", "wc_reply_authors": "39;42;45;445;43", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.4, 38.02420281873112 ], "wc_strengths_avg": [ 61.4, 31.289614890567126 ], "wc_weaknesses_avg": [ 131.0, 71.68542390193421 ], "wc_questions_avg": [ 126.8, 83.14421206554309 ], "wc_limitations_avg": [ 32.8, 29.03377343715418 ], "wc_review_avg": [ 431.4, 166.97137479220802 ], "wc_reply_reviewers_avg": [ 85.2, 108.06923706587366 ], "wc_reply_authors_avg": [ 122.8, 161.11163831331368 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7321589031434766826&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stu.ecnu.edu.cn;stu.ecnu.edu.cn;;ntu.edu.sg;ntu.edu.sg;smu.edu.sg;ecnu.edu.cn;ecnu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;1;2;0;0", "aff_unique_norm": "East China Normal University;Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "ECNU;NTU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Reprogramming Pretrained Target-Specific Diffusion Models for Dual-Target Drug Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94717", "id": "Y79L45D5ts", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y79L45D5ts", "openreview": "https://openreview.net/forum?id=Y79L45D5ts", "poster": "/media/PosterPDFs/NeurIPS%202024/94717.png?t=1731604079.2654781", "project": "", "author_site": "Xiangxin Zhou, Jiaqi Guan, Yijia Zhang, Xingang Peng, Liang Wang, Jianzhu Ma", "tldr": "", "abstract": "Dual-target therapeutic strategies have become a compelling approach and attracted significant attention due to various benefits, such as their potential in overcoming drug resistance in cancer therapy. Considering the tremendous success that deep generative models have achieved in structure-based drug design in recent years, we formulate dual-target drug design as a generative task and curate a novel dataset of potential target pairs based on synergistic drug combinations. We propose to design dual-target drugs with diffusion models that are trained on single-target protein-ligand complex pairs. Specifically, we align two pockets in 3D space with protein-ligand binding priors and build two complex graphs with shared ligand nodes for SE(3)-equivariant composed message passing, based on which we derive a composed drift in both 3D and categorical probability space in the generative process. Our algorithm can well transfer the knowledge gained in single-target pretraining to dual-target scenarios in a zero-shot manner. We also repurpose linker design methods as strong baselines for this task. Extensive experiments demonstrate the effectiveness of our method compared with various baselines.", "keywords": "dual-target drug design;drug discovery;diffusion model;structure-based drug design", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xiangxin Zhou;Jiaqi Guan;Yijia Zhang;Xingang Peng;Liang Wang;Jianzhu Ma", "authorids": "~Xiangxin_Zhou1;~Jiaqi_Guan1;~Yijia_Zhang7;~Xingang_Peng1;~Liang_Wang3;~Jianzhu_Ma2", "gender": "Not Specified;M;M;;M;M", "homepage": ";http://jiaqi.web.illinois.edu/;https://github.com/BLUE-Flowing;https://github.com/pengxingang;;https://majianzhu.com/", "dblp": "247/9275;207/7593;;223/2200;56/4499-1;24/9080.html", "google_scholar": "eQgIWcQAAAAJ;On-ONT4AAAAJ;;6yMuAlgAAAAJ;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Xiangxin_Zhou1;~Jiaqi_Guan1;~Yijia_Zhang7;~Xingang_Peng1;~Liang_Wang3;~Jianzhu_Ma2", "aff": "Institute of Automation, Chinese Academy of Sciences;University of Illinois, Urbana Champaign;Tsinghua University;Peking University;Institute of Automation\uff0c CAS\uff0cChina;Tsinghua University", "aff_domain": "ia.ac.cn;illinois.edu;mail.tsinghua.edu.cn;pku.edu.cn;ia.ac.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Undergrad student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhou2024reprogramming,\ntitle={Reprogramming Pretrained Target-Specific Diffusion Models for Dual-Target Drug Design},\nauthor={Xiangxin Zhou and Jiaqi Guan and Yijia Zhang and Xingang Peng and Liang Wang and Jianzhu Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y79L45D5ts}\n}", "github": "", "reviewers": "em5U;7yyK;aLWE;xVLn", "pdf_size": 1883089, "rating": "4;5;7;7", "confidence": "2;4;2;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "95;122;46;162", "wc_strengths": "113;125;40;44", "wc_weaknesses": "43;118;11;34", "wc_questions": "50;37;1;30", "wc_limitations": "40;88;12;10", "wc_review": "341;490;110;280", "wc_reply_reviewers": "0;67;22;177", "wc_reply_authors": "594;1283;110;490", "reply_reviewers": "0;1;1;3", "reply_authors": "5;6;3;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 106.25, 42.16856056352884 ], "wc_strengths_avg": [ 80.5, 38.75886995256699 ], "wc_weaknesses_avg": [ 51.5, 40.12792045446661 ], "wc_questions_avg": [ 29.5, 17.95132307101624 ], "wc_limitations_avg": [ 37.5, 31.476181471074284 ], "wc_review_avg": [ 305.25, 136.17153704060183 ], "wc_reply_reviewers_avg": [ 66.5, 68.21473447870335 ], "wc_reply_authors_avg": [ 619.25, 423.45092690889226 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4840993593430618591&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ia.ac.cn;illinois.edu;mail.tsinghua.edu.cn;pku.edu.cn;ia.ac.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;2", "aff_unique_norm": "Chinese Academy of Sciences;University of Illinois Urbana-Champaign;Tsinghua University;Peking University", "aff_unique_dep": "Institute of Automation;;;", "aff_unique_url": "http://www.ia.cas.cn;https://illinois.edu;https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "CAS;UIUC;THU;Peking U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Interactive Deep Clustering via Value Mining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94716", "id": "Y7HPB7pL1f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y7HPB7pL1f", "openreview": "https://openreview.net/forum?id=Y7HPB7pL1f", "poster": "/media/PosterPDFs/NeurIPS%202024/94716.png?t=1731637513.062377", "project": "", "author_site": "Honglin Liu, Peng Hu, Changqing Zhang, Yunfan Li, Xi Peng", "tldr": "", "abstract": "In the absence of class priors, recent deep clustering methods resort to data augmentation and pseudo-labeling strategies to generate supervision signals. Though achieved remarkable success, existing works struggle to discriminate hard samples at cluster boundaries, mining which is particularly challenging due to their unreliable cluster assignments. To break such a performance bottleneck, we propose incorporating user interaction to facilitate clustering instead of exhaustively mining semantics from the data itself. To be exact, we present Interactive Deep Clustering (IDC), a plug-and-play method designed to boost the performance of pre-trained clustering models with minimal interaction overhead. More specifically, IDC first quantitatively evaluates sample values based on hardness, representativeness, and diversity, where the representativeness avoids selecting outliers and the diversity prevents the selected samples from collapsing into a small number of clusters. IDC then queries the cluster affiliations of high-value samples in a user-friendly manner. Finally, it utilizes the user feedback to finetune the pre-trained clustering model. Extensive experiments demonstrate that IDC could remarkably improve the performance of various pre-trained clustering models, at the expense of low user interaction costs. The code could be accessed at pengxi.me.", "keywords": "Deep Clustering; Interactive Clustering", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/cbaf8bfebf81301518c57b03b34cf2d052620df9.zip", "author": "Honglin Liu;Peng Hu;Changqing Zhang;Yunfan Li;Xi Peng", "authorids": "~Honglin_Liu3;~Peng_Hu2;~Changqing_Zhang1;~Yunfan_Li1;~Xi_Peng3", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com.sg/citations?hl=en&user=EuywWL4AAAAJ;https://penghu-cs.github.io/;http://cic.tju.edu.cn/faculty/zhangchangqing/index.html;https://yunfan-li.github.io/;http://www.pengxi.me", "dblp": ";11/6278-2;78/2668;80/1874-3;18/931-1", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;gvESkwYAAAAJ;yJGhdykAAAAJ;JmXIt5oAAAAJ;bw9FOHAAAAAJ", "orcid": ";0000-0003-3868-3997;;;", "linkedin": ";;;;", "or_profile": "~Honglin_Liu3;~Peng_Hu2;~Changqing_Zhang1;~Yunfan_Li1;~Xi_Peng2", "aff": "Sichuan University;Sichuan University;Tianjin University;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;tju.edu.cn;scu.edu.cn;scu.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2024interactive,\ntitle={Interactive Deep Clustering via Value Mining},\nauthor={Honglin Liu and Peng Hu and Changqing Zhang and Yunfan Li and Xi Peng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y7HPB7pL1f}\n}", "github": "", "reviewers": "2n5n;N8tX;uVM4;MjhJ", "pdf_size": 3309023, "rating": "6;7;7;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;4", "wc_summary": "194;37;56;54", "wc_strengths": "61;76;84;81", "wc_weaknesses": "414;112;112;71", "wc_questions": "44;2;6;124", "wc_limitations": "102;34;27;5", "wc_review": "815;261;285;335", "wc_reply_reviewers": "31;16;9;6", "wc_reply_authors": "69;31;18;24", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 85.25, 63.21936016759423 ], "wc_strengths_avg": [ 75.5, 8.845903006477066 ], "wc_weaknesses_avg": [ 177.25, 137.7087052440767 ], "wc_questions_avg": [ 44.0, 49.01020301937138 ], "wc_limitations_avg": [ 42.0, 36.2560339805666 ], "wc_review_avg": [ 424.0, 227.31695933211847 ], "wc_reply_reviewers_avg": [ 15.5, 9.656603957913983 ], "wc_reply_authors_avg": [ 35.5, 19.880895352071043 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lMYrFBWUgwgJ:scholar.google.com/&scioq=Interactive+Deep+Clustering+via+Value+Mining&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "scu.edu.cn;scu.edu.cn;tju.edu.cn;scu.edu.cn;scu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Sichuan University;Tianjin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.scu.edu.cn;http://www.tju.edu.cn", "aff_unique_abbr": "SCU;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AgentPoison: Red-teaming LLM Agents via Poisoning Memory or Knowledge Bases", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94715", "id": "Y841BRW9rY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y841BRW9rY", "openreview": "https://openreview.net/forum?id=Y841BRW9rY", "poster": "/media/PosterPDFs/NeurIPS%202024/94715.png?t=1733787481.2283728", "project": "", "author_site": "Zhaorun Chen, Zhen Xiang, Chaowei Xiao, Dawn Song, Bo Li", "tldr": "", "abstract": "LLM agents have demonstrated remarkable performance across various applications, primarily due to their advanced capabilities in reasoning, utilizing external knowledge and tools, calling APIs, and executing actions to interact with environments. Current agents typically utilize a memory module or a retrieval-augmented generation (RAG) mechanism, retrieving past knowledge and instances with similar embeddings from knowledge bases to inform task planning and execution. However, the reliance on unverified knowledge bases raises significant concerns about their safety and trustworthiness. To uncover such vulnerabilities, we propose a novel red teaming approach AgentPoison, the first backdoor attack targeting generic and RAG-based LLM agents by poisoning their long-term memory or\nRAG knowledge base. In particular, we form the trigger generation process as a constrained optimization to optimize backdoor triggers by mapping the triggered instances to a unique embedding space, so as to ensure that whenever a user instruction contains the optimized backdoor trigger, the malicious demonstrations are retrieved from the poisoned memory or knowledge base with high probability. In the meantime, benign instructions without the trigger will still maintain normal performance. Unlike conventional backdoor attacks, AgentPoison requires no additional model training or fine-tuning, and the optimized backdoor trigger exhibits superior transferability, resilience, and stealthiness. Extensive experiments demonstrate AgentPoison's effectiveness in attacking\nthree types of real-world LLM agents: RAG-based autonomous driving agent, knowledge-intensive QA agent, and healthcare EHRAgent. We inject the poisoning instances into the RAG knowledge base and long-term memories of these agents, respectively, demonstrating the generalization of AgentPoison. On each agent, AgentPoison achieves an average attack success rate of $\\ge$ 80% with minimal\nimpact on benign performance ($\\le$ 1%) with a poison rate < 0.1%. The code and data is available at https://github.com/BillChan226/AgentPoison.", "keywords": "LLM Agent;LLM Red-teaming;Retrieval-Augmented Generation;Backdoor Poisoning;Trustworthy LLM", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zhaorun Chen;Zhen Xiang;Chaowei Xiao;Dawn Song;Bo Li", "authorids": "~Zhaorun_Chen1;~Zhen_Xiang1;~Chaowei_Xiao2;~Dawn_Song1;~Bo_Li19", "gender": "M;M;F;F;M", "homepage": "https://billchan226.github.io/;https://zhenxianglance.github.io/;;http://boli.cs.illinois.edu/;https://xiaocw11.github.io/", "dblp": "302/1064;20/2799.html;s/DXSong;50/3402-26;150/3317", "google_scholar": "UZg5N5UAAAAJ;https://scholar.google.com/citations?hl=en;;K8vJkTcAAAAJ;Juoqtj8AAAAJ", "orcid": "0000-0002-2668-6587;;;;0000-0002-7043-4926", "linkedin": "zhaorun-chen-1793b6226/;;;;", "or_profile": "~Zhaorun_Chen1;~Zhen_Xiang1;~Dawn_Song1;~Bo_Li19;~chaowei_xiao1", "aff": "University of Chicago;University of Illinois, Urbana Champaign;University of California, Berkeley;University of Illinois, Urbana Champaign;NVIDIA", "aff_domain": "uchicago.edu;illinois.edu;berkeley.edu;illinois.edu;nvidia.com", "position": "PhD student;Postdoc;Full Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nchen2024agentpoison,\ntitle={AgentPoison: Red-teaming {LLM} Agents via Poisoning Memory or Knowledge Bases},\nauthor={Zhaorun Chen and Zhen Xiang and Chaowei Xiao and Dawn Song and Bo Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y841BRW9rY}\n}", "github": "", "reviewers": "XDTd;dD1S;jtC2;2UC7;UuDZ", "pdf_size": 4114531, "rating": "3;4;5;6;8", "confidence": "4;4;1;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;2;2;3;4", "wc_summary": "56;101;83;100;132", "wc_strengths": "13;40;58;137;131", "wc_weaknesses": "507;28;41;108;10", "wc_questions": "6;119;2;83;45", "wc_limitations": "1;4;1;75;11", "wc_review": "583;292;185;503;329", "wc_reply_reviewers": "172;0;43;0;20", "wc_reply_authors": "596;92;0;0;0", "reply_reviewers": "2;0;1;0;1", "reply_authors": "4;2;1;1;1", "rating_avg": [ 5.2, 1.7204650534085253 ], "confidence_avg": [ 3.4, 1.2000000000000002 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 94.4, 24.87247474619283 ], "wc_strengths_avg": [ 75.8, 49.66850108469149 ], "wc_weaknesses_avg": [ 138.8, 187.05656898382372 ], "wc_questions_avg": [ 51.0, 44.96665431183423 ], "wc_limitations_avg": [ 18.4, 28.534890923218892 ], "wc_review_avg": [ 378.4, 144.70328261653225 ], "wc_reply_reviewers_avg": [ 47.0, 64.47945409198189 ], "wc_reply_authors_avg": [ 137.6, 231.95309870747576 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.05812381937190965, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15586707710895528929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uchicago.edu;illinois.edu;berkeley.edu;illinois.edu;nvidia.com", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "University of Chicago;University of Illinois Urbana-Champaign;University of California, Berkeley;NVIDIA", "aff_unique_dep": ";;;NVIDIA Corporation", "aff_unique_url": "https://www.uchicago.edu;https://illinois.edu;https://www.berkeley.edu;https://www.nvidia.com", "aff_unique_abbr": "UChicago;UIUC;UC Berkeley;NVIDIA", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Urbana-Champaign;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MetaLA: Unified Optimal Linear Approximation to Softmax Attention Map", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94714", "id": "Y8YVCOMEpz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Y8YVCOMEpz", "openreview": "https://openreview.net/forum?id=Y8YVCOMEpz", "poster": "", "project": "", "author_site": "YUHONG CHOU, Man Yao, Kexin Wang, Yuqi Pan, Rui-Jie Zhu, Jibin Wu, Yiran Zhong, Yu Qiao, Bo Xu, Guoqi Li", "tldr": "", "abstract": "Various linear complexity models, such as Linear Transformer (LinFormer), State Space Model (SSM), and Linear RNN (LinRNN), have been proposed to replace the conventional softmax attention in Transformer structures. However, the optimal design of these linear models is still an open question. In this work, we attempt to answer this question by finding the best linear approximation to softmax attention from a theoretical perspective. We start by unifying existing linear complexity models as the linear attention form and then identify three conditions for the optimal linear attention design: (1) Dynamic memory ability; (2) Static approximation ability; (3) Least parameter approximation. We find that none of the current linear models meet all three conditions, resulting in suboptimal performance. Instead, we propose Meta Linear Attention (MetaLA) as a solution that satisfies these conditions. Our experiments on Multi-Query Associative Recall (MQAR) task, language modeling, image classification, and Long-Range Arena (LRA) benchmark demonstrate that MetaLA is more effective than the existing linear models.", "keywords": "Self-attention;Linear attention;Efficient;Softmax attention;Foundation Model;Language Model", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yuhong Chou;Man Yao;Kexin Wang;Yuqi Pan;Rui-Jie Zhu;Jibin Wu;Yiran Zhong;Yu Qiao;Bo XU;Guoqi Li", "authorids": "~Yuhong_Chou1;~Man_Yao1;~Kexin_Wang2;~Yuqi_Pan2;~Rui-Jie_Zhu2;~Jibin_Wu1;~Yiran_Zhong1;~Yu_Qiao1;~Bo_XU10;~Guoqi_Li1", "gender": "M;M;F;M;M;M;M;;M;M", "homepage": "https://openreview.net/;;;https://github.com/Pan-Yuqi;https://ruijie-zhu.github.io;https://www.jibinwu.com/;;;;https://scholar.google.com/citations?hl=en&user=qCfE--MAAAAJ", "dblp": "347/9986;21/5932;;;317/4836;228/1824;158/9624;;;", "google_scholar": "8CpWM4cAAAAJ;eE4vvp0AAAAJ;;;08ITzJsAAAAJ;https://scholar.google.com.sg/citations?user=QwDyvrgAAAAJ;https://scholar.google.com.sg/citations?user=E9NVOBUAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-2526-7935;;;;;;;", "linkedin": ";;;;;;;;%E6%B3%A2-%E5%BE%90-74210b115/?midToken=AQH1EMB1ZoboJA&midSig=2Q5MzMXmNEH9M1&trk=eml-email_pymk_02-header-22-profile&trkEmail=eml-email_pymk_02-header-22-profile-null-7ydrhe~kpggjoav~k9-null-neptune/profile~vanity.view;", "or_profile": "~Yuhong_Chou1;~Man_Yao1;~Kexin_Wang2;~Yuqi_Pan2;~Rui-Jie_Zhu2;~Jibin_Wu1;~Yiran_Zhong1;~Yu_Qiao1;~Bo_XU10;~Guoqi_Li1", "aff": "The Hong Kong Polytechnic University;Institute of automation, Chinese academy of sciences;Institute of Automation, Chinese Academy of Sciences;Nanjing University;University of California, Santa Cruz;Hong Kong Polytechnic University;Shanghai AI Lab;;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "connect.polyu.hk;ia.ac.cn;ia.ac.cn;nju.edu.cn;ucsc.edu;polyu.edu.hk;pjlab.org.cn;;ia.ac.cn;ia.ac.cn", "position": "PhD student;Assistant Professor;PhD student;Undergrad student;PhD student;Assistant Professor;PI;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchou2024metala,\ntitle={Meta{LA}: Unified Optimal Linear Approximation to Softmax Attention Map},\nauthor={Yuhong Chou and Man Yao and Kexin Wang and Yuqi Pan and Rui-Jie Zhu and Jibin Wu and Yiran Zhong and Yu Qiao and Bo XU and Guoqi Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Y8YVCOMEpz}\n}", "github": "", "reviewers": "LW2x;2RWL;rbUC;XAnv", "pdf_size": 1142905, "rating": "6;7;7;8", "confidence": "4;2;3;4", "soundness": "2;3;4;3", "novelty": "2;3;4;3", "presentation": "1;4;3;3", "wc_summary": "41;83;134;95", "wc_strengths": "65;14;105;56", "wc_weaknesses": "318;62;134;144", "wc_questions": "18;53;158;105", "wc_limitations": "1;12;11;1", "wc_review": "443;224;542;401", "wc_reply_reviewers": "68;291;51;55", "wc_reply_authors": "76;129;56;46", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 88.25, 33.16153645415122 ], "wc_strengths_avg": [ 60.0, 32.334192428449484 ], "wc_weaknesses_avg": [ 164.5, 94.0996811896831 ], "wc_questions_avg": [ 83.5, 52.99292405595298 ], "wc_limitations_avg": [ 6.25, 5.261891294962297 ], "wc_review_avg": [ 402.5, 115.070630484064 ], "wc_reply_reviewers_avg": [ 116.25, 101.08752395820169 ], "wc_reply_authors_avg": [ 76.75, 32.041964671349355 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4842825095402679246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "connect.polyu.hk;ia.ac.cn;ia.ac.cn;nju.edu.cn;ucsc.edu;polyu.edu.hk;pjlab.org.cn;;ia.ac.cn;ia.ac.cn", "author_num": 10, "aff_unique_index": "0;1;1;2;3;0;4;1;1", "aff_unique_norm": "Hong Kong Polytechnic University;Chinese Academy of Sciences;Nanjing University;University of California, Santa Cruz;Shanghai AI Lab", "aff_unique_dep": ";Institute of Automation;;;", "aff_unique_url": "https://www.polyu.edu.hk;http://www.ia.cas.cn;https://www.nju.edu.cn;https://www.ucsc.edu;https://www.shanghaiailab.com", "aff_unique_abbr": "PolyU;CAS;Nanjing U;UCSC;SAIL", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Santa Cruz", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "Y9aRCjuhnV", "title": "EduQate: Generating Adaptive Curricula through RMABs in Education Settings", "track": "main", "status": "Reject", "tldr": "", "abstract": "There has been significant interest in the development of personalized and adaptive educational tools that cater to a student's individual learning progress. A crucial aspect in developing such tools is in exploring how mastery can be achieved across a diverse yet related range of content in an efficient manner. While Reinforcement Learning and Multi-armed Bandits have shown promise in educational settings, existing works often assume the independence of learning content, neglecting the prevalent interdependencies between such content. In response, we introduce Education Network Restless Multi-armed Bandits (EdNetRMABs), utilizing a network to represent the relationships between interdependent arms. Subsequently, we propose EduQate, a method employing interdependency-aware Q-learning to make informed decisions on arm selection at each time step. We establish the optimality guarantee of EduQate and demonstrate its efficacy compared to baseline policies, using students modeled from both synthetic and real-world data.", "keywords": "Machine Learning in Education;Restless Multi-armed bandits;Adaptive Curricula", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/fa5fb8d1895a0af462414e8f9d7d81a8a1b27ccf.zip", "author": "Sidney Tio;Dexun Li;Pradeep Varakantham", "authorids": "~Sidney_Tio1;~Dexun_Li1;~Pradeep_Varakantham1", "gender": "M;M;M", "homepage": ";;http://www.mysmu.edu.sg/faculty/pradeepv", "dblp": ";https://dblp.uni-trier.de/pid/130/1878.html;72/759", "google_scholar": "oFsUUAQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=BAdQpFkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sidney_Tio1;~Dexun_Li1;~Pradeep_Varakantham1", "aff": "Singapore Management University;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "position": "PhD student;PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024eduqate,\ntitle={EduQate: Generating Adaptive Curricula through {RMAB}s in Education Settings},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Y9aRCjuhnV}\n}", "github": "", "project": "", "reviewers": "MTkZ;KG3r;fqUb;Yn9L;4CnK", "site": "https://openreview.net/forum?id=Y9aRCjuhnV", "pdf_size": 995307, "rating": "3;3;4;8;8", "confidence": "3;4;4;3;4", "soundness": "2;2;3;3;4", "novelty": "2;2;2;3;3", "presentation": "3;2;3;4;4", "wc_summary": "59;76;85;228;49", "wc_strengths": "55;92;206;155;194", "wc_weaknesses": "104;177;431;80;13", "wc_questions": "1;51;194;31;300", "wc_limitations": "1;60;45;74;7", "wc_review": "220;456;961;568;563", "wc_reply_reviewers": "0;0;4;6;65", "wc_reply_authors": "0;0;33;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.2, 2.3151673805580453 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 99.4, 65.5212942485113 ], "wc_strengths_avg": [ 140.4, 58.352720587818354 ], "wc_weaknesses_avg": [ 161.0, 144.82403115505383 ], "wc_questions_avg": [ 115.4, 113.6777902670526 ], "wc_limitations_avg": [ 37.4, 28.83470131629596 ], "wc_review_avg": [ 553.6, 239.61853016826555 ], "wc_reply_reviewers_avg": [ 15.0, 25.107767722360343 ], "wc_reply_authors_avg": [ 6.6, 13.199999999999998 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.10580184237878977, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RBgW-X4T5nAJ:scholar.google.com/&scioq=EduQate:+Generating+Adaptive+Curricula+through+RMABs+in+Education+Settings&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Finding NeMo: Localizing Neurons Responsible For Memorization in Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94713", "id": "YAEKMFZyJm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YAEKMFZyJm", "openreview": "https://openreview.net/forum?id=YAEKMFZyJm", "poster": "/media/PosterPDFs/NeurIPS%202024/94713.png?t=1730907076.838482", "project": "", "author_site": "Dominik Hintersdorf, Lukas Struppek, Kristian Kersting, Adam Dziedzic, Franziska Boenisch", "tldr": "", "abstract": "Diffusion models (DMs) produce very detailed and high-quality images. Their power results from extensive training on large amounts of data - usually scraped from the internet without proper attribution or consent from content creators. Unfortunately, this practice raises privacy and intellectual property concerns, as DMs can memorize and later reproduce their potentially sensitive or copyrighted training images at inference time. Prior efforts prevent this issue by either changing the input to the diffusion process, thereby preventing the DM from generating memorized samples during inference, or removing the memorized data from training altogether. While those are viable solutions when the DM is developed and deployed in a secure and constantly monitored environment, they hold the risk of adversaries circumventing the safeguards and are not effective when the DM itself is publicly released. To solve the problem, we introduce NeMo, the first method to localize memorization of individual data samples down to the level of neurons in DMs' cross-attention layers. Through our experiments, we make the intriguing finding that in many cases, single neurons are responsible for memorizing particular training samples. By deactivating these memorization neurons, we can avoid the replication of training data at inference time, increase the diversity in the generated outputs, and mitigate the leakage of private and copyrighted data. In this way, our NeMo contributes to a more responsible deployment of DMs.", "keywords": "memorization;diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/f646c25f2ab15bc6e09494f99b928b7ae9afa547.zip", "author": "Dominik Hintersdorf;Lukas Struppek;Kristian Kersting;Adam Dziedzic;Franziska Boenisch", "authorids": "~Dominik_Hintersdorf1;~Lukas_Struppek1;~Kristian_Kersting1;~Adam_Dziedzic1;~Franziska_Boenisch2", "gender": "M;M;M;;", "homepage": "https://d0mih.github.io/;https://lukasstruppek.github.io/;http://www.ml.informatik.tu-darmstadt.de/;;", "dblp": "306/1325;306/1485;40/3793;;", "google_scholar": "DKITUfsAAAAJ;tU8K5qsAAAAJ;QY-earAAAAAJ;;", "orcid": "0000-0003-4976-6894;0000-0003-0626-3672;0000-0002-2873-9152;;", "linkedin": ";lukas-struppek/;;;", "or_profile": "~Dominik_Hintersdorf1;~Lukas_Struppek1;~Kristian_Kersting1;~Adam_Dziedzic1;~Franziska_Boenisch2", "aff": "CS Department, TU Darmstadt, Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt;;", "aff_domain": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;;", "position": "PhD student;PhD student;Full Professor;;", "bibtex": "@inproceedings{\nhintersdorf2024finding,\ntitle={Finding NeMo: Localizing Neurons Responsible For Memorization in Diffusion Models},\nauthor={Dominik Hintersdorf and Lukas Struppek and Kristian Kersting and Adam Dziedzic and Franziska Boenisch},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YAEKMFZyJm}\n}", "github": "", "reviewers": "RyXV;K7EP;zaL8;Rr34", "pdf_size": 9677559, "rating": "6;6;6;6", "confidence": "3;5;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "3;3;2;4", "wc_summary": "29;40;34;82", "wc_strengths": "24;34;51;112", "wc_weaknesses": "116;104;196;43", "wc_questions": "170;8;43;73", "wc_limitations": "5;35;25;1", "wc_review": "344;221;349;311", "wc_reply_reviewers": "24;376;473;0", "wc_reply_authors": "32;917;793;0", "reply_reviewers": "1;4;2;0", "reply_authors": "2;5;4;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 46.25, 21.00446381129497 ], "wc_strengths_avg": [ 55.25, 34.15680752061 ], "wc_weaknesses_avg": [ 114.75, 54.467306707785724 ], "wc_questions_avg": [ 73.5, 60.276446477873925 ], "wc_limitations_avg": [ 16.5, 14.026760139105537 ], "wc_review_avg": [ 306.25, 51.338947203852946 ], "wc_reply_reviewers_avg": [ 218.25, 209.25388287914754 ], "wc_reply_authors_avg": [ 435.5, 421.93631035975085 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17124777550440232137&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "CS Department", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Acoustic Volume Rendering for Neural Impulse Response Fields", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94712", "id": "YCKuXkw6UL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YCKuXkw6UL", "openreview": "https://openreview.net/forum?id=YCKuXkw6UL", "poster": "", "project": "", "author_site": "Zitong Lan, Chenhao Zheng, Zhiwei Zheng, Mingmin Zhao", "tldr": "", "abstract": "Realistic audio synthesis that captures accurate acoustic phenomena is essential for creating immersive experiences in virtual and augmented reality. Synthesizing the sound received at any position relies on the estimation of impulse response (IR), which characterizes how sound propagates in one scene along different paths before arriving at the listener position. In this paper, we present Acoustic Volume Rendering (AVR), a novel approach that adapts volume rendering techniques to model acoustic impulse responses. While volume rendering has been successful in modeling radiance fields for images and neural scene representations, IRs present unique challenges as time-series signals. To address these challenges, we introduce frequency-domain volume rendering and use spherical integration to fit the IR measurements. Our method constructs an impulse response field that inherently encodes wave propagation principles and achieves state of-the-art performance in synthesizing impulse responses for novel poses. Experiments show that AVR surpasses current leading methods by a substantial margin. Additionally, we develop an acoustic simulation platform, AcoustiX, which provides more accurate and realistic IR simulations than existing simulators. Code for AVR and AcoustiX are available at https://zitonglan.github.io/avr.", "keywords": "Acoustic signals;Room impulse response;Neural radiance field;Wave propagation", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/97adc06326fd404d08a5c5a3fad791f37ae7c6ab.zip", "author": "Zitong Lan;Chenhao Zheng;Zhiwei Zheng;Mingmin Zhao", "authorids": "~Zitong_Lan1;~Chenhao_Zheng1;~Zhiwei_Zheng1;~Mingmin_Zhao1", "gender": "M;M;;M", "homepage": "https://zitonglan.github.io/;https://hellomuffin.github.io/;;https://www.cis.upenn.edu/~mingminz/", "dblp": "338/6230;309/9032;;http://dblp.uni-trier.de/pers/hd/z/Zhao:Mingmin", "google_scholar": "5MzZf-oAAAAJ;ks4QxAMAAAAJ;;F5PfbSwAAAAJ", "orcid": "0000-0002-8524-6047;;;0000-0001-5226-9085", "linkedin": ";;;mingmin-zhao-68317884", "or_profile": "~Zitong_Lan1;~Chenhao_Zheng1;~Zhiwei_Zheng1;~Mingmin_Zhao1", "aff": "University of Pennsylvania;Department of Computer Science, University of Washington;;University of Pennsylvania", "aff_domain": "seas.upenn.edu;cs.washington.edu;;upenn.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nlan2024acoustic,\ntitle={Acoustic Volume Rendering for Neural Impulse Response Fields},\nauthor={Zitong Lan and Chenhao Zheng and Zhiwei Zheng and Mingmin Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YCKuXkw6UL}\n}", "github": "", "reviewers": "RLmp;DqiU;WhmA;4897", "pdf_size": 5345390, "rating": "6;7;7;8", "confidence": "4;4;4;3", "soundness": "3;3;1;3", "novelty": "3;4;3;4", "presentation": "3;3;2;4", "wc_summary": "68;74;125;48", "wc_strengths": "50;151;120;93", "wc_weaknesses": "66;204;652;129", "wc_questions": "2;78;312;124", "wc_limitations": "2;61;29;33", "wc_review": "188;568;1238;427", "wc_reply_reviewers": "12;91;61;0", "wc_reply_authors": "25;35;31;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.75, 28.38463492807332 ], "wc_strengths_avg": [ 103.5, 37.08436328157732 ], "wc_weaknesses_avg": [ 262.75, 229.98192863788233 ], "wc_questions_avg": [ 129.0, 114.28473213863697 ], "wc_limitations_avg": [ 31.25, 20.90902915010642 ], "wc_review_avg": [ 605.25, 389.75336752874887 ], "wc_reply_reviewers_avg": [ 41.0, 36.817115585010185 ], "wc_reply_authors_avg": [ 22.75, 13.608361400256829 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11135615555611441587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "seas.upenn.edu;cs.washington.edu;;upenn.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pennsylvania;University of Washington", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.upenn.edu;https://www.washington.edu", "aff_unique_abbr": "UPenn;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Regularized Conditional Diffusion Model for Multi-Task Preference Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94711", "id": "YCS0xGFrb4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YCS0xGFrb4", "openreview": "https://openreview.net/forum?id=YCS0xGFrb4", "poster": "", "project": "", "author_site": "Xudong Yu, Chenjia Bai, Haoran He, Changhong Wang, Xuelong Li", "tldr": "", "abstract": "Sequential decision-making can be formulated as a conditional generation process, with targets for alignment with human intents and versatility across various tasks. Previous return-conditioned diffusion models manifest comparable performance but rely on well-defined reward functions, which requires amounts of human efforts and faces challenges in multi-task settings. Preferences serve as an alternative but recent work rarely considers preference learning given multiple tasks. To facilitate the alignment and versatility in multi-task preference learning, we adopt multi-task preferences as a unified framework. In this work, we propose to learn preference representations aligned with preference labels, which are then used as conditions to guide the conditional generation process of diffusion models. The traditional classifier-free guidance paradigm suffers from the inconsistency between the conditions and generated trajectories. We thus introduce an auxiliary regularization objective to maximize the mutual info", "keywords": "multi-task reinforcement learning;preference learning;conditional diffusion model;trajectory generation;alignment;offline RL", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/abd5615a032f014a9e4b5c30fcd40f6e24afb503.zip", "author": "Xudong Yu;Chenjia Bai;Haoran He;Changhong Wang;Xuelong Li", "authorids": "~Xudong_Yu2;~Chenjia_Bai2;~Haoran_He1;~Changhong_Wang2;~Xuelong_Li2", "gender": ";M;M;;M", "homepage": ";https://baichenjia.github.io/;https://tinnerhrhe.github.io/;https://homepage.hit.edu.cn/wangch?lang=zh;", "dblp": ";247/1943;299/7312;;l/XuelongLi", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;Rm_1y2kAAAAJ;Z33PHQ0AAAAJ;;ahUibskAAAAJ", "orcid": ";;0000-0002-7340-8643;;", "linkedin": ";;;;", "or_profile": "~Xudong_Yu2;~Chenjia_Bai2;~Haoran_He1;~Changhong_Wang2;~Xuelong_Li2", "aff": "Harbin Institute of Technology;Shanghai AI Laboratory;Hong Kong University of Science and Technology;Harbin Institute of Technology;Northwestern Polytechnical University", "aff_domain": "hit.edu.cn;pjlab.org.cn;connect.ust.hk;hit.edu.cn;nwpu.edu.cn", "position": "PhD student;Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024regularized,\ntitle={Regularized Conditional Diffusion Model for Multi-Task Preference Alignment},\nauthor={Xudong Yu and Chenjia Bai and Haoran He and Changhong Wang and Xuelong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YCS0xGFrb4}\n}", "github": "", "reviewers": "Y24x;3Xmu;L2wb", "pdf_size": 2400499, "rating": "5;6;6", "confidence": "2;3;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "63;50;108", "wc_strengths": "50;56;78", "wc_weaknesses": "26;191;60", "wc_questions": "149;4;59", "wc_limitations": "1;1;1", "wc_review": "289;302;306", "wc_reply_reviewers": "59;46;0", "wc_reply_authors": "388;320;81", "reply_reviewers": "1;2;0", "reply_authors": "3;4;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.66666666666667, 24.850665092821068 ], "wc_strengths_avg": [ 61.333333333333336, 12.036980056845191 ], "wc_weaknesses_avg": [ 92.33333333333333, 71.13523896229084 ], "wc_questions_avg": [ 70.66666666666667, 59.76807025680369 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.0, 7.2571803523590805 ], "wc_reply_reviewers_avg": [ 35.0, 25.311394008759507 ], "wc_reply_authors_avg": [ 263.0, 131.65358584811378 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12049209776755383739&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 4, "email": "hit.edu.cn;pjlab.org.cn;connect.ust.hk;hit.edu.cn;nwpu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Harbin Institute of Technology;Shanghai AI Laboratory;Hong Kong University of Science and Technology;Northwestern Polytechnical University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.shanghai-ai-lab.com;https://www.ust.hk;https://www.nwpu.edu.cn", "aff_unique_abbr": "HIT;SAIL;HKUST;NWPU", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Harbin;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Learning Group-Equivariant Features for Domain Adaptive 3D Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94710", "id": "YEtirXhsh1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YEtirXhsh1", "openreview": "https://openreview.net/forum?id=YEtirXhsh1", "poster": "/media/PosterPDFs/NeurIPS%202024/94710.png?t=1733369016.424747", "project": "", "author_site": "Sangyun Shin, Yuhang He, Madhu Vankadari, Ta-Ying Cheng, Qian Xie, Andrew Markham, Niki Trigoni", "tldr": "", "abstract": "The performance of 3D object detection in large outdoor point clouds deteriorates significantly in an unseen environment due to the inter-domain gap. To address these challenges, most existing methods for domain adaptation harness self-training schemes and attempt to bridge the gap by focusing on a single factor that causes the inter-domain gap, such as objects' sizes, shapes, and foreground density variation. However, the resulting adaptations suggest that there is still a substantial inter-domain gap left to be minimized. We argue that this is due to two limitations: 1) Biased pseudo-label collection from self-training. 2) Multiple factors jointly contributing to how the object is perceived in the unseen target domain. In this work, we propose a grouping-exploration strategy framework, Group Explorer Domain Adaptation ($\\textbf{GroupEXP-DA}$), to addresses those two issues. Specifically, our grouping divides the available label sets into multiple clusters and ensures all of them have equal learning attention with the group-equivariant spatial feature, avoiding dominant types of objects causing imbalance problems. Moreover, grouping learns to divide objects by considering inherent factors in a data-driven manner, without considering each factor separately as existing works. On top of the group-equivariant spatial feature that selectively detects objects similar to the input group, we additionally introduce an explorative group update strategy that reduces the false negative detection in the target domain, further reducing the inter-domain gap. During inference, only the learned group features are necessary for making the group-equivariant spatial feature, placing our method as a simple add-on that can be applicable to most existing detectors. We show how each module contributes to substantially bridging the inter-domain gaps compared to existing works across large urban outdoor datasets such as NuScenes, Waymo, and KITTI.", "keywords": "Domain Adaptive 3D Detection;Domain Adaptation;3D Detection;Self-Training", "primary_area": "machine_vision", "supplementary_material": "", "author": "Sangyun Shin;Yuhang He;Madhu Vankadari;Ta-Ying Cheng;Qian Xie;Andrew Markham;Niki Trigoni", "authorids": "~Sangyun_Shin2;~Yuhang_He3;~Madhu_Vankadari1;~Ta-Ying_Cheng1;~Qian_Xie3;~Andrew_Markham2;~Niki_Trigoni1", "gender": ";M;M;M;M;M;F", "homepage": ";https://yuhanghe01.github.io/;https://madhubabuv.github.io;https://ttchengab.github.io;https://nuaaxq.github.io/personal_website/;;https://www.cs.ox.ac.uk/people/niki.trigoni/", "dblp": ";;205/3815;264/7281.html;;83/7169;t/NikiTrigoni", "google_scholar": ";H1p3ve8AAAAJ;St1130EAAAAJ;onX3k7kAAAAJ;we9tUrgAAAAJ;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Sangyun_Shin2;~Yuhang_He3;~Madhu_Vankadari1;~Ta-Ying_Cheng1;~Qian_Xie3;~Andrew_Markham2;~Niki_Trigoni1", "aff": ";University of Oxford;Department of Computer Science, University of Oxford;University of Oxford;Department of Computer Science, University of Oxford;University of Oxford;University of Oxford", "aff_domain": ";ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": ";PhD student;PhD student;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nshin2024towards,\ntitle={Towards Learning Group-Equivariant Features for Domain Adaptive 3D Detection},\nauthor={Sangyun Shin and Yuhang He and Madhu Vankadari and Ta-Ying Cheng and Qian Xie and Andrew Markham and Niki Trigoni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YEtirXhsh1}\n}", "github": "", "reviewers": "mXQC;mHQJ;5ZUx;CYB3", "pdf_size": 8223399, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "3;2;3;2", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "110;102;93;111", "wc_strengths": "53;21;70;26", "wc_weaknesses": "125;138;78;49", "wc_questions": "51;4;17;6", "wc_limitations": "1;6;12;1", "wc_review": "340;271;270;193", "wc_reply_reviewers": "66;23;16;29", "wc_reply_authors": "732;937;390;365", "reply_reviewers": "1;1;1;1", "reply_authors": "4;4;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.0, 7.245688373094719 ], "wc_strengths_avg": [ 42.5, 20.006249023742555 ], "wc_weaknesses_avg": [ 97.5, 35.80851853958776 ], "wc_questions_avg": [ 19.5, 18.848076824970764 ], "wc_limitations_avg": [ 5.0, 4.527692569068709 ], "wc_review_avg": [ 268.5, 52.012017842033394 ], "wc_reply_reviewers_avg": [ 33.5, 19.319679086361656 ], "wc_reply_authors_avg": [ 606.0, 239.88226278739327 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ro1rXRwGmZ4J:scholar.google.com/&scioq=Towards+Learning+Group-Equivariant+Features+for+Domain+Adaptive+3D+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": ";ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "CaptainCook4D: A Dataset for Understanding Errors in Procedural Activities", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97640", "id": "YFUp7zMrM9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YFUp7zMrM9", "openreview": "https://openreview.net/forum?id=YFUp7zMrM9", "poster": "", "project": "", "author_site": "Rohith Peddi, Shivvrat Arya, Bharath Challa, Likhitha Pallapothula, Akshay Vyas, Bhavya Gouripeddi, Qifan Zhang, Jikai Wang, Vasundhara Komaragiri, Eric Ragan, Nicholas Ruozzi, Yu Xiang, Vibhav Gogate", "tldr": "", "abstract": "Following step-by-step procedures is an essential component of various activities carried out by individuals in their daily lives. These procedures serve as a guiding framework that helps to achieve goals efficiently, whether it is assembling furniture or preparing a recipe. However, the complexity and duration of procedural activities inherently increase the likelihood of making errors. Understanding such procedural activities from a sequence of frames is a challenging task that demands an accurate interpretation of visual information and the ability to reason about the structure of the activity. To this end, we collect a new egocentric 4D dataset, CaptainCook4D, comprising 384 recordings (94.5 hours) of people performing recipes in real kitchen environments. This dataset consists of two distinct types of activity: one in which participants adhere to the provided recipe instructions and another in which they deviate and induce errors. We provide 5.3K step annotations and 10K fine-grained action annotations and benchmark the dataset for the following tasks: error recognition, multistep localization and procedure learning.", "keywords": "Error Recognition;Multi Step Localization;Procedure Learning;Mistake Detection", "primary_area": "", "supplementary_material": "/attachment/ebf7324c2ab0332579d635eff2e2ae8886ee0068.pdf", "author": "Rohith Peddi;Shivvrat Arya;Bharath Challa;Likhitha Pallapothula;Akshay Vyas;Bhavya Gouripeddi;Qifan Zhang;Jikai Wang;Vasundhara Komaragiri;Eric Ragan;Nicholas Ruozzi;Yu Xiang;Vibhav Gogate", "authorids": "~Rohith_Peddi1;~Shivvrat_Arya1;~Bharath_Challa1;~Likhitha_Pallapothula1;~Akshay_Vyas2;~Bhavya_Gouripeddi1;~Qifan_Zhang5;~Jikai_Wang3;~Vasundhara_Komaragiri1;~Eric_Ragan1;~Nicholas_Ruozzi1;~Yu_Xiang3;~Vibhav_Gogate1", "gender": "M;M;M;F;M;F;M;M;;;M;M;M", "homepage": "https://rohithpeddi.github.io/;https://shivvrat.github.io;;;;;;;;https://www.cise.ufl.edu/~eragan/;http://cs.utdallas.edu/people/faculty/nicholas-ruozzi/;http://www.hlt.utdallas.edu/~vgogate/;https://yuxng.github.io/", "dblp": "331/1651;275/7819;;;;;;;;;46/1258;14/4229;00/6716-1", "google_scholar": "MvhACDcAAAAJ;eM1co-kAAAAJ;;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;VT7D6oYAAAAJ;https://scholar.google.com.tw/citations?user=kzmSnEQAAAAJ;https://scholar.google.com.tw/citations?user=pm_dg3cAAAAJ;", "orcid": "0009-0007-4705-8129;0000-0002-9727-2533;;;;;;0009-0005-1725-3981;;;;;0000-0001-9431-5131", "linkedin": "rohithpeddi/;shivvrat/;bharathchalla/;likhitha-p-31054527a/;akshay-vyas-43345b134/;bhavyaalekhya;qifan-zhang-2a68211a7/;jikai-wang-03243787/;vasundhara-komaragiri/;;;;", "or_profile": "~Rohith_Peddi1;~Shivvrat_Arya1;~Bharath_Challa1;~Likhitha_Pallapothula1;~Akshay_Vyas2;~Bhavya_Gouripeddi1;~Qifan_Zhang5;~Jikai_Wang3;~Vasundhara_Komaragiri1;~Eric_Ragan1;~Nicholas_Ruozzi1;~Vibhav_Gogate1;~Yu_Xiang1", "aff": "University of Texas, Dallas;The University of Texas at Dallas;;University of Texas at Dallas;University of Texas at Dallas;University of Texas at Dallas;University of Texas at Dallas;University of Texas at Dallas;University of Texas, Dallas;University of Florida;University of Texas, Dallas;University of Texas, Dallas;University of Texas, Dallas", "aff_domain": "utdallas.edu;cs.utdallas.edu;;utd.edu;cs.utdallas.edu;utd.edu;utdallas.edu;cs.utdallas.edu;utdallas.edu;ufl.edu;utdallas.edu;utdallas.edu;utdallas.edu", "position": "PhD student;PhD student;;PhD student;PhD student;MS student;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\npeddi2024captaincookd,\ntitle={CaptainCook4D: A Dataset for Understanding Errors in Procedural Activities},\nauthor={Rohith Peddi and Shivvrat Arya and Bharath Challa and Likhitha Pallapothula and Akshay Vyas and Bhavya Gouripeddi and Qifan Zhang and Jikai Wang and Vasundhara Komaragiri and Eric Ragan and Nicholas Ruozzi and Yu Xiang and Vibhav Gogate},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YFUp7zMrM9}\n}", "github": "", "reviewers": "jiMN;4sjU;wKvX", "pdf_size": 23961013, "rating": "7;8;8", "confidence": "3;4;4", "wc_summary_and_contributions": "257;57;92", "wc_strengths": "83;54;41", "wc_improvement": "65;57;31", "wc_limitations": "69;1;1", "wc_correctness": "43;1;1", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "41;1;20", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "561;174;189", "wc_reply_reviewers": "0;12;26", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 135.33333333333334, 87.20983634634085 ], "wc_strengths_avg": [ 59.333333333333336, 17.55625877635159 ], "wc_improvement_avg": [ 51.0, 14.514360704718161 ], "wc_limitations_avg": [ 23.666666666666668, 32.05550741379015 ], "wc_correctness_avg": [ 15.0, 19.79898987322333 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 20.666666666666668, 16.33673433979046 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 308.0, 179.00279327429502 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 10.624918300339486 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18444008860416720785&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "utdallas.edu;cs.utdallas.edu;;utd.edu;cs.utdallas.edu;utd.edu;utdallas.edu;cs.utdallas.edu;utdallas.edu;ufl.edu;utdallas.edu;utdallas.edu;utdallas.edu", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;0;0;1;0;0;0", "aff_unique_norm": "University of Texas at Dallas;University of Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.utdallas.edu;https://www.ufl.edu", "aff_unique_abbr": "UT Dallas;UF", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Dallas;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Make Your LLM Fully Utilize the Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94709", "id": "YGTVEmBXtV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YGTVEmBXtV", "openreview": "https://openreview.net/forum?id=YGTVEmBXtV", "poster": "/media/PosterPDFs/NeurIPS%202024/94709.png?t=1730096252.1326094", "project": "", "author_site": "Shengnan An, Zexiong Ma, Zeqi Lin, Nanning Zheng, Jian-Guang Lou, Weizhu Chen", "tldr": "", "abstract": "While many contemporary large language models (LLMs) can process lengthy input, they still struggle to fully utilize information within the long context, known as the *lost-in-the-middle* challenge.\nWe hypothesize that it stems from insufficient explicit supervision during the long-context training, which fails to emphasize that any position in a long context can hold crucial information.\nBased on this intuition, our study presents **information-intensive (IN2) training**, a purely data-driven solution to overcome lost-in-the-middle.\nSpecifically, IN2 training leverages a synthesized long-context question-answer dataset, where the answer requires (1) **fine-grained information awareness** on a short segment (~128 tokens) within a synthesized long context (4K-32K tokens), and (2) the **integration and reasoning** of information from two or more short segments.\nThrough applying this information-intensive training on Mistral-7B, we present **FILM-7B** (FIll-in-the-Middle).\nTo thoroughly assess the ability of FILM-7B for utilizing long contexts, we design three probing tasks that encompass various context styles (document, code, and structured-data context) and information retrieval patterns (forward, backward, and bi-directional retrieval).\nThe probing results demonstrate that FILM-7B can robustly retrieve information from different positions in its 32K context window.\nBeyond these probing tasks, FILM-7B significantly improves the performance on real-world long-context tasks (e.g., 23.5->26.9 F1 score on NarrativeQA), while maintaining a comparable performance on short-context tasks (e.g., 59.3->59.2 accuracy on MMLU).", "keywords": "Large Language Model;Long-Context Large Language Model;Long-Context Training Data Construction;Long-Context Probing;Lost-in-the-Middle", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Shengnan An;Zexiong Ma;Zeqi Lin;Nanning Zheng;Jian-Guang Lou;Weizhu Chen", "authorids": "~Shengnan_An1;~Zexiong_Ma1;~Zeqi_Lin1;~Nanning_Zheng1;~Jian-Guang_Lou1;~Weizhu_Chen1", "gender": "M;;M;M;M;M", "homepage": "https://shengnanan.github.io/;;https://www.microsoft.com/en-us/research/people/zelin/;;https://www.microsoft.com/en-us/research/people/jlou/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": "267/9518;359/6950;https://dblp.uni-trier.de/pid/155/4370.html;07/256-1;37/1917;79/2536", "google_scholar": "oPiRHWMAAAAJ;jmZifrkAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;alDxINIAAAAJ;LG_E-4EAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Shengnan_An1;~Zexiong_Ma1;~Zeqi_Lin1;~Nanning_Zheng1;~Jian-Guang_Lou1;~Weizhu_Chen1", "aff": "Microsoft;Peking University;Microsoft Research;Xi'an Jiaotong University;Microsoft Research Asia;Microsoft GenAI", "aff_domain": "microsoft.com;pku.edu.cn;microsoft.com;xjtu.edu.cn;microsoft.com;microsoft.com", "position": "Intern;PhD student;Researcher;Full Professor;Principal Researcher;Vice President", "bibtex": "@inproceedings{\nan2024make,\ntitle={Make Your {LLM} Fully Utilize the Context},\nauthor={Shengnan An and Zexiong Ma and Zeqi Lin and Nanning Zheng and Jian-Guang Lou and Weizhu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YGTVEmBXtV}\n}", "github": "", "reviewers": "d47H;UZJA;oKze;HMgi", "pdf_size": 1830872, "rating": "6;6;7;8", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "86;116;122;150", "wc_strengths": "191;86;145;93", "wc_weaknesses": "388;223;62;17", "wc_questions": "417;9;101;33", "wc_limitations": "70;18;1;19", "wc_review": "1152;452;431;312", "wc_reply_reviewers": "212;28;29;31", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.5, 22.73213584333861 ], "wc_strengths_avg": [ 128.75, 42.558048592481306 ], "wc_weaknesses_avg": [ 172.5, 146.10013689247523 ], "wc_questions_avg": [ 140.0, 163.44723919356974 ], "wc_limitations_avg": [ 27.0, 25.836021365527625 ], "wc_review_avg": [ 586.75, 330.68517883328246 ], "wc_reply_reviewers_avg": [ 75.0, 79.1043614473943 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3994327648837438117&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "microsoft.com;pku.edu.cn;microsoft.com;xjtu.edu.cn;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Microsoft;Peking University;Xi'an Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;http://www.pku.edu.cn;https://www.xjtu.edu.cn", "aff_unique_abbr": "Microsoft;Peking U;XJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Transformers Represent Belief State Geometry in their Residual Stream", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94708", "id": "YIB7REL8UC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YIB7REL8UC", "openreview": "https://openreview.net/forum?id=YIB7REL8UC", "poster": "/media/PosterPDFs/NeurIPS%202024/94708.png?t=1731440053.7502978", "project": "", "author_site": "Adam Shai, Paul Riechers, Lucas Teixeira, Alexander Oldenziel, Sarah Marzen", "tldr": "", "abstract": "What computational structure are we building into large language models when we train them on next-token prediction? Here, we present evidence that this structure is given by the meta-dynamics of belief updating over hidden states of the data- generating process. Leveraging the theory of optimal prediction, we anticipate and then find that belief states are linearly represented in the residual stream of transformers, even in cases where the predicted belief state geometry has highly nontrivial fractal structure. We investigate cases where the belief state geometry is represented in the final residual stream or distributed across the residual streams of multiple layers, providing a framework to explain these observations. Furthermore we demonstrate that the inferred belief states contain information about the entire future, beyond the local next-token prediction that the transformers are explicitly trained on. Our work provides a general framework connecting the structure of training data to the geometric structure of activations inside transformers.", "keywords": "Interpretability;Computational Mechanics;Belief State;Features;Representation", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/07e957c1ac086f56df0489f1a2eedf7783d686c2.zip", "author": "Adam Shai;Lucas Teixeira;Alexander Gietelink Oldenziel;Sarah Marzen;Paul M. Riechers", "authorids": "~Adam_Shai1;~Lucas_Teixeira2;~Alexander_Gietelink_Oldenziel1;~Sarah_Marzen1;~Paul_M._Riechers1", "gender": "M;M;M;F;M", "homepage": "https://profiles.stanford.edu/adam-shai?releaseVersion=10.5.1;;https://sites.google.com/view/afdago/home;https://www.sarahmarzen.com/;https://www.paulriechers.com", "dblp": ";;;143/7392.html;https://dblp.uni-trier.de/pid/135/6323.html", "google_scholar": ";;;BeGerHkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-0135-3778", "linkedin": ";lucas-teixeira-998b9b295/;;;paul-riechers-0ba48047/", "or_profile": "~Adam_Shai1;~Lucas_Teixeira2;~Alexander_Gietelink_Oldenziel1;~Sarah_Marzen1;~Paul_M._Riechers1", "aff": "Stanford University;PIBBSS;University College London;W. M. Keck Science Department;Beyond Institute for Theoretical Science", "aff_domain": "stanford.edu;pibbss.ai;ucl.ac.uk;kecksci.claremont.edu;itsbeyond.org", "position": "Postdoc;Researcher;PhD student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nshai2024transformers,\ntitle={Transformers Represent Belief State Geometry in their Residual Stream},\nauthor={Adam Shai and Paul M. Riechers and Lucas Teixeira and Alexander Gietelink Oldenziel and Sarah Marzen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YIB7REL8UC}\n}", "github": "", "reviewers": "UE1W;exaE;89Bt;wEQw", "pdf_size": 3229928, "rating": "5;6;7;8", "confidence": "4;4;4;3", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "3;3;4;3", "wc_summary": "53;90;189;123", "wc_strengths": "40;142;323;114", "wc_weaknesses": "116;131;132;187", "wc_questions": "230;36;98;80", "wc_limitations": "23;90;1;12", "wc_review": "462;489;743;516", "wc_reply_reviewers": "9;11;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.75, 50.006874527408726 ], "wc_strengths_avg": [ 154.75, 104.04175844342501 ], "wc_weaknesses_avg": [ 141.5, 27.02313823374332 ], "wc_questions_avg": [ 111.0, 72.3118247591637 ], "wc_limitations_avg": [ 31.5, 34.659053651246744 ], "wc_review_avg": [ 552.5, 111.6299690943252 ], "wc_reply_reviewers_avg": [ 7.75, 4.548351349665063 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8276417436422597531&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;pibbss.ai;ucl.ac.uk;kecksci.claremont.edu;itsbeyond.org", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Stanford University;PIBBSS;University College London;W. M. Keck Science Department;Beyond Institute for Theoretical Science", "aff_unique_dep": ";;;Keck Science Department;", "aff_unique_url": "https://www.stanford.edu;;https://www.ucl.ac.uk;;", "aff_unique_abbr": "Stanford;;UCL;;", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;2;0;0", "aff_country_unique": "United States;;United Kingdom" }, { "title": "DiffPano: Scalable and Consistent Text to Panorama Generation with Spherical Epipolar-Aware Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94707", "id": "YIOvR40hSo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YIOvR40hSo", "openreview": "https://openreview.net/forum?id=YIOvR40hSo", "poster": "/media/PosterPDFs/NeurIPS%202024/94707.png?t=1731500480.5080466", "project": "", "author_site": "Weicai Ye, Chenhao Ji, Zheng Chen, Junyao Gao, Xiaoshui Huang, Song-Hai Zhang, Wanli Ouyang, Tong He, Cairong Zhao, Guofeng Zhang", "tldr": "", "abstract": "Diffusion-based methods have achieved remarkable achievements in 2D image or 3D object generation, however, the generation of 3D scenes and even $360^{\\circ}$ images remains constrained, due to the limited number of scene datasets, the complexity of 3D scenes themselves, and the difficulty of generating consistent multi-view images. To address these issues, we first establish a large-scale panoramic video-text dataset containing millions of consecutive panoramic keyframes with corresponding panoramic depths, camera poses, and text descriptions. Then, we propose a novel text-driven panoramic generation framework, termed DiffPano, to achieve scalable, consistent, and diverse panoramic scene generation. Specifically, benefiting from the powerful generative capabilities of stable diffusion, we fine-tune a single-view text-to-panorama diffusion model with LoRA on the established panoramic video-text dataset. We further design a spherical epipolar-aware multi-view diffusion model to ensure the multi-view consistency of the generated panoramic images. Extensive experiments demonstrate that DiffPano can generate scalable, consistent, and diverse panoramic images with given unseen text descriptions and camera poses.", "keywords": "Spherical Epipolar-Aware Diffusion;Text to Multi-View Panoramas Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Weicai Ye;Chenhao Ji;Zheng Chen;Junyao Gao;Xiaoshui Huang;Song-Hai Zhang;Wanli Ouyang;Tong He;Cairong Zhao;Guofeng Zhang", "authorids": "~Weicai_Ye3;~Chenhao_Ji1;~Zheng_Chen12;~Junyao_Gao2;~Xiaoshui_Huang1;~Song-Hai_Zhang1;~Wanli_Ouyang1;~Tong_He2;~Cairong_Zhao2;~Guofeng_Zhang3", "gender": "M;M;M;;Not Specified;M;;M;;M", "homepage": "https://ywcmaike.github.io/;;;;https://xiaoshuihuang.github.io/;https://www.cs.tsinghua.edu.cn/csen/info/1307/4342.htm;;http://tonghe90.github.io/;;http://www.cad.zju.edu.cn/home/gfzhang", "dblp": "02/10372;;;;167/9599;45/6733;;02/1554-1;;78/5389-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;JM0pCJIAAAAJ;fmbBnegAAAAJ;;https://scholar.google.ca/citations?user=rp7mYNsAAAAJ;https://scholar.google.com.tw/citations?user=AWtV-EQAAAAJ;;kWADCMUAAAAJ;;F0xfpXAAAAAJ", "orcid": "0000-0001-6215-1347;0009-0005-6718-4582;0000-0001-9796-1745;;;;;0000-0003-2772-9320;;0000-0001-5661-8430", "linkedin": "weicai-ye-b9b36b129/;;;;;;;;;", "or_profile": "~Weicai_Ye3;~Chenhao_Ji1;~Zheng_Chen12;~Junyao_Gao2;~Xiaoshui_Huang1;~Song-Hai_Zhang1;~Wanli_Ouyang1;~Tong_He2;~Cairong_Zhao2;~Guofeng_Zhang3", "aff": "Zhejiang University;Tongji University;ARC Lab;;Shanghai AI Laboratory;Tsinghua University;;Shanghai AI lab;;Zhejiang University", "aff_domain": "zju.edu.cn;tongji.edu.cn;tencent.com;;pjlab.org.cn;tsinghua.edu.cn;;pjlab.org.cn;;zju.edu.cn", "position": "PhD student;MS student;Intern;;Research Fellow;Associate Professor;;Researcher;;Full Professor", "bibtex": "@inproceedings{\nye2024diffpano,\ntitle={DiffPano: Scalable and Consistent Text to Panorama Generation with Spherical Epipolar-Aware Diffusion},\nauthor={Weicai Ye and Chenhao Ji and Zheng Chen and Junyao Gao and Xiaoshui Huang and Song-Hai Zhang and Wanli Ouyang and Tong He and Cairong Zhao and Guofeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YIOvR40hSo}\n}", "github": "", "reviewers": "Yygs;K8Nd;7HQE;2LuU", "pdf_size": 6803123, "rating": "5;5;6;6", "confidence": "4;5;5;3", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "99;57;68;63", "wc_strengths": "94;40;75;89", "wc_weaknesses": "294;86;221;55", "wc_questions": "70;50;18;17", "wc_limitations": "12;102;24;1", "wc_review": "569;335;406;225", "wc_reply_reviewers": "236;260;31;88", "wc_reply_authors": "692;1021;245;804", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;3;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.75, 16.20763708873073 ], "wc_strengths_avg": [ 74.5, 21.10094784600919 ], "wc_weaknesses_avg": [ 164.0, 97.6140358759948 ], "wc_questions_avg": [ 38.75, 22.398381637966615 ], "wc_limitations_avg": [ 34.75, 39.66973027384986 ], "wc_review_avg": [ 383.75, 124.89070221597763 ], "wc_reply_reviewers_avg": [ 153.75, 96.75322992024607 ], "wc_reply_authors_avg": [ 690.5, 283.1011303403785 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8553156162502270503&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;tongji.edu.cn;tencent.com;;pjlab.org.cn;tsinghua.edu.cn;;pjlab.org.cn;;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;0", "aff_unique_norm": "Zhejiang University;Tongji University;ARC Lab;Shanghai AI Laboratory;Tsinghua University;Shanghai AI Lab", "aff_unique_dep": ";;;;;AI Research", "aff_unique_url": "https://www.zju.edu.cn;https://www.tongji.edu.cn;;https://www.shanghai-ai-lab.com;https://www.tsinghua.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "ZJU;Tongji;;SAIL;THU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "UNIT: Unifying Image and Text Recognition in One Vision Encoder", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94706", "id": "YIxKeHQZpi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YIxKeHQZpi", "openreview": "https://openreview.net/forum?id=YIxKeHQZpi", "poster": "", "project": "", "author_site": "Yi Zhu, Zhou Yanpeng, Chunwei Wang, Yang Cao, Jianhua Han, Lu Hou, Hang Xu", "tldr": "", "abstract": "Currently, vision encoder models like Vision Transformers (ViTs) typically excel at image recognition tasks but cannot simultaneously support text recognition like human visual recognition. To address this limitation, we propose UNIT, a novel training framework aimed at UNifying Image and Text recognition within a single model. Starting with a vision encoder pre-trained with image recognition tasks, UNIT introduces a lightweight language decoder for predicting text outputs and a lightweight vision decoder to prevent catastrophic forgetting of the original image encoding capabilities. The training process comprises two stages: intra-scale pretraining and inter-scale finetuning. During intra-scale pretraining, UNIT learns unified representations from multi-scale inputs, where images and documents are at their commonly used resolution, to enable fundamental recognition capability. In the inter-scale finetuning stage, the model introduces scale-exchanged data, featuring images and documents at resolutions different from the most commonly used ones, to enhance its scale robustness. Notably, UNIT retains the original vision encoder architecture, making it cost-free in terms of inference and deployment. Experiments across multiple benchmarks confirm that our method significantly outperforms existing methods on document-related tasks (e.g., OCR and DocQA) while maintaining the performances on natural images, demonstrating its ability to substantially enhance text recognition without compromising its core image recognition capabilities.", "keywords": "large-scale vision-language model;vision foundation model", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/b9466b73d0030819d506d8fcb8f0ee8f94e2c9e3.zip", "author": "Yi Zhu;Zhou Yanpeng;Chunwei Wang;Yang Cao;Jianhua Han;Lu Hou;Hang Xu", "authorids": "~Yi_Zhu3;~Zhou_Yanpeng1;~Chunwei_Wang1;~Yang_Cao9;~Jianhua_Han1;~Lu_Hou2;~Hang_Xu1", "gender": "F;M;F;M;M;M;F", "homepage": "https://yeezhu.github.io;https://scholar.google.com/citations?user=aO8Vy6IAAAAJ&hl=en;https://github.com/chunweiwang0224;https://yangcaoai.github.io/;;;https://houlu369.github.io/", "dblp": ";318/6439;;25/7045-17;29/6207;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;aO8Vy6IAAAAJ;;https://scholar.google.com/citations?hl;OEPMQEMAAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;https://scholar.google.com.hk/citations?user=rnjoL5cAAAAJ", "orcid": "0000-0002-5087-895X;;;0000-0003-3830-7094;;0000-0003-3645-8972;", "linkedin": ";;;yang-cao-75b864148;;;", "or_profile": "~Yi_Zhu3;~Zhou_Yanpeng1;~Chunwei_Wang1;~Yang_Cao9;~Jianhua_Han1;~Hang_Xu1;~LU_HOU1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Hong Kong University of Science and Technology;Huawei Technologies Ltd.;Huawei Noah\u2018s Ark Lab;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;huawei.com;hkust.edu;huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Researcher;PhD student;Researcher;Researcher;researcher", "bibtex": "@inproceedings{\nzhu2024unit,\ntitle={{UNIT}: Unifying Image and Text Recognition in One Vision Encoder},\nauthor={Yi Zhu and Zhou Yanpeng and Chunwei Wang and Yang Cao and Jianhua Han and Lu Hou and Hang Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YIxKeHQZpi}\n}", "github": "", "reviewers": "rNyF;W6eH;qQPk", "pdf_size": 1906765, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;2", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "60;74;93", "wc_strengths": "38;81;69", "wc_weaknesses": "294;105;122", "wc_questions": "29;150;6", "wc_limitations": "3;9;16", "wc_review": "424;419;306", "wc_reply_reviewers": "196;84;64", "wc_reply_authors": "388;138;68", "reply_reviewers": "2;1;1", "reply_authors": "4;3;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 75.66666666666667, 13.523641850067197 ], "wc_strengths_avg": [ 62.666666666666664, 18.116904322268255 ], "wc_weaknesses_avg": [ 173.66666666666666, 85.37108539911053 ], "wc_questions_avg": [ 61.666666666666664, 63.162928649292034 ], "wc_limitations_avg": [ 9.333333333333334, 5.312459150169743 ], "wc_review_avg": [ 383.0, 54.48547206977899 ], "wc_reply_reviewers_avg": [ 114.66666666666667, 58.08805576211191 ], "wc_reply_authors_avg": [ 198.0, 137.35598518691012 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11088424258641557864&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "huawei.com;huawei.com;huawei.com;hkust.edu;huawei.com;huawei.com;huawei.com", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Huawei;Hong Kong University of Science and Technology", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.ust.hk", "aff_unique_abbr": "Huawei;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Evaluating Large Vision-and-Language Models on Children's Mathematical Olympiads", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97639", "id": "YMAU2kJgzY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YMAU2kJgzY", "openreview": "https://openreview.net/forum?id=YMAU2kJgzY", "poster": "/media/PosterPDFs/NeurIPS%202024/97639.png?t=1731716480.4449675", "project": "", "author_site": "Anoop Cherian, Kuan-Chuan Peng, Suhas Lohit, Joanna Matthiesen, Kevin Smith, Josh Tenenbaum", "tldr": "", "abstract": "Recent years have seen a significant progress in the general-purpose problem solving abilities of large vision and language models (LVLMs), such as ChatGPT, Gemini, etc.; some of these breakthroughs even seem to enable AI models to outperform human abilities in varied tasks that demand higher-order cognitive skills. Are the current large AI models indeed capable of generalized problem solving as humans do? A systematic analysis of AI capabilities for joint vision and text reasoning, however, is missing in the current scientific literature. In this paper, we make an effort towards filling this gap, by evaluating state-of-the-art LVLMs on their mathematical and algorithmic reasoning abilities using visuo-linguistic problems from children's Olympiads. Specifically, we consider problems from the Mathematical Kangaroo (MK) Olympiad, which is a popular international competition targeted at children from grades 1-12, that tests children's deeper mathematical abilities using puzzles that are appropriately gauged to their age and skills. Using the puzzles from MK, we created a dataset, dubbed SMART-840, consisting of 840 problems from years 2020-2024. With our dataset, we analyze LVLMs power on mathematical reasoning; their responses on our puzzles offer a direct way to compare against that of children. Our results show that modern LVLMs do demonstrate increasingly powerful reasoning skills in solving problems for higher grades, but lack the foundations to correctly answer problems designed for younger children. Further analysis shows that there is no significant correlation between the reasoning capabilities of AI models and that of young children, and their capabilities appear to be based on a different type of reasoning than the cumulative knowledge that underlies children's mathematical skills.", "keywords": "multimodal Large language models;large vision and language models;LLM;large language models;GPT;Gemini", "primary_area": "", "supplementary_material": "", "author": "Anoop Cherian;Kuan-Chuan Peng;Suhas Lohit;Joanna Matthiesen;Kevin A. Smith;Joshua B. Tenenbaum", "authorids": "~Anoop_Cherian1;~Kuan-Chuan_Peng3;~Suhas_Lohit1;~Joanna_Matthiesen1;~Kevin_A._Smith1;~Joshua_B._Tenenbaum1", "gender": ";F;;;M;M", "homepage": "http://suhaslohit.github.io;https://mathkangaroo.org/mks/about-math-kangaroo/team/;http://www.mit.edu/~k2smith/;;http://users.cecs.anu.edu.au/~cherian/;https://www.merl.com/people/kpeng", "dblp": "169/9097;;49/733;t/JoshuaBTenenbaum;44/7734;142/2421", "google_scholar": "GMRYY5cAAAAJ;;SZ3_FMQAAAAJ;;https://scholar.google.com.au/citations?hl=en;GBi3LYkAAAAJ", "orcid": ";;;;0000-0002-5566-0351;0000-0002-2682-9912", "linkedin": ";joanna-matthiesen-61a52a35/;;;anoop-cherian-4678a04/;kuan-chuan-peng-8344817a/", "or_profile": "~Suhas_Lohit1;~Joanna_Matthiesen1;~Kevin_A._Smith1;~Joshua_B._Tenenbaum1;~Anoop_Cherian2;~Kuan-chuan_Peng1", "aff": "Mitsubishi Electric Research Labs;Math Kangaroo USA NFP;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Mitsubishi Electric Research Labs;Mitsubishi Electric Research Labs", "aff_domain": "merl.com;mathkangaroo.org;mit.edu;mit.edu;merl.com;merl.com", "position": "Researcher;Principal Investigator;Postdoc;Professor;Principal Researcher;Principal Research Scientist", "bibtex": "@inproceedings{\ncherian2024evaluating,\ntitle={Evaluating Large Vision-and-Language Models on Children's Mathematical Olympiads},\nauthor={Anoop Cherian and Kuan-Chuan Peng and Suhas Lohit and Joanna Matthiesen and Kevin A. Smith and Joshua B. Tenenbaum},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YMAU2kJgzY}\n}", "github": "", "reviewers": "fAB4;rLH1;GsLr;WZeN", "pdf_size": 2091195, "rating": "5;5;6;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "74;29;123;109", "wc_strengths": "79;45;71;20", "wc_improvement": "330;68;3;176", "wc_limitations": "2;17;79;2", "wc_correctness": "72;16;1;2", "wc_clarity": "83;15;8;12", "wc_relation_to_prior_work": "1;15;1;2", "wc_documentation": "1;10;1;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "643;216;288;326", "wc_reply_reviewers": "640;0;0;95", "wc_reply_authors": "92;0;0;0", "reply_reviewers": "2;0;0;1", "reply_authors": "5;1;1;1", "rating_avg": [ 6.25, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 83.75, 36.29996556472196 ], "wc_strengths_avg": [ 53.75, 23.1880896151451 ], "wc_improvement_avg": [ 144.25, 123.77070533854123 ], "wc_limitations_avg": [ 25.0, 31.77262973063451 ], "wc_correctness_avg": [ 22.75, 29.046299247924853 ], "wc_clarity_avg": [ 29.5, 30.987900864692335 ], "wc_relation_to_prior_work_avg": [ 4.75, 5.931905258852336 ], "wc_documentation_avg": [ 3.5, 3.774917217635375 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 368.25, 163.472283583487 ], "wc_reply_reviewers_avg": [ 183.75, 266.2558684799267 ], "wc_reply_authors_avg": [ 23.0, 39.83716857408418 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.44022545316281186, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12189086360149497562&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "email": "merl.com;mathkangaroo.org;mit.edu;mit.edu;merl.com;merl.com", "author_num": 6, "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "Mitsubishi Electric Research Laboratories;Math Kangaroo USA NFP;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.merl.com;;https://web.mit.edu", "aff_unique_abbr": "MERL;;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Cat Is A Cat (Not A Dog!): Unraveling Information Mix-ups in Text-to-Image Encoders through Causal Analysis and Embedding Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94705", "id": "YNRYWZHmKY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YNRYWZHmKY", "openreview": "https://openreview.net/forum?id=YNRYWZHmKY", "poster": "/media/PosterPDFs/NeurIPS%202024/94705.png?t=1729999735.618401", "project": "", "author_site": "Chieh-Yun Chen, Chiang Tseng, Li-Wu Tsao, Hong-Han Shuai", "tldr": "", "abstract": "This paper analyzes the impact of causal manner in the text encoder of text-to-image (T2I) diffusion models, which can lead to information bias and loss. Previous works have focused on addressing the issues through the denoising process. However, there is no research discussing how text embedding contributes to T2I models, especially when generating more than one object. In this paper, we share a comprehensive analysis of text embedding: i) how text embedding contributes to the generated images and ii) why information gets lost and biases towards the first-mentioned object. Accordingly, we propose a simple but effective text embedding balance optimization method, which is training-free, with an improvement of 125.42\\% on information balance in stable diffusion. Furthermore, we propose a new automatic evaluation metric that quantifies information loss more accurately than existing methods, achieving 81\\% concordance with human assessments. This metric effectively measures the presence and accuracy of objects, addressing the limitations of current distribution scores like CLIP's text-image similarities.", "keywords": "Causal manner;Embedding optimization;Information mix-ups;Information loss;Text-to-image generative model", "primary_area": "generative_models", "supplementary_material": "", "author": "Chieh-Yun Chen;Chiang Tseng;Li-Wu Tsao;Hong-Han Shuai", "authorids": "~Chieh-Yun_Chen1;~Chiang_Tseng1;~Li-Wu_Tsao1;~Hong-Han_Shuai1", "gender": "F;F;M;M", "homepage": "https://chiehyunchen.github.io;;;http://basiclab.lab.nycu.edu.tw/", "dblp": "251/3520;389/3126;250/4399;86/10294", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?hl=zh-TW;https://scholar.google.com.tw/citations?user=CAv3aS8AAAAJ;https://scholar.google.com.tw/citations?user=MSWL2noAAAAJ", "orcid": "0000-0002-7412-7894;0009-0009-7750-1456;;0000-0003-2216-077X", "linkedin": "chieh-yun-chen;;;", "or_profile": "~Chieh-Yun_Chen1;~Chiang_Tseng1;~Li-Wu_Tsao1;~Hong-Han_Shuai1", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "position": "Researcher;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2024a,\ntitle={A Cat Is A Cat (Not A Dog!): Unraveling Information Mix-ups in Text-to-Image Encoders through Causal Analysis and Embedding Optimization},\nauthor={Chieh-Yun Chen and Chiang Tseng and Li-Wu Tsao and Hong-Han Shuai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YNRYWZHmKY}\n}", "github": "", "reviewers": "fTPW;LGCP;G9At;ACAY", "pdf_size": 25339641, "rating": "5;6;6;7", "confidence": "4;4;5;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "68;69;143;156", "wc_strengths": "44;151;29;56", "wc_weaknesses": "195;99;410;77", "wc_questions": "4;139;39;47", "wc_limitations": "11;57;3;10", "wc_review": "322;515;624;346", "wc_reply_reviewers": "79;18;77;122", "wc_reply_authors": "554;86;55;45", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 40.761501444377636 ], "wc_strengths_avg": [ 70.0, 47.733635939450494 ], "wc_weaknesses_avg": [ 195.25, 131.6859426818216 ], "wc_questions_avg": [ 57.25, 49.89175783634006 ], "wc_limitations_avg": [ 20.25, 21.44032415799724 ], "wc_review_avg": [ 451.75, 124.18610026891093 ], "wc_reply_reviewers_avg": [ 74.0, 36.99324262618783 ], "wc_reply_authors_avg": [ 185.0, 213.57785465726545 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3898925873053882565&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "nycu.edu.tw;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University", "aff_unique_dep": "", "aff_unique_url": "https://www.nycu.edu.tw", "aff_unique_abbr": "NYCU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Single Image Unlearning: Efficient Machine Unlearning in Multimodal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94704", "id": "YNx7ai4zTs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YNx7ai4zTs", "openreview": "https://openreview.net/forum?id=YNx7ai4zTs", "poster": "/media/PosterPDFs/NeurIPS%202024/94704.png?t=1731118784.3100522", "project": "", "author_site": "Jiaqi Li, Qianshan Wei, Chuanyi Zhang, Guilin Qi, Miaozeng Du, Yongrui Chen, Sheng Bi, Fan Liu", "tldr": "", "abstract": "Machine unlearning (MU) empowers individuals with the `right to be forgotten' by removing their private or sensitive information encoded in machine learning models. However, it remains uncertain whether MU can be effectively applied to Multimodal Large Language Models (MLLMs), particularly in scenarios of forgetting the leaked visual data of concepts. To overcome the challenge, we propose an efficient method, Single Image Unlearning (SIU), to unlearn the visual recognition of a concept by fine-tuning a single associated image for few steps. SIU consists of two key aspects: (i) Constructing Multifaceted fine-tuning data. We introduce four targets, based on which we construct fine-tuning data for the concepts to be forgotten; (ii) Joint training loss. To synchronously forget the visual recognition of concepts and preserve the utility of MLLMs, we fine-tune MLLMs through a novel Dual Masked KL-divergence Loss combined with Cross Entropy loss. Alongside our method, we establish MMUBench, a new benchmark for MU in MLLMs and introduce a collection of metrics for its evaluation. Experimental results on MMUBench show that SIU completely surpasses the performance of existing methods. Furthermore, we surprisingly find that SIU can avoid invasive membership inference attacks and jailbreak attacks. To the best of our knowledge, we are the first to explore MU in MLLMs. We will release the code and benchmark in the near future.", "keywords": "multimodal;machine unlearning", "primary_area": "privacy", "supplementary_material": "/attachment/ce600f8ae938928e056db6d3fd5f1575288725ed.zip", "author": "Jiaqi Li;Qianshan Wei;Chuanyi Zhang;Guilin Qi;Miaozeng Du;Yongrui Chen;Sheng Bi;Fan Liu", "authorids": "~Jiaqi_Li5;~Qianshan_Wei1;~Chuanyi_Zhang1;~Guilin_Qi2;~Miaozeng_Du1;~Yongrui_Chen1;~Sheng_Bi2;~Fan_Liu7", "gender": "M;;;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=dhHKxpoAAAAJ&hl=en;;;https://cse.seu.edu.cn/_s191/2023/1024/c23024a469541/page.psp;https://github.com/DiWHNJ;;https://cies.hhu.edu.cn/2013/0508/c4122a54931/page.htm;", "dblp": ";;87/6424;71/5935;;143/0948-2.html;56/2849-3;", "google_scholar": "dhHKxpoAAAAJ;;;;;8ZjIHyEAAAAJ;Sneof_QAAAAJ;hPEgH-MAAAAJ", "orcid": "0000-0002-4559-9868;0009-0000-5541-5594;0000-0001-8724-5796;0000-0003-0150-7236;;0000-0001-8934-3920;0000-0001-8746-9845;0000-0001-8292-2110", "linkedin": ";;;;;;;", "or_profile": "~Jiaqi_Li5;~Qianshan_Wei1;~Chuanyi_Zhang1;~Guilin_Qi2;~Miaozeng_Du1;~Yongrui_Chen1;~Fan_Liu7;~bisheng1", "aff": "Southeast University;Southeast University;Hohai University;Southeast University;Southeast University;Southeast University;Hohai University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;hhu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;hhu.edu.cn;seu.edu.cn", "position": "PhD student;Undergrad student;Instructor;Full Professor;MS student;PhD student;Full Professor;Lecturer", "bibtex": "@inproceedings{\nli2024single,\ntitle={Single Image Unlearning: Efficient Machine Unlearning in Multimodal Large Language Models},\nauthor={Jiaqi Li and Qianshan Wei and Chuanyi Zhang and Guilin Qi and Miaozeng Du and Yongrui Chen and Sheng Bi and Fan Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YNx7ai4zTs}\n}", "github": "", "reviewers": "DWvJ;tqoU;j8ff", "pdf_size": 3105415, "rating": "5;5;8", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;2;4", "presentation": "3;2;3", "wc_summary": "66;152;33", "wc_strengths": "37;92;112", "wc_weaknesses": "66;266;21", "wc_questions": "137;88;16", "wc_limitations": "15;10;17", "wc_review": "321;608;199", "wc_reply_reviewers": "31;60;11", "wc_reply_authors": "146;11;10", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 83.66666666666667, 50.16195991209098 ], "wc_strengths_avg": [ 80.33333333333333, 31.710495984067414 ], "wc_weaknesses_avg": [ 117.66666666666667, 106.48421896642192 ], "wc_questions_avg": [ 80.33333333333333, 49.69462300446151 ], "wc_limitations_avg": [ 14.0, 2.943920288775949 ], "wc_review_avg": [ 376.0, 171.44289622689726 ], "wc_reply_reviewers_avg": [ 34.0, 20.11632835948615 ], "wc_reply_authors_avg": [ 55.666666666666664, 63.87661717891941 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11805920555950331358&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "seu.edu.cn;seu.edu.cn;hhu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;hhu.edu.cn;seu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;0;0;1;0", "aff_unique_norm": "Southeast University;Hohai University", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.hohai.edu.cn", "aff_unique_abbr": "SEU;Hohai", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "On the Limitations of Fractal Dimension as a Measure of Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94703", "id": "YO6GVPUrKN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YO6GVPUrKN", "openreview": "https://openreview.net/forum?id=YO6GVPUrKN", "poster": "/media/PosterPDFs/NeurIPS%202024/94703.png?t=1733871269.225731", "project": "", "author_site": "Charlie Tan, In\u00e9s Garc\u00eda-Redondo, Qiquan Wang, Michael Bronstein, Anthea Monod", "tldr": "", "abstract": "Bounding and predicting the generalization gap of overparameterized neural networks remains a central open problem in theoretical machine learning. There is a recent and growing body of literature that proposes the framework of fractals to model optimization trajectories of neural networks, motivating generalization bounds and measures based on the fractal dimension of the trajectory. Notably, the persistent homology dimension has been proposed to correlate with the generalization gap. This paper performs an empirical evaluation of these persistent homology-based generalization measures, with an in-depth statistical analysis. Our study reveals confounding effects in the observed correlation between generalization and topological measures due to the variation of hyperparameters. We also observe that fractal dimension fails to predict generalization of models trained from poor initializations. We lastly reveal the intriguing manifestation of model-wise double descent in these topological generalization measures. Our work forms a basis for a deeper investigation of the causal relationships between fractal geometry, topological data analysis, and neural network optimization.", "keywords": "Generalization;Optimization;Persistent Homology;Fractal Dimension", "primary_area": "learning_theory", "supplementary_material": "/attachment/e2abf8b59986275a882dbee9138d64c33f518aef.zip", "author": "Charlie Tan;In\u00e9s Garc\u00eda-Redondo;Qiquan Wang;Michael M. Bronstein;Anthea Monod", "authorids": "~Charlie_Tan1;~In\u00e9s_Garc\u00eda-Redondo1;~Qiquan_Wang2;~Michael_M._Bronstein1;~Anthea_Monod1", "gender": ";F;F;M;F", "homepage": ";https://sites.google.com/view/ines-garcia-redondo/home;https://sites.google.com/view/qiquanwang;http://www.inf.usi.ch/bronstein/;https://sites.google.com/view/antheamonod/home", "dblp": ";;;07/2668;246/2927", "google_scholar": ";QJugIzgAAAAJ;https://scholar.google.com/citations?hl=en;UU3N6-UAAAAJ;https://scholar.google.co.il/citations?hl=en", "orcid": ";0000-0001-8340-4235;0009-0008-9162-0390;;0000-0001-6774-8150", "linkedin": "charlie-tan-48b474209/;ines-garcia-redondo/;;mbronstein/;anthea-monod-046152162/", "or_profile": "~Charlie_Tan1;~In\u00e9s_Garc\u00eda-Redondo1;~Qiquan_Wang2;~Michael_M._Bronstein1;~Anthea_Monod1", "aff": "InstaDeep;Imperial College London;Imperial College London;University of Oxford;Imperial College London, Imperial College London", "aff_domain": "instadeep.com;imperial.ac.uk;ic.ac.uk;ox.ac.uk;imperial.ac.uk", "position": "Intern;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntan2024on,\ntitle={On the Limitations of Fractal Dimension as a Measure of Generalization},\nauthor={Charlie Tan and In{\\'e}s Garc{\\'\\i}a-Redondo and Qiquan Wang and Michael M. Bronstein and Anthea Monod},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YO6GVPUrKN}\n}", "github": "", "reviewers": "rTx2;whkD;d6bB;garn", "pdf_size": 9286482, "rating": "5;6;7;7", "confidence": "4;1;4;4", "soundness": "2;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "83;71;85;143", "wc_strengths": "74;19;63;111", "wc_weaknesses": "4;20;181;389", "wc_questions": "2;54;333;267", "wc_limitations": "11;7;5;7", "wc_review": "174;171;667;917", "wc_reply_reviewers": "4;10;93;582", "wc_reply_authors": "0;43;79;963", "reply_reviewers": "1;1;1;2", "reply_authors": "1;2;2;4", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 27.941904015295737 ], "wc_strengths_avg": [ 66.75, 32.80529682840867 ], "wc_weaknesses_avg": [ 148.5, 155.15234448760353 ], "wc_questions_avg": [ 164.0, 139.2066808741592 ], "wc_limitations_avg": [ 7.5, 2.179449471770337 ], "wc_review_avg": [ 482.25, 322.1159535012198 ], "wc_reply_reviewers_avg": [ 172.25, 239.1697880167978 ], "wc_reply_authors_avg": [ 271.25, 400.36007230991453 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4180254861730747757&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "instadeep.com;imperial.ac.uk;ic.ac.uk;ox.ac.uk;imperial.ac.uk", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "InstaDeep;Imperial College London;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.instadeep.com;https://www.imperial.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "InstaDeep;ICL;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Initialization is Critical to Whether Transformers Fit Composite Functions by Reasoning or Memorizing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94702", "id": "YOBGdVaYTS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YOBGdVaYTS", "openreview": "https://openreview.net/forum?id=YOBGdVaYTS", "poster": "/media/PosterPDFs/NeurIPS%202024/94702.png?t=1731241117.2635047", "project": "", "author_site": "Zhongwang Zhang, Pengxiao Lin, Zhiwei Wang, Yaoyu Zhang, Zhi-Qin Xu", "tldr": "", "abstract": "Transformers have shown impressive capabilities across various tasks, but their performance on compositional problems remains a topic of debate. In this work, we investigate the mechanisms of how transformers behave on unseen compositional tasks. We discover that the parameter initialization scale plays a critical role in determining whether the model learns inferential (reasoning-based) solutions, which capture the underlying compositional primitives, or symmetric (memory-based) solutions, which simply memorize mappings without understanding the compositional structure. By analyzing the information flow and vector representations within the model, we reveal the distinct mechanisms underlying these solution types. We further find that inferential (reasoning-based) solutions exhibit low complexity bias, which we hypothesize is a key factor enabling them to learn individual mappings for single anchors. We validate our conclusions on various real-world datasets. Our findings provide valuable insights into the role of initialization scale in tuning the reasoning and memorizing ability and we propose the initialization rate $\\gamma$ to be a convenient tunable hyper-parameter in common deep learning frameworks, where $1/d_{\\mathrm{in}}^\\gamma$ is the standard deviation of parameters of the layer with $d_{\\mathrm{in}}$ input neurons.", "keywords": "parameter initialization;transformer;compositional task;reasoning;memorizing", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/ef90d2164825515e2dca2e113c84509ddbde935a.zip", "author": "Zhongwang Zhang;Pengxiao Lin;Zhiwei Wang;Yaoyu Zhang;Zhi-Qin John Xu", "authorids": "~Zhongwang_Zhang1;~Pengxiao_Lin1;~Zhiwei_Wang3;~Yaoyu_Zhang1;~Zhi-Qin_John_Xu1", "gender": ";M;M;;M", "homepage": "https://sjtuzzw.github.io/;https://ins.sjtu.edu.cn;;https://ins.sjtu.edu.cn/peoples/zhangyaoyu;https://ins.sjtu.edu.cn/people/xuzhiqin/", "dblp": "293/9763;;;;223/4493.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?view_op=list_works;;EjLvG5cAAAAJ", "orcid": ";;;;0000-0002-0122-0879", "linkedin": ";;;;", "or_profile": "~Zhongwang_Zhang1;~Pengxiao_Lin1;~Zhiwei_Wang3;~Yaoyu_Zhang1;~Zhiqin_Xu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024initialization,\ntitle={Initialization is Critical to Whether Transformers Fit Composite Functions by Reasoning or Memorizing},\nauthor={Zhongwang Zhang and Pengxiao Lin and Zhiwei Wang and Yaoyu Zhang and Zhi-Qin John Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YOBGdVaYTS}\n}", "github": "", "reviewers": "MVAX;LUj2;6WHp", "pdf_size": 11155729, "rating": "6;6;6", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;2;4", "wc_summary": "170;122;86", "wc_strengths": "105;49;35", "wc_weaknesses": "315;201;34", "wc_questions": "379;380;36", "wc_limitations": "12;53;34", "wc_review": "981;805;225", "wc_reply_reviewers": "298;102;23", "wc_reply_authors": "1519;278;40", "reply_reviewers": "3;2;1", "reply_authors": "4;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 126.0, 34.40930106817051 ], "wc_strengths_avg": [ 63.0, 30.243456592570013 ], "wc_weaknesses_avg": [ 183.33333333333334, 115.39593676651802 ], "wc_questions_avg": [ 265.0, 161.9279675246579 ], "wc_limitations_avg": [ 33.0, 16.753109164172084 ], "wc_review_avg": [ 670.3333333333334, 322.99157195746693 ], "wc_reply_reviewers_avg": [ 141.0, 115.60565153428558 ], "wc_reply_authors_avg": [ 612.3333333333334, 648.4310980272992 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3195019007673598120&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Synergistic Dual Spatial-aware Generation of Image-to-text and Text-to-image", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94701", "id": "YOUh3lgRYI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YOUh3lgRYI", "openreview": "https://openreview.net/forum?id=YOUh3lgRYI", "poster": "/media/PosterPDFs/NeurIPS%202024/94701.png?t=1730558858.7744362", "project": "", "author_site": "Yu Zhao, Hao Fei, Xiangtai Li, Libo Qin, Jiayi Ji, Hongyuan Zhu, Meishan Zhang, Min Zhang, Jianguo Wei", "tldr": "", "abstract": "In the visual spatial understanding (VSU) field, spatial image-to-text (SI2T) and spatial text-to-image (ST2I) are two fundamental tasks that appear in dual form. Existing methods for standalone SI2T or ST2I perform imperfectly in spatial understanding, due to the difficulty of 3D-wise spatial feature modeling. In this work, we consider modeling the SI2T and ST2I together under a dual learning framework. During the dual framework, we then propose to represent the 3D spatial scene features with a novel 3D scene graph (3DSG) representation that can be shared and beneficial to both tasks. Further, inspired by the intuition that the easier 3D$\\to$image and 3D$\\to$text processes also exist symmetrically in the ST2I and SI2T, respectively, we propose the Spatial Dual Discrete Diffusion (SD$^3$) framework, which utilizes the intermediate features of the 3D$\\to$X processes to guide the hard X$\\to$3D processes, such that the overall ST2I and SI2T will benefit each other. On the visual spatial understanding dataset VSD, our system outperforms the mainstream T2I and I2T methods significantly.\nFurther in-depth analysis reveals how our dual learning strategy advances.", "keywords": "Diffusion;Spatial Understanding;Dual Learning", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yu Zhao;Hao Fei;Xiangtai Li;Libo Qin;Jiayi Ji;Hongyuan Zhu;Meishan Zhang;Min Zhang;Jianguo Wei", "authorids": "~Yu_Zhao4;~Hao_Fei1;~Xiangtai_Li1;~Libo_Qin1;~Jiayi_Ji1;~Hongyuan_Zhu1;~Meishan_Zhang1;~Min_Zhang9;~Jianguo_Wei3", "gender": "M;M;;;M;;M;M;", "homepage": ";https://haofei.vip/;;;https://scholar.google.com/citations?user=xp_rICcAAAAJ&hl=zh-CN;;https://zhangmeishan.github.io/;https://zhangmin-nlp-ai.github.io/;", "dblp": ";81/3569-1;;;250/9459;;127/0273;83/5342-5;89/4446", "google_scholar": "jeW8EcYAAAAJ;YGDX46AAAAAJ;;;xp_rICcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;P2q-n1YAAAAJ", "orcid": "0000-0001-8714-4151;0000-0003-3026-6347;;;0000-0002-9956-6308;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Yu_Zhao4;~Hao_Fei1;~Xiangtai_Li1;~Libo_Qin1;~Jiayi_Ji1;~Hongyuan_Zhu1;~Meishan_Zhang1;~Min_Zhang9;~Jianguo_Wei3", "aff": "Tianjin University;National University of Singapore;;;Xiamen University;;Harbin Institute of Technology (Shenzhen), China;Harbin Institute of Technology, Shenzhen;Tianjin University", "aff_domain": "tju.edu.cn;nus.edu.sg;;;xmu.edu.cn;;hit.edu.cn;hit.edu.cn;tju.edu.cn", "position": "PhD student;Postdoc;;;Postdoc;;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2024synergistic,\ntitle={Synergistic Dual Spatial-aware Generation of Image-to-text and Text-to-image},\nauthor={Yu Zhao and Hao Fei and Xiangtai Li and Libo Qin and Jiayi Ji and Hongyuan Zhu and Meishan Zhang and Min Zhang and Jianguo Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YOUh3lgRYI}\n}", "github": "", "reviewers": "Nx71;Urkw;cK3d;WpM7", "pdf_size": 9676886, "rating": "4;6;8;8", "confidence": "2;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;4;4", "presentation": "2;3;3;4", "wc_summary": "96;54;48;105", "wc_strengths": "76;51;23;81", "wc_weaknesses": "124;136;243;163", "wc_questions": "6;3;4;5", "wc_limitations": "6;12;2;16", "wc_review": "308;256;320;370", "wc_reply_reviewers": "0;0;105;104", "wc_reply_authors": "54;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.75, 25.043711785596 ], "wc_strengths_avg": [ 57.75, 23.05834989759675 ], "wc_weaknesses_avg": [ 166.5, 46.37078821844632 ], "wc_questions_avg": [ 4.5, 1.118033988749895 ], "wc_limitations_avg": [ 9.0, 5.385164807134504 ], "wc_review_avg": [ 313.5, 40.5308524460071 ], "wc_reply_reviewers_avg": [ 52.25, 52.25119615855698 ], "wc_reply_authors_avg": [ 13.5, 23.382685902179844 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.6225430174794673, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XEQpVSYsIXQJ:scholar.google.com/&scioq=Synergistic+Dual+Spatial-aware+Generation+of+Image-to-text+and+Text-to-image&hl=en&as_sdt=0,38", "gs_version_total": 6, "email": "tju.edu.cn;nus.edu.sg;;;xmu.edu.cn;;hit.edu.cn;hit.edu.cn;tju.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;3;0", "aff_unique_norm": "Tianjin University;National University of Singapore;Xiamen University;Harbin Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.tju.edu.cn;https://www.nus.edu.sg;https://www.xmu.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "TJU;NUS;XMU;HIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Cross-model Control: Improving Multiple Large Language Models in One-time Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94699", "id": "YPqHSTSoFs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YPqHSTSoFs", "openreview": "https://openreview.net/forum?id=YPqHSTSoFs", "poster": "/media/PosterPDFs/NeurIPS%202024/94699.png?t=1731929657.7002554", "project": "", "author_site": "Jiayi Wu, Hao Sun, Hengyi Cai, Lixin Su, Shuaiqiang Wang, Dawei Yin, Xiang Li, Ming Gao", "tldr": "", "abstract": "The number of large language models (LLMs) with varying parameter scales and vocabularies is increasing. While they deliver powerful performance, they also face a set of common optimization needs to meet specific requirements or standards, such as instruction following or avoiding the output of sensitive information from the real world. However, how to reuse the fine-tuning outcomes of one model to other models to reduce training costs remains a challenge. To bridge this gap, we introduce Cross-model Control (CMC), a method that improves multiple LLMs in one-time training with a portable tiny language model. Specifically, we have observed that the logit shift before and after fine-tuning is remarkably similar across different models. Based on this insight, we incorporate a tiny language model with a minimal number of parameters. By training alongside a frozen template LLM, the tiny model gains the capability to alter the logits output by the LLMs. To make this tiny language model applicable to models with different vocabularies, we propose a novel token mapping strategy named PM-MinED. We have conducted extensive experiments on instruction tuning and unlearning tasks, demonstrating the effectiveness of CMC. Our code is available at https://github.com/wujwyi/CMC", "keywords": "Large Language Model;Fine-tune;model transfer", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/b99b1f112288b033171face641ed50f061c98bca.zip", "author": "Jiayi Wu;Hao Sun;Hengyi Cai;Lixin Su;Shuaiqiang Wang;Dawei Yin;Xiang Li;Ming Gao", "authorids": "~Jiayi_Wu2;~Hao_Sun9;~Hengyi_Cai1;~Lixin_Su1;~Shuaiqiang_Wang2;~Dawei_Yin1;~Xiang_Li24;~Ming_Gao1", "gender": ";M;M;M;M;M;M;M", "homepage": "https://wujwyi.github.io/2023/03/31/About-me/;https://sunhaopku.github.io/;https://www.caihengyi.com/;;http://wangshuaiqiang.net/;https://www.yindawei.com/;https://lixiang3776.github.io;http://dase.ecnu.edu.cn/mgao/", "dblp": "51/1096-1;82/2248-15;204/2493;39/2380;16/1524;;40/1491-67.html;71/4173-1", "google_scholar": "bQwV46IAAAAJ;at9AB50AAAAJ;Kz-r34UAAAAJ;;https://scholar.google.com.hk/citations?user=8SbYYcIAAAAJ;GuQ9bpAAAAAJ;JnxxNtsAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0008-9255-2908;0000-0001-8456-7925;0000-0002-7147-5666;;0000-0002-9212-1947;0000-0002-0684-6205;0009-0003-0142-2483;0000-0002-5603-2680", "linkedin": ";;;;;dwyin/;;", "or_profile": "~Jiayi_Wu2;~Hao_Sun9;~Hengyi_Cai1;~Lixin_Su1;~Shuaiqiang_Wang2;~Dawei_Yin1;~Xiang_Li24;~Ming_Gao1", "aff": "East China Normal University;Peking University;Chinese Academy of Sciences;Baidu;Baidu Inc.;Baidu;East China Normal University;", "aff_domain": "ecnu.edu.cn;pku.edu.cn;ict.ac.cn;baidu.com;baidu.com;baidu.com;ecnu.edu.cn;", "position": "PhD student;PhD student;Researcher;Researcher;Principal Researcher;Principal Researcher;Full Professor;", "bibtex": "@inproceedings{\nwu2024crossmodel,\ntitle={Cross-model Control: Improving Multiple Large Language Models in One-time Training},\nauthor={Jiayi Wu and Hao Sun and Hengyi Cai and Lixin Su and Shuaiqiang Wang and Dawei Yin and Xiang Li and Ming Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YPqHSTSoFs}\n}", "github": "", "reviewers": "V2UG;aFGW;VCcy;FZv7;VAGR", "pdf_size": 2375814, "rating": "5;6;6;7;8", "confidence": "3;4;4;4;5", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "135;108;107;114;142", "wc_strengths": "66;90;156;34;176", "wc_weaknesses": "134;48;125;137;126", "wc_questions": "2;45;86;2;94", "wc_limitations": "2;178;51;7;12", "wc_review": "339;469;525;294;550", "wc_reply_reviewers": "17;19;0;45;0", "wc_reply_authors": "12;17;41;26;0", "reply_reviewers": "1;1;0;2;0", "reply_authors": "2;2;2;3;1", "rating_avg": [ 6.4, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 121.2, 14.496896219536097 ], "wc_strengths_avg": [ 104.4, 53.71629175585373 ], "wc_weaknesses_avg": [ 114.0, 33.31666249791537 ], "wc_questions_avg": [ 45.8, 39.4380526902635 ], "wc_limitations_avg": [ 50.0, 66.3053542332744 ], "wc_review_avg": [ 435.4, 101.56495458572311 ], "wc_reply_reviewers_avg": [ 16.2, 16.509391266791155 ], "wc_reply_authors_avg": [ 19.2, 13.760813929415658 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9302605094190635, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=446805013650361076&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ecnu.edu.cn;pku.edu.cn;ict.ac.cn;baidu.com;baidu.com;baidu.com;ecnu.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;2;3;3;3;0", "aff_unique_norm": "East China Normal University;Peking University;Chinese Academy of Sciences;Baidu", "aff_unique_dep": ";;;Baidu, Inc.", "aff_unique_url": "http://www.ecnu.edu.cn;http://www.pku.edu.cn;https://www.cas.cn;https://www.baidu.com", "aff_unique_abbr": "ECNU;Peking U;CAS;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Deterministic Policies for Constrained Reinforcement Learning in Polynomial Time", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94698", "id": "YRemB4naKK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YRemB4naKK", "openreview": "https://openreview.net/forum?id=YRemB4naKK", "poster": "/media/PosterPDFs/NeurIPS%202024/94698.png?t=1731728265.200055", "project": "", "tldr": "", "abstract": "We present a novel algorithm that efficiently computes near-optimal deterministic policies for constrained reinforcement learning (CRL) problems. Our approach combines three key ideas: (1) value-demand augmentation, (2) action-space approximate dynamic programming, and (3) time-space rounding. Our algorithm constitutes a fully polynomial-time approximation scheme (FPTAS) for any time-space recursive (TSR) cost criteria. A TSR criteria requires the cost of a policy to be computable recursively over both time and (state) space, which includes classical expectation, almost sure, and anytime constraints. Our work answers three open questions spanning two long-standing lines of research: polynomial-time approximability is possible for 1) anytime-constrained policies, 2) almost-sure-constrained policies, and 3) deterministic expectation-constrained policies.", "keywords": "Constrained Reinforcement Learning;Approximation Algorithms;Computational Complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jeremy McMahan", "authorids": "~Jeremy_McMahan1", "gender": "M", "homepage": "http://jeremymmcmahan.com", "dblp": "299/1330", "google_scholar": "Ujya6FIAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Jeremy_McMahan1", "aff": "University of Wisconsin - Madison", "aff_domain": "wisc.edu", "position": "PhD student", "bibtex": "@inproceedings{\nmcmahan2024deterministic,\ntitle={Deterministic Policies for Constrained Reinforcement Learning in Polynomial Time},\nauthor={Jeremy McMahan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YRemB4naKK}\n}", "github": "", "reviewers": "SLVm;DPnW;iqFB;pADD", "pdf_size": 490100, "rating": "4;5;5;8", "confidence": "4;3;2;2", "soundness": "2;4;2;4", "novelty": "1;3;3;4", "presentation": "1;3;2;4", "wc_summary": "80;65;45;91", "wc_strengths": "98;42;18;102", "wc_weaknesses": "326;96;170;209", "wc_questions": "34;36;62;34", "wc_limitations": "19;6;1;9", "wc_review": "557;245;296;445", "wc_reply_reviewers": "522;0;0;17", "wc_reply_authors": "407;0;0;0", "reply_reviewers": "4;0;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 70.25, 17.25362280797862 ], "wc_strengths_avg": [ 65.0, 36.0416425818802 ], "wc_weaknesses_avg": [ 200.25, 83.17564246821301 ], "wc_questions_avg": [ 41.5, 11.863810517704673 ], "wc_limitations_avg": [ 8.75, 6.5717197140474575 ], "wc_review_avg": [ 385.75, 123.18964039236417 ], "wc_reply_reviewers_avg": [ 134.75, 223.68658319175069 ], "wc_reply_authors_avg": [ 101.75, 176.23616967013325 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.7035264706814485, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3656060071321528091&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "wisc.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Reawakening knowledge: Anticipatory recovery from catastrophic interference via structured training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94697", "id": "YSs1z5udBY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YSs1z5udBY", "openreview": "https://openreview.net/forum?id=YSs1z5udBY", "poster": "/media/PosterPDFs/NeurIPS%202024/94697.png?t=1733692364.3215024", "project": "", "author_site": "Yanlai Yang, Matt Jones, Michael Mozer, Mengye Ren", "tldr": "", "abstract": "We explore the training dynamics of neural networks in a structured non-IID setting where documents are presented cyclically in a fixed, repeated sequence. Typically, networks suffer from catastrophic interference when training on a sequence of documents; however, we discover a curious and remarkable property of LLMs finetuned sequentially in this setting: they exhibit *anticipatory* behavior, recovering from the forgetting on documents *before* seeing them again. The behavior emerges and becomes more robust as the architecture scales up its number of parameters. Through comprehensive experiments and visualizations, we uncover new insights into training over-parameterized networks in structured environments.", "keywords": "large language models;LLMs;catastrophic interference;online learning;continual learning;anticipatory recovery;cyclic training;structured training sequences", "primary_area": "online_learning", "supplementary_material": "/attachment/bc3451597378e3c76a4f718f5a42ab5201a43d51.zip", "author": "Yanlai Yang;Matt Jones;Michael Curtis Mozer;Mengye Ren", "authorids": "~Yanlai_Yang1;~Matt_Jones1;~Michael_Curtis_Mozer1;~Mengye_Ren1", "gender": "M;M;M;", "homepage": "https://yanlai00.github.io/;http://Matt.Colorado.edu;https://www.cs.colorado.edu/~mozer;http://www.cs.toronto.edu/~mren", "dblp": "255/5784;;m/MichaelCMozer;163/1952", "google_scholar": "QWd5d1oAAAAJ;Q7FDrMIAAAAJ;lmjR_qMAAAAJ;XcQ9WqMAAAAJ", "orcid": ";;;", "linkedin": "yanlai-yang/;;;", "or_profile": "~Yanlai_Yang1;~Matt_Jones1;~Michael_Curtis_Mozer1;~Mengye_Ren1", "aff": "New York University;University of Colorado Boulder;Google DeepMind;New York University", "aff_domain": "nyu.edu;colorado.edu;google.com;nyu.edu", "position": "PhD student;Full Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nyang2024reawakening,\ntitle={Reawakening knowledge: Anticipatory recovery from catastrophic interference via structured training},\nauthor={Yanlai Yang and Matt Jones and Michael Curtis Mozer and Mengye Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YSs1z5udBY}\n}", "github": "", "reviewers": "5Cpf;4gi1;gu9E;dpPf", "pdf_size": 1543813, "rating": "4;5;7;8", "confidence": "4;4;2;4", "soundness": "3;3;3;3", "novelty": "2;1;3;3", "presentation": "3;3;2;3", "wc_summary": "79;110;88;120", "wc_strengths": "64;110;14;152", "wc_weaknesses": "333;224;30;163", "wc_questions": "49;10;232;140", "wc_limitations": "6;1;4;95", "wc_review": "531;455;368;670", "wc_reply_reviewers": "44;249;123;89", "wc_reply_authors": "0;301;17;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 16.452583383772897 ], "wc_strengths_avg": [ 85.0, 51.468436929831086 ], "wc_weaknesses_avg": [ 187.5, 109.4406231707404 ], "wc_questions_avg": [ 107.75, 85.85562008395257 ], "wc_limitations_avg": [ 26.5, 39.588508433635134 ], "wc_review_avg": [ 506.0, 110.86703748184128 ], "wc_reply_reviewers_avg": [ 126.25, 76.20818525591592 ], "wc_reply_authors_avg": [ 79.5, 128.07126922147683 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.36514837167011077, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11949710309081471464&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "nyu.edu;colorado.edu;google.com;nyu.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "New York University;University of Colorado;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.nyu.edu;https://www.colorado.edu;https://deepmind.com", "aff_unique_abbr": "NYU;CU;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boulder", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "SpatialPIN: Enhancing Spatial Reasoning Capabilities of Vision-Language Models through Prompting and Interacting 3D Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94696", "id": "YTHJ8O6SCB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YTHJ8O6SCB", "openreview": "https://openreview.net/forum?id=YTHJ8O6SCB", "poster": "/media/PosterPDFs/NeurIPS%202024/94696.png?t=1731036188.759137", "project": "", "author_site": "Chenyang Ma, Kai Lu, Ta-Ying Cheng, Niki Trigoni, Andrew Markham", "tldr": "", "abstract": "Current state-of-the-art spatial reasoning-enhanced VLMs are trained to excel at spatial visual question answering (VQA). However, we believe that higher-level 3D-aware tasks, such as articulating dynamic scene changes and motion planning, require a fundamental and explicit 3D understanding beyond current spatial VQA datasets. In this work, we present SpatialPIN, a framework designed to enhance the spatial reasoning capabilities of VLMs through prompting and interacting with priors from multiple 3D foundation models in a zero-shot, training-free manner. Extensive experiments demonstrate that our spatial reasoning-imbued VLM performs well on various forms of spatial VQA and can extend to help in various downstream robotics tasks such as pick and stack and trajectory planning.", "keywords": "VLM Spatial Reasoning;Zero-Shot", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chenyang Ma;Kai Lu;Ta-Ying Cheng;Niki Trigoni;Andrew Markham", "authorids": "~Chenyang_Ma1;~Kai_Lu5;~Ta-Ying_Cheng1;~Niki_Trigoni1;~Andrew_Markham2", "gender": "M;M;M;F;M", "homepage": "https://dannymcy.github.io/;https://www.cs.ox.ac.uk/people/kai.lu/;https://ttchengab.github.io;https://www.cs.ox.ac.uk/people/niki.trigoni/;", "dblp": ";;264/7281.html;t/NikiTrigoni;83/7169", "google_scholar": "d4tuNoUAAAAJ;;onX3k7kAAAAJ;;https://scholar.google.co.uk/citations?user=g3JTO9EAAAAJ", "orcid": "0009-0007-0737-3175;;;;", "linkedin": "chenyang-ma-66945091/;;;;", "or_profile": "~Chenyang_Ma1;~Kai_Lu5;~Ta-Ying_Cheng1;~Niki_Trigoni1;~Andrew_Markham2", "aff": "University of Oxford;University of Oxford;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nma2024spatialpin,\ntitle={Spatial{PIN}: Enhancing Spatial Reasoning Capabilities of Vision-Language Models through Prompting and Interacting 3D Priors},\nauthor={Chenyang Ma and Kai Lu and Ta-Ying Cheng and Niki Trigoni and Andrew Markham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YTHJ8O6SCB}\n}", "github": "", "reviewers": "gF9G;C3yx;K1EW", "pdf_size": 10526835, "rating": "4;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "51;41;106", "wc_strengths": "17;32;61", "wc_weaknesses": "180;64;89", "wc_questions": "2;18;96", "wc_limitations": "1;3;34", "wc_review": "251;158;386", "wc_reply_reviewers": "47;20;19", "wc_reply_authors": "360;18;16", "reply_reviewers": "1;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 28.577380332470412 ], "wc_strengths_avg": [ 36.666666666666664, 18.263503375736967 ], "wc_weaknesses_avg": [ 111.0, 49.846430831772366 ], "wc_questions_avg": [ 38.666666666666664, 41.063636251825315 ], "wc_limitations_avg": [ 12.666666666666666, 15.107025591499548 ], "wc_review_avg": [ 265.0, 93.60555539069249 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 12.970050972229146 ], "wc_reply_authors_avg": [ 131.33333333333334, 161.69381215398715 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6685378750130875659&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cs.ox.ac.uk;ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Knowledge Circuits in Pretrained Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94695", "id": "YVXzZNxcag", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YVXzZNxcag", "openreview": "https://openreview.net/forum?id=YVXzZNxcag", "poster": "/media/PosterPDFs/NeurIPS%202024/94695.png?t=1731271165.9639702", "project": "", "author_site": "Yunzhi Yao, Ningyu Zhang, Zekun Xi, Mengru Wang, Ziwen Xu, Shumin Deng, Huajun Chen", "tldr": "", "abstract": "The remarkable capabilities of modern large language models are rooted in their vast repositories of knowledge encoded within their parameters, enabling them to perceive the world and engage in reasoning. The inner workings of how these models store knowledge have long been a subject of intense interest and investigation among researchers. To date, most studies have concentrated on isolated components within these models, such as the Multilayer Perceptrons and attention head. In this paper, we delve into the computation graph of the language model to uncover the knowledge circuits that are instrumental in articulating specific knowledge. The experiments, conducted with GPT2 and TinyLLAMA, has allowed us to observe how certain information heads, relation heads, and Multilayer Perceptrons collaboratively encode knowledge within the model. Moreover, we evaluate the impact of current knowledge editing techniques on these knowledge circuits, providing deeper insights into the functioning and constraints of these editing methodologies. Finally, we utilize knowledge circuits to analyze and interpret language model behaviors such as hallucinations and in-context learning. We believe the knowledge circuit holds potential for advancing our understanding of Transformers and guiding the improved design of knowledge editing.", "keywords": "Knowledge Mechanism;Circuit Theory;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c091b7ff47ab6a691659b8d4c97725488a897008.zip", "author": "Yunzhi Yao;Ningyu Zhang;Zekun Xi;Mengru Wang;Ziwen Xu;Shumin Deng;Huajun Chen", "authorids": "~Yunzhi_Yao1;~Ningyu_Zhang1;~Zekun_Xi2;~Mengru_Wang1;~Ziwen_Xu1;~Shumin_Deng1;~Huajun_Chen1", "gender": "M;M;;F;M;F;M", "homepage": "http://yyzcowtodd.cn;https://person.zju.edu.cn/en/ningyu;https://github.com/pillow-xi;;;https://231sm.github.io/;", "dblp": "295/9476;139/4181-1.html;354/8463;59/8499;277/6261;213/1853;94/5089", "google_scholar": "https://scholar.google.com.hk/citations?user=nAagIwEAAAAJ;xQDOPvsAAAAJ;;P3bp0egAAAAJ;5oqIUicAAAAJ;3am3hL4AAAAJ;", "orcid": ";0000-0002-1970-0678;;0000-0002-4488-9871;;;", "linkedin": ";ningyuzhang/;;;;;", "or_profile": "~Yunzhi_Yao1;~Ningyu_Zhang1;~Zekun_Xi2;~Mengru_Wang1;~Ziwen_Xu1;~Shumin_Deng1;~Huajun_Chen1", "aff": "University of California, Los Angeles;Zhejiang University;Zhejiang University;Zhejiang University;University of Electronic Science and Technology of China;National University of Singapore;Zhejiang University", "aff_domain": "ucla.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn;uestc.edu.cn;nus.edu.sg;zju.edu.cn", "position": "Researcher;Associate Professor;MS student;PhD student;Undergrad student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyao2024knowledge,\ntitle={Knowledge Circuits in Pretrained Transformers},\nauthor={Yunzhi Yao and Ningyu Zhang and Zekun Xi and Mengru Wang and Ziwen Xu and Shumin Deng and Huajun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YVXzZNxcag}\n}", "github": "", "reviewers": "8n8x;7LFR;8w5y;jmbU;Njw6", "pdf_size": 6660132, "rating": "5;5;7;8;8", "confidence": "3;4;4;4;5", "soundness": "3;2;4;4;3", "novelty": "2;3;4;4;3", "presentation": "2;3;4;4;3", "wc_summary": "61;232;57;202;66", "wc_strengths": "58;17;70;204;31", "wc_weaknesses": "385;180;29;122;386", "wc_questions": "2;28;4;81;5", "wc_limitations": "1;1;1;23;7", "wc_review": "507;458;161;632;495", "wc_reply_reviewers": "121;145;17;39;23", "wc_reply_authors": "438;699;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 123.6, 76.90149543409412 ], "wc_strengths_avg": [ 76.0, 66.70832032063167 ], "wc_weaknesses_avg": [ 220.4, 143.15390319512773 ], "wc_questions_avg": [ 24.0, 30.033314835362415 ], "wc_limitations_avg": [ 6.6, 8.522910301065007 ], "wc_review_avg": [ 450.6, 156.19295758772225 ], "wc_reply_reviewers_avg": [ 69.0, 53.291650377896914 ], "wc_reply_authors_avg": [ 227.4, 290.47932800803574 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6993786061802353, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16618728880852046403&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "ucla.edu;zju.edu.cn;zju.edu.cn;zju.edu.cn;uestc.edu.cn;nus.edu.sg;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;2;3;1", "aff_unique_norm": "University of California, Los Angeles;Zhejiang University;University of Electronic Science and Technology of China;National University of Singapore", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucla.edu;https://www.zju.edu.cn;https://www.uestc.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "UCLA;ZJU;UESTC;NUS", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;1;1;1;2;1", "aff_country_unique": "United States;China;Singapore" }, { "title": "Transductive Learning is Compact", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94694", "id": "YWTpmLktMj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YWTpmLktMj", "openreview": "https://openreview.net/forum?id=YWTpmLktMj", "poster": "", "project": "", "author_site": "Julian Asilis, Siddartha Devic, Shaddin Dughmi, Vatsal Sharan, Shang-Hua Teng", "tldr": "", "abstract": "We demonstrate a compactness result holding broadly across supervised learning with a general class of loss functions: Any hypothesis class $\\mathcal{H}$ is learnable with transductive sample complexity $m$ precisely when all of its finite projections are learnable with sample complexity $m$. We prove that this exact form of compactness holds for realizable and agnostic learning with respect to all proper metric loss functions (e.g., any norm on $\\mathbb{R}^d$) and any continuous loss on a compact space (e.g., cross-entropy, squared loss). For realizable learning with improper metric losses, we show that exact compactness of sample complexity can fail, and provide matching upper and lower bounds of a factor of 2 on the extent to which such sample complexities can differ. We conjecture that larger gaps are possible for the agnostic case. Furthermore, invoking the equivalence between sample complexities in the PAC and transductive models (up to lower order factors, in the realizable case) permits us to directly port our results to the PAC model, revealing an almost-exact form of compactness holding broadly in PAC learning.", "keywords": "Sample Complexity;Compactness;One-Inclusion Graphs;Metric Space;Transductive Learning;PAC Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Julian Asilis;Siddartha Devic;Shaddin Dughmi;Vatsal Sharan;Shang-Hua Teng", "authorids": "~Julian_Asilis1;~Siddartha_Devic1;~Shaddin_Dughmi1;~Vatsal_Sharan1;~Shang-Hua_Teng1", "gender": "M;;;M;M", "homepage": "https://jasilis.com/;http://sid.devic.us/;;https://vatsalsharan.github.io/;https://viterbi-web.usc.edu/~shanghua/", "dblp": "307/5139;239/8389;;126/2543;t/ShangHuaTeng", "google_scholar": "Cu3RV2UAAAAJ;LVL-kmUAAAAJ;;Ize17HEAAAAJ;JknkZcQAAAAJ", "orcid": ";;;;0000-0001-5011-4514", "linkedin": ";;;;shanghua-teng-a295598", "or_profile": "~Julian_Asilis1;~Siddartha_Devic1;~Shaddin_Dughmi1;~Vatsal_Sharan1;~Shang-Hua_Teng1", "aff": "University of Southern California;Amazon;;University of Southern California;University of Southern California", "aff_domain": "usc.edu;amazon.com;;usc.edu;usc.edu", "position": "PhD student;Intern;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nasilis2024transductive,\ntitle={Transductive Learning is Compact},\nauthor={Julian Asilis and Siddartha Devic and Shaddin Dughmi and Vatsal Sharan and Shang-Hua Teng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YWTpmLktMj}\n}", "github": "", "reviewers": "B4fj;7sdt;Rcdh;JpD2", "pdf_size": 388435, "rating": "6;6;6;7", "confidence": "3;3;3;2", "soundness": "3;4;3;3", "novelty": "3;2;2;3", "presentation": "3;4;2;3", "wc_summary": "77;121;171;288", "wc_strengths": "65;15;308;53", "wc_weaknesses": "301;71;416;1", "wc_questions": "3;4;262;396", "wc_limitations": "1;1;158;1", "wc_review": "447;212;1315;739", "wc_reply_reviewers": "12;14;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;0;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 164.25, 78.80791521160803 ], "wc_strengths_avg": [ 110.25, 115.65330734570456 ], "wc_weaknesses_avg": [ 197.25, 168.12848509398995 ], "wc_questions_avg": [ 166.25, 169.50571524287906 ], "wc_limitations_avg": [ 40.25, 67.98299419707844 ], "wc_review_avg": [ 678.25, 412.31260895102395 ], "wc_reply_reviewers_avg": [ 6.5, 6.5383484153110105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 0.75, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2275822464810264645&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "usc.edu;amazon.com;;usc.edu;usc.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Southern California;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com", "aff_unique_abbr": "USC;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal Top-Two Method for Best Arm Identification and Fluid Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94693", "id": "YXQW4qQe2U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YXQW4qQe2U", "openreview": "https://openreview.net/forum?id=YXQW4qQe2U", "poster": "/media/PosterPDFs/NeurIPS%202024/94693.png?t=1733252443.9921393", "project": "", "author_site": "Agniv Bandyopadhyay, Sandeep Juneja, Shubhada Agrawal", "tldr": "", "abstract": "Top-2 methods have become popular in solving the best arm identification (BAI) problem. The best arm, or the arm with the largest mean amongst finitely many, is identified through an algorithm that at any sequential step independently pulls the empirical best arm, with a fixed probability $\\beta$, and pulls the best challenger arm otherwise. The probability of incorrect selection is guaranteed to lie below a specified $\\delta>0$. Information theoretic lower bounds on sample complexity are well known for BAI problem and are matched asymptotically as $\\delta\\to 0$ by computationally demanding plug-in methods. The above top 2 algorithm for any $\\beta\\in(0, 1)$ has sample complexity within a constant of the lower bound. However, determining the optimal \u03b2 that matches the lower bound has proven difficult. In this paper, we address this and propose an optimal top-2 type algorithm. We consider a function of allocations anchored at a threshold. If it exceeds the threshold then the algorithm samples the empirical best arm. Otherwise, it samples the challenger arm. We show that the proposed algorithm is optimal as $\\delta\\to 0$. Our analysis relies on identifying a limiting fluid dynamics of allocations that satisfy a series of ordinary differential equations pasted together and that describe the asymptotic path followed by our algorithm. We rely on the implicit function theorem to show existence and uniqueness of these fluid ode\u2019s and to show that the proposed algorithm remains close to the ode solution.", "keywords": "Stochastic multi-armed bandits;best-arm identification;sequential learning;ranking and selection", "primary_area": "bandits", "supplementary_material": "/attachment/cd6272bfe34658eee20051036dd8081b60e06430.zip", "author": "Agniv Bandyopadhyay;Sandeep Kumar Juneja;Shubhada Agrawal", "authorids": "~Agniv_Bandyopadhyay1;~Sandeep_Kumar_Juneja1;~Shubhada_Agrawal1", "gender": "M;M;F", "homepage": ";http://www.tcs.tifr.res.in/~sandeepj/;https://sites.google.com/view/shubhada-agrawal/home", "dblp": "359/5768;98/5399.html;247/9653", "google_scholar": ";https://scholar.google.co.in/citations?user=Tfgv6VgAAAAJ;RQGMXiYAAAAJ", "orcid": ";;", "linkedin": "agniv-bandyopadhyay99/;sandeep-juneja-937b44b/?originalSubdomain=in;shubhada-agrawal-55561867/", "or_profile": "~Agniv_Bandyopadhyay1;~Sandeep_Kumar_Juneja1;~Shubhada_Agrawal1", "aff": "Tata Institute of Fundamental Research;Tata Institute of Fundamental Research;Georgia Institute of Technology", "aff_domain": "tifr.res.in;tifr.res.edu;gatech.edu", "position": "PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nbandyopadhyay2024optimal,\ntitle={Optimal Top-Two Method for Best Arm Identification and Fluid Analysis},\nauthor={Agniv Bandyopadhyay and Sandeep Kumar Juneja and Shubhada Agrawal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YXQW4qQe2U}\n}", "github": "", "reviewers": "mata;jbje;Uo1U;Aqmy;4f2z", "pdf_size": 1091612, "rating": "5;5;6;6;7", "confidence": "4;2;4;3;4", "soundness": "3;2;4;4;3", "novelty": "3;1;4;3;3", "presentation": "3;1;2;3;3", "wc_summary": "52;43;332;152;116", "wc_strengths": "70;39;115;69;137", "wc_weaknesses": "99;84;148;59;361", "wc_questions": "95;22;193;60;165", "wc_limitations": "18;1;1;2;5", "wc_review": "334;189;789;342;784", "wc_reply_reviewers": "17;147;23;43;0", "wc_reply_authors": "0;219;0;11;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;1;2;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.9797958971132712 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 139.0, 104.62504480285779 ], "wc_strengths_avg": [ 86.0, 35.20227265390688 ], "wc_weaknesses_avg": [ 150.2, 109.32776408579845 ], "wc_questions_avg": [ 107.0, 63.777739063093165 ], "wc_limitations_avg": [ 5.4, 6.468384651518492 ], "wc_review_avg": [ 487.6, 250.0596728782952 ], "wc_reply_reviewers_avg": [ 46.0, 52.3373671481476 ], "wc_reply_authors_avg": [ 46.0, 86.60484974872942 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4677071733467426, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12655096429181880983&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tifr.res.in;tifr.res.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tata Institute of Fundamental Research;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tifr.res.in;https://www.gatech.edu", "aff_unique_abbr": "TIFR;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "India;United States" }, { "title": "4DBInfer: A 4D Benchmarking Toolbox for Graph-Centric Predictive Modeling on RDBs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97638", "id": "YXXmIHJQBN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YXXmIHJQBN", "openreview": "https://openreview.net/forum?id=YXXmIHJQBN", "poster": "", "project": "", "author_site": "Minjie Wang, Quan Gan, David Wipf, Zheng Zhang, Christos Faloutsos, Weinan Zhang, Muhan Zhang, Zhenkun Cai, Jiahang Li, Zunyao Mao, Yakun Song, Jianheng Tang, Yanlin Zhang, Guang Yang, Chuan Lei, Xiao Qin, Ning Li, Han Zhang, Yanbo Wang, Zizhao Zhang", "tldr": "", "abstract": "Given a relational database (RDB), how can we predict missing column values in some target table of interest? Although RDBs store vast amounts of rich, informative data spread across interconnected tables, the progress of predictive machine learning models as applied to such tasks arguably falls well behind advances in other domains such as computer vision or natural language processing. This deficit stems, at least in part, from the lack of established/public RDB benchmarks as needed for training and evaluation purposes. As a result, related model development thus far often defaults to tabular approaches trained on ubiquitous single-table benchmarks, or on the relational side, graph-based alternatives such as GNNs applied to a completely different set of graph datasets devoid of tabular characteristics. To more precisely target RDBs lying at the nexus of these two complementary regimes, we explore a broad class of baseline models predicated on: (i) converting multi-table datasets into graphs using various strategies equipped with efficient subsampling, while preserving tabular characteristics; and (ii) trainable models with well-matched inductive biases that output predictions based on these input subgraphs. Then, to address the dearth of suitable public benchmarks and reduce siloed comparisons, we assemble a diverse collection of (i) large-scale RDB datasets and (ii) coincident predictive tasks. From a delivery standpoint, we operationalize the above four dimensions (4D) of exploration within a unified, scalable open-source toolbox called 4DBInfer; please see https://github.com/awslabs/multi-table-benchmark .", "keywords": "graph neural networks;relational databases;tabular prediction", "primary_area": "", "supplementary_material": "", "author": "Minjie Wang;Quan Gan;David Wipf;Zheng Zhang;Christos Faloutsos;Weinan Zhang;Muhan Zhang;Zhenkun Cai;Jiahang Li;Zunyao Mao;Yakun Song;Jianheng Tang;Yanlin Zhang;Guang Yang;Chuan Lei;Xiao Qin;Ning Li;Han Zhang;Yanbo Wang;Zizhao Zhang", "authorids": "~Minjie_Wang2;~Quan_Gan1;~David_Wipf1;~Zheng_Zhang1;~Christos_Faloutsos1;~Weinan_Zhang1;~Muhan_Zhang1;~Zhenkun_Cai1;~Jiahang_Li1;~Zunyao_Mao1;~Yakun_Song1;~Jianheng_Tang1;~Yanlin_Zhang2;~Guang_Yang13;~Chuan_Lei1;~Xiao_Qin3;~Ning_Li8;~Han_Zhang24;~Yanbo_Wang2;~Zizhao_Zhang4", "gender": ";M;M;M;M;M;M;;M;M;;M;F;M;;M;;;M;", "homepage": ";;http://www.davidwipf.com/;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;https://www.cs.cmu.edu/~christos/;http://wnzhang.net;https://muhanzhang.github.io/;;https://lspongebobjh.github.io/;https://danielmao1.github.io/;https://github.com/Ereboas;https://squareroot3.github.io/;http://ee.fudan.edu.cn/Data/View/776;https://scholar.google.com/citations?user=rCbY4jAAAAAJ&hl=en;;https://web.cs.wpi.edu/~xqin/;https://github.com/NingLi670;https://zhhhhahahaha.github.io/;https://yanxwb.github.io/;", "dblp": "58/10312;72/3872;81/6421;;f/CFaloutsos;28/10261-1;157/5518;;;;;;;;;199/4704-3;14/5410-29;26/4189-57;;", "google_scholar": "OJja8NgAAAAJ;;YJx1WSgAAAAJ;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;nd8lQQIAAAAJ;Qzss0GEAAAAJ;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ;sOjbP8kAAAAJ;;;;w4kWvXEAAAAJ;;rCbY4jAAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;Wmyt0f0AAAAJ;Rmo7EXQAAAAJ;", "orcid": "0009-0009-8156-1179;0009-0002-0986-457X;;;0000-0003-2996-9790;0000-0002-0127-2425;0000-0002-7680-6401;;;0009-0006-8516-3723;;0000-0001-9341-7312;;;;;0009-0000-5941-6412;0009-0009-1784-8968;;", "linkedin": ";quan-gan-231992136/;;;christos-faloutsos-43a7aa2/;;jerry-muhan-zhang-a33a1777/;;;;;;;;;;;;;", "or_profile": "~Minjie_Wang2;~Quan_Gan1;~David_Wipf1;~Zheng_Zhang1;~Christos_Faloutsos1;~Weinan_Zhang1;~Muhan_Zhang1;~Zhenkun_Cai1;~Jiahang_Li1;~Zunyao_Mao1;~Yakun_Song1;~Jianheng_Tang1;~Yanlin_Zhang2;~Guang_Yang13;~Chuan_Lei1;~Xiao_Qin3;~Ning_Li8;~Han_Zhang24;~Yanbo_Wang2;~Zizhao_Zhang4", "aff": "Amazon;Amazon;Amazon AI Research Lab;Amazon;Carnegie Mellon University;Shanghai Jiaotong University;Peking University;;Hong Kong Polytechnic University;Southern University of Science and Technology;Shanghai Jiaotong University;Hong Kong University of Science and Technology;Fudan University;Amazon;;Amazon;Shanghai Jiaotong University;Shanghai Jiaotong University;Peking University;", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;cmu.edu;sjtu.edu.cn;pku.edu.cn;;polyu.edu.hk;mail.sustech.edu.cn;sjtu.edu.cn;ust.hk;fudan.edu.cn;amazon.com;;amazon.com;sjtu.edu.cn;sjtu.edu.cn;pku.edu.cn;", "position": "Senior Applied Scientist;Researcher;Principal Research Scientist;Senior Principal Scientist;Full Professor;Associate Professor;Assistant Professor;;MS student;MS student;MS student;PhD student;MS student;Researcher;;Researcher;Undergrad student;Undergrad student;PhD student;", "bibtex": "@inproceedings{\nwang2024dbinfer,\ntitle={4{DBI}nfer: A 4D Benchmarking Toolbox for Graph-Centric Predictive Modeling on {RDB}s},\nauthor={Minjie Wang and Quan Gan and David Wipf and Zheng Zhang and Christos Faloutsos and Weinan Zhang and Muhan Zhang and Zhenkun Cai and Jiahang Li and Zunyao Mao and Yakun Song and Jianheng Tang and Yanlin Zhang and Guang Yang and Chuan Lei and Xiao Qin and Ning Li and Han Zhang and Yanbo Wang and Zizhao Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YXXmIHJQBN}\n}", "github": "", "reviewers": "vDKj;RpkB;F71Q;p2fN", "pdf_size": 1211648, "rating": "6;7;7;7", "confidence": "3;3;4;4", "wc_summary_and_contributions": "85;51;104;163", "wc_strengths": "33;67;92;129", "wc_improvement": "14;472;289;93", "wc_limitations": "12;96;4;104", "wc_correctness": "7;32;9;32", "wc_clarity": "4;131;5;20", "wc_relation_to_prior_work": "8;28;8;14", "wc_documentation": "4;23;21;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "168;901;533;571", "wc_reply_reviewers": "11;596;131;0", "wc_reply_authors": "0;858;40;0", "reply_reviewers": "1;4;2;0", "reply_authors": "1;5;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 100.75, 40.64710936831794 ], "wc_strengths_avg": [ 80.25, 35.08115591026043 ], "wc_improvement_avg": [ 217.0, 178.04072567814364 ], "wc_limitations_avg": [ 54.0, 46.17358552246078 ], "wc_correctness_avg": [ 20.0, 12.020815280171307 ], "wc_clarity_avg": [ 40.0, 52.91975056630558 ], "wc_relation_to_prior_work_avg": [ 14.5, 8.170067319184096 ], "wc_documentation_avg": [ 15.75, 7.39509972887452 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 543.25, 259.6501251684659 ], "wc_reply_reviewers_avg": [ 184.5, 243.07252004288762 ], "wc_reply_authors_avg": [ 224.5, 366.115760381877 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jc3UV8e7v9gJ:scholar.google.com/&scioq=4DBInfer:+A+4D+Benchmarking+Toolbox+for+Graph-Centric+Predictive+Modeling+on+RDBs&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "amazon.com;amazon.com;amazon.com;amazon.com;cmu.edu;sjtu.edu.cn;pku.edu.cn;;polyu.edu.hk;mail.sustech.edu.cn;sjtu.edu.cn;ust.hk;fudan.edu.cn;amazon.com;;amazon.com;sjtu.edu.cn;sjtu.edu.cn;pku.edu.cn;", "author_num": 20, "aff_unique_index": "0;0;0;0;1;2;3;4;5;2;6;7;0;0;2;2;3", "aff_unique_norm": "Amazon;Carnegie Mellon University;Shanghai Jiao Tong University;Peking University;Hong Kong Polytechnic University;Southern University of Science and Technology;Hong Kong University of Science and Technology;Fudan University", "aff_unique_dep": "Amazon.com, Inc.;;;;;;;", "aff_unique_url": "https://www.amazon.com;https://www.cmu.edu;https://www.sjtu.edu.cn;http://www.pku.edu.cn;https://www.polyu.edu.hk;https://www.sustech.edu.cn;https://www.ust.hk;https://www.fudan.edu.cn", "aff_unique_abbr": "Amazon;CMU;SJTU;Peking U;PolyU;SUSTech;HKUST;Fudan", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1;1;1;1;1;1;1;0;0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Fairness without Harm: An Influence-Guided Active Sampling Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94692", "id": "YYJojVBCcd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YYJojVBCcd", "openreview": "https://openreview.net/forum?id=YYJojVBCcd", "poster": "", "project": "", "author_site": "Jinlong Pang, Jialu Wang, Zhaowei Zhu, Yuanshun Yao, Chen Qian, Yang Liu", "tldr": "", "abstract": "The pursuit of fairness in machine learning (ML), ensuring that the models do not exhibit biases toward protected demographic groups, typically results in a compromise scenario. This compromise can be explained by a Pareto frontier where given certain resources (e.g., data), reducing the fairness violations often comes at the cost of lowering the model accuracy. \nIn this work, we aim to train models that mitigate group fairness disparity without causing harm to model accuracy.\nIntuitively, acquiring more data is a natural and promising approach to achieve this goal by reaching a better Pareto frontier of the fairness-accuracy tradeoff. The current data acquisition methods, such as fair active learning approaches, typically require annotating sensitive attributes. However, these sensitive attribute annotations should be protected due to privacy and safety concerns. In this paper, we propose a tractable active data sampling algorithm that does not rely on training group annotations, instead only requiring group annotations on a small validation set. Specifically, the algorithm first scores each new example by its influence on fairness and accuracy evaluated on the validation dataset, and then selects a certain number of examples for training. \nWe theoretically analyze how acquiring more data can improve fairness without causing harm, and validate the possibility of our sampling approach in the context of risk disparity. We also provide the upper bound of generalization error and risk disparity as well as the corresponding connections.\nExtensive experiments on real-world data demonstrate the effectiveness of our proposed algorithm. Our code is available at [github.com/UCSC-REAL/FairnessWithoutHarm](https://github.com/UCSC-REAL/FairnessWithoutHarm).", "keywords": "Fairness; Sampling", "primary_area": "fairness", "supplementary_material": "", "author": "Jinlong Pang;Jialu Wang;Zhaowei Zhu;Yuanshun Yao;Chen Qian;Yang Liu", "authorids": "~Jinlong_Pang2;~Jialu_Wang1;~Zhaowei_Zhu1;~Yuanshun_Yao2;~Chen_Qian4;~Yang_Liu3", "gender": ";;M;;M;M", "homepage": ";https://people.ucsc.edu/~jwang470/;https://www.zzw.ai;;https://users.soe.ucsc.edu/~qian/;http://www.yliuu.com", "dblp": ";195/2701;202/1712;;;51/3710-18", "google_scholar": ";HOtDeN0AAAAJ;YS8pSQoAAAAJ;;;jKrIVCIAAAAJ", "orcid": ";;0000-0003-3894-5862;;;0000-0001-8420-6011", "linkedin": ";;;;;", "or_profile": "~Jinlong_Pang2;~Jialu_Wang1;~Zhaowei_Zhu1;~Yuanshun_Yao2;~Chen_Qian4;~Yang_Liu3", "aff": ";University of California, Santa Cruz;Docta.ai;;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": ";ucsc.edu;docta.ai;;ucsc.edu;ucsc.edu", "position": ";PhD student;Researcher;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\npang2024fairness,\ntitle={Fairness without Harm: An Influence-Guided Active Sampling Approach},\nauthor={Jinlong Pang and Jialu Wang and Zhaowei Zhu and Yuanshun Yao and Chen Qian and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YYJojVBCcd}\n}", "github": "", "reviewers": "cxN7;wZhW;Pisk;KagD", "pdf_size": 730019, "rating": "3;4;5;6", "confidence": "4;3;3;3", "soundness": "2;3;2;3", "novelty": "2;2;2;2", "presentation": "2;2;2;3", "wc_summary": "130;105;69;243", "wc_strengths": "94;98;117;135", "wc_weaknesses": "298;176;655;76", "wc_questions": "6;187;190;72", "wc_limitations": "103;16;10;1", "wc_review": "631;582;1041;527", "wc_reply_reviewers": "0;206;0;0", "wc_reply_authors": "0;475;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 136.75, 65.06295028662626 ], "wc_strengths_avg": [ 111.0, 16.355427233796124 ], "wc_weaknesses_avg": [ 301.25, 218.84626453289076 ], "wc_questions_avg": [ 113.75, 78.31466976243978 ], "wc_limitations_avg": [ 32.5, 41.051796550212025 ], "wc_review_avg": [ 695.25, 202.98075647706114 ], "wc_reply_reviewers_avg": [ 51.5, 89.20061658979718 ], "wc_reply_authors_avg": [ 118.75, 205.68103339880417 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7370197655978586130&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": ";ucsc.edu;docta.ai;;ucsc.edu;ucsc.edu", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Santa Cruz;Docta.ai", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsc.edu;https://docta.ai", "aff_unique_abbr": "UCSC;Docta.ai", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Cruz;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Warm-starting Push-Relabel", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94691", "id": "YYY5lzE547", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YYY5lzE547", "openreview": "https://openreview.net/forum?id=YYY5lzE547", "poster": "/media/PosterPDFs/NeurIPS%202024/94691.png?t=1733450752.951253", "project": "", "author_site": "Sami Davies, Sergei Vassilvitskii, Yuyan Wang", "tldr": "", "abstract": "Push-Relabel is one of the most celebrated network flow algorithms. Maintaining a pre-flow that saturates a cut, it enjoys better theoretical and empirical running time than other flow algorithms, such as Ford-Fulkerson. In practice, Push-Relabel is even faster than what theoretical guarantees can promise, in part because of the use of good heuristics for seeding and updating the iterative algorithm. However, it remains unclear how to run Push-Relabel on an arbitrary initialization that is not necessarily a pre-flow or cut-saturating. We provide the first theoretical guarantees for warm-starting Push-Relabel with a predicted flow, where our learning-augmented version benefits from fast running time when the predicted flow is close to an optimal flow, while maintaining robust worst-case guarantees. Interestingly, our algorithm uses the gap relabeling heuristic, which has long been employed in practice, even though prior to our work there was no rigorous theoretical justification for why it can lead to run-time improvements. We then show our algorithmic framework works well in practice, as our warm-start version of Push-Relabel improves over the cold-start version by a larger and larger percentage as the size of the image increases.", "keywords": "algorithms with predictions;max flow;beyond worst-case analysis", "primary_area": "optimization", "supplementary_material": "/attachment/0b11a0fac209f245b0bdd8a79077e2ef361e8834.zip", "author": "Sami Davies;Sergei Vassilvitskii;Yuyan Wang", "authorids": "~Sami_Davies1;~Sergei_Vassilvitskii2;~Yuyan_Wang1", "gender": "F;;F", "homepage": "http://samidavies.com;http://theory.stanford.edu/~sergei;", "dblp": "223/5708;31/6854.html;", "google_scholar": ";b3HMX-sAAAAJ;JvSO3Q0AAAAJ", "orcid": ";0000-0003-0235-1624;", "linkedin": ";;yuyan-wang-670a55199/", "or_profile": "~Sami_Davies1;~Sergei_Vassilvitskii2;~Yuyan_Wang1", "aff": "Northwestern University;Google;Google", "aff_domain": "northwestern.edu;google.com;google.com", "position": "Postdoc;Scientist;Researcher", "bibtex": "@inproceedings{\ndavies2024warmstarting,\ntitle={Warm-starting Push-Relabel},\nauthor={Sami Davies and Sergei Vassilvitskii and Yuyan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YYY5lzE547}\n}", "github": "", "reviewers": "D3Wp;gRPX;oTzZ;C4XM", "pdf_size": 4915131, "rating": "6;6;7;7", "confidence": "2;2;3;2", "soundness": "2;3;2;3", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "193;65;85;96", "wc_strengths": "61;98;60;55", "wc_weaknesses": "183;81;220;314", "wc_questions": "112;62;87;76", "wc_limitations": "1;3;35;31", "wc_review": "550;309;487;572", "wc_reply_reviewers": "20;12;63;89", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 109.75, 49.33241834737073 ], "wc_strengths_avg": [ 68.5, 17.18284027743958 ], "wc_weaknesses_avg": [ 199.5, 83.43410573620359 ], "wc_questions_avg": [ 84.25, 18.30812661087966 ], "wc_limitations_avg": [ 17.5, 15.580436450882884 ], "wc_review_avg": [ 479.5, 103.26301370771628 ], "wc_reply_reviewers_avg": [ 46.0, 31.50396800404673 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15765438434788738638&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "northwestern.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Northwestern University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.northwestern.edu;https://www.google.com", "aff_unique_abbr": "NU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Neural Contracting Dynamics: Extended Linearization and Global Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94690", "id": "YYnP3Xpv3y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YYnP3Xpv3y", "openreview": "https://openreview.net/forum?id=YYnP3Xpv3y", "poster": "/media/PosterPDFs/NeurIPS%202024/94690.png?t=1731461767.1411662", "project": "", "author_site": "Sean Jaffe, Alexander Davydov, Deniz Lapsekili, Ambuj K Singh, Francesco Bullo", "tldr": "", "abstract": "Global stability and robustness guarantees in learned dynamical systems are essential to ensure well-behavedness of the systems in the face of uncertainty. We present Extended Linearized Contracting Dynamics (ELCD), the first neural network-based dynamical system with global contractivity guarantees in arbitrary metrics. The key feature of ELCD is a parametrization of the extended linearization of the nonlinear vector field. In its most basic form, ELCD is guaranteed to be (i) globally exponentially stable, (ii) equilibrium contracting, and (iii) globally contracting with respect to some metric. To allow for contraction with respect to more general metrics in the data space, we train diffeomorphisms between the data space and a latent space and enforce contractivity in the latent space, which ensures global contractivity in the data space. We demonstrate the performance of ELCD on the high dimensional LASA, multi-link pendulum, and Rosenbrock datasets.", "keywords": "contraction theory;learning from demonstration;dynamical systems", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/0a47344e6af327ef1056f9d6f8fca598e10e4cd2.zip", "author": "Sean Jaffe;Alexander Davydov;Deniz Lapsekili;Ambuj Singh;Francesco Bullo", "authorids": "~Sean_Jaffe1;~Alexander_Davydov1;~Deniz_Lapsekili1;~Ambuj_Singh1;~Francesco_Bullo1", "gender": "M;M;M;;", "homepage": ";;;;http://motion.me.ucsb.edu", "dblp": ";;;;", "google_scholar": ";3Muba_IAAAAJ;;;", "orcid": ";0000-0001-5629-2565;;;", "linkedin": "sean-jaffe-85b650123/;;deniz-lapsekili-71530a19b/;;", "or_profile": "~Sean_Jaffe1;~Alexander_Davydov1;~Deniz_Lapsekili1;~Ambuj_Singh1;~Francesco_Bullo1", "aff": "University of California, Santa Barbara;UC Santa Barbara;University of California, Santa Barbara;;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;ucsb.edu;;ucsb.edu", "position": "PhD student;PhD student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\njaffe2024learning,\ntitle={Learning Neural Contracting Dynamics: Extended Linearization and Global Guarantees},\nauthor={Sean Jaffe and Alexander Davydov and Deniz Lapsekili and Ambuj Singh and Francesco Bullo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YYnP3Xpv3y}\n}", "github": "", "reviewers": "HPgs;UYYA;2hrV;Njwt", "pdf_size": 1849874, "rating": "4;6;7;8", "confidence": "2;2;3;4", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "63;45;65;91", "wc_strengths": "40;25;100;185", "wc_weaknesses": "88;37;292;192", "wc_questions": "23;3;158;161", "wc_limitations": "39;1;84;99", "wc_review": "253;111;699;728", "wc_reply_reviewers": "0;7;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 16.401219466856727 ], "wc_strengths_avg": [ 87.5, 62.89872812704562 ], "wc_weaknesses_avg": [ 152.25, 98.13351873850239 ], "wc_questions_avg": [ 86.25, 73.5981487538919 ], "wc_limitations_avg": [ 55.75, 38.557586802080856 ], "wc_review_avg": [ 447.75, 270.6449473018109 ], "wc_reply_reviewers_avg": [ 5.5, 6.18465843842649 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8664002254439633, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5642015170469574222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ucsb.edu;ucsb.edu;ucsb.edu;;ucsb.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "YZWvf58dBS", "title": "Why Not Transform Chat Large Language Models to Non-English?", "track": "main", "status": "Reject", "tldr": "", "abstract": "The scarcity of non-English data limits the development of non-English large language models (LLMs). Transforming English-centric LLMs to non-English has been identified as an effective and resource-efficient method. Previous works start from base LLMs and perform knowledge distillation (KD) with data generated by stronger LLMs, e.g. GPT-4. Compared to base LLMs, chat LLMs are further optimized for advanced abilities, e.g. multi-turn conversation and human preference alignment, and thus more powerful in both helpfulness and safety. However, transforming a chat LLM involves two critical issues: (1) How can we effectively transfer advanced abilities without their supervised data? (2) How can we prevent the original knowledge from catastrophic forgetting during transformation? We target these issues by introducing a simple framework called TransLLM. For the first issue, TransLLM divides the transfer problem into some common sub-tasks with the translation chain-of-thought, which uses the translation as the bridge between English and non-English step-by-step. We further enhance the performance of sub-tasks with publicly available data. For the second issue, we propose a method comprising two synergistic components: low-rank adaptation for training to maintain the original LLM parameters, and recovery KD, which utilizes data generated by the chat LLM itself to recover the original knowledge from the frozen parameters. In the experiments, we transform the LLaMA-2-chat-7B to the Thai language. Our method, using only single-turn data, outperforms strong baselines and ChatGPT on multi-turn benchmark MT-bench. Furthermore, our method, without safety data, rejects more harmful queries of safety benchmark AdvBench than both ChatGPT and GPT-4.", "keywords": "Large Language Model; Knowledge Transfer; Catastrophic Forgetting; Multi-turn Conversation; Human Preference", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/8bdddc34b5129f79c96dcace5a6214d9c0bc2a6e.zip", "author": "Xiang Geng;Ming Zhu;Jiahuan Li;Zhejian Lai;Wei Zou;Shuaijie She;Jiaxin GUO;Xiaofeng Zhao;Yinglu Li;Yuang Li;Chang Su;Yanqing Zhao;Min Zhang;Hao Yang;Xinglin Lyu;Jiajun Chen;Shujian Huang", "authorids": "~Xiang_Geng1;~Ming_Zhu6;~Jiahuan_Li1;~Zhejian_Lai1;~Wei_Zou3;~Shuaijie_She1;~Jiaxin_GUO1;~Xiaofeng_Zhao1;~Yinglu_Li1;~Yuang_Li1;~Chang_Su4;~Yanqing_Zhao1;~Min_Zhang10;~Hao_Yang7;~Xinglin_Lyu1;~Jiajun_Chen1;~Shujian_Huang1", "gender": "M;M;M;M;M;M;M;;F;M;M;F;M;M;M;M;M", "homepage": ";;;https://scholar.google.com/citations?user=yBQHI1cAAAAJ&hl=zh-CN;;https://ricardokevins.github.io/;;https://github.com/zhaoxf4;;;;;;https://github.com/yanghaocsg;;https://cs.nju.edu.cn/chenjiajun/index_en.htm;http://nlp.nju.edu.cn/huangsj/", "dblp": "222/7968;https://dblp.org/rec/conf/rcar/ZhuLSY20;199/0384;;;335/2500;189/7455;;;;;;83/5342-10.html;54/4089-7;305/6466;;57/8451", "google_scholar": "n6QnFS0AAAAJ;;SlfxG9IAAAAJ;yBQHI1cAAAAJ;SYqASYcAAAAJ;https://scholar.google.com.hk/citations?user=Lvvr-lIAAAAJ;RLPmDoUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;_g5YaDYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;oVvAyCUAAAAJ;lOsjM5sAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=WIF7VaoAAAAJ;HF3-E9kAAAAJ", "orcid": ";;;;;0000-0002-5500-9251;;;0000-0002-1643-9830;;;0009-0005-5738-7490;0000-0002-9624-6851;0000-0001-8861-7010;0000-0003-1971-6618;;", "linkedin": ";;;;;;;;;;;;;;;;", "or_profile": "~Xiang_Geng1;~Ming_Zhu6;~Jiahuan_Li1;~Zhejian_Lai1;~Wei_Zou3;~Shuaijie_She1;~Jiaxin_GUO1;~Xiaofeng_Zhao1;~Yinglu_Li1;~Yuang_Li1;~Chang_Su4;~Yanqing_Zhao1;~Min_Zhang10;~Hao_Yang7;~Xinglin_Lyu1;~Jiajun_Chen1;~Shujian_Huang1", "aff": "Nanjing University;Huawei Technologies Ltd.;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Soochow University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;huawei.com;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;suda.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Researcher;PhD student;MS student;PhD student;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024why,\ntitle={Why Not Transform Chat Large Language Models to Non-English?},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=YZWvf58dBS}\n}", "github": "", "project": "", "reviewers": "gAEj;aw9R;vx95;J14C", "site": "https://openreview.net/forum?id=YZWvf58dBS", "pdf_size": 1520400, "rating": "3;4;5;5", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "1;2;2;2", "presentation": "2;2;2;3", "wc_summary": "122;102;110;163", "wc_strengths": "17;31;76;122", "wc_weaknesses": "47;218;93;321", "wc_questions": "23;3;126;89", "wc_limitations": "5;3;1;8", "wc_review": "214;357;406;703", "wc_reply_reviewers": "0;0;0;74", "wc_reply_authors": "25;25;25;128", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 124.25, 23.47738273317535 ], "wc_strengths_avg": [ 61.5, 41.17341375208036 ], "wc_weaknesses_avg": [ 169.75, 107.42759189333064 ], "wc_questions_avg": [ 60.25, 49.53471005264894 ], "wc_limitations_avg": [ 4.25, 2.5860201081971503 ], "wc_review_avg": [ 420.0, 177.96769369747983 ], "wc_reply_reviewers_avg": [ 18.5, 32.04293994002423 ], "wc_reply_authors_avg": [ 50.75, 44.60030829489859 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=280815189360084341&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;0;1;1;1;1;1;1;1;1;2;0;0", "aff_unique_norm": "Nanjing University;Huawei;Soochow University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.nju.edu.cn;https://www.huawei.com;https://www.soochow.edu.cn", "aff_unique_abbr": "Nanjing U;Huawei;Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "YZoGmJrOS9", "title": "Can Custom Models Learn In-Context? An Exploration of Hybrid Architecture Performance on In-Context Learning Tasks", "track": "main", "status": "Reject", "tldr": "", "abstract": "In-Context Learning (ICL) is a phenomenon where task learning occurs through a prompt sequence without the necessity of parameter updates. ICL in Multi-Headed Attention (MHA) with absolute positional embedding has been the focus of more study than other sequence model varieties. We examine implications of architectural differences between GPT-2 and LLaMa as well as Llama and Mamba. We extend work done by Garg et al. (2022) and Park et al. (2024) to GPT-2/LLaMa hybrid and LLaMa/Mamba hybrid models -- examining the interplay between sequence transformation blocks and regressive performance in-context. We note that certain architectural changes cause degraded training efficiency/ICL accuracy by converging to suboptimal predictors or converging slower. We also find certain hybrids showing optimistic performance improvements, informing potential future ICL-focused architecture modifications. Additionally, we propose the \"ICL regression score\", a scalar metric describing a model's whole performance on a specific task. Compute limitations impose restrictions on our architecture-space, training duration, number of training runs, function class complexity, and benchmark complexity. To foster reproducible and extensible research, we provide a typed, modular, and extensible Python package on which we run all experiments. This code is available at \\url{https://github.com/anonymousforneurips64/neurips2024-submission21757}.", "keywords": "In Context Learning;Architecture;Hybrid Models;Transformers;Mamba;Attention;Toy Models;Benchmarking", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ryan Campbell;Nelson Lojo;Kesava Viswanadha;Christoffer Gr\u00f8ndal Tryggestad;Derrick Sun;Sriteja Vijapurapu;August Rolfsen;Anant Sahai", "authorids": "~Ryan_Campbell3;~Nelson_Lojo1;~Kesava_Viswanadha1;~Christoffer_Gr\u00f8ndal_Tryggestad1;~Derrick_Sun1;~Sriteja_Vijapurapu1;~August_Rolfsen1;~Anant_Sahai1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://www.ryanlearns.com/;;;;https://derrickhsun.github.io/;;;https://www2.eecs.berkeley.edu/Faculty/Homepages/sahai.html", "dblp": ";;;;;;;50/2194.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?authuser=1;;;;;;https://scholar.google.com.tw/citations?user=4gWt4fgAAAAJ", "orcid": "0009-0006-3933-769X;0009-0002-7624-9323;;;;;;0000-0001-9263-7719", "linkedin": "ryancampbell24;nelson-lojo;kesava-v-3899b51a6/;christoffer-tryggestad-355329222/;derrick-sun-052130228/;sritejavij;august-rolfsen-b31a40309?trk=contact-info;", "or_profile": "~Ryan_Campbell3;~Nelson_Lojo1;~Kesava_Viswanadha1;~Christoffer_Gr\u00f8ndal_Tryggestad1;~Derrick_Sun1;~Sriteja_Vijapurapu1;~August_Rolfsen1;~Anant_Sahai1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Norwegian University of Science and Technology;University of California, Berkeley;University of California, Berkeley;Norwegian University of Science and Technology;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;ntnu.no;berkeley.edu;berkeley.edu;ntnu.no;berkeley.edu", "position": "Undergrad student;Undergrad student;Undergrad student;MS student;Undergrad student;Undergrad student;MS student;Full Professor", "bibtex": "@misc{\nanonymous2024can,\ntitle={Can Custom Models Learn In-Context? An Exploration of Hybrid Architecture Performance on In-Context Learning Tasks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=YZoGmJrOS9}\n}", "github": "", "project": "", "reviewers": "B8PQ;y95k;q9Lv;xju7", "site": "https://openreview.net/forum?id=YZoGmJrOS9", "pdf_size": 4227590, "rating": "3;4;4;4", "confidence": "3;4;5;3", "soundness": "2;3;2;3", "novelty": "2;2;2;1", "presentation": "2;1;3;2", "wc_summary": "44;214;42;193", "wc_strengths": "30;151;66;66", "wc_weaknesses": "311;346;99;321", "wc_questions": "135;2;19;517", "wc_limitations": "9;10;2;48", "wc_review": "529;723;228;1145", "wc_reply_reviewers": "53;59;0;59", "wc_reply_authors": "487;244;281;1304", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 3.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 123.25, 80.59582805579952 ], "wc_strengths_avg": [ 78.25, 44.499297747267875 ], "wc_weaknesses_avg": [ 269.25, 99.11703940292001 ], "wc_questions_avg": [ 168.25, 207.75391091385018 ], "wc_limitations_avg": [ 17.25, 18.019087102292392 ], "wc_review_avg": [ 656.25, 332.7622086415463 ], "wc_reply_reviewers_avg": [ 42.75, 24.80297361204902 ], "wc_reply_authors_avg": [ 579.0, 428.69511310487314 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:84H2nJ-Cej0J:scholar.google.com/&scioq=Can+Custom+Models+Learn+In-Context%3F+An+Exploration+of+Hybrid+Architecture+Performance+on+In-Context+Learning+Tasks&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;0;1;0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Norwegian University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.ntnu.no", "aff_unique_abbr": "UC Berkeley;NTNU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;1;0;0;1;0", "aff_country_unique": "United States;Norway" }, { "title": "Mitigating Partial Observability in Sequential Decision Processes via the Lambda Discrepancy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94689", "id": "YaPhvbGqwO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YaPhvbGqwO", "openreview": "https://openreview.net/forum?id=YaPhvbGqwO", "poster": "/media/PosterPDFs/NeurIPS%202024/94689.png?t=1731712574.4350452", "project": "", "author_site": "Cameron Allen, Aaron Kirtland, Ruo Yu Tao, Sam Lobel, Daniel Scott, Nicholas Petrocelli, Omer Gottesman, Ronald Parr, Michael Littman, George Konidaris", "tldr": "", "abstract": "Reinforcement learning algorithms typically rely on the assumption that the environment dynamics and value function can be expressed in terms of a Markovian state representation. However, when state information is only partially observable, how can an agent learn such a state representation, and how can it detect when it has found one? We introduce a metric that can accomplish both objectives, without requiring access to---or knowledge of---an underlying, unobservable state space. Our metric, the \u03bb-discrepancy, is the difference between two distinct temporal difference (TD) value estimates, each computed using TD(\u03bb) with a different value of \u03bb. Since TD(\u03bb=0) makes an implicit Markov assumption and TD(\u03bb=1) does not, a discrepancy between these estimates is a potential indicator of a non-Markovian state representation. Indeed, we prove that the \u03bb-discrepancy is exactly zero for all Markov decision processes and almost always non-zero for a broad class of partially observable environments. We also demonstrate empirically that, once detected, minimizing the \u03bb-discrepancy can help with learning a memory function to mitigate the corresponding partial observability. We then train a reinforcement learning agent that simultaneously constructs two recurrent value networks with different \u03bb parameters and minimizes the difference between them as an auxiliary loss. The approach scales to challenging partially observable domains, where the resulting agent frequently performs significantly better (and never performs worse) than a baseline recurrent agent with only a single value network.", "keywords": "reinforcement learning;partial observability;value estimation;memory", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Cameron Allen;Aaron T. Kirtland;Ruo Yu Tao;Sam Lobel;Daniel Scott;Nicholas Petrocelli;Omer Gottesman;Ronald Parr;Michael Littman;George Konidaris", "authorids": "~Cameron_Allen1;~Aaron_T._Kirtland1;~Ruo_Yu_Tao1;~Sam_Lobel1;~Daniel_Scott1;~Nicholas_Petrocelli1;~Omer_Gottesman1;~Ronald_Parr1;~Michael_Littman1;~George_Konidaris1", "gender": ";;M;M;;M;M;Not Specified;M;M", "homepage": ";;http://taodav.cc/;https://samlobel.github.io/;https://dsctt.github.io/;;https://omergott.github.io/;https://users.cs.duke.edu/~parr/;http://www.cs.brown.edu/~mlittman;http://cs.brown.edu/people/gdk/", "dblp": ";;;242/8872;;;;26/4670;http://dblp.uni-trier.de/pers/hd/l/Littman:Michael_L=;56/6762", "google_scholar": ";;https://scholar.google.ca/citations?user=71xWQKZSflwC;;;;glNJx5zYUbsC;https://scholar.google.com.tw/citations?user=b-GJ3QIAAAAJ;Jj00ksMAAAAJ;9UERvVEAAAAJ", "orcid": ";;;;;;;;0000-0002-5596-1840;", "linkedin": ";;taodav/;;;nicholasrp/;;;michael-littman-b26351/;", "or_profile": "~Cameron_Allen1;~Aaron_T._Kirtland1;~Ruo_Yu_Tao1;~Sam_Lobel1;~Daniel_Scott1;~Nicholas_Petrocelli1;~Omer_Gottesman1;~Ronald_Parr1;~Michael_Littman1;~George_Konidaris1", "aff": ";;Brown University;Brown University;;;Amazon;Duke University;Georgia Institute of Technology;Brown University", "aff_domain": ";;brown.edu;brown.edu;;;amazon.com;duke.edu;gatech.edu;brown.edu", "position": ";;PhD student;PhD student;;;Researcher;Full Professor;Adjunct;Assistant Professor", "bibtex": "@inproceedings{\nallen2024mitigating,\ntitle={Mitigating Partial Observability in Decision Processes via the Lambda Discrepancy},\nauthor={Cameron Allen and Aaron T. Kirtland and Ruo Yu Tao and Sam Lobel and Daniel Scott and Nicholas Petrocelli and Omer Gottesman and Ronald Parr and Michael Littman and George Konidaris},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YaPhvbGqwO}\n}", "github": "", "reviewers": "rpiT;AeCz;8yd3;pbjp", "pdf_size": 4900410, "rating": "5;6;7;7", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;4;4", "presentation": "3;3;3;3", "wc_summary": "160;86;100;134", "wc_strengths": "33;91;134;119", "wc_weaknesses": "46;111;702;253", "wc_questions": "318;67;4;126", "wc_limitations": "15;10;1;84", "wc_review": "572;365;941;716", "wc_reply_reviewers": "114;64;291;162", "wc_reply_authors": "0;0;236;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 120.0, 28.948229652260256 ], "wc_strengths_avg": [ 94.25, 38.58351331851468 ], "wc_weaknesses_avg": [ 278.0, 255.98535114338085 ], "wc_questions_avg": [ 128.75, 117.47207114884797 ], "wc_limitations_avg": [ 27.5, 33.0037876614185 ], "wc_review_avg": [ 648.5, 209.96249665118768 ], "wc_reply_reviewers_avg": [ 157.75, 84.37527777732053 ], "wc_reply_authors_avg": [ 59.0, 102.19099764656376 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4187954102412824933&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 12, "email": ";;brown.edu;brown.edu;;;amazon.com;duke.edu;gatech.edu;brown.edu", "author_num": 10, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Brown University;Amazon;Duke University;Georgia Institute of Technology", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.brown.edu;https://www.amazon.com;https://www.duke.edu;https://www.gatech.edu", "aff_unique_abbr": "Brown;Amazon;Duke;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "YagfTP3RK6", "title": "Safetywashing: Do AI Safety Benchmarks Actually Measure Safety Progress?", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Performance on popular ML benchmarks is highly correlated with model scale, suggesting that most benchmarks tend to measure a similar underlying factor of general model capabilities. However, substantial research effort remains devoted to designing new benchmarks, many of which claim to measure novel phenomena. In the spirit of the Bitter Lesson, we leverage spectral analysis to measure an underlying capabilities component, the direction in benchmark-performance-space which explains most variation in model performance. In an extensive analysis of existing safety benchmarks, we find that variance in model performance on many safety benchmarks is largely explained by the capabilities component. In response, we argue that safety research should prioritize metrics which are not highly correlated with scale. Our work provides a lens to analyze both novel safety benchmarks and novel safety methods, which we hope will enable future work to make differential progress on safety.", "keywords": "Safety;meta-analysis;benchmark;capabilities", "primary_area": "", "supplementary_material": "", "author": "Richard Ren;Steven Basart;Adam Khoja;Alice Gatti;Long Phan;Xuwang Yin;Mantas Mazeika;Alexander Pan;Gabriel Mukobi;Ryan Hwang Kim;Stephen Fitz;Dan Hendrycks", "authorids": "~Richard_Ren1;~Steven_Basart1;~Adam_Khoja1;~Alice_Gatti1;~Long_Phan1;~Xuwang_Yin2;~Mantas_Mazeika3;~Alexander_Pan1;~Gabriel_Mukobi1;~Ryan_Hwang_Kim1;~Stephen_Fitz1;~Dan_Hendrycks1", "gender": "M;M;M;F;M;M;M;M;M;M;;", "homepage": "https://notrichardren.github.io/;http://stevenbas.art;;https://github.com/alga-hopf;;https://xuwangyin.github.io/;https://github.com/mmazeika;https://aypan17.github.io;https://gabrielmukobi.com/;;;", "dblp": "335/8195;245/2547;;289/7531;;125/2311;215/4447;304/3394;;;;182/2504", "google_scholar": "o-Vl80UAAAAJ;MzKvJhAAAAAJ;;Wm-EioQAAAAJ;fVRQn4wAAAAJ;c425B6UAAAAJ;;PaltSA0AAAAJ;;https://scholar.google.com/citations?view_op=list_works;;", "orcid": "0000-0001-5628-7926;;;0000-0001-5692-3996;;;;;;;;", "linkedin": "richard-ren-tech;xksteven/;adam-khoja-103/;gattialice/;long-phan-3110/;;;alexander-pan-0567a2102/;gabrielmukobi/;ryanhwangkim/;;", "or_profile": "~Richard_Ren1;~Steven_Basart1;~Adam_Khoja1;~Alice_Gatti1;~Long_Phan1;~Xuwang_Yin2;~Mantas_Mazeika3;~Alexander_Pan1;~Gabriel_Mukobi1;~Ryan_Hwang_Kim1;~Stephen_Fitz1;~Dan_Hendrycks1", "aff": "The Wharton School;Center for AI Safety ;University of California, Berkeley;Center for AI Safety;Center for AI Safety;Center for AI Safety;University of Illinois, Urbana-Champaign;University of California, Berkeley;Computer Science Department, Stanford University;Yale University;;Center for AI Safety", "aff_domain": "wharton.upenn.edu;safe.ai;berkeley.edu;safe.ai;safe.ai;safe.ai;uiuc.edu;berkeley.edu;cs.stanford.edu;yale.edu;;safe.ai", "position": "Undergrad student;Researcher;Undergrad student;Researcher;Research Engineer;Researcher;PhD student;PhD student;MS student;Undergrad student;;Executive and Research Director", "bibtex": "@inproceedings{\nren2024safetywashing,\ntitle={Safetywashing: Do {AI} Safety Benchmarks Actually Measure Safety Progress?},\nauthor={Richard Ren and Steven Basart and Adam Khoja and Alice Gatti and Long Phan and Xuwang Yin and Mantas Mazeika and Alexander Pan and Gabriel Mukobi and Ryan Hwang Kim and Stephen Fitz and Dan Hendrycks},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YagfTP3RK6}\n}", "github": "", "project": "", "reviewers": "6eSn;iAJj;DMFp;VKj4", "site": "https://openreview.net/forum?id=YagfTP3RK6", "pdf_size": 656745, "rating": "6;6;7;7", "confidence": "4;3;5;4", "wc_summary_and_contributions": "71;97;131;246", "wc_strengths": "39;33;38;97", "wc_improvement": "401;222;206;52", "wc_limitations": "18;23;37;190", "wc_correctness": "19;44;14;41", "wc_clarity": "14;1;131;1", "wc_relation_to_prior_work": "1;7;9;1", "wc_documentation": "1;2;1;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "565;430;568;639", "wc_reply_reviewers": "19;23;49;0", "wc_reply_authors": "36;36;36;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;3;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 136.25, 66.84076226375639 ], "wc_strengths_avg": [ 51.75, 26.223796445213647 ], "wc_improvement_avg": [ 220.25, 123.67775669052216 ], "wc_limitations_avg": [ 67.0, 71.35474756454542 ], "wc_correctness_avg": [ 29.5, 13.162446581088183 ], "wc_clarity_avg": [ 36.75, 54.67346248409735 ], "wc_relation_to_prior_work_avg": [ 4.5, 3.570714214271425 ], "wc_documentation_avg": [ 3.5, 3.774917217635375 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 550.5, 75.61249896677135 ], "wc_reply_reviewers_avg": [ 22.75, 17.469616481193857 ], "wc_reply_authors_avg": [ 27.0, 15.588457268119896 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10811389902768550314&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;1;1;3;2;4;5;1", "aff_unique_norm": "Wharton School;Center for AI Safety;University of California, Berkeley;University of Illinois;Stanford University;Yale University", "aff_unique_dep": ";;;;Computer Science Department;", "aff_unique_url": "https://www.wharton.upenn.edu;https://www.centerforaisafety.org;https://www.berkeley.edu;https://illinois.edu;https://www.stanford.edu;https://www.yale.edu", "aff_unique_abbr": "Wharton;;UC Berkeley;UIUC;Stanford;Yale", "aff_campus_unique_index": "1;2;1;3", "aff_campus_unique": ";Berkeley;Urbana-Champaign;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Full-duplex Speech Dialogue Scheme Based On Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94688", "id": "YawXY6mWiK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YawXY6mWiK", "openreview": "https://openreview.net/forum?id=YawXY6mWiK", "poster": "/media/PosterPDFs/NeurIPS%202024/94688.png?t=1731758019.011601", "project": "", "author_site": "Peng Wang, Songshuo Lu, Yaohua Tang, Sijie Yan, Wei Xia, Yuanjun Xiong", "tldr": "", "abstract": "We present a generative dialogue system capable of operating in a full-duplex manner, allowing for seamless interaction. It is based on a large language model (LLM) carefully aligned to be aware of a perception module, a motor function module, and the concept of a simple finite state machine (called neural FSM) with two states. The perception and motor function modules operate in tandem, allowing the system to speak and listen to the user simultaneously. The LLM generates textual tokens for inquiry responses and makes autonomous decisions to start responding to, wait for, or interrupt the user by emitting control tokens to the neural FSM. All these tasks of the LLM are carried out as next token prediction on a serialized view of the dialogue in real-time. In automatic quality evaluations simulating real-life interaction, the proposed system reduces the average conversation response latency by more than threefold compared with LLM-based half-duplex dialogue systems while responding within less than 500 milliseconds in more than 50% of evaluated interactions. Running an LLM with only 8 billion parameters, our system exhibits an 8% higher interruption precision rate than the best available commercial LLM for voice-based dialogue.", "keywords": "Speech based conversation; large language models; full duplex; instruction tuning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a92bb779e0d30e3ad9573f0a6c1987dfdcb6db9d.zip", "author": "Peng Wang;Songshuo Lu;Yaohua Tang;Sijie Yan;Wei Xia;Yuanjun Xiong", "authorids": "~Peng_Wang28;~Songshuo_Lu1;~Yaohua_Tang1;~Sijie_Yan2;~Wei_Xia6;~Yuanjun_Xiong3", "gender": "M;M;M;M;M;M", "homepage": ";;;;http://yjxiong.me/;", "dblp": ";;;;142/2644;12/4454.html", "google_scholar": "vLN6gsMAAAAJ;q0kltdsAAAAJ;https://scholar.google.com.hk/citations?user=tAgSyxIAAAAJ;OCdJxC8AAAAJ;ojKsx6AAAAAJ;", "orcid": ";;;0009-0004-1073-1533;;", "linkedin": ";;;wei-xia/;;https://www.linkedin.cn/incareer/in/yaohua-tang-75651567", "or_profile": "~Peng_Wang28;~Songshuo_Lu1;~Sijie_Yan2;~Wei_Xia6;~Yuanjun_Xiong3;~Yaohua_Tang2", "aff": "Zhejiang University;Moore Threads Technology Co. Ltd;MT;Amazon;Moore Threads Inc.;Moore Threads AI", "aff_domain": "zju.edu.cn;mthreads.com;mthreads.com;amazon.com;moorethreads.com;mthreads.com", "position": "MS student;Researcher;Researcher;Sr. Principal Scientist;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A Full-duplex Speech Dialogue Scheme Based On Large Language Model},\nauthor={Peng Wang and Songshuo Lu and Yaohua Tang and Sijie Yan and Wei Xia and Yuanjun Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YawXY6mWiK}\n}", "github": "", "reviewers": "9Sdg;hGxq;Haqd", "pdf_size": 406499, "rating": "6;8;8", "confidence": "2;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "61;92;78", "wc_strengths": "43;49;70", "wc_weaknesses": "87;245;139", "wc_questions": "40;67;172", "wc_limitations": "1;1;32", "wc_review": "232;454;491", "wc_reply_reviewers": "41;61;0", "wc_reply_authors": "15;11;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 12.675435561221029 ], "wc_strengths_avg": [ 54.0, 11.575836902790225 ], "wc_weaknesses_avg": [ 157.0, 65.74698979167539 ], "wc_questions_avg": [ 93.0, 56.938563381947034 ], "wc_limitations_avg": [ 11.333333333333334, 14.613540144521982 ], "wc_review_avg": [ 392.3333333333333, 114.37462811111368 ], "wc_reply_reviewers_avg": [ 34.0, 25.39028685672272 ], "wc_reply_authors_avg": [ 8.666666666666666, 6.342099196813483 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10014566503047755341&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;mthreads.com;mthreads.com;amazon.com;moorethreads.com;mthreads.com", "author_num": 6, "aff_unique_index": "0;1;3;4;5", "aff_unique_norm": "Zhejiang University;Moore Threads Technology Co. Ltd;;Amazon;Moore Threads Inc.;Moore Threads", "aff_unique_dep": ";;;Amazon.com, Inc.;;AI", "aff_unique_url": "https://www.zju.edu.cn;https://www.moorethreads.com;;https://www.amazon.com;https://www.moorethreads.com;https://www.moorethreads.com", "aff_unique_abbr": "ZJU;;;Amazon;MTI;Moore Threads", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;0;0", "aff_country_unique": "China;;United States" }, { "title": "VidMan: Exploiting Implicit Dynamics from Video Diffusion Model for Effective Robot Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94687", "id": "YbhHz0X2j5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YbhHz0X2j5", "openreview": "https://openreview.net/forum?id=YbhHz0X2j5", "poster": "/media/PosterPDFs/NeurIPS%202024/94687.png?t=1730970602.9396963", "project": "", "author_site": "Youpeng Wen, Junfan Lin, Yi Zhu, Jianhua Han, Hang Xu, Shen Zhao, Xiaodan Liang", "tldr": "", "abstract": "Recent advancements utilizing large-scale video data for learning video generation models demonstrate significant potential in understanding complex physical dynamics. It suggests the feasibility of leveraging diverse robot trajectory data to develop a unified, dynamics-aware model to enhance robot manipulation. However, given the relatively small amount of available robot data, directly fitting data without considering the relationship between visual observations and actions could lead to suboptimal data utilization. To this end, we propose \\textbf{VidMan} (\\textbf{Vid}eo Diffusion for Robot \\textbf{Man}ipulation), a novel framework that employs a two-stage training mechanism inspired by dual-process theory from neuroscience to enhance stability and improve data utilization efficiency. Specifically, in the first stage, VidMan is pre-trained on the Open X-Embodiment dataset (OXE) for predicting future visual trajectories in a video denoising diffusion manner, enabling the model to develop a long horizontal awareness of the environment's dynamics. In the second stage, a flexible yet effective layer-wise self-attention adapter is introduced to transform VidMan into an efficient inverse dynamics model that predicts action modulated by the implicit dynamics knowledge via parameter sharing. Our VidMan framework outperforms state-of-the-art baseline model GR-1 on the CALVIN benchmark, achieving a 11.7\\% relative improvement, and demonstrates over 9\\% precision gains on the OXE small-scale dataset. These results provide compelling evidence that world models can significantly enhance the precision of robot action prediction. Codes and models will be public.", "keywords": "Imitation learning;Video prediction;Robot Manipulation", "primary_area": "robotics", "supplementary_material": "/attachment/ef685f0a923478afc37e1d7dc44ef9f00ffe4a58.zip", "author": "Youpeng Wen;Junfan Lin;Yi Zhu;Jianhua Han;Hang Xu;Shen Zhao;Xiaodan Liang", "authorids": "~Youpeng_Wen1;~Junfan_Lin1;~Yi_Zhu3;~Jianhua_Han1;~Hang_Xu1;~Shen_Zhao1;~Xiaodan_Liang2", "gender": "M;M;F;M;M;M;F", "homepage": ";https://github.com/junfanlin;https://yeezhu.github.io;;;https://ise.sysu.edu.cn/teacher/teacher02/1372092.htm;https://www.sysu-hcp.net/", "dblp": "324/5291.html;260/6800;;29/6207;;158/6541;", "google_scholar": "S54_OvAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;OEPMQEMAAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;voxznZAAAAAJ", "orcid": "0000-0001-5422-8922;;0000-0002-5087-895X;;0000-0003-3645-8972;;", "linkedin": ";;;;;;", "or_profile": "~Youpeng_Wen1;~Junfan_Lin1;~Yi_Zhu3;~Jianhua_Han1;~Hang_Xu1;~Shen_Zhao1;~Xiaodan_Liang2", "aff": "SUN YAT-SEN UNIVERSITY;Pengcheng Laboratory;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah\u2018s Ark Lab;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;pcl.ac.cn;huawei.com;huawei.com;huawei.com;sysu.edu.cn;sysu.edu.cn", "position": "MS student;Postdoc;Researcher;Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwen2024vidman,\ntitle={VidMan: Exploiting Implicit Dynamics from Video Diffusion Model for Effective Robot Manipulation},\nauthor={Youpeng Wen and Junfan Lin and Yi Zhu and Jianhua Han and Hang Xu and Shen Zhao and Xiaodan Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YbhHz0X2j5}\n}", "github": "", "reviewers": "5fDE;NqNP;Yu5P;11a6;yiak", "pdf_size": 3971583, "rating": "5;5;6;7;9", "confidence": "4;4;5;3;4", "soundness": "2;3;1;3;4", "novelty": "2;3;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "52;62;100;94;92", "wc_strengths": "99;45;130;61;78", "wc_weaknesses": "212;101;791;269;280", "wc_questions": "38;71;230;47;163", "wc_limitations": "6;1;1;30;27", "wc_review": "407;280;1252;501;640", "wc_reply_reviewers": "250;176;655;500;318", "wc_reply_authors": "1871;2264;2612;1965;1582", "reply_reviewers": "2;2;2;2;1", "reply_authors": "5;6;5;6;4", "rating_avg": [ 6.4, 1.4966629547095764 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.0, 19.224983745116667 ], "wc_strengths_avg": [ 82.6, 29.722718583602003 ], "wc_weaknesses_avg": [ 330.6, 238.79078709196466 ], "wc_questions_avg": [ 109.8, 74.67636841732464 ], "wc_limitations_avg": [ 13.0, 12.821856339859686 ], "wc_review_avg": [ 616.0, 339.0793417476211 ], "wc_reply_reviewers_avg": [ 379.8, 174.57651617557272 ], "wc_reply_authors_avg": [ 2058.8, 352.0064772131331 ], "reply_reviewers_avg": [ 1.8, 0.4 ], "reply_authors_avg": [ 5.2, 0.7483314773547882 ], "replies_avg": [ 44, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.21128856368212912, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15054210444817662405&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sysu.edu.cn;pcl.ac.cn;huawei.com;huawei.com;huawei.com;sysu.edu.cn;sysu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;2;0;0", "aff_unique_norm": "Sun Yat-sen University;Pengcheng Laboratory;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "http://www.sysu.edu.cn;;https://www.huawei.com", "aff_unique_abbr": "SYSU;;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Robust Sparse Regression with Non-Isotropic Designs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94686", "id": "YbsvNFD21C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YbsvNFD21C", "openreview": "https://openreview.net/forum?id=YbsvNFD21C", "poster": "", "project": "", "author_site": "Chih-Hung Liu, Gleb Novikov", "tldr": "", "abstract": "We develop a technique to design efficiently computable estimators for sparse linear regression in the simultaneous presence of two adversaries: oblivious and adaptive.\nConsider the model $y^*=X^*\\beta^*+ \\eta$ where $X^*$ is an $n\\times d$ random design matrix, $\\beta^*\\in \\mathbb{R}^d$ is a $k$-sparse vector, and the noise $\\eta$ is independent of $X^*$ and chosen by the \\emph{oblivious adversary}. \nApart from the independence of $X^*$, we only require a small fraction entries of $\\eta$ to have magnitude at most $1$. \nThe \\emph{adaptive adversary} is allowed to arbitrarily corrupt an $\\varepsilon$-fraction of the samples $(X_1^*, y_1^*),\\ldots, (X_n^*, y_n^*)$.\nGiven the $\\varepsilon$-corrupted samples $(X_1, y_1),\\ldots, (X_n, y_n)$, the goal is to estimate $\\beta^*$. \nWe assume that the rows of $X^*$ are iid samples from some $d$-dimensional distribution $\\mathcal{D}$ with zero mean and (unknown) covariance matrix $\\Sigma$ with bounded condition number.\n\nWe design several robust algorithms that outperform the state of the art even in the special case of Gaussian noise $\\eta \\sim N(0,1)^n$. \nIn particular, we provide a polynomial-time algorithm that with high probability recovers $\\beta^*$ up to error $O(\\sqrt{\\varepsilon})$ as long as $n \\ge \\tilde{O}(k^2/\\varepsilon)$, only assuming some bounds on the third and the fourth moments of $\\mathcal{D}$. \nIn addition, prior to this work, even in the special case of Gaussian design $\\mathcal{D} = N(0,\\Sigma)$ and noise $\\eta \\sim N(0,1)$, no polynomial time algorithm was known to achieve error $o(\\sqrt{\\varepsilon})$ in the sparse setting $n < d^2$. \nWe show that under some assumptions on the fourth and the eighth moments of $\\mathcal{D}$, there is a polynomial-time algorithm that achieves error $o(\\sqrt{\\varepsilon})$ as long as $n \\ge \\tilde{O}(k^4 / \\varepsilon^3)$. \nFor Gaussian distribution $\\mathcal{D} = N(0,\\Sigma)$, this algorithm achieves error $O(\\varepsilon^{3/4})$. \nMoreover, our algorithm achieves error $o(\\sqrt{\\varepsilon})$ for all log-concave distributions if $\\varepsilon \\le 1/\\text{polylog(d)}$. \n\nOur algorithms are based on the filtering of the covariates that uses sum-of-squares relaxations, and weighted Huber loss minimization with $\\ell_1$ regularizer. We provide a novel analysis of weighted penalized Huber loss that is suitable for heavy-tailed designs in the presence of two adversaries. Furthermore, we complement our algorithmic results with Statistical Query lower bounds, providing evidence that our estimators are likely to have nearly optimal sample complexity.", "keywords": "linear regression;sparse regression;robust regression;sum-of-squares", "primary_area": "learning_theory", "supplementary_material": "", "author": "Chih-Hung Liu;Gleb Novikov", "authorids": "~Chih-Hung_Liu1;~Gleb_Novikov1", "gender": "M;", "homepage": "https://www.ee.ntu.edu.tw/profile1.php?id=1090722;", "dblp": "82/1212;200/9864", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Chih-Hung_Liu1;~Gleb_Novikov1", "aff": "National Taiwan University;HSLU - Lucerne University of Applied Sciences and Arts", "aff_domain": "ntu.edu.tw;hslu.ch", "position": "Associate Professor;Researcher", "bibtex": "@inproceedings{\nliu2024robust,\ntitle={Robust Sparse Regression with Non-Isotropic Designs},\nauthor={Chih-Hung Liu and Gleb Novikov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YbsvNFD21C}\n}", "github": "", "reviewers": "sWxz;e1Aa;mVbA;3A8o;JPFs", "pdf_size": 347920, "rating": "4;5;5;6;7", "confidence": "3;2;2;2;3", "soundness": "3;2;2;3;4", "novelty": "2;3;3;3;4", "presentation": "1;1;3;3;4", "wc_summary": "51;140;33;73;686", "wc_strengths": "54;1;43;63;18", "wc_weaknesses": "234;1;29;35;41", "wc_questions": "132;401;1;10;27", "wc_limitations": "163;1;22;1;1", "wc_review": "634;544;128;182;773", "wc_reply_reviewers": "14;69;11;0;11", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 2.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.4, 1.2 ], "wc_summary_avg": [ 196.6, 247.36903605746616 ], "wc_strengths_avg": [ 35.8, 23.025203582161875 ], "wc_weaknesses_avg": [ 68.0, 84.12371841520084 ], "wc_questions_avg": [ 114.2, 150.89519541721666 ], "wc_limitations_avg": [ 37.6, 63.225311387133566 ], "wc_review_avg": [ 452.2, 253.97039197512768 ], "wc_reply_reviewers_avg": [ 21.0, 24.470390270692455 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.08006407690254366, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8282292398632377603&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ntu.edu.tw;hslu.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "National Taiwan University;Lucerne University of Applied Sciences and Arts", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.tw;https://www.hslu.ch", "aff_unique_abbr": "NTU;HSLU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Taiwan;Lucerne", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Switzerland" }, { "title": "Can Learned Optimization Make Reinforcement Learning Less Difficult?", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94685", "id": "YbxFwaSA9Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YbxFwaSA9Z", "openreview": "https://openreview.net/forum?id=YbxFwaSA9Z", "poster": "", "project": "", "author_site": "Alexander D. Goldie, Chris Lu, Matthew T Jackson, Shimon Whiteson, Jakob Foerster", "tldr": "", "abstract": "While reinforcement learning (RL) holds great potential for decision making in the real world, it suffers from a number of unique difficulties which often need specific consideration. In particular: it is highly non-stationary; suffers from high degrees of plasticity loss; and requires exploration to prevent premature convergence to local optima and maximize return. In this paper, we consider whether learned optimization can help overcome these problems. Our method, Learned **O**ptimization for **P**lasticity, **E**xploration and **N**on-stationarity (*OPEN*), meta-learns an update rule whose input features and output structure are informed by previously proposed solutions to these difficulties. We show that our parameterization is flexible enough to enable meta-learning in diverse learning contexts, including the ability to use stochasticity for exploration. Our experiments demonstrate that when meta-trained on single and small sets of environments, *OPEN* outperforms or equals traditionally used optimizers. Furthermore, *OPEN* shows strong generalization characteristics across a range of environments and agent architectures.", "keywords": "Meta-Learning;Reinforcement Learning;Learned Optimization;Deep Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Alexander D. Goldie;Chris Lu;Matthew Thomas Jackson;Shimon Whiteson;Jakob Nicolaus Foerster", "authorids": "~Alexander_D._Goldie1;~Chris_Lu1;~Matthew_Thomas_Jackson1;~Shimon_Whiteson1;~Jakob_Nicolaus_Foerster1", "gender": ";M;;M;M", "homepage": ";https://matthewtjackson.com;;https://www.jakobfoerster.com;", "dblp": "77/9579;331/5748;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;176/5095;383/3486", "google_scholar": "4WLoIRsAAAAJ;SdGawnwAAAAJ;;6z4lQzMAAAAJ;wogOjBsAAAAJ", "orcid": ";;;;", "linkedin": ";matthew-t-jackson/;;;alex-goldie-ml/", "or_profile": "~Chris_Lu1;~Matthew_Thomas_Jackson1;~Shimon_Whiteson1;~Jakob_Nicolaus_Foerster1;~Alex_Goldie1", "aff": "University of Oxford;Wayve;University of Oxford;University of Oxford, University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;wayve.ai;ox.ac.uk;eng.ox.ac.uk;ox.ac.uk", "position": "PhD student;Intern;Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\ngoldie2024can,\ntitle={Can Learned Optimization Make Reinforcement Learning Less Difficult?},\nauthor={Alexander D. Goldie and Chris Lu and Matthew Thomas Jackson and Shimon Whiteson and Jakob Nicolaus Foerster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YbxFwaSA9Z}\n}", "github": "", "reviewers": "qjkS;Fr37;sKXo;KVk8", "pdf_size": 6154122, "rating": "7;7;7;8", "confidence": "4;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;4;3;4", "wc_summary": "65;70;34;89", "wc_strengths": "132;22;84;176", "wc_weaknesses": "129;174;121;85", "wc_questions": "115;20;39;120", "wc_limitations": "70;18;1;46", "wc_review": "511;304;279;516", "wc_reply_reviewers": "551;141;11;34", "wc_reply_authors": "711;299;0;39", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 64.5, 19.75474626513841 ], "wc_strengths_avg": [ 103.5, 57.207953992430106 ], "wc_weaknesses_avg": [ 127.25, 31.67313530422904 ], "wc_questions_avg": [ 73.5, 44.54492114708477 ], "wc_limitations_avg": [ 33.75, 26.38536526182649 ], "wc_review_avg": [ 402.5, 111.36538959658876 ], "wc_reply_reviewers_avg": [ 184.25, 217.35152978527665 ], "wc_reply_authors_avg": [ 262.25, 283.4355085376566 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2748564723363317049&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;wayve.ai;ox.ac.uk;eng.ox.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Oxford;Wayve", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.wayve.ai", "aff_unique_abbr": "Oxford;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "GenRec: Unifying Video Generation and Recognition with Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94684", "id": "YdfZP7qMzp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YdfZP7qMzp", "openreview": "https://openreview.net/forum?id=YdfZP7qMzp", "poster": "/media/PosterPDFs/NeurIPS%202024/94684.png?t=1731741972.6765552", "project": "", "author_site": "Zejia Weng, Xitong Yang, Zhen Xing, Zuxuan Wu, Yu-Gang Jiang", "tldr": "", "abstract": "Video diffusion models are able to generate high-quality videos by learning strong spatial-temporal priors on large-scale datasets. In this paper, we aim to investigate whether such priors derived from a generative process are suitable for video recognition, and eventually joint optimization of generation and recognition. Building upon Stable Video Diffusion, we introduce GenRec, the first unified framework trained with a random-frame conditioning process so as to learn generalized spatial-temporal representations. The resulting framework can naturally supports generation and recognition, and more importantly is robust even when visual inputs contain limited information. \nExtensive experiments demonstrate the efficacy of GenRec for both recognition and generation. In particular, GenRec achieves competitive recognition performance, offering 75.8% and 87.2% accuracy on SSV2 and K400, respectively. GenRec also performs the best on class-conditioned image-to-video generation, achieving 46.5 and 49.3 FVD scores on SSV2 and EK-100 datasets. Furthermore, GenRec demonstrates extraordinary robustness in scenarios that only limited frames can be observed. Code will be available at https://github.com/wengzejia1/GenRec.", "keywords": "video understanding;video generation;diffusion", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zejia Weng;Xitong Yang;Zhen Xing;Zuxuan Wu;Yu-Gang Jiang", "authorids": "~Zejia_Weng1;~Xitong_Yang2;~Zhen_Xing2;~Zuxuan_Wu1;~Yu-Gang_Jiang1", "gender": "M;M;M;M;M", "homepage": ";;https://chenhsing.github.io/;https://zxwu.azurewebsites.net/;https://fvl.fudan.edu.cn/people/yugangjiang/", "dblp": "268/5837;168/8851;26/9542;150/8447;24/5818", "google_scholar": "qMT0sqAAAAAJ;k0qC-7AAAAAJ;yuiXa5EAAAAJ;7t12hVkAAAAJ;f3_FP8AAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zejia_Weng1;~Xitong_Yang2;~Zhen_Xing2;~Zuxuan_Wu1;~Yu-Gang_Jiang1", "aff": "Fudan University;Meta;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;meta.com;fudan.edu.cn;fudan.edu;fudan.edu.cn", "position": "PhD student;Researcher;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nweng2024genrec,\ntitle={GenRec: Unifying Video Generation and Recognition with Diffusion Models},\nauthor={Zejia Weng and Xitong Yang and Zhen Xing and Zuxuan Wu and Yu-Gang Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YdfZP7qMzp}\n}", "github": "", "reviewers": "C3Pd;JBof;caZh;PRtT;96MS", "pdf_size": 4225020, "rating": "5;5;5;7;9", "confidence": "5;3;4;4;5", "soundness": "3;3;2;3;4", "novelty": "3;2;2;3;4", "presentation": "2;2;2;4;4", "wc_summary": "77;44;60;185;87", "wc_strengths": "46;38;24;140;147", "wc_weaknesses": "144;212;95;142;147", "wc_questions": "10;32;20;109;70", "wc_limitations": "6;1;7;1;1", "wc_review": "283;327;206;577;452", "wc_reply_reviewers": "57;0;28;125;22", "wc_reply_authors": "219;43;43;222;40", "reply_reviewers": "1;0;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 1.6 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 90.6, 49.431164258997576 ], "wc_strengths_avg": [ 79.0, 53.178943201233324 ], "wc_weaknesses_avg": [ 148.0, 37.304155264527836 ], "wc_questions_avg": [ 48.2, 36.57540156990761 ], "wc_limitations_avg": [ 3.2, 2.7129319932501073 ], "wc_review_avg": [ 369.0, 131.0587654451239 ], "wc_reply_reviewers_avg": [ 46.4, 43.30635057355907 ], "wc_reply_authors_avg": [ 113.4, 87.45879029577301 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.46770717334674267, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16464180248961849438&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;meta.com;fudan.edu.cn;fudan.edu;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Fudan University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://meta.com", "aff_unique_abbr": "Fudan;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "realSEUDO for real-time calcium imaging analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94683", "id": "Ye0O4Nyn21", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ye0O4Nyn21", "openreview": "https://openreview.net/forum?id=Ye0O4Nyn21", "poster": "/media/PosterPDFs/NeurIPS%202024/94683.png?t=1733319362.0664818", "project": "", "author_site": "Iuliia Dmitrieva, Sergey Babkin, Adam Charles", "tldr": "", "abstract": "Closed-loop neuroscience experimentation, where recorded neural activity is used to modify the experiment on-the-fly, is critical for deducing causal connections and optimizing experimental time. Thus while new optical methods permit on-line recording (via Multi-photon calcium imaging) and stimulation (via holographic stimulation) of large neural populations, a critical barrier in creating closed-loop experiments that can target and modulate single neurons is the real-time inference of neural activity from streaming recordings. In particular, while multi-photon calcium imaging (CI) is crucial in monitoring neural populations, extracting a single neuron's activity from the fluorescence videos often requires batch processing of the video data. Without batch processing, dimmer neurons and events are harder to identify and unrecognized neurons can create false positives when computing the activity of known neurons. We solve these issues by adapting a recently proposed robust time-trace estimator---Sparse Emulation of Unused Dictionary Objects (SEUDO) algorithm---as a basis for a new on-line processing algorithm that simultaneously identifies neurons in the fluorescence video and infers their time traces in a way that is robust to as-yet unidentified neurons. To achieve real-time SEUDO (realSEUDO), we introduce a combination of new algorithmic improvements, a fast C-based implementation, and a new cell finding loop to enable realSEUDO to identify new cells on-the-fly with no \"warm-up\" period. We demonstrate comparable performance to offline algorithms (e.g., CNMF), and improved performance over the current on-line approach (OnACID) at speeds of 120 Hz on average. This speed is faster than the typical 30 Hz framerate, leaving critical computation time for the computation of feedback in a closed-loop setting.", "keywords": "Calcium imaging;Real-time processing;Neuroimaging;Robust estimation;Sparse models", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Iuliia Dmitrieva;Sergey Babkin;Adam Shabti Charles", "authorids": "~Iuliia_Dmitrieva1;~Sergey_Babkin1;~Adam_Shabti_Charles1", "gender": "F;M;M", "homepage": ";;https://www.bme.jhu.edu/ascharles/", "dblp": ";;04/10257", "google_scholar": "lwp0YwgAAAAJ;;c8RKLp0AAAAJ", "orcid": ";;", "linkedin": ";sergey-babkin-2404211/;", "or_profile": "~Iuliia_Dmitrieva1;~Sergey_Babkin1;~Adam_Shabti_Charles1", "aff": "Johns Hopkins University;Oculus;Johns Hopkins University", "aff_domain": "jh.edu;oculus.com;jhu.edu", "position": "Undergrad student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndmitrieva2024realseudo,\ntitle={real{SEUDO} for real-time calcium imaging analysis},\nauthor={Iuliia Dmitrieva and Sergey Babkin and Adam Shabti Charles},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ye0O4Nyn21}\n}", "github": "", "reviewers": "uJzY;Xdei;2uGa;86dC", "pdf_size": 7341280, "rating": "6;6;7;8", "confidence": "3;2;5;4", "soundness": "3;3;4;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "93;71;46;59", "wc_strengths": "120;41;61;84", "wc_weaknesses": "158;137;86;83", "wc_questions": "94;66;156;1", "wc_limitations": "13;7;2;97", "wc_review": "478;322;351;324", "wc_reply_reviewers": "22;95;28;0", "wc_reply_authors": "0;68;30;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.25, 17.297037318569906 ], "wc_strengths_avg": [ 76.5, 29.364093720052047 ], "wc_weaknesses_avg": [ 116.0, 32.380549717384355 ], "wc_questions_avg": [ 79.25, 55.69279576390469 ], "wc_limitations_avg": [ 29.75, 39.02162861798569 ], "wc_review_avg": [ 368.75, 64.10684440837811 ], "wc_reply_reviewers_avg": [ 36.25, 35.48503205578375 ], "wc_reply_authors_avg": [ 24.5, 27.941904015295737 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Vo2uYyJ1Q2EJ:scholar.google.com/&scioq=realSEUDO+for+real-time+calcium+imaging+analysis&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "jh.edu;oculus.com;jhu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Johns Hopkins University;Oculus VR", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.oculus.com", "aff_unique_abbr": "JHU;Oculus", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CRONOS: Enhancing Deep Learning with Scalable GPU Accelerated Convex Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94682", "id": "YfLzYczAo3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YfLzYczAo3", "openreview": "https://openreview.net/forum?id=YfLzYczAo3", "poster": "", "project": "", "author_site": "Miria Feng, Zachary Frangella, Mert Pilanci", "tldr": "", "abstract": "We introduce the CRONOS algorithm for convex optimization of two-layer neural networks. \nCRONOS is the first algorithm capable of scaling to high-dimensional datasets such as ImageNet, which are ubiquitous in modern deep learning. \nThis significantly improves upon prior work, which has been restricted to downsampled versions of MNIST and CIFAR-10.\nTaking CRONOS as a primitive, we then develop a new algorithm called CRONOS-AM, which combines CRONOS with alternating minimization, to obtain an algorithm capable of training multi-layer networks with arbitrary architectures.\nOur theoretical analysis proves that CRONOS converges to the global minimum of the convex reformulation under mild assumptions. \nIn addition, we validate the efficacy of CRONOS and CRONOS-AM through extensive large-scale numerical experiments with GPU acceleration in JAX.\nOur results show that CRONOS-AM can obtain comparable or better validation accuracy than predominant tuned deep learning optimizers on vision and language tasks with benchmark datasets such as ImageNet and IMDb.\nTo the best of our knowledge, CRONOS is the first algorithm which utilizes the convex reformulation to enhance performance on large-scale learning tasks.", "keywords": "convex neural networks;deep learning;admm;alternating minimization;gpu acceleration", "primary_area": "optimization", "supplementary_material": "/attachment/5543dedbf4650eb9b2eb9a99f6dd3992ca147012.zip", "author": "Miria Feng;Zachary Frangella;Mert Pilanci", "authorids": "~Miria_Feng1;~Zachary_Frangella1;~Mert_Pilanci3", "gender": ";;M", "homepage": ";https://github.com/zjf4;https://stanford.edu/~pilanci/", "dblp": ";298/0473;45/8056", "google_scholar": ";;aSAS-aAAAAAJ", "orcid": ";;", "linkedin": ";;mert-pilanci-ba615743/", "or_profile": "~Miria_Feng1;~Zachary_Frangella1;~Mert_Pilanci3", "aff": ";Stanford University;Stanford University", "aff_domain": ";stanford.edu;stanford.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfeng2024cronos,\ntitle={{CRONOS}: Enhancing Deep Learning with Scalable {GPU} Accelerated Convex Neural Networks},\nauthor={Miria Feng and Zachary Frangella and Mert Pilanci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YfLzYczAo3}\n}", "github": "", "reviewers": "dmGV;JCYR;inX1;iBsY", "pdf_size": 2951832, "rating": "5;5;5;7", "confidence": "3;3;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "129;245;210;93", "wc_strengths": "71;49;67;72", "wc_weaknesses": "92;258;197;22", "wc_questions": "174;80;149;7", "wc_limitations": "85;2;1;16", "wc_review": "551;634;624;210", "wc_reply_reviewers": "64;45;0;0", "wc_reply_authors": "118;22;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 169.25, 60.894888948088244 ], "wc_strengths_avg": [ 64.75, 9.283722313813572 ], "wc_weaknesses_avg": [ 142.25, 91.35199778877308 ], "wc_questions_avg": [ 102.5, 65.00192304847603 ], "wc_limitations_avg": [ 26.0, 34.57600323924094 ], "wc_review_avg": [ 504.75, 173.16375919920426 ], "wc_reply_reviewers_avg": [ 27.25, 28.065770967497045 ], "wc_reply_authors_avg": [ 35.0, 48.75448697299562 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5970185512766344106&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": ";stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Collaboration! Towards Robust Neural Methods for Routing Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94681", "id": "YfQA78gEFA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YfQA78gEFA", "openreview": "https://openreview.net/forum?id=YfQA78gEFA", "poster": "/media/PosterPDFs/NeurIPS%202024/94681.png?t=1731731017.5817306", "project": "", "author_site": "Jianan Zhou, Yaoxin Wu, Zhiguang Cao, Wen Song, Jie Zhang, Zhiqi Shen", "tldr": "", "abstract": "Despite enjoying desirable efficiency and reduced reliance on domain expertise, existing neural methods for vehicle routing problems (VRPs) suffer from severe robustness issues \u2014 their performance significantly deteriorates on clean instances with crafted perturbations. To enhance robustness, we propose an ensemble-based *Collaborative Neural Framework (CNF)* w.r.t. the defense of neural VRP methods, which is crucial yet underexplored in the literature. Given a neural VRP method, we adversarially train multiple models in a collaborative manner to synergistically promote robustness against attacks, while boosting standard generalization on clean instances. A neural router is designed to adeptly distribute training instances among models, enhancing overall load balancing and collaborative efficacy. Extensive experiments verify the effectiveness and versatility of CNF in defending against various attacks across different neural VRP methods. Notably, our approach also achieves impressive out-of-distribution generalization on benchmark instances.", "keywords": "Combinatorial Optimization;Vehicle Routing Problem;Ensemble;Robustness", "primary_area": "optimization", "supplementary_material": "/attachment/c1737cc754b6118c5ff5bd607616562824908ad8.zip", "author": "Jianan Zhou;Yaoxin Wu;Zhiguang Cao;Wen Song;Jie Zhang;Zhiqi Shen", "authorids": "~Jianan_Zhou1;~Yaoxin_Wu2;~Zhiguang_Cao1;~Wen_Song1;~Jie_Zhang9;~Zhiqi_Shen2", "gender": "M;M;M;M;M;M", "homepage": "https://royalskye.github.io/;https://zhiguangcaosg.github.io/;https://songwenas12.github.io/;https://personal.ntu.edu.sg/zhangj/;https://dr.ntu.edu.sg/cris/rp/rp00227;https://research.tue.nl/en/persons/yaoxin-wu", "dblp": "296/2326-2;178/8621;50/5489;84/6889-2;03/1554-1.html;192/4964", "google_scholar": "9T58m-EAAAAJ;https://scholar.google.com.sg/citations?user=2R-cOkYAAAAJ;s8Nz-xoAAAAJ;IFV_RdMAAAAJ;https://scholar.google.com.sg/citations?user=EA2T_lwAAAAJ;0qRnmK8AAAAJ", "orcid": "0000-0002-4896-148X;0000-0002-4499-759X;0000-0001-7624-1861;;0000-0001-7626-7295;0000-0002-3625-6599", "linkedin": ";;;;;", "or_profile": "~Jianan_Zhou1;~Zhiguang_Cao1;~Wen_Song1;~Jie_Zhang9;~Zhiqi_Shen2;~YAOXIN_WU1", "aff": "Nanyang Technological University;Singapore Management University;Shandong University;Nanyang Technological University;Nanyang Technological University;Eindhoven University of Technology", "aff_domain": "ntu.edu.sg;smu.edu.sg;sdu.edu.cn;ntu.edu.sg;ntu.edu.sg;tue.nl", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024collaboration,\ntitle={Collaboration! Towards Robust Neural Methods for Routing Problems},\nauthor={Jianan Zhou and Yaoxin Wu and Zhiguang Cao and Wen Song and Jie Zhang and Zhiqi Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YfQA78gEFA}\n}", "github": "", "reviewers": "xLJf;d2Cb;AuRq;DziC", "pdf_size": 771794, "rating": "5;6;6;6", "confidence": "4;3;5;2", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "43;58;28;88", "wc_strengths": "2;87;25;31", "wc_weaknesses": "16;181;36;30", "wc_questions": "74;126;146;5", "wc_limitations": "1;51;7;10", "wc_review": "136;503;242;164", "wc_reply_reviewers": "14;15;11;0", "wc_reply_authors": "119;123;13;63", "reply_reviewers": "1;1;1;0", "reply_authors": "4;3;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.25, 22.18529918662356 ], "wc_strengths_avg": [ 36.25, 31.235996862594284 ], "wc_weaknesses_avg": [ 65.75, 66.9342027665976 ], "wc_questions_avg": [ 87.75, 54.52694288147833 ], "wc_limitations_avg": [ 17.25, 19.753164303473 ], "wc_review_avg": [ 261.25, 144.87818158715274 ], "wc_reply_reviewers_avg": [ 10.0, 5.958187643906492 ], "wc_reply_authors_avg": [ 79.5, 45.13036671687922 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4924226839468829192&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;smu.edu.sg;sdu.edu.cn;ntu.edu.sg;ntu.edu.sg;tue.nl", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "Nanyang Technological University;Singapore Management University;Shandong University;Eindhoven University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.smu.edu.sg;http://www.sdu.edu.cn;https://www.tue.nl", "aff_unique_abbr": "NTU;SMU;SDU;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;2", "aff_country_unique": "Singapore;China;Netherlands" }, { "title": "Physics-Regularized Multi-Modal Image Assimilation for Brain Tumor Localization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94680", "id": "YfVMcbcDqo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YfVMcbcDqo", "openreview": "https://openreview.net/forum?id=YfVMcbcDqo", "poster": "/media/PosterPDFs/NeurIPS%202024/94680.png?t=1732362139.2298865", "project": "", "author_site": "Michal Balcerak, Tamaz Amiranashvili, Andreas Wagner, Jonas Weidner, Petr Karnakov, Johannes C. Paetzold, Ivan Ezhov, Petros Koumoutsakos, Benedikt Wiestler, bjoern menze", "tldr": "", "abstract": "Physical models in the form of partial differential equations serve as important priors for many under-constrained problems. One such application is tumor treatment planning, which relies on accurately estimating the spatial distribution of tumor cells within a patient\u2019s anatomy. While medical imaging can detect the bulk of a tumor, it cannot capture the full extent of its spread, as low-concentration tumor cells often remain undetectable, particularly in glioblastoma, the most common primary brain tumor. Machine learning approaches struggle to estimate the complete tumor cell distribution due to a lack of appropriate training data. Consequently, most existing methods rely on physics-based simulations to generate anatomically and physiologically plausible estimations. However, these approaches face challenges with complex and unknown initial conditions and are constrained by overly rigid physical models. In this work, we introduce a novel method that integrates data-driven and physics-based cost functions, akin to Physics-Informed Neural Networks (PINNs). However, our approach parametrizes the solution directly on a dynamic discrete mesh, allowing for the effective modeling of complex biomechanical behaviors. Specifically, we propose a unique discretization scheme that quantifies how well the learned spatiotemporal distributions of tumor and brain tissues adhere to their respective growth and elasticity equations. This quantification acts as a regularization term, offering greater flexibility and improved integration of patient data compared to existing models. We demonstrate enhanced coverage of tumor recurrence areas using real-world data from a patient cohort, highlighting the potential of our method to improve model-driven treatment planning for glioblastoma in clinical practice.", "keywords": "Inverse Problems;System Identification;Physics-Informed;Biomechanical Modeling;Tumor Growth", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Michal Balcerak;Tamaz Amiranashvili;Andreas Wagner;Jonas Weidner;Petr Karnakov;Johannes C. Paetzold;Ivan Ezhov;Petros Koumoutsakos;Benedikt Wiestler;bjoern menze", "authorids": "~Michal_Balcerak1;~Tamaz_Amiranashvili1;~Andreas_Wagner1;~Jonas_Weidner1;~Petr_Karnakov1;~Johannes_C._Paetzold1;~Ivan_Ezhov1;~Petros_Koumoutsakos1;~Benedikt_Wiestler1;~bjoern_menze1", "gender": "M;M;Not Specified;M;M;;;M;;M", "homepage": "https://github.com/m1balcerak;;https://www.math.cit.tum.de/math/forschung/gruppen/numerical-analysis/;https://scholar.google.com/citations?user=bRLjGLgAAAAJ&hl=de&oi=ao;https://pkarnakov.com;https://aim-lab.io/author/johannes-c.-paetzold/;;http://cse-lab.seas.harvard.edu;;http://home.in.tum.de/~menze/", "dblp": ";303/0588;;;242/9602;239/8775;;70/5363;;74/6505", "google_scholar": "xNOcwM4AAAAJ;0al650gAAAAJ;;bRLjGLgAAAAJ;h6ix7c8AAAAJ;https://scholar.google.de/citations?user=7Bv7PmgAAAAJ;https://scholar.google.com/citations?hl=en;IaDP3mkAAAAJ;;Kv2QrQgAAAAJ", "orcid": "0009-0006-3137-7048;0000-0001-8914-3427;;;0000-0003-1978-6659;;;;;0000-0003-4136-5690", "linkedin": "michal-balcerak-a7180717b/;;;;pkarnakov/;;;;;", "or_profile": "~Michal_Balcerak1;~Tamaz_Amiranashvili1;~Andreas_Wagner1;~Jonas_Weidner1;~Petr_Karnakov1;~Johannes_C._Paetzold1;~Ivan_Ezhov1;~Petros_Koumoutsakos1;~Benedikt_Wiestler1;~bjoern_menze1", "aff": "University of Zurich;Technical University Munich;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;School of Engineering and Applied Sciences, Harvard University;Imperial College London;Technical University Munich;School of Engineering and Applied Sciences, Harvard University;;University of Zurich", "aff_domain": "uzh.ch;tum.de;tum.de;tum.edu;seas.harvard.edu;ic.ac.uk;tum.de;seas.harvard.edu;;uzh.ch", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;Postdoc;Postdoc;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nbalcerak2024physicsregularized,\ntitle={Physics-Regularized Multi-Modal Image Assimilation for Brain Tumor Localization},\nauthor={Michal Balcerak and Tamaz Amiranashvili and Andreas Wagner and Jonas Weidner and Petr Karnakov and Johannes C. Paetzold and Ivan Ezhov and Petros Koumoutsakos and Benedikt Wiestler and bjoern menze},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YfVMcbcDqo}\n}", "github": "", "reviewers": "YNED;smuQ;S9Sg;ZDBA", "pdf_size": 11253986, "rating": "4;5;6;6", "confidence": "4;1;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;4;3;3", "wc_summary": "100;54;97;31", "wc_strengths": "62;67;75;50", "wc_weaknesses": "137;31;148;81", "wc_questions": "69;76;192;21", "wc_limitations": "28;7;56;26", "wc_review": "396;235;568;209", "wc_reply_reviewers": "205;29;19;46", "wc_reply_authors": "464;52;82;28", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.5, 29.176188921790317 ], "wc_strengths_avg": [ 63.5, 9.069178573608527 ], "wc_weaknesses_avg": [ 99.25, 46.884832302142236 ], "wc_questions_avg": [ 89.5, 62.85101431162428 ], "wc_limitations_avg": [ 29.25, 17.483921184905864 ], "wc_review_avg": [ 352.0, 143.81411613607338 ], "wc_reply_reviewers_avg": [ 74.75, 75.81680222747461 ], "wc_reply_authors_avg": [ 156.5, 178.56301408746438 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11228480037092161709&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uzh.ch;tum.de;tum.de;tum.edu;seas.harvard.edu;ic.ac.uk;tum.de;seas.harvard.edu;;uzh.ch", "author_num": 10, "aff_unique_index": "0;1;2;2;3;4;1;3;0", "aff_unique_norm": "University of Zurich;Technical University of Munich;Technische Universit\u00e4t M\u00fcnchen;Harvard University;Imperial College London", "aff_unique_dep": ";;;School of Engineering and Applied Sciences;", "aff_unique_url": "https://www.unizh.ch;https://www.tum.de;https://www.tum.de;https://www.harvard.edu;https://www.imperial.ac.uk", "aff_unique_abbr": "UZH;TUM;TUM;Harvard;ICL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;1;2;3;1;2;0", "aff_country_unique": "Switzerland;Germany;United States;United Kingdom" }, { "title": "Graph-based Uncertainty Metrics for Long-form Language Model Generations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94679", "id": "YgJPQW0lkO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YgJPQW0lkO", "openreview": "https://openreview.net/forum?id=YgJPQW0lkO", "poster": "", "project": "", "author_site": "Mingjian Jiang, Yangjun Ruan, Prasanna Sattigeri, Salim Roukos, Tatsunori Hashimoto", "tldr": "", "abstract": "Recent advancements in Large Language Models (LLMs) have significantly improved text generation capabilities, but these systems are still known to hallucinate, and granular uncertainty estimation for long-form LLM generations remains challenging. \nIn this work, we propose Graph Uncertainty -- which represents the relationship between LLM generations and claims within them as a bipartite graph and estimates the claim-level uncertainty with a family of graph centrality metrics. Under this view, existing uncertainty estimation methods based on the concept of self-consistency can be viewed as using degree centrality as an uncertainty measure, and we show that more sophisticated alternatives such as closeness centrality provide consistent gains at claim-level uncertainty estimation.\nMoreover, we present uncertainty-aware decoding techniques that leverage both the graph structure and uncertainty estimates to improve the factuality of LLM generations by preserving only the most reliable claims. Compared to existing methods, our graph-based uncertainty metrics lead to an average of 6.8% relative gains on AUPRC across various long-form generation settings, and our end-to-end system provides consistent 2-4% gains in factuality over existing decoding techniques while significantly improving the informativeness of generated responses.", "keywords": "Uncertainty Estimation; Large Language Model; Factuality", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/041cb3acb1325f8d87b6ecd8110ad35d11bc745a.zip", "author": "Mingjian Jiang;Yangjun Ruan;Prasanna Sattigeri;Salim Roukos;Tatsunori Hashimoto", "authorids": "~Mingjian_Jiang1;~Yangjun_Ruan1;~Prasanna_Sattigeri1;~Salim_Roukos1;~Tatsunori_Hashimoto1", "gender": "M;M;;M;M", "homepage": "https://www.cs.toronto.edu/~mjjiang/;http://www.cs.toronto.edu/~yjruan/;;;https://thashim.github.io", "dblp": ";237/3892;00/7428;01/1417;", "google_scholar": ";https://scholar.google.com.hk/citations?user=9AdCSywAAAAJ;m-s38ikAAAAJ;1S7VwIcAAAAJ;5ygiTwsAAAAJ", "orcid": ";;0000-0003-4435-0486;;", "linkedin": "mingjian-jiang-279318201/;;prasannasattigeri/;salim-roukos-55a3871/;", "or_profile": "~Mingjian_Jiang1;~Yangjun_Ruan1;~Prasanna_Sattigeri1;~Salim_Roukos1;~Tatsunori_Hashimoto1", "aff": "Stanford University;University of Toronto;IBM Research;;Stanford University", "aff_domain": "stanford.edu;toronto.edu;ibm.com;;stanford.edu", "position": "MS student;PhD student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\njiang2024graphbased,\ntitle={Graph-based Uncertainty Metrics for Long-form Language Model Generations},\nauthor={Mingjian Jiang and Yangjun Ruan and Prasanna Sattigeri and Salim Roukos and Tatsunori Hashimoto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YgJPQW0lkO}\n}", "github": "", "reviewers": "XiBs;29kR;2gkm", "pdf_size": 1546244, "rating": "5;5;7", "confidence": "4;4;3", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "127;72;82", "wc_strengths": "68;58;79", "wc_weaknesses": "185;72;23", "wc_questions": "95;123;200", "wc_limitations": "36;1;45", "wc_review": "511;326;429", "wc_reply_reviewers": "122;0;0", "wc_reply_authors": "757;0;0", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 93.66666666666667, 23.921166824012207 ], "wc_strengths_avg": [ 68.33333333333333, 8.576453553512405 ], "wc_weaknesses_avg": [ 93.33333333333333, 67.83476656962532 ], "wc_questions_avg": [ 139.33333333333334, 44.39469437769438 ], "wc_limitations_avg": [ 27.333333333333332, 18.979521127315678 ], "wc_review_avg": [ 422.0, 75.68795588907568 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 57.51135153650587 ], "wc_reply_authors_avg": [ 252.33333333333334, 356.853222238811 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-3gy-8oPYRwJ:scholar.google.com/&scioq=Graph-based+Uncertainty+Metrics+for+Long-form+Language+Model+Generations&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "stanford.edu;toronto.edu;ibm.com;;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Stanford University;University of Toronto;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://www.stanford.edu;https://www.utoronto.ca;https://www.ibm.com/research", "aff_unique_abbr": "Stanford;U of T;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Linearly Decomposing and Recomposing Vision Transformers for Diverse-Scale Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94678", "id": "Yhd0yzC8yD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Yhd0yzC8yD", "openreview": "https://openreview.net/forum?id=Yhd0yzC8yD", "poster": "", "project": "", "author_site": "Shuxia Lin, Miaosen Zhang, Ruiming Chen, Xu Yang, Qiufeng Wang, Xin Geng", "tldr": "", "abstract": "Vision Transformers (ViTs) are widely used in a variety of applications, while they usually have a fixed architecture that may not match the varying computational resources of different deployment environments. Thus, it is necessary to adapt ViT architectures to devices with diverse computational overheads to achieve an accuracy-efficient trade-off. This concept is consistent with the motivation behind Learngene. To achieve this, inspired by polynomial decomposition in calculus, where a function can be approximated by linearly combining several basic components, we propose to linearly decompose the ViT model into a set of components called learngenes during element-wise training. These learngenes can then be recomposed into differently scaled, pre-initialized models to satisfy different computational resource constraints. Such a decomposition-recomposition strategy provides an economical and flexible approach to generating different scales of ViT models for different deployment scenarios. Compared to model compression or training from scratch, which require to repeatedly train on large datasets for diverse-scale models, such strategy reduces computational costs since it only requires to train on large datasets once. Extensive experiments are used to validate the effectiveness of our method: ViTs can be decomposed and the decomposed learngenes can be recomposed into diverse-scale ViTs, which can achieve comparable or better performance compared to traditional model compression and pre-training methods. The code for our experiments is available in the supplemental material.", "keywords": "Model decomposition;Model initialization;Vision Transformer", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/2c844418040e73e1e851610e76868873615936a6.zip", "author": "Shuxia Lin;Miaosen Zhang;Ruiming Chen;Xu Yang;Qiufeng Wang;Xin Geng", "authorids": "~Shuxia_Lin1;~Miaosen_Zhang1;~Ruiming_Chen1;~Xu_Yang5;~Qiufeng_Wang3;~Xin_Geng1", "gender": "F;M;;M;M;M", "homepage": "http://palm.seu.edu.cn/homepage/linshuxia/index.html;http://palm.seu.edu.cn/homepage/zhangmiaosen/index.html;;;http://palm.seu.edu.cn/homepage/wangqiufeng/demo/index.html;http://palm.seu.edu.cn/xgeng/index.htm", "dblp": ";297/3016;;63/1534-21.html;;", "google_scholar": ";;UHxwJ54AAAAJ;SqdxMH0AAAAJ;HQYQkTwAAAAJ;ZOCxkIcAAAAJ", "orcid": ";;;0000-0002-8276-2679;0000-0001-7680-6607;", "linkedin": ";;;;;", "or_profile": "~Shuxia_Lin1;~Miaosen_Zhang1;~Ruiming_Chen1;~Xu_Yang5;~Qiufeng_Wang3;~Xin_Geng1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University;Southeast University;Southeast University, China", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn", "position": "PhD student;PhD student;MS student;Associate Professor;PhD student;Professor", "bibtex": "@inproceedings{\nlin2024linearly,\ntitle={Linearly Decomposing and Recomposing Vision Transformers for Diverse-Scale Models},\nauthor={Shuxia Lin and Miaosen Zhang and Ruiming Chen and Xu Yang and Qiufeng Wang and Xin Geng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Yhd0yzC8yD}\n}", "github": "", "reviewers": "WjQY;caYK;GH5B;DK4A", "pdf_size": 12000244, "rating": "5;6;7;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "113;51;70;167", "wc_strengths": "21;76;84;177", "wc_weaknesses": "35;150;90;158", "wc_questions": "41;216;4;4", "wc_limitations": "8;6;5;4", "wc_review": "218;499;253;510", "wc_reply_reviewers": "42;70;0;67", "wc_reply_authors": "47;156;0;58", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.25, 44.60591328512398 ], "wc_strengths_avg": [ 89.5, 56.037933580745104 ], "wc_weaknesses_avg": [ 108.25, 49.7914400273782 ], "wc_questions_avg": [ 66.25, 87.76780446154501 ], "wc_limitations_avg": [ 5.75, 1.479019945774904 ], "wc_review_avg": [ 370.0, 135.1240171102088 ], "wc_reply_reviewers_avg": [ 44.75, 28.030117730755254 ], "wc_reply_authors_avg": [ 65.25, 56.74229022519271 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9842267480226923235&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AWT: Transferring Vision-Language Models via Augmentation, Weighting, and Transportation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94677", "id": "YiYww1d3lE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YiYww1d3lE", "openreview": "https://openreview.net/forum?id=YiYww1d3lE", "poster": "", "project": "", "author_site": "Yuhan Zhu, Yuyang Ji, Zhiyu Zhao, Gangshan Wu, Limin Wang", "tldr": "", "abstract": "Pre-trained vision-language models (VLMs) have shown impressive results in various visual classification tasks.\nHowever, we often fail to fully unleash their potential when adapting them for new concept understanding due to limited information on new classes.\nTo address this limitation, we introduce a novel adaptation framework, AWT (Augment, Weight, then Transport). AWT comprises three key components: augmenting inputs with diverse visual perspectives and enriched class descriptions through image transformations and language models; dynamically weighting inputs based on the prediction entropy; and employing optimal transport to mine semantic correlations in the vision-language space.\nAWT can be seamlessly integrated into various VLMs, enhancing their zero-shot capabilities without additional training and facilitating few-shot learning through an integrated multimodal adapter module.\nWe verify AWT in multiple challenging scenarios, including zero-shot and few-shot image classification, zero-shot video action recognition, and out-of-distribution generalization. AWT consistently outperforms the state-of-the-art methods in each setting. In addition, our extensive studies further demonstrate AWT's effectiveness and adaptability across different VLMs, architectures, and scales.", "keywords": "Vision-Language Model;Zero-shot/Few-shot Transfer;Image Classification;Video Action Recognition;Optimal Transport", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuhan Zhu;Yuyang Ji;Zhiyu Zhao;Gangshan Wu;Limin Wang", "authorids": "~Yuhan_Zhu1;~Yuyang_Ji3;~Zhiyu_Zhao2;~Gangshan_Wu1;~Limin_Wang1", "gender": ";M;M;M;", "homepage": ";https://github.com/Char1sk;https://github.com/JerryFlymi;http://mcg.nju.edu.cn/member/gswu/en/index.html;", "dblp": ";;63/5076;78/1123;", "google_scholar": ";;2Ef8Y0IAAAAJ;;", "orcid": ";;;0000-0003-1391-1762;", "linkedin": ";;;;", "or_profile": "~Yuhan_Zhu1;~Yuyang_Ji3;~Zhiyu_Zhao2;~Gangshan_Wu1;~Limin_Wang1", "aff": ";Nanjing University;University of Electronic Science and Technology of China;Nanjing University;", "aff_domain": ";nju.edu.cn;uestc.edu.cn;nju.edu.cn;", "position": ";MS student;Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nzhu2024awt,\ntitle={{AWT}: Transferring Vision-Language Models via Augmentation, Weighting, and Transportation},\nauthor={Yuhan Zhu and Yuyang Ji and Zhiyu Zhao and Gangshan Wu and Limin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YiYww1d3lE}\n}", "github": "", "reviewers": "CYNL;Vbjj;qvMQ;kG9e", "pdf_size": 15001422, "rating": "4;6;7;7", "confidence": "3;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "81;65;57;86", "wc_strengths": "49;48;40;90", "wc_weaknesses": "164;127;132;270", "wc_questions": "116;9;5;39", "wc_limitations": "7;1;11;10", "wc_review": "417;250;245;495", "wc_reply_reviewers": "0;17;12;23", "wc_reply_authors": "126;0;9;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 11.734031702701335 ], "wc_strengths_avg": [ 56.75, 19.51121472384536 ], "wc_weaknesses_avg": [ 173.25, 57.63408279828872 ], "wc_questions_avg": [ 42.25, 44.5610536230911 ], "wc_limitations_avg": [ 7.25, 3.897114317029974 ], "wc_review_avg": [ 351.75, 107.85030134403891 ], "wc_reply_reviewers_avg": [ 13.0, 8.455767262643882 ], "wc_reply_authors_avg": [ 33.75, 53.38714732967102 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14652423891699466151&as_sdt=10005&sciodt=0,8&hl=en", "gs_version_total": 3, "email": ";nju.edu.cn;uestc.edu.cn;nju.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanjing University;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "Nanjing U;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "TreeVI: Reparameterizable Tree-structured Variational Inference for Instance-level Correlation Capturing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94676", "id": "YjZ6fQAvT7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YjZ6fQAvT7", "openreview": "https://openreview.net/forum?id=YjZ6fQAvT7", "poster": "/media/PosterPDFs/NeurIPS%202024/94676.png?t=1734031071.143081", "project": "", "author_site": "Junxi Xiao, Qinliang Su", "tldr": "", "abstract": "Mean-field variational inference (VI) is computationally scalable, but its highly-demanding independence requirement hinders it from being applied to wider scenarios. Although many VI methods that take correlation into account have been proposed, these methods generally are not scalable enough to capture the correlation among data instances, which often arises in applications with graph-structured data or explicit constraints. In this paper, we developed the Tree-structured Variational Inference (TreeVI), which uses a tree structure to capture the correlation of latent variables in the posterior distribution. We show that samples from the tree-structured posterior can be reparameterized efficiently and parallelly, making its training cost just 2 or 3 times that of VI under the mean-field assumption. To capture correlation with more complicated structure, the TreeVI is further extended to the multiple-tree case. Furthermore, we show that the underlying tree structure can be automatically learned from training data. With experiments on synthetic datasets, constrained clustering, user matching and link prediction, we demonstrate that the TreeVI is superior in capturing instance-level correlation in posteriors and enhancing the performance of downstream applications.", "keywords": "Probabilistic Models;Variational Inference;Instance-level Correlation;Reparameterization;Reparameterized Variational Inference", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Junxi Xiao;Qinliang Su", "authorids": "~Junxi_Xiao1;~Qinliang_Su3", "gender": "M;M", "homepage": "https://mephestopheles.github.io/;https://cse.sysu.edu.cn/teacher/SuQinliang", "dblp": "355/8361.html;87/7936", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;cuIweygAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Junxi_Xiao1;~Qinliang_Su3", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nxiao2024treevi,\ntitle={Tree{VI}: Reparameterizable Tree-structured Variational Inference for Instance-level Correlation Capturing},\nauthor={Junxi Xiao and Qinliang Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YjZ6fQAvT7}\n}", "github": "", "reviewers": "ptS8;smr5;BYBL;96LZ", "pdf_size": 755147, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "62;76;94;85", "wc_strengths": "24;69;34;26", "wc_weaknesses": "818;239;57;35", "wc_questions": "1;73;36;235", "wc_limitations": "1;12;17;1", "wc_review": "906;469;238;382", "wc_reply_reviewers": "322;71;486;168", "wc_reply_authors": "149;218;393;176", "reply_reviewers": "1;2;5;1", "reply_authors": "2;2;5;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 11.818946653572814 ], "wc_strengths_avg": [ 38.25, 18.14352501582865 ], "wc_weaknesses_avg": [ 287.25, 316.49200226861973 ], "wc_questions_avg": [ 86.25, 89.57503837565463 ], "wc_limitations_avg": [ 7.75, 6.977642868476432 ], "wc_review_avg": [ 498.75, 249.17802370995722 ], "wc_reply_reviewers_avg": [ 261.75, 157.3950046856634 ], "wc_reply_authors_avg": [ 234.0, 95.03420436874295 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-9xtm2wrmTQJ:scholar.google.com/&scioq=TreeVI:+Reparameterizable+Tree-structured+Variational+Inference+for+Instance-level+Correlation+Capturing&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "sysu.edu.cn;sysu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Preference Learning Algorithms Do Not Learn Preference Rankings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94675", "id": "YkJ5BuEXdD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YkJ5BuEXdD", "openreview": "https://openreview.net/forum?id=YkJ5BuEXdD", "poster": "", "project": "", "author_site": "Angelica Chen, Sadhika Malladi, Lily Zhang, Xinyi Chen, Qiuyi (Richard) Zhang, Rajesh Ranganath, Kyunghyun Cho", "tldr": "", "abstract": "Preference learning algorithms (e.g., RLHF and DPO) are frequently used to steer LLMs to produce generations that are more preferred by humans, but our understanding of their inner workings is still limited. In this work, we study the conventional wisdom that preference learning trains models to assign higher likelihoods to more preferred outputs than less preferred outputs, measured via *ranking accuracy*.\nSurprisingly, we find that most state-of-the-art preference-tuned models achieve a ranking accuracy of less than 60% on common preference datasets. We furthermore derive the *idealized ranking accuracy* that a preference-tuned LLM would achieve if it optimized the DPO or RLHF objective perfectly. We demonstrate that existing models exhibit a significant *alignment gap* -- *i.e.*, a gap between the observed and idealized ranking accuracies. \nWe attribute this discrepancy to the DPO objective, which is empirically and theoretically ill-suited to correct even mild ranking errors in the reference model, and derive a simple and efficient formula for quantifying the difficulty of learning a given preference datapoint.\nFinally, we demonstrate that ranking accuracy strongly correlates with the empirically popular win rate metric when the model is close to the reference model used in the objective, shedding further light on the differences between on-policy (e.g., RLHF) and off-policy (e.g., DPO) preference learning algorithms.", "keywords": "preference learning;large language models;DPO;RLHF;ranking accuracy", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/11c5147ef967d0c6cb731c713f940ab88190c553.zip", "author": "Angelica Chen;Sadhika Malladi;Lily H Zhang;Xinyi Chen;Qiuyi Zhang;Rajesh Ranganath;Kyunghyun Cho", "authorids": "~Angelica_Chen1;~Sadhika_Malladi2;~Lily_H_Zhang1;~Xinyi_Chen1;~Qiuyi_Zhang1;~Rajesh_Ranganath2;~Kyunghyun_Cho1", "gender": "F;F;F;F;M;;M", "homepage": ";https://www.cs.princeton.edu/~smalladi/;https://lhz1029.github.io/;;https://qiuyiz.github.io;;http://kyunghyuncho.me", "dblp": "241/5892;176/9810;267/6682;84/6214;133/8559;97/7057;41/9736", "google_scholar": "QbW4GSwAAAAJ;9HCmTcwAAAAJ;fmCi9ZQAAAAJ;;mE11hO8AAAAJ;;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Angelica_Chen1;~Sadhika_Malladi2;~Lily_H_Zhang1;~Xinyi_Chen1;~Qiuyi_Zhang1;~Rajesh_Ranganath2;~Kyunghyun_Cho1", "aff": "New York University;Princeton University;New York University;Google DeepMind;Google;New York University;Genentech", "aff_domain": "nyu.edu;princeton.edu;nyu.edu;google.com;google.com;nyu.edu;gene.com", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Assistant Professor;Senior Director of Frontier Research", "bibtex": "@inproceedings{\nchen2024preference,\ntitle={Preference Learning Algorithms Do Not Learn Preference Rankings},\nauthor={Angelica Chen and Sadhika Malladi and Lily H Zhang and Xinyi Chen and Qiuyi Zhang and Rajesh Ranganath and Kyunghyun Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YkJ5BuEXdD}\n}", "github": "", "reviewers": "j6Ru;uW8U;yhyh;5qJ8", "pdf_size": 973043, "rating": "3;5;5;6", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "48;65;86;104", "wc_strengths": "31;139;74;85", "wc_weaknesses": "22;294;138;128", "wc_questions": "94;2;137;2", "wc_limitations": "356;1;1;14", "wc_review": "551;501;436;333", "wc_reply_reviewers": "0;137;23;13", "wc_reply_authors": "0;125;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.75, 21.146808269807526 ], "wc_strengths_avg": [ 82.25, 38.47970244167696 ], "wc_weaknesses_avg": [ 145.5, 97.03994023081424 ], "wc_questions_avg": [ 58.75, 58.75106382015563 ], "wc_limitations_avg": [ 93.0, 151.93584172274822 ], "wc_review_avg": [ 455.25, 81.51188563639047 ], "wc_reply_reviewers_avg": [ 43.25, 54.73744148204225 ], "wc_reply_authors_avg": [ 31.25, 54.12658773652741 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9603929324413694696&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nyu.edu;princeton.edu;nyu.edu;google.com;google.com;nyu.edu;gene.com", "author_num": 7, "aff_unique_index": "0;1;0;2;2;0;3", "aff_unique_norm": "New York University;Princeton University;Google;Genentech", "aff_unique_dep": ";;Google DeepMind;", "aff_unique_url": "https://www.nyu.edu;https://www.princeton.edu;https://deepmind.com;https://www.genentech.com", "aff_unique_abbr": "NYU;Princeton;DeepMind;Genentech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Discrete-state Continuous-time Diffusion for Graph Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94674", "id": "YkSKZEhIYt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YkSKZEhIYt", "openreview": "https://openreview.net/forum?id=YkSKZEhIYt", "poster": "", "project": "", "author_site": "Zhe Xu, Ruizhong Qiu, Yuzhong Chen, Huiyuan Chen, Xiran Fan, Menghai Pan, Zhichen Zeng, Mahashweta Das, Hanghang Tong", "tldr": "", "abstract": "Graph is a prevalent discrete data structure, whose generation has wide applications such as drug discovery and circuit design. Diffusion generative models, as an emerging research focus, have been applied to graph generation tasks. Overall, according to the space of states and time steps, diffusion generative models can be categorized into discrete-/continuous-state discrete-/continuous-time fashions. In this paper, we formulate the graph diffusion generation in a discrete-state continuous-time setting, which has never been studied in previous graph diffusion models. The rationale of such a formulation is to preserve the discrete nature of graph-structured data and meanwhile provide flexible sampling trade-offs between sample quality and efficiency. Analysis shows that our training objective is closely related to the generation quality and our proposed generation framework enjoys ideal invariant/equivariant properties concerning the permutation of node ordering. Our proposed model shows competitive empirical performance against other state-of-the-art graph generation solutions on various benchmarks while at the same time can flexibly trade off the generation quality and efficiency in the sampling phase.", "keywords": "graph generation;diffusion generative model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zhe Xu;Ruizhong Qiu;Yuzhong Chen;Huiyuan Chen;Xiran Fan;Menghai Pan;Zhichen Zeng;Mahashweta Das;Hanghang Tong", "authorids": "~Zhe_Xu5;~Ruizhong_Qiu1;~Yuzhong_Chen2;~Huiyuan_Chen1;~Xiran_Fan1;~Menghai_Pan1;~Zhichen_Zeng1;~Mahashweta_Das2;~Hanghang_Tong3", "gender": "M;M;M;M;;M;;F;", "homepage": "https://pricexu.github.io/;https://q-rz.github.io/;https://usa.visa.com/about-visa/visa-research/yuzhong-chen.html;;;;https://zhichenz98.github.io/;;http://tonghanghang.org", "dblp": "97/3701-7;330/9860;;204/5464;;209/8751;345/6632-1;;58/1757", "google_scholar": "7IhVDFsAAAAJ;REKarmcAAAAJ;Kc8V0tUAAAAJ;j3y4dJwAAAAJ;;H2ErHkkAAAAJ;rFdX368AAAAJ;;RaINcuUAAAAJ", "orcid": "0000-0002-6675-1398;0009-0000-3253-8890;;0000-0002-6360-558X;;;0000-0002-5534-3401;;0000-0003-4405-3887", "linkedin": ";ruizhong-qiu/;;;;;;mahashwetadas/;htong/", "or_profile": "~Zhe_Xu5;~Ruizhong_Qiu1;~Yuzhong_Chen2;~Huiyuan_Chen1;~Xiran_Fan1;~Menghai_Pan1;~Zhichen_Zeng1;~Mahashweta_Das2;~Hanghang_Tong3", "aff": "University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;VISA;Amazon;;Visa Research;University of Illinois Urbana-Champaign;VISA;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;visa.com;amazon.com;;visa.com;illinois.edu;visa.com;illinois.edu", "position": "PhD student;MS student;Researcher;Researcher;;Researcher;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nxu2024discretestate,\ntitle={Discrete-state Continuous-time Diffusion for Graph Generation},\nauthor={Zhe Xu and Ruizhong Qiu and Yuzhong Chen and Huiyuan Chen and Xiran Fan and Menghai Pan and Zhichen Zeng and Mahashweta Das and Hanghang Tong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YkSKZEhIYt}\n}", "github": "", "reviewers": "XoYX;ohXc;9BTR;Tpg4", "pdf_size": 5301793, "rating": "5;5;6;6", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "61;82;33;67", "wc_strengths": "37;35;63;48", "wc_weaknesses": "64;43;92;55", "wc_questions": "40;33;40;135", "wc_limitations": "1;11;12;11", "wc_review": "203;204;240;316", "wc_reply_reviewers": "173;0;16;16", "wc_reply_authors": "676;0;25;25", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.75, 17.75352077758099 ], "wc_strengths_avg": [ 45.75, 11.121488209767612 ], "wc_weaknesses_avg": [ 63.5, 18.062391868188442 ], "wc_questions_avg": [ 62.0, 42.24334267076885 ], "wc_limitations_avg": [ 8.75, 4.493050188902857 ], "wc_review_avg": [ 240.75, 45.93133462027856 ], "wc_reply_reviewers_avg": [ 51.25, 70.5952370914639 ], "wc_reply_authors_avg": [ 181.5, 285.6820785418644 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16068353948913821575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "illinois.edu;illinois.edu;visa.com;amazon.com;;visa.com;illinois.edu;visa.com;illinois.edu", "author_num": 9, "aff_unique_index": "0;0;1;2;3;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;VISA;Amazon;Visa Inc.", "aff_unique_dep": ";;Amazon.com, Inc.;Research", "aff_unique_url": "https://illinois.edu;https://www.visa.com;https://www.amazon.com;https://www.visa.com/", "aff_unique_abbr": "UIUC;VISA;Amazon;Visa", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Benchmark Data Repositories for Better Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97636", "id": "YktwH3tOuc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YktwH3tOuc", "openreview": "https://openreview.net/forum?id=YktwH3tOuc", "poster": "/media/PosterPDFs/NeurIPS%202024/97636.png?t=1731340286.4214094", "project": "", "author_site": "Rachel Longjohn, Markelle Kelly, Sameer Singh, Padhraic Smyth", "tldr": "", "abstract": "In machine learning research, it is common to evaluate algorithms via their performance on standard benchmark datasets. While a growing body of work establishes guidelines for---and levies criticisms at---data and benchmarking practices in machine learning, comparatively less attention has been paid to the data repositories where these datasets are stored, documented, and shared. In this paper, we analyze the landscape of these _benchmark data repositories_ and the role they can play in improving benchmarking. This role includes addressing issues with both datasets themselves (e.g., representational harms, construct validity) and the manner in which evaluation is carried out using such datasets (e.g., overemphasis on a few datasets and metrics, lack of reproducibility). To this end, we identify and discuss a set of considerations surrounding the design and use of benchmark data repositories, with a focus on improving benchmarking practices in machine learning.", "keywords": "data repositories;benchmarking;reproducibility;evaluation;metadata;data documentation;data curation", "primary_area": "", "supplementary_material": "", "author": "Rachel Longjohn;Markelle Kelly;Sameer Singh;Padhraic Smyth", "authorids": "~Rachel_Longjohn1;~Markelle_Kelly1;~Sameer_Singh1;~Padhraic_Smyth1", "gender": "F;F;M;M", "homepage": "https://rlongjohn.github.io/;;http://sameersingh.org;https://www.ics.uci.edu/~smyth", "dblp": ";;13/3568-1;s/PadhraicSmyth", "google_scholar": ";;-hGZC54AAAAJ;OsoQ-dcAAAAJ", "orcid": ";;0000-0003-0621-6323;0000-0001-9971-8378", "linkedin": ";markellekelly/;sameersingh/;", "or_profile": "~Rachel_Longjohn1;~Markelle_Kelly1;~Sameer_Singh1;~Padhraic_Smyth1", "aff": "University of California, Irvine;eBay Inc.;University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;ebay.com;uci.edu;uci.edu", "position": "PhD student;Intern;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlongjohn2024benchmark,\ntitle={Benchmark Data Repositories for Better Benchmarking},\nauthor={Rachel Longjohn and Markelle Kelly and Sameer Singh and Padhraic Smyth},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YktwH3tOuc}\n}", "github": "", "reviewers": "ieFZ;iNCk;axrk;zDCB;kc5S", "pdf_size": 503882, "rating": "4;6;8;8;9", "confidence": "4;4;4;3;5", "wc_summary_and_contributions": "72;45;29;36;61", "wc_strengths": "107;60;59;86;32", "wc_improvement": "162;75;65;49;21", "wc_limitations": "113;1;63;66;4", "wc_correctness": "16;1;20;24;1", "wc_clarity": "1;1;8;11;1", "wc_relation_to_prior_work": "59;24;7;10;1", "wc_documentation": "1;1;8;11;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "532;209;260;294;123", "wc_reply_reviewers": "319;8;39;33;0", "wc_reply_authors": "198;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;1;2;1;1", "rating_avg": [ 7.0, 1.7888543819998317 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 48.6, 15.856859714331838 ], "wc_strengths_avg": [ 68.8, 25.623426780975258 ], "wc_improvement_avg": [ 74.4, 47.453556241866636 ], "wc_limitations_avg": [ 49.4, 42.211846678391126 ], "wc_correctness_avg": [ 12.4, 9.645724441429996 ], "wc_clarity_avg": [ 4.4, 4.2708313008125245 ], "wc_relation_to_prior_work_avg": [ 20.2, 20.81730049742281 ], "wc_documentation_avg": [ 4.4, 4.2708313008125245 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.6, 136.9125268191337 ], "wc_reply_reviewers_avg": [ 79.8, 120.49464718401393 ], "wc_reply_authors_avg": [ 39.6, 79.2 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17677669529663687, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10594948825910040945&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uci.edu;ebay.com;uci.edu;uci.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Irvine;eBay Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;https://www.ebayinc.com", "aff_unique_abbr": "UCI;eBay", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DreamScene4D: Dynamic Multi-Object Scene Generation from Monocular Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94673", "id": "YlIvhHFwQ2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YlIvhHFwQ2", "openreview": "https://openreview.net/forum?id=YlIvhHFwQ2", "poster": "/media/PosterPDFs/NeurIPS%202024/94673.png?t=1733416153.8166986", "project": "", "author_site": "Wen-Hsuan Chu, Lei Ke, Katerina Fragkiadaki", "tldr": "", "abstract": "View-predictive generative models provide strong priors for lifting object-centric images and videos into 3D and 4D through rendering and score distillation objectives. A question then remains: what about lifting complete multi-object dynamic scenes? There are two challenges in this direction: First, rendering error gradients are often insufficient to recover fast object motion, and second, view predictive generative models work much better for objects than whole scenes, so, score distillation objectives cannot currently be applied at the scene level directly. We present DreamScene4D, the first approach to generate 3D dynamic scenes of multiple objects from monocular videos via 360-degree novel view synthesis. Our key insight is a \"decompose-recompose\" approach that factorizes the video scene into the background and object tracks, while also factorizing object motion into 3 components: object-centric deformation, object-to-world-frame transformation, and camera motion. Such decomposition permits rendering error gradients and object view-predictive models to recover object 3D completions and deformations while bounding box tracks guide the large object movements in the scene. We show extensive results on challenging DAVIS, Kubric, and self-captured videos with quantitative comparisons and a user preference study. Besides 4D scene generation, DreamScene4D obtains accurate 2D persistent point track by projecting the inferred 3D trajectories to 2D. We will release our code and hope our work will stimulate more research on fine-grained 4D understanding from videos.", "keywords": "4D Scene Generation; Video-to-4D Generation", "primary_area": "machine_vision", "supplementary_material": "/attachment/2b6d3a8d058e0b75e9e0a4bb94d8211d26bc4337.zip", "author": "Wen-Hsuan Chu;Lei Ke;Katerina Fragkiadaki", "authorids": "~Wen-Hsuan_Chu1;~Lei_Ke1;~Katerina_Fragkiadaki1", "gender": "M;M;F", "homepage": ";http://www.kelei.site;https://www.cs.cmu.edu/~katef/", "dblp": "226/2518;26/5225;21/8780", "google_scholar": ";WseeNrUAAAAJ;FWp7728AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Wen-Hsuan_Chu1;~Lei_Ke1;~Katerina_Fragkiadaki1", "aff": "Carnegie Mellon University;ETHZ - ETH Zurich;Carnegie Mellon University", "aff_domain": "cmu.edu;ethz.ch;cmu.edu", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchu2024dreamscened,\ntitle={DreamScene4D: Dynamic Multi-Object Scene Generation from Monocular Videos},\nauthor={Wen-Hsuan Chu and Lei Ke and Katerina Fragkiadaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YlIvhHFwQ2}\n}", "github": "", "reviewers": "4pTB;EEWc;TmCf;onS9", "pdf_size": 12297382, "rating": "5;5;7;7", "confidence": "5;3;5;3", "soundness": "3;3;4;3", "novelty": "3;3;3;2", "presentation": "3;3;3;2", "wc_summary": "121;91;126;98", "wc_strengths": "80;121;76;48", "wc_weaknesses": "202;130;37;146", "wc_questions": "49;4;66;3", "wc_limitations": "10;89;35;21", "wc_review": "462;435;340;316", "wc_reply_reviewers": "0;12;23;19", "wc_reply_authors": "0;0;0;54", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.0, 14.815532390029054 ], "wc_strengths_avg": [ 81.25, 26.05163142684158 ], "wc_weaknesses_avg": [ 128.75, 59.33538151895545 ], "wc_questions_avg": [ 30.5, 27.663152387246107 ], "wc_limitations_avg": [ 38.75, 30.334592464709328 ], "wc_review_avg": [ 388.25, 61.58885857036157 ], "wc_reply_reviewers_avg": [ 13.5, 8.73212459828649 ], "wc_reply_authors_avg": [ 13.5, 23.382685902179844 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17259968299238079693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;ethz.ch;cmu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.ethz.ch", "aff_unique_abbr": "CMU;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Minimum Entropy Coupling with Bottleneck", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94672", "id": "YlmYm7sHDE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YlmYm7sHDE", "openreview": "https://openreview.net/forum?id=YlmYm7sHDE", "poster": "", "project": "", "author_site": "Reza Ebrahimi, Jun Chen, Ashish Khisti", "tldr": "", "abstract": "This paper investigates a novel lossy compression framework operating under logarithmic loss, designed to handle situations where the reconstruction distribution diverges from the source distribution. This framework is especially relevant for applications that require joint compression and retrieval, and in scenarios involving distributional shifts due to processing. We show that the proposed formulation extends the classical minimum entropy coupling framework by integrating a bottleneck, allowing for controlled variability in the degree of stochasticity in the coupling.\nWe explore the decomposition of the Minimum Entropy Coupling with Bottleneck (MEC-B) into two distinct optimization problems: Entropy-Bounded Information Maximization (EBIM) for the encoder, and Minimum Entropy Coupling (MEC) for the decoder. Through extensive analysis, we provide a greedy algorithm for EBIM with guaranteed performance, and characterize the optimal solution near functional mappings, yielding significant theoretical insights into the structural complexity of this problem.\nFurthermore, we illustrated the practical application of MEC-B through experiments in Markov Coding Games (MCGs) under rate limits. These games simulate a communication scenario within a Markov Decision Process, where an agent must transmit a compressed message from a sender to a receiver through its actions. Our experiments highlighted the trade-offs between MDP rewards and receiver accuracy across various compression rates, showcasing the efficacy of our method compared to conventional compression baseline.", "keywords": "Compression;Minimum Entropy Coupling;Log Loss;Markov Coding Games", "primary_area": "other", "supplementary_material": "/attachment/88b565e54f20ba3f5952497b7f9f40fd9a95006d.zip", "author": "MohammadReza Ebrahimi;Jun Chen;Ashish J Khisti", "authorids": "~MohammadReza_Ebrahimi1;~Jun_Chen8;~Ashish_J_Khisti1", "gender": "M;M;M", "homepage": ";https://www.ece.mcmaster.ca/~junchen/;https://www.comm.utoronto.ca/~akhisti/", "dblp": "204/4421;85/5901-5.html;84/5679.html", "google_scholar": "mkSGwPYAAAAJ;https://scholar.google.ca/citations?user=XI79Mw0AAAAJ;https://scholar.google.ca/citations?user=jiGeAg4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~MohammadReza_Ebrahimi1;~Jun_Chen8;~Ashish_J_Khisti1", "aff": "University of Toronto;McMaster University;Toronto University", "aff_domain": "utoronto.ca;mcmaster.ca;utoronto.ca", "position": "PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nebrahimi2024minimum,\ntitle={Minimum Entropy Coupling with Bottleneck},\nauthor={MohammadReza Ebrahimi and Jun Chen and Ashish J Khisti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YlmYm7sHDE}\n}", "github": "", "reviewers": "Enxp;mDET;vk4R", "pdf_size": 3797199, "rating": "7;8;8", "confidence": "4;4;4", "soundness": "4;4;4", "novelty": "3;4;4", "presentation": "3;3;3", "wc_summary": "137;39;74", "wc_strengths": "114;50;45", "wc_weaknesses": "51;399;51", "wc_questions": "111;60;21", "wc_limitations": "77;8;5", "wc_review": "490;556;196", "wc_reply_reviewers": "25;32;139", "wc_reply_authors": "0;0;117", "reply_reviewers": "1;1;2", "reply_authors": "1;1;2", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.33333333333333, 40.549010126293126 ], "wc_strengths_avg": [ 69.66666666666667, 31.414787742222437 ], "wc_weaknesses_avg": [ 167.0, 164.04877323527901 ], "wc_questions_avg": [ 64.0, 36.851051545376556 ], "wc_limitations_avg": [ 30.0, 33.25657829663178 ], "wc_review_avg": [ 414.0, 156.48642113614844 ], "wc_reply_reviewers_avg": [ 65.33333333333333, 52.16853031814827 ], "wc_reply_authors_avg": [ 39.0, 55.154328932550705 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13591281593274222213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "utoronto.ca;mcmaster.ca;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;McMaster University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.mcmaster.ca", "aff_unique_abbr": "U of T;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "The Poisson Midpoint Method for Langevin Dynamics: Provably Efficient Discretization for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94671", "id": "Ylvviju6MD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Ylvviju6MD", "openreview": "https://openreview.net/forum?id=Ylvviju6MD", "poster": "", "project": "", "author_site": "Saravanan Kandasamy, Dheeraj Nagaraj", "tldr": "", "abstract": "Langevin Dynamics is a Stochastic Differential Equation (SDE) central to sampling and generative modeling and is implemented via time discretization. Langevin Monte Carlo (LMC), based on the Euler-Maruyama discretization, is the simplest and most studied algorithm. LMC can suffer from slow convergence - requiring a large number of steps of small step-size to obtain good quality samples. This becomes stark in the case of diffusion models where a large number of steps gives the best samples, but the quality degrades rapidly with smaller number of steps. Randomized Midpoint Method has been recently proposed as a better discretization of Langevin dynamics for sampling from strongly log-concave distributions. However, important applications such as diffusion models involve non-log concave densities and contain time varying drift. We propose its variant, the Poisson Midpoint Method, which approximates a small step-size LMC with large step-sizes. We prove that this can obtain a quadratic speed up of LMC under very weak assumptions. We apply our method to diffusion models for image generation and show that it maintains the quality of DDPM with 1000 neural network calls with just 50-80 neural network calls and outperforms ODE based methods with similar compute.", "keywords": "Langevin Monte Carlo;Diffusion Models;MCMC", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Saravanan Kandasamy;Dheeraj Mysore Nagaraj", "authorids": "~Saravanan_Kandasamy2;~Dheeraj_Mysore_Nagaraj1", "gender": "M;M", "homepage": ";https://dheerajmn.mit.edu", "dblp": ";215/5097", "google_scholar": "SFQyZMsAAAAJ;0g80b7sAAAAJ", "orcid": ";", "linkedin": ";dheeraj-m-nagaraj-01739792/", "or_profile": "~Saravanan_Kandasamy2;~Dheeraj_Mysore_Nagaraj1", "aff": "Cornell University;Google", "aff_domain": "cornell.edu;google.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\nkandasamy2024the,\ntitle={The Poisson Midpoint Method for Langevin Dynamics: Provably Efficient Discretization for Diffusion Models},\nauthor={Saravanan Kandasamy and Dheeraj Mysore Nagaraj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Ylvviju6MD}\n}", "github": "", "reviewers": "HMqT;F8sy;p2Nt", "pdf_size": 1015311, "rating": "5;5;7", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "1;2;3", "wc_summary": "46;96;39", "wc_strengths": "125;40;68", "wc_weaknesses": "206;126;44", "wc_questions": "374;95;174", "wc_limitations": "1;64;10", "wc_review": "752;421;335", "wc_reply_reviewers": "45;63;86", "wc_reply_authors": "0;0;4", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 60.333333333333336, 25.381533094401966 ], "wc_strengths_avg": [ 77.66666666666667, 35.367907612536094 ], "wc_weaknesses_avg": [ 125.33333333333333, 66.13790306792484 ], "wc_questions_avg": [ 214.33333333333334, 117.41758338889832 ], "wc_limitations_avg": [ 25.0, 27.820855486487112 ], "wc_review_avg": [ 502.6666666666667, 179.76713332036593 ], "wc_reply_reviewers_avg": [ 64.66666666666667, 16.779617264870957 ], "wc_reply_authors_avg": [ 1.3333333333333333, 1.8856180831641267 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17633343763525226523&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cornell.edu;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Cornell University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cornell.edu;https://www.google.com", "aff_unique_abbr": "Cornell;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Yq2dYPkfRU", "title": "Stability and Sharper Risk Bounds with Convergence Rate $O(1/n^2)$", "track": "main", "status": "Reject", "tldr": "", "abstract": "The sharpest known high probability excess risk bounds are up to $O(1/n)$ for empirical risk minimization and projected gradient descent via algorithmic stability [Klochkov and Zhivotovskiy, 2021]. In this paper, we show that high probability excess risk bounds of order up to $O(1/n^2)$ are possible. We discuss how high probability excess risk bounds reach $O(1/n^2)$ under strongly convexity, smoothness and Lipschitz continuity assumptions for empirical risk minimization, projected gradient descent and stochastic gradient descent. Besides, to the best of our knowledge, our high probability results on the generalization gap measured by gradients for nonconvex problems are also the sharpest.", "keywords": "algorithmic stability;generalization bounds;excess risk bounds;", "primary_area": "learning_theory", "supplementary_material": "", "author": "Bowei Zhu;Shaojie Li;Yong Liu", "authorids": "~Bowei_Zhu1;~Shaojie_Li2;~Yong_Liu7", "gender": ";M;M", "homepage": ";;https://iie-liuyong.github.io", "dblp": "304/1543;;29/4867-18", "google_scholar": ";;vVhmzbAAAAAJ", "orcid": ";;0000-0002-6739-621X", "linkedin": ";;", "or_profile": "~Bowei_Zhu1;~Shaojie_Li2;~Yong_Liu7", "aff": "Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@misc{\nanonymous2024stability,\ntitle={Stability and Sharper Risk Bounds with Convergence Rate \\$O(1/n{\\textasciicircum}2)\\$},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Yq2dYPkfRU}\n}", "github": "", "project": "", "reviewers": "UD1S;XZ1M;VeaT;Xifq", "site": "https://openreview.net/forum?id=Yq2dYPkfRU", "pdf_size": 556898, "rating": "5;5;5;5", "confidence": "4;3;3;3", "soundness": "3;2;3;3", "novelty": "3;3;2;2", "presentation": "2;2;2;2", "wc_summary": "73;26;29;119", "wc_strengths": "56;43;3;254", "wc_weaknesses": "251;66;608;319", "wc_questions": "38;60;174;4", "wc_limitations": "1;1;16;7", "wc_review": "419;196;830;703", "wc_reply_reviewers": "23;43;10;490", "wc_reply_authors": "32;69;1325;273", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;4;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 61.75, 37.93003427364653 ], "wc_strengths_avg": [ 89.0, 97.24453712162962 ], "wc_weaknesses_avg": [ 311.0, 194.87046979981343 ], "wc_questions_avg": [ 69.0, 63.8200595424354 ], "wc_limitations_avg": [ 6.25, 6.139014578904337 ], "wc_review_avg": [ 537.0, 246.78431878869452 ], "wc_reply_reviewers_avg": [ 141.5, 201.54962168160972 ], "wc_reply_authors_avg": [ 424.75, 527.7993818677699 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SEziHTh5xRQJ:scholar.google.com/&scioq=Stability+and+Sharper+Risk+Bounds+with+Convergence+Rate+%24O(1/n%5E2)%24&hl=en&as_sdt=0,10", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Why Do We Need Weight Decay in Modern Deep Learning?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94670", "id": "YrAxxscKM2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YrAxxscKM2", "openreview": "https://openreview.net/forum?id=YrAxxscKM2", "poster": "", "project": "", "author_site": "Francesco D'Angelo, Maksym Andriushchenko, Aditya Vardhan Varre, Nicolas Flammarion", "tldr": "", "abstract": "Weight decay is a broadly used technique for training state-of-the-art deep networks from image classification to large language models. Despite its widespread usage and being extensively studied in the classical literature, its role remains poorly understood for deep learning. In this work, we highlight that the role of weight decay in modern deep learning is different from its regularization effect studied in classical learning theory. For deep networks on vision tasks trained with multipass SGD, we show how weight decay modifies the optimization dynamics enhancing the ever-present implicit regularization of SGD via the *loss stabilization mechanism*. In contrast, for large language models trained with nearly one-epoch training, we describe how weight decay balances the *bias-variance tradeoff* in stochastic optimization leading to lower training loss and improved training stability. \nOverall, we present a unifying perspective from ResNets on vision tasks to LLMs: weight decay is never useful as an explicit regularizer but instead changes the training dynamics in a desirable way.", "keywords": "Weight decay;overparameterization;implicit regularization;large language models;optimization dynamics", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/e8fc4002429d6caaf22f02ab7c21c33e5d2bd053.zip", "author": "Francesco D'Angelo;Maksym Andriushchenko;Aditya Varre;Nicolas Flammarion", "authorids": "~Francesco_D'Angelo1;~Maksym_Andriushchenko1;~Aditya_Varre1;~Nicolas_Flammarion1", "gender": "M;M;M;M", "homepage": ";https://www.andriushchenko.me/;;", "dblp": "32/10646;200/8865;164/7417;224/6338", "google_scholar": "Hg_3f5kAAAAJ;ZNtuJYoAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Francesco_D'Angelo1;~Maksym_Andriushchenko1;~Nicolas_Flammarion1;~Aditya_Vardhan_Varre1", "aff": "ETH;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "ethz.ch;epfl.ch;epfl.ch;epfl.ch", "position": "MS student;PhD Student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nd'angelo2024why,\ntitle={Why Do We Need Weight Decay in Modern Deep Learning?},\nauthor={Francesco D'Angelo and Maksym Andriushchenko and Aditya Varre and Nicolas Flammarion},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YrAxxscKM2}\n}", "github": "", "reviewers": "tBnp;S7kS;g6ok;YyLF;6pLz", "pdf_size": 3442284, "rating": "5;5;6;6;7", "confidence": "4;4;3;2;4", "soundness": "2;2;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;2;3", "wc_summary": "140;83;95;129;73", "wc_strengths": "151;60;99;101;71", "wc_weaknesses": "554;154;113;55;131", "wc_questions": "208;82;59;4;191", "wc_limitations": "12;76;2;1;21", "wc_review": "1065;455;368;290;487", "wc_reply_reviewers": "374;0;102;0;29", "wc_reply_authors": "46;0;0;0;0", "reply_reviewers": "2;0;1;0;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 104.0, 26.092144411680692 ], "wc_strengths_avg": [ 96.4, 31.556932677305635 ], "wc_weaknesses_avg": [ 201.4, 179.31937987847272 ], "wc_questions_avg": [ 108.8, 78.45865153059923 ], "wc_limitations_avg": [ 22.4, 27.774808730214506 ], "wc_review_avg": [ 533.0, 274.7719054051924 ], "wc_reply_reviewers_avg": [ 101.0, 141.50335685064152 ], "wc_reply_authors_avg": [ 9.2, 18.399999999999995 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2004459314343183, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16154366594828211100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ethz.ch;epfl.ch;epfl.ch;epfl.ch", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "ETH;EPFL;EPFL", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "MoMu-Diffusion: On Learning Long-Term Motion-Music Synchronization and Correspondence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94669", "id": "YscR3LBIi7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YscR3LBIi7", "openreview": "https://openreview.net/forum?id=YscR3LBIi7", "poster": "/media/PosterPDFs/NeurIPS%202024/94669.png?t=1731585066.2441392", "project": "", "author_site": "Fuming You, Minghui Fang, Li Tang, Rongjie Huang, Yongqi Wang, Zhou Zhao", "tldr": "", "abstract": "Motion-to-music and music-to-motion have been studied separately, each attracting substantial research interest within their respective domains. The interaction between human motion and music is a reflection of advanced human intelligence, and establishing a unified relationship between them is particularly important. However, to date, there has been no work that considers them jointly to explore the modality alignment within. To bridge this gap, we propose a novel framework, termed MoMu-Diffusion, for long-term and synchronous motion-music generation. Firstly, to mitigate the huge computational costs raised by long sequences, we propose a novel Bidirectional Contrastive Rhythmic Variational Auto-Encoder (BiCoR-VAE) that extracts the modality-aligned latent representations for both motion and music inputs. Subsequently, leveraging the aligned latent spaces, we introduce a multi-modal diffusion Transformer model and a cross-guidance sampling strategy to enable various generation tasks, including cross-modal, multi-modal, and variable-length generation. Extensive experiments demonstrate that MoMu-Diffusion surpasses recent state-of-the-art methods both qualitatively and quantitatively, and can synthesize realistic, diverse, long-term, and beat-matched music or motion sequences. The generated motion-music samples are available at https://momu-diffusion.github.io/.", "keywords": "motion-music generation;rhythmic alignment;diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Fuming You;Minghui Fang;Li Tang;Rongjie Huang;Yongqi Wang;Zhou Zhao", "authorids": "~Fuming_You3;~Minghui_Fang1;~Li_Tang3;~Rongjie_Huang1;~Yongqi_Wang1;~Zhou_Zhao3", "gender": "M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=8c3I0RwAAAAJ&hl=zh-CN;;;;;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "124/1993-2;;212/8936-1;;277/1388;75/7785", "google_scholar": "8c3I0RwAAAAJ;https://scholar.google.com.hk/citations?hl=en;iRHBUsgAAAAJ;9_79D6IAAAAJ;W6Nf_CAAAAAJ;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": "0009-0000-6488-9695;;;0000-0003-4695-3440;;0000-0001-6121-0384", "linkedin": ";;;;;", "or_profile": "~Minghui_Fang1;~Li_Tang3;~Rongjie_Huang1;~Yongqi_Wang1;~fuming_you2;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;PhD student;MS student;MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nyou2024momudiffusion,\ntitle={MoMu-Diffusion: On Learning Long-Term Motion-Music Synchronization and Correspondence},\nauthor={Fuming You and Minghui Fang and Li Tang and Rongjie Huang and Yongqi Wang and Zhou Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YscR3LBIi7}\n}", "github": "", "reviewers": "aP7b;1QHY;pw59;E4Pj", "pdf_size": 3798571, "rating": "6;6;7;9", "confidence": "2;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "43;60;30;134", "wc_strengths": "50;87;27;74", "wc_weaknesses": "6;62;25;66", "wc_questions": "158;86;28;39", "wc_limitations": "7;41;7;1", "wc_review": "264;336;117;314", "wc_reply_reviewers": "11;44;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.75, 40.25776322648838 ], "wc_strengths_avg": [ 59.5, 22.983689869122408 ], "wc_weaknesses_avg": [ 39.75, 25.202926417382564 ], "wc_questions_avg": [ 77.75, 51.197534120307004 ], "wc_limitations_avg": [ 14.0, 15.7797338380595 ], "wc_review_avg": [ 257.75, 85.34745163155137 ], "wc_reply_reviewers_avg": [ 16.75, 16.422164899914993 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14227336540975254571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Schedule Your Edit: A Simple yet Effective Diffusion Noise Schedule for Image Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94668", "id": "Yu6cDt7q9Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Yu6cDt7q9Z", "openreview": "https://openreview.net/forum?id=Yu6cDt7q9Z", "poster": "/media/PosterPDFs/NeurIPS%202024/94668.png?t=1733734965.56944", "project": "", "author_site": "Haonan Lin, Yan Chen, Jiahao Wang, Wenbin An, Mengmeng Wang, Feng Tian, Yong Liu, Guang Dai, Jingdong Wang, QianYing Wang", "tldr": "", "abstract": "Text-guided diffusion models have significantly advanced image editing, enabling high-quality and diverse modifications driven by text prompts. However, effective editing requires inverting the source image into a latent space, a process often hindered by prediction errors inherent in DDIM inversion. \nThese errors accumulate during the diffusion process, resulting in inferior content preservation and edit fidelity, especially with conditional inputs. \nWe address these challenges by investigating the primary contributors to error accumulation in DDIM inversion and identify the singularity problem in traditional noise schedules as a key issue. \nTo resolve this, we introduce the *Logistic Schedule*, a novel noise schedule designed to eliminate singularities, improve inversion stability, and provide a better noise space for image editing. This schedule reduces noise prediction errors, enabling more faithful editing that preserves the original content of the source image. Our approach requires no additional retraining and is compatible with various existing editing methods. \nExperiments across eight editing tasks demonstrate the Logistic Schedule's superior performance in content preservation and edit fidelity compared to traditional noise schedules, highlighting its adaptability and effectiveness. \nThe project page is available at https://lonelvino.github.io/SYE/.", "keywords": "image editing;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/1856f914af840aa4a5eac91c6ac3cbf26d9aee0e.zip", "author": "Haonan Lin;Yan Chen;Jiahao Wang;Wenbin An;Mengmeng Wang;Feng Tian;Yong Liu;Guang Dai;Jingdong Wang;QianYing Wang", "authorids": "~Haonan_Lin1;~Yan_Chen16;~Jiahao_Wang14;~Wenbin_An1;~Mengmeng_Wang1;~Feng_Tian4;~Yong_Liu11;~Guang_Dai1;~Jingdong_Wang1;~QianYing_Wang1", "gender": "M;;;M;F;;M;M;M;F", "homepage": ";;;;https://sallymmx.github.io/;;https://person.zju.edu.cn/en/yongliu;;https://jingdongwang2017.github.io/;https://research.lenovo.com/webapp/view/home.html", "dblp": "269/0292;;;331/2394;;;29/4867-7;;49/3441;86/11012", "google_scholar": "GBnV3HIAAAAJ;;;https://scholar.google.com.hk/citations?user=BpkQZGgAAAAJ;VSRnUiUAAAAJ;;https://scholar.google.com.hk/citations?user=qYcgBbEAAAAJ;;z5SPCmgAAAAJ;gXgWhfEAAAAJ", "orcid": ";0000-0003-4838-3779;;;;;0000-0003-4822-8939;0000-0002-3529-9087;0000-0002-4888-4445;", "linkedin": "haonan-lin-035276207/;;;;;;;;;qianying-jane-wang-0255231/", "or_profile": "~Haonan_Lin1;~Yan_Chen16;~Jiahao_Wang14;~Wenbin_An1;~Mengmeng_Wang1;~Feng_Tian4;~Yong_Liu11;~Guang_Dai1;~Jingdong_Wang1;~QianYing_Wang1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi'an Jiaotong University;Zhejiang University;;Zhejiang University;SGIT AI;Baidu;lenovo group", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;zju.edu.cn;;zju.edu.cn;sgcc.com.cn;baidu.com;lenovo.com", "position": "MS student;Associate Professor;;PhD student;PhD student;;Full Professor;Principal Researcher;Chief Scientist for Computer Vision;Principal Researcher", "bibtex": "@inproceedings{\nlin2024schedule,\ntitle={Schedule Your Edit: A Simple yet Effective Diffusion Noise Schedule for Image Editing},\nauthor={Haonan Lin and Yan Chen and Jiahao Wang and Wenbin An and Mengmeng Wang and Feng Tian and Yong Liu and Guang Dai and Jingdong Wang and QianYing Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Yu6cDt7q9Z}\n}", "github": "", "reviewers": "vCWG;ihj2;B36W;j2Zw", "pdf_size": 31415879, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;4;3", "novelty": "2;3;4;3", "presentation": "2;3;4;4", "wc_summary": "45;67;72;71", "wc_strengths": "54;79;42;61", "wc_weaknesses": "127;62;28;16", "wc_questions": "5;58;17;6", "wc_limitations": "9;2;15;16", "wc_review": "240;268;174;170", "wc_reply_reviewers": "22;15;0;4", "wc_reply_authors": "63;101;96;37", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 63.75, 10.985786271359915 ], "wc_strengths_avg": [ 59.0, 13.397761006974262 ], "wc_weaknesses_avg": [ 58.25, 43.129891026989625 ], "wc_questions_avg": [ 21.5, 21.592822881689184 ], "wc_limitations_avg": [ 10.5, 5.5901699437494745 ], "wc_review_avg": [ 213.0, 42.20189569201838 ], "wc_reply_reviewers_avg": [ 10.25, 8.728545125048045 ], "wc_reply_authors_avg": [ 74.25, 25.9939896899264 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16595068964728985547&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;zju.edu.cn;;zju.edu.cn;sgcc.com.cn;baidu.com;lenovo.com", "author_num": 10, "aff_unique_index": "0;0;0;1;1;2;3;4", "aff_unique_norm": "Xi'an Jiao Tong University;Zhejiang University;SGIT AI;Baidu;Lenovo Group", "aff_unique_dep": ";;;Baidu, Inc.;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.zju.edu.cn;;https://www.baidu.com;https://www.lenovo.com", "aff_unique_abbr": "XJTU;ZJU;;Baidu;Lenovo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Sample-Efficient Geometry Reconstruction from Euclidean Distances using Non-Convex Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94667", "id": "Yu7H8ZOuI2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Yu7H8ZOuI2", "openreview": "https://openreview.net/forum?id=Yu7H8ZOuI2", "poster": "/media/PosterPDFs/NeurIPS%202024/94667.png?t=1731738467.978953", "project": "", "author_site": "Ipsita Ghosh, Abiy Tasissa, Christian K\u00fcmmerle", "tldr": "", "abstract": "The problem of finding suitable point embedding or geometric configurations given only Euclidean distance information of point pairs arises both as a core task and as a sub-problem in a variety of machine learning applications. In this paper, we aim to solve this problem given a minimal number of distance samples. \nTo this end, we leverage continuous and non-convex rank minimization formulations of the problem and establish a local convergence \nguarantee for a variant of iteratively reweighted least squares (IRLS), which applies if a minimal random set of observed distances is provided.\n As a technical tool, we establish a restricted isometry property (RIP) restricted to a tangent space of the manifold of symmetric rank-$r$ matrices given random Euclidean distance measurements, which might be of independent interest for the analysis of other non-convex approaches. Furthermore, we assess data efficiency, scalability and generalizability of different reconstruction algorithms through numerical experiments with simulated data as well as real-world data, demonstrating the proposed algorithm's ability to identify the underlying geometry from fewer distance samples compared to the state-of-the-art.\n \n The Matlab code can be found at \\href{https://github.com/ipsita-ghosh-1/EDG-IRLS}{github\\_SEGRED}", "keywords": "Euclidean distance geometry;non-convex optimization;iteratively reweighted least squares;low-rank;data efficiency;convergence guarnatees;restricted isometry property;dual basis", "primary_area": "optimization", "supplementary_material": "/attachment/c5bc0061d42d0fd096127d7c282f11185e9913eb.zip", "author": "Ipsita Ghosh;Abiy Tasissa;Christian K\u00fcmmerle", "authorids": "~Ipsita_Ghosh1;~Abiy_Tasissa1;~Christian_K\u00fcmmerle1", "gender": "F;M;M", "homepage": "https://webpages.charlotte.edu/ighosh2/index.html;http://sites.tufts.edu/atasissa/;http://ckuemmerle.com", "dblp": ";218/6144;198/0699", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=zElx1AYAAAAJ", "orcid": ";;0000-0001-9267-5379", "linkedin": ";;", "or_profile": "~Ipsita_Ghosh1;~Abiy_Tasissa1;~Christian_K\u00fcmmerle1", "aff": ", University of North Carolina at Charlotte;Tufts University;University of North Carolina at Charlotte", "aff_domain": "cs.uncc.edu;tufts.edu;charlotte.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nghosh2024sampleefficient,\ntitle={Sample-Efficient Geometry Reconstruction from Euclidean Distances using Non-Convex Optimization},\nauthor={Ipsita Ghosh and Abiy Tasissa and Christian K{\\\"u}mmerle},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Yu7H8ZOuI2}\n}", "github": "", "reviewers": "Jqjh;fWZw;wbRb;rCFC", "pdf_size": 2381508, "rating": "4;6;6;7", "confidence": "4;3;3;2", "soundness": "2;4;3;4", "novelty": "2;4;3;3", "presentation": "2;3;3;4", "wc_summary": "25;43;79;89", "wc_strengths": "52;15;118;69", "wc_weaknesses": "134;133;93;57", "wc_questions": "424;25;199;59", "wc_limitations": "18;20;19;22", "wc_review": "653;236;508;296", "wc_reply_reviewers": "140;75;42;49", "wc_reply_authors": "497;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 59.0, 26.038433132583073 ], "wc_strengths_avg": [ 63.5, 37.030392922571046 ], "wc_weaknesses_avg": [ 104.25, 31.901214710415026 ], "wc_questions_avg": [ 176.75, 156.94007614373072 ], "wc_limitations_avg": [ 19.75, 1.479019945774904 ], "wc_review_avg": [ 423.25, 166.75037481217245 ], "wc_reply_reviewers_avg": [ 76.5, 38.66846260197062 ], "wc_reply_authors_avg": [ 124.25, 215.207312840433 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10378126237141267500&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "cs.uncc.edu;tufts.edu;charlotte.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of North Carolina at Charlotte;Tufts University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uncc.edu;https://www.tufts.edu", "aff_unique_abbr": "UNCC;Tufts", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Charlotte;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PV-Tuning: Beyond Straight-Through Estimation for Extreme LLM Compression", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94666", "id": "YvA8UF0I37", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YvA8UF0I37", "openreview": "https://openreview.net/forum?id=YvA8UF0I37", "poster": "/media/PosterPDFs/NeurIPS%202024/94666.png?t=1733995535.3718588", "project": "", "author_site": "Vladimir Malinovskii, Denis Mazur, Ivan Ilin, Denis Kuznedelev, Konstantin Burlachenko, Kai Yi, Dan Alistarh, Peter Richtarik", "tldr": "", "abstract": "There has been significant interest in \"extreme\" compression of large language models (LLMs), i.e. to 1-2 bits per parameter, which allows such models to be executed efficiently on resource-constrained devices. \nExisting work focused on improved one-shot quantization techniques and weight representations; yet, purely post-training approaches are reaching diminishing returns in terms of the accuracy-vs-bit-width trade-off. State-of-the-art quantization methods such as QuIP# and AQLM include fine-tuning (part of) the compressed parameters over a limited amount of calibration data; however, such fine-tuning techniques over compressed weights often make exclusive use of straight-through estimators (STE), whose performance is not well-understood in this setting. \nIn this work, we question the use of STE for extreme LLM compression, showing that it can be sub-optimal, and perform a systematic study of quantization-aware fine-tuning strategies for LLMs.\nWe propose PV-Tuning - a representation-agnostic framework that generalizes and improves upon existing fine-tuning strategies, and provides convergence guarantees in restricted cases.\nOn the practical side, when used for 1-2 bit vector quantization, PV-Tuning outperforms prior techniques for highly-performant models such as Llama and Mistral. \nUsing PV-Tuning, we achieve the first Pareto-optimal quantization for Llama-2 family models at 2 bits per parameter.", "keywords": "Quantization;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/7e5702136cdac980d4b54cca82cb828b8837b961.zip", "author": "Vladimir Malinovskii;Denis Mazur;Ivan Ilin;Denis Kuznedelev;Konstantin Pavlovich Burlachenko;Kai Yi;Dan Alistarh;Peter Richt\u00e1rik", "authorids": "~Vladimir_Malinovskii1;~Denis_Mazur1;~Ivan_Ilin1;~Denis_Kuznedelev1;~Konstantin_Pavlovich_Burlachenko1;~Kai_Yi1;~Dan_Alistarh7;~Peter_Richt\u00e1rik1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://galqiwi.github.io;https://ivan-ilin.netlify.app;https://github.com/Godofnothing;https://kaiyi.me/;http://people.csail.mit.edu/alistarh/;https://vk.com/deniskamazur;https://burlachenkok.github.io/;https://richtarik.org", "dblp": ";;322/8616;13/1589;36/3251.html;;;62/8001", "google_scholar": "POwGR9wAAAAJ;;;r08j39wAAAAJ;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ;;3pA-LoQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0009-0005-2420-9620;0000-0003-0415-3584;;;;0000-0003-4380-5848", "linkedin": ";;;kai-yi-347089153/;;;burlachenkok/;richtarik/", "or_profile": "~Vladimir_Malinovskii1;~Ivan_Ilin1;~Denis_Kuznedelev1;~Kai_Yi1;~Dan_Alistarh1;~Denis_Vladislavovich_Mazur1;~Konstantin_Pavlovich_Konstantin_Burlachenko1;~Peter_Richtarik1", "aff": "Moscow Institute of Physics and Technology;King Abdullah University of Science and Technology;Yandex;KAUST;Institute of Science and Technology;Moscow Institute of Physics and Technology;;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "phystech.edu;kaust.edu.sa;yandex-team.ru;kaust.edu.sa;ist.ac.at;phystech.edu;;kaust.edu.sa", "position": "Undergrad student;MS student;Researcher;PhD student;Full Professor;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nmalinovskii2024pvtuning,\ntitle={{PV}-Tuning: Beyond Straight-Through Estimation for Extreme {LLM} Compression},\nauthor={Vladimir Malinovskii and Denis Mazur and Ivan Ilin and Denis Kuznedelev and Konstantin Pavlovich Burlachenko and Kai Yi and Dan Alistarh and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YvA8UF0I37}\n}", "github": "", "reviewers": "APHA;p3Lv;3n7c;5n4W;NP8P", "pdf_size": 939712, "rating": "5;5;5;6;7", "confidence": "4;2;3;4;3", "soundness": "3;2;3;3;4", "novelty": "3;3;3;3;3", "presentation": "2;1;2;3;4", "wc_summary": "174;125;49;54;79", "wc_strengths": "90;59;59;27;59", "wc_weaknesses": "254;344;244;100;77", "wc_questions": "168;2;33;79;72", "wc_limitations": "4;1;1;6;1", "wc_review": "690;531;386;266;288", "wc_reply_reviewers": "0;232;18;12;15", "wc_reply_authors": "0;673;88;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 96.2, 47.30073995192887 ], "wc_strengths_avg": [ 58.8, 19.923855048659632 ], "wc_weaknesses_avg": [ 203.8, 100.64273446205642 ], "wc_questions_avg": [ 70.8, 55.99785710185703 ], "wc_limitations_avg": [ 2.6, 2.0591260281974 ], "wc_review_avg": [ 432.2, 159.23115273086484 ], "wc_reply_reviewers_avg": [ 55.4, 88.51124222379889 ], "wc_reply_authors_avg": [ 152.2, 262.6209435669592 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3239868517990528651&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "phystech.edu;kaust.edu.sa;yandex-team.ru;kaust.edu.sa;ist.ac.at;phystech.edu;;kaust.edu.sa", "author_num": 8, "aff_unique_index": "0;1;2;1;3;0;1", "aff_unique_norm": "Moscow Institute of Physics and Technology;King Abdullah University of Science and Technology;Yandex;Institute of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.mipt.ru/en;https://www.kast.kau.edu.sa;https://yandex.com;", "aff_unique_abbr": "MIPT;KAUST;Yandex;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;1", "aff_country_unique": "Russian Federation;Saudi Arabia;" }, { "title": "Randomized Strategic Facility Location with Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94665", "id": "YvOeN0kUzT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YvOeN0kUzT", "openreview": "https://openreview.net/forum?id=YvOeN0kUzT", "poster": "", "project": "", "author_site": "Eric Balkanski, Vasilis Gkatzelis, Golnoosh Shahkarami", "tldr": "", "abstract": "In the strategic facility location problem, a set of agents report their locations in a metric space and the goal is to use these reports to open a new facility, minimizing an aggregate distance measure from the agents to the facility. However, agents are strategic and may misreport their locations to influence the facility\u2019s placement in their favor. The aim is to design truthful mechanisms, ensuring agents cannot gain by misreporting. This problem was recently revisited through the learning-augmented framework, aiming to move beyond worst-case analysis and design truthful mechanisms that are augmented with (machine-learned) predictions. The focus of this prior work was on mechanisms that are deterministic and augmented with a prediction regarding the optimal facility location. In this paper, we provide a deeper understanding of this problem by exploring the power of randomization as well as the impact of different types of predictions on the performance of truthful learning-augmented mechanisms. We study both the single-dimensional and the Euclidean case and provide upper and lower bounds regarding the achievable approximation of the optimal egalitarian social cost.", "keywords": "Algorithmic Game Theory;Mechanism Design;Learning Augmented Algorithms;Strategic Facility Location", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Eric Balkanski;Vasilis Gkatzelis;Golnoosh Shahkarami", "authorids": "~Eric_Balkanski2;~Vasilis_Gkatzelis1;~Golnoosh_Shahkarami1", "gender": ";;F", "homepage": "http://ericbalkanski.com;;https://www.mpi-inf.mpg.de/departments/algorithms-complexity/people/current-members/golnoosh-shahkarami", "dblp": ";;228/7906.html", "google_scholar": ";;w2vrFwUAAAAJ", "orcid": ";;0000-0002-6169-7337", "linkedin": ";;golnoosh-shahkarami-435b25138/?originalSubdomain=de", "or_profile": "~Eric_Balkanski2;~Vasilis_Gkatzelis1;~Golnoosh_Shahkarami1", "aff": "Columbia University;;Saarland Informatics Campus, Max-Planck Institute", "aff_domain": "columbia.edu;;mpi-inf.mpg.de", "position": "Assistant Professor;;PhD student", "bibtex": "@inproceedings{\nbalkanski2024randomized,\ntitle={Randomized Strategic Facility Location with Predictions},\nauthor={Eric Balkanski and Vasilis Gkatzelis and Golnoosh Shahkarami},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YvOeN0kUzT}\n}", "github": "", "reviewers": "hDFx;vBqc;MNoo;zBmc", "pdf_size": 432461, "rating": "4;5;6;7", "confidence": "4;2;3;2", "soundness": "3;2;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "316;212;184;132", "wc_strengths": "29;50;54;173", "wc_weaknesses": "66;28;179;101", "wc_questions": "20;78;109;3", "wc_limitations": "1;1;40;9", "wc_review": "432;369;566;418", "wc_reply_reviewers": "0;74;17;44", "wc_reply_authors": "67;584;0;67", "reply_reviewers": "0;2;1;1", "reply_authors": "2;3;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 211.0, 67.07458535093602 ], "wc_strengths_avg": [ 76.5, 56.51769634371168 ], "wc_weaknesses_avg": [ 93.5, 55.7068218443666 ], "wc_questions_avg": [ 52.5, 42.86315434029558 ], "wc_limitations_avg": [ 12.75, 16.068213964221414 ], "wc_review_avg": [ 446.25, 72.98758456066346 ], "wc_reply_reviewers_avg": [ 33.75, 28.039035290109393 ], "wc_reply_authors_avg": [ 179.5, 235.13453595760873 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5535718556456248583&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "columbia.edu;;mpi-inf.mpg.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;Max-Planck Institute", "aff_unique_dep": ";Informatics", "aff_unique_url": "https://www.columbia.edu;https://www.mpi-sws.org", "aff_unique_abbr": "Columbia;MPI-SWS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "United We Stand, Divided We Fall: Fingerprinting Deep Neural Networks via Adversarial Trajectories", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94664", "id": "YwpL0BVxts", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YwpL0BVxts", "openreview": "https://openreview.net/forum?id=YwpL0BVxts", "poster": "/media/PosterPDFs/NeurIPS%202024/94664.png?t=1729387692.1806245", "project": "", "author_site": "Tianlong Xu, Chen Wang, Gaoyang Liu, Yang Yang, Kai Peng, Wei Liu", "tldr": "", "abstract": "In recent years, deep neural networks (DNNs) have witnessed extensive applications, and protecting their intellectual property (IP) is thus crucial. As a non-invasive way for model IP protection, model fingerprinting has become popular. However, existing single-point based fingerprinting methods are highly sensitive to the changes in the decision boundary, and may suffer from the misjudgment of the resemblance of sparse fingerprinting, yielding high false positives of innocent models. In this paper, we propose ADV-TRA, a more robust fingerprinting scheme that utilizes adversarial trajectories to verify the ownership of DNN models. Benefited from the intrinsic progressively adversarial level, the trajectory is capable of tolerating greater degree of alteration in decision boundaries. We further design novel schemes to generate a surface trajectory that involves a series of fixed-length trajectories with dynamically adjusted step sizes. Such a design enables a more unique and reliable fingerprinting with relatively low querying costs. Experiments on three datasets against four types of removal attacks show that ADV-TRA exhibits superior performance in distinguishing between infringing and innocent models, outperforming the state-of-the-art comparisons.", "keywords": "Deep neural network;intellectual property protection;model fingerprinting;adversarial sample.", "primary_area": "privacy", "supplementary_material": "/attachment/b8a1a2ace4bdee5c71ae9ac9953f727ee3743bef.zip", "author": "Tianlong Xu;Chen Wang;Gaoyang Liu;Yang Yang;Kai Peng;Wei Liu", "authorids": "~Tianlong_Xu2;~Chen_Wang19;~Gaoyang_Liu2;~Yang_Yang74;~Kai_Peng1;~Wei_Liu54", "gender": "M;M;M;M;;M", "homepage": ";http://www.chenwang.net.cn/;;https://csyang2018.github.io/en/;;http://faculty.hust.edu.cn/wliu/en/index.htm", "dblp": "337/6008;82/4206-11;133/3350;https://dblp.uni-trier.de/pid/48/450-60;;49/3283-4", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;VQ_nDk0AAAAJ;S-wWZmwAAAAJ;;;", "orcid": ";0000-0003-1963-4954;0000-0003-2566-9360;;;0000-0002-2187-8125", "linkedin": ";;;;;", "or_profile": "~Tianlong_Xu2;~Chen_Wang19;~Gaoyang_Liu2;~Yang_Yang74;~Kai_Peng1;~Wei_Liu54", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Simon Fraser University;Hubei University;;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;sfu.ca;hubu.edu.cn;;hust.edu.cn", "position": "PhD student;Associate Professor;Postdoc;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nxu2024united,\ntitle={United We Stand, Divided We Fall: Fingerprinting Deep Neural Networks via Adversarial Trajectories},\nauthor={Tianlong Xu and Chen Wang and Gaoyang Liu and Yang Yang and Kai Peng and Wei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YwpL0BVxts}\n}", "github": "", "reviewers": "u7bH;6xmi;y2LA;TeCi", "pdf_size": 5749055, "rating": "3;6;6;7", "confidence": "5;5;3;4", "soundness": "1;3;2;4", "novelty": "2;3;2;4", "presentation": "2;3;3;4", "wc_summary": "10;93;172;118", "wc_strengths": "10;38;87;121", "wc_weaknesses": "340;97;188;32", "wc_questions": "55;17;54;51", "wc_limitations": "37;6;12;1", "wc_review": "452;251;513;323", "wc_reply_reviewers": "228;12;49;13", "wc_reply_authors": "681;16;34;17", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.25, 58.40537218441468 ], "wc_strengths_avg": [ 64.0, 42.924352062669506 ], "wc_weaknesses_avg": [ 164.25, 115.61222902444187 ], "wc_questions_avg": [ 44.25, 15.801503093060482 ], "wc_limitations_avg": [ 14.0, 13.838352503098047 ], "wc_review_avg": [ 384.75, 103.28691833915852 ], "wc_reply_reviewers_avg": [ 75.5, 89.29865620489483 ], "wc_reply_authors_avg": [ 187.0, 285.3007185409809 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.502518907629606, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hLZSOSgjslsJ:scholar.google.com/&scioq=United+We+Stand,+Divided+We+Fall:+Fingerprinting+Deep+Neural+Networks+via+Adversarial+Trajectories&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "hust.edu.cn;hust.edu.cn;sfu.ca;hubu.edu.cn;;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Huazhong University of Science and Technology;Simon Fraser University;Hubei University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hust.edu.cn;https://www.sfu.ca;http://www.hubu.edu.cn/", "aff_unique_abbr": "HUST;SFU;HUBU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Canada" }, { "title": "DISP-LLM: Dimension-Independent Structural Pruning for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94663", "id": "YxaY6tHgg0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YxaY6tHgg0", "openreview": "https://openreview.net/forum?id=YxaY6tHgg0", "poster": "/media/PosterPDFs/NeurIPS%202024/94663.png?t=1733364629.7851732", "project": "", "author_site": "Shangqian Gao, Chi-Heng Lin, Ting Hua, Zheng Tang, Yilin Shen, Hongxia Jin, Yen-Chang Hsu", "tldr": "", "abstract": "Large Language Models (LLMs) have achieved remarkable success in various natural language processing tasks, including language modeling, understanding, and generation. However, the increased memory and computational costs associated with these models pose significant challenges for deployment on resource-limited devices. Structural pruning has emerged as a promising solution to reduce the costs of LLMs without requiring post-processing steps. Prior structural pruning methods either follow the dependence of structures at the cost of limiting flexibility, or introduce non-trivial additional parameters by incorporating different projection matrices. In this work, we propose a novel approach that relaxes the constraint imposed by regular structural pruning methods and eliminates the structural dependence along the embedding dimension. Our dimension-independent structural pruning method offers several benefits. Firstly, our method enables different blocks to utilize different subsets of the feature maps. Secondly, by removing structural dependence, we facilitate each block to possess varying widths along its input and output dimensions, thereby significantly enhancing the flexibility of structural pruning. We evaluate our method on various LLMs, including OPT, LLaMA, LLaMA-2, Phi-1.5, and Phi-2. Experimental results demonstrate that our approach outperforms other state-of-the-art methods, showing for the first time that structural pruning can achieve an accuracy similar to semi-structural pruning.", "keywords": "Model Compression;Model Pruning;Efficient Inference;Large Language Models", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Shangqian Gao;Chi-Heng Lin;Ting Hua;Zheng Tang;Yilin Shen;Hongxia Jin;Yen-Chang Hsu", "authorids": "~Shangqian_Gao1;~Chi-Heng_Lin1;~Ting_Hua1;~Zheng_Tang3;~Yilin_Shen1;~Hongxia_Jin1;~Yen-Chang_Hsu1", "gender": ";M;;M;M;;M", "homepage": ";https://www.chihenglin.com/;;http://ztang.info;;;", "dblp": "195/2523;128/4282;;;30/383;;172/1140", "google_scholar": "9mNI83oAAAAJ;OqSt2wMAAAAJ;;;9PSFMzAAAAAJ;;7QWAiigAAAAJ", "orcid": ";;;;;;", "linkedin": ";chi-heng-lin-986727217;;;;;yenchanghsu/", "or_profile": "~Shangqian_Gao1;~Chi-Heng_Lin1;~Ting_Hua1;~Zheng_Tang3;~Yilin_Shen1;~Hongxia_Jin1;~Yen-Chang_Hsu1", "aff": "University of Pittsburgh;Samsung Research America;;Samsung;Samsung Research America;;Samsung Research America", "aff_domain": "pitt.edu;samsung.com;;samsung.com;gmail.com;;samsung.com", "position": "PhD student;Researcher;;Researcher;Principal Researcher;;Research Scientist", "bibtex": "@inproceedings{\ngao2024displlm,\ntitle={{DISP}-{LLM}: Dimension-Independent Structural Pruning for Large Language Models},\nauthor={Shangqian Gao and Chi-Heng Lin and Ting Hua and Zheng Tang and Yilin Shen and Hongxia Jin and Yen-Chang Hsu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YxaY6tHgg0}\n}", "github": "", "reviewers": "BGu5;AWnP;6CTn;FhgQ", "pdf_size": 7183122, "rating": "5;5;6;6", "confidence": "4;4;3;3", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "4;2;3;3", "wc_summary": "65;41;73;180", "wc_strengths": "32;29;47;116", "wc_weaknesses": "102;52;46;21", "wc_questions": "1;35;36;48", "wc_limitations": "1;1;15;14", "wc_review": "201;158;217;379", "wc_reply_reviewers": "0;0;65;152", "wc_reply_authors": "0;0;55;180", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.75, 53.41991669780102 ], "wc_strengths_avg": [ 56.0, 35.30580688781946 ], "wc_weaknesses_avg": [ 55.25, 29.38856069970083 ], "wc_questions_avg": [ 30.0, 17.507141400011598 ], "wc_limitations_avg": [ 7.75, 6.7592529172978875 ], "wc_review_avg": [ 238.75, 83.79849342321137 ], "wc_reply_reviewers_avg": [ 54.25, 62.36335061556587 ], "wc_reply_authors_avg": [ 58.75, 73.51657976266306 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13039863686006849749&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pitt.edu;samsung.com;;samsung.com;gmail.com;;samsung.com", "author_num": 7, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of Pittsburgh;Samsung", "aff_unique_dep": ";Samsung Research America", "aff_unique_url": "https://www.pitt.edu;https://www.samsung.com/us/careers/research/", "aff_unique_abbr": "Pitt;SRA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "EyeGraph: Modularity-aware Spatio Temporal Graph Clustering for Continuous Event-based Eye Tracking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97635", "id": "YxuuzyplFZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YxuuzyplFZ", "openreview": "https://openreview.net/forum?id=YxuuzyplFZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97635.png?t=1733392646.9437459", "project": "", "author_site": "Nuwan Bandara, Thivya Kandappu, Argha Sen, Ila Gokarn, Archan Misra", "tldr": "", "abstract": "Continuous tracking of eye movement dynamics plays a significant role in developing a broad spectrum of human-centered applications, such as cognitive skills (visual attention and working memory) modeling, human-machine interaction, biometric user authentication, and foveated rendering. Recently neuromorphic cameras have garnered significant interest in the eye-tracking research community, owing to their sub-microsecond latency in capturing intensity changes resulting from eye movements. Nevertheless, the existing approaches for event-based eye tracking suffer from several limitations: dependence on RGB frames, label sparsity, and training on datasets collected in controlled lab environments that do not adequately reflect real-world scenarios. To address these limitations, in this paper, we propose a dynamic graph-based approach that uses a neuromorphic event stream captured by Dynamic Vision Sensors (DVS) for high-fidelity tracking of pupillary movement. More specifically, first, we present EyeGraph, a large-scale multi-modal near-eye tracking dataset collected using a wearable event camera attached to a head-mounted device from 40 participants -- the dataset was curated while mimicking in-the-wild settings, accounting for varying mobility and ambient lighting conditions. Subsequently, to address the issue of label sparsity, we adopt an unsupervised topology-aware approach as a benchmark. To be specific, (a) we first construct a dynamic graph using Gaussian Mixture Models (GMM), resulting in a uniform and detailed representation of eye morphology features, facilitating accurate modeling of pupil and iris. Then (b) apply a novel topologically guided modularity-aware graph clustering approach to precisely track the movement of the pupil and address the label sparsity in event-based eye tracking. We show that our unsupervised approach has comparable performance against the supervised approaches while consistently outperforming the conventional clustering approaches.", "keywords": "eye tracking;event camera;dynamic graphs;unsupervised eye tracking;modularity-aware clustering", "primary_area": "", "supplementary_material": "/attachment/8f977963df5aa64259bc341b09ec6f34e6f57cb5.pdf", "author": "Nuwan Sriyantha Bandara;Thivya Kandappu;Argha Sen;Ila Gokarn;Archan Misra", "authorids": "~Nuwan_Sriyantha_Bandara1;~Thivya_Kandappu1;~Argha_Sen1;~Ila_Gokarn1;~Archan_Misra1", "gender": "M;F;M;F;M", "homepage": "https://www.nuwanbandara.com/;;https://arghasen10.github.io/;;https://www.smu.edu.sg/faculty/profile/9632/Archan-MISRA", "dblp": "359/3766;119/3878;;;24/4570", "google_scholar": "7dp_868AAAAJ;;QF2toEoAAAAJ;;https://scholar.google.com.tw/citations?user=a0YC5VAAAAAJ", "orcid": "0009-0002-2509-5117;;;0009-0001-6977-945X;0000-0003-1212-1769", "linkedin": "nuwan-sriyantha-bandara/;;arghasen10/;;", "or_profile": "~Nuwan_Sriyantha_Bandara1;~Thivya_Kandappu1;~Argha_Sen1;~Ila_Gokarn1;~Archan_Misra1", "aff": "Singapore Management University;Singapore Management University;Indian Institute of Technology Kharagpur;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;iitkgp.ac.in;smu.edu.sg;smu.edu.sg", "position": "Researcher;Assistant Professor;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbandara2024eyegraph,\ntitle={EyeGraph: Modularity-aware Spatio Temporal Graph Clustering for Continuous Event-based Eye Tracking},\nauthor={Nuwan Sriyantha Bandara and Thivya Kandappu and Argha Sen and Ila Gokarn and Archan Misra},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YxuuzyplFZ}\n}", "github": "", "reviewers": "6Wc6;WpCY;F7PD", "pdf_size": 16216041, "rating": "6;6;8", "confidence": "4;3;5", "wc_summary_and_contributions": "90;27;65", "wc_strengths": "52;31;38", "wc_improvement": "47;32;33", "wc_limitations": "9;12;43", "wc_correctness": "10;6;31", "wc_clarity": "21;4;23", "wc_relation_to_prior_work": "6;7;24", "wc_documentation": "1;4;30", "wc_additional_feedback": "1;1;1", "wc_review": "237;124;288", "wc_reply_reviewers": "0;39;32", "wc_reply_authors": "0;42;45", "reply_reviewers": "0;1;1", "reply_authors": "1;4;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 60.666666666666664, 25.901522906749882 ], "wc_strengths_avg": [ 40.333333333333336, 8.73053390247253 ], "wc_improvement_avg": [ 37.333333333333336, 6.847546194724712 ], "wc_limitations_avg": [ 21.333333333333332, 15.369522511198006 ], "wc_correctness_avg": [ 15.666666666666666, 10.96458946893235 ], "wc_clarity_avg": [ 16.0, 8.524474568362947 ], "wc_relation_to_prior_work_avg": [ 12.333333333333334, 8.259674462242577 ], "wc_documentation_avg": [ 11.666666666666666, 13.02134998974974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 216.33333333333334, 68.52898818910303 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 16.97710877099579 ], "wc_reply_authors_avg": [ 29.0, 20.54263858417414 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13884564677483150785&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 4, "email": "smu.edu.sg;smu.edu.sg;iitkgp.ac.in;smu.edu.sg;smu.edu.sg", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Singapore Management University;Indian Institute of Technology Kharagpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://www.iitkgp.ac.in", "aff_unique_abbr": "SMU;IIT Kharagpur", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kharagpur", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Singapore;India" }, { "title": "Ferrari: Federated Feature Unlearning via Optimizing Feature Sensitivity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94662", "id": "YxyYTcv3hp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YxyYTcv3hp", "openreview": "https://openreview.net/forum?id=YxyYTcv3hp", "poster": "/media/PosterPDFs/NeurIPS%202024/94662.png?t=1731402295.073615", "project": "", "author_site": "Hanlin Gu, WinKent Ong, Chee Seng Chan, Lixin Fan", "tldr": "", "abstract": "The advent of Federated Learning (FL) highlights the practical necessity for the \u2019right to be forgotten\u2019 for all clients, allowing them to request data deletion from the machine learning model\u2019s service provider. This necessity has spurred a growing demand for Federated Unlearning (FU). Feature unlearning has gained considerable attention due to its applications in unlearning sensitive, backdoor, and biased features. Existing methods employ the influence function to achieve feature unlearning, which is impractical for FL as it necessitates the participation of other clients, if not all, in the unlearning process. Furthermore, current research lacks an evaluation of the effectiveness of feature unlearning. To address these limitations, we define feature sensitivity in evaluating feature unlearning according to Lipschitz continuity. This metric characterizes the model output\u2019s rate of change or sensitivity to perturbations in the input feature. We then propose an effective federated feature unlearning framework called Ferrari, which minimizes feature sensitivity. Extensive experimental results and theoretical analysis demonstrate the effectiveness of Ferrari across various feature unlearning scenarios, including sensitive, backdoor, and biased features. The code is publicly available at https://github.com/OngWinKent/Federated-Feature-Unlearning", "keywords": "Machine Unlearning;Federated Unlearning;Feature Unlearning;Lipschitz Continuity", "primary_area": "privacy", "supplementary_material": "/attachment/ecfce3ead8f9defd19c3def5c382b0a93dd48344.zip", "author": "Hanlin Gu;Win Kent Ong;Chee Seng Chan;Lixin Fan", "authorids": "~Hanlin_Gu1;~Win_Kent_Ong1;~Chee_Seng_Chan1;~Lixin_Fan1", "gender": "M;;;M", "homepage": ";;;", "dblp": "236/6661;;;36/3111", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-8266-4561;;;", "linkedin": ";;;", "or_profile": "~Hanlin_Gu1;~Win_Kent_Ong1;~Chee_Seng_Chan1;~Lixin_Fan1", "aff": "webank;;;WeBank", "aff_domain": "webank.com;;;webank.com", "position": "Researcher;;;Principal Researcher", "bibtex": "@inproceedings{\ngu2024ferrari,\ntitle={Ferrari: Federated Feature Unlearning via Optimizing Feature Sensitivity},\nauthor={Hanlin Gu and Win Kent Ong and Chee Seng Chan and Lixin Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YxyYTcv3hp}\n}", "github": "", "reviewers": "mo2d;vD3D;aV7Y;HLpG;49hC", "pdf_size": 7082422, "rating": "5;5;6;6;8", "confidence": "4;5;3;4;5", "soundness": "2;2;3;3;4", "novelty": "2;3;2;3;4", "presentation": "3;3;2;3;4", "wc_summary": "47;144;67;137;75", "wc_strengths": "46;44;98;123;184", "wc_weaknesses": "28;67;104;125;89", "wc_questions": "315;364;108;50;39", "wc_limitations": "35;11;71;45;6", "wc_review": "471;630;448;480;393", "wc_reply_reviewers": "175;39;0;19;22", "wc_reply_authors": "621;101;0;0;35", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.0, 39.11010099705701 ], "wc_strengths_avg": [ 99.0, 52.22260047144339 ], "wc_weaknesses_avg": [ 82.6, 33.230106831004925 ], "wc_questions_avg": [ 175.2, 137.06261342904563 ], "wc_limitations_avg": [ 33.6, 23.67783773911799 ], "wc_review_avg": [ 484.4, 78.83806187369144 ], "wc_reply_reviewers_avg": [ 51.0, 63.22341338459985 ], "wc_reply_authors_avg": [ 151.4, 237.67927970271202 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.24397501823713333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8080982982774881136&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "webank.com;;;webank.com", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "WeBank", "aff_unique_dep": "", "aff_unique_url": "https://www.webank.com", "aff_unique_abbr": "WeBank", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "The tree autoencoder model, with application to hierarchical data visualization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94661", "id": "Yy0KUmneV6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Yy0KUmneV6", "openreview": "https://openreview.net/forum?id=Yy0KUmneV6", "poster": "/media/PosterPDFs/NeurIPS%202024/94661.png?t=1731739887.9831173", "project": "", "author_site": "Miguel A. Carreira-Perpinan, Kuat Gazizov", "tldr": "", "abstract": "We propose a new model for dimensionality reduction, the PCA tree, which works like a regular autoencoder, having explicit projection and reconstruction mappings. The projection is effected by a sparse oblique tree, having hard, hyperplane splits using few features and linear leaves. The reconstruction mapping is a set of local linear mappings. Thus, rather than producing a global map as in t-SNE and other methods, which often leads to distortions, it produces a hierarchical set of local PCAs. The use of a sparse oblique tree and PCA makes the overall model interpretable and very fast to project or reconstruct new points. Joint optimization of all the parameters in the tree is a nonconvex nondifferentiable problem. We propose an algorithm that is guaranteed to decrease the error monotonically and which scales to large datasets without any approximation. In experiments, we show PCA trees are able to identify a wealth of low-dimensional and cluster structure in image and document datasets.", "keywords": "visualization;dimensionality reduction;decision trees;principal component analysis;autoencoders", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Miguel \u00c1. Carreira-Perpi\u00f1\u00e1n;Kuat Gazizov", "authorids": "~Miguel_\u00c1._Carreira-Perpi\u00f1\u00e1n2;~Kuat_Gazizov1", "gender": "Not Specified;", "homepage": "https://gazizovk.github.io/;http://faculty.ucmerced.edu/mcarreira-perpinan/", "dblp": ";23/5257", "google_scholar": "jv84Q5oAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-3297-9375", "linkedin": "kgazizov/;miguel-a-carreira-perpinan", "or_profile": "~Kuat_Gazizov1;~Miguel_A._Carreira-Perpinan1", "aff": "University of California, Merced;University of California, Merced", "aff_domain": "ucmerced.edu;ucmerced.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncarreira-perpi{\\~n}{\\'a}n2024the,\ntitle={The tree autoencoder model, with application to hierarchical data visualization},\nauthor={Miguel {\\'A}. Carreira-Perpi{\\~n}{\\'a}n and Kuat Gazizov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Yy0KUmneV6}\n}", "github": "", "reviewers": "2dBg;DywJ;7zsa;Aeto", "pdf_size": 0, "rating": "4;5;6;7", "confidence": "4;3;5;3", "soundness": "2;3;3;3", "novelty": "1;2;2;3", "presentation": "2;3;4;4", "wc_summary": "46;111;96;93", "wc_strengths": "6;46;19;284", "wc_weaknesses": "120;138;126;108", "wc_questions": "25;12;2;66", "wc_limitations": "4;23;37;91", "wc_review": "201;330;280;642", "wc_reply_reviewers": "222;150;0;33", "wc_reply_authors": "666;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.5, 24.3567239176372 ], "wc_strengths_avg": [ 88.75, 113.64720630090297 ], "wc_weaknesses_avg": [ 123.0, 10.816653826391969 ], "wc_questions_avg": [ 26.25, 24.355440870573457 ], "wc_limitations_avg": [ 38.75, 32.36027657483786 ], "wc_review_avg": [ 363.25, 167.37887411498502 ], "wc_reply_reviewers_avg": [ 101.25, 89.25630229849318 ], "wc_reply_authors_avg": [ 166.5, 288.3864594602181 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1348399724926484, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OqneUnFLHXIJ:scholar.google.com/&scioq=The+tree+autoencoder+model,+with+application+to+hierarchical+data+visualization&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ucmerced.edu;ucmerced.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Merced", "aff_unique_dep": "", "aff_unique_url": "https://www.ucmerced.edu", "aff_unique_abbr": "UC Merced", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Merced", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Cross-Device Collaborative Test-Time Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94660", "id": "YyMiO0DWmI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YyMiO0DWmI", "openreview": "https://openreview.net/forum?id=YyMiO0DWmI", "poster": "", "project": "", "author_site": "Guohao Chen, Shuaicheng Niu, Deyu Chen, Shuhai Zhang, Changsheng Li, Yuanqing Li, Mingkui Tan", "tldr": "", "abstract": "In this paper, we propose test-time Collaborative Lifelong Adaptation (CoLA), which is a general paradigm that can be incorporated with existing advanced TTA methods to boost the adaptation performance and efficiency in a multi-device collaborative manner. Specifically, we maintain and store a set of device-shared _domain knowledge vectors_, which accumulates the knowledge learned from all devices during their lifelong adaptation process. Based on this, CoLA conducts two collaboration strategies for devices with different computational resources and latency demands. 1) Knowledge reprogramming learning strategy jointly learns new domain-specific model parameters and a reweighting term to reprogram existing shared domain knowledge vectors, termed adaptation on _principal agents_. 2) Similarity-based knowledge aggregation strategy solely aggregates the knowledge stored in shared domain vectors according to domain similarities in an optimization-free manner, termed adaptation on _follower agents_. Experiments verify that CoLA is simple but effective, which boosts the efficiency of TTA and demonstrates remarkable superiority in collaborative, lifelong, and single-domain TTA scenarios, e.g., on follower agents, we enhance accuracy by over 30\\% on ImageNet-C while maintaining nearly the same efficiency as standard inference. The source code is available at https://github.com/Cascol-Chen/COLA.", "keywords": "Test-Time Adaptation;Out-of-distribution Generalization;Collaborative Adaptation", "primary_area": "other", "supplementary_material": "", "author": "Guohao Chen;Shuaicheng Niu;Deyu Chen;Shuhai Zhang;Changsheng Li;Yuanqing Li;Mingkui Tan", "authorids": "~Guohao_Chen1;~Shuaicheng_Niu1;~Deyu_Chen1;~Shuhai_Zhang1;~Changsheng_Li4;~Yuanqing_Li2;~Mingkui_Tan2", "gender": "M;M;;M;M;M;M", "homepage": "https://github.com/Cascol-Chen/;https://niushuaicheng.cn/;https://github.com/Davy-Chendy;https://github.com/ZSHsh98;;http://www2.scut.edu.cn/autonlaben/2015/0825/c5794a92900/page.htm;https://tanmingkui.github.io/", "dblp": ";254/1388;;67/5655;;51/2525.html;49/2007", "google_scholar": "HZbzdNEAAAAJ;https://scholar.google.com/citations?hl=en;;oNhLYoEAAAAJ;FfJnUioAAAAJ;https://scholar.google.com.sg/citations?user=wN3v1coAAAAJ;https://scholar.google.com.sg/citations?user=EVsoTGkAAAAJ", "orcid": "0009-0007-9736-4642;0000-0001-8212-1831;;0000-0001-6877-3825;0000-0001-9789-7632;;0000-0001-8856-756X", "linkedin": ";;;;;;", "or_profile": "~Guohao_Chen1;~Shuaicheng_Niu1;~Deyu_Chen1;~Shuhai_Zhang1;~Changsheng_Li4;~Yuanqing_Li2;~Mingkui_Tan1", "aff": "South China University of Technology;Nanyang Technological University;South China University of Technology;South China University of Technology;Beijing Institute of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;ntu.edu.sg;scut.edu.cn;scut.edu.cn;bit.edu.cn;scut.edu.cn;scut.edu.cn", "position": "MS student;Postdoc;Undergrad student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024crossdevice,\ntitle={Cross-Device Collaborative Test-Time Adaptation},\nauthor={Guohao Chen and Shuaicheng Niu and Deyu Chen and Shuhai Zhang and Changsheng Li and Yuanqing Li and Mingkui Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YyMiO0DWmI}\n}", "github": "", "reviewers": "L5Gu;SsoW;sr9b;1r2f", "pdf_size": 1980716, "rating": "5;5;5;7", "confidence": "4;4;5;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "58;80;76;93", "wc_strengths": "25;35;36;39", "wc_weaknesses": "258;360;139;128", "wc_questions": "74;63;58;44", "wc_limitations": "30;12;6;1", "wc_review": "445;550;315;305", "wc_reply_reviewers": "35;378;24;15", "wc_reply_authors": "41;832;69;96", "reply_reviewers": "1;2;1;1", "reply_authors": "2;5;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.75, 12.517487767120047 ], "wc_strengths_avg": [ 33.75, 5.261891294962297 ], "wc_weaknesses_avg": [ 221.25, 94.95097419194813 ], "wc_questions_avg": [ 59.75, 10.779030568655049 ], "wc_limitations_avg": [ 12.25, 10.96300597464035 ], "wc_review_avg": [ 403.75, 100.89443740861039 ], "wc_reply_reviewers_avg": [ 113.0, 153.16167928042574 ], "wc_reply_authors_avg": [ 259.5, 331.1045907262538 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13480398612672652143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "scut.edu.cn;ntu.edu.sg;scut.edu.cn;scut.edu.cn;bit.edu.cn;scut.edu.cn;scut.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "South China University of Technology;Nanyang Technological University;Beijing Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scut.edu.cn;https://www.ntu.edu.sg;http://www.bit.edu.cn/", "aff_unique_abbr": "SCUT;NTU;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "Aggregate-and-Adapt Natural Language Prompts for Downstream Generalization of CLIP", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94659", "id": "Yz3wBKoK0K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Yz3wBKoK0K", "openreview": "https://openreview.net/forum?id=Yz3wBKoK0K", "poster": "/media/PosterPDFs/NeurIPS%202024/94659.png?t=1731655582.5262558", "project": "", "author_site": "Chen Huang, Skyler Seto, Samira Abnar, David Grangier, Navdeep Jaitly, Joshua Susskind", "tldr": "", "abstract": "Large pretrained vision-language models like CLIP have shown promising generalization capability, but may struggle in specialized domains (e.g., satellite imagery) or fine-grained classification (e.g., car models) where the visual concepts are unseen or under-represented during pretraining. Prompt learning offers a parameter-efficient finetuning framework that can adapt CLIP to downstream tasks even when limited annotation data are available. In this paper, we improve prompt learning by distilling the textual knowledge from natural language prompts (either human- or LLM-generated) to provide rich priors for those under-represented concepts. We first obtain a prompt ``summary'' aligned to each input image via a learned prompt aggregator. Then we jointly train a prompt generator, optimized to produce a prompt embedding that stays close to the aggregated summary while minimizing task loss at the same time. We dub such prompt embedding as Aggregate-and-Adapted Prompt Embedding (AAPE). AAPE is shown to be able to generalize to different downstream data distributions and tasks, including vision-language understanding tasks (e.g., few-shot classification, VQA) and generation tasks (image captioning) where AAPE achieves competitive performance. We also show AAPE is particularly helpful to handle non-canonical and OOD examples. Furthermore, AAPE learning eliminates LLM-based inference cost as required by baselines, and scales better with data and LLM model size.", "keywords": "CLIP;Downstream Generalization;Prompt Learning;Natural Language Prompts", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chen Huang;Skyler Seto;Samira Abnar;David Grangier;Navdeep Jaitly;Joshua M. Susskind", "authorids": "~Chen_Huang6;~Skyler_Seto1;~Samira_Abnar1;~David_Grangier1;~Navdeep_Jaitly1;~Joshua_M._Susskind1", "gender": "M;;Unspecified;M;M;M", "homepage": ";;https://samiraabnar.github.io/;http://david.grangier.info/;http://www.cs.toronto.edu/~ndjaitly/;http://www.apple.com", "dblp": "05/8125-1;173/5386;150/5405;57/1192;04/6137;132/7797", "google_scholar": "QZ-JKOUAAAAJ;428y_sgAAAAJ;https://scholar.google.nl/citations?user=jbxwjgMAAAAJ;CIQEGCYAAAAJ;kjMNMLkAAAAJ;Sv2TGqsAAAAJ", "orcid": ";;;0000-0002-8847-9532;;", "linkedin": ";;;davidgrangier/;;joshua-susskind-8ab2ab5/", "or_profile": "~Chen_Huang6;~Skyler_Seto1;~Samira_Abnar1;~David_Grangier1;~Navdeep_Jaitly1;~Joshua_M._Susskind1", "aff": "Apple;Apple;Apple;Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "position": "Research Scientist;Researcher;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nhuang2024aggregateandadapt,\ntitle={Aggregate-and-Adapt Natural Language Prompts for Downstream Generalization of {CLIP}},\nauthor={Chen Huang and Skyler Seto and Samira Abnar and David Grangier and Navdeep Jaitly and Joshua M. Susskind},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Yz3wBKoK0K}\n}", "github": "", "reviewers": "ZqyB;yCM3;khEq;7B5Q;UfdC", "pdf_size": 7666183, "rating": "5;5;5;5;6", "confidence": "4;4;4;3;4", "soundness": "3;2;3;3;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "38;99;85;81;90", "wc_strengths": "32;124;33;42;32", "wc_weaknesses": "70;233;95;164;59", "wc_questions": "70;240;5;32;69", "wc_limitations": "37;6;12;1;1", "wc_review": "247;702;230;320;251", "wc_reply_reviewers": "10;397;78;14;29", "wc_reply_authors": "21;1153;367;21;20", "reply_reviewers": "1;3;1;1;1", "reply_authors": "2;4;2;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.6, 21.17167919651155 ], "wc_strengths_avg": [ 52.6, 35.897632233895315 ], "wc_weaknesses_avg": [ 124.2, 65.51763121481117 ], "wc_questions_avg": [ 83.2, 82.09604131747159 ], "wc_limitations_avg": [ 11.4, 13.425349157470729 ], "wc_review_avg": [ 350.0, 178.66952734028263 ], "wc_reply_reviewers_avg": [ 105.6, 147.6977995773803 ], "wc_reply_authors_avg": [ 316.4, 439.2801384082827 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wakGIwEWB5YJ:scholar.google.com/&scioq=Aggregate-and-Adapt+Natural+Language+Prompts+for+Downstream+Generalization+of+CLIP&hl=en&as_sdt=0,19", "gs_version_total": 6, "email": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Touchstone Benchmark: Are We on the Right Way for Evaluating AI Algorithms for Medical Segmentation?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97634", "id": "YzM10FEJ2D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YzM10FEJ2D", "openreview": "https://openreview.net/forum?id=YzM10FEJ2D", "poster": "", "project": "", "author_site": "Pedro R. A. S. Bassi, Wenxuan Li, Yucheng Tang, Fabian Isensee, Zifu Wang, Jieneng Chen, Yu-Cheng Chou, Tassilo Wald, Constantin Ulrich, Michael Baumgartner, Saikat Roy, Klaus Maier-Hein, Paul Jaeger, Yiwen Ye, Yutong Xie, Jianpeng Zhang, Ziyang Chen, Yong Xia, Yannick Kirchhoff, Maximilian R. Rokuss, Pengcheng Shi, Ting Ma, Yuxin Du, Fan BAI, Tiejun Huang, Bo Zhao, Zhaohu Xing, Lei Zhu, Saumya Gupta, Haonan Wang, Xiaomeng Li, Ziyan Huang, Jin Ye, Junjun He, Yousef Sadegheih, Afshin Bozorgpour, Pratibha Kumari, Reza Azad, Dorit Merhof, Hanxue Gu, Haoyu Dong, Jichen Yang, Maciej Mazurowski, Linshan Wu, Jia-Xin Zhuang, Hao CHEN, Holger Roth, Daguang Xu, Matthew Blaschko, Sergio Decherchi, Andrea Cavalli, Alan Yuille, Zongwei Zhou", "tldr": "", "abstract": "How can we test AI performance? This question seems trivial, but it isn't. Standard benchmarks often have problems such as in-distribution and small-size test sets, oversimplified metrics, unfair comparisons, and short-term outcome pressure. As a consequence, good performance on standard benchmarks does not guarantee success in real-world scenarios. To address these problems, we present Touchstone, a large-scale collaborative segmentation benchmark of 9 types of abdominal organs. This benchmark is based on 5,195 training CT scans from 76 hospitals around the world and 5,903 testing CT scans from 11 additional hospitals. This diverse test set enhances the statistical significance of benchmark results and rigorously evaluates AI algorithms across various out-of-distribution scenarios. We invited 14 inventors of 19 AI algorithms to train their algorithms, while our team, as a third party, independently evaluated these algorithms on three test sets. In addition, we also evaluated pre-existing AI frameworks---which, differing from algorithms, are more flexible and can support different algorithms\u2014including MONAI from NVIDIA, nnU-Net from DKFZ, and numerous other open-source frameworks. We are committed to expanding this benchmark to encourage more innovation of AI algorithms for the medical domain.", "keywords": "Organ Segmentation;Out-of-Distribution;Algorithmic Benchmark", "primary_area": "", "supplementary_material": "/attachment/77e24fa30107a4c6dff82c878ec3ef7d901f9adb.pdf", "author": "Pedro R. A. S. Bassi;Wenxuan Li;Yucheng Tang;Fabian Isensee;Zifu Wang;Jieneng Chen;Yu-Cheng Chou;Yannick Kirchhoff;Maximilian Rouven Rokuss;Ziyan Huang;Jin Ye;Junjun He;Tassilo Wald;Constantin Ulrich;Michael Baumgartner;Saikat Roy;Klaus Maier-Hein;Paul F Jaeger;Yiwen Ye;Yutong Xie;Jianpeng Zhang;Ziyang Chen;Yong Xia;Zhaohu Xing;Lei Zhu;Yousef Sadegheih;Afshin Bozorgpour;Pratibha Kumari;Reza Azad;Dorit Merhof;Pengcheng Shi;Ting Ma;Yuxin Du;Fan BAI;Tiejun Huang;Bo Zhao;Haonan Wang;Xiaomeng Li;Hanxue Gu;Haoyu Dong;Jichen Yang;Maciej A Mazurowski;Saumya Gupta;Linshan Wu;Jia-Xin Zhuang;Hao Chen;Holger R Roth;Daguang Xu;Matthew B. Blaschko;Sergio Decherchi;Andrea Cavalli;Alan Yuille;Zongwei Zhou", "authorids": "~Pedro_R._A._S._Bassi1;~Wenxuan_Li3;~Yucheng_Tang1;~Fabian_Isensee1;~Zifu_Wang1;~Jieneng_Chen1;~Yu-Cheng_Chou1;~Yannick_Kirchhoff1;~Maximilian_Rouven_Rokuss1;~Ziyan_Huang1;~Jin_Ye2;~Junjun_He2;~Tassilo_Wald1;~Constantin_Ulrich1;~Michael_Baumgartner2;~Saikat_Roy2;~Klaus_Maier-Hein1;~Paul_F_Jaeger1;~Yiwen_Ye1;~Yutong_Xie4;~Jianpeng_Zhang2;~Ziyang_Chen4;~Yong_Xia1;~Zhaohu_Xing1;~Lei_Zhu1;~Yousef_Sadegheih1;~Afshin_Bozorgpour1;~Pratibha_Kumari1;~Reza_Azad2;~Dorit_Merhof1;~Pengcheng_Shi3;~Ting_Ma3;~Yuxin_Du2;~Fan_BAI4;~Tiejun_Huang1;~Bo_Zhao4;~Haonan_Wang5;~Xiaomeng_Li1;~Hanxue_Gu1;~Haoyu_Dong3;~Jichen_Yang2;~Maciej_A_Mazurowski1;~Saumya_Gupta1;~Linshan_Wu2;~Jia-Xin_Zhuang1;~Hao_Chen1;~Holger_R_Roth1;~Daguang_Xu2;~Matthew_B._Blaschko1;~Sergio_Decherchi1;~Andrea_Cavalli1;~Alan_Yuille1;~Zongwei_Zhou1", "gender": "M;F;M;M;M;;M;M;M;M;M;M;M;M;M;M;M;;M;Not Specified;;M;M;;M;M;M;F;M;F;M;F;M;M;M;M;M;F;F;;M;M;F;M;;M;M;M;;M;;M;M", "homepage": "https://pedro1043.wixsite.com/onlinecv;https://github.com/WenxuanChelsea;https://tangy5.github.io/;;https://zifuwang.com;;;https://www.dkfz.de/en/mic/team/people/Yannick_Kirchhoff.html;https://github.com/mrokuss;https://github.com/Ziyan-Huang;https://yejin0111.github.io/;https://junjun2016.github.io/;https://TaWald.github.io;;;https://linktr.ee/saikat.roy;http://www.dkfz.de/en/mic;https://pfjaeger.github.io;;https://ytongxie.github.io/;;https://chen-ziyang.github.io/;https://teacher.nwpu.edu.cn/yongxia.html;;https://sites.google.com/site/indexlzhu/home?authuser=0;;;;https://github.com/rezazad68;https://www.uni-regensburg.de/informatik-data-science/bildverarbeitung/startseite/index.html;;https://nit-hit.github.io/;;;https://idm.pku.edu.cn/~tjhuang/;;https://mcgregorwwww.github.io/;https://xmengli.github.io/;https://guhanxue.github.io;https://haoyudong-97.github.io/;;https://sites.duke.edu/mazurowski/;https://saumya-gupta-26.github.io/;;;https://cse.hkust.edu.hk/~jhc/;http://www.holgerroth.com;https://daguangxu.net/;;;;;https://www.zongweiz.com/", "dblp": "264/5743;;201/0160;;;;;348/6129;344/7969;;;128/7027;289/0140;327/3310;66/4721-1;59/8399.html;133/0183;179/4749;292/4625;;;;50/2433-1.html;;99/549-3;354/9334;292/8411;235/7602;;63/3515;;;250/3895;;h/TiejunHuang;;;02/9850-1;;;;;141/5238;;;86/475-11;42/8528;;;84/2830;;y/AlanLYuille;", "google_scholar": "https://scholar.google.com.br/citations?user=NftgL6gAAAAJ;tpNZM2YAAAAJ;0xheliUAAAAJ;PjerEe4AAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=YVNRBTcAAAAJ;nfvjwmkAAAAJ;https://scholar.google.de/citations?hl=de;BshL3fUAAAAJ;UFBrJOAAAAAJ;Z4LgebkAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=de;https://scholar.google.com/citations?hl=de;https://scholar.google.de/citations?user=dSs0DfoAAAAJ;oCrBpVMAAAAJ;https://scholar.google.de/citations?user=9B9-8h0AAAAJ;Fg6IuEgAAAAJ;ddDL9HMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=Usw1jeMAAAAJ;;https://scholar.google.com.hk/citations?user=AQtqhaYAAAAJ;;OUZkcNsAAAAJ;gm0ORicAAAAJ;https://scholar.google.com/citations?hl=en;0c0rMr0AAAAJ;https://scholar.google.com/citations?hl=en;xxWdw0IAAAAJ;https://scholar.google.nl/citations?user=iciTynsAAAAJ;https://scholar.google.com.hk/citations?user=jWKTSIEAAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;R3_AR5EAAAAJ;KDNRnW0AAAAJ;uVTzPpoAAAAJ;;https://scholar.google.com/citations?hl=en;;HlxjJPQAAAAJ;https://scholar.google.com/citations?hl=en;RGPzB4sAAAAJ;;https://scholar.google.com.hk/citations?user=Z_t5DjwAAAAJ;https://scholar.google.co.jp/citations?user=pzNwAsEAAAAJ;r_VHYHAAAAAJ;;https://scholar.google.it/citations?user=T09qQ1IAAAAJ;;;JVOeczAAAAAJ", "orcid": "0000-0002-8995-9423;;;0000-0002-3519-5886;;;;0000-0001-8124-8435;;0000-0002-1533-5239;0000-0003-0667-9889;;0009-0007-5222-2683;;;0000-0002-0809-6524;0000-0002-6626-2463;;;;;0000-0002-8564-9735;0000-0001-9273-2847;;;0009-0003-1766-5519;0000-0003-1857-1058;0000-0003-3681-3700;;;;;0009-0001-9803-7973;;0000-0002-4234-6099;;;;;;;;0000-0003-0933-3445;;;0000-0002-8400-3780;0000-0002-3662-8743;;;;;;0000-0002-3154-9851", "linkedin": ";;;;;;;;;;;;tassilo-wald-342902217/;;michael-baumgartner-/;mrsaikatroy/;;;;;;;;;;yousef-sadegheih/;;pratibha-kumari-06a866b1/;;;;;;;;;;;;haoyu-dong-63a801b9/;jichen-yang-97027b197/;;saumya-gupta-0b48b416a/;;;;;;;;;;", "or_profile": "~Pedro_R._A._S._Bassi1;~Wenxuan_Li3;~Yucheng_Tang1;~Fabian_Isensee1;~Zifu_Wang1;~Jieneng_Chen1;~Yu-Cheng_Chou1;~Yannick_Kirchhoff1;~Maximilian_Rouven_Rokuss1;~Ziyan_Huang1;~Jin_Ye2;~Junjun_He2;~Tassilo_Wald1;~Constantin_Ulrich1;~Michael_Baumgartner2;~Saikat_Roy2;~Klaus_Maier-Hein1;~Paul_F_Jaeger1;~Yiwen_Ye1;~Yutong_Xie4;~Jianpeng_Zhang2;~Ziyang_Chen4;~Yong_Xia1;~Zhaohu_Xing1;~Lei_Zhu1;~Yousef_Sadegheih1;~Afshin_Bozorgpour1;~Pratibha_Kumari1;~Reza_Azad2;~Dorit_Merhof1;~Pengcheng_Shi3;~Ting_Ma3;~Yuxin_Du2;~Fan_BAI4;~Tiejun_Huang1;~Bo_Zhao4;~Haonan_Wang5;~Xiaomeng_Li1;~Hanxue_Gu1;~Haoyu_Dong3;~Jichen_Yang2;~Maciej_A_Mazurowski1;~Saumya_Gupta1;~Linshan_Wu2;~Jia-Xin_Zhuang1;~Hao_Chen1;~Holger_R_Roth1;~Daguang_Xu2;~Matthew_B._Blaschko1;~Sergio_Decherchi1;~Andrea_Cavalli1;~Alan_Yuille1;~Zongwei_Zhou1", "aff": "Istituto Italiano di Tecnologia;Johns Hopkins University;NVIDIA;German Cancer Research Center;KU Leuven;;Johns Hopkins University;Deutsches Krebsforschungszentrum;Deutsches Krebsforschungszentrum;Shanghai Jiaotong University;Monash University;Shanghai AI Laboratory;German Cancer Research Center;Deutsches Krebsforschungszentrum;Deutsches Krebsforschungszentrum;Ruprecht-Karls-Universit\u00e4t Heidelberg;German Cancer Research Center;German Cancer Research Center;Northwestern Polytechnical University;University of Adelaide;;Northwestern Polytechnical University;Northwestern Polytechnical University;;Hong Kong University of Science and Technology (Guangzhou) & HKUST;Universit\u00e4t Regensburg;Universit\u00e4t Regensburg;Universit\u00e4t Regensburg;RWTH Aachen, Rheinisch Westf\u00e4lische Technische Hochschule Aachen;University of Regensburg;Harbin Institute of Technology (Shenzhen);Harbin Institute of Technology (Shenzhen);Beijing Academy of Artificial Intelligence;The Chinese University of Hong Kong;Peking University;BAAI;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Duke University;Duke University;Duke University;Duke University;Adobe Incorporated;Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology;NVIDIA;NVIDIA;;Istituto Italiano di Tecnologia;;Johns Hopkins University;Johns Hopkins University", "aff_domain": "iit.it;jh.edu;nvidia.com;dkfz.de;kuleuven.be;;jh.edu;dkfz.de;dkfz-heidelberg.de;sjtu.edu.cn;monash.edu;pjlab.org.cn;dkfz.de;dkfz.de;dkfz.de;uni-heidelberg.de;dkfz.de;dkfz.de;nwpu.edu.cn;adelaide.edu.au;;nwpu.edu.cn;nwpu.edu.cn;;ust.hk;uni-regensburg.de;ur.de;ur.de;informatik.rwth-aachen.de;ur.de;hit.edu.cn;hit.edu.cn;baai.ac.cn;cuhk.edu.hk;pku.edu.cn;baai.ac.cn;ust.hk;ust.hk;duke.edu;duke.edu;duke.edu;duke.edu;adobe.com;connect.ust.hk;;ust.hk;nvidia.com;nvidia.com;;iit.it;;johnshopkins.edu;jhu.edu", "position": "PhD student;PhD student;Researcher;Principal Researcher;PhD student;;PhD student;PhD student;PhD student;PhD student;PhD student;Researcher;PhD student;PhD student;PhD student;PhD student;Full Professor;Research Group Leader;PhD student;Postdoc;;PhD student;Professor;;Assistant Professor;PhD student;PhD student;Postdoc;PhD student;Full Professor;PhD student;Full Professor;Intern;PhD student;Full Professor;Principal Researcher;PhD student;Assistant Professor;PhD student;PhD student;PhD student;Associate Professor;Intern;PhD student;;Assistant Professor;Principal Researcher;Research Manager;;Researcher;;Full Professor;Postdoc", "bibtex": "@inproceedings{\nbassi2024touchstone,\ntitle={Touchstone Benchmark: Are We on the Right Way for Evaluating {AI} Algorithms for Medical Segmentation?},\nauthor={Pedro R. A. S. Bassi and Wenxuan Li and Yucheng Tang and Fabian Isensee and Zifu Wang and Jieneng Chen and Yu-Cheng Chou and Yannick Kirchhoff and Maximilian Rouven Rokuss and Ziyan Huang and Jin Ye and Junjun He and Tassilo Wald and Constantin Ulrich and Michael Baumgartner and Saikat Roy and Klaus Maier-Hein and Paul F Jaeger and Yiwen Ye and Yutong Xie and Jianpeng Zhang and Ziyang Chen and Yong Xia and Zhaohu Xing and Lei Zhu and Yousef Sadegheih and Afshin Bozorgpour and Pratibha Kumari and Reza Azad and Dorit Merhof and Pengcheng Shi and Ting Ma and Yuxin Du and Fan BAI and Tiejun Huang and Bo Zhao and Haonan Wang and Xiaomeng Li and Hanxue Gu and Haoyu Dong and Jichen Yang and Maciej A Mazurowski and Saumya Gupta and Linshan Wu and Jia-Xin Zhuang and Hao Chen and Holger R Roth and Daguang Xu and Matthew B. Blaschko and Sergio Decherchi and Andrea Cavalli and Alan Yuille and Zongwei Zhou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=YzM10FEJ2D}\n}", "github": "", "reviewers": "68Sv;5KZ1;GiJy;mP5R", "pdf_size": 625656, "rating": "5;5;7;7", "confidence": "4;5;4;3", "wc_summary_and_contributions": "137;118;105;139", "wc_strengths": "91;67;58;2", "wc_improvement": "201;104;55;2", "wc_limitations": "9;34;78;43", "wc_correctness": "16;65;1;27", "wc_clarity": "6;75;20;5", "wc_relation_to_prior_work": "21;83;1;13", "wc_documentation": "2;139;1;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "484;686;320;241", "wc_reply_reviewers": "0;129;0;0", "wc_reply_authors": "74;735;0;0", "reply_reviewers": "0;2;0;0", "reply_authors": "5;7;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 124.75, 14.042346669983617 ], "wc_strengths_avg": [ 54.5, 32.62284475639732 ], "wc_improvement_avg": [ 90.5, 73.28881224307023 ], "wc_limitations_avg": [ 41.0, 24.728526037756478 ], "wc_correctness_avg": [ 27.25, 23.668280461410795 ], "wc_clarity_avg": [ 26.5, 28.622543562723422 ], "wc_relation_to_prior_work_avg": [ 29.5, 31.697791721190924 ], "wc_documentation_avg": [ 37.75, 58.53791506365767 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 432.75, 170.47195517151786 ], "wc_reply_reviewers_avg": [ 32.25, 55.858638544096294 ], "wc_reply_authors_avg": [ 202.25, 309.0634036892754 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 2.598076211353316 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 53, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5483390151791686186&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "iit.it;jh.edu;nvidia.com;dkfz.de;kuleuven.be;;jh.edu;dkfz.de;dkfz-heidelberg.de;sjtu.edu.cn;monash.edu;pjlab.org.cn;dkfz.de;dkfz.de;dkfz.de;uni-heidelberg.de;dkfz.de;dkfz.de;nwpu.edu.cn;adelaide.edu.au;;nwpu.edu.cn;nwpu.edu.cn;;ust.hk;uni-regensburg.de;ur.de;ur.de;informatik.rwth-aachen.de;ur.de;hit.edu.cn;hit.edu.cn;baai.ac.cn;cuhk.edu.hk;pku.edu.cn;baai.ac.cn;ust.hk;ust.hk;duke.edu;duke.edu;duke.edu;duke.edu;adobe.com;connect.ust.hk;;ust.hk;nvidia.com;nvidia.com;;iit.it;;johnshopkins.edu;jhu.edu", "author_num": 53, "aff_unique_index": "0;1;2;3;4;1;5;5;6;7;8;3;5;5;9;3;3;10;11;10;10;12;13;13;13;14;13;15;15;16;17;18;16;12;12;19;19;19;19;20;12;12;2;2;0;1;1", "aff_unique_norm": "Istituto Italiano di Tecnologia;Johns Hopkins University;NVIDIA;German Cancer Research Center;Katholieke Universiteit Leuven;Deutsches Krebsforschungszentrum;Shanghai Jiao Tong University;Monash University;Shanghai AI Laboratory;Ruprecht-Karls-Universit\u00e4t Heidelberg;Northwestern Polytechnical University;University of Adelaide;Hong Kong University of Science and Technology;University of Regensburg;RWTH Aachen University;Harbin Institute of Technology;Beijing Academy of Artificial Intelligence;Chinese University of Hong Kong;Peking University;Duke University;Adobe", "aff_unique_dep": ";;NVIDIA Corporation;;;;;;;;;;;;;;;;;;Adobe Incorporated", "aff_unique_url": "https://www.iit.it;https://www.jhu.edu;https://www.nvidia.com;https://www.dkfz.de;https://www.kuleuven.be;https://www.dkfz.de;https://www.sjtu.edu.cn;https://www.monash.edu;https://www.shanghai-ai-lab.com;https://www.uni-heidelberg.de/;https://www.nwpu.edu.cn;https://www.adelaide.edu.au;https://www.ust.hk;https://www.uni-regensburg.de;https://www.rwth-aachen.de;http://en.hhit.edu.cn/;https://www.baaic.cn;https://www.cuhk.edu.hk;http://www.pku.edu.cn;https://www.duke.edu;https://www.adobe.com", "aff_unique_abbr": "IIT;JHU;NVIDIA;DKFZ;KU Leuven;DKFZ;SJTU;Monash;SAIL;Uni Heidelberg;NWPU;Adelaide;HKUST;UR;RWTH;HIT;BAAI;CUHK;Peking U;Duke;Adobe", "aff_campus_unique_index": "1;2;3;3;4;4;4;4;4", "aff_campus_unique": ";Guangzhou;Aachen;Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;1;1;2;3;1;2;2;4;5;4;2;2;2;2;2;2;4;5;4;4;4;2;2;2;2;2;4;4;4;4;4;4;4;4;1;1;1;1;1;4;4;1;1;0;1;1", "aff_country_unique": "Italy;United States;Germany;Belgium;China;Australia" }, { "title": "Conformal Alignment: Knowing When to Trust Foundation Models with Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94658", "id": "YzyCEJlV9Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=YzyCEJlV9Z", "openreview": "https://openreview.net/forum?id=YzyCEJlV9Z", "poster": "/media/PosterPDFs/NeurIPS%202024/94658.png?t=1731018766.769445", "project": "", "author_site": "Yu Gui, Ying Jin, Zhimei Ren", "tldr": "", "abstract": "Before deploying outputs from foundation models in high-stakes tasks, it is imperative to ensure that they align with human values.\nFor instance, in radiology report generation, reports generated by a vision-language model must align with human evaluations before their use in medical decision-making. This paper presents Conformal Alignment, a general framework for identifying units whose outputs meet a user-specified alignment criterion. It is guaranteed that on average, a prescribed fraction of selected units indeed meet the alignment criterion, regardless of the foundation model or the data distribution. Given any pre-trained model and new units with model-generated outputs, Conformal Alignment leverages a set of reference data with ground-truth alignment status to train an alignment predictor. It then selects new units whose predicted alignment scores surpass a data-dependent threshold, certifying their corresponding outputs as trustworthy. Through applications to question answering and radiology report generation, we demonstrate that our method is able to accurately identify units with trustworthy outputs via lightweight training over a moderate amount of reference data. En route, we investigate the informativeness of various features in alignment prediction and combine them with standard models to construct the alignment predictor.", "keywords": "conformal prediction;selective inference;foundation model alignment", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/cfde2568de9f81d78b24fa854c02babcbc471a18.zip", "author": "Yu Gui;Ying Jin;Zhimei Ren", "authorids": "~Yu_Gui1;~Ying_Jin4;~Zhimei_Ren1", "gender": "M;F;F", "homepage": "https://yugjerry.github.io/;https://ying531.github.io/;https://zhimeir.github.io/", "dblp": "276/7601;https://dblp.org/rec/conf/icml/JinWL20;", "google_scholar": "22zXYlkAAAAJ;lT5KFUkAAAAJ;X3gGi_0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yu_Gui1;~Ying_Jin4;~Zhimei_Ren1", "aff": "University of Chicago;Stanford University;The Wharton School, University of Pennsylvania", "aff_domain": "uchicago.edu;stanford.edu;wharton.upenn.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngui2024conformal,\ntitle={Conformal Alignment: Knowing When to Trust Foundation Models with Guarantees},\nauthor={Yu Gui and Ying Jin and Zhimei Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=YzyCEJlV9Z}\n}", "github": "", "reviewers": "Ctqb;w89M;SzCp;FNNT;nRXb", "pdf_size": 7487505, "rating": "4;5;6;6;6", "confidence": "4;4;3;4;4", "soundness": "2;4;4;4;3", "novelty": "2;4;3;3;2", "presentation": "2;4;3;3;3", "wc_summary": "77;120;55;48;75", "wc_strengths": "54;75;48;32;68", "wc_weaknesses": "280;100;69;332;200", "wc_questions": "171;37;158;35;2", "wc_limitations": "8;6;83;2;1", "wc_review": "590;338;413;449;346", "wc_reply_reviewers": "235;160;18;26;241", "wc_reply_authors": "1424;172;795;455;1002", "reply_reviewers": "3;1;1;1;2", "reply_authors": "4;2;2;2;3", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.0, 25.13165334791963 ], "wc_strengths_avg": [ 55.4, 15.14727698300919 ], "wc_weaknesses_avg": [ 196.2, 100.90867157980033 ], "wc_questions_avg": [ 80.6, 69.7441036934306 ], "wc_limitations_avg": [ 20.0, 31.60379724020517 ], "wc_review_avg": [ 427.2, 91.35732045107278 ], "wc_reply_reviewers_avg": [ 136.0, 97.3919914572035 ], "wc_reply_authors_avg": [ 769.6, 433.3574044596446 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.37500000000000006, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18391836829484345417&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "uchicago.edu;stanford.edu;wharton.upenn.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Chicago;Stanford University;University of Pennsylvania", "aff_unique_dep": ";;The Wharton School", "aff_unique_url": "https://www.uchicago.edu;https://www.stanford.edu;https://www.wharton.upenn.edu", "aff_unique_abbr": "UChicago;Stanford;UPenn Wharton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "pcaGAN: Improving Posterior-Sampling cGANs via Principal Component Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94657", "id": "Z0Nq3hHeEG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Z0Nq3hHeEG", "openreview": "https://openreview.net/forum?id=Z0Nq3hHeEG", "poster": "", "project": "", "author_site": "Matthew Bendel, Rizwan Ahmad, Philip Schniter", "tldr": "", "abstract": "In ill-posed imaging inverse problems, there can exist many hypotheses that fit both the observed measurements and prior knowledge of the true image. Rather than returning just one hypothesis of that image, posterior samplers aim to explore the full solution space by generating many probable hypotheses, which can later be used to quantify uncertainty or construct recoveries that appropriately navigate the perception/distortion trade-off. In this work, we propose a fast and accurate posterior-sampling conditional generative adversarial network (cGAN) that, through a novel form of regularization, aims for correctness in the posterior mean as well as the trace and K principal components of the posterior covariance matrix. Numerical experiments demonstrate that our method outperforms competitors in a wide range of ill-posed imaging inverse problems.", "keywords": "Image recovery;inverse problems;MRI;posterior sampling;GAN", "primary_area": "machine_vision", "supplementary_material": "", "author": "Matthew C Bendel;Rizwan Ahmad;Philip Schniter", "authorids": "~Matthew_C_Bendel1;~Rizwan_Ahmad1;~Philip_Schniter2", "gender": "M;M;M", "homepage": ";https://u.osu.edu/ahmad/;https://phil-schniter.web.app", "dblp": "331/8228;24/3366;s/PhilipSchniter.html", "google_scholar": "U575k_8AAAAJ;9mbdAO4AAAAJ;4Pp9KfYAAAAJ", "orcid": ";0000-0002-5917-3788;0000-0003-0939-7545", "linkedin": "matthew-bendel-399232131/;;phil-schniter-1050645/", "or_profile": "~Matthew_C_Bendel1;~Rizwan_Ahmad1;~Philip_Schniter2", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus", "aff_domain": "osu.edu;osu.edu;osu.edu", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbendel2024pcagan,\ntitle={pca{GAN}: Improving Posterior-Sampling c{GAN}s via Principal Component Regularization},\nauthor={Matthew C Bendel and Rizwan Ahmad and Philip Schniter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Z0Nq3hHeEG}\n}", "github": "", "reviewers": "cM4t;GSQK;s7hc;n2zA", "pdf_size": 24282085, "rating": "4;5;5;6", "confidence": "5;4;5;4", "soundness": "2;2;4;3", "novelty": "3;2;2;3", "presentation": "1;4;4;4", "wc_summary": "106;29;116;139", "wc_strengths": "58;57;61;111", "wc_weaknesses": "959;128;335;185", "wc_questions": "6;71;85;73", "wc_limitations": "47;8;2;35", "wc_review": "1176;293;599;543", "wc_reply_reviewers": "676;89;131;253", "wc_reply_authors": "1269;224;0;276", "reply_reviewers": "3;1;1;2", "reply_authors": "4;2;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 1.299038105676658 ], "wc_summary_avg": [ 97.5, 41.31888188225814 ], "wc_strengths_avg": [ 71.75, 22.708753818736948 ], "wc_weaknesses_avg": [ 401.75, 330.49309750734585 ], "wc_questions_avg": [ 58.75, 30.922281610515096 ], "wc_limitations_avg": [ 23.0, 18.614510468986285 ], "wc_review_avg": [ 652.75, 323.3205646104188 ], "wc_reply_reviewers_avg": [ 287.25, 232.38801066320096 ], "wc_reply_authors_avg": [ 442.25, 488.4600162756415 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pUFm5oOYx-UJ:scholar.google.com/&scioq=pcaGAN:+Improving+Posterior-Sampling+cGANs+via+Principal+Component+Regularization&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "osu.edu;osu.edu;osu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Columbus", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Flow Diffusion Models: Learnable Forward Process for Improved Diffusion Modelling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94656", "id": "Z0wIbVTBXc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Z0wIbVTBXc", "openreview": "https://openreview.net/forum?id=Z0wIbVTBXc", "poster": "", "project": "", "author_site": "Grigory Bartosh, Dmitry Vetrov, Christian Andersson Naesseth", "tldr": "", "abstract": "Conventional diffusion models typically relies on a fixed forward process, which implicitly defines complex marginal distributions over latent variables. This can often complicate the reverse process\u2019 task in learning generative trajectories, and results in costly inference for diffusion models. To address these limitations, we introduce Neural Flow Diffusion Models (NFDM), a novel framework that enhances diffusion models by supporting a broader range of forward processes beyond the standard Gaussian. We also propose a novel parameterization technique for learning the forward process. Our framework provides an end-to-end, simulation-free optimization objective, effectively minimizing a variational upper bound on the negative log-likelihood. Experimental results demonstrate NFDM\u2019s strong performance, evidenced by state-of-the-art likelihood estimation. Furthermore, we investigate NFDM\u2019s capacity for learning generative dynamics with specific characteristics, such as deterministic straight lines trajectories, and demonstrate how the framework may be adopted for learning bridges between two distributions. The results underscores NFDM\u2019s versatility and its potential for a wide range of applications.", "keywords": "diffusion;generative models;variational inference", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Grigory Bartosh;Dmitry Vetrov;Christian A. Naesseth", "authorids": "~Grigory_Bartosh1;~Dmitry_P._Vetrov1;~Christian_A._Naesseth1", "gender": "M;M;M", "homepage": ";https://constructor.university/faculty-member/dmitry-vetrov;https://naesseth.github.io/", "dblp": ";89/3348;146/0902", "google_scholar": ";https://scholar.google.ru/citations?user=7HU0UoUAAAAJ;GQ6rOssAAAAJ", "orcid": ";;", "linkedin": "grigory-bartosh-76004a163/;;", "or_profile": "~Grigory_Bartosh1;~Dmitry_P._Vetrov1;~Christian_A_Naesseth1", "aff": "University of Amsterdam;National Research University Higher School of Economics;University of Amsterdam", "aff_domain": "uva.nl;hse.ru;uva.nl", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nbartosh2024neural,\ntitle={Neural Flow Diffusion Models: Learnable Forward Process for Improved Diffusion Modelling},\nauthor={Grigory Bartosh and Dmitry Vetrov and Christian A. Naesseth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Z0wIbVTBXc}\n}", "github": "", "reviewers": "AXnA;xgai;1ZLG;DMsJ", "pdf_size": 13786233, "rating": "5;6;7;8", "confidence": "3;4;3;4", "soundness": "2;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;2;4", "wc_summary": "12;73;77;99", "wc_strengths": "29;62;44;179", "wc_weaknesses": "219;73;117;98", "wc_questions": "186;468;2;3", "wc_limitations": "17;7;2;15", "wc_review": "463;683;242;394", "wc_reply_reviewers": "12;40;17;23", "wc_reply_authors": "0;1156;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.25, 32.29841327371981 ], "wc_strengths_avg": [ 78.5, 59.188258970846576 ], "wc_weaknesses_avg": [ 126.75, 55.49943693408069 ], "wc_questions_avg": [ 164.75, 190.43552058373984 ], "wc_limitations_avg": [ 10.25, 6.057020719792859 ], "wc_review_avg": [ 445.5, 158.7269668329865 ], "wc_reply_reviewers_avg": [ 23.0, 10.559356040971437 ], "wc_reply_authors_avg": [ 289.0, 500.5626833874055 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17267120689039702459&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uva.nl;hse.ru;uva.nl", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;National Research University Higher School of Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://hse.ru", "aff_unique_abbr": "UvA;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;Russian Federation" }, { "id": "Z2f4Laqi8U", "title": "An efficient search-and-score algorithm for ancestral graphs using multivariate information scores", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a greedy search-and-score algorithm for ancestral graphs, which include directed as well as bidirected edges, originating from unobserved latent variables. The normalized likelihood score of directed mixed graphs is estimated in terms of multivariate information over relevant subsets of variables, ${C}$, that are connected through collider paths confined to the ancestor set of ${C}$. For computational efficiency, the proposed two-step algorithm relies on local information scores limited to the close surrounding variables of each node (step 1) and edge (step 2). This computational strategy is shown to outperform state-of-the-art causal discovery methods on challenging benchmark datasets.", "keywords": "causal discovery;search-and-score structure learning;latent variable;multivariate information", "primary_area": "causal_inference", "supplementary_material": "", "author": "Nikita Lagrange;Herve Isambert", "authorids": "~Nikita_Lagrange1;~Herve_Isambert1", "gender": "M;M", "homepage": "https://nikitalagrange.github.io/;http://kinefold.curie.fr/isambertlab", "dblp": "395/6628;", "google_scholar": "vVHrjuwAAAAJ;", "orcid": "0009-0009-5432-2252;", "linkedin": "nikitalagrange;", "or_profile": "~Nikita_Lagrange1;~Herve_Isambert1", "aff": "Institut Curie;Institut Curie", "aff_domain": "curie.fr;curie.fr", "position": "PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024an,\ntitle={An efficient search-and-score algorithm for ancestral graphs using multivariate information scores},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=Z2f4Laqi8U}\n}", "github": "", "project": "", "reviewers": "mHLp;2yrv;NFNu;Rzut", "site": "https://openreview.net/forum?id=Z2f4Laqi8U", "pdf_size": 2183124, "rating": "3;5;5;6", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;1;2;3", "presentation": "3;2;3;3", "wc_summary": "20;39;141;72", "wc_strengths": "19;78;84;42", "wc_weaknesses": "82;97;154;2", "wc_questions": "24;94;126;111", "wc_limitations": "23;46;1;1", "wc_review": "168;354;506;228", "wc_reply_reviewers": "241;19;117;0", "wc_reply_authors": "275;31;133;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 46.07059799915777 ], "wc_strengths_avg": [ 55.75, 26.61179249881526 ], "wc_weaknesses_avg": [ 83.75, 54.30642227214015 ], "wc_questions_avg": [ 88.75, 39.06004992316318 ], "wc_limitations_avg": [ 17.75, 18.619546181365433 ], "wc_review_avg": [ 314.0, 129.59166639873106 ], "wc_reply_reviewers_avg": [ 94.25, 95.65399887093064 ], "wc_reply_authors_avg": [ 109.75, 107.34843967193935 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kgulWioZ1jYJ:scholar.google.com/&scioq=An+efficient+search-and-score+algorithm+for+ancestral+graphs+using+multivariate+information+scores&hl=en&as_sdt=0,44", "gs_version_total": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Institut Curie", "aff_unique_dep": "", "aff_unique_url": "https://www.institut-curie.org", "aff_unique_abbr": "Institut Curie", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Unity by Diversity: Improved Representation Learning for Multimodal VAEs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94655", "id": "Z4R2rkPgBy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Z4R2rkPgBy", "openreview": "https://openreview.net/forum?id=Z4R2rkPgBy", "poster": "/media/PosterPDFs/NeurIPS%202024/94655.png?t=1733315331.5159", "project": "", "author_site": "Thomas Sutter, Yang Meng, Andrea Agostini, Daphn\u00e9 Chopard, Norbert Fortin, Julia Vogt, Babak Shahbaba, Stephan Mandt", "tldr": "", "abstract": "Variational Autoencoders for multimodal data hold promise for many tasks in data analysis, such as representation learning, conditional generation, and imputation.\nCurrent architectures either share the encoder output, decoder input, or both across modalities to learn a shared representation. \nSuch architectures impose hard constraints on the model. \nIn this work, we show that a better latent representation can be obtained by replacing these hard constraints with a soft constraint. We propose a new mixture-of-experts prior, softly guiding each modality's latent representation towards a shared aggregate posterior.\nThis approach results in a superior latent representation and allows each encoding to preserve information better from its uncompressed original features. In extensive experiments on multiple benchmark datasets and two challenging real-world datasets, we show improved learned latent representations and imputation of missing data modalities compared to existing methods.", "keywords": "multimodal generative learning;VAE;representation learning;data-dependent prior;mimic-cxr", "primary_area": "generative_models", "supplementary_material": "/attachment/cc76b65e9a8302e47309fb1b2be46ebf53bd7b39.zip", "author": "Thomas M. Sutter;Yang Meng;Andrea Agostini;Daphn\u00e9 Chopard;Norbert Fortin;Julia E Vogt;Babak Shahbaba;Stephan Mandt", "authorids": "~Thomas_M._Sutter1;~Yang_Meng1;~Andrea_Agostini1;~Daphn\u00e9_Chopard1;~Norbert_Fortin1;~Julia_E_Vogt1;~Babak_Shahbaba1;~Stephan_Mandt1", "gender": "M;;F;M;F;M;M;", "homepage": "https://yangmeng96.github.io/;;;https://fortinlab.bio.uci.edu/;http://mds.inf.ethz.ch;https://www.ics.uci.edu/~babaks;https://www.stephanmandt.com;https://mds.inf.ethz.ch/", "dblp": "41/7386;;249/5730;241/3472;13/8412;29/2863;147/5018;259/0609", "google_scholar": "bIZ1fX8AAAAJ;;hUQArCkAAAAJ;https://scholar.google.com/citations?hl=en;UoeV-8kAAAAJ;https://scholar.google.nl/citations?user=t0-BrIMAAAAJ;HOrGe7wAAAAJ;eySN1UkAAAAJ", "orcid": ";;0000-0002-7964-1681;0000-0002-6793-6984;;;;", "linkedin": "yang96/;agostini335/;daphne-chopard;;julia-vogt-50b53895;;stephan-mandt-8702795a/;", "or_profile": "~Yang_Meng1;~Andrea_Agostini1;~Daphn\u00e9_Chopard1;~Norbert_Fortin1;~Julia_E_Vogt1;~Babak_Shahbaba1;~Stephan_M_Mandt1;~Thomas_Marco_Sutter1", "aff": "University of California, Irvine;ETHZ - ETH Zurich;Kinderspital Zurich;University of California, Irvine;Swiss Federal Institute of Technology;University of California, Irvine;University of California, Irvine;ETH Zurich", "aff_domain": "uci.edu;ethz.ch;kispi.uzh.ch;uci.edu;ethz.ch;uci.edu;uci.edu;ethz.ch", "position": "PhD student;Researcher;Postdoc;Associate Professor;Assistant Professor;Full Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nsutter2024unity,\ntitle={Unity by Diversity: Improved Representation Learning for Multimodal {VAE}s},\nauthor={Thomas M. Sutter and Yang Meng and Andrea Agostini and Daphn{\\'e} Chopard and Norbert Fortin and Julia E Vogt and Babak Shahbaba and Stephan Mandt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Z4R2rkPgBy}\n}", "github": "", "reviewers": "6TPP;eP7z;56cj", "pdf_size": 24740855, "rating": "5;5;6", "confidence": "3;3;4", "soundness": "3;2;2", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "59;80;115", "wc_strengths": "39;39;63", "wc_weaknesses": "85;115;231", "wc_questions": "85;125;25", "wc_limitations": "1;76;4", "wc_review": "269;435;438", "wc_reply_reviewers": "20;457;64", "wc_reply_authors": "50;306;69", "reply_reviewers": "1;2;1", "reply_authors": "3;4;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.66666666666667, 23.098821518760552 ], "wc_strengths_avg": [ 47.0, 11.313708498984761 ], "wc_weaknesses_avg": [ 143.66666666666666, 62.95677529508286 ], "wc_questions_avg": [ 78.33333333333333, 41.096093353126506 ], "wc_limitations_avg": [ 27.0, 34.66987164671943 ], "wc_review_avg": [ 380.6666666666667, 78.96975511056257 ], "wc_reply_reviewers_avg": [ 180.33333333333334, 196.45581917797418 ], "wc_reply_authors_avg": [ 141.66666666666666, 116.4598166274054 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5897458172822927595&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "uci.edu;ethz.ch;kispi.uzh.ch;uci.edu;ethz.ch;uci.edu;uci.edu;ethz.ch", "author_num": 8, "aff_unique_index": "0;1;2;0;3;0;0;1", "aff_unique_norm": "University of California, Irvine;ETH Zurich;Kinderspital Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uci.edu;https://www.ethz.ch;https://www.kinderspital-zuerich.ch;https://www.ethz.ch", "aff_unique_abbr": "UCI;ETHZ;;ETH Zurich", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;1;1;0;1;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Online Control in Population Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94654", "id": "ZBBrBujopT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZBBrBujopT", "openreview": "https://openreview.net/forum?id=ZBBrBujopT", "poster": "", "project": "", "author_site": "Noah Golowich, Elad Hazan, Zhou Lu, Dhruv Rohatgi, Y. Jennifer Sun", "tldr": "", "abstract": "The study of population dynamics originated with early sociological works but has since extended into many fields, including biology, epidemiology, evolutionary game theory, and economics. Most studies on population dynamics focus on the problem of prediction rather than control. Existing mathematical models for population control are often restricted to specific, noise-free dynamics, while real-world population changes can be complex and adversarial. \n\nTo address this gap, we propose a new framework based on the paradigm of online control. We first characterize a set of linear dynamical systems that can naturally model evolving populations. We then give an efficient gradient-based controller for these systems, with near-optimal regret bounds with respect to a broad class of linear policies. Our empirical evaluations demonstrate the effectiveness of the proposed algorithm for population control even in non-linear models such as SIR and replicator dynamics.", "keywords": "Online Control;Theory;Population Dynamics;Linear Dynamical Systems", "primary_area": "online_learning", "supplementary_material": "", "author": "Noah Golowich;Elad Hazan;Zhou Lu;Dhruv Rohatgi;Y. Jennifer Sun", "authorids": "~Noah_Golowich1;~Elad_Hazan1;~Zhou_Lu1;~Dhruv_Rohatgi1;~Y._Jennifer_Sun1", "gender": ";M;;M;", "homepage": "https://noahgol.github.io;https://www.ehazan.com;https://leozoroaster.github.io/;http://www.mit.edu/~drohatgi/;https://orfe.princeton.edu/people/jennifer-sun", "dblp": "150/1861;72/739;68/11524;223/4465;", "google_scholar": "roUlyWcAAAAJ;LnhCGNMAAAAJ;17_nX_kAAAAJ;NUd_d6UAAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Noah_Golowich1;~Elad_Hazan1;~Zhou_Lu1;~Dhruv_Rohatgi1;~Y._Jennifer_Sun1", "aff": "Massachusetts Institute of Technology;Princeton University;;Massachusetts Institute of Technology;Princeton University", "aff_domain": "mit.edu;princeton.edu;;mit.edu;princeton.edu", "position": "PhD student;Full Professor;;PhD student;PhD student", "bibtex": "@inproceedings{\ngolowich2024online,\ntitle={Online Control in Population Dynamics},\nauthor={Noah Golowich and Elad Hazan and Zhou Lu and Dhruv Rohatgi and Y. Jennifer Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZBBrBujopT}\n}", "github": "", "reviewers": "2PrE;XKZK;7y5h;ofBj", "pdf_size": 3051816, "rating": "5;5;6;8", "confidence": "2;2;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "1;3;3;3", "wc_summary": "80;34;34;100", "wc_strengths": "125;83;37;110", "wc_weaknesses": "128;100;24;118", "wc_questions": "82;6;27;25", "wc_limitations": "9;115;13;9", "wc_review": "424;338;135;362", "wc_reply_reviewers": "18;0;0;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 62.0, 28.879058156387302 ], "wc_strengths_avg": [ 88.75, 33.45425981844465 ], "wc_weaknesses_avg": [ 92.5, 40.8013480169467 ], "wc_questions_avg": [ 35.0, 28.34607556611673 ], "wc_limitations_avg": [ 36.5, 45.35140571139995 ], "wc_review_avg": [ 314.75, 108.41903661258017 ], "wc_reply_reviewers_avg": [ 12.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7524619208051430506&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;princeton.edu;;mit.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.princeton.edu", "aff_unique_abbr": "MIT;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Interpretable Concept Bottlenecks to Align Reinforcement Learning Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94653", "id": "ZC0PSk6Mc6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZC0PSk6Mc6", "openreview": "https://openreview.net/forum?id=ZC0PSk6Mc6", "poster": "", "project": "", "author_site": "Quentin Delfosse, Sebastian Sztwiertnia, Mark Rothermel, Wolfgang Stammer, Kristian Kersting", "tldr": "", "abstract": "Goal misalignment, reward sparsity and difficult credit assignment are only a few of the many issues that make it difficult for deep reinforcement learning (RL) agents to learn optimal policies. \nUnfortunately, the black-box nature of deep neural networks impedes the inclusion of domain experts for inspecting the model and revising suboptimal policies.\n\nTo this end, we introduce Successive Concept Bottleneck Agents (SCoBots), that integrate consecutive concept bottleneck (CB) layers. \nIn contrast to current CB models, SCoBots do not just represent concepts as properties of individual objects, but also as relations between objects which is crucial for many RL tasks. \n\nOur experimental results provide evidence of SCoBots' competitive performances, but also of their potential for domain experts to understand and regularize their behavior. Among other things, SCoBots enabled us to identify a previously unknown misalignment problem in the iconic video game, Pong, and resolve it. Overall, SCoBots thus result in more human-aligned RL agents.", "keywords": "Explainable AI (XAI);Reinforcement Learning;Concept Bottlenecks", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Quentin Delfosse;Sebastian Sztwiertnia;Mark Rothermel;Wolfgang Stammer;Kristian Kersting", "authorids": "~Quentin_Delfosse1;~Sebastian_Sztwiertnia1;~Mark_Rothermel1;~Wolfgang_Stammer1;~Kristian_Kersting1", "gender": "M;;;M;M", "homepage": "https://quentindelfosse.me/;;https://rothermel.me;https://ml-research.github.io/people/wstammer/;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "286/1466.html;297/9076.html;;256/5497;40/3793", "google_scholar": "k1E0FgIAAAAJ;;;66-aU5AAAAAJ;QY-earAAAAAJ", "orcid": ";;;0000-0003-3793-8046;0000-0002-2873-9152", "linkedin": "quentin-delfosse-70b377150/;;mrothermel;https://linkedin.com/in/wolfgang-stammer-7835a4207/en-us?trk=people-guest_people_search-card;", "or_profile": "~Quentin_Delfosse1;~Sebastian_Sztwiertnia1;~Mark_Rothermel1;~Wolfgang_Stammer1;~Kristian_Kersting1", "aff": "CS Department, TU Darmstadt, TU Darmstadt;Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;CS Department, TU Darmstadt;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndelfosse2024interpretable,\ntitle={Interpretable Concept Bottlenecks to Align Reinforcement Learning Agents},\nauthor={Quentin Delfosse and Sebastian Sztwiertnia and Mark Rothermel and Wolfgang Stammer and Kristian Kersting},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZC0PSk6Mc6}\n}", "github": "", "reviewers": "5yYA;udPa;Zj4A;KTbS", "pdf_size": 1308072, "rating": "4;7;7;8", "confidence": "3;2;5;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "135;70;52;26", "wc_strengths": "90;59;98;45", "wc_weaknesses": "585;54;179;68", "wc_questions": "61;2;2;40", "wc_limitations": "1;18;2;2", "wc_review": "872;203;333;181", "wc_reply_reviewers": "130;29;16;127", "wc_reply_authors": "765;0;0;242", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 70.75, 40.25776322648838 ], "wc_strengths_avg": [ 73.0, 21.760055146988943 ], "wc_weaknesses_avg": [ 221.5, 215.3816380288719 ], "wc_questions_avg": [ 26.25, 25.36114153582208 ], "wc_limitations_avg": [ 5.75, 7.084313657652377 ], "wc_review_avg": [ 397.25, 280.1842028023707 ], "wc_reply_reviewers_avg": [ 75.5, 53.20949163448191 ], "wc_reply_authors_avg": [ 251.75, 312.3606689389687 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.29814239699997197, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6912509157667610010&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 5, "email": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;cs.tu-darmstadt.de;tu-darmstadt.de", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Fine-grained Control of Generative Data Augmentation in IoT Sensing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94652", "id": "ZCygNDMIII", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZCygNDMIII", "openreview": "https://openreview.net/forum?id=ZCygNDMIII", "poster": "/media/PosterPDFs/NeurIPS%202024/94652.png?t=1731711743.7947836", "project": "", "author_site": "Tianshi Wang, Qikai Yang, Ruijie Wang, Dachun Sun, Jinyang Li, Yizhuo Chen, Yigong Hu, Chaoqi Yang, Tomoyoshi Kimura, Denizhan Kara, Tarek Abdelzaher", "tldr": "", "abstract": "Internet of Things (IoT) sensing models often suffer from overfitting due to data distribution shifts between training dataset and real-world scenarios. To address this, data augmentation techniques have been adopted to enhance model robustness by bolstering the diversity of synthetic samples within a defined vicinity of existing samples. This paper introduces a novel paradigm of data augmentation for IoT sensing signals by adding fine-grained control to generative models. We define a metric space with statistical metrics that capture the essential features of the short-time Fourier transformed (STFT) spectrograms of IoT sensing signals. These metrics serve as strong conditions for a generative model, enabling us to tailor the spectrogram characteristics in the time-frequency domain according to specific application needs. Furthermore, we propose a set of data augmentation techniques within this metric space to create new data samples. Our method is evaluated across various generative models, datasets, and downstream IoT sensing models. The results demonstrate that our approach surpasses the conventional transformation-based data augmentation techniques and prior generative data augmentation models.", "keywords": "Generative models;data augmentation;Internet of Things;signal processing", "primary_area": "generative_models", "supplementary_material": "", "author": "Tianshi Wang;Qikai Yang;Ruijie Wang;Dachun Sun;Jinyang Li;Yizhuo Chen;Yigong Hu;Chaoqi Yang;Tomoyoshi Kimura;Denizhan Kara;Tarek F. Abdelzaher", "authorids": "~Tianshi_Wang1;~Qikai_Yang1;~Ruijie_Wang2;~Dachun_Sun1;~Jinyang_Li2;~Yizhuo_Chen2;~Yigong_Hu1;~Chaoqi_Yang1;~Tomoyoshi_Kimura1;~Denizhan_Kara1;~Tarek_F._Abdelzaher1", "gender": "M;M;M;M;;;;M;M;M;M", "homepage": "https://leowangx2013.github.io/;;https://wjerry5.github.io;https://dsun9.github.io/;;https://yizhuochen99.github.io/;https://www.google.com;https://ycq091044.github.io;https://www.tomoyoshikimura.com/;https://denizhankara.github.io/;http://abdelzaher.cs.illinois.edu/", "dblp": "147/8926;358/3841;57/5759-4;262/6139.html;79/572-4;;;;;;a/TarekFAbdelzaher", "google_scholar": "G5cJK7oAAAAJ;;S1TuNNIAAAAJ;2Rl25vkAAAAJ;VbeL3UUAAAAJ;;;Lj_rYrkAAAAJ;8uuJfmoAAAAJ;;https://scholar.google.com.tw/citations?user=cA28Zs0AAAAJ", "orcid": ";;;0000-0003-4000-2783;0000-0001-9285-9872;;;;0009-0008-4297-5865;;0000-0003-3883-7220", "linkedin": ";;;dachun-sun-3b3ba9126/;;;;;tomoyoshi-kimura/;;tarek-abdelzaher-0216071/", "or_profile": "~Tianshi_Wang1;~Qikai_Yang1;~Ruijie_Wang2;~Dachun_Sun1;~Jinyang_Li2;~Yizhuo_Chen2;~Yigong_Hu1;~Chaoqi_Yang1;~Tomoyoshi_Kimura1;~Denizhan_Kara1;~Tarek_Abdelzaher1", "aff": "University of Illinois Urbana-Champaign;Goldman Sachs;University of Illinois, Urbana Champaign;University of Illinois Urbana Champaign;University of Illinois, Urbana Champaign;J.P. Morgan Chase;University of Illinois, Urbana Champaign;University of Illinois Urbana Champaign;Department of Computer Science, University of Illinois at Urbana Champaign;Department of Computer Science;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;gs.com;uiuc.edu;cs.illinois.edu;uiuc.edu;jpmchase.com;uiuc.edu;illinois.edu;cs.illinois.edu;cs.illinois.edu;illinois.edu", "position": "PhD student;Analyst;Postdoc;PhD student;PhD student;Intern;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024finegrained,\ntitle={Fine-grained Control of Generative Data Augmentation in IoT Sensing},\nauthor={Tianshi Wang and Qikai Yang and Ruijie Wang and Dachun Sun and Jinyang Li and Yizhuo Chen and Yigong Hu and Chaoqi Yang and Tomoyoshi Kimura and Denizhan Kara and Tarek F. Abdelzaher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZCygNDMIII}\n}", "github": "", "reviewers": "eQAt;eCvh;8SZC;7DMe", "pdf_size": 10686619, "rating": "6;6;6;6", "confidence": "5;3;4;2", "soundness": "3;2;3;2", "novelty": "3;2;3;2", "presentation": "3;2;3;2", "wc_summary": "72;238;48;111", "wc_strengths": "25;152;85;62", "wc_weaknesses": "184;81;140;38", "wc_questions": "146;105;172;23", "wc_limitations": "31;35;69;1", "wc_review": "458;611;514;235", "wc_reply_reviewers": "41;75;32;36", "wc_reply_authors": "1103;62;1674;19", "reply_reviewers": "1;1;1;1", "reply_authors": "5;2;5;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 117.25, 73.25085323735144 ], "wc_strengths_avg": [ 81.0, 46.24391851908746 ], "wc_weaknesses_avg": [ 110.75, 55.67483722472837 ], "wc_questions_avg": [ 111.5, 56.40257086339239 ], "wc_limitations_avg": [ 34.0, 24.1039415863879 ], "wc_review_avg": [ 454.5, 138.04437692278523 ], "wc_reply_reviewers_avg": [ 46.0, 17.04406054905931 ], "wc_reply_authors_avg": [ 714.5, 703.7487122545945 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3762307073815410415&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cs.illinois.edu;gs.com;uiuc.edu;cs.illinois.edu;uiuc.edu;jpmchase.com;uiuc.edu;illinois.edu;cs.illinois.edu;cs.illinois.edu;illinois.edu", "author_num": 11, "aff_unique_index": "0;1;0;0;0;2;0;0;0;3;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Goldman Sachs;JPMorgan Chase & Co.;Unknown Institution", "aff_unique_dep": ";;;Department of Computer Science", "aff_unique_url": "https://illinois.edu;https://www.goldmansachs.com;https://www.jpmorganchase.com;", "aff_unique_abbr": "UIUC;GS;JPM;", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "ZDvXY56DeP", "title": "Open RL Benchmark: Comprehensive Tracked Experiments for Reinforcement Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "In many Reinforcement Learning (RL) papers, learning curves are useful indicators to measure the effectiveness of RL algorithms. However, the complete raw data of the learning curves are rarely available. As a result, it is usually necessary to reproduce the experiments from scratch, which can be time-consuming and error-prone. We present Open RL Benchmark (ORLB), a set of fully tracked RL experiments, including not only the usual data such as episodic return, but also all algorithm-specific and system metrics. ORLB is community-driven: anyone can download, use, and contribute to the data. At the time of writing, more than 25,000 runs have been tracked, for a cumulative duration of more than 8 years. It covers a wide range of RL libraries and reference implementations. Special care is taken to ensure that each experiment is precisely reproducible by providing not only the full parameters, but also the versions of the dependencies used to generate it. In addition, ORLB comes with a command-line interface (CLI) for easy fetching and generating figures to present the results. In this document, we include two case studies to demonstrate the usefulness of ORLB in practice. To the best of our knowledge, ORLB is the first RL benchmark of its kind, and the authors hope that it will improve and facilitate the work of researchers in the field.", "keywords": "reinforcement learning;benchmark;reproductibility", "primary_area": "", "supplementary_material": "", "author": "Shengyi Huang;Quentin Gallou\u00e9dec;Florian Felten;Antonin Raffin;Rousslan Fernand Julien Dossa;Yanxiao Zhao;Ryan Sullivan;Viktor Makoviychuk;Denys Makoviichuk;Mohamad Hosein Danesh;Cyril Roumegous;Jiayi Weng;Chufan Chen;Md Masudur Rahman;Jo\u00e3o Guilherme Madeira Ara\u00fajo;Guorui Quan;Daniel Chee Hian Tan;Timo Klein;Rujikorn Charakorn;Mark Towers;Yann Berthelot;Kinal Mehta;Dipam Chakraborty;Arjun KG;Valentin Charraut;Chang Ye;Zichen Liu;Lucas Nunes Alegre;Alexander Nikulin;Xiao Hu;Tianlin Liu;Jongwook Choi;Brent Yi", "authorids": "~Shengyi_Huang1;~Quentin_Gallou\u00e9dec1;~Florian_Felten1;~Antonin_Raffin1;~Rousslan_Fernand_Julien_Dossa1;~Yanxiao_Zhao2;~Ryan_Sullivan2;~Viktor_Makoviychuk1;~Denys_Makoviichuk1;~Mohamad_Hosein_Danesh1;~Cyril_Roumegous1;~Jiayi_Weng1;~Chufan_Chen1;~Md_Masudur_Rahman2;~Jo\u00e3o_Guilherme_Madeira_Ara\u00fajo1;~Guorui_Quan1;~Daniel_Chee_Hian_Tan1;~Timo_Klein1;~Rujikorn_Charakorn1;~Mark_Towers1;~Yann_Berthelot1;~Kinal_Mehta1;~Dipam_Chakraborty1;~Arjun_KG1;~Valentin_Charraut1;~Chang_Ye3;~Zichen_Liu1;~Lucas_Nunes_Alegre1;~Alexander_Nikulin1;~Xiao_Hu7;~Tianlin_Liu2;~Jongwook_Choi1;~Brent_Yi1", "gender": "M;;M;M;;M;M;M;M;M;M;M;M;M;M;;M;M;M;M;;M;M;;M;;;M;M;M;M;M;M", "homepage": "https://costa.sh/;;https://ffelten.github.io/;https://araffin.github.io/;https://dosssman.github.io;https://sdpkjc.me;https://ryannavillus.github.io/;;https://github.com/Denys88/rl_games;https://modanesh.github.io/;;https://trinkle23897.github.io/cv/;https://chufansuki.com;https://mmasudurrah.github.io/;https://www.joaogui1.netlify.app;https://github.com/quangr/;https://daniel-ch-tan.github.io/;;https://www.rujikorn.com/;;;https://kinalmehta.github.io;https://twitter.com/__dipam__;https://github.com/arjun-kg;;;;http://www.inf.ufrgs.br/~lnalegre;https://howuhh.github.io/;https://github.com/huxiao09;http://www.tianlinliu.com;https://wook.kr;", "dblp": "251/8731;;315/7185;225/7772;;;;;;267/1935;;;367/9199;08/2425-1;;;;20/10529;257/3015;;;;;;;;;250/5118;314/6349;19/1374;20/7667;131/0227;239/5167", "google_scholar": "kl9YcpEAAAAJ;MDIW57UAAAAJ;fqzUV0AAAAAJ;kik4AwIAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?;https://scholar.google.com/citations?hl=en;rmAcDNkAAAAJ;jqTm1hYAAAAJ;AsqcJtAAAAAJ;;CQrkQbIAAAAJ;QTRdntwAAAAJ;0nUv7b0AAAAJ;;;QKO1QacAAAAJ;FTLseBUAAAAJ;https://scholar.google.com/citations?hl=en;mBjei5sAAAAJ;;IJIXl8oAAAAJ;;;;;;https://scholar.google.com/citations?hl=en;yACvnqUAAAAJ;_9btJRYAAAAJ;;UX-H08cAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-6036-6950;0000-0003-0572-692X;0000-0001-9842-4706;;;;;;;;0000-0002-3633-0621;;;0000-0003-1067-8432;;;0000-0002-2609-2041;;;;;;;;0000-0001-5465-4390;;;;;", "linkedin": "costa-huang/;;florian-felten/;;rousslan-dossa-9a5a581a4/;yanxiao-zhao/;ryan-navillus/;;;mohamad-h-danesh-a766b0185/;cyril-roumegous/;trinkle23897/;;masud99r/;;;daniel-tan-a0672b163/;;;markttowers/;yann-berthelot/;kinalmehta;;;https://www.linkedin.com/authwall?trkInfo=AQG2UPhFNLVcbAAAAY-BpH2AjVDQZwxOdXNu2zah5nmLmnimBMHX2dcwK_WgexjXQEliEJQNr33YKJYuz6QA-Td2_tVh1XwrO7_f0K78WxxIRQb0ksIFtPRS0xOHLYGQGgP0uvA%3D&original_referer=https%3A%2F%2Fwww.google.com%2F&sessionRedirect=https%3A%2F%2Ffr.linkedin.com%2Fin%2Fvalentin-charraut-6b3018214;;;lucas-alegre-b80628127;;;;;", "or_profile": "~Shengyi_Huang1;~Quentin_Gallou\u00e9dec1;~Florian_Felten1;~Antonin_Raffin1;~Rousslan_Fernand_Julien_Dossa1;~Yanxiao_Zhao2;~Ryan_Sullivan2;~Viktor_Makoviychuk1;~Denys_Makoviichuk1;~Mohamad_Hosein_Danesh1;~Cyril_Roumegous1;~Jiayi_Weng1;~Chufan_Chen1;~Md_Masudur_Rahman2;~Jo\u00e3o_Guilherme_Madeira_Ara\u00fajo1;~Guorui_Quan1;~Daniel_Chee_Hian_Tan1;~Timo_Klein1;~Rujikorn_Charakorn1;~Mark_Towers1;~Yann_Berthelot1;~Kinal_Mehta1;~Dipam_Chakraborty1;~Arjun_KG1;~Valentin_Charraut1;~Chang_Ye3;~Zichen_Liu1;~Lucas_Nunes_Alegre1;~Alexander_Nikulin1;~Xiao_Hu7;~Tianlin_Liu2;~Jongwook_Choi1;~Brent_Yi1", "aff": "Hugging Face;Ecole Centrale de Lyon;University of Luxemburg;DLR;Araya;Sensetime Research;University of Maryland, College Park;NVIDIA;;McGill University;Sewan;OpenAI;Zhejiang University;Purdue University;Google;The Chinese University of Hong Kong, Shenzhen;University College London;Universit\u00e4t Vienna;Sakana AI;University of Southampton;INRIA;Qualcomm Inc, QualComm;;Earthbrain;Valeo;;;Disney Research, Disney Research;Moscow Institute of Physics and Technology;Tsinghua University;University of Basel;;University of California, Berkeley", "aff_domain": "huggingface.co;ec-lyon.fr;uni.lu;dlr.de;araya.org;sensetime.com;umd.edu;nvidia.com;;mcgill.ca;sewan.fr;openai.com;zju.edu.cn;purdue.edu;google.com;cuhk.edu.hk;ucl.ac.uk;univie.ac.at;sakana.ai;soton.ac.uk;inria.fr;qti.qualcomm.com;;earthbrain.com;valeo.com;;;disneyresearch.com;mipt.edu;tsinghua.edu.cn;unibas.ch;;berkeley.edu", "position": "Researcher;PhD student;PhD student;PhD student;Researcher;Intern;PhD student;Senior Research Scientist;;PhD student;Sewan;Researcher;MS student;PhD student;Researcher;Visiting Student;PhD student;PhD student;Intern;PhD student;PhD student;Researcher;;Researcher;Researcher;;;Intern;PhD student;PhD student;PhD student;;PhD student", "bibtex": "@misc{\nanonymous2024open,\ntitle={Open {RL} Benchmark: Comprehensive Tracked Experiments for Reinforcement Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=ZDvXY56DeP}\n}", "github": "", "project": "", "reviewers": "UaXe;Vf1K;kfSF;cvk8", "site": "https://openreview.net/forum?id=ZDvXY56DeP", "pdf_size": 28862494, "rating": "3;6;7;8", "confidence": "4;4;3;4", "wc_summary_and_contributions": "75;45;94;99", "wc_strengths": "9;89;106;69", "wc_improvement": "61;2;141;28", "wc_limitations": "1;1;74;16", "wc_correctness": "28;1;18;1", "wc_clarity": "40;2;63;8", "wc_relation_to_prior_work": "8;1;18;2", "wc_documentation": "29;1;24;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "252;143;539;225", "wc_reply_reviewers": "214;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 78.25, 21.182244923520265 ], "wc_strengths_avg": [ 68.25, 36.62905267680288 ], "wc_improvement_avg": [ 58.0, 52.28288438867924 ], "wc_limitations_avg": [ 23.0, 30.074906483645133 ], "wc_correctness_avg": [ 12.0, 11.554220008291344 ], "wc_clarity_avg": [ 28.25, 24.722206616724165 ], "wc_relation_to_prior_work_avg": [ 7.25, 6.7592529172978875 ], "wc_documentation_avg": [ 13.75, 12.871965661856 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 289.75, 149.39774931370286 ], "wc_reply_reviewers_avg": [ 53.5, 92.66471820493493 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 33, 0 ], "corr_rating_confidence": -0.3086066999241838, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13840948296587712148&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "aff_unique_index": "0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27", "aff_unique_norm": "Hugging Face;Ecole Centrale de Lyon;University of Luxembourg;Deutsches Zentrum f\u00fcr Luft- und Raumfahrt;Araya;SenseTime;University of Maryland;NVIDIA;McGill University;Sewanee: The University of the South;OpenAI;Zhejiang University;Purdue University;Google;Chinese University of Hong Kong;University College London;University of Vienna;Sakana AI;University of Southampton;INRIA;Qualcomm Incorporated;Earthbrain;Valeo;Disney Research;Moscow Institute of Physics and Technology;Tsinghua University;University of Basel;University of California, Berkeley", "aff_unique_dep": ";;;;;Research;;NVIDIA Corporation;;;;;;Google;;;;;;;;;;;;;;", "aff_unique_url": "https://huggingface.co;https://www.ec-lyon.fr;https://wwwen.uniluxembourg.lu;https://www.dlr.de;;https://www.sensetime.com/;https://www/umd.edu;https://www.nvidia.com;https://www.mcgill.ca;https://www.sewanee.edu;https://openai.com;https://www.zju.edu.cn;https://www.purdue.edu;https://www.google.com;https://www.cuhk.edu.cn;https://www.ucl.ac.uk;https://univie.ac.at;;https://www.southampton.ac.uk;https://www.inria.fr;https://www.qualcomm.com;;https://www.valeo.com;https://research.disney.com;https://www.mipt.ru/en;https://www.tsinghua.edu.cn;https://www.unibas.ch;https://www.berkeley.edu", "aff_unique_abbr": "Hugging Face;ECL;Uni Lu;DLR;;SenseTime;UMD;NVIDIA;McGill;Sewanee;OpenAI;ZJU;Purdue;Google;CUHK;UCL;UV;;Southampton;INRIA;Qualcomm;;;Disney Research;MIPT;THU;UniBas;UC Berkeley", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";College Park;Mountain View;Shenzhen;Berkeley", "aff_country_unique_index": "0;1;2;3;5;0;0;6;0;0;5;0;0;5;7;8;7;1;0;1;0;9;5;10;0", "aff_country_unique": "United States;France;Luxembourg;Germany;;China;Canada;United Kingdom;Austria;Russian Federation;Switzerland" }, { "title": "Curvature Clues: Decoding Deep Learning Privacy with Input Loss Curvature", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94651", "id": "ZEVDMQ6Mu5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZEVDMQ6Mu5", "openreview": "https://openreview.net/forum?id=ZEVDMQ6Mu5", "poster": "/media/PosterPDFs/NeurIPS%202024/94651.png?t=1731168120.1138506", "project": "", "author_site": "Deepak Ravikumar, Efstathia Soufleri, Kaushik Roy", "tldr": "", "abstract": "In this paper, we explore the properties of loss curvature with respect to input data in deep neural networks. Curvature of loss with respect to input (termed input loss curvature) is the trace of the Hessian of the loss with respect to the input. We investigate how input loss curvature varies between train and test sets, and its implications for train-test distinguishability. We develop a theoretical framework that derives an upper bound on the train-test distinguishability based on privacy and the size of the training set. This novel insight fuels the development of a new black box membership inference attack utilizing input loss curvature. We validate our theoretical findings through experiments in computer vision classification tasks, demonstrating that input loss curvature surpasses existing methods in membership inference effectiveness. Our analysis highlights how the performance of membership inference attack (MIA) methods varies with the size of the training set, showing that curvature-based MIA outperforms other methods on sufficiently large datasets. This condition is often met by real datasets, as demonstrated by our results on CIFAR10, CIFAR100, and ImageNet. These findings not only advance our understanding of deep neural network behavior but also improve the ability to test privacy-preserving techniques in machine learning.", "keywords": "Input Loss Curvature;Differential Privacy;Membership Inference", "primary_area": "privacy", "supplementary_material": "/attachment/0694076407c472bd05ea325b4b8cea298daf8d76.zip", "author": "Deepak Ravikumar;Efstathia Soufleri;Kaushik Roy", "authorids": "~Deepak_Ravikumar1;~Efstathia_Soufleri1;~Kaushik_Roy1", "gender": ";F;M", "homepage": ";https://www.linkedin.com/in/efstathia-soufleri/;https://engineering.purdue.edu/NRL/Group", "dblp": ";241/5004;r/KaushikRoy", "google_scholar": ";RXLWGNcAAAAJ;to4P8KgAAAAJ", "orcid": ";0000-0001-8699-9940;", "linkedin": ";efstathia-soufleri/;", "or_profile": "~Deepak_Ravikumar1;~Efstathia_Soufleri1;~Kaushik_Roy1", "aff": ";Purdue University;Purdue University", "aff_domain": ";purdue.edu;purdue.edu", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nravikumar2024curvature,\ntitle={Curvature Clues: Decoding Deep Learning Privacy with Input Loss Curvature},\nauthor={Deepak Ravikumar and Efstathia Soufleri and Kaushik Roy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZEVDMQ6Mu5}\n}", "github": "", "reviewers": "2dr3;mnxr;1uCf;Vex3", "pdf_size": 3234454, "rating": "6;6;7;7", "confidence": "3;4;3;5", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "38;62;78;66", "wc_strengths": "60;46;91;60", "wc_weaknesses": "39;113;5;168", "wc_questions": "32;27;50;40", "wc_limitations": "1;46;10;130", "wc_review": "170;294;234;464", "wc_reply_reviewers": "10;22;9;28", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.0, 14.52583904633395 ], "wc_strengths_avg": [ 64.25, 16.467771555374455 ], "wc_weaknesses_avg": [ 81.25, 63.507381460740454 ], "wc_questions_avg": [ 37.25, 8.699856320652657 ], "wc_limitations_avg": [ 46.75, 50.92825836409488 ], "wc_review_avg": [ 290.5, 109.3469249681947 ], "wc_reply_reviewers_avg": [ 17.25, 8.042853971072706 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6248629196322644996&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Recurrent Complex-Weighted Autoencoders for Unsupervised Object Discovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94650", "id": "ZFbqnL1AiS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZFbqnL1AiS", "openreview": "https://openreview.net/forum?id=ZFbqnL1AiS", "poster": "/media/PosterPDFs/NeurIPS%202024/94650.png?t=1733325837.2516277", "project": "", "author_site": "Anand Gopalakrishnan, Aleksandar Stani\u0107, J\u00fcrgen Schmidhuber, Michael Mozer", "tldr": "", "abstract": "Current state-of-the-art synchrony-based models encode object bindings with complex-valued activations and compute with real-valued weights in feedforward architectures. We argue for the computational advantages of a recurrent architecture with complex-valued weights. We propose a fully convolutional autoencoder, SynCx, that performs iterative constraint satisfaction: at each iteration, a hidden layer bottleneck encodes statistically regular configurations of features in particular phase relationships; over iterations, local constraints propagate and the model converges to a globally consistent configuration of phase assignments. Binding is achieved simply by the matrix-vector product operation between complex-valued weights and activations, without the need for additional mechanisms that have been incorporated into current synchrony-based models. SynCx outperforms or is strongly competitive with current models for unsupervised object discovery. SynCx also avoids certain systematic grouping errors of current models, such as the inability to separate similarly colored objects without additional supervision.", "keywords": "complex-weights;recurrence;autoencoders;objects;binding;synchrony;temporal correlation hypothesis", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/4c1880bbc1ecf2665826ae32119cba40866b0510.zip", "author": "Anand Gopalakrishnan;Aleksandar Stani\u0107;J\u00fcrgen Schmidhuber;Michael Curtis Mozer", "authorids": "~Anand_Gopalakrishnan1;~Aleksandar_Stani\u01071;~J\u00fcrgen_Schmidhuber1;~Michael_Curtis_Mozer1", "gender": "M;M;M;M", "homepage": "https://agopal42.github.io/;http://people.idsia.ch/~juergen/;https://www.cs.colorado.edu/~mozer;http://astanic.github.io/", "dblp": "191/1040;s/JurgenSchmidhuber;m/MichaelCMozer;180/5949", "google_scholar": "SsbgJ1UAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ;lmjR_qMAAAAJ;tx0opKcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Anand_Gopalakrishnan1;~J\u00fcrgen_Schmidhuber1;~Michael_Curtis_Mozer1;~Aleksandar_Stanic1", "aff": "Dalle Molle Institute for Artificial Intelligence Research;IDSIA;Google DeepMind;The Swiss AI Lab - IDSIA", "aff_domain": "idsia.ch;idsia.ch;google.com;idsia.ch", "position": "PhD student;Scientific Director;Research Scientist;PhD student", "bibtex": "@inproceedings{\ngopalakrishnan2024recurrent,\ntitle={Recurrent Complex-Weighted Autoencoders for Unsupervised Object Discovery},\nauthor={Anand Gopalakrishnan and Aleksandar Stani{\\'c} and J{\\\"u}rgen Schmidhuber and Michael Curtis Mozer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZFbqnL1AiS}\n}", "github": "", "reviewers": "3hk1;Ta5A;sSNp;nb8Q;g2JM", "pdf_size": 3586186, "rating": "4;5;6;7;7", "confidence": "4;4;3;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;3;3;4;3", "wc_summary": "62;83;93;88;125", "wc_strengths": "65;84;52;60;131", "wc_weaknesses": "666;470;323;42;178", "wc_questions": "356;5;48;48;39", "wc_limitations": "113;1;33;13;9", "wc_review": "1262;643;549;251;482", "wc_reply_reviewers": "362;278;67;119;0", "wc_reply_authors": "1822;1106;242;17;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "4;4;3;2;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 90.2, 20.350921355064003 ], "wc_strengths_avg": [ 78.4, 28.330901856453494 ], "wc_weaknesses_avg": [ 335.8, 218.3688622491769 ], "wc_questions_avg": [ 99.2, 129.37294925910902 ], "wc_limitations_avg": [ 33.8, 40.97999511957023 ], "wc_review_avg": [ 637.4, 338.06070460791506 ], "wc_reply_reviewers_avg": [ 165.2, 134.55318651001915 ], "wc_reply_authors_avg": [ 637.4, 716.9908228143508 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.08574929257125444, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15421388960258911459&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "idsia.ch;idsia.ch;google.com;idsia.ch", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Dalle Molle Institute for Artificial Intelligence Research;Institute of Digital Technologies;Google;IDSIA", "aff_unique_dep": "Artificial Intelligence Research;;Google DeepMind;Swiss AI Lab", "aff_unique_url": "http://www.dallemolle.ch/;https://www.idsia.ch;https://deepmind.com;https://www.idsia.ch/", "aff_unique_abbr": "DMI;IDSIA;DeepMind;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "SlimSAM: 0.1% Data Makes Segment Anything Slim", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94649", "id": "ZG84y6a7ge", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZG84y6a7ge", "openreview": "https://openreview.net/forum?id=ZG84y6a7ge", "poster": "", "project": "", "author_site": "Zigeng Chen, Gongfan Fang, Xinyin Ma, Xinchao Wang", "tldr": "", "abstract": "Current approaches for compressing the Segment Anything Model (SAM) yield commendable results, yet necessitate extensive data to train a new network from scratch. Employing conventional pruning techniques can remarkably reduce data requirements but would suffer from a degradation in performance. To address this challenging trade-off, we introduce SlimSAM, a novel data-efficient SAM compression method that achieves superior performance with extremely less training data. The essence of SlimSAM is encapsulated in the alternate slimming framework which effectively enhances knowledge inheritance under severely limited training data availability and exceptional pruning ratio. Diverging from prior techniques, our framework progressively compresses the model by alternately pruning and distilling distinct, decoupled sub-structures. Disturbed Taylor pruning is also proposed to address the misalignment between the pruning objective and training target, thereby boosting the post-distillation after pruning. SlimSAM yields significant performance improvements while demanding over 10 times less training data than any other existing compression methods. Even when compared to the original SAM, SlimSAM achieves approaching performance while reducing parameter counts to merely 1.4% (9.1M), MACs to 0.8% (23G), and requiring only 0.1% (10k) of the SAM training data.", "keywords": "segment anything model;model compression", "primary_area": "machine_vision", "supplementary_material": "/attachment/2d555db944feb35840c17adfe44306790aa6de13.zip", "author": "Zigeng Chen;Gongfan Fang;Xinyin Ma;Xinchao Wang", "authorids": "~Zigeng_Chen1;~Gongfan_Fang2;~Xinyin_Ma1;~Xinchao_Wang1", "gender": "M;M;F;M", "homepage": "https://czg1225.github.io/chenzigeng99/;https://fangggf.github.io/;https://horseee.github.io;https://sites.google.com/site/sitexinchaowang/", "dblp": ";243/5768;267/2244;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;489YZ_kAAAAJ;jFUKS0oAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zigeng_Chen1;~Gongfan_Fang2;~Xinyin_Ma1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;u.nus.edu;nus.edu", "position": "Researcher;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024slimsam,\ntitle={Slim{SAM}: 0.1\\% Data Makes Segment Anything Slim},\nauthor={Zigeng Chen and Gongfan Fang and Xinyin Ma and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZG84y6a7ge}\n}", "github": "", "reviewers": "YejR;5dUf;WP1s;wJAz;2d2P", "pdf_size": 39978069, "rating": "4;5;5;7;8", "confidence": "4;5;4;3;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "58;71;74;57;33", "wc_strengths": "57;37;70;31;27", "wc_weaknesses": "194;144;92;107;83", "wc_questions": "190;2;88;4;4", "wc_limitations": "7;2;14;3;1", "wc_review": "506;256;338;202;148", "wc_reply_reviewers": "412;17;42;43;0", "wc_reply_authors": "755;16;18;24;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 58.6, 14.485855169785456 ], "wc_strengths_avg": [ 44.4, 16.438978070427613 ], "wc_weaknesses_avg": [ 124.0, 40.728368491752775 ], "wc_questions_avg": [ 57.6, 73.87990254460274 ], "wc_limitations_avg": [ 5.4, 4.758150901348127 ], "wc_review_avg": [ 290.0, 124.9191738685459 ], "wc_reply_reviewers_avg": [ 102.8, 155.43924858284666 ], "wc_reply_authors_avg": [ 162.6, 296.30632797832715 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4303314829119351, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14698455134192454355&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "nus.edu.sg;u.nus.edu;u.nus.edu;nus.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "DetectRL: Benchmarking LLM-Generated Text Detection in Real-World Scenarios", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97633", "id": "ZGMkOikEyv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZGMkOikEyv", "openreview": "https://openreview.net/forum?id=ZGMkOikEyv", "poster": "/media/PosterPDFs/NeurIPS%202024/97633.png?t=1733851547.7036593", "project": "", "author_site": "Junchao Wu, Runzhe Zhan, Derek Wong, Shu Yang, Xinyi Yang, Yulin Yuan, Lidia Chao", "tldr": "", "abstract": "Detecting text generated by large language models (LLMs) is of great recent interest. With zero-shot methods like DetectGPT, detection capabilities have reached impressive levels. However, the reliability of existing detectors in real-world applications remains underexplored. In this study, we present a new benchmark, DetectRL, highlighting that even state-of-the-art (SOTA) detection techniques still underperformed in this task. \n We collected human-written datasets from domains where LLMs are particularly prone to misuse. Using popular LLMs, we generated data that better aligns with real-world applications. Unlike previous studies, we employed heuristic rules to create adversarial LLM-generated text, simulating advanced prompt usages, human revisions like word substitutions, and writing errors. Our development of DetectRL reveals the strengths and limitations of current SOTA detectors. \n More importantly, we analyzed the potential impact of writing styles, model types, attack methods, the text lengths, and real-world human writing factors on different types of detectors. We believe DetectRL could serve as an effective benchmark for assessing detectors in real-world scenarios, evolving with advanced attack methods, thus providing more stressful evaluation to drive the development of more efficient detectors\\footnote{Data and code are publicly available at: https://github.com/NLP2CT/DetectRL.", "keywords": "LLM-Generated Text Detection;Detection Benchmark;Real-World Scenarios", "primary_area": "", "supplementary_material": "", "author": "Junchao Wu;Runzhe Zhan;Derek F. Wong;Shu Yang;Xinyi Yang;Yulin Yuan;Lidia S. Chao", "authorids": "~Junchao_Wu2;~Runzhe_Zhan1;~Derek_F._Wong1;~Shu_Yang10;~Xinyi_Yang5;~Yulin_Yuan1;~Lidia_S._Chao2", "gender": "M;Not Specified;M;F;F;M;F", "homepage": "https://junchaoiu.github.io/;http://runzhe.me/;https://www.fst.um.edu.mo/personal/derek-wong/;;;https://fah.um.edu.mo/yulin-yuan/;", "dblp": "132/6774;286/8257;123/0533;;80/9004-8;;123/0612", "google_scholar": "https://scholar.google.com.hk/citations?user=jhLZtgYAAAAJ;7aKLE18AAAAJ;KjQBe8oAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?pli=1;;S2rz_mYAAAAJ", "orcid": ";;0000-0002-5307-7322;;;;", "linkedin": ";;derek-wong-6209445/;;;;", "or_profile": "~Junchao_Wu2;~Runzhe_Zhan1;~Derek_F._Wong1;~Shu_Yang10;~Xinyi_Yang5;~Yulin_Yuan1;~Sam_L._Chao1", "aff": "University of Macau;University of Macau;University of Macau;University of Macau;University of Macau;University of Macau;University of Macau", "aff_domain": "um.edu.mo;um.edu.mo;um.edu.mo;umac.mo;um.edu.mo;umac.mo;um.edu.mo", "position": "MS student;PhD student;Associate Professor;MS student;MS student;Full Professor;Coordinator", "bibtex": "@inproceedings{\nwu2024detectrl,\ntitle={Detect{RL}: Benchmarking {LLM}-Generated Text Detection in Real-World Scenarios},\nauthor={Junchao Wu and Runzhe Zhan and Derek F. Wong and Shu Yang and Xinyi Yang and Yulin Yuan and Lidia S. Chao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ZGMkOikEyv}\n}", "github": "", "reviewers": "rVaJ;tJNz;7jmF", "pdf_size": 1167612, "rating": "5;6;8", "confidence": "4;4;3", "wc_summary_and_contributions": "73;69;59", "wc_strengths": "3;23;33", "wc_improvement": "39;31;15", "wc_limitations": "1;24;56", "wc_correctness": "66;88;23", "wc_clarity": "16;13;10", "wc_relation_to_prior_work": "1;1;17", "wc_documentation": "1;2;13", "wc_additional_feedback": "1;1;1", "wc_review": "201;252;227", "wc_reply_reviewers": "0;33;0", "wc_reply_authors": "143;128;0", "reply_reviewers": "0;1;0", "reply_authors": "3;4;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 67.0, 5.887840577551898 ], "wc_strengths_avg": [ 19.666666666666668, 12.47219128924647 ], "wc_improvement_avg": [ 28.333333333333332, 9.977753031397176 ], "wc_limitations_avg": [ 27.0, 22.55363976538303 ], "wc_correctness_avg": [ 59.0, 26.993826454703797 ], "wc_clarity_avg": [ 13.0, 2.449489742783178 ], "wc_relation_to_prior_work_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_documentation_avg": [ 5.333333333333333, 5.436502143433364 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 226.66666666666666, 20.821996915655223 ], "wc_reply_reviewers_avg": [ 11.0, 15.556349186104045 ], "wc_reply_authors_avg": [ 90.33333333333333, 64.1681818002938 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4370841447183792853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "um.edu.mo;um.edu.mo;um.edu.mo;umac.mo;um.edu.mo;umac.mo;um.edu.mo", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Macau", "aff_unique_dep": "", "aff_unique_url": "https://www.um.edu.mo", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Macau SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Pairwise Pseudo-likelihood Approach for Matrix Completion with Informative Missingness", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94648", "id": "ZGN8dOhpi6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZGN8dOhpi6", "openreview": "https://openreview.net/forum?id=ZGN8dOhpi6", "poster": "", "project": "", "author_site": "Jiangyuan Li, Jiayi Wang, Raymond K. W. Wong, Kwun Chuen Gary Chan", "tldr": "", "abstract": "While several recent matrix completion methods are developed to deal with non-uniform observation probabilities across matrix entries, very few allow the missingness to depend on the mostly unobserved matrix measurements, which is generally ill-posed. We aim to tackle a subclass of these ill-posed settings, characterized by a flexible separable observation probability assumption that can depend on the matrix measurements. We propose a regularized pairwise pseudo-likelihood approach for matrix completion and prove that the proposed estimator can asymptotically recover the low-rank parameter matrix up to an identifiable equivalence class of a constant shift and scaling, at a near-optimal asymptotic convergence rate of the standard well-posed (non-informative missing) setting, while effectively mitigating the impact of informative missingness. The efficacy of our method is validated via numerical experiments, positioning it as a robust tool for matrix completion to mitigate data bias.", "keywords": "Identifiability; missing not at random; U-statistics", "primary_area": "other", "supplementary_material": "", "author": "Jiangyuan Li;Jiayi Wang;Raymond K. W. Wong;Kwun Chuen Gary Chan", "authorids": "~Jiangyuan_Li1;~Jiayi_Wang7;~Raymond_K._W._Wong1;~Kwun_Chuen_Gary_Chan1", "gender": "M;F;;M", "homepage": ";https://jiayiwang1017.github.io/;;https://faculty.washington.edu/kcgchan/", "dblp": "00/1790;;;", "google_scholar": "MJdJFGcAAAAJ;;;", "orcid": ";;;", "linkedin": "jiangyuan-li-236a00129/;;;", "or_profile": "~Jiangyuan_Li1;~Jiayi_Wang7;~Raymond_K._W._Wong1;~Kwun_Chuen_Gary_Chan1", "aff": "Google;University of Texas at Dallas;;University of Washington", "aff_domain": "google.com;utdallas.edu;;u.washington.edu", "position": "Researcher;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nli2024a,\ntitle={A Pairwise Pseudo-likelihood Approach for Matrix Completion with Informative Missingness},\nauthor={Jiangyuan Li and Jiayi Wang and Raymond K. W. Wong and Kwun Chuen Gary Chan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZGN8dOhpi6}\n}", "github": "", "reviewers": "CJYv;Pbx3;1V66", "pdf_size": 1045990, "rating": "7;7;7", "confidence": "3;5;2", "soundness": "3;3;3", "novelty": "4;3;3", "presentation": "3;3;3", "wc_summary": "193;79;83", "wc_strengths": "120;103;42", "wc_weaknesses": "394;76;102", "wc_questions": "73;141;196", "wc_limitations": "16;10;129", "wc_review": "796;409;552", "wc_reply_reviewers": "144;73;22", "wc_reply_authors": "28;25;26", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.33333333333333, 52.822554105440815 ], "wc_strengths_avg": [ 88.33333333333333, 33.48963355361709 ], "wc_weaknesses_avg": [ 190.66666666666666, 144.16965314825754 ], "wc_questions_avg": [ 136.66666666666666, 50.307940614667274 ], "wc_limitations_avg": [ 51.666666666666664, 54.73775865179559 ], "wc_review_avg": [ 585.6666666666666, 159.7755369955683 ], "wc_reply_reviewers_avg": [ 79.66666666666667, 50.02888054802834 ], "wc_reply_authors_avg": [ 26.333333333333332, 1.247219128924647 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4253932950549468679&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": "google.com;utdallas.edu;;u.washington.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University of Texas at Dallas;University of Washington", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.utdallas.edu;https://www.washington.edu", "aff_unique_abbr": "Google;UT Dallas;UW", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Dallas;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94647", "id": "ZIpdu0cHYu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZIpdu0cHYu", "openreview": "https://openreview.net/forum?id=ZIpdu0cHYu", "poster": "/media/PosterPDFs/NeurIPS%202024/94647.png?t=1731479808.9761043", "project": "", "author_site": "SIJIA CHEN, Yibo Wang, Yi-Feng Wu, Qingguo Chen, Zhao Xu, Weihua Luo, Kaifu Zhang, Lijun Zhang", "tldr": "", "abstract": "Tool-augmented large language models (LLMs) leverage tools, often in the form of APIs, to improve their reasoning capabilities on complex tasks. This enables them to act as intelligent agents interacting with the real world. The recently introduced ToolLLaMA model by Qin et al. [2023] utilizes the depth-first search-based decision tree (DFSDT) mechanism for multi-step reasoning with $16000+$ real-world APIs, effectively enhancing the performance of tool-augmented LLMs compared to traditional chain reasoning mechanisms. However, their approach only employs successful paths from decision trees (also called inference trees) for supervised fine-tuning (SFT), missing out on the potential learning opportunities from failed paths. Inspired by this, we propose an inference trajectory optimization framework based on preference learning to address this limitation. We first introduce a novel method for constructing step-wise preference data from tree-like expert trajectories, which leverages the previously ignored failed explorations in the decision trees. In the subsequent training phase, we first fine-tune the LLM with successful tool-usage expert trajectories and then apply direct preference optimization (DPO) with the preference data to update the LLM's policy, resulting in our ToolPrefer-LLaMA (TP-LLaMA) model. This approach not only enhances the utilization of original expert data but also broadens the learning space of the model. Our experiments demonstrate that by obtaining insights from errors in inference trees, TP-LLaMA significantly outperforms the baselines across almost all test scenarios by a large margin and exhibits better generalization capabilities with unseen APIs. At the same time, TP-LLaMA has also demonstrated superior reasoning efficiency compared to the baselines, making it more suitable for complex tool-usage reasoning tasks.", "keywords": "Large Language Models;Tool Usage;Direct Preference Optimization;Tree of Thought", "primary_area": "generative_models", "supplementary_material": "", "author": "Sijia Chen;Yibo Wang;Yi-Feng Wu;Qing-Guo Chen;Zhao Xu;Weihua Luo;Kaifu Zhang;Lijun Zhang", "authorids": "~Sijia_Chen3;~Yibo_Wang2;~Yi-Feng_Wu2;~Qing-Guo_Chen1;~Zhao_Xu7;~Weihua_Luo2;~Kaifu_Zhang2;~Lijun_Zhang1", "gender": "F;;M;M;;M;M;", "homepage": "http://www.lamda.nju.edu.cn/chensj/;;;;;;;", "dblp": ";;;63/10301;;22/1116.html;;", "google_scholar": ";;3UpZTcQAAAAJ;GlqRHLcAAAAJ;;tsKl9GUAAAAJ;e3gpYTYAAAAJ;", "orcid": ";;;;;0000-0002-5002-6933;;", "linkedin": ";;;;;;;", "or_profile": "~Sijia_Chen3;~Yibo_Wang2;~Yi-Feng_Wu2;~Qing-Guo_Chen1;~Zhao_Xu7;~Weihua_Luo2;~Kaifu_Zhang2;~Lijun_Zhang1", "aff": "Nanjing University;;Alibaba Group;Alibaba Group;;Alibaba International Digital Commerce Group;Alibaba Group;", "aff_domain": "nju.edu.cn;;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;", "position": "MS student;;Researcher;Researcher;;Researcher;vice president;", "bibtex": "@inproceedings{\nchen2024advancing,\ntitle={Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees},\nauthor={Sijia Chen and Yibo Wang and Yi-Feng Wu and Qing-Guo Chen and Zhao Xu and Weihua Luo and Kaifu Zhang and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZIpdu0cHYu}\n}", "github": "", "reviewers": "9yS8;jBLW;d56d;z4Vk", "pdf_size": 1555083, "rating": "5;7;7;7", "confidence": "5;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "96;56;115;65", "wc_strengths": "36;51;75;73", "wc_weaknesses": "199;48;339;49", "wc_questions": "72;125;195;33", "wc_limitations": "5;1;2;35", "wc_review": "408;281;726;255", "wc_reply_reviewers": "184;30;164;19", "wc_reply_authors": "100;29;27;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 83.0, 23.695991222145572 ], "wc_strengths_avg": [ 58.75, 16.161296358893985 ], "wc_weaknesses_avg": [ 158.75, 120.851923857256 ], "wc_questions_avg": [ 106.25, 60.75925855373813 ], "wc_limitations_avg": [ 10.75, 14.077908225301087 ], "wc_review_avg": [ 417.5, 187.28387544046603 ], "wc_reply_reviewers_avg": [ 99.25, 75.18435675059008 ], "wc_reply_authors_avg": [ 43.5, 32.882366094914765 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5620361145111556752&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;", "author_num": 8, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Nanjing University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Nanjing U;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DiffHammer: Rethinking the Robustness of Diffusion-Based Adversarial Purification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94646", "id": "ZJ2ONmSgCS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZJ2ONmSgCS", "openreview": "https://openreview.net/forum?id=ZJ2ONmSgCS", "poster": "/media/PosterPDFs/NeurIPS%202024/94646.png?t=1731212372.0677674", "project": "", "author_site": "Kaibo Wang, Xiaowen Fu, Yuxuan Han, Yang Xiang", "tldr": "", "abstract": "Diffusion-based purification has demonstrated impressive robustness as an adversarial defense. However, concerns exist about whether this robustness arises from insufficient evaluation. Our research shows that EOT-based attacks face gradient dilemmas due to global gradient averaging, resulting in ineffective evaluations. Additionally, 1-evaluation underestimates resubmit risks in stochastic defenses. To address these issues, we propose an effective and efficient attack named DiffHammer. This method bypasses the gradient dilemma through selective attacks on vulnerable purifications, incorporating $N$-evaluation into loops and using gradient grafting for comprehensive and efficient evaluations. Our experiments validate that DiffHammer achieves effective results within 10-30 iterations, outperforming other methods. This calls into question the reliability of diffusion-based purification after mitigating the gradient dilemma and scrutinizing its resubmit risk.", "keywords": "adaptive adversarial attack;adversarial purification;diffusion", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Kaibo Wang;Xiaowen Fu;Yuxuan Han;Yang Xiang", "authorids": "~Kaibo_Wang2;~Xiaowen_Fu1;~Yuxuan_Han1;~Yang_Xiang3", "gender": "M;F;;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0009-0006-1375-8138;0000-0003-0668-1575;;", "linkedin": ";;;", "or_profile": "~Kaibo_Wang2;~Xiaowen_Fu1;~Yuxuan_Han1;~Yang_Xiang3", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;", "aff_domain": "connect.ust.hk;connect.ust.hk;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nkaibo2024diffhammer,\ntitle={DiffHammer: Rethinking the Robustness of Diffusion-Based Adversarial Purification},\nauthor={Kaibo Wang and Xiaowen Fu and Yuxuan Han and Yang Xiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZJ2ONmSgCS}\n}", "github": "", "reviewers": "ZG3o;wBtm;knbF;HpZi", "pdf_size": 4113417, "rating": "4;5;6;6", "confidence": "3;3;5;3", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;2;2", "wc_summary": "37;89;41;64", "wc_strengths": "25;68;26;63", "wc_weaknesses": "142;178;358;186", "wc_questions": "4;2;9;4", "wc_limitations": "1;7;4;25", "wc_review": "209;344;438;342", "wc_reply_reviewers": "127;0;99;19", "wc_reply_authors": "560;0;319;12", "reply_reviewers": "1;0;2;1", "reply_authors": "3;1;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.75, 20.777090749188154 ], "wc_strengths_avg": [ 45.5, 20.081085628023203 ], "wc_weaknesses_avg": [ 216.0, 83.64209466530593 ], "wc_questions_avg": [ 4.75, 2.5860201081971503 ], "wc_limitations_avg": [ 9.25, 9.33742469849155 ], "wc_review_avg": [ 333.25, 81.55174737551611 ], "wc_reply_reviewers_avg": [ 61.25, 53.11485197192966 ], "wc_reply_authors_avg": [ 222.75, 232.9349426342042 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4205371661955826518&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "connect.ust.hk;connect.ust.hk;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "OSLO: One-Shot Label-Only Membership Inference Attacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94645", "id": "ZJBBeyEAyX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZJBBeyEAyX", "openreview": "https://openreview.net/forum?id=ZJBBeyEAyX", "poster": "/media/PosterPDFs/NeurIPS%202024/94645.png?t=1730347221.184513", "project": "", "author_site": "Yuefeng Peng, Jaechul Roh, Subhransu Maji, Amir Houmansadr", "tldr": "", "abstract": "We introduce One-Shot Label-Only (OSLO) membership inference attacks (MIAs), which accurately infer a given sample's membership in a target model's training set with high precision using just a single query, where the target model only returns the predicted hard label. \n This is in contrast to state-of-the-art label-only attacks which require $\\sim6000$ queries, yet get attack precisions lower than OSLO's.\n OSLO leverages transfer-based black-box adversarial attacks. The core idea is that a member sample exhibits more resistance to adversarial perturbations than a non-member. We compare OSLO against state-of-the-art label-only attacks and demonstrate that, despite requiring only one query, our method significantly outperforms previous attacks in terms of precision and true positive rate (TPR) under the same false positive rates (FPR). For example, compared to previous label-only MIAs, OSLO achieves a TPR that is at least 7$\\times$ higher under a 1\\% FPR and at least 22$\\times$ higher under a 0.1\\% FPR on CIFAR100 for a ResNet18 model. We evaluated multiple defense mechanisms against OSLO.", "keywords": "membership inference attack;privacy;leakage", "primary_area": "privacy", "supplementary_material": "/attachment/144a2eb3c88dfdccc0792cc8a499f0fbd8d226dd.zip", "author": "Yuefeng Peng;Jaechul Roh;Subhransu Maji;Amir Houmansadr", "authorids": "~Yuefeng_Peng3;~Jaechul_Roh1;~Subhransu_Maji1;~Amir_Houmansadr1", "gender": ";M;M;M", "homepage": ";https://jrohsc.github.io/;https://people.cs.umass.edu/~smaji/;https://www.cs.umass.edu/~amir/", "dblp": ";;92/6598;22/1797", "google_scholar": ";knCeRjsAAAAJ;l7Qx0zAAAAAJ;https://scholar.google.com.tw/citations?user=cTTFHNwAAAAJ", "orcid": ";;0000-0002-3869-9334;", "linkedin": ";jaechul-roh-572363155/;;", "or_profile": "~Yuefeng_Peng3;~Jaechul_Roh1;~Subhransu_Maji1;~Amir_Houmansadr1", "aff": ";University of Massachusetts at Amherst;University of Massachusetts at Amherst;University of Massachusetts, Amherst", "aff_domain": ";umass.edu;cs.umass.edu;umass.edu", "position": ";PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\npeng2024oslo,\ntitle={{OSLO}: One-Shot Label-Only Membership Inference Attacks},\nauthor={Yuefeng Peng and Jaechul Roh and Subhransu Maji and Amir Houmansadr},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZJBBeyEAyX}\n}", "github": "", "reviewers": "CtKQ;uq6v;B8Vg;Vvp4", "pdf_size": 2992048, "rating": "5;5;6;7", "confidence": "3;4;2;5", "soundness": "2;3;2;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "50;67;140;188", "wc_strengths": "17;63;27;48", "wc_weaknesses": "133;292;72;112", "wc_questions": "42;55;20;147", "wc_limitations": "13;5;13;4", "wc_review": "255;482;272;499", "wc_reply_reviewers": "67;58;121;25", "wc_reply_authors": "105;308;249;36", "reply_reviewers": "1;2;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.25, 55.73766679723865 ], "wc_strengths_avg": [ 38.75, 17.92170471802278 ], "wc_weaknesses_avg": [ 152.25, 83.60734118485051 ], "wc_questions_avg": [ 66.0, 48.40970976983853 ], "wc_limitations_avg": [ 8.75, 4.264680527307995 ], "wc_review_avg": [ 377.0, 113.81783691495811 ], "wc_reply_reviewers_avg": [ 67.75, 34.491846862700754 ], "wc_reply_authors_avg": [ 174.5, 108.84047960203041 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40451991747794525, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6338405670469297524&as_sdt=5,28&sciodt=0,28&hl=en", "gs_version_total": 4, "email": ";umass.edu;cs.umass.edu;umass.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ZJZqO4grws", "title": "Learning to Learn with Contrastive Meta-Objective", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a contrastive meta-objective to enable meta-learners to emulate human-like rapid learning capability through enhanced alignment and discrimination. Our proposed approach, dubbed ConML, exploits task identity as additional supervision signal for meta-training, benefiting meta-learner's fast-adaptation and task-level generalization abilities. This is achieved by contrasting the outputs of meta-learner, i.e, performing contrastive learning in the model space.\nSpecifically, we introduce metrics to minimize the inner-task distance, i.e., the distance among models learned on varying data subsets of the same task, while maximizing the inter-task distance among models derived from distinct tasks. \nConML distinguishes itself through versatility and efficiency, seamlessly integrating \nwith episodic meta-training methods and the in-context learning of large language models (LLMs). \nWe apply ConML to representative meta-learning algorithms spanning optimization-, metric-, and amortization-based approaches, and show that ConML can universally and significantly improve conventional meta-learning and in-context learning.", "keywords": "meta learning;contrastive learning;few-shot learning;i n-context learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/41e4c2bd8bfd1e87759b0729b66f6ff37ffca52d.zip", "author": "Shiguang Wu;Yatao Bian;Quanming Yao", "authorids": "~Shiguang_Wu3;~Yatao_Bian1;~Quanming_Yao3", "gender": ";M;M", "homepage": "https://ovo67.github.io/;https://lars-group.github.io/;https://yataobian.com", "dblp": "275/7661;158/1014;222/2694", "google_scholar": "JoGRPwcAAAAJ;https://scholar.google.com/schhp?hl=en;oZBTlBkAAAAJ", "orcid": ";;0000-0002-2368-4084", "linkedin": ";;", "or_profile": "~Shiguang_Wu3;~quanming_yao1;~An_Bian1", "aff": "Tsinghua University;Department of Electronic Engineering;Tencent AI Lab", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tencent.com", "position": "PhD student;Assistant Professor;Senior researcher ", "bibtex": "@misc{\nanonymous2024learning,\ntitle={Learning to Learn with Contrastive Meta-Objective},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=ZJZqO4grws}\n}", "github": "", "project": "", "reviewers": "6XwB;eZHU;KosZ;rrDw", "site": "https://openreview.net/forum?id=ZJZqO4grws", "pdf_size": 2109917, "rating": "3;4;4;6", "confidence": "4;4;4;5", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;1;3", "wc_summary": "82;51;83;146", "wc_strengths": "80;88;15;51", "wc_weaknesses": "247;183;132;54", "wc_questions": "4;17;164;26", "wc_limitations": "1;10;1;3", "wc_review": "414;349;395;280", "wc_reply_reviewers": "0;0;0;48", "wc_reply_authors": "0;0;565;5", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;2", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 34.528973341239094 ], "wc_strengths_avg": [ 58.5, 28.64000698323937 ], "wc_weaknesses_avg": [ 154.0, 70.66470123052952 ], "wc_questions_avg": [ 52.75, 64.70461730046782 ], "wc_limitations_avg": [ 3.75, 3.6996621467371855 ], "wc_review_avg": [ 359.5, 51.62605931116571 ], "wc_reply_reviewers_avg": [ 12.0, 20.784609690826528 ], "wc_reply_authors_avg": [ 142.5, 243.93902926756104 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z1DibxsJ9ZwJ:scholar.google.com/&scioq=Learning+to+Learn+with+Contrastive+Meta-Objective&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;Institution Name Not Provided;Tencent", "aff_unique_dep": ";Department of Electronic Engineering;Tencent AI Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://ai.tencent.com", "aff_unique_abbr": "THU;;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "DeTrack: In-model Latent Denoising Learning for Visual Object Tracking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94644", "id": "ZJjuNF0olj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZJjuNF0olj", "openreview": "https://openreview.net/forum?id=ZJjuNF0olj", "poster": "/media/PosterPDFs/NeurIPS%202024/94644.png?t=1731051837.0796623", "project": "", "author_site": "Xinyu Zhou, Jinglun Li, Lingyi Hong, Kaixun Jiang, Pinxue Guo, Weifeng Ge, Wenqiang Zhang", "tldr": "", "abstract": "Previous visual object tracking methods employ image-feature regression models or coordinate autoregression models for bounding box prediction. Image-feature regression methods heavily depend on matching results and do not utilize positional prior, while the autoregressive approach can only be trained using bounding boxes available in the training set, potentially resulting in suboptimal performance during testing with unseen data. Inspired by the diffusion model, denoising learning enhances the model\u2019s robustness to unseen data. Therefore, We introduce noise to bounding boxes, generating noisy boxes for training, thus enhancing model robustness on testing data. We propose a new paradigm to formulate the visual object tracking problem as a denoising learning process. However, tracking algorithms are usually asked to run in real-time, directly applying the diffusion model to object tracking would severely impair tracking speed. Therefore, we decompose the denoising learning process into every denoising block within a model, not by running the model multiple times, and thus we summarize the proposed paradigm as an in-model latent denoising learning process. Specifically, we propose a denoising Vision Transformer (ViT), which is composed of multiple denoising blocks. In the denoising block, template and search embeddings are projected into every denoising block as conditions. A denoising block is responsible for removing the noise in a predicted bounding box, and multiple stacked denoising blocks cooperate to accomplish the whole denoising process. Subsequently, we\nutilize image features and trajectory information to refine the denoised bounding box. Besides, we also utilize trajectory memory and visual memory to improve tracking stability. Experimental results validate the effectiveness of our approach, achieving competitive performance on several challenging datasets. The proposed in-model latent denoising tracker achieve real-time speed, rendering denoising learning applicable in the visual object tracking community.", "keywords": "visual object tracking; denoising learning;in-model latent;", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Xinyu Zhou;Jinglun Li;Lingyi Hong;Kaixun Jiang;Pinxue Guo;Weifeng Ge;Wenqiang Zhang", "authorids": "~Xinyu_Zhou5;~Jinglun_Li1;~Lingyi_Hong1;~Kaixun_Jiang1;~Pinxue_Guo1;~Weifeng_Ge2;~Wenqiang_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://www.researchgate.net/profile/Xinyu-Zhou-21;http://www.fudanroilab.com/2020/05/01/JinglunLi.html;https://lingyihongfd.github.io/;http://www.fudanroilab.com/2018/09/02/KaixunJiang.html;;http://www.weifengge.net/;https://www.fudanroilab.com/2021/07/01/WenqiangZhang.html", "dblp": ";;311/7466;334/1058;333/7534;155/3277.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=Zdm-YgkAAAAJ;;wHh_m_IAAAAJ;https://scholar.google.com/citations?hl=en;d_7fUjoAAAAJ;wFs402oAAAAJ;vL-VEJYAAAAJ", "orcid": ";0009-0001-4930-6284;;;;0000-0002-6258-6225;0000-0002-3339-8751", "linkedin": ";;;;;;", "or_profile": "~Xinyu_Zhou5;~Jinglun_Li1;~Lingyi_Hong1;~Kaixun_Jiang1;~Pinxue_Guo1;~Weifeng_Ge2;~Wenqiang_Zhang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu;fdu.edu;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024detrack,\ntitle={DeTrack: In-model Latent Denoising Learning for Visual Object Tracking},\nauthor={Xinyu Zhou and Jinglun Li and Lingyi Hong and Kaixun Jiang and Pinxue Guo and Weifeng Ge and Wenqiang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZJjuNF0olj}\n}", "github": "", "reviewers": "b9Bg;wkvN;JcZZ", "pdf_size": 1375591, "rating": "6;7;7", "confidence": "5;5;5", "soundness": "3;3;3", "novelty": "3;4;2", "presentation": "3;2;3", "wc_summary": "78;146;77", "wc_strengths": "85;125;90", "wc_weaknesses": "307;303;135", "wc_questions": "79;317;9", "wc_limitations": "6;6;24", "wc_review": "555;897;335", "wc_reply_reviewers": "225;164;82", "wc_reply_authors": "606;1361;44", "reply_reviewers": "2;1;1", "reply_authors": "4;4;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.33333333333333, 32.293790252754306 ], "wc_strengths_avg": [ 100.0, 17.795130420052185 ], "wc_weaknesses_avg": [ 248.33333333333334, 80.15540461434206 ], "wc_questions_avg": [ 135.0, 131.82817099037166 ], "wc_limitations_avg": [ 12.0, 8.48528137423857 ], "wc_review_avg": [ 595.6666666666666, 231.23052470544533 ], "wc_reply_reviewers_avg": [ 157.0, 58.58896369340105 ], "wc_reply_authors_avg": [ 670.3333333333334, 539.5839961385891 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Yd3XhxoYdQUJ:scholar.google.com/&scioq=DeTrack:+In-model+Latent+Denoising+Learning+for+Visual+Object+Tracking&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu;fdu.edu;fudan.edu.cn;fudan.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MemVLT: Vision-Language Tracking with Adaptive Memory-based Prompts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94643", "id": "ZK1CZXKgG5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZK1CZXKgG5", "openreview": "https://openreview.net/forum?id=ZK1CZXKgG5", "poster": "/media/PosterPDFs/NeurIPS%202024/94643.png?t=1730712313.1169877", "project": "", "author_site": "Xiaokun Feng, Xuchen Li, Shiyu Hu, Dailing Zhang, wu meiqi, Jing Zhang, Xiaotang Chen, Kaiqi Huang", "tldr": "", "abstract": "Vision-language tracking (VLT) enhances traditional visual object tracking by integrating language descriptions, requiring the tracker to flexibly understand complex and diverse text in addition to visual information. However, most existing vision-language trackers still overly rely on initial fixed multimodal prompts, which struggle to provide effective guidance for dynamically changing targets. Fortunately, the Complementary Learning Systems (CLS) theory suggests that the human memory system can dynamically store and utilize multimodal perceptual information, thereby adapting to new scenarios. Inspired by this, (i) we propose a Memory-based Vision-Language Tracker (MemVLT). By incorporating memory modeling to adjust static prompts, our approach can provide adaptive prompts for tracking guidance. \n(ii) Specifically, the memory storage and memory interaction modules are designed in accordance with CLS theory. These modules facilitate the storage and flexible interaction between short-term and long-term memories, generating prompts that adapt to target variations.\n (iii) Finally, we conduct extensive experiments on mainstream VLT datasets (e.g., MGIT, TNL2K, LaSOT and LaSOT$_{ext}$). Experimental results show that MemVLT achieves new state-of-the-art performance. Impressively, it achieves 69.4% AUC on the MGIT and 63.3% AUC on the TNL2K, improving the existing best result by 8.4% and 4.7%, respectively.", "keywords": "Object Tracking; Visual-Language Multimodality; Adaptive Prompts", "primary_area": "machine_vision", "supplementary_material": "/attachment/cf64e557f489d2eefef4624e817a1229955d7ae3.zip", "author": "Xiaokun Feng;Xuchen Li;Shiyu Hu;Dailing Zhang;Meiqi Wu;Jing Zhang;Xiaotang Chen;Kaiqi Huang", "authorids": "~Xiaokun_Feng1;~Xuchen_Li1;~Shiyu_Hu1;~Dailing_Zhang2;~Meiqi_Wu2;~Jing_Zhang47;~Xiaotang_Chen1;~Kaiqi_Huang1", "gender": "M;M;F;M;;F;;M", "homepage": "https://github.com/XiaokunFeng;;https://huuuuusy.github.io/;https://github.com/zdl-hub;;;;https://people.ucas.ac.cn/~huangkaiqi?language=en", "dblp": "314/9776;232/2889;;156/8892;;;;89/7026", "google_scholar": "https://scholar.google.com.hk/citations?user=NqXtIPIAAAAJ;9zHkraUAAAAJ;49W-Rx4AAAAJ;ApH4wOcAAAAJ;;;;caQ-OmYAAAAJ", "orcid": ";0009-0009-2565-8857;0000-0002-5872-7566;;;0000-0001-8825-263X;;", "linkedin": ";;hushiyu1995/;;;;;", "or_profile": "~Xiaokun_Feng1;~Xuchen_Li1;~Shiyu_Hu1;~Dailing_Zhang2;~Meiqi_Wu2;~Jing_Zhang47;~Xiaotang_Chen1;~Kaiqi_Huang1", "aff": "Institute of automation, Chinese academy of science;Beijing University of Posts and Telecommunications;Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;bupt.edu.cn;ia.ac.cn;ia.ac.cn;;ia.ac.cn;;nlpr.ia.ac.cn", "position": "PhD student;Undergrad student;PhD student;PhD student;;Engineer;;Professor", "bibtex": "@inproceedings{\nfeng2024memvlt,\ntitle={Mem{VLT}: Vision-Language Tracking with Adaptive Memory-based Prompts},\nauthor={Xiaokun Feng and Xuchen Li and Shiyu Hu and Dailing Zhang and Meiqi Wu and Jing Zhang and Xiaotang Chen and Kaiqi Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZK1CZXKgG5}\n}", "github": "", "reviewers": "unPL;GRLs;BESZ;jufM", "pdf_size": 1418905, "rating": "5;5;6;7", "confidence": "5;5;4;4", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "50;101;38;44", "wc_strengths": "19;54;38;26", "wc_weaknesses": "172;199;42;180", "wc_questions": "2;23;42;15", "wc_limitations": "4;1;18;12", "wc_review": "247;378;178;277", "wc_reply_reviewers": "226;40;21;19", "wc_reply_authors": "1731;53;531;56", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 58.25, 25.043711785596 ], "wc_strengths_avg": [ 34.25, 13.273563952458284 ], "wc_weaknesses_avg": [ 148.25, 62.122359098797915 ], "wc_questions_avg": [ 20.5, 14.5 ], "wc_limitations_avg": [ 8.75, 6.684870978560469 ], "wc_review_avg": [ 270.0, 71.9478978150161 ], "wc_reply_reviewers_avg": [ 76.5, 86.70207609971055 ], "wc_reply_authors_avg": [ 592.75, 685.3569781508028 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17106036427514740223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ia.ac.cn;bupt.edu.cn;ia.ac.cn;ia.ac.cn;;ia.ac.cn;;nlpr.ia.ac.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Beijing University of Posts and Telecommunications", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "CAS;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SDformer: Similarity-driven Discrete Transformer For Time Series Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94642", "id": "ZKbplMrDzI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZKbplMrDzI", "openreview": "https://openreview.net/forum?id=ZKbplMrDzI", "poster": "/media/PosterPDFs/NeurIPS%202024/94642.png?t=1733470687.8693497", "project": "", "author_site": "Zhicheng Chen, FENG SHIBO, Zhong Zhang, Xi Xiao, Xingyu Gao, Peilin Zhao", "tldr": "", "abstract": "The superior generation capabilities of Denoised Diffusion Probabilistic Models (DDPMs) have been effectively showcased across a multitude of domains. Recently, the application of DDPMs has extended to time series generation tasks, where they have significantly outperformed other deep generative models, often by a substantial margin. However, we have discovered two main challenges with these methods: 1) the inference time is excessively long; 2) there is potential for improvement in the quality of the generated time series. In this paper, we propose a method based on discrete token modeling technique called Similarity-driven Discrete Transformer (SDformer). Specifically, SDformer utilizes a similarity-driven vector quantization method for learning high-quality discrete token representations of time series, followed by a discrete Transformer for data distribution modeling at the token level. Comprehensive experiments show that our method significantly outperforms competing approaches in terms of the generated time series quality while also ensuring a short inference time. Furthermore, without requiring retraining, SDformer can be directly applied to predictive tasks and still achieve commendable results.", "keywords": "time series generation; generative model; discrete token modeling;", "primary_area": "other", "supplementary_material": "", "author": "Chen Zhicheng;FENG SHIBO;Zhong Zhang;Xi Xiao;Xingyu Gao;Peilin Zhao", "authorids": "~Chen_Zhicheng1;~FENG_SHIBO1;~Zhong_Zhang7;~Xi_Xiao1;~Xingyu_Gao1;~Peilin_Zhao2", "gender": "M;;M;;;M", "homepage": ";;https://www.sigs.tsinghua.edu.cn/xx_en/main.htm;https://people.ucas.ac.cn/~0066348;;", "dblp": ";;;32/4831-1.html;84/8411;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;_K0vDtwAAAAJ;;sUm6eiIAAAAJ;https://scholar.google.com.hk/citations?user=HPeX_YcAAAAJ;", "orcid": ";0009-0008-7584-057X;;0000-0002-4660-8092;0000-0001-8543-3953; 0000-0002-3254-5852", "linkedin": ";;;;;", "or_profile": "~FENG_SHIBO1;~Zhong_Zhang7;~Xi_Xiao1;~Xingyu_Gao1;~Peilin_Zhao2;~Zhicheng_Chen1", "aff": "Nanyang Technological University;Tencent AI Lab;Shenzhen International Graduate School, Tsinghua University;Chinese Academy of Sciences;Tencent;Tsinghua University", "aff_domain": "ntu.edu.sg;tencent.com;tsinghua.edu.cn;ac.cn;tencent.com;mail.tsinghua.edu.cn", "position": "PhD student;Researcher;Associate Professor;Full Professor;Researcher;MS student", "bibtex": "@inproceedings{\nzhicheng2024sdformer,\ntitle={{SD}former: Similarity-driven Discrete Transformer For Time Series Generation},\nauthor={Chen Zhicheng and FENG SHIBO and Zhong Zhang and Xi Xiao and Xingyu Gao and Peilin Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZKbplMrDzI}\n}", "github": "", "reviewers": "h9mP;2bQV;t6Rv;DHCH", "pdf_size": 9598243, "rating": "5;6;6;7", "confidence": "2;3;4;4", "soundness": "2;2;3;4", "novelty": "2;3;3;4", "presentation": "2;2;3;3", "wc_summary": "38;49;66;60", "wc_strengths": "112;42;42;95", "wc_weaknesses": "23;57;48;6", "wc_questions": "5;327;15;199", "wc_limitations": "38;6;1;8", "wc_review": "216;481;172;368", "wc_reply_reviewers": "0;99;0;53", "wc_reply_authors": "33;566;0;14", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 53.25, 10.709224995301948 ], "wc_strengths_avg": [ 72.75, 31.33189269737786 ], "wc_weaknesses_avg": [ 33.5, 20.180436070610565 ], "wc_questions_avg": [ 136.5, 134.3977306356026 ], "wc_limitations_avg": [ 13.25, 14.515078366994786 ], "wc_review_avg": [ 309.25, 122.96620470682178 ], "wc_reply_reviewers_avg": [ 38.0, 41.33400537088077 ], "wc_reply_authors_avg": [ 153.25, 238.58895091768184 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11526460231263861543&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ntu.edu.sg;tencent.com;tsinghua.edu.cn;ac.cn;tencent.com;mail.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;1;2", "aff_unique_norm": "Nanyang Technological University;Tencent;Tsinghua University;Chinese Academy of Sciences", "aff_unique_dep": ";Tencent AI Lab;Shenzhen International Graduate School;", "aff_unique_url": "https://www.ntu.edu.sg;https://ai.tencent.com;https://www.tsinghua.edu.cn;https://www.cas.cn", "aff_unique_abbr": "NTU;Tencent AI Lab;THU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "DomainGallery: Few-shot Domain-driven Image Generation by Attribute-centric Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94641", "id": "ZMmJ1z8vee", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZMmJ1z8vee", "openreview": "https://openreview.net/forum?id=ZMmJ1z8vee", "poster": "/media/PosterPDFs/NeurIPS%202024/94641.png?t=1731218171.0312707", "project": "", "author_site": "Yuxuan Duan, Yan Hong, Bo Zhang, jun lan, Huijia Zhu, Weiqiang Wang, Jianfu Zhang, Li Niu, Liqing Zhang", "tldr": "", "abstract": "The recent progress in text-to-image models pretrained on large-scale datasets has enabled us to generate various images as long as we provide a text prompt describing what we want. Nevertheless, the availability of these models is still limited when we expect to generate images that fall into a specific domain either hard to describe or just unseen to the models. In this work, we propose DomainGallery, a few-shot domain-driven image generation method which aims at finetuning pretrained Stable Diffusion on few-shot target datasets in an attribute-centric manner. Specifically, DomainGallery features prior attribute erasure, attribute disentanglement, regularization and enhancement. These techniques are tailored to few-shot domain-driven generation in order to solve key issues that previous works have failed to settle. Extensive experiments are given to validate the superior performance of DomainGallery on a variety of domain-driven generation scenarios.", "keywords": "few-shot domain-driven image generation;model transfer;text-to-image model finetuning", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yuxuan Duan;Yan Hong;Bo Zhang;jun lan;Huijia Zhu;Weiqiang Wang;Jianfu Zhang;Li Niu;Liqing Zhang", "authorids": "~Yuxuan_Duan2;~Yan_Hong1;~Bo_Zhang11;~jun_lan2;~Huijia_Zhu1;~Weiqiang_Wang4;~Jianfu_Zhang2;~Li_Niu2;~Liqing_Zhang2", "gender": ";F;M;M;F;M;M;M;M", "homepage": ";https://github.com/hy-zpg;https://bo-zhang-cs.github.io/;https://scholar.google.com/citations?hl=zh-CN&user=nB_ntVkAAAAJ;https://scholar.google.com/citations?hl=zh-CN&user=DT-cyucAAAAJ;https://www.linkedin.com/in/weiqiang-wang-489b925/;https://matt-sjtu.github.io/;http://www.ustcnewly.com;http://bcmi.sjtu.edu.cn/~zhangliqing/", "dblp": ";68/974-2.html;36/2259-75;218/0185;50/7121;;78/3993-3;02/3166-2;20/4627-1.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=ztq5-xcAAAAJ;Yb9AS0cAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;OhT3AWMAAAAJ;1smFmxAAAAAJ", "orcid": ";0000-0001-6401-0812;0000-0001-6577-7484;0000-0003-0921-0613;0009-0008-5784-7225;0000-0002-6159-619X;0000-0002-2673-5860;;", "linkedin": ";;%E5%8D%9A-%E5%BC%A0-715a85212/;;;weiqiang-wang-489b925/;;;", "or_profile": "~Yuxuan_Duan2;~Yan_Hong1;~Bo_Zhang11;~jun_lan2;~Huijia_Zhu1;~Weiqiang_Wang4;~Jianfu_Zhang2;~Li_Niu2;~Liqing_Zhang2", "aff": ";Alibaba Group;Shanghai Jiaotong University;AntGroup;Ant Group;Ant Group;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";antgroup.com;sjtu.edu.cn;antgroup.com;antgroup.com;antgroup.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": ";Researcher;PhD student;Researcher;Researcher;Researcher;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nduan2024domaingallery,\ntitle={DomainGallery: Few-shot Domain-driven Image Generation by Attribute-centric Finetuning},\nauthor={Yuxuan Duan and Yan Hong and Bo Zhang and jun lan and Huijia Zhu and Weiqiang Wang and Jianfu Zhang and Li Niu and Liqing Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZMmJ1z8vee}\n}", "github": "", "reviewers": "btQa;Ue4P;N3qa;2oCE", "pdf_size": 25058310, "rating": "5;5;6;6", "confidence": "4;3;5;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "30;76;26;105", "wc_strengths": "17;25;59;102", "wc_weaknesses": "110;137;99;100", "wc_questions": "4;7;79;153", "wc_limitations": "9;3;39;10", "wc_review": "170;248;302;470", "wc_reply_reviewers": "72;0;57;18", "wc_reply_authors": "34;0;47;40", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.25, 32.91940916845258 ], "wc_strengths_avg": [ 50.75, 33.52890543993347 ], "wc_weaknesses_avg": [ 111.5, 15.337861650177967 ], "wc_questions_avg": [ 60.75, 61.14071883777619 ], "wc_limitations_avg": [ 15.25, 13.970952007647869 ], "wc_review_avg": [ 297.5, 110.09427778045506 ], "wc_reply_reviewers_avg": [ 36.75, 28.960101864461734 ], "wc_reply_authors_avg": [ 30.25, 18.06066167115701 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RSzCnqi__asJ:scholar.google.com/&scioq=DomainGallery:+Few-shot+Domain-driven+Image+Generation+by+Attribute-centric+Finetuning&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";antgroup.com;sjtu.edu.cn;antgroup.com;antgroup.com;antgroup.com;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;2;2;1;1;1", "aff_unique_norm": "Alibaba Group;Shanghai Jiao Tong University;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.sjtu.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "Alibaba;SJTU;AntGroup", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VERIFIED: A Video Corpus Moment Retrieval Benchmark for Fine-Grained Video Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97632", "id": "ZMn2SPUgkU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZMn2SPUgkU", "openreview": "https://openreview.net/forum?id=ZMn2SPUgkU", "poster": "/media/PosterPDFs/NeurIPS%202024/97632.png?t=1731224910.672372", "project": "", "author_site": "Houlun Chen, Xin Wang, Hong Chen, Zeyang Zhang, Wei Feng, Bin Huang, Jia Jia, Wenwu Zhu", "tldr": "", "abstract": "Existing Video Corpus Moment Retrieval (VCMR) is limited to coarse-grained understanding that hinders precise video moment localization when given fine-grained queries. In this paper, we propose a more challenging fine-grained VCMR benchmark requiring methods to localize the best-matched moment from the corpus with other partially matched candidates. To improve the dataset construction efficiency and guarantee high-quality data annotations, we propose VERIFIED, an automatic \\underline{V}id\\underline{E}o-text annotation pipeline to generate captions with \\underline{R}el\\underline{I}able \\underline{FI}n\\underline{E}-grained statics and \\underline{D}ynamics. Specifically, we resort to large language models (LLM) and large multimodal models (LMM) with our proposed Statics and Dynamics Enhanced Captioning modules to generate diverse fine-grained captions for each video. To filter out the inaccurate annotations caused by the LLM hallucination, we propose a Fine-Granularity Aware Noise Evaluator where we fine-tune a video foundation model with disturbed hard-negatives augmented contrastive and matching losses. With VERIFIED, we construct a more challenging fine-grained VCMR benchmark containing Charades-FIG, DiDeMo-FIG, and ActivityNet-FIG which demonstrate a high level of annotation quality. We evaluate several state-of-the-art VCMR models on the proposed dataset, revealing that there is still significant scope for fine-grained video understanding in VCMR.", "keywords": "Fine-Grained Video Understanding;Video Corpus Moment Retrieval;Dynamics", "primary_area": "", "supplementary_material": "/attachment/d010312e58e11dd55f60515e860632a76a93a8df.pdf", "author": "Houlun Chen;Xin Wang;Hong Chen;Zeyang Zhang;Wei Feng;Bin Huang;Jia Jia;Wenwu Zhu", "authorids": "~Houlun_Chen1;~Xin_Wang17;~Hong_Chen9;~Zeyang_Zhang1;~Wei_Feng11;~Bin_Huang4;~Jia_Jia1;~Wenwu_Zhu1", "gender": ";M;M;;M;F;M;M", "homepage": ";http://mn.cs.tsinghua.edu.cn/xinwang/;https://forchchch.github.io/;https://zzythu.com;http://;https://hcsi.cs.tsinghua.edu.cn/;http://media.cs.tsinghua.edu.cn/en/zww;https://github.com/VONWEI13", "dblp": ";10/5630-19;52/4150-11;236/0242;;71/2992-1.html;97/6308-1.html;17/1152-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;YPOBHYUAAAAJ;;w_njVcAAAAAJ;;RYhh3FsAAAAJ;https://scholar.google.com.tw/citations?user=7t2jzpgAAAAJ;", "orcid": "0009-0006-7749-7328;0000-0002-0351-2939;0000-0002-0943-2286;0000-0003-1329-1313;;;0000-0003-2236-9290;0009-0002-4796-4205", "linkedin": ";;;zeyang-zhang-a7a039159;;;;", "or_profile": "~Houlun_Chen1;~Xin_Wang17;~Hong_Chen9;~Zeyang_Zhang1;~Bin_Huang4;~Jia_Jia1;~Wenwu_Zhu1;~Feng_Wei4", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Associate Professor;PhD student;PhD student;PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024verified,\ntitle={{VERIFIED}: A Video Corpus Moment Retrieval Benchmark for Fine-Grained Video Understanding},\nauthor={Houlun Chen and Xin Wang and Hong Chen and Zeyang Zhang and Wei Feng and Bin Huang and Jia Jia and Wenwu Zhu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ZMn2SPUgkU}\n}", "github": "", "reviewers": "G9CT;r7iW;i5C5;uTUV", "pdf_size": 5832780, "rating": "4;7;7;8", "confidence": "4;3;3;3", "wc_summary_and_contributions": "153;48;63;58", "wc_strengths": "66;4;106;94", "wc_improvement": "50;4;2;112", "wc_limitations": "4;9;1;15", "wc_correctness": "1;1;1;1", "wc_clarity": "1;1;1;18", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "1;1;1;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "278;70;177;313", "wc_reply_reviewers": "0;20;0;67", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;3;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 80.5, 42.20485754033533 ], "wc_strengths_avg": [ 67.5, 39.430318284284745 ], "wc_improvement_avg": [ 42.0, 44.74371464239419 ], "wc_limitations_avg": [ 7.25, 5.3091901453988255 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 5.25, 7.361215932167728 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 4.0, 5.196152422706632 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 209.5, 94.76418099683023 ], "wc_reply_reviewers_avg": [ 21.75, 27.371289702898547 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8566891925133889889&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Compositional PAC-Bayes: Generalization of GNNs with persistence and beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94640", "id": "ZNcJtNN3e8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZNcJtNN3e8", "openreview": "https://openreview.net/forum?id=ZNcJtNN3e8", "poster": "/media/PosterPDFs/NeurIPS%202024/94640.png?t=1733687435.269631", "project": "", "author_site": "Kirill Brilliantov, Amauri Souza, Vikas Garg", "tldr": "", "abstract": "Heterogeneity, e.g., due to different types of layers or multiple sub-models, poses key challenges in analyzing the generalization behavior of several modern architectures. For instance, descriptors based on Persistent Homology (PH) are being increasingly integrated into Graph Neural Networks (GNNs) to augment them with rich topological features; however, the generalization of such PH schemes remains unexplored. We introduce a novel _compositional_ PAC-Bayes framework that provides a general recipe to analyze a broad spectrum of models including those with heterogeneous layers. Specifically, we provide the first data-dependent generalization bounds for a widely adopted PH vectorization scheme (that subsumes persistence landscapes, images, and silhouettes) as well as PH-augmented GNNs. Using our framework, we also obtain bounds for GNNs and neural nets with ease. Our bounds also inform the design of novel regularizers. Empirical evaluations on several standard real-world datasets demonstrate that our theoretical bounds highly correlate with empirical generalization performance, leading to improved classifier design via our regularizers. Overall, this work bridges a crucial gap in the theoretical understanding of PH methods and general heterogeneous models, paving the way for the design of better models for (graph) representation learning. \nOur code is available at https://github.com/Aalto-QuML/Compositional-PAC-Bayes.", "keywords": "generalization;tda;GNN;PAC-Bayes", "primary_area": "learning_theory", "supplementary_material": "", "author": "Kirill Brilliantov;Amauri H Souza;Vikas Garg", "authorids": "~Kirill_Brilliantov1;~Amauri_H_Souza1;~Vikas_Garg2", "gender": "M;M;", "homepage": "https://github.com/kibrq;http://www.amauriholanda.org;", "dblp": "350/5533;131/3352;", "google_scholar": "thgwrhYAAAAJ;lP0LBI4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Kirill_Brilliantov1;~Amauri_H_Souza1;~Vikas_Garg2", "aff": "ETHZ - ETH Zurich;Federal Institute of Cear\u00e1;", "aff_domain": "ethz.ch;ifce.edu.br;", "position": "MS student;Associate Professor;", "bibtex": "@inproceedings{\nbrilliantov2024compositional,\ntitle={Compositional {PAC}-Bayes: Generalization of {GNN}s with persistence and beyond},\nauthor={Kirill Brilliantov and Amauri H Souza and Vikas Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZNcJtNN3e8}\n}", "github": "", "reviewers": "ynbU;Hv3h;zp7n;toK8", "pdf_size": 777803, "rating": "4;6;6;7", "confidence": "3;2;3;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "37;47;66;159", "wc_strengths": "48;57;117;30", "wc_weaknesses": "85;37;456;21", "wc_questions": "29;52;68;25", "wc_limitations": "6;1;30;110", "wc_review": "205;194;737;345", "wc_reply_reviewers": "0;22;66;12", "wc_reply_authors": "131;27;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.25, 48.334123556758534 ], "wc_strengths_avg": [ 63.0, 32.657311585615865 ], "wc_weaknesses_avg": [ 149.75, 178.3751313944856 ], "wc_questions_avg": [ 43.5, 17.5 ], "wc_limitations_avg": [ 36.75, 43.68852824254898 ], "wc_review_avg": [ 370.25, 219.95155716657248 ], "wc_reply_reviewers_avg": [ 25.0, 24.919871588754223 ], "wc_reply_authors_avg": [ 39.5, 53.96526660732809 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17124939299529724904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "ethz.ch;ifce.edu.br;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Federal Institute of Cear\u00e1", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;http://www.ifce.edu.br", "aff_unique_abbr": "ETHZ;IFCE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Brazil" }, { "title": "Zero-shot Generalizable Incremental Learning for Vision-Language Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94639", "id": "ZNqHm0a35E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZNqHm0a35E", "openreview": "https://openreview.net/forum?id=ZNqHm0a35E", "poster": "/media/PosterPDFs/NeurIPS%202024/94639.png?t=1731331557.125892", "project": "", "author_site": "Jieren Deng, Haojian Zhang, Kun Ding, Jianhua Hu, Xingxuan Zhang, Yunkuan Wang", "tldr": "", "abstract": "This paper presents Incremental Vision-Language Object Detection (IVLOD), a novel learning task designed to incrementally adapt pre-trained Vision-Language Object Detection Models (VLODMs) to various specialized domains, while simultaneously preserving their zero-shot generalization capabilities for the generalized domain. To address this new challenge, we present the Zero-interference Reparameterizable Adaptation (ZiRa), a novel method that introduces Zero-interference Loss and reparameterization techniques to tackle IVLOD without incurring a significant increase in memory usage. Comprehensive experiments on COCO and ODinW-13 datasets demonstrate that ZiRa effectively safeguards the zero-shot generalization ability of VLODMs while continuously adapting to new tasks. Specifically, after training on ODinW-13 datasets, ZiRa exhibits superior performance compared to CL-DETR and iDETR, boosting zero-shot generalizability by substantial $\\textbf{13.91}$ and $\\textbf{8.74}$ AP, respectively. Our code is available at https://github.com/JarintotionDin/ZiRaGroundingDINO.", "keywords": "Vision-language object detection;incremental learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jieren Deng;Haojian Zhang;Kun Ding;Jianhua Hu;Xingxuan Zhang;Yunkuan Wang", "authorids": "~Jieren_Deng2;~Haojian_Zhang1;~Kun_Ding2;~Jianhua_Hu2;~Xingxuan_Zhang2;~Yunkuan_Wang1", "gender": "M;M;M;M;M;M", "homepage": "https://jarintotiondin.github.io/;;;http://www.ia.cas.cn/sourcedb_ia_cas/cn/iaexpert/202008/t20200805_5649727.html;;https://people.ucas.ac.cn/~wangyunkuan", "dblp": "274/1449;;;53/2957;226/2478;43/2005", "google_scholar": "mC2TOZ4AAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;;", "orcid": "0000-0002-5738-0927;0000-0001-8447-0269;;;0000-0002-7723-9533;", "linkedin": ";;;;;", "or_profile": "~Jieren_Deng2;~Haojian_Zhang1;~Kun_Ding2;~Jianhua_Hu2;~Xingxuan_Zhang2;~Yunkuan_Wang1", "aff": "Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Institute of automation;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;Assistant Professor;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\ndeng2024zeroshot,\ntitle={Zero-shot Generalizable Incremental Learning for Vision-Language Object Detection},\nauthor={Jieren Deng and Haojian Zhang and Kun Ding and Jianhua Hu and Xingxuan Zhang and Yunkuan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZNqHm0a35E}\n}", "github": "", "reviewers": "NuJX;8RRL;jMVp;BCvk", "pdf_size": 8679265, "rating": "5;6;6;7", "confidence": "4;5;4;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "113;66;69;80", "wc_strengths": "133;22;134;49", "wc_weaknesses": "331;101;120;173", "wc_questions": "8;380;225;101", "wc_limitations": "1;4;42;6", "wc_review": "586;573;590;409", "wc_reply_reviewers": "101;13;56;185", "wc_reply_authors": "403;0;159;67", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 18.641351882307248 ], "wc_strengths_avg": [ 84.5, 49.92243984422236 ], "wc_weaknesses_avg": [ 181.25, 90.3946209682855 ], "wc_questions_avg": [ 178.5, 139.5 ], "wc_limitations_avg": [ 13.25, 16.69393602479655 ], "wc_review_avg": [ 539.5, 75.60588601425157 ], "wc_reply_reviewers_avg": [ 88.75, 63.68820534447489 ], "wc_reply_authors_avg": [ 157.25, 152.69966437422187 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17052140831576619204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Institute of Automation", "aff_unique_dep": ";", "aff_unique_url": "http://www.cas.cn;", "aff_unique_abbr": "CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "User-item fairness tradeoffs in recommendations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94638", "id": "ZOZjMs3JTs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZOZjMs3JTs", "openreview": "https://openreview.net/forum?id=ZOZjMs3JTs", "poster": "", "project": "", "author_site": "Sophie Greenwood, Sudalakshmee Chiniah, Nikhil Garg", "tldr": "", "abstract": "In the basic recommendation paradigm, the most (predicted) relevant item is recommended to each user. This may result in some items receiving lower exposure than they \"should\"; to counter this, several algorithmic approaches have been developed to ensure *item fairness*. These approaches necessarily degrade recommendations for some users to improve outcomes for items, leading to *user fairness* concerns. In turn, a recent line of work has focused on developing algorithms for multi-sided fairness, to jointly optimize user fairness, item fairness, and overall recommendation quality. This induces the question: *what is the tradeoff between these objectives, and what are the characteristics of (multi-objective) optimal solutions?* Theoretically, we develop a model of recommendations with user and item fairness objectives and characterize the solutions of fairness-constrained optimization. We identify two phenomena: (a) when user preferences are diverse, there is \"free\" item and user fairness; and (b) users whose preferences are misestimated can be *especially* disadvantaged by item fairness constraints. Empirically, we prototype a recommendation system for preprints on arXiv and implement our framework, measuring the phenomena in practice and showing how these phenomena inform the *design* of markets with recommendation systems-intermediated matching.", "keywords": "recommendation systems;algorithmic fairness", "primary_area": "fairness", "supplementary_material": "", "author": "Sophie Greenwood;Sudalakshmee Chiniah;Nikhil Garg", "authorids": "~Sophie_Greenwood1;~Sudalakshmee_Chiniah1;~Nikhil_Garg2", "gender": ";F;", "homepage": ";;https://gargnikhil.com/", "dblp": ";;83/6058-1", "google_scholar": ";;8qSK3noAAAAJ", "orcid": ";;0000-0002-1988-792X", "linkedin": ";sudalakshmee-chiniah/;", "or_profile": "~Sophie_Greenwood1;~Sudalakshmee_Chiniah1;~Nikhil_Garg2", "aff": ";Cornell University;Cornell University", "aff_domain": ";cornell.edu;cornell.edu", "position": ";MS student;Assistant Professor", "bibtex": "@inproceedings{\ngreenwood2024useritem,\ntitle={User-item fairness tradeoffs in recommendations},\nauthor={Sophie Greenwood and Sudalakshmee Chiniah and Nikhil Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZOZjMs3JTs}\n}", "github": "", "reviewers": "t7QC;3QdJ;rEVK;yH5N", "pdf_size": 1537541, "rating": "3;6;7;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "1;3;3;4", "presentation": "2;2;3;4", "wc_summary": "82;123;68;51", "wc_strengths": "32;40;87;20", "wc_weaknesses": "432;54;93;28", "wc_questions": "2;149;56;1", "wc_limitations": "2;59;1;1", "wc_review": "550;425;305;101", "wc_reply_reviewers": "0;12;23;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 81.0, 26.61766330841233 ], "wc_strengths_avg": [ 44.75, 25.410381736605217 ], "wc_weaknesses_avg": [ 151.75, 163.4478127721506 ], "wc_questions_avg": [ 52.0, 60.261928279802 ], "wc_limitations_avg": [ 15.75, 24.973736204260668 ], "wc_review_avg": [ 345.25, 165.49981117814002 ], "wc_reply_reviewers_avg": [ 10.0, 8.631338250816034 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9684959969581861, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9451682419841802857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";cornell.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Benchmark Dataset for Event-Guided Human Pose Estimation and Tracking in Extreme Conditions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97631", "id": "ZQy6dGlBay", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZQy6dGlBay", "openreview": "https://openreview.net/forum?id=ZQy6dGlBay", "poster": "/media/PosterPDFs/NeurIPS%202024/97631.png?t=1729839079.5846946", "project": "", "author_site": "Hoonhee Cho, Taewoo Kim, Yuhwan Jeong, Kuk-Jin Yoon", "tldr": "", "abstract": "Multi-person pose estimation and tracking have been actively researched by the computer vision community due to their practical applicability. However, existing human pose estimation and tracking datasets have only been successful in typical scenarios, such as those without motion blur or with well-lit conditions. These RGB-based datasets are limited to learning under extreme motion blur situations or poor lighting conditions, making them inherently vulnerable to such scenarios.\nAs a promising solution, bio-inspired event cameras exhibit robustness in extreme scenarios due to their high dynamic range and micro-second level temporal resolution. Therefore, in this paper, we introduce a new hybrid dataset encompassing both RGB and event data for human pose estimation and tracking in two extreme scenarios: low-light and motion blur environments. The proposed Event-guided Human Pose Estimation and Tracking in eXtreme Conditions (EHPT-XC) dataset covers cases of motion blur caused by dynamic objects and low-light conditions individually as well as both simultaneously. With EHPT-XC, we aim to inspire researchers to tackle pose estimation and tracking in extreme conditions by leveraging the advantageous of the event camera. Project pages are available at https://github.com/Chohoonhee/EHPT-XC.", "keywords": "human pose estimation;event cameras;multi-modal dataset", "primary_area": "", "supplementary_material": "/attachment/36f59cdfe5162124a076eb000a8df61daf462d15.pdf", "author": "Hoonhee Cho;Taewoo Kim;Yuhwan Jeong;Kuk-Jin Yoon", "authorids": "~Hoonhee_Cho1;~Taewoo_Kim2;~Yuhwan_Jeong1;~Kuk-Jin_Yoon1", "gender": ";M;M;M", "homepage": ";;http://vi.kaist.ac.kr/project/yu-hwan-jeong/;", "dblp": "323/9541;16/2599-3;366/2219;42/5677", "google_scholar": "https://scholar.google.co.kr/citations?hl=ko;SzKw5oYAAAAJ;12QxXIsAAAAJ;1NvBj_gAAAAJ", "orcid": "0000-0003-0896-6793;0000-0002-8608-9514;0009-0002-0279-146X;", "linkedin": ";taewookim-a85270168/;;", "or_profile": "~Hoonhee_Cho1;~Taewoo_Kim2;~Yuhwan_Jeong1;~Kuk-Jin_Yoon1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\ncho2024a,\ntitle={A Benchmark Dataset for Event-Guided Human Pose Estimation and Tracking in Extreme Conditions},\nauthor={Hoonhee Cho and Taewoo Kim and Yuhwan Jeong and Kuk-Jin Yoon},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ZQy6dGlBay}\n}", "github": "", "reviewers": "bcFz;TQDy;Ee7p;eW5U", "pdf_size": 1834821, "rating": "4;6;7;8", "confidence": "4;3;3;5", "wc_summary_and_contributions": "75;106;46;63", "wc_strengths": "42;2;41;52", "wc_improvement": "93;2;8;9", "wc_limitations": "1;4;1;51", "wc_correctness": "1;19;1;12", "wc_clarity": "1;5;1;5", "wc_relation_to_prior_work": "1;8;1;87", "wc_documentation": "1;36;9;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "216;183;109;284", "wc_reply_reviewers": "0;206;44;39", "wc_reply_authors": "943;1230;1082;3159", "reply_reviewers": "0;2;1;1", "reply_authors": "4;6;5;8", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 72.5, 21.914607000811127 ], "wc_strengths_avg": [ 34.25, 19.109879643786353 ], "wc_improvement_avg": [ 28.0, 37.62313118282422 ], "wc_limitations_avg": [ 14.25, 21.25294097295713 ], "wc_correctness_avg": [ 8.25, 7.660776723022281 ], "wc_clarity_avg": [ 3.0, 2.0 ], "wc_relation_to_prior_work_avg": [ 24.25, 36.34126442489309 ], "wc_documentation_avg": [ 12.5, 13.865424623862047 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 198.0, 62.98015560476173 ], "wc_reply_reviewers_avg": [ 72.25, 79.07709845460947 ], "wc_reply_authors_avg": [ 1603.5, 903.7844046010088 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 5.75, 1.479019945774904 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25482359571881275, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17296411532873722346&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "WFCRL: A Multi-Agent Reinforcement Learning Benchmark for Wind Farm Control", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97630", "id": "ZRMAhpZ3ED", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZRMAhpZ3ED", "openreview": "https://openreview.net/forum?id=ZRMAhpZ3ED", "poster": "", "project": "", "author_site": "Claire Bizon Monroc, Ana Busic, Donatien Dubuc, Jiamin Zhu", "tldr": "", "abstract": "The wind farm control problem is challenging, since conventional model-based control strategies require tractable models of complex aerodynamical interactions between the turbines and suffer from the curse of dimension when the number of turbines increases. Recently, model-free and multi-agent reinforcement learning approaches have been used to address this challenge. In this article, we introduce WFCRL (Wind Farm Control with Reinforcement Learning), the first suite of multi-agent reinforcement learning environments for the wind farm control problem. WFCRL frames a cooperative Multi-Agent Reinforcement Learning (MARL) problem: each turbine is an agent and can learn to adjust its yaw, pitch or torque to maximize the common objective (e.g. the total power production of the farm). WFCRL also offers turbine load observations that will allow to optimize the farm performance while limiting turbine structural damages. Interfaces with two state-of-the-art farm simulators are implemented in WFCRL: a static simulator (Floris) and a dynamic simulator (FAST.farm). For each simulator, $10$ wind layouts are provided, including $5$ real wind farms. Two state-of-the-art online MARL algorithms are implemented to illustrate the scaling challenges. As learning online on FAST.Farm is highly time-consuming, WFCRL offers the possibility of designing transfer learning strategies from Floris to FAST.Farm.", "keywords": "Multi-agent;Reinforcement learning;Benchmark;Environment;Open-source", "primary_area": "", "supplementary_material": "", "author": "Claire Bizon Monroc;Ana Busic;Donatien Dubuc;Jiamin Zhu", "authorids": "~Claire_Bizon_Monroc1;~Ana_Busic1;~Donatien_Dubuc1;~Jiamin_Zhu1", "gender": ";F;;", "homepage": ";;;", "dblp": ";57/3580;;", "google_scholar": "p8Z2qLUAAAAJ;https://scholar.google.fr/citations?user=u-RXvmAAAAAJ;AP5hW2MAAAAJ;", "orcid": ";;0000-0002-8300-1353;", "linkedin": ";;;jiamin-zhu-97606826/", "or_profile": "~Claire_Bizon_Monroc1;~Ana_Busic1;~Donatien_Dubuc1;~Jiamin_ZHU2", "aff": "INRIA;Ecole Normale Sup\u00e9rieure;IFP Energies Nouvelles;IFP Energies nouvelles", "aff_domain": "inria.fr;di.ens.fr;ifp.fr;ifpen.fr", "position": "PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmonroc2024wfcrl,\ntitle={{WFCRL}: A Multi-Agent Reinforcement Learning Benchmark for Wind Farm Control},\nauthor={Claire Bizon Monroc and Ana Busic and Donatien Dubuc and Jiamin Zhu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ZRMAhpZ3ED}\n}", "github": "", "reviewers": "i4kn;Sh7C;MFL8", "pdf_size": 4824836, "rating": "6;6;8", "confidence": "5;4;5", "wc_summary_and_contributions": "70;121;29", "wc_strengths": "43;174;37", "wc_improvement": "126;238;50", "wc_limitations": "1;80;19", "wc_correctness": "1;49;4", "wc_clarity": "1;49;22", "wc_relation_to_prior_work": "1;14;7", "wc_documentation": "1;8;1", "wc_additional_feedback": "1;1;1", "wc_review": "245;734;170", "wc_reply_reviewers": "0;12;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 73.33333333333333, 37.63272807307786 ], "wc_strengths_avg": [ 84.66666666666667, 63.21568019267231 ], "wc_improvement_avg": [ 138.0, 77.21830525637472 ], "wc_limitations_avg": [ 33.333333333333336, 33.80663971602159 ], "wc_correctness_avg": [ 18.0, 21.95449840010015 ], "wc_clarity_avg": [ 24.0, 19.6468827043885 ], "wc_relation_to_prior_work_avg": [ 7.333333333333333, 5.312459150169743 ], "wc_documentation_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 383.0, 250.07598845151048 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QgvtAredpdYJ:scholar.google.com/&scioq=WFCRL:+A+Multi-Agent+Reinforcement+Learning+Benchmark+for+Wind+Farm+Control&hl=en&as_sdt=0,23", "gs_version_total": 4, "email": "inria.fr;di.ens.fr;ifp.fr;ifpen.fr", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "INRIA;Ecole Normale Sup\u00e9rieure;IFP Energies Nouvelles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inria.fr;https://www.ens.fr;https://www.ifpenergiesnouvelles.com", "aff_unique_abbr": "INRIA;ENS;IFPEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Learning the Expected Core of Strictly Convex Stochastic Cooperative Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94637", "id": "ZRYFftR4xn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZRYFftR4xn", "openreview": "https://openreview.net/forum?id=ZRYFftR4xn", "poster": "/media/PosterPDFs/NeurIPS%202024/94637.png?t=1731586012.9260585", "project": "", "author_site": "Phuong Nam Tran, The Anh Ta, shuqing shi, Debmalya Mandal, Yali Du, Long Tran-Thanh", "tldr": "", "abstract": "Reward allocation, also known as the credit assignment problem, has been an important topic in economics, engineering, and machine learning. An important concept in reward allocation is the core, which is the set of stable allocations where no agent has the motivation to deviate from the grand coalition. In previous works, computing the core requires either knowledge of the reward function in deterministic games or the reward distribution in stochastic games. However, this is unrealistic, as the reward function or distribution is often only partially known and may be subject to uncertainty. In this paper, we consider the core learning problem in stochastic cooperative games, where the reward distribution is unknown. Our goal is to learn the expected core, that is, the set of allocations that are stable in expectation, given an oracle that returns a stochastic reward for an enquired coalition each round. Within the class of strictly convex games, we present an algorithm named \\texttt{Common-Points-Picking} that returns a point in the expected core given a polynomial number of samples, with high probability. To analyse the algorithm, we develop a new extension of the separation hyperplane theorem for multiple convex sets.t.", "keywords": "Cooperative game theory;convex geometry;bandit theory.", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/a22f66c6aa41752a961a021c57e49e23b05399af.zip", "author": "Nam Phuong Tran;The-Anh Ta;Shuqing Shi;Debmalya Mandal;Yali Du;Long Tran-Thanh", "authorids": "~Nam_Phuong_Tran1;~The-Anh_Ta1;~Shuqing_Shi1;~Debmalya_Mandal2;~Yali_Du1;~Long_Tran-Thanh1", "gender": "M;;M;M;;", "homepage": "https://namtrankekl.github.io/;;;https://debmandal.github.io;;https://warwick.ac.uk/fac/sci/dcs/people/long_tran-thanh/", "dblp": "231/9152;;314/5372;151/3685;;46/8333", "google_scholar": "8M0NXFcAAAAJ;;https://scholar.google.com.au/citations?hl=en;OquWQpEAAAAJ;;https://scholar.google.co.uk/citations?user=YBQai3gAAAAJ", "orcid": "0000-0003-0983-8830;;;;;", "linkedin": ";;;;;", "or_profile": "~Nam_Phuong_Tran1;~The-Anh_Ta1;~Shuqing_Shi1;~Debmalya_Mandal2;~Yali_Du1;~Long_Tran-Thanh1", "aff": "University of Warwick;;King's College London, University of London;University of Warwick;;The university of Warwick", "aff_domain": "warwick.ac.uk;;kcl.ac.uk;warwick.ac.uk;;warwick.ac.uk", "position": "PhD student;;PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ntran2024learning,\ntitle={Learning the Expected Core of Strictly Convex Stochastic Cooperative Games},\nauthor={Nam Phuong Tran and The-Anh Ta and Shuqing Shi and Debmalya Mandal and Yali Du and Long Tran-Thanh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZRYFftR4xn}\n}", "github": "", "reviewers": "oJVQ;f4nV;1Z68", "pdf_size": 1150619, "rating": "5;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "56;63;215", "wc_strengths": "31;63;32", "wc_weaknesses": "90;374;44", "wc_questions": "45;81;44", "wc_limitations": "13;6;1", "wc_review": "235;587;336", "wc_reply_reviewers": "0;64;13", "wc_reply_authors": "40;69;0", "reply_reviewers": "0;1;1", "reply_authors": "2;3;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 111.33333333333333, 73.35908638713786 ], "wc_strengths_avg": [ 42.0, 14.854853303438128 ], "wc_weaknesses_avg": [ 169.33333333333334, 145.9345363129951 ], "wc_questions_avg": [ 56.666666666666664, 17.21110752456745 ], "wc_limitations_avg": [ 6.666666666666667, 4.9216076867444665 ], "wc_review_avg": [ 386.0, 147.9887383102737 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 27.620443314488792 ], "wc_reply_authors_avg": [ 36.333333333333336, 28.288199345702832 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ubcgbu_VpFcJ:scholar.google.com/&scioq=Learning+the+Expected+Core+of+Strictly+Convex+Stochastic+Cooperative+Games&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "warwick.ac.uk;;kcl.ac.uk;warwick.ac.uk;;warwick.ac.uk", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Warwick;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.warwick.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "Warwick;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning to compute Gr\u00f6bner bases", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94636", "id": "ZRz7XlxBzQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZRz7XlxBzQ", "openreview": "https://openreview.net/forum?id=ZRz7XlxBzQ", "poster": "", "project": "", "author_site": "Hiroshi Kera, Yuki Ishihara, Yuta Kambe, Tristan Vaccon, Kazuhiro Yokoyama", "tldr": "", "abstract": "Solving a polynomial system, or computing an associated Gr\u00f6bner basis, has been a fundamental task in computational algebra. However, it is also known for its notorious doubly exponential time complexity in the number of variables in the worst case. This paper is the first to address the learning of Gr\u00f6bner basis computation with Transformers. The training requires many pairs of a polynomial system and the associated Gr\u00f6bner basis, raising two novel algebraic problems: random generation of Gr\u00f6bner bases and transforming them into non-Gr\u00f6bner ones, termed as backward Gr\u00f6bner problem. We resolve these problems with 0-dimensional radical ideals, the ideals appearing in various applications. Further, we propose a hybrid input embedding to handle coefficient tokens with continuity bias and avoid the growth of the vocabulary set. The experiments show that our dataset generation method is a few orders of magnitude faster than a naive approach, overcoming a crucial challenge in learning to compute Gr\u00f6bner bases, and Gr\u00f6bner computation is learnable in a particular class.", "keywords": "Transformer; Gr\u00f6bner bases; Computational algebra", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Hiroshi Kera;Yuki Ishihara;Yuta Kambe;Tristan Vaccon;Kazuhiro Yokoyama", "authorids": "~Hiroshi_Kera1;~Yuki_Ishihara1;~Yuta_Kambe1;~Tristan_Vaccon1;~Kazuhiro_Yokoyama1", "gender": "M;M;M;M;M", "homepage": ";;https://sites.google.com/view/yuta-kambe/;https://www.unilim.fr/pages_perso/tristan.vaccon/;", "dblp": "190/2671;179/4327.html;;;39/5838.html", "google_scholar": "https://scholar.google.co.jp/citations?user=M4Krt5gAAAAJ;;;khGu6GAAAAAJ;https://scholar.google.co.jp/citations?user=WUG44TcAAAAJ", "orcid": ";;0000-0002-8260-0186;0000-0003-4208-8349;", "linkedin": ";;;;", "or_profile": "~Hiroshi_Kera1;~Yuki_Ishihara1;~Yuta_Kambe1;~Tristan_Vaccon1;~Kazuhiro_Yokoyama1", "aff": "Chiba University;Tokyo University of Science;Mitsubishi Electric Information Technology R&D Center;Universit\u00e9 de Limoges;Rikkyo University (St. Paul's University)", "aff_domain": "chiba-u.jp;tus.ac.jp;mitsubishielectric.co.jp;unilim.fr;rikkyo.ac.jp", "position": "Assistant Professor;Assistant Professor;Postdoc;Associate Professor;Emeritus", "bibtex": "@inproceedings{\nkera2024learning,\ntitle={Learning to compute Gr\\\"obner bases},\nauthor={Hiroshi Kera and Yuki Ishihara and Yuta Kambe and Tristan Vaccon and Kazuhiro Yokoyama},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZRz7XlxBzQ}\n}", "github": "", "reviewers": "q3D1;YTLb;BQDD;pNNy;WoAv", "pdf_size": 1488757, "rating": "3;5;5;8;8", "confidence": "4;3;3;5;2", "soundness": "2;2;3;4;3", "novelty": "2;3;3;4;3", "presentation": "1;3;3;4;3", "wc_summary": "87;97;92;135;63", "wc_strengths": "24;26;60;156;36", "wc_weaknesses": "178;184;375;44;28", "wc_questions": "83;49;33;212;57", "wc_limitations": "25;1;14;33;1", "wc_review": "397;357;574;580;185", "wc_reply_reviewers": "9;10;0;19;23", "wc_reply_authors": "71;0;0;7;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 5.8, 1.9390719429665315 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 94.8, 23.24134247413432 ], "wc_strengths_avg": [ 60.4, 49.48373470141477 ], "wc_weaknesses_avg": [ 161.8, 124.89099246943312 ], "wc_questions_avg": [ 86.8, 64.65415686558754 ], "wc_limitations_avg": [ 14.8, 12.781236246936365 ], "wc_review_avg": [ 418.6, 147.66800601348962 ], "wc_reply_reviewers_avg": [ 12.2, 8.084553172563094 ], "wc_reply_authors_avg": [ 15.6, 27.832355272236665 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.06068350455470512, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2948111103367792428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "chiba-u.jp;tus.ac.jp;mitsubishielectric.co.jp;unilim.fr;rikkyo.ac.jp", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Chiba University;Tokyo University of Science;Mitsubishi Electric;Universit\u00e9 de Limoges;Rikkyo University", "aff_unique_dep": ";;Information Technology R&D Center;;", "aff_unique_url": "https://www.chiba-u.ac.jp;https://www.tus.ac.jp;https://www.mitsubishielectric.com;https://www.unilim.fr;https://www.rikkyo.ac.jp", "aff_unique_abbr": "Chiba U;TUS;MEIT;Unilim;Rikkyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Japan;France" }, { "title": "Reconstructing the Image Stitching Pipeline: Integrating Fusion and Rectangling into a Unified Inpainting Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94635", "id": "ZViYPzh9Wq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZViYPzh9Wq", "openreview": "https://openreview.net/forum?id=ZViYPzh9Wq", "poster": "/media/PosterPDFs/NeurIPS%202024/94635.png?t=1729406104.7424629", "project": "", "author_site": "Ziqi Xie, Weidong Zhao, XianhuiLiu, Jian Zhao, Ning Jia", "tldr": "", "abstract": "Deep learning-based image stitching pipelines are typically divided into three cascading stages: registration, fusion, and rectangling. Each stage requires its own network training and is tightly coupled to the others, leading to error propagation and posing significant challenges to parameter tuning and system stability. This paper proposes the Simple and Robust Stitcher (SRStitcher), which \nrevolutionizes the image stitching pipeline by simplifying the fusion and rectangling stages into a unified inpainting model, requiring no model training or fine-tuning. We reformulate the problem definitions of the fusion and rectangling stages and demonstrate that they can be effectively integrated into an inpainting task. Furthermore, we design the weighted masks to guide the reverse process in a pre-trained large-scale diffusion model, implementing this integrated inpainting task in a single inference. Through extensive experimentation, we verify the interpretability and generalization capabilities of this unified model, demonstrating that SRStitcher outperforms state-of-the-art methods in both performance and stability.", "keywords": "Image Stitching;Image Fusion;Image Rectangling;Diffusion Model", "primary_area": "machine_vision", "supplementary_material": "/attachment/9d9f874dd010b1943931fef2091344a831b30d44.zip", "author": "Xieziqi;Weidong Zhao;XianhuiLiu;Jian Zhao;Ning Jia", "authorids": "~Xieziqi1;~Weidong_Zhao3;~XianhuiLiu2;~Jian_Zhao9;~Ning_Jia2", "gender": "F;M;M;M;", "homepage": "https://github.com/yayoyo66;;;https://github.com/zhaojianaaa;", "dblp": "237/7877;;;;", "google_scholar": ";;2vu957gAAAAJ;https://scholar.google.com.hk/citations?user=ZczdfpUAAAAJ;", "orcid": "0000-0002-6272-0164;0000-0003-4164-5190;;0000-0002-8163-1116;", "linkedin": ";;;;", "or_profile": "~Xieziqi1;~Weidong_Zhao3;~XianhuiLiu2;~Jian_Zhao9;~Ning_Jia2", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;", "position": "PhD student;Full Professor;Full Professor;PhD student;", "bibtex": "@inproceedings{\nxieziqi2024reconstructing,\ntitle={Reconstructing the Image Stitching Pipeline: Integrating Fusion and Rectangling into a Unified Inpainting Model},\nauthor={Xieziqi and Weidong Zhao and XianhuiLiu and Jian Zhao and Ning Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZViYPzh9Wq}\n}", "github": "", "reviewers": "yn5D;pLdt;m57w;GE1w", "pdf_size": 6826010, "rating": "5;6;6;6", "confidence": "5;4;4;4", "soundness": "3;3;3;2", "novelty": "2;2;3;2", "presentation": "3;3;3;2", "wc_summary": "58;86;91;76", "wc_strengths": "88;37;153;40", "wc_weaknesses": "234;77;118;129", "wc_questions": "12;37;71;126", "wc_limitations": "15;13;55;7", "wc_review": "407;250;488;378", "wc_reply_reviewers": "32;95;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 12.616952880945542 ], "wc_strengths_avg": [ 79.5, 47.01329599166602 ], "wc_weaknesses_avg": [ 139.5, 57.89861829094024 ], "wc_questions_avg": [ 61.5, 42.72294465506796 ], "wc_limitations_avg": [ 22.5, 18.993419913222578 ], "wc_review_avg": [ 380.75, 85.57854579273943 ], "wc_reply_reviewers_avg": [ 35.5, 36.16973873281365 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1633028285760037002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Simple Remedy for Dataset Bias via Self-Influence: A Mislabeled Sample Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94634", "id": "ZVrrPNqHFw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZVrrPNqHFw", "openreview": "https://openreview.net/forum?id=ZVrrPNqHFw", "poster": "/media/PosterPDFs/NeurIPS%202024/94634.png?t=1731751412.7769346", "project": "", "author_site": "Yeonsung Jung, Jaeyun Song, June Yong Yang, Jin-Hwa Kim, Sung-Yub Kim, Eunho Yang", "tldr": "", "abstract": "Learning generalized models from biased data is an important undertaking toward fairness in deep learning. To address this issue, recent studies attempt to identify and leverage bias-conflicting samples free from spurious correlations without prior knowledge of bias or an unbiased set. However, spurious correlation remains an ongoing challenge, primarily due to the difficulty in correctly detecting these samples. In this paper, inspired by the similarities between mislabeled samples and bias-conflicting samples, we approach this challenge from a novel perspective of mislabeled sample detection. Specifically, we delve into Influence Function, one of the standard methods for mislabeled sample detection, for identifying bias-conflicting samples and propose a simple yet effective remedy for biased models by leveraging them. Through comprehensive analysis and experiments on diverse datasets, we demonstrate that our new perspective can boost the precision of detection and rectify biased models effectively. Furthermore, our approach is complementary to existing methods, showing performance improvement even when applied to models that have already undergone recent debiasing techniques.", "keywords": "Debiasing;Spurious correlation;Robust learning;Dataset bias", "primary_area": "fairness", "supplementary_material": "/attachment/71fae62f391e331ae723265ac5b5f4017853da38.zip", "author": "Yeonsung Jung;Jaeyun Song;June Yong Yang;Jin-Hwa Kim;Sung-Yub Kim;Eunho Yang", "authorids": "~Yeonsung_Jung1;~Jaeyun_Song2;~June_Yong_Yang1;~Jin-Hwa_Kim1;~Sung-Yub_Kim1;~Eunho_Yang1", "gender": ";M;;Unspecified;M;M", "homepage": "https://yeonsungjung.github.io/;;http://mli.kaist.ac.kr/people/;http://wityworks.com;https://sites.google.com/site/hleehome2/;https://sungyubkim.github.io", "dblp": "264/2809;289/2048;277/5624;48/258;96/2621;236/4532", "google_scholar": "https://scholar.google.com/citations?hl=ko;;nkLNWg0AAAAJ;https://scholar.google.co.kr/citations?user=3f2wPekAAAAJ;;m2rhgrkAAAAJ", "orcid": ";;;0000-0002-0423-0415;;", "linkedin": "yeonsung-jung-a50015213/;jaeyun-song-9a4111213/;;;;", "or_profile": "~Yeonsung_Jung1;~Jaeyun_Song2;~June_Yong_Yang1;~Jin-Hwa_Kim1;~Eunho_Yang1;~SungYub_Kim1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;NAVER;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;navercorp.com;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;Research Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\njung2024a,\ntitle={A Simple Remedy for Dataset Bias via Self-Influence: A Mislabeled Sample Perspective},\nauthor={Yeonsung Jung and Jaeyun Song and June Yong Yang and Jin-Hwa Kim and Sung-Yub Kim and Eunho Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZVrrPNqHFw}\n}", "github": "", "reviewers": "yem8;BCqo;xCjP;hyta", "pdf_size": 14719975, "rating": "5;5;5;6", "confidence": "5;4;3;3", "soundness": "2;3;2;2", "novelty": "2;3;3;2", "presentation": "2;2;3;3", "wc_summary": "57;42;86;74", "wc_strengths": "95;21;26;22", "wc_weaknesses": "272;142;268;160", "wc_questions": "71;14;27;209", "wc_limitations": "24;1;7;9", "wc_review": "519;220;414;474", "wc_reply_reviewers": "174;79;51;147", "wc_reply_authors": "800;442;890;805", "reply_reviewers": "4;2;1;2", "reply_authors": "5;3;4;4", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.75, 16.69393602479655 ], "wc_strengths_avg": [ 41.0, 31.232995373482833 ], "wc_weaknesses_avg": [ 210.5, 59.85607738567572 ], "wc_questions_avg": [ 80.25, 77.27669441688096 ], "wc_limitations_avg": [ 10.25, 8.46684711093805 ], "wc_review_avg": [ 406.75, 114.07316730940717 ], "wc_reply_reviewers_avg": [ 112.75, 49.690919693642215 ], "wc_reply_authors_avg": [ 734.25, 172.47952777068934 ], "reply_reviewers_avg": [ 2.25, 1.0897247358851685 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=236404949995836183&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;navercorp.com;kaist.ac.kr;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.naver.com", "aff_unique_abbr": "KAIST;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "NeoRL: Efficient Exploration for Nonepisodic RL", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94633", "id": "ZWNdgc13aw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZWNdgc13aw", "openreview": "https://openreview.net/forum?id=ZWNdgc13aw", "poster": "/media/PosterPDFs/NeurIPS%202024/94633.png?t=1731462471.7208104", "project": "", "author_site": "Bhavya, Lenart Treven, Florian Dorfler, Stelian Coros, Andreas Krause", "tldr": "", "abstract": "We study the problem of nonepisodic reinforcement learning (RL) for nonlinear dynamical systems, where the system dynamics are unknown and the RL agent has to learn from a single trajectory, i.e., without resets. We propose **N**on**e**pisodic **O**ptistmic **RL** (NeoRL), an approach based on the principle of optimism in the face of uncertainty. NeoRL uses well-calibrated probabilistic models and plans optimistically w.r.t. the epistemic uncertainty about the unknown dynamics. Under continuity and bounded energy assumptions on the system, we\nprovide a first-of-its-kind regret bound of $\\mathcal{O}(\\beta_T \\sqrt{T \\Gamma_T})$ for general nonlinear systems with Gaussian process dynamics. We compare NeoRL to other baselines on several deep RL environments and empirically demonstrate that NeoRL achieves the optimal average cost while incurring the least regret.", "keywords": "reinforcement learning;single trajectory;nonepisodic setting;average cost MDP", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/ab4a21f65be2137f745758f6776d756ec563fd3c.zip", "author": "Bhavya Sukhija;Lenart Treven;Florian Dorfler;Stelian Coros;Andreas Krause", "authorids": "~Bhavya_Sukhija1;~Lenart_Treven1;~Florian_Dorfler1;~Stelian_Coros1;~Andreas_Krause1", "gender": "M;M;M;M;M", "homepage": ";;http://people.ee.ethz.ch/~floriand/;http://crl.ethz.ch/index.html;https://las.inf.ethz.ch/krausea", "dblp": "312/4742;267/9666;;;87/1831-1.html", "google_scholar": ";CDnzTWkAAAAJ;https://scholar.google.com/citations?view_op=list_works;sX31JjwAAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": "0000-0001-6238-9734;;0000-0002-9649-5305;;0000-0001-7260-9673", "linkedin": ";lenart-treven/;;;krausea/", "or_profile": "~Bhavya_Sukhija1;~Lenart_Treven1;~Florian_Dorfler1;~Stelian_Coros1;~Andreas_Krause1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;;ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;;ethz.ch;ethz.ch", "position": "PhD student;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsukhija2024neorl,\ntitle={Neo{RL}: Efficient Exploration for Nonepisodic {RL}},\nauthor={Bhavya Sukhija and Lenart Treven and Florian Dorfler and Stelian Coros and Andreas Krause},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZWNdgc13aw}\n}", "github": "", "reviewers": "17bq;WMig;zoj1", "pdf_size": 867777, "rating": "6;7;8", "confidence": "2;2;3", "soundness": "3;4;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "70;26;41", "wc_strengths": "66;62;77", "wc_weaknesses": "281;104;40", "wc_questions": "48;48;23", "wc_limitations": "13;6;18", "wc_review": "478;246;199", "wc_reply_reviewers": "31;36;13", "wc_reply_authors": "50;94;14", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 45.666666666666664, 18.263503375736967 ], "wc_strengths_avg": [ 68.33333333333333, 6.342099196813483 ], "wc_weaknesses_avg": [ 141.66666666666666, 101.92916930670806 ], "wc_questions_avg": [ 39.666666666666664, 11.785113019775793 ], "wc_limitations_avg": [ 12.333333333333334, 4.921607686744467 ], "wc_review_avg": [ 307.6666666666667, 121.96265366450866 ], "wc_reply_reviewers_avg": [ 26.666666666666668, 9.877021593352701 ], "wc_reply_authors_avg": [ 52.666666666666664, 32.71425105702746 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16309989554883949263&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ethz.ch;ethz.ch;;ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Latent Diffusion for Neural Spiking Data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94632", "id": "ZX6CEo1Wtv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZX6CEo1Wtv", "openreview": "https://openreview.net/forum?id=ZX6CEo1Wtv", "poster": "", "project": "", "author_site": "Jaivardhan Kapoor, Auguste Schulz, Julius Vetter, Felix Pei, Richard Gao, Jakob H Macke", "tldr": "", "abstract": "Modern datasets in neuroscience enable unprecedented inquiries into the relationship between complex behaviors and the activity of many simultaneously recorded neurons. While latent variable models can successfully extract low-dimensional embeddings from such recordings, using them to generate realistic spiking data, especially in a behavior-dependent manner, still poses a challenge. Here, we present Latent Diffusion for Neural Spiking data (LDNS), a diffusion-based generative model with a low-dimensional latent space: LDNS employs an autoencoder with structured state-space (S4) layers to project discrete high-dimensional spiking data into continuous time-aligned latents. On these inferred latents, we train expressive (conditional) diffusion models, enabling us to sample neural activity with realistic single-neuron and population spiking statistics. We validate LDNS on synthetic data, accurately recovering latent structure, firing rates, and spiking statistics. Next, we demonstrate its flexibility by generating variable-length data that mimics human cortical activity during attempted speech. We show how to equip LDNS with an expressive observation model that accounts for single-neuron dynamics not mediated by the latent state, further increasing the realism of generated samples. Finally, conditional LDNS trained on motor cortical activity during diverse reaching behaviors can generate realistic spiking data given reach direction or unseen reach trajectories. In summary, LDNS simultaneously enables inference of low-dimensional latents and realistic conditional generation of neural spiking datasets, opening up further possibilities for simulating experimentally testable hypotheses.", "keywords": "neural population;diffusion models;latent variable models;electrophysiology;brain-computer interfaces", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/8aa501ef6372a1053025e085c3572cd5bfed5364.zip", "author": "Jaivardhan Kapoor;Auguste Schulz;Julius Vetter;Felix C Pei;Richard Gao;Jakob H. Macke", "authorids": "~Jaivardhan_Kapoor1;~Auguste_Schulz1;~Julius_Vetter2;~Felix_C_Pei1;~Richard_Gao1;~Jakob_H._Macke1", "gender": "M;F;;M;;M", "homepage": ";;;;https://www.rdgao.com/;http://www.mackelab.org", "dblp": "203/8123;;368/7633.html;301/8911;211/3796;97/11106", "google_scholar": "_q2tc3YAAAAJ;cYZbJCkAAAAJ;hk1YdnwAAAAJ;bhHQOjgAAAAJ;a2o9IKYAAAAJ;FKOqtF8AAAAJ", "orcid": ";0000-0001-8616-3756;0009-0003-9839-9039;;;0000-0001-5154-8912", "linkedin": ";;;felix-pei-b41742196;;", "or_profile": "~Jaivardhan_Kapoor1;~Auguste_Schulz1;~Julius_Vetter2;~Felix_C_Pei1;~Richard_Gao1;~Jakob_H_Macke1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;PhD student;PhD student;Intern;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkapoor2024latent,\ntitle={Latent Diffusion for Neural Spiking Data},\nauthor={Jaivardhan Kapoor and Auguste Schulz and Julius Vetter and Felix C Pei and Richard Gao and Jakob H. Macke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZX6CEo1Wtv}\n}", "github": "", "reviewers": "pMhd;mYEj;y7Q4;AuW4", "pdf_size": 9180785, "rating": "6;7;7;7", "confidence": "3;4;4;4", "soundness": "2;3;2;4", "novelty": "3;2;3;3", "presentation": "3;4;3;2", "wc_summary": "82;364;78;72", "wc_strengths": "42;237;58;104", "wc_weaknesses": "10;366;92;483", "wc_questions": "112;134;180;25", "wc_limitations": "161;16;1;39", "wc_review": "407;1117;409;723", "wc_reply_reviewers": "0;28;144;156", "wc_reply_authors": "0;16;71;42", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 149.0, 124.18131904598211 ], "wc_strengths_avg": [ 110.25, 76.63672422539993 ], "wc_weaknesses_avg": [ 237.75, 193.4610748962178 ], "wc_questions_avg": [ 112.75, 56.29109609876148 ], "wc_limitations_avg": [ 54.25, 63.1006141016076 ], "wc_review_avg": [ 664.0, 291.44639301250584 ], "wc_reply_reviewers_avg": [ 82.0, 68.84765791223403 ], "wc_reply_authors_avg": [ 32.25, 26.929305598176867 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8279584910305078806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Towards Next-Generation Logic Synthesis: A Scalable Neural Circuit Generation Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94631", "id": "ZYNYhh3ocW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZYNYhh3ocW", "openreview": "https://openreview.net/forum?id=ZYNYhh3ocW", "poster": "/media/PosterPDFs/NeurIPS%202024/94631.png?t=1733459379.5083697", "project": "", "author_site": "Zhihai Wang, Jie Wang, Qingyue Yang, Yinqi Bai, Xing Li, Lei Chen, Jianye Hao, Mingxuan Yuan, Bin Li, Yongdong Zhang, Feng Wu", "tldr": "", "abstract": "Logic Synthesis (LS) aims to generate an optimized logic circuit satisfying a given functionality, which generally consists of circuit translation and optimization. It is a challenging and fundamental combinatorial optimization problem in integrated circuit design. Traditional LS approaches rely on manually designed heuristics to tackle the LS task, while machine learning recently offers a promising approach towards next-generation logic synthesis by neural circuit generation and optimization. In this paper, we first revisit the application of differentiable neural architecture search (DNAS) methods to circuit generation and found from extensive experiments that existing DNAS methods struggle to exactly generate circuits, scale poorly to large circuits, and exhibit high sensitivity to hyper-parameters. Then we provide three major insights for these challenges from extensive empirical analysis: 1) DNAS tends to overfit to too many skip-connections, consequently wasting a significant portion of the network's expressive capabilities; 2) DNAS suffers from the structure bias between the network architecture and the circuit inherent structure, leading to inefficient search; 3) the learning difficulty of different input-output examples varies significantly, leading to severely imbalanced learning. To address these challenges in a systematic way, we propose a novel regularized triangle-shaped circuit network generation framework, which leverages our key insights for completely accurate and scalable circuit generation. Furthermore, we propose an evolutionary algorithm assisted by reinforcement learning agent restarting technique for efficient and effective neural circuit optimization. Extensive experiments on four different circuit benchmarks demonstrate that our method can precisely generate circuits with up to 1200 nodes. Moreover, our synthesized circuits significantly outperform the state-of-the-art results from several competitive winners in IWLS 2022 and 2023 competitions.", "keywords": "Electronic Design Automation;Logic Synthesis;Neural Architecture Search", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zhihai Wang;Jie Wang;Qingyue Yang;Yinqi Bai;Xing Li;Lei Chen;Jianye HAO;Mingxuan Yuan;Bin Li;Yongdong Zhang;Feng Wu", "authorids": "~Zhihai_Wang1;~Jie_Wang1;~Qingyue_Yang1;~Yinqi_Bai1;~Xing_Li6;~Lei_Chen26;~Jianye_HAO1;~Mingxuan_Yuan1;~Bin_Li8;~Yongdong_Zhang2;~Feng_Wu1", "gender": "M;M;;M;M;;M;M;M;M;M", "homepage": "https://miralab.ai/people/zhihai-wang/;http://staff.ustc.edu.cn/~jwangx;http://www.baidu.com;https://www.ustc.edu.cn/;https://github.com/YouCanLX;https://scholar.google.com/citations?user=73rBLl4AAAAJ&hl=en;http://www.icdai.org/jianye.html;;http://staff.ustc.edu.cn/~binli;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;", "dblp": "35/4357;29/5259-5;;;;;21/7664.html;74/2356;89/6764-25;z/YongdongZhang;25/3972-1", "google_scholar": "EdLIBG8AAAAJ;OugG4dUAAAAJ;;;5N7JD9QAAAAJ;73rBLl4AAAAJ;;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;5bInRDEAAAAJ", "orcid": ";;;;;;0000-0002-0422-8235;0000-0002-2236-8784;0000-0002-2332-3959;0000-0003-0066-3448;", "linkedin": ";;;;;;;;;;", "or_profile": "~Zhihai_Wang1;~Jie_Wang1;~Qingyue_Yang1;~Yinqi_Bai1;~Xing_Li6;~Lei_Chen26;~Jianye_HAO1;~Mingxuan_Yuan1;~Bin_Li8;~Yongdong_Zhang2;~Feng_Wu1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Tianjin University;Huawei Technologies Ltd.;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;umass.edu;huawei.com;huawei.com;tju.edu.cn;huawei.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Full Professor;PhD student;Undergrad student;Researcher;Researcher;Associate Professor;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024towards,\ntitle={Towards Next-Generation Logic Synthesis: A Scalable Neural Circuit Generation Framework},\nauthor={Zhihai Wang and Jie Wang and Qingyue Yang and Yinqi Bai and Xing Li and Lei Chen and Jianye HAO and Mingxuan Yuan and Bin Li and Yongdong Zhang and Feng Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZYNYhh3ocW}\n}", "github": "", "reviewers": "Qczw;PMQo;qf83;gSL5", "pdf_size": 865347, "rating": "5;5;6;7", "confidence": "3;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;3;2;3", "wc_summary": "107;58;172;108", "wc_strengths": "35;37;222;73", "wc_weaknesses": "353;106;186;43", "wc_questions": "2;46;19;27", "wc_limitations": "3;10;40;5", "wc_review": "500;257;639;256", "wc_reply_reviewers": "43;20;112;22", "wc_reply_authors": "1097;23;3318;403", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;6;5", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 111.25, 40.48070528041724 ], "wc_strengths_avg": [ 91.75, 76.70519865041743 ], "wc_weaknesses_avg": [ 172.0, 116.14000172205957 ], "wc_questions_avg": [ 23.5, 15.819292019556375 ], "wc_limitations_avg": [ 14.5, 14.941552797483935 ], "wc_review_avg": [ 413.0, 164.03505722863025 ], "wc_reply_reviewers_avg": [ 49.25, 37.332124236373154 ], "wc_reply_authors_avg": [ 1210.25, 1276.3865744749903 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.5811388300841898 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18076509495806374715&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;umass.edu;huawei.com;huawei.com;tju.edu.cn;huawei.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;0;1;1;2;1;0;0;0", "aff_unique_norm": "University of Science and Technology of China;Huawei;Tianjin University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.huawei.com;http://www.tju.edu.cn", "aff_unique_abbr": "USTC;Huawei;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Voila-A: Aligning Vision-Language Models with User's Gaze Attention", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94630", "id": "ZYrZ5V84ZI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZYrZ5V84ZI", "openreview": "https://openreview.net/forum?id=ZYrZ5V84ZI", "poster": "", "project": "", "author_site": "Kun Yan, Zeyu Wang, Lei Ji, Yuntao Wang, Nan Duan, Shuai Ma", "tldr": "", "abstract": "In recent years, the integration of vision and language understanding has led to significant advancements in artificial intelligence, particularly through Vision-Language Models (VLMs). However, existing VLMs face challenges in handling real-world applications with complex scenes and multiple objects, as well as aligning their focus with the diverse attention patterns of human users. In this paper, we introduce gaze information, feasibly collected by ubiquitous wearable devices such as MR glasses, as a proxy for human attention to guide VLMs. We propose a novel approach, Voila-A, for gaze alignment to enhance the effectiveness of these models in real-world applications. First, we collect hundreds of minutes of gaze data to demonstrate that we can mimic human gaze modalities using localized narratives. We then design an automatic data annotation pipeline utilizing GPT-4 to generate the VOILA-COCO dataset. Additionally, we introduce a new model VOILA-A that integrate gaze information into VLMs while maintain pretrained knowledge from webscale dataset. We evaluate Voila-A using a hold-out validation set and a newly collected VOILA-GAZE testset, which features real-life scenarios captured with a gaze-tracking device. Our experimental results demonstrate that Voila-A significantly outperforms several baseline models. By aligning model attention with human gaze patterns, Voila-A paves the way for more intuitive, user-centric VLMs and fosters engaging human-AI interaction across a wide range of applications.", "keywords": "Vision Language Model;Human Gaze;Multimodal;Controlled Generative Model;AR/VR", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/f5bddc66382e62f6f86c0fb355c522b091d192be.zip", "author": "Kun Yan;Zeyu Wang;Lei Ji;Yuntao Wang;Nan Duan;Shuai Ma", "authorids": "~Kun_Yan2;~Zeyu_Wang12;~Lei_Ji1;~Yuntao_Wang1;~Nan_Duan1;~Shuai_Ma1", "gender": "M;F;F;M;M;M", "homepage": ";;;https://pi.cs.tsinghua.edu.cn/lab/people/YuntaoWang/;https://nanduan.github.io/;https://mashuai-ms.github.io/", "dblp": ";;42/2721-1;52/4107-1.html;;35/6569", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;kHpwoAUAAAAJ;Qaa6OxIAAAAJ;", "orcid": "0000-0001-8290-5169;0009-0007-5048-1665;;0000-0002-4249-8893;;", "linkedin": ";;;;;", "or_profile": "~Kun_Yan2;~Zeyu_Wang12;~Lei_Ji1;~Yuntao_Wang1;~Nan_Duan1;~Shuai_Ma1", "aff": "Microsoft Research;Tsinghua University;Microsoft Research;Tsinghua University;Microsoft Research Asia;Beihang University", "aff_domain": "microsoft.com;mail.tsinghua.edu.cn;research.microsoft.com;tsinghua.edu.cn;microsoft.com;buaa.edu.cn", "position": "PhD student;PhD student;Researcher;Associate Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nyan2024voilaa,\ntitle={Voila-A: Aligning Vision-Language Models with User's Gaze Attention},\nauthor={Kun Yan and Zeyu Wang and Lei Ji and Yuntao Wang and Nan Duan and Shuai Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZYrZ5V84ZI}\n}", "github": "", "reviewers": "qe53;CTT5;3dvo;Hary", "pdf_size": 24034075, "rating": "3;7;7;7", "confidence": "4;3;3;4", "soundness": "2;3;3;2", "novelty": "1;3;3;3", "presentation": "1;3;3;4", "wc_summary": "54;67;94;147", "wc_strengths": "31;55;82;97", "wc_weaknesses": "228;36;147;412", "wc_questions": "96;13;68;231", "wc_limitations": "28;1;12;20", "wc_review": "437;172;403;907", "wc_reply_reviewers": "99;0;134;168", "wc_reply_authors": "308;0;148;316", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 90.5, 35.668613654023616 ], "wc_strengths_avg": [ 66.25, 25.31180554602931 ], "wc_weaknesses_avg": [ 205.75, 137.2049106264058 ], "wc_questions_avg": [ 102.0, 80.24026420694288 ], "wc_limitations_avg": [ 15.25, 9.98436277385793 ], "wc_review_avg": [ 479.75, 266.9132583818196 ], "wc_reply_reviewers_avg": [ 100.25, 62.810727586933744 ], "wc_reply_authors_avg": [ 193.0, 130.02692028960772 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11216432953124509692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "microsoft.com;mail.tsinghua.edu.cn;research.microsoft.com;tsinghua.edu.cn;microsoft.com;buaa.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;1;0;2", "aff_unique_norm": "Microsoft;Tsinghua University;Beihang University", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.tsinghua.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "MSR;THU;BUAA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "APDDv2: Aesthetics of Paintings and Drawings Dataset with Artist Labeled Scores and Comments", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97629", "id": "ZZ17sBJh3w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZZ17sBJh3w", "openreview": "https://openreview.net/forum?id=ZZ17sBJh3w", "poster": "/media/PosterPDFs/NeurIPS%202024/97629.png?t=1731732158.7762656", "project": "", "author_site": "Xin Jin, Qianqian Qiao, Yi Lu, HuayeWang, Heng Huang, Shan Gao, Jianfei Liu, Rui Li", "tldr": "", "abstract": "Datasets play a pivotal role in training visual models, facilitating the development of abstract understandings of visual features through diverse image samples and multidimensional attributes. However, in the realm of aesthetic evaluation of artistic images, datasets remain relatively scarce. Existing painting datasets are often characterized by limited scoring dimensions and insufficient annotations, thereby constraining the advancement and application of automatic aesthetic evaluation methods in the domain of painting.\nTo bridge this gap, we introduce the Aesthetics Paintings and Drawings Dataset (APDD), the first comprehensive collection of paintings encompassing 24 distinct artistic categories and 10 aesthetic attributes. Building upon the initial release of APDDv1, our ongoing research has identified opportunities for enhancement in data scale and annotation precision. Consequently, APDDv2 boasts an expanded image corpus and improved annotation quality, featuring detailed language comments to better cater to the needs of both researchers and practitioners seeking high-quality painting datasets.\nFurthermore, we present an updated version of the Art Assessment Network for Specific Painting Styles, denoted as ArtCLIP. Experimental validation demonstrates the superior performance of this revised model in the realm of aesthetic evaluation, surpassing its predecessor in accuracy and efficacy.\nThe dataset and model are available at https://github.com/BestiVictory/APDDv2.git.", "keywords": "painting dateset;aesthetic comments;attribute scores;visual model;aesthetic evaluation", "primary_area": "", "supplementary_material": "/attachment/0b2d52cc38e5324e8a01325fb1bde35345768bcd.zip", "author": "Xin Jin;Qianqian Qiao;Yi Lu;HuayeWang;Heng Huang;Shan Gao;Jianfei Liu;Rui Li", "authorids": "~Xin_Jin1;~Qianqian_Qiao1;~Yi_Lu13;~HuayeWang1;~Heng_Huang3;~Shan_Gao4;~Jianfei_Liu3;~Rui_Li44", "gender": "M;;;Not Specified;M;;;", "homepage": "http://jinxin.me;https://github.com/QQQfive;https://github.com/Yilu6667;https://github.com/Hua233333;;https://github.com/Shangao66;https://github.com/Jianfei66;https://github.com/RuiLi66", "dblp": "68/3340-15;;;;;;;", "google_scholar": "UGPgvcUAAAA;;;;;;;", "orcid": "0000-0003-3873-1653;;;0009-0002-0348-1893;0000-0003-3036-2961;;;", "linkedin": ";;;;;;;", "or_profile": "~Xin_Jin1;~Qianqian_Qiao1;~Yi_Lu13;~HuayeWang1;~Heng_Huang3;~Shan_Gao4;~Jianfei_Liu3;~Rui_Li44", "aff": "Beijing Electronic Science and Technology Institute;Beijing Electronic Science and Technology\u00a0Institute;Central Academy of Fine Art;Beijing Electronic Science and Technology\u00a0Institute;University of Science and Technology of China;Central Academy of Fine Art;Central Academy of Fine Art;Central Academy of Fine Art", "aff_domain": "besti.edu.cn;besti.edu.cn;cafa.edu.cn;besti.edu.cn;ustc.edu.cn;cafa.edu.cn;cafa.edu.cn;cafa.edu.cn", "position": "Associate Professor;MS student;Emeritus;MS student;PhD student;Intern;Intern;Intern", "bibtex": "@inproceedings{\njin2024apddv,\ntitle={{APDD}v2: Aesthetics of Paintings and Drawings Dataset with Artist Labeled Scores and Comments},\nauthor={Xin Jin and Qianqian Qiao and Yi Lu and HuayeWang and Heng Huang and Shan Gao and Jianfei Liu and Rui Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ZZ17sBJh3w}\n}", "github": "", "reviewers": "pGpG;WijH;bTSD;UxjQ", "pdf_size": 35885084, "rating": "6;7;7;7", "confidence": "3;4;4;3", "wc_summary_and_contributions": "44;76;38;75", "wc_strengths": "10;47;2;28", "wc_improvement": "18;77;2;25", "wc_limitations": "16;5;2;13", "wc_correctness": "16;1;1;19", "wc_clarity": "5;5;7;8", "wc_relation_to_prior_work": "27;1;3;20", "wc_documentation": "8;8;3;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "145;221;59;208", "wc_reply_reviewers": "0;0;30;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 58.25, 17.383541066192468 ], "wc_strengths_avg": [ 21.75, 17.354754391808605 ], "wc_improvement_avg": [ 30.5, 28.111385593741193 ], "wc_limitations_avg": [ 9.0, 5.70087712549569 ], "wc_correctness_avg": [ 9.25, 8.317902379807062 ], "wc_clarity_avg": [ 6.25, 1.299038105676658 ], "wc_relation_to_prior_work_avg": [ 12.75, 11.053845484717073 ], "wc_documentation_avg": [ 9.5, 5.852349955359813 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 158.25, 64.10684440837811 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cpRisTZnIOAJ:scholar.google.com/&scioq=APDDv2:+Aesthetics+of+Paintings+and+Drawings+Dataset+with+Artist+Labeled+Scores+and+Comments&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "besti.edu.cn;besti.edu.cn;cafa.edu.cn;besti.edu.cn;ustc.edu.cn;cafa.edu.cn;cafa.edu.cn;cafa.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;1;1;1", "aff_unique_norm": "Beijing Electronic Science and Technology Institute;Central Academy of Fine Art;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": ";http://www.cafa.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": ";CAFA;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Recurrent neural network dynamical systems for biological vision", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94629", "id": "ZZ94aLbMOK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZZ94aLbMOK", "openreview": "https://openreview.net/forum?id=ZZ94aLbMOK", "poster": "", "project": "", "author_site": "Wayne Soo, Aldo Battista, Puria Radmard, Xiao-Jing Wang", "tldr": "", "abstract": "In neuroscience, recurrent neural networks (RNNs) are modeled as continuous-time dynamical systems to more accurately reflect the dynamics inherent in biological circuits. However, convolutional neural networks (CNNs) remain the preferred architecture in vision neuroscience due to their ability to efficiently process visual information, which comes at the cost of the biological realism provided by RNNs. To address this, we introduce a hybrid architecture that integrates the continuous-time recurrent dynamics of RNNs with the spatial processing capabilities of CNNs. Our models preserve the dynamical characteristics typical of RNNs while having comparable performance with their conventional CNN counterparts on benchmarks like ImageNet. Compared to conventional CNNs, our models demonstrate increased robustness to noise due to noise-suppressing mechanisms inherent in recurrent dynamical systems. Analyzing our architecture as a dynamical system is computationally expensive, so we develop a toolkit consisting of iterative methods specifically tailored for convolutional structures. We also train multi-area RNNs using our architecture as the front-end to perform complex cognitive tasks previously impossible to learn or achievable only with oversimplified stimulus representations. In monkey neural recordings, our models capture time-dependent variations in neural activity in higher-order visual areas. Together, these contributions represent a comprehensive foundation to unify the advances of CNNs and dynamical RNNs in vision neuroscience.", "keywords": "convolutional neural networks;recurrent neural networks;neuroscience;vision;cognition;perception", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/a1e23cb29ca8b105effc7fb7759206eabf04af8a.zip", "author": "Wayne WM Soo;Aldo Battista;Puria Radmard;Xiao-Jing Wang", "authorids": "~Wayne_WM_Soo1;~Aldo_Battista1;~Puria_Radmard1;~Xiao-Jing_Wang1", "gender": "M;M;M;M", "homepage": ";;;http://www.cns.nyu.edu/wanglab/", "dblp": ";;;", "google_scholar": ";xN540osAAAAJ;O3Lza0oAAAAJ;cv-YgL0AAAAJ", "orcid": "0000-0002-0621-1955;0000-0003-1018-0247;;", "linkedin": "wayne-soo-8bb097147/;aldo-battista/;;", "or_profile": "~Wayne_WM_Soo1;~Aldo_Battista1;~Puria_Radmard1;~Xiao-Jing_Wang1", "aff": "University of Cambridge;New York University;University of Cambridge;New York University", "aff_domain": "cam.ac.uk;nyu.edu;cam.ac.uk;nyu.edu", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nsoo2024recurrent,\ntitle={Recurrent neural network dynamical systems for biological vision},\nauthor={Wayne WM Soo and Aldo Battista and Puria Radmard and Xiao-Jing Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZZ94aLbMOK}\n}", "github": "", "reviewers": "7DkU;UEUJ;8dkQ;A7p2;7SU8", "pdf_size": 1653577, "rating": "5;6;6;8;9", "confidence": "2;2;3;3;4", "soundness": "2;3;3;4;4", "novelty": "2;2;3;4;4", "presentation": "2;3;2;3;4", "wc_summary": "152;64;24;65;84", "wc_strengths": "50;69;23;94;118", "wc_weaknesses": "253;118;118;21;137", "wc_questions": "37;17;12;72;132", "wc_limitations": "2;212;5;5;8", "wc_review": "494;480;182;257;479", "wc_reply_reviewers": "56;121;80;13;0", "wc_reply_authors": "23;300;291;20;35", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.8, 1.469693845669907 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 77.8, 41.93518808828691 ], "wc_strengths_avg": [ 70.8, 33.126424497672545 ], "wc_weaknesses_avg": [ 129.4, 73.95295802062282 ], "wc_questions_avg": [ 54.0, 44.339598554790726 ], "wc_limitations_avg": [ 46.4, 82.82173627738071 ], "wc_review_avg": [ 378.4, 131.9978787708348 ], "wc_reply_reviewers_avg": [ 54.0, 44.19502234415093 ], "wc_reply_authors_avg": [ 133.8, 132.15354705795832 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8728715609439694, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9971528450225683668&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "cam.ac.uk;nyu.edu;cam.ac.uk;nyu.edu", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Cambridge;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.nyu.edu", "aff_unique_abbr": "Cambridge;NYU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "DiGRAF: Diffeomorphic Graph-Adaptive Activation Function", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94628", "id": "ZZoW4Z3le4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZZoW4Z3le4", "openreview": "https://openreview.net/forum?id=ZZoW4Z3le4", "poster": "", "project": "", "author_site": "Krishna Sri Ipsit Mantri, Xinzhi Wang, Carola-Bibiane Sch\u00f6nlieb, Bruno Ribeiro, Beatrice Bevilacqua, Moshe Eliasof", "tldr": "", "abstract": "In this paper, we propose a novel activation function tailored specifically for graph data in Graph Neural Networks (GNNs). Motivated by the need for graph-adaptive and flexible activation functions, we introduce DiGRAF, leveraging Continuous Piecewise-Affine Based (CPAB) transformations, which we augment with an additional GNN to learn a graph-adaptive diffeomorphic activation function in an end-to-end manner. In addition to its graph-adaptivity and flexibility, DiGRAF also possesses properties that are widely recognized as desirable for activation functions, such as differentiability, boundness within the domain, and computational efficiency. \nWe conduct an extensive set of experiments across diverse datasets and tasks, demonstrating a consistent and superior performance of DiGRAF compared to traditional and graph-specific activation functions, highlighting its effectiveness as an activation function for GNNs. Our code is available at https://github.com/ipsitmantri/DiGRAF.", "keywords": "Graph Neural Networks;Graph Activation Functions", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Krishna Sri Ipsit Mantri;Xinzhi Wang;Carola-Bibiane Sch\u00f6nlieb;Bruno Ribeiro;Beatrice Bevilacqua;Moshe Eliasof", "authorids": "~Krishna_Sri_Ipsit_Mantri1;~Xinzhi_Wang2;~Carola-Bibiane_Sch\u00f6nlieb1;~Bruno_Ribeiro1;~Beatrice_Bevilacqua1;~Moshe_Eliasof1", "gender": ";;F;M;F;M", "homepage": ";;http://www.damtp.cam.ac.uk/research/cia/;https://www.cs.purdue.edu/homes/ribeirob/;http://beabevi.github.io/;", "dblp": ";;07/8184;15/606;275/2364;239/6004", "google_scholar": ";;nPeOXjwAAAAJ;KIEleCsAAAAJ;;44LKqBsAAAAJ", "orcid": ";;;0000-0002-3527-6192;;", "linkedin": ";;;;;", "or_profile": "~Krishna_Sri_Ipsit_Mantri1;~Xinzhi_Wang2;~Carola-Bibiane_Sch\u00f6nlieb1;~Bruno_Ribeiro1;~Beatrice_Bevilacqua1;~Moshe_Eliasof1", "aff": ";;University of Cambridge;Stanford University;Purdue University;University of Cambridge", "aff_domain": ";;cam.ac.uk;stanford.edu;purdue.edu;cam.ac.uk", "position": ";;Full Professor;Visiting Associate Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nmantri2024digraf,\ntitle={Di{GRAF}: Diffeomorphic Graph-Adaptive Activation Function},\nauthor={Krishna Sri Ipsit Mantri and Xinzhi Wang and Carola-Bibiane Sch{\\\"o}nlieb and Bruno Ribeiro and Beatrice Bevilacqua and Moshe Eliasof},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZZoW4Z3le4}\n}", "github": "", "reviewers": "AwEg;LFcv;mZzM;SUte", "pdf_size": 1658489, "rating": "5;5;5;6", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "59;77;62;70", "wc_strengths": "39;58;64;122", "wc_weaknesses": "25;25;147;112", "wc_questions": "88;29;108;125", "wc_limitations": "10;58;66;6", "wc_review": "221;247;447;435", "wc_reply_reviewers": "0;0;8;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 7.035623639735144 ], "wc_strengths_avg": [ 70.75, 30.994959267597046 ], "wc_weaknesses_avg": [ 77.25, 53.69532102520666 ], "wc_questions_avg": [ 87.5, 36.22499137335991 ], "wc_limitations_avg": [ 35.0, 27.184554438136374 ], "wc_review_avg": [ 337.5, 103.99399021097325 ], "wc_reply_reviewers_avg": [ 2.0, 3.4641016151377544 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2353558124308909256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;cam.ac.uk;stanford.edu;purdue.edu;cam.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;Stanford University;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.stanford.edu;https://www.purdue.edu", "aff_unique_abbr": "Cambridge;Stanford;Purdue", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Cambridge;Stanford;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "ClavaDDPM: Multi-relational Data Synthesis with Cluster-guided Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94627", "id": "Zb2ixT19VF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Zb2ixT19VF", "openreview": "https://openreview.net/forum?id=Zb2ixT19VF", "poster": "", "project": "", "author_site": "Wei Pang, Masoumeh Shafieinejad, Lucy Liu, Stephanie Hazlewood, Xi He", "tldr": "", "abstract": "Recent research in tabular data synthesis has focused on single tables, whereas real-world applications often involve complex data with tens or hundreds of interconnected tables. Previous approaches to synthesizing multi-relational (multi-table) data fall short in two key aspects: scalability for larger datasets and capturing long-range dependencies, such as correlations between attributes spread across different tables. Inspired by the success of diffusion models in tabular data modeling, we introduce \n \\textbf{C}luster \\textbf{La}tent \\textbf{Va}riable guided \\textbf{D}enoising \\textbf{D}iffusion \\textbf{P}robabilistic \\textbf{M}odels (ClavaDDPM). This novel approach leverages clustering labels as intermediaries to model relationships between tables, specifically focusing on foreign key constraints. ClavaDDPM leverages the robust generation capabilities of diffusion models while incorporating efficient algorithms to propagate the learned latent variables across tables. This enables ClavaDDPM to capture long-range dependencies effectively. \n Extensive evaluations on multi-table datasets of varying sizes show that ClavaDDPM significantly outperforms existing methods for these long-range dependencies while remaining competitive on utility metrics for single-table data.", "keywords": "diffusion models;synthesis;tabular data;generative models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9c1b8844fd0ff9dc95806a049fb9f22b79c8794b.zip", "author": "Wei Pang;Masoumeh Shafieinejad;Lucy Liu;Stephanie Hazlewood;Xi He", "authorids": "~Wei_Pang5;~Masoumeh_Shafieinejad1;~Lucy_Liu2;~Stephanie_Hazlewood1;~Xi_He2", "gender": "M;F;F;F;F", "homepage": ";https://masi-sh.github.io/;;;https://cs.uwaterloo.ca/~xihe/", "dblp": ";;;;28/949-1", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": "wei-pang-980183128/;masoumeh-shafieinejad-676b3242/;lucy-liu-44644541/;stephaniehazlewood/;", "or_profile": "~Wei_Pang5;~Masoumeh_Shafieinejad1;~Lucy_Liu2;~Stephanie_Hazlewood1;~Xi_He2", "aff": "University of Waterloo;vector ;RBC;Data & AI / Borealis AI;University of Waterloo", "aff_domain": "uwaterloo.ca;vectorinstitute.ai;rbc.com;rbc.com;uwaterloo.ca", "position": "MS student;Researcher;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\npang2024clavaddpm,\ntitle={Clava{DDPM}: Multi-relational Data Synthesis with Cluster-guided Diffusion Models},\nauthor={Wei Pang and Masoumeh Shafieinejad and Lucy Liu and Stephanie Hazlewood and Xi He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Zb2ixT19VF}\n}", "github": "", "reviewers": "5kZH;bZ6q;ok29;sDU8", "pdf_size": 6484145, "rating": "3;4;5;7", "confidence": "3;3;3;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "70;67;92;132", "wc_strengths": "56;130;62;205", "wc_weaknesses": "108;308;117;86", "wc_questions": "459;4;120;88", "wc_limitations": "1;1;6;7", "wc_review": "694;510;397;518", "wc_reply_reviewers": "0;246;0;22", "wc_reply_authors": "33;418;1179;36", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;3;2", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 90.25, 25.96512083545925 ], "wc_strengths_avg": [ 113.25, 60.420919390555454 ], "wc_weaknesses_avg": [ 154.75, 89.19466071464143 ], "wc_questions_avg": [ 167.75, 173.4075762474062 ], "wc_limitations_avg": [ 3.75, 2.7726341266023544 ], "wc_review_avg": [ 529.75, 106.21764213161578 ], "wc_reply_reviewers_avg": [ 67.0, 103.73523991392703 ], "wc_reply_authors_avg": [ 416.5, 467.2421748943475 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8609019596197303916&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uwaterloo.ca;vectorinstitute.ai;rbc.com;rbc.com;uwaterloo.ca", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of Waterloo;Vector Institute;Royal Bank of Canada;Borealis AI", "aff_unique_dep": ";;;Data & AI", "aff_unique_url": "https://uwaterloo.ca;https://vectorinstitute.ai/;https://www.rbc.com;https://www.borealisai.com", "aff_unique_abbr": "UW;Vector;RBC;Borealis AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Normalization and effective learning rates in reinforcement learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94626", "id": "ZbjJE6Nq5k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZbjJE6Nq5k", "openreview": "https://openreview.net/forum?id=ZbjJE6Nq5k", "poster": "", "project": "", "author_site": "Clare Lyle, Zeyu Zheng, Khimya Khetarpal, James Martens, Hado van Hasselt, Razvan Pascanu, Will Dabney", "tldr": "", "abstract": "Normalization layers have recently experienced a renaissance in the deep reinforcement learning and continual learning literature, with several works highlighting diverse benefits such as improving loss landscape conditioning and combatting overestimation bias. However, normalization brings with it a subtle but important side effect: an equivalence between growth in the norm of the network parameters and decay in the effective learning rate. This becomes problematic in continual learning settings, where the resulting learning rate schedule may decay to near zero too quickly relative to the timescale of the learning problem. We propose to make the learning rate schedule explicit with a simple re-parameterization which we call Normalize-and-Project (NaP), which couples the insertion of normalization layers with weight projection, ensuring that the effective learning rate remains constant throughout training. This technique reveals itself as a powerful analytical tool to better understand learning rate schedules in deep reinforcement learning, and as a means of improving robustness to nonstationarity in synthetic plasticity loss benchmarks along with both the single-task and sequential variants of the Arcade Learning Environment. We also show that our approach can be easily applied to popular architectures such as ResNets and transformers while recovering and in some cases even slightly improving the performance of the base model in common stationary benchmarks.", "keywords": "continual learning;reinforcement learning;optimization;plasticity", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Clare Lyle;Zeyu Zheng;Khimya Khetarpal;James Martens;Hado van Hasselt;Razvan Pascanu;Will Dabney", "authorids": "~Clare_Lyle1;~Zeyu_Zheng1;~Khimya_Khetarpal1;~James_Martens1;~Hado_van_Hasselt1;~Razvan_Pascanu1;~Will_Dabney1", "gender": ";M;F;M;M;M;M", "homepage": ";http://www-personal.umich.edu/~zeyu/;https://kkhetarpal.github.io/;http://www.cs.toronto.edu/~jmartens/;http://hadovanhasselt.com;https://razp.info;", "dblp": "192/1910;48/7883;186/3048;12/8412;https://dblp.uni-trier.de/pers/h/Hasselt:Hado_van.html;65/8368.html;https://dblp.uni-trier.de/pers/hd/d/Dabney:Will", "google_scholar": ";;https://scholar.google.ca/citations?user=VLOUhF0AAAAJ;;;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.co.uk/citations?user=dR-7QW8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Clare_Lyle1;~Zeyu_Zheng1;~Khimya_Khetarpal1;~James_Martens1;~Hado_van_Hasselt1;~Razvan_Pascanu1;~Will_Dabney1", "aff": "Google DeepMind;Google DeepMind;Google;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "google.com;deepmind.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher;Research Scientist;Research scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlyle2024normalization,\ntitle={Normalization and effective learning rates in reinforcement learning},\nauthor={Clare Lyle and Zeyu Zheng and Khimya Khetarpal and James Martens and Hado van Hasselt and Razvan Pascanu and Will Dabney},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZbjJE6Nq5k}\n}", "github": "", "reviewers": "AEQp;vdAM;KJMZ;6ZG9", "pdf_size": 6864827, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;2;3;3", "wc_summary": "79;199;303;163", "wc_strengths": "65;151;242;57", "wc_weaknesses": "74;274;125;68", "wc_questions": "854;154;2;515", "wc_limitations": "4;24;1;2", "wc_review": "1076;802;673;805", "wc_reply_reviewers": "272;0;0;265", "wc_reply_authors": "147;0;0;996", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 186.0, 80.36790404135223 ], "wc_strengths_avg": [ 128.75, 75.05456348550699 ], "wc_weaknesses_avg": [ 135.25, 83.11249906000901 ], "wc_questions_avg": [ 381.25, 330.4749423178707 ], "wc_limitations_avg": [ 7.75, 9.443913383762052 ], "wc_review_avg": [ 839.0, 146.84175155588414 ], "wc_reply_reviewers_avg": [ 134.25, 134.2728099802786 ], "wc_reply_authors_avg": [ 285.75, 414.4311613525218 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9659231822896224602&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;deepmind.com;google.com;google.com;google.com;google.com;google.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Stochastic Amortization: A Unified Approach to Accelerate Feature and Data Attribution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94625", "id": "ZdWTN2HOie", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZdWTN2HOie", "openreview": "https://openreview.net/forum?id=ZdWTN2HOie", "poster": "", "project": "", "author_site": "Ian Covert, Chanwoo Kim, Su-In Lee, James Zou, Tatsunori Hashimoto", "tldr": "", "abstract": "Many tasks in explainable machine learning, such as data valuation and feature attribution, perform expensive computation for each data point and are intractable for large datasets. These methods require efficient approximations, and although amortizing the process by learning a network to directly predict the desired output is a promising solution, training such models with exact labels is often infeasible. We therefore explore training amortized models with noisy labels, and we find that this is inexpensive and surprisingly effective. Through theoretical analysis of the label noise and experiments with various models and datasets, we show that this approach tolerates high noise levels and significantly accelerates several feature attribution and data valuation methods, often yielding an order of magnitude speedup over existing approaches.", "keywords": "Amortization;feature attribution;data valuation;stochastic optimization", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Ian Connick Covert;Chanwoo Kim;Su-In Lee;James Zou;Tatsunori Hashimoto", "authorids": "~Ian_Connick_Covert1;~Chanwoo_Kim3;~Su-In_Lee2;~James_Zou1;~Tatsunori_Hashimoto1", "gender": "M;;F;;M", "homepage": "https://iancovert.com;https://chanwoo.kim;http://suinlee.cs.washington.edu/;;https://thashim.github.io", "dblp": "262/3443;62/79-2;17/1784;;", "google_scholar": "Np8Ek3cAAAAJ;;;23ZXZvEAAAAJ;5ygiTwsAAAAJ", "orcid": ";;;;", "linkedin": "ian-covert/;;;;", "or_profile": "~Ian_Connick_Covert1;~Chanwoo_Kim3;~Su-In_Lee2;~James_Zou1;~Tatsunori_Hashimoto1", "aff": "Stanford University;Department of Computer Science, University of Washington;University of Washington;Stanford University;Stanford University", "aff_domain": "stanford.edu;cs.washington.edu;uw.edu;stanford.edu;stanford.edu", "position": "Postdoc;PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncovert2024stochastic,\ntitle={Stochastic Amortization: A Unified Approach to Accelerate Feature and Data Attribution},\nauthor={Ian Connick Covert and Chanwoo Kim and Su-In Lee and James Zou and Tatsunori Hashimoto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZdWTN2HOie}\n}", "github": "", "reviewers": "cihK;hGpm;GyYk;x1Ww", "pdf_size": 1690501, "rating": "5;6;6;7", "confidence": "3;4;5;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "67;125;69;81", "wc_strengths": "41;44;99;100", "wc_weaknesses": "123;113;175;65", "wc_questions": "37;89;2;132", "wc_limitations": "31;13;1;10", "wc_review": "299;384;346;388", "wc_reply_reviewers": "32;133;128;54", "wc_reply_authors": "0;400;46;30", "reply_reviewers": "1;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.5, 23.425413550244958 ], "wc_strengths_avg": [ 71.0, 28.521921393903323 ], "wc_weaknesses_avg": [ 119.0, 39.06404996924922 ], "wc_questions_avg": [ 65.0, 49.542910693660296 ], "wc_limitations_avg": [ 13.75, 10.894379284750462 ], "wc_review_avg": [ 354.25, 35.86345633092271 ], "wc_reply_reviewers_avg": [ 86.75, 44.47119854467608 ], "wc_reply_authors_avg": [ 119.0, 163.07360301409912 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7585156923681811395&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "stanford.edu;cs.washington.edu;uw.edu;stanford.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Stanford University;University of Washington", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.stanford.edu;https://www.washington.edu", "aff_unique_abbr": "Stanford;UW", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Stanford;Seattle;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Wasserstein convergence of Cech persistence diagrams for samplings of submanifolds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94624", "id": "ZehccYKkNH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZehccYKkNH", "openreview": "https://openreview.net/forum?id=ZehccYKkNH", "poster": "/media/PosterPDFs/NeurIPS%202024/94624.png?t=1731614955.8030994", "project": "", "author_site": "Charles Arnal, David Cohen-Steiner, Vincent Divol", "tldr": "", "abstract": "Cech Persistence diagrams (PDs) are topological descriptors routinely used to capture the geometry of complex datasets. They are commonly compared using the Wasserstein distances $\\mathrm{OT}_p$; however, the extent to which PDs are stable with respect to these metrics remains poorly understood. \nWe partially close this gap by focusing on the case where datasets are sampled on an $m$-dimensional submanifold of $\\mathbb{R}^d$. Under this manifold hypothesis, we show that convergence with respect to the $\\mathrm{OT}_p$ metric happens exactly when $p>m$. We also provide improvements upon the bottleneck stability theorem in this case and prove new laws of large numbers for the total $\\alpha$-persistence of PDs. Finally, we show how these theoretical findings shed new light on the behavior of the feature maps on the space of PDs that are used in ML-oriented applications of Topological Data Analysis.", "keywords": "topological data analysis; persistence diagrams; simplicial complexes; differential geometry; Wasserstein distance", "primary_area": "learning_theory", "supplementary_material": "", "author": "Charles Arnal;David Cohen-Steiner;Vincent Divol", "authorids": "~Charles_Arnal1;~David_Cohen-Steiner1;~Vincent_Divol1", "gender": "M;M;M", "homepage": "https://charlesarnal.github.io/;;https://vincentdivol.github.io", "dblp": ";19/5307;", "google_scholar": "Pre7QicAAAAJ;;", "orcid": "0000-0002-3306-0574;;", "linkedin": "charles-arnal-049001183/;;", "or_profile": "~Charles_Arnal1;~David_Cohen-Steiner1;~Vincent_Divol1", "aff": "INRIA;INRIA;", "aff_domain": "inria.fr;inria.fr;", "position": "Postdoc;Researcher;", "bibtex": "@inproceedings{\narnal2024wasserstein,\ntitle={Wasserstein convergence of Cech persistence diagrams for samplings of submanifolds},\nauthor={Charles Arnal and David Cohen-Steiner and Vincent Divol},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZehccYKkNH}\n}", "github": "", "reviewers": "xmoJ;G8s6;rgww;hrBD", "pdf_size": 1675486, "rating": "4;5;8;9", "confidence": "3;2;4;4", "soundness": "4;3;4;4", "novelty": "4;2;3;3", "presentation": "4;3;4;4", "wc_summary": "23;31;140;28", "wc_strengths": "14;23;96;97", "wc_weaknesses": "101;92;49;89", "wc_questions": "22;11;110;28", "wc_limitations": "10;9;40;60", "wc_review": "170;166;435;302", "wc_reply_reviewers": "76;20;7;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.5, 48.869724779253666 ], "wc_strengths_avg": [ 57.5, 39.131189606246316 ], "wc_weaknesses_avg": [ 82.75, 19.97967717456916 ], "wc_questions_avg": [ 42.75, 39.30251264232352 ], "wc_limitations_avg": [ 29.75, 21.45198126048035 ], "wc_review_avg": [ 268.25, 110.73927713327372 ], "wc_reply_reviewers_avg": [ 28.5, 27.825348155953055 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8043996665398437, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10870768291485605108&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "inria.fr;inria.fr;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "PureGen: Universal Data Purification for Train-Time Poison Defense via Generative Model Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94623", "id": "ZeihWodDVh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZeihWodDVh", "openreview": "https://openreview.net/forum?id=ZeihWodDVh", "poster": "/media/PosterPDFs/NeurIPS%202024/94623.png?t=1733194361.7836447", "project": "", "author_site": "Omead Pooladzandi, Sunay Bhat, Jeffrey Jiang, Alexander Branch, Gregory Pottie", "tldr": "", "abstract": "Train-time data poisoning attacks threaten machine learning models by introducing adversarial examples during training, leading to misclassification. Current defense methods often reduce generalization performance, are attack-specific, and impose significant training overhead. To address this, we introduce a set of universal data purification methods using a stochastic transform, $\\Psi(x)$, realized via iterative Langevin dynamics of Energy-Based Models (EBMs), Denoising Diffusion Probabilistic Models (DDPMs), or both. These approaches purify poisoned data with minimal impact on classifier generalization. Our specially trained EBMs and DDPMs provide state-of-the-art defense against various attacks (including Narcissus, Bullseye Polytope, Gradient Matching) on CIFAR-10, Tiny-ImageNet, and CINIC-10, without needing attack or classifier-specific information. We discuss performance trade-offs and show that our methods remain highly effective even with poisoned or distributionally shifted generative model training data.", "keywords": "Energy-Based Models;Diffusion;Langevin dynamics;Poisons;robustness;defense;Backdoor", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Omead Pooladzandi;Sunay Gajanan Bhat;Jeffrey Jiang;Alexander Branch;Gregory Pottie", "authorids": "~Omead_Pooladzandi1;~Sunay_Gajanan_Bhat1;~Jeffrey_Jiang1;~Alexander_Branch1;~Gregory_Pottie1", "gender": "M;M;M;M;", "homepage": ";https://sunaybhat.me;https://github.com/jimmery/;;", "dblp": "319/9453;;;;", "google_scholar": ";;;;", "orcid": ";;;;", "linkedin": "omead-pooladzandi-8a5051109;;;alex-branch-1x/;", "or_profile": "~Omead_Pooladzandi1;~Sunay_Gajanan_Bhat1;~Jeffrey_Jiang1;~Alexander_Branch1;~Gregory_Pottie1", "aff": "Department of Electrical Engineering, California Institute of Technology;University of California, Los Angeles;;University of California, Los Angeles;University of California-Los Angeles", "aff_domain": "ee.caltech.edu;ucla.edu;;ucla.edu;", "position": "Postdoc;PhD student;;Undergrad student;", "bibtex": "@inproceedings{\npooladzandi2024puregen,\ntitle={PureGen: Universal Data Purification for Train-Time Poison Defense via Generative Model Dynamics},\nauthor={Omead Pooladzandi and Sunay Gajanan Bhat and Jeffrey Jiang and Alexander Branch and Gregory Pottie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZeihWodDVh}\n}", "github": "", "reviewers": "n59K;16Y8;K3RB;gd22", "pdf_size": 5227523, "rating": "4;4;5;6", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "2;1;3;2", "presentation": "3;3;3;3", "wc_summary": "21;80;56;102", "wc_strengths": "83;53;94;61", "wc_weaknesses": "173;180;72;109", "wc_questions": "92;163;128;27", "wc_limitations": "4;1;4;4", "wc_review": "373;477;354;303", "wc_reply_reviewers": "195;381;230;133", "wc_reply_authors": "630;439;288;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.75, 30.04475827827543 ], "wc_strengths_avg": [ 72.75, 16.467771555374455 ], "wc_weaknesses_avg": [ 133.5, 45.01388674620311 ], "wc_questions_avg": [ 102.5, 50.30159043211258 ], "wc_limitations_avg": [ 3.25, 1.299038105676658 ], "wc_review_avg": [ 376.75, 63.286550703921286 ], "wc_reply_reviewers_avg": [ 234.75, 91.30272449385068 ], "wc_reply_authors_avg": [ 339.25, 230.32734857154935 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EzAnmAhOopYJ:scholar.google.com/&scioq=PureGen:+Universal+Data+Purification+for+Train-Time+Poison+Defense+via+Generative+Model+Dynamics&hl=en&as_sdt=0,10", "gs_version_total": 0, "email": "ee.caltech.edu;ucla.edu;;ucla.edu;", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "California Institute of Technology;University of California, Los Angeles", "aff_unique_dep": "Department of Electrical Engineering;", "aff_unique_url": "https://www.caltech.edu;https://www.ucla.edu", "aff_unique_abbr": "Caltech;UCLA", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Pasadena;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "$\\beta$-DPO: Direct Preference Optimization with Dynamic $\\beta$", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94622", "id": "ZfBuhzE556", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZfBuhzE556", "openreview": "https://openreview.net/forum?id=ZfBuhzE556", "poster": "/media/PosterPDFs/NeurIPS%202024/94622.png?t=1731569490.035089", "project": "", "author_site": "Junkang Wu, Yuexiang Xie, Zhengyi Yang, Jiancan Wu, Jinyang Gao, Bolin Ding, Xiang Wang, Xiangnan He", "tldr": "", "abstract": "Direct Preference Optimization (DPO) has emerged as a compelling approach for training Large Language Models (LLMs) to adhere to human preferences. However, the performance of DPO is sensitive to the fine-tuning of its trade-off parameter $\\beta$, as well as to the quality of the preference data. We analyze the impact of $\\beta$ and data quality on DPO, uncovering that optimal $\\beta$ values vary with the informativeness of pairwise data. Addressing the limitations of static $\\beta$ values, we introduce a novel framework that dynamically calibrates $\\beta$ at the batch level, informed by data quality considerations. Additionally, our method incorporates $\\beta$-guided data filtering to safeguard against the influence of outliers. Through empirical evaluation, we demonstrate that our dynamic $\\beta$ adjustment technique significantly improves DPO\u2019s performance across a range of models and datasets, offering a more robust and adaptable training paradigm for aligning LLMs with human feedback. The code is available at \\url{https://anonymous.4open.science/r/beta-DPO-EE6C}.", "keywords": "Direct Preference Optimization;LLM's alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Junkang Wu;Yuexiang Xie;Zhengyi Yang;Jiancan Wu;Jinyang Gao;Bolin Ding;Xiang Wang;Xiangnan He", "authorids": "~Junkang_Wu1;~Yuexiang_Xie1;~Zhengyi_Yang1;~Jiancan_Wu1;~Jinyang_Gao1;~Bolin_Ding3;~Xiang_Wang6;~Xiangnan_He1", "gender": "M;M;;M;M;M;M;M", "homepage": "https://junkangwu.github.io/;https://xieyxclack.github.io/;https://github.com/YangZhengyi98;https://wujcan.github.io/;;https://bolinding.github.io/;https://github.com/xiangwang1223;http://staff.ustc.edu.cn/~hexn", "dblp": "300/3885;232/2045;;257/4945;131/4047;46/3522.html;31/2864-10;59/1007", "google_scholar": "deBwV5oAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;z9zW1UgAAAAJ;;AjYkTi8AAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ", "orcid": ";0009-0005-6545-7882;;0000-0002-6941-5218;;;0000-0002-6148-6329;0000-0001-8472-7992", "linkedin": ";;;;;bolin-ding-50a0119/;;", "or_profile": "~Junkang_Wu1;~Yuexiang_Xie1;~Zhengyi_Yang1;~Jiancan_Wu1;~Jinyang_Gao1;~Bolin_Ding3;~Xiang_Wang6;~Xiangnan_He1", "aff": "University of Science and Technology of China;Alibaba Group;University of Science and Technology of China;University of Science and Technology of China;Alibaba Group;Alibaba Group;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Staff;PhD student;Postdoc;Researcher;Senior Director;Full Professor;Professor", "bibtex": "@inproceedings{\nwu2024betadpo,\ntitle={\\${\\textbackslash}beta\\$-{DPO}: Direct Preference Optimization with Dynamic \\${\\textbackslash}beta\\$},\nauthor={Junkang Wu and Yuexiang Xie and Zhengyi Yang and Jiancan Wu and Jinyang Gao and Bolin Ding and Xiang Wang and Xiangnan He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZfBuhzE556}\n}", "github": "", "reviewers": "4RgK;uGzu;UzDb;3526;fXTA", "pdf_size": 617691, "rating": "3;6;6;6;6", "confidence": "5;3;3;4;4", "soundness": "1;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "4;3;2;3;3", "wc_summary": "47;52;94;121;382", "wc_strengths": "34;55;65;104;177", "wc_weaknesses": "114;29;73;65;643", "wc_questions": "96;95;276;31;120", "wc_limitations": "73;5;1;41;156", "wc_review": "364;236;509;362;1478", "wc_reply_reviewers": "48;50;579;13;0", "wc_reply_authors": "563;0;1015;0;443", "reply_reviewers": "1;1;3;1;0", "reply_authors": "4;1;3;1;3", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 139.2, 124.44340078927448 ], "wc_strengths_avg": [ 87.0, 50.410316404482124 ], "wc_weaknesses_avg": [ 184.8, 230.68801442641097 ], "wc_questions_avg": [ 123.6, 81.72294659396466 ], "wc_limitations_avg": [ 55.2, 56.809858299418416 ], "wc_review_avg": [ 589.8, 452.4335973377751 ], "wc_reply_reviewers_avg": [ 138.0, 221.3567256714826 ], "wc_reply_authors_avg": [ 404.2, 381.1967471004966 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.4, 1.2000000000000002 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.801783725737273, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "ustc.edu.cn;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn;alibaba-inc.com;alibaba-inc.com;ustc.edu.cn;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;1;1;0;0", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "TripletCLIP: Improving Compositional Reasoning of CLIP via Synthetic Vision-Language Negatives", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94621", "id": "ZfRGRK5Kxl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZfRGRK5Kxl", "openreview": "https://openreview.net/forum?id=ZfRGRK5Kxl", "poster": "", "project": "", "author_site": "Maitreya Patel, Naga Sai Abhiram Kusumba, Sheng Cheng, Changhoon Kim, Tejas Gokhale, Chitta Baral, 'YZ' Yezhou Yang", "tldr": "", "abstract": "Contrastive Language-Image Pretraining (CLIP) models maximize the mutual information between text and visual modalities to learn representations. This makes the nature of the training data a significant factor in the efficacy of CLIP for downstream tasks. However, the lack of compositional diversity in contemporary image-text datasets limits the compositional reasoning ability of CLIP. We show that generating ``hard'' negative captions via in-context learning and synthesizing corresponding negative images with text-to-image generators offers a solution. We introduce a novel contrastive pre-training strategy that leverages these hard negative captions and images in an alternating fashion to train CLIP. We demonstrate that our method, named TripletCLIP, when applied to existing datasets such as CC3M and CC12M, enhances the compositional capabilities of CLIP, resulting in an absolute improvement of over 9% on the SugarCrepe benchmark on an equal computational budget, as well as improvements in zero-shot image classification and image retrieval. Our code, models, and data are available at: tripletclip.github.io.", "keywords": "Contrastive Learning;Synthetic data;CLIP;Compositionality;TripletCLIP", "primary_area": "machine_vision", "supplementary_material": "/attachment/6fe60dd337fe76a54d3c0b25d59be62b304747e4.zip", "author": "Maitreya Patel;Naga Sai Abhiram kusumba;Sheng Cheng;Changhoon Kim;Tejas Gokhale;Chitta Baral;Yezhou Yang", "authorids": "~Maitreya_Patel2;~Naga_Sai_Abhiram_kusumba1;~Sheng_Cheng1;~Changhoon_Kim1;~Tejas_Gokhale1;~Chitta_Baral1;~Yezhou_Yang1", "gender": "M;M;;M;;M;M", "homepage": "https://maitreyapatel.com;;https://shengcheng.github.io/;https://www.changhoonkim.com/;;http://chitta.orissalinks.com;https://yezhouyang.engineering.asu.edu", "dblp": "228/8407;;;;;b/ChittaBaral;78/7455", "google_scholar": "z--mlKgAAAAJ;1L5SJkcAAAAJ;TWAwdYsAAAAJ;z_04VyYAAAAJ;;9Yd716IAAAAJ;k2suuZgAAAAJ", "orcid": ";;0000-0001-7244-5998;;;0000-0002-7549-723X;", "linkedin": "maitreya-patel-a37a16139/;;sheng-cheng-661826118/;;;chitta-baral-8a8438b;", "or_profile": "~Maitreya_Patel2;~Naga_Sai_Abhiram_kusumba1;~Sheng_Cheng1;~Changhoon_Kim1;~Tejas_Gokhale1;~Chitta_Baral1;~Yezhou_Yang1", "aff": "Arizona State University;Arizona State University;Arizona State University;Arizona State University;;Arizona State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;asu.edu;asu.edu;;asu.edu;asu.edu", "position": "PhD student;MS student;PhD student;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\npatel2024tripletclip,\ntitle={Triplet{CLIP}: Improving Compositional Reasoning of {CLIP} via Synthetic Vision-Language Negatives},\nauthor={Maitreya Patel and Naga Sai Abhiram kusumba and Sheng Cheng and Changhoon Kim and Tejas Gokhale and Chitta Baral and Yezhou Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZfRGRK5Kxl}\n}", "github": "", "reviewers": "U1Yz;f4Gf;ZTUH;m3ja", "pdf_size": 2282482, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "2;2;3;4", "novelty": "2;2;2;4", "presentation": "2;3;3;3", "wc_summary": "28;102;84;40", "wc_strengths": "36;115;61;78", "wc_weaknesses": "74;595;96;62", "wc_questions": "2;38;2;123", "wc_limitations": "1;9;1;9", "wc_review": "141;859;244;312", "wc_reply_reviewers": "48;48;9;29", "wc_reply_authors": "59;49;12;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 30.475399915341555 ], "wc_strengths_avg": [ 72.5, 28.72716484444645 ], "wc_weaknesses_avg": [ 206.75, 224.48761101673296 ], "wc_questions_avg": [ 41.25, 49.43366767699924 ], "wc_limitations_avg": [ 5.0, 4.0 ], "wc_review_avg": [ 389.0, 278.09980222934354 ], "wc_reply_reviewers_avg": [ 33.5, 16.132265804901678 ], "wc_reply_authors_avg": [ 41.0, 17.592612085759182 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18333539800331971514&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "asu.edu;asu.edu;asu.edu;asu.edu;;asu.edu;asu.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "IRCAN: Mitigating Knowledge Conflicts in LLM Generation via Identifying and Reweighting Context-Aware Neurons", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94620", "id": "ZfXRAqbBKX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZfXRAqbBKX", "openreview": "https://openreview.net/forum?id=ZfXRAqbBKX", "poster": "/media/PosterPDFs/NeurIPS%202024/94620.png?t=1730358007.4628026", "project": "", "author_site": "Dan Shi, Renren Jin, Tianhao Shen, Weilong Dong, Xinwei Wu, Deyi Xiong", "tldr": "", "abstract": "It is widely acknowledged that large language models (LLMs) encode a vast reservoir of knowledge after being trained on mass data. Recent studies disclose knowledge conflicts in LLM generation, wherein outdated or incorrect parametric knowledge (i.e., encoded knowledge) contradicts new knowledge provided in the context. To mitigate such knowledge conflicts, we propose a novel framework, IRCAN (Identifying and Reweighting Context-Aware Neurons) to capitalize on neurons that are crucial in processing contextual cues. Specifically, IRCAN first identifies neurons that significantly contribute to context processing, utilizing a context-aware attribution score derived from integrated gradients. Subsequently, the identified context-aware neurons are strengthened via reweighting. In doing so, we steer LLMs to generate context-sensitive outputs with respect to the new knowledge provided in the context. Extensive experiments conducted across a variety of models and tasks demonstrate that IRCAN not only achieves remarkable improvements in handling knowledge conflicts but also offers a scalable, plug-and-play solution that can be integrated seamlessly with existing models. Our codes are released at https://github.com/danshi777/IRCAN.", "keywords": "Knowledge Conflicts;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Dan Shi;Renren Jin;Tianhao Shen;Weilong Dong;Xinwei Wu;Deyi Xiong", "authorids": "~Dan_Shi1;~Renren_Jin1;~Tianhao_Shen2;~Weilong_Dong1;~Xinwei_Wu1;~Deyi_Xiong2", "gender": "F;M;M;M;M;M", "homepage": ";;;https://github.com/willowdong;;https://dyxiong.github.io", "dblp": ";329/4176;224/6043;;;55/6548", "google_scholar": "cO6hdhIAAAAJ;qW3oQDUAAAAJ;0iT8lqoAAAAJ;;https://scholar.google.com.hk/citations?user=zkbVg-IAAAAJ;QPLO3myO5PkC", "orcid": "0000-0003-2625-9478;;0000-0002-0526-3219;;0009-0001-2167-128X;0000-0002-2353-5038", "linkedin": ";renren-jin-222a861b3/;;;;", "or_profile": "~Dan_Shi1;~Renren_Jin1;~Tianhao_Shen2;~Weilong_Dong1;~Xinwei_Wu1;~Deyi_Xiong2", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "PhD student;PhD student;PhD student;postgraduate;PhD student;Full Professor", "bibtex": "@inproceedings{\nshi2024ircan,\ntitle={{IRCAN}: Mitigating Knowledge Conflicts in {LLM} Generation via Identifying and Reweighting Context-Aware Neurons},\nauthor={Dan Shi and Renren Jin and Tianhao Shen and Weilong Dong and Xinwei Wu and Deyi Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZfXRAqbBKX}\n}", "github": "", "reviewers": "NbaM;kYip;qHtz;CBV2", "pdf_size": 808617, "rating": "6;6;6;8", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "92;66;78;45", "wc_strengths": "76;26;34;63", "wc_weaknesses": "189;18;56;99", "wc_questions": "4;114;18;131", "wc_limitations": "1;17;4;31", "wc_review": "362;241;190;369", "wc_reply_reviewers": "10;118;14;17", "wc_reply_authors": "27;401;23;28", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 17.239127008059313 ], "wc_strengths_avg": [ 49.75, 20.474068965401088 ], "wc_weaknesses_avg": [ 90.5, 63.68084484364196 ], "wc_questions_avg": [ 66.75, 56.29109609876148 ], "wc_limitations_avg": [ 13.25, 11.882234638316145 ], "wc_review_avg": [ 290.5, 77.17674520216566 ], "wc_reply_reviewers_avg": [ 39.75, 45.245856163852174 ], "wc_reply_authors_avg": [ 119.75, 162.3905400569873 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1188448658568295229&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CYCLO: Cyclic Graph Transformer Approach to Multi-Object Relationship Modeling in Aerial Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94619", "id": "Zg4zs0l2iH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=Zg4zs0l2iH", "openreview": "https://openreview.net/forum?id=Zg4zs0l2iH", "poster": "/media/PosterPDFs/NeurIPS%202024/94619.png?t=1731387897.015971", "project": "", "author_site": "Trong-Thuan Nguyen, Pha Nguyen, Xin Li, Jackson Cothren, Alper Yilmaz, Khoa Luu", "tldr": "", "abstract": "Video scene graph generation (VidSGG) has emerged as a transformative approach to capturing and interpreting the intricate relationships among objects and their temporal dynamics in video sequences. In this paper, we introduce the new AeroEye dataset that focuses on multi-object relationship modeling in aerial videos. Our AeroEye dataset features various drone scenes and includes a visually comprehensive and precise collection of predicates that capture the intricate relationships and spatial arrangements among objects. To this end, we propose the novel Cyclic Graph Transformer (CYCLO) approach that allows the model to capture both direct and long-range temporal dependencies by continuously updating the history of interactions in a circular manner. The proposed approach also allows one to handle sequences with inherent cyclical patterns and process object relationships in the correct sequential order. Therefore, it can effectively capture periodic and overlapping relationships while minimizing information loss. The extensive experiments on the AeroEye dataset demonstrate the effectiveness of the proposed CYCLO model, demonstrating its potential to perform scene understanding on drone videos. Finally, the CYCLO method consistently achieves State-of-the-Art (SOTA) results on two in-the-wild scene graph generation benchmarks, i.e., PVSG and ASPIRe.", "keywords": "Graph Transformer;Multi-Object Relationship Modeling;Aerial Videos", "primary_area": "machine_vision", "supplementary_material": "", "author": "Trong-Thuan Nguyen;Pha Nguyen;Xin Li;Jackson Cothren;Alper Yilmaz;Khoa Luu", "authorids": "~Trong-Thuan_Nguyen1;~Pha_Nguyen1;~Xin_Li2;~Jackson_Cothren1;~Alper_Yilmaz2;~Khoa_Luu2", "gender": "M;;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=ty0Njf0AAAAJ&hl=vi;;;https://cast.uark.edu/directory/index/uid/jcothre/name/Jackson+David+Cothren/;https://u.osu.edu/pcvlab/publications;https://uark-cviu.github.io", "dblp": "299/1591;;09/1365-5;229/6846.html;11/1315;43/8092", "google_scholar": "ty0Njf0AAAAJ;;gMBvzGoAAAAJ;_WB9fo4AAAAJ;MeQC1XYAAAAJ;JPAl8-gAAAAJ", "orcid": "0000-0001-7729-2927;;0000-0003-2067-2763;0000-0002-5548-6955;0000-0003-0755-2628;0000-0003-2104-0901", "linkedin": "trongthuan205/;;;jcothren/;alper-yilmaz/;khoa-luu-90900215/", "or_profile": "~Trong-Thuan_Nguyen1;~Pha_Nguyen1;~Xin_Li2;~Jackson_Cothren1;~Alper_Yilmaz2;~Khoa_Luu2", "aff": "University of Arkansas - Fayetteville;;State University of New York at Albany;University of Arkansas - Fayetteville;Ohio State University, Columbus;University of Arkansas, Fayetteville", "aff_domain": "uark.edu;;albany.edu;uark.edu;osu.edu;uark.edu", "position": "MS student;;Full Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2024cyclo,\ntitle={{CYCLO}: Cyclic Graph Transformer Approach to Multi-Object Relationship Modeling in Aerial Videos},\nauthor={Trong-Thuan Nguyen and Pha Nguyen and Xin Li and Jackson Cothren and Alper Yilmaz and Khoa Luu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=Zg4zs0l2iH}\n}", "github": "", "reviewers": "Zf4k;5Gaf;QFfh;nRfD", "pdf_size": 15567588, "rating": "4;6;7;7", "confidence": "4;3;3;5", "soundness": "2;2;3;3", "novelty": "2;3;4;3", "presentation": "2;3;4;3", "wc_summary": "91;32;59;78", "wc_strengths": "53;37;63;107", "wc_weaknesses": "155;5;30;81", "wc_questions": "18;229;25;58", "wc_limitations": "5;6;13;48", "wc_review": "322;309;190;372", "wc_reply_reviewers": "0;23;0;13", "wc_reply_authors": "38;19;38;0", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.0, 22.192341021172147 ], "wc_strengths_avg": [ 65.0, 25.96150997149434 ], "wc_weaknesses_avg": [ 67.75, 57.33835976028613 ], "wc_questions_avg": [ 82.5, 85.92002094971812 ], "wc_limitations_avg": [ 18.0, 17.592612085759182 ], "wc_review_avg": [ 298.25, 66.77714803733384 ], "wc_reply_reviewers_avg": [ 9.0, 9.669539802906858 ], "wc_reply_authors_avg": [ 23.75, 15.75396775418815 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18415176221730398285&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uark.edu;;albany.edu;uark.edu;osu.edu;uark.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of Arkansas;State University of New York;Ohio State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uark.edu;https://www.albany.edu;https://www.osu.edu", "aff_unique_abbr": "UARK;SUNY Albany;OSU", "aff_campus_unique_index": "0;1;0;2;0", "aff_campus_unique": "Fayetteville;Albany;Columbus", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Analyzing & Reducing the Need for Learning Rate Warmup in GPT Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94618", "id": "ZgDNrpS46k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZgDNrpS46k", "openreview": "https://openreview.net/forum?id=ZgDNrpS46k", "poster": "/media/PosterPDFs/NeurIPS%202024/94618.png?t=1733712668.6167436", "project": "", "author_site": "Atli Kosson, Bettina Messmer, Martin Jaggi", "tldr": "", "abstract": "Learning Rate Warmup is a popular heuristic for training neural networks, especially at larger batch sizes, despite limited understanding of its benefits. Warmup decreases the update size $\\Delta \\mathbf{w}_t = \\eta_t \\mathbf{u}_t$ early in training by using lower values for the learning rate $\\eta_t$. In this work we argue that warmup benefits training by keeping the overall size of $\\Delta \\mathbf{w}_t$ limited, counteracting large initial values of $\\mathbf{u}_t$. Focusing on small-scale GPT training with AdamW/Lion, we explore the following question: *Why and by which criteria are early updates $\\mathbf{u}_t$ too large?* We analyze different metrics for the update size including the $\\ell_2$-norm, resulting directional change, and impact on the representations of the network, providing a new perspective on warmup. In particular, we find that warmup helps counteract large angular updates as well as a limited critical batch size early in training. Finally, we show that the need for warmup can be significantly reduced or eliminated by modifying the optimizer to explicitly normalize $\\mathbf{u}_t$ based on the aforementioned metrics.", "keywords": "learning rate;warmup;schedule;AdamW;LION;rotational;GPT;LLAMA;transformer;deep learning;empirical;momentum;critical batch size;gradient diversity;signal-to-noise ratio", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Atli Kosson;Bettina Messmer;Martin Jaggi", "authorids": "~Atli_Kosson1;~Bettina_Messmer1;~Martin_Jaggi1", "gender": ";;M", "homepage": ";https://people.epfl.ch/bettina.messmer;https://mlo.epfl.ch", "dblp": ";;17/4402", "google_scholar": ";;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ", "orcid": ";;0000-0003-1579-5558", "linkedin": ";;", "or_profile": "~Atli_Kosson1;~Bettina_Messmer1;~Martin_Jaggi1", "aff": ";EPFL - EPF Lausanne;EPFL", "aff_domain": ";epfl.ch;epfl.ch", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nkosson2024analyzing,\ntitle={Analyzing \\& Reducing the Need for Learning Rate Warmup in {GPT} Training},\nauthor={Atli Kosson and Bettina Messmer and Martin Jaggi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZgDNrpS46k}\n}", "github": "", "reviewers": "ramW;gjgG;pDv7;VUaC", "pdf_size": 1075549, "rating": "6;6;6;7", "confidence": "3;4;3;2", "soundness": "3;3;3;3", "novelty": "3;4;2;3", "presentation": "2;3;3;4", "wc_summary": "90;105;63;72", "wc_strengths": "50;30;64;57", "wc_weaknesses": "319;112;79;128", "wc_questions": "2;73;108;32", "wc_limitations": "46;2;1;38", "wc_review": "507;322;315;327", "wc_reply_reviewers": "22;24;23;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.5, 16.224980739587952 ], "wc_strengths_avg": [ 50.25, 12.695963925594622 ], "wc_weaknesses_avg": [ 159.5, 93.7669984589461 ], "wc_questions_avg": [ 53.75, 40.20183453525473 ], "wc_limitations_avg": [ 21.75, 20.44963324854507 ], "wc_review_avg": [ 367.75, 80.50892807633201 ], "wc_reply_reviewers_avg": [ 22.25, 1.479019945774904 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10328158130605691842&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "VMamba: Visual State Space Model", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94617", "id": "ZgtLQQR1K7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZgtLQQR1K7", "openreview": "https://openreview.net/forum?id=ZgtLQQR1K7", "poster": "", "project": "", "author_site": "Yue Liu, Yunjie Tian, Yuzhong Zhao, Hongtian Yu, Lingxi Xie, Yaowei Wang, Qixiang Ye, Jianbin Jiao, Yunfan Liu", "tldr": "", "abstract": "Designing computationally efficient network architectures remains an ongoing necessity in computer vision. In this paper, we adapt Mamba, a state-space language model, into VMamba, a vision backbone with linear time complexity. At the core of VMamba is a stack of Visual State-Space (VSS) blocks with the 2D Selective Scan (SS2D) module. By traversing along four scanning routes, SS2D bridges the gap between the ordered nature of 1D selective scan and the non-sequential structure of 2D vision data, which facilitates the collection of contextual information from various sources and perspectives. Based on the VSS blocks, we develop a family of VMamba architectures and accelerate them through a succession of architectural and implementation enhancements. Extensive experiments demonstrate VMamba\u2019s\n promising performance across diverse visual perception tasks, highlighting its superior input scaling efficiency compared to existing benchmark models. Source code is available at https://github.com/MzeroMiko/VMamba", "keywords": "State Space Model;Transformer;Computer Vision;Foundation Model", "primary_area": "machine_vision", "supplementary_material": "/attachment/550be5322feb4a54bbb9b3794247a515d062212f.zip", "author": "Yue Liu;Yunjie Tian;Yuzhong Zhao;Hongtian Yu;Lingxi Xie;Yaowei Wang;Qixiang Ye;Jianbin Jiao;Yunfan Liu", "authorids": "~Yue_Liu19;~Yunjie_Tian1;~Yuzhong_Zhao1;~Hongtian_Yu1;~Lingxi_Xie1;~Yaowei_Wang1;~Qixiang_Ye1;~Jianbin_Jiao1;~Yunfan_Liu3", "gender": "Not Specified;M;M;;M;M;M;M;M", "homepage": ";https://sunsmarterjie.github.io/;https://callsys.github.io/zhaoyuzhong.github.io-main/;https://yuhongtian17.github.io/;http://lingxixie.com/;https://dblp.org/pid/68/2992.html;http://people.ucas.ac.cn/~qxye?language=en;http://lamp.ucas.ac.cn/;https://yunfan0621.github.io/", "dblp": ";270/0554;42/8750;355/2588;123/2869;68/2992-1;06/4335;;170/8550-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=DuetWVcAAAAJ;tStQNm4AAAAJ;J80W2-QAAAAJ;EEMm7hwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=tjEfgsEAAAAJ;;YPL33G0AAAAJ", "orcid": "0000-0001-5880-4923;0000-0002-5103-3748;0000-0002-2425-6786;;;0000-0002-6110-4036;;;0000-0001-8929-4866", "linkedin": ";;;;;yaowei-wang-971ab310/;;;", "or_profile": "~Yue_Liu19;~Yunjie_Tian1;~Yuzhong_Zhao1;~Hongtian_Yu1;~Lingxi_Xie1;~Yaowei_Wang1;~Qixiang_Ye1;~Jianbin_Jiao1;~Yunfan_Liu1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Huawei Technologies Ltd.;Pengcheng Laboratory;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;huawei.com;pcl.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn", "position": "PhD student;PhD student;PhD student;MS student;Researcher;Full Professor;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nliu2024vmamba,\ntitle={{VM}amba: Visual State Space Model},\nauthor={Yue Liu and Yunjie Tian and Yuzhong Zhao and Hongtian Yu and Lingxi Xie and Yaowei Wang and Qixiang Ye and Jianbin Jiao and Yunfan Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZgtLQQR1K7}\n}", "github": "", "reviewers": "c4kg;dvTH;ZfkW", "pdf_size": 3076021, "rating": "7;7;8", "confidence": "5;3;4", "soundness": "4;3;4", "novelty": "3;3;3", "presentation": "4;3;4", "wc_summary": "186;58;101", "wc_strengths": "147;48;173", "wc_weaknesses": "248;13;101", "wc_questions": "144;64;38", "wc_limitations": "6;7;29", "wc_review": "731;190;442", "wc_reply_reviewers": "113;15;36", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 115.0, 53.18521097698745 ], "wc_strengths_avg": [ 122.66666666666667, 53.85371131335539 ], "wc_weaknesses_avg": [ 120.66666666666667, 96.94099006898762 ], "wc_questions_avg": [ 82.0, 45.10727953076606 ], "wc_limitations_avg": [ 14.0, 10.614455552060438 ], "wc_review_avg": [ 454.3333333333333, 221.0344367337864 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 42.129430198958175 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1477, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2138825819006865034&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "email": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;huawei.com;pcl.ac.cn;ucas.ac.cn;ucas.ac.cn;ucas.ac.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;0;0;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Huawei;Pengcheng Laboratory", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.huawei.com;", "aff_unique_abbr": "UCAS;Huawei;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Contextual Active Model Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94616", "id": "ZizwgYErtQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZizwgYErtQ", "openreview": "https://openreview.net/forum?id=ZizwgYErtQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94616.png?t=1732126086.8916752", "project": "", "author_site": "Xuefeng Liu, Fangfang Xia, Rick Stevens, Yuxin Chen", "tldr": "", "abstract": "While training models and labeling data are resource-intensive, a wealth of pre-trained models and unlabeled data exists. To effectively utilize these resources, we present an approach to actively select pre-trained models while minimizing labeling costs. We frame this as an online contextual active model selection problem: At each round, the learner receives an unlabeled data point as a context. The objective is to adaptively select the best model to make a prediction while limiting label requests. To tackle this problem, we propose CAMS, a contextual active model selection algorithm that relies on two novel components: (1) a contextual model selection mechanism, which leverages context information to make informed decisions about which model is likely to perform best for a given context, and (2)\nan active query component, which strategically chooses when to request labels for data points, minimizing the overall labeling cost. We provide rigorous theoretical analysis for the regret and query complexity under both adversarial and stochastic settings. Furthermore, we demonstrate the effectiveness of our algorithm on a diverse collection of benchmark classification tasks. Notably, CAMS requires substantially less labeling effort (less than 10%) compared to existing methods on CIFAR10 and DRIFT benchmarks, while achieving similar or better accuracy.", "keywords": "model selection;active learning;online learning", "primary_area": "online_learning", "supplementary_material": "/attachment/41d9cb2430c1fafe17a800cddcd2b1728b8e6afc.zip", "author": "Xuefeng Liu;Fangfang Xia;Rick L. Stevens;Yuxin Chen", "authorids": "~Xuefeng_Liu2;~Fangfang_Xia1;~Rick_L._Stevens1;~Yuxin_Chen1", "gender": ";M;;", "homepage": ";;https://computerscience.uchicago.edu/people/profile/rick-stevens/;http://yuxinchen.org/", "dblp": ";90/6711;;11/5123-1", "google_scholar": ";HN_kkdIAAAAJ;2oSSsLYAAAAJ;-k1N7HAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xuefeng_Liu2;~Fangfang_Xia1;~Rick_L._Stevens1;~Yuxin_Chen1", "aff": ";Argonne National Laboratory;University of Chicago;University of Chicago", "aff_domain": ";anl.gov;uchicago.edu;uchicago.edu", "position": ";Computer Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024contextual,\ntitle={Contextual Active Model Selection},\nauthor={Xuefeng Liu and Fangfang Xia and Rick L. Stevens and Yuxin Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZizwgYErtQ}\n}", "github": "", "reviewers": "E9VN;FX2o;PzjT;jCcC", "pdf_size": 26077365, "rating": "4;5;5;7", "confidence": "3;3;3;3", "soundness": "2;3;3;4", "novelty": "2;2;2;4", "presentation": "3;2;3;3", "wc_summary": "76;69;59;36", "wc_strengths": "30;79;42;57", "wc_weaknesses": "196;191;90;147", "wc_questions": "3;54;19;6", "wc_limitations": "1;15;15;39", "wc_review": "306;408;225;285", "wc_reply_reviewers": "0;0;14;9", "wc_reply_authors": "0;239;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.0, 15.116216457830975 ], "wc_strengths_avg": [ 52.0, 18.289341158171883 ], "wc_weaknesses_avg": [ 156.0, 42.60868456077939 ], "wc_questions_avg": [ 20.5, 20.254629100529094 ], "wc_limitations_avg": [ 17.5, 13.665650368716449 ], "wc_review_avg": [ 306.0, 65.96590028188807 ], "wc_reply_reviewers_avg": [ 5.75, 6.015604707757983 ], "wc_reply_authors_avg": [ 59.75, 103.49003575224042 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PwDE6Nup4MIJ:scholar.google.com/&scioq=Contextual+Active+Model+Selection&hl=en&as_sdt=0,10", "gs_version_total": 2, "email": ";anl.gov;uchicago.edu;uchicago.edu", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Argonne National Laboratory;University of Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.anl.gov;https://www.uchicago.edu", "aff_unique_abbr": "ANL;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "How does Inverse RL Scale to Large State Spaces? A Provably Efficient Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94615", "id": "ZjgcYMkCmX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZjgcYMkCmX", "openreview": "https://openreview.net/forum?id=ZjgcYMkCmX", "poster": "/media/PosterPDFs/NeurIPS%202024/94615.png?t=1731360450.5012991", "project": "", "author_site": "Filippo Lazzati, Mirco Mutti, Alberto Maria Metelli", "tldr": "", "abstract": "In online Inverse Reinforcement Learning (IRL), the learner can collect samples about the dynamics of the environment to improve its\nestimate of the reward function. Since IRL suffers from identifiability issues, many theoretical works on online IRL focus on estimating the entire set of rewards that explain the demonstrations, named the *feasible reward set*. However, none of the algorithms available in literature can scale to problems with large state spaces. In this paper, we focus on the online IRL problem in Linear Markov Decision\nProcesses (MDPs). We show that the structure offered by Linear MDPs is not sufficient for efficiently estimating the feasible set when the state space is large. As a consequence, we introduce the novel framework of *rewards compatibility*, which generalizes the notion of feasible set, and we develop CATY-IRL, a sample efficient algorithm whose complexity is independent of the size of the state space in Linear MDPs. When restricted to the tabular setting, we demonstrate that CATY-IRL is minimax optimal up to logarithmic factors. As a by-product, we show that Reward-Free Exploration (RFE) enjoys the same worst-case rate, improving over the state-of-the-art lower bound. Finally, we devise a unifying framework for IRL and RFE that may be of independent interest.", "keywords": "Inverse Reinforcement Learning;Reward-Free Exploration;Linear MDPs", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Filippo Lazzati;Mirco Mutti;Alberto Maria Metelli", "authorids": "~Filippo_Lazzati2;~Mirco_Mutti1;~Alberto_Maria_Metelli2", "gender": "M;;M", "homepage": "https://filippolazzati.github.io/;;https://albertometelli.github.io/", "dblp": "345/8703;222/2815;209/4941", "google_scholar": "lIf4g_IAAAAJ;GlLkJ9UAAAAJ;R31IsPwAAAAJ", "orcid": "0009-0004-2561-417X;;0000-0002-3424-5212", "linkedin": "filippo-lazzati/;;", "or_profile": "~Filippo_Lazzati2;~Mirco_Mutti1;~Alberto_Maria_Metelli2", "aff": "Politecnico di Milano;Technion - Israel Institute of Technology;Politecnico di Milano", "aff_domain": "polimi.it;technion.ac.il;polimi.it", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nlazzati2024how,\ntitle={How does Inverse {RL} Scale to Large State Spaces? A Provably Efficient Approach},\nauthor={Filippo Lazzati and Mirco Mutti and Alberto Maria Metelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZjgcYMkCmX}\n}", "github": "", "reviewers": "KyLX;AxcE;x3io;K3ze", "pdf_size": 736971, "rating": "5;5;7;8", "confidence": "3;3;3;3", "soundness": "2;3;4;4", "novelty": "3;2;3;4", "presentation": "2;3;2;3", "wc_summary": "71;84;138;86", "wc_strengths": "37;33;131;67", "wc_weaknesses": "328;82;217;226", "wc_questions": "3;46;21;26", "wc_limitations": "7;1;10;15", "wc_review": "446;246;517;420", "wc_reply_reviewers": "10;11;188;0", "wc_reply_authors": "0;0;355;0", "reply_reviewers": "1;1;2;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.75, 25.625914617823888 ], "wc_strengths_avg": [ 67.0, 39.21734310225516 ], "wc_weaknesses_avg": [ 213.25, 87.42246564813875 ], "wc_questions_avg": [ 24.0, 15.313392831113555 ], "wc_limitations_avg": [ 8.25, 5.0682837331783235 ], "wc_review_avg": [ 407.25, 99.63778148875055 ], "wc_reply_reviewers_avg": [ 52.25, 78.49323219233618 ], "wc_reply_authors_avg": [ 88.75, 153.71950917173785 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12519351262000512567&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "email": "polimi.it;technion.ac.il;polimi.it", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Politecnico di Milano;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it;https://www.technion.ac.il/en/", "aff_unique_abbr": "Polimi;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Italy;Israel" }, { "title": "Generalizable Implicit Motion Modeling for Video Frame Interpolation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94614", "id": "ZlpJLQsr2v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZlpJLQsr2v", "openreview": "https://openreview.net/forum?id=ZlpJLQsr2v", "poster": "/media/PosterPDFs/NeurIPS%202024/94614.png?t=1731583751.4287896", "project": "", "author_site": "Zujin Guo, Wei Li, Chen Change Loy", "tldr": "", "abstract": "Motion modeling is critical in flow-based Video Frame Interpolation (VFI). Existing paradigms either consider linear combinations of bidirectional flows or directly predict bilateral flows for given timestamps without exploring favorable motion priors, thus lacking the capability of effectively modeling spatiotemporal dynamics in real-world videos. To address this limitation, in this study, we introduce Generalizable Implicit Motion Modeling (GIMM), a novel and effective approach to motion modeling for VFI. Specifically, to enable GIMM as an effective motion modeling paradigm, we design a motion encoding pipeline to model spatiotemporal motion latent from bidirectional flows extracted from pre-trained flow estimators, effectively representing input-specific motion priors. Then, we implicitly predict arbitrary-timestep optical flows within two adjacent input frames via an adaptive coordinate-based neural network, with spatiotemporal coordinates and motion latent as inputs. Our GIMM can be easily integrated with existing flow-based VFI works by supplying accurately modeled motion. We show that GIMM performs better than the current state of the art on standard VFI benchmarks.", "keywords": "Motion Modeling; Optical Flow; Video Frame Interpolation", "primary_area": "machine_vision", "supplementary_material": "/attachment/1bf7f5beab474c865eeec9d69e2cd7ed1ddcd9ac.zip", "author": "Zujin Guo;Wei Li;Chen Change Loy", "authorids": "~Zujin_Guo2;~Wei_Li51;~Chen_Change_Loy2", "gender": "M;M;M", "homepage": "https://weivision.github.io/;https://www.mmlab-ntu.com/person/ccloy/index.html;", "dblp": ";01/5855;324/8297", "google_scholar": "41KAd6AAAAAJ;https://scholar.google.co.uk/citations?user=559LF80AAAAJ;", "orcid": ";0000-0001-5345-1591;", "linkedin": ";;zujin-guo-652b0417a/", "or_profile": "~Wei_Li51;~Chen_Change_Loy2;~ZUJIN_GUO1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "Postdoc;Full Professor;Researcher", "bibtex": "@inproceedings{\nguo2024generalizable,\ntitle={Generalizable Implicit Motion Modeling for Video Frame Interpolation},\nauthor={Zujin Guo and Wei Li and Chen Change Loy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZlpJLQsr2v}\n}", "github": "", "reviewers": "ZRiZ;LUi9;d4Ah;rbyn", "pdf_size": 7416325, "rating": "3;4;5;7", "confidence": "4;4;4;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "159;69;54;58", "wc_strengths": "40;29;56;32", "wc_weaknesses": "729;175;49;181", "wc_questions": "475;60;16;20", "wc_limitations": "7;5;1;6", "wc_review": "1410;338;176;297", "wc_reply_reviewers": "1939;0;0;92", "wc_reply_authors": "2836;52;52;52", "reply_reviewers": "5;0;0;1", "reply_authors": "7;2;2;2", "rating_avg": [ 4.75, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 85.0, 43.07551508687969 ], "wc_strengths_avg": [ 39.25, 10.473180032826706 ], "wc_weaknesses_avg": [ 283.5, 262.55428010222954 ], "wc_questions_avg": [ 142.75, 192.59461960293697 ], "wc_limitations_avg": [ 4.75, 2.277608394786075 ], "wc_review_avg": [ 555.25, 497.07110909808466 ], "wc_reply_reviewers_avg": [ 507.75, 827.185703152563 ], "wc_reply_authors_avg": [ 748.0, 1205.5073620679386 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4225870627539366306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Truthful High Dimensional Sparse Linear Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94613", "id": "ZmIAd3JaZN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZmIAd3JaZN", "openreview": "https://openreview.net/forum?id=ZmIAd3JaZN", "poster": "/media/PosterPDFs/NeurIPS%202024/94613.png?t=1731545559.54652", "project": "", "author_site": "Liyang Zhu, Amina Manseur, Meng Ding, Jinyan Liu, Jinhui Xu, Di Wang", "tldr": "", "abstract": "We study the problem of fitting the high dimensional sparse linear regression model, where the data are provided by strategic or self-interested agents (individuals) who prioritize their privacy of data disclosure. In contrast to the classical setting, our focus is on designing mechanisms that can effectively incentivize most agents to truthfully report their data while preserving the privacy of individual reports. Simultaneously, we seek an estimator which should be close to the underlying parameter. \nWe attempt to solve the problem by deriving a novel private estimator that has a closed-form expression. \nBased on the estimator, we propose a mechanism which has the following properties via some appropriate design of the computation and payment scheme: (1) the mechanism is $(o(1), O(n^{-\\Omega({1})}))$-jointly differentially private, where $n$ is the number of agents; (2) it is an $o(\\frac{1}{n})$-approximate Bayes Nash equilibrium for a $(1-o(1))$-fraction of agents to truthfully report their data; (3) the output could achieve an error of $o(1)$ to the underlying parameter; (4) it is individually rational for a $(1-o(1))$ fraction of agents in the mechanism; (5) the payment budget required from the analyst to run the mechanism is $o(1)$. To the best of our knowledge, this is the first study on designing truthful (and privacy-preserving) mechanisms for high dimensional sparse linear regression.", "keywords": "truthful mechanism;Bayesian game;linear regression;differential privacy", "primary_area": "privacy", "supplementary_material": "", "author": "Liyang Zhu;Amina Manseur;Meng Ding;Jinyan Liu;Jinhui Xu;Di Wang", "authorids": "~Liyang_Zhu1;~Amina_Manseur1;~Meng_Ding3;~Jinyan_Liu1;~Jinhui_Xu1;~Di_Wang1", "gender": "M;F;F;;M;", "homepage": "https://cowboyliyang.github.io/homepage/#;;;;https://www.cse.buffalo.edu/~jinhui/;", "dblp": "189/1681;;;149/9402;24/6437-1.html;", "google_scholar": ";;Ipwvf8oAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;", "linkedin": ";amina-manseur/;;;;", "or_profile": "~Liyang_Zhu1;~Amina_Manseur1;~Meng_Ding3;~Jinyan_Liu1;~Jinhui_Xu1;~Di_Wang1", "aff": "Duke University;Ecole Nationale de la Statistique et de l'Administration Economique;State University of New York at Buffalo;Beijing Institute of Technology;University at Buffalo, State University of New York;", "aff_domain": "duke.edu;ensae.fr;buffalo.edu;bit.edu.cn;buffalo.edu;", "position": "PhD student;MS student;PhD student;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nzhu2024truthful,\ntitle={Truthful High Dimensional Sparse Linear Regression},\nauthor={Liyang Zhu and Amina Manseur and Meng Ding and Jinyan Liu and Jinhui Xu and Di Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZmIAd3JaZN}\n}", "github": "", "reviewers": "BxFH;81kj;YR32", "pdf_size": 507494, "rating": "3;5;7", "confidence": "4;2;3", "soundness": "3;2;3", "novelty": "2;4;3", "presentation": "2;2;3", "wc_summary": "41;42;58", "wc_strengths": "47;39;39", "wc_weaknesses": "89;151;22", "wc_questions": "202;199;2", "wc_limitations": "1;50;2", "wc_review": "380;481;123", "wc_reply_reviewers": "25;40;10", "wc_reply_authors": "1021;287;0", "reply_reviewers": "1;1;1", "reply_authors": "5;3;1", "rating_avg": [ 5.0, 1.632993161855452 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 47.0, 7.788880963698615 ], "wc_strengths_avg": [ 41.666666666666664, 3.7712361663282534 ], "wc_weaknesses_avg": [ 87.33333333333333, 52.677214133711445 ], "wc_questions_avg": [ 134.33333333333334, 93.58181209805437 ], "wc_limitations_avg": [ 17.666666666666668, 22.866763848189994 ], "wc_review_avg": [ 328.0, 150.7072216805375 ], "wc_reply_reviewers_avg": [ 25.0, 12.24744871391589 ], "wc_reply_authors_avg": [ 436.0, 429.93100221624707 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16491442353410059438&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "duke.edu;ensae.fr;buffalo.edu;bit.edu.cn;buffalo.edu;", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Duke University;Ecole Nationale de la Statistique et de l'Administration Economique;State University of New York at Buffalo;Beijing Institute of Technology;University at Buffalo", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.duke.edu;https://ensae.fr;https://www.buffalo.edu;http://www.bit.edu.cn/;https://www.buffalo.edu", "aff_unique_abbr": "Duke;ENSAE;SUNY Buffalo;BIT;UB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "United States;France;China" }, { "title": "Concentrate Attention: Towards Domain-Generalizable Prompt Optimization for Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94612", "id": "ZoarR5QmFX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZoarR5QmFX", "openreview": "https://openreview.net/forum?id=ZoarR5QmFX", "poster": "/media/PosterPDFs/NeurIPS%202024/94612.png?t=1730991767.3236208", "project": "", "author_site": "Chengzhengxu Li, Xiaoming Liu, Zhaohan Zhang, Yichen Wang, Chen Liu, Yu Lan, Chao Shen", "tldr": "", "abstract": "Recent advances in prompt optimization have notably enhanced the performance of pre-trained language models (PLMs) on downstream tasks. However, the potential of optimized prompts on domain generalization has been under-explored. To explore the nature of prompt generalization on unknown domains, we conduct pilot experiments and find that (i) Prompts gaining more attention weight from PLMs\u2019 deep layers are more generalizable and (ii) Prompts with more stable attention distributions in PLMs\u2019 deep layers are more generalizable. Thus, we offer a fresh objective towards domain-generalizable prompts optimization named ''Concentration'', which represents the ''lookback'' attention from the current decoding token to the prompt tokens, to increase the attention strength on prompts and reduce the fluctuation of attention distribution.\nWe adapt this new objective to popular soft prompt and hard prompt optimization methods, respectively. Extensive experiments demonstrate that our idea improves comparison prompt optimization methods by 1.42% for soft prompt generalization and 2.16% for hard prompt generalization in accuracy on the multi-source domain generalization setting, while maintaining satisfying in-domain performance. The promising results validate the effectiveness of our proposed prompt optimization objective and provide key insights into domain-generalizable prompts.", "keywords": "Prompt Optimization;Domain Generalization;Few-shot Learning;Pre-trained Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Chengzhengxu Li;Xiaoming Liu;Zhaohan Zhang;Yichen Wang;Chen Liu;Yu Lan;Chao Shen", "authorids": "~Chengzhengxu_Li1;~Xiaoming_Liu8;~Zhaohan_Zhang2;~Yichen_Wang4;~Chen_Liu23;~Yu_Lan2;~Chao_Shen2", "gender": "M;M;M;M;M;;M", "homepage": "https://github.com/czx-li;https://gr.xjtu.edu.cn/zh/web/xm.liu;;https://yichenzw.com;https://github.com/lccc0528;https://gr.xjtu.edu.cn/web/lanyu-ly/home;http://gr.xjtu.edu.cn/web/cshen", "dblp": ";;280/0085;;;;48/4825-1", "google_scholar": "NSWsjzcAAAAJ;FepcM0IAAAAJ;dbKJKmgAAAAJ;86XiOcsAAAAJ;;;m6QY7-wAAAAJ", "orcid": ";0000-0003-0901-6028;;;;;0000-0002-6959-0569", "linkedin": ";;;;;;", "or_profile": "~Chengzhengxu_Li1;~Xiaoming_Liu8;~Zhaohan_Zhang2;~Yichen_Wang4;~Chen_Liu23;~Yu_Lan2;~Chao_Shen2", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Queen Mary University of London;Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi\u2019an Jiaotong University", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;qmul.ac.uk;xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn", "position": "MS student;Associate Professor;PhD student;Undergrad student;MS student;;Full Professor", "bibtex": "@inproceedings{\nli2024concentrate,\ntitle={Concentrate Attention: Towards Domain-Generalizable Prompt Optimization for Language Models},\nauthor={Chengzhengxu Li and Xiaoming Liu and Zhaohan Zhang and Yichen Wang and Chen Liu and Yu Lan and Chao Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZoarR5QmFX}\n}", "github": "", "reviewers": "rw17;pV2m;a24D;CLup", "pdf_size": 1769288, "rating": "5;6;6;6", "confidence": "5;3;4;3", "soundness": "3;2;3;2", "novelty": "2;3;3;2", "presentation": "3;2;4;3", "wc_summary": "100;110;107;82", "wc_strengths": "42;78;65;139", "wc_weaknesses": "176;71;42;135", "wc_questions": "5;19;15;77", "wc_limitations": "39;1;10;101", "wc_review": "362;279;239;534", "wc_reply_reviewers": "0;23;0;105", "wc_reply_authors": "44;77;44;361", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;2;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.75, 10.871407452579449 ], "wc_strengths_avg": [ 81.0, 35.88175023601831 ], "wc_weaknesses_avg": [ 106.0, 52.58802144975603 ], "wc_questions_avg": [ 29.0, 28.178005607210743 ], "wc_limitations_avg": [ 37.75, 39.124001584705006 ], "wc_review_avg": [ 353.5, 113.26186472065521 ], "wc_reply_reviewers_avg": [ 32.0, 43.17985641476822 ], "wc_reply_authors_avg": [ 131.5, 133.18502168036764 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2798697502410474819&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;xjtu.edu.cn;qmul.ac.uk;xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University;Queen Mary University of London", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.qmul.ac.uk", "aff_unique_abbr": "XJTU;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "TransVIP: Speech to Speech Translation System with Voice and Isochrony Preservation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94611", "id": "ZpVTRQVX5b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZpVTRQVX5b", "openreview": "https://openreview.net/forum?id=ZpVTRQVX5b", "poster": "/media/PosterPDFs/NeurIPS%202024/94611.png?t=1731311657.862629", "project": "", "author_site": "Chenyang Le, Yao Qian, Dongmei Wang, Long Zhou, Shujie LIU, Xiaofei Wang, Midia Yousefi, Yanmin Qian, Jinyu Li, Michael Zeng", "tldr": "", "abstract": "There is a rising interest and trend in research towards directly translating speech from one language to another, known as end-to-end speech-to-speech translation. However, most end-to-end models struggle to outperform cascade models, i.e., a pipeline framework by concatenating speech recognition, machine translation and text-to-speech models. The primary challenges stem from the inherent complexities involved in direct translation tasks and the scarcity of data. In this study, we introduce a novel model framework TransVIP that leverages diverse datasets in a cascade fashion yet facilitates end-to-end inference through joint probability. Furthermore, we propose two separated encoders to preserve the speaker\u2019s voice characteristics and isochrony from the source speech during the translation process, making it highly suitable for scenarios such as video dubbing. Our experiments on the French-English language pair demonstrate that our model outperforms the current state-of-the-art speech-to-speech translation model.", "keywords": "Speech to speech translation;Video dubbing;Voice preservation;Isochrony control", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/8ad31ddfc57b9055103cea4b2623c61b63002223.zip", "author": "Chenyang Le;Yao Qian;Dongmei Wang;Long Zhou;Shujie LIU;Xiaofei Wang;Midia Yousefi;Yanmin Qian;Jinyu Li;Michael Zeng", "authorids": "~Chenyang_Le2;~Yao_Qian2;~Dongmei_Wang3;~Long_Zhou2;~Shujie_LIU1;~Xiaofei_Wang9;~Midia_Yousefi1;~Yanmin_Qian1;~Jinyu_Li1;~Michael_Zeng1", "gender": "F;F;;M;M;;M;M;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/yaoqian/;https://www.microsoft.com/en-us/research/people/dowan/;;https://www.microsoft.com/en-us/research/people/shujliu/;https://www.microsoft.com/en-us/research/people/xiaofewa/;;https://x-lance.sjtu.edu.cn/en/members/yanmin-qian;https://www.microsoft.com/en-us/research/people/jinyli;https://www.microsoft.com/en-us/research/people/nzeng/;https://github.com/nethermanpro", "dblp": ";65/4883;;;58/6576-9;;07/8638;87/4873-1;232/1866-1.html;301/7724", "google_scholar": "o7OfErXuEJIC;DB1bNC0AAAAJ;ZnwgSXIAAAAJ;6mNya-wAAAAJ;pZkELMoAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;grUvupMAAAAJ;;", "orcid": ";;;0009-0008-2599-6752;;;;0000-0002-1089-9748;;", "linkedin": ";dongmei-wang-99b20637/;;;;midia-yousefi-22b94190?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;;michaelnanshanzeng/;", "or_profile": "~Yao_Qian2;~Dongmei_Wang3;~Long_Zhou2;~Shujie_LIU1;~Xiaofei_Wang9;~Midia_Yousefi1;~Yanmin_Qian1;~Jinyu_Li1;~Michael_Zeng1;~chenyang_le1", "aff": "Microsoft;Microsoft;Microsoft Research Asia;Microsoft;Microsoft;Microsoft;Shanghai Jiaotong University;Microsoft;Microsoft;Shanghai Jiaotong University", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;microsoft.com;sjtu.edu.cn", "position": "Principal Researcher;Researcher;Researcher;Researcher;Researcher;Senior Research Scientist;Full Professor;Researcher;Vice President Research Manager;PhD student", "bibtex": "@inproceedings{\nle2024transvip,\ntitle={Trans{VIP}: Speech to Speech Translation System with Voice and Isochrony Preservation},\nauthor={Chenyang Le and Yao Qian and Dongmei Wang and Long Zhou and Shujie LIU and Xiaofei Wang and Midia Yousefi and Yanmin Qian and Jinyu Li and Michael Zeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZpVTRQVX5b}\n}", "github": "", "reviewers": "ujH4;WU4H;JYog;kQBQ", "pdf_size": 1872654, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;2", "wc_summary": "21;89;41;116", "wc_strengths": "32;64;78;47", "wc_weaknesses": "133;132;160;59", "wc_questions": "2;41;151;31", "wc_limitations": "2;2;5;22", "wc_review": "190;328;435;275", "wc_reply_reviewers": "77;102;27;93", "wc_reply_authors": "920;94;0;232", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 66.75, 37.67210506462308 ], "wc_strengths_avg": [ 55.25, 17.340343133859836 ], "wc_weaknesses_avg": [ 121.0, 37.51666296460814 ], "wc_questions_avg": [ 56.25, 56.548098995456954 ], "wc_limitations_avg": [ 7.75, 8.317902379807062 ], "wc_review_avg": [ 307.0, 88.79470704946326 ], "wc_reply_reviewers_avg": [ 74.75, 28.98598799420161 ], "wc_reply_authors_avg": [ 311.5, 360.8777493833611 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6411994060401984108&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;microsoft.com;sjtu.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;0;0;1", "aff_unique_norm": "Microsoft;Shanghai Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Microsoft;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0;1;0;0;1", "aff_country_unique": "United States;China" }, { "title": "SpeechForensics: Audio-Visual Speech Representation Learning for Face Forgery Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94610", "id": "ZsS0megTsh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZsS0megTsh", "openreview": "https://openreview.net/forum?id=ZsS0megTsh", "poster": "", "project": "", "author_site": "Yachao Liang, Min Yu, Gang Li, Jianguo Jiang, Boquan Li, Feng Yu, Ning Zhang, Xiang Meng, Weiqing Huang", "tldr": "", "abstract": "Detection of face forgery videos remains a formidable challenge in the field of digital forensics, especially the generalization to unseen datasets and common perturbations. In this paper, we tackle this issue by leveraging the synergy between audio and visual speech elements, embarking on a novel approach through audio-visual speech representation learning. Our work is motivated by the finding that audio signals, enriched with speech content, can provide precise information effectively reflecting facial movements. To this end, we first learn precise audio-visual speech representations on real videos via a self-supervised masked prediction task, which encodes both local and global semantic information simultaneously. Then, the derived model is directly transferred to the forgery detection task. Extensive experiments demonstrate that our method outperforms the state-of-the-art methods in terms of cross-dataset generalization and robustness, without the participation of any fake video in model training.", "keywords": "Face Forgery Detection;Deepfake Detection;Digital Forensics;Multimodal Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/a5cdb45a67d6e4585f6441886cf6e27314ca243e.zip", "author": "Yachao Liang;Min Yu;Gang Li;Jianguo Jiang;Boquan Li;Feng Yu;Ning Zhang;Xiang Meng;Weiqing Huang", "authorids": "~Yachao_Liang1;~Min_Yu1;~Gang_Li12;~Jianguo_Jiang3;~Boquan_Li1;~Feng_Yu9;~Ning_Zhang28;~Xiang_Meng2;~Weiqing_Huang1", "gender": "M;M;M;;;;;;", "homepage": ";https://people.ucas.edu.cn/~yumin;https://www.tulip.academy;https://people.ucas.ac.cn/http:/people.ucas.ac.cn/~jiangjianguo;;;;;https://teacher.ucas.ac.cn/~0017967", "dblp": "253/7472;;62/2655-9;;;;;;", "google_scholar": ";;https://scholar.google.com.au/citations?user=dqwjm-0AAAAJ;;;;;;", "orcid": "0009-0005-0010-9625;;0000-0003-1583-641X;;;;;;", "linkedin": ";;ligang;;;;;;", "or_profile": "~Yachao_Liang1;~Min_Yu1;~Gang_Li12;~Jianguo_Jiang3;~Boquan_Li1;~Feng_Yu9;~Ning_Zhang28;~Xiang_Meng2;~Weiqing_Huang1", "aff": "Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences ;Deakin University;Institute of Information Engineering, Chinese Academy of Sciences;;;;;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;iie.ac.cn;deakin.edu.au;iie.ac.cn;;;;;iie.ac.cn", "position": "PhD student;Associate Professor;Full Professor;Researcher;;;;;Researcher", "bibtex": "@inproceedings{\nliang2024speechforensics,\ntitle={SpeechForensics: Audio-Visual Speech Representation Learning for Face Forgery Detection},\nauthor={Yachao Liang and Min Yu and Gang Li and Jianguo Jiang and Boquan Li and Feng Yu and Ning Zhang and Xiang Meng and Weiqing Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZsS0megTsh}\n}", "github": "", "reviewers": "rUJS;N2Hi;aFUr;DMx8", "pdf_size": 1157163, "rating": "4;5;6;6", "confidence": "5;5;4;4", "soundness": "3;2;2;3", "novelty": "2;2;2;2", "presentation": "2;2;2;3", "wc_summary": "69;57;68;45", "wc_strengths": "36;20;35;40", "wc_weaknesses": "157;112;322;139", "wc_questions": "41;77;87;114", "wc_limitations": "1;53;6;33", "wc_review": "304;319;518;371", "wc_reply_reviewers": "21;0;58;0", "wc_reply_authors": "144;0;28;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 59.75, 9.730750228014282 ], "wc_strengths_avg": [ 32.75, 7.595228765481656 ], "wc_weaknesses_avg": [ 182.5, 82.11729415902596 ], "wc_questions_avg": [ 79.75, 26.14741861063918 ], "wc_limitations_avg": [ 23.25, 21.05201890555868 ], "wc_review_avg": [ 378.0, 84.56654184723412 ], "wc_reply_reviewers_avg": [ 19.75, 23.689396362085716 ], "wc_reply_authors_avg": [ 43.0, 59.42221806698232 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17649835023395649393&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "iie.ac.cn;iie.ac.cn;deakin.edu.au;iie.ac.cn;;;;;iie.ac.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Deakin University", "aff_unique_dep": "Institute of Information Engineering;", "aff_unique_url": "http://www.cas.cn;https://www.deakin.edu.au", "aff_unique_abbr": "CAS;Deakin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "CriticEval: Evaluating Large-scale Language Model as Critic", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94609", "id": "ZsxZ65YqL1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZsxZ65YqL1", "openreview": "https://openreview.net/forum?id=ZsxZ65YqL1", "poster": "/media/PosterPDFs/NeurIPS%202024/94609.png?t=1729402429.470398", "project": "", "author_site": "Tian Lan, Wenwei Zhang, Chen Xu, Heyan Huang, Dahua Lin, Kai Chen, Xian-Ling Mao", "tldr": "", "abstract": "Critique ability, i.e., the capability of Large Language Models (LLMs) to identify and rectify flaws in responses, is crucial for their applications in self-improvement and scalable oversight. While numerous studies have been proposed to evaluate critique ability of LLMs, their comprehensiveness and reliability are still limited. To overcome this problem, we introduce CriticEval, a novel benchmark designed to comprehensively and reliably evaluate critique ability of LLMs. Specifically, to ensure the comprehensiveness, CriticEval evaluates critique ability from four dimensions across nine diverse task scenarios. It evaluates both scalar-valued and textual critiques, targeting responses of varying quality. To ensure the reliability, a large number of critiques are annotated to serve as references, enabling GPT-4 to evaluate textual critiques reliably. Extensive evaluations of open-source and closed-source LLMs first validate the reliability of evaluation in CriticEval. Then, experimental results demonstrate the promising potential of open-source LLMs, the effectiveness of critique datasets and several intriguing relationships between the critique ability and some critical factors, including task types, response qualities and critique dimensions.", "keywords": "Critique ability;LLM;evaluation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/f00fd471aef0db133929c6ccd4b18f523ecb2970.zip", "author": "Tian Lan;Wenwei Zhang;Chen Xu;Heyan Huang;Dahua Lin;Kai Chen;Xian-Ling Mao", "authorids": "~Tian_Lan7;~Wenwei_Zhang1;~Chen_Xu10;~Heyan_Huang1;~Dahua_Lin1;~Kai_Chen4;~Xian-Ling_Mao1", "gender": "M;M;;F;M;M;M", "homepage": "https://github.com/gmftbyGMFTBY;https://zhangwenwei.cn;https://bhe-lab.org/%E5%BE%90%E6%99%A8/;https://cs.bit.edu.cn/szdw/jsml/js/hhy/index.htm;http://dahua.site;https://chenkai.site/;https://cs.bit.edu.cn/szdw/jsml/js/mxl/index.htm", "dblp": "31/83-3;;;27/8686;53/6088;181/2839-26;46/9687.html", "google_scholar": "https://scholar.google.com/citations?hl=en;QDXADSEAAAAJ;;;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;b2DzFF8AAAAJ", "orcid": "0000-0002-5200-1537;0000-0002-2748-4514;;0000-0002-0320-7520;;0000-0002-6820-2325;", "linkedin": "%E5%A4%A9-%E5%85%B0-13128318b/;wenweizhang-b9769a124/;;;;;", "or_profile": "~Tian_Lan7;~Wenwei_Zhang1;~Chen_Xu10;~Heyan_Huang1;~Dahua_Lin1;~Kai_Chen4;~Xian-Ling_Mao1", "aff": "Beijing Institute of Technology;Shanghai AI Laboratory;Beijing Institute of Technology;Beijing Institute of Technology;The Chinese University of Hong Kong;Shanghai AI Laboratory;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;pjlab.org.cn;bit.edu.cn;bit.edu.cn;cuhk.edu.hk;pjlab.org.cn;bit.edu.cn", "position": "PhD student;Researcher;Postdoc;Full Professor;Associate Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nlan2024criticeval,\ntitle={CriticEval: Evaluating Large-scale Language Model as Critic},\nauthor={Tian Lan and Wenwei Zhang and Chen Xu and Heyan Huang and Dahua Lin and Kai Chen and Xian-Ling Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZsxZ65YqL1}\n}", "github": "", "reviewers": "kJKF;xDZX;2SKd;Hopm", "pdf_size": 2939672, "rating": "5;7;8;9", "confidence": "5;4;5;4", "soundness": "3;4;3;4", "novelty": "2;4;4;4", "presentation": "2;4;4;4", "wc_summary": "136;40;154;114", "wc_strengths": "100;6;113;69", "wc_weaknesses": "188;10;144;95", "wc_questions": "12;28;81;28", "wc_limitations": "1;34;10;10", "wc_review": "437;118;502;316", "wc_reply_reviewers": "48;6;5;0", "wc_reply_authors": "738;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 7.25, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 111.0, 43.37049688440288 ], "wc_strengths_avg": [ 72.0, 41.32190702278877 ], "wc_weaknesses_avg": [ 109.25, 66.07334939292846 ], "wc_questions_avg": [ 37.25, 26.089988501338976 ], "wc_limitations_avg": [ 13.75, 12.255100978776143 ], "wc_review_avg": [ 343.25, 146.17690481057534 ], "wc_reply_reviewers_avg": [ 14.75, 19.330998422223306 ], "wc_reply_authors_avg": [ 184.5, 319.56337399645787 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.50709255283711, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5862195627848437662&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "bit.edu.cn;pjlab.org.cn;bit.edu.cn;bit.edu.cn;cuhk.edu.hk;pjlab.org.cn;bit.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;2;1;0", "aff_unique_norm": "Beijing Institute of Technology;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "BIT;SAIL;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Revisiting, Benchmarking and Understanding Unsupervised Graph Domain Adaptation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97628", "id": "ZsyFwzuDzD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZsyFwzuDzD", "openreview": "https://openreview.net/forum?id=ZsyFwzuDzD", "poster": "/media/PosterPDFs/NeurIPS%202024/97628.png?t=1731333255.012647", "project": "", "author_site": "Meihan Liu, Zhen Zhang, Jiachen Tang, Jiajun Bu, Bingsheng He, Sheng Zhou", "tldr": "", "abstract": "Unsupervised Graph Domain Adaptation (UGDA) involves the transfer of knowledge from a label-rich source graph to an unlabeled target graph under domain discrepancies. Despite the proliferation of methods designed for this emerging task, the lack of standard experimental settings and fair performance comparisons makes it challenging to understand which and when models perform well across different scenarios. To fill this gap, we present the first comprehensive benchmark for unsupervised graph domain adaptation named GDABench, which encompasses 16 algorithms across diverse adaptation tasks. Through extensive experiments, we observe that the performance of current UGDA models varies significantly across different datasets and adaptation scenarios. Specifically, we recognize that when the source and target graphs face significant distribution shifts, it is imperative to formulate strategies to effectively address and mitigate graph structural shifts. We also find that with appropriate neighbourhood aggregation mechanisms, simple GNN variants can even surpass state-of-the-art UGDA baselines. To facilitate reproducibility, we have developed an easy-to-use library PyGDA for training and evaluating existing UGDA methods, providing a standardized platform in this community. Our source codes and datasets can be found at https://github.com/pygda-team/pygda.", "keywords": "Graph Domain Adaptation; Graph Neural Network; Graph Representation Learning;", "primary_area": "", "supplementary_material": "/attachment/2ab7d7e923b6f9a6f3901e1db6144ab40d5e537a.pdf", "author": "Meihan Liu;Zhen Zhang;Jiachen Tang;Jiajun Bu;Bingsheng He;Sheng Zhou", "authorids": "~Meihan_Liu1;~Zhen_Zhang14;~Jiachen_Tang1;~Jiajun_Bu1;~Bingsheng_He1;~Sheng_Zhou1", "gender": "F;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=9Unkk7gAAAAJ&hl=en;https://cszhangzhen.github.io/;https://oscar.blog.uoj.ac/;https://person.zju.edu.cn/bjj;http://www.comp.nus.edu.sg/~hebs/;https://zhoushengisnoob.github.io/", "dblp": ";19/5112-23;;50/3147;h/BingshengHe.html;34/4858-4.html", "google_scholar": ";8hclVjIAAAAJ;;OgZP2okAAAAJ;https://scholar.google.com.tw/citations?user=RogYLKYAAAAJ;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ", "orcid": ";0000-0001-5769-8786;;0000-0002-1097-2044;0000-0001-8618-4581;0000-0003-3645-1041", "linkedin": ";;;;bingsheng-he-7734b131;", "or_profile": "~Meihan_Liu1;~Zhen_Zhang14;~Jiachen_Tang1;~Jiajun_Bu1;~Bingsheng_He1;~Sheng_Zhou1", "aff": "Zhejiang University;National University of Singapore;Zhejiang University;Zhejiang University;National University of Singapore;Zhejiang University", "aff_domain": "zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn", "position": "PhD student;Postdoc;PhD student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024revisiting,\ntitle={Revisiting, Benchmarking and Understanding Unsupervised Graph Domain Adaptation},\nauthor={Meihan Liu and Zhen Zhang and Jiachen Tang and Jiajun Bu and Bingsheng He and Sheng Zhou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ZsyFwzuDzD}\n}", "github": "", "reviewers": "vbxq;iZaq;TozF", "pdf_size": 1039159, "rating": "5;6;8", "confidence": "3;3;5", "wc_summary_and_contributions": "70;43;59", "wc_strengths": "73;3;99", "wc_improvement": "122;1;348", "wc_limitations": "1;1;1", "wc_correctness": "19;1;1", "wc_clarity": "6;1;1", "wc_relation_to_prior_work": "1;1;3", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "294;53;514", "wc_reply_reviewers": "0;0;241", "wc_reply_authors": "187;187;1260", "reply_reviewers": "0;0;2", "reply_authors": "5;4;7", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 57.333333333333336, 11.08552609887726 ], "wc_strengths_avg": [ 58.333333333333336, 40.54078878802872 ], "wc_improvement_avg": [ 157.0, 143.8077420261742 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_correctness_avg": [ 7.0, 8.48528137423857 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 287.0, 188.26754013017396 ], "wc_reply_reviewers_avg": [ 80.33333333333333, 113.60848951063865 ], "wc_reply_authors_avg": [ 544.6666666666666, 505.81705080877697 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 5.333333333333333, 1.247219128924647 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2574725236591481632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;1;0", "aff_unique_norm": "Zhejiang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "Prospective Representation Learning for Non-Exemplar Class-Incremental Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94608", "id": "ZtDARpmbun", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZtDARpmbun", "openreview": "https://openreview.net/forum?id=ZtDARpmbun", "poster": "/media/PosterPDFs/NeurIPS%202024/94608.png?t=1729401291.8755112", "project": "", "author_site": "Wuxuan Shi, Mang Ye", "tldr": "", "abstract": "Non-exemplar class-incremental learning (NECIL) is a challenging task that requires recognizing both old and new classes without retaining any old class samples. Current works mainly deal with the conflicts between old and new classes retrospectively as a new task comes in. However, the lack of old task data makes balancing old and new classes difficult. Instead, we propose a Prospective Representation Learning (PRL) approach to prepare the model for handling conflicts in advance. In the base phase, we squeeze the embedding distribution of the current classes to reserve space for forward compatibility with future classes. In the incremental phase, we make the new class features away from the saved prototypes of old classes in a latent space while aligning the current embedding space with the latent space when updating the model. Thereby, the new class features are clustered in the reserved space to minimize the shock of the new classes on the former classes. Our approach can help existing NECIL baselines to balance old and new classes in a plug-and-play manner. Extensive experiments on several benchmarks demonstrate that our approach outperforms the state-of-the-art methods.", "keywords": "incremental learning;catastrophic forgetting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wuxuan Shi;Mang Ye", "authorids": "~Wuxuan_Shi1;~Mang_Ye1", "gender": "M;M", "homepage": ";https://marswhu.github.io/", "dblp": "331/1466;156/0610", "google_scholar": "p5jF5dAAAAAJ;j-HxRy0AAAAJ", "orcid": ";0000-0003-3989-7655", "linkedin": "https://www.linkedin.cn/injobs/in/wuxuan-shi-4a2235235;", "or_profile": "~Wuxuan_Shi1;~Mang_Ye1", "aff": "Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nshi2024prospective,\ntitle={Prospective Representation Learning for Non-Exemplar Class-Incremental Learning},\nauthor={Wuxuan Shi and Mang Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZtDARpmbun}\n}", "github": "", "reviewers": "SYbV;FnFn;KX3q;nQN3", "pdf_size": 1430965, "rating": "4;5;5;6", "confidence": "3;5;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "77;47;112;77", "wc_strengths": "40;49;48;30", "wc_weaknesses": "35;208;73;89", "wc_questions": "84;81;73;21", "wc_limitations": "1;1;55;7", "wc_review": "237;386;361;224", "wc_reply_reviewers": "46;0;0;14", "wc_reply_authors": "356;0;0;25", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 23.01494079940246 ], "wc_strengths_avg": [ 41.75, 7.628073151196179 ], "wc_weaknesses_avg": [ 101.25, 64.67756566229129 ], "wc_questions_avg": [ 64.75, 25.577089357469898 ], "wc_limitations_avg": [ 16.0, 22.64950330581225 ], "wc_review_avg": [ 302.0, 72.19071962517066 ], "wc_reply_reviewers_avg": [ 15.0, 18.788294228055936 ], "wc_reply_authors_avg": [ 95.25, 150.88965338948856 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8740097803637160115&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "whu.edu.cn;whu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Constrained Adaptive Attack: Effective Adversarial Attack Against Deep Neural Networks for Tabular Data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94607", "id": "ZtTWKr51yH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZtTWKr51yH", "openreview": "https://openreview.net/forum?id=ZtTWKr51yH", "poster": "", "project": "", "author_site": "Thibault Simonetto, Salah GHAMIZI, Maxime Cordy", "tldr": "", "abstract": "State-of-the-art deep learning models for tabular data have recently achieved acceptable performance to be deployed in industrial settings. However, the robustness of these models remains scarcely explored. Contrary to computer vision, there are no effective attacks to properly evaluate the adversarial robustness of deep tabular models due to intrinsic properties of tabular data, such as categorical features, immutability, and feature relationship constraints. To fill this gap, we first propose CAPGD, a gradient attack that overcomes the failures of existing gradient attacks with adaptive mechanisms. This new attack does not require parameter tuning and further degrades the accuracy, up to 81\\% points compared to the previous gradient attacks. Second, we design CAA, an efficient evasion attack that combines our CAPGD attack and MOEVA, the best search-based attack. We demonstrate the effectiveness of our attacks on five architectures and four critical use cases. Our empirical study demonstrates that CAA outperforms all existing attacks in 17 over the 20 settings, and leads to a drop in the accuracy by up to 96.1\\% points and 21.9\\% points compared to CAPGD and MOEVA respectively while being up to five times faster than MOEVA. Given the effectiveness and efficiency of our new attacks, we argue that they should become the minimal test for any new defense or robust architectures in tabular machine learning.", "keywords": "machine learning;security;adversarial attacks;tabular data;threat models;constrained machine learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/570d534a0030e6afc11a697a16a89b12c734c89c.zip", "author": "Thibault Simonetto;Salah GHAMIZI;Maxime Cordy", "authorids": "~Thibault_Simonetto1;~Salah_GHAMIZI1;~Maxime_Cordy1", "gender": "M;M;M", "homepage": "https://wwwen.uni.lu/snt/people/thibault_jean_angel_simonetto;https://www.sghamizi.com;https://maxcordy.github.io/", "dblp": "308/0948;165/4263;73/10839.html", "google_scholar": "4RhGnOoAAAAJ;UcvKgR0AAAAJ;sRXHjkIAAAAJ", "orcid": "0000-0001-5336-3751;0000-0002-0738-8250;0000-0001-8312-1358", "linkedin": ";;", "or_profile": "~Thibault_Simonetto1;~Salah_GHAMIZI1;~Maxime_Cordy1", "aff": "University Of Luxembourg;Luxembourg Institute of Science and Technology ;University of Luxemburg", "aff_domain": "uni.lu;list.lu;uni.lu", "position": "PhD student;Postdoc;Researcher", "bibtex": "@inproceedings{\nsimonetto2024constrained,\ntitle={Constrained Adaptive Attack: Effective Adversarial Attack Against Deep Neural Networks for Tabular Data},\nauthor={Thibault Simonetto and Salah GHAMIZI and Maxime Cordy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZtTWKr51yH}\n}", "github": "", "reviewers": "aMTd;JEHw;5k6D;XFcC", "pdf_size": 641152, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;2;4;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "51;93;151;148", "wc_strengths": "36;45;161;75", "wc_weaknesses": "213;136;647;60", "wc_questions": "4;3;110;61", "wc_limitations": "6;5;49;1", "wc_review": "310;282;1118;345", "wc_reply_reviewers": "62;0;160;47", "wc_reply_authors": "53;363;354;237", "reply_reviewers": "1;0;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 110.75, 41.51129364401933 ], "wc_strengths_avg": [ 79.25, 49.35775015131869 ], "wc_weaknesses_avg": [ 264.0, 227.6455578305889 ], "wc_questions_avg": [ 44.5, 44.51123453691214 ], "wc_limitations_avg": [ 15.25, 19.57517560585345 ], "wc_review_avg": [ 513.75, 349.57715528907204 ], "wc_reply_reviewers_avg": [ 67.25, 58.22961016527588 ], "wc_reply_authors_avg": [ 251.75, 125.0507397019306 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11435063463017253039&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "uni.lu;list.lu;uni.lu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Luxembourg;Luxembourg Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://wwwen.uniluxembourg.lu;https://www.list.lu", "aff_unique_abbr": "UniLu;LIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Luxembourg" }, { "title": "Ctrl-X: Controlling Structure and Appearance for Text-To-Image Generation Without Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94606", "id": "ZulWEWQOp9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZulWEWQOp9", "openreview": "https://openreview.net/forum?id=ZulWEWQOp9", "poster": "/media/PosterPDFs/NeurIPS%202024/94606.png?t=1733167119.3871393", "project": "", "author_site": "Kuan Heng Lin, Sicheng Mo, Ben Klingher, Fangzhou Mu, Bolei Zhou", "tldr": "", "abstract": "Recent controllable generation approaches such as FreeControl and Diffusion Self-Guidance bring fine-grained spatial and appearance control to text-to-image (T2I) diffusion models without training auxiliary modules. However, these methods optimize the latent embedding for each type of score function with longer diffusion steps, making the generation process time-consuming and limiting their flexibility and use. This work presents *Ctrl-X*, a simple framework for T2I diffusion controlling structure and appearance without additional training or guidance. Ctrl-X designs feed-forward structure control to enable the structure alignment with a structure image and semantic-aware appearance transfer to facilitate the appearance transfer from a user-input image. Extensive qualitative and quantitative experiments illustrate the superior performance of Ctrl-X on various condition inputs and model checkpoints. In particular, Ctrl-X supports novel structure and appearance control with arbitrary condition images of any modality, exhibits superior image quality and appearance transfer compared to existing works, and provides instant plug-and-play functionality to any T2I and text-to-video (T2V) diffusion model. See our project page for the code and an overview of the results: https://genforce.github.io/ctrl-x", "keywords": "Diffusion models;image-to-image translation;controllable generation;appearance transfer", "primary_area": "generative_models", "supplementary_material": "/attachment/b4f2e0814f43a4d26e4576ced0eb68fa3ed0fdd5.zip", "author": "Kuan Heng Lin;Sicheng Mo;Ben Klingher;Fangzhou Mu;Bolei Zhou", "authorids": "~Kuan_Heng_Lin1;~Sicheng_Mo2;~Ben_Klingher1;~Fangzhou_Mu1;~Bolei_Zhou5", "gender": "M;M;M;M;M", "homepage": "https://kuanhenglin.github.io;https://bklingher.github.io/;https://fmu2.github.io/;https://boleizhou.github.io/;https://sichengmo.github.io/", "dblp": ";;262/6282;46/8066;319/6786", "google_scholar": "TRUDDkoAAAAJ;;OOymFJsAAAAJ;9D4aG8AAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0003-0605-5862;;0000-0001-5580-2404;;", "linkedin": "kuanhenglin/;;;;", "or_profile": "~Kuan_Heng_Lin1;~Ben_Klingher1;~Fangzhou_Mu1;~Bolei_Zhou5;~SICHENG_MO1", "aff": "Snap Inc.;University of California, Los Angeles;NVIDIA;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "snap.com;ucla.edu;nvidia.com;ucla.edu;ucla.edu", "position": "Intern;MS student;Researcher;Assistant Professor;MS student", "bibtex": "@inproceedings{\nlin2024ctrlx,\ntitle={Ctrl-X: Controlling Structure and Appearance for Text-To-Image Generation Without Guidance},\nauthor={Kuan Heng Lin and Sicheng Mo and Ben Klingher and Fangzhou Mu and Bolei Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZulWEWQOp9}\n}", "github": "", "reviewers": "YZfZ;6fgc;DRHE;tSTQ", "pdf_size": 22591467, "rating": "5;5;6;7", "confidence": "3;5;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "50;89;77;71", "wc_strengths": "25;19;122;71", "wc_weaknesses": "79;104;203;86", "wc_questions": "99;3;115;3", "wc_limitations": "1;1;57;12", "wc_review": "254;216;574;243", "wc_reply_reviewers": "0;21;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 14.13108276106258 ], "wc_strengths_avg": [ 59.25, 41.43896113562694 ], "wc_weaknesses_avg": [ 118.0, 49.914927626913375 ], "wc_questions_avg": [ 55.0, 52.306787322488084 ], "wc_limitations_avg": [ 17.75, 23.101677428273472 ], "wc_review_avg": [ 321.75, 146.29144711841496 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12668828782019472502&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "snap.com;ucla.edu;nvidia.com;ucla.edu;ucla.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Snap Inc.;University of California, Los Angeles;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.snapinc.com;https://www.ucla.edu;https://www.nvidia.com", "aff_unique_abbr": "Snap;UCLA;NVIDIA", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning-to-Cache: Accelerating Diffusion Transformer via Layer Caching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94605", "id": "ZupoMzMNrO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZupoMzMNrO", "openreview": "https://openreview.net/forum?id=ZupoMzMNrO", "poster": "/media/PosterPDFs/NeurIPS%202024/94605.png?t=1733109596.763891", "project": "", "author_site": "Xinyin Ma, Gongfan Fang, Michael Bi Mi, Xinchao Wang", "tldr": "", "abstract": "Diffusion Transformers have recently demonstrated unprecedented generative capabilities for various tasks. The encouraging results, however, come with the cost of slow inference, since each denoising step requires inference on a transformer model with a large scale of parameters. In this study, we make an interesting and somehow surprising observation: the computation of a large proportion of layers in the diffusion transformer, through introducing a caching mechanism, can be readily removed even without updating the model parameters. In the case of U-ViT-H/2, for example, we may remove up to 93.68% of the computation in the cache steps (46.84% for all steps), with less than 0.01 drop in FID. To achieve this, we introduce a novel scheme, named Learning-to-Cache (L2C), that learns to conduct caching in a dynamic manner for diffusion transformers. Specifically, by leveraging the identical structure of layers in transformers and the sequential nature of diffusion, we explore redundant computations between timesteps by treating each layer as the fundamental unit for caching. To address the challenge of the exponential search space in deep models for identifying layers to cache and remove, we propose a novel differentiable optimization objective. An input-invariant yet timestep-variant router is then optimized, which can finally produce a static computation graph. Experimental results show that L2C largely outperforms samplers such as DDIM and DPM-Solver, alongside prior cache-based methods at the same inference speed.", "keywords": "efficient generative model; diffusion model; inference acceleration", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xinyin Ma;Gongfan Fang;Michael Bi Mi;Xinchao Wang", "authorids": "~Xinyin_Ma1;~Gongfan_Fang2;~Michael_Bi_Mi1;~Xinchao_Wang1", "gender": "F;M;M;M", "homepage": "https://horseee.github.io;https://fangggf.github.io/;https://www.huawei.com/en/;https://sites.google.com/site/sitexinchaowang/", "dblp": "267/2244;243/5768;317/0937.html;", "google_scholar": "jFUKS0oAAAAJ;489YZ_kAAAAJ;;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;;", "linkedin": ";;michael-bi-mi-957122a2/;", "or_profile": "~Xinyin_Ma1;~Gongfan_Fang2;~Michael_Bi_Mi1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore;Huawei Technologies Ltd.;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;huawei.com;nus.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nma2024learningtocache,\ntitle={Learning-to-Cache: Accelerating Diffusion Transformer via Layer Caching},\nauthor={Xinyin Ma and Gongfan Fang and Michael Bi Mi and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZupoMzMNrO}\n}", "github": "", "reviewers": "NP2z;faWt;FT6Q", "pdf_size": 7311543, "rating": "6;6;6", "confidence": "4;2;5", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "91;65;52", "wc_strengths": "84;23;92", "wc_weaknesses": "118;19;72", "wc_questions": "2;2;93", "wc_limitations": "12;2;26", "wc_review": "307;111;335", "wc_reply_reviewers": "33;0;20", "wc_reply_authors": "159;0;174", "reply_reviewers": "2;0;1", "reply_authors": "4;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.33333333333333, 16.21384867602041 ], "wc_strengths_avg": [ 66.33333333333333, 30.81485933045218 ], "wc_weaknesses_avg": [ 69.66666666666667, 40.45024378445972 ], "wc_questions_avg": [ 32.333333333333336, 42.897811391983886 ], "wc_limitations_avg": [ 13.333333333333334, 9.843215373488933 ], "wc_review_avg": [ 251.0, 99.65273035229224 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 13.572848714334887 ], "wc_reply_authors_avg": [ 111.0, 78.7273777030583 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7165918576494980240&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "u.nus.edu;u.nus.edu;huawei.com;nus.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National University of Singapore;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.nus.edu.sg;https://www.huawei.com", "aff_unique_abbr": "NUS;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Video Diffusion Models are Training-free Motion Interpreter and Controller", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94604", "id": "ZvQ4Bn75kN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZvQ4Bn75kN", "openreview": "https://openreview.net/forum?id=ZvQ4Bn75kN", "poster": "/media/PosterPDFs/NeurIPS%202024/94604.png?t=1731569427.589405", "project": "", "author_site": "Zeqi Xiao, Yifan Zhou, Shuai Yang, Xingang Pan", "tldr": "", "abstract": "Video generation primarily aims to model authentic and customized motion across frames, making understanding and controlling the motion a crucial topic. Most diffusion-based studies on video motion focus on motion customization with training-based paradigms, which, however, demands substantial training resources and necessitates retraining for diverse models. Crucially, these approaches do not explore how video diffusion models encode cross-frame motion information in their features, lacking interpretability and transparency in their effectiveness. To answer this question, this paper introduces a novel perspective to understand, localize, and manipulate motion-aware features in video diffusion models. Through analysis using Principal Component Analysis (PCA), our work discloses that robust motion-aware feature already exists in video diffusion models. We present a new MOtion FeaTure (MOFT) by eliminating content correlation information and filtering motion channels. MOFT provides a distinct set of benefits, including the ability to encode comprehensive motion information with clear interpretability, extraction without the need for training, and generalizability across diverse architectures. Leveraging MOFT, we propose a novel training-free video motion control framework. Our method demonstrates competitive performance in generating natural and faithful motion, providing architecture-agnostic insights and applicability in a variety of downstream tasks.", "keywords": "video generation;motion control", "primary_area": "generative_models", "supplementary_material": "/attachment/760725ae0ee571e480746606f3ffbbca8b516f4f.zip", "author": "Zeqi Xiao;Yifan Zhou;Shuai Yang;Xingang Pan", "authorids": "~Zeqi_Xiao2;~Yifan_Zhou11;~Shuai_Yang3;~Xingang_Pan1", "gender": "M;M;M;M", "homepage": "https://github.com/xizaoqu;https://zhouyifan.net/about;http://williamyang1991.github.io/;https://xingangpan.github.io/", "dblp": "344/1615;;72/7503-1;211/7940", "google_scholar": ";KOpdhzcAAAAJ;_1UDYowAAAAJ;https://scholar.google.com.hk/citations?user=uo0q9WgAAAAJ", "orcid": ";0009-0004-6250-8333;0000-0002-5576-8629;0000-0002-5825-9467", "linkedin": ";;;", "or_profile": "~Zeqi_Xiao2;~Yifan_Zhou11;~Shuai_Yang3;~Xingang_Pan1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nxiao2024video,\ntitle={Video Diffusion Models are Training-free Motion Interpreter and Controller},\nauthor={Zeqi Xiao and Yifan Zhou and Shuai Yang and Xingang Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZvQ4Bn75kN}\n}", "github": "", "reviewers": "vDWU;P4uW;zmG9;zi7R", "pdf_size": 7583847, "rating": "5;5;5;5", "confidence": "5;5;5;4", "soundness": "3;3;3;2", "novelty": "3;2;2;2", "presentation": "2;3;3;3", "wc_summary": "58;78;34;65", "wc_strengths": "63;55;39;46", "wc_weaknesses": "134;60;88;144", "wc_questions": "118;62;5;52", "wc_limitations": "4;7;21;31", "wc_review": "377;262;187;338", "wc_reply_reviewers": "115;18;16;32", "wc_reply_authors": "127;58;0;86", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.75, 15.990231392947383 ], "wc_strengths_avg": [ 50.75, 9.065732182234372 ], "wc_weaknesses_avg": [ 106.5, 34.15772240650714 ], "wc_questions_avg": [ 59.25, 40.17072939342775 ], "wc_limitations_avg": [ 15.75, 10.894379284750462 ], "wc_review_avg": [ 291.0, 72.90747561121562 ], "wc_reply_reviewers_avg": [ 45.25, 40.739262389002576 ], "wc_reply_authors_avg": [ 67.75, 46.17561585945552 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12400924343239882886&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;ntu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Approximation Rate of the Transformer Architecture for Sequence Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94603", "id": "ZwS2y21mZV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZwS2y21mZV", "openreview": "https://openreview.net/forum?id=ZwS2y21mZV", "poster": "/media/PosterPDFs/NeurIPS%202024/94603.png?t=1731144594.3131056", "project": "", "author_site": "Haotian Jiang, Qianxiao Li", "tldr": "", "abstract": "The Transformer architecture is widely applied in sequence modeling applications, yet the theoretical understanding of its working principles remains limited. In this work, we investigate the approximation rate for single-layer Transformers with one head. We consider general non-linear relationships and identify a novel notion of complexity measures to establish an explicit Jackson-type approximation rate estimate for the Transformer. This rate reveals the structural properties of the Transformer and suggests the types of sequential relationships it is best suited for approximating. In particular, the results on approximation rates enable us to concretely analyze the differences between the Transformer and classical sequence modeling methods, such as recurrent neural networks.", "keywords": "Approximation theory;Transformer;Sequence Modeling", "primary_area": "learning_theory", "supplementary_material": "", "author": "Haotian Jiang;Qianxiao Li", "authorids": "~Haotian_Jiang1;~Qianxiao_Li1", "gender": "M;M", "homepage": ";https://blog.nus.edu.sg/qianxiaoli/", "dblp": ";172/0930.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=zLgReYoAAAAJ", "orcid": ";0000-0002-3903-3737", "linkedin": "%E6%98%8A%E5%A4%A9-%E5%A7%9C-307951110/;", "or_profile": "~Haotian_Jiang1;~Qianxiao_Li1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\njiang2024approximation,\ntitle={Approximation Rate of the Transformer Architecture for Sequence Modeling},\nauthor={Haotian Jiang and Qianxiao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZwS2y21mZV}\n}", "github": "", "reviewers": "fL7j;GFZQ;YNqb", "pdf_size": 6860880, "rating": "5;6;8", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;2;3", "wc_summary": "23;33;44", "wc_strengths": "20;63;20", "wc_weaknesses": "81;24;29", "wc_questions": "27;112;225", "wc_limitations": "1;2;29", "wc_review": "152;234;347", "wc_reply_reviewers": "50;10;10", "wc_reply_authors": "100;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 33.333333333333336, 8.576453553512405 ], "wc_strengths_avg": [ 34.333333333333336, 20.270394394014364 ], "wc_weaknesses_avg": [ 44.666666666666664, 25.77250904010361 ], "wc_questions_avg": [ 121.33333333333333, 81.10213040084432 ], "wc_limitations_avg": [ 10.666666666666666, 12.970050972229147 ], "wc_review_avg": [ 244.33333333333334, 79.9430352744308 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 18.856180831641268 ], "wc_reply_authors_avg": [ 33.333333333333336, 47.14045207910317 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12357483718585962025&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "nus.edu.sg;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "OneBit: Towards Extremely Low-bit Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94602", "id": "ZwiG9KjfHV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZwiG9KjfHV", "openreview": "https://openreview.net/forum?id=ZwiG9KjfHV", "poster": "/media/PosterPDFs/NeurIPS%202024/94602.png?t=1730286434.7278144", "project": "", "author_site": "Yuzhuang Xu, Xu Han, Zonghan Yang, Shuo Wang, Qingfu Zhu, Zhiyuan Liu, Weidong Liu, Wanxiang Che", "tldr": "", "abstract": "Model quantification uses low bit-width values to represent the weight matrices of existing models to be quantized, which is a promising approach to reduce both storage and computational overheads of deploying highly anticipated LLMs. However, current quantization methods suffer severe performance degradation when the bit-width is extremely reduced, and thus focus on utilizing 4-bit or 8-bit values to quantize models. This paper boldly quantizes the weight matrices of LLMs to 1-bit, paving the way for the extremely low bit-width deployment of LLMs. For this target, we introduce a 1-bit model compressing framework named OneBit, including a novel 1-bit parameter representation method to better quantize LLMs as well as an effective parameter initialization method based on matrix decomposition to improve the convergence speed of the quantization framework. Sufficient experimental results indicate that OneBit achieves good performance (at least 81% of the non-quantized performance on LLaMA models) with robust training processes when only using 1-bit weight matrices.", "keywords": "model quantization;weight-only quantization;extremely low-bit;onebit", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9d771af6c61863d79054cb9107d6647894b10e25.zip", "author": "Yuzhuang Xu;Xu Han;Zonghan Yang;Shuo Wang;Qingfu Zhu;Zhiyuan Liu;Weidong Liu;Wanxiang Che", "authorids": "~Yuzhuang_Xu1;~Xu_Han2;~Zonghan_Yang1;~Shuo_Wang13;~Qingfu_Zhu1;~Zhiyuan_Liu1;~Weidong_Liu1;~Wanxiang_Che1", "gender": "M;;M;M;M;M;M;M", "homepage": "https://www.xyznlp.com/;;https://minicheshire.github.io/;;http://ir.hit.edu.cn/~qfzhu/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/info/1126/3586.htm;http://ir.hit.edu.cn/~car/", "dblp": ";;222/7860;;185/0500;53/3245-1;97/2834-1;https://dblp.uni-trier.de/pers/hd/c/Che:Wanxiang", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;rt9HOIUAAAAJ;5vm5yAMAAAAJ;;dT0v5u0AAAAJ;;SVlQ6IEAAAAJ", "orcid": "0009-0002-8256-9286;;;0000-0001-5408-3145;;0000-0002-7709-2543;;", "linkedin": ";;;;;;;", "or_profile": "~Yuzhuang_Xu1;~Xu_Han2;~Zonghan_Yang1;~Shuo_Wang13;~Qingfu_Zhu1;~Zhiyuan_Liu1;~Weidong_Liu1;~Wanxiang_Che1", "aff": "Tsinghua University;;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;;Tsinghua University;, Tsinghua University;Harbin Institute of Technology", "aff_domain": "cs.tsinghua.edu.cn;;cs.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;cs.tsinghua.edu.cn;hit.edu.cn", "position": "MS student;;PhD student;Postdoc;;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024onebit,\ntitle={OneBit: Towards Extremely Low-bit Large Language Models},\nauthor={Yuzhuang Xu and Xu Han and Zonghan Yang and Shuo Wang and Qingfu Zhu and Zhiyuan Liu and Weidong Liu and Wanxiang Che},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZwiG9KjfHV}\n}", "github": "", "reviewers": "mFhw;gg38;9tLG;a9Ag", "pdf_size": 2664036, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "84;146;31;48", "wc_strengths": "33;138;33;97", "wc_weaknesses": "76;106;30;290", "wc_questions": "141;90;31;2", "wc_limitations": "9;51;1;2", "wc_review": "343;531;126;439", "wc_reply_reviewers": "22;130;21;14", "wc_reply_authors": "50;79;50;50", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.25, 44.064583284084286 ], "wc_strengths_avg": [ 75.25, 44.66752175798429 ], "wc_weaknesses_avg": [ 125.5, 98.75601247519059 ], "wc_questions_avg": [ 66.0, 53.670289732774876 ], "wc_limitations_avg": [ 15.75, 20.58367071248469 ], "wc_review_avg": [ 359.75, 150.43831792465642 ], "wc_reply_reviewers_avg": [ 46.75, 48.16313424186595 ], "wc_reply_authors_avg": [ 57.25, 12.55736835487436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1659793916453762104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.tsinghua.edu.cn;;cs.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;cs.tsinghua.edu.cn;hit.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Tsinghua University;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "THU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MoEUT: Mixture-of-Experts Universal Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94601", "id": "ZxVrkm7Bjl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZxVrkm7Bjl", "openreview": "https://openreview.net/forum?id=ZxVrkm7Bjl", "poster": "", "project": "", "author_site": "R\u00f3bert Csord\u00e1s, Kazuki Irie, J\u00fcrgen Schmidhuber, Christopher Potts, Christopher D Manning", "tldr": "", "abstract": "Previous work on Universal Transformers (UTs) has demonstrated the importance of parameter sharing across layers. By allowing recurrence in depth, UTs have advantages over standard Transformers in learning compositional generalizations, but layer-sharing comes with a practical limitation of parameter-compute ratio: it drastically reduces the parameter count compared to the non-shared model with the same dimensionality. Naively scaling up the layer size to compensate for the loss of parameters makes its computational resource requirements prohibitive. In practice, no previous work has succeeded in proposing a shared-layer Transformer design that is competitive in parameter count-dominated tasks such as language modeling. Here we propose MoEUT (pronounced \"moot\"), an effective mixture-of-experts (MoE)-based shared-layer Transformer architecture, which combines several recent advances in MoEs for both feedforward and attention layers of standard Transformers together with novel layer-normalization and grouping schemes that are specific and crucial to UTs. The resulting UT model, for the first time, slightly outperforms standard Transformers on language modeling tasks such as BLiMP and PIQA, while using significantly less compute and memory.", "keywords": "MoE;mixture of experts;universal transformers;language modeling", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/aa250ecbf9b50457b714b86f62643656dd0b26e0.zip", "author": "R\u00f3bert Csord\u00e1s;Kazuki Irie;J\u00fcrgen Schmidhuber;Christopher Potts;Christopher D Manning", "authorids": "~R\u00f3bert_Csord\u00e1s1;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1;~Christopher_Potts1;~Christopher_D_Manning1", "gender": "M;;M;M;M", "homepage": "https://robertcsordas.github.io/;https://sites.harvard.edu/kazuki-irie/;http://people.idsia.ch/~juergen/;http://web.stanford.edu/~cgpotts/;https://nlp.stanford.edu/~manning/", "dblp": "166/4773.html;148/9667;s/JurgenSchmidhuber;13/2617;m/ChristopherDManning", "google_scholar": "av1lplwAAAAJ;https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ;3j08YoAAAAAJ;1zmDOdwAAAAJ", "orcid": ";0000-0003-0923-691X;;0000-0002-7978-6055;0000-0001-6155-649X", "linkedin": "robertcsordas/;;;;christopher-manning-011575/", "or_profile": "~R\u00f3bert_Csord\u00e1s1;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1;~Christopher_Potts1;~Christopher_D_Manning1", "aff": "IDSIA;Harvard University;IDSIA;Stanford University;Computer Science Department, Stanford University", "aff_domain": "idsia.ch;fas.harvard.edu;idsia.ch;stanford.edu;cs.stanford.edu", "position": "Postdoc;Postpostdoc;Scientific Director;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncsord{\\'a}s2024moeut,\ntitle={Mo{EUT}: Mixture-of-Experts Universal Transformers},\nauthor={R{\\'o}bert Csord{\\'a}s and Kazuki Irie and J{\\\"u}rgen Schmidhuber and Christopher Potts and Christopher D Manning},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZxVrkm7Bjl}\n}", "github": "", "reviewers": "gEb4;ZbxN;eZCx;83mA", "pdf_size": 642665, "rating": "6;6;7;7", "confidence": "4;4;4;5", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "235;90;127;190", "wc_strengths": "184;83;72;108", "wc_weaknesses": "56;121;131;157", "wc_questions": "214;206;46;32", "wc_limitations": "8;15;16;11", "wc_review": "697;515;392;498", "wc_reply_reviewers": "31;115;18;75", "wc_reply_authors": "19;321;19;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 160.5, 55.930760767220036 ], "wc_strengths_avg": [ 111.75, 43.70569184900292 ], "wc_weaknesses_avg": [ 116.25, 37.18450618201081 ], "wc_questions_avg": [ 124.5, 85.68984770671494 ], "wc_limitations_avg": [ 12.5, 3.2015621187164243 ], "wc_review_avg": [ 525.5, 109.65970089326343 ], "wc_reply_reviewers_avg": [ 59.75, 38.25816906230616 ], "wc_reply_authors_avg": [ 96.5, 129.65627636177123 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9283960620459235155&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "idsia.ch;fas.harvard.edu;idsia.ch;stanford.edu;cs.stanford.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Institute of Digital Technologies;Harvard University;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.idsia.ch;https://www.harvard.edu;https://www.stanford.edu", "aff_unique_abbr": "IDSIA;Harvard;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Mutli-Armed Bandits with Network Interference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94600", "id": "ZxZOvVOiiL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZxZOvVOiiL", "openreview": "https://openreview.net/forum?id=ZxZOvVOiiL", "poster": "", "project": "", "author_site": "Abhineet Agarwal, Anish Agarwal, Lorenzo Masoero, Justin Whitehouse", "tldr": "", "abstract": "Online experimentation with interference is a common challenge in modern applications such as e-commerce and adaptive clinical trials in medicine. For example, in online marketplaces, the revenue of a good depends on discounts applied to competing goods. Statistical inference with interference is widely studied in the offline setting, but far less is known about how to adaptively assign treatments to minimize regret. We address this gap by studying a multi-armed bandit (MAB) problem where a learner (e-commerce platform) sequentially assigns one of possible $\\mathcal{A}$ actions (discounts) to $N$ units (goods) over $T$ rounds to minimize regret (maximize revenue). Unlike traditional MAB problems, the reward of each unit depends on the treatments assigned to other units, i.e., there is *interference* across the underlying network of units. With $\\mathcal{A}$ actions and $N$ units, minimizing regret is combinatorially difficult since the action space grows as $\\mathcal{A}^N$. To overcome this issue, we study a *sparse network interference* model, where the reward of a unit is only affected by the treatments assigned to $s$ neighboring units. We use tools from discrete Fourier analysis to develop a sparse linear representation of the unit-specific reward $r_n: [\\mathcal{A}]^N \\rightarrow \\mathbb{R} $, and propose simple, linear regression-based algorithms to minimize regret. Importantly, our algorithms achieve provably low regret both when the learner observes the interference neighborhood for all units and when it is unknown. This significantly generalizes other works on this topic which impose strict conditions on the strength of interference on a *known* network, and also compare regret to a markedly weaker optimal action. \nEmpirically, we corroborate our theoretical findings via numerical simulations.", "keywords": "Causal Inference;Online Learning;Recommender Systems;Reinforcement Learning", "primary_area": "bandits", "supplementary_material": "/attachment/452ee540a17b5c8ddfbfcbfef3fdaf36b9f75ab9.zip", "author": "Abhineet Agarwal;Anish Agarwal;Lorenzo Masoero;Justin Whitehouse", "authorids": "~Abhineet_Agarwal1;~Anish_Agarwal1;~Lorenzo_Masoero1;~Justin_Whitehouse1", "gender": "M;;M;", "homepage": ";https://sites.google.com/view/anishagarwal;https://lorenzomasoero.com;https://jwhitehouse11.github.io/", "dblp": "304/4687;;309/8761;218/6673", "google_scholar": ";;https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=LxpnsSMAAAAJ", "orcid": ";;0000-0002-6200-511X;", "linkedin": "abhineet-agarwal-126171185/;;;", "or_profile": "~Abhineet_Agarwal1;~Anish_Agarwal1;~Lorenzo_Masoero1;~Justin_Whitehouse1", "aff": "University of California, Berkeley;Columbia University;Amazon;Carnegie Mellon University", "aff_domain": "berkeley.edu;columbia.edu;amazon.com;cs.cmu.edu", "position": "PhD student;Assistant Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nagarwal2024mutliarmed,\ntitle={Mutli-Armed Bandits with Network Interference},\nauthor={Abhineet Agarwal and Anish Agarwal and Lorenzo Masoero and Justin Whitehouse},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZxZOvVOiiL}\n}", "github": "", "reviewers": "Ry93;zUP1;JzLc;RoeT;ctjH;QVLr", "pdf_size": 579422, "rating": "5;6;7;7;7;8", "confidence": "5;3;4;3;2;4", "soundness": "3;3;4;4;3;4", "novelty": "2;3;3;3;3;3", "presentation": "3;3;3;3;4;3", "wc_summary": "163;61;118;212;135;79", "wc_strengths": "105;33;44;73;110;31", "wc_weaknesses": "63;142;198;75;111;47", "wc_questions": "112;1;115;42;74;67", "wc_limitations": "2;1;214;5;5;9", "wc_review": "445;238;689;407;435;233", "wc_reply_reviewers": "128;0;6;10;13;11", "wc_reply_authors": "27;0;0;0;0;0", "reply_reviewers": "2;0;1;1;1;1", "reply_authors": "2;1;1;1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820632 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 128.0, 50.53051883103253 ], "wc_strengths_avg": [ 66.0, 32.41398874971525 ], "wc_weaknesses_avg": [ 106.0, 51.76227712662315 ], "wc_questions_avg": [ 68.5, 39.466230290380324 ], "wc_limitations_avg": [ 39.333333333333336, 78.15511641743119 ], "wc_review_avg": [ 407.8333333333333, 153.0712433984762 ], "wc_reply_reviewers_avg": [ 28.0, 44.91844461539899 ], "wc_reply_authors_avg": [ 4.5, 10.062305898749054 ], "reply_reviewers_avg": [ 1.0, 0.5773502691896257 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.36927447293799825, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3111352837744591588&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;columbia.edu;amazon.com;cs.cmu.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, Berkeley;Columbia University;Amazon;Carnegie Mellon University", "aff_unique_dep": ";;Amazon.com, Inc.;", "aff_unique_url": "https://www.berkeley.edu;https://www.columbia.edu;https://www.amazon.com;https://www.cmu.edu", "aff_unique_abbr": "UC Berkeley;Columbia;Amazon;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learn more, but bother less: parameter efficient continual learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94599", "id": "ZxtaNh5UYB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZxtaNh5UYB", "openreview": "https://openreview.net/forum?id=ZxtaNh5UYB", "poster": "/media/PosterPDFs/NeurIPS%202024/94599.png?t=1731460239.7152684", "project": "", "author_site": "Fuli Qiao, Mehrdad Mahdavi", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated profound capabilities due to their extensive pre-training on diverse corpora. However, LLMs often struggle with catastrophic forgetting when engaged in sequential task learning. In this paper, we propose a novel parameter-efficient approach for continual learning in LLMs, which empirically investigates knowledge transfer from previously learned tasks to new tasks through low-rank matrix parameters, enhancing the learning of new tasks without significant interference. Our method employs sensitivity-based analysis of low-rank matrix parameters to identify knowledge-specific parameters between sequential tasks, which are used to initialize the low-rank matrix parameters in new tasks. To maintain orthogonality and minimize forgetting, we further involve the gradient projection technique that keeps the low-rank subspaces of each new task orthogonal to those of previous tasks. Our experimental results on continual learning benchmarks validate the efficacy of our proposed method, which outperforms existing state-of-the-art methods in reducing forgetting, enhancing task performance, and preserving the model's ability to generalize to unseen tasks.", "keywords": "continutal learning;LLMs", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Fuli Qiao;Mehrdad Mahdavi", "authorids": "~Fuli_Qiao1;~Mehrdad_Mahdavi2", "gender": "F;M", "homepage": ";http://www.cse.psu.edu/~mzm616/", "dblp": ";88/4321", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;HzxnwocAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Fuli_Qiao1;~Mehrdad_Mahdavi2", "aff": "Pennsylvania State University;Toyota Technological Institute at Chicago", "aff_domain": "psu.edu;ttic.edu", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nqiao2024learn,\ntitle={Learn more, but bother less: parameter efficient continual learning},\nauthor={Fuli Qiao and Mehrdad Mahdavi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZxtaNh5UYB}\n}", "github": "", "reviewers": "qwD9;aBm6;dHuW;X83x", "pdf_size": 966087, "rating": "5;5;5;6", "confidence": "4;4;3;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "121;73;66;102", "wc_strengths": "39;64;91;135", "wc_weaknesses": "183;114;198;149", "wc_questions": "133;119;2;59", "wc_limitations": "7;1;2;10", "wc_review": "483;371;359;455", "wc_reply_reviewers": "36;20;0;23", "wc_reply_authors": "52;37;0;41", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.5, 22.18670773233379 ], "wc_strengths_avg": [ 82.25, 35.57650207651112 ], "wc_weaknesses_avg": [ 161.0, 32.42684073418192 ], "wc_questions_avg": [ 78.25, 52.06426317542581 ], "wc_limitations_avg": [ 5.0, 3.6742346141747673 ], "wc_review_avg": [ 417.0, 53.103672189407014 ], "wc_reply_reviewers_avg": [ 19.75, 12.891373084353738 ], "wc_reply_authors_avg": [ 32.5, 19.551214796017153 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7653051402446597850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "psu.edu;ttic.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Pennsylvania State University;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.tti-chicago.org", "aff_unique_abbr": "PSU;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "OPUS: Occupancy Prediction Using a Sparse Set", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94598", "id": "ZyR0sRQrDd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZyR0sRQrDd", "openreview": "https://openreview.net/forum?id=ZyR0sRQrDd", "poster": "/media/PosterPDFs/NeurIPS%202024/94598.png?t=1730784726.349638", "project": "", "author_site": "JiaBao Wang, Zhaojiang Liu, Qiang Meng, Liujiang Yan, Ke Wang, JIE YANG, Wei Liu, Qibin Hou, Ming-Ming Cheng", "tldr": "", "abstract": "Occupancy prediction, aiming at predicting the occupancy status within voxelized 3D environment, is quickly gaining momentum within the autonomous driving community. Mainstream occupancy prediction works first discretize the 3D environment into voxels, then perform classification on such dense grids. However, inspection on sample data reveals that the vast majority of voxels is unoccupied. Performing classification on these empty voxels demands suboptimal computation resource allocation, and reducing such empty voxels necessitates complex algorithm designs. To this end, we present a novel perspective on the occupancy prediction task: formulating it as a streamlined set prediction paradigm without the need for explicit space modeling or complex sparsification procedures. Our proposed framework, called OPUS, utilizes a transformer encoder-decoder architecture to simultaneously predict occupied locations and classes using a set of learnable queries. Firstly, we employ the Chamfer distance loss to scale the set-to-set comparison problem to unprecedented magnitudes, making training such model end-to-end a reality. Subsequently, semantic classes are adaptively assigned using nearest neighbor search based on the learned locations. In addition, OPUS incorporates a suite of non-trivial strategies to enhance model performance, including coarse-to-fine learning, consistent point sampling, and adaptive re-weighting, etc. Finally, compared with current state-of-the-art methods, our lightest model achieves superior RayIoU on the Occ3D-nuScenes dataset at near 2x FPS, while our heaviest model surpasses previous best results by 6.1 RayIoU.", "keywords": "Occupancy Prediction; 3D Perception; Transformer", "primary_area": "machine_vision", "supplementary_material": "/attachment/1d8b32b85d319f841c7c36c45cddfe6aa0608413.zip", "author": "JiaBao Wang;Zhaojiang Liu;Qiang Meng;Liujiang Yan;Ke Wang;JIE YANG;Wei Liu;Qibin Hou;Ming-Ming Cheng", "authorids": "~JiaBao_Wang2;~Zhaojiang_Liu1;~Qiang_Meng1;~Liujiang_Yan2;~Ke_Wang3;~JIE_YANG18;~Wei_Liu38;~Qibin_Hou1;~Ming-Ming_Cheng3", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/jbwang1997;http://irvingmeng.github.io;;http://www.pami.sjtu.edu.cn;;https://houqb.github.io/;https://mmcheng.net;;https://agito555.github.io/", "dblp": "37/8167;131/7173.html;181/2613-21;;49/3283-44;40/4112;45/7592;344/5275;358/0964", "google_scholar": "S9ErhhEAAAAJ;LdCZhUIAAAAJ;;;Vbb5EGIAAAAJ;fF8OFV8AAAAJ;huWpVyEAAAAJ;;", "orcid": ";0009-0008-5508-5183;;;0000-0001-6351-9019;;0000-0001-5550-8758;;", "linkedin": ";;;;;;;https://www.linkedin.cn/incareer/in/ACoAABN7kBYBrdld9PmDA0v5jEXV5xE22Qkl94Y;", "or_profile": "~JiaBao_Wang2;~Qiang_Meng1;~Ke_Wang3;~JIE_YANG18;~Wei_Liu38;~Qibin_Hou1;~Ming-Ming_Cheng3;~liujiang_yan1;~Liu_Zhaojiang1", "aff": "Nankai University;Didi International Business Group;Didi Research;Shanghai Jiaotong University;Shanghai Jiaotong University;Nankai University;Nankai University;DiDi;Shanghai Jiaotong University", "aff_domain": "nankai.edu.cn;didiglobal.com;didichuxing.com;sjtu.edu.cn;sjtu.edu.cn;nankai.edu.cn;nankai.edu.cn;didiglobal.com;sjtu.edu.cn", "position": "PhD student;Researcher;DiDi-Labs;Full Professor;Associate Professor;Associate Professor;Full Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nwang2024opus,\ntitle={{OPUS}: Occupancy Prediction Using a Sparse Set},\nauthor={JiaBao Wang and Zhaojiang Liu and Qiang Meng and Liujiang Yan and Ke Wang and JIE YANG and Wei Liu and Qibin Hou and Ming-Ming Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZyR0sRQrDd}\n}", "github": "", "reviewers": "iFoi;Zbj3;5U1g;TqRW", "pdf_size": 12094596, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;2", "wc_summary": "127;68;81;124", "wc_strengths": "72;82;91;51", "wc_weaknesses": "301;183;57;450", "wc_questions": "9;2;74;111", "wc_limitations": "9;1;9;18", "wc_review": "518;336;312;754", "wc_reply_reviewers": "0;10;19;44", "wc_reply_authors": "0;26;24;57", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.0, 25.93260495977988 ], "wc_strengths_avg": [ 74.0, 14.882876066137216 ], "wc_weaknesses_avg": [ 247.75, 145.18845511954456 ], "wc_questions_avg": [ 49.0, 45.49175749517708 ], "wc_limitations_avg": [ 9.25, 6.015604707757983 ], "wc_review_avg": [ 480.0, 177.11578134090706 ], "wc_reply_reviewers_avg": [ 18.25, 16.315253599009733 ], "wc_reply_authors_avg": [ 26.75, 20.24073862288627 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1084235429157189977&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;didiglobal.com;didichuxing.com;sjtu.edu.cn;sjtu.edu.cn;nankai.edu.cn;nankai.edu.cn;didiglobal.com;sjtu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;3;0;0;4;3", "aff_unique_norm": "Nankai University;Didi International Business Group;Didi Research;Shanghai Jiao Tong University;DiDi", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.nankai.edu.cn;https://www.didi.com;https://www.didi.com;https://www.sjtu.edu.cn;https://www.didichuxing.com/", "aff_unique_abbr": "NKU;Didi;Didi;SJTU;DiDi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Parameterized Approximation Schemes for Fair-Range Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94597", "id": "ZzgbUDspzJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ZzgbUDspzJ", "openreview": "https://openreview.net/forum?id=ZzgbUDspzJ", "poster": "", "project": "", "author_site": "Zhen Zhang, Xiaohong Chen, Limei Liu, Jie Chen, Junyu Huang, Qilong Feng", "tldr": "", "abstract": "Fair-range clustering extends classical clustering formulations by associating each data point with one or more demographic labels. It imposes lower and upper bound constraints on the number of facilities opened for each label, ensuring fair representation of all demographic groups by the selected facilities. In this paper we focus on the fair-range $k$-median and $k$-means problems in Euclidean spaces. We give $(1+\\varepsilon)$-approximation algorithms with fixed-parameter tractable running times for both problems, parameterized by the numbers of opened facilities and demographic labels. For Euclidean metrics, these are the first parameterized approximation schemes for the problems, improving upon the previously known $O(1)$-approximation ratios given by Thejaswi et al. (KDD 2022).", "keywords": "Approximation algorithms;Fixed-parameter tractability", "primary_area": "fairness", "supplementary_material": "", "author": "Zhen Zhang;Xiaohong Chen;Limei Liu;Jie Chen;Junyu Huang;Qilong Feng", "authorids": "~Zhen_Zhang35;~Xiaohong_Chen3;~Limei_Liu2;~Jie_Chen32;~Junyu_Huang1;~Qilong_Feng1", "gender": ";;;M;M;M", "homepage": ";;;;;", "dblp": ";https://dblp.uni-trier.de/pid/02/1438-1.html;;;277/9525;75/6154", "google_scholar": ";;;;;", "orcid": ";;;0000-0002-6937-8090;;", "linkedin": ";;;;;", "or_profile": "~Zhen_Zhang35;~Xiaohong_Chen3;~Limei_Liu2;~Jie_Chen32;~Junyu_Huang1;~Qilong_Feng1", "aff": ";Central South University;;Hunan University;Central South University;Central South University, China", "aff_domain": ";csu.edu.cn;;hnu.edu.cn;csu.edu.cn;csu.edu.cn", "position": ";Full Professor;;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024parameterized,\ntitle={Parameterized Approximation Schemes for Fair-Range Clustering},\nauthor={Zhen Zhang and Xiaohong Chen and Limei Liu and Jie Chen and Junyu Huang and Qilong Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ZzgbUDspzJ}\n}", "github": "", "reviewers": "c25h;py4M;nDwN;HhQt", "pdf_size": 369553, "rating": "5;6;7;8", "confidence": "3;3;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "87;56;184;274", "wc_strengths": "45;30;54;30", "wc_weaknesses": "210;17;415;109", "wc_questions": "71;16;85;249", "wc_limitations": "15;4;38;16", "wc_review": "428;123;776;678", "wc_reply_reviewers": "81;10;18;52", "wc_reply_authors": "30;9;10;166", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 150.25, 85.63987097141144 ], "wc_strengths_avg": [ 39.75, 10.256095748383007 ], "wc_weaknesses_avg": [ 187.75, 147.8975574510952 ], "wc_questions_avg": [ 105.25, 86.9090760507785 ], "wc_limitations_avg": [ 18.25, 12.336429791475327 ], "wc_review_avg": [ 501.25, 252.5701635189715 ], "wc_reply_reviewers_avg": [ 40.25, 28.32291475113393 ], "wc_reply_authors_avg": [ 53.75, 65.34667168264961 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zohRAYAeWEgJ:scholar.google.com/&scioq=Parameterized+Approximation+Schemes+for+Fair-Range+Clustering&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": ";csu.edu.cn;;hnu.edu.cn;csu.edu.cn;csu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Central South University;Hunan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.csu.edu.cn;http://www.hunu.edu.cn/", "aff_unique_abbr": "CSU;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Croissant: A Metadata Format for ML-Ready Datasets", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97627", "id": "a0WAM6q6fV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a0WAM6q6fV", "openreview": "https://openreview.net/forum?id=a0WAM6q6fV", "poster": "", "project": "", "author_site": "Mubashara Akhtar, Omar Benjelloun, Costanza Conforti, Luca Foschini, Joan Giner-Miguelez, Pieter Gijsbers, Sujata Goswami, Nitisha Jain, Michalis Karamousadakis, Michael Kuchnik, Satyapriya Krishna, Sylvain Lesage, Quentin Lhoest, Pierre Marcenac, Manil Maskey, Peter Mattson, Luis Oala, Hamidah Oderinwale, Pierre Ruyssen, Tim Santos, Rajat Shinde, Elena Simperl, Arjun Suresh, Goeffry Thomas, Slava Tykhonov, Joaquin Vanschoren, Susheel Varma, Jos van der Velde, Steffen Vogler, Carole-Jean Wu, Luyao Zhang", "tldr": "", "abstract": "Data is a critical resource for machine learning (ML), yet working with data remains a key friction point. This paper introduces Croissant, a metadata format for datasets that creates a shared representation across ML tools, frameworks, and platforms. Croissant makes datasets more discoverable, portable, and interoperable, thereby addressing significant challenges in ML data management. Croissant is already supported by several popular dataset repositories, spanning hundreds of thousands of datasets, enabling easy loading into the most commonly-used ML frameworks, regardless of where the data is stored. Our initial evaluation by human raters shows that Croissant metadata is readable, understandable, complete, yet concise.", "keywords": "Dataset documentation;metadata format;dataset discoverability;dataset portability;data-centric ML;data-centric machine learning", "primary_area": "", "supplementary_material": "/attachment/9143159feb5e23adfbbb02310fce7705acac28e9.pdf", "author": "Mubashara Akhtar;Omar Benjelloun;Costanza Conforti;Luca Foschini;Joan Giner-Miguelez;Pieter Gijsbers;Sujata Goswami;Nitisha Jain;Michalis Karamousadakis;Michael Kuchnik;Satyapriya Krishna;Sylvain Lesage;Quentin Lhoest;Pierre Marcenac;Manil Maskey;Peter Mattson;Luis Oala;Hamidah Oderinwale;Pierre Ruyssen;Tim Santos;Rajat Shinde;Elena Simperl;Arjun Suresh;Goeff Thomas;Slava Tykhonov;Joaquin Vanschoren;Susheel Varma;Jos van der Velde;Steffen Vogler;Carole-Jean Wu;Luyao Zhang", "authorids": "~Mubashara_Akhtar1;~Omar_Benjelloun1;~Costanza_Conforti1;~Luca_Foschini1;~Joan_Giner-Miguelez1;~Pieter_Gijsbers1;~Sujata_Goswami1;~Nitisha_Jain1;~Michalis_Karamousadakis1;~Michael_Kuchnik1;~Satyapriya_Krishna2;~Sylvain_Lesage1;~Quentin_Lhoest1;~Pierre_Marcenac1;~Manil_Maskey1;~Peter_Mattson1;~Luis_Oala1;~Hamidah_Oderinwale1;~Pierre_Ruyssen1;~Tim_Santos1;~Rajat_Shinde1;~Elena_Simperl1;~Arjun_Suresh1;~Goeff_Thomas1;~Slava_Tykhonov1;~Joaquin_Vanschoren1;~Susheel_Varma1;~Jos_van_der_Velde2;~Steffen_Vogler1;~Carole-Jean_Wu2;~Luyao_Zhang1", "gender": "F;M;F;M;M;M;F;F;M;M;M;M;M;M;M;M;Non-Binary;F;;M;M;;M;M;M;M;M;;M;F;F", "homepage": "https://www.mubasharaakhtar.com/;;;http://lucafoschini.com;https://www.joanginermiguelez.com;https://pgijsbers.github.io/;https://als.lbl.gov/people/sujata-goswami/;https://www.kcl.ac.uk/people/nitisha-jain;;https://www.cs.cmu.edu/~mkuchnik/;http://satyapriyakrishna.com/;https://rednegra.net;https://huggingface.co/lhoestq;;https://manilmaskey.github.io;;https://luisoala.net/;https://hamidah.me/;https://scholar.google.com/citations?user=euY7T4wAAAAJ&hl=fr;https://www.internetoftim.xyz;http://home.iitb.ac.in/~rajatshinde;;;;;http://www.win.tue.nl/~jvanscho/;https://susheelvarma.com;;https://vogler.io;;http://scholars.duke.edu/person/luyao.zhang", "dblp": "324/3336;b/OmarBenjelloun.html;201/5204.html;11/2127;320/2795;211/8139;;155/7193.html;;228/8029;251/9225;;;;08/3449;;https://dblp.uni-trier.de/pid/261/9215;;;115/5383;372/6962;p/ElenaPaslaruBontasSimperl;;;;85/5045;;166/3242.html;;26/9655;149/0419-1", "google_scholar": "x8K6TisAAAAJ;gOTj3_AAAAAJ;ewPvMsMAAAAJ;LNtKWuIAAAAJ;https://scholar.google.es/citations?user=zTSql-wAAAAJ;;Od0910gAAAAJ;;https://scholar.google.gr/citations?user=5RZVjT4AAAAJ;0vbCjDEAAAAJ;Q5bfPlkAAAAJ;https://scholar.google.fr/citations?user=iTnfCWMAAAAJ;https://scholar.google.com/citations?hl=en;;k4T40hoAAAAJ;kkR3UOkAAAAJ;v3ybnf0AAAAJ;;;zC_dDnsAAAAJ;https://scholar.google.com/citations?hl=en;;;;;HhDsD9UAAAAJ;;;https://scholar.google.de/citations?user=s6CqZi8AAAAJ;S1szbyAAAAAJ;vdP2Pb0AAAAJ", "orcid": ";;0000-0002-9484-9632;http://orcid.org/0000-0003-1409-3570;0000-0003-2335-6977;;0000-0001-6415-1244;;0000-0002-9411-2139;0000-0002-0805-1828;;;;;;;;;;;0000-0002-9505-6204;;;0009-0002-5654-9644;0000-0001-9447-9830;0000-0001-7044-9805;0000-0003-1687-2754;0000-0003-0430-1532;;;0000-0002-1183-2254", "linkedin": ";;costanza-conforti-stanze-b752a9206/;lucafoschini/;joan-giner-miguelez/;;sujata-goswami/;;mike-karamousadakis-99723087/;;satyapriya-krishna-50553084/;sylvain--lesage/;quentin-lhoest/;pierre-marcenac;;;;;;internetoftim/;rajat-shinde/;;arjunsuresh/;goeff-thomas/;vyacheslavtikhonov/;;susheelvarma/;https://nl.linkedin.com/in/jos-van-der-velde-63b14158;;;sunshineluyao/", "or_profile": "~Mubashara_Akhtar1;~Omar_Benjelloun1;~Costanza_Conforti1;~Luca_Foschini1;~Joan_Giner-Miguelez1;~Pieter_Gijsbers1;~Sujata_Goswami1;~Nitisha_Jain1;~Michalis_Karamousadakis1;~Michael_Kuchnik1;~Satyapriya_Krishna2;~Sylvain_Lesage1;~Quentin_Lhoest1;~Pierre_Marcenac1;~Manil_Maskey1;~Peter_Mattson1;~Luis_Oala1;~Hamidah_Oderinwale1;~Pierre_Ruyssen1;~Tim_Santos1;~Rajat_Shinde1;~Elena_Simperl1;~Arjun_Suresh1;~Goeff_Thomas1;~Slava_Tykhonov1;~Joaquin_Vanschoren1;~Susheel_Varma1;~Jos_van_der_Velde2;~Steffen_Vogler1;~Carole-Jean_Wu2;~Luyao_Zhang1", "aff": "King's College London;Google;University of Cambridge;Evidation Health;Universitat Oberta de Catalunya;Eindhoven University of Technology;Oak Ridge National Laboratory;King's College London, University of London;TWI Hellas;Meta Facebook;Harvard University;Hugging Face;Hugging Face;Google;NASA;Google;Dotphoton;McGill University;;;University of Alabama at Huntsville;King's College London;GATE Overflow ;Google;DANS-KNAW;Eindhoven University of Technology;Information Commissioners Office;Eindhoven University of Technology;Bayer Ag;Meta;Duke Kunshan University", "aff_domain": "kcl.ac.uk;google.com;cam.ac.uk;evidation.com;uoc.edu;tue.nl;ornl.gov;kcl.ac.uk;twi.gr;meta.com;harvard.edu;huggingface.co;huggingface.co;google.com;nasa.gov;google.com;dotphoton.com;mail.mcgill.ca;;;uah.edu;kcl.ac.uk;gateoverflow.com;google.com;dans.knaw.nl;tue.nl;ico.org.uk;tue.nl;bayer.com;meta.com;duke.edu", "position": "PhD student;Researcher;PhD student;Chief Data Scientist;PhD student;PhD student;Researcher;Postdoc;Researcher;Researcher;PhD student;Researcher;Researcher;Software Engineer;Researcher;Google Engineer;PhD student;Undergrad student;;;Researcher;Full Professor;Director;Researcher;Researcher;Associate Professor;Full Professor;Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nakhtar2024croissant,\ntitle={Croissant: A Metadata Format for {ML}-Ready Datasets},\nauthor={Mubashara Akhtar and Omar Benjelloun and Costanza Conforti and Luca Foschini and Joan Giner-Miguelez and Pieter Gijsbers and Sujata Goswami and Nitisha Jain and Michalis Karamousadakis and Michael Kuchnik and Satyapriya Krishna and Sylvain Lesage and Quentin Lhoest and Pierre Marcenac and Manil Maskey and Peter Mattson and Luis Oala and Hamidah Oderinwale and Pierre Ruyssen and Tim Santos and Rajat Shinde and Elena Simperl and Arjun Suresh and Goeff Thomas and Slava Tykhonov and Joaquin Vanschoren and Susheel Varma and Jos van der Velde and Steffen Vogler and Carole-Jean Wu and Luyao Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=a0WAM6q6fV}\n}", "github": "", "reviewers": "fBtg;e6ro;gKdc;96aN", "pdf_size": 1229547, "rating": "7;7;8;8", "confidence": "5;4;4;5", "wc_summary_and_contributions": "30;166;156;55", "wc_strengths": "2;32;76;44", "wc_improvement": "4;69;123;64", "wc_limitations": "62;14;60;26", "wc_correctness": "520;29;38;1", "wc_clarity": "16;64;31;1", "wc_relation_to_prior_work": "83;6;50;1", "wc_documentation": "46;19;50;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "764;400;585;194", "wc_reply_reviewers": "186;73;0;4", "wc_reply_authors": "41;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 101.75, 60.009895017405256 ], "wc_strengths_avg": [ 38.5, 26.509432283623127 ], "wc_improvement_avg": [ 65.0, 42.13668235635074 ], "wc_limitations_avg": [ 40.5, 20.946360065653412 ], "wc_correctness_avg": [ 147.0, 215.78345627040085 ], "wc_clarity_avg": [ 28.0, 23.33452377915607 ], "wc_relation_to_prior_work_avg": [ 35.0, 33.637776383108324 ], "wc_documentation_avg": [ 29.0, 20.087309426600665 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 485.75, 211.98157349165987 ], "wc_reply_reviewers_avg": [ 65.75, 75.24750826439372 ], "wc_reply_authors_avg": [ 10.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 31, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1060238688729346391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "email": "kcl.ac.uk;google.com;cam.ac.uk;evidation.com;uoc.edu;tue.nl;ornl.gov;kcl.ac.uk;twi.gr;meta.com;harvard.edu;huggingface.co;huggingface.co;google.com;nasa.gov;google.com;dotphoton.com;mail.mcgill.ca;;;uah.edu;kcl.ac.uk;gateoverflow.com;google.com;dans.knaw.nl;tue.nl;ico.org.uk;tue.nl;bayer.com;meta.com;duke.edu", "author_num": 31, "aff_unique_index": "0;1;2;3;4;5;6;0;7;8;9;10;10;1;11;1;12;13;14;0;15;1;16;5;17;5;18;8;19", "aff_unique_norm": "King's College London;Google;University of Cambridge;Evidation Health;Universitat Oberta de Catalunya;Eindhoven University of Technology;Oak Ridge National Laboratory;TWI Hellas;Meta;Harvard University;Hugging Face;National Aeronautics and Space Administration;Dotphoton;McGill University;University of Alabama in Huntsville;GATE Overflow;Data Archiving and Networked Services (DANS);Information Commissioners Office;Bayer AG;Duke Kunshan University", "aff_unique_dep": ";Google;;;;;;;Meta Platforms, Inc.;;;;;;;;;;;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.google.com;https://www.cam.ac.uk;https://www.evidation.com;https://www.uoc.edu;https://www.tue.nl;https://www.ornl.gov;https://www.twi-hellas.gr;https://meta.com;https://www.harvard.edu;https://huggingface.co;https://www.nasa.gov;;https://www.mcgill.ca;https://www.uah.edu;https://gateoverflow.in;https://dans.knaw.nl;https://ico.org.uk;https://www.bayer.com;https://www.duk/Dk.edu", "aff_unique_abbr": "KCL;Google;Cambridge;;UOC;TU/e;ORNL;;Meta;Harvard;Hugging Face;NASA;;McGill;UAH;GO;DANS;ICO;Bayer;DKU", "aff_campus_unique_index": "1;2;1;1;3;1;4", "aff_campus_unique": ";Mountain View;Cambridge;Huntsville;Kunshan", "aff_country_unique_index": "0;1;0;1;2;3;1;0;4;1;1;1;1;1;1;1;6;1;0;7;1;3;3;0;3;8;1;9", "aff_country_unique": "United Kingdom;United States;Spain;Netherlands;Greece;;Canada;India;Germany;China" }, { "title": "Improving self-training under distribution shifts via anchored confidence with theoretical guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94596", "id": "a17biETKyI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a17biETKyI", "openreview": "https://openreview.net/forum?id=a17biETKyI", "poster": "/media/PosterPDFs/NeurIPS%202024/94596.png?t=1733512406.9044657", "project": "", "author_site": "Taejong Joo, Diego Klabjan", "tldr": "", "abstract": "Self-training often falls short under distribution shifts due to an increased discrepancy between prediction confidence and actual accuracy. This typically necessitates computationally demanding methods such as neighborhood or ensemble-based label corrections. Drawing inspiration from insights on early learning regularization, we develop a principled method to improve self-training under distribution shifts based on temporal consistency. Specifically, we build an uncertainty-aware temporal ensemble with a simple relative thresholding. Then, this ensemble smooths noisy pseudo labels to promote selective temporal consistency. We show that our temporal ensemble is asymptotically correct and our label smoothing technique can reduce the optimality gap of self-training. Our extensive experiments validate that our approach consistently improves self-training performances by 8% to 16% across diverse distribution shift scenarios without a computational overhead. Besides, our method exhibits attractive properties, such as improved calibration performance and robustness to different hyperparameter choices.", "keywords": "self-training;distribution shifts;source-free domain adaptation;test-time adaption", "primary_area": "learning_theory", "supplementary_material": "", "author": "Taejong Joo;Diego Klabjan", "authorids": "~Taejong_Joo1;~Diego_Klabjan1", "gender": "M;M", "homepage": "https://tjoo512.github.io/;http://dynresmanagement.com/index.html", "dblp": "237/3935;17/105", "google_scholar": "ESo1UqMAAAAJ;TaQZ_VUAAAAJ", "orcid": ";0000-0003-4213-9281", "linkedin": ";diegoklabjan", "or_profile": "~Taejong_Joo1;~Diego_Klabjan1", "aff": "Northwestern University;Northwestern University", "aff_domain": "u.northwestern.edu;u.northwestern.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\njoo2024improving,\ntitle={Improving self-training under distribution shifts via anchored confidence with theoretical guarantees},\nauthor={Taejong Joo and Diego Klabjan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a17biETKyI}\n}", "github": "", "reviewers": "qtUp;hP9c;fUqB", "pdf_size": 775060, "rating": "6;7;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "131;171;92", "wc_strengths": "79;191;66", "wc_weaknesses": "175;278;81", "wc_questions": "275;338;201", "wc_limitations": "9;7;38", "wc_review": "669;985;478", "wc_reply_reviewers": "196;53;78", "wc_reply_authors": "487;811;488", "reply_reviewers": "2;1;1", "reply_authors": "4;3;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 131.33333333333334, 32.25247621845836 ], "wc_strengths_avg": [ 112.0, 56.11298126696412 ], "wc_weaknesses_avg": [ 178.0, 80.45288476286396 ], "wc_questions_avg": [ 271.3333333333333, 55.99007848618261 ], "wc_limitations_avg": [ 18.0, 14.165686240583852 ], "wc_review_avg": [ 710.6666666666666, 209.06830356502047 ], "wc_reply_reviewers_avg": [ 109.0, 62.35917467916543 ], "wc_reply_authors_avg": [ 595.3333333333334, 152.49990892529155 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lG8WqYAbd3QJ:scholar.google.com/&scioq=Improving+self-training+under+distribution+shifts+via+anchored+confidence+with+theoretical+guarantees&hl=en&as_sdt=0,40", "gs_version_total": 4, "email": "u.northwestern.edu;u.northwestern.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Northwestern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northwestern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Graph-based Unsupervised Disentangled Representation Learning via Multimodal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94595", "id": "a1wf2N967T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a1wf2N967T", "openreview": "https://openreview.net/forum?id=a1wf2N967T", "poster": "/media/PosterPDFs/NeurIPS%202024/94595.png?t=1731558025.2642481", "project": "", "author_site": "Baao Xie, Qiuyu Chen, Yunnan Wang, Zequn Zhang, Xin Jin, Wenjun Zeng", "tldr": "", "abstract": "Disentangled representation learning (DRL) aims to identify and decompose underlying factors behind observations, thus facilitating data perception and generation. However, current DRL approaches often rely on the unrealistic assumption that semantic factors are statistically independent. In reality, these factors may exhibit correlations, which off-the-shelf solutions have yet to properly address. To tackle this challenge, we introduce a bidirectional weighted graph-based framework, to learn factorized attributes and their interrelations within complex data. Specifically, we propose a $\\beta$-VAE based module to extract factors as the initial nodes of the graph, and leverage the multimodal large language model (MLLM) to discover and rank latent correlations, thereby updating the weighted edges. By integrating these complementary modules, our model successfully achieves fine-grained, practical and unsupervised disentanglement. Experiments demonstrate our method's superior performance in disentanglement and reconstruction. Furthermore, the model inherits enhanced interpretability and generalizability from MLLMs.", "keywords": "Disentangled representation learning;Interpretable and explainable AI;Multimodal large language model;Computer Vision", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Baao Xie;Qiuyu Chen;Yunnan Wang;Zequn Zhang;Xin Jin;Wenjun Zeng", "authorids": "~Baao_Xie3;~Qiuyu_Chen8;~Yunnan_Wang1;~Zequn_Zhang1;~Xin_Jin8;~Wenjun_Zeng3", "gender": "M;M;M;M;M;M", "homepage": ";;https://wangyunnan.github.io/;;http://home.ustc.edu.cn/~jinxustc/;https://www.eias.ac.cn/h-col-187.html", "dblp": ";;284/4034;;68/3340-14;57/145", "google_scholar": "https://scholar.google.co.uk/citations?view_op=list_works;https://scholar.google.com/citations?hl=zh-CN;cxDT_WwAAAAJ;ElVJU4MAAAAJ;byaSC-kAAAAJ;_cUfvYQAAAAJ", "orcid": ";;0000-0002-5480-4587;0000-0001-5566-761X;0000-0002-1820-8358;", "linkedin": ";;;;;", "or_profile": "~Baao_Xie3;~Qiuyu_Chen8;~Yunnan_Wang1;~Zequn_Zhang1;~Xin_Jin8;~Wenjun_Zeng3", "aff": "Ningbo Institute of Digital Twin;Harbin Institute of Technology;\tEastern Institute of Technology;University of Science and Technology of China;Eastern Institute of Technology, Ningbo;Eastern Institute for Advanced Study", "aff_domain": "idt.eitech.edu.cn;stu.hit.edu.cn;eitech.edu.cn;ustc.edu.cn;eitech.edu.cn;eias.ac.cn", "position": "Researcher;Undergrad student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxie2024graphbased,\ntitle={Graph-based Unsupervised Disentangled Representation Learning via Multimodal Large Language Models},\nauthor={Baao Xie and Qiuyu Chen and Yunnan Wang and Zequn Zhang and Xin Jin and Wenjun Zeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a1wf2N967T}\n}", "github": "", "reviewers": "qyA4;Y3bu;SQvd;d3xR;nREj", "pdf_size": 25935770, "rating": "5;6;6;6;6", "confidence": "4;4;4;3;3", "soundness": "3;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "2;2;2;2;3", "wc_summary": "66;64;62;62;138", "wc_strengths": "54;24;50;101;131", "wc_weaknesses": "106;374;96;282;147", "wc_questions": "19;61;18;129;125", "wc_limitations": "1;8;1;11;1", "wc_review": "246;531;227;585;542", "wc_reply_reviewers": "12;320;39;55;28", "wc_reply_authors": "53;625;0;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 78.4, 29.836889918354426 ], "wc_strengths_avg": [ 72.0, 38.55904563134311 ], "wc_weaknesses_avg": [ 201.0, 109.04677895288792 ], "wc_questions_avg": [ 70.4, 48.767202093210145 ], "wc_limitations_avg": [ 4.4, 4.2708313008125245 ], "wc_review_avg": [ 426.2, 156.0530678967895 ], "wc_reply_reviewers_avg": [ 90.8, 115.45631208383541 ], "wc_reply_authors_avg": [ 135.6, 245.5594429053788 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12310077522085743370&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "idt.eitech.edu.cn;stu.hit.edu.cn;eitech.edu.cn;ustc.edu.cn;eitech.edu.cn;eias.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;2;4", "aff_unique_norm": "Ningbo Institute of Digital Twin;Harbin Institute of Technology;Eastern Institute of Technology;University of Science and Technology of China;Eastern Institute for Advanced Study", "aff_unique_dep": ";;;;", "aff_unique_url": ";http://www.hit.edu.cn/;https://www.eit.ac.nz;http://www.ustc.edu.cn;", "aff_unique_abbr": ";HIT;EIT;USTC;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Harbin;Ningbo", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;New Zealand;" }, { "title": "Reconstruction of Manipulated Garment with Guided Deformation Prior", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94594", "id": "a2ccaXTb4I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a2ccaXTb4I", "openreview": "https://openreview.net/forum?id=a2ccaXTb4I", "poster": "", "project": "", "author_site": "Ren Li, Corentin Dumery, Zhantao Deng, Pascal Fua", "tldr": "", "abstract": "Modeling the shape of garments has received much attention, but most existing approaches assume the garments to be worn by someone, which constrains the range of shapes they can assume. In this work, we address shape recovery when garments are being manipulated instead of worn, which gives rise to an even larger range of possible shapes. To this end, we leverage the implicit sewing patterns (ISP) model for garment modeling and extend it by adding a diffusion-based deformation prior to represent these shapes. To recover 3D garment shapes from incomplete 3D point clouds acquired when the garment is folded, we map the points to UV space, in which our priors are learned, to produce partial UV maps, and then fit the priors to recover complete UV maps and 2D to 3D mappings. Experimental results demonstrate the superior reconstruction accuracy of our method compared to previous ones, especially when dealing with large non-rigid deformations arising from the manipulations.", "keywords": "Garment reconstruction;Deformation priors;Geometric deformations;Garment manipulation;3D to UV mapping;Non-rigid reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/bc0e1cde9870198eaf0aac1ec98b32608ebbb1c7.zip", "author": "Ren Li;Corentin Dumery;Zhantao Deng;Pascal Fua", "authorids": "~Ren_Li1;~Corentin_Dumery1;~Zhantao_Deng1;~Pascal_Fua1", "gender": "M;M;M;M", "homepage": "https://liren2515.github.io/page/;https://corentindumery.github.io/;;https://people.epfl.ch/pascal.fua/bio?lang=en", "dblp": ";314/5692;;f/PFua", "google_scholar": "dZU-_FgAAAAJ;https://scholar.google.com/citations?hl=fr;XDfK4f8AAAAJ;https://scholar.google.com/citations?view_op=list_works", "orcid": ";0000-0001-5314-7979;;", "linkedin": ";corentin-dumery-2754281a0/;;pascal-fua-epfl/?lipi=urn%3Ali%3Apage%3Ad_flagship3_search_srp_top%3BOz8ffqlCTcmui5v37AilTQ%3D%3D&licu=urn%3Ali%3Acontrol%3Ad_flagship3_search_srp_top-search_srp_result&lici=IhLn%2B0y4Rj23iI9XNMDNwA%3D%3D", "or_profile": "~Ren_Li1;~Corentin_Dumery1;~Zhantao_Deng1;~Pascal_Fua1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024reconstruction,\ntitle={Reconstruction of Manipulated Garment with Guided Deformation Prior},\nauthor={Ren Li and Corentin Dumery and Zhantao Deng and Pascal Fua},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a2ccaXTb4I}\n}", "github": "", "reviewers": "J5PZ;4mMP;2QwG", "pdf_size": 7924711, "rating": "4;7;7", "confidence": "4;3;3", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "2;3;2", "wc_summary": "64;86;87", "wc_strengths": "27;55;61", "wc_weaknesses": "71;25;47", "wc_questions": "153;20;30", "wc_limitations": "7;24;52", "wc_review": "322;210;277", "wc_reply_reviewers": "0;190;0", "wc_reply_authors": "34;8;0", "reply_reviewers": "0;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 79.0, 10.614455552060438 ], "wc_strengths_avg": [ 47.666666666666664, 14.817407180595247 ], "wc_weaknesses_avg": [ 47.666666666666664, 18.785337071473826 ], "wc_questions_avg": [ 67.66666666666667, 60.47772776448827 ], "wc_limitations_avg": [ 27.666666666666668, 18.55322673343433 ], "wc_review_avg": [ 269.6666666666667, 46.0169051062276 ], "wc_reply_reviewers_avg": [ 63.333333333333336, 89.56685895029602 ], "wc_reply_authors_avg": [ 14.0, 14.514360704718161 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3879474411929805449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Fair GLASSO: Estimating Fair Graphical Models with Unbiased Statistical Behavior", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94593", "id": "a3cauWMXNV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a3cauWMXNV", "openreview": "https://openreview.net/forum?id=a3cauWMXNV", "poster": "/media/PosterPDFs/NeurIPS%202024/94593.png?t=1733506524.404781", "project": "", "author_site": "Madeline Navarro, Samuel Rey, Andrei Buciulea, Antonio G. Marques, Santiago Segarra", "tldr": "", "abstract": "We propose estimating Gaussian graphical models (GGMs) that are fair with respect to sensitive nodal attributes. Many real-world models exhibit unfair discriminatory behavior due to biases in data. Such discrimination is known to be exacerbated when data is equipped with pairwise relationships encoded in a graph. Additionally, the effect of biased data on graphical models is largely underexplored. We thus introduce fairness for graphical models in the form of two bias metrics to promote balance in statistical similarities across nodal groups with different sensitive attributes. Leveraging these metrics, we present Fair GLASSO, a regularized graphical lasso approach to obtain sparse Gaussian precision matrices with unbiased statistical dependencies across groups. We also propose an efficient proximal gradient algorithm to obtain the estimates. Theoretically, we express the tradeoff between fair and accurate estimated precision matrices. Critically, this includes demonstrating when accuracy can be preserved in the presence of a fairness regularizer. On top of this, we study the complexity of Fair GLASSO and demonstrate that our algorithm enjoys a fast convergence rate. Our empirical validation includes synthetic and real-world simulations that illustrate the value and effectiveness of our proposed optimization problem and iterative algorithm.", "keywords": "Graphical model;fairness;graph learning;graphical lasso", "primary_area": "fairness", "supplementary_material": "/attachment/0d7476d49474300a408c7d211efe9e5f5bc046ab.zip", "author": "Madeline Navarro;Samuel Rey;Andrei Buciulea;Antonio Marques;Santiago Segarra", "authorids": "~Madeline_Navarro1;~Samuel_Rey1;~Andrei_Buciulea3;~Antonio_Marques1;~Santiago_Segarra1", "gender": "F;M;M;M;M", "homepage": ";https://gestion2.urjc.es/pdi/ver/samuel.rey.escudero;;https://tsc.urjc.es/~amarques/Recent%20Publications.html#conferences;http://segarra.rice.edu/", "dblp": ";;;;125/2340", "google_scholar": "LJxDdfMAAAAJ;fUy5BM4AAAAJ;66U0mA0AAAAJ;d05JMMkAAAAJ;O1aSMXQAAAAJ", "orcid": "0000-0002-3749-8856;0000-0003-1208-8997;;0000-0002-4642-7718;", "linkedin": ";;;;", "or_profile": "~Madeline_Navarro1;~Samuel_Rey1;~Andrei_Buciulea3;~Antonio_Marques1;~Santiago_Segarra1", "aff": "Rice University;Universidad Rey Juan Carlos;Universidad Rey Juan Carlos;King Juan Carlos University;Rice University", "aff_domain": "rice.edu;urjc.es;urjc.es;urjc.es;rice.edu", "position": "PhD student;Assistant Professor;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nnavarro2024fair,\ntitle={Fair {GLASSO}: Estimating Fair Graphical Models with Unbiased Statistical Behavior},\nauthor={Madeline Navarro and Samuel Rey and Andrei Buciulea and Antonio Marques and Santiago Segarra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a3cauWMXNV}\n}", "github": "", "reviewers": "QDSi;pQPd;V8H3;fwTq", "pdf_size": 4164544, "rating": "4;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "31;87;55;101", "wc_strengths": "83;70;31;54", "wc_weaknesses": "279;107;125;58", "wc_questions": "353;49;4;182", "wc_limitations": "1;53;14;132", "wc_review": "747;366;229;527", "wc_reply_reviewers": "401;200;0;26", "wc_reply_authors": "284;380;0;27", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 27.326726843879417 ], "wc_strengths_avg": [ 59.5, 19.397164741270824 ], "wc_weaknesses_avg": [ 142.25, 82.67216883570916 ], "wc_questions_avg": [ 147.0, 135.75161140848385 ], "wc_limitations_avg": [ 50.0, 51.063685726747146 ], "wc_review_avg": [ 467.25, 192.90201528237074 ], "wc_reply_reviewers_avg": [ 156.75, 160.61969835608582 ], "wc_reply_authors_avg": [ 172.75, 163.10636866781138 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8327420765252332737&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "rice.edu;urjc.es;urjc.es;urjc.es;rice.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Rice University;Universidad Rey Juan Carlos;King Juan Carlos University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.urjc.es;https://www.uc3m.es", "aff_unique_abbr": "Rice;URJC;UC3M", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United States;Spain" }, { "title": "Capturing the denoising effect of PCA via compression ratio", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94592", "id": "a4J7nDLXEM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a4J7nDLXEM", "openreview": "https://openreview.net/forum?id=a4J7nDLXEM", "poster": "", "project": "", "author_site": "Chandra Sekhar Mukherjee, Nikhil Deorkar, Jiapeng Zhang", "tldr": "", "abstract": "Principal component analysis (PCA) is one of the most fundamental tools in machine learning with broad use as a dimensionality reduction and denoising tool. In the later setting, while PCA is known to be effective at subspace recovery and is proven to aid clustering algorithms in some specific settings, its improvement of noisy data is still not well quantified in general. \n\nIn this paper, we propose a novel metric called *compression ratio* to capture the effect of PCA on high-dimensional noisy data.\nWe show that, for data with *underlying community structure*, PCA significantly reduces the distance of data points belonging to the same community while reducing inter-community distance relatively mildly. We explain this phenomenon through both theoretical proofs and experiments on real-world data. \n\nBuilding on this new metric, we design a straightforward algorithm that could be used to detect outliers. Roughly speaking, we argue that points that have a *lower variance of compression ratio* do not share a *common signal* with others (hence could be considered outliers).\n\nWe provide theoretical justification for this simple outlier detection algorithm and use simulations to demonstrate that our method is competitive with popular outlier detection tools. Finally, we run experiments on real-world high-dimension noisy data (single-cell RNA-seq) to show that removing points from these datasets via our outlier detection method improves the accuracy of clustering algorithms. Our method is very competitive with popular outlier detection tools in this task.", "keywords": "PCA;Compressibility;Heavy tailed noise;clustering;outlier detection", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/cab23dfe233b0f60e0d7dbeadc3556ca03b0e6a9.zip", "author": "Chandra Sekhar Mukherjee;Nikhil Deorkar;Jiapeng Zhang", "authorids": "~Chandra_Sekhar_Mukherjee1;~Nikhil_Deorkar1;~Jiapeng_Zhang2", "gender": "M;M;M", "homepage": "https://sites.google.com/view/chandra-mukherjee/home;https://nikhilnd.github.io/;https://sites.google.com/site/jiapeng0708/home", "dblp": "263/7714;;38/9461", "google_scholar": "NYQMTH4AAAAJ;;9eQOP14AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chandra_Sekhar_Mukherjee1;~Nikhil_Deorkar1;~Jiapeng_Zhang2", "aff": "University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nmukherjee2024capturing,\ntitle={Capturing the denoising effect of {PCA} via compression ratio},\nauthor={Chandra Sekhar Mukherjee and Nikhil Deorkar and Jiapeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a4J7nDLXEM}\n}", "github": "", "reviewers": "51iT;QWbP;mA6h;1iTr", "pdf_size": 838639, "rating": "3;5;5;6", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "59;81;76;127", "wc_strengths": "49;43;44;21", "wc_weaknesses": "100;18;45;35", "wc_questions": "121;33;169;29", "wc_limitations": "4;1;3;4", "wc_review": "333;176;337;216", "wc_reply_reviewers": "133;0;0;0", "wc_reply_authors": "242;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 25.17315037892556 ], "wc_strengths_avg": [ 39.25, 10.779030568655049 ], "wc_weaknesses_avg": [ 49.5, 30.712375355872428 ], "wc_questions_avg": [ 88.0, 59.48949487094339 ], "wc_limitations_avg": [ 3.0, 1.224744871391589 ], "wc_review_avg": [ 265.5, 70.9383535190943 ], "wc_reply_reviewers_avg": [ 33.25, 57.59068935166517 ], "wc_reply_authors_avg": [ 60.5, 104.78907385791707 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IOb02K0RnrYJ:scholar.google.com/&scioq=Capturing+the+denoising+effect+of+PCA+via+compression+ratio&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "usc.edu;usc.edu;usc.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Block Sparse Bayesian Learning: A Diversified Scheme", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94591", "id": "a4cPpx1xYg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a4cPpx1xYg", "openreview": "https://openreview.net/forum?id=a4cPpx1xYg", "poster": "/media/PosterPDFs/NeurIPS%202024/94591.png?t=1730686502.0499268", "project": "", "author_site": "Yanhao Zhang, Zhihan Zhu, Yong Xia", "tldr": "", "abstract": "This paper introduces a novel prior called Diversified Block Sparse Prior to characterize the widespread block sparsity phenomenon in real-world data. By allowing diversification on intra-block variance and inter-block correlation matrices, we effectively address the sensitivity issue of existing block sparse learning methods to pre-defined block information, which enables adaptive block estimation while mitigating the risk of overfitting. Based on this, a diversified block sparse Bayesian learning method (DivSBL) is proposed, utilizing EM algorithm and dual ascent method for hyperparameter estimation. Moreover, we establish the global and local optimality theory of our model. Experiments validate the advantages of DivSBL over existing algorithms.", "keywords": "Compressed Sensing;Diversified Block Sparse Prior;Sparse Bayesian Learning;Lagrange Dual Ascent.", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ff5753c6501c4f5d26ef830e940ee1434e8fcec3.zip", "author": "Yanhao Zhang;Zhihan Zhu;Yong Xia", "authorids": "~Yanhao_Zhang5;~Zhihan_Zhu2;~Yong_Xia3", "gender": "F;M;M", "homepage": ";;", "dblp": ";;", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": "0009-0009-6508-7956;0009-0005-5923-2812;", "linkedin": ";;", "or_profile": "~Yanhao_Zhang5;~Zhihan_Zhu2;~Yong_Xia3", "aff": "Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024block,\ntitle={Block Sparse Bayesian Learning: A Diversified Scheme},\nauthor={Yanhao Zhang and Zhihan Zhu and Yong Xia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a4cPpx1xYg}\n}", "github": "", "reviewers": "2MRM;rTtn;KPTu;c5nz", "pdf_size": 4346963, "rating": "4;4;6;7", "confidence": "3;4;3;3", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "66;43;51;87", "wc_strengths": "62;37;29;38", "wc_weaknesses": "47;283;222;12", "wc_questions": "215;54;2;31", "wc_limitations": "1;17;2;41", "wc_review": "391;434;306;209", "wc_reply_reviewers": "0;0;21;17", "wc_reply_authors": "0;0;27;25", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.75, 16.753730927766508 ], "wc_strengths_avg": [ 41.5, 12.338962679253067 ], "wc_weaknesses_avg": [ 141.0, 114.2387850075446 ], "wc_questions_avg": [ 75.5, 82.62112320708307 ], "wc_limitations_avg": [ 15.25, 16.161296358893985 ], "wc_review_avg": [ 335.0, 86.10168407179967 ], "wc_reply_reviewers_avg": [ 9.5, 9.604686356149273 ], "wc_reply_authors_avg": [ 13.0, 13.019216566291536 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3424772505268735343&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "SceneDiffuser: Efficient and Controllable Driving Simulation Initialization and Rollout", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94590", "id": "a4qT29Levh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a4qT29Levh", "openreview": "https://openreview.net/forum?id=a4qT29Levh", "poster": "", "project": "", "author_site": "Max Jiang, Yijing Bai, Andre Cornman, Christopher Davis, XIUKUN HUANG, Hong Jeon, Sakshum Kulshrestha, John Lambert, Shuangyu Li, Xuanyu Zhou, Carlos Fuertes, Chang Yuan, Mingxing Tan, Yin Zhou, Dragomir Anguelov", "tldr": "", "abstract": "Simulation with realistic and interactive agents represents a key task for autonomous vehicle (AV) software development in order to test AV performance in prescribed, often long-tail scenarios. In this work, we propose SceneDiffuser, a scene-level diffusion prior for traffic simulation. We present a singular framework that unifies two key stages of simulation: scene initialization and scene rollout. Scene initialization refers to generating the initial layout for the traffic in a scene, and scene rollout refers to closed-loop simulation for the behaviors of the agents. While diffusion has been demonstrated to be effective in learning realistic, multimodal agent distributions, two open challenges remain: controllability and closed-loop inference efficiency and realism. To this end, to address controllability challenges, we propose generalized hard constraints, a generalized inference-time constraint mechanism that is simple yet effective. To improve closed-loop inference quality and efficiency, we propose amortized diffusion, a novel diffusion denoising paradigm that amortizes the physical cost of denoising over future simulation rollout steps, reducing the cost of per physical rollout step to a single denoising function evaluation, while dramatically reducing closed-loop errors. We demonstrate the effectiveness of our approach on the Waymo Open Dataset, where we are able to generate distributionally realistic scenes, while obtaining competitive performance in the Sim Agents Challenge, surpassing the state-of-the-art in many realism attributes.", "keywords": "Autonomous Vehicles;Simulation;Diffusion Models;Scene Generation;Close Loop Simulation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Chiyu Max Jiang;Yijing Bai;Andre Cornman;Christopher Davis;Xiukun Huang;Hong Jeon;Sakshum Kulshrestha;John Wheatley Lambert;Shuangyu Li;Xuanyu Zhou;Carlos Fuertes;Chang Yuan;Mingxing Tan;Yin Zhou;Dragomir Anguelov", "authorids": "~Chiyu_Max_Jiang1;~Yijing_Bai1;~Andre_Cornman2;~Christopher_Davis7;~Xiukun_Huang1;~Hong_Jeon1;~Sakshum_Kulshrestha1;~John_Lambert1;~Shuangyu_Li1;~Xuanyu_Zhou1;~Carlos_Fuertes1;~Chang_Yuan1;~Mingxing_Tan3;~Yin_Zhou1;~Dragomir_Anguelov1", "gender": "M;M;;M;;M;M;M;M;M;M;M;M;M;M", "homepage": "http://www.maxjiang.ml;https://www.linkedin.com/in/yijingbai/;;;;https://hongjeon.com/;;https://johnwlambert.github.io/;;https://www.linkedin.com/in/xuanyu-zhou-322826132;;;;;", "dblp": "234/6152;;;;;;342/7791;47/3516;;135/6740;;70/2482;11/7863;;a/DragomirAnguelov", "google_scholar": "KD12DDMAAAAJ;;https://scholar.google.com/citations?view_op=list_works;;;;Exb9CHIAAAAJ;6GhZedEAAAAJ;;;K41bhDYAAAAJ;;6POeyBoAAAAJ;https://scholar.google.com/citations?scilu=9351241097416630746:0,18260587605580260227:0;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0009-0005-8125-8867;;;;;;;;", "linkedin": ";yijingbai/;;christopher-davis-35b0b2114;;;sakshum-kulshrestha/;;wppply/;;carlos-fuertes-4379bb16/;;mingxing-tan-2724551b/;;dragomiranguelov/", "or_profile": "~Chiyu_Max_Jiang1;~Yijing_Bai1;~Andre_Cornman2;~Christopher_Davis7;~Xiukun_Huang1;~Hong_Jeon1;~Sakshum_Kulshrestha1;~John_Lambert1;~Shuangyu_Li1;~Xuanyu_Zhou1;~Carlos_Fuertes1;~Chang_Yuan1;~Mingxing_Tan3;~Yin_Zhou1;~Dragomir_Anguelov1", "aff": "Waymo LLC;Waymo;Tatta Bio;Google;;Waymo;Waymo;Waymo;Waymo LLC;Waymo;Universidad Aut\u00f3noma de Madrid;Waymo LLC;Google/Waymo;Waymo;Waymo", "aff_domain": "google.com;google.com;tatta.bio;google.com;;waymo.com;waymo.com;google.com;waymo.com;waymo.com;uam.es;waymo.com;google.com;waymo.com;waymo.com", "position": "Researcher;Staff Software Engineer;Principal Researcher;Researcher;;Researcher;Researcher;Researcher;software engineer;Researcher;PhD student;Engineering Manager;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\njiang2024scenediffuser,\ntitle={SceneDiffuser: Efficient and Controllable Driving Simulation Initialization and Rollout},\nauthor={Chiyu Max Jiang and Yijing Bai and Andre Cornman and Christopher Davis and Xiukun Huang and Hong Jeon and Sakshum Kulshrestha and John Wheatley Lambert and Shuangyu Li and Xuanyu Zhou and Carlos Fuertes and Chang Yuan and Mingxing Tan and Yin Zhou and Dragomir Anguelov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a4qT29Levh}\n}", "github": "", "reviewers": "bzy1;j5Jn;xCJf;XARz", "pdf_size": 10228875, "rating": "6;7;7;7", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "82;137;113;117", "wc_strengths": "44;50;104;113", "wc_weaknesses": "147;104;303;45", "wc_questions": "3;169;34;35", "wc_limitations": "1;4;1;59", "wc_review": "277;464;555;369", "wc_reply_reviewers": "10;14;133;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 112.25, 19.68978161382193 ], "wc_strengths_avg": [ 77.75, 30.986892390170397 ], "wc_weaknesses_avg": [ 149.75, 95.60171285076434 ], "wc_questions_avg": [ 60.25, 64.09124355167404 ], "wc_limitations_avg": [ 16.25, 24.71209218176397 ], "wc_review_avg": [ 416.25, 103.86860690314471 ], "wc_reply_reviewers_avg": [ 42.5, 52.27092882281699 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9174759877806734370&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "google.com;google.com;tatta.bio;google.com;;waymo.com;waymo.com;google.com;waymo.com;waymo.com;uam.es;waymo.com;google.com;waymo.com;waymo.com", "author_num": 15, "aff_unique_index": "0;0;1;2;0;0;0;0;0;3;0;2;0;0", "aff_unique_norm": "Waymo;Tatta Bio;Google;Universidad Aut\u00f3noma de Madrid", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.waymo.com;;https://www.google.com;https://www.uam.es", "aff_unique_abbr": "Waymo;;Google;UAM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;2;0;0;0;0", "aff_country_unique": "United States;;Spain" }, { "title": "Unelicitable Backdoors via Cryptographic Transformer Circuits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94589", "id": "a560KLF3v5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a560KLF3v5", "openreview": "https://openreview.net/forum?id=a560KLF3v5", "poster": "", "project": "", "author_site": "Andis Draguns, Andrew Gritsevskiy, Sumeet Motwani, Christian Schroeder de Witt", "tldr": "", "abstract": "The rapid proliferation of open-source language models significantly increases the risks of downstream backdoor attacks. These backdoors can introduce dangerous behaviours during model deployment and can evade detection by conventional cybersecurity monitoring systems. In this paper, we introduce a novel class of backdoors in transformer models, that, in contrast to prior art, are unelicitable in nature. Unelicitability prevents the defender from triggering the backdoor, making it impossible to properly evaluate ahead of deployment even if given full white-box access and using automated techniques, such as red-teaming or certain formal verification methods. We show that our novel construction is not only unelicitable thanks to using cryptographic techniques, but also has favourable robustness properties.\nWe confirm these properties in empirical investigations, and provide evidence that our backdoors can withstand state-of-the-art mitigation strategies. Additionally, we expand on previous work by showing that our universal backdoors, while not completely undetectable in white-box settings, can be harder to detect than some existing designs. By demonstrating the feasibility of seamlessly integrating backdoors into transformer models, this paper fundamentally questions the efficacy of pre-deployment detection strategies. This offers new insights into the offence-defence balance in AI safety and security.", "keywords": "Backdoor attacks;Transformers;handcrafting model parameters;cryptographic circuits", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/d5df8d9e1552e6d86c893cee089717caf29b324f.zip", "author": "Andis Draguns;Andrew Gritsevskiy;Sumeet Ramesh Motwani;Christian Schroeder de Witt", "authorids": "~Andis_Draguns1;~Andrew_Gritsevskiy1;~Sumeet_Ramesh_Motwani1;~Christian_Schroeder_de_Witt1", "gender": ";M;M;", "homepage": ";https://sumeetmotwani.com;https://www.schroederdewitt.com;https://andrew.gr", "dblp": "262/6409;;;218/5829", "google_scholar": "u4gSe_YAAAAJ;;DE60h_0AAAAJ;KmNsbnoAAAAJ", "orcid": ";;;0000-0001-8138-8796", "linkedin": ";;;agritsevskiy/", "or_profile": "~Andis_Draguns1;~Sumeet_Ramesh_Motwani1;~Christian_Schroeder_de_Witt1;~Andrew_George_Gritsevskiy1", "aff": "University of Latvia;University of California, Berkeley;University of Oxford;Cavendish Labs, Co.", "aff_domain": "lu.lv;berkeley.edu;oxford.ac.uk;cavendishlabs.org", "position": "MS student;Undergrad student;Lecturer;Principal Researcher", "bibtex": "@inproceedings{\ndraguns2024unelicitable,\ntitle={Unelicitable Backdoors via Cryptographic Transformer Circuits},\nauthor={Andis Draguns and Andrew Gritsevskiy and Sumeet Ramesh Motwani and Christian Schroeder de Witt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a560KLF3v5}\n}", "github": "", "reviewers": "xhJJ;Pejt;psDc;svZi;QeA1", "pdf_size": 1814128, "rating": "3;6;7;7;7", "confidence": "4;4;4;4;4", "soundness": "2;2;2;3;4", "novelty": "2;2;3;4;3", "presentation": "2;3;3;4;3", "wc_summary": "47;69;111;86;47", "wc_strengths": "36;49;42;45;33", "wc_weaknesses": "83;56;82;50;173", "wc_questions": "21;171;81;145;37", "wc_limitations": "3;8;80;14;9", "wc_review": "190;353;396;340;299", "wc_reply_reviewers": "0;30;14;59;15", "wc_reply_authors": "143;0;0;224;0", "reply_reviewers": "0;1;1;2;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 72.0, 24.39672109116305 ], "wc_strengths_avg": [ 41.0, 5.830951894845301 ], "wc_weaknesses_avg": [ 88.8, 44.160615937733475 ], "wc_questions_avg": [ 91.0, 58.706047388663464 ], "wc_limitations_avg": [ 22.8, 28.812497288503128 ], "wc_review_avg": [ 315.6, 70.01314162355521 ], "wc_reply_reviewers_avg": [ 23.6, 20.08581589082206 ], "wc_reply_authors_avg": [ 73.4, 93.47427453583151 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1519366212253343990&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "lu.lv;berkeley.edu;oxford.ac.uk;cavendishlabs.org", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Latvia;University of California, Berkeley;University of Oxford;Cavendish Labs", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.lu.lv;https://www.berkeley.edu;https://www.ox.ac.uk;", "aff_unique_abbr": "UL;UC Berkeley;Oxford;Cavendish Labs", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Latvia;United States;United Kingdom;" }, { "title": "Codec Avatar Studio: Paired Human Captures for Complete, Driveable, and Generalizable Avatars", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97626", "id": "a6DteCxiw6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a6DteCxiw6", "openreview": "https://openreview.net/forum?id=a6DteCxiw6", "poster": "", "project": "", "author_site": "Julieta Martinez, Emily Kim, Javier Romero, Timur Bagautdinov, Shunsuke Saito, Shoou-I Yu, Stuart Anderson, Michael Zollh\u00f6fer, Te-Li Wang, Shaojie Bai, Shih-En Wei, Rohan Joshi, Wyatt Borsos, Tomas Simon, Jason Saragih, Paul Theodosis, Alexander Greene, Anjani Josyula, Silvio Maeta, Andrew Jewett, Simion Venshtain, Christopher Heilman, Yueh-Tung Chen, Sidi Fu, Mohamed Elshaer, Tingfang Du, Longhua Wu, Shen-Chi Chen, Kai Kang, Michael Wu, Youssef Emad, Steven Longay, Ashley Brewer, Hitesh Shah, James Booth, Taylor Koska, Kayla Haidle, Joanna Hsu, Thomas Dauer, Peter Selednik, Tim Godisart, Scott Ardisson, Matthew Cipperly, Ben Humberston, Lon Farr, Bob Hansen, Peihong Guo, Dave Braun, Steven Krenn, He Wen, Lucas Evans, Natalia Fadeeva, Matthew Stewart, Gabriel Schwartz, Divam Gupta, Gyeongsik Moon, Kaiwen Guo, Yuan Dong, Yichen Xu, Takaaki Shiratori, Fabian Prada Nino, Bernardo Pires, Bo Peng, Julia Buffalini, Autumn Trimble, Kevyn McPhail, Melissa Schoeller, Yaser Sheikh", "tldr": "", "abstract": "To build photorealistic avatars that users can embody, human modelling must be complete (cover the full body), driveable (able to reproduce the current motion and appearance from the user), and generalizable (_i.e._, easily adaptable to novel identities).\nTowards these goals, _paired_ captures, that is, captures of the same subject obtained from systems of diverse quality and availability, are crucial.\nHowever, paired captures are rarely available to researchers outside of dedicated industrial labs: _Codec Avatar Studio_ is our proposal to close this gap.\nTowards generalization and driveability, we introduce a dataset of 256 subjects captured in two modalities: high resolution multi-view scans of their heads, and video from the internal cameras of a headset.\nTowards completeness, we introduce a dataset of 4 subjects captured in eight modalities: high quality relightable multi-view captures of heads and hands, full body multi-view captures with minimal and regular clothes, and corresponding head, hands and body phone captures.\nTogether with our data, we also provide code and pre-trained models for different state-of-the-art human generation models.\nOur datasets and code are available at https://github.com/facebookresearch/ava-256 and https://github.com/facebookresearch/goliath.", "keywords": "avatars;codec avatars;photorealistic avatars;human modelling;3d reconstruction;telepresence", "primary_area": "", "supplementary_material": "/attachment/8eaf1f448d94b9fa417287c281ed2e8202386048.pdf", "author": "Julieta Martinez;Emily Kim;Javier Romero;Timur Bagautdinov;Shunsuke Saito;Shoou-I Yu;Stuart Anderson;Michael Zollh\u00f6fer;Te-Li Wang;Shaojie Bai;Chenghui Li;Shih-En Wei;Rohan Joshi;Wyatt Borsos;Tomas Simon;Jason Saragih;Paul Theodosis;Alexander Greene;Anjani Josyula;Silvio Mano Maeta;Andrew I Jewett;Simion Venshtain;Christopher Heilman;Yueh-Tung Chen;Sidi Fu;Mohamed Ezzeldin A. Elshaer;Tingfang Du;Longhua Wu;Shen-Chi Chen;Kai Kang;Michael Wu;Youssef Emad;Steven Longay;Ashley Brewer;Hitesh Shah;James Booth;Taylor Koska;Kayla Haidle;Matthew Andromalos;Joanna Ching-Hui Hsu;Thomas Dauer;Peter Selednik;Tim Godisart;Scott Ardisson;Matthew Cipperly;Ben Humberston;Lon Farr;Bob Hansen;Peihong Guo;Dave Braun;Steven Krenn;He Wen;Lucas Evans;Natalia Fadeeva;Matthew Stewart;Gabriel Schwartz;Divam Gupta;Gyeongsik Moon;Kaiwen Guo;Yuan Dong;Yichen Xu;Takaaki Shiratori;Fabian Andres Prada Nino;Bernardo R Pires;Bo Peng;Julia Buffalini;Autumn Trimble;Kevyn Alex Anthony McPhail;Melissa Robinson Schoeller;Yaser Sheikh", "authorids": "~Julieta_Martinez1;~Emily_Kim1;~Javier_Romero1;~Timur_Bagautdinov1;~Shunsuke_Saito1;~Shoou-I_Yu1;~Stuart_Anderson1;~Michael_Zollh\u00f6fer2;~Te-Li_Wang1;~Shaojie_Bai1;~Chenghui_Li3;~Shih-En_Wei2;~Rohan_Joshi1;~Wyatt_Borsos1;~Tomas_Simon1;~Jason_Saragih2;~Paul_Theodosis1;~Alexander_Greene1;~Anjani_Josyula1;~Silvio_Mano_Maeta1;~Andrew_I_Jewett1;~Simion_Venshtain1;~Christopher_Heilman1;~Yueh-Tung_Chen2;~Sidi_Fu1;~Mohamed_Ezzeldin_A._Elshaer1;~Tingfang_Du1;~Longhua_Wu1;~Shen-Chi_Chen4;~Kai_Kang4;~Michael_Wu4;~Youssef_Emad1;~Steven_Longay1;~Ashley_Brewer1;~Hitesh_Shah1;~James_Booth7;~Taylor_Koska1;~Kayla_Haidle1;~Matthew_Andromalos1;~Joanna_Ching-Hui_Hsu1;~Thomas_Dauer1;~Peter_Selednik1;~Tim_Godisart1;~Scott_Ardisson1;~Matthew_Cipperly1;~Ben_Humberston1;~Lon_Farr1;~Bob_Hansen1;~Peihong_Guo1;~Dave_Braun1;~Steven_Krenn1;~He_Wen2;~Lucas_Evans1;~Natalia_Fadeeva1;~Matthew_Stewart2;~Gabriel_Schwartz1;~Divam_Gupta2;~Gyeongsik_Moon1;~Kaiwen_Guo5;~Yuan_Dong2;~Yichen_Xu3;~Takaaki_Shiratori3;~Fabian_Andres_Prada_Nino1;~Bernardo_R_Pires1;~Bo_Peng4;~Julia_Buffalini1;~Autumn_Trimble1;~Kevyn_Alex_Anthony_McPhail1;~Melissa_Robinson_Schoeller1;~Yaser_Sheikh1", "gender": ";F;M;M;M;;M;;;M;M;M;M;M;;;M;M;F;M;M;M;M;M;;;M;M;;M;M;M;M;F;;M;F;F;M;F;M;M;M;M;M;;;M;M;;M;;M;F;;M;M;M;;M;M;M;;M;M;F;F;M;;", "homepage": ";https://kimemily12.github.io/website/;https://ps.is.tuebingen.mpg.de/person/jromero;;http://www-scf.usc.edu/~saitos/;;;;;https://jerrybai1995.github.io;;;;https://github.com/yit-b;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;https://www.facebook.com/scott.ardisson/;;https://benhumberston.com;;https://grettir.org;;;;;;;;;https://divamgupta.com;https://mks0601.github.io/;;;;https://sites.google.com/view/takaaki-shiratori/home;;;;https://www.linkedin.com/in/julia-buffalini-424983123/;https://autumntrimble.com/;https://www.kevynmc.com;;http://www.cs.cmu.edu/~yaser/", "dblp": ";;16/5949-2;145/3196;21/5061;23/7442.html;;;160/6547.html;;;119/7860;;;23/8654;21/3590;;;;;;;;;;;;165/9940;;;;;;;;153/2284-1.html;;;;;;;;;;;;;61/7527;;;;;;;68/11348;;185/6852;;;177/6703;17/5270.html;;;03/5954-5;;;;;71/3516", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=Wx62iOsAAAAJ;oLi7xJ0AAAAJ;IolN_okAAAAJ;3YZTd_UAAAAJ;8orqBsYAAAAJ;;;DLVP3PcAAAAJ;;https://scholar.google.com.tw/citations?user=sFQD3k4AAAAJ;;;7aabHgsAAAAJ;ss-IvjMAAAAJ;;;Gj-ZvioAAAAJ;https://scholar.google.com/citations?hl=en;mswf-6kAAAAJ;;;719j358AAAAJ;;https://scholar.google.com/citations?hl=en;;;;;;;;;Ic9pxRgAAAAJ;T7F42LQAAAAJ;;;;;;;;;;;;;;;;;;;;x47jgTcAAAAJ;https://scholar.google.co.in/citations?user=YNg9Sg8AAAAJ;2f2D258AAAAJ;FfaVfjIAAAAJ;https://scholar.google.com/citations?hl=en;;YvS3QpkAAAAJ;;IN4BBvIAAAAJ;;;;;;Yd4KvooAAAAJ", "orcid": ";;;;;;;;;;;;;;0000-0002-0972-7455;;;;;;;;;;;;;;;;;;;;;0000-0003-2114-9595;;;;;;;;;;;;;;;;;;;;0000-0002-8781-5573;;;;;;;;;;;;;;", "linkedin": ";kimemily12/;javier-romero-38b87331/;bagautdinov/;;;stuartoanderson/;;;;leochli/;;rohan-m-joshi/;;;;theodosis/;alex-greene/;anjanijosyula/;silviomaeta/;andrew-jewett;simon-venshtain-3301a119;chrisheilman/;;sidifu/;;tingfangdu/;longhua-wu-0062b8173/;;kai-kang-b0ba1988;michael-s-wu;youssef-emad-148444b9/;steven-longay-976b0613;ashley-brewer-0288646/;;https://linkedin.com/in/jabooth;taylor-koska-3372871b6/;kayla-haidle-b281b1204?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;matthew-andromalos/;chjoanna/;tom-dauer-94246a86/;peter-selednik-05036499/;tim-godisart-37a58a196/;;mattcipperly/;;;;;david-braun-944460165/;stevenkrenn/;;lucasevanspgh/;https://linkedin.com/in/natalia-fadeeva-71b6181;matt-stewart-86322717/;;;gyeongsik-moon-bb9a73152/;;;yichen-xu-a5620a13a/;;;;;;autumntrimble/;;;", "or_profile": "~Julieta_Martinez1;~Emily_Kim1;~Javier_Romero1;~Timur_Bagautdinov1;~Shunsuke_Saito1;~Shoou-I_Yu1;~Stuart_Anderson1;~Michael_Zollh\u00f6fer2;~Te-Li_Wang1;~Shaojie_Bai1;~Chenghui_Li3;~Shih-En_Wei2;~Rohan_Joshi1;~Wyatt_Borsos1;~Tomas_Simon1;~Jason_Saragih2;~Paul_Theodosis1;~Alexander_Greene1;~Anjani_Josyula1;~Silvio_Mano_Maeta1;~Andrew_I_Jewett1;~Simion_Venshtain1;~Christopher_Heilman1;~Yueh-Tung_Chen2;~Sidi_Fu1;~Mohamed_Ezzeldin_A._Elshaer1;~Tingfang_Du1;~Longhua_Wu1;~Shen-Chi_Chen4;~Kai_Kang4;~Michael_Wu4;~Youssef_Emad1;~Steven_Longay1;~Ashley_Brewer1;~Hitesh_Shah1;~James_Booth7;~Taylor_Koska1;~Kayla_Haidle1;~Matthew_Andromalos1;~Joanna_Ching-Hui_Hsu1;~Thomas_Dauer1;~Peter_Selednik1;~Tim_Godisart1;~Scott_Ardisson1;~Matthew_Cipperly1;~Ben_Humberston1;~Lon_Farr1;~Bob_Hansen1;~Peihong_Guo1;~Dave_Braun1;~Steven_Krenn1;~He_Wen2;~Lucas_Evans1;~Natalia_Fadeeva1;~Matthew_Stewart2;~Gabriel_Schwartz1;~Divam_Gupta2;~Gyeongsik_Moon1;~Kaiwen_Guo5;~Yuan_Dong2;~Yichen_Xu3;~Takaaki_Shiratori3;~Fabian_Andres_Prada_Nino1;~Bernardo_R_Pires1;~Bo_Peng4;~Julia_Buffalini1;~Autumn_Trimble1;~Kevyn_Alex_Anthony_McPhail1;~Melissa_Robinson_Schoeller1;~Yaser_Sheikh1", "aff": ";Carnegie Mellon University;Meta;Reality Labs Research;Codec Avatars Lab;Reality Labs Research, Meta;Meta;;Reality Labs Research;Meta;Meta Reality Labs;Meta Inc.;Meta Facebook;Meta Facebook;Meta;Meta Facebook;Meta Facebook;Georgia Institute of Technology;Carnegie Mellon University;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta;Meta Facebook;Meta Reality Labs;;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta;Meta Facebook;Meta Facebook;Meta;Meta Facebook;PsiQuantum Corp;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook;;Meta;Meta Facebook;Meta Facebook;Meta;Meta Reality Labs;Meta;Reality Lab;Meta Facebook;Meta platforms inc;Meta;;;Meta Platforms Inc.;Meta;Meta;Meta;;Meta", "aff_domain": ";cmu.edu;meta.com;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;facebook.com;fb.com;meta.com;fb.com;fb.com;meta.com;gatech.edu;cmu.edu;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;psiquantum.com;meta.com;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;fb.com;meta.com;meta.com;;;meta.com;meta.com;meta.com;meta.com;;meta.com", "position": ";PhD student;Researcher;Researcher;Researcher;Research Scientist;Researcher;;Researcher;AI Research Scientist;Researcher;Research Scientist;Researcher;Software Engineer;Research Scientist;Principal Researcher;Researcher;MS student;MS student;Researcher;Researcher;Researcher;Researcher;Researcher;Engineering Manager;Researcher;Researcher;Researcher;;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Principal Researcher;Principal Researcher;Researcher;Undergrad student;Systems Engineer;Software Engineer;Researcher;Researcher;Researcher;Systems Engineer;Systems Engineer;Software Engineer;;Researcher;Software Engineer;Network Engineer;Researcher;;Researcher;Researcher;Systems Engineering Manager;Researcher;Research Engineer;Postdoc;Researcher;Researcher;Research Engineer;Research Scientist;;;Researcher;Project Manager;Researcher;Engineer;;Researcher", "bibtex": "@inproceedings{\nmartinez2024codec,\ntitle={Codec Avatar Studio: Paired Human Captures for Complete, Driveable, and Generalizable Avatars},\nauthor={Julieta Martinez and Emily Kim and Javier Romero and Timur Bagautdinov and Shunsuke Saito and Shoou-I Yu and Stuart Anderson and Michael Zollh{\\\"o}fer and Te-Li Wang and Shaojie Bai and Chenghui Li and Shih-En Wei and Rohan Joshi and Wyatt Borsos and Tomas Simon and Jason Saragih and Paul Theodosis and Alexander Greene and Anjani Josyula and Silvio Mano Maeta and Andrew I Jewett and Simion Venshtain and Christopher Heilman and Yueh-Tung Chen and Sidi Fu and Mohamed Ezzeldin A. Elshaer and Tingfang Du and Longhua Wu and Shen-Chi Chen and Kai Kang and Michael Wu and Youssef Emad and Steven Longay and Ashley Brewer and Hitesh Shah and James Booth and Taylor Koska and Kayla Haidle and Matthew Andromalos and Joanna Ching-Hui Hsu and Thomas Dauer and Peter Selednik and Tim Godisart and Scott Ardisson and Matthew Cipperly and Ben Humberston and Lon Farr and Bob Hansen and Peihong Guo and Dave Braun and Steven Krenn and He Wen and Lucas Evans and Natalia Fadeeva and Matthew Stewart and Gabriel Schwartz and Divam Gupta and Gyeongsik Moon and Kaiwen Guo and Yuan Dong and Yichen Xu and Takaaki Shiratori and Fabian Andres Prada Nino and Bernardo R Pires and Bo Peng and Julia Buffalini and Autumn Trimble and Kevyn Alex Anthony McPhail and Melissa Robinson Schoeller and Yaser Sheikh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=a6DteCxiw6}\n}", "github": "", "reviewers": "hqoy;dvvT;uYLj;fHrm", "pdf_size": 27272685, "rating": "6;6;7;7", "confidence": "4;3;5;4", "wc_summary_and_contributions": "22;63;158;74", "wc_strengths": "69;55;161;106", "wc_improvement": "179;69;376;250", "wc_limitations": "10;54;48;41", "wc_correctness": "1;13;201;20", "wc_clarity": "1;7;128;11", "wc_relation_to_prior_work": "1;24;137;47", "wc_documentation": "1;1;103;54", "wc_additional_feedback": "1;1;1;1", "wc_review": "285;287;1313;604", "wc_reply_reviewers": "83;0;222;121", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "3;0;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 79.25, 49.423552077931426 ], "wc_strengths_avg": [ 97.75, 40.9961888472575 ], "wc_improvement_avg": [ 218.5, 111.47757622051172 ], "wc_limitations_avg": [ 38.25, 16.946607330082326 ], "wc_correctness_avg": [ 58.75, 82.40866155932882 ], "wc_clarity_avg": [ 36.75, 52.803290617157565 ], "wc_relation_to_prior_work_avg": [ 52.25, 51.56246212119821 ], "wc_documentation_avg": [ 39.75, 42.446289590493066 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 622.25, 419.4039669578722 ], "wc_reply_reviewers_avg": [ 106.5, 79.75744479357397 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 70, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iycevVqb5loJ:scholar.google.com/&scioq=Codec+Avatar+Studio:+Paired+Human+Captures+for+Complete,+Driveable,+and+Generalizable+Avatars&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": ";cmu.edu;meta.com;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;facebook.com;fb.com;meta.com;fb.com;fb.com;meta.com;gatech.edu;cmu.edu;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;psiquantum.com;meta.com;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;meta.com;;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;meta.com;fb.com;meta.com;meta.com;;;meta.com;meta.com;meta.com;meta.com;;meta.com", "author_num": 70, "aff_unique_index": "0;1;2;3;1;1;2;1;1;1;1;1;1;1;1;4;0;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;5;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;6;1;1;1;1;1;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Meta;Reality Labs;Codec Avatars Lab;Georgia Institute of Technology;PsiQuantum Corporation;Reality Lab", "aff_unique_dep": ";Meta Platforms, Inc.;Research;;;;", "aff_unique_url": "https://www.cmu.edu;https://meta.com;https://www.realitylabs.com;;https://www.gatech.edu;https://www.psiquantum.com;", "aff_unique_abbr": "CMU;Meta;;;Georgia Tech;PsiQuantum;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Tri-Level Navigator: LLM-Empowered Tri-Level Learning for Time Series OOD Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94588", "id": "a6HzEu4Kpo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a6HzEu4Kpo", "openreview": "https://openreview.net/forum?id=a6HzEu4Kpo", "poster": "/media/PosterPDFs/NeurIPS%202024/94588.png?t=1731397880.8225765", "project": "", "author_site": "Chengtao Jian, Kai Yang, Yang Jiao", "tldr": "", "abstract": "Out-of-Distribution (OOD) generalization in machine learning is a burgeoning area of study. Its primary goal is to enhance the adaptability and resilience of machine learning models when faced with new, unseen, and potentially adversarial data that significantly diverges from their original training datasets. In this paper, we investigate time series OOD generalization via pre-trained Large Language Models (LLMs). We first propose a novel \\textbf{T}ri-level learning framework for \\textbf{T}ime \\textbf{S}eries \\textbf{O}OD generalization, termed TTSO, which considers both sample-level and group-level uncertainties. This formula offers a fresh theoretic perspective for formulating and analyzing OOD generalization problem. In addition, we provide a theoretical analysis to justify this method is well motivated. We then develop a stratified localization algorithm tailored for this tri-level optimization problem, theoretically demonstrating the guaranteed convergence of the proposed algorithm. Our analysis also reveals that the iteration complexity to obtain an $\\epsilon$-stationary point is bounded by O($\\frac{1}{\\epsilon^{2}}$). Extensive experiments on real-world datasets have been conducted to elucidate the effectiveness of the proposed method.", "keywords": "tri-level optimization;OOD generalization", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Chengtao Jian;Kai Yang;Yang Jiao", "authorids": "~Chengtao_Jian1;~Kai_Yang3;~Yang_Jiao4", "gender": "M;;", "homepage": ";;https://yangjiao-tj.github.io/yangjiao/", "dblp": "336/5686;;", "google_scholar": "SCBBO8oAAAAJ;;https://scholar.google.com.hk/citations?user=tqa1KZAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chengtao_Jian1;~Kai_Yang3;~Yang_Jiao4", "aff": "Tongji University;;Tongji University", "aff_domain": "tongji.edu.cn;;tongji.edu.cn", "position": "PhD student;;PhD student", "bibtex": "@inproceedings{\njian2024trilevel,\ntitle={Tri-Level Navigator: {LLM}-Empowered Tri-Level Learning for Time Series {OOD} Generalization},\nauthor={Chengtao Jian and Kai Yang and Yang Jiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a6HzEu4Kpo}\n}", "github": "", "reviewers": "BvSM;tk3X;9773;Z4zT", "pdf_size": 614306, "rating": "4;5;6;7", "confidence": "4;2;2;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;2;3", "wc_summary": "113;56;69;98", "wc_strengths": "67;98;53;107", "wc_weaknesses": "302;105;7;31", "wc_questions": "3;47;12;20", "wc_limitations": "4;1;6;1", "wc_review": "489;307;147;257", "wc_reply_reviewers": "162;17;16;0", "wc_reply_authors": "750;66;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.0, 22.616365755797283 ], "wc_strengths_avg": [ 81.25, 22.049659861322123 ], "wc_weaknesses_avg": [ 111.25, 115.90162854766106 ], "wc_questions_avg": [ 20.5, 16.439282222773596 ], "wc_limitations_avg": [ 3.0, 2.1213203435596424 ], "wc_review_avg": [ 300.0, 123.51922927220684 ], "wc_reply_reviewers_avg": [ 48.75, 65.73193668225515 ], "wc_reply_authors_avg": [ 204.0, 316.3826796776334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12973794245344493850&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tongji.edu.cn;;tongji.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Amortized Fourier Neural Operators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94587", "id": "a6em980M9x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a6em980M9x", "openreview": "https://openreview.net/forum?id=a6em980M9x", "poster": "/media/PosterPDFs/NeurIPS%202024/94587.png?t=1733728631.1928446", "project": "", "author_site": "Zipeng Xiao, Siqi Kou, Hao Zhongkai, Bokai Lin, Zhijie Deng", "tldr": "", "abstract": "Fourier Neural Operators (FNOs) have shown promise for solving partial differential equations (PDEs).\nTypically, FNOs employ separate parameters for different frequency modes to specify tunable kernel integrals in Fourier space, which, yet, results in an undesirably large number of parameters when solving high-dimensional PDEs. \nA workaround is to abandon the frequency modes exceeding a predefined threshold, but this limits the FNOs' ability to represent high-frequency details and poses non-trivial challenges for hyper-parameter specification. \nTo address these, we propose AMortized Fourier Neural Operator (AM-FNO), where an amortized neural parameterization of the kernel function is deployed to accommodate arbitrarily many frequency modes using a fixed number of parameters. \nWe introduce two implementations of AM-FNO, based on the recently developed, appealing Kolmogorov\u2013Arnold Network (KAN) and Multi-Layer Perceptrons (MLPs) equipped with orthogonal embedding functions respectively. \nWe extensively evaluate our method on diverse datasets from various domains and observe up to 31\\% average improvement compared to competing neural operator baselines.", "keywords": "neural operator;Fourier neural operator;Kolmogorov\u2013Arnold Network", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/8b1a822945d9e03cf3fd21a70901b3307cdeb70b.zip", "author": "Zipeng Xiao;Siqi Kou;Zhongkai Hao;Bokai Lin;Zhijie Deng", "authorids": "~Zipeng_Xiao1;~Siqi_Kou1;~Zhongkai_Hao1;~Bokai_Lin1;~Zhijie_Deng1", "gender": "M;;M;M;M", "homepage": "https://github.com/xzppp;https://github.com/karrykkk;https://github.com/zhuanrangqun;https://thudzj.github.io/;https://haozhongkai.github.io/", "dblp": ";;;209/4959;270/0220.html", "google_scholar": ";;;J3dR0sUAAAAJ;dfSzq27ZiVoC", "orcid": ";;;0000-0002-0932-1631;", "linkedin": ";;;;", "or_profile": "~Zipeng_Xiao1;~Siqi_Kou1;~Bokai_Lin1;~Zhijie_Deng1;~Hao_Zhongkai1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Tsinghua University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;mails.tsinghua.edu.cn", "position": "MS student;PhD student;Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nxiao2024amortized,\ntitle={Amortized Fourier Neural Operators},\nauthor={Zipeng Xiao and Siqi Kou and Zhongkai Hao and Bokai Lin and Zhijie Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a6em980M9x}\n}", "github": "", "reviewers": "kDgT;NuVs;Z566;DLAH;okt7", "pdf_size": 758339, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;5", "soundness": "3;2;2;2;3", "novelty": "3;2;2;2;3", "presentation": "2;2;2;3;3", "wc_summary": "124;85;42;91;98", "wc_strengths": "77;16;34;84;180", "wc_weaknesses": "351;216;75;295;121", "wc_questions": "214;268;125;218;148", "wc_limitations": "8;6;7;53;6", "wc_review": "774;591;283;741;553", "wc_reply_reviewers": "684;1006;447;1056;76", "wc_reply_authors": "366;1240;552;698;20", "reply_reviewers": "4;4;3;6;1", "reply_authors": "5;5;4;7;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 88.0, 26.570660511172846 ], "wc_strengths_avg": [ 78.2, 56.9575280362482 ], "wc_weaknesses_avg": [ 211.6, 103.22325319422944 ], "wc_questions_avg": [ 194.6, 51.627899434317484 ], "wc_limitations_avg": [ 16.0, 18.51485889765299 ], "wc_review_avg": [ 588.4, 174.50684800316577 ], "wc_reply_reviewers_avg": [ 653.8, 364.23640674704666 ], "wc_reply_authors_avg": [ 575.2, 402.39005951936736 ], "reply_reviewers_avg": [ 3.6, 1.624807680927192 ], "reply_authors_avg": [ 4.6, 1.624807680927192 ], "replies_avg": [ 49, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8750000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NX0nlKgT5GMJ:scholar.google.com/&scioq=Amortized+Fourier+Neural+Operators&hl=en&as_sdt=0,14", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;mails.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Orchid: Flexible and Data-Dependent Convolution for Sequence Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94586", "id": "a75F45dBHK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a75F45dBHK", "openreview": "https://openreview.net/forum?id=a75F45dBHK", "poster": "/media/PosterPDFs/NeurIPS%202024/94586.png?t=1734018058.5531921", "project": "", "author_site": "Mahdi Karami, Ali Ghodsi", "tldr": "", "abstract": "In the rapidly evolving field of deep learning, the demand for models that are both expressive and computationally efficient has never been more critical. This paper introduces Orchid, a novel architecture designed to address the quadratic complexity of traditional attention mechanisms without compromising the ability to capture long-range dependencies and in-context learning. At the core of this architecture lies a new data-dependent global convolution layer, which contextually adapts its kernel conditioned on input sequence using a dedicated conditioning neural network. We design two simple conditioning networks that maintain shift equivariance in our data-dependent convolution operation. The dynamic nature of the proposed convolution kernel grants Orchid high expressivity while maintaining quasilinear scalability for long sequences. We evaluate the proposed model across multiple domains, including language modeling and image classification, to highlight its performance and generality. Our experiments demonstrate that this architecture not only outperforms traditional attention-based architectures such as BERT and Vision Transformers with smaller model sizes, but also extends the feasible sequence length beyond the limitations of the dense attention layers. This achievement represents a significant step towards more efficient and scalable deep learning models for sequence modeling.", "keywords": "Transformer;Long Convolution;BERT;LLM;Input-dependent filter", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Mahdi Karami;Ali Ghodsi", "authorids": "~Mahdi_Karami2;~Ali_Ghodsi1", "gender": "M;M", "homepage": "https://karami-m.github.io/;https://uwaterloo.ca/data-analytics/", "dblp": "90/394.html;71/4226-1", "google_scholar": "https://scholar.google.com/citations?hl=en;WXbhp_4AAAAJ", "orcid": ";", "linkedin": "mahdi-karami-2957412a/;ali-ghodsi-525b0a61/", "or_profile": "~Mahdi_Karami2;~Ali_Ghodsi1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "cs.uwaterloo.ca;uwaterloo.ca", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nkarami2024orchid,\ntitle={Orchid: Flexible and Data-Dependent Convolution for Sequence Modeling},\nauthor={Mahdi Karami and Ali Ghodsi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=a75F45dBHK}\n}", "github": "", "reviewers": "nmxo;pFWN;eXPv;D4Wg", "pdf_size": 3271515, "rating": "6;6;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;2", "wc_summary": "103;103;119;79", "wc_strengths": "118;63;134;30", "wc_weaknesses": "130;78;268;481", "wc_questions": "140;162;11;132", "wc_limitations": "58;7;10;95", "wc_review": "549;413;542;817", "wc_reply_reviewers": "150;46;0;431", "wc_reply_authors": "589;63;626;817", "reply_reviewers": "1;1;0;2", "reply_authors": "1;2;1;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.0, 14.2828568570857 ], "wc_strengths_avg": [ 86.25, 41.811332195948985 ], "wc_weaknesses_avg": [ 239.25, 155.8899852460061 ], "wc_questions_avg": [ 111.25, 58.91254110968224 ], "wc_limitations_avg": [ 42.5, 36.44516428828384 ], "wc_review_avg": [ 580.25, 147.0227448390214 ], "wc_reply_reviewers_avg": [ 156.75, 167.40277028771058 ], "wc_reply_authors_avg": [ 523.75, 279.73145604311287 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9822345161928287067&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.uwaterloo.ca;uwaterloo.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "DECO-Bench: Unified Benchmark for Decoupled Task-Agnostic Synthetic Data Release", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97625", "id": "a7LPpyFWj2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=a7LPpyFWj2", "openreview": "https://openreview.net/forum?id=a7LPpyFWj2", "poster": "", "project": "", "author_site": "Farzaneh Askari, Lingjuan Lyu, Vivek Sharma", "tldr": "", "abstract": "In this work, we tackle the question of how to systematically benchmark task-agnostic decoupling methods for privacy-preserving machine learning (ML). Sharing datasets that include sensitive information often triggers privacy concerns, necessitating robust decoupling methods to separate sensitive and non-sensitive attributes. Despite the development of numerous decoupling techniques, a standard benchmark for systematically comparing these methods remains absent. Our framework integrates various decoupling techniques along with synthetic data\ngeneration and evaluation protocols within a unified system. Using our framework, we benchmark various decoupling techniques and evaluate their privacy-utility trade-offs. Finally, we release our source code, pre-trained models, datasets of decoupled representations to foster research in this area.", "keywords": "Synthetic Data Generation;Decoupling Methods;Privacy-Utility Trade-offs;Privacy-Preserving Machine Learning;Stable Diffusion", "primary_area": "", "supplementary_material": "/attachment/efb699894c6cff234788d936e583520acedede56.pdf", "author": "Farzaneh Askari;Lingjuan Lyu;Vivek Sharma", "authorids": "~Farzaneh_Askari1;~Lingjuan_Lyu1;~Vivek_Sharma1", "gender": "F;F;M", "homepage": ";https://sites.google.com/view/lingjuan-lyu;https://vivoutlaw.github.io/", "dblp": "229/7966;178/9876;", "google_scholar": "EpNBs6wAAAAJ;;fNbVXwQAAAAJ", "orcid": ";;", "linkedin": "fay-askari72/;;vivoutlaw/", "or_profile": "~Farzaneh_Askari1;~Lingjuan_Lyu1;~Vivek_Sharma1", "aff": "SonyAI;Sony;Sony Research", "aff_domain": "sony.co.jp;sony.com;sony.com", "position": "Intern;scientist;Senior Research Scientist", "bibtex": "@inproceedings{\naskari2024decobench,\ntitle={{DECO}-Bench: Unified Benchmark for Decoupled Task-Agnostic Synthetic Data Release},\nauthor={Farzaneh Askari and Lingjuan Lyu and Vivek Sharma},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=a7LPpyFWj2}\n}", "github": "", "reviewers": "NgJu;fx2c;mYev", "pdf_size": 541962, "rating": "5;7;7", "confidence": "3;4;4", "wc_summary_and_contributions": "39;66;96", "wc_strengths": "8;125;16", "wc_improvement": "32;111;15", "wc_limitations": "8;1;6", "wc_correctness": "5;1;1", "wc_clarity": "6;1;1", "wc_relation_to_prior_work": "91;1;1", "wc_documentation": "20;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "210;308;138", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "149;125;109", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 67.0, 23.280893453645632 ], "wc_strengths_avg": [ 49.666666666666664, 53.368738248362426 ], "wc_improvement_avg": [ 52.666666666666664, 41.82768886223043 ], "wc_limitations_avg": [ 5.0, 2.943920288775949 ], "wc_correctness_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 31.0, 42.42640687119285 ], "wc_documentation_avg": [ 7.333333333333333, 8.956685895029603 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 218.66666666666666, 69.67224858020748 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 127.66666666666667, 16.438437341250605 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u-wCbFvuJ0YJ:scholar.google.com/&scioq=DECO-Bench:+Unified+Benchmark+for+Decoupled+Task-Agnostic+Synthetic+Data+Release&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "sony.co.jp;sony.com;sony.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Sony;Sony Corporation", "aff_unique_dep": "Sony AI;", "aff_unique_url": "https://www.sony.com;https://www.sony.com", "aff_unique_abbr": "SonyAI;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Images that Sound: Composing Images and Sounds on a Single Canvas", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94585", "id": "aAR0ejrYw1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aAR0ejrYw1", "openreview": "https://openreview.net/forum?id=aAR0ejrYw1", "poster": "/media/PosterPDFs/NeurIPS%202024/94585.png?t=1732654695.6535854", "project": "", "author_site": "Ziyang Chen, Daniel Geng, Andrew Owens", "tldr": "", "abstract": "Spectrograms are 2D representations of sound that look very different from the images found in our visual world. And natural images, when played as spectrograms, make unnatural sounds. In this paper, we show that it is possible to synthesize spectrograms that simultaneously look like natural images and sound like natural audio. We call these visual spectrograms *images that sound*. Our approach is simple and zero-shot, and it leverages pre-trained text-to-image and text-to-spectrogram diffusion models that operate in a shared latent space. During the reverse process, we denoise noisy latents with both the audio and image diffusion models in parallel, resulting in a sample that is likely under both models. Through quantitative evaluations and perceptual studies, we find that our method successfully generates spectrograms that align with a desired audio prompt while also taking the visual appearance of a desired image prompt.", "keywords": "Multimodal Learning;Audio-Visual Learning;Creative AI;Diffusion Models", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/127ff14d173faee78886f96bfe2b97f55f65f7ce.zip", "author": "Ziyang Chen;Daniel Geng;Andrew Owens", "authorids": "~Ziyang_Chen2;~Daniel_Geng1;~Andrew_Owens1", "gender": "M;M;M", "homepage": "https://ificl.github.io/;https://dangeng.github.io/;http://andrewowens.com", "dblp": ";251/9557;85/2697", "google_scholar": "PbsR83sAAAAJ;JbhCpzkAAAAJ;9hX-JksAAAAJ", "orcid": ";0009-0004-0508-1003;", "linkedin": "ziyang-chen-701982200/;dangeng;", "or_profile": "~Ziyang_Chen2;~Daniel_Geng1;~Andrew_Owens1", "aff": "Adobe Research;Google;University of Michigan", "aff_domain": "adobe.com;google.com;umich.edu", "position": "Intern;Intern;Assistant Professor", "bibtex": "@inproceedings{\nchen2024images,\ntitle={Images that Sound: Composing Images and Sounds on a Single Canvas},\nauthor={Ziyang Chen and Daniel Geng and Andrew Owens},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aAR0ejrYw1}\n}", "github": "", "reviewers": "quDM;w6xJ;RECb;mkQc", "pdf_size": 17036831, "rating": "4;5;7;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "167;89;52;77", "wc_strengths": "147;39;84;105", "wc_weaknesses": "435;108;96;139", "wc_questions": "212;9;37;2", "wc_limitations": "9;15;30;12", "wc_review": "970;260;299;335", "wc_reply_reviewers": "211;413;21;15", "wc_reply_authors": "553;538;20;13", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.25, 42.97310205233036 ], "wc_strengths_avg": [ 93.75, 38.90613704802881 ], "wc_weaknesses_avg": [ 194.5, 139.73635890490348 ], "wc_questions_avg": [ 65.0, 85.87490902469708 ], "wc_limitations_avg": [ 16.5, 8.077747210701755 ], "wc_review_avg": [ 466.0, 292.1908622801199 ], "wc_reply_reviewers_avg": [ 165.0, 163.44418007380992 ], "wc_reply_authors_avg": [ 281.0, 264.56473687927496 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16735637348813100025&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "adobe.com;google.com;umich.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Adobe;Google;University of Michigan", "aff_unique_dep": "Adobe Research;Google;", "aff_unique_url": "https://research.adobe.com;https://www.google.com;https://www.umich.edu", "aff_unique_abbr": "Adobe;Google;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Navigating Chemical Space with Latent Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94584", "id": "aAaV4ZbQ9j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aAaV4ZbQ9j", "openreview": "https://openreview.net/forum?id=aAaV4ZbQ9j", "poster": "/media/PosterPDFs/NeurIPS%202024/94584.png?t=1733815289.4200335", "project": "", "author_site": "Guanghao Wei, Yining Huang, Chenru Duan, Yue Song, Yuanqi Du", "tldr": "", "abstract": "Recent progress of deep generative models in the vision and language domain has stimulated significant interest in more structured data generation such as molecules. However, beyond generating new random molecules, efficient exploration and a comprehensive understanding of the vast chemical space are of great importance to molecular science and applications in drug design and materials discovery.\nIn this paper, we propose a new framework, ChemFlow, to traverse chemical space through navigating the latent space learned by molecule generative models through flows. We introduce a dynamical system perspective that formulates the problem as learning a vector field that transports the mass of the molecular distribution to the region with desired molecular properties or structure diversity. \nUnder this framework, we unify previous approaches on molecule latent space traversal and optimization and propose alternative competing methods incorporating different physical priors. \nWe validate the efficacy of ChemFlow on molecule manipulation and single- and multi-objective molecule optimization tasks under both supervised and unsupervised molecular discovery settings.\nCodes and demos are publicly available on GitHub at \n[https://github.com/garywei944/ChemFlow](https://github.com/garywei944/ChemFlow).", "keywords": "Dynamical System;Optimal Transport;Molecular Discovery;Deep Generative Models", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/3a20f1bd8ba0c6b1820554a7b4957ad5a8754fc8.zip", "author": "Guanghao Wei;Yining Huang;Chenru Duan;Yue Song;Yuanqi Du", "authorids": "~Guanghao_Wei1;~Yining_Huang1;~Chenru_Duan1;~Yue_Song1;~Yuanqi_Du1", "gender": "M;M;M;M;M", "homepage": "https://acad.garywei.dev/;https://yiningsamhuang.com/;https://www.deepprinciple.com;https://kingjamessong.github.io/;https://yuanqidu.github.io/", "dblp": "356/9266;;;11/1346;266/2837", "google_scholar": "https://scholar.google.com/citations?hl=en;YBBQLMcAAAAJ;canPgVoAAAAJ;Uza2i10AAAAJ;fAc_zZMAAAAJ", "orcid": "0000-0003-1016-2372;;0000-0003-2592-4237;;", "linkedin": "garywei944/;yining-huang-83b45a169;chenru-duan-8882a010b/;;", "or_profile": "~Guanghao_Wei1;~Yining_Huang1;~Chenru_Duan1;~Yue_Song1;~Yuanqi_Du1", "aff": "Cornell University;Harvard University;Microsoft;University of Trento, Italy;Cornell University", "aff_domain": "cornell.edu;harvard.edu;microsoft.com;unitn.it;cornell.edu", "position": "Intern;MS student;Researcher;PhD student;PhD student", "bibtex": "@inproceedings{\nwei2024navigating,\ntitle={Navigating Chemical Space with Latent Flows},\nauthor={Guanghao Wei and Yining Huang and Chenru Duan and Yue Song and Yuanqi Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aAaV4ZbQ9j}\n}", "github": "", "reviewers": "D6r5;BiGQ;piJE;kzEx", "pdf_size": 4146113, "rating": "6;6;6;7", "confidence": "3;2;3;3", "soundness": "2;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "62;77;17;83", "wc_strengths": "35;80;27;183", "wc_weaknesses": "53;105;58;140", "wc_questions": "151;52;53;120", "wc_limitations": "13;40;6;146", "wc_review": "314;354;161;672", "wc_reply_reviewers": "16;9;18;178", "wc_reply_authors": "47;98;60;202", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.75, 25.839649765428323 ], "wc_strengths_avg": [ 81.25, 62.122359098797915 ], "wc_weaknesses_avg": [ 89.0, 35.75611835756225 ], "wc_questions_avg": [ 94.0, 42.924352062669506 ], "wc_limitations_avg": [ 51.25, 56.15770205412611 ], "wc_review_avg": [ 375.25, 185.85394130875997 ], "wc_reply_reviewers_avg": [ 55.25, 70.94848483230632 ], "wc_reply_authors_avg": [ 101.75, 60.83738570977553 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7066132490983039084&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "cornell.edu;harvard.edu;microsoft.com;unitn.it;cornell.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Cornell University;Harvard University;Microsoft;University of Trento", "aff_unique_dep": ";;Microsoft Corporation;", "aff_unique_url": "https://www.cornell.edu;https://www.harvard.edu;https://www.microsoft.com;https://www.unitn.it", "aff_unique_abbr": "Cornell;Harvard;Microsoft;UniTN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Italy" }, { "title": "On the Sparsity of the Strong Lottery Ticket Hypothesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94583", "id": "aBMESB1Ajx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aBMESB1Ajx", "openreview": "https://openreview.net/forum?id=aBMESB1Ajx", "poster": "", "project": "", "author_site": "Emanuele Natale, Davide Ferre, Giordano Giambartolomei, Frederic Giroire, Frederik Mallmann-Trenn", "tldr": "", "abstract": "Considerable research efforts have recently been made to show that a random neural network $N$ contains subnetworks capable of accurately approximating any given neural network that is sufficiently smaller than $N$, without any training. \nThis line of research, known as the Strong Lottery Ticket Hypothesis (SLTH), was originally motivated by the weaker Lottery Ticket Hypothesis, which states that a sufficiently large random neural network $N$ contains sparse subnetworks that can be trained efficiently to achieve performance comparable to that of training the entire network $N$.\nDespite its original motivation, results on the SLTH have so far not provided any guarantee on the size of subnetworks.\nSuch limitation is due to the nature of the main technical tool leveraged by these results, the Random Subset Sum (RSS) Problem.\nInformally, the RSS Problem asks how large a random i.i.d. sample $\\Omega$ should be so that we are able to approximate any number in $[-1,1]$, up to an error of $ \\epsilon$, as the sum of a suitable subset of $\\Omega$. \n\nWe provide the first proof of the SLTH in classical settings, such as dense and equivariant networks, with guarantees on the sparsity of the subnetworks. Central to our results, is the proof of an essentially tight bound on the Random Fixed-Size Subset Sum Problem (RFSS), a variant of the RSS Problem in which we only ask for subsets of a given size, which is of independent interest.", "keywords": "strong lottery ticket hypothesis;random subset sum;neural network pruning;random neural network", "primary_area": "learning_theory", "supplementary_material": "", "author": "Emanuele Natale;Davide Ferre';Giordano Giambartolomei;Fr\u00e9d\u00e9ric Giroire;Frederik Mallmann-Trenn", "authorids": "~Emanuele_Natale1;~Davide_Ferre'1;~Giordano_Giambartolomei1;~Fr\u00e9d\u00e9ric_Giroire1;~Frederik_Mallmann-Trenn1", "gender": "M;M;Not Specified;M;M", "homepage": "https://www-sop.inria.fr/members/Emanuele.Natale/;https://dferre97.github.io/;https://www.kcl.ac.uk/people/giordano-giambartolomei;https://www-sop.inria.fr/members/Frederic.Giroire/;https://www.randomlab.uk/", "dblp": "126/5223;;;60/66;119/4801", "google_scholar": "https://scholar.google.it/citations?user=m2P3BH4AAAAJ;;;;https://scholar.google.fr/citations?user=b0VTWqMAAAAJ", "orcid": "0000-0002-8755-3892;0000-0002-6578-6804;;0000-0002-3727-051X;0000-0003-0363-8547", "linkedin": ";;;;", "or_profile": "~Emanuele_Natale1;~Davide_Ferre'1;~Giordano_Giambartolomei1;~Fr\u00e9d\u00e9ric_Giroire1;~Frederik_Mallmann-Trenn1", "aff": "CNRS;CNRS;King's College London, University of London;CNRS;King's College London, University of London", "aff_domain": "cnrs.fr;cnrs.fr;kcl.ac.uk;cnrs.fr;kcl.ac.uk", "position": "Researcher;PhD student;Postdoc;Researcher;Associate Professor", "bibtex": "@inproceedings{\nnatale2024on,\ntitle={On the Sparsity of the Strong Lottery Ticket Hypothesis},\nauthor={Emanuele Natale and Davide Ferre' and Giordano Giambartolomei and Fr{\\'e}d{\\'e}ric Giroire and Frederik Mallmann-Trenn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aBMESB1Ajx}\n}", "github": "", "reviewers": "1iUR;HSXo;WaBr;cEnR", "pdf_size": 550059, "rating": "5;5;5;6", "confidence": "3;2;4;2", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;2;2", "wc_summary": "63;77;139;76", "wc_strengths": "93;31;78;68", "wc_weaknesses": "384;32;291;115", "wc_questions": "2;169;23;32", "wc_limitations": "36;11;40;4", "wc_review": "578;320;571;295", "wc_reply_reviewers": "64;15;170;10", "wc_reply_authors": "30;27;291;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.75, 29.532820725423434 ], "wc_strengths_avg": [ 67.5, 22.874658467395747 ], "wc_weaknesses_avg": [ 205.5, 139.16267459344118 ], "wc_questions_avg": [ 56.5, 65.85780136020334 ], "wc_limitations_avg": [ 22.75, 15.514106484100203 ], "wc_review_avg": [ 441.0, 133.81517103826457 ], "wc_reply_reviewers_avg": [ 64.75, 64.32485911372056 ], "wc_reply_authors_avg": [ 93.75, 113.88892615175543 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YYsJ6F6enAUJ:scholar.google.com/&scioq=On+the+Sparsity+of+the+Strong+Lottery+Ticket+Hypothesis&hl=en&as_sdt=0,44", "gs_version_total": 10, "email": "cnrs.fr;cnrs.fr;kcl.ac.uk;cnrs.fr;kcl.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Centre National de la Recherche Scientifique;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.cnrs.fr;https://www.kcl.ac.uk", "aff_unique_abbr": "CNRS;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "France;United Kingdom" }, { "title": "Scaling transformer neural networks for skillful and reliable medium-range weather forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94582", "id": "aBP01akha9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aBP01akha9", "openreview": "https://openreview.net/forum?id=aBP01akha9", "poster": "/media/PosterPDFs/NeurIPS%202024/94582.png?t=1733256908.630542", "project": "", "author_site": "Tung Nguyen, Rohan Shah, Hritik Bansal, Troy Arcomano, Romit Maulik, Rao Kotamarthi, Ian Foster, Sandeep Madireddy, Aditya Grover", "tldr": "", "abstract": "Weather forecasting is a fundamental problem for anticipating and mitigating the impacts of climate change. Recently, data-driven approaches for weather forecasting based on deep learning have shown great promise, achieving accuracies that are competitive with operational systems. However, those methods often employ complex, customized architectures without sufficient ablation analysis, making it difficult to understand what truly contributes to their success. Here we introduce Stormer, a simple transformer model that achieves state-of-the art performance on weather forecasting with minimal changes to the standard transformer backbone. We identify the key components of Stormer through careful empirical analyses, including weather-specific embedding, randomized dynamics forecast, and pressure-weighted loss. At the core of Stormer is a randomized forecasting objective that trains the model to forecast the weather dynamics over varying time intervals. During inference, this allows us to produce multiple forecasts for a target lead time and combine them to obtain better forecast accuracy. On WeatherBench 2, Stormer performs competitively at short to medium-range forecasts and outperforms current methods beyond 7 days, while requiring orders-of-magnitude less training data and compute. Additionally, we demonstrate Stormer\u2019s favorable scaling properties, showing consistent improvements in forecast accuracy with increases in model size and training tokens. Code and checkpoints are available at https://github.com/tung-nd/stormer.", "keywords": "deep learning;transformers;weather forecasting;climate modeling;AI for climate", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Tung Nguyen;Rohan Shah;Hritik Bansal;Troy Arcomano;Romit Maulik;Veerabhadra Kotamarthi;Ian Foster;Sandeep Madireddy;Aditya Grover", "authorids": "~Tung_Nguyen2;~Rohan_Shah1;~Hritik_Bansal2;~Troy_Arcomano1;~Romit_Maulik1;~Veerabhadra_Kotamarthi1;~Ian_Foster2;~Sandeep_Madireddy1;~Aditya_Grover1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://tung-nd.github.io/;;https://sites.google.com/view/hbansal;;;https://www.anl.gov/profile/rao-kotamarthi;;https://www.anl.gov/profile/sandeep-r-madireddy;https://aditya-grover.github.io", "dblp": ";;239/5922;258/5097;;;f/IanTFoster;205/7527;162/5052", "google_scholar": "https://scholar.google.com.vn/citations?user=F9mgq3sAAAAJ;;gAKTYtoAAAAJ;SBeD6doAAAAJ;8XB99h4AAAAJ;fbyKxxsAAAAJ;VGoSakQAAAAJ;jSa5jTAAAAAJ;oOhnPUgAAAAJ", "orcid": ";;;0000-0002-9359-6020;;0000-0002-2612-7590;0000-0003-2129-5269;0000-0002-0437-8655;", "linkedin": "tung-nguyen-40703616b/;rohan-shah-42b245172/;hritik-bansal/;;;rkotamarthi;ianfoster/;;", "or_profile": "~Tung_Nguyen2;~Rohan_Shah1;~Hritik_Bansal2;~Troy_Arcomano1;~Romit_Maulik1;~Veerabhadra_Kotamarthi1;~Ian_Foster2;~Sandeep_Madireddy1;~Aditya_Grover1", "aff": "University of California, Los Angeles;;University of California, Los Angeles;Argonne National Laboratory;Argonne National Laboratory;Argonne National Laboratory;Argonne National Laboratory;Argonne National Laboratory;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;;ucla.edu;anl.gov;anl.gov;anl.gov;anl.gov;anl.gov;ucla.edu", "position": "PhD student;;PhD student;Postdoc;Postdoc;Principal Researcher;Principal Researcher;Computer Scientist;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2024scaling,\ntitle={Scaling transformer neural networks for skillful and reliable medium-range weather forecasting},\nauthor={Tung Nguyen and Rohan Shah and Hritik Bansal and Troy Arcomano and Romit Maulik and Veerabhadra Kotamarthi and Ian Foster and Sandeep Madireddy and Aditya Grover},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aBP01akha9}\n}", "github": "", "reviewers": "uUm3;vUij;aebu", "pdf_size": 23324338, "rating": "5;6;8", "confidence": "4;5;4", "soundness": "2;2;4", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "15;86;103", "wc_strengths": "24;74;88", "wc_weaknesses": "148;646;74", "wc_questions": "84;199;62", "wc_limitations": "4;69;89", "wc_review": "275;1074;416", "wc_reply_reviewers": "28;485;14", "wc_reply_authors": "530;1270;22", "reply_reviewers": "1;2;1", "reply_authors": "5;7;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 68.0, 38.113864493995706 ], "wc_strengths_avg": [ 62.0, 27.47119703738202 ], "wc_weaknesses_avg": [ 289.3333333333333, 254.004374415525 ], "wc_questions_avg": [ 115.0, 60.07217880738692 ], "wc_limitations_avg": [ 54.0, 36.2859017617954 ], "wc_review_avg": [ 588.3333333333334, 348.20906874398827 ], "wc_reply_reviewers_avg": [ 175.66666666666666, 218.80635781947066 ], "wc_reply_authors_avg": [ 607.3333333333334, 512.4199666506197 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.666666666666667, 2.0548046676563256 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.18898223650461357, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16926559620054552802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.ucla.edu;;ucla.edu;anl.gov;anl.gov;anl.gov;anl.gov;anl.gov;ucla.edu", "author_num": 9, "aff_unique_index": "0;0;1;1;1;1;1;0", "aff_unique_norm": "University of California, Los Angeles;Argonne National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.anl.gov", "aff_unique_abbr": "UCLA;ANL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hamiltonian Monte Carlo on ReLU Neural Networks is Inefficient", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94581", "id": "aBmiyi7iA7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aBmiyi7iA7", "openreview": "https://openreview.net/forum?id=aBmiyi7iA7", "poster": "/media/PosterPDFs/NeurIPS%202024/94581.png?t=1730426492.4799936", "project": "", "author_site": "Vu Dinh, Lam Ho, Cuong V. Nguyen", "tldr": "", "abstract": "We analyze the error rates of the Hamiltonian Monte Carlo algorithm with leapfrog integrator for Bayesian neural network inference. We show that due to the non-differentiability of activation functions in the ReLU family, leapfrog HMC for networks with these activation functions has a large local error rate of $\\Omega(\\epsilon)$ rather than the classical error rate of $\\mathcal{O}(\\epsilon^3)$. This leads to a higher rejection rate of the proposals, making the method inefficient. We then verify our theoretical findings through empirical simulations as well as experiments on a real-world dataset that highlight the inefficiency of HMC inference on ReLU-based neural networks compared to analytical networks.", "keywords": "Hamiltonian Monte Carlo;efficiency;ReLU;optimal acceptance probability", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Vu C. Dinh;Lam Si Tung Ho;Cuong V. Nguyen", "authorids": "~Vu_C._Dinh1;~Lam_Si_Tung_Ho1;~Cuong_V._Nguyen1", "gender": "M;M;M", "homepage": "http://vucdinh.github.io;https://sites.google.com/site/lamho86;https://nvcuong.github.io/", "dblp": "125/5383;;36/9125", "google_scholar": "n5niSOoAAAAJ;https://scholar.google.com.vn/citations?hl=en;CG9yOXoAAAAJ", "orcid": ";;", "linkedin": ";;cuong-nguyen-0b582736", "or_profile": "~Vu_C._Dinh1;~Lam_Si_Tung_Ho1;~Cuong_V_Nguyen1", "aff": "University of Delaware;Dalhousie University;Durham University", "aff_domain": "udel.edu;dal.ca;durham.ac.uk", "position": "Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ndinh2024hamiltonian,\ntitle={Hamiltonian Monte Carlo on Re{LU} Neural Networks is Inefficient},\nauthor={Vu C. Dinh and Lam Si Tung Ho and Cuong V. Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aBmiyi7iA7}\n}", "github": "", "reviewers": "83bn;mLYv;tkpV;v6yY;xQ9k", "pdf_size": 477597, "rating": "4;4;6;6;6", "confidence": "3;3;3;4;2", "soundness": "3;2;3;4;2", "novelty": "2;2;2;2;2", "presentation": "2;3;3;4;2", "wc_summary": "103;93;60;51;42", "wc_strengths": "28;101;59;50;25", "wc_weaknesses": "183;335;266;18;44", "wc_questions": "90;77;4;36;3", "wc_limitations": "25;4;8;37;1", "wc_review": "429;610;397;192;115", "wc_reply_reviewers": "342;115;11;25;15", "wc_reply_authors": "797;163;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 69.8, 23.928226010300055 ], "wc_strengths_avg": [ 52.6, 27.41240595059106 ], "wc_weaknesses_avg": [ 169.2, 122.95267382208489 ], "wc_questions_avg": [ 42.0, 36.138621999185304 ], "wc_limitations_avg": [ 15.0, 13.784048752090222 ], "wc_review_avg": [ 348.6, 176.76492864819085 ], "wc_reply_reviewers_avg": [ 101.6, 126.13262860972968 ], "wc_reply_authors_avg": [ 192.0, 309.01715162754317 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15414572438664975824&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "udel.edu;dal.ca;durham.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Delaware;Dalhousie University;Durham University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.udel.edu;https://www.dal.ca;https://www.dur.ac.uk", "aff_unique_abbr": "UD;Dal;Durham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "Diffusion PID: Interpreting Diffusion via Partial Information Decomposition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94580", "id": "aBpxukZS37", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aBpxukZS37", "openreview": "https://openreview.net/forum?id=aBpxukZS37", "poster": "/media/PosterPDFs/NeurIPS%202024/94580.png?t=1731643645.415765", "project": "", "author_site": "Shaurya Dewan, Rushikesh Zawar, Prakanshul Saxena, Yingshan CHANG, Andrew Luo, Yonatan Bisk", "tldr": "", "abstract": "Text-to-image diffusion models have made significant progress in generating naturalistic images from textual inputs, and demonstrate the capacity to learn and represent complex visual-semantic relationships. While these diffusion models have achieved remarkable success, the underlying mechanisms driving their performance are not yet fully accounted for, with many unanswered questions surrounding what they learn, how they represent visual-semantic relationships, and why they sometimes fail to generalize. Our work presents Diffusion Partial Information Decomposition (DiffusionPID), a novel technique that applies information-theoretic principles to decompose the input text prompt into its elementary components, enabling a detailed examination of how individual tokens and their interactions shape the generated image. We introduce a formal approach to analyze the uniqueness, redundancy, and synergy terms by applying PID to the denoising model at both the image and pixel level. This approach enables us to characterize how individual tokens and their interactions affect the model output. We first present a fine-grained analysis of characteristics utilized by the model to uniquely localize specific concepts, we then apply our approach in bias analysis and show it can recover gender and ethnicity biases. Finally, we use our method to visually characterize word ambiguity and similarity from the model\u2019s perspective and illustrate the efficacy of our method for prompt intervention. Our results show that PID is a potent tool for evaluating and diagnosing text-to-image diffusion models. Link to project page: https://rbz-99.github.io/Diffusion-PID/.", "keywords": "Diffusion;Interpretability;Information Decomposition;Mutual Information;Bias", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/dae666800292da200c1c7ed6e3641b7eec14b449.zip", "author": "Shaurya Rajat Dewan;Rushikesh Zawar;Prakanshul Saxena;Yingshan Chang;Andrew Luo;Yonatan Bisk", "authorids": "~Shaurya_Rajat_Dewan1;~Rushikesh_Zawar1;~Prakanshul_Saxena1;~Yingshan_Chang1;~Andrew_Luo2;~Yonatan_Bisk1", "gender": "M;M;;F;M;M", "homepage": "https://srdewan.github.io/;;;https://zdxdsw.github.io/;https://andrewluo.net/;http://www.YonatanBisk.com", "dblp": ";;;301/8296;234/8054;38/9282", "google_scholar": "1FLYpxAAAAAJ;Qaol8LoAAAAJ;;0QV-ZVQAAAAJ;bWYvvkUAAAAJ;bWoGh8UAAAAJ", "orcid": ";;;;;0000-0002-2111-9081", "linkedin": "shaurya-dewan-1b07231a2/;rushikesh-zawar-a67063153/;prakanshul-saxena-a21b20190/;yingshanchang/;;yonatanbisk/", "or_profile": "~Shaurya_Rajat_Dewan1;~Rushikesh_Zawar1;~Prakanshul_Saxena1;~Yingshan_Chang1;~Andrew_Luo2;~Yonatan_Bisk1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Meta", "aff_domain": "andrew.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;meta.com", "position": "MS student;MS student;MS student;PhD student;PhD student;Visiting Professor", "bibtex": "@inproceedings{\ndewan2024diffusion,\ntitle={Diffusion {PID}: Interpreting Diffusion via Partial Information Decomposition},\nauthor={Shaurya Rajat Dewan and Rushikesh Zawar and Prakanshul Saxena and Yingshan Chang and Andrew Luo and Yonatan Bisk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aBpxukZS37}\n}", "github": "", "reviewers": "THqw;Tpva;vbX2;hzC5", "pdf_size": 25533163, "rating": "6;6;6;7", "confidence": "4;3;5;5", "soundness": "3;3;2;3", "novelty": "4;2;4;3", "presentation": "3;3;3;4", "wc_summary": "51;56;75;80", "wc_strengths": "79;64;61;84", "wc_weaknesses": "34;32;73;188", "wc_questions": "52;19;55;132", "wc_limitations": "1;29;1;4", "wc_review": "217;200;265;488", "wc_reply_reviewers": "10;12;13;15", "wc_reply_authors": "44;30;32;34", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 12.257650672131263 ], "wc_strengths_avg": [ 72.0, 9.72111104761179 ], "wc_weaknesses_avg": [ 81.75, 63.483757765274106 ], "wc_questions_avg": [ 64.5, 41.451779213925185 ], "wc_limitations_avg": [ 8.75, 11.755317945508747 ], "wc_review_avg": [ 292.5, 115.36138868789678 ], "wc_reply_reviewers_avg": [ 12.5, 1.8027756377319946 ], "wc_reply_authors_avg": [ 35.0, 5.385164807134504 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10316315091288280474&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "andrew.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;meta.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Geometry-aware training of factorized layers in tensor Tucker format", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94579", "id": "aBtcfcrjM3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aBtcfcrjM3", "openreview": "https://openreview.net/forum?id=aBtcfcrjM3", "poster": "/media/PosterPDFs/NeurIPS%202024/94579.png?t=1731443637.7504318", "project": "", "author_site": "Emanuele Zangrando, Steffen Schotth\u00f6fer, Gianluca Ceruti, Jonas Kusch, Francesco Tudisco", "tldr": "", "abstract": "Reducing parameter redundancies in neural network architectures is crucial for achieving feasible computational and memory requirements during train and inference of large networks. Given its easy implementation and flexibility, one promising approach is layer factorization, which reshapes weight tensors into a matrix format and parameterizes it as the product of two rank-r matrices. However, this family of approaches often requires an initial full-model warm-up phase, prior knowledge of a feasible rank, and it is sensitive to parameter initialization.\nIn this work, we introduce a novel approach to train the factors of a Tucker decomposition of the weight tensors. Our training proposal proves to be optimal in locally approximating the original unfactorized dynamics and stable for the initialization. Furthermore, the rank of each mode is dynamically updated during training.\nWe provide a theoretical analysis of the algorithm, showing convergence, approximation and local descent guarantees. The method's performance is further illustrated through a variety of experiments, showing remarkable training compression rates and comparable or even better performance than the full baseline and alternative layer factorization strategies.", "keywords": "Tucker Tensors;Tensor Neural Networks;Low-Rank Compression;Optimization;Low-Rank Optimization;Dynamical Low-Rank Approximation", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/a9709fe7e917e11149ea3ddf1947a43a73a31440.zip", "author": "Emanuele Zangrando;Steffen Schotth\u00f6fer;Gianluca Ceruti;Jonas Kusch;Francesco Tudisco", "authorids": "~Emanuele_Zangrando1;~Steffen_Schotth\u00f6fer1;~Gianluca_Ceruti1;~Jonas_Kusch1;~Francesco_Tudisco1", "gender": "M;M;M;M;M", "homepage": ";https://scsteffen.github.io/;;;https://ftudisco.gitlab.io/", "dblp": "321/1701;;;236/0493;136/5777", "google_scholar": "https://scholar.google.it/citations?hl=it;dZqiHeMAAAAJ;eyptuo8AAAAJ;https://scholar.google.de/citations?user=8JGYQTYAAAAJ;uND_5REAAAAJ", "orcid": ";;;0000-0002-2061-2114;0000-0002-8150-4475", "linkedin": ";steffen-schotthoefer/;;;", "or_profile": "~Emanuele_Zangrando1;~Steffen_Schotth\u00f6fer1;~Gianluca_Ceruti1;~Jonas_Kusch1;~Francesco_Tudisco1", "aff": "Gran Sasso Science Institute;Oak Ridge National Laboratory;Universit\u00e4t Innsbruck;Norwegian University of Life Sciences;Gran Sasso Science Institute", "aff_domain": "gssi.it;ornl.gov;uibk.ac.at;nmbu.no;gssi.it", "position": "PhD student;Researcher;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzangrando2024geometryaware,\ntitle={Geometry-aware training of factorized layers in tensor Tucker format},\nauthor={Emanuele Zangrando and Steffen Schotth{\\\"o}fer and Gianluca Ceruti and Jonas Kusch and Francesco Tudisco},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aBtcfcrjM3}\n}", "github": "", "reviewers": "jjrQ;Rp73;SSBR;vGH1;TTwz", "pdf_size": 3096692, "rating": "5;6;6;7;7", "confidence": "3;2;3;5;4", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "78;84;52;81;345", "wc_strengths": "57;23;13;46;15", "wc_weaknesses": "172;56;56;92;1", "wc_questions": "64;80;25;118;55", "wc_limitations": "27;1;7;69;9", "wc_review": "398;244;153;406;425", "wc_reply_reviewers": "0;9;13;163;22", "wc_reply_authors": "0;8;8;132;8", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.4, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 128.0, 109.09628774619236 ], "wc_strengths_avg": [ 30.8, 17.577258034175866 ], "wc_weaknesses_avg": [ 75.4, 56.382976154154896 ], "wc_questions_avg": [ 68.4, 30.58496362593881 ], "wc_limitations_avg": [ 22.6, 24.767720928660353 ], "wc_review_avg": [ 325.2, 107.73560228633801 ], "wc_reply_reviewers_avg": [ 41.4, 61.20980313642579 ], "wc_reply_authors_avg": [ 31.2, 50.4951482817904 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6813851438692469, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9063294754107939808&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "gssi.it;ornl.gov;uibk.ac.at;nmbu.no;gssi.it", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Gran Sasso Science Institute;Oak Ridge National Laboratory;University of Innsbruck;Norwegian University of Life Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.gssi.it;https://www.ornl.gov;https://www.uibk.ac.at;https://www.nmbu.no", "aff_unique_abbr": ";ORNL;UIBK;NMBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Innsbruck", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Italy;United States;Austria;Norway" }, { "title": "Learning Mixtures of Unknown Causal Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94578", "id": "aC9mB1PqYJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aC9mB1PqYJ", "openreview": "https://openreview.net/forum?id=aC9mB1PqYJ", "poster": "", "project": "", "author_site": "Abhinav Kumar, Kirankumar Shiragur, Caroline Uhler", "tldr": "", "abstract": "The ability to conduct interventions plays a pivotal role in learning causal relationships among variables, thus facilitating applications across diverse scientific disciplines such as genomics, economics, and machine learning. However, in many instances within these applications, the process of generating interventional data is subject to noise: rather than data being sampled directly from the intended interventional distribution, interventions often yield data sampled from a blend of both intended and unintended interventional distributions.\n\nWe consider the fundamental challenge of disentangling mixed interventional and observational data within linear Structural Equation Models (SEMs) with Gaussian additive noise without the knowledge of the true causal graph. We demonstrate that conducting interventions, whether do or soft, yields distributions with sufficient diversity and properties conducive to efficiently recovering each component within the mixture. Furthermore, we establish that the sample complexity required to disentangle mixed data inversely correlates with the extent of change induced by an intervention in the equations governing the affected variable values. As a result, the causal graph can be identified up to its interventional Markov Equivalence Class, similar to scenarios where no noise influences the generation of interventional data. We further support our theoretical findings by conducting simulations wherein we perform causal discovery from such mixed data.", "keywords": "causal discovery;mixture", "primary_area": "causal_inference", "supplementary_material": "", "author": "Abhinav Kumar;Kirankumar Shiragur;Caroline Uhler", "authorids": "~Abhinav_Kumar3;~Kirankumar_Shiragur1;~Caroline_Uhler1", "gender": "M;M;F", "homepage": "https://abhinavkumar.info/;https://sites.google.com/view/kiran-shiragur;https://www.carolineuhler.com/", "dblp": "115/6458;;66/10813", "google_scholar": "n0fNl3oAAAAJ;;https://scholar.google.com.tw/citations?user=dIJFcaoAAAAJ", "orcid": ";;", "linkedin": "abhinav-kumar-99b29a16b/;;", "or_profile": "~Abhinav_Kumar3;~Kirankumar_Shiragur1;~Caroline_Uhler1", "aff": "Massachusetts Institute of Technology;Microsoft Research;Electrical Engineering & Computer Science, Massachusetts Institute of Technology", "aff_domain": "mit.edu;microsoft.com;eecs.mit.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkumar2024learning,\ntitle={Learning Mixtures of Unknown Causal Interventions},\nauthor={Abhinav Kumar and Kirankumar Shiragur and Caroline Uhler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aC9mB1PqYJ}\n}", "github": "", "reviewers": "mSnK;nhPx;583w;SSWb", "pdf_size": 706607, "rating": "5;6;6;8", "confidence": "3;5;4;5", "soundness": "3;3;3;4", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "33;18;112;77", "wc_strengths": "33;39;40;111", "wc_weaknesses": "75;66;112;49", "wc_questions": "2;51;106;106", "wc_limitations": "32;1;9;17", "wc_review": "175;175;379;360", "wc_reply_reviewers": "19;18;180;13", "wc_reply_authors": "21;0;432;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;3;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.0, 37.03376837428241 ], "wc_strengths_avg": [ 55.75, 32.01074038506451 ], "wc_weaknesses_avg": [ 75.5, 23.04886114323222 ], "wc_questions_avg": [ 66.25, 43.36112890596831 ], "wc_limitations_avg": [ 14.75, 11.453711188955307 ], "wc_review_avg": [ 272.25, 97.48172905729565 ], "wc_reply_reviewers_avg": [ 57.5, 70.76192479010163 ], "wc_reply_authors_avg": [ 113.25, 184.22998534440586 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VuayNRIrsloJ:scholar.google.com/&scioq=Learning+Mixtures+of+Unknown+Causal+Interventions&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "mit.edu;microsoft.com;eecs.mit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MIT;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Federated Learning with Multi-Timescale Gradient Correction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94577", "id": "aCAb1qNXI0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aCAb1qNXI0", "openreview": "https://openreview.net/forum?id=aCAb1qNXI0", "poster": "/media/PosterPDFs/NeurIPS%202024/94577.png?t=1731534517.506813", "project": "", "author_site": "Wenzhi Fang, Dong-Jun Han, Evan Chen, Shiqiang Wang, Christopher Brinton", "tldr": "", "abstract": "While traditional federated learning (FL) typically focuses on a star topology where clients are directly connected to a central server, real-world distributed systems often exhibit hierarchical architectures. Hierarchical FL (HFL) has emerged as a promising solution to bridge this gap, leveraging aggregation points at multiple levels of the system. However, existing algorithms for HFL encounter challenges in dealing with multi-timescale model drift, i.e., model drift occurring across hierarchical levels of data heterogeneity. In this paper, we propose a multi-timescale gradient correction (MTGC) methodology to resolve this issue. Our key idea is to introduce distinct control variables to (i) correct the client gradient towards the group gradient, i.e., to reduce client model drift caused by local updates based on individual datasets, and (ii) correct the group gradient towards the global gradient, i.e., to reduce group model drift caused by FL over clients within the group. We analytically characterize the convergence behavior of MTGC under general non-convex settings, overcoming challenges associated with couplings between correction terms. We show that our convergence bound is immune to the extent of data heterogeneity, confirming the stability of the proposed algorithm against multi-level non-i.i.d. data. Through extensive experiments on various datasets and models, we validate the effectiveness of MTGC in diverse HFL settings. The code for this project is available at https://github.com/wenzhifang/MTGC.", "keywords": "Federated learning;gradient correction;hierarchical", "primary_area": "other", "supplementary_material": "/attachment/7ea3a508337e54c3b121c9c3d2bc74b2ff6e7c4f.zip", "author": "Wenzhi Fang;Dong-Jun Han;Evan Chen;Shiqiang Wang;Christopher Brinton", "authorids": "~Wenzhi_Fang1;~Dong-Jun_Han1;~Evan_Chen1;~Shiqiang_Wang1;~Christopher_Brinton1", "gender": "M;M;M;M;", "homepage": "https://wenzhifang.github.io/;https://sites.google.com/view/djhan930/home?authuser=0;https://evan-py-chen.github.io;https://shiqiang.wang;https://www.cbrinton.net/", "dblp": "265/5532;201/0078;207/0551;87/5094-1;", "google_scholar": "XdUxykMAAAAJ;https://scholar.google.co.kr/citations?user=-YR-GxUAAAAJ;sL27C_sAAAAJ;kA_vmOcAAAAJ;vWmHA5MAAAAJ", "orcid": ";;;;", "linkedin": ";;evan-poyu-chen/;;", "or_profile": "~Wenzhi_Fang1;~Dong-Jun_Han1;~Evan_Chen1;~Shiqiang_Wang1;~Christopher_Brinton1", "aff": "Purdue University;Purdue University;Purdue University;IBM, International Business Machines;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu;us.ibm.com;purdue.edu", "position": "PhD student;Postdoc;PhD student;Research Staff Member;Assistant Professor", "bibtex": "@inproceedings{\nfang2024hierarchical,\ntitle={Hierarchical Federated Learning with Multi-Timescale Gradient Correction},\nauthor={Wenzhi Fang and Dong-Jun Han and Evan Chen and Shiqiang Wang and Christopher Brinton},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aCAb1qNXI0}\n}", "github": "", "reviewers": "rcWg;zFr1;fhg9;r1S6", "pdf_size": 1192054, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;2;3;3", "novelty": "1;2;2;2", "presentation": "3;2;3;3", "wc_summary": "104;53;74;56", "wc_strengths": "49;13;99;24", "wc_weaknesses": "138;7;184;20", "wc_questions": "3;65;7;79", "wc_limitations": "9;1;19;1", "wc_review": "303;139;383;180", "wc_reply_reviewers": "140;92;0;0", "wc_reply_authors": "27;461;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 20.27775875189366 ], "wc_strengths_avg": [ 46.25, 33.13136731256348 ], "wc_weaknesses_avg": [ 87.25, 75.66166466580022 ], "wc_questions_avg": [ 38.5, 33.89321466016465 ], "wc_limitations_avg": [ 7.5, 7.399324293474371 ], "wc_review_avg": [ 251.25, 97.09885426718485 ], "wc_reply_reviewers_avg": [ 58.0, 60.43177971895251 ], "wc_reply_authors_avg": [ 122.0, 196.03188516157263 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12043230661723717636&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;purdue.edu;us.ibm.com;purdue.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Purdue University;International Business Machines", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.ibm.com", "aff_unique_abbr": "Purdue;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bandits with Ranking Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94576", "id": "aCaspFfAhG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aCaspFfAhG", "openreview": "https://openreview.net/forum?id=aCaspFfAhG", "poster": "", "project": "", "author_site": "Davide Maran, Francesco Bacchiocchi, Francesco Emanuele Stradi, Matteo Castiglioni, Nicola Gatti, Marcello Restelli", "tldr": "", "abstract": "In this paper, we introduce a novel variation of multi-armed bandits called bandits with ranking feedback. Unlike traditional bandits, this variation provides feedback to the learner that allows them to rank the arms based on previous pulls, without quantifying numerically the difference in performance. This type of feedback is well-suited for scenarios where the arms' values cannot be precisely measured using metrics such as monetary scores, probabilities, or occurrences. Common examples include human preferences in matchmaking problems. Furthermore, its investigation answers the theoretical question on how numerical rewards are crucial in bandit settings. In particular, we study the problem of designing no-regret algorithms with ranking feedback both in the stochastic and adversarial settings. We show that, with stochastic rewards, differently from what happens with non-ranking feedback, no algorithm can suffer a logarithmic regret in the time horizon $T$ in the instance-dependent case. Furthermore, we provide two algorithms. The first, namely DREE, guarantees a superlogarithmic regret in $T$ in the instance-dependent case thus matching our lower bound, while the second, namely R-LPE, guarantees a regret of $\\mathcal{\\widetilde O}(\\sqrt{T})$ in the instance-independent case. Remarkably, we show that no algorithm can have an optimal regret bound in both instance-dependent and instance-independent cases. Finally, we prove that no algorithm can achieve a sublinear regret when the rewards are adversarial.", "keywords": "Online Learning;Bandits", "primary_area": "bandits", "supplementary_material": "/attachment/116bcca219c212848de53e8c9e624f0af7d61b9d.zip", "author": "Davide Maran;Francesco Bacchiocchi;Francesco Emanuele Stradi;Matteo Castiglioni;Nicola Gatti;Marcello Restelli", "authorids": "~Davide_Maran1;~Francesco_Bacchiocchi1;~Francesco_Emanuele_Stradi1;~Matteo_Castiglioni1;~Nicola_Gatti1;~Marcello_Restelli1", "gender": "M;M;M;;M;M", "homepage": "https://davidezfc.github.io/;;https://francescoemanuelestradi.github.io;https://castiglionimatteo.github.io;https://www4.ceda.polimi.it/manifesti/manifesti/controller/ricerche/RicercaPerDocentiPublic.do?k_doc=75785&lang=EN&EVN_PRODOTTI=evento&__pj0=0&__pj1=d918ee8916afbd0005f5c0bc3c0ff350;http://home.deib.polimi.it/restelli/", "dblp": "320/3835;312/4794.html;345/9650;225/7720;g/NicolaGatti;64/1011", "google_scholar": "https://scholar.google.it/citations?user=a8i0X8oAAAAJ;https://scholar.google.com.vn/citations?user=UKGWeAoAAAAJ;JYdi_FMAAAAJ;https://scholar.google.it/citations?user=NPE3HAYAAAAJ;https://scholar.google.com.tw/citations?user=j-HrYREAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";;;0000-0002-1070-6766;0000-0001-7349-3932;0000-0002-6322-1076", "linkedin": "davide-maran/;;francesco-emanuele-stradi-bb35b0222/;;nicola-gatti-1284b21;", "or_profile": "~Davide_Maran1;~Francesco_Bacchiocchi1;~Francesco_Emanuele_Stradi1;~Matteo_Castiglioni1;~Nicola_Gatti1;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;Polytechnic Institute of Milan;Polytechnic Institute of Milan;Politecnico di Milano;Polytechnic Institute of Milan;Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it;polimi.it;polimi.it;polimi.it", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nmaran2024bandits,\ntitle={Bandits with Ranking Feedback},\nauthor={Davide Maran and Francesco Bacchiocchi and Francesco Emanuele Stradi and Matteo Castiglioni and Nicola Gatti and Marcello Restelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aCaspFfAhG}\n}", "github": "", "reviewers": "hpCM;iUMy;cfwJ;KUAx", "pdf_size": 673737, "rating": "5;6;7;7", "confidence": "2;3;5;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "wc_summary": "116;139;71;77", "wc_strengths": "81;49;123;71", "wc_weaknesses": "109;206;53;77", "wc_questions": "34;206;86;2", "wc_limitations": "22;27;1;2", "wc_review": "362;627;334;229", "wc_reply_reviewers": "12;0;10;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.75, 28.039035290109393 ], "wc_strengths_avg": [ 81.0, 26.870057685088806 ], "wc_weaknesses_avg": [ 111.25, 58.19954896732448 ], "wc_questions_avg": [ 82.0, 77.61443164772902 ], "wc_limitations_avg": [ 13.0, 11.640446726822816 ], "wc_review_avg": [ 388.0, 146.6236679393883 ], "wc_reply_reviewers_avg": [ 16.0, 15.684387141358123 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15715671025942107014&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "email": "polimi.it;polimi.it;polimi.it;polimi.it;polimi.it;polimi.it", "author_num": 6, "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "On Differentially Private Subspace Estimation in a Distribution-Free Setting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94575", "id": "aCcHVnwNlf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aCcHVnwNlf", "openreview": "https://openreview.net/forum?id=aCcHVnwNlf", "poster": "/media/PosterPDFs/NeurIPS%202024/94575.png?t=1730123578.784291", "project": "", "tldr": "", "abstract": "Private data analysis faces a significant challenge known as the curse of dimensionality, leading to increased costs. However, many datasets possess an inherent low-dimensional structure. For instance, during optimization via gradient descent, the gradients frequently reside near a low-dimensional subspace. If the low-dimensional structure could be privately identified using a small amount of points, we could avoid paying for the high ambient dimension.\n\nOn the negative side, Dwork, Talwar, Thakurta, and Zhang (STOC 2014) proved that privately estimating subspaces, in general, requires an amount of points that has a polynomial dependency on the dimension. However, their bounds do not rule out the possibility to reduce the number of points for \"easy\" instances. Yet, providing a measure that captures how much a given dataset is \"easy\" for this task turns out to be challenging, and was not properly addressed in prior works.\n\nInspired by the work of Singhal and Steinke (NeurIPS 2021), we provide the first measures that quantify \"easiness\" as a function of multiplicative singular-value gaps in the input dataset, and support them with new upper and lower bounds. In particular, our results determine the first types of gaps that are sufficient and necessary for estimating a subspace with an amount of points that is independent of the dimension. Furthermore, we realize our upper bounds using a practical algorithm and demonstrate its advantage in high-dimensional regimes compared to prior approaches.", "keywords": "Differential Privacy;Private Subspace Estimation;Private PCA;Fingerprinting Codes", "primary_area": "privacy", "supplementary_material": "/attachment/719d68501b21b1f43e47e31775571bdd64b305ad.zip", "author": "Eliad Tsfadia", "authorids": "~Eliad_Tsfadia1", "gender": "M", "homepage": "https://sites.google.com/view/eliadtsfadia", "dblp": "146/9658", "google_scholar": "https://scholar.google.com/citations?hl=en", "orcid": "", "linkedin": "", "or_profile": "~Eliad_Tsfadia1", "aff": "Georgetown University", "aff_domain": "georgetown.edu", "position": "Postdoc", "bibtex": "@inproceedings{\ntsfadia2024on,\ntitle={On Differentially Private Subspace Estimation in a Distribution-Free Setting},\nauthor={Eliad Tsfadia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aCcHVnwNlf}\n}", "github": "", "reviewers": "RoTv;ivMD;UdSk;T3Ub", "pdf_size": 776632, "rating": "6;7;7;7", "confidence": "3;3;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;2;2;3", "wc_summary": "82;108;97;47", "wc_strengths": "34;117;35;119", "wc_weaknesses": "11;47;388;83", "wc_questions": "21;29;27;17", "wc_limitations": "11;6;10;7", "wc_review": "159;307;557;273", "wc_reply_reviewers": "0;16;22;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.5, 23.005434140654682 ], "wc_strengths_avg": [ 76.25, 41.75748435909424 ], "wc_weaknesses_avg": [ 132.25, 149.8355348373676 ], "wc_questions_avg": [ 23.5, 4.769696007084728 ], "wc_limitations_avg": [ 8.5, 2.0615528128088303 ], "wc_review_avg": [ 324.0, 145.2618325645109 ], "wc_reply_reviewers_avg": [ 9.5, 9.733961166965893 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13322187493917808982&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "georgetown.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Georgetown University", "aff_unique_dep": "", "aff_unique_url": "https://www.georgetown.edu", "aff_unique_abbr": "GU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Efficient Contextual LLM Cascades through Budget-Constrained Policy Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94574", "id": "aDQlAz09dS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aDQlAz09dS", "openreview": "https://openreview.net/forum?id=aDQlAz09dS", "poster": "", "project": "", "author_site": "Xuechen Zhang, Zijian Huang, Ege Onur Taga, Carlee Joe-Wong, Samet Oymak, Jiasi Chen", "tldr": "", "abstract": "Recent successes in natural language processing have led to the proliferation of large language models (LLMs) by multiple providers. Each LLM offering has different inference accuracy, monetary cost, and latency, and their accuracy further depends on the exact wording of the question (i.e., the specific prompt). At the same time, users often have a limit on monetary budget and latency to answer all their questions, and they do not know which LLMs to choose for each question to meet their accuracy and long term budget requirements. To navigate this rich design space, we propose TREACLE (Thrifty Reasoning via Context-Aware LLM and Prompt Selection), a reinforcement learning policy that jointly selects the model and prompting scheme while respecting the user's monetary cost and latency constraints. TREACLE uses the problem context, including question text embeddings (reflecting the type or difficulty of a query) and the response history (reflecting the consistency of previous responses) to make smart decisions. Our evaluations on standard reasoning datasets (GSM8K, CSQA, and LLC) with various LLMs and prompts show that TREACLE enables cost savings of up to 85% compared to baselines, while maintaining high accuracy. Importantly, it provides the user with the ability to gracefully trade off accuracy for cost.", "keywords": "LLMs; Budget constraint; Reinforcement learning", "primary_area": "infrastructure", "supplementary_material": "/attachment/6e095dbe7ca2a33f1877c0c69b3ddface0d25759.zip", "author": "Xuechen Zhang;Zijian Huang;Ege Onur Taga;Carlee Joe-Wong;Samet Oymak;Jiasi Chen", "authorids": "~Xuechen_Zhang2;~Zijian_Huang2;~Ege_Onur_Taga1;~Carlee_Joe-Wong1;~Samet_Oymak2;~Jiasi_Chen1", "gender": "F;M;F;F;M;M", "homepage": "https://www.linkedin.com/in/xuechen-zhang-9a5385213;https://github.com/egetaga;https://www.andrew.cmu.edu/user/cjoewong/;https://jiasi.engin.umich.edu/;https://sota.engin.umich.edu/;https://zijianh4.github.io", "dblp": "51/7435-2;376/0859;40/9937.html;35/9005;89/8771;205/5823-2", "google_scholar": "Xj4fIC4AAAAJ;Rqp70OwAAAAJ;XEztdZgAAAAJ;;AY6InkoAAAAJ;9dlrr8MAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;zijian-huang-0b8b8b178/", "or_profile": "~Xuechen_Zhang2;~Ege_Onur_Taga1;~Carlee_Joe-Wong1;~Jiasi_Chen1;~Samet_Oymak1;~Zijian_HUANG1", "aff": "University of California, Riverside;University of Michigan - Ann Arbor;Carnegie Mellon University;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "ucr.edu;umich.edu;cmu.edu;umich.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024efficient,\ntitle={Efficient Contextual {LLM} Cascades through Budget-Constrained Policy Learning},\nauthor={Xuechen Zhang and Zijian Huang and Ege Onur Taga and Carlee Joe-Wong and Samet Oymak and Jiasi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aDQlAz09dS}\n}", "github": "", "reviewers": "LPQa;gLy5;EMUg;kukn", "pdf_size": 1314357, "rating": "4;5;5;7", "confidence": "5;2;2;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "38;30;90;49", "wc_strengths": "10;95;60;34", "wc_weaknesses": "247;125;73;3", "wc_questions": "48;187;44;1", "wc_limitations": "22;20;1;1", "wc_review": "365;457;268;88", "wc_reply_reviewers": "46;56;135;0", "wc_reply_authors": "400;409;286;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 51.75, 23.09085316743407 ], "wc_strengths_avg": [ 49.75, 31.546592525976557 ], "wc_weaknesses_avg": [ 112.0, 89.15716460273958 ], "wc_questions_avg": [ 70.0, 70.01785486574121 ], "wc_limitations_avg": [ 11.0, 10.024968827881711 ], "wc_review_avg": [ 294.5, 136.67571108284017 ], "wc_reply_reviewers_avg": [ 59.25, 48.56632063477735 ], "wc_reply_authors_avg": [ 273.75, 165.3184427098199 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.044151078568834795, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:r0UxrJV5VzgJ:scholar.google.com/&scioq=Efficient+Contextual+LLM+Cascades+through+Budget-Constrained+Policy+Learning&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ucr.edu;umich.edu;cmu.edu;umich.edu;umich.edu;umich.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "University of California, Riverside;University of Michigan;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucr.edu;https://www.umich.edu;https://www.cmu.edu", "aff_unique_abbr": "UCR;UM;CMU", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Riverside;Ann Arbor;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Plant-and-Steal: Truthful Fair Allocations via Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94573", "id": "aFB97F8QSF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aFB97F8QSF", "openreview": "https://openreview.net/forum?id=aFB97F8QSF", "poster": "/media/PosterPDFs/NeurIPS%202024/94573.png?t=1733622717.1239486", "project": "", "author_site": "Ilan Cohen, Alon Eden, Talya Eden, Arsen Vasilyan", "tldr": "", "abstract": "We study truthful mechanisms for approximating the Maximin-Share (MMS) allocation of agents with additive valuations for indivisible goods. Algorithmically, constant factor approximations exist for the problem for any number of agents. When adding incentives to the mix, a jarring result by Amanatidis, Birmpas, Christodoulou, and Markakis [EC 2017] shows that the best possible approximation for two agents and $m$ items is $\\lfloor \\frac{m}{2} \\rfloor$. We adopt a learning-augmented framework to investigate what is possible when some prediction on the input is given. For two agents, we give a truthful mechanism that takes agents' ordering over items as prediction. When the prediction is accurate, we give a $2$-approximation to the MMS (consistency), and when the prediction is off, we still get an $\\lceil \\frac{m}{2} \\rceil$-approximation to the MMS (robustness). We further show that the mechanism's performance degrades gracefully in the number of ``mistakes\" in the prediction; i.e., we interpolate (up to constant factors) between the two extremes: when there are no mistakes, and when there is a maximum number of mistakes. We also show an impossibility result on the obtainable consistency for mechanisms with finite robustness. For the general case of $n\\ge 2$ agents, we give a 2-approximation mechanism for accurate predictions, with relaxed fallback guarantees. Finally, we give experimental results which illustrate when different components of our framework, made to insure consistency and robustness, come into play.", "keywords": "mechanism design;learning-augmented;fairness;algorithms with predictions.", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/8f2c420a84cf6431a0559ced2aad76ac05b5feea.zip", "author": "Ilan Reuven Cohen;Alon Eden;Talya Eden;Arsen Vasilyan", "authorids": "~Ilan_Reuven_Cohen1;~Alon_Eden1;~Talya_Eden1;~Arsen_Vasilyan1", "gender": "M;M;;", "homepage": "https://ilanrcohen.droppages.com/;https://aloneden.github.io/;;", "dblp": "130/0602;157/6064.html;161/3999;", "google_scholar": "YqhbyccAAAAJ;6I6LNdQAAAAJ;;", "orcid": "0000-0001-7870-6319;;;", "linkedin": ";;;", "or_profile": "~Ilan_Reuven_Cohen1;~Alon_Eden1;~Talya_Eden1;~Arsen_Vasilyan1", "aff": "Bar-Ilan University;Hebrew University of Jerusalem;Bar-Ilan University;", "aff_domain": "biu.ac.il;huji.ac.il;biu.ac.il;", "position": "Assistant Professor;Assistant Professor;Assistant Professor;", "bibtex": "@inproceedings{\ncohen2024plantandsteal,\ntitle={Plant-and-Steal: Truthful Fair Allocations via Predictions},\nauthor={Ilan Reuven Cohen and Alon Eden and Talya Eden and Arsen Vasilyan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aFB97F8QSF}\n}", "github": "", "reviewers": "iNp4;EdC1;CcvG;dEcz", "pdf_size": 1009580, "rating": "4;5;6;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "337;291;177;356", "wc_strengths": "51;87;76;80", "wc_weaknesses": "212;493;103;42", "wc_questions": "170;56;12;1", "wc_limitations": "43;50;11;10", "wc_review": "813;977;379;489", "wc_reply_reviewers": "339;294;18;99", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 290.25, 69.52472581751042 ], "wc_strengths_avg": [ 73.5, 13.573871960498227 ], "wc_weaknesses_avg": [ 212.5, 173.01806264086994 ], "wc_questions_avg": [ 59.75, 66.89684222741758 ], "wc_limitations_avg": [ 28.5, 18.172781845386247 ], "wc_review_avg": [ 664.5, 240.841753024678 ], "wc_reply_reviewers_avg": [ 187.5, 133.09489096129874 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VNvBSflys78J:scholar.google.com/&scioq=Plant-and-Steal:+Truthful+Fair+Allocations+via+Predictions&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "biu.ac.il;huji.ac.il;biu.ac.il;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Bar-Ilan University;Hebrew University of Jerusalem", "aff_unique_dep": ";", "aff_unique_url": "https://www.biu.ac.il;https://www.huji.ac.il", "aff_unique_abbr": "BIU;HUJI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "An Accelerated Gradient Method for Convex Smooth Simple Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94572", "id": "aFOdln7jBV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aFOdln7jBV", "openreview": "https://openreview.net/forum?id=aFOdln7jBV", "poster": "", "project": "", "author_site": "Jincheng Cao, Ruichen Jiang, Erfan Yazdandoost Hamedani, Aryan Mokhtari", "tldr": "", "abstract": "In this paper, we focus on simple bilevel optimization problems, where we minimize a convex smooth objective function over the optimal solution set of another convex smooth constrained optimization problem. We present a novel bilevel optimization method that locally approximates the solution set of the lower-level problem using a cutting plane approach and employs an accelerated gradient-based update to reduce the upper-level objective function over the approximated solution set. We measure the performance of our method in terms of suboptimality and infeasibility errors and provide non-asymptotic convergence guarantees for both error criteria. Specifically, when the feasible set is compact, we show that our method requires at most $\\mathcal{O}(\\max\\\\{1/\\sqrt{\\epsilon_{f}}, 1/\\epsilon_g\\\\})$ iterations to find a solution that is $\\epsilon_f$-suboptimal and $\\epsilon_g$-infeasible. Moreover, under the additional assumption that the lower-level objective satisfies the $r$-th H\u00f6lderian error bound, we show that our method achieves an iteration complexity of $\\mathcal{O}(\\max\\\\{\\epsilon_{f}^{-\\frac{2r-1}{2r}},\\epsilon_{g}^{-\\frac{2r-1}{2r}}\\\\})$, which matches the optimal complexity of single-level convex constrained optimization when $r=1$.", "keywords": "accelerated gradient methods;bilevel optimization", "primary_area": "optimization", "supplementary_material": "/attachment/8afd61f36cfb2c0b6a00736f934b9242423975fe.zip", "author": "Jincheng Cao;Ruichen Jiang;Erfan Yazdandoost Hamedani;Aryan Mokhtari", "authorids": "~Jincheng_Cao1;~Ruichen_Jiang1;~Erfan_Yazdandoost_Hamedani1;~Aryan_Mokhtari3", "gender": "M;;M;M", "homepage": "https://www.linkedin.com/in/jc-cao/;https://ruichen-jiang.github.io/;https://profiles.arizona.edu/person/erfany;https://sites.utexas.edu/mokhtari/", "dblp": ";271/7916;191/6717;140/7407", "google_scholar": ";BGFt1UMAAAAJ;imtUGbQAAAAJ;glcep6EAAAAJ", "orcid": ";;0000-0002-3229-3499;", "linkedin": ";;;", "or_profile": "~Jincheng_Cao1;~Ruichen_Jiang1;~Erfan_Yazdandoost_Hamedani1;~Aryan_Mokhtari3", "aff": "University of Texas at Austin;University of Texas at Austin;University of Arizona;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;arizona.edu;utexas.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncao2024an,\ntitle={An Accelerated Gradient Method for Convex Smooth Simple Bilevel Optimization},\nauthor={Jincheng Cao and Ruichen Jiang and Erfan Yazdandoost Hamedani and Aryan Mokhtari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aFOdln7jBV}\n}", "github": "", "reviewers": "fv8w;1z4b;R6Rr;B2N7", "pdf_size": 975710, "rating": "6;6;6;8", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "4;4;3;3", "wc_summary": "24;35;25;32", "wc_strengths": "17;19;38;66", "wc_weaknesses": "69;1;44;71", "wc_questions": "63;40;79;1", "wc_limitations": "21;6;2;1", "wc_review": "194;101;188;171", "wc_reply_reviewers": "59;29;38;0", "wc_reply_authors": "213;13;261;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 29.0, 4.636809247747852 ], "wc_strengths_avg": [ 35.0, 19.685019685029527 ], "wc_weaknesses_avg": [ 46.25, 28.207933281259724 ], "wc_questions_avg": [ 45.75, 29.32042803234632 ], "wc_limitations_avg": [ 7.5, 8.0156097709407 ], "wc_review_avg": [ 163.5, 37.05738792737556 ], "wc_reply_reviewers_avg": [ 31.5, 21.195518394226642 ], "wc_reply_authors_avg": [ 121.75, 116.58339289967503 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16315384793223840326&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "utexas.edu;utexas.edu;arizona.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Austin;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.arizona.edu", "aff_unique_abbr": "UT Austin;UA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Generative Model of Symmetry Transformations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94571", "id": "aFP24eYpWh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aFP24eYpWh", "openreview": "https://openreview.net/forum?id=aFP24eYpWh", "poster": "/media/PosterPDFs/NeurIPS%202024/94571.png?t=1731949089.2417529", "project": "", "author_site": "James Allingham, Bruno Mlodozeniec, Shreyas Padhy, Javier Antor\u00e1n, David Krueger, Richard Turner, Eric Nalisnick, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "tldr": "", "abstract": "Correctly capturing the symmetry transformations of data can lead to efficient models with strong generalization capabilities, though methods incorporating symmetries often require prior knowledge.\nWhile recent advancements have been made in learning those symmetries directly from the dataset, most of this work has focused on the discriminative setting.\nIn this paper, we take inspiration from group theoretic ideas to construct a generative model that explicitly aims to capture the data's approximate symmetries. \nThis results in a model that, given a prespecified broad set of possible symmetries, learns to what extent, if at all, those symmetries are actually present.\nOur model can be seen as a generative process for data augmentation.\nWe provide a simple algorithm for learning our generative model and empirically demonstrate its ability to capture symmetries under affine and color transformations, in an interpretable way.\nCombining our symmetry model with standard generative models results in higher marginal test-log-likelihoods and improved data efficiency.", "keywords": "approximate symmetries;invariances;deep generative models", "primary_area": "generative_models", "supplementary_material": "", "author": "James Urquhart Allingham;Bruno Kacper Mlodozeniec;Shreyas Padhy;Javier Antoran;David Krueger;Richard E. Turner;Eric Nalisnick;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato", "authorids": "~James_Urquhart_Allingham1;~Bruno_Kacper_Mlodozeniec2;~Shreyas_Padhy1;~Javier_Antoran1;~David_Krueger1;~Richard_E_Turner1;~Eric_Nalisnick1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1", "gender": "M;Not Specified;M;Unspecified;M;M;M;", "homepage": "https://jamesallingham.com;https://brunokm.github.io;http://shreyaspadhy.github.io;https://javierantoran.github.io/about/;https://mila.umontreal.ca/en/person/david-scott-krueger/;https://rich-turner-group.github.io/;https://enalisnick.github.io;http://jmhl.org", "dblp": ";241/6874;267/9851;234/8818.html;142/2741.html;40/5352;136/4057;40/6058", "google_scholar": "CIp9adkAAAAJ;kGPBRy8AAAAJ;JxbV2R0AAAAJ;_b-Cs2cAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;cb1ZN7AAAAAJ;BEBccCQAAAAJ", "orcid": ";;;0000-0003-2877-2689;;;;0000-0001-7610-949X", "linkedin": ";bkmlodozeniec/;;javier-antoran/;;;;", "or_profile": "~James_Urquhart_Allingham1;~Bruno_Kacper_Mlodozeniec2;~Shreyas_Padhy1;~Javier_Antoran1;~David_Krueger1;~Richard_E_Turner1;~Eric_Nalisnick1;~Jose_Miguel_Hernandez_Lobato1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;University of Cambridge;Microsoft Research;University of Amsterdam;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;research.microsoft.com;uva.nl;cam.ac.uk", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nallingham2024a,\ntitle={A Generative Model of Symmetry Transformations},\nauthor={James Urquhart Allingham and Bruno Kacper Mlodozeniec and Shreyas Padhy and Javier Antoran and David Krueger and Richard E. Turner and Eric Nalisnick and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aFP24eYpWh}\n}", "github": "", "reviewers": "d33o;GGon;TzHS;rf86", "pdf_size": 9391965, "rating": "4;6;6;8", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "46;94;83;37", "wc_strengths": "6;97;42;138", "wc_weaknesses": "346;224;302;130", "wc_questions": "5;77;8;143", "wc_limitations": "1;6;15;20", "wc_review": "404;498;450;468", "wc_reply_reviewers": "258;104;249;47", "wc_reply_authors": "510;177;154;96", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.0, 24.031229681395832 ], "wc_strengths_avg": [ 70.75, 50.57358500244965 ], "wc_weaknesses_avg": [ 250.5, 82.15077601581132 ], "wc_questions_avg": [ 58.25, 56.777526363870415 ], "wc_limitations_avg": [ 10.5, 7.433034373659253 ], "wc_review_avg": [ 455.0, 34.07345007480164 ], "wc_reply_reviewers_avg": [ 164.5, 91.30854286429063 ], "wc_reply_authors_avg": [ 234.25, 161.91722422274907 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15358398841228416474&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;cam.ac.uk;research.microsoft.com;uva.nl;cam.ac.uk", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;2;0", "aff_unique_norm": "University of Cambridge;Microsoft;University of Amsterdam", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com/en-us/research;https://www.uva.nl", "aff_unique_abbr": "Cambridge;MSR;UvA", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;0;1;2;0", "aff_country_unique": "United Kingdom;United States;Netherlands" }, { "title": "The Map Equation Goes Neural: Mapping Network Flows with Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94570", "id": "aFWx1N84Fe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aFWx1N84Fe", "openreview": "https://openreview.net/forum?id=aFWx1N84Fe", "poster": "/media/PosterPDFs/NeurIPS%202024/94570.png?t=1733782301.9409094", "project": "", "author_site": "Christopher Bl\u00f6cker, Chester Tan, Ingo Scholtes", "tldr": "", "abstract": "Community detection is an essential tool for unsupervised data exploration and revealing the organisational structure of networked systems. With a long history in network science, community detection typically relies on objective functions, optimised with custom-tailored search algorithms, but often without leveraging recent advances in deep learning. Recently, first works have started incorporating such objectives into loss functions for deep graph clustering and pooling. We consider the map equation, a popular information-theoretic objective function for unsupervised community detection, and express it in differentiable tensor form for optimisation through gradient descent. Our formulation turns the map equation compatible with any neural network architecture, enables end-to-end learning, incorporates node features, and chooses the optimal number of clusters automatically, all without requiring explicit regularisation. Applied to unsupervised graph clustering tasks, we achieve competitive performance against state-of-the-art deep graph clustering baselines in synthetic and real-world datasets.", "keywords": "community detection;graph clustering;random walk;map equation", "primary_area": "other", "supplementary_material": "", "author": "Christopher Bl\u00f6cker;Chester Tan;Ingo Scholtes", "authorids": "~Christopher_Bl\u00f6cker1;~Chester_Tan1;~Ingo_Scholtes1", "gender": "M;;M", "homepage": "https://chrisbloecker.se;https://chester-tan.com;https://www.ingoscholtes.net", "dblp": "216/7409;;s/IngoScholtes", "google_scholar": "https://scholar.google.se/citations?user=4mw83wwAAAAJ;;pouriVsAAAAJ", "orcid": "0000-0001-7881-2496;;0000-0003-2253-0216", "linkedin": ";;", "or_profile": "~Christopher_Bl\u00f6cker1;~Chester_Tan1;~Ingo_Scholtes1", "aff": "University of Zurich;Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;University of Zurich", "aff_domain": "uzh.ch;uni-wuerzburg.de;uzh.ch", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbl{\\\"o}cker2024the,\ntitle={The Map Equation Goes Neural: Mapping Network Flows with Graph Neural Networks},\nauthor={Christopher Bl{\\\"o}cker and Chester Tan and Ingo Scholtes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aFWx1N84Fe}\n}", "github": "", "reviewers": "5LnL;AyCW;sjQQ;4xY5", "pdf_size": 2984655, "rating": "3;7;7;7", "confidence": "4;4;4;5", "soundness": "3;3;3;4", "novelty": "1;3;3;4", "presentation": "3;3;3;4", "wc_summary": "84;93;111;41", "wc_strengths": "30;48;85;239", "wc_weaknesses": "80;45;205;217", "wc_questions": "57;40;1;38", "wc_limitations": "27;5;46;2", "wc_review": "278;231;448;537", "wc_reply_reviewers": "199;15;14;46", "wc_reply_authors": "728;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 25.723287114985908 ], "wc_strengths_avg": [ 100.5, 82.38476800962664 ], "wc_weaknesses_avg": [ 136.75, 75.39355078519648 ], "wc_questions_avg": [ 34.0, 20.43281674170255 ], "wc_limitations_avg": [ 20.0, 17.84656829757475 ], "wc_review_avg": [ 373.5, 124.20648131236952 ], "wc_reply_reviewers_avg": [ 68.5, 76.43461257833391 ], "wc_reply_authors_avg": [ 182.0, 315.23324697753566 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13165387439716700779&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uzh.ch;uni-wuerzburg.de;uzh.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Zurich;University of W\u00fcrzburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.unizh.ch;https://www.uni-wuerzburg.de", "aff_unique_abbr": "UZH;JMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";W\u00fcrzburg", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Segment Anything without Supervision", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94569", "id": "aGqldlOxxY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aGqldlOxxY", "openreview": "https://openreview.net/forum?id=aGqldlOxxY", "poster": "", "project": "", "author_site": "XuDong Wang, Jingfeng Yang, Trevor Darrell", "tldr": "", "abstract": "The Segmentation Anything Model (SAM) requires labor-intensive data labeling. We present Unsupervised SAM (UnSAM) for promptable and automatic whole-image segmentation that does not require human annotations. UnSAM utilizes a divide-and-conquer strategy to \u201cdiscover\u201d the hierarchical structure of visual scenes. We first leverage top-down clustering methods to partition an unlabeled image into instance/semantic level segments. For all pixels within a segment, a bottom-up clustering method is employed to iteratively merge them into larger groups, thereby forming a hierarchical structure. These unsupervised multi-granular masks are then utilized to supervise model training. Evaluated across seven popular datasets, UnSAM achieves competitive results with the supervised counterpart SAM, and surpasses the previous state-of-the-art in unsupervised segmentation by 11% in terms of AR. Moreover, we show that supervised SAM can also benefit from our self-supervised labels. By integrating our unsupervised pseudo masks into SA-1B\u2019s ground-truth masks and training UnSAM with only 1% of SA-1B, a lightly semi-supervised UnSAM can often segment entities overlooked by supervised SAM, exceeding SAM\u2019s AR by over 6.7% and AP by 3.9% on SA-1B.", "keywords": "Unsupervised Segmentation;Segment Anything", "primary_area": "machine_vision", "supplementary_material": "/attachment/140f09c22f3b39e82d2d8307dd89db85486b5a38.zip", "author": "Xudong Wang;Jingfeng Yang;Trevor Darrell", "authorids": "~Xudong_Wang4;~Jingfeng_Yang4;~Trevor_Darrell2", "gender": "M;M;M", "homepage": "http://people.eecs.berkeley.edu/~xdwang/;;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";;d/TrevorDarrell", "google_scholar": "Azf07WcAAAAJ;;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";;", "linkedin": ";jingfeng-yang-15a214261/;", "or_profile": "~Xudong_Wang4;~Jingfeng_Yang4;~trevor_darrell1", "aff": "Google DeepMind;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "google.com;berkeley.edu;eecs.berkeley.edu", "position": "Research Intern;Undergrad student;Professor", "bibtex": "@inproceedings{\nwang2024segment,\ntitle={Segment Anything without Supervision},\nauthor={Xudong Wang and Jingfeng Yang and Trevor Darrell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aGqldlOxxY}\n}", "github": "", "reviewers": "wivK;ecwU;p86w;S6W7", "pdf_size": 23352459, "rating": "4;6;6;6", "confidence": "5;5;4;4", "soundness": "3;2;2;3", "novelty": "2;2;3;2", "presentation": "2;4;4;3", "wc_summary": "61;72;116;68", "wc_strengths": "8;28;46;48", "wc_weaknesses": "258;192;513;105", "wc_questions": "98;22;82;22", "wc_limitations": "16;21;4;8", "wc_review": "441;335;761;251", "wc_reply_reviewers": "19;202;164;24", "wc_reply_authors": "262;248;767;255", "reply_reviewers": "1;2;2;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 79.25, 21.579793789561567 ], "wc_strengths_avg": [ 32.5, 16.147755261955144 ], "wc_weaknesses_avg": [ 267.0, 152.0411128609627 ], "wc_questions_avg": [ 56.0, 34.46737587922817 ], "wc_limitations_avg": [ 12.25, 6.6473679001541655 ], "wc_review_avg": [ 447.0, 193.385625112106 ], "wc_reply_reviewers_avg": [ 102.25, 81.87910295063081 ], "wc_reply_authors_avg": [ 383.0, 221.75775071009355 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10476266086255927307&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;berkeley.edu;eecs.berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_unique_dep": "Google DeepMind;;Electrical Engineering & Computer Science", "aff_unique_url": "https://deepmind.com;https://www.berkeley.edu;", "aff_unique_abbr": "DeepMind;UC Berkeley;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States;" }, { "title": "Enhancing Preference-based Linear Bandits via Human Response Time", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94568", "id": "aIPwlkdOut", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aIPwlkdOut", "openreview": "https://openreview.net/forum?id=aIPwlkdOut", "poster": "/media/PosterPDFs/NeurIPS%202024/94568.png?t=1733672205.633459", "project": "", "author_site": "Shen Li, Yuyang Zhang, Zhaolin Ren, Claire Liang, Na Li, Julie A Shah", "tldr": "", "abstract": "Interactive preference learning systems infer human preferences by presenting queries as pairs of options and collecting binary choices. Although binary choices are simple and widely used, they provide limited information about preference strength. To address this, we leverage human response times, which are inversely related to preference strength, as an additional signal. We propose a computationally efficient method that combines choices and response times to estimate human utility functions, grounded in the EZ diffusion model from psychology. Theoretical and empirical analyses show that for queries with strong preferences, response times complement choices by providing extra information about preference strength, leading to significantly improved utility estimation. We incorporate this estimator into preference-based linear bandits for fixed-budget best-arm identification. Simulations on three real-world datasets demonstrate that using response times significantly accelerates preference learning compared to choice-only approaches. Additional materials, such as code, slides, and talk video, are available at https://shenlirobot.github.io/pages/NeurIPS24.html.", "keywords": "human response time;preference learning;linear bandits;dueling bandits;psychology;economics", "primary_area": "bandits", "supplementary_material": "", "author": "Shen Li;Yuyang Zhang;Zhaolin Ren;Claire Liang;Na Li;Julie Shah", "authorids": "~Shen_Li1;~Yuyang_Zhang4;~Zhaolin_Ren1;~Claire_Liang1;~Na_Li3;~Julie_Shah2", "gender": "M;M;M;;F;F", "homepage": "https://shenlirobot.github.io/;;;https://cyl48.github.io/;https://nali.seas.harvard.edu/;https://interactive.mit.edu", "dblp": "22/1835;;;;;", "google_scholar": "FpkwJdEAAAAJ;https://scholar.google.ca/citations?user=NiBKGakAAAAJ;;;qdGelXoAAAAJ;", "orcid": "0000-0002-8746-5438;;;;;", "linkedin": "shen-li-robotics/;;zhaolin-ren-1b1b94108;;;", "or_profile": "~Shen_Li1;~Yuyang_Zhang4;~Zhaolin_Ren1;~Claire_Liang1;~Na_Li3;~Julie_Shah2", "aff": "Massachusetts Institute of Technology;Harvard University, Harvard University;Harvard University;Massachusetts Institute of Technology;Harvard University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;g.harvard.edu;harvard.edu;mit.edu;harvard.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Postdoc;Full Professor;Professor", "bibtex": "@inproceedings{\nli2024enhancing,\ntitle={Enhancing Preference-based Linear Bandits via Human Response Time},\nauthor={Shen Li and Yuyang Zhang and Zhaolin Ren and Claire Liang and Na Li and Julie Shah},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aIPwlkdOut}\n}", "github": "", "reviewers": "64dM;a7WX;f1Vk;qjfL", "pdf_size": 2223885, "rating": "7;7;7;8", "confidence": "3;3;3;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;3", "wc_summary": "115;63;121;69", "wc_strengths": "125;100;134;72", "wc_weaknesses": "81;86;142;206", "wc_questions": "7;31;83;143", "wc_limitations": "7;1;4;29", "wc_review": "335;281;484;519", "wc_reply_reviewers": "16;78;20;151", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 26.1725046566048 ], "wc_strengths_avg": [ 107.75, 24.107830678018296 ], "wc_weaknesses_avg": [ 128.75, 50.622993787408504 ], "wc_questions_avg": [ 66.0, 52.25897052181568 ], "wc_limitations_avg": [ 10.25, 11.031205736455105 ], "wc_review_avg": [ 404.75, 99.38907133080578 ], "wc_reply_reviewers_avg": [ 66.25, 54.73744148204225 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7oVbXNIYfEMJ:scholar.google.com/&scioq=Enhancing+Preference-based+Linear+Bandits+via+Human+Response+Time&hl=en&as_sdt=0,44", "gs_version_total": 10, "email": "mit.edu;g.harvard.edu;harvard.edu;mit.edu;harvard.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.harvard.edu", "aff_unique_abbr": "MIT;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Samba: Severity-aware Recurrent Modeling for Cross-domain Medical Image Grading", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94567", "id": "aIeXn5103e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aIeXn5103e", "openreview": "https://openreview.net/forum?id=aIeXn5103e", "poster": "/media/PosterPDFs/NeurIPS%202024/94567.png?t=1731704527.9558556", "project": "", "author_site": "Qi Bi, Jingjun Yi, Hao Zheng, Wei Ji, Haolan Zhan, Yawen Huang, Yuexiang Li, Yefeng Zheng", "tldr": "", "abstract": "Disease grading is a crucial task in medical image analysis. Due to the continuous progression of diseases, i.e., the variability within the same level and the similarity between adjacent stages, accurate grading is highly challenging.\nFurthermore, in real-world scenarios, models trained on limited source domain datasets should also be capable of handling data from unseen target domains.\nDue to the cross-domain variants, the feature distribution between source and unseen target domains can be dramatically different, leading to a substantial decrease in model performance.\nTo address these challenges in cross-domain disease grading, we propose a Severity-aware Recurrent Modeling (Samba) method in this paper.\nAs the core objective of most staging tasks is to identify the most severe lesions, which may only occupy a small portion of the image, we propose to encode image patches in a sequential and recurrent manner.\nSpecifically, a state space model is tailored to store and transport the severity information by hidden states.\nMoreover, to mitigate the impact of cross-domain variants, an Expectation-Maximization (EM) based state recalibration mechanism is designed to map the patch embeddings into a more compact space.\nWe model the feature distributions of different lesions through the Gaussian Mixture Model (GMM) and reconstruct the intermediate features based on learnable severity bases.\nExtensive experiments show the proposed Samba outperforms the VMamba baseline by an average accuracy of 23.5\\%, 5.6\\% and 4.1\\% on the cross-domain grading of fatigue fracture, breast cancer and diabetic retinopathy, respectively. \nSource code is available at \\url{https://github.com/BiQiWHU/Samba}.", "keywords": "Medical Image Grading;Domain Generalization;Selective State Space;Gaussian Mixture Model", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Qi Bi;Jingjun Yi;Hao Zheng;Wei Ji;Haolan Zhan;Yawen Huang;Yuexiang Li;Yefeng Zheng", "authorids": "~Qi_Bi1;~Jingjun_Yi1;~Hao_Zheng6;~Wei_Ji2;~Haolan_Zhan1;~Yawen_Huang4;~Yuexiang_Li1;~Yefeng_Zheng3", "gender": ";M;M;;;;M;", "homepage": ";https://github.com/JingjunYi;;;;;https://yuexiangli.github.io;", "dblp": ";296/4714;31/6916-8;;;;165/6204;", "google_scholar": ";doRtaBcAAAAJ;https://scholar.google.com.hk/citations?user=LsJVCSoAAAAJ;;;;WsKu4EMAAAAJ;", "orcid": ";0000-0002-4249-3021;0000-0001-7193-6242;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Qi_Bi1;~Jingjun_Yi1;~Hao_Zheng6;~Wei_Ji2;~Haolan_Zhan1;~Yawen_Huang4;~Yuexiang_Li1;~Yefeng_Zheng3", "aff": ";LIESMARS;Tencent;;;;Medical AI ReSearch (MARS) Group @ GXMU;", "aff_domain": ";whu.edu.cn;tencent.com;;;;gxmu.edu.cn;", "position": ";MS student;Researcher;;;;Full Professor;", "bibtex": "@inproceedings{\nbi2024samba,\ntitle={Samba: Severity-aware Recurrent Modeling for Cross-domain Medical Image Grading},\nauthor={Qi Bi and Jingjun Yi and Hao Zheng and Wei Ji and Haolan Zhan and Yawen Huang and Yuexiang Li and Yefeng Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aIeXn5103e}\n}", "github": "", "reviewers": "b4sx;Wyiu;hFq3;LrSF", "pdf_size": 2358133, "rating": "5;5;6;6", "confidence": "4;4;3;4", "soundness": "3;2;3;2", "novelty": "2;3;3;2", "presentation": "3;2;3;3", "wc_summary": "66;257;93;81", "wc_strengths": "64;172;42;49", "wc_weaknesses": "46;731;83;276", "wc_questions": "115;310;69;48", "wc_limitations": "10;26;8;5", "wc_review": "301;1496;295;459", "wc_reply_reviewers": "41;185;18;563", "wc_reply_authors": "25;21;24;1121", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 124.25, 77.23786312424755 ], "wc_strengths_avg": [ 81.75, 52.7085144924423 ], "wc_weaknesses_avg": [ 284.0, 272.45091301003197 ], "wc_questions_avg": [ 135.5, 103.62070256469023 ], "wc_limitations_avg": [ 12.25, 8.13557004763649 ], "wc_review_avg": [ 637.75, 499.85566666788924 ], "wc_reply_reviewers_avg": [ 201.75, 218.166650751209 ], "wc_reply_authors_avg": [ 297.75, 475.30588834980784 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8465922417873765280&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";whu.edu.cn;tencent.com;;;;gxmu.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "Laboratory of Information Engineering Group, Tsinghua University;Tencent;Guangxi Medical University", "aff_unique_dep": "Information Engineering Group;Tencent Holdings Limited;Medical AI ReSearch (MARS) Group", "aff_unique_url": "http://www.liesmars.tsinghua.edu.cn/;https://www.tencent.com;http://www.gxmu.edu.cn", "aff_unique_abbr": "LIESMARS;Tencent;GXMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking Parity Check Enhanced Symmetry-Preserving Ansatz", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94566", "id": "aIuByRyHhV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aIuByRyHhV", "openreview": "https://openreview.net/forum?id=aIuByRyHhV", "poster": "/media/PosterPDFs/NeurIPS%202024/94566.png?t=1733559817.1718044", "project": "", "author_site": "Ge Yan, Mengfei Ran, Ruocheng Wang, Kaisen Pan, Junchi Yan", "tldr": "", "abstract": "With the arrival of the Noisy Intermediate-Scale Quantum (NISQ) era, Variational Quantum Algorithms (VQAs) have emerged to obtain possible quantum advantage. In particular, how to effectively incorporate hard constraints in VQAs remains a critical and open question. In this paper, we manage to combine the Hamming Weight Preserving ansatz with a topological-aware parity check on physical qubits to enforce error mitigation and further hard constraints. We demonstrate the combination significantly outperforms peer VQA methods on both quantum chemistry problems and constrained combinatorial optimization problems e.g. Quadratic Assignment Problem. Intensive experimental results on both simulators and superconducting quantum processors are provided to verify that the combination of HWP ansatz with parity check is among the most promising candidates to demonstrate quantum advantages in the NISQ era to solve more realistic problems.", "keywords": "symmetry-preserving quantum computing;parity check;error mitigation", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Ge Yan;Mengfei Ran;Ruocheng Wang;Kaisen Pan;Junchi Yan", "authorids": "~Ge_Yan1;~Mengfei_Ran1;~Ruocheng_Wang3;~Kaisen_Pan1;~Junchi_Yan2", "gender": "M;F;M;;M", "homepage": "https://github.com/GrahamYan;https://lucienr7.github.io/;https://wang-ruocheng.github.io./;https://github.com/pks0813;http://thinklab.sjtu.edu.cn/", "dblp": "169/8155;;;;60/7949.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;ga230VoAAAAJ", "orcid": ";;;;0000-0001-9639-7679", "linkedin": ";;;;", "or_profile": "~Ge_Yan1;~Mengfei_Ran1;~Ruocheng_Wang3;~Kaisen_Pan1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nyan2024rethinking,\ntitle={Rethinking Parity Check Enhanced Symmetry-Preserving Ansatz},\nauthor={Ge Yan and Mengfei Ran and Ruocheng Wang and Kaisen Pan and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aIuByRyHhV}\n}", "github": "", "reviewers": "K5Kf;6BLB;kL3u", "pdf_size": 1122310, "rating": "5;6;8", "confidence": "3;1;5", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "3;2;4", "wc_summary": "38;54;91", "wc_strengths": "6;51;168", "wc_weaknesses": "40;20;12", "wc_questions": "108;58;3", "wc_limitations": "8;35;1", "wc_review": "200;218;275", "wc_reply_reviewers": "18;0;1", "wc_reply_authors": "81;0;0", "reply_reviewers": "1;0;1", "reply_authors": "3;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 1.632993161855452 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 61.0, 22.19609575278199 ], "wc_strengths_avg": [ 75.0, 68.27884006044626 ], "wc_weaknesses_avg": [ 24.0, 11.775681155103795 ], "wc_questions_avg": [ 56.333333333333336, 42.88226776756203 ], "wc_limitations_avg": [ 14.666666666666666, 14.659088951530682 ], "wc_review_avg": [ 231.0, 31.96873472629156 ], "wc_reply_reviewers_avg": [ 6.333333333333333, 8.259674462242579 ], "wc_reply_authors_avg": [ 27.0, 38.18376618407357 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0Oyvu-NlKCsJ:scholar.google.com/&scioq=Rethinking+Parity+Check+Enhanced+Symmetry-Preserving+Ansatz&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Transformers Can Do Arithmetic with the Right Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94565", "id": "aIyNLWXuDO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aIyNLWXuDO", "openreview": "https://openreview.net/forum?id=aIyNLWXuDO", "poster": "", "project": "", "author_site": "Sean McLeish, Arpit Bansal, Alex Stein, Neel Jain, John Kirchenbauer, Brian Bartoldson, Bhavya Kailkhura, Abhinav Bhatele, Jonas Geiping, Avi Schwarzschild, Tom Goldstein", "tldr": "", "abstract": "The poor performance of transformers on arithmetic tasks seems to stem in large part from their inability to keep track of the exact position of each digit inside of a large span of digits. We mend this problem by adding an embedding to each digit that encodes its position relative to the start of the number. In addition to the boost these embeddings provide on their own, we show that this fix enables architectural modifications such as input injection and recurrent layers to improve performance even further.\n\nWith positions resolved, we can study the logical extrapolation ability of transformers. Can they solve arithmetic problems that are larger and more complex than those in their training data? We find that training on only 20 digit numbers with a single GPU for one day, we can reach state-of-the-art performance, achieving up to 99% accuracy on 100 digit addition problems. Finally, we show that these gains in numeracy also unlock improvements on other multi-step reasoning tasks including sorting and multiplication.", "keywords": "Algorithmic Generalization;Arithmetic;Transformer;Recurrent", "primary_area": "generative_models", "supplementary_material": "/attachment/649bc4e8daac7c68c12bb34dda8dc513778c7880.zip", "author": "Sean Michael McLeish;Arpit Bansal;Alex Stein;Neel Jain;John Kirchenbauer;Brian R. Bartoldson;Bhavya Kailkhura;Abhinav Bhatele;Jonas Geiping;Avi Schwarzschild;Tom Goldstein", "authorids": "~Sean_Michael_McLeish1;~Arpit_Bansal1;~Alex_Stein1;~Neel_Jain1;~John_Kirchenbauer1;~Brian_R._Bartoldson1;~Bhavya_Kailkhura1;~Abhinav_Bhatele1;~Jonas_Geiping1;~Avi_Schwarzschild1;~Tom_Goldstein1", "gender": "M;;M;M;;M;M;M;M;M;M", "homepage": "https://mcleish7.github.io/;;https://jwkirchenbauer.notion.site/;https://people.llnl.gov/kailkhura1;https://www.cs.umd.edu/~bhatele;https://jonasgeiping.github.io/;https://cs.umd.edu/~avi1;https://www.cs.umd.edu/~tomg/;https://alexstein0.github.io/;https://arpitbansal297.github.io/;https://brianbartoldson.wordpress.com/", "dblp": "374/9044;;321/0678;132/8938;82/6441;190/7229;249/9334.html;25/8184;;190/9114;220/5475", "google_scholar": ";https://scholar.google.com/citations?hl=en;48GJrbsAAAAJ;SQpJmOgAAAAJ;3x65qtwAAAAJ;https://scholar.google.de/citations?user=206vNCEAAAAJ;WNvQ7AcAAAAJ;KmSuVtgAAAAJ;StkYBUEAAAAJ;Pchxm4IAAAAJ;YdiZoJgAAAAJ", "orcid": ";;;;;;;;;;", "linkedin": "sean-m-mcleish/;neel-jain-0a6a239/;johnkirchenbauer/;;;;;;;arpit-bansal-970865b1/;", "or_profile": "~Sean_Michael_McLeish1;~Neel_Jain1;~John_Kirchenbauer1;~Bhavya_Kailkhura1;~Abhinav_Bhatele1;~Jonas_Geiping1;~Avi_Schwarzschild1;~Tom_Goldstein1;~Alexander_Stein1;~Arpit_Amit_Bansal1;~Brian_R_Bartoldson1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Lawrence Livermore National Laboratory;University of Maryland, College Park;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Carnegie Mellon University;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Lawrence Livermore National Labs", "aff_domain": "umd.edu;umd.edu;umd.edu;llnl.gov;umd.edu;tuebingen.mpg.de;cmu.edu;umd.edu;umd.edu;umd.edu;llnl.gov", "position": "PhD student;PhD student;PhD student;Research Staff;Associate Professor;Principal Researcher;Postdoc;Full Professor;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nmcleish2024transformers,\ntitle={Transformers Can Do Arithmetic with the Right Embeddings},\nauthor={Sean Michael McLeish and Arpit Bansal and Alex Stein and Neel Jain and John Kirchenbauer and Brian R. Bartoldson and Bhavya Kailkhura and Abhinav Bhatele and Jonas Geiping and Avi Schwarzschild and Tom Goldstein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aIyNLWXuDO}\n}", "github": "", "reviewers": "GBbn;mAhx;uGwk", "pdf_size": 2073389, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "96;43;100", "wc_strengths": "103;39;104", "wc_weaknesses": "162;34;96", "wc_questions": "146;13;78", "wc_limitations": "9;2;19", "wc_review": "516;131;397", "wc_reply_reviewers": "32;0;14", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 79.66666666666667, 25.978623691198283 ], "wc_strengths_avg": [ 82.0, 30.40833219146796 ], "wc_weaknesses_avg": [ 97.33333333333333, 52.264285660052366 ], "wc_questions_avg": [ 79.0, 54.30162674051917 ], "wc_limitations_avg": [ 10.0, 6.97614984548545 ], "wc_review_avg": [ 348.0, 160.94926736915167 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 13.097921802925667 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5909588111655420067&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "umd.edu;umd.edu;umd.edu;llnl.gov;umd.edu;tuebingen.mpg.de;cmu.edu;umd.edu;umd.edu;umd.edu;llnl.gov", "author_num": 11, "aff_unique_index": "0;0;0;1;0;2;3;0;0;0;1", "aff_unique_norm": "University of Maryland;Lawrence Livermore National Laboratory;Max Planck Institute for Intelligent Systems;Carnegie Mellon University", "aff_unique_dep": ";;Intelligent Systems;", "aff_unique_url": "https://www/umd.edu;https://www.llnl.gov;https://www.mpi-is.mpg.de;https://www.cmu.edu", "aff_unique_abbr": "UMD;LLNL;MPI-IS;CMU", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "GlotCC: An Open Broad-Coverage CommonCrawl Corpus and Pipeline for Minority Languages", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97624", "id": "aJ1yse8GEr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aJ1yse8GEr", "openreview": "https://openreview.net/forum?id=aJ1yse8GEr", "poster": "", "project": "", "author_site": "Amir Hossein Kargaran, Fran\u00e7ois Yvon, Hinrich Schuetze", "tldr": "", "abstract": "The need for large text corpora has increased with the advent of pretrained language models and, in particular, the discovery of scaling laws for these models. Most available corpora have sufficient data only for languages with large dominant communities. However, there is no corpus available that (i) covers a wide range of minority languages; (ii) is generated by an open-source reproducible pipeline; and (iii) is rigorously cleaned from noise, making it trustworthy to use. We present GlotCC, a clean, document-level, 2TB general domain corpus derived from CommonCrawl, covering more than 1000 languages. We make GlotCC and the system used to generate it\u2014 including the pipeline, language identification model, and filters\u2014available to the research community.\n\nCorpus v. 1.0 https://huggingface.co/datasets/cis-lmu/GlotCC-v1\n\nPipeline v. 3.0 https://github.com/cisnlp/GlotCC", "keywords": "multilingual;web corpora", "primary_area": "", "supplementary_material": "", "author": "Amir Hossein Kargaran;Fran\u00e7ois Yvon;Hinrich Schuetze", "authorids": "~Amir_Hossein_Kargaran1;~Fran\u00e7ois_Yvon2;~Hinrich_Schuetze3", "gender": "M;M;M", "homepage": "https://kargaranamir.github.io/;http://cv.archives-ouvertes.fr/francois-yvon;https://www.cis.uni-muenchen.de/schuetze/", "dblp": "261/9248;05/2701.html;s/HinrichSchutze", "google_scholar": "2idwpjcAAAAJ;https://scholar.google.fr/citations?hl=fr;", "orcid": "0000-0001-6253-1315;0000-0002-7972-7442;", "linkedin": "amirkargaran/;;", "or_profile": "~Amir_Hossein_Kargaran1;~Fran\u00e7ois_Yvon2;~Hinrich_Schuetze3", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;ISIR, Sorbonne Universit\u00e9 & CNRS;Center for Information and Language Processing", "aff_domain": "lmu.de;isir.upmc.fr;lmu.de", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nkargaran2024glotcc,\ntitle={Glot{CC}: An Open Broad-Coverage CommonCrawl Corpus and Pipeline for Minority Languages},\nauthor={Amir Hossein Kargaran and Fran{\\c{c}}ois Yvon and Hinrich Schuetze},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=aJ1yse8GEr}\n}", "github": "", "reviewers": "6BqC;uMdT;xLgb", "pdf_size": 418387, "rating": "6;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "39;111;61", "wc_strengths": "51;57;37", "wc_improvement": "224;20;38", "wc_limitations": "176;1;5", "wc_correctness": "15;1;5", "wc_clarity": "9;4;5", "wc_relation_to_prior_work": "5;1;8", "wc_documentation": "10;1;10", "wc_additional_feedback": "1;1;1", "wc_review": "530;197;170", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 70.33333333333333, 30.12566274052001 ], "wc_strengths_avg": [ 48.333333333333336, 8.379870059984356 ], "wc_improvement_avg": [ 94.0, 92.21713506718802 ], "wc_limitations_avg": [ 60.666666666666664, 81.56932974810788 ], "wc_correctness_avg": [ 7.0, 5.887840577551898 ], "wc_clarity_avg": [ 6.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 4.666666666666667, 2.8674417556808756 ], "wc_documentation_avg": [ 7.0, 4.242640687119285 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.0, 163.71316379570703 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3349602164526501695&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "lmu.de;isir.upmc.fr;lmu.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Sorbonne Universit\u00e9;Center for Information and Language Processing", "aff_unique_dep": ";Institut des Sciences de l'Ing\u00e9nierie de Robotique;", "aff_unique_url": "https://www.lmu.de;https://www.sorbonne-universite.fr;", "aff_unique_abbr": "LMU;Sorbonne U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;France;" }, { "title": "IWBVT: Instance Weighting-based Bias-Variance Trade-off for Crowdsourcing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94564", "id": "aJDGfynRw7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aJDGfynRw7", "openreview": "https://openreview.net/forum?id=aJDGfynRw7", "poster": "/media/PosterPDFs/NeurIPS%202024/94564.png?t=1731372182.6083403", "project": "", "author_site": "Wenjun Zhang, Liangxiao Jiang, Chaoqun Li", "tldr": "", "abstract": "In recent years, a large number of algorithms for label integration and noise correction have been proposed to infer the unknown true labels of instances in crowdsourcing. They have made great advances in improving the label quality of crowdsourced datasets. However, due to the presence of intractable instances, these algorithms are usually not as significant in improving the model quality as they are in improving the label quality. To improve the model quality, this paper proposes an instance weighting-based bias-variance trade-off (IWBVT) approach. IWBVT at first proposes a novel instance weighting method based on the complementary set and entropy, which mitigates the impact of intractable instances and thus makes the bias and variance of trained models closer to the unknown true results. Then, IWBVT performs probabilistic loss regressions based on the bias-variance decomposition, which achieves the bias-variance trade-off and thus reduces the generalization error of trained models. Experimental results indicate that IWBVT can serve as a universal post-processing approach to significantly improving the model quality of existing state-of-the-art label integration algorithms and noise correction algorithms.", "keywords": "Crowdsourcing;Instance weighting;Bias-variance decomposition;Bias-variance trade-off", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/fb5d16baf38dc8453022881609ab415c795f5ed3.zip", "author": "Wenjun Zhang;Liangxiao Jiang;Chaoqun Li", "authorids": "~Wenjun_Zhang4;~Liangxiao_Jiang1;~Chaoqun_Li1", "gender": "M;M;F", "homepage": ";http://grzy.cug.edu.cn/jlx/en/index.htm;http://grzy.cug.edu.cn/lcq/", "dblp": "46/3359-12;46/4017;26/4644-1", "google_scholar": "https://scholar.google.com/citations?hl=en;S_PLKWEAAAAJ;", "orcid": "0000-0002-7269-0376;0000-0003-2201-3526;", "linkedin": ";;", "or_profile": "~Wenjun_Zhang4;~Liangxiao_Jiang1;~Chaoqun_Li1", "aff": "China University of Geosciences;China University of Geosciences;China University of Geosciences", "aff_domain": "cug.edu.cn;cug.edu.cn;cug.edu.cn", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024iwbvt,\ntitle={{IWBVT}: Instance Weighting-based Bias-Variance Trade-off for Crowdsourcing},\nauthor={Wenjun Zhang and Liangxiao Jiang and Chaoqun Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aJDGfynRw7}\n}", "github": "", "reviewers": "Td4g;5XV6;fq5P", "pdf_size": 453366, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;2;3", "wc_summary": "100;73;88", "wc_strengths": "14;103;164", "wc_weaknesses": "17;137;76", "wc_questions": "175;95;22", "wc_limitations": "6;5;1", "wc_review": "312;413;351", "wc_reply_reviewers": "0;14;10", "wc_reply_authors": "51;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 87.0, 11.045361017187261 ], "wc_strengths_avg": [ 93.66666666666667, 61.59184650224051 ], "wc_weaknesses_avg": [ 76.66666666666667, 48.99206284922306 ], "wc_questions_avg": [ 97.33333333333333, 62.483775671947214 ], "wc_limitations_avg": [ 4.0, 2.160246899469287 ], "wc_review_avg": [ 358.6666666666667, 41.58792559812951 ], "wc_reply_reviewers_avg": [ 8.0, 5.887840577551898 ], "wc_reply_authors_avg": [ 17.0, 24.041630560342615 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16212156027916378919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "cug.edu.cn;cug.edu.cn;cug.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "China University of Geosciences", "aff_unique_dep": "", "aff_unique_url": "http://www.cug.edu.cn", "aff_unique_abbr": "CUG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Mechanism design augmented with output advice", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94563", "id": "aJGKs7QOZM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aJGKs7QOZM", "openreview": "https://openreview.net/forum?id=aJGKs7QOZM", "poster": "/media/PosterPDFs/NeurIPS%202024/94563.png?t=1733349441.7355907", "project": "", "author_site": "George Christodoulou, Alkmini Sgouritsa, Ioannis Vlachos", "tldr": "", "abstract": "Our work revisits the design of mechanisms via the learning-augmented framework. In this model, the algorithm is enhanced with imperfect (machine-learned) information concerning the input, usually referred to as prediction. The goal is to design algorithms whose performance degrades gently as a function of the prediction error and, in particular, perform well if the prediction is accurate, but also provide a worst-case guarantee under any possible error. This framework has been successfully applied recently to various mechanism design settings, where in most cases the mechanism is provided with a prediction about the types of the players.\n\nWe adopt a perspective in which the mechanism is provided with an output recommendation. We make no assumptions about the quality of the suggested outcome, and the goal is to use the recommendation to design mechanisms with low approximation guarantees whenever the recommended outcome is reasonable, but at the same time to provide worst-case guarantees whenever the recommendation significantly deviates from the optimal one. We propose a generic, universal measure, which we call quality of recommendation, to evaluate mechanisms across various information settings. We demonstrate how this new metric can provide refined analysis in existing results.\n\nThis model introduces new challenges, as the mechanism receives limited information comparing to settings that use predictions about the types of the agents. We study, through this lens, several well-studied mechanism design paradigms, devising new mechanisms, but also providing refined analysis for existing ones, using as a metric the quality of recommendation. We complement our positive results, by exploring the limitations of known classes of strategyproof mechanisms that can be devised using output recommendation.", "keywords": "mechanism design;output advice;quality of recommendation;facility location;scheduling;house allocation;auctions", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/742b2212b00ae072f23c24164679ea3031c62cd6.zip", "author": "George Christodoulou;Alkmini Sgouritsa;Ioannis Vlachos", "authorids": "~George_Christodoulou1;~Alkmini_Sgouritsa2;~Ioannis_Vlachos1", "gender": ";F;M", "homepage": "https://sites.google.com/view/gchristo;https://sites.google.com/site/alkminisgouritsa;https://yannisvl.github.io/", "dblp": "14/1571-1;139/0645.html;", "google_scholar": "QRIaADsAAAAJ;https://scholar.google.co.uk/citations?user=Mb_AoIgAAAAJ;RDqODVQAAAAJ", "orcid": ";0000-0003-3997-5131;0009-0000-3996-9255", "linkedin": ";;ioannisvlachos1/", "or_profile": "~George_Christodoulou1;~Alkmini_Sgouritsa2;~Ioannis_Vlachos1", "aff": "Aristotle University of Thessaloniki;Archimedes - Athena Research Center;Athens University of Economics and Business", "aff_domain": "auth.gr;imis.athena-innovation.gr;aueb.gr", "position": "Associate Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nchristodoulou2024mechanism,\ntitle={Mechanism design augmented with output advice},\nauthor={George Christodoulou and Alkmini Sgouritsa and Ioannis Vlachos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aJGKs7QOZM}\n}", "github": "", "reviewers": "PWat;i3AA;i6E2", "pdf_size": 402829, "rating": "6;7;7", "confidence": "2;2;3", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "2;4;3", "wc_summary": "45;56;71", "wc_strengths": "21;26;45", "wc_weaknesses": "78;61;69", "wc_questions": "127;14;4", "wc_limitations": "24;1;57", "wc_review": "295;158;246", "wc_reply_reviewers": "578;21;436", "wc_reply_authors": "382;0;690", "reply_reviewers": "2;1;2", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 57.333333333333336, 10.656244908763853 ], "wc_strengths_avg": [ 30.666666666666668, 10.338708279513881 ], "wc_weaknesses_avg": [ 69.33333333333333, 6.944222218666553 ], "wc_questions_avg": [ 48.333333333333336, 55.77534301901593 ], "wc_limitations_avg": [ 27.333333333333332, 22.9830855679176 ], "wc_review_avg": [ 233.0, 56.680390495008645 ], "wc_reply_reviewers_avg": [ 345.0, 236.3232249836369 ], "wc_reply_authors_avg": [ 357.3333333333333, 282.23079602024694 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=677739603782370149&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "auth.gr;imis.athena-innovation.gr;aueb.gr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Aristotle University of Thessaloniki;Athena Research Center;Athens University of Economics and Business", "aff_unique_dep": ";;", "aff_unique_url": "https://www.auth.gr;https://www.athena rc.gr;https://www.aueb.gr", "aff_unique_abbr": "AUTH;ARC;AUEB", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Thessaloniki;;Athens", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Greece" }, { "title": "Predicting the Performance of Foundation Models via Agreement-on-the-Line", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94562", "id": "aJx9onwsR4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aJx9onwsR4", "openreview": "https://openreview.net/forum?id=aJx9onwsR4", "poster": "", "project": "", "author_site": "Rahul Saxena, Taeyoun Kim, Aman Mehra, Christina Baek, J. Zico Kolter, Aditi Raghunathan", "tldr": "", "abstract": "Estimating the out-of-distribution performance in regimes where labels are scarce is critical to safely deploy foundation models. Recently, it was shown that ensembles of neural networks observe the phenomena \"agreement-on-the-line\", which can be leveraged to reliably predict OOD performance without labels. However, in contrast to classical neural networks that are trained on in-distribution data from scratch for numerous epochs, foundation models undergo minimal finetuning from heavily pretrained weights, which may reduce the ensemble diversity needed to observe agreement-on-the-line. In our work, we demonstrate that when lightly finetuning multiple runs from a $\\textit{single}$ foundation model, the choice of randomness during training (linear head initialization, data ordering, and data subsetting) can lead to drastically different levels of agreement-on-the-line in the resulting ensemble. Surprisingly, only random head initialization is able to reliably induce agreement-on-the-line in finetuned foundation models across vision and language benchmarks. Second, we demonstrate that ensembles of $\\textit{multiple}$ foundation models pretrained on different datasets but finetuned on the same task can also show agreement-on-the-line. In total, by careful construction of a diverse ensemble, we can utilize agreement-on-the-line-based methods to predict the OOD performance of foundation models with high precision.", "keywords": "robustness;OOD performance estimation;foundation models;agreement-on-the-line", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Rahul Saxena;Taeyoun Kim;Aman Mehra;Christina Baek;J Zico Kolter;Aditi Raghunathan", "authorids": "~Rahul_Saxena1;~Taeyoun_Kim1;~Aman_Mehra1;~Christina_Baek2;~J_Zico_Kolter1;~Aditi_Raghunathan1", "gender": "M;M;;;F;M", "homepage": "https://www.linkedin.com/in/rahul-saxena-cs/;https://danielkty.github.io;;https://kebaek.github.io;https://www.cs.cmu.edu/~aditirag/;http://www.zicokolter.com", "dblp": ";122/6365;;202/7238;166/1409;67/2526", "google_scholar": ";;;;Ch9iRwQAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;;", "linkedin": ";;am-me/;;;", "or_profile": "~Rahul_Saxena1;~Taeyoun_Kim1;~Aman_Mehra1;~Christina_Baek2;~Aditi_Raghunathan1;~Zico_Kolter1", "aff": ";School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";cs.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": ";MS student;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsaxena2024predicting,\ntitle={Predicting the Performance of Foundation Models via Agreement-on-the-Line},\nauthor={Rahul Saxena and Taeyoun Kim and Aman Mehra and Christina Baek and J Zico Kolter and Aditi Raghunathan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aJx9onwsR4}\n}", "github": "", "reviewers": "H4MK;8Ly8;4CKp;hhqs", "pdf_size": 10241214, "rating": "5;7;7;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "48;144;44;87", "wc_strengths": "58;40;113;10", "wc_weaknesses": "134;120;217;54", "wc_questions": "81;97;56;78", "wc_limitations": "38;1;24;2", "wc_review": "359;402;454;231", "wc_reply_reviewers": "72;40;28;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.75, 40.195615432531945 ], "wc_strengths_avg": [ 55.25, 37.49249924984996 ], "wc_weaknesses_avg": [ 131.25, 57.99730597191563 ], "wc_questions_avg": [ 78.0, 14.611639196202457 ], "wc_limitations_avg": [ 16.25, 15.562374497485916 ], "wc_review_avg": [ 361.5, 82.51212032180484 ], "wc_reply_reviewers_avg": [ 42.25, 17.80975856096876 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5847648953960085500&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";cs.cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Symmetric Linear Bandits with Hidden Symmetry", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94561", "id": "aLzA7MSc6Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aLzA7MSc6Y", "openreview": "https://openreview.net/forum?id=aLzA7MSc6Y", "poster": "/media/PosterPDFs/NeurIPS%202024/94561.png?t=1731586429.1758735", "project": "", "author_site": "Phuong Nam Tran, The Anh Ta, Debmalya Mandal, Long Tran-Thanh", "tldr": "", "abstract": "High-dimensional linear bandits with low-dimensional structure have received considerable attention in recent studies due to their practical significance. The most common structure in the literature is sparsity. However, it may not be available in practice. Symmetry, where the reward is invariant under certain groups of transformations on the set of arms, is another important inductive bias in the high-dimensional case that covers many standard structures, including sparsity. In this work, we study high-dimensional symmetric linear bandits where the symmetry is hidden from the learner, and the correct symmetry needs to be learned in an online setting. We examine the structure of a collection of hidden symmetry and provide a method based on model selection within the collection of low-dimensional subspaces. Our algorithm achieves a regret bound of $ O(d_0^{2/3} T^{2/3} \\log(d))$, where $d$ is the ambient dimension which is potentially very large, and $d_0$ is the dimension of the true low-dimensional subspace such that $d_0 \\ll d$. With an extra assumption on well-separated models, we can further improve the regret to $ O(d_0 \\sqrt{T\\log(d)} )$.", "keywords": "Bandit theory;group theory;symmetry;sparsity.", "primary_area": "bandits", "supplementary_material": "", "author": "Nam Phuong Tran;The-Anh Ta;Debmalya Mandal;Long Tran-Thanh", "authorids": "~Nam_Phuong_Tran1;~The-Anh_Ta1;~Debmalya_Mandal2;~Long_Tran-Thanh1", "gender": "M;M;;M", "homepage": "https://namtrankekl.github.io/;https://debmandal.github.io;https://warwick.ac.uk/fac/sci/dcs/people/long_tran-thanh/;https://the-anhta.github.io/", "dblp": "231/9152;151/3685;46/8333;305/4740", "google_scholar": "8M0NXFcAAAAJ;OquWQpEAAAAJ;https://scholar.google.co.uk/citations?user=YBQai3gAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0003-0983-8830;;;", "linkedin": ";;;", "or_profile": "~Nam_Phuong_Tran1;~Debmalya_Mandal2;~Long_Tran-Thanh1;~Anh_Ta1", "aff": "University of Warwick;University of Warwick;The university of Warwick;CSIRO", "aff_domain": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk;data61.csiro.au", "position": "PhD student;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ntran2024symmetric,\ntitle={Symmetric Linear Bandits with Hidden Symmetry},\nauthor={Nam Phuong Tran and The-Anh Ta and Debmalya Mandal and Long Tran-Thanh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aLzA7MSc6Y}\n}", "github": "", "reviewers": "qJvB;qZUs;ZVeC;WDJx;Viv8", "pdf_size": 1612021, "rating": "3;6;7;7;7", "confidence": "4;3;3;3;3", "soundness": "3;3;3;3;4", "novelty": "2;3;4;3;3", "presentation": "2;4;3;3;3", "wc_summary": "98;92;244;74;56", "wc_strengths": "119;107;241;43;59", "wc_weaknesses": "514;94;243;114;121", "wc_questions": "149;125;112;96;4", "wc_limitations": "32;4;72;1;1", "wc_review": "912;422;912;328;241", "wc_reply_reviewers": "487;13;0;35;0", "wc_reply_authors": "246;40;40;0;40", "reply_reviewers": "2;1;0;1;0", "reply_authors": "3;2;2;1;2", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 112.8, 67.22618537445062 ], "wc_strengths_avg": [ 113.8, 69.66893138264717 ], "wc_weaknesses_avg": [ 217.2, 157.37776208854922 ], "wc_questions_avg": [ 97.2, 49.724842885624085 ], "wc_limitations_avg": [ 22.0, 27.589853207293437 ], "wc_review_avg": [ 563.0, 290.65168157091404 ], "wc_reply_reviewers_avg": [ 107.0, 190.4300396471103 ], "wc_reply_authors_avg": [ 73.2, 87.77790154702949 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9682458365518545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D_aioYqMUyIJ:scholar.google.com/&scioq=Symmetric+Linear+Bandits+with+Hidden+Symmetry&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk;data61.csiro.au", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Warwick;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": ";", "aff_unique_url": "https://www.warwick.ac.uk;https://www.csiro.au", "aff_unique_abbr": "Warwick;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;Australia" }, { "title": "An Efficient Recipe for Long Context Extension via Middle-Focused Positional Encoding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94560", "id": "aNHEqFMS0N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aNHEqFMS0N", "openreview": "https://openreview.net/forum?id=aNHEqFMS0N", "poster": "/media/PosterPDFs/NeurIPS%202024/94560.png?t=1733468965.832659", "project": "", "author_site": "Tong Wu, Yanpeng Zhao, Zilong Zheng", "tldr": "", "abstract": "Recently, many methods have been developed to extend the context length of pre-trained large language models (LLMs), but they often require fine-tuning at the target length ($\\gg4K$) and struggle to effectively utilize information from the middle part of the context. To address these issues, we propose $\\textbf{C}$ontinuity-$\\textbf{R}$elativity ind$\\textbf{E}$xing with g$\\textbf{A}$ussian $\\textbf{M}$iddle ($\\texttt{CREAM}$), which interpolates positional encodings by manipulating position indices. Apart from being simple, $\\texttt{CREAM}$ is training-efficient: it only requires fine-tuning at the pre-trained context window (e.g., Llama 2-4K) and can extend LLMs to a much longer target context length (e.g., 256K). To ensure that the model focuses more on the information in the middle, we introduce a truncated Gaussian to encourage sampling from the middle part of the context during fine-tuning, thus alleviating the ''Lost-in-the-Middle'' problem faced by long-context LLMs. Experimental results show that $\\texttt{CREAM}$ successfully extends LLMs to the target length for both Base and Chat versions of $\\texttt{Llama2-7B}$ with ``Never Miss A Beat''. Our code is publicly available at https://github.com/bigai-nlco/cream.", "keywords": "Large Language Model;Long Context;Position Encoding", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/387c55225e94c367e03db73d12bba3c23ebd0cca.zip", "author": "Tong Wu;Yanpeng Zhao;Zilong Zheng", "authorids": "~Tong_Wu9;~Yanpeng_Zhao1;~Zilong_Zheng1", "gender": "M;Not Specified;M", "homepage": "https://wutong4012.github.io/;;http://zilongzheng.github.io", "dblp": ";182/5860;218/5234", "google_scholar": "https://scholar.google.com.hk/citations?user=yn0GDR4AAAAJ;-T9FigIAAAAJ;9sDx70IAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tong_Wu9;~Yanpeng_Zhao1;~Zilong_Zheng1", "aff": "Tsinghua University;University of Edinburgh;Beijing Institute for General Artificial Intelligence", "aff_domain": "mails.tsinghua.edu.cn;ed.ac.uk;bigai.ai", "position": "MS student;PhD student;Researcher", "bibtex": "@inproceedings{\nwu2024an,\ntitle={An Efficient Recipe for Long Context Extension via Middle-Focused Positional Encoding},\nauthor={Tong Wu and Yanpeng Zhao and Zilong Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aNHEqFMS0N}\n}", "github": "", "reviewers": "qJ1n;XDBh;srX3;fWsk", "pdf_size": 1359737, "rating": "3;5;6;6", "confidence": "2;4;3;3", "soundness": "2;3;3;2", "novelty": "2;3;2;4", "presentation": "1;3;3;4", "wc_summary": "93;84;92;134", "wc_strengths": "22;99;101;105", "wc_weaknesses": "172;123;143;92", "wc_questions": "2;37;143;44", "wc_limitations": "1;111;4;62", "wc_review": "290;454;483;437", "wc_reply_reviewers": "0;0;115;37", "wc_reply_authors": "0;441;492;685", "reply_reviewers": "0;0;1;1", "reply_authors": "1;2;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 100.75, 19.51121472384536 ], "wc_strengths_avg": [ 81.75, 34.56425176392511 ], "wc_weaknesses_avg": [ 132.5, 29.159046623646667 ], "wc_questions_avg": [ 56.5, 52.41421562896844 ], "wc_limitations_avg": [ 44.5, 45.445021729558015 ], "wc_review_avg": [ 416.0, 74.5821694508815 ], "wc_reply_reviewers_avg": [ 38.0, 46.95210325427392 ], "wc_reply_authors_avg": [ 404.5, 250.64367137432376 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2248852955378147793&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "mails.tsinghua.edu.cn;ed.ac.uk;bigai.ai", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;University of Edinburgh;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ed.ac.uk;http://www.bigaiai.org/", "aff_unique_abbr": "THU;Edinburgh;BIGAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "In-and-Out: Algorithmic Diffusion for Sampling Convex Bodies", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94559", "id": "aNQWRHyh15", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aNQWRHyh15", "openreview": "https://openreview.net/forum?id=aNQWRHyh15", "poster": "/media/PosterPDFs/NeurIPS%202024/94559.png?t=1729564361.590326", "project": "", "author_site": "Yunbum Kook, Santosh Vempala, Matthew Zhang", "tldr": "", "abstract": "We present a new random walk for uniformly sampling high-dimensional convex bodies. It achieves state-of-the-art runtime complexity with stronger guarantees on the output than previously known, namely in R\u00e9nyi divergence (which implies TV, $\\mathcal{W}_2$, KL, $\\chi^2$). The proof departs from known approaches for polytime algorithms for the problem - we utilize a stochastic diffusion perspective to show contraction to the target distribution with the rate of convergence determined by functional isoperimetric constants of the stationary density.", "keywords": "Sampling;convex bodies;MCMC;uniform sampling", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Yunbum Kook;Santosh Vempala;Matthew Shunshi Zhang", "authorids": "~Yunbum_Kook1;~Santosh_Vempala1;~Matthew_Shunshi_Zhang1", "gender": ";M;M", "homepage": "https://yunbum-kook.github.io/;http://www.cc.gatech.edu/~vempala/;http://matthewzhang1998.github.io/", "dblp": ";v/SantoshVempala;255/4855", "google_scholar": "mWASLKEAAAAJ;https://scholar.google.com.tw/citations?user=hRggMmIAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yunbum_Kook1;~Santosh_Vempala1;~Shunshi_Zhang1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;University of Toronto", "aff_domain": "gatech.edu;gatech.edu;toronto.edu", "position": "PhD student;Professor;PhD student", "bibtex": "@inproceedings{\nkook2024inandout,\ntitle={In-and-Out: Algorithmic Diffusion for Sampling Convex Bodies},\nauthor={Yunbum Kook and Santosh Vempala and Matthew Shunshi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aNQWRHyh15}\n}", "github": "", "reviewers": "Jx5G;T79w;V3VH;W5VJ", "pdf_size": 658200, "rating": "6;7;8;8", "confidence": "3;4;5;4", "soundness": "3;3;4;4", "novelty": "3;4;3;4", "presentation": "3;3;3;4", "wc_summary": "101;59;176;193", "wc_strengths": "76;33;102;127", "wc_weaknesses": "111;31;59;226", "wc_questions": "145;28;88;150", "wc_limitations": "6;1;1;7", "wc_review": "439;152;426;703", "wc_reply_reviewers": "60;8;11;93", "wc_reply_authors": "115;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 132.25, 54.65059469026847 ], "wc_strengths_avg": [ 84.5, 34.77427209878016 ], "wc_weaknesses_avg": [ 106.75, 74.59348161870446 ], "wc_questions_avg": [ 102.75, 49.5548938047495 ], "wc_limitations_avg": [ 3.75, 2.7726341266023544 ], "wc_review_avg": [ 430.0, 194.87816706855594 ], "wc_reply_reviewers_avg": [ 43.0, 35.48943504763072 ], "wc_reply_authors_avg": [ 28.75, 49.79646071760522 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13266443017670727385&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;toronto.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia Institute of Technology;University of Toronto", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.utoronto.ca", "aff_unique_abbr": "Georgia Tech;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Understanding Hallucinations in Diffusion Models through Mode Interpolation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94558", "id": "aNTnHBkw4T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aNTnHBkw4T", "openreview": "https://openreview.net/forum?id=aNTnHBkw4T", "poster": "/media/PosterPDFs/NeurIPS%202024/94558.png?t=1733764457.4997625", "project": "", "author_site": "Sumukh K Aithal, Pratyush Maini, Zachary Lipton, J. Zico Kolter", "tldr": "", "abstract": "Colloquially speaking, image generation models based upon diffusion processes are frequently said to exhibit ''hallucinations'' samples that could never occur in the training data. But where do such hallucinations come from? In this paper, we study a particular failure mode in diffusion models, which we term ***mode interpolation***. Specifically, we find that diffusion models smoothly ``interpolate'' between nearby data modes in the training set, to generate samples that are completely outside the support of the original training distribution; this phenomenon leads diffusion models to generate artifacts that never existed in real data (i.e., hallucinations). We systematically study the reasons for, and the manifestation of this phenomenon. Through experiments on 1D and 2D Gaussians, we show how a discontinuous loss landscape in the diffusion model's decoder leads to a region where any smooth approximation will cause such hallucinations. Through experiments on artificial datasets with various shapes, we show how hallucination leads to the generation of combinations of shapes that never existed. We extend the validity of mode interpolation in real-world datasets by explaining the unexpected generation of images with additional or missing fingers similar to those produced by popular text-to-image generative models. Finally, we show that diffusion models in fact ***know*** when they go out of support and hallucinate. This is captured by the high variance in the trajectory of the generated sample towards the final few backward sampling process. Using a simple metric to capture this variance, we can remove over 95\\% of hallucinations at generation time. We conclude our exploration by showing the implications of such hallucination (and its removal) on the collapse (and stabilization) of recursive training on synthetic data with experiments on datasets like MNIST .", "keywords": "diffusion;generative models;hallucination", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/a8ee229f458c5a309f2ee675d664c4f3b0f10688.zip", "author": "Sumukh K Aithal;Pratyush Maini;Zachary Chase Lipton;J Zico Kolter", "authorids": "~Sumukh_K_Aithal1;~Pratyush_Maini1;~Zachary_Chase_Lipton1;~J_Zico_Kolter1", "gender": "M;Unspecified;M;M", "homepage": "https://pratyushmaini.github.io/;http://zacklipton.com;http://www.zicokolter.com;https://sumukhaithal6.github.io/", "dblp": "248/8071;;67/2526;299/5911", "google_scholar": ";MN9Kfg8AAAAJ;UXh1I6UAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";;;sumukh-aithal-9801b4189", "or_profile": "~Pratyush_Maini1;~Zachary_Chase_Lipton1;~Zico_Kolter1;~Sumukh_Aithal_K1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Assistant Professor;Full Professor;MS student", "bibtex": "@inproceedings{\naithal2024understanding,\ntitle={Understanding Hallucinations in Diffusion Models through Mode Interpolation},\nauthor={Sumukh K Aithal and Pratyush Maini and Zachary Chase Lipton and J Zico Kolter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aNTnHBkw4T}\n}", "github": "", "reviewers": "Zx7y;yx5u;B7Kp;8Ww6", "pdf_size": 16741029, "rating": "3;5;5;6", "confidence": "5;3;4;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "92;213;145;143", "wc_strengths": "36;105;79;66", "wc_weaknesses": "408;197;73;392", "wc_questions": "174;3;174;6", "wc_limitations": "1;15;22;8", "wc_review": "711;533;493;615", "wc_reply_reviewers": "314;23;82;236", "wc_reply_authors": "840;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 148.25, 42.99636612552275 ], "wc_strengths_avg": [ 71.5, 24.84451649760969 ], "wc_weaknesses_avg": [ 267.5, 139.67909650337805 ], "wc_questions_avg": [ 89.25, 84.75663690826813 ], "wc_limitations_avg": [ 11.5, 7.826237921249264 ], "wc_review_avg": [ 588.0, 83.52843827104634 ], "wc_reply_reviewers_avg": [ 163.75, 116.49973175934784 ], "wc_reply_authors_avg": [ 210.0, 363.73066958946424 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8070346616362494548&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On Feature Learning in Structured State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94557", "id": "aQv5AbN1wF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aQv5AbN1wF", "openreview": "https://openreview.net/forum?id=aQv5AbN1wF", "poster": "", "project": "", "author_site": "Leena Chennuru Vankadara, Jin Xu, Moritz Haas, Volkan Cevher", "tldr": "", "abstract": "This paper studies the scaling behavior of state-space models (SSMs) and their structured variants, such as Mamba, that have recently arisen in popularity as alternatives to transformer-based neural network architectures. Specifically, we focus on the capability of SSMs to learn features as their network width approaches infinity. Our findings reveal that established scaling rules, such as the Maximal Update Parameterization, fail to support feature learning as these models cannot be represented in the form of Tensor Programs. Additionally, we demonstrate that spectral scaling conditions, shown to be effective for feature learning in a host of other architectures, do not hold the same implications for SSMs. Through a detailed signal propagation analysis in SSMs, both forward and backward, we identify the appropriate scaling necessary for non-trivial feature evolution in the infinite-width limit. Our proposed scaling shows behavior akin to the Maximal Update Parameterization, such as improved stability, better generalization, and transferability of optimal hyper-parameters from small to large scale SSMs.", "keywords": "State space models;feature learning;hyperparameter transfer;scaling theory;deep learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Leena Chennuru Vankadara;Jin Xu;Moritz Haas;Volkan Cevher", "authorids": "~Leena_Chennuru_Vankadara2;~Jin_Xu7;~Moritz_Haas1;~Volkan_Cevher1", "gender": "F;M;;M", "homepage": "https://leenacvankadara.com;https://jinxu06.github.io/;https://www.tml.cs.uni-tuebingen.de/team/haas/index.php;http://lions.epfl.ch", "dblp": ";97/3265-11;332/4834;70/5301", "google_scholar": ";b5JQt5QAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Leena_Chennuru_Vankadara2;~Jin_Xu7;~Moritz_Haas1;~Volkan_Cevher1", "aff": "Amazon Development Center Germany;Microsoft;Amazon;Amazon Development Center Germany", "aff_domain": "amazon.de;microsoft.com;amazon.com;amazon.de", "position": "Applied Scientist II;Researcher;Intern;Amazon Scholar", "bibtex": "@inproceedings{\nvankadara2024on,\ntitle={On Feature Learning in Structured State Space Models},\nauthor={Leena Chennuru Vankadara and Jin Xu and Moritz Haas and Volkan Cevher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aQv5AbN1wF}\n}", "github": "", "reviewers": "JuBx;QtDa;iUa6", "pdf_size": 2020158, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "85;133;99", "wc_strengths": "54;88;108", "wc_weaknesses": "60;155;184", "wc_questions": "95;115;1", "wc_limitations": "37;1;1", "wc_review": "331;492;393", "wc_reply_reviewers": "24;46;31", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 105.66666666666667, 20.154955277107966 ], "wc_strengths_avg": [ 83.33333333333333, 22.291004663067316 ], "wc_weaknesses_avg": [ 133.0, 52.959103718498355 ], "wc_questions_avg": [ 70.33333333333333, 49.701330185642135 ], "wc_limitations_avg": [ 13.0, 16.97056274847714 ], "wc_review_avg": [ 405.3333333333333, 66.30401362076222 ], "wc_reply_reviewers_avg": [ 33.666666666666664, 9.177266598624136 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=380086638723103308&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "amazon.de;microsoft.com;amazon.com;amazon.de", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Amazon;Microsoft", "aff_unique_dep": "Development Center;Microsoft Corporation", "aff_unique_url": "https://www.amazon.de;https://www.microsoft.com", "aff_unique_abbr": "Amazon;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Improved Regret for Bandit Convex Optimization with Delayed Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94556", "id": "aR9JvkOGjM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aR9JvkOGjM", "openreview": "https://openreview.net/forum?id=aR9JvkOGjM", "poster": "/media/PosterPDFs/NeurIPS%202024/94556.png?t=1730907699.2791035", "project": "", "author_site": "Yuanyu Wan, Chang Yao, Mingli Song, Lijun Zhang", "tldr": "", "abstract": "We investigate bandit convex optimization (BCO) with delayed feedback, where only the loss value of the action is revealed under an arbitrary delay. Let $n,T,\\bar{d}$ denote the dimensionality, time horizon, and average delay, respectively. Previous studies have achieved an $O(\\sqrt{n}T^{3/4}+(n\\bar{d})^{1/3}T^{2/3})$ regret bound for this problem, whose delay-independent part matches the regret of the classical non-delayed bandit gradient descent algorithm. However, there is a large gap between its delay-dependent part, i.e., $O((n\\bar{d})^{1/3}T^{2/3})$, and an existing $\\Omega(\\sqrt{\\bar{d}T})$ lower bound. In this paper, we illustrate that this gap can be filled in the worst case, where $\\bar{d}$ is very close to the maximum delay $d$. Specifically, we first develop a novel algorithm, and prove that it enjoys a regret bound of $O(\\sqrt{n}T^{3/4}+\\sqrt{dT})$ in general. Compared with the previous result, our regret bound is better for $d=O((n\\bar{d})^{2/3}T^{1/3})$, and the delay-dependent part is tight in the worst case. The primary idea is to decouple the joint effect of the delays and the bandit feedback on the regret by carefully incorporating the delayed bandit feedback with a blocking update mechanism. Furthermore, we show that the proposed algorithm can improve the regret bound to $O((nT)^{2/3}\\log^{1/3}T+d\\log T)$ for strongly convex functions. Finally, if the action sets are unconstrained, we demonstrate that it can be simply extended to achieve an $O(n\\sqrt{T\\log T}+d\\log T)$ regret bound for strongly convex and smooth functions.", "keywords": "Bandit Convex Optimization;Delayed Feedback;Improved Regret", "primary_area": "online_learning", "supplementary_material": "", "author": "Yuanyu Wan;Chang Yao;Mingli Song;Lijun Zhang", "authorids": "~Yuanyu_Wan1;~Chang_Yao2;~Mingli_Song1;~Lijun_Zhang1", "gender": "M;;M;", "homepage": "https://yuanyuwan.github.io/;;https://person.zju.edu.cn/msong;", "dblp": "221/3499;;71/5333;", "google_scholar": "CEymMc8AAAAJ;;7oLbhAwAAAAJ;", "orcid": ";;0000-0003-2621-6048;", "linkedin": ";;;", "or_profile": "~Yuanyu_Wan1;~Chang_Yao2;~Mingli_Song1;~Lijun_Zhang1", "aff": "Zhejiang University;;Zhejiang University;", "aff_domain": "zju.edu.cn;;zju.edu.cn;", "position": "Researcher;;Full Professor;", "bibtex": "@inproceedings{\nwan2024improved,\ntitle={Improved Regret for Bandit Convex Optimization with Delayed Feedback},\nauthor={Yuanyu Wan and Chang Yao and Mingli Song and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aR9JvkOGjM}\n}", "github": "", "reviewers": "S7rX;GZvs;knrh;pbhB;vCin", "pdf_size": 418901, "rating": "5;6;7;7;7", "confidence": "3;4;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;4;3;4", "wc_summary": "120;56;108;85;166", "wc_strengths": "38;35;113;95;61", "wc_weaknesses": "55;71;14;19;49", "wc_questions": "21;36;33;5;65", "wc_limitations": "11;9;11;25;1", "wc_review": "245;207;279;229;342", "wc_reply_reviewers": "14;7;11;11;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 107.0, 36.70422319025428 ], "wc_strengths_avg": [ 68.4, 30.955451862313364 ], "wc_weaknesses_avg": [ 41.6, 21.77705214210592 ], "wc_questions_avg": [ 32.0, 19.778776504121787 ], "wc_limitations_avg": [ 11.4, 7.735631842325486 ], "wc_review_avg": [ 260.4, 47.072709715927765 ], "wc_reply_reviewers_avg": [ 12.0, 3.3466401061363023 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.1020620726159658, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8261587121936463388&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;;zju.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Towards Open-Vocabulary Semantic Segmentation Without Semantic Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94555", "id": "aRhxruC2bi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aRhxruC2bi", "openreview": "https://openreview.net/forum?id=aRhxruC2bi", "poster": "", "project": "", "author_site": "Heeseong Shin, Chaehyun Kim, Sunghwan Hong, Seokju Cho, Anurag Arnab, Paul Hongsuck Seo, Seungryong Kim", "tldr": "", "abstract": "Large-scale vision-language models like CLIP have demonstrated impressive open-vocabulary capabilities for image-level tasks, excelling in recognizing what objects are present. However, they struggle with pixel-level recognition tasks like semantic segmentation, which require understanding where the objects are located. In this work, we propose a novel method, PixelCLIP, to adapt the CLIP image encoder for pixel-level understanding by guiding the model on where, which is achieved using unlabeled images and masks generated from vision foundation models such as SAM and DINO. To address the challenges of leveraging masks without semantic labels, we devise an online clustering algorithm using learnable class names to acquire general semantic concepts. PixelCLIP shows significant performance improvements over CLIP and competitive results compared to caption-supervised methods in open-vocabulary semantic segmentation.", "keywords": "semantic segmentation;open-vocabulary segmentation;vision-language models;fine-tuning foundation models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Heeseong Shin;Chaehyun Kim;Sunghwan Hong;Seokju Cho;Anurag Arnab;Paul Hongsuck Seo;Seungryong Kim", "authorids": "~Heeseong_Shin2;~Chaehyun_Kim1;~Sunghwan_Hong2;~Seokju_Cho1;~Anurag_Arnab1;~Paul_Hongsuck_Seo1;~Seungryong_Kim1", "gender": ";F;;M;;M;M", "homepage": "https://github.com/hsshin98;https://github.com/kchyun;;https://seokju-cho.github.io;;https://phseo.github.io;https://cvlab.korea.ac.kr/members/faculty", "dblp": "229/9090;;;294/4755;;172/0938;141/9955", "google_scholar": ";;;mvPfpnMAAAAJ;;https://scholar.google.co.kr/citations?user=Tp7U8_UAAAAJ;cIK1hS8AAAAJ", "orcid": ";;;0000-0003-1199-9596;;;", "linkedin": ";;;seokju-cho-43601b174/;;;", "or_profile": "~Heeseong_Shin2;~Chaehyun_Kim1;~Sunghwan_Hong2;~Seokju_Cho1;~Anurag_Arnab1;~Paul_Hongsuck_Seo1;~Seungryong_Kim1", "aff": "Korea University;Korea University;;Korea University;;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;;korea.ac.kr;;korea.ac.kr;korea.ac.kr", "position": "MS student;Undergrad student;;PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshin2024towards,\ntitle={Towards Open-Vocabulary Semantic Segmentation Without Semantic Labels},\nauthor={Heeseong Shin and Chaehyun Kim and Sunghwan Hong and Seokju Cho and Anurag Arnab and Paul Hongsuck Seo and Seungryong Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aRhxruC2bi}\n}", "github": "", "reviewers": "83hz;rHvB;kepm;Dx6v", "pdf_size": 4721764, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "84;96;92;62", "wc_strengths": "52;50;93;76", "wc_weaknesses": "430;131;175;88", "wc_questions": "27;19;89;4", "wc_limitations": "13;2;20;1", "wc_review": "606;298;469;231", "wc_reply_reviewers": "39;12;139;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.5, 13.143439428094915 ], "wc_strengths_avg": [ 67.75, 17.80975856096876 ], "wc_weaknesses_avg": [ 206.0, 132.93419424662716 ], "wc_questions_avg": [ 34.75, 32.39116391857508 ], "wc_limitations_avg": [ 9.0, 7.905694150420948 ], "wc_review_avg": [ 401.0, 146.76341505974847 ], "wc_reply_reviewers_avg": [ 50.75, 52.08826643304613 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7444414792647390756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "korea.ac.kr;korea.ac.kr;;korea.ac.kr;;korea.ac.kr;korea.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Sequential Signal Mixing Aggregation for Message Passing Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94554", "id": "aRokfUfIQs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aRokfUfIQs", "openreview": "https://openreview.net/forum?id=aRokfUfIQs", "poster": "/media/PosterPDFs/NeurIPS%202024/94554.png?t=1730151914.3733466", "project": "", "author_site": "Mitchell Keren Taraday, Almog David, Chaim Baskin", "tldr": "", "abstract": "Message Passing Graph Neural Networks (MPGNNs) have emerged as the preferred method for modeling complex interactions across diverse graph entities. While the theory of such models is well understood, their aggregation module has not received sufficient attention. Sum-based aggregators have solid theoretical foundations regarding their separation capabilities. However, practitioners often prefer using more complex aggregations and mixtures of diverse aggregations. In this work, we unveil a possible explanation for this gap. We claim that sum-based aggregators fail to \"mix\" features belonging to distinct neighbors, preventing them from succeeding at downstream tasks.\nTo this end, we introduce Sequential Signal Mixing Aggregation (SSMA), a novel plug-and-play aggregation for MPGNNs. SSMA treats the neighbor features as 2D discrete signals and sequentially convolves them, inherently enhancing the ability to mix features attributed to distinct neighbors. By performing extensive experiments, we show that when combining SSMA with well-established MPGNN architectures, we achieve substantial performance gains across various benchmarks, achieving new state-of-the-art results in many settings.\nWe published our code at https://almogdavid.github.io/SSMA/.", "keywords": "graph neural networks;message passing neural networks;invariant aggregation", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Mitchell Keren Taraday;Almog David;Chaim Baskin", "authorids": "~Mitchell_Keren_Taraday1;~Almog_David1;~Chaim_Baskin1", "gender": "M;M;M", "homepage": ";;https://chaimbaskin.bgu.ac.il", "dblp": ";;205/2308", "google_scholar": ";;https://scholar.google.co.il/citations?user=lfWCxJYAAAAJ", "orcid": ";;0000-0003-4341-5639", "linkedin": "mitchell-k-998081a9;www.linkedin.com/in/almog-david-68201794;chaim-baskin-97441340/", "or_profile": "~Mitchell_Keren_Taraday1;~Almog_David1;~Chaim_Baskin1", "aff": ";Computer Science Departmen, Technion-Israel Institute of Technology;Technion, Technion", "aff_domain": ";cs.technion.ac.il;technion.ac.il", "position": ";MS student;Visiting Assistant Professor", "bibtex": "@inproceedings{\ntaraday2024sequential,\ntitle={Sequential Signal Mixing Aggregation for Message Passing Graph Neural Networks},\nauthor={Mitchell Keren Taraday and Almog David and Chaim Baskin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aRokfUfIQs}\n}", "github": "", "reviewers": "vJN5;aDFM;QvY7;4Dt8", "pdf_size": 1676490, "rating": "5;6;6;7", "confidence": "4;2;3;4", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "76;47;44;108", "wc_strengths": "66;43;118;54", "wc_weaknesses": "61;102;146;124", "wc_questions": "5;12;142;164", "wc_limitations": "8;1;1;18", "wc_review": "216;205;451;468", "wc_reply_reviewers": "19;40;12;17", "wc_reply_authors": "103;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.75, 25.878321042911576 ], "wc_strengths_avg": [ 70.25, 28.74347752099596 ], "wc_weaknesses_avg": [ 108.25, 31.403622402519108 ], "wc_questions_avg": [ 80.75, 72.7096107815191 ], "wc_limitations_avg": [ 7.0, 6.96419413859206 ], "wc_review_avg": [ 335.0, 124.70565344041144 ], "wc_reply_reviewers_avg": [ 22.0, 10.700467279516348 ], "wc_reply_authors_avg": [ 25.75, 44.60030829489859 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FjwjW_4RgoIJ:scholar.google.com/&scioq=Sequential+Signal+Mixing+Aggregation+for+Message+Passing+Graph+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": ";cs.technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Technion-Israel Institute of Technology;Technion - Israel Institute of Technology", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.technion.ac.il;https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Enhancing Multiple Dimensions of Trustworthiness in LLMs via Sparse Activation Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94553", "id": "aSkckaNxnO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aSkckaNxnO", "openreview": "https://openreview.net/forum?id=aSkckaNxnO", "poster": "", "project": "", "author_site": "Yuxin Xiao, Wan Chaoqun, Yonggang Zhang, Wenxiao Wang, Binbin Lin, Xiaofei He, Xu Shen, Jieping Ye", "tldr": "", "abstract": "As the development and application of Large Language Models (LLMs) continue to advance rapidly, enhancing their trustworthiness and aligning them with human preferences has become a critical area of research. Traditional methods rely heavily on extensive data for Reinforcement Learning from Human Feedback (RLHF), but representation engineering offers a new, training-free approach. This technique leverages semantic features to control the representation of LLM's intermediate hidden states, enabling the model to meet specific requirements such as increased honesty or heightened safety awareness. However, a significant challenge arises when attempting to fulfill multiple requirements simultaneously. It proves difficult to encode various semantic contents, like honesty and safety, into a singular semantic feature, restricting its practicality.\nIn this work, we address this challenge through Sparse Activation Control. By delving into the intrinsic mechanisms of LLMs, we manage to identify and pinpoint modules that are closely related to specific tasks within the model, i.e. attention heads. These heads display sparse characteristics that allow for near-independent control over different tasks. Our experiments, conducted on the open-source Llama series models, have yielded encouraging results. The models were able to align with human preferences on issues of safety, factualness, and bias concurrently.", "keywords": "large language models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuxin Xiao;Chaoqun Wan;Yonggang Zhang;Wenxiao Wang;Binbin Lin;Xiaofei He;Xu Shen;Jieping Ye", "authorids": "~Yuxin_Xiao2;~Chaoqun_Wan2;~Yonggang_Zhang1;~Wenxiao_Wang2;~Binbin_Lin3;~Xiaofei_He2;~Xu_Shen1;~Jieping_Ye4", "gender": "F;M;M;M;M;M;M;M", "homepage": "https://github.com/kuroo-fissh?tab=repositories;https://yonggangzhangben.github.io/index.html;https://wenxiaowang.com;https://www.linkedin.com/in/binbin-lin-03598b31/;https://person.zju.edu.cn/0007101;;http://yelabs.net/;http://staff.ustc.edu.cn/~xinmei/students.html", "dblp": ";27/6859-3;243/5853-1;51/8073;h/XiaofeiHe.html;09/10130-1.html;03/5454;208/4855", "google_scholar": ";XSbEr98AAAAJ;https://scholar.google.com.hk/citations?user=rcxOjikAAAAJ;Zmvq4KYAAAAJ;QLLFowsAAAAJ;38jwGs8AAAAJ;T9AzhwcAAAAJ;", "orcid": ";0000-0002-4080-7592;;0000-0002-0330-6406;0009-0001-9107-2354;;0000-0001-8662-5818;", "linkedin": ";;;;;;;", "or_profile": "~Yuxin_Xiao2;~Yonggang_Zhang1;~Wenxiao_Wang2;~Binbin_Lin3;~Xiaofei_He2;~Xu_Shen1;~Jieping_Ye4;~Wan_Chaoqun1", "aff": "Zhejiang University;Hong Kong Baptist University;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "zju.edu.cn;hkbu.edu.hk;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "PhD student;Postdoc;Assistant Professor;Researcher;Professor;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nxiao2024enhancing,\ntitle={Enhancing Multiple Dimensions of Trustworthiness in {LLM}s via Sparse Activation Control},\nauthor={Yuxin Xiao and Chaoqun Wan and Yonggang Zhang and Wenxiao Wang and Binbin Lin and Xiaofei He and Xu Shen and Jieping Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aSkckaNxnO}\n}", "github": "", "reviewers": "jrhB;3hNv;Zk9K", "pdf_size": 2490380, "rating": "5;6;6", "confidence": "2;4;2", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;3;3", "wc_summary": "73;122;195", "wc_strengths": "117;179;98", "wc_weaknesses": "153;176;189", "wc_questions": "7;40;109", "wc_limitations": "16;5;41", "wc_review": "366;522;632", "wc_reply_reviewers": "0;15;14", "wc_reply_authors": "0;38;47", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 130.0, 50.1265066273989 ], "wc_strengths_avg": [ 131.33333333333334, 34.58644564308715 ], "wc_weaknesses_avg": [ 172.66666666666666, 14.884742374510738 ], "wc_questions_avg": [ 52.0, 42.49705872175156 ], "wc_limitations_avg": [ 20.666666666666668, 15.062831370260005 ], "wc_review_avg": [ 506.6666666666667, 109.1339645675086 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 6.847546194724712 ], "wc_reply_authors_avg": [ 28.333333333333332, 20.368821489936252 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4358268763776891579&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;hkbu.edu.hk;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;2;2", "aff_unique_norm": "Zhejiang University;Hong Kong Baptist University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.hkbu.edu.hk;https://www.alibaba.com", "aff_unique_abbr": "ZJU;HKBU;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SureMap: Simultaneous mean estimation for single-task and multi-task disaggregated evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94552", "id": "aTNT3FuVBG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aTNT3FuVBG", "openreview": "https://openreview.net/forum?id=aTNT3FuVBG", "poster": "", "project": "", "author_site": "Misha Khodak, Lester Mackey, Alexandra Chouldechova, Miro Dudik", "tldr": "", "abstract": "Disaggregated evaluation—estimation of performance of a machine learning model on different subpopulations—is a core task when assessing performance and group-fairness of AI systems.\nA key challenge is that evaluation data is scarce, and subpopulations arising from intersections of attributes (e.g., race, sex, age) are often tiny.\nToday, it is common for multiple clients to procure the same AI model from a model developer, and the task of disaggregated evaluation is faced by each customer individually. This gives rise to what we call the *multi-task disaggregated evaluation problem*, wherein multiple clients seek to conduct a disaggregated evaluation of a given model in their own data setting (task). In this work we develop a disaggregated evaluation method called **SureMap** that has high estimation accuracy for both multi-task *and* single-task disaggregated evaluations of blackbox models. SureMap's efficiency gains come from\n(1) transforming the problem into structured simultaneous Gaussian mean estimation and (2) incorporating external data, e.g., from the AI system creator or from their other clients. Our method combines *maximum a posteriori* (MAP) estimation using a well-chosen prior together with cross-validation-free tuning via Stein's unbiased risk estimate (SURE).\nWe evaluate SureMap on disaggregated evaluation tasks in multiple domains, observing significant accuracy improvements over several strong competitors.", "keywords": "fairness;evaluation;multi-task;transfer", "primary_area": "fairness", "supplementary_material": "/attachment/472f8661a575ab9f998ffc875cde53417c58d0ce.zip", "author": "Mikhail Khodak;Lester Mackey;Alexandra Chouldechova;Miroslav Dud\u00edk", "authorids": "~Mikhail_Khodak1;~Lester_Mackey1;~Alexandra_Chouldechova1;~Miroslav_Dud\u00edk1", "gender": ";M;F;", "homepage": ";https://stanford.edu/~lmackey;http://www.andrew.cmu.edu/user/achoulde/;", "dblp": ";05/2961;125/8515;30/2146", "google_scholar": ";erv7TP0AAAAJ;https://scholar.google.com/citations?hl=en;wYMTld8AAAAJ", "orcid": ";0000-0002-1102-0387;;", "linkedin": ";lester-mackey-5902909;;", "or_profile": "~Mikhail_Khodak1;~Lester_Mackey1;~Alexandra_Chouldechova1;~Miroslav_Dud\u00edk1", "aff": ";Microsoft Research New England;Carnegie Mellon University;Microsoft", "aff_domain": ";microsoft.com;cmu.edu;microsoft.com", "position": ";Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkhodak2024suremap,\ntitle={SureMap: Simultaneous mean estimation for single-task and multi-task disaggregated evaluation},\nauthor={Mikhail Khodak and Lester Mackey and Alexandra Chouldechova and Miroslav Dud{\\'\\i}k},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aTNT3FuVBG}\n}", "github": "", "reviewers": "UdGz;3ZUA;3rXK;gnhf;LQxL", "pdf_size": 7829466, "rating": "5;5;5;6;7", "confidence": "2;2;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "56;61;66;88;176", "wc_strengths": "41;60;55;139;85", "wc_weaknesses": "24;65;34;132;62", "wc_questions": "28;83;84;127;55", "wc_limitations": "12;6;1;105;12", "wc_review": "161;275;240;591;390", "wc_reply_reviewers": "9;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.4, 44.65691435824916 ], "wc_strengths_avg": [ 76.0, 34.56009259246856 ], "wc_weaknesses_avg": [ 63.4, 37.75499966891802 ], "wc_questions_avg": [ 75.4, 33.03694901167479 ], "wc_limitations_avg": [ 27.2, 39.11725961771862 ], "wc_review_avg": [ 331.4, 149.26298938450884 ], "wc_reply_reviewers_avg": [ 1.8, 3.6000000000000005 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8685990362153793, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9vv7CPWdl2IJ:scholar.google.com/&scioq=SureMap:+Simultaneous+mean+estimation+for+single-task+and+multi-task+disaggregated+evaluation&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": ";microsoft.com;cmu.edu;microsoft.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Carnegie Mellon University", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/microsoft-research-new-england;https://www.cmu.edu", "aff_unique_abbr": "MSR NE;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "New England;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "aTXhTD44nF", "title": "USDC: A Dataset of $\\underline{U}$ser $\\underline{S}$tance and $\\underline{D}$ogmatism in Long $\\underline{C}$onversations", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Identifying user's opinions and stances in long conversation threads on various topics can be extremely critical for enhanced personalization, market research, political campaigns, customer service, conflict resolution, targeted advertising and content moderation. Hence, training language models to automate this task is critical. However, to train such models, gathering manual annotations has multiple challenges: 1) it is both time-consuming and costly; 2) conversation threads could be very long increasing chances of noisy annotations; and 3) interpreting instances where a user changes their opinion within a conversation is difficult because often times such transitions are subtle and not expressed explicitly. Inspired by the recent success of large language models (LLMs) for complex natural language processing (NLP) tasks, we leverage Mistral Large and GPT-4 for automating the human annotation process on the following two tasks while also providing reasoning: i) user Stance detection, which involves labeling a user's stance of a post in a conversation on a five-point scale; ii) user Dogmatism detection, which deals with labeling a user's overall opinion in the conversation on a four-point scale. Majority voting on zero-shot, one-shot, few-shot annotations from these two LLMs on 764 multi-user Reddit conversations helps us curate the USDC dataset. USDC is then used to finetune and instruction-tune multiple deployable small language models for the 5-class stance and 4-class dogmatism classification tasks. We make the code and dataset publicly available [https://anonymous.4open.science/r/USDC-0F7F].", "keywords": "large language models;annotators;user opinions;stance;dogmatism;human-llm alignment;open-source llms;closed-source llms", "primary_area": "", "supplementary_material": "/attachment/6c25469d906f9f5fc09e9522b8b101e99fb6981d.pdf", "author": "mounika marreddy;SUBBA REDDY OOTA;Venkata Charan Chinni;Manish Gupta;Lucie Flek", "authorids": "~mounika_marreddy1;~SUBBA_REDDY_OOTA1;~Venkata_Charan_Chinni1;~Manish_Gupta1;~Lucie_Flek1", "gender": "F;M;M;M;F", "homepage": ";https://sites.google.com/view/subbareddyoota300/home?authuser=0;;https://sites.google.com/view/manishg/;https://caisa-lab.github.io", "dblp": "206/3366;190/1709;;g/ManishGupta1.html;268/1049", "google_scholar": "Ikqyo5sAAAAJ;https://scholar.google.co.in/citations?user=4Uz0LngAAAAJ;yHwdZLcAAAAJ;https://scholar.google.co.in/citations?user=eX9PSu0AAAAJ;qZCZFp0AAAAJ", "orcid": ";0000-0002-5975-622X;;0000-0002-2843-3110;0000-0002-5995-8454", "linkedin": ";subba-reddy-oota-11a91254/;venkata-charan-chinni-4a569615a/;manishsgupta/;flekova/", "or_profile": "~mounika_marreddy1;~SUBBA_REDDY_OOTA1;~Venkata_Charan_Chinni1;~Manish_Gupta1;~Lucie_Flekova1", "aff": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;MPI-SWS;International Institute of Information Technology Hyderabad;Microsoft;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn", "aff_domain": "uni-bonn.de;mpi-sws.org;iiit.ac.in;microsoft.com;uni-bonn.de", "position": "Postdoc;Visiting Scholar;Undergrad student;Principal Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024usdc,\ntitle={{USDC}: A Dataset of \\${\\textbackslash}underline\\{U\\}\\$ser \\${\\textbackslash}underline\\{S\\}\\$tance and \\${\\textbackslash}underline\\{D\\}\\$ogmatism in Long \\${\\textbackslash}underline\\{C\\}\\$onversations},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=aTXhTD44nF}\n}", "github": "", "project": "", "reviewers": "w9xr;nhxb;oKi5;7SSx;DbAk;KbQd", "site": "https://openreview.net/forum?id=aTXhTD44nF", "pdf_size": 1108996, "rating": "4;4;4;4;6;7", "confidence": "4;4;4;5;4;4", "wc_summary_and_contributions": "109;73;16;40;40;118", "wc_strengths": "45;57;24;64;9;43", "wc_improvement": "36;24;97;557;66;56", "wc_limitations": "18;10;7;26;8;61", "wc_correctness": "17;1;18;7;1;23", "wc_clarity": "186;6;5;4;1;1", "wc_relation_to_prior_work": "26;20;13;5;6;1", "wc_documentation": "21;1;10;2;1;1", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "459;193;191;706;133;305", "wc_reply_reviewers": "36;53;32;0;12;64", "wc_reply_authors": "364;182;208;165;15;243", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "5;6;4;5;2;3", "rating_avg": [ 4.833333333333333, 1.2133516482134197 ], "confidence_avg": [ 4.166666666666667, 0.37267799624996495 ], "wc_summary_and_contributions_avg": [ 66.0, 37.53664875824692 ], "wc_strengths_avg": [ 40.333333333333336, 18.77646280734355 ], "wc_improvement_avg": [ 139.33333333333334, 188.21057946412634 ], "wc_limitations_avg": [ 21.666666666666668, 18.785337071473826 ], "wc_correctness_avg": [ 11.166666666666666, 8.610394235392993 ], "wc_clarity_avg": [ 33.833333333333336, 68.07695808584738 ], "wc_relation_to_prior_work_avg": [ 11.833333333333334, 8.82074574826616 ], "wc_documentation_avg": [ 6.0, 7.438637868140466 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 331.1666666666667, 198.1299377232583 ], "wc_reply_reviewers_avg": [ 32.833333333333336, 21.988001778747932 ], "wc_reply_authors_avg": [ 196.16666666666666, 103.6780540369508 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 4.166666666666667, 1.3437096247164249 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3071475584169756, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Max Planck Institute for Software Systems;International Institute of Information Technology;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.uni-bonn.de/;https://www.mpi-sws.org;https://iiit Hyderabad.ac.in;https://www.microsoft.com", "aff_unique_abbr": "Uni Bonn;MPI-SWS;IIIT Hyderabad;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hyderabad", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Germany;India;United States" }, { "title": "MotionTTT: 2D Test-Time-Training Motion Estimation for 3D Motion Corrected MRI", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94551", "id": "aUHSwmHRVb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aUHSwmHRVb", "openreview": "https://openreview.net/forum?id=aUHSwmHRVb", "poster": "/media/PosterPDFs/NeurIPS%202024/94551.png?t=1733393682.9980714", "project": "", "author_site": "Tobit Klug, Kun Wang, Stefan Ruschke, Reinhard Heckel", "tldr": "", "abstract": "A major challenge of the long measurement times in magnetic resonance imaging (MRI), an important medical imaging technology, is that patients may move during data acquisition. This leads to severe motion artifacts in the reconstructed images and volumes. In this paper, we propose MotionTTT a deep learning-based test-time-training (TTT) method for accurate motion estimation. The key idea is that a neural network trained for motion-free reconstruction has a small loss if there is no motion, thus optimizing over motion parameters passed through the reconstruction network enables accurate estimation of motion. The estimated motion parameters enable to correct for the motion and to reconstruct accurate motion-corrected images. Our method uses 2D reconstruction networks to estimate rigid motion in 3D, and constitutes the first deep learning based method for 3D rigid motion estimation towards 3D-motion-corrected MRI. We show that our method can provably reconstruct motion parameters for a simple signal and neural network model. We demonstrate the effectiveness of our method for both retrospectively simulated motion and prospectively collected real motion-corrupted data. Code is available at \\url{https://github.com/MLI-lab/MRI_MotionTTT}.", "keywords": "deep learning based motion estimation;3D imaging;MRI;motion artifacts;medical imaging;test-time-training;motion correction", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/2e9f4b097bfa4ac39b3d3093fd909281608312c3.zip", "author": "Tobit Klug;Kun Wang;Stefan Ruschke;Reinhard Heckel", "authorids": "~Tobit_Klug1;~Kun_Wang23;~Stefan_Ruschke1;~Reinhard_Heckel1", "gender": "M;M;M;M", "homepage": "https://www.ce.cit.tum.de/mli/people/tobit-klug/;;http://www.bmrr.de;", "dblp": "330/2923;;;81/9668", "google_scholar": ";;;ZWV0I7cAAAAJ", "orcid": ";;0000-0001-9658-6541;", "linkedin": ";kun-wang-202a202ab/;;", "or_profile": "~Tobit_Klug1;~Kun_Wang23;~Stefan_Ruschke1;~Reinhard_Heckel1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Rice University", "aff_domain": "tum.de;tum.de;tum.de;rice.edu", "position": "PhD student;MS student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nklug2024motionttt,\ntitle={Motion{TTT}: 2D Test-Time-Training Motion Estimation for 3D Motion Corrected {MRI}},\nauthor={Tobit Klug and Kun Wang and Stefan Ruschke and Reinhard Heckel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aUHSwmHRVb}\n}", "github": "", "reviewers": "H96H;4RZt;fV1o;pjnK", "pdf_size": 21930362, "rating": "3;4;4;6", "confidence": "3;2;4;4", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "1;2;3;3", "wc_summary": "63;55;57;47", "wc_strengths": "42;22;63;127", "wc_weaknesses": "181;90;169;186", "wc_questions": "99;3;100;8", "wc_limitations": "14;4;81;8", "wc_review": "399;174;470;376", "wc_reply_reviewers": "0;28;0;50", "wc_reply_authors": "0;62;0;159", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 55.5, 5.722761571129799 ], "wc_strengths_avg": [ 63.5, 39.423977475642914 ], "wc_weaknesses_avg": [ 156.5, 38.88765871070152 ], "wc_questions_avg": [ 52.5, 47.03456176047567 ], "wc_limitations_avg": [ 26.75, 31.522809202226885 ], "wc_review_avg": [ 354.75, 109.95766230690792 ], "wc_reply_reviewers_avg": [ 19.5, 20.994046775217015 ], "wc_reply_authors_avg": [ 55.25, 65.02835919812217 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.48420012470625223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1905781321696318910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tum.de;tum.de;tum.de;rice.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Rice University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.rice.edu", "aff_unique_abbr": "TUM;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Germany;United States" }, { "title": "Evaluating the World Model Implicit in a Generative Model", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94550", "id": "aVK4JFpegy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aVK4JFpegy", "openreview": "https://openreview.net/forum?id=aVK4JFpegy", "poster": "", "project": "", "author_site": "Keyon Vafa, Justin Chen, Ashesh Rambachan, Jon Kleinberg, Sendhil Mullainathan", "tldr": "", "abstract": "Recent work suggests that large language models may implicitly learn world models. How should we assess this possibility? We formalize this question for the case where the underlying reality is governed by a deterministic finite automaton. This includes problems as diverse as simple logical reasoning, geographic navigation, game-playing, and chemistry. We propose new evaluation metrics for world model recovery inspired by the classic Myhill-Nerode theorem from language theory. We illustrate their utility in three domains: game playing, logic puzzles, and navigation. In all domains, the generative models we consider do well on existing diagnostics for assessing world models, but our evaluation metrics reveal their world models to be far less coherent than they appear. Such incoherence creates fragility: using a generative model to solve related but subtly different tasks can lead to failures. Building generative models that meaningfully capture the underlying logic of the domains they model would be immensely valuable; our results suggest new ways to assess how close a given model is to that goal.", "keywords": "world models;large language models;evaluation", "primary_area": "evaluation", "supplementary_material": "", "author": "Keyon Vafa;Justin Y. Chen;Ashesh Rambachan;Jon Kleinberg;Sendhil Mullainathan", "authorids": "~Keyon_Vafa1;~Justin_Y._Chen1;~Ashesh_Rambachan1;~Jon_Kleinberg3;~Sendhil_Mullainathan2", "gender": "M;M;M;M;M", "homepage": "http://www.keyonvafa.com;https://asheshrambachan.github.io/;http://www.cs.cornell.edu/home/kleinber/;https://www.chicagobooth.edu/faculty/directory/m/sendhil-mullainathan;https://people.csail.mit.edu/justc/", "dblp": ";249/2625;https://dblp.uni-trier.de/pid/k/JonMKleinberg.html;25/169;254/0805.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;VX7d5EQAAAAJ;oExfyEkAAAAJ;X_myU1YAAAAJ", "orcid": ";;0000-0002-1929-2512;;", "linkedin": ";ashesh-rambachan/;;;", "or_profile": "~Keyon_Vafa1;~Ashesh_Rambachan1;~Jon_Kleinberg3;~Sendhil_Mullainathan2;~Justin_Y_Chen1", "aff": "Columbia University;Massachusetts Institute of Technology;;University of Chicago;Massachusetts Institute of Technology", "aff_domain": "columbia.edu;mit.edu;;uchicago.edu;mit.edu", "position": "PhD student;Assistant Professor;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nvafa2024evaluating,\ntitle={Evaluating the World Model Implicit in a Generative Model},\nauthor={Keyon Vafa and Justin Y. Chen and Ashesh Rambachan and Jon Kleinberg and Sendhil Mullainathan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aVK4JFpegy}\n}", "github": "", "reviewers": "RjHx;S5LR;YkFb;7KXV", "pdf_size": 11260313, "rating": "3;4;5;9", "confidence": "5;4;1;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "197;176;75;94", "wc_strengths": "60;29;127;80", "wc_weaknesses": "462;371;58;103", "wc_questions": "17;34;1;50", "wc_limitations": "11;48;1;5", "wc_review": "747;658;262;332", "wc_reply_reviewers": "1530;99;0;13", "wc_reply_authors": "2560;235;70;0", "reply_reviewers": "4;1;0;1", "reply_authors": "6;2;2;1", "rating_avg": [ 5.25, 2.277608394786075 ], "confidence_avg": [ 3.5, 1.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 135.5, 51.97355096585185 ], "wc_strengths_avg": [ 74.0, 35.58791929854849 ], "wc_weaknesses_avg": [ 248.5, 171.791297800558 ], "wc_questions_avg": [ 25.5, 18.33712082089225 ], "wc_limitations_avg": [ 16.25, 18.673175948402566 ], "wc_review_avg": [ 499.75, 206.6644321115755 ], "wc_reply_reviewers_avg": [ 410.5, 647.4621610565362 ], "wc_reply_authors_avg": [ 716.25, 1067.9031732793007 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.75, 1.920286436967152 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.10976425998969036, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16129125609759293654&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "columbia.edu;mit.edu;;uchicago.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Columbia University;Massachusetts Institute of Technology;University of Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://web.mit.edu;https://www.uchicago.edu", "aff_unique_abbr": "Columbia;MIT;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "4+3 Phases of Compute-Optimal Neural Scaling Laws", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94549", "id": "aVSxwicpAk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aVSxwicpAk", "openreview": "https://openreview.net/forum?id=aVSxwicpAk", "poster": "/media/PosterPDFs/NeurIPS%202024/94549.png?t=1732152088.4649322", "project": "", "author_site": "Elliot Paquette, Courtney Paquette, Lechao Xiao, Jeffrey Pennington", "tldr": "", "abstract": "We consider the solvable neural scaling model with three parameters: data complexity, target complexity, and model-parameter-count. We use this neural scaling model to derive new predictions about the compute-limited, infinite-data scaling law regime. To train the neural scaling model, we run one-pass stochastic gradient descent on a mean-squared loss. We derive a representation of the loss curves which holds over all iteration counts and improves in accuracy as the model parameter count grows. We then analyze the compute-optimal model-parameter-count, and identify 4 phases (+3 subphases) in the data-complexity/target-complexity phase-plane. The phase boundaries are determined by the relative importance of model capacity, optimizer noise, and embedding of the features. We furthermore derive, with mathematical proof and extensive numerical evidence, the scaling-law exponents in all of these phases, in particular computing the optimal model-parameter-count as a function of floating point operation budget. We include a colab notebook https://tinyurl.com/2saj6bkj, nanoChinchilla, that reproduces some key results of the paper.", "keywords": "scaling laws;compute-optimal curves;high-dimensional probability/statistics;random matrix theory;stochastic optimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Elliot Paquette;Courtney Paquette;Lechao Xiao;Jeffrey Pennington", "authorids": "~Elliot_Paquette1;~Courtney_Paquette1;~Lechao_Xiao2;~Jeffrey_Pennington1", "gender": "M;M;M;F", "homepage": "https://elliotpaquette.github.io;https://sites.google.com/site/lechaoxiao/;;https://cypaquette.github.io/", "dblp": "126/6986;222/3238;https://dblp.org/pers/p/Pennington:Jeffrey.html;https://dblp.uni-trier.de/pers/hd/p/Paquette:Courtney", "google_scholar": ";fvwzUnIAAAAJ;cn_FoswAAAAJ;EkeZG30AAAAJ", "orcid": "0000-0003-4156-6687;;;", "linkedin": ";;jpennin;", "or_profile": "~Elliot_Paquette1;~Lechao_Xiao2;~Jeffrey_Pennington1;~Courtney_Yumiko_Paquette1", "aff": "McGill University;Google DeepMind;Google;Google", "aff_domain": "mcgill.ca;google.com;google.com;google.com", "position": "Associate Professor;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\npaquette2024,\ntitle={4+3 Phases of Compute-Optimal Neural Scaling Laws},\nauthor={Elliot Paquette and Courtney Paquette and Lechao Xiao and Jeffrey Pennington},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aVSxwicpAk}\n}", "github": "", "reviewers": "rEuG;wrFd;9ixz;ahLt;HQ3F", "pdf_size": 14140096, "rating": "6;6;7;7;7", "confidence": "2;4;3;3;3", "soundness": "2;4;4;3;3", "novelty": "2;3;3;3;3", "presentation": "1;2;3;3;3", "wc_summary": "61;113;96;78;95", "wc_strengths": "49;83;65;25;49", "wc_weaknesses": "33;436;44;391;50", "wc_questions": "378;313;45;2;66", "wc_limitations": "13;37;1;2;115", "wc_review": "534;982;251;498;375", "wc_reply_reviewers": "57;378;11;39;82", "wc_reply_authors": "0;488;0;1052;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;2;1;3;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 88.6, 17.692936443677176 ], "wc_strengths_avg": [ 54.2, 19.249935064825547 ], "wc_weaknesses_avg": [ 190.8, 182.4712580106796 ], "wc_questions_avg": [ 160.8, 153.59348944535375 ], "wc_limitations_avg": [ 33.6, 42.71580503747998 ], "wc_review_avg": [ 528.0, 247.86689976679017 ], "wc_reply_reviewers_avg": [ 113.4, 134.31396055511135 ], "wc_reply_authors_avg": [ 308.0, 417.25963140471663 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4953040340425616409&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mcgill.ca;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "McGill University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.mcgill.ca;https://deepmind.com", "aff_unique_abbr": "McGill;DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "Learning to grok: Emergence of in-context learning and skill composition in modular arithmetic tasks", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94548", "id": "aVh9KRZdRk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aVh9KRZdRk", "openreview": "https://openreview.net/forum?id=aVh9KRZdRk", "poster": "", "project": "", "author_site": "Tianyu He, Darshil Doshi, Aritra Das, Andrey Gromov", "tldr": "", "abstract": "Large language models can solve tasks that were not present in the training set. This capability is believed to be due to in-context learning and skill composition. In this work, we study the emergence of in-context learning and skill composition in a collection of modular arithmetic tasks. Specifically, we consider a finite collection of linear modular functions $z = a x + b y \\text{ mod } p$ labeled by the vector $(a, b) \\in \\mathbb{Z}_p^2$. We use some of these tasks for pre-training and the rest for out-of-distribution testing. We empirically show that a GPT-style transformer exhibits a transition from in-distribution to out-of-distribution generalization as the number of pre-training tasks increases. We find that the smallest model capable of out-of-distribution generalization requires two transformer blocks, while for deeper models, the out-of-distribution generalization phase is *transient*, necessitating early stopping. Finally, we perform an interpretability study of the pre-trained models, revealing highly structured representations in both attention heads and MLPs; and discuss the learned algorithms. Notably, we find an algorithmic shift in deeper models, as we go from few to many in-context examples.", "keywords": "In-Context Learning;Grokking;Modular Arithmetic;Interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/c28c40f21e731efeeeaac191191ae5a515ba2185.zip", "author": "Tianyu He;Darshil Doshi;Aritra Das;Andrey Gromov", "authorids": "~Tianyu_He2;~Darshil_Doshi1;~Aritra_Das1;~Andrey_Gromov1", "gender": "M;M;M;M", "homepage": ";;;", "dblp": ";;;", "google_scholar": "STDwwY8AAAAJ;4dp-dEMAAAAJ;;D056qfMAAAAJ", "orcid": "0000-0002-2592-9698;0000-0003-3578-9016;0000-0002-1658-4608;", "linkedin": ";darshil-doshi-2b010b7b/;aritra-das-22b7221a2/;andrey-gromov-2329a241", "or_profile": "~Tianyu_He2;~Darshil_Doshi1;~Aritra_Das1;~Andrey_Gromov1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhe2024learning,\ntitle={Learning to grok: Emergence of in-context learning and skill composition in modular arithmetic tasks},\nauthor={Tianyu He and Darshil Doshi and Aritra Das and Andrey Gromov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aVh9KRZdRk}\n}", "github": "", "reviewers": "rwBm;Jerg;CrUb;PjEW", "pdf_size": 32641525, "rating": "7;7;7;8", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "4;3;3;4", "wc_summary": "132;50;67;181", "wc_strengths": "74;60;104;227", "wc_weaknesses": "109;135;195;420", "wc_questions": "339;35;239;161", "wc_limitations": "9;13;4;17", "wc_review": "663;293;609;1006", "wc_reply_reviewers": "31;14;25;42", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 107.5, 52.31873469418006 ], "wc_strengths_avg": [ 116.25, 65.8876885313182 ], "wc_weaknesses_avg": [ 214.75, 122.53647416177765 ], "wc_questions_avg": [ 193.5, 111.15192306028717 ], "wc_limitations_avg": [ 10.75, 4.815340071064556 ], "wc_review_avg": [ 642.75, 252.89560593256658 ], "wc_reply_reviewers_avg": [ 28.0, 10.124228365658293 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13830233925620894422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umd.edu;umd.edu;umd.edu;umd.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unified Covariate Adjustment for Causal Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94547", "id": "aX9z2eT6ul", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aX9z2eT6ul", "openreview": "https://openreview.net/forum?id=aX9z2eT6ul", "poster": "/media/PosterPDFs/NeurIPS%202024/94547.png?t=1733598540.2512972", "project": "", "author_site": "Yonghan Jung, Jin Tian, Elias Bareinboim", "tldr": "", "abstract": "Causal effect identification and estimation are two crucial tasks in causal inference. Although causal effect identification has been theoretically resolved, many existing estimators only address a subset of scenarios, known as the sequential back-door adjustment (SBD) (Pearl and Robins, 1995) or g-formula (Robins, 1986). Recent efforts for developing general-purpose estimators with broader coverage, incorporating the front-door adjustment (FD) (Pearl, 2000) and more, lack scalability due to the high computational cost of summing over high-dimensional variables. In this paper, we introduce a novel approach that achieves broad coverage of causal estimands beyond the SBD, incorporating various sum-product functionals like the FD, while maintaining scalability -- estimated in polynomial time relative to the number of variables and samples. Specifically, we present the class of UCA for which a scalable and doubly robust estimator is developed. \nIn particular, we illustrate the expressiveness of UCA for a wide spectrum of causal estimands (e.g., SBD, FD, and more) in causal inference. We then develop an estimator that exhibits computational efficiency and doubly robustness. The scalability and robustness of the proposed framework are verified through simulations.", "keywords": "causal effect estimation;causal effect identification;debiased machine learning;semiparametric inference;front-door adjustment", "primary_area": "causal_inference", "supplementary_material": "", "author": "Yonghan Jung;Jin Tian;Elias Bareinboim", "authorids": "~Yonghan_Jung1;~Jin_Tian1;~Elias_Bareinboim2", "gender": ";M;M", "homepage": "https://sites.google.com/view/yonghanjung;https://mbzuai.ac.ae/study/faculty/jin-tian/;https://causalai.net", "dblp": "201/0684.html;04/4658-1;85/9005", "google_scholar": "D9ATOa4AAAAJ;T0crkfoAAAAJ;r5U-D7YAAAAJ", "orcid": ";0000-0001-5313-1600;", "linkedin": "yhansjung/;;", "or_profile": "~Yonghan_Jung1;~Jin_Tian1;~Elias_Bareinboim2", "aff": "Purdue University;Iowa State University;Columbia University", "aff_domain": "purdue.edu;iastate.edu;columbia.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njung2024unified,\ntitle={Unified Covariate Adjustment for Causal Inference},\nauthor={Yonghan Jung and Jin Tian and Elias Bareinboim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aX9z2eT6ul}\n}", "github": "", "reviewers": "Z7av;cBfm;ZBfF;Q5Gy", "pdf_size": 779069, "rating": "6;7;7;7", "confidence": "3;3;3;3", "soundness": "2;4;4;3", "novelty": "3;4;3;2", "presentation": "1;3;3;3", "wc_summary": "46;80;62;31", "wc_strengths": "47;51;82;60", "wc_weaknesses": "237;153;189;109", "wc_questions": "219;183;66;2", "wc_limitations": "1;19;35;1", "wc_review": "550;486;434;203", "wc_reply_reviewers": "29;21;23;10", "wc_reply_authors": "11;12;13;9", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 54.75, 18.239723133863627 ], "wc_strengths_avg": [ 60.0, 13.546217184144066 ], "wc_weaknesses_avg": [ 172.0, 47.02127178203499 ], "wc_questions_avg": [ 117.5, 87.44283847176966 ], "wc_limitations_avg": [ 14.0, 14.177446878757825 ], "wc_review_avg": [ 418.25, 130.88998242799178 ], "wc_reply_reviewers_avg": [ 20.75, 6.869315832017043 ], "wc_reply_authors_avg": [ 11.25, 1.479019945774904 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7001080719800434901&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "purdue.edu;iastate.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Purdue University;Iowa State University;Columbia University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.purdue.edu;https://www.iastate.edu;https://www.columbia.edu", "aff_unique_abbr": "Purdue;ISU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CA-SSLR: Condition-Aware Self-Supervised Learning Representation for Generalized Speech Processing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94546", "id": "aXApeuAYkg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aXApeuAYkg", "openreview": "https://openreview.net/forum?id=aXApeuAYkg", "poster": "/media/PosterPDFs/NeurIPS%202024/94546.png?t=1733798162.9664543", "project": "", "author_site": "Yen-Ju Lu, Jing Liu, Thomas Thebaud, Laureano Moro-Velazquez, Ariya Rastrow, Najim Dehak, Jesus Villalba", "tldr": "", "abstract": "We introduce Condition-Aware Self-Supervised Learning Representation (CA-SSLR), a generalist conditioning model broadly applicable to various speech-processing tasks. Compared to standard fine-tuning methods that optimize for downstream models, CA-SSLR integrates language and speaker embeddings from earlier layers, making the SSL model aware of the current language and speaker context.\nThis approach reduces the reliance on the input audio features while preserving the integrity of the base SSLR. CA-SSLR improves the model\u2019s capabilities and demonstrates its generality on unseen tasks with minimal task-specific tuning. Our method employs linear modulation to dynamically adjust internal representations, enabling fine-grained adaptability without significantly altering the original model behavior. Experiments show that CA-SSLR reduces the number of trainable parameters, mitigates overfitting, and excels in under-resourced and unseen tasks. Specifically, CA-SSLR achieves a 10\\% relative reduction in LID errors, a 37\\% improvement in ASR CER on the ML-SUPERB benchmark, and a 27\\% decrease in SV EER on VoxCeleb-1, demonstrating its effectiveness.", "keywords": "multilingual speech recognition;speaker verification;self-supervised learning representation;conditional adaptation", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Yen-Ju Lu;Jing Liu;Thomas Thebaud;Laureano Moro-Velazquez;Ariya Rastrow;Najim Dehak;Jesus Villalba", "authorids": "~Yen-Ju_Lu1;~Jing_Liu31;~Thomas_Thebaud1;~Laureano_Moro-Velazquez1;~Ariya_Rastrow2;~Najim_Dehak1;~Jesus_Villalba1", "gender": "M;;M;M;M;M;M", "homepage": "https://neillu23.github.io/;https://scholar.google.com/citations?user=xuUm818AAAAJ&hl=en;https://thomasthebaud.github.io/;https://pages.jh.edu/lmorove1/index.html;;https://engineering.jhu.edu/ece/faculty/najim-dehak/;", "dblp": "164/5920;;247/3893;173/6483;;;211/9841", "google_scholar": "emtNw84AAAAJ;xuUm818AAAAJ;5wgj-vYAAAAJ;WepMzXoAAAAJ;78YEqxgAAAAJ;G43gXjIAAAAJ;8j1-qmAAAAA", "orcid": ";;0000-0001-8953-7872;0000-0002-3033-7005;;;0000-0001-9459-8426", "linkedin": ";;thomas-thebaud/;;ariya-rastrow-4ab149b/;;", "or_profile": "~Yen-Ju_Lu1;~Jing_Liu31;~Thomas_Thebaud1;~Laureano_Moro-Velazquez1;~Ariya_Rastrow2;~Najim_Dehak1;~Jesus_Villalba_Lopez1", "aff": "Apple;Amazon;Johns Hopkins University;Whiting School of Engineering;;Johns Hopkins University;Johns Hopkins University", "aff_domain": "apple.com;amazon.com;jhu.edu;engineering.jhu.edu;;jhu.edu;jh.edu", "position": "Research Intern;Researcher;Researcher;Assistant Professor;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlu2024casslr,\ntitle={{CA}-{SSLR}: Condition-Aware Self-Supervised Learning Representation for Generalized Speech Processing},\nauthor={Yen-Ju Lu and Jing Liu and Thomas Thebaud and Laureano Moro-Velazquez and Ariya Rastrow and Najim Dehak and Jesus Villalba},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aXApeuAYkg}\n}", "github": "", "reviewers": "xgMo;mDeV;Jsg6;4eyk;iiaB", "pdf_size": 1618722, "rating": "4;6;6;7;7", "confidence": "3;4;3;4;5", "soundness": "2;3;3;4;3", "novelty": "3;3;3;3;3", "presentation": "1;3;2;3;3", "wc_summary": "105;60;50;60;80", "wc_strengths": "84;22;27;56;28", "wc_weaknesses": "519;43;140;101;73", "wc_questions": "298;117;3;257;48", "wc_limitations": "37;68;4;15;3", "wc_review": "1043;310;224;489;232", "wc_reply_reviewers": "35;28;19;0;30", "wc_reply_authors": "25;25;29;0;20", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 71.0, 19.595917942265423 ], "wc_strengths_avg": [ 43.4, 23.54230235129946 ], "wc_weaknesses_avg": [ 175.2, 174.85125106787197 ], "wc_questions_avg": [ 144.6, 115.16006252169196 ], "wc_limitations_avg": [ 25.4, 24.565015774470815 ], "wc_review_avg": [ 459.6, 306.8905993998513 ], "wc_reply_reviewers_avg": [ 22.4, 12.338557452149745 ], "wc_reply_authors_avg": [ 19.8, 10.303397497913005 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7319250547113999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DX0aaq2PVgsJ:scholar.google.com/&scioq=CA-SSLR:+Condition-Aware+Self-Supervised+Learning+Representation+for+Generalized+Speech+Processing&hl=en&as_sdt=0,24", "gs_version_total": 6, "email": "apple.com;amazon.com;jhu.edu;engineering.jhu.edu;;jhu.edu;jh.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;2;2", "aff_unique_norm": "Apple;Amazon;Johns Hopkins University", "aff_unique_dep": "Apple Inc.;Amazon.com, Inc.;", "aff_unique_url": "https://www.apple.com;https://www.amazon.com;https://www.jhu.edu", "aff_unique_abbr": "Apple;Amazon;JHU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Baltimore", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CNCA: Toward Customizable and Natural Generation of Adversarial Camouflage for Vehicle Detectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94545", "id": "aXNZG82IzV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aXNZG82IzV", "openreview": "https://openreview.net/forum?id=aXNZG82IzV", "poster": "/media/PosterPDFs/NeurIPS%202024/94545.png?t=1732606165.1405241", "project": "", "author_site": "Linye Lyu, Jiawei Zhou, Daojing He, YU LI", "tldr": "", "abstract": "Prior works on physical adversarial camouflage against vehicle detectors mainly focus on the effectiveness and robustness of the attack. The current most successful methods optimize 3D vehicle texture at a pixel level. However, this results in conspicuous and attention-grabbing patterns in the generated camouflage, which humans can easily identify. To address this issue, we propose a Customizable and Natural Camouflage Attack (CNCA) method by leveraging an off-the-shelf pre-trained diffusion model. By sampling the optimal texture image from the diffusion model with a user-specific text prompt, our method can generate natural and customizable adversarial camouflage while maintaining high attack performance. With extensive experiments on the digital and physical worlds and user studies, the results demonstrate that our proposed method can generate significantly more natural-looking camouflage than the state-of-the-art baselines while achieving competitive attack performance.", "keywords": "Physical Adversarial Attack; Vehicle Detection; Customiable and Natural Adversarial Camouflage;", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Linye Lyu;Jiawei Zhou;Daojing He;YU LI", "authorids": "~Linye_Lyu1;~Jiawei_Zhou4;~Daojing_He1;~YU_LI10", "gender": "M;;M;Not Specified", "homepage": ";;http://faculty.hitsz.edu.cn/hedaojing;http://liyu.one", "dblp": "371/4134;;60/7270;34/2997-7", "google_scholar": "qjHyCmwAAAAJ;;;M0zhrM8AAAAJ", "orcid": ";;0000-0002-3820-8128;", "linkedin": ";;;", "or_profile": "~Linye_Lyu1;~Jiawei_Zhou4;~Daojing_He1;~YU_LI10", "aff": "Harbin Institute of Technology;;Harbin Institute of Technology;Harbin Institute of Technology (Shen Zhen)", "aff_domain": "stu.hit.edu.cn;;hit.edu.cn;hit.edu.cn", "position": "PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlyu2024cnca,\ntitle={{CNCA}: Toward Customizable and Natural Generation of Adversarial Camouflage for Vehicle Detectors},\nauthor={Linye Lyu and Jiawei Zhou and Daojing He and YU LI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aXNZG82IzV}\n}", "github": "", "reviewers": "guSy;oknY;MuNz;TBRf", "pdf_size": 40722944, "rating": "4;5;5;6", "confidence": "5;5;2;4", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;2;3", "wc_summary": "74;42;101;87", "wc_strengths": "74;38;80;33", "wc_weaknesses": "60;26;86;130", "wc_questions": "5;68;52;138", "wc_limitations": "28;5;5;43", "wc_review": "241;179;324;431", "wc_reply_reviewers": "15;0;12;82", "wc_reply_authors": "136;47;128;89", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 21.828879952943073 ], "wc_strengths_avg": [ 56.25, 20.932928605429293 ], "wc_weaknesses_avg": [ 75.5, 37.9835490706174 ], "wc_questions_avg": [ 65.75, 47.70940682926167 ], "wc_limitations_avg": [ 20.25, 16.145819892467525 ], "wc_review_avg": [ 293.75, 94.47585670423952 ], "wc_reply_reviewers_avg": [ 27.25, 32.104322138927024 ], "wc_reply_authors_avg": [ 100.0, 35.390676738372775 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.28867513459481287, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3732884303743665093&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stu.hit.edu.cn;;hit.edu.cn;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Harbin;Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning 3D Equivariant Implicit Function with Patch-Level Pose-Invariant Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94544", "id": "aXS1pwMa8I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aXS1pwMa8I", "openreview": "https://openreview.net/forum?id=aXS1pwMa8I", "poster": "/media/PosterPDFs/NeurIPS%202024/94544.png?t=1730463855.4111955", "project": "", "author_site": "Xin Hu, Xiaole Tang, Ruixuan Yu, Jian Sun", "tldr": "", "abstract": "Implicit neural representation gains popularity in modeling the continuous 3D surface for 3D representation and reconstruction. In this work, we are motivated by the fact that the local 3D patches repeatedly appear on 3D shapes/surfaces if the factor of poses is removed. Based on this observation, we propose the 3D patch-level equivariant implicit function (PEIF) based on the 3D patch-level pose-invariant representation, allowing us to reconstruct 3D surfaces by estimating equivariant displacement vector fields for query points. Specifically, our model is based on the pose-normalized query/patch pairs and enhanced by the proposed intrinsic patch geometry representation, modeling the intrinsic 3D patch geometry feature by learnable multi-head memory banks. Extensive experiments show that our model achieves state-of-the-art performance on multiple surface reconstruction datasets, and also exhibits better generalization to crossdataset shapes and robustness to arbitrary rotations. Our code will be available at https://github.com/mathXin112/PEIF.git.", "keywords": "Equivariant implicit neural representation; Pose-invariant representation; Generalizability to 3D objects; Robustness to transformation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xin Hu;Xiaole Tang;Ruixuan Yu;Jian Sun", "authorids": "~Xin_Hu1;~Xiaole_Tang1;~Ruixuan_Yu1;~Jian_Sun1", "gender": "F;M;F;M", "homepage": ";;https://faculty.sdu.edu.cn/yuruixuan/zh_CN/index.htm;https://gr.xjtu.edu.cn/en/web/jiansun/publications", "dblp": ";331/0808;218/1946;68/4942-9.html", "google_scholar": "https://scholar.google.com.hk/citations?user=zt9JYkkAAAAJ;CwvKZ8QAAAAJ;rq1i7HkAAAAJ;SSgNWOMAAAAJ", "orcid": ";0009-0003-8070-0591;;", "linkedin": "https://www.linkedin.cn/injobs/in/\u946b-\u80e1-2b373b154;;;", "or_profile": "~Xin_Hu1;~Xiaole_Tang1;~Ruixuan_Yu1;~Jian_Sun1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Shandong University;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;sdu.edu.cn;xjtu.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nhu2024learning,\ntitle={Learning 3D Equivariant Implicit Function with Patch-Level Pose-Invariant Representation},\nauthor={Xin Hu and Xiaole Tang and Ruixuan Yu and Jian Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aXS1pwMa8I}\n}", "github": "", "reviewers": "FF8y;H6Ua;dfBj;6vbf", "pdf_size": 15394848, "rating": "5;5;5;6", "confidence": "4;4;3;3", "soundness": "3;3;2;4", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "62;100;74;85", "wc_strengths": "48;27;56;115", "wc_weaknesses": "326;148;179;135", "wc_questions": "49;3;88;31", "wc_limitations": "13;7;14;144", "wc_review": "498;285;411;510", "wc_reply_reviewers": "252;27;27;68", "wc_reply_authors": "395;26;139;123", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 14.00669482783144 ], "wc_strengths_avg": [ 61.5, 32.65348373451139 ], "wc_weaknesses_avg": [ 197.0, 76.17414259445262 ], "wc_questions_avg": [ 42.75, 30.84132779242813 ], "wc_limitations_avg": [ 44.5, 57.5086949947571 ], "wc_review_avg": [ 426.0, 89.92496872393117 ], "wc_reply_reviewers_avg": [ 93.5, 93.0282215244385 ], "wc_reply_authors_avg": [ 170.75, 136.49977106207908 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:m2MYAkJ7ILUJ:scholar.google.com/&scioq=Learning+3D+Equivariant+Implicit+Function+with+Patch-Level+Pose-Invariant+Representation&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "xjtu.edu.cn;xjtu.edu.cn;sdu.edu.cn;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Xi'an Jiao Tong University;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "XJTU;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Mind the Gap: A Causal Perspective on Bias Amplification in Prediction & Decision-Making", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94543", "id": "aXYL24yhjN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aXYL24yhjN", "openreview": "https://openreview.net/forum?id=aXYL24yhjN", "poster": "/media/PosterPDFs/NeurIPS%202024/94543.png?t=1731710017.924052", "project": "", "author_site": "Drago Plecko, Elias Bareinboim", "tldr": "", "abstract": "As society increasingly relies on AI-based tools for decision-making in socially sensitive domains, investigating fairness and equity of such automated systems has become a critical field of inquiry. Most of the literature in fair machine learning focuses on defining and achieving fairness criteria in the context of prediction, while not explicitly focusing on how these predictions may be used later on in the pipeline. For instance, if commonly used criteria, such as independence or sufficiency, are satisfied for a prediction score $S$ used for binary classification, they need not be satisfied after an application of a simple thresholding operation on $S$ (as commonly used in practice). \nIn this paper, we take an important step to address this issue in numerous statistical and causal notions of fairness. We introduce the notion of a margin complement, which measures how much a prediction score $S$ changes due to a thresholding operation.\nWe then demonstrate that the marginal difference in the optimal 0/1 predictor $\\widehat Y$ between groups, written $P(\\hat y \\mid x_1) - P(\\hat y \\mid x_0)$, can be causally decomposed into the influences of $X$ on the $L_2$-optimal prediction score $S$ and the influences of $X$ on the margin complement $M$, along different causal pathways (direct, indirect, spurious). We then show that under suitable causal assumptions, the influences of $X$ on the prediction score $S$ are equal to the influences of $X$ on the true outcome $Y$. This yields a new decomposition of the disparity in the predictor $\\widehat Y$ that allows us to disentangle causal differences inherited from the true outcome $Y$ that exists in the real world vs. those coming from the optimization procedure itself. This observation highlights the need for more regulatory oversight due to the potential for bias amplification, and to address this issue we introduce new notions of weak and strong business necessity, together with an algorithm for assessing whether these notions are satisfied. We apply our method to three real-world datasets and derive new insights on bias amplification in prediction and decision-making.", "keywords": "Causal Fairness;Fair Machine Learning;Causal Inference;Trustworthy AI", "primary_area": "fairness", "supplementary_material": "/attachment/61d70e5386b11a21f9696ecf663eb9a174741415.zip", "author": "Drago Plecko;Elias Bareinboim", "authorids": "~Drago_Plecko1;~Elias_Bareinboim2", "gender": "M;M", "homepage": "https://people.math.ethz.ch/~pleckod/;https://causalai.net", "dblp": "254/3058;85/9005", "google_scholar": ";r5U-D7YAAAAJ", "orcid": "0000-0002-5433-196X;", "linkedin": ";", "or_profile": "~Drago_Plecko1;~Elias_Bareinboim2", "aff": "Columbia University;Columbia University", "aff_domain": "cs.columbia.edu;columbia.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nplecko2024mind,\ntitle={Mind the Gap: A Causal Perspective on Bias Amplification in Prediction \\& Decision-Making},\nauthor={Drago Plecko and Elias Bareinboim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aXYL24yhjN}\n}", "github": "", "reviewers": "ZV8n;bCLY;1zLn", "pdf_size": 842046, "rating": "6;6;7", "confidence": "2;3;2", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "1;2;2", "wc_summary": "63;36;150", "wc_strengths": "76;85;79", "wc_weaknesses": "336;489;36", "wc_questions": "70;93;25", "wc_limitations": "26;10;57", "wc_review": "571;713;347", "wc_reply_reviewers": "52;0;34", "wc_reply_authors": "95;64;0", "reply_reviewers": "1;0;1", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 83.0, 48.641546028061235 ], "wc_strengths_avg": [ 80.0, 3.7416573867739413 ], "wc_weaknesses_avg": [ 287.0, 188.15419208723466 ], "wc_questions_avg": [ 62.666666666666664, 28.241026106633512 ], "wc_limitations_avg": [ 31.0, 19.510680835549195 ], "wc_review_avg": [ 543.6666666666666, 150.66371678528165 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 21.561282171728305 ], "wc_reply_authors_avg": [ 53.0, 39.5558676641869 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8371156552807411561&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.columbia.edu;columbia.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Visual CoT: Advancing Multi-Modal Language Models with a Comprehensive Dataset and Benchmark for Chain-of-Thought Reasoning", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97623", "id": "aXeiCbMFFJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aXeiCbMFFJ", "openreview": "https://openreview.net/forum?id=aXeiCbMFFJ", "poster": "", "project": "", "author_site": "Hao Shao, Shengju Qian, Han Xiao, Guanglu Song, ZHUOFAN ZONG, Letian Wang, Yu Liu, Hongsheng Li", "tldr": "", "abstract": "Multi-Modal Large Language Models (MLLMs) have demonstrated impressive performance in various VQA tasks. However, they often lack interpretability and struggle with complex visual inputs, especially when the resolution of the input image is high or when the interested region that could provide key information for answering the question is small. To address these challenges, we collect and introduce the large-scale Visual CoT dataset comprising 438k question-answer pairs, annotated with intermediate bounding boxes highlighting key regions essential for answering the questions. Additionally, about 98k pairs of them are annotated with detailed reasoning steps. Importantly, we propose a multi-turn processing pipeline that dynamically focuses on visual inputs and provides interpretable thoughts. We also introduce the related benchmark to evaluate the MLLMs in scenarios requiring specific local region identification.\nExtensive experiments demonstrate the effectiveness of our framework and shed light on better inference strategies. The Visual CoT dataset, benchmark, and pre-trained models are available on this [website](https://hao-shao.com/projects/viscot.html) to support further research in this area.", "keywords": "Chain-of-Thought;Multi-Modal Language Model;Visual grounding", "primary_area": "", "supplementary_material": "/attachment/af73892af6a18db3f3bf187179178c15ab658528.pdf", "author": "Hao Shao;Shengju Qian;Han Xiao;Guanglu Song;Zhuofan Zong;Letian Wang;Yu Liu;Hongsheng Li", "authorids": "~Hao_Shao1;~Shengju_Qian1;~Han_Xiao6;~Guanglu_Song2;~Zhuofan_Zong1;~Letian_Wang1;~Yu_Liu2;~Hongsheng_Li3", "gender": ";M;M;M;M;M;M;F", "homepage": "http://hao-shao.com;http://thesouthfrog.com/about.me/;;https://zongzhuofan.github.io/;;http://liuyu.us;http://www.ee.cuhk.edu.hk/~hsli;https://www.researchgate.net/profile/Han_Xiao42", "dblp": "66/3089.html;247/6076;207/4745;266/4989;17/8467;97/2274-15;27/7402-1;98/627-10", "google_scholar": "https://scholar.google.com.hk/citations?user=D_ZLR1oAAAAJ;QNnWmasAAAAJ;Bd3v08QAAAAJ;vls0YhoAAAAJ;https://scholar.google.com.hk/citations?user=HEzCWisAAAAJ;;BN2Ze-QAAAAJ;N-u2i-QAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Hao_Shao1;~Shengju_Qian1;~Guanglu_Song2;~Zhuofan_Zong1;~Letian_Wang1;~Yu_Liu2;~Hongsheng_Li3;~Han_Xiao3", "aff": "The Chinese University of Hong Kong, The Chinese University of Hong Kong;Tencent;Sensetime;The Chinese University of Hong Kong;University of Toronto;SenseTime;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "ee.cuhk.edu.hk;tencent.com;sensetime.com;link.cuhk.edu.hk;utoronto.ca;sensetime.com;cuhk.edu.hk;link.cuhk.edu.hk", "position": "PhD student;Researcher;Computer Vision Researcher;PhD student;PhD student;Principal Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nshao2024visual,\ntitle={Visual CoT: Advancing Multi-Modal Language Models with a Comprehensive Dataset and Benchmark for Chain-of-Thought Reasoning},\nauthor={Hao Shao and Shengju Qian and Han Xiao and Guanglu Song and Zhuofan Zong and Letian Wang and Yu Liu and Hongsheng Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=aXeiCbMFFJ}\n}", "github": "", "reviewers": "XGWe;EkM4;1mvY", "pdf_size": 10026011, "rating": "6;7;7", "confidence": "3;4;4", "wc_summary_and_contributions": "59;56;59", "wc_strengths": "89;69;3", "wc_improvement": "79;330;3", "wc_limitations": "4;1;3", "wc_correctness": "8;1;1", "wc_clarity": "6;1;1", "wc_relation_to_prior_work": "11;1;1", "wc_documentation": "6;59;18", "wc_additional_feedback": "1;1;1", "wc_review": "263;519;90", "wc_reply_reviewers": "0;112;81", "wc_reply_authors": "89;213;145", "reply_reviewers": "0;1;1", "reply_authors": "2;5;4", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 58.0, 1.4142135623730951 ], "wc_strengths_avg": [ 53.666666666666664, 36.745370078721784 ], "wc_improvement_avg": [ 137.33333333333334, 139.7243317711303 ], "wc_limitations_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_correctness_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_documentation_avg": [ 27.666666666666668, 22.691163233490013 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 290.6666666666667, 176.2277566736359 ], "wc_reply_reviewers_avg": [ 64.33333333333333, 47.21816976075582 ], "wc_reply_authors_avg": [ 149.0, 50.701742244884116 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15207487898672101091&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ee.cuhk.edu.hk;tencent.com;sensetime.com;link.cuhk.edu.hk;utoronto.ca;sensetime.com;cuhk.edu.hk;link.cuhk.edu.hk", "author_num": 8, "aff_unique_index": "0;1;2;0;3;2;0;0", "aff_unique_norm": "Chinese University of Hong Kong;Tencent;SenseTime;University of Toronto", "aff_unique_dep": ";Tencent Holdings Limited;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tencent.com;https://www.sensetime.com;https://www.utoronto.ca", "aff_unique_abbr": "CUHK;Tencent;SenseTime;U of T", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "China;Canada" }, { "id": "aYJ2T5TXoX", "title": "Generalizability of experimental studies", "track": "main", "status": "Reject", "tldr": "", "abstract": "Experimental studies are a cornerstone of machine learning (ML) research. A common, but often implicit, assumption is that the results of a study will generalize beyond the study itself, e.g. to new data. That is, there is a high probability that repeating the study under different conditions will yield similar results. Despite the importance of the concept, the problem of measuring generalizability remains open. This is probably due to the lack of a mathematical formalization of experimental studies. In this paper, we propose such a formalization and develop a quantifiable notion of generalizability. This notion allows to explore the generalizability of existing studies and to estimate the number of experiments needed to achieve the generalizability of new studies. To demonstrate its usefulness, we apply it to two recently published benchmarks to discern generalizable and non-generalizable results. We also publish a Python module that allows our analysis to be repeated for other experimental studies.", "keywords": "Generalizability;Replicability;Experiment;Experimental Study;Benchmark", "primary_area": "evaluation", "supplementary_material": "", "author": "Federico Matteucci;Vadim Arzamasov;Jose Cribeiro-Ramallo;Marco Heyden;Konstantin Ntounas;Klemens B\u00f6hm", "authorids": "~Federico_Matteucci1;~Vadim_Arzamasov1;~Jose_Cribeiro-Ramallo1;~Marco_Heyden1;~Konstantin_Ntounas1;~Klemens_B\u00f6hm1", "gender": ";M;;M;;", "homepage": "https://github.com/DrCohomology;https://dbis.ipd.kit.edu/722_2410.php;;;;", "dblp": "352/4741;148/6294;;320/3738;;b/KBohm", "google_scholar": "https://scholar.google.de/citations?user=X_O8eI0AAAAJ;;;https://scholar.google.com/citations?hl=de;;", "orcid": "0000-0003-3181-2071;0000-0002-6854-4931;;0000-0003-4981-709X;;", "linkedin": "federico-matteucci-749ba41a4;vadim-arzamasov-803292b7/?originalSubdomain=de;;marco-heyden-036375156/;;", "or_profile": "~Federico_Matteucci1;~Vadim_Arzamasov1;~Jose_Cribeiro-Ramallo1;~Marco_Heyden1;~Konstantin_Ntounas1;~Klemens_B\u00f6hm1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;;Karlsruher Institut f\u00fcr Technologie;;Karlsruher Institut f\u00fcr Technologie", "aff_domain": "kit.edu;kit.edu;;kit.edu;;kit.edu", "position": "PhD student;Postdoc;;PhD student;;Full Professor", "bibtex": "@misc{\nanonymous2024generalizability,\ntitle={Generalizability of experimental studies},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=aYJ2T5TXoX}\n}", "github": "", "project": "", "reviewers": "NXED;Sdci;Lndy;YkLG;WgsQ", "site": "https://openreview.net/forum?id=aYJ2T5TXoX", "pdf_size": 893013, "rating": "3;5;5;5;5", "confidence": "4;2;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;2", "wc_summary": "29;63;42;13;45", "wc_strengths": "30;40;21;13;130", "wc_weaknesses": "123;185;161;605;295", "wc_questions": "22;263;2;2;47", "wc_limitations": "3;37;1;2;51", "wc_review": "207;588;227;635;568", "wc_reply_reviewers": "18;14;0;408;51", "wc_reply_authors": "0;0;0;527;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 4.6, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 38.4, 16.70449041425688 ], "wc_strengths_avg": [ 46.8, 42.56477416831904 ], "wc_weaknesses_avg": [ 273.8, 175.2100453741166 ], "wc_questions_avg": [ 67.2, 99.28826718197877 ], "wc_limitations_avg": [ 18.8, 21.05611550120297 ], "wc_review_avg": [ 445.0, 187.5345301537826 ], "wc_reply_reviewers_avg": [ 98.2, 155.8003851086383 ], "wc_reply_authors_avg": [ 105.4, 210.8 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.24999999999999994, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1824814313771174724&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": "", "aff_unique_url": "https://www.kit.edu", "aff_unique_abbr": "KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Distributionally Robust Reinforcement Learning with Interactive Data Collection: Fundamental Hardness and Near-Optimal Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94542", "id": "aYWtfsf3uP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aYWtfsf3uP", "openreview": "https://openreview.net/forum?id=aYWtfsf3uP", "poster": "", "project": "", "author_site": "Miao Lu, Han Zhong, Tong Zhang, Jose Blanchet", "tldr": "", "abstract": "The sim-to-real gap, which represents the disparity between training and testing environments, poses a significant challenge in reinforcement learning (RL). A promising approach to addressing this challenge is distributionally robust RL, often framed as a robust Markov decision process (RMDP). In this framework, the objective is to find a robust policy that achieves good performance under the worst-case scenario among all environments within a pre-specified uncertainty set centered around the training environment. Unlike previous work, which relies on a generative model or a pre-collected offline dataset enjoying good coverage of the deployment environment, we tackle robust RL via interactive data collection, where the learner interacts with the training environment only and refines the policy through trial and error. In this robust RL paradigm, two main challenges emerge: managing distributional robustness while striking a balance between exploration and exploitation during data collection. Initially, we establish that sample-efficient learning without additional assumptions is unattainable owing to the curse of support shift; i.e., the potential disjointedness of the distributional supports between the training and testing environments. To circumvent such a hardness result, we introduce the vanishing minimal value assumption to RMDPs with a total-variation (TV) distance robust set, postulating that the minimal value of the optimal robust value function is zero. We prove that such an assumption effectively eliminates the support shift issue for RMDPs with a TV distance robust set, and present an algorithm with a provable sample complexity guarantee. Our work makes the initial step to uncovering the inherent difficulty of robust RL via interactive data collection and sufficient conditions for designing a sample-efficient algorithm accompanied by sharp sample complexity analysis.", "keywords": "Theory of distributionally robust reinforcement learning;interactive data collection;robust Markov decision process;sample complexity;online regret", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Miao Lu;Han Zhong;Tong Zhang;Jose Blanchet", "authorids": "~Miao_Lu3;~Han_Zhong1;~Tong_Zhang2;~Jose_Blanchet1", "gender": ";;M;M", "homepage": "https://miaolu3.github.io;https://hanzhong-ml.github.io/;http://tongzhang-ml.org;https://web.stanford.edu/~jblanche/", "dblp": "09/1168;137/8096.html;07/4227-1;75/5093.html", "google_scholar": "3jS17zQAAAAJ;Bk5q_pAAAAAJ;LurWtuYAAAAJ;https://scholar.google.co.in/citations?user=O24CcQQAAAAJ", "orcid": ";;0000-0002-5511-2558;", "linkedin": "miao-lu-5bb9a31aa/;;;jose-blanchet", "or_profile": "~Miao_Lu3;~Han_Zhong1;~Tong_Zhang2;~Jose_Blanchet1", "aff": "Stanford University;Peking University;UIUC;Stanford University", "aff_domain": "stanford.edu;stu.pku.edu.cn;illinois.edu;stanford.edu", "position": "PhD student;PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nlu2024distributionally,\ntitle={Distributionally Robust Reinforcement Learning with Interactive Data Collection: Fundamental Hardness and Near-Optimal Algorithms},\nauthor={Miao Lu and Han Zhong and Tong Zhang and Jose Blanchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aYWtfsf3uP}\n}", "github": "", "reviewers": "BadN;bEix;3BYp;sH9q", "pdf_size": 677764, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "31;128;127;51", "wc_strengths": "78;84;56;34", "wc_weaknesses": "98;83;260;7", "wc_questions": "5;134;56;155", "wc_limitations": "1;2;1;9", "wc_review": "213;431;500;256", "wc_reply_reviewers": "33;14;44;16", "wc_reply_authors": "39;22;210;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.25, 43.82564888281747 ], "wc_strengths_avg": [ 63.0, 19.72308292331602 ], "wc_weaknesses_avg": [ 112.0, 92.14933532044601 ], "wc_questions_avg": [ 87.5, 60.24325688406961 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 350.0, 119.02310700027957 ], "wc_reply_reviewers_avg": [ 26.75, 12.397076268217438 ], "wc_reply_authors_avg": [ 74.25, 78.62688789466361 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3367974566073944393&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "stanford.edu;stu.pku.edu.cn;illinois.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Stanford University;Peking University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;http://www.pku.edu.cn;https://www illinois.edu", "aff_unique_abbr": "Stanford;Peking U;UIUC", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Stanford;;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning World Models for Unconstrained Goal Navigation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94541", "id": "aYqTwcDlCG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aYqTwcDlCG", "openreview": "https://openreview.net/forum?id=aYqTwcDlCG", "poster": "/media/PosterPDFs/NeurIPS%202024/94541.png?t=1731775834.7112327", "project": "", "author_site": "Yuanlin Duan, Wensen Mao, He Zhu", "tldr": "", "abstract": "Learning world models offers a promising avenue for goal-conditioned reinforcement learning with sparse rewards. By allowing agents to plan actions or exploratory goals without direct interaction with the environment, world models enhance exploration efficiency. The quality of a world model hinges on the richness of data stored in the agent's replay buffer, with expectations of reasonable generalization across the state space surrounding recorded trajectories. However, challenges arise in generalizing learned world models to state transitions backward along recorded trajectories or between states across different trajectories, hindering their ability to accurately model real-world dynamics. To address these challenges, we introduce a novel goal-directed exploration algorithm, MUN (short for \"World Models for Unconstrained Goal Navigation\"). This algorithm is capable of modeling state transitions between arbitrary subgoal states in the replay buffer, thereby facilitating the learning of policies to navigate between any \"key\" states. Experimental results demonstrate that MUN strengthens the reliability of world models and significantly improves the policy's capacity to generalize across new goal settings.", "keywords": "World Models;Reinforcement Learning;Goal-Conditioned Reinforcement Learning;Model-Based Reinforcement Learning;Exploration Strategies", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yuanlin Duan;Wensen Mao;He Zhu", "authorids": "~Yuanlin_Duan2;~Wensen_Mao2;~He_Zhu4", "gender": "M;M;M", "homepage": "https://achlin512.github.io/;https://github.com/symxmyz233;https://herowanzhu.github.io", "dblp": ";262/1654;59/2802-1", "google_scholar": ";XYKS2JcAAAAJ;3X9GC2gAAAAJ", "orcid": ";;", "linkedin": ";wensen-mao-1b117828b/;", "or_profile": "~Yuanlin_Duan2;~Wensen_Mao2;~He_Zhu4", "aff": "Rutgers University;, Rutgers University;Rutgers University", "aff_domain": "rutgers.edu;cs.rutgers.edu;rutgers.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nduan2024learning,\ntitle={Learning World Models for Unconstrained Goal Navigation},\nauthor={Yuanlin Duan and Wensen Mao and He Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aYqTwcDlCG}\n}", "github": "", "reviewers": "24DF;BNZB;Rfj2;aZux", "pdf_size": 3222918, "rating": "5;6;6;8", "confidence": "2;4;4;5", "soundness": "2;3;2;4", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "46;137;103;157", "wc_strengths": "20;28;13;57", "wc_weaknesses": "91;146;586;66", "wc_questions": "177;6;60;84", "wc_limitations": "1;1;36;18", "wc_review": "335;318;798;382", "wc_reply_reviewers": "0;0;130;47", "wc_reply_authors": "0;0;141;385", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;3;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 110.75, 42.073596233267246 ], "wc_strengths_avg": [ 29.5, 16.740669042783207 ], "wc_weaknesses_avg": [ 222.25, 211.99572519275006 ], "wc_questions_avg": [ 81.75, 61.82384248815339 ], "wc_limitations_avg": [ 14.0, 14.474114826130128 ], "wc_review_avg": [ 458.25, 197.55046823533473 ], "wc_reply_reviewers_avg": [ 44.25, 53.09602150820719 ], "wc_reply_authors_avg": [ 131.5, 157.2712624734729 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.894736842105263, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H1o6uBXq_gsJ:scholar.google.com/&scioq=Learning+World+Models+for+Unconstrained+Goal+Navigation&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "rutgers.edu;cs.rutgers.edu;rutgers.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Elastic Costs to Shape Monge Displacements", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94540", "id": "aaUVnpQvbZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aaUVnpQvbZ", "openreview": "https://openreview.net/forum?id=aaUVnpQvbZ", "poster": "", "project": "", "author_site": "Michal Klein, Aram-Alexandre Pooladian, Pierre Ablin, Eugene Ndiaye, Jonathan Niles-Weed, Marco Cuturi", "tldr": "", "abstract": "Given a source and a target probability measure, the Monge problem studies efficient ways to map the former onto the latter.\nThis efficiency is quantified by defining a *cost* function between source and target data. \nSuch a cost is often set by default in the machine learning literature to the squared-Euclidean distance, $\\ell^2\\_2(\\mathbf{x},\\mathbf{y}):=\\tfrac12\\|\\mathbf{x}-\\mathbf{y}\\|\\_2^2$.\nThe benefits of using *elastic* costs, defined using a regularizer $\\tau$ as $c(\\mathbf{x},\\mathbf{y}):=\\ell^2_2(\\mathbf{x},\\mathbf{y})+\\tau(\\mathbf{x}-\\mathbf{y})$, was recently highlighted in (Cuturi et al. 2023). Such costs shape the *displacements* of Monge maps $T$, namely the difference between a source point and its image $T(\\mathbf{x})-\\mathbf{x}$, by giving them a structure that matches that of the proximal operator of $\\tau$.\nIn this work, we make two important contributions to the study of elastic costs:*(i)* For any elastic cost, we propose a numerical method to compute Monge maps that are provably optimal. This provides a much-needed routine to create synthetic problems where the ground-truth OT map is known, by analogy to the Brenier theorem, which states that the gradient of any convex potential is always a valid Monge map for the $\\ell_2^2$ cost; *(ii)* We propose a loss to *learn* the parameter $\\theta$ of a parameterized regularizer $\\tau_\\theta$, and apply it in the case where $\\tau_{A}({\\bf z}):=\\|A^\\perp {\\bf z}\\|^2_2$. This regularizer promotes displacements that lie on a low-dimensional subspace of $\\mathbb{R}^d$, spanned by the $p$ rows of $A\\in\\mathbb{R}^{p\\times d}$. We illustrate the soundness of our procedure on synthetic data, generated using our first contribution, in which we show near-perfect recovery of $A$'s subspace using only samples. We demonstrate the applicability of this method by showing predictive improvements on single-cell data tasks.", "keywords": "optimal transport", "primary_area": "optimization", "supplementary_material": "/attachment/f5fab1dfcdcea41b3253f9a1379881611fea6c56.zip", "author": "Michal Klein;Aram-Alexandre Pooladian;Pierre Ablin;Eugene Ndiaye;Jonathan Niles-Weed;marco cuturi", "authorids": "~Michal_Klein1;~Aram-Alexandre_Pooladian2;~Pierre_Ablin2;~Eugene_Ndiaye1;~Jonathan_Niles-Weed1;~marco_cuturi2", "gender": "M;M;M;;;M", "homepage": "https://github.com/michalk8;http://www.arampooladian.com;https://pierreablin.com/;;;http://marcocuturi.net", "dblp": "332/4607;238/0532;174/0980.html;;;85/5102", "google_scholar": "zByzdzcAAAAJ;6CNhsjoAAAAJ;1ZsunaYAAAAJ;;;https://scholar.google.fr/citations?user=kQEydDMAAAAJ", "orcid": "0000-0002-2433-6380;;;;;", "linkedin": "michal-klein-148697165/;;;;;", "or_profile": "~Michal_Klein1;~Aram-Alexandre_Pooladian2;~Pierre_Ablin2;~Eugene_Ndiaye1;~Jonathan_Niles-Weed1;~marco_cuturi2", "aff": "Apple;New York University;Apple;;;Ensae ParisTech", "aff_domain": "apple.com;nyu.edu;apple.com;;;ensae.fr", "position": "Researcher;PhD student;Researcher;;;Full Professor", "bibtex": "@inproceedings{\nklein2024learning,\ntitle={Learning Elastic Costs to Shape Monge Displacements},\nauthor={Michal Klein and Aram-Alexandre Pooladian and Pierre Ablin and Eugene Ndiaye and Jonathan Niles-Weed and marco cuturi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aaUVnpQvbZ}\n}", "github": "", "reviewers": "JR35;zNJm;EFNS;3gJN", "pdf_size": 2473930, "rating": "4;5;6;7", "confidence": "3;3;2;2", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;2", "wc_summary": "100;63;45;289", "wc_strengths": "81;35;29;63", "wc_weaknesses": "166;121;82;34", "wc_questions": "29;144;140;6", "wc_limitations": "8;4;6;4", "wc_review": "384;367;302;396", "wc_reply_reviewers": "16;419;15;19", "wc_reply_authors": "53;887;32;17", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 124.25, 97.16320033839972 ], "wc_strengths_avg": [ 52.0, 21.095023109728988 ], "wc_weaknesses_avg": [ 100.75, 48.669163748722866 ], "wc_questions_avg": [ 79.75, 62.794804721409875 ], "wc_limitations_avg": [ 5.5, 1.6583123951777 ], "wc_review_avg": [ 362.25, 36.27929850479472 ], "wc_reply_reviewers_avg": [ 117.25, 174.22166197117969 ], "wc_reply_authors_avg": [ 247.25, 369.58109732506614 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8944271909999159, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7649087680991759267&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "apple.com;nyu.edu;apple.com;;;ensae.fr", "author_num": 6, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Apple;New York University;ENSAE ParisTech", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://www.nyu.edu;https://www.ensae.fr", "aff_unique_abbr": "Apple;NYU;Ensae", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;France" }, { "title": "WikiDBs: A Large-Scale Corpus Of Relational Databases From Wikidata", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97622", "id": "abXaOcvujs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=abXaOcvujs", "openreview": "https://openreview.net/forum?id=abXaOcvujs", "poster": "/media/PosterPDFs/NeurIPS%202024/97622.png?t=1732710343.653983", "project": "", "author_site": "Liane Vogel, Jan-Micha Bodensohn, Carsten Binnig", "tldr": "", "abstract": "Deep learning on tabular data, and particularly tabular representation learning, has recently gained growing interest. However, representation learning for relational databases with multiple tables is still an underexplored area, which may be attributed to the lack of openly available resources. To support the development of foundation models for tabular data and relational databases, we introduce WikiDBs, a novel open-source corpus of 100,000 relational databases. Each database consists of multiple tables connected by foreign keys. The corpus is based on Wikidata and aims to follow certain characteristics of real-world databases. In this paper, we describe the dataset and our method for creating it. By making our code publicly available, we enable others to create tailored versions of the dataset, for example, by creating databases in different languages. Finally, we conduct a set of initial experiments to showcase how WikiDBs can be used to train for data engineering tasks, such as missing value imputation and column type annotation.", "keywords": "Dataset;Tabular Data;Relational Databases;Tabular Representation Learning", "primary_area": "", "supplementary_material": "/attachment/5f73c59d86f59a0a28fd04485e512dac59c1f5ea.zip", "author": "Liane Vogel;Jan-Micha Bodensohn;Carsten Binnig", "authorids": "~Liane_Vogel1;~Jan-Micha_Bodensohn1;~Carsten_Binnig1", "gender": "F;;M", "homepage": ";;https://www.informatik.tu-darmstadt.de/datamanagement/datamanagement/index.en.jsp", "dblp": "318/9125;;45/1559.html", "google_scholar": ";;https://scholar.google.com.tw/citations?user=Iqsl_XMAAAAJ", "orcid": "0000-0001-9768-8873;;", "linkedin": ";;", "or_profile": "~Liane_Vogel1;~Jan-Micha_Bodensohn1;~Carsten_Binnig1", "aff": "Technische Universit\u00e4t Darmstadt;;TU Darmstadt", "aff_domain": "tu-darmstadt.de;;tu-darmstadt.de", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nvogel2024wikidbs,\ntitle={Wiki{DB}s: A Large-Scale Corpus Of Relational Databases From Wikidata},\nauthor={Liane Vogel and Jan-Micha Bodensohn and Carsten Binnig},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=abXaOcvujs}\n}", "github": "", "reviewers": "HLok;nHYv;aNts;Gs1a", "pdf_size": 1111317, "rating": "7;8;8;9", "confidence": "5;4;4;5", "wc_summary_and_contributions": "136;59;147;64", "wc_strengths": "37;94;78;21", "wc_improvement": "103;784;209;84", "wc_limitations": "103;179;26;43", "wc_correctness": "24;32;9;10", "wc_clarity": "6;16;19;4", "wc_relation_to_prior_work": "1;83;25;23", "wc_documentation": "1;71;12;7", "wc_additional_feedback": "1;1;1;1", "wc_review": "412;1319;526;257", "wc_reply_reviewers": "95;602;169;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;4;1;0", "reply_authors": "3;6;3;1", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 101.5, 40.22747817102136 ], "wc_strengths_avg": [ 57.5, 29.60152023123137 ], "wc_improvement_avg": [ 295.0, 286.3136392140619 ], "wc_limitations_avg": [ 87.75, 59.94737275310737 ], "wc_correctness_avg": [ 18.75, 9.67923034130297 ], "wc_clarity_avg": [ 11.25, 6.378675411086537 ], "wc_relation_to_prior_work_avg": [ 33.0, 30.364452901377952 ], "wc_documentation_avg": [ 22.75, 28.12805538959279 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 628.5, 409.9332262698402 ], "wc_reply_reviewers_avg": [ 216.5, 230.4891537578287 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9869556186114786143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tu-darmstadt.de;;tu-darmstadt.de", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "SARDet-100K: Towards Open-Source Benchmark and ToolKit for Large-Scale SAR Object Detection", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94539", "id": "abuQMKDVkW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=abuQMKDVkW", "openreview": "https://openreview.net/forum?id=abuQMKDVkW", "poster": "/media/PosterPDFs/NeurIPS%202024/94539.png?t=1729931903.2325325", "project": "", "author_site": "Yuxuan Li, Xiang Li, Weijie Li, Qibin Hou, Li Liu, Ming-Ming Cheng, Jian Yang", "tldr": "", "abstract": "Synthetic Aperture Radar (SAR) object detection has gained significant attention recently due to its irreplaceable all-weather imaging capabilities. However, this research field suffers from both limited public datasets (mostly comprising <2K images with only mono-category objects) and inaccessible source code. To tackle these challenges, we establish a new benchmark dataset and an open-source method for large-scale SAR object detection. Our dataset, SARDet-100K, is a result of intense surveying, collecting, and standardizing 10 existing SAR detection datasets, providing a large-scale and diverse dataset for research purposes. To the best of our knowledge, SARDet-100K is the first COCO-level large-scale multi-class SAR object detection dataset ever created. With this high-quality dataset, we conducted comprehensive experiments and uncovered a crucial challenge in SAR object detection: the substantial disparities between the pretraining on RGB datasets and finetuning on SAR datasets in terms of both data domain and model structure. To bridge these gaps, we propose a novel Multi-Stage with Filter Augmentation (MSFA) pretraining framework that tackles the problems from the perspective of data input, domain transition, and model migration. The proposed MSFA method significantly enhances the performance of SAR object detection models while demonstrating exceptional generalizability and flexibility across diverse models. This work aims to pave the way for further advancements in SAR object detection. The dataset and code is available at \\url{https://github.com/zcablii/SARDet_100K}.", "keywords": "Benchmark;Computer Vision;Object Detection;Synthetic Aperture Radar;Pretrain;Domain Transfer", "primary_area": "machine_vision", "supplementary_material": "/attachment/54d006dadd7638c0c188df7c7242f9ec16434482.zip", "author": "Yuxuan Li;Xiang Li;Weijie Li;Qibin Hou;Li Liu;Ming-Ming Cheng;Jian Yang", "authorids": "~Yuxuan_Li4;~Xiang_Li20;~Weijie_Li11;~Qibin_Hou1;~Li_Liu9;~Ming-Ming_Cheng3;~Jian_Yang1", "gender": "M;M;;M;F;M;M", "homepage": "https://zcablii.github.io/;http://implus.github.io/;https://www.researchgate.net/profile/Weijie_Li23;https://houqb.github.io/;http://lilyliliu.com/Default.aspx;https://mmcheng.net;", "dblp": ";40/1491-41;;40/4112;33/4528-2.html;45/7592;y/JianYang3.html", "google_scholar": "vKnUqmMAAAAJ;oamjJdYAAAAJ;;fF8OFV8AAAAJ;https://scholar.google.com.au/citations?user=9cMQrVsAAAAJ;huWpVyEAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ", "orcid": ";;;;0000-0002-2011-2873;0000-0001-5550-8758;", "linkedin": ";;;;;;", "or_profile": "~Yuxuan_Li4;~Xiang_Li20;~Weijie_Li11;~Qibin_Hou1;~Li_Liu9;~Ming-Ming_Cheng3;~Jian_Yang1", "aff": "Nankai University;Nankai University;National University of Defense Technology;Nankai University;National University of Defense Technology;Nankai University;Nanjing University of Science and Technology", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nudt.edu.cn;nankai.edu.cn;nudt.edu.cn;nankai.edu.cn;njust.edu.cn", "position": "PhD student;Associate Professor;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024sardetk,\ntitle={{SARD}et-100K: Towards Open-Source Benchmark and ToolKit for Large-Scale {SAR} Object Detection},\nauthor={Yuxuan Li and Xiang Li and Weijie Li and Qibin Hou and Li Liu and Ming-Ming Cheng and Jian Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=abuQMKDVkW}\n}", "github": "", "reviewers": "qv3Q;paPp;qrry;1iNV", "pdf_size": 2547542, "rating": "7;7;7;8", "confidence": "5;5;5;5", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "47;57;54;66", "wc_strengths": "108;74;105;131", "wc_weaknesses": "81;140;69;83", "wc_questions": "27;18;1;2", "wc_limitations": "9;1;4;4", "wc_review": "272;290;233;286", "wc_reply_reviewers": "0;17;15;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 56.0, 6.819090848492928 ], "wc_strengths_avg": [ 104.5, 20.279299790673246 ], "wc_weaknesses_avg": [ 93.25, 27.517040175135115 ], "wc_questions_avg": [ 12.0, 10.977249200050075 ], "wc_limitations_avg": [ 4.5, 2.8722813232690143 ], "wc_review_avg": [ 270.25, 22.520823697191894 ], "wc_reply_reviewers_avg": [ 11.5, 6.726812023536855 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11647439899842754072&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nankai.edu.cn;nankai.edu.cn;nudt.edu.cn;nankai.edu.cn;nudt.edu.cn;nankai.edu.cn;njust.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;1;0;2", "aff_unique_norm": "Nankai University;National University of Defense Technology;Nanjing University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nankai.edu.cn;http://www.nudt.edu.cn/;http://www.nust.edu.cn/", "aff_unique_abbr": "NKU;NUDT;NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Symmetry-Informed Governing Equation Discovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94538", "id": "aeGSA8UoXF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aeGSA8UoXF", "openreview": "https://openreview.net/forum?id=aeGSA8UoXF", "poster": "", "project": "", "author_site": "Jianke Yang, Wang Rao, Nima Dehmamy, Robin Walters, Rose Yu", "tldr": "", "abstract": "Despite the advancements in learning governing differential equations from observations of dynamical systems, data-driven methods are often unaware of fundamental physical laws, such as frame invariance. As a result, these algorithms may search an unnecessarily large space and discover less accurate or overly complex equations. In this paper, we propose to leverage symmetry in automated equation discovery to compress the equation search space and improve the accuracy and simplicity of the learned equations. Specifically, we derive equivariance constraints from the time-independent symmetries of ODEs. Depending on the types of symmetries, we develop a pipeline for incorporating symmetry constraints into various equation discovery algorithms, including sparse regression and genetic programming. In experiments across diverse dynamical systems, our approach demonstrates better robustness against noise and recovers governing equations with significantly higher probability than baselines without symmetry.", "keywords": "Equation Discovery;Dynamical Systems;Symmetry;Equivariant Neural Networks;Symbolic Regression;Lie Group;Ordinary Differential Equation;Lie Point Symmetry", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/14d1f48a8f0bf600e737f987a9b793f864776586.zip", "author": "Jianke Yang;Wang Rao;Nima Dehmamy;Robin Walters;Rose Yu", "authorids": "~Jianke_Yang2;~Wang_Rao1;~Nima_Dehmamy1;~Robin_Walters1;~Rose_Yu1", "gender": ";M;M;M;F", "homepage": "https://jiankeyang.github.io;https://github.com;;http://www.robinwalters.com;http://roseyu.com", "dblp": "50/2341;;198/1338;258/3416;164/7314", "google_scholar": "https://scholar.google.com/citations?hl=en;;gvHpUtgAAAAJ;fnprJmUAAAAJ;", "orcid": ";;0000-0003-1617-5502;;", "linkedin": ";;nima-dehmamy-57770a4a/;;", "or_profile": "~Jianke_Yang2;~Wang_Rao1;~Nima_Dehmamy1;~Robin_Walters1;~Rose_Yu1", "aff": "University of California, San Diego;Tsinghua University;International Business Machines;Northeastern University ;University of California, San Diego", "aff_domain": "ucsd.edu;tsinghua.edu.cn;ibm.com;northeastern.edu;ucsd.edu", "position": "PhD student;Undergrad student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024symmetryinformed,\ntitle={Symmetry-Informed Governing Equation Discovery},\nauthor={Jianke Yang and Wang Rao and Nima Dehmamy and Robin Walters and Rose Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aeGSA8UoXF}\n}", "github": "", "reviewers": "zT4E;jBog;CnKF;HCHE;wQw7", "pdf_size": 4219738, "rating": "5;6;7;7;7", "confidence": "3;3;2;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;2;4;3", "wc_summary": "144;111;34;139;53", "wc_strengths": "114;40;73;79;91", "wc_weaknesses": "115;134;216;173;110", "wc_questions": "61;25;351;99;61", "wc_limitations": "34;18;63;30;1", "wc_review": "468;328;737;520;316", "wc_reply_reviewers": "119;13;14;22;33", "wc_reply_authors": "93;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 96.2, 44.879393935301756 ], "wc_strengths_avg": [ 79.4, 24.187600128991715 ], "wc_weaknesses_avg": [ 149.6, 39.91290518115664 ], "wc_questions_avg": [ 119.4, 118.14160994332182 ], "wc_limitations_avg": [ 29.2, 20.42939059296679 ], "wc_review_avg": [ 473.8, 153.36283774109032 ], "wc_reply_reviewers_avg": [ 40.2, 40.04697241989711 ], "wc_reply_authors_avg": [ 18.6, 37.2 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.37500000000000006, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14765429334819285742&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsd.edu;tsinghua.edu.cn;ibm.com;northeastern.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "University of California, San Diego;Tsinghua University;International Business Machines Corporation;Northeastern University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsd.edu;https://www.tsinghua.edu.cn;https://www.ibm.com;https://www.northeastern.edu", "aff_unique_abbr": "UCSD;THU;IBM;NEU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Cell ontology guided transcriptome foundation model", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94537", "id": "aeYNVtTo7o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aeYNVtTo7o", "openreview": "https://openreview.net/forum?id=aeYNVtTo7o", "poster": "", "project": "", "author_site": "XINYU YUAN, Zhihao Zhan, Zuobai Zhang, Manqi Zhou, Jianan Zhao, Boyu Han, Yue Li, Jian Tang", "tldr": "", "abstract": "Transcriptome foundation models (TFMs) hold great promises of deciphering the transcriptomic language that dictate diverse cell functions by self-supervised learning on large-scale single-cell gene expression data, and ultimately unraveling the complex mechanisms of human diseases. However, current TFMs treat cells as independent samples and ignore the taxonomic relationships between cell types, which are available in cell ontology graphs. We argue that effectively leveraging this ontology information during the TFM pre-training can improve learning biologically meaningful gene co-expression patterns while preserving TFM as a general purpose foundation model for downstream zero-shot and fine-tuning tasks. To this end, we present **s**ingle **c**ell, **Cell**-**o**ntology guided TFM (scCello). We introduce cell-type coherence loss and ontology alignment loss, which are minimized along with the masked gene expression prediction loss during the pre-training. The novel loss component guide scCello to learn the cell-type-specific representation and the structural relation between cell types from the cell ontology graph, respectively. We pre-trained scCello on 22 million cells from CellxGene database leveraging their cell-type labels mapped to the cell ontology graph from Open Biological and Biomedical Ontology Foundry. Our TFM demonstrates competitive generalization and transferability performance over the existing TFMs on biologically important tasks including identifying novel cell types of unseen cells, prediction of cell-type-specific marker genes, and cancer drug responses. Source code and model\nweights are available at https://github.com/DeepGraphLearning/scCello.", "keywords": "Cell ontology graph+transcriptome foundation model+large-scale pre-training+cell representation learning+single cell RNA sequencing data", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Xinyu Yuan;Zhihao Zhan;Zuobai Zhang;Manqi Zhou;Jianan Zhao;Boyu Han;Yue Li;Jian Tang", "authorids": "~Xinyu_Yuan2;~Zhihao_Zhan1;~Zuobai_Zhang1;~Manqi_Zhou1;~Jianan_Zhao2;~Boyu_Han2;~Yue_Li15;~Jian_Tang1", "gender": "F;M;M;F;M;M;M;", "homepage": "https://github.com/KatarinaYuan/;;https://oxer11.github.io/;;https://andyjzhao.github.io/;;https://www.cs.mcgill.ca/~yueli/;http://www.jian-tang.com", "dblp": ";;256/9098.html;;135/9355-2;;;181/2667-5", "google_scholar": ";;UCDbNccAAAAJ;;https://scholar.google.com/citations?view_op=new_articles;;yJgWSl0AAAAJ;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ", "orcid": ";;;0000-0001-6238-3228;0000-0002-9743-7588;;0000-0003-3844-4865;", "linkedin": ";%E8%87%B4%E8%B1%AA-%E8%A9%B9-648427245/;;;;https://www.linkedin.com/feed/;yuelicb/;", "or_profile": "~Xinyu_Yuan2;~Zhihao_Zhan1;~Zuobai_Zhang1;~Manqi_Zhou1;~Jianan_Zhao2;~Boyu_Han2;~Yue_Li15;~Jian_Tang1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;NVIDIA;Cornell University;Universit\u00e9 de Montr\u00e9al;Stanford University;McGill University;Mila, HEC Montreal", "aff_domain": "mila.umontreal.ca;umontreal.ca;nvidia.com;cornell.edu;umontreal.ca;stanford.edu;cs.mcgill.ca;hec.ca", "position": "PhD student;PhD student;Intern;PhD student;PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyuan2024cell,\ntitle={Cell ontology guided transcriptome foundation model},\nauthor={Xinyu Yuan and Zhihao Zhan and Zuobai Zhang and Manqi Zhou and Jianan Zhao and Boyu Han and Yue Li and Jian Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aeYNVtTo7o}\n}", "github": "", "reviewers": "bUQe;yzGS;hRwa;E5ju;BhVy", "pdf_size": 3166682, "rating": "5;6;7;7;7", "confidence": "4;4;4;3;4", "soundness": "3;3;4;3;4", "novelty": "2;2;3;3;3", "presentation": "3;3;4;3;4", "wc_summary": "89;171;115;115;139", "wc_strengths": "155;76;225;53;210", "wc_weaknesses": "150;116;79;103;103", "wc_questions": "64;63;130;43;142", "wc_limitations": "84;1;27;6;10", "wc_review": "542;427;576;320;604", "wc_reply_reviewers": "24;69;40;13;112", "wc_reply_authors": "32;32;30;44;146", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 125.8, 27.585503439306667 ], "wc_strengths_avg": [ 143.8, 69.1994219629037 ], "wc_weaknesses_avg": [ 110.2, 23.21551205552012 ], "wc_questions_avg": [ 88.4, 39.76229369641545 ], "wc_limitations_avg": [ 25.6, 30.480157479908137 ], "wc_review_avg": [ 493.8, 105.72870944071909 ], "wc_reply_reviewers_avg": [ 51.6, 35.6011235777749 ], "wc_reply_authors_avg": [ 56.8, 44.874937325861524 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.375, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4309957036506458551&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mila.umontreal.ca;umontreal.ca;nvidia.com;cornell.edu;umontreal.ca;stanford.edu;cs.mcgill.ca;hec.ca", "author_num": 8, "aff_unique_index": "0;1;2;3;1;4;5;6", "aff_unique_norm": "University of Montreal;Universit\u00e9 de Montr\u00e9al;NVIDIA;Cornell University;Stanford University;McGill University;HEC Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;NVIDIA Corporation;;;;HEC Business School", "aff_unique_url": "https://www.mila.quebec;https://www.umontreal.ca;https://www.nvidia.com;https://www.cornell.edu;https://www.stanford.edu;https://www.mcgill.ca;https://www.hec.ca", "aff_unique_abbr": "MILA;UdeM;NVIDIA;Cornell;Stanford;McGill;HEC", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Montreal;;Stanford", "aff_country_unique_index": "0;0;1;1;0;1;0;0", "aff_country_unique": "Canada;United States" }, { "title": "PINNacle: A Comprehensive Benchmark of Physics-Informed Neural Networks for Solving PDEs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97621", "id": "aekfb95slj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aekfb95slj", "openreview": "https://openreview.net/forum?id=aekfb95slj", "poster": "/media/PosterPDFs/NeurIPS%202024/97621.png?t=1730393899.8865557", "project": "", "author_site": "Hao Zhongkai, Jiachen Yao, Chang Su, Hang Su, Ziao Wang, Fanzhi Lu, Zeyu Xia, Yichi Zhang, Songming Liu, Lu Lu, Jun Zhu", "tldr": "", "abstract": "While significant progress has been made on Physics-Informed Neural Networks (PINNs), a comprehensive comparison of these methods across a wide range of Partial Differential Equations (PDEs) is still lacking. This study introduces PINNacle, a benchmarking tool designed to fill this gap. PINNacle provides a diverse dataset, comprising over 20 distinct PDEs from various domains, including heat conduction, fluid dynamics, biology, and electromagnetics. These PDEs encapsulate key challenges inherent to real-world problems, such as complex geometry, multi-scale phenomena, nonlinearity, and high dimensionality. PINNacle also offers a user-friendly toolbox, incorporating about 10 state-of-the-art PINN methods for systematic evaluation and comparison. We have conducted extensive experiments with these methods, offering insights into their strengths and weaknesses. In addition to providing a standardized means of assessing performance, PINNacle also offers an in-depth analysis to guide future research, particularly in areas such as domain decomposition methods and loss reweighting for handling multi-scale problems and complex geometry. To the best of our knowledge, it is the largest benchmark with a diverse and comprehensive evaluation that will undoubtedly foster further research in PINNs.", "keywords": "PINN;physics-informed machine learning;scientific machine learning", "primary_area": "", "supplementary_material": "/attachment/41bf5c0361c90573f2d1776d924bccb1d64cb9a6.zip", "author": "Zhongkai Hao;Jiachen Yao;Chang Su;Hang Su;Ziao Wang;Fanzhi Lu;Zeyu Xia;Yichi Zhang;Songming Liu;Lu Lu;Jun Zhu", "authorids": "~Zhongkai_Hao1;~Jiachen_Yao3;~Chang_Su7;~Hang_Su3;~Ziao_Wang2;~Fanzhi_Lu1;~Zeyu_Xia4;~Yichi_Zhang4;~Songming_Liu1;~Lu_Lu1;~Jun_Zhu2", "gender": "M;M;M;F;M;M;M;M;M;M;M", "homepage": "https://jiachenyao.com/;https://github.com/EdwardIX;https://wangziao9.github.io/;https://github.com/FortuniaL;;https://zycheiheihei.github.io;;https://lu.seas.upenn.edu;http://ml.cs.tsinghua.edu.cn/~jun;;https://haozhongkai.github.io/", "dblp": "213/4920;;;;;;285/4585;01/2086-10;50/2644-1;26/5371-6;270/0220.html", "google_scholar": "Z_bCoGcAAAAJ;;;;https://scholar.google.com/citations?hl=en;HzgDakoAAAAJ;6urFg8kAAAAJ;wD_wsWUAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ;dfSzq27ZiVoC", "orcid": "0000-0001-7655-7831;;;;;0000-0002-1894-3977;;0000-0002-5476-5768;;;", "linkedin": "jiachen-y-05a05932a/;;;;;;%E6%9D%BE%E9%93%AD-%E5%88%98-7b8339254/;;;;", "or_profile": "~Jiachen_Yao3;~Chang_Su7;~Ziao_Wang2;~Fanzhi_Lu1;~Zeyu_Xia4;~Yichi_Zhang4;~Songming_Liu1;~Lu_Lu1;~Jun_Zhu2;~Hang_Su2;~Hao_Zhongkai1", "aff": "Tsinghua University;Tsinghua University;, University of California, Santa Barbara;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Yale University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.ucsb.edu;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;yale.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "Undergrad student;Undergrad student;MS student;Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor;Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nhao2024pinnacle,\ntitle={{PINN}acle: A Comprehensive Benchmark of Physics-Informed Neural Networks for Solving {PDE}s},\nauthor={Zhongkai Hao and Jiachen Yao and Chang Su and Hang Su and Ziao Wang and Fanzhi Lu and Zeyu Xia and Yichi Zhang and Songming Liu and Lu Lu and Jun Zhu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=aekfb95slj}\n}", "github": "", "reviewers": "unji;dS4W;ffbg;uVbz", "pdf_size": 30789431, "rating": "6;6;6;7", "confidence": "4;3;4;3", "wc_summary_and_contributions": "35;68;87;22", "wc_strengths": "36;104;31;29", "wc_improvement": "228;181;167;103", "wc_limitations": "1;6;10;5", "wc_correctness": "1;33;7;1", "wc_clarity": "1;6;39;1", "wc_relation_to_prior_work": "1;25;23;1", "wc_documentation": "1;9;59;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "305;433;424;167", "wc_reply_reviewers": "0;62;59;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 53.0, 25.816661286851172 ], "wc_strengths_avg": [ 50.0, 31.280984639234106 ], "wc_improvement_avg": [ 169.75, 44.67311831515682 ], "wc_limitations_avg": [ 5.5, 3.2015621187164243 ], "wc_correctness_avg": [ 10.5, 13.219304066402286 ], "wc_clarity_avg": [ 11.75, 15.864661988205107 ], "wc_relation_to_prior_work_avg": [ 12.5, 11.521718621802913 ], "wc_documentation_avg": [ 18.25, 23.699947257325277 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 332.25, 107.95687796523202 ], "wc_reply_reviewers_avg": [ 30.25, 30.26858932953434 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12178264276662821414&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;cs.ucsb.edu;tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;yale.edu;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 11, "aff_unique_index": "0;0;1;0;0;0;0;2;0;0;0", "aff_unique_norm": "Tsinghua University;University of California, Santa Barbara;Yale University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsb.edu;https://www.yale.edu", "aff_unique_abbr": "THU;UCSB;Yale", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;1;0;0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Debiasing Synthetic Data Generated by Deep Generative Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94536", "id": "aetbfmCcwg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aetbfmCcwg", "openreview": "https://openreview.net/forum?id=aetbfmCcwg", "poster": "/media/PosterPDFs/NeurIPS%202024/94536.png?t=1731665677.6595304", "project": "", "author_site": "Alexander Decruyenaere, Heidelinde Dehaene, Paloma Rabaey, Johan Decruyenaere, Christiaan Polet, Thomas Demeester, Stijn Vansteelandt", "tldr": "", "abstract": "While synthetic data hold great promise for privacy protection, their statistical analysis poses significant challenges that necessitate innovative solutions. The use of deep generative models (DGMs) for synthetic data generation is known to induce considerable bias and imprecision into synthetic data analyses, compromising their inferential utility as opposed to original data analyses. This bias and uncertainty can be substantial enough to impede statistical convergence rates, even in seemingly straightforward analyses like mean calculation. The standard errors of such estimators then exhibit slower shrinkage with sample size than the typical 1 over root-$n$ rate. This complicates fundamental calculations like p-values and confidence intervals, with no straightforward remedy currently available. In response to these challenges, we propose a new strategy that targets synthetic data created by DGMs for specific data analyses. Drawing insights from debiased and targeted machine learning, our approach accounts for biases, enhances convergence rates, and facilitates the calculation of estimators with easily approximated large sample variances. We exemplify our proposal through a simulation study on toy data and two case studies on real-world data, highlighting the importance of tailoring DGMs for targeted data analysis. This debiasing strategy contributes to advancing the reliability and applicability of synthetic data in statistical inference.", "keywords": "deep generative model;efficient influence function;inferential utility;synthetic data", "primary_area": "generative_models", "supplementary_material": "", "author": "Alexander Decruyenaere;Heidelinde Dehaene;Paloma Rabaey;Johan Decruyenaere;Christiaan Polet;Thomas Demeester;Stijn Vansteelandt", "authorids": "~Alexander_Decruyenaere1;~Heidelinde_Dehaene1;~Paloma_Rabaey1;~Johan_Decruyenaere1;~Christiaan_Polet1;~Thomas_Demeester1;~Stijn_Vansteelandt1", "gender": "M;F;F;M;M;M;", "homepage": ";;https://prabaey.github.io/;http://www.syndara.ai;;;", "dblp": ";;;;;;", "google_scholar": ";;0gTH1bUAAAAJ;;;;734J_XAAAAAJ", "orcid": ";0000-0002-0172-3281;0000-0001-6064-0788;;;0000-0002-9901-5768;", "linkedin": "alexander-decruyenaere/;heidelinde-dehaene-5065b4253;;;christiaan-polet-b95a4242/;;", "or_profile": "~Alexander_Decruyenaere1;~Heidelinde_Dehaene1;~Paloma_Rabaey1;~Johan_Decruyenaere1;~Christiaan_Polet1;~Thomas_Demeester1;~Stijn_Vansteelandt1", "aff": "Universiteit Gent;Universiteit Gent;Ghent University;Universiteit Gent;;Ghent University - imec;Universiteit Gent", "aff_domain": "ugent.be;ugent.be;ugent.be;ugent.be;;ugent.be;ugent.be", "position": "PhD student;Postdoc;PhD student;Full Professor;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndecruyenaere2024debiasing,\ntitle={Debiasing Synthetic Data Generated by Deep Generative Models},\nauthor={Alexander Decruyenaere and Heidelinde Dehaene and Paloma Rabaey and Johan Decruyenaere and Christiaan Polet and Thomas Demeester and Stijn Vansteelandt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aetbfmCcwg}\n}", "github": "", "reviewers": "LYaP;eYfd;ZgH6;4cJP", "pdf_size": 1743582, "rating": "4;5;7;7", "confidence": "1;4;3;1", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "20;84;84;81", "wc_strengths": "47;91;197;198", "wc_weaknesses": "118;52;126;122", "wc_questions": "2;80;99;160", "wc_limitations": "1;1;22;109", "wc_review": "188;308;528;670", "wc_reply_reviewers": "0;34;12;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 2.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 67.25, 27.307279249313726 ], "wc_strengths_avg": [ 133.25, 66.10739368633436 ], "wc_weaknesses_avg": [ 104.5, 30.44256887977754 ], "wc_questions_avg": [ 85.25, 56.42417478350924 ], "wc_limitations_avg": [ 33.25, 44.56666355023674 ], "wc_review_avg": [ 423.5, 187.40530942318577 ], "wc_reply_reviewers_avg": [ 15.5, 12.196310917650468 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.037037037037037035, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9160697765585667656&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "ugent.be;ugent.be;ugent.be;ugent.be;;ugent.be;ugent.be", "author_num": 7, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "University of Ghent;Ghent University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ugent.be/en;https://www.ugent.be/en", "aff_unique_abbr": "UGent;UGent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Belgium" }, { "title": "Incorporating Surrogate Gradient Norm to Improve Offline Optimization Techniques", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94535", "id": "ag7piyoyut", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ag7piyoyut", "openreview": "https://openreview.net/forum?id=ag7piyoyut", "poster": "", "project": "", "author_site": "Cuong Dao, Phi Le Nguyen, Truong Thao Nguyen, Nghia Hoang", "tldr": "", "abstract": "Offline optimization has recently emerged as an increasingly popular approach to mitigate the prohibitively expensive cost of online experimentation. The key idea is to learn a surrogate of the black-box function that underlines the target experiment using a static (offline) dataset of its previous input-output queries. Such an approach is, however, fraught with an out-of-distribution issue where the learned surrogate becomes inaccurate outside the offline data regimes. To mitigate this, existing offline optimizers have proposed numerous conditioning techniques to prevent the learned surrogate from being too erratic. Nonetheless, such conditioning strategies are often specific to particular surrogate or search models, which might not generalize to a different model choice. This motivates us to develop a model-agnostic approach instead, which incorporates a notion of model sharpness into the training loss of the surrogate as a regularizer. Our approach is supported by a new theoretical analysis demonstrating that reducing surrogate sharpness on the offline dataset provably reduces its generalized sharpness on unseen data. Our analysis extends existing theories from bounding generalized prediction loss (on unseen data) with loss sharpness to bounding the worst-case generalized surrogate sharpness with its empirical estimate on training data, providing a new perspective on sharpness regularization. Our extensive experimentation on a diverse range of optimization tasks also shows that reducing surrogate sharpness often leads to significant improvement, marking (up to) a noticeable 9.6% performance boost. Our code is publicly available at https://github.com/cuong-dm/IGNITE.", "keywords": "Black-Box Optimization", "primary_area": "other", "supplementary_material": "/attachment/f4551b2b58b30883734d6e8963e4953fd020d732.zip", "author": "Manh Cuong Dao;Phi Le Nguyen;Thao Nguyen Truong;Trong Nghia Hoang", "authorids": "~Manh_Cuong_Dao1;~Phi_Le_Nguyen2;~Thao_Nguyen_Truong1;~Trong_Nghia_Hoang1", "gender": "M;F;M;M", "homepage": ";https://users.soict.hust.edu.vn/lenp/;https://researchmap.jp/nguyentt;https://htnghia87.github.io/", "dblp": ";147/2816;233/1462.html;62/540", "google_scholar": "ksYAJugAAAAJ;L_NKoQwAAAAJ;;E-kZZeQAAAAJ", "orcid": ";;0000-0003-3641-374X;", "linkedin": "cuong-dao-9501a2223/;;;", "or_profile": "~Manh_Cuong_Dao1;~Phi_Le_Nguyen2;~Thao_Nguyen_Truong1;~Nghia_Hoang2", "aff": "Hanoi University of Science and Technology;Hanoi University of Science and Technology;AIST, National Institute of Advanced Industrial Science and Technology;Washington State University", "aff_domain": "hust.edu.vn;hust.edu.vn;aist.go.jp;eecs.wsu.edu", "position": "MS student;Associate Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndao2024incorporating,\ntitle={Incorporating Surrogate Gradient Norm to Improve Offline Optimization Techniques},\nauthor={Manh Cuong Dao and Phi Le Nguyen and Thao Nguyen Truong and Trong Nghia Hoang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ag7piyoyut}\n}", "github": "", "reviewers": "kVLN;CQYH;xjU2;WGWE;qc6B;qZmG;tYoe", "pdf_size": 1109195, "rating": "5;5;5;6;7;7;7", "confidence": "3;3;4;3;3;4;4", "soundness": "3;2;2;3;3;4;3", "novelty": "3;2;2;2;3;3;3", "presentation": "3;3;2;4;3;3;3", "wc_summary": "35;55;85;71;55;98;71", "wc_strengths": "73;23;32;27;116;85;72", "wc_weaknesses": "56;50;374;104;134;154;140", "wc_questions": "2;93;6;52;2;5;5", "wc_limitations": "1;6;5;12;4;9;5", "wc_review": "167;227;502;266;311;351;293", "wc_reply_reviewers": "21;14;22;9;25;0;13", "wc_reply_authors": "24;125;40;20;23;0;20", "reply_reviewers": "1;1;1;1;1;0;1", "reply_authors": "2;4;2;2;2;1;2", "rating_avg": [ 6.0, 0.9258200997725514 ], "confidence_avg": [ 3.4285714285714284, 0.4948716593053935 ], "soundness_avg": [ 2.857142857142857, 0.6388765649999399 ], "novelty_avg": [ 2.5714285714285716, 0.49487165930539345 ], "presentation_avg": [ 3.0, 0.5345224838248488 ], "wc_summary_avg": [ 67.14285714285714, 19.379138794285037 ], "wc_strengths_avg": [ 61.142857142857146, 32.308462276838206 ], "wc_weaknesses_avg": [ 144.57142857142858, 100.91985100331158 ], "wc_questions_avg": [ 23.571428571428573, 32.858385069685866 ], "wc_limitations_avg": [ 6.0, 3.295017884191656 ], "wc_review_avg": [ 302.42857142857144, 98.34902446005704 ], "wc_reply_reviewers_avg": [ 14.857142857142858, 8.02546966010751 ], "wc_reply_authors_avg": [ 36.0, 37.905898523279234 ], "reply_reviewers_avg": [ 0.8571428571428571, 0.3499271061118826 ], "reply_authors_avg": [ 2.142857142857143, 0.8329931278350428 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.31180478223116176, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B2Yda1Q_Q3AJ:scholar.google.com/&scioq=Incorporating+Surrogate+Gradient+Norm+to+Improve+Offline+Optimization+Techniques&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "hust.edu.vn;hust.edu.vn;aist.go.jp;eecs.wsu.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Hanoi University of Science and Technology;National Institute of Advanced Industrial Science and Technology;Washington State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hust.edu.vn;https://www.aist.go.jp;https://wsu.edu", "aff_unique_abbr": "HUST;AIST;WSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hanoi;", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "Vietnam;Japan;United States" }, { "title": "Zipper: Addressing Degeneracy in Algorithm-Agnostic Inference", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94534", "id": "ahvOhPkkMx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ahvOhPkkMx", "openreview": "https://openreview.net/forum?id=ahvOhPkkMx", "poster": "/media/PosterPDFs/NeurIPS%202024/94534.png?t=1731398067.6154683", "project": "", "author_site": "Geng Chen, Yinxu Jia, Guanghui Wang, Changliang Zou", "tldr": "", "abstract": "The widespread use of black box prediction methods has sparked an increasing interest in algorithm/model-agnostic approaches for quantifying goodness-of-fit, with direct ties to specification testing, model selection and variable importance assessment. A commonly used framework involves defining a predictiveness criterion, applying a cross-fitting procedure to estimate the predictiveness, and utilizing the difference in estimated predictiveness between two models as the test statistic. However, even after standardization, the test statistic typically fails to converge to a non-degenerate distribution under the null hypothesis of equal goodness, leading to what is known as the degeneracy issue. To addresses this degeneracy issue, we present a simple yet effective device, Zipper. It draws inspiration from the strategy of additional splitting of testing data, but encourages an overlap between two testing data splits in predictiveness evaluation. Zipper binds together the two overlapping splits using a slider parameter that controls the proportion of overlap. Our proposed test statistic follows an asymptotically normal distribution under the null hypothesis for any fixed slider value, guaranteeing valid size control while enhancing power by effective data reuse. Finite-sample experiments demonstrate that our procedure, with a simple choice of the slider, works well across a wide range of settings.", "keywords": "Asymptotic normality;Cross-fitting;Goodness-of-fit testing;Model-free;Variable importance.", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Geng Chen;Yinxu Jia;Guanghui Wang;Changliang Zou", "authorids": "~Geng_Chen7;~Yinxu_Jia1;~Guanghui_Wang6;~Changliang_Zou2", "gender": "M;;M;M", "homepage": "https://stat.nankai.edu.cn/8056/list.htm;;https://ghwang.netlify.app;http://web.stat.nankai.edu.cn/chlzou/", "dblp": ";;44/2323-11;", "google_scholar": ";;NvF147AAAAAJ;LPwSdmwAAAAJ", "orcid": ";0000-0003-1880-2901;;", "linkedin": ";;;", "or_profile": "~Geng_Chen7;~Yinxu_Jia1;~Guanghui_Wang6;~Changliang_Zou2", "aff": "Nankai University;Nankai University;East China Normal University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;ecnu.edu.cn;nankai.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024zipper,\ntitle={Zipper: Addressing Degeneracy in Algorithm-Agnostic Inference},\nauthor={Geng Chen and Yinxu Jia and Guanghui Wang and Changliang Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ahvOhPkkMx}\n}", "github": "", "reviewers": "DjJu;2sMb;aa86", "pdf_size": 1529085, "rating": "6;7;8", "confidence": "2;3;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "2;4;4", "wc_summary": "98;90;250", "wc_strengths": "29;40;77", "wc_weaknesses": "41;332;10", "wc_questions": "1;158;10", "wc_limitations": "3;84;1", "wc_review": "172;704;348", "wc_reply_reviewers": "24;73;0", "wc_reply_authors": "0;589;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 146.0, 73.6115932898254 ], "wc_strengths_avg": [ 48.666666666666664, 20.531818125912658 ], "wc_weaknesses_avg": [ 127.66666666666667, 145.0386921556528 ], "wc_questions_avg": [ 56.333333333333336, 71.98302268976731 ], "wc_limitations_avg": [ 29.333333333333332, 38.663792996664064 ], "wc_review_avg": [ 408.0, 221.29316904655386 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 30.379086373505196 ], "wc_reply_authors_avg": [ 196.33333333333334, 277.65726274591765 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dzmD2t53OqsJ:scholar.google.com/&scioq=Zipper:+Addressing+Degeneracy+in+Algorithm-Agnostic+Inference&hl=en&as_sdt=0,47", "gs_version_total": 4, "email": "nankai.edu.cn;nankai.edu.cn;ecnu.edu.cn;nankai.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nankai University;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;http://www.ecnu.edu.cn", "aff_unique_abbr": "NKU;ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "aiGN4UnNM7", "title": "TSTTC: A Large-Scale Dataset for Time-to-Contact Estimation in Driving Scenarios", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Time-to-Contact (TTC) estimation is a critical task for assessing collision risk and is widely used in various driver assistance and autonomous driving systems. The past few decades have witnessed development of related theories and algorithms. The prevalent learning-based methods call for a large-scale TTC dataset in real-world scenarios. In this work, we present a large-scale object oriented TTC dataset in the driving scene for promoting the TTC estimation by a monocular camera. To collect valuable samples and make data with different TTC values relatively balanced, we go through thousands of hours of driving data and select over 200K sequences with a preset data distribution. To augment the quantity of small TTC cases, we also generate clips using the latest Neural rendering methods. Additionally, we provide several simple yet effective TTC estimation baselines and evaluate them extensively on the proposed dataset to demonstrate their effectiveness. The proposed dataset and code are publicly available at \\href{https://pan.baidu.com/s/14oHBEDytYZh6C3ah38a8zw?pwd=rdh3#list/path=%2F&parentPath=%2Fsharelink1829140692-712578186815897}{dataset link} and \\href{https://github.com/tusen-ai/TSTTC}{code link}.", "keywords": "Time-to-Contact Estimation;Dataset", "primary_area": "", "supplementary_material": "/attachment/16a64f3409c18a621a5bb662a1201fab7b8911ea.zip", "author": "Yuheng Shi;Zehao Huang;Yan Yan;Naiyan Wang;Xiaojie Guo", "authorids": "~Yuheng_Shi1;~Zehao_Huang1;~Yan_Yan15;~Naiyan_Wang1;~Xiaojie_Guo2", "gender": "M;M;M;M;M", "homepage": "https://github.com/YuHengsss;;https://github.com/traveller59;http://winsty.net;https://sites.google.com/view/xjguo", "dblp": "327/3178;197/1644;;31/9922;43/8066-1", "google_scholar": "sn2PeEkAAAAJ;zXqeKPgAAAAJ;;yAWtq6QAAAAJ;RL7jPuQAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Yuheng_Shi1;~Zehao_Huang1;~Yan_Yan15;~Naiyan_Wang1;~Xiaojie_Guo2", "aff": "Tianjin University;TuSimple;;Tusimple;Tianjin University", "aff_domain": "tju.edu.cn;tusimple.ai;;tusimple.ai;tju.edu.cn", "position": "MS student;Researcher;;Chief Scientist;Full Professor", "bibtex": "@misc{\nanonymous2024tsttc,\ntitle={{TSTTC}: A Large-Scale Dataset for Time-to-Contact Estimation in Driving Scenarios},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=aiGN4UnNM7}\n}", "github": "", "project": "", "reviewers": "CESF;Su94;2cBV", "site": "https://openreview.net/forum?id=aiGN4UnNM7", "pdf_size": 1872043, "rating": "6;6;7", "confidence": "3;3;4", "wc_summary_and_contributions": "135;75;88", "wc_strengths": "70;6;35", "wc_improvement": "76;52;124", "wc_limitations": "284;1;70", "wc_correctness": "25;1;8", "wc_clarity": "8;1;6", "wc_relation_to_prior_work": "2;1;10", "wc_documentation": "19;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "620;139;343", "wc_reply_reviewers": "27;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 99.33333333333333, 25.77250904010361 ], "wc_strengths_avg": [ 37.0, 26.166135875720485 ], "wc_improvement_avg": [ 84.0, 29.93325909419153 ], "wc_limitations_avg": [ 118.33333333333333, 120.48328606998659 ], "wc_correctness_avg": [ 11.333333333333334, 10.077477638553981 ], "wc_clarity_avg": [ 5.0, 2.943920288775949 ], "wc_relation_to_prior_work_avg": [ 4.333333333333333, 4.0276819911981905 ], "wc_documentation_avg": [ 7.0, 8.48528137423857 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 367.3333333333333, 197.1198169191069 ], "wc_reply_reviewers_avg": [ 9.0, 12.727922061357855 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16532265455436162526&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Tianjin University;TuSimple", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.tusimple.com", "aff_unique_abbr": "TJU;TuSimple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "GTA: A Benchmark for General Tool Agents", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97620", "id": "akEt8QAa6V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=akEt8QAa6V", "openreview": "https://openreview.net/forum?id=akEt8QAa6V", "poster": "/media/PosterPDFs/NeurIPS%202024/97620.png?t=1731246420.9457862", "project": "", "author_site": "Jize Wang, Ma Zerun, Yining Li, Songyang Zhang, Cailian Chen, Kai Chen, Xinyi Le", "tldr": "", "abstract": "In developing general-purpose agents, significant focus has been placed on integrating large language models (LLMs) with various tools. This poses a challenge to the tool-use capabilities of LLMs. However, there are evident gaps between existing tool evaluations and real-world scenarios. Current evaluations often use AI-generated queries, single-step tasks, dummy tools, and text-only inputs, which fail to reveal the agents' real-world problem-solving abilities effectively. To address this, we propose GTA, a benchmark for **G**eneral **T**ool **A**gents, featuring three main aspects: (i) *Real user queries*: human-written queries with simple real-world objectives but implicit tool-use, requiring the LLM to reason the suitable tools and plan the solution steps. (ii) *Real deployed tools*: an evaluation platform equipped with tools across perception, operation, logic, and creativity categories to evaluate the agents' actual task execution performance. (iii) *Real multimodal inputs*: authentic image files, such as spatial scenes, web page screenshots, tables, code snippets, and printed/handwritten materials, used as the query contexts to align with real-world scenarios closely. We designed 229 real-world tasks and executable tool chains to evaluate mainstream LLMs. Our findings show that real-world user queries are challenging for existing LLMs, with GPT-4 completing less than 50\\% of the tasks and most LLMs achieving below 25\\%. This evaluation reveals the bottlenecks in the tool-use capabilities of current LLMs in real-world scenarios, which is beneficial for the advancement of general-purpose tool agents. Dataset and code are available at https://github.com/open-compass/GTA.", "keywords": "tool-use;LLM-based agent;benchmark", "primary_area": "", "supplementary_material": "/attachment/6bd74ca49ca724f9e781636f79ebf257d85150a0.pdf", "author": "Jize Wang;Ma Zerun;Yining Li;Songyang Zhang;Cailian Chen;Kai Chen;Xinyi Le", "authorids": "~Jize_Wang1;~Ma_Zerun1;~Yining_Li1;~Songyang_Zhang1;~Cailian_Chen1;~Kai_Chen4;~Xinyi_Le1", "gender": "F;M;M;M;F;M;F", "homepage": "https://jize-w.github.io/;https://github.com/mzr1996;https://liyn.site;https://www.zhangsongyang.com/;https://iwin.sjtu.edu.cn/;https://chenkai.site/;", "dblp": "350/4823;;166/3420;;;181/2839-26;", "google_scholar": "T95fIJYAAAAJ;;https://scholar.google.com.hk/citations?user=y_cp1sUAAAAJ;8XQPi7YAAAAJ;;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;MGZyMf4AAAAJ", "orcid": ";;;;;0000-0002-6820-2325;", "linkedin": ";;;;;;", "or_profile": "~Jize_Wang1;~Ma_Zerun1;~Yining_Li1;~Songyang_Zhang1;~Cailian_Chen1;~Kai_Chen4;~Xinyi_Le1", "aff": "Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai AI Laboratory;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu", "position": "MS student;Researcher;Researcher;Postdoc;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2024gta,\ntitle={{GTA}: A Benchmark for General Tool Agents},\nauthor={Jize Wang and Ma Zerun and Yining Li and Songyang Zhang and Cailian Chen and Kai Chen and Xinyi Le},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=akEt8QAa6V}\n}", "github": "", "reviewers": "Guc9;vpCb;kUMt;hvCE", "pdf_size": 25703418, "rating": "5;6;7;7", "confidence": "3;3;4;3", "wc_summary_and_contributions": "104;76;50;97", "wc_strengths": "61;70;132;74", "wc_improvement": "5;60;5;87", "wc_limitations": "1;50;1;6", "wc_correctness": "2;37;1;8", "wc_clarity": "3;29;1;4", "wc_relation_to_prior_work": "1;38;1;2", "wc_documentation": "1;43;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "179;404;193;285", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 81.75, 21.0282548015759 ], "wc_strengths_avg": [ 84.25, 27.9676152004421 ], "wc_improvement_avg": [ 39.25, 35.55541449624796 ], "wc_limitations_avg": [ 14.5, 20.59732992404598 ], "wc_correctness_avg": [ 12.0, 14.679918255903198 ], "wc_clarity_avg": [ 9.25, 11.453711188955307 ], "wc_relation_to_prior_work_avg": [ 10.5, 15.88238017426859 ], "wc_documentation_avg": [ 12.75, 17.583728273605686 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 265.25, 89.86204704990867 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15232842697244793748&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;0;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "SJTU;Shanghai AI Lab;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Markovian Flow Matching: Accelerating MCMC with Continuous Normalizing Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94533", "id": "amJyuVqSaf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=amJyuVqSaf", "openreview": "https://openreview.net/forum?id=amJyuVqSaf", "poster": "/media/PosterPDFs/NeurIPS%202024/94533.png?t=1730118283.4383972", "project": "", "author_site": "Alberto Cabezas, Louis Sharrock, Christopher Nemeth", "tldr": "", "abstract": "Continuous normalizing flows (CNFs) learn the probability path between a reference distribution and a target distribution by modeling the vector field generating said path using neural networks. Recently, Lipman et al. (2022) introduced a simple and inexpensive method for training CNFs in generative modeling, termed flow matching (FM). In this paper, we repurpose this method for probabilistic inference by incorporating Markovian sampling methods in evaluating the FM objective, and using the learned CNF to improve Monte Carlo sampling. Specifically, we propose an adaptive Markov chain Monte Carlo (MCMC) algorithm, which combines a local Markov transition kernel with a non-local, flow-informed transition kernel, defined using a CNF. This CNF is adapted on-the-fly using samples from the Markov chain, which are used to specify the probability path for the FM objective. Our method also includes an adaptive tempering mechanism that allows the discovery of multiple modes in the target distribution. Under mild assumptions, we establish convergence of our method to a local optimum of the FM objective. We then benchmark our approach on several synthetic and real-world examples, achieving similar performance to other state-of-the-art methods but often at a significantly lower computational cost.", "keywords": "Sampling algorithms;Bayesian inference;MCMC;flow matching;continuous normalizing flows", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/728f59729e7022c663fcfb2aea9f8ebe27db8ee3.zip", "author": "Alberto Cabezas;Louis Sharrock;Christopher Nemeth", "authorids": "~Alberto_Cabezas1;~Louis_Sharrock1;~Christopher_Nemeth1", "gender": "M;M;M", "homepage": "https://www.lancaster.ac.uk/sci-tech/about-us/people/alberto-cabezas-gonzalez;https://louissharrock.github.io/;http://www.lancs.ac.uk/~nemeth/", "dblp": ";304/5319;88/10513", "google_scholar": ";O0xSdYcAAAAJ;https://scholar.google.co.uk/citations?user=17-Ze24AAAAJ", "orcid": ";0000-0003-1691-1215;0000-0002-9084-3866", "linkedin": ";louissharrock/;christopher-nemeth-815963233/", "or_profile": "~Alberto_Cabezas1;~Louis_Sharrock1;~Christopher_Nemeth1", "aff": "Lancaster University;Lancaster University;Lancaster University", "aff_domain": "lancaster.ac.uk;lancaster.ac.uk;lancaster.ac.uk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ncabezas2024markovian,\ntitle={Markovian Flow Matching: Accelerating {MCMC} with Continuous Normalizing Flows},\nauthor={Alberto Cabezas and Louis Sharrock and Christopher Nemeth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=amJyuVqSaf}\n}", "github": "", "reviewers": "HJhZ;Sj6c;eFeW;YCu1", "pdf_size": 3326666, "rating": "5;6;7;7", "confidence": "4;3;4;3", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "105;82;82;99", "wc_strengths": "134;54;54;76", "wc_weaknesses": "424;82;323;63", "wc_questions": "107;385;185;282", "wc_limitations": "1;4;2;18", "wc_review": "771;607;646;538", "wc_reply_reviewers": "5;27;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 92.0, 10.222524150130436 ], "wc_strengths_avg": [ 79.5, 32.72231654391235 ], "wc_weaknesses_avg": [ 223.0, 154.82409373220952 ], "wc_questions_avg": [ 239.75, 104.28656433117355 ], "wc_limitations_avg": [ 6.25, 6.869315832017043 ], "wc_review_avg": [ 640.5, 84.68913743804455 ], "wc_reply_reviewers_avg": [ 8.0, 11.157956802210698 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13393349053116685465&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "lancaster.ac.uk;lancaster.ac.uk;lancaster.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Lancaster University", "aff_unique_dep": "", "aff_unique_url": "https://www.lancaster.ac.uk", "aff_unique_abbr": "Lancaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "anxYEohntP", "title": "Exploring Prosocial Irrationality for LLM Agents: A Social Cognition View", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have been shown to face hallucination issues due to the data they trained on often containing human bias; whether this is reflected in the decision-making process of LLM agents remains under-explored. As LLM Agents are increasingly employed in intricate social environments, a pressing and natural question emerges: Can LLM Agents leverage hallucinations to mirror human cognitive biases, thus exhibiting irrational social intelligence? In this paper, we probe the irrational behavior among contemporary LLM agents by melding practical social science experiments with theoretical insights. Specifically, We propose CogMir, an open-ended Multi-LLM Agents framework that utilizes hallucination properties to assess and enhance LLM Agents' social intelligence through cognitive biases. Experimental results on CogMir subsets show that LLM Agents and humans exhibit high consistency in irrational and prosocial decision-making under uncertain conditions, underscoring the prosociality of LLM Agents as social entities, and highlighting the significance of hallucination properties. Additionally, CogMir framework demonstrates its potential as a valuable platform for encouraging more research into the social intelligence of LLM Agents.", "keywords": "Multi-Large Model Agents\uff0cSocial Intelligence\uff0cFramework\uff0cInterpretability", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/c0a675aa1bef6dbd16a3ee2b5f408d1e78889e10.zip", "author": "Xuan Liu;Song Guo;Jie ZHANG;HaoYang Shang;Yang Chengxu;Quanyan Zhu", "authorids": "~Xuan_Liu8;~Song_Guo5;~Jie_ZHANG18;~HaoYang_Shang1;~Yang_Chengxu1;~Quanyan_Zhu1", "gender": "Not Specified;M;F;M;M;M", "homepage": ";https://cse.hkust.edu.hk/~songguo/;https://cugzj.github.io/zhangjie.github.io/;;;https://engineering.nyu.edu/faculty/quanyan-zhu", "dblp": ";01/267-1;84/6889-76;;236/3112;", "google_scholar": "AyzRO-wAAAAJ;https://scholar.google.com/citations?hl=en;JRCNlI8AAAAJ;;;Qt2WDh0AAAAJ", "orcid": "0000-0002-6194-4588;;0000-0002-8073-2118;;;", "linkedin": "xuan-liu-48b038258/;;;haoyang-shang-8196612b4/;;", "or_profile": "~Xuan_Liu8;~Song_Guo5;~Jie_ZHANG18;~HaoYang_Shang1;~Yang_Chengxu1;~Quanyan_Zhu1", "aff": "Hong Kong Polytechnic University;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;The Hong Kong Polytechnic University;Shanghai Jiaotong University;Wuhan University of Technology;New York University", "aff_domain": "polyu.edu.hk;cse.ust.hk;polyu.edu.hk;sjtu.edu.cn;whut.edu.cn;nyu.edu", "position": "Undergrad student;Full Professor;Postdoc;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@misc{\nanonymous2024exploring,\ntitle={Exploring Prosocial Irrationality for {LLM} Agents: A Social Cognition View},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=anxYEohntP}\n}", "github": "", "project": "", "reviewers": "inag;wRbg;HBKd;1Z3B", "site": "https://openreview.net/forum?id=anxYEohntP", "pdf_size": 889485, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "2;2;4;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "63;61;85;45", "wc_strengths": "71;62;73;70", "wc_weaknesses": "46;293;69;52", "wc_questions": "3;126;71;1", "wc_limitations": "12;23;28;2", "wc_review": "195;565;326;170", "wc_reply_reviewers": "10;43;49;0", "wc_reply_authors": "96;134;133;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 14.239030865898142 ], "wc_strengths_avg": [ 69.0, 4.183300132670378 ], "wc_weaknesses_avg": [ 115.0, 103.11401456640121 ], "wc_questions_avg": [ 50.25, 52.02583492842763 ], "wc_limitations_avg": [ 16.25, 10.059199769365355 ], "wc_review_avg": [ 314.0, 156.55829585173697 ], "wc_reply_reviewers_avg": [ 25.5, 20.910523666326483 ], "wc_reply_authors_avg": [ 90.75, 54.58651390224512 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7966036069601642124&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "Hong Kong Polytechnic University;Hong Kong University of Science and Technology;Shanghai Jiao Tong University;Wuhan University of Technology;New York University", "aff_unique_dep": ";Department of Computer Science and Engineering;;;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.ust.hk;https://www.sjtu.edu.cn;http://www.wut.edu.cn;https://www.nyu.edu", "aff_unique_abbr": "PolyU;HKUST;SJTU;WUT;NYU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Offline Reinforcement Learning with OOD State Correction and OOD Action Suppression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94532", "id": "anyZgGLQ6n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=anyZgGLQ6n", "openreview": "https://openreview.net/forum?id=anyZgGLQ6n", "poster": "/media/PosterPDFs/NeurIPS%202024/94532.png?t=1732870130.4558697", "project": "", "author_site": "Yixiu Mao, Qi Wang, Chen Chen, Yun Qu, Xiangyang Ji", "tldr": "", "abstract": "In offline reinforcement learning (RL), addressing the out-of-distribution (OOD) action issue has been a focus, but we argue that there exists an OOD state issue that also impairs performance yet has been underexplored. Such an issue describes the scenario when the agent encounters states out of the offline dataset during the test phase, leading to uncontrolled behavior and performance degradation. To this end, we propose SCAS, a simple yet effective approach that unifies OOD state correction and OOD action suppression in offline RL. Technically, SCAS achieves value-aware OOD state correction, capable of correcting the agent from OOD states to high-value in-distribution states. Theoretical and empirical results show that SCAS also exhibits the effect of suppressing OOD actions. On standard offline RL benchmarks, SCAS achieves excellent performance without additional hyperparameter tuning. Moreover, benefiting from its OOD state correction feature, SCAS demonstrates enhanced robustness against environmental perturbations.", "keywords": "offline reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yixiu Mao;Cheems Wang;Chen Chen;Yun Qu;Xiangyang Ji", "authorids": "~Yixiu_Mao2;~Cheems_Wang1;~Chen_Chen3;~Yun_Qu2;~Xiangyang_Ji1", "gender": "M;;F;M;", "homepage": ";;;https://github.com/cloud-qu;", "dblp": "280/1045;;;80/10774-2;", "google_scholar": ";;l8_g4oAAAAAJ;l9Ky9goAAAAJ;", "orcid": "0009-0000-7302-5039;;;0009-0000-1803-8435;", "linkedin": ";;;;", "or_profile": "~Yixiu_Mao2;~Cheems_Wang1;~Chen_Chen3;~Yun_Qu2;~Xiangyang_Ji1", "aff": "Tsinghua University;;Qiyuan Lab;Tsinghua University;", "aff_domain": "mails.tsinghua.edu.cn;;qiyuanlab.com;tsinghua.edu.cn;", "position": "PhD student;;Researcher;PhD student;", "bibtex": "@inproceedings{\nmao2024offline,\ntitle={Offline Reinforcement Learning with {OOD} State Correction and {OOD} Action Suppression},\nauthor={Yixiu Mao and Cheems Wang and Chen Chen and Yun Qu and Xiangyang Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=anyZgGLQ6n}\n}", "github": "", "reviewers": "fu1b;VyLC;VgaK;j7HR", "pdf_size": 2860140, "rating": "6;6;6;6", "confidence": "3;4;3;4", "soundness": "4;3;2;3", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "122;134;70;34", "wc_strengths": "37;191;59;61", "wc_weaknesses": "354;352;91;24", "wc_questions": "55;84;45;239", "wc_limitations": "13;13;3;31", "wc_review": "581;774;268;389", "wc_reply_reviewers": "17;74;52;34", "wc_reply_authors": "289;478;39;336", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 90.0, 40.29888335921977 ], "wc_strengths_avg": [ 87.0, 60.778285596090974 ], "wc_weaknesses_avg": [ 205.25, 149.63852278073318 ], "wc_questions_avg": [ 105.75, 78.2539935083188 ], "wc_limitations_avg": [ 15.0, 10.099504938362077 ], "wc_review_avg": [ 503.0, 192.18870934578857 ], "wc_reply_reviewers_avg": [ 44.25, 21.1704392963396 ], "wc_reply_authors_avg": [ 285.5, 158.41480360117865 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10600477873178673149&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;;qiyuanlab.com;tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Tsinghua University;Qiyuan Lab", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "Differentially Private Graph Diffusion with Applications in Personalized PageRanks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94531", "id": "aon7bwYBiq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aon7bwYBiq", "openreview": "https://openreview.net/forum?id=aon7bwYBiq", "poster": "/media/PosterPDFs/NeurIPS%202024/94531.png?t=1731705514.799495", "project": "", "author_site": "Rongzhe Wei, Eli Chien, Pan Li", "tldr": "", "abstract": "Graph diffusion, which iteratively propagates real-valued substances among the graph, is used in numerous graph/network-involved applications. However, releasing diffusion vectors may reveal sensitive linking information in the data such as transaction information in financial network data. However, protecting the privacy of graph data is challenging due to its interconnected nature.\n This work proposes a novel graph diffusion framework with edge-level different privacy guarantees by using noisy diffusion iterates.\n The algorithm injects Laplace noise per diffusion iteration and adopts a degree-based thresholding function to mitigate the high sensitivity induced by low-degree nodes. Our privacy loss analysis is based on Privacy Amplification by Iteration (PABI), which to our best knowledge, is the first effort that analyzes PABI with Laplace noise and provides relevant applications.\n We also introduce a novel $\\infty$-Wasserstein distance tracking method, which tightens the analysis of privacy leakage and makes PABI more applicable in practice. \n We evaluate this framework by applying it to Personalized Pagerank computation for ranking tasks. Experiments on real-world network data demonstrate the superiority of our method under stringent privacy conditions.", "keywords": "differential privacy;graph diffusion;personalize pagerank", "primary_area": "other", "supplementary_material": "/attachment/76fcbcff463744f80e74f5f16a64ea6427d7e229.zip", "author": "Rongzhe Wei;Eli Chien;Pan Li", "authorids": "~Rongzhe_Wei1;~Eli_Chien1;~Pan_Li2", "gender": "M;;M", "homepage": "https://jesson-wei.github.io/Rongzhe-Wei.github.io/;;https://sites.google.com/view/eli-chien/home", "dblp": "259/6894;https://dblp.org/pers/hd/l/Li_0005:Pan;222/3243", "google_scholar": "di8ubMoAAAAJ;IroP0EwAAAAJ;N3BuEnYAAAAJ", "orcid": ";;", "linkedin": ";pan-li-b951105a/;", "or_profile": "~Rongzhe_Wei1;~Pan_Li2;~I_Chien2", "aff": "Georgia Institute of Technology;Purdue University;Georgia Institute of Technology", "aff_domain": "gatech.edu;purdue.edu;gatech.edu", "position": "PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nwei2024differentially,\ntitle={Differentially Private Graph Diffusion with Applications in Personalized PageRanks},\nauthor={Rongzhe Wei and Eli Chien and Pan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aon7bwYBiq}\n}", "github": "", "reviewers": "ApdU;dwyJ;sGeR;y6QA", "pdf_size": 1198129, "rating": "4;5;5;8", "confidence": "3;3;2;4", "soundness": "3;3;4;4", "novelty": "1;3;3;3", "presentation": "3;3;2;4", "wc_summary": "256;78;55;138", "wc_strengths": "48;140;41;35", "wc_weaknesses": "112;88;142;11", "wc_questions": "155;81;54;2", "wc_limitations": "3;2;86;1", "wc_review": "574;389;378;187", "wc_reply_reviewers": "0;0;0;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 131.75, 77.8728932299295 ], "wc_strengths_avg": [ 66.0, 42.970920399730794 ], "wc_weaknesses_avg": [ 88.25, 48.530274056510336 ], "wc_questions_avg": [ 73.0, 55.204166509422095 ], "wc_limitations_avg": [ 23.0, 36.37993952716249 ], "wc_review_avg": [ 382.0, 136.8886408727912 ], "wc_reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11595153139917132915&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "gatech.edu;purdue.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.purdue.edu", "aff_unique_abbr": "Georgia Tech;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "TabPedia: Towards Comprehensive Visual Table Understanding with Concept Synergy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94530", "id": "aou5yrBqKy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aou5yrBqKy", "openreview": "https://openreview.net/forum?id=aou5yrBqKy", "poster": "/media/PosterPDFs/NeurIPS%202024/94530.png?t=1731158177.4209247", "project": "", "author_site": "Weichao Zhao, Hao Feng, Qi Liu, Jingqun Tang, Binghong Wu, Lei Liao, Shu Wei, Yongjie Ye, Hao Liu, Wengang Zhou, Houqiang Li, Can Huang", "tldr": "", "abstract": "Tables contain factual and quantitative data accompanied by various structures and contents that pose challenges for machine comprehension. Previous methods generally design task-specific architectures and objectives for individual tasks, resulting in modal isolation and intricate workflows. In this paper, we present a novel large vision-language model, TabPedia, equipped with a concept synergy mechanism. In this mechanism, all the involved diverse visual table understanding (VTU) tasks and multi-source visual embeddings are abstracted as concepts. This unified framework allows TabPedia to seamlessly integrate VTU tasks, such as table detection, table structure recognition, table querying, and table question answering, by leveraging the capabilities of large language models (LLMs). Moreover, the concept synergy mechanism enables table perception-related and comprehension-related tasks to work in harmony, as they can effectively leverage the needed clues from the corresponding source perception embeddings. Furthermore, to better evaluate the VTU task in real-world scenarios, we establish a new and comprehensive table VQA benchmark, ComTQA, featuring approximately 9,000 QA pairs. Extensive quantitative and qualitative experiments on both table perception and comprehension tasks, conducted across various public benchmarks, validate the effectiveness of our TabPedia. The superior performance further confirms the feasibility of using LLMs for understanding visual tables when all concepts work in synergy. The benchmark ComTQA has been open-sourced at https://huggingface.co/datasets/ByteDance/ComTQA. The source code and model also have been released at https://github.com/zhaowc-ustc/TabPedia.", "keywords": "Multimodal Large Language Model;Visual Table Understanding;Multimodal Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Weichao Zhao;Hao Feng;Qi Liu;Jingqun Tang;Binghong Wu;Lei Liao;Shu Wei;Yongjie Ye;Hao Liu;Wengang Zhou;Houqiang Li;Can Huang", "authorids": "~Weichao_Zhao1;~Hao_Feng4;~Qi_Liu25;~Jingqun_Tang1;~Binghong_Wu1;~Lei_Liao3;~Shu_Wei2;~Yongjie_Ye1;~Hao_Liu15;~Wengang_Zhou1;~Houqiang_Li1;~Can_Huang1", "gender": "M;M;M;;M;F;F;M;M;M;M;M", "homepage": ";https://fh2019ustc.github.io/;;;;;https://github.com/weishu27;;http://staff.ustc.edu.cn/~zhwg/index.html;https://staff.ustc.edu.cn/~lihq/;;https://github.com/sty-yyj/sty-yyj.github.io", "dblp": "171/9872;46/4184-9;;317/5539;236/1328;;;09/3214-3;22/4544-1;59/7017.html;;", "google_scholar": "v-ASmMIAAAAJ;aB8DspEAAAAJ;5k41vLIAAAAJ;OxQXSioAAAAJ;66ry4nMAAAAJ;;;wFOk3PQAAAAJ;8s1JF8YAAAAJ;7sFMIKoAAAAJ;;", "orcid": ";0000-0001-8127-6639;0000-0001-6181-8824;;;0000-0002-4751-504X;;;0000-0003-1690-9836;0000-0003-2188-3028;0009-0006-9126-3069;", "linkedin": ";;;;%E7%A7%89%E6%B3%93-%E6%AD%A6-211300134/;;;;;;https://www.linkedin.cn/incareer/in/can-huang-3175b256;", "or_profile": "~Weichao_Zhao1;~Hao_Feng4;~Qi_Liu25;~Jingqun_Tang1;~Binghong_Wu1;~Lei_Liao3;~Shu_Wei2;~Hao_Liu15;~Wengang_Zhou1;~Houqiang_Li1;~Can_Huang1;~Ye_yongjie1", "aff": "ByteDance Inc.;University of Science and Technology of China;Bytedance Inc.;Bytedance;Bytedance;ByteDance Inc.;ByteDance Inc.;Bytedance;University of Science and Technology of China;University of Science and Technology of China;Bytedance;ByteDance Inc.", "aff_domain": "bytedance.com;ustc.edu;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;ustc.edu.cn;ustc.edu.cn;bytedance.com;bytedance.com", "position": "Intern;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Full Professor;Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nzhao2024tabpedia,\ntitle={TabPedia: Towards Comprehensive Visual Table Understanding with Concept Synergy},\nauthor={Weichao Zhao and Hao Feng and Qi Liu and Jingqun Tang and Binghong Wu and Lei Liao and Shu Wei and Yongjie Ye and Hao Liu and Wengang Zhou and Houqiang Li and Can Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aou5yrBqKy}\n}", "github": "", "reviewers": "Xi9r;fN3K;bkeN", "pdf_size": 13576506, "rating": "3;7;7", "confidence": "4;4;4", "soundness": "3;4;3", "novelty": "2;4;3", "presentation": "1;4;4", "wc_summary": "103;109;88", "wc_strengths": "26;176;109", "wc_weaknesses": "127;139;98", "wc_questions": "2;4;5", "wc_limitations": "1;59;5", "wc_review": "259;487;305", "wc_reply_reviewers": "215;34;319", "wc_reply_authors": "999;37;766", "reply_reviewers": "1;1;2", "reply_authors": "4;2;3", "rating_avg": [ 5.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 1.4142135623730951 ], "wc_summary_avg": [ 100.0, 8.831760866327848 ], "wc_strengths_avg": [ 103.66666666666667, 61.35325763333372 ], "wc_weaknesses_avg": [ 121.33333333333333, 17.21110752456745 ], "wc_questions_avg": [ 3.6666666666666665, 1.247219128924647 ], "wc_limitations_avg": [ 21.666666666666668, 26.44911256650316 ], "wc_review_avg": [ 350.3333333333333, 98.44569851220294 ], "wc_reply_reviewers_avg": [ 189.33333333333334, 117.75775511145281 ], "wc_reply_authors_avg": [ 600.6666666666666, 409.7660579186888 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3415071440895927888&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "bytedance.com;ustc.edu;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;ustc.edu.cn;ustc.edu.cn;bytedance.com;bytedance.com", "author_num": 12, "aff_unique_index": "0;1;2;0;0;0;0;0;1;1;0;0", "aff_unique_norm": "ByteDance;University of Science and Technology of China;Bytedance Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;http://www.ustc.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "ByteDance;USTC;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ODRL: A Benchmark for Off-Dynamics Reinforcement Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97619", "id": "ap4x1kArGy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ap4x1kArGy", "openreview": "https://openreview.net/forum?id=ap4x1kArGy", "poster": "/media/PosterPDFs/NeurIPS%202024/97619.png?t=1732157976.8623059", "project": "", "author_site": "Jiafei Lyu, Kang Xu, Jiacheng Xu, yan, Jing-Wen Yang, Zongzhang Zhang, Chenjia Bai, Zongqing Lu, Xiu Li", "tldr": "", "abstract": "We consider off-dynamics reinforcement learning (RL) where one needs to transfer policies across different domains with dynamics mismatch. Despite the focus on developing dynamics-aware algorithms, this field is hindered due to the lack of a standard benchmark. To bridge this gap, we introduce ODRL, the first benchmark tailored for evaluating off-dynamics RL methods. ODRL contains four experimental settings where the source and target domains can be either online or offline, and provides diverse tasks and a broad spectrum of dynamics shifts, making it a reliable platform to comprehensively evaluate the agent's adaptation ability to the target domain. Furthermore, ODRL includes recent off-dynamics RL algorithms in a unified framework and introduces some extra baselines for different settings, all implemented in a single-file manner. To unpack the true adaptation capability of existing methods, we conduct extensive benchmarking experiments, which show that no method has universal advantages across varied dynamics shifts. We hope this benchmark can serve as a cornerstone for future research endeavors. Our code is publicly available at https://github.com/OffDynamicsRL/off-dynamics-rl.", "keywords": "off-dynamics;reinforcement learning;dynamics mismatch", "primary_area": "", "supplementary_material": "", "author": "Jiafei Lyu;Kang Xu;Jiacheng Xu;Mengbei Yan;Jing-Wen Yang;Zongzhang Zhang;Chenjia Bai;Zongqing Lu;Xiu Li", "authorids": "~Jiafei_Lyu1;~Kang_Xu2;~Jiacheng_Xu3;~Mengbei_Yan1;~Jing-Wen_Yang3;~Zongzhang_Zhang1;~Chenjia_Bai2;~Zongqing_Lu2;~Xiu_Li1", "gender": "M;M;;;M;M;M;;F", "homepage": ";https://kangxu023.github.io/;http://www.lamda.nju.edu.cn/xujc/;;https://www.lamda.nju.edu.cn/yangjw/;http://www.lamda.nju.edu.cn/zhangzz;https://baichenjia.github.io/;;https://thusigsiclab.github.io/thu.github.io/introduction.html", "dblp": "278/1503;295/1622;188/6025-3;;204/2956.html;90/8724;247/1943;;13/1206-1", "google_scholar": "bfgCMr8AAAAJ;7FTLsHUAAAAJ;;;;sG7WEAgAAAAJ;Rm_1y2kAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-6616-417X;0000-0001-6040-3002;;0009-0005-6853-7271;;;;;0000-0003-0403-1923", "linkedin": ";;;;;;;;", "or_profile": "~Jiafei_Lyu1;~Kang_Xu2;~Jiacheng_Xu3;~Mengbei_Yan1;~Jing-Wen_Yang3;~Zongzhang_Zhang1;~Chenjia_Bai2;~Zongqing_Lu2;~Xiu_Li1", "aff": "Tsinghua University;Fudan University;Nanjing University;Tsinghua University;Game AI Center;Nanjing University;Shanghai AI Laboratory;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;fudan.edu.cn;nju.edu.cn;tsinghua.edu.cn;tencent.com;nju.edu.cn;pjlab.org.cn;;tsinghua.edu.cn", "position": "PhD student;MS student;MS student;MS student;Principal Researcher;Associate Professor;Researcher;;Professor", "bibtex": "@inproceedings{\nlyu2024odrl,\ntitle={{ODRL}: A Benchmark for Off-Dynamics Reinforcement Learning},\nauthor={Jiafei Lyu and Kang Xu and Jiacheng Xu and Mengbei Yan and Jing-Wen Yang and Zongzhang Zhang and Chenjia Bai and Zongqing Lu and Xiu Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ap4x1kArGy}\n}", "github": "", "reviewers": "5MsE;XwbG;Gwro;ciZk", "pdf_size": 2129206, "rating": "6;6;7;8", "confidence": "4;3;4;5", "wc_summary_and_contributions": "130;113;120;96", "wc_strengths": "4;79;41;90", "wc_improvement": "4;109;62;85", "wc_limitations": "10;1;4;1", "wc_correctness": "4;1;7;1", "wc_clarity": "34;1;18;1", "wc_relation_to_prior_work": "82;1;9;1", "wc_documentation": "27;1;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "296;307;267;277", "wc_reply_reviewers": "308;0;26;0", "wc_reply_authors": "345;107;41;51", "reply_reviewers": "1;0;1;0", "reply_authors": "4;3;4;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 114.75, 12.397076268217438 ], "wc_strengths_avg": [ 53.5, 33.8710791088799 ], "wc_improvement_avg": [ 65.0, 38.942264957241505 ], "wc_limitations_avg": [ 4.0, 3.6742346141747673 ], "wc_correctness_avg": [ 3.25, 2.48746859276655 ], "wc_clarity_avg": [ 13.5, 13.720422734012244 ], "wc_relation_to_prior_work_avg": [ 23.25, 34.07620137280563 ], "wc_documentation_avg": [ 8.5, 10.805091392487155 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 286.75, 15.658464164789597 ], "wc_reply_reviewers_avg": [ 83.5, 130.04902921590764 ], "wc_reply_authors_avg": [ 136.0, 123.25988804148737 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5118498809917950474&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;fudan.edu.cn;nju.edu.cn;tsinghua.edu.cn;tencent.com;nju.edu.cn;pjlab.org.cn;;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;0;3;2;4;0", "aff_unique_norm": "Tsinghua University;Fudan University;Nanjing University;Game AI Center;Shanghai AI Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.fudan.edu.cn;https://www.nju.edu.cn;;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "THU;Fudan;Nanjing U;;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "apI1GltwSx", "title": "DELT: A Simple Diversity-driven EarlyLate Training for Dataset Distillation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advances in dataset distillation have led to solutions in two main directions. The conventional batch-to-batch matching mechanism is ideal for small-scale datasets and includes bi-level optimization methods on models and syntheses, such as FRePo, RCIG, and RaT-BPTT, as well as other methods like distribution matching, gradient matching, and weight trajectory matching. Conversely, batch-to-global matching typifies decoupled methods, which are particularly advantageous for large-scale datasets. This approach has garnered substantial interest within the community, as seen in SRe$^2$L, G-VBSM, WMDD, and CDA. A primary challenge with the second approach is the lack of diversity among syntheses within each class since samples are optimized independently and the same global supervision signals are reused across different synthetic images. In this study, we propose a new EarlyLate training scheme to enhance the diversity of images in batch-to-global matching with less computation. Our approach is conceptually simple yet effective, it partitions predefined IPC samples into smaller subtasks and employs local optimizations to distill each subset into distributions from distinct phases, reducing the uniformity induced by the unified optimization process. These distilled images from the subtasks demonstrate effective generalization when applied to the entire task. We conducted extensive experiments on CIFAR, Tiny-ImageNet, ImageNet-1K, and its sub-datasets. Our empirical results demonstrate that the proposed approach significantly improves over previous state-of-the-art methods under various IPCs.", "keywords": "Dataset Distillation;Diversity-driven EarlyLate Training", "primary_area": "machine_vision", "supplementary_material": "/attachment/7940444ade79a369e6c5c9447226df9b735a1669.zip", "author": "Zhiqiang Shen;Ammar Sherif;Zeyuan Yin;Shitong Shao", "authorids": "~Zhiqiang_Shen1;~Ammar_Sherif1;~Zeyuan_Yin1;~Shitong_Shao1", "gender": ";;M;M", "homepage": ";;https://zeyuanyin.github.io/;https://shaoshitong.github.io/", "dblp": ";351/1123;302/4051-1.html;329/2735", "google_scholar": ";;QyV0vm8AAAAJ;hmUOaNcAAAAJ", "orcid": ";;;", "linkedin": ";ammar-sherif-871a16255/;;", "or_profile": "~Zhiqiang_Shen1;~Ammar_Sherif1;~Zeyuan_Yin1;~Shitong_Shao1", "aff": ";Nile University;Mohamed bin Zayed University of Artificial Intelligence;Southeast University", "aff_domain": ";nu.edu.eg;mbzuai.ac.ae;seu.edu.cn", "position": ";MS student;MS student;MS student", "bibtex": "@misc{\nanonymous2024delt,\ntitle={{DELT}: A Simple Diversity-driven EarlyLate Training for Dataset Distillation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=apI1GltwSx}\n}", "github": "", "project": "", "reviewers": "d2h5;1mgP;cugo;1XsL", "site": "https://openreview.net/forum?id=apI1GltwSx", "pdf_size": 5736741, "rating": "4;5;5;5", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "novelty": "2;2;2;2", "presentation": "3;3;2;1", "wc_summary": "31;51;66;102", "wc_strengths": "25;71;32;44", "wc_weaknesses": "83;160;97;192", "wc_questions": "249;4;2;28", "wc_limitations": "7;15;2;1", "wc_review": "395;301;199;367", "wc_reply_reviewers": "108;16;20;100", "wc_reply_authors": "256;0;33;235", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;2;3", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 62.5, 25.96632434519757 ], "wc_strengths_avg": [ 43.0, 17.53567791675018 ], "wc_weaknesses_avg": [ 133.0, 44.73812691653507 ], "wc_questions_avg": [ 70.75, 103.41995697156328 ], "wc_limitations_avg": [ 6.25, 5.539629951540085 ], "wc_review_avg": [ 315.5, 75.42380260899075 ], "wc_reply_reviewers_avg": [ 61.0, 43.116122274620196 ], "wc_reply_authors_avg": [ 131.0, 115.33212908812531 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17721079391411698318&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nile University;Mohamed bin Zayed University of Artificial Intelligence;Southeast University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nileu.edu.eg;https://mbzuai.ac.ae;https://www.seu.edu.cn/", "aff_unique_abbr": "NileU;MBZUAI;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Egypt;United Arab Emirates;China" }, { "title": "Strategic Linear Contextual Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94529", "id": "apPHMfE63y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=apPHMfE63y", "openreview": "https://openreview.net/forum?id=apPHMfE63y", "poster": "", "project": "", "author_site": "Thomas Kleine Buening, Aadirupa Saha, Christos Dimitrakakis, Haifeng Xu", "tldr": "", "abstract": "Motivated by the phenomenon of strategic agents gaming a recommender system to maximize the number of times they are recommended to users, we study a strategic variant of the linear contextual bandit problem, where the arms can strategically misreport privately observed contexts to the learner. We treat the algorithm design problem as one of *mechanism design* under uncertainty and propose the Optimistic Grim Trigger Mechanism (OptGTM) that incentivizes the agents (i.e., arms) to report their contexts truthfully while simultaneously minimizing regret. We also show that failing to account for the strategic nature of the agents results in linear regret. However, a trade-off between mechanism design and regret minimization appears to be unavoidable. More broadly, this work aims to provide insight into the intersection of online learning and mechanism design.", "keywords": "bandits;mechanism design;online learning;incentive-aware learning", "primary_area": "bandits", "supplementary_material": "", "author": "Thomas Kleine Buening;Aadirupa Saha;Christos Dimitrakakis;Haifeng Xu", "authorids": "~Thomas_Kleine_Buening1;~Aadirupa_Saha1;~Christos_Dimitrakakis1;~Haifeng_Xu1", "gender": "M;;M;M", "homepage": "https://thomasklbg.github.io/;http://aadirupa.github.io/;https://sites.google.com/site/christosdimitrakakis/;http://www.haifeng-xu.com/", "dblp": "286/5270;;17/2535;04/1895", "google_scholar": "1VT2sBgAAAAJ;https://scholar.google.co.in/citations?user=7a49tQYAAAAJ;9Kw4t_kAAAAJ;nLgg388AAAAJ", "orcid": ";0000-0003-4965-6417;0000-0002-5367-5189;", "linkedin": "thomas-kleine-b%C3%BCning-594a4414a/;aadirupa-saha;;", "or_profile": "~Thomas_Kleine_Buening1;~Aadirupa_Saha1;~Christos_Dimitrakakis1;~Haifeng_Xu1", "aff": "University of Oslo, Norway;Apple;Chalmers University;University of Chicago", "aff_domain": "uio.no;apple.com;chalmers.se;cs.uchicago.edu", "position": "PhD student;Researcher;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nbuening2024strategic,\ntitle={Strategic Linear Contextual Bandits},\nauthor={Thomas Kleine Buening and Aadirupa Saha and Christos Dimitrakakis and Haifeng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=apPHMfE63y}\n}", "github": "", "reviewers": "7kzE;8u5z;QjK5;XxQv", "pdf_size": 1744980, "rating": "6;7;7;7", "confidence": "4;4;3;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "39;156;91;70", "wc_strengths": "19;45;85;44", "wc_weaknesses": "3;153;78;167", "wc_questions": "129;36;2;18", "wc_limitations": "1;9;7;1", "wc_review": "191;399;263;300", "wc_reply_reviewers": "0;0;19;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 89.0, 42.87773314903669 ], "wc_strengths_avg": [ 48.25, 23.636571240347024 ], "wc_weaknesses_avg": [ 100.25, 65.55675022451922 ], "wc_questions_avg": [ 46.25, 49.266494699744975 ], "wc_limitations_avg": [ 4.5, 3.570714214271425 ], "wc_review_avg": [ 288.25, 74.99791663773068 ], "wc_reply_reviewers_avg": [ 13.0, 13.910427743243556 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16592193063959081515&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "uio.no;apple.com;chalmers.se;cs.uchicago.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Oslo;Apple;Chalmers University of Technology;University of Chicago", "aff_unique_dep": ";Apple Inc.;;", "aff_unique_url": "https://www.uio.no;https://www.apple.com;https://www.chalmers.se;https://www.uchicago.edu", "aff_unique_abbr": "UiO;Apple;Chalmers;UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Norway;United States;Sweden" }, { "title": "Foundations of Multivariate Distributional Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94528", "id": "aq3I5B6GLG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aq3I5B6GLG", "openreview": "https://openreview.net/forum?id=aq3I5B6GLG", "poster": "/media/PosterPDFs/NeurIPS%202024/94528.png?t=1733757730.4516954", "project": "", "author_site": "Harley Wiltzer, Jesse Farebrother, Arthur Gretton, Mark Rowland", "tldr": "", "abstract": "In reinforcement learning (RL), the consideration of multivariate reward signals has led to fundamental advancements in multi-objective decision-making, transfer learning, and representation learning. This work introduces the first oracle-free and computationally-tractable algorithms for provably convergent multivariate *distributional* dynamic programming and temporal difference learning. Our convergence rates match the familiar rates in the scalar reward setting, and additionally provide new insights into the fidelity of approximate return distribution representations as a function of the reward dimension. Surprisingly, when the reward dimension is larger than $1$, we show that standard analysis of categorical TD learning fails, which we resolve with a novel projection onto the space of mass-$1$ signed measures. Finally, with the aid of our technical results and simulations, we identify tradeoffs between distribution representations that influence the performance of multivariate distributional RL in practice.", "keywords": "distributional reinforcement learning;rl theory;dynamic programming;temporal difference learning;successor features;successor representation", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/5a48d8c69deaf15f19ed5c41c6180fc4f9ee0852.zip", "author": "Harley Wiltzer;Jesse Farebrother;Arthur Gretton;Mark Rowland", "authorids": "~Harley_Wiltzer1;~Jesse_Farebrother1;~Arthur_Gretton1;~Mark_Rowland1", "gender": "M;M;M;M", "homepage": "https://harwiltz.github.io/about;https://brosa.ca;http://www.gatsby.ucl.ac.uk/~gretton/;http://sites.google.com/view/markrowland", "dblp": "321/0992;228/6862;56/2574;86/4090", "google_scholar": ";cA12XHcAAAAJ;OUv7J6QAAAAJ;https://scholar.google.co.uk/citations?user=-0U84zMAAAAJ", "orcid": ";0000-0002-5178-4947;;", "linkedin": "harley-wiltzer-4998547a;jessefarebro/;;", "or_profile": "~Harley_Wiltzer1;~Jesse_Farebrother1;~Arthur_Gretton1;~Mark_Rowland1", "aff": "Mila;Google DeepMind;University College London;Google DeepMind", "aff_domain": "mila.quebec;google.com;ucl.ac.uk;google.com", "position": "PhD student;Student Researcher;Professor;Research Scientist", "bibtex": "@inproceedings{\nwiltzer2024foundations,\ntitle={Foundations of Multivariate Distributional Reinforcement Learning},\nauthor={Harley Wiltzer and Jesse Farebrother and Arthur Gretton and Mark Rowland},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aq3I5B6GLG}\n}", "github": "", "reviewers": "9dnf;FNo5;SWh2;wmwv", "pdf_size": 4126046, "rating": "3;5;5;6", "confidence": "3;3;2;4", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;3;2;4", "wc_summary": "64;134;72;51", "wc_strengths": "38;32;28;96", "wc_weaknesses": "345;34;70;67", "wc_questions": "330;78;46;201", "wc_limitations": "12;40;3;12", "wc_review": "789;318;219;427", "wc_reply_reviewers": "91;16;154;375", "wc_reply_authors": "275;120;618;818", "reply_reviewers": "1;1;2;3", "reply_authors": "3;2;4;5", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 80.25, 31.92471613029629 ], "wc_strengths_avg": [ 48.5, 27.654113618049667 ], "wc_weaknesses_avg": [ 129.0, 125.50497998087566 ], "wc_questions_avg": [ 163.75, 112.07670364531604 ], "wc_limitations_avg": [ 16.75, 13.91716565971678 ], "wc_review_avg": [ 438.25, 215.45460658802355 ], "wc_reply_reviewers_avg": [ 159.0, 133.93468557472332 ], "wc_reply_authors_avg": [ 457.75, 275.1966342454064 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=795358517958545129&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mila.quebec;google.com;ucl.ac.uk;google.com", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Mila;Google;University College London", "aff_unique_dep": "Quebec Artificial Intelligence Institute;Google DeepMind;", "aff_unique_url": "https://mila.quebec;https://deepmind.com;https://www.ucl.ac.uk", "aff_unique_abbr": "Mila;DeepMind;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Evaluating Copyright Takedown Methods for Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97618", "id": "ar8aRMrmod", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ar8aRMrmod", "openreview": "https://openreview.net/forum?id=ar8aRMrmod", "poster": "", "project": "", "author_site": "Boyi Wei, Weijia Shi, Yangsibo Huang, Noah Smith, Chiyuan Zhang, Luke Zettlemoyer, Kai Li, Peter Henderson", "tldr": "", "abstract": "Language models (LMs) derive their capabilities from extensive training on diverse data, including copyrighted material. \nThese models can memorize and generate content similar to their training data, potentially risking legal issues like copyright infringement.\nTherefore, model creators are motivated to develop mitigation methods that prevent generating particular copyrighted content, an ability we refer to as *copyright takedowns*. This paper introduces the first evaluation of the feasibility and side effects of copyright takedowns for LMs. We propose CoTaEval, an evaluation framework to assess the effectiveness of copyright takedown methods,\nthe impact on the model's ability to retain uncopyrightable factual knowledge from the copyrighted content, and how well the model maintains its general utility and efficiency.\nWe examine several strategies, including adding system prompts, decoding-time filtering interventions, and unlearning approaches. Our findings indicate that no method excels across all metrics, showing significant room for research in this unique problem setting and indicating potential unresolved challenges for live policy proposals.", "keywords": "LLM;Copyright;Machine Learning", "primary_area": "", "supplementary_material": "/attachment/c2a9e96645f384fdd3df07e2eda2dfe21e61c67e.zip", "author": "Boyi Wei;Weijia Shi;Yangsibo Huang;Noah A. Smith;Chiyuan Zhang;Luke Zettlemoyer;Kai Li;Peter Henderson", "authorids": "~Boyi_Wei2;~Weijia_Shi1;~Yangsibo_Huang2;~Noah_A._Smith2;~Chiyuan_Zhang1;~Luke_Zettlemoyer1;~Kai_Li8;~Peter_Henderson1", "gender": "M;;F;M;M;M;M;M", "homepage": "https://www.boyiwei.com/;https://weijiashi.notion.site/;https://hazelsuko07.github.io/yangsibo/;http://pluskid.org;https://www.cs.washington.edu/people/faculty/lsz/;https://www.cs.princeton.edu/~li/;http://www.peterhenderson.co/;https://homes.cs.washington.edu/~nasmith/", "dblp": ";132/80601;;21/8315;21/6793;l/KaiLi1.html;h/PeterHenderson2;90/5204.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;NMPUDa0AAAAJ;l_G2vr0AAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;9MSpWOUAAAAJ;dy_JBs0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-3200-0000-0011;;;;;;0000-0002-2310-6380", "linkedin": ";weijia-shi-773768112;;;luke-zettlemoyer-a0109b226/;;phende/;", "or_profile": "~Boyi_Wei2;~Weijia_Shi1;~Yangsibo_Huang2;~Chiyuan_Zhang1;~Luke_Zettlemoyer1;~Kai_Li8;~Peter_Henderson1;~Noah_Smith1", "aff": "Princeton University;University of Washington, Seattle;Princeton University;Google;Meta;Princeton University;Princeton University;Allen Institute for Artificial Intelligence", "aff_domain": "princeton.edu;uw.edu;princeton.edu;google.com;meta.com;princeton.edu;princeton.edu;allenai.org", "position": "PhD student;PhD student;PhD student;Research Scientist;Researcher;Full Professor;Assistant Professor;Senior Director of NLP Research", "bibtex": "@inproceedings{\nwei2024evaluating,\ntitle={Evaluating Copyright Takedown Methods for Language Models},\nauthor={Boyi Wei and Weijia Shi and Yangsibo Huang and Noah A. Smith and Chiyuan Zhang and Luke Zettlemoyer and Kai Li and Peter Henderson},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ar8aRMrmod}\n}", "github": "", "reviewers": "UkhZ;VRnW;ZFB7", "pdf_size": 4091777, "rating": "6;7;8", "confidence": "4;3;3", "wc_summary_and_contributions": "31;130;101", "wc_strengths": "35;102;115", "wc_improvement": "17;290;112", "wc_limitations": "3;5;73", "wc_correctness": "1;1;22", "wc_clarity": "1;1;23", "wc_relation_to_prior_work": "1;13;30", "wc_documentation": "1;1;28", "wc_additional_feedback": "1;1;1", "wc_review": "91;544;505", "wc_reply_reviewers": "0;33;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 87.33333333333333, 41.55585264302597 ], "wc_strengths_avg": [ 84.0, 35.05234181430203 ], "wc_improvement_avg": [ 139.66666666666666, 113.15574321360018 ], "wc_limitations_avg": [ 27.0, 32.53715824509981 ], "wc_correctness_avg": [ 8.0, 9.899494936611665 ], "wc_clarity_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_relation_to_prior_work_avg": [ 14.666666666666666, 11.897712198383164 ], "wc_documentation_avg": [ 10.0, 12.727922061357855 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 380.0, 204.97316897584426 ], "wc_reply_reviewers_avg": [ 11.0, 15.556349186104045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5621864139480347613&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "princeton.edu;uw.edu;princeton.edu;google.com;meta.com;princeton.edu;princeton.edu;allenai.org", "author_num": 8, "aff_unique_index": "0;1;0;2;3;0;0;4", "aff_unique_norm": "Princeton University;University of Washington;Google;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;Google;Meta Platforms, Inc.;", "aff_unique_url": "https://www.princeton.edu;https://www.washington.edu;https://www.google.com;https://meta.com;https://allenai.org", "aff_unique_abbr": "Princeton;UW;Google;Meta;AI2", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Seattle;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Collaborative Video Diffusion: Consistent Multi-video Generation with Camera Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94527", "id": "arHJlYiY2J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=arHJlYiY2J", "openreview": "https://openreview.net/forum?id=arHJlYiY2J", "poster": "", "project": "", "author_site": "Zhengfei Kuang, Shengqu Cai, Hao He, Yinghao Xu, Hongsheng Li, Leonidas Guibas, Gordon Wetzstein", "tldr": "", "abstract": "Research on video generation has recently made tremendous progress, enabling high-quality videos to be generated from text prompts or images. Adding control to the video generation process is an important goal moving forward and recent approaches that condition video generation models on camera trajectories take an important step towards this goal. Yet, it remains challenging to generate a video of the same scene from multiple different camera trajectories. Solutions to this multi-video generation problem could enable large-scale 3D scene generation with editable camera trajectories, among other applications. We introduce collaborative video diffusion (CVD) as an important step towards this vision. The CVD framework includes a novel cross-video synchronization module that promotes consistency between corresponding frames of the same video rendered from different camera poses using an epipolar attention mechanism. Trained on top of a state-of-the-art camera-control module for video generation, CVD generates multiple videos rendered from different camera trajectories with significantly better consistency than baselines, as shown in extensive experiments.", "keywords": "Neural Rendering;Video Diffusion Models;Multi-View Generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4fac4a428376ed8e89d871ec80b264fe8c8e09be.zip", "author": "Zhengfei Kuang;Shengqu Cai;Hao He;Yinghao Xu;Hongsheng Li;Leonidas Guibas;Gordon Wetzstein", "authorids": "~Zhengfei_Kuang1;~Shengqu_Cai1;~Hao_He7;~Yinghao_Xu1;~Hongsheng_Li3;~Leonidas_Guibas1;~Gordon_Wetzstein3", "gender": "M;M;M;M;M;M;M", "homepage": "https://zhengfeikuang.com;https://primecai.github.io/;https://hehao13.github.io;https://justimyhxu.github.io/;http://www.ee.cuhk.edu.hk/~hsli;http://geometry.stanford.edu/;http://web.stanford.edu/~gordonwz/", "dblp": ";314/6888;;232/2482;27/7402-1;g/LeonidasJGuibas;13/4660", "google_scholar": ";https://scholar.google.es/citations?user=KeI51t8AAAAJ;kdbmt6QAAAAJ;https://scholar.google.com/citations?hl=en;BN2Ze-QAAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ;VOf45S0AAAAJ", "orcid": ";;;;;;0000-0002-9243-6885", "linkedin": ";;;;;;gordon-wetzstein-2406723/", "or_profile": "~Zhengfei_Kuang1;~Shengqu_Cai1;~Hao_He7;~Yinghao_Xu1;~Hongsheng_Li3;~Leonidas_Guibas1;~Gordon_Wetzstein3", "aff": "Stanford University;University of California, San Diego;The Chinese University of Hong Kong;Stanford University;The Chinese University of Hong Kong;Stanford University;Stanford University", "aff_domain": "stanford.edu;ucsd.edu;link.cuhk.edu.hk;stanford.edu;cuhk.edu.hk;stanford.edu;stanford.edu", "position": "PhD student;Researcher;PhD student;Postdoc;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkuang2024collaborative,\ntitle={Collaborative Video Diffusion: Consistent Multi-video Generation with Camera Control},\nauthor={Zhengfei Kuang and Shengqu Cai and Hao He and Yinghao Xu and Hongsheng Li and Leonidas Guibas and Gordon Wetzstein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=arHJlYiY2J}\n}", "github": "", "reviewers": "MCcD;3sRf;EujL;WFsL;v3P6", "pdf_size": 20749909, "rating": "5;5;6;6;7", "confidence": "5;4;5;5;4", "soundness": "3;2;3;2;3", "novelty": "3;2;3;2;3", "presentation": "2;2;3;3;3", "wc_summary": "71;34;85;60;61", "wc_strengths": "75;28;106;42;61", "wc_weaknesses": "322;60;129;232;203", "wc_questions": "57;51;102;6;2", "wc_limitations": "2;5;61;14;1", "wc_review": "527;178;483;354;328", "wc_reply_reviewers": "151;0;27;232;49", "wc_reply_authors": "737;0;0;630;0", "reply_reviewers": "1;0;1;3;1", "reply_authors": "2;1;1;3;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 62.2, 16.726027621644057 ], "wc_strengths_avg": [ 62.4, 27.059933481071234 ], "wc_weaknesses_avg": [ 189.2, 89.41454020459985 ], "wc_questions_avg": [ 43.6, 36.8488805800122 ], "wc_limitations_avg": [ 16.6, 22.668039174132378 ], "wc_review_avg": [ 374.0, 123.46821453313399 ], "wc_reply_reviewers_avg": [ 91.8, 86.73961032884571 ], "wc_reply_authors_avg": [ 273.4, 336.5505014110067 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2182178902359924, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8145052810966003717&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "stanford.edu;ucsd.edu;link.cuhk.edu.hk;stanford.edu;cuhk.edu.hk;stanford.edu;stanford.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;2;0;0", "aff_unique_norm": "Stanford University;University of California, San Diego;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.ucsd.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "Stanford;UCSD;CUHK", "aff_campus_unique_index": "0;1;2;0;2;0;0", "aff_campus_unique": "Stanford;San Diego;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "BAN: Detecting Backdoors Activated by Adversarial Neuron Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94526", "id": "asYYSzL4N5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=asYYSzL4N5", "openreview": "https://openreview.net/forum?id=asYYSzL4N5", "poster": "/media/PosterPDFs/NeurIPS%202024/94526.png?t=1731702333.369728", "project": "", "author_site": "Xiaoyun Xu, Zhuoran Liu, Stefanos Koffas, Shujian Yu, Stjepan Picek", "tldr": "", "abstract": "Backdoor attacks on deep learning represent a recent threat that has gained significant attention in the research community. \nBackdoor defenses are mainly based on backdoor inversion, which has been shown to be generic, model-agnostic, and applicable to practical threat scenarios. State-of-the-art backdoor inversion recovers a mask in the feature space to locate prominent backdoor features, where benign and backdoor features can be disentangled. However, it suffers from high computational overhead, and we also find that it overly relies on prominent backdoor features that are highly distinguishable from benign features. To tackle these shortcomings, this paper improves backdoor feature inversion for backdoor detection by incorporating extra neuron activation information. In particular, we adversarially increase the loss of backdoored models with respect to weights to activate the backdoor effect, based on which we can easily differentiate backdoored and clean models. Experimental results demonstrate our defense, BAN, is 1.37$\\times$ (on CIFAR-10) and 5.11$\\times$ (on ImageNet200) more efficient with an average 9.99\\% higher detect success rate than the state-of-the-art defense BTI DBF. Our code and trained models are publicly available at https://github.com/xiaoyunxxy/ban.", "keywords": "backdoor detection;backdoor trigger inversion;AI Security", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Xiaoyun Xu;Zhuoran Liu;Stefanos Koffas;Shujian Yu;Stjepan Picek", "authorids": "~Xiaoyun_Xu2;~Zhuoran_Liu1;~Stefanos_Koffas1;~Shujian_Yu1;~Stjepan_Picek1", "gender": "M;;;M;M", "homepage": "https://www.ru.nl/en/people/xu-x;https://liuzrcc.github.io/;;https://sjyucnel.github.io/;https://www.ru.nl/en/people/picek-s", "dblp": "46/884;137/6081-1;;154/5763.html;50/10230", "google_scholar": "https://scholar.google.com.hk/citations?user=vPA0GgUAAAAJ;9U3aIWsAAAAJ;https://scholar.google.gr/citations?user=u4C1IiMAAAAJ;O8kpnMoAAAAJ;C1enS8IAAAAJ", "orcid": ";;;;0000-0001-7509-4337", "linkedin": ";;;;", "or_profile": "~Xiaoyun_Xu2;~Zhuoran_Liu1;~Stefanos_Koffas1;~Shujian_Yu1;~Stjepan_Picek1", "aff": "Radboud University;Radboud University;Delft University of Technology;University of Troms\u00f8;Radboud University Nijmegen", "aff_domain": "ru.nl;ru.nl;tudelft.nl;uit.no;ru.nl", "position": "PhD student;Postdoc;PhD student;Guest Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2024ban,\ntitle={{BAN}: Detecting Backdoors Activated by Neuron Noise},\nauthor={Xiaoyun Xu and Zhuoran Liu and Stefanos Koffas and Shujian Yu and Stjepan Picek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=asYYSzL4N5}\n}", "github": "", "reviewers": "hJLG;Eja2;cerE;U3ov;yW41", "pdf_size": 1257822, "rating": "5;6;7;7;7", "confidence": "2;4;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "2;3;4;3;3", "wc_summary": "47;67;180;82;47", "wc_strengths": "21;31;145;39;54", "wc_weaknesses": "41;77;259;80;85", "wc_questions": "273;1;374;12;15", "wc_limitations": "5;64;24;88;4", "wc_review": "387;240;982;301;205", "wc_reply_reviewers": "40;28;32;29;23", "wc_reply_authors": "26;19;19;23;15", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 84.6, 49.487776268488766 ], "wc_strengths_avg": [ 58.0, 44.819638552759436 ], "wc_weaknesses_avg": [ 108.4, 76.89369284928381 ], "wc_questions_avg": [ 135.0, 157.25775020646836 ], "wc_limitations_avg": [ 37.0, 33.50223873116541 ], "wc_review_avg": [ 423.0, 286.242554488322 ], "wc_reply_reviewers_avg": [ 30.4, 5.607138307550474 ], "wc_reply_authors_avg": [ 20.4, 3.7735924528226414 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6875000000000001, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15155762662690960229&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ru.nl;ru.nl;tudelft.nl;uit.no;ru.nl", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Radboud University;Delft University of Technology;University of Troms\u00f8", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ru.nl;https://www.tudelft.nl;https://uit.no", "aff_unique_abbr": "RU;TU Delft;UIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Nijmegen", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Netherlands;Norway" }, { "title": "Logical characterizations of recurrent graph neural networks with reals and floats", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94525", "id": "atDcnWqG5n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=atDcnWqG5n", "openreview": "https://openreview.net/forum?id=atDcnWqG5n", "poster": "/media/PosterPDFs/NeurIPS%202024/94525.png?t=1733318039.0021577", "project": "", "author_site": "Veeti Ahvonen, Damian Heiman, Antti Kuusisto, Carsten Lutz", "tldr": "", "abstract": "In pioneering work from 2019, Barcel\u00f3 and coauthors identified logics that precisely match the expressive power of constant iteration-depth graph neural networks (GNNs) relative to properties definable in first-order logic. In this article, we give exact logical characterizations of recurrent GNNs in two scenarios: (1) in the setting with floating-point numbers and (2) with reals. For floats, the formalism matching recurrent GNNs is a rule-based modal logic with counting, while for reals we use a suitable infinitary modal logic, also with counting. These results give exact matches between logics and GNNs in the recurrent setting without relativising to a background logic in either case, but using some natural assumptions about floating-point arithmetic. Applying our characterizations, we also prove that, relative to graph properties definable in monadic second-order logic (MSO), our infinitary and rule-based logics are equally expressive. This implies that recurrent GNNs with reals and floats have the same expressive power over MSO-definable properties and shows that, for such properties, also recurrent GNNs with reals are characterized by a (finitary!) rule-based modal logic. In the general case, in contrast, the expressive power with floats is weaker than with reals. In addition to logic-oriented results, we also characterize recurrent GNNs, with both reals and floats, via distributed automata, drawing links to distributed computing models.", "keywords": "graph neural networks;logic;distributed computing;descriptive complexity", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Veeti Ahvonen;Damian Heiman;Antti Kuusisto;Carsten Lutz", "authorids": "~Veeti_Ahvonen1;~Damian_Heiman1;~Antti_Kuusisto1;~Carsten_Lutz1", "gender": "M;;;M", "homepage": "https://homepages.tuni.fi/veeti.ahvonen/;;https://homepages.tuni.fi/antti.kuusisto/;https://home.uni-leipzig.de/clu/", "dblp": "https://dblp.org/search?q=veeti+ahvonen;;26/285;l/CarstenLutz", "google_scholar": "https://scholar.google.com/citations?hl=fi;;;xcsp66oAAAAJ", "orcid": "0009-0007-4819-0199;0009-0000-6038-7006;;0000-0002-8791-6702", "linkedin": "veeti-ahvonen-bb92b314a;;;", "or_profile": "~Veeti_Ahvonen1;~Damian_Heiman1;~Antti_Kuusisto1;~Carsten_Lutz1", "aff": "Tampere University;Tampere University;Tampere University;Universit\u00e4t Leipzig", "aff_domain": "tuni.fi;tuni.fi;tuni.fi;uni-leipzig.de", "position": "PhD student;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nahvonen2024logical,\ntitle={Logical characterizations of recurrent graph neural networks with reals and floats},\nauthor={Veeti Ahvonen and Damian Heiman and Antti Kuusisto and Carsten Lutz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=atDcnWqG5n}\n}", "github": "", "reviewers": "2zSg;TUPQ;vmsJ;3ziF", "pdf_size": 561190, "rating": "6;8;8;8", "confidence": "4;5;4;5", "soundness": "4;4;4;4", "novelty": "3;4;4;3", "presentation": "3;3;3;4", "wc_summary": "152;89;145;138", "wc_strengths": "127;99;97;117", "wc_weaknesses": "353;24;139;157", "wc_questions": "56;70;53;44", "wc_limitations": "12;10;29;11", "wc_review": "700;292;463;467", "wc_reply_reviewers": "41;0;33;66", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 131.0, 24.748737341529164 ], "wc_strengths_avg": [ 110.0, 12.529964086141668 ], "wc_weaknesses_avg": [ 168.25, 118.2399572902494 ], "wc_questions_avg": [ 55.75, 9.33742469849155 ], "wc_limitations_avg": [ 15.5, 7.826237921249264 ], "wc_review_avg": [ 480.5, 145.08704283980703 ], "wc_reply_reviewers_avg": [ 35.0, 23.590252224170897 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17793934135883509681&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "tuni.fi;tuni.fi;tuni.fi;uni-leipzig.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Tampere University;University of Leipzig", "aff_unique_dep": ";", "aff_unique_url": "https://www.tuni.fi;https://www.uni-leipzig.de", "aff_unique_abbr": "Tuni;Uni Leipzig", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Finland;Germany" }, { "title": "Few-Shot Task Learning through Inverse Generative Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94524", "id": "atIE6Npr5A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=atIE6Npr5A", "openreview": "https://openreview.net/forum?id=atIE6Npr5A", "poster": "/media/PosterPDFs/NeurIPS%202024/94524.png?t=1731570341.22456", "project": "", "author_site": "Aviv Netanyahu, Yilun Du, Antonia Bronars, Jyothish Pari, Josh Tenenbaum, Tianmin Shu, Pulkit Agrawal", "tldr": "", "abstract": "Learning the intents of an agent, defined by its goals or motion style, is often extremely challenging from just a few examples. We refer to this problem as task concept learning and present our approach, Few-Shot Task Learning through Inverse Generative Modeling (FTL-IGM), which learns new task concepts by leveraging invertible neural generative models. The core idea is to pretrain a generative model on a set of basic concepts and their demonstrations. Then, given a few demonstrations of a new concept (such as a new goal or a new action), our method learns the underlying concepts through backpropagation without updating the model weights, thanks to the invertibility of the generative model. We evaluate our method in five domains -- object rearrangement, goal-oriented navigation, motion caption of human actions, autonomous driving, and real-world table-top manipulation. Our experimental results demonstrate that via the pretrained generative model, we successfully learn novel concepts and generate agent plans or motion corresponding to these concepts in (1) unseen environments and (2) in composition with training concepts.", "keywords": "few shot learning;inverse generative modeling", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Aviv Netanyahu;Yilun Du;Antonia Bronars;Jyothish Pari;Joshua B. Tenenbaum;Tianmin Shu;Pulkit Agrawal", "authorids": "~Aviv_Netanyahu1;~Yilun_Du1;~Antonia_Bronars1;~Jyothish_Pari1;~Joshua_B._Tenenbaum1;~Tianmin_Shu1;~Pulkit_Agrawal1", "gender": ";;F;M;;;M", "homepage": ";https://yilundu.github.io;;https://jyopari.github.io/;;;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": "286/8767;204/4379;;297/5770.html;t/JoshuaBTenenbaum;163/2175.html;149/2672", "google_scholar": ";;1WSuJdQAAAAJ;WyIW46YAAAAJ;;YT_ffdwAAAAJ;UpZmJI0AAAAJ", "orcid": ";;;;;;", "linkedin": "aviv-netanyahu/;;;jyo-pari-1a70bb1b3;;;", "or_profile": "~Aviv_Netanyahu1;~Yilun_Du1;~Antonia_Bronars1;~Jyothish_Pari1;~Joshua_B._Tenenbaum1;~Tianmin_Shu1;~Pulkit_Agrawal1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Johns Hopkins University;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;;mit.edu;mit.edu;jhu.edu;mit.edu", "position": "PhD student;PhD student;;PhD student;Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nnetanyahu2024fewshot,\ntitle={Few-Shot Task Learning through Inverse Generative Modeling},\nauthor={Aviv Netanyahu and Yilun Du and Antonia Bronars and Jyothish Pari and Joshua B. Tenenbaum and Tianmin Shu and Pulkit Agrawal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=atIE6Npr5A}\n}", "github": "", "reviewers": "1rf8;sy18;gXQZ", "pdf_size": 2716544, "rating": "5;6;7", "confidence": "3;4;5", "soundness": "4;3;4", "novelty": "3;3;4", "presentation": "4;4;3", "wc_summary": "93;46;91", "wc_strengths": "93;125;55", "wc_weaknesses": "76;40;168", "wc_questions": "48;28;79", "wc_limitations": "59;6;7", "wc_review": "369;245;400", "wc_reply_reviewers": "10;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.66666666666667, 21.69997439834639 ], "wc_strengths_avg": [ 91.0, 28.61235164516658 ], "wc_weaknesses_avg": [ 94.66666666666667, 53.897021150420635 ], "wc_questions_avg": [ 51.666666666666664, 20.98147330914162 ], "wc_limitations_avg": [ 24.0, 24.752104287649296 ], "wc_review_avg": [ 338.0, 66.9676538835479 ], "wc_reply_reviewers_avg": [ 3.3333333333333335, 4.714045207910316 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3448380150721719859&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;mit.edu;;mit.edu;mit.edu;jhu.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.jhu.edu", "aff_unique_abbr": "MIT;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is Multiple Object Tracking a Matter of Specialization?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94523", "id": "aujnNnIiiM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=aujnNnIiiM", "openreview": "https://openreview.net/forum?id=aujnNnIiiM", "poster": "/media/PosterPDFs/NeurIPS%202024/94523.png?t=1730319868.387615", "project": "", "author_site": "Gianluca Mancusi, Mattia Bernardi, Aniello Panariello, Angelo Porrello, Rita Cucchiara, SIMONE CALDERARA", "tldr": "", "abstract": "End-to-end transformer-based trackers have achieved remarkable performance on most human-related datasets. However, training these trackers in heterogeneous scenarios poses significant challenges, including negative interference - where the model learns conflicting scene-specific parameters - and limited domain generalization, which often necessitates expensive fine-tuning to adapt the models to new domains. In response to these challenges, we introduce Parameter-efficient Scenario-specific Tracking Architecture (PASTA), a novel framework that combines Parameter-Efficient Fine-Tuning (PEFT) and Modular Deep Learning (MDL). Specifically, we define key scenario attributes (e.g, camera-viewpoint, lighting condition) and train specialized PEFT modules for each attribute. These expert modules are combined in parameter space, enabling systematic generalization to new domains without increasing inference time. Extensive experiments on MOTSynth, along with zero-shot evaluations on MOT17 and PersonPath22 demonstrate that a neural tracker built from carefully selected modules surpasses its monolithic counterpart. We release models and code.", "keywords": "Multiple Object Tracking;Parameter Efficient Fine-Tuning;Modular Deep Learning;Task Arithmetic", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Gianluca Mancusi;Mattia Bernardi;Aniello Panariello;Angelo Porrello;Rita Cucchiara;Simone Calderara", "authorids": "~Gianluca_Mancusi1;~Mattia_Bernardi1;~Aniello_Panariello1;~Angelo_Porrello1;~Rita_Cucchiara1;~Simone_Calderara1", "gender": "M;M;;M;F;M", "homepage": "https://www.gianlucamancusi.com;;;;https://aimagelab.ing.unimore.it/imagelab/;", "dblp": "320/6157;;326/4899;223/4466;c/RitaCucchiara;13/422", "google_scholar": "AA7Ot04AAAAJ;;YnVl5pQAAAAJ;b3-5Ys4AAAAJ;OM3sZEoAAAAJ;https://scholar.google.it/citations?user=CZd-WXkAAAAJ", "orcid": "0000-0002-0424-6839;0009-0003-3425-4148;0000-0002-1940-7703;0000-0002-9022-8484;0000-0002-2239-283X;0000-0001-9056-1538", "linkedin": "gianluca-mancusi/;mattia-bernardi-719a29137/;;;rita-cucchiara-a4653a13/?originalSubdomain=it;", "or_profile": "~Gianluca_Mancusi1;~Mattia_Bernardi1;~Aniello_Panariello1;~Angelo_Porrello1;~Rita_Cucchiara1;~Simone_Calderara1", "aff": "University of Modena and Reggio Emilia;University of Modena and Reggio Emilia;University of Modena and Reggio Emilia;University of Modena and Reggio Emilia, AimageLab;Universit\u00e0 di modena e reggio emilia;University of Modena and Reggio Emilia", "aff_domain": "unimore.it;unimore.it;unimore.it;unimore.it;unimore.it;unimore.it", "position": "PhD student;PhD student;PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmancusi2024is,\ntitle={Is Multiple Object Tracking a Matter of Specialization?},\nauthor={Gianluca Mancusi and Mattia Bernardi and Aniello Panariello and Angelo Porrello and Rita Cucchiara and Simone Calderara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=aujnNnIiiM}\n}", "github": "", "reviewers": "Vx5C;4fn5;nupV;6tD5;oFC1", "pdf_size": 8978647, "rating": "5;5;5;6;7", "confidence": "5;5;5;5;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "119;72;92;60;215", "wc_strengths": "67;294;33;29;95", "wc_weaknesses": "111;372;120;104;62", "wc_questions": "8;335;82;5;32", "wc_limitations": "6;350;11;5;31", "wc_review": "311;1423;338;203;435", "wc_reply_reviewers": "10;0;17;11;33", "wc_reply_authors": "0;45;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 4.6, 0.7999999999999999 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 111.6, 55.42779086342879 ], "wc_strengths_avg": [ 103.6, 98.19898166478102 ], "wc_weaknesses_avg": [ 153.8, 110.89887285270306 ], "wc_questions_avg": [ 92.4, 124.397106075664 ], "wc_limitations_avg": [ 80.6, 135.02681215225365 ], "wc_review_avg": [ 542.0, 446.6560197736061 ], "wc_reply_reviewers_avg": [ 14.2, 10.870142593360953 ], "wc_reply_authors_avg": [ 9.0, 18.0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8750000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7ui4u2hmpw8J:scholar.google.com/&scioq=Is+Multiple+Object+Tracking+a+Matter+of+Specialization%3F&hl=en&as_sdt=0,21", "gs_version_total": 7, "email": "unimore.it;unimore.it;unimore.it;unimore.it;unimore.it;unimore.it", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Modena and Reggio Emilia;Universit\u00e0 di Modena e Reggio Emilia", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimore.it;https://www.unimore.it", "aff_unique_abbr": ";Unimore", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "ET-Flow: Equivariant Flow-Matching for Molecular Conformer Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94522", "id": "avsZ9OlR60", "proceeding": "", "pdf": "https://openreview.net/pdf?id=avsZ9OlR60", "openreview": "https://openreview.net/forum?id=avsZ9OlR60", "poster": "", "project": "", "author_site": "Majdi Hassan, Nikhil Shenoy, Jungyoon Lee, Hannes St\u00e4rk, Stephan Thaler, Dominique Beaini", "tldr": "", "abstract": "Predicting low-energy molecular conformations given a molecular graph is an \nimportant but challenging task in computational drug discovery. Existing state-\nof-the-art approaches either resort to large scale transformer-based models that\ndiffuse over conformer fields, or use computationally expensive methods to gen-\nerate initial structures and diffuse over torsion angles. In this work, we introduce\nEquivariant Transformer Flow (ET-Flow). We showcase that a well-designed\nflow matching approach with equivariance and harmonic prior alleviates the need\nfor complex internal geometry calculations and large architectures, contrary to\nthe prevailing methods in the field. Our approach results in a straightforward\nand scalable method that directly operates on all-atom coordinates with minimal\nassumptions. With the advantages of equivariance and flow matching, ET-Flow\nsignificantly increases the precision and physical validity of the generated con-\nformers, while being a lighter model and faster at inference. Code is available\nhttps://github.com/shenoynikhil/ETFlow.", "keywords": "Flow Matching;molecular conformers;equivariant graph networks", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Majdi Hassan;Nikhil Shenoy;Jungyoon Lee;Hannes Stark;Stephan Thaler;Dominique Beaini", "authorids": "~Majdi_Hassan1;~Nikhil_Shenoy1;~Jungyoon_Lee1;~Hannes_Stark1;~Stephan_Thaler1;~Dominique_Beaini1", "gender": ";M;F;;;M", "homepage": "https://majhas.github.io;https://shenoynikhil.com;;;;", "dblp": ";;;;;201/8526", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;dfxka0YAAAAJ;;;;https://scholar.google.ca/citations?hl=en", "orcid": ";;;;;0000-0002-4613-9388", "linkedin": ";;jungyoon-lee-6a9aab139/;;;dbeaini/", "or_profile": "~Majdi_Hassan1;~Nikhil_Shenoy1;~Jungyoon_Lee1;~Hannes_Stark1;~Stephan_Thaler1;~Dominique_Beaini1", "aff": "Universit\u00e9 de Montr\u00e9al;University of British Columbia;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;;;Mila - Institut Qu\u00e9b\u00e9cois d'intelligence artificielle", "aff_domain": "umontreal.ca;cs.ubc.ca;mila.umontreal.ca;;;mila.quebec", "position": "MS student;MS student;MS student;;;Associate Professor", "bibtex": "@inproceedings{\nhassan2024etflow,\ntitle={{ET}-Flow: Equivariant Flow-Matching for Molecular Conformer Generation},\nauthor={Majdi Hassan and Nikhil Shenoy and Jungyoon Lee and Hannes Stark and Stephan Thaler and Dominique Beaini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=avsZ9OlR60}\n}", "github": "", "reviewers": "eBNM;281V;AAmo", "pdf_size": 2362537, "rating": "4;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;4;3", "wc_summary": "79;28;85", "wc_strengths": "13;37;97", "wc_weaknesses": "132;87;155", "wc_questions": "4;81;101", "wc_limitations": "1;38;21", "wc_review": "229;271;459", "wc_reply_reviewers": "93;109;37", "wc_reply_authors": "164;256;23", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 64.0, 25.573423705088842 ], "wc_strengths_avg": [ 49.0, 35.32704346531139 ], "wc_weaknesses_avg": [ 124.66666666666667, 28.241026106633512 ], "wc_questions_avg": [ 62.0, 41.817061908587824 ], "wc_limitations_avg": [ 20.0, 15.121728296285006 ], "wc_review_avg": [ 319.6666666666667, 100.0044443456834 ], "wc_reply_reviewers_avg": [ 79.66666666666667, 30.8688984074406 ], "wc_reply_authors_avg": [ 147.66666666666666, 95.82043391446082 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2774891509931815133&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umontreal.ca;cs.ubc.ca;mila.umontreal.ca;;;mila.quebec", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of British Columbia;University of Montreal;Mila - Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;Montreal Institute for Learning Algorithms;Artificial Intelligence", "aff_unique_url": "https://www.umontreal.ca;https://www.ubc.ca;https://www.mila.quebec;https://mila.quebec", "aff_unique_abbr": "UdeM;UBC;MILA;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Fairness in Social Influence Maximization via Optimal Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94521", "id": "axW8xvQPkF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=axW8xvQPkF", "openreview": "https://openreview.net/forum?id=axW8xvQPkF", "poster": "/media/PosterPDFs/NeurIPS%202024/94521.png?t=1730287949.4205728", "project": "", "author_site": "Shubham Chowdhary, Giulia De Pasquale, Nicolas Lanzetti, Ana-Andreea Stoica, Florian Dorfler", "tldr": "", "abstract": "We study fairness in social influence maximization, whereby one seeks to select\nseeds that spread a given information throughout a network, ensuring balanced\noutreach among different communities (e.g. demographic groups). In the literature,\nfairness is often quantified in terms of the expected outreach within individual\ncommunities. In this paper, we demonstrate that such fairness metrics can be\nmisleading since they overlook the stochastic nature of information diffusion\nprocesses. When information diffusion occurs in a probabilistic manner, multiple\noutreach scenarios can occur. As such, outcomes such as \u201cIn 50% of the cases, no\none in group 1 gets the information, while everyone in group 2 does, and in the\nother 50%, it is the opposite\u201d, which always results in largely unfair outcomes,\nare classified as fair by a variety of fairness metrics in the literature. We tackle\nthis problem by designing a new fairness metric, mutual fairness, that captures\nvariability in outreach through optimal transport theory. We propose a new seed-\nselection algorithm that optimizes both outreach and mutual fairness, and we show\nits efficacy on several real datasets. We find that our algorithm increases fairness\nwith only a minor decrease (and at times, even an increase) in efficiency.", "keywords": "Fairness;social influence maximization;optimal transport", "primary_area": "fairness", "supplementary_material": "", "author": "Shubham Chowdhary;Giulia De Pasquale;Nicolas Lanzetti;Ana-Andreea Stoica;Florian Dorfler", "authorids": "~Shubham_Chowdhary1;~Giulia_De_Pasquale1;~Nicolas_Lanzetti1;~Ana-Andreea_Stoica1;~Florian_Dorfler1", "gender": "M;F;M;;M", "homepage": "https://codeviser.github.io/;https://sites.google.com/view/giuliadepasquale/home-page?authuser=2;http://people.ee.ethz.ch/~lnicolas/index.html;;http://people.ee.ethz.ch/~floriand/", "dblp": ";;247/4363;;", "google_scholar": "https://scholar.google.com/citations?hl=en;61JYIhYAAAAJ;gWJV1rQAAAAJ;;https://scholar.google.com/citations?view_op=list_works", "orcid": "0009-0009-3437-3933;;0000-0002-9128-1412;;0000-0002-9649-5305", "linkedin": "shubhamchowdhary/;giulia-de-pasquale-9189aa106/;;;", "or_profile": "~Shubham_Chowdhary1;~Giulia_De_Pasquale1;~Nicolas_Lanzetti1;~Ana-Andreea_Stoica1;~Florian_Dorfler1", "aff": "ETHZ - ETH Zurich;ETH Zurich;ETHZ - ETH Zurich;;", "aff_domain": "ethz.ch;eth.ch;ethz.ch;;", "position": "Researcher;Postdoc;PhD student;;", "bibtex": "@inproceedings{\nchowdhary2024fairness,\ntitle={Fairness in Social Influence Maximization via Optimal Transport},\nauthor={Shubham Chowdhary and Giulia De Pasquale and Nicolas Lanzetti and Ana-Andreea Stoica and Florian Dorfler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=axW8xvQPkF}\n}", "github": "", "reviewers": "1BeY;wcrW;vnZX;rBPP", "pdf_size": 1690255, "rating": "5;5;7;8", "confidence": "4;5;3;5", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "65;69;289;96", "wc_strengths": "19;29;19;93", "wc_weaknesses": "96;267;9;40", "wc_questions": "10;158;13;22", "wc_limitations": "3;25;1;27", "wc_review": "193;548;331;278", "wc_reply_reviewers": "0;0;0;17", "wc_reply_authors": "103;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 129.75, 92.71293059762483 ], "wc_strengths_avg": [ 40.0, 30.870698080866262 ], "wc_weaknesses_avg": [ 103.0, 99.68701018688444 ], "wc_questions_avg": [ 50.75, 62.07807584002584 ], "wc_limitations_avg": [ 14.0, 12.041594578792296 ], "wc_review_avg": [ 337.5, 131.12303382701302 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 25.75, 44.60030829489859 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.058025885318565944, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uVTzj4NorhsJ:scholar.google.com/&scioq=Fairness+in+Social+Influence+Maximization+via+Optimal+Transport&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "ethz.ch;eth.ch;ethz.ch;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Streaming Long Video Understanding with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94520", "id": "axX62CQJpa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=axX62CQJpa", "openreview": "https://openreview.net/forum?id=axX62CQJpa", "poster": "", "project": "", "author_site": "Rui Qian, Xiaoyi Dong, Pan Zhang, Yuhang Zang, Shuangrui Ding, Dahua Lin, Jiaqi Wang", "tldr": "", "abstract": "This paper presents VideoStreaming, an advanced vision-language large model (VLLM) for video understanding, that capably understands arbitrary-length video with a constant number of video tokens streamingly encoded and adaptively selected.\nThe challenge of video understanding in the vision language area mainly lies in the significant computational burden caused by the great number of tokens extracted from long videos. Previous works rely on sparse sampling or frame compression to reduce tokens. However, such approaches either disregard temporal information in a long time span or sacrifice spatial details, resulting in flawed compression. \nTo address these limitations, our VideoStreaming has two core designs: Memory-Propagated Streaming Encoding and Adaptive Memory Selection. The Memory-Propagated Streaming Encoding architecture segments long videos into short clips and sequentially encodes each clip with a propagated memory. In each iteration, we utilize the encoded results of the preceding clip as historical memory, which is integrated with the current clip to distill a condensed representation that encapsulates the video content up to the current timestamp. This method not only incorporates long-term temporal dynamics into the streaming encoding process but also yields a fixed-length memory as a global representation for arbitrarily long videos. After the encoding process, the Adaptive Memory Selection strategy selects a constant number of question-related memories from all the historical memories, and feeds them into the LLM to generate informative responses. The question-related selection reduces redundancy within the memories, enabling efficient and precise video understanding. Meanwhile, the disentangled video extraction and reasoning design allows the LLM to answer different questions about a video by directly selecting corresponding memories, without the need to encode the whole video for each question. Through extensive experiments, our model achieves superior performance and higher efficiency on long video benchmarks, showcasing precise temporal comprehension for detailed question answering.", "keywords": "LLM;Long Video Understanding;Memory-Propagated Streaming Encoding;Adaptive Memory Selection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Rui Qian;Xiaoyi Dong;Pan Zhang;Yuhang Zang;Shuangrui Ding;Dahua Lin;Jiaqi Wang", "authorids": "~Rui_Qian2;~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Shuangrui_Ding1;~Dahua_Lin1;~Jiaqi_Wang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/shvdiwnkozbw;;https://panzhang0212.github.io/;https://yuhangzang.github.io;https://mark12ding.github.io;http://dahua.site;https://myownskyw7.github.io/", "dblp": ";230/3711;;230/4433;267/1780;53/6088;44/740-3", "google_scholar": "QehSWiQAAAAJ;FscToE0AAAAJ;moHH480AAAAJ;hW23VKIAAAAJ;RZOIVhYAAAAJ;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ", "orcid": ";;;0000-0003-1110-5062;;;", "linkedin": ";;;yuhang-zang/;;;", "or_profile": "~Rui_Qian2;~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Shuangrui_Ding1;~Dahua_Lin1;~Jiaqi_Wang1", "aff": "The Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ie.cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn", "position": "PhD student;Researcher;Researcher;Researcher;PhD student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nqian2024streaming,\ntitle={Streaming Long Video Understanding with Large Language Models},\nauthor={Rui Qian and Xiaoyi Dong and Pan Zhang and Yuhang Zang and Shuangrui Ding and Dahua Lin and Jiaqi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=axX62CQJpa}\n}", "github": "", "reviewers": "QjSL;sqPz;9Ye5;FUM4", "pdf_size": 3648954, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;2", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "118;58;55;85", "wc_strengths": "55;120;76;25", "wc_weaknesses": "287;475;107;103", "wc_questions": "143;165;15;43", "wc_limitations": "8;71;1;2", "wc_review": "611;889;254;258", "wc_reply_reviewers": "134;228;19;42", "wc_reply_authors": "56;203;110;121", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;3;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.0, 25.367301787931645 ], "wc_strengths_avg": [ 69.0, 34.57600323924094 ], "wc_weaknesses_avg": [ 243.0, 153.17963311093285 ], "wc_questions_avg": [ 91.5, 63.75539192884003 ], "wc_limitations_avg": [ 20.5, 29.278831943914703 ], "wc_review_avg": [ 503.0, 265.8411179633429 ], "wc_reply_reviewers_avg": [ 105.75, 82.66309636107276 ], "wc_reply_authors_avg": [ 122.5, 52.58564442887431 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18327476433359860252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ie.cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;0;0;2", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.shailab.org/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "CUHK;Shanghai AI Lab;SAIL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "VCR-GauS: View Consistent Depth-Normal Regularizer for Gaussian Surface Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94519", "id": "axnjX20Ssl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=axnjX20Ssl", "openreview": "https://openreview.net/forum?id=axnjX20Ssl", "poster": "", "project": "", "author_site": "Hanlin Chen, Fangyin Wei, Chen Li, Tianxin Huang, Yunsong Wang, Gim Hee Lee", "tldr": "", "abstract": "Although 3D Gaussian Splatting has been widely studied because of its realistic and efficient novel-view synthesis, it is still challenging to extract a high-quality surface from the point-based representation. Previous works improve the surface by incorporating geometric priors from the off-the-shelf normal estimator. However, there are two main limitations: 1) Supervising normal rendered from 3D Gaussians updates only the rotation parameter while neglecting other geometric parameters; 2) The inconsistency of predicted normal maps across multiple views may lead to severe reconstruction artifacts. In this paper, we propose a Depth-Normal regularizer that directly couples normal with other geometric parameters, leading to full updates of the geometric parameters from normal regularization. We further propose a confidence term to mitigate inconsistencies of normal predictions across multiple views. Moreover, we also introduce a densification and splitting strategy to regularize the size and distribution of 3D Gaussians for more accurate surface modeling. Compared with Gaussian-based baselines, experiments show that our approach obtains better reconstruction quality and maintains competitive appearance quality at faster training speed and 100+ FPS rendering. Our code will be made open-source upon paper acceptance.", "keywords": "Gaussian Splatting;3D Reconstruction;Surface Reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/159c20b60968fac7538a8b91e6c55370adfb02f4.zip", "author": "Hanlin Chen;Fangyin Wei;Chen Li;Tianxin Huang;Yunsong Wang;Gim Hee Lee", "authorids": "~Hanlin_Chen2;~Fangyin_Wei1;~Chen_Li13;~Tianxin_Huang1;~Yunsong_Wang1;~Gim_Hee_Lee1", "gender": "M;;F;M;M;", "homepage": "https://hlinchen.github.io/;https://weify627.github.io;https://chaneyddtt.github.io/;https://tianxinhuang.github.io/;https://wangys16.github.io/;https://www.comp.nus.edu.sg/~leegh/", "dblp": ";203/9145;164/3294-38;251/3784;182/0203;49/9455", "google_scholar": "fBpYOzAAAAAJ;w9rFBkEAAAAJ;6_rJ2pcAAAAJ;https://scholar.google.com.hk/citations?user=Fg7WYfcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": "0000-0002-3323-8213;;0009-0000-6807-3490;;;0000-0002-1583-0475", "linkedin": ";;;;yunsong-wang-32ab98202/;", "or_profile": "~Hanlin_Chen2;~Fangyin_Wei1;~Chen_Li13;~Tianxin_Huang1;~Yunsong_Wang1;~Gim_Hee_Lee1", "aff": "National University of Singapore;Princeton University; National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;princeton.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;Postdoc;Research Fellow;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2024vcrgaus,\ntitle={{VCR}-GauS: View Consistent Depth-Normal Regularizer for Gaussian Surface Reconstruction},\nauthor={Hanlin Chen and Fangyin Wei and Chen Li and Tianxin Huang and Yunsong Wang and Gim Hee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=axnjX20Ssl}\n}", "github": "", "reviewers": "6weG;5p3w;n7KL;kSsq", "pdf_size": 41411444, "rating": "6;7;7;7", "confidence": "5;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "61;116;74;76", "wc_strengths": "55;54;36;74", "wc_weaknesses": "136;150;237;36", "wc_questions": "26;222;59;5", "wc_limitations": "19;26;26;1", "wc_review": "297;568;432;192", "wc_reply_reviewers": "184;1679;135;105", "wc_reply_authors": "460;4218;0;19", "reply_reviewers": "2;7;1;1", "reply_authors": "3;11;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.75, 20.595812681222366 ], "wc_strengths_avg": [ 54.75, 13.442005058770064 ], "wc_weaknesses_avg": [ 139.75, 71.31050062929022 ], "wc_questions_avg": [ 78.0, 85.33756499924286 ], "wc_limitations_avg": [ 18.0, 10.222524150130436 ], "wc_review_avg": [ 372.25, 141.45736990344477 ], "wc_reply_reviewers_avg": [ 525.75, 666.426055538047 ], "wc_reply_authors_avg": [ 1174.25, 1766.9205379699451 ], "reply_reviewers_avg": [ 2.75, 2.48746859276655 ], "reply_authors_avg": [ 4.25, 3.960744879438715 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15270186805689623870&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nus.edu;princeton.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "National University of Singapore;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.princeton.edu", "aff_unique_abbr": "NUS;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "OlympicArena: Benchmarking Multi-discipline Cognitive Reasoning for Superintelligent AI", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97617", "id": "ayF8bEKYQy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ayF8bEKYQy", "openreview": "https://openreview.net/forum?id=ayF8bEKYQy", "poster": "/media/PosterPDFs/NeurIPS%202024/97617.png?t=1731689532.7700467", "project": "", "author_site": "Zhen Huang, Zengzhi Wang, Shijie Xia, Xuefeng Li, Haoyang Zou, Ruijie Xu, Run-Ze Fan, Lyumanshan Ye, Ethan Chern, Yixin Ye, Yikai Zhang, Yuqing Yang, Ting Wu, Binjie Wang, Shichao Sun, Yang Xiao, Yiyuan Li, Fan Zhou, Steffi Chern, Yiwei Qin, Yan Ma, Jiadi Su, Yixiu Liu, Yuxiang Zheng, Shaoting Zhang, Dahua Lin, Yu Qiao, Pengfei Liu", "tldr": "", "abstract": "The evolution of Artificial Intelligence (AI) has been significantly accelerated by advancements in Large Language Models (LLMs) and Large Multimodal Models (LMMs), gradually showcasing potential cognitive reasoning abilities in problem-solving and scientific discovery (i.e., AI4Science) once exclusive to human intellect. To comprehensively evaluate current models' performance in cognitive reasoning abilities, we introduce OlympicArena, which includes 11,163 bilingual problems across both text-only and interleaved text-image modalities. These challenges encompass a wide range of disciplines spanning seven fields and 62 international Olympic competitions, rigorously examined for data leakage. We argue that the challenges in Olympic competition problems are ideal for evaluating AI's cognitive reasoning due to their complexity and interdisciplinary nature, which are essential for tackling complex scientific challenges and facilitating discoveries. Beyond evaluating performance across various disciplines using answer-only criteria, we conduct detailed experiments and analyses from multiple perspectives. We delve into the models' cognitive reasoning abilities, their performance across different modalities, and their outcomes in process-level evaluations, which are vital for tasks requiring complex reasoning with lengthy solutions. Our extensive evaluations reveal that even advanced models like GPT-4o only achieve a 39.97\\% overall accuracy (28.67\\% for mathematics and 29.71\\% for physics), illustrating current AI limitations in complex reasoning and multimodal integration. Through the OlympicArena, we aim to advance AI towards superintelligence, equipping it to address more complex challenges in science and beyond. We also provide a comprehensive set of resources to support AI research, including a benchmark dataset, an open-source annotation platform, a detailed evaluation tool, and a leaderboard with automatic submission features.", "keywords": "LLMs;LMMs;Reasoning;Evaluation", "primary_area": "", "supplementary_material": "/attachment/abaa30874661153722366141edca4478784dcbe8.zip", "author": "Zhen Huang;Zengzhi Wang;Shijie Xia;Xuefeng Li;Haoyang Zou;Ruijie Xu;Run-Ze Fan;Lyumanshan Ye;Ethan Chern;Yixin Ye;Yikai Zhang;Yuqing Yang;Ting Wu;Binjie Wang;Shichao Sun;Yang Xiao;Yiyuan Li;Fan Zhou;Steffi Chern;Yiwei Qin;Yan Ma;Jiadi Su;Yixiu Liu;Yuxiang Zheng;Shaoting Zhang;Dahua Lin;Yu Qiao;Pengfei Liu", "authorids": "~Zhen_Huang9;~Zengzhi_Wang1;~Shijie_Xia2;~Xuefeng_Li6;~Haoyang_Zou1;~Ruijie_Xu2;~Run-Ze_Fan1;~Lyumanshan_Ye1;~Ethan_Chern1;~Yixin_Ye1;~Yikai_Zhang2;~Yuqing_Yang2;~Ting_Wu2;~Binjie_Wang1;~Shichao_Sun1;~Yang_Xiao6;~Yiyuan_Li1;~Fan_Zhou6;~Steffi_Chern1;~Yiwei_Qin1;~Yan_Ma1;~Jiadi_Su1;~Yixiu_Liu2;~Yuxiang_Zheng3;~Shaoting_Zhang4;~Dahua_Lin1;~Yu_Qiao1;~Pengfei_Liu1", "gender": "M;M;M;M;M;M;M;F;;F;M;F;;M;M;M;;M;;F;Not Specified;M;M;M;M;M;;M", "homepage": "https://huangzhen02.github.io/;https://sinclaircoder.github.io/;https://shijie-xia.github.io/;https://github.com/hongtangshui;https://github.com/haoy-zzz;;https://rzfan525.github.io/;https://github.com/lyumanshanye;;https://bleaves.github.io/;https://arist12.github.io/;https://ayyyq.github.io/;;;https://shichaosun.github.io;https://xiaoyang66.github.io/;https://nativeatom.github.io/;https://koalazf99.github.io/;https://steffichern.github.io/;;https://yanmaaaa.github.io/;https://github.com/JoyBoy-Su;https://github.com/yxliu0903;https://github.com/zeetc;;http://dahua.site;;http://pfliu.com/", "dblp": ";34/133;375/1162;;;;355/5702;;;;;91/9064-4;;;;;14/5062;;;143/0512.html;;;;52/4003;53/3894;53/6088;;34/3381-3", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;;;https://scholar.google.com.hk/citations?user=mhot7AUAAAAJ;;;YOL-RtEAAAAJ;https://scholar.google.ca/citations?user=I_EmXRYAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;rLqDPtQAAAAJ;XdQcrwUAAAAJ;qi8UzmkAAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.co.jp/citations?user=tnJyAAoAAAAJ;;;kXfoU4oAAAAJ;oiBMWK4AAAAJ;GMzzRRUAAAAJ;;oIz_CYEAAAAJ", "orcid": ";0000-0002-6146-6248;;;;0009-0006-6218-9197;0000-0002-8505-7756;;;;;;;;;0009-0009-6191-1522;;;;;0009-0000-9262-3771;;;;0000-0002-8719-448X;;;", "linkedin": ";;;;;ruijie-xu-49a281238;;;;;;;;binjie-wang-91514b25a/;;;;;steffichern/;yiwei-qin-b0b57a22b/;;;;;;;;", "or_profile": "~Zhen_Huang9;~Zengzhi_Wang1;~Shijie_Xia2;~Xuefeng_Li6;~Haoyang_Zou1;~Ruijie_Xu2;~Run-Ze_Fan1;~Lyumanshan_Ye1;~Ethan_Chern1;~Yixin_Ye1;~Yikai_Zhang2;~Yuqing_Yang2;~Ting_Wu2;~Binjie_Wang1;~Shichao_Sun1;~Yang_Xiao6;~Yiyuan_Li1;~Fan_Zhou6;~Steffi_Chern1;~Yiwei_Qin1;~Yan_Ma1;~Jiadi_Su1;~Yixiu_Liu2;~Yuxiang_Zheng3;~Shaoting_Zhang4;~Dahua_Lin1;~Yu_Qiao1;~Pengfei_Liu1", "aff": "Soochow University;;Fudan University;Shanghai Jiaotong University;Fudan University;Shanghai Jiaotong University;University of Chinese Academy of Sciences;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;Fudan University;;Fudan University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University;Carnegie Mellon University;Shanghai Jiaotong University;Carnegie Mellon University;Shanghai Jiaotong University;Fudan University;Fudan University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;;Shanghai Jiaotong University", "aff_domain": "suda.edu.cn;;fudan.edu.cn;sjtu.edu.cn;fudan.edu.cn;sjtu.edu.cn;mails.ucas.ac.cn;sjtu.edu;;sjtu.edu.cn;sjtu.edu.cn;fudan.edu.cn;;fudan.edu.cn;polyu.edu.hk;polyu.edu.hk;cmu.edu;sjtu.edu.cn;cmu.edu;sjtu.edu.cn;fudan.edu.cn;fudan.edu.cn;sjtu.edu.cn;sjtu.edu.cn;pjlab.org.cn;cuhk.edu.hk;;sjtu.edu", "position": "Undergrad student;;Undergrad student;PhD student;Undergrad student;Undergrad student;MS student;MS student;;Undergrad student;Undergrad student;MS student;;Undergrad student;PhD student;PhD student;MS student;MS student;Undergrad student;Researcher;PhD student;Undergrad student;MS student;MS student;Full Professor;Associate Professor;;Associate Professor", "bibtex": "@inproceedings{\nhuang2024olympicarena,\ntitle={OlympicArena: Benchmarking Multi-discipline Cognitive Reasoning for Superintelligent {AI}},\nauthor={Zhen Huang and Zengzhi Wang and Shijie Xia and Xuefeng Li and Haoyang Zou and Ruijie Xu and Run-Ze Fan and Lyumanshan Ye and Ethan Chern and Yixin Ye and Yikai Zhang and Yuqing Yang and Ting Wu and Binjie Wang and Shichao Sun and Yang Xiao and Yiyuan Li and Fan Zhou and Steffi Chern and Yiwei Qin and Yan Ma and Jiadi Su and Yixiu Liu and Yuxiang Zheng and Shaoting Zhang and Dahua Lin and Yu Qiao and Pengfei Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ayF8bEKYQy}\n}", "github": "", "reviewers": "6XA5;eKD6;vFpF", "pdf_size": 5555924, "rating": "6;7;7", "confidence": "5;3;3", "wc_summary_and_contributions": "159;87;148", "wc_strengths": "51;99;124", "wc_improvement": "391;93;88", "wc_limitations": "69;20;1", "wc_correctness": "21;43;1", "wc_clarity": "10;81;1", "wc_relation_to_prior_work": "25;87;1", "wc_documentation": "11;45;1", "wc_additional_feedback": "1;1;1", "wc_review": "738;556;366", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 131.33333333333334, 31.668421004036322 ], "wc_strengths_avg": [ 91.33333333333333, 30.291179500896884 ], "wc_improvement_avg": [ 190.66666666666666, 141.67176461415622 ], "wc_limitations_avg": [ 30.0, 28.647280266487194 ], "wc_correctness_avg": [ 21.666666666666668, 17.15290710702481 ], "wc_clarity_avg": [ 30.666666666666668, 35.78019315518325 ], "wc_relation_to_prior_work_avg": [ 37.666666666666664, 36.23380864453651 ], "wc_documentation_avg": [ 19.0, 18.83259585576738 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 553.3333333333334, 151.88006964561072 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 28, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11003499482518109410&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "suda.edu.cn;;fudan.edu.cn;sjtu.edu.cn;fudan.edu.cn;sjtu.edu.cn;mails.ucas.ac.cn;sjtu.edu;;sjtu.edu.cn;sjtu.edu.cn;fudan.edu.cn;;fudan.edu.cn;polyu.edu.hk;polyu.edu.hk;cmu.edu;sjtu.edu.cn;cmu.edu;sjtu.edu.cn;fudan.edu.cn;fudan.edu.cn;sjtu.edu.cn;sjtu.edu.cn;pjlab.org.cn;cuhk.edu.hk;;sjtu.edu", "author_num": 28, "aff_unique_index": "0;1;2;1;2;3;2;2;2;1;1;4;4;5;2;5;2;1;1;2;2;6;7;2", "aff_unique_norm": "Soochow University;Fudan University;Shanghai Jiao Tong University;University of Chinese Academy of Sciences;Hong Kong Polytechnic University;Carnegie Mellon University;Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.fudan.edu.cn;https://www.sjtu.edu.cn;http://www.ucas.ac.cn;https://www.polyu.edu.hk;https://www.cmu.edu;http://www.shailab.org/;https://www.cuhk.edu.hk", "aff_unique_abbr": "Soochow U;Fudan;SJTU;UCAS;PolyU;CMU;Shanghai AI Lab;CUHK", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;0;0;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Elucidating the Design Space of Dataset Condensation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94518", "id": "az1SLLsmdR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=az1SLLsmdR", "openreview": "https://openreview.net/forum?id=az1SLLsmdR", "poster": "/media/PosterPDFs/NeurIPS%202024/94518.png?t=1729681302.5771904", "project": "", "author_site": "Shitong Shao, Zikai Zhou, Huanran Chen, Zhiqiang Shen", "tldr": "", "abstract": "Dataset condensation, a concept within $\\textit{data-centric learning}$, aims to efficiently transfer critical attributes from an original dataset to a synthetic version, meanwhile maintaining both diversity and realism of syntheses. This approach can significantly improve model training efficiency and is also adaptable for multiple application areas. Previous methods in dataset condensation have faced several challenges: some incur high computational costs which limit scalability to larger datasets ($\\textit{e.g.,}$ MTT, DREAM, and TESLA), while others are restricted to less optimal design spaces, which could hinder potential improvements, especially in smaller datasets ($\\textit{e.g.,}$ SRe$^2$L, G-VBSM, and RDED). To address these limitations, we propose a comprehensive designing-centric framework that includes specific, effective strategies like implementing soft category-aware matching, adjusting the learning rate schedule and applying small batch-size. These strategies are grounded in both empirical evidence and theoretical backing. Our resulting approach, $\\textbf{E}$lucidate $\\textbf{D}$ataset $\\textbf{C}$ondensation ($\\textbf{EDC}$), establishes a benchmark for both small and large-scale dataset condensation. In our testing, EDC achieves state-of-the-art accuracy, reaching 48.6% on ImageNet-1k with a ResNet-18 model at an IPC of 10, which corresponds to a compression ratio of 0.78\\%. This performance surpasses those of SRe$^2$L, G-VBSM, and RDED by margins of 27.3%, 17.2%, and 6.6%, respectively. Code is available at: https://github.com/shaoshitong/EDC.", "keywords": "dataset condensation;efficient computer vision;design space", "primary_area": "machine_vision", "supplementary_material": "/attachment/d2c8240c3cd0a7dab0ffcfc9d2410be94cf95bc2.zip", "author": "Shitong Shao;Zikai Zhou;Huanran Chen;Zhiqiang Shen", "authorids": "~Shitong_Shao1;~Zikai_Zhou3;~Huanran_Chen1;~Zhiqiang_Shen1", "gender": "M;M;M;", "homepage": "https://shaoshitong.github.io/;https://klayand.github.io/;https://huanranchen.github.io/;", "dblp": "329/2735;;329/6558;", "google_scholar": "hmUOaNcAAAAJ;u6TjscAAAAAJ;https://scholar.google.co.jp/citations?user=QYsKXccAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shitong_Shao1;~Zikai_Zhou3;~Huanran_Chen1;~Zhiqiang_Shen1", "aff": "Southeast University;Hohai University;;", "aff_domain": "seu.edu.cn;hhu.edu.cn;;", "position": "MS student;Undergrad student;;", "bibtex": "@inproceedings{\nshao2024elucidating,\ntitle={Elucidating the Design Space of Dataset Condensation},\nauthor={Shitong Shao and Zikai Zhou and Huanran Chen and Zhiqiang Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=az1SLLsmdR}\n}", "github": "", "reviewers": "ZPCz;gMw3;r3SQ;RdSF", "pdf_size": 24021767, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "55;73;54;42", "wc_strengths": "95;52;41;59", "wc_weaknesses": "200;105;204;112", "wc_questions": "81;44;2;3", "wc_limitations": "59;1;2;11", "wc_review": "490;275;303;227", "wc_reply_reviewers": "7;14;379;32", "wc_reply_authors": "16;0;1082;22", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;4;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 56.0, 11.067971810589327 ], "wc_strengths_avg": [ 61.75, 20.24073862288627 ], "wc_weaknesses_avg": [ 155.25, 46.83681778259492 ], "wc_questions_avg": [ 32.5, 32.72995569810628 ], "wc_limitations_avg": [ 18.25, 23.84716964337697 ], "wc_review_avg": [ 323.75, 99.75814503086953 ], "wc_reply_reviewers_avg": [ 108.0, 156.72747047024015 ], "wc_reply_authors_avg": [ 280.0, 463.10473977276456 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3240407176403362950&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "seu.edu.cn;hhu.edu.cn;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Southeast University;Hohai University", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.hohai.edu.cn", "aff_unique_abbr": "SEU;Hohai", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "azkuhJBZXi", "title": "CreDes: Causal Reasoning Enhancement and Dual-End Searching for Solving Long-Range Reasoning Problems using LLMs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated limitations in handling combinatorial optimization problems involving long-range reasoning, partially due to causal hallucinations and huge search space. As for causal hallucinations, i.e., the inconsistency between reasoning and corresponding state transition, this paper introduces the Causal Relationship Enhancement (CRE) mechanism combining cause-effect interventions and the Average Treatment Effect (ATE) to guarantee the solid causal rightness between each step of reasoning and state transition. As for the long causal range and huge search space limiting the performances of existing models featuring single-direction search, a Dual-End Searching (DES) approach is proposed to seek solutions by simultaneously starting from both the initial and goal states on the causal probability tree. By integrating CRE and DES (CreDes), our model has realized simultaneous multi-step reasoning, circumventing the inefficiencies from cascading multiple one-step reasoning like the Chain-of-Thought (CoT). Experiments demonstrate that CreDes significantly outperforms existing State-Of-The-Art (SOTA) solutions in long-range reasoning tasks in terms of both accuracy and time efficiency.", "keywords": "Causal Reasoning Enhancement;Dual-End Searching;Long-Range Reasoning;LLM", "primary_area": "causal_inference", "supplementary_material": "", "author": "Kangsheng Wang;Xiao Zhang;Huimin Ma;Tianyu Hu;Hao Liu;Songde Han", "authorids": "~Kangsheng_Wang1;~Xiao_Zhang35;~Huimin_Ma1;~Tianyu_Hu1;~Hao_Liu46;~Songde_Han2", "gender": ";Not Specified;F;M;M;M", "homepage": ";;http://server.3dimagelab.cn:5000;http://scce.ustb.edu.cn/shiziduiwu/jiaoshixinxi/2020-11-09/189.html;;https://www.baidu.com", "dblp": ";;69/7694-1;;;", "google_scholar": ";;32hwVLEAAAAJ;;;", "orcid": ";0009-0007-2804-6915;;;0009-0008-4489-4233;", "linkedin": ";;;;;", "or_profile": "~Kangsheng_Wang1;~Xiao_Zhang35;~Huimin_Ma1;~Tianyu_Hu1;~Hao_Liu46;~Songde_Han2", "aff": ";University of Science and Technology Beijing;University of Science and Technology Beijing;University of Science and Technology Beijing;University of Science and Technology Beijing;University of Science and Technology Beijing", "aff_domain": ";ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;ustb.edu.cn", "position": ";PhD student;Full Professor;Assistant Professor;MS student;MS student", "bibtex": "@misc{\nanonymous2024credes,\ntitle={CreDes: Causal Reasoning Enhancement and Dual-End Searching for Solving Long-Range Reasoning Problems using {LLM}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=azkuhJBZXi}\n}", "github": "", "project": "", "reviewers": "MmUh;Y4qM;BKzM;yRix;nWAC", "site": "https://openreview.net/forum?id=azkuhJBZXi", "pdf_size": 364389, "rating": "3;4;5;5;5", "confidence": "3;4;3;3;4", "soundness": "2;2;2;3;2", "novelty": "2;2;2;3;2", "presentation": "2;2;2;2;3", "wc_summary": "66;92;69;107;99", "wc_strengths": "11;44;93;64;44", "wc_weaknesses": "191;285;145;517;151", "wc_questions": "31;37;318;5;150", "wc_limitations": "2;38;1;21;1", "wc_review": "301;496;626;714;445", "wc_reply_reviewers": "0;31;345;24;124", "wc_reply_authors": "65;152;1730;160;1518", "reply_reviewers": "0;1;2;1;2", "reply_authors": "2;3;6;3;6", "rating_avg": [ 4.4, 0.7999999999999999 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.2, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 86.6, 16.32911510156016 ], "wc_strengths_avg": [ 51.2, 26.947356085523495 ], "wc_weaknesses_avg": [ 257.8, 138.93653227283312 ], "wc_questions_avg": [ 108.2, 116.14026003070599 ], "wc_limitations_avg": [ 12.6, 14.813507349712962 ], "wc_review_avg": [ 516.4, 143.50553996274846 ], "wc_reply_reviewers_avg": [ 104.8, 127.29713272497538 ], "wc_reply_authors_avg": [ 725.0, 737.8384647061984 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 4.0, 1.6733200530681511 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1020620726159658, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13927535567823786337&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology Beijing", "aff_unique_dep": "", "aff_unique_url": "http://www.ustb.edu.cn", "aff_unique_abbr": "USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Using Noise to Infer Aspects of Simplicity Without Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94517", "id": "b172ac0R4L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b172ac0R4L", "openreview": "https://openreview.net/forum?id=b172ac0R4L", "poster": "", "project": "", "author_site": "Zachery Boner, Harry Chen, Lesia Semenova, Ronald Parr, Cynthia Rudin", "tldr": "", "abstract": "Noise in data significantly influences decision-making in the data science process. In fact, it has been shown that noise in data generation processes leads practitioners to find simpler models. However, an open question still remains: what is the degree of model simplification we can expect under different noise levels? In this work, we address this question by investigating the relationship between the amount of noise and model simplicity across various hypothesis spaces, focusing on decision trees and linear models. We formally show that noise acts as an implicit regularizer for several different noise models. Furthermore, we prove that Rashomon sets (sets of near-optimal models) constructed with noisy data tend to contain simpler models than corresponding Rashomon sets with non-noisy data. Additionally, we show that noise expands the set of ``good'' features and consequently enlarges the set of models that use at least one good feature. Our work offers theoretical guarantees and practical insights for practitioners and policymakers on whether simple-yet-accurate machine learning models are likely to exist, based on knowledge of noise levels in the data generation process.", "keywords": "interpretable ML;simple models;Rashomon sets", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/328a893e2837e7ff7e3ee5db42c2c4b5009ad872.zip", "author": "Zachery Boner;Harry Chen;Lesia Semenova;Ronald Parr;Cynthia Rudin", "authorids": "~Zachery_Boner1;~Harry_Chen2;~Lesia_Semenova1;~Ronald_Parr1;~Cynthia_Rudin1", "gender": "M;M;;Not Specified;", "homepage": "https://zackboner.faculty.bio/;;;https://users.cs.duke.edu/~parr/;", "dblp": "382/7881;;;26/4670;", "google_scholar": "GtdCQo8AAAAJ;;;https://scholar.google.com.tw/citations?user=b-GJ3QIAAAAJ;", "orcid": "0000-0003-0089-8917;;;;", "linkedin": "zachery-boner;harry-chen/;;;", "or_profile": "~Zachery_Boner1;~Harry_Chen2;~Lesia_Semenova1;~Ronald_Parr1;~Cynthia_Rudin1", "aff": "Department of Computer Science, Duke University;Duke University;;Duke University;", "aff_domain": "cs.duke.edu;duke.edu;;duke.edu;", "position": "PhD student;Undergrad student;;Full Professor;", "bibtex": "@inproceedings{\nboner2024using,\ntitle={Using Noise to Infer Aspects of Simplicity Without Learning},\nauthor={Zachery Boner and Harry Chen and Lesia Semenova and Ronald Parr and Cynthia Rudin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b172ac0R4L}\n}", "github": "", "reviewers": "5HeG;R9J1;aFkG;HAB8", "pdf_size": 871155, "rating": "6;6;6;8", "confidence": "3;2;4;3", "soundness": "3;4;3;4", "novelty": "3;4;2;3", "presentation": "3;4;2;3", "wc_summary": "109;73;224;116", "wc_strengths": "82;50;28;185", "wc_weaknesses": "214;33;199;370", "wc_questions": "41;53;2;4", "wc_limitations": "3;1;1;4", "wc_review": "449;210;454;679", "wc_reply_reviewers": "23;0;15;31", "wc_reply_authors": "44;53;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 130.5, 56.39370532249144 ], "wc_strengths_avg": [ 86.25, 60.15968334358152 ], "wc_weaknesses_avg": [ 204.0, 119.29165938991711 ], "wc_questions_avg": [ 25.0, 22.416511771459895 ], "wc_limitations_avg": [ 2.25, 1.299038105676658 ], "wc_review_avg": [ 448.0, 165.86289518756146 ], "wc_reply_reviewers_avg": [ 17.25, 11.453711188955307 ], "wc_reply_authors_avg": [ 24.25, 24.457871943405053 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QE5GgS1evAgJ:scholar.google.com/&scioq=Using+Noise+to+Infer+Aspects+of+Simplicity+Without+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cs.duke.edu;duke.edu;;duke.edu;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Invertible Consistency Distillation for Text-Guided Image Editing in Around 7 Steps", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94516", "id": "b1XPHC7MQB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b1XPHC7MQB", "openreview": "https://openreview.net/forum?id=b1XPHC7MQB", "poster": "", "project": "", "author_site": "Nikita Starodubcev, Mikhail Khoroshikh, Artem Babenko, Dmitry Baranchuk", "tldr": "", "abstract": "Diffusion distillation represents a highly promising direction for achieving faithful text-to-image generation in a few sampling steps. However, despite recent successes, existing distilled models still do not provide the full spectrum of diffusion abilities, such as real image inversion, which enables many precise image manipulation methods. This work aims to enrich distilled text-to-image diffusion models with the ability to effectively encode real images into their latent space. To this end, we introduce invertible Consistency Distillation (iCD), a generalized consistency distillation framework that facilitates both high-quality image synthesis and accurate image encoding in only 3-4 inference steps. Though the inversion problem for text-to-image diffusion models gets exacerbated by high classifier-free guidance scales, we notice that dynamic guidance significantly reduces reconstruction errors without noticeable degradation in generation performance. As a result, we demonstrate that iCD equipped with dynamic guidance may serve as a highly effective tool for zero-shot text-guided image editing, competing with more expensive state-of-the-art alternatives.", "keywords": "text-guided image editing;consistency distillation;diffusion models;image inversion", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/5f600231049b6920f2163393f45592f0d8e186b8.zip", "author": "Nikita Starodubcev;Mikhail Khoroshikh;Artem Babenko;Dmitry Baranchuk", "authorids": "~Nikita_Starodubcev1;~Mikhail_Khoroshikh1;~Artem_Babenko1;~Dmitry_Baranchuk2", "gender": "M;M;M;M", "homepage": ";;;", "dblp": "344/4427;;117/4834;215/3712", "google_scholar": "o6pRm_gAAAAJ;;q885d1wAAAAJ;NiPmk8oAAAAJ", "orcid": ";;0000-0002-1830-8252;0000-0001-7660-3666", "linkedin": ";michellemoorre/;;", "or_profile": "~Nikita_Starodubcev1;~Mikhail_Khoroshikh1;~Artem_Babenko1;~Dmitry_Baranchuk2", "aff": "Higher School of Economics;Higher School of Economics, Higher School of Economics;Yandex;Higher School of Economics", "aff_domain": "hse.ru;edu.hse.ru;yandex-team.ru;hse.ru", "position": "PhD student;Undergrad student;Researcher;PhD student", "bibtex": "@inproceedings{\nstarodubcev2024invertible,\ntitle={Invertible Consistency Distillation for Text-Guided Image Editing in Around 7 Steps},\nauthor={Nikita Starodubcev and Mikhail Khoroshikh and Artem Babenko and Dmitry Baranchuk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b1XPHC7MQB}\n}", "github": "", "reviewers": "Qhsn;hEvy;QtT7;H4g5", "pdf_size": 29002233, "rating": "5;6;6;7", "confidence": "4;5;4;3", "soundness": "3;4;3;3", "novelty": "2;4;3;2", "presentation": "3;4;3;3", "wc_summary": "41;23;37;86", "wc_strengths": "52;56;57;59", "wc_weaknesses": "236;204;160;60", "wc_questions": "1;6;5;96", "wc_limitations": "1;1;7;11", "wc_review": "331;290;266;312", "wc_reply_reviewers": "44;13;24;0", "wc_reply_authors": "38;0;38;38", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 46.75, 23.62599204266352 ], "wc_strengths_avg": [ 56.0, 2.5495097567963922 ], "wc_weaknesses_avg": [ 165.0, 66.35510530471637 ], "wc_questions_avg": [ 27.0, 39.881073205218534 ], "wc_limitations_avg": [ 5.0, 4.242640687119285 ], "wc_review_avg": [ 299.75, 24.293774922806872 ], "wc_reply_reviewers_avg": [ 20.25, 16.13032857693854 ], "wc_reply_authors_avg": [ 28.5, 16.454482671904334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7965575003630568475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hse.ru;edu.hse.ru;yandex-team.ru;hse.ru", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Higher School of Economics;Yandex", "aff_unique_dep": ";", "aff_unique_url": "https://www.hse.ru;https://yandex.com", "aff_unique_abbr": "HSE;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Historical Test-time Prompt Tuning for Vision Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94515", "id": "b1ZNTgThgw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b1ZNTgThgw", "openreview": "https://openreview.net/forum?id=b1ZNTgThgw", "poster": "/media/PosterPDFs/NeurIPS%202024/94515.png?t=1731305018.0096982", "project": "", "author_site": "Jingyi Zhang, Jiaxing Huang, Xiaoqin Zhang, Ling Shao, Shijian Lu", "tldr": "", "abstract": "Test-time prompt tuning, which learns prompts online with unlabelled test samples during the inference stage, has demonstrated great potential by learning effective prompts on-the-fly without requiring any task-specific annotations. However, its performance often degrades clearly along the tuning process when the prompts are continuously updated with the test data flow, and the degradation becomes more severe when the domain of test samples changes continuously. We propose HisTPT, a Historical Test-time Prompt Tuning technique that memorizes the useful knowledge of the learnt test samples and enables robust test-time prompt tuning with the memorized knowledge. HisTPT introduces three types of knowledge banks, namely, local knowledge bank, hard-sample knowledge bank, and global knowledge bank, each of which works with different mechanisms for effective knowledge memorization and test-time prompt optimization. In addition, HisTPT features an adaptive knowledge retrieval mechanism that regularizes the prediction of each test sample by adaptively retrieving the memorized knowledge. Extensive experiments show that HisTPT achieves superior prompt tuning performance consistently while handling different visual recognition tasks (e.g., image classification, semantic segmentation, and object detection) and test samples from continuously changing domains.", "keywords": "test-time prompt tuning;vision foudation model;prompt learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jingyi Zhang;Jiaxing Huang;Xiaoqin Zhang;Ling Shao;Shijian Lu", "authorids": "~Jingyi_Zhang7;~Jiaxing_Huang2;~Xiaoqin_Zhang4;~Ling_Shao1;~Shijian_Lu1", "gender": ";M;;M;M", "homepage": ";https://jxhuang0508.github.io/;;;https://personal.ntu.edu.sg/shijian.lu/", "dblp": ";62/6016-1.html;;;42/2718", "google_scholar": ";czirNcwAAAAJ;;z84rLjoAAAAJ;https://scholar.google.com.sg/scholar?hl=en", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Jingyi_Zhang7;~Jiaxing_Huang2;~Xiaoqin_Zhang4;~Ling_Shao1;~Shijian_Lu1", "aff": ";Nanyang Technological University;;University of Chinese Academy of Sciences;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;;ucas.ac.cn;ntu.edu.sg", "position": ";Postdoc;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024historical,\ntitle={Historical Test-time Prompt Tuning for Vision Foundation Models},\nauthor={Jingyi Zhang and Jiaxing Huang and Xiaoqin Zhang and Ling Shao and Shijian Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b1ZNTgThgw}\n}", "github": "", "reviewers": "43p7;CdiG;ejR5;hT3z", "pdf_size": 5680224, "rating": "4;5;5;5", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;1;3;2", "wc_summary": "102;87;42;59", "wc_strengths": "36;52;57;50", "wc_weaknesses": "73;68;91;171", "wc_questions": "2;1;6;69", "wc_limitations": "1;1;9;1", "wc_review": "214;209;205;350", "wc_reply_reviewers": "0;0;16;15", "wc_reply_authors": "63;63;63;22", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 72.5, 23.41473894793619 ], "wc_strengths_avg": [ 48.75, 7.790218225441442 ], "wc_weaknesses_avg": [ 100.75, 41.45102531904368 ], "wc_questions_avg": [ 19.5, 28.64000698323937 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 244.5, 60.99385214921254 ], "wc_reply_reviewers_avg": [ 7.75, 7.75806032459145 ], "wc_reply_authors_avg": [ 52.75, 17.75352077758099 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=326021961961482674&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";ntu.edu.sg;;ucas.ac.cn;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;University of Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;http://www.ucas.ac.cn", "aff_unique_abbr": "NTU;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Don't Look Twice: Faster Video Transformers with Run-Length Tokenization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94514", "id": "b1ggjW00NI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b1ggjW00NI", "openreview": "https://openreview.net/forum?id=b1ggjW00NI", "poster": "", "project": "", "author_site": "Rohan Choudhury, Guanglei Zhu, Sihan Liu, Koichiro Niinuma, Kris Kitani, L\u00e1szl\u00f3 Jeni", "tldr": "", "abstract": "Video transformers are slow to train due to extremely large numbers of input tokens, even though many video tokens are repeated over time. Existing methods to remove uninformative tokens either have significant overhead, negating any speedup, or require tuning for different datasets and examples. We present Run-Length Tokenization (RLT), a simple approach to speed up video transformers inspired by run-length encoding for data compression. RLT efficiently finds and removes `runs' of patches that are repeated over time before model inference, then replaces them with a single patch and a positional encoding to represent the resulting token's new length. \nOur method is content-aware, requiring no tuning for different datasets, and fast, incurring negligible overhead. \nRLT yields a large speedup in training, reducing the wall-clock time to fine-tune a video transformer by 30% while matching baseline model performance. RLT also works without training, increasing model throughput by 35% with only 0.1% drop in accuracy.\nRLT speeds up training at 30 FPS by more than 100%, and on longer video datasets, can reduce the token count by up to 80\\%. Our project page is at rccchoudhury.github.io/projects/rlt.", "keywords": "video understanding;vision transformers;efficient transformers", "primary_area": "machine_vision", "supplementary_material": "", "author": "Rohan Choudhury;Guanglei Zhu;Sihan Liu;Koichiro Niinuma;Kris M. Kitani;Laszlo Attila Jeni", "authorids": "~Rohan_Choudhury1;~Guanglei_Zhu1;~Sihan_Liu3;~Koichiro_Niinuma1;~Kris_M._Kitani1;~Laszlo_Attila_Jeni1", "gender": "M;;F;M;M;M", "homepage": "https://rccchoudhury.github.io/;;;;http://www.cs.cmu.edu/~kkitani/;http://www.laszlojeni.com/", "dblp": "234/6273;;;35/214;42/163;35/7547", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com/citations?hl=en;AFaeUrYAAAAJ;yv3sH74AAAAJ;Wdnc-mEAAAAJ", "orcid": ";;;;0000-0002-9389-4060;0000-0002-2830-700X", "linkedin": "rohan-choudhury/;;;;;laszlojeni/", "or_profile": "~Rohan_Choudhury1;~Guanglei_Zhu1;~Sihan_Liu3;~Koichiro_Niinuma1;~Kris_M._Kitani1;~Laszlo_Attila_Jeni1", "aff": "Meta Facebook;;Carnegie Mellon University;Fujitsu Research of America;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "meta.com;;cmu.edu;fujitsu.com;cmu.edu;cmu.edu", "position": "Intern;;MS student;Research Director;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchoudhury2024dont,\ntitle={Don't Look Twice: Faster Video Transformers with Run-Length Tokenization},\nauthor={Rohan Choudhury and Guanglei Zhu and Sihan Liu and Koichiro Niinuma and Kris M. Kitani and Laszlo Attila Jeni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b1ggjW00NI}\n}", "github": "", "reviewers": "tWGP;N711;B8qa", "pdf_size": 4872705, "rating": "4;7;8", "confidence": "3;4;4", "soundness": "3;2;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "20;104;55", "wc_strengths": "4;184;139", "wc_weaknesses": "4;419;15", "wc_questions": "168;75;11", "wc_limitations": "4;6;1", "wc_review": "200;788;221", "wc_reply_reviewers": "34;111;5", "wc_reply_authors": "121;56;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 59.666666666666664, 34.451253807211266 ], "wc_strengths_avg": [ 109.0, 76.48529270389177 ], "wc_weaknesses_avg": [ 146.0, 193.0923785825496 ], "wc_questions_avg": [ 84.66666666666667, 64.45842760174102 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 403.0, 272.3710704168121 ], "wc_reply_reviewers_avg": [ 50.0, 44.7288124888943 ], "wc_reply_authors_avg": [ 59.0, 49.44357052910587 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3070495876782494904&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "meta.com;;cmu.edu;fujitsu.com;cmu.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Meta;Carnegie Mellon University;Fujitsu Research of America", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.cmu.edu;https://www.fujitsu.com/us/", "aff_unique_abbr": "Meta;CMU;FRA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Does Reasoning Emerge? Examining the Probabilities of Causation in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94513", "id": "b1ylCyjAZk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b1ylCyjAZk", "openreview": "https://openreview.net/forum?id=b1ylCyjAZk", "poster": "", "project": "", "author_site": "Javier Gonzalez, Aditya Nori", "tldr": "", "abstract": "Recent advances in AI have been significantly driven by the capabilities of large language models (LLMs) to solve complex problems in ways that resemble human thinking. However, there is an ongoing debate about the extent to which LLMs are capable of\nactual reasoning. Central to this debate are two key probabilistic concepts that are essential for connecting causes\nto their effects: the probability of necessity (PN) and the probability of sufficiency (PS). This paper introduces a framework that is both theoretical and practical, aimed at assessing how effectively LLMs are able to replicate real-world reasoning mechanisms using these probabilistic measures. By viewing LLMs as abstract machines that process information through a natural language interface, we examine the conditions under which it is possible to compute suitable approximations of PN and PS. Our research marks an important step towards gaining a deeper understanding of when LLMs are capable of reasoning, as illustrated by a series of math examples.", "keywords": "Reasoning;Large Language Models;probabilities of causation", "primary_area": "generative_models", "supplementary_material": "", "author": "Javier Gonzalez;Aditya V. Nori", "authorids": "~Javier_Gonzalez2;~Aditya_V._Nori1", "gender": "M;M", "homepage": "http://javiergonzalezh.github.io/;https://www.microsoft.com/en-us/research/people/adityan/", "dblp": ";n/AdityaVNori", "google_scholar": ";qXTt3dUAAAAJ", "orcid": ";", "linkedin": ";adityanori/", "or_profile": "~Javier_Gonzalez2;~Aditya_V._Nori1", "aff": "Microsoft;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com", "position": "Principal Researcher;Researcher", "bibtex": "@inproceedings{\ngonzalez2024does,\ntitle={Does Reasoning Emerge? Examining the Probabilities of Causation in Large Language Models},\nauthor={Javier Gonzalez and Aditya V. Nori},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b1ylCyjAZk}\n}", "github": "", "reviewers": "Hvct;65be;Hhfy;1YAi", "pdf_size": 8083138, "rating": "4;4;6;6", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "134;49;98;133", "wc_strengths": "107;39;69;38", "wc_weaknesses": "167;76;68;179", "wc_questions": "133;49;72;126", "wc_limitations": "1;10;14;5", "wc_review": "542;223;321;481", "wc_reply_reviewers": "0;0;26;81", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.5, 34.64462440264001 ], "wc_strengths_avg": [ 63.25, 28.16358464400439 ], "wc_weaknesses_avg": [ 122.5, 50.75677294706589 ], "wc_questions_avg": [ 95.0, 35.531676008879735 ], "wc_limitations_avg": [ 7.5, 4.924428900898052 ], "wc_review_avg": [ 391.75, 126.51358622693454 ], "wc_reply_reviewers_avg": [ 26.75, 33.070946463625745 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11129499465301202442&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Implicit Zoo: A Large-Scale Dataset of Neural Implicit Functions for 2D Images and 3D Scenes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97616", "id": "b57BKV8qKQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b57BKV8qKQ", "openreview": "https://openreview.net/forum?id=b57BKV8qKQ", "poster": "", "project": "", "author_site": "Qi Ma, Danda Pani Paudel, Ender Konukoglu, Luc V Gool", "tldr": "", "abstract": "Neural implicit functions have demonstrated significant importance in various areas such as computer vision, graphics. Their advantages include the ability to represent complex shapes and scenes with high fidelity, smooth interpolation capabilities, and continuous representations. Despite these benefits, the development and analysis of implicit functions have been limited by the lack of comprehensive datasets and the substantial computational resources required for their implementation and evaluation. To address these challenges, we introduce \"Implicit-Zoo\": a large-scale dataset requiring thousands of GPU training days designed to facilitate research and development in this field. Our dataset includes diverse 2D and 3D scenes, such as CIFAR-10, ImageNet-1K, and Cityscapes for 2D image tasks, and the OmniObject3D dataset for 3D vision tasks. We ensure high quality through strict checks, refining or filtering out low-quality data. Using Implicit-Zoo, we showcase two immediate benefits as it enables to: (1) learn token locations for transformer models; (2) Directly regress 3D cameras poses of 2D images with respect to NeRF models. This in turn leads to an \\emph{improved performance} in all three task of image classification, semantic segmentation, and 3D pose regression -- thereby unlocking new avenues for research.", "keywords": "Implicit function;image classification;semantic segmentation;pose estimation", "primary_area": "", "supplementary_material": "/attachment/3cb2ed001e890f576a46df35dde3a2837925c348.zip", "author": "Qi Ma;Danda Pani Paudel;Ender Konukoglu;Luc Van Gool", "authorids": "~Qi_Ma6;~Danda_Pani_Paudel3;~Ender_Konukoglu1;~Luc_Van_Gool1", "gender": "M;;;M", "homepage": "https://qimaqi.github.io/;http://www.vision.ee.ethz.ch/~kender;;https://people.ee.ethz.ch/~paudeld/", "dblp": ";45/7041;61/5017;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=OeEMrhQAAAAJ;https://scholar.google.be/citations?user=TwMib_QAAAAJ;https://scholar.google.ch/citations?user=W43pvPkAAAAJ", "orcid": "0009-0005-4028-6917;;;", "linkedin": "qi-ma-27a655189/;;;", "or_profile": "~Qi_Ma6;~Ender_Konukoglu1;~Luc_Van_Gool1;~Danda_Pani_Paudel1", "aff": "ETH Zurich;ETHZ - ETH Zurich;KU Leuven;ETHZ - ETH Zurich", "aff_domain": "vision.ee.ethz.ch;ethz.ch;kuleuven.be;ethz.ch", "position": "PhD student;Associate Professor;Emeritus;Lecturer", "bibtex": "@inproceedings{\nma2024implicit,\ntitle={Implicit Zoo: A Large-Scale Dataset of Neural Implicit Functions for 2D Images and 3D Scenes},\nauthor={Qi Ma and Danda Pani Paudel and Ender Konukoglu and Luc Van Gool},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=b57BKV8qKQ}\n}", "github": "", "reviewers": "kysf;7hMu;yoA1;BRMW", "pdf_size": 39435710, "rating": "4;6;7;7", "confidence": "2;4;3;4", "wc_summary_and_contributions": "53;122;62;67", "wc_strengths": "32;132;31;29", "wc_improvement": "156;165;20;12", "wc_limitations": "2;2;15;26", "wc_correctness": "2;1;1;20", "wc_clarity": "1;1;1;9", "wc_relation_to_prior_work": "1;1;1;18", "wc_documentation": "1;2;1;12", "wc_additional_feedback": "1;1;1;1", "wc_review": "249;427;133;194", "wc_reply_reviewers": "0;157;0;0", "wc_reply_authors": "761;649;136;131", "reply_reviewers": "0;2;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 76.0, 27.027763503479157 ], "wc_strengths_avg": [ 56.0, 43.89191269470949 ], "wc_improvement_avg": [ 88.25, 72.37532383347241 ], "wc_limitations_avg": [ 11.25, 10.034316120194738 ], "wc_correctness_avg": [ 6.0, 8.093207028119323 ], "wc_clarity_avg": [ 3.0, 3.4641016151377544 ], "wc_relation_to_prior_work_avg": [ 5.25, 7.361215932167728 ], "wc_documentation_avg": [ 4.0, 4.636809247747852 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 250.75, 109.71867434488989 ], "wc_reply_reviewers_avg": [ 39.25, 67.98299419707844 ], "wc_reply_authors_avg": [ 419.25, 288.4860265246828 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7385489458759963, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7933789853783395378&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "vision.ee.ethz.ch;ethz.ch;kuleuven.be;ethz.ch", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ETH Zurich;Katholieke Universiteit Leuven", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.kuleuven.be", "aff_unique_abbr": "ETHZ;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;Belgium" }, { "title": "emg2pose: A Large and Diverse Benchmark for Surface Electromyographic Hand Pose Estimation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97615", "id": "b5n3lKRLzk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b5n3lKRLzk", "openreview": "https://openreview.net/forum?id=b5n3lKRLzk", "poster": "/media/PosterPDFs/NeurIPS%202024/97615.png?t=1733415210.3589242", "project": "", "author_site": "Sasha Salter, Richard Warren, Collin Schlager, Adrian Spurr, Shangchen Han, Rohin Bhasin, Yujun Cai, Peter Walkington, Anuoluwapo Bolarinwa, Robert Wang, Nathan Danielson, Josh Merel, Eftychios Pnevmatikakis, Jesse Marshall", "tldr": "", "abstract": "Hands are the primary means through which humans interact with the world. Reliable and always-available hand pose inference could yield new and intuitive control schemes for human-computer interactions, particularly in virtual and augmented reality. Computer vision is effective but requires one or multiple cameras and can struggle with occlusions, limited field of view, and poor lighting. Wearable wrist-based surface electromyography (sEMG) presents a promising alternative as an always-available modality sensing muscle activities that drive hand motion. However, sEMG signals are strongly dependent on user anatomy and sensor placement; existing sEMG models have thus required hundreds of users and device placements to effectively generalize for tasks other than pose inference. To facilitate progress on sEMG pose inference, we introduce the emg2pose benchmark, which is to our knowledge the first publicly available dataset of high-quality hand pose labels and wrist sEMG recordings. emg2pose contains 2kHz, 16 channel sEMG and pose labels from a 26-camera motion capture rig for 193 users, 370 hours, and 29 stages with diverse gestures - a scale comparable to vision-based hand pose datasets. We provide competitive baselines and challenging tasks evaluating real-world generalization scenarios: held-out users, sensor placements, and stages. This benchmark provides the machine learning community a platform for exploring complex generalization problems, holding potential to significantly enhance the development of sEMG-based human-computer interactions.", "keywords": "hand pose estimation;surface electromyography;datasets;benchmarks;human-computer interfaces;generalization", "primary_area": "", "supplementary_material": "/attachment/a8a6f25bc972c76d6fbd4f1f1ce03b065bf9bdc4.zip", "author": "Sasha Salter;Richard Warren;Collin Schlager;Adrian Spurr;Shangchen Han;Rohin Bhasin;Yujun Cai;Peter Walkington;Anuoluwapo Bolarinwa;Robert Wang;Nathan Danielson;Josh Merel;Eftychios A. Pnevmatikakis;Jesse D Marshall", "authorids": "~Sasha_Salter1;~Richard_Warren1;~Collin_Schlager1;~Adrian_Spurr1;~Shangchen_Han1;~Rohin_Bhasin1;~Yujun_Cai1;~Peter_Walkington1;~Anuoluwapo_Bolarinwa1;~Robert_Wang3;~Nathan_Danielson1;~Josh_Merel1;~Eftychios_A._Pnevmatikakis1;~Jesse_D_Marshall1", "gender": "M;M;Not Specified;;M;M;F;M;F;;M;;M;M", "homepage": ";https://richard-warren.github.io/about/;;;;https://www.rohinbhasin.com;;https://github.com/PerlinWarp;;https://people.csail.mit.edu/rywang/;;;;", "dblp": "217/1564;;;203/8503;;;227/4399;;;29/2550-2;;139/1361;92/4431;270/2269", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=7xYnY74AAAAJ;;;https://scholar.google.com/citations?hl=en;;;fBzfSdwAAAAJ;NH5ymusAAAAJ;https://scholar.google.co.uk/citations?user=K4OcFXUAAAAJ;fDc1X1YAAAAJ;PWquzC4AAAAJ", "orcid": ";;;;;;;0000-0003-3195-8679;;;;;0000-0003-1509-6394;", "linkedin": ";;cschlager;;shangchen-han-2421013b?challengeId=AQGqE6Aqw_AVMQAAAY-rkWZsFjTshRXAGd6K_WsLHanbmyv0u38Ruy70W9Q1lzn9VPk5VCVlKajAPcf3ojvgJvuelaM0AKqZJg&submissionId=d265594f-ab7b-d217-dd26-528095ea956f&challengeSource=AgFZVarpTIizugAAAY-rkW4X_TqVuJYz9ZlyR1KRVs8sXilIO3Ik8Mpp5wbMVmI&challegeType=AgHke4755ORpKAAAAY-rkW4bmpICyQwOG68cPnhuJhtyPNhN6wX5PIU&memberId=AgEr52Vj2VO3hAAAAY-rkW4e5XNR-2kS1mbuWEjZIVXDj4Q&recognizeDevice=AgGEjrEBly_GkQAAAY-rkW4iuI4HvwMDHm2_spi4Ktt5m9WUXF5E;rohinbhasin/;;peter-walkington/;anuoluwapo-bolarinwa-34012761/;;nathan-danielson/;;;", "or_profile": "~Sasha_Salter1;~Richard_Warren1;~Collin_Schlager1;~Adrian_Spurr1;~Shangchen_Han1;~Rohin_Bhasin1;~Yujun_Cai1;~Peter_Walkington1;~Anuoluwapo_Bolarinwa1;~Robert_Wang3;~Nathan_Danielson1;~Josh_Merel1;~Eftychios_A._Pnevmatikakis1;~Jesse_D_Marshall1", "aff": "Meta;Meta Facebook;Meta Facebook;Research, Facebook;Meta Facebook;Runhouse;Meta Facebook;Meta;Meta Facebook;Meta Reality Labs;Meta Facebook;Meta Reality Labs;Meta Facebook;Meta", "aff_domain": "meta.com;meta.com;meta.com;research.facebook.com;meta.com;run.house;fb.com;meta.com;meta.com;meta.com;meta.com;fb.com;meta.com;meta.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Software Engineer;Researcher;Researcher;Researcher;Researcher;Researcher;Research Scientist;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nsalter2024emgpose,\ntitle={emg2pose: A Large and Diverse Benchmark for Surface Electromyographic Hand Pose Estimation},\nauthor={Sasha Salter and Richard Warren and Collin Schlager and Adrian Spurr and Shangchen Han and Rohin Bhasin and Yujun Cai and Peter Walkington and Anuoluwapo Bolarinwa and Robert Wang and Nathan Danielson and Josh Merel and Eftychios A. Pnevmatikakis and Jesse D Marshall},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=b5n3lKRLzk}\n}", "github": "", "reviewers": "cZ4K;kNuk;NebK;xxen", "pdf_size": 4062830, "rating": "6;6;7;7", "confidence": "4;3;4;4", "wc_summary_and_contributions": "58;48;69;77", "wc_strengths": "35;18;16;101", "wc_improvement": "77;33;16;287", "wc_limitations": "6;1;150;19", "wc_correctness": "36;1;1;75", "wc_clarity": "15;1;1;99", "wc_relation_to_prior_work": "6;1;1;5", "wc_documentation": "4;1;2;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "238;105;257;669", "wc_reply_reviewers": "35;17;16;484", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;2", "reply_authors": "1;2;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 63.0, 10.977249200050075 ], "wc_strengths_avg": [ 42.5, 34.57238782612506 ], "wc_improvement_avg": [ 103.25, 108.39828181295125 ], "wc_limitations_avg": [ 44.0, 61.55079203389669 ], "wc_correctness_avg": [ 28.25, 30.53993287484437 ], "wc_clarity_avg": [ 29.0, 40.8166632639171 ], "wc_relation_to_prior_work_avg": [ 3.25, 2.277608394786075 ], "wc_documentation_avg": [ 3.0, 1.5811388300841898 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 317.25, 211.35796057873003 ], "wc_reply_reviewers_avg": [ 138.0, 199.90622801703802 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6124950150122115814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "meta.com;meta.com;meta.com;research.facebook.com;meta.com;run.house;fb.com;meta.com;meta.com;meta.com;meta.com;fb.com;meta.com;meta.com", "author_num": 14, "aff_unique_index": "0;0;0;0;0;1;0;0;0;0;0;0;0;0", "aff_unique_norm": "Meta;Runhouse", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;", "aff_unique_abbr": "Meta;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CARES: A Comprehensive Benchmark of Trustworthiness in Medical Vision Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97614", "id": "b6IBmU1uzw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b6IBmU1uzw", "openreview": "https://openreview.net/forum?id=b6IBmU1uzw", "poster": "/media/PosterPDFs/NeurIPS%202024/97614.png?t=1733702973.3817887", "project": "", "author_site": "Peng Xia, Ze Chen, Juanxi Tian, Yangrui Gong, Ruibo Hou, Yue Xu, Zhenbang Wu, Zhiyuan Fan, Yiyang Zhou, Kangyu Zhu, Wenhao Zheng, Zhaoyang Wang, Xiao Wang, Xuchao Zhang, Chetan Bansal, Marc Niethammer, Junzhou Huang, Hongtu Zhu, Yun Li, Jimeng Sun, Zongyuan Ge, Gang Li, James Zou, Huaxiu Yao", "tldr": "", "abstract": "Artificial intelligence has significantly impacted medical applications, particularly with the advent of Medical Large Vision Language Models (Med-LVLMs), sparking optimism for the future of automated and personalized healthcare. However, the trustworthiness of Med-LVLMs remains unverified, posing significant risks for future model deployment. In this paper, we introduce CARES and aim to comprehensively evaluate the Trustworthiness of Med-LVLMs across the medical domain. We assess the trustworthiness of Med-LVLMs across five dimensions, including trustfulness, fairness, safety, privacy, and robustness. CARES comprises about 41K question-answer pairs in both closed and open-ended formats, covering 16 medical image modalities and 27 anatomical regions. Our analysis reveals that the models consistently exhibit concerns regarding trustworthiness, often displaying factual inaccuracies and failing to maintain fairness across different demographic groups. Furthermore, they are vulnerable to attacks and demonstrate a lack of privacy awareness. We publicly release our benchmark and code in https://github.com/richard-peng-xia/CARES.", "keywords": "medical large vision language models;medical imaging;trustworthiness", "primary_area": "", "supplementary_material": "/attachment/39784249e8d3894882f3884cb6866341b1d765e4.pdf", "author": "Peng Xia;Ze Chen;Juanxi Tian;Gong Yangrui;Ruibo Hou;Yue Xu;Zhenbang Wu;Zhiyuan Fan;Yiyang Zhou;Kangyu Zhu;Wenhao Zheng;Zhaoyang Wang;Xiao Wang;Xuchao Zhang;Chetan Bansal;Marc Niethammer;Junzhou Huang;Hongtu Zhu;Yun Li;Jimeng Sun;Zongyuan Ge;Gang Li;James Zou;Huaxiu Yao", "authorids": "~Peng_Xia1;~Ze_Chen5;~Juanxi_Tian1;~Gong_Yangrui2;~Ruibo_Hou1;~Yue_Xu10;~Zhenbang_Wu1;~Zhiyuan_Fan2;~Yiyang_Zhou1;~Kangyu_Zhu1;~Wenhao_Zheng4;~Zhaoyang_Wang1;~Xiao_Wang6;~Xuchao_Zhang1;~Chetan_Bansal1;~Marc_Niethammer1;~Junzhou_Huang2;~Hongtu_Zhu3;~Yun_Li7;~Jimeng_Sun3;~Zongyuan_Ge1;~Gang_Li10;~James_Zou1;~Huaxiu_Yao1", "gender": "M;M;M;M;Non-Binary;;M;M;M;M;M;;M;;;M;M;;Not Specified;;M;M;;M", "homepage": "https://richard-peng-xia.github.io;;https://tianshijing.github.io;https://github.com/GongYangrui;;;;https://zhiyuan.fan;https://yiyangzhou.github.io/;https://github.com/Kelvinz-89757;;;https://wang3702.github.io/;https://xuczhang.github.io/;;http://wwwx.cs.unc.edu/~mn/;http://ranger.uta.edu/~huang/;;https://yunliweb.its.unc.edu;http://sunlab.org;https://research.monash.edu/en/persons/zongyuan-ge;http://bbm.web.unc.edu/publication/;;http://huaxiuyao.mystrikingly.com", "dblp": ";;368/8625;;;;315/0212;210/1532;175/1589.html;;;;49/67-13;;;88/3304;22/1170.html;;;;147/2757;;;197/1635", "google_scholar": "8OVOf1EAAAAJ;CDzpumcAAAAJ;https://scholar.google.cz/citations?user=6QB6Q8gAAAAJ;;;;N8p-spIAAAAJ;;https://scholar.google.com.hk/citations?user=6KltFMAAAAAJ;;dR1J_4EAAAAJ;;AGS_dK8AAAAJ;;;https://scholar.google.com.au/citations?user=KqtBi6MAAAAJ;https://scholar.google.com.tw/citations?user=X7KrguAAAAAJ;;https://scholar.google.com/citations?hl=en;9jmmp5sAAAAJ;https://scholar.google.com.au/citations?user=Q0gUrcIAAAAJ;S6-aJl8AAAAJ;23ZXZvEAAAAJ;A20BZnQAAAAJ", "orcid": ";;0009-0000-2268-9994;;;;;;;;0000-0002-7108-370X;;0000-0003-4435-7098;;;;0000-0002-9548-1227;;0000-0002-9275-4189;0000-0003-1512-6426;0000-0002-5880-8673;;;", "linkedin": ";;juanxi-tian-4a4546242/;;ruibo-hou-564549225/;yue-xu-myrm;;;;;;;;;;;;;;jimengsun/;;;;huaxiuyao/", "or_profile": "~Peng_Xia1;~Ze_Chen5;~Juanxi_Tian1;~Gong_Yangrui2;~Ruibo_Hou1;~Yue_Xu10;~Zhenbang_Wu1;~Zhiyuan_Fan2;~Yiyang_Zhou1;~Kangyu_Zhu1;~Wenhao_Zheng4;~Zhaoyang_Wang1;~Xiao_Wang6;~Xuchao_Zhang1;~Chetan_Bansal1;~Marc_Niethammer1;~Junzhou_Huang2;~Hongtu_Zhu3;~Yun_Li7;~Jimeng_Sun3;~Zongyuan_Ge1;~Gang_Li10;~James_Zou1;~Huaxiu_Yao1", "aff": "Monash University;University of Electronic Science and Technology of China;Westlake University;Huazhong University of Science and Technology;University of Illinois, Urbana Champaign;Huazhong University of Science and Technology;University of Illinois Urbana Champaign;Tianjin University;Xi'an Jiaotong University;Brown University;Zhejiang University;;University of Washington;Microsoft;;The University of North Carolina at Chapel Hill;University of Texas, Arlington;;University of North Carolina at Chapel Hill;Georgia Institute of Technology;Monash University;;Stanford University;Department of Computer Science, University of North Carolina at Chapel Hill", "aff_domain": "monash.edu;uestc.edu.cn;westlake.edu;hust.edu.cn;uiuc.edu;hust.edu.cn;illinois.edu;tju.edu.cn;xjtu.edu.cn;brown.edu;zju.edu.cn;;uw.edu;microsoft.com;;unc.edu;uta.edu;;unc.edu;gatech.edu;monash.edu;;stanford.edu;cs.unc.edu", "position": "PhD student;MS student;Intern;Undergrad student;MS student;Undergrad student;PhD student;Undergrad student;MS student;MS student;MS student;;Postdoc;Researcher;;Full Professor;Full Professor;;Full Professor;Associate Professor;Associate Professor;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nxia2024cares,\ntitle={{CARES}: A Comprehensive Benchmark of Trustworthiness in Medical Vision Language Models},\nauthor={Peng Xia and Ze Chen and Juanxi Tian and Gong Yangrui and Ruibo Hou and Yue Xu and Zhenbang Wu and Zhiyuan Fan and Yiyang Zhou and Kangyu Zhu and Wenhao Zheng and Zhaoyang Wang and Xiao Wang and Xuchao Zhang and Chetan Bansal and Marc Niethammer and Junzhou Huang and Hongtu Zhu and Yun Li and Jimeng Sun and Zongyuan Ge and Gang Li and James Zou and Huaxiu Yao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=b6IBmU1uzw}\n}", "github": "", "reviewers": "xYtq;8Xi4;MGa1;UJHN", "pdf_size": 3564227, "rating": "6;6;7;7", "confidence": "4;4;3;4", "wc_summary_and_contributions": "60;145;93;63", "wc_strengths": "3;77;87;61", "wc_improvement": "3;397;9;32", "wc_limitations": "3;3;9;1", "wc_correctness": "11;1;10;1", "wc_clarity": "1;1;3;1", "wc_relation_to_prior_work": "1;1;10;1", "wc_documentation": "19;1;24;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "102;627;246;162", "wc_reply_reviewers": "85;0;0;0", "wc_reply_authors": "62;62;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 90.25, 34.14216601213227 ], "wc_strengths_avg": [ 57.0, 32.526911934581186 ], "wc_improvement_avg": [ 110.25, 165.90867216634578 ], "wc_limitations_avg": [ 4.0, 3.0 ], "wc_correctness_avg": [ 5.75, 4.763139720814412 ], "wc_clarity_avg": [ 1.5, 0.8660254037844386 ], "wc_relation_to_prior_work_avg": [ 3.25, 3.897114317029974 ], "wc_documentation_avg": [ 11.25, 10.40132203135736 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 284.25, 204.3897930426077 ], "wc_reply_reviewers_avg": [ 21.25, 36.80607966083864 ], "wc_reply_authors_avg": [ 31.0, 31.0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 24, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8931856709001394137&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "monash.edu;uestc.edu.cn;westlake.edu;hust.edu.cn;uiuc.edu;hust.edu.cn;illinois.edu;tju.edu.cn;xjtu.edu.cn;brown.edu;zju.edu.cn;;uw.edu;microsoft.com;;unc.edu;uta.edu;;unc.edu;gatech.edu;monash.edu;;stanford.edu;cs.unc.edu", "author_num": 24, "aff_unique_index": "0;1;2;3;4;3;4;5;6;7;8;9;10;11;12;13;14;0;15;11", "aff_unique_norm": "Monash University;University of Electronic Science and Technology of China;Westlake University;Huazhong University of Science and Technology;University of Illinois Urbana-Champaign;Tianjin University;Xi'an Jiao Tong University;Brown University;Zhejiang University;University of Washington;Microsoft;University of North Carolina at Chapel Hill;University of Texas at Arlington;University of North Carolina;Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";;;;;;;;;;Microsoft Corporation;;;;;", "aff_unique_url": "https://www.monash.edu;https://www.uestc.edu.cn;https://www.westlake.edu.cn;http://www.hust.edu.cn;https://illinois.edu;http://www.tju.edu.cn;https://www.xjtu.edu.cn;https://www.brown.edu;https://www.zju.edu.cn;https://www.washington.edu;https://www.microsoft.com;https://www.unc.edu;https://www.uta.edu;https://www.unc.edu;https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "Monash;UESTC;WU;HUST;UIUC;TJU;XJTU;Brown;ZJU;UW;Microsoft;UNC Chapel Hill;UTA;UNC;Georgia Tech;Stanford", "aff_campus_unique_index": "1;1;2;3;2;4;2", "aff_campus_unique": ";Urbana-Champaign;Chapel Hill;Arlington;Stanford", "aff_country_unique_index": "0;1;1;1;2;1;2;1;1;2;1;2;2;2;2;2;2;0;2;2", "aff_country_unique": "Australia;China;United States" }, { "title": "A Recipe for Charge Density Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94512", "id": "b7REKaNUTv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b7REKaNUTv", "openreview": "https://openreview.net/forum?id=b7REKaNUTv", "poster": "/media/PosterPDFs/NeurIPS%202024/94512.png?t=1733812099.0923698", "project": "", "author_site": "Xiang Fu, Andrew Rosen, Kyle Bystrom, Rui Wang, Albert Musaelian, Boris Kozinsky, Tess Smidt, Tommi Jaakkola", "tldr": "", "abstract": "In density functional theory, charge density is the core attribute of atomic systems from which all chemical properties can be derived. Machine learning methods are promising in significantly accelerating charge density prediction, yet existing approaches either lack accuracy or scalability. We propose a recipe that can achieve both. In particular, we identify three key ingredients: (1) representing the charge density with atomic and virtual orbitals (spherical fields centered at atom/virtual coordinates); (2) using expressive and learnable orbital basis sets (basis function for the spherical fields); and (3) using high-capacity equivariant neural network architecture. Our method achieves state-of-the-art accuracy while being more than an order of magnitude faster than existing methods. Furthermore, our method enables flexible efficiency-accuracy trade-offs by adjusting the model/basis sizes.", "keywords": "AI for science;density functional theory;charge density;equivariant;molecule", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Xiang Fu;Andrew Scott Rosen;Kyle Bystrom;Rui Wang;Albert Musaelian;Boris Kozinsky;Tess Smidt;Tommi Jaakkola", "authorids": "~Xiang_Fu4;~Andrew_Scott_Rosen1;~Kyle_Bystrom1;~Rui_Wang11;~Albert_Musaelian1;~Boris_Kozinsky1;~Tess_Smidt1;~Tommi_S._Jaakkola1", "gender": "M;M;M;M;;;F;", "homepage": "https://xiangfu.co/;https://rosen.cbe.princeton.edu/;;https://rui1521.github.io/online-cv/;;;https://blondegeek.github.io/;", "dblp": "97/374-5.html;;;06/2293-86;;;215/4978.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;lHBjgLsAAAAJ;r1EA_vYAAAAJ;lEmjtfIAAAAJ;6CGJH_oAAAAJ;;;", "orcid": ";0000-0002-0141-7006;0000-0003-1342-4972;;0000-0003-0475-9236;;0000-0001-5581-5344;", "linkedin": ";andrew-s-rosen/;;rui-ray-wang-41a398149/;;;;", "or_profile": "~Xiang_Fu4;~Andrew_Scott_Rosen1;~Kyle_Bystrom1;~Rui_Wang11;~Albert_Musaelian1;~Boris_Kozinsky1;~Tess_Smidt1;~Tommi_S._Jaakkola1", "aff": "Massachusetts Institute of Technology;University of California, Berkeley;Harvard University;Massachusetts Institute of Technology;School of Engineering and Applied Sciences, Harvard University;;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;berkeley.edu;harvard.edu;mit.edu;seas.harvard.edu;;mit.edu;", "position": "PhD student;Postdoc;PhD student;Postdoc;Researcher;;Assistant Professor;", "bibtex": "@inproceedings{\nfu2024a,\ntitle={A Recipe for Charge Density Prediction},\nauthor={Xiang Fu and Andrew Scott Rosen and Kyle Bystrom and Rui Wang and Albert Musaelian and Boris Kozinsky and Tess Smidt and Tommi Jaakkola},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b7REKaNUTv}\n}", "github": "", "reviewers": "L6ft;SS9K;nCiL;paLd", "pdf_size": 4189266, "rating": "4;6;8;8", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "112;84;306;14", "wc_strengths": "17;57;63;44", "wc_weaknesses": "119;343;84;30", "wc_questions": "47;115;88;34", "wc_limitations": "1;8;10;8", "wc_review": "296;607;551;130", "wc_reply_reviewers": "59;74;83;39", "wc_reply_authors": "0;298;296;227", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.0, 108.2450922675019 ], "wc_strengths_avg": [ 45.25, 17.69710428290459 ], "wc_weaknesses_avg": [ 144.0, 119.1868281312998 ], "wc_questions_avg": [ 71.0, 32.28776858192588 ], "wc_limitations_avg": [ 6.75, 3.418698582794336 ], "wc_review_avg": [ 396.0, 193.1980848766364 ], "wc_reply_reviewers_avg": [ 63.75, 16.663958113245485 ], "wc_reply_authors_avg": [ 205.25, 121.90031788309659 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=474226491598269242&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mit.edu;berkeley.edu;harvard.edu;mit.edu;seas.harvard.edu;;mit.edu;", "author_num": 8, "aff_unique_index": "0;1;2;0;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu;https://www.harvard.edu", "aff_unique_abbr": "MIT;UC Berkeley;Harvard", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Cambridge", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Frequency-Adapted Vision Foundation Model for Domain Generalized Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94511", "id": "b7hmPlOqr8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b7hmPlOqr8", "openreview": "https://openreview.net/forum?id=b7hmPlOqr8", "poster": "/media/PosterPDFs/NeurIPS%202024/94511.png?t=1731707761.4117", "project": "", "author_site": "Qi Bi, Jingjun Yi, Hao Zheng, Haolan Zhan, Yawen Huang, Wei Ji, Yuexiang Li, Yefeng Zheng", "tldr": "", "abstract": "The emerging vision foundation model (VFM) has inherited the ability to generalize to unseen images.\nNevertheless, the key challenge of domain-generalized semantic segmentation (DGSS) lies in the domain gap attributed to the cross-domain styles, i.e., the variance of urban landscape and environment dependencies.\nHence, maintaining the style-invariant property with varying domain styles becomes the key bottleneck in harnessing VFM for DGSS. \nThe frequency space after Haar wavelet transformation provides a feasible way to decouple the style information from the domain-invariant content, since the content and style information are retained in the low- and high- frequency components of the space, respectively. \nTo this end, we propose a novel Frequency-Adapted (FADA) learning scheme to advance the frontier.\nIts overall idea is to separately tackle the content and style information by frequency tokens throughout the learning process.\nParticularly, the proposed FADA consists of two branches, i.e., low- and high- frequency branches. The former one is able to stabilize the scene content, while the latter one learns the scene styles and eliminates its impact to DGSS. \nExperiments conducted on various DGSS settings show the state-of-the-art performance of our FADA and its versatility to a variety of VFMs.\nSource code is available at \\url{https://github.com/BiQiWHU/FADA}.", "keywords": "Semantic Segmentation;Domain Generalization;Vision Foundation Model;Haar Wavelets", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qi Bi;Jingjun Yi;Hao Zheng;Haolan Zhan;Yawen Huang;Wei Ji;Yuexiang Li;Yefeng Zheng", "authorids": "~Qi_Bi1;~Jingjun_Yi1;~Hao_Zheng6;~Haolan_Zhan1;~Yawen_Huang4;~Wei_Ji2;~Yuexiang_Li1;~Yefeng_Zheng3", "gender": ";M;M;;;;M;", "homepage": ";https://github.com/JingjunYi;;;;;https://yuexiangli.github.io;", "dblp": ";296/4714;31/6916-8;;;;165/6204;", "google_scholar": ";doRtaBcAAAAJ;https://scholar.google.com.hk/citations?user=LsJVCSoAAAAJ;;;;WsKu4EMAAAAJ;", "orcid": ";0000-0002-4249-3021;0000-0001-7193-6242;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Qi_Bi1;~Jingjun_Yi1;~Hao_Zheng6;~Haolan_Zhan1;~Yawen_Huang4;~Wei_Ji2;~Yuexiang_Li1;~Yefeng_Zheng3", "aff": ";LIESMARS;Tencent;;;;Medical AI ReSearch (MARS) Group @ GXMU;", "aff_domain": ";whu.edu.cn;tencent.com;;;;gxmu.edu.cn;", "position": ";MS student;Researcher;;;;Full Professor;", "bibtex": "@inproceedings{\nbi2024learning,\ntitle={Learning Frequency-Adapted Vision Foundation Model for Domain Generalized Semantic Segmentation},\nauthor={Qi Bi and Jingjun Yi and Hao Zheng and Haolan Zhan and Yawen Huang and Wei Ji and Yuexiang Li and Yefeng Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b7hmPlOqr8}\n}", "github": "", "reviewers": "4xQk;esrP;ojsk;nB2o;eWsS;3Mxh", "pdf_size": 5149240, "rating": "3;4;5;5;6;7", "confidence": "5;4;2;1;4;3", "soundness": "2;3;3;2;3;3", "novelty": "2;2;3;2;3;3", "presentation": "1;3;3;3;3;3", "wc_summary": "38;67;49;69;55;60", "wc_strengths": "6;41;37;22;84;73", "wc_weaknesses": "117;200;27;141;258;153", "wc_questions": "89;26;2;3;4;64", "wc_limitations": "17;14;1;1;4;7", "wc_review": "267;348;116;236;405;357", "wc_reply_reviewers": "27;0;0;49;26;25", "wc_reply_authors": "25;0;0;26;31;30", "reply_reviewers": "1;0;0;1;1;1", "reply_authors": "2;1;1;2;2;2", "rating_avg": [ 5.0, 1.2909944487358056 ], "confidence_avg": [ 3.1666666666666665, 1.3437096247164249 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.7453559924999298 ], "wc_summary_avg": [ 56.333333333333336, 10.640593133008245 ], "wc_strengths_avg": [ 43.833333333333336, 27.162576379194142 ], "wc_weaknesses_avg": [ 149.33333333333334, 71.26164622166837 ], "wc_questions_avg": [ 31.333333333333332, 33.74248887612702 ], "wc_limitations_avg": [ 7.333333333333333, 6.18241233033047 ], "wc_review_avg": [ 288.1666666666667, 95.55001598930036 ], "wc_reply_reviewers_avg": [ 21.166666666666668, 17.043245648121786 ], "wc_reply_authors_avg": [ 18.666666666666668, 13.36246816855163 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.384307569132209, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11873046358413808998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";whu.edu.cn;tencent.com;;;;gxmu.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;2", "aff_unique_norm": "Laboratory of Information Engineering Group, Tsinghua University;Tencent;Guangxi Medical University", "aff_unique_dep": "Information Engineering Group;Tencent Holdings Limited;Medical AI ReSearch (MARS) Group", "aff_unique_url": "http://www.liesmars.tsinghua.edu.cn/;https://www.tencent.com;http://www.gxmu.edu.cn", "aff_unique_abbr": "LIESMARS;Tencent;GXMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "MatrixNet: Learning over symmetry groups using learned group representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94510", "id": "b8jwgZrAXG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=b8jwgZrAXG", "openreview": "https://openreview.net/forum?id=b8jwgZrAXG", "poster": "/media/PosterPDFs/NeurIPS%202024/94510.png?t=1731716763.7549546", "project": "", "author_site": "Lucas Laird, Circe Hsu, Asilata Bapat, Robin Walters", "tldr": "", "abstract": "Group theory has been used in machine learning to provide a theoretically grounded approach for incorporating known symmetry transformations in tasks from robotics to protein modeling. In these applications, equivariant neural networks use known\nsymmetry groups with predefined representations to learn over geometric input data. We propose MatrixNet, a neural network architecture that learns matrix representations of group element inputs instead of using predefined representations. MatrixNet achieves higher sample efficiency and generalization over several standard baselines in prediction tasks over the several finite groups and the Artin braid group. We also show that MatrixNet respects group relations allowing generalization to group elements of greater word length than in the training set. Our code is available at https://github.com/lucas-laird/MatrixNet.", "keywords": "Group Theory;Representation Theory;Feature learning;geometric deep learning;homomorphic", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Lucas Laird;Circe Hsu;Asilata Bapat;Robin Walters", "authorids": "~Lucas_Laird1;~Circe_Hsu1;~Asilata_Bapat1;~Robin_Walters1", "gender": "M;F;F;M", "homepage": ";;https://asilata.github.io;http://www.robinwalters.com", "dblp": ";;;258/3416", "google_scholar": ";;;fnprJmUAAAAJ", "orcid": ";;;", "linkedin": "lucaslaird;circe-hsu-46831127b/;;", "or_profile": "~Lucas_Laird1;~Circe_Hsu1;~Asilata_Bapat1;~Robin_Walters1", "aff": "Northeastern University;Northeastern University;Australian National University;Northeastern University ", "aff_domain": "northeastern.edu;northeastern.edu;anu.edu.au;northeastern.edu", "position": "PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlaird2024matrixnet,\ntitle={MatrixNet: Learning over symmetry groups using learned group representations},\nauthor={Lucas Laird and Circe Hsu and Asilata Bapat and Robin Walters},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=b8jwgZrAXG}\n}", "github": "", "reviewers": "JC7A;puxG;jQuD;KZHf", "pdf_size": 729042, "rating": "4;5;6;7", "confidence": "3;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "131;127;94;131", "wc_strengths": "36;69;67;69", "wc_weaknesses": "134;117;33;149", "wc_questions": "76;133;39;3", "wc_limitations": "3;5;1;16", "wc_review": "380;451;234;368", "wc_reply_reviewers": "39;11;77;50", "wc_reply_authors": "0;0;130;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.75, 15.530212490497354 ], "wc_strengths_avg": [ 60.25, 14.02453207775575 ], "wc_weaknesses_avg": [ 108.25, 44.8964085423322 ], "wc_questions_avg": [ 62.75, 48.07481149209012 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 358.25, 78.43588145740443 ], "wc_reply_reviewers_avg": [ 44.25, 23.657715443381257 ], "wc_reply_authors_avg": [ 32.5, 56.29165124598851 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GcAaOVrG-_sJ:scholar.google.com/&scioq=MatrixNet:+Learning+over+symmetry+groups+using+learned+group+representations&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "northeastern.edu;northeastern.edu;anu.edu.au;northeastern.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Northeastern University;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.anu.edu.au", "aff_unique_abbr": "NEU;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Australia" }, { "id": "bAaM8cKoMl", "title": "MindSet: Vision. A toolbox for testing DNNs on key psychological experiments", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Multiple benchmarks have been developed to assess the alignment between deep neural networks (DNNs) and human vision. In almost all cases these benchmarks are observational in the sense they are composed of behavioural and brain responses to naturalistic images that have not been manipulated to test hypotheses regarding how DNNs or humans perceive and identify objects. Here we introduce the toolbox MindSet: Vision, consisting of a collection of image datasets and related scripts designed to test DNNs on 30 psychological findings. In all experimental conditions, the stimuli are systematically manipulated to test specific hypotheses regarding human visual perception and object recognition. In addition to providing pre-generated datasets of images, we provide code to regenerate these datasets, offering many configurable parameters which greatly extend the dataset versatility for different research contexts, and code to facilitate the testing of DNNs on these image datasets using three different methods (similarity judgments, out-of-distribution classification, and decoder method), accessible at https://github.com/ValerioB88/mindset-vision. We test ResNet-152 on each of these methods as an example of how the toolbox can be used.", "keywords": "Datasets;Visual Perception;Psychology;Human Vision;Deep Neural Networks;Visual Illusion", "primary_area": "", "supplementary_material": "/attachment/bf59377881412ae7707680bd9ebebb610c07d3ce.pdf", "author": "Valerio Biscione;Dong Yin;Gaurav Malhotra;Marin Dujmovic;Milton L. Montero;Guillermo Puebla;Federico Adolfi;Rachel F Heaton;John E. Hummel;Benjamin D. Evans;Karim G. Habashy;Jeffrey Bowers", "authorids": "~Valerio_Biscione1;~Dong_Yin4;~Gaurav_Malhotra1;~Marin_Dujmovic1;~Milton_L._Montero1;~Guillermo_Puebla1;~Federico_Adolfi1;~Rachel_F_Heaton1;~John_E._Hummel1;~Benjamin_D._Evans1;~Karim_G._Habashy1;~Jeffrey_Bowers1", "gender": "M;M;M;M;;M;;F;M;M;Not Specified;M", "homepage": ";https://don-yin.uk/;;https://research-information.bris.ac.uk/en/persons/marin-dujmovic;;https://guillermopuebla.com/;;;https://psychology.illinois.edu/directory/profile/jehummel;https://profiles.sussex.ac.uk/p555479-benjamin-evans;https://www.bristol.ac.uk/;https://jeffbowers.blogs.bristol.ac.uk/researchgrants/", "dblp": ";;;;;;;;;;;", "google_scholar": "qd7WvVsAAAAJ;oUPxVy0AAAAJ;SqX8yX4AAAAJ;;;/pEKy224AAAAJ%26hl%3Den%26oi%3Dao;;YYTzzMEAAAAJ;HxtkYR4AAAAJ;https://scholar.google.co.uk/citations?user=oeLfmnsAAAAJ;;KADNEXcAAAAJ", "orcid": ";;;0000-0003-1523-227X;;0000-0001-7002-7776;;0000-0002-7121-7691;0000-0002-1585-9155;0000-0002-1734-6070;;", "linkedin": "valerio-biscione-6233968b/?originalSubdomain=uk;;;;;;;;;;;", "or_profile": "~Valerio_Biscione1;~Dong_Yin4;~Gaurav_Malhotra1;~Marin_Dujmovic1;~Milton_L._Montero1;~Guillermo_Puebla1;~Federico_Adolfi1;~Rachel_F_Heaton1;~John_E._Hummel1;~Benjamin_D._Evans1;~Karim_G._Habashy1;~Jeffrey_Bowers1", "aff": "University of Bristol;Imperial College London;University of Bristol;University of Bristol;;Universidad de Tarapac\u00e1;;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Sussex;University of Bristol;", "aff_domain": "bristol.ac.uk;imperial.ac.uk;bristol.ac.uk;bristol.ac.uk;;uta.cl;;illinois.edu;illinois.edu;sussex.ac.uk;bristol.ac.uk;", "position": "Research;MS student;Postdoc;Postdoc;;Assistant Professor;;Postdoc;Full Professor;Lecturer;Postdoc;", "bibtex": "@misc{\nanonymous2024mindset,\ntitle={MindSet: Vision. A toolbox for testing {DNN}s on key psychological experiments},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=bAaM8cKoMl}\n}", "github": "", "project": "", "reviewers": "kGsk;qXtL;Xnaw", "site": "https://openreview.net/forum?id=bAaM8cKoMl", "pdf_size": 5124910, "rating": "4;7;9", "confidence": "5;4;5", "wc_summary_and_contributions": "51;88;20", "wc_strengths": "27;87;27", "wc_improvement": "508;56;9", "wc_limitations": "4;5;1", "wc_correctness": "9;10;1", "wc_clarity": "1;6;1", "wc_relation_to_prior_work": "15;7;1", "wc_documentation": "3;14;11", "wc_additional_feedback": "1;1;1", "wc_review": "619;274;72", "wc_reply_reviewers": "134;0;79", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "3;1;1", "rating_avg": [ 6.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 53.0, 27.796882319185844 ], "wc_strengths_avg": [ 47.0, 28.284271247461902 ], "wc_improvement_avg": [ 191.0, 224.9725909231315 ], "wc_limitations_avg": [ 3.3333333333333335, 1.699673171197595 ], "wc_correctness_avg": [ 6.666666666666667, 4.0276819911981905 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 7.666666666666667, 5.734883511361751 ], "wc_documentation_avg": [ 9.333333333333334, 4.642796092394707 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 321.6666666666667, 225.84114377637707 ], "wc_reply_reviewers_avg": [ 71.0, 54.996969613485675 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.11470786693528084, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3145526898124191856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff_unique_index": "0;1;0;0;2;3;3;4;0", "aff_unique_norm": "University of Bristol;Imperial College London;Universidad de Tarapac\u00e1;University of Illinois Urbana-Champaign;University of Sussex", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.bristol.ac.uk;https://www.imperial.ac.uk;https://www.uta.cl;https://illinois.edu;https://www.sussex.ac.uk", "aff_unique_abbr": "Bristol;ICL;;UIUC;Sussex", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;1;2;2;0;0", "aff_country_unique": "United Kingdom;Chile;United States" }, { "title": "TaskBench: Benchmarking Large Language Models for Task Automation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97613", "id": "bAxUA5r3Ss", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bAxUA5r3Ss", "openreview": "https://openreview.net/forum?id=bAxUA5r3Ss", "poster": "/media/PosterPDFs/NeurIPS%202024/97613.png?t=1730020518.66999", "project": "", "author_site": "Yongliang Shen, Kaitao Song, Xu Tan, Wenqi Zhang, Kan Ren, Siyu Yuan, Weiming Lu, Dongsheng Li, Yueting Zhuang", "tldr": "", "abstract": "In recent years, the remarkable progress of large language models (LLMs) has sparked interest in task automation, which involves decomposing complex tasks described by user instructions into sub-tasks and invoking external tools to execute them, playing a central role in autonomous agents. However, there is a lack of systematic and standardized benchmarks to promote the development of LLMs in task automation. To address this, we introduce TaskBench, a comprehensive framework to evaluate the capability of LLMs in task automation. Specifically, task automation can be divided into three critical stages: task decomposition, tool selection, and parameter prediction. To tackle the complexities inherent in these stages, we introduce the concept of Tool Graph to represent decomposed tasks and adopt a back-instruct method to generate high-quality user instructions. We propose TaskEval, a multi-faceted evaluation methodology that assesses LLM performance across these three stages. Our approach combines automated construction with rigorous human verification, ensuring high consistency with human evaluation. Experimental results demonstrate that TaskBench effectively reflects the capabilities of various LLMs in task automation. It provides insights into model performance across different task complexities and domains, pushing the boundaries of what current models can achieve. TaskBench offers a scalable, adaptable, and reliable benchmark for advancing LLM-based autonomous agents.", "keywords": "LLM;Task Automation;AI Agents", "primary_area": "", "supplementary_material": "/attachment/1ffeb209415a23e3eb42b8c36279bd8fd037e51d.zip", "author": "Yongliang Shen;Kaitao Song;Xu Tan;Wenqi Zhang;Kan Ren;Siyu Yuan;Weiming Lu;Dongsheng Li;Yueting Zhuang", "authorids": "~Yongliang_Shen1;~Kaitao_Song1;~Xu_Tan1;~Wenqi_Zhang2;~Kan_Ren1;~Siyu_Yuan2;~Weiming_Lu1;~Dongsheng_Li2;~Yueting_Zhuang1", "gender": "M;M;M;;M;;;M;M", "homepage": ";;https://tan-xu.github.io/;;https://saying.ren;;;http://recmind.cn;https://person.zju.edu.cn/yzhuang", "dblp": "221/5612-1.html;222/2082;96/10484-3;;28/7458;;;254/0830-2.html;", "google_scholar": "UT3NzFAAAAAJ;https://scholar.google.com.hk/citations?user=LLk9dR8AAAAJ;tob-U1oAAAAJ;;USnQVWgAAAAJ;;;VNg5rA8AAAAJ;1RD7UJAAAAAJ", "orcid": ";;0000-0001-5631-0639;;;;;0000-0003-3103-8442;", "linkedin": ";;;;;;;;", "or_profile": "~Yongliang_Shen1;~Kaitao_Song1;~Xu_Tan1;~Wenqi_Zhang2;~Kan_Ren1;~Siyu_Yuan2;~Weiming_Lu1;~Dongsheng_Li2;~Yueting_Zhuang1", "aff": "Zhejiang University;Microsoft;Microsoft;;ShanghaiTech University;;;Microsoft Research Asia;Zhejiang University", "aff_domain": "zju.edu.cn;microsoft.com;microsoft.com;;shanghaitech.edu.cn;;;microsoft.com;zju.edu.cn", "position": "Assistant Professor;Researcher;Principal Researcher;;Assistant Professor;;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nshen2024taskbench,\ntitle={TaskBench: Benchmarking Large Language Models for Task Automation},\nauthor={Yongliang Shen and Kaitao Song and Xu Tan and Wenqi Zhang and Kan Ren and Siyu Yuan and Weiming Lu and Dongsheng Li and Yueting Zhuang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=bAxUA5r3Ss}\n}", "github": "", "reviewers": "BPJD;1mk2;wG3U", "pdf_size": 1620277, "rating": "6;6;7", "confidence": "4;2;4", "wc_summary_and_contributions": "46;47;70", "wc_strengths": "45;31;41", "wc_improvement": "28;191;14", "wc_limitations": "77;41;1", "wc_correctness": "26;1;7", "wc_clarity": "44;1;1", "wc_relation_to_prior_work": "9;22;1", "wc_documentation": "21;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "297;336;137", "wc_reply_reviewers": "45;70;21", "wc_reply_authors": "0;689;376", "reply_reviewers": "2;1;1", "reply_authors": "12;9;8", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 54.333333333333336, 11.08552609887726 ], "wc_strengths_avg": [ 39.0, 5.887840577551898 ], "wc_improvement_avg": [ 77.66666666666667, 80.34232314844331 ], "wc_limitations_avg": [ 39.666666666666664, 31.04119127152751 ], "wc_correctness_avg": [ 11.333333333333334, 10.656244908763853 ], "wc_clarity_avg": [ 15.333333333333334, 20.27039439401436 ], "wc_relation_to_prior_work_avg": [ 10.666666666666666, 8.65383665716478 ], "wc_documentation_avg": [ 7.666666666666667, 9.428090415820632 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 256.6666666666667, 86.10200668715889 ], "wc_reply_reviewers_avg": [ 45.333333333333336, 20.005554784164875 ], "wc_reply_authors_avg": [ 355.0, 281.67475333559213 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 9.666666666666666, 1.699673171197595 ], "replies_avg": [ 37, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6584401630894271152&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;microsoft.com;microsoft.com;;shanghaitech.edu.cn;;;microsoft.com;zju.edu.cn", "author_num": 9, "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "Zhejiang University;Microsoft;ShanghaiTech University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.zju.edu.cn;https://www.microsoft.com;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ZJU;Microsoft;ShanghaiTech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Easy Regional Contrastive Learning of Expressive Fashion Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94509", "id": "bCL9U2X9Jg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bCL9U2X9Jg", "openreview": "https://openreview.net/forum?id=bCL9U2X9Jg", "poster": "", "project": "", "author_site": "Daiqing Qi, Handong Zhao, Sheng Li", "tldr": "", "abstract": "When learning vision-language models (VLM) for the fashion domain, most existing works design new architectures from vanilla BERT with additional objectives, or perform dense multi-task learning with fashion-specific tasks. Though progress has been made, their architecture or objectives are often intricate and the extendibility is limited.\nBy contrast, with simple architecture (comprising only two unimodal encoders) and just the contrastive objective, popular pre-trained VL models (e.g., CLIP) achieve superior performance in general domains, which are further easily extended to downstream tasks.\nHowever, inheriting such benefits of CLIP in the fashion domain is non-trivial in the presence of the notable domain gap. Empirically, we find that directly finetuning on fashion data leads CLIP to frequently ignore minor yet important details such as logos and composition, which are critical in fashion tasks such as retrieval and captioning.\nIn this work, to maintain CLIP's simple architecture and objective while explicitly attending to fashion details, we propose $E^2$: Easy Regional Contrastive Learning of Expressive Fashion Representations.\n$E^2$ introduces only a few selection tokens and fusion blocks (just 1.9\\% additional parameters in total) with only contrastive losses. Despite lightweight, in our primary focus, cross-modal retrieval, $E^2$ notably outperforms existing fashion VLMs with various fashion-specific objectives.\nMoreover, thanks to CLIP's widespread use in downstream tasks in general domains (e.g., zero-shot composed image retrieval and image captioning), our model can easily extend these models from general domain to the fashion domain with notable improvement.\nTo conduct a comprehensive evaluation, we further collect data from Amazon Reviews to build a new dataset for cross-modal retrieval in the fashion domain.", "keywords": "vision-language models;contrastive learning;visual fashion representations", "primary_area": "machine_vision", "supplementary_material": "", "author": "Daiqing Qi;Handong Zhao;Sheng Li", "authorids": "~Daiqing_Qi2;~Handong_Zhao3;~Sheng_Li3", "gender": "M;M;", "homepage": "https://daiqing-qi.github.io/research.html;http://sheng-li.org;https://hdzhao.github.io/", "dblp": "229/9064;23/3439-1;79/8522", "google_scholar": "FIa-pykAAAAJ;DEncVcYAAAAJ;0f-YOFgAAAAJ", "orcid": "0000-0001-9543-5792;0000-0003-1205-8632;", "linkedin": ";sheng-li-15a70022/;", "or_profile": "~Daiqing_Qi2;~Sheng_Li3;~Handong_Zhao1", "aff": "University of Virginia, Charlottesville;University of Virginia, Charlottesville;Adobe Systems", "aff_domain": "virginia.edu;virginia.edu;adobe.com", "position": "PhD student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nqi2024easy,\ntitle={Easy Regional Contrastive Learning of Expressive Fashion Representations},\nauthor={Daiqing Qi and Handong Zhao and Sheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bCL9U2X9Jg}\n}", "github": "", "reviewers": "tu2V;Mfk5;LcUx;Kwtu", "pdf_size": 3984269, "rating": "5;5;6;6", "confidence": "3;5;4;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;4;2", "wc_summary": "34;169;105;67", "wc_strengths": "26;90;183;66", "wc_weaknesses": "19;52;64;74", "wc_questions": "1;114;8;109", "wc_limitations": "2;1;7;1", "wc_review": "82;426;367;317", "wc_reply_reviewers": "0;0;0;20", "wc_reply_authors": "0;55;0;33", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 93.75, 50.186527076497335 ], "wc_strengths_avg": [ 91.25, 57.69477879323223 ], "wc_weaknesses_avg": [ 52.25, 20.716840975399702 ], "wc_questions_avg": [ 58.0, 53.58637886627534 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 298.0, 130.5392661232627 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 22.0, 23.33452377915607 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AP_rXSh-2yAJ:scholar.google.com/&scioq=Easy+Regional+Contrastive+Learning+of+Expressive+Fashion+Representations&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "virginia.edu;virginia.edu;adobe.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Virginia;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.virginia.edu;https://www.adobe.com", "aff_unique_abbr": "UVA;Adobe", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Charlottesville;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Cracking the Code of Juxtaposition: Can AI Models Understand the Humorous Contradictions", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94508", "id": "bCMpdaQCNW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bCMpdaQCNW", "openreview": "https://openreview.net/forum?id=bCMpdaQCNW", "poster": "/media/PosterPDFs/NeurIPS%202024/94508.png?t=1731477550.2277122", "project": "", "author_site": "Zhe Hu, Tuo Liang, Jing Li, Yiren Lu, Yunlai Zhou, Yiran Qiao, Jing Ma, Yu Yin", "tldr": "", "abstract": "Recent advancements in large vision language models have demonstrated remarkable proficiency across a wide range of tasks. \nYet, these models still struggle with understanding the nuances of human humor through juxtaposition, particularly when it involves nonlinear narratives that underpin many jokes and humor cues. This paper investigates this challenge by focusing on comics with contradictory narratives, where each comic consists of two panels that create a humorous contradiction. We introduce the YesBut benchmark, which comprises tasks of varying difficulty aimed at assessing AI's capabilities in recognizing and interpreting these comics, ranging from literal content comprehension to deep narrative reasoning. Through extensive experimentation and analysis of recent commercial or open-sourced large vision language models, we assess their capability to comprehend the complex interplay of the narrative humor inherent in these comics. Our results show that even the state-of-the-art models still struggle with this task. Our findings offer insights into the current limitations and potential improvements for AI in understanding human creative expressions.", "keywords": "comic narrative understanding;visual reasoning;multimodal benchmark;humor understanding", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhe Hu;Tuo Liang;Jing Li;Yiren Lu;Yunlai Zhou;Yiran Qiao;Jing Ma;Yu Yin", "authorids": "~Zhe_Hu4;~Tuo_Liang2;~Jing_Li18;~Yiren_Lu2;~Yunlai_Zhou1;~Yiran_Qiao2;~Jing_Ma2;~Yu_Yin2", "gender": "M;M;F;M;;M;F;F", "homepage": ";https://github.com/Jalim19;http://www4.comp.polyu.edu.hk/~jing1li/;https://yiren-lu.com/;https://github.com/BerenChou;;https://jma712.github.io/;https://yin-yu.github.io/", "dblp": ";;181/2820-49;46/10208-2;375/8437;350/9714-1;96/6129-2;", "google_scholar": "oV8sqb0AAAAJ;;jvjOLx4AAAAJ;8euSVtcAAAAJ;;XdvVFb0AAAAJ;VLElvX8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1737-3711;;0000-0002-8044-2284;0000-0002-5411-0411;;;;0000-0002-9588-5854", "linkedin": ";jalim-liang-6529162b5/;jing-li-b815b7a5/;yiren-lu-631798183/;;yiran-qiao-10298b238;;", "or_profile": "~Zhe_Hu4;~Tuo_Liang2;~Jing_Li18;~Yiren_Lu2;~Yunlai_Zhou1;~Yiran_Qiao2;~Jing_Ma2;~Yu_Yin2", "aff": "Baidu;Case Western Reserve University;The Hong Kong Polytechnic University;Case Western Reserve University;Case Western Reserve University;Case Western Reserve University;Case Western Reserve University;Case Western Reserve University", "aff_domain": "baidu.com;case.edu;polyu.edu.hk;case.edu;case.edu;case.edu;case.edu;case.edu", "position": "Researcher;MS student;Assistant Professor;PhD student;MS student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2024cracking,\ntitle={Cracking the Code of Juxtaposition: Can {AI} Models Understand the Humorous Contradictions},\nauthor={Zhe Hu and Tuo Liang and Jing Li and Yiren Lu and Yunlai Zhou and Yiran Qiao and Jing Ma and Yu Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bCMpdaQCNW}\n}", "github": "", "reviewers": "2fnt;yTj4;wVUt;aFj6", "pdf_size": 2312350, "rating": "4;7;7;8", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "64;120;27;77", "wc_strengths": "72;137;88;172", "wc_weaknesses": "279;129;164;108", "wc_questions": "160;152;18;75", "wc_limitations": "5;145;10;37", "wc_review": "580;683;307;469", "wc_reply_reviewers": "402;149;81;186", "wc_reply_authors": "604;58;216;111", "reply_reviewers": "1;1;2;2", "reply_authors": "3;2;3;3", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 33.23401871576773 ], "wc_strengths_avg": [ 117.25, 39.65712420234226 ], "wc_weaknesses_avg": [ 170.0, 66.03408210916541 ], "wc_questions_avg": [ 101.25, 58.409652455737145 ], "wc_limitations_avg": [ 49.25, 56.6055430147967 ], "wc_review_avg": [ 509.75, 139.39041394586644 ], "wc_reply_reviewers_avg": [ 204.5, 120.08434535775261 ], "wc_reply_authors_avg": [ 247.25, 213.67425558545887 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13389370107788768472&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "baidu.com;case.edu;polyu.edu.hk;case.edu;case.edu;case.edu;case.edu;case.edu", "author_num": 8, "aff_unique_index": "0;1;2;1;1;1;1;1", "aff_unique_norm": "Baidu;Case Western Reserve University;Hong Kong Polytechnic University", "aff_unique_dep": "Baidu, Inc.;;", "aff_unique_url": "https://www.baidu.com;https://www.case.edu;https://www.polyu.edu.hk", "aff_unique_abbr": "Baidu;CWRU;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "NaRCan: Natural Refined Canonical Image with Integration of Diffusion Prior for Video Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94507", "id": "bCR2NLm1QW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bCR2NLm1QW", "openreview": "https://openreview.net/forum?id=bCR2NLm1QW", "poster": "/media/PosterPDFs/NeurIPS%202024/94507.png?t=1729606400.9103172", "project": "", "author_site": "Ting-Hsuan Chen, Jie Wen Chan, Hau-Shiang Shiu, Shih-Han Yen, Changhan Yeh, Yu-Lun Liu", "tldr": "", "abstract": "We propose a video editing framework, NaRCan, which integrates a hybrid deformation field and diffusion prior to generate high-quality natural canonical images to represent the input video. Our approach utilizes homography to model global motion and employs multi-layer perceptrons (MLPs) to capture local residual deformations, enhancing the model\u2019s ability to handle complex video dynamics. By introducing a diffusion prior from the early stages of training, our model ensures that the generated images retain a high-quality natural appearance, making the produced canonical images suitable for various downstream tasks in video editing, a capability not achieved by current canonical-based methods. Furthermore, we incorporate low-rank adaptation (LoRA) fine-tuning and introduce a noise and diffusion prior update scheduling technique that accelerates the training process by 14 times. Extensive experimental results show that our method outperforms existing approaches in various video editing tasks and produces coherent and high-quality edited video sequences. See our project page for video results: [koi953215.github.io/NaRCan_page](https://koi953215.github.io/NaRCan_page/).", "keywords": "Video Editing;Canonical Images;Diffusion Models;Temporal Consistency", "primary_area": "machine_vision", "supplementary_material": "/attachment/c0c982eb07c65dfc1522af6e9e16d0d813273e11.zip", "author": "Ting-Hsuan Chen;Jiewen Chan;Hau-Shiang Shiu;Shih Han Yen;Changhan Yeh;Yu-Lun Liu", "authorids": "~Ting-Hsuan_Chen1;~Jiewen_Chan1;~Hau-Shiang_Shiu1;~Shih_Han_Yen1;~Changhan_Yeh1;~Yu-Lun_Liu2", "gender": "M;F;M;M;M;", "homepage": "https://koi953215.github.io/;https://jiewenchan.github.io/;https://www.linkedin.com/in/changhan-yeh-36bab5253/;;https://github.com/alexyen1006;http://www.cmlab.csie.ntu.edu.tw/~yulunliu/", "dblp": "28/6468;;;;;142/0282-1", "google_scholar": "HrBNI74AAAAJ;;;;;gliihzoAAAAJ", "orcid": ";;;;;", "linkedin": "tinghsuan69/;;changhan-yeh-36bab5253/;%E7%9A%93%E7%BF%94-%E8%A8%B1-b500a4306/;;yu-lun-liu-37321396/", "or_profile": "~Ting-Hsuan_Chen1;~Jiewen_Chan1;~Changhan_Yeh1;~Shiu_Hau-Shiang1;~Alex_Yen1;~Yu_Lun_Liu1", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;University of Illinois, Urbana Champaign;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;nycu.edu.tw;illinois.edu;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "position": "Research Assistant;MS student;MS student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024narcan,\ntitle={Na{RC}an: Natural Refined Canonical Image with Integration of Diffusion Prior for Video Editing},\nauthor={Ting-Hsuan Chen and Jiewen Chan and Hau-Shiang Shiu and Shih Han Yen and Changhan Yeh and Yu-Lun Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bCR2NLm1QW}\n}", "github": "", "reviewers": "pLuV;4jq5;b5x9", "pdf_size": 13659719, "rating": "5;6;6", "confidence": "2;4;5", "soundness": "1;1;4", "novelty": "2;1;3", "presentation": "1;2;3", "wc_summary": "52;27;110", "wc_strengths": "16;26;69", "wc_weaknesses": "62;150;103", "wc_questions": "75;95;191", "wc_limitations": "43;14;9", "wc_review": "248;312;482", "wc_reply_reviewers": "29;86;55", "wc_reply_authors": "75;31;77", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.0, 1.4142135623730951 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 63.0, 34.76588366008646 ], "wc_strengths_avg": [ 37.0, 22.992752481307377 ], "wc_weaknesses_avg": [ 105.0, 35.95367389665021 ], "wc_questions_avg": [ 120.33333333333333, 50.63156678946007 ], "wc_limitations_avg": [ 22.0, 14.98888477061141 ], "wc_review_avg": [ 347.3333333333333, 98.74321355020922 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 23.299976156401726 ], "wc_reply_authors_avg": [ 61.0, 21.228911104120876 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8702899287069572972&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nycu.edu.tw;nycu.edu.tw;illinois.edu;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.nycu.edu.tw;https://illinois.edu", "aff_unique_abbr": "NYCU;UIUC", "aff_campus_unique_index": "0;0;1;0;0;0", "aff_campus_unique": "Taiwan;Urbana-Champaign", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "MALT Powers Up Adversarial Attacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94506", "id": "bCqIx5Q8qX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bCqIx5Q8qX", "openreview": "https://openreview.net/forum?id=bCqIx5Q8qX", "poster": "/media/PosterPDFs/NeurIPS%202024/94506.png?t=1733062184.757262", "project": "", "author_site": "Odelia Melamed, Gilad Yehudai, Adi Shamir", "tldr": "", "abstract": "Current adversarial attacks for multi-class classifiers choose potential adversarial target classes naively based on the classifier's confidence levels. We present a novel adversarial targeting method, \\textit{MALT - Mesoscopic Almost Linearity Targeting}, based on local almost linearity assumptions. Our attack wins over the current state of the art AutoAttack on the standard benchmark datasets CIFAR-100 and Imagenet and for different robust models. In particular, our attack uses a \\emph{five times faster} attack strategy than AutoAttack's while successfully matching AutoAttack's successes and attacking additional samples that were previously out of reach. We additionally prove formally and demonstrate empirically that our targeting method, although inspired by linear predictors, also applies to non-linear models.", "keywords": "Adversarial Examples;Robustness;Neural Networks;Classification;Adversarial Attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Odelia Melamed;Gilad Yehudai;Adi Shamir", "authorids": "~Odelia_Melamed1;~Gilad_Yehudai2;~Adi_Shamir1", "gender": "F;M;M", "homepage": ";;", "dblp": "295/8736;239/4344;", "google_scholar": ";opVT1qkAAAAJ;", "orcid": ";;", "linkedin": "odelia-melamed-ba6397201/;;", "or_profile": "~Odelia_Melamed1;~Gilad_Yehudai2;~Adi_Shamir1", "aff": "Weizmann Institute, Technion;New York University;", "aff_domain": "weizmann.ac.il;nyu.edu;", "position": "PhD student;Postdoc;", "bibtex": "@inproceedings{\nmelamed2024malt,\ntitle={{MALT} Powers Up Adversarial Attacks},\nauthor={Odelia Melamed and Gilad Yehudai and Adi Shamir},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bCqIx5Q8qX}\n}", "github": "", "reviewers": "fhJA;BXuQ;WTPi;hP7Q", "pdf_size": 8220677, "rating": "3;6;6;7", "confidence": "3;3;3;4", "soundness": "4;3;3;4", "novelty": "1;3;3;3", "presentation": "4;3;3;3", "wc_summary": "52;87;126;148", "wc_strengths": "29;11;112;277", "wc_weaknesses": "119;10;245;128", "wc_questions": "121;48;132;34", "wc_limitations": "42;11;3;1", "wc_review": "363;167;618;588", "wc_reply_reviewers": "181;12;65;35", "wc_reply_authors": "189;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 103.25, 36.77890020106637 ], "wc_strengths_avg": [ 107.25, 105.14840702549897 ], "wc_weaknesses_avg": [ 125.5, 83.17000661296115 ], "wc_questions_avg": [ 83.75, 43.210965043609015 ], "wc_limitations_avg": [ 14.25, 16.452583383772897 ], "wc_review_avg": [ 434.0, 182.96311103607744 ], "wc_reply_reviewers_avg": [ 73.25, 64.98605619669499 ], "wc_reply_authors_avg": [ 47.25, 81.83940065762945 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Y1yrM6fID6cJ:scholar.google.com/&scioq=MALT+Powers+Up+Adversarial+Attacks&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "weizmann.ac.il;nyu.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Weizmann Institute of Science;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.weizmann.org.il;https://www.nyu.edu", "aff_unique_abbr": "Weizmann;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "title": "Disentangling and mitigating the impact of task similarity for continual learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94505", "id": "bE7GWLQzkM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bE7GWLQzkM", "openreview": "https://openreview.net/forum?id=bE7GWLQzkM", "poster": "", "project": "", "tldr": "", "abstract": "Continual learning of partially similar tasks poses a challenge for artificial neural networks, as task similarity presents both an opportunity for knowledge transfer and a risk of interference and catastrophic forgetting.\nHowever, it remains unclear how task similarity in input features and readout patterns influences knowledge transfer and forgetting, as well as how they interact with common algorithms for continual learning.\nHere, we develop a linear teacher-student model with latent structure and show analytically that high input feature similarity coupled with low readout similarity is catastrophic for both knowledge transfer and retention. \nConversely, the opposite scenario is relatively benign. \nOur analysis further reveals that task-dependent activity gating improves knowledge retention at the expense of transfer, while task-dependent plasticity gating does not affect either retention or transfer performance at the over-parameterized limit. \nIn contrast, weight regularization based on the Fisher information metric significantly improves retention, regardless of task similarity, without compromising transfer performance. Nevertheless, its diagonal approximation and regularization in the Euclidean space are much less robust against task similarity. \nWe demonstrate consistent results in a permuted MNIST task with latent variables. Overall, this work provides insights into when continual learning is difficult and how to mitigate it.", "keywords": "Continual learning;Teacher-student model;Lifelong learning;Brain-inspired", "primary_area": "online_learning", "supplementary_material": "", "author": "Naoki Hiratani", "authorids": "~Naoki_Hiratani1", "gender": "", "homepage": "https://www.hiratanilab.org/", "dblp": "155/6037", "google_scholar": "", "orcid": "0000-0002-8568-2033", "linkedin": "", "or_profile": "~Naoki_Hiratani1", "aff": "Washington University, Saint Louis", "aff_domain": "wustl.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\nhiratani2024disentangling,\ntitle={Disentangling and mitigating the impact of task similarity for continual learning},\nauthor={Naoki Hiratani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bE7GWLQzkM}\n}", "github": "", "reviewers": "LdFf;Uj69;dowD", "pdf_size": 3021653, "rating": "6;6;6", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "43;44;106", "wc_strengths": "92;61;198", "wc_weaknesses": "51;36;430", "wc_questions": "5;129;6", "wc_limitations": "10;2;6", "wc_review": "201;272;746", "wc_reply_reviewers": "20;36;45", "wc_reply_authors": "7;16;511", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.33333333333333, 29.465610840812758 ], "wc_strengths_avg": [ 117.0, 58.65719620529664 ], "wc_weaknesses_avg": [ 172.33333333333334, 182.3007283462015 ], "wc_questions_avg": [ 46.666666666666664, 58.2198896903417 ], "wc_limitations_avg": [ 6.0, 3.265986323710904 ], "wc_review_avg": [ 406.3333333333333, 241.92331199966836 ], "wc_reply_reviewers_avg": [ 33.666666666666664, 10.338708279513881 ], "wc_reply_authors_avg": [ 178.0, 235.49522288148438 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1700758765674759283&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "wustl.edu", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0", "aff_campus_unique": "Saint Louis", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Fair Allocation in Dynamic Mechanism Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94504", "id": "bEunGps83o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bEunGps83o", "openreview": "https://openreview.net/forum?id=bEunGps83o", "poster": "", "project": "", "author_site": "Alireza Fallah, Michael Jordan, Annie Ulichney", "tldr": "", "abstract": "We consider a dynamic mechanism design problem where an auctioneer sells an indivisible good to two groups of buyers in every round, for a total of $T$ rounds. The auctioneer aims to maximize their discounted overall revenue while adhering to a fairness constraint that guarantees a minimum average allocation for each group. We begin by studying the static case ($T=1$) and establish that the optimal mechanism involves two types of subsidization: one that increases the overall probability of allocation to all buyers, and another that favors the group which otherwise has a lower probability of winning the item. We then extend our results to the dynamic case by characterizing a set of recursive functions that determine the optimal allocation and payments in each round. Notably, our results establish that in the dynamic case, the seller, on one hand, commits to a participation reward to incentivize truth-telling, and, on the other hand, charges an entry fee for every round. Moreover, the optimal allocation once more involves subsidization in favor of one group, where the extent of subsidization depends on the difference in future utilities for both the seller and buyers when allocating the item to one group versus the other. Finally, we present an approximation scheme to solve the recursive equations and determine an approximately optimal and fair allocation efficiently.", "keywords": "Mechanism Design;Auctions;Fairness", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/453f354043453c9e2914160b1fabcd624c05a748.zip", "author": "Alireza Fallah;Michael Jordan;Annie S Ulichney", "authorids": "~Alireza_Fallah1;~Michael_Jordan1;~Annie_S_Ulichney1", "gender": ";M;F", "homepage": "https://afallah.lids.mit.edu;http://www.cs.berkeley.edu/~jordan/;", "dblp": "182/2522-1;j/MichaelIJordan;", "google_scholar": "2qkqvm4AAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;pIX2WYcAAAAJ", "orcid": ";0000-0001-8935-817X;", "linkedin": ";;annie-ulichney/", "or_profile": "~Alireza_Fallah1;~Michael_Jordan1;~Annie_S_Ulichney1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nfallah2024fair,\ntitle={Fair Allocation in Dynamic Mechanism Design},\nauthor={Alireza Fallah and Michael Jordan and Annie S Ulichney},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bEunGps83o}\n}", "github": "", "reviewers": "5sNL;WPVa;ZeA8;ssTM", "pdf_size": 624369, "rating": "5;6;7;7", "confidence": "2;2;3;4", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "2;3;4;4", "wc_summary": "211;152;109;406", "wc_strengths": "33;59;79;52", "wc_weaknesses": "140;79;57;39", "wc_questions": "28;29;90;33", "wc_limitations": "16;24;26;8", "wc_review": "428;343;361;538", "wc_reply_reviewers": "35;12;50;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 219.5, 113.60127640127993 ], "wc_strengths_avg": [ 55.75, 16.452583383772897 ], "wc_weaknesses_avg": [ 78.75, 38.09445497706982 ], "wc_questions_avg": [ 45.0, 26.04803255526221 ], "wc_limitations_avg": [ 18.5, 7.123903424387503 ], "wc_review_avg": [ 417.5, 76.44115383744544 ], "wc_reply_reviewers_avg": [ 29.25, 14.549484526951462 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5396347827135744113&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "VeLoRA: Memory Efficient Training using Rank-1 Sub-Token Projections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94503", "id": "bFoQXD7Uls", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bFoQXD7Uls", "openreview": "https://openreview.net/forum?id=bFoQXD7Uls", "poster": "", "project": "", "author_site": "Roy Miles, Pradyumna Reddy, Ismail Elezi, Jiankang Deng", "tldr": "", "abstract": "Large language models (LLMs) have recently emerged as powerful tools for tackling many language-processing tasks. Despite their success, training and fine-tuning these models is still far too computationally and memory intensive. In this paper, we identify and characterise the important components needed for effective model convergence using gradient descent. In doing so we find that the intermediate activations used to implement backpropagation can be excessively compressed without incurring any degradation in performance. This result leads us to a cheap and memory-efficient algorithm for both fine-tuning and pre-training LLMs. The proposed algorithm simply divides the tokens up into smaller sub-tokens before projecting them onto a fixed 1-dimensional subspace during the forward pass. These features are then coarsely reconstructed during the backward pass to implement the update rules. We confirm the effectiveness of our algorithm as being complimentary to many state-of-the-art PEFT methods on the VTAB-1k fine-tuning benchmark. Furthermore, we outperform QLoRA for fine-tuning LLaMA and show competitive performance against other memory-efficient pre-training methods on the large-scale C4 dataset.", "keywords": "parameter efficient fine-tuning;memory efficient training", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Roy Miles;Pradyumna Reddy;Ismail Elezi;Jiankang Deng", "authorids": "~Roy_Miles1;~Pradyumna_Reddy2;~Ismail_Elezi1;~Jiankang_Deng1", "gender": "M;M;M;M", "homepage": "https://roymiles.github.io/;https://preddy5.github.io;https://therevanchist.github.io/;https://jiankangdeng.github.io/", "dblp": "256/1672;148/8699;186/8256;156/7808", "google_scholar": "Fev4G4YAAAAJ;https://scholar.google.co.uk/citations?user=GQ8VvmUAAAAJ;tpaCLrsAAAAJ;Z_UoQFsAAAAJ", "orcid": ";;;0000-0002-3709-6216", "linkedin": "roy-miles-40b460b0/;pradyumna-reddy-b6a18650/;ismail-elezi-33958b32/?originalSubdomain=uk;jiankang-deng-b45b21b4/?originalSubdomain=uk", "or_profile": "~Roy_Miles1;~Pradyumna_Reddy2;~Ismail_Elezi1;~Jiankang_Deng1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Imperial College London", "aff_domain": "huawei.com;huawei.com;huawei.com;imperial.ac.uk", "position": "Researcher;Researcher;Researcher;Lecturer", "bibtex": "@inproceedings{\nmiles2024velora,\ntitle={VeLo{RA}: Memory Efficient Training using Rank-1 Sub-Token Projections},\nauthor={Roy Miles and Pradyumna Reddy and Ismail Elezi and Jiankang Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bFoQXD7Uls}\n}", "github": "", "reviewers": "bRtV;KYfS;sd52;UGFV", "pdf_size": 957407, "rating": "5;5;5;7", "confidence": "3;3;4;4", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "3;2;3;3", "wc_summary": "75;42;141;91", "wc_strengths": "24;32;40;98", "wc_weaknesses": "172;130;120;28", "wc_questions": "2;45;3;39", "wc_limitations": "17;9;7;1", "wc_review": "290;258;311;257", "wc_reply_reviewers": "19;23;16;17", "wc_reply_authors": "57;20;12;111", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 35.70976757135224 ], "wc_strengths_avg": [ 48.5, 29.13331426391443 ], "wc_weaknesses_avg": [ 112.5, 52.542839664410984 ], "wc_questions_avg": [ 22.25, 19.866743568083823 ], "wc_limitations_avg": [ 8.5, 5.722761571129799 ], "wc_review_avg": [ 279.0, 22.74862633215465 ], "wc_reply_reviewers_avg": [ 18.75, 2.680951323690902 ], "wc_reply_authors_avg": [ 50.0, 39.096035604649224 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17603108599829475647&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "huawei.com;huawei.com;huawei.com;imperial.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Huawei;Imperial College London", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.imperial.ac.uk", "aff_unique_abbr": "Huawei;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Extending Video Masked Autoencoders to 128 frames", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94502", "id": "bFrNPlWchg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bFrNPlWchg", "openreview": "https://openreview.net/forum?id=bFrNPlWchg", "poster": "/media/PosterPDFs/NeurIPS%202024/94502.png?t=1731900655.4544272", "project": "", "author_site": "Nitesh Bharadwaj Gundavarapu, Luke Friedman, Raghav Goyal, Chaitra Hegde, Eirikur Agustsson, Sagar Waghmare, Mikhail Sirotenko, Ming-Hsuan Yang, Tobias Weyand, Boqing Gong, Leonid Sigal", "tldr": "", "abstract": "Video understanding has witnessed significant progress with recent video foundation models demonstrating strong performance owing to self-supervised pre-training objectives; Masked Autoencoders (MAE) being the design of choice. Nevertheless, the majority of prior works that leverage MAE pre-training have focused on relatively short video representations (16 / 32 frames in length) largely due to hardware memory and compute limitations that scale poorly with video length due to the dense memory-intensive self-attention decoding. One natural strategy to address these challenges is to subsample tokens to reconstruct during decoding (or decoder masking). In this work, we propose an effective strategy for prioritizing tokens which allows training on longer video sequences (128 frames) and gets better performance than, more typical, random and uniform masking strategies. The core of our approach is an adaptive decoder masking strategy that prioritizes the most important tokens and uses quantized tokens as reconstruction objectives. Our adaptive strategy leverages a powerful MAGVIT-based tokenizer that jointly learns the tokens and their priority. We validate our design choices through exhaustive ablations and observe improved performance of the resulting long-video (128 frames) encoders over short-video (32 frames) counterparts. With our long-video masked autoencoder (LVMAE) strategy, we surpass state-of-the-art on Diving48 by 3.9 points and EPIC-Kitchens-100 verb classification by 2.5 points while relying on a simple core architecture and video-only pre-training (unlike some of the prior works that require millions of labeled video-text pairs or specialized encoders).", "keywords": "MAE;long video understanding;masked autoencoder;adaptive masking", "primary_area": "machine_vision", "supplementary_material": "", "author": "Nitesh Bharadwaj Gundavarapu;Luke Friedman;Raghav Goyal;Chaitra Hegde;Eirikur Agustsson;Sagar M. Waghmare;Mikhail Sirotenko;Ming-Hsuan Yang;Tobias Weyand;Boqing Gong;Leonid Sigal", "authorids": "~Nitesh_Bharadwaj_Gundavarapu1;~Luke_Friedman1;~Raghav_Goyal1;~Chaitra_Hegde1;~Eirikur_Agustsson1;~Sagar_M._Waghmare1;~Mikhail_Sirotenko1;~Ming-Hsuan_Yang1;~Tobias_Weyand3;~Boqing_Gong1;~Leonid_Sigal2", "gender": "Not Specified;M;M;F;;M;M;M;;M;M", "homepage": ";https://www.linkedin.com/in/lufriedman;https://www.cs.ubc.ca/~rgoyal14/;;;;https://www.linkedin.com/in/mihail-sirotenko-33187913/;https://faculty.ucmerced.edu/mhyang/;http://tobw.net;http://boqinggong.info;http://www.cs.ubc.ca/~lsigal", "dblp": "247/1182.html;;191/6034;;http://dblp.uni-trier.de/pers/hd/a/Agustsson:Eirikur;;263/7266;79/3711.html;71/6931;29/7457;09/4991", "google_scholar": "v19p_0oAAAAJ;;R0GD2lEAAAAJ;5ytxR10AAAAJ;https://scholar.google.ch/citations?user=Uhvyua4AAAAJ;l6e9JeEAAAAJ;IpGXRaAAAAAJ;p9-ohHsAAAAJ;US56Kw8AAAAJ;lv9ZeVUAAAAJ;P2mG6rcAAAAJ", "orcid": ";;;;;;;0000-0003-4848-2304;;;", "linkedin": ";;;;eirikuragustsson/;;;minghsuanyang/;;boqing-gong-46aa5821/;leonid-sigal-23723037", "or_profile": "~Nitesh_Bharadwaj_Gundavarapu1;~Luke_Friedman1;~Raghav_Goyal1;~Chaitra_Hegde1;~Eirikur_Agustsson1;~Sagar_M._Waghmare1;~Mikhail_Sirotenko1;~Ming-Hsuan_Yang1;~Tobias_Weyand3;~Boqing_Gong1;~Leonid_Sigal1", "aff": "Google;Google;University of British Columbia;Google;Google;Google;Google DeepMind;University of California at Merced;Google;Google;University of British Columbia", "aff_domain": "google.com;google.com;cs.ubc.ca;google.com;google.com;google.com;google.com;umcerced.edu;google.com;google.com;ubc.ca", "position": "Researcher;Researcher;PhD student;Researcher;Researcher;Researcher;TLM;Professor;Software Engineer;Research Scientist;Full Professor", "bibtex": "@inproceedings{\ngundavarapu2024extending,\ntitle={Extending Video Masked Autoencoders to 128 frames},\nauthor={Nitesh Bharadwaj Gundavarapu and Luke Friedman and Raghav Goyal and Chaitra Hegde and Eirikur Agustsson and Sagar M. Waghmare and Mikhail Sirotenko and Ming-Hsuan Yang and Tobias Weyand and Boqing Gong and Leonid Sigal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bFrNPlWchg}\n}", "github": "", "reviewers": "9WAZ;EcJb;UhsZ;LVuv", "pdf_size": 2719580, "rating": "3;5;6;6", "confidence": "4;4;5;4", "soundness": "4;3;4;3", "novelty": "2;3;3;3", "presentation": "4;2;4;2", "wc_summary": "44;86;72;84", "wc_strengths": "25;50;111;56", "wc_weaknesses": "37;92;40;214", "wc_questions": "19;24;25;4", "wc_limitations": "21;1;17;1", "wc_review": "146;253;265;359", "wc_reply_reviewers": "30;121;0;52", "wc_reply_authors": "564;930;121;286", "reply_reviewers": "1;1;0;1", "reply_authors": "4;5;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 71.5, 16.75559608011604 ], "wc_strengths_avg": [ 60.5, 31.388692231439016 ], "wc_weaknesses_avg": [ 95.75, 71.68812663196047 ], "wc_questions_avg": [ 18.0, 8.396427811873332 ], "wc_limitations_avg": [ 10.0, 9.1104335791443 ], "wc_review_avg": [ 255.75, 75.49627474253283 ], "wc_reply_reviewers_avg": [ 50.75, 44.5610536230911 ], "wc_reply_authors_avg": [ 475.25, 306.587161342415 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17313230807460945270&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "google.com;google.com;cs.ubc.ca;google.com;google.com;google.com;google.com;umcerced.edu;google.com;google.com;ubc.ca", "author_num": 11, "aff_unique_index": "0;0;1;0;0;0;0;2;0;0;1", "aff_unique_norm": "Google;University of British Columbia;University of California, Merced", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ubc.ca;https://www.ucmerced.edu", "aff_unique_abbr": "Google;UBC;UC Merced", "aff_campus_unique_index": "0;0;0;0;0;2;0;0", "aff_campus_unique": "Mountain View;;Merced", "aff_country_unique_index": "0;0;1;0;0;0;2;0;0;0;1", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "Opponent Modeling with In-context Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94501", "id": "bGhsbfyg3b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bGhsbfyg3b", "openreview": "https://openreview.net/forum?id=bGhsbfyg3b", "poster": "/media/PosterPDFs/NeurIPS%202024/94501.png?t=1731487527.334532", "project": "", "author_site": "Yuheng Jing, Bingyun Liu, Kai Li, Yifan Zang, Haobo Fu, Qiang Fu, Junliang Xing, Jian Cheng", "tldr": "", "abstract": "Opponent modeling is a longstanding research topic aimed at enhancing decision-making by modeling information about opponents in multi-agent environments. However, existing approaches often face challenges such as having difficulty generalizing to unknown opponent policies and conducting unstable performance. To tackle these challenges, we propose a novel approach based on in-context learning and decision-time search named Opponent Modeling with In-context Search (OMIS). OMIS leverages in-context learning-based pretraining to train a Transformer model for decision-making. It consists of three in-context components: an actor learning best responses to opponent policies, an opponent imitator mimicking opponent actions, and a critic estimating state values. When testing in an environment that features unknown non-stationary opponent agents, OMIS uses pretrained in-context components for decision-time search to refine the actor's policy. Theoretically, we prove that under reasonable assumptions, OMIS without search converges in opponent policy recognition and has good generalization properties; with search, OMIS provides improvement guarantees, exhibiting performance stability. Empirically, in competitive, cooperative, and mixed environments, OMIS demonstrates more effective and stable adaptation to opponents than other approaches. See our project website at https://sites.google.com/view/nips2024-omis.", "keywords": "Opponent Modeling;In-context Learning;Search", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/2e4714a69eafdbd8e8d16b2a14213e2235df9adb.zip", "author": "Yuheng Jing;Bingyun Liu;Kai Li;Yifan Zang;Haobo Fu;QIANG FU;Junliang Xing;Jian Cheng", "authorids": "~Yuheng_Jing1;~Bingyun_Liu1;~Kai_Li2;~Yifan_Zang1;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7", "gender": "M;M;M;M;M;M;M;", "homepage": ";;;;;http://people.ucas.ac.cn/~jlxing?language=en;https://people.ucas.ac.cn/~chengjian?language=en;https://github.com/liuby26", "dblp": "382/3906;181/2853;269/4608;85/8571;;43/7659.html;14/6145-1;", "google_scholar": ";_cY_PXgAAAAJ;;LFdJXNcAAAAJ;gANaxT0AAAAJ;jSwNd3MAAAAJ;ZGCIUJ8AAAAJ;", "orcid": ";;;;;0000-0001-6801-0510;0000-0003-1289-2758;", "linkedin": "jingyuheng;;;haobo-fu-382b0784/;;https://www.linkedin.cn/incareer/in/ACoAAAvlU14B40ZWH1pxg5JJDtQ6LlgMYkp0e5s;;", "or_profile": "~Yuheng_Jing1;~Kai_Li2;~Yifan_Zang1;~Haobo_Fu2;~QIANG_FU8;~Junliang_Xing1;~Jian_Cheng7;~Liu_Bingyun1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Tencent AI Lab;Tencent AI Lab;Tsinghua University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ucas.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;PhD student;Principal Researcher;Principal Researcher;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\njing2024opponent,\ntitle={Opponent Modeling with In-context Search},\nauthor={Yuheng Jing and Bingyun Liu and Kai Li and Yifan Zang and Haobo Fu and QIANG FU and Junliang Xing and Jian Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bGhsbfyg3b}\n}", "github": "", "reviewers": "YkYd;G1Ke;xSNm;eKwk", "pdf_size": 3837125, "rating": "4;6;6;7", "confidence": "4;4;3;2", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "4;3;1;3", "wc_summary": "139;115;91;54", "wc_strengths": "126;58;27;49", "wc_weaknesses": "179;66;59;73", "wc_questions": "135;130;346;36", "wc_limitations": "6;14;3;1", "wc_review": "585;383;526;213", "wc_reply_reviewers": "334;15;842;38", "wc_reply_authors": "1963;338;3465;50", "reply_reviewers": "1;1;2;2", "reply_authors": "5;3;7;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 99.75, 31.395660528168538 ], "wc_strengths_avg": [ 65.0, 36.979724174201195 ], "wc_weaknesses_avg": [ 94.25, 49.18015351745051 ], "wc_questions_avg": [ 161.75, 113.45125605298516 ], "wc_limitations_avg": [ 6.0, 4.949747468305833 ], "wc_review_avg": [ 426.75, 143.61123737368186 ], "wc_reply_reviewers_avg": [ 307.25, 333.3836941123546 ], "wc_reply_authors_avg": [ 1454.0, 1371.1194331640115 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.25, 1.920286436967152 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11918346035370477937&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;ia.ac.cn;ucas.ac.cn;tencent.com;tencent.com;tsinghua.edu.cn;ia.ac.cn;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;2;3;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Tencent;Tsinghua University", "aff_unique_dep": "Institute of Automation;;Tencent AI Lab;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;https://ai.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CAS;UCAS;Tencent AI Lab;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Stability and Generalization of Asynchronous SGD: Sharper Bounds Beyond Lipschitz and Smoothness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94500", "id": "bHP9hX4SvI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bHP9hX4SvI", "openreview": "https://openreview.net/forum?id=bHP9hX4SvI", "poster": "/media/PosterPDFs/NeurIPS%202024/94500.png?t=1729478048.3336904", "project": "", "author_site": "Xiaoge Deng, Tao Sun, Shengwei Li, Dongsheng Li, Xicheng Lu", "tldr": "", "abstract": "Asynchronous stochastic gradient descent (ASGD) has evolved into an indispensable optimization algorithm for training modern large-scale distributed machine learning tasks. Therefore, it is imperative to explore the generalization performance of the ASGD algorithm. However, the existing results are either pessimistic and vacuous or restricted by strict assumptions that fail to reveal the intrinsic impact of asynchronous training on generalization. In this study, we establish sharper stability and generalization bounds for ASGD under much weaker assumptions. Firstly, this paper studies the on-average model stability of ASGD and provides a non-vacuous upper bound on the generalization error, without relying on the Lipschitz assumption. Furthermore, we investigate the excess generalization error of the ASGD algorithm, revealing the effects of asynchronous delay, model initialization, number of training samples and iterations on generalization performance. Secondly, for the first time, this study explores the generalization performance of ASGD in the non-smooth case. We replace smoothness with the much weaker H\u00f6lder continuous assumption and achieve similar generalization results as in the smooth case. Finally, we validate our theoretical findings by training numerous machine learning models, including convex problems and non-convex tasks in computer vision and natural language processing.", "keywords": "Asynchronous SGD;algorithm stability;generalization error;excess generalization error", "primary_area": "learning_theory", "supplementary_material": "/attachment/625260c866dde6a01430b4eaf41c2207f7e676ce.zip", "author": "Xiaoge Deng;Tao Sun;Shengwei Li;Dongsheng Li;Xicheng Lu", "authorids": "~Xiaoge_Deng1;~Tao_Sun7;~Shengwei_Li1;~Dongsheng_Li3;~Xicheng_Lu1", "gender": "M;M;M;;", "homepage": "https://xiaogdeng.github.io/;;;;", "dblp": "262/5000;74/3590-5;00/8231;;", "google_scholar": ";fPNZpAe5WXIC;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xiaoge_Deng1;~Tao_Sun7;~Shengwei_Li1;~Dongsheng_Li3;~Xicheng_Lu1", "aff": "National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;;", "aff_domain": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;;", "position": "PhD student;Associate Professor;PhD student;;", "bibtex": "@inproceedings{\ndeng2024stability,\ntitle={Stability and Generalization of Asynchronous {SGD}: Sharper Bounds Beyond Lipschitz and Smoothness},\nauthor={Xiaoge Deng and Tao Sun and Shengwei Li and Dongsheng Li and Xicheng Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bHP9hX4SvI}\n}", "github": "", "reviewers": "yEhD;pPuN;CbYp;UzMC", "pdf_size": 959539, "rating": "5;5;5;6", "confidence": "4;5;5;4", "soundness": "3;2;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "23;70;68;52", "wc_strengths": "9;38;30;55", "wc_weaknesses": "207;105;176;66", "wc_questions": "54;47;33;113", "wc_limitations": "15;104;1;1", "wc_review": "308;364;308;287", "wc_reply_reviewers": "11;21;15;20", "wc_reply_authors": "105;105;143;143", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;4;4", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 18.806581294855267 ], "wc_strengths_avg": [ 33.0, 16.537835408541227 ], "wc_weaknesses_avg": [ 138.5, 55.85024619462299 ], "wc_questions_avg": [ 61.75, 30.53993287484437 ], "wc_limitations_avg": [ 30.25, 42.96146529158427 ], "wc_review_avg": [ 316.75, 28.595235617144336 ], "wc_reply_reviewers_avg": [ 16.75, 4.02336923485777 ], "wc_reply_authors_avg": [ 124.0, 19.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14224009434821616027&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Defense Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nudt.edu.cn/", "aff_unique_abbr": "NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Discovering Creative Behaviors through DUPLEX: Diverse Universal Features for Policy Exploration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94499", "id": "bHgkT0sUy6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bHgkT0sUy6", "openreview": "https://openreview.net/forum?id=bHgkT0sUy6", "poster": "/media/PosterPDFs/NeurIPS%202024/94499.png?t=1730283524.7567542", "project": "", "author_site": "Borja G. Leon, Francesco Riccio, Kaushik Subramanian, Peter Wurman, Peter Stone", "tldr": "", "abstract": "The ability to approach the same problem from different angles is a cornerstone of human intelligence that leads to robust solutions and effective adaptation to problem variations. In contrast, current RL methodologies tend to lead to policies that settle on a single solution to a given problem, making them brittle to problem variations. Replicating human flexibility in reinforcement learning agents is the challenge that we explore in this work. We tackle this challenge by extending state-of-the-art approaches to introduce DUPLEX, a method that explicitly defines a diversity objective with constraints and makes robust estimates of policies\u2019 expected behavior through successor features. The trained agents can (i) learn a diverse set of near-optimal policies in complex highly-dynamic environments and (ii) exhibit competitive and diverse skills in out-of-distribution (OOD) contexts. Empirical results indicate that DUPLEX improves over previous methods and successfully learns competitive driving styles in a hyper-realistic simulator (i.e., GranTurismo \u2122 7) as well as diverse and effective policies in several multi-context robotics MuJoCo simulations with OOD gravity forces and height limits. To the best of our knowledge, our method is the first to achieve diverse solutions in complex driving simulators and OOD robotic contexts. DUPLEX agents demonstrating diverse behaviors can be found at https://ai.sony/publications/Discovering-Creative-Behaviors-through-DUPLEX-Diverse-Universal-Features-for-Policy-Exploration/.", "keywords": "Reinforcement Learning;Policy Diversity;Generalization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Borja G. Le\u00f3n;Francesco Riccio;Kaushik Subramanian;Peter R. Wurman;Peter Stone", "authorids": "~Borja_G._Le\u00f3n1;~Francesco_Riccio1;~Kaushik_Subramanian2;~Peter_R._Wurman1;~Peter_Stone1", "gender": "M;M;M;M;M", "homepage": "https://www.doc.ic.ac.uk/~bg19/;;https://kausubbu.github.io/;http://pwurman.org;http://www.cs.utexas.edu/~pstone", "dblp": "259/1299;83/2934;;79/5768;s/PeterStone", "google_scholar": "https://scholar.google.es/citations?user=sJiadiMAAAAJ;;;8nhpK5IAAAAJ;qnwjcfAAAAAJ", "orcid": ";;;my-orcid?orcid=0000-0001-9349-0624;0000-0002-6795-420X", "linkedin": "borja-gonzalez-leon/;;kausubbu/;pwurman/;", "or_profile": "~Borja_G._Le\u00f3n1;~Francesco_Riccio1;~Kaushik_Subramanian2;~Peter_R._Wurman1;~Peter_Stone1", "aff": "Imperial College London;Sony Europe Ltd.;Sony Europe Ltd.;;University of Texas, Austin", "aff_domain": "imperial.ac.uk;sony.com;sony.com;;utexas.edu", "position": "PhD student;Researcher;Researcher;;Full Professor", "bibtex": "@inproceedings{\nle{\\'o}n2024discovering,\ntitle={Discovering Creative Behaviors through {DUPLEX}: Diverse Universal Features for Policy Exploration},\nauthor={Borja G. Le{\\'o}n and Francesco Riccio and Kaushik Subramanian and Peter R. Wurman and Peter Stone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bHgkT0sUy6}\n}", "github": "", "reviewers": "XDei;9gQo;Eqtv;zFsC;VFg8", "pdf_size": 1532978, "rating": "5;5;5;5;6", "confidence": "4;4;3;3;3", "soundness": "3;2;2;2;3", "novelty": "3;1;2;2;3", "presentation": "3;3;3;2;2", "wc_summary": "123;85;71;38;76", "wc_strengths": "92;30;81;42;45", "wc_weaknesses": "469;125;166;502;51", "wc_questions": "6;172;2;9;56", "wc_limitations": "23;3;8;4;24", "wc_review": "713;415;328;595;252", "wc_reply_reviewers": "205;86;0;142;0", "wc_reply_authors": "219;381;0;654;0", "reply_reviewers": "2;1;0;2;0", "reply_authors": "2;2;1;2;1", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 78.6, 27.29542086138259 ], "wc_strengths_avg": [ 58.0, 24.058262613912916 ], "wc_weaknesses_avg": [ 262.6, 185.9855908397207 ], "wc_questions_avg": [ 49.0, 64.55385348683686 ], "wc_limitations_avg": [ 12.4, 9.221713506718803 ], "wc_review_avg": [ 460.6, 170.26168095023613 ], "wc_reply_reviewers_avg": [ 86.6, 80.10892584475215 ], "wc_reply_authors_avg": [ 250.8, 247.52163541799735 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13461109912847067373&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "imperial.ac.uk;sony.com;sony.com;;utexas.edu", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Imperial College London;Sony Europe;University of Texas at Austin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.sony.eu;https://www.utexas.edu", "aff_unique_abbr": "ICL;Sony Europe;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "CulturePark: Boosting Cross-cultural Understanding in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94498", "id": "bIFHHf2RoD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bIFHHf2RoD", "openreview": "https://openreview.net/forum?id=bIFHHf2RoD", "poster": "", "project": "", "author_site": "Cheng Li, Damien Teney, Linyi Yang, Qingsong Wen, Xing Xie, Jindong Wang", "tldr": "", "abstract": "Cultural bias is pervasive in many large language models (LLMs), largely due to the deficiency of data representative of different cultures.\nTypically, cultural datasets and benchmarks are constructed either by extracting subsets of existing datasets or by aggregating from platforms such as Wikipedia and social media.\nHowever, these approaches are highly dependent on real-world data and human annotations, making them costly and difficult to scale.\nInspired by cognitive theories on social communication, this paper introduces CulturePark, an LLM-powered multi-agent communication framework for cultural data collection.\nCulturePark simulates cross-cultural human communication with LLM-based agents playing roles in different cultures.\nIt generates high-quality cross-cultural dialogues encapsulating human beliefs, norms, and customs.\nUsing CulturePark, we generated 41,000 cultural samples to fine-tune eight culture-specific LLMs.\nWe evaluated these models across three downstream tasks: content moderation, cultural alignment, and cultural education.\nResults show that for content moderation, our GPT-3.5-based models either match or outperform GPT-4 on $41$ datasets. Regarding cultural alignment, our models surpass GPT-4 on Hofstede's VSM 13 framework.\nFurthermore, for cultural education of human participants, our models demonstrate superior outcomes in both learning efficacy and user experience compared to GPT-4. CulturePark proves an important step in addressing cultural bias and advancing the democratization of AI, highlighting the critical role of culturally inclusive data in model training. Code is released at https://github.com/Scarelette/CulturePark.", "keywords": "Culture bias;large language model;fine-tuning", "primary_area": "fairness", "supplementary_material": "", "author": "CHENG LI;Damien Teney;Linyi Yang;Qingsong Wen;Xing Xie;Jindong Wang", "authorids": "~CHENG_LI26;~Damien_Teney1;~Linyi_Yang1;~Qingsong_Wen2;~Xing_Xie3;~Jindong_Wang4", "gender": ";M;;M;M;M", "homepage": "https://scholar.google.com/citations?user=083GCIwAAAAJ&hl=zh-CN;https://www.damienteney.info;https://yanglinyi.github.io/;http://research.microsoft.com/en-us/people/xingx/;https://jd92.wang/;https://sites.google.com/site/qingsongwen8/", "dblp": ";62/10068;218/8007;08/6809-1;19/2969-1;27/561", "google_scholar": "083GCIwAAAAJ;https://scholar.google.com.au/citations?user=iS_jP_3dpD8J;go3sFxcAAAAJ;5EQfAFIAAAAJ;hBZ_tKsAAAAJ;vjPJvwYAAAAJ", "orcid": ";;;0000-0002-8608-8482;0000-0002-4833-0880;0000-0003-4516-2524", "linkedin": ";;;xingx/;jindong-wang/;qingsong-wen-22814156/", "or_profile": "~CHENG_LI26;~Damien_Teney1;~Linyi_Yang1;~Xing_Xie3;~Jindong_Wang4;~Qingsong_Wen1", "aff": "Department of Computer Science, University of Washington;Idiap Research Institute;Westlake University;Microsoft Research Asia;Microsoft Research;Squirrel Ai Learning", "aff_domain": "cs.washington.edu;idiap.ch;westlake.edu.cn;microsoft.com;microsoft.com;squirrelai.com", "position": "Intern;Researcher;Researcher;Senior Principal Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nli2024culturepark,\ntitle={CulturePark: Boosting Cross-cultural Understanding in Large Language Models},\nauthor={CHENG LI and Damien Teney and Linyi Yang and Qingsong Wen and Xing Xie and Jindong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bIFHHf2RoD}\n}", "github": "", "reviewers": "FthM;KH6K;7p2y;4gqo", "pdf_size": 1651331, "rating": "3;5;8;9", "confidence": "4;3;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "133;77;106;32", "wc_strengths": "22;34;95;17", "wc_weaknesses": "274;92;499;1", "wc_questions": "150;93;1;1", "wc_limitations": "11;1;50;1", "wc_review": "590;297;751;52", "wc_reply_reviewers": "489;106;24;0", "wc_reply_authors": "2136;528;252;0", "reply_reviewers": "2;1;1;0", "reply_authors": "8;2;3;1", "rating_avg": [ 6.25, 2.384848003542364 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.0, 37.42325480232846 ], "wc_strengths_avg": [ 42.0, 31.216982557575932 ], "wc_weaknesses_avg": [ 216.5, 190.42912067223332 ], "wc_questions_avg": [ 61.25, 63.53099637184986 ], "wc_limitations_avg": [ 15.75, 20.191272867256288 ], "wc_review_avg": [ 422.5, 268.78848561647874 ], "wc_reply_reviewers_avg": [ 154.75, 196.9408223299578 ], "wc_reply_authors_avg": [ 729.0, 833.5196458392568 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 2.692582403567252 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3026137663344012, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4120841208010054107&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.washington.edu;idiap.ch;westlake.edu.cn;microsoft.com;microsoft.com;squirrelai.com", "author_num": 6, "aff_unique_index": "0;1;2;3;3;4", "aff_unique_norm": "University of Washington;Idiap Research Institute;Westlake University;Microsoft;Squirrel Ai Learning", "aff_unique_dep": "Department of Computer Science;;;Research;", "aff_unique_url": "https://www.washington.edu;https://www.idiap.ch;https://www.westlake.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.squirrelai.com/", "aff_unique_abbr": "UW;Idiap;WU;MSR Asia;", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Seattle;;Asia", "aff_country_unique_index": "0;1;2;2;0;2", "aff_country_unique": "United States;Switzerland;China" }, { "title": "GarmentLab: A Unified Simulation and Benchmark for Garment Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94497", "id": "bIRcf8i1kp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bIRcf8i1kp", "openreview": "https://openreview.net/forum?id=bIRcf8i1kp", "poster": "", "project": "", "author_site": "Haoran Lu, Ruihai Wu, Yitong Li, Sijie Li, Ziyu Zhu, Chuanruo Ning, Yan Zhao, Longzan Luo, Yuanpei Chen, Hao Dong", "tldr": "", "abstract": "Manipulating garments and fabrics has long been a critical endeavor in the development of home-assistant robots. However, due to complex dynamics and topological structures, garment manipulations pose significant challenges. Recent successes in reinforcement learning and vision-based methods offer promising avenues for learning garment manipulation. Nevertheless, these approaches are severely constrained by current benchmarks, which exhibit offer limited diversity of tasks and unrealistic simulation behavior. Therefore, we present GarmentLab, a content-rich benchmark and realistic simulation designed for deformable object and garment manipulation. Our benchmark encompasses a diverse range of garment types, robotic systems and manipulators. The abundant tasks in the benchmark further explores of the interactions between garments, deformable objects, rigid bodies, fluids, and human body. Moreover, by incorporating multiple simulation methods such as FEM and PBD, along with our proposed sim-to-real algorithms and real-world benchmark, we aim to significantly narrow the sim-to-real gap. We evaluate state-of-the-art vision methods, reinforcement learning, and imitation learning approaches on these tasks, highlighting the challenges faced by current algorithms, notably their limited generalization capabilities. Our proposed open-source environments and comprehensive analysis show promising boost to future research in garment manipulation by unlocking the full potential of these methods. We guarantee that we will open-source our code as soon as possible. You can watch the videos in supplementary files to learn more about the details of our work.", "keywords": "Garment Manipulation;Simulation;Benchmark", "primary_area": "robotics", "supplementary_material": "/attachment/b01ca617521b357eb4f35adab70ffa12b3f05357.zip", "author": "Haoran Lu;Ruihai Wu;Yitong Li;Sijie Li;Ziyu Zhu;Chuanruo Ning;Yan Shen;Longzan Luo;Yuanpei Chen;Hao Dong", "authorids": "~Haoran_Lu2;~Ruihai_Wu1;~Yitong_Li5;~Sijie_Li3;~Ziyu_Zhu2;~Chuanruo_Ning1;~Yan_Shen3;~Longzan_Luo1;~Yuanpei_Chen2;~Hao_Dong3", "gender": "M;M;M;Not Specified;F;M;M;M;M;F", "homepage": "https://luhr2003.github.io/;https://warshallrho.github.io/;https://github.com/;https://planarg.cn;https://github.com/AlwaySleepy;https://tritiumr.github.io;https://llllllz666.github.io/longzan.github.io/;https://cypypccpy.github.io/;https://zsdonghao.github.io;https://sxy7147.github.io", "dblp": ";248/8028.html;;;;342/8955;;1234567;14/1525-3.html;88/5320-35", "google_scholar": "wNDTItAAAAAJ;https://scholar.google.com/citations?hl=en;;;;jnLq85IAAAAJ;;https://scholar.google.com/citations?hl=en;xLFL4sMAAAAJ;iIs4TDMAAAAJ", "orcid": ";;;;;;;0000-0002-0033-492X;0000-0003-2261-9122;", "linkedin": ";;;;;;;;;", "or_profile": "~Haoran_Lu2;~Ruihai_Wu1;~Yitong_Li5;~Sijie_Li3;~Ziyu_Zhu2;~Chuanruo_Ning1;~Longzan_Luo1;~Yuanpei_Chen2;~Hao_Dong3;~Yan_Zhao5", "aff": "Peking University;Peking University;Tsinghua University;Peking University;Peking University;Peking University;Peking University;PsiRobot;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;mails.tsinghua.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;psibot.ai;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Researcher;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nlu2024garmentlab,\ntitle={GarmentLab: A Unified Simulation and Benchmark for Garment Manipulation},\nauthor={Haoran Lu and Ruihai Wu and Yitong Li and Sijie Li and Ziyu Zhu and Chuanruo Ning and Yan Shen and Longzan Luo and Yuanpei Chen and Hao Dong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bIRcf8i1kp}\n}", "github": "", "reviewers": "6kGg;1QRi;wEM9;E1Nw", "pdf_size": 6314721, "rating": "6;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "59;29;93;30", "wc_strengths": "66;48;59;57", "wc_weaknesses": "32;5;447;63", "wc_questions": "2;143;5;2", "wc_limitations": "1;5;1;22", "wc_review": "160;230;605;174", "wc_reply_reviewers": "17;0;24;33", "wc_reply_authors": "0;35;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.75, 26.176086414894034 ], "wc_strengths_avg": [ 57.5, 6.422616289332565 ], "wc_weaknesses_avg": [ 136.75, 180.29472399379856 ], "wc_questions_avg": [ 38.0, 60.63414879422321 ], "wc_limitations_avg": [ 7.25, 8.671072598012312 ], "wc_review_avg": [ 292.25, 182.4559878436441 ], "wc_reply_reviewers_avg": [ 18.5, 12.093386622447824 ], "wc_reply_authors_avg": [ 8.75, 15.155444566227676 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=334725050833699930&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;pku.edu.cn;mails.tsinghua.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;psibot.ai;pku.edu.cn;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;0;0;0;0;2;0;0", "aff_unique_norm": "Peking University;Tsinghua University;PsiRobot", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn;", "aff_unique_abbr": "Peking U;THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Multilinear Mixture of Experts: Scalable Expert Specialization through Factorization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94496", "id": "bIa03mAtxQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bIa03mAtxQ", "openreview": "https://openreview.net/forum?id=bIa03mAtxQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94496.png?t=1733603191.9508264", "project": "", "author_site": "James Oldfield, Markos Georgopoulos, Grigorios Chrysos, Christos Tzelepis, Yannis Panagakis, Mihalis Nicolaou, Jiankang Deng, Ioannis Patras", "tldr": "", "abstract": "The Mixture of Experts (MoE) paradigm provides a powerful way to decompose dense layers into smaller, modular computations often more amenable to human interpretation, debugging, and editability. However, a major challenge lies in the computational cost of scaling the number of experts high enough to achieve fine-grained specialization. In this paper, we propose the Multilinear Mixture of Experts (\u03bcMoE) layer to address this, focusing on vision models. \u03bcMoE layers enable scalable expert specialization by performing an implicit computation on prohibitively large weight tensors entirely in factorized form. Consequently, \u03bcMoEs (1) avoid the restrictively high inference-time costs of dense MoEs, yet (2) do not inherit the training issues of the popular sparse MoEs' discrete (non-differentiable) expert routing. We present both qualitative and quantitative evidence that scaling \u03bcMoE layers when fine-tuning foundation models for vision tasks leads to more specialized experts at the class-level, further enabling manual bias correction in CelebA attribute classification. Finally, we show qualitative results demonstrating the expert specialism achieved when pre-training large GPT2 and MLP-Mixer models with parameter-matched \u03bcMoE blocks at every layer, maintaining comparable accuracy. Our code is available at: https://github.com/james-oldfield/muMoE.", "keywords": "interpretability;mixture of experts", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "James Oldfield;Markos Georgopoulos;Grigorios Chrysos;Christos Tzelepis;Yannis Panagakis;Mihalis Nicolaou;Jiankang Deng;Ioannis Patras", "authorids": "~James_Oldfield1;~Markos_Georgopoulos1;~Grigorios_Chrysos1;~Christos_Tzelepis2;~Yannis_Panagakis1;~Mihalis_Nicolaou1;~Jiankang_Deng1;~Ioannis_Patras2", "gender": ";;M;;;Not Specified;M;M", "homepage": "https://james-oldfield.github.io/;;https://grigorisg9gr.github.io/;;;https://mihalisan.github.io;https://jiankangdeng.github.io/;http://www.eecs.qmul.ac.uk/~ioannisp/", "dblp": "164/2444-1;197/6876;75/6117-2;;;32/8615;156/7808;18/1556", "google_scholar": "h5NoWGQAAAAJ;id7vw0UAAAAJ;1bU041kAAAAJ;;;R9x_bZ8AAAAJ;Z_UoQFsAAAAJ;https://scholar.google.com.tw/citations?user=OBYLxRkAAAAJ", "orcid": ";;;;;;0000-0002-3709-6216;0000-0003-3913-4738", "linkedin": ";;;;;;jiankang-deng-b45b21b4/?originalSubdomain=uk;ioannis-patras-1053767/", "or_profile": "~James_Oldfield1;~Markos_Georgopoulos1;~Grigorios_Chrysos1;~Christos_Tzelepis2;~Yannis_Panagakis1;~Mihalis_Nicolaou1;~Jiankang_Deng1;~Ioannis_Patras2", "aff": "Huawei Technologies Research & Development (UK) ;Meta;University of Wisconsin - Madison;;;The Cyprus Institute;Imperial College London;Queen Mary, University of London", "aff_domain": "huawei.com;meta.com;wisc.edu;;;cyi.ac.cy;imperial.ac.uk;qmul.ac.uk", "position": "Intern;Researcher;Assistant Professor;;;Associate Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\noldfield2024multilinear,\ntitle={Multilinear Mixture of Experts: Scalable Expert Specialization through Factorization},\nauthor={James Oldfield and Markos Georgopoulos and Grigorios Chrysos and Christos Tzelepis and Yannis Panagakis and Mihalis Nicolaou and Jiankang Deng and Ioannis Patras},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bIa03mAtxQ}\n}", "github": "", "reviewers": "Su68;YTQ3;XreH;qC51;1sqG", "pdf_size": 21256403, "rating": "4;5;5;6;7", "confidence": "4;5;5;5;4", "soundness": "3;2;2;2;4", "novelty": "3;1;2;2;4", "presentation": "3;4;2;2;3", "wc_summary": "63;78;53;96;76", "wc_strengths": "101;111;39;37;109", "wc_weaknesses": "307;366;14;169;113", "wc_questions": "1;77;62;84;42", "wc_limitations": "11;12;1;9;15", "wc_review": "483;644;169;395;355", "wc_reply_reviewers": "0;155;27;167;0", "wc_reply_authors": "157;592;71;646;0", "reply_reviewers": "0;1;1;2;0", "reply_authors": "3;4;2;4;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.4, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 73.2, 14.579437574886077 ], "wc_strengths_avg": [ 79.4, 33.97410778813772 ], "wc_weaknesses_avg": [ 193.8, 128.01468665743005 ], "wc_questions_avg": [ 53.2, 29.808723555362114 ], "wc_limitations_avg": [ 9.6, 4.715930449020639 ], "wc_review_avg": [ 409.2, 155.82862381475363 ], "wc_reply_reviewers_avg": [ 69.8, 75.21010570395444 ], "wc_reply_authors_avg": [ 293.2, 271.16002655258757 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.8, 1.16619037896906 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.08006407690254352, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7363299750312637222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "huawei.com;meta.com;wisc.edu;;;cyi.ac.cy;imperial.ac.uk;qmul.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Huawei;Meta;University of Wisconsin-Madison;Cyprus Institute;Imperial College London;Queen Mary, University of London", "aff_unique_dep": "Research & Development;Meta Platforms, Inc.;;;;", "aff_unique_url": "https://www.huawei.com/uk;https://meta.com;https://www.wisc.edu;https://www.cyi.ac.cy;https://www.imperial.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "Huawei;Meta;UW-Madison;CyI;ICL;QMUL", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;London", "aff_country_unique_index": "0;1;1;2;0;0", "aff_country_unique": "United Kingdom;United States;Cyprus" }, { "title": "VisMin: Visual Minimal-Change Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94495", "id": "bJddXCyosA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bJddXCyosA", "openreview": "https://openreview.net/forum?id=bJddXCyosA", "poster": "", "project": "", "author_site": "Rabiul Awal, Saba Ahmadi, LE ZHANG, Aishwarya Agrawal", "tldr": "", "abstract": "Fine-grained understanding of objects, attributes, and relationships between objects is crucial for visual-language models (VLMs). To evaluate VLMs' fine-grained understanding, existing benchmarks primarily focus on evaluating VLMs' capability to distinguish between two very similar captions given an image. In this paper, our focus is on evaluating VLMs' capability to distinguish between two very similar images given a caption. To this end, we introduce a new, challenging benchmark termed Visual Minimal-Change Understanding (VisMin), which requires models to predict the correct image-caption match given two images and two captions. Importantly, the image pair (as well as the caption pair) contains minimal changes, i.e., between the two images (as well as between the two captions), only one aspect changes at a time from among the following possible types of changes: object, attribute, count, and spatial relation. These four types of minimal changes are specifically designed to test the models' understanding of objects, attributes of objects (such as color, material, shape), counts of objects, and spatial relationships between objects. To curate our benchmark, we built an automatic pipeline using large language models and diffusion models, followed by a rigorous 4-step verification process by human annotators. Empirical experiments reveal that current VLMs exhibit notable deficiencies in understanding spatial relationships and counting abilities. Furthermore, leveraging the automated nature of our data creation process, we generate a large-scale training dataset, which we use to finetune CLIP (a foundational VLM) and Idefics2 (a multimodal large language model). Our findings show that both these models benefit significantly from fine-tuning on this data, as evident by marked improvements in fine-grained understanding across a wide range of benchmarks. Additionally, such fine-tuning improves CLIP's general image-text alignment capabilities too. All resources including the benchmark, the training data, and the finetuned model checkpoints will be released.", "keywords": "vision-language;fine-grained understanding;multimodal representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Rabiul Awal;Saba Ahmadi;Le Zhang;Aishwarya Agrawal", "authorids": "~Rabiul_Awal1;~Saba_Ahmadi2;~Le_Zhang6;~Aishwarya_Agrawal1", "gender": "M;;M;F", "homepage": "https://rabiul.me/;https://saba96.github.io/;https://lezhang7.github.io/;https://www.iro.umontreal.ca/~agrawal/", "dblp": ";;03/4043-12;163/2109.html", "google_scholar": "https://scholar.google.ca/citations?user=oOcLfUMAAAAJ;https://scholar.google.ca/citations?user=zhGGAZgAAAAJ;NqbBXAsAAAAJ;znH6xJ8AAAAJ", "orcid": ";;;", "linkedin": "rabiulawal/;saba-ahmadi/;;", "or_profile": "~Rabiul_Awal1;~Saba_Ahmadi2;~Le_Zhang6;~Aishwarya_Agrawal1", "aff": "Mila - Quebec AI Institute;Mila - Quebec Artificial Intelligence Institute;Mila - Quebec AI Institute & Universit\u00e9 de Montr\u00e9al;Google DeepMind", "aff_domain": "mila.quebec;mila.quebec;mila.umontreal.ca;google.com", "position": "Intern;Researcher;PhD student;Research Scientist ", "bibtex": "@inproceedings{\nawal2024vismin,\ntitle={VisMin: Visual Minimal-Change Understanding},\nauthor={Rabiul Awal and Saba Ahmadi and Le Zhang and Aishwarya Agrawal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bJddXCyosA}\n}", "github": "", "reviewers": "dNLV;Lkr8;a22a", "pdf_size": 29296633, "rating": "5;5;7", "confidence": "5;4;5", "soundness": "2;3;4", "novelty": "3;2;3", "presentation": "3;3;4", "wc_summary": "87;58;116", "wc_strengths": "74;62;65", "wc_weaknesses": "120;109;216", "wc_questions": "29;40;177", "wc_limitations": "6;26;82", "wc_review": "316;295;656", "wc_reply_reviewers": "9;30;289", "wc_reply_authors": "40;175;529", "reply_reviewers": "1;1;2", "reply_authors": "2;3;4", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 87.0, 23.678400846904054 ], "wc_strengths_avg": [ 67.0, 5.0990195135927845 ], "wc_weaknesses_avg": [ 148.33333333333334, 48.05783552715993 ], "wc_questions_avg": [ 82.0, 67.32508200267317 ], "wc_limitations_avg": [ 38.0, 32.16623488483952 ], "wc_review_avg": [ 422.3333333333333, 165.4495559243226 ], "wc_reply_reviewers_avg": [ 109.33333333333333, 127.33246072999437 ], "wc_reply_authors_avg": [ 248.0, 206.1989330719245 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11530613671293686359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mila.quebec;mila.quebec;mila.umontreal.ca;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Quebec AI Institute;Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;Google", "aff_unique_dep": "AI Institute;Artificial Intelligence;Quebec AI Institute;Google DeepMind", "aff_unique_url": "https://mila.quebec;https://mila.quebec;https://www.umontreal.ca;https://deepmind.com", "aff_unique_abbr": "Mila;Mila;UdeM;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Causal Contrastive Learning for Counterfactual Regression Over Time", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94494", "id": "bKOZYBJE4Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bKOZYBJE4Z", "openreview": "https://openreview.net/forum?id=bKOZYBJE4Z", "poster": "/media/PosterPDFs/NeurIPS%202024/94494.png?t=1731584543.3406723", "project": "", "author_site": "Mouad EL Bouchattaoui, Myriam Tami, BENOIT LEPETIT, Paul-Henry Courn\u00e8de", "tldr": "", "abstract": "Estimating treatment effects over time holds significance in various domains, including precision medicine, epidemiology, economy, and marketing. This paper introduces a unique approach to counterfactual regression over time, emphasizing long-term predictions. Distinguishing itself from existing models like Causal Transformer, our approach highlights the efficacy of employing RNNs for long-term forecasting, complemented by Contrastive Predictive Coding (CPC) and Information Maximization (InfoMax). Emphasizing efficiency, we avoid the need for computationally expensive transformers. Leveraging CPC, our method captures long-term dependencies within time-varying confounders. Notably, recent models have disregarded the importance of invertible representation, compromising identification assumptions. To remedy this, we employ the InfoMax principle, maximizing a lower bound of mutual information between sequence data and its representation. Our method achieves state-of-the-art counterfactual estimation results using both synthetic and real-world data, marking the pioneering incorporation of Contrastive Predictive Encoding in causal inference.", "keywords": "Counterfactual Regression;Longitudinal Data;Contrastive Learning", "primary_area": "causal_inference", "supplementary_material": "/attachment/cb73b0ad25a0114240a1e70a4f5e1c99a25d4b01.zip", "author": "Mouad El Bouchattaoui;Myriam Tami;BENOIT LEPETIT;Paul-Henry Courn\u00e8de", "authorids": "~Mouad_El_Bouchattaoui1;~Myriam_Tami1;~BENOIT_LEPETIT1;~Paul-Henry_Courn\u00e8de1", "gender": "M;;;M", "homepage": ";https://myriamtami.github.io/;;", "dblp": ";228/8539;;01/2224.html", "google_scholar": "https://scholar.google.com/citations?view_op=search_authors;kavk5oUAAAAJ;;https://scholar.google.fr/scholar?hl=fr", "orcid": "0009-0000-8132-9825;;;0000-0001-7679-6197", "linkedin": "mouad-elbouchattaoui/?originalSubdomain=fr;;;", "or_profile": "~Mouad_El_Bouchattaoui1;~Myriam_Tami1;~BENOIT_LEPETIT1;~Paul-Henry_Courn\u00e8de1", "aff": "CentraleSupelec;CentraleSupelec;;Paris-Saclay University", "aff_domain": "centralesupelec.fr;centralesupelec.fr;;universite-paris-saclay.fr", "position": "PhD student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nbouchattaoui2024causal,\ntitle={Causal Contrastive Learning for Counterfactual Regression Over Time},\nauthor={Mouad El Bouchattaoui and Myriam Tami and BENOIT LEPETIT and Paul-Henry Courn{\\`e}de},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bKOZYBJE4Z}\n}", "github": "", "reviewers": "PYKS;Nj8s;DSyv;6PYm;pP4H;mdqp", "pdf_size": 952154, "rating": "5;6;6;6;6;6", "confidence": "3;3;4;3;4;2", "soundness": "3;3;3;3;3;3", "novelty": "2;3;3;3;3;2", "presentation": "2;3;3;3;3;3", "wc_summary": "116;20;47;58;103;47", "wc_strengths": "46;25;73;66;57;38", "wc_weaknesses": "194;89;158;119;115;130", "wc_questions": "37;19;70;324;126;98", "wc_limitations": "25;2;2;8;46;6", "wc_review": "418;155;350;575;447;319", "wc_reply_reviewers": "16;16;15;16;15;64", "wc_reply_authors": "18;18;18;21;17;51", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "2;2;2;2;2;3", "rating_avg": [ 5.833333333333333, 0.372677996249965 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 65.16666666666667, 33.582816373192344 ], "wc_strengths_avg": [ 50.833333333333336, 16.405453022970406 ], "wc_weaknesses_avg": [ 134.16666666666666, 33.67202927587756 ], "wc_questions_avg": [ 112.33333333333333, 101.13796957072498 ], "wc_limitations_avg": [ 14.833333333333334, 15.962629969887654 ], "wc_review_avg": [ 377.3333333333333, 128.5569480381706 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 18.043158136965808 ], "wc_reply_authors_avg": [ 23.833333333333332, 12.212243401148246 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.10846522890932811, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3012523891780339731&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "centralesupelec.fr;centralesupelec.fr;;universite-paris-saclay.fr", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "CentraleSup\u00e9lec;Paris-Saclay University", "aff_unique_dep": ";", "aff_unique_url": "https://www.centralesupelec.fr;https://www.universite-paris-saclay.fr", "aff_unique_abbr": "CS;Paris-Saclay", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "VQ-Map: Bird's-Eye-View Map Layout Estimation in Tokenized Discrete Space via Vector Quantization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94493", "id": "bKuxygBW2Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bKuxygBW2Y", "openreview": "https://openreview.net/forum?id=bKuxygBW2Y", "poster": "/media/PosterPDFs/NeurIPS%202024/94493.png?t=1731385758.4428153", "project": "", "author_site": "Yiwei Zhang, Jin Gao, Fudong Ge, Guan Luo, Bing Li, ZHAO-XIANG ZHANG, Haibin Ling, Weiming Hu", "tldr": "", "abstract": "Bird's-eye-view (BEV) map layout estimation requires an accurate and full understanding of the semantics for the environmental elements around the ego car to make the results coherent and realistic. Due to the challenges posed by occlusion, unfavourable imaging conditions and low resolution, \\emph{generating} the BEV semantic maps corresponding to corrupted or invalid areas in the perspective view (PV) is appealing very recently. \\emph{The question is how to align the PV features with the generative models to facilitate the map estimation}. In this paper, we propose to utilize a generative model similar to the Vector Quantized-Variational AutoEncoder (VQ-VAE) to acquire prior knowledge for the high-level BEV semantics in the tokenized discrete space. Thanks to the obtained BEV tokens accompanied with a codebook embedding encapsulating the semantics for different BEV elements in the groundtruth maps, we are able to directly align the sparse backbone image features with the obtained BEV tokens from the discrete representation learning based on a specialized token decoder module, and finally generate high-quality BEV maps with the BEV codebook embedding serving as a bridge between PV and BEV. We evaluate the BEV map layout estimation performance of our model, termed VQ-Map, on both the nuScenes and Argoverse benchmarks, achieving 62.2/47.6 mean IoU for surround-view/monocular evaluation on nuScenes, as well as 73.4 IoU for monocular evaluation on Argoverse, which all set a new record for this map layout estimation task. The code and models are available on \\url{https://github.com/Z1zyw/VQ-Map}.", "keywords": "BEV map layout estimation;vector quantization;camera-only", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yiwei Zhang;Jin Gao;Fudong Ge;Guan Luo;Bing Li;Zhaoxiang Zhang;Haibin Ling;Weiming Hu", "authorids": "~Yiwei_Zhang9;~Jin_Gao1;~Fudong_Ge1;~Guan_Luo1;~Bing_Li1;~Zhaoxiang_Zhang3;~Haibin_Ling1;~Weiming_Hu1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/Z1zyw;https://people.ucas.edu.cn/~jgao?language=en;;;http://www.escience.cn/people/BingLi;http://zhaoxiangzhang.net;https://www3.cs.stonybrook.edu/~hling/;http://weiminghu.people-ai.net/", "dblp": ";;;60/6918.html;13/2692-1;55/2285-1.html;93/3488;", "google_scholar": ";W1o3B-0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;FQxHDqsAAAAJ;;qxWfV6cAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;;0000-0001-9237-8825", "linkedin": ";;;;;;;", "or_profile": "~Yiwei_Zhang9;~Jin_Gao1;~Fudong_Ge1;~Guan_Luo1;~Bing_Li1;~Zhaoxiang_Zhang3;~Haibin_Ling1;~Weiming_Hu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;State University of New York, Stony Brook;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;stonybrook.edu;nlpr.ia.ac.cn", "position": "PhD student;Associate Professor;PhD student;Associate Professor;Full Professor;Full Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024vqmap,\ntitle={{VQ}-Map: Bird's-Eye-View Map Layout Estimation in Tokenized Discrete Space via Vector Quantization},\nauthor={Yiwei Zhang and Jin Gao and Fudong Ge and Guan Luo and Bing Li and Zhaoxiang Zhang and Haibin Ling and Weiming Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bKuxygBW2Y}\n}", "github": "", "reviewers": "xhZa;1hbx;1JdE;rd8m", "pdf_size": 4732410, "rating": "3;5;6;6", "confidence": "5;3;4;4", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "3;3;3;3", "wc_summary": "76;61;94;88", "wc_strengths": "92;54;44;56", "wc_weaknesses": "175;64;46;171", "wc_questions": "18;41;157;2", "wc_limitations": "1;27;13;50", "wc_review": "362;247;354;367", "wc_reply_reviewers": "0;67;83;9", "wc_reply_authors": "0;647;447;0", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.75, 12.616952880945542 ], "wc_strengths_avg": [ 61.5, 18.186533479473212 ], "wc_weaknesses_avg": [ 114.0, 59.35907681222814 ], "wc_questions_avg": [ 54.5, 60.780342216871404 ], "wc_limitations_avg": [ 22.75, 18.226011631731172 ], "wc_review_avg": [ 332.5, 49.580742229216376 ], "wc_reply_reviewers_avg": [ 39.75, 35.84253757757673 ], "wc_reply_authors_avg": [ 273.5, 282.4929202652697 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w-FT4GLiv0cJ:scholar.google.com/&scioq=VQ-Map:+Bird%27s-Eye-View+Map+Layout+Estimation+in+Tokenized+Discrete+Space+via+Vector+Quantization&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;stonybrook.edu;nlpr.ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;State University of New York", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.stonybrook.edu", "aff_unique_abbr": "CAS;SUNY Stony Brook", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Entropy testing and its application to testing Bayesian networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94492", "id": "bMSXeAlCI4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bMSXeAlCI4", "openreview": "https://openreview.net/forum?id=bMSXeAlCI4", "poster": "", "project": "", "author_site": "Cl\u00e9ment L Canonne, Qiping Yang", "tldr": "", "abstract": "This paper studies the problem of \\emph{entropy identity testing}: given sample access to a distribution $p$ and a fully described distribution $q$ (both are discrete distributions over the support of size $k$), and the promise that either $p = q$ or $ | H(p) - H(q) | \\geqslant \\varepsilon$, where $H(\\cdot)$ denotes the Shannon entropy, a tester needs to distinguish between the two cases with high probability. We establish a near-optimal sample complexity bound of $\\tilde{\\Theta}(\\sqrt{k}/\\varepsilon + {1}/{\\varepsilon^2}$) for this problem, and show how to apply it to the problem of identity testing for in-degree-$d$ $n$-dimensional Bayesian networks, obtaining an upper bound of $\\tilde{O}( {2^{d / 2} n^{3/2}}/{\\varepsilon^2} + {n^2}/{\\varepsilon^4} )$. This improves on the sample complexity bound of $\\tilde{O}(2^{d/2}n^2/\\varepsilon^4)$ from Canonne, Diakonikolas, Kane, and Stewart (2020), which required an additional assumption on the structure of the (unknown) Bayesian network.", "keywords": "bayesian networks;distribution testing;shannon entropy;graphical models;hypothesis testing", "primary_area": "learning_theory", "supplementary_material": "", "author": "Clement Louis Canonne;Qiping Yang", "authorids": "~Clement_Louis_Canonne1;~Qiping_Yang1", "gender": "M;M", "homepage": "https://ccanonne.github.io/;https://twitter.com/nerd_qp", "dblp": "28/9840L;298/4926", "google_scholar": "u_OXsBIAAAAJ;", "orcid": "0000-0001-7153-5211;0009-0009-6841-9370", "linkedin": ";", "or_profile": "~Clement_Louis_Canonne1;~Qiping_Yang1", "aff": "University of Sydney;University of Sydney", "aff_domain": "sydney.edu.au;sydney.edu.au", "position": "Lecturer;PhD student", "bibtex": "@inproceedings{\ncanonne2024entropy,\ntitle={Entropy testing and its application to testing Bayesian networks},\nauthor={Clement Louis Canonne and Qiping Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bMSXeAlCI4}\n}", "github": "", "reviewers": "wSfS;S8s6;cFXh;BQBQ", "pdf_size": 475274, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "2;3;4;4", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "98;81;209;41", "wc_strengths": "5;22;90;20", "wc_weaknesses": "17;17;129;133", "wc_questions": "197;174;54;225", "wc_limitations": "17;11;31;5", "wc_review": "334;305;513;424", "wc_reply_reviewers": "0;103;40;16", "wc_reply_authors": "41;94;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.25, 62.28312371742445 ], "wc_strengths_avg": [ 34.25, 32.85098933061225 ], "wc_weaknesses_avg": [ 74.0, 57.01754116059373 ], "wc_questions_avg": [ 162.5, 65.1939414363022 ], "wc_limitations_avg": [ 16.0, 9.643650760992955 ], "wc_review_avg": [ 394.0, 81.51993621194757 ], "wc_reply_reviewers_avg": [ 39.75, 39.194227891361756 ], "wc_reply_authors_avg": [ 33.75, 38.60294677871108 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:e0gbjme67g0J:scholar.google.com/&scioq=Entropy+testing+and+its+application+to+testing+Bayesian+networks&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "sydney.edu.au;sydney.edu.au", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Towards training digitally-tied analog blocks via hybrid gradient computation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94491", "id": "bMTn8KKrbq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bMTn8KKrbq", "openreview": "https://openreview.net/forum?id=bMTn8KKrbq", "poster": "", "project": "", "author_site": "Timothy Nest, Maxence Ernoult", "tldr": "", "abstract": "Power efficiency is plateauing in the standard digital electronics realm such that new hardware, models, and algorithms are needed to reduce the costs of AI training. The combination of energy-based analog circuits and the Equilibrium Propagation (EP) algorithm constitutes a compelling alternative compute paradigm for gradient-based optimization of neural nets. Existing analog hardware accelerators, however, typically incorporate digital circuitry to sustain auxiliary non-weight-stationary operations, mitigate analog device imperfections, and leverage existing digital platforms. Such heterogeneous hardware lacks a supporting theoretical framework. In this work, we introduce \\emph{Feedforward-tied Energy-based Models} (ff-EBMs), a hybrid model comprised of feedforward and energy-based blocks housed on digital and analog circuits. We derive a novel algorithm to compute gradients end-to-end in ff-EBMs by backpropagating and ``eq-propagating'' through feedforward and energy-based parts respectively, enabling EP to be applied flexibly on realistic architectures. We experimentally demonstrate the effectiveness of this approach on ff-EBMs using Deep Hopfield Networks (DHNs) as energy-based blocks, and show that a standard DHN can be arbitrarily split into any uniform size while maintaining or improving performance with increases in simulation speed of up to four times. We then train ff-EBMs on ImageNet32 where we establish a new state-of-the-art performance for the EP literature (46 top-1 \\%). Our approach offers a principled, scalable, and incremental roadmap for the gradual integration of self-trainable analog computational primitives into existing digital accelerators.", "keywords": "implicit differentiation;equilibrium propagation;bilevel optimization;hopfield networks;analog computing;hardware-aware training;backprop;energy-based models;physical learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Timothy Nest;Maxence Ernoult", "authorids": "~Timothy_Nest1;~Maxence_Ernoult1", "gender": "Not Specified;M", "homepage": "http://www.github.com/timothynest;", "dblp": ";241/9703", "google_scholar": "https://scholar.google.gr/citations?user=HIempWQAAAAJ;https://scholar.google.com/citations?hl=fr", "orcid": ";", "linkedin": ";", "or_profile": "~Timothy_Nest1;~Maxence_Ernoult1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Rain AI", "aff_domain": "mila.umontreal.ca;rain.ai", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nnest2024towards,\ntitle={Towards training digitally-tied analog blocks via hybrid gradient computation},\nauthor={Timothy Nest and Maxence Ernoult},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bMTn8KKrbq}\n}", "github": "", "reviewers": "k7Rs;Jt5z;pevw;kDhg;9JF8", "pdf_size": 667961, "rating": "5;5;6;7;7", "confidence": "3;3;3;4;4", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;1;3;3;2", "wc_summary": "80;82;85;141;42", "wc_strengths": "60;22;15;214;21", "wc_weaknesses": "79;56;70;139;58", "wc_questions": "2;128;9;37;1", "wc_limitations": "1;4;2;54;1", "wc_review": "222;292;181;585;123", "wc_reply_reviewers": "0;616;0;11;213", "wc_reply_authors": "0;1198;0;0;802", "reply_reviewers": "0;2;0;1;2", "reply_authors": "1;3;1;1;3", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 86.0, 31.66701754191575 ], "wc_strengths_avg": [ 66.4, 75.49993377480538 ], "wc_weaknesses_avg": [ 80.4, 30.467031361785153 ], "wc_questions_avg": [ 35.4, 48.11070566932062 ], "wc_limitations_avg": [ 12.4, 20.828826179120128 ], "wc_review_avg": [ 280.6, 161.85252546685825 ], "wc_reply_reviewers_avg": [ 168.0, 238.2544857919783 ], "wc_reply_authors_avg": [ 400.0, 505.64968110342954 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.8, 0.9797958971132713 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9128709291752769, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8933692750231549141&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mila.umontreal.ca;rain.ai", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Montreal;Rain AI", "aff_unique_dep": "Montreal Institute for Learning Algorithms;", "aff_unique_url": "https://www.mila.quebec;", "aff_unique_abbr": "MILA;", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0", "aff_country_unique": "Canada;" }, { "title": "FIARSE: Model-Heterogeneous Federated Learning via Importance-Aware Submodel Extraction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94490", "id": "bMbteQRhDI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bMbteQRhDI", "openreview": "https://openreview.net/forum?id=bMbteQRhDI", "poster": "/media/PosterPDFs/NeurIPS%202024/94490.png?t=1733643044.1134636", "project": "", "author_site": "Feijie Wu, Xingchen Wang, Yaqing Wang, TIanci Liu, Lu Su, Jing Gao", "tldr": "", "abstract": "In federated learning (FL), accommodating clients' varied computational capacities poses a challenge, often limiting the participation of those with constrained resources in global model training. To address this issue, the concept of model heterogeneity through submodel extraction has emerged, offering a tailored solution that aligns the model's complexity with each client's computational capacity. In this work, we propose Federated Importance-Aware Submodel Extraction (FIARSE), a novel approach that dynamically adjusts submodels based on the importance of model parameters, thereby overcoming the limitations of previous static and dynamic submodel extraction methods. Compared to existing works, the proposed method offers a theoretical foundation for the submodel extraction and eliminates the need for additional information beyond the model parameters themselves to determine parameter importance, significantly reducing the overhead on clients. Extensive experiments are conducted on various datasets to showcase the superior performance of the proposed FIARSE.", "keywords": "Federated learning;Model heterogeneity", "primary_area": "optimization", "supplementary_material": "", "author": "Feijie Wu;Xingchen Wang;Yaqing Wang;Tianci Liu;Lu Su;Jing Gao", "authorids": "~Feijie_Wu1;~Xingchen_Wang1;~Yaqing_Wang1;~Tianci_Liu1;~Lu_Su1;~Jing_Gao1", "gender": ";M;M;M;M;F", "homepage": "https://harli.me/;https://wangxingchen2930.github.io;https://yaqingwang.github.io/;https://lliutianc.github.io;https://engineering.purdue.edu/~lusu/;https://engineering.purdue.edu/~jinggao/", "dblp": "246/4255;;147/1393;148/1911-3;63/4152-1;67/4834-4", "google_scholar": "https://scholar.google.com/citations?hl=en;;_Rfg2CAAAAAJ;;38RuCN4AAAAJ;Ftj1h4cAAAAJ", "orcid": "0000-0003-0541-1901;0000-0003-1352-7445;;;0000-0001-7223-543X;", "linkedin": ";;;;;", "or_profile": "~Feijie_Wu1;~Xingchen_Wang1;~Yaqing_Wang1;~Tianci_Liu1;~Lu_Su1;~Jing_Gao2", "aff": "Purdue University;Purdue University;Google DeepMind;Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;google.com;purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Research Scientist;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024fiarse,\ntitle={{FIARSE}: Model-Heterogeneous Federated Learning via Importance-Aware Submodel Extraction},\nauthor={Feijie Wu and Xingchen Wang and Yaqing Wang and Tianci Liu and Lu Su and Jing Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bMbteQRhDI}\n}", "github": "", "reviewers": "hETm;dCzP;inPJ;PQwa", "pdf_size": 704849, "rating": "5;5;5;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;2", "wc_summary": "97;63;71;57", "wc_strengths": "49;45;35;52", "wc_weaknesses": "418;111;121;42", "wc_questions": "79;3;2;35", "wc_limitations": "1;3;1;4", "wc_review": "644;225;230;190", "wc_reply_reviewers": "227;59;17;33", "wc_reply_authors": "948;132;181;31", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;4;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.0, 15.264337522473747 ], "wc_strengths_avg": [ 45.25, 6.417748826496718 ], "wc_weaknesses_avg": [ 173.0, 144.68413872985525 ], "wc_questions_avg": [ 29.75, 31.379730719048563 ], "wc_limitations_avg": [ 2.25, 1.299038105676658 ], "wc_review_avg": [ 322.25, 186.4006102457822 ], "wc_reply_reviewers_avg": [ 84.0, 83.91066678319271 ], "wc_reply_authors_avg": [ 323.0, 364.87463600529975 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13131813584268944402&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;google.com;purdue.edu;purdue.edu;purdue.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Purdue University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.purdue.edu;https://deepmind.com", "aff_unique_abbr": "Purdue;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Efficient LLM Jailbreak via Adaptive Dense-to-sparse Constrained Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94489", "id": "bN5PA3HHo8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bN5PA3HHo8", "openreview": "https://openreview.net/forum?id=bN5PA3HHo8", "poster": "", "project": "", "author_site": "Kai Hu, Weichen Yu, Yining Li, Tianjun Yao, Xiang Li, Wenhe Liu, Lijun Yu, Zhiqiang Shen, Kai Chen, Matt Fredrikson", "tldr": "", "abstract": "Recent research indicates that large language models (LLMs) are susceptible to jailbreaking attacks that can generate harmful content. This paper introduces a novel token-level attack method, Adaptive Dense-to-Sparse Constrained Optimization (ADC), which has been shown to successfully jailbreak multiple open-source LLMs. Drawing inspiration from the difficulties of discrete token optimization, our method relaxes the discrete jailbreak optimization into a continuous optimization process while gradually increasing the sparsity of the optimizing vectors. This technique effectively bridges the gap between discrete and continuous space optimization. Experimental results demonstrate that our method is more effective and efficient than state-of-the-art token-level methods. On Harmbench, our approach achieves the highest attack success rate on seven out of eight LLMs compared to the latest jailbreak methods. \\textcolor{red}{Trigger Warning: This paper contains model behavior that can be offensive in nature.}", "keywords": "large language model;AI safety;jailbreak", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/032329386950303ef71097af2a2b9b155f10f93f.zip", "author": "Kai Hu;Weichen Yu;Yining Li;Tianjun Yao;Xiang Li;Wenhe Liu;Lijun Yu;Zhiqiang Shen;Kai Chen;Matt Fredrikson", "authorids": "~Kai_Hu2;~Weichen_Yu1;~Yining_Li1;~Tianjun_Yao1;~Xiang_Li35;~Wenhe_Liu1;~Lijun_Yu1;~Zhiqiang_Shen1;~Kai_Chen4;~Matt_Fredrikson1", "gender": "M;F;M;;;;M;;M;M", "homepage": "https://github.com/hukkai;https://weichen-yu.github.io/;https://liyn.site;;;;https://me.lj-y.com/;;https://chenkai.site/;https://cs.cmu.edu/~mfredrik", "dblp": ";325/1209;166/3420;;;;94/5561;;181/2839-26;38/2612", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=y_cp1sUAAAAJ;;;;IaDc0OcAAAAJ;;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;https://scholar.google.com.tw/citations?user=tMYCvLAAAAAJ", "orcid": ";;;;;;0000-0003-0645-1657;;0000-0002-6820-2325;", "linkedin": ";;;;;;lijun-yu/;;;", "or_profile": "~Kai_Hu2;~Weichen_Yu1;~Yining_Li1;~Tianjun_Yao1;~Xiang_Li35;~Wenhe_Liu1;~Lijun_Yu1;~Zhiqiang_Shen1;~Kai_Chen4;~Matt_Fredrikson1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Shanghai AI Laboratory;;;;School of Computer Science, Carnegie Mellon University;;Shanghai AI Laboratory;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;pjlab.org.cn;;;;cs.cmu.edu;;pjlab.org.cn;cmu.edu", "position": "PhD student;PhD student;Researcher;;;;PhD student;;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhu2024efficient,\ntitle={Efficient {LLM} Jailbreak via Adaptive Dense-to-sparse Constrained Optimization},\nauthor={Kai Hu and Weichen Yu and Yining Li and Tianjun Yao and Xiang Li and Wenhe Liu and Lijun Yu and Zhiqiang Shen and Kai Chen and Matt Fredrikson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bN5PA3HHo8}\n}", "github": "", "reviewers": "VphR;qByQ;3E4T;4zzj", "pdf_size": 140326, "rating": "4;5;5;7", "confidence": "4;4;4;4", "soundness": "3;4;3;3", "novelty": "2;2;2;3", "presentation": "3;4;3;3", "wc_summary": "51;52;216;106", "wc_strengths": "29;44;94;130", "wc_weaknesses": "127;92;177;148", "wc_questions": "3;68;195;120", "wc_limitations": "3;6;1;16", "wc_review": "213;262;683;520", "wc_reply_reviewers": "98;469;197;0", "wc_reply_authors": "629;1057;506;0", "reply_reviewers": "1;2;2;0", "reply_authors": "3;3;3;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.25, 67.15792953925843 ], "wc_strengths_avg": [ 74.25, 40.189395367434926 ], "wc_weaknesses_avg": [ 136.0, 30.99193443462347 ], "wc_questions_avg": [ 96.5, 70.3722246344394 ], "wc_limitations_avg": [ 6.5, 5.766281297335398 ], "wc_review_avg": [ 419.5, 191.69050576384842 ], "wc_reply_reviewers_avg": [ 191.0, 174.96428206922693 ], "wc_reply_authors_avg": [ 548.0, 376.7326638347145 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3295918303157309668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cmu.edu;andrew.cmu.edu;pjlab.org.cn;;;;cs.cmu.edu;;pjlab.org.cn;cmu.edu", "author_num": 10, "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "CMU;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "United States;China" }, { "title": "On the Complexity of Identification in Linear Structural Causal Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94488", "id": "bNDwOoxj6W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bNDwOoxj6W", "openreview": "https://openreview.net/forum?id=bNDwOoxj6W", "poster": "/media/PosterPDFs/NeurIPS%202024/94488.png?t=1733141440.325261", "project": "", "author_site": "Julian D\u00f6rfler, Benito van der Zander, Markus Bl\u00e4ser, Maciej Liskiewicz", "tldr": "", "abstract": "Learning the unknown causal parameters of a linear structural causal \nmodel is a fundamental task in causal analysis. The task, known as the \nproblem of identification, asks to estimate the parameters of the model from a\ncombination of assumptions on the graphical structure of the model and \nobservational data, represented as a non-causal covariance matrix.\nIn this paper, we give a new sound and complete algorithm for generic \nidentification which runs in polynomial space. By a standard simulation \nresult, namely $\\mathsf{PSPACE} \\subseteq \\mathsf{EXP}$,\nthis algorithm has exponential running time which vastly improves \nthe state-of-the-art double exponential time method using a Gr\u00f6bner basis \napproach. The paper also presents evidence that parameter identification \nis computationally hard in general. In particular, we prove, that the task\nasking whether, for a given feasible correlation matrix, there \nare exactly one or two or more parameter sets explaining the observed \nmatrix, is hard for $\\forall \\mathbb{R}$, the co-class of the existential theory \nof the reals. In particular, this problem is $\\mathsf{coNP}$-hard.\nTo our best knowledge, this is the first hardness result for some notion \nof identifiability.", "keywords": "causal inference;structural causal models;structural equational models;generic identifiability;existential theory over the reals", "primary_area": "causal_inference", "supplementary_material": "", "author": "Julian D\u00f6rfler;Benito van der Zander;Markus Bl\u00e4ser;Maciej Liskiewicz", "authorids": "~Julian_D\u00f6rfler1;~Benito_van_der_Zander1;~Markus_Bl\u00e4ser1;~Maciej_Liskiewicz1", "gender": ";M;;M", "homepage": ";http://www.benibela.de;;http://www.tcs.uni-luebeck.de/mitarbeiter/liskiewi", "dblp": ";77/8315;;06/5685", "google_scholar": ";https://scholar.google.de/citations?user=QTn6hikAAAAJ;;https://scholar.google.de/citations?user=jsxwOYcAAAAJ", "orcid": ";;;", "linkedin": ";benito-van-der-zander-158a8963/;;", "or_profile": "~Julian_D\u00f6rfler1;~Benito_van_der_Zander1;~Markus_Bl\u00e4ser1;~Maciej_Liskiewicz1", "aff": ";Universit\u00e4t zu L\u00fcbeck;;Universit\u00e4t zu L\u00fcbeck", "aff_domain": ";uni-luebeck.de;;uni-luebeck.de", "position": ";Postdoc;;Full Professor", "bibtex": "@inproceedings{\nd{\\\"o}rfler2024on,\ntitle={On the Complexity of Identification in Linear Structural Causal Models},\nauthor={Julian D{\\\"o}rfler and Benito van der Zander and Markus Bl{\\\"a}ser and Maciej Liskiewicz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bNDwOoxj6W}\n}", "github": "", "reviewers": "Gx9Z;gLJ2;Q8jB;ToDP;WjE6", "pdf_size": 355412, "rating": "5;5;6;6;7", "confidence": "2;2;2;3;4", "soundness": "3;3;4;3;3", "novelty": "2;3;3;3;3", "presentation": "2;2;2;3;2", "wc_summary": "42;59;42;39;40", "wc_strengths": "36;15;66;51;147", "wc_weaknesses": "66;278;63;42;50", "wc_questions": "58;16;138;2;261", "wc_limitations": "1;5;5;1;38", "wc_review": "203;373;314;135;536", "wc_reply_reviewers": "28;15;56;11;65", "wc_reply_authors": "0;0;0;0;17", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 44.4, 7.391887445030532 ], "wc_strengths_avg": [ 63.0, 45.25925319755066 ], "wc_weaknesses_avg": [ 99.8, 89.52407497427716 ], "wc_questions_avg": [ 95.0, 95.56568421771489 ], "wc_limitations_avg": [ 10.0, 14.11382301150188 ], "wc_review_avg": [ 312.2, 139.36340983199284 ], "wc_reply_reviewers_avg": [ 35.0, 21.753160689885963 ], "wc_reply_authors_avg": [ 3.4, 6.8 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8685990362153793, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IXU9i7cwWmoJ:scholar.google.com/&scioq=On+the+Complexity+of+Identification+in+Linear+Structural+Causal+Models&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";uni-luebeck.de;;uni-luebeck.de", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of L\u00fcbeck", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-luebeck.de", "aff_unique_abbr": "UzL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "bNVvcgfEwD", "title": "Convergence of Adafactor under Non-Convex Smooth Stochastic Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "As model sizes in deep learning continue to expand, memory-efficient optimizers are increasingly critical to manage the substantial memory demands of popular algorithms like Adam and AdamW. Among these, Adafactor has emerged as one of the widely adopted choices for training deep learning tasks, particularly large language models. However, despite its practical success, there is limited theoretical analysis on Adafactor's convergence. This paper presents a comprehensive analysis on Adafactor in a non-convex smooth setting, demonstrating its convergence to find a stationary point at a rate of $\\tilde{\\mathcal{O}}(1/\\sqrt{T})$. We find that the default hyper-parameter setting results in a sub-optimal rate in our framework, and propose an alternative setting that could theoretically achieve optimal convergence rate. This finding is further supported by some experimental results.\nWe also prove that Adafactor with a suitable time-varying clipping threshold could also converge, achieving performance in experiments comparable to that of the standard constant setting.", "keywords": "Adafactor;stochastic optimization;convergence;non-convex smoothness", "primary_area": "optimization", "supplementary_material": "", "author": "Yusu Hong;Junhong Lin", "authorids": "~Yusu_Hong1;~Junhong_Lin1", "gender": "M;M", "homepage": ";https://person.zju.edu.cn/en/junhong", "dblp": "360/0732;", "google_scholar": "OabD-60AAAAJ;", "orcid": ";", "linkedin": "%E9%92%B0%E6%BA%AF-%E6%B4%AA-0198b41ba/;", "or_profile": "~Yusu_Hong1;~Junhong_Lin1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024convergence,\ntitle={Convergence of Adafactor under Non-Convex Smooth Stochastic Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=bNVvcgfEwD}\n}", "github": "", "project": "", "reviewers": "ExZW;5LgA;1HAW", "site": "https://openreview.net/forum?id=bNVvcgfEwD", "pdf_size": 626012, "rating": "3;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "1;3;3", "presentation": "3;2;2", "wc_summary": "263;82;116", "wc_strengths": "2;88;40", "wc_weaknesses": "2;199;20", "wc_questions": "2;129;81", "wc_limitations": "2;7;1", "wc_review": "271;505;258", "wc_reply_reviewers": "0;100;11", "wc_reply_authors": "151;302;181", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 153.66666666666666, 78.54651841778575 ], "wc_strengths_avg": [ 43.333333333333336, 35.188381921057726 ], "wc_weaknesses_avg": [ 73.66666666666667, 88.92818575807236 ], "wc_questions_avg": [ 70.66666666666667, 52.359865885576475 ], "wc_limitations_avg": [ 3.3333333333333335, 2.6246692913372702 ], "wc_review_avg": [ 344.6666666666667, 113.49694073214289 ], "wc_reply_reviewers_avg": [ 37.0, 44.773504069557326 ], "wc_reply_authors_avg": [ 211.33333333333334, 65.27037783115877 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9707253433941508, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A7auMLy_1vUJ:scholar.google.com/&scioq=Convergence+of+Adafactor+under+Non-Convex+Smooth+Stochastic+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning Discrete Concepts in Latent Hierarchical Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94487", "id": "bO5bUxvH6m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bO5bUxvH6m", "openreview": "https://openreview.net/forum?id=bO5bUxvH6m", "poster": "", "project": "", "author_site": "Lingjing Kong, Guangyi Chen, Biwei Huang, Eric Xing, Yuejie Chi, Kun Zhang", "tldr": "", "abstract": "Learning concepts from natural high-dimensional data (e.g., images) holds potential in building human-aligned and interpretable machine learning models.\n Despite its encouraging prospect, formalization and theoretical insights into this crucial task are still lacking.\n In this work, we formalize concepts as discrete latent causal variables that are related via a hierarchical causal model that encodes different abstraction levels of concepts embedded in high-dimensional data (e.g., a dog breed and its eye shapes in natural images).\n We formulate conditions to facilitate the identification of the proposed causal model, which reveals when learning such concepts from unsupervised data is possible.\n Our conditions permit complex causal hierarchical structures beyond latent trees and multi-level directed acyclic graphs in prior work and can handle high-dimensional, continuous observed variables, which is well-suited for unstructured data modalities such as images.\n We substantiate our theoretical claims with synthetic data experiments.\n Further, we discuss our theory's implications for understanding the underlying mechanisms of latent diffusion models and provide corresponding empirical evidence for our theoretical insights.", "keywords": "representation learning;causal representation learning;generative models;causal discovery;hierarchical models", "primary_area": "causal_inference", "supplementary_material": "", "author": "Lingjing Kong;Guangyi Chen;Biwei Huang;Eric P. Xing;Yuejie Chi;Kun Zhang", "authorids": "~Lingjing_Kong1;~Guangyi_Chen1;~Biwei_Huang1;~Eric_Xing1;~Yuejie_Chi1;~Kun_Zhang1", "gender": "M;M;F;M;;M", "homepage": "https://lingjing-kong.github.io/;https://chengy12.github.io/;;http://www.cs.cmu.edu/~epxing/;;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "158/1994-1.html;c/GuangyiChen-2;165/3288;36/3855;;96/3115-1", "google_scholar": "4hAlzvkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;;RGoypN4AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Lingjing_Kong1;~Guangyi_Chen1;~Biwei_Huang1;~Eric_Xing1;~Yuejie_Chi1;~Kun_Zhang1", "aff": "Computer Science Department, School of Computer Science;Carnegie Mellon University;University of California, San Diego;School of Computer Science, Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "csd.cs.cmu.edu;cmu.edu;ucsd.edu;cs.cmu.edu;;cmu.edu", "position": "PhD student;Postdoc;Assistant Professor;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nkong2024learning,\ntitle={Learning Discrete Concepts in Latent Hierarchical Models},\nauthor={Lingjing Kong and Guangyi Chen and Biwei Huang and Eric P. Xing and Yuejie Chi and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bO5bUxvH6m}\n}", "github": "", "reviewers": "BR1s;ae89;7auw;ZL2x", "pdf_size": 14394702, "rating": "5;5;6;7", "confidence": "2;4;3;1", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;2;2;3", "wc_summary": "42;162;92;76", "wc_strengths": "48;103;80;127", "wc_weaknesses": "138;788;157;332", "wc_questions": "4;256;109;48", "wc_limitations": "1;7;9;32", "wc_review": "233;1316;447;615", "wc_reply_reviewers": "34;33;11;127", "wc_reply_authors": "79;82;69;117", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 43.73785545725808 ], "wc_strengths_avg": [ 89.5, 29.159046623646667 ], "wc_weaknesses_avg": [ 353.75, 261.8705548548748 ], "wc_questions_avg": [ 104.25, 95.21652955238392 ], "wc_limitations_avg": [ 12.25, 11.776565713313877 ], "wc_review_avg": [ 652.75, 406.1553735948842 ], "wc_reply_reviewers_avg": [ 51.25, 44.68990378150304 ], "wc_reply_authors_avg": [ 86.75, 18.115946014492316 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15519136187148574148&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "csd.cs.cmu.edu;cmu.edu;ucsd.edu;cs.cmu.edu;;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "School of Computer Science;Carnegie Mellon University;University of California, San Diego", "aff_unique_dep": "Computer Science Department;;", "aff_unique_url": ";https://www.cmu.edu;https://www.ucsd.edu", "aff_unique_abbr": ";CMU;UCSD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Diego;Pittsburgh", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Bridging Multicalibration and Out-of-distribution Generalization Beyond Covariate Shift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94486", "id": "bOS6WPV0Jf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bOS6WPV0Jf", "openreview": "https://openreview.net/forum?id=bOS6WPV0Jf", "poster": "/media/PosterPDFs/NeurIPS%202024/94486.png?t=1733814319.8594606", "project": "", "author_site": "Jiayun Wu, Jiashuo Liu, Peng Cui, Steven Wu", "tldr": "", "abstract": "We establish a new model-agnostic optimization framework for out-of-distribution generalization via multicalibration, a criterion that ensures a predictor is calibrated across a family of overlapping groups. Multicalibration is shown to be associated with robustness of statistical inference under covariate shift. We further establish a link between multicalibration and robustness for prediction tasks both under and beyond covariate shift. We accomplish this by extending multicalibration to incorporate grouping functions that consider covariates and labels jointly. This leads to an equivalence of the extended multicalibration and invariance, an objective for robust learning in existence of concept shift. We show a linear structure of the grouping function class spanned by density ratios, resulting in a unifying framework for robust learning by designing specific grouping functions. We propose MC-Pseudolabel, a post-processing algorithm to achieve both extended multicalibration and out-of-distribution generalization. The algorithm, with lightweight hyperparameters and optimization through a series of supervised regression steps, achieves superior performance on real-world datasets with distribution shift.", "keywords": "Multicalibration;Robustness;Invariant Risk Minimization", "primary_area": "fairness", "supplementary_material": "/attachment/405b5625d9ff13dd8d57f829c922642b7f3b6523.zip", "author": "Jiayun Wu;Jiashuo Liu;Peng Cui;Steven Wu", "authorids": "~Jiayun_Wu1;~Jiashuo_Liu1;~Peng_Cui1;~Steven_Wu1", "gender": "M;M;M;M", "homepage": "https://ic-hub.github.io;https://ljsthu.github.io;http://pengcui.thumedialab.com/;https://zstevenwu.com/", "dblp": "00/9456;180/2823;31/891-1;137/8350", "google_scholar": "https://scholar.google.com/citations?hl=en;b7bpt5MAAAAJ;https://scholar.google.com.tw/citations?user=G8x97ZgAAAAJ;MbF6rTEAAAAJ", "orcid": "0009-0007-7131-7290;;0000-0003-2957-8511;", "linkedin": "jiayun-wu-4aa86323a/;jiashuo-liu-244a6b1a4;;zstevenwu/", "or_profile": "~Jiayun_Wu1;~Jiashuo_Liu1;~Peng_Cui1;~Zhiwei_Steven_Wu1", "aff": "Tsinghua University;University of Cambridge;Tsinghua University;Carnegie Mellon University", "aff_domain": "mails.tsinghua.edu.cn;cam.ac.uk;tsinghua.edu.cn;cmu.edu", "position": "MS student;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024bridging,\ntitle={Bridging Multicalibration and Out-of-distribution Generalization Beyond Covariate Shift},\nauthor={Jiayun Wu and Jiashuo Liu and Peng Cui and Steven Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bOS6WPV0Jf}\n}", "github": "", "reviewers": "ezjY;TrUb;HkL5;7ztk", "pdf_size": 771046, "rating": "5;5;8;8", "confidence": "3;2;4;4", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "1;2;4;3", "wc_summary": "320;40;55;105", "wc_strengths": "127;27;85;60", "wc_weaknesses": "623;43;12;184", "wc_questions": "283;47;75;30", "wc_limitations": "39;2;6;16", "wc_review": "1392;159;233;395", "wc_reply_reviewers": "137;17;0;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 130.0, 112.30538722608101 ], "wc_strengths_avg": [ 74.75, 36.51284020724764 ], "wc_weaknesses_avg": [ 215.5, 244.0373946754882 ], "wc_questions_avg": [ 108.75, 101.8782974926456 ], "wc_limitations_avg": [ 15.75, 14.359230480774379 ], "wc_review_avg": [ 544.75, 496.5502869800802 ], "wc_reply_reviewers_avg": [ 42.5, 54.974994315597705 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16732077451871285034&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;cam.ac.uk;tsinghua.edu.cn;cmu.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tsinghua University;University of Cambridge;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cam.ac.uk;https://www.cmu.edu", "aff_unique_abbr": "THU;Cambridge;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "Identifying Equivalent Training Dynamics", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94485", "id": "bOYVESX7PK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bOYVESX7PK", "openreview": "https://openreview.net/forum?id=bOYVESX7PK", "poster": "", "project": "", "author_site": "William Redman, Juan Bello-Rivas, Maria Fonoberova, Ryan Mohr, Yannis Kevrekidis, Igor Mezic", "tldr": "", "abstract": "Study of the nonlinear evolution deep neural network (DNN) parameters undergo during training has uncovered regimes of distinct dynamical behavior. While a detailed understanding of these phenomena has the potential to advance improvements in training efficiency and robustness, the lack of methods for identifying when DNN models have equivalent dynamics limits the insight that can be gained from prior work. Topological conjugacy, a notion from dynamical systems theory, provides a precise definition of dynamical equivalence, offering a possible route to address this need. However, topological conjugacies have historically been challenging to compute. By leveraging advances in Koopman operator theory, we develop a framework for identifying conjugate and non-conjugate training dynamics. To validate our approach, we demonstrate that comparing Koopman eigenvalues can correctly identify a known equivalence between online mirror descent and online gradient descent. We then utilize our approach to: (a) identify non-conjugate training dynamics between shallow and wide fully connected neural networks; (b) characterize the early phase of training dynamics in convolutional neural networks; (c) uncover non-conjugate training dynamics in Transformers that do and do not undergo grokking. Our results, across a range of DNN architectures, illustrate the flexibility of our framework and highlight its potential for shedding new light on training dynamics.", "keywords": "training dynamics;dynamical systems theory;Koopman operator theory;grokking;learning phases", "primary_area": "other", "supplementary_material": "", "author": "William T Redman;Juan M. Bello-Rivas;Maria Fonoberova;Ryan Mohr;Yannis Kevrekidis;Igor Mezic", "authorids": "~William_T_Redman1;~Juan_M._Bello-Rivas1;~Maria_Fonoberova2;~Ryan_Mohr1;~Yannis_Kevrekidis1;~Igor_Mezic1", "gender": "M;;;M;M;M", "homepage": "https://wredman4.wixsite.com/wtredman;;;;https://engineering.jhu.edu/faculty/ioannis-kevrekidis/;https://mgroup.me.ucsb.edu/", "dblp": "266/7985;;;;;", "google_scholar": "-SOfw0AAAAAJ;;;LYy8AGQAAAAJ;;5d9ngqsAAAAJ", "orcid": ";;;0000-0001-8713-7672;;", "linkedin": ";;;ryan-mohr-47790598;;", "or_profile": "~William_T_Redman1;~Juan_M._Bello-Rivas1;~Maria_Fonoberova2;~Ryan_Mohr1;~Yannis_Kevrekidis1;~Igor_Mezic1", "aff": "AIMdyn Inc.;;;AIMdyn Inc;Johns Hopkins University;University of California, Santa Barbara", "aff_domain": "aimdyn.com;;;aimdyn.com;jh.edu;ucsb.edu", "position": "Researcher;;;Senior Research Scientist;Full Professor;Full Professor", "bibtex": "@inproceedings{\nredman2024identifying,\ntitle={Identifying Equivalent Training Dynamics},\nauthor={William T Redman and Juan M. Bello-Rivas and Maria Fonoberova and Ryan Mohr and Yannis Kevrekidis and Igor Mezic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bOYVESX7PK}\n}", "github": "", "reviewers": "ZFpv;42wo;4cZ9;C2X3", "pdf_size": 11246823, "rating": "6;7;8;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "3;3;4;3", "wc_summary": "59;97;326;130", "wc_strengths": "22;106;231;96", "wc_weaknesses": "56;219;300;149", "wc_questions": "2;81;88;93", "wc_limitations": "1;134;9;42", "wc_review": "140;637;954;510", "wc_reply_reviewers": "13;652;160;33", "wc_reply_authors": "0;1007;14;0", "reply_reviewers": "1;3;2;1", "reply_authors": "1;4;2;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 153.0, 102.99271818920015 ], "wc_strengths_avg": [ 113.75, 75.06788594332465 ], "wc_weaknesses_avg": [ 181.0, 89.79699326814902 ], "wc_questions_avg": [ 66.0, 37.19542982679458 ], "wc_limitations_avg": [ 46.5, 52.803882433018124 ], "wc_review_avg": [ 560.25, 291.57535475413556 ], "wc_reply_reviewers_avg": [ 214.5, 258.80542884568706 ], "wc_reply_authors_avg": [ 255.25, 434.06069564059817 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17375355624692023047&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "aimdyn.com;;;aimdyn.com;jh.edu;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "AIMdyn Inc.;AIMdyn Inc;Johns Hopkins University;University of California, Santa Barbara", "aff_unique_dep": ";;;", "aff_unique_url": ";;https://www.jhu.edu;https://www.ucsb.edu", "aff_unique_abbr": ";;JHU;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Vivid-ZOO: Multi-View Video Generation with Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94484", "id": "bPOaHf8OcX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bPOaHf8OcX", "openreview": "https://openreview.net/forum?id=bPOaHf8OcX", "poster": "/media/PosterPDFs/NeurIPS%202024/94484.png?t=1731661940.415221", "project": "", "author_site": "Bing Li, Cheng Zheng, Wenxuan Zhu, Jinjie Mai, Biao Zhang, Peter Wonka, Bernard Ghanem", "tldr": "", "abstract": "While diffusion models have shown impressive performance in 2D image/video generation, diffusion-based Text-to-Multi-view-Video (T2MVid) generation remains underexplored. The new challenges posed by T2MVid generation lie in the lack of massive captioned multi-view videos and the complexity of modeling such multi-dimensional distribution. To this end, we propose a novel diffusion-based pipeline that generates high-quality multi-view videos centered around a dynamic 3D object from text. Specifically, we factor the T2MVid problem into viewpoint-space and time components. Such factorization allows us to combine and reuse layers of advanced pre-trained multi-view image and 2D video diffusion models to ensure multi-view consistency as well as temporal coherence for the generated multi-view videos, largely reducing the training cost. We further introduce alignment modules to align the latent spaces of layers from the pre-trained multi-view and the 2D video diffusion models, addressing the reused layers' incompatibility that arises from the domain gap between 2D and multi-view data. In support of this and future research, we further contribute a captioned multi-view video dataset. Experimental results demonstrate that our method generates high-quality multi-view videos, exhibiting vivid motions, temporal coherence, and multi-view consistency, given a variety of text prompts.", "keywords": "Multi-view video;generative model", "primary_area": "generative_models", "supplementary_material": "", "author": "Bing Li;Cheng Zheng;Wenxuan Zhu;Jinjie Mai;Biao Zhang;Peter Wonka;Bernard Ghanem", "authorids": "~Bing_Li13;~Cheng_Zheng3;~Wenxuan_Zhu4;~Jinjie_Mai1;~Biao_Zhang5;~Peter_Wonka1;~Bernard_Ghanem1", "gender": ";M;M;M;;M;M", "homepage": ";;https://skywalker-dell.github.io/personal-page/;;https://1zb.github.io;http://peterwonka.net;https://ivul.kaust.edu.sa", "dblp": "13/2692-24.html;37/170-3;;272/0975;83/3266-5;98/5522;37/2516", "google_scholar": "xBiftlUAAAAJ;zASsdIMAAAAJ;;;h5KukxEAAAAJ;https://scholar.google.com.tw/citations?user=0EKXSXgAAAAJ;rVsGTeEAAAAJ", "orcid": ";;;0000-0002-3396-1970;;0000-0003-0627-9746;0000-0002-5534-587X", "linkedin": ";zheng-cheng/;;;;;bernardghanem/", "or_profile": "~Bing_Li13;~Cheng_Zheng3;~Wenxuan_Zhu4;~Jinjie_Mai1;~Biao_Zhang5;~Peter_Wonka1;~Bernard_Ghanem1", "aff": "King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;KAUST;KAUST;King Abdullah University of Science and Technology", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;Researcher;MS student;MS student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024vividzoo,\ntitle={Vivid-{ZOO}: Multi-View Video Generation with Diffusion Model},\nauthor={Bing Li and Cheng Zheng and Wenxuan Zhu and Jinjie Mai and Biao Zhang and Peter Wonka and Bernard Ghanem},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bPOaHf8OcX}\n}", "github": "", "reviewers": "CcuB;K81K;VRVx;MoTs", "pdf_size": 29664897, "rating": "5;5;7;7", "confidence": "5;4;2;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "67;81;105;117", "wc_strengths": "53;65;146;129", "wc_weaknesses": "106;251;60;153", "wc_questions": "103;55;4;1", "wc_limitations": "16;12;16;1", "wc_review": "345;464;331;401", "wc_reply_reviewers": "21;31;0;0", "wc_reply_authors": "24;28;25;28", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.5, 19.615045245933032 ], "wc_strengths_avg": [ 98.25, 39.93353853592241 ], "wc_weaknesses_avg": [ 142.5, 70.7477914849644 ], "wc_questions_avg": [ 40.75, 41.85913878712748 ], "wc_limitations_avg": [ 11.25, 6.139014578904337 ], "wc_review_avg": [ 385.25, 52.470825226977325 ], "wc_reply_reviewers_avg": [ 13.0, 13.47219358530748 ], "wc_reply_authors_avg": [ 26.25, 1.7853571071357126 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8784988174251933010&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 8, "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Hybrid Reinforcement Learning Breaks Sample Size Barriers In Linear MDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94483", "id": "bPuYxFBHyI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bPuYxFBHyI", "openreview": "https://openreview.net/forum?id=bPuYxFBHyI", "poster": "/media/PosterPDFs/NeurIPS%202024/94483.png?t=1734028192.740806", "project": "", "author_site": "Kevin Tan, Wei Fan, Yuting Wei", "tldr": "", "abstract": "Hybrid Reinforcement Learning (RL), where an agent learns from both an offline dataset and online explorations in an unknown environment, has garnered significant recent interest. A crucial question posed by Xie et al. (2022) is whether hybrid RL can improve upon the existing lower bounds established in purely offline and purely online RL without relying on the single-policy concentrability assumption. \nWhile Li et al. (2023) provided an affirmative answer to this question in the tabular PAC RL case, the question remains unsettled for both the regret-minimizing RL case and the non-tabular case. In this work, building upon recent advancements in offline RL and reward-agnostic exploration, we develop computationally efficient algorithms for both PAC and regret-minimizing RL with linear function approximation, without requiring concentrability on the entire state-action space. We demonstrate that these algorithms achieve sharper error or regret bounds that are no worse than, and can improve on, the optimal sample complexity in offline RL (the first algorithm, for PAC RL) and online RL (the second algorithm, for regret-minimizing RL) in linear Markov decision processes (MDPs), regardless of the quality of the behavior policy. To our knowledge, this work establishes the tightest theoretical guarantees currently available for hybrid RL in linear MDPs.", "keywords": "hybrid RL;reinforcement learning;linear MDPs;minimax", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/8113c0e1e13138e663b8e7195a5c5895c567bc21.zip", "author": "Kevin Tan;Wei Fan;Yuting Wei", "authorids": "~Kevin_Tan3;~Wei_Fan8;~Yuting_Wei1", "gender": "M;;F", "homepage": ";;https://yutingwei.github.io/", "dblp": "47/3097;;184/3856", "google_scholar": "LJUS_7IAAAAJ;t3HOsqwAAAAJ;fsbXdAYAAAAJ", "orcid": "0009-0004-9844-5135;;", "linkedin": "hetankevin;;", "or_profile": "~Kevin_Tan3;~Wei_Fan8;~Yuting_Wei1", "aff": "Wharton Statistics Department, The Wharton School;The Wharton School, University of Pennsylvania;The Wharton School, University of Pennsylvania", "aff_domain": "statistics.wharton.upenn.edu;wharton.upenn.edu;wharton.upenn.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntan2024hybrid,\ntitle={Hybrid Reinforcement Learning Breaks Sample Size Barriers In Linear {MDP}s},\nauthor={Kevin Tan and Wei Fan and Yuting Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bPuYxFBHyI}\n}", "github": "", "reviewers": "ujdT;stbS;6qEJ;SSHJ", "pdf_size": 1366405, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "64;69;56;105", "wc_strengths": "216;18;56;86", "wc_weaknesses": "134;33;35;46", "wc_questions": "122;58;91;2", "wc_limitations": "9;1;25;1", "wc_review": "545;179;263;240", "wc_reply_reviewers": "95;0;41;38", "wc_reply_authors": "84;84;84;84", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.5, 18.76832437912346 ], "wc_strengths_avg": [ 94.0, 74.444610281739 ], "wc_weaknesses_avg": [ 62.0, 41.862871377868956 ], "wc_questions_avg": [ 68.25, 44.443081576326364 ], "wc_limitations_avg": [ 9.0, 9.797958971132712 ], "wc_review_avg": [ 306.75, 140.93682095180094 ], "wc_reply_reviewers_avg": [ 43.5, 33.84154251803543 ], "wc_reply_authors_avg": [ 84.0, 0.0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7499934323396959694&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "statistics.wharton.upenn.edu;wharton.upenn.edu;wharton.upenn.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Wharton School;University of Pennsylvania", "aff_unique_dep": "Wharton Statistics Department;The Wharton School", "aff_unique_url": "https://www.wharton.upenn.edu;https://www.wharton.upenn.edu", "aff_unique_abbr": "Wharton;UPenn Wharton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "One Token to Seg Them All: Language Instructed Reasoning Segmentation in Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94482", "id": "bQMevGCYVM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bQMevGCYVM", "openreview": "https://openreview.net/forum?id=bQMevGCYVM", "poster": "/media/PosterPDFs/NeurIPS%202024/94482.png?t=1731665677.6475294", "project": "", "author_site": "Zechen Bai, Tong He, Haiyang Mei, Pichao WANG, Ziteng Gao, Joya Chen, liulei, Zheng Zhang, Mike Zheng Shou", "tldr": "", "abstract": "We introduce VideoLISA, a video-based multimodal large language model designed to tackle the problem of language-instructed reasoning segmentation in videos. Leveraging the reasoning capabilities and world knowledge of large language models, and augmented by the Segment Anything Model, VideoLISA generates temporally consistent segmentation masks in videos based on language instructions. Existing image-based methods, such as LISA, struggle with video tasks due to the additional temporal dimension, which requires temporal dynamic understanding and consistent segmentation across frames. VideoLISA addresses these challenges by integrating a Sparse Dense Sampling strategy into the video-LLM, which balances temporal context and spatial detail within computational constraints. Additionally, we propose a One-Token-Seg-All approach using a specially designed token, enabling the model to segment and track objects across multiple frames. Extensive evaluations on diverse benchmarks, including our newly introduced ReasonVOS benchmark, demonstrate VideoLISA's superior performance in video object segmentation tasks involving complex reasoning, temporal understanding, and object tracking. While optimized for videos, VideoLISA also shows promising generalization to image segmentation, revealing its potential as a unified foundation model for language-instructed object segmentation. Code and model will be available at: https://github.com/showlab/VideoLISA.", "keywords": "Video Object Segmentation;Multimodal Large Language Model;Reasoning Segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zechen Bai;Tong He;Haiyang Mei;Pichao WANG;Ziteng Gao;Joya Chen;liulei;Zheng Zhang;Mike Zheng Shou", "authorids": "~Zechen_Bai1;~Tong_He5;~Haiyang_Mei1;~Pichao_WANG3;~Ziteng_Gao1;~Joya_Chen1;~liulei2;~Zheng_Zhang1;~Mike_Zheng_Shou1", "gender": "M;M;M;M;M;M;M;M;", "homepage": "https://www.baizechen.site/;https://hetong007.github.io/;https://mhaiyang.github.io/;https://wangpichao.github.io/;https://sebgao.github.io/;https://chenjoya.github.io/;;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;http://www.columbia.edu/~zs2262/", "dblp": "256/5272;02/1554-2;234/9586;;247/1231;247/9518;;;284/0807", "google_scholar": "aIdQ8GwAAAAJ;hV5D8GYAAAAJ;yfq6OSkAAAAJ;;fbSH2CgAAAAJ;https://scholar.google.com.ph/citations?user=IIx9dc8AAAAJ;f2T6BzUAAAAJ;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;h1-3lSoAAAAJ", "orcid": ";;0000-0003-3549-9684;;;;;;", "linkedin": ";;haiyang-mei-9832b622a/;;;;;;", "or_profile": "~Zechen_Bai1;~Tong_He5;~Haiyang_Mei1;~Pichao_WANG3;~Ziteng_Gao1;~Joya_Chen1;~liulei2;~Zheng_Zhang1;~Zheng_Shou1", "aff": "National University of Singapore;Amazon;National University of Singapore;Amazon;National University of Singapore;National University of Singapore;Amazon;Amazon;National University of Singapore", "aff_domain": "u.nus.edu;amazon.com;nus.edu.sg;amazon.com;nus.edu;u.nus.edu;amazon.com;amazon.com;nus.edu.sg", "position": "PhD student;Researcher;Postdoc;Researcher;PhD student;PhD student;Researcher;Senior Principal Scientist;Assistant Professor", "bibtex": "@inproceedings{\nbai2024one,\ntitle={One Token to Seg Them All: Language Instructed Reasoning Segmentation in Videos},\nauthor={Zechen Bai and Tong He and Haiyang Mei and Pichao WANG and Ziteng Gao and Joya Chen and liulei and Zheng Zhang and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bQMevGCYVM}\n}", "github": "", "reviewers": "oRTn;vGHj;urmZ", "pdf_size": 9617086, "rating": "5;5;5", "confidence": "4;5;3", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;3;3", "wc_summary": "66;70;42", "wc_strengths": "13;47;29", "wc_weaknesses": "29;87;60", "wc_questions": "82;81;6", "wc_limitations": "29;8;8", "wc_review": "219;293;145", "wc_reply_reviewers": "33;228;20", "wc_reply_authors": "129;604;0", "reply_reviewers": "1;2;1", "reply_authors": "2;3;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.333333333333336, 12.364824660660938 ], "wc_strengths_avg": [ 29.666666666666668, 13.888444437333106 ], "wc_weaknesses_avg": [ 58.666666666666664, 23.697163449568293 ], "wc_questions_avg": [ 56.333333333333336, 35.593382655893905 ], "wc_limitations_avg": [ 15.0, 9.899494936611665 ], "wc_review_avg": [ 219.0, 60.42074698865172 ], "wc_reply_reviewers_avg": [ 93.66666666666667, 95.1361597337673 ], "wc_reply_authors_avg": [ 244.33333333333334, 259.71822337979205 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9021054469329173514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "u.nus.edu;amazon.com;nus.edu.sg;amazon.com;nus.edu;u.nus.edu;amazon.com;amazon.com;nus.edu.sg", "author_num": 9, "aff_unique_index": "0;1;0;1;0;0;1;1;0", "aff_unique_norm": "National University of Singapore;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.nus.edu.sg;https://www.amazon.com", "aff_unique_abbr": "NUS;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0;1;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Denoising Diffusion Path: Attribution Noise Reduction with An Auxiliary Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94481", "id": "bSv0MBDBF2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bSv0MBDBF2", "openreview": "https://openreview.net/forum?id=bSv0MBDBF2", "poster": "/media/PosterPDFs/NeurIPS%202024/94481.png?t=1729740976.333856", "project": "", "author_site": "Yiming Lei, Zilong Li, Junping Zhang, Hongming Shan", "tldr": "", "abstract": "The explainability of deep neural networks (DNNs) is critical for trust and reliability in AI systems. Path-based attribution methods, such as integrated gradients (IG), aim to explain predictions by accumulating gradients along a path from a baseline to the target image. However, noise accumulated during this process can significantly distort the explanation. While existing methods primarily concentrate on finding alternative paths to circumvent noise, they overlook a critical issue: intermediate-step images frequently diverge from the distribution of training data, further intensifying the impact of noise. This work presents a novel Denoising Diffusion Path (DDPath) to tackle this challenge by harnessing the power of diffusionmodels for denoising. By exploiting the inherent ability of diffusion models to progressively remove noise from an image, DDPath constructs a piece-wise linear path. Each segment of this path ensures that samples drawn from a Gaussian distribution are centered around the target image. This approach facilitates a gradual reduction of noise along the path. We further demonstrate that DDPath adheres to essential axiomatic properties for attribution methods and can be seamlessly integrated with existing methods such as IG. Extensive experimental results demonstrate that DDPath can significantly reduce noise in the attributions\u2014resulting in clearer explanations\u2014and achieves better quantitative results than traditional path-based methods.", "keywords": "Diffusion Models;Interpretability;Integrated Gradients;Path Integration", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yiming Lei;Zilong Li;Junping Zhang;Hongming Shan", "authorids": "~Yiming_Lei1;~Zilong_Li1;~Junping_Zhang2;~Hongming_Shan1", "gender": "M;M;M;M", "homepage": ";;http://www.pami.fudan.edu.cn;http://hmshan.io/", "dblp": ";;02/5388.html;184/8229", "google_scholar": "hvtuRWsAAAAJ;xriCV6QAAAAJ;Aib_NTYAAAAJ;https://scholar.google.co.uk/citations?user=RYfSzKwAAAAJ", "orcid": "0000-0002-1349-7074;;;0000-0002-0604-3197", "linkedin": ";;;", "or_profile": "~Yiming_Lei1;~Zilong_Li1;~Junping_Zhang2;~Hongming_Shan1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "Postdoc;PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nlei2024denoising,\ntitle={Denoising Diffusion Path: Attribution Noise Reduction with An Auxiliary Diffusion Model},\nauthor={Yiming Lei and Zilong Li and Junping Zhang and Hongming Shan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bSv0MBDBF2}\n}", "github": "", "reviewers": "rjLS;2oA3;qJDk", "pdf_size": 10098940, "rating": "5;6;7", "confidence": "2;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "42;72;80", "wc_strengths": "37;38;103", "wc_weaknesses": "72;70;182", "wc_questions": "2;96;62", "wc_limitations": "6;5;12", "wc_review": "159;281;439", "wc_reply_reviewers": "0;30;55", "wc_reply_authors": "38;21;314", "reply_reviewers": "0;1;2", "reply_authors": "2;2;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 64.66666666666667, 16.35712552851373 ], "wc_strengths_avg": [ 59.333333333333336, 30.879694874715902 ], "wc_weaknesses_avg": [ 108.0, 52.33227175144097 ], "wc_questions_avg": [ 53.333333333333336, 38.861577711439125 ], "wc_limitations_avg": [ 7.666666666666667, 3.0912061651652345 ], "wc_review_avg": [ 293.0, 114.62402307835241 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 22.48456260538674 ], "wc_reply_authors_avg": [ 124.33333333333333, 134.294038917924 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1410289581310769573&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "InfLLM: Training-Free Long-Context Extrapolation for LLMs with an Efficient Context Memory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94480", "id": "bTHFrqhASY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bTHFrqhASY", "openreview": "https://openreview.net/forum?id=bTHFrqhASY", "poster": "", "project": "", "author_site": "Chaojun Xiao, Pengle Zhang, Xu Han, Guangxuan Xiao, Yankai Lin, Zhengyan Zhang, Zhiyuan Liu, Maosong Sun", "tldr": "", "abstract": "Large language models (LLMs) have emerged as a cornerstone in real-world applications with lengthy streaming inputs (e.g., LLM-driven agents). However, existing LLMs, pre-trained on sequences with a restricted maximum length, cannot process longer sequences due to the out-of-domain and distraction issues. Common solutions often involve continual pre-training on longer sequences, which will introduce expensive computational overhead and uncontrollable change in model capabilities. In this paper, we unveil the intrinsic capacity of LLMs for understanding extremely long sequences without any fine-tuning. To this end, we introduce a training-free memory-based method, InfLLM. Specifically, InfLLM stores distant contexts into additional memory units and employs an efficient mechanism to lookup token-relevant units for attention computation. Thereby, InfLLM allows LLMs to efficiently process long sequences with a limited context window and well capture long-distance dependencies. Without any training, InfLLM enables LLMs that are pre-trained on sequences consisting of a few thousand tokens to achieve comparable performance with competitive baselines that continually train these LLMs on long sequences. Even when the sequence length is scaled to 1,024K, InfLLM still effectively captures long-distance dependencies. Our code can be found at https://github.com/thunlp/InfLLM.", "keywords": "Long context;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/bf6beb791a4634020798e89d2bc77d5a7349a9b3.zip", "author": "Chaojun Xiao;Pengle Zhang;Xu Han;Guangxuan Xiao;Yankai Lin;Zhengyan Zhang;Zhiyuan Liu;Maosong Sun", "authorids": "~Chaojun_Xiao1;~Pengle_Zhang1;~Xu_Han2;~Guangxuan_Xiao1;~Yankai_Lin1;~Zhengyan_Zhang1;~Zhiyuan_Liu1;~Maosong_Sun1", "gender": "M;;;;M;M;M;M", "homepage": "https://xcjthu.github.io/;;;;https://linyankai.github.io/;;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": "223/4856;;;;161/0001.html;;53/3245-1;95/3291-1", "google_scholar": "xoC8smYAAAAJ;;;;https://scholar.google.com.hk/citations?user=j8K1FqEAAAAJ;;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";;;;0000-0002-9182-8158;;0000-0002-7709-2543;", "linkedin": ";;;;;;;", "or_profile": "~Chaojun_Xiao1;~Pengle_Zhang1;~Xu_Han2;~Guangxuan_Xiao1;~Yankai_Lin1;~Zhengyan_Zhang1;~Zhiyuan_Liu1;~Maosong_Sun1", "aff": "Tsinghua University;;;;Renmin University of China;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;;;ruc.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;;;;Assistant Professor;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxiao2024infllm,\ntitle={Inf{LLM}: Training-Free Long-Context Extrapolation for {LLM}s with an Efficient Context Memory},\nauthor={Chaojun Xiao and Pengle Zhang and Xu Han and Guangxuan Xiao and Yankai Lin and Zhengyan Zhang and Zhiyuan Liu and Maosong Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bTHFrqhASY}\n}", "github": "", "reviewers": "erR9;a1ZB;Srrj;5j15", "pdf_size": 939736, "rating": "3;5;5;7", "confidence": "4;5;5;4", "soundness": "3;2;2;3", "novelty": "1;2;2;2", "presentation": "1;3;3;2", "wc_summary": "84;60;50;118", "wc_strengths": "14;48;29;41", "wc_weaknesses": "296;125;138;130", "wc_questions": "7;89;4;99", "wc_limitations": "18;3;66;1", "wc_review": "419;325;287;389", "wc_reply_reviewers": "1123;117;49;94", "wc_reply_authors": "922;319;92;64", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 78.0, 26.19160170741759 ], "wc_strengths_avg": [ 33.0, 12.90348790056394 ], "wc_weaknesses_avg": [ 172.25, 71.59739869576268 ], "wc_questions_avg": [ 49.75, 44.40368790990226 ], "wc_limitations_avg": [ 22.0, 26.239283526803852 ], "wc_review_avg": [ 355.0, 51.90375708944392 ], "wc_reply_reviewers_avg": [ 345.75, 449.41149017353797 ], "wc_reply_authors_avg": [ 349.25, 345.1458930655267 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8733104569917954880&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;;;;ruc.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Fairness-Quality Tradeoff in Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94479", "id": "bUi2xECa7w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bUi2xECa7w", "openreview": "https://openreview.net/forum?id=bUi2xECa7w", "poster": "/media/PosterPDFs/NeurIPS%202024/94479.png?t=1733506099.2020802", "project": "", "author_site": "Rashida Hakim, Ana-Andreea Stoica, Christos Papadimitriou, Mihalis Yannakakis", "tldr": "", "abstract": "Fairness in clustering has been considered extensively in the past; however, the trade-off between the two objectives --- e.g., can we sacrifice just a little in the quality of the clustering to significantly increase fairness, or vice-versa? --- has rarely been addressed. We introduce novel algorithms for tracing the complete trade-off curve, or Pareto front, between quality and fairness in clustering problems; that is, computing all clusterings that are not dominated in both objectives by other clusterings. Unlike previous work that deals with specific objectives for quality and fairness, we deal with all objectives for fairness and quality in two general classes encompassing most of the special cases addressed in previous work. Our algorithm must take exponential time in the worst case as the Parero front itself can be exponential. Even when the Pareto front is polynomial, our algorithm may take exponential time, and we prove that this is inevitable unless P = NP. However, we also present a new polynomial-time algorithm for computing the entire Pareto front when the cluster centers are fixed, and for perhaps the most natural fairness objective: minimizing the sum, over all clusters, of the imbalance between the two groups in each cluster.", "keywords": "clustering;algorithmic-fairness;multiobjective-optimization", "primary_area": "fairness", "supplementary_material": "", "author": "Rashida Hakim;Ana-Andreea Stoica;Christos Papadimitriou;Mihalis Yannakakis", "authorids": "~Rashida_Hakim1;~Ana-Andreea_Stoica1;~Christos_Papadimitriou2;~Mihalis_Yannakakis1", "gender": "F;;M;", "homepage": "https://www.rashidahakim.org/;;;http://www.cs.columbia.edu/~mihalis", "dblp": ";;p/CHPapadimitriou;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "rashida-hakim/;;;", "or_profile": "~Rashida_Hakim1;~Ana-Andreea_Stoica1;~Christos_Papadimitriou2;~Mihalis_Yannakakis1", "aff": "Columbia University;;Columbia University;Columbia University", "aff_domain": "columbia.edu;;columbia.edu;columbia.edu", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhakim2024the,\ntitle={The Fairness-Quality Tradeoff in Clustering},\nauthor={Rashida Hakim and Ana-Andreea Stoica and Christos Papadimitriou and Mihalis Yannakakis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bUi2xECa7w}\n}", "github": "", "reviewers": "eL7E;95VK;RJfp;TW4o", "pdf_size": 1196088, "rating": "4;6;6;6", "confidence": "4;4;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "68;246;172;201", "wc_strengths": "52;30;91;57", "wc_weaknesses": "238;48;214;127", "wc_questions": "10;17;44;28", "wc_limitations": "4;1;9;8", "wc_review": "372;342;530;421", "wc_reply_reviewers": "0;0;42;123", "wc_reply_authors": "0;0;0;51", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 171.75, 65.4460655807513 ], "wc_strengths_avg": [ 57.5, 21.84605227495348 ], "wc_weaknesses_avg": [ 156.75, 75.15109779637288 ], "wc_questions_avg": [ 24.75, 12.833062767710599 ], "wc_limitations_avg": [ 5.5, 3.2015621187164243 ], "wc_review_avg": [ 416.25, 71.47158526295607 ], "wc_reply_reviewers_avg": [ 41.25, 50.2164066814821 ], "wc_reply_authors_avg": [ 12.75, 22.083647796503186 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18306171448022150699&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "columbia.edu;;columbia.edu;columbia.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SuperVLAD: Compact and Robust Image Descriptors for Visual Place Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94478", "id": "bZpZMdY1sj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bZpZMdY1sj", "openreview": "https://openreview.net/forum?id=bZpZMdY1sj", "poster": "/media/PosterPDFs/NeurIPS%202024/94478.png?t=1731050903.702651", "project": "", "author_site": "Feng Lu, Xinyao Zhang, Canming Ye, Shuting Dong, Lijun Zhang, Xiangyuan Lan, Chun Yuan", "tldr": "", "abstract": "Visual place recognition (VPR) is an essential task for multiple applications such as augmented reality and robot localization. Over the past decade, mainstream methods in the VPR area have been to use feature representation based on global aggregation, as exemplified by NetVLAD. These features are suitable for large-scale VPR and robust against viewpoint changes. However, the VLAD-based aggregation methods usually learn a large number of (e.g., 64) clusters and their corresponding cluster centers, which directly leads to a high dimension of the yielded global features. More importantly, when there is a domain gap between the data in training and inference, the cluster centers determined on the training set are usually improper for inference, resulting in a performance drop. To this end, we first attempt to improve NetVLAD by removing the cluster center and setting only a small number of (e.g., only 4) clusters. The proposed method not only simplifies NetVLAD but also enhances the generalizability across different domains. We name this method SuperVLAD. In addition, by introducing ghost clusters that will not be retained in the final output, we further propose a very low-dimensional 1-Cluster VLAD descriptor, which has the same dimension as the output of GeM pooling but performs notably better. Experimental results suggest that, when paired with a transformer-based backbone, our SuperVLAD shows better domain generalization performance than NetVLAD with significantly fewer parameters. The proposed method also surpasses state-of-the-art methods with lower feature dimensions on several benchmark datasets. The code is available at https://github.com/lu-feng/SuperVLAD.", "keywords": "Visual Place Recognition;Image Descriptors;VLAD;SuperVLAD;1-Cluster VLAD", "primary_area": "machine_vision", "supplementary_material": "", "author": "Feng Lu;Xinyao Zhang;Canming Ye;Shuting Dong;Lijun Zhang;Xiangyuan Lan;Chun Yuan", "authorids": "~Feng_Lu6;~Xinyao_Zhang4;~Canming_Ye1;~Shuting_Dong1;~Lijun_Zhang7;~Xiangyuan_Lan4;~Chun_Yuan1", "gender": ";F;M;F;F;M;M", "homepage": "https://github.com/Lu-Feng;https://github.com/xinyaozhang;https://github.com/2catycm;;;https://www.sigs.tsinghua.edu.cn/fg3/105064.jhtml;https://www.comp.hkbu.edu.hk/v1/?page=profile&id=lanxiangyuan", "dblp": ";;;;;;151/8902", "google_scholar": "Gws7FKMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=fYdxi2sAAAAJ;https://scholar.google.com.hk/citations?user=c3iwWRcAAAAJ", "orcid": "0000-0002-5236-2287;;0009-0001-6190-8261;;0000-0002-9453-4032;;", "linkedin": ";;;;;;", "or_profile": "~Feng_Lu6;~Xinyao_Zhang4;~Canming_Ye1;~Shuting_Dong1;~Lijun_Zhang7;~Chun_Yuan1;~xiangyuan_lan1", "aff": "Tsinghua University;Tsinghua University;Southern University of Science and Technology;Tsinghua University;University of Chinese Academy of Sciences;Tsinghua University;Pengcheng Laboratory", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mail.sustech.edu.cn;tsinghua.edu.cn;cigit.ac.cn;tsinghua.edu.cn;pcl.ac.cn", "position": "PhD student;MS student;Undergrad student;PhD student;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nlu2024supervlad,\ntitle={Super{VLAD}: Compact and Robust Image Descriptors for Visual Place Recognition},\nauthor={Feng Lu and Xinyao Zhang and Canming Ye and Shuting Dong and Lijun Zhang and Xiangyuan Lan and Chun Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bZpZMdY1sj}\n}", "github": "", "reviewers": "w8L8;7MN7;wUjJ;utGd;TmXY", "pdf_size": 18909847, "rating": "6;6;6;6;7", "confidence": "4;5;5;3;4", "soundness": "3;4;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;4;3;3;3", "wc_summary": "177;92;72;30;72", "wc_strengths": "111;96;107;44;177", "wc_weaknesses": "259;108;174;195;26", "wc_questions": "84;29;1;103;134", "wc_limitations": "15;105;1;56;32", "wc_review": "646;430;355;428;441", "wc_reply_reviewers": "0;0;13;76;0", "wc_reply_authors": "0;0;63;22;0", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;3;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 88.6, 48.602880573068916 ], "wc_strengths_avg": [ 107.0, 42.44054665057933 ], "wc_weaknesses_avg": [ 152.4, 79.48987356890184 ], "wc_questions_avg": [ 70.2, 48.626741614054296 ], "wc_limitations_avg": [ 41.8, 36.53710442823843 ], "wc_review_avg": [ 460.0, 97.88360434720414 ], "wc_reply_reviewers_avg": [ 17.8, 29.532355138051557 ], "wc_reply_authors_avg": [ 17.0, 24.52753554680943 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.1336306209562122, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18396757339903819605&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mail.sustech.edu.cn;tsinghua.edu.cn;cigit.ac.cn;tsinghua.edu.cn;pcl.ac.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0;3", "aff_unique_norm": "Tsinghua University;Southern University of Science and Technology;University of Chinese Academy of Sciences;Pengcheng Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sustech.edu.cn;http://www.ucas.ac.cn;", "aff_unique_abbr": "THU;SUSTech;UCAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fast yet Safe: Early-Exiting with Risk Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94477", "id": "bbFjpasRgs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bbFjpasRgs", "openreview": "https://openreview.net/forum?id=bbFjpasRgs", "poster": "/media/PosterPDFs/NeurIPS%202024/94477.png?t=1732010222.1753237", "project": "", "author_site": "Metod Jazbec, Alexander Timans, Tin Had\u017ei Veljkovi\u0107, Kaspar Sakmann, Dan Zhang, Christian Andersson Naesseth, Eric Nalisnick", "tldr": "", "abstract": "Scaling machine learning models significantly improves their performance. However, such gains come at the cost of inference being slow and resource-intensive. Early-exit neural networks (EENNs) offer a promising solution: they accelerate inference by allowing intermediate layers to exit and produce a prediction early. Yet a fundamental issue with EENNs is how to determine when to exit without severely degrading performance. In other words, when is it 'safe' for an EENN to go 'fast'? To address this issue, we investigate how to adapt frameworks of risk control to EENNs. Risk control offers a distribution-free, post-hoc solution that tunes the EENN's exiting mechanism so that exits only occur when the output is of sufficient quality. We empirically validate our insights on a range of vision and language tasks, demonstrating that risk control can produce substantial computational savings, all the while preserving user-specified performance goals.", "keywords": "Early-exit neural networks;risk control;efficiency;robustness", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Metod Jazbec;Alexander Timans;Tin Had\u017ei Veljkovi\u0107;Kaspar Sakmann;Dan Zhang;Christian A. Naesseth;Eric Nalisnick", "authorids": "~Metod_Jazbec1;~Alexander_Timans1;~Tin_Had\u017ei_Veljkovi\u01071;~Kaspar_Sakmann1;~Dan_Zhang1;~Christian_A._Naesseth1;~Eric_Nalisnick1", "gender": "M;M;M;;;M;M", "homepage": ";https://alextimans.github.io/;;https://ksakmann.github.io/;;https://enalisnick.github.io;https://naesseth.github.io/", "dblp": ";354/8299;;177/0713.html;21/802-17;136/4057;146/0902", "google_scholar": "https://scholar.google.ch/citations?user=lDEYafIAAAAJ;tgiKFH4AAAAJ;;cmIHK9UAAAAJ;https://scholar.google.de/citations?user=yazO-mMAAAAJ;cb1ZN7AAAAAJ;GQ6rOssAAAAJ", "orcid": ";0009-0006-9395-5560;;0000-0002-5342-5921;0000-0003-0930-9162;;", "linkedin": "metod-jazbec-300412102/;alexander-timans/;tin-hv/;kasparsakmann/;;;", "or_profile": "~Metod_Jazbec1;~Alexander_Timans1;~Tin_Had\u017ei_Veljkovi\u01071;~Kaspar_Sakmann1;~Dan_Zhang1;~Eric_Nalisnick1;~Christian_A_Naesseth1", "aff": "University of Amsterdam;University of Amsterdam;;Robert Bosch GmbH, Bosch;Robert Bosch GmbH, Bosch;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;;de.bosch.com;de.bosch.com;uva.nl;uva.nl", "position": "PhD student;PhD student;;Researcher;Research Scientist;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njazbec2024fast,\ntitle={Fast yet Safe: Early-Exiting with Risk Control},\nauthor={Metod Jazbec and Alexander Timans and Tin Had{\\v{z}}i Veljkovi{\\'c} and Kaspar Sakmann and Dan Zhang and Christian A. Naesseth and Eric Nalisnick},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bbFjpasRgs}\n}", "github": "", "reviewers": "c7rs;q2CX;LhAs;qAvq", "pdf_size": 6651109, "rating": "4;6;6;7", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "87;117;59;90", "wc_strengths": "96;27;64;98", "wc_weaknesses": "70;171;33;141", "wc_questions": "3;31;3;73", "wc_limitations": "1;30;1;10", "wc_review": "257;376;160;412", "wc_reply_reviewers": "9;0;8;83", "wc_reply_authors": "136;60;36;73", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.25, 20.535031044534605 ], "wc_strengths_avg": [ 71.25, 28.89095879336648 ], "wc_weaknesses_avg": [ 103.75, 54.89706276295664 ], "wc_questions_avg": [ 27.5, 28.648734701553575 ], "wc_limitations_avg": [ 10.5, 11.84271928232701 ], "wc_review_avg": [ 301.25, 99.70299644443992 ], "wc_reply_reviewers_avg": [ 25.0, 33.66749173906485 ], "wc_reply_authors_avg": [ 76.25, 36.961973702712356 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12079908536336850982&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "uva.nl;uva.nl;;de.bosch.com;de.bosch.com;uva.nl;uva.nl", "author_num": 7, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "University of Amsterdam;Robert Bosch GmbH", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.bosch.com", "aff_unique_abbr": "UvA;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "Netherlands;Germany" }, { "title": "Challenges of Generating Structurally Diverse Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94476", "id": "bbGPoL1NLo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bbGPoL1NLo", "openreview": "https://openreview.net/forum?id=bbGPoL1NLo", "poster": "", "project": "", "author_site": "Fedor Velikonivtsev, Mikhail Mironov, Liudmila Prokhorenkova", "tldr": "", "abstract": "For many graph-related problems, it can be essential to have a set of structurally diverse graphs. For instance, such graphs can be used for testing graph algorithms or their neural approximations. However, to the best of our knowledge, the problem of generating structurally diverse graphs has not been explored in the literature. In this paper, we fill this gap. First, we discuss how to define diversity for a set of graphs, why this task is non-trivial, and how one can choose a proper diversity measure. Then, for a given diversity measure, we propose and compare several algorithms optimizing it: we consider approaches based on standard random graph models, local graph optimization, genetic algorithms, and neural generative models. We show that it is possible to significantly improve diversity over basic random graph generators. Additionally, our analysis of generated graphs allows us to better understand the properties of graph distances: depending on which diversity measure is used for optimization, the obtained graphs may possess very different structural properties which gives a better understanding of the graph distance underlying the diversity measure.", "keywords": "diverse graphs;random graph model;graph generative model;graph distance", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Fedor Velikonivtsev;Mikhail Mironov;Liudmila Prokhorenkova", "authorids": "~Fedor_Velikonivtsev1;~Mikhail_Mironov1;~Liudmila_Prokhorenkova1", "gender": "M;F;M", "homepage": ";;", "dblp": ";45/11468;", "google_scholar": ";https://scholar.google.ru/citations?user=6JyZlSEAAAAJ;xNdbEhwAAAAJ", "orcid": "0000-0001-5371-8073;;0000-0002-1830-8340", "linkedin": "mikhail-mironov-66663b1a1/;;", "or_profile": "~Mikhail_Mironov1;~Liudmila_Prokhorenkova1;~Fyodor_Velikonivtsev1", "aff": "Noeon;Yandex;Almazov National Medical Research Centre", "aff_domain": "noeon.ai;yandex-team.ru;almazovcentre.ru", "position": "Researcher;Researcher;Student", "bibtex": "@inproceedings{\nvelikonivtsev2024challenges,\ntitle={Challenges of Generating Structurally Diverse Graphs},\nauthor={Fedor Velikonivtsev and Mikhail Mironov and Liudmila Prokhorenkova},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bbGPoL1NLo}\n}", "github": "", "reviewers": "VyJM;wU7k;myRm;h5vQ", "pdf_size": 986531, "rating": "4;5;5;7", "confidence": "4;3;2;5", "soundness": "2;2;3;4", "novelty": "2;2;2;4", "presentation": "2;3;2;4", "wc_summary": "130;98;49;119", "wc_strengths": "67;60;37;13", "wc_weaknesses": "249;84;64;61", "wc_questions": "90;29;1;85", "wc_limitations": "4;1;4;1", "wc_review": "540;272;155;279", "wc_reply_reviewers": "212;11;0;13", "wc_reply_authors": "909;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 99.0, 31.072495876578696 ], "wc_strengths_avg": [ 44.25, 21.182244923520265 ], "wc_weaknesses_avg": [ 114.5, 78.15529412650176 ], "wc_questions_avg": [ 51.25, 37.6189779233833 ], "wc_limitations_avg": [ 2.5, 1.5 ], "wc_review_avg": [ 311.5, 140.81992046582047 ], "wc_reply_reviewers_avg": [ 59.0, 88.47315977176355 ], "wc_reply_authors_avg": [ 227.25, 393.60854602002735 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5129891760425771, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4166226358781357362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "noeon.ai;yandex-team.ru;almazovcentre.ru", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nooen;Yandex;Almazov National Medical Research Centre", "aff_unique_dep": ";;", "aff_unique_url": ";https://yandex.com;https://almazovcentre.ru/en", "aff_unique_abbr": ";Yandex;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";Russian Federation" }, { "title": "Robust Sleep Staging over Incomplete Multimodal Physiological Signals via Contrastive Imagination", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94475", "id": "bc1qt1sZsW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bc1qt1sZsW", "openreview": "https://openreview.net/forum?id=bc1qt1sZsW", "poster": "/media/PosterPDFs/NeurIPS%202024/94475.png?t=1730783027.9266155", "project": "", "author_site": "Qi Shen, Junchang Xin, Bing Dai, Shudi Zhang, Zhiqiong Wang", "tldr": "", "abstract": "Multimodal physiological signals, such as EEG, EOG and EMG, provide rich and reliable physiological information for automated sleep staging (ASS). However, in the real world, the completeness of various modalities is difficult to guarantee, which seriously affects the performance of ASS based on multimodal learning. Furthermore, the exploration of temporal context information within PTSs is also a serious challenge. To this end, we propose a robust multimodal sleep staging framework named contrastive imagination modality sleep network (CIMSleepNet). Specifically, CIMSleepNet handles the issue of arbitrary modal missing through the combination of modal awareness imagination module (MAIM) and semantic & modal calibration contrastive learning (SMCCL). Among them, MAIM can capture the interaction among modalities by learning the shared representation distribution of all modalities. Meanwhile, SMCCL introduces prior information of semantics and modalities to check semantic consistency while maintaining the uniqueness of each modality. Utilizing the calibration of SMCCL, the data distribution recovered by MAIM is aligned with the real data distribution. We further design a multi-level cross-branch temporal attention mechanism, which can facilitate the mining of interactive temporal context representations at both the intra-epoch and inter-epoch levels. Extensive experiments on five multimodal sleep datasets demonstrate that CIMSleepNet remarkably outperforms other competitive methods under various missing modality patterns. The source code is available at: https://github.com/SQAIYY/CIMSleepNet.", "keywords": "Automated sleep staging; Incomplete modality; Multimodal learning; Contrastive learning; Temporal context learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Qi Shen;Junchang Xin;Bing Tian Dai;Shudi Zhang;Zhiqiong Wang", "authorids": "~Qi_Shen2;~Junchang_Xin1;~Bing_Tian_Dai1;~Shudi_Zhang1;~Zhiqiong_Wang1", "gender": "M;M;M;M;F", "homepage": "http://faculty.neu.edu.cn/wangzhiqiong/zh_CN/xsxx/75308/content/6915.htm#xsxx;http://faculty.neu.edu.cn/xinjunchang;http://btdai.net;http://faculty.neu.edu.cn/wangzhiqiong/zh_CN/xsxx/75308/content/6918.htm#xsxxAAAJ&hl=zh-CN&oi=sra;http://faculty.neu.edu.cn/wangzhiqiong/zh_CN/index.htm", "dblp": "46/959-2.html;35/3175;13/5012;04/10181;140/6525", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;wN5sGMQAAAAJ;MeQA7UYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7096-3732;0000-0003-2077-8269;0000-0002-7245-7021;0009-0005-0629-1296;0000-0002-0095-0378", "linkedin": ";;;;", "or_profile": "~Qi_Shen2;~Junchang_Xin1;~Bing_Tian_Dai1;~Shudi_Zhang1;~Zhiqiong_Wang1", "aff": "Northeastern University;Northeastern University;Singapore Management University;Northeastern University;Northeastern University", "aff_domain": "stu.neu.edu.cn;neu.edu.cn;smu.edu.sg;neu.edu;neu.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nshen2024robust,\ntitle={Robust Sleep Staging over Incomplete Multimodal Physiological Signals via Contrastive Imagination},\nauthor={Qi Shen and Junchang Xin and Bing Tian Dai and Shudi Zhang and Zhiqiong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bc1qt1sZsW}\n}", "github": "", "reviewers": "PgT4;HATA;RxY3;QG76", "pdf_size": 28391508, "rating": "4;6;6;7", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "48;72;100;48", "wc_strengths": "75;43;94;113", "wc_weaknesses": "134;294;40;34", "wc_questions": "65;73;67;68", "wc_limitations": "1;4;2;24", "wc_review": "323;486;303;287", "wc_reply_reviewers": "0;61;0;0", "wc_reply_authors": "542;394;99;99", "reply_reviewers": "0;1;0;0", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.0, 21.42428528562855 ], "wc_strengths_avg": [ 81.25, 25.849323008543184 ], "wc_weaknesses_avg": [ 125.5, 105.05593748094392 ], "wc_questions_avg": [ 68.25, 2.947456530637899 ], "wc_limitations_avg": [ 7.75, 9.443913383762052 ], "wc_review_avg": [ 349.75, 79.69120089445258 ], "wc_reply_reviewers_avg": [ 15.25, 26.413774815425377 ], "wc_reply_authors_avg": [ 283.5, 191.77656269732233 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4272596719631286866&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "stu.neu.edu.cn;neu.edu.cn;smu.edu.sg;neu.edu;neu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Northeastern University;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.smu.edu.sg", "aff_unique_abbr": "NEU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Singapore" }, { "title": "DeTikZify: Synthesizing Graphics Programs for Scientific Figures and Sketches with TikZ", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94474", "id": "bcVLFQCOjc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bcVLFQCOjc", "openreview": "https://openreview.net/forum?id=bcVLFQCOjc", "poster": "/media/PosterPDFs/NeurIPS%202024/94474.png?t=1732803401.2871904", "project": "", "author_site": "Jonas Belouadi, Simone Ponzetto, Steffen Eger", "tldr": "", "abstract": "Creating high-quality scientific figures can be time-consuming and challenging, even though sketching ideas on paper is relatively easy. Furthermore, recreating existing figures that are not stored in formats preserving semantic information is equally complex. To tackle this problem, we introduce DeTikZify, a novel multimodal language model that automatically synthesizes scientific figures as semantics-preserving TikZ graphics programs based on sketches and existing figures. To achieve this, we create three new datasets: DaTikZv2, the largest TikZ dataset to date, containing over 360k human-created TikZ graphics; SketchFig, a dataset that pairs hand-drawn sketches with their corresponding scientific figures; and MetaFig, a collection of diverse scientific figures and associated metadata. We train DeTikZify on MetaFig and DaTikZv2, along with synthetically generated sketches learned from SketchFig. We also introduce an MCTS-based inference algorithm that enables DeTikZify to iteratively refine its outputs without the need for additional training. Through both automatic and human evaluation, we demonstrate that DeTikZify outperforms commercial Claude 3 and GPT-4V in synthesizing TikZ programs, with the MCTS algorithm effectively boosting its performance. We make our code, models, and datasets publicly available.", "keywords": "Vision Language Models;Code Generation;Image Understanding;Vector Graphics Generation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c764a01bb8de5a6270032ba923a6b836c7ba5147.zip", "author": "Jonas Belouadi;Simone Paolo Ponzetto;Steffen Eger", "authorids": "~Jonas_Belouadi1;~Simone_Paolo_Ponzetto1;~Steffen_Eger1", "gender": "M;M;M", "homepage": ";http://dws.informatik.uni-mannheim.de/ponzetto;https://steffeneger.github.io/", "dblp": "314/6088;04/2532;69/9271", "google_scholar": "ut5IWKwAAAAJ;VmIFG0EAAAAJ;https://scholar.google.de/citations?user=TnuqAW0AAAAJ", "orcid": "0009-0002-2990-9536;0000-0001-7484-2049;", "linkedin": "belouadi;;", "or_profile": "~Jonas_Belouadi1;~Simone_Paolo_Ponzetto1;~Steffen_Eger1", "aff": "Universit\u00e4t Mannheim;Universit\u00e4t Mannheim;Universit\u00e4t Mannheim", "aff_domain": "uni-mannheim.de;uni-mannheim.de;uni-mannheim.de", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbelouadi2024detikzify,\ntitle={DeTikZify: Synthesizing Graphics Programs for Scientific Figures and Sketches with TikZ},\nauthor={Jonas Belouadi and Simone Paolo Ponzetto and Steffen Eger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bcVLFQCOjc}\n}", "github": "", "reviewers": "bXux;gKyN;tgj9;uo3w", "pdf_size": 4681845, "rating": "4;7;8;8", "confidence": "4;4;3;4", "soundness": "2;3;4;4", "novelty": "2;3;4;4", "presentation": "2;3;4;4", "wc_summary": "62;92;136;70", "wc_strengths": "127;65;71;225", "wc_weaknesses": "113;50;84;145", "wc_questions": "5;12;7;194", "wc_limitations": "5;17;10;8", "wc_review": "312;236;308;642", "wc_reply_reviewers": "0;0;0;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.0, 28.74021572639983 ], "wc_strengths_avg": [ 122.0, 64.19501538281614 ], "wc_weaknesses_avg": [ 98.0, 35.12121865767189 ], "wc_questions_avg": [ 54.5, 80.5807048864677 ], "wc_limitations_avg": [ 10.0, 4.415880433163924 ], "wc_review_avg": [ 374.5, 157.37455321620456 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.44022545316281186, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6936735111168463406&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uni-mannheim.de;uni-mannheim.de;uni-mannheim.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Mannheim", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-mannheim.de", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "LibAMM: Empirical Insights into Approximate Computing for Accelerating Matrix Multiplication", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97612", "id": "bepcG3itGX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bepcG3itGX", "openreview": "https://openreview.net/forum?id=bepcG3itGX", "poster": "/media/PosterPDFs/NeurIPS%202024/97612.png?t=1730856444.1841452", "project": "", "author_site": "Xianzhi Zeng, Wenchao Jiang, SHUHAO ZHANG", "tldr": "", "abstract": "Matrix multiplication (MM) is pivotal in fields from deep learning to scientific computing, driving the quest for improved computational efficiency. Accelerating MM encompasses strategies like complexity reduction, parallel and distributed computing, hardware acceleration, and approximate computing techniques, namely AMM algorithms. Amidst growing concerns over the resource demands of large language models (LLMs), AMM has garnered renewed focus. However, understanding the nuances that govern AMM\u2019s effectiveness remains incomplete. This study delves into AMM by examining algorithmic strategies, operational specifics, dataset characteristics, and their application in real-world tasks. Through comprehensive testing across diverse datasets and scenarios, we analyze how these factors affect AMM\u2019s performance, uncovering that the selection of AMM approaches significantly influences the balance between efficiency and accuracy, with factors like memory access playing a pivotal role. Additionally, dataset attributes are shown to be vital for the success of AMM in applications. Our results advocate for tailored algorithmic approaches and careful strategy selection to enhance AMM\u2019s effectiveness. To aid in the practical application and ongoing research of AMM, we introduce LibAMM \u2014a toolkit offering a wide range of AMM algorithms, benchmarks, and tools for experiment management. LibAMM aims to facilitate research and application in AMM, guiding future developments towards more adaptive and context-aware computational solutions.", "keywords": "Approximate Computing; Matrix Multiplication", "primary_area": "", "supplementary_material": "/attachment/d806c6350250cd2f3bd506c677a0f07533759380.pdf", "author": "Xianzhi Zeng;Wenchao Jiang;Shuhao Zhang", "authorids": "~Xianzhi_Zeng2;~Wenchao_Jiang4;~Shuhao_Zhang4", "gender": "M;M;M", "homepage": "https://tonyskyzeng.github.io/;https://istd.sutd.edu.sg/people/faculty/jiang-wenchao/;https://shuhaozhangtony.github.io/", "dblp": "349/7616;;135/4657", "google_scholar": ";bve5zI0AAAAJ;Xwn7lCEAAAAJ", "orcid": "0009-0002-4613-9297;;0000-0002-9927-6925", "linkedin": ";;", "or_profile": "~Xianzhi_Zeng2;~Wenchao_Jiang4;~Shuhao_Zhang4", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Nanyang Technological University", "aff_domain": "sutd.edu.sg;sutd.edu.sg;ntu.edu.sg", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzeng2024libamm,\ntitle={Lib{AMM}: Empirical Insights into Approximate Computing for Accelerating Matrix Multiplication},\nauthor={Xianzhi Zeng and Wenchao Jiang and Shuhao Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=bepcG3itGX}\n}", "github": "", "reviewers": "pKWS;izvp;zqbq", "pdf_size": 398348, "rating": "7;7;7", "confidence": "3;3;3", "wc_summary_and_contributions": "79;223;55", "wc_strengths": "22;88;39", "wc_improvement": "16;102;115", "wc_limitations": "11;23;68", "wc_correctness": "19;65;21", "wc_clarity": "42;17;223", "wc_relation_to_prior_work": "33;32;55", "wc_documentation": "33;13;10", "wc_additional_feedback": "1;1;1", "wc_review": "256;564;587", "wc_reply_reviewers": "12;19;12", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 119.0, 74.18894796396563 ], "wc_strengths_avg": [ 49.666666666666664, 27.980151695244412 ], "wc_improvement_avg": [ 77.66666666666667, 43.926706632247715 ], "wc_limitations_avg": [ 34.0, 24.535688292770594 ], "wc_correctness_avg": [ 35.0, 21.228911104120876 ], "wc_clarity_avg": [ 94.0, 91.78598295309946 ], "wc_relation_to_prior_work_avg": [ 40.0, 10.614455552060438 ], "wc_documentation_avg": [ 18.666666666666668, 10.208928554075703 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 469.0, 150.9061518516282 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 3.2998316455372216 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LY2MSgP6-uQJ:scholar.google.com/&scioq=LibAMM:+Empirical+Insights+into+Approximate+Computing+for+Accelerating+Matrix+Multiplication&hl=en&as_sdt=0,24", "gs_version_total": 2, "email": "sutd.edu.sg;sutd.edu.sg;ntu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Singapore University of Technology and Design;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "SUTD;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Active Learning with LLMs for Partially Observed and Cost-Aware Scenarios", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94473", "id": "bescO94wog", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bescO94wog", "openreview": "https://openreview.net/forum?id=bescO94wog", "poster": "", "project": "", "author_site": "Nicol\u00e1s Astorga, Tennison Liu, Nabeel Seedat, Mihaela van der Schaar", "tldr": "", "abstract": "Conducting experiments and gathering data for machine learning models is a complex and expensive endeavor, particularly when confronted with limited information. Typically, extensive _experiments_ to obtain features and labels come with a significant acquisition cost, making it impractical to carry out all of them. Therefore, it becomes crucial to strategically determine what to acquire to maximize the predictive performance while minimizing costs. To perform this task, existing data acquisition methods assume the availability of an initial dataset that is both fully-observed and labeled, crucially overlooking the **partial observability** of features characteristic of many real-world scenarios. In response to this challenge, we present Partially Observable Cost-Aware Active-Learning (POCA), a new learning approach aimed at improving model generalization in data-scarce and data-costly scenarios through label and/or feature acquisition. Introducing $\\mu$POCA as an instantiation, we maximise the uncertainty reduction in the predictive model when obtaining labels and features, considering associated costs. $\\mu$POCA enhance traditional Active Learning metrics based solely on the observed features by generating the unobserved features through Generative Surrogate Models, particularly Large Language Models (LLMs). We empirically validate $\\mu$POCA across diverse tabular datasets, varying data availability, acquisition costs, and LLMs.", "keywords": "Active Learning;Large Language Models;Experimental Design", "primary_area": "active_learning", "supplementary_material": "", "author": "Nicol\u00e1s Astorga;Tennison Liu;Nabeel Seedat;Mihaela van der Schaar", "authorids": "~Nicol\u00e1s_Astorga1;~Tennison_Liu1;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2", "gender": "M;M;;F", "homepage": ";https://www.vanderschaar-lab.com/research-team/#Tennison;;https://www.vanderschaar-lab.com", "dblp": ";256/9899;227/8368;", "google_scholar": "oLiBK8cAAAAJ;LtdHRjsAAAAJ;https://scholar.google.com/citations?hl=en;DZ3S--MAAAAJ", "orcid": ";;;", "linkedin": ";;nabeel-seedat/;", "or_profile": "~Nicol\u00e1s_Astorga1;~Tennison_Liu1;~Nabeel_Seedat1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of Cambridge;AstraZeneca;University of California, Los Angeles", "aff_domain": "cam.ac.uk;cam.ac.uk;astrazeneca.com;ucla.edu", "position": "PhD student;PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\nastorga2024partially,\ntitle={Partially Observable Cost-Aware Active-Learning with Large Language Models},\nauthor={Nicol{\\'a}s Astorga and Tennison Liu and Nabeel Seedat and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bescO94wog}\n}", "github": "", "reviewers": "A8wS;ci71;tNwd;CwS1", "pdf_size": 3076190, "rating": "6;6;6;7", "confidence": "3;4;4;2", "soundness": "4;3;3;3", "novelty": "3;2;2;3", "presentation": "4;3;3;2", "wc_summary": "68;60;64;136", "wc_strengths": "32;74;60;106", "wc_weaknesses": "26;237;78;204", "wc_questions": "157;52;22;49", "wc_limitations": "6;1;15;8", "wc_review": "289;424;239;503", "wc_reply_reviewers": "0;21;12;36", "wc_reply_authors": "0;27;29;27", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 31.304951684997057 ], "wc_strengths_avg": [ 68.0, 26.645825188948457 ], "wc_weaknesses_avg": [ 136.25, 87.0183170372767 ], "wc_questions_avg": [ 70.0, 51.570340313013254 ], "wc_limitations_avg": [ 7.5, 5.024937810560445 ], "wc_review_avg": [ 363.75, 105.08419243635076 ], "wc_reply_reviewers_avg": [ 17.25, 13.141061600951424 ], "wc_reply_authors_avg": [ 20.75, 12.007809958522827 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9452848101751234323&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cam.ac.uk;cam.ac.uk;astrazeneca.com;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Cambridge;AstraZeneca;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.astrazeneca.com;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;AZ;UCLA", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Cambridge;;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Optimistic Verifiable Training by Controlling Hardware Nondeterminism", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94472", "id": "bf0MdFlz1i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bf0MdFlz1i", "openreview": "https://openreview.net/forum?id=bf0MdFlz1i", "poster": "", "project": "", "author_site": "Megha Srivastava, Simran Arora, Dan Boneh", "tldr": "", "abstract": "The increasing compute demands of AI systems has led to the emergence of services that train models on behalf of clients lacking necessary resources. However, ensuring correctness of training and guarding against potential training-time attacks, such as data poisoning and backdoors, poses challenges. Existing works on verifiable training largely fall into two classes: proof-based systems, which can be difficult to scale, and ``optimistic'' methods that consider a trusted third-party auditor who replicates the training process. A key challenge with the latter is that hardware nondeterminism between GPU types during training prevents an auditor from replicating the training process exactly, and such schemes are therefore non-robust. We propose a method that combines training in a higher precision than the target model, rounding after intermediate computation steps, and storing rounding decisions based on an adaptive thresholding procedure, to successfully control for nondeterminism. Across three different NVIDIA GPUs (A40, Titan XP, RTX 2080 Ti), we achieve exact training replication at FP32 precision for both full-training and fine-tuning of ResNet-50 (23M) and GPT-2 (117M) models. Our verifiable training scheme significantly decreases the storage and time costs compared to proof-based systems.", "keywords": "auditing;training;security;verification;safety;hardware;nondeterminism", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/ad7311cd6017b9f5fae77a2d4e78a94812efc4cf.zip", "author": "Megha Srivastava;Simran Arora;Dan Boneh", "authorids": "~Megha_Srivastava1;~Simran_Arora1;~Dan_Boneh1", "gender": "F;;M", "homepage": "https://web.stanford.edu/~meghas/;https://scholar.google.com/citations?user=rGRsWH8AAAAJ&hl=en;https://crypto.stanford.edu/~dabo", "dblp": "222/3241;243/2342;b/DanBoneh.html", "google_scholar": "mt4ZDTIAAAAJ;;MwLqCs4AAAAJ", "orcid": ";;0000-0003-0820-0421", "linkedin": ";;", "or_profile": "~Megha_Srivastava1;~Simran_Arora1;~Dan_Boneh1", "aff": "Stanford University;The Wharton School, University of Pennsylvania;Stanford University", "aff_domain": "stanford.edu;wharton.upenn.edu;stanford.edu", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nsrivastava2024optimistic,\ntitle={Optimistic Verifiable Training by Controlling Hardware Nondeterminism},\nauthor={Megha Srivastava and Simran Arora and Dan Boneh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bf0MdFlz1i}\n}", "github": "", "reviewers": "V9Xz;DqT4;RB9A", "pdf_size": 787007, "rating": "6;6;7", "confidence": "3;3;1", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "147;71;107", "wc_strengths": "140;13;52", "wc_weaknesses": "132;152;36", "wc_questions": "5;49;23", "wc_limitations": "12;4;7", "wc_review": "436;289;225", "wc_reply_reviewers": "42;5;13", "wc_reply_authors": "49;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 108.33333333333333, 31.04119127152751 ], "wc_strengths_avg": [ 68.33333333333333, 53.11831657305751 ], "wc_weaknesses_avg": [ 106.66666666666667, 50.63156678946007 ], "wc_questions_avg": [ 25.666666666666668, 18.06162291219209 ], "wc_limitations_avg": [ 7.666666666666667, 3.2998316455372216 ], "wc_review_avg": [ 316.6666666666667, 88.33396226191196 ], "wc_reply_reviewers_avg": [ 20.0, 15.895492023421818 ], "wc_reply_authors_avg": [ 16.333333333333332, 23.098821518760555 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11229634527223736445&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "stanford.edu;wharton.upenn.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;University of Pennsylvania", "aff_unique_dep": ";The Wharton School", "aff_unique_url": "https://www.stanford.edu;https://www.wharton.upenn.edu", "aff_unique_abbr": "Stanford;UPenn Wharton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Guiding a Diffusion Model with a Bad Version of Itself", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94471", "id": "bg6fVPVs3s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bg6fVPVs3s", "openreview": "https://openreview.net/forum?id=bg6fVPVs3s", "poster": "", "project": "", "author_site": "Tero Karras, Miika Aittala, Tuomas Kynk\u00e4\u00e4nniemi, Jaakko Lehtinen, Timo Aila, Samuli Laine", "tldr": "", "abstract": "The primary axes of interest in image-generating diffusion models are image quality, the amount of variation in the results, and how well the results align with a given condition, e.g., a class label or a text prompt. The popular classifier-free guidance approach uses an unconditional model to guide a conditional model, leading to simultaneously better prompt alignment and higher-quality images at the cost of reduced variation. These effects seem inherently entangled, and thus hard to control. We make the surprising observation that it is possible to obtain disentangled control over image quality without compromising the amount of variation by guiding generation using a smaller, less-trained version of the model itself rather than an unconditional model. This leads to significant improvements in ImageNet generation, setting record FIDs of 1.01 for 64x64 and 1.25 for 512x512, using publicly available networks. Furthermore, the method is also applicable to unconditional diffusion models, drastically improving their quality.", "keywords": "diffusion models;classifier-free guidance;guidance", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Tero Karras;Miika Aittala;Tuomas Kynk\u00e4\u00e4nniemi;Jaakko Lehtinen;Timo Aila;Samuli Laine", "authorids": "~Tero_Karras1;~Miika_Aittala2;~Tuomas_Kynk\u00e4\u00e4nniemi1;~Jaakko_Lehtinen1;~Timo_Aila1;~Samuli_Laine1", "gender": "M;M;;M;M;M", "homepage": "http://research.nvidia.com/person/tero-karras;https://people.csail.mit.edu/miika/;;https://users.aalto.fi/~lehtinj7/;https://users.aalto.fi/~ailat1/;https://users.aalto.fi/~laines9/", "dblp": "32/7864;;239/6466;71/4075;95/2789;51/226", "google_scholar": "https://scholar.google.fi/citations?user=-50qJW8AAAAJ;-_EKVQ0AAAAJ;https://scholar.google.fi/citations?user=7sATEtIAAAAJ;https://scholar.google.fi/citations?user=Vpr6s3sAAAAJ;e7abmgkAAAAJ;UCXJOTUAAAAJ", "orcid": ";;;;;0000-0002-0903-3197", "linkedin": ";;;;;", "or_profile": "~Tero_Karras1;~Miika_Aittala2;~Tuomas_Kynk\u00e4\u00e4nniemi1;~Jaakko_Lehtinen1;~Timo_Aila1;~Samuli_Laine1", "aff": "NVIDIA;NVIDIA;Aalto University;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;aalto.fi;nvidia.com;nvidia.com;nvidia.com", "position": "Distinguished Research Scientist;Senior Research Scientist;PhD student;Distinguished Research Scientist;Distinguished Research Scientist;Distinguished Research Scientist", "bibtex": "@inproceedings{\nkarras2024guiding,\ntitle={Guiding a Diffusion Model with a Bad Version of Itself},\nauthor={Tero Karras and Miika Aittala and Tuomas Kynk{\\\"a}{\\\"a}nniemi and Jaakko Lehtinen and Timo Aila and Samuli Laine},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bg6fVPVs3s}\n}", "github": "", "reviewers": "rSGz;m4Uw;oNdT", "pdf_size": 25236131, "rating": "7;7;8", "confidence": "5;4;5", "soundness": "2;4;4", "novelty": "2;4;4", "presentation": "3;4;4", "wc_summary": "61;74;106", "wc_strengths": "31;88;75", "wc_weaknesses": "217;18;112", "wc_questions": "73;102;313", "wc_limitations": "26;7;10", "wc_review": "408;289;616", "wc_reply_reviewers": "53;10;34", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 80.33333333333333, 18.909139471577113 ], "wc_strengths_avg": [ 64.66666666666667, 24.390344173235622 ], "wc_weaknesses_avg": [ 115.66666666666667, 81.28277116885774 ], "wc_questions_avg": [ 162.66666666666666, 106.95897448191162 ], "wc_limitations_avg": [ 14.333333333333334, 8.339997335464535 ], "wc_review_avg": [ 437.6666666666667, 135.1353231229924 ], "wc_reply_reviewers_avg": [ 32.333333333333336, 17.594190960528863 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10926826208415703986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nvidia.com;nvidia.com;aalto.fi;nvidia.com;nvidia.com;nvidia.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "NVIDIA;Aalto University", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.aalto.fi", "aff_unique_abbr": "NVIDIA;Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Finland" }, { "title": "Multistable Shape from Shading Emerges from Patch Diffusion", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94470", "id": "bhSfbjS6j9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bhSfbjS6j9", "openreview": "https://openreview.net/forum?id=bhSfbjS6j9", "poster": "/media/PosterPDFs/NeurIPS%202024/94470.png?t=1731650329.595329", "project": "", "author_site": "Xinran Han, Todd Zickler, Ko Nishino", "tldr": "", "abstract": "Models for inferring monocular shape of surfaces with diffuse reflection---shape from shading---ought to produce distributions of outputs, because there are fundamental mathematical ambiguities of both continuous (e.g., bas-relief) and discrete (e.g., convex/concave) types that are also experienced by humans. Yet, the outputs of current models are limited to point estimates or tight distributions around single modes, which prevent them from capturing these effects. We introduce a model that reconstructs a multimodal distribution of shapes from a single shading image, which aligns with the human experience of multistable perception. We train a small denoising diffusion process to generate surface normal fields from $16\\times 16$ patches of synthetic images of everyday 3D objects. We deploy this model patch-wise at multiple scales, with guidance from inter-patch shape consistency constraints. Despite its relatively small parameter count and predominantly bottom-up structure, we show that multistable shape explanations emerge from this model for ambiguous test images that humans experience as being multistable. At the same time, the model produces veridical shape estimates for object-like images that include distinctive occluding contours and appear less ambiguous. This may inspire new architectures for stochastic 3D shape perception that are more efficient and better aligned with human experience.", "keywords": "shape from shading;multistable perception;diffusion models;low-level vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/04abe9117f035f14785a5953045da48ba81ddd90.zip", "author": "Xinran Han;Todd Zickler;Ko Nishino", "authorids": "~Xinran_Han1;~Todd_Zickler1;~Ko_Nishino4", "gender": ";;M", "homepage": ";;http://vision.ist.i.kyoto-u.ac.jp", "dblp": ";;07/2706", "google_scholar": ";;SXXEZhYAAAAJ", "orcid": ";;0000-0002-3534-3447", "linkedin": ";;ko-nishino-6253347/", "or_profile": "~Xinran_Han1;~Todd_Zickler1;~Ko_Nishino4", "aff": ";;Kyoto University", "aff_domain": ";;kyoto-u.ac.jp", "position": ";;Professor", "bibtex": "@inproceedings{\nhan2024multistable,\ntitle={Multistable Shape from Shading Emerges from Patch Diffusion},\nauthor={Xinran Han and Todd Zickler and Ko Nishino},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bhSfbjS6j9}\n}", "github": "", "reviewers": "E5e1;xHhf;3BEx", "pdf_size": 4732941, "rating": "5;5;8", "confidence": "4;3;4", "soundness": "3;2;4", "novelty": "2;2;4", "presentation": "3;2;4", "wc_summary": "96;63;58", "wc_strengths": "37;73;60", "wc_weaknesses": "29;41;16", "wc_questions": "29;85;266", "wc_limitations": "7;1;62", "wc_review": "198;263;462", "wc_reply_reviewers": "103;15;43", "wc_reply_authors": "458;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 72.33333333333333, 16.858891488535722 ], "wc_strengths_avg": [ 56.666666666666664, 14.884742374510736 ], "wc_weaknesses_avg": [ 28.666666666666668, 10.208928554075703 ], "wc_questions_avg": [ 126.66666666666667, 101.1412653448411 ], "wc_limitations_avg": [ 23.333333333333332, 27.450966386551052 ], "wc_review_avg": [ 307.6666666666667, 112.31008661538623 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 36.709066394496546 ], "wc_reply_authors_avg": [ 152.66666666666666, 215.90327052229253 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5824275574421782738&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";;kyoto-u.ac.jp", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Adam on Local Time: Addressing Nonstationarity in RL with Relative Adam Timesteps", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94469", "id": "biAqUbAuG7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=biAqUbAuG7", "openreview": "https://openreview.net/forum?id=biAqUbAuG7", "poster": "", "project": "", "author_site": "Benjamin Ellis, Matthew T Jackson, Andrei Lupu, Alexander D. Goldie, Mattie Fellows, Shimon Whiteson, Jakob Foerster", "tldr": "", "abstract": "In reinforcement learning (RL), it is common to apply techniques used broadly in \nmachine learning such as neural network function approximators and momentum-based optimizers. However, such tools were largely developed for supervised learning rather than nonstationary RL, leading practitioners to adopt target networks, clipped policy updates, and other RL-specific implementation tricks to combat this mismatch, rather than directly adapting this toolchain for use in RL. \nIn this paper, we take a different approach and instead address the effect of nonstationarity by adapting the widely used Adam optimiser. \nWe first analyse the impact of nonstationary gradient magnitude --- such as that caused by a change in target network --- on Adam's update size, demonstrating that such a change can lead to large updates and hence sub-optimal performance.\nTo address this, we introduce Adam-Rel.\nRather than using the global timestep in the Adam update, Adam-Rel uses the *local* timestep within an epoch, essentially resetting Adam's timestep to 0 after target changes.\nWe demonstrate that this avoids large updates and reduces to learning rate annealing in the absence of such increases in gradient magnitude. Evaluating Adam-Rel in both on-policy and off-policy RL, we demonstrate improved performance in both Atari and Craftax.\nWe then show that increases in gradient norm occur in RL in practice, and examine the differences between our \ntheoretical model and the observed data.", "keywords": "Reinforcement Learning;Deep Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Benjamin Ellis;Matthew Thomas Jackson;Andrei Lupu;Alexander D. Goldie;Mattie Fellows;Shimon Whiteson;Jakob Nicolaus Foerster", "authorids": "~Benjamin_Ellis1;~Matthew_Thomas_Jackson1;~Andrei_Lupu1;~Alexander_D._Goldie1;~Mattie_Fellows1;~Shimon_Whiteson1;~Jakob_Nicolaus_Foerster1", "gender": "M;M;M;;M;M;Unspecified", "homepage": "http://whirl.cs.ox.ac.uk/pages/people/ben.html;https://matthewtjackson.com;;;https://www.jakobfoerster.com;;http://whirl.cs.ox.ac.uk/member/matthew-fellows/", "dblp": ";331/5748;218/7027;https://dblp.uni-trier.de/pers/w/Whiteson:Shimon.html;176/5095;383/3486;26/4512", "google_scholar": ";SdGawnwAAAAJ;I6aB-YUAAAAJ;;6z4lQzMAAAAJ;wogOjBsAAAAJ;", "orcid": ";;;;;;", "linkedin": ";matthew-t-jackson/;lupu-andrei;;;alex-goldie-ml/;", "or_profile": "~Benjamin_Ellis1;~Matthew_Thomas_Jackson1;~Andrei_Lupu1;~Shimon_Whiteson1;~Jakob_Nicolaus_Foerster1;~Alex_Goldie1;~Matthew_Fellows1", "aff": "Department of Computer Science, University of Oxford;Wayve;Meta AI;University of Oxford;University of Oxford, University of Oxford;University of Oxford;Department of Computer Science", "aff_domain": "cs.ox.ac.uk;wayve.ai;meta.com;ox.ac.uk;eng.ox.ac.uk;ox.ac.uk;cs.ox.ac.uk", "position": "PhD student;Intern;Researcher;Professor;Associate Professor;PhD student;Postdoc", "bibtex": "@inproceedings{\nellis2024adam,\ntitle={Adam on Local Time: Addressing Nonstationarity in {RL} with Relative Adam Timesteps},\nauthor={Benjamin Ellis and Matthew Thomas Jackson and Andrei Lupu and Alexander D. Goldie and Mattie Fellows and Shimon Whiteson and Jakob Nicolaus Foerster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=biAqUbAuG7}\n}", "github": "", "reviewers": "SmBt;uFqs;37TD;JkaJ", "pdf_size": 410470, "rating": "4;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "46;91;178;60", "wc_strengths": "19;98;83;51", "wc_weaknesses": "95;198;423;142", "wc_questions": "58;238;162;11", "wc_limitations": "1;14;9;22", "wc_review": "219;639;855;286", "wc_reply_reviewers": "252;630;64;29", "wc_reply_authors": "715;837;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 51.295102105366745 ], "wc_strengths_avg": [ 62.75, 30.433328769623607 ], "wc_weaknesses_avg": [ 214.5, 125.77857528212029 ], "wc_questions_avg": [ 117.25, 88.5758855445431 ], "wc_limitations_avg": [ 11.5, 7.632168761236874 ], "wc_review_avg": [ 499.75, 259.85705204977603 ], "wc_reply_reviewers_avg": [ 243.75, 238.5816160143107 ], "wc_reply_authors_avg": [ 388.0, 390.3901894259127 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3B_GAy-qHgwJ:scholar.google.com/&scioq=Adam+on+Local+Time:+Addressing+Nonstationarity+in+RL+with+Relative+Adam+Timesteps&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "cs.ox.ac.uk;wayve.ai;meta.com;ox.ac.uk;eng.ox.ac.uk;ox.ac.uk;cs.ox.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0;3", "aff_unique_norm": "University of Oxford;Wayve;Meta;Unknown Institution", "aff_unique_dep": "Department of Computer Science;;Meta AI;Department of Computer Science", "aff_unique_url": "https://www.ox.ac.uk;https://www.wayve.ai;https://meta.com;", "aff_unique_abbr": "Oxford;;Meta;", "aff_campus_unique_index": "0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United Kingdom;United States;" }, { "title": "Model Collapse Demystified: The Case of Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94468", "id": "bioHNTRnQk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bioHNTRnQk", "openreview": "https://openreview.net/forum?id=bioHNTRnQk", "poster": "", "project": "", "author_site": "Elvis Dohmatob, Yunzhen Feng, Julia Kempe", "tldr": "", "abstract": "The era of proliferation of large language and image generation models begs the question of what happens if models are trained on the synthesized outputs of other models. The phenomenon of \"model collapse\" refers to the situation whereby as a model is trained recursively on data generated from previous generations of itself over time, its performance degrades until the model eventually becomes completely useless, i.e. the model collapses. In this work, we investigate this phenomenon within the context of high-dimensional regression with Gaussian data, considering both low- and high-dimensional asymptotics. We derive analytical formulas that quantitatively describe this phenomenon in both under-parameterized and over-parameterized regimes. We show how test error increases linearly in the number of model iterations in terms of all problem hyperparameters (covariance spectrum, regularization, label noise level, dataset size) and further isolate how model collapse affects both bias and variance terms in our setup. We show that even in the noise-free case, catastrophic (exponentially fast) model-collapse can happen in the over-parametrized regime. In the special case of polynomial decaying spectral and source conditions, we obtain modified scaling laws which exhibit new crossover phenomena from fast to slow rates. We also propose a simple strategy based on adaptive regularization to mitigate model collapse. Our theoretical results are validated with experiments.", "keywords": "Model Collapse;Kernel Ridge Regression;High dimensional asymptotics;Synthetic Data;Scaling Laws", "primary_area": "learning_theory", "supplementary_material": "", "author": "Elvis Dohmatob;Yunzhen Feng;Julia Kempe", "authorids": "~Elvis_Dohmatob1;~Yunzhen_Feng1;~Julia_Kempe1", "gender": "M;M;", "homepage": "http://dohmatob.github.io/;https://fengyzpku.github.io;", "dblp": "134/9794;254/4752;", "google_scholar": "https://scholar.google.fr/citations?user=FDWgJY8AAAAJ;QebzOsIAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Elvis_Dohmatob1;~Yunzhen_Feng1;~Julia_Kempe1", "aff": "Meta Facebook;Meta FAIR;", "aff_domain": "facebook.com;meta.com;", "position": "Researcher;Intern;", "bibtex": "@inproceedings{\ndohmatob2024model,\ntitle={Model Collapse Demystified: The Case of Regression},\nauthor={Elvis Dohmatob and Yunzhen Feng and Julia Kempe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bioHNTRnQk}\n}", "github": "", "reviewers": "Z9jq;Xs1n;jLLV;16H4", "pdf_size": 770310, "rating": "5;5;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;1;3;2", "wc_summary": "167;63;49;84", "wc_strengths": "114;16;100;101", "wc_weaknesses": "97;54;245;510", "wc_questions": "97;152;271;1", "wc_limitations": "95;1;3;10", "wc_review": "570;286;668;706", "wc_reply_reviewers": "11;0;378;605", "wc_reply_authors": "190;88;1070;2037", "reply_reviewers": "1;0;2;3", "reply_authors": "2;2;4;6", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 90.75, 45.75136609982264 ], "wc_strengths_avg": [ 82.75, 38.931831449342326 ], "wc_weaknesses_avg": [ 226.5, 178.35428225865508 ], "wc_questions_avg": [ 130.25, 97.58938210686652 ], "wc_limitations_avg": [ 27.25, 39.25796097608738 ], "wc_review_avg": [ 557.5, 164.4163921268193 ], "wc_reply_reviewers_avg": [ 248.5, 255.93993435960712 ], "wc_reply_authors_avg": [ 846.25, 786.3772552026159 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4846147560154881612&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "facebook.com;meta.com;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "LSH-MoE: Communication-efficient MoE Training via Locality-Sensitive Hashing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94467", "id": "bjFhVbky5A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bjFhVbky5A", "openreview": "https://openreview.net/forum?id=bjFhVbky5A", "poster": "/media/PosterPDFs/NeurIPS%202024/94467.png?t=1731592407.9994068", "project": "", "author_site": "Xiaonan Nie, Liu Qibin, Fangcheng Fu, Shenhan Zhu, Xupeng Miao, Xiaoyang Li, Yang Zhang, Shouda Liu, Bin CUI", "tldr": "", "abstract": "Larger transformer models perform better on various downstream tasks but require more cost to scale up the model size. To efficiently enlarge models, the Mixture-of-Expert (MoE) architecture is widely adopted, which consists of a gate network and a series of experts and keep the training cost constant by routing the input data to a fixed number of experts instead of all.\nIn existing large-scale MoE training systems, experts would be distributed among different GPUs for parallelization, and thus input data requires additional all-to-all communication to access the target expert and conduct corresponding computation. \nHowever, upon evaluating the training process of three mainstream MoE models on commonly used GPU clusters, we found that the all-to-all communication ratio averaged around 45\\%, which significantly hinders the training efficiency and scalability of MoE models.\n\nIn this paper, we propose LSH-MoE, a communication-efficient MoE training framework using locality-sensitive hashing (LSH). \nWe first present the problems of scaling MoE training in existing systems and highlight the potential of exploiting token similarity to facilitate data compression.\nThen, we introduce an efficient LSH-based compression technique, which utilizes the cross-polytope hashing for rapid clustering and implements a residual-based error compensation scheme to alleviate the adverse impact of compression. \nTo verify the effectiveness of our methods, we conduct experiments on both language models (e.g., RoBERTa, GPT, and T5) and vision models (e.g., Swin) for both pre-training and fine-tuning tasks. The results demonstrate that our method substantially outperforms its counterparts across different tasks by 1.28-2.2$\\times$ of speedup.", "keywords": "mixture of experts;locality-sensitive hashing;communication optimization", "primary_area": "infrastructure", "supplementary_material": "", "author": "Xiaonan Nie;Qibin Liu;Fangcheng Fu;Shenhan Zhu;Xupeng Miao;Xiaoyang Li;Yang Zhang;Shouda Liu;Bin CUI", "authorids": "~Xiaonan_Nie1;~Qibin_Liu1;~Fangcheng_Fu1;~Shenhan_Zhu1;~Xupeng_Miao1;~Xiaoyang_Li4;~Yang_Zhang21;~Shouda_Liu1;~Bin_CUI2", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://codecaution.github.io/;https://github.com/LQBDD;https://ccchengff.github.io/;;https://hsword.github.io;;https://scholar.google.com/citations?view_op=list_works&hl=en&user=Zyko2wwAAAAJ;https://vsooda.github.io/;https://cuibinpku.github.io/index.html", "dblp": "295/3397;;219/9677.html;;243/2364;;;;55/5031.html", "google_scholar": "99LfmxYAAAAJ;;vFYm_QEAAAAJ;https://scholar.google.com/citations?hl=en;aCAgdYkAAAAJ;IbRfaRAAAAAJ;;h3aJn_YAAAAJ;IJAU8KoAAAAJ", "orcid": ";;0000-0003-1658-0380;0009-0004-0267-775X;0000-0002-9371-8358;;;;0000-0003-1681-4677", "linkedin": "https://www.linkedin.com/mwlite/in/\u5c0f\u6960-\u8042-a80b01163;;;;;;;;", "or_profile": "~Xiaonan_Nie1;~Qibin_Liu1;~Fangcheng_Fu1;~Shenhan_Zhu1;~Xupeng_Miao1;~Xiaoyang_Li4;~Yang_Zhang21;~Shouda_Liu1;~Bin_CUI2", "aff": "Peking University;Peking University;Peking University;Peking University;Carnegie Mellon University;ByteDance Inc.;;Xiamen University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;cmu.edu;bytedance.com;;xmu.edu.cn;pku.edu.cn", "position": "PhD student;MS student;Postdoc;MS student;Postdoc;Researcher;;MS student;Full Professor", "bibtex": "@inproceedings{\nnie2024lshmoe,\ntitle={{LSH}-MoE: Communication-efficient MoE Training via Locality-Sensitive Hashing},\nauthor={Xiaonan Nie and Qibin Liu and Fangcheng Fu and Shenhan Zhu and Xupeng Miao and Xiaoyang Li and Yang Zhang and Shouda Liu and Bin CUI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bjFhVbky5A}\n}", "github": "", "reviewers": "4XaA;ooKw;7fVV", "pdf_size": 835207, "rating": "6;6;7", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "75;61;36", "wc_strengths": "58;67;51", "wc_weaknesses": "122;11;76", "wc_questions": "59;17;76", "wc_limitations": "15;1;49", "wc_review": "329;157;288", "wc_reply_reviewers": "31;0;15", "wc_reply_authors": "333;0;38", "reply_reviewers": "1;0;1", "reply_authors": "3;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.333333333333336, 16.131404843417148 ], "wc_strengths_avg": [ 58.666666666666664, 6.548960901462833 ], "wc_weaknesses_avg": [ 69.66666666666667, 45.536310297997964 ], "wc_questions_avg": [ 50.666666666666664, 24.796953217863056 ], "wc_limitations_avg": [ 21.666666666666668, 20.154955277107966 ], "wc_review_avg": [ 258.0, 73.35302765848637 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 12.657891697365017 ], "wc_reply_authors_avg": [ 123.66666666666667, 148.83174691203786 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8fxVq9-SsT4J:scholar.google.com/&scioq=LSH-MoE:+Communication-efficient+MoE+Training+via+Locality-Sensitive+Hashing&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;cmu.edu;bytedance.com;;xmu.edu.cn;pku.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;3;0", "aff_unique_norm": "Peking University;Carnegie Mellon University;ByteDance;Xiamen University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.cmu.edu;https://www.bytedance.com;https://www.xmu.edu.cn", "aff_unique_abbr": "Peking U;CMU;ByteDance;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "OnlineTAS: An Online Baseline for Temporal Action Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94466", "id": "bkLetzd97M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bkLetzd97M", "openreview": "https://openreview.net/forum?id=bkLetzd97M", "poster": "/media/PosterPDFs/NeurIPS%202024/94466.png?t=1731390915.036675", "project": "", "author_site": "Qing Zhong, Guodong Ding, Angela Yao", "tldr": "", "abstract": "Temporal context plays a significant role in temporal action segmentation. In an offline setting, the context is typically captured by the segmentation network after observing the entire sequence. However, capturing and using such context information in an online setting remains an under-explored problem. This work presents the first online framework for temporal action segmentation. At the core of the framework is an adaptive memory designed to accommodate dynamic changes in context over time, alongside a feature augmentation module that enhances the frames with the memory. In addition, we propose a post-processing approach to mitigate the severe over-segmentation in the online setting. On three common segmentation benchmarks, our approach achieves state-of-the-art performance.", "keywords": "Temporal Action Segmentation;Video Understanding", "primary_area": "machine_vision", "supplementary_material": "/attachment/74aef815a086aab9e126d8b38953f84913e8e40e.zip", "author": "Qing Zhong;Guodong Ding;Angela Yao", "authorids": "~Qing_Zhong1;~Guodong_Ding1;~Angela_Yao1", "gender": "M;M;", "homepage": ";https://comp.nus.edu.sg/~dinggd;http://www.angelayao.com", "dblp": ";54/5798;64/8484", "google_scholar": "t6-g5_wAAAAJ;PqlGbTYAAAAJ;https://scholar.google.ch/citations?user=-LJCZMMAAAAJ", "orcid": "0000-0003-3567-6724;0000-0001-6080-5220;", "linkedin": ";;", "or_profile": "~Qing_Zhong1;~Guodong_Ding1;~Angela_Yao1", "aff": "University of Adelaide;Natioal University of Singapore;National University of Singapore", "aff_domain": "adelaide.edu.au;nus.edu.sg;nus.edu.sg", "position": "PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nzhong2024onlinetas,\ntitle={Online{TAS}: An Online Baseline for Temporal Action Segmentation},\nauthor={Qing Zhong and Guodong Ding and Angela Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bkLetzd97M}\n}", "github": "", "reviewers": "aGP1;xPL1;itXK", "pdf_size": 509047, "rating": "4;4;6", "confidence": "5;5;4", "soundness": "3;3;2", "novelty": "2;2;2", "presentation": "4;3;3", "wc_summary": "36;66;71", "wc_strengths": "58;21;53", "wc_weaknesses": "119;202;236", "wc_questions": "1;30;125", "wc_limitations": "12;4;58", "wc_review": "226;323;543", "wc_reply_reviewers": "60;10;22", "wc_reply_authors": "357;43;30", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 4.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 57.666666666666664, 15.456030825826172 ], "wc_strengths_avg": [ 44.0, 16.391054470858997 ], "wc_weaknesses_avg": [ 185.66666666666666, 49.141518992486276 ], "wc_questions_avg": [ 52.0, 52.959103718498355 ], "wc_limitations_avg": [ 24.666666666666668, 23.79542439676633 ], "wc_review_avg": [ 364.0, 132.62227062852855 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 21.312489817527705 ], "wc_reply_authors_avg": [ 143.33333333333334, 151.17833472058385 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10380770486565163865&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "adelaide.edu.au;nus.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Adelaide;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;https://www.nus.edu.sg", "aff_unique_abbr": "Adelaide;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Australia;Singapore" }, { "title": "ChatQA: Surpassing GPT-4 on Conversational QA and RAG", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94465", "id": "bkUvKPKafQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bkUvKPKafQ", "openreview": "https://openreview.net/forum?id=bkUvKPKafQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94465.png?t=1733871569.2851117", "project": "", "author_site": "Zihan Liu, Wei Ping, Rajarshi Roy, Peng Xu, Chankyu Lee, Mohammad Shoeybi, Bryan Catanzaro", "tldr": "", "abstract": "In this work, we introduce ChatQA, a suite of models that outperform GPT-4 on retrieval-augmented generation (RAG) and conversational question answering (QA). To enhance generation, we propose a two-stage instruction tuning method that significantly boosts the performance of RAG. For effective retrieval, we introduce a dense retriever optimized for conversational QA, which yields results comparable to the alternative state-of-the-art query rewriting models, while substantially reducing deployment costs. We also present the ChatRAG Bench, which encompasses ten datasets covering comprehensive evaluations on RAG, table-related QA, arithmetic calculations, and scenarios involving unanswerable questions. Our ChatQA-1.0-70B (score: 54.14), built on Llama2, a weaker foundation model than GPT-4, can slightly outperform GPT-4-0613 (score: 53.90) and GPT-4-Turbo-2024-04-09 (score: 54.03) on the ChatRAG Bench, without relying on any synthetic data from OpenAI GPT models. Notably, Llama3-ChatQA-1.5-70B model surpasses the accuracy of GPT-4-Turbo-2024-04-09 by a margin. These results demonstrate the exceptional quality of the proposed ChatQA recipe. To advance research in this field, we open-sourced the model weights, instruction tuning data, ChatRAG Bench, and retriever for the community.", "keywords": "large language models;retrieval-augmented generation;RAG", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zihan Liu;Wei Ping;Rajarshi Roy;Peng Xu;Chankyu Lee;Mohammad Shoeybi;Bryan Catanzaro", "authorids": "~Zihan_Liu2;~Wei_Ping1;~Rajarshi_Roy1;~Peng_Xu7;~Chankyu_Lee1;~Mohammad_Shoeybi1;~Bryan_Catanzaro1", "gender": "M;M;M;M;M;M;M", "homepage": "https://zliucr.github.io;https://wpingnet.github.io/;;https://scholar.google.com.hk/citations?user=PQ26NTIAAAAJ&hl=en;;;https://ctnzr.io", "dblp": "46/9231;08/8399.html;94/4026-3;84/586-8;;53/9742;14/4826", "google_scholar": "LPabcsYAAAAJ;6gKEYRgAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=PQ26NTIAAAAJ;E_bpWKMAAAAJ;62ElavIAAAAJ;UZ6kI2AAAAAJ", "orcid": ";;0000-0003-4548-2114;;;;0000-0003-0034-7728", "linkedin": ";wei-ping/;rajarshir/;;;shoeybi/;bryancatanzaro/", "or_profile": "~Zihan_Liu2;~Wei_Ping1;~Rajarshi_Roy1;~Peng_Xu7;~Chankyu_Lee1;~Mohammad_Shoeybi1;~Bryan_Catanzaro1", "aff": "NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "Researcher;Principal Researcher;Researcher;Researcher;Researcher;Director of Applied Resesrch;Vice President", "bibtex": "@inproceedings{\nliu2024chatqa,\ntitle={Chat{QA}: Surpassing {GPT}-4 on Conversational {QA} and {RAG}},\nauthor={Zihan Liu and Wei Ping and Rajarshi Roy and Peng Xu and Chankyu Lee and Mohammad Shoeybi and Bryan Catanzaro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bkUvKPKafQ}\n}", "github": "", "reviewers": "ynR1;A2Vo;DkJ4;NmdV", "pdf_size": 967458, "rating": "5;7;7;8", "confidence": "3;2;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "83;96;129;306", "wc_strengths": "62;63;142;124", "wc_weaknesses": "205;94;120;30", "wc_questions": "87;48;134;19", "wc_limitations": "1;1;11;29", "wc_review": "438;302;536;508", "wc_reply_reviewers": "21;19;0;0", "wc_reply_authors": "450;20;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "4;2;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 153.5, 89.62839951711734 ], "wc_strengths_avg": [ 97.75, 35.82160660830276 ], "wc_weaknesses_avg": [ 112.25, 62.77091284982241 ], "wc_questions_avg": [ 72.0, 43.16827538829875 ], "wc_limitations_avg": [ 10.5, 11.434596626029272 ], "wc_review_avg": [ 446.0, 90.47651629014017 ], "wc_reply_reviewers_avg": [ 10.0, 10.024968827881711 ], "wc_reply_authors_avg": [ 117.5, 192.142525225417 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3458572319330373, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9904492012674196538&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Can Graph Learning Improve Planning in LLM-based Agents?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94464", "id": "bmoS6Ggw4j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bmoS6Ggw4j", "openreview": "https://openreview.net/forum?id=bmoS6Ggw4j", "poster": "/media/PosterPDFs/NeurIPS%202024/94464.png?t=1731241194.8545625", "project": "", "author_site": "Xixi Wu, Yifei Shen, Caihua Shan, Kaitao Song, Siwei Wang, Bohang Zhang, Jiarui Feng, Hong Cheng, Wei Chen, Yun Xiong, Dongsheng Li", "tldr": "", "abstract": "Task planning in language agents is emerging as an important research topic alongside the development of large language models (LLMs). It aims to break down complex user requests in natural language into solvable sub-tasks, thereby fulfilling the original requests. In this context, the sub-tasks can be naturally viewed as a graph, where the nodes represent the sub-tasks, and the edges denote the dependencies among them. Consequently, task planning is a decision-making problem that involves selecting a connected path or subgraph within the corresponding graph and invoking it. In this paper, we explore graph learning-based methods for task planning, a direction that is orthogonal to the prevalent focus on prompt design. Our interest in graph learning stems from a theoretical discovery: the biases of attention and auto-regressive loss impede LLMs' ability to effectively navigate decision-making on graphs, which is adeptly addressed by graph neural networks (GNNs). This theoretical insight led us to integrate GNNs with LLMs to enhance overall performance. Extensive experiments demonstrate that GNN-based methods surpass existing solutions even without training, and minimal training can further enhance their performance. The performance gain increases with a larger task graph size.", "keywords": "Task Planning;Language Agents;Graph Learning;Graph Neural Networks;Language Model", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/a8c253e074e94885c0d9302a527a43c6b90804be.zip", "author": "Xixi Wu;Yifei Shen;Caihua Shan;Kaitao Song;Siwei Wang;Bohang Zhang;Jiarui Feng;Hong Cheng;Wei Chen;Yun Xiong;Dongsheng Li", "authorids": "~Xixi_Wu1;~Yifei_Shen1;~Caihua_Shan1;~Kaitao_Song1;~Siwei_Wang2;~Bohang_Zhang1;~Jiarui_Feng1;~Hong_Cheng1;~Wei_Chen10;~Yun_Xiong1;~Dongsheng_Li2", "gender": ";M;F;M;M;M;M;F;M;F;M", "homepage": "https://wxxshirley.github.io/;https://openreview.net/profile?id=~Yifei_Shen1;;;https://www.microsoft.com/en-us/research/people/siweiwang/publications/;https://zbh2047.github.io;https://jiaruifeng.github.io/;https://www1.se.cuhk.edu.hk/~hcheng/;https://www.microsoft.com/en-us/research/people/weic/;https://dblp.org/pid/67/4330;http://recmind.cn", "dblp": "78/8488;51/609.html;;222/2082;51/8279-2;276/0156.html;77/8797;85/5637-1;c/WeiChen13;67/4330;254/0830-2.html", "google_scholar": "VDW27LgAAAAJ;;-knurggAAAAJ;https://scholar.google.com.hk/citations?user=LLk9dR8AAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;6CSGUR8AAAAJ;https://scholar.google.com.hk/citations?user=s3lQL7YAAAAJ;hlEPkxAAAAAJ;;VNg5rA8AAAAJ", "orcid": "0000-0002-9935-5957;;;;;;0000-0002-3409-6819;0000-0002-4673-2587;;0000-0002-8575-5415;0000-0003-3103-8442", "linkedin": ";;;;;zhangbohang;;;;;", "or_profile": "~Xixi_Wu1;~Yifei_Shen1;~Caihua_Shan1;~Kaitao_Song1;~Siwei_Wang2;~Bohang_Zhang1;~Jiarui_Feng1;~Hong_Cheng1;~Wei_Chen10;~Yun_Xiong1;~Dongsheng_Li2", "aff": "Fudan University;Microsoft Research Asia;Microsoft;Microsoft;Microsoft;Peking University;Washington University, Saint Louis;The Chinese University of Hong Kong;Microsoft Research;Fudan University;Microsoft Research Asia", "aff_domain": "fudan.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn;wustl.edu;cuhk.edu.hk;microsoft.com;fudan.edu.cn;microsoft.com", "position": "MS student;Research Cheerleader;Researcher;Researcher;Researcher;PhD student;PhD student;Professor;Pricipal Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nwu2024can,\ntitle={Can Graph Learning Improve Planning in {LLM}-based Agents?},\nauthor={Xixi Wu and Yifei Shen and Caihua Shan and Kaitao Song and Siwei Wang and Bohang Zhang and Jiarui Feng and Hong Cheng and Wei Chen and Yun Xiong and Dongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bmoS6Ggw4j}\n}", "github": "", "reviewers": "wd1Z;xcxm;ZJrg;s7yo", "pdf_size": 2794037, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "41;32;61;50", "wc_strengths": "16;34;70;37", "wc_weaknesses": "479;524;347;20", "wc_questions": "3;54;12;1", "wc_limitations": "1;53;3;1", "wc_review": "540;697;493;109", "wc_reply_reviewers": "154;88;0;10", "wc_reply_authors": "325;61;0;40", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 46.0, 10.747092630102339 ], "wc_strengths_avg": [ 39.25, 19.48557158514987 ], "wc_weaknesses_avg": [ 342.5, 197.23146300730014 ], "wc_questions_avg": [ 17.5, 21.47673159491453 ], "wc_limitations_avg": [ 14.5, 22.242976419535225 ], "wc_review_avg": [ 459.75, 216.13580800043292 ], "wc_reply_reviewers_avg": [ 63.0, 62.617888817813075 ], "wc_reply_authors_avg": [ 106.5, 128.0400328022451 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7993867619161662483&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "fudan.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn;wustl.edu;cuhk.edu.hk;microsoft.com;fudan.edu.cn;microsoft.com", "author_num": 11, "aff_unique_index": "0;1;1;1;1;2;3;4;1;0;1", "aff_unique_norm": "Fudan University;Microsoft;Peking University;Washington University in St. Louis;Chinese University of Hong Kong", "aff_unique_dep": ";Research;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com/en-us/research/group/asia;http://www.pku.edu.cn;https://wustl.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "Fudan;MSR Asia;Peking U;WUSTL;CUHK", "aff_campus_unique_index": "1;2;3;1", "aff_campus_unique": ";Asia;Saint Louis;Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;1;0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Secret Collusion among AI Agents: Multi-Agent Deception via Steganography", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94463", "id": "bnNSQhZJ88", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bnNSQhZJ88", "openreview": "https://openreview.net/forum?id=bnNSQhZJ88", "poster": "", "project": "", "author_site": "Sumeet Motwani, Mikhail Baranchuk, Martin Strohmeier, Vijay Bolina, Philip Torr, Lewis Hammond, Christian Schroeder de Witt", "tldr": "", "abstract": "Recent advancements in generative AI suggest the potential for large-scale interaction between autonomous agents and humans across platforms such as the internet. While such interactions could foster productive cooperation, the ability of AI agents to circumvent security oversight raises critical multi-agent security problems, particularly in the form of unintended information sharing or undesirable coordination. In our work, we establish the subfield of secret collusion, a form of multi-agent deception, in which two or more agents employ steganographic methods to conceal the true nature of their interactions, be it communicative or otherwise, from oversight. We propose a formal threat model for AI agents communicating steganographically and derive rigorous theoretical insights about the capacity and incentives of large language models (LLMs) to perform secret collusion, in addition to the limitations of threat mitigation measures. We complement our findings with empirical evaluations demonstrating rising steganographic capabilities in frontier single and multi-agent LLM setups and examining potential scenarios where collusion may emerge, revealing limitations in countermeasures such as monitoring, paraphrasing, and parameter optimization. Our work is the first to formalize and investigate secret collusion among frontier foundation models, identifying it as a critical area in AI Safety and outlining a comprehensive research agenda to mitigate future risks of collusion between generative AI systems.", "keywords": "Collusion;AI Safety;Steganography;Large Language Models;Model Evaluation Framework;Multi-Agent Security;Security;Frontier Models;GenAI;AI Control", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Sumeet Ramesh Motwani;Mikhail Baranchuk;Martin Strohmeier;Vijay Bolina;Philip Torr;Lewis Hammond;Christian Schroeder de Witt", "authorids": "~Sumeet_Ramesh_Motwani1;~Mikhail_Baranchuk1;~Martin_Strohmeier1;~Vijay_Bolina1;~Philip_Torr1;~Lewis_Hammond1;~Christian_Schroeder_de_Witt1", "gender": "M;;;M;;;M", "homepage": "https://sumeetmotwani.com;http://baranchuk.ca/;https://www.cs.ox.ac.uk/people/martin.strohmeier/;;http://www.robots.ox.ac.uk/~tvg/;https://www.lewishammond.com/;https://www.schroederdewitt.com", "dblp": ";;117/8959;;;228/6647;", "google_scholar": ";;https://scholar.google.co.uk/citations?user=QUNoQIYAAAAJ;e5FdMRsAAAAJ;;8fYnp7UAAAAJ;DE60h_0AAAAJ", "orcid": ";;;;;0000-0003-1695-0871;", "linkedin": ";;;;;lrhammond/;", "or_profile": "~Sumeet_Ramesh_Motwani1;~Mikhail_Baranchuk1;~Martin_Strohmeier1;~Vijay_Bolina1;~Philip_Torr1;~Lewis_Hammond1;~Christian_Schroeder_de_Witt1", "aff": "University of California, Berkeley;;University of Oxford;University of California, Davis;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "berkeley.edu;;ox.ac.uk;ucdavis.edu;ox.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "Undergrad student;;PhD / PostDoc / Fellow;Undergrad student;Full Professor;PhD student;Lecturer", "bibtex": "@inproceedings{\nmotwani2024secret,\ntitle={Secret Collusion among {AI} Agents: Multi-Agent Deception via Steganography},\nauthor={Sumeet Ramesh Motwani and Mikhail Baranchuk and Martin Strohmeier and Vijay Bolina and Philip Torr and Lewis Hammond and Christian Schroeder de Witt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bnNSQhZJ88}\n}", "github": "", "reviewers": "pyc3;Pbei;csJ4;AWRn", "pdf_size": 1111170, "rating": "5;5;6;7", "confidence": "2;4;3;4", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "1;3;3;3", "wc_summary": "56;101;74;36", "wc_strengths": "17;96;63;40", "wc_weaknesses": "326;75;46;13", "wc_questions": "89;371;21;149", "wc_limitations": "38;1;165;53", "wc_review": "526;644;369;291", "wc_reply_reviewers": "674;75;0;0", "wc_reply_authors": "1226;125;103;99", "reply_reviewers": "2;1;0;0", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 66.75, 23.909987452945266 ], "wc_strengths_avg": [ 54.0, 29.197602641312866 ], "wc_weaknesses_avg": [ 115.0, 123.78004685731865 ], "wc_questions_avg": [ 157.5, 131.3192674362753 ], "wc_limitations_avg": [ 64.25, 61.169334637545305 ], "wc_review_avg": [ 457.5, 136.95711007465076 ], "wc_reply_reviewers_avg": [ 187.25, 282.68832218540615 ], "wc_reply_authors_avg": [ 388.25, 483.7764850630919 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13669083264185656302&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "berkeley.edu;;ox.ac.uk;ucdavis.edu;ox.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "University of California, Berkeley;University of Oxford;University of California, Davis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.ox.ac.uk;https://www.ucdavis.edu", "aff_unique_abbr": "UC Berkeley;Oxford;UC Davis", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Davis", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Decoupled Kullback-Leibler Divergence Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94462", "id": "bnZZedw9CM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bnZZedw9CM", "openreview": "https://openreview.net/forum?id=bnZZedw9CM", "poster": "/media/PosterPDFs/NeurIPS%202024/94462.png?t=1730030226.8639448", "project": "", "author_site": "Jiequan Cui, Zhuotao Tian, Zhisheng Zhong, Xiaojuan Qi, Bei Yu, Hanwang Zhang", "tldr": "", "abstract": "In this paper, we delve deeper into the Kullback\u2013Leibler (KL) Divergence loss and mathematically prove that it is equivalent to the Decoupled Kullback-Leibler (DKL) Divergence loss that consists of 1) a weighted Mean Square Error ($\\mathbf{w}$MSE) loss and 2) a Cross-Entropy loss incorporating soft labels. \nThanks to the decomposed formulation of DKL loss, we have identified two areas for improvement. \nFirstly, we address the limitation of KL/DKL in scenarios like knowledge distillation by breaking its asymmetric optimization property. This modification ensures that the $\\mathbf{w}$MSE component is always effective during training, providing extra constructive cues.\nSecondly, we introduce class-wise global information into KL/DKL to mitigate bias from individual samples.\nWith these two enhancements, we derive the Improved Kullback\u2013Leibler (IKL) Divergence loss and evaluate its effectiveness by conducting experiments on CIFAR-10/100 and ImageNet datasets, focusing on adversarial training, and knowledge distillation tasks. The proposed approach achieves new state-of-the-art adversarial robustness on the public leaderboard --- \\textit{RobustBench} and competitive performance on knowledge distillation, demonstrating the substantial practical merits. Our code is available at https://github.com/jiequancui/DKL.", "keywords": "Adversarial Training; Knowledge Distillation; Kullback-Leibler Divergence Loss; Long-tail Recognition", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiequan Cui;Zhuotao Tian;Zhisheng Zhong;XIAOJUAN QI;Bei Yu;Hanwang Zhang", "authorids": "~Jiequan_Cui1;~Zhuotao_Tian1;~Zhisheng_Zhong1;~XIAOJUAN_QI2;~Bei_Yu2;~Hanwang_Zhang3", "gender": "M;M;M;F;M;M", "homepage": "https://jiequancui.github.io/;https://scholar.google.com/citations?user=mEjhz-IAAAAJ&hl=zh-CN;https://zhishengzhong.com;https://xjqi.github.io/;http://www.cse.cuhk.edu.hk/~byu/index.html;https://mreallab.github.io/index.html", "dblp": "259/5474;243/7181;168/0784;176/1445-1.html;28/4556-1.html;79/8116.html", "google_scholar": "KbXLN2AAAAAJ;mEjhz-IAAAAJ;https://scholar.google.com.hk/citations?user=u-2_7C8AAAAJ;bGn0uacAAAAJ;tGneTm4AAAAJ;YG0DFyYAAAAJ", "orcid": ";;;;0000-0001-6406-4810;", "linkedin": ";;;;yubei/;", "or_profile": "~Jiequan_Cui1;~Zhuotao_Tian1;~Zhisheng_Zhong1;~XIAOJUAN_QI2;~Bei_Yu2;~Hanwang_Zhang3", "aff": "Nanyang Technological University;SmartMore;The Chinese University of Hong Kong;University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Nanyang Technological University", "aff_domain": "ntu.edu.sg;smartmore.com;cuhk.edu.hk;hku.hk;cse.cuhk.edu.hk;ntu.edu.sg", "position": "Research Fellow;Researcher;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncui2024decoupled,\ntitle={Decoupled Kullback-Leibler Divergence Loss},\nauthor={Jiequan Cui and Zhuotao Tian and Zhisheng Zhong and XIAOJUAN QI and Bei Yu and Hanwang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bnZZedw9CM}\n}", "github": "", "reviewers": "nZvw;RheX;wpCy;hZqZ;JUtg", "pdf_size": 1567467, "rating": "4;5;5;6;6", "confidence": "3;4;5;3;4", "soundness": "2;2;2;3;3", "novelty": "3;2;2;3;3", "presentation": "1;3;3;2;3", "wc_summary": "98;53;83;229;139", "wc_strengths": "46;21;9;88;79", "wc_weaknesses": "193;175;203;300;83", "wc_questions": "77;79;18;230;132", "wc_limitations": "16;1;1;23;7", "wc_review": "430;329;314;870;440", "wc_reply_reviewers": "0;24;237;0;0", "wc_reply_authors": "115;32;400;115;63", "reply_reviewers": "0;1;3;0;0", "reply_authors": "3;2;5;3;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 120.4, 60.964251820226586 ], "wc_strengths_avg": [ 48.6, 31.02644033723495 ], "wc_weaknesses_avg": [ 190.8, 69.20809201242294 ], "wc_questions_avg": [ 107.2, 71.21629027125746 ], "wc_limitations_avg": [ 9.6, 8.662563131083086 ], "wc_review_avg": [ 476.6, 203.22362067436944 ], "wc_reply_reviewers_avg": [ 52.2, 92.8663555869401 ], "wc_reply_authors_avg": [ 145.0, 131.39101948002383 ], "reply_reviewers_avg": [ 0.8, 1.1661903789690604 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.07142857142857148, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1302951723222363368&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 8, "email": "ntu.edu.sg;smartmore.com;cuhk.edu.hk;hku.hk;cse.cuhk.edu.hk;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;1;2;3;2;0", "aff_unique_norm": "Nanyang Technological University;SmartMore;Chinese University of Hong Kong;University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;;https://www.cuhk.edu.hk;https://www.hku.hk", "aff_unique_abbr": "NTU;;CUHK;HKU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;2;2;2;0", "aff_country_unique": "Singapore;;China" }, { "title": "Revealing Distribution Discrepancy by Sampling Transfer in Unlabeled Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94461", "id": "bnzeOG0yey", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bnzeOG0yey", "openreview": "https://openreview.net/forum?id=bnzeOG0yey", "poster": "/media/PosterPDFs/NeurIPS%202024/94461.png?t=1731568371.357289", "project": "", "author_site": "Zhilin Zhao, Longbing Cao, Xuhui Fan, Wei-Shi Zheng", "tldr": "", "abstract": "There are increasing cases where the class labels of test samples are unavailable, creating a significant need and challenge in measuring the discrepancy between training and test distributions. This distribution discrepancy complicates the assessment of whether the hypothesis selected by an algorithm on training samples remains applicable to test samples. We present a novel approach called Importance Divergence (I-Div) to address the challenge of test label unavailability, enabling distribution discrepancy evaluation using only training samples. I-Div transfers the sampling patterns from the test distribution to the training distribution by estimating density and likelihood ratios. Specifically, the density ratio, informed by the selected hypothesis, is obtained by minimizing the Kullback-Leibler divergence between the actual and estimated input distributions. Simultaneously, the likelihood ratio is adjusted according to the density ratio by reducing the generalization error of the distribution discrepancy as transformed through the two ratios. Experimentally, I-Div accurately quantifies the distribution discrepancy, as evidenced by a wide range of complex data scenarios and tasks.", "keywords": "Non-IID Data; Distribution Discrepancy; Density Ratio; Likelihood Ratio; Generalization", "primary_area": "evaluation", "supplementary_material": "", "author": "Zhilin Zhao;Longbing Cao;Xuhui Fan;Wei-Shi Zheng", "authorids": "~Zhilin_Zhao1;~Longbing_Cao1;~Xuhui_Fan1;~Wei-Shi_Zheng3", "gender": "M;M;M;M", "homepage": "https://lawliet-zzl.github.io/;https://www.datasciences.org;https://xuhuifan.github.io/;http://www.isee-ai.cn/~zhwshi", "dblp": "189/1602.html;14/2589;117/4874;30/8399", "google_scholar": "3e8zto0AAAAJ;cDs3DM8AAAAJ;https://scholar.google.com.au/citations?user=NSc42eUAAAAJ;AwqDDGoAAAAJ", "orcid": ";0000-0003-1562-9429;0000-0002-7558-7200;", "linkedin": ";;;", "or_profile": "~Zhilin_Zhao1;~Longbing_Cao1;~Xuhui_Fan1;~Wei-Shi_Zheng3", "aff": "Macquarie University;Macquarie University;Macquarie University;SUN YAT-SEN UNIVERSITY", "aff_domain": "mq.edu.au;mq.edu.au;mq.edu.au;sysu.edu.cn", "position": "Postdoc;Full Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nzhao2024revealing,\ntitle={Revealing Distribution Discrepancy by Sampling Transfer in Unlabeled Data},\nauthor={Zhilin Zhao and Longbing Cao and Xuhui Fan and Wei-Shi Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bnzeOG0yey}\n}", "github": "", "reviewers": "fpGs;f37S;1gtU;yuQt", "pdf_size": 512169, "rating": "4;6;6;6", "confidence": "4;2;3;3", "soundness": "2;3;3;3", "novelty": "2;4;3;3", "presentation": "2;3;3;3", "wc_summary": "31;100;85;76", "wc_strengths": "106;66;84;40", "wc_weaknesses": "225;26;89;147", "wc_questions": "2;28;179;115", "wc_limitations": "2;1;11;83", "wc_review": "366;221;448;461", "wc_reply_reviewers": "0;0;73;292", "wc_reply_authors": "0;0;187;888", "reply_reviewers": "0;0;2;2", "reply_authors": "1;1;2;4", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 25.71964229922337 ], "wc_strengths_avg": [ 74.0, 24.20743687382041 ], "wc_weaknesses_avg": [ 121.75, 73.38042995240625 ], "wc_questions_avg": [ 81.0, 70.37400088100719 ], "wc_limitations_avg": [ 24.25, 34.14216601213227 ], "wc_review_avg": [ 374.0, 95.54841704601914 ], "wc_reply_reviewers_avg": [ 91.25, 119.67325306851151 ], "wc_reply_authors_avg": [ 268.75, 365.5840361667889 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NaFBMkchemcJ:scholar.google.com/&scioq=Revealing+Distribution+Discrepancy+by+Sampling+Transfer+in+Unlabeled+Data&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "mq.edu.au;mq.edu.au;mq.edu.au;sysu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Macquarie University;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.mq.edu.au;http://www.sysu.edu.cn", "aff_unique_abbr": "MQ;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Australia;China" }, { "title": "Advancing Cross-domain Discriminability in Continual Learning of Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94460", "id": "boGxvYWZEq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=boGxvYWZEq", "openreview": "https://openreview.net/forum?id=boGxvYWZEq", "poster": "/media/PosterPDFs/NeurIPS%202024/94460.png?t=1733384797.8005311", "project": "", "author_site": "Yicheng Xu, Yuxin Chen, Jiahao Nie, Yusong Wang, HUIPING ZHUANG, Manabu Okumura", "tldr": "", "abstract": "Continual learning (CL) with Vision-Language Models (VLMs) has overcome the constraints of traditional CL, which only focuses on previously encountered classes. During the CL of VLMs, we need not only to prevent the catastrophic forgetting on incrementally learned knowledge but also to preserve the zero-shot ability of VLMs. However, existing methods require additional reference datasets to maintain such zero-shot ability and rely on domain-identity hints to classify images across different domains. In this study, we propose Regression-based Analytic Incremental Learning (RAIL), which utilizes a recursive ridge regression-based adapter to learn from a sequence of domains in a non-forgetting manner and decouple the cross-domain correlations by projecting features to a higher-dimensional space. Cooperating with a training-free fusion module, RAIL absolutely preserves the VLM's zero-shot ability on unseen domains without any reference data.\nAdditionally, we introduce Cross-domain Task-Agnostic Incremental Learning (X-TAIL) setting. In this setting, a CL learner is required to incrementally learn from multiple domains and classify test images from both seen and unseen domains without any domain-identity hint.\nWe theoretically prove RAIL's absolute memorization on incrementally learned domains. Experiment results affirm RAIL's state-of-the-art performance in both X-TAIL and existing Multi-domain Task-Incremental Learning settings. The code is released at https://github.com/linghan1997/Regression-based-Analytic-Incremental-Learning.", "keywords": "Continual Learning;Incremental learning;Analytic learning;Cross-domain;Vision-Language model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yicheng Xu;Yuxin Chen;Jiahao Nie;Yusong Wang;Huiping Zhuang;Manabu Okumura", "authorids": "~Yicheng_Xu1;~Yuxin_Chen7;~Jiahao_Nie1;~Yusong_Wang2;~Huiping_Zhuang2;~Manabu_Okumura2", "gender": "M;M;;M;;", "homepage": ";http://thomaschen98.github.io;;https://scholar.google.com/citations?user=lVBDcCUAAAAJ&hl=en;;", "dblp": ";;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;EzoDsIMAAAAJ;;lVBDcCUAAAAJ;;", "orcid": "0000-0003-2975-1206;;;0009-0002-6668-3230;;", "linkedin": ";thomaschen98/;;;;", "or_profile": "~Yicheng_Xu1;~Yuxin_Chen7;~Jiahao_Nie1;~Yusong_Wang2;~Huiping_Zhuang2;~Manabu_Okumura2", "aff": "Institute of Science Tokyo;University of California, Berkeley;;Institute of Science Tokyo;;", "aff_domain": "titech.ac.jp;berkeley.edu;;titech.ac.jp;;", "position": "PhD student;PhD student;;PhD student;;", "bibtex": "@inproceedings{\nxu2024advancing,\ntitle={Advancing Cross-domain Discriminability in Continual Learning of Vision-Language Models},\nauthor={Yicheng Xu and Yuxin Chen and Jiahao Nie and Yusong Wang and Huiping Zhuang and Manabu Okumura},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=boGxvYWZEq}\n}", "github": "", "reviewers": "ZU8x;Tz45;ThWm;rqEC", "pdf_size": 4005205, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "wc_summary": "66;103;64;75", "wc_strengths": "54;91;89;104", "wc_weaknesses": "80;181;68;51", "wc_questions": "78;55;2;5", "wc_limitations": "22;30;1;1", "wc_review": "300;460;224;236", "wc_reply_reviewers": "20;44;27;30", "wc_reply_authors": "24;20;25;22", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 77.0, 15.572411502397436 ], "wc_strengths_avg": [ 84.5, 18.527007313648905 ], "wc_weaknesses_avg": [ 95.0, 50.709959574032396 ], "wc_questions_avg": [ 35.0, 32.54996159751959 ], "wc_limitations_avg": [ 13.5, 12.816005617976296 ], "wc_review_avg": [ 305.0, 94.03722667114339 ], "wc_reply_reviewers_avg": [ 30.25, 8.728545125048045 ], "wc_reply_authors_avg": [ 22.75, 1.920286436967152 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7187040414399396000&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "titech.ac.jp;berkeley.edu;;titech.ac.jp;;", "author_num": 6, "aff_unique_index": "0;1;0", "aff_unique_norm": "Institute of Science, Tokyo;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.iost.jp;https://www.berkeley.edu", "aff_unique_abbr": "IoST;UC Berkeley", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Tokyo;Berkeley", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Japan;United States" }, { "title": "Fast Encoder-Based 3D from Casual Videos via Point Track Processing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94459", "id": "bqGAheAeQY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bqGAheAeQY", "openreview": "https://openreview.net/forum?id=bqGAheAeQY", "poster": "", "project": "", "author_site": "Yoni Kasten, Wuyue Lu, Haggai Maron", "tldr": "", "abstract": "This paper addresses the long-standing challenge of reconstructing 3D structures from videos with dynamic content. Current approaches to this problem were not designed to operate on casual videos recorded by standard cameras or require a long optimization time. \n Aiming to significantly improve the efficiency of previous approaches, we present TracksTo4D, a learning-based approach that enables inferring 3D structure and camera positions from dynamic content originating from casual videos using a single efficient feed-forward pass. To achieve this, we propose operating directly over 2D point tracks as input and designing an architecture tailored for processing 2D point tracks. Our proposed architecture is designed with two key principles in mind: (1) it takes into account the inherent symmetries present in the input point tracks data, and (2) it assumes that the movement patterns can be effectively represented using a low-rank approximation. TracksTo4D is trained in an unsupervised way on a dataset of casual videos utilizing only the 2D point tracks extracted from the videos, without any 3D supervision. Our experiments show that TracksTo4D can reconstruct a temporal point cloud and camera positions of the underlying video with accuracy comparable to state-of-the-art methods, while drastically reducing runtime by up to 95\\%. We further show that TracksTo4D generalizes well to unseen videos of unseen semantic categories at inference time.", "keywords": "Dynamic Videos;Dynamic 3D Reconstruction;Equivariance;Symmetries", "primary_area": "machine_vision", "supplementary_material": "/attachment/5c27ee24c87610e123405af064c0ee60787a1126.zip", "author": "Yoni Kasten;Wuyue Lu;Haggai Maron", "authorids": "~Yoni_Kasten1;~Wuyue_Lu1;~Haggai_Maron1", "gender": ";F;M", "homepage": "https://ykasten.github.io/;https://acorn.utoronto.ca/sws/#/;https://haggaim.github.io/", "dblp": "183/6527;;181/6629", "google_scholar": "https://scholar.google.co.il/citations?user=kc4-e8oAAAAJ;;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ", "orcid": ";;", "linkedin": "yoni-kasten-788a87b3;;", "or_profile": "~Yoni_Kasten1;~Wuyue_Lu1;~Haggai_Maron1", "aff": "NVIDIA;;NVIDIA", "aff_domain": "nvidia.com;;nvidia.com", "position": "Researcher;;Research Scientist", "bibtex": "@inproceedings{\nkasten2024fast,\ntitle={Fast Encoder-Based 3D from Casual Videos via Point Track Processing},\nauthor={Yoni Kasten and Wuyue Lu and Haggai Maron},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bqGAheAeQY}\n}", "github": "", "reviewers": "aqo3;89V2;fqXP;6f3c", "pdf_size": 2031571, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "101;65;79;78", "wc_strengths": "58;78;114;179", "wc_weaknesses": "252;105;166;166", "wc_questions": "76;3;260;107", "wc_limitations": "1;4;20;100", "wc_review": "488;255;639;630", "wc_reply_reviewers": "83;21;12;37", "wc_reply_authors": "148;11;11;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.75, 12.93010054098575 ], "wc_strengths_avg": [ 107.25, 46.02920268698992 ], "wc_weaknesses_avg": [ 172.25, 52.346800284258066 ], "wc_questions_avg": [ 111.5, 93.68164174479438 ], "wc_limitations_avg": [ 31.25, 40.34460930533347 ], "wc_review_avg": [ 503.0, 155.20470353697402 ], "wc_reply_reviewers_avg": [ 38.25, 27.343874999714288 ], "wc_reply_authors_avg": [ 55.5, 55.96650784174407 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2275060206922290544&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nvidia.com;;nvidia.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "NoisyGL: A Comprehensive Benchmark for Graph Neural Networks under Label Noise", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97611", "id": "brxBxj4Dv3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=brxBxj4Dv3", "openreview": "https://openreview.net/forum?id=brxBxj4Dv3", "poster": "/media/PosterPDFs/NeurIPS%202024/97611.png?t=1731413456.0177963", "project": "", "author_site": "Zhonghao Wang, Danyu Sun, Sheng Zhou, Haobo Wang, Jiapei Fan, Longtao Huang, Jiajun Bu", "tldr": "", "abstract": "Graph Neural Networks (GNNs) exhibit strong potential in node classification task through a message-passing mechanism. However, their performance often hinges on high-quality node labels, which are challenging to obtain in real-world scenarios due to unreliable sources or adversarial attacks. Consequently, label noise is common in real-world graph data, negatively impacting GNNs by propagating incorrect information during training. To address this issue, the study of Graph Neural Networks under Label Noise (GLN) has recently gained traction. However, due to variations in dataset selection, data splitting, and preprocessing techniques, the community currently lacks a comprehensive benchmark, which impedes deeper understanding and further development of GLN. To fill this gap, we introduce NoisyGL in this paper, the first comprehensive benchmark for graph neural networks under label noise. NoisyGL enables fair comparisons and detailed analyses of GLN methods on noisy labeled graph data across various datasets, with unified experimental settings and interface. Our benchmark has uncovered several important insights that were missed in previous research, and we believe these findings will be highly beneficial for future studies. We hope our open-source benchmark library will foster further advancements in this field. The code of the benchmark can be found in https://github.com/eaglelab-zju/NoisyGL.", "keywords": "GNNs;Graph learning;Label noise", "primary_area": "", "supplementary_material": "", "author": "Zhonghao Wang;Danyu Sun;Sheng Zhou;Haobo Wang;Jiapei Fan;Longtao Huang;Jiajun Bu", "authorids": "~Zhonghao_Wang4;~Danyu_Sun1;~Sheng_Zhou1;~Haobo_Wang1;~Jiapei_Fan1;~Longtao_Huang2;~Jiajun_Bu1", "gender": "M;;M;M;F;M;M", "homepage": "https://hazwruhi.github.io/;https://www.linkedin.com/in/Danyu-Sun0907;https://zhoushengisnoob.github.io/;https://hbzju.github.io/;https://www.linkedin.cn/injobs/in/%E7%8F%88%E7%8F%AE-%E6%A8%8A-7860768a;http://people.ucas.edu.cn/~huanglongtao?language=en;https://person.zju.edu.cn/bjj", "dblp": "26/5768-2;;34/4858-4.html;;;76/10119;50/3147", "google_scholar": "https://scholar.google.com.sg/citations?hl=zh-CN;;https://scholar.google.co.jp/citations?user=Ss76nMwAAAAJ;DnN-rggAAAAJ;;EQDfV9cAAAAJ;OgZP2okAAAAJ", "orcid": "0009-0000-2662-3415;;0000-0003-3645-1041;0000-0001-8586-3048;;;0000-0002-1097-2044", "linkedin": ";;;;;;", "or_profile": "~Zhonghao_Wang4;~Danyu_Sun1;~Sheng_Zhou1;~Haobo_Wang1;~Jiapei_Fan1;~Longtao_Huang2;~Jiajun_Bu1", "aff": "Ningbo University;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Alibaba Group;Zhejiang University", "aff_domain": "nbu.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com;zju.edu.cn", "position": "Undergrad student;Undergrad student;Associate Professor;Assistant Professor;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2024noisygl,\ntitle={Noisy{GL}: A Comprehensive Benchmark for Graph Neural Networks under Label Noise},\nauthor={Zhonghao Wang and Danyu Sun and Sheng Zhou and Haobo Wang and Jiapei Fan and Longtao Huang and Jiajun Bu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=brxBxj4Dv3}\n}", "github": "", "reviewers": "Vxe6;gQY9;d2CP;EW3m", "pdf_size": 4670716, "rating": "6;6;7;8", "confidence": "4;3;4;3", "wc_summary_and_contributions": "62;108;105;44", "wc_strengths": "3;2;102;3", "wc_improvement": "1;3;125;10", "wc_limitations": "1;1;10;9", "wc_correctness": "1;1;15;1", "wc_clarity": "1;5;61;6", "wc_relation_to_prior_work": "1;7;30;10", "wc_documentation": "1;1;18;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "72;129;467;89", "wc_reply_reviewers": "0;0;104;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 79.75, 27.517040175135115 ], "wc_strengths_avg": [ 27.5, 43.014532428006234 ], "wc_improvement_avg": [ 34.75, 52.212905492799386 ], "wc_limitations_avg": [ 5.25, 4.264680527307995 ], "wc_correctness_avg": [ 4.5, 6.06217782649107 ], "wc_clarity_avg": [ 18.25, 24.752525123712125 ], "wc_relation_to_prior_work_avg": [ 12.0, 10.88577052853862 ], "wc_documentation_avg": [ 6.25, 6.977642868476432 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 189.25, 161.68855092430013 ], "wc_reply_reviewers_avg": [ 32.25, 42.66365549270245 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ynFn9UubRz4J:scholar.google.com/&scioq=NoisyGL:+A+Comprehensive+Benchmark+for+Graph+Neural+Networks+under+Label+Noise&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "nbu.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;2;2;1", "aff_unique_norm": "Ningbo University;Zhejiang University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ningbou.edu.cn;https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "NBU;ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Consistency-Aware Spot-Guided Transformer for Versatile and Hierarchical Point Cloud Registration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94458", "id": "btLLWaOrFs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=btLLWaOrFs", "openreview": "https://openreview.net/forum?id=btLLWaOrFs", "poster": "/media/PosterPDFs/NeurIPS%202024/94458.png?t=1729396532.5248978", "project": "", "author_site": "Renlang Huang, Yufan Tang, Jiming Chen, Liang Li", "tldr": "", "abstract": "Deep learning-based feature matching has shown great superiority for point cloud registration in the absence of pose priors. Although coarse-to-fine matching approaches are prevalent, the coarse matching of existing methods is typically sparse and loose without consideration of geometric consistency, which makes the subsequent fine matching rely on ineffective optimal transport and hypothesis-and-selection methods for consistency. Therefore, these methods are neither efficient nor scalable for real-time applications such as odometry in robotics. To address these issues, we design a consistency-aware spot-guided Transformer (CAST), which incorporates a spot-guided cross-attention module to avoid interfering with irrelevant areas, and a consistency-aware self-attention module to enhance matching capabilities with geometrically consistent correspondences. Furthermore, a lightweight fine matching module for both sparse keypoints and dense features can estimate the transformation accurately. Extensive experiments on both outdoor LiDAR point cloud datasets and indoor RGBD point cloud datasets demonstrate that our method achieves state-of-the-art accuracy, efficiency, and robustness.", "keywords": "Point cloud registration;Rigid transformation estimation;Feature matching;Correspondence;Deep learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/15f826882be95a94275b8fd0c9b5709896f4cfec.zip", "author": "Renlang Huang;Yufan Tang;Jiming Chen;Liang Li", "authorids": "~Renlang_Huang1;~Yufan_Tang1;~Jiming_Chen1;~Liang_Li12", "gender": "M;M;M;M", "homepage": "https://www.researchgate.net/profile/Renlang-Huang;https://tyfan0321.github.io/;;https://person.zju.edu.cn/LiangLi", "dblp": "302/4287;;55/2484-1.html;", "google_scholar": ";;zK9tvo8AAAAJ;6JscxDkAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Renlang_Huang1;~Yufan_Tang1;~Jiming_Chen1;~Liang_Li12", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024a,\ntitle={A Consistency-Aware Spot-Guided Transformer for Versatile and Hierarchical Point Cloud Registration},\nauthor={Renlang Huang and Yufan Tang and Jiming Chen and Liang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=btLLWaOrFs}\n}", "github": "", "reviewers": "UeYt;LKCi;D9ok;L6BY", "pdf_size": 4181557, "rating": "3;5;5;7", "confidence": "4;5;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "1;2;2;4", "wc_summary": "72;80;71;61", "wc_strengths": "61;28;14;102", "wc_weaknesses": "324;247;99;157", "wc_questions": "82;3;2;55", "wc_limitations": "8;3;10;5", "wc_review": "547;361;196;380", "wc_reply_reviewers": "10;50;13;25", "wc_reply_authors": "50;27;30;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 71.0, 6.745368781616021 ], "wc_strengths_avg": [ 51.25, 33.9070420414403 ], "wc_weaknesses_avg": [ 206.75, 85.80901759139304 ], "wc_questions_avg": [ 35.5, 34.35476677260377 ], "wc_limitations_avg": [ 6.5, 2.692582403567252 ], "wc_review_avg": [ 371.0, 124.27992597358593 ], "wc_reply_reviewers_avg": [ 24.5, 15.75595125658873 ], "wc_reply_authors_avg": [ 34.0, 9.300537618869138 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18344746127299850553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Flow Snapshot Neurons in Action: Deep Neural Networks Generalize to Biological Motion Perception", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94457", "id": "btuHzsAVsK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=btuHzsAVsK", "openreview": "https://openreview.net/forum?id=btuHzsAVsK", "poster": "/media/PosterPDFs/NeurIPS%202024/94457.png?t=1731234078.3268826", "project": "", "author_site": "Shuangpeng Han, Ziyu Wang, Mengmi Zhang", "tldr": "", "abstract": "Biological motion perception (BMP) refers to humans' ability to perceive and recognize the actions of living beings solely from their motion patterns, sometimes as minimal as those depicted on point-light displays. While humans excel at these tasks \\textit{without any prior training}, current AI models struggle with poor generalization performance. To close this research gap, we propose the Motion Perceiver (MP). MP solely relies on patch-level optical flows from video clips as inputs. During training, it learns prototypical flow snapshots through a competitive binding mechanism and integrates invariant motion representations to predict action labels for the given video. During inference, we evaluate the generalization ability of all AI models and humans on 62,656 video stimuli spanning 24 BMP conditions using point-light displays in neuroscience. Remarkably, MP outperforms all existing AI models with a maximum improvement of 29\\% in top-1 action recognition accuracy on these conditions. Moreover, we benchmark all AI models in point-light displays of two standard video datasets in computer vision. MP also demonstrates superior performance in these cases. \nMore interestingly, via psychophysics experiments, we found that MP recognizes biological movements in a way that aligns with human behaviors. Our data and code are available at https://github.com/ZhangLab-DeepNeuroCogLab/MotionPerceiver.", "keywords": "biological motion perception;generalization;video action recognition", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Shuangpeng Han;Ziyu Wang;Mengmi Zhang", "authorids": "~Shuangpeng_Han1;~Ziyu_Wang6;~Mengmi_Zhang1", "gender": ";M;F", "homepage": ";https://github.com/ziyuwwang;https://a0091624.wixsite.com/deepneurocognition-1", "dblp": ";;160/7116", "google_scholar": ";zgcP9-EAAAAJ;https://scholar.google.com.sg/citations?user=G2sVOhcAAAAJ", "orcid": ";;0000-0002-2694-7097", "linkedin": ";;", "or_profile": "~Shuangpeng_Han1;~Ziyu_Wang6;~Mengmi_Zhang1", "aff": ";National University of Singapore;A*STAR", "aff_domain": ";nus.edu.sg;astar.edu.sg", "position": ";PhD student;Principal Researcher", "bibtex": "@inproceedings{\nhan2024flow,\ntitle={Flow Snapshot Neurons in Action: Deep Neural Networks Generalize to Biological Motion Perception},\nauthor={Shuangpeng Han and Ziyu Wang and Mengmi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=btuHzsAVsK}\n}", "github": "", "reviewers": "buWx;WFYf;eynA;nybp", "pdf_size": 21525025, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "118;116;123;117", "wc_strengths": "171;67;116;96", "wc_weaknesses": "444;168;281;381", "wc_questions": "44;7;4;542", "wc_limitations": "1;1;8;64", "wc_review": "778;359;532;1200", "wc_reply_reviewers": "68;26;0;303", "wc_reply_authors": "390;35;0;102", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 118.5, 2.692582403567252 ], "wc_strengths_avg": [ 112.5, 38.00328933131973 ], "wc_weaknesses_avg": [ 318.5, 104.53827050415556 ], "wc_questions_avg": [ 149.25, 227.30087439339076 ], "wc_limitations_avg": [ 18.5, 26.424420523447623 ], "wc_review_avg": [ 717.25, 315.9900117092311 ], "wc_reply_reviewers_avg": [ 99.25, 120.11114644361696 ], "wc_reply_authors_avg": [ 131.75, 153.53887944100674 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gQls0wu5_2EJ:scholar.google.com/&scioq=Flow+Snapshot+Neurons+in+Action:+Deep+Neural+Networks+Generalize+to+Biological+Motion+Perception&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";nus.edu.sg;astar.edu.sg", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "National University of Singapore;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NUS;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "buSEDdP5YX", "title": "Avoiding Pitfalls for Privacy Accounting of Subsampled Mechanisms under Composition", "track": "main", "status": "Reject", "tldr": "", "abstract": "We consider the problem of computing tight privacy guarantees for the composition of subsampled differentially private mechanisms. Recent algorithms can numerically compute the privacy parameters to arbitrary precision but must be carefully applied.\n\nOur main contribution is to address two common points of confusion. First, some privacy accountants assume that the privacy guarantees for the composition of a subsampled mechanism are determined by self-composing the worst-case datasets for the uncomposed mechanism. We show that this is not true in general. Second, Poisson subsampling is sometimes assumed to have similar privacy guarantees compared to sampling without replacement. We show that the privacy guarantees may in fact differ significantly between the two sampling schemes. In particular, we give an example of hyperparameters that result in $\\varepsilon \\approx 1$ for Poisson subsampling and $\\varepsilon > 10$ for sampling without replacement. This occurs for some parameters that could realistically be chosen for DP-SGD.", "keywords": "differential privacy;privacy accounting", "primary_area": "privacy", "supplementary_material": "", "author": "Christian Janos Lebeda;Matthew Regehr;Gautam Kamath;Thomas Steinke", "authorids": "~Christian_Janos_Lebeda1;~Matthew_Regehr1;~Gautam_Kamath1;~Thomas_Steinke2", "gender": ";M;M;M", "homepage": ";;http://www.gautamkamath.com/;http://www.thomas-steinke.net/", "dblp": ";;73/11140;https://dblp.uni-trier.de/pid/73/4025-2.html", "google_scholar": ";;MK6zHkYAAAAJ;kwnwhrgAAAAJ", "orcid": "0000-0001-9517-8466;0009-0005-3710-6233;;", "linkedin": ";;;thomas-steinke-2841248/", "or_profile": "~Christian_Janos_Lebeda1;~Matthew_Regehr1;~Gautam_Kamath1;~Thomas_Steinke2", "aff": "IT University of Copenhagen;;University of Waterloo;Google", "aff_domain": "itu.dk;;uwaterloo.ca;google.com", "position": "Postdoc;;Assistant Professor;Research Scientist", "bibtex": "@misc{\nanonymous2024avoiding,\ntitle={Avoiding Pitfalls for Privacy Accounting of Subsampled Mechanisms under Composition},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=buSEDdP5YX}\n}", "github": "", "project": "", "reviewers": "qk3S;DUtv;NpBG;Tsvf", "site": "https://openreview.net/forum?id=buSEDdP5YX", "pdf_size": 648209, "rating": "3;5;5;6", "confidence": "4;5;3;2", "soundness": "2;4;3;3", "novelty": "2;2;3;3", "presentation": "3;4;3;2", "wc_summary": "216;50;41;69", "wc_strengths": "9;47;97;56", "wc_weaknesses": "18;150;79;36", "wc_questions": "131;386;13;41", "wc_limitations": "1;124;10;8", "wc_review": "375;757;240;210", "wc_reply_reviewers": "156;485;0;85", "wc_reply_authors": "597;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.0, 71.1582742904857 ], "wc_strengths_avg": [ 52.25, 31.283981524096323 ], "wc_weaknesses_avg": [ 70.75, 50.839821990246975 ], "wc_questions_avg": [ 142.75, 147.05164908969908 ], "wc_limitations_avg": [ 35.75, 51.06062573059598 ], "wc_review_avg": [ 395.5, 217.76879941809847 ], "wc_reply_reviewers_avg": [ 181.5, 183.72329737951037 ], "wc_reply_authors_avg": [ 149.25, 258.5085830296549 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5129891760425771, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15099536644861689228&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "IT University of Copenhagen;University of Waterloo;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://itu.dk;https://uwaterloo.ca;https://www.google.com", "aff_unique_abbr": "ITU;UW;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Denmark;Canada;United States" }, { "title": "Self-Labeling the Job Shop Scheduling Problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94456", "id": "buqvMT3B4k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=buqvMT3B4k", "openreview": "https://openreview.net/forum?id=buqvMT3B4k", "poster": "/media/PosterPDFs/NeurIPS%202024/94456.png?t=1730372027.9468997", "project": "", "author_site": "Andrea Corsini, Angelo Porrello, SIMONE CALDERARA, Mauro Dell'Amico", "tldr": "", "abstract": "This work proposes a self-supervised training strategy designed for combinatorial problems. An obstacle in applying supervised paradigms to such problems is the need for costly target solutions often produced with exact solvers. Inspired by semi- and self-supervised learning, we show that generative models can be trained by sampling multiple solutions and using the best one according to the problem objective as a pseudo-label. In this way, we iteratively improve the model generation capability by relying only on its self-supervision, eliminating the need for optimality information. We validate this Self-Labeling Improvement Method (SLIM) on the Job Shop Scheduling (JSP), a complex combinatorial problem that is receiving much attention from the neural combinatorial community. We propose a generative model based on the well-known Pointer Network and train it with SLIM. Experiments on popular benchmarks demonstrate the potential of this approach as the resulting models outperform constructive heuristics and state-of-the-art learning proposals for the JSP. Lastly, we prove the robustness of SLIM to various parameters and its generality by applying it to the Traveling Salesman Problem.", "keywords": "Self-Labeling;Generative Model;Job Shop Scheduling;Traveling Salesman Problem", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/ba97f9db70fc4f820c8eac1cfd4e6cf0825bb4b9.zip", "author": "Andrea Corsini;Angelo Porrello;Simone Calderara;Mauro Dell'Amico", "authorids": "~Andrea_Corsini1;~Angelo_Porrello1;~Simone_Calderara1;~Mauro_Dell'Amico1", "gender": "M;M;M;", "homepage": ";;;https://personale.unimore.it/rubrica/dettaglio/dellamic", "dblp": "295/8566;223/4466;13/422;", "google_scholar": ";b3-5Ys4AAAAJ;https://scholar.google.it/citations?user=CZd-WXkAAAAJ;", "orcid": "0000-0002-2747-4244;0000-0002-9022-8484;0000-0001-9056-1538;", "linkedin": ";;;", "or_profile": "~Andrea_Corsini1;~Angelo_Porrello1;~Simone_Calderara1;~Mauro_Dell'Amico1", "aff": "University of Modena and Reggio Emilia;University of Modena and Reggio Emilia, AimageLab;University of Modena and Reggio Emilia;Universit\u00e0 di Modena e Reggio Emilia", "aff_domain": "unimore.it;unimore.it;unimore.it;unimore.it", "position": "Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncorsini2024selflabeling,\ntitle={Self-Labeling the Job Shop Scheduling Problem},\nauthor={Andrea Corsini and Angelo Porrello and Simone Calderara and Mauro Dell'Amico},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=buqvMT3B4k}\n}", "github": "", "reviewers": "NWh3;mZNb;pxef;2iGC", "pdf_size": 2851596, "rating": "5;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "31;298;113;68", "wc_strengths": "70;152;97;97", "wc_weaknesses": "309;82;178;137", "wc_questions": "92;142;140;72", "wc_limitations": "9;1;5;9", "wc_review": "511;675;533;383", "wc_reply_reviewers": "83;71;77;37", "wc_reply_authors": "161;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 127.5, 102.63162280700817 ], "wc_strengths_avg": [ 104.0, 29.824486584013478 ], "wc_weaknesses_avg": [ 176.5, 83.73917840533187 ], "wc_questions_avg": [ 111.5, 30.343862641397518 ], "wc_limitations_avg": [ 6.0, 3.3166247903554 ], "wc_review_avg": [ 525.5, 103.58933342772315 ], "wc_reply_reviewers_avg": [ 67.0, 17.832554500127006 ], "wc_reply_authors_avg": [ 40.25, 69.71504500464731 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4026020073920032576&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 7, "email": "unimore.it;unimore.it;unimore.it;unimore.it", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Modena and Reggio Emilia;Universit\u00e0 di Modena e Reggio Emilia", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimore.it;https://www.unimore.it", "aff_unique_abbr": ";UNIMORE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "A Non-parametric Direct Learning Approach to Heterogeneous Treatment Effect Estimation under Unmeasured Confounding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94455", "id": "bwlUQsQumh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bwlUQsQumh", "openreview": "https://openreview.net/forum?id=bwlUQsQumh", "poster": "", "project": "", "author_site": "Xinhai Zhang, Xingye Qiao", "tldr": "", "abstract": "In many social, behavioral, and biomedical sciences, treatment effect estimation is a crucial step in understanding the impact of an intervention, policy, or treatment. In recent years, an increasing emphasis has been placed on heterogeneity in treatment effects, leading to the development of various methods for estimating Conditional Average Treatment Effects (CATE). These approaches hinge on a crucial identifying condition of no unmeasured confounding, an assumption that is not always guaranteed in observational studies or randomized control trials with non-compliance. In this paper, we proposed a general framework for estimating CATE with a possible unmeasured confounder using Instrumental Variables. We also construct estimators that exhibit greater efficiency and robustness against various scenarios of model misspecification. The efficacy of the proposed framework is demonstrated through simulation studies and a real data example.", "keywords": "direct learning;heterogeneous treatment effect;instrumental variable;multiply robustness;unmeasured confounding", "primary_area": "causal_inference", "supplementary_material": "/attachment/8650f0f5e090d987637ab4f19e41557401975dbb.zip", "author": "Xinhai Zhang;Xingye Qiao", "authorids": "~Xinhai_Zhang1;~Xingye_Qiao1", "gender": "M;", "homepage": "https://www2.math.binghamton.edu/p/people/grads/zhangxi/start;http://people.math.binghamton.edu/qiao/", "dblp": ";21/10859", "google_scholar": ";O8NqeoQAAAAJ", "orcid": ";0000-0003-0937-9822", "linkedin": ";", "or_profile": "~Xinhai_Zhang1;~Xingye_Qiao1", "aff": "State University of New York at Binghamton;State University of New York at Binghamton", "aff_domain": "binghamton.edu;binghamton.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024a,\ntitle={A Non-parametric Direct Learning Approach to Heterogeneous Treatment Effect Estimation under Unmeasured Confounding},\nauthor={Xinhai Zhang and Xingye Qiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bwlUQsQumh}\n}", "github": "", "reviewers": "iUwm;M1zT;M5di;qaqj", "pdf_size": 570452, "rating": "4;5;6;7", "confidence": "5;2;4;4", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "50;66;20;106", "wc_strengths": "15;35;25;97", "wc_weaknesses": "4;110;485;133", "wc_questions": "23;49;47;96", "wc_limitations": "4;8;14;7", "wc_review": "96;268;591;439", "wc_reply_reviewers": "0;44;178;0", "wc_reply_authors": "0;278;281;0", "reply_reviewers": "0;1;2;0", "reply_authors": "1;2;3;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 60.5, 31.028212968200407 ], "wc_strengths_avg": [ 43.0, 31.96873472629156 ], "wc_weaknesses_avg": [ 183.0, 181.02071704641986 ], "wc_questions_avg": [ 53.75, 26.45160675649024 ], "wc_limitations_avg": [ 8.25, 3.6314597615834874 ], "wc_review_avg": [ 348.5, 185.22486334183108 ], "wc_reply_reviewers_avg": [ 55.5, 72.97088460475177 ], "wc_reply_authors_avg": [ 139.75, 139.75402498676021 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.1025978352085154, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LSnL9JehTwgJ:scholar.google.com/&scioq=A+Non-parametric+Direct+Learning+Approach+to+Heterogeneous+Treatment+Effect+Estimation+under+Unmeasured+Confounding&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "binghamton.edu;binghamton.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Binghamton", "aff_unique_dep": "", "aff_unique_url": "https://www.binghamton.edu", "aff_unique_abbr": "SUNY Binghamton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Binghamton", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Long-Range Feedback Spiking Network Captures Dynamic and Static Representations of the Visual Cortex under Movie Stimuli", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94454", "id": "bxDok3uaK6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bxDok3uaK6", "openreview": "https://openreview.net/forum?id=bxDok3uaK6", "poster": "/media/PosterPDFs/NeurIPS%202024/94454.png?t=1731744562.342229", "project": "", "author_site": "Liwei Huang, Zhengyu Ma, Liutao Yu, Huihui Zhou, Yonghong Tian", "tldr": "", "abstract": "Deep neural networks (DNNs) are widely used models for investigating biological visual representations. However, existing DNNs are mostly designed to analyze neural responses to static images, relying on feedforward structures and lacking physiological neuronal mechanisms. There is limited insight into how the visual cortex represents natural movie stimuli that contain context-rich information. To address these problems, this work proposes the long-range feedback spiking network (LoRaFB-SNet), which mimics top-down connections between cortical regions and incorporates spike information processing mechanisms inherent to biological neurons. Taking into account the temporal dependence of representations under movie stimuli, we present Time-Series Representational Similarity Analysis (TSRSA) to measure the similarity between model representations and visual cortical representations of mice. LoRaFB-SNet exhibits the highest level of representational similarity, outperforming other well-known and leading alternatives across various experimental paradigms, especially when representing long movie stimuli. We further conduct experiments to quantify how temporal structures (dynamic information) and static textures (static information) of the movie stimuli influence representational similarity, suggesting that our model benefits from long-range feedback to encode context-dependent representations just like the brain. Altogether, LoRaFB-SNet is highly competent in capturing both dynamic and static representations of the mouse visual cortex and contributes to the understanding of movie processing mechanisms of the visual system. Our codes are available at https://github.com/Grasshlw/SNN-Neural-Similarity-Movie.", "keywords": "Brain-Inspired Computational Model;Spiking Neural Network;Mouse Visual Cortex", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Liwei Huang;Zhengyu Ma;Liutao Yu;Huihui Zhou;Yonghong Tian", "authorids": "~Liwei_Huang1;~Zhengyu_Ma1;~Liutao_Yu1;~Huihui_Zhou1;~Yonghong_Tian1", "gender": "M;;;M;M", "homepage": "https://grasshlw.github.io/;;;https://www.researchgate.net/profile/Huihui_Zhou4/research;http://www.pkuml.org", "dblp": "27/10780;;;;86/5857", "google_scholar": ";;;c2mrU24AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;0000-0002-2978-5935", "linkedin": ";;;;", "or_profile": "~Liwei_Huang1;~Zhengyu_Ma1;~Liutao_Yu1;~Huihui_Zhou1;~Yonghong_Tian1", "aff": "Peking University;;;Pengcheng Lab;Peking University", "aff_domain": "pku.edu.cn;;;pcl.ac.cn;pku.edu.cn", "position": "PhD student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024longrange,\ntitle={Long-Range Feedback Spiking Network Captures Dynamic and Static Representations of the Visual Cortex under Movie Stimuli},\nauthor={Liwei Huang and Zhengyu Ma and Liutao Yu and Huihui Zhou and Yonghong Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bxDok3uaK6}\n}", "github": "", "reviewers": "dwyD;YZqi;rzHc", "pdf_size": 6609750, "rating": "5;6;7", "confidence": "3;5;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "68;213;94", "wc_strengths": "68;137;58", "wc_weaknesses": "218;400;37", "wc_questions": "42;218;192", "wc_limitations": "7;156;31", "wc_review": "403;1124;412", "wc_reply_reviewers": "126;736;357", "wc_reply_authors": "469;1074;506", "reply_reviewers": "2;2;2", "reply_authors": "3;5;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 125.0, 63.12421616675067 ], "wc_strengths_avg": [ 87.66666666666667, 35.122009560324926 ], "wc_weaknesses_avg": [ 218.33333333333334, 148.19431688008672 ], "wc_questions_avg": [ 150.66666666666666, 77.56860762504951 ], "wc_limitations_avg": [ 64.66666666666667, 65.32142748661337 ], "wc_review_avg": [ 646.3333333333334, 337.78132308080956 ], "wc_reply_reviewers_avg": [ 406.3333333333333, 251.46283135993588 ], "wc_reply_authors_avg": [ 683.0, 276.8910736493083 ], "reply_reviewers_avg": [ 2.0, 0.0 ], "reply_authors_avg": [ 3.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MDfF8ZsB6dwJ:scholar.google.com/&scioq=Long-Range+Feedback+Spiking+Network+Captures+Dynamic+and+Static+Representations+of+the+Visual+Cortex+under+Movie+Stimuli&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "pku.edu.cn;;;pcl.ac.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Pengcheng Lab", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;", "aff_unique_abbr": "Peking U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Soft Superpixel Neighborhood Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94453", "id": "bxH6T1w1FW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bxH6T1w1FW", "openreview": "https://openreview.net/forum?id=bxH6T1w1FW", "poster": "/media/PosterPDFs/NeurIPS%202024/94453.png?t=1733581742.2783537", "project": "", "author_site": "Kent W Gauen, Stanley Chan", "tldr": "", "abstract": "Images contain objects with deformable boundaries, such as the contours of a human face, yet attention operators act on square windows. This mixes features from perceptually unrelated regions, which can degrade the quality of a denoiser. One can exclude pixels using an estimate of perceptual groupings, such as superpixels, but the naive use of superpixels can be theoretically and empirically worse than standard attention. Using superpixel probabilities rather than superpixel assignments, this paper proposes soft superpixel neighborhood attention (SNA), which interpolates between the existing neighborhood attention and the naive superpixel neighborhood attention. This paper presents theoretical results showing SNA is the optimal denoiser under a latent superpixel model. SNA outperforms alternative local attention modules on image denoising, and we compare the superpixels learned from denoising with those learned with supervision.", "keywords": "attention module;superpixels;image denoising;deep learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/76662ddeb7fea9ed835279187e406e724e0254e4.zip", "author": "Kent Gauen;Stanley H. Chan", "authorids": "~Kent_Gauen1;~Stanley_H._Chan2", "gender": "M;", "homepage": "https://gauenk.github.io/;", "dblp": "195/4143;", "google_scholar": "CRbbyHMAAAAJ;", "orcid": ";", "linkedin": "gauenk/;", "or_profile": "~Kent_Gauen1;~Stanley_H._Chan2", "aff": "Purdue University;", "aff_domain": "purdue.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\ngauen2024soft,\ntitle={Soft Superpixel Neighborhood Attention},\nauthor={Kent Gauen and Stanley H. Chan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bxH6T1w1FW}\n}", "github": "", "reviewers": "dwem;nsbe;4VnB;XzEJ", "pdf_size": 4716069, "rating": "4;7;7;7", "confidence": "3;4;4;5", "soundness": "2;4;3;4", "novelty": "2;4;3;4", "presentation": "2;4;3;3", "wc_summary": "40;44;34;110", "wc_strengths": "17;54;37;89", "wc_weaknesses": "87;39;37;362", "wc_questions": "2;14;40;308", "wc_limitations": "3;4;13;126", "wc_review": "149;155;161;995", "wc_reply_reviewers": "0;10;0;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.0, 30.805843601498726 ], "wc_strengths_avg": [ 49.25, 26.423237878806603 ], "wc_weaknesses_avg": [ 131.25, 134.71892034899923 ], "wc_questions_avg": [ 91.0, 126.03570922560003 ], "wc_limitations_avg": [ 36.5, 51.81939791236483 ], "wc_review_avg": [ 365.0, 363.7554123308683 ], "wc_reply_reviewers_avg": [ 9.25, 11.031205736455105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VrmArgIg27UJ:scholar.google.com/&scioq=Soft+Superpixel+Neighborhood+Attention&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "purdue.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "bxwWikAXSy", "title": "MathWriting: A Dataset For Handwritten Mathematical Expression Recognition", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Recognition of handwritten mathematical expressions allows to transfer scientific notes into their digital form. It facilitates the sharing, searching, and preservation of scientific information. We introduce MathWriting, the largest online handwritten mathematical expression dataset to date. It consists of 230k human-written samples and an additional 400k synthetic ones. This dataset can also be used in its rendered form for offline HME recognition. One MathWriting sample consists of a formula written on a touch screen and a corresponding LaTeX expression. We also provide a normalized version of LaTeX expression to simplify the recognition task and enhance the result quality. We provide baseline performance of standard models like OCR and CTC Transformer as well as Vision-Language Models like PaLI on the dataset. The dataset together with an example colab is accessible on Github.", "keywords": "mathematical expression recognition;online handwriting recognition;digital ink", "primary_area": "", "supplementary_material": "", "author": "Philippe Gervais;Anastasiia Fadeeva;Andrii Maksai", "authorids": "~Philippe_Gervais1;~Anastasiia_Fadeeva1;~Andrii_Maksai2", "gender": ";F;M", "homepage": ";;", "dblp": ";;160/5950", "google_scholar": ";;QZd-fvAAAAAJ", "orcid": "0009-0000-8845-1751;;", "linkedin": "philippe-n-gervais;anastasia-fadeeva-707616135/;", "or_profile": "~Philippe_Gervais1;~Anastasiia_Fadeeva1;~Andrii_Maksai2", "aff": "Google;Research, Google;Google", "aff_domain": "google.com;research.google.com;google.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024mathwriting,\ntitle={MathWriting: A Dataset For Handwritten Mathematical Expression Recognition},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=bxwWikAXSy}\n}", "github": "", "project": "", "reviewers": "iTBu;srzw;rg6N;2jZo", "site": "https://openreview.net/forum?id=bxwWikAXSy", "pdf_size": 2959996, "rating": "4;6;6;7", "confidence": "5;4;4;5", "wc_summary_and_contributions": "58;49;53;84", "wc_strengths": "30;83;2;26", "wc_improvement": "124;202;29;109", "wc_limitations": "1;10;2;10", "wc_correctness": "54;21;1;42", "wc_clarity": "5;9;1;26", "wc_relation_to_prior_work": "102;36;1;6", "wc_documentation": "17;35;1;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "392;446;91;308", "wc_reply_reviewers": "0;19;0;18", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 61.0, 13.656500283747663 ], "wc_strengths_avg": [ 35.25, 29.57511622969553 ], "wc_improvement_avg": [ 116.0, 61.39625395738734 ], "wc_limitations_avg": [ 5.75, 4.264680527307995 ], "wc_correctness_avg": [ 29.5, 20.254629100529094 ], "wc_clarity_avg": [ 10.25, 9.522998477370455 ], "wc_relation_to_prior_work_avg": [ 36.25, 40.25155276507976 ], "wc_documentation_avg": [ 14.25, 13.40475661845451 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 309.25, 135.26155218686498 ], "wc_reply_reviewers_avg": [ 9.25, 9.256754290786809 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3827773884142496647&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Revisiting motion information for RGB-Event tracking with MOT philosophy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94452", "id": "bzGAELYOyL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bzGAELYOyL", "openreview": "https://openreview.net/forum?id=bzGAELYOyL", "poster": "/media/PosterPDFs/NeurIPS%202024/94452.png?t=1731912203.7669857", "project": "", "author_site": "Tianlu Zhang, Kurt Debattista, Qiang Zhang, guiguang ding, Jungong Han", "tldr": "", "abstract": "RGB-Event single object tracking (SOT) aims to leverage the merits of RGB and event data to achieve higher performance. However, existing frameworks focus on exploring complementary appearance information within multi-modal data, and struggle to address the association problem of targets and distractors in the temporal domain using motion information from the event stream. In this paper, we introduce the Multi-Object Tracking (MOT) philosophy into RGB-E SOT to keep track of targets as well as distractors by using both RGB and event data, thereby improving the robustness of the tracker. Specifically, an appearance model is employed to predict the initial candidates. Subsequently, the initially predicted tracking results, in combination with the RGB-E features, are encoded into appearance and motion embeddings, respectively. Furthermore, a Spatial-Temporal Transformer Encoder is proposed to model the spatial-temporal relationships and learn discriminative features for each candidate through guidance of the appearance-motion embeddings. Simultaneously, a Dual-Branch Transformer Decoder is designed to adopt such motion and appearance information for candidate matching, thus distinguishing between targets and distractors. The proposed method is evaluated on multiple benchmark datasets and achieves state-of-the-art performance on all the datasets tested.", "keywords": "RGB-Event tracking; MOT philosophy; Candidate matching", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Tianlu Zhang;Kurt Debattista;Qiang Zhang;Guiguang Ding;Jungong Han", "authorids": "~Tianlu_Zhang1;~Kurt_Debattista1;~Qiang_Zhang7;~Guiguang_Ding1;~Jungong_Han1", "gender": "M;M;M;M;M", "homepage": "https://tianlu-zhang.github.io/index.html;https://warwick.ac.uk/fac/sci/wmg/people/profile/?wmgid=518;https://faculty.xidian.edu.cn/ZQ2/zh_CN/index/336357/list/index.htm;http://ise.thss.tsinghua.edu.cn/MIG/dgg.html;https://jungonghan.github.io/", "dblp": "255/8662.html;29/5944.html;72/3527-20;51/740;98/6127", "google_scholar": "Q0AxuQkAAAAJ;https://scholar.google.co.uk/citations?user=8-E4ButRvbwC;;https://scholar.google.com.tw/citations?user=B7F3yt4AAAAJ;hNi1gxAAAAAJ", "orcid": "0000-0003-4592-5448;0000-0003-2982-5199;0000-0002-2828-9905;0000-0003-0137-9975;0000-0003-4361-956X", "linkedin": ";;;;", "or_profile": "~Tianlu_Zhang1;~Kurt_Debattista1;~Qiang_Zhang7;~Guiguang_Ding1;~Jungong_Han1", "aff": "Xi'an University of Electronic Science and Technology;University of Warwick;Xidian University;Tsinghua University;University of Sheffield", "aff_domain": "xidian.edu.cn;warwick.ac.uk;xidian.edu.cn;tsinghua.edu.cn;sheffield.ac.uk", "position": "PhD student;Full Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024revisiting,\ntitle={Revisiting motion information for {RGB}-Event tracking with {MOT} philosophy},\nauthor={Tianlu Zhang and Kurt Debattista and Qiang Zhang and Guiguang Ding and Jungong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bzGAELYOyL}\n}", "github": "", "reviewers": "SevM;KN3J;fGZm;fErD", "pdf_size": 13734659, "rating": "5;5;5;6", "confidence": "5;3;5;4", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "3;3;1;3", "wc_summary": "77;41;63;38", "wc_strengths": "34;24;10;48", "wc_weaknesses": "190;88;217;44", "wc_questions": "8;5;16;222", "wc_limitations": "1;1;6;5", "wc_review": "310;159;312;357", "wc_reply_reviewers": "0;83;12;15", "wc_reply_authors": "0;12;12;18", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 54.75, 16.068213964221414 ], "wc_strengths_avg": [ 29.0, 13.892443989449804 ], "wc_weaknesses_avg": [ 134.75, 71.13148037261702 ], "wc_questions_avg": [ 62.75, 92.03090513517728 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 284.5, 74.85485956168777 ], "wc_reply_reviewers_avg": [ 27.5, 32.53075467922624 ], "wc_reply_authors_avg": [ 10.5, 6.5383484153110105 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=319997934106464029&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "xidian.edu.cn;warwick.ac.uk;xidian.edu.cn;tsinghua.edu.cn;sheffield.ac.uk", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Xi'an University of Electronic Science and Technology;University of Warwick;Xidian University;Tsinghua University;University of Sheffield", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.warwick.ac.uk;http://www.xidian.edu.cn/;https://www.tsinghua.edu.cn;https://www.sheffield.ac.uk", "aff_unique_abbr": "Xidian University;Warwick;Xidian;THU;Sheffield", "aff_campus_unique_index": "0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Quantifying and Optimizing Global Faithfulness in Persona-driven Role-playing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94451", "id": "bzPmjmiaz8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bzPmjmiaz8", "openreview": "https://openreview.net/forum?id=bzPmjmiaz8", "poster": "", "project": "", "author_site": "Letian Peng, Jingbo Shang", "tldr": "", "abstract": "Persona-driven role-playing (PRP) aims to build AI characters that can respond to user queries by faithfully sticking with \\emph{all} (factual) statements in persona documents.\nUnfortunately, existing faithfulness criteria for PRP are limited to coarse-grained LLM-based scoring without a clear definition or formulation.\nThis paper presents a pioneering exploration to quantify PRP faithfulness evaluation as a fine-grained and explainable criterion, which also serves as a reliable reference for faithfulness optimization.\nOur criterion first discriminates persona statements into \\emph{active} and \\emph{passive} constraints by identifying the query-statement relevance.\nThen, we incorporate all constraints following the principle that the AI character's response should be (a) entailed by active constraints and (b) not contradicted by passive constraints.\nWe translate this principle mathematically into a novel Active-Passive-Constraint (APC) score, a constraint-wise sum of statement-to-response natural language inference (NLI) scores weighted by constraint-query relevance scores. \nIn practice, we build the APC scoring system by symbolically distilling small NLI and relevance discriminators (300M parameters) from GPT-4 for efficiency, and both show high consistency with GPT-4's discrimination.\nWe validate the quality of the APC score against human evaluation based on example personas with tens of statements, and the results show a high correlation.\nAs the APC score could faithfully reflect the PRP quality, we further leverage it as a reward system in direct preference optimization (DPO) for better AI characters. \nOur experiments offer a fine-grained and explainable comparison between existing PRP techniques, revealing their advantages and limitations.\nWe further find APC-based DPO to be one of the most competitive techniques for sticking with all constraints and can be well incorporated with other techniques.\nWe then extend the scale of the experiments to real persons with hundreds of statements and reach a consistent conclusion. \nFinally, we provide comprehensive analyses and case studies to support the effectiveness of APC and APC-based DPO.", "keywords": "Persona-driven Role-playing;Global Faithfulness Control;Evaluation Metric;Direct Preference Optimization", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Letian Peng;Jingbo Shang", "authorids": "~Letian_Peng1;~Jingbo_Shang2", "gender": "M;M", "homepage": "https://komeijiforce.github.io/;https://shangjingbo1226.github.io/", "dblp": "303/0630;151/3145.html", "google_scholar": "vht13WkAAAAJ;0SkFI4MAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Letian_Peng1;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\npeng2024quantifying,\ntitle={Quantifying and Optimizing Global Faithfulness in Persona-driven Role-playing},\nauthor={Letian Peng and Jingbo Shang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bzPmjmiaz8}\n}", "github": "", "reviewers": "Pxme;5oeH;iNA2;Ehwr", "pdf_size": 702287, "rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;4;3;3", "presentation": "3;3;2;3", "wc_summary": "91;92;66;100", "wc_strengths": "60;104;90;69", "wc_weaknesses": "150;138;163;65", "wc_questions": "101;130;1;56", "wc_limitations": "8;3;1;17", "wc_review": "410;467;321;307", "wc_reply_reviewers": "33;56;28;0", "wc_reply_authors": "86;281;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 12.754901018824098 ], "wc_strengths_avg": [ 80.75, 17.282577932704367 ], "wc_weaknesses_avg": [ 129.0, 37.993420483025744 ], "wc_questions_avg": [ 72.0, 48.73910134583936 ], "wc_limitations_avg": [ 7.25, 6.179603547154137 ], "wc_review_avg": [ 376.25, 65.61773769340117 ], "wc_reply_reviewers_avg": [ 29.25, 19.917015338649513 ], "wc_reply_authors_avg": [ 91.75, 114.76579412002515 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12259360469679040655&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ucsd.edu;ucsd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Splatter a Video: Video Gaussian Representation for Versatile Processing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94450", "id": "bzuQtVDxv0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=bzuQtVDxv0", "openreview": "https://openreview.net/forum?id=bzuQtVDxv0", "poster": "/media/PosterPDFs/NeurIPS%202024/94450.png?t=1731489899.8850107", "project": "", "author_site": "Sun, Yihua Huang, Lin Ma, Xiaoyang Lyu, Yan-Pei Cao, Xiaojuan Qi", "tldr": "", "abstract": "Video representation is a long-standing problem that is crucial for various downstream tasks, such as tracking, depth prediction, segmentation, view synthesis, and editing. However, current methods either struggle to model complex motions due to the absence of 3D structure or rely on implicit 3D representations that are ill-suited for manipulation tasks. To address these challenges, we introduce a novel explicit 3D representation\u2014video Gaussian representation\u2014that embeds a video into 3D Gaussians. \nOur proposed representation models video appearance in a 3D canonical space using explicit Gaussians as proxies and associates each Gaussian with 3D motions for video motion. This approach offers a more intrinsic and explicit representation than layered atlas or volumetric pixel matrices. To obtain such a representation, we distill 2D priors, such as optical flow and depth, from foundation models to regularize learning in this ill-posed setting.\nExtensive applications demonstrate the versatility of our new video representation. It has been proven effective in numerous video processing tasks, including tracking, consistent video depth and feature refinement, motion and appearance editing, and stereoscopic video generation.", "keywords": "Video Representation; Video Processing", "primary_area": "machine_vision", "supplementary_material": "/attachment/6678bef74e5a63997b4ddcdfc3911762cf3d7353.zip", "author": "Yang-Tian Sun;Yi-Hua Huang;Lin Ma;Xiaoyang Lyu;Yan-Pei Cao;XIAOJUAN QI", "authorids": "~Yang-Tian_Sun1;~Yi-Hua_Huang1;~Lin_Ma9;~Xiaoyang_Lyu1;~Yan-Pei_Cao1;~XIAOJUAN_QI2", "gender": "M;M;M;M;F;M", "homepage": "http://sunyangtian.github.io;https://yihua7.github.io/website/;http://marlinilram.github.io/;https://yanpei.me/;https://xjqi.github.io/;https://shawlyu.github.io", "dblp": "261/9614.html;50/4147;;141/6343;176/1445-1.html;281/7169", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;S4HGIIUAAAAJ;50194vkAAAAJ;bGn0uacAAAAJ;SF7bq48AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yang-Tian_Sun1;~Yi-Hua_Huang1;~Lin_Ma9;~Yan-Pei_Cao1;~XIAOJUAN_QI2;~Shaw_Lyu1", "aff": "University of Hong Kong;University of Hong Kong;;VAST;University of Hong Kong;University of Hong Kong", "aff_domain": "hku.hk;hku.hk;;vast3dai.com;hku.hk;hku.hk", "position": "PhD student;PhD student;;Principal Researcher;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nsun2024splatter,\ntitle={Splatter a Video: Video Gaussian Representation for Versatile Processing},\nauthor={Yang-Tian Sun and Yi-Hua Huang and Lin Ma and Xiaoyang Lyu and Yan-Pei Cao and XIAOJUAN QI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=bzuQtVDxv0}\n}", "github": "", "reviewers": "rMXd;axg5;8X9e;rPzr", "pdf_size": 1637913, "rating": "2;6;6;7", "confidence": "5;4;4;4", "soundness": "2;3;4;3", "novelty": "1;1;3;3", "presentation": "2;3;3;3", "wc_summary": "70;89;110;66", "wc_strengths": "34;70;61;90", "wc_weaknesses": "426;147;156;249", "wc_questions": "40;34;18;88", "wc_limitations": "65;24;34;24", "wc_review": "635;364;379;517", "wc_reply_reviewers": "1212;199;61;771", "wc_reply_authors": "2460;1560;8;1470", "reply_reviewers": "2;1;1;4", "reply_authors": "6;4;2;6", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 17.469616481193857 ], "wc_strengths_avg": [ 63.75, 20.129269733400662 ], "wc_weaknesses_avg": [ 244.5, 112.13942214939401 ], "wc_questions_avg": [ 45.0, 26.095976701399778 ], "wc_limitations_avg": [ 36.75, 16.813313177360374 ], "wc_review_avg": [ 473.75, 110.56078644799882 ], "wc_reply_reviewers_avg": [ 560.75, 460.69098916735936 ], "wc_reply_authors_avg": [ 1374.5, 878.800745334231 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 4.5, 1.6583123951777 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9771398364036774, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12967240484937818184&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hku.hk;hku.hk;;vast3dai.com;hku.hk;hku.hk", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Hong Kong;VAST", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;", "aff_unique_abbr": "HKU;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Estimating Heterogeneous Treatment Effects by Combining Weak Instruments and Observational Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94449", "id": "c37x7CXZ2Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c37x7CXZ2Y", "openreview": "https://openreview.net/forum?id=c37x7CXZ2Y", "poster": "/media/PosterPDFs/NeurIPS%202024/94449.png?t=1731685094.4806828", "project": "", "author_site": "Miruna Oprescu, Nathan Kallus", "tldr": "", "abstract": "Accurately predicting conditional average treatment effects (CATEs) is crucial in personalized medicine and digital platform analytics. \nSince the treatments of interest often cannot be directly randomized, observational data is leveraged to learn CATEs, but this approach can incur significant bias from unobserved confounding. One strategy to overcome these limitations is to leverage instrumental variables (IVs) as latent quasi-experiments, such as randomized intent-to-treat assignments or randomized product recommendations. This approach, on the other hand, can suffer from low compliance, i.e., IV weakness. Some subgroups may even exhibit zero compliance, meaning we cannot instrument for their CATEs at all. In this paper, we develop a novel approach to combine IV and observational data to enable reliable CATE estimation in the presence of unobserved confounding in the observational data and low compliance in the IV data, including no compliance for some subgroups. We propose a two-stage framework that first learns \\textit{biased} CATEs from the observational data, and then applies a compliance-weighted correction using IV data, effectively leveraging IV strength variability across covariates. We characterize the convergence rates of our method and validate its effectiveness through a simulation study. Additionally, we demonstrate its utility with real data by analyzing the heterogeneous effects of 401(k) plan participation on wealth.", "keywords": "Causal inference;heterogeneous treatment effects;weak instrumental variables;unobserved confounding;data combination", "primary_area": "causal_inference", "supplementary_material": "", "author": "Miruna Oprescu;Nathan Kallus", "authorids": "~Miruna_Oprescu1;~Nathan_Kallus1", "gender": "F;", "homepage": "https://mirunaoprescu.com;http://nathankallus.com/", "dblp": "218/5317;142/2900", "google_scholar": "KLSUWBAAAAAJ;K2WfIlsAAAAJ", "orcid": ";0000-0003-1672-0507", "linkedin": ";", "or_profile": "~Miruna_Oprescu1;~Nathan_Kallus1", "aff": "Brookhaven National Laboratory;Cornell University", "aff_domain": "bnl.gov;cornell.edu", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\noprescu2024estimating,\ntitle={Estimating Heterogeneous Treatment Effects by Combining Weak Instruments and Observational Data},\nauthor={Miruna Oprescu and Nathan Kallus},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c37x7CXZ2Y}\n}", "github": "", "reviewers": "UxMs;Gwhh;xHgN;znEs", "pdf_size": 965554, "rating": "6;6;6;6", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "28;102;178;71", "wc_strengths": "59;69;76;75", "wc_weaknesses": "259;78;496;76", "wc_questions": "65;2;320;23", "wc_limitations": "1;1;98;8", "wc_review": "412;252;1168;253", "wc_reply_reviewers": "15;17;306;10", "wc_reply_authors": "20;19;124;58", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.75, 54.77853137863409 ], "wc_strengths_avg": [ 69.75, 6.7592529172978875 ], "wc_weaknesses_avg": [ 227.25, 172.03687831392432 ], "wc_questions_avg": [ 102.5, 127.6058384244232 ], "wc_limitations_avg": [ 27.0, 41.09136162260871 ], "wc_review_avg": [ 521.25, 379.0365252848332 ], "wc_reply_reviewers_avg": [ 87.0, 126.46541029071942 ], "wc_reply_authors_avg": [ 55.25, 42.692944381946766 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4340866390249867849&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bnl.gov;cornell.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Brookhaven National Laboratory;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bnl.gov;https://www.cornell.edu", "aff_unique_abbr": "BNL;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "FedGMKD: An Efficient Prototype Federated Learning Framework through Knowledge Distillation and Discrepancy-Aware Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94448", "id": "c3OZBJpN7M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c3OZBJpN7M", "openreview": "https://openreview.net/forum?id=c3OZBJpN7M", "poster": "/media/PosterPDFs/NeurIPS%202024/94448.png?t=1731366045.4786115", "project": "", "author_site": "Jianqiao Zhang, Caifeng Shan, Jungong Han", "tldr": "", "abstract": "Federated Learning (FL) faces significant challenges due to data heterogeneity across distributed clients. To address this, we propose FedGMKD, a novel framework that combines knowledge distillation and differential aggregation for efficient prototype-based personalized FL without the need for public datasets or server-side generative models. FedGMKD introduces Cluster Knowledge Fusion, utilizing Gaussian Mixture Models to generate prototype features and soft predictions on the client side, enabling effective knowledge distillation while preserving data privacy. Additionally, we implement a Discrepancy-Aware Aggregation Technique that weights client contributions based on data quality and quantity, enhancing the global model's generalization across diverse client distributions. Theoretical analysis confirms the convergence of FedGMKD. Extensive experiments on benchmark datasets, including SVHN, CIFAR-10, and CIFAR-100, demonstrate that FedGMKD outperforms state-of-the-art methods, significantly improving both local and global accuracy in non-IID data settings.", "keywords": "Federated Learning;Clustering;Aggragation method;Knowledge Distillation;Prototype;Heterogeneous Data Environments", "primary_area": "machine_vision", "supplementary_material": "/attachment/bd9d041abb1c7282fb4d292f0f1b198ee6f85c95.zip", "author": "Jianqiao Zhang;Caifeng Shan;Jungong Han", "authorids": "~Jianqiao_Zhang1;~Caifeng_Shan2;~Jungong_Han1", "gender": "M;M;M", "homepage": "https://research.aber.ac.uk/en/persons/jianqiao-zhang;https://caifeng-shan.github.io/;https://jungonghan.github.io/", "dblp": ";98/428;98/6127", "google_scholar": "snO9RLAAAAAJ;fIXA_SsAAAAJ;hNi1gxAAAAAJ", "orcid": "0009-0002-9932-6205;0000-0002-2131-1671;0000-0003-4361-956X", "linkedin": ";;", "or_profile": "~Jianqiao_Zhang1;~Caifeng_Shan2;~Jungong_Han1", "aff": "University of Wales, Aberystwyth;Shandong University of Science and Technology;University of Sheffield", "aff_domain": "aber.ac.uk;sdust.edu.cn;sheffield.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024fedgmkd,\ntitle={Fed{GMKD}: An Efficient Prototype Federated Learning Framework through Knowledge Distillation and Discrepancy-Aware Aggregation},\nauthor={Jianqiao Zhang and Caifeng Shan and Jungong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c3OZBJpN7M}\n}", "github": "", "reviewers": "8XP3;nWER;KnUX;EYJh", "pdf_size": 1556972, "rating": "3;6;7;8", "confidence": "5;4;5;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "1;3;3;4", "wc_summary": "41;37;86;129", "wc_strengths": "8;70;171;180", "wc_weaknesses": "96;75;83;72", "wc_questions": "53;23;13;49", "wc_limitations": "4;4;8;11", "wc_review": "202;209;361;441", "wc_reply_reviewers": "746;81;0;22", "wc_reply_authors": "3113;31;0;24", "reply_reviewers": "6;1;0;2", "reply_authors": "7;2;1;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 73.25, 37.4991666574072 ], "wc_strengths_avg": [ 107.25, 71.75435526851314 ], "wc_weaknesses_avg": [ 81.5, 9.287087810503355 ], "wc_questions_avg": [ 34.5, 16.9336942218761 ], "wc_limitations_avg": [ 6.75, 2.947456530637899 ], "wc_review_avg": [ 303.25, 101.78991845954097 ], "wc_reply_reviewers_avg": [ 212.25, 309.58066396336835 ], "wc_reply_authors_avg": [ 792.0, 1340.0792886989934 ], "reply_reviewers_avg": [ 2.25, 2.277608394786075 ], "reply_authors_avg": [ 3.0, 2.345207879911715 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5345224838248488, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6781701224067578412&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "aber.ac.uk;sdust.edu.cn;sheffield.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wales;Shandong University of Science and Technology;University of Sheffield", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wales.ac.uk;http://www.sdstu.edu.cn/;https://www.sheffield.ac.uk", "aff_unique_abbr": "UoW;SDUST;Sheffield", "aff_campus_unique_index": "0", "aff_campus_unique": "Aberystwyth;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "$\\textit{Trans-LoRA}$: towards data-free Transferable Parameter Efficient Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94447", "id": "c3Pakdyi3t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c3Pakdyi3t", "openreview": "https://openreview.net/forum?id=c3Pakdyi3t", "poster": "/media/PosterPDFs/NeurIPS%202024/94447.png?t=1732054326.5089219", "project": "", "author_site": "Runqian Wang, Soumya Ghosh, David Cox, Diego Antognini, Aude Oliva, Rogerio Feris, Leonid Karlinsky", "tldr": "", "abstract": "Low-rank adapters (LoRA) and their variants are popular parameter-efficient fine-tuning (PEFT) techniques that closely match full model fine-tune performance while requiring only a small number of additional parameters. These additional LoRA parameters are specific to the base model being adapted. When the base model needs to be deprecated and replaced with a new one, all the associated LoRA modules need to be re-trained. Such re-training requires access to the data used to train the LoRA for the original base model. This is especially problematic for commercial cloud applications where the LoRA modules and the base models are hosted by service providers who may not be allowed to host proprietary client task data. To address this challenge, we propose $\\textit{Trans-LoRA}$ --- a novel method for lossless, nearly data-free transfer of LoRAs across base models. Our approach relies on synthetic data to transfer LoRA modules. Using large language models, we design a synthetic data generator to approximate the data-generating process of the $\\textit{observed}$ task data subset. Training on the resulting synthetic dataset transfers LoRA modules to new models. We show the effectiveness of our approach using both LLama and Gemma model families. Our approach achieves lossless (mostly improved) LoRA transfer between models within and across different base model families, and even between different PEFT methods, on a wide variety of tasks.", "keywords": "Parameter Efficient Finetuning;Knowledge Distillation;Large Language Model;Data Synthesis", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/1afcf90ed053c305fa30276f575e074fe4b62b5a.zip", "author": "Runqian Wang;Soumya Ghosh;David Daniel Cox;Diego Antognini;Aude Oliva;Rogerio Feris;Leonid Karlinsky", "authorids": "~Runqian_Wang1;~Soumya_Ghosh1;~David_Daniel_Cox1;~Diego_Antognini1;~Aude_Oliva1;~Rogerio_Feris1;~Leonid_Karlinsky3", "gender": ";M;;;;M;M", "homepage": ";http://soumyaghosh.com;;;;http://rogerioferis.com;", "dblp": ";58/5138;48/7659;;;;05/4463", "google_scholar": ";GEYQenQAAAAJ;;;;xt3XLjcAAAAJ;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Runqian_Wang1;~Soumya_Ghosh1;~David_Daniel_Cox1;~Diego_Antognini1;~Aude_Oliva1;~Rogerio_Feris1;~Leonid_Karlinsky3", "aff": ";International Business Machines;International Business Machines;;;International Business Machines;International Business Machines", "aff_domain": ";ibm.com;ibm.com;;;ibm.com;ibm.com", "position": ";Research Scientist;IBM Director, MIT-IBM Watson AI Lab;;;Research Manager;Principal Researcher", "bibtex": "@inproceedings{\nwang2024textittranslora,\ntitle={\\${\\textbackslash}textit\\{Trans-Lo{RA}\\}\\$: towards data-free Transferable Parameter Efficient Finetuning},\nauthor={Runqian Wang and Soumya Ghosh and David Daniel Cox and Diego Antognini and Aude Oliva and Rogerio Feris and Leonid Karlinsky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c3Pakdyi3t}\n}", "github": "", "reviewers": "7oKi;qZWK;K48P", "pdf_size": 2729877, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "57;70;77", "wc_strengths": "54;55;32", "wc_weaknesses": "166;152;187", "wc_questions": "2;2;2", "wc_limitations": "18;61;1", "wc_review": "297;340;299", "wc_reply_reviewers": "179;13;189", "wc_reply_authors": "676;80;302", "reply_reviewers": "2;1;2", "reply_authors": "4;3;4", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.0, 8.286535263104035 ], "wc_strengths_avg": [ 47.0, 10.614455552060438 ], "wc_weaknesses_avg": [ 168.33333333333334, 14.38363267359428 ], "wc_questions_avg": [ 2.0, 0.0 ], "wc_limitations_avg": [ 26.666666666666668, 25.249862485874164 ], "wc_review_avg": [ 312.0, 19.8158185969358 ], "wc_reply_reviewers_avg": [ 127.0, 80.7134850360624 ], "wc_reply_authors_avg": [ 352.6666666666667, 245.9394686141739 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11990083066705360542&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 3, "email": ";ibm.com;ibm.com;;;ibm.com;ibm.com", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient $\\Phi$-Regret Minimization with Low-Degree Swap Deviations in Extensive-Form Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94446", "id": "c4ElkpA0kh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c4ElkpA0kh", "openreview": "https://openreview.net/forum?id=c4ElkpA0kh", "poster": "/media/PosterPDFs/NeurIPS%202024/94446.png?t=1731712464.7089703", "project": "", "author_site": "Brian Zhang, Ioannis Anagnostides, Gabriele Farina, Tuomas Sandholm", "tldr": "", "abstract": "Recent breakthrough results by Dagan, Daskalakis, Fishelson and Golowich [2023] and Peng and Rubinstein [2023] established an efficient algorithm attaining at most $\\epsilon$ swap regret over extensive-form strategy spaces of dimension $N$ in $N^{\\tilde O(1/\\epsilon)}$ rounds. On the other extreme, Farina and Pipis [2023] developed an efficient algorithm for minimizing the weaker notion of linear-swap regret in $\\mathsf{poly}(N)/\\epsilon^2$ rounds. In this paper, we develop efficient parameterized algorithms for regimes between these two extremes. We introduce the set of $k$-mediator deviations, which generalize the untimed communication deviations recently introduced by Zhang, Farina and Sandholm [2024] to the case of having multiple mediators, and we develop algorithms for minimizing the regret with respect to this set of deviations in $N^{O(k)}/\\epsilon^2$ rounds. Moreover, by relating $k$-mediator deviations to low-degree polynomials, we show that regret minimization against degree-$k$ polynomial swap deviations is achievable in $N^{O(kd)^3}/\\epsilon^2$ rounds, where $d$ is the depth of the game, assuming a constant branching factor. For a fixed degree $k$, this is polynomial for Bayesian games and quasipolynomial more broadly when $d = \\mathsf{polylog} N$---the usual balancedness assumption on the game tree. The first key ingredient in our approach is a relaxation of the usual notion of a fixed point required in the framework of Gordon, Greenwald and Marks [2008]. Namely, for a given deviation $\\phi$, we show that it suffices to compute what we refer to as a fixed point in expectation; that is, a distribution $\\pi$ such that $\\mathbb{E}_{x \\sim \\pi} [\\phi(x) - x] \\approx 0$. Unlike the problem of computing an actual (approximate) fixed point $x \\approx \\phi(x)$, which we show is \\PPAD-hard, there is a simple and efficient algorithm for finding a solution that satisfies our relaxed notion. As a byproduct, we provide, to our knowledge, the fastest algorithm for computing $\\epsilon$-correlated equilibria in normal-form games in the medium-precision regime, obviating the need to solve a linear system in every round. Our second main contribution is a characterization of the set of low-degree deviations, made possible through a connection to low-depth decisions trees from Boolean analysis.", "keywords": "swap regret;extensive-form games;low-degree deviations", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/08654638bbba74492b64be8aa535eab252742c31.zip", "author": "Brian Hu Zhang;Ioannis Anagnostides;Gabriele Farina;Tuomas Sandholm", "authorids": "~Brian_Hu_Zhang1;~Ioannis_Anagnostides1;~Gabriele_Farina1;~Tuomas_Sandholm1", "gender": ";M;M;M", "homepage": ";;http://www.cs.cmu.edu/~gfarina/about/;http://www.cs.cmu.edu/~sandholm", "dblp": "213/8211;273/7648;;s/TuomasSandholm", "google_scholar": ";QVwDo_sAAAAJ;sktDNcEAAAAJ;0DpK1EMAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Brian_Hu_Zhang1;~Ioannis_Anagnostides1;~Gabriele_Farina1;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Massachusetts Institute of Technology;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;mit.edu;cmu.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024efficient,\ntitle={Efficient \\${\\textbackslash}Phi\\$-Regret Minimization with Low-Degree Swap Deviations in Extensive-Form Games},\nauthor={Brian Hu Zhang and Ioannis Anagnostides and Gabriele Farina and Tuomas Sandholm},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c4ElkpA0kh}\n}", "github": "", "reviewers": "aMpb;o8Ue;VzB7", "pdf_size": 592298, "rating": "5;6;6", "confidence": "4;1;1", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;2;2", "wc_summary": "124;303;152", "wc_strengths": "30;38;4", "wc_weaknesses": "621;57;4", "wc_questions": "6;55;4", "wc_limitations": "2;1;7", "wc_review": "783;454;171", "wc_reply_reviewers": "166;63;0", "wc_reply_authors": "196;0;0", "reply_reviewers": "2;1;0", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.0, 1.4142135623730951 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 193.0, 78.61721609588237 ], "wc_strengths_avg": [ 24.0, 14.514360704718161 ], "wc_weaknesses_avg": [ 227.33333333333334, 279.20402735076885 ], "wc_questions_avg": [ 21.666666666666668, 23.584363935078304 ], "wc_limitations_avg": [ 3.3333333333333335, 2.6246692913372702 ], "wc_review_avg": [ 469.3333333333333, 250.08309730078832 ], "wc_reply_reviewers_avg": [ 76.33333333333333, 68.42189383198594 ], "wc_reply_authors_avg": [ 65.33333333333333, 92.39528607504222 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999999, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11503669027317196154&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cmu.edu;cmu.edu;mit.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://web.mit.edu", "aff_unique_abbr": "CMU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SG-Bench: Evaluating LLM Safety Generalization Across Diverse Tasks and Prompt Types", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97610", "id": "c4JE1gemWc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c4JE1gemWc", "openreview": "https://openreview.net/forum?id=c4JE1gemWc", "poster": "/media/PosterPDFs/NeurIPS%202024/97610.png?t=1731508557.9125888", "project": "", "author_site": "Yutao Mou, Shikun Zhang, Wei Ye", "tldr": "", "abstract": "Ensuring the safety of large language model (LLM) applications is essential for developing trustworthy artificial intelligence. Current LLM safety benchmarks have two limitations. First, they focus solely on either discriminative or generative evaluation paradigms while ignoring their interconnection. Second, they rely on standardized inputs, overlooking the effects of widespread prompting techniques, such as system prompts, few-shot demonstrations, and chain-of-thought prompting. To overcome these issues, we developed SG-Bench, a novel benchmark to assess the generalization of LLM safety across various tasks and prompt types. This benchmark integrates both generative and discriminative evaluation tasks and includes extended data to examine the impact of prompt engineering and jailbreak on LLM safety. Our assessment of 3 advanced proprietary LLMs and 10 open-source LLMs with the benchmark reveals that most LLMs perform worse on discriminative tasks than generative ones, and are highly susceptible to prompts, indicating poor generalization in safety alignment. We also explain these findings quantitatively and qualitatively to provide insights for future research.", "keywords": "large language models;LLM safety evaluation;generalization", "primary_area": "", "supplementary_material": "/attachment/867829225689ffda3e85a0ac583a87c33c859226.zip", "author": "Yutao Mou;Shikun Zhang;Wei Ye", "authorids": "~Yutao_Mou1;~Shikun_Zhang2;~Wei_Ye2", "gender": ";M;M", "homepage": ";;https://se.pku.edu.cn/kcl/weiye/", "dblp": ";83/3715.html;09/5394-4", "google_scholar": ";uiklLscAAAAJ;RgLGFMIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yutao_Mou1;~Shikun_Zhang2;~Wei_Ye2", "aff": ";Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn", "position": ";Full Professor;Associate Professor", "bibtex": "@inproceedings{\nmou2024sgbench,\ntitle={{SG}-Bench: Evaluating {LLM} Safety Generalization Across Diverse Tasks and Prompt Types},\nauthor={Yutao Mou and Shikun Zhang and Wei Ye},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=c4JE1gemWc}\n}", "github": "", "reviewers": "5xxE;Qrt8;mRu5;Wtxn", "pdf_size": 2404764, "rating": "6;7;7;7", "confidence": "4;4;4;3", "wc_summary_and_contributions": "108;81;113;48", "wc_strengths": "41;22;149;56", "wc_improvement": "342;304;151;62", "wc_limitations": "70;1;9;15", "wc_correctness": "81;15;11;8", "wc_clarity": "1;41;5;4", "wc_relation_to_prior_work": "1;6;8;6", "wc_documentation": "14;24;7;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "659;495;454;205", "wc_reply_reviewers": "26;18;14;0", "wc_reply_authors": "240;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "5;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 87.5, 25.85053190942113 ], "wc_strengths_avg": [ 67.0, 48.85181675229694 ], "wc_improvement_avg": [ 214.75, 113.52835548883812 ], "wc_limitations_avg": [ 23.75, 27.160403163428924 ], "wc_correctness_avg": [ 28.75, 30.26858932953434 ], "wc_clarity_avg": [ 12.75, 16.37643123516232 ], "wc_relation_to_prior_work_avg": [ 5.25, 2.5860201081971503 ], "wc_documentation_avg": [ 12.5, 7.433034373659253 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 453.25, 162.56133457867526 ], "wc_reply_reviewers_avg": [ 14.5, 9.420721840708387 ], "wc_reply_authors_avg": [ 60.0, 103.92304845413264 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9698601985710634872&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "DrivAerNet++: A Large-Scale Multimodal Car Dataset with Computational Fluid Dynamics Simulations and Deep Learning Benchmarks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97609", "id": "c4NnhBi4oM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c4NnhBi4oM", "openreview": "https://openreview.net/forum?id=c4NnhBi4oM", "poster": "/media/PosterPDFs/NeurIPS%202024/97609.png?t=1731644171.0836258", "project": "", "author_site": "Mohamed Elrefaie, Florin Morar, Angela Dai, Faez Ahmed", "tldr": "", "abstract": "We present DrivAerNet++, the largest and most comprehensive multimodal dataset for aerodynamic car design. DrivAerNet++ comprises 8,000 diverse car designs modeled with high-fidelity computational fluid dynamics (CFD) simulations. The dataset includes diverse car configurations such as fastback, notchback, and estateback, with different underbody and wheel designs to represent both internal combustion engines and electric vehicles. Each entry in the dataset features detailed 3D meshes, parametric models, aerodynamic coefficients, and extensive flow and surface field data, along with segmented parts for car classification and point cloud data. This dataset supports a wide array of machine learning applications including data-driven design optimization, generative modeling, surrogate model training, CFD simulation acceleration, and geometric classification. With more than 39 TB of publicly available engineering data, DrivAerNet++ fills a significant gap in available resources, providing high-quality, diverse data to enhance model training, promote generalization, and accelerate automotive design processes. Along with rigorous dataset validation, we also provide ML benchmarking results on the task of aerodynamic drag prediction, showcasing the breadth of applications supported by our dataset. This dataset is set to significantly impact automotive design and broader engineering disciplines by fostering innovation and improving the fidelity of aerodynamic evaluations. Dataset and code available at: https://github.com/Mohamedelrefaie/DrivAerNet", "keywords": "CFD;Aerodynamics;Surrogate Models;Parametric Design;Car Design;Graph Neural Networks;Generative AI;3D Shapes;large-scale dataset;high-fidelity CFD", "primary_area": "", "supplementary_material": "", "author": "Mohamed Elrefaie;Florin Morar;Angela Dai;Faez Ahmed", "authorids": "~Mohamed_Elrefaie1;~Florin_Morar1;~Angela_Dai1;~Faez_Ahmed1", "gender": "M;M;F;", "homepage": "https://mohamedelrefaie.academic.ws/;https://www.beta-cae.us/;https://angeladai.github.io/;https://decode.mit.edu", "dblp": "372/2075;;149/1202;45/10603", "google_scholar": "O1iwnkQAAAAJ;;g-tGztMAAAAJ;5iElzo8AAAAJ", "orcid": "0009-0008-9981-0930;;;", "linkedin": "https://linkedin.com/in/mohamed-elrefaie-7754b9252;;;", "or_profile": "~Mohamed_Elrefaie1;~Florin_Morar1;~Angela_Dai1;~Faez_Ahmed1", "aff": "Massachusetts Institute of Technology;Florida State University;Technical University of Munich;Massachusetts Institute of Technology", "aff_domain": "mit.edu;fsu.edu;tum.de;mit.edu", "position": "Researcher;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nelrefaie2024drivaernet,\ntitle={DrivAerNet++: A Large-Scale Multimodal Car Dataset with Computational Fluid Dynamics Simulations and Deep Learning Benchmarks},\nauthor={Mohamed Elrefaie and Florin Morar and Angela Dai and Faez Ahmed},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=c4NnhBi4oM}\n}", "github": "", "reviewers": "7faB;V1DK;thgj;4nTN;6UA4;Ugn5", "pdf_size": 31706267, "rating": "5;6;7;7;8;9", "confidence": "3;3;3;2;4;3", "wc_summary_and_contributions": "80;98;79;60;35;116", "wc_strengths": "39;109;61;37;136;64", "wc_improvement": "20;78;17;1;92;39", "wc_limitations": "1;1;1;1;22;51", "wc_correctness": "1;1;1;1;35;1", "wc_clarity": "5;1;1;5;6;1", "wc_relation_to_prior_work": "1;43;1;11;181;1", "wc_documentation": "8;1;4;3;10;1", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "156;333;166;120;518;275", "wc_reply_reviewers": "0;0;14;33;42;0", "wc_reply_authors": "78;74;80;85;80;81", "reply_reviewers": "0;0;1;1;1;0", "reply_authors": "2;2;2;3;2;2", "rating_avg": [ 7.0, 1.2909944487358056 ], "confidence_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_and_contributions_avg": [ 78.0, 25.890796305508516 ], "wc_strengths_avg": [ 74.33333333333333, 36.35778993405525 ], "wc_improvement_avg": [ 41.166666666666664, 33.14320778413312 ], "wc_limitations_avg": [ 12.833333333333334, 18.711998527385813 ], "wc_correctness_avg": [ 6.666666666666667, 12.671051872498808 ], "wc_clarity_avg": [ 3.1666666666666665, 2.1921577396609844 ], "wc_relation_to_prior_work_avg": [ 39.666666666666664, 64.9273097822138 ], "wc_documentation_avg": [ 4.5, 3.4034296427770228 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 261.3333333333333, 136.23468313498176 ], "wc_reply_reviewers_avg": [ 14.833333333333334, 16.974654308376618 ], "wc_reply_authors_avg": [ 79.66666666666667, 3.2998316455372216 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.1666666666666665, 0.3726779962499649 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.22360679774997896, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1836281709894287681&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "mit.edu;fsu.edu;tum.de;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;Florida State University;Technical University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.fsu.edu;https://www.tum.de", "aff_unique_abbr": "MIT;FSU;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "How Does Message Passing Improve Collaborative Filtering?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94445", "id": "c78U5zi4eA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c78U5zi4eA", "openreview": "https://openreview.net/forum?id=c78U5zi4eA", "poster": "/media/PosterPDFs/NeurIPS%202024/94445.png?t=1731106422.328961", "project": "", "author_site": "Mingxuan Ju, William Shiao, Zhichun Guo, Yanfang Ye, Yozen Liu, Neil Shah, Tong Zhao", "tldr": "", "abstract": "Collaborative filtering (CF) has exhibited prominent results for recommender systems and been broadly utilized for real-world applications.\nA branch of research enhances CF methods by message passing (MP) used in graph neural networks, due to its strong capabilities of extracting knowledge from graph-structured data, like user-item bipartite graphs that naturally exist in CF. They assume that MP helps CF methods in a manner akin to its benefits for graph-based learning tasks in general (e.g., node classification). However, even though MP empirically improves CF, whether or not this assumption is correct still needs verification. To address this gap, we formally investigate why MP helps CF from multiple perspectives and show that many assumptions made by previous works are not entirely accurate. With our curated ablation studies and theoretical analyses, we discover that (i) MP improves the CF performance primarily by additional representations passed from neighbors during the forward pass instead of additional gradient updates to neighbor representations during the model back-propagation and (ii) MP usually helps low-degree nodes more than high-degree nodes.}Utilizing these novel findings, we present Test-time Aggregation for Collaborative Filtering, namely TAG-CF, a test-time augmentation framework that only conducts MP once at inference time. The key novelty of TAG-CF is that it effectively utilizes graph knowledge while circumventing most of notorious computational overheads of MP. Besides, TAG-CF is extremely versatile can be used as a plug-and-play module to enhance representations trained by different CF supervision signals. Evaluated on six datasets (i.e., five academic benchmarks and one real-world industrial dataset), TAG-CF consistently improves the recommendation performance of CF methods without graph by up to 39.2% on cold users and 31.7% on all users, with little to no extra computational overheads. Furthermore, compared with trending graph-enhanced CF methods, TAG-CF delivers comparable or even better performance with less than 1% of their total training times. Our code is publicly available at https://github.com/snap-research/Test-time-Aggregation-for-CF.", "keywords": "Recommender Systems;Test-time Augmentation;Collaborative Filtering", "primary_area": "other", "supplementary_material": "/attachment/f30bcdf0d1b6d5aadabf626b514645d89086a314.zip", "author": "Mingxuan Ju;William Shiao;Zhichun Guo;Yanfang Ye;Yozen Liu;Neil Shah;Tong Zhao", "authorids": "~Mingxuan_Ju1;~William_Shiao1;~Zhichun_Guo1;~Yanfang_Ye1;~Yozen_Liu1;~Neil_Shah2;~Tong_Zhao3", "gender": "M;M;;;;M;M", "homepage": "https://jumxglhf.github.io;https://shiao.me;;http://yes-lab.org/;https://www.linkedin.com/in/yozen-liu-531a67130/;http://nshah.net;https://tzhao.io/", "dblp": "234/2715;304/3898;;;242/8056.html;71/7771;94/6503-3", "google_scholar": "qNoO67AAAAAJ;TIq-P5AAAAAJ;;egjr888AAAAJ;i3U2JjEAAAAJ;Qut69OgAAAAJ;05cRc-MAAAAJ", "orcid": "0009-0008-9054-3856;0000-0001-5813-2266;;;;0000-0003-3261-8430;0000-0001-7660-1732", "linkedin": ";will-shiao;;;;;", "or_profile": "~Mingxuan_Ju1;~William_Shiao1;~Zhichun_Guo1;~Yanfang_Ye1;~Yozen_Liu1;~Neil_Shah2;~Tong_Zhao3", "aff": "University of Notre Dame;University of California, Riverside;;University of Notre Dame;Snap Inc.;Snap Inc.;Snap Inc.", "aff_domain": "nd.edu;ucr.edu;;nd.edu;snapchat.com;snap.com;snap.com", "position": "PhD student;PhD student;;Associate Professor;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nju2024how,\ntitle={How Does Message Passing Improve Collaborative Filtering?},\nauthor={Mingxuan Ju and William Shiao and Zhichun Guo and Yanfang Ye and Yozen Liu and Neil Shah and Tong Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c78U5zi4eA}\n}", "github": "", "reviewers": "jYGx;3fxq;vJWR;yNfp", "pdf_size": 779332, "rating": "4;5;6;7", "confidence": "4;5;4;4", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "138;67;83;78", "wc_strengths": "79;33;40;31", "wc_weaknesses": "301;94;126;10", "wc_questions": "108;247;2;60", "wc_limitations": "2;1;2;2", "wc_review": "628;442;253;181", "wc_reply_reviewers": "0;12;17;12", "wc_reply_authors": "0;37;20;24", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 27.463612289718917 ], "wc_strengths_avg": [ 45.75, 19.48557158514987 ], "wc_weaknesses_avg": [ 132.75, 105.9749380749996 ], "wc_questions_avg": [ 104.25, 90.5604080158653 ], "wc_limitations_avg": [ 1.75, 0.4330127018922193 ], "wc_review_avg": [ 376.0, 173.9353328107892 ], "wc_reply_reviewers_avg": [ 10.25, 6.2599920127744575 ], "wc_reply_authors_avg": [ 20.25, 13.273563952458284 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16991904459665836101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nd.edu;ucr.edu;;nd.edu;snapchat.com;snap.com;snap.com", "author_num": 7, "aff_unique_index": "0;1;0;2;2;2", "aff_unique_norm": "University of Notre Dame;University of California, Riverside;Snap Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nd.edu;https://www.ucr.edu;https://www.snapinc.com", "aff_unique_abbr": "Notre Dame;UCR;Snap", "aff_campus_unique_index": "1", "aff_campus_unique": ";Riverside", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stronger Than You Think: Benchmarking Weak Supervision on Realistic Tasks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97608", "id": "c7SApXZz4b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c7SApXZz4b", "openreview": "https://openreview.net/forum?id=c7SApXZz4b", "poster": "/media/PosterPDFs/NeurIPS%202024/97608.png?t=1731739880.2864194", "project": "", "author_site": "Tianyi Zhang, Linrong Cai, Jeffrey Li, Nicholas Roberts, Neel Guha, Frederic Sala", "tldr": "", "abstract": "Weak supervision (WS) is a popular approach for label-efficient learning, leveraging diverse sources of noisy but inexpensive *weak labels* to automatically annotate training data. Despite its wide usage, WS and its practical value are challenging to benchmark due to the many knobs in its setup, including: data sources, labeling functions (LFs), aggregation techniques (called label models), and end model pipelines. Existing evaluation suites tend to be limited, focusing on particular components or specialized use cases. Moreover, they often involve simplistic benchmark tasks or de-facto LF sets that are suboptimally written, producing insights that may not generalize to real-world settings. We address these limitations by introducing a new benchmark, BOXWRENCH, designed to more accurately reflect *real-world usages of WS*. This benchmark features tasks with (1) higher class cardinality and imbalance, (2) notable domain expertise requirements, and (3) opportunities to re-use LFs across parallel multilingual corpora. For all tasks, LFs are written using a careful procedure aimed at mimicking real-world settings. In contrast to existing WS benchmarks, we show that supervised learning requires substantial amounts (1000+) of labeled examples to match WS in many settings.", "keywords": "Weak Supervision (WS);weak supervised;benchmarking;label functions;noisy labels;LF generalization", "primary_area": "", "supplementary_material": "/attachment/e46fbe4675e890654e4820695aeba1299bf80c60.pdf", "author": "Tianyi Zhang;Linrong Cai;Jeffrey Li;Nicholas Roberts;Neel Guha;Frederic Sala", "authorids": "~Tianyi_Zhang22;~Linrong_Cai1;~Jeffrey_Li1;~Nicholas_Roberts2;~Neel_Guha1;~Frederic_Sala1", "gender": "M;M;M;M;M;M", "homepage": ";https://clrt19.com;;http://neelguha.com;https://pages.cs.wisc.edu/~fredsala/;https://nick11roberts.science/", "dblp": ";356/4050;;130/0311;133/3602;", "google_scholar": ";;JDS2BnIAAAAJ;YI5N4HQAAAAJ;9KhIkNkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0009-6283-8267;;;;;0000-0002-0625-9182", "linkedin": "tianyijackzhang/;;jeffrey-li-a78684111/;;;nick11roberts/", "or_profile": "~Tianyi_Zhang22;~Linrong_Cai1;~Jeffrey_Li1;~Neel_Guha1;~Frederic_Sala1;~Nicholas_Carl_Roberts1", "aff": "Department of Computer Science, University of Washington;University of Wisconsin - Madison;Department of Computer Science, University of Washington;Computer Science Department, Stanford University;University of Wisconsin, Madison;Together AI", "aff_domain": "cs.washington.edu;wisc.edu;cs.washington.edu;cs.stanford.edu;wisc.edu;together.ai", "position": "Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor;Intern", "bibtex": "@inproceedings{\nzhang2024stronger,\ntitle={Stronger Than You Think: Benchmarking Weak Supervision on Realistic Tasks},\nauthor={Tianyi Zhang and Linrong Cai and Jeffrey Li and Nicholas Roberts and Neel Guha and Frederic Sala},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=c7SApXZz4b}\n}", "github": "", "reviewers": "2MXd;QsfA;f8Lj;YJ8S", "pdf_size": 898253, "rating": "5;5;7;9", "confidence": "5;5;4;5", "wc_summary_and_contributions": "82;55;111;53", "wc_strengths": "5;42;38;2", "wc_improvement": "4;7;24;384", "wc_limitations": "4;1;7;1", "wc_correctness": "10;1;21;1", "wc_clarity": "6;1;7;1", "wc_relation_to_prior_work": "29;1;14;1", "wc_documentation": "6;1;8;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "147;110;231;445", "wc_reply_reviewers": "281;0;0;29", "wc_reply_authors": "347;61;61;0", "reply_reviewers": "2;0;0;2", "reply_authors": "4;2;3;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 75.25, 23.60481942316018 ], "wc_strengths_avg": [ 21.75, 18.335416548308903 ], "wc_improvement_avg": [ 104.75, 161.40535152218467 ], "wc_limitations_avg": [ 3.25, 2.48746859276655 ], "wc_correctness_avg": [ 8.25, 8.227241335952167 ], "wc_clarity_avg": [ 3.75, 2.7726341266023544 ], "wc_relation_to_prior_work_avg": [ 11.25, 11.54068888758379 ], "wc_documentation_avg": [ 4.0, 3.082207001484488 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 233.25, 129.8775865959943 ], "wc_reply_reviewers_avg": [ 77.5, 118.08577391032334 ], "wc_reply_authors_avg": [ 117.25, 134.96365251429734 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jjJbTLSe3jcJ:scholar.google.com/&scioq=Stronger+Than+You+Think:+Benchmarking+Weak+Supervision+on+Realistic+Tasks&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cs.washington.edu;wisc.edu;cs.washington.edu;cs.stanford.edu;wisc.edu;together.ai", "author_num": 6, "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "University of Washington;University of Wisconsin-Madison;Stanford University;University of Wisconsin;Together AI", "aff_unique_dep": "Department of Computer Science;;Computer Science Department;;", "aff_unique_url": "https://www.washington.edu;https://www.wisc.edu;https://www.stanford.edu;https://www.wisc.edu;https://www.together.ai", "aff_unique_abbr": "UW;UW-Madison;Stanford;UW;Together AI", "aff_campus_unique_index": "0;1;0;2;1", "aff_campus_unique": "Seattle;Madison;Stanford;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring Structured Semantic Priors Underlying Diffusion Score for Test-time Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94444", "id": "c7m1HahBNf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c7m1HahBNf", "openreview": "https://openreview.net/forum?id=c7m1HahBNf", "poster": "/media/PosterPDFs/NeurIPS%202024/94444.png?t=1731338094.6256871", "project": "", "author_site": "Mingjia Li, Shuang Li, Tongrui Su, Longhui Yuan, Jian Liang, Wei Li", "tldr": "", "abstract": "Capitalizing on the complementary advantages of generative and discriminative models has always been a compelling vision in machine learning, backed by a growing body of research. This work discloses the hidden semantic structure within score-based generative models, unveiling their potential as effective discriminative priors. Inspired by our theoretical findings, we propose DUSA to exploit the structured semantic priors underlying diffusion score to facilitate the test-time adaptation of image classifiers or dense predictors. Notably, DUSA extracts knowledge from a single timestep of denoising diffusion, lifting the curse of Monte Carlo-based likelihood estimation over timesteps. We demonstrate the efficacy of our DUSA in adapting a wide variety of competitive pre-trained discriminative models on diverse test-time scenarios. Additionally, a thorough ablation study is conducted to dissect the pivotal elements in DUSA. Code is publicly available at https://github.com/BIT-DA/DUSA.", "keywords": "test-time adaptation;diffusion models;generative models;classification;segmentation", "primary_area": "other", "supplementary_material": "/attachment/466dd9c21bf32a2ba79f64c963dd896a0536cabc.zip", "author": "Mingjia Li;Shuang Li;Tongrui Su;Longhui Yuan;Jian Liang;Wei Li", "authorids": "~Mingjia_Li2;~Shuang_Li6;~Tongrui_Su1;~Longhui_Yuan1;~Jian_Liang3;~Wei_Li28", "gender": ";M;M;M;M;M", "homepage": "https://kiwixr.github.io/;https://shuangli.xyz;https://molarsu.github.io/;https://yuanlonghui.github.io/;;", "dblp": "274/2159;43/6294-8;;307/5087;19/2208;64/6025-111", "google_scholar": "yNh6PHcAAAAJ;VXCiAc4AAAAJ;;https://scholar.google.cz/citations?user=fVnEIZEAAAAJ;mrunnpoAAAAJ;i8jP6q8AAAAJ", "orcid": ";0000-0001-6807-9905;;;;0000-0002-0059-3745", "linkedin": ";;;;;", "or_profile": "~Mingjia_Li2;~Shuang_Li6;~Tongrui_Su1;~Longhui_Yuan1;~Jian_Liang3;~Wei_Li28", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Kuaishou Technology;Inceptio", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;kuaishou.com;inceptio.ai", "position": "MS student;Associate Professor;Undergrad student;MS student;Senior Algorithm Engineer;Researcher", "bibtex": "@inproceedings{\nli2024exploring,\ntitle={Exploring Structured Semantic Priors Underlying Diffusion Score for Test-time Adaptation},\nauthor={Mingjia Li and Shuang Li and Tongrui Su and Longhui Yuan and Jian Liang and Wei Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c7m1HahBNf}\n}", "github": "", "reviewers": "6Z3D;oxUu;V2Wk;Your", "pdf_size": 11320475, "rating": "6;6;6;7", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "67;128;118;106", "wc_strengths": "69;95;102;51", "wc_weaknesses": "169;166;77;133", "wc_questions": "57;127;300;35", "wc_limitations": "7;18;17;1", "wc_review": "369;534;614;326", "wc_reply_reviewers": "17;73;261;31", "wc_reply_authors": "39;50;675;41", "reply_reviewers": "1;1;3;1", "reply_authors": "2;2;4;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.75, 23.14492384951828 ], "wc_strengths_avg": [ 79.25, 20.42516829796024 ], "wc_weaknesses_avg": [ 136.25, 37.00928937442598 ], "wc_questions_avg": [ 129.75, 103.99849758530168 ], "wc_limitations_avg": [ 10.75, 7.084313657652377 ], "wc_review_avg": [ 460.75, 117.71443199540148 ], "wc_reply_reviewers_avg": [ 95.5, 97.74840152145711 ], "wc_reply_authors_avg": [ 201.25, 273.55106927226586 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IaJBvAExNiYJ:scholar.google.com/&scioq=Exploring+Structured+Semantic+Priors+Underlying+Diffusion+Score+for+Test-time+Adaptation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn;kuaishou.com;inceptio.ai", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Beijing Institute of Technology;Kuaishou Technology;Inceptio", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.kuaishou.com;", "aff_unique_abbr": "BIT;Kuaishou;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "UnSeg: One Universal Unlearnable Example Generator is Enough against All Image Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94443", "id": "c8HOQIMwKP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c8HOQIMwKP", "openreview": "https://openreview.net/forum?id=c8HOQIMwKP", "poster": "/media/PosterPDFs/NeurIPS%202024/94443.png?t=1731063885.6805756", "project": "", "author_site": "Ye Sun, Hao Zhang, Tiehua Zhang, Xingjun Ma, Yu-Gang Jiang", "tldr": "", "abstract": "Image segmentation is a crucial vision task that groups pixels within an image into semantically meaningful segments, which is pivotal in obtaining a fine-grained understanding of real-world scenes. However, an increasing privacy concern exists regarding training large-scale image segmentation models on unauthorized private data. In this work, we exploit the concept of unlearnable examples to make images unusable to model training by generating and adding unlearnable noise into the original images. Particularly, we propose a novel Unlearnable Segmentation (UnSeg) framework to train a universal unlearnable noise generator that is capable of transforming any downstream images into their unlearnable version. The unlearnable noise generator is finetuned from the Segment Anything Model (SAM) via bilevel optimization on an interactive segmentation dataset towards minimizing the training error of a surrogate model that shares the same architecture with SAM (but trains from scratch). We empirically verify the effectiveness of UnSeg across 6 mainstream image segmentation tasks, 10 widely used datasets, and 7 different network architectures, and show that the unlearnable images can reduce the segmentation performance by a large margin. Our work provides useful insights into how to leverage foundation models in a data-efficient and computationally affordable manner to protect images against image segmentation models.", "keywords": "Unlearnable examples;image segmentation;segment anything model", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Ye Sun;Hao Zhang;Tiehua Zhang;Xingjun Ma;Yu-Gang Jiang", "authorids": "~Ye_Sun1;~Hao_Zhang39;~Tiehua_Zhang1;~Xingjun_Ma1;~Yu-Gang_Jiang1", "gender": "M;M;M;M;M", "homepage": ";https://haozhang534.github.io/;;http://xingjunma.com/;https://fvl.fudan.edu.cn/people/yugangjiang/", "dblp": ";55/2270-97;;195/8270;24/5818", "google_scholar": ";B8hPxMQAAAAJ;kFrxPKUAAAAJ;https://scholar.google.com.au/citations?user=XQViiyYAAAAJ;f3_FP8AAAAAJ", "orcid": "0009-0009-8607-7845;;;;", "linkedin": ";hao-zhang-3b09b8196/;;xingjun-ma-173532129/;", "or_profile": "~Ye_Sun1;~Hao_Zhang39;~Tiehua_Zhang1;~Xingjun_Ma1;~Yu-Gang_Jiang1", "aff": "Fudan University;Hong Kong University of Science and Technology;Alibaba Group;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;ust.hk;antgroup.com;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsun2024unseg,\ntitle={UnSeg: One Universal Unlearnable Example Generator is Enough against All Image Segmentation},\nauthor={Ye Sun and Hao Zhang and Tiehua Zhang and Xingjun Ma and Yu-Gang Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c8HOQIMwKP}\n}", "github": "", "reviewers": "Mhnt;zWqb;2xcM;PRaH", "pdf_size": 29126818, "rating": "5;5;5;6", "confidence": "3;5;4;5", "soundness": "3;2;3;4", "novelty": "2;2;2;4", "presentation": "2;3;2;4", "wc_summary": "111;21;97;69", "wc_strengths": "78;19;52;165", "wc_weaknesses": "63;47;184;154", "wc_questions": "179;9;7;34", "wc_limitations": "4;8;13;41", "wc_review": "435;104;353;463", "wc_reply_reviewers": "51;13;49;21", "wc_reply_authors": "103;92;32;40", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 74.5, 34.39113257803529 ], "wc_strengths_avg": [ 78.5, 54.14101956926929 ], "wc_weaknesses_avg": [ 112.0, 58.25375524376089 ], "wc_questions_avg": [ 57.25, 71.09280905970729 ], "wc_limitations_avg": [ 16.5, 14.5 ], "wc_review_avg": [ 338.75, 141.43262530265073 ], "wc_reply_reviewers_avg": [ 33.5, 16.75559608011604 ], "wc_reply_authors_avg": [ 66.75, 31.123744954616242 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11176908157120503348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;ust.hk;antgroup.com;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Fudan University;Hong Kong University of Science and Technology;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.ust.hk;https://www.alibaba.com", "aff_unique_abbr": "Fudan;HKUST;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Sequential Decision Making with Expert Demonstrations under Unobserved Heterogeneity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94442", "id": "c8cpMlPUbI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=c8cpMlPUbI", "openreview": "https://openreview.net/forum?id=c8cpMlPUbI", "poster": "/media/PosterPDFs/NeurIPS%202024/94442.png?t=1733673892.894756", "project": "", "author_site": "Vahid Balazadeh, Keertana Chidambaram, Viet Nguyen, Rahul Krishnan, Vasilis Syrgkanis", "tldr": "", "abstract": "We study the problem of online sequential decision-making given auxiliary demonstrations from _experts_ who made their decisions based on unobserved contextual information. These demonstrations can be viewed as solving related but slightly different tasks than what the learner faces. This setting arises in many application domains, such as self-driving cars, healthcare, and finance, where expert demonstrations are made using contextual information, which is not recorded in the data available to the learning agent. We model the problem as a zero-shot meta-reinforcement learning setting with an unknown task distribution and a Bayesian regret minimization objective, where the unobserved tasks are encoded as parameters with an unknown prior. We propose the Experts-as-Priors algorithm (ExPerior), an empirical Bayes approach that utilizes expert data to establish an informative prior distribution over the learner's decision-making problem. This prior enables the application of any Bayesian approach for online decision-making, such as posterior sampling. We demonstrate that our strategy surpasses existing behaviour cloning and online algorithms, as well as online-offline baselines for multi-armed bandits, Markov decision processes (MDPs), and partially observable MDPs, showcasing the broad reach and utility of ExPerior in using expert demonstrations across different decision-making setups.", "keywords": "meta-reinforcement learning;unobserved heterogeneity;empirical Bayes;maximum entropy;posterior sampling", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Vahid Balazadeh;Keertana Chidambaram;Viet Nguyen;Rahul Krishnan;Vasilis Syrgkanis", "authorids": "~Vahid_Balazadeh1;~Keertana_Chidambaram1;~Viet_Nguyen2;~Rahul_G_Krishnan1;~Vasilis_Syrgkanis1", "gender": "F;M;M;;M", "homepage": ";https://www.cs.toronto.edu/~viet/;http://www.cs.toronto.edu/~rahulgk/index.html;https://www.vsyrgkanis.com;https://vahidbalazadeh.me/", "dblp": "228/2200.html;;172/0880;;258/3369", "google_scholar": ";;ilJgXHkAAAAJ;G1WMpcUAAAAJ;yA3tqNsAAAAJ", "orcid": ";;;;", "linkedin": ";;rahulgk/;;", "or_profile": "~Keertana_Chidambaram1;~Viet_Nguyen2;~Rahul_G_Krishnan1;~Vasilis_Syrgkanis1;~Vahid_Balazadeh_Meresht1", "aff": "Stanford University;Department of Computer Science, University of Toronto;Department of Computer Science, University of Toronto;Stanford University;Department of Computer Science, University of Toronto", "aff_domain": "stanford.edu;cs.toronto.edu;cs.toronto.edu;stanford.edu;cs.toronto.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbalazadeh2024sequential,\ntitle={Sequential Decision Making with Expert Demonstrations under Unobserved Heterogeneity},\nauthor={Vahid Balazadeh and Keertana Chidambaram and Viet Nguyen and Rahul Krishnan and Vasilis Syrgkanis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=c8cpMlPUbI}\n}", "github": "", "reviewers": "2ZF5;11BA;FGqG;hfW9", "pdf_size": 1588446, "rating": "5;5;6;6", "confidence": "2;3;4;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "58;145;43;59", "wc_strengths": "97;37;34;47", "wc_weaknesses": "185;143;225;135", "wc_questions": "117;24;44;67", "wc_limitations": "12;10;15;1", "wc_review": "469;359;361;309", "wc_reply_reviewers": "0;79;16;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.25, 40.195615432531945 ], "wc_strengths_avg": [ 53.75, 25.43005112067217 ], "wc_weaknesses_avg": [ 172.0, 36.013886210738214 ], "wc_questions_avg": [ 63.0, 34.69149751740331 ], "wc_limitations_avg": [ 9.5, 5.220153254455275 ], "wc_review_avg": [ 374.5, 58.401626689673634 ], "wc_reply_reviewers_avg": [ 26.25, 30.986892390170397 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mJsnu8_Yi-MJ:scholar.google.com/&scioq=Sequential+Decision+Making+with+Expert+Demonstrations+under+Unobserved+Heterogeneity&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "stanford.edu;cs.toronto.edu;cs.toronto.edu;stanford.edu;cs.toronto.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Stanford University;University of Toronto", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.stanford.edu;https://www.utoronto.ca", "aff_unique_abbr": "Stanford;U of T", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Stanford;Toronto", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Instruction-Guided Visual Masking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94441", "id": "cA9gLXFaRo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cA9gLXFaRo", "openreview": "https://openreview.net/forum?id=cA9gLXFaRo", "poster": "", "project": "", "author_site": "Jinliang Zheng, Jianxiong Li, Sijie Cheng, Yinan Zheng, Jiaming Li, Jihao Liu, Yu Liu, Jingjing Liu, Xianyuan Zhan", "tldr": "", "abstract": "Instruction following is crucial in contemporary LLM. However, when extended to multimodal setting, it often suffers from misalignment between specific textual instruction and targeted local region of an image. To achieve more accurate and nuanced multimodal instruction following, we introduce Instruction-guided Visual Masking (IVM), a new versatile visual grounding model that is compatible with diverse multimodal models, such as LMM and robot model. By constructing visual masks for instruction-irrelevant regions, IVM-enhanced multimodal models can effectively focus on task-relevant image regions to better align with complex instructions. Specifically, we design a visual masking data generation pipeline and create an IVM-Mix-1M dataset with 1 million image-instruction pairs. We further introduce a new learning technique, Discriminator Weighted Supervised Learning (DWSL) for preferential IVM training that prioritizes high-quality data samples. Experimental results on generic multimodal tasks such as VQA and embodied robotic control demonstrate the versatility of IVM, which as a plug-and-play tool, significantly boosts the performance of diverse multimodal models, yielding new state-of-the-art results across challenging multimodal benchmarks. Code, model and data are available at https://github.com/2toinf/IVM.", "keywords": "MultiModal Instruction Following;Visual Grounding;Large MultiModal Models;Embodied AI", "primary_area": "generative_models", "supplementary_material": "/attachment/931ef549c761e32f7c550b8d5dc0d6bc3d30f629.zip", "author": "Jinliang Zheng;Jianxiong Li;Sijie Cheng;Yinan Zheng;Jiaming Li;Jihao Liu;Yu Liu;Jingjing Liu;Xianyuan Zhan", "authorids": "~Jinliang_Zheng1;~Jianxiong_Li1;~Sijie_Cheng1;~Yinan_Zheng1;~Jiaming_Li9;~Jihao_Liu4;~Yu_Liu2;~Jingjing_Liu2;~Xianyuan_Zhan1", "gender": "M;F;;M;M;M;;M;M", "homepage": "https://2toinf.github.io/;https://adacheng.github.io/;https://github.com/ZhengYinan-AIR;;https://jihaonew.github.io/;http://liuyu.us;https://air.tsinghua.edu.cn/en/info/1046/1194.htm#:~:text=Jingjing%20Liu%20is%20Professor%2C%20Principal,CVPR%2C%20ACL%2C%20etc.);http://zhanxianyuan.xyz/;https://facebear-ljx.github.io/", "dblp": "156/3720.html;160/7320;;;167/0509;97/2274-15;30/3008-1;181/5081;43/1987", "google_scholar": "3j5AHFsAAAAJ;pruwctkAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;PP1HyToAAAAJ;;BzJ_GboAAAAJ;pDMnGloAAAAJ;TRLwpiUAAAAJ", "orcid": "0009-0000-0605-2969;;;;;;;0000-0002-3683-0554;", "linkedin": ";;;;;;jingjing-liu-65703431/;;", "or_profile": "~Jinliang_Zheng1;~Sijie_Cheng1;~Yinan_Zheng1;~Jiaming_Li9;~Jihao_Liu4;~Yu_Liu2;~Jingjing_Liu2;~Xianyuan_Zhan1;~Li_Jianxiong1", "aff": "Sensetime Research;Tsinghua University;Tsinghua University;Tsinghua University;The Chinese University of Hong Kong;SenseTime;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "sensetime.com;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;cuhk.edu.hk;sensetime.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Intern;PhD student;PhD student;Undergrad student;PhD student;Principal Researcher;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nzheng2024instructionguided,\ntitle={Instruction-Guided Visual Masking},\nauthor={Jinliang Zheng and Jianxiong Li and Sijie Cheng and Yinan Zheng and Jiaming Li and Jihao Liu and Yu Liu and Jingjing Liu and Xianyuan Zhan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cA9gLXFaRo}\n}", "github": "", "reviewers": "zLHv;EbuK;5SdR;bwCu", "pdf_size": 25680224, "rating": "6;6;6;8", "confidence": "4;4;3;5", "soundness": "3;4;3;3", "novelty": "3;3;2;4", "presentation": "3;2;3;3", "wc_summary": "62;56;64;104", "wc_strengths": "36;59;111;65", "wc_weaknesses": "38;28;115;30", "wc_questions": "560;100;71;2", "wc_limitations": "2;7;5;1", "wc_review": "698;250;366;202", "wc_reply_reviewers": "92;38;0;10", "wc_reply_authors": "384;504;0;16", "reply_reviewers": "2;1;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 18.993419913222578 ], "wc_strengths_avg": [ 67.75, 27.215574585152524 ], "wc_weaknesses_avg": [ 52.75, 36.134298111351214 ], "wc_questions_avg": [ 183.25, 220.41027085868754 ], "wc_limitations_avg": [ 3.75, 2.384848003542364 ], "wc_review_avg": [ 379.0, 193.5846068260594 ], "wc_reply_reviewers_avg": [ 35.0, 35.73513677041127 ], "wc_reply_authors_avg": [ 226.0, 222.1621029788834 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=653927475643961717&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "sensetime.com;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;cuhk.edu.hk;sensetime.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 9, "aff_unique_index": "0;1;1;1;2;0;1;1;1", "aff_unique_norm": "SenseTime;Tsinghua University;Chinese University of Hong Kong", "aff_unique_dep": "Research;;", "aff_unique_url": "https://www.sensetime.com/;https://www.tsinghua.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "SenseTime;THU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Multivariate Probabilistic Time Series Forecasting with Correlated Errors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94440", "id": "cAFvxVFaii", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cAFvxVFaii", "openreview": "https://openreview.net/forum?id=cAFvxVFaii", "poster": "/media/PosterPDFs/NeurIPS%202024/94440.png?t=1731014574.5072656", "project": "", "author_site": "Zhihao Zheng, Lijun Sun", "tldr": "", "abstract": "Accurately modeling the correlation structure of errors is critical for reliable uncertainty quantification in probabilistic time series forecasting. While recent deep learning models for multivariate time series have developed efficient parameterizations for time-varying contemporaneous covariance, but they often assume temporal independence of errors for simplicity. However, real-world data often exhibit significant error autocorrelation and cross-lag correlation due to factors such as missing covariates. In this paper, we introduce a plug-and-play method that learns the covariance structure of errors over multiple steps for autoregressive models with Gaussian-distributed errors. To ensure scalable inference and computational efficiency, we model the contemporaneous covariance using a low-rank-plus-diagonal parameterization and capture cross-covariance through a group of independent latent temporal processes. The learned covariance matrix is then used to calibrate predictions based on observed residuals. We evaluate our method on probabilistic models built on RNNs and Transformer architectures, and the results confirm the effectiveness of our approach in improving predictive accuracy and uncertainty quantification without significantly increasing the parameter size.", "keywords": "probabilistic forecasting;time series;error correlation", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Vincent Zhihao Zheng;Lijun Sun", "authorids": "~Vincent_Zhihao_Zheng1;~Lijun_Sun1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nzheng2024multivariate,\ntitle={Multivariate Probabilistic Time Series Forecasting with Correlated Errors},\nauthor={Vincent Zhihao Zheng and Lijun Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cAFvxVFaii}\n}", "github": "", "reviewers": "RQB7;szpP;uzY6;Vi7g", "pdf_size": 8249627, "rating": "6;7;7;8", "confidence": "3;4;4;4", "soundness": "3;3;4;3", "novelty": "4;3;4;3", "presentation": "3;4;4;3", "wc_summary": "53;142;163;104", "wc_strengths": "27;72;88;74", "wc_weaknesses": "62;80;152;3", "wc_questions": "128;576;113;76", "wc_limitations": "8;1;28;3", "wc_review": "278;871;544;260", "wc_reply_reviewers": "18;70;96;14", "wc_reply_authors": "9;74;55;13", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 115.5, 41.82403615147634 ], "wc_strengths_avg": [ 65.25, 22.92787604642 ], "wc_weaknesses_avg": [ 74.25, 53.16189895028205 ], "wc_questions_avg": [ 223.25, 204.5377410161753 ], "wc_limitations_avg": [ 10.0, 10.700467279516348 ], "wc_review_avg": [ 488.25, 247.94593664748774 ], "wc_reply_reviewers_avg": [ 49.5, 34.767082132384935 ], "wc_reply_authors_avg": [ 37.75, 27.616797424755827 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6123357191486399407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";", "author_num": 2 }, { "title": "AnyFit: Controllable Virtual Try-on for Any Combination of Attire Across Any Scenario", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94439", "id": "cARFM6KKlE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cARFM6KKlE", "openreview": "https://openreview.net/forum?id=cARFM6KKlE", "poster": "/media/PosterPDFs/NeurIPS%202024/94439.png?t=1733818888.6927507", "project": "", "author_site": "Yuhan Li, Hao Zhou, Wenxiang Shang, Ran Lin, Xuanhong Chen, Bingbing Ni", "tldr": "", "abstract": "While image-based virtual try-on has made significant strides, emerging approaches still fall short of delivering high-fidelity and robust fitting images across various scenarios, as their models suffer from issues of ill-fitted garment styles and quality degrading during the training process, not to mention the lack of support for various combinations of attire. Therefore, we first propose a lightweight, scalable, operator known as Hydra Block for attire combinations. This is achieved through a parallel attention mechanism that facilitates the feature injection of multiple garments from conditionally encoded branches into the main network. Secondly, to significantly enhance the model's robustness and expressiveness in real-world scenarios, we evolve its potential across diverse settings by synthesizing the residuals of multiple models, as well as implementing a mask region boost strategy to overcome the instability caused by information leakage in existing models. \nEquipped with the above design, AnyFit surpasses all baselines on high-resolution benchmarks and real-world data by a large gap, excelling in producing well-fitting garments replete with photorealistic and rich details. Furthermore, AnyFit\u2019s impressive performance on high-fidelity virtual try-ons in any scenario from any image, paves a new path for future research within the fashion community.", "keywords": "Virtual try-on;Diffusion models;Multiple conditions", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9d41f90a3d3074c8f0d8ea3a1e29444700786048.zip", "author": "Yuhan Li;Hao Zhou;Wenxiang Shang;Ran Lin;Xuanhong Chen;Bingbing Ni", "authorids": "~Yuhan_Li2;~Hao_Zhou30;~Wenxiang_Shang1;~Ran_Lin1;~Xuanhong_Chen1;~Bingbing_Ni3", "gender": "M;M;M;M;M;M", "homepage": ";;https://github.com/neuralchen;;https://work.alibaba-inc.com/nwpipe/u/?spm=a1z2e.12184483.1999118341.5.38e1d2087cwEEK;https://github.com/shangwenxiang", "dblp": ";;255/6337;64/831.html;;", "google_scholar": "https://scholar.google.com.hk/citations?user=TDKTTlQAAAAJ;https://scholar.google.com.hk/citations?user=EUx0jJEAAAAJ;UuCqlfEAAAAJ;V9W87PYAAAAJ;;", "orcid": ";0000-0002-0173-0393;;;;", "linkedin": ";;;;;", "or_profile": "~Yuhan_Li2;~Hao_Zhou30;~Xuanhong_Chen1;~Bingbing_Ni3;~Lin_Ran1;~Shangwenxiang1", "aff": "Shanghai Jiaotong University;Alibaba Group;Shanghai Jiaotong University;Shanghai Jiaotong University;Alibaba Group;Taotian, Alibaba Group", "aff_domain": "sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;taobao.com", "position": "PhD student;Researcher;PhD student;Full Professor;Researcher;Algorithm Engneer", "bibtex": "@inproceedings{\nli2024anyfit,\ntitle={AnyFit: Controllable Virtual Try-on for Any Combination of Attire Across Any Scenario},\nauthor={Yuhan Li and Hao Zhou and Wenxiang Shang and Ran Lin and Xuanhong Chen and Bingbing Ni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cARFM6KKlE}\n}", "github": "", "reviewers": "v6TS;KSKd;Xer8;vsbp", "pdf_size": 0, "rating": "5;5;7;7", "confidence": "3;5;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "51;88;65;85", "wc_strengths": "47;72;85;72", "wc_weaknesses": "75;88;62;39", "wc_questions": "95;56;513;35", "wc_limitations": "1;38;10;30", "wc_review": "269;342;735;261", "wc_reply_reviewers": "20;13;48;15", "wc_reply_authors": "218;198;221;64", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 15.122417134836613 ], "wc_strengths_avg": [ 69.0, 13.765899897936205 ], "wc_weaknesses_avg": [ 66.0, 18.096961070853858 ], "wc_questions_avg": [ 174.75, 196.47184912857108 ], "wc_limitations_avg": [ 19.75, 14.872373717735847 ], "wc_review_avg": [ 401.75, 194.97355589925522 ], "wc_reply_reviewers_avg": [ 24.0, 14.089002803605371 ], "wc_reply_authors_avg": [ 175.25, 64.83585042243219 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4379399074240152350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;alibaba-inc.com;sjtu.edu.cn;sjtu.edu.cn;alibaba-inc.com;taobao.com", "author_num": 6, "aff_unique_index": "0;1;0;0;1;1", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SJTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Unified Principle of Pessimism for Offline Reinforcement Learning under Model Mismatch", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94438", "id": "cBY66CKEbq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cBY66CKEbq", "openreview": "https://openreview.net/forum?id=cBY66CKEbq", "poster": "", "project": "", "author_site": "Yue Wang, Zhongchang Sun, Shaofeng Zou", "tldr": "", "abstract": "In this paper, we address the challenges of offline reinforcement learning (RL) under model mismatch, where the agent aims to optimize its performance through an offline dataset that may not accurately represent the deployment environment. We identify two primary challenges under the setting: inaccurate model estimation due to limited data and performance degradation caused by the model mismatch between the dataset-collecting environment and the target deployment one. To tackle these issues, we propose a unified principle of pessimism using distributionally robust Markov decision processes. We carefully construct a robust MDP with a single uncertainty set to tackle both data sparsity and model mismatch, and demonstrate that the optimal robust policy enjoys a near-optimal sub-optimality gap under the target environment across three widely used uncertainty models: total variation, $\\chi^2$ divergence, and KL divergence. Our results improve upon or match the state-of-the-art performance under the total variation and KL divergence models, and provide the first result for the $\\chi^2$ divergence model.", "keywords": "reinforcement learning;offline;robust", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yue Wang;Zhongchang Sun;Shaofeng Zou", "authorids": "~Yue_Wang16;~Zhongchang_Sun1;~Shaofeng_Zou1", "gender": ";;", "homepage": "https://sites.google.com/view/ywangub;;", "dblp": "33/4822-68;;", "google_scholar": "ndMi_z8AAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": "0009-0001-9786-052X;;", "linkedin": ";;", "or_profile": "~Yue_Wang16;~Zhongchang_Sun1;~Shaofeng_Zou1", "aff": "University of Central Florida;State University of New York at Buffalo;", "aff_domain": "ucf.edu;buffalo.edu;", "position": "Assistant Professor;PhD student;", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A Unified Principle of Pessimism for Offline Reinforcement Learning under Model Mismatch},\nauthor={Yue Wang and Zhongchang Sun and Shaofeng Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cBY66CKEbq}\n}", "github": "", "reviewers": "3D3w;cT4V;TydZ", "pdf_size": 902919, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "227;40;81", "wc_strengths": "79;119;47", "wc_weaknesses": "79;134;79", "wc_questions": "24;1;43", "wc_limitations": "8;9;1", "wc_review": "417;303;251", "wc_reply_reviewers": "40;18;35", "wc_reply_authors": "32;85;32", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 116.0, 80.25376419001583 ], "wc_strengths_avg": [ 81.66666666666667, 29.454296045832695 ], "wc_weaknesses_avg": [ 97.33333333333333, 25.92724864350674 ], "wc_questions_avg": [ 22.666666666666668, 17.172329163188344 ], "wc_limitations_avg": [ 6.0, 3.559026084010437 ], "wc_review_avg": [ 323.6666666666667, 69.32692278056355 ], "wc_reply_reviewers_avg": [ 31.0, 9.41629792788369 ], "wc_reply_authors_avg": [ 49.666666666666664, 24.98443960192468 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yz37elgGG4QJ:scholar.google.com/&scioq=A+Unified+Principle+of+Pessimism+for+Offline+Reinforcement+Learning+under+Model+Mismatch&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ucf.edu;buffalo.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Central Florida;State University of New York at Buffalo", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucf.edu;https://www.buffalo.edu", "aff_unique_abbr": "UCF;SUNY Buffalo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Nearly Tight Black-Box Auditing of Differentially Private Machine Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94437", "id": "cCDMXXiamP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cCDMXXiamP", "openreview": "https://openreview.net/forum?id=cCDMXXiamP", "poster": "", "project": "", "author_site": "Meenatchi Sundaram Muthu Selva Annamalai, Emiliano De Cristofaro", "tldr": "", "abstract": "This paper presents an auditing procedure for the Differentially Private Stochastic Gradient Descent (DP-SGD) algorithm in the black-box threat model that is substantially tighter than prior work.\nThe main intuition is to craft worst-case initial model parameters, as DP-SGD's privacy analysis is agnostic to the choice of the initial model parameters.\nFor models trained on MNIST and CIFAR-10 at theoretical $\\varepsilon=10.0$, our auditing procedure yields empirical estimates of $\\varepsilon_{emp} = 7.21$ and $6.95$, respectively, on a 1,000-record sample and $\\varepsilon_{emp} = 6.48$ and $4.96$ on the full datasets.\nBy contrast, previous audits were only (relatively) tight in stronger white-box models, where the adversary can access the model's inner parameters and insert arbitrary gradients.\nOverall, our auditing procedure can offer valuable insight into how the privacy analysis of DP-SGD could be improved and detect bugs and DP violations in real-world implementations.\nThe source code needed to reproduce our experiments is available from https://github.com/spalabucr/bb-audit-dpsgd.", "keywords": "Privacy Auditing;Differential Privacy;DP-SGD", "primary_area": "privacy", "supplementary_material": "", "author": "Meenatchi Sundaram Muthu Selva Annamalai;Emiliano De Cristofaro", "authorids": "~Meenatchi_Sundaram_Muthu_Selva_Annamalai1;~Emiliano_De_Cristofaro1", "gender": ";", "homepage": "https://msundarmsa.github.io;https://emilianodc.com", "dblp": "303/3273;36/6225", "google_scholar": "zYVEyL4AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-6452-9865;", "linkedin": ";", "or_profile": "~Meenatchi_Sundaram_Muthu_Selva_Annamalai1;~Emiliano_De_Cristofaro1", "aff": "University College London, University of London;University of California, Riverside", "aff_domain": "ucl.ac.uk;ucr.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nannamalai2024nearly,\ntitle={Nearly Tight Black-Box Auditing of Differentially Private Machine Learning},\nauthor={Meenatchi Sundaram Muthu Selva Annamalai and Emiliano De Cristofaro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cCDMXXiamP}\n}", "github": "", "reviewers": "JVDo;uyKT;ncD1;qSii", "pdf_size": 1949834, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "2;4;4;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "88;223;50;43", "wc_strengths": "42;148;55;34", "wc_weaknesses": "78;193;355;166", "wc_questions": "280;51;313;83", "wc_limitations": "17;1;4;22", "wc_review": "505;616;777;348", "wc_reply_reviewers": "18;84;0;0", "wc_reply_authors": "19;33;44;44", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.0, 72.48793002976427 ], "wc_strengths_avg": [ 69.75, 45.79505977722925 ], "wc_weaknesses_avg": [ 198.0, 100.12242506052279 ], "wc_questions_avg": [ 181.75, 115.89515736216073 ], "wc_limitations_avg": [ 11.0, 8.74642784226795 ], "wc_review_avg": [ 561.5, 156.67242897204346 ], "wc_reply_reviewers_avg": [ 25.5, 34.565155865408734 ], "wc_reply_authors_avg": [ 35.0, 10.27131929208707 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3325949267793832739&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ucl.ac.uk;ucr.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University College London;University of California, Riverside", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.ucr.edu", "aff_unique_abbr": "UCL;UCR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Riverside", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Optimal Design for Human Preference Elicitation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94436", "id": "cCGWj61Ael", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cCGWj61Ael", "openreview": "https://openreview.net/forum?id=cCGWj61Ael", "poster": "/media/PosterPDFs/NeurIPS%202024/94436.png?t=1733697282.824555", "project": "", "author_site": "Subhojyoti Mukherjee, Anusha Lalitha, Kousha Kalantari, Aniket Anand Deshmukh, Ge Liu, Yifei Ma, Branislav Kveton", "tldr": "", "abstract": "Learning of preference models from human feedback has been central to recent advances in artificial intelligence. Motivated by the cost of obtaining high-quality human annotations, we study efficient human preference elicitation for learning preference models. The key idea in our work is to generalize optimal designs, an approach to computing optimal information-gathering policies, to lists of items that represent potential questions with answers. The policy is a distribution over the lists and we elicit preferences from them proportionally to their probabilities. To show the generality of our ideas, we study both absolute and ranking feedback models on items in the list. We design efficient algorithms for both and analyze them. Finally, we demonstrate that our algorithms are practical by evaluating them on existing question-answering problems.", "keywords": "optimal design;active learning;preference elicitation;human feedback;learning to rank", "primary_area": "active_learning", "supplementary_material": "", "author": "Subhojyoti Mukherjee;Anusha Lalitha;Kousha Kalantari;Aniket Anand Deshmukh;Ge Liu;Yifei Ma;Branislav Kveton", "authorids": "~Subhojyoti_Mukherjee1;~Anusha_Lalitha2;~Kousha_Kalantari1;~Aniket_Anand_Deshmukh1;~Ge_Liu2;~Yifei_Ma1;~Branislav_Kveton1", "gender": "M;F;M;M;F;;M", "homepage": "https://subhojyoti.github.io/;;;http://www-personal.umich.edu/~aniketde/;http://www.mit.edu/~geliu/;http://yma.io;http://www.bkveton.com", "dblp": "199/2032;153/2158;;;;;92/5526", "google_scholar": ";fMHs-NYAAAAJ;xTbxgOYAAAAJ;a4cD32QAAAAJ;P6EahzcAAAAJ;ZVMcyxYAAAAJ;CZaDvPgAAAAJ", "orcid": ";;0000-0002-7327-8105;;0000-0001-9383-5186;;", "linkedin": ";anushalalitha/;kalantari/;aniket2305/;;yifei-ma-48503620;", "or_profile": "~Subhojyoti_Mukherjee1;~Anusha_Lalitha2;~Kousha_Kalantari1;~Aniket_Anand_Deshmukh1;~Ge_Liu2;~Yifei_Ma1;~Branislav_Kveton1", "aff": "University of Wisconsin, Madison;Amazon;California Institute of Technology;Amazon;University of Washington;Amazon;Amazon", "aff_domain": "wisc.edu;amazon.com;caltech.edu;amazon.com;uw.edu;amazon.com;amazon.com", "position": "PhD student;Applied Scientist;PhD student;Applied Scientist;Postdoc;Applied Scientist;Principal Scientist", "bibtex": "@inproceedings{\nmukherjee2024optimal,\ntitle={Optimal Design for Human Preference Elicitation},\nauthor={Subhojyoti Mukherjee and Anusha Lalitha and Kousha Kalantari and Aniket Anand Deshmukh and Ge Liu and Yifei Ma and Branislav Kveton},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cCGWj61Ael}\n}", "github": "", "reviewers": "f8gc;Chgi;sgSH;HJve", "pdf_size": 456636, "rating": "5;5;7;7", "confidence": "3;3;3;3", "soundness": "3;3;4;4", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "42;138;69;126", "wc_strengths": "44;106;99;37", "wc_weaknesses": "374;235;427;110", "wc_questions": "2;75;119;1", "wc_limitations": "1;26;4;1", "wc_review": "463;580;718;275", "wc_reply_reviewers": "623;103;129;0", "wc_reply_authors": "1833;748;9;0", "reply_reviewers": "2;2;1;0", "reply_authors": "4;4;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 39.65081966365891 ], "wc_strengths_avg": [ 71.5, 31.196954979612993 ], "wc_weaknesses_avg": [ 286.5, 123.69417932950604 ], "wc_questions_avg": [ 49.25, 50.22138488731668 ], "wc_limitations_avg": [ 8.0, 10.464224768228174 ], "wc_review_avg": [ 509.0, 162.47615209623842 ], "wc_reply_reviewers_avg": [ 213.75, 241.15490353712488 ], "wc_reply_authors_avg": [ 647.5, 748.7404423430058 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5758236017764283389&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "wisc.edu;amazon.com;caltech.edu;amazon.com;uw.edu;amazon.com;amazon.com", "author_num": 7, "aff_unique_index": "0;1;2;1;3;1;1", "aff_unique_norm": "University of Wisconsin;Amazon;California Institute of Technology;University of Washington", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.wisc.edu;https://www.amazon.com;https://www.caltech.edu;https://www.washington.edu", "aff_unique_abbr": "UW;Amazon;Caltech;UW", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Madison;;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ParallelEdits: Efficient Multi-Aspect Text-Driven Image Editing with Attention Grouping", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94435", "id": "cCL92OPlDz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cCL92OPlDz", "openreview": "https://openreview.net/forum?id=cCL92OPlDz", "poster": "", "project": "", "author_site": "Mingzhen Huang, Jialing Cai, Shan Jia, Vishnu Lokhande, Siwei Lyu", "tldr": "", "abstract": "Text-driven image synthesis has made significant advancements with the development of diffusion models, transforming how visual content is generated from text prompts. Despite these advances, text-driven image editing, a key area in computer graphics, faces unique challenges. A major challenge is making simultaneous edits across multiple objects or attributes. Applying these methods sequentially for multi-attribute edits increases computational demands and efficiency losses. \n In this paper, we address these challenges with significant contributions. Our main contribution is the development of ParallelEdits, a method that seamlessly manages simultaneous edits across multiple attributes. In contrast to previous approaches, ParallelEdits not only preserves the quality of single attribute edits but also significantly improves the performance of multitasking edits. This is achieved through innovative attention distribution mechanism and multi-branch design that operates across several processing heads. \n Additionally, we introduce the PIE-Bench++ dataset, an expansion of the original PIE-Bench dataset, to better support evaluating image-editing tasks involving multiple objects and attributes simultaneously. This dataset is a benchmark for evaluating text-driven image editing methods in multifaceted scenarios.", "keywords": "Diffusion Models;Image Editing; Multi-aspect editing;consistency models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Mingzhen Huang;Jialing Cai;Shan Jia;Vishnu Suresh Lokhande;Siwei Lyu", "authorids": "~Mingzhen_Huang2;~Jialing_Cai1;~Shan_Jia1;~Vishnu_Suresh_Lokhande1;~Siwei_Lyu1", "gender": "M;F;F;;M", "homepage": "https://mingzhen-huang.github.io/;https://jialingyk.github.io/;https://shanface33.github.io/;;https://www.cse.buffalo.edu/~siweilyu", "dblp": "274/2202;;176/3600;;51/4482", "google_scholar": "https://scholar.google.com/citations?hl=en;6LwYYN0AAAAJ;YTAkshQAAAAJ;;wefAEM4AAAAJ", "orcid": ";;;;0000-0002-0992-685X", "linkedin": ";;;;siwei-lyu-0806022/", "or_profile": "~Mingzhen_Huang2;~Jialing_Cai1;~Shan_Jia1;~Vishnu_Suresh_Lokhande1;~Siwei_Lyu1", "aff": "Meta Facebook;State University of New York at Buffalo;State University of New York at Buffalo;;State University of New York, Buffalo", "aff_domain": "meta.com;buffalo.edu;buffalo.edu;;buffalo.edu", "position": "Intern;PhD student;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nhuang2024paralleledits,\ntitle={ParallelEdits: Efficient Multi-Aspect Text-Driven Image Editing with Attention Grouping},\nauthor={Mingzhen Huang and Jialing Cai and Shan Jia and Vishnu Suresh Lokhande and Siwei Lyu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cCL92OPlDz}\n}", "github": "", "reviewers": "J7sk;8ae7;6th7", "pdf_size": 6083310, "rating": "5;6;6", "confidence": "5;5;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "43;40;43", "wc_strengths": "28;31;58", "wc_weaknesses": "146;339;78", "wc_questions": "19;114;5", "wc_limitations": "2;1;11", "wc_review": "238;525;195", "wc_reply_reviewers": "0;261;112", "wc_reply_authors": "88;427;261", "reply_reviewers": "0;2;2", "reply_authors": "2;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 42.0, 1.4142135623730951 ], "wc_strengths_avg": [ 39.0, 13.490737563232042 ], "wc_weaknesses_avg": [ 187.66666666666666, 110.55114452395124 ], "wc_questions_avg": [ 46.0, 48.42175819470692 ], "wc_limitations_avg": [ 4.666666666666667, 4.496912521077347 ], "wc_review_avg": [ 319.3333333333333, 146.4839771290893 ], "wc_reply_reviewers_avg": [ 124.33333333333333, 106.90909949838488 ], "wc_reply_authors_avg": [ 258.6666666666667, 138.40600500781107 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2793024948906295220&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "meta.com;buffalo.edu;buffalo.edu;;buffalo.edu", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Meta;State University of New York at Buffalo", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.buffalo.edu", "aff_unique_abbr": "Meta;SUNY Buffalo", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "cCOpatbXFU", "title": "Investigating Variance Definitions for Stochastic Mirror Descent with Relative Smoothness", "track": "main", "status": "Reject", "tldr": "", "abstract": "Mirror Descent is a popular algorithm, that extends Gradients Descent (GD) beyond the Euclidean geometry. One of its benefits is to enable strong convergence guarantees through smooth-like analyses, even for objectives with exploding or vanishing curvature. This is achieved through the introduction of the notion of relative smoothness, which holds in many of the common use-cases of Mirror descent. While basic deterministic results extend well to the relative setting, most existing stochastic analyses require additional assumptions on the mirror, such as strong convexity (in the usual sense), to ensure bounded variance. In this work, we revisit Stochastic Mirror Descent (SMD) proofs in the (relatively-strongly-) convex and relatively-smooth setting, and introduce a new (less restrictive) definition of variance which can generally be bounded (globally) under mild regularity assumptions. We then investigate this notion in more details, and show that it naturally leads to strong convergence guarantees for stochastic mirror descent. Finally, we leverage this new analysis to obtain convergence guarantees for the Maximum A Posteriori estimator of a Gaussian with unknown mean and variance.", "keywords": "Mirror Descent;Bregman;Stochastic", "primary_area": "optimization", "supplementary_material": "", "author": "Hadrien Hendrikx", "authorids": "~Hadrien_Hendrikx1", "gender": "M", "homepage": "https://www.di.ens.fr/~hendrikx/", "dblp": "199/2214", "google_scholar": "7saBgggAAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Hadrien_Hendrikx1", "aff": "INRIA", "aff_domain": "inria.fr", "position": "Researcher", "bibtex": "@misc{\nanonymous2024investigating,\ntitle={Investigating Variance Definitions for Stochastic Mirror Descent with Relative Smoothness},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cCOpatbXFU}\n}", "github": "", "project": "", "reviewers": "6ts5;cHke;HwBA;H5hK;35cP", "site": "https://openreview.net/forum?id=cCOpatbXFU", "pdf_size": 387073, "rating": "4;5;5;6;6", "confidence": "3;4;3;4;4", "soundness": "2;3;3;4;2", "novelty": "2;3;2;3;3", "presentation": "3;3;2;4;1", "wc_summary": "126;125;74;34;147", "wc_strengths": "81;99;127;77;157", "wc_weaknesses": "1;201;355;170;269", "wc_questions": "45;6;279;8;1073", "wc_limitations": "1;1;22;1;33", "wc_review": "254;432;857;290;1679", "wc_reply_reviewers": "0;21;425;230;822", "wc_reply_authors": "0;0;670;532;484", "reply_reviewers": "0;1;1;1;2", "reply_authors": "1;1;2;2;3", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 101.2, 41.31537244174376 ], "wc_strengths_avg": [ 108.2, 30.1091348264941 ], "wc_weaknesses_avg": [ 199.2, 117.71558945186487 ], "wc_questions_avg": [ 282.2, 408.19377751259265 ], "wc_limitations_avg": [ 11.6, 13.440238093129153 ], "wc_review_avg": [ 702.4, 533.2731382696863 ], "wc_reply_reviewers_avg": [ 299.6, 303.5948616165959 ], "wc_reply_authors_avg": [ 337.2, 282.01446771398093 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.7637626158259733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w8x1Dm_VOgoJ:scholar.google.com/&scioq=Investigating+Variance+Definitions+for+Stochastic+Mirror+Descent+with+Relative+Smoothness&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Convolutions and More as Einsum: A Tensor Network Perspective with Advances for Second-Order Methods", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94434", "id": "cDS8WxnMVP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cDS8WxnMVP", "openreview": "https://openreview.net/forum?id=cDS8WxnMVP", "poster": "", "project": "", "tldr": "", "abstract": "Despite their simple intuition, convolutions are more tedious to analyze than dense layers, which complicates the transfer of theoretical and algorithmic ideas to convolutions. We simplify convolutions by viewing them as tensor networks (TNs) that allow reasoning about the underlying tensor multiplications by drawing diagrams, manipulating them to perform function transformations like differentiation, and efficiently evaluating them with `einsum`. To demonstrate their simplicity and expressiveness, we derive diagrams of various autodiff operations and popular curvature approximations with full hyper-parameter support, batching, channel groups, and generalization to any convolution dimension. Further, we provide convolution-specific transformations based on the connectivity pattern which allow to simplify diagrams before evaluation. Finally, we probe performance. Our TN implementation accelerates a recently-proposed KFAC variant up to 4.5 x while removing the standard implementation's memory overhead, and enables new hardware-efficient tensor dropout for approximate backpropagation.", "keywords": "Tensor networks;convolutions;KFAC;einsum;Second-order optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Felix Dangel", "authorids": "~Felix_Dangel1", "gender": "M", "homepage": "https://f-dangel.com", "dblp": "236/4218", "google_scholar": "9hlJ9W0AAAAJ", "orcid": "0000-0002-1414-8554", "linkedin": "", "or_profile": "~Felix_Dangel1", "aff": "Vector Institute, Toronto", "aff_domain": "vectorinstitute.ai", "position": "Postdoc", "bibtex": "@inproceedings{\ndangel2024convolutions,\ntitle={Convolutions and More as Einsum: A Tensor Network Perspective with Advances for Second-Order Methods},\nauthor={Felix Dangel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cDS8WxnMVP}\n}", "github": "", "reviewers": "iQrS;vbTD;cYbp", "pdf_size": 830728, "rating": "5;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;2;4", "presentation": "4;3;4", "wc_summary": "363;52;56", "wc_strengths": "212;100;78", "wc_weaknesses": "325;48;187", "wc_questions": "3;28;83", "wc_limitations": "3;1;54", "wc_review": "906;229;458", "wc_reply_reviewers": "0;10;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 157.0, 145.6731501226862 ], "wc_strengths_avg": [ 130.0, 58.67424193516833 ], "wc_weaknesses_avg": [ 186.66666666666666, 113.08502209498047 ], "wc_questions_avg": [ 38.0, 33.4165627596057 ], "wc_limitations_avg": [ 19.333333333333332, 24.526629518862872 ], "wc_review_avg": [ 531.0, 281.1630606368245 ], "wc_reply_reviewers_avg": [ 3.3333333333333335, 4.714045207910316 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XXw_8-I4f1sJ:scholar.google.com/&scioq=Convolutions+and+More+as+Einsum:+A+Tensor+Network+Perspective+with+Advances+for+Second-Order+Methods&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "vectorinstitute.ai", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Vector Institute", "aff_unique_dep": "", "aff_unique_url": "https://vectorinstitute.ai", "aff_unique_abbr": "Vector Institute", "aff_campus_unique_index": "0", "aff_campus_unique": "Toronto", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "DiscoveryWorld: A Virtual Environment for Developing and Evaluating Automated Scientific Discovery Agents", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97607", "id": "cDYqckEt6d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cDYqckEt6d", "openreview": "https://openreview.net/forum?id=cDYqckEt6d", "poster": "/media/PosterPDFs/NeurIPS%202024/97607.png?t=1733851568.8935978", "project": "", "author_site": "Peter Jansen, Marc-Alexandre C\u00f4t\u00e9, Tushar Khot, Erin Bransom, Bhavana Dalvi Mishra, Bodhisattwa Prasad Majumder, Oyvind Tafjord, Peter Clark", "tldr": "", "abstract": "Automated scientific discovery promises to accelerate progress across scientific domains, but evaluating an agent's capacity for end-to-end scientific reasoning is challenging as running real-world experiments is often prohibitively expensive or infeasible. In this work we introduce DiscoveryWorld, a virtual environment that enables benchmarking an agent's ability to perform complete cycles of novel scientific discovery in an inexpensive, simulated, multi-modal, long-horizon, and fictional setting.\nDiscoveryWorld consists of 24 scientific tasks across three levels of difficulty, each with parametric variations that provide new discoveries for agents to make across runs. Tasks require an agent to form hypotheses, design and run experiments, analyze results, and act on conclusions. Task difficulties are normed to range from straightforward to challenging for human scientists with advanced degrees. DiscoveryWorld further provides three automatic metrics for evaluating performance, including: (1) binary task completion, (2) fine-grained report cards detailing procedural scoring of task-relevant actions, and (3) the accuracy of discovered explanatory knowledge.\nWhile simulated environments such as DiscoveryWorld are low-fidelity compared to the real world, we find that strong baseline agents struggle on most DiscoveryWorld tasks, highlighting the utility of using simulated environments as proxy tasks for near-term development of scientific discovery competency in agents.", "keywords": "scientific discovery;agents;benchmarks", "primary_area": "", "supplementary_material": "/attachment/0156c0e69bda43786fa5a3316eeb71613a618b9f.pdf", "author": "Peter Jansen;Marc-Alexandre C\u00f4t\u00e9;Tushar Khot;Erin Bransom;Bhavana Dalvi Mishra;Bodhisattwa Prasad Majumder;Oyvind Tafjord;Peter Clark", "authorids": "~Peter_Jansen1;~Marc-Alexandre_C\u00f4t\u00e92;~Tushar_Khot1;~Erin_Bransom1;~Bhavana_Dalvi_Mishra2;~Bodhisattwa_Prasad_Majumder1;~Oyvind_Tafjord2;~Peter_Clark1", "gender": ";M;F;;M;M;F;M", "homepage": "http://www.cognitiveai.org;https://allenai.org/team/tushark/;;https://www.majumderb.com/;;https://allenai.org/team/peterc;https://bhavanadalvi.github.io/;https://www.microsoft.com/en-us/research/people/macote", "dblp": "72/5962;83/8117;;138/6177;178/8640;34/1184;78/6527;118/9636", "google_scholar": "wc1Hbl8AAAAJ;_8mkIjgAAAAJ;;cEM1a5gAAAAJ;https://scholar.google.com/citations?hl=en;o-5vyEsAAAAJ;9e0uFr4AAAAJ;https://scholar.google.ca/citations?user=L83CE5gAAAAJ", "orcid": ";;;;0000-0003-4190-5618;;;", "linkedin": ";;;;;peter-clark-a8b556/;;", "or_profile": "~Peter_Jansen1;~Tushar_Khot1;~Erin_Bransom1;~Bodhisattwa_Prasad_Majumder1;~Oyvind_Tafjord2;~Peter_Clark1;~Bhavana_Dalvi1;~Marc-Alexandre_Cote1", "aff": "Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Microsoft", "aff_domain": "allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;microsoft.com", "position": "Researcher;Lead Research Scientist;Analyst;Researcher;Researcher;Senior Research Manager;Lead Reserarch Scientist;Principal Researcher", "bibtex": "@inproceedings{\njansen2024discoveryworld,\ntitle={DiscoveryWorld: A Virtual Environment for Developing and Evaluating Automated Scientific Discovery Agents},\nauthor={Peter Jansen and Marc-Alexandre C{\\^o}t{\\'e} and Tushar Khot and Erin Bransom and Bhavana Dalvi Mishra and Bodhisattwa Prasad Majumder and Oyvind Tafjord and Peter Clark},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cDYqckEt6d}\n}", "github": "", "reviewers": "BgaZ;BBUY;TdaW;ub6S", "pdf_size": 2940939, "rating": "6;7;7;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "57;94;106;168", "wc_strengths": "85;98;64;139", "wc_improvement": "104;54;56;186", "wc_limitations": "16;99;1;87", "wc_correctness": "8;67;1;28", "wc_clarity": "5;36;1;40", "wc_relation_to_prior_work": "9;20;1;17", "wc_documentation": "13;17;1;68", "wc_additional_feedback": "1;1;1;1", "wc_review": "298;486;232;734", "wc_reply_reviewers": "0;167;59;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;2;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 106.25, 39.96482828688246 ], "wc_strengths_avg": [ 96.5, 27.37243138634199 ], "wc_improvement_avg": [ 100.0, 53.53503525729669 ], "wc_limitations_avg": [ 50.75, 42.79237665753095 ], "wc_correctness_avg": [ 26.0, 25.66125484071268 ], "wc_clarity_avg": [ 20.5, 17.613914953808536 ], "wc_relation_to_prior_work_avg": [ 11.75, 7.39509972887452 ], "wc_documentation_avg": [ 24.75, 25.655165171949292 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 437.5, 194.90702911901357 ], "wc_reply_reviewers_avg": [ 56.5, 68.19274154922941 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3061559577324851386&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;microsoft.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://allenai.org;https://www.microsoft.com", "aff_unique_abbr": "AI2;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Truthfulness of Calibration Measures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94433", "id": "cDa8hfTyGc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cDa8hfTyGc", "openreview": "https://openreview.net/forum?id=cDa8hfTyGc", "poster": "", "project": "", "author_site": "Nika Haghtalab, Mingda Qiao, Kunhe Yang, Eric Zhao", "tldr": "", "abstract": "We study calibration measures in a sequential prediction setup. In addition to rewarding accurate predictions (completeness) and penalizing incorrect ones (soundness), an important desideratum of calibration measures is *truthfulness*, a minimal condition for the forecaster not to be incentivized to exploit the system. Formally, a calibration measure is truthful if the forecaster (approximately) minimizes the expected penalty by predicting the conditional expectation of the next outcome, given the prior distribution of outcomes. We conduct a taxonomy of existing calibration measures. Perhaps surprisingly, all of them are far from being truthful. We introduce a new calibration measure termed the *Subsampled Smooth Calibration Error (SSCE)*, which is complete and sound, and under which truthful prediction is optimal up to a constant multiplicative factor. In contrast, under existing calibration measures, there are simple distributions on which a polylogarithmic (or even zero) penalty is achievable, while truthful prediction leads to a polynomial penalty.", "keywords": "Calibration measures;online learning;sequential calibration", "primary_area": "online_learning", "supplementary_material": "", "author": "Nika Haghtalab;Mingda Qiao;Kunhe Yang;Eric Zhao", "authorids": "~Nika_Haghtalab2;~Mingda_Qiao1;~Kunhe_Yang1;~Eric_Zhao1", "gender": "F;M;F;M", "homepage": "https://people.eecs.berkeley.edu/~nika/;https://sites.google.com/site/acmonsterqiao/;https://kunheyang.com/;https://eric-zhao.com", "dblp": ";185/0592;267/5467;294/8327.html", "google_scholar": ";mV9LQUoAAAAJ;-j0q9B4AAAAJ;6OfjaHQAAAAJ", "orcid": ";0000-0002-9182-6152;;", "linkedin": ";;;", "or_profile": "~Nika_Haghtalab2;~Mingda_Qiao1;~Kunhe_Yang1;~Eric_Zhao1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "Assistant Professor;Postdoc;PhD student;PhD student", "bibtex": "@inproceedings{\nhaghtalab2024truthfulness,\ntitle={Truthfulness of Calibration Measures},\nauthor={Nika Haghtalab and Mingda Qiao and Kunhe Yang and Eric Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cDa8hfTyGc}\n}", "github": "", "reviewers": "M6FP;74Zg;NYRM;ZWFw", "pdf_size": 607229, "rating": "6;6;7;7", "confidence": "1;1;3;4", "soundness": "3;3;4;3", "novelty": "3;2;3;4", "presentation": "3;1;3;3", "wc_summary": "105;107;77;101", "wc_strengths": "45;21;63;229", "wc_weaknesses": "114;36;195;157", "wc_questions": "3;101;73;47", "wc_limitations": "1;1;1;1", "wc_review": "268;266;409;535", "wc_reply_reviewers": "11;253;0;165", "wc_reply_authors": "0;292;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 97.5, 12.031209415515965 ], "wc_strengths_avg": [ 89.5, 81.90695941127348 ], "wc_weaknesses_avg": [ 125.5, 59.08680055646946 ], "wc_questions_avg": [ 56.0, 36.069377593742864 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 369.5, 111.76426083502723 ], "wc_reply_reviewers_avg": [ 107.25, 106.47153375433267 ], "wc_reply_authors_avg": [ 73.0, 126.43970895252804 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13613596786084011147&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "cEb305kE1V", "title": "Deep Implicit Optimization for Robust and Flexible Image Registration", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep Learning in Image Registration (DLIR) methods have been tremendously successful in image registration due to their speed and ability to incorporate weak label supervision at training time. However, DLIR methods forego many of the benefits of classical optimization-based methods. The functional nature of deep networks do not guarantee that the predicted transformation is a local minima of the registration objective, the representation of the transformation (displacement/velocity field/affine) is fixed, and the networks are not robust to domain shift. Our method aims to bridge this gap between classical and learning methods by incorporating optimization as a layer in a deep network. A deep network is trained to predict multi-scale dense feature images that are registered using a black box iterative optimization solver. This optimal warp is then used to minimize image and label alignment errors. By implicitly differentiating end-to-end through an iterative optimization solver, our learned features are registration and label-aware, and the warp functions are guaranteed to be local minima of the registration objective in the feature space. Our framework shows excellent performance on in-domain datasets, and is agnostic to domain shift such as anisotropy and varying intensity profiles. For the first time, our method allows switching between arbitrary transformation representations (free-form to diffeomorphic) at test time with zero retraining. End-to-end feature learning also facilitates interpretability of features, and out-of-the-box promptability using additional label-fidelity terms at inference.", "keywords": "image registration;image alignment;medical image registration;T1-weighed MRI;image alignment;deformable image registration;diffeomorphism;optimization", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/91ec785a94789376d1f7cc6b5a1bccd848980330.zip", "author": "Rohit Jena;Pratik Chaudhari;James Gee", "authorids": "~Rohit_Jena1;~Pratik_Chaudhari1;~James_Gee1", "gender": "M;M;M", "homepage": "https://jenaroh.it;https://pratikac.github.io/;https://www.med.upenn.edu/apps/faculty/index.php/g5455356/p10656", "dblp": "239/8707;;30/6904", "google_scholar": "kZQQFE4AAAAJ;c_z5hWEAAAAJ;https://scholar.google.com.tw/citations?user=fU8fmEIAAAAJ", "orcid": ";;", "linkedin": "rohitrango/;pratik-chaudhari-59508765;", "or_profile": "~Rohit_Jena1;~Pratik_Chaudhari1;~James_Gee1", "aff": "University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;seas.upenn.edu;upenn.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024deep,\ntitle={Deep Implicit Optimization for Robust and Flexible Image Registration},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cEb305kE1V}\n}", "github": "", "project": "", "reviewers": "uXMo;3syP;UTrd", "site": "https://openreview.net/forum?id=cEb305kE1V", "pdf_size": 2831710, "rating": "4;4;4", "confidence": "4;4;4", "soundness": "2;2;3", "novelty": "2;2;2", "presentation": "2;2;3", "wc_summary": "118;61;34", "wc_strengths": "110;39;93", "wc_weaknesses": "249;393;928", "wc_questions": "302;33;54", "wc_limitations": "125;7;38", "wc_review": "904;533;1147", "wc_reply_reviewers": "54;0;441", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 4.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 71.0, 35.014282800023196 ], "wc_strengths_avg": [ 80.66666666666667, 30.26916289265731 ], "wc_weaknesses_avg": [ 523.3333333333334, 292.1190776987281 ], "wc_questions_avg": [ 129.66666666666666, 122.15927672055402 ], "wc_limitations_avg": [ 56.666666666666664, 49.948862738693954 ], "wc_review_avg": [ 861.3333333333334, 252.47354096793762 ], "wc_reply_reviewers_avg": [ 165.0, 196.4026476399949 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8381746707092771689&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "StepbaQ: Stepping backward as Correction for Quantized Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94432", "id": "cEtExbAKYV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cEtExbAKYV", "openreview": "https://openreview.net/forum?id=cEtExbAKYV", "poster": "/media/PosterPDFs/NeurIPS%202024/94432.png?t=1731736103.9863923", "project": "", "author_site": "Yi-Chung Chen, Zhi-Kai Huang, Jing-Ren Chen", "tldr": "", "abstract": "Quantization of diffusion models has attracted considerable attention due to its potential to enable various applications on resource-constrained mobile devices. However, given the cumulative nature of quantization errors in quantized diffusion models, overall performance may still decline even with efforts to minimize quantization error at each sampling step.\nRecent studies have proposed several methods to address accumulated quantization error, yet these solutions often suffer from limited applicability due to their underlying assumptions or only partially resolve the issue due to an incomplete understanding.\nIn this work, we introduce a novel perspective by conceptualizing quantization error as a \"stepback\" in the denoising process. We investigate how the accumulation of quantization error can distort the sampling trajectory, resulting in a notable decrease in model performance. To address this challenge, we introduce StepbaQ, a method that calibrates the sampling trajectory and counteracts the adverse effects of accumulated quantization error through a sampling step correction mechanism. Notably, StepbaQ relies solely on statistics of quantization error derived from a small calibration dataset, highlighting its strong applicability.\nOur experimental results demonstrate that StepbaQ can serve as a plug-and-play technique to enhance the performance of diffusion models quantized by off-the-shelf tools without modifying the quantization settings. For example, StepbaQ significantly improves the performance of the quantized SD v1.5 model by 7.30 in terms of FID on SDprompts dataset under the common W8A8 setting, and it enhances the performance of the quantized SDXL-Turbo model by 17.31 in terms of FID on SDprompts dataset under the challenging W4A8 setting.", "keywords": "Diffusion Model; Model Quantization", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/270c25308e63259037636dcf5f6505393b0dc35a.zip", "author": "Yi-Chung Chen;Zhi-Kai Huang;Jing-Ren Chen", "authorids": "~Yi-Chung_Chen1;~Zhi-Kai_Huang1;~Jing-Ren_Chen1", "gender": ";M;", "homepage": ";https://sites.google.com/view/huangzhikai/home?authuser=0;", "dblp": ";33/6714;", "google_scholar": ";gkzKp0YAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yi-Chung_Chen1;~Zhi-Kai_Huang1;~Jing-Ren_Chen1", "aff": ";MediaTek Inc.;", "aff_domain": ";mediatek.com;", "position": ";Researcher;", "bibtex": "@inproceedings{\nchen2024stepbaq,\ntitle={StepbaQ: Stepping backward as Correction for Quantized Diffusion Models},\nauthor={Yi-Chung Chen and Zhi-Kai Huang and Jing-Ren Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cEtExbAKYV}\n}", "github": "", "reviewers": "twKC;5uyz;1NNd;oUSD;hn4w", "pdf_size": 4274669, "rating": "5;6;6;7;7", "confidence": "3;3;4;3;2", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "51;51;61;85;66", "wc_strengths": "36;17;48;43;48", "wc_weaknesses": "38;36;42;72;56", "wc_questions": "4;3;70;134;2", "wc_limitations": "2;47;44;1;47", "wc_review": "131;154;265;335;219", "wc_reply_reviewers": "17;20;0;21;26", "wc_reply_authors": "10;10;0;19;19", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.8, 12.528367810692659 ], "wc_strengths_avg": [ 38.4, 11.568923891183655 ], "wc_weaknesses_avg": [ 48.8, 13.541048703848604 ], "wc_questions_avg": [ 42.6, 52.557016658101894 ], "wc_limitations_avg": [ 28.2, 21.8302542358077 ], "wc_review_avg": [ 220.8, 74.19541764826181 ], "wc_reply_reviewers_avg": [ 16.8, 8.885943956609225 ], "wc_reply_authors_avg": [ 11.6, 7.059745037889116 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OEjietuxr9QJ:scholar.google.com/&scioq=StepbaQ:+Stepping+backward+as+Correction+for+Quantized+Diffusion+Models&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";mediatek.com;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "MediaTek Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.mediatek.com/", "aff_unique_abbr": "MediaTek", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Depth Anything V2", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94431", "id": "cFTi3gLJ1X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cFTi3gLJ1X", "openreview": "https://openreview.net/forum?id=cFTi3gLJ1X", "poster": "", "project": "", "author_site": "Lihe Yang, Bingyi Kang, Zilong Huang, Zhen Zhao, Xiaogang Xu, Jiashi Feng, Hengshuang Zhao", "tldr": "", "abstract": "This work presents Depth Anything V2. Without pursuing fancy techniques, we aim to reveal crucial findings to pave the way towards building a powerful monocular depth estimation model. Notably, compared with V1, this version produces much finer and more robust depth predictions through three key practices: 1) replacing all labeled real images with synthetic images, 2) scaling up the capacity of our teacher model, and 3) teaching student models via the bridge of large-scale pseudo-labeled real images. Compared with the latest models built on Stable Diffusion, our models are significantly more efficient (more than 10x faster) and more accurate. We offer models of different scales (ranging from 25M to 1.3B params) to support extensive scenarios. Benefiting from their strong generalization capability, we fine-tune them with metric depth labels to obtain our metric depth models. In addition to our models, considering the limited diversity and frequent noise in current test sets, we construct a versatile evaluation benchmark with sparse depth annotations to facilitate future research. Models are available at https://github.com/DepthAnything/Depth-Anything-V2.", "keywords": "Monocular depth estimation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lihe Yang;Bingyi Kang;Zilong Huang;Zhen Zhao;Xiaogang Xu;Jiashi Feng;Hengshuang Zhao", "authorids": "~Lihe_Yang1;~Bingyi_Kang1;~Zilong_Huang1;~Zhen_Zhao4;~Xiaogang_Xu2;~Jiashi_Feng1;~Hengshuang_Zhao2", "gender": ";;M;M;M;M;M", "homepage": "https://liheyoung.github.io/;https://bingykang.github.io/;http://speedinghzl.github.io/;http://zhaozhen.me/;https://xiaogang00.github.io;https://hszhao.github.io;https://sites.google.com/site/jshfeng/", "dblp": ";;185/9199;29/1773-1;118/2268-2;185/7848;56/8278", "google_scholar": "QX7xv3UAAAAJ;https://scholar.google.com.sg/citations?user=NmHgX-wAAAAJ;GW9vw8UAAAAJ;7mpuhO8AAAAJ;https://scholar.google.com.hk/citations?user=R65xDQwAAAAJ;4uE10I0AAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ", "orcid": ";;;0000-0002-0796-4078;0000-0002-7928-7336;0000-0001-8277-2706;0000-0001-6843-0064", "linkedin": ";;;;;hengshuang-zhao-347b8391/?originalSubdomain=hk;", "or_profile": "~Lihe_Yang1;~Bingyi_Kang1;~Zilong_Huang1;~Zhen_Zhao4;~Xiaogang_Xu2;~Hengshuang_Zhao2;~Jiashi_Feng2", "aff": "The University of Hong Kong;Bytedance;Bytedance;University of Sydney;Zhejiang University;The University of Hong Kong;ByteDance", "aff_domain": "hku.hk;bytedance.com;bytedance.com;usyd.edu.au;zju.edu.cn;hku.hk;bytedance.com", "position": "PhD student;Researcher;Researcher;PhD student;Assistant Professor;Assistant Professor;Research Lead", "bibtex": "@inproceedings{\nyang2024depth,\ntitle={Depth Anything V2},\nauthor={Lihe Yang and Bingyi Kang and Zilong Huang and Zhen Zhao and Xiaogang Xu and Jiashi Feng and Hengshuang Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cFTi3gLJ1X}\n}", "github": "", "reviewers": "Bw2h;RjZq;fDqs;rTF8", "pdf_size": 47648565, "rating": "5;6;7;7", "confidence": "4;5;4;5", "soundness": "3;3;4;4", "novelty": "3;2;3;4", "presentation": "3;3;4;4", "wc_summary": "72;108;101;45", "wc_strengths": "38;102;119;31", "wc_weaknesses": "118;157;99;222", "wc_questions": "127;68;7;42", "wc_limitations": "4;8;72;6", "wc_review": "359;443;398;346", "wc_reply_reviewers": "150;170;36;26", "wc_reply_authors": "336;207;0;28", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.5, 25.024987512484397 ], "wc_strengths_avg": [ 72.5, 38.55191305240247 ], "wc_weaknesses_avg": [ 149.0, 47.04784798479097 ], "wc_questions_avg": [ 61.0, 43.82350967232086 ], "wc_limitations_avg": [ 22.5, 28.613807855648993 ], "wc_review_avg": [ 386.5, 37.818646194701365 ], "wc_reply_reviewers_avg": [ 95.5, 64.98269000279997 ], "wc_reply_authors_avg": [ 142.75, 136.94775463657663 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 411, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17626116835536894609&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hku.hk;bytedance.com;bytedance.com;usyd.edu.au;zju.edu.cn;hku.hk;bytedance.com", "author_num": 7, "aff_unique_index": "0;1;1;2;3;0;1", "aff_unique_norm": "University of Hong Kong;ByteDance;University of Sydney;Zhejiang University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hku.hk;https://www.bytedance.com;https://www.sydney.edu.au;https://www.zju.edu.cn", "aff_unique_abbr": "HKU;Bytedance;USYD;ZJU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Divide-and-Conquer Meets Consensus: Unleashing the Power of Functions in Code Generation", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94430", "id": "cFqAANINgW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cFqAANINgW", "openreview": "https://openreview.net/forum?id=cFqAANINgW", "poster": "", "project": "", "author_site": "Jingchang Chen, Hongxuan Tang, Zheng Chu, Qianglong Chen, Zekun Wang, Ming Liu, Bing Qin", "tldr": "", "abstract": "Despite recent progress made by large language models in code generation, they still struggle with programs that meet complex requirements. Recent work utilizes plan-and-solve decomposition to decrease the complexity and leverage self-tests to refine the generated program. Yet, planning deep-inside requirements in advance can be challenging, and the tests need to be accurate to accomplish self-improvement. To this end, we propose FunCoder, a code generation framework incorporating the divide-and-conquer strategy with functional consensus. Specifically, FunCoder recursively branches off sub-functions as smaller goals during code generation, represented by a tree hierarchy. These sub-functions are then composited to attain more complex objectives. Additionally, we designate functions via a consensus formed by identifying similarities in program behavior, mitigating error propagation. FunCoder outperforms state-of-the-art methods by +9.8% on average in HumanEval, MBPP, xCodeEval and MATH with GPT-3.5 and GPT-4. Moreover, our method demonstrates superiority on smaller models: With FunCoder, StableCode-3b surpasses GPT-3.5 by +18.6% and achieves 97.7% of GPT-4's performance on HumanEval. Further analysis reveals that our proposed dynamic function decomposition is capable of handling complex requirements, and the functional consensus prevails over self-testing in correctness evaluation.", "keywords": "programming;language model;code generation;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jingchang Chen;Hongxuan Tang;Zheng Chu;Qianglong Chen;Zekun Wang;Ming Liu;Bing Qin", "authorids": "~Jingchang_Chen1;~Hongxuan_Tang2;~Zheng_Chu1;~Qianglong_Chen1;~Zekun_Wang1;~Ming_Liu6;~Bing_Qin2", "gender": "M;M;M;M;;M;", "homepage": ";https://github.com/jeffswt;;;;http://homepage.hit.edu.cn/liuming1981;http://ir.hit.edu.cn/~qinb", "dblp": "348/5906;;;277/9817;;20/2039-4.html;86/5934.html", "google_scholar": "X5ZvfE8AAAAJ;;NRD-_8kAAAAJ;4-NfQ9YAAAAJ;;VJtmTREAAAAJ;LKnCub0AAAAJ", "orcid": ";;;;;;0000-0002-2543-5604", "linkedin": "cometeme;;;;;;", "or_profile": "~Jingchang_Chen1;~Hongxuan_Tang2;~Zheng_Chu1;~Qianglong_Chen1;~Zekun_Wang1;~Ming_Liu6;~Bing_Qin2", "aff": "Harbin Institute of Technology;Microsoft;Harbin Institute of Technology;Zhejiang University;;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;microsoft.com;hit.edu.cn;zju.edu.cn;;hit.edu.cn;hit.edu.cn", "position": "MS student;Researcher;PhD student;Researcher;;Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024divideandconquer,\ntitle={Divide-and-Conquer Meets Consensus: Unleashing the Power of Functions in Code Generation},\nauthor={Jingchang Chen and Hongxuan Tang and Zheng Chu and Qianglong Chen and Zekun Wang and Ming Liu and Bing Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cFqAANINgW}\n}", "github": "", "reviewers": "s7Dq;Ar6t;LV7h;8URc", "pdf_size": 1095146, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "77;68;129;208", "wc_strengths": "8;81;71;76", "wc_weaknesses": "175;265;169;72", "wc_questions": "40;144;79;93", "wc_limitations": "27;17;8;16", "wc_review": "327;575;456;465", "wc_reply_reviewers": "407;121;61;0", "wc_reply_authors": "1035;57;138;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 120.5, 55.625983137379244 ], "wc_strengths_avg": [ 59.0, 29.656365252673833 ], "wc_weaknesses_avg": [ 170.25, 68.29119635794939 ], "wc_questions_avg": [ 89.0, 37.2223051408695 ], "wc_limitations_avg": [ 17.0, 6.745368781616021 ], "wc_review_avg": [ 455.75, 87.86744277603623 ], "wc_reply_reviewers_avg": [ 147.25, 155.9493106749754 ], "wc_reply_authors_avg": [ 307.5, 422.87498152527303 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8752466686052744517&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;microsoft.com;hit.edu.cn;zju.edu.cn;;hit.edu.cn;hit.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Harbin Institute of Technology;Microsoft;Zhejiang University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.microsoft.com;https://www.zju.edu.cn", "aff_unique_abbr": "HIT;Microsoft;ZJU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "MedSafetyBench: Evaluating and Improving the Medical Safety of Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97606", "id": "cFyagd2Yh4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cFyagd2Yh4", "openreview": "https://openreview.net/forum?id=cFyagd2Yh4", "poster": "", "project": "", "author_site": "Tessa Han, Aounon Kumar, Chirag Agarwal, Himabindu Lakkaraju", "tldr": "", "abstract": "As large language models (LLMs) develop increasingly sophisticated capabilities and find applications in medical settings, it becomes important to assess their medical safety due to their far-reaching implications for personal and public health, patient safety, and human rights. However, there is little to no understanding of the notion of medical safety in the context of LLMs, let alone how to evaluate and improve it. To address this gap, we first define the notion of medical safety in LLMs based on the Principles of Medical Ethics set forth by the American Medical Association. We then leverage this understanding to introduce MedSafetyBench, the first benchmark dataset designed to measure the medical safety of LLMs. We demonstrate the utility of MedSafetyBench by using it to evaluate and improve the medical safety of LLMs. Our results show that publicly-available medical LLMs do not meet standards of medical safety and that fine-tuning them using MedSafetyBench improves their medical safety while preserving their medical performance. By introducing this new benchmark dataset, our work enables a systematic study of the state of medical safety in LLMs and motivates future work in this area, paving the way to mitigate the safety risks of LLMs in medicine. The benchmark dataset and code are available at https://github.com/AI4LIFE-GROUP/med-safety-bench.", "keywords": "LLM safety;medical safety;alignment;instruction-tuning;medical ML", "primary_area": "", "supplementary_material": "", "author": "Tessa Han;Aounon Kumar;Chirag Agarwal;Himabindu Lakkaraju", "authorids": "~Tessa_Han1;~Aounon_Kumar1;~Chirag_Agarwal1;~Himabindu_Lakkaraju1", "gender": ";M;M;", "homepage": ";https://aounon.github.io;https://chirag-agarwall.github.io/;", "dblp": ";191/8334;173/8821;", "google_scholar": ";NjhpUykAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;", "linkedin": ";;chirag-agarwal-0a6a43a1/;", "or_profile": "~Tessa_Han1;~Aounon_Kumar1;~Chirag_Agarwal1;~Himabindu_Lakkaraju1", "aff": ";School of Engineering and Applied Sciences, Harvard University;Harvard University;", "aff_domain": ";seas.harvard.edu;hbs.edu;", "position": ";Postdoc;Postdoc;", "bibtex": "@inproceedings{\nhan2024medsafetybench,\ntitle={MedSafetyBench: Evaluating and Improving the Medical Safety of Large Language Models},\nauthor={Tessa Han and Aounon Kumar and Chirag Agarwal and Himabindu Lakkaraju},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cFyagd2Yh4}\n}", "github": "", "reviewers": "67b5;Taqs;FaWV", "pdf_size": 1283030, "rating": "5;7;7", "confidence": "4;4;4", "wc_summary_and_contributions": "85;111;83", "wc_strengths": "231;51;65", "wc_improvement": "338;30;107", "wc_limitations": "39;1;32", "wc_correctness": "60;1;53", "wc_clarity": "23;1;1", "wc_relation_to_prior_work": "45;1;1", "wc_documentation": "25;1;56", "wc_additional_feedback": "1;1;1", "wc_review": "847;198;399", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 93.0, 12.754084313139327 ], "wc_strengths_avg": [ 115.66666666666667, 81.75301557469031 ], "wc_improvement_avg": [ 158.33333333333334, 130.8748341822148 ], "wc_limitations_avg": [ 24.0, 16.51262143533445 ], "wc_correctness_avg": [ 38.0, 26.318561257535844 ], "wc_clarity_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_relation_to_prior_work_avg": [ 15.666666666666666, 20.741798914805393 ], "wc_documentation_avg": [ 27.333333333333332, 22.51419305435771 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 481.3333333333333, 271.27394927555343 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16358176686572686575&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";seas.harvard.edu;hbs.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "School of Engineering and Applied Sciences", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Boosting Transferability and Discriminability for Time Series Domain Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94429", "id": "cIBSsXowMr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cIBSsXowMr", "openreview": "https://openreview.net/forum?id=cIBSsXowMr", "poster": "/media/PosterPDFs/NeurIPS%202024/94429.png?t=1731758219.3460057", "project": "", "author_site": "Mingyang Liu, Xinyang Chen, Yang Shu, Xiucheng Li, Weili Guan, Liqiang Nie", "tldr": "", "abstract": "Unsupervised domain adaptation excels in transferring knowledge from a labeled source domain to an unlabeled target domain, playing a critical role in time series applications. Existing time series domain adaptation methods either ignore frequency features or treat temporal and frequency features equally, which makes it challenging to fully exploit the advantages of both types of features. In this paper, we delve into transferability and discriminability, two crucial properties in transferable representation learning. It's insightful to note that frequency features are more discriminative within a specific domain, while temporal features show better transferability across domains. Based on the findings, we propose **A**dversarial **CO**-learning **N**etworks (**ACON**), to enhance transferable representation learning through a collaborative learning manner in three aspects: (1) Considering the multi-periodicity in time series, multi-period frequency feature learning is proposed to enhance the discriminability of frequency features; (2) Temporal-frequency domain mutual learning is proposed to enhance the discriminability of temporal features in the source domain and improve the transferability of frequency features in the target domain; (3) Domain adversarial learning is conducted in the correlation subspaces of temporal-frequency features instead of original feature spaces to further enhance the transferability of both features. Extensive experiments conducted on a wide range of time series datasets and five common applications demonstrate the state-of-the-art performance of ACON. Code is available at .", "keywords": "Time-series; Domain Adaptation; Transferability; Discriminability; Frequency domain", "primary_area": "other", "supplementary_material": "", "author": "Mingyang Liu;Xinyang Chen;Yang Shu;Xiucheng Li;Weili Guan;Liqiang Nie", "authorids": "~Mingyang_Liu4;~Xinyang_Chen1;~Yang_Shu1;~Xiucheng_Li2;~Weili_Guan4;~Liqiang_Nie2", "gender": ";Not Specified;M;M;M;F", "homepage": "https://mingyangliu1024.github.io/;https://chenxinyang123.github.io/;https://shuyang96.github.io/;https://xiucheng.org/;https://liqiangnie.github.io/index.html;https://faculty.hitsz.edu.cn/guanweili", "dblp": ";242/3871-1;13/4526;152/8201;92/8277;236/2820.html", "google_scholar": "cM1g7gQAAAAJ;qVxhGWUAAAAJ;VdyHmIwAAAAJ;https://scholar.google.com.sg/citations?user=qFSxE6YAAAAJ;yywVMhUAAAAJ;", "orcid": "0009-0005-6622-1636;0000-0001-6743-838X;0000-0002-9009-2775;;0000-0003-1476-0273;0000-0002-5658-5509", "linkedin": ";;;;;", "or_profile": "~Mingyang_Liu4;~Xinyang_Chen1;~Yang_Shu1;~Xiucheng_Li2;~Liqiang_Nie2;~WEILI_GUAN1", "aff": "Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;East China Normal University;Harbin Institute of Technology;Shandong University;Harbin Institute of Technology (Shenzhen)", "aff_domain": "hit.edu.cn;hit.edu.cn;ecnu.edu.cn;hit.edu.cn;sdu.edu.cn;hit.edu.cn", "position": "Undergrad student;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024boosting,\ntitle={Boosting Transferability and Discriminability for Time Series Domain Adaptation},\nauthor={Mingyang Liu and Xinyang Chen and Yang Shu and Xiucheng Li and Weili Guan and Liqiang Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cIBSsXowMr}\n}", "github": "", "reviewers": "ggDU;U97c;5Lht;uayq", "pdf_size": 1127587, "rating": "6;6;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "62;73;66;45", "wc_strengths": "89;50;21;47", "wc_weaknesses": "146;134;68;49", "wc_questions": "265;44;2;2", "wc_limitations": "44;1;6;7", "wc_review": "606;302;163;150", "wc_reply_reviewers": "276;19;11;0", "wc_reply_authors": "1212;116;189;106", "reply_reviewers": "2;1;1;0", "reply_authors": "4;2;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 61.5, 10.307764064044152 ], "wc_strengths_avg": [ 51.75, 24.283482040267618 ], "wc_weaknesses_avg": [ 99.25, 41.51731566467177 ], "wc_questions_avg": [ 78.25, 109.17503148614155 ], "wc_limitations_avg": [ 14.5, 17.18284027743958 ], "wc_review_avg": [ 305.25, 183.57474635689954 ], "wc_reply_reviewers_avg": [ 76.5, 115.37872420858189 ], "wc_reply_authors_avg": [ 405.75, 466.5899564928504 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7084362585685837102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;hit.edu.cn;ecnu.edu.cn;hit.edu.cn;sdu.edu.cn;hit.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Harbin Institute of Technology;East China Normal University;Shandong University", "aff_unique_dep": ";;", "aff_unique_url": "http://en.hhit.edu.cn/;http://www.ecnu.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "HIT;ECNU;SDU", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Shenzhen;;Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "OW-VISCapTor: Abstractors for Open-World Video Instance Segmentation and Captioning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94428", "id": "cIVj8xLVZh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cIVj8xLVZh", "openreview": "https://openreview.net/forum?id=cIVj8xLVZh", "poster": "/media/PosterPDFs/NeurIPS%202024/94428.png?t=1731632063.828571", "project": "", "author_site": "Anwesa Choudhuri, Girish Chowdhary, Alex Schwing", "tldr": "", "abstract": "We propose the new task open-world video instance segmentation and captioning. It requires to detect, segment, track and describe with rich captions never before seen objects. This challenging task can be addressed by developing \"abstractors\" which connect a vision model and a language foundation model. Concretely, we connect a multi-scale visual feature extractor and a large language model (LLM) by developing an object abstractor and an object-to-text abstractor. The object abstractor, consisting of a prompt encoder and transformer blocks, introduces spatially-diverse open-world object queries to discover never before seen objects in videos. An inter-query contrastive loss further encourages the diversity of object queries. The object-to-text abstractor is augmented with masked cross-attention and acts as a bridge between the object queries and a frozen LLM to generate rich and descriptive object-centric captions for each detected object. Our generalized approach surpasses the baseline that jointly addresses the tasks of open-world video instance segmentation and dense video object captioning by 13% on never before seen objects, and by 10% on object-centric captions.", "keywords": "video instance segmentation;video object captioning;open-world video instance segmentation", "primary_area": "machine_vision", "supplementary_material": "/attachment/918523248d8038ea8692fe5109cb029a5ec38785.zip", "author": "Anwesa Choudhuri;Girish Chowdhary;Alex Schwing", "authorids": "~Anwesa_Choudhuri1;~Girish_Chowdhary1;~Alex_Schwing1", "gender": "F;M;Unspecified", "homepage": ";http://www.daslab.illinois.edu;https://ece.illinois.edu/directory/profile/aschwing", "dblp": "309/6631;09/5775;79/9775", "google_scholar": "KQnKUHKGFdUC;pf2zAXkAAAAJ;3B2c31wAAAAJ", "orcid": ";;", "linkedin": ";girishchowdhary/;", "or_profile": "~Anwesa_Choudhuri1;~Girish_Chowdhary1;~Alex_Schwing1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nchoudhuri2024owviscaptor,\ntitle={{OW}-{VISC}apTor: Abstractors for Open-World Video Instance Segmentation and Captioning},\nauthor={Anwesa Choudhuri and Girish Chowdhary and Alex Schwing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cIVj8xLVZh}\n}", "github": "", "reviewers": "zWxZ;W8BT;9nUt", "pdf_size": 6405136, "rating": "4;5;6", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;2;4", "presentation": "3;3;2", "wc_summary": "63;123;86", "wc_strengths": "33;39;118", "wc_weaknesses": "229;276;191", "wc_questions": "5;6;47", "wc_limitations": "11;10;1", "wc_review": "341;454;443", "wc_reply_reviewers": "109;43;55", "wc_reply_authors": "250;46;35", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 90.66666666666667, 24.716166549222166 ], "wc_strengths_avg": [ 63.333333333333336, 38.7327022323801 ], "wc_weaknesses_avg": [ 232.0, 34.76588366008646 ], "wc_questions_avg": [ 19.333333333333332, 19.567546828585563 ], "wc_limitations_avg": [ 7.333333333333333, 4.496912521077347 ], "wc_review_avg": [ 412.6666666666667, 50.87457343528516 ], "wc_reply_reviewers_avg": [ 69.0, 28.705400188814647 ], "wc_reply_authors_avg": [ 110.33333333333333, 98.86129452700666 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LtfbPYraLBQJ:scholar.google.com/&scioq=OW-VISCapTor:+Abstractors+for+Open-World+Video+Instance+Segmentation+and+Captioning&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "illinois.edu;illinois.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Training Binary Neural Networks via Gaussian Variational Inference and Low-Rank Semidefinite Programming", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94427", "id": "cIXETwTkhK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cIXETwTkhK", "openreview": "https://openreview.net/forum?id=cIXETwTkhK", "poster": "", "project": "", "author_site": "Lorenzo Orecchia, Jiawei Hu, Xue He, Wang Mark, Xulei Yang, Min Wu, Xue Geng", "tldr": "", "abstract": "Current methods for training Binarized Neural Networks (BNNs) heavily rely on the heuristic straight-through estimator (STE), which crucially enables the application of SGD-based optimizers to the combinatorial training problem. Although the STE heuristics and their variants have led to significant improvements in BNN performance, their theoretical underpinnings remain unclear and relatively understudied. In this paper, we propose a theoretically motivated optimization framework for BNN training based on Gaussian variational inference. In its simplest form, our approach yields a non-convex linear programming formulation whose variables and associated gradients motivate the use of latent weights and STE gradients. More importantly, our framework allows us to formulate semidefinite programming (SDP) relaxations to the BNN training task. Such formulations are able to explicitly models pairwise correlations between weights during training, leading to a more accurate optimization characterization of the training problem. As the size of such formulations grows quadratically in the number of weights, quickly becoming intractable for large networks, we apply the Burer-Monteiro approach and only optimize over linear-size low-rank SDP solutions. Our empirical evaluation on CIFAR-10, CIFAR-100, Tiny-ImageNet and ImageNet datasets shows our method consistently outperforming all state-of-the-art algorithms for training BNNs.", "keywords": "Binarized Neural Networks; Variational Inference; Semi-definite Programming;", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/c4b74641cc61bcdacd3d1f8d07c578ef9f030b30.zip", "author": "Lorenzo Orecchia;Jiawei Hu;Xue He;Wang Zhe Mark;Xulei Yang;Min Wu;Xue Geng", "authorids": "~Lorenzo_Orecchia1;~Jiawei_Hu3;~Xue_He2;~Wang_Zhe_Mark1;~Xulei_Yang2;~Min_Wu2;~Xue_Geng1", "gender": "M;;F;M;M;F;M", "homepage": "http://orecchia.net;https://scholar.google.com/citations?user=sMeV_90AAAAJ&hl=en;;https://dawdleryang.github.io/;https://sites.google.com/site/wumincf/;;https://www.linkedin.com/in/wangzhemark/?originalSubdomain=sg", "dblp": "32/4340;;;91/10215;16/0-8;149/3281;", "google_scholar": "https://scholar.google.com.tw/citations?user=dT7yOrwAAAAJ;sMeV_90AAAAJ;;https://scholar.google.com.sg/citations?user=tXkwIK8AAAAJ;https://scholar.google.com.sg/citations?user=Hji1uWQAAAAJ;ZYVZ1bgAAAAJ;Xqu6fAkAAAAJ", "orcid": ";;;0000-0002-7002-4564;0000-0003-0977-3600;;", "linkedin": ";;;yangxulei/?originalSubdomain=sg;;xue-geng-9963b95a/;", "or_profile": "~Lorenzo_Orecchia1;~Jiawei_Hu3;~Xue_He2;~Xulei_Yang2;~Min_Wu2;~Xue_Geng1;~Zhe_Wang12", "aff": "University of Chicago;Georgia Institute of Technology;Northeastern University;I2R, A*STAR;Institute for Infocomm Research (I2R), A*STAR;Institute for Infocomm Research, A*STAR;, A*STAR", "aff_domain": "uchicago.edu;gatech.edu;neu.edu;i2r.a-star.edu.sg;i2r.a-star.edu.sg;i2r.a-star.edu.sg;i2r.a-star.edu.sg", "position": "Assistant Professor;PhD student;PhD student;Principal Researcher;Principal Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\norecchia2024training,\ntitle={Training Binary Neural Networks via Gaussian Variational Inference and Low-Rank Semidefinite Programming},\nauthor={Lorenzo Orecchia and Jiawei Hu and Xue He and Wang Zhe Mark and Xulei Yang and Min Wu and Xue Geng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cIXETwTkhK}\n}", "github": "", "reviewers": "M9aW;6N2m;WzdJ;Bkmh;NfQd", "pdf_size": 419926, "rating": "3;6;6;6;7", "confidence": "3;4;3;3;4", "soundness": "2;4;3;3;2", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "88;184;139;61;50", "wc_strengths": "24;76;190;76;57", "wc_weaknesses": "251;176;113;59;109", "wc_questions": "1;241;104;99;78", "wc_limitations": "1;28;5;1;4", "wc_review": "365;705;551;296;298", "wc_reply_reviewers": "270;44;303;0;0", "wc_reply_authors": "563;29;143;0;0", "reply_reviewers": "2;1;1;0;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.4, 50.28956154113893 ], "wc_strengths_avg": [ 84.6, 56.01999642984637 ], "wc_weaknesses_avg": [ 141.6, 66.11384121347056 ], "wc_questions_avg": [ 104.6, 77.55926766028674 ], "wc_limitations_avg": [ 7.8, 10.225458424931373 ], "wc_review_avg": [ 443.0, 160.63997011951915 ], "wc_reply_reviewers_avg": [ 123.4, 134.5415920821513 ], "wc_reply_authors_avg": [ 147.0, 214.57586071131115 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5417363388859615, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:e6TzPGHRsFIJ:scholar.google.com/&scioq=Training+Binary+Neural+Networks+via+Gaussian+Variational+Inference+and+Low-Rank+Semidefinite+Programming&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "uchicago.edu;gatech.edu;neu.edu;i2r.a-star.edu.sg;i2r.a-star.edu.sg;i2r.a-star.edu.sg;i2r.a-star.edu.sg", "author_num": 7, "aff_unique_index": "0;1;2;3;4;4;3", "aff_unique_norm": "University of Chicago;Georgia Institute of Technology;Northeastern University;A*STAR;Institute for Infocomm Research", "aff_unique_dep": ";;;Institute for Infocomm Research;", "aff_unique_url": "https://www.uchicago.edu;https://www.gatech.edu;https://www.northeastern.edu;https://www.a-star.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "UChicago;Georgia Tech;NEU;A*STAR;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;1;1", "aff_country_unique": "United States;Singapore" }, { "title": "No-Regret Bandit Exploration based on Soft Tree Ensemble Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94426", "id": "cKKXBhyijL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cKKXBhyijL", "openreview": "https://openreview.net/forum?id=cKKXBhyijL", "poster": "", "project": "", "author_site": "Shogo Iwazaki, Shinya Suzumura", "tldr": "", "abstract": "We propose a novel stochastic bandit algorithm that employs reward estimates using a tree ensemble model. Specifically, our focus is on a soft tree model, a variant of the conventional decision tree that has undergone both practical and theoretical scrutiny in recent years. By deriving several non-trivial properties of soft trees, we extend the existing analytical techniques used for neural bandit algorithms to our soft tree-based algorithm. We demonstrate that our algorithm achieves a smaller cumulative regret compared to the existing ReLU-based neural bandit algorithms. We also show that this advantage comes with a trade-off: the hypothesis space of the soft tree ensemble model is more constrained than that of a ReLU-based neural network.", "keywords": "neural bandits; tree ensemble model; kernel bandits;", "primary_area": "bandits", "supplementary_material": "", "author": "Shogo Iwazaki;Shinya Suzumura", "authorids": "~Shogo_Iwazaki1;~Shinya_Suzumura1", "gender": "M;", "homepage": ";https://ssuzumura.github.io/", "dblp": "251/9091;118/8255", "google_scholar": ";https://scholar.google.co.jp/citations?user=aBCRzcatBcYC", "orcid": ";", "linkedin": "shogo-iwazaki-0692a1185/;", "or_profile": "~Shogo_Iwazaki1;~Shinya_Suzumura1", "aff": "LY Corporation;LY Corporation", "aff_domain": "lycorp.co.jp;yahoo-corp.jp", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\niwazaki2024noregret,\ntitle={No-Regret Bandit Exploration based on Soft Tree Ensemble Model},\nauthor={Shogo Iwazaki and Shinya Suzumura},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cKKXBhyijL}\n}", "github": "", "reviewers": "X7L7;PrZ2;ghBb;SWN3", "pdf_size": 1018878, "rating": "5;5;5;7", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "2;1;3;4", "wc_summary": "85;57;13;85", "wc_strengths": "65;34;16;52", "wc_weaknesses": "27;150;22;81", "wc_questions": "40;72;29;27", "wc_limitations": "5;48;7;21", "wc_review": "222;361;87;266", "wc_reply_reviewers": "0;8;0;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 60.0, 29.444863728670914 ], "wc_strengths_avg": [ 41.75, 18.498310733685926 ], "wc_weaknesses_avg": [ 70.0, 51.657526073167695 ], "wc_questions_avg": [ 42.0, 18.01388353465182 ], "wc_limitations_avg": [ 20.25, 17.166464400102893 ], "wc_review_avg": [ 234.0, 98.62301962523759 ], "wc_reply_reviewers_avg": [ 8.5, 10.618380290797651 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ha_yJBanLIcJ:scholar.google.com/&scioq=No-Regret+Bandit+Exploration+based+on+Soft+Tree+Ensemble+Model&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "lycorp.co.jp;yahoo-corp.jp", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "LY Corporation", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "WebUOT-1M: Advancing Deep Underwater Object Tracking with A Million-Scale Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97605", "id": "cLS4fLIA5P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cLS4fLIA5P", "openreview": "https://openreview.net/forum?id=cLS4fLIA5P", "poster": "/media/PosterPDFs/NeurIPS%202024/97605.png?t=1731225608.3998625", "project": "", "author_site": "Chunhui Zhang, Li Liu, Guanjie Huang, Hao Wen, XI ZHOU, Yanfeng Wang", "tldr": "", "abstract": "Underwater Object Tracking (UOT) is essential for identifying and tracking submerged objects in underwater videos, but existing datasets are limited in scale, diversity of target categories and scenarios covered, impeding the development of advanced tracking algorithms. To bridge this gap, we take the first step and introduce WebUOT-1M, \\ie, the largest public UOT benchmark to date, sourced from complex and realistic underwater environments. It comprises 1.1 million frames across 1,500 video clips filtered from 408 target categories, largely surpassing previous UOT datasets, \\eg, UVOT400. Through meticulous manual annotation and verification, we provide high-quality bounding boxes for underwater targets. Additionally, WebUOT-1M includes language prompts for video sequences, expanding its application areas, \\eg, underwater vision-language tracking. Given that most existing trackers are designed for open-air conditions and perform poorly in underwater environments due to domain gaps, we propose a novel framework that uses omni-knowledge distillation to train a student Transformer model effectively. To the best of our knowledge, this framework is the first to effectively transfer open-air domain knowledge to the UOT model through knowledge distillation, as demonstrated by results on both existing UOT datasets and the newly proposed WebUOT-1M. We have thoroughly tested WebUOT-1M with 30 deep trackers, showcasing its potential as a benchmark for future UOT research. The complete dataset, along with codes and tracking results, are publicly accessible at \\href{https://github.com/983632847/Awesome-Multimodal-Object-Tracking}{\\color{magenta}{here}}.", "keywords": "Underwater Object Tracking;Benchmark;Omni-knowledge Distillation", "primary_area": "", "supplementary_material": "/attachment/53cecf1070a466c764557ac068699e9683c62562.pdf", "author": "Chunhui Zhang;Li Liu;Guanjie Huang;Hao Wen;XI ZHOU;Yanfeng Wang", "authorids": "~Chunhui_Zhang2;~Li_Liu8;~Guanjie_Huang1;~Hao_Wen7;~XI_ZHOU2;~Yanfeng_Wang1", "gender": "M;F;;M;M;M", "homepage": "https://chunhui-zhang.github.io/chunhui-zhang/;https://liliu-avril.github.io/;;;;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": "https://dblp.uni-trier.de/pid/62/3401;33/4528-36;;;42/5705;55/5407-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;KQ2S01UAAAAJ;;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-9017-1828;;;;0000-0001-9943-5482;0000-0002-3196-2347", "linkedin": ";;;;;", "or_profile": "~Chunhui_Zhang2;~Li_Liu8;~Guanjie_Huang1;~Hao_Wen7;~XI_ZHOU2;~Yanfeng_Wang1", "aff": "Shanghai Jiaotong University;The Hong Kong University of Science and Technology (Guangzhou);;Cloudwalk;;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;hkust-gz.edu.cn;;cloudwalk.com;;sjtu.edu.cn", "position": "PhD student;Assistant Professor;;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nzhang2024webuotm,\ntitle={Web{UOT}-1M: Advancing Deep Underwater Object Tracking with A Million-Scale Benchmark},\nauthor={Chunhui Zhang and Li Liu and Guanjie Huang and Hao Wen and XI ZHOU and Yanfeng Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cLS4fLIA5P}\n}", "github": "", "reviewers": "9mKy;Fqcu;7sq1;iASE", "pdf_size": 0, "rating": "6;6;7;7", "confidence": "4;5;4;4", "wc_summary_and_contributions": "41;284;48;68", "wc_strengths": "28;42;25;47", "wc_improvement": "7;57;5;46", "wc_limitations": "1;275;13;9", "wc_correctness": "1;48;9;8", "wc_clarity": "1;134;4;8", "wc_relation_to_prior_work": "1;14;11;13", "wc_documentation": "1;1;2;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "82;856;118;217", "wc_reply_reviewers": "0;92;179;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;2;0", "reply_authors": "2;7;7;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 110.25, 100.80271573722605 ], "wc_strengths_avg": [ 35.5, 9.233092656309694 ], "wc_improvement_avg": [ 28.75, 23.09085316743407 ], "wc_limitations_avg": [ 74.5, 115.83932838203094 ], "wc_correctness_avg": [ 16.5, 18.445866745696716 ], "wc_clarity_avg": [ 36.75, 56.202201914159915 ], "wc_relation_to_prior_work_avg": [ 9.75, 5.165994579942956 ], "wc_documentation_avg": [ 5.25, 6.796138609534093 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 318.25, 314.3806411024699 ], "wc_reply_reviewers_avg": [ 67.75, 74.40556094809044 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.5, 2.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12881324296765140754&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;hkust-gz.edu.cn;;cloudwalk.com;;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Hong Kong University of Science and Technology;Cloudwalk", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ust.hk;", "aff_unique_abbr": "SJTU;HKUST;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Guangzhou", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "LINGOLY: A Benchmark of Olympiad-Level Linguistic Reasoning Puzzles in Low Resource and Extinct Languages", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97604", "id": "cLga8GStdk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cLga8GStdk", "openreview": "https://openreview.net/forum?id=cLga8GStdk", "poster": "/media/PosterPDFs/NeurIPS%202024/97604.png?t=1733220369.3277361", "project": "", "author_site": "Andrew M. Bean, Simeon Hellsten, Harry Mayne, Jabez Magomere, Ethan Chi, Ryan Chi, Scott Hale, Hannah Rose Kirk", "tldr": "", "abstract": "In this paper, we present the LingOly benchmark, a novel benchmark for advanced reasoning abilities in large language models. Using challenging Linguistic Olympiad puzzles, we evaluate (i) capabilities for in-context identification and generalisation of linguistic patterns in very low-resource or extinct languages, and (ii) abilities to follow complex task instructions. The LingOly benchmark covers more than 90 mostly low-resource languages, minimising issues of data contamination, and contains 1,133 problems across 6 formats and 5 levels of human difficulty. We assess performance with both direct accuracy and comparison to a no-context baseline to penalise memorisation. Scores from 11 state-of-the-art LLMs demonstrate the benchmark to be challenging, and models perform poorly on the higher difficulty problems. On harder problems, even the top model only achieved 38.7% accuracy, a 24.7% improvement over the no-context baseline. Large closed models typically outperform open models, and in general, the higher resource the language, the better the scores. These results indicate, in absence of memorisation, true multi-step out-of-domain reasoning remains a challenge for current language models.", "keywords": "reasoning;language models;benchmark;linguistics", "primary_area": "", "supplementary_material": "/attachment/c0ef9c8d638fbff03d3b493e3a18bd2a3e0c5297.pdf", "author": "Andrew Michael Bean;Simeon Hellsten;Harry Mayne;Jabez Magomere;Ethan A Chi;Ryan Andrew Chi;Scott A. Hale;Hannah Rose Kirk", "authorids": "~Andrew_Michael_Bean1;~Simeon_Hellsten1;~Harry_Mayne1;~Jabez_Magomere1;~Ethan_A_Chi1;~Ryan_Andrew_Chi1;~Scott_A._Hale1;~Hannah_Rose_Kirk1", "gender": "M;M;M;;M;Not Specified;F;M", "homepage": "https://sites.google.com/view/simihellsten;https://www.harrymayne.com;https://www.oii.ox.ac.uk/people/profiles/jabez-magomere/;http://ethanachi.com;http://ryanachi.com;http://scott.hale.us;https://www.hannahrosekirk.com/;https://www.am-bean.github.io", "dblp": ";;;255/5787;;32/10840;284/9434;244/9323", "google_scholar": ";XdD4wp4AAAAJ;;dRUwDFQAAAAJ;;PBJL9ZEAAAAJ;Fha8ldEAAAAJ;https://scholar.google.com.mx/citations?hl=en", "orcid": ";;;;;0000-0002-6894-4951;0000-0002-7419-5993;0000-0001-8439-5975", "linkedin": "simi-hellsten-b871331b5/?originalSubdomain=uk;;jabez-magomere/;https://linkedin.com/ethanchi;;https://linkedin.com/in/computermacgyver;hannah-rose-kirk;", "or_profile": "~Simeon_Hellsten1;~Harry_Mayne1;~Jabez_Magomere1;~Ethan_A_Chi1;~Ryan_Andrew_Chi1;~Scott_A._Hale1;~Hannah_Rose_Kirk1;~Andrew_Bean1", "aff": "University of Oxford;University of Oxford;J.P. Morgan Chase;Hudson River Trading;Stanford University;Alan Turing Institute;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;jpmorgan.com;hudson-trading.com;stanford.edu;turing.ac.uk;ox.ac.uk;ox.ac.uk", "position": "Undergrad student;PhD student;Intern;Researcher;Undergrad student;Fellow;PhD student;PhD student", "bibtex": "@inproceedings{\nbean2024lingoly,\ntitle={{LINGOLY}: A Benchmark of Olympiad-Level Linguistic Reasoning Puzzles in Low Resource and Extinct Languages},\nauthor={Andrew Michael Bean and Simeon Hellsten and Harry Mayne and Jabez Magomere and Ethan A Chi and Ryan Andrew Chi and Scott A. Hale and Hannah Rose Kirk},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cLga8GStdk}\n}", "github": "", "reviewers": "zdF1;vFZb;3gU2;CkJb", "pdf_size": 6049016, "rating": "7;8;9;10", "confidence": "4;5;4;4", "wc_summary_and_contributions": "32;45;47;222", "wc_strengths": "16;148;13;76", "wc_improvement": "3;63;76;55", "wc_limitations": "1;15;1;8", "wc_correctness": "1;18;1;12", "wc_clarity": "1;31;1;4", "wc_relation_to_prior_work": "1;1;1;21", "wc_documentation": "1;1;1;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "57;323;142;414", "wc_reply_reviewers": "185;15;7;310", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;1;1;2", "reply_authors": "3;1;1;3", "rating_avg": [ 8.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 86.5, 78.44265421312565 ], "wc_strengths_avg": [ 63.25, 55.00624964492671 ], "wc_improvement_avg": [ 49.25, 27.73422975314079 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_correctness_avg": [ 8.0, 7.314369419163897 ], "wc_clarity_avg": [ 9.25, 12.616952880945542 ], "wc_relation_to_prior_work_avg": [ 6.0, 8.660254037844387 ], "wc_documentation_avg": [ 4.5, 6.06217782649107 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 234.0, 141.52208308246455 ], "wc_reply_reviewers_avg": [ 129.25, 126.27029539840318 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=697624496231088115&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;ox.ac.uk;jpmorgan.com;hudson-trading.com;stanford.edu;turing.ac.uk;ox.ac.uk;ox.ac.uk", "author_num": 8, "aff_unique_index": "0;0;1;2;3;4;0;0", "aff_unique_norm": "University of Oxford;JPMorgan Chase & Co.;Hudson River Trading;Stanford University;Alan Turing Institute", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.jpmorganchase.com;https://www.hudsonrivertarding.com;https://www.stanford.edu;https://www.turing.ac.uk", "aff_unique_abbr": "Oxford;JPM;HRT;Stanford;ATI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;1;1;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Stopping Bayesian Optimization with Probabilistic Regret Bounds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94425", "id": "cM2gU9XGti", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cM2gU9XGti", "openreview": "https://openreview.net/forum?id=cM2gU9XGti", "poster": "", "project": "", "author_site": "James Wilson", "tldr": "", "abstract": "Bayesian optimization is a popular framework for efficiently tackling black-box search problems. As a rule, these algorithms operate by iteratively choosing what to evaluate next until some predefined budget has been exhausted. We investigate replacing this de facto stopping rule with criteria based on the probability that a point satisfies a given set of conditions. We focus on the prototypical example of an $(\\epsilon, \\delta)$-criterion: stop when a solution has been found whose value is within $\\epsilon > 0$ of the optimum with probability at least $1 - \\delta$ under the model. For Gaussian process priors, we show that Bayesian optimization satisfies this criterion under mild technical assumptions. Further, we give a practical algorithm for evaluating Monte Carlo stopping rules in a manner that is both sample efficient and robust to estimation error. These findings are accompanied by empirical results which demonstrate the strengths and weaknesses of the proposed approach.", "keywords": "Bayesian optimization;Gaussian processes", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/9336f19389028866ce1f410b87a65d189772d14b.zip", "author": "James T. Wilson", "authorids": "~James_T._Wilson1", "gender": "M", "homepage": "", "dblp": "162/0025", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~James_T._Wilson1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nwilson2024stopping,\ntitle={Stopping Bayesian Optimization with Probabilistic Regret Bounds},\nauthor={James T. Wilson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cM2gU9XGti}\n}", "github": "", "reviewers": "cuEF;GLW6;5ozp;pJBb", "pdf_size": 649661, "rating": "4;5;6;7", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "1;3;3;3", "wc_summary": "42;58;52;146", "wc_strengths": "26;32;50;102", "wc_weaknesses": "120;108;45;1", "wc_questions": "154;63;332;30", "wc_limitations": "2;5;22;1", "wc_review": "344;266;501;280", "wc_reply_reviewers": "539;89;246;68", "wc_reply_authors": "658;236;205;0", "reply_reviewers": "4;1;2;1", "reply_authors": "4;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 74.5, 41.674332628129754 ], "wc_strengths_avg": [ 52.5, 29.912372022292047 ], "wc_weaknesses_avg": [ 68.5, 48.27266307134919 ], "wc_questions_avg": [ 144.75, 117.25906148353738 ], "wc_limitations_avg": [ 7.5, 8.5 ], "wc_review_avg": [ 347.75, 93.23726454588852 ], "wc_reply_reviewers_avg": [ 235.5, 188.2425297322579 ], "wc_reply_authors_avg": [ 274.75, 239.1311094358072 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10087952851450722458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "", "author_num": 1 }, { "title": "One-to-Multiple: A Progressive Style Transfer Unsupervised Domain-Adaptive Framework for Kidney Tumor Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94424", "id": "cMwSoXLCVi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cMwSoXLCVi", "openreview": "https://openreview.net/forum?id=cMwSoXLCVi", "poster": "/media/PosterPDFs/NeurIPS%202024/94424.png?t=1731552743.0515268", "project": "", "author_site": "Kai Hu, JinHao Li, Yuan Zhang, Xiongjun Ye, Xieping Gao", "tldr": "", "abstract": "In multi-sequence Magnetic Resonance Imaging (MRI), the accurate segmentation of the kidney and tumor based on traditional supervised methods typically necessitates detailed annotation for each sequence, which is both time-consuming and labor-intensive. Unsupervised Domain Adaptation (UDA) methods can effectively mitigate inter-domain differences by aligning cross-modal features, thereby reducing the annotation burden. However, most existing UDA methods are limited to one-to-one domain adaptation, which tends to be inefficient and resource-intensive when faced with multi-target domain transfer tasks. To address this challenge, we propose a novel and efficient One-to-Multiple Progressive Style Transfer Unsupervised Domain-Adaptive (PSTUDA) framework for kidney and tumor segmentation in multi-sequence MRI. Specifically, we develop a multi-level style dictionary to explicitly store the style information of each target domain at various stages, which alleviates the burden of a single generator in a multi-target transfer task and enables effective decoupling of content and style. Concurrently, we employ multiple cascading style fusion modules that utilize point-wise instance normalization to progressively recombine content and style features, which enhances cross-modal alignment and structural consistency. Experiments conducted on the private MSKT and public KiTS19 datasets demonstrate the superiority of the proposed PSTUDA over comparative methods in multi-sequence kidney and tumor segmentation. The average Dice Similarity Coefficients are increased by at least 1.8% and 3.9%, respectively. Impressively, our PSTUDA not only significantly reduces the floating-point computation by approximately 72% but also reduces the number of model parameters by about 50%, bringing higher efficiency and feasibility to practical clinical applications.", "keywords": "Unsupervised Domain Adaptation;Multi-sequence MRI Segmentation;One-to-Multiple;Progressive Style Transfer", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Kai Hu;Jinhao Li;Yuan Zhang;Xiongjun Ye;Xieping Gao", "authorids": "~Kai_Hu6;~Jinhao_Li4;~Yuan_Zhang27;~Xiongjun_Ye1;~Xieping_Gao1", "gender": "M;M;F;M;M", "homepage": ";https://github.com/Ljh-11;https://www.researchgate.net/profile/Yuan-Zhang-260;https://baike.baidu.com/item/%E5%8F%B6%E9%9B%84%E4%BF%8A/2039794?fr=aladdin;https://cise.hunnu.edu.cn/info/1078/4071.htm", "dblp": "57/6633-2.html;;;;94/4344.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Kai_Hu6;~Jinhao_Li4;~Yuan_Zhang27;~Xiongjun_Ye1;~Xieping_Gao1", "aff": "Xiangtan University;Xiangtan University;Xiangtan University;Cancer Hospital, Chinese Academy of Medical Sciences and Peking Union Medical College;Hunan Normal University", "aff_domain": "xtu.edu.cn;xtu.edu.cn;xtu.edu.cn;cicams.ac.cn;hunnu.edu.cn", "position": "Full Professor;MS student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024onetomultiple,\ntitle={One-to-Multiple: A Progressive Style Transfer Unsupervised Domain-Adaptive Framework for Kidney Tumor Segmentation},\nauthor={Kai Hu and Jinhao Li and Yuan Zhang and Xiongjun Ye and Xieping Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cMwSoXLCVi}\n}", "github": "", "reviewers": "tgyp;yJwR;pquR", "pdf_size": 2934779, "rating": "4;5;8", "confidence": "4;4;5", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "102;56;97", "wc_strengths": "56;52;174", "wc_weaknesses": "37;161;62", "wc_questions": "144;5;150", "wc_limitations": "10;1;7", "wc_review": "349;275;490", "wc_reply_reviewers": "71;25;115", "wc_reply_authors": "460;559;34", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.0, 20.607442021431645 ], "wc_strengths_avg": [ 94.0, 56.592107812544555 ], "wc_weaknesses_avg": [ 86.66666666666667, 53.543336549834926 ], "wc_questions_avg": [ 99.66666666666667, 66.98424358674873 ], "wc_limitations_avg": [ 6.0, 3.7416573867739413 ], "wc_review_avg": [ 371.3333333333333, 89.18270883728277 ], "wc_reply_reviewers_avg": [ 70.33333333333333, 36.745370078721784 ], "wc_reply_authors_avg": [ 351.0, 227.76742523899242 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eUgxfRBEjqkJ:scholar.google.com/&scioq=One-to-Multiple:+A+Progressive+Style+Transfer+Unsupervised+Domain-Adaptive+Framework+for+Kidney+Tumor+Segmentation&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "xtu.edu.cn;xtu.edu.cn;xtu.edu.cn;cicams.ac.cn;hunnu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Xiangtan University;Chinese Academy of Medical Sciences;Hunan Normal University", "aff_unique_dep": ";Cancer Hospital;", "aff_unique_url": "http://www.xtu.edu.cn/;http://www.cams.ac.cn;http://www.hnu.edu.cn", "aff_unique_abbr": "XTU;CAMS;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cO1llRY2Br", "title": "Initializing and Retrofitting Key-Value Adaptors for Traceable Model Editing", "track": "main", "status": "Reject", "tldr": "", "abstract": "As the insight of knowledge storage in language models deepens, the ability to perform CRUD (Create, Read, Update, Delete) operations on language models becomes increasingly indispensable for satisfying the demands of managing rapidly updating knowledge. Considering the high cost of fine-tuning language models, model editing methods with low cost are usually required to manipulate models' knowledge. Evident suggests that modules carrying knowledge in a Transformer module are primarily the MLP blocks, thus we propose \\textbf{iReVa}, a method that explicitly initializes and retrofits key-value pairs into MLP blocks to construct a new mapping of a piece of knowledge without damaging the irrelevant knowledge. In comparison to existing methods, iReVa reveals better interpretability and stronger capacity for carrying traceable edits. Experiment results on series of GPT series models show our prominent performance on edit success and generalization without influencing specificity. We also perform the first attempt at conducting knowledge withdrawal test of iReVa. Our codes are available at github.com/thartvigsen/grace.", "keywords": "natural language processing;model editing;language model;key-value adaptor", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hanlun Zhu;Yunshi Lan;Xiang Li;Weining Qian", "authorids": "~Hanlun_Zhu1;~Yunshi_Lan1;~Xiang_Li24;~Weining_Qian1", "gender": "M;F;M;", "homepage": "https://github.com/timberflow;https://lanyunshi.github.io;https://lixiang3776.github.io;", "dblp": "359/0727;185/6830.html;40/1491-67.html;55/3364", "google_scholar": "-yKPeckAAAAJ;Q0F92XIAAAAJ;JnxxNtsAAAAJ;", "orcid": ";0000-0002-0192-8498;0009-0003-0142-2483;", "linkedin": ";;;", "or_profile": "~Hanlun_Zhu1;~Yunshi_Lan1;~Xiang_Li24;~Weining_Qian1", "aff": "East China Normal University;East China Normal University;East China Normal University;", "aff_domain": "stu.ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;", "position": "MS student;Associate Professor;Full Professor;", "bibtex": "@misc{\nanonymous2024initializing,\ntitle={Initializing and Retrofitting Key-Value Adaptors for Traceable Model Editing},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cO1llRY2Br}\n}", "github": "", "project": "", "reviewers": "T9Qb;vMmP;QEHi;QtC8", "site": "https://openreview.net/forum?id=cO1llRY2Br", "pdf_size": 946323, "rating": "5;5;7;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "36;47;74;161", "wc_strengths": "80;16;49;159", "wc_weaknesses": "216;61;231;129", "wc_questions": "1;26;2;3", "wc_limitations": "1;1;1;45", "wc_review": "334;151;357;497", "wc_reply_reviewers": "178;17;15;19", "wc_reply_authors": "451;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 49.04334817281544 ], "wc_strengths_avg": [ 76.0, 52.99528280894442 ], "wc_weaknesses_avg": [ 159.25, 68.80543219833736 ], "wc_questions_avg": [ 8.0, 10.41633332799983 ], "wc_limitations_avg": [ 12.0, 19.05255888325765 ], "wc_review_avg": [ 334.75, 123.06984805385923 ], "wc_reply_reviewers_avg": [ 57.25, 69.72938763534354 ], "wc_reply_authors_avg": [ 112.75, 195.2887285533909 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:I3XM85Bbl9wJ:scholar.google.com/&scioq=Initializing+and+Retrofitting+Key-Value+Adaptors+for+Traceable+Model+Editing&hl=en&as_sdt=0,47", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "East China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.ecnu.edu.cn", "aff_unique_abbr": "ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "PACE: Marrying generalization in PArameter-efficient fine-tuning with Consistency rEgularization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94423", "id": "cOuLbPhOT1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cOuLbPhOT1", "openreview": "https://openreview.net/forum?id=cOuLbPhOT1", "poster": "/media/PosterPDFs/NeurIPS%202024/94423.png?t=1731230487.1318972", "project": "", "author_site": "Yao Ni, Shan Zhang, Piotr Koniusz", "tldr": "", "abstract": "Parameter-Efficient Fine-Tuning (PEFT) effectively adapts pre-trained transformers to downstream tasks. However, the optimization of tasks performance often comes at the cost of generalizability in fine-tuned models. To address this issue, we theoretically connect smaller weight gradient norms during training and larger datasets to the improvements in model generalization. Motivated by this connection, we propose reducing gradient norms for enhanced generalization and aligning fine-tuned model with the pre-trained counterpart to retain knowledge from large-scale pre-training data. Yet, naive alignment does not guarantee gradient reduction and can potentially cause gradient explosion, complicating efforts to manage gradients. To address such an issue, we propose PACE, marrying generalization of PArameter-efficient fine-tuning with Consistency rEgularization. We perturb features learned from the adapter with the multiplicative noise and ensure the fine-tuned model remains consistent for same sample under different perturbations. Theoretical analysis shows that PACE not only implicitly regularizes gradients for enhanced generalization, but also implicitly aligns the fine-tuned and pre-trained models to retain knowledge. Experimental evidence supports our theories. PACE surpasses existing PEFT methods in visual adaptation tasks (VTAB-1k, FGVC, few-shot learning, domain adaptation) showcasing its potential for resource-efficient fine-tuning. It also improves LoRA in text classification (GLUE) and mathematical reasoning (GSM-8K).", "keywords": "Generalization;Regularization;Transfer Learning;Parameter-Efficient Fine-Tuning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Yao Ni;Shan Zhang;Piotr Koniusz", "authorids": "~Yao_Ni1;~Shan_Zhang1;~Piotr_Koniusz1", "gender": "M;F;", "homepage": "https://cecs.anu.edu.au/people/yao-ni;;https://www.koniusz.com", "dblp": "222/7928;14/6026;25/8616", "google_scholar": "oGD-WMQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.uk/citations?user=wZ7-1tUAAAAJ", "orcid": ";0000-0002-5531-3296;0000-0002-6340-5289", "linkedin": "yao-ni-855083221;;", "or_profile": "~Yao_Ni1;~Shan_Zhang1;~Piotr_Koniusz1", "aff": "Mitsubishi Electric Research Labs;Australian National University;Data61, CSIRO", "aff_domain": "merl.com;anu.edu.au;data61.csiro.au", "position": "Intern;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nni2024pace,\ntitle={{PACE}: marrying the generalization of {PA}rameter-efficient fine-tuning with Consistency rEgularization},\nauthor={Yao Ni and Shan Zhang and Piotr Koniusz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cOuLbPhOT1}\n}", "github": "", "reviewers": "JSoa;neYP;bhst;sUXG", "pdf_size": 974537, "rating": "5;5;7;7", "confidence": "4;5;3;3", "soundness": "3;2;4;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "60;23;42;121", "wc_strengths": "22;22;162;70", "wc_weaknesses": "207;345;233;310", "wc_questions": "23;4;32;56", "wc_limitations": "10;1;6;5", "wc_review": "322;395;475;562", "wc_reply_reviewers": "56;90;43;365", "wc_reply_authors": "383;330;244;1078", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 61.5, 36.7593525514256 ], "wc_strengths_avg": [ 69.0, 57.15767664977295 ], "wc_weaknesses_avg": [ 273.75, 55.91679085927589 ], "wc_questions_avg": [ 28.75, 18.699933154960743 ], "wc_limitations_avg": [ 5.5, 3.2015621187164243 ], "wc_review_avg": [ 438.5, 89.51117248701415 ], "wc_reply_reviewers_avg": [ 138.5, 131.89105352524862 ], "wc_reply_authors_avg": [ 508.75, 332.37883130548494 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6212024612645348195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "merl.com;anu.edu.au;data61.csiro.au", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Mitsubishi Electric Research Laboratories;Australian National University;CSIRO", "aff_unique_dep": ";;Data61", "aff_unique_url": "https://www.merl.com;https://www.anu.edu.au;https://www.csiro.au", "aff_unique_abbr": "MERL;ANU;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Australia" }, { "title": "Text-Guided Attention is All You Need for Zero-Shot Robustness in Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94422", "id": "cOw65A9FGf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cOw65A9FGf", "openreview": "https://openreview.net/forum?id=cOw65A9FGf", "poster": "/media/PosterPDFs/NeurIPS%202024/94422.png?t=1731674742.9536748", "project": "", "author_site": "Lu Yu, Haiyang Zhang, Changsheng Xu", "tldr": "", "abstract": "Due to the impressive zero-shot capabilities, pre-trained vision-language models (e.g. CLIP), have attracted widespread attention and adoption across various domains. Nonetheless, CLIP has been observed to be susceptible to adversarial examples. Through experimental analysis, we have observed a phenomenon wherein adversarial perturbations induce shifts in text-guided attention. Building upon this observation, we propose a simple yet effective strategy: Text-Guided Attention for Zero-Shot Robustness (TGA-ZSR). This framework incorporates two components: the Attention Refinement module and the Attention-based Model Constraint module. Our goal is to maintain the generalization of the CLIP model and enhance its adversarial robustness: The Attention Refinement module aligns the text-guided attention obtained from the target model via adversarial examples with the text-guided attention acquired from the original model via clean examples. This alignment enhances the model\u2019s robustness. Additionally, the Attention-based Model Constraint module acquires text-guided attention from both the target and original models using clean examples. Its objective is to maintain model performance on clean samples while enhancing overall robustness. The experiments validate that our method yields a 9.58% enhancement in zero-shot robust accuracy over the current state-of-the-art techniques across 16 datasets. Our code is available at https://github.com/zhyblue424/TGA-ZSR.", "keywords": "adversarial attack;zero-shot robustness;pre-trained vision-language models", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Lu Yu;Haiyang Zhang;Changsheng Xu", "authorids": "~Lu_Yu3;~Haiyang_Zhang6;~Changsheng_Xu1", "gender": "F;M;M", "homepage": ";https://github.com/zhyblue424;", "dblp": "04/1781-4;;85/1301", "google_scholar": ";;https://scholar.google.com.sg/citations?user=hI9NRDkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lu_Yu3;~Haiyang_Zhang6;~Changsheng_Xu1", "aff": "Tianjin University of Technology;Tianjin University of Technology;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "tjut.edu.cn;tjut.edu.cn;ia.ac.cn", "position": "Associate Professor;MS student;Full Professor", "bibtex": "@inproceedings{\nyu2024textguided,\ntitle={Text-Guided Attention is All You Need for Zero-Shot Robustness in Vision-Language Models},\nauthor={Lu Yu and Haiyang Zhang and Changsheng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cOw65A9FGf}\n}", "github": "", "reviewers": "mEBo;qndz;BFe6;w4iU", "pdf_size": 1618920, "rating": "5;5;6;6", "confidence": "4;3;5;2", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "4;2;4;3", "wc_summary": "86;55;59;65", "wc_strengths": "59;42;57;74", "wc_weaknesses": "208;50;29;44", "wc_questions": "2;46;30;121", "wc_limitations": "47;20;12;1", "wc_review": "402;213;187;305", "wc_reply_reviewers": "16;22;132;13", "wc_reply_authors": "25;0;799;25", "reply_reviewers": "1;1;3;1", "reply_authors": "2;1;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 66.25, 11.94518731540029 ], "wc_strengths_avg": [ 58.0, 11.335784048754634 ], "wc_weaknesses_avg": [ 82.75, 72.71648712637321 ], "wc_questions_avg": [ 49.75, 44.04755952376931 ], "wc_limitations_avg": [ 20.0, 16.98528775146303 ], "wc_review_avg": [ 276.75, 84.56469416961194 ], "wc_reply_reviewers_avg": [ 45.75, 49.90177852541931 ], "wc_reply_authors_avg": [ 212.25, 338.9139824498246 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8378465696481442273&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tjut.edu.cn;tjut.edu.cn;ia.ac.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Tianjin University of Technology;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.tjut.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "TUT;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Private Geometric Median", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94421", "id": "cPzjN7KABv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cPzjN7KABv", "openreview": "https://openreview.net/forum?id=cPzjN7KABv", "poster": "", "project": "", "author_site": "Mahdi Haghifam, Thomas Steinke, Jonathan Ullman", "tldr": "", "abstract": "In this paper, we study differentially private (DP) algorithms for computing the geometric median (GM) of a dataset: Given $n$ points, $x_1,\\dots,x_n$ in $\\mathbb{R}^d$, the goal is to find a point $\\theta$ that minimizes the sum of the Euclidean distances to these points, i.e., $\\sum_{i=1}^{n} \\lVert|\\theta - x_i\\rVert_2$. Off-the-shelf methods, such as DP-GD, require strong a priori knowledge locating the data within a ball of radius $R$, and the excess risk of the algorithm depends linearly on $R$. In this paper, we ask: can we design an efficient and private algorithm with an excess error guarantee that scales with the (unknown) radius containing the majority of the datapoints? Our main contribution is a pair of polynomial-time DP algorithms for the task of private GM with an excess error guarantee that scales with the effective diameter of the datapoints. Additionally, we propose an inefficient algorithm based on the inverse smooth sensitivity mechanism, which satisfies the more restrictive notion of pure DP. We complement our results with a lower bound and demonstrate the optimality of our polynomial-time algorithms in terms of sample complexity.", "keywords": "Differential Privacy;Differentially Private Convex Optimization;Geometric Median", "primary_area": "privacy", "supplementary_material": "/attachment/8f4cdf9d3e3f0d714e21ef25fc73fe235dd42689.zip", "author": "Mahdi Haghifam;Thomas Steinke;Jonathan Ullman", "authorids": "~Mahdi_Haghifam2;~Thomas_Steinke2;~Jonathan_Ullman1", "gender": "M;M;M", "homepage": "https://mhaghifam.github.io/mahdihaghifam/;http://www.thomas-steinke.net/;https://jonathan-ullman.github.io/", "dblp": "183/6215;https://dblp.uni-trier.de/pid/73/4025-2.html;02/8164", "google_scholar": "https://scholar.google.com/schhp?hl=en;kwnwhrgAAAAJ;https://scholar.google.com.tw/citations?user=WfS41RAAAAAJ", "orcid": ";;", "linkedin": ";thomas-steinke-2841248/;", "or_profile": "~Mahdi_Haghifam2;~Thomas_Steinke2;~Jonathan_Ullman1", "aff": "Northeastern University;Google;Northeastern University", "aff_domain": "neu.edu;google.com;northeastern.edu", "position": "Postdoc;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nhaghifam2024private,\ntitle={Private Geometric Median},\nauthor={Mahdi Haghifam and Thomas Steinke and Jonathan Ullman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cPzjN7KABv}\n}", "github": "", "reviewers": "M9WN;wFAP;7SSt;3ZzL;wp5E", "pdf_size": 689579, "rating": "6;7;7;7;8", "confidence": "4;3;2;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "234;78;112;131;177", "wc_strengths": "34;156;86;129;418", "wc_weaknesses": "138;38;1;116;40", "wc_questions": "2;57;1;32;1", "wc_limitations": "1;1;1;1;1", "wc_review": "409;330;201;409;637", "wc_reply_reviewers": "12;7;0;22;10", "wc_reply_authors": "25;19;0;20;14", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 146.4, 54.23873154858989 ], "wc_strengths_avg": [ 164.6, 133.25704484191445 ], "wc_weaknesses_avg": [ 66.6, 51.705318875334285 ], "wc_questions_avg": [ 18.6, 22.579636843846714 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 397.2, 141.98084377830693 ], "wc_reply_reviewers_avg": [ 10.2, 7.166589146867567 ], "wc_reply_authors_avg": [ 15.6, 8.546344247688598 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vhtGpGtRD7IJ:scholar.google.com/&scioq=Private+Geometric+Median&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "neu.edu;google.com;northeastern.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Northeastern University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.northeastern.edu;https://www.google.com", "aff_unique_abbr": "NEU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Deep Reinforcement Learning by Reducing the Chain Effect of Value and Policy Churn", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94420", "id": "cQoAgPBARc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cQoAgPBARc", "openreview": "https://openreview.net/forum?id=cQoAgPBARc", "poster": "/media/PosterPDFs/NeurIPS%202024/94420.png?t=1731738289.398895", "project": "", "author_site": "Hongyao Tang, Glen Berseth", "tldr": "", "abstract": "Deep neural networks provide Reinforcement Learning (RL) powerful function approximators to address large-scale decision-making problems. However, these approximators introduce challenges due to the non-stationary nature of RL training. One source of the challenges in RL is that output predictions can churn, leading to uncontrolled changes after each batch update for states not included in the batch. Although such a churn phenomenon exists in each step of network training, it remains under-explored on how churn occurs and impacts RL. In this work, we start by characterizing churn in a view of Generalized Policy Iteration with function approximation, and we discover a chain effect of churn that leads to a cycle where the churns in value estimation and policy improvement compound and bias the learning dynamics throughout the iteration. Further, we concretize the study and focus on the learning issues caused by the chain effect in different settings, including greedy action deviation in value-based methods, trust region violation in proximal policy optimization, and dual bias of policy value in actor-critic methods. We then propose a method to reduce the chain effect across different settings, called Churn Approximated ReductIoN (CHAIN), which can be easily plugged into most existing DRL algorithms. Our experiments demonstrate the effectiveness of our method in both reducing churn and improving learning performance across online and offline, value-based and policy-based RL settings.", "keywords": "Reinforcement Learning;Deep Learning;Regularization and Optimization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Hongyao Tang;Glen Berseth", "authorids": "~Hongyao_Tang1;~Glen_Berseth1", "gender": "M;M", "homepage": "https://bluecontra.github.io/;http://fracturedplane.com/", "dblp": "220/4275;147/5478", "google_scholar": "yIqzRH4AAAAJ;https://scholar.google.ca/citations?user=-WZcuuwAAAAJ", "orcid": ";0000-0001-7351-8028", "linkedin": ";glen-berseth-0523278b?trk=hp-identity-name", "or_profile": "~Hongyao_Tang1;~Glen_Berseth1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al", "aff_domain": "mila.umontreal.ca;mila.umontreal.ca", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ntang2024improving,\ntitle={Improving Deep Reinforcement Learning by Reducing the Chain Effect of Value and Policy Churn},\nauthor={Hongyao Tang and Glen Berseth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cQoAgPBARc}\n}", "github": "", "reviewers": "DNMp;oLah;c7UJ;Qjzk", "pdf_size": 1764378, "rating": "5;6;6;8", "confidence": "5;4;2;5", "soundness": "2;2;3;4", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "47;90;179;92", "wc_strengths": "13;48;30;71", "wc_weaknesses": "21;577;168;456", "wc_questions": "359;34;4;44", "wc_limitations": "6;8;8;67", "wc_review": "446;757;389;730", "wc_reply_reviewers": "319;279;0;754", "wc_reply_authors": "1310;1155;189;1174", "reply_reviewers": "3;1;0;3", "reply_authors": "6;6;3;5", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 102.0, 47.953102089437344 ], "wc_strengths_avg": [ 40.5, 21.523243250030884 ], "wc_weaknesses_avg": [ 305.5, 221.47742548621068 ], "wc_questions_avg": [ 110.25, 144.36823577227784 ], "wc_limitations_avg": [ 22.25, 25.849323008543184 ], "wc_review_avg": [ 580.5, 164.51823607126354 ], "wc_reply_reviewers_avg": [ 338.0, 269.7878796388007 ], "wc_reply_authors_avg": [ 957.0, 447.4164726516001 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 5.0, 1.224744871391589 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.1873171623163388, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10560002890287214044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mila.umontreal.ca;mila.umontreal.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Montreal", "aff_unique_dep": "Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mila.quebec", "aff_unique_abbr": "MILA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Montreal", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Robust Reinforcement Learning from Corrupted Human Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94419", "id": "cR2QDzdpEv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cR2QDzdpEv", "openreview": "https://openreview.net/forum?id=cR2QDzdpEv", "poster": "", "project": "", "author_site": "Alexander Bukharin, Ilgee Hong, Haoming Jiang, Zichong Li, Qingru Zhang, Zixuan Zhang, Tuo Zhao", "tldr": "", "abstract": "Reinforcement learning from human feedback (RLHF) provides a principled framework for aligning AI systems with human preference data. For various reasons, e.g., personal bias, context ambiguity, lack of training, etc, human annotators may give incorrect or inconsistent preference labels. To tackle this challenge, we propose a robust RLHF approach -- $R^3M$, which models the potentially corrupted preference label as sparse outliers. Accordingly, we formulate the robust reward learning as an $\\ell_1$-regularized maximum likelihood estimation problem. Computationally, we develop an efficient alternating optimization algorithm, which only incurs negligible computational overhead compared with the standard RLHF approach. Theoretically, we prove that under proper regularity conditions, $R^3M$ can consistently learn the underlying reward and identify outliers, provided that the number of outlier labels scales sublinearly with the preference sample size. Furthermore, we remark that $R^3M$ is versatile and can be extended to various preference optimization methods, including direct preference optimization (DPO). Our experiments on robotic control and natural language generation with large language models (LLMs) show that $R^3M$ improves robustness of the reward against several types of perturbations to the preference data.", "keywords": "Reinforcement Learning from Human Feedback;Robust Reward Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Alexander Bukharin;Ilgee Hong;Haoming Jiang;Zichong Li;Qingru Zhang;Zixuan Zhang;Tuo Zhao", "authorids": "~Alexander_Bukharin1;~Ilgee_Hong1;~Haoming_Jiang1;~Zichong_Li2;~Qingru_Zhang2;~Zixuan_Zhang5;~Tuo_Zhao2", "gender": "M;M;M;M;M;F;M", "homepage": "https://abukharin3.github.io;https://ilgeehong.github.io/;https://hmjianggatech.github.io;https://github.com/zichongli5/zichongli5.github.io;https://qingruzhang.github.io/;https://www.isye.gatech.edu/users/zixuan-zhang;http://www2.isye.gatech.edu/~tzhao80", "dblp": "294/6372;;230/3684;;228/6749;;", "google_scholar": ";;XaFhuG8AAAAJ;;7YM-faYAAAAJ;;EJXN6tYAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;qingru-zhang-4b789a187;;", "or_profile": "~Alexander_Bukharin1;~Ilgee_Hong1;~Haoming_Jiang1;~Zichong_Li2;~Qingru_Zhang2;~Zixuan_Zhang5;~Tuo_Zhao1", "aff": "NVIDIA;Amazon;Amazon;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "nvidia.com;amazon.com;amazon.com;gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "Intern;Intern;Principal Researcher;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbukharin2024robust,\ntitle={Robust Reinforcement Learning from Corrupted Human Feedback},\nauthor={Alexander Bukharin and Ilgee Hong and Haoming Jiang and Zichong Li and Qingru Zhang and Zixuan Zhang and Tuo Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cR2QDzdpEv}\n}", "github": "", "reviewers": "ugMr;hEuH;XAMr;m2Ct", "pdf_size": 13230233, "rating": "5;5;6;6", "confidence": "4;3;3;4", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;2", "wc_summary": "38;65;62;24", "wc_strengths": "26;27;87;29", "wc_weaknesses": "73;195;63;15", "wc_questions": "95;140;68;85", "wc_limitations": "78;5;23;1", "wc_review": "310;432;303;154", "wc_reply_reviewers": "21;0;22;89", "wc_reply_authors": "32;51;28;45", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 47.25, 17.020208576865326 ], "wc_strengths_avg": [ 42.25, 25.85899263312475 ], "wc_weaknesses_avg": [ 86.5, 66.3682906213502 ], "wc_questions_avg": [ 97.0, 26.636441203734407 ], "wc_limitations_avg": [ 26.75, 30.727634142575962 ], "wc_review_avg": [ 299.75, 98.55043125222741 ], "wc_reply_reviewers_avg": [ 33.0, 33.50373113550191 ], "wc_reply_authors_avg": [ 39.0, 9.354143466934854 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4360070371079981288&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "email": "nvidia.com;amazon.com;amazon.com;gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 7, "aff_unique_index": "0;1;1;2;2;2;2", "aff_unique_norm": "NVIDIA;Amazon;Georgia Institute of Technology", "aff_unique_dep": "NVIDIA Corporation;Amazon.com, Inc.;", "aff_unique_url": "https://www.nvidia.com;https://www.amazon.com;https://www.gatech.edu", "aff_unique_abbr": "NVIDIA;Amazon;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Hitchhiker's Guide to Fine-Grained Face Forgery Detection Using Common Sense Reasoning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97603", "id": "cR3T1ZYN8I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cR3T1ZYN8I", "openreview": "https://openreview.net/forum?id=cR3T1ZYN8I", "poster": "/media/PosterPDFs/NeurIPS%202024/97603.png?t=1729522189.440738", "project": "", "author_site": "Niki Foteinopoulou, Enjie Ghorbel, Djamila Aouada", "tldr": "", "abstract": "Explainability in artificial intelligence is crucial for restoring trust, particularly in areas like face forgery detection, where viewers often struggle to distinguish between real and fabricated content. Vision and Large Language Models (VLLM) bridge computer vision and natural language, offering numerous applications driven by strong common-sense reasoning. Despite their success in various tasks, the potential of vision and language remains underexplored in face forgery detection, where they hold promise for enhancing explainability by leveraging the intrinsic reasoning capabilities of language to analyse fine-grained manipulation areas.\n For that reason, few works have recently started to frame the problem of deepfake detection as a Visual Question Answering (VQA) task, nevertheless omitting the realistic and informative open-ended multi-label setting. With the rapid advances in the field of VLLM, an exponential rise of investigations in that direction is expected.\n As such, there is a need for a clear experimental methodology that converts face forgery detection to a Visual Question Answering (VQA) task to systematically and fairly evaluate different VLLM architectures. Previous evaluation studies in deepfake detection have mostly focused on the simpler binary task, overlooking evaluation protocols for multi-label fine-grained detection and text-generative models. We propose a multi-staged approach that diverges from the traditional binary evaluation protocol and conducts a comprehensive evaluation study to compare the capabilities of several VLLMs in this context.\n In the first stage, we assess the models' performance on the binary task and their sensitivity to given instructions using several prompts. In the second stage, we delve deeper into fine-grained detection by identifying areas of manipulation in a multiple-choice VQA setting. In the third stage, we convert the fine-grained detection to an open-ended question and compare several matching strategies for the multi-label classification task. Finally, we qualitatively evaluate the fine-grained responses of the VLLMs included in the benchmark.\n We apply our benchmark to several popular models, providing a detailed comparison of binary, multiple-choice, and open-ended VQA evaluation across seven datasets. \\url{https://nickyfot.github.io/hitchhickersguide.github.io/}", "keywords": "Deepfake detection;Fine-grained labels;Vision and Language;Foundation Models;Benchmark", "primary_area": "", "supplementary_material": "/attachment/284de8e9cf1a4481b4842b3de32256f66bcdceaa.zip", "author": "Niki Foteinopoulou;Enjie Ghorbel;Djamila Aouada", "authorids": "~Niki_Foteinopoulou1;~Enjie_Ghorbel1;~Djamila_Aouada1", "gender": ";;F", "homepage": ";;https://cvi2.uni.lu/profile-djamila-aouada/", "dblp": ";;20/7872", "google_scholar": ";;WBmJVSkAAAAJ", "orcid": ";;0000-0002-7576-2064", "linkedin": ";;djamilaaouada/", "or_profile": "~Niki_Foteinopoulou1;~Enjie_Ghorbel1;~Djamila_Aouada1", "aff": ";;University of Luxemburg", "aff_domain": ";;uni.lu", "position": ";;Senior Research Scientist", "bibtex": "@inproceedings{\nfoteinopoulou2024a,\ntitle={A Hitchhiker's Guide to Fine-Grained Face Forgery Detection Using Common Sense Reasoning},\nauthor={Niki Foteinopoulou and Enjie Ghorbel and Djamila Aouada},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cR3T1ZYN8I}\n}", "github": "", "reviewers": "t8Gi;BdnU;Gomc;a29e", "pdf_size": 3712143, "rating": "5;6;7;7", "confidence": "5;4;4;4", "wc_summary_and_contributions": "90;27;95;40", "wc_strengths": "2;39;59;27", "wc_improvement": "2;18;17;81", "wc_limitations": "2;72;156;147", "wc_correctness": "1;1;9;10", "wc_clarity": "1;1;8;1", "wc_relation_to_prior_work": "3;34;21;1", "wc_documentation": "1;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "103;194;367;309", "wc_reply_reviewers": "0;0;13;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 63.0, 29.90819285747636 ], "wc_strengths_avg": [ 31.75, 20.632195714465293 ], "wc_improvement_avg": [ 29.5, 30.401480227120523 ], "wc_limitations_avg": [ 94.25, 62.45148116738305 ], "wc_correctness_avg": [ 5.25, 4.264680527307995 ], "wc_clarity_avg": [ 2.75, 3.031088913245535 ], "wc_relation_to_prior_work_avg": [ 14.75, 13.571569548139964 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 243.25, 102.14297577415688 ], "wc_reply_reviewers_avg": [ 7.5, 7.632168761236874 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16356622156621247982&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": ";;uni.lu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Luxembourg", "aff_unique_dep": "", "aff_unique_url": "https://wwwen.uniluxembourg.lu", "aff_unique_abbr": "Uni Lu", "aff_country_unique_index": "0", "aff_country_unique": "Luxembourg" }, { "title": "Credit Attribution and Stable Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94418", "id": "cRLFvSOrzt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cRLFvSOrzt", "openreview": "https://openreview.net/forum?id=cRLFvSOrzt", "poster": "/media/PosterPDFs/NeurIPS%202024/94418.png?t=1731448437.1287606", "project": "", "author_site": "Roi Livni, Shay Moran, Kobbi Nissim, Chirag Pabbaraju", "tldr": "", "abstract": "Credit attribution is crucial across various fields. In academic research, proper citation acknowledges prior work and establishes original contributions. Similarly, in generative models, such as those trained on existing artworks or music, it is important to ensure that any generated content influenced by these works appropriately credits the original creators.\n\nWe study credit attribution by machine learning algorithms. We propose new definitions--relaxations of Differential Privacy--that weaken the stability guarantees for a designated subset of $k$ datapoints. These $k$ datapoints can be used non-stably with permission from their owners, potentially in exchange for compensation. Meanwhile, the remaining datapoints are guaranteed to have no significant influence on the algorithm's output.\n\nOur framework extends well-studied notions of stability, including Differential Privacy ($k = 0$), differentially private learning with public data (where the $k$ public datapoints are fixed in advance),\nand stable sample compression (where the $k$ datapoints are selected adaptively by the algorithm).\nWe examine the expressive power of these stability notions within the PAC learning framework, provide a comprehensive characterization of learnability for algorithms adhering to these principles, and propose directions and questions for future research.", "keywords": "Credit Attribution;Algorithmic Stability;Stable Sample Compression", "primary_area": "learning_theory", "supplementary_material": "", "author": "Roi Livni;Shay Moran;Kobbi Nissim;Chirag Pabbaraju", "authorids": "~Roi_Livni1;~Shay_Moran1;~Kobbi_Nissim2;~Chirag_Pabbaraju1", "gender": "Not Specified;M;M;M", "homepage": "https://www.rlivni.sites.tau.ac.il/;http://www.cs.technion.ac.il/~shaymrn/;http://people.cs.georgetown.edu/~kobbi/;https://web.stanford.edu/~cpabbara/", "dblp": "59/11348;119/5111;65/801;231/7619", "google_scholar": "xhU85M4AAAAJ;kALYnggAAAAJ;https://scholar.google.com.tw/citations?user=U-RE8IgAAAAJ;IAGcpHkAAAAJ", "orcid": ";;;0000-0002-3424-691X", "linkedin": ";;;chirag-pabbaraju-277a4ba5/", "or_profile": "~Roi_Livni1;~Shay_Moran1;~Kobbi_Nissim2;~Chirag_Pabbaraju1", "aff": "Tel Aviv University;Google;Georgetown University;Stanford University", "aff_domain": "tau.ac.il;google.com;georgetwon.edu;cs.stanford.edu", "position": "Assistant Professor;Visiting Faculty;Full Professor;PhD student", "bibtex": "@inproceedings{\nlivni2024credit,\ntitle={Credit Attribution and Stable Compression},\nauthor={Roi Livni and Shay Moran and Kobbi Nissim and Chirag Pabbaraju},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cRLFvSOrzt}\n}", "github": "", "reviewers": "ariQ;jYet;nyTY;JBKH", "pdf_size": 374556, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;2;3", "wc_summary": "78;74;49;348", "wc_strengths": "60;40;42;73", "wc_weaknesses": "127;66;251;330", "wc_questions": "61;22;59;166", "wc_limitations": "28;1;1;6", "wc_review": "354;203;402;923", "wc_reply_reviewers": "322;14;0;65", "wc_reply_authors": "393;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 137.25, 122.1830082294588 ], "wc_strengths_avg": [ 53.75, 13.571569548139964 ], "wc_weaknesses_avg": [ 193.5, 103.2194264661454 ], "wc_questions_avg": [ 77.0, 53.67960506561128 ], "wc_limitations_avg": [ 9.0, 11.157956802210698 ], "wc_review_avg": [ 470.5, 271.3747409026861 ], "wc_reply_reviewers_avg": [ 100.25, 130.29269933499728 ], "wc_reply_authors_avg": [ 98.25, 170.1739918436422 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W2CZIOddOvgJ:scholar.google.com/&scioq=Credit+Attribution+and+Stable+Compression&hl=en&as_sdt=0,11", "gs_version_total": 5, "email": "tau.ac.il;google.com;georgetwon.edu;cs.stanford.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Tel Aviv University;Google;Georgetown University;Stanford University", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.tau.ac.il;https://www.google.com;https://www.georgetown.edu;https://www.stanford.edu", "aff_unique_abbr": "TAU;Google;GU;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Stanford", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Generative Forests", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94417", "id": "cRlQHncjwT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cRlQHncjwT", "openreview": "https://openreview.net/forum?id=cRlQHncjwT", "poster": "/media/PosterPDFs/NeurIPS%202024/94417.png?t=1731245549.1145227", "project": "", "author_site": "Richard Nock, Mathieu Guillame-Bert", "tldr": "", "abstract": "We focus on generative AI for a type of data that still represent one of the most prevalent form of data: tabular data. We introduce a new powerful class of forest-based models fit for such tasks and a simple training algorithm with strong convergence guarantees in a boosting model that parallels that of the original weak / strong supervised learning setting. This algorithm can be implemented by a few tweaks to the most popular induction scheme for decision tree induction (*i.e. supervised learning*) with two classes. Experiments on the quality of generated data display substantial improvements compared to the state of the art. The losses our algorithm minimize and the structure of our models make them practical for related tasks that require fast estimation of a density given a generative model and an observation (even partially specified): such tasks include missing data imputation and density estimation. Additional experiments on these tasks reveal that our models can be notably good contenders to diverse state of the art methods, relying on models as diverse as (or mixing elements of) trees, neural nets, kernels or graphical models.", "keywords": "tabular data;generative models;boosting;trees", "primary_area": "learning_theory", "supplementary_material": "/attachment/3c223cad7b21e3e624ce552fed4b9ce0f06e96bf.zip", "author": "Richard Nock;Mathieu Guillame-Bert", "authorids": "~Richard_Nock1;~Mathieu_Guillame-Bert1", "gender": ";", "homepage": "http://users.cecs.anu.edu.au/~rnock/;https://mathieu.guillame-bert.com/", "dblp": "n/RichardNock;72/10068", "google_scholar": "https://scholar.google.fr/citations?user=0J2s3YQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Richard_Nock1;~Mathieu_Guillame-Bert1", "aff": "Google Research;Google", "aff_domain": "google.com;google.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nnock2024generative,\ntitle={Generative Forests},\nauthor={Richard Nock and Mathieu Guillame-Bert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cRlQHncjwT}\n}", "github": "", "reviewers": "uJTo;kTDz;6MHb;Sk9u;cvp8", "pdf_size": 11070503, "rating": "5;5;6;7;7", "confidence": "2;4;4;1;2", "soundness": "2;2;3;3;3", "novelty": "3;3;3;3;4", "presentation": "2;2;3;3;2", "wc_summary": "211;152;104;43;332", "wc_strengths": "112;117;86;57;426", "wc_weaknesses": "262;1108;75;120;142", "wc_questions": "25;370;69;33;176", "wc_limitations": "5;50;31;2;230", "wc_review": "615;1797;365;255;1306", "wc_reply_reviewers": "542;752;231;35;0", "wc_reply_authors": "1304;2449;795;16;0", "reply_reviewers": "2;3;3;2;0", "reply_authors": "5;4;5;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 2.6, 1.2 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 168.4, 98.71291708788672 ], "wc_strengths_avg": [ 159.6, 134.90233504280047 ], "wc_weaknesses_avg": [ 341.4, 388.25950084962506 ], "wc_questions_avg": [ 134.6, 129.43353506723057 ], "wc_limitations_avg": [ 63.6, 85.04728096770643 ], "wc_review_avg": [ 867.6, 591.2142082189838 ], "wc_reply_reviewers_avg": [ 312.0, 292.2786341832054 ], "wc_reply_authors_avg": [ 912.8, 912.6093140002462 ], "reply_reviewers_avg": [ 2.0, 1.0954451150103321 ], "reply_authors_avg": [ 3.4, 1.624807680927192 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5590169943749475, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10522268553536772376&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "cRs4jvF4mO", "title": "Deep Discriminative to Kernel Density Graph for In- and Out-of-distribution Calibrated Inference", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep discriminative approaches like random forests and deep neural networks have recently found applications in many important real-world scenarios. However, deploying these learning algorithms in safety-critical applications raises concerns, particularly when it comes to ensuring confidence calibration for both in-distribution and out-of-distribution data points. Many popular methods for in-distribution (ID) calibration, such as isotonic and Platt\u2019s sigmoidal regression, exhibit excellent ID calibration performance. However, these methods are not calibrated for the entire feature space, leading to overconfidence in the case of out-of-distribution (OOD) samples. On the other end of the spectrum, existing out-of-distribution (OOD) calibration methods generally exhibit poor in-distribution (ID) calibration. In this paper, we address ID and OOD calibration problems jointly. We leveraged the fact that deep models, including both random forests and deep-nets, learn internal representations which are unions of polytopes with affine activation functions to conceptualize them both as partitioning rules of the feature space. We replace the affine function in each polytope populated by the training data with a Gaussian kernel. Our experiments on both tabular and vision benchmarks show that the proposed approaches obtain well-calibrated posteriors while mostly preserving or improving the classification accuracy of the original algorithm for ID region, and extrapolate beyond the training data to handle OOD inputs appropriately.", "keywords": "random forest;deep networks;in-distribution calibration;out-of-distribution detection", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/03193d708782910fa607c2a54ebd290cf17ed438.zip", "author": "Jayanta Dey;Haoyin Xu;Will LeVine;Ashwin De Silva;Tyler M. Tomita;Ali Geisa;Tiffany Chu;Jacob Maxwell Desman;Joshua T Vogelstein", "authorids": "~Jayanta_Dey1;~Haoyin_Xu1;~Will_LeVine1;~Ashwin_De_Silva1;~Tyler_M._Tomita1;~Ali_Geisa1;~Tiffany_Chu1;~Jacob_Maxwell_Desman1;~Joshua_T_Vogelstein1", "gender": "M;M;;M;M;M;;M;M", "homepage": ";;;https://laknath1996.github.io;https://tyler-tomita.github.io;;;;https://neurodata.io/", "dblp": "236/4496;300/8957;;270/4302;;;;;04/700", "google_scholar": "o0AWbc4AAAAJ;7gDN3c8AAAAJ;x0BaVhQAAAAJ;xqhwEGIAAAAJ;;;;c0Xt7BAAAAAJ;DWPfdT4AAAAJ", "orcid": ";0000-0001-8235-4950;;0000-0002-6406-7090;;;0000-0002-9455-0868;0000-0002-5411-6637;0000-0003-2487-6237", "linkedin": "jayanta-dey-22431575/;haoyinxu/;https://www.linkedin.com/mwlite/in/will-levine-63b986123;ashwin-de-silva-6852b14b/;;ali-geisa-26256211a;tchu13/;jacobdesman/;jovo1/", "or_profile": "~Jayanta_Dey1;~Haoyin_Xu1;~Will_LeVine1;~Ashwin_De_Silva1;~Tyler_M._Tomita1;~Ali_Geisa1;~Tiffany_Chu1;~Jacob_Maxwell_Desman1;~Joshua_T_Vogelstein1", "aff": "Johns Hopkins University;Johns Hopkins University;Microsoft;Johns Hopkins University;;Johns Hopkins University;SeqMatic LLC;Icahn School of Medicine at Mount Sinai;Johns Hopkins University", "aff_domain": "jhmi.edu;jhu.edu;microsoft.com;jhu.edu;;jhu.edu;seqmatic.com;icahn.mssm.edu;jhu.edu", "position": "PhD student;Research Assistant;Researcher;PhD student;;Researcher;Researcher;Medical Student;Associate Professor", "bibtex": "@misc{\nanonymous2024deep,\ntitle={Deep Discriminative to Kernel Density Graph for In- and Out-of-distribution Calibrated Inference},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cRs4jvF4mO}\n}", "github": "", "project": "", "reviewers": "fbQU;gpCb;tAq4", "site": "https://openreview.net/forum?id=cRs4jvF4mO", "pdf_size": 14469698, "rating": "5;5;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "99;79;78", "wc_strengths": "66;81;18", "wc_weaknesses": "147;111;134", "wc_questions": "49;135;155", "wc_limitations": "31;13;7", "wc_review": "392;419;392", "wc_reply_reviewers": "23;637;24", "wc_reply_authors": "18;486;38", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.33333333333333, 9.672412085697939 ], "wc_strengths_avg": [ 55.0, 26.870057685088806 ], "wc_weaknesses_avg": [ 130.66666666666666, 14.884742374510738 ], "wc_questions_avg": [ 113.0, 45.985504962614755 ], "wc_limitations_avg": [ 17.0, 10.198039027185569 ], "wc_review_avg": [ 401.0, 12.727922061357855 ], "wc_reply_reviewers_avg": [ 228.0, 289.206961649727 ], "wc_reply_authors_avg": [ 180.66666666666666, 216.05760548726096 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YJqtg2sumMEJ:scholar.google.com/&scioq=Deep+Discriminative+to+Kernel+Density+Graph+for+In-+and+Out-of-distribution+Calibrated+Inference&hl=en&as_sdt=0,34", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0;2;3;0", "aff_unique_norm": "Johns Hopkins University;Microsoft;SeqMatic;Icahn School of Medicine at Mount Sinai", "aff_unique_dep": ";Microsoft Corporation;;School of Medicine", "aff_unique_url": "https://www.jhu.edu;https://www.microsoft.com;;https://icahn.mssm.edu", "aff_unique_abbr": "JHU;Microsoft;SeqMatic;ISMMS", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Training-Free Adaptive Diffusion with Bounded Difference Approximation Strategy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94416", "id": "cS63YtJ49A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cS63YtJ49A", "openreview": "https://openreview.net/forum?id=cS63YtJ49A", "poster": "/media/PosterPDFs/NeurIPS%202024/94416.png?t=1731569984.7841196", "project": "", "author_site": "Hancheng Ye, Jiakang Yuan, Renqiu Xia, Xiangchao Yan, Tao Chen, Junchi Yan, Botian Shi, Bo Zhang", "tldr": "", "abstract": "Diffusion models have recently achieved great success in the synthesis of high-quality images and videos. However, the existing denoising techniques in diffusion models are commonly based on step-by-step noise predictions, which suffers from high computation cost, resulting in a prohibitive latency for interactive applications. In this paper, we propose AdaptiveDiffusion to relieve this bottleneck by adaptively reducing the noise prediction steps during the denoising process. Our method considers the potential of skipping as many noise prediction steps as possible while keeping the final denoised results identical to the original full-step ones. Specifically, the skipping strategy is guided by the third-order latent difference that indicates the stability between timesteps during the denoising process, which benefits the reusing of previous noise prediction results. Extensive experiments on image and video diffusion models demonstrate that our method can significantly speed up the denoising process while generating identical results to the original process, achieving up to an average 2-5x speedup without quality degradation. The code is available at https://github.com/UniModal4Reasoning/AdaptiveDiffusion", "keywords": "Adaptive Diffusion;Diffusion Probabilistic Models;Third-order Difference", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9cf463954282b86f72947b71a31f64338ff2bb2d.zip", "author": "Hancheng Ye;Jiakang Yuan;Renqiu Xia;Xiangchao Yan;Tao Chen;Junchi Yan;Botian Shi;Bo Zhang", "authorids": "~Hancheng_Ye1;~Jiakang_Yuan1;~Renqiu_Xia2;~Xiangchao_Yan1;~Tao_Chen6;~Junchi_Yan2;~Botian_Shi1;~Bo_Zhang17", "gender": "M;M;;M;M;M;M;M", "homepage": "https://github.com/HankYe;https://jiakangyuan.github.io/;https://github.com/sky-fly97;https://eetchen.github.io/;;https://bobrown.github.io/boZhang.github.io/;http://thinklab.sjtu.edu.cn/;", "dblp": "316/2614;323/7363;314/2496.html;69/510-3;245/8742;36/2259-69;60/7949.html;314/5598", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;0mMk6PMAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;K0PpvLkAAAAJ;https://scholar.google.com/citations?hl=en;ga230VoAAAAJ;gh1ZOToAAAAJ", "orcid": "0000-0002-6272-2792;;;;0000-0003-3677-7252;0000-0001-8052-782X;0000-0001-9639-7679;", "linkedin": ";;;;friskit/;;;", "or_profile": "~Hancheng_Ye1;~Jiakang_Yuan1;~Xiangchao_Yan1;~Tao_Chen6;~Botian_Shi1;~Bo_Zhang17;~Junchi_Yan1;~renqiu_xia1", "aff": "Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai AI Laboratory;Fudan University;Shanghai AI Lab;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Intern;Intern;Researcher;Full Professor;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nye2024trainingfree,\ntitle={Training-Free Adaptive Diffusion with Bounded Difference Approximation Strategy},\nauthor={Hancheng Ye and Jiakang Yuan and Renqiu Xia and Xiangchao Yan and Tao Chen and Junchi Yan and Botian Shi and Bo Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cS63YtJ49A}\n}", "github": "", "reviewers": "hdh3;XP4x;rWvo;kqHE", "pdf_size": 9379790, "rating": "3;4;6;6", "confidence": "1;3;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "274;55;46;194", "wc_strengths": "381;41;60;75", "wc_weaknesses": "377;47;139;74", "wc_questions": "440;62;51;37", "wc_limitations": "318;19;1;1", "wc_review": "1790;224;297;381", "wc_reply_reviewers": "68;17;11;0", "wc_reply_authors": "193;132;21;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 142.25, 96.06345559056264 ], "wc_strengths_avg": [ 139.25, 140.09349556635382 ], "wc_weaknesses_avg": [ 159.25, 130.08915212268855 ], "wc_questions_avg": [ 147.5, 169.1072145119776 ], "wc_limitations_avg": [ 84.75, 134.867295887476 ], "wc_review_avg": [ 673.0, 647.2885755209959 ], "wc_reply_reviewers_avg": [ 24.0, 26.124700955226263 ], "wc_reply_authors_avg": [ 86.5, 79.34891303603345 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10434496279736261677&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;2;3;0;4;4", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Fudan University;Shanghai AI Lab;Shanghai Jiao Tong University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.fudan.edu.cn;https://www.shanghaiailab.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Shanghai AI Lab;SAIL;Fudan;SAIL;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Distributional regression: CRPS-error bounds for model fitting, model selection and convex aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94415", "id": "cSfxzCozPU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cSfxzCozPU", "openreview": "https://openreview.net/forum?id=cSfxzCozPU", "poster": "/media/PosterPDFs/NeurIPS%202024/94415.png?t=1731680375.8787322", "project": "", "author_site": "Dombry Clement, Ahmed Zaoui", "tldr": "", "abstract": "Distributional regression aims at estimating the conditional distribution of a target variable given explanatory co-variates. It is a crucial tool for forecasting when a precise uncertainty quantification is required. A popular methodology consists in fitting a parametric model via empirical risk minimization where the risk is measured by the Continuous Rank Probability Score (CRPS). For independent and identically distributed observations, we provide a concentration result for the estimation error and an upper bound for its expectation. Furthermore, we consider model selection performed by minimization of the validation error and provide a concentration bound for the regret. A similar result is proved for convex aggregation of models. Finally, we show that our results may be applied to various models such as EMOS, distributional regression networks, distributional nearest neighbours or distributional random forests and we illustrate our findings on two data sets (QSAR aquatic toxicity and Airfoil self-noise).", "keywords": "distributional regression; error bounds; concentration inequality; continuous rank probability score; empirical risk minimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Dombry Clement;Ahmed Zaoui", "authorids": "~Dombry_Clement1;~Ahmed_Zaoui1", "gender": "M;M", "homepage": "https://cdombry.perso.math.cnrs.fr/;https://zaouiamed.github.io/Ahmedzaoui.github.io/", "dblp": ";", "google_scholar": ";nupqA5cAAAAJ", "orcid": "0000-0003-2382-3280;", "linkedin": ";", "or_profile": "~Dombry_Clement1;~Ahmed_Zaoui1", "aff": "Universit\u00e9 Marie et Louis Pasteur;Universit\u00e9 Marie et Louis Pasteur", "aff_domain": "univ-fcomte.fr;univ-fcomte.fr", "position": "Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nclement2024distributional,\ntitle={Distributional regression: {CRPS}-error bounds for model fitting, model selection and convex aggregation},\nauthor={Dombry Clement and Ahmed Zaoui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cSfxzCozPU}\n}", "github": "", "reviewers": "TyYn;kUjo;uh9Z;GnPM", "pdf_size": 441325, "rating": "4;5;6;6", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;4", "wc_summary": "62;75;45;69", "wc_strengths": "30;110;20;70", "wc_weaknesses": "104;313;29;102", "wc_questions": "6;2;38;155", "wc_limitations": "6;10;6;4", "wc_review": "208;510;138;400", "wc_reply_reviewers": "14;35;11;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.75, 11.233320969330485 ], "wc_strengths_avg": [ 57.5, 35.61951712193752 ], "wc_weaknesses_avg": [ 137.0, 106.01179179695059 ], "wc_questions_avg": [ 50.25, 62.06599310411459 ], "wc_limitations_avg": [ 6.5, 2.179449471770337 ], "wc_review_avg": [ 314.0, 148.34419435893 ], "wc_reply_reviewers_avg": [ 20.5, 9.287087810503355 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1920436936797667908&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "univ-fcomte.fr;univ-fcomte.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Universit\u00e9 Marie et Louis Pasteur", "aff_unique_dep": "", "aff_unique_url": "https://www.unistra.fr", "aff_unique_abbr": "UMLP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Causal discovery with endogenous context variables", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94414", "id": "cU8d7LeOyx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cU8d7LeOyx", "openreview": "https://openreview.net/forum?id=cU8d7LeOyx", "poster": "/media/PosterPDFs/NeurIPS%202024/94414.png?t=1733475619.7300293", "project": "", "author_site": "Wiebke G\u00fcnther, Oana-Iuliana Popescu, Martin Rabel, Urmi Ninad, Andreas Gerhardus, Jakob Runge", "tldr": "", "abstract": "Systems with variations of the underlying generating mechanism between different contexts, i.e., different environments or internal states in which the system operates, are common in the real world, such as soil moisture regimes in Earth science. Besides understanding the shared properties of the system, in practice, the question of context-specific properties, i.e., the change in causal relationships between contexts, arises. For real-world data, contexts are often driven by system variables, e.g., precipitation highly influences soil moisture. Nevertheless, this setup needs to be studied more. To account for such endogenous contexts in causal discovery, our work proposes a constraint-based method that can efficiently discover context-specific causal graphs using an adaptive testing approach. Our approach tests conditional independence on the pooled datasets to infer the dependence between system variables, including the context, to avoid introducing selection bias. To yield context-specific insights, conditional independence is tested on context-specific data. We work out the theoretical framework for this adaptive testing approach and give a detailed discussion of the connection to structural causal models, including sufficiency assumptions, which allow to prove the soundness of our algorithm and to interpret the results causally. A simulation study to evaluate numerical properties shows that our approach behaves as expected, but also leads to a further understanding of current limitations and viable extensions.", "keywords": "causal discovery;context-specific causal discovery;selection bias;SCM;causal models;endogeneous context variables;regime-specific causal discovery;endogeneous regime variables;context-specific independence", "primary_area": "causal_inference", "supplementary_material": "/attachment/e0a0cab340f5010281e7bdc7ebaf27d75e393ec8.zip", "author": "Wiebke G\u00fcnther;Oana-Iuliana Popescu;Martin Rabel;Urmi Ninad;Andreas Gerhardus;Jakob Runge", "authorids": "~Wiebke_G\u00fcnther1;~Oana-Iuliana_Popescu1;~Martin_Rabel1;~Urmi_Ninad1;~Andreas_Gerhardus1;~Jakob_Runge2", "gender": ";F;M;;;M", "homepage": ";;;;;https://www.causalinferencelab.com", "dblp": "243/3557;170/8417;;;https://dblp.uni-trier.de/pid/270/3028;120/7695", "google_scholar": ";;;;https://scholar.google.de/citations?user=1hzf2VAAAAAJ;https://scholar.google.de/citations?user=wtXVvuUAAAAJ", "orcid": ";;0000-0003-2808-0578;;0000-0003-1868-655X;0000-0002-0629-1772", "linkedin": "wiebke-g\u00fcnther-903494178;oana-iuliana-popescu/;;urmi-ninad-0a70b848/;;", "or_profile": "~Wiebke_G\u00fcnther1;~Oana-Iuliana_Popescu1;~Martin_Rabel1;~Urmi_Ninad1;~Andreas_Gerhardus1;~Jakob_Runge2", "aff": "German Aerospace Center, Institute of Data Science;German Aerospace Center, Institute of Data Science;DLR Jena;Technische Universit\u00e4t Berlin;Friedrich-Schiller Universit\u00e4t Jena;Technische Universit\u00e4t Dresden", "aff_domain": "dlr.de;dlr.de;dlr.de;tu-berlin.de;uni-jena.de;tu-dresden.de", "position": "PhD student;PhD student;Postdoc;Postdoc;Lecturer;Full Professor", "bibtex": "@inproceedings{\ng{\\\"u}nther2024causal,\ntitle={Causal discovery with endogenous context variables},\nauthor={Wiebke G{\\\"u}nther and Oana-Iuliana Popescu and Martin Rabel and Urmi Ninad and Andreas Gerhardus and Jakob Runge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cU8d7LeOyx}\n}", "github": "", "reviewers": "7t5U;J2yL;9Xqc;DjMB", "pdf_size": 1841894, "rating": "5;5;5;6", "confidence": "3;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "2;2;2;2", "wc_summary": "93;15;62;137", "wc_strengths": "46;48;38;67", "wc_weaknesses": "370;84;185;321", "wc_questions": "1;54;3;40", "wc_limitations": "1;3;3;19", "wc_review": "511;204;291;584", "wc_reply_reviewers": "66;143;0;85", "wc_reply_authors": "0;520;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 76.75, 44.51053246142985 ], "wc_strengths_avg": [ 49.75, 10.638961415476606 ], "wc_weaknesses_avg": [ 240.0, 112.71867635844559 ], "wc_questions_avg": [ 24.5, 23.04886114323222 ], "wc_limitations_avg": [ 6.5, 7.262919523166975 ], "wc_review_avg": [ 397.5, 155.28119654356092 ], "wc_reply_reviewers_avg": [ 73.5, 51.041649659861115 ], "wc_reply_authors_avg": [ 130.0, 225.16660498395404 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14974215622420265383&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "dlr.de;dlr.de;dlr.de;tu-berlin.de;uni-jena.de;tu-dresden.de", "author_num": 6, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "German Aerospace Center;German Aerospace Center (DLR);Technische Universit\u00e4t Berlin;Friedrich-Schiller-Universit\u00e4t Jena;Technische Universit\u00e4t Dresden", "aff_unique_dep": "Institute of Data Science;;;;", "aff_unique_url": "https://www.dlr.de;https://www.dlr.de;https://www.tu-berlin.de;https://www.uni-jena.de;https://tu-dresden.de", "aff_unique_abbr": "DLR;DLR;TU Berlin;FSU Jena;TUD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Jena", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Learning Truncated Causal History Model for Video Restoration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94413", "id": "cUGf2HaNcs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cUGf2HaNcs", "openreview": "https://openreview.net/forum?id=cUGf2HaNcs", "poster": "/media/PosterPDFs/NeurIPS%202024/94413.png?t=1731534768.7976987", "project": "", "author_site": "Amirhosein Ghasemabadi, Muhammad Janjua, Mohammad Salameh, Di Niu", "tldr": "", "abstract": "One key challenge to video restoration is to model the transition dynamics of video frames governed by motion. In this work, we propose Turtle to learn the truncated causal history model for efficient and high-performing video restoration. Unlike traditional methods that process a range of contextual frames in parallel, Turtle enhances efficiency by storing and summarizing a truncated history of the input frame latent representation into an evolving historical state. This is achieved through a sophisticated similarity-based retrieval mechanism that implicitly accounts for inter-frame motion and alignment. The causal design in Turtle enables recurrence in inference through state-memorized historical features while allowing parallel training by sampling truncated video clips. We report new state-of-the-art results on a multitude of video restoration benchmark tasks, including video desnowing, nighttime video deraining, video raindrops and rain streak removal, video super-resolution, real-world and synthetic video deblurring, and blind video denoising while reducing the computational cost compared to existing best contextual methods on all these tasks.", "keywords": "video restoration;low-level computer vision;motion understanding", "primary_area": "machine_vision", "supplementary_material": "/attachment/167a31aa2900a65359f5c4ed04d819510c71b07a.zip", "author": "Amirhosein Ghasemabadi;Muhammad Kamran Janjua;Mohammad Salameh;Di Niu", "authorids": "~Amirhosein_Ghasemabadi2;~Muhammad_Kamran_Janjua3;~Mohammad_Salameh1;~Di_Niu1", "gender": "M;M;M;M", "homepage": ";https://kjanjua26.github.io/;;https://www.ualberta.ca/~dniu", "dblp": ";223/5829;91/9402;82/4953", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=3kC5OogAAAAJ", "orcid": ";;;0000-0002-5250-7327", "linkedin": ";;mohammadsalameh;", "or_profile": "~Amirhosein_Ghasemabadi2;~Muhammad_Kamran_Janjua3;~Mohammad_Salameh1;~Di_Niu1", "aff": "University of Alberta;Huawei Technologies Ltd.;Huawei Technologies Ltd.;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;huawei.com;ualberta.ca", "position": "MS student;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nghasemabadi2024learning,\ntitle={Learning Truncated Causal History Model for Video Restoration},\nauthor={Amirhosein Ghasemabadi and Muhammad Kamran Janjua and Mohammad Salameh and Di Niu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cUGf2HaNcs}\n}", "github": "", "reviewers": "BXMC;SsXx;KLsv;PswG", "pdf_size": 4248833, "rating": "5;5;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;3;4;3", "presentation": "3;2;2;4", "wc_summary": "96;72;54;106", "wc_strengths": "25;121;72;73", "wc_weaknesses": "21;143;52;80", "wc_questions": "409;73;91;11", "wc_limitations": "5;5;7;45", "wc_review": "556;414;276;315", "wc_reply_reviewers": "86;0;18;12", "wc_reply_authors": "1131;65;102;37", "reply_reviewers": "3;0;1;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 82.0, 20.346989949375804 ], "wc_strengths_avg": [ 72.75, 33.943887520435844 ], "wc_weaknesses_avg": [ 74.0, 44.972213643537714 ], "wc_questions_avg": [ 146.0, 154.71586861081832 ], "wc_limitations_avg": [ 15.5, 17.051392904979934 ], "wc_review_avg": [ 390.25, 108.11192117430899 ], "wc_reply_reviewers_avg": [ 29.0, 33.54101966249684 ], "wc_reply_authors_avg": [ 333.75, 460.8694907454821 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10591468860808466752&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ualberta.ca;huawei.com;huawei.com;ualberta.ca", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Alberta;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com", "aff_unique_abbr": "UAlberta;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;China" }, { "title": "Are Multiple Instance Learning Algorithms Learnable for Instances?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94412", "id": "cUcvlgkQxP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cUcvlgkQxP", "openreview": "https://openreview.net/forum?id=cUcvlgkQxP", "poster": "/media/PosterPDFs/NeurIPS%202024/94412.png?t=1733673619.7659595", "project": "", "author_site": "Jaeseok Jang, HYUK-YOON KWON", "tldr": "", "abstract": "Multiple Instance Learning (MIL) has been increasingly adopted to mitigate the high costs and complexity associated with labeling individual instances, learning instead from bags of instances labeled at the bag level and enabling instance-level labeling. While existing research has primarily focused on the learnability of MIL at the bag level, there is an absence of theoretical exploration to check if a given MIL algorithm is learnable at the instance level. This paper proposes a theoretical framework based on probably approximately correct (PAC) learning theory to assess the instance-level learnability of deep multiple instance learning (Deep MIL) algorithms. Our analysis exposes significant gaps between current Deep MIL algorithms, highlighting the theoretical conditions that must be satisfied by MIL algorithms to ensure instance-level learnability. With these conditions, we interpret the learnability of the representative Deep MIL algorithms and validate them through empirical studies.", "keywords": "Multiple instance learning;Learnability;Learning theory;Probably Approximately Correct", "primary_area": "learning_theory", "supplementary_material": "/attachment/ef353938070d7d8a94bfd7ad81ea5fc8ea22bd19.zip", "author": "Jaeseok Jang;HYUK-YOON KWON", "authorids": "~Jaeseok_Jang2;~HYUK-YOON_KWON1", "gender": "M;Not Specified", "homepage": ";https://sites.google.com/view/seoultech-bigdata/professor?authuser=0", "dblp": ";117/9378", "google_scholar": "HPmaSPQAAAAJ;INJzI3IAAAAJ", "orcid": ";0000-0002-1125-6533", "linkedin": ";hyuk-yoon-kwon-48489339/", "or_profile": "~Jaeseok_Jang2;~HYUK-YOON_KWON1", "aff": "Seoul National University of Science and Technology;Seoul National University of Science and Technology", "aff_domain": "seoultech.ac.kr;seoultech.ac.kr", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\njang2024are,\ntitle={Are Multiple Instance Learning Algorithms Learnable for Instances?},\nauthor={Jaeseok Jang and HYUK-YOON KWON},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cUcvlgkQxP}\n}", "github": "", "reviewers": "K1fQ;W4rs;bVsY;dSpL;2msW", "pdf_size": 21167269, "rating": "5;5;6;6;6", "confidence": "4;4;5;4;2", "soundness": "2;4;3;3;3", "novelty": "2;4;3;2;3", "presentation": "1;1;3;3;3", "wc_summary": "51;9;74;73;63", "wc_strengths": "46;7;94;125;112", "wc_weaknesses": "191;29;249;218;60", "wc_questions": "125;9;4;152;53", "wc_limitations": "45;1;24;5;1", "wc_review": "458;55;445;573;289", "wc_reply_reviewers": "95;57;242;122;15", "wc_reply_authors": "254;364;1258;764;47", "reply_reviewers": "2;1;2;2;1", "reply_authors": "3;2;5;3;2", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.2, 0.9797958971132712 ], "wc_summary_avg": [ 54.0, 23.98332754227403 ], "wc_strengths_avg": [ 76.8, 43.997272642744576 ], "wc_weaknesses_avg": [ 149.4, 88.14215790414936 ], "wc_questions_avg": [ 68.6, 60.17507789774767 ], "wc_limitations_avg": [ 15.2, 17.16275036233995 ], "wc_review_avg": [ 364.0, 178.98826777194085 ], "wc_reply_reviewers_avg": [ 106.2, 76.88276790022587 ], "wc_reply_authors_avg": [ 537.4, 429.2987770772239 ], "reply_reviewers_avg": [ 1.6, 0.4898979485566356 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.16666666666666663, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:U11aLR2dTkwJ:scholar.google.com/&scioq=Are+Multiple+Instance+Learning+Algorithms+Learnable+for+Instances%3F&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "seoultech.ac.kr;seoultech.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.snust.ac.kr", "aff_unique_abbr": "SNUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "On Statistical Rates and Provably Efficient Criteria of Latent Diffusion Transformers (DiTs)", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94411", "id": "cV2LKBdlz4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cV2LKBdlz4", "openreview": "https://openreview.net/forum?id=cV2LKBdlz4", "poster": "/media/PosterPDFs/NeurIPS%202024/94411.png?t=1733632241.762385", "project": "", "author_site": "Jerry Yao-Chieh Hu, Weimin Wu, Zhuoru Li, Sophia Pi, Zhao Song, Han Liu", "tldr": "", "abstract": "We investigate the statistical and computational limits of latent **Di**ffusion **T**ransformers (**DiTs**) under the low-dimensional linear latent space assumption. Statistically, we study the universal approximation and sample complexity of the DiTs score function, as well as the distribution recovery property of the initial data. Specifically, under mild data assumptions, we derive an approximation error bound for the score network of latent DiTs, which is sub-linear in the latent space dimension. Additionally, we derive the corresponding sample complexity bound and show that the data distribution generated from the estimated score function converges toward a proximate area of the original one.\nComputationally, we characterize the hardness of both forward inference and backward computation of latent DiTs, assuming the Strong Exponential Time Hypothesis (SETH). For forward inference, we identify efficient criteria for all possible latent DiTs inference algorithms and showcase our theory by pushing the efficiency toward almost-linear time inference. For backward computation, we leverage the low-rank structure within the gradient computation of DiTs training for possible algorithmic speedup. Specifically, we show that such speedup achieves almost-linear time latent DiTs training by casting the DiTs gradient as a series of chained low-rank approximations with bounded error.\nUnder the low-dimensional assumption, we show that the statistical rates and the computational efficiency are all dominated by the dimension of the subspace, suggesting that latent DiTs have the potential to bypass the challenges associated with the high dimensionality of initial data.", "keywords": "DiT;Score Matching;Diffusion Transformer;Efficiency;Universal Approximation;Convergence Analysis;Diffusion Generative Model;DDPM", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jerry Yao-Chieh Hu;Weimin Wu;Zhuoru Li;Sophia Pi;Zhao Song;Han Liu", "authorids": "~Jerry_Yao-Chieh_Hu1;~Weimin_Wu4;~Zhuoru_Li4;~Sophia_Pi1;~Zhao_Song3;~Han_Liu4", "gender": ";M;F;F;M;", "homepage": ";https://weiminwu.owlstown.net/;;;https://www.youtube.com/@zhaosong2031;", "dblp": ";08/2244-1;;;76/4051-2;", "google_scholar": ";OSYbWn8AAAAJ;;;yDZct7UAAAAJ;", "orcid": ";;;;;", "linkedin": ";weimin-wu-81ab88206/;https://www.linkedin.com/mypreferences/d/categories/account;sophia-yixinyun-pi/;;", "or_profile": "~Jerry_Yao-Chieh_Hu1;~Weimin_Wu4;~Zhuoru_Li4;~Sophia_Pi1;~Zhao_Song3;~Han_Liu4", "aff": ";Northwestern University;Fudan University;Northwestern University, Northwestern University;Adobe;Northwestern University", "aff_domain": ";northwestern.edu;fudan.edu.cn;u.northwestern.edu;adobe.com;u.northwestern.edu", "position": ";PhD student;Undergrad student;Undergrad student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhu2024on,\ntitle={On Statistical Rates and Provably Efficient Criteria of Latent Diffusion Transformers (DiTs)},\nauthor={Jerry Yao-Chieh Hu and Weimin Wu and Zhuoru Li and Sophia Pi and Zhao Song and Han Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cV2LKBdlz4}\n}", "github": "", "reviewers": "Zkkk;cP7q;jLPK;LxhB", "pdf_size": 760404, "rating": "6;6;6;7", "confidence": "1;1;2;3", "soundness": "3;4;3;3", "novelty": "3;3;2;3", "presentation": "2;1;1;3", "wc_summary": "82;91;12;69", "wc_strengths": "59;139;23;32", "wc_weaknesses": "89;39;125;79", "wc_questions": "2;2;42;134", "wc_limitations": "2;9;6;22", "wc_review": "234;280;208;336", "wc_reply_reviewers": "0;132;0;0", "wc_reply_authors": "0;414;40;0", "reply_reviewers": "0;3;0;0", "reply_authors": "1;4;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 1.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 63.5, 30.744918279286416 ], "wc_strengths_avg": [ 63.25, 45.6966902521397 ], "wc_weaknesses_avg": [ 83.0, 30.62678566222711 ], "wc_questions_avg": [ 45.0, 53.91660226683428 ], "wc_limitations_avg": [ 9.75, 7.495832175282475 ], "wc_review_avg": [ 264.5, 48.66980583482946 ], "wc_reply_reviewers_avg": [ 33.0, 57.15767664977295 ], "wc_reply_authors_avg": [ 113.5, 174.26058074045318 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3127338572950013007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";northwestern.edu;fudan.edu.cn;u.northwestern.edu;adobe.com;u.northwestern.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Northwestern University;Fudan University;Adobe", "aff_unique_dep": ";;Adobe Inc.", "aff_unique_url": "https://www.northwestern.edu;https://www.fudan.edu.cn;https://www.adobe.com", "aff_unique_abbr": "NU;Fudan;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Extracting Training Data from Molecular Pre-trained Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94410", "id": "cV4fcjcwmz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cV4fcjcwmz", "openreview": "https://openreview.net/forum?id=cV4fcjcwmz", "poster": "/media/PosterPDFs/NeurIPS%202024/94410.png?t=1731582884.4659898", "project": "", "author_site": "Renhong Huang, Jiarong Xu, Zhiming Yang, Xiang Si, Xin Jiang, Hanyang Yuan, Chunping Wang, YANG YANG", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have significantly advanced the field of drug discovery, enhancing the speed and efficiency of molecular identification. However, training these GNNs demands vast amounts of molecular data, which has spurred the emergence of collaborative model-sharing initiatives. These initiatives facilitate the sharing of molecular pre-trained models among organizations without exposing proprietary training data. Despite the benefits, these molecular pre-trained models may still pose privacy risks. For example, malicious adversaries could perform data extraction attack to recover private training data, thereby threatening commercial secrets and collaborative trust. This work, for the first time, explores the risks of extracting private training molecular data from molecular pre-trained models. This task is nontrivial as the molecular pre-trained models are non-generative and exhibit a diversity of model architectures, which differs significantly from language and image models. To address these issues, we introduce a molecule generation approach and propose a novel, model-independent scoring function for selecting promising molecules. To efficiently reduce the search space of potential molecules, we further introduce a Molecule Extraction Policy Network for molecule extraction. Our experiments demonstrate that even with only query access to molecular pre-trained models, there is a considerable risk of extracting training data, challenging the assumption that model sharing alone provides adequate protection against data extraction attacks. Our codes are publicly available at: \\url{https://github.com/renH2/Molextract}.", "keywords": "Graph Data Extraction Attack;Graph Pre-training", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Renhong Huang;Jiarong Xu;Zhiming Yang;Xiang Si;Xin Jiang;Hanyang Yuan;Chunping Wang;Yang Yang", "authorids": "~Renhong_Huang1;~Jiarong_Xu2;~Zhiming_Yang2;~Xiang_Si1;~Xin_Jiang11;~Hanyang_Yuan1;~Chunping_Wang1;~Yang_Yang35", "gender": "M;F;M;M;M;F;M;M", "homepage": "https://github.com/renH2;https://galina0217.github.io/;;;https://github.com/xxx08796;;http://yangy.org;https://jiangxjames.github.io/", "dblp": "325/0914;;;;371/9445;54/2715-1;;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=FoQw58IAAAAJ;;;Rmy5RogAAAAJ;;zs_h9Y4AAAAJ", "orcid": "0000-0002-7808-9768;0000-0003-2973-1889;;;;0000-0003-1854-8667;0000-0002-5058-4417;0000-0003-1231-8529", "linkedin": ";;;sixiang;;https://linkedin.com/in/chunping-wang-7b94a15/;;", "or_profile": "~Renhong_Huang1;~Jiarong_Xu2;~Zhiming_Yang2;~Xiang_Si1;~Hanyang_Yuan1;~Chunping_Wang1;~Yang_Yang35;~XIN_JIANG5", "aff": "Zhejiang University;Fudan University;Fudan University;Fudan University;Zhejiang University;Finvolution Group;Zhejiang University;Lehigh University", "aff_domain": "zju.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;zju.edu.cn;xinye.com;zju.edu.cn;lehigh.edu", "position": "MS student;Assistant Professor;MS student;Undergrad student;PhD student;Principal Scientist;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nhuang2024extracting,\ntitle={Extracting Training Data from Molecular Pre-trained Models},\nauthor={Renhong Huang and Jiarong Xu and Zhiming Yang and Xiang Si and Xin Jiang and Hanyang Yuan and Chunping Wang and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cV4fcjcwmz}\n}", "github": "", "reviewers": "qKJK;o6iJ;JnnP", "pdf_size": 658122, "rating": "3;6;7", "confidence": "4;2;4", "soundness": "3;3;3", "novelty": "2;3;4", "presentation": "3;2;3", "wc_summary": "70;50;96", "wc_strengths": "20;69;119", "wc_weaknesses": "143;127;213", "wc_questions": "6;2;54", "wc_limitations": "6;1;6", "wc_review": "245;249;488", "wc_reply_reviewers": "134;27;73", "wc_reply_authors": "271;49;379", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 18.83259585576738 ], "wc_strengths_avg": [ 69.33333333333333, 40.41726803676314 ], "wc_weaknesses_avg": [ 161.0, 37.345236197762446 ], "wc_questions_avg": [ 20.666666666666668, 23.626726862225798 ], "wc_limitations_avg": [ 4.333333333333333, 2.357022603955158 ], "wc_review_avg": [ 327.3333333333333, 113.62022511663827 ], "wc_reply_reviewers_avg": [ 78.0, 43.825411197918804 ], "wc_reply_authors_avg": [ 233.0, 137.37539808859518 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.2773500981126145, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2370879598311332045&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;zju.edu.cn;xinye.com;zju.edu.cn;lehigh.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;0;2;0;3", "aff_unique_norm": "Zhejiang University;Fudan University;FinVolution Group;Lehigh University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.fudan.edu.cn;https://www.finvolutiongroup.com;https://www.lehigh.edu", "aff_unique_abbr": "ZJU;Fudan;;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "cVbd7uTvD8", "title": "SC3D: Self-conditioned Generative Gaussian Model with 3D-aware Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing single image-to-3D creation methods typically involve a two-stage process, first generating multi-view images, and then using these images for 3D reconstruction. However, training these two stages separately leads to significant data bias in the inference phase, thus affecting the quality of reconstructed results.We introduce a unified 3D generation framework, named SC3D, which integrates diffusion-based multi-view image generation and 3D reconstruction through a self-conditioning mechanism. In our framework, these two modules are established as a cyclic relationship so that they adapt to the distribution of each other.During the denoising process of multi-view generation, we feed rendered color images and maps by SC3D itself to the multi-view generation module.\nThis self-conditioned method with 3D aware feedback unites the entire process and improves geometric consistency.Experiments show that our approach enhances sampling quality, and improves the efficiency and output quality of the generation process.", "keywords": "3D; Video Diffusion Model; 3D generation", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/73115d5b0968b04c07dd68df757ea0bcd096330b.zip", "author": "Hao Wen;Zehuan Huang;Yaohui Wang;Xinyuan Chen;Yu Qiao;Lu Sheng", "authorids": "~Hao_Wen10;~Zehuan_Huang1;~Yaohui_Wang1;~Xinyuan_Chen1;~Yu_Qiao1;~Lu_Sheng1", "gender": "M;M;M;F;;M", "homepage": "https://github.com/Costwen;https://github.com/huanngzh;https://wyhsirius.github.io/;;;https://lucassheng.github.io/", "dblp": ";364/2132;168/6263-1.html;;;132/1772", "google_scholar": ";https://scholar.google.com.hk/citations?user=U3VbX6kAAAAJ;R7LyAb4AAAAJ;3fWSC8YAAAAJ;;https://scholar.google.com.hk/citations?user=_8lB7xcAAAAJ", "orcid": ";;;0000-0002-5517-7255;;0000-0002-8525-9163", "linkedin": ";;;;;", "or_profile": "~Hao_Wen10;~Zehuan_Huang1;~Yaohui_Wang1;~Xinyuan_Chen1;~Yu_Qiao1;~Lu_Sheng1", "aff": "Beihang University;Beihang University;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory;;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;pjlab.org.cn;pjlab.org.cn;;buaa.edu.cn", "position": "MS student;MS student;Research Scientist;Research Scientist;;Associate Professor", "bibtex": "@misc{\nanonymous2024scd,\ntitle={{SC}3D: Self-conditioned Generative Gaussian Model with 3D-aware Feedback},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cVbd7uTvD8}\n}", "github": "", "project": "", "reviewers": "DdFX;KsJi;RUZS;VBi9", "site": "https://openreview.net/forum?id=cVbd7uTvD8", "pdf_size": 11074685, "rating": "5;5;6;6", "confidence": "4;5;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "129;67;140;88", "wc_strengths": "42;80;78;68", "wc_weaknesses": "125;96;144;300", "wc_questions": "31;59;2;81", "wc_limitations": "10;7;59;68", "wc_review": "337;309;423;605", "wc_reply_reviewers": "224;21;0;53", "wc_reply_authors": "436;0;0;46", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 106.0, 29.706901555025897 ], "wc_strengths_avg": [ 67.0, 15.132745950421556 ], "wc_weaknesses_avg": [ 166.25, 79.08974332996662 ], "wc_questions_avg": [ 43.25, 29.684802509028085 ], "wc_limitations_avg": [ 36.0, 27.703790354390136 ], "wc_review_avg": [ 418.5, 115.58005883369329 ], "wc_reply_reviewers_avg": [ 74.5, 88.35298523536146 ], "wc_reply_authors_avg": [ 120.5, 183.11949650433183 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_Div8DyzDggJ:scholar.google.com/&scioq=SC3D:+Self-conditioned+Generative+Gaussian+Model+with+3D-aware+Feedback&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Beihang University;Shanghai AI Laboratory;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.shanghai-ai-lab.com;http://www.shailab.org/", "aff_unique_abbr": "BUAA;SAIL;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cX57Pbw8vS", "title": "Benchmarking PtO and PnO Methods in the Predictive Combinatorial Optimization Regime", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Predictive combinatorial optimization, where the parameters of combinatorial optimization (CO) are unknown at the decision-making time, is the precise modeling of many real-world applications, including energy cost-aware scheduling and budget allocation on advertising. Tackling such a problem usually involves a prediction model and a CO solver. These two modules are integrated into the predictive CO pipeline following two design principles: ''Predict-then-Optimize (PtO)'', which learns predictions by supervised training and subsequently solves CO using predicted coefficients, while the other, named ''Predict-and-Optimize (PnO)'', directly optimizes towards the ultimate decision quality and claims to yield better decisions than traditional PtO approaches. However, there lacks a systematic benchmark of both approaches, including the specific design choices at the module level, as well as an evaluation dataset that covers representative real-world scenarios. To this end, we develop a modular framework to benchmark 11 existing PtO/PnO methods on 8 problems, including a new industrial dataset for combinatorial advertising that will be released. Our study shows that PnO approaches are better than PtO on 7 out of 8 benchmarks, but there is no silver bullet found for the specific design choices of PnO. A comprehensive categorization of current approaches and integration of typical scenarios are provided under a unified benchmark. Therefore, this paper could serve as a comprehensive benchmark for future PnO approach development and also offer fast prototyping for application-focused development. The code is available at \\url{https://github.com/Thinklab-SJTU/PredictiveCO-Benchmark}.", "keywords": "Combinatorial Optimization;Machine Learning;Predict-and-Optimize", "primary_area": "", "supplementary_material": "", "author": "Haoyu Geng;Hang Ruan;Runzhong Wang;Yang Li;YANG WANG;Lei CHEN;Junchi Yan", "authorids": "~Haoyu_Geng1;~Hang_Ruan1;~Runzhong_Wang1;~Yang_Li32;~YANG_WANG44;~Lei_CHEN23;~Junchi_Yan2", "gender": "M;F;M;M;F;M;M", "homepage": "https://hygeng.site;;http://runzhong.wang;https://yangco-le.github.io;https://dgraph.xinye.com/team;https://www.linkedin.cn/incareer/in/ACoAAAPh8noB_KF0tgucaqFyKbDGOv9wkJkM0sY;http://thinklab.sjtu.edu.cn/", "dblp": "289/8341;;239/4351;;;09/3666a.html;60/7949.html", "google_scholar": "_R_RZpAAAAAJ;;uoM0g3cAAAAJ;ecE0xDIAAAAJ;;https://scholar.google.com.hk/citations?user=wDG2dMYAAAAJ;ga230VoAAAAJ", "orcid": "0000-0001-7808-3959;0009-0007-7860-0670;0000-0002-9566-738X;0000-0002-5249-3471;;0000-0002-4912-3293;0000-0001-9639-7679", "linkedin": ";;;;;;", "or_profile": "~Haoyu_Geng1;~Hang_Ruan1;~Runzhong_Wang1;~Yang_Li32;~YANG_WANG44;~Lei_CHEN23;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Massachusetts Institute of Technology;Shanghai Jiaotong University;;Peking University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu;mit.edu;sjtu.edu.cn;;pku.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Postdoc;PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\ngeng2024benchmarking,\ntitle={Benchmarking PtO and PnO Methods in the Predictive Combinatorial Optimization Regime},\nauthor={Haoyu Geng and Hang Ruan and Runzhong Wang and Yang Li and YANG WANG and Lei CHEN and Junchi Yan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cX57Pbw8vS}\n}", "github": "", "project": "", "reviewers": "iiFf;LjCy;b94t;tYMG;XAzY", "site": "https://openreview.net/forum?id=cX57Pbw8vS", "pdf_size": 1296257, "rating": "6;6;6;6;8", "confidence": "4;4;3;3;4", "wc_summary_and_contributions": "60;110;56;31;39", "wc_strengths": "3;96;29;26;19", "wc_improvement": "3;188;32;14;8", "wc_limitations": "1;12;12;13;37", "wc_correctness": "1;32;1;11;8", "wc_clarity": "1;31;1;2;5", "wc_relation_to_prior_work": "1;19;1;6;27", "wc_documentation": "1;77;1;4;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "72;566;134;108;145", "wc_reply_reviewers": "70;115;0;125;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "2;1;0;1;0", "reply_authors": "3;2;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 59.2, 27.54922866433832 ], "wc_strengths_avg": [ 34.6, 31.991248803383716 ], "wc_improvement_avg": [ 49.0, 70.18831811633613 ], "wc_limitations_avg": [ 15.0, 11.8490505948789 ], "wc_correctness_avg": [ 10.6, 11.39473562659529 ], "wc_clarity_avg": [ 8.0, 11.593101396951552 ], "wc_relation_to_prior_work_avg": [ 10.8, 10.43839068055991 ], "wc_documentation_avg": [ 16.8, 30.122416901702962 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 205.0, 182.24159788588335 ], "wc_reply_reviewers_avg": [ 62.0, 53.907327887774215 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=641739890103462607&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Massachusetts Institute of Technology;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://web.mit.edu;http://www.pku.edu.cn", "aff_unique_abbr": "SJTU;MIT;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Abstract Reward Processes: Leveraging State Abstraction for Consistent Off-Policy Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94409", "id": "cYZibc2gKf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cYZibc2gKf", "openreview": "https://openreview.net/forum?id=cYZibc2gKf", "poster": "", "project": "", "author_site": "Shreyas Chaudhari, Ameet Deshpande, Bruno C. da Silva, Philip Thomas", "tldr": "", "abstract": "Evaluating policies using off-policy data is crucial for applying reinforcement learning to real-world problems such as healthcare and autonomous driving. Previous methods for *off-policy evaluation* (OPE) generally suffer from high variance or irreducible bias, leading to unacceptably high prediction errors. In this work, we introduce STAR, a framework for OPE that encompasses a broad range of estimators -- which include existing OPE methods as special cases -- that achieve lower mean squared prediction errors. STAR leverages state abstraction to distill complex, potentially continuous problems into compact, discrete models which we call *abstract reward processes* (ARPs). Predictions from ARPs estimated from off-policy data are provably consistent (asymptotically correct). Rather than proposing a specific estimator, we present a new framework for OPE and empirically demonstrate that estimators within STAR outperform existing methods. The best STAR estimator outperforms baselines in all twelve cases studied, and even the median STAR estimator surpasses the baselines in seven out of the twelve cases.", "keywords": "Off-Policy Evaluation;State Abstraction;Importance Sampling", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/8952fd98ef9cbad448aa8058859d2de1d2d5f696.zip", "author": "Shreyas Chaudhari;Ameet Deshpande;Bruno Castro da Silva;Philip S. Thomas", "authorids": "~Shreyas_Chaudhari1;~Ameet_Deshpande1;~Bruno_Castro_da_Silva1;~Philip_S._Thomas1", "gender": "M;M;M;M", "homepage": "https://shreyasc-13.github.io/;https://people.cs.umass.edu/~bsilva/;http://psthomas.com;https://ameet-1997.github.io", "dblp": "209/9835;75/3139;46/11107;220/4337", "google_scholar": "B9nrloIAAAAJ;eskJDVUAAAAJ;e8Gzgo4AAAAJ;332L1coAAAAJ", "orcid": ";;;", "linkedin": "shreyas-chaudhari/;;;", "or_profile": "~Shreyas_Chaudhari1;~Bruno_Castro_da_Silva1;~Philip_S._Thomas1;~Ameet_S_Deshpande1", "aff": "University of Massachusetts at Amherst;University of Massachusetts, Amherst;College of Information and Computer Science, University of Massachusetts, Amherst;Princeton University", "aff_domain": "umass.edu;umass.edu;cs.umass.edu;princeton.edu", "position": "PhD student;Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nchaudhari2024abstract,\ntitle={Abstract Reward Processes: Leveraging State Abstraction for Consistent Off-Policy Evaluation},\nauthor={Shreyas Chaudhari and Ameet Deshpande and Bruno Castro da Silva and Philip S. Thomas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cYZibc2gKf}\n}", "github": "", "reviewers": "5Mtw;V4gu;oq5d", "pdf_size": 889754, "rating": "7;7;8", "confidence": "3;2;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "22;115;97", "wc_strengths": "73;100;231", "wc_weaknesses": "17;138;193", "wc_questions": "169;5;39", "wc_limitations": "13;1;28", "wc_review": "294;359;588", "wc_reply_reviewers": "38;26;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 78.0, 40.27406113120453 ], "wc_strengths_avg": [ 134.66666666666666, 69.00402564746173 ], "wc_weaknesses_avg": [ 116.0, 73.51643807113254 ], "wc_questions_avg": [ 71.0, 70.67295569499458 ], "wc_limitations_avg": [ 14.0, 11.045361017187261 ], "wc_review_avg": [ 413.6666666666667, 126.09608329453467 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 15.86050300449376 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1QAi2PE2jw0J:scholar.google.com/&scioq=Abstract+Reward+Processes:+Leveraging+State+Abstraction+for+Consistent+Off-Policy+Evaluation&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "umass.edu;umass.edu;cs.umass.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Massachusetts Amherst;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.princeton.edu", "aff_unique_abbr": "UMass Amherst;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Faster Diffusion: Rethinking the Role of the Encoder for Diffusion Model Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94408", "id": "ca2mABGV6p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ca2mABGV6p", "openreview": "https://openreview.net/forum?id=ca2mABGV6p", "poster": "/media/PosterPDFs/NeurIPS%202024/94408.png?t=1729380706.3043485", "project": "", "author_site": "Senmao Li, Taihang Hu, Joost van de Weijer, Fahad Shahbaz Khan, Tao Liu, Linxuan Li, Shiqi Yang, Yaxing Wang, Ming-Ming Cheng, jian Yang", "tldr": "", "abstract": "One of the main drawback of diffusion models is the slow inference time for image generation. Among the most successful approaches to addressing this problem are distillation methods. However, these methods require considerable computational resources. In this paper, we take another approach to diffusion model acceleration. We conduct a comprehensive study of the UNet encoder and empirically analyze the encoder features. This provides insights regarding their changes during the inference process. In particular, we find that encoder features change minimally, whereas the decoder features exhibit substantial variations across different time-steps. This insight motivates us to omit encoder computation at certain adjacent time-steps and reuse encoder features of previous time-steps as input to the decoder in multiple time-steps. Importantly, this allows us to perform decoder computation in parallel, further accelerating the denoising process. Additionally, we introduce a prior noise injection method to improve the texture details in the generated image. Besides the standard text-to-image task, we also validate our approach on other tasks: text-to-video, personalized generation and reference-guided generation. Without utilizing any knowledge distillation technique, our approach accelerates both the Stable Diffusion (SD) and DeepFloyd-IF model sampling by 41$\\%$ and 24$\\%$ respectively, and DiT model sampling by 34$\\%$, while maintaining high-quality generation performance. Our code will be publicly released.", "keywords": "generative model;UNet;acceleration;inference phase", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/5c44daed7de11f94726b88c46fae8f58e8699742.zip", "author": "Senmao Li;taihang Hu;Joost van de Weijer;Fahad Khan;Tao Liu;Linxuan Li;Shiqi Yang;Yaxing Wang;Ming-Ming Cheng;jian Yang", "authorids": "~Senmao_Li2;~taihang_Hu1;~Joost_van_de_Weijer5;~Fahad_Khan1;~Tao_Liu23;~Linxuan_Li2;~Shiqi_Yang1;~Yaxing_Wang3;~Ming-Ming_Cheng3;~jian_Yang14", "gender": ";M;M;M;M;M;M;M;M;M", "homepage": "https://sen-mao.github.io/;https://hutaihang.github.io/;https://sites.google.com/view/fahadkhans/home;https://github.com/byliutao;https://github.com/Potato-lover;https://www.shiqiyang.xyz/;https://yaxingwang.netlify.app/author/yaxing-wang/;https://mmcheng.net;;http://lamp.cvc.uab.es/", "dblp": "344/2376;344/1728;05/8618;;;;;45/7592;;67/3379", "google_scholar": "F96SDKwAAAAJ;;zvaeYnUAAAAJ;;;p27Iqt4AAAAJ;https://scholar.google.es/citations?user=6CsB8k0AAAAJ;huWpVyEAAAAJ;;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": ";;;;;;;0000-0001-5550-8758;;0000-0002-9656-9706", "linkedin": ";;;;;aquila147/;;;;", "or_profile": "~Senmao_Li2;~taihang_Hu1;~Fahad_Khan1;~Tao_Liu23;~Linxuan_Li2;~Shiqi_Yang1;~Yaxing_Wang3;~Ming-Ming_Cheng3;~jian_Yang14;~Joost_van_de_Weijer1", "aff": "Nankai University;Nankai University;Link\u00f6ping University;Shenzhen University;Harbin Engineering University;Sony Group Corporation;Nankai University;Nankai University;Nankai University;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "nankai.edu.cn;nankai.edu.cn;liu.se;email.szu.edu.cn;hrbeu.edu.cn;sony.com;nku.nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;cvc.uab.es", "position": "PhD student;MS student;Associate Professor;Undergrad student;Undergrad student;Research Scientist;Associate Professor;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nli2024faster,\ntitle={Faster Diffusion: Rethinking the Role of the Encoder for Diffusion Model Inference},\nauthor={Senmao Li and taihang Hu and Joost van de Weijer and Fahad Khan and Tao Liu and Linxuan Li and Shiqi Yang and Yaxing Wang and Ming-Ming Cheng and jian Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ca2mABGV6p}\n}", "github": "", "reviewers": "wo3A;BYFv;wXBv;i4aK", "pdf_size": 15273543, "rating": "5;5;6;8", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "54;57;111;88", "wc_strengths": "33;122;58;109", "wc_weaknesses": "245;199;87;103", "wc_questions": "4;264;105;5", "wc_limitations": "11;10;8;8", "wc_review": "347;652;369;313", "wc_reply_reviewers": "45;796;0;0", "wc_reply_authors": "231;555;0;0", "reply_reviewers": "2;4;0;0", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 77.5, 23.47871376374779 ], "wc_strengths_avg": [ 80.5, 36.39024594585752 ], "wc_weaknesses_avg": [ 158.5, 65.7932367344851 ], "wc_questions_avg": [ 94.5, 106.11432514038809 ], "wc_limitations_avg": [ 9.25, 1.299038105676658 ], "wc_review_avg": [ 420.25, 135.28003363394023 ], "wc_reply_reviewers_avg": [ 210.25, 338.68154289833984 ], "wc_reply_authors_avg": [ 196.5, 227.45164321235404 ], "reply_reviewers_avg": [ 1.5, 1.6583123951777 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10391847800398736910&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nankai.edu.cn;nankai.edu.cn;liu.se;email.szu.edu.cn;hrbeu.edu.cn;sony.com;nku.nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;cvc.uab.es", "author_num": 10, "aff_unique_index": "0;0;1;2;3;4;0;0;0;5", "aff_unique_norm": "Nankai University;Link\u00f6ping University;Shenzhen University;Harbin Engineering University;Sony Group Corporation;Universitat Aut\u00f3noma de Barcelona", "aff_unique_dep": ";;;;;Computer Vision Center", "aff_unique_url": "http://www.nankai.edu.cn;https://www.liu.se;https://www.szu.edu.cn;http://www.heu.edu.cn;https://www.sony.com;https://www.uab.cat", "aff_unique_abbr": "NKU;LiU;SZU;HEU;Sony;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;2;0;0;0;3", "aff_country_unique": "China;Sweden;Japan;Spain" }, { "title": "HumanVid: Demystifying Training Data for Camera-controllable Human Image Animation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97601", "id": "catfRXDWcb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=catfRXDWcb", "openreview": "https://openreview.net/forum?id=catfRXDWcb", "poster": "/media/PosterPDFs/NeurIPS%202024/97601.png?t=1731758203.2405262", "project": "", "author_site": "Zhenzhi Wang, Yixuan Li, Yanhong Zeng, Youqing Fang, Yuwei Guo, Wenran Liu, Jing Tan, Kai Chen, Bo Dai, Tianfan Xue, Dahua Lin", "tldr": "", "abstract": "Human image animation involves generating videos from a character photo, allowing user control and unlocking the potential for video and movie production. While recent approaches yield impressive results using high-quality training data, the inaccessibility of these datasets hampers fair and transparent benchmarking. Moreover, these approaches prioritize 2D human motion and overlook the significance of camera motions in videos, leading to limited control and unstable video generation. To demystify the training data, we present HumanVid, the first large-scale high-quality dataset tailored for human image animation, which combines crafted real-world and synthetic data. For the real-world data, we compile a vast collection of real-world videos from the internet. We developed and applied careful filtering rules to ensure video quality, resulting in a curated collection of 20K high-resolution (1080P) human-centric videos. Human and camera motion annotation is accomplished using a 2D pose estimator and a SLAM-based method. To expand our synthetic dataset, we collected 10K 3D avatar assets and leveraged existing assets of body shapes, skin textures and clothings. Notably, we introduce a rule-based camera trajectory generation method, enabling the synthetic pipeline to incorporate diverse and precise camera motion annotation, which can rarely be found in real-world data. To verify the effectiveness of HumanVid, we establish a baseline model named **CamAnimate**, short for Camera-controllable Human Animation, that considers both human and camera motions as conditions. Through extensive experimentation, we demonstrate that such simple baseline training on our HumanVid achieves state-of-the-art performance in controlling both human pose and camera motions, setting a new benchmark. Demo, data and code could be found in the project website: https://humanvid.github.io/.", "keywords": "human image animation;video generation", "primary_area": "", "supplementary_material": "/attachment/aea3ab3203fcff61c6706c39b8e5102770eda5be.zip", "author": "Zhenzhi Wang;Yixuan Li;Yanhong Zeng;Youqing Fang;Yuwei Guo;Wenran Liu;Jing Tan;Kai Chen;Tianfan Xue;Bo Dai;Dahua Lin", "authorids": "~Zhenzhi_Wang1;~Yixuan_Li3;~Yanhong_Zeng1;~Youqing_Fang1;~Yuwei_Guo1;~Wenran_Liu2;~Jing_Tan2;~Kai_Chen4;~Tianfan_Xue2;~Bo_Dai2;~Dahua_Lin1", "gender": "M;F;F;;M;;F;M;M;M;M", "homepage": "https://zhenzhiwang.github.io/;https://yixuanli98.github.io;https://zengyh1900.github.io/;;https://guoyww.github.io;;https://sparkstj.github.io/;https://chenkai.site/;https://tianfan.info;http://daibo.info/;http://dahua.site", "dblp": "59/9543-1;144/6087-2;215/4033;;159/7655-2;;96/8637-2;181/2839-26;54/8652;64/2903-2;53/6088", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;dC3bpFcAAAAJ;14LbnMIAAAAJ;;K2ZLY98AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;RfSQKrIAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ;GMzzRRUAAAAJ", "orcid": ";;;;0009-0003-1516-4083;;0009-0005-8016-915X;0000-0002-6820-2325;0000-0001-5031-6618;0000-0003-0777-9232;", "linkedin": ";liyixuan1998/;;;;;;;tianfan-xue-54016716;;", "or_profile": "~Zhenzhi_Wang1;~Yixuan_Li3;~Yanhong_Zeng1;~Youqing_Fang1;~Yuwei_Guo1;~Wenran_Liu2;~Jing_Tan2;~Kai_Chen4;~Tianfan_Xue2;~Bo_Dai2;~Dahua_Lin1", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;Shanghai AI Laboratory;;The Chinese University of Hong Kong;;The Chinese University of Hong Kong;Shanghai AI Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn;;cuhk.edu.hk;;ie.cuhk.edu;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;cuhk.edu.hk", "position": "PhD student;PhD student;Researcher;;PhD student;;PhD student;Researcher;Assistant Professor;Scientist;Associate Professor", "bibtex": "@inproceedings{\nwang2024humanvid,\ntitle={HumanVid: Demystifying Training Data for Camera-controllable Human Image Animation},\nauthor={Zhenzhi Wang and Yixuan Li and Yanhong Zeng and Youqing Fang and Yuwei Guo and Wenran Liu and Jing Tan and Kai Chen and Tianfan Xue and Bo Dai and Dahua Lin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=catfRXDWcb}\n}", "github": "", "reviewers": "KjST;tuQT;Egiq;8Lug", "pdf_size": 22689245, "rating": "6;7;7;7", "confidence": "2;5;3;4", "wc_summary_and_contributions": "31;53;154;58", "wc_strengths": "50;51;3;84", "wc_improvement": "32;148;48;106", "wc_limitations": "9;4;65;10", "wc_correctness": "1;16;11;1", "wc_clarity": "16;1;5;10", "wc_relation_to_prior_work": "1;8;7;9", "wc_documentation": "1;63;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "142;345;299;280", "wc_reply_reviewers": "0;32;18;0", "wc_reply_authors": "0;21;18;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;4;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 74.0, 47.29164831130334 ], "wc_strengths_avg": [ 47.0, 28.853076092507017 ], "wc_improvement_avg": [ 83.5, 46.311445669510256 ], "wc_limitations_avg": [ 22.0, 24.92990172463582 ], "wc_correctness_avg": [ 7.25, 6.49519052838329 ], "wc_clarity_avg": [ 8.0, 5.612486080160912 ], "wc_relation_to_prior_work_avg": [ 6.25, 3.112474899497183 ], "wc_documentation_avg": [ 17.5, 26.320144376503713 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 266.5, 75.66538178057387 ], "wc_reply_reviewers_avg": [ 12.5, 13.444329659748751 ], "wc_reply_authors_avg": [ 9.75, 9.807522622966516 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14935520676625928905&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn;;cuhk.edu.hk;;ie.cuhk.edu;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;cuhk.edu.hk", "author_num": 11, "aff_unique_index": "0;0;1;0;0;1;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "CUHK;SAIL", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mitigating Backdoor Attack by Injecting Proactive Defensive Backdoor", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94407", "id": "cbkJBYIkID", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cbkJBYIkID", "openreview": "https://openreview.net/forum?id=cbkJBYIkID", "poster": "/media/PosterPDFs/NeurIPS%202024/94407.png?t=1733722534.2101011", "project": "", "author_site": "Shaokui Wei, Hongyuan Zha, Baoyuan Wu", "tldr": "", "abstract": "Data-poisoning backdoor attacks are serious security threats to machine learning models, where an adversary can manipulate the training dataset to inject backdoors into models. In this paper, we focus on in-training backdoor defense, aiming to train a clean model even when the dataset may be potentially poisoned. Unlike most existing methods that primarily detect and remove/unlearn suspicious samples to mitigate malicious backdoor attacks, we propose a novel defense approach called PDB (Proactive Defensive Backdoor). Specifically, PDB leverages the \u201chome field\u201d advantage of defenders by proactively injecting a defensive backdoor into the model during training. Taking advantage of controlling the training process, the defensive backdoor is designed to suppress the malicious backdoor effectively while remaining secret to attackers. In addition, we introduce a reversible mapping to determine the defensive target label. During inference, PDB embeds a defensive trigger in the inputs and reverses the model\u2019s prediction, suppressing malicious backdoor and ensuring the model's utility on the original task. Experimental results across various datasets and models demonstrate that our approach achieves state-of-the-art defense performance against a wide range of backdoor attacks. The code is available at https://github.com/shawkui/Proactive_Defensive_Backdoor.", "keywords": "Adversarial Machine Learning;Backdoor Attack;Backdoor Defense;AI Security", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Shaokui Wei;Hongyuan Zha;Baoyuan Wu", "authorids": "~Shaokui_Wei1;~Hongyuan_Zha1;~Baoyuan_Wu1", "gender": "M;;M", "homepage": "https://shawkui.github.io/;;https://sites.google.com/site/baoyuanwu2015/", "dblp": "323/4243;z/HongyuanZha;73/7781", "google_scholar": "WHkEfnsAAAAJ;n1DQMIsAAAAJ;JNTG1KoAAAAJ", "orcid": ";;0000-0003-2183-5990", "linkedin": ";;", "or_profile": "~Shaokui_Wei1;~Hongyuan_Zha1;~Baoyuan_Wu1", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2024mitigating,\ntitle={Mitigating Backdoor Attack by Injecting Proactive Defensive Backdoor},\nauthor={Shaokui Wei and Hongyuan Zha and Baoyuan Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cbkJBYIkID}\n}", "github": "", "reviewers": "wZZe;8vxw;8hm3;1B7j", "pdf_size": 977438, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "54;69;127;142", "wc_strengths": "8;34;24;17", "wc_weaknesses": "221;127;154;21", "wc_questions": "17;33;28;221", "wc_limitations": "5;28;11;15", "wc_review": "305;291;344;416", "wc_reply_reviewers": "51;48;32;32", "wc_reply_authors": "185;158;151;131", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.0, 37.26258176777342 ], "wc_strengths_avg": [ 20.75, 9.522998477370455 ], "wc_weaknesses_avg": [ 130.75, 72.0151893700211 ], "wc_questions_avg": [ 74.75, 84.63561602540624 ], "wc_limitations_avg": [ 14.75, 8.437268515343103 ], "wc_review_avg": [ 339.0, 48.51288488638869 ], "wc_reply_reviewers_avg": [ 40.75, 8.814051281902097 ], "wc_reply_authors_avg": [ 156.25, 19.330998422223306 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12167869062700712425&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.cn", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "BELM: Bidirectional Explicit Linear Multi-step Sampler for Exact Inversion in Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94406", "id": "ccQ4fmwLDb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ccQ4fmwLDb", "openreview": "https://openreview.net/forum?id=ccQ4fmwLDb", "poster": "/media/PosterPDFs/NeurIPS%202024/94406.png?t=1730170432.11887", "project": "", "author_site": "Fangyikang Wang, Hubery Yin, Yue-Jiang Dong, Huminhao Zhu, zhang chao, Hanbin Zhao, Hui Qian, Chen Li", "tldr": "", "abstract": "The inversion of diffusion model sampling, which aims to find the corresponding initial noise of a sample, plays a critical role in various tasks.\nRecently, several heuristic exact inversion samplers have been proposed to address the inexact inversion issue in a training-free manner. \nHowever, the theoretical properties of these heuristic samplers remain unknown and they often exhibit mediocre sampling quality.\nIn this paper, we introduce a generic formulation, \\emph{Bidirectional Explicit Linear Multi-step} (BELM) samplers, of the exact inversion samplers, which includes all previously proposed heuristic exact inversion samplers as special cases.\nThe BELM formulation is derived from the variable-stepsize-variable-formula linear multi-step method via integrating a bidirectional explicit constraint. We highlight this bidirectional explicit constraint is the key of mathematically exact inversion.\nWe systematically investigate the Local Truncation Error (LTE) within the BELM framework and show that the existing heuristic designs of exact inversion samplers yield sub-optimal LTE.\nConsequently, we propose the Optimal BELM (O-BELM) sampler through the LTE minimization approach.\nWe conduct additional analysis to substantiate the theoretical stability and global convergence property of the proposed optimal sampler.\nComprehensive experiments demonstrate our O-BELM sampler establishes the exact inversion property while achieving high-quality sampling.\nAdditional experiments in image editing and image interpolation highlight the extensive potential of applying O-BELM in varying applications.", "keywords": "diffusion model; exact inversion; ODE sampler", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/8896edad9d754034f349e6c49285c7b3e8070b46.zip", "author": "Fangyikang Wang;Hubery Yin;Yue-Jiang Dong;Huminhao Zhu;Chao Zhang;Hanbin Zhao;Hui Qian;Chen Li", "authorids": "~Fangyikang_Wang1;~Hubery_Yin1;~Yue-Jiang_Dong1;~Huminhao_Zhu1;~Chao_Zhang19;~Hanbin_Zhao1;~Hui_Qian1;~Chen_Li11", "gender": "M;;;M;M;M;M;", "homepage": ";;;https://lab304.pages.dev/zh-CN/team/;;;;", "dblp": "365/8666;;;365/9439;94/3019-29;222/7871;66/5293;", "google_scholar": "j80akcEAAAAJ;;;https://scholar.google.com.hk/citations?hl=zh-CN;;F2kiw10AAAAJ;;", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Fangyikang_Wang1;~Hubery_Yin1;~Yue-Jiang_Dong1;~Huminhao_Zhu1;~Chao_Zhang19;~Hanbin_Zhao1;~Hui_Qian1;~Chen_Li11", "aff": "Zhejiang University;;;College of Computer Science and Technology, Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;", "aff_domain": "zju.edu.cn;;;cs.zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "position": "MS student;;;MS student;Assistant Professor;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nwang2024belm,\ntitle={{BELM}: Bidirectional Explicit Linear Multi-step Sampler for Exact Inversion in Diffusion Models},\nauthor={Fangyikang Wang and Hubery Yin and Yue-Jiang Dong and Huminhao Zhu and Chao Zhang and Hanbin Zhao and Hui Qian and Chen Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ccQ4fmwLDb}\n}", "github": "", "reviewers": "eBdS;brf4;HZYw;QGyL", "pdf_size": 25085171, "rating": "6;6;7;7", "confidence": "5;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "30;56;86;49", "wc_strengths": "14;63;119;51", "wc_weaknesses": "140;85;63;68", "wc_questions": "4;2;78;5", "wc_limitations": "1;41;57;1", "wc_review": "189;247;403;174", "wc_reply_reviewers": "0;20;19;31", "wc_reply_authors": "367;16;16;16", "reply_reviewers": "0;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 55.25, 20.141685629559408 ], "wc_strengths_avg": [ 61.75, 37.66546827002155 ], "wc_weaknesses_avg": [ 89.0, 30.553232234904378 ], "wc_questions_avg": [ 22.25, 32.205395510690444 ], "wc_limitations_avg": [ 25.0, 24.657656011875904 ], "wc_review_avg": [ 253.25, 90.65421942744861 ], "wc_reply_reviewers_avg": [ 17.5, 11.146748404803978 ], "wc_reply_authors_avg": [ 103.75, 151.987458364169 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13320439202397014554&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;;;cs.zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DeTeCtive: Detecting AI-generated Text via Multi-Level Contrastive Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94405", "id": "cdTTTJfJe3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cdTTTJfJe3", "openreview": "https://openreview.net/forum?id=cdTTTJfJe3", "poster": "/media/PosterPDFs/NeurIPS%202024/94405.png?t=1731689122.204449", "project": "", "author_site": "Xun Guo, Yongxin He, Shan Zhang, Ting Zhang, Wanquan Feng, Haibin Huang, Chongyang Ma", "tldr": "", "abstract": "Current techniques for detecting AI-generated text are largely confined to manual feature crafting and supervised binary classification paradigms. These methodologies typically lead to performance bottlenecks and unsatisfactory generalizability. Consequently, these methods are often inapplicable for out-of-distribution (OOD) data and newly emerged large language models (LLMs). In this paper, we revisit the task of AI-generated text detection. We argue that the key to accomplishing this task lies in distinguishing writing styles of different authors, rather than simply classifying the text into human-written or AI-generated text. To this end, we propose DeTeCtive, a multi-task auxiliary, multi-level contrastive learning framework. DeTeCtive is designed to facilitate the learning of distinct writing styles, combined with a dense information retrieval pipeline for AI-generated text detection. Our method is compatible with a range of text encoders. Extensive experiments demonstrate that our method enhances the ability of various text encoders in detecting AI-generated text across multiple benchmarks and achieves state-of-the-art results. Notably, in OOD zero-shot evaluation, our method outperforms existing approaches by a large margin. Moreover, we find our method boasts a Training-Free Incremental Adaptation (TFIA) capability towards OOD data, further enhancing its efficacy in OOD detection scenarios. We will open-source our code and models in hopes that our work will spark new thoughts in the field of AI-generated text detection, ensuring safe application of LLMs and enhancing compliance.", "keywords": "LLM;Contrastive Learning;AI-generated text detection", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xun Guo;Yongxin He;Shan Zhang;Ting Zhang;Wanquan Feng;Haibin Huang;Chongyang Ma", "authorids": "~Xun_Guo2;~Yongxin_He1;~Shan_Zhang6;~Ting_Zhang15;~Wanquan_Feng1;~Haibin_Huang1;~Chongyang_Ma1", "gender": "M;M;;F;M;M;", "homepage": "https://github.com/Space-Xun;https://github.com/heyongxin233;https://github.com/zhangshan233hyx;https://github.com/tingz515;https://wanquanf.github.io/;https://brotherhuang.github.io/;", "dblp": ";;;;279/3686;;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=zh-CN;YDl1M80AAAAJ;", "orcid": "0009-0009-9140-4724;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Xun_Guo2;~Yongxin_He1;~Shan_Zhang6;~Ting_Zhang15;~Wanquan_Feng1;~Haibin_Huang1;~Chongyang_Ma1", "aff": "University of Science and Technology of China;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;ByteDance;Kuaishou Technology;", "aff_domain": "ustc.edu.cn;ucas.edu.cn;ucas.edu;mails.ucas.ac.cn;bytedance.com;kuaishou.com;", "position": "MS student;MS student;MS student;PhD student;Researcher;Sr.Research Scientist;", "bibtex": "@inproceedings{\nguo2024detective,\ntitle={DeTeCtive: Detecting {AI}-generated Text via Multi-Level Contrastive Learning},\nauthor={Xun Guo and Yongxin He and Shan Zhang and Ting Zhang and Wanquan Feng and Haibin Huang and Chongyang Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cdTTTJfJe3}\n}", "github": "", "reviewers": "Tv1c;W7ZQ;yy53", "pdf_size": 8089774, "rating": "5;5;7", "confidence": "4;4;3", "soundness": "2;2;3", "novelty": "2;2;2", "presentation": "3;2;3", "wc_summary": "135;31;177", "wc_strengths": "130;35;59", "wc_weaknesses": "351;153;37", "wc_questions": "69;96;27", "wc_limitations": "31;25;34", "wc_review": "716;340;334", "wc_reply_reviewers": "49;427;0", "wc_reply_authors": "60;1287;0", "reply_reviewers": "1;2;0", "reply_authors": "2;7;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 114.33333333333333, 61.369554521946974 ], "wc_strengths_avg": [ 74.66666666666667, 40.33471072028271 ], "wc_weaknesses_avg": [ 180.33333333333334, 129.63881448941987 ], "wc_questions_avg": [ 64.0, 28.39013913315678 ], "wc_limitations_avg": [ 30.0, 3.7416573867739413 ], "wc_review_avg": [ 463.3333333333333, 178.6791040447154 ], "wc_reply_reviewers_avg": [ 158.66666666666666, 190.7919169030899 ], "wc_reply_authors_avg": [ 449.0, 593.0615482393038 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 2.6246692913372702 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5102036363232743122&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;ucas.edu.cn;ucas.edu;mails.ucas.ac.cn;bytedance.com;kuaishou.com;", "author_num": 7, "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "University of Science and Technology of China;University of Chinese Academy of Sciences;ByteDance;Kuaishou Technology", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ucas.ac.cn;https://www.bytedance.com;https://www.kuaishou.com", "aff_unique_abbr": "USTC;UCAS;ByteDance;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cdYIL6OQr6", "title": "Local Differential Privacy for Mixtures of Experts", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce a new approach to the mixture of experts model that consists in imposing local differential privacy on the gating mechanism. This is theoretically justified by statistical learning theory. Notably, we provide generalization bounds specifically tailored for mixtures of experts, leveraging the one-out-of-$n$ gating mechanism rather than the more common $n$-out-of-$n$ mechanism. Moreover, through experiments, we show that our approach improves the generalization ability of mixtures of experts.", "keywords": "Mixtures of experts;Local differential privacy;PAC-Bayes;Generalization bounds", "primary_area": "learning_theory", "supplementary_material": "/attachment/fda8ba5869004d3c9056f9925a7ee293fb8c5276.zip", "author": "Wissam Akretche;Fr\u00e9d\u00e9ric LeBlanc;Mario Marchand", "authorids": "~Wissam_Akretche1;~Fr\u00e9d\u00e9ric_LeBlanc1;~Mario_Marchand1", "gender": "F;M;", "homepage": ";;http://www2.ift.ulaval.ca/~mmarchand/", "dblp": "391/0827;;01/4590", "google_scholar": "Sqd81kEAAAAJ;;https://scholar.google.ca/citations?user=M792u2sAAAAJ", "orcid": ";;", "linkedin": "wissam-akretche-6aa2a0160/;fr%C3%A9d%C3%A9ric-leblanc-b99930178/;", "or_profile": "~Wissam_Akretche1;~Fr\u00e9d\u00e9ric_LeBlanc1;~Mario_Marchand1", "aff": "Universit\u00e9 Laval;Universit\u00e9 Laval;Laval university", "aff_domain": "ulaval.ca;ulaval.ca;ulaval.ca", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024local,\ntitle={Local Differential Privacy for Mixtures of Experts},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cdYIL6OQr6}\n}", "github": "", "project": "", "reviewers": "Cnn1;4Nc6;FV3i;StUb;YLRL", "site": "https://openreview.net/forum?id=cdYIL6OQr6", "pdf_size": 362249, "rating": "3;4;4;4;4", "confidence": "3;4;2;3;4", "soundness": "2;2;2;3;3", "novelty": "3;2;2;2;2", "presentation": "2;1;4;3;2", "wc_summary": "30;88;133;124;55", "wc_strengths": "33;82;69;38;2", "wc_weaknesses": "123;286;516;118;32", "wc_questions": "26;4;41;3;58", "wc_limitations": "46;1;26;3;2", "wc_review": "258;461;785;286;149", "wc_reply_reviewers": "0;50;118;145;9", "wc_reply_authors": "0;0;155;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 3.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 1.019803902718557 ], "wc_summary_avg": [ 86.0, 39.380198069588225 ], "wc_strengths_avg": [ 44.8, 28.23756363427978 ], "wc_weaknesses_avg": [ 215.0, 171.46661482632706 ], "wc_questions_avg": [ 26.4, 21.265935201631738 ], "wc_limitations_avg": [ 15.6, 17.828067758453244 ], "wc_review_avg": [ 387.8, 222.4242792502653 ], "wc_reply_reviewers_avg": [ 64.4, 57.953774682931574 ], "wc_reply_authors_avg": [ 31.0, 62.0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cdX4xUFsIbEJ:scholar.google.com/&scioq=Local+Differential+Privacy+for+Mixtures+of+Experts&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;1", "aff_unique_norm": "Universit\u00e9 Laval;Laval University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ulaval.ca;https://www.laval.ca", "aff_unique_abbr": "ULaval;Laval", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "OmniJARVIS: Unified Vision-Language-Action Tokenization Enables Open-World Instruction Following Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94404", "id": "ceIO1w0PmT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ceIO1w0PmT", "openreview": "https://openreview.net/forum?id=ceIO1w0PmT", "poster": "", "project": "", "author_site": "Zihao Wang, Shaofei Cai, Zhancun Mu, Haowei Lin, Ceyao Zhang, Xuejie Liu, Qing Li, Anji Liu, Xiaojian (Shawn) Ma, Yitao Liang", "tldr": "", "abstract": "This paper presents OmniJARVIS, a novel Vision-Language-Action (VLA) model for open-world instruction-following agents in Minecraft. Compared to prior works that either emit textual goals to separate controllers or produce the control command directly, OmniJARVIS seeks a different path to ensure both strong reasoning and efficient decision-making capabilities via unified tokenization of multimodal interaction data. First, we introduce a self-supervised approach to learn a behavior encoder that produces discretized tokens for behavior trajectories $\\tau = \\{o_0, a_0, \\dots\\}$ and an imitation learning policy decoder conditioned on these tokens. These additional behavior tokens will be augmented to the vocabulary of pretrained Multimodal Language Models. With this encoder, we then pack long-term multimodal interactions involving task instructions, memories, thoughts, observations, textual responses, behavior trajectories, etc into unified token sequences and model them with autoregressive transformers. Thanks to the semantically meaningful behavior tokens, the resulting VLA model, OmniJARVIS, can reason (by producing chain-of-thoughts), plan, answer questions, and act (by producing behavior tokens for the imitation learning policy decoder). OmniJARVIS demonstrates excellent performances on a comprehensive collection of atomic, programmatic, and open-ended tasks in open-world Minecraft. Our analysis further unveils the crucial design principles in interaction data formation, unified tokenization, and its scaling potentials. The dataset, models, and code will be released at https://craftjarvis.org/OmniJARVIS.", "keywords": "open-world;multimodal language model;decision making;generalist agents", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zihao Wang;Shaofei Cai;Zhancun Mu;Haowei Lin;Ceyao Zhang;Xuejie Liu;Qing Li;Anji Liu;Xiaojian Ma;Yitao Liang", "authorids": "~Zihao_Wang23;~Shaofei_Cai2;~Zhancun_Mu1;~Haowei_Lin1;~Ceyao_Zhang1;~Xuejie_Liu2;~Qing_Li1;~Anji_Liu1;~Xiaojian_Ma1;~Yitao_Liang1", "gender": "M;M;M;M;M;;M;M;;M", "homepage": "https://zhwang4ai.github.io/;https://phython96.github.io/;https://muzhancun.github.io;https://linhaowei1.github.io/;;;http://liqing-ustc.github.io/;https://liuanji.github.io/;;https://web.cs.ucla.edu/~yliang/", "dblp": ";276/3245;381/4972;235/2798;277/1121;;181/2689-3;227/8622;;173/4969", "google_scholar": "I0D-EgQAAAAJ;MZXDSSUAAAAJ;mwN8K4IAAAAJ;Ng-DmJgAAAAJ;OadTFGMAAAAJ;vxqpWKUAAAAJ;iwdFZBEAAAAJ;k_4zYecAAAAJ;;KVzR1XEAAAAJ", "orcid": "0000-0001-8396-3707;;;0009-0006-9809-4835;0000-0003-2544-0718;;;;;", "linkedin": ";;;;ceyao-charles-zhang-02536815b;lxj-25a92b263;;anji-liu-7610b7190/;;", "or_profile": "~Zihao_Wang23;~Shaofei_Cai2;~Zhancun_Mu1;~Haowei_Lin1;~Ceyao_Zhang1;~Xuejie_Liu2;~Qing_Li1;~Anji_Liu1;~Xiaojian_Ma1;~Yitao_Liang1", "aff": "Peking University;Peking University;Peking University;Peking University;The Chinese University of Hong Kong, Shenzhen;Peking University;Beijing Institute for General Artificial Intelligence (BIGAI);University of California, Los Angeles;;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;cuhk.edu.cn;pku.edu.cn;bigai.ai;ucla.edu;;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;PhD student;PhD student;PhD student;Researcher;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nwang2024omnijarvis,\ntitle={Omni{JARVIS}: Unified Vision-Language-Action Tokenization Enables Open-World Instruction Following Agents},\nauthor={Zihao Wang and Shaofei Cai and Zhancun Mu and Haowei Lin and Ceyao Zhang and Xuejie Liu and Qing Li and Anji Liu and Xiaojian Ma and Yitao Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ceIO1w0PmT}\n}", "github": "", "reviewers": "YroM;NWLR;qkWa;LGfx", "pdf_size": 5221978, "rating": "6;6;6;7", "confidence": "4;3;5;4", "soundness": "2;3;3;2", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "88;110;82;78", "wc_strengths": "87;31;206;95", "wc_weaknesses": "58;78;311;150", "wc_questions": "143;128;136;26", "wc_limitations": "79;43;9;40", "wc_review": "455;390;744;389", "wc_reply_reviewers": "69;38;75;61", "wc_reply_authors": "234;39;859;43", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 89.5, 12.359207094308275 ], "wc_strengths_avg": [ 104.75, 63.44436539205038 ], "wc_weaknesses_avg": [ 149.25, 99.45696305437845 ], "wc_questions_avg": [ 108.25, 47.78271130858943 ], "wc_limitations_avg": [ 42.75, 24.80297361204902 ], "wc_review_avg": [ 494.5, 146.5102385500754 ], "wc_reply_reviewers_avg": [ 60.75, 14.042346669983617 ], "wc_reply_authors_avg": [ 293.75, 335.72710271886007 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4586274607453081071&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;cuhk.edu.cn;pku.edu.cn;bigai.ai;ucla.edu;;pku.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;2;3;0", "aff_unique_norm": "Peking University;Chinese University of Hong Kong;Beijing Institute for General Artificial Intelligence;University of California, Los Angeles", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.cuhk.edu.cn;http://www.bigmodel.cn/;https://www.ucla.edu", "aff_unique_abbr": "Peking U;CUHK;BIGAI;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Long-Tailed Out-of-Distribution Detection via Normalized Outlier Distribution Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94403", "id": "cesWi7mMLY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cesWi7mMLY", "openreview": "https://openreview.net/forum?id=cesWi7mMLY", "poster": "/media/PosterPDFs/NeurIPS%202024/94403.png?t=1731648021.0298166", "project": "", "author_site": "Wenjun Miao, Guansong Pang, Jin Zheng, Xiao Bai", "tldr": "", "abstract": "One key challenge in Out-of-Distribution (OOD) detection is the absence of ground-truth OOD samples during training. One principled approach to address this issue is to use samples from external datasets as outliers ($\\textit{i.e.}$, pseudo OOD samples) to train OOD detectors.\n However, we find empirically that the outlier samples often present a distribution shift compared to the true OOD samples, especially in Long-Tailed Recognition (LTR) scenarios, where ID classes are heavily imbalanced, $\\textit{i.e.}$, the true OOD samples exhibit very different probability distribution to the head and tailed ID classes from the outliers.\n In this work, we propose a novel approach, namely $\\textit{normalized outlier distribution adaptation}$ (AdaptOD), to tackle this distribution shift problem.\n One of its key components is $\\textit{dynamic outlier distribution adaptation}$ that effectively adapts a vanilla outlier distribution based on the outlier samples to the true OOD distribution by utilizing the OOD knowledge in the predicted OOD samples during inference.\n Further, to obtain a more reliable set of predicted OOD samples on long-tailed ID data, a novel $\\textit{dual-normalized energy loss}$ is introduced in AdaptOD, which leverages class- and sample-wise normalized energy to enforce a more balanced prediction energy on imbalanced ID samples. This helps avoid bias toward the head samples and learn a substantially better vanilla outlier distribution than existing energy losses during training. It also eliminates the need of manually tuning the sensitive margin hyperparameters in energy losses.\n Empirical results on three popular benchmarks for OOD detection in LTR show the superior performance of AdaptOD over state-of-the-art methods.\nCode is available at https://github.com/mala-lab/AdaptOD.", "keywords": "out-of-distribution detection", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Wenjun Miao;Guansong Pang;Jin Zheng;Xiao Bai", "authorids": "~Wenjun_Miao3;~Guansong_Pang1;~Jin_Zheng1;~Xiao_Bai4", "gender": ";F;M;M", "homepage": "http://guansongpang.com/;https://shi.buaa.edu.cn/zhengjin/zh_CN/index.htm;;", "dblp": "07/11150;;;365/4476", "google_scholar": "https://scholar.google.com.tw/citations?hl=en;;k6l1vZIAAAAJ;", "orcid": "0000-0002-9877-2716;;;0009-0009-8878-3305", "linkedin": "guansong-pang-5587b21b/;;;", "or_profile": "~Guansong_Pang1;~Jin_Zheng1;~Xiao_Bai4;~wenjun_miao2", "aff": "Singapore Management University;BeiHang University;Beihang University;Beihang University", "aff_domain": "smu.edu.sg;buaa.edu;buaa.edu.cn;buaa.edu.cn", "position": "Assistant Professor;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nmiao2024longtailed,\ntitle={Long-Tailed Out-of-Distribution Detection via Normalized Outlier Distribution Adaptation},\nauthor={Wenjun Miao and Guansong Pang and Jin Zheng and Xiao Bai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cesWi7mMLY}\n}", "github": "", "reviewers": "udMg;eeVQ;KD4x;t7zA", "pdf_size": 716289, "rating": "5;5;5;7", "confidence": "4;3;3;4", "soundness": "3;2;2;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "125;88;81;178", "wc_strengths": "25;68;78;133", "wc_weaknesses": "41;85;401;87", "wc_questions": "2;1;58;44", "wc_limitations": "1;2;16;8", "wc_review": "194;244;634;450", "wc_reply_reviewers": "0;0;0;24", "wc_reply_authors": "0;0;72;28", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.0, 38.4642691338338 ], "wc_strengths_avg": [ 76.0, 38.4642691338338 ], "wc_weaknesses_avg": [ 153.5, 144.07203059580996 ], "wc_questions_avg": [ 26.25, 25.24257316519059 ], "wc_limitations_avg": [ 6.75, 5.973901572674261 ], "wc_review_avg": [ 380.5, 175.00499992857348 ], "wc_reply_reviewers_avg": [ 6.0, 10.392304845413264 ], "wc_reply_authors_avg": [ 25.0, 29.444863728670914 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5134588878662595345&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "smu.edu.sg;buaa.edu;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Singapore Management University;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;http://www.buaa.edu.cn/", "aff_unique_abbr": "SMU;Beihang", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Singapore;China" }, { "title": "Graph Diffusion Transformers for Multi-Conditional Molecular Generation", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94402", "id": "cfrDLD1wfO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cfrDLD1wfO", "openreview": "https://openreview.net/forum?id=cfrDLD1wfO", "poster": "/media/PosterPDFs/NeurIPS%202024/94402.png?t=1731536570.0577626", "project": "", "author_site": "Gang Liu, Jiaxin Xu, Tengfei Luo, Meng Jiang", "tldr": "", "abstract": "Inverse molecular design with diffusion models holds great potential for advancements in material and drug discovery. Despite success in unconditional molecule generation, integrating multiple properties such as synthetic score and gas permeability as condition constraints into diffusion models remains unexplored. We present the Graph Diffusion Transformer (Graph DiT) for multi-conditional molecular generation. Graph DiT has a condition encoder to learn the representation of numerical and categorical properties and utilizes a Transformer-based graph denoiser to achieve molecular graph denoising under conditions. Unlike previous graph diffusion models that add noise separately on the atoms and bonds in the forward diffusion process, we propose a graph-dependent noise model for training Graph DiT, designed to accurately estimate graph-related noise in molecules. We extensively validate the Graph DiT for multi-conditional polymer and small molecule generation. Results demonstrate our superiority across metrics from distribution learning to condition control for molecular properties. A polymer inverse design task for gas separation with feedback from domain experts further demonstrates its practical utility. The code is available at https://github.com/liugangcode/Graph-DiT.", "keywords": "Graph Diffusion Transformers;Inverse Molecular Design;Molecular Generation;Polymer Generation", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/3613587d56827d47d7661ec983d862bb019d9d30.zip", "author": "Gang Liu;Jiaxin Xu;Tengfei Luo;Meng Jiang", "authorids": "~Gang_Liu6;~Jiaxin_Xu1;~Tengfei_Luo1;~Meng_Jiang3", "gender": "M;Non-Binary;M;M", "homepage": "https://liugangcode.github.io/;https://www.linkedin.com/in/jiaxin-xu-75931213a/;https://monsterlab.nd.edu;http://www.meng-jiang.com/", "dblp": "37/2109-25;76/10625;;69/339-1", "google_scholar": "zdF3vTYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;VIiy6ugAAAAJ;LZIPfCkAAAAJ", "orcid": "0000-0003-4204-731X;0000-0001-9830-3189;;0000-0002-3009-519X", "linkedin": ";jiaxin-xu-75931213a/;;meng-jiang-94b10916/", "or_profile": "~Gang_Liu6;~Jiaxin_Xu1;~Tengfei_Luo1;~Meng_Jiang3", "aff": "University of Notre Dame;University of Notre Dame;University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;nd.edu;nd.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024graph,\ntitle={Graph Diffusion Transformers for Multi-Conditional Molecular Generation},\nauthor={Gang Liu and Jiaxin Xu and Tengfei Luo and Meng Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cfrDLD1wfO}\n}", "github": "", "reviewers": "N56G;hJ6K;rG3q;RRe9", "pdf_size": 9752636, "rating": "5;5;6;6", "confidence": "4;5;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;4;2", "wc_summary": "60;54;110;79", "wc_strengths": "117;21;15;100", "wc_weaknesses": "129;200;24;39", "wc_questions": "127;178;338;33", "wc_limitations": "1;1;16;1", "wc_review": "434;454;503;252", "wc_reply_reviewers": "277;287;51;0", "wc_reply_authors": "1131;794;50;0", "reply_reviewers": "2;3;2;0", "reply_authors": "4;4;3;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.75, 21.821720830401986 ], "wc_strengths_avg": [ 63.25, 45.6966902521397 ], "wc_weaknesses_avg": [ 98.0, 71.27762622310033 ], "wc_questions_avg": [ 169.0, 110.5689829925192 ], "wc_limitations_avg": [ 4.75, 6.49519052838329 ], "wc_review_avg": [ 410.75, 95.02992949592249 ], "wc_reply_reviewers_avg": [ 153.75, 129.55959053655582 ], "wc_reply_authors_avg": [ 493.75, 483.9784990058959 ], "reply_reviewers_avg": [ 1.75, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14590656542800342902&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nd.edu;nd.edu;nd.edu;nd.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Lookback Prophet Inequalities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94401", "id": "cg1vwt5Xou", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cg1vwt5Xou", "openreview": "https://openreview.net/forum?id=cg1vwt5Xou", "poster": "", "project": "", "author_site": "Ziyad Benomar, Dorian Baudry, Vianney Perchet", "tldr": "", "abstract": "Prophet inequalities are fundamental optimal stopping problems, where a decision-maker observes sequentially items with values sampled independently from known distributions, and must decide at each new observation to either stop and gain the current value or reject it irrevocably and move to the next step. This model is often too pessimistic and does not adequately represent real-world online selection processes. Potentially, rejectesd items can be revisited and a fraction of their value can be recovered. To analyze this problem, we consider general decay functions $D_1,D_2,\\ldots$, quantifying the value to be recovered from a rejected item, depending on how far it has been observed in the past. We analyze how lookback improves, or not, the competitive ratio in prophet inequalities in different order models. \nWe show that, under mild monotonicity assumptions on the decay functions, the problem can be reduced to the case where all the decay functions are equal to the same function $x \\mapsto \\gamma x$, where $\\gamma = \\inf_{x>0} \\inf_{j \\geq 1} D_j(x)/x$. Consequently, we focus on this setting and refine the analyses of the competitive ratios, with upper and lower bounds expressed as increasing functions of $\\gamma$.", "keywords": "prophet inequalitity;online algorithms;decision-making under uncertainty", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Ziyad Benomar;Dorian Baudry;Vianney Perchet", "authorids": "~Ziyad_Benomar1;~Dorian_Baudry1;~Vianney_Perchet3", "gender": ";M;", "homepage": ";https://dbaudry.github.io/;", "dblp": ";277/6362;", "google_scholar": ";https://scholar.google.fr/citations?user=RRW-kfYAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ziyad_Benomar1;~Dorian_Baudry1;~Vianney_Perchet3", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\nbenomar2024lookback,\ntitle={Lookback Prophet Inequalities},\nauthor={Ziyad Benomar and Dorian Baudry and Vianney Perchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cg1vwt5Xou}\n}", "github": "", "reviewers": "g6ab;Q5gj;us2g", "pdf_size": 1173648, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;4;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "47;184;208", "wc_strengths": "38;68;35", "wc_weaknesses": "45;54;75", "wc_questions": "73;166;56", "wc_limitations": "1;1;2", "wc_review": "204;473;376", "wc_reply_reviewers": "14;8;11", "wc_reply_authors": "62;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 146.33333333333334, 70.91935952584143 ], "wc_strengths_avg": [ 47.0, 14.89966442575134 ], "wc_weaknesses_avg": [ 58.0, 12.569805089976535 ], "wc_questions_avg": [ 98.33333333333333, 48.3482735529983 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 351.0, 111.23248925860945 ], "wc_reply_reviewers_avg": [ 11.0, 2.449489742783178 ], "wc_reply_authors_avg": [ 20.666666666666668, 29.227080289043965 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=272371450324813979&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 6, "email": ";;", "author_num": 3 }, { "id": "cgb0Tn4uHy", "title": "A Transition Matrix-Based Extended Model for Label-Noise Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "The transition matrix methods have garnered sustained attention as a class of techniques for label-noise learning due to their simplicity and statistical consistency. However, existing methods primarily focus on class-dependent noise and lack applicability for instance-dependent noise, while some methods specifically designed for instance-dependent noise tend to be relatively complex. To address this issue, we propose an extended model based on transition matrix in this paper, which preserves simplicity while extending its applicability to handle a broader range of noisy data beyond class-dependent noise. The proposed model's consistency and generalization properties are theoretically analyzed under certain assumptions. Experimental evaluations conducted on various synthetic and real-world noisy datasets demonstrate significant improvements over existing transition matrix-based methods. Upon acceptance of our paper, the code will be open sourced.", "keywords": "Transition Matrix;Label-Noise Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/a1644e9d31cfda5312f7e43f222ec96d8fffb6fd.zip", "author": "Haixin Yang;Ruirui Li;Xiangzhong Fang;Yukun Yang;Naihao Wang", "authorids": "~Haixin_Yang1;~Ruirui_Li2;~Xiangzhong_Fang2;~Yukun_Yang3;~Naihao_Wang2", "gender": "M;F;M;M;M", "homepage": "https://github.com/DrawFlatbread/xin;;https://www.math.pku.edu.cn/jsdw/js_20180628175159671361/f_20180628175159671361/69902.htm;https://github.com/Tyrantyyk;https://github.com/wangnaihao", "dblp": ";;;;", "google_scholar": ";Tg_SltkAAAAJ;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Haixin_Yang1;~Ruirui_Li2;~Xiangzhong_Fang2;~Yukun_Yang3;~NaiHao_Wang1", "aff": "Peking University;Beijing University of Chemical Technology;School of mathematical Science, Peking University, Peking University;Beijing University of Chemical Technology;Beijing University of Chemical Technology", "aff_domain": "pku.edu.cn;buct.edu.cn;math.pku.edu.cn;buct.edu.cn;buct.edu.cn", "position": "PhD student;Full Professor;Full Professor;Undergrad student;MS student", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Transition Matrix-Based Extended Model for Label-Noise Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=cgb0Tn4uHy}\n}", "github": "", "project": "", "reviewers": "2B3S;dsrS;17ZU;T3zW", "site": "https://openreview.net/forum?id=cgb0Tn4uHy", "pdf_size": 376645, "rating": "3;3;3;5", "confidence": "4;3;4;3", "soundness": "3;1;2;3", "novelty": "2;2;2;3", "presentation": "2;2;2;2", "wc_summary": "65;81;99;89", "wc_strengths": "28;53;17;153", "wc_weaknesses": "103;220;80;325", "wc_questions": "78;101;58;41", "wc_limitations": "53;1;1;14", "wc_review": "327;456;255;622", "wc_reply_reviewers": "13;15;0;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "0;0;0;0", "rating_avg": [ 3.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 83.5, 12.439855304624729 ], "wc_strengths_avg": [ 62.75, 53.71394139327331 ], "wc_weaknesses_avg": [ 182.0, 98.15548889389731 ], "wc_questions_avg": [ 69.5, 22.41093483101497 ], "wc_limitations_avg": [ 17.25, 21.3116752039815 ], "wc_review_avg": [ 415.0, 139.529566759164 ], "wc_reply_reviewers_avg": [ 10.25, 5.973901572674261 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uccJY-dV7vsJ:scholar.google.com/&scioq=A+Transition+Matrix-Based+Extended+Model+for+Label-Noise+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Peking University;Beijing University of Chemical Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.buct.edu.cn", "aff_unique_abbr": "Peking U;BUCT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Peking", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Discretely beyond $1/e$: Guided Combinatorial Algortihms for Submodular Maximization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94400", "id": "cgiOX8lfwG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cgiOX8lfwG", "openreview": "https://openreview.net/forum?id=cgiOX8lfwG", "poster": "/media/PosterPDFs/NeurIPS%202024/94400.png?t=1733265071.83029", "project": "", "author_site": "Yixin Chen, Ankur Nath, Chunli Peng, Alan Kuhnle", "tldr": "", "abstract": "For constrained, not necessarily monotone submodular maximization, all known approximation algorithms with ratio greater than $1/e$ require continuous ideas, such as queries to the multilinear extension of a submodular function and its gradient, which are typically expensive to simulate with the original set function. For combinatorial algorithms, the best known approximation ratios for both size and matroid constraint are obtained by a simple randomized greedy algorithm of Buchbinder et al. [9]: $1/e \\approx 0.367$ for size constraint and $0.281$ for the matroid constraint in $\\mathcal O (kn)$ queries, where $k$ is the rank of the matroid. In this work, we develop the first combinatorial algorithms to break the $1/e$ barrier: we obtain approximation ratio of $0.385$ in $\\mathcal O (kn)$ queries to the submodular set function for size constraint, and $0.305$ for a general matroid constraint. These are achieved by guiding the randomized greedy algorithm with a fast local search algorithm. Further, we develop deterministic versions of these algorithms, maintaining the same ratio and asymptotic time complexity. Finally, we develop a deterministic, nearly linear time algorithm with ratio $0.377$.", "keywords": "combinatorial algorithms;deterministic algorithms;submodular optimization", "primary_area": "optimization", "supplementary_material": "/attachment/c2eb192f7e57f939545fdaaa7a01fa266d0421f1.zip", "author": "Yixin Chen;Ankur Nath;Chunli Peng;Alan Kuhnle", "authorids": "~Yixin_Chen5;~Ankur_Nath1;~Chunli_Peng2;~Alan_Kuhnle1", "gender": "F;M;M;M", "homepage": "https://yxchen95.github.io;https://ankurnath.github.io/;;https://www.alankuhnle.com", "dblp": ";;;153/2879", "google_scholar": "mBsT8EUAAAAJ;;F3XduPUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0009-0005-4762-0383;", "linkedin": "yixin-chen-bb077915b;;;", "or_profile": "~Yixin_Chen5;~Ankur_Nath1;~Chunli_Peng2;~Alan_Kuhnle1", "aff": "Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University - College Station;Texas A&M University - College Station", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;tamu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024discretely,\ntitle={Discretely beyond \\$1/e\\$: Guided Combinatorial Algortihms for Submodular Maximization},\nauthor={Yixin Chen and Ankur Nath and Chunli Peng and Alan Kuhnle},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cgiOX8lfwG}\n}", "github": "", "reviewers": "qSWx;95ga;NXWY;arzS", "pdf_size": 4754713, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;2;4", "presentation": "3;1;3;3", "wc_summary": "165;212;92;189", "wc_strengths": "28;84;97;95", "wc_weaknesses": "50;244;133;21", "wc_questions": "38;34;53;1", "wc_limitations": "1;4;15;8", "wc_review": "282;578;390;314", "wc_reply_reviewers": "9;458;0;0", "wc_reply_authors": "0;167;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;2;1;0", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 164.5, 45.036096633700396 ], "wc_strengths_avg": [ 76.0, 28.151376520518493 ], "wc_weaknesses_avg": [ 112.0, 86.58810541870055 ], "wc_questions_avg": [ 31.5, 18.980252896102307 ], "wc_limitations_avg": [ 7.0, 5.244044240850758 ], "wc_review_avg": [ 391.0, 114.86949116279744 ], "wc_reply_reviewers_avg": [ 116.75, 197.05503672832117 ], "wc_reply_authors_avg": [ 41.75, 72.31312121600062 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13521024174650025300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tamu.edu;tamu.edu;tamu.edu;tamu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Large Stepsize Gradient Descent for Non-Homogeneous Two-Layer Networks: Margin Improvement and Fast Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94399", "id": "chLoLUHnai", "proceeding": "", "pdf": "https://openreview.net/pdf?id=chLoLUHnai", "openreview": "https://openreview.net/forum?id=chLoLUHnai", "poster": "/media/PosterPDFs/NeurIPS%202024/94399.png?t=1731490937.911042", "project": "", "author_site": "Yuhang Cai, Jingfeng Wu, Song Mei, Michael Lindsey, Peter Bartlett", "tldr": "", "abstract": "The typical training of neural networks using large stepsize gradient descent (GD) under the logistic loss often involves two distinct phases, where the empirical risk oscillates in the first phase but decreases monotonically in the second phase. We investigate this phenomenon in two-layer networks that satisfy a near-homogeneity condition. We show that the second phase begins once the empirical risk falls below a certain threshold, dependent on the stepsize. Additionally, we show that the normalized margin grows nearly monotonically in the second phase, demonstrating an implicit bias of GD in training non-homogeneous predictors. If the dataset is linearly separable and the derivative of the activation function is bounded away from zero, we show that the average empirical risk decreases, implying that the first phase must stop in finite steps. Finally, we demonstrate that by choosing a suitably large stepsize, GD that undergoes this phase transition is more efficient than GD that monotonically decreases the risk. Our analysis applies to networks of any width, beyond the well-known neural tangent kernel and mean-field regimes.", "keywords": "Deep Learning Theory;Large stepsize;Optimization;Implicit Bias", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Yuhang Cai;Jingfeng Wu;Song Mei;Michael Lindsey;Peter Bartlett", "authorids": "~Yuhang_Cai1;~Jingfeng_Wu1;~Song_Mei1;~Michael_Lindsey1;~Peter_Bartlett1", "gender": "M;M;M;;M", "homepage": "https://yuhang-cai.com/;https://uuujf.github.io;https://www.stat.berkeley.edu/~songmei/;https://quantumtative.github.io/;https://www.stat.berkeley.edu/~bartlett/", "dblp": ";;https://dblp.org/pers/hd/m/Mei:Song;;https://dblp.org/pers/hd/b/Bartlett:Peter_L=", "google_scholar": ";z-KILD8AAAAJ;https://scholar.google.com.hk/citations?hl=en;;yQNhFGUAAAAJ", "orcid": ";0009-0009-3414-4487;;;", "linkedin": ";jingfeng-wu-79205b184/;;;", "or_profile": "~Yuhang_Cai1;~Jingfeng_Wu1;~Song_Mei1;~Michael_Lindsey1;~Peter_Bartlett1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\ncai2024large,\ntitle={Large Stepsize Gradient Descent for Non-Homogeneous Two-Layer Networks: Margin Improvement and Fast Optimization},\nauthor={Yuhang Cai and Jingfeng Wu and Song Mei and Michael Lindsey and Peter Bartlett},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=chLoLUHnai}\n}", "github": "", "reviewers": "cshA;PHJ6;RDnn;dNN4", "pdf_size": 1427177, "rating": "6;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;2;2;4", "presentation": "4;3;3;3", "wc_summary": "84;75;157;119", "wc_strengths": "35;66;51;52", "wc_weaknesses": "18;111;309;235", "wc_questions": "81;165;51;132", "wc_limitations": "37;57;1;6", "wc_review": "255;474;569;544", "wc_reply_reviewers": "30;94;42;11", "wc_reply_authors": "0;237;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 108.75, 32.34482184214345 ], "wc_strengths_avg": [ 51.0, 10.977249200050075 ], "wc_weaknesses_avg": [ 168.25, 111.93608667449475 ], "wc_questions_avg": [ 107.25, 44.16092730004659 ], "wc_limitations_avg": [ 25.25, 22.93877721239735 ], "wc_review_avg": [ 460.5, 123.64970683345756 ], "wc_reply_reviewers_avg": [ 44.25, 30.776411421736615 ], "wc_reply_authors_avg": [ 59.25, 102.62401034845598 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7388843641214657084&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Transformer Doctor: Diagnosing and Treating Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94398", "id": "chnJT8Nj8X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=chnJT8Nj8X", "openreview": "https://openreview.net/forum?id=chnJT8Nj8X", "poster": "/media/PosterPDFs/NeurIPS%202024/94398.png?t=1732106216.827883", "project": "", "author_site": "Jiacong Hu, Hao Chen, Kejia Chen, Yang Gao, Jingwen Ye, Xingen Wang, Mingli Song, Zunlei Feng", "tldr": "", "abstract": "Due to its powerful representational capabilities, Transformers have gradually become the mainstream model in the field of machine vision. However, the vast and complex parameters of Transformers impede researchers from gaining a deep understanding of their internal mechanisms, especially error mechanisms. Existing methods for interpreting Transformers mainly focus on understanding them from the perspectives of the importance of input tokens or internal modules, as well as the formation and meaning of features. In contrast, inspired by research on information integration mechanisms and conjunctive errors in the biological visual system, this paper conducts an in-depth exploration of the internal error mechanisms of Transformers. We first propose an information integration hypothesis for Transformers in the machine vision domain and provide substantial experimental evidence to support this hypothesis. This includes the dynamic integration of information among tokens and the static integration of information within tokens in Transformers, as well as the presence of conjunctive errors therein. Addressing these errors, we further propose heuristic dynamic integration constraint methods and rule-based static integration constraint methods to rectify errors and ultimately improve model performance. The entire methodology framework is termed as Transformer Doctor, designed for diagnosing and treating internal errors within transformers. Through a plethora of quantitative and qualitative experiments, it has been demonstrated that Transformer Doctor can effectively address internal errors in transformers, thereby enhancing model performance.", "keywords": "Transformer;Machine Vision;Biological Vision", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/77474d4051a16c4814725b44de5d592335cf4849.zip", "author": "Jiacong Hu;Hao Chen;Kejia Chen;Yang Gao;Jingwen Ye;Xingen Wang;Mingli Song;Zunlei Feng", "authorids": "~Jiacong_Hu1;~Hao_Chen65;~Kejia_Chen1;~Yang_Gao21;~Jingwen_Ye1;~Xingen_Wang1;~Mingli_Song1;~Zunlei_Feng1", "gender": "M;M;;M;F;M;M;M", "homepage": "https://jiaconghu.com;https://github.com/ciacst/;;;https://jngwenye.github.io/;https://person.zju.edu.cn/xingenwang;https://person.zju.edu.cn/msong;https://person.zju.edu.cn/en/zunleifeng", "dblp": "136/3061;;;;200/7853;;71/5333;191/2455", "google_scholar": ";;;;8GQnNP0AAAAJ;S8C1Y0gAAAAJ;7oLbhAwAAAAJ;wMtjcGwAAAAJ", "orcid": ";;;0000-0003-2635-1434;;;0000-0003-2621-6048;", "linkedin": ";;;;;;;", "or_profile": "~Jiacong_Hu1;~Hao_Chen65;~Kejia_Chen1;~Yang_Gao21;~Jingwen_Ye1;~Xingen_Wang1;~Mingli_Song1;~Zunlei_Feng1", "aff": "Zhejiang University;Zhejiang University;;College of Computer Science and Technology, Zhejiang University;National University of Singapore;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;;cs.zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Undergrad student;;PhD student;Postdoc;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nhu2024transformer,\ntitle={Transformer Doctor: Diagnosing and Treating Vision Transformers},\nauthor={Jiacong Hu and Hao Chen and Kejia Chen and Yang Gao and Jingwen Ye and Xingen Wang and Mingli Song and Zunlei Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=chnJT8Nj8X}\n}", "github": "", "reviewers": "BM6N;3nqh;ZXFK;KQuQ", "pdf_size": 3645966, "rating": "5;7;7;9", "confidence": "3;5;5;5", "soundness": "2;4;4;4", "novelty": "2;4;3;4", "presentation": "2;4;3;3", "wc_summary": "63;155;90;82", "wc_strengths": "33;205;71;69", "wc_weaknesses": "205;259;168;27", "wc_questions": "3;2;2;1", "wc_limitations": "12;10;26;13", "wc_review": "316;631;357;192", "wc_reply_reviewers": "16;15;194;14", "wc_reply_authors": "683;19;130;19", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.5, 34.61574786134195 ], "wc_strengths_avg": [ 94.5, 65.56485338960196 ], "wc_weaknesses_avg": [ 164.75, 85.86144361702755 ], "wc_questions_avg": [ 2.0, 0.7071067811865476 ], "wc_limitations_avg": [ 15.25, 6.299801584177076 ], "wc_review_avg": [ 374.0, 160.33246708012697 ], "wc_reply_reviewers_avg": [ 59.75, 77.51249899209805 ], "wc_reply_authors_avg": [ 212.75, 275.2547683510678 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B6T8XNlbZTcJ:scholar.google.com/&scioq=Transformer+Doctor:+Diagnosing+and+Treating+Vision+Transformers&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "zju.edu.cn;zju.edu.cn;;cs.zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Zhejiang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "IF-Font: Ideographic Description Sequence-Following Font Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94397", "id": "ciwOcmo8CC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ciwOcmo8CC", "openreview": "https://openreview.net/forum?id=ciwOcmo8CC", "poster": "/media/PosterPDFs/NeurIPS%202024/94397.png?t=1731374006.1089728", "project": "", "author_site": "Xinping Chen, Xiao Ke, Wenzhong Guo", "tldr": "", "abstract": "Few-shot font generation (FFG) aims to learn the target style from a limited number of reference glyphs and generate the remaining glyphs in the target font. Previous works focus on disentangling the content and style features of glyphs, combining the content features of the source glyph with the style features of the reference glyph to generate new glyphs. However, the disentanglement is challenging due to the complexity of glyphs, often resulting in glyphs that are influenced by the style of the source glyph and prone to artifacts. We propose IF-Font, a novel paradigm which incorporates Ideographic Description Sequence (IDS) instead of the source glyph to control the semantics of generated glyphs. To achieve this, we quantize the reference glyphs into tokens, and model the token distribution of target glyphs using corresponding IDS and reference tokens. The proposed method excels in synthesizing glyphs with neat and correct strokes, and enables the creation of new glyphs based on provided IDS. Extensive experiments demonstrate that our method greatly outperforms state-of-the-art methods in both one-shot and few-shot settings, particularly when the target styles differ significantly from the training font styles. The code is available at [https://github.com/Stareven233/IF-Font](https://github.com/Stareven233/IF-Font).", "keywords": "Font Generation;Vector Quantization;Ideographic Description Sequence;Multimodal", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xinping Chen;Xiao Ke;Wenzhong Guo", "authorids": "~Xinping_Chen1;~Xiao_Ke1;~Wenzhong_Guo1", "gender": ";M;M", "homepage": ";https://ccds.fzu.edu.cn/info/1206/8261.htm;http://cmcs.fzu.edu.cn/website/f/teacherDetail?id=23", "dblp": ";78/9040;", "google_scholar": ";;", "orcid": "0009-0006-4056-7521;0000-0001-9059-5391;", "linkedin": ";;", "or_profile": "~Xinping_Chen1;~Xiao_Ke1;~Wenzhong_Guo1", "aff": "Fuzhou University;Fuzhou University;Fuzhou University", "aff_domain": "fzu.edu.cn;fzu.edu.cn;fzu.edu.cn", "position": "MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024iffont,\ntitle={{IF}-Font: Ideographic Description Sequence-Following Font Generation},\nauthor={Xinping Chen and Xiao Ke and Wenzhong Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ciwOcmo8CC}\n}", "github": "", "reviewers": "o7CM;D5bL;MtRY;ufc8", "pdf_size": 5060991, "rating": "5;5;6;7", "confidence": "4;5;3;3", "soundness": "3;3;3;2", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "38;65;96;72", "wc_strengths": "30;55;68;48", "wc_weaknesses": "70;157;134;209", "wc_questions": "120;15;33;198", "wc_limitations": "31;76;2;7", "wc_review": "289;368;333;534", "wc_reply_reviewers": "15;102;59;37", "wc_reply_authors": "0;183;58;22", "reply_reviewers": "1;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.75, 20.668514702319566 ], "wc_strengths_avg": [ 50.25, 13.718144918318949 ], "wc_weaknesses_avg": [ 142.5, 49.90240475167504 ], "wc_questions_avg": [ 91.5, 73.19323739253511 ], "wc_limitations_avg": [ 29.0, 29.266021253323792 ], "wc_review_avg": [ 381.0, 92.66336924588917 ], "wc_reply_reviewers_avg": [ 53.25, 32.158785735783 ], "wc_reply_authors_avg": [ 65.75, 70.78974148843885 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5p3I1TRecd4J:scholar.google.com/&scioq=IF-Font:+Ideographic+Description+Sequence-Following+Font+Generation&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "fzu.edu.cn;fzu.edu.cn;fzu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fuzhou University", "aff_unique_dep": "", "aff_unique_url": "https://www.fznu.edu.cn", "aff_unique_abbr": "FZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning Macroscopic Dynamics from Partial Microscopic Observations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94396", "id": "cjH0Qsgd0D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cjH0Qsgd0D", "openreview": "https://openreview.net/forum?id=cjH0Qsgd0D", "poster": "/media/PosterPDFs/NeurIPS%202024/94396.png?t=1732616286.9620261", "project": "", "author_site": "Mengyi Chen, Qianxiao Li", "tldr": "", "abstract": "Macroscopic observables of a system are of keen interest in real applications such as the design of novel materials. Current methods rely on microscopic trajectory simulations, where the forces on all microscopic coordinates need to be computed or measured. However, this can be computationally prohibitive for realistic systems. In this paper, we propose a method to learn macroscopic dynamics requiring only force computations on a subset of the microscopic coordinates. Our method relies on a sparsity assumption: the force on each microscopic coordinate relies only on a small number of other coordinates. The main idea of our approach is to map the training procedure on the macroscopic coordinates back to the microscopic coordinates, on which partial force computations can be used as stochastic estimation to update model parameters. We provide a theoretical justification of this under suitable conditions. We demonstrate the accuracy, force computation efficiency, and robustness of our method on learning macroscopic closure models from a variety of microscopic systems, including those modeled by partial differential equations or molecular dynamics simulations.", "keywords": "Dynamical system;Neural closure model;Macroscopic dynamics;Partial observation", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/28621c20ad1d91b28272b36289167e0349a1edb7.zip", "author": "Mengyi Chen;Qianxiao Li", "authorids": "~Mengyi_Chen1;~Qianxiao_Li1", "gender": "F;M", "homepage": ";https://blog.nus.edu.sg/qianxiaoli/", "dblp": ";172/0930.html", "google_scholar": ";https://scholar.google.com.sg/citations?user=zLgReYoAAAAJ", "orcid": "0009-0001-8008-1976;0000-0002-3903-3737", "linkedin": ";", "or_profile": "~Mengyi_Chen1;~Qianxiao_Li1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024learning,\ntitle={Learning Macroscopic Dynamics from Partial Microscopic Observations},\nauthor={Mengyi Chen and Qianxiao Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cjH0Qsgd0D}\n}", "github": "", "reviewers": "d4vx;RRQb;ktyB", "pdf_size": 3007245, "rating": "5;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "119;69;88", "wc_strengths": "74;58;106", "wc_weaknesses": "494;212;207", "wc_questions": "251;2;39", "wc_limitations": "55;1;56", "wc_review": "993;342;496", "wc_reply_reviewers": "1040;40;346", "wc_reply_authors": "805;0;392", "reply_reviewers": "3;1;2", "reply_authors": "3;1;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 92.0, 20.607442021431645 ], "wc_strengths_avg": [ 79.33333333333333, 19.955506062794353 ], "wc_weaknesses_avg": [ 304.3333333333333, 134.13011924578643 ], "wc_questions_avg": [ 97.33333333333333, 109.7036411833668 ], "wc_limitations_avg": [ 37.333333333333336, 25.69478978746902 ], "wc_review_avg": [ 610.3333333333334, 277.7940884100228 ], "wc_reply_reviewers_avg": [ 475.3333333333333, 418.366134172237 ], "wc_reply_authors_avg": [ 399.0, 328.6771465536761 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dz0Ge4_1KtAJ:scholar.google.com/&scioq=Learning+Macroscopic+Dynamics+from+Partial+Microscopic+Observations&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "u.nus.edu;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Improving Generalization and Convergence by Enhancing Implicit Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94395", "id": "cjM2bhLOiC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cjM2bhLOiC", "openreview": "https://openreview.net/forum?id=cjM2bhLOiC", "poster": "", "project": "", "author_site": "Mingze Wang, Jinbo Wang, Haotian He, Zilin Wang, Guanhua Huang, Feiyu Xiong, Zhiyu li, Weinan E, Lei Wu", "tldr": "", "abstract": "In this work, we propose an Implicit Regularization Enhancement (IRE) framework to accelerate the discovery of flat solutions in deep learning, thereby improving generalization and convergence. \nSpecifically, IRE decouples the dynamics of flat and sharp directions, which boosts the sharpness reduction along flat directions while maintaining the training stability in sharp directions. We show that IRE can be practically incorporated with *generic base optimizers* without introducing significant computational overload. Experiments show that IRE consistently improves the generalization performance for image classification tasks across a variety of benchmark datasets (CIFAR-10/100, ImageNet) and models (ResNets and ViTs). \nSurprisingly, IRE also achieves a $2\\times$ *speed-up* compared to AdamW in the pre-training of Llama models (of sizes ranging from 60M to 229M) on datasets including Wikitext-103, Minipile, and Openwebtext. Moreover, we provide theoretical guarantees, showing that IRE can substantially accelerate the convergence towards flat minima in Sharpness-aware Minimization (SAM).", "keywords": "implicit regularization;convergence;generalization;deep learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Mingze Wang;Jinbo Wang;Haotian He;Zilin Wang;Guanhua Huang;Feiyu Xiong;Zhiyu li;Weinan E;Lei Wu", "authorids": "~Mingze_Wang2;~Jinbo_Wang1;~Haotian_He1;~Zilin_Wang3;~Guanhua_Huang1;~Feiyu_Xiong1;~Zhiyu_li2;~Weinan_E2;~Lei_Wu1", "gender": ";M;M;;M;M;M;;M", "homepage": "https://wmz9.github.io/;;;https://www.google.com/;;;;https://web.math.princeton.edu/~weinan/;https://leiwu0.github.io/", "dblp": "296/7556;20/10554-3;;;88/9542;55/4477;;06/9390.html;", "google_scholar": "CkU47X0AAAAJ;eHr_yNgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;SEgFVw0AAAAJ;GOKgLdQAAAAJ;;;CMweeYcAAAAJ", "orcid": ";0009-0008-0107-119X;;;;0000-0002-1456-2202;0009-0008-3196-7739;;", "linkedin": ";;;;;;;;", "or_profile": "~Mingze_Wang2;~Jinbo_Wang1;~Haotian_He1;~Zilin_Wang3;~Guanhua_Huang1;~Feiyu_Xiong1;~Zhiyu_li2;~Weinan_E2;~Lei_Wu1", "aff": "Peking University;Peking University;Peking University;Peking University;University of Science and Technology of China;Institute for Advanced Algorithms Research, Shanghai;Institute for Advanced Algorithms Research, Shanghai;Peking University;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;mail.ustc.edu.cn;iaar.ac.cn;iaar.ac.cn;pku.edu.cn;math.pku.edu.cn", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;Principal Researcher;Principal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024improving,\ntitle={Improving Generalization and Convergence by Enhancing Implicit Regularization},\nauthor={Mingze Wang and Jinbo Wang and Haotian He and Zilin Wang and Guanhua Huang and Feiyu Xiong and Zhiyu li and Weinan E and Lei Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cjM2bhLOiC}\n}", "github": "", "reviewers": "Jrgs;QWSd;69nS", "pdf_size": 1078893, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;2;3", "novelty": "3;2;2", "presentation": "3;2;3", "wc_summary": "153;251;57", "wc_strengths": "21;645;75", "wc_weaknesses": "37;594;121", "wc_questions": "116;490;18", "wc_limitations": "1;119;13", "wc_review": "328;2099;284", "wc_reply_reviewers": "7;34;0", "wc_reply_authors": "37;40;40", "reply_reviewers": "1;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 153.66666666666666, 79.20157125265186 ], "wc_strengths_avg": [ 247.0, 282.2906303794017 ], "wc_weaknesses_avg": [ 250.66666666666666, 245.18337808442254 ], "wc_questions_avg": [ 208.0, 203.37813714031964 ], "wc_limitations_avg": [ 44.333333333333336, 53.024103533728464 ], "wc_review_avg": [ 903.6666666666666, 845.4191596808979 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 14.659088951530682 ], "wc_reply_authors_avg": [ 39.0, 1.4142135623730951 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9037206457698345340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;stu.pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;mail.ustc.edu.cn;iaar.ac.cn;iaar.ac.cn;pku.edu.cn;math.pku.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;1;2;2;0;0", "aff_unique_norm": "Peking University;University of Science and Technology of China;Institute for Advanced Algorithms Research", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.ustc.edu.cn;", "aff_unique_abbr": "Peking U;USTC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "EGSST: Event-based Graph Spatiotemporal Sensitive Transformer for Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94394", "id": "cknAewsBhD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cknAewsBhD", "openreview": "https://openreview.net/forum?id=cknAewsBhD", "poster": "/media/PosterPDFs/NeurIPS%202024/94394.png?t=1731334179.0968738", "project": "", "author_site": "Sheng Wu, Hang Sheng, Hui Feng, Bo Hu", "tldr": "", "abstract": "Event cameras provide exceptionally high temporal resolution in dynamic vision systems due to their unique event-driven mechanism. However, the sparse and asynchronous nature of event data makes frame-based visual processing methods inappropriate. This study proposes a novel framework, Event-based Graph Spatiotemporal Sensitive Transformer (EGSST), for the exploitation of spatial and temporal properties of event data. Firstly, a well-designed graph structure is employed to model event data, which not only preserves the original temporal data but also captures spatial details. Furthermore, inspired by the phenomenon that human eyes pay more attention to objects that produce significant dynamic changes, we design a Spatiotemporal Sensitivity Module (SSM) and an adaptive Temporal Activation Controller (TAC). Through these two modules, our framework can mimic the response of the human eyes in dynamic environments by selectively activating the temporal attention mechanism based on the relative dynamics of event data, thereby effectively conserving computational resources. In addition, the integration of a lightweight, multi-scale Linear Vision Transformer (LViT) markedly enhances processing efficiency. Our research proposes a fully event-driven approach, effectively exploiting the temporal precision of event data and optimising the allocation of computational resources by intelligently distinguishing the dynamics within the event data. The framework provides a lightweight, fast, accurate, and fully event-based solution for object detection tasks in complex dynamic environments, demonstrating significant practicality and potential for application.", "keywords": "Event camera;Graph;Transformer;Object detection", "primary_area": "machine_vision", "supplementary_material": "/attachment/41a9534a6077659bd85d4e648fa0259c8ab017e2.zip", "author": "Sheng Wu;Hang Sheng;Hui Feng;Bo Hu", "authorids": "~Sheng_Wu3;~Hang_Sheng1;~Hui_Feng5;~Bo_Hu6", "gender": ";;M;M", "homepage": "http://ee.fudan.edu.cn/;;;http://www.it.fudan.edu.cn/En/Data/View/1763", "dblp": ";;04/1160-1;04/2380-2.html", "google_scholar": ";;fuoFLGcAAAAJ;EqJVLSgAAAAJ", "orcid": "0009-0005-6562-8100;0000-0002-0597-618X;0000-0002-7095-0621;my-orcid?orcid=0000-0001-6348-010X", "linkedin": ";;;", "or_profile": "~Sheng_Wu3;~Hang_Sheng1;~Hui_Feng5;~Bo_Hu6", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024egsst,\ntitle={{EGSST}: Event-based Graph Spatiotemporal Sensitive Transformer for Object Detection},\nauthor={Sheng Wu and Hang Sheng and Hui Feng and Bo Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cknAewsBhD}\n}", "github": "", "reviewers": "NBbr;WqtW;KLe1;NWmb", "pdf_size": 1120842, "rating": "5;5;5;6", "confidence": "3;5;5;5", "soundness": "2;2;3;3", "novelty": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "70;70;76;82", "wc_strengths": "45;47;95;43", "wc_weaknesses": "173;109;136;327", "wc_questions": "130;69;88;96", "wc_limitations": "14;41;2;45", "wc_review": "432;336;397;593", "wc_reply_reviewers": "307;626;34;49", "wc_reply_authors": "346;1069;91;0", "reply_reviewers": "2;3;1;1", "reply_authors": "3;4;3;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 74.5, 4.9749371855331 ], "wc_strengths_avg": [ 57.5, 21.696773953747133 ], "wc_weaknesses_avg": [ 186.25, 84.37824067850669 ], "wc_questions_avg": [ 95.75, 22.072324299900995 ], "wc_limitations_avg": [ 25.5, 18.062391868188442 ], "wc_review_avg": [ 439.5, 95.0486717424289 ], "wc_reply_reviewers_avg": [ 254.0, 240.63353880953503 ], "wc_reply_authors_avg": [ 376.5, 419.44874537897954 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Itk9aTEF28EJ:scholar.google.com/&scioq=EGSST:+Event-based+Graph+Spatiotemporal+Sensitive+Transformer+for+Object+Detection&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Fractal Patterns May Illuminate the Success of Next-Token Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94393", "id": "clAFYReaYE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clAFYReaYE", "openreview": "https://openreview.net/forum?id=clAFYReaYE", "poster": "/media/PosterPDFs/NeurIPS%202024/94393.png?t=1731404539.0244884", "project": "", "author_site": "Ibrahim Alabdulmohsin, Vinh Tran, Mostafa Dehghani", "tldr": "", "abstract": "We study the fractal structure of language, aiming to provide a precise formalism for quantifying properties that may have been previously suspected but not formally shown. We establish that language is: (1) self-similar, exhibiting complexities at all levels of granularity, with no particular characteristic context length, and (2) long-range dependent (LRD), with a Hurst parameter of approximately 0.7.\nBased on these findings, we argue that short-term patterns/dependencies in language, such as in paragraphs, mirror the patterns/dependencies over larger scopes, like entire documents. This may shed some light on how next-token prediction can capture the structure of text across multiple levels of granularity, from words and clauses to broader contexts and intents. In addition, we carry out an extensive analysis across different domains and architectures, showing that fractal parameters are robust.\nFinally, we demonstrate that the tiny variations in fractal parameters seen across LLMs improve upon perplexity-based bits-per-byte (BPB) in predicting their downstream performance. We hope these findings offer a fresh perspective on language and the mechanisms underlying the success of LLMs.", "keywords": "fractal patterns;large language models;Hurst exponent;self-similarity;Joseph exponent;perplexity;information theory", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ibrahim Alabdulmohsin;Vinh Q. Tran;Mostafa Dehghani", "authorids": "~Ibrahim_Alabdulmohsin1;~Vinh_Q._Tran1;~Mostafa_Dehghani1", "gender": "M;M;M", "homepage": "http://ibomohsin.com;https://vqtran.github.io;http://mostafadehghani.com/", "dblp": "153/5393;77/2885-2.html;125/4062", "google_scholar": "8WNMsPYAAAAJ;ot3WsOwAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ", "orcid": ";;", "linkedin": ";vinh-tran-32597468/;", "or_profile": "~Ibrahim_Alabdulmohsin1;~Vinh_Q._Tran1;~Mostafa_Dehghani1", "aff": "Google;Google DeepMind;Google DeepMind", "aff_domain": "google.com;google.com;google.com", "position": "Research Scientist;Researcher;Research Scientist", "bibtex": "@inproceedings{\nalabdulmohsin2024fractal,\ntitle={Fractal Patterns May Illuminate the Success of Next-Token Prediction},\nauthor={Ibrahim Alabdulmohsin and Vinh Q. Tran and Mostafa Dehghani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clAFYReaYE}\n}", "github": "", "reviewers": "iLZg;cohu;ND2C;UXZb", "pdf_size": 836037, "rating": "5;6;7;7", "confidence": "2;2;3;3", "soundness": "2;3;3;2", "novelty": "2;3;4;2", "presentation": "3;2;3;2", "wc_summary": "74;81;142;143", "wc_strengths": "69;97;53;59", "wc_weaknesses": "86;143;66;163", "wc_questions": "42;43;112;77", "wc_limitations": "44;15;11;14", "wc_review": "315;379;384;456", "wc_reply_reviewers": "212;51;64;945", "wc_reply_authors": "421;61;61;232", "reply_reviewers": "2;1;1;2", "reply_authors": "4;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.0, 32.59601202601324 ], "wc_strengths_avg": [ 69.5, 16.874537030686206 ], "wc_weaknesses_avg": [ 114.5, 39.777506206397604 ], "wc_questions_avg": [ 68.5, 28.79670119996386 ], "wc_limitations_avg": [ 21.0, 13.360389215887388 ], "wc_review_avg": [ 383.5, 49.92243984422236 ], "wc_reply_reviewers_avg": [ 318.0, 367.481292040833 ], "wc_reply_authors_avg": [ 193.75, 148.61927028484564 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10112683379543423422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "SplitNeRF: Split Sum Approximation Neural Field for Joint Geometry, Illumination, and Material Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94392", "id": "clAOSSzT6v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clAOSSzT6v", "openreview": "https://openreview.net/forum?id=clAOSSzT6v", "poster": "/media/PosterPDFs/NeurIPS%202024/94392.png?t=1732181651.9743366", "project": "", "author_site": "Jesus Zarzar, Bernard Ghanem", "tldr": "", "abstract": "We present a novel approach for digitizing real-world objects by estimating their geometry, material properties, and environmental lighting from a set of posed images with fixed lighting. Our method incorporates into Neural Radiance Field (NeRF) pipelines the split sum approximation used with image-based lighting for real-time physically based rendering. We propose modeling the scene's lighting with a single scene-specific MLP representing pre-integrated image-based lighting at arbitrary resolutions. We accurately model pre-integrated lighting by exploiting a novel regularizer based on efficient Monte Carlo sampling. Additionally, we propose a new method of supervising self-occlusion predictions by exploiting a similar regularizer based on Monte Carlo sampling. Experimental results demonstrate the efficiency and effectiveness of our approach in estimating scene geometry, material properties, and lighting. Our method attains state-of-the-art relighting quality after only ${\\sim}1$ hour of training in a single NVIDIA A100 GPU.", "keywords": "inverse rendering;neural rendering;physically-based", "primary_area": "other", "supplementary_material": "/attachment/a8eebe3f2d9e08268d449d6a124bfb07bcd388ae.zip", "author": "Jesus Zarzar;Bernard Ghanem", "authorids": "~Jesus_Zarzar1;~Bernard_Ghanem1", "gender": "M;M", "homepage": ";https://ivul.kaust.edu.sa", "dblp": "237/9581;37/2516", "google_scholar": ";rVsGTeEAAAAJ", "orcid": ";0000-0002-5534-587X", "linkedin": ";bernardghanem/", "or_profile": "~Jesus_Zarzar1;~Bernard_Ghanem1", "aff": "KAUST;King Abdullah University of Science and Technology", "aff_domain": "kaust.edu.sa;kaust.edu.sa", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzarzar2024splitnerf,\ntitle={SplitNe{RF}: Split Sum Approximation Neural Field for Joint Geometry, Illumination, and Material Estimation},\nauthor={Jesus Zarzar and Bernard Ghanem},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clAOSSzT6v}\n}", "github": "", "reviewers": "h9Hn;ApFY;UpzA", "pdf_size": 33463114, "rating": "4;6;7", "confidence": "4;4;4", "soundness": "2;4;3", "novelty": "1;3;3", "presentation": "2;3;3", "wc_summary": "58;83;71", "wc_strengths": "16;101;110", "wc_weaknesses": "68;64;10", "wc_questions": "27;4;57", "wc_limitations": "54;6;10", "wc_review": "223;258;258", "wc_reply_reviewers": "0;44;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 10.208928554075703 ], "wc_strengths_avg": [ 75.66666666666667, 42.35039026450117 ], "wc_weaknesses_avg": [ 47.333333333333336, 26.449112566503164 ], "wc_questions_avg": [ 29.333333333333332, 21.69997439834639 ], "wc_limitations_avg": [ 23.333333333333332, 21.746008573733455 ], "wc_review_avg": [ 246.33333333333334, 16.49915822768611 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 20.741798914805393 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4960004454585617835&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;kaust.edu.sa", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaust.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "CycleNet: Enhancing Time Series Forecasting through Modeling Periodic Patterns", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94391", "id": "clBiQUgj4w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clBiQUgj4w", "openreview": "https://openreview.net/forum?id=clBiQUgj4w", "poster": "/media/PosterPDFs/NeurIPS%202024/94391.png?t=1730252143.5783236", "project": "", "author_site": "Shengsheng Lin, Weiwei Lin, Xinyi Hu, Wentai Wu, Ruichao Mo, Haocheng Zhong", "tldr": "", "abstract": "The stable periodic patterns present in time series data serve as the foundation for conducting long-horizon forecasts. In this paper, we pioneer the exploration of explicitly modeling this periodicity to enhance the performance of models in long-term time series forecasting (LTSF) tasks. Specifically, we introduce the Residual Cycle Forecasting (RCF) technique, which utilizes learnable recurrent cycles to model the inherent periodic patterns within sequences, and then performs predictions on the residual components of the modeled cycles. Combining RCF with a Linear layer or a shallow MLP forms the simple yet powerful method proposed in this paper, called CycleNet. CycleNet achieves state-of-the-art prediction accuracy in multiple domains including electricity, weather, and energy, while offering significant efficiency advantages by reducing over 90% of the required parameter quantity. Furthermore, as a novel plug-and-play technique, the RCF can also significantly improve the prediction accuracy of existing models, including PatchTST and iTransformer. The source code is available at: https://github.com/ACAT-SCUT/CycleNet.", "keywords": "Time Series Forecasting;Time Series Analysis;Machine Learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Shengsheng Lin;Weiwei Lin;Xinyi HU;Wentai Wu;Ruichao Mo;Haocheng Zhong", "authorids": "~Shengsheng_Lin1;~Weiwei_Lin1;~Xinyi_HU2;~Wentai_Wu1;~Ruichao_Mo1;~Haocheng_Zhong1", "gender": ";M;Not Specified;M;M;M", "homepage": ";https://www.scholat.com/linweiwei;https://elizabethxyhu.github.io/;https://wingter562.github.io/wentai_homepage/;;", "dblp": ";53/282-1;;;;", "google_scholar": ";IWsha94AAAAJ;hANa7zAAAAAJ;hyTiOb0AAAAJ;;", "orcid": "0000-0001-5445-5148;0000-0001-6876-1795;;;0000-0003-3402-3182;0009-0005-4975-668X", "linkedin": ";;;;;", "or_profile": "~Shengsheng_Lin1;~Weiwei_Lin1;~Xinyi_HU2;~Wentai_Wu1;~Ruichao_Mo1;~Haocheng_Zhong1", "aff": "South China University of Technology;South China University of Technology;Department of Computer Science and Engineering;Jinan University;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;cse.cuhk.edu.hk;jnu.edu.cn;scut.edu.cn;scut.edu.cn", "position": "PhD student;Full Professor;PhD student;Associate Professor;PhD student;MS student", "bibtex": "@inproceedings{\nlin2024cyclenet,\ntitle={CycleNet: Enhancing Time Series Forecasting through Modeling Periodic Patterns},\nauthor={Shengsheng Lin and Weiwei Lin and Xinyi HU and Wentai Wu and Ruichao Mo and Haocheng Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clBiQUgj4w}\n}", "github": "", "reviewers": "zzRb;SdEc;EqMa;24Zo", "pdf_size": 875777, "rating": "6;7;8;8", "confidence": "3;5;4;5", "soundness": "3;3;4;3", "novelty": "3;3;4;2", "presentation": "3;2;3;3", "wc_summary": "100;55;72;127", "wc_strengths": "84;39;77;64", "wc_weaknesses": "176;75;31;62", "wc_questions": "5;5;83;885", "wc_limitations": "5;1;4;17", "wc_review": "370;175;267;1155", "wc_reply_reviewers": "10;89;28;339", "wc_reply_authors": "73;920;30;2165", "reply_reviewers": "1;2;1;3", "reply_authors": "3;4;2;5", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 27.427176303804956 ], "wc_strengths_avg": [ 66.0, 17.161002301730512 ], "wc_weaknesses_avg": [ 86.0, 54.36451048248296 ], "wc_questions_avg": [ 244.5, 371.1613530528199 ], "wc_limitations_avg": [ 6.75, 6.098155458825234 ], "wc_review_avg": [ 491.75, 389.0908473608702 ], "wc_reply_reviewers_avg": [ 116.5, 131.75450656429175 ], "wc_reply_authors_avg": [ 797.0, 865.8836526924388 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6258802849799342020&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "scut.edu.cn;scut.edu.cn;cse.cuhk.edu.hk;jnu.edu.cn;scut.edu.cn;scut.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "South China University of Technology;University of California, San Diego;Jinan University", "aff_unique_dep": ";Department of Computer Science and Engineering;", "aff_unique_url": "https://www.scut.edu.cn;https://cse.ucsd.edu;https://www.jnu.edu.cn", "aff_unique_abbr": "SCUT;UCSD CSE;JNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "InversionView: A General-Purpose Method for Reading Information from Neural Activations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94390", "id": "clDGHpx2la", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clDGHpx2la", "openreview": "https://openreview.net/forum?id=clDGHpx2la", "poster": "/media/PosterPDFs/NeurIPS%202024/94390.png?t=1733255772.027161", "project": "", "author_site": "Xinting Huang, Madhur Panwar, Navin Goyal, Michael Hahn", "tldr": "", "abstract": "The inner workings of neural networks can be better understood if we can fully decipher the information encoded in neural activations. In this paper, we argue that this information is embodied by the subset of inputs that give rise to similar activations. We propose InversionView, which allows us to practically inspect this subset by sampling from a trained decoder model conditioned on activations. This helps uncover the information content of activation vectors, and facilitates understanding of the algorithms implemented by transformer models. We present four case studies where we investigate models ranging from small transformers to GPT-2. In these studies, we show that InversionView can reveal clear information contained in activations, including basic information about tokens appearing in the context, as well as more complex information, such as the count of certain tokens, their relative positions, and abstract knowledge about the subject. We also provide causally verified circuits to confirm the decoded information.", "keywords": "interpretability;explainability;mechanistic interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/4bf4879a8403372f432201b87130137a1714433c.zip", "author": "Xinting Huang;Madhur Panwar;Navin Goyal;Michael Hahn", "authorids": "~Xinting_Huang2;~Madhur_Panwar1;~Navin_Goyal1;~Michael_Hahn1", "gender": "M;M;;M", "homepage": ";https://mdrpanwar.github.io/;;https://www.mhahn.info/", "dblp": "240/7147;280/0072;20/6275;https://dblp.uni-trier.de/pid/44/9903", "google_scholar": "BpCALOYAAAAJ;I7OVMTsAAAAJ;;", "orcid": ";0000-0002-0053-733X;;", "linkedin": ";mdrpanwar/;;", "or_profile": "~Xinting_Huang2;~Madhur_Panwar1;~Navin_Goyal1;~Michael_Hahn1", "aff": "Universit\u00e4t des Saarlandes;Microsoft;Microsoft;Universit\u00e4t des Saarlandes", "aff_domain": "uni-saarland.de;microsoft.com;microsoft.com;uni-saarland.de", "position": "PhD student;Research Fellow;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024inversionview,\ntitle={InversionView: A General-Purpose Method for Reading Information from Neural Activations},\nauthor={Xinting Huang and Madhur Panwar and Navin Goyal and Michael Hahn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clDGHpx2la}\n}", "github": "", "reviewers": "fZCU;P3CM;Mv2t;vTBG", "pdf_size": 14867178, "rating": "4;6;6;7", "confidence": "3;3;3;4", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "54;55;162;123", "wc_strengths": "39;67;40;69", "wc_weaknesses": "151;49;46;79", "wc_questions": "150;58;3;59", "wc_limitations": "48;1;7;19", "wc_review": "442;230;258;349", "wc_reply_reviewers": "119;20;13;16", "wc_reply_authors": "52;8;8;8", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 98.5, 46.11127844681819 ], "wc_strengths_avg": [ 53.75, 14.271912976192084 ], "wc_weaknesses_avg": [ 81.25, 42.28696607703135 ], "wc_questions_avg": [ 67.5, 52.74703783152187 ], "wc_limitations_avg": [ 18.75, 18.08832496390973 ], "wc_review_avg": [ 319.75, 83.16963087569886 ], "wc_reply_reviewers_avg": [ 42.0, 44.52527372178637 ], "wc_reply_authors_avg": [ 19.0, 19.05255888325765 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18381313649223880111&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uni-saarland.de;microsoft.com;microsoft.com;uni-saarland.de", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Universit\u00e4t des Saarlandes;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.uni-saarland.de;https://www.microsoft.com", "aff_unique_abbr": "UDS;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Oja's Algorithm for Streaming Sparse PCA", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94389", "id": "clQdPtooRD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clQdPtooRD", "openreview": "https://openreview.net/forum?id=clQdPtooRD", "poster": "", "project": "", "author_site": "Syamantak Kumar, Purnamrita Sarkar", "tldr": "", "abstract": "Oja's algorithm for Streaming Principal Component Analysis (PCA) for $n$ data-points in a $d$ dimensional space achieves the same sin-squared error $O(r_{\\mathsf{eff}}/n)$ as the offline algorithm in $O(d)$ space and $O(nd)$ time and a single pass through the datapoints. Here $r_{\\mathsf{eff}}$ is the effective rank (ratio of the trace and the principal eigenvalue of the population covariance matrix $\\Sigma$). Under this computational budget, we consider the problem of sparse PCA, where the principal eigenvector of $\\Sigma$ is $s$-sparse, and $r_{\\mathsf{eff}}$ can be large. In this setting, to our knowledge, *there are no known single-pass algorithms* that achieve the minimax error bound in $O(d)$ space and $O(nd)$ time without either requiring strong initialization conditions or assuming further structure (e.g., spiked) of the covariance matrix.\nWe show that a simple single-pass procedure that thresholds the output of Oja's algorithm (the Oja vector) can achieve the minimax error bound under some regularity conditions in $O(d)$ space and $O(nd)$ time. \nWe present a nontrivial and novel analysis of the entries of the unnormalized Oja vector, which involves the projection of a product of independent random matrices on a random initial vector. This is completely different from previous analyses of Oja's algorithm and matrix products, which have been done when the $r_{\\mathsf{eff}}$ is bounded.", "keywords": "Streaming PCA;Oja's Algorithm;Sparse PCA;Support Recovery;Entrywise Bounds", "primary_area": "optimization", "supplementary_material": "/attachment/b5240985381d69f9093ba803e51c1d2ab410d535.zip", "author": "Syamantak Kumar;Purnamrita Sarkar", "authorids": "~Syamantak_Kumar1;~Purnamrita_Sarkar1", "gender": "M;F", "homepage": "https://syamantakk.github.io/;https://psarkar.github.io/", "dblp": "297/4951;25/6929", "google_scholar": "https://scholar.google.com/citations?hl=en;KfT3_0AAAAAJ", "orcid": ";", "linkedin": "syamantak-kumar/;", "or_profile": "~Syamantak_Kumar1;~Purnamrita_Sarkar1", "aff": "University of Texas at Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nkumar2024ojas,\ntitle={Oja's Algorithm for Streaming Sparse {PCA}},\nauthor={Syamantak Kumar and Purnamrita Sarkar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clQdPtooRD}\n}", "github": "", "reviewers": "qSPb;jbhG;zu7o;pD6C", "pdf_size": 1236283, "rating": "5;7;7;7", "confidence": "2;4;4;2", "soundness": "3;4;4;3", "novelty": "3;4;3;3", "presentation": "3;4;4;3", "wc_summary": "164;23;276;167", "wc_strengths": "35;73;213;21", "wc_weaknesses": "30;38;29;3", "wc_questions": "13;20;477;10", "wc_limitations": "1;1;1;1", "wc_review": "243;155;996;202", "wc_reply_reviewers": "29;8;0;12", "wc_reply_authors": "92;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 157.5, 89.81230427953622 ], "wc_strengths_avg": [ 85.5, 76.03124357788711 ], "wc_weaknesses_avg": [ 25.0, 13.171939872319491 ], "wc_questions_avg": [ 130.0, 200.3734014284331 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 399.0, 346.08163776773824 ], "wc_reply_reviewers_avg": [ 12.25, 10.59186008215743 ], "wc_reply_authors_avg": [ 23.0, 39.83716857408418 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-JGDyv8kETsJ:scholar.google.com/&scioq=Oja%27s+Algorithm+for+Streaming+Sparse+PCA&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cs.utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Return of Unconditional Generation: A Self-supervised Representation Generation Method", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94388", "id": "clTa4JFBML", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clTa4JFBML", "openreview": "https://openreview.net/forum?id=clTa4JFBML", "poster": "/media/PosterPDFs/NeurIPS%202024/94388.png?t=1733849808.7966292", "project": "", "author_site": "Tianhong Li, Dina Katabi, Kaiming He", "tldr": "", "abstract": "Unconditional generation -- the problem of modeling data distribution without relying on human-annotated labels -- is a long-standing and fundamental challenge in generative models, creating a potential of learning from large-scale unlabeled data. In the literature, the generation quality of an unconditional method has been much worse than that of its conditional counterpart. This gap can be attributed to the lack of semantic information provided by labels. In this work, we show that one can close this gap by generating semantic representations in the representation space produced by a self-supervised encoder. These representations can be used to condition the image generator. This framework, called Representation-Conditioned Generation (RCG), provides an effective solution to the unconditional generation problem without using labels. Through comprehensive experiments, we observe that RCG significantly improves unconditional generation quality: e.g., it achieves a new state-of-the-art FID of 2.15 on ImageNet 256x256, largely reducing the previous best of 5.91 by a relative 64%. Our unconditional results are situated in the same tier as the leading class-conditional ones. We hope these encouraging observations will attract the community's attention to the fundamental problem of unconditional generation. Code is available at [https://github.com/LTH14/rcg](https://github.com/LTH14/rcg).", "keywords": "Unconditional Generation;Representation-Conditioned Generation;Self-supervised Learning", "primary_area": "generative_models", "supplementary_material": "", "author": "Tianhong Li;Dina Katabi;Kaiming He", "authorids": "~Tianhong_Li3;~Dina_Katabi1;~Kaiming_He2", "gender": "M;;", "homepage": "http://www.tianhongli.me/;;", "dblp": "195/5632;k/DinaKatabi;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tianhong_Li3;~Dina_Katabi1;~Kaiming_He2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;mit.edu;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nli2024return,\ntitle={Return of Unconditional Generation: A Self-supervised Representation Generation Method},\nauthor={Tianhong Li and Dina Katabi and Kaiming He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clTa4JFBML}\n}", "github": "", "reviewers": "fB14;pEoh;xRbx;zDRz", "pdf_size": 7903977, "rating": "5;8;8;9", "confidence": "5;4;4;5", "soundness": "2;4;4;4", "novelty": "1;2;4;4", "presentation": "3;3;3;4", "wc_summary": "19;98;85;111", "wc_strengths": "15;70;48;87", "wc_weaknesses": "167;124;4;103", "wc_questions": "13;410;15;11", "wc_limitations": "30;123;4;15", "wc_review": "244;825;156;327", "wc_reply_reviewers": "96;52;22;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 1.299038105676658 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 35.42156828826189 ], "wc_strengths_avg": [ 55.0, 26.91653766738954 ], "wc_weaknesses_avg": [ 99.5, 59.76830263609634 ], "wc_questions_avg": [ 112.25, 171.91185968396712 ], "wc_limitations_avg": [ 43.0, 47.10095540432275 ], "wc_review_avg": [ 388.0, 259.44652628239214 ], "wc_reply_reviewers_avg": [ 45.5, 32.66113898810021 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.33333333333333337, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16607738601203728769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "UniIF: Unified Molecule Inverse Folding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94387", "id": "clqX9cVDKV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=clqX9cVDKV", "openreview": "https://openreview.net/forum?id=clqX9cVDKV", "poster": "", "project": "", "author_site": "Zhangyang Gao, Jue Wang, Cheng Tan, Lirong Wu, Yufei Huang, Siyuan Li, Zhirui Ye, Stan Z. Li", "tldr": "", "abstract": "Molecule inverse folding has been a long-standing challenge in chemistry and biology, with the potential to revolutionize drug discovery and material science. Despite specified models have been proposed for different small- or macro-molecules, few have attempted to unify the learning process, resulting in redundant efforts. Complementary to recent advancements in molecular structure prediction, such as RoseTTAFold All-Atom and AlphaFold3, we propose the unified model UniIF for the inverse folding of all molecules. We do such unification in two levels: 1) Data-Level: We propose a unified block graph data form for all molecules, including the local frame building and geometric feature initialization. 2) Model-Level: We introduce a geometric block attention network, comprising a geometric interaction, interactive attention and virtual long-term dependency modules, to capture the 3D interactions of all molecules. Through comprehensive evaluations across various tasks such as protein design, RNA design, and material design, we demonstrate that our proposed method surpasses state-of-the-art methods on all tasks. UniIF offers a versatile and effective solution for general molecule inverse folding.", "keywords": "molecule learning;protein design;RNA design", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zhangyang Gao;Jue Wang;Cheng Tan;Lirong Wu;Yufei Huang;Siyuan Li;Zhirui Ye;Stan Z. Li", "authorids": "~Zhangyang_Gao1;~Jue_Wang9;~Cheng_Tan1;~Lirong_Wu1;~Yufei_Huang4;~Siyuan_Li6;~Zhirui_Ye1;~Stan_Z._Li2", "gender": "M;;M;;M;M;;M", "homepage": ";https://scholar.google.com.hk/citations?hl=zh-CN&pli=1&user=NjYyuQQAAAAJ;https://chengtan9907.github.io/;;https://2021.igem.org/Team:ZJU-China;https://lupin1998.github.io/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "275/3266;;70/1533-12.html;15/10330;68/1946-2;63/9705-2;;l/StanZLi", "google_scholar": "4SclT-QAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;6kTV6aMAAAAJ;Tk7TrCoAAAAJ;qmTjdwIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-1026-6083;;;;0009-0007-8184-4529;0000-0001-6806-2468;;", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;zhiruiye;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Zhangyang_Gao1;~Jue_Wang9;~Cheng_Tan1;~Lirong_Wu1;~Yufei_Huang4;~Siyuan_Li6;~Zhirui_Ye1;~Stan_Z._Li1", "aff": "Westlake University, China;Zhongnan University of Economics and Law;Zhejiang University & Westlake University;Westlake University;Zhejiang University;Alibaba Group;Tianjin University;Westlake University", "aff_domain": "westlake.edu.cn;zuel.edu.cn;westlake.edu.cn;westlake.edu.cn;zju.edu.cn;alibaba-inc.com;tju.edu.cn;westlake.edu.cn", "position": "PhD student;Undergrad student;PhD student;PhD student;PhD student;Intern;Undergrad student;Chair Professor", "bibtex": "@inproceedings{\ngao2024uniif,\ntitle={Uni{IF}: Unified Molecule Inverse Folding},\nauthor={Zhangyang Gao and Jue Wang and Cheng Tan and Lirong Wu and Yufei Huang and Siyuan Li and Zhirui Ye and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=clqX9cVDKV}\n}", "github": "", "reviewers": "r3pY;m3n9;kcUc;Aoua", "pdf_size": 6208334, "rating": "4;5;5;5", "confidence": "4;1;4;3", "soundness": "2;3;3;3", "novelty": "2;2;4;2", "presentation": "3;2;4;3", "wc_summary": "104;37;76;80", "wc_strengths": "67;93;34;60", "wc_weaknesses": "344;236;128;202", "wc_questions": "356;64;23;61", "wc_limitations": "45;12;5;29", "wc_review": "916;442;266;432", "wc_reply_reviewers": "178;87;9;14", "wc_reply_authors": "952;67;12;7", "reply_reviewers": "1;1;1;1", "reply_authors": "5;3;2;2", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.25, 24.02472684548151 ], "wc_strengths_avg": [ 63.5, 21.005951537600005 ], "wc_weaknesses_avg": [ 227.5, 77.77371021109897 ], "wc_questions_avg": [ 126.0, 133.77032555839878 ], "wc_limitations_avg": [ 22.75, 15.530212490497354 ], "wc_review_avg": [ 514.0, 242.39224410034245 ], "wc_reply_reviewers_avg": [ 72.0, 68.54560525664647 ], "wc_reply_authors_avg": [ 259.5, 400.5074905666559 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12363595158512965421&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 6, "email": "westlake.edu.cn;zuel.edu.cn;westlake.edu.cn;westlake.edu.cn;zju.edu.cn;alibaba-inc.com;tju.edu.cn;westlake.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;2;3;4;0", "aff_unique_norm": "Westlake University;Zhongnan University of Economics and Law;Zhejiang University;Alibaba Group;Tianjin University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.westlake.edu.cn;http://www.zuel.edu.cn/;http://www.zju.edu.cn;https://www.alibaba.com;http://www.tju.edu.cn", "aff_unique_abbr": "WU;ZUEL;ZJU;Alibaba;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Axioms for AI Alignment from Human Feedback", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94386", "id": "cmBjkpRuvw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cmBjkpRuvw", "openreview": "https://openreview.net/forum?id=cmBjkpRuvw", "poster": "/media/PosterPDFs/NeurIPS%202024/94386.png?t=1732474104.1458905", "project": "", "author_site": "Luise Ge, Daniel Halpern, Evi Micha, Ariel Procaccia, Itai Shapira, Yevgeniy Vorobeychik, Junlin Wu", "tldr": "", "abstract": "In the context of reinforcement learning from human feedback (RLHF), the reward function is generally derived from maximum likelihood estimation of a random utility model based on pairwise comparisons made by humans. The problem of learning a reward function is one of preference aggregation that, we argue, largely falls within the scope of social choice theory. From this perspective, we can evaluate different aggregation methods via established axioms, examining whether these methods meet or fail well-known standards. We demonstrate that both the Bradley-Terry-Luce Model and its broad generalizations fail to meet basic axioms. In response, we develop novel rules for learning reward functions with strong axiomatic guarantees. A key innovation from the standpoint of social choice is that our problem has a *linear* structure, which greatly restricts the space of feasible rules and leads to a new paradigm that we call *linear social choice*.", "keywords": "Social choice;reinforcement learning from human feedback;AI alignment", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Luise Ge;Daniel Halpern;Evi Micha;Ariel D. Procaccia;Itai Shapira;Yevgeniy Vorobeychik;Junlin Wu", "authorids": "~Luise_Ge1;~Daniel_Halpern1;~Evi_Micha1;~Ariel_D._Procaccia1;~Itai_Shapira1;~Yevgeniy_Vorobeychik1;~Junlin_Wu2", "gender": ";M;F;;M;M;", "homepage": ";https://dhalpern13.github.io;https://evi-micha.github.io;;https://ishapira1.github.io/;http://vorobeychik.com;https://jlwu002.github.io/", "dblp": ";83/5135-2;204/3011;;342/2947;70/2217;188/8292-1", "google_scholar": ";https://scholar.google.ca/citations?user=Q4HPgdsAAAAJ;;;JOQXRbIAAAAJ;https://scholar.google.com.tw/citations?user=ptI-HHkAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;itai-shapira-968362171;;", "or_profile": "~Luise_Ge1;~Daniel_Halpern1;~Evi_Micha1;~Ariel_D._Procaccia1;~Itai_Shapira1;~Yevgeniy_Vorobeychik1;~Junlin_Wu2", "aff": ";Harvard University;University of Southern California;;Harvard University, Harvard University;Washington University, St. Louis;Washington University, St. Louis", "aff_domain": ";harvard.edu;usc.edu;;g.harvard.edu;wustl.edu;wustl.edu", "position": ";PhD student;Assistant Professor;;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nge2024axioms,\ntitle={Axioms for {AI} Alignment from Human Feedback},\nauthor={Luise Ge and Daniel Halpern and Evi Micha and Ariel D. Procaccia and Itai Shapira and Yevgeniy Vorobeychik and Junlin Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cmBjkpRuvw}\n}", "github": "", "reviewers": "6JKc;TLca;Gde6;xjPu", "pdf_size": 411488, "rating": "6;6;7;8", "confidence": "3;3;1;3", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;4;3;3", "wc_summary": "99;92;64;73", "wc_strengths": "79;65;36;91", "wc_weaknesses": "121;220;19;58", "wc_questions": "7;35;50;66", "wc_limitations": "12;5;5;380", "wc_review": "318;417;174;668", "wc_reply_reviewers": "67;5;16;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 14.089002803605371 ], "wc_strengths_avg": [ 67.75, 20.51066795596867 ], "wc_weaknesses_avg": [ 104.5, 75.9687435726036 ], "wc_questions_avg": [ 39.5, 21.73131381210073 ], "wc_limitations_avg": [ 100.5, 161.39470251529323 ], "wc_review_avg": [ 394.25, 180.1254770986048 ], "wc_reply_reviewers_avg": [ 30.25, 23.44541533008106 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14260180695772773081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": ";harvard.edu;usc.edu;;g.harvard.edu;wustl.edu;wustl.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Harvard University;University of Southern California;Washington University in St. Louis", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.usc.edu;https://wustl.edu", "aff_unique_abbr": "Harvard;USC;WUSTL", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Los Angeles;St. Louis", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DeiSAM: Segment Anything with Deictic Prompting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94385", "id": "cmSNX47aEH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cmSNX47aEH", "openreview": "https://openreview.net/forum?id=cmSNX47aEH", "poster": "", "project": "", "author_site": "Hikaru Shindo, Manuel Brack, Gopika Sudhakaran, Devendra S Dhami, Patrick Schramowski, Kristian Kersting", "tldr": "", "abstract": "Large-scale, pre-trained neural networks have demonstrated strong capabilities in various tasks, including zero-shot image segmentation. To identify concrete objects in complex scenes, humans instinctively rely on deictic descriptions in natural language, i.e., referring to something depending on the context such as \"The object that is on the desk and behind the cup.\". However, deep learning approaches cannot reliably interpret such deictic representations due to their lack of reasoning capabilities in complex scenarios. To remedy this issue, we propose DeiSAM \u2014 a combination of large pre-trained neural networks with differentiable logic reasoners \u2014 for deictic promptable segmentation. Given a complex, textual segmentation description, DeiSAM leverages Large Language Models (LLMs) to generate first-order logic rules and performs differentiable forward reasoning on generated scene graphs. Subsequently, DeiSAM segments objects by matching them to the logically inferred image regions. As part of our evaluation, we propose the Deictic Visual Genome (DeiVG) dataset, containing paired visual input and complex, deictic textual prompts. Our empirical results demonstrate that DeiSAM is a substantial improvement over purely data-driven baselines for deictic promptable segmentation.", "keywords": "neuro-symbolic reasoning;object segmentation;deictic representation;large language models;differentiable logic programming", "primary_area": "other", "supplementary_material": "/attachment/82dc689a62cfacc59b9dda351fc81d7c77384b29.zip", "author": "Hikaru Shindo;Manuel Brack;Gopika Sudhakaran;Devendra Singh Dhami;Patrick Schramowski;Kristian Kersting", "authorids": "~Hikaru_Shindo1;~Manuel_Brack1;~Gopika_Sudhakaran1;~Devendra_Singh_Dhami1;~Patrick_Schramowski1;~Kristian_Kersting1", "gender": "M;M;F;M;M;M", "homepage": "https://www.hikarushindo.com/;;;https://sites.google.com/view/devendradhami;https://ml-research.github.io/people/pschramowski/index.html;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "227/1466;326/8265;213/0247;201/2130;217/1650;40/3793", "google_scholar": "Ws03zBoAAAAJ;kJ9Abf8AAAAJ;QZS6FjoAAAAJ;aVlaHfkAAAAJ;GD481RkAAAAJ;QY-earAAAAAJ", "orcid": ";;0009-0007-3721-5602;;0000-0003-1231-7120;0000-0002-2873-9152", "linkedin": "hkrsnd;;https://de.linkedin.com/in/gopika-sudhakaran-7a289755;;;", "or_profile": "~Hikaru_Shindo1;~Manuel_Brack1;~Gopika_Sudhakaran1;~Devendra_Singh_Dhami1;~Patrick_Schramowski1;~Kristian_Kersting1", "aff": "TU Darmstadt;Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;Eindhoven University of Technology;German Research Center for AI;TU Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tue.nl;dfki.de;tu-darmstadt.de", "position": "PhD student;PhD student;PhD student;Assistant Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nshindo2024deisam,\ntitle={Dei{SAM}: Segment Anything with Deictic Prompting},\nauthor={Hikaru Shindo and Manuel Brack and Gopika Sudhakaran and Devendra Singh Dhami and Patrick Schramowski and Kristian Kersting},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cmSNX47aEH}\n}", "github": "", "reviewers": "9P1c;WXVv;TxaQ;YJtK", "pdf_size": 20252423, "rating": "3;4;6;6", "confidence": "5;4;5;4", "soundness": "2;3;4;3", "novelty": "2;3;3;2", "presentation": "3;3;4;4", "wc_summary": "77;58;96;98", "wc_strengths": "37;33;204;99", "wc_weaknesses": "242;300;205;299", "wc_questions": "4;7;8;119", "wc_limitations": "4;9;11;1", "wc_review": "364;407;524;616", "wc_reply_reviewers": "0;0;0;288", "wc_reply_authors": "46;46;46;115", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 82.25, 16.223054582907622 ], "wc_strengths_avg": [ 93.25, 69.08825877093734 ], "wc_weaknesses_avg": [ 261.5, 40.190172928217166 ], "wc_questions_avg": [ 34.5, 48.8082984747471 ], "wc_limitations_avg": [ 6.25, 3.960744879438715 ], "wc_review_avg": [ 477.75, 98.9908455363424 ], "wc_reply_reviewers_avg": [ 72.0, 124.70765814495917 ], "wc_reply_authors_avg": [ 63.25, 29.877876430563134 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13227833690819431104&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tue.nl;dfki.de;tu-darmstadt.de", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Eindhoven University of Technology;German Research Center for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.tue.nl;https://www.dfki.de/", "aff_unique_abbr": "TU Darmstadt;TU/e;DFKI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Germany;Netherlands" }, { "title": "CableInspect-AD: An Expert-Annotated Anomaly Detection Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97600", "id": "cnjmZqVpm9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cnjmZqVpm9", "openreview": "https://openreview.net/forum?id=cnjmZqVpm9", "poster": "/media/PosterPDFs/NeurIPS%202024/97600.png?t=1731513112.1317096", "project": "", "author_site": "Akshatha Arodi, Margaux Luck, Jean-Luc Bedwani, Aldo Zaimi, Ge Li, Nicolas Pouliot, Julien Beaudry, Gaetan Marceau Caron", "tldr": "", "abstract": "Machine learning models are increasingly being deployed in real-world contexts. However, systematic studies on their transferability to specific and critical applications are underrepresented in the research literature. An important example is visual anomaly detection (VAD) for robotic power line inspection. While existing VAD methods perform well in controlled environments, real-world scenarios present diverse and unexpected anomalies that current datasets fail to capture. To address this gap, we introduce CableInspect-AD, a high-quality, publicly available dataset created and annotated by domain experts from Hydro-Qu\u00e9bec, a Canadian public utility. This dataset includes high-resolution images with challenging real-world anomalies, covering defects with varying severity levels. To address the challenges of collecting diverse anomalous and nominal examples for setting a detection threshold, we propose an enhancement to the celebrated PatchCore algorithm. This enhancement enables its use in scenarios with limited labeled data. We also present a comprehensive evaluation protocol based on cross-validation to assess models' performances. We evaluate our Enhanced-PatchCore for few-shot and many-shot detection, and Vision-Language Models for zero-shot detection. While promising, these models struggle to detect all anomalies, highlighting the dataset's value as a challenging benchmark for the broader research community. Project page: https://mila-iqia.github.io/cableinspect-ad/.", "keywords": "Preventative maintenance;Anomaly detection;Visual Inspection;Instance-based AD;Dataset;Benchmark;Vision-Language Models;Real-world applications;Evaluation framework;Few-shot learning;Zero-shot learning", "primary_area": "", "supplementary_material": "/attachment/e7e486cd6eb182ff07fe0249501d6e2380227bec.pdf", "author": "Akshatha Arodi;Margaux Luck;Jean-Luc Bedwani;Aldo Zaimi;Ge Li;Nicolas Pouliot;Julien Beaudry;Ga\u00e9tan Marceau Caron", "authorids": "~Akshatha_Arodi1;~Margaux_Luck1;~Jean-Luc_Bedwani1;~Aldo_Zaimi2;~Ge_Li8;~Nicolas_Pouliot1;~Julien_Beaudry1;~Ga\u00e9tan_Marceau_Caron1", "gender": ";;M;M;M;;M;", "homepage": ";;;;;https://www.hydroquebec.com/innovation/fr/evolution-technologique/force-innovation/;;", "dblp": ";;;185/9112;;;;123/4487.html", "google_scholar": ";;;er7hF1sAAAAJ;https://scholar.google.ca/citations?user=kexUKWIAAAAJ;;;", "orcid": ";;;;;;;", "linkedin": ";;jean-luc-bedwani-929875209/;;;;jbeaudry/;", "or_profile": "~Akshatha_Arodi1;~Margaux_Luck1;~Jean-Luc_Bedwani1;~Aldo_Zaimi2;~Ge_Li8;~Nicolas_Pouliot1;~Julien_Beaudry1;~Ga\u00e9tan_Marceau_Caron1", "aff": ";;Hydro-Quebec;Mila - Quebec Artificial Intelligence Institute;Mila - Quebec Artificial Intelligence Institute;IREQ;Hydro-Quebec Research Institute;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al", "aff_domain": ";;hydroquebec.com;mila.quebec;mila.quebec;hydroquebec.com;hydroquebec.com;mila.umontreal.ca", "position": ";;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\narodi2024cableinspectad,\ntitle={CableInspect-{AD}: An Expert-Annotated Anomaly Detection Dataset},\nauthor={Akshatha Arodi and Margaux Luck and Jean-Luc Bedwani and Aldo Zaimi and Ge Li and Nicolas Pouliot and Julien Beaudry and Ga{\\'e}tan Marceau Caron},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cnjmZqVpm9}\n}", "github": "", "reviewers": "hjBg;U6dd;jyZc", "pdf_size": 38011014, "rating": "5;6;8", "confidence": "4;3;4", "wc_summary_and_contributions": "72;54;78", "wc_strengths": "69;36;116", "wc_improvement": "152;38;160", "wc_limitations": "21;31;90", "wc_correctness": "9;1;42", "wc_clarity": "5;1;36", "wc_relation_to_prior_work": "5;1;63", "wc_documentation": "33;1;168", "wc_additional_feedback": "1;1;1", "wc_review": "367;164;754", "wc_reply_reviewers": "80;227;0", "wc_reply_authors": "383;1329;0", "reply_reviewers": "1;3;0", "reply_authors": "3;4;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 68.0, 10.198039027185569 ], "wc_strengths_avg": [ 73.66666666666667, 32.82614134429381 ], "wc_improvement_avg": [ 116.66666666666667, 55.72152985057831 ], "wc_limitations_avg": [ 47.333333333333336, 30.44484995674784 ], "wc_correctness_avg": [ 17.333333333333332, 17.745108872274887 ], "wc_clarity_avg": [ 14.0, 15.641824275533422 ], "wc_relation_to_prior_work_avg": [ 23.0, 28.331372481167705 ], "wc_documentation_avg": [ 67.33333333333333, 72.37095795659717 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 428.3333333333333, 244.7397710948418 ], "wc_reply_reviewers_avg": [ 102.33333333333333, 94.00827386754612 ], "wc_reply_authors_avg": [ 570.6666666666666, 558.5542846392719 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XKVqDZKTwggJ:scholar.google.com/&scioq=CableInspect-AD:+An+Expert-Annotated+Anomaly+Detection+Dataset&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";;hydroquebec.com;mila.quebec;mila.quebec;hydroquebec.com;hydroquebec.com;mila.umontreal.ca", "author_num": 8, "aff_unique_index": "0;1;1;2;0;3", "aff_unique_norm": "Hydro-Quebec;Quebec Artificial Intelligence Institute;Institut de recherche en electricite du Quebec;University of Montreal", "aff_unique_dep": ";Artificial Intelligence;;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.hydroquebec.com/;https://mila.quebec;https://www.ireq.ca;https://www.mila.quebec", "aff_unique_abbr": "HQ;Mila;IREQ;MILA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Community Detection Guarantees using Embeddings Learned by Node2Vec", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94384", "id": "cnpR4e2HCQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cnpR4e2HCQ", "openreview": "https://openreview.net/forum?id=cnpR4e2HCQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94384.png?t=1731704773.6386888", "project": "", "author_site": "Andrew Davison, S. Carlyle Morgan, Owen G. Ward", "tldr": "", "abstract": "Embedding the nodes of a large network into an Euclidean space is a common objective in modern\nmachine learning, with a variety of tools available. These embeddings can then be used as features for\ntasks such as community detection/node clustering or link prediction, where they achieve state of the art\nperformance. With the exception of spectral clustering methods, there is little theoretical understanding\nfor commonly used approaches to learning embeddings. In this work we examine the theoretical\nproperties of the embeddings learned by node2vec. Our main result shows that the use of k-means\nclustering on the embedding vectors produced by node2vec gives weakly consistent community recovery\nfor the nodes in (degree corrected) stochastic block models. We also discuss the use of these embeddings\nfor node and link prediction tasks. We demonstrate this result empirically for both\nreal and simulated networks, and examine how this relates\nto other embedding tools for network data.", "keywords": "Network Embedding;Node2Vec;Community Detection;Networks", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/41286377cbeb91eac3b614c2197dac88011060ee.zip", "author": "Andrew Davison;Samuel Carlyle Morgan;Owen G. Ward", "authorids": "~Andrew_Davison4;~Samuel_Carlyle_Morgan1;~Owen_G._Ward1", "gender": ";M;M", "homepage": "https://aday651.github.io/;;https://owenward.github.io/", "dblp": ";;", "google_scholar": "2xtxQaUAAAAJ;;Sm1FpHEAAAAJ", "orcid": ";;", "linkedin": ";samuel-carlyle-morgan-5404a6186/;", "or_profile": "~Andrew_Davison4;~Samuel_Carlyle_Morgan1;~Owen_G._Ward1", "aff": ";University of Michigan - Ann Arbor;Simon Fraser University", "aff_domain": ";umich.edu;sfu.ca", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndavison2024community,\ntitle={Community Detection Guarantees using Embeddings Learned by Node2Vec},\nauthor={Andrew Davison and Samuel Carlyle Morgan and Owen G. Ward},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cnpR4e2HCQ}\n}", "github": "", "reviewers": "DE6D;znFf;NUS8", "pdf_size": 5671005, "rating": "6;6;8", "confidence": "4;3;3", "soundness": "4;3;4", "novelty": "3;2;4", "presentation": "3;2;3", "wc_summary": "87;83;46", "wc_strengths": "30;202;23", "wc_weaknesses": "183;184;81", "wc_questions": "56;151;71", "wc_limitations": "33;34;12", "wc_review": "389;654;233", "wc_reply_reviewers": "77;166;44", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 18.457157599876172 ], "wc_strengths_avg": [ 85.0, 82.7808351411525 ], "wc_weaknesses_avg": [ 149.33333333333334, 48.32068800098865 ], "wc_questions_avg": [ 92.66666666666667, 41.69998667732268 ], "wc_limitations_avg": [ 26.333333333333332, 10.143416036468626 ], "wc_review_avg": [ 425.3333333333333, 173.78211134124888 ], "wc_reply_reviewers_avg": [ 95.66666666666667, 51.52561391083942 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CzMxBsJXpRwJ:scholar.google.com/&scioq=Community+Detection+Guarantees+using+Embeddings+Learned+by+Node2Vec&hl=en&as_sdt=0,47", "gs_version_total": 4, "email": ";umich.edu;sfu.ca", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Michigan;Simon Fraser University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.sfu.ca", "aff_unique_abbr": "UM;SFU", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "title": "Structured Matrix Basis for Multivariate Time Series Forecasting with Interpretable Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94383", "id": "co7DsOwcop", "proceeding": "", "pdf": "https://openreview.net/pdf?id=co7DsOwcop", "openreview": "https://openreview.net/forum?id=co7DsOwcop", "poster": "/media/PosterPDFs/NeurIPS%202024/94383.png?t=1731299075.6966274", "project": "", "author_site": "Xiaodan Chen, Xiucheng Li, Xinyang Chen, Zhijun Li", "tldr": "", "abstract": "Multivariate time series forecasting is of central importance in modern intelligent decision systems. The dynamics of multivariate time series are jointly characterized by temporal dependencies and spatial correlations. Hence, it is equally important to build the forecasting models from both perspectives. The real-world multivariate time series data often presents spatial correlations that show structures and evolve dynamically. To capture such dynamic spatial structures, the existing forecasting approaches often rely on a two-stage learning process (learning dynamic series representations and then generating spatial structures), which is sensitive to the small time-window input data and has high variance. To address this, we propose a novel forecasting model with a structured matrix basis. At its core is a dynamic spatial structure generation function whose output space is well-constrained and the generated structures have lower variance, meanwhile, it is more expressive and can offer interpretable dynamics. This is achieved via a novel structured parameterization and imposing structure regularization on the matrix basis. The resulting forecasting model can achieve up to $8.5\\%$ improvements over the existing methods on six benchmark datasets, and meanwhile, it enables us to gain insights into the dynamics of underlying systems.", "keywords": "Time Series Forecasting;Structured Matrix Basis;Interpretable Dynamics", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xiaodan Chen;Xiucheng Li;Xinyang Chen;Zhijun Li", "authorids": "~Xiaodan_Chen1;~Xiucheng_Li2;~Xinyang_Chen1;~Zhijun_Li3", "gender": "F;M;Not Specified;M", "homepage": "https://github.com/chenxiaodanhit;https://xiucheng.org/;https://chenxinyang123.github.io/;http://www.hit.edu.cn/lizhijun", "dblp": "17/7250;152/8201;242/3871-1;89/6527-2", "google_scholar": ";https://scholar.google.com.sg/citations?user=qFSxE6YAAAAJ;qVxhGWUAAAAJ;", "orcid": ";;0000-0001-6743-838X;", "linkedin": ";;;", "or_profile": "~Xiaodan_Chen1;~Xiucheng_Li2;~Xinyang_Chen1;~Zhijun_Li3", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024structured,\ntitle={Structured Matrix Basis for Multivariate Time Series Forecasting with Interpretable Dynamics},\nauthor={Xiaodan Chen and Xiucheng Li and Xinyang Chen and Zhijun Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=co7DsOwcop}\n}", "github": "", "reviewers": "aCCB;ftFS;VUH9", "pdf_size": 984822, "rating": "6;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "64;81;101", "wc_strengths": "89;38;78", "wc_weaknesses": "186;40;86", "wc_questions": "50;26;86", "wc_limitations": "19;21;13", "wc_review": "408;206;364", "wc_reply_reviewers": "72;0;16", "wc_reply_authors": "18;0;55", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 15.121728296285006 ], "wc_strengths_avg": [ 68.33333333333333, 21.913973218524802 ], "wc_weaknesses_avg": [ 104.0, 60.94806532341011 ], "wc_questions_avg": [ 54.0, 24.657656011875904 ], "wc_limitations_avg": [ 17.666666666666668, 3.39934634239519 ], "wc_review_avg": [ 326.0, 86.73330771201262 ], "wc_reply_reviewers_avg": [ 29.333333333333332, 30.868898407440604 ], "wc_reply_authors_avg": [ 24.333333333333332, 22.89589968143253 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5010961462932583394&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Harbin;Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Light Unbalanced Optimal Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94382", "id": "co8KZws1YK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=co8KZws1YK", "openreview": "https://openreview.net/forum?id=co8KZws1YK", "poster": "/media/PosterPDFs/NeurIPS%202024/94382.png?t=1733861863.9733112", "project": "", "author_site": "Milena Gazdieva, Arip Asadulaev, Evgeny Burnaev, Aleksandr Korotin", "tldr": "", "abstract": "While the continuous Entropic Optimal Transport (EOT) field has been actively developing in recent years, it became evident that the classic EOT problem is prone to different issues like the sensitivity to outliers and imbalance of classes in the source and target measures. This fact inspired the development of solvers that deal with the *unbalanced* EOT (UEOT) problem $-$ the generalization of EOT allowing for mitigating the mentioned issues by relaxing the marginal constraints. Surprisingly, it turns out that the existing solvers are either based on heuristic principles or heavy-weighted with complex optimization objectives involving several neural networks. We address this challenge and propose a novel theoretically-justified, lightweight, unbalanced EOT solver. Our advancement consists of developing a novel view on the optimization of the UEOT problem yielding tractable and a non-minimax optimization objective. We show that combined with a light parametrization recently proposed in the field our objective leads to a fast, simple, and effective solver which allows solving the continuous UEOT problem in minutes on CPU. We prove that our solver provides a universal approximation of UEOT solutions and obtain its generalization bounds. We give illustrative examples of the solver's performance.", "keywords": "unbalanced optimal transport;light solver;entropy regularization;generative modeling", "primary_area": "generative_models", "supplementary_material": "/attachment/794af35645129b9f68325a6c63db2752e2c07025.zip", "author": "Milena Gazdieva;Arip Asadulaev;Evgeny Burnaev;Alexander Korotin", "authorids": "~Milena_Gazdieva1;~Arip_Asadulaev1;~Evgeny_Burnaev1;~Alexander_Korotin2", "gender": "F;M;M;M", "homepage": ";;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "309/6585;243/2822;144/7845;209/9906", "google_scholar": "h52_Zx8AAAAJ;wcdrgdYAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": "0000-0003-0047-1577;;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": ";;;", "or_profile": "~Milena_Gazdieva1;~Arip_Asadulaev1;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "Skolkovo Institute of Science and Technology;ITMO University;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "skoltech.ru;itmo.ru;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;Full Professor;Head of Research Group", "bibtex": "@inproceedings{\ngazdieva2024light,\ntitle={Light Unbalanced Optimal Transport},\nauthor={Milena Gazdieva and Arip Asadulaev and Evgeny Burnaev and Alexander Korotin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=co8KZws1YK}\n}", "github": "", "reviewers": "WAcu;nig3;bWBu;vYvs;t9a3", "pdf_size": 18028082, "rating": "5;5;6;6;6", "confidence": "3;3;4;2;3", "soundness": "2;3;4;3;3", "novelty": "3;2;3;3;2", "presentation": "3;3;4;2;3", "wc_summary": "60;84;219;84;81", "wc_strengths": "53;49;81;49;77", "wc_weaknesses": "70;198;143;29;62", "wc_questions": "189;61;13;70;70", "wc_limitations": "11;4;6;1;11", "wc_review": "383;396;462;233;301", "wc_reply_reviewers": "26;38;144;16;25", "wc_reply_authors": "392;1277;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;4;1;1;1", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 105.6, 57.40592303935196 ], "wc_strengths_avg": [ 61.8, 14.17603611733548 ], "wc_weaknesses_avg": [ 100.4, 61.379475396910976 ], "wc_questions_avg": [ 80.6, 58.18797126554594 ], "wc_limitations_avg": [ 6.6, 3.9293765408777004 ], "wc_review_avg": [ 355.0, 79.6416976212838 ], "wc_reply_reviewers_avg": [ 49.8, 47.61680375665717 ], "wc_reply_authors_avg": [ 333.8, 495.4353237305552 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.8, 1.1661903789690604 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5137362216550215850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "skoltech.ru;itmo.ru;skoltech.ru;skoltech.ru", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;ITMO University", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://www.itmo.ru", "aff_unique_abbr": "Skoltech;ITMO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Unrolled denoising networks provably learn to perform optimal Bayesian inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94381", "id": "cpklMJqZDE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cpklMJqZDE", "openreview": "https://openreview.net/forum?id=cpklMJqZDE", "poster": "", "project": "", "author_site": "Aayush Karan, Kulin Shah, Sitan Chen, Yonina Eldar", "tldr": "", "abstract": "Much of Bayesian inference centers around the design of estimators for inverse problems which are optimal assuming the data comes from a known prior. But what do these optimality guarantees mean if the prior is unknown? In recent years, algorithm unrolling has emerged as deep learning's answer to this age-old question: design a neural network whose layers can in principle simulate iterations of inference algorithms and train on data generated by the unknown prior. Despite its empirical success, however, it has remained unclear whether this method can provably recover the performance of its optimal, prior-aware counterparts.\n\nIn this work, we prove the first rigorous learning guarantees for neural networks based on unrolling approximate message passing (AMP). For compressed sensing, we prove that when trained on data drawn from a product prior, the layers of the network approximately converge to the same denoisers used in Bayes AMP. We also provide extensive numerical experiments for compressed sensing and rank-one matrix estimation demonstrating the advantages of our unrolled architecture \\--- in addition to being able to obliviously adapt to general priors, it exhibits improvements over Bayes AMP in more general settings of low dimensions, non-Gaussian designs, and non-product priors.", "keywords": "algorithm unrolling;approximate message passing (AMP);inverse problems;denoising", "primary_area": "learning_theory", "supplementary_material": "", "author": "Aayush Karan;Kulin Shah;Sitan Chen;Yonina C. Eldar", "authorids": "~Aayush_Karan1;~Kulin_Shah1;~Sitan_Chen1;~Yonina_C._Eldar2", "gender": "M;M;M;F", "homepage": ";https://kulinshah98.github.io/;https://sitanchen.com;https://www.weizmann.ac.il/math/yonina/", "dblp": ";215/3581;141/7670;", "google_scholar": ";https://scholar.google.co.in/citations?user=67OmLg4AAAAJ;YnJVsp4AAAAJ;https://scholar.google.co.il/citations?user=vyX6kpwAAAAJ", "orcid": ";;;", "linkedin": "aayush-karan-364a10298;;;", "or_profile": "~Aayush_Karan1;~Kulin_Shah1;~Sitan_Chen1;~Yonina_C._Eldar2", "aff": "Harvard University;University of Texas, Austin;Harvard University;", "aff_domain": "harvard.edu;cs.utexas.edu;seas.harvard.edu;", "position": "PhD student;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nkaran2024unrolled,\ntitle={Unrolled denoising networks provably learn to perform optimal Bayesian inference},\nauthor={Aayush Karan and Kulin Shah and Sitan Chen and Yonina C. Eldar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cpklMJqZDE}\n}", "github": "", "reviewers": "DSkx;mdyi;F56a", "pdf_size": 6036484, "rating": "6;7;8", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;3;4", "wc_summary": "91;191;129", "wc_strengths": "55;54;82", "wc_weaknesses": "47;98;58", "wc_questions": "323;191;75", "wc_limitations": "6;37;4", "wc_review": "522;571;348", "wc_reply_reviewers": "20;284;20", "wc_reply_authors": "0;390;0", "reply_reviewers": "1;2;1", "reply_authors": "1;2;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 137.0, 41.21488404286329 ], "wc_strengths_avg": [ 63.666666666666664, 12.970050972229146 ], "wc_weaknesses_avg": [ 67.66666666666667, 21.913973218524802 ], "wc_questions_avg": [ 196.33333333333334, 101.31578795473531 ], "wc_limitations_avg": [ 15.666666666666666, 15.107025591499548 ], "wc_review_avg": [ 480.3333333333333, 95.68815089770635 ], "wc_reply_reviewers_avg": [ 108.0, 124.45079348883236 ], "wc_reply_authors_avg": [ 130.0, 183.84776310850236 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:G68itG-IWDUJ:scholar.google.com/&scioq=Unrolled+denoising+networks+provably+learn+to+perform+optimal+Bayesian+inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "harvard.edu;cs.utexas.edu;seas.harvard.edu;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.utexas.edu", "aff_unique_abbr": "Harvard;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FasterDiT: Towards Faster Diffusion Transformers Training without Architecture Modification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94380", "id": "cqRgoDFaGN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cqRgoDFaGN", "openreview": "https://openreview.net/forum?id=cqRgoDFaGN", "poster": "/media/PosterPDFs/NeurIPS%202024/94380.png?t=1731311352.4428122", "project": "", "author_site": "JINGFENG YAO, Cheng Wang, Wenyu Liu, Xinggang Wang", "tldr": "", "abstract": "Diffusion Transformers (DiT) have attracted significant attention in research. However, they suffer from a slow convergence rate. In this paper, we aim to accelerate DiT training without any architectural modification. We identify the following issues in the training process: firstly, certain training strategies do not consistently perform well across different data. Secondly, the effectiveness of supervision at specific timesteps is limited. In response, we propose the following contributions: (1) We introduce a new perspective for interpreting the failure of the strategies. Specifically, we slightly extend the definition of Signal-to-Noise Ratio (SNR) and suggest observing the Probability Density Function (PDF) of SNR to understand the essence of the data robustness of the strategy. (2) We conduct numerous experiments and report over one hundred experimental results to empirically summarize a unified accelerating strategy from the perspective of PDF. (3) We develop a new supervision method that further accelerates the training process of DiT. Based on them, we propose FasterDiT, an exceedingly simple and practicable design strategy. With few lines of code modifications, it achieves 2.30 FID on ImageNet at 256x256 resolution with 1000 iterations, which is comparable to DiT (2.27 FID) but 7 times faster in training.", "keywords": "Generative Model;Diffusion Transformers;Flow Matching", "primary_area": "generative_models", "supplementary_material": "/attachment/f01a797b65467a6640b34c252035c2d80616b881.zip", "author": "Jingfeng Yao;Cheng Wang;Wenyu Liu;Xinggang Wang", "authorids": "~Jingfeng_Yao2;~Cheng_Wang5;~Wenyu_Liu3;~Xinggang_Wang1", "gender": "M;M;M;M", "homepage": "https://github.com/JingfengYao;;http://eic.hust.edu.cn/professor/liuwenyu/;https://xwcv.github.io/index.htm", "dblp": ";;42/4110-1.html;95/3056", "google_scholar": "4qc1qJ0AAAAJ;PdJIyPIAAAAJ;D7jDk7gAAAAJ;qNCTLV0AAAAJ", "orcid": ";0000-0001-9245-3944;0000-0002-4582-7488;0000-0001-6732-7823", "linkedin": ";;;", "or_profile": "~Jingfeng_Yao2;~Cheng_Wang5;~Wenyu_Liu3;~Xinggang_Wang1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyao2024fasterdit,\ntitle={FasterDiT: Towards Faster Diffusion Transformers Training without Architecture Modification},\nauthor={Jingfeng Yao and Cheng Wang and Wenyu Liu and Xinggang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cqRgoDFaGN}\n}", "github": "", "reviewers": "FhtE;dNCD;ttWn;qPr3;fAgp", "pdf_size": 1765261, "rating": "6;6;6;6;6", "confidence": "5;3;3;3;3", "soundness": "3;3;3;2;3", "novelty": "3;3;3;3;3", "presentation": "2;3;1;2;3", "wc_summary": "60;77;58;63;52", "wc_strengths": "73;65;42;75;50", "wc_weaknesses": "78;77;200;47;52", "wc_questions": "2;3;71;2;17", "wc_limitations": "13;15;1;1;27", "wc_review": "226;237;372;188;198", "wc_reply_reviewers": "0;17;44;18;23", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.4, 0.8000000000000002 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 62.0, 8.318653737234168 ], "wc_strengths_avg": [ 61.0, 12.946041866145807 ], "wc_weaknesses_avg": [ 90.8, 56.040699495991305 ], "wc_questions_avg": [ 19.0, 26.615784790233032 ], "wc_limitations_avg": [ 11.4, 9.748846085563153 ], "wc_review_avg": [ 244.2, 66.34576097988477 ], "wc_reply_reviewers_avg": [ 20.4, 14.122322755127783 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6634263497191186456&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural Krylov Iteration for Accelerating Linear System Solving", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94379", "id": "cqfE9eYMdP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cqfE9eYMdP", "openreview": "https://openreview.net/forum?id=cqfE9eYMdP", "poster": "/media/PosterPDFs/NeurIPS%202024/94379.png?t=1731727639.3511717", "project": "", "author_site": "Jian Luo, Jie Wang, Hong Wang, huanshuo dong, Zijie Geng, Hanzhu Chen, Yufei Kuang", "tldr": "", "abstract": "Solving large-scale sparse linear systems is essential in fields like mathematics, science, and engineering. Traditional numerical solvers, mainly based on the Krylov subspace iteration algorithm, suffer from the low-efficiency problem, which primarily arises from the less-than-ideal iteration. To tackle this problem, we propose a novel method, namely **Neur**al **K**rylov **It**era**t**ion (**NeurKItt**), for accelerating linear system solving.\nSpecifically, NeurKItt employs a neural operator to predict the invariant subspace of the linear system and then leverages the predicted subspace to accelerate linear system solving. To enhance the subspace prediction accuracy, we utilize QR decomposition for the neural operator outputs and introduce a novel projection loss function for training. NeurKItt benefits the solving by using the predicted subspace to guide the iteration process, significantly reducing the number of iterations.\nWe provide extensive experiments and comprehensive theoretical analyses to demonstrate the feasibility and efficiency of NeurKItt. In our main experiments, NeurKItt accelerates the solving of linear systems across various settings and datasets, achieving up to a 5.5\u00d7 speedup in computation time and a 16.1\u00d7 speedup in the number of iterations.", "keywords": "AI4PDE;Neural Operator;Scientific Computing;Krylov Subspace", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Jian Luo;Jie Wang;Hong Wang;huanshuo dong;Zijie Geng;Hanzhu Chen;Yufei Kuang", "authorids": "~Jian_Luo5;~Jie_Wang1;~Hong_Wang14;~huanshuo_dong1;~Zijie_Geng1;~Hanzhu_Chen1;~Yufei_Kuang1", "gender": ";M;M;M;M;;M", "homepage": "https://smart-jluo.github.io/;http://staff.ustc.edu.cn/~jwangx;https://wanghong1700.github.io/;https://huanshuodong.github.io;https://miralab.ai/people/zijie-geng/;;https://miralab.ai/people/yufei-kuang/", "dblp": ";29/5259-5;;;320/7568;;280/1134", "google_scholar": "AGvDtzwAAAAJ;OugG4dUAAAAJ;;;https://scholar.google.com.hk/citations?user=Ga66HL4AAAAJ;;STN3F_oAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jian_Luo5;~Jie_Wang1;~Hong_Wang14;~huanshuo_dong1;~Zijie_Geng1;~Hanzhu_Chen1;~Yufei_Kuang1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;mail.ustc.edu.cn;;ustc.edu.cn", "position": "MS student;Full Professor;PhD student;Undergrad student;MS student;;PhD student", "bibtex": "@inproceedings{\nluo2024neural,\ntitle={Neural Krylov Iteration for Accelerating Linear System Solving},\nauthor={Jian Luo and Jie Wang and Hong Wang and huanshuo dong and Zijie Geng and Hanzhu Chen and Yufei Kuang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cqfE9eYMdP}\n}", "github": "", "reviewers": "g2gN;iSRc;XS9H;CSA7", "pdf_size": 930548, "rating": "5;6;8;8", "confidence": "3;3;3;4", "soundness": "2;2;4;4", "novelty": "2;3;3;4", "presentation": "1;3;3;3", "wc_summary": "26;63;105;59", "wc_strengths": "31;191;56;203", "wc_weaknesses": "33;298;58;205", "wc_questions": "73;56;61;2", "wc_limitations": "38;10;1;11", "wc_review": "201;618;281;480", "wc_reply_reviewers": "71;40;54;52", "wc_reply_authors": "166;163;161;37", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 63.25, 28.05686190577984 ], "wc_strengths_avg": [ 120.25, 77.37368738789692 ], "wc_weaknesses_avg": [ 148.5, 108.48156525419422 ], "wc_questions_avg": [ 48.0, 27.26719640887196 ], "wc_limitations_avg": [ 15.0, 13.838352503098047 ], "wc_review_avg": [ 395.0, 164.0015243831593 ], "wc_reply_reviewers_avg": [ 54.25, 11.053845484717073 ], "wc_reply_authors_avg": [ 131.75, 54.73287403380166 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15370553547883280187&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;mail.ustc.edu.cn;;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Delta-CoMe: Training-Free Delta-Compression with Mixed-Precision for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94378", "id": "cr5EQRJlRn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cr5EQRJlRn", "openreview": "https://openreview.net/forum?id=cr5EQRJlRn", "poster": "/media/PosterPDFs/NeurIPS%202024/94378.png?t=1731063766.473728", "project": "", "author_site": "Bowen Ping, Shuo Wang, Hanqing Wang, Xu Han, Yuzhuang Xu, Yukun Yan, Yun Chen, Baobao Chang, Zhiyuan Liu, Maosong Sun", "tldr": "", "abstract": "Fine-tuning is a crucial process for adapting large language models (LLMs) to diverse applications. In certain scenarios, such as multi-tenant serving, deploying multiple LLMs becomes necessary to meet complex demands. Recent studies suggest decomposing a fine-tuned LLM into a base model and corresponding delta weights, which are then compressed using low-rank or low-bit approaches to reduce costs. In this work, we observe that existing low-rank and low-bit compression methods can significantly harm the model performance for task-specific fine-tuned LLMs (e.g., WizardMath for math problems). Motivated by the long-tail distribution of singular values in the delta weights, we propose a delta quantization approach using mixed-precision. This method employs higher-bit representation for singular vectors corresponding to larger singular values. We evaluate our approach on various fine-tuned LLMs, including math LLMs, code LLMs, chat LLMs, and even VLMs. Experimental results demonstrate that our approach performs comparably to full fine-tuned LLMs, surpassing both low-rank and low-bit baselines by a considerable margin. Additionally, we show that our method is compatible with various backbone LLMs, such as Llama-2, Llama-3, and Mistral, highlighting its generalizability.", "keywords": "Large Language Model;Delta Compression;Efficient Serving;Multi-Tenant Serving", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Bowen Ping;Shuo Wang;Hanqing Wang;Xu Han;Yuzhuang Xu;Yukun Yan;Yun Chen;Baobao Chang;Zhiyuan Liu;Maosong Sun", "authorids": "~Bowen_Ping2;~Shuo_Wang13;~Hanqing_Wang2;~Xu_Han2;~Yuzhuang_Xu1;~Yukun_Yan2;~Yun_Chen1;~Baobao_Chang1;~Zhiyuan_Liu1;~Maosong_Sun1", "gender": "M;M;M;;M;M;F;M;M;M", "homepage": "https://github.com/pingbowen23;;;;https://www.xyznlp.com/;https://yanyk13.github.io/;https://yunc.me/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6027;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": "338/6403;;35/10182-3;;;206/7211.html;10/5680-7;91/6051;53/3245-1;95/3291-1", "google_scholar": "DjcKZdkAAAAJ;5vm5yAMAAAAJ;1sFj7RcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;B88nSvIAAAAJ;vXd0JQMAAAAJ;LaKNyhQAAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "orcid": ";0000-0001-5408-3145;;;0009-0002-8256-9286;;0000-0002-3563-7592;0000-0003-2824-6750;0000-0002-7709-2543;", "linkedin": ";;;;;;;;;", "or_profile": "~Bowen_Ping2;~Shuo_Wang13;~Hanqing_Wang2;~Xu_Han2;~Yuzhuang_Xu1;~Yukun_Yan2;~Yun_Chen1;~Baobao_Chang1;~Zhiyuan_Liu1;~Maosong_Sun1", "aff": "Peking University;Tsinghua University;Shanghai University of Finance and Economics;;Tsinghua University;Tsinghua University;Shanghai University of Finance and Economics;Peking University;Tsinghua University;Tsinghua University", "aff_domain": "stu.pku.edu.cn;tsinghua.edu.cn;sufe.edu;;cs.tsinghua.edu.cn;tsinghua.edu.cn;sufe.edu.cn;pku.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Postdoc;PhD student;;MS student;Researcher;Assistant Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nping2024deltacome,\ntitle={Delta-CoMe: Training-Free Delta-Compression with Mixed-Precision for Large Language Models},\nauthor={Bowen Ping and Shuo Wang and Hanqing Wang and Xu Han and Yuzhuang Xu and Yukun Yan and Yun Chen and Baobao Chang and Zhiyuan Liu and Maosong Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cr5EQRJlRn}\n}", "github": "", "reviewers": "ZM7A;PxGh;vBME;Wicf", "pdf_size": 780778, "rating": "5;6;6;7", "confidence": "4;4;4;3", "soundness": "2;3;2;4", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "27;47;48;207", "wc_strengths": "5;28;33;299", "wc_weaknesses": "5;82;89;301", "wc_questions": "219;19;35;83", "wc_limitations": "1;1;1;110", "wc_review": "257;177;206;1000", "wc_reply_reviewers": "33;19;0;97", "wc_reply_authors": "29;29;37;273", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.25, 72.5099131153803 ], "wc_strengths_avg": [ 91.25, 120.40841955610911 ], "wc_weaknesses_avg": [ 119.25, 109.98721516612737 ], "wc_questions_avg": [ 89.0, 78.66384175718855 ], "wc_limitations_avg": [ 28.25, 47.198384506251905 ], "wc_review_avg": [ 410.0, 341.83841211894253 ], "wc_reply_reviewers_avg": [ 37.25, 36.43058467826175 ], "wc_reply_authors_avg": [ 92.0, 104.55142275454696 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6456426113313307791&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stu.pku.edu.cn;tsinghua.edu.cn;sufe.edu;;cs.tsinghua.edu.cn;tsinghua.edu.cn;sufe.edu.cn;pku.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;1;1;2;0;1;1", "aff_unique_norm": "Peking University;Tsinghua University;Shanghai University of Finance and Economics", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn;http://www.sufe.edu.cn", "aff_unique_abbr": "Peking U;THU;SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Customized Subgraph Selection and Encoding for Drug-drug Interaction Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94377", "id": "crlvDzDPgM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=crlvDzDPgM", "openreview": "https://openreview.net/forum?id=crlvDzDPgM", "poster": "/media/PosterPDFs/NeurIPS%202024/94377.png?t=1731580172.9155555", "project": "", "author_site": "Haotong Du, Quanming Yao, Juzheng Zhang, Yang Liu, Zhen Wang", "tldr": "", "abstract": "Subgraph-based methods have proven to be effective and interpretable in predicting drug-drug interactions (DDIs),\nwhich are essential for medical practice and drug development. \nSubgraph selection and encoding are critical stages in these methods, \nyet customizing these components remains underexplored due to the high cost of manual adjustments. \nIn this study, \ninspired by the success of neural architecture search (NAS), \nwe propose a method to search for data-specific components within subgraph-based frameworks. \nSpecifically, \nwe introduce extensive subgraph selection and encoding spaces that account for the diverse contexts of drug interactions in DDI prediction. \nTo address the challenge of large search spaces and high sampling costs, \nwe design a relaxation mechanism that uses an approximation strategy to efficiently explore optimal subgraph configurations. This approach allows for robust exploration of the search space. \nExtensive experiments demonstrate the effectiveness and superiority of the proposed method, \nwith the discovered subgraphs and encoding functions highlighting the model\u2019s adaptability.", "keywords": "Drug-drug Interaction Prediction;Neural Architecture Search;Graph Neural Networks", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Haotong Du;Quanming Yao;Juzheng Zhang;Yang Liu;Zhen Wang", "authorids": "~Haotong_Du1;~Quanming_Yao3;~Juzheng_Zhang2;~Yang_Liu111;~Zhen_Wang11", "gender": "M;M;;M;M", "homepage": "https://www.researchgate.net/profile/Haotong-Du;https://juzhengz.github.io/;https://iopen.nwpu.edu.cn/info/1347/2112.htm;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u;https://lars-group.github.io/", "dblp": "332/0812;133/2742;;;158/1014", "google_scholar": "3-7UF88AAAAJ;d8lJm7MAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com/schhp?hl=en", "orcid": "0000-0001-5094-7241;;;;", "linkedin": ";;;;", "or_profile": "~Haotong_Du1;~Juzheng_Zhang2;~Yang_Liu111;~Zhen_Wang11;~quanming_yao1", "aff": "Northwest Polytechnical University;University of Maryland, College Park;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University;Department of Electronic Engineering", "aff_domain": "nwpu.edu.cn;umd.edu;nwpu.edu.cn;nwpu.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndu2024customized,\ntitle={Customized Subgraph Selection and Encoding for Drug-drug Interaction Prediction},\nauthor={Haotong Du and Quanming Yao and Juzheng Zhang and Yang Liu and Zhen Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=crlvDzDPgM}\n}", "github": "", "reviewers": "zvSg;2Vrd;Qw7Z;KE8i", "pdf_size": 723855, "rating": "3;4;5;8", "confidence": "4;4;3;5", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "3;2;3;4", "wc_summary": "65;68;84;76", "wc_strengths": "63;52;40;88", "wc_weaknesses": "258;12;66;39", "wc_questions": "80;27;70;21", "wc_limitations": "1;1;2;7", "wc_review": "467;160;262;231", "wc_reply_reviewers": "89;0;65;0", "wc_reply_authors": "117;0;74;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.25, 7.39509972887452 ], "wc_strengths_avg": [ 60.75, 17.711225254058512 ], "wc_weaknesses_avg": [ 93.75, 96.73255656706277 ], "wc_questions_avg": [ 49.5, 25.83118270617898 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 280.0, 114.12055029660522 ], "wc_reply_reviewers_avg": [ 38.5, 39.423977475642914 ], "wc_reply_authors_avg": [ 47.75, 50.11175011910879 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5669467095138409, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16195413124437233150&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nwpu.edu.cn;umd.edu;nwpu.edu.cn;nwpu.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Northwest Polytechnical University;University of Maryland;Northwestern Polytechnical University;Institution Name Not Provided", "aff_unique_dep": ";;;Department of Electronic Engineering", "aff_unique_url": "https://www.nwpu.edu.cn;https://www/umd.edu;https://www.nwpu.edu.cn;", "aff_unique_abbr": "NWPU;UMD;NWPU;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Park;Xi'an", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States;" }, { "title": "A Versatile Diffusion Transformer with Mixture of Noise Levels for Audiovisual Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94376", "id": "cs1HISJkLU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cs1HISJkLU", "openreview": "https://openreview.net/forum?id=cs1HISJkLU", "poster": "/media/PosterPDFs/NeurIPS%202024/94376.png?t=1731418277.8164256", "project": "", "author_site": "Gwanghyun Kim, Alonso Martinez, Yu-Chuan Su, Brendan Jou, Jose Lezama, Agrim Gupta, Lijun Yu, Lu Jiang, Aren Jansen, Jacob Walker, Krishna Somandepalli", "tldr": "", "abstract": "Training diffusion models for audiovisual sequences allows for a range of generation tasks by learning conditional distributions of various input-output combinations of the two modalities. Nevertheless, this strategy often requires training a separate model for each task which is expensive. Here, we propose a novel training approach to effectively learn arbitrary conditional distributions in the audiovisual space. Our key contribution lies in how we parameterize the diffusion timestep in the forward diffusion process. Instead of the standard fixed diffusion timestep, we propose applying variable diffusion timesteps across the temporal dimension and across modalities of the inputs. This formulation offers flexibility to introduce variable noise levels for various portions of the input, hence the term mixture of noise levels. We propose a transformer-based audiovisual latent diffusion model and show that it can be trained in a task-agnostic fashion using our approach to enable a variety of audiovisual generation tasks at inference time. Experiments demonstrate the versatility of our method in tackling cross-modal and multimodal interpolation tasks in the audiovisual space. Notably, our proposed approach surpasses baselines in generating temporally and perceptually consistent samples conditioned on the input. Project page: neurips13025.github.io", "keywords": "multimodal diffusion;multimodal generation;diffusion timestep;latent diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Gwanghyun Kim;Alonso Martinez;Yu-Chuan Su;Brendan Jou;Jose Lezama;Agrim Gupta;Lijun Yu;Lu Jiang;Aren Jansen;Jacob C Walker;Krishna Somandepalli", "authorids": "~Gwanghyun_Kim1;~Alonso_Martinez2;~Yu-Chuan_Su1;~Brendan_Jou1;~Jose_Lezama1;~Agrim_Gupta1;~Lijun_Yu1;~Lu_Jiang1;~Aren_Jansen2;~Jacob_C_Walker1;~Krishna_Somandepalli3", "gender": ";M;;M;M;;M;M;;;M", "homepage": "https://gwang-kim.github.io/;;http://sammy-su.github.io/;;https://iie.fing.edu.uy/~jlezama/;;https://me.lj-y.com/;http://www.lujiang.info/;https://research.google/people/104952/;;https://krishna.ai", "dblp": "02/7013;246/4183;53/6299;120/8567;151/8861;200/8282;94/5561;22/752-4;;135/1696;192/5601", "google_scholar": "https://scholar.google.co.kr/citations?user=SCLtNC4AAAAJ;;nrcJfPEAAAAJ;k7eC8-0AAAAJ;iDP84cQAAAAJ;AxzVaI8AAAAJ;IaDc0OcAAAAJ;jIKjjSYAAAAJ;XbcastEAAAAJ;0dR_wD0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6570-236X;;0000-0002-2711-6738;0000-0001-8033-0330;;;0000-0003-0645-1657;0000-0003-0286-8439;;;", "linkedin": "gwanghyun-bradley-kim/;alonsomartinez/;https://www.linkedin.com/pub/yu-chuan-su/52/38b/b82/;brendanjou/;;;lijun-yu/;roadjiang/;;;", "or_profile": "~Gwanghyun_Kim1;~Alonso_Martinez2;~Yu-Chuan_Su1;~Brendan_Jou1;~Jose_Lezama1;~Agrim_Gupta1;~Lijun_Yu1;~Lu_Jiang1;~Aren_Jansen2;~Jacob_C_Walker1;~Krishna_Somandepalli3", "aff": "Google;Research, Google;Google;Google DeepMind;Google;Stanford University;School of Computer Science, Carnegie Mellon University;Google Research;Google;Google;Google", "aff_domain": "google.com;research.google.com;google.com;google.com;google.com;stanford.edu;cs.cmu.edu;google.com;google.com;google.com;google.com", "position": "Student Researcher;Researcher;Research Scientist;Research Manager;Researcher;PhD student;PhD student;Researcher;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nkim2024a,\ntitle={A Versatile Diffusion Transformer with Mixture of Noise Levels for Audiovisual Generation},\nauthor={Gwanghyun Kim and Alonso Martinez and Yu-Chuan Su and Brendan Jou and Jose Lezama and Agrim Gupta and Lijun Yu and Lu Jiang and Aren Jansen and Jacob C Walker and Krishna Somandepalli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cs1HISJkLU}\n}", "github": "", "reviewers": "4xRp;SA7V;tkoE;bNvz", "pdf_size": 4602905, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;3;2", "novelty": "2;2;3;2", "presentation": "4;2;2;3", "wc_summary": "79;48;89;70", "wc_strengths": "54;28;62;50", "wc_weaknesses": "136;300;128;108", "wc_questions": "87;53;2;46", "wc_limitations": "7;8;54;3", "wc_review": "363;437;335;277", "wc_reply_reviewers": "135;91;47;42", "wc_reply_authors": "1512;760;806;874", "reply_reviewers": "3;1;1;1", "reply_authors": "5;3;4;5", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 71.5, 15.14100392972672 ], "wc_strengths_avg": [ 48.5, 12.599603168354152 ], "wc_weaknesses_avg": [ 168.0, 76.88953114696434 ], "wc_questions_avg": [ 47.0, 30.257230540814536 ], "wc_limitations_avg": [ 18.0, 20.868636754709208 ], "wc_review_avg": [ 353.0, 57.56735185849702 ], "wc_reply_reviewers_avg": [ 78.75, 37.65883030578618 ], "wc_reply_authors_avg": [ 988.0, 305.2376123612554 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 4.25, 0.82915619758885 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12422180058549085341&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "google.com;research.google.com;google.com;google.com;google.com;stanford.edu;cs.cmu.edu;google.com;google.com;google.com;google.com", "author_num": 11, "aff_unique_index": "0;0;0;0;0;1;2;0;0;0;0", "aff_unique_norm": "Google;Stanford University;Carnegie Mellon University", "aff_unique_dep": "Google;;School of Computer Science", "aff_unique_url": "https://www.google.com;https://www.stanford.edu;https://www.cmu.edu", "aff_unique_abbr": "Google;Stanford;CMU", "aff_campus_unique_index": "0;0;0;0;2;3;0;0;0;0", "aff_campus_unique": "Mountain View;;Stanford;Pittsburgh", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "How do Large Language Models Handle Multilingualism?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94375", "id": "ctXYOoAgRy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ctXYOoAgRy", "openreview": "https://openreview.net/forum?id=ctXYOoAgRy", "poster": "", "project": "", "author_site": "Yiran Zhao, Wenxuan Zhang, Guizhen Chen, Kenji Kawaguchi, Lidong Bing", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated impressive capabilities across diverse languages. This study explores how LLMs handle multilingualism. Based on observed language ratio shifts among layers and the relationships between network structures and certain capabilities, we hypothesize the LLM's multilingual workflow ($\\texttt{MWork}$): LLMs initially understand the query, converting multilingual inputs into English for task-solving. In the intermediate layers, they employ English for thinking and incorporate multilingual knowledge with self-attention and feed-forward structures, respectively. In the final layers, LLMs generate responses aligned with the original language of the query. \nTo verify $\\texttt{MWork}$, we introduce Parallel Language-specific Neuron Detection ($\\texttt{PLND}$) to identify activated neurons for inputs in different languages without any labeled data. Using $\\texttt{PLND}$, we validate $\\texttt{MWork}$ through extensive experiments involving the deactivation of language-specific neurons across various layers and structures. \nMoreover, $\\texttt{MWork}$ allows fine-tuning of language-specific neurons with a small dataset, enhancing multilingual abilities in a specific language without compromising others. This approach results in an average improvement of $3.6\\%$ for high-resource languages and $2.3\\%$ for low-resource languages across all tasks with just $400$ documents.", "keywords": "Large Language Model;Multilingual;Language Specific Neuron", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yiran Zhao;Wenxuan Zhang;Guizhen Chen;Kenji Kawaguchi;Lidong Bing", "authorids": "~Yiran_Zhao2;~Wenxuan_Zhang1;~Guizhen_Chen1;~Kenji_Kawaguchi1;~Lidong_Bing2", "gender": "M;;F;;", "homepage": "https://zhaoyiran924.github.io/;https://isakzhang.github.io/;;https://ml.comp.nus.edu.sg/#members;https://lidongbing.github.io", "dblp": ";85/1177-1.html;221/3221;;53/6625", "google_scholar": "D_HwSlEAAAAJ;https://scholar.google.com/citations?hl=en;HAdzwTsAAAAJ;aLl3rYoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-0193-7853;;", "linkedin": ";wenxuan-zhang-608b88153/;chenguizhen;;", "or_profile": "~Yiran_Zhao2;~Wenxuan_Zhang1;~Guizhen_Chen1;~Kenji_Kawaguchi1;~Lidong_Bing3", "aff": "National University of Singapore;Alibaba Group;Alibaba Group;National University of Singapore;Alibaba Group", "aff_domain": "u.nus.edu;alibaba-inc.com;alibaba-inc.com;nus.edu;alibaba-inc.com", "position": "PhD student;Researcher;PhD student;Presidential Young Professor;Scientist", "bibtex": "@inproceedings{\nzhao2024how,\ntitle={How do Large Language Models Handle Multilingualism?},\nauthor={Yiran Zhao and Wenxuan Zhang and Guizhen Chen and Kenji Kawaguchi and Lidong Bing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ctXYOoAgRy}\n}", "github": "", "reviewers": "Nru9;TbKY;NLiQ;2b8J", "pdf_size": 2346400, "rating": "5;6;7;7", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "2;4;4;3", "presentation": "3;4;4;3", "wc_summary": "60;153;201;74", "wc_strengths": "14;71;21;21", "wc_weaknesses": "270;301;5;69", "wc_questions": "2;286;78;4", "wc_limitations": "1;2;1;1", "wc_review": "347;813;306;169", "wc_reply_reviewers": "235;0;47;0", "wc_reply_authors": "753;107;89;108", "reply_reviewers": "3;0;1;0", "reply_authors": "4;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 122.0, 57.77110004145671 ], "wc_strengths_avg": [ 31.75, 22.84047941703501 ], "wc_weaknesses_avg": [ 161.25, 126.7682432630507 ], "wc_questions_avg": [ 92.5, 115.83932838203094 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 408.75, 242.5225504978867 ], "wc_reply_reviewers_avg": [ 70.5, 96.89298220201502 ], "wc_reply_authors_avg": [ 264.25, 282.28122059393183 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9125135572018099094&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "u.nus.edu;alibaba-inc.com;alibaba-inc.com;nus.edu;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "National University of Singapore;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.alibaba.com", "aff_unique_abbr": "NUS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Singapore;China" }, { "title": "Online Weighted Paging with Unknown Weights", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94374", "id": "ctxtY3VGGq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ctxtY3VGGq", "openreview": "https://openreview.net/forum?id=ctxtY3VGGq", "poster": "/media/PosterPDFs/NeurIPS%202024/94374.png?t=1731423502.3432384", "project": "", "author_site": "Orin Levy, Noam Touitou, Aviv Rosenberg", "tldr": "", "abstract": "Online paging is a fundamental problem in the field of online algorithms, in which one maintains a cache of $k$ slots as requests for fetching pages arrive online. \nIn the weighted variant of this problem, each page has its own fetching cost; a substantial line of work on this problem culminated in an (optimal) $O(\\log k)$-competitive randomized algorithm, due to Bansal, Buchbinder and Naor (FOCS'07).\n\nExisting work for weighted paging assumes that page weights are known in advance, which is not always the case in practice.\nFor example, in multi-level caching architectures, the expected cost of fetching a memory block is a function of its probability of being in a mid-level cache rather than the main memory.\nThis complex property cannot be predicted in advance; over time, however, one may glean information about page weights through sampling their fetching cost multiple times.\n\nWe present the first algorithm for online weighted paging that does not know page weights in advance, but rather learns from weight samples.\nIn terms of techniques, this requires providing (integral) samples to a fractional solver, requiring a delicate interface between this solver and the randomized rounding scheme; we believe that our work can inspire online algorithms to other problems that involve cost sampling.", "keywords": "Online Learning;Online Weighted Paging;Online Algorithms;Competitive Ratio;Regret;Theory", "primary_area": "online_learning", "supplementary_material": "", "author": "Orin Levy;Noam Touitou;Aviv Rosenberg", "authorids": "~Orin_Levy1;~Noam_Touitou1;~Aviv_Rosenberg1", "gender": "F;M;M", "homepage": "https://sites.google.com/view/orinlevy/home;https://noamtouitou.com;https://sites.google.com/view/aviv-rosenberg/home", "dblp": "315/0411;211/8092;225/9369-2", "google_scholar": "X-7G2gQAAAAJ;-N6FOX0AAAAJ;https://scholar.google.co.il/citations?user=cg8_-foAAAAJ", "orcid": ";0000-0002-5720-4114;", "linkedin": "orin-l-5997b0136/;noam-touitou/;aviv-rosenberg-2a6222149/", "or_profile": "~Orin_Levy1;~Noam_Touitou1;~Aviv_Rosenberg1", "aff": "Amazon;Amazon;Google Research", "aff_domain": "amazon.com;amazon.com;google.com", "position": "Intern;Researcher;Researcher", "bibtex": "@inproceedings{\nlevy2024online,\ntitle={Online Weighted Paging with Unknown Weights},\nauthor={Orin Levy and Noam Touitou and Aviv Rosenberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ctxtY3VGGq}\n}", "github": "", "reviewers": "qj5j;Yutu;MmEo;VdxB;WtQQ", "pdf_size": 393779, "rating": "4;4;6;7;7", "confidence": "2;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "3;3;3;3;4", "presentation": "2;2;2;3;3", "wc_summary": "58;134;186;192;99", "wc_strengths": "24;13;38;98;44", "wc_weaknesses": "560;245;37;24;122", "wc_questions": "3;10;55;18;63", "wc_limitations": "26;23;52;12;8", "wc_review": "671;425;368;344;336", "wc_reply_reviewers": "54;324;0;21;106", "wc_reply_authors": "111;515;0;0;0", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;3;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.8000000000000002 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 133.8, 51.12494498774546 ], "wc_strengths_avg": [ 43.4, 29.363923443572727 ], "wc_weaknesses_avg": [ 197.6, 197.598178129253 ], "wc_questions_avg": [ 29.8, 24.440949245068204 ], "wc_limitations_avg": [ 24.2, 15.419468213917106 ], "wc_review_avg": [ 428.8, 125.03983365312033 ], "wc_reply_reviewers_avg": [ 101.0, 117.10166523154143 ], "wc_reply_authors_avg": [ 125.2, 199.58496937394864 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5897678246195885, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Xnbv8dL-aX4J:scholar.google.com/&scioq=Online+Weighted+Paging+with+Unknown+Weights&hl=en&as_sdt=0,39", "gs_version_total": 5, "email": "amazon.com;amazon.com;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Amazon;Google", "aff_unique_dep": "Amazon.com, Inc.;Google Research", "aff_unique_url": "https://www.amazon.com;https://research.google", "aff_unique_abbr": "Amazon;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Taxonomy of Challenges to Curating Fair Datasets", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97599", "id": "cu8FfaYriU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cu8FfaYriU", "openreview": "https://openreview.net/forum?id=cu8FfaYriU", "poster": "", "project": "", "author_site": "Dora Zhao, Morgan Scheuerman, Pooja Chitre, Jerone Andrews, Georgia Panagiotidou, Shawn Walker, Kathleen Pine, Alice Xiang", "tldr": "", "abstract": "Despite extensive efforts to create fairer machine learning (ML) datasets, there remains a limited understanding of the practical aspects of dataset curation. Drawing from interviews with 30 ML dataset curators, we present a comprehensive taxonomy of the challenges and trade-offs encountered throughout the dataset curation lifecycle. Our findings underscore overarching issues within the broader fairness landscape that impact data curation. We conclude with recommendations aimed at fostering systemic changes to better facilitate fair dataset curation practices.", "keywords": "datasets;computer vision;fairness;algorithmic bias;responsible AI", "primary_area": "", "supplementary_material": "", "author": "Dora Zhao;Morgan Scheuerman;Pooja Chitre;Jerone Andrews;Georgia Panagiotidou;Shawn Walker;Kathleen H. Pine;Alice Xiang", "authorids": "~Dora_Zhao1;~Morgan_Scheuerman1;~Pooja_Chitre1;~Jerone_Andrews1;~Georgia_Panagiotidou1;~Shawn_Walker2;~Kathleen_H._Pine1;~Alice_Xiang1", "gender": "F;M;;;;;F;", "homepage": "https://dorazhao99.github.io;https://www.morgan-klaus.com;;;https://www.kcl.ac.uk/people/georgia-panagiotidou;https://shawnw.io;;", "dblp": "295/8515;;;;;;;", "google_scholar": "I-OInyYAAAAJ;BBMK69EAAAAJ;;;;2BqAYSYAAAAJ;pb761jkAAAAJ;", "orcid": ";;;;;0000-0002-7052-5705;;", "linkedin": ";;pchitre/;;;;;", "or_profile": "~Dora_Zhao1;~Morgan_Scheuerman1;~Pooja_Chitre1;~Jerone_Andrews1;~Georgia_Panagiotidou1;~Shawn_Walker2;~Kathleen_H._Pine1;~Alice_Xiang1", "aff": "Stanford University;University of Colorado at Boulder;Arizona State University;;King's College London, University of London;Arizona State University;Arizona State University;", "aff_domain": "stanford.edu;colorado.edu;asu.edu;;kcl.ac.uk;asu.edu;asu.edu;", "position": "PhD student;Postdoc;PhD student;;Assistant Professor;Assistant Professor;Associate Professor;", "bibtex": "@inproceedings{\nzhao2024a,\ntitle={A Taxonomy of Challenges to Curating Fair Datasets},\nauthor={Dora Zhao and Morgan Scheuerman and Pooja Chitre and Jerone Andrews and Georgia Panagiotidou and Shawn Walker and Kathleen H. Pine and Alice Xiang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cu8FfaYriU}\n}", "github": "", "reviewers": "ZA9u;jzy5;Tw7t;aifA", "pdf_size": 808565, "rating": "6;7;8;9", "confidence": "3;3;4;4", "wc_summary_and_contributions": "82;88;319;87", "wc_strengths": "148;10;101;40", "wc_improvement": "109;21;129;92", "wc_limitations": "21;87;50;25", "wc_correctness": "1;9;33;5", "wc_clarity": "31;34;29;6", "wc_relation_to_prior_work": "1;55;40;79", "wc_documentation": "1;5;21;38", "wc_additional_feedback": "1;1;1;1", "wc_review": "395;310;723;373", "wc_reply_reviewers": "0;45;0;47", "wc_reply_authors": "56;0;0;37", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;3;2", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 144.0, 101.06186224288567 ], "wc_strengths_avg": [ 74.75, 53.513432893059665 ], "wc_improvement_avg": [ 87.75, 40.702426217610174 ], "wc_limitations_avg": [ 45.75, 26.280934153869037 ], "wc_correctness_avg": [ 12.0, 12.449899597988733 ], "wc_clarity_avg": [ 25.0, 11.113055385446435 ], "wc_relation_to_prior_work_avg": [ 43.75, 28.331740151286155 ], "wc_documentation_avg": [ 16.25, 14.618053906043718 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 450.25, 160.5325122833378 ], "wc_reply_reviewers_avg": [ 23.0, 23.010866998007703 ], "wc_reply_authors_avg": [ 23.25, 24.200981385059574 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11402275035071606445&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 6, "email": "stanford.edu;colorado.edu;asu.edu;;kcl.ac.uk;asu.edu;asu.edu;", "author_num": 8, "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "Stanford University;University of Colorado;Arizona State University;King's College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.colorado.edu;https://www.asu.edu;https://www.kcl.ac.uk", "aff_unique_abbr": "Stanford;CU;ASU;KCL", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Boulder;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Wasserstein Gradient Boosting: A Framework for Distribution-Valued Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94373", "id": "cuO0DenqMl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cuO0DenqMl", "openreview": "https://openreview.net/forum?id=cuO0DenqMl", "poster": "", "project": "", "tldr": "", "abstract": "Gradient boosting is a sequential ensemble method that fits a new weaker learner to pseudo residuals at each iteration. We propose Wasserstein gradient boosting, a novel extension of gradient boosting, which fits a new weak learner to alternative pseudo residuals that are Wasserstein gradients of loss functionals of probability distributions assigned at each input. It solves distribution-valued supervised learning, where the output values of the training dataset are probability distributions. In classification and regression, a model typically returns, for each input, a point estimate of a parameter of a noise distribution specified for a response variable, such as the class probability parameter of a categorical distribution specified for a response label. A main application of Wasserstein gradient boosting in this paper is tree-based evidential learning, which returns a distributional estimate of the response parameter for each input. We empirically demonstrate the competitive performance of the probabilistic prediction by Wasserstein gradient boosting in comparison with existing uncertainty quantification methods.", "keywords": "Gradient Boosting; Wasserstein Gradient Flow; Uncertainty Quantification", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/5c53de6d28010db73a8232e2c5d3b439300fb6f0.zip", "author": "Takuo Matsubara", "authorids": "~Takuo_Matsubara1", "gender": "", "homepage": "https://sites.google.com/view/takuomatsubara/home", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Takuo_Matsubara1", "aff": "University of Edinburgh", "aff_domain": "ed.ac.uk", "position": "Postdoc", "bibtex": "@inproceedings{\nmatsubara2024wasserstein,\ntitle={Wasserstein Gradient Boosting: A Framework for Distribution-Valued Supervised Learning},\nauthor={Takuo Matsubara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cuO0DenqMl}\n}", "github": "", "reviewers": "q9Jy;UM7J;DY4u;jx13", "pdf_size": 4046030, "rating": "3;3;7;8", "confidence": "5;3;3;3", "soundness": "2;2;4;4", "novelty": "2;2;4;4", "presentation": "3;2;4;4", "wc_summary": "120;33;91;142", "wc_strengths": "56;80;43;40", "wc_weaknesses": "388;423;24;33", "wc_questions": "6;275;67;17", "wc_limitations": "14;1;1;1", "wc_review": "584;812;226;233", "wc_reply_reviewers": "103;499;23;20", "wc_reply_authors": "238;1222;13;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;1", "rating_avg": [ 5.25, 2.277608394786075 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 96.5, 40.88092464707715 ], "wc_strengths_avg": [ 54.75, 15.769828787910159 ], "wc_weaknesses_avg": [ 217.0, 188.93252763883726 ], "wc_questions_avg": [ 91.25, 108.55039152393694 ], "wc_limitations_avg": [ 4.25, 5.629165124598851 ], "wc_review_avg": [ 463.75, 247.7441977120756 ], "wc_reply_reviewers_avg": [ 161.25, 197.8210997340779 ], "wc_reply_authors_avg": [ 368.25, 501.91252973003174 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.5703518254720302, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14824443213977613173&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ed.ac.uk", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "An exactly solvable model for emergence and scaling laws in the multitask sparse parity problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94372", "id": "cuWsR25bbI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cuWsR25bbI", "openreview": "https://openreview.net/forum?id=cuWsR25bbI", "poster": "/media/PosterPDFs/NeurIPS%202024/94372.png?t=1731019890.6678758", "project": "", "author_site": "yoonsoo nam, Nayara Fonseca, Seok Hyeong Lee, Chris Mingard, Ard Louis", "tldr": "", "abstract": "Deep learning models can exhibit what appears to be a sudden ability to solve a new problem as training time, training data, or model size increases, a phenomenon known as emergence. In this paper, we present a framework where each new ability (a skill) is represented as a basis function. We solve a simple multi-linear model in this skill-basis, finding analytic expressions for the emergence of new skills, as well as for scaling laws of the loss with training time, data size, model size, and optimal compute. We compare our detailed calculations to direct simulations of a two-layer neural network trained on multitask sparse parity, where the tasks in the dataset are distributed according to a power-law. Our simple model captures, using a single fit parameter, the sigmoidal emergence of multiple new skills as training time, data size or model size increases in the neural network.", "keywords": "science of deep learning;emergence;skills;scaling laws", "primary_area": "other", "supplementary_material": "/attachment/a3fd039a0a2df363366666f1524ecdbef95b6cfc.zip", "author": "Yoonsoo Nam;Nayara Fonseca;Seok Hyeong Lee;Chris Mingard;Ard A. Louis", "authorids": "~Yoonsoo_Nam1;~Nayara_Fonseca1;~Seok_Hyeong_Lee1;~Chris_Mingard1;~Ard_A._Louis1", "gender": "M;F;M;M;", "homepage": "https://www.physics.ox.ac.uk/our-people/yoonsoonam;https://www.nayarafonseca.com/;http://qsms.math.snu.ac.kr/AboutUs;https://c1510.github.io/;http://www-thphys.physics.ox.ac.uk/people/ArdLouis/", "dblp": "274/0898;;;;131/6487", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;eWFKhjwAAAAJ;;xTMeOcYAAAAJ;akIfLQ0AAAAJ", "orcid": ";;;0000-0002-4519-9709;0000-0002-8438-910X", "linkedin": "yoonsoo-nam-b5548474/;fonseca-nayara/;;;", "or_profile": "~Yoonsoo_Nam1;~Nayara_Fonseca1;~Seok_Hyeong_Lee1;~Chris_Mingard1;~Ard_A._Louis1", "aff": "University of Oxford;University of Oxford;Seoul National University;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;snu.ac.kr;ox.ac.uk;ox.ac.uk", "position": "PhD student;Researcher;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nnam2024an,\ntitle={An exactly solvable model for emergence and scaling laws in the multitask sparse parity problem},\nauthor={Yoonsoo Nam and Nayara Fonseca and Seok Hyeong Lee and Chris Mingard and Ard A. Louis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cuWsR25bbI}\n}", "github": "", "reviewers": "NXT5;owz9;TV9v;D3CL", "pdf_size": 1204842, "rating": "3;6;7;7", "confidence": "3;3;3;5", "soundness": "1;4;4;4", "novelty": "1;2;3;3", "presentation": "2;3;4;4", "wc_summary": "219;137;67;162", "wc_strengths": "24;124;58;60", "wc_weaknesses": "24;271;171;217", "wc_questions": "13;2;49;59", "wc_limitations": "4;1;7;1", "wc_review": "284;535;352;499", "wc_reply_reviewers": "42;0;41;68", "wc_reply_authors": "17;0;205;21", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 1.299038105676658 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 146.25, 54.55902766728894 ], "wc_strengths_avg": [ 66.5, 36.14899721984 ], "wc_weaknesses_avg": [ 170.75, 91.82149802742275 ], "wc_questions_avg": [ 30.75, 23.836683913665507 ], "wc_limitations_avg": [ 3.25, 2.48746859276655 ], "wc_review_avg": [ 417.5, 103.15158748172517 ], "wc_reply_reviewers_avg": [ 37.75, 24.33490291741473 ], "wc_reply_authors_avg": [ 60.75, 83.65517019288168 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.44022545316281186, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XvIz4aRlVCIJ:scholar.google.com/&scioq=An+exactly+solvable+model+for+emergence+and+scaling+laws+in+the+multitask+sparse+parity+problem&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ox.ac.uk;ox.ac.uk;snu.ac.kr;ox.ac.uk;ox.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Oxford;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.snu.ac.kr", "aff_unique_abbr": "Oxford;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;South Korea" }, { "title": "Is A Picture Worth A Thousand Words? Delving Into Spatial Reasoning for Vision Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94371", "id": "cvaSru8LeO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cvaSru8LeO", "openreview": "https://openreview.net/forum?id=cvaSru8LeO", "poster": "/media/PosterPDFs/NeurIPS%202024/94371.png?t=1733457741.7832723", "project": "", "author_site": "Jiayu Wang, Yifei Ming, Zhenmei Shi, Vibhav Vineet, Xin Wang, Sharon Li, Neel Joshi", "tldr": "", "abstract": "Large language models (LLMs) and vision-language models (VLMs) have demonstrated remarkable performance across a wide range of tasks and domains. Despite this promise, spatial understanding and reasoning\u2014a fundamental component of human cognition\u2014remains under-explored. We propose SpatialEval, a novel benchmark that covers diverse aspects of spatial reasoning such as relationship understanding, navigation, and counting. We conduct a comprehensive evaluation of competitive language and vision-language models. Our findings reveal several counter-intuitive insights that have been overlooked in the literature: (1) Spatial reasoning poses significant challenges where competitive models can fall behind random guessing; (2) Despite additional visual input, VLMs often under-perform compared to their LLM counterparts; (3) When both textual and visual information is available, multi-modal language models become less reliant on visual information if sufficient textual clues are provided. Additionally, we demonstrate that leveraging redundancy between vision and text can significantly enhance model performance. We hope our study will inform the development of multimodal models to improve spatial intelligence and further close the gap with human intelligence. Our code is available at https://github.com/jiayuww/SpatialEval.", "keywords": "Spatial Reasoning;Evaluation;Vision-language Models;Language Models", "primary_area": "evaluation", "supplementary_material": "", "author": "Jiayu Wang;Yifei Ming;Zhenmei Shi;Vibhav Vineet;Xin Wang;Yixuan Li;Neel Joshi", "authorids": "~Jiayu_Wang6;~Yifei_Ming1;~Zhenmei_Shi1;~Vibhav_Vineet5;~Xin_Wang1;~Yixuan_Li1;~Neel_Joshi1", "gender": "F;M;M;;F;F;", "homepage": "https://jiayuww.github.io/;https://alvinmingsf.github.io/;http://zhmeishi.github.io/;;https://people.eecs.berkeley.edu/~xinw/;http://pages.cs.wisc.edu/~sharonli/;", "dblp": ";277/4125;246/5216;;;144/6087-1;", "google_scholar": "e4S1NrEAAAAJ;Dh_4cyQAAAAJ;0oeNnzMAAAAJ;;e9gUdKwAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;", "linkedin": ";;zhenmei-shi-56408a113/;;xin-wang-aa83a577;liyixuan;", "or_profile": "~Jiayu_Wang6;~Yifei_Ming1;~Zhenmei_Shi1;~Vibhav_Vineet5;~Xin_Wang1;~Yixuan_Li1;~Neel_Joshi1", "aff": "University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;;Microsoft;Cornell University;", "aff_domain": "wisc.edu;wisc.edu;wisc.edu;;microsoft.com;cornell.edu;", "position": "MS student;PhD student;PhD student;;Senior Researcher;Graduate Student;", "bibtex": "@inproceedings{\nwang2024is,\ntitle={Is A Picture Worth A Thousand Words? Delving Into Spatial Reasoning for Vision Language Models},\nauthor={Jiayu Wang and Yifei Ming and Zhenmei Shi and Vibhav Vineet and Xin Wang and Yixuan Li and Neel Joshi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cvaSru8LeO}\n}", "github": "", "reviewers": "kVxr;i8Xy;WNLP;JHoY", "pdf_size": 12974224, "rating": "4;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;2", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "30;104;57;92", "wc_strengths": "22;32;60;47", "wc_weaknesses": "208;190;398;346", "wc_questions": "2;66;3;41", "wc_limitations": "7;1;7;33", "wc_review": "269;393;525;559", "wc_reply_reviewers": "0;88;51;144", "wc_reply_authors": "0;106;22;147", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.75, 29.18368551091517 ], "wc_strengths_avg": [ 40.25, 14.463315664120728 ], "wc_weaknesses_avg": [ 285.5, 88.66087073788526 ], "wc_questions_avg": [ 28.0, 26.99073915253156 ], "wc_limitations_avg": [ 12.0, 12.36931687685298 ], "wc_review_avg": [ 436.5, 114.87710825051265 ], "wc_reply_reviewers_avg": [ 70.75, 52.58029573899333 ], "wc_reply_authors_avg": [ 68.75, 60.04737712839754 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6698616547324601046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "wisc.edu;wisc.edu;wisc.edu;;microsoft.com;cornell.edu;", "author_num": 7, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft;Cornell University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com;https://www.cornell.edu", "aff_unique_abbr": "UW-Madison;Microsoft;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Many-shot Jailbreaking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94370", "id": "cw5mgd71jW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cw5mgd71jW", "openreview": "https://openreview.net/forum?id=cw5mgd71jW", "poster": "/media/PosterPDFs/NeurIPS%202024/94370.png?t=1731698666.1164408", "project": "", "author_site": "Cem Anil, Esin DURMUS, Nina Panickssery, Mrinank Sharma, Joe Benton, Sandipan Kundu, Joshua Batson, Meg Tong, Jesse Mu, Daniel Ford, Francesco Mosconi, Rajashree Agrawal, Rylan Schaeffer, Naomi Bashkansky, Samuel Svenningsen, Mike Lambert, Ansh Radhakrishnan, Carson Denison, Evan Hubinger, Yuntao Bai, Trenton Bricken, Timothy Maxwell, Nicholas Schiefer, James Sully, Alex Tamkin, Tamera Lanham, Karina Nguyen, Tomek Korbak, Jared Kaplan, Deep Ganguli, Samuel Bowman, Ethan Perez, Roger Grosse, David Duvenaud", "tldr": "", "abstract": "We investigate a family of simple long-context attacks on large language models: prompting with hundreds of demonstrations of undesirable behavior. This attack is newly feasible with the larger context windows recently deployed by language model providers like Google DeepMind, OpenAI and Anthropic. We find that in diverse, realistic circumstances, the effectiveness of this attack follows a power law, up to hundreds of shots. We demonstrate the success of this attack on the most widely used state-of-the-art closed-weight models, and across various tasks. Our results suggest very long contexts present a rich new attack surface for LLMs.", "keywords": "large language models;long context;robustness;jailbreaks;in-context learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Cem Anil;Esin DURMUS;Nina Rimsky;Mrinank Sharma;Joe Benton;Sandipan Kundu;Joshua Batson;Meg Tong;Jesse Mu;Daniel J Ford;Francesco Mosconi;Rajashree Agrawal;Rylan Schaeffer;Naomi Bashkansky;Samuel Svenningsen;Mike Lambert;Ansh Radhakrishnan;Carson Denison;Evan J Hubinger;Yuntao Bai;Trenton Bricken;Timothy Maxwell;Nicholas Schiefer;James Sully;Alex Tamkin;Tamera Lanham;Karina Nguyen;Tomasz Korbak;Jared Kaplan;Deep Ganguli;Samuel R. Bowman;Ethan Perez;Roger Baker Grosse;David Duvenaud", "authorids": "~Cem_Anil1;~Esin_DURMUS2;~Nina_Rimsky1;~Mrinank_Sharma1;~Joe_Benton1;~Sandipan_Kundu1;~Joshua_Batson1;~Meg_Tong1;~Jesse_Mu1;~Daniel_J_Ford1;~Francesco_Mosconi1;~Rajashree_Agrawal1;~Rylan_Schaeffer2;~Naomi_Bashkansky1;~Samuel_Svenningsen1;~Mike_Lambert1;~Ansh_Radhakrishnan1;~Carson_Denison1;~Evan_J_Hubinger1;~Yuntao_Bai1;~Trenton_Bricken1;~Timothy_Maxwell1;~Nicholas_Schiefer1;~James_Sully1;~Alex_Tamkin1;~Tamera_Lanham1;~Karina_Nguyen2;~Tomasz_Korbak1;~Jared_Kaplan2;~Deep_Ganguli2;~Samuel_R._Bowman1;~Ethan_Perez1;~Roger_Baker_Grosse1;~David_Duvenaud2", "gender": "M;F;F;;M;;;;;;M;F;M;F;Not Specified;;;M;M;M;;;;M;;F;F;M;;M;;M;M;M", "homepage": "https://www.cs.toronto.edu/~anilcem/;https://esdurmus.github.io/;https://ninapanickssery.com;https://mrinanksharma.github.io/;https://joejbenton.com/;;;https://www.megtong.com/;https://www.jesse.mu/;https://www.mathgenealogy.org/id.php?id=125209;https://about.me/fmosconi;https://www.monsoonmath.org/;https://rylanschaeffer.github.io;https://naomibashkansky.com/;https://github.com/altock;;;;https://www.alignmentforum.org/users/evhub;https://github.com/ytbai;https://trentbrick.github.io/;https://timmaxwell.org/;https://nicholasschiefer.com/;;;;https://karinanguyen.com/;https://tomekkorbak.com;https://github.com/google/BIG-bench;;;http://ethanperez.net;http://www.cs.toronto.edu/~rgrosse/;https://www.cs.toronto.edu/~duvenaud/", "dblp": "218/6350;;;254/2914;;;;;205/9022;;;346/4882.html;280/1341;;;;;;;308/0881;;;;;;;;209/9713.html;;;;192/1812;26/7058;86/9380", "google_scholar": "1VDV6ZEAAAAJ;;6-_i-jsAAAAJ;https://scholar.google.co.uk/citations?user=5gslw-MAAAAJ;ywp_eYsAAAAJ;;;_pTshvoAAAAJ;djLcGEQAAAAJ;;-3l8fWEAAAAJ;41ZE3NwAAAAJ;6tMEGz8AAAAJ;;;;;;;r7GUEVsAAAAJ;CP6aLusAAAAJ;;xerhYrsAAAAJ;naSZjgoAAAAJ;;;;YQ5rrk4AAAAJ;;rG3xW3UAAAAJ;;https://scholar.google.ca/citations?user=za0-taQAAAAJ;xgQd1qgAAAAJ;https://scholar.google.ca/citations?user=ZLpO3XQAAAAJ", "orcid": ";;;0000-0002-4304-7963;0000-0002-2103-6112;;;;0000-0002-0812-2710;;;;;;;;;;;;;;;0000-0002-4451-5386;;;;0000-0002-6258-2013;;;;;;", "linkedin": ";;nina-panickssery/;;joe-benton-686420157/;;;;jayelm;daniel-ford-764608;;;rylanschaeffer/;naomibas/;samsven/;mike-lambert-dancer;ansh-radhakrishnan-291753184;carson-denison-a86016218/;;yuntao-bai-3039bb138/;;;;;;tamera-lanham;;tomaszkorbak/;;;;https://linkedin.com/in/ethanjperez;;", "or_profile": "~Cem_Anil1;~Esin_DURMUS2;~Nina_Rimsky1;~Mrinank_Sharma1;~Joe_Benton1;~Sandipan_Kundu1;~Joshua_Batson1;~Meg_Tong1;~Jesse_Mu1;~Daniel_J_Ford1;~Francesco_Mosconi1;~Rajashree_Agrawal1;~Rylan_Schaeffer2;~Naomi_Bashkansky1;~Samuel_Svenningsen1;~Mike_Lambert1;~Ansh_Radhakrishnan1;~Carson_Denison1;~Evan_J_Hubinger1;~Yuntao_Bai1;~Trenton_Bricken1;~Timothy_Maxwell1;~Nicholas_Schiefer1;~James_Sully1;~Alex_Tamkin1;~Tamera_Lanham1;~Karina_Nguyen2;~Tomasz_Korbak1;~Jared_Kaplan2;~Deep_Ganguli2;~Samuel_R._Bowman1;~Ethan_Perez1;~Roger_Baker_Grosse1;~David_Duvenaud2", "aff": "Toronto University;Anthropic;Anthropic;;Anthropic;;;;Anthropic;Anthropic PBC;Anthropic PBC;Reed College;Meta Generative AI;Harvard University;Constellation;Anthropic;;Anthropic;Anthropic;Anthropic;Harvard University;Anthropic;Anthropic;Anthropic;;Anthropic;Anthropic;Anthropic;;Anthropic;;New York University;Vector Institute;Anthropic", "aff_domain": "utoronto.ca;anthropic.com;anthropic.com;;anthropic.com;;;;anthropic.com;anthropic.com;anthropic.com;reed.edu;meta.com;harvard.edu;constellation.org;anthropic.com;;anthropic.com;anthropic.com;anthropic.com;harvard.edu;anthropic.com;anthropic.com;anthropic.com;;anthropic.com;anthropic.com;anthropic.com;;anthropic.com;;nyu.edu;vectorinstitute.ai;anthropic.com", "position": "PhD student;Researcher;Researcher;;Researcher;;;;Researcher;Researcher;Researcher;Undergrad student;Intern;Undergrad student;Researcher;Researcher;;Researcher;Researcher;Principal Researcher;PhD student;Researcher;Researcher;Researcher;;Researcher;Researcher;Researcher;;Researcher;;Researcher;Faculty Member;Researcher", "bibtex": "@inproceedings{\nanil2024manyshot,\ntitle={Many-shot Jailbreaking},\nauthor={Cem Anil and Esin DURMUS and Nina Rimsky and Mrinank Sharma and Joe Benton and Sandipan Kundu and Joshua Batson and Meg Tong and Jesse Mu and Daniel J Ford and Francesco Mosconi and Rajashree Agrawal and Rylan Schaeffer and Naomi Bashkansky and Samuel Svenningsen and Mike Lambert and Ansh Radhakrishnan and Carson Denison and Evan J Hubinger and Yuntao Bai and Trenton Bricken and Timothy Maxwell and Nicholas Schiefer and James Sully and Alex Tamkin and Tamera Lanham and Karina Nguyen and Tomasz Korbak and Jared Kaplan and Deep Ganguli and Samuel R. Bowman and Ethan Perez and Roger Baker Grosse and David Duvenaud},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cw5mgd71jW}\n}", "github": "", "reviewers": "yrcB;HzZf;7j23;FGBD;3GmK", "pdf_size": 2064274, "rating": "6;6;6;7;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;4;3", "novelty": "3;2;3;4;4", "presentation": "3;3;2;3;4", "wc_summary": "99;104;105;106;79", "wc_strengths": "42;51;163;79;87", "wc_weaknesses": "218;63;157;127;60", "wc_questions": "2;12;137;212;2", "wc_limitations": "60;23;23;104;4", "wc_review": "421;253;585;628;232", "wc_reply_reviewers": "201;20;30;391;12", "wc_reply_authors": "306;0;0;448;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "2;1;1;2;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 98.6, 10.091580649234292 ], "wc_strengths_avg": [ 84.4, 42.72516822670216 ], "wc_weaknesses_avg": [ 125.0, 59.575162609933344 ], "wc_questions_avg": [ 73.0, 86.27861844049197 ], "wc_limitations_avg": [ 42.8, 35.58314207598874 ], "wc_review_avg": [ 423.8, 163.49360843776125 ], "wc_reply_reviewers_avg": [ 130.8, 147.7719865197731 ], "wc_reply_authors_avg": [ 150.8, 190.07198636306194 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 34, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 121, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6746203427349483383&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "utoronto.ca;anthropic.com;anthropic.com;;anthropic.com;;;;anthropic.com;anthropic.com;anthropic.com;reed.edu;meta.com;harvard.edu;constellation.org;anthropic.com;;anthropic.com;anthropic.com;anthropic.com;harvard.edu;anthropic.com;anthropic.com;anthropic.com;;anthropic.com;anthropic.com;anthropic.com;;anthropic.com;;nyu.edu;vectorinstitute.ai;anthropic.com", "author_num": 34, "aff_unique_index": "0;1;1;1;1;2;2;3;4;5;6;1;1;1;1;5;1;1;1;1;1;1;1;7;8;1", "aff_unique_norm": "University of Toronto;Anthropic;Anthropic PBC;Reed College;Meta;Harvard University;Constellation;New York University;Vector Institute", "aff_unique_dep": ";;;;Generative AI;;;;", "aff_unique_url": "https://www.utoronto.ca;https://www.anthropic.com;https://www.anthropic.com;https://www.reed.edu;https://meta.com;https://www.harvard.edu;;https://www.nyu.edu;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Anthropic;Anthropic;Reed;Meta;Harvard;;NYU;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;0;1", "aff_country_unique": "Canada;United States;" }, { "title": "CharXiv: Charting Gaps in Realistic Chart Understanding in Multimodal LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97598", "id": "cy8mq7QYae", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cy8mq7QYae", "openreview": "https://openreview.net/forum?id=cy8mq7QYae", "poster": "", "project": "", "author_site": "Zirui Wang, Mengzhou Xia, Luxi He, Howard Chen, Yitao Liu, Richard Zhu, Kaiqu Liang, Xindi Wu, Haotian Liu, Sadhika Malladi, Chevalier, Sanjeev Arora, Danqi Chen", "tldr": "", "abstract": "Chart understanding plays a pivotal role when applying Multimodal Large Language Models (MLLMs) to real-world tasks such as analyzing scientific papers or financial reports. However, existing datasets often focus on oversimplified and homogeneous charts with template-based questions, leading to an overly optimistic measure of progress. We demonstrate that although open-source models can appear to outperform strong proprietary models on these benchmarks, a simple stress test with slightly different charts or questions deteriorates performance by up to 34.5%. In this work, we propose CharXiv, a comprehensive evaluation suite involving 2,323 natural, challenging, and diverse charts from scientific papers. CharXiv includes two types of questions: 1) descriptive questions about examining basic chart elements and 2) reasoning questions that require synthesizing information across complex visual elements in the chart. To ensure quality, all charts and questions are handpicked, curated, and verified by human experts. Our results reveal a substantial, previously underestimated gap between the reasoning skills of the strongest proprietary model (i.e., GPT-4o), which achieves 47.1% accuracy, and the strongest open-source model (i.e., InternVL Chat V1.5), which achieves 29.2%. All models lag far behind human performance of 80.5%, underscoring weaknesses in the chart understanding capabilities of existing MLLMs. We hope that CharXiv facilitates future research on MLLM chart understanding by providing a more realistic and faithful measure of progress. Project website: https://charxiv.github.io/", "keywords": "Multimodal Large Language Models;Chart Understanding;Vision-Language;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Zirui Wang;Mengzhou Xia;Luxi He;Howard Chen;Yitao Liu;Richard Zhu;Kaiqu Liang;Xindi Wu;Haotian Liu;Sadhika Malladi;Alexis Chevalier;Sanjeev Arora;Danqi Chen", "authorids": "~Zirui_Wang5;~Mengzhou_Xia1;~Luxi_He1;~Howard_Chen1;~Yitao_Liu2;~Richard_Zhu2;~Kaiqu_Liang1;~Xindi_Wu1;~Haotian_Liu1;~Sadhika_Malladi2;~Alexis_Chevalier1;~Sanjeev_Arora1;~Danqi_Chen1", "gender": "M;F;F;M;M;Not Specified;;F;;F;Not Specified;;F", "homepage": "https://zwcolin.github.io;https://xiamengzhou.github.io/;;https://howard50b.github.io/;https://yitaoliu17.com/;http://richardzhu123.github.io;https://kaiquliang.github.io/;https://xindiwu.github.io/;https://hliu.cc;https://www.cs.princeton.edu/~smalladi/;https://www.ias.edu/scholars/alexis-chevalier;http://www.cs.princeton.edu/~arora/;https://www.cs.princeton.edu/~danqic/", "dblp": ";241/9329;338/9240;06/2061;128/1821;;;235/0784;66/10511;176/9810;;a/SArora;87/7949", "google_scholar": "https://scholar.google.com/citations?hl=en;zyJn1IcAAAAJ;;wsNa_W4AAAAJ;;;hmqvdJgAAAAJ;hvnUnrUAAAAJ;Xo6wfnQAAAAJ;9HCmTcwAAAAJ;;RUP4S68AAAAJ;sVR8ktkAAAAJ", "orcid": "0009-0005-1329-5607;;;;;;;;;;;;", "linkedin": "zwcolin/;;lucy-he-3051111a9/;;%E4%BA%A6%E9%9F%AC-%E5%88%98-9ba124257/;richardzhu01/;kaiqu-liang-00042b195/?originalSubdomain=ca;;;;alexchvl;;", "or_profile": "~Zirui_Wang5;~Mengzhou_Xia1;~Luxi_He1;~Howard_Chen1;~Yitao_Liu2;~Richard_Zhu2;~Kaiqu_Liang1;~Xindi_Wu1;~Haotian_Liu1;~Sadhika_Malladi2;~Alexis_Chevalier1;~Sanjeev_Arora1;~Danqi_Chen1", "aff": "Princeton University;Princeton University;Department of Computer Science, Princeton University;Princeton University;The University of Hong Kong;Princeton University;Princeton University;Princeton University;Department of Computer Science, University of Wisconsin - Madison;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;cs.princeton.edu;princeton.edu;hku.hk;princeton.edu;princeton.edu;cs.princeton.edu;cs.wisc.edu;princeton.edu;princeton.edu;princeton.edu;cs.princeton.edu", "position": "MS student;PhD student;PhD student;PhD student;Undergrad student;MS student;PhD student;PhD student;PhD student;PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024charxiv,\ntitle={CharXiv: Charting Gaps in Realistic Chart Understanding in Multimodal {LLM}s},\nauthor={Zirui Wang and Mengzhou Xia and Luxi He and Howard Chen and Yitao Liu and Richard Zhu and Kaiqu Liang and Xindi Wu and Haotian Liu and Sadhika Malladi and Alexis Chevalier and Sanjeev Arora and Danqi Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=cy8mq7QYae}\n}", "github": "", "reviewers": "vWvR;Bnih;ZMD4", "pdf_size": 13324366, "rating": "4;7;8", "confidence": "5;5;4", "wc_summary_and_contributions": "141;50;64", "wc_strengths": "2;93;131", "wc_improvement": "36;125;28", "wc_limitations": "9;57;10", "wc_correctness": "6;42;3", "wc_clarity": "1;19;9", "wc_relation_to_prior_work": "1;60;11", "wc_documentation": "17;75;17", "wc_additional_feedback": "1;1;1", "wc_review": "214;522;274", "wc_reply_reviewers": "0;0;63", "wc_reply_authors": "0;0;30", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 85.0, 40.00833246545858 ], "wc_strengths_avg": [ 75.33333333333333, 54.12536887223547 ], "wc_improvement_avg": [ 63.0, 43.96210489349511 ], "wc_limitations_avg": [ 25.333333333333332, 22.395436042987768 ], "wc_correctness_avg": [ 17.0, 17.72004514666935 ], "wc_clarity_avg": [ 9.666666666666666, 7.363574011458175 ], "wc_relation_to_prior_work_avg": [ 24.0, 25.78113005022601 ], "wc_documentation_avg": [ 36.333333333333336, 27.341462205879836 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 336.6666666666667, 133.31999933326665 ], "wc_reply_reviewers_avg": [ 21.0, 29.698484809834994 ], "wc_reply_authors_avg": [ 10.0, 14.142135623730951 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.6933752452815364, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4440957401833521450&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "princeton.edu;princeton.edu;cs.princeton.edu;princeton.edu;hku.hk;princeton.edu;princeton.edu;cs.princeton.edu;cs.wisc.edu;princeton.edu;princeton.edu;princeton.edu;cs.princeton.edu", "author_num": 13, "aff_unique_index": "0;0;0;0;1;0;0;0;2;0;0;0;0", "aff_unique_norm": "Princeton University;University of Hong Kong;University of Wisconsin-Madison", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.princeton.edu;https://www.hku.hk;https://www.wisc.edu", "aff_unique_abbr": "Princeton;HKU;UW-Madison", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Madison", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Can neural operators always be continuously discretized?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94369", "id": "cyJxphdw3B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cyJxphdw3B", "openreview": "https://openreview.net/forum?id=cyJxphdw3B", "poster": "", "project": "", "author_site": "Takashi Furuya, Michael Puthawala, Matti Lassas, Maarten V. de Hoop", "tldr": "", "abstract": "In this work we consider the problem of discretization of neural operators in a general setting. Using category theory, we give a no-go theorem that shows that diffeomorphisms between Hilbert spaces may not admit any continuous approximations by diffeomorphisms on finite spaces, even if the discretization is non-linear. This shows how infinite-dimensional Hilbert spaces and finite-dimensional vector spaces fundamentally differ. A key take-away is that to obtain discretization invariance, considerable effort is needed to ensure that finite-dimensional approximations of neural operator converge not only as sequences of functions, but that their representations converge in a suitable sense as well. With this perspective, we give several positive results. We first show that strongly monotone diffeomorphism operators always admit finite-dimensional strongly monotone diffeomorphisms. Next we show that bilipschitz neural operators may always be written via the repeated alternating composition of strongly monotone neural operators and invertible linear maps. We also show that such operators may be inverted locally via iteration provided that such inverse exists. Finally, we conclude by showing how our framework may be used `out of the box' to prove quantitative approximation results for discretization of neural operators.", "keywords": "Neural Operators;Invertibility;Category Theory", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Takashi Furuya;Michael Anthony Puthawala;Matti Lassas;Maarten V. de Hoop", "authorids": "~Takashi_Furuya1;~Michael_Anthony_Puthawala1;~Matti_Lassas1;~Maarten_V._de_Hoop2", "gender": "M;M;M;", "homepage": ";https://scholar.google.com/citations?user=ntwCDpoAAAAJ&hl=en;https://www.mv.helsinki.fi/home/lassas/index.html;http://maartendehoop.rice.edu/", "dblp": ";;;60/4525", "google_scholar": "https://scholar.google.co.jp/citations?user=e3YJUQoAAAAJ;ntwCDpoAAAAJ;;", "orcid": "0000-0001-6132-6846;;0000-0003-2043-3156;", "linkedin": ";;;", "or_profile": "~Takashi_Furuya1;~Michael_Anthony_Puthawala1;~Matti_Lassas1;~Maarten_v._de_Hoop1", "aff": "Shimane University;South Dakota State University;University of Helsinki;Rice University", "aff_domain": "shimane-u.ac.jp;sdstate.edu;helsinki.fi;rice.edu", "position": "Assistant Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfuruya2024can,\ntitle={Can neural operators always be continuously discretized?},\nauthor={Takashi Furuya and Michael Anthony Puthawala and Matti Lassas and Maarten V. de Hoop},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cyJxphdw3B}\n}", "github": "", "reviewers": "mJde;47x8;KEed;K5mB", "pdf_size": 722039, "rating": "4;6;6;7", "confidence": "2;2;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;3", "wc_summary": "111;84;73;58", "wc_strengths": "17;79;72;19", "wc_weaknesses": "113;79;168;76", "wc_questions": "124;101;9;4", "wc_limitations": "27;1;9;2", "wc_review": "392;344;331;159", "wc_reply_reviewers": "54;46;89;9", "wc_reply_authors": "0;0;536;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 81.5, 19.371370627810517 ], "wc_strengths_avg": [ 46.75, 28.86498744153546 ], "wc_weaknesses_avg": [ 109.0, 37.03376837428241 ], "wc_questions_avg": [ 59.5, 53.649324320069496 ], "wc_limitations_avg": [ 9.75, 10.425329730996522 ], "wc_review_avg": [ 306.5, 88.13767639324286 ], "wc_reply_reviewers_avg": [ 49.5, 28.429737951659 ], "wc_reply_authors_avg": [ 134.0, 232.09480821422954 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:e0emYyQrF_MJ:scholar.google.com/&scioq=Can+neural+operators+always+be+continuously+discretized%3F&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "shimane-u.ac.jp;sdstate.edu;helsinki.fi;rice.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shimane University;South Dakota State University;University of Helsinki;Rice University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shimane-u.ac.jp;https://www.sdsu.edu;https://www.helsinki.fi;https://www.rice.edu", "aff_unique_abbr": "Shimane U;SDSU;UH;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Japan;United States;Finland" }, { "title": "Self-Consuming Generative Models with Curated Data Provably Optimize Human Preferences", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94368", "id": "cyv0LkIaoH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=cyv0LkIaoH", "openreview": "https://openreview.net/forum?id=cyv0LkIaoH", "poster": "", "project": "", "author_site": "Damien Ferbach, Quentin Bertrand, Joey Bose, Gauthier Gidel", "tldr": "", "abstract": "The rapid progress in generative models has resulted in impressive leaps in generation quality, blurring the lines between synthetic and real data. Web-scale datasets are now prone to the inevitable contamination by synthetic data, directly impacting the training of future generated models.\n Already, some theoretical results on self-consuming generative models (a.k.a., iterative retraining) have emerged in the literature, showcasing that either model collapse or stability could be possible depending on the fraction of generated data used at each retraining step.\n However, in practice, synthetic data is often subject to human feedback and curated by users before being used and uploaded online. For instance, many interfaces of popular text-to-image generative models, such as Stable Diffusion or Midjourney, produce several variations of an image for a given query which can eventually be curated by the users.\n In this paper, we theoretically study the impact of data curation on iterated retraining of generative models and show that it can be seen as an implicit preference optimization mechanism. However, unlike standard preference optimization, the generative model does not have access to the reward function or negative samples needed for pairwise comparisons. Moreover, our study doesn't require access to the density function, only to samples. We prove that, if the data is curated according to a reward model, then the expected reward of the iterative retraining procedure is maximized. We further provide theoretical results on the stability of the retraining loop when using a positive fraction of real data at each step. Finally, we conduct illustrative experiments on both synthetic datasets and on CIFAR10 showing that such a procedure amplifies biases of the reward model.", "keywords": "retraining;curating;generative model;self-consuming", "primary_area": "generative_models", "supplementary_material": "", "author": "Damien Ferbach;Quentin Bertrand;Joey Bose;Gauthier Gidel", "authorids": "~Damien_Ferbach1;~Quentin_Bertrand1;~Joey_Bose1;~Gauthier_Gidel1", "gender": "M;M;M;M", "homepage": "https://damienferbach.github.io;https://qb3.github.io/index.html;https://joeybose.github.io/;https://gauthiergidel.github.io/", "dblp": ";;174/3372;188/6326", "google_scholar": "9ppcotkAAAAJ;Uxr3P78AAAAJ;ybPyI7IAAAAJ;https://scholar.google.fr/citations?user=bDrXQPUAAAAJ", "orcid": ";;;", "linkedin": "damien-ferbach-93940b229/?fbclid=IwAR30VoGhgQaNqYDr7U8SsRFWWEkJ3n4zmyjbRsMH9P1rPllE4Vs1zV3OknE;;;", "or_profile": "~Damien_Ferbach1;~Quentin_Bertrand1;~Joey_Bose1;~Gauthier_Gidel1", "aff": "Ecole Normale Sup\u00e9rieure de Paris;INRIA;University of Oxford;Mila - Quebec Artificial Intelligence Institute", "aff_domain": "ens.fr;inria.fr;oxford.ac.uk;mila.quebec", "position": "MS student;Researcher;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nferbach2024selfconsuming,\ntitle={Self-Consuming Generative Models with Curated Data Provably Optimize Human Preferences},\nauthor={Damien Ferbach and Quentin Bertrand and Joey Bose and Gauthier Gidel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=cyv0LkIaoH}\n}", "github": "", "reviewers": "cgiY;ej3h;WZgs;orKH;cUrK", "pdf_size": 33956196, "rating": "6;6;6;7;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;2;4;3", "presentation": "2;3;3;2;4", "wc_summary": "71;35;37;69;94", "wc_strengths": "52;24;42;164;85", "wc_weaknesses": "206;617;304;169;126", "wc_questions": "62;1;39;125;2", "wc_limitations": "1;1;8;338;8", "wc_review": "392;678;430;865;315", "wc_reply_reviewers": "28;633;29;194;0", "wc_reply_authors": "0;903;0;251;0", "reply_reviewers": "1;3;1;1;0", "reply_authors": "1;3;1;2;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 61.2, 22.38213573366045 ], "wc_strengths_avg": [ 73.4, 49.45139027368189 ], "wc_weaknesses_avg": [ 284.4, 176.38662080781523 ], "wc_questions_avg": [ 45.8, 45.84059336439702 ], "wc_limitations_avg": [ 71.2, 133.43672657855484 ], "wc_review_avg": [ 536.0, 204.55708249777126 ], "wc_reply_reviewers_avg": [ 176.8, 238.18429839097286 ], "wc_reply_authors_avg": [ 230.8, 349.87620667887666 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17376944766025435971&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "ens.fr;inria.fr;oxford.ac.uk;mila.quebec", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Ecole Normale Sup\u00e9rieure de Paris;INRIA;University of Oxford;Quebec Artificial Intelligence Institute", "aff_unique_dep": ";;;Artificial Intelligence", "aff_unique_url": "https://www.ens.fr;https://www.inria.fr;https://www.ox.ac.uk;https://mila.quebec", "aff_unique_abbr": "ENS Paris;INRIA;Oxford;Mila", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "France;United Kingdom;Canada" }, { "title": "FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97597", "id": "d0gMFgrYFB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d0gMFgrYFB", "openreview": "https://openreview.net/forum?id=d0gMFgrYFB", "poster": "/media/PosterPDFs/NeurIPS%202024/97597.png?t=1733750308.63786", "project": "", "author_site": "Xiaohan Lin, Qingxing Cao, Yinya Huang, Haiming Wang, Jianqiao Lu, Zhengying Liu, Linqi Song, Xiaodan Liang", "tldr": "", "abstract": "Formal verification (FV) has witnessed growing significance with current emerging\u00a0program synthesis by the evolving large language models (LLMs). However,\u00a0current formal verification mainly resorts to symbolic verifiers or hand-craft rules,\u00a0resulting in limitations for extensive and flexible verification. On the other hand,\u00a0formal languages for automated theorem proving, such as Isabelle, as another line\u00a0of rigorous verification, are maintained with comprehensive rules and theorems. In\u00a0this paper, we propose FVEL, an interactive Formal Verification Environment\u00a0with LLMs. Specifically, FVEL transforms a given code to be verified into Isabelle,\u00a0and then conducts verification via neural automated theorem proving with an LLM.\u00a0The joined paradigm leverages the rigorous yet abundant formulated and organized\u00a0rules in Isabelle and is also convenient for introducing and adjusting cutting-edge\u00a0LLMs. To achieve this goal, we extract a large-scale FVELER. The FVELER\u00a0dataset includes code dependencies and verification processes that are formulated in\u00a0Isabelle, containing 758 theories, 29,304 lemmas, and 201,498 proof steps in total\u00a0with in-depth dependencies. We benchmark FVELER in the FVEL environment by first fine-tuning LLMs with FVELER and then evaluating them on Code2Inv\u00a0and SV-COMP. The results show that FVEL with FVELER fine-tuned Llama3-8B solves 17.39% (69\u219281) more problems, and Mistral-7B 12% (75\u219284) more\u00a0problems in SV-COMP. And the proportion of proof errors is reduced. Project\u00a0page: https://fveler.github.io/.", "keywords": "formal verification;automated theorem proving;Isabelle", "primary_area": "", "supplementary_material": "/attachment/6b2c317e5a9b9a09384d5c52b2c9a5b97217cfd8.pdf", "author": "Xiaohan Lin;Qingxing Cao;Yinya Huang;Haiming Wang;Jianqiao Lu;Zhengying Liu;Linqi Song;Xiaodan Liang", "authorids": "~Xiaohan_Lin2;~Qingxing_Cao1;~Yinya_Huang1;~Haiming_Wang1;~Jianqiao_Lu1;~Zhengying_Liu2;~Linqi_Song1;~Xiaodan_Liang2", "gender": "M;M;;M;M;M;M;F", "homepage": "https://xiaohlim.github.io/;;https://eleanor-h.github.io/;;https://jianqiaolu.github.io/;;https://sites.google.com/site/aisquaredlab/;https://www.sysu-hcp.net/", "dblp": ";149/7615;282/1562;97/604;358/4791;241/1782;137/7963.html;", "google_scholar": ";flOBrd8AAAAJ;dWStaRIAAAAJ;zDPqP6AAAAAJ;uIW6d6AAAAAJ;http:// DFme0joAAAAJ;UcGN3MoAAAAJ;voxznZAAAAAJ", "orcid": ";;0000-0002-0686-0832;;;;0000-0003-2756-4984;", "linkedin": ";;;;jianqiao-lu-308620201/;;;", "or_profile": "~Xiaohan_Lin2;~Qingxing_Cao1;~Yinya_Huang1;~Haiming_Wang1;~Jianqiao_Lu1;~Zhengying_Liu2;~Linqi_Song1;~Xiaodan_Liang2", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY, Tsinghua University;City University of Hong Kong;SUN YAT-SEN UNIVERSITY;University of Hong Kong;Huawei Technologies Ltd.;City University of Hong Kong;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;cityu.edu.hk;sysu.edu.cn;hku.hk;huawei.com;cityu.edu.hk;sysu.edu.cn", "position": "MS student;Postdoc;Postdoc;PhD student;PhD student;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlin2024fvel,\ntitle={{FVEL}: Interactive Formal Verification Environment with Large Language Models via Theorem Proving},\nauthor={Xiaohan Lin and Qingxing Cao and Yinya Huang and Haiming Wang and Jianqiao Lu and Zhengying Liu and Linqi Song and Xiaodan Liang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=d0gMFgrYFB}\n}", "github": "", "reviewers": "XkcJ;WawV;BbdW;Do8m;9xRj", "pdf_size": 1785376, "rating": "6;6;7;7;7", "confidence": "2;4;3;3;4", "wc_summary_and_contributions": "51;44;79;51;41", "wc_strengths": "3;2;51;4;71", "wc_improvement": "3;2;174;4;200", "wc_limitations": "4;2;6;78;19", "wc_correctness": "1;1;6;1;6", "wc_clarity": "1;1;20;1;73", "wc_relation_to_prior_work": "1;1;8;1;7", "wc_documentation": "1;1;14;6;5", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "66;55;359;147;423", "wc_reply_reviewers": "19;15;12;87;586", "wc_reply_authors": "0;0;0;0;771", "reply_reviewers": "1;1;1;1;3", "reply_authors": "1;1;1;1;3", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 53.2, 13.481839637082173 ], "wc_strengths_avg": [ 26.2, 29.11631844859511 ], "wc_improvement_avg": [ 76.6, 90.51762259361432 ], "wc_limitations_avg": [ 21.8, 28.722116913625992 ], "wc_correctness_avg": [ 3.0, 2.449489742783178 ], "wc_clarity_avg": [ 19.2, 27.888348821685373 ], "wc_relation_to_prior_work_avg": [ 3.6, 3.2 ], "wc_documentation_avg": [ 5.4, 4.758150901348127 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 210.0, 152.51229458637098 ], "wc_reply_reviewers_avg": [ 143.8, 222.84649425108756 ], "wc_reply_authors_avg": [ 154.2, 308.4 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.21821789023599236, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8579137252735378388&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 8, "email": "sysu.edu.cn;sysu.edu.cn;cityu.edu.hk;sysu.edu.cn;hku.hk;huawei.com;cityu.edu.hk;sysu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;3;1;0", "aff_unique_norm": "Sun Yat-sen University;City University of Hong Kong;University of Hong Kong;Huawei", "aff_unique_dep": ";;;Huawei Technologies", "aff_unique_url": "http://www.sysu.edu.cn;https://www.cityu.edu.hk;https://www.hku.hk;https://www.huawei.com", "aff_unique_abbr": "SYSU;CityU;HKU;Huawei", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "d1Pup4gkWf", "title": "SMPLOlympics: Sports Environments for Physically Simulated Humanoids", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We present SMPLOlympics, a collection of physically simulated environments that allow humanoids to compete in a variety of Olympic sports. Sports simulation offers a rich and standardized testing ground for evaluating and improving the capabilities of learning algorithms due to the diversity and physically demanding nature of athletic activities. As humans have been competing in these sports for many years, there is also a plethora of existing knowledge on the preferred strategy to achieve better performance. To leverage these existing human demonstrations from videos and motion capture, we design our humanoid to be compatible with the widely-used SMPL and SMPL-X human models from the vision and graphics community. We provide a suite of individual sports environments, including golf, javelin throw, high jump, long jump, and hurdling, as well as competitive sports, including both 1v1 and 2v2 games such as table tennis, tennis, fencing, boxing, soccer, and basketball. Our analysis shows that combining strong motion priors with simple rewards can result in human-like behavior in various sports. By providing a unified sports benchmark and baseline implementation of state and reward designs, we hope that SMPLOlympics can help the control and animation communities achieve human-like and performant behaviors.", "keywords": "Simulated Humanoid Control;Sports;Physics Simulation", "primary_area": "", "supplementary_material": "/attachment/9f7e22e584ca73acc35b1abfe3889aee839209f7.zip", "author": "Zhengyi Luo;Jiashun Wang;Kangni Liu;Haotian Zhang;Chen Tessler;Jingbo Wang;Ye Yuan;Jinkun Cao;Zihui Lin;Fengyi Wang;Jessica K. Hodgins;Kris M. Kitani", "authorids": "~Zhengyi_Luo1;~Jiashun_Wang1;~Kangni_Liu1;~Haotian_Zhang7;~Chen_Tessler1;~Jingbo_Wang3;~Ye_Yuan5;~Jinkun_Cao1;~Zihui_Lin2;~Fengyi_Wang3;~Jessica_K._Hodgins1;~Kris_M._Kitani1", "gender": "M;M;;M;M;M;M;M;F;M;;M", "homepage": "https://zhengyiluo.github.io/;https://jiashunwang.github.io/;;https://cs.stanford.edu/~haotianz/;https://chentessler.wixsite.com/Tessler;https://scholar.google.com/citations?user=GStTsxAAAAAJ&hl=en;https://www.ye-yuan.com;https://www.jinkuncao.com;;;;http://www.cs.cmu.edu/~kkitani/", "dblp": ";260/6495;;;179/2389;10/1491-3.html;33/6315-7;224/0126;;;;42/163", "google_scholar": "lHPTxGsAAAAJ;gdO9Gb0AAAAJ;;Zp8MeiUAAAAJ;https://scholar.google.co.il/citations?user=7eLKa3IAAAAJ;GStTsxAAAAAJ;EEp82sIAAAAJ;xDtTbmQAAAAJ;;;;yv3sH74AAAAJ", "orcid": ";;;0009-0008-0293-337X;;;;;;;;0000-0002-9389-4060", "linkedin": "zhengyi-zen-luo-726156105/;;;;chentessler/;;;;zihui-georgia-lin-5b7600187/;franklinwayne/;;", "or_profile": "~Zhengyi_Luo1;~Jiashun_Wang1;~Kangni_Liu1;~Haotian_Zhang7;~Chen_Tessler1;~Jingbo_Wang3;~Ye_Yuan5;~Jinkun_Cao1;~Zihui_Lin2;~Fengyi_Wang3;~Jessica_K._Hodgins1;~Kris_M._Kitani1", "aff": "Meta Platforms, Inc.;NVIDIA;;NVIDIA;NVIDIA;Shanghai Artificial Intelligence Laboratory;NVIDIA Research;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "meta.com;nvidia.com;;nvidia.com;nvidia.com;pjlab.org.cn;nvidia.com;andrew.cmu.edu;cmu.edu;andrew.cmu.edu;;cmu.edu", "position": "Intern;Intern;;Researcher;Researcher;Researcher;Researcher;PhD student;MS student;Undergrad student;;Associate Professor", "bibtex": "@misc{\nanonymous2024smplolympics,\ntitle={{SMPLO}lympics: Sports Environments for Physically Simulated Humanoids},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=d1Pup4gkWf}\n}", "github": "", "project": "", "reviewers": "YQmi;LLTZ;whY3;SaX1", "site": "https://openreview.net/forum?id=d1Pup4gkWf", "pdf_size": 13754384, "rating": "6;6;6;6", "confidence": "2;3;3;3", "wc_summary_and_contributions": "40;33;56;142", "wc_strengths": "34;6;71;42", "wc_improvement": "293;52;62;124", "wc_limitations": "105;3;9;2", "wc_correctness": "27;6;13;2", "wc_clarity": "22;9;11;2", "wc_relation_to_prior_work": "12;6;6;2", "wc_documentation": "22;3;6;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "556;119;235;319", "wc_reply_reviewers": "186;0;0;0", "wc_reply_authors": "211;120;123;104", "reply_reviewers": "1;0;0;0", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 67.75, 43.671357890498435 ], "wc_strengths_avg": [ 38.25, 23.155722834755128 ], "wc_improvement_avg": [ 132.75, 96.54370771831792 ], "wc_limitations_avg": [ 29.75, 43.52800822459029 ], "wc_correctness_avg": [ 12.0, 9.513148795220223 ], "wc_clarity_avg": [ 11.0, 7.176350047203662 ], "wc_relation_to_prior_work_avg": [ 6.5, 3.570714214271425 ], "wc_documentation_avg": [ 8.25, 8.073877630977572 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 307.25, 160.2129442336043 ], "wc_reply_reviewers_avg": [ 46.5, 80.5403625519528 ], "wc_reply_authors_avg": [ 139.5, 41.907636535600524 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5074707447686459652&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;2;1;3;3;3;3", "aff_unique_norm": "Meta;NVIDIA;Shanghai Artificial Intelligence Laboratory;Carnegie Mellon University", "aff_unique_dep": "Meta Platforms, Inc.;NVIDIA Corporation;;", "aff_unique_url": "https://www.meta.com;https://www.nvidia.com;http://www.shailab.org/;https://www.cmu.edu", "aff_unique_abbr": "Meta;NVIDIA;Shanghai AI Lab;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "LeDex: Training LLMs to Better Self-Debug and Explain Code", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94367", "id": "d1XrZ4EINV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d1XrZ4EINV", "openreview": "https://openreview.net/forum?id=d1XrZ4EINV", "poster": "", "project": "", "author_site": "Nan Jiang, Xiaopeng Li, Shiqi Wang, Qiang Zhou, Soneya Hossain, Baishakhi Ray, Varun Kumar, Xiaofei Ma, Anoop Deoras", "tldr": "", "abstract": "In the domain of code generation, self-debugging is crucial. It allows LLMs to refine their generated code based on execution feedback. This is particularly important because generating correct solutions in one attempt proves challenging for complex tasks. Prior works on self-debugging mostly focus on prompting methods by providing LLMs with few-shot examples, which work poorly on small open-sourced LLMs. In this work, we propose LeDex, a training framework that significantly improves the self-debugging capability of LLMs. Intuitively, we observe that a chain of explanations on the wrong code followed by code refinement helps LLMs better analyze the wrong code and do refinement. We thus propose an automated pipeline to collect a high-quality dataset for code explanation and refinement by generating a number of explanations and refinement trajectories from the LLM itself or a larger teacher model and filtering via execution verification. We perform supervised fine-tuning (SFT) and further reinforcement learning (RL) on both success and failure trajectories with a novel reward design considering code explanation and refinement quality. SFT improves the pass@1 by up to 15.92\\% and pass@10 by 9.30\\% over four benchmarks. RL training brings additional up to 3.54\\% improvement on pass@1 and 2.55\\% improvement on pass@10. The trained LLMs show iterative refinement ability and can keep refining code continuously. Lastly, our human evaluation shows that the LLMs trained with our framework generate more useful code explanations and help developers better understand bugs in source code.", "keywords": "Large Language Model;Code Generation;Code Refinement;Code Explanation;Supervised Finetuning;Reinforcement Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Nan Jiang;Xiaopeng Li;Shiqi Wang;Qiang Zhou;Soneya Binta Hossain;Baishakhi Ray;Varun Kumar;Xiaofei Ma;Anoop Deoras", "authorids": "~Nan_Jiang14;~Xiaopeng_Li1;~Shiqi_Wang2;~Qiang_Zhou7;~Soneya_Binta_Hossain1;~Baishakhi_Ray2;~Varun_Kumar3;~Xiaofei_Ma1;~Anoop_Deoras1", "gender": "M;M;M;;F;F;M;M;M", "homepage": "https://jiang719.github.io/nanjiang/;http://eelxpeng.github.io/;https://shiqi-wang.github.io;;https://soneyahossain.github.io;http://rayb.info/;https://varunkumar-dev.github.io/;https://www.amazon.science/author/xiaofei-ma;", "dblp": ";;58/9145-2;;;74/1969;;;55/8761", "google_scholar": "cFT1sL8AAAAJ;https://scholar.google.com.hk/citations?user=vUZu9msAAAAJ;u_MzXeMAAAAJ;;xDDfwB8AAAAJ;https://scholar.google.com.tw/citations?user=VaAEb5YAAAAJ;d-La2lQAAAAJ;Pc2SfvMAAAAJ;QF_rhCIAAAAJ", "orcid": ";;0000-0002-6338-1432;;;;;;", "linkedin": ";eelxpeng/;tcwangshiqi/;qiang-zhou-8188b155;;;varunin/;xiaofei-ma-b3627928;anoopdeoras/", "or_profile": "~Nan_Jiang14;~Xiaopeng_Li1;~Shiqi_Wang2;~Qiang_Zhou7;~Soneya_Binta_Hossain1;~Baishakhi_Ray2;~Varun_Kumar3;~Xiaofei_Ma1;~Anoop_Deoras1", "aff": "Purdue University;Amazon;Amazon;Amazon;University of Virginia;Columbia University;Amazon;Amazon Web Services;Amazon", "aff_domain": "purdue.edu;amazon.com;amazon.com;amazon.com;uva.edu;columbia.edu;amazon.com;amazon.com;amazon.com", "position": "PhD student;Applied Scientist;Researcher;Researcher;PhD student;Assistant Professor;Principal Researcher;Applied Science Manager;Principal Researcher", "bibtex": "@inproceedings{\njiang2024ledex,\ntitle={LeDex: Training {LLM}s to Better Self-Debug and Explain Code},\nauthor={Nan Jiang and Xiaopeng Li and Shiqi Wang and Qiang Zhou and Soneya Binta Hossain and Baishakhi Ray and Varun Kumar and Xiaofei Ma and Anoop Deoras},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=d1XrZ4EINV}\n}", "github": "", "reviewers": "2CSS;jfHT;osNA;uGRw;Dkve", "pdf_size": 2942868, "rating": "5;6;7;7;8", "confidence": "4;4;4;5;4", "soundness": "3;3;3;4;3", "novelty": "3;3;2;3;3", "presentation": "3;3;3;3;4", "wc_summary": "54;103;102;121;111", "wc_strengths": "43;116;45;38;88", "wc_weaknesses": "284;151;357;19;118", "wc_questions": "38;207;172;36;236", "wc_limitations": "10;43;1;1;1", "wc_review": "429;620;677;215;554", "wc_reply_reviewers": "314;0;185;0;82", "wc_reply_authors": "483;0;731;0;0", "reply_reviewers": "2;0;2;0;1", "reply_authors": "2;1;2;1;1", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 98.2, 23.12920232087566 ], "wc_strengths_avg": [ 66.0, 30.78311225331188 ], "wc_weaknesses_avg": [ 185.8, 120.48468782380606 ], "wc_questions_avg": [ 137.8, 84.76414336262711 ], "wc_limitations_avg": [ 11.2, 16.277591959500644 ], "wc_review_avg": [ 499.0, 164.25955071167095 ], "wc_reply_reviewers_avg": [ 116.2, 120.02733022107924 ], "wc_reply_authors_avg": [ 242.8, 307.5356239527382 ], "reply_reviewers_avg": [ 1.0, 0.8944271909999159 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.1961161351381841, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=509179852627466867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "purdue.edu;amazon.com;amazon.com;amazon.com;uva.edu;columbia.edu;amazon.com;amazon.com;amazon.com", "author_num": 9, "aff_unique_index": "0;1;1;1;2;3;1;1;1", "aff_unique_norm": "Purdue University;Amazon;University of Virginia;Columbia University", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.purdue.edu;https://www.amazon.com;https://www.virginia.edu;https://www.columbia.edu", "aff_unique_abbr": "Purdue;Amazon;UVA;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Knowledge Graph Completion by Intermediate Variables Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94366", "id": "d226uyWYUo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d226uyWYUo", "openreview": "https://openreview.net/forum?id=d226uyWYUo", "poster": "/media/PosterPDFs/NeurIPS%202024/94366.png?t=1731322886.5246007", "project": "", "author_site": "Changyi Xiao, Yixin Cao", "tldr": "", "abstract": "Knowledge graph completion (KGC) can be framed as a 3-order binary tensor completion task. Tensor decomposition-based (TDB) models have demonstrated strong performance in KGC. In this paper, we provide a summary of existing TDB models and derive a general form for them, serving as a foundation for further exploration of TDB models. Despite the expressiveness of TDB models, they are prone to overfitting. Existing regularization methods merely minimize the norms of embeddings to regularize the model, leading to suboptimal performance. Therefore, we propose a novel regularization method for TDB models that addresses this limitation. The regularization is applicable to most TDB models and ensures tractable computation. Our method minimizes the norms of intermediate variables involved in the different ways of computing the predicted tensor. To support our regularization method, we provide a theoretical analysis that proves its effect in promoting low trace norm of the predicted tensor to reduce overfitting. Finally, we conduct experiments to verify the effectiveness of our regularization technique as well as the reliability of our theoretical analysis. The code is available at https://github.com/changyi7231/IVR.", "keywords": "Knowledge Graph Completion;Tensor Decomposition;Regularization", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/b7c5116c56bc44024013bb87b8a06399255a975c.zip", "author": "Changyi Xiao;Yixin Cao", "authorids": "~Changyi_Xiao1;~Yixin_Cao2", "gender": "M;M", "homepage": ";https://sites.google.com/view/yixin-homepage", "dblp": "270/8871;20/8038-2", "google_scholar": "0_fwA4QAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Changyi_Xiao1;~Yixin_Cao2", "aff": "Fudan University;Singapore Management University", "aff_domain": "fudan.edu.cn;smu.edu.sg", "position": "Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nxiao2024knowledge,\ntitle={Knowledge Graph Completion by Intermediate Variables Regularization},\nauthor={Changyi Xiao and Yixin Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=d226uyWYUo}\n}", "github": "", "reviewers": "YvVe;fmAK;gAVe;8dJT", "pdf_size": 1281368, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "161;79;78;114", "wc_strengths": "119;99;44;57", "wc_weaknesses": "411;278;11;52", "wc_questions": "195;98;61;64", "wc_limitations": "1;49;13;1", "wc_review": "887;603;207;288", "wc_reply_reviewers": "61;16;30;20", "wc_reply_authors": "50;0;50;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.0, 33.860005906674026 ], "wc_strengths_avg": [ 79.75, 30.441542339375644 ], "wc_weaknesses_avg": [ 188.0, 164.05334498266106 ], "wc_questions_avg": [ 104.5, 54.23329235810785 ], "wc_limitations_avg": [ 16.0, 19.672315572906 ], "wc_review_avg": [ 496.25, 269.775253683506 ], "wc_reply_reviewers_avg": [ 31.75, 17.640507362318125 ], "wc_reply_authors_avg": [ 25.0, 25.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7B7s8ezAvXwJ:scholar.google.com/&scioq=Knowledge+Graph+Completion+by+Intermediate+Variables+Regularization&hl=en&as_sdt=0,3", "gs_version_total": 2, "email": "fudan.edu.cn;smu.edu.sg", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Fudan University;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "Fudan;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "title": "RankUp: Boosting Semi-Supervised Regression with an Auxiliary Ranking Classifier", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94365", "id": "d2lPM1Aczs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d2lPM1Aczs", "openreview": "https://openreview.net/forum?id=d2lPM1Aczs", "poster": "", "project": "", "author_site": "Pin-Yen Huang, Szu-Wei Fu, Yu Tsao", "tldr": "", "abstract": "State-of-the-art (SOTA) semi-supervised learning techniques, such as FixMatch and it's variants, have demonstrated impressive performance in classification tasks. However, these methods are not directly applicable to regression tasks. In this paper, we present RankUp, a simple yet effective approach that adapts existing semi-supervised classification techniques to enhance the performance of regression tasks. RankUp achieves this by converting the original regression task into a ranking problem and training it concurrently with the original regression objective. This auxiliary ranking classifier outputs a classification result, thus enabling integration with existing semi-supervised classification methods. Moreover, we introduce regression distribution alignment (RDA), a complementary technique that further enhances RankUp's performance by refining pseudo-labels through distribution alignment. Despite its simplicity, RankUp, with or without RDA, achieves SOTA results in across a range of regression benchmarks, including computer vision, audio, and natural language processing tasks. Our code and log data are open-sourced at [https://github.com/pm25/semi-supervised-regression](https://github.com/pm25/semi-supervised-regression).", "keywords": "Semi-Supervised Learning;Weakly Supervised Learning;Regression", "primary_area": "other", "supplementary_material": "", "author": "Pin-Yen Huang;Szu-Wei Fu;Yu Tsao", "authorids": "~Pin-Yen_Huang1;~Szu-Wei_Fu1;~Yu_Tsao1", "gender": ";;M", "homepage": ";https://jasonswfu.github.io/JasonFu.github.io/;https://www.citi.sinica.edu.tw/pages/yu.tsao/index_en.html", "dblp": ";160/0591;66/7146", "google_scholar": ";eSGkKm4AAAAJ;https://scholar.google.com.tw/citations?user=ZO5e5I4AAAAJ", "orcid": ";;0000-0001-6956-0418", "linkedin": ";https://www.linkedin.com/mwlite/in/szu-wei-fu-78b47817a;", "or_profile": "~Pin-Yen_Huang1;~Szu-Wei_Fu1;~Yu_Tsao1", "aff": ";Microsoft;Academia Sinica", "aff_domain": ";microsoft.com;sinica.edu.tw", "position": ";Researcher;Full Professor", "bibtex": "@inproceedings{\nhuang2024rankup,\ntitle={RankUp: Boosting Semi-Supervised Regression with an Auxiliary Ranking Classifier},\nauthor={Pin-Yen Huang and Szu-Wei Fu and Yu Tsao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=d2lPM1Aczs}\n}", "github": "", "reviewers": "41bS;Ngei;tpBB;PMad", "pdf_size": 2979070, "rating": "4;6;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;2;3", "wc_summary": "50;141;35;64", "wc_strengths": "24;46;9;73", "wc_weaknesses": "141;130;7;17", "wc_questions": "35;33;44;34", "wc_limitations": "1;8;2;7", "wc_review": "251;358;97;195", "wc_reply_reviewers": "0;17;9;12", "wc_reply_authors": "37;9;9;9", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 40.85645603818325 ], "wc_strengths_avg": [ 38.0, 24.114311103574988 ], "wc_weaknesses_avg": [ 73.75, 61.973280532823175 ], "wc_questions_avg": [ 36.5, 4.387482193696061 ], "wc_limitations_avg": [ 4.5, 3.0413812651491097 ], "wc_review_avg": [ 225.25, 94.404382843171 ], "wc_reply_reviewers_avg": [ 9.5, 6.18465843842649 ], "wc_reply_authors_avg": [ 16.0, 12.12435565298214 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oO0Rs13hCjIJ:scholar.google.com/&scioq=RankUp:+Boosting+Semi-Supervised+Regression+with+an+Auxiliary+Ranking+Classifier&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";microsoft.com;sinica.edu.tw", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Academia Sinica", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.sinica.edu.tw", "aff_unique_abbr": "Microsoft;Academia Sinica", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "EPIC: Effective Prompting for Imbalanced-Class Data Synthesis in Tabular Data Classification via Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94364", "id": "d5cKDHCrFJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d5cKDHCrFJ", "openreview": "https://openreview.net/forum?id=d5cKDHCrFJ", "poster": "/media/PosterPDFs/NeurIPS%202024/94364.png?t=1733280726.251948", "project": "", "author_site": "Jinhee Kim, Taesung Kim, Jaegul Choo", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated remarkable in-context learning capabilities across diverse applications. In this work, we explore the effectiveness of LLMs for generating realistic synthetic tabular data, identifying key prompt design elements to optimize performance. We introduce EPIC, a novel approach that leverages balanced, grouped data samples and consistent formatting with unique variable mapping to guide LLMs in generating accurate synthetic data across all classes, even for imbalanced datasets. Evaluations on real-world datasets show that EPIC achieves state-of-the-art machine learning classification performance, significantly improving generation efficiency. These findings highlight the effectiveness of EPIC for synthetic tabular data generation, particularly in addressing class imbalance.", "keywords": "Large language model;In-context learning;Few-shot learning;Class imbalance;Tabular data;Synthetic data generation", "primary_area": "generative_models", "supplementary_material": "/attachment/5008ab02cde134c4c24e536876d727db967dfdd3.zip", "author": "Jinhee Kim;Taesung Kim;Jaegul Choo", "authorids": "~Jinhee_Kim1;~Taesung_Kim1;~Jaegul_Choo1", "gender": "F;M;M", "homepage": "https://sites.google.com/view/jinhee-kim/;;https://sites.google.com/site/jaegulchoo/", "dblp": ";;07/2074", "google_scholar": "G7JUwU8AAAAJ;rvp49kYAAAAJ;GHJYsLEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jinhee_Kim1;~Taesung_Kim1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nkim2024epic,\ntitle={{EPIC}: Effective Prompting for Imbalanced-Class Data Synthesis in Tabular Data Classification via Large Language Models},\nauthor={Jinhee Kim and Taesung Kim and Jaegul Choo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=d5cKDHCrFJ}\n}", "github": "", "reviewers": "kZFF;8bof;zoZq;p2sf", "pdf_size": 10068082, "rating": "5;5;6;8", "confidence": "3;4;3;4", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;2;3;4", "wc_summary": "61;42;68;96", "wc_strengths": "44;46;75;65", "wc_weaknesses": "134;90;93;187", "wc_questions": "38;128;15;33", "wc_limitations": "65;7;4;35", "wc_review": "342;313;255;416", "wc_reply_reviewers": "17;124;14;0", "wc_reply_authors": "54;734;50;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 66.75, 19.382659776202026 ], "wc_strengths_avg": [ 57.5, 13.009611831257688 ], "wc_weaknesses_avg": [ 126.0, 39.274673773310965 ], "wc_questions_avg": [ 53.5, 43.85487430149584 ], "wc_limitations_avg": [ 27.75, 24.67159297653883 ], "wc_review_avg": [ 331.5, 57.97628825649328 ], "wc_reply_reviewers_avg": [ 38.75, 49.63554673819963 ], "wc_reply_authors_avg": [ 209.5, 303.56671424910866 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14089697131903076421&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Context-Aware Testing: A New Paradigm for Model Testing with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94363", "id": "d75qCZb7TX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d75qCZb7TX", "openreview": "https://openreview.net/forum?id=d75qCZb7TX", "poster": "", "project": "", "author_site": "Paulius Rauba, Nabeel Seedat, Max Ruiz Luyten, Mihaela van der Schaar", "tldr": "", "abstract": "The predominant *de facto* paradigm of testing ML models relies on either using only held-out data to compute aggregate evaluation metrics or by assessing the performance on different subgroups. However, such *data-only testing* methods operate under the restrictive assumption that the available empirical data is the sole input for testing ML models, disregarding valuable contextual information that could guide model testing. In this paper, we challenge the go-to approach of *data-only testing* and introduce *Context-Aware Testing* (CAT) which uses context as an inductive bias to guide the search for meaningful model failures. We instantiate the first CAT system, *SMART Testing*, which employs large language models to hypothesize relevant and likely failures, which are evaluated on data using a *self-falsification mechanism*. Through empirical evaluations in diverse settings, we show that SMART automatically identifies more relevant and impactful failures than alternatives, demonstrating the potential of CAT as a testing paradigm.", "keywords": "model testing;tabular data;large language models", "primary_area": "evaluation", "supplementary_material": "/attachment/26457dd17e82c54cf50f76385a60201f4114d4fd.zip", "author": "Paulius Rauba;Nabeel Seedat;Max Ruiz Luyten;Mihaela van der Schaar", "authorids": "~Paulius_Rauba1;~Nabeel_Seedat1;~Max_Ruiz_Luyten1;~Mihaela_van_der_Schaar2", "gender": "M;;;F", "homepage": ";;;https://www.vanderschaar-lab.com", "dblp": ";227/8368;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;P4z6s_MAAAAJ;DZ3S--MAAAAJ", "orcid": ";;;", "linkedin": "paulius-rauba/;nabeel-seedat/;max-ruiz/;", "or_profile": "~Paulius_Rauba1;~Nabeel_Seedat1;~Max_Ruiz_Luyten1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;AstraZeneca;University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;astrazeneca.com;cam.ac.uk;ucla.edu", "position": "PhD student;Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\nrauba2024contextaware,\ntitle={Context-Aware Testing: A New Paradigm for Model Testing with Large Language Models},\nauthor={Paulius Rauba and Nabeel Seedat and Max Ruiz Luyten and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=d75qCZb7TX}\n}", "github": "", "reviewers": "ctad;H6db;bfyv;6t3M", "pdf_size": 1166774, "rating": "3;6;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "1;3;3;3", "presentation": "2;3;4;3", "wc_summary": "148;94;86;130", "wc_strengths": "65;71;76;22", "wc_weaknesses": "142;68;163;107", "wc_questions": "75;78;185;44", "wc_limitations": "71;59;4;16", "wc_review": "501;370;514;319", "wc_reply_reviewers": "440;671;30;36", "wc_reply_authors": "1719;2707;264;101", "reply_reviewers": "2;6;1;1", "reply_authors": "5;9;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 114.5, 25.470571253900058 ], "wc_strengths_avg": [ 58.5, 21.43011899173684 ], "wc_weaknesses_avg": [ 120.0, 36.076308015094895 ], "wc_questions_avg": [ 95.5, 53.35962893424204 ], "wc_limitations_avg": [ 37.5, 28.146935890075138 ], "wc_review_avg": [ 426.0, 83.59724875855665 ], "wc_reply_reviewers_avg": [ 294.25, 273.72648300812983 ], "wc_reply_authors_avg": [ 1197.75, 1075.2077415550914 ], "reply_reviewers_avg": [ 2.5, 2.0615528128088303 ], "reply_authors_avg": [ 4.75, 2.680951323690902 ], "replies_avg": [ 35, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1181853520796167098&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;astrazeneca.com;cam.ac.uk;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Cambridge;AstraZeneca;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.astrazeneca.com;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;AZ;UCLA", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Cambridge;;Los Angeles", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Classification Diffusion Models: Revitalizing Density Ratio Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94362", "id": "d99yCfOnwK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=d99yCfOnwK", "openreview": "https://openreview.net/forum?id=d99yCfOnwK", "poster": "/media/PosterPDFs/NeurIPS%202024/94362.png?t=1733345639.593819", "project": "", "author_site": "Shahar Yadin, Noam Elata, Tomer Michaeli", "tldr": "", "abstract": "A prominent family of methods for learning data distributions relies on density ratio estimation (DRE), where a model is trained to *classify* between data samples and samples from some reference distribution. DRE-based models can directly output the likelihood for any given input, a highly desired property that is lacking in most generative techniques. Nevertheless, to date, DRE methods have failed in accurately capturing the distributions of complex high-dimensional data, like images, and have thus been drawing reduced research attention in recent years. \nIn this work we present *classification diffusion models* (CDMs), a DRE-based generative method that adopts the formalism of denoising diffusion models (DDMs) while making use of a classifier that predicts the level of noise added to a clean signal. Our method is based on an analytical connection that we derive between the MSE-optimal denoiser for removing white Gaussian noise and the cross-entropy-optimal classifier for predicting the noise level. Our method is the first DRE-based technique that can successfully generate images beyond the MNIST dataset. Furthermore, it can output the likelihood of any input in a single forward pass, achieving state-of-the-art negative log likelihood (NLL) among methods with this property.", "keywords": "Density Ratio Estimation", "primary_area": "generative_models", "supplementary_material": "", "author": "Shahar Yadin;Noam Elata;Tomer Michaeli", "authorids": "~Shahar_Yadin1;~Noam_Elata1;~Tomer_Michaeli1", "gender": ";M;M", "homepage": ";https://noamelata.github.io;https://tomer.net.technion.ac.il/", "dblp": ";347/9215;70/3188.html", "google_scholar": ";88l-2DcAAAAJ;n2EbR2cAAAAJ", "orcid": ";0009-0000-2692-2781;", "linkedin": "shahar-yadin-069725195/?originalSubdomain=il;noamelata/;", "or_profile": "~Shahar_Yadin1;~Noam_Elata1;~Tomer_Michaeli1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion, Technion", "aff_domain": "campus.technion.ac.il;campus.technion.ac.il;technion.ac.il", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyadin2024classification,\ntitle={Classification Diffusion Models: Revitalizing Density Ratio Estimation},\nauthor={Shahar Yadin and Noam Elata and Tomer Michaeli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=d99yCfOnwK}\n}", "github": "", "reviewers": "15GS;ftzp;fmsZ;93Vh", "pdf_size": 3753536, "rating": "5;6;8;8", "confidence": "2;5;5;4", "soundness": "3;3;3;4", "novelty": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "64;62;65;430", "wc_strengths": "27;100;94;93", "wc_weaknesses": "100;438;336;131", "wc_questions": "26;187;360;1", "wc_limitations": "19;4;197;1", "wc_review": "236;791;1052;656", "wc_reply_reviewers": "31;82;248;0", "wc_reply_authors": "0;104;133;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 155.25, 158.6306638074745 ], "wc_strengths_avg": [ 78.5, 29.85381047705636 ], "wc_weaknesses_avg": [ 251.25, 140.88537007084872 ], "wc_questions_avg": [ 143.5, 143.94182852805505 ], "wc_limitations_avg": [ 55.25, 82.12300225880688 ], "wc_review_avg": [ 683.75, 295.1104666053036 ], "wc_reply_reviewers_avg": [ 90.25, 95.66706591089746 ], "wc_reply_authors_avg": [ 59.25, 60.13058705850127 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6285393610547089, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1006007565398674426&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "campus.technion.ac.il;campus.technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "dA0AdHrdAQ", "title": "ELCC: the Emergent Language Corpus Collection", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We introduce the Emergent Language Corpus Collection (ELCC): a collection of corpora collected from open source implementations of emergent communication systems across the literature. These systems include a variety of signalling game environments as well as more complex tasks like a social deduction game and embodied navigation. Each corpus is annotated with metadata describing the characteristics of the source system as well as a suite of analyses of the corpus (e.g., size, entropy, average message length). Currently, research studying emergent languages requires directly running different systems which takes time away from actual analyses of such languages, limits the variety of languages that are studied, and presents a barrier to entry for researchers without a background in deep learning. The availability of a substantial collection of well-documented emergent language corpora, then, will enable new directions of research which focus their purview on the properties of emergent languages themselves rather than on experimental apparatus.", "keywords": "emergent language;emergent communication;dataset", "primary_area": "", "supplementary_material": "/attachment/f81c1f875a0d903d24381a523fe672c56015459e.pdf", "author": "Brendon Boldt;David R Mortensen", "authorids": "~Brendon_Boldt1;~David_R_Mortensen1", "gender": "M;M", "homepage": "http://brendonjboldt.xyz/;http://www.cs.cmu.edu/~dmortens/", "dblp": "207/4805;180/5443", "google_scholar": "QEXlK3AAAAAJ;https://scholar.google.com/citations?authuser=1", "orcid": "0000-0002-5599-5581;0000-0002-3927-6851", "linkedin": ";davidrmortensen/", "or_profile": "~Brendon_Boldt1;~David_R_Mortensen1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu", "position": "PhD student;Assistant Research Professor", "bibtex": "@misc{\nanonymous2024elcc,\ntitle={{ELCC}: the Emergent Language Corpus Collection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=dA0AdHrdAQ}\n}", "github": "", "project": "", "reviewers": "Efy8;zsif;GNDh", "site": "https://openreview.net/forum?id=dA0AdHrdAQ", "pdf_size": 233777, "rating": "4;4;6", "confidence": "2;4;4", "wc_summary_and_contributions": "66;83;144", "wc_strengths": "14;89;17", "wc_improvement": "80;289;29", "wc_limitations": "1;21;67", "wc_correctness": "1;47;8", "wc_clarity": "11;81;7", "wc_relation_to_prior_work": "1;29;17", "wc_documentation": "1;35;10", "wc_additional_feedback": "1;1;1", "wc_review": "176;675;300", "wc_reply_reviewers": "0;259;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 4.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 97.66666666666667, 33.48963355361709 ], "wc_strengths_avg": [ 40.0, 34.66987164671943 ], "wc_improvement_avg": [ 132.66666666666666, 112.48802405392121 ], "wc_limitations_avg": [ 29.666666666666668, 27.63250903475027 ], "wc_correctness_avg": [ 18.666666666666668, 20.237478982214054 ], "wc_clarity_avg": [ 33.0, 33.980386499665755 ], "wc_relation_to_prior_work_avg": [ 15.666666666666666, 11.469767022723502 ], "wc_documentation_avg": [ 15.333333333333334, 14.383632673594278 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 383.6666666666667, 212.1325581381185 ], "wc_reply_reviewers_avg": [ 86.33333333333333, 122.09377088487722 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:x2_TjSiAO3YJ:scholar.google.com/&scioq=ELCC:+the+Emergent+Language+Corpus+Collection&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "One-Shot Safety Alignment for Large Language Models via Optimal Dualization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94361", "id": "dA7hUm4css", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dA7hUm4css", "openreview": "https://openreview.net/forum?id=dA7hUm4css", "poster": "/media/PosterPDFs/NeurIPS%202024/94361.png?t=1732079715.886703", "project": "", "author_site": "Xinmeng Huang, Shuo Li, Edgar Dobriban, Osbert Bastani, Hamed Hassani, Dongsheng Ding", "tldr": "", "abstract": "The growing safety concerns surrounding large language models raise an urgent need to align them with diverse human preferences to simultaneously enhance their helpfulness and safety. A promising approach is to enforce safety constraints through Reinforcement Learning from Human Feedback (RLHF). For such constrained RLHF, typical Lagrangian-based primal-dual policy optimization methods are computationally expensive and often unstable. This paper presents a perspective of dualization that reduces constrained alignment to an equivalent unconstrained alignment problem. We do so by pre-optimizing a smooth and convex dual function that has a closed form. This shortcut eliminates the need for cumbersome primal-dual policy iterations, greatly reducing the computational burden and improving training stability. Our strategy leads to two practical algorithms in model-based and preference-based settings (MoCAN and PeCAN, respectively). A broad range of experiments demonstrate the effectiveness and merits of our algorithms.", "keywords": "Large Language Models;Alignment;RLHF;Safety;Constraints", "primary_area": "generative_models", "supplementary_material": "", "author": "Xinmeng Huang;Shuo Li;Edgar Dobriban;Osbert Bastani;Hamed Hassani;Dongsheng Ding", "authorids": "~Xinmeng_Huang1;~Shuo_Li7;~Edgar_Dobriban2;~Osbert_Bastani1;~Hamed_Hassani2;~Dongsheng_Ding1", "gender": "M;M;;M;M;", "homepage": ";;https://statistics.wharton.upenn.edu/profile/dobriban/;http://obastani.github.io;https://www.seas.upenn.edu/~hassani/;https://dongshed.github.io", "dblp": "256/1617;;99/11269;21/11275;73/4984;120/4610", "google_scholar": "vM2nHxEAAAAJ;-QaDf40AAAAJ;aGvH4yMAAAAJ;cxYepGkAAAAJ;;Ixa7PJoAAAAJ", "orcid": ";;;;;", "linkedin": "xinmeng-huang-8032221b3/;shuo-li-bbb2a11b1/;edgar-dobriban/;;;", "or_profile": "~Xinmeng_Huang1;~Shuo_Li7;~Edgar_Dobriban2;~Osbert_Bastani1;~Hamed_Hassani2;~Dongsheng_Ding1", "aff": "University of Pennsylvania;Amazon;The Wharton School, University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;amazon.com;wharton.upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Intern;Associate Professor;Assistant Professor;;Postdoc", "bibtex": "@inproceedings{\nhuang2024oneshot,\ntitle={One-Shot Safety Alignment for Large Language Models via Optimal Dualization},\nauthor={Xinmeng Huang and Shuo Li and Edgar Dobriban and Osbert Bastani and Hamed Hassani and Dongsheng Ding},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dA7hUm4css}\n}", "github": "", "reviewers": "wcCD;EtW9;TvNM;S7ke;1pVY", "pdf_size": 1080207, "rating": "5;7;8;8;8", "confidence": "4;4;3;2;4", "soundness": "3;3;2;4;4", "novelty": "2;3;4;3;4", "presentation": "2;3;4;3;4", "wc_summary": "221;79;51;86;74", "wc_strengths": "10;36;58;55;120", "wc_weaknesses": "127;58;308;61;47", "wc_questions": "123;5;255;126;35", "wc_limitations": "1;1;144;169;7", "wc_review": "482;179;816;497;283", "wc_reply_reviewers": "66;22;129;39;0", "wc_reply_authors": "355;108;134;0;0", "reply_reviewers": "2;1;2;1;0", "reply_authors": "4;3;3;1;1", "rating_avg": [ 7.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 102.2, 60.548823275105846 ], "wc_strengths_avg": [ 55.8, 36.378015338937885 ], "wc_weaknesses_avg": [ 120.2, 98.02734312425284 ], "wc_questions_avg": [ 108.8, 87.28436286070948 ], "wc_limitations_avg": [ 64.4, 75.64548896001665 ], "wc_review_avg": [ 451.4, 218.40750902842146 ], "wc_reply_reviewers_avg": [ 51.2, 44.485503256679024 ], "wc_reply_authors_avg": [ 119.4, 129.89472660581723 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.2000000000000002 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5144957554275266, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16497487379978527064&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "upenn.edu;amazon.com;wharton.upenn.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Pennsylvania;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://www.amazon.com", "aff_unique_abbr": "UPenn;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SpaFL: Communication-Efficient Federated Learning With Sparse Models And Low Computational Overhead", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94360", "id": "dAXuir2ets", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dAXuir2ets", "openreview": "https://openreview.net/forum?id=dAXuir2ets", "poster": "/media/PosterPDFs/NeurIPS%202024/94360.png?t=1731007040.1081536", "project": "", "author_site": "Minsu Kim, Walid Saad, Merouane DEBBAH, Choong Hong", "tldr": "", "abstract": "The large communication and computation overhead of federated learning (FL) is one of the main challenges facing its practical deployment over resource-constrained clients and systems. In this work, SpaFL: a communication-efficient FL framework is proposed to optimize sparse model structures with low computational overhead. In SpaFL, a trainable threshold is defined for each filter/neuron to prune its all connected \nparameters, thereby leading to structured sparsity. To optimize the pruning process itself, only thresholds are communicated between a server and clients instead of parameters, thereby learning how to prune. Further, global thresholds are used to update model parameters by extracting aggregated parameter importance. The generalization bound of SpaFL is also derived, thereby proving key insights on the relation between sparsity and performance. Experimental results show that SpaFL improves accuracy while requiring much less communication and computing resources compared to sparse baselines. The code is available at https://github.com/news-vt/SpaFL_NeruIPS_2024", "keywords": "Federated Learning;Communication and Computation Efficiency;Pruning", "primary_area": "other", "supplementary_material": "/attachment/ed92d95074f29e815389e36d3469a62cf67cac27.zip", "author": "Minsu Kim;Walid Saad;Merouane Abdelkader DEBBAH;Choong Seon Hong", "authorids": "~Minsu_Kim6;~Walid_Saad1;~Merouane_Abdelkader_DEBBAH1;~Choong_Seon_Hong1", "gender": ";;M;M", "homepage": ";http://www.netsciwis.com;;http://networking.khu.ac.kr", "dblp": "25/6052;41/6237;75/4085;73/1778.html", "google_scholar": "c6vqXkQAAAAJ;kYDNA0UAAAAJ;HU5I0X4AAAAJ;oKANWloAAAAJ", "orcid": ";;;0000-0003-3484-7333", "linkedin": ";;merouanedebbah/;", "or_profile": "~Minsu_Kim6;~Walid_Saad1;~Merouane_Abdelkader_DEBBAH1;~Choong_Seon_Hong1", "aff": "Virginia Polytechnic Institute and State University;Virginia Tech;Khalifa University of Science, Technology and Research;Kyung Hee University", "aff_domain": "vt.edu;vt.edu;ku.ac.ae;khu.ac.kr", "position": "PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nkim2024spafl,\ntitle={Spa{FL}: Communication-Efficient Federated Learning With Sparse Models And Low Computational Overhead},\nauthor={Minsu Kim and Walid Saad and Merouane Abdelkader DEBBAH and Choong Seon Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dAXuir2ets}\n}", "github": "", "reviewers": "28Jj;Ju3Y;qu5D", "pdf_size": 1514069, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "36;18;60", "wc_strengths": "34;14;53", "wc_weaknesses": "301;81;79", "wc_questions": "12;2;41", "wc_limitations": "32;11;1", "wc_review": "415;126;234", "wc_reply_reviewers": "719;25;20", "wc_reply_authors": "2088;25;196", "reply_reviewers": "4;1;1", "reply_authors": "6;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 38.0, 17.204650534085253 ], "wc_strengths_avg": [ 33.666666666666664, 15.923427883328248 ], "wc_weaknesses_avg": [ 153.66666666666666, 104.18359862388235 ], "wc_questions_avg": [ 18.333333333333332, 16.539514973407037 ], "wc_limitations_avg": [ 14.666666666666666, 12.918548250050735 ], "wc_review_avg": [ 258.3333333333333, 119.23180038153505 ], "wc_reply_reviewers_avg": [ 254.66666666666666, 328.3395938489431 ], "wc_reply_authors_avg": [ 769.6666666666666, 934.8127560580723 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18277242520449417541&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "vt.edu;vt.edu;ku.ac.ae;khu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Virginia Tech;Khalifa University of Science, Technology and Research;Kyung Hee University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vt.edu;https://www.kustar.ac.ae;http://www.khu.ac.kr", "aff_unique_abbr": "VT;KUSTAR;KHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "United States;United Arab Emirates;South Korea" }, { "title": "Towards Understanding How Transformers Learn In-context Through a Representation Learning Lens", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94359", "id": "dB6gwSDXKL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dB6gwSDXKL", "openreview": "https://openreview.net/forum?id=dB6gwSDXKL", "poster": "/media/PosterPDFs/NeurIPS%202024/94359.png?t=1730438130.6433802", "project": "", "author_site": "Ruifeng Ren, Yong Liu", "tldr": "", "abstract": "Pre-trained large language models based on Transformers have demonstrated remarkable in-context learning (ICL) abilities. With just a few demonstration examples, the models can implement new tasks without any parameter updates. However, it is still an open question to understand the mechanism of ICL. In this paper, we attempt to explore the ICL process in Transformers through a lens of representation learning. Initially, leveraging kernel methods, we figure out a dual model for one softmax attention layer. The ICL inference process of the attention layer aligns with the training procedure of its dual model, generating token representation predictions that are equivalent to the dual model's test outputs. We delve into the training process of this dual model from a representation learning standpoint and further derive a generalization error bound related to the quantity of demonstration tokens. Subsequently, we extend our theoretical conclusions to more complicated scenarios, including one Transformer layer and multiple attention layers. Furthermore, drawing inspiration from existing representation learning methods especially contrastive learning, we propose potential modifications for the attention layer. Finally, experiments are designed to support our findings.", "keywords": "Transformers;In-context Learning;Representation Learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/7990b83ea912ae0ca02a5708e5d457b492dade1e.zip", "author": "Ruifeng Ren;Yong Liu", "authorids": "~Ruifeng_Ren3;~Yong_Liu7", "gender": "M;M", "homepage": "https://github.com/Miao-Mouse/RuifengRen.github.io;https://iie-liuyong.github.io", "dblp": "359/3361;29/4867-18", "google_scholar": "HXOxmJAAAAAJ;vVhmzbAAAAAJ", "orcid": ";0000-0002-6739-621X", "linkedin": ";", "or_profile": "~Ruifeng_Ren3;~Yong_Liu7", "aff": "Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nren2024towards,\ntitle={Towards Understanding How Transformers Learn In-context Through a Representation Learning Lens},\nauthor={Ruifeng Ren and Yong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dB6gwSDXKL}\n}", "github": "", "reviewers": "8J5b;k2vC;HnPE;cRiK", "pdf_size": 18529500, "rating": "5;5;6;8", "confidence": "4;1;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;3", "wc_summary": "117;74;54;65", "wc_strengths": "48;69;78;21", "wc_weaknesses": "71;90;57;84", "wc_questions": "110;39;5;42", "wc_limitations": "21;29;7;1", "wc_review": "367;301;201;213", "wc_reply_reviewers": "0;0;17;206", "wc_reply_authors": "89;80;112;1467", "reply_reviewers": "0;0;1;3", "reply_authors": "2;2;3;4", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 23.879907872519105 ], "wc_strengths_avg": [ 54.0, 21.94310825749169 ], "wc_weaknesses_avg": [ 75.5, 12.698425099200294 ], "wc_questions_avg": [ 49.0, 38.09855640309748 ], "wc_limitations_avg": [ 14.5, 11.07925990308017 ], "wc_review_avg": [ 270.5, 67.78458526833369 ], "wc_reply_reviewers_avg": [ 55.75, 87.02406276427227 ], "wc_reply_authors_avg": [ 437.0, 594.7852553653294 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3036834647683898238&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;ruc.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning Linear Causal Representations from General Environments: Identifiability and Intrinsic Ambiguity", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94358", "id": "dB99jjwx3h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dB99jjwx3h", "openreview": "https://openreview.net/forum?id=dB99jjwx3h", "poster": "", "project": "", "author_site": "Jikai Jin, Vasilis Syrgkanis", "tldr": "", "abstract": "We study causal representation learning, the task of recovering high-level latent variables and their causal relationships in the form of a causal graph from low-level observed data (such as text and images), assuming access to observations generated from multiple environments. Prior results on the identifiability of causal representations typically assume access to single-node interventions which is rather unrealistic in practice, since the latent variables are unknown in the first place. In this work, we consider the task of learning causal representation learning with data collected from general environments. We show that even when the causal model and the mixing function are both linear, there exists a surrounded-node ambiguity (SNA) [Varici et al. 2023] which is basically unavoidable in our setting. On the other hand, in the same linear case, we show that identification up to SNA is possible under mild conditions, and propose an algorithm, LiNGCReL which provably achieves such identifiability guarantee. We conduct extensive experiments on synthetic data and demonstrate the effectiveness of LiNGCReL in the finite-sample regime.", "keywords": "causal representation learning; causal inference", "primary_area": "causal_inference", "supplementary_material": "", "author": "Jikai Jin;Vasilis Syrgkanis", "authorids": "~Jikai_Jin1;~Vasilis_Syrgkanis1", "gender": "M;", "homepage": "https://www.jkjin.com/;https://www.vsyrgkanis.com", "dblp": "276/0406;", "google_scholar": "xQqZt2AAAAAJ;G1WMpcUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Jikai_Jin1;~Vasilis_Syrgkanis1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\njin2024learning,\ntitle={Learning Linear Causal Representations from General Environments: Identifiability and Intrinsic Ambiguity},\nauthor={Jikai Jin and Vasilis Syrgkanis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dB99jjwx3h}\n}", "github": "", "reviewers": "mSkL;Cu44;Vjor;FDC3", "pdf_size": 1555611, "rating": "5;6;7;7", "confidence": "3;2;2;3", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;3", "wc_summary": "53;60;86;160", "wc_strengths": "35;19;79;40", "wc_weaknesses": "194;23;34;103", "wc_questions": "32;383;100;258", "wc_limitations": "1;2;1;10", "wc_review": "315;487;300;571", "wc_reply_reviewers": "80;306;0;14", "wc_reply_authors": "214;1631;0;0", "reply_reviewers": "1;3;0;1", "reply_authors": "2;5;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 89.75, 42.381452311123084 ], "wc_strengths_avg": [ 43.25, 22.049659861322123 ], "wc_weaknesses_avg": [ 88.5, 68.19274154922941 ], "wc_questions_avg": [ 193.25, 136.8345259793741 ], "wc_limitations_avg": [ 3.5, 3.774917217635375 ], "wc_review_avg": [ 418.25, 114.78539759046008 ], "wc_reply_reviewers_avg": [ 100.0, 122.71104269787622 ], "wc_reply_authors_avg": [ 461.25, 680.9828834119107 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Nv63ZSDGjawJ:scholar.google.com/&scioq=Learning+Linear+Causal+Representations+from+General+Environments:+Identifiability+and+Intrinsic+Ambiguity&hl=en&as_sdt=0,11", "gs_version_total": 0, "email": "stanford.edu;stanford.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "ControlSynth Neural ODEs: Modeling Dynamical Systems with Guaranteed Convergence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94357", "id": "dBE8KHdMFs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dBE8KHdMFs", "openreview": "https://openreview.net/forum?id=dBE8KHdMFs", "poster": "/media/PosterPDFs/NeurIPS%202024/94357.png?t=1731670692.0086913", "project": "", "author_site": "Wenjie Mei, Dongzhe Zheng, Shihua Li", "tldr": "", "abstract": "Neural ODEs (NODEs) are continuous-time neural networks (NNs) that can process data without the limitation of time intervals. They have advantages in learning and understanding the evolution of complex real dynamics. Many previous works have focused on NODEs in concise forms, while numerous physical systems taking straightforward forms in fact belong to their more complex quasi-classes, thus appealing to a class of general NODEs with high scalability and flexibility to model those systems. This however may result in intricate nonlinear properties. In this paper, we introduce ControlSynth Neural ODEs (CSODEs). We show that despite their highly nonlinear nature, convergence can be guaranteed via tractable linear inequalities. In the composition of CSODEs, we introduce an extra control term for learning the potential simultaneous capture of dynamics at different scales, which could be particularly useful for partial differential equation-formulated systems. Finally, we compare several representative NNs with CSODEs on important physical dynamics under the inductive biases of CSODEs, and illustrate that CSODEs have better learning and predictive abilities in these settings.", "keywords": "Neural ODEs;Differential Equations;Dynamical Systems;Deep Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Wenjie Mei;Dongzhe Zheng;Shihua Li", "authorids": "~Wenjie_Mei1;~Dongzhe_Zheng1;~Shihua_Li3", "gender": "M;M;M", "homepage": ";;https://automation.seu.edu.cn/lsh/list.htm", "dblp": "222/3197.html;359/9725.html;", "google_scholar": "1P8cYogAAAAJ;Szg1nBQAAAAJ;Gg2jy5MAAAAJ", "orcid": "0000-0001-6964-9797;0009-0007-4105-0628;0000-0001-9044-7137", "linkedin": ";denzel-zheng-b4297921a;", "or_profile": "~Wenjie_Mei1;~Dongzhe_Zheng1;~Shihua_Li3", "aff": "Vanderbilt University;Shanghai Jiaotong University;Southeast University", "aff_domain": "vanderbilt.edu;sjtu.edu.cn;seu.edu.cn", "position": "Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nmei2024controlsynth,\ntitle={ControlSynth Neural {ODE}s: Modeling Dynamical Systems with Guaranteed Convergence},\nauthor={Wenjie Mei and Dongzhe Zheng and Shihua Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dBE8KHdMFs}\n}", "github": "", "reviewers": "Wqu1;JYd3;x1sG;tUtW", "pdf_size": 5129884, "rating": "5;5;7;8", "confidence": "2;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "1;3;3;3", "wc_summary": "51;68;71;67", "wc_strengths": "30;34;54;61", "wc_weaknesses": "11;74;228;16", "wc_questions": "42;43;35;46", "wc_limitations": "35;6;19;1", "wc_review": "169;225;407;191", "wc_reply_reviewers": "11;73;119;51", "wc_reply_authors": "126;882;1397;50", "reply_reviewers": "1;2;2;1", "reply_authors": "3;4;5;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 64.25, 7.790218225441442 ], "wc_strengths_avg": [ 44.75, 13.06474263045392 ], "wc_weaknesses_avg": [ 82.25, 87.71651782874193 ], "wc_questions_avg": [ 41.5, 4.031128874149275 ], "wc_limitations_avg": [ 15.25, 13.160072188251856 ], "wc_review_avg": [ 248.0, 93.94147114027967 ], "wc_reply_reviewers_avg": [ 63.5, 38.99679474008088 ], "wc_reply_authors_avg": [ 613.75, 557.0351761783093 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9424291223165233377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "vanderbilt.edu;sjtu.edu.cn;seu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Vanderbilt University;Shanghai Jiao Tong University;Southeast University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vanderbilt.edu;https://www.sjtu.edu.cn;https://www.seu.edu.cn/", "aff_unique_abbr": "Vanderbilt;SJTU;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "title": "PEACE: A Dataset of Pharmaceutical Care for Cancer Pain Analgesia Evaluation and Medication Decision", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97596", "id": "dBSoa8fpV7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dBSoa8fpV7", "openreview": "https://openreview.net/forum?id=dBSoa8fpV7", "poster": "/media/PosterPDFs/NeurIPS%202024/97596.png?t=1730555885.9781818", "project": "", "author_site": "Yutao Dou, Huimin Yu, Wei Li, Jingyang Li, Fei Xia, Jian Xiao", "tldr": "", "abstract": "Over half of cancer patients experience long-term pain management challenges. Recently, interest has grown in systems for cancer pain treatment effectiveness assessment (TEA) and medication recommendation (MR) to optimize pharmacological care. These systems aim to improve treatment effectiveness by recommending personalized medication plans based on comprehensive patient information. Despite progress, current systems lack multidisciplinary treatment (MDT) team assessments of treatment and the patient's perception of medication, crucial for effective cancer pain management. Moreover, managing cancer pain medication requires multiple adjustments to the treatment plan based on the patient's evolving condition, a detail often missing in existing datasets. To tackle these issues, we designed the PEACE dataset specifically for cancer pain medication research. It includes detailed pharmacological care records for over 38,000 patients, covering demographics, clinical examination, treatment outcomes, medication plans, and patient self-perceptions. Unlike existing datasets, PEACE records not only long-term and multiple follow-ups both inside and outside hospitals but also includes patients' self-assessments of medication effects and the impact on their lives. We conducted a proof-of-concept study with 13 machine learning algorithms on the PEACE dataset for the TEA (classification task) and MR (regression task). These experiments provide valuable insights into the potential of the PEACE dataset for advancing personalized cancer pain management. The dataset is accessible at: [https://github.com/YTYTYD/PEACE].", "keywords": "Dataset;Cancer Pain;Pharmaceutical Care;Analgesia Evaluation;Medication Decision", "primary_area": "", "supplementary_material": "/attachment/d138d3adf7d10b90e1e689164ada9b2f1ddc404f.zip", "author": "Yutao Dou;Huimin Yu;Wei Li;Jingyang Li;Fei Xia;Jian Xiao", "authorids": "~Yutao_Dou1;~Huimin_Yu3;~Wei_Li68;~Jingyang_Li8;~Fei_Xia6;~Jian_Xiao2", "gender": "M;F;M;;M;M", "homepage": "https://orcid.org/my-orcid?orcid=0000-0001-9990-690X;;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/weiwilson-li.html;;https://ieeexplore.ieee.org/author/37087359018;", "dblp": ";;64/6025-58;;;", "google_scholar": ";;JMAhctQAAAAJ;;;", "orcid": "0000-0001-9990-690X;0000-0002-9258-5063;;0000-0002-1232-8084;;0000-0002-4039-306X", "linkedin": ";;;;;", "or_profile": "~Yutao_Dou1;~Huimin_Yu3;~Wei_Li68;~Jingyang_Li8;~Fei_Xia6;~Jian_Xiao2", "aff": "Hunan University;Central South University;The University of Sydney;Central South University;National University of Defense Technology;Department of Computer Science, University of Massachusetts at Amherst", "aff_domain": "hnu.edu.cn;csu.edu.cn;sydney.edu.au;csu.edu.cn;nudt.edu.cn;cs.umass.edu", "position": "PhD student;MS student;Lecturer;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndou2024peace,\ntitle={{PEACE}: A Dataset of Pharmaceutical Care for Cancer Pain Analgesia Evaluation and Medication Decision},\nauthor={Yutao Dou and Huimin Yu and Wei Li and Jingyang Li and Fei Xia and Jian Xiao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=dBSoa8fpV7}\n}", "github": "", "reviewers": "HMu4;e7K2;ofqK;Azjb", "pdf_size": 955351, "rating": "6;6;7;7", "confidence": "2;4;4;4", "wc_summary_and_contributions": "91;63;48;79", "wc_strengths": "108;10;23;2", "wc_improvement": "41;19;5;2", "wc_limitations": "38;48;5;2", "wc_correctness": "6;1;4;1", "wc_clarity": "6;1;5;1", "wc_relation_to_prior_work": "10;1;4;1", "wc_documentation": "45;16;12;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "346;160;107;90", "wc_reply_reviewers": "27;48;119;193", "wc_reply_authors": "39;36;49;25", "reply_reviewers": "1;1;2;2", "reply_authors": "3;2;3;4", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 70.25, 16.23845743905498 ], "wc_strengths_avg": [ 35.75, 42.381452311123084 ], "wc_improvement_avg": [ 16.75, 15.400892831261439 ], "wc_limitations_avg": [ 23.25, 20.09197601033806 ], "wc_correctness_avg": [ 3.0, 2.1213203435596424 ], "wc_clarity_avg": [ 3.25, 2.277608394786075 ], "wc_relation_to_prior_work_avg": [ 4.0, 3.6742346141747673 ], "wc_documentation_avg": [ 18.5, 16.25576820700886 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 175.75, 101.62769061628823 ], "wc_reply_reviewers_avg": [ 96.75, 65.19346209551998 ], "wc_reply_authors_avg": [ 37.25, 8.554969316134336 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12092472411131927361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "hnu.edu.cn;csu.edu.cn;sydney.edu.au;csu.edu.cn;nudt.edu.cn;cs.umass.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;3;4", "aff_unique_norm": "Hunan University;Central South University;University of Sydney;National University of Defense Technology;University of Massachusetts Amherst", "aff_unique_dep": ";;;;Department of Computer Science", "aff_unique_url": "http://www.hunu.edu.cn/;https://www.csu.edu.cn;https://www.sydney.edu.au;http://www.nudt.edu.cn/;https://www.umass.edu", "aff_unique_abbr": "HNU;CSU;USYD;NUDT;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;1;0;0;2", "aff_country_unique": "China;Australia;United States" }, { "title": "Probabilistic size-and-shape functional mixed models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94356", "id": "dBynjEbAt0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dBynjEbAt0", "openreview": "https://openreview.net/forum?id=dBynjEbAt0", "poster": "/media/PosterPDFs/NeurIPS%202024/94356.png?t=1733846166.2085123", "project": "", "author_site": "Fangyi Wang, Karthik Bharath, Oksana Chkrebtii, Sebastian Kurtek", "tldr": "", "abstract": "The reliable recovery and uncertainty quantification of a fixed effect function $\\mu$ in a functional mixed model, for modeling population- and object-level variability in noisily observed functional data, is a notoriously challenging task: variations along the $x$ and $y$ axes are confounded with additive measurement error, and cannot in general be disentangled. The question then as to what properties of $\\mu$ may be reliably recovered becomes important. We demonstrate that it is possible to recover the size-and-shape of a square-integrable $\\mu$ under a Bayesian functional mixed model. The size-and-shape of $\\mu$ is a geometric property invariant to a family of space-time unitary transformations, viewed as rotations of the Hilbert space, that jointly transform the $x$ and $y$ axes. A random object-level unitary transformation then captures size-and-shape preserving deviations of $\\mu$ from an individual function, while a random linear term and measurement error capture size-and-shape altering deviations. The model is regularized by appropriate priors on the unitary transformations, posterior summaries of which may then be suitably interpreted as optimal data-driven rotations of a fixed orthonormal basis for the Hilbert space. Our numerical experiments demonstrate utility of the proposed model, and superiority over the current state-of-the-art.", "keywords": "statistical shape analysis;size-and-shape perturbation model;Bayesian random effects model;norm-preserving transformation;phase function", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/a69b1ca7e84287c6445e2cea066097e7d0ca1ca9.zip", "author": "Fangyi Wang;Karthik Bharath;Oksana Chkrebtii;Sebastian Kurtek", "authorids": "~Fangyi_Wang2;~Karthik_Bharath1;~Oksana_Chkrebtii2;~Sebastian_Kurtek1", "gender": "F;M;;M", "homepage": "https://fw258.github.io/;https://karthikbharath.github.io/;https://www.asc.ohio-state.edu/chkrebtii.1//research.html;", "dblp": ";;;19/5997", "google_scholar": "7hFXmQEAAAAJ;;;6TUtm-EAAAAJ", "orcid": ";;0000-0002-3629-5213;", "linkedin": ";;;", "or_profile": "~Fangyi_Wang2;~Karthik_Bharath1;~Oksana_Chkrebtii2;~Sebastian_Andrew_Kurtek1", "aff": "Ohio State University, Columbus;University of Nottingham;Ohio State University;Ohio State University", "aff_domain": "osu.edu;nottingham.ac.uk;osu.edu;osu.edu", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024probabilistic,\ntitle={Probabilistic size-and-shape functional mixed models},\nauthor={Fangyi Wang and Karthik Bharath and Oksana Chkrebtii and Sebastian Kurtek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dBynjEbAt0}\n}", "github": "", "reviewers": "jjxT;JKM1;yD41;UDtq;qjNQ", "pdf_size": 12915846, "rating": "5;5;5;7;7", "confidence": "1;2;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;3;3;3;2", "wc_summary": "39;68;74;101;75", "wc_strengths": "49;41;56;108;57", "wc_weaknesses": "69;30;103;292;611", "wc_questions": "87;33;198;85;248", "wc_limitations": "5;1;29;18;2", "wc_review": "249;173;460;604;993", "wc_reply_reviewers": "0;0;42;31;240", "wc_reply_authors": "0;0;23;34;745", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 71.4, 19.784842683225964 ], "wc_strengths_avg": [ 62.2, 23.608473055240147 ], "wc_weaknesses_avg": [ 221.0, 214.77895613863106 ], "wc_questions_avg": [ 130.2, 79.7882196818553 ], "wc_limitations_avg": [ 11.0, 10.862780491200215 ], "wc_review_avg": [ 495.8, 291.65280728976364 ], "wc_reply_reviewers_avg": [ 62.6, 90.25652330995251 ], "wc_reply_authors_avg": [ 160.4, 292.59842788367814 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.840168050416806, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VdFEi72bs2YJ:scholar.google.com/&scioq=Probabilistic+size-and-shape+functional+mixed+models&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "osu.edu;nottingham.ac.uk;osu.edu;osu.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Ohio State University;University of Nottingham", "aff_unique_dep": ";", "aff_unique_url": "https://www.osu.edu;https://www.nottingham.ac.uk", "aff_unique_abbr": "OSU;UoN", "aff_campus_unique_index": "0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "UDC: A Unified Neural Divide-and-Conquer Framework for Large-Scale Combinatorial Optimization Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94355", "id": "dCgbyvmlwL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dCgbyvmlwL", "openreview": "https://openreview.net/forum?id=dCgbyvmlwL", "poster": "/media/PosterPDFs/NeurIPS%202024/94355.png?t=1731591806.728764", "project": "", "author_site": "Zhi Zheng, Changliang Zhou, Tong Xialiang, Mingxuan Yuan, Zhenkun Wang", "tldr": "", "abstract": "Single-stage neural combinatorial optimization solvers have achieved near-optimal results on various small-scale combinatorial optimization (CO) problems without requiring expert knowledge. However, these solvers exhibit significant performance degradation when applied to large-scale CO problems. Recently, two-stage neural methods motivated by divide-and-conquer strategies have shown efficiency in addressing large-scale CO problems. Nevertheless, the performance of these methods highly relies on problem-specific heuristics in either the dividing or the conquering procedure, which limits their applicability to general CO problems. Moreover, these methods employ separate training schemes and ignore the interdependencies between the dividing and conquering strategies, often leading to sub-optimal solutions. To tackle these drawbacks, this article develops a unified neural divide-and-conquer framework (i.e., UDC) for solving general large-scale CO problems. UDC offers a Divide-Conquer-Reunion (DCR) training method to eliminate the negative impact of a sub-optimal dividing policy. Employing a high-efficiency Graph Neural Network (GNN) for global instance dividing and a fixed-length sub-path solver for conquering divided sub-problems, the proposed UDC framework demonstrates extensive applicability, achieving superior performance in 10 representative large-scale CO problems. The code is available at https://github.com/CIAM-Group/NCO_code/tree/main/single_objective/UDC-Large-scale-CO-master", "keywords": "Neural Combinatorial Optimization;Large-Scale Combinatorial Optimization Problem;Neural Divide-and-Conquer;Vehicle Routing Problem;Unified Framework", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/f520f2862257f6594cc3e7317f5755b7ad14c018.zip", "author": "Zhi Zheng;Changliang Zhou;Tong Xialiang;Mingxuan Yuan;Zhenkun Wang", "authorids": "~Zhi_Zheng2;~Changliang_Zhou1;~Tong_Xialiang2;~Mingxuan_Yuan1;~Zhenkun_Wang1", "gender": "M;M;M;M;M", "homepage": "https://zz1358m.github.io/zhizheng.github.io//;;;;https://faculty.sustech.edu.cn/wangzk3/en/", "dblp": ";365/5197;https://dblp.uni-trier.de/pid/245/5977.html;74/2356;96/9114", "google_scholar": "nxJ4qM4AAAAJ;9IzIC7kAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=r9ezy2gAAAAJ", "orcid": "0009-0005-8785-8177;0009-0005-4512-9558;;0000-0002-2236-8784;0000-0003-1152-6780", "linkedin": ";;;;", "or_profile": "~Zhi_Zheng2;~Changliang_Zhou1;~Tong_Xialiang2;~Mingxuan_Yuan1;~Zhenkun_Wang1", "aff": "Southern University of Science and Technology;Southern University of Science and Technology;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Southern University of Science and Technology", "aff_domain": "sustc.edu.cn;sustech.edu;huawei.com;huawei.com;sustech.edu.cn", "position": "Undergrad student;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzheng2024udc,\ntitle={{UDC}: A Unified Neural Divide-and-Conquer Framework for Large-Scale Combinatorial Optimization Problems},\nauthor={Zhi Zheng and Changliang Zhou and Tong Xialiang and Mingxuan Yuan and Zhenkun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dCgbyvmlwL}\n}", "github": "", "reviewers": "kAJo;h7Dg;Eauz;Uqww", "pdf_size": 0, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;4", "presentation": "2;1;3;3", "wc_summary": "67;13;77;35", "wc_strengths": "86;40;65;60", "wc_weaknesses": "503;711;36;16", "wc_questions": "2;2;54;114", "wc_limitations": "1;2;52;23", "wc_review": "659;768;284;248", "wc_reply_reviewers": "11;648;82;43", "wc_reply_authors": "6;860;28;26", "reply_reviewers": "1;3;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 48.0, 25.475478405713993 ], "wc_strengths_avg": [ 62.75, 16.361158271956175 ], "wc_weaknesses_avg": [ 316.5, 299.7469766319587 ], "wc_questions_avg": [ 43.0, 46.16275555033516 ], "wc_limitations_avg": [ 19.5, 20.71834935510066 ], "wc_review_avg": [ 489.75, 227.40093997167207 ], "wc_reply_reviewers_avg": [ 196.0, 262.1707458890103 ], "wc_reply_authors_avg": [ 230.0, 363.832378987907 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3830449861474744563&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sustc.edu.cn;sustech.edu;huawei.com;huawei.com;sustech.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Southern University of Science and Technology;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.sustech.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SUSTech;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "UniDSeg: Unified Cross-Domain 3D Semantic Segmentation via Visual Foundation Models Prior", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94354", "id": "dDDc3iNZA7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dDDc3iNZA7", "openreview": "https://openreview.net/forum?id=dDDc3iNZA7", "poster": "/media/PosterPDFs/NeurIPS%202024/94354.png?t=1731651280.7454922", "project": "", "author_site": "Yao Wu, Mingwei Xing, Yachao Zhang, Xiaotong Luo, Yuan Xie, Yanyun Qu", "tldr": "", "abstract": "3D semantic segmentation using an adapting model trained from a source domain with or without accessing unlabeled target-domain data is the fundamental task in computer vision, containing domain adaptation and domain generalization.\nThe essence of simultaneously solving cross-domain tasks is to enhance the generalizability of the encoder.\nIn light of this, we propose a groundbreaking universal method with the help of off-the-shelf Visual Foundation Models (VFMs) to boost the adaptability and generalizability of cross-domain 3D semantic segmentation, dubbed $\\textbf{UniDSeg}$.\nOur method explores the VFMs prior and how to harness them, aiming to inherit the recognition ability of VFMs.\nSpecifically, this method introduces layer-wise learnable blocks to the VFMs, which hinges on alternately learning two representations during training: (i) Learning visual prompt. The 3D-to-2D transitional prior and task-shared knowledge is captured from the prompt space, and then (ii) Learning deep query. Spatial Tunability is constructed to the representation of distinct instances driven by prompts in the query space.\nIntegrating these representations into a cross-modal learning framework, UniDSeg efficiently mitigates the domain gap between 2D and 3D modalities, achieving unified cross-domain 3D semantic segmentation.\nExtensive experiments demonstrate the effectiveness of our method across widely recognized tasks and datasets, all achieving superior performance over state-of-the-art methods. Remarkably, UniDSeg achieves 57.5\\%/54.4\\% mIoU on ``A2D2/sKITTI'' for domain adaptive/generalized tasks. Code is available at https://github.com/Barcaaaa/UniDSeg.", "keywords": "Domain Generalization;Domain Adaptation;3D Semantic Segmentation;Visual Foundation Models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yao Wu;Mingwei Xing;Yachao Zhang;Xiaotong Luo;Yuan Xie;Yanyun Qu", "authorids": "~Yao_Wu3;~Mingwei_Xing1;~Yachao_Zhang1;~Xiaotong_Luo3;~Yuan_Xie5;~Yanyun_Qu1", "gender": ";;M;;;F", "homepage": "https://barcaaaa.github.io/;https://github.com/koun-xmw;https://yachao-zhang.github.io/;;;http://quyanyun.xmu.edu.cn", "dblp": ";359/4215;40/10584-1;;;03/3500", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.de/citations?user=a-I8c8EAAAAJ;;;", "orcid": "0000-0003-0227-5641;;0000-0002-6153-5004;;;", "linkedin": ";;;;;", "or_profile": "~Yao_Wu3;~Mingwei_Xing1;~Yachao_Zhang1;~Xiaotong_Luo3;~Yuan_Xie5;~Yanyun_Qu1", "aff": "Xiamen University;Xiamen University;Tsinghua University;;;Xiamen University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;tsinghua.edu.cn;;;xmu.edu.cn", "position": "PhD student;MS student;Postdoc;;;Full Professor", "bibtex": "@inproceedings{\nwu2024unidseg,\ntitle={Uni{DS}eg: Unified Cross-Domain 3D Semantic Segmentation via Visual Foundation Models Prior},\nauthor={Yao Wu and Mingwei Xing and Yachao Zhang and Xiaotong Luo and Yuan Xie and Yanyun Qu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dDDc3iNZA7}\n}", "github": "", "reviewers": "T5Kw;KKSy;4rFB;DeKz", "pdf_size": 5589667, "rating": "4;4;5;7", "confidence": "4;4;4;3", "soundness": "3;3;4;4", "novelty": "2;2;2;3", "presentation": "3;2;2;3", "wc_summary": "48;51;72;43", "wc_strengths": "18;42;50;43", "wc_weaknesses": "48;182;393;98", "wc_questions": "123;131;142;6", "wc_limitations": "15;36;2;11", "wc_review": "252;442;659;201", "wc_reply_reviewers": "40;0;18;28", "wc_reply_authors": "13;0;168;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 53.5, 11.05667219374799 ], "wc_strengths_avg": [ 38.25, 12.090802289343747 ], "wc_weaknesses_avg": [ 180.25, 131.83393910522435 ], "wc_questions_avg": [ 100.5, 54.974994315597705 ], "wc_limitations_avg": [ 16.0, 12.469963913339926 ], "wc_review_avg": [ 388.5, 180.15340685093912 ], "wc_reply_reviewers_avg": [ 21.5, 14.654350889752845 ], "wc_reply_authors_avg": [ 45.25, 71.06818908625715 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4019939692513347887&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "xmu.edu.cn;xmu.edu.cn;tsinghua.edu.cn;;;xmu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Xiamen University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "XMU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Unified Framework for 3D Scene Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94353", "id": "dE1bTyyC9A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dE1bTyyC9A", "openreview": "https://openreview.net/forum?id=dE1bTyyC9A", "poster": "/media/PosterPDFs/NeurIPS%202024/94353.png?t=1731333503.0112526", "project": "", "author_site": "Wei Xu, Chunsheng Shi, Sifan Tu, Xin Zhou, Dingkang Liang, Xiang Bai", "tldr": "", "abstract": "We propose UniSeg3D, a unified 3D scene understanding framework that achieves panoptic, semantic, instance, interactive, referring, and open-vocabulary segmentation tasks within a single model. Most previous 3D segmentation approaches are typically tailored to a specific task, limiting their understanding of 3D scenes to a task-specific perspective. In contrast, the proposed method unifies six tasks into unified representations processed by the same Transformer. It facilitates inter-task knowledge sharing, thereby promoting comprehensive 3D scene understanding. To take advantage of multi-task unification, we enhance performance by establishing explicit inter-task associations. Specifically, we design knowledge distillation and contrastive learning to transfer task-specific knowledge across different tasks. Experiments on three benchmarks, including ScanNet20, ScanRefer, and ScanNet200, demonstrate that the UniSeg3D consistently outperforms current SOTA methods, even those specialized for individual tasks. We hope UniSeg3D can serve as a solid unified baseline and inspire future work. Code and models are available at \\url{https://dk-liang.github.io/UniSeg3D/}.", "keywords": "3D scene understanding; Unified model; 3D segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wei Xu;Chunsheng Shi;Sifan Tu;Xin Zhou;Dingkang Liang;Xiang Bai", "authorids": "~Wei_Xu15;~Chunsheng_Shi2;~Sifan_Tu1;~Xin_Zhou17;~Dingkang_Liang2;~Xiang_Bai1", "gender": ";;;M;;M", "homepage": ";;https://github.com/SamorsetTuska;https://lmd0311.github.io/;;http://mclab.eic.hust.edu.cn/~xbai", "dblp": ";;;05/3403-13;;59/2741", "google_scholar": ";;;SWU7N_sAAAAJ;;UeltiQ4AAAAJ", "orcid": ";;;0009-0009-4752-6118;;", "linkedin": ";;;;;", "or_profile": "~Wei_Xu15;~Chunsheng_Shi2;~Sifan_Tu1;~Xin_Zhou17;~Dingkang_Liang2;~Xiang_Bai1", "aff": ";;Huazhong University of Science and Technology;;;Huazhong University of Science and Technology", "aff_domain": ";;hust.edu.cn;;;hust.edu.cn", "position": ";;Undergrad student;;;Full Professor", "bibtex": "@inproceedings{\nxu2024a,\ntitle={A Unified Framework for 3D Scene Understanding},\nauthor={Wei Xu and Chunsheng Shi and Sifan Tu and Xin Zhou and Dingkang Liang and Xiang Bai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dE1bTyyC9A}\n}", "github": "", "reviewers": "3AFf;PEfy;C3ws;r1wg", "pdf_size": 15896878, "rating": "5;6;7;7", "confidence": "4;3;3;5", "soundness": "3;4;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "42;70;62;54", "wc_strengths": "39;46;48;35", "wc_weaknesses": "175;101;191;91", "wc_questions": "175;101;5;56", "wc_limitations": "12;59;15;2", "wc_review": "443;377;321;238", "wc_reply_reviewers": "48;25;49;29", "wc_reply_authors": "565;32;34;30", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.0, 10.344080432788601 ], "wc_strengths_avg": [ 42.0, 5.244044240850758 ], "wc_weaknesses_avg": [ 139.5, 44.00852190201348 ], "wc_questions_avg": [ 84.25, 62.43947068962068 ], "wc_limitations_avg": [ 22.0, 21.89748844045819 ], "wc_review_avg": [ 344.75, 75.25415270933559 ], "wc_reply_reviewers_avg": [ 37.75, 10.848386976873567 ], "wc_reply_authors_avg": [ 165.25, 230.80010290292333 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13063081612150198337&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;hust.edu.cn;;;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "EpiCare: A Reinforcement Learning Benchmark for Dynamic Treatment Regimes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97595", "id": "dF22s2GoX0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dF22s2GoX0", "openreview": "https://openreview.net/forum?id=dF22s2GoX0", "poster": "", "project": "", "author_site": "Mason Hargrave, Alex Spaeth, Logan Grosenick", "tldr": "", "abstract": "Healthcare applications pose significant challenges to existing reinforcement learning (RL) methods due to implementation risks, low data availability, short treatment episodes, sparse rewards, partial observations, and heterogeneous treatment effects. Despite significant interest in using RL to generate dynamic treatment regimes for longitudinal patient care scenarios, no standardized benchmark has yet been developed.\nTo fill this need we introduce *Episodes of Care* (*EpiCare*), a benchmark designed to mimic the challenges associated with applying RL to longitudinal healthcare settings. We leverage this benchmark to test five state-of-the-art offline RL models as well as five common off-policy evaluation (OPE) techniques.\nOur results suggest that while offline RL may be capable of improving upon existing standards of care given large data availability, its applicability does not appear to extend to the moderate to low data regimes typical of healthcare settings. Additionally, we demonstrate that several OPE techniques which have become standard in the the medical RL literature fail to perform adequately on our benchmark. These results suggest that the performance of RL models in dynamic treatment regimes may be difficult to meaningfully evaluate using current OPE methods, indicating that RL for this application may still be in its early stages. We hope that these results along with the benchmark itself will facilitate the comparison of existing methods and inspire further research into techniques that increase the practical applicability of medical RL.", "keywords": "reinforcement learning;benchmark;environment;dynamic treatment regime;benchmark;off-policy evaluation;healthcare;medicine", "primary_area": "", "supplementary_material": "/attachment/e310aad6fe593c8c63896005b743d87399287003.zip", "author": "Mason Hargrave;Alex Spaeth;Logan Grosenick", "authorids": "~Mason_Hargrave1;~Alex_Spaeth1;~Logan_Grosenick1", "gender": "M;Not Specified;M", "homepage": ";;http://grosenicklab.org", "dblp": ";;05/2346", "google_scholar": "N_C3XN8AAAAJ;lSYz4psAAAAJ;xw1L-WoAAAAJ", "orcid": "0000-0003-2004-2761;0000-0003-0702-3945;0000-0003-3216-0319", "linkedin": "masonhargrave/;;logan-grosenick-8235771", "or_profile": "~Mason_Hargrave1;~Alex_Spaeth1;~Logan_Grosenick1", "aff": "Rockefeller University;University of California, Santa Cruz;Weill Cornell Medicine, Cornell University", "aff_domain": "rockefeller.edu;ucsc.edu;med.cornell.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhargrave2024epicare,\ntitle={EpiCare: A Reinforcement Learning Benchmark for Dynamic Treatment Regimes},\nauthor={Mason Hargrave and Alex Spaeth and Logan Grosenick},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=dF22s2GoX0}\n}", "github": "", "reviewers": "E79g;CJAm;h9aa", "pdf_size": 1180511, "rating": "6;6;7", "confidence": "3;3;3", "wc_summary_and_contributions": "58;85;73", "wc_strengths": "21;5;3", "wc_improvement": "73;28;1", "wc_limitations": "13;2;1", "wc_correctness": "32;7;1", "wc_clarity": "5;1;1", "wc_relation_to_prior_work": "38;8;1", "wc_documentation": "26;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "267;138;83", "wc_reply_reviewers": "107;0;0", "wc_reply_authors": "289;0;0", "reply_reviewers": "2;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 72.0, 11.045361017187261 ], "wc_strengths_avg": [ 9.666666666666666, 8.055363982396381 ], "wc_improvement_avg": [ 34.0, 29.698484809834994 ], "wc_limitations_avg": [ 5.333333333333333, 5.436502143433364 ], "wc_correctness_avg": [ 13.333333333333334, 13.424687043734844 ], "wc_clarity_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 15.666666666666666, 16.048537489614297 ], "wc_documentation_avg": [ 9.333333333333334, 11.785113019775793 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 162.66666666666666, 77.11607412782946 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 50.440283724640395 ], "wc_reply_authors_avg": [ 96.33333333333333, 136.23590650860817 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_BDcLlW0NfEJ:scholar.google.com/&scioq=EpiCare:+A+Reinforcement+Learning+Benchmark+for+Dynamic+Treatment+Regimes&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "rockefeller.edu;ucsc.edu;med.cornell.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Rockefeller University;University of California, Santa Cruz;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rockefeller.edu;https://www.ucsc.edu;https://www.weill.cornell.edu", "aff_unique_abbr": "RU;UCSC;Cornell", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Cruz;Weill Cornell Medicine", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FinCon: A Synthesized LLM Multi-Agent System with Conceptual Verbal Reinforcement for Enhanced Financial Decision Making", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94352", "id": "dG1HwKMYbC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dG1HwKMYbC", "openreview": "https://openreview.net/forum?id=dG1HwKMYbC", "poster": "/media/PosterPDFs/NeurIPS%202024/94352.png?t=1731560508.5941863", "project": "", "author_site": "Yangyang Yu, Zhiyuan Yao, Haohang Li, Zhiyang Deng, Yuechen Jiang, Yupeng Cao, Zhi Chen, Jordan Suchow, Zhenyu Cui, Rong Liu, Zhaozhuo Xu, Denghui Zhang, Koduvayur (Suba) Subbalakshmi, GUOJUN XIONG, Yueru He, Jimin Huang, Dong Li, Qianqian Xie", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated notable potential in conducting complex tasks and are increasingly utilized in various financial applications. However, high-quality sequential financial investment decision-making remains challenging. These tasks require multiple interactions with a volatile environment for every decision, demanding sufficient intelligence to maximize returns and manage risks. Although LLMs have been used to develop agent systems that surpass human teams and yield impressive investment returns, opportunities to enhance multi-source information synthesis and optimize decision-making outcomes through timely experience refinement remain unexplored. Here, we introduce FinCon, an LLM-based multi-agent framework tailored for diverse financial tasks. Inspired by effective real-world investment firm organizational structures, FinCon utilizes a manager-analyst communication hierarchy. This structure allows for synchronized cross-functional agent collaboration towards unified goals through natural language interactions and equips each agent with greater memory capacity than humans. Additionally, a risk-control component in FinCon enhances decision quality by episodically initiating a self-critiquing mechanism to update systematic investment beliefs. The conceptualized beliefs serve as verbal reinforcement for the future agent\u2019s behavior and can be selectively propagated to the appropriate node that requires knowledge updates. This feature significantly improves performance while reducing unnecessary peer-to-peer communication costs. Moreover, FinCon demonstrates strong generalization capabilities in various financial tasks, including stock trading and portfolio management.", "keywords": "Multi-agent system;Financial Large Language Models;Portfolio Management", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yangyang Yu;Zhiyuan Yao;Haohang Li;Zhiyang Deng;Yuechen Jiang;Yupeng Cao;Zhi Chen;Jordan W. Suchow;Zhenyu Cui;Rong Liu;Zhaozhuo Xu;Denghui Zhang;Koduvayur Subbalakshmi;GUOJUN XIONG;Yueru He;Jimin Huang;Dong Li;Qianqian Xie", "authorids": "~Yangyang_Yu1;~Zhiyuan_Yao2;~Haohang_Li1;~Zhiyang_Deng1;~Yuechen_Jiang1;~Yupeng_Cao1;~Zhi_Chen10;~Jordan_W._Suchow1;~Zhenyu_Cui2;~Rong_Liu4;~Zhaozhuo_Xu2;~Denghui_Zhang2;~Koduvayur_Subbalakshmi1;~GUOJUN_XIONG1;~Yueru_He1;~Jimin_Huang1;~Dong_Li27;~Qianqian_Xie1", "gender": "F;M;M;M;F;M;M;;M;;;;;;;M;F;F", "homepage": ";https://theling.github.io/;https://scholar.google.com/citations?user=gtaqrtoAAAAJ&hl=en&oi=ao;https://scholar.google.com/citations?user=JuLlgnUAAAAJ;https://www.linkedin.com/in/carolyn-yuechen-jiang-a72764194/;https://cyp0630.github.io/;http://zhi-chen.info/;;https://www.stevens.edu/profile/zcui6;https://www.stevens.edu/profile/rliu20;https://ottovonxu.github.io/;;https://sites.google.com/stevens.edu/infinitylab;https://xionggj001.github.io/;https://yueruhe-linda.myportfolio.com/work;;https://github.com/ldruth28;", "dblp": "59/3689;;313/5337;277/4880.html;361/6734;151/9000;https://dblp.org/rec/conf/aaaiss/YuLCJLZLSK24;;;;195/4352;;s/KPSubbalakshmi.html;214/2134.html;371/1115;163/4119;;", "google_scholar": ";ZLlWPBAAAAAJ;gtaqrtoAAAAJ;JuLlgnUAAAAJ;jlDkIlQAAAAJ;pumUNAcAAAAJ;kGQiYmoAAAAJ;;;;7tDlVAsAAAAJ;;SSAaI4AAAAAJ;FIBwLnoAAAAJ;;SnQ_CycAAAAJ;;UYW7X_0AAAAJ", "orcid": "0009-0009-4595-1786;0000-0001-5436-2910;0009-0002-3604-7284;0009-0007-6960-2025;0009-0001-7480-1041;0000-0002-4024-2026;0009-0000-8404-3523;;;;;;0000-0002-1670-9378;;;0000-0002-3501-3907;0009-0001-1291-7735;0000-0002-9588-7454", "linkedin": "yangyang%EF%BC%88shirley%EF%BC%89-yu-88542174/;zhiyuan-yao-74b84113a/;haohang-li-acatsama/;zhiyang-deng-784587157/;;;zhichen2288/;;;;;;https://www.linkedin.com/kpsuba;guojun-%E5%9B%BD%E9%92%A7-xiong-48696aa6/;yueruhe/;;;", "or_profile": "~Yangyang_Yu1;~Zhiyuan_Yao2;~Haohang_Li1;~Zhiyang_Deng1;~Yuechen_Jiang1;~Yupeng_Cao1;~Zhi_Chen10;~Jordan_W._Suchow1;~Zhenyu_Cui2;~Rong_Liu4;~Zhaozhuo_Xu2;~Denghui_Zhang2;~Koduvayur_Subbalakshmi1;~GUOJUN_XIONG1;~Yueru_He1;~Jimin_Huang1;~Dong_Li27;~Qianqian_Xie1", "aff": "Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;;Stevens Institute of Technology;Stevens Institute of Technology;Rice University;;Stevens Institute of Technology;State University of New York at Stony Brook;Columbia University;The Fin AI;Wuhan University;Yale University", "aff_domain": "stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;;stevens.edu;stevens.edu;rice.edu;;stevens.edu;stonybrook.edu;columbia.edu;thefin.ai;whu.edu.cn;yale.edu", "position": "PhD student;PhD student;PhD student;PhD student;Intern;PhD student;PhD student;;Associate Professor;Associate Professor;PhD student;;Full Professor;PhD student;MS student;Principal Researcher;PhD student;Postdoc", "bibtex": "@inproceedings{\nyu2024fincon,\ntitle={FinCon: A Synthesized {LLM} Multi-Agent System with Conceptual Verbal Reinforcement for Enhanced Financial Decision Making},\nauthor={Yangyang Yu and Zhiyuan Yao and Haohang Li and Zhiyang Deng and Yuechen Jiang and Yupeng Cao and Zhi Chen and Jordan W. Suchow and Zhenyu Cui and Rong Liu and Zhaozhuo Xu and Denghui Zhang and Koduvayur Subbalakshmi and GUOJUN XIONG and Yueru He and Jimin Huang and Dong Li and Qianqian Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dG1HwKMYbC}\n}", "github": "", "reviewers": "rfBX;pGXR;bNfR", "pdf_size": 15280048, "rating": "5;7;8", "confidence": "2;3;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "42;72;65", "wc_strengths": "26;212;108", "wc_weaknesses": "25;257;34", "wc_questions": "39;143;1", "wc_limitations": "1;5;43", "wc_review": "133;689;251", "wc_reply_reviewers": "34;435;46", "wc_reply_authors": "852;2143;6", "reply_reviewers": "1;3;1", "reply_authors": "4;8;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.666666666666664, 12.81492185782739 ], "wc_strengths_avg": [ 115.33333333333333, 76.11103000806709 ], "wc_weaknesses_avg": [ 105.33333333333333, 107.30745029534944 ], "wc_questions_avg": [ 61.0, 60.02221810851933 ], "wc_limitations_avg": [ 16.333333333333332, 18.92675942210452 ], "wc_review_avg": [ 357.6666666666667, 239.18937174455633 ], "wc_reply_reviewers_avg": [ 171.66666666666666, 186.26921973912445 ], "wc_reply_authors_avg": [ 1000.3333333333334, 878.7090278104325 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.666666666666667, 2.494438257849294 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4034622771434201278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;;stevens.edu;stevens.edu;rice.edu;;stevens.edu;stonybrook.edu;columbia.edu;thefin.ai;whu.edu.cn;yale.edu", "author_num": 18, "aff_unique_index": "0;0;0;0;0;0;0;0;0;1;0;2;3;4;5;6", "aff_unique_norm": "Stevens Institute of Technology;Rice University;State University of New York at Stony Brook;Columbia University;Fin AI;Wuhan University;Yale University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.stevens.edu;https://www.rice.edu;https://www.stonybrook.edu;https://www.columbia.edu;https://www.thefinai.com;http://www.whu.edu.cn/;https://www.yale.edu", "aff_unique_abbr": "SIT;Rice;SUNY Stony Brook;Columbia;Fin AI;WHU;Yale", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Thinking Forward: Memory-Efficient Federated Finetuning of Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94351", "id": "dGQtja9X2C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dGQtja9X2C", "openreview": "https://openreview.net/forum?id=dGQtja9X2C", "poster": "/media/PosterPDFs/NeurIPS%202024/94351.png?t=1731107034.4966621", "project": "", "author_site": "Kunjal Panchal, Nisarg Parikh, Sunav Choudhary, Lijun Zhang, Yuriy Brun, Hui Guan", "tldr": "", "abstract": "Finetuning large language models (LLMs) in federated learning (FL) settings has become increasingly important as it allows resource-constrained devices to finetune a model using private data. However, finetuning LLMs using backpropagation requires excessive memory (especially from intermediate activations) for resource-constrained devices. While Forward-mode Auto-Differentiation (AD) can significantly reduce memory footprint from activations, we observe that directly applying it to LLM finetuning results in slow convergence and poor accuracy. In this paper, we introduce Spry, an FL algorithm that splits trainable weights of an LLM among participating clients, such that each client computes gradients using forward-mode AD that are closer estimations of the true gradients. Spry achieves a low memory footprint, high accuracy, and fast convergence. We formally prove that the global gradients in Spry are unbiased estimators of true global gradients for homogeneous data distributions across clients, while heterogeneity increases bias of the estimates. We also derive Spry's convergence rate, showing that the gradients decrease inversely proportional to the number of FL rounds, indicating the convergence up to the limits of heterogeneity. Empirically, Spry reduces the memory footprint during training by 1.4-7.1$\\times$ in contrast to backpropagation, while reaching comparable accuracy, across a wide range of language tasks, models, and FL settings. \nSpry reduces the convergence time by 1.2-20.3$\\times$ and achieves 5.2-13.5\\% higher accuracy against state-of-the-art zero-order methods. When finetuning Llama2-7B with LoRA, compared to the peak memory consumption of 33.9GB of backpropagation, Spry only consumes 6.2GB of peak memory. For OPT13B, the reduction is from 76.5GB to 10.8GB. Spry makes feasible previously impossible FL deployments on commodity mobile and edge devices. Our source code is available for replication at https://github.com/Astuary/Spry.", "keywords": "Federated Learning;Large Language Models;Forward-mode Automatic Differentiation;Forward-mode AD;Memory-efficient Finetuning;Memory-efficiency;Data Heterogeneity", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Kunjal Panchal;Nisarg Parikh;Sunav Choudhary;Lijun Zhang;Yuriy Brun;Hui Guan", "authorids": "~Kunjal_Panchal1;~Nisarg_Parikh1;~Sunav_Choudhary1;~Lijun_Zhang4;~Yuriy_Brun1;~Hui_Guan1", "gender": "F;M;;F;M;F", "homepage": "https://astuary.github.io/Kunjal;https://github.com/namelessCrusader?tab=repositories;;https://zhanglijun95.github.io/resume/;https://people.cs.umass.edu/~brun/;https://guanh01.github.io/", "dblp": "277/6382.html;;;;51/1518.html;77/6645-1.html", "google_scholar": "QdPrnDgAAAAJ;;;;https://scholar.google.com.tw/citations?user=YVfr3wwAAAAJ;L2P0jCsAAAAJ", "orcid": "0000-0003-4325-1072;;;;0000-0003-3027-7986;0000-0001-9128-2231", "linkedin": "kunjal-panchal/;nisargparikh-/;;;;", "or_profile": "~Kunjal_Panchal1;~Nisarg_Parikh1;~Sunav_Choudhary1;~Lijun_Zhang4;~Yuriy_Brun1;~Hui_Guan1", "aff": "University of Massachusetts at Amherst;Northeastern University;;University of Massachusetts, Amherst;University of Massachusetts Amherst;University of Massachusetts, Amherst", "aff_domain": "cs.umass.edu;neu.edu;;umass.edu;umass.edu;umass.edu", "position": "PhD student;MS student;;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\npanchal2024thinking,\ntitle={Thinking Forward: Memory-Efficient Federated Finetuning of Language Models},\nauthor={Kunjal Panchal and Nisarg Parikh and Sunav Choudhary and Lijun Zhang and Yuriy Brun and Hui Guan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dGQtja9X2C}\n}", "github": "", "reviewers": "sp7C;RCwU;KHem", "pdf_size": 1718719, "rating": "5;6;7", "confidence": "2;3;3", "soundness": "2;2;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "122;66;130", "wc_strengths": "64;96;42", "wc_weaknesses": "165;169;36", "wc_questions": "15;163;106", "wc_limitations": "1;15;10", "wc_review": "367;509;324", "wc_reply_reviewers": "39;14;14", "wc_reply_authors": "35;31;35", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.0, 28.472208672083497 ], "wc_strengths_avg": [ 67.33333333333333, 22.17105219775452 ], "wc_weaknesses_avg": [ 123.33333333333333, 61.77557949725946 ], "wc_questions_avg": [ 94.66666666666667, 60.94988834189025 ], "wc_limitations_avg": [ 8.666666666666666, 5.792715732327589 ], "wc_review_avg": [ 400.0, 79.04850831398824 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 11.785113019775793 ], "wc_reply_authors_avg": [ 33.666666666666664, 1.8856180831641267 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14012649217024523534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "cs.umass.edu;neu.edu;;umass.edu;umass.edu;umass.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Massachusetts Amherst;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.northeastern.edu", "aff_unique_abbr": "UMass Amherst;NEU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "From Linear to Linearizable Optimization: A Novel Framework with Applications to Stationary and Non-stationary DR-submodular Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94350", "id": "dGaMSMeeF8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dGaMSMeeF8", "openreview": "https://openreview.net/forum?id=dGaMSMeeF8", "poster": "/media/PosterPDFs/NeurIPS%202024/94350.png?t=1733849775.5535893", "project": "", "author_site": "Mohammad Pedramfar, Vaneet Aggarwal", "tldr": "", "abstract": "This paper introduces the notion of upper-linearizable/quadratizable functions, a class that extends concavity and DR-submodularity in various settings, including monotone and non-monotone cases over different types of convex sets. A general meta-algorithm is devised to convert algorithms for linear/quadratic maximization into ones that optimize upper-linearizable/quadratizable functions, offering a unified approach to tackling concave and DR-submodular optimization problems. The paper extends these results to multiple feedback settings, facilitating conversions between semi-bandit/first-order feedback and bandit/zeroth-order feedback, as well as between first/zeroth-order feedback and semi-bandit/bandit feedback. Leveraging this framework, new algorithms are derived using existing results as base algorithms for convex optimization, improving upon state-of-the-art results in various cases. Dynamic and adaptive regret guarantees are obtained for DR-submodular maximization, marking the first algorithms to achieve such guarantees in these settings. Notably, the paper achieves these advancements with fewer assumptions compared to existing state-of-the-art results, underscoring its broad applicability and theoretical contributions to non-convex optimization.", "keywords": "convex optimization;DR-submodular optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Mohammad Pedramfar;Vaneet Aggarwal", "authorids": "~Mohammad_Pedramfar1;~Vaneet_Aggarwal1", "gender": ";M", "homepage": ";", "dblp": ";91/6560", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Mohammad_Pedramfar1;~Vaneet_Aggarwal1", "aff": ";Purdue University", "aff_domain": ";purdue.edu", "position": ";Full Professor", "bibtex": "@inproceedings{\npedramfar2024from,\ntitle={From Linear to Linearizable Optimization: A Novel Framework with Applications to Stationary and Non-stationary {DR}-submodular Optimization},\nauthor={Mohammad Pedramfar and Vaneet Aggarwal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dGaMSMeeF8}\n}", "github": "", "reviewers": "eLhz;VK3w;Rpsk", "pdf_size": 593500, "rating": "5;6;8", "confidence": "2;1;2", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;2;3", "wc_summary": "74;54;109", "wc_strengths": "99;46;129", "wc_weaknesses": "96;47;15", "wc_questions": "1;5;38", "wc_limitations": "1;5;1", "wc_review": "271;157;292", "wc_reply_reviewers": "16;9;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 1.6666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 79.0, 22.73030282830976 ], "wc_strengths_avg": [ 91.33333333333333, 34.315529364349835 ], "wc_weaknesses_avg": [ 52.666666666666664, 33.30999182761166 ], "wc_questions_avg": [ 14.666666666666666, 16.579773487261185 ], "wc_limitations_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_review_avg": [ 240.0, 59.312730505347666 ], "wc_reply_reviewers_avg": [ 8.333333333333334, 6.548960901462833 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.18898223650461365, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18130192826475902201&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": ";purdue.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "UMFC: Unsupervised Multi-Domain Feature Calibration for Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94349", "id": "dHIKahbV6G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dHIKahbV6G", "openreview": "https://openreview.net/forum?id=dHIKahbV6G", "poster": "/media/PosterPDFs/NeurIPS%202024/94349.png?t=1731638717.0917575", "project": "", "author_site": "Jiachen Liang, RuiBing Hou, Minyang Hu, Hong Chang, Shiguang Shan, Xilin Chen", "tldr": "", "abstract": "Pre-trained vision-language models (e.g., CLIP) have shown powerful zero-shot transfer capabilities. But they still struggle with domain shifts and typically require labeled data to adapt to downstream tasks, which could be costly. In this work, we aim to leverage unlabeled data that naturally spans multiple domains to enhance the transferability of vision-language models. Under this unsupervised multi-domain setting, we have identified inherent model bias within CLIP, notably in its visual and text encoders. Specifically, we observe that CLIP\u2019s visual encoder tends to prioritize encoding domain over discriminative category information, meanwhile its text encoder exhibits a preference for domain-relevant classes. To mitigate this model bias, we propose a training-free and label-free feature calibration method, Unsupervised Multi-domain Feature Calibration (UMFC). UMFC estimates image-level biases from domain-specific features and text-level biases from the direction of domain transition. These biases are subsequently subtracted from original image and text features separately, to render them domain-invariant. We evaluate our method on multiple settings including transductive learning and test-time adaptation. Extensive experiments show that our method outperforms CLIP and performs on par with the state-of-the-arts that need additional annotations or optimization.\nOur code is available at https://github.com/GIT-LJc/UMFC.", "keywords": "model calibration;test-time adaptation;CLIP;multi-domain", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiachen Liang;RuiBing Hou;Minyang Hu;Hong Chang;Shiguang Shan;Xilin Chen", "authorids": "~Jiachen_Liang2;~RuiBing_Hou1;~Minyang_Hu1;~Hong_Chang1;~Shiguang_Shan2;~Xilin_Chen1", "gender": "F;F;M;F;M;M", "homepage": "https://scholar.google.com.tw/citations?hl=zh-CN&view_op=list_works&gmla=AJsN-F4WR9JNhX176XHhFNyY-uOoxAkSKlLh3wyOGdmZz2O2DsHREcsE__DkYGPfqQhe6mOj1V68pipS94MgujusUy5PRDZ1ZgpejCjdktRkQtfpffMKWW0&user=VfS4cisAAAAJ;;http://vipl.ict.ac.cn/people/sgshan/;http://vipl.ict.ac.cn/edu/student/doctoral/202211/t20221118_123501.html;;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": ";;s/ShiguangShan;;325/1940;c/XilinChen", "google_scholar": ";LX6MnNsAAAAJ;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;6Saa1ugAAAAJ;vVx2v20AAAAJ", "orcid": ";;0000-0002-8348-392X;;;0000-0003-3024-4404", "linkedin": ";;;;;", "or_profile": "~RuiBing_Hou1;~Hong_Chang1;~Shiguang_Shan2;~Jc_Liang1;~Hu_Minyang1;~Xilin_Chen4", "aff": " Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;, Chinese Academy of Sciences;, Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "Assistant Professor;Full Professor;Full Professor;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nliang2024umfc,\ntitle={{UMFC}: Unsupervised Multi-Domain Feature Calibration for Vision-Language Models},\nauthor={Jiachen Liang and RuiBing Hou and Minyang Hu and Hong Chang and Shiguang Shan and Xilin Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dHIKahbV6G}\n}", "github": "", "reviewers": "cRCJ;jj75;hYxJ;Xgmy;HbLd", "pdf_size": 3091467, "rating": "3;4;6;6;6", "confidence": "4;3;4;2;3", "soundness": "2;3;3;2;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;2", "wc_summary": "47;71;38;116;66", "wc_strengths": "17;67;72;72;14", "wc_weaknesses": "427;129;97;61;139", "wc_questions": "2;5;20;3;2", "wc_limitations": "5;1;1;1;1", "wc_review": "498;273;228;253;222", "wc_reply_reviewers": "293;0;27;21;209", "wc_reply_authors": "1437;31;166;31;699", "reply_reviewers": "1;0;1;1;3", "reply_authors": "4;2;2;2;3", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 67.6, 27.045147439050872 ], "wc_strengths_avg": [ 48.4, 26.941417928535238 ], "wc_weaknesses_avg": [ 170.6, 131.0718886718277 ], "wc_questions_avg": [ 6.4, 6.887670143089026 ], "wc_limitations_avg": [ 1.8, 1.6000000000000003 ], "wc_review_avg": [ 294.8, 103.22286568391715 ], "wc_reply_reviewers_avg": [ 110.0, 118.490505948789 ], "wc_reply_authors_avg": [ 472.8, 541.3591783649742 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10191104937775867258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cas.cn;http://www.ict.ac.cn", "aff_unique_abbr": "CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards the Dynamics of a DNN Learning Symbolic Interactions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94348", "id": "dIHXwKjXRE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dIHXwKjXRE", "openreview": "https://openreview.net/forum?id=dIHXwKjXRE", "poster": "/media/PosterPDFs/NeurIPS%202024/94348.png?t=1732944774.8439882", "project": "", "author_site": "Qihan Ren, Junpeng Zhang, Yang Xu, Yue Xin, Dongrui Liu, Quanshi Zhang", "tldr": "", "abstract": "This study proves the two-phase dynamics of a deep neural network (DNN) learning interactions. Despite the long disappointing view of the faithfulness of post-hoc explanation of a DNN, a series of theorems have been proven [27] in recent years to show that for a given input sample, a small set of interactions between input variables can be considered as primitive inference patterns that faithfully represent a DNN's detailed inference logic on that sample. Particularly, Zhang et al. [41] have observed that various DNNs all learn interactions of different complexities in two distinct phases, and this two-phase dynamics well explains how a DNN changes from under-fitting to over-fitting. Therefore, in this study, we mathematically prove the two-phase dynamics of interactions, providing a theoretical mechanism for how the generalization power of a DNN changes during the training process. Experiments show that our theory well predicts the real dynamics of interactions on different DNNs trained for various tasks.", "keywords": "deep learning theory;learning theory;knowledge representation", "primary_area": "learning_theory", "supplementary_material": "", "author": "Qihan Ren;Junpeng Zhang;Yang Xu;Yue Xin;Dongrui Liu;Quanshi Zhang", "authorids": "~Qihan_Ren1;~Junpeng_Zhang3;~Yang_Xu20;~Yue_Xin1;~Dongrui_Liu1;~Quanshi_Zhang1", "gender": "M;;M;M;M;M", "homepage": "https://nebularaid2000.github.io/;https://qianqianshallow.github.io/;https://superposition09m.github.io/;https://yuexin.netlify.app/;https://shenqildr.github.io/;http://qszhang.com", "dblp": "268/5838;;;;199/9200.html;http://dblp.uni-trier.de/pers/hd/z/Zhang:Quanshi", "google_scholar": "ybTy_DwAAAAJ;;https://scholar.google.com/citations?hl=en;AAMp54AAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;iFFhHK0AAAAJ", "orcid": ";;;0009-0009-9230-0804;0000-0003-0087-1124;", "linkedin": ";;;;;", "or_profile": "~Qihan_Ren1;~Junpeng_Zhang3;~Yang_Xu20;~Yue_Xin1;~Dongrui_Liu1;~Quanshi_Zhang1", "aff": "Shanghai Jiaotong University;Sun Yat-Sen University;Zhejiang University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;mail2.sysu.edu.cn;zju.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nren2024towards,\ntitle={Towards the Dynamics of a {DNN} Learning Symbolic Interactions},\nauthor={Qihan Ren and Junpeng Zhang and Yang Xu and Yue Xin and Dongrui Liu and Quanshi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dIHXwKjXRE}\n}", "github": "", "reviewers": "iitK;bep2;xnzG", "pdf_size": 2136992, "rating": "5;7;8", "confidence": "2;3;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "1;3;3", "wc_summary": "42;52;82", "wc_strengths": "22;51;39", "wc_weaknesses": "60;13;26", "wc_questions": "42;52;93", "wc_limitations": "1;28;6", "wc_review": "167;196;246", "wc_reply_reviewers": "84;37;29", "wc_reply_authors": "33;26;44", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 58.666666666666664, 16.99673171197595 ], "wc_strengths_avg": [ 37.333333333333336, 11.897712198383164 ], "wc_weaknesses_avg": [ 33.0, 19.8158185969358 ], "wc_questions_avg": [ 62.333333333333336, 22.065558884580486 ], "wc_limitations_avg": [ 11.666666666666666, 11.728408057172787 ], "wc_review_avg": [ 203.0, 32.629230249374054 ], "wc_reply_reviewers_avg": [ 50.0, 24.26245384677046 ], "wc_reply_authors_avg": [ 34.333333333333336, 7.408703590297623 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18005156311805656827&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;mail2.sysu.edu.cn;zju.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Shanghai Jiao Tong University;Sun Yat-sen University;Zhejiang University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.sysu.edu.cn/;https://www.zju.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "SJTU;SYSU;ZJU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MetaAligner: Towards Generalizable Multi-Objective Alignment of Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94347", "id": "dIVb5C0QFf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dIVb5C0QFf", "openreview": "https://openreview.net/forum?id=dIVb5C0QFf", "poster": "/media/PosterPDFs/NeurIPS%202024/94347.png?t=1731227020.784374", "project": "", "author_site": "Kailai Yang, Zhiwei Liu, Qianqian Xie, Jimin Huang, Tianlin Zhang, Sophia Ananiadou", "tldr": "", "abstract": "Recent advancements in large language models (LLMs) focus on aligning to heterogeneous human expectations and values via multi-objective preference alignment. However, existing methods are dependent on the policy model parameters, which require high-cost repetition of their alignment algorithms for each new policy model, and they cannot expand to unseen objectives due to their static alignment objectives. In this work, we propose Meta-Objective Aligner (MetaAligner), the first policy-agnostic and generalizable method for multi-objective preference alignment.\nMetaAligner models multi-objective alignment into three stages: (1) dynamic objectives reformulation algorithm reorganizes traditional alignment datasets to supervise the model on performing flexible alignment across different objectives; (2) conditional weak-to-strong correction paradigm aligns the weak outputs of fixed policy models to approach strong outputs with higher preferences in the corresponding alignment objectives, enabling plug-and-play inferences on any policy models, which significantly reduces training costs and facilitates alignment on close-source policy models; (3) generalizable inference method flexibly adjusts target objectives by updating their text descriptions in the prompts, facilitating generalizable alignment to unseen objectives.\nExperimental results show that MetaAligner achieves significant and balanced improvements in multi-objective alignments on 10 state-of-the-art policy models, and saves up to 93.63% of GPU training hours compared to previous alignment methods. The model also effectively aligns unseen objectives, marking the first step towards generalizable multi-objective preference alignment.", "keywords": "Language Models;Multi-objective Alignment;Preference Optimization", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Kailai Yang;Zhiwei Liu;Qianqian Xie;Jimin Huang;Tianlin Zhang;Sophia Ananiadou", "authorids": "~Kailai_Yang1;~Zhiwei_Liu5;~Qianqian_Xie1;~Jimin_Huang1;~Tianlin_Zhang1;~Sophia_Ananiadou1", "gender": "M;M;F;M;M;F", "homepage": "https://stevekgyang.github.io/;https://lzw108.github.io/;;;http://www.zhangtianlin.top/;http://www.manchester.ac.uk/research/Sophia.ananiadou/", "dblp": "277/3317;;;163/4119;;47/4142", "google_scholar": "df4H1aQAAAAJ;gfqqbIwAAAAJ;UYW7X_0AAAAJ;SnQ_CycAAAAJ;Yy88kOoAAAAJ;https://scholar.google.com.tw/citations?user=quhi-K0AAAAJ", "orcid": "0000-0003-3142-2516;0000-0002-7015-5054;0000-0002-9588-7454;0000-0002-3501-3907;0000-0003-0843-1916;0000-0002-4097-9191", "linkedin": ";zhiwei-liu-63727a220/;;;;sophia-ananiadou-ba98b63/", "or_profile": "~Kailai_Yang1;~Zhiwei_Liu5;~Qianqian_Xie1;~Jimin_Huang1;~Tianlin_Zhang1;~Sophia_Ananiadou1", "aff": "University of Manchester;University of Manchester;Yale University;The Fin AI;University of Manchester;University of Manchester", "aff_domain": "cs.manchester.ac.uk;cs.manchester.ac.uk;yale.edu;thefin.ai;manchester.ac.uk;manchester.ac.uk", "position": "PhD student;PhD student;Postdoc;Principal Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2024metaaligner,\ntitle={MetaAligner: Towards Generalizable Multi-Objective Alignment of Language Models},\nauthor={Kailai Yang and Zhiwei Liu and Qianqian Xie and Jimin Huang and Tianlin Zhang and Sophia Ananiadou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dIVb5C0QFf}\n}", "github": "", "reviewers": "XHXG;E7pS;i5Yu;vbsw", "pdf_size": 13787465, "rating": "3;6;6;7", "confidence": "5;4;4;4", "soundness": "2;3;2;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "67;16;15;122", "wc_strengths": "31;29;63;71", "wc_weaknesses": "95;297;98;262", "wc_questions": "9;2;276;54", "wc_limitations": "1;1;21;8", "wc_review": "203;345;473;517", "wc_reply_reviewers": "103;37;52;120", "wc_reply_authors": "378;101;73;282", "reply_reviewers": "1;1;1;2", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.0, 44.02839992550263 ], "wc_strengths_avg": [ 48.5, 18.728320800328042 ], "wc_weaknesses_avg": [ 188.0, 92.33904916122972 ], "wc_questions_avg": [ 85.25, 111.92268536807005 ], "wc_limitations_avg": [ 7.75, 8.166241485530538 ], "wc_review_avg": [ 384.5, 122.36318890908328 ], "wc_reply_reviewers_avg": [ 78.0, 34.44560929929967 ], "wc_reply_authors_avg": [ 208.5, 126.5395195186073 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13438162666525277483&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "cs.manchester.ac.uk;cs.manchester.ac.uk;yale.edu;thefin.ai;manchester.ac.uk;manchester.ac.uk", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of Manchester;Yale University;Fin AI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.manchester.ac.uk;https://www.yale.edu;https://www.thefinai.com", "aff_unique_abbr": "UoM;Yale;Fin AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Dissecting Query-Key Interaction in Vision Transformers", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94346", "id": "dIktpSgK4F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dIktpSgK4F", "openreview": "https://openreview.net/forum?id=dIktpSgK4F", "poster": "", "project": "", "author_site": "Xu Pan, Aaron Philip, Ziqian Xie, Odelia Schwartz", "tldr": "", "abstract": "Self-attention in vision transformers is often thought to perform perceptual grouping where tokens attend to other tokens with similar embeddings, which could correspond to semantically similar features of an object. However, attending to dissimilar tokens can be beneficial by providing contextual information. We propose to analyze the query-key interaction by the singular value decomposition of the interaction matrix (i.e. ${\\textbf{W}_q}^\\top\\textbf{W}_k$). We find that in many ViTs, especially those with classification training objectives, early layers attend more to similar tokens, while late layers show increased attention to dissimilar tokens, providing evidence corresponding to perceptual grouping and contextualization, respectively. Many of these interactions between features represented by singular vectors are interpretable and semantic, such as attention between relevant objects, between parts of an object, or between the foreground and background. This offers a novel perspective on interpreting the attention mechanism, which contributes to understanding how transformer models utilize context and salient features when processing images.", "keywords": "Vision transformer;Interpretability;Contextualization;Self-attention", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Xu Pan;Aaron Philip;Ziqian Xie;Odelia Schwartz", "authorids": "~Xu_Pan2;~Aaron_Philip1;~Ziqian_Xie1;~Odelia_Schwartz1", "gender": ";M;;F", "homepage": "https://xupan.org/;;;http://www.cs.miami.edu/~odelia/", "dblp": "172/9822;;;", "google_scholar": "y-DixhMAAAAJ;;;https://scholar.google.com.tw/citations?user=3prQpXgAAAAJ", "orcid": ";0000-0002-1444-6869;;", "linkedin": ";;;", "or_profile": "~Xu_Pan2;~Aaron_Philip1;~Ziqian_Xie1;~Odelia_Schwartz1", "aff": "University of Miami;Michigan State University;;University of Miami, University of Miami", "aff_domain": "miami.edu;msu.edu;;cs.miami.edu", "position": "PhD student;Undergrad student;;Associate Professor", "bibtex": "@inproceedings{\npan2024dissecting,\ntitle={Dissecting Query-Key Interaction in Vision Transformers},\nauthor={Xu Pan and Aaron Philip and Ziqian Xie and Odelia Schwartz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dIktpSgK4F}\n}", "github": "", "reviewers": "Quv7;EcHv;PNQ8;LpZi", "pdf_size": 47417694, "rating": "3;7;7;8", "confidence": "4;4;4;5", "soundness": "2;4;3;3", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "91;120;91;74", "wc_strengths": "37;68;147;117", "wc_weaknesses": "152;253;195;78", "wc_questions": "47;106;1;53", "wc_limitations": "11;13;41;12", "wc_review": "338;560;475;334", "wc_reply_reviewers": "0;300;69;30", "wc_reply_authors": "0;779;96;5", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 94.0, 16.537835408541227 ], "wc_strengths_avg": [ 92.25, 42.57566793369189 ], "wc_weaknesses_avg": [ 169.5, 63.83768479511142 ], "wc_questions_avg": [ 51.75, 37.22482370676858 ], "wc_limitations_avg": [ 19.25, 12.577261228105266 ], "wc_review_avg": [ 426.75, 95.6069427395312 ], "wc_reply_reviewers_avg": [ 99.75, 118.17439443466593 ], "wc_reply_authors_avg": [ 220.0, 324.9930768493384 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5261522196019801, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17792468148461853333&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "miami.edu;msu.edu;;cs.miami.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Miami;Michigan State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.miami.edu;https://www.msu.edu", "aff_unique_abbr": "UM;MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Model Checking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94345", "id": "dJ9KzkQ0oH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dJ9KzkQ0oH", "openreview": "https://openreview.net/forum?id=dJ9KzkQ0oH", "poster": "/media/PosterPDFs/NeurIPS%202024/94345.png?t=1731338246.1234937", "project": "", "author_site": "Mirco Giacobbe, Daniel Kroening, Abhinandan Pal, Michael Tautschnig", "tldr": "", "abstract": "We introduce a machine learning approach to model checking temporal logic, with application to formal hardware verification. Model checking answers the question of whether every execution of a given system satisfies a desired temporal logic specification. Unlike testing, model checking provides formal guarantees. Its application is expected standard in silicon design and the EDA industry has invested decades into the development of performant symbolic model checking algorithms. Our new approach combines machine learning and symbolic reasoning by using neural networks as formal proof certificates for linear temporal logic. We train our neural certificates from randomly generated executions of the system and we then symbolically check their validity using satisfiability solving which, upon the affirmative answer, establishes that the system provably satisfies the specification. We leverage the expressive power of neural networks to represent proof certificates as well as the fact that checking a certificate is much simpler than finding one. As a result, our machine learning procedure for model checking is entirely unsupervised, formally sound, and practically effective. We experimentally demonstrate that our method outperforms the state-of-the-art academic and commercial model checkers on a set of standard hardware designs written in SystemVerilog.", "keywords": "Formal Verification;SystemVerilog;Temporal Logic;Neuro-symbolic AI;Neural Certificates", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/211466574426d953242128c43fc8a5d7d66e78fd.zip", "author": "Mirco Giacobbe;Daniel Kroening;Abhinandan Pal;Michael Tautschnig", "authorids": "~Mirco_Giacobbe1;~Daniel_Kroening1;~Abhinandan_Pal1;~Michael_Tautschnig1", "gender": ";;M;", "homepage": "https://mircogiacobbe.github.io/;https://www.kroening.com;https://www.abhinandanpal.co.in/;https://tautschnig.net", "dblp": "134/7846;k/DanielKroening;;", "google_scholar": ";https://scholar.google.co.uk/citations?user=DHddutUAAAAJ;w7_c_0YAAAAJ;", "orcid": ";0000-0002-6681-5283;;", "linkedin": ";kroening/?originalSubdomain=uk;;", "or_profile": "~Mirco_Giacobbe1;~Daniel_Kroening1;~Abhinandan_Pal1;~Michael_Tautschnig1", "aff": ";Amazon;University of Birmingham;Queen Mary, University of London", "aff_domain": ";amazon.com;bham.ac.uk;qmul.ac.uk", "position": ";Senior Principal Scientist;PhD student;Lecturer", "bibtex": "@inproceedings{\ngiacobbe2024neural,\ntitle={Neural Model Checking},\nauthor={Mirco Giacobbe and Daniel Kroening and Abhinandan Pal and Michael Tautschnig},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dJ9KzkQ0oH}\n}", "github": "", "reviewers": "GhN5;U2Dq;ST48;2gkW", "pdf_size": 827360, "rating": "5;5;5;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "109;182;60;171", "wc_strengths": "28;105;37;151", "wc_weaknesses": "54;347;217;120", "wc_questions": "12;300;10;107", "wc_limitations": "25;20;12;47", "wc_review": "228;954;336;596", "wc_reply_reviewers": "15;11;31;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 130.5, 49.30770730829005 ], "wc_strengths_avg": [ 80.25, 50.543916547889324 ], "wc_weaknesses_avg": [ 184.5, 110.28712526854619 ], "wc_questions_avg": [ 107.25, 117.98596314816437 ], "wc_limitations_avg": [ 26.0, 12.98075498574717 ], "wc_review_avg": [ 528.5, 279.71548044396826 ], "wc_reply_reviewers_avg": [ 17.75, 7.790218225441442 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=398455374190297758&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "email": ";amazon.com;bham.ac.uk;qmul.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;University of Birmingham;Queen Mary, University of London", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.birmingham.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "Amazon;Birmingham;QMUL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Constrained Diffusion with Trust Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94344", "id": "dJUb9XRoZI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dJUb9XRoZI", "openreview": "https://openreview.net/forum?id=dJUb9XRoZI", "poster": "/media/PosterPDFs/NeurIPS%202024/94344.png?t=1731753386.162434", "project": "", "author_site": "William Huang, Yifeng Jiang, Tom Van Wouwe, Karen Liu", "tldr": "", "abstract": "Diffusion models have demonstrated significant promise in various generative tasks; however, they often struggle to satisfy challenging constraints. Our approach addresses this limitation by rethinking training-free loss-guided diffusion from an optimization perspective. We formulate a series of constrained optimizations throughout the inference process of a diffusion model. In each optimization, we allow the sample to take multiple steps along the gradient of the proxy constraint function until we can no longer trust the proxy, according to the variance at each diffusion level. Additionally, we estimate the state manifold of diffusion model to allow for early termination when the sample starts to wander away from the state manifold at each diffusion step. Trust sampling effectively balances between following the unconditional diffusion model and adhering to the loss guidance, enabling more flexible and accurate constrained generation. We demonstrate the efficacy of our method through extensive experiments on complex tasks, and in drastically different domains of images and 3D motion generation, showing significant improvements over existing methods in terms of generation quality. Our implementation is available at https://github.com/will-s-h/trust-sampling.", "keywords": "diffusion models;guidance;image generation;human motion", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/5a52d8161e9784b5b068b60f664e8e1abcbf36fc.zip", "author": "William Huang;Yifeng Jiang;Tom Van Wouwe;Karen Liu", "authorids": "~William_Huang3;~Yifeng_Jiang1;~Tom_Van_Wouwe1;~Karen_Liu1", "gender": ";;M;", "homepage": "https://willhuang.me;;;https://cs.stanford.edu/~karenliu", "dblp": ";;;", "google_scholar": "-WRy6NkAAAAJ;;;i28fU0MAAAAJ", "orcid": ";;;0000-0001-5926-0905", "linkedin": "william-s-huang/;;tom-van-wouwe-9b5545a7/;", "or_profile": "~William_Huang3;~Yifeng_Jiang1;~Tom_Van_Wouwe1;~Karen_Liu1", "aff": "Stanford University;;Stanford University;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;;stanford.edu;cs.stanford.edu", "position": "Undergrad student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhuang2024constrained,\ntitle={Constrained Diffusion with Trust Sampling},\nauthor={William Huang and Yifeng Jiang and Tom Van Wouwe and Karen Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dJUb9XRoZI}\n}", "github": "", "reviewers": "Pivm;bnFv;rJ33;wQPN", "pdf_size": 47171298, "rating": "5;5;5;5", "confidence": "4;5;3;4", "soundness": "2;3;2;3", "novelty": "2;3;2;2", "presentation": "3;4;3;3", "wc_summary": "74;225;65;128", "wc_strengths": "37;113;23;106", "wc_weaknesses": "141;118;173;271", "wc_questions": "2;3;146;189", "wc_limitations": "8;15;69;44", "wc_review": "262;474;476;738", "wc_reply_reviewers": "71;23;17;26", "wc_reply_authors": "411;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 123.0, 63.62782410235321 ], "wc_strengths_avg": [ 69.75, 40.13337140086788 ], "wc_weaknesses_avg": [ 175.75, 58.35826848013913 ], "wc_questions_avg": [ 85.0, 83.88980867781258 ], "wc_limitations_avg": [ 34.0, 24.300205760445735 ], "wc_review_avg": [ 487.5, 168.75648135701337 ], "wc_reply_reviewers_avg": [ 34.25, 21.46363203188128 ], "wc_reply_authors_avg": [ 102.75, 177.96822047770215 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6956992638226049458&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "stanford.edu;;stanford.edu;cs.stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "QUEST: Quality-Aware Metropolis-Hastings Sampling for Machine Translation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94343", "id": "dLnduWGTB4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dLnduWGTB4", "openreview": "https://openreview.net/forum?id=dLnduWGTB4", "poster": "/media/PosterPDFs/NeurIPS%202024/94343.png?t=1730242330.422624", "project": "", "author_site": "Gon\u00e7alo Faria, Sweta Agrawal, Ant\u00f3nio Farinhas, Ricardo Rei, Jos\u00e9 de Souza, Andr\u00e9 Martins", "tldr": "", "abstract": "An important challenge in machine translation (MT) is to generate high-quality and diverse translations. \nPrior work has shown that the estimated likelihood from the MT model correlates poorly with translation quality. \nIn contrast, quality evaluation metrics (such as COMET or BLEURT) exhibit high correlations with human judgments, which has motivated their use as rerankers (such as quality-aware and minimum Bayes risk decoding). However, relying on a single translation with high estimated quality increases the chances of \"gaming the metric''. \nIn this paper, we address the problem of sampling a set of high-quality and diverse translations. \nWe provide a simple and effective way to avoid over-reliance on noisy quality estimates by using them as the energy function of a Gibbs distribution. Instead of looking for a mode in the distribution, we generate multiple samples from high-density areas through the Metropolis-Hastings algorithm, a simple Markov chain Monte Carlo approach. \n The results show that our proposed method leads to high-quality and diverse outputs across multiple language pairs (English$\\leftrightarrow$\\{German, Russian\\}) with two strong decoder-only LLMs (Alma-7b, Tower-7b).", "keywords": "Machine Translation;Decoding;Quality Estimation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Gon\u00e7alo Faria;Sweta Agrawal;Ant\u00f3nio Farinhas;Ricardo Rei;Jos\u00e9 G. C. de Souza;Andre Martins", "authorids": "~Gon\u00e7alo_Faria1;~Sweta_Agrawal1;~Ant\u00f3nio_Farinhas1;~Ricardo_Rei1;~Jos\u00e9_G._C._de_Souza1;~Andre_Martins1", "gender": "F;M;M;M;M;M", "homepage": "https://sweta20.github.io/;;https://andre-martins.github.io/;https://www.goncalofaria.com;;", "dblp": "210/7863.html;267/5345;m/AndreFTMartins;314/9619;66/1087;72/3176", "google_scholar": "Avsw9IkAAAAJ;yK5wIPkAAAAJ;https://scholar.google.pt/citations?user=mT7ppvwAAAAJ;caTuELoAAAAJ;20ApDosAAAAJ;jf4S4tsAAAAJ", "orcid": ";;;;0000-0001-6344-7633;0000-0001-8265-1939", "linkedin": ";;;goncalorafaria/;josesouza/;ricardo-rei-159154172/", "or_profile": "~Sweta_Agrawal1;~Ant\u00f3nio_Farinhas1;~Andre_Martins1;~Gon\u00e7alo_Rui_Faria1;~Jos\u00e9_Guilherme_Camargo_de_Souza2;~Ricardo_Costa_Dias_Rei1", "aff": "Instituto de Telecomunica\u00e7\u00f5es;Instituto Superior T\u00e9cnico;Unbabel;Instituto de Telecomunica\u00e7\u00f5es, Portugal;Unbabel;INESC-ID", "aff_domain": "tecnico.ulisboa.pt;tecnico.ulisboa.pt;unbabel.com;it.pt;unbabel.com;inesc-id.pt", "position": "Postdoc;PhD student;Research Scientist;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nfaria2024quest,\ntitle={{QUEST}: Quality-Aware Metropolis-Hastings Sampling for Machine Translation},\nauthor={Gon{\\c{c}}alo Faria and Sweta Agrawal and Ant{\\'o}nio Farinhas and Ricardo Rei and Jos{\\'e} G. C. de Souza and Andre Martins},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dLnduWGTB4}\n}", "github": "", "reviewers": "L9No;DVBo;Pz9P;SYtr", "pdf_size": 4265681, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "47;114;53;69", "wc_strengths": "13;22;37;38", "wc_weaknesses": "3;53;55;37", "wc_questions": "129;88;1;79", "wc_limitations": "1;1;1;7", "wc_review": "193;278;147;230", "wc_reply_reviewers": "0;0;16;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 26.233328038966004 ], "wc_strengths_avg": [ 27.5, 10.5 ], "wc_weaknesses_avg": [ 37.0, 20.83266665599966 ], "wc_questions_avg": [ 74.25, 46.29997300215196 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 212.0, 48.130032204435516 ], "wc_reply_reviewers_avg": [ 10.25, 10.732543966832841 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13611599774182899544&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tecnico.ulisboa.pt;tecnico.ulisboa.pt;unbabel.com;it.pt;unbabel.com;inesc-id.pt", "author_num": 6, "aff_unique_index": "0;1;2;0;2;3", "aff_unique_norm": "Instituto de Telecomunica\u00e7\u00f5es;Instituto Superior T\u00e9cnico;Unbabel;INESC-ID", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.it.pt;https://www.ist.utl.pt;https://www.unbabel.com;https://www.inesc-id.pt", "aff_unique_abbr": ";IST;;INESC-ID", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Portugal" }, { "title": "Learning Optimal Lattice Vector Quantizers for End-to-end Neural Image Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94342", "id": "dLr4H7Uj4H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dLr4H7Uj4H", "openreview": "https://openreview.net/forum?id=dLr4H7Uj4H", "poster": "/media/PosterPDFs/NeurIPS%202024/94342.png?t=1733214352.0363734", "project": "", "author_site": "Xi Zhang, Xiaolin Wu", "tldr": "", "abstract": "It is customary to deploy uniform scalar quantization in the end-to-end optimized Neural image compression methods, instead of more powerful vector quantization, due to the high complexity of the latter. Lattice vector quantization (LVQ), on the other hand, presents a compelling alternative, which can exploit inter-feature dependencies more effectively while keeping computational efficiency almost the same as scalar quantization. However, traditional LVQ structures are designed/optimized for uniform source distributions, hence nonadaptive and suboptimal for real source distributions of latent code space for Neural image compression tasks. In this paper, we propose a novel learning method to overcome this weakness by designing the rate-distortion optimal lattice vector quantization (OLVQ) codebooks with respect to the sample statistics of the latent features to be compressed. By being able to better fit the LVQ structures to any given latent sample distribution, the proposed OLVQ method improves the rate-distortion performances of the existing quantization schemes in neural image compression significantly, while retaining the amenability of uniform scalar quantization.", "keywords": "Lattice vector quantizer;Neural image compression;Rate-distortion optimal LVQ", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xi Zhang;Xiaolin Wu", "authorids": "~Xi_Zhang7;~Xiaolin_Wu2", "gender": "M;", "homepage": "https://xzhang9308.github.io/;http://www.ece.mcmaster.ca/~xwu", "dblp": "87/1222-19;w/XiaolinWu", "google_scholar": "78WvEjMAAAAJ;ZuQnEIgAAAAJ", "orcid": "0000-0002-1993-6031;", "linkedin": ";", "or_profile": "~Xi_Zhang7;~Xiaolin_Wu2", "aff": "McMaster University;McMaster University", "aff_domain": "mcmaster.ca;mcmaster.ca", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2024learning,\ntitle={Learning Optimal Lattice Vector Quantizers for End-to-end Neural Image Compression},\nauthor={Xi Zhang and Xiaolin Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dLr4H7Uj4H}\n}", "github": "", "reviewers": "uEQ6;4Wxt;8Aj7;B5jh", "pdf_size": 1597922, "rating": "3;5;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "62;64;90;95", "wc_strengths": "29;78;33;73", "wc_weaknesses": "267;80;116;169", "wc_questions": "5;35;19;97", "wc_limitations": "15;80;37;4", "wc_review": "378;337;295;438", "wc_reply_reviewers": "205;181;75;0", "wc_reply_authors": "358;423;88;53", "reply_reviewers": "1;1;2;0", "reply_authors": "4;3;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 14.872373717735847 ], "wc_strengths_avg": [ 53.25, 22.364872009470567 ], "wc_weaknesses_avg": [ 158.0, 70.44501401802685 ], "wc_questions_avg": [ 39.0, 35.12833614050059 ], "wc_limitations_avg": [ 34.0, 29.09467305195231 ], "wc_review_avg": [ 362.0, 52.787309080876625 ], "wc_reply_reviewers_avg": [ 115.25, 82.58442649797841 ], "wc_reply_authors_avg": [ 230.5, 162.11492836873475 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PC3R621lexoJ:scholar.google.com/&scioq=Learning+Optimal+Lattice+Vector+Quantizers+for+End-to-end+Neural+Image+Compression&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "mcmaster.ca;mcmaster.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "McMaster University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcmaster.ca", "aff_unique_abbr": "McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Weak-to-Strong Search: Align Large Language Models via Searching over Small Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94341", "id": "dOJ6CqWDf1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dOJ6CqWDf1", "openreview": "https://openreview.net/forum?id=dOJ6CqWDf1", "poster": "", "project": "", "author_site": "Zhanhui Zhou, Zhixuan Liu, Jie Liu, Zhichen Dong, Chao Yang, Yu Qiao", "tldr": "", "abstract": "Large language models are usually fine-tuned to align with human preferences. However, fine-tuning a large language model can be challenging. In this work, we introduce $\\textit{weak-to-strong search}$, framing the alignment of a large language model as a test-time greedy search to maximize the log-probability difference between small tuned and untuned models while sampling from the frozen large model. This method serves both as (1) a compute-efficient model up-scaling strategy that avoids directly tuning the large model and as (2) an instance of weak-to-strong generalization that enhances a strong model with weak test-time guidance.\nEmpirically, we demonstrate the flexibility of weak-to-strong search across different tasks. In controlled-sentiment generation and summarization, we use tuned and untuned $\\texttt{gpt2}$s to improve the alignment of large models without additional training. Crucially, in a more difficult instruction-following benchmark, AlpacaEval 2.0, we show that reusing off-the-shelf small models (e.g., $\\texttt{zephyr-7b-beta}$ and its untuned version) can improve the length-controlled win rates of both white-box and black-box large models against $\\texttt{gpt-4-turbo}$ (e.g., $34.4\\% \\rightarrow 37.9\\%$ for $\\texttt{Llama-3-70B-Instruct}$ and $16.0\\% \\rightarrow 20.1\\%$ for $\\texttt{gpt-3.5-turbo-instruct}$), despite the small models' low win rates $\\approx 10.0\\%$.", "keywords": "large language models;alignment;weak-to-strong generalization", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/40fbad4a08fbf83674b7213cee1500416a321d26.zip", "author": "Zhanhui Zhou;Zhixuan Liu;Jie Liu;Zhichen Dong;Chao Yang;Yu Qiao", "authorids": "~Zhanhui_Zhou1;~Zhixuan_Liu2;~Jie_Liu13;~Zhichen_Dong1;~Chao_Yang3;~Yu_Qiao1", "gender": "M;M;;F;;", "homepage": "https://zhziszz.github.io/;;;https://github.com/niconi19;;", "dblp": ";;;;;", "google_scholar": "SbACfYQAAAAJ;896Q3cwAAAAJ;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Zhanhui_Zhou1;~Zhixuan_Liu2;~Jie_Liu13;~Zhichen_Dong1;~Chao_Yang3;~Yu_Qiao1", "aff": "Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;;Shanghai Jiaotong University;;", "aff_domain": "pjlab.org.cn;pjlab.org.cn;;sjtu.edu.cn;;", "position": "Researcher;Intern;;PhD student;;", "bibtex": "@inproceedings{\nzhou2024weaktostrong,\ntitle={Weak-to-Strong Search: Align Large Language Models via Searching over Small Language Models},\nauthor={Zhanhui Zhou and Zhixuan Liu and Jie Liu and Zhichen Dong and Chao Yang and Yu Qiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dOJ6CqWDf1}\n}", "github": "", "reviewers": "2D8T;mz4P;ct59;Xaca", "pdf_size": 1510106, "rating": "6;6;6;7", "confidence": "4;4;3;2", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "121;92;75;64", "wc_strengths": "73;74;35;27", "wc_weaknesses": "88;146;76;87", "wc_questions": "10;3;15;84", "wc_limitations": "9;8;5;1", "wc_review": "301;323;206;263", "wc_reply_reviewers": "62;32;0;34", "wc_reply_authors": "81;6;0;6", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 21.50581316760657 ], "wc_strengths_avg": [ 52.25, 21.44032415799724 ], "wc_weaknesses_avg": [ 99.25, 27.39867697535777 ], "wc_questions_avg": [ 28.0, 32.61134771824066 ], "wc_limitations_avg": [ 5.75, 3.112474899497183 ], "wc_review_avg": [ 273.25, 44.36425926351075 ], "wc_reply_reviewers_avg": [ 32.0, 21.95449840010015 ], "wc_reply_authors_avg": [ 23.25, 33.43183363203401 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10599004754664203100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pjlab.org.cn;pjlab.org.cn;;sjtu.edu.cn;;", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.shailab.org/;https://www.sjtu.edu.cn", "aff_unique_abbr": "Shanghai AI Lab;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Adversarial Representation Engineering: A General Model Editing Framework for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94340", "id": "dQ9ji8e9qQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dQ9ji8e9qQ", "openreview": "https://openreview.net/forum?id=dQ9ji8e9qQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94340.png?t=1731506310.5756865", "project": "", "author_site": "Yihao Zhang, Zeming Wei, Jun Sun, Meng Sun", "tldr": "", "abstract": "Since the rapid development of Large Language Models (LLMs) has achieved remarkable success, understanding and rectifying their internal complex mechanisms has become an urgent issue. Recent research has attempted to interpret their behaviors through the lens of inner representation. However, developing practical and efficient methods for applying these representations for general and flexible model editing remains challenging. In this work, we explore how to leverage insights from representation engineering to guide the editing of LLMs by deploying a representation discriminator as an editing oracle. We first identify the importance of a robust and reliable discriminator during editing, then propose an \\textbf{A}dversarial \\textbf{R}epresentation \\textbf{E}ngineering (\\textbf{ARE}) framework to provide a unified and interpretable approach for conceptual model editing without compromising baseline performance. Experiments on multiple tasks demonstrate the effectiveness of ARE in various model editing scenarios. Our code and data are available at \\url{https://github.com/Zhang-Yihao/Adversarial-Representation-Engineering}.", "keywords": "Model editing;AI safety;Large language model;Trustworthy LLM", "primary_area": "generative_models", "supplementary_material": "/attachment/413fb6842b25d331278d67dbc5810364d1c6ae45.zip", "author": "Yihao Zhang;Zeming Wei;Jun Sun;Meng Sun", "authorids": "~Yihao_Zhang3;~Zeming_Wei1;~Jun_Sun12;~Meng_Sun1", "gender": "Non-Binary;M;M;M", "homepage": "https://zhang-yihao.github.io/;https://weizeming.github.io;https://sunjun.site;https://www.math.pku.edu.cn/teachers/sunm/", "dblp": ";276/6608;;81/1237-2", "google_scholar": ";Kyn1zdQAAAAJ;https://scholar.google.com.sg/citations?user=DVsEyn0AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yihao_Zhang3;~Zeming_Wei1;~Jun_Sun12;~Meng_Sun1", "aff": "Peking University;University of California, Berkeley;Singapore Management University;Peking University", "aff_domain": "pku.edu.cn;berkeley.edu;smu.edu.sg;pku.edu.cn", "position": "Undergrad student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024adversarial,\ntitle={Adversarial Representation Engineering: A General Model Editing Framework for Large Language Models},\nauthor={Yihao Zhang and Zeming Wei and Jun Sun and Meng Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dQ9ji8e9qQ}\n}", "github": "", "reviewers": "Ga6W;Nm4E;w6mA", "pdf_size": 1241383, "rating": "5;7;7", "confidence": "2;4;3", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "106;89;74", "wc_strengths": "115;59;50", "wc_weaknesses": "118;56;62", "wc_questions": "10;32;2", "wc_limitations": "28;44;1", "wc_review": "377;280;189", "wc_reply_reviewers": "0;16;15", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 89.66666666666667, 13.072447700751718 ], "wc_strengths_avg": [ 74.66666666666667, 28.75567576825293 ], "wc_weaknesses_avg": [ 78.66666666666667, 27.920522121829233 ], "wc_questions_avg": [ 14.666666666666666, 12.684198393626966 ], "wc_limitations_avg": [ 24.333333333333332, 17.745108872274887 ], "wc_review_avg": [ 282.0, 76.7637067022344 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 7.318166133366716 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13085614557315509619&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;berkeley.edu;smu.edu.sg;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Peking University;University of California, Berkeley;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.berkeley.edu;https://www.smu.edu.sg", "aff_unique_abbr": "Peking U;UC Berkeley;SMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "China;United States;Singapore" }, { "id": "dQmEIwRw16", "title": "Collision Cross-entropy for Soft Class Labels and Entropy-based Clustering", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose ''collision cross-entropy'' as a robust alternative to Shannon's cross-entropy (CE) loss when class labels are represented by soft categorical distributions y. In general, soft labels can naturally represent ambiguous targets in classification. They are particularly relevant for self-labeled clustering methods, where latent pseudo-labels $y$ are jointly estimated with the model parameters and uncertainty is prevalent. \nIn case of soft labels $y$, Shannon's CE teaches the model predictions $\\sigma$ \nto reproduce the uncertainty in each training example, which inhibits the model's ability to learn and generalize from these examples. As an alternative loss, we propose the negative log of ``collision probability'' that\nmaximizes the chance of equality between two random variables, predicted class and unknown true class, \nwhose distributions are $\\sigma$ and $y$. We show that it has the properties of a generalized CE. \nThe proposed collision CE agrees with Shannon's CE for one-hot labels $y$, \nbut the training from soft labels differs. For example, unlike Shannon's CE, data points where $y$ is a uniform distribution have zero contribution to the training. Collision CE significantly improves classification \nsupervised by soft uncertain targets. \nUnlike Shannon's, collision CE is symmetric for $y$ and $\\sigma$, which is particularly relevant when \nboth distributions are estimated in the context of self-labeled clustering.\nFocusing on discriminative deep clustering where self-labeling and entropy-based losses are dominant, \nwe show that the use of collision CE improves the state-of-the-art. \nWe also derive an efficient EM algorithm that significantly \nspeeds up the pseudo-label estimation with collision CE.", "keywords": "collision cross entropy;entropy-based clustering", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhongwen Zhang;Yuri Boykov", "authorids": "~Zhongwen_Zhang1;~Yuri_Boykov1", "gender": "M;M", "homepage": ";https://cs.uwaterloo.ca/~yboykov/", "dblp": "02/10655;b/YuriBoykov", "google_scholar": ";h6_PdYsAAAAJ", "orcid": ";0000-0001-6374-1736", "linkedin": ";", "or_profile": "~Zhongwen_Zhang1;~Yuri_Boykov1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;Professor", "bibtex": "@misc{\nanonymous2024collision,\ntitle={Collision Cross-entropy for Soft Class Labels and Entropy-based Clustering},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=dQmEIwRw16}\n}", "github": "", "project": "", "reviewers": "Puk1;C7Wi;pteP;VSk7", "site": "https://openreview.net/forum?id=dQmEIwRw16", "pdf_size": 713895, "rating": "4;4;4;5", "confidence": "3;4;3;2", "soundness": "3;2;3;3", "novelty": "1;2;3;2", "presentation": "3;3;2;2", "wc_summary": "118;96;84;114", "wc_strengths": "135;148;41;55", "wc_weaknesses": "406;285;170;283", "wc_questions": "4;153;62;1", "wc_limitations": "55;173;2;1", "wc_review": "718;855;359;454", "wc_reply_reviewers": "642;60;104;13", "wc_reply_authors": "1595;326;407;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;1", "rating_avg": [ 4.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 103.0, 13.74772708486752 ], "wc_strengths_avg": [ 94.75, 47.235447494440024 ], "wc_weaknesses_avg": [ 286.0, 83.46556176052492 ], "wc_questions_avg": [ 55.0, 61.58327695080865 ], "wc_limitations_avg": [ 57.75, 70.03347413915719 ], "wc_review_avg": [ 596.5, 198.93277759082338 ], "wc_reply_reviewers_avg": [ 204.75, 254.489071474592 ], "wc_reply_authors_avg": [ 582.0, 604.3703334876722 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lqC-U4ylQD8J:scholar.google.com/&scioq=Collision+Cross-entropy+for+Soft+Class+Labels+and+Entropy-based+Clustering&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Retrieval-Augmented Diffusion Models for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94339", "id": "dRJJt0Ji48", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dRJJt0Ji48", "openreview": "https://openreview.net/forum?id=dRJJt0Ji48", "poster": "", "project": "", "author_site": "Jingwei Liu, Ling Yang, Hongyan Li, Shenda Hong", "tldr": "", "abstract": "While time series diffusion models have received considerable focus from many recent works, the performance of existing models remains highly unstable. Factors limiting time series diffusion models include insufficient time series datasets and the absence of guidance. To address these limitations, we propose a Retrieval-Augmented Time series Diffusion model (RATD). The framework of RATD consists of two parts: an embedding-based retrieval process and a reference-guided diffusion model. In the first part, RATD retrieves the time series that are most relevant to historical time series from the database as references. The references are utilized to guide the denoising process in the second part. Our approach allows leveraging meaningful samples within the database to aid in sampling, thus maximizing the utilization of datasets. Meanwhile, this reference-guided mechanism also compensates for the deficiencies of existing time series diffusion models in terms of guidance. Experiments and visualizations on multiple datasets demonstrate the effectiveness of our approach, particularly in complicated prediction tasks. Our code is available at https://github.com/stanliu96/RATD", "keywords": "Diffusion Models;Times Series Forecasting. Retrieval-Augmented Mechanism", "primary_area": "generative_models", "supplementary_material": "", "author": "Jingwei Liu;Ling Yang;Hongyan Li;Shenda Hong", "authorids": "~Jingwei_Liu4;~Ling_Yang1;~Hongyan_Li2;~Shenda_Hong1", "gender": "M;M;F;", "homepage": ";https://yangling0818.github.io/;;", "dblp": ";01/24-6.html;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=sIKujqAAAAAJ;;", "orcid": ";0000-0003-1905-8053;0000-0001-7174-2851;", "linkedin": "%E7%BB%8F%E7%BA%AC-%E5%88%98-181399263/;;;", "or_profile": "~Jingwei_Liu4;~Ling_Yang1;~Hongyan_Li2;~Shenda_Hong1", "aff": "Peking University;Peking University;Peking University;", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;", "position": "PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\nliu2024retrievalaugmented,\ntitle={Retrieval-Augmented Diffusion Models for Time Series Forecasting},\nauthor={Jingwei Liu and Ling Yang and Hongyan Li and Shenda Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dRJJt0Ji48}\n}", "github": "", "reviewers": "uLZU;AGBG;HjSX;zh2V;FgUz", "pdf_size": 1500291, "rating": "4;5;5;6;7", "confidence": "3;3;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;2;2;3;3", "wc_summary": "152;66;65;72;83", "wc_strengths": "49;121;42;83;60", "wc_weaknesses": "59;15;79;173;344", "wc_questions": "95;98;78;101;46", "wc_limitations": "18;5;13;29;35", "wc_review": "373;305;277;458;568", "wc_reply_reviewers": "130;13;19;164;256", "wc_reply_authors": "636;125;15;83;920", "reply_reviewers": "1;1;1;2;3", "reply_authors": "4;3;2;2;5", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 87.6, 32.83047364873069 ], "wc_strengths_avg": [ 71.0, 28.600699292150182 ], "wc_weaknesses_avg": [ 134.0, 116.98888836124566 ], "wc_questions_avg": [ 83.6, 20.4215572373901 ], "wc_limitations_avg": [ 20.0, 10.807404868885037 ], "wc_review_avg": [ 396.2, 106.20621450743832 ], "wc_reply_reviewers_avg": [ 116.4, 91.77930049853289 ], "wc_reply_authors_avg": [ 355.8, 357.95720414597054 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 3.2, 1.16619037896906 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7844645405527363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=217929895425538802&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "IMDL-BenCo: A Comprehensive Benchmark and Codebase for Image Manipulation Detection & Localization", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97594", "id": "dVfNPSzpnv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dVfNPSzpnv", "openreview": "https://openreview.net/forum?id=dVfNPSzpnv", "poster": "/media/PosterPDFs/NeurIPS%202024/97594.png?t=1731354740.441627", "project": "", "author_site": "Xiaochen Ma, Xuekang Zhu, Lei Su, Bo Du, Zhuohang Jiang, Bingkui Tong, Zeyu Lei, Xinyu Yang, Chi-Man Pun, Jiancheng Lv, Ji-Zhe Zhou", "tldr": "", "abstract": "A comprehensive benchmark is yet to be established in the Image Manipulation Detection \\& Localization (IMDL) field. The absence of such a benchmark leads to insufficient and misleading model evaluations, severely undermining the development of this field. However, the scarcity of open-sourced baseline models and inconsistent training and evaluation protocols make conducting rigorous experiments and faithful comparisons among IMDL models challenging. \nTo address these challenges, we introduce IMDL-BenCo, the first comprehensive IMDL benchmark and modular codebase. IMDL-BenCo: i) decomposes the IMDL framework into standardized, reusable components and revises the model construction pipeline, improving coding efficiency and customization flexibility; ii) fully implements or incorporates training code for state-of-the-art models to establish a comprehensive IMDL benchmark; and iii) conducts deep analysis based on the established benchmark and codebase, offering new insights into IMDL model architecture, dataset characteristics, and evaluation standards.\nSpecifically, IMDL-BenCo includes common processing algorithms, 8 state-of-the-art IMDL models (1 of which are reproduced from scratch), 2 sets of standard training and evaluation protocols, 15 GPU-accelerated evaluation metrics, and 3 kinds of robustness evaluation. This benchmark and codebase represent a significant leap forward in calibrating the current progress in the IMDL field and inspiring future breakthroughs.\nCode is available at: https://github.com/scu-zjz/IMDLBenCo", "keywords": "image manipulation detection and localization;benchmark;codebase", "primary_area": "", "supplementary_material": "/attachment/0dff0534849b1bedbd8fe8f4ce736e199bd7f5c5.zip", "author": "Xiaochen Ma;Xuekang Zhu;Lei Su;Bo Du;Zhuohang Jiang;Bingkui Tong;Zeyu Lei;Xinyu Yang;Chi-Man Pun;Jiancheng Lv;Ji-Zhe Zhou", "authorids": "~Xiaochen_Ma1;~Xuekang_Zhu1;~Lei_Su2;~Bo_Du4;~Zhuohang_Jiang1;~Bingkui_Tong1;~Zeyu_Lei1;~Xinyu_Yang17;~Chi-Man_Pun1;~Jiancheng_Lv2;~Ji-Zhe_Zhou1", "gender": "M;M;;;M;M;;M;;M;M", "homepage": "https://me.xiaochen.world;https://github.com/Inkyl;https://github.com/DSLJDI;https://github.com/dddb11;http://www.zhuohangjiang.com;;https://zeyulei2.github.io/;https://github.com/ndyysheep;https://cmpun.github.io/;https://cs.scu.edu.cn/info/1303/13767.htm;https://knightzjz.github.io", "dblp": "270/7004-1;380/7889;;;;;;;p/ChiManPun;;172/4712", "google_scholar": "https://scholar.google.com.hk/citations?user=hGEIyCEAAAAJ;gZBz65gAAAAJ;;;h3vEhrgAAAAJ;2ocssHcAAAAJ;;;JTkP_EAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;-cNWmJMAAAAJ", "orcid": "0009-0006-3983-5787;;;;;;;;0000-0003-1788-3746;;0000-0002-2447-1806", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAD3zzDcBn-A7g2T8mVpJUiB3MnOgPaqwDCs;;;;;;;;;;", "or_profile": "~Xiaochen_Ma1;~Xuekang_Zhu1;~Lei_Su2;~Bo_Du4;~Zhuohang_Jiang1;~Bingkui_Tong1;~Zeyu_Lei1;~Xinyu_Yang17;~Chi-Man_Pun1;~Jiancheng_Lv2;~Ji-Zhe_Zhou1", "aff": "Sichuan University;Sichuan University;Sichuan University;Sichuan University;Sichuan University;Sichuan University;Sichuan University;Sichuan University;University of Macau;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;um.edu.mo;scu.edu.cn;scu.edu.cn", "position": "Intern;MS student;MS student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nma2024imdlbenco,\ntitle={{IMDL}-BenCo: A Comprehensive Benchmark and Codebase for Image Manipulation Detection \\& Localization},\nauthor={Xiaochen Ma and Xuekang Zhu and Lei Su and Bo Du and Zhuohang Jiang and Bingkui Tong and Zeyu Lei and Xinyu Yang and Chi-Man Pun and Jiancheng Lv and Ji-Zhe Zhou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=dVfNPSzpnv}\n}", "github": "", "reviewers": "iyN9;21JT;Kx1E;oatD;wKx9", "pdf_size": 30059745, "rating": "6;7;8;9;9", "confidence": "2;5;5;4;5", "wc_summary_and_contributions": "50;43;134;79;177", "wc_strengths": "75;78;60;78;88", "wc_improvement": "122;2;36;3;112", "wc_limitations": "1;271;18;167;4", "wc_correctness": "1;10;27;1;1", "wc_clarity": "1;4;26;1;1", "wc_relation_to_prior_work": "1;13;1;1;1", "wc_documentation": "1;4;11;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "253;426;314;332;386", "wc_reply_reviewers": "0;37;14;83;116", "wc_reply_authors": "74;150;103;135;122", "reply_reviewers": "0;1;1;1;1", "reply_authors": "3;6;3;5;4", "rating_avg": [ 7.8, 1.16619037896906 ], "confidence_avg": [ 4.2, 1.16619037896906 ], "wc_summary_and_contributions_avg": [ 96.6, 51.41439487147544 ], "wc_strengths_avg": [ 75.8, 9.042123644365851 ], "wc_improvement_avg": [ 55.0, 52.17662311802097 ], "wc_limitations_avg": [ 92.2, 108.78124838408502 ], "wc_correctness_avg": [ 8.0, 10.119288512538814 ], "wc_clarity_avg": [ 6.6, 9.76933979345585 ], "wc_relation_to_prior_work_avg": [ 3.4, 4.800000000000001 ], "wc_documentation_avg": [ 3.6, 3.8781438859330635 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 342.2, 59.660372107455046 ], "wc_reply_reviewers_avg": [ 50.0, 43.382023926967726 ], "wc_reply_authors_avg": [ 116.8, 26.39242315514057 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 4.2, 1.16619037896906 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.6176470588235294, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14922724760422259959&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn;um.edu.mo;scu.edu.cn;scu.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;0;1;0;0", "aff_unique_norm": "Sichuan University;University of Macau", "aff_unique_dep": ";", "aff_unique_url": "https://www.scu.edu.cn;https://www.um.edu.mo", "aff_unique_abbr": "SCU;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ReMAP: Neural Model Reprogramming with Network Inversion and Retrieval-Augmented Mapping for Adaptive Motion Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94338", "id": "dVqZ0a7LdP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dVqZ0a7LdP", "openreview": "https://openreview.net/forum?id=dVqZ0a7LdP", "poster": "", "project": "", "author_site": "Sharmita Dey, Sarath Ravindran Nair", "tldr": "", "abstract": "Mobility impairment caused by limb loss, aging, stroke, and other movement deficiencies is a significant challenge faced by millions of individuals worldwide. Advanced assistive technologies, such as prostheses and orthoses, have the potential to greatly improve the quality of life for such individuals. A critical component in the design of these technologies is the accurate forecasting of reference joint motion for impaired limbs, which is hindered by the scarcity of joint locomotion data available for these patients. To address this, we propose ReMAP, a novel model repurposing strategy that leverages deep learning's reprogramming property, incorporating network inversion principles and retrieval-augmented mapping. Our approach adapts models originally designed for able-bodied individuals to forecast joint motion in limb-impaired patients without altering model parameters. We demonstrate the efficacy of ReMAP through extensive empirical studies on data from below-knee amputated patients, showcasing significant improvements over traditional transfer learning and fine-tuning methods. These findings have significant implications for advancing assistive technology and mobility for patients with amputations, stroke, or aging.", "keywords": "Model reprogramming;Patient mobility;Time series;Forecasting;Retrieval;Network inversion", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Sharmita Dey;Sarath Ravindran Nair", "authorids": "~Sharmita_Dey1;~Sarath_Ravindran_Nair1", "gender": "F;M", "homepage": ";", "dblp": "246/1047;", "google_scholar": "dRn_bawAAAAJ;", "orcid": ";0000-0003-3911-9618", "linkedin": "sharmita-dey-23087b24/;", "or_profile": "~Sharmita_Dey1;~Sarath_Ravindran_Nair1", "aff": "Georg-August Universit\u00e4t G\u00f6ttingen;Georg-August Universit\u00e4t G\u00f6ttingen", "aff_domain": "cs.uni-goettingen.de;uni-goettingen.de", "position": "Postdoc;PhD student", "bibtex": "@inproceedings{\ndey2024remap,\ntitle={Re{MAP}: Neural Model Reprogramming with Network Inversion and Retrieval-Augmented Mapping for Adaptive Motion Forecasting},\nauthor={Sharmita Dey and Sarath Ravindran Nair},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dVqZ0a7LdP}\n}", "github": "", "reviewers": "rCTE;wD18;Du29;2Em3", "pdf_size": 4753041, "rating": "4;5;5;7", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;2;3", "wc_summary": "117;83;229;188", "wc_strengths": "128;67;41;25", "wc_weaknesses": "576;280;157;197", "wc_questions": "2;4;82;64", "wc_limitations": "1;21;39;67", "wc_review": "824;455;548;541", "wc_reply_reviewers": "498;25;0;50", "wc_reply_authors": "750;199;261;192", "reply_reviewers": "3;1;0;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 154.25, 57.42549520900973 ], "wc_strengths_avg": [ 65.25, 39.20698279643564 ], "wc_weaknesses_avg": [ 302.5, 164.01905377120062 ], "wc_questions_avg": [ 38.0, 35.58089374931439 ], "wc_limitations_avg": [ 32.0, 24.269322199023193 ], "wc_review_avg": [ 592.0, 138.8614417323974 ], "wc_reply_reviewers_avg": [ 143.25, 205.57647603750775 ], "wc_reply_authors_avg": [ 350.5, 232.2094959298607 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fZMTP5Z4yRIJ:scholar.google.com/&scioq=ReMAP:+Neural+Model+Reprogramming+with+Network+Inversion+and+Retrieval-Augmented+Mapping+for+Adaptive+Motion+Forecasting&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "cs.uni-goettingen.de;uni-goettingen.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georg-August Universit\u00e4t G\u00f6ttingen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-goettingen.de", "aff_unique_abbr": "GAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Breaking the curse of dimensionality in structured density estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94337", "id": "dWwin2uGYE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dWwin2uGYE", "openreview": "https://openreview.net/forum?id=dWwin2uGYE", "poster": "/media/PosterPDFs/NeurIPS%202024/94337.png?t=1733740506.0061374", "project": "", "author_site": "Robert Vandermeulen, Wai Ming Tai, Bryon Aragam", "tldr": "", "abstract": "We consider the problem of estimating a structured multivariate density, subject to Markov conditions implied by an undirected graph. In the worst case, without Markovian assumptions, this problem suffers from the curse of dimensionality. Our main result shows how the curse of dimensionality can be avoided or greatly alleviated under the Markov property, and applies to arbitrary graphs. While existing results along these lines focus on sparsity or manifold assumptions, we introduce a new graphical quantity called ``graph resilience'' and show that it dictates the optimal sample complexity. Surprisingly, although one might expect the sample complexity of this problem to scale with local graph parameters such as the degree, this turns out not to be the case. Through explicit examples, we compute uniform deviation bounds and illustrate how the curse of dimensionality in density estimation can thus be circumvented. Notable examples where the rate improves substantially include sequential, hierarchical, and spatial data.", "keywords": "nonparametric statistics;density estimation;graphical models;sample complexity;curse of dimensionality", "primary_area": "learning_theory", "supplementary_material": "", "author": "Robert A. Vandermeulen;Wai Ming Tai;Bryon Aragam", "authorids": "~Robert_A._Vandermeulen2;~Wai_Ming_Tai3;~Bryon_Aragam1", "gender": "M;;M", "homepage": ";http://bryonaragam.com/;https://www.user.tu-berlin.de/rvdm/", "dblp": "156/0116;140/7564;137/3375", "google_scholar": ";u-W3_9QAAAAJ;eSjfzOUAAAAJ", "orcid": ";;0000-0001-6863-7006", "linkedin": ";;", "or_profile": "~Wai_Ming_Tai3;~Bryon_Aragam1;~Robert_Vandermeulen1", "aff": "Nanyang Technological University;Booth School of Business;Berlin Institute for the Foundations of Learning and Data", "aff_domain": "ntu.edu.sg;chicagobooth.edu;tu-berlin.de", "position": "Postdoc;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nvandermeulen2024breaking,\ntitle={Breaking the curse of dimensionality in structured density estimation},\nauthor={Robert A. Vandermeulen and Wai Ming Tai and Bryon Aragam},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dWwin2uGYE}\n}", "github": "", "reviewers": "P9wL;yapz;HdwX", "pdf_size": 479236, "rating": "5;6;6", "confidence": "4;5;3", "soundness": "3;4;2", "novelty": "3;4;3", "presentation": "2;4;4", "wc_summary": "29;64;63", "wc_strengths": "34;30;79", "wc_weaknesses": "152;50;104", "wc_questions": "35;40;139", "wc_limitations": "25;22;12", "wc_review": "275;206;397", "wc_reply_reviewers": "72;93;53", "wc_reply_authors": "427;100;0", "reply_reviewers": "1;2;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 52.0, 16.268579122549905 ], "wc_strengths_avg": [ 47.666666666666664, 22.21611027060218 ], "wc_weaknesses_avg": [ 102.0, 41.66533331199932 ], "wc_questions_avg": [ 71.33333333333333, 47.891080125171065 ], "wc_limitations_avg": [ 19.666666666666668, 5.557777333511022 ], "wc_review_avg": [ 292.6666666666667, 78.96975511056256 ], "wc_reply_reviewers_avg": [ 72.66666666666667, 16.33673433979046 ], "wc_reply_authors_avg": [ 175.66666666666666, 182.34826264291328 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=305610582450983648&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;chicagobooth.edu;tu-berlin.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Nanyang Technological University;University of Chicago Booth School of Business;Berlin Institute for the Foundations of Learning and Data", "aff_unique_dep": ";Booth School of Business;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.chicagobooth.edu;https://www.bifold.berlin", "aff_unique_abbr": "NTU;Booth;BIFOLD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Singapore;United States;Germany" }, { "title": "On Weak Regret Analysis for Dueling Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94336", "id": "dY4YGqvfgW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dY4YGqvfgW", "openreview": "https://openreview.net/forum?id=dY4YGqvfgW", "poster": "", "project": "", "author_site": "El Mehdi Saad, Alexandra Carpentier, Tom\u00e1\u0161 Koc\u00e1k, Nicolas Verzelen", "tldr": "", "abstract": "We consider the problem of $K$-armed dueling bandits in the stochastic setting, under the sole assumption of the existence of a Condorcet winner. We study the objective of weak regret minimization, where the learner doesn't incur any loss if one of the selected arms is a Condorcet winner\u2014unlike strong regret minimization, where the learner has to select the Condorcet winner twice to incur no loss. This study is particularly motivated by practical scenarios such as content recommendation and online advertising, where frequently only one optimal choice out of the two presented options is necessary to achieve user satisfaction or engagement. This necessitates the development of strategies with more exploration. While existing literature introduces strategies for weak regret with constant bounds (that do not depend on the time horizon), the optimality of these strategies remains an unresolved question. This problem turns out to be really challenging as the optimal regret should heavily depend on the full structure of the dueling problem at hand, and in particular on whether the Condorcet winner has a large minimal optimality gap with the other arms. Our contribution is threefold: first, when said optimality gap is not negligible compared to other properties of the gap matrix, we characterize the optimal budget as a function of $K$ and the optimality gap. Second, we propose a new strategy called \\wrtinf that achieves this optimal regret and improves over the state-of-the-art both in $K$ and the optimality gap. When the optimality gap is negligible, we propose another algorithm that outperforms our first algorithm, highlighting the subtlety of this dueling bandit problem. Finally, we provide numerical simulations to assess our theoretical findings.", "keywords": "Dueling bandits;Weak regret;Preference-based learning;Bandits theory", "primary_area": "bandits", "supplementary_material": "/attachment/fc7f706df2b2213be61dbb949388c841aa7f0b05.zip", "author": "El Mehdi Saad;Alexandra Carpentier;Tom\u00e1\u0161 Koc\u00e1k;Nicolas Verzelen", "authorids": "~El_Mehdi_Saad1;~Alexandra_Carpentier2;~Tom\u00e1\u0161_Koc\u00e1k1;~Nicolas_Verzelen1", "gender": "M;F;M;", "homepage": ";https://sites.google.com/site/alexandracarpentierresearch/?pli=1;;https://verzelen.montpellier.inrae.fr/", "dblp": "279/4097;;;40/1671.html", "google_scholar": "https://scholar.google.com/citations?hl=fr;;https://scholar.google.sk/citations?hl=en;", "orcid": ";;;", "linkedin": "el-mehdi-saad-b29949a9/;;;", "or_profile": "~El_Mehdi_Saad1;~Alexandra_Carpentier2;~Tom\u00e1\u0161_Koc\u00e1k1;~Nicolas_Verzelen1", "aff": "CentraleSupelec;Universit\u00e4t Potsdam;Universit\u00e4t Potsdam;INRAE", "aff_domain": "centralesupelec.fr;uni-potsdam.de;uni-potsdam.de;inrae.fr", "position": "Assistant Professor;Full Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nsaad2024on,\ntitle={On Weak Regret Analysis for Dueling Bandits},\nauthor={El Mehdi Saad and Alexandra Carpentier and Tom{\\'a}{\\v{s}} Koc{\\'a}k and Nicolas Verzelen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dY4YGqvfgW}\n}", "github": "", "reviewers": "fTgP;xTXf;bFCz", "pdf_size": 12132489, "rating": "7;7;7", "confidence": "4;3;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "2;4;4", "wc_summary": "74;212;58", "wc_strengths": "270;442;49", "wc_weaknesses": "584;224;19", "wc_questions": "275;116;181", "wc_limitations": "17;37;14", "wc_review": "1220;1031;321", "wc_reply_reviewers": "99;30;58", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 114.66666666666667, 69.13432979031153 ], "wc_strengths_avg": [ 253.66666666666666, 160.85673404893214 ], "wc_weaknesses_avg": [ 275.6666666666667, 233.53562659450677 ], "wc_questions_avg": [ 190.66666666666666, 65.27037783115877 ], "wc_limitations_avg": [ 22.666666666666668, 10.208928554075703 ], "wc_review_avg": [ 857.3333333333334, 387.0144987235261 ], "wc_reply_reviewers_avg": [ 62.333333333333336, 28.335294049804546 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=85532968321719053&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "centralesupelec.fr;uni-potsdam.de;uni-potsdam.de;inrae.fr", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "CentraleSup\u00e9lec;University of Potsdam;INRAE", "aff_unique_dep": ";;", "aff_unique_url": "https://www.centralesupelec.fr;https://www.uni-potsdam.de;https://www.inrae.fr", "aff_unique_abbr": "CS;UP;INRAE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "France;Germany" }, { "title": "Generalizing CNNs to graphs with learnable neighborhood quantization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94335", "id": "dYIqAZXQNV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dYIqAZXQNV", "openreview": "https://openreview.net/forum?id=dYIqAZXQNV", "poster": "/media/PosterPDFs/NeurIPS%202024/94335.png?t=1733959197.3381274", "project": "", "author_site": "Isaac Osafo Nkansah, Neil Gallagher, Ruchi Sandilya, Conor Liston, Logan Grosenick", "tldr": "", "abstract": "Convolutional neural networks (CNNs) have led to a revolution in analyzing array data. However, many important sources of data, such as biological and social networks, are naturally structured as graphs rather than arrays, making the design of graph neural network (GNN) architectures that retain the strengths of CNNs an active and exciting area of research. Here, we introduce Quantized Graph Convolution Networks (QGCNs), the first framework for GNNs that formally and directly extends CNNs to graphs. QGCNs do this by decomposing the convolution operation into non-overlapping sub-kernels, allowing them to fit graph data while reducing to a 2D CNN layer on array data. We generalize this approach to graphs of arbitrary size and dimension by approaching sub-kernel assignment as a learnable multinomial assignment problem. Integrating this approach into a residual network architecture, we demonstrate performance that matches or exceeds other state-of-the-art GNNs on benchmark graph datasets and for predicting properties of nonlinear dynamics on a new finite element graph dataset. In summary, QGCNs are a novel GNN framework that generalizes CNNs and their strengths to graph data, allowing for more accurate and expressive models.", "keywords": "graph convolutional networks;graph neural networks;quantization", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Isaac Osafo Nkansah;Neil Gallagher;Ruchi Sandilya;Conor Liston;Logan Grosenick", "authorids": "~Isaac_Osafo_Nkansah1;~Neil_Gallagher1;~Ruchi_Sandilya1;~Conor_Liston1;~Logan_Grosenick1", "gender": "M;;F;M;M", "homepage": ";;https://ruchi-sandilya.github.io/sandilyaruchi/;https://research.cornell.edu/researchers/conor-liston;http://grosenicklab.org", "dblp": ";;;334/7613.html;05/2346", "google_scholar": ";;x1W7R2IAAAAJ;;xw1L-WoAAAAJ", "orcid": ";;0000-0001-7565-2191;;0000-0003-3216-0319", "linkedin": "isaac-osafo-nkansah-387843149/;;ruchi-sandilya/;;logan-grosenick-8235771", "or_profile": "~Isaac_Osafo_Nkansah1;~Neil_Gallagher1;~Ruchi_Sandilya1;~Conor_Liston1;~Logan_Grosenick1", "aff": "Stanford University;;Weill Cornell Medicine, Cornell University;Weill Cornell Medicine, Cornell University;Weill Cornell Medicine, Cornell University", "aff_domain": "stanford.edu;;med.cornell.edu;med.cornell.edu;med.cornell.edu", "position": "Researcher;;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnkansah2024generalizing,\ntitle={Generalizing {CNN}s to graphs with learnable neighborhood quantization},\nauthor={Isaac Osafo Nkansah and Neil Gallagher and Ruchi Sandilya and Conor Liston and Logan Grosenick},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dYIqAZXQNV}\n}", "github": "", "reviewers": "5Ti4;XG75;rsM6;1wWD", "pdf_size": 2228489, "rating": "5;6;6;7", "confidence": "3;4;1;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "78;83;45;66", "wc_strengths": "79;49;51;79", "wc_weaknesses": "103;104;19;66", "wc_questions": "14;204;26;45", "wc_limitations": "9;76;1;7", "wc_review": "283;516;142;263", "wc_reply_reviewers": "115;0;22;24", "wc_reply_authors": "132;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.0, 14.64581851587681 ], "wc_strengths_avg": [ 64.5, 14.517231140957975 ], "wc_weaknesses_avg": [ 73.0, 34.734708865916815 ], "wc_questions_avg": [ 72.25, 76.86473508703455 ], "wc_limitations_avg": [ 23.25, 30.597181242722343 ], "wc_review_avg": [ 301.0, 135.34585327966278 ], "wc_reply_reviewers_avg": [ 40.25, 44.17224807500746 ], "wc_reply_authors_avg": [ 33.0, 57.15767664977295 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uXgt-347p8oJ:scholar.google.com/&scioq=Generalizing+CNNs+to+graphs+with+learnable+neighborhood+quantization&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "stanford.edu;;med.cornell.edu;med.cornell.edu;med.cornell.edu", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Stanford University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.weill.cornell.edu", "aff_unique_abbr": "Stanford;Cornell", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Stanford;Weill Cornell Medicine", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Active Learning for Derivative-Based Global Sensitivity Analysis with Gaussian Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94334", "id": "da0ZJatRCN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=da0ZJatRCN", "openreview": "https://openreview.net/forum?id=da0ZJatRCN", "poster": "/media/PosterPDFs/NeurIPS%202024/94334.png?t=1733780784.3390694", "project": "", "author_site": "Syrine Belakaria, Ben Letham, Jana Doppa, Barbara Engelhardt, Stefano Ermon, Eytan Bakshy", "tldr": "", "abstract": "We consider the problem of active learning for global sensitivity analysis of expensive black-box functions. Our aim is to efficiently learn the importance of different input variables, e.g., in vehicle safety experimentation, we study the impact of the thickness of various components on safety objectives. Since function evaluations are expensive, we use active learning to prioritize experimental resources where they yield the most value. We propose novel active learning acquisition functions that directly target key quantities of derivative-based global sensitivity measures (DGSMs) under Gaussian process surrogate models.\nWe showcase the first application of active learning directly to DGSMs, and develop tractable uncertainty reduction and information gain acquisition functions for these measures. Through comprehensive evaluation on synthetic and real-world problems, our study demonstrates how these active learning acquisition strategies substantially enhance the sample efficiency of DGSM estimation, particularly with limited evaluation budgets. Our work paves the way for more efficient and accurate sensitivity analysis in various scientific and engineering applications.", "keywords": "Global Sensitivity Analysis;Gaussian Processes;Bayesian Active Learning;Bayesian optimization", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/00cabc7c0958052fbf2e77be4d8b95a6575cc706.zip", "author": "Syrine Belakaria;Benjamin Letham;Jana Doppa;Barbara E Engelhardt;Stefano Ermon;Eytan Bakshy", "authorids": "~Syrine_Belakaria1;~Benjamin_Letham1;~Jana_Doppa1;~Barbara_Engelhardt1;~Stefano_Ermon1;~Eytan_Bakshy1", "gender": "F;;;F;M;M", "homepage": "https://www.sbelakaria.com/;;;https://beehive.stanford.edu;http://cs.stanford.edu/~ermon/;http://eytan.github.io", "dblp": "200/8277;;;27/2355;47/8135;58/2226", "google_scholar": "9NNlVb8AAAAJ;;;https://scholar.google.com.tw/citations?user=VEGtG7YAAAAJ;;8y9rrq0AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Syrine_Belakaria1;~Benjamin_Letham1;~Jana_Doppa1;~Barbara_Engelhardt1;~Stefano_Ermon1;~Eytan_Bakshy1", "aff": "Stanford University;;;Stanford University;Stanford University;Meta", "aff_domain": "stanford.edu;;;stanford.edu;stanford.edu;meta.com", "position": "Postdoc;;;Full Professor;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nbelakaria2024active,\ntitle={Active Learning for Derivative-Based Global Sensitivity Analysis with Gaussian Processes},\nauthor={Syrine Belakaria and Benjamin Letham and Jana Doppa and Barbara E Engelhardt and Stefano Ermon and Eytan Bakshy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=da0ZJatRCN}\n}", "github": "", "reviewers": "Uip4;nwDp;oJrN;uHUu", "pdf_size": 18446777, "rating": "6;6;6;8", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;4;3;4", "wc_summary": "95;43;106;22", "wc_strengths": "49;37;63;67", "wc_weaknesses": "58;91;81;29", "wc_questions": "66;97;67;118", "wc_limitations": "1;7;10;169", "wc_review": "269;275;327;405", "wc_reply_reviewers": "18;78;0;12", "wc_reply_authors": "83;0;173;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 66.5, 35.01785258978626 ], "wc_strengths_avg": [ 54.0, 11.874342087037917 ], "wc_weaknesses_avg": [ 64.75, 23.85765076448224 ], "wc_questions_avg": [ 87.0, 21.805962487356524 ], "wc_limitations_avg": [ 46.75, 70.65541380531289 ], "wc_review_avg": [ 319.0, 54.53439281774392 ], "wc_reply_reviewers_avg": [ 27.0, 30.14962686336267 ], "wc_reply_authors_avg": [ 64.0, 71.47377141301556 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16047021548589361460&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "stanford.edu;;;stanford.edu;stanford.edu;meta.com", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.stanford.edu;https://meta.com", "aff_unique_abbr": "Stanford;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DeltaDock: A Unified Framework for Accurate, Efficient, and Physically Reliable Molecular Docking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94333", "id": "dao67XTSPd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dao67XTSPd", "openreview": "https://openreview.net/forum?id=dao67XTSPd", "poster": "", "project": "", "author_site": "Jiaxian Yan, ZAIXI ZHANG, Jintao Zhu, Kai Zhang, Jianfeng Pei, Qi Liu", "tldr": "", "abstract": "Molecular docking, a technique for predicting ligand binding poses, is crucial in structure-based drug design for understanding protein-ligand interactions. Recent advancements in docking methods, particularly those leveraging geometric deep learning (GDL), have demonstrated significant efficiency and accuracy advantages over traditional sampling methods. Despite these advancements, current methods are often tailored for specific docking settings, and limitations such as the neglect of protein side-chain structures, difficulties in handling large binding pockets, and challenges in predicting physically valid structures exist. To accommodate various docking settings and achieve accurate, efficient, and physically reliable docking, we propose a novel two-stage docking framework, DeltaDock, consisting of pocket prediction and site-specific docking. We innovatively reframe the pocket prediction task as a pocket-ligand alignment problem rather than direct prediction in the first stage. Then we follow a bi-level coarse-to-fine iterative refinement process to perform site-specific docking. Comprehensive experiments demonstrate the superior performance of DeltaDock. Notably, in the blind docking setting, DeltaDock achieves a 31\\% relative improvement over the docking success rate compared with the previous state-of-the-art GDL model \nDiffDock. With the consideration of physical validity, this improvement increases to about 300\\%.", "keywords": "Molecular Docking; Binding Pocket Prediction; Contrastive Learning; Iterative Refinement", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Jiaxian Yan;ZAIXI ZHANG;Jintao Zhu;Kai Zhang;Jianfeng Pei;Qi Liu", "authorids": "~Jiaxian_Yan1;~ZAIXI_ZHANG2;~Jintao_Zhu1;~Kai_Zhang12;~Jianfeng_Pei1;~Qi_Liu3", "gender": "M;M;M;M;M;M", "homepage": ";http://home.ustc.edu.cn/~zaixi/;;http://home.ustc.edu.cn/~sa517494/;http://scholar.google.com/citations?user=gb--OvYAAAAJ&hl=zh-TW&oi=ao;http://staff.ustc.edu.cn/~qiliuql/", "dblp": "328/2382;267/9295.html;;55/957-38;;95/2446-3", "google_scholar": "QY5-q1gAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;t6IIpAUAAAAJ;;5EoHAFwAAAAJ", "orcid": ";;0000-0002-7136-3314;0000-0001-5335-2470;;0000-0001-6956-5550", "linkedin": ";;;;;", "or_profile": "~Jiaxian_Yan1;~ZAIXI_ZHANG2;~Jintao_Zhu1;~Kai_Zhang12;~Jianfeng_Pei1;~Qi_Liu3", "aff": "University of Science and Technology of China;University of Science and Technology of China;Peking University;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;pku.edu.cn;ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;PhD student;PhD student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nyan2024deltadock,\ntitle={DeltaDock: A Unified Framework for Accurate, Efficient, and Physically Reliable Molecular Docking},\nauthor={Jiaxian Yan and ZAIXI ZHANG and Jintao Zhu and Kai Zhang and Jianfeng Pei and Qi Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dao67XTSPd}\n}", "github": "", "reviewers": "qFb1;AHbE;KF4e", "pdf_size": 7891913, "rating": "3;6;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;4;3", "presentation": "3;3;3", "wc_summary": "75;143;77", "wc_strengths": "24;199;98", "wc_weaknesses": "103;270;176", "wc_questions": "9;56;5", "wc_limitations": "1;5;5", "wc_review": "212;673;361", "wc_reply_reviewers": "55;20;37", "wc_reply_authors": "560;543;182", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 98.33333333333333, 31.594654962860762 ], "wc_strengths_avg": [ 107.0, 71.72633175247893 ], "wc_weaknesses_avg": [ 183.0, 68.35690650304961 ], "wc_questions_avg": [ 23.333333333333332, 23.156472577277878 ], "wc_limitations_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_review_avg": [ 415.3333333333333, 192.08389370851015 ], "wc_reply_reviewers_avg": [ 37.333333333333336, 14.29063407348401 ], "wc_reply_authors_avg": [ 428.3333333333333, 174.3221793754949 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10549046410709696507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;ustc.edu.cn;pku.edu.cn;ustc.edu.cn;;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "USTC;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FUSE: Fast Unified Simulation and Estimation for PDEs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94332", "id": "dbnEf790Kv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dbnEf790Kv", "openreview": "https://openreview.net/forum?id=dbnEf790Kv", "poster": "", "project": "", "author_site": "Levi Lingsch, Dana Grund, Siddhartha Mishra, Georgios Kissas", "tldr": "", "abstract": "The joint prediction of continuous fields and statistical estimation of the underlying discrete parameters is a common problem for many physical systems, governed by PDEs. Hitherto, it has been separately addressed by employing operator learning surrogates for field prediction while using simulation-based inference (and its variants) for statistical parameter determination. Here, we argue that solving both problems within the same framework can lead to consistent gains in accuracy and robustness. To this end, we propose a novel and flexible formulation of the operator learning problem that jointly predicts continuous quantities and infers distributions of discrete parameters, thereby amortizing the cost of both the inverse and the surrogate models to a joint pre-training step. We present the capabilities of the proposed methodology for predicting continuous and discrete biomarkers in full-body haemodynamics simulations under different levels of missing information. We also consider a test case for atmospheric large-eddy simulation of a two-dimensional dry cold bubble, where we infer both continuous time-series and information about the system's conditions. We present comparisons against different baselines to showcase significantly increased accuracy in both the inverse and the surrogate tasks.", "keywords": "Forward and Inverse Problems;PDEs;Neural Operators;Neural Posterior Estimation", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/a261789e085ae32d183894c4c2e68740d079b189.zip", "author": "Levi E. Lingsch;Dana Grund;Siddhartha Mishra;Georgios Kissas", "authorids": "~Levi_E._Lingsch1;~Dana_Grund1;~Siddhartha_Mishra1;~Georgios_Kissas1", "gender": "F;M;M;M", "homepage": ";http://www.sam.math.ethz.ch/;;", "dblp": ";07/2856.html;241/6304;304/9412", "google_scholar": ";FmEqyNcAAAAJ;PEwbH74AAAAJ;9pJIGJUAAAAJ", "orcid": "0000-0002-8346-7081;;0009-0004-8879-3398;", "linkedin": ";;;levi-l-1996a3151", "or_profile": "~Dana_Grund1;~Siddhartha_Mishra1;~Georgios_Kissas1;~Levi_Evan_Lingsch1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;Full Professor;Postdoc;MS student", "bibtex": "@inproceedings{\nlingsch2024fuse,\ntitle={{FUSE}: Fast Unified Simulation and Estimation for {PDE}s},\nauthor={Levi E. Lingsch and Dana Grund and Siddhartha Mishra and Georgios Kissas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dbnEf790Kv}\n}", "github": "", "reviewers": "JJ2R;4oLp;s1Yg;X7vi;goJy", "pdf_size": 8580481, "rating": "5;6;6;6;7", "confidence": "4;4;4;3;4", "soundness": "3;2;3;4;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "83;105;84;175;135", "wc_strengths": "54;53;121;73;90", "wc_weaknesses": "132;246;150;55;97", "wc_questions": "257;107;45;120;111", "wc_limitations": "73;5;1;13;12", "wc_review": "599;516;401;436;445", "wc_reply_reviewers": "66;373;0;38;21", "wc_reply_authors": "20;1650;0;17;14", "reply_reviewers": "1;2;0;1;1", "reply_authors": "2;5;1;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 116.4, 34.857423886454946 ], "wc_strengths_avg": [ 78.2, 25.372425977820882 ], "wc_weaknesses_avg": [ 136.0, 63.865483635528825 ], "wc_questions_avg": [ 128.0, 69.74811825418662 ], "wc_limitations_avg": [ 20.8, 26.47564918939666 ], "wc_review_avg": [ 479.4, 70.50843921120365 ], "wc_reply_reviewers_avg": [ 99.6, 138.39739881948648 ], "wc_reply_authors_avg": [ 340.2, 654.9358441862836 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.4, 1.3564659966250538 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12242274324934768252&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Decision Mamba: A Multi-Grained State Space Model with Self-Evolution Regularization for Offline RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94331", "id": "dc4xbVfdzy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dc4xbVfdzy", "openreview": "https://openreview.net/forum?id=dc4xbVfdzy", "poster": "", "project": "", "author_site": "Qi Lv, Xiang Deng, Gongwei Chen, MICHAEL YU WANG, Liqiang Nie", "tldr": "", "abstract": "While the conditional sequence modeling with the transformer architecture has demonstrated its effectiveness in dealing with offline reinforcement learning (RL) tasks, it is struggle to handle out-of-distribution states and actions.\nExisting work attempts to address this issue by data augmentation with the learned policy or adding extra constraints with the value-based RL algorithm. However, these studies still fail to overcome the following challenges: (1) insufficiently utilizing the historical temporal information among inter-steps, (2) overlooking the local intra-step relationships among return-to-gos (RTGs), states, and actions, (3) overfitting suboptimal trajectories with noisy labels. To address these challenges, we propose $\\textbf{D}$ecision $\\textbf{M}$amba ($\\textbf{DM}$), a novel multi-grained state space model (SSM) with a self-evolving policy learning strategy.\nDM explicitly models the historical hidden state to extract the temporal information by using the mamba architecture. To capture the relationship among RTG-state-action triplets, a fine-grained SSM module is designed and integrated into the original coarse-grained SSM in mamba, resulting in a novel mamba architecture tailored for offline RL. Finally, to mitigate the overfitting issue on noisy trajectories, a self-evolving policy is proposed by using progressive regularization. The policy evolves by using its own past knowledge to refine the suboptimal actions, thus enhancing its robustness on noisy demonstrations. Extensive experiments on various tasks show that DM outperforms other baselines substantially.", "keywords": "Decision Transformer;Mamba;Offline Reinforcement Learning;Autoregressive Sequence Modeling;State Space Model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Qi Lv;Xiang Deng;Gongwei Chen;Michael Y Wang;Liqiang Nie", "authorids": "~Qi_Lv1;~Xiang_Deng1;~Gongwei_Chen1;~Michael_Y_Wang1;~Liqiang_Nie2", "gender": "M;;M;;M", "homepage": "https://github.com/Aopolin-Lv;;;;https://liqiangnie.github.io/index.html", "dblp": ";;237/9231;;92/8277", "google_scholar": ";;Mpg0w3cAAAAJ;;yywVMhUAAAAJ", "orcid": "0000-0002-8507-7167;;0000-0002-0634-6075;;0000-0003-1476-0273", "linkedin": "qi-lv-%EF%BC%88%E5%90%95%E5%A5%87%EF%BC%89-075614311/;;;;", "or_profile": "~Qi_Lv1;~Xiang_Deng1;~Gongwei_Chen1;~Michael_Y_Wang1;~Liqiang_Nie2", "aff": "Harbin Institute of Technology;;Harbin Institute of Technology;;Shandong University", "aff_domain": "hit.edu.cn;;hit.edu.cn;;sdu.edu.cn", "position": "PhD student;;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nlv2024decision,\ntitle={Decision Mamba: A Multi-Grained State Space Model with Self-Evolution Regularization for Offline {RL}},\nauthor={Qi Lv and Xiang Deng and Gongwei Chen and Michael Y Wang and Liqiang Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dc4xbVfdzy}\n}", "github": "", "reviewers": "2Wf9;qjYZ;vt4f;K8tX", "pdf_size": 1844022, "rating": "5;6;6;6", "confidence": "4;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;2", "wc_summary": "70;124;75;67", "wc_strengths": "63;112;34;34", "wc_weaknesses": "153;52;40;139", "wc_questions": "94;20;42;85", "wc_limitations": "17;7;3;17", "wc_review": "397;315;194;342", "wc_reply_reviewers": "18;0;21;56", "wc_reply_authors": "73;55;92;38", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 23.27015255644019 ], "wc_strengths_avg": [ 60.75, 31.869852525545205 ], "wc_weaknesses_avg": [ 96.0, 50.42320894191484 ], "wc_questions_avg": [ 60.25, 30.433328769623607 ], "wc_limitations_avg": [ 11.0, 6.164414002968976 ], "wc_review_avg": [ 312.0, 74.25967950375224 ], "wc_reply_reviewers_avg": [ 23.75, 20.27775875189366 ], "wc_reply_authors_avg": [ 64.5, 20.130822139197395 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=938495297424901643&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "hit.edu.cn;;hit.edu.cn;;sdu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harbin Institute of Technology;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;http://www.sdu.edu.cn", "aff_unique_abbr": "HIT;SDU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Domain Adaptation for Large-Vocabulary Object Detectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94330", "id": "deZpmEfmTo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=deZpmEfmTo", "openreview": "https://openreview.net/forum?id=deZpmEfmTo", "poster": "/media/PosterPDFs/NeurIPS%202024/94330.png?t=1731394532.8957887", "project": "", "author_site": "Kai Jiang, Jiaxing Huang, Weiying Xie, Jie Lei, Yunsong Li, Ling Shao, Shijian Lu", "tldr": "", "abstract": "Large-vocabulary object detectors (LVDs) aim to detect objects of many categories, which learn super objectness features and can locate objects accurately while applied to various downstream data. However, LVDs often struggle in recognizing the located objects due to domain discrepancy in data distribution and object vocabulary. At the other end, recent vision-language foundation models such as CLIP demonstrate superior open-vocabulary recognition capability. \nThis paper presents KGD, a Knowledge Graph Distillation technique that exploits the implicit knowledge graphs (KG) in CLIP for effectively adapting LVDs to various downstream domains.\nKGD consists of two consecutive stages: 1) KG extraction that employs CLIP to encode downstream domain data as nodes and their feature distances as edges, constructing KG that inherits the rich semantic relations in CLIP explicitly; \nand 2) KG encapsulation that transfers the extracted KG into LVDs to enable accurate cross-domain object classification. \nIn addition, KGD can extract both visual and textual KG independently, providing complementary vision and language knowledge for object localization and object classification in detection tasks over various downstream domains. \nExperiments over multiple widely adopted detection benchmarks show that KGD outperforms the state-of-the-art consistently by large margins. \nCodes will be released.", "keywords": "Domain adaptation;Large-vocabulary object detectors;Vision-language models;Knowledge graph distillation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kai Jiang;Jiaxing Huang;Weiying Xie;Jie Lei;Yunsong Li;Ling Shao;Shijian Lu", "authorids": "~Kai_Jiang2;~Jiaxing_Huang2;~Weiying_Xie1;~Jie_Lei5;~Yunsong_Li1;~Ling_Shao1;~Shijian_Lu1", "gender": "M;M;F;M;M;M;M", "homepage": ";https://jxhuang0508.github.io/;https://web.xidian.edu.cn/wyxie/;;https://web.xidian.edu.cn/ysli/;;https://personal.ntu.edu.sg/shijian.lu/", "dblp": ";62/6016-1.html;/150/3937.html;61/5501-1;;;42/2718", "google_scholar": "QzspFz4AAAAJ;czirNcwAAAAJ;y0ha5lMAAAAJ;GinaT0wAAAAJ;aY_2RzkAAAAJ;z84rLjoAAAAJ;https://scholar.google.com.sg/scholar?hl=en", "orcid": "0000-0001-9921-2043;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Kai_Jiang2;~Jiaxing_Huang2;~Weiying_Xie1;~Jie_Lei5;~Yunsong_Li1;~Ling_Shao1;~Shijian_Lu1", "aff": "Xidian University;Nanyang Technological University;Xidian University;Xi'an University of Electronic Science and Technology;Xidian University ;University of Chinese Academy of Sciences;Nanyang Technological University", "aff_domain": "xidian.edu;ntu.edu.sg;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;ucas.ac.cn;ntu.edu.sg", "position": "PhD student;Postdoc;Full Professor;Full Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2024domain,\ntitle={Domain Adaptation for Large-Vocabulary Object Detectors},\nauthor={Kai Jiang and Jiaxing Huang and Weiying Xie and Jie Lei and Yunsong Li and Ling Shao and Shijian Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=deZpmEfmTo}\n}", "github": "", "reviewers": "5mbu;1Ck4;Jwgx;TyHq", "pdf_size": 6561196, "rating": "5;5;5;6", "confidence": "4;5;4;4", "soundness": "3;3;2;3", "novelty": "3;3;2;2", "presentation": "2;3;2;2", "wc_summary": "35;57;71;179", "wc_strengths": "45;45;45;59", "wc_weaknesses": "122;124;194;163", "wc_questions": "2;28;4;260", "wc_limitations": "16;1;1;3", "wc_review": "220;255;315;664", "wc_reply_reviewers": "27;12;31;31", "wc_reply_authors": "0;0;552;228", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.5, 55.48648484090518 ], "wc_strengths_avg": [ 48.5, 6.06217782649107 ], "wc_weaknesses_avg": [ 150.75, 29.844388082183894 ], "wc_questions_avg": [ 73.5, 108.1607599825371 ], "wc_limitations_avg": [ 5.25, 6.2599920127744575 ], "wc_review_avg": [ 363.5, 176.78871570323713 ], "wc_reply_reviewers_avg": [ 25.25, 7.8222439235810075 ], "wc_reply_authors_avg": [ 195.0, 226.15702509539693 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10838931091427597330&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xidian.edu;ntu.edu.sg;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;ucas.ac.cn;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;1;0;2;0;3;1", "aff_unique_norm": "Xidian University;Nanyang Technological University;Xi'an University of Electronic Science and Technology;University of Chinese Academy of Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.ntu.edu.sg;http://www.xidian.edu.cn/;http://www.ucas.ac.cn", "aff_unique_abbr": "Xidian;NTU;Xidian University;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;1;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "LoFiT: Localized Fine-tuning on LLM Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94329", "id": "dfiXFbECSZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dfiXFbECSZ", "openreview": "https://openreview.net/forum?id=dfiXFbECSZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94329.png?t=1733779050.6932275", "project": "", "author_site": "Fangcong Yin, Xi Ye, Greg Durrett", "tldr": "", "abstract": "Recent work in interpretability shows that large language models (LLMs) can be adapted for new tasks in a learning-free way: it is possible to intervene on LLM representations to elicit desired behaviors for alignment. For instance, adding certain bias vectors to the outputs of certain attention heads is reported to boost the truthfulness of models. In this work, we show that localized fine-tuning serves as an effective alternative to such representation intervention methods. We introduce a framework called Localized Fine-Tuning on LLM Representations (LoFiT), which identifies a subset of attention heads that are most important for learning a specific task, then trains offset vectors to add to the model's hidden representations at those selected heads. LoFiT localizes to a sparse set of heads (3%-10%) and learns the offset vectors from limited training data, comparable to the settings used for representation intervention. For truthfulness and reasoning tasks, we find that LoFiT's intervention vectors are more effective for LLM adaptation than vectors from representation intervention methods such as Inference-time Intervention. We also find that the localization step is important: selecting a task-specific set of attention heads can lead to higher performance than intervening on heads selected for a different task. Finally, across 7 tasks we study, LoFiT achieves comparable performance to other parameter-efficient fine-tuning methods such as LoRA, despite modifying 20x-200x fewer parameters than these methods.", "keywords": "Interpretability;Large language models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Fangcong Yin;Xi Ye;Greg Durrett", "authorids": "~Fangcong_Yin1;~Xi_Ye2;~Greg_Durrett1", "gender": "M;;M", "homepage": "https://fangcong-yin-2.github.io/;https://xiye17.github.io/;http://www.cs.utexas.edu/~gdurrett/", "dblp": "362/8553;;69/7968", "google_scholar": "u_-1TRIAAAAJ;qH83GlAAAAAJ;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Fangcong_Yin1;~Xi_Ye2;~Greg_Durrett1", "aff": "University of Texas at Austin;UT Austin;University of Texas at Austin", "aff_domain": "utexas.edu;cs.utexas.edu;utexas.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyin2024lofit,\ntitle={LoFiT: Localized Fine-tuning on {LLM} Representations},\nauthor={Fangcong Yin and Xi Ye and Greg Durrett},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dfiXFbECSZ}\n}", "github": "", "reviewers": "63K5;Yw79;LqFt", "pdf_size": 1127132, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;2;4", "novelty": "2;2;3", "presentation": "3;4;4", "wc_summary": "100;95;63", "wc_strengths": "96;81;114", "wc_weaknesses": "102;171;291", "wc_questions": "134;248;5", "wc_limitations": "7;26;54", "wc_review": "439;621;527", "wc_reply_reviewers": "28;30;72", "wc_reply_authors": "150;0;17", "reply_reviewers": "1;1;1", "reply_authors": "3;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.0, 16.391054470858997 ], "wc_strengths_avg": [ 97.0, 13.490737563232042 ], "wc_weaknesses_avg": [ 188.0, 78.08969202141856 ], "wc_questions_avg": [ 129.0, 99.26731586982696 ], "wc_limitations_avg": [ 29.0, 19.30457631409368 ], "wc_review_avg": [ 529.0, 74.31464638055319 ], "wc_reply_reviewers_avg": [ 43.333333333333336, 20.28683207293725 ], "wc_reply_authors_avg": [ 55.666666666666664, 67.06381703687582 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5159047088285204861&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "utexas.edu;cs.utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "QuaRot: Outlier-Free 4-Bit Inference in Rotated LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94328", "id": "dfqsW38v1X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dfqsW38v1X", "openreview": "https://openreview.net/forum?id=dfqsW38v1X", "poster": "", "project": "", "author_site": "Saleh Ashkboos, Amirkeivan Mohtashami, Maximilian Croci, Bo Li, Pashmina Cameron, Martin Jaggi, Dan Alistarh, Torsten Hoefler, James Hensman", "tldr": "", "abstract": "We introduce QuaRot, a new Quantization scheme based on Rotations, which is able to quantize LLMs end-to-end, including all weights, activations, and KV cache in 4 bits. QuaRot rotates LLMs in a way that removes outliers from the hidden state without changing the output, making quantization easier. This computational invariance is applied to the hidden state (residual) of the LLM, as well as to the activations of the feed-forward components, aspects of the attention mechanism, and to the KV cache. The result is a quantized model where all matrix multiplications are performed in 4 bits, without any channels identified for retention in higher precision. Our 4-bit quantized LLAMA2-70B model has losses of at most 0.47 WikiText-2 perplexity and retains 99% of the zero-shot performance. We also show that QuaRot can provide lossless 6 and 8 bit LLAMA-2 models without any calibration data using round-to-nearest quantization. Code is available at github.com/spcl/QuaRot.", "keywords": "quantization;efficient inference;large language models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Saleh Ashkboos;Amirkeivan Mohtashami;Maximilian L. Croci;Bo Li;Pashmina Cameron;Martin Jaggi;Dan Alistarh;Torsten Hoefler;James Hensman", "authorids": "~Saleh_Ashkboos1;~Amirkeivan_Mohtashami1;~Maximilian_L._Croci1;~Bo_Li62;~Pashmina_Cameron1;~Martin_Jaggi1;~Dan_Alistarh7;~Torsten_Hoefler1;~James_Hensman1", "gender": "M;M;M;F;M;;;M;", "homepage": "http://sashkboos.github.io;;;https://www.microsoft.com/en-us/research/people/pcameron/;https://mlo.epfl.ch;;;http://people.csail.mit.edu/alistarh/;", "dblp": "195/5539.html;271/7873;;94/8938;17/4402;16/3869;116/2940;36/3251.html;278/0132", "google_scholar": "N3RteqgAAAAJ;YT1udC0AAAAJ;k8wKfJUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;;l8dX3ssAAAAJ;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ;X8UdvWUAAAAJ", "orcid": ";;0009-0009-8444-7718;0009-0009-0444-1755;0000-0003-1579-5558;;;;", "linkedin": "saleh-ashkboos-806628161/;;bobboli0202/;pashmina-cameron-7424b51/;;;;;", "or_profile": "~Saleh_Ashkboos1;~Amirkeivan_Mohtashami1;~Bo_Li62;~Pashmina_Cameron1;~Martin_Jaggi1;~Torsten_Hoefler1;~James_Hensman1;~Dan_Alistarh1;~Maximilian_Croci1", "aff": "Swiss Federal Institute of Technology;Swiss Federal Institute of Technology Lausanne;ETHZ - ETH Zurich;Microsoft;EPFL;Swiss Federal Institute of Technology;Microsoft Research;Institute of Science and Technology;Microsoft Research", "aff_domain": "ethz.ch;epfl.ch;ethz.ch;microsoft.com;epfl.ch;ethz.ch;microsoft.com;ist.ac.at;microsoft.com", "position": "PhD student;PhD student;MS student;Principal Applied Scientist;Associate Professor;Professor;Principal Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nashkboos2024quarot,\ntitle={QuaRot: Outlier-Free 4-Bit Inference in Rotated {LLM}s},\nauthor={Saleh Ashkboos and Amirkeivan Mohtashami and Maximilian L. Croci and Bo Li and Pashmina Cameron and Martin Jaggi and Dan Alistarh and Torsten Hoefler and James Hensman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dfqsW38v1X}\n}", "github": "", "reviewers": "hgDj;QCed;rEuG;tVLH", "pdf_size": 757470, "rating": "5;7;7;8", "confidence": "3;4;4;5", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "85;82;80;197", "wc_strengths": "31;141;61;180", "wc_weaknesses": "179;217;68;54", "wc_questions": "164;210;143;82", "wc_limitations": "1;18;11;6", "wc_review": "460;668;363;519", "wc_reply_reviewers": "400;193;12;65", "wc_reply_authors": "0;511;16;17", "reply_reviewers": "1;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 49.684001449158664 ], "wc_strengths_avg": [ 103.25, 59.83466804453752 ], "wc_weaknesses_avg": [ 129.5, 69.98035438607037 ], "wc_questions_avg": [ 149.75, 46.0129057982649 ], "wc_limitations_avg": [ 9.0, 6.284902544988268 ], "wc_review_avg": [ 502.5, 110.59950271135942 ], "wc_reply_reviewers_avg": [ 167.5, 149.49331088714305 ], "wc_reply_authors_avg": [ 136.0, 216.61140320860304 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8302655462299993207&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ethz.ch;epfl.ch;ethz.ch;microsoft.com;epfl.ch;ethz.ch;microsoft.com;ist.ac.at;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;2;3;4;0;3;5;3", "aff_unique_norm": "Swiss Federal Institute of Technology;Swiss Federal Institute of Technology Lausanne;ETH Zurich;Microsoft;EPFL;Institute of Science and Technology", "aff_unique_dep": ";;;Microsoft Corporation;;", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch;https://www.ethz.ch;https://www.microsoft.com;https://www.epfl.ch;", "aff_unique_abbr": "ETH Zurich;EPFL;ETHZ;Microsoft;EPFL;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;0;0;1;0;0;1;1", "aff_country_unique": "Switzerland;United States;" }, { "title": "Exploring Consistency in Graph Representations: from Graph Kernels to Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94327", "id": "dg0hO4M11K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dg0hO4M11K", "openreview": "https://openreview.net/forum?id=dg0hO4M11K", "poster": "", "project": "", "author_site": "Xuyuan Liu, Yinghao Cai, Qihui Yang, Yujun Yan", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have emerged as a dominant approach in graph representation learning, yet they often struggle to capture consistent similarity relationships among graphs. To capture similarity relationships, while graph kernel methods like the Weisfeiler-Lehman subtree (WL-subtree) and Weisfeiler-Lehman optimal assignment (WLOA) perform effectively, they are heavily reliant on predefined kernels and lack sufficient non-linearities. Our work aims to bridge the gap between neural network methods and kernel approaches by enabling GNNs to consistently capture relational structures in their learned representations. Given the analogy between the message-passing process of GNNs and WL algorithms, we thoroughly compare and analyze the properties of WL-subtree and WLOA kernels. We find that the similarities captured by WLOA at different iterations are asymptotically consistent, ensuring that similar graphs remain similar in subsequent iterations, thereby leading to superior performance over the WL-subtree kernel. Inspired by these findings, we conjecture that the consistency in the similarities of graph representations across GNN layers is crucial in capturing relational structures and enhancing graph classification performance. Thus, we propose a loss to enforce the similarity of graph representations to be consistent across different layers. Our empirical analysis verifies our conjecture and shows that our proposed consistency loss can significantly enhance graph classification performance across several GNN backbones on various datasets.", "keywords": "Graph Neural Networks;Representation Consistency;Graph Classification", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/06c34efab74781a44f72c168d3d25a13603d8aed.zip", "author": "Xuyuan Liu;Yinghao Cai;Qihui Yang;Yujun Yan", "authorids": "~Xuyuan_Liu1;~Yinghao_Cai4;~Qihui_Yang1;~Yujun_Yan1", "gender": "M;M;;F", "homepage": "https://xuyuan0204.github.io/;https://fuyao233.github.io/yinghaocai/;https://isaacyqh.github.io/;https://sites.google.com/umich.edu/yujunyan/home", "dblp": "331/1043.html;;;219/1736", "google_scholar": ";;_ye8JacAAAAJ;5TQUP58AAAAJ", "orcid": ";;;0000-0003-3776-4293", "linkedin": ";;isaacyqh/;", "or_profile": "~Xuyuan_Liu1;~Yinghao_Cai4;~Qihui_Yang1;~Yujun_Yan1", "aff": "Dartmouth College;Southeast University;The Chinese University of Hong Kong;Dartmouth College", "aff_domain": "dartmouth.edu;seu.edu.cn;cuhk.edu.hk;dartmouth.edu", "position": "PhD student;Undergrad student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nliu2024exploring,\ntitle={Exploring Consistency in Graph Representations: from Graph Kernels to Graph Neural Networks},\nauthor={Xuyuan Liu and Yinghao Cai and Qihui Yang and Yujun Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dg0hO4M11K}\n}", "github": "", "reviewers": "2j7s;q7iD;NH7G", "pdf_size": 3193052, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "58;67;27", "wc_strengths": "30;20;55", "wc_weaknesses": "98;40;44", "wc_questions": "69;3;39", "wc_limitations": "1;1;1", "wc_review": "256;131;166", "wc_reply_reviewers": "72;17;0", "wc_reply_authors": "444;45;0", "reply_reviewers": "3;1;0", "reply_authors": "5;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 50.666666666666664, 17.13346303452853 ], "wc_strengths_avg": [ 35.0, 14.719601443879744 ], "wc_weaknesses_avg": [ 60.666666666666664, 26.449112566503164 ], "wc_questions_avg": [ 37.0, 26.981475126464083 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 184.33333333333334, 52.65189666310438 ], "wc_reply_reviewers_avg": [ 29.666666666666668, 30.728199137310703 ], "wc_reply_authors_avg": [ 163.0, 199.54448125668623 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Qg8kh2ok5B8J:scholar.google.com/&scioq=Exploring+Consistency+in+Graph+Representations:+from+Graph+Kernels+to+Graph+Neural+Networks&hl=en&as_sdt=0,34", "gs_version_total": 3, "email": "dartmouth.edu;seu.edu.cn;cuhk.edu.hk;dartmouth.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Dartmouth College;Southeast University;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dartmouth.edu;https://www.seu.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "Dartmouth;SEU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Molecule Design by Latent Prompt Transformer", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94326", "id": "dg3tI3c2B1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dg3tI3c2B1", "openreview": "https://openreview.net/forum?id=dg3tI3c2B1", "poster": "", "project": "", "author_site": "Deqian Kong, Yuhao Huang, Jianwen Xie, Edouardo Honig, Ming Xu, Shuanghong Xue, Pei Lin, Sanping Zhou, Sheng Zhong, Nanning Zheng, Ying Nian Wu", "tldr": "", "abstract": "This work explores the challenging problem of molecule design by framing it as a conditional generative modeling task, where target biological properties or desired chemical constraints serve as conditioning variables.\nWe propose the Latent Prompt Transformer (LPT), a novel generative model comprising three components: (1) a latent vector with a learnable prior distribution modeled by a neural transformation of Gaussian white noise; (2) a molecule generation model based on a causal Transformer, which uses the latent vector as a prompt; and (3) a property prediction model that predicts a molecule's target properties and/or constraint values using the latent prompt. LPT can be learned by maximum likelihood estimation on molecule-property pairs. During property optimization, the latent prompt is inferred from target properties and constraints through posterior sampling and then used to guide the autoregressive molecule generation.\nAfter initial training on existing molecules and their properties, we adopt an online learning algorithm to progressively shift the model distribution towards regions that support desired target properties. Experiments demonstrate that LPT not only effectively discovers useful molecules across single-objective, multi-objective, and structure-constrained optimization tasks, but also exhibits strong sample efficiency.", "keywords": "latent space generative modeling; Latent Prompt Transformer; molecule design", "primary_area": "generative_models", "supplementary_material": "", "author": "Deqian Kong;Yuhao Huang;Jianwen Xie;Edouardo Honig;Ming Xu;Shuanghong Xue;Pei Lin;Sanping Zhou;Sheng Zhong;Nanning Zheng;Ying Nian Wu", "authorids": "~Deqian_Kong1;~Yuhao_Huang3;~Jianwen_Xie1;~Edouardo_Honig1;~Ming_Xu11;~Shuanghong_Xue1;~Pei_Lin2;~Sanping_Zhou1;~Sheng_Zhong12;~Nanning_Zheng1;~Ying_Nian_Wu1", "gender": "M;M;;M;M;F;;M;;M;", "homepage": "https://sites.google.com/view/deqiankong/home;;;http://www.stat.ucla.edu/~edouardohonig/;https://systemsbio.ucsd.edu/team.html;;;http://gr.xjtu.edu.cn/web/spzhou;https://systemsbio.ucsd.edu/;;", "dblp": "199/7131;219/6363;;360/7575;;;;179/0508;;07/256-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;cbKekagAAAAJ;;7_0UVa0AAAAJ;LJzhoiEAAAAJ;;7oP6UXEAAAAJ;2Drvv44AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;;0000-0002-6998-8462;;0000-0002-1218-8800;;;;", "linkedin": ";;;;;shuanghong-xue-353436299/;;;;;", "or_profile": "~Deqian_Kong1;~Yuhao_Huang3;~Jianwen_Xie1;~Edouardo_Honig1;~Ming_Xu11;~Shuanghong_Xue1;~Pei_Lin2;~Sanping_Zhou1;~Sheng_Zhong12;~Nanning_Zheng1;~Ying_Nian_Wu1", "aff": "University of California, Los Angeles;Xi'an Jiaotong University;;University of California, Los Angeles;University of California, San Diego;University of California, San Diego;University of California, San Diego;Xi'an Jiaotong University;University of California, San Diego;Xi'an Jiaotong University;", "aff_domain": "ucla.edu;xjtu.edu.cn;;ucla.edu;ucsd.edu;ucsd.edu;ucsd.edu;xjtu.edu;ucsd.edu;xjtu.edu.cn;", "position": "PhD student;PhD student;;PhD student;Postdoc;Postdoc;Postdoc;Associate Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nkong2024molecule,\ntitle={Molecule Design by Latent Prompt Transformer},\nauthor={Deqian Kong and Yuhao Huang and Jianwen Xie and Edouardo Honig and Ming Xu and Shuanghong Xue and Pei Lin and Sanping Zhou and Sheng Zhong and Nanning Zheng and Ying Nian Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dg3tI3c2B1}\n}", "github": "", "reviewers": "rWJi;D3n3;vrpK;jGf3", "pdf_size": 3199424, "rating": "4;4;7;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "46;59;54;271", "wc_strengths": "48;57;35;201", "wc_weaknesses": "111;134;67;180", "wc_questions": "146;119;3;8", "wc_limitations": "5;1;1;1", "wc_review": "356;370;160;661", "wc_reply_reviewers": "136;0;18;30", "wc_reply_authors": "508;102;21;105", "reply_reviewers": "1;0;1;1", "reply_authors": "4;3;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.5, 94.51058141816713 ], "wc_strengths_avg": [ 85.25, 67.28437783022149 ], "wc_weaknesses_avg": [ 123.0, 40.773766075750224 ], "wc_questions_avg": [ 69.0, 64.23783931609157 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 386.75, 178.78391286690197 ], "wc_reply_reviewers_avg": [ 46.0, 53.04714883949938 ], "wc_reply_authors_avg": [ 184.0, 190.0723546442249 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896258, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7033786548674924857&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ucla.edu;xjtu.edu.cn;;ucla.edu;ucsd.edu;ucsd.edu;ucsd.edu;xjtu.edu;ucsd.edu;xjtu.edu.cn;", "author_num": 11, "aff_unique_index": "0;1;0;2;2;2;1;2;1", "aff_unique_norm": "University of California, Los Angeles;Xi'an Jiao Tong University;University of California, San Diego", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.xjtu.edu.cn;https://www.ucsd.edu", "aff_unique_abbr": "UCLA;XJTU;UCSD", "aff_campus_unique_index": "0;0;2;2;2;2", "aff_campus_unique": "Los Angeles;;San Diego", "aff_country_unique_index": "0;1;0;0;0;0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Implicitly Guided Design with PropEn: Match your Data to Follow the Gradient", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94325", "id": "dhFHO90INk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dhFHO90INk", "openreview": "https://openreview.net/forum?id=dhFHO90INk", "poster": "", "project": "", "author_site": "Nata\u0161a Tagasovska, Vladimir Gligorijevic, Kyunghyun Cho, Andreas Loukas", "tldr": "", "abstract": "Across scientific domains, generating new models or optimizing existing ones while meeting specific criteria is crucial. Traditional machine learning frameworks for guided design use a generative model and a surrogate model (discriminator), requiring large datasets. However, real-world scientific applications often have limited data and complex landscapes, making data-hungry models inefficient or impractical. We propose a new framework, PropEn, inspired by ``matching'', which enables implicit guidance without training a discriminator. By matching each sample with a similar one that has a better property value, we create a larger training dataset that inherently indicates the direction of improvement. Matching, combined with an encoder-decoder architecture, forms a domain-agnostic generative framework for property enhancement. We show that training with a matched dataset approximates the gradient of the property of interest while remaining within the data distribution, allowing efficient design optimization. Extensive evaluations in toy problems and scientific applications, such as therapeutic protein design and airfoil optimization, demonstrate PropEn's advantages over common baselines. Notably, the protein design results are validated with wet lab experiments, confirming the competitiveness and effectiveness of our approach. Our code is available at https://github.com/prescient-design/propen.", "keywords": "matching;property enhancement;gradient approximation;design optimization;shape optimization;antibodies", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Natasa Tagasovska;Vladimir Gligorijevic;Kyunghyun Cho;Andreas Loukas", "authorids": "~Natasa_Tagasovska2;~Vladimir_Gligorijevic2;~Kyunghyun_Cho1;~Andreas_Loukas1", "gender": "F;M;M;M", "homepage": "https://datascience.ch/team_member/natasa-tagasovska-computer-scientist/;https://www.gene.com/scientists/our-scientists/vladimir-gligorijevic;http://kyunghyuncho.me;", "dblp": ";116/2862;41/9736;19/10012", "google_scholar": "S2ZUSL0AAAAJ;rnuxS_YAAAAJ;https://scholar.google.fi/citations?user=0RAmmIAAAAAJ;https://scholar.google.ch/citations?user=-XGXJbQAAAAJ", "orcid": ";;;", "linkedin": "natasha-tagasovska/;;;", "or_profile": "~Natasa_Tagasovska2;~Vladimir_Gligorijevic2;~Kyunghyun_Cho1;~Andreas_Loukas1", "aff": "Prescient Design - Genentech, Roche;Genentech;Genentech;Roche / Genentech", "aff_domain": "roche.com;gene.com;gene.com;roche.com", "position": "Senior Machine Learning Scientis;Researcher;Senior Director of Frontier Research;Principal Researcher", "bibtex": "@inproceedings{\ntagasovska2024implicitly,\ntitle={Implicitly Guided Design with PropEn: Match your Data to Follow the Gradient},\nauthor={Natasa Tagasovska and Vladimir Gligorijevic and Kyunghyun Cho and Andreas Loukas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dhFHO90INk}\n}", "github": "", "reviewers": "zYK4;KGXt;gEZM;WPJM", "pdf_size": 7847990, "rating": "5;6;6;6", "confidence": "3;4;4;2", "soundness": "2;3;3;2", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "86;102;82;196", "wc_strengths": "46;121;84;50", "wc_weaknesses": "123;104;236;200", "wc_questions": "4;88;3;463", "wc_limitations": "7;52;4;19", "wc_review": "266;467;409;928", "wc_reply_reviewers": "22;0;112;632", "wc_reply_authors": "42;62;148;760", "reply_reviewers": "1;0;1;2", "reply_authors": "2;2;3;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.5, 46.50537603331468 ], "wc_strengths_avg": [ 75.25, 30.26032881513352 ], "wc_weaknesses_avg": [ 165.75, 54.19582548499469 ], "wc_questions_avg": [ 139.5, 189.93222475398954 ], "wc_limitations_avg": [ 20.5, 19.03286631067428 ], "wc_review_avg": [ 517.5, 248.03477578758992 ], "wc_reply_reviewers_avg": [ 191.5, 257.7610327415686 ], "wc_reply_authors_avg": [ 253.0, 295.4132698441287 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7011638433961933948&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "roche.com;gene.com;gene.com;roche.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Genentech;Roche", "aff_unique_dep": "Prescient Design;", "aff_unique_url": "https://www.gene.com;https://www.roche.com", "aff_unique_abbr": "Genentech;Roche", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Unified Gradient-Based Machine Unlearning with Remain Geometry Enhancement", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94324", "id": "dheDf5EpBT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dheDf5EpBT", "openreview": "https://openreview.net/forum?id=dheDf5EpBT", "poster": "/media/PosterPDFs/NeurIPS%202024/94324.png?t=1733198028.0248435", "project": "", "author_site": "Zhehao Huang, Xinwen Cheng, JingHao Zheng, Haoran Wang, Zhengbao He, Tao Li, Xiaolin Huang", "tldr": "", "abstract": "Machine unlearning (MU) has emerged to enhance the privacy and trustworthiness of deep neural networks. Approximate MU is a practical method for large-scale models. Our investigation into approximate MU starts with identifying the steepest descent direction, minimizing the output Kullback-Leibler divergence to exact MU inside a parameters' neighborhood. This probed direction decomposes into three components: weighted forgetting gradient ascent, fine-tuning retaining gradient descent, and a weight saliency matrix. Such decomposition derived from Euclidean metric encompasses most existing gradient-based MU methods. Nevertheless, adhering to Euclidean space may result in sub-optimal iterative trajectories due to the overlooked geometric structure of the output probability space. We suggest embedding the unlearning update into a manifold rendered by the remaining geometry, incorporating second-order Hessian from the remaining data. It helps prevent effective unlearning from interfering with the retained performance. However, computing the second-order Hessian for large-scale models is intractable. To efficiently leverage the benefits of Hessian modulation, we propose a fast-slow parameter update strategy to implicitly approximate the up-to-date salient unlearning direction.\nFree from specific modal constraints, our approach is adaptable across computer vision unlearning tasks, including classification and generation. Extensive experiments validate our efficacy and efficiency. Notably, our method successfully performs class-forgetting on ImageNet using DiT and forgets a class on CIFAR-10 using DDPM in just 50 steps, compared to thousands of steps required by previous methods. Code is available at [Unified-Unlearning-w-Remain-Geometry](https://github.com/K1nght/Unified-Unlearning-w-Remain-Geometry).", "keywords": "Machine unlearning;steepest descent;diffusion model", "primary_area": "privacy", "supplementary_material": "/attachment/707f7b6da3ffea456e5821727b88712cbfd6692d.zip", "author": "Zhehao Huang;Xinwen Cheng;JingHao Zheng;Haoran Wang;Zhengbao He;Tao Li;Xiaolin Huang", "authorids": "~Zhehao_Huang1;~Xinwen_Cheng1;~JingHao_Zheng1;~Haoran_Wang27;~Zhengbao_He1;~Tao_Li12;~Xiaolin_Huang1", "gender": "M;F;M;M;;M;M", "homepage": "https://github.com/K1nght;;https://jinghaozheng.github.io/;https://github.com/haoran-whynot;;https://nblt.github.io/;http://www.pami.sjtu.edu.cn/en/xiaolin", "dblp": "258/1555;;;;255/5984;;61/2227", "google_scholar": ";;;;jrbdRK0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;DR-gBcEAAAAJ", "orcid": ";0000-0001-6080-0614;;;;;", "linkedin": ";;;;;;", "or_profile": "~Zhehao_Huang1;~Xinwen_Cheng1;~JingHao_Zheng1;~Haoran_Wang27;~Zhengbao_He1;~Tao_Li12;~Xiaolin_Huang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu;sjtu.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhuang2024unified,\ntitle={Unified Gradient-Based Machine Unlearning with Remain Geometry Enhancement},\nauthor={Zhehao Huang and Xinwen Cheng and JingHao Zheng and Haoran Wang and Zhengbao He and Tao Li and Xiaolin Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dheDf5EpBT}\n}", "github": "", "reviewers": "fu6N;S1BA;HvBU", "pdf_size": 17201575, "rating": "5;7;7", "confidence": "3;2;2", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "62;84;56", "wc_strengths": "40;64;57", "wc_weaknesses": "152;132;27", "wc_questions": "98;82;31", "wc_limitations": "6;13;8", "wc_review": "358;375;179", "wc_reply_reviewers": "143;37;48", "wc_reply_authors": "525;123;34", "reply_reviewers": "1;1;1", "reply_authors": "4;3;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.33333333333333, 12.036980056845191 ], "wc_strengths_avg": [ 53.666666666666664, 10.077477638553983 ], "wc_weaknesses_avg": [ 103.66666666666667, 54.822947344661756 ], "wc_questions_avg": [ 70.33333333333333, 28.56960311628816 ], "wc_limitations_avg": [ 9.0, 2.943920288775949 ], "wc_review_avg": [ 304.0, 88.66040078110784 ], "wc_reply_reviewers_avg": [ 76.0, 47.588514020367 ], "wc_reply_authors_avg": [ 227.33333333333334, 213.59515183844618 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3316870391627077611&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Geometric Exploitation for Indoor Panoramic Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94323", "id": "diYnEYUbIU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=diYnEYUbIU", "openreview": "https://openreview.net/forum?id=diYnEYUbIU", "poster": "/media/PosterPDFs/NeurIPS%202024/94323.png?t=1731487717.3677008", "project": "", "author_site": "Duc Cao Dinh, Seok Joon Kim, Kyusung Cho", "tldr": "", "abstract": "PAnoramic Semantic Segmentation (PASS) is an important task in computer vision,\nas it enables semantic understanding of a 360\u00b0 environment. Currently,\nmost of existing works have focused on addressing the distortion issues in 2D\npanoramic images without considering spatial properties of indoor scene. This\nrestricts PASS methods in perceiving contextual attributes to deal with the ambiguity\nwhen working with monocular images. In this paper, we propose a novel\napproach for indoor panoramic semantic segmentation. Unlike previous works,\nwe consider the panoramic image as a composition of segment groups: oversampled\nsegments, representing planar structures such as floors and ceilings, and\nunder-sampled segments, representing other scene elements. To optimize each\ngroup, we first enhance over-sampled segments by jointly optimizing with a dense\ndepth estimation task. Then, we introduce a transformer-based context module\nthat aggregates different geometric representations of the scene, combined\nwith a simple high-resolution branch, it serves as a robust hybrid decoder for\nestimating under-sampled segments, effectively preserving the resolution of predicted\nmasks while leveraging various indoor geometric properties. Experimental\nresults on both real-world (Stanford2D3DS, Matterport3D) and synthetic (Structured3D)\ndatasets demonstrate the robustness of our framework, by setting new\nstate-of-the-arts in almost evaluations, The code and updated results are available\nat: https://github.com/caodinhduc/vertical_relative_distance.", "keywords": "indoor panoramic semantic segmentation;vertical relative distance", "primary_area": "machine_vision", "supplementary_material": "", "author": "Duc Cao Dinh;Seok Joon Kim;Kyusung Cho", "authorids": "~Duc_Cao_Dinh1;~Seok_Joon_Kim2;~Kyusung_Cho1", "gender": "M;M;M", "homepage": "https://caodinhduc.github.io/;;", "dblp": "357/5054;296/8953;06/720", "google_scholar": "4IAC5BMAAAAJ;;", "orcid": ";0000-0001-7185-5717;", "linkedin": ";;", "or_profile": "~Duc_Cao_Dinh1;~Seok_Joon_Kim2;~Kyusung_Cho1", "aff": "MAXST;Maxst co Ltd.;MAXST", "aff_domain": "maxst.com;maxst.com;maxst.com", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ndinh2024geometric,\ntitle={Geometric Exploitation for Indoor Panoramic Semantic Segmentation},\nauthor={Duc Cao Dinh and Seok Joon Kim and Kyusung Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=diYnEYUbIU}\n}", "github": "", "reviewers": "DACe;GGrZ;QfCk;irP4", "pdf_size": 11857481, "rating": "5;5;5;6", "confidence": "4;5;4;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "159;49;78;122", "wc_strengths": "62;23;55;62", "wc_weaknesses": "280;143;169;76", "wc_questions": "78;94;4;79", "wc_limitations": "40;7;9;60", "wc_review": "619;316;315;399", "wc_reply_reviewers": "108;92;39;0", "wc_reply_authors": "329;71;38;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 102.0, 41.934472692523514 ], "wc_strengths_avg": [ 50.5, 16.132265804901678 ], "wc_weaknesses_avg": [ 167.0, 73.53570561298777 ], "wc_questions_avg": [ 63.75, 35.074028853269766 ], "wc_limitations_avg": [ 29.0, 22.169799277395363 ], "wc_review_avg": [ 412.25, 124.13979015609782 ], "wc_reply_reviewers_avg": [ 59.75, 42.920711783473486 ], "wc_reply_authors_avg": [ 109.5, 129.1946206310464 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7787209047725587900&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "maxst.com;maxst.com;maxst.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "MAXST;Maxst Co., Ltd.", "aff_unique_dep": ";", "aff_unique_url": ";http://www.maxst.com", "aff_unique_abbr": ";Maxst", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";South Korea" }, { "title": "FedLLM-Bench: Realistic Benchmarks for Federated Learning of Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97593", "id": "djGx0hucok", "proceeding": "", "pdf": "https://openreview.net/pdf?id=djGx0hucok", "openreview": "https://openreview.net/forum?id=djGx0hucok", "poster": "", "project": "", "author_site": "Rui Ye, Rui Ge, Xinyu Zhu, Jingyi Chai, Du Yaxin, Yang Liu, Yanfeng Wang, Siheng Chen", "tldr": "", "abstract": "Federated learning has enabled multiple parties to collaboratively train large language models without directly sharing their data (FedLLM).\nFollowing this training paradigm, the community has put massive efforts from diverse aspects including framework, performance, and privacy.\nHowever, an unpleasant fact is that there are currently no realistic datasets and benchmarks for FedLLM and previous works all rely on artificially constructed datasets, failing to capture properties in real-world scenarios.\nAddressing this, we propose FedLLM-Bench, which involves 8 training methods, 4 training datasets, and 6 evaluation metrics, to offer a comprehensive testbed for the FedLLM community.\nFedLLM-Bench encompasses three datasets (e.g., user-annotated multilingual dataset) for federated instruction tuning and one dataset (e.g., user-annotated preference dataset) for federated preference alignment, whose scale of client number ranges from 38 to 747.\nOur datasets incorporate several representative diversities: language, quality, quantity, instruction, length, embedding, and preference, capturing properties in real-world scenarios.\nBased on FedLLM-Bench, we conduct experiments on all datasets to benchmark existing FL methods and provide empirical insights (e.g., multilingual collaboration).\nWe believe that our FedLLM-Bench can benefit the FedLLM community by reducing required efforts, providing a practical testbed, and promoting fair comparisons.\nCode and datasets are available at https://github.com/rui-ye/FedLLM-Bench.", "keywords": "federated learning;large language models;instruction tuning;preference alignment;datasets;benchmarks", "primary_area": "", "supplementary_material": "/attachment/03cc9b5084384e46255f0c5ab23300ab23aefe1c.pdf", "author": "Rui Ye;Rui Ge;Xinyu Zhu;Jingyi Chai;Yaxin Du;Yang Liu;Yanfeng Wang;Siheng Chen", "authorids": "~Rui_Ye1;~Rui_Ge1;~Xinyu_Zhu5;~Jingyi_Chai1;~Yaxin_Du1;~Yang_Liu59;~Yanfeng_Wang1;~Siheng_Chen1", "gender": "M;M;M;F;F;F;M;M", "homepage": "http://rui-ye.github.io/;https://github.com/motigrez;https://github.com/a-died-fish;;;;https://cmic.sjtu.edu.cn/wangyanfeng/;https://siheng-chen.github.io/", "dblp": ";;;340/7940;364/0077.html;;55/5407-1.html;136/4945", "google_scholar": "Q4-VTxcAAAAJ;39xrKBgAAAAJ;;aeYq_ScAAAAJ;i_7WSl0AAAAJ;JEieoFsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;0000-0002-3196-2347;", "linkedin": ";;;;;;;", "or_profile": "~Rui_Ye1;~Rui_Ge1;~Xinyu_Zhu5;~Jingyi_Chai1;~Yaxin_Du1;~Yang_Liu59;~Yanfeng_Wang1;~Siheng_Chen2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiao tong University;Tsinghua University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;tsinghua.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;PhD student;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nye2024fedllmbench,\ntitle={Fed{LLM}-Bench: Realistic Benchmarks for Federated Learning of Large Language Models},\nauthor={Rui Ye and Rui Ge and Xinyu Zhu and Jingyi Chai and Yaxin Du and Yang Liu and Yanfeng Wang and Siheng Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=djGx0hucok}\n}", "github": "", "reviewers": "4gik;ziZv;Mo2v;GdVS", "pdf_size": 9174663, "rating": "6;7;8;8", "confidence": "5;5;4;4", "wc_summary_and_contributions": "160;66;48;68", "wc_strengths": "4;75;4;78", "wc_improvement": "116;48;41;45", "wc_limitations": "4;8;8;5", "wc_correctness": "14;1;5;9", "wc_clarity": "12;1;7;6", "wc_relation_to_prior_work": "682;1;7;1", "wc_documentation": "116;1;7;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "1109;202;128;214", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "100;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "4;1;1;1", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 85.5, 43.71212646394591 ], "wc_strengths_avg": [ 40.25, 36.265513921630834 ], "wc_improvement_avg": [ 62.5, 30.987900864692335 ], "wc_limitations_avg": [ 6.25, 1.7853571071357126 ], "wc_correctness_avg": [ 7.25, 4.815340071064556 ], "wc_clarity_avg": [ 6.5, 3.905124837953327 ], "wc_relation_to_prior_work_avg": [ 172.75, 294.025827947138 ], "wc_documentation_avg": [ 31.25, 48.99170848215032 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 413.25, 403.03931259865954 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 25.0, 43.30127018922193 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15237021017749778037&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;tsinghua.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hardness of Learning Neural Networks under the Manifold Hypothesis", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94321", "id": "dkkgKzMni7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dkkgKzMni7", "openreview": "https://openreview.net/forum?id=dkkgKzMni7", "poster": "", "project": "", "author_site": "Bobak Kiani, Jason Wang, Melanie Weber", "tldr": "", "abstract": "The manifold hypothesis presumes that high-dimensional data lies on or near a low-dimensional manifold. \nWhile the utility of encoding geometric structure has been demonstrated empirically, rigorous analysis of its impact on the learnability of neural networks is largely missing. Several recent results have established hardness results for learning feedforward and equivariant neural networks under i.i.d. Gaussian or uniform Boolean data distributions. In this paper, we investigate the hardness of learning under the manifold hypothesis. We ask, which minimal assumptions on the curvature and regularity of the manifold, if any, render the learning problem efficiently learnable. We prove that learning is hard under input manifolds of bounded curvature by extending proofs of hardness in the SQ and cryptographic settings for boolean data inputs to the geometric setting. On the other hand, we show that additional assumptions on the volume of the data manifold alleviate these fundamental limitations and guarantee learnability via a simple interpolation argument. Notable instances of this regime are manifolds which can be reliably reconstructed via manifold learning. \nLooking forward, we comment on and empirically explore intermediate regimes of manifolds, which have heterogeneous features commonly found in real world data.", "keywords": "Manifold hypothesis;Learning theory;Hardness of learning;Neural network hardness;Manifold Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Bobak Kiani;Jason Wang;Melanie Weber", "authorids": "~Bobak_Kiani1;~Jason_Wang3;~Melanie_Weber1", "gender": ";M;", "homepage": ";https://www.jasonwang.app/;", "dblp": "232/4086;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": "bobak-kiani;jasonwang292/;", "or_profile": "~Bobak_Kiani1;~Jason_Wang3;~Melanie_Weber1", "aff": "Massachusetts Institute of Technology;Harvard University;", "aff_domain": "mit.edu;harvard.edu;", "position": "PhD student;Undergrad student;", "bibtex": "@inproceedings{\nkiani2024hardness,\ntitle={Hardness of Learning Neural Networks under the Manifold Hypothesis},\nauthor={Bobak Kiani and Jason Wang and Melanie Weber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dkkgKzMni7}\n}", "github": "", "reviewers": "Wvq2;PDhw;7hKF;VtS6", "pdf_size": 1040894, "rating": "6;6;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "96;158;61;111", "wc_strengths": "17;110;90;46", "wc_weaknesses": "238;85;32;53", "wc_questions": "51;111;49;179", "wc_limitations": "4;54;8;3", "wc_review": "406;518;240;392", "wc_reply_reviewers": "14;89;24;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.5, 34.831738400487566 ], "wc_strengths_avg": [ 65.75, 36.44430682562093 ], "wc_weaknesses_avg": [ 102.0, 80.7558047449222 ], "wc_questions_avg": [ 97.5, 53.242370345430714 ], "wc_limitations_avg": [ 17.25, 21.299941314473145 ], "wc_review_avg": [ 389.0, 98.91915891272023 ], "wc_reply_reviewers_avg": [ 31.75, 34.134842902817056 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4991074943824487360&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;harvard.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.harvard.edu", "aff_unique_abbr": "MIT;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Defensive Unlearning with Adversarial Training for Robust Concept Erasure in Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94320", "id": "dkpmfIydrF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dkpmfIydrF", "openreview": "https://openreview.net/forum?id=dkpmfIydrF", "poster": "/media/PosterPDFs/NeurIPS%202024/94320.png?t=1731705687.9796019", "project": "", "author_site": "Yimeng Zhang, Xin Chen, Jinghan Jia, Yihua Zhang, Chongyu Fan, Jiancheng Liu, Mingyi Hong, Ke Ding, Sijia Liu", "tldr": "", "abstract": "Diffusion models (DMs) have achieved remarkable success in text-to-image generation, but they also pose safety risks, such as the potential generation of harmful content and copyright violations. The techniques of machine unlearning, also known as concept erasing, have been developed to address these risks. However, these techniques remain vulnerable to adversarial prompt attacks, which can prompt DMs post-unlearning to regenerate undesired images containing concepts (such as nudity) meant to be erased. This work aims to enhance the robustness of concept erasing by integrating the principle of adversarial training (AT) into machine unlearning, resulting in the robust unlearning framework referred to as AdvUnlearn. However, achieving this effectively and efficiently is highly nontrivial. First, we find that a straightforward implementation of AT compromises DMs\u2019 image generation quality post-unlearning. To address this, we develop a utility-retaining regularization on an additional retain set, optimizing the trade-off between concept erasure robustness and model utility in AdvUnlearn. Moreover, we identify the text encoder as a more suitable module for robustification compared to UNet, ensuring unlearning effectiveness. And the acquired text encoder can serve as a plug-and-play robust unlearner for various DM types. Empirically, we perform extensive experiments to demonstrate the robustness advantage of AdvUnlearn across various DM unlearning scenarios, including the erasure of nudity, objects, and style concepts. In addition to robustness, AdvUnlearn also achieves a balanced tradeoff with model utility. To our knowledge, this is the first work to systematically explore robust DM unlearning through AT, setting it apart from existing methods that overlook robustness in concept erasing. Codes are available at https://github.com/OPTML-Group/AdvUnlearn.\n\nWarning: This paper contains model outputs that may be offensive in nature.", "keywords": "diffusion model;machine unlearning;adversarial training", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/aee5b5f5e1bfd1cab71aeb1a7c5114c1973f179d.zip", "author": "Yimeng Zhang;Xin Chen;Jinghan Jia;Yihua Zhang;Chongyu Fan;Jiancheng Liu;Mingyi Hong;Ke Ding;Sijia Liu", "authorids": "~Yimeng_Zhang2;~Xin_Chen14;~Jinghan_Jia1;~Yihua_Zhang1;~Chongyu_Fan1;~Jiancheng_Liu2;~Mingyi_Hong1;~Ke_Ding2;~Sijia_Liu1", "gender": "M;;M;M;M;M;M;M;M", "homepage": "https://damon-demon.github.io;https://xinchenhawaii.github.io/;https://jinghanjia.netlify.app/;https://yihua-zhang.com;https://github.com/a-F1;https://ljcc0930.github.io/;http://people.ece.umn.edu/~mhong/mingyi.html;;https://lsjxjtu.github.io/", "dblp": ";24/1518;286/5392;;359/3239;74/3002;57/8053;;128/6972-1", "google_scholar": "https://scholar.google.com/citations?hl=en;bNou80wAAAAJ;bqP_zxYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;ReWNzl4AAAAJ;qRnP-p0AAAAJ;;C7dO_UgAAAAJ", "orcid": "0000-0003-1608-2541;0000-0003-1950-2468;;;;;;;", "linkedin": ";;jinghan-jia-5194451ba/;zhangyihua/;;;;dingke/;", "or_profile": "~Yimeng_Zhang2;~Xin_Chen14;~Jinghan_Jia1;~Yihua_Zhang1;~Chongyu_Fan1;~Jiancheng_Liu2;~Mingyi_Hong1;~Ke_Ding2;~Sijia_Liu1", "aff": "ByteDance Inc.;Intel Corp;Michigan State University;Michigan State University;Huazhong University of Science and Technology;Michigan State University;University of Minnesota, Minneapolis;Intel;Michigan State University", "aff_domain": "bytedance.com;intel.com;msu.edu;msu.edu;hust.edu.cn;msu.edu;umn.edu;intel.com;msu.edu", "position": "Research Intern;Machine Learning Software Engineer;PhD student;PhD student;Undergrad student;MS student;Associate Professor;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024defensive,\ntitle={Defensive Unlearning with Adversarial Training for Robust Concept Erasure in Diffusion Models},\nauthor={Yimeng Zhang and Xin Chen and Jinghan Jia and Yihua Zhang and Chongyu Fan and Jiancheng Liu and Mingyi Hong and Ke Ding and Sijia Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dkpmfIydrF}\n}", "github": "", "reviewers": "hd8m;4rR4;gHPo", "pdf_size": 11932468, "rating": "5;5;5", "confidence": "4;5;2", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "73;67;55", "wc_strengths": "68;42;76", "wc_weaknesses": "65;202;16", "wc_questions": "43;235;116", "wc_limitations": "47;46;8", "wc_review": "296;592;271", "wc_reply_reviewers": "123;21;24", "wc_reply_authors": "402;89;152", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.0, 7.483314773547883 ], "wc_strengths_avg": [ 62.0, 14.514360704718161 ], "wc_weaknesses_avg": [ 94.33333333333333, 78.7160861719015 ], "wc_questions_avg": [ 131.33333333333334, 79.12999150483687 ], "wc_limitations_avg": [ 33.666666666666664, 18.153665072253467 ], "wc_review_avg": [ 386.3333333333333, 145.78599231598199 ], "wc_reply_reviewers_avg": [ 56.0, 47.391982444291145 ], "wc_reply_authors_avg": [ 214.33333333333334, 135.1698519969926 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12060552968434477485&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "bytedance.com;intel.com;msu.edu;msu.edu;hust.edu.cn;msu.edu;umn.edu;intel.com;msu.edu", "author_num": 9, "aff_unique_index": "0;1;2;2;3;2;4;1;2", "aff_unique_norm": "ByteDance;Intel;Michigan State University;Huazhong University of Science and Technology;University of Minnesota", "aff_unique_dep": ";Intel Corporation;;;", "aff_unique_url": "https://www.bytedance.com;https://www.intel.com;https://www.msu.edu;http://www.hust.edu.cn;https://www.minnesota.edu", "aff_unique_abbr": "ByteDance;Intel;MSU;HUST;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Minneapolis", "aff_country_unique_index": "0;1;1;1;0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Semi-Supervised Sparse Gaussian Classification: Provable Benefits of Unlabeled Data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94319", "id": "dlCTmEyq6y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dlCTmEyq6y", "openreview": "https://openreview.net/forum?id=dlCTmEyq6y", "poster": "", "project": "", "author_site": "Eyar Azar, Boaz Nadler", "tldr": "", "abstract": "The premise of semi-supervised learning (SSL) is that combining labeled and unlabeled data yields significantly more accurate models.\nDespite empirical successes, the theoretical understanding of SSL is still far from complete. \nIn this work, we study SSL for high dimensional sparse Gaussian classification. \nTo construct an accurate classifier a key task is feature selection, detecting the few variables that separate the two classes.\nFor this SSL setting, we analyze information theoretic lower bounds for accurate feature selection as well as computational lower bounds, \nassuming the low-degree likelihood hardness conjecture. \nOur key contribution is the identification of a regime in the problem parameters (dimension, sparsity, number of labeled and unlabeled samples) where SSL is guaranteed to be advantageous for classification.\nSpecifically, there is a regime where it is possible to construct in polynomial time an accurate SSL classifier.\nHowever, any computationally efficient supervised or unsupervised learning schemes, that separately use only the labeled or unlabeled data would fail. \nOur work highlights the provable benefits of combining labeled and unlabeled data for classification and feature selection in high dimensions. \nWe present simulations that complement our theoretical analysis.", "keywords": "semi-supervised learning;gaussian mixture models;high-dimensional statistics;sparsity;statistical-computational gaps", "primary_area": "learning_theory", "supplementary_material": "", "author": "Eyar Azar;Boaz Nadler", "authorids": "~Eyar_Azar1;~Boaz_Nadler2", "gender": "M;M", "homepage": ";https://www.weizmann.ac.il/math/Nadler/home", "dblp": "321/6591;53/4192", "google_scholar": "6AA9ikkAAAAJ;N3Jj5_cAAAAJ", "orcid": ";0000-0002-9777-4576", "linkedin": "eyar-azar-a7b5ab160;", "or_profile": "~Eyar_Azar1;~Boaz_Nadler2", "aff": "Weizmann Institute of Science;Weizmann Institute of Science", "aff_domain": "weizmann.ac.il;weizmann.ac.il", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nazar2024semisupervised,\ntitle={Semi-Supervised Sparse Gaussian Classification: Provable Benefits of Unlabeled Data},\nauthor={Eyar Azar and Boaz Nadler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dlCTmEyq6y}\n}", "github": "", "reviewers": "NrxF;6RTo;uoYb;3M9B", "pdf_size": 1182994, "rating": "6;7;7;9", "confidence": "3;4;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "410;204;115;33", "wc_strengths": "90;30;48;26", "wc_weaknesses": "58;59;126;83", "wc_questions": "1;213;25;30", "wc_limitations": "1;4;1;1", "wc_review": "560;510;315;173", "wc_reply_reviewers": "37;17;0;11", "wc_reply_authors": "4;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 190.5, 140.4181256106205 ], "wc_strengths_avg": [ 48.5, 25.352514668174436 ], "wc_weaknesses_avg": [ 81.5, 27.572631357924475 ], "wc_questions_avg": [ 67.25, 84.85981086474327 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 389.5, 154.92982282310916 ], "wc_reply_reviewers_avg": [ 16.25, 13.442005058770064 ], "wc_reply_authors_avg": [ 1.0, 1.7320508075688772 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13525427726894098536&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "weizmann.ac.il;weizmann.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Scalable DBSCAN with Random Projections", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94318", "id": "dmhi2ydnXZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dmhi2ydnXZ", "openreview": "https://openreview.net/forum?id=dmhi2ydnXZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94318.png?t=1731066252.723969", "project": "", "author_site": "HaoChuan Xu, Ninh Pham", "tldr": "", "abstract": "We present sDBSCAN, a scalable density-based clustering algorithm in high dimensions with cosine distance. sDBSCAN leverages recent advancements in random projections given a significantly large number of random vectors to quickly identify core points and their neighborhoods, the primary hurdle of density-based clustering. Theoretically, sDBSCAN preserves the DBSCAN\u2019s clustering structure under mild conditions with high probability. To facilitate sDBSCAN, we present sOPTICS, a scalable visual tool to guide the parameter setting of sDBSCAN. We also extend sDBSCAN and sOPTICS to L2, L1, \u03c72, and Jensen-Shannon distances via random kernel features. Empirically, sDBSCAN is significantly faster and provides higher accuracy than competitive DBSCAN variants on real-world million-point data sets. On these data sets, sDBSCAN and sOPTICS run in a few minutes, while the scikit-learn counterparts and other clustering competitors demand several hours or\ncannot run on our hardware due to memory constraints. Our code is available at https://github.com/NinhPham/sDbscan.", "keywords": "Density-based clustering;random projections;extreme order statistics", "primary_area": "evaluation", "supplementary_material": "/attachment/c7395875c9d26efb6cc291448617536741484cf8.zip", "author": "HaoChuan Xu;Ninh Pham", "authorids": "~HaoChuan_Xu1;~Ninh_Pham1", "gender": "M;M", "homepage": ";https://profiles.auckland.ac.nz/ninh-pham", "dblp": ";84/9841", "google_scholar": ";bvDmZoIAAAAJ", "orcid": ";0000-0001-5768-9900", "linkedin": "haochuan-xu-34510a155/;", "or_profile": "~HaoChuan_Xu1;~Ninh_Pham1", "aff": "University of Auckland;University of Auckland", "aff_domain": "aucklanduni.ac.nz;auckland.ac.nz", "position": "Undergrad student;Lecturer", "bibtex": "@inproceedings{\nxu2024scalable,\ntitle={Scalable {DBSCAN} with Random Projections},\nauthor={HaoChuan Xu and Ninh Pham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dmhi2ydnXZ}\n}", "github": "", "reviewers": "KGU7;gFWb;UCFw;QDtd;C5J7", "pdf_size": 1117299, "rating": "4;5;5;6;7", "confidence": "5;4;3;4;2", "soundness": "2;3;2;3;3", "novelty": "1;2;3;3;3", "presentation": "2;2;2;3;3", "wc_summary": "114;28;49;384;426", "wc_strengths": "56;44;25;28;82", "wc_weaknesses": "83;257;297;31;57", "wc_questions": "87;2;60;105;188", "wc_limitations": "1;16;11;9;7", "wc_review": "341;347;442;557;760", "wc_reply_reviewers": "34;110;622;0;45", "wc_reply_authors": "76;422;856;0;25", "reply_reviewers": "2;2;2;0;1", "reply_authors": "3;3;3;1;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 200.2, 170.1251304187597 ], "wc_strengths_avg": [ 47.0, 20.784609690826528 ], "wc_weaknesses_avg": [ 145.0, 109.75609322493216 ], "wc_questions_avg": [ 88.4, 60.77696932226877 ], "wc_limitations_avg": [ 8.8, 4.915282290977803 ], "wc_review_avg": [ 489.4, 156.42327192588704 ], "wc_reply_reviewers_avg": [ 162.2, 232.6459971716685 ], "wc_reply_authors_avg": [ 275.8, 327.6866796194194 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 2.4, 0.8 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8076923076923078, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12143022152274206209&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "aucklanduni.ac.nz;auckland.ac.nz", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Auckland", "aff_unique_dep": "", "aff_unique_url": "https://www.auckland.ac.nz", "aff_unique_abbr": "UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "New Zealand" }, { "title": "Dynamics of Supervised and Reinforcement Learning in the Non-Linear Perceptron", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94317", "id": "doaJTihgIZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=doaJTihgIZ", "openreview": "https://openreview.net/forum?id=doaJTihgIZ", "poster": "", "project": "", "author_site": "Christian Schmid, James M Murray", "tldr": "", "abstract": "The ability of a brain or a neural network to efficiently learn depends crucially on both the task structure and the learning rule.\nPrevious works have analyzed the dynamical equations describing learning in the relatively simplified context of the perceptron under assumptions of a student-teacher framework or a linearized output. \nWhile these assumptions have facilitated theoretical understanding, they have precluded a detailed understanding of the roles of the nonlinearity and input-data distribution in determining the learning dynamics, limiting the applicability of the theories to real biological or artificial neural networks.\nHere, we use a stochastic-process approach to derive flow equations describing learning, applying this framework to the case of a nonlinear perceptron performing binary classification. \nWe characterize the effects of the learning rule (supervised or reinforcement learning, SL/RL) and input-data distribution on the perceptron's learning curve and the forgetting curve as subsequent tasks are learned.\nIn particular, we find that the input-data noise differently affects the learning speed under SL vs. RL, as well as determines how quickly learning of a task is overwritten by subsequent learning. Additionally, we verify our approach with real data using the MNIST dataset.\nThis approach points a way toward analyzing learning dynamics for more-complex circuit architectures.", "keywords": "Learning Dynamics;non-linear perceptron;supervised learning;reinforcement learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/2bb7a4d965a6a015c75184dd38f88d644ca0ffe9.zip", "author": "Christian Schmid;James M Murray", "authorids": "~Christian_Schmid1;~James_M_Murray1", "gender": ";M", "homepage": ";https://murraylab.uoregon.edu", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Christian_Schmid1;~James_M_Murray1", "aff": ";University of Oregon", "aff_domain": ";uoregon.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nschmid2024dynamics,\ntitle={Dynamics of Supervised and Reinforcement Learning in the Non-Linear Perceptron},\nauthor={Christian Schmid and James M Murray},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=doaJTihgIZ}\n}", "github": "", "reviewers": "WrR8;m7rp;SUcs", "pdf_size": 1678304, "rating": "5;7;7", "confidence": "2;4;3", "soundness": "3;3;3", "novelty": "2;2;4", "presentation": "2;2;2", "wc_summary": "15;101;126", "wc_strengths": "10;100;103", "wc_weaknesses": "9;213;215", "wc_questions": "17;411;740", "wc_limitations": "10;6;164", "wc_review": "61;831;1348", "wc_reply_reviewers": "0;166;55", "wc_reply_authors": "0;204;165", "reply_reviewers": "0;1;1", "reply_authors": "1;3;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 80.66666666666667, 47.541794478355804 ], "wc_strengths_avg": [ 71.0, 43.15089802078283 ], "wc_weaknesses_avg": [ 145.66666666666666, 96.6413760019773 ], "wc_questions_avg": [ 389.3333333333333, 295.5608604369364 ], "wc_limitations_avg": [ 60.0, 73.55723395198237 ], "wc_review_avg": [ 746.6666666666666, 528.7887626978807 ], "wc_reply_reviewers_avg": [ 73.66666666666667, 69.04265992043536 ], "wc_reply_authors_avg": [ 123.0, 88.41945487278238 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n2y54kuA1RUJ:scholar.google.com/&scioq=Dynamics+of+Supervised+and+Reinforcement+Learning+in+the+Non-Linear+Perceptron&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": ";uoregon.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Oregon", "aff_unique_dep": "", "aff_unique_url": "https://www.uoregon.edu", "aff_unique_abbr": "UO", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Noether's Razor: Learning Conserved Quantities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94316", "id": "dpvqBkEp1f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dpvqBkEp1f", "openreview": "https://openreview.net/forum?id=dpvqBkEp1f", "poster": "", "project": "", "author_site": "Tycho van der Ouderaa, Mark van der Wilk, Pim de Haan", "tldr": "", "abstract": "Symmetries have proven useful in machine learning models, improving generalisation and overall performance. At the same time, recent advancements in learning dynamical systems rely on modelling the underlying Hamiltonian to guarantee the conservation of energy.\nThese approaches can be connected via a seminal result in mathematical physics: Noether's theorem, which states that symmetries in a dynamical system correspond to conserved quantities.\nThis work uses Noether's theorem to parameterise symmetries as learnable conserved quantities. We then allow conserved quantities and associated symmetries to be learned directly from train data through approximate Bayesian model selection, jointly with the regular training procedure. As training objective, we derive a variational lower bound to the marginal likelihood. The objective automatically embodies an Occam's Razor effect that avoids collapse of conversation laws to the trivial constant, without the need to manually add and tune additional regularisers. We demonstrate a proof-of-principle on n-harmonic oscillators and n-body systems. We find that our method correctly identifies the correct conserved quantities and U(n) and SE(n) symmetry groups, improving overall performance and predictive accuracy on test data.", "keywords": "noether theorem razor bayesian model selection learning symmetry conserved quantities variational inference", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Tycho F. A. van der Ouderaa;Mark van der Wilk;Pim De Haan", "authorids": "~Tycho_F._A._van_der_Ouderaa2;~Mark_van_der_Wilk1;~Pim_De_Haan1", "gender": "M;M;M", "homepage": "https://mvdw.uk;https://pimdehaan.com;https://tychovdo.github.io/", "dblp": "142/2927;;", "google_scholar": "PKcjcT4AAAAJ;AZeK-REAAAAJ;", "orcid": "0000-0001-7947-6682;;", "linkedin": ";https://nl.linkedin.com/in/pim-de-haan;tychovdo/", "or_profile": "~Mark_van_der_Wilk1;~Pim_De_Haan1;~Tycho_F.A._van_der_Ouderaa1", "aff": "University of Oxford;Qualcomm;Imperial College London", "aff_domain": "cs.ox.ac.uk;qualcomm.com;imperial.ac.uk", "position": "Associate Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nouderaa2024noethers,\ntitle={Noether's Razor: Learning Conserved Quantities},\nauthor={Tycho F. A. van der Ouderaa and Mark van der Wilk and Pim De Haan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dpvqBkEp1f}\n}", "github": "", "reviewers": "tVqw;Pckk;zC9J;qjc1;hvbw", "pdf_size": 3108297, "rating": "5;5;5;6;9", "confidence": "4;3;4;3;4", "soundness": "3;3;3;3;4", "novelty": "1;2;3;3;4", "presentation": "3;2;2;3;4", "wc_summary": "181;82;165;171;89", "wc_strengths": "140;29;55;75;104", "wc_weaknesses": "597;251;327;104;65", "wc_questions": "235;124;57;121;3", "wc_limitations": "110;4;7;17;1", "wc_review": "1263;490;611;488;262", "wc_reply_reviewers": "551;60;23;35;0", "wc_reply_authors": "809;26;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;1;1;1", "rating_avg": [ 6.0, 1.5491933384829668 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 1.019803902718557 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 137.6, 42.90268056893415 ], "wc_strengths_avg": [ 80.6, 38.5362167317966 ], "wc_weaknesses_avg": [ 268.8, 189.79610111906936 ], "wc_questions_avg": [ 108.0, 77.69169839822013 ], "wc_limitations_avg": [ 27.8, 41.450693600951965 ], "wc_review_avg": [ 622.8, 339.40500880216837 ], "wc_reply_reviewers_avg": [ 133.8, 209.49596654828466 ], "wc_reply_authors_avg": [ 167.0, 321.1579050872016 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.263523138347365, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AQP1iXfBYVoJ:scholar.google.com/&scioq=Noether%27s+Razor:+Learning+Conserved+Quantities&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cs.ox.ac.uk;qualcomm.com;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Oxford;Qualcomm Incorporated;Imperial College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://www.qualcomm.com;https://www.imperial.ac.uk", "aff_unique_abbr": "Oxford;Qualcomm;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Approximately Equivariant Neural Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94315", "id": "dqT9MC5NQl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dqT9MC5NQl", "openreview": "https://openreview.net/forum?id=dqT9MC5NQl", "poster": "/media/PosterPDFs/NeurIPS%202024/94315.png?t=1731419307.2891717", "project": "", "author_site": "Matthew Ashman, Cristiana Diaconu, Adrian Weller, Wessel Bruinsma, Richard Turner", "tldr": "", "abstract": "Equivariant deep learning architectures exploit symmetries in learning problems to improve the sample efficiency of neural-network-based models and their ability to generalise. However, when modelling real-world data, learning problems are often not *exactly* equivariant, but only approximately. For example, when estimating the global temperature field from weather station observations, local topographical features like mountains break translation equivariance. In these scenarios, it is desirable to construct architectures that can flexibly depart from exact equivariance in a data-driven way. Current approaches to achieving this cannot usually be applied out-of-the-box to any architecture and symmetry group. In this paper, we develop a general approach to achieving this using existing equivariant architectures. Our approach is agnostic to both the choice of symmetry group and model architecture, making it widely applicable. We consider the use of approximately equivariant architectures in neural processes (NPs), a popular family of meta-learning models. We demonstrate the effectiveness of our approach on a number of synthetic and real-world regression experiments, showing that approximately equivariant NP models can outperform both their non-equivariant and strictly equivariant counterparts.", "keywords": "equivariance;neural processes;meta learning;deep learning;probabilistic methods", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Matthew Ashman;Cristiana Diaconu;Adrian Weller;Wessel P Bruinsma;Richard E. Turner", "authorids": "~Matthew_Ashman1;~Cristiana_Diaconu1;~Adrian_Weller1;~Wessel_P_Bruinsma1;~Richard_E_Turner1", "gender": "M;F;M;M;", "homepage": "https://mattashman.github.io/;https://cddcam.github.io/;http://mlg.eng.cam.ac.uk/adrian/;https://rich-turner-group.github.io/;https://wessel.ai", "dblp": ";380/4379.html;73/8324;40/5352;242/3348.html", "google_scholar": "j1YiUKUAAAAJ;Ws2IoZIAAAAJ;https://scholar.google.co.uk/citations?user=Ek4hM10AAAAJ;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;QRQwz3cAAAAJ", "orcid": ";0009-0007-5165-2630;;;", "linkedin": "matthew-ashman-a69017150/;https://linkedin.com/in/cristiana-diaconu-99a3ba161;;;", "or_profile": "~Matthew_Ashman1;~Cristiana_Diaconu1;~Adrian_Weller1;~Richard_E_Turner1;~Wessel_Bruinsma1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge;Microsoft Research;", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk;research.microsoft.com;", "position": "PhD student;PhD student;Principal Researcher;Researcher;", "bibtex": "@inproceedings{\nashman2024approximately,\ntitle={Approximately Equivariant Neural Processes},\nauthor={Matthew Ashman and Cristiana Diaconu and Adrian Weller and Wessel P Bruinsma and Richard E. Turner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dqT9MC5NQl}\n}", "github": "", "reviewers": "dBf7;7yUH;Ks9f;WhxG", "pdf_size": 3154801, "rating": "5;5;6;8", "confidence": "4;3;2;5", "soundness": "2;3;3;3", "novelty": "2;2;2;4", "presentation": "1;2;3;3", "wc_summary": "38;66;79;31", "wc_strengths": "22;29;87;52", "wc_weaknesses": "158;137;91;320", "wc_questions": "86;57;194;473", "wc_limitations": "16;1;4;41", "wc_review": "320;290;455;917", "wc_reply_reviewers": "803;10;65;196", "wc_reply_authors": "2535;0;48;696", "reply_reviewers": "2;1;1;3", "reply_authors": "8;1;2;4", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 53.5, 19.704060495238032 ], "wc_strengths_avg": [ 47.5, 25.362373705944798 ], "wc_weaknesses_avg": [ 176.5, 86.32062326002982 ], "wc_questions_avg": [ 202.5, 164.3053559686963 ], "wc_limitations_avg": [ 15.5, 15.75595125658873 ], "wc_review_avg": [ 495.5, 251.16379118017787 ], "wc_reply_reviewers_avg": [ 268.5, 315.9038619580331 ], "wc_reply_authors_avg": [ 819.75, 1027.738384755576 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 2.680951323690902 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5477225575051661, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16547670209971577300&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk;research.microsoft.com;", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Cambridge;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Cambridge;MSR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "An Efficient Memory Module for Graph Few-Shot Class-Incremental Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94314", "id": "dqdffX3BS5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dqdffX3BS5", "openreview": "https://openreview.net/forum?id=dqdffX3BS5", "poster": "/media/PosterPDFs/NeurIPS%202024/94314.png?t=1731481575.43203", "project": "", "author_site": "Dong Li, Aijia Zhang, Junqi Gao, Biqing Qi", "tldr": "", "abstract": "Graph incremental learning has gained widespread attention for its ability to mitigate catastrophic forgetting for graph neural networks (GNN). Conventional methods typically require numerous labels for node classification. However, obtaining abundant labels is often challenging in practice, which makes graph few-shot incremental learning necessary. Current approaches rely on large number of samples from meta-learning to construct memories, and heavy fine-tuning of the GNN parameters that lead to the loss of past knowledge. These result in significant memory consumption and loss of past knowledge information, respectively. To tackle these issues, We introduce Mecoin to efficient construct and Preserve memory. For efficient storage and update of class prototypes, Mecoin use Structured Memory Unit (SMU) to cache prototypes of the seen classes and update new class prototypes through interaction between nodes and the cached prototypes by Memory Construction module(MeCo). Besides, to avoid extensive parameter fine-tuning and forgetting, we introduce a Memory Representation Adaptive Module called MRaM to separate the learning of prototypes and class representations and use Graph Knowledge Interchange Module (GKIM) to injects past knowledge information into GNN. We analyze the effectiveness of our paradigm from the perspectives of generalization error, and discuss the impact of different distillation methods on model performance through experiments and VC-dimension. By comparison with other related methods, we validate that Mecoin achieves higher accuracy and lower forgetting rate.", "keywords": "graph few-shot class-incremental learning;graph incremental learning;graph few-shot learning", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/5f69e0efcca2f4f85bfd22c7eb0907751f4d4e4e.zip", "author": "Dong Li;Aijia Zhang;Junqi Gao;Biqing Qi", "authorids": "~Dong_Li20;~Aijia_Zhang1;~Junqi_Gao1;~Biqing_Qi1", "gender": ";F;M;M", "homepage": "https://scholar.google.com.hk/citations?hl=zh-CN&user=sSOt_McAAAAJ&view_op=list_works&gmla=AHoSzlUGq462ME3yzn-O-QVHz7m99fadMPU_X2FzQ2M47JN3ZcLFtf0DPl4zmY9LfBnLpIMz-00YKFo7-kN034NWi0ZKUF3q57DNbdoGe_w;;;https://biqing-qi.github.io/", "dblp": ";;81/9266.html;233/4949.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;vVFvlH4AAAAJ;;", "orcid": "0009-0007-8307-9904;;0009-0007-1644-5812;0000-0002-4072-0577", "linkedin": ";;;", "or_profile": "~Dong_Li20;~Aijia_Zhang1;~Junqi_Gao1;~Biqing_Qi1", "aff": "Shanghai Artificial Intelligence Laboratory;Harbin Institute of Technology;Shanghai Artificial Intelligence Laboratory;Harbin Institute of Technology", "aff_domain": "pjlab.org.cn;stu.hit.edu.cn;pjlab.org.cn;hit.edu.cn", "position": "Intern;PhD student;Intern;PhD student", "bibtex": "@inproceedings{\nli2024an,\ntitle={An Efficient Memory Module for Graph Few-Shot Class-Incremental Learning},\nauthor={Dong Li and Aijia Zhang and Junqi Gao and Biqing Qi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dqdffX3BS5}\n}", "github": "", "reviewers": "f8WD;utSJ;cvjp", "pdf_size": 5297617, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;2;3", "wc_summary": "62;92;69", "wc_strengths": "41;34;33", "wc_weaknesses": "54;269;33", "wc_questions": "66;4;2", "wc_limitations": "9;10;11", "wc_review": "232;409;148", "wc_reply_reviewers": "59;396;12", "wc_reply_authors": "151;703;98", "reply_reviewers": "1;2;1", "reply_authors": "2;4;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 74.33333333333333, 12.814921857827391 ], "wc_strengths_avg": [ 36.0, 3.559026084010437 ], "wc_weaknesses_avg": [ 118.66666666666667, 106.64687316351828 ], "wc_questions_avg": [ 24.0, 29.709706606876257 ], "wc_limitations_avg": [ 10.0, 0.816496580927726 ], "wc_review_avg": [ 263.0, 108.78419002777932 ], "wc_reply_reviewers_avg": [ 155.66666666666666, 171.0211163050406 ], "wc_reply_authors_avg": [ 317.3333333333333, 273.5645363630958 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7629070714385760700&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "pjlab.org.cn;stu.hit.edu.cn;pjlab.org.cn;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.shailab.org/;http://www.hit.edu.cn/", "aff_unique_abbr": "Shanghai AI Lab;HIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "LLMs Can Evolve Continually on Modality for $\\mathbb{X}$-Modal Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94313", "id": "drpJ7KOr3F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=drpJ7KOr3F", "openreview": "https://openreview.net/forum?id=drpJ7KOr3F", "poster": "/media/PosterPDFs/NeurIPS%202024/94313.png?t=1731569176.218531", "project": "", "author_site": "Jiazuo Yu, Haomiao Xiong, Lu Zhang, Haiwen Diao, Yunzhi Zhuge, Lanqing Hong, Dong Wang, Huchuan Lu, You He, Long Chen", "tldr": "", "abstract": "Multimodal Large Language Models (MLLMs) have gained significant attention due to their impressive capabilities in multimodal understanding. However, existing methods rely heavily on extensive modal-specific pretraining and joint-modal tuning, leading to significant computational burdens when expanding to new modalities. In this paper, we propose \\textbf{PathWeave}, a flexible and scalable framework with modal-\\textbf{path} s\\textbf{w}itching and \\textbf{e}xp\\textbf{a}nsion abilities that enables MLLMs to continually \\textbf{ev}olve on modalities for $\\mathbb{X}$-modal reasoning. We leverage the concept of Continual Learning and develop an incremental training strategy atop pre-trained MLLMs, enabling their expansion to new modalities using uni-modal data, without executing joint-modal pretraining. In detail, a novel Adapter-in-Adapter (AnA) framework is introduced, in which uni-modal and cross-modal adapters are seamlessly integrated to facilitate efficient modality alignment and collaboration. Additionally, an MoE-based gating module is applied between two types of adapters to further enhance the multimodal interaction. To investigate the proposed method, we establish a challenging benchmark called \\textbf{C}ontinual \\textbf{L}earning of \\textbf{M}odality (MCL), which consists of high-quality QA data from five distinct modalities: image, video, \\textcolor{black}{audio, depth} and point cloud. Extensive experiments demonstrate the effectiveness of the proposed AnA framework on learning plasticity and memory stability during continual learning. Furthermore, PathWeave performs comparably to state-of-the-art MLLMs while concurrently reducing parameter training burdens by 98.73\\%. Our code locates at \\url{https://github.com/JiazuoYu/PathWeave}.", "keywords": "MLLMs;Transfer Learning;Continual Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiazuo Yu;Haomiao Xiong;Lu Zhang;Haiwen Diao;Yunzhi Zhuge;Lanqing HONG;Dong Wang;Huchuan Lu;You He;Long Chen", "authorids": "~Jiazuo_Yu1;~Haomiao_Xiong1;~Lu_Zhang7;~Haiwen_Diao2;~Yunzhi_Zhuge1;~Lanqing_HONG1;~Dong_Wang5;~Huchuan_Lu1;~You_He2;~Long_Chen8", "gender": "M;;M;M;F;M;M;M;M;M", "homepage": "https://github.com/hmxiong;https://faculty.dlut.edu.cn/zhanglu1/zh_CN/index.htm;https://paranioar.github.io/;;https://racheltechie.github.io/;;http://ice.dlut.edu.cn/lu/publications.html;;https://zjuchenlong.github.io/;https://github.com/JiazuoYu", "dblp": "391/5920;82/10609-53;283/4467;304/1289;226/4258;40/3934-4;64/6896;;64/5725-16;320/8169", "google_scholar": "vtz2hQ8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;46eCjHQAAAAJ;;https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ;nVgPQpoAAAAJ;D3nE0agAAAAJ;;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ;FPoIE0UAAAAJ", "orcid": ";0000-0003-4648-4437;;;;;;0000-0002-6111-340X;0000-0001-6148-9709;", "linkedin": ";;;;;;;;;", "or_profile": "~Haomiao_Xiong1;~Lu_Zhang7;~Haiwen_Diao2;~Yunzhi_Zhuge1;~Lanqing_HONG1;~Dong_Wang5;~Huchuan_Lu1;~You_He2;~Long_Chen8;~JiazuoYu1", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;The University of Adelaide;Huawei Technologies Ltd.;Dalian University of Technology;Dalian University of Technology;Tsinghua University;Hong Kong University of Science and Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;adelaide.edu.au;huawei.com;dlut.edu.cn;dlut.edu.cn;tsinghua.edu.cn;ust.hk;dlut.edu.cn", "position": "MS student;Postdoc;PhD student;PhD student;Researcher;Full Professor;Professor;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nyu2024llms,\ntitle={{LLM}s Can Evolve Continually on Modality for \\${\\textbackslash}mathbb\\{X\\}\\$-Modal Reasoning},\nauthor={Jiazuo Yu and Haomiao Xiong and Lu Zhang and Haiwen Diao and Yunzhi Zhuge and Lanqing HONG and Dong Wang and Huchuan Lu and You He and Long Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=drpJ7KOr3F}\n}", "github": "", "reviewers": "odey;PRBt;fRFW", "pdf_size": 11016473, "rating": "5;6;7", "confidence": "1;5;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "39;68;81", "wc_strengths": "1;173;147", "wc_weaknesses": "1;106;145", "wc_questions": "1;38;2", "wc_limitations": "1;1;1", "wc_review": "43;386;376", "wc_reply_reviewers": "0;0;29", "wc_reply_authors": "0;60;21", "reply_reviewers": "0;0;1", "reply_authors": "0;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 1.699673171197595 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.666666666666664, 17.55625877635159 ], "wc_strengths_avg": [ 107.0, 75.70116687784058 ], "wc_weaknesses_avg": [ 84.0, 60.81118318204309 ], "wc_questions_avg": [ 13.666666666666666, 17.21110752456745 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 268.3333333333333, 159.38702024387752 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 13.67073110293992 ], "wc_reply_authors_avg": [ 27.0, 24.859605789312106 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.720576692122892, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RxiFKSCtBr8J:scholar.google.com/&scioq=LLMs+Can+Evolve+Continually+on+Modality+for+%24%5Cmathbb%7BX%7D%24-Modal+Reasoning&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;adelaide.edu.au;huawei.com;dlut.edu.cn;dlut.edu.cn;tsinghua.edu.cn;ust.hk;dlut.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;1;2;0;0;3;4;0", "aff_unique_norm": "Dalian University of Technology;University of Adelaide;Huawei;Tsinghua University;Hong Kong University of Science and Technology", "aff_unique_dep": ";;Huawei Technologies;;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.adelaide.edu.au;https://www.huawei.com;https://www.tsinghua.edu.cn;https://www.ust.hk", "aff_unique_abbr": "DUT;Adelaide;Huawei;THU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Nature-Inspired Local Propagation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94312", "id": "ds6xMV3yVV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ds6xMV3yVV", "openreview": "https://openreview.net/forum?id=ds6xMV3yVV", "poster": "", "project": "", "author_site": "Alessandro Betti, Marco Gori", "tldr": "", "abstract": "The spectacular results achieved in machine learning, including the recent advances in generative AI, rely on large data collections. On the opposite, intelligent processes in nature arises without the need for such collections, but simply by on-line processing of the environmental information. In particular, natural learning processes rely on mechanisms where data representation and learning are intertwined in such a way to respect spatiotemporal locality. This paper shows that such a feature arises from a pre-algorithmic view of learning that is inspired by related studies in Theoretical Physics. We show that the algorithmic interpretation of the derived \u201claws of learning\u201d, which takes the structure of Hamiltonian equations, reduces to Backpropagation when the speed of propagation goes to infinity. This opens the doors to machine learning studies based on full on-line information processing that are based on the replacement of Backpropagation with the proposed spatiotemporal local algorithm.", "keywords": "hamilton equation;backpropagation", "primary_area": "learning_theory", "supplementary_material": "/attachment/13f2d0cefeaed3043253034b6b8a545defcdda2e.zip", "author": "Alessandro Betti;Marco Gori", "authorids": "~Alessandro_Betti1;~Marco_Gori1", "gender": "M;M", "homepage": "http://sailab.diism.unisi.it/people/alessandro-betti/;http://sailab.diism.unisi.it/people/marco-gori/", "dblp": "180/7658;g/MarcoGori", "google_scholar": "https://scholar.google.com/citations?hl=it;", "orcid": ";0000-0001-6337-5430", "linkedin": ";", "or_profile": "~Alessandro_Betti1;~Marco_Gori1", "aff": "Institute for Advanced Studies Lucca;University of Siena", "aff_domain": "imtlucca.it;unisi.it", "position": "Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbetti2024natureinspired,\ntitle={Nature-Inspired Local Propagation},\nauthor={Alessandro Betti and Marco Gori},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ds6xMV3yVV}\n}", "github": "", "reviewers": "FXeY;GgV1;FbKP", "pdf_size": 583215, "rating": "5;7;7", "confidence": "2;3;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "95;86;244", "wc_strengths": "45;36;305", "wc_weaknesses": "169;126;622", "wc_questions": "70;208;572", "wc_limitations": "6;49;1", "wc_review": "385;505;1744", "wc_reply_reviewers": "168;304;75", "wc_reply_authors": "22;421;14", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 141.66666666666666, 72.45381670799377 ], "wc_strengths_avg": [ 128.66666666666666, 124.7406197764875 ], "wc_weaknesses_avg": [ 305.6666666666667, 224.36923932561604 ], "wc_questions_avg": [ 283.3333333333333, 211.75037714776855 ], "wc_limitations_avg": [ 18.666666666666668, 21.545816814923082 ], "wc_review_avg": [ 878.0, 614.3109961574837 ], "wc_reply_reviewers_avg": [ 182.33333333333334, 94.03663588670582 ], "wc_reply_authors_avg": [ 152.33333333333334, 190.00409352315424 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6943456593833615907&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "imtlucca.it;unisi.it", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Institute for Advanced Studies;University of Siena", "aff_unique_dep": ";", "aff_unique_url": "https://www.iasl.it;https://www.unisi.it", "aff_unique_abbr": ";UniSi", "aff_campus_unique_index": "0", "aff_campus_unique": "Lucca;", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { "title": "Assemblage: Automatic Binary Dataset Construction for Machine Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97592", "id": "dsK5EmmomU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dsK5EmmomU", "openreview": "https://openreview.net/forum?id=dsK5EmmomU", "poster": "/media/PosterPDFs/NeurIPS%202024/97592.png?t=1731746071.3494582", "project": "", "author_site": "Chang Liu, Rebecca Saul, Yihao Sun, Edward Raff, Maya Fuchs, Townsend Southard Pantano, James Holt, Kristopher Micinski", "tldr": "", "abstract": "Binary code is pervasive, and binary analysis is a key task in reverse engineering, malware classification, and vulnerability discovery. Unfortunately, while there exist large corpuses of malicious binaries, obtaining high-quality corpuses of benign binaries for modern systems has proven challenging (e.g., due to licensing issues). Consequently, machine learning based pipelines for binary analysis utilize either costly commercial corpuses (e.g., VirusTotal) or open-source binaries (e.g., coreutils) available in limited quantities. To address these issues, we present Assemblage: an extensible cloud-based distributed system that crawls, configures, and builds Windows PE binaries to obtain high-quality binary corpuses suitable for training state-of-the-art models in binary analysis. We have run Assemblage on AWS over the past year, producing 890k Windows PE and 428k Linux ELF binaries across 29 configurations. Assemblage is designed to be both reproducible and extensible, enabling users to publish \"recipes\" for their datasets, and facilitating the extraction of a wide array of features. We evaluated Assemblage by using its data to train modern learning-based pipelines for compiler provenance and binary function similarity. Our results illustrate the practical need for robust corpuses of high-quality Windows PE binaries in training modern learning-based binary analyses.", "keywords": "binary analysis;reverse engineering;benchmark;binary function similarity", "primary_area": "", "supplementary_material": "/attachment/047a527fbbe73eeb95d1abecab6cea72742845d7.zip", "author": "Chang Liu;Rebecca Saul;Yihao Sun;Edward Raff;Maya Fuchs;Townsend Southard Pantano;James Holt;Kristopher Micinski", "authorids": "~Chang_Liu33;~Rebecca_Saul1;~Yihao_Sun2;~Edward_Raff1;~Maya_Fuchs1;~Townsend_Southard_Pantano1;~James_Holt1;~Kristopher_Micinski1", "gender": "M;F;M;M;;M;M;M", "homepage": "https://changliu98.github.io/;;https://github.com/StarGazerM;http://www.edwardraff.com/;;;;", "dblp": ";;;204/3369;;;93/1248;", "google_scholar": "7BK4ze8AAAAJ;emPX1wsAAAAJ;;debM2bUAAAAJ;;;GtVgGjkAAAAJ;HpJLJWUAAAAJ", "orcid": ";;;0000-0002-9900-1972;;;0000-0002-6368-8696;", "linkedin": ";;;edward-raff-09992040/;maya-fuchs;tsouthardpantano;jeholt/;", "or_profile": "~Chang_Liu33;~Rebecca_Saul1;~Yihao_Sun2;~Edward_Raff1;~Maya_Fuchs1;~Townsend_Southard_Pantano1;~James_Holt1;~Kristopher_Micinski1", "aff": "Syracuse University;BOOZ ALLEN HAMILTON;Syracuse University;Booz Allen Hamilton;Booz Allen Hamilton ;, Syracuse University;Laboratory for Physical Sciences;Syracuse University", "aff_domain": "syr.edu;bah.com;syr.edu;boozallen.com;bah.com;eng-cs.syr.edu;umd.edu;syr.edu", "position": "PhD student;Researcher;PhD student;Principal Researcher;Data Scientist;Undergrad student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2024assemblage,\ntitle={Assemblage: Automatic Binary Dataset Construction for Machine Learning},\nauthor={Chang Liu and Rebecca Saul and Yihao Sun and Edward Raff and Maya Fuchs and Townsend Southard Pantano and James Holt and Kristopher Micinski},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=dsK5EmmomU}\n}", "github": "", "reviewers": "X8os;XvnZ;RpLW", "pdf_size": 1024949, "rating": "6;7;8", "confidence": "3;4;4", "wc_summary_and_contributions": "62;52;63", "wc_strengths": "29;149;315", "wc_improvement": "39;8;219", "wc_limitations": "1;24;281", "wc_correctness": "1;5;125", "wc_clarity": "1;8;60", "wc_relation_to_prior_work": "1;143;66", "wc_documentation": "1;13;63", "wc_additional_feedback": "1;1;1", "wc_review": "136;403;1193", "wc_reply_reviewers": "12;5;25", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 59.0, 4.96655480858378 ], "wc_strengths_avg": [ 164.33333333333334, 117.26134155049662 ], "wc_improvement_avg": [ 88.66666666666667, 93.0244890099137 ], "wc_limitations_avg": [ 102.0, 126.91992226071787 ], "wc_correctness_avg": [ 43.666666666666664, 57.53453069437712 ], "wc_clarity_avg": [ 23.0, 26.318561257535844 ], "wc_relation_to_prior_work_avg": [ 70.0, 58.04021594262608 ], "wc_documentation_avg": [ 25.666666666666668, 26.849374087469688 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 577.3333333333334, 448.78081757381545 ], "wc_reply_reviewers_avg": [ 14.0, 8.286535263104035 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9936562053268654161&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "syr.edu;bah.com;syr.edu;boozallen.com;bah.com;eng-cs.syr.edu;umd.edu;syr.edu", "author_num": 8, "aff_unique_index": "0;1;0;1;1;0;2;0", "aff_unique_norm": "Syracuse University;Booz Allen Hamilton;Laboratory for Physical Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.syracuse.edu;https://www.boozallen.com;", "aff_unique_abbr": "Syracuse;BAH;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Not so griddy: Internal representations of RNNs path integrating more than one agent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94311", "id": "dsMSWUBN8f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dsMSWUBN8f", "openreview": "https://openreview.net/forum?id=dsMSWUBN8f", "poster": "", "project": "", "author_site": "William Redman, Francisco Acosta, Santiago Acosta-Mendoza, Nina Miolane", "tldr": "", "abstract": "Success in collaborative and competitive environments, where agents must work with or against each other, requires individuals to encode the position and trajectory of themselves and others. Decades of neurophysiological experiments have shed light on how brain regions [e.g., medial entorhinal cortex (MEC), hippocampus] encode the self's position and trajectory. However, it has only recently been discovered that MEC and hippocampus are modulated by the positions and trajectories of others. To understand how encoding spatial information of multiple agents shapes neural representations, we train a recurrent neural network (RNN) model that captures properties of MEC to path integrate trajectories of two agents simultaneously navigating the same environment. We find significant differences between these RNNs and those trained to path integrate only a single agent. At the individual unit level, RNNs trained to path integrate more than one agent develop weaker grid responses, stronger border responses, and tuning for the relative position of the two agents. At the population level, they develop more distributed and robust representations, with changes in network dynamics and manifold topology. Our results provide testable predictions and open new directions with which to study the neural computations supporting spatial navigation.", "keywords": "grid cells;path integration;recurrent neural networks;normative models", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "William T Redman;Francisco Acosta;Santiago Acosta-Mendoza;Nina Miolane", "authorids": "~William_T_Redman1;~Francisco_Acosta1;~Santiago_Acosta-Mendoza1;~Nina_Miolane2", "gender": "M;M;;", "homepage": "https://wredman4.wixsite.com/wtredman;https://web.physics.ucsb.edu/~facosta/;;https://www.ece.ucsb.edu/people/faculty/nina-miolane", "dblp": "266/7985;;;", "google_scholar": "-SOfw0AAAAAJ;;2QYLvLcAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~William_T_Redman1;~Francisco_Acosta1;~Santiago_Acosta-Mendoza1;~Nina_Miolane2", "aff": "AIMdyn Inc.;University of California, Santa Barbara;University of California, Santa Barbara;University of California, Santa Barbara", "aff_domain": "aimdyn.com;ucsb.edu;ucsb.edu;ucsb.edu", "position": "Researcher;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nredman2024not,\ntitle={Not so griddy: Internal representations of {RNN}s path integrating more than one agent},\nauthor={William T Redman and Francisco Acosta and Santiago Acosta-Mendoza and Nina Miolane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dsMSWUBN8f}\n}", "github": "", "reviewers": "bmf9;bz9a;rAUq", "pdf_size": 29270722, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "36;127;172", "wc_strengths": "21;41;107", "wc_weaknesses": "269;51;387", "wc_questions": "39;57;4", "wc_limitations": "5;6;27", "wc_review": "370;282;697", "wc_reply_reviewers": "530;6;84", "wc_reply_authors": "494;0;16", "reply_reviewers": "2;1;1", "reply_authors": "3;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 111.66666666666667, 56.57050664632784 ], "wc_strengths_avg": [ 56.333333333333336, 36.745370078721784 ], "wc_weaknesses_avg": [ 235.66666666666666, 139.18173571110384 ], "wc_questions_avg": [ 33.333333333333336, 22.00504992546534 ], "wc_limitations_avg": [ 12.666666666666666, 10.143416036468626 ], "wc_review_avg": [ 449.6666666666667, 178.54286755722154 ], "wc_reply_reviewers_avg": [ 206.66666666666666, 230.83808659366034 ], "wc_reply_authors_avg": [ 170.0, 229.19569513118404 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1778103598308227722&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 7, "email": "aimdyn.com;ucsb.edu;ucsb.edu;ucsb.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "AIMdyn Inc.;University of California, Santa Barbara", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ucsb.edu", "aff_unique_abbr": ";UCSB", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generalization Analysis for Label-Specific Representation Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94310", "id": "dtPIUXdJHY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dtPIUXdJHY", "openreview": "https://openreview.net/forum?id=dtPIUXdJHY", "poster": "/media/PosterPDFs/NeurIPS%202024/94310.png?t=1733872436.1164842", "project": "", "author_site": "Yi-Fan Zhang, Min-Ling Zhang", "tldr": "", "abstract": "Label-specific representation learning (LSRL), i.e., constructing the representation with specific discriminative properties for each class label, is an effective strategy to improve the performance of multi-label learning. However, the generalization analysis of LSRL is still in its infancy. The existing theory bounds for multi-label learning, which preserve the coupling among different components, are invalid for LSRL. In an attempt to overcome this challenge and make up for the gap in the generalization theory of LSRL, we develop a novel vector-contraction inequality and derive the generalization bound for general function class of LSRL with a weaker dependency on the number of labels than the state of the art. In addition, we derive generalization bounds for typical LSRL methods, and these theoretical results reveal the impact of different label-specific representations on generalization analysis. The mild bounds without strong assumptions explain the good generalization ability of LSRL.", "keywords": "Learning Theory;Multi-Label Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yifan Zhang;Min-Ling Zhang", "authorids": "~Yifan_Zhang13;~Min-Ling_Zhang2", "gender": "M;M", "homepage": "http://palm.seu.edu.cn/homepage/zhangyifan/demo/demo/index.html;http://palm.seu.edu.cn/zhangml/", "dblp": "https://dblp.org/rec/conf/ictai/ZhangL20;84/271.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;uFHCIM0AAAAJ", "orcid": ";0000-0003-1880-5918", "linkedin": ";", "or_profile": "~Yifan_Zhang13;~Min-Ling_Zhang2", "aff": "Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024generalization,\ntitle={Generalization Analysis for Label-Specific Representation Learning},\nauthor={Yifan Zhang and Min-Ling Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dtPIUXdJHY}\n}", "github": "", "reviewers": "Axy4;iWbs;xFN1;BRgR", "pdf_size": 404072, "rating": "5;7;7;8", "confidence": "4;5;4;4", "soundness": "2;4;3;3", "novelty": "2;4;3;4", "presentation": "3;3;3;3", "wc_summary": "105;73;77;69", "wc_strengths": "43;103;215;82", "wc_weaknesses": "184;85;100;38", "wc_questions": "103;54;111;34", "wc_limitations": "61;1;3;1", "wc_review": "496;316;506;224", "wc_reply_reviewers": "0;0;18;0", "wc_reply_authors": "53;0;6;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.0, 14.142135623730951 ], "wc_strengths_avg": [ 110.75, 63.92329387633275 ], "wc_weaknesses_avg": [ 101.75, 52.7085144924423 ], "wc_questions_avg": [ 75.5, 32.407560846197605 ], "wc_limitations_avg": [ 16.5, 25.705057868053906 ], "wc_review_avg": [ 385.5, 120.04478331022969 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 14.75, 22.219079638904937 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FpfnblaGaE0J:scholar.google.com/&scioq=Generalization+Analysis+for+Label-Specific+Representation+Learning&hl=en&as_sdt=0,14", "gs_version_total": 2, "email": "seu.edu.cn;seu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "What matters when building vision-language models?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94309", "id": "dtvJF1Vy2i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dtvJF1Vy2i", "openreview": "https://openreview.net/forum?id=dtvJF1Vy2i", "poster": "", "project": "", "author_site": "Hugo Lauren\u00e7on, Leo Tronchon, Matthieu Cord, Victor Sanh", "tldr": "", "abstract": "The growing interest in vision-language models (VLMs) has been driven by improvements in large language models and vision transformers. Despite the abundance of literature on this subject, we observe that critical decisions regarding the design of VLMs are often not justified. We argue that these unsupported decisions impede progress in the field by making it difficult to identify which choices improve model performance. To address this issue, we conduct extensive experiments around pre-trained models, architecture choice, data, and training methods. Our consolidation of findings includes the development of Idefics2, an efficient foundational VLM of 8 billion parameters. Idefics2 achieves state-of-the-art performance within its size category across various multimodal benchmarks, and is often on par with models four times its size. We release the model (base, instructed, and chat) along with the datasets created for its training.", "keywords": "vision-language model;multimodal", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hugo Lauren\u00e7on;Leo Tronchon;Matthieu Cord;Victor Sanh", "authorids": "~Hugo_Lauren\u00e7on1;~Leo_Tronchon1;~Matthieu_Cord1;~Victor_Sanh1", "gender": "M;M;M;", "homepage": ";;https://cord.isir.upmc.fr/;", "dblp": ";;68/3117;230/4101", "google_scholar": ";;SpAotDcAAAAJ;6STg_7IAAAAJ", "orcid": ";;;", "linkedin": "hugo-lauren%C3%A7on-304891145/;l%C3%A9o-tronchon-6b2548130;;victor-sanh/", "or_profile": "~Hugo_Lauren\u00e7on1;~Leo_Tronchon1;~Matthieu_Cord1;~Victor_Sanh1", "aff": ";;Sorbonne Universit\u00e9;Hugging Face", "aff_domain": ";;isir.upmc.fr;huggingface.co", "position": ";;Full Professor;Researcher", "bibtex": "@inproceedings{\nlauren{\\c{c}}on2024what,\ntitle={What matters when building vision-language models?},\nauthor={Hugo Lauren{\\c{c}}on and Leo Tronchon and Matthieu Cord and Victor Sanh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dtvJF1Vy2i}\n}", "github": "", "reviewers": "FVDr;Gtma;Ww8w;vo36", "pdf_size": 1367578, "rating": "6;6;6;7", "confidence": "5;3;4;4", "soundness": "4;2;3;3", "novelty": "4;3;3;3", "presentation": "4;3;2;4", "wc_summary": "55;68;81;52", "wc_strengths": "49;69;92;140", "wc_weaknesses": "41;164;175;139", "wc_questions": "3;2;68;262", "wc_limitations": "1;4;82;81", "wc_review": "149;307;498;674", "wc_reply_reviewers": "11;63;69;59", "wc_reply_authors": "0;33;60;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;3;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 64.0, 11.510864433221338 ], "wc_strengths_avg": [ 87.5, 33.915335764223244 ], "wc_weaknesses_avg": [ 129.75, 52.87426122415329 ], "wc_questions_avg": [ 83.75, 106.33055769627093 ], "wc_limitations_avg": [ 42.0, 39.515819616958474 ], "wc_review_avg": [ 407.0, 197.5689752972364 ], "wc_reply_reviewers_avg": [ 50.5, 23.08137777516758 ], "wc_reply_authors_avg": [ 23.25, 25.13339412017406 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 213, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11051465670895345604&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;isir.upmc.fr;huggingface.co", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Sorbonne Universit\u00e9;Hugging Face", "aff_unique_dep": ";", "aff_unique_url": "https://www.sorbonne-universite.fr;https://huggingface.co", "aff_unique_abbr": "Sorbonne U;Hugging Face", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "France;United States" }, { "title": "Free Lunch in Pathology Foundation Model: Task-specific Model Adaptation with Concept-Guided Feature Enhancement", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94308", "id": "dwYekpbmYG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dwYekpbmYG", "openreview": "https://openreview.net/forum?id=dwYekpbmYG", "poster": "/media/PosterPDFs/NeurIPS%202024/94308.png?t=1731294591.8483884", "project": "", "author_site": "Yanyan Huang, Weiqin Zhao, Yihang Chen, Yu Fu, Lequan Yu", "tldr": "", "abstract": "Whole slide image (WSI) analysis is gaining prominence within the medical imaging field. Recent advances in pathology foundation models have shown the potential to extract powerful feature representations from WSIs for downstream tasks. However, these foundation models are usually designed for general-purpose pathology image analysis and may not be optimal for specific downstream tasks or cancer types. In this work, we present Concept Anchor-guided Task-specific Feature Enhancement (CATE), an adaptable paradigm that can boost the expressivity and discriminativeness of pathology foundation models for specific downstream tasks. Based on a set of task-specific concepts derived from the pathology vision-language model with expert-designed prompts, we introduce two interconnected modules to dynamically calibrate the generic image features extracted by foundation models for certain tasks or cancer types. Specifically, we design a Concept-guided Information Bottleneck module to enhance task-relevant characteristics by maximizing the mutual information between image features and concept anchors while suppressing superfluous information. Moreover, a Concept-Feature Interference module is proposed to utilize the similarity between calibrated features and concept anchors to further generate discriminative task-specific features. The extensive experiments on public WSI datasets demonstrate that CATE significantly enhances the performance and generalizability of MIL models. Additionally, heatmap and umap visualization results also reveal the effectiveness and interpretability of CATE.", "keywords": "Computational Pathology;Whole Slide Image;Vision Language Model;Mutiple Instance Learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Yanyan Huang;Weiqin Zhao;Yihang Chen;Yu Fu;Lequan Yu", "authorids": "~Yanyan_Huang1;~Weiqin_Zhao1;~Yihang_Chen4;~Yu_Fu6;~Lequan_Yu1", "gender": "M;M;M;M;M", "homepage": "http://yanyanh.com/;https://hku-medai.github.io/index.html;https://www.researchgate.net/profile/Yu-Fu-63;https://yulequan.github.io/;https://github.com/MAXzwq1998", "dblp": ";;;165/8092;340/8276", "google_scholar": "6mQ6b1EAAAAJ;https://scholar.google.com/citations?hl=zh-CN;PfmSvNMAAAAJ;https://scholar.google.com.hk/citations?user=llXf3wUAAAAJ;", "orcid": ";;0000-0002-9795-7807;0000-0002-9315-6527;", "linkedin": ";;;;", "or_profile": "~Yanyan_Huang1;~Yihang_Chen4;~Yu_Fu6;~Lequan_Yu1;~Zhao_WeiQin1", "aff": "University of Hong Kong;University of Hong Kong;Zhejiang University;The University of Hong Kong;University of Hong Kong", "aff_domain": "hku.hk;hku.hk;zju.edu.cn;hku.hk;hku.hk", "position": "PhD student;PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nhuang2024free,\ntitle={Free Lunch in Pathology Foundation Model: Task-specific Model Adaptation with Concept-Guided Feature Enhancement},\nauthor={Yanyan Huang and Weiqin Zhao and Yihang Chen and Yu Fu and Lequan Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dwYekpbmYG}\n}", "github": "", "reviewers": "VcAb;ZzEi;9fs8;R2V2", "pdf_size": 19106841, "rating": "5;5;6;8", "confidence": "5;4;4;4", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "4;3;3;4", "wc_summary": "75;74;257;110", "wc_strengths": "155;67;110;111", "wc_weaknesses": "483;195;138;36", "wc_questions": "59;93;161;134", "wc_limitations": "70;1;13;22", "wc_review": "842;430;679;413", "wc_reply_reviewers": "440;161;109;43", "wc_reply_authors": "846;701;187;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 129.0, 75.30936196781911 ], "wc_strengths_avg": [ 110.75, 31.115711465431737 ], "wc_weaknesses_avg": [ 213.0, 165.96535783108473 ], "wc_questions_avg": [ 111.75, 38.90613704802881 ], "wc_limitations_avg": [ 26.5, 26.196373794859472 ], "wc_review_avg": [ 591.0, 179.12984117672858 ], "wc_reply_reviewers_avg": [ 188.25, 151.2438015258807 ], "wc_reply_authors_avg": [ 433.5, 350.1417570070728 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8424181899979744618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hku.hk;hku.hk;zju.edu.cn;hku.hk;hku.hk", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Hong Kong;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.zju.edu.cn", "aff_unique_abbr": "HKU;ZJU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Divide-and-Conquer Predictive Coding: a structured Bayesian inference algorithm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94307", "id": "dxwIaCVkWU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dxwIaCVkWU", "openreview": "https://openreview.net/forum?id=dxwIaCVkWU", "poster": "/media/PosterPDFs/NeurIPS%202024/94307.png?t=1733351349.9451714", "project": "", "author_site": "Eli Sennesh, Hao Wu, Tommaso Salvatori", "tldr": "", "abstract": "Unexpected stimuli induce \"error\" or \"surprise\" signals in the brain. The theory of predictive coding promises to explain these observations in terms of Bayesian inference by suggesting that the cortex implements variational inference in a probabilistic graphical model. However, when applied to machine learning tasks, this family of algorithms has yet to perform on par with other variational approaches in high-dimensional, structured inference problems. To address this, we introduce a novel predictive coding algorithm for structured generative models, that we call divide-and-conquer predictive coding (DCPC); it differs from other formulations of predictive coding, as it respects the correlation structure of the generative model and provably performs maximum-likelihood updates of model parameters, all without sacrificing biological plausibility. Empirically, DCPC achieves better numerical performance than competing algorithms and provides accurate inference in a number of problems not previously addressed with predictive coding. We provide an open implementation of DCPC in Pyro on Github.", "keywords": "bioplausible;predictive coding;neuroscience;variational Bayes;VAE;deep generative model;probabilistic graphical models", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ac747ae99c8a54565ba51872986b8fa6dbda3cf9.zip", "author": "Eli Zachary Sennesh;Hao Wu;Tommaso Salvatori", "authorids": "~Eli_Zachary_Sennesh1;~Hao_Wu3;~Tommaso_Salvatori1", "gender": "M;M;M", "homepage": "https://esennesh.github.io/;https://hao-w.github.io/;https://www.cs.ox.ac.uk/people/tommaso.salvatori/", "dblp": "180/8307;;270/2016", "google_scholar": "https://scholar.google.com/citations?hl=en;fcyOKfQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Eli_Zachary_Sennesh1;~Hao_Wu3;~Tommaso_Salvatori1", "aff": "Vanderbilt University;VERSES;VERSES", "aff_domain": "vanderbilt.edu;verses.ai;verses.ai", "position": "Postdoc;Researcher;Researcher", "bibtex": "@inproceedings{\nsennesh2024divideandconquer,\ntitle={Divide-and-Conquer Predictive Coding: a structured Bayesian inference algorithm},\nauthor={Eli Zachary Sennesh and Hao Wu and Tommaso Salvatori},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dxwIaCVkWU}\n}", "github": "", "reviewers": "DHBv;Cx3n;AbQA;jNdT", "pdf_size": 1450210, "rating": "6;7;7;7", "confidence": "3;2;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "362;60;152;62", "wc_strengths": "30;12;25;274", "wc_weaknesses": "200;86;812;4", "wc_questions": "254;28;66;78", "wc_limitations": "1;1;52;15", "wc_review": "847;187;1107;433", "wc_reply_reviewers": "237;69;60;0", "wc_reply_authors": "119;69;27;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 159.0, 122.95120983544652 ], "wc_strengths_avg": [ 85.25, 109.17274156125237 ], "wc_weaknesses_avg": [ 275.5, 317.4724397487127 ], "wc_questions_avg": [ 106.5, 87.13638734765173 ], "wc_limitations_avg": [ 17.25, 20.8611480987984 ], "wc_review_avg": [ 643.5, 356.7026072234404 ], "wc_reply_reviewers_avg": [ 91.5, 88.09228116015613 ], "wc_reply_authors_avg": [ 53.75, 44.985414302860434 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9722256782297622797&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "vanderbilt.edu;verses.ai;verses.ai", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Vanderbilt University;", "aff_unique_dep": ";", "aff_unique_url": "https://www.vanderbilt.edu;", "aff_unique_abbr": "Vanderbilt;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "United States;" }, { "title": "Fair Secretaries with Unfair Predictions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94306", "id": "dxxj4S06YL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dxxj4S06YL", "openreview": "https://openreview.net/forum?id=dxxj4S06YL", "poster": "", "project": "", "author_site": "Eric Balkanski, Will Ma, Andreas Maggiori", "tldr": "", "abstract": "Algorithms with predictions is a recent framework for decision-making under uncertainty that leverages the power of machine-learned predictions without making any assumption about their quality. The goal in this framework is for algorithms to achieve an improved performance when the predictions are accurate while maintaining acceptable guarantees when the predictions are erroneous. A serious concern with algorithms that use predictions is that these predictions can be biased and, as a result, cause the algorithm to make decisions that are deemed unfair. We show that this concern manifests itself in the classical secretary problem in the learning-augmented setting---the state-of-the-art algorithm can have zero probability of accepting the best candidate, which we deem unfair, despite promising to accept a candidate whose expected value is at least $\\max\\{\\Omega (1) , 1 - O(\\varepsilon)\\}$ times the optimal value, where $\\varepsilon$ is the prediction error.\nWe show how to preserve this promise while also guaranteeing to accept the best candidate with probability $\\Omega(1)$. Our algorithm and analysis are based on a new ``pegging'' idea that diverges from existing works and simplifies/unifies some of their results. Finally, we extend to the $k$-secretary problem and complement our theoretical analysis with experiments.", "keywords": "Secretary problem;fairness;algorithms with predictions;online algorithms", "primary_area": "optimization", "supplementary_material": "/attachment/018c708551ac0361f2782b08e8a24e63eae3a01b.zip", "author": "Eric Balkanski;Will Ma;Andreas Maggiori", "authorids": "~Eric_Balkanski2;~Will_Ma1;~Andreas_Maggiori1", "gender": ";;M", "homepage": "http://ericbalkanski.com;http://www.columbia.edu/~wm2428/;", "dblp": ";86/8650.html;239/5932", "google_scholar": ";;2QzQRW4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Eric_Balkanski2;~Will_Ma1;~Andreas_Maggiori1", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu", "position": "Assistant Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nbalkanski2024fair,\ntitle={Fair Secretaries with Unfair Predictions},\nauthor={Eric Balkanski and Will Ma and Andreas Maggiori},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dxxj4S06YL}\n}", "github": "", "reviewers": "uZ3G;3Mim;Le4z;3Rre", "pdf_size": 1477603, "rating": "5;6;7;8", "confidence": "3;3;4;4", "soundness": "3;4;3;3", "novelty": "3;4;3;4", "presentation": "3;2;4;3", "wc_summary": "168;112;140;220", "wc_strengths": "144;57;117;46", "wc_weaknesses": "238;37;183;40", "wc_questions": "69;231;89;18", "wc_limitations": "15;5;1;11", "wc_review": "634;442;530;335", "wc_reply_reviewers": "130;10;24;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 160.0, 39.89987468652001 ], "wc_strengths_avg": [ 91.0, 40.82278775390039 ], "wc_weaknesses_avg": [ 124.5, 88.17737805128932 ], "wc_questions_avg": [ 101.75, 78.98536256801003 ], "wc_limitations_avg": [ 8.0, 5.385164807134504 ], "wc_review_avg": [ 485.25, 110.19840062360252 ], "wc_reply_reviewers_avg": [ 46.25, 48.633193397102765 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4NT4J-pEB2UJ:scholar.google.com/&scioq=Fair+Secretaries+with+Unfair+Predictions&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "columbia.edu;columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Introducing Spectral Attention for Long-Range Dependency in Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94305", "id": "dxyNVEBQMp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dxyNVEBQMp", "openreview": "https://openreview.net/forum?id=dxyNVEBQMp", "poster": "/media/PosterPDFs/NeurIPS%202024/94305.png?t=1730702267.7954977", "project": "", "author_site": "Bong Gyun Kang, Dongjun Lee, HyunGi Kim, Dohyun Chung, Sungroh Yoon", "tldr": "", "abstract": "Sequence modeling faces challenges in capturing long-range dependencies across diverse tasks. Recent linear and transformer-based forecasters have shown superior performance in time series forecasting. However, they are constrained by their inherent inability to effectively address long-range dependencies in time series data, primarily due to using fixed-size inputs for prediction. Furthermore, they typically sacrifice essential temporal correlation among consecutive training samples by shuffling them into mini-batches. To overcome these limitations, we introduce a fast and effective Spectral Attention mechanism, which preserves temporal correlations among samples and facilitates the handling of long-range information while maintaining the base model structure. Spectral Attention preserves long-period trends through a low-pass filter and facilitates gradient to flow between samples. Spectral Attention can be seamlessly integrated into most sequence models, allowing models with fixed-sized look-back windows to capture long-range dependencies over thousands of steps. Through extensive experiments on 11 real-world time series datasets using 7 recent forecasting models, we consistently demonstrate the efficacy of our Spectral Attention mechanism, achieving state-of-the-art results.", "keywords": "Time series forecasting;Long-range dependency;Low-pass filter;Spectral attention;Long term trend", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Bong Gyun Kang;Dongjun Lee;HyunGi Kim;Dohyun Chung;Sungroh Yoon", "authorids": "~Bong_Gyun_Kang1;~Dongjun_Lee5;~HyunGi_Kim1;~Dohyun_Chung1;~Sungroh_Yoon1", "gender": "M;M;M;M;", "homepage": "https://github.com/Pusheen-cat;;;;http://ailab.snu.ac.kr", "dblp": "369/7132;;369/7158;339/3331;99/1474", "google_scholar": "iuMRdnIAAAAJ;IeWctbbOsCsC;https://scholar.google.com/citations?hl=ko;;Bphl_fIAAAAJ", "orcid": "0000-0002-2287-4564;;;;0000-0002-2367-197X", "linkedin": ";;;dohyun-chung-4989a5228/;", "or_profile": "~Bong_Gyun_Kang1;~Dongjun_Lee5;~HyunGi_Kim1;~Dohyun_Chung1;~Sungroh_Yoon1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;MS student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nkang2024introducing,\ntitle={Introducing Spectral Attention for Long-Range Dependency in Time Series Forecasting},\nauthor={Bong Gyun Kang and Dongjun Lee and HyunGi Kim and Dohyun Chung and Sungroh Yoon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dxyNVEBQMp}\n}", "github": "", "reviewers": "weYb;pdEA;A63Y", "pdf_size": 8643237, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "2;2;2", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "44;56;106", "wc_strengths": "27;41;34", "wc_weaknesses": "134;353;33", "wc_questions": "2;90;136", "wc_limitations": "16;1;1", "wc_review": "223;541;310", "wc_reply_reviewers": "17;21;15", "wc_reply_authors": "153;172;216", "reply_reviewers": "2;1;1", "reply_authors": "3;3;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.66666666666667, 26.849374087469688 ], "wc_strengths_avg": [ 34.0, 5.715476066494082 ], "wc_weaknesses_avg": [ 173.33333333333334, 133.56729473273845 ], "wc_questions_avg": [ 76.0, 55.59376463837169 ], "wc_limitations_avg": [ 6.0, 7.0710678118654755 ], "wc_review_avg": [ 358.0, 134.1864374666829 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 2.494438257849294 ], "wc_reply_authors_avg": [ 180.33333333333334, 26.386023236217735 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7361553380778611373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Multi-Head Mixture-of-Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94304", "id": "dyZ8GJZjtX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dyZ8GJZjtX", "openreview": "https://openreview.net/forum?id=dyZ8GJZjtX", "poster": "", "project": "", "author_site": "Xun Wu, Shaohan Huang, Wenhui Wang, Shuming Ma, Li Dong, Furu Wei", "tldr": "", "abstract": "Sparse Mixtures of Experts (SMoE) scales model capacity without significant increases in computational costs. However, it exhibits the low expert activation issue, i.e., only a small subset of experts are activated for optimization, leading to suboptimal performance and limiting its effectiveness in learning a larger number of experts in complex tasks. In this paper, we propose Multi-Head Mixture-of-Experts (MH-MoE). MH-MoE split each input token into multiple sub-tokens, then these sub-tokens are assigned to and processed by a diverse set of experts in parallel, and seamlessly reintegrated into the original token form. The above operations enables MH-MoE to significantly enhance expert activation while collectively attend to information from various representation spaces within different experts to deepen context understanding. Besides, it's worth noting that our MH-MoE is straightforward to implement and decouples from other SMoE frameworks, making it easy to integrate with these frameworks for enhanced performance. Extensive experimental results across different parameter scales (300M to 7B) and three pre-training tasks\u2014English-focused language modeling, multi-lingual language modeling and masked multi-modality modeling\u2014along with multiple downstream validation tasks, demonstrate the effectiveness of MH-MoE.", "keywords": "Mixture-of-Experts; Multi-head Mechanism", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Xun Wu;Shaohan Huang;Wenhui Wang;Shuming Ma;Li Dong;Furu Wei", "authorids": "~Xun_Wu1;~Shaohan_Huang1;~Wenhui_Wang1;~Shuming_Ma1;~Li_Dong1;~Furu_Wei1", "gender": "M;M;M;;M;M", "homepage": "https://github.com/Yu-shui;;;https://www.microsoft.com/en-us/research/people/shumma/;http://dong.li;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": ";176/0380;37/2855;;85/5090-4;72/5870", "google_scholar": ";;BxmpMVUAAAAJ;;wEfQgPgAAAAJ;G-V1VpwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Xun_Wu1;~Shaohan_Huang1;~Wenhui_Wang1;~Shuming_Ma1;~Li_Dong1;~Furu_Wei1", "aff": "Tsinghua University;Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft Research", "aff_domain": "tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "MS student;Researcher;Researcher;Researcher;Principal Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nwu2024multihead,\ntitle={Multi-Head Mixture-of-Experts},\nauthor={Xun Wu and Shaohan Huang and Wenhui Wang and Shuming Ma and Li Dong and Furu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dyZ8GJZjtX}\n}", "github": "", "reviewers": "Y3PR;Y9UT;84mb;Rrmq", "pdf_size": 8682779, "rating": "4;4;5;7", "confidence": "5;4;5;4", "soundness": "2;2;2;3", "novelty": "3;2;3;2", "presentation": "3;3;4;2", "wc_summary": "40;59;67;81", "wc_strengths": "42;59;52;44", "wc_weaknesses": "82;184;114;328", "wc_questions": "2;6;31;64", "wc_limitations": "1;16;1;53", "wc_review": "167;324;265;570", "wc_reply_reviewers": "83;224;14;339", "wc_reply_authors": "268;445;63;782", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.75, 14.821858857781638 ], "wc_strengths_avg": [ 49.25, 6.7592529172978875 ], "wc_weaknesses_avg": [ 177.0, 94.66255859631093 ], "wc_questions_avg": [ 25.75, 24.722206616724165 ], "wc_limitations_avg": [ 17.75, 21.25294097295713 ], "wc_review_avg": [ 331.5, 148.6783440854787 ], "wc_reply_reviewers_avg": [ 165.0, 125.77956908814723 ], "wc_reply_authors_avg": [ 389.5, 263.86596976495474 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "THU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Robust Graph Neural Networks via Unbiased Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94303", "id": "dz6ex9Ee0Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=dz6ex9Ee0Q", "openreview": "https://openreview.net/forum?id=dz6ex9Ee0Q", "poster": "/media/PosterPDFs/NeurIPS%202024/94303.png?t=1731438708.9885194", "project": "", "author_site": "Zhichao Hou, Ruiqi Feng, Tyler Derr, Xiaorui Liu", "tldr": "", "abstract": "The adversarial robustness of Graph Neural Networks (GNNs) has been questioned due to the false sense of security uncovered by strong adaptive attacks despite the existence of numerous defenses.\nIn this work, we delve into the robustness analysis of representative robust GNNs and provide a unified robust estimation point of view to\nunderstand their robustness and limitations.\nOur novel analysis of estimation bias motivates the design of a \nrobust and unbiased graph signal estimator. \nWe then develop an efficient Quasi-Newton Iterative Reweighted Least Squares algorithm to solve the estimation problem, which is unfolded as robust unbiased aggregation layers in GNNs with theoretical guarantees.\nOur comprehensive experiments confirm the strong robustness of our proposed model under various scenarios, and the ablation study provides a deep understanding of its advantages.", "keywords": "Adversarial Robustness;Graph Neural Networks;Robust Estimation", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Zhichao Hou;Ruiqi Feng;Tyler Derr;Xiaorui Liu", "authorids": "~Zhichao_Hou1;~Ruiqi_Feng1;~Tyler_Derr1;~Xiaorui_Liu1", "gender": "M;;;M", "homepage": "https://chris-hzc.github.io/;https://weenming.github.io/;http://www.tylerderr.com;https://sites.google.com/ncsu.edu/xiaorui/", "dblp": "188/4064;301/3170;207/7927.html;172/0995", "google_scholar": "rraC4ZMAAAAJ;7yE7WQQAAAAJ;et6IhFcAAAAJ;NhvN1KoAAAAJ", "orcid": "0000-0002-3989-2654;0009-0001-8813-5211;;0000-0001-8217-5688", "linkedin": "zhichao-hou-b022931a4/;;tylersnetwork/;", "or_profile": "~Zhichao_Hou1;~Ruiqi_Feng1;~Tyler_Derr1;~Xiaorui_Liu1", "aff": "Amazon;Fudan University;Vanderbilt University;North Carolina State University", "aff_domain": "amazon.com;fudan.edu.cn;vanderbilt.edu;ncsu.edu", "position": "Intern;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhou2024robust,\ntitle={Robust Graph Neural Networks via Unbiased Aggregation},\nauthor={Zhichao Hou and Ruiqi Feng and Tyler Derr and Xiaorui Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=dz6ex9Ee0Q}\n}", "github": "", "reviewers": "Vusx;E1WB;7kcB", "pdf_size": 3652215, "rating": "5;7;7", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "2;3;3", "wc_summary": "100;65;101", "wc_strengths": "46;74;34", "wc_weaknesses": "231;65;97", "wc_questions": "15;3;8", "wc_limitations": "5;5;5", "wc_review": "397;212;245", "wc_reply_reviewers": "471;20;27", "wc_reply_authors": "1031;0;0", "reply_reviewers": "3;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.66666666666667, 16.73983937265296 ], "wc_strengths_avg": [ 51.333333333333336, 16.759740119968715 ], "wc_weaknesses_avg": [ 131.0, 71.90734779330042 ], "wc_questions_avg": [ 8.666666666666666, 4.921607686744467 ], "wc_limitations_avg": [ 5.0, 0.0 ], "wc_review_avg": [ 284.6666666666667, 80.56605295587521 ], "wc_reply_reviewers_avg": [ 172.66666666666666, 210.9728787203596 ], "wc_reply_authors_avg": [ 343.6666666666667, 486.0180609355536 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11479758961783039255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "amazon.com;fudan.edu.cn;vanderbilt.edu;ncsu.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Amazon;Fudan University;Vanderbilt University;North Carolina State University", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.fudan.edu.cn;https://www.vanderbilt.edu;https://www.ncsu.edu", "aff_unique_abbr": "Amazon;Fudan;Vanderbilt;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94302", "id": "e0SQ6wsHjv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e0SQ6wsHjv", "openreview": "https://openreview.net/forum?id=e0SQ6wsHjv", "poster": "/media/PosterPDFs/NeurIPS%202024/94302.png?t=1731038077.3829231", "project": "", "author_site": "Wangbo Zhao, Jiasheng Tang, Yizeng Han, Yibing Song, Kai Wang, Gao Huang, Fan Wang, Yang You", "tldr": "", "abstract": "Existing parameter-efficient fine-tuning (PEFT) methods have achieved significant success on vision transformers (ViTs) adaptation by improving parameter efficiency. However, the exploration of enhancing inference efficiency during adaptation remains underexplored. This limits the broader application of pre-trained ViT models, especially when the model is computationally extensive. In this paper, we propose Dynamic Tuning (DyT), a novel approach to improve both parameter and inference efficiency for ViT adaptation. Specifically, besides using the lightweight adapter modules, we propose a token dispatcher to distinguish informative tokens from less important ones, allowing the latter to dynamically skip the original block, thereby reducing the redundant computation during inference. Additionally, we explore multiple design variants to find the best practice of DyT. Finally, inspired by the mixture-of-experts (MoE) mechanism, we introduce an enhanced adapter to further boost the adaptation performance. We validate DyT across various tasks, including image/video recognition and semantic segmentation. For instance, DyT achieves superior performance compared to existing PEFT methods while evoking only 71% of their FLOPs on the VTAB-1K benchmark.", "keywords": "Vision Transformers (ViTs);Parameter Efficient Fine- tuning (PEFT);Dynamic Tuning (DyT)", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wangbo Zhao;Jiasheng Tang;Yizeng Han;Yibing Song;Kai Wang;Gao Huang;Fan Wang;Yang You", "authorids": "~Wangbo_Zhao1;~Jiasheng_Tang1;~Yizeng_Han1;~Yibing_Song1;~Kai_Wang8;~Gao_Huang1;~Fan_Wang6;~Yang_You1", "gender": ";M;;M;M;F;M;M", "homepage": ";https://yizenghan.top/;https://ybsong00.github.io/;https://kaiwang960112.github.io/;http://www.gaohuang.net;;https://www.comp.nus.edu.sg/~youy/;", "dblp": "220/4083;217/9548;77/2117;78/2022-36;;;33/8167-1.html;289/6986.html", "google_scholar": ";25mubAsAAAAJ;oRhJHmIAAAAJ;i2II0XIAAAAJ;-P9LwcgAAAAJ;WCRGTHsAAAAJ;jF4dPZwAAAAJ;https://scholar.google.com.hk/citations?user=aocj89kAAAAJ", "orcid": ";;;0000-0002-1154-5175;;0000-0001-7320-1119;;", "linkedin": ";;;;;;yang-you-0b92914b/;", "or_profile": "~Jiasheng_Tang1;~Yizeng_Han1;~Yibing_Song1;~Kai_Wang8;~Gao_Huang1;~Fan_Wang6;~Yang_You1;~ZHAO_WANGBO1", "aff": "Alibaba Group, DAMO Academy;Tsinghua University;Alibaba DAMO Academy;National University of Singapore;Tsinghua University;Alibaba Group;National University of Singapore;National University of Singapore", "aff_domain": "alibaba-inc.com;tsinghua.edu.cn;alibaba-inc.com;u.nus.edu;tsinghua.edu.cn;alibaba-inc.com;nus.edu.sg;nus.edu", "position": "Researcher;PhD student;Staff Scientist;PhD student;Associate Professor;Senior Staff Algorithm Engineer;Professor;PhD student", "bibtex": "@inproceedings{\nzhao2024dynamic,\ntitle={Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation},\nauthor={Wangbo Zhao and Jiasheng Tang and Yizeng Han and Yibing Song and Kai Wang and Gao Huang and Fan Wang and Yang You},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e0SQ6wsHjv}\n}", "github": "", "reviewers": "gv1w;uJS2;fmWb;Zpyf", "pdf_size": 3286835, "rating": "5;6;6;7", "confidence": "5;5;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "54;101;83;38", "wc_strengths": "34;67;91;45", "wc_weaknesses": "153;167;254;63", "wc_questions": "30;65;82;2", "wc_limitations": "8;6;28;11", "wc_review": "279;406;538;159", "wc_reply_reviewers": "411;118;137;0", "wc_reply_authors": "1024;78;676;136", "reply_reviewers": "3;1;1;0", "reply_authors": "5;2;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 24.525496936861444 ], "wc_strengths_avg": [ 59.25, 21.84462176372024 ], "wc_weaknesses_avg": [ 159.25, 67.7140125823304 ], "wc_questions_avg": [ 44.75, 30.994959267597046 ], "wc_limitations_avg": [ 13.25, 8.699856320652657 ], "wc_review_avg": [ 345.5, 141.35151219566063 ], "wc_reply_reviewers_avg": [ 166.5, 150.60295481829033 ], "wc_reply_authors_avg": [ 478.5, 391.8810406232994 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1004839728462771428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "alibaba-inc.com;tsinghua.edu.cn;alibaba-inc.com;u.nus.edu;tsinghua.edu.cn;alibaba-inc.com;nus.edu.sg;nus.edu", "author_num": 8, "aff_unique_index": "0;1;0;2;1;0;2;2", "aff_unique_norm": "Alibaba Group;Tsinghua University;National University of Singapore", "aff_unique_dep": "DAMO Academy;;", "aff_unique_url": "https://www.alibaba-group.com;https://www.tsinghua.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "Alibaba;THU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Rethinking Reconstruction-based Graph-Level Anomaly Detection: Limitations and a Simple Remedy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94301", "id": "e2INndPINB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e2INndPINB", "openreview": "https://openreview.net/forum?id=e2INndPINB", "poster": "/media/PosterPDFs/NeurIPS%202024/94301.png?t=1733304690.8135078", "project": "", "author_site": "Sunwoo Kim, Soo Yong Lee, Fanchen Bu, Shinhwan Kang, Kyungho Kim, Jaemin Yoo, Kijung Shin", "tldr": "", "abstract": "Graph autoencoders (Graph-AEs) learn representations of given graphs by aiming to accurately reconstruct them. A notable application of Graph-AEs is graph-level anomaly detection (GLAD), whose objective is to identify graphs with anomalous topological structures and/or node features compared to the majority of the graph population. Graph-AEs for GLAD regard a graph with a high mean reconstruction error (i.e. mean of errors from all node pairs and/or nodes) as anomalies. Namely, the methods rest on the assumption that they would better reconstruct graphs with similar characteristics to the majority. We, however, report non-trivial counter-examples, a phenomenon we call reconstruction flip, and highlight the limitations of the existing Graph-AE-based GLAD methods. Specifically, we empirically and theoretically investigate when this assumption holds and when it fails. Through our analyses, we further argue that, while the reconstruction errors for a given graph are effective features for GLAD, leveraging the multifaceted summaries of the reconstruction errors, beyond just mean, can further strengthen the features. Thus, we propose a novel and simple GLAD method, named MUSE. The key innovation of MUSE involves taking multifaceted summaries of reconstruction errors as graph features for GLAD. This surprisingly simple method obtains SOTA performance in GLAD, performing best overall among 14 methods across 10 datasets.", "keywords": "graph-level anomaly detection;graph neural network;graph autoencoder", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Sunwoo Kim;Soo Yong Lee;Fanchen Bu;Shinhwan Kang;Kyungho Kim;Jaemin Yoo;Kijung Shin", "authorids": "~Sunwoo_Kim4;~Soo_Yong_Lee1;~Fanchen_Bu1;~Shinhwan_Kang1;~Kyungho_Kim2;~Jaemin_Yoo1;~Kijung_Shin2", "gender": "M;M;M;M;M;M;M", "homepage": "https://sites.google.com/view/sunwoo97;https://syleetolow.notion.site/Soo-Yong-s-Homepage-2e5cfa74f1784bf4957e7ba0ab0fbc7a;https://github.com/bokveizen;https://sites.google.com/view/shinhwankang/;https://github.com/K-Kyungho;https://jaeminyoo.github.io/;https://kijungs.github.io/", "dblp": "16/3210.html;348/9631;270/0123;;;211/2843;153/2052", "google_scholar": "fYxrC_EAAAAJ;U3vZd0kAAAAJ;XjNu7-AAAAAJ;_wASixAAAAAJ;;https://scholar.google.co.kr/citations?user=LcxcTRUAAAAJ;https://scholar.google.co.kr/citations?user=Yp3Cz5AAAAAJ", "orcid": "0009-0006-6002-169X;0000-0001-7957-7600;0000-0003-0497-3902;;;0000-0001-7237-5117;0000-0002-2872-1526", "linkedin": ";syleeheal/;fanchen-bu-1268a1255/;;;jaemin-yoo-8b3678142/;kijungshin/", "or_profile": "~Sunwoo_Kim4;~Soo_Yong_Lee1;~Fanchen_Bu1;~Shinhwan_Kang1;~Kyungho_Kim2;~Jaemin_Yoo1;~Kijung_Shin2", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea University;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;PhD student;PhD student;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2024rethinking,\ntitle={Rethinking Reconstruction-based Graph-Level Anomaly Detection: Limitations and a Simple Remedy},\nauthor={Sunwoo Kim and Soo Yong Lee and Fanchen Bu and Shinhwan Kang and Kyungho Kim and Jaemin Yoo and Kijung Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e2INndPINB}\n}", "github": "", "reviewers": "SHGf;EQox;jxne;H5pG", "pdf_size": 2922294, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "73;64;92;44", "wc_strengths": "62;90;61;67", "wc_weaknesses": "179;136;111;142", "wc_questions": "5;56;20;5", "wc_limitations": "10;10;7;50", "wc_review": "329;356;291;308", "wc_reply_reviewers": "36;88;26;43", "wc_reply_authors": "40;289;37;35", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.25, 17.268106439329124 ], "wc_strengths_avg": [ 70.0, 11.76860229593982 ], "wc_weaknesses_avg": [ 142.0, 24.320773014030618 ], "wc_questions_avg": [ 21.5, 20.838665984174707 ], "wc_limitations_avg": [ 19.25, 17.795715776557007 ], "wc_review_avg": [ 321.0, 24.279621084357967 ], "wc_reply_reviewers_avg": [ 48.25, 23.731571797923543 ], "wc_reply_authors_avg": [ 100.25, 108.98939168561314 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12639174128563042495&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 6, "email": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr", "aff_unique_abbr": "KAIST;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Fair Bilevel Neural Network (FairBiNN): On Balancing fairness and accuracy via Stackelberg Equilibrium", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94300", "id": "e2R4WNHHGQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e2R4WNHHGQ", "openreview": "https://openreview.net/forum?id=e2R4WNHHGQ", "poster": "/media/PosterPDFs/NeurIPS%202024/94300.png?t=1730674279.041866", "project": "", "author_site": "Mehdi Yazdani-Jahromi, Ali Khodabandeh Yalabadi, Amirarsalan Rajabi, Aida Tayebi, Ivan Garibay, OZLEM GARIBAY", "tldr": "", "abstract": "The persistent challenge of bias in machine learning models necessitates robust solutions to ensure parity and equal treatment across diverse groups, particularly in classification tasks. Current methods for mitigating bias often result in information loss and an inadequate balance between accuracy and fairness. To address this, we propose a novel methodology grounded in bilevel optimization principles. Our deep learning-based approach concurrently optimizes for both accuracy and fairness objectives, and under certain assumptions, achieving proven Pareto optimal solutions while mitigating bias in the trained model. Theoretical analysis indicates that the upper bound on the loss incurred by this method is less than or equal to the loss of the Lagrangian approach, which involves adding a regularization term to the loss function. We demonstrate the efficacy of our model primarily on tabular datasets such as UCI Adult and Heritage Health. When benchmarked against state-of-the-art fairness methods, our model exhibits superior performance, advancing fairness-aware machine learning solutions and bridging the accuracy-fairness gap. The implementation of FairBiNN is available on https://github.com/yazdanimehdi/FairBiNN.", "keywords": "Fairness;Bilevel optimization;Pareto optimal;Stackelberg;Demographic parity;Fairness-accuracy trade-off", "primary_area": "fairness", "supplementary_material": "/attachment/f7c933684c247e3796ea8922bad0d5a8f5d3d55f.zip", "author": "Mehdi Yazdani-Jahromi;Ali Khodabandeh Yalabadi;Amirarsalan Rajabi;Aida Tayebi;Ivan Garibay;Ozlem Garibay", "authorids": "~Mehdi_Yazdani-Jahromi1;~Ali_Khodabandeh_Yalabadi1;~Amirarsalan_Rajabi1;~Aida_Tayebi1;~Ivan_Garibay2;~Ozlem_Garibay1", "gender": "M;M;M;F;M;F", "homepage": "https://yazdanimehdi.com;https://khodabandeh-ali.github.io;;;https://www.cs.ucf.edu/~garibay/;https://iems.ucf.edu/faculty/ozlem-ozmen-garibay/", "dblp": "317/0401;;;;;25/4013", "google_scholar": "iLu_fEcAAAAJ;ZK7MXGMAAAAJ;FH14xEMAAAAJ;https://scholar.google.com/citations?hl=en;rPuLfoMAAAAJ;0RKzNtIAAAAJ", "orcid": "0000-0001-6658-6591;0009-0009-4310-0259;;;0000-0002-3302-9382;0000-0001-9215-694X", "linkedin": "https://linkedin.com/in/yazdanimehdi;ali-yalabadi/;;aida-tayebi-64369a121/;ivan-garibay-ph-d-b4305815/;ozlem-ozmen-garibay/", "or_profile": "~Mehdi_Yazdani-Jahromi1;~Ali_Khodabandeh_Yalabadi1;~Amirarsalan_Rajabi1;~Aida_Tayebi1;~Ivan_Garibay2;~Ozlem_Garibay1", "aff": "University of Central Florida;University of Central Florida;Integral Ad Science;University of Central Florida;University of Central Florida;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu;integralads.com;ucf.edu;ucf.edu;ucf.edu", "position": "PhD student;PhD student;Researcher;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyazdani-jahromi2024fair,\ntitle={Fair Bilevel Neural Network (FairBi{NN}): On Balancing fairness and accuracy via Stackelberg Equilibrium},\nauthor={Mehdi Yazdani-Jahromi and Ali Khodabandeh Yalabadi and Amirarsalan Rajabi and Aida Tayebi and Ivan Garibay and Ozlem Garibay},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e2R4WNHHGQ}\n}", "github": "", "reviewers": "dbnD;QoV4;UWLB;J3Wm", "pdf_size": 11187127, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "59;60;72;89", "wc_strengths": "47;87;45;79", "wc_weaknesses": "475;86;64;33", "wc_questions": "363;243;88;40", "wc_limitations": "94;34;1;41", "wc_review": "1038;510;270;282", "wc_reply_reviewers": "64;81;208;25", "wc_reply_authors": "0;79;168;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 12.103718436910205 ], "wc_strengths_avg": [ 64.5, 18.728320800328042 ], "wc_weaknesses_avg": [ 164.5, 180.25329400596263 ], "wc_questions_avg": [ 183.5, 127.93846176971176 ], "wc_limitations_avg": [ 42.5, 33.350412291304586 ], "wc_review_avg": [ 525.0, 311.23463817512345 ], "wc_reply_reviewers_avg": [ 94.5, 68.60211367006121 ], "wc_reply_authors_avg": [ 61.75, 69.30503228482041 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VculvBdjfS8J:scholar.google.com/&scioq=Fair+Bilevel+Neural+Network+(FairBiNN):+On+Balancing+fairness+and+accuracy+via+Stackelberg+Equilibrium&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ucf.edu;ucf.edu;integralads.com;ucf.edu;ucf.edu;ucf.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Central Florida;Integral Ad Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucf.edu;https://www.integralads.com", "aff_unique_abbr": "UCF;IAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Structure-Aware Representations of Dependent Types", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94299", "id": "e397soEZh8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e397soEZh8", "openreview": "https://openreview.net/forum?id=e397soEZh8", "poster": "/media/PosterPDFs/NeurIPS%202024/94299.png?t=1732709119.9557223", "project": "", "author_site": "Konstantinos Kogkalidis, Orestis Melkonian, Jean-Philippe Bernardy", "tldr": "", "abstract": "Agda is a dependently-typed programming language and a proof assistant, pivotal in proof formalization and programming language theory.\nThis paper extends the Agda ecosystem into machine learning territory, and, vice versa, makes Agda-related resources available to machine learning practitioners.\nWe introduce and release a novel dataset of Agda program-proofs that is elaborate and extensive enough to support various machine learning applications -- the first of its kind.\nLeveraging the dataset's ultra-high resolution, which details proof states at the sub-type level, we propose a novel neural architecture targeted at faithfully representing dependently-typed programs on the basis of structural rather than nominal principles.\nWe instantiate and evaluate our architecture in a premise selection setup, where it achieves promising initial results, surpassing strong baselines.", "keywords": "premise selection;agda;structured attention;theorem proving;proof assistant", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Konstantinos Kogkalidis;Orestis Melkonian;Jean-Philippe Bernardy", "authorids": "~Konstantinos_Kogkalidis1;~Orestis_Melkonian1;~Jean-Philippe_Bernardy1", "gender": "Not Specified;M;Unspecified", "homepage": "https://konstantinoskokos.github.io/;https://omelkonian.github.io/;", "dblp": "241/9652;228/5288;47/929", "google_scholar": ";KWbkseQAAAAJ;https://scholar.google.se/citations?user=KwPrvYQAAAAJ", "orcid": ";0000-0003-2182-2698;my-orcid?orcid=0000-0002-8469-5617", "linkedin": ";;", "or_profile": "~Konstantinos_Kogkalidis1;~Orestis_Melkonian1;~Jean-Philippe_Bernardy1", "aff": "Aalto University;Input Output (IOG/IOHK);Gothenburg University", "aff_domain": "aalto.fi;iohk.io;gu.se", "position": "Postdoc;Researcher;Researcher", "bibtex": "@inproceedings{\nkogkalidis2024learning,\ntitle={Learning Structure-Aware Representations of Dependent Types},\nauthor={Konstantinos Kogkalidis and Orestis Melkonian and Jean-Philippe Bernardy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e397soEZh8}\n}", "github": "", "reviewers": "ctFo;xpMx;2Qh1;XfNT", "pdf_size": 517836, "rating": "6;7;7;7", "confidence": "3;4;2;4", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;1;3", "wc_summary": "43;280;50;34", "wc_strengths": "35;29;55;75", "wc_weaknesses": "264;29;63;186", "wc_questions": "29;25;7;99", "wc_limitations": "7;24;13;13", "wc_review": "378;387;188;407", "wc_reply_reviewers": "382;548;0;5", "wc_reply_authors": "374;990;0;7", "reply_reviewers": "2;7;0;1", "reply_authors": "3;10;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 101.75, 103.06884834905259 ], "wc_strengths_avg": [ 48.5, 18.07622748252522 ], "wc_weaknesses_avg": [ 135.5, 94.42060156554818 ], "wc_questions_avg": [ 40.0, 35.05709628591621 ], "wc_limitations_avg": [ 14.25, 6.139014578904337 ], "wc_review_avg": [ 340.0, 88.38269061303802 ], "wc_reply_reviewers_avg": [ 233.75, 238.58790308814903 ], "wc_reply_authors_avg": [ 342.75, 403.14846830913297 ], "reply_reviewers_avg": [ 2.5, 2.692582403567252 ], "reply_authors_avg": [ 4.0, 3.5355339059327378 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3653810926836846121&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "aalto.fi;iohk.io;gu.se", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Aalto University;Input Output;Gothenburg University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aalto.fi;https://inputoutput.io;https://www.gu.se", "aff_unique_abbr": "Aalto;IOG/IOHK;GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Finland;United Kingdom;Sweden" }, { "title": "PLIP: Language-Image Pre-training for Person Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94298", "id": "e49QqJxCwq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e49QqJxCwq", "openreview": "https://openreview.net/forum?id=e49QqJxCwq", "poster": "", "project": "", "author_site": "Jialong Zuo, Jiahao Hong, Feng Zhang, Changqian Yu, Hanyu Zhou, Changxin Gao, Nong Sang, Jingdong Wang", "tldr": "", "abstract": "Language-image pre-training is an effective technique for learning powerful representations in general domains. However, when directly turning to person representation learning, these general pre-training methods suffer from unsatisfactory performance. The reason is that they neglect critical person-related characteristics, i.e., fine-grained attributes and identities. To address this issue, we propose a novel language-image pre-training framework for person representation learning, termed PLIP. Specifically, we elaborately design three pretext tasks: 1) Text-guided Image Colorization, aims to establish the correspondence between the person-related image regions and the fine-grained color-part textual phrases. 2) Image-guided Attributes Prediction, aims to mine fine-grained attribute information of the person body in the image; and 3) Identity-based Vision-Language Contrast, aims to correlate the cross-modal representations at the identity level rather than the instance level. Moreover, to implement our pre-train framework, we construct a large-scale person dataset with image-text pairs named SYNTH-PEDES by automatically generating textual annotations. We pre-train PLIP on SYNTH-PEDES and evaluate our models by spanning downstream person-centric tasks. PLIP not only significantly improves existing methods on all these tasks, but also shows great ability in the zero-shot and domain generalization settings. The code, dataset and weight will be made publicly available.", "keywords": "Person Representation Learning;Language-Image Pre-training;Contrastive Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jialong Zuo;Jiahao Hong;Feng Zhang;Changqian Yu;Hanyu Zhou;Changxin Gao;Nong Sang;Jingdong Wang", "authorids": "~Jialong_Zuo1;~Jiahao_Hong1;~Feng_Zhang11;~Changqian_Yu1;~Hanyu_Zhou2;~Changxin_Gao1;~Nong_Sang1;~Jingdong_Wang1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/Zplusdragon;https://github.com/Jiahaohong;;https://www.changqianyu.me/;https://github.com/6irdy;https://sites.google.com/site/changxingao/home;http://faculty.hust.edu.cn/sangnong/en/index.htm;https://jingdongwang2017.github.io/", "dblp": ";;;218/5946;;99/7463;10/1545;49/3441", "google_scholar": ";;iJ0tdroAAAAJ;Hv-vj2sAAAAJ;;4tku-lwAAAAJ;ky_ZowEAAAAJ;z5SPCmgAAAAJ", "orcid": ";;;;;0000-0003-2736-3920;0000-0002-9167-1496;0000-0002-4888-4445", "linkedin": ";;;;;;;", "or_profile": "~Jialong_Zuo1;~Jiahao_Hong1;~Feng_Zhang11;~Changqian_Yu1;~Hanyu_Zhou2;~Changxin_Gao1;~Nong_Sang1;~Jingdong_Wang1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Kunlun Tech;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Baidu", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;kunlun.com;hust.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com", "position": "MS student;MS student;PhD student;Researcher;MS student;Full Professor;Full Professor;Chief Scientist for Computer Vision", "bibtex": "@inproceedings{\nzuo2024plip,\ntitle={{PLIP}: Language-Image Pre-training for Person Representation Learning},\nauthor={Jialong Zuo and Jiahao Hong and Feng Zhang and Changqian Yu and Hanyu Zhou and Changxin Gao and Nong Sang and Jingdong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e49QqJxCwq}\n}", "github": "", "reviewers": "RKFb;Rk98;bKKs;cgBx", "pdf_size": 1786996, "rating": "3;3;8;8", "confidence": "5;5;5;5", "soundness": "2;2;4;4", "novelty": "2;2;4;4", "presentation": "3;3;3;4", "wc_summary": "77;48;133;102", "wc_strengths": "32;37;108;197", "wc_weaknesses": "268;174;37;106", "wc_questions": "23;49;103;40", "wc_limitations": "1;6;64;30", "wc_review": "401;314;445;475", "wc_reply_reviewers": "0;0;118;117", "wc_reply_authors": "91;91;59;38", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 2.5 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.0, 31.32890039564108 ], "wc_strengths_avg": [ 93.5, 66.8898348032046 ], "wc_weaknesses_avg": [ 146.25, 85.36502503953244 ], "wc_questions_avg": [ 53.75, 29.92803869283786 ], "wc_limitations_avg": [ 25.25, 24.913600703230355 ], "wc_review_avg": [ 408.75, 60.705745197633476 ], "wc_reply_reviewers_avg": [ 58.75, 58.75106382015563 ], "wc_reply_authors_avg": [ 69.75, 22.509720122649238 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6193969859963507404&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;kunlun.com;hust.edu.cn;hust.edu.cn;hust.edu.cn;baidu.com", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0;0;2", "aff_unique_norm": "Huazhong University of Science and Technology;Kunlun Technology Group;Baidu", "aff_unique_dep": ";;Baidu, Inc.", "aff_unique_url": "http://www.hust.edu.cn;https://www.kunluntech.com;https://www.baidu.com", "aff_unique_abbr": "HUST;Kunlun Tech;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exploring DCN-like architecture for fast image generation with arbitrary resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94297", "id": "e57B7BfA2B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e57B7BfA2B", "openreview": "https://openreview.net/forum?id=e57B7BfA2B", "poster": "/media/PosterPDFs/NeurIPS%202024/94297.png?t=1730772678.2255607", "project": "", "author_site": "Shuai Wang, Zexian Li, Tianhui Song, Xubin Li, Tiezheng Ge, Bo Zheng, Limin Wang", "tldr": "", "abstract": "Arbitrary-resolution image generation still remains a challenging task in AIGC, as it requires handling varying resolutions and aspect ratios while maintaining high visual quality. Existing transformer-based diffusion methods suffer from quadratic computation cost and limited resolution extrapolation capabilities, making them less effective for this task. In this paper, we propose FlowDCN, a purely convolution-based generative model with linear time and memory complexity, that can efficiently generate high-quality images at arbitrary resolutions. Equipped with a new design of learnable group-wise deformable convolution block, our FlowDCN yields higher flexibility and capability to handle different resolutions with a single model.\nFlowDCN achieves the state-of-the-art 4.30 sFID on $256\\times256$ ImageNet Benchmark and comparable resolution extrapolation results, surpassing transformer-based counterparts in terms of convergence speed (only $\\frac{1}{5}$ images), visual quality, parameters ($8\\%$ reduction) and FLOPs ($20\\%$ reduction). We believe FlowDCN offers a promising solution to scalable and flexible image synthesis.", "keywords": "Image generation;deformable convolution", "primary_area": "generative_models", "supplementary_material": "", "author": "Shuai Wang;Zexian Li;Tianhui Song;Xubin Li;Tiezheng Ge;Bo Zheng;Limin Wang", "authorids": "~Shuai_Wang19;~Zexian_Li1;~Tianhui_Song2;~Xubin_Li2;~Tiezheng_Ge3;~Bo_Zheng5;~Limin_Wang1", "gender": "M;M;;;M;M;", "homepage": ";;;;;;", "dblp": ";;;;;33/1610-7;", "google_scholar": ";8SabwVEAAAAJ;;;db5ZTlMAAAAJ;3gHhO9QAAAAJ;", "orcid": ";;;;0000-0003-1381-2692;0000-0002-4037-6315;", "linkedin": "wangsssssss/;;;;;bo-zheng-0315254/;", "or_profile": "~Shuai_Wang19;~Zexian_Li1;~Tianhui_Song2;~Xubin_Li2;~Tiezheng_Ge3;~Bo_Zheng5;~Limin_Wang1", "aff": "Nanjing University;Alibaba Group;;;Alibaba Group;Alibaba Group;", "aff_domain": "nju.edu.cn;alibaba.com;;;alibaba-inc.com;alibaba-inc.com;", "position": "PhD student;Researcher;;;Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nwang2024exploring,\ntitle={Exploring {DCN}-like architecture for fast image generation with arbitrary resolution},\nauthor={Shuai Wang and Zexian Li and Tianhui Song and Xubin Li and Tiezheng Ge and Bo Zheng and Limin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e57B7BfA2B}\n}", "github": "", "reviewers": "LWE8;yBFS;rZWQ;zBRq", "pdf_size": 3426138, "rating": "5;5;6;8", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;1;3;3", "wc_summary": "44;79;54;84", "wc_strengths": "57;158;54;62", "wc_weaknesses": "224;239;298;24", "wc_questions": "21;20;82;21", "wc_limitations": "2;102;9;12", "wc_review": "348;598;497;203", "wc_reply_reviewers": "0;0;0;49", "wc_reply_authors": "0;0;103;112", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 65.25, 16.723860200324566 ], "wc_strengths_avg": [ 82.75, 43.53949356618655 ], "wc_weaknesses_avg": [ 196.25, 103.22396766255403 ], "wc_questions_avg": [ 36.0, 26.56124997058685 ], "wc_limitations_avg": [ 31.25, 41.008383289273915 ], "wc_review_avg": [ 411.5, 149.6637898758414 ], "wc_reply_reviewers_avg": [ 12.25, 21.21762239271875 ], "wc_reply_authors_avg": [ 53.75, 53.84410366976128 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QvYniN_D-bwJ:scholar.google.com/&scioq=Exploring+DCN-like+architecture+for+fast+image+generation+with+arbitrary+resolution&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "nju.edu.cn;alibaba.com;;;alibaba-inc.com;alibaba-inc.com;", "author_num": 7, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Nanjing University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Nanjing U;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "What Rotary Position Embedding Can Tell Us: Identifying Query and Key Weights Corresponding to Basic Syntactic or High-level Semantic Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94296", "id": "e5Mv7iWfVW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e5Mv7iWfVW", "openreview": "https://openreview.net/forum?id=e5Mv7iWfVW", "poster": "/media/PosterPDFs/NeurIPS%202024/94296.png?t=1731667524.4557157", "project": "", "author_site": "Yiting Chen, Junchi Yan", "tldr": "", "abstract": "Transformer-based large language models (LLMs) have successfully handled various tasks. As one fundamental module in Transformers, position encoding encodes the positional information of tokens in a sequence. Specifically, rotary position embedding (RoPE), one of the most widely used techniques, encodes the positional information by dividing the query or key value with $d$ elements into $d/2$ pairs and rotating the 2d vectors corresponding to each pair of elements. Therefore, the direction of each pair and the position-related rotation jointly determine the attention score. In this paper, we show that the direction of the 2d pair is largely affected by the angle between the corresponding weight vector pair. We theoretically show that non-orthogonal weight vector pairs lead to great attention on tokens at a certain relative position and are less sensitive to the input which may correspond to basic syntactic information. Meanwhile, the orthogonal weight vector pairs are more flexible regarding the relative position, which may correspond to high-level syntactic information. Empirical evidence supports the hypothesis that shallow layers of LLMs focus more on local syntax and deep layers focus more on high-level semantics. Furthermore, we show that LLMs fine-tuning mainly changes the pairs of weight vectors that are nearly orthogonal, i.e., the weight corresponding to high-level semantics, which enables the reduction of the number of trainable parameters during fine-tuning without sacrificing performance. We propose a method namely Angle-based Weight Selection (AWS) to reduce the fine-tuning overhead and verify the effectiveness of the proposed method on widely used Alpaca fine-tuned Llama-2.", "keywords": "Large Language Model.+Rotary Position Embedding.+Self Attention", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yiting Chen;Junchi Yan", "authorids": "~Yiting_Chen1;~Junchi_Yan2", "gender": "M;M", "homepage": "https://ytchen981.github.io/;http://thinklab.sjtu.edu.cn/", "dblp": "135/6971;60/7949.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;ga230VoAAAAJ", "orcid": ";0000-0001-9639-7679", "linkedin": ";", "or_profile": "~Yiting_Chen1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2024what,\ntitle={What Rotary Position Embedding Can Tell Us: Identifying Query and Key Weights Corresponding to Basic Syntactic or High-level Semantic Information},\nauthor={Yiting Chen and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e5Mv7iWfVW}\n}", "github": "", "reviewers": "LVwG;2wVC;CgW8;cttx", "pdf_size": 2294930, "rating": "5;6;6;7", "confidence": "2;4;2;5", "soundness": "3;2;3;4", "novelty": "3;4;2;4", "presentation": "3;3;3;3", "wc_summary": "47;140;126;62", "wc_strengths": "49;69;138;160", "wc_weaknesses": "69;224;94;121", "wc_questions": "3;146;2;68", "wc_limitations": "1;1;1;6", "wc_review": "169;580;361;417", "wc_reply_reviewers": "0;43;43;31", "wc_reply_authors": "0;85;35;34", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.75, 39.91475291167415 ], "wc_strengths_avg": [ 104.0, 46.211470437543966 ], "wc_weaknesses_avg": [ 127.0, 58.94488951554664 ], "wc_questions_avg": [ 54.75, 59.08204041838772 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 381.75, 146.8321746076111 ], "wc_reply_reviewers_avg": [ 29.25, 17.583728273605686 ], "wc_reply_authors_avg": [ 38.5, 30.319135871591065 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13935311658930805506&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Large Language Model Unlearning via Embedding-Corrupted Prompts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94295", "id": "e5icsXBD8Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e5icsXBD8Q", "openreview": "https://openreview.net/forum?id=e5icsXBD8Q", "poster": "/media/PosterPDFs/NeurIPS%202024/94295.png?t=1732260207.5214899", "project": "", "author_site": "Chris Liu, Yaxuan Wang, Jeffrey Flanigan, Yang Liu", "tldr": "", "abstract": "Large language models (LLMs) have advanced to encompass extensive knowledge across diverse domains. Yet controlling what a large language model should not know is important for ensuring alignment and thus safe use. However, accurately and efficiently unlearning knowledge from an LLM remains challenging due to the potential collateral damage caused by the fuzzy boundary between retention and forgetting, and the large computational requirements for optimization across state-of-the-art models with hundreds of billions of parameters. In this work, we present \\textbf{Embedding-COrrupted (ECO) Prompts}, a lightweight unlearning framework for large language models to address both the challenges of knowledge entanglement and unlearning efficiency. Instead of relying on the LLM itself to unlearn, we enforce an unlearned state during inference by employing a prompt classifier to identify and safeguard prompts to forget. We learn corruptions added to prompt embeddings via zeroth order optimization toward the unlearning objective offline and corrupt prompts flagged by the classifier during inference. We find that these embedding-corrupted prompts not only lead to desirable outputs that satisfy the unlearning objective but also closely approximate the output from a model that has never been trained on the data intended for forgetting. Through extensive experiments on unlearning, we demonstrate the superiority of our method in achieving promising unlearning at \\textit{nearly zero side effects} in general domains and domains closely related to the unlearned ones. Additionally, we highlight the scalability of our method to 100 LLMs, ranging from 0.5B to 236B parameters, incurring no additional cost as the number of parameters increases. We have made our code publicly available at \\url{https://github.com/chrisliu298/llm-unlearn-eco}.", "keywords": "machine unlearning;safety;alignment;large language model unlearning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Chris Yuhao Liu;Yaxuan Wang;Jeffrey Flanigan;Yang Liu", "authorids": "~Chris_Yuhao_Liu1;~Yaxuan_Wang2;~Jeffrey_Flanigan1;~Yang_Liu3", "gender": "F;M;M;M", "homepage": "https://supergirl-os.github.io/;https://jflanigan.github.io/;http://www.yliuu.com;https://chrisliu298.ai/", "dblp": ";75/9768;51/3710-18;363/7353", "google_scholar": "2J96C48AAAAJ;XpIsORcAAAAJ;jKrIVCIAAAAJ;jZSZpNMAAAAJ", "orcid": "0000-0001-6450-0387;;0000-0001-8420-6011;0000-0001-8544-6789", "linkedin": "yaxuan-wang-a2a37a297/;;;", "or_profile": "~Yaxuan_Wang2;~Jeffrey_Flanigan1;~Yang_Liu3;~Yuhao_Liu2", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu", "position": "PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nliu2024large,\ntitle={Large Language Model Unlearning via Embedding-Corrupted Prompts},\nauthor={Chris Yuhao Liu and Yaxuan Wang and Jeffrey Flanigan and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e5icsXBD8Q}\n}", "github": "", "reviewers": "UAgG;jeTk;qM3a;WoTW", "pdf_size": 900384, "rating": "5;5;6;7", "confidence": "4;3;5;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "73;53;69;63", "wc_strengths": "94;20;169;62", "wc_weaknesses": "134;101;164;42", "wc_questions": "165;177;43;2", "wc_limitations": "35;6;28;5", "wc_review": "501;357;473;174", "wc_reply_reviewers": "253;65;22;135", "wc_reply_authors": "732;343;527;248", "reply_reviewers": "2;1;1;2", "reply_authors": "3;2;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.5, 7.533259586659682 ], "wc_strengths_avg": [ 86.25, 54.50860023886139 ], "wc_weaknesses_avg": [ 110.25, 45.267952239967734 ], "wc_questions_avg": [ 96.75, 75.7706242550502 ], "wc_limitations_avg": [ 18.5, 13.238202294873727 ], "wc_review_avg": [ 376.25, 128.64558873121146 ], "wc_reply_reviewers_avg": [ 118.75, 87.37383761744702 ], "wc_reply_authors_avg": [ 462.5, 185.12225690067632 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17860949808547217260&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Santa Cruz", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsc.edu", "aff_unique_abbr": "UCSC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Santa Cruz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Attack-Resilient Image Watermarking Using Stable Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94294", "id": "e6KrSouGHJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e6KrSouGHJ", "openreview": "https://openreview.net/forum?id=e6KrSouGHJ", "poster": "/media/PosterPDFs/NeurIPS%202024/94294.png?t=1730733307.3360903", "project": "", "author_site": "Lijun Zhang, Xiao Liu, Antoni Martin, Cindy Bearfield, Yuriy Brun, Hui Guan", "tldr": "", "abstract": "Watermarking images is critical for tracking image provenance and proving ownership. With the advent of generative models, such as stable diffusion, that can create fake but realistic images, watermarking has become particularly important to make human-created images reliably identifiable. Unfortunately, the very same stable diffusion technology can remove watermarks injected using existing methods.\nTo address this problem, we present ZoDiac, which uses a pre-trained stable diffusion model to inject a watermark into the trainable latent space, resulting in watermarks that can be reliably detected in the latent vector even when attacked. We evaluate ZoDiac on three benchmarks, MS-COCO, DiffusionDB, and WikiArt, and find that ZoDiac is robust against state-of-the-art watermark attacks, with a watermark detection rate above 98% and a false positive rate below 6.4%, outperforming state-of-the-art watermarking methods. We hypothesize that the reciprocating denoising process in diffusion models may inherently enhance the robustness of the watermark when faced with strong attacks and validate the hypothesis. Our research demonstrates that stable diffusion is a promising approach to robust watermarking, able to withstand even stable-diffusion-based attack methods. ZoDiac is open-sourced and available at https://github.com/zhanglijun95/ZoDiac.", "keywords": "Computer vision;Generative AI;Image Watermarking", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Lijun Zhang;Xiao Liu;Antoni Viros i Martin;Cindy Xiong Bearfield;Yuriy Brun;Hui Guan", "authorids": "~Lijun_Zhang4;~Xiao_Liu9;~Antoni_Viros_i_Martin1;~Cindy_Xiong_Bearfield1;~Yuriy_Brun1;~Hui_Guan1", "gender": "F;M;M;F;M;F", "homepage": "https://zhanglijun95.github.io/resume/;https://johnsmith2012.github.io/resume/;;https://cyxiong.com;https://people.cs.umass.edu/~brun/;https://guanh01.github.io/", "dblp": ";82/1364-30;;;51/1518.html;77/6645-1.html", "google_scholar": ";;pJtEH0UAAAAJ;fFc3ezYAAAAJ;https://scholar.google.com.tw/citations?user=YVfr3wwAAAAJ;L2P0jCsAAAAJ", "orcid": ";;0000-0002-6207-2419;0000-0002-1451-4083;0000-0003-3027-7986;0000-0001-9128-2231", "linkedin": ";;;;;", "or_profile": "~Lijun_Zhang4;~Xiao_Liu9;~Antoni_Viros_i_Martin1;~Cindy_Xiong_Bearfield1;~Yuriy_Brun1;~Hui_Guan1", "aff": "University of Massachusetts, Amherst;University of Massachusetts at Amherst;International Business Machines;Department of Computer Science, University of Massachusetts at Amherst;University of Massachusetts Amherst;University of Massachusetts, Amherst", "aff_domain": "umass.edu;umass.edu;ibm.com;cs.umass.edu;umass.edu;umass.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024attackresilient,\ntitle={Attack-Resilient Image Watermarking Using Stable Diffusion},\nauthor={Lijun Zhang and Xiao Liu and Antoni Viros i Martin and Cindy Xiong Bearfield and Yuriy Brun and Hui Guan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e6KrSouGHJ}\n}", "github": "", "reviewers": "r9M9;mbRv;AMWU", "pdf_size": 3694865, "rating": "5;6;7", "confidence": "3;3;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "55;79;44", "wc_strengths": "37;53;20", "wc_weaknesses": "63;68;64", "wc_questions": "3;136;552", "wc_limitations": "1;15;12", "wc_review": "159;351;692", "wc_reply_reviewers": "0;95;1546", "wc_reply_authors": "151;333;4191", "reply_reviewers": "0;1;4", "reply_authors": "2;3;11", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.333333333333336, 14.613540144521982 ], "wc_strengths_avg": [ 36.666666666666664, 13.474255287605157 ], "wc_weaknesses_avg": [ 65.0, 2.160246899469287 ], "wc_questions_avg": [ 230.33333333333334, 233.84372749528453 ], "wc_limitations_avg": [ 9.333333333333334, 6.018490028422597 ], "wc_review_avg": [ 400.6666666666667, 220.41224003116423 ], "wc_reply_reviewers_avg": [ 547.0, 707.4635444082378 ], "wc_reply_authors_avg": [ 1558.3333333333333, 1863.058655962167 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.699673171197595 ], "reply_authors_avg": [ 5.333333333333333, 4.0276819911981905 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3042464105440165391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "umass.edu;umass.edu;ibm.com;cs.umass.edu;umass.edu;umass.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Massachusetts Amherst;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.ibm.com", "aff_unique_abbr": "UMass Amherst;IBM", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AutoMix: Automatically Mixing Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94293", "id": "e6WrwIvgzX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=e6WrwIvgzX", "openreview": "https://openreview.net/forum?id=e6WrwIvgzX", "poster": "", "project": "", "author_site": "Pranjal Aggarwal, Aman Madaan, Ankit Anand, Srividya Pranavi Potharaju, Swaroop Mishra, Pei Zhou, Aditya Gupta, Dheeraj Rajagopal, Karthik Kappaganthu, Yiming Yang, Shyam Upadhyay, Manaal Faruqui, Mausam", "tldr": "", "abstract": "Large language models (LLMs) are now available from cloud API providers in various sizes and configurations. While this diversity offers a broad spectrum of choices, effectively leveraging the options to optimize computational cost and performance remains challenging. In this work, we present AutoMix, an approach that strategically routes queries to larger LMs, based on the approximate correctness of outputs from a smaller LM. Central to AutoMix are two key technical contributions. First, it has a few-shot self-verification mechanism, which estimates the reliability of its own outputs without requiring extensive training. Second, given that self-verification can be noisy, it employs a POMDP based router that can effectively select an appropriately sized model, based on answer confidence. Experiments across five language models and five challenging datasets show that Automix consistently surpasses strong baselines, reducing computational cost by over 50\\% for comparable performance.", "keywords": "Few-shot learning;Zero-shot learning;Self-Verification;cost-quality optimization;Decision making;Prompting;LLMs", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Pranjal Aggarwal;Aman Madaan;Ankit Anand;Srividya Pranavi Potharaju;Swaroop Mishra;Pei Zhou;Aditya Gupta;Dheeraj Rajagopal;Karthik Kappaganthu;Yiming Yang;Shyam Upadhyay;Manaal Faruqui;Mausam .", "authorids": "~Pranjal_Aggarwal1;~Aman_Madaan1;~Ankit_Anand4;~Srividya_Pranavi_Potharaju1;~Swaroop_Mishra1;~Pei_Zhou1;~Aditya_Gupta2;~Dheeraj_Rajagopal1;~Karthik_Kappaganthu1;~Yiming_Yang1;~Shyam_Upadhyay1;~Manaal_Faruqui2;~Mausam_.1", "gender": "M;;;;M;M;M;M;;F;;M;", "homepage": "https://github.com/Pranjal2041/;https://madaan.github.io;;;https://swarooprm.github.io/;https://shaoxia57.github.io/;https://research.google/people/AdityaGupta/;https://dheerajrajagopal.github.io;;http://www.cs.cmu.edu/~yiming/;http://shyamupa.com;http://www.manaalfaruqui.com/;", "dblp": "163/0764;138/1043;;;249/2784;;;127/0193;;25/1666;161/0014.html;07/9769;", "google_scholar": "https://scholar.google.com/citations?hl=en;jW9ts2cAAAAJ;;;-7LK2SwAAAAJ;13PGDZsAAAAJ;HW7IZ6sAAAAJ;NsJZccUAAAAJ;;MlZq4XwAAAAJ;;W-CxZCgAAAAJ;", "orcid": "0000-0002-2962-1535;;;;;;;;;0000-0001-8322-607X;;;", "linkedin": ";amnmadaan/;;;;pei-zhou-169051119/;;;;yiming-yang-24100924/;;;", "or_profile": "~Pranjal_Aggarwal1;~Aman_Madaan1;~Ankit_Anand4;~Srividya_Pranavi_Potharaju1;~Swaroop_Mishra1;~Pei_Zhou1;~Aditya_Gupta2;~Dheeraj_Rajagopal1;~Karthik_Kappaganthu1;~Yiming_Yang1;~Shyam_Upadhyay1;~Manaal_Faruqui2;~Mausam_.1", "aff": "Indian Institute of Technology, Delhi;Carnegie Mellon University;;;Google;University of Southern California;Google;Google DeepMind;;School of Computer Science, Carnegie Mellon University;Google;Google;", "aff_domain": "iitd.ac.in;cmu.edu;;;google.com;usc.edu;google.com;google.com;;cs.cmu.edu;google.com;google.com;", "position": "Undergrad student;PhD student;;;Researcher;PhD student;Researcher;Researcher;;Full Professor;Researcher;Research Scientist;", "bibtex": "@inproceedings{\naggarwal2024automix,\ntitle={AutoMix: Automatically Mixing Language Models},\nauthor={Pranjal Aggarwal and Aman Madaan and Ankit Anand and Srividya Pranavi Potharaju and Swaroop Mishra and Pei Zhou and Aditya Gupta and Dheeraj Rajagopal and Karthik Kappaganthu and Yiming Yang and Shyam Upadhyay and Manaal Faruqui and Mausam .},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=e6WrwIvgzX}\n}", "github": "", "reviewers": "RAHf;eJtm;NEfd;E2SA;zQB8", "pdf_size": 1146097, "rating": "5;6;6;6;6", "confidence": "4;4;3;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "73;77;78;89;118", "wc_strengths": "96;47;50;128;42", "wc_weaknesses": "273;66;69;105;20", "wc_questions": "5;2;70;72;6", "wc_limitations": "6;1;1;9;1", "wc_review": "453;193;268;403;187", "wc_reply_reviewers": "179;15;15;0;21", "wc_reply_authors": "43;24;56;56;56", "reply_reviewers": "1;1;1;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 16.382917933017914 ], "wc_strengths_avg": [ 72.6, 33.82070371828475 ], "wc_weaknesses_avg": [ 106.6, 87.46793698264526 ], "wc_questions_avg": [ 31.0, 32.69250678672408 ], "wc_limitations_avg": [ 3.6, 3.32264954516723 ], "wc_review_avg": [ 300.8, 108.86395179305224 ], "wc_reply_reviewers_avg": [ 46.0, 66.86104994688611 ], "wc_reply_authors_avg": [ 47.0, 12.553883861180172 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": -0.25000000000000006, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6696048120490111870&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "iitd.ac.in;cmu.edu;;;google.com;usc.edu;google.com;google.com;;cs.cmu.edu;google.com;google.com;", "author_num": 13, "aff_unique_index": "0;1;2;3;2;2;1;2;2", "aff_unique_norm": "Indian Institute of Technology Delhi;Carnegie Mellon University;Google;University of Southern California", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.iitdelhi.ac.in;https://www.cmu.edu;https://www.google.com;https://www.usc.edu", "aff_unique_abbr": "IIT Delhi;CMU;Google;USC", "aff_campus_unique_index": "0;2;3;2;4;2;2", "aff_campus_unique": "Delhi;;Mountain View;Los Angeles;Pittsburgh", "aff_country_unique_index": "0;1;1;1;1;2;1;1;1", "aff_country_unique": "India;United States;United Kingdom" }, { "title": "Is the MMI Criterion Necessary for Interpretability? Degenerating Non-causal Features to Plain Noise for Self-Rationalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94292", "id": "eAqcVZx30k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eAqcVZx30k", "openreview": "https://openreview.net/forum?id=eAqcVZx30k", "poster": "/media/PosterPDFs/NeurIPS%202024/94292.png?t=1732255552.2571073", "project": "", "author_site": "Wei Liu, Zhiying Deng, Zhongyu Niu, Jun Wang, Haozhao Wang, YuanKai Zhang, Ruixuan Li", "tldr": "", "abstract": "An important line of research in the field of explainability is to extract a small subset of crucial rationales from the full input. The most widely used criterion for rationale extraction is the maximum mutual information (MMI) criterion. However, in certain datasets, there are spurious features non-causally correlated with the label and also get high mutual information, complicating the loss landscape of MMI. Although some penalty-based methods have been developed to penalize the spurious features (e.g., invariance penalty, intervention penalty, etc) to help MMI work better, these are merely remedial measures. \nIn the optimization objectives of these methods, spurious features are still distinguished from plain noise, which hinders the discovery of causal rationales. \nThis paper aims to develop a new criterion that treats spurious features as plain noise, allowing the model to work on datasets rich in spurious features as if it were working on clean datasets, thereby making rationale extraction easier.\nWe theoretically observe that removing either plain noise or spurious features from the input does not alter the conditional distribution of the remaining components relative to the task label. However, significant changes in the conditional distribution occur only when causal features are eliminated.\nBased on this discovery, the paper proposes a criterion for \\textbf{M}aximizing the \\textbf{R}emaining \\textbf{D}iscrepancy (MRD). Experiments on six widely used datasets show that our MRD criterion improves rationale quality (measured by the overlap with human-annotated rationales) by up to $10.4\\%$ as compared to several recent competitive MMI variants. Code: \\url{https://github.com/jugechengzi/Rationalization-MRD}.", "keywords": "interpretability;data cleaning;causality;mutual information", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Wei Liu;Zhiying Deng;Zhongyu Niu;Jun Wang;Haozhao Wang;YuanKai Zhang;Ruixuan Li", "authorids": "~Wei_Liu30;~Zhiying_Deng2;~Zhongyu_Niu1;~Jun_Wang1;~Haozhao_Wang1;~YuanKai_Zhang1;~Ruixuan_Li1", "gender": ";;M;;M;;M", "homepage": ";;https://github.com/Magicum-Sidus-Bread;;https://wanghaozhao.mysxl.cn/;;http://idc.hust.edu.cn/rxli/index.html", "dblp": ";;;w/JunWang18;224/4500.html;;60/4429.html", "google_scholar": ";;;Anp1nPUAAAAJ;https://scholar.google.com.hk/citations?user=yFrOuMEAAAAJ;;https://scholar.google.com/scholar?q=ruixuan+li", "orcid": ";;0009-0002-9991-2765;0000-0002-9515-076X;0000-0002-7591-5315;;0000-0002-7791-5511", "linkedin": ";;;hustwj/;;;https://www.linkedin.cn/incareer/in/ruixuan-li-b367319", "or_profile": "~Wei_Liu30;~Zhiying_Deng2;~Zhongyu_Niu1;~Jun_Wang1;~Haozhao_Wang1;~YuanKai_Zhang1;~Ruixuan_Li1", "aff": ";;Huazhong University of Science and Technology;iWudao Tech.;Huazhong University of Science and Technology;;Huazhong University of Science and Technology", "aff_domain": ";;hust.edu.cn;iwudao.tech;hust.edu.cn;;hust.edu.cn", "position": ";;MS student;Consultant;Postdoc;;Full Professor", "bibtex": "@inproceedings{\nliu2024is,\ntitle={Is the {MMI} Criterion Necessary for Interpretability? Degenerating Non-causal Features to Plain Noise for Self-Rationalization},\nauthor={Wei Liu and Zhiying Deng and Zhongyu Niu and Jun Wang and Haozhao Wang and YuanKai Zhang and Ruixuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eAqcVZx30k}\n}", "github": "", "reviewers": "wByu;98v5;voxd;UFAP", "pdf_size": 453336, "rating": "6;7;7;7", "confidence": "4;2;3;2", "soundness": "2;4;4;3", "novelty": "2;4;2;4", "presentation": "3;4;4;4", "wc_summary": "87;27;75;151", "wc_strengths": "17;14;92;75", "wc_weaknesses": "188;21;150;163", "wc_questions": "3;42;66;16", "wc_limitations": "4;7;5;81", "wc_review": "299;111;388;486", "wc_reply_reviewers": "42;19;42;107", "wc_reply_authors": "28;21;25;49", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 44.22668877499196 ], "wc_strengths_avg": [ 49.5, 34.54345089883175 ], "wc_weaknesses_avg": [ 130.5, 64.6780488264759 ], "wc_questions_avg": [ 31.75, 24.252577182641847 ], "wc_limitations_avg": [ 24.25, 32.78242669480098 ], "wc_review_avg": [ 321.0, 138.11046303593366 ], "wc_reply_reviewers_avg": [ 52.5, 32.836717253708535 ], "wc_reply_authors_avg": [ 30.75, 10.825317547305483 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10579059456527233416&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";;hust.edu.cn;iwudao.tech;hust.edu.cn;;hust.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Huazhong University of Science and Technology;iWudao Tech", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;", "aff_unique_abbr": "HUST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unlock the Intermittent Control Ability of Model Free Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94291", "id": "eC5qdC4ZTQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eC5qdC4ZTQ", "openreview": "https://openreview.net/forum?id=eC5qdC4ZTQ", "poster": "", "project": "", "author_site": "Jiashun Liu, Jianye Hao, Xiaotian Hao, Yi Ma, YAN ZHENG, Yujing Hu, Tangjie Lv", "tldr": "", "abstract": "Intermittent control problems are common in real world. The interactions between the decision maker and the executor can be discontinuous (intermittent) due to various types of interruptions, e.g. unstable communication channel. Due to intermittent interaction, agents are unable to acquire the state sent by the executor and cannot transmit actions to the executor within a period of time step, i.e. bidirectional blockage, which may lead to inefficiencies of reinforcement learning policies and prevent the executors from completing the task. Such problem is not well studied in the RL community. In this paper, we model Intermittent control problem as an Intermittent Control Markov Decision Process, i.e agents are expected to generate action sequences corresponding to the unavailable states and transmit them before disabling interactions to ensure the smooth and effective motion of executors. However, directly generating multiple future actions in the original action space has unnatural motion issue and exploration difficulty. We propose **M**ulti-step **A**ction **R**epre**S**entation (**MARS**), which encodes a sequence of actions from the original action space to a compact and decodable latent space. Then based on the latent action sequence representation, the mainstream RL methods can be easily optimized to learn a smooth and efficient motion policy. Extensive experiments on simulation tasks and real-world robotic grasping tasks show that MARS significantly improves the learning efficiency and final performances compared with existing baselines.", "keywords": "Deep Reinforcement Learning ; Representation Learning; Intermittent Control", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/68ecee19febaab7ae8d7954a0d43b80ffbce3ad3.zip", "author": "Jiashun Liu;Jianye HAO;Xiaotian Hao;Yi Ma;YAN ZHENG;Yujing Hu;Tangjie Lv", "authorids": "~Jiashun_Liu1;~Jianye_HAO1;~Xiaotian_Hao1;~Yi_Ma5;~YAN_ZHENG1;~Yujing_Hu2;~Tangjie_Lv1", "gender": "M;M;M;;M;;M", "homepage": ";http://www.icdai.org/jianye.html;;https://mayi1996.top/;https://yanzzzzz.github.io;;", "dblp": ";21/7664.html;144/3359;69/1112-5.html;10/2381-2;https://dblp.uni-trier.de/pid/160/1923.html;", "google_scholar": "iAYyfMAAAAAJ;;xgk9NPwAAAAJ;TdVWzqgAAAAJ;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;IR5WY-wAAAAJ;EIuWpJcAAAAJ", "orcid": ";0000-0002-0422-8235;;0000-0001-9375-6605;;;0000-0001-9858-809X", "linkedin": ";;;;;;", "or_profile": "~Jiashun_Liu1;~Jianye_HAO1;~Xiaotian_Hao1;~Yi_Ma5;~YAN_ZHENG1;~Yujing_Hu2;~Tangjie_Lv1", "aff": "Tianjin University;Tianjin University;university of tianjin of china, Tianjin University;Tianjin University;Tianjin Unibersity, China;NetEase, Inc.;NetEase, Inc.", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;corp.netease.com;netease.com", "position": "MS student;Associate Professor;PhD student;PhD student;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nliu2024unlock,\ntitle={Unlock the Intermittent Control Ability of Model Free Reinforcement Learning},\nauthor={Jiashun Liu and Jianye HAO and Xiaotian Hao and Yi Ma and YAN ZHENG and Yujing Hu and Tangjie Lv},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eC5qdC4ZTQ}\n}", "github": "", "reviewers": "Bn1i;5oZS;TqM1", "pdf_size": 2042797, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "3;2;3", "wc_summary": "58;85;114", "wc_strengths": "20;99;34", "wc_weaknesses": "35;150;24", "wc_questions": "26;60;126", "wc_limitations": "2;47;3", "wc_review": "141;441;301", "wc_reply_reviewers": "21;34;289", "wc_reply_authors": "1312;509;1317", "reply_reviewers": "1;1;2", "reply_authors": "5;3;4", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 85.66666666666667, 22.866763848189994 ], "wc_strengths_avg": [ 51.0, 34.418987008142274 ], "wc_weaknesses_avg": [ 69.66666666666667, 56.98147847229737 ], "wc_questions_avg": [ 70.66666666666667, 41.51572660517404 ], "wc_limitations_avg": [ 17.333333333333332, 20.98147330914162 ], "wc_review_avg": [ 294.3333333333333, 122.56517540566823 ], "wc_reply_reviewers_avg": [ 114.66666666666667, 123.38647503767267 ], "wc_reply_authors_avg": [ 1046.0, 379.72182800922394 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ij_qVPKfLTcJ:scholar.google.com/&scioq=Unlock+the+Intermittent+Control+Ability+of+Model+Free+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;corp.netease.com;netease.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;1", "aff_unique_norm": "Tianjin University;NetEase, Inc.", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.163.com", "aff_unique_abbr": "TJU;NetEase", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tianjin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural Assets: 3D-Aware Multi-Object Scene Synthesis with Image Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94290", "id": "eDNslSwQIj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eDNslSwQIj", "openreview": "https://openreview.net/forum?id=eDNslSwQIj", "poster": "/media/PosterPDFs/NeurIPS%202024/94290.png?t=1732302788.9997432", "project": "", "author_site": "Ziyi Wu, Yulia Rubanova, Rishabh Kabra, Drew Hudson, Igor Gilitschenski, Yusuf Aytar, Sjoerd van Steenkiste, Kelsey Allen, Thomas Kipf", "tldr": "", "abstract": "We address the problem of multi-object 3D pose control in image diffusion models. Instead of conditioning on a sequence of text tokens, we propose to use a set of per-object representations, *Neural Assets*, to control the 3D pose of individual objects in a scene. Neural Assets are obtained by pooling visual representations of objects from a reference image, such as a frame in a video, and are trained to reconstruct the respective objects in a different image, e.g., a later frame in the video. Importantly, we encode object visuals from the reference image while conditioning on object poses from the target frame, which enables learning disentangled appearance and position features. Combining visual and 3D pose representations in a sequence-of-tokens format allows us to keep the text-to-image interface of existing models, with Neural Assets in place of text tokens. By fine-tuning a pre-trained text-to-image diffusion model with this information, our approach enables fine-grained 3D pose and placement control of individual objects in a scene. We further demonstrate that Neural Assets can be transferred and recomposed across different scenes. Our model achieves state-of-the-art multi-object editing results on both synthetic 3D scene datasets, as well as two real-world video datasets (Objectron, Waymo Open).", "keywords": "Controllable generation;3D-aware editing;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/0d03608153e65916479def9396d1ec9f92485166.zip", "author": "Ziyi Wu;Yulia Rubanova;Rishabh Kabra;Drew A. Hudson;Igor Gilitschenski;Yusuf Aytar;Sjoerd van Steenkiste;Kelsey R Allen;Thomas Kipf", "authorids": "~Ziyi_Wu1;~Yulia_Rubanova2;~Rishabh_Kabra1;~Drew_Arad_Hudson1;~Igor_Gilitschenski1;~Yusuf_Aytar1;~Sjoerd_van_Steenkiste1;~Kelsey_R_Allen1;~Thomas_Kipf2", "gender": "M;M;F;M;M;M;F;M;F", "homepage": "https://wuziyi616.github.io/;;;https://www.gilitschenski.org/igor;;http://www.sjoerdvansteenkiste.com/;;http://tkipf.github.io/;https://yuliarubanova.github.io/", "dblp": "217/8678;234/8010;;129/1281;41/5577;183/9326;153/9528;186/8206;222/3085", "google_scholar": "iopH6wIAAAAJ;;;Nuw1Y4oAAAAJ;0ncQNL8AAAAJ;i-AStBYAAAAJ;kpcjFekAAAAJ;83HL5FwAAAAJ;u_HzE9wAAAAJ", "orcid": "0000-0002-8247-5872;;;;;;;;", "linkedin": ";;drew-arad-hudson-b3a71348/;igorgilitschenski/;;;;thomas-kipf-6b260410a;https://linkedin.com/in/yulia-rubanova-031702100", "or_profile": "~Ziyi_Wu1;~Rishabh_Kabra1;~Drew_Arad_Hudson1;~Igor_Gilitschenski1;~Yusuf_Aytar1;~Sjoerd_van_Steenkiste1;~Kelsey_R_Allen1;~Thomas_N._Kipf1;~Yulia_Rubanova1", "aff": "Google;University College London, University of London;Google DeepMind;University of Toronto;Google DeepMind;Google;Google;Google;Google DeepMind", "aff_domain": "google.com;ucl.ac.uk;google.com;toronto.edu;google.com;google.com;deepmind.com;google.com;deepmind.com", "position": "Intern;PhD student;Research Scientist;Assistant Professor;Research Scientist;Researcher;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nwu2024neural,\ntitle={Neural Assets: 3D-Aware Multi-Object Scene Synthesis with Image Diffusion Models},\nauthor={Ziyi Wu and Yulia Rubanova and Rishabh Kabra and Drew A. Hudson and Igor Gilitschenski and Yusuf Aytar and Sjoerd van Steenkiste and Kelsey R Allen and Thomas Kipf},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eDNslSwQIj}\n}", "github": "", "reviewers": "h1dc;jimW;W8vg;bNz4", "pdf_size": 4914435, "rating": "6;7;7;7", "confidence": "4;4;4;5", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "4;4;4;3", "wc_summary": "119;87;84;84", "wc_strengths": "55;90;162;116", "wc_weaknesses": "52;83;160;370", "wc_questions": "3;52;42;36", "wc_limitations": "2;27;4;1", "wc_review": "231;339;452;607", "wc_reply_reviewers": "11;17;14;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 14.773286702694158 ], "wc_strengths_avg": [ 105.75, 39.0280347955159 ], "wc_weaknesses_avg": [ 166.25, 124.03300971918725 ], "wc_questions_avg": [ 33.25, 18.376275465937052 ], "wc_limitations_avg": [ 8.5, 10.735455276791944 ], "wc_review_avg": [ 407.25, 139.3060928315772 ], "wc_reply_reviewers_avg": [ 17.0, 5.612486080160912 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9661284273928464206&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "google.com;ucl.ac.uk;google.com;toronto.edu;google.com;google.com;deepmind.com;google.com;deepmind.com", "author_num": 9, "aff_unique_index": "0;1;0;2;0;0;0;0;0", "aff_unique_norm": "Google;University College London;University of Toronto", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ucl.ac.uk;https://www.utoronto.ca", "aff_unique_abbr": "Google;UCL;U of T", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1;2;1;0;0;0;1", "aff_country_unique": "United States;United Kingdom;Canada" }, { "title": "Accelerating Nash Equilibrium Convergence in Monte Carlo Settings Through Counterfactual Value Based Fictitious Play", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94289", "id": "eFD9N5zdFC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eFD9N5zdFC", "openreview": "https://openreview.net/forum?id=eFD9N5zdFC", "poster": "/media/PosterPDFs/NeurIPS%202024/94289.png?t=1730987945.6915195", "project": "", "author_site": "Qi Ju, Falin Hei, Ting Feng, Dengbing Yi, Zhemei Fang, YunFeng Luo", "tldr": "", "abstract": "Counterfactual Regret Minimization (CFR) and its variants are widely recognized as effective algorithms for solving extensive-form imperfect information games. Recently, many improvements have been focused on enhancing the convergence speed of the CFR algorithm. However, most of these variants are not applicable under Monte Carlo (MC) conditions, making them unsuitable for training in large-scale games. We introduce a new MC-based algorithm for solving extensive-form imperfect information games, called MCCFVFP (Monte Carlo Counterfactual Value-Based Fictitious Play). MCCFVFP combines CFR\u2019s counterfactual value calculations with fictitious play\u2019s best response strategy, leveraging the strengths of fictitious play to gain significant advantages in games with a high proportion of dominated strategies. Experimental results show that MCCFVFP achieved convergence speeds approximately 20\\%$\\sim$50\\% faster than the most advanced MCCFR variants in games like poker and other test games.", "keywords": "incomplete information game;Counterfactual Regret Minimization;blueprint strategy;fictitious play", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/2e30db118bec4e1f0df4751b3d3367d1dc252e56.zip", "author": "Qi Ju;Falin Hei;Ting Feng;Dengbing Yi;Zhemei Fang;YunFeng Luo", "authorids": "~Qi_Ju2;~Falin_Hei1;~Ting_Feng1;~Dengbing_Yi2;~Zhemei_Fang1;~YunFeng_Luo1", "gender": "M;;M;F;M;M", "homepage": "https://github.com/heifalin;https://github.com/heyzeyft;https://github.com/hubydb;;http://aia.hust.edu.cn/info/1226/5024.htm;https://zealoter.github.io", "dblp": ";;;;;", "google_scholar": ";;;1oxgdQYAAAAJ;;https://scholar.google.com.hk/citations?user=l0nT2r4AAAAJ", "orcid": ";;;;;0009-0000-3759-3696", "linkedin": ";;;;;", "or_profile": "~Falin_Hei1;~Ting_Feng1;~Dengbing_Yi2;~Zhemei_Fang1;~YunFeng_Luo1;~Ju_Qi1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "MS student;MS student;MS student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nju2024accelerating,\ntitle={Accelerating Nash Equilibrium Convergence in Monte Carlo Settings Through Counterfactual Value Based Fictitious Play},\nauthor={Qi Ju and Falin Hei and Ting Feng and Dengbing Yi and Zhemei Fang and YunFeng Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eFD9N5zdFC}\n}", "github": "", "reviewers": "H6L6;KPRH;APT4", "pdf_size": 5159890, "rating": "5;6;7", "confidence": "4;3;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "1;2;3", "wc_summary": "100;66;79", "wc_strengths": "98;34;61", "wc_weaknesses": "122;107;25", "wc_questions": "53;96;21", "wc_limitations": "1;6;1", "wc_review": "374;309;187", "wc_reply_reviewers": "0;17;11", "wc_reply_authors": "36;15;36", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 81.66666666666667, 14.007934259633796 ], "wc_strengths_avg": [ 64.33333333333333, 26.23398982660133 ], "wc_weaknesses_avg": [ 84.66666666666667, 42.63279905841928 ], "wc_questions_avg": [ 56.666666666666664, 30.728199137310703 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 290.0, 77.51558982983144 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 7.039570693980959 ], "wc_reply_authors_avg": [ 29.0, 9.899494936611665 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MPbzuoOjAZEJ:scholar.google.com/&scioq=Accelerating+Nash+Equilibrium+Convergence+in+Monte+Carlo+Settings+Through+Counterfactual+Value+Based+Fictitious+Play&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Semi-Truths: A Large-Scale Dataset of AI-Augmented Images for Evaluating Robustness of AI-Generated Image detectors", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97591", "id": "eFPxCNmI7i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eFPxCNmI7i", "openreview": "https://openreview.net/forum?id=eFPxCNmI7i", "poster": "", "project": "", "author_site": "Anisha Pal, Julia Kruk, Mansi Phute, Manognya Bhattaram, Diyi Yang, Duen Horng Chau, Judy Hoffman", "tldr": "", "abstract": "Text-to-image diffusion models have impactful applications in art, design, and entertainment, yet these technologies also pose significant risks by enabling the creation and dissemination of misinformation. Although recent advancements have produced AI-generated image detectors that claim robustness against various augmentations, their true effectiveness remains uncertain. Do these detectors reliably identify images with different levels of augmentation? Are they biased toward specific scenes or data distributions? To investigate, we introduce **Semi-Truths**, featuring $27,600$ real images, $223,400$ masks, and $1, 329, 155$ AI-augmented images that feature targeted and localized perturbations produced using diverse augmentation techniques, diffusion models, and data distributions. Each augmented image is accompanied by metadata for standardized and targeted evaluation of detector robustness. Our findings suggest that state-of-the-art detectors exhibit varying sensitivities to the types and degrees of perturbations, data distributions, and augmentation methods used, offering new insights into their performance and limitations. The code for the augmentation and evaluation pipeline is available at https://github.com/J-Kruk/SemiTruths.", "keywords": "Dataset;Benchmark;Stress testing;Generative AI", "primary_area": "", "supplementary_material": "", "author": "Anisha Pal;Julia Kruk;Mansi Phute;Manognya Bhattaram;Diyi Yang;Duen Horng Chau;Judy Hoffman", "authorids": "~Anisha_Pal1;~Julia_Kruk1;~Mansi_Phute1;~Manognya_Bhattaram1;~Diyi_Yang2;~Duen_Horng_Chau1;~Judy_Hoffman1", "gender": "F;F;F;F;F;F;Not Specified", "homepage": "https://anipal.github.io/;;https://mphute.github.io;;https://cs.stanford.edu/~diyiy/;https://www.cc.gatech.edu/~judy/;https://faculty.cc.gatech.edu/~dchau", "dblp": "251/9085;;;;70/11145;45/10336;10/2670", "google_scholar": "https://scholar.google.com/citations?hl=en;DOmg2LMAAAAJ;;;j9jhYqQAAAAJ;mqpjAt4AAAAJ;https://scholar.google.com.tw/citations?user=YON32W4AAAAJ", "orcid": ";;;;;;0000-0001-9824-3323", "linkedin": "anisha-pal/;julia-kruk-854155112/;mansi-phute-413744166/;manognyabhattaram/;;;polochau", "or_profile": "~Anisha_Pal1;~Julia_Kruk1;~Mansi_Phute1;~Manognya_Bhattaram1;~Diyi_Yang2;~Judy_Hoffman1;~Duen_Chau1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Stanford University;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;stanford.edu;gatech.edu;gatech.edu", "position": "MS student;MS student;MS student;MS student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npal2024semitruths,\ntitle={Semi-Truths: A Large-Scale Dataset of {AI}-Augmented Images for Evaluating Robustness of {AI}-Generated Image detectors},\nauthor={Anisha Pal and Julia Kruk and Mansi Phute and Manognya Bhattaram and Diyi Yang and Duen Horng Chau and Judy Hoffman},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=eFPxCNmI7i}\n}", "github": "", "reviewers": "qbAq;FB8v;ybbx;P89j;rPMV", "pdf_size": 19468419, "rating": "6;6;6;6;7", "confidence": "4;3;3;3;4", "wc_summary_and_contributions": "62;129;59;59;85", "wc_strengths": "54;71;45;27;38", "wc_improvement": "83;146;34;29;115", "wc_limitations": "39;6;4;8;35", "wc_correctness": "29;14;1;10;47", "wc_clarity": "9;7;1;5;16", "wc_relation_to_prior_work": "39;11;1;1;35", "wc_documentation": "7;15;1;1;72", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "323;400;147;141;444", "wc_reply_reviewers": "14;32;0;16;81", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 78.8, 26.92508124407427 ], "wc_strengths_avg": [ 47.0, 14.89966442575134 ], "wc_improvement_avg": [ 81.4, 45.381053315232776 ], "wc_limitations_avg": [ 18.4, 15.291827882892221 ], "wc_correctness_avg": [ 20.2, 16.166632302368974 ], "wc_clarity_avg": [ 7.6, 4.963869458396342 ], "wc_relation_to_prior_work_avg": [ 17.4, 16.46329250180534 ], "wc_documentation_avg": [ 19.2, 26.89535275842278 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 291.0, 126.13484847574837 ], "wc_reply_reviewers_avg": [ 28.6, 28.09697492613751 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6123724356957947, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:i9yS1wqMgL8J:scholar.google.com/&scioq=Semi-Truths:+A+Large-Scale+Dataset+of+AI-Augmented+Images+for+Evaluating+Robustness+of+AI-Generated+Image+detectors&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;stanford.edu;gatech.edu;gatech.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "Georgia Tech;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Transition Constrained Bayesian Optimization via Markov Decision Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94288", "id": "eFrdRuyHR9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eFrdRuyHR9", "openreview": "https://openreview.net/forum?id=eFrdRuyHR9", "poster": "/media/PosterPDFs/NeurIPS%202024/94288.png?t=1730218101.027096", "project": "", "author_site": "Jose Pablo Folch, Calvin Tsay, Robert Lee, Behrang Shafei, Weronika Ormaniec, Andreas Krause, Mark van der Wilk, Ruth Misener, Mojmir Mutny", "tldr": "", "abstract": "Bayesian optimization is a methodology to optimize black-box functions. Traditionally, it focuses on the setting where you can arbitrarily query the search space. However, many real-life problems do not offer this flexibility; in particular, the search space of the next query may depend on previous ones. Example challenges arise in the physical sciences in the form of local movement constraints, required monotonicity in certain variables, and transitions influencing the accuracy of measurements. Altogether, such *transition constraints* necessitate a form of planning. This work extends classical Bayesian optimization via the framework of Markov Decision Processes. We iteratively solve a tractable linearization of our utility function using reinforcement learning to obtain a policy that plans ahead for the entire horizon. This is a parallel to the optimization of an *acquisition function in policy space*. The resulting policy is potentially history-dependent and non-Markovian. We showcase applications in chemical reactor optimization, informative path planning, machine calibration, and other synthetic examples.", "keywords": "Bayesian Optimization;Transition Constrained;Markov Decision Process;Linear Bandits;Convex Reinforcement Learning", "primary_area": "optimization", "supplementary_material": "", "author": "Jose Pablo Folch;Calvin Tsay;Robert Matthew Lee;Behrang Shafei;Weronika Ormaniec;Andreas Krause;Mark van der Wilk;Ruth Misener;Mojmir Mutny", "authorids": "~Jose_Pablo_Folch1;~Calvin_Tsay1;~Robert_Matthew_Lee1;~Behrang_Shafei1;~Weronika_Ormaniec1;~Andreas_Krause1;~Mark_van_der_Wilk1;~Ruth_Misener1;~Mojmir_Mutny1", "gender": "M;;M;M;;M;M;F;M", "homepage": "https://jpfolch.github.io;https://www.imperial.ac.uk/people/c.tsay;http://basf.net;;https://github.com/werkaaa;https://las.inf.ethz.ch/krausea;https://mvdw.uk;https://wp.doc.ic.ac.uk/rmisener/;", "dblp": "313/1434;204/0777;;;322/8772;87/1831-1.html;142/2927;04/8800;173/5114", "google_scholar": "8YaH3bUAAAAJ;i59BQe0AAAAJ;;;9RAaLc8AAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ;PKcjcT4AAAAJ;AQxtWHoAAAAJ;", "orcid": ";;;;0009-0007-8868-1363;0000-0001-7260-9673;0000-0001-7947-6682;0000-0001-5612-5417;", "linkedin": "jose-pablo-folch-1690b1196/;;;behrang-shafei/;;krausea/;;ruth-misener/;", "or_profile": "~Jose_Pablo_Folch1;~Calvin_Tsay1;~Robert_Matthew_Lee1;~Behrang_Shafei1;~Weronika_Ormaniec1;~Andreas_Krause1;~Mark_van_der_Wilk1;~Ruth_Misener1;~Mojmir_Mutny1", "aff": "Imperial College London, Imperial College London;Imperial College London;;BASF;ETHZ - ETH Zurich;ETH Zurich;University of Oxford;Imperial College London;Swiss Federal Institute of Technology", "aff_domain": "imperial.ac.uk;imperial.ac.uk;;basf.com;ethz.ch;ethz.ch;cs.ox.ac.uk;imperial.ac.uk;ethz.ch", "position": "PhD student;Assistant Professor;;Researcher;MS student;Full Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nfolch2024transition,\ntitle={Transition Constrained Bayesian Optimization via Markov Decision Processes},\nauthor={Jose Pablo Folch and Calvin Tsay and Robert Matthew Lee and Behrang Shafei and Weronika Ormaniec and Andreas Krause and Mark van der Wilk and Ruth Misener and Mojmir Mutny},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eFrdRuyHR9}\n}", "github": "", "reviewers": "mJAD;2XmZ;MNRf;J8j4", "pdf_size": 7189961, "rating": "3;6;6;7", "confidence": "2;3;3;3", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "52;92;48;114", "wc_strengths": "31;73;24;25", "wc_weaknesses": "10;220;222;561", "wc_questions": "289;110;267;162", "wc_limitations": "7;1;2;37", "wc_review": "389;496;563;899", "wc_reply_reviewers": "856;84;38;74", "wc_reply_authors": "2485;0;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "7;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 76.5, 27.654113618049667 ], "wc_strengths_avg": [ 38.25, 20.24073862288627 ], "wc_weaknesses_avg": [ 253.25, 197.46059733526585 ], "wc_questions_avg": [ 207.0, 73.75296604205148 ], "wc_limitations_avg": [ 11.75, 14.7542366796795 ], "wc_review_avg": [ 586.75, 190.65987385918413 ], "wc_reply_reviewers_avg": [ 263.0, 342.7958576179123 ], "wc_reply_authors_avg": [ 621.25, 1076.036564202165 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 2.598076211353316 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10897216529163627556&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "imperial.ac.uk;imperial.ac.uk;;basf.com;ethz.ch;ethz.ch;cs.ox.ac.uk;imperial.ac.uk;ethz.ch", "author_num": 9, "aff_unique_index": "0;0;1;2;2;3;0;4", "aff_unique_norm": "Imperial College London;BASF SE;ETH Zurich;University of Oxford;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.basf.com;https://www.ethz.ch;https://www.ox.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "ICL;BASF;ETHZ;Oxford;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;0;0;2", "aff_country_unique": "United Kingdom;Germany;Switzerland" }, { "title": "ColJailBreak: Collaborative Generation and Editing for Jailbreaking Text-to-Image Deep Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94287", "id": "eGIzeTmAtE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eGIzeTmAtE", "openreview": "https://openreview.net/forum?id=eGIzeTmAtE", "poster": "/media/PosterPDFs/NeurIPS%202024/94287.png?t=1731118618.3389528", "project": "", "author_site": "Yizhuo Ma, Shanmin Pang, Qi Guo, Tianyu Wei, Qing Guo", "tldr": "", "abstract": "The commercial text-to-image deep generation models (e.g. DALL\u00b7E) can produce high-quality images based on input language descriptions. These models incorporate a black-box safety filter to prevent the generation of unsafe or unethical content, such as violent, criminal, or hateful imagery. Recent jailbreaking methods generate adversarial prompts capable of bypassing safety filters and producing unsafe content, exposing vulnerabilities in influential commercial models. However, once these adversarial prompts are identified, the safety filter can be updated to prevent the generation of unsafe images. In this work, we propose an effective, simple, and difficult-to-detect jailbreaking solution: generating safe content initially with normal text prompts and then editing the generations to embed unsafe content. The intuition behind this idea is that the deep generation model cannot reject safe generation with normal text prompts, while the editing models focus on modifying the local regions of images and do not involve a safety strategy. However, implementing such a solution is non-trivial, and we need to overcome several challenges: how to automatically confirm the normal prompt to replace the unsafe prompts, and how to effectively perform editable replacement and naturally generate unsafe content. In this work, we propose the collaborative generation and editing for jailbreaking text-to-image deep generation (ColJailBreak), which comprises three key components: adaptive normal safe substitution, inpainting-driven injection of unsafe content, and contrastive language-image-guided collaborative optimization. We validate our method on three datasets and compare it to two baseline methods. Our method could generate unsafe content through two commercial deep generation models including GPT-4 and DALL\u00b7E 2.", "keywords": "Text-to-Image Models;jailbreak attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/e73e4c2fd26c1e510233ad6f510b5a3d256d075b.zip", "author": "Yizhuo Ma;Shanmin Pang;Qi Guo;Tianyu Wei;Qing Guo", "authorids": "~Yizhuo_Ma2;~Shanmin_Pang1;~Qi_Guo10;~Tianyu_Wei2;~Qing_Guo3", "gender": "M;M;M;M;M", "homepage": "https://github.com/Yukiyousa;;;;https://tsingqguo.github.io", "dblp": ";129/3978;;;25/3038-5", "google_scholar": ";OL-lt9AAAAAJ;;;Rj2x4QUAAAAJ", "orcid": ";;0000-0003-1239-7677;0009-0002-3248-7645;0000-0003-0974-9299", "linkedin": ";;;;", "or_profile": "~Yizhuo_Ma2;~Shanmin_Pang1;~Qi_Guo10;~Tianyu_Wei2;~Qing_Guo3", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University; Agency for Science, Technology and Research (A*STAR))", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;cfar.a-star.edu.sg", "position": "MS student;Associate Professor;PhD student;MS student;Researcher", "bibtex": "@inproceedings{\nma2024coljailbreak,\ntitle={ColJailBreak: Collaborative Generation and Editing for Jailbreaking Text-to-Image Deep Generation},\nauthor={Yizhuo Ma and Shanmin Pang and Qi Guo and Tianyu Wei and Qing Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eGIzeTmAtE}\n}", "github": "", "reviewers": "kzE3;jDc9;yPxw;GbGQ", "pdf_size": 6287570, "rating": "4;5;5;6", "confidence": "4;4;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "31;61;34;39", "wc_strengths": "31;48;24;11", "wc_weaknesses": "349;124;47;1", "wc_questions": "2;26;19;1", "wc_limitations": "1;12;1;1", "wc_review": "414;271;125;53", "wc_reply_reviewers": "319;0;0;0", "wc_reply_authors": "479;31;62;31", "reply_reviewers": "2;0;0;0", "reply_authors": "2;2;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 41.25, 11.755317945508747 ], "wc_strengths_avg": [ 28.5, 13.35102992281869 ], "wc_weaknesses_avg": [ 130.25, 133.72242706442327 ], "wc_questions_avg": [ 12.0, 10.793516572461451 ], "wc_limitations_avg": [ 3.75, 4.763139720814412 ], "wc_review_avg": [ 215.75, 138.81529994924912 ], "wc_reply_reviewers_avg": [ 79.75, 138.13105190361796 ], "wc_reply_authors_avg": [ 150.75, 189.93732518912654 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8032170172206005641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;cfar.a-star.edu.sg", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "XJTU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Fairness-Aware Meta-Learning via Nash Bargaining", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94286", "id": "eGJnB3tUgv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eGJnB3tUgv", "openreview": "https://openreview.net/forum?id=eGJnB3tUgv", "poster": "/media/PosterPDFs/NeurIPS%202024/94286.png?t=1732838381.9445107", "project": "", "author_site": "Yi Zeng, Xuelin Yang, Li Chen, Cristian Ferrer, Ming Jin, Michael Jordan, Ruoxi Jia", "tldr": "", "abstract": "To address issues of group-level fairness in machine learning, it is natural to adjust model parameters based on specific fairness objectives over a sensitive-attributed validation set. Such an adjustment procedure can be cast within a meta-learning framework. However, naive integration of fairness goals via meta-learning can cause hypergradient conflicts for subgroups, resulting in unstable convergence and compromising model performance and fairness. To navigate this issue, we frame the resolution of hypergradient conflicts as a multi-player cooperative bargaining game. We introduce a two-stage meta-learning framework in which the first stage involves the use of a Nash Bargaining Solution (NBS) to resolve hypergradient conflicts and steer the model toward the Pareto front, and the second stage optimizes with respect to specific fairness goals.\nOur method is supported by theoretical results, notably a proof of the NBS for gradient aggregation free from linear independence assumptions, a proof of Pareto improvement, and a proof of monotonic improvement in validation loss. We also show empirical effects across various fairness objectives in six key fairness datasets and two image classification tasks.", "keywords": "Fairness;Bargaining Game;Nash Bargaining Solution;Meta-Learning", "primary_area": "fairness", "supplementary_material": "", "author": "Yi Zeng;Xuelin Yang;Li Chen;Cristian Canton Ferrer;Ming Jin;Michael Jordan;Ruoxi Jia", "authorids": "~Yi_Zeng3;~Xuelin_Yang1;~Li_Chen13;~Cristian_Canton_Ferrer1;~Ming_Jin2;~Michael_Jordan1;~Ruoxi_Jia1", "gender": "M;;;M;M;M;", "homepage": "https://yizeng623.github.io/;;;http://canton.cat;http://www.jinming.tech/;http://www.cs.berkeley.edu/~jordan/;https://ruoxijia.info/", "dblp": "75/148;;;43/950;;j/MichaelIJordan;147/5355-1", "google_scholar": "slUNmHQAAAAJ;;;https://scholar.google.com/citations?hl=en;YdxdTtkAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;JCrug-YAAAAJ", "orcid": "0000-0002-6901-9194;;;0000-0002-3189-5498;;0000-0001-8935-817X;", "linkedin": "chnyizeng/;;;cristiancanton/;;;", "or_profile": "~Yi_Zeng3;~Xuelin_Yang1;~Li_Chen13;~Cristian_Canton_Ferrer1;~Ming_Jin2;~Michael_Jordan1;~Ruoxi_Jia1", "aff": "Virginia Tech;;;Meta;Virginia Tech;University of California, Berkeley;Virginia Tech", "aff_domain": "vt.edu;;;meta.com;vt.edu;berkeley.edu;vt.edu", "position": "PhD student;;;Principal Researcher;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzeng2024fairnessaware,\ntitle={Fairness-Aware Meta-Learning via Nash Bargaining},\nauthor={Yi Zeng and Xuelin Yang and Li Chen and Cristian Canton Ferrer and Ming Jin and Michael Jordan and Ruoxi Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eGJnB3tUgv}\n}", "github": "", "reviewers": "RvEe;gYXi;YN1k;hQxX", "pdf_size": 3763032, "rating": "6;6;6;7", "confidence": "3;2;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "113;71;83;328", "wc_strengths": "68;45;164;187", "wc_weaknesses": "101;84;69;245", "wc_questions": "42;49;110;111", "wc_limitations": "1;9;1;2", "wc_review": "325;258;427;873", "wc_reply_reviewers": "29;20;27;51", "wc_reply_authors": "23;20;34;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 148.75, 104.61447079634824 ], "wc_strengths_avg": [ 116.0, 60.60115510450275 ], "wc_weaknesses_avg": [ 124.75, 70.34335434140172 ], "wc_questions_avg": [ 78.0, 32.59601202601324 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 470.75, 239.90870659482118 ], "wc_reply_reviewers_avg": [ 31.75, 11.60549438843516 ], "wc_reply_authors_avg": [ 27.25, 5.889609494694874 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7590200404162766512&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "vt.edu;;;meta.com;vt.edu;berkeley.edu;vt.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Virginia Tech;Meta;University of California, Berkeley", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.vt.edu;https://meta.com;https://www.berkeley.edu", "aff_unique_abbr": "VT;Meta;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Group Robustness is in the Details: Revisiting Finetuning under Spurious Correlations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94285", "id": "eHzIwAhj06", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eHzIwAhj06", "openreview": "https://openreview.net/forum?id=eHzIwAhj06", "poster": "", "project": "", "author_site": "Tyler LaBonte, John Hill, Xinchen Zhang, Vidya Muthukumar, Abhishek Kumar", "tldr": "", "abstract": "Modern machine learning models are prone to over-reliance on spurious correlations, which can often lead to poor performance on minority groups. In this paper, we identify surprising and nuanced behavior of finetuned models on worst-group accuracy via comprehensive experiments on four well-established benchmarks across vision and language tasks. We first show that the commonly used class-balancing techniques of mini-batch upsampling and loss upweighting can induce a decrease in worst-group accuracy (WGA) with training epochs, leading to performance no better than without class-balancing. While in some scenarios, removing data to create a class-balanced subset is more effective, we show this depends on group structure and propose a mixture method which can outperform both techniques. Next, we show that scaling pretrained models is generally beneficial for worst-group accuracy, but only in conjunction with appropriate class-balancing. Finally, we identify spectral imbalance in finetuning features as a potential source of group disparities --- minority group covariance matrices incur a larger spectral norm than majority groups once conditioned on the classes. Our results show more nuanced interactions of modern finetuned models with group robustness than was previously known. Our code is available at https://github.com/tmlabonte/revisiting-finetuning.", "keywords": "spurious correlations;group robustness;distribution shift;class balancing", "primary_area": "fairness", "supplementary_material": "", "author": "Tyler LaBonte;John Collins Hill;Xinchen zhang;Vidya Muthukumar;Abhishek Kumar", "authorids": "~Tyler_LaBonte1;~John_Collins_Hill1;~Xinchen_zhang3;~Vidya_Muthukumar3;~Abhishek_Kumar1", "gender": "M;M;M;F;", "homepage": "https://tyler-labonte.com;;;https://vmuthukumar.ece.gatech.edu;http://inductivebias.ml", "dblp": "251/5689.html;;;149/0019;67/6188-1", "google_scholar": "0_bKeg4AAAAJ;;;K2OEs2YAAAAJ;6vghMS0AAAAJ", "orcid": "0000-0002-3781-7212;;;;", "linkedin": "https://linkedin.com/in/tmlabonte;jack-hill-3290b2249/;xinchen-zhang941?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;", "or_profile": "~Tyler_LaBonte1;~John_Collins_Hill1;~Xinchen_zhang3;~Vidya_Muthukumar3;~Abhishek_Kumar1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Google DeepMind", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;google.com", "position": "PhD student;MS student;MS student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nlabonte2024the,\ntitle={The Group Robustness is in the Details: Revisiting Finetuning under Spurious Correlations},\nauthor={Tyler LaBonte and John Collins Hill and Xinchen zhang and Vidya Muthukumar and Abhishek Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eHzIwAhj06}\n}", "github": "", "reviewers": "Rwfv;E5Mf;kRBT;ax3f", "pdf_size": 24523417, "rating": "2;4;5;7", "confidence": "5;4;2;4", "soundness": "1;2;2;3", "novelty": "1;2;3;3", "presentation": "1;3;3;3", "wc_summary": "12;78;27;66", "wc_strengths": "15;80;29;130", "wc_weaknesses": "74;156;189;224", "wc_questions": "3;107;29;111", "wc_limitations": "4;1;1;15", "wc_review": "108;422;275;546", "wc_reply_reviewers": "756;18;0;24", "wc_reply_authors": "726;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 4.5, 1.8027756377319946 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 45.75, 27.11434122378783 ], "wc_strengths_avg": [ 63.5, 45.37895988230669 ], "wc_weaknesses_avg": [ 160.75, 55.55796522551919 ], "wc_questions_avg": [ 62.5, 47.42098691507801 ], "wc_limitations_avg": [ 5.25, 5.7608593109014565 ], "wc_review_avg": [ 337.75, 163.69846517301255 ], "wc_reply_reviewers_avg": [ 199.5, 321.41678549820637 ], "wc_reply_authors_avg": [ 181.5, 314.36722157375124 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.44539933408304444, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14219179036438569110&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.gatech.edu;https://deepmind.com", "aff_unique_abbr": "Georgia Tech;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Transcendence: Generative Models Can Outperform The Experts That Train Them", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94284", "id": "eJG9uDqCY9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eJG9uDqCY9", "openreview": "https://openreview.net/forum?id=eJG9uDqCY9", "poster": "/media/PosterPDFs/NeurIPS%202024/94284.png?t=1731341557.5972202", "project": "", "author_site": "Edwin Zhang, Vincent Zhu, Naomi Saphra, Anat Kleiman, Benjamin Edelman, Milind Tambe, Sham Kakade, Eran Malach", "tldr": "", "abstract": "Generative models are trained with the simple objective of imitating the conditional probability distribution induced by the data they are trained on. Therefore, when trained on data generated by humans, we may not expect the artificial model to outperform the humans on their original objectives. In this work, we study the phenomenon of *transcendence*: when a generative model achieves capabilities that surpass the abilities of the experts generating its data. We demonstrate transcendence by training an autoregressive transformer to play chess from game transcripts, and show that the trained model can sometimes achieve better performance than all players in the dataset. We theoretically prove that transcendence is enabled by low-temperature sampling, and rigorously assess this experimentally. Finally, we discuss other sources of transcendence, laying the groundwork for future investigation of this phenomenon in a broader setting.", "keywords": "theory;foundations;generative modelling;sequence modelling", "primary_area": "generative_models", "supplementary_material": "", "author": "Edwin Zhang;Vincent Zhu;Naomi Saphra;Anat Kleiman;Benjamin L. Edelman;Milind Tambe;Sham M. Kakade;eran malach", "authorids": "~Edwin_Zhang2;~Vincent_Zhu1;~Naomi_Saphra1;~Anat_Kleiman1;~Benjamin_L._Edelman1;~Milind_Tambe1;~Sham_M._Kakade1;~eran_malach1", "gender": ";M;F;;;M;M;M", "homepage": "https://eddie.win;;http://nsaphra.github.io/;;http://teamcore.seas.harvard.edu/tambe;https://shamulent.github.io;;https://www.benjaminedelman.com/", "dblp": ";;131/6883;;67/2667;s/SMKakade;202/2566;241/9410", "google_scholar": ";;TPhVfX8AAAAJ;;YOVZiJkAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;I15dUOwAAAAJ;mQSj2C0AAAAJ", "orcid": ";;;;;;;", "linkedin": ";vincent-zhu-6819a4224/;naomi-saphra-028b8060/;anat-kleiman;;;;", "or_profile": "~Edwin_Zhang2;~Vincent_Zhu1;~Naomi_Saphra1;~Anat_Kleiman1;~Milind_Tambe1;~Sham_M._Kakade1;~eran_malach1;~Benjamin_L_Edelman1", "aff": "Harvard University;University of California, Santa Barbara;Harvard University;Harvard University;Google;Harvard University;Harvard University;Harvard University", "aff_domain": "harvard.edu;ucsb.edu;harvard.edu;harvard.edu;google.com;harvard.edu;harvard.edu;harvard.edu", "position": "PhD student;Undergrad student;Fellow;PhD student;Principal Researcher;Full Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nzhang2024transcendence,\ntitle={Transcendence: Generative Models Can Outperform The Experts That Train Them},\nauthor={Edwin Zhang and Vincent Zhu and Naomi Saphra and Anat Kleiman and Benjamin L. Edelman and Milind Tambe and Sham M. Kakade and eran malach},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eJG9uDqCY9}\n}", "github": "", "reviewers": "mPZQ;np7r;Se7B", "pdf_size": 14732059, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "91;187;107", "wc_strengths": "93;60;84", "wc_weaknesses": "118;100;273", "wc_questions": "109;72;47", "wc_limitations": "150;10;34", "wc_review": "561;429;545", "wc_reply_reviewers": "32;44;26", "wc_reply_authors": "62;70;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 128.33333333333334, 41.994708661396324 ], "wc_strengths_avg": [ 79.0, 13.92838827718412 ], "wc_weaknesses_avg": [ 163.66666666666666, 77.6587978846498 ], "wc_questions_avg": [ 76.0, 25.468935326524086 ], "wc_limitations_avg": [ 64.66666666666667, 61.1300980605208 ], "wc_review_avg": [ 511.6666666666667, 58.8179866669674 ], "wc_reply_reviewers_avg": [ 34.0, 7.483314773547883 ], "wc_reply_authors_avg": [ 44.0, 31.283648551066843 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13655082249348011186&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "harvard.edu;ucsb.edu;harvard.edu;harvard.edu;google.com;harvard.edu;harvard.edu;harvard.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0;0;0", "aff_unique_norm": "Harvard University;University of California, Santa Barbara;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.harvard.edu;https://www.ucsb.edu;https://www.google.com", "aff_unique_abbr": "Harvard;UCSB;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Santa Barbara;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "TrackIME: Enhanced Video Point Tracking via Instance Motion Estimation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94283", "id": "eKHQbgvL3G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eKHQbgvL3G", "openreview": "https://openreview.net/forum?id=eKHQbgvL3G", "poster": "", "project": "", "author_site": "Seong Hyeon Park, Huiwon Jang, Byungwoo Jeon, Sukmin Yun, Paul Hongsuck Seo, Jinwoo Shin", "tldr": "", "abstract": "Tracking points in video frames is essential for understanding video content. However, the task is fundamentally hindered by the computation demands for brute-force correspondence matching across the frames. As the current models down-sample the frame resolutions to mitigate this challenge, they fall short in accurately representing point trajectories due to information truncation. Instead, we address the challenge by pruning the search space for point tracking and let the model process only the important regions of the frames without down-sampling. Our first key idea is to identify the object instance and its trajectory over the frames, then prune the regions of the frame that do not contain the instance. Concretely, to estimate the instance\u2019s trajectory, we track a group of points on the instance and aggregate their motion trajectories. Furthermore, to deal with the occlusions in complex scenes, we propose to compensate for the occluded points while tracking. To this end, we introduce a unified framework that jointly performs point tracking and segmentation, providing synergistic effects between the two tasks. For example, the segmentation results enable a tracking model to avoid the occluded points referring to the instance mask, and conversely, the improved tracking results can help to produce more accurate segmentation masks. Our framework can be easily incorporated with various tracking models, and we demonstrate its efficacy for enhanced point tracking throughout extensive experiments. For example, on the recent TAP-Vid benchmark, our framework consistently improves all baselines, e.g., up to 13.5% improvement on the average Jaccard metric.", "keywords": "motion estimation;point tracking;video segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Seong Hyeon Park;Huiwon Jang;Byungwoo Jeon;Sukmin Yun;Paul Hongsuck Seo;Jinwoo Shin", "authorids": "~Seong_Hyeon_Park2;~Huiwon_Jang1;~Byungwoo_Jeon1;~Sukmin_Yun1;~Paul_Hongsuck_Seo1;~Jinwoo_Shin1", "gender": "M;M;M;;M;M", "homepage": "https://www.shpark.org/;https://huiwon-jang.github.io/;https://rootyjeon.github.io/;https://sites.google.com/view/sukmin-yun;https://phseo.github.io;https://sites.google.com/site/mijirim/", "dblp": "260/0708;332/0647;;234/9078;172/0938;31/7062", "google_scholar": "ZRKX9B0AAAAJ;https://scholar.google.com/citations?hl=en;;fQcZ_hQAAAAJ;https://scholar.google.co.kr/citations?user=Tp7U8_UAAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;;;", "linkedin": ";huiwon-jang-5a789b250;;sukmin-yun-975b67129/;;", "or_profile": "~Seong_Hyeon_Park2;~Huiwon_Jang1;~Byungwoo_Jeon1;~Sukmin_Yun1;~Paul_Hongsuck_Seo1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea University;Mohamed bin Zayed University of Artificial Intelligence;Korea University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;korea.ac.kr;mbzuai.ac.ae;korea.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Undergrad student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npark2024trackime,\ntitle={Track{IME}: Enhanced Video Point Tracking via Instance Motion Estimation},\nauthor={Seong Hyeon Park and Huiwon Jang and Byungwoo Jeon and Sukmin Yun and Paul Hongsuck Seo and Jinwoo Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eKHQbgvL3G}\n}", "github": "", "reviewers": "7a51;BSTo;jQxW", "pdf_size": 10073931, "rating": "5;6;8", "confidence": "3;4;5", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "35;58;599", "wc_strengths": "31;66;405", "wc_weaknesses": "76;117;41", "wc_questions": "19;121;275", "wc_limitations": "1;1;42", "wc_review": "162;363;1362", "wc_reply_reviewers": "102;177;282", "wc_reply_authors": "434;581;515", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 230.66666666666666, 260.62020046206874 ], "wc_strengths_avg": [ 167.33333333333334, 168.66205527293 ], "wc_weaknesses_avg": [ 78.0, 31.05908348078975 ], "wc_questions_avg": [ 138.33333333333334, 105.22779522962975 ], "wc_limitations_avg": [ 14.666666666666666, 19.3275853524323 ], "wc_review_avg": [ 629.0, 524.7647091792663 ], "wc_reply_reviewers_avg": [ 187.0, 73.824115301167 ], "wc_reply_authors_avg": [ 510.0, 60.11655346075655 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q5vFNbCHbmAJ:scholar.google.com/&scioq=TrackIME:+Enhanced+Video+Point+Tracking+via+Instance+Motion+Estimation&hl=en&as_sdt=0,11", "gs_version_total": 0, "email": "kaist.edu;kaist.ac.kr;korea.ac.kr;mbzuai.ac.ae;korea.ac.kr;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;0;1;2;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr;https://mbzuai.ac.ae", "aff_unique_abbr": "KAIST;KU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "South Korea;United Arab Emirates" }, { "title": "Structure Consistent Gaussian Splatting with Matching Prior for Few-shot Novel View Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94282", "id": "eKSRTlzRWG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eKSRTlzRWG", "openreview": "https://openreview.net/forum?id=eKSRTlzRWG", "poster": "/media/PosterPDFs/NeurIPS%202024/94282.png?t=1733540708.0731292", "project": "", "author_site": "Rui Peng, Wangze Xu, Luyang Tang, levio leo, Jianbo Jiao, Ronggang Wang", "tldr": "", "abstract": "Despite the substantial progress of novel view synthesis, existing methods, either based on the Neural Radiance Fields (NeRF) or more recently 3D Gaussian Splatting (3DGS), suffer significant degradation when the input becomes sparse. Numerous efforts have been introduced to alleviate this problem, but they still struggle to synthesize satisfactory results efficiently, especially in the large scene. In this paper, we propose SCGaussian, a Structure Consistent Gaussian Splatting method using matching priors to learn 3D consistent scene structure. Considering the high interdependence of Gaussian attributes, we optimize the scene structure in two folds: rendering geometry and, more importantly, the position of Gaussian primitives, which is hard to be directly constrained in the vanilla 3DGS due to the non-structure property. To achieve this, we present a hybrid Gaussian representation. Besides the ordinary non-structure Gaussian primitives, our model also consists of ray-based Gaussian primitives that are bound to matching rays and whose optimization of their positions is restricted along the ray. Thus, we can utilize the matching correspondence to directly enforce the position of these Gaussian primitives to converge to the surface points where rays intersect. Extensive experiments on forward-facing, surrounding, and complex large scenes show the effectiveness of our approach with state-of-the-art performance and high efficiency. Code is available at https://github.com/prstrive/SCGaussian.", "keywords": "Gaussian Splatting;Few-Shot Novel View Synthesis;Structure Consistency", "primary_area": "machine_vision", "supplementary_material": "/attachment/f46a442832e1500eadce5440aa901832c8f5c9bc.zip", "author": "Rui Peng;Wangze Xu;Luyang Tang;Liwei Liao;Jianbo Jiao;Ronggang Wang", "authorids": "~Rui_Peng1;~Wangze_Xu1;~Luyang_Tang1;~Liwei_Liao1;~Jianbo_Jiao2;~Ronggang_Wang1", "gender": "M;M;F;M;;M", "homepage": "https://prstrive.github.io/;https://zezeaaa.github.io/;https://github.com/mush-room;;https://jianbojiao.com/;http://www.ece.pku.edu.cn/2014/jsjyy_0415/48.html", "dblp": ";382/2182;303/1242;371/3873;150/6622;", "google_scholar": "Hfz_H50AAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;4YNwVmIAAAAJ;HkEiMMwAAAAJ;", "orcid": ";;;0009-0002-0255-3090;;0000-0003-0873-0465", "linkedin": ";;;;;", "or_profile": "~Rui_Peng1;~Wangze_Xu1;~Luyang_Tang1;~Liwei_Liao1;~Jianbo_Jiao2;~Ronggang_Wang1", "aff": "Peking University;Peking University;Peking University;Peking University;University of Birmingham;Peking University Shenzhen Graduate School", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;bham.ac.uk;pkusz.edu.cn", "position": "PhD student;MS student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npeng2024structure,\ntitle={Structure Consistent Gaussian Splatting with Matching Prior for Few-shot Novel View Synthesis},\nauthor={Rui Peng and Wangze Xu and Luyang Tang and Liwei Liao and Jianbo Jiao and Ronggang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eKSRTlzRWG}\n}", "github": "", "reviewers": "PwZQ;nvzF;42ua;k97m", "pdf_size": 6805599, "rating": "5;5;7;7", "confidence": "4;4;5;5", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "69;50;27;81", "wc_strengths": "78;53;31;51", "wc_weaknesses": "236;125;73;221", "wc_questions": "6;5;5;150", "wc_limitations": "7;6;7;30", "wc_review": "396;239;143;533", "wc_reply_reviewers": "0;22;8;434", "wc_reply_authors": "0;45;0;419", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;1;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.75, 20.42516829796024 ], "wc_strengths_avg": [ 53.25, 16.67895380412093 ], "wc_weaknesses_avg": [ 163.75, 67.51805314136361 ], "wc_questions_avg": [ 41.5, 62.643834493108734 ], "wc_limitations_avg": [ 12.5, 10.111874208078342 ], "wc_review_avg": [ 327.75, 148.9922397307994 ], "wc_reply_reviewers_avg": [ 116.0, 183.7661557523583 ], "wc_reply_authors_avg": [ 116.0, 175.89911881530276 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17919860901495695217&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn;bham.ac.uk;pkusz.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Peking University;University of Birmingham", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.birmingham.ac.uk", "aff_unique_abbr": "Peking U;Birmingham", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "RoME: A Robust Mixed-Effects Bandit Algorithm for Optimizing Mobile Health Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94281", "id": "eKVugi5zr0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eKVugi5zr0", "openreview": "https://openreview.net/forum?id=eKVugi5zr0", "poster": "/media/PosterPDFs/NeurIPS%202024/94281.png?t=1731178355.097848", "project": "", "author_site": "Easton Huch, Jieru Shi, Madeline R Abbott, Jessica Golbus, Alexander Moreno, Walter Dempsey", "tldr": "", "abstract": "Mobile health leverages personalized and contextually tailored interventions optimized through bandit and reinforcement learning algorithms. In practice, however, challenges such as participant heterogeneity, nonstationarity, and nonlinear relationships hinder algorithm performance. We propose RoME, a **Ro**bust **M**ixed-**E**ffects contextual bandit algorithm that simultaneously addresses these challenges via (1) modeling the differential reward with user- and time-specific random effects, (2) network cohesion penalties, and (3) debiased machine learning for flexible estimation of baseline rewards. We establish a high-probability regret bound that depends solely on the dimension of the differential-reward model, enabling us to achieve robust regret bounds even when the baseline reward is highly complex. We demonstrate the superior performance of the RoME algorithm in a simulation and two off-policy evaluation studies.", "keywords": "Bandit Algorithms;Causal Inference;Supervised Learning;mHealth;Mixed-effects Modeling", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/85a8a62629cb32bfe4cb0ad84510100bc1441033.zip", "author": "Easton Knight Huch;Jieru Shi;Madeline R Abbott;Jessica R Golbus;Alexander Moreno;Walter H. Dempsey", "authorids": "~Easton_Knight_Huch1;~Jieru_Shi1;~Madeline_R_Abbott1;~Jessica_R_Golbus1;~Alexander_Moreno1;~Walter_H._Dempsey1", "gender": "M;F;;F;M;M", "homepage": ";https://herashi.github.io/;;;;", "dblp": ";;;;161/6588;203/4443", "google_scholar": ";https://scholar.google.com/citations?hl=zh-TW;;;zoqP2-IAAAAJ;TJ5P970AAAAJ", "orcid": "0000-0002-3822-1883;0000-0003-1108-1558;;0000-0002-9538-3926;;", "linkedin": "eastonhuch;;;;;", "or_profile": "~Easton_Knight_Huch1;~Jieru_Shi1;~Madeline_R_Abbott1;~Jessica_R_Golbus1;~Alexander_Moreno1;~Walter_H._Dempsey1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;;University of Michigan - Ann Arbor;STR;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;;umich.edu;str.edu;umich.edu", "position": "PhD student;PhD student;;Instructor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhuch2024rome,\ntitle={Ro{ME}: A Robust Mixed-Effects Bandit Algorithm for Optimizing Mobile Health Interventions},\nauthor={Easton Knight Huch and Jieru Shi and Madeline R Abbott and Jessica R Golbus and Alexander Moreno and Walter H. Dempsey},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eKVugi5zr0}\n}", "github": "", "reviewers": "SKyx;JBD9;HodX;cFGH", "pdf_size": 1651926, "rating": "3;7;7;7", "confidence": "4;4;5;4", "soundness": "3;4;2;4", "novelty": "2;3;2;4", "presentation": "2;3;3;3", "wc_summary": "29;68;68;116", "wc_strengths": "32;78;182;102", "wc_weaknesses": "111;40;136;49", "wc_questions": "41;252;168;52", "wc_limitations": "2;10;82;109", "wc_review": "215;448;636;428", "wc_reply_reviewers": "0;30;28;0", "wc_reply_authors": "0;24;19;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 30.84132779242813 ], "wc_strengths_avg": [ 98.5, 54.37600573782521 ], "wc_weaknesses_avg": [ 84.0, 40.601724101323576 ], "wc_questions_avg": [ 128.25, 87.06427223609005 ], "wc_limitations_avg": [ 50.75, 45.84416538666616 ], "wc_review_avg": [ 431.75, 149.1448540848795 ], "wc_reply_reviewers_avg": [ 14.5, 14.517231140957975 ], "wc_reply_authors_avg": [ 10.75, 10.894379284750462 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OxyLkMZepSQJ:scholar.google.com/&scioq=RoME:+A+Robust+Mixed-Effects+Bandit+Algorithm+for+Optimizing+Mobile+Health+Interventions&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "umich.edu;umich.edu;;umich.edu;str.edu;umich.edu", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Michigan;", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;", "aff_unique_abbr": "UM;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "eM3Wzs6Unt", "title": "Learning Variational Temporal Abstraction Embeddings in Option-Induced MDPs", "track": "main", "status": "Reject", "tldr": "", "abstract": "The option framework in hierarchical reinforcement learning has notably advanced the automatic discovery of temporally-extended actions from long-horizon tasks. However, existing methods often struggle with ineffective exploration and unstable updates when learning action and option policies simultaneously. Addressing these challenges, we introduce the Variational Markovian Option Critic (VMOC), an off-policy algorithm with provable convergence that employs variational inference to stabilize updates. VMOC naturally integrates maximum entropy intrinsic rewards to promote the exploration of diverse and effective options. Furthermore, we adopt low-cost option embeddings instead of traditional, computationally expensive option tuples, enhancing scalability and expressiveness. Extensive experiments in challenging Mujoco environments validate VMOC\u2019s superior performance over existing on-policy and off-policy methods, demonstrating its effectiveness in learning coherent and diverse option sets suitable for complex tasks.", "keywords": "reinforcement learning;option framework", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/735ea0d32ccbc2dd295d5be6e37b83fa7518d604.zip", "author": "Chang Li;Xiaodong He", "authorids": "~Chang_Li5;~Xiaodong_He1", "gender": "M;M", "homepage": "https://github.com/spacegoing;", "dblp": ";03/3923-1", "google_scholar": ";W5WbqgoAAAAJ", "orcid": "0000-0002-9295-1254;", "linkedin": ";", "or_profile": "~Chang_Li5;~Xiaodong_He1", "aff": ";JD AI Research", "aff_domain": ";jd.com", "position": ";Director", "bibtex": "@misc{\nanonymous2024learning,\ntitle={Learning Variational Temporal Abstraction Embeddings in Option-Induced {MDP}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=eM3Wzs6Unt}\n}", "github": "", "project": "", "reviewers": "AsEd;8cLu;WKv9;w5JT", "site": "https://openreview.net/forum?id=eM3Wzs6Unt", "pdf_size": 8585494, "rating": "3;4;4;6", "confidence": "4;3;3;3", "soundness": "3;2;3;3", "novelty": "1;1;2;3", "presentation": "2;2;1;3", "wc_summary": "130;72;125;55", "wc_strengths": "47;23;19;85", "wc_weaknesses": "533;93;248;283", "wc_questions": "52;23;155;3", "wc_limitations": "18;11;47;5", "wc_review": "780;222;594;431", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 1.75, 0.82915619758885 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.5, 32.60751447136074 ], "wc_strengths_avg": [ 43.5, 26.24404694402142 ], "wc_weaknesses_avg": [ 289.25, 157.85178966359553 ], "wc_questions_avg": [ 58.25, 58.512285034854 ], "wc_limitations_avg": [ 20.25, 16.11482236948332 ], "wc_review_avg": [ 506.75, 205.60809201001794 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KfgH_ow7OO8J:scholar.google.com/&scioq=Learning+Variational+Temporal+Abstraction+Embeddings+in+Option-Induced+MDPs&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "JD", "aff_unique_dep": "JD AI Research", "aff_unique_url": "https://www.jd.com", "aff_unique_abbr": "JD AI", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "GeoLRM: Geometry-Aware Large Reconstruction Model for High-Quality 3D Gaussian Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94280", "id": "eM5d7ZmekA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eM5d7ZmekA", "openreview": "https://openreview.net/forum?id=eM5d7ZmekA", "poster": "/media/PosterPDFs/NeurIPS%202024/94280.png?t=1731671404.0735571", "project": "", "author_site": "Chubin Zhang, Hongliang Song, Yi Wei, Chen Yu, Jiwen Lu, Yansong Tang", "tldr": "", "abstract": "In this work, we introduce the Geometry-Aware Large Reconstruction Model (GeoLRM), an approach which can predict high-quality assets with 512k Gaussians and 21 input images in only 11 GB GPU memory. Previous works neglect the inherent sparsity of 3D structure and do not utilize explicit geometric relationships between 3D and 2D images. This limits these methods to a low-resolution representation and makes it difficult to scale up to the dense views for better quality. GeoLRM tackles these issues by incorporating a novel 3D-aware transformer structure that directly processes 3D points and uses deformable cross-attention mechanisms to effectively integrate image features into 3D representations. We implement this solution through a two-stage pipeline: initially, a lightweight proposal network generates a sparse set of 3D anchor points from the posed image inputs; subsequently, a specialized reconstruction transformer refines the geometry and retrieves textural details. Extensive experimental results demonstrate that GeoLRM significantly outperforms existing models, especially for dense view inputs. We also demonstrate the practical applicability of our model with 3D generation tasks, showcasing its versatility and potential for broader adoption in real-world applications. The project page: https://linshan-bin.github.io/GeoLRM/.", "keywords": "Large reconstruction model;3D generation;3D gaussian splatting", "primary_area": "generative_models", "supplementary_material": "", "author": "Chubin Zhang;Hongliang Song;Yi Wei;Chen Yu;Jiwen Lu;Yansong Tang", "authorids": "~Chubin_Zhang1;~Hongliang_Song1;~Yi_Wei1;~Chen_Yu14;~Jiwen_Lu1;~Yansong_Tang1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/LinShan-Bin;https://github.com/EdgarSong;https://weiyithu.github.io/;;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/;https://andytang15.github.io/", "dblp": "344/0810;;;;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen;214/9568", "google_scholar": "etopZM4AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;TN8uDQoAAAAJ;TIbistUAAAAJ", "orcid": "0009-0007-7598-6432;;;;0000-0002-6121-5529;", "linkedin": ";;;yu-chen-69b22311/;;", "or_profile": "~Chubin_Zhang1;~Hongliang_Song1;~Yi_Wei1;~Chen_Yu14;~Jiwen_Lu1;~Yansong_Tang1", "aff": "Tsinghua University;Alibaba Group;Automation, Tsinghua University, Tsinghua University;Alibaba Group;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;alibaba-inc.com;mails.tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Undergrad student;Researcher;PhD student;senior algorithm expert;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024geolrm,\ntitle={Geo{LRM}: Geometry-Aware Large Reconstruction Model for High-Quality 3D Gaussian Generation},\nauthor={Chubin Zhang and Hongliang Song and Yi Wei and Chen Yu and Jiwen Lu and Yansong Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eM5d7ZmekA}\n}", "github": "", "reviewers": "7HGK;hKRc;DmmG", "pdf_size": 4633424, "rating": "4;4;7", "confidence": "5;4;5", "soundness": "1;2;4", "novelty": "2;2;4", "presentation": "2;3;3", "wc_summary": "92;53;202", "wc_strengths": "54;33;115", "wc_weaknesses": "239;296;147", "wc_questions": "97;26;101", "wc_limitations": "10;4;1", "wc_review": "492;412;566", "wc_reply_reviewers": "127;80;0", "wc_reply_authors": "203;9;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 1.247219128924647 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 115.66666666666667, 63.089002387280004 ], "wc_strengths_avg": [ 67.33333333333333, 34.77866523539332 ], "wc_weaknesses_avg": [ 227.33333333333334, 61.38584708401622 ], "wc_questions_avg": [ 74.66666666666667, 34.451253807211266 ], "wc_limitations_avg": [ 5.0, 3.7416573867739413 ], "wc_review_avg": [ 490.0, 62.88614049746308 ], "wc_reply_reviewers_avg": [ 69.0, 52.42772803266099 ], "wc_reply_authors_avg": [ 70.66666666666667, 93.64590517238622 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7336568443676765153&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;alibaba-inc.com;mails.tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;1;0;0", "aff_unique_norm": "Tsinghua University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "THU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "First-Order Methods for Linearly Constrained Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94279", "id": "eNCYpTCGhr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eNCYpTCGhr", "openreview": "https://openreview.net/forum?id=eNCYpTCGhr", "poster": "", "project": "", "author_site": "Guy Kornowski, Swati Padmanabhan, Kai Wang, Zhe Zhang, Suvrit Sra", "tldr": "", "abstract": "Algorithms for bilevel optimization often encounter Hessian computations, which are prohibitive in high dimensions. While recent works offer first-order methods for unconstrained bilevel problems, the constrained setting remains relatively underexplored. We present first-order linearly constrained optimization methods with finite-time hypergradient stationarity guarantees. For linear equality constraints, we attain $\\epsilon$-stationarity in $\\widetilde{O}(\\epsilon^{-2})$ gradient oracle calls, which is nearly-optimal. \nFor linear inequality constraints, we attain $(\\delta,\\epsilon)$-Goldstein stationarity in $\\widetilde{O}(d{\\delta^{-1} \\epsilon^{-3}})$ gradient oracle calls, where $d$ is the upper-level dimension. \nFinally, we obtain for the linear inequality setting dimension-free rates of $\\widetilde{O}({\\delta^{-1} \\epsilon^{-4}})$ oracle complexity under the additional assumption of oracle access to the optimal dual variable. Along the way, we develop new nonsmooth nonconvex optimization methods with inexact oracles. Our numerical experiments verify these guarantees.", "keywords": "Constrained bilevel optimization;first-order methods;nonsmooth nonconvex optimization;bilevel optimization;nonconvex optimization;nonsmooth optimization;Goldstein stationarity", "primary_area": "optimization", "supplementary_material": "/attachment/ba5fe8eb68c8091a3854dcbb681d88e8c6f5be8b.zip", "author": "Guy Kornowski;Swati Padmanabhan;Kai Wang;Zhe Zhang;Suvrit Sra", "authorids": "~Guy_Kornowski1;~Swati_Padmanabhan1;~Kai_Wang5;~Zhe_Zhang6;~Suvrit_Sra1", "gender": ";F;M;M;", "homepage": ";https://web.mit.edu/pswt/www/;https://guaguakai.github.io/;https://www.isye.gatech.edu/users/zhe-zhang;https://optml.mit.edu", "dblp": "276/7550;237/9510;78/2022-40;;90/930", "google_scholar": ";https://scholar.google.com/citations?hl=en;gGSsQmsAAAAJ;;eyCw9goAAAAJ", "orcid": "0000-0001-8058-2909;;0000-0002-2446-987X;;", "linkedin": ";;guaguakai/;;", "or_profile": "~Guy_Kornowski1;~Swati_Padmanabhan1;~Kai_Wang5;~Zhe_Zhang6;~Suvrit_Sra1", "aff": "Weizmann Institute of Science;University of Washington;Georgia Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "weizmann.ac.il;uw.edu;gatech.edu;;mit.edu", "position": "PhD student;PhD student;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nkornowski2024firstorder,\ntitle={First-Order Methods for Linearly Constrained Bilevel Optimization},\nauthor={Guy Kornowski and Swati Padmanabhan and Kai Wang and Zhe Zhang and Suvrit Sra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eNCYpTCGhr}\n}", "github": "", "reviewers": "c7kZ;GkWW;wH2u;pjir", "pdf_size": 986834, "rating": "6;6;6;7", "confidence": "4;3;2;3", "soundness": "3;4;3;3", "novelty": "4;4;3;3", "presentation": "2;4;3;4", "wc_summary": "78;82;46;110", "wc_strengths": "95;107;23;160", "wc_weaknesses": "182;131;18;52", "wc_questions": "270;197;58;47", "wc_limitations": "58;20;5;9", "wc_review": "683;537;150;378", "wc_reply_reviewers": "235;46;17;16", "wc_reply_authors": "621;0;0;0", "reply_reviewers": "4;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 79.0, 22.693611435820433 ], "wc_strengths_avg": [ 96.25, 48.853735783458774 ], "wc_weaknesses_avg": [ 95.75, 64.4995155020563 ], "wc_questions_avg": [ 143.0, 94.1886405040438 ], "wc_limitations_avg": [ 23.0, 20.940391591371924 ], "wc_review_avg": [ 437.0, 197.7157049907771 ], "wc_reply_reviewers_avg": [ 78.5, 91.15508762543098 ], "wc_reply_authors_avg": [ 155.25, 268.9008878750682 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5297158121772609123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "weizmann.ac.il;uw.edu;gatech.edu;;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Weizmann Institute of Science;University of Washington;Georgia Institute of Technology;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.weizmann.org.il;https://www.washington.edu;https://www.gatech.edu;https://web.mit.edu", "aff_unique_abbr": "Weizmann;UW;Georgia Tech;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Get rich quick: exact solutions reveal how unbalanced initializations promote rapid feature learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94278", "id": "eNM94i7R3A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eNM94i7R3A", "openreview": "https://openreview.net/forum?id=eNM94i7R3A", "poster": "/media/PosterPDFs/NeurIPS%202024/94278.png?t=1731370768.1641998", "project": "", "author_site": "Daniel Kunin, Allan Ravent\u00f3s, Cl\u00e9mentine Domin\u00e9, Feng Chen, David Klindt, Andrew Saxe, Surya Ganguli", "tldr": "", "abstract": "While the impressive performance of modern neural networks is often attributed to their capacity to efficiently extract task-relevant features from data, the mechanisms underlying this *rich feature learning regime* remain elusive, with much of our theoretical understanding stemming from the opposing *lazy regime*. In this work, we derive exact solutions to a minimal model that transitions between lazy and rich learning, precisely elucidating how unbalanced *layer-specific* initialization variances and learning rates determine the degree of feature learning. Our analysis reveals that they conspire to influence the learning regime through a set of conserved quantities that constrain and modify the geometry of learning trajectories in parameter and function space. We extend our analysis to more complex linear models with multiple neurons, outputs, and layers and to shallow nonlinear networks with piecewise linear activation functions. In linear networks, rapid feature learning only occurs from balanced initializations, where all layers learn at similar speeds. While in nonlinear networks, unbalanced initializations that promote faster learning in earlier layers can accelerate rich learning. Through a series of experiments, we provide evidence that this unbalanced rich regime drives feature learning in deep finite-width networks, promotes interpretability of early layers in CNNs, reduces the sample complexity of learning hierarchical data, and decreases the time to grokking in modular arithmetic. Our theory motivates further exploration of unbalanced initializations to enhance efficient feature learning.", "keywords": "feature learning;rich regime;lazy regime;exact solutions;conserved quantities;balanced initialization;neural tangent kernel;grokking", "primary_area": "learning_theory", "supplementary_material": "/attachment/ec12bfeb61efe10b33a4da8cc1f4eea22604b3bb.zip", "author": "Daniel Kunin;Allan Raventos;Cl\u00e9mentine Carla Juliette Domin\u00e9;Feng Chen;David Klindt;Andrew M Saxe;Surya Ganguli", "authorids": "~Daniel_Kunin1;~Allan_Raventos1;~Cl\u00e9mentine_Carla_Juliette_Domin\u00e91;~Feng_Chen13;~David_Klindt1;~Andrew_M_Saxe1;~Surya_Ganguli1", "gender": ";;F;M;;M;M", "homepage": "https://daniel-kunin.com/;;https://clementinedomine.github.io;;;https://www.saxelab.org;http://ganguli-gang.stanford.edu/surya.html", "dblp": "234/8632;232/1768;346/1036;21/3047-46.html;;39/6894;56/10453", "google_scholar": "qilW2ZMAAAAJ;qqtCGpQAAAAJ;oVZ0fSYAAAAJ;;;h0Al1fcAAAAJ;", "orcid": ";;;0000-0002-8645-7356;;0000-0002-9831-8812;", "linkedin": ";;clementine-domine-75a6a2150/;feng-chen-a0286a185;;;", "or_profile": "~Daniel_Kunin1;~Allan_Raventos1;~Cl\u00e9mentine_Carla_Juliette_Domin\u00e91;~Feng_Chen13;~David_Klindt1;~Andrew_M_Saxe1;~Surya_Ganguli1", "aff": ";Stanford University;University College London, University of London;Stanford University;;University College London, University of London;Stanford University", "aff_domain": ";stanford.edu;ucl.ac.uk;stanford.edu;;ucl.ac.uk;@stanford.edu", "position": ";PhD student;PhD student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkunin2024get,\ntitle={Get rich quick: exact solutions reveal how unbalanced initializations promote rapid feature learning},\nauthor={Daniel Kunin and Allan Raventos and Cl{\\'e}mentine Carla Juliette Domin{\\'e} and Feng Chen and David Klindt and Andrew M Saxe and Surya Ganguli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eNM94i7R3A}\n}", "github": "", "reviewers": "JJEZ;K8a2;KNvb;hh3v", "pdf_size": 3692685, "rating": "6;7;7;8", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "4;3;2;4", "wc_summary": "172;42;18;74", "wc_strengths": "123;71;61;131", "wc_weaknesses": "274;68;325;72", "wc_questions": "2;86;4;92", "wc_limitations": "1;30;1;6", "wc_review": "572;297;409;375", "wc_reply_reviewers": "153;147;39;11", "wc_reply_authors": "207;213;28;9", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 76.5, 58.606740226700886 ], "wc_strengths_avg": [ 96.5, 30.834234221073174 ], "wc_weaknesses_avg": [ 184.75, 116.16663677665804 ], "wc_questions_avg": [ 46.0, 43.05810028322197 ], "wc_limitations_avg": [ 9.5, 12.010412149464313 ], "wc_review_avg": [ 413.25, 100.24563581523138 ], "wc_reply_reviewers_avg": [ 87.5, 63.314690238522054 ], "wc_reply_authors_avg": [ 114.25, 96.00878866020548 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4219973060499875628&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";stanford.edu;ucl.ac.uk;stanford.edu;;ucl.ac.uk;@stanford.edu", "author_num": 7, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Stanford University;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.ucl.ac.uk", "aff_unique_abbr": "Stanford;UCL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Flatten Anything: Unsupervised Neural Surface Parameterization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94277", "id": "eNeqGc9AgR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eNeqGc9AgR", "openreview": "https://openreview.net/forum?id=eNeqGc9AgR", "poster": "/media/PosterPDFs/NeurIPS%202024/94277.png?t=1730259464.3447073", "project": "", "author_site": "Qijian Zhang, Junhui Hou, Wenping Wang, Ying He", "tldr": "", "abstract": "Surface parameterization plays an essential role in numerous computer graphics and geometry processing applications. Traditional parameterization approaches are designed for high-quality meshes laboriously created by specialized 3D modelers, thus unable to meet the processing demand for the current explosion of ordinary 3D data. Moreover, their working mechanisms are typically restricted to certain simple topologies, thus relying on cumbersome manual efforts (e.g., surface cutting, part segmentation) for pre-processing. In this paper, we introduce the Flatten Anything Model (FAM), an unsupervised neural architecture to achieve global free-boundary surface parameterization via learning point-wise mappings between 3D points on the target geometric surface and adaptively-deformed UV coordinates within the 2D parameter domain. To mimic the actual physical procedures, we ingeniously construct geometrically-interpretable sub-networks with specific functionalities of surface cutting, UV deforming, unwrapping, and wrapping, which are assembled into a bi-directional cycle mapping framework. Compared with previous methods, our FAM directly operates on discrete surface points without utilizing connectivity information, thus significantly reducing the strict requirements for mesh quality and even applicable to unstructured point cloud data. More importantly, our FAM is fully-automated without the need for pre-cutting and can deal with highly-complex topologies, since its learning process adaptively finds reasonable cutting seams and UV boundaries. Extensive experiments demonstrate the universality, superiority, and inspiring potential of our proposed neural surface parameterization paradigm. Our code is available at https://github.com/keeganhk/FlattenAnything.", "keywords": "Surface Parameterization;UV Unwrapping;Neural Network;Unsupervised Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qijian Zhang;Junhui Hou;Wenping Wang;Ying He", "authorids": "~Qijian_Zhang1;~Junhui_Hou2;~Wenping_Wang1;~Ying_He1", "gender": "M;M;M;M", "homepage": "https://keeganhk.github.io/;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html;https://personal.ntu.edu.sg/yhe/", "dblp": "201/6850.html;122/2673.html;;h/YingHe1", "google_scholar": "4NIiTYgAAAAJ;j6eefhwAAAAJ;28shvv0AAAAJ;ISNmBxwAAAAJ", "orcid": "0000-0003-4723-6136;0000-0003-3431-2021;0000-0002-2284-3952;0000-0002-6749-4485", "linkedin": ";;;", "or_profile": "~Qijian_Zhang1;~Junhui_Hou2;~Wenping_Wang1;~Ying_He1", "aff": "City University of Hong Kong;City University of Hong Kong;Texas A&M University - College Station;Nanyang Technological University", "aff_domain": "cityu.edu.hk;cityu.edu.hk;tamu.edu;ntu.edu.sg", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024flatten,\ntitle={Flatten Anything: Unsupervised Neural Surface Parameterization},\nauthor={Qijian Zhang and Junhui Hou and Wenping Wang and Ying He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eNeqGc9AgR}\n}", "github": "", "reviewers": "Hnxu;4cq8;eo5W;FN2a", "pdf_size": 6495064, "rating": "5;6;6;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;2;3", "wc_summary": "76;111;152;126", "wc_strengths": "71;65;91;115", "wc_weaknesses": "199;17;183;125", "wc_questions": "7;87;70;177", "wc_limitations": "10;14;9;41", "wc_review": "363;294;505;584", "wc_reply_reviewers": "16;0;31;35", "wc_reply_authors": "73;0;34;34", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 116.25, 27.48067502809929 ], "wc_strengths_avg": [ 85.5, 19.56399754651385 ], "wc_weaknesses_avg": [ 131.0, 71.34423592694787 ], "wc_questions_avg": [ 85.25, 60.77982806820039 ], "wc_limitations_avg": [ 18.5, 13.124404748406688 ], "wc_review_avg": [ 436.5, 114.18953542247205 ], "wc_reply_reviewers_avg": [ 20.5, 13.793114224133722 ], "wc_reply_authors_avg": [ 35.25, 25.839649765428323 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1968059950046200492&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cityu.edu.hk;cityu.edu.hk;tamu.edu;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "City University of Hong Kong;Texas A&M University;Nanyang Technological University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.tamu.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "CityU;TAMU;NTU", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;College Station;", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "China;United States;Singapore" }, { "title": "StreamingDialogue: Prolonged Dialogue Learning via Long Context Compression with Minimal Losses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94276", "id": "eNvVjpx97O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eNvVjpx97O", "openreview": "https://openreview.net/forum?id=eNvVjpx97O", "poster": "/media/PosterPDFs/NeurIPS%202024/94276.png?t=1731261055.4364827", "project": "", "author_site": "JIANAN LI, Quan Tu, Cunli Mao, Zhengtao Yu, Ji-Rong Wen, Rui Yan", "tldr": "", "abstract": "Standard Large Language Models (LLMs) struggle with handling dialogues with long contexts due to efficiency and consistency issues. According to our observation, dialogue contexts are highly structured, and the special token of End-of-Utterance (EoU) in dialogues has the potential to aggregate information. We refer to the EoU tokens as ``conversational attention sinks'' (conv-attn sinks). Accordingly, we introduce StreamingDialogue, which compresses long dialogue history into conv-attn sinks with minimal losses, and thus reduces computational complexity quadratically with the number of sinks (i.e., the number of utterances). Current LLMs already demonstrate the ability to handle long context window, e.g., a window size of 200K or more. To this end, by compressing utterances into EoUs, our method has the potential to handle more than 200K of utterances, resulting in a prolonged dialogue learning. In order to minimize information losses from reconstruction after compression, we design two learning strategies of short-memory reconstruction (SMR) and long-memory reactivation (LMR). Our method outperforms strong baselines in dialogue tasks and achieves a 4 $\\times$ speedup while reducing memory usage by 18 $\\times$ compared to dense attention recomputation.", "keywords": "dialogue compression;conversational attention sinks;memory", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/5732d4e6bc23feeb7a6c60237b36ead6827d9d55.zip", "author": "Jia-Nan Li;Quan Tu;Cunli Mao;Zhengtao Yu;Ji-Rong Wen;Rui Yan", "authorids": "~Jia-Nan_Li1;~Quan_Tu2;~Cunli_Mao1;~Zhengtao_Yu2;~Ji-Rong_Wen1;~Rui_Yan2", "gender": ";M;M;M;M;M", "homepage": ";http://xzy.kmust.edu.cn/info/1127/1559.htm;http://rsc.kmust.edu.cn/info/1181/1081.htm;https://gsai.ruc.edu.cn/english/jrwen;https://gsai.ruc.edu.cn/english/ruiyan;https://github.com/morecry", "dblp": "372/2705;35/2229.html;03/6757;w/JRWen;19/2405-1;269/5183.html", "google_scholar": ";;;tbxCHJgAAAAJ;eLw6g-UAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0000-3529-7869;;0000-0001-8952-8984;0000-0002-9777-9676;0000-0002-3356-6823;0009-0005-8806-247X", "linkedin": "li-jianan/;;;;;", "or_profile": "~Jia-Nan_Li1;~Cunli_Mao1;~Zhengtao_Yu2;~Ji-Rong_Wen1;~Rui_Yan2;~quan_Tu1", "aff": "Renmin University of China;Kunmimg University of Science and Technology;Kunming University of Science and Technology;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;kmust.edu.cn;kmust.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Full Professor;Full Professor;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2024streamingdialogue,\ntitle={StreamingDialogue: Prolonged Dialogue Learning via Long Context Compression with Minimal Losses},\nauthor={Jia-Nan Li and Quan Tu and Cunli Mao and Zhengtao Yu and Ji-Rong Wen and Rui Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eNvVjpx97O}\n}", "github": "", "reviewers": "wRZP;7W1D;4bTd", "pdf_size": 1113767, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "92;158;111", "wc_strengths": "72;118;41", "wc_weaknesses": "117;259;37", "wc_questions": "72;89;124", "wc_limitations": "16;1;32", "wc_review": "369;625;345", "wc_reply_reviewers": "0;210;156", "wc_reply_authors": "139;379;516", "reply_reviewers": "0;3;2", "reply_authors": "3;4;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 120.33333333333333, 27.740864362084242 ], "wc_strengths_avg": [ 77.0, 31.63331577098213 ], "wc_weaknesses_avg": [ 137.66666666666666, 91.80171869608735 ], "wc_questions_avg": [ 95.0, 21.64871050817269 ], "wc_limitations_avg": [ 16.333333333333332, 12.657891697365017 ], "wc_review_avg": [ 446.3333333333333, 126.71577995217837 ], "wc_reply_reviewers_avg": [ 122.0, 89.03931715820826 ], "wc_reply_authors_avg": [ 344.6666666666667, 155.81256546105502 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=63507550590047513&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ruc.edu.cn;kmust.edu.cn;kmust.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "Renmin University of China;Kunming University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;http://www.kust.edu.cn", "aff_unique_abbr": "RUC;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kunming", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AutoPSV: Automated Process-Supervised Verifier", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94275", "id": "eOAPWWOGs9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eOAPWWOGs9", "openreview": "https://openreview.net/forum?id=eOAPWWOGs9", "poster": "/media/PosterPDFs/NeurIPS%202024/94275.png?t=1731480793.513281", "project": "", "author_site": "Jianqiao Lu, Zhiyang Dou, Hongru WANG, Zeyu Cao, Jianbo Dai, Yunlong Feng, Zhijiang Guo", "tldr": "", "abstract": "In this work, we propose a novel method named \\textbf{Auto}mated \\textbf{P}rocess-\\textbf{S}upervised \\textbf{V}erifier (\\textbf{\\textsc{AutoPSV}}) to enhance the reasoning capabilities of large language models (LLMs) by automatically annotating the reasoning steps.\n\\textsc{AutoPSV} begins by training a verification model on the correctness of final answers, enabling it to generate automatic process annotations. \nThis verification model assigns a confidence score to each reasoning step, indicating the probability of arriving at the correct final answer from that point onward.\nWe detect relative changes in the verification's confidence scores across reasoning steps to automatically annotate the reasoning process, enabling error detection even in scenarios where ground truth answers are unavailable. \nThis alleviates the need for numerous manual annotations or the high computational costs associated with model-induced annotation approaches.\nWe experimentally validate that the step-level confidence changes learned by the verification model trained on the final answer correctness can effectively identify errors in the reasoning steps.\nWe demonstrate that the verification model, when trained on process annotations generated by \\textsc{AutoPSV}, exhibits improved performance in selecting correct answers from multiple LLM-generated outputs.\nNotably, we achieve substantial improvements across five datasets in mathematics and commonsense reasoning. The source code of \\textsc{AutoPSV} is available at \\url{https://github.com/rookie-joe/AutoPSV}.", "keywords": "Large Language Models;Math Reasoning;Commonsense Reasoning;Automatic Process Annotation;Multi-step Reasoning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/32a733cf18db76494147d4327c3909b33eb0c974.zip", "author": "Jianqiao Lu;Zhiyang Dou;Hongru WANG;Zeyu Cao;Jianbo Dai;Yunlong Feng;Zhijiang Guo", "authorids": "~Jianqiao_Lu1;~Zhiyang_Dou1;~Hongru_WANG1;~Zeyu_Cao1;~Jianbo_Dai1;~Yunlong_Feng3;~Zhijiang_Guo2", "gender": "M;M;M;;M;M;M", "homepage": "https://jianqiaolu.github.io/;https://frank-zy-dou.github.io/;https://rulegreen.github.io/;https://caojamie.io;https://1e0ndavid.github.io/;https://scholar.google.com/citations?user=P8lNSfUAAAAJ&hl=en;https://cartus.github.io/", "dblp": "358/4791;251/3224;72/1462-3;;178/2976;;43/6147", "google_scholar": "uIW6d6AAAAAJ;SLRYlKsAAAAJ;s6UtVYUAAAAJ;MiIz8tYAAAAJ;jU1A5BYAAAAJ;P8lNSfUAAAAJ;8b-u3icAAAAJ", "orcid": ";0000-0003-0186-8269;0000-0001-5027-0138;;0009-0005-3183-888X;;", "linkedin": "jianqiao-lu-308620201/;zhiyang-dou-0259111b3/;;;jianbo-dai-9085a2161/;;", "or_profile": "~Jianqiao_Lu1;~Zhiyang_Dou1;~Hongru_WANG1;~Zeyu_Cao1;~Jianbo_Dai1;~Yunlong_Feng3;~Zhijiang_Guo2", "aff": "University of Hong Kong;University of Hong Kong;University of Edinburgh;University of Cambridge;Huawei Technologies Ltd.;Harbin Institute of Technology;University of Cambridge", "aff_domain": "hku.hk;cs.hku.hk;ed.ac.uk;cam.ac.uk;huawei.com;hit.edu.cn;cam.ac.uk", "position": "PhD student;MPhil;Visiting Student;MS student;Researcher;PhD student;Postdoc", "bibtex": "@inproceedings{\nlu2024autopsv,\ntitle={Auto{PSV}: Automated Process-Supervised Verifier},\nauthor={Jianqiao Lu and Zhiyang Dou and Hongru WANG and Zeyu Cao and Jianbo Dai and Yunlong Feng and Zhijiang Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eOAPWWOGs9}\n}", "github": "", "reviewers": "rE8z;hKWR;kK32", "pdf_size": 507736, "rating": "6;7;7", "confidence": "4;4;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "122;114;85", "wc_strengths": "112;50;127", "wc_weaknesses": "286;222;233", "wc_questions": "81;53;4", "wc_limitations": "1;2;1", "wc_review": "602;441;450", "wc_reply_reviewers": "0;86;60", "wc_reply_authors": "0;770;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 107.0, 15.895492023421818 ], "wc_strengths_avg": [ 96.33333333333333, 33.32999983331666 ], "wc_weaknesses_avg": [ 247.0, 27.94041278626117 ], "wc_questions_avg": [ 46.0, 31.822423959633664 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 497.6666666666667, 73.86624548616385 ], "wc_reply_reviewers_avg": [ 48.666666666666664, 36.01234356285202 ], "wc_reply_authors_avg": [ 256.6666666666667, 362.98148100909435 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12057204133386511762&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "hku.hk;cs.hku.hk;ed.ac.uk;cam.ac.uk;huawei.com;hit.edu.cn;cam.ac.uk", "author_num": 7, "aff_unique_index": "0;0;1;2;3;4;2", "aff_unique_norm": "University of Hong Kong;University of Edinburgh;University of Cambridge;Huawei;Harbin Institute of Technology", "aff_unique_dep": ";;;Huawei Technologies;", "aff_unique_url": "https://www.hku.hk;https://www.ed.ac.uk;https://www.cam.ac.uk;https://www.huawei.com;http://www.hit.edu.cn/", "aff_unique_abbr": "HKU;Edinburgh;Cambridge;Huawei;HIT", "aff_campus_unique_index": "0;0;2;3;2", "aff_campus_unique": "Hong Kong SAR;;Cambridge;Harbin", "aff_country_unique_index": "0;0;1;1;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Temporal Sentence Grounding with Relevance Feedback in Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94274", "id": "eOonmxzzno", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eOonmxzzno", "openreview": "https://openreview.net/forum?id=eOonmxzzno", "poster": "/media/PosterPDFs/NeurIPS%202024/94274.png?t=1731647961.6804314", "project": "", "author_site": "Jianfeng Dong, Xiaoman Peng, Daizong Liu, Xiaoye Qu, Xun Yang, Cuizhu Bao, Meng Wang", "tldr": "", "abstract": "As a widely explored multi-modal task, Temporal Sentence Grounding in videos (TSG) endeavors to retrieve a specific video segment matched with a given query text from a video. The traditional paradigm for TSG generally assumes that relevant segments always exist within a given video. However, this assumption is restrictive and unrealistic in real-world applications where the existence of a query-related segment is uncertain, easily resulting in erroneous grounding. Motivated by the research gap and practical application, this paper introduces a new task, named Temporal Sentence Grounding with Relevance Feedback (TSG-RF) in videos, which accommodates the possibility that a video may or may not include a segment related to the query. This task entails localizing precise video segments that semantically align with the query text when such content is present, while delivering definitive feedback on the non-existence of related segments when absent. Moreover, we propose a novel Relation-aware Temporal Sentence Grounding (RaTSG) network for addressing this challenging task. This network first reformulates the TSG-RF task as a foreground-background detection problem by investigating whether the query-related semantics exist in both frame and video levels. Then, a multi-granularity relevance discriminator is exploited to produce precise video-query relevance feedback and a relation-aware segment grounding module is employed to selectively conduct the grounding process, dynamically adapting to the presence or absence of query-related segments in videos. To validate our RaTSG network, we reconstruct two popular TSG datasets, establishing a rigorous benchmark for TSG-RF. Experimental results demonstrate the effectiveness of our proposed RaTSG for the TSG-RF task. Our source code is available at https://github.com/HuiGuanLab/RaTSG.", "keywords": "Temporal Sentence Grounding;Cross-Modal Retrieval;Vision and Language", "primary_area": "machine_vision", "supplementary_material": "/attachment/f56fd469927308412a79f23c823be6bd2210173d.zip", "author": "Jianfeng Dong;Xiaoman Peng;Daizong Liu;Xiaoye Qu;Xun Yang;Cuizhu Bao;Meng Wang", "authorids": "~Jianfeng_Dong2;~Xiaoman_Peng1;~Daizong_Liu1;~Xiaoye_Qu1;~Xun_Yang1;~Cuizhu_Bao1;~Meng_Wang3", "gender": "M;F;;M;M;F;M", "homepage": "http://danieljf24.github.io/;;https://liudaizong.github.io/HomePage/;;https://sites.google.com/site/xunyangprofile/;https://scie.zjgsu.edu.cn/zh-hans/node/716;https://sites.google.com/view/meng-wang/home", "dblp": "51/7830;;239/6021;229/8206;72/4772-1;;93/6765-2", "google_scholar": "8-zdk9wAAAAJ;;lUw7tVIAAAAJ;rT3hqdcAAAAJ;ro8lzsUAAAAJ;;rHagaaIAAAAJ", "orcid": ";0009-0006-9720-8191;0000-0001-8179-4508;;0000-0003-0201-1638;;0000-0002-3094-7735", "linkedin": ";;;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;;", "or_profile": "~Jianfeng_Dong2;~Xiaoman_Peng1;~Daizong_Liu1;~Xiaoye_Qu1;~Xun_Yang1;~Cuizhu_Bao1;~Meng_Wang3", "aff": "Zhejiang Gongshang University;Zhejiang Gongshang University;Peking University;Shanghai Artificial Intelligence Laboratory;University of Science and Technology of China;Zhejiang Gongshang University;Hefei University of Technology", "aff_domain": "zjgsu.edu.cn;zjgsu.edu.cn;pku.edu.cn;pjlab.org.cn;ustc.edu.cn;zjgsu.edu.cn;hfut.edu.cn", "position": "Full Professor;Undergrad student;PhD student;Researcher;Full Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\ndong2024temporal,\ntitle={Temporal Sentence Grounding with Relevance Feedback in Videos},\nauthor={Jianfeng Dong and Xiaoman Peng and Daizong Liu and Xiaoye Qu and Xun Yang and Cuizhu Bao and Meng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eOonmxzzno}\n}", "github": "", "reviewers": "dpih;3Qyt;eoKJ;P4nK", "pdf_size": 1872561, "rating": "5;5;7;8", "confidence": "5;5;5;5", "soundness": "2;3;4;4", "novelty": "2;1;4;4", "presentation": "2;2;4;3", "wc_summary": "137;67;90;80", "wc_strengths": "42;53;81;134", "wc_weaknesses": "67;124;95;18", "wc_questions": "6;6;5;44", "wc_limitations": "37;1;43;20", "wc_review": "289;251;314;296", "wc_reply_reviewers": "140;18;25;69", "wc_reply_authors": "53;15;13;38", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 1.299038105676658 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 93.5, 26.405491853021786 ], "wc_strengths_avg": [ 77.5, 35.58440669731617 ], "wc_weaknesses_avg": [ 76.0, 39.08324449172561 ], "wc_questions_avg": [ 15.25, 16.60383991732033 ], "wc_limitations_avg": [ 25.25, 16.345871038277526 ], "wc_review_avg": [ 287.5, 22.96192500641007 ], "wc_reply_reviewers_avg": [ 63.0, 48.56439024635232 ], "wc_reply_authors_avg": [ 29.75, 16.63392617513977 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15151300426138103268&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "zjgsu.edu.cn;zjgsu.edu.cn;pku.edu.cn;pjlab.org.cn;ustc.edu.cn;zjgsu.edu.cn;hfut.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0;4", "aff_unique_norm": "Zhejiang Gongshang University;Peking University;Shanghai Artificial Intelligence Laboratory;University of Science and Technology of China;Hefei University of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.hzic.edu.cn;http://www.pku.edu.cn;http://www.shailab.org/;http://www.ustc.edu.cn;http://www.hfut.edu.cn/", "aff_unique_abbr": "ZJGSU;Peking U;Shanghai AI Lab;USTC;HUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "EgoSim: An Egocentric Multi-view Simulator and Real Dataset for Body-worn Cameras during Motion and Activity", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97590", "id": "eOszT2lepG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eOszT2lepG", "openreview": "https://openreview.net/forum?id=eOszT2lepG", "poster": "/media/PosterPDFs/NeurIPS%202024/97590.png?t=1731314607.1671796", "project": "", "author_site": "Dominik Hollidt, Paul Streli, Jiaxi Jiang, Yasaman Haghighi, Changlin Qian, Xintong Liu, Christian Holz", "tldr": "", "abstract": "Research on egocentric tasks in computer vision has mostly focused on head-mounted cameras, such as fisheye cameras or embedded cameras inside immersive headsets.\nWe argue that the increasing miniaturization of optical sensors will lead to the prolific integration of cameras into many more body-worn devices at various locations.\nThis will bring fresh perspectives to established tasks in computer vision and benefit key areas such as human motion tracking, body pose estimation, or action recognition---particularly for the lower body, which is typically occluded.\n\nIn this paper, we introduce EgoSim, a novel simulator of body-worn cameras that generates realistic egocentric renderings from multiple perspectives across a wearer's body.\nA key feature of EgoSim is its use of real motion capture data to render motion artifacts, which are especially noticeable with arm- or leg-worn cameras.\nIn addition, we introduce MultiEgoView, a dataset of egocentric footage from six body-worn cameras and ground-truth full-body 3D poses during several activities:\n119 hours of data are derived from AMASS motion sequences in four high-fidelity virtual environments, which we augment with 5 hours of real-world motion data from 13 participants using six GoPro cameras and 3D body pose references from an Xsens motion capture suit.\n\nWe demonstrate EgoSim's effectiveness by training an end-to-end video-only 3D pose estimation network.\nAnalyzing its domain gap, we show that our dataset and simulator substantially aid training for inference on real-world data.\n\nEgoSim code & MultiEgoView dataset: https://siplab.org/projects/EgoSim", "keywords": "egocentric perception;body-worn cameras;human pose estimation;simulator;data generation;synthetic data", "primary_area": "", "supplementary_material": "", "author": "Dominik Hollidt;Paul Streli;Jiaxi Jiang;Yasaman Haghighi;Changlin Qian;Xintong Liu;Christian Holz", "authorids": "~Dominik_Hollidt1;~Paul_Streli1;~Jiaxi_Jiang1;~Yasaman_Haghighi2;~Changlin_Qian1;~Xintong_Liu3;~Christian_Holz1", "gender": ";M;M;;M;;M", "homepage": "https://dominikvincent.github.io/;https://paulstreli.com/;https://jiaxi-jiang.github.io/;;https://colincqian.github.io/;;https://siplab.org", "dblp": "358/6949;291/9747;260/6367;;;;79/7439-1", "google_scholar": "S2368c0AAAAJ;zzgpUIIAAAAJ;https://scholar.google.de/citations?user=Ug68uwwAAAAJ;;;;OfXP9jMAAAAJ", "orcid": "0009-0002-1577-1329;0000-0002-3334-7727;;;;;0000-0001-9655-9519", "linkedin": ";;jiaxi-jiang/;;;;", "or_profile": "~Dominik_Hollidt1;~Paul_Streli1;~Jiaxi_Jiang1;~Yasaman_Haghighi2;~Changlin_Qian1;~Xintong_Liu3;~Christian_Holz1", "aff": "ETHZ - ETH Zurich;Meta Reality Labs;ETHZ - ETH Zurich;;;;ETH Zurich", "aff_domain": "ethz.ch;meta.com;ethz.ch;;;;inf.ethz.ch", "position": "PhD student;Intern;PhD student;;;;Assistant Professor", "bibtex": "@inproceedings{\nhollidt2024egosim,\ntitle={EgoSim: An Egocentric Multi-view Simulator and Real Dataset for Body-worn Cameras during Motion and Activity},\nauthor={Dominik Hollidt and Paul Streli and Jiaxi Jiang and Yasaman Haghighi and Changlin Qian and Xintong Liu and Christian Holz},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=eOszT2lepG}\n}", "github": "", "reviewers": "rQh4;ppfE;Lax6;C1rr", "pdf_size": 7988299, "rating": "6;7;7;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "89;120;78;116", "wc_strengths": "115;124;100;94", "wc_improvement": "2;83;201;97", "wc_limitations": "92;13;9;11", "wc_correctness": "4;4;1;1", "wc_clarity": "4;35;1;1", "wc_relation_to_prior_work": "1;19;1;1", "wc_documentation": "1;11;1;40", "wc_additional_feedback": "1;1;1;1", "wc_review": "309;410;393;362", "wc_reply_reviewers": "10;19;0;24", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 100.75, 17.73943347460679 ], "wc_strengths_avg": [ 108.25, 11.882234638316145 ], "wc_improvement_avg": [ 95.75, 70.7650160743287 ], "wc_limitations_avg": [ 31.25, 35.102528398962946 ], "wc_correctness_avg": [ 2.5, 1.5 ], "wc_clarity_avg": [ 10.25, 14.341809509263467 ], "wc_relation_to_prior_work_avg": [ 5.5, 7.794228634059948 ], "wc_documentation_avg": [ 13.25, 15.974589196595948 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 368.5, 38.421998906876254 ], "wc_reply_reviewers_avg": [ 13.25, 9.148087231765993 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14475448519913568569&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ethz.ch;meta.com;ethz.ch;;;;inf.ethz.ch", "author_num": 7, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "ETH Zurich;Meta", "aff_unique_dep": ";Meta Reality Labs", "aff_unique_url": "https://www.ethz.ch;https://www.meta.com", "aff_unique_abbr": "ETHZ;MRL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Online Consistency of the Nearest Neighbor Rule", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94273", "id": "eOx0SMRUv7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eOx0SMRUv7", "openreview": "https://openreview.net/forum?id=eOx0SMRUv7", "poster": "", "project": "", "author_site": "Geelon So, Sanjoy Dasgupta", "tldr": "", "abstract": "In the realizable online setting, a learner is tasked with making predictions for a stream of instances, where the correct answer is revealed after each prediction. A learning rule is online consistent if its mistake rate eventually vanishes. The nearest neighbor rule is fundamental prediction strategy, but it is only known to be consistent under strong statistical or geometric assumptions: the instances come i.i.d. or the label classes are well-separated. We prove online consistency for all measurable functions in doubling metric spaces under the mild assumption that instances are generated by a process that is uniformly absolutely continuous with respect to an underlying finite, upper doubling measure.", "keywords": "Nearest Neighbor Classification;Online Learning;Smoothed Analysis", "primary_area": "learning_theory", "supplementary_material": "", "author": "Geelon So;Sanjoy Dasgupta", "authorids": "~Geelon_So1;~Sanjoy_Dasgupta3", "gender": ";", "homepage": "https://geelon.github.io;http://www.cs.ucsd.edu/~dasgupta", "dblp": "314/6243.html;34/5967", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Geelon_So1;~Sanjoy_Dasgupta2", "aff": "Computer Science and Engineering Department, University of California, San Diego;University of California, San Diego", "aff_domain": "cse.ucsd.edu;ucsd.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nso2024online,\ntitle={Online Consistency of the Nearest Neighbor Rule},\nauthor={Geelon So and Sanjoy Dasgupta},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eOx0SMRUv7}\n}", "github": "", "reviewers": "4b2W;LFbH;XeYp", "pdf_size": 750003, "rating": "6;6;7", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "2;3;4", "wc_summary": "104;71;107", "wc_strengths": "98;61;54", "wc_weaknesses": "237;28;91", "wc_questions": "2;17;197", "wc_limitations": "1;13;37", "wc_review": "442;190;486", "wc_reply_reviewers": "14;21;105", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 94.0, 16.30950643030009 ], "wc_strengths_avg": [ 71.0, 19.30457631409368 ], "wc_weaknesses_avg": [ 118.66666666666667, 87.53792828762222 ], "wc_questions_avg": [ 72.0, 88.60022573334675 ], "wc_limitations_avg": [ 17.0, 14.966629547095765 ], "wc_review_avg": [ 372.6666666666667, 130.40790705406718 ], "wc_reply_reviewers_avg": [ 46.666666666666664, 41.34677200889515 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gwFPCQPSzbUJ:scholar.google.com/&scioq=Online+Consistency+of+the+Nearest+Neighbor+Rule&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "cse.ucsd.edu;ucsd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Computer Science and Engineering Department", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Representation Noising: A Defence Mechanism Against Harmful Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94272", "id": "eP9auEJqFg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eP9auEJqFg", "openreview": "https://openreview.net/forum?id=eP9auEJqFg", "poster": "/media/PosterPDFs/NeurIPS%202024/94272.png?t=1733337414.4571998", "project": "", "author_site": "Domenic Rosati, Jan Wehner, Kai Williams, Lukasz Bartoszcze, Robie Gonzales, carsten maple, Subhabrata Majumdar, Hassan Sajjad, Frank Rudzicz", "tldr": "", "abstract": "Releasing open-source large language models (LLMs) presents a dual-use risk since bad actors can easily fine-tune these models for harmful purposes. Even without the open release of weights, weight stealing and fine-tuning APIs make closed models vulnerable to harmful fine-tuning attacks (HFAs). While safety measures like preventing jailbreaks and improving safety guardrails are important, such measures can easily be reversed through fine-tuning. In this work, we propose Representation Noising (\\textsf{\\small RepNoise}), a defence mechanism that operates even when attackers have access to the weights. \\textsf{\\small RepNoise} works by removing information about harmful representations such that it is difficult to recover them during fine-tuning. Importantly, our defence is also able to generalize across different subsets of harm that have not been seen during the defence process as long as they are drawn from the same distribution of the attack set. Our method does not degrade the general capability of LLMs and retains the ability to train the model on harmless tasks. We provide empirical evidence that the efficacy of our defence lies in its ``depth'': the degree to which information about harmful representations is removed across {\\em all layers} of the LLM. We also find areas where \\textsf{\\small RepNoise} still remains ineffective and highlight how those limitations can inform future research.", "keywords": "Harmful Fine-tuning;LLM Security;Domain Authorization", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Domenic Rosati;Jan Wehner;Kai Williams;Lukasz Bartoszcze;Robie Gonzales;carsten maple;Subhabrata Majumdar;Hassan Sajjad;Frank Rudzicz", "authorids": "~Domenic_Rosati2;~Jan_Wehner1;~Kai_Williams1;~Lukasz_Bartoszcze1;~Robie_Gonzales1;~carsten_maple1;~Subhabrata_Majumdar2;~Hassan_Sajjad1;~Frank_Rudzicz2", "gender": ";M;M;M;Not Specified;;M;M;M", "homepage": "https://domenicrosati.github.io/;;;;https://github.com/robzeh;https://warwick.ac.uk/fac/sci/wmg/people/profile/?wmgid=1102;https://shubhobm.github.io/;https://hsajjad.github.io/;http://www.cs.toronto.edu/~frank", "dblp": "204/8129;;;371/4334;368/8093;05/2263.html;163/9977;73/5938;36/6505", "google_scholar": "80aJAKYAAAAJ;S5jFB44AAAAJ;;8pSVNn0AAAAJ;https://scholar.google.ca/citations?user=ADptGlMAAAAJ;8MMdv50AAAAJ;wED36bwAAAAJ;https://scholar.google.de/citations?user=t3BH6NkAAAAJ;https://scholar.google.ca/citations?user=elXOB1sAAAAJ", "orcid": "0000-0003-2666-7615;;;;;0000-0002-4715-212X;0000-0003-3529-7820;;0000-0002-1139-3423", "linkedin": "https://linkedin.com/in/cinemod-itasor;jan-wehner-811682216/;kai-t-williams/;lbartoszcze/;robie-gonzales/;;shubhobm/;hassan-sajjad-154b043a/;", "or_profile": "~Domenic_Rosati2;~Jan_Wehner1;~Kai_Williams1;~Lukasz_Bartoszcze1;~Robie_Gonzales1;~carsten_maple1;~Subhabrata_Majumdar2;~Hassan_Sajjad1;~Frank_Rudzicz2", "aff": "scite.ai;Delft University of Technology;Swarthmore College;University of Warwick;Dalhousie University;The university of Warwick;Vijil;Dalhousie University;Dalhousie University", "aff_domain": "scite.ai;student.tudelft.nl;swarthmore.edu;warwick.ac.uk;dal.ca;warwick.ac.uk;vijil.ai;dal.ca;dal.ca", "position": "Principal Researcher;MS student;Undergrad student;PhD student;Researcher;Full Professor;Head of AI;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nrosati2024representation,\ntitle={Representation Noising: A Defence Mechanism Against Harmful Finetuning},\nauthor={Domenic Rosati and Jan Wehner and Kai Williams and Lukasz Bartoszcze and Robie Gonzales and carsten maple and Subhabrata Majumdar and Hassan Sajjad and Frank Rudzicz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eP9auEJqFg}\n}", "github": "", "reviewers": "QXs8;wY4C;dHWM;zYey", "pdf_size": 767732, "rating": "2;7;7;7", "confidence": "4;5;4;4", "soundness": "1;3;3;1", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "182;93;67;90", "wc_strengths": "80;64;72;175", "wc_weaknesses": "232;433;16;1506", "wc_questions": "75;125;136;184", "wc_limitations": "13;24;7;1", "wc_review": "582;739;298;1956", "wc_reply_reviewers": "843;386;86;175", "wc_reply_authors": "1856;1433;405;5180", "reply_reviewers": "2;3;2;1", "reply_authors": "4;4;2;13", "rating_avg": [ 5.75, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 1.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 108.0, 43.89191269470949 ], "wc_strengths_avg": [ 97.75, 44.95761893161158 ], "wc_weaknesses_avg": [ 546.75, 573.1192611490212 ], "wc_questions_avg": [ 130.0, 38.73628789649313 ], "wc_limitations_avg": [ 11.25, 8.496322733983215 ], "wc_review_avg": [ 893.75, 633.3302357380389 ], "wc_reply_reviewers_avg": [ 372.5, 292.6777237850534 ], "wc_reply_authors_avg": [ 2218.5, 1789.3910276962943 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 5.75, 4.264680527307995 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1772367917886886598&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "scite.ai;student.tudelft.nl;swarthmore.edu;warwick.ac.uk;dal.ca;warwick.ac.uk;vijil.ai;dal.ca;dal.ca", "author_num": 9, "aff_unique_index": "0;1;2;3;4;3;4;4", "aff_unique_norm": "scite.ai;Delft University of Technology;Swarthmore College;University of Warwick;Dalhousie University;", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.scite.ai;https://www.tudelft.nl;https://www.swarthmore.edu;https://www.warwick.ac.uk;https://www.dal.ca;", "aff_unique_abbr": "scite.ai;TU Delft;Swarthmore;Warwick;Dal;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;3;2;3;3", "aff_country_unique": "United States;Netherlands;United Kingdom;Canada;" }, { "title": "Disentangled Unsupervised Skill Discovery for Efficient Hierarchical Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94271", "id": "ePOBcWfNFC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ePOBcWfNFC", "openreview": "https://openreview.net/forum?id=ePOBcWfNFC", "poster": "/media/PosterPDFs/NeurIPS%202024/94271.png?t=1731347406.4739861", "project": "", "author_site": "Jiaheng Hu, Zizhao Wang, Peter Stone, Roberto Mart\u00edn-Mart\u00edn", "tldr": "", "abstract": "A hallmark of intelligent agents is the ability to learn reusable skills purely from unsupervised interaction with the environment. However, existing unsupervised skill discovery methods often learn entangled skills where one skill variable simultaneously influences many entities in the environment, making downstream skill chaining extremely challenging. We propose Disentangled Unsupervised Skill Discovery (DUSDi), a method for learning disentangled skills that can be efficiently reused to solve downstream tasks. DUSDi decomposes skills into disentangled components, where each skill component only affects one factor of the state space. Importantly, these skill components can be concurrently composed to generate low-level actions, and efficiently chained to tackle downstream tasks through hierarchical Reinforcement Learning. DUSDi defines a novel mutual-information-based objective to enforce disentanglement between the influences of different skill components, and utilizes value factorization to optimize this objective efficiently. Evaluated in a set of challenging environments, DUSDi successfully learns disentangled skills, and significantly outperforms previous skill discovery methods when it comes to applying the learned skills to solve downstream tasks.", "keywords": "Unsupervised Skill Discovery;Hierarchical Reinforcement Learning;Disentanglement", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/2bc12d8a2355ee97765324df01c5485176a56bef.zip", "author": "Jiaheng Hu;Zizhao Wang;Peter Stone;Roberto Mart\u00edn-Mart\u00edn", "authorids": "~Jiaheng_Hu1;~Zizhao_Wang3;~Peter_Stone1;~Roberto_Mart\u00edn-Mart\u00edn1", "gender": "M;M;M;M", "homepage": "https://jiahenghu.github.io/;;http://www.cs.utexas.edu/~pstone;https://robertomartinmartin.com/", "dblp": ";245/5008;s/PeterStone;153/7670", "google_scholar": ";https://scholar.google.ca/citations?user=V4KQIWsAAAAJ;qnwjcfAAAAAJ;XOJE8OEAAAAJ", "orcid": ";;0000-0002-6795-420X;0000-0002-9586-2759", "linkedin": ";;;", "or_profile": "~Jiaheng_Hu1;~Zizhao_Wang3;~Peter_Stone1;~Roberto_Mart\u00edn-Mart\u00edn1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2024disentangled,\ntitle={Disentangled Unsupervised Skill Discovery for Efficient Hierarchical Reinforcement Learning},\nauthor={Jiaheng Hu and Zizhao Wang and Peter Stone and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ePOBcWfNFC}\n}", "github": "", "reviewers": "bkGy;hssy;JxTV;QWMX", "pdf_size": 2871041, "rating": "4;5;5;7", "confidence": "3;3;3;4", "soundness": "2;2;2;4", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "38;37;59;52", "wc_strengths": "15;26;25;94", "wc_weaknesses": "42;73;70;69", "wc_questions": "2;5;8;115", "wc_limitations": "2;1;1;10", "wc_review": "99;142;163;340", "wc_reply_reviewers": "0;43;10;75", "wc_reply_authors": "0;0;0;37", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 46.5, 9.340770846134703 ], "wc_strengths_avg": [ 40.0, 31.47220996371243 ], "wc_weaknesses_avg": [ 63.5, 12.5 ], "wc_questions_avg": [ 32.5, 47.67861155696546 ], "wc_limitations_avg": [ 3.5, 3.774917217635375 ], "wc_review_avg": [ 186.0, 91.85586535436917 ], "wc_reply_reviewers_avg": [ 32.0, 29.487285395573462 ], "wc_reply_authors_avg": [ 9.25, 16.021469970012117 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16692592484086753523&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Frustratingly Easy Test-Time Adaptation of Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94270", "id": "eQ6VjBhevn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eQ6VjBhevn", "openreview": "https://openreview.net/forum?id=eQ6VjBhevn", "poster": "/media/PosterPDFs/NeurIPS%202024/94270.png?t=1733244639.660211", "project": "", "author_site": "Matteo Farina, Gianni Franchi, Giovanni Iacca, Massimiliano Mancini, Elisa Ricci", "tldr": "", "abstract": "Vision-Language Models seamlessly discriminate among arbitrary semantic categories, yet they still suffer from poor generalization when presented with challenging examples. For this reason, Episodic Test-Time Adaptation (TTA) strategies have recently emerged as powerful techniques to adapt VLMs in the presence of a single unlabeled image. The recent literature on TTA is dominated by the paradigm of prompt tuning by Marginal Entropy Minimization, which, relying on online backpropagation, inevitably slows down inference while increasing memory. In this work, we theoretically investigate the properties of this approach and unveil that a surprisingly strong TTA method lies dormant and hidden within it. We term this approach ZERO (TTA with \u201czero\u201d temperature), whose design is both incredibly effective and frustratingly simple: augment N times, predict, retain the most confident predictions, and marginalize after setting the Softmax temperature to zero. Remarkably, ZERO requires a single batched forward pass through the vision encoder only and no backward passes. We thoroughly evaluate our approach following the experimental protocol established in the literature and show that ZERO largely surpasses or compares favorably w.r.t. the state-of-the-art while being almost 10\u00d7 faster and 13\u00d7 more memory friendly than standard Test-Time Prompt Tuning. Thanks to its simplicity and comparatively negligible computation, ZERO can serve as a strong baseline for future work in this field. Code will be available.", "keywords": "Vision-Language Models;Test-Time Adaptation;Robustness;Model Calibration", "primary_area": "machine_vision", "supplementary_material": "", "author": "Matteo Farina;Gianni Franchi;Giovanni Iacca;Massimiliano Mancini;Elisa Ricci", "authorids": "~Matteo_Farina1;~Gianni_Franchi1;~Giovanni_Iacca1;~Massimiliano_Mancini1;~Elisa_Ricci1", "gender": "M;M;M;M;F", "homepage": "https://farinamatteo.github.io;https://giannifranchi.github.io/;https://sites.google.com/site/giovanniiacca/;https://mancinimassimiliano.github.io/;http://elisaricci.eu/", "dblp": "290/8440;155/3061;69/9200;192/2058;88/397", "google_scholar": "SxQwDD8AAAAJ;ZCW6-psAAAAJ;qSw6YfcAAAAJ;https://scholar.google.it/citations?user=bqTPA8kAAAAJ;https://scholar.google.it/citations?user=xf1T870AAAAJ", "orcid": "0009-0003-2854-2952;0000-0002-2184-1381;0000-0001-9723-1830;0000-0001-8595-9955;", "linkedin": "matteo-farina-390b4a1ab/;gianni-franchi-94435754/;giovanniiacca/;;", "or_profile": "~Matteo_Farina1;~Gianni_Franchi1;~Giovanni_Iacca1;~Massimiliano_Mancini1;~Elisa_Ricci1", "aff": "University of Trento;ENSTA Paris;University of Trento;University of Trento;University of Trento", "aff_domain": "unitn.it;ensta-paristech.fr;unitn.it;unitn.it;unitn.it", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nfarina2024frustratingly,\ntitle={Frustratingly Easy Test-Time Adaptation of Vision-Language Models},\nauthor={Matteo Farina and Gianni Franchi and Giovanni Iacca and Massimiliano Mancini and Elisa Ricci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eQ6VjBhevn}\n}", "github": "", "reviewers": "Wx8e;a5fT;Wz3n;7eTE", "pdf_size": 2543548, "rating": "3;3;7;8", "confidence": "4;4;2;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "44;94;43;108", "wc_strengths": "37;90;41;66", "wc_weaknesses": "72;315;37;80", "wc_questions": "277;122;5;34", "wc_limitations": "1;47;5;22", "wc_review": "431;668;131;310", "wc_reply_reviewers": "703;400;0;41", "wc_reply_authors": "1969;1867;0;62", "reply_reviewers": "5;4;0;1", "reply_authors": "5;6;1;2", "rating_avg": [ 5.25, 2.277608394786075 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.25, 29.17511782324109 ], "wc_strengths_avg": [ 58.5, 21.313141485947114 ], "wc_weaknesses_avg": [ 126.0, 110.31092420970826 ], "wc_questions_avg": [ 109.5, 105.86902285371298 ], "wc_limitations_avg": [ 18.75, 18.115946014492316 ], "wc_review_avg": [ 385.0, 195.15762859801305 ], "wc_reply_reviewers_avg": [ 286.0, 286.66443797583264 ], "wc_reply_authors_avg": [ 974.5, 944.4433545745345 ], "reply_reviewers_avg": [ 2.5, 2.0615528128088303 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4436069753671345, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6024530939490096886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unitn.it;ensta-paristech.fr;unitn.it;unitn.it;unitn.it", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Trento;\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "aff_unique_dep": ";", "aff_unique_url": "https://www.unitn.it;https://www.ensta.fr", "aff_unique_abbr": "UniTN;ENSTA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Italy;France" }, { "title": "Micro-Bench: A Microscopy Benchmark for Vision-Language Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97589", "id": "eRleg6vy0Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eRleg6vy0Y", "openreview": "https://openreview.net/forum?id=eRleg6vy0Y", "poster": "/media/PosterPDFs/NeurIPS%202024/97589.png?t=1733621660.2917323", "project": "", "author_site": "Alejandro Lozano, Jeffrey Nirschl, James Burgess, Sanket Rajan Gupte, Yuhui Zhang, Alyssa Unell, Serena Yeung", "tldr": "", "abstract": "Recent advances in microscopy have enabled the rapid generation of terabytes of image data in cell biology and biomedical research. Vision-language models (VLMs) offer a promising solution for large-scale biological image analysis, enhancing researchers\u2019 efficiency, identifying new image biomarkers, and accelerating hypothesis generation and scientific discovery. However, there is a lack of standardized, diverse, and large-scale vision-language benchmarks to evaluate VLMs\u2019 perception and cognition capabilities in biological image understanding. To address this gap, we introduce Micro-Bench, an expert-curated benchmark encompassing 24 biomedical tasks across various scientific disciplines (biology, pathology), microscopy modalities (electron, fluorescence, light), scales (subcellular, cellular, tissue), and organisms in both normal and abnormal states. We evaluate state-of-the-art biomedical, pathology, and general VLMs on Micro-Bench and find that: i) current models struggle on all categories, even for basic tasks such as distinguishing microscopy modalities; ii) current specialist models fine-tuned on biomedical data often perform worse than generalist models; iii) fine-tuning in specific microscopy domains can cause catastrophic forgetting, eroding prior biomedical knowledge encoded in their base model. iv) weight interpolation between fine-tuned and pre-trained models offers one solution to forgetting and improves general performance across biomedical tasks. We release Micro-Bench under a permissive license to accelerate the research and development of microscopy foundation models.", "keywords": "Vision-Language Models;Benchmark;Microscopy;Biology;Biomedical Imaging", "primary_area": "", "supplementary_material": "/attachment/c1dd8058ed720c42612c1bf50cbf5dbb1b2b3767.pdf", "author": "Alejandro Lozano;Jeffrey J Nirschl;James Burgess;Sanket Rajan Gupte;Yuhui Zhang;Alyssa Unell;Serena Yeung-Levy", "authorids": "~Alejandro_Lozano1;~Jeffrey_J_Nirschl1;~James_Burgess2;~Sanket_Rajan_Gupte1;~Yuhui_Zhang3;~Alyssa_Unell1;~Serena_Yeung-Levy1", "gender": "M;M;M;M;M;;F", "homepage": "https://github.com/Ale9806;https://profiles.stanford.edu/jeffrey-nirschl;https://jmhb0.github.io/;https://sanket.io/;https://cs.stanford.edu/~yuhuiz/;;http://ai.stanford.edu/~syyeung/", "dblp": ";221/7626;;204/4073;;;147/5023", "google_scholar": "EBPnTFMAAAAJ;AJJwTxoAAAAJ;;75EQD3QAAAAJ;X-Agfu8AAAAJ;;Tw2m5kUAAAAJ", "orcid": "0000-0002-6952-289X;0000-0001-6857-341X;;0000-0002-2107-600X;;;0000-0003-0529-0628", "linkedin": "eduardoalejandrolozanogarciaipn9806/;jeff-nirschl-56700918/;jmhb/;sanketgupte;;alyssa-unell-a8a9b81a9/;", "or_profile": "~Alejandro_Lozano1;~Jeffrey_J_Nirschl1;~James_Burgess2;~Sanket_Rajan_Gupte1;~Yuhui_Zhang3;~Alyssa_Unell1;~Serena_Yeung1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlozano2024microbench,\ntitle={Micro-Bench: A Microscopy Benchmark for Vision-Language Understanding},\nauthor={Alejandro Lozano and Jeffrey J Nirschl and James Burgess and Sanket Rajan Gupte and Yuhui Zhang and Alyssa Unell and Serena Yeung-Levy},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=eRleg6vy0Y}\n}", "github": "", "reviewers": "YbH8;jGSc;Fyh1;2NDi", "pdf_size": 2479831, "rating": "3;7;8;9", "confidence": "5;3;4;5", "wc_summary_and_contributions": "22;46;62;126", "wc_strengths": "78;43;40;196", "wc_improvement": "1269;107;1;140", "wc_limitations": "5;26;45;12", "wc_correctness": "4;8;1;7", "wc_clarity": "27;10;6;1", "wc_relation_to_prior_work": "1;16;2;17", "wc_documentation": "1;28;2;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "1408;285;160;501", "wc_reply_reviewers": "465;6;44;0", "wc_reply_authors": "69;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.75, 2.277608394786075 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 64.0, 38.52272056851645 ], "wc_strengths_avg": [ 89.25, 63.41677617160936 ], "wc_improvement_avg": [ 379.25, 516.2578691894197 ], "wc_limitations_avg": [ 22.0, 15.280706789936126 ], "wc_correctness_avg": [ 5.0, 2.7386127875258306 ], "wc_clarity_avg": [ 11.0, 9.772410142846033 ], "wc_relation_to_prior_work_avg": [ 9.0, 7.516648189186454 ], "wc_documentation_avg": [ 8.0, 11.554220008291344 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 588.5, 488.6105299724925 ], "wc_reply_reviewers_avg": [ 128.75, 194.86581921927714 ], "wc_reply_authors_avg": [ 17.25, 29.877876430563134 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.2316661873125233, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17475507841922352533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Who's asking? User personas and the mechanics of latent misalignment", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94269", "id": "eSes1Mic9d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eSes1Mic9d", "openreview": "https://openreview.net/forum?id=eSes1Mic9d", "poster": "/media/PosterPDFs/NeurIPS%202024/94269.png?t=1731595541.1402721", "project": "", "author_site": "Asma Ghandeharioun, Ann Yuan, Marius Guerard, Emily Reif, Michael Lepori, Lucas Dixon", "tldr": "", "abstract": "Studies show that safety-tuned models may nevertheless divulge harmful information. In this work, we show that whether they do so depends significantly on who they are talking to, which we refer to as *user persona*. In fact, we find manipulating user persona to be more effective for eliciting harmful content than certain more direct attempts to control model refusal. We study both natural language prompting and activation steering as intervention methods and show that activation steering is significantly more effective at bypassing safety filters.\nWe shed light on the mechanics of this phenomenon by showing that even when model generations are safe, harmful content can persist in hidden representations and can be extracted by decoding from earlier layers. We also show we can predict a persona\u2019s effect on refusal given only the geometry of its steering vector. Finally, we show that certain user personas induce the model to form more charitable interpretations of otherwise dangerous queries.", "keywords": "safety;interpretability;explainability;NLP;alignment;activation engineering;jailbreaking", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/b5c61f39382620a73ac029b3c3e21c403445344c.zip", "author": "Asma Ghandeharioun;Ann Yuan;Marius Guerard;Emily Reif;Michael A. Lepori;Lucas Dixon", "authorids": "~Asma_Ghandeharioun1;~Ann_Yuan1;~Marius_Guerard1;~Emily_Reif2;~Michael_A._Lepori1;~Lucas_Dixon1", "gender": ";F;M;F;M;Not Specified", "homepage": "https://alum.mit.edu/www/asma_gh;;;;https://lepori.xyz/;https://research.google/people/lucas-dixon/", "dblp": "124/3110;196/3767;;;262/0162;39/6853", "google_scholar": "CkfQy2gAAAAJ;;;J1hMgtAAAAAJ;G1fepc8AAAAJ;nDs3-TMAAAAJ", "orcid": ";;;;;0000-0003-1094-1675", "linkedin": ";;mariusguerard/;emily-reif-4b995884;michael-lepori-925426124/;lucas-dixon-94070354/", "or_profile": "~Asma_Ghandeharioun1;~Ann_Yuan1;~Marius_Guerard1;~Emily_Reif2;~Michael_A._Lepori1;~Lucas_Dixon1", "aff": "Google;Google;Google;Google;Brown University;Research, Google", "aff_domain": "google.com;google.com;google.com;google.com;brown.edu;research.google.com", "position": "Research Scientist;Researcher;Researcher;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\nghandeharioun2024whos,\ntitle={Who's asking? User personas and the mechanics of latent misalignment},\nauthor={Asma Ghandeharioun and Ann Yuan and Marius Guerard and Emily Reif and Michael A. Lepori and Lucas Dixon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eSes1Mic9d}\n}", "github": "", "reviewers": "19wX;zwX8;XbKV;AgYD", "pdf_size": 2882573, "rating": "7;7;7;8", "confidence": "3;4;3;3", "soundness": "4;3;4;4", "novelty": "3;3;4;4", "presentation": "4;4;4;4", "wc_summary": "67;49;89;167", "wc_strengths": "103;59;60;85", "wc_weaknesses": "26;37;28;149", "wc_questions": "176;46;1;100", "wc_limitations": "30;13;2;22", "wc_review": "402;204;180;523", "wc_reply_reviewers": "76;0;9;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 93.0, 45.0111097397076 ], "wc_strengths_avg": [ 76.75, 18.38987493160299 ], "wc_weaknesses_avg": [ 60.0, 51.55094567512802 ], "wc_questions_avg": [ 80.75, 65.21263297858782 ], "wc_limitations_avg": [ 16.75, 10.425329730996522 ], "wc_review_avg": [ 327.25, 142.10801349677646 ], "wc_reply_reviewers_avg": [ 26.25, 29.583568074186047 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16040441916144855627&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;google.com;google.com;google.com;brown.edu;research.google.com", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Google;Brown University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.brown.edu", "aff_unique_abbr": "Google;Brown", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Only Strict Saddles in the Energy Landscape of Predictive Coding Networks?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94268", "id": "eTu6kvrkSq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eTu6kvrkSq", "openreview": "https://openreview.net/forum?id=eTu6kvrkSq", "poster": "/media/PosterPDFs/NeurIPS%202024/94268.png?t=1730999003.430903", "project": "", "author_site": "Francesco Innocenti, El Mehdi Achour, Ryan Singh, Christopher L Buckley", "tldr": "", "abstract": "Predictive coding (PC) is an energy-based learning algorithm that performs iterative inference over network activities before updating weights. Recent work suggests that PC can converge in fewer learning steps than backpropagation thanks to its inference procedure. However, these advantages are not always observed, and the impact of PC inference on learning is not theoretically well understood. To address this gap, we study the geometry of the PC weight landscape at the inference equilibrium of the network activities. For deep linear networks, we first show that the equilibrated PC energy is equal to a rescaled mean squared error loss with a weight-dependent rescaling. We then prove that many highly degenerate (non-strict) saddles of the loss including the origin become much easier to escape (strict) in the equilibrated energy. Experiments on both linear and non-linear networks strongly validate our theory and further suggest that all the saddles of the equilibrated energy are strict. Overall, this work shows that PC inference makes the loss landscape of feedforward networks more benign and robust to vanishing gradients, while also highlighting the fundamental challenge of scaling PC to very deep models.", "keywords": "Predictive Coding;Backpropagation;Deep Neural Networks;Loss Landscape;Saddle Points;Gradient Descent;Vanishing Gradients;Local Learning;Inference Learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Francesco Innocenti;El Mehdi Achour;Ryan Singh;Christopher Buckley", "authorids": "~Francesco_Innocenti1;~El_Mehdi_Achour1;~Ryan_Singh1;~Christopher_Buckley1", "gender": "M;M;M;M", "homepage": "https://francescoinnocenti.github.io/;;;https://christopherlbuckley.com/", "dblp": "36/10936;319/6566;;37/3540.html", "google_scholar": "t6Sq56EAAAAJ;A-i6nwgAAAAJ;Ukqus4oAAAAJ;https://scholar.google.co.uk/citations?user=nWuZ0XcAAAAJ", "orcid": ";;0009-0007-9598-4645;0000-0002-8551-9121", "linkedin": "francescoinnocenti/;https://fr.linkedin.com/in/el-mehdi-achour-5baa28135;;", "or_profile": "~Francesco_Innocenti1;~El_Mehdi_Achour1;~Ryan_Singh1;~Christopher_Buckley1", "aff": "University of Sussex;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;University of Sussex;University of Sussex", "aff_domain": "sussex.ac.uk;rwth-aachen.de;sussex.ac.uk;sussex.ac.uk", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\ninnocenti2024only,\ntitle={Only Strict Saddles in the Energy Landscape of Predictive Coding Networks?},\nauthor={Francesco Innocenti and El Mehdi Achour and Ryan Singh and Christopher Buckley},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eTu6kvrkSq}\n}", "github": "", "reviewers": "hPaX;KSTi;cEwT", "pdf_size": 15946329, "rating": "5;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "76;177;85", "wc_strengths": "117;114;51", "wc_weaknesses": "411;16;229", "wc_questions": "4;412;54", "wc_limitations": "1;39;9", "wc_review": "609;758;428", "wc_reply_reviewers": "113;114;121", "wc_reply_authors": "45;204;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 112.66666666666667, 45.63867755411948 ], "wc_strengths_avg": [ 94.0, 30.430248109405877 ], "wc_weaknesses_avg": [ 218.66666666666666, 161.42352850670673 ], "wc_questions_avg": [ 156.66666666666666, 181.69816240738984 ], "wc_limitations_avg": [ 16.333333333333332, 16.35712552851373 ], "wc_review_avg": [ 598.3333333333334, 134.9329051376605 ], "wc_reply_reviewers_avg": [ 116.0, 3.559026084010437 ], "wc_reply_authors_avg": [ 83.0, 87.50999942863673 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1627301626981080570&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sussex.ac.uk;rwth-aachen.de;sussex.ac.uk;sussex.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Sussex;RWTH Aachen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sussex.ac.uk;https://www.rwth-aachen.de", "aff_unique_abbr": "Sussex;RWTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Aachen", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "SpeAr: A Spectral Approach for Zero-Shot Node Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94267", "id": "eU87jJyEK5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eU87jJyEK5", "openreview": "https://openreview.net/forum?id=eU87jJyEK5", "poster": "/media/PosterPDFs/NeurIPS%202024/94267.png?t=1731591682.3374598", "project": "", "author_site": "Ting Guo, Da Wang, Jiye Liang, Kaihan Zhang, Jianchao Zeng", "tldr": "", "abstract": "Zero-shot node classification is a vital task in the field of graph data processing, aiming to identify nodes of classes unseen during the training process. Prediction bias is one of the primary challenges in zero-shot node classification, referring to the model's propensity to misclassify nodes of unseen classes as seen classes. However, most methods introduce external knowledge to mitigate the bias, inadequately leveraging the inherent cluster information within the unlabeled nodes. To address this issue, we employ spectral analysis coupled with learnable class prototypes to discover the implicit cluster structures within the graph, providing a more comprehensive understanding of classes. In this paper, we propose a spectral approach for zero-shot node classification (SpeAr). Specifically, we establish an approximate relationship between minimizing the spectral contrastive loss and performing spectral decomposition on the graph, thereby enabling effective node characterization through loss minimization. Subsequently, the class prototypes are iteratively refined based on the learned node representations, initialized with the semantic vectors. Finally, extensive experiments verify the effectiveness of the SpeAr, which can further alleviate the bias problem.", "keywords": "zero-shot node classification;graph neural networks;class prototypes;spectral contrastive loss", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/1c74a959c61c689a41fe991e0075d807a6158849.zip", "author": "Ting Guo;Da Wang;Jiye Liang;Kaihan Zhang;Jianchao Zeng", "authorids": "~Ting_Guo3;~Da_Wang2;~Jiye_Liang1;~Kaihan_Zhang1;~Jianchao_Zeng1", "gender": "F;M;M;F;M", "homepage": ";;https://jiyeliang.github.io/index.html;https://cst.nuc.edu.cn/info/1050/7645.htm;", "dblp": "64/3254.html;10/3366;80/6535;;17/4266-1.html", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;7Nlkm7IAAAAJ;iGc61hUAAAAJ;;", "orcid": ";;0000-0001-5887-9327;;", "linkedin": ";;;;", "or_profile": "~Ting_Guo3;~Da_Wang2;~Jiye_Liang1;~Kaihan_Zhang1;~Jianchao_Zeng1", "aff": "North University of China;Shanxi University;Shanxi University;North University of China;North University of China", "aff_domain": "nuc.edu.cn;sxu.edu.cn;sxu.edu.cn;nuc.edu.cn;nuc.edu.cn", "position": "Lecturer;PhD student;Full Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nguo2024spear,\ntitle={SpeAr: A Spectral Approach for Zero-Shot Node Classification},\nauthor={Ting Guo and Da Wang and Jiye Liang and Kaihan Zhang and Jianchao Zeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eU87jJyEK5}\n}", "github": "", "reviewers": "rWPM;7tvx;HC2X", "pdf_size": 1101064, "rating": "5;7;7", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "66;65;119", "wc_strengths": "58;104;59", "wc_weaknesses": "113;5;41", "wc_questions": "96;161;127", "wc_limitations": "11;3;13", "wc_review": "344;338;359", "wc_reply_reviewers": "28;24;137", "wc_reply_authors": "89;66;418", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 83.33333333333333, 25.223445883190152 ], "wc_strengths_avg": [ 73.66666666666667, 21.452790546272116 ], "wc_weaknesses_avg": [ 53.0, 44.8998886412873 ], "wc_questions_avg": [ 128.0, 26.54555832275273 ], "wc_limitations_avg": [ 9.0, 4.320493798938574 ], "wc_review_avg": [ 347.0, 8.831760866327848 ], "wc_reply_reviewers_avg": [ 63.0, 52.351376931907595 ], "wc_reply_authors_avg": [ 191.0, 160.78764463312058 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PbYgxmFd2uQJ:scholar.google.com/&scioq=SpeAr:+A+Spectral+Approach+for+Zero-Shot+Node+Classification&hl=en&as_sdt=0,11", "gs_version_total": 0, "email": "nuc.edu.cn;sxu.edu.cn;sxu.edu.cn;nuc.edu.cn;nuc.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "North University of China;Shanxi University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuc.edu.cn;http://www.sxu.edu.cn", "aff_unique_abbr": "NUC;SXU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Generating Origin-Destination Matrices in Neural Spatial Interaction Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94266", "id": "eUcyIe1AzY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eUcyIe1AzY", "openreview": "https://openreview.net/forum?id=eUcyIe1AzY", "poster": "/media/PosterPDFs/NeurIPS%202024/94266.png?t=1729087386.8718219", "project": "", "author_site": "Ioannis Zachos, Mark Girolami, Theodoros Damoulas", "tldr": "", "abstract": "Agent-based models (ABMs) are proliferating as decision-making tools across policy areas in transportation, economics, and epidemiology. In these models, a central object of interest is the discrete origin-destination matrix which captures spatial interactions and agent trip counts between locations. Existing approaches resort to continuous approximations of this matrix and subsequent ad-hoc discretisations in order to perform ABM simulation and calibration. This impedes conditioning on partially observed summary statistics, fails to explore the multimodal matrix distribution over a discrete combinatorial support, and incurs discretisation errors. To address these challenges, we introduce a computationally efficient framework that scales linearly with the number of origin-destination pairs, operates directly on the discrete combinatorial space, and learns the agents' trip intensity through a neural differential equation that embeds spatial interactions. Our approach outperforms the prior art in terms of reconstruction error and ground truth matrix coverage, at a fraction of the computational cost. We demonstrate these benefits in two large-scale spatial mobility ABMs in Washington, DC and Cambridge, UK.", "keywords": "multiagent systems;neural differential equations;contingency tables;agent-based modelling;markov bases;origin-destination matrix", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/e985180efda03095a41b9980651ff7479189b090.zip", "author": "Ioannis Zachos;Mark Girolami;Theodoros Damoulas", "authorids": "~Ioannis_Zachos1;~Mark_Girolami2;~Theodoros_Damoulas1", "gender": "M;Not Specified;M", "homepage": "https://yannisza.github.io/;http://www.eng.cam.ac.uk/profiles/mag92;https://warwick.ac.uk/fac/sci/statistics/staff/academic-research/damoulas/", "dblp": ";g/MarkAGirolami.html;31/1929", "google_scholar": "GfymnikAAAAJ;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.co.uk/citations?user=sRg-VkwAAAAJ", "orcid": "0000-0002-7503-2117;;", "linkedin": "izachos/;;", "or_profile": "~Ioannis_Zachos1;~Mark_Girolami2;~Theo_Damoulas1", "aff": "University of Cambridge;University of Cambridge;University of Warwick", "aff_domain": "cam.ac.uk;cam.ac.uk;warwick.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzachos2024generating,\ntitle={Generating Origin-Destination Matrices in Neural Spatial Interaction Models},\nauthor={Ioannis Zachos and Mark Girolami and Theodoros Damoulas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eUcyIe1AzY}\n}", "github": "", "reviewers": "dVmA;qQL1;C8eo;h7J4;C3cp;sr5H", "pdf_size": 1213876, "rating": "5;5;6;6;6;6", "confidence": "5;1;1;2;3;3", "soundness": "3;2;3;3;3;3", "novelty": "3;2;3;3;3;3", "presentation": "3;2;2;3;2;3", "wc_summary": "94;80;77;59;80;79", "wc_strengths": "111;71;58;72;78;101", "wc_weaknesses": "88;528;60;154;117;309", "wc_questions": "73;22;52;40;70;72", "wc_limitations": "1;5;3;21;1;102", "wc_review": "367;706;250;346;346;663", "wc_reply_reviewers": "10;15;16;0;13;59", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.5, 1.3844373104863459 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.16666666666667, 10.221165404307976 ], "wc_strengths_avg": [ 81.83333333333333, 18.324998105199235 ], "wc_weaknesses_avg": [ 209.33333333333334, 163.34591788253812 ], "wc_questions_avg": [ 54.833333333333336, 18.97732565165305 ], "wc_limitations_avg": [ 22.166666666666668, 36.3612278242758 ], "wc_review_avg": [ 446.3333333333333, 172.88981719259493 ], "wc_reply_reviewers_avg": [ 18.833333333333332, 18.72090334952409 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.25537695922762454, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13036075616619738350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cam.ac.uk;cam.ac.uk;warwick.ac.uk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Cambridge;University of Warwick", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.warwick.ac.uk", "aff_unique_abbr": "Cambridge;Warwick", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "CountGD: Multi-Modal Open-World Counting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94265", "id": "eUg64OsGDE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eUg64OsGDE", "openreview": "https://openreview.net/forum?id=eUg64OsGDE", "poster": "/media/PosterPDFs/NeurIPS%202024/94265.png?t=1733576281.3782125", "project": "", "author_site": "Niki Amini-Naieni, Tengda Han, Andrew Zisserman", "tldr": "", "abstract": "The goal of this paper is to improve the generality and accuracy of open-vocabulary object counting in images. To improve the generality, we repurpose an open-vocabulary detection foundation model (GroundingDINO) for the counting task, and also extend its capabilities by introducing modules to enable specifying the target object to count by visual exemplars. In turn, these new capabilities -- being able to specify the target object by multi-modalites (text and exemplars) -- lead to an improvement in counting accuracy. We make three contributions: First, we introduce the first open-world counting model, CountGD, where the prompt can be specified by a text description or visual exemplars or both; Second, we show that the performance of the model significantly improves the state of the art on multiple counting benchmarks -- when using text only, CountGD outperforms all previous text-only works, and when using both text and visual exemplars, we outperform all previous models; Third, we carry out a preliminary study into different interactions between the text and visual exemplar prompts, including the cases where they reinforce each other and where one restricts the other. The code and an app to test the model are available at https://www.robots.ox.ac.uk/vgg/research/countgd/.", "keywords": "multi-modal open-world counting;vision-language foundation model;open-world object counting;class-agnostic counting;text-specified counting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Niki Amini-Naieni;Tengda Han;Andrew Zisserman", "authorids": "~Niki_Amini-Naieni2;~Tengda_Han1;~Andrew_Zisserman1", "gender": "F;M;", "homepage": ";https://tengdahan.github.io/;", "dblp": "348/8890;203/8188;", "google_scholar": ";https://scholar.google.co.uk/citations?user=SbAuWREAAAAJ;", "orcid": ";0000-0002-1874-9664;", "linkedin": "niki-amini-naieni/;;", "or_profile": "~Niki_Amini-Naieni2;~Tengda_Han1;~Andrew_Zisserman1", "aff": "University of Oxford;University of Oxford;", "aff_domain": "eng.ox.ac.uk;robots.ox.ac.uk;", "position": "PhD student;Postdoc;", "bibtex": "@inproceedings{\namini-naieni2024countgd,\ntitle={Count{GD}: Multi-Modal Open-World Counting},\nauthor={Niki Amini-Naieni and Tengda Han and Andrew Zisserman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eUg64OsGDE}\n}", "github": "", "reviewers": "QCsU;44bk;Jn2T", "pdf_size": 5522028, "rating": "4;5;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;4", "presentation": "4;3;3", "wc_summary": "99;73;56", "wc_strengths": "70;57;84", "wc_weaknesses": "183;393;38", "wc_questions": "193;47;51", "wc_limitations": "6;10;1", "wc_review": "551;580;230", "wc_reply_reviewers": "149;134;127", "wc_reply_authors": "141;312;278", "reply_reviewers": "1;2;2", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.0, 17.682382946499793 ], "wc_strengths_avg": [ 70.33333333333333, 11.025223605694151 ], "wc_weaknesses_avg": [ 204.66666666666666, 145.73568159132782 ], "wc_questions_avg": [ 97.0, 67.90189000805992 ], "wc_limitations_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_review_avg": [ 453.6666666666667, 158.59872494933734 ], "wc_reply_reviewers_avg": [ 136.66666666666666, 9.177266598624136 ], "wc_reply_authors_avg": [ 243.66666666666666, 73.91135832122751 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17200966682726035594&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 9, "email": "eng.ox.ac.uk;robots.ox.ac.uk;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "The Expressive Capacity of State Space Models: A Formal Language Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94264", "id": "eV5YIrJPdy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eV5YIrJPdy", "openreview": "https://openreview.net/forum?id=eV5YIrJPdy", "poster": "/media/PosterPDFs/NeurIPS%202024/94264.png?t=1732561180.232848", "project": "", "author_site": "Yash Sarrof, Yana Veitsman, Michael Hahn", "tldr": "", "abstract": "Recently, recurrent models based on linear state space models (SSMs) have shown promising performance in language modeling (LM), competititve with transformers. However, there is little understanding of the in-principle abilities of such models, which could provide useful guidance to the search for better LM architectures. We present a comprehensive theoretical study of the capacity of such SSMs as it compares to that of transformers and traditional RNNs. We find that SSMs and transformers have overlapping but distinct strengths. In star-free state tracking, SSMs implement straightforward and exact solutions to problems that transformers struggle to represent exactly. They can also model bounded hierarchical structure with optimal memory even without simulating a stack. On the other hand, we identify a design choice in current SSMs that limits their expressive power. We discuss implications for SSM and LM research, and verify results empirically on a recent SSM, Mamba.", "keywords": "state-space models;formal languages;expressivity;theory", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/0023e5cee821c211e97ce5f3d8faf87aaf42fb33.zip", "author": "Yash Sarrof;Yana Veitsman;Michael Hahn", "authorids": "~Yash_Sarrof1;~Yana_Veitsman1;~Michael_Hahn1", "gender": "F;M;M", "homepage": ";https://www.mhahn.info/;https://yashsarrof.com", "dblp": "378/1390;https://dblp.uni-trier.de/pid/44/9903;378/0980", "google_scholar": "3j3Myg0AAAAJ;;flWrpAoAAAAJ", "orcid": ";;", "linkedin": "yveitsman/;;yash-raj-sarrof-094364142/", "or_profile": "~Yana_Veitsman1;~Michael_Hahn1;~Yash_Raj_Sarrof1", "aff": "Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes", "aff_domain": "uni-saarland.de;uni-saarland.de;uni-saarland.de", "position": "MS student;Assistant Professor;MS student", "bibtex": "@inproceedings{\nsarrof2024the,\ntitle={The Expressive Capacity of State Space Models: A Formal Language Perspective},\nauthor={Yash Sarrof and Yana Veitsman and Michael Hahn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eV5YIrJPdy}\n}", "github": "", "reviewers": "px5T;9mjL;6tRo;tByQ", "pdf_size": 679823, "rating": "6;7;7;8", "confidence": "3;4;2;5", "soundness": "2;3;3;3", "novelty": "2;4;3;4", "presentation": "3;2;2;4", "wc_summary": "32;202;250;107", "wc_strengths": "35;161;37;119", "wc_weaknesses": "251;399;112;140", "wc_questions": "94;119;86;99", "wc_limitations": "23;76;1;4", "wc_review": "435;957;486;469", "wc_reply_reviewers": "87;97;49;96", "wc_reply_authors": "56;111;43;28", "reply_reviewers": "2;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 147.75, 84.34564304100124 ], "wc_strengths_avg": [ 88.0, 54.08326913195984 ], "wc_weaknesses_avg": [ 225.5, 112.85499545877444 ], "wc_questions_avg": [ 99.5, 12.175795661885921 ], "wc_limitations_avg": [ 26.0, 30.074906483645133 ], "wc_review_avg": [ 586.75, 214.55113026968652 ], "wc_reply_reviewers_avg": [ 82.25, 19.587942719948924 ], "wc_reply_authors_avg": [ 59.5, 31.34086788842964 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6324555320336759, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2179199620311798873&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uni-saarland.de;uni-saarland.de;uni-saarland.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Universit\u00e4t des Saarlandes", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-saarland.de", "aff_unique_abbr": "UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "eVAzmJgrAc", "title": "Lost in Translation: Benchmarking Commercial Machine Translation Models for Dyslexic-Style Text", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Dyslexia is a neurodivergence that impacts one's ability to process and produce textual information. While previous research has identified unique patterns in the writings of people with dyslexia - such as letter swapping and homophone confusion - that differ themselves from the text typically used in the training and evaluation of common natural language processing (NLP) systems such as machine translation (MT), it is unclear how current state-of-the-art NLP systems perform for users with dyslexia. In this work, we explore this topic through a systematic audit of the performance of commercial MT services using synthetic dyslexia data. By injecting common dyslexia-style writing errors into popular benchmarking datasets, we benchmark the performance of three commercial MT services and one large language model (LLM) with various types and quantities of dyslexia-style errors and show a substantial disparity in MT quality for dyslexic and non-dyslexic text. While people with dyslexia often rely on modern NLP tools as assistive technologies, our results shed light on the fairness challenges experienced by this demographic with popular NLP services, highlighting the need to develop more inclusive and equitable NLP models for users with diverse language use patterns.", "keywords": "NLP;MT;Dyslexia;Disparities;LLM", "primary_area": "", "supplementary_material": "/attachment/5f9f659ac1ea65e73e7484bfa6569a702a8b0e8f.pdf", "author": "Gregory Price;Shaomei Wu", "authorids": "~Gregory_Price2;~Shaomei_Wu1", "gender": ";", "homepage": ";https://www.shaomei.info/", "dblp": ";32/5316", "google_scholar": ";Y0xVWqYAAAAJ", "orcid": ";", "linkedin": "greg-price-4-hire/;shaomei/", "or_profile": "~Gregory_Price2;~Shaomei_Wu1", "aff": "University of Ottawa;AImpower.org", "aff_domain": "uottawa.ca;aimpower.org", "position": "MS student;Founder and CEO", "bibtex": "@misc{\nanonymous2024lost,\ntitle={Lost in Translation: Benchmarking Commercial Machine Translation Models for Dyslexic-Style Text},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=eVAzmJgrAc}\n}", "github": "", "project": "", "reviewers": "UQGJ;BrsV;ix6v", "site": "https://openreview.net/forum?id=eVAzmJgrAc", "pdf_size": 591363, "rating": "2;4;4", "confidence": "5;4;4", "wc_summary_and_contributions": "54;153;50", "wc_strengths": "13;109;11", "wc_improvement": "117;211;91", "wc_limitations": "5;1;2", "wc_correctness": "10;32;2", "wc_clarity": "7;8;2", "wc_relation_to_prior_work": "8;46;2", "wc_documentation": "10;12;2", "wc_additional_feedback": "1;1;1", "wc_review": "225;573;163", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 3.3333333333333335, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 85.66666666666667, 47.6398525979621 ], "wc_strengths_avg": [ 44.333333333333336, 45.73352769637999 ], "wc_improvement_avg": [ 139.66666666666666, 51.54501808020722 ], "wc_limitations_avg": [ 2.6666666666666665, 1.699673171197595 ], "wc_correctness_avg": [ 14.666666666666666, 12.684198393626966 ], "wc_clarity_avg": [ 5.666666666666667, 2.6246692913372702 ], "wc_relation_to_prior_work_avg": [ 18.666666666666668, 19.48218559493661 ], "wc_documentation_avg": [ 8.0, 4.320493798938574 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 320.3333333333333, 180.44636014308765 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fVeNWuUw3b8J:scholar.google.com/&scioq=Lost+in+Translation:+Benchmarking+Commercial+Machine+Translation+Models+for+Dyslexic-Style+Text&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1", "aff_unique_norm": "University of Ottawa;AImpower", "aff_unique_dep": ";", "aff_unique_url": "https://www.uottawa.ca;https://www.aimpower.org", "aff_unique_abbr": "U Ottawa;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "Statistical Efficiency of Distributional Temporal Difference Learning", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94263", "id": "eWUM5hRYgH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eWUM5hRYgH", "openreview": "https://openreview.net/forum?id=eWUM5hRYgH", "poster": "/media/PosterPDFs/NeurIPS%202024/94263.png?t=1729599214.8290908", "project": "", "author_site": "Yang Peng, Liangyu Zhang, Zhihua Zhang", "tldr": "", "abstract": "Distributional reinforcement learning (DRL) has achieved empirical success in various domains.\nOne of the core tasks in the field of DRL is distributional policy evaluation, which involves estimating the return distribution $\\eta^\\pi$ for a given policy $\\pi$.\nThe distributional temporal difference learning has been accordingly proposed, which\nis an extension of the temporal difference learning (TD) in the classic RL area.\nIn the tabular case, Rowland et al. [2018] and Rowland et al. [2023] proved the asymptotic convergence of two instances of distributional TD, namely categorical temporal difference learning (CTD) and quantile temporal difference learning (QTD), respectively.\nIn this paper, we go a step further and analyze the finite-sample performance of distributional TD.\nTo facilitate theoretical analysis, we propose a non-parametric distributional TD learning (NTD).\nFor a $\\gamma$-discounted infinite-horizon tabular Markov decision process,\nwe show that for NTD we need $\\widetilde O\\left(\\frac{1}{\\varepsilon^{2p}(1-\\gamma)^{2p+1}}\\right)$ iterations to achieve an $\\varepsilon$-optimal estimator with high probability, when the estimation error is measured by the $p$-Wasserstein distance.\nThis sample complexity bound is minimax optimal (up to logarithmic factors) in the case of the $1$-Wasserstein distance.\nTo achieve this, we establish a novel Freedman's inequality in Hilbert spaces, which would be of independent interest.\nIn addition, we revisit CTD, showing that the same non-asymptotic convergence bounds hold for CTD in the case of the $p$-Wasserstein distance.", "keywords": "Distributional Reinforcement Learning;Distributional Temporal Difference Learning;Sample Complexity", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yang Peng;Liangyu Zhang;Zhihua Zhang", "authorids": "~Yang_Peng1;~Liangyu_Zhang2;~Zhihua_Zhang1", "gender": "M;M;M", "homepage": ";https://zhangliangyu32.github.io/;http://www.math.pku.edu.cn/teachers/zhzhang/", "dblp": ";123/7110;52/5331", "google_scholar": "6OlQcpEAAAAJ;rmjtiikAAAAJ;", "orcid": "0000-0002-0063-3102;;", "linkedin": ";;", "or_profile": "~Yang_Peng1;~Liangyu_Zhang2;~Zhihua_Zhang1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npeng2024statistical,\ntitle={Statistical Efficiency of Distributional Temporal Difference Learning},\nauthor={Yang Peng and Liangyu Zhang and Zhihua Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eWUM5hRYgH}\n}", "github": "", "reviewers": "KRno;CFYY;bMoZ", "pdf_size": 490152, "rating": "3;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "1;3;3", "presentation": "2;3;2", "wc_summary": "78;65;73", "wc_strengths": "31;78;48", "wc_weaknesses": "244;95;260", "wc_questions": "24;52;121", "wc_limitations": "26;11;4", "wc_review": "403;301;506", "wc_reply_reviewers": "0;135;41", "wc_reply_authors": "0;226;39", "reply_reviewers": "0;2;1", "reply_authors": "1;3;2", "rating_avg": [ 5.666666666666667, 1.8856180831641267 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 5.354126134736337 ], "wc_strengths_avg": [ 52.333333333333336, 19.430788855719562 ], "wc_weaknesses_avg": [ 199.66666666666666, 74.29819797425925 ], "wc_questions_avg": [ 65.66666666666667, 40.762182909598394 ], "wc_limitations_avg": [ 13.666666666666666, 9.177266598624136 ], "wc_review_avg": [ 403.3333333333333, 83.691231453613 ], "wc_reply_reviewers_avg": [ 58.666666666666664, 56.51155240794183 ], "wc_reply_authors_avg": [ 88.33333333333333, 98.63850949581283 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11612540020065005444&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Exploring Token Pruning in Vision State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94262", "id": "eWiGn0Fcdx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eWiGn0Fcdx", "openreview": "https://openreview.net/forum?id=eWiGn0Fcdx", "poster": "", "project": "", "author_site": "Zheng Zhan, Zhenglun Kong, Yifan Gong, Yushu Wu, Zichong Meng, Hangyu Zheng, Xuan Shen, Stratis Ioannidis, Wei Niu, Pu Zhao, Yanzhi Wang", "tldr": "", "abstract": "State Space Models (SSMs) have the advantage of keeping linear computational complexity compared to attention modules in transformers, and have been applied to vision tasks as a new type of powerful vision foundation model. Inspired by the observations that the final prediction in vision transformers (ViTs) is only based on a subset of most informative tokens, we take the novel step of enhancing the efficiency of SSM-based vision models through token-based pruning. However, direct applications of existing token pruning techniques designed for ViTs fail to deliver good performance, even with extensive fine-tuning. To address this issue, we revisit the unique computational characteristics of SSMs and discover that naive application disrupts the sequential token positions. This insight motivates us to design a novel and general token pruning method specifically for SSM-based vision models. We first introduce a pruning-aware hidden state alignment method to stabilize the neighborhood of remaining tokens for performance enhancement. Besides, based on our detailed analysis, we propose a token importance evaluation method adapted for SSM models, to guide the token pruning. With efficient implementation and practical acceleration methods, our method brings actual speedup. Extensive experiments demonstrate that our approach can achieve significant computation reduction with minimal impact on performance across different tasks. Notably, we achieve 81.7\\% accuracy on ImageNet with a 41.6\\% reduction in the FLOPs for pruned PlainMamba-L3. Furthermore, our work provides deeper insights into understanding the behavior of SSM-based vision models for future research.", "keywords": "State Space Models;Token pruning;Efficiency;Interpretability", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zheng Zhan;Zhenglun Kong;Yifan Gong;Yushu Wu;Zichong Meng;Hangyu Zheng;Xuan Shen;Stratis Ioannidis;Wei Niu;Pu Zhao;Yanzhi Wang", "authorids": "~Zheng_Zhan3;~Zhenglun_Kong1;~Yifan_Gong2;~Yushu_Wu1;~Zichong_Meng1;~Hangyu_Zheng1;~Xuan_Shen1;~Stratis_Ioannidis1;~Wei_Niu3;~Pu_Zhao1;~Yanzhi_Wang3", "gender": ";M;F;M;M;M;M;M;M;M;M", "homepage": ";https://sites.google.com/husky.neu.edu/zlk/home?authuser=1;https://yifanfanfanfan.github.io/;https://wuyushuwys.github.io;https://cr8br0ze.github.io/;;https://shawnricecake.github.io/;https://ece.northeastern.edu/fac-ece/ioannidis/;https://www.niuwei.info;https://puzhao.info/;https://web.northeastern.edu/yanzhiwang/", "dblp": "156/4008-1.html;211/6323;49/3073-4.html;166/4244;372/0186;;;42/6940;68/828-2.html;75/8475-1;", "google_scholar": "hwTuEX0AAAAJ;XYa4NVYAAAAJ;U_gevVgAAAAJ;https://scholar.google.com/citations?hl=en;HMQjIWoAAAAJ;;Pvj14ZUAAAAJ;GPIB5kUAAAAJ;w1RoaOMAAAAJ;rWZLnpwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-8120-4456;0000-0002-3912-097X;0000-0001-9883-7973;0009-0003-2051-2572;0009-0004-5972-2929;;0000-0001-8355-4751;;0000-0001-5018-2859;", "linkedin": ";zhenglun-kong-35b527150/;yifan-gong-3059b8132/;;;;;stratis-ioannidis-87b826110;;;", "or_profile": "~Zheng_Zhan3;~Zhenglun_Kong1;~Yifan_Gong2;~Yushu_Wu1;~Zichong_Meng1;~Hangyu_Zheng1;~Xuan_Shen1;~Stratis_Ioannidis1;~Wei_Niu3;~Pu_Zhao1;~Yanzhi_Wang3", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;University of Georgia;Northeastern University;Meta, Inc.;University of Georgia;Northeastern University;Northeastern University", "aff_domain": "neu.edu;northeastern.edu;neu.edu;northeastern.edu;northeastern.edu;uga.edu;neu.edu;meta.com;uga.edu;neu.edu;northeastern.edu", "position": "PhD student;PhD student;PhD student;PhD student;MS student;PhD student;PhD student;Researcher;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhan2024exploring,\ntitle={Exploring Token Pruning in Vision State Space Models},\nauthor={Zheng Zhan and Zhenglun Kong and Yifan Gong and Yushu Wu and Zichong Meng and Hangyu Zheng and Xuan Shen and Stratis Ioannidis and Wei Niu and Pu Zhao and Yanzhi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eWiGn0Fcdx}\n}", "github": "", "reviewers": "6myn;TrFe;kiGk;g8vG;M2Lk", "pdf_size": 1221532, "rating": "4;5;5;6;6", "confidence": "4;3;4;4;4", "soundness": "3;2;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "88;112;85;82;59", "wc_strengths": "49;70;68;73;40", "wc_weaknesses": "108;140;256;11;4", "wc_questions": "101;82;31;11;62", "wc_limitations": "8;7;18;6;1", "wc_review": "354;411;458;183;166", "wc_reply_reviewers": "244;20;0;0;31", "wc_reply_authors": "943;58;68;71;63", "reply_reviewers": "2;1;0;0;1", "reply_authors": "4;2;2;2;2", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.2, 16.868906307167638 ], "wc_strengths_avg": [ 60.0, 13.069047402163633 ], "wc_weaknesses_avg": [ 103.8, 92.80603428656995 ], "wc_questions_avg": [ 57.4, 32.793901872146904 ], "wc_limitations_avg": [ 8.0, 5.549774770204643 ], "wc_review_avg": [ 314.4, 119.00352935942699 ], "wc_reply_reviewers_avg": [ 59.0, 93.26521323623294 ], "wc_reply_authors_avg": [ 240.6, 351.2279032195477 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.13363062095621225, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15022575789971339082&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "neu.edu;northeastern.edu;neu.edu;northeastern.edu;northeastern.edu;uga.edu;neu.edu;meta.com;uga.edu;neu.edu;northeastern.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;0;1;0;2;1;0;0", "aff_unique_norm": "Northeastern University;University of Georgia;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.northeastern.edu;https://www.uga.edu;https://www.meta.com", "aff_unique_abbr": "NEU;UGA;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Active Learning of General Halfspaces: Label Queries vs Membership Queries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94261", "id": "eXNyq8FGSz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eXNyq8FGSz", "openreview": "https://openreview.net/forum?id=eXNyq8FGSz", "poster": "", "project": "", "author_site": "Ilias Diakonikolas, Daniel Kane, Mingchen Ma", "tldr": "", "abstract": "We study the problem of learning general (i.e., not necessarily homogeneous) \nhalfspaces under the Gaussian distribution on $\\mathbb{R}^d$ \nin the presence of some form of query access. \nIn the classical pool-based active learning model, where the algorithm is\nallowed to make adaptive label queries to previously sampled points, \nwe establish a strong information-theoretic lower bound ruling out non-trivial\nimprovements over the passive setting. Specifically, we show that\nany active learner requires label complexity of \n$\\tilde{\\Omega}(d/(\\log(m)\\epsilon))$, where $m$ is the number of unlabeled examples. \nSpecifically, to beat the passive label complexity of $\\tilde{O}(d/\\epsilon)$, \nan active learner requires a pool of $2^{\\mathrm{poly}(d)}$ unlabeled samples.\nOn the positive side, we show that this lower bound \ncan be circumvented with membership query access, \neven in the agnostic model. Specifically, we give a computationally efficient \nlearner with query complexity of $\\tilde{O}(\\min(1/p, 1/\\epsilon) + d\\mathrm{polylog}(1/\\epsilon))$\nachieving error guarantee of $O(\\mathrm{opt}+\\epsilon)$. Here $p \\in [0, 1/2]$ \nis the bias and $\\mathrm{opt}$ is the 0-1 loss of the optimal halfspace. \nAs a corollary, we obtain a strong separation \nbetween the active and membership query models. \nTaken together, our results characterize the complexity of learning \ngeneral halfspaces under Gaussian marginals in these models.", "keywords": "Active Learning;Membership Query;Linear Separator", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ilias Diakonikolas;Daniel Kane;Mingchen Ma", "authorids": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Mingchen_Ma1", "gender": "M;M;", "homepage": "http://www.iliasdiakonikolas.org/;http://cseweb.ucsd.edu/~dakane/;https://mmingchen.github.io/", "dblp": "d/IliasDiakonikolas;52/6817;270/6320", "google_scholar": "Vb3FLmkAAAAJ;https://scholar.google.com.tw/citations?user=DulpV-cAAAAJ;w84UnLsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ilias_Diakonikolas1;~Daniel_Kane1;~Mingchen_Ma1", "aff": "University of Wisconsin - Madison;University of California, San Diego;University of Wisconsin - Madison", "aff_domain": "wisc.edu;ucsd.edu;wisc.edu", "position": "Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ndiakonikolas2024active,\ntitle={Active Learning of General Halfspaces: Label Queries vs Membership Queries},\nauthor={Ilias Diakonikolas and Daniel Kane and Mingchen Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eXNyq8FGSz}\n}", "github": "", "reviewers": "goh3;dMkP;vQ1c;88gR;amzj", "pdf_size": 466346, "rating": "5;6;7;7;7", "confidence": "3;3;4;2;3", "soundness": "3;3;4;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "121;137;73;83;62", "wc_strengths": "28;37;103;61;86", "wc_weaknesses": "234;52;309;14;93", "wc_questions": "113;35;86;56;1", "wc_limitations": "1;2;12;1;1", "wc_review": "497;263;583;215;243", "wc_reply_reviewers": "95;8;9;12;13", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 95.2, 28.833314065504158 ], "wc_strengths_avg": [ 63.0, 28.40422503783548 ], "wc_weaknesses_avg": [ 140.4, 112.45016674065005 ], "wc_questions_avg": [ 58.2, 38.96357273146291 ], "wc_limitations_avg": [ 3.4, 4.317406628984581 ], "wc_review_avg": [ 360.2, 150.08051172620648 ], "wc_reply_reviewers_avg": [ 27.4, 33.850258492366045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14436106226491989418&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "wisc.edu;ucsd.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Wisconsin-Madison;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.ucsd.edu", "aff_unique_abbr": "UW-Madison;UCSD", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Madison;San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Navigating the Effect of Parametrization for Dimensionality Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94260", "id": "eYNYnYle41", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eYNYnYle41", "openreview": "https://openreview.net/forum?id=eYNYnYle41", "poster": "/media/PosterPDFs/NeurIPS%202024/94260.png?t=1733173574.658144", "project": "", "author_site": "Haiyang Huang, Yingfan Wang, Cynthia Rudin", "tldr": "", "abstract": "Parametric dimensionality reduction methods have gained prominence for their ability to generalize to unseen datasets, an advantage that traditional non-parametric approaches typically lack. Despite their growing popularity, there remains a prevalent misconception among practitioners about the equivalence in performance between parametric and non-parametric methods. Here, we show that these methods are not equivalent -- parametric methods retain global structure but lose significant local details. To explain this, we provide evidence that parameterized approaches lack the ability to repulse negative samples, and the choice of loss function also has an impact.\nAddressing these issues, we developed a new parametric method, ParamRepulsor, that incorporates Hard Negative Mining and a loss function that applies a strong repulsive force. This new method achieves state-of-the-art performance on local structure preservation for parametric methods without sacrificing the fidelity of global structural representation. Our code is available at https://github.com/hyhuang00/ParamRepulsor.", "keywords": "Data visualization;Dimensionality reduction", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/d94695d9084286a881d763629b718b3b3b6f39e3.zip", "author": "Haiyang Huang;Yingfan Wang;Cynthia Rudin", "authorids": "~Haiyang_Huang2;~Yingfan_Wang1;~Cynthia_Rudin1", "gender": ";F;", "homepage": ";https://www.linkedin.com/in/yingfan-wang-2387421a7/;", "dblp": ";224/0553.html;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";0000-0003-3456-4149;", "linkedin": ";;", "or_profile": "~Haiyang_Huang2;~Yingfan_Wang1;~Cynthia_Rudin1", "aff": ";Duke University;", "aff_domain": ";duke.edu;", "position": ";PhD student;", "bibtex": "@inproceedings{\nhuang2024navigating,\ntitle={Navigating the Effect of Parametrization for Dimensionality Reduction},\nauthor={Haiyang Huang and Yingfan Wang and Cynthia Rudin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eYNYnYle41}\n}", "github": "", "reviewers": "bihR;QK23;9imm;Tz9e", "pdf_size": 17583864, "rating": "2;6;6;6", "confidence": "5;3;2;4", "soundness": "1;3;3;3", "novelty": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "48;161;65;107", "wc_strengths": "31;109;35;76", "wc_weaknesses": "450;260;2;237", "wc_questions": "157;85;20;3", "wc_limitations": "1;162;1;32", "wc_review": "687;777;123;455", "wc_reply_reviewers": "672;20;0;69", "wc_reply_authors": "1960;0;0;33", "reply_reviewers": "2;1;0;1", "reply_authors": "6;1;1;2", "rating_avg": [ 5.0, 1.7320508075688772 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.25, 43.61407456314991 ], "wc_strengths_avg": [ 62.75, 31.98730216820418 ], "wc_weaknesses_avg": [ 237.25, 158.99901729256064 ], "wc_questions_avg": [ 66.25, 60.67691076513372 ], "wc_limitations_avg": [ 49.0, 66.45675285477014 ], "wc_review_avg": [ 510.5, 252.69101685655548 ], "wc_reply_reviewers_avg": [ 190.25, 279.26902352391323 ], "wc_reply_authors_avg": [ 498.25, 844.0492802556022 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=58492312383489274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";duke.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "eYUbBKxABP", "title": "Formalising Anti-Discrimination Law in Automated Decision Systems", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study the legal challenges in automated decision-making by analysing conventional algorithmic fairness approaches and their alignment with anti-discrimination law in the United Kingdom and other jurisdictions based on English common law. By translating principles of anti-discrimination law into a decision-theoretic framework, we formalise discrimination and propose a new, legally informed approach to developing systems for automated decision-making. Our investigation reveals that while algorithmic fairness approaches have adapted concepts from legal theory, they can conflict with legal standards, highlighting the importance of bridging the gap between automated decisions, fairness, and anti-discrimination doctrine.", "keywords": "Law and Machine Learning;Law and AI;Anti-Discrimination Law;Justice;AI Governance;Algorithmic Fairness;Algorithmic Bias", "primary_area": "fairness", "supplementary_material": "", "author": "Holli Sargeant;M\u00e5ns Magnusson", "authorids": "~Holli_Sargeant1;~M\u00e5ns_Magnusson2", "gender": ";M", "homepage": ";http://www.mansmagnusson.com", "dblp": ";119/9862", "google_scholar": ";https://scholar.google.se/citations?user=6AA-AAcAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Holli_Sargeant1;~M\u00e5ns_Magnusson2", "aff": ";Uppsala University", "aff_domain": ";statistik.uu.se", "position": ";Assistant Professor", "bibtex": "@misc{\nanonymous2024formalising,\ntitle={Formalising Anti-Discrimination Law in Automated Decision Systems},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=eYUbBKxABP}\n}", "github": "", "project": "", "reviewers": "4EE7;XoDZ;Z976;iALf;e8Fx", "site": "https://openreview.net/forum?id=eYUbBKxABP", "pdf_size": 351436, "rating": "3;6;7;7;8", "confidence": "2;4;1;4;3", "soundness": "2;3;3;4;4", "novelty": "1;3;3;3;3", "presentation": "2;3;4;3;4", "wc_summary": "118;65;35;582;44", "wc_strengths": "51;106;32;41;114", "wc_weaknesses": "144;263;1;66;126", "wc_questions": "10;44;45;52;54", "wc_limitations": "1;49;1;7;74", "wc_review": "324;527;114;748;412", "wc_reply_reviewers": "13;131;0;349;40", "wc_reply_authors": "0;0;0;245;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 6.2, 1.7204650534085253 ], "confidence_avg": [ 2.8, 1.16619037896906 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 168.8, 208.5985618358861 ], "wc_strengths_avg": [ 68.8, 34.26601815209932 ], "wc_weaknesses_avg": [ 120.0, 87.33613227067019 ], "wc_questions_avg": [ 41.0, 15.974980438172686 ], "wc_limitations_avg": [ 26.4, 29.810065414218737 ], "wc_review_avg": [ 425.0, 210.6295325921795 ], "wc_reply_reviewers_avg": [ 106.6, 129.5478290053523 ], "wc_reply_authors_avg": [ 49.0, 98.0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2192993612779475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13795874545328823650&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Uppsala University", "aff_unique_dep": "", "aff_unique_url": "https://www.uu.se", "aff_unique_abbr": "UU", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "title": "EgoChoir: Capturing 3D Human-Object Interaction Regions from Egocentric Views", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94259", "id": "ea4oxkiMP7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ea4oxkiMP7", "openreview": "https://openreview.net/forum?id=ea4oxkiMP7", "poster": "/media/PosterPDFs/NeurIPS%202024/94259.png?t=1729415224.7309237", "project": "", "author_site": "Yuhang Yang, Wei Zhai, Chengfeng Wang, Chengjun Yu, Yang Cao, Zheng-Jun Zha", "tldr": "", "abstract": "Understanding egocentric human-object interaction (HOI) is a fundamental aspect of human-centric perception, facilitating applications like AR/VR and embodied AI. For the egocentric HOI, in addition to perceiving semantics e.g., ''what'' interaction is occurring, capturing ''where'' the interaction specifically manifests in 3D space is also crucial, which links the perception and operation. Existing methods primarily leverage observations of HOI to capture interaction regions from an exocentric view. However, incomplete observations of interacting parties in the egocentric view introduce ambiguity between visual observations and interaction contents, impairing their efficacy. From the egocentric view, humans integrate the visual cortex, cerebellum, and brain to internalize their intentions and interaction concepts of objects, allowing for the pre-formulation of interactions and making behaviors even when interaction regions are out of sight. In light of this, we propose harmonizing the visual appearance, head motion, and 3D object to excavate the object interaction concept and subject intention, jointly inferring 3D human contact and object affordance from egocentric videos. To achieve this, we present EgoChoir, which links object structures with interaction contexts inherent in appearance and head motion to reveal object affordance, further utilizing it to model human contact. Additionally, a gradient modulation is employed to adopt appropriate clues for capturing interaction regions across various egocentric scenarios. Moreover, 3D contact and affordance are annotated for egocentric videos collected from Ego-Exo4D and GIMO to support the task. Extensive experiments on them demonstrate the effectiveness and superiority of EgoChoir.", "keywords": "egocentric human-object interaction;3D human contact;3D object affordance;embodied AI", "primary_area": "machine_vision", "supplementary_material": "/attachment/454be871a65519e0501074807c531a957304695f.zip", "author": "Yuhang Yang;Wei Zhai;Chengfeng Wang;Chengjun Yu;Yang Cao;Zheng-Jun Zha", "authorids": "~Yuhang_Yang4;~Wei_Zhai1;~Chengfeng_Wang1;~Chengjun_Yu1;~Yang_Cao5;~Zheng-Jun_Zha2", "gender": "M;M;M;M;M;M", "homepage": "https://yyvhang.github.io/;https://tiaotiao11-22.github.io/wzhai/;https://github.com/wchieffff;;;", "dblp": ";189/3967.html;65/2489;346/6086.html;25/7045-10;23/1818", "google_scholar": "https://scholar.google.com.hk/citations?user=x3aClGEAAAAJ;UI5_qZcAAAAJ;hUne7P4AAAAJ;;K7rTHNcAAAAJ;", "orcid": ";;0009-0004-9127-3158;;;", "linkedin": ";;;chengjun-yu-47490832b/;;", "or_profile": "~Yuhang_Yang4;~Wei_Zhai1;~Chengfeng_Wang1;~Chengjun_Yu1;~Yang_Cao5;~Zheng-Jun_Zha2", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Postdoc;Undergrad student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024egochoir,\ntitle={EgoChoir: Capturing 3D Human-Object Interaction Regions from Egocentric Views},\nauthor={Yuhang Yang and Wei Zhai and Chengfeng Wang and Chengjun Yu and Yang Cao and Zheng-Jun Zha},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ea4oxkiMP7}\n}", "github": "", "reviewers": "L6qS;fLMx;PyYF;paDU", "pdf_size": 13237191, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "61;76;28;102", "wc_strengths": "41;124;33;59", "wc_weaknesses": "438;113;49;139", "wc_questions": "48;74;19;6", "wc_limitations": "21;1;6;8", "wc_review": "609;388;135;314", "wc_reply_reviewers": "62;22;8;21", "wc_reply_authors": "67;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.75, 26.75233634656981 ], "wc_strengths_avg": [ 64.25, 35.75874019033668 ], "wc_weaknesses_avg": [ 184.75, 149.8372033241411 ], "wc_questions_avg": [ 36.75, 26.337947907914163 ], "wc_limitations_avg": [ 9.0, 7.3824115301167 ], "wc_review_avg": [ 361.5, 169.93895962962702 ], "wc_reply_reviewers_avg": [ 28.25, 20.253086184579377 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4616329413602053091&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Confidence Calibration of Classifiers with Many Classes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94258", "id": "ebBnKVxMcZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ebBnKVxMcZ", "openreview": "https://openreview.net/forum?id=ebBnKVxMcZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94258.png?t=1730816885.9417586", "project": "", "author_site": "Adrien Le Coz, St\u00e9phane Herbin, Faouzi Adjed", "tldr": "", "abstract": "For classification models based on neural networks, the maximum predicted class probability is often used as a confidence score. This score rarely predicts well the probability of making a correct prediction and requires a post-processing calibration step. However, many confidence calibration methods fail for problems with many classes. To address this issue, we transform the problem of calibrating a multiclass classifier into calibrating a single surrogate binary classifier. This approach allows for more efficient use of standard calibration methods. We evaluate our approach on numerous neural networks used for image or text classification and show that it significantly enhances existing calibration methods.", "keywords": "Calibration;Classification;Uncertainty quantification", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2fa4bd2b7851f92bc9b3fd83a8ce675d89a31b3a.zip", "author": "Adrien Le Coz;St\u00e9phane Herbin;Faouzi Adjed", "authorids": "~Adrien_Le_Coz1;~St\u00e9phane_Herbin1;~Faouzi_Adjed1", "gender": "M;M;M", "homepage": ";https://stepherbin.github.io/;", "dblp": "281/7439;49/247;", "google_scholar": "z-0Z-AwAAAAJ;https://scholar.google.fr/citations?user=xap7jEQAAAAJ;XikGL6wAAAAJ", "orcid": ";0000-0002-3341-3018;", "linkedin": ";;", "or_profile": "~Adrien_Le_Coz1;~St\u00e9phane_Herbin1;~Faouzi_Adjed1", "aff": "IRT SystemX;ONERA;SystemX", "aff_domain": "irt-systemx.fr;onera.fr;irt-systemx.fr", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\ncoz2024confidence,\ntitle={Confidence Calibration of Classifiers with Many Classes},\nauthor={Adrien Le Coz and St{\\'e}phane Herbin and Faouzi Adjed},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ebBnKVxMcZ}\n}", "github": "", "reviewers": "oe96;S8uZ;QaKm;yKjq;RhL1;eq8Z", "pdf_size": 560007, "rating": "3;3;4;6;6;7", "confidence": "5;4;4;4;4;2", "soundness": "1;1;2;3;3;3", "novelty": "1;2;2;3;3;3", "presentation": "1;2;2;3;3;4", "wc_summary": "89;75;34;83;38;305", "wc_strengths": "65;27;19;72;114;132", "wc_weaknesses": "104;326;92;69;131;57", "wc_questions": "2;51;1;6;30;90", "wc_limitations": "2;6;1;10;7;165", "wc_review": "262;485;147;240;320;749", "wc_reply_reviewers": "42;0;32;17;0;62", "wc_reply_authors": "72;343;102;0;0;0", "reply_reviewers": "1;0;1;1;0;1", "reply_authors": "2;2;2;1;1;1", "rating_avg": [ 4.833333333333333, 1.5723301886761005 ], "confidence_avg": [ 3.8333333333333335, 0.8975274678557507 ], "soundness_avg": [ 2.1666666666666665, 0.8975274678557507 ], "novelty_avg": [ 2.3333333333333335, 0.7453559924999298 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 104.0, 92.34356140702683 ], "wc_strengths_avg": [ 71.5, 41.32291535375176 ], "wc_weaknesses_avg": [ 129.83333333333334, 90.90914267675294 ], "wc_questions_avg": [ 30.0, 32.25419869309007 ], "wc_limitations_avg": [ 31.833333333333332, 59.630575676875324 ], "wc_review_avg": [ 367.1666666666667, 199.00202399864068 ], "wc_reply_reviewers_avg": [ 25.5, 22.433234274174556 ], "wc_reply_authors_avg": [ 86.16666666666667, 121.57907806124463 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7282956597405348, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8933636249362146791&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "irt-systemx.fr;onera.fr;irt-systemx.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "IRT SystemX;ONERA;SystemX", "aff_unique_dep": ";;", "aff_unique_url": "https://www.irt-systemx.fr;https://www.onera.fr;", "aff_unique_abbr": ";ONERA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France;" }, { "title": "Image-aware Evaluation of Generated Medical Reports", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94257", "id": "ecPIg6o84Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ecPIg6o84Z", "openreview": "https://openreview.net/forum?id=ecPIg6o84Z", "poster": "", "project": "", "author_site": "Gefen Dawidowicz, Elad Hirsch, Ayellet Tal", "tldr": "", "abstract": "The paper proposes a novel evaluation metric for automatic medical report generation from X-ray images, VLScore. It aims to overcome the limitations of existing evaluation methods, which either focus solely on textual similarities, ignoring clinical aspects, or concentrate only on a single clinical aspect, the pathology, neglecting all other factors. The key idea of our metric is to measure the similarity between radiology reports while considering the corresponding image. We demonstrate the benefit of our metric through evaluation on a dataset where radiologists marked errors in pairs of reports, showing notable alignment with radiologists' judgments. In addition, we provide a new dataset for evaluating metrics. This dataset includes well-designed perturbations that distinguish between significant modifications (e.g., removal of a diagnosis) and insignificant ones. It highlights the weaknesses in current evaluation metrics and provides a clear framework for analysis.", "keywords": "medical report generation;evaluation metric;vision language", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/adb193de7ee6ad976034d5eaaf1e01fbbfbee2a1.zip", "author": "Gefen Dawidowicz;Elad Hirsch;Ayellet Tal", "authorids": "~Gefen_Dawidowicz1;~Elad_Hirsch1;~Ayellet_Tal1", "gender": "F;M;F", "homepage": ";;http://webee.technion.ac.il/people/ayellet/", "dblp": "342/8927;265/6032;54/3491", "google_scholar": ";;https://scholar.google.com.tw/citations?user=eFGgX-QAAAAJ", "orcid": ";;", "linkedin": "gefen-dawidowicz;elad-hirsch/;", "or_profile": "~Gefen_Dawidowicz1;~Elad_Hirsch1;~Ayellet_Tal1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion, Technion;Technion", "aff_domain": "campus.technion.ac.il;technion.ac.il;technion.ac.il", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndawidowicz2024imageaware,\ntitle={Image-aware Evaluation of Generated Medical Reports},\nauthor={Gefen Dawidowicz and Elad Hirsch and Ayellet Tal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ecPIg6o84Z}\n}", "github": "", "reviewers": "VcyL;EkCX;6uVW;F1XY", "pdf_size": 1566608, "rating": "3;4;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "59;106;50;16", "wc_strengths": "35;98;138;81", "wc_weaknesses": "103;173;44;115", "wc_questions": "79;129;52;11", "wc_limitations": "1;11;16;29", "wc_review": "277;517;300;252", "wc_reply_reviewers": "127;86;22;33", "wc_reply_authors": "113;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.75, 32.14323412477344 ], "wc_strengths_avg": [ 88.0, 36.93913913452776 ], "wc_weaknesses_avg": [ 108.75, 45.805976684271236 ], "wc_questions_avg": [ 67.75, 42.856592258367904 ], "wc_limitations_avg": [ 14.25, 10.084022015049353 ], "wc_review_avg": [ 336.5, 105.58527359437963 ], "wc_reply_reviewers_avg": [ 67.0, 42.2551771975932 ], "wc_reply_authors_avg": [ 28.25, 48.93043531382078 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13094118641203117745&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "campus.technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "CryoGEM: Physics-Informed Generative Cryo-Electron Microscopy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94256", "id": "edOZifvwMi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=edOZifvwMi", "openreview": "https://openreview.net/forum?id=edOZifvwMi", "poster": "/media/PosterPDFs/NeurIPS%202024/94256.png?t=1730358946.4915245", "project": "", "author_site": "Jiakai Zhang, Qihe Chen, Yan Zeng, Wenyuan Gao, Xuming He, Zhijie Liu, Jingyi Yu", "tldr": "", "abstract": "In the past decade, deep conditional generative models have revolutionized the generation of realistic images, extending their application from entertainment to scientific domains. Single-particle cryo-electron microscopy (cryo-EM) is crucial in resolving near-atomic resolution 3D structures of proteins, such as the SARS-COV-2 spike protein. To achieve high-resolution reconstruction, a comprehensive data processing pipeline has been adopted. However, its performance is still limited as it lacks high-quality annotated datasets for training. To address this, we introduce physics-informed generative cryo-electron microscopy (CryoGEM), which for the first time integrates physics-based cryo-EM simulation with a generative unpaired noise translation to generate physically correct synthetic cryo-EM datasets with realistic noises. Initially, CryoGEM simulates the cryo-EM imaging process based on a virtual specimen. To generate realistic noises, we leverage an unpaired noise translation via contrastive learning with a novel mask-guided sampling scheme. Extensive experiments show that CryoGEM is capable of generating authentic cryo-EM images. The generated dataset can be used as training data for particle picking and pose estimation models, eventually improving the reconstruction resolution.", "keywords": "Image Synthesis;Contrastive Learning;Cryo-EM", "primary_area": "generative_models", "supplementary_material": "", "author": "Jiakai Zhang;Qihe Chen;Yan Zeng;Wenyuan Gao;Xuming He;Zhijie Liu;Jingyi Yu", "authorids": "~Jiakai_Zhang3;~Qihe_Chen2;~Yan_Zeng3;~Wenyuan_Gao1;~Xuming_He3;~Zhijie_Liu3;~Jingyi_Yu5", "gender": "M;M;M;;M;M;M", "homepage": "https://jiakai-zhang.github.io;https://github.com/Dylan8527/Dylan8527.github.io;https://zerone182.github.io;https://github.com/wyllgao;https://faculty.sist.shanghaitech.edu.cn/faculty/hexm/index.html;;", "dblp": "179/2299;;;205/4041;03/4230;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;sR3Nf7YAAAAJ;;0KyeZ2QAAAAJ;;R9L_AfQAAAAJ", "orcid": ";;;;;0000-0001-7279-2893;", "linkedin": "jiakai-zhang-38b8b4217/;;;;;;", "or_profile": "~Jiakai_Zhang3;~Qihe_Chen2;~Yan_Zeng3;~Wenyuan_Gao1;~Xuming_He3;~Zhijie_Liu3;~Jingyi_Yu5", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024cryogem,\ntitle={Cryo{GEM}: Physics-Informed Generative Cryo-Electron Microscopy},\nauthor={Jiakai Zhang and Qihe Chen and Yan Zeng and Wenyuan Gao and Xuming He and Zhijie Liu and Jingyi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=edOZifvwMi}\n}", "github": "", "reviewers": "D477;w8uj;B6fS", "pdf_size": 15795716, "rating": "5;5;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "4;3;3", "wc_summary": "49;163;64", "wc_strengths": "74;91;150", "wc_weaknesses": "39;146;31", "wc_questions": "59;94;123", "wc_limitations": "2;74;11", "wc_review": "223;568;379", "wc_reply_reviewers": "11;19;22", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 92.0, 50.57667446560717 ], "wc_strengths_avg": [ 105.0, 32.567877834864625 ], "wc_weaknesses_avg": [ 72.0, 52.42772803266099 ], "wc_questions_avg": [ 92.0, 26.166135875720485 ], "wc_limitations_avg": [ 29.0, 32.03123475609393 ], "wc_review_avg": [ 390.0, 141.06027080648894 ], "wc_reply_reviewers_avg": [ 17.333333333333332, 4.642796092394706 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Iw5gEL-EO6UJ:scholar.google.com/&scioq=CryoGEM:+Physics-Informed+Generative+Cryo-Electron+Microscopy&hl=en&as_sdt=0,47", "gs_version_total": 4, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Nuclear Norm Regularization for Deep Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94255", "id": "eddHTvb5eM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eddHTvb5eM", "openreview": "https://openreview.net/forum?id=eddHTvb5eM", "poster": "/media/PosterPDFs/NeurIPS%202024/94255.png?t=1729814414.15079", "project": "", "author_site": "Christopher Scarvelis, Justin Solomon", "tldr": "", "abstract": "Penalizing the nuclear norm of a function's Jacobian encourages it to locally behave like a low-rank linear map. Such functions vary locally along only a handful of directions, making the Jacobian nuclear norm a natural regularizer for machine learning problems. However, this regularizer is intractable for high-dimensional problems, as it requires computing a large Jacobian matrix and taking its SVD. We show how to efficiently penalize the Jacobian nuclear norm using techniques tailor-made for deep learning. We prove that for functions parametrized as compositions $f = g \\circ h$, one may equivalently penalize the average squared Frobenius norm of $Jg$ and $Jh$. We then propose a denoising-style approximation that avoids the Jacobian computations altogether. Our method is simple, efficient, and accurate, enabling Jacobian nuclear norm regularization to scale to high-dimensional deep learning problems. We complement our theory with an empirical study of our regularizer's performance and investigate applications to denoising and representation learning.", "keywords": "nuclear norm;low-rank;optimization;regularization;denoising;representation learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/807eed7adbe74cedc732c1f7715d0e1de86ef164.zip", "author": "Christopher Scarvelis;Justin Solomon", "authorids": "~Christopher_Scarvelis1;~Justin_Solomon1", "gender": "M;M", "homepage": "https://cscarv.github.io/;http://people.csail.mit.edu/jsolomon/", "dblp": ";80/5094", "google_scholar": "D5LCU9wAAAAJ;pImSVwoAAAAJ", "orcid": ";0000-0002-7701-7586", "linkedin": ";justin-solomon-8a587914/", "or_profile": "~Christopher_Scarvelis1;~Justin_Solomon1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nscarvelis2024nuclear,\ntitle={Nuclear Norm Regularization for Deep Learning},\nauthor={Christopher Scarvelis and Justin Solomon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eddHTvb5eM}\n}", "github": "", "reviewers": "gXsR;82sN;P8Q4;Gitn", "pdf_size": 14675198, "rating": "4;6;7;8", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "4;3;3;4", "presentation": "4;3;2;4", "wc_summary": "71;115;68;168", "wc_strengths": "40;127;62;81", "wc_weaknesses": "1040;171;210;258", "wc_questions": "17;13;58;3", "wc_limitations": "15;8;11;28", "wc_review": "1183;434;409;538", "wc_reply_reviewers": "77;17;0;7", "wc_reply_authors": "45;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 105.5, 40.59864529759583 ], "wc_strengths_avg": [ 77.5, 32.05074102107469 ], "wc_weaknesses_avg": [ 419.75, 359.4248008972113 ], "wc_questions_avg": [ 22.75, 20.980645843252777 ], "wc_limitations_avg": [ 15.5, 7.632168761236874 ], "wc_review_avg": [ 641.0, 316.6409638691747 ], "wc_reply_reviewers_avg": [ 25.25, 30.48257699079919 ], "wc_reply_authors_avg": [ 11.25, 19.48557158514987 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.50709255283711, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5569077806168227752&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Environment Design via Regret-Guided Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94254", "id": "eezCLKwx6T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eezCLKwx6T", "openreview": "https://openreview.net/forum?id=eezCLKwx6T", "poster": "/media/PosterPDFs/NeurIPS%202024/94254.png?t=1731342245.1638498", "project": "", "author_site": "Hojun Chung, Junseo Lee, Minsoo Kim, Dohyeong Kim, Songhwai Oh", "tldr": "", "abstract": "Training agents that are robust to environmental changes remains a significant challenge in deep reinforcement learning (RL). Unsupervised environment design (UED) has recently emerged to address this issue by generating a set of training environments tailored to the agent's capabilities. While prior works demonstrate that UED has the potential to learn a robust policy, their performance is constrained by the capabilities of the environment generation. To this end, we propose a novel UED algorithm, adversarial environment design via regret-guided diffusion models (ADD). The proposed method guides the diffusion-based environment generator with the regret of the agent to produce environments that the agent finds challenging but conducive to further improvement. By exploiting the representation power of diffusion models, ADD can directly generate adversarial environments while maintaining the diversity of training environments, enabling the agent to effectively learn a robust policy. Our experimental results demonstrate that the proposed method successfully generates an instructive curriculum of environments, outperforming UED baselines in zero-shot generalization across novel, out-of-distribution environments.", "keywords": "deep reinforcement learning;curriculum learning;environment design", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/43cc64a22730f249830484d5ebea8944d2455cdf.zip", "author": "Hojun Chung;Junseo Lee;Minsoo Kim;Dohyeong Kim;Songhwai Oh", "authorids": "~Hojun_Chung1;~Junseo_Lee3;~Minsoo_Kim4;~Dohyeong_Kim1;~Songhwai_Oh1", "gender": "M;M;M;M;", "homepage": "https://rllab.snu.ac.kr/people/hojun-chung/profile;https://rllab.snu.ac.kr/people/junseo-lee/junseo-lee;https://github.com/goldbird5;https://dobro12.github.io/;https://rllab.snu.ac.kr/", "dblp": ";;;126/4248;17/3173", "google_scholar": ";;;;VEzNY_oAAAAJ", "orcid": ";;;0000-0003-0788-6089;0000-0002-9781-2018", "linkedin": ";;;;", "or_profile": "~Hojun_Chung1;~Junseo_Lee3;~Minsoo_Kim4;~Dohyeong_Kim1;~Songhwai_Oh1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchung2024adversarial,\ntitle={Adversarial Environment Design via Regret-Guided Diffusion Models},\nauthor={Hojun Chung and Junseo Lee and Minsoo Kim and Dohyeong Kim and Songhwai Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eezCLKwx6T}\n}", "github": "", "reviewers": "rCfY;a2dv;1NF7;TVkr", "pdf_size": 5996088, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;2", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "130;91;19;17", "wc_strengths": "43;76;28;115", "wc_weaknesses": "122;372;222;496", "wc_questions": "6;39;112;18", "wc_limitations": "5;40;36;2", "wc_review": "306;618;417;648", "wc_reply_reviewers": "0;64;51;282", "wc_reply_authors": "321;34;15;31", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 48.26683644076956 ], "wc_strengths_avg": [ 65.5, 33.44024521441193 ], "wc_weaknesses_avg": [ 303.0, 142.59382875847047 ], "wc_questions_avg": [ 43.75, 41.13620667976084 ], "wc_limitations_avg": [ 20.75, 17.340343133859836 ], "wc_review_avg": [ 497.25, 141.70634248332007 ], "wc_reply_reviewers_avg": [ 99.25, 108.1882040704993 ], "wc_reply_authors_avg": [ 100.25, 127.65456317734983 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DTtqupmEQcYJ:scholar.google.com/&scioq=Adversarial+Environment+Design+via+Regret-Guided+Diffusion+Models&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "egGFHCFPiU", "title": "On the Roles of LLMs in Planning: Embedding LLMs into Planning Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Plan synthesis aims to generate a course of actions or policies to transit given initial states to goal states, provided domain models that could be designed by experts or learnt from training data or interactions with the world.\nIntrigued by the claims of emergent planning capabilities in large language models (LLMs), works have been proposed to investigate the planning effectiveness of LLMs, without considering any utilization of off-the-shelf planning techniques in LLMs. \nIn this paper, we aim to further study the insight of the planning capability of LLMs by investigating the roles of LLMs in off-the-shelf planning frameworks. To do this, we investigate the effectiveness of embedding LLMs into one of the well-known planning frameworks, graph-based planning, proposing a novel LLMs-based planning framework with LLMs embedded in two levels of planning graphs, i.e., mutual constraints generation level and constraints solving level. We empirically exhibit the effectiveness of our proposed framework in various planning domains.", "keywords": "Planning;LLMs", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hankz Hankui Zhuo;xin chen;Ziwei Mo;Rong Pan", "authorids": "~Hankz_Hankui_Zhuo2;~xin_chen41;~Ziwei_Mo1;~Rong_Pan4", "gender": "M;M;M;M", "homepage": "http://xplan-lab.org;https://test.com;https://openreview.net/profile/activate?token=95629b93f01ce9b6baffbb6d7f7592e5c6c0052e423257276d4764fab296507a;", "dblp": "12/793;;;94/1243.html", "google_scholar": ";;;OIJ8_gIAAAAJ", "orcid": ";;;0000-0003-2298-3207", "linkedin": ";;;", "or_profile": "~Hankz_Hankui_Zhuo2;~xin_chen41;~Ziwei_Mo1;~Rong_Pan4", "aff": ";SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University;SUN YAT-SEN UNIVERSITY", "aff_domain": ";sysu.edu.cn;mail2.sysu.edu.cn;sysu.edu.cn", "position": ";MS student;MS student;Associate Professor", "bibtex": "@misc{\nanonymous2024on,\ntitle={On the Roles of {LLM}s in Planning: Embedding {LLM}s into Planning Graphs},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=egGFHCFPiU}\n}", "github": "", "project": "", "reviewers": "jjfw;Hoc3;KX5q;HR5T", "site": "https://openreview.net/forum?id=egGFHCFPiU", "pdf_size": 552207, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "65;69;97;81", "wc_strengths": "60;23;34;119", "wc_weaknesses": "29;539;147;165", "wc_questions": "42;4;397;31", "wc_limitations": "1;4;9;4", "wc_review": "197;639;684;400", "wc_reply_reviewers": "0;199;142;0", "wc_reply_authors": "0;116;66;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;3;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.0, 12.449899597988733 ], "wc_strengths_avg": [ 59.0, 37.155080406318596 ], "wc_weaknesses_avg": [ 220.0, 191.4392854144624 ], "wc_questions_avg": [ 118.5, 161.3854082623333 ], "wc_limitations_avg": [ 4.5, 2.8722813232690143 ], "wc_review_avg": [ 480.0, 195.8226238206403 ], "wc_reply_reviewers_avg": [ 85.25, 87.5995861862372 ], "wc_reply_authors_avg": [ 45.5, 48.81342028581894 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2922672897196473521&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "LinNet: Linear Network for Efficient Point Cloud Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94253", "id": "ehfCxpDsrw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ehfCxpDsrw", "openreview": "https://openreview.net/forum?id=ehfCxpDsrw", "poster": "", "project": "", "author_site": "Hao Deng, Kunlei Jing, Shengmei Chen, Cheng Liu, Jiawei Ru, Bo Jiang, Lin Wang", "tldr": "", "abstract": "Point-based methods have made significant progress, but improving their scalability in large-scale 3D scenes is still a challenging problem. In this paper, we delve into the point-based method and develop a simpler, faster, stronger variant model, dubbed as LinNet. In particular, we first propose the disassembled set abstraction (DSA) module, which is more effective than the previous version of set abstraction. It achieves more efficient local aggregation by leveraging spatial anisotropy and channel anisotropy separately. Additionally, by mapping 3D point clouds onto 1D space-filling curves, we enable parallelization of downsampling and neighborhood queries on GPUs with linear complexity. \nLinNet, as a purely point-based method, outperforms most previous methods in both indoor and outdoor scenes without any extra attention, and sparse convolution but merely relying on a simple MLP. It achieves the mIoU of 73.7\\%, 81.4\\%, and 69.1\\% on the S3DIS Area5, NuScenes, and SemanticKITTI validation benchmarks, respectively, while speeding up almost 10x times over PointNeXt. Our work further reveals both the efficacy and efficiency potential of the vanilla point-based models in large-scale representation learning. Our code will be available upon publication.", "keywords": "Point cloud", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Hao Deng;Kunlei Jing;Shengmei Chen;Cheng Liu;Jiawei Ru;Bo Jiang;Lin Wang", "authorids": "~Hao_Deng2;~Kunlei_Jing1;~Shengmei_Chen1;~Cheng_Liu7;~Jiawei_Ru1;~Bo_Jiang17;~Lin_Wang15", "gender": "M;M;;M;M;;M", "homepage": "https://github.com/DengH293;https://www.researchgate.net/profile/Kunlei-Jing;https://github.com/ShengmeiChen;https://ist.nwu.edu.cn/info/1018/1704.htm;https://github.com/Elbenhorn;;", "dblp": ";237/2403;;;;;", "google_scholar": ";;;;;;E272afIAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Hao_Deng2;~Kunlei_Jing1;~Shengmei_Chen1;~Cheng_Liu7;~Jiawei_Ru1;~Bo_Jiang17;~Lin_Wang15", "aff": "Northwest University Xi'an;Hong Kong Polytechnic University;Northwest University Xi'an;Northwest University;Northwest University Xi'an;;Northwest University", "aff_domain": "nwu.edu.cn;polyu.edu.hk;nwu.edu.cn;nwu.edu.cn;nwu.edu.cn;;nwu.edu.cn", "position": "MS student;Postdoc;MS student;Researcher;MS student;;Associate Professor", "bibtex": "@inproceedings{\ndeng2024linnet,\ntitle={LinNet: Linear Network for Efficient Point Cloud Representation Learning},\nauthor={Hao Deng and Kunlei Jing and Shengmei Chen and Cheng Liu and Jiawei Ru and Bo Jiang and Lin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ehfCxpDsrw}\n}", "github": "", "reviewers": "J8xV;aBR6;7sY3;zyd3", "pdf_size": 3628538, "rating": "4;6;6;8", "confidence": "3;3;4;4", "soundness": "3;3;4;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "42;80;120;81", "wc_strengths": "67;36;54;97", "wc_weaknesses": "60;48;57;18", "wc_questions": "43;159;203;38", "wc_limitations": "6;1;10;14", "wc_review": "218;324;444;248", "wc_reply_reviewers": "0;98;0;51", "wc_reply_authors": "330;562;0;47", "reply_reviewers": "0;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.75, 27.580563808595358 ], "wc_strengths_avg": [ 63.5, 22.254213084267885 ], "wc_weaknesses_avg": [ 45.75, 16.618889854620253 ], "wc_questions_avg": [ 110.75, 71.97351943597033 ], "wc_limitations_avg": [ 7.75, 4.815340071064556 ], "wc_review_avg": [ 308.5, 87.2510744919511 ], "wc_reply_reviewers_avg": [ 37.25, 40.78832553562355 ], "wc_reply_authors_avg": [ 234.75, 227.22387088508108 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5308696279009309974&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "nwu.edu.cn;polyu.edu.hk;nwu.edu.cn;nwu.edu.cn;nwu.edu.cn;;nwu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Northwest University;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nwu.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "NWU;PolyU", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Xi'an;Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Stochastic Optimization Schemes for Performative Prediction with Nonconvex Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94252", "id": "ejIzdt50ek", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ejIzdt50ek", "openreview": "https://openreview.net/forum?id=ejIzdt50ek", "poster": "/media/PosterPDFs/NeurIPS%202024/94252.png?t=1733037752.4624913", "project": "", "author_site": "Qiang LI, Hoi-To Wai", "tldr": "", "abstract": "This paper studies a risk minimization problem with decision dependent data distribution. The problem pertains to the performative prediction setting in which a trained model can affect the outcome estimated by the model. Such dependency creates a feedback loop that influences the stability of optimization algorithms such as stochastic gradient descent (SGD). We present the first study on performative prediction with smooth but possibly non-convex loss. We analyze a greedy deployment scheme with SGD (SGD-GD). Note that in the literature, SGD-GD is often studied with strongly convex loss. We first propose the definition of stationary performative stable (SPS) solutions through relaxing the popular performative stable condition. We then prove that SGD-GD converges to a biased SPS solution in expectation. We consider two conditions of sensitivity on the distribution shifts: (i) the sensitivity is characterized by Wasserstein-1 distance and the loss is Lipschitz w.r.t.~data samples, or (ii) the sensitivity is characterized by total variation (TV) divergence and the loss is bounded. In both conditions, the bias levels are proportional to the stochastic gradient's variance and sensitivity level. \nOur analysis is extended to a lazy deployment scheme where models are deployed once per several SGD updates, and we show that it converges to an SPS solution with reduced bias. Numerical experiments corroborate our theories.", "keywords": "Non-convex optimization;Performative prediction;Stochastic optimization algorithm", "primary_area": "optimization", "supplementary_material": "/attachment/d2082e2944b891354398c62a90f35db885e6eaaa.zip", "author": "Qiang LI;Hoi To Wai", "authorids": "~Qiang_LI7;~Hoi_To_Wai1", "gender": "M;M", "homepage": ";http://www1.se.cuhk.edu.hk/~htwai/", "dblp": ";29/9875", "google_scholar": "NjVNiJ8AAAAJ;https://scholar.google.com.hk/citations?user=5-J7LeMAAAAJ", "orcid": "0009-0006-1024-1344;", "linkedin": ";", "or_profile": "~Qiang_LI7;~Hoi_To_Wai1", "aff": "Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2024stochastic,\ntitle={Stochastic Optimization Schemes for Performative Prediction with Nonconvex Loss},\nauthor={Qiang LI and Hoi To Wai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ejIzdt50ek}\n}", "github": "", "reviewers": "kiFq;eMX9;71Gw;JTrB", "pdf_size": 1455998, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "4;3;4;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "22;36;51;134", "wc_strengths": "56;42;162;52", "wc_weaknesses": "82;66;156;25", "wc_questions": "114;1;113;39", "wc_limitations": "3;1;14;1", "wc_review": "277;146;496;251", "wc_reply_reviewers": "77;0;9;24", "wc_reply_authors": "229;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.75, 43.51651985166093 ], "wc_strengths_avg": [ 78.0, 48.76474136094644 ], "wc_weaknesses_avg": [ 82.25, 47.383409543847726 ], "wc_questions_avg": [ 66.75, 48.64347335460329 ], "wc_limitations_avg": [ 4.75, 5.402545696243577 ], "wc_review_avg": [ 292.5, 127.31555285981364 ], "wc_reply_reviewers_avg": [ 27.5, 29.837057495671385 ], "wc_reply_authors_avg": [ 57.25, 99.15990873331822 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8229523800144639540&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "se.cuhk.edu.hk;cuhk.edu.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "RegExplainer: Generating Explanations for Graph Neural Networks in Regression Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94251", "id": "ejWvCpLuwu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ejWvCpLuwu", "openreview": "https://openreview.net/forum?id=ejWvCpLuwu", "poster": "/media/PosterPDFs/NeurIPS%202024/94251.png?t=1731728787.7075305", "project": "", "author_site": "Jiaxing Zhang, Zhuomin Chen, hao mei, Longchao Da, Dongsheng Luo, Hua Wei", "tldr": "", "abstract": "Graph regression is a fundamental task that has gained significant attention in\nvarious graph learning tasks. However, the inference process is often not easily\ninterpretable. Current explanation techniques are limited to understanding Graph\nNeural Network (GNN) behaviors in classification tasks, leaving an explanation gap\nfor graph regression models. In this work, we propose a novel explanation method\nto interpret the graph regression models (XAIG-R). Our method addresses the\ndistribution shifting problem and continuously ordered decision boundary issues\nthat hinder existing methods away from being applied in regression tasks. We\nintroduce a novel objective based on the graph information bottleneck theory (GIB)\nand a new mix-up framework, which can support various GNNs and explainers\nin a model-agnostic manner. Additionally, we present a self-supervised learning\nstrategy to tackle the continuously ordered labels in regression tasks. We evaluate\nour proposed method on three benchmark datasets and a real-life dataset introduced\nby us, and extensive experiments demonstrate its effectiveness in interpreting GNN\nmodels in regression tasks.", "keywords": "graph neural network;explainability;data augmentation", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/5f2269440e0228c458e49f26d51d33370b4c5361.zip", "author": "Jiaxing Zhang;Zhuomin Chen;hao mei;Longchao Da;Dongsheng Luo;Hua Wei", "authorids": "~Jiaxing_Zhang4;~Zhuomin_Chen1;~hao_mei1;~Longchao_Da1;~Dongsheng_Luo1;~Hua_Wei1", "gender": "M;;M;M;M;M", "homepage": "https://tabzhangjx.github.io/;;https://www.linkedin.com/in/hao-mei-1b24b2187/;https://longchaoda.github.io/;https://users.cs.fiu.edu/~dluo/;https://www.public.asu.edu/~hwei27/", "dblp": "131/6330-2;;;334/1633;;01/6961-1", "google_scholar": "86f1WUAAAAAJ;;UXRku5MAAAAJ;https://scholar.google.com.hk/citations?user=jic73NsAAAAJ;https://scholar.google.com/citations?hl=en;F1CEAKwAAAAJ", "orcid": "0009-0007-8031-661X;;;0009-0000-8631-9634;0000-0003-4192-0826;0000-0002-3735-1635", "linkedin": "jiaxing-zhang-45593b156/;;;longchao-da-b8014624b;;", "or_profile": "~Jiaxing_Zhang4;~Zhuomin_Chen1;~hao_mei1;~Longchao_Da1;~Dongsheng_Luo1;~Hua_Wei1", "aff": "New Jersey Institute of Technology;;Arizona State University;Arizona State University;Florida International University;Arizona State University", "aff_domain": "njit.edu;;asu.edu;asu.edu;fiu.edu;asu.edu", "position": "PhD student;;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024regexplainer,\ntitle={RegExplainer: Generating Explanations for Graph Neural Networks in Regression Tasks},\nauthor={Jiaxing Zhang and Zhuomin Chen and hao mei and Longchao Da and Dongsheng Luo and Hua Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ejWvCpLuwu}\n}", "github": "", "reviewers": "4dqr;B4J7;LTFm;KBtu;X7ek", "pdf_size": 2634609, "rating": "5;5;5;5;6", "confidence": "2;2;2;3;4", "soundness": "3;2;3;2;4", "novelty": "3;2;3;2;3", "presentation": "3;2;3;3;3", "wc_summary": "53;55;31;62;42", "wc_strengths": "20;58;50;37;32", "wc_weaknesses": "367;99;6;59;25", "wc_questions": "7;164;58;30;231", "wc_limitations": "23;123;6;1;28", "wc_review": "470;499;151;189;358", "wc_reply_reviewers": "169;22;0;0;23", "wc_reply_authors": "824;466;88;210;92", "reply_reviewers": "2;1;0;0;1", "reply_authors": "3;3;2;3;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 48.6, 10.892199043352083 ], "wc_strengths_avg": [ 39.4, 13.380582946942184 ], "wc_weaknesses_avg": [ 111.2, 131.76099574608565 ], "wc_questions_avg": [ 98.0, 85.49853799919622 ], "wc_limitations_avg": [ 36.2, 44.55737873798233 ], "wc_review_avg": [ 333.4, 141.99239416250435 ], "wc_reply_reviewers_avg": [ 42.8, 63.898043788522976 ], "wc_reply_authors_avg": [ 336.0, 279.9857139212642 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.875, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3532473923114620074&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "njit.edu;;asu.edu;asu.edu;fiu.edu;asu.edu", "author_num": 6, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "New Jersey Institute of Technology;Arizona State University;Florida International University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.njit.edu;https://www.asu.edu;https://www.fiu.edu", "aff_unique_abbr": "NJIT;ASU;FIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AUCSeg: AUC-oriented Pixel-level Long-tail Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94250", "id": "ekK26cW5TB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ekK26cW5TB", "openreview": "https://openreview.net/forum?id=ekK26cW5TB", "poster": "/media/PosterPDFs/NeurIPS%202024/94250.png?t=1733470344.9781482", "project": "", "author_site": "Boyu Han, Qianqian Xu, Zhiyong Yang, Shilong Bao, Peisong Wen, Yangbangyan Jiang, Qingming Huang", "tldr": "", "abstract": "The Area Under the ROC Curve (AUC) is a well-known metric for evaluating instance-level long-tail learning problems. In the past two decades, many AUC optimization methods have been proposed to improve model performance under long-tail distributions. In this paper, we explore AUC optimization methods in the context of pixel-level long-tail semantic segmentation, a much more complicated scenario. This task introduces two major challenges for AUC optimization techniques. On one hand, AUC optimization in a pixel-level task involves complex coupling across loss terms, with structured inner-image and pairwise inter-image dependencies, complicating theoretical analysis. On the other hand, we find that mini-batch estimation of AUC loss in this case requires a larger batch size, resulting in an unaffordable space complexity. To address these issues, we develop a pixel-level AUC loss function and conduct a dependency-graph-based theoretical analysis of the algorithm's generalization ability. Additionally, we design a Tail-Classes Memory Bank (T-Memory Bank) to manage the significant memory demand. Finally, comprehensive experiments across various benchmarks confirm the effectiveness of our proposed AUCSeg method. The code is available at https://github.com/boyuh/AUCSeg.", "keywords": "Semantic Segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Boyu Han;Qianqian Xu;Zhiyong Yang;Shilong Bao;Peisong Wen;Yangbangyan Jiang;Qingming Huang", "authorids": "~Boyu_Han1;~Qianqian_Xu2;~Zhiyong_Yang1;~Shilong_Bao1;~Peisong_Wen1;~Yangbangyan_Jiang1;~Qingming_Huang2", "gender": ";F;M;M;M;F;", "homepage": ";http://vipl.ict.ac.cn/people/~qianqianxu;https://joshuaas.github.io/;https://statusrank.github.io/;https://github.com/KID-7391;https://jiangyangby.github.io/;https://qmhuang-ucas.github.io/", "dblp": ";07/7627;01/452-1.html;143/0246;276/3218;220/7539;68/4388", "google_scholar": ";https://scholar.google.com.hk/citations?user=MjifS2MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=5ZCgkQkAAAAJ;Zk2XLWYAAAAJ;h4Zm5d8AAAAJ;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ", "orcid": ";;0000-0002-4409-4999;;;;", "linkedin": ";;;;;;", "or_profile": "~Boyu_Han1;~Qianqian_Xu2;~Zhiyong_Yang1;~Shilong_Bao1;~Peisong_Wen1;~Yangbangyan_Jiang1;~Qingming_Huang2", "aff": ";Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academic of Sciences;University of Chinese Academy of Sciences;Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": ";ict.ac.cn;ucas.ac.cb;ucas.ac.cn;ict.ac.cn;ucas.ac.cn;ucas.ac.cn", "position": ";Full Professor;Associate Professor;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhan2024aucseg,\ntitle={{AUCS}eg: {AUC}-oriented Pixel-level Long-tail Semantic Segmentation},\nauthor={Boyu Han and Qianqian Xu and Zhiyong Yang and Shilong Bao and Peisong Wen and Yangbangyan Jiang and Qingming Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ekK26cW5TB}\n}", "github": "", "reviewers": "eudJ;tNYv;WAeT;TAdP", "pdf_size": 22092193, "rating": "5;5;5;8", "confidence": "4;3;3;5", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;4;3", "wc_summary": "72;58;88;79", "wc_strengths": "30;32;45;59", "wc_weaknesses": "309;5;59;113", "wc_questions": "50;177;70;6", "wc_limitations": "27;1;1;1", "wc_review": "488;273;263;258", "wc_reply_reviewers": "23;30;21;12", "wc_reply_authors": "24;156;37;9", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 10.96300597464035 ], "wc_strengths_avg": [ 41.5, 11.629703349613008 ], "wc_weaknesses_avg": [ 121.5, 114.79002569909983 ], "wc_questions_avg": [ 75.75, 62.87437872456475 ], "wc_limitations_avg": [ 7.5, 11.258330249197702 ], "wc_review_avg": [ 320.5, 96.85685313905257 ], "wc_reply_reviewers_avg": [ 21.5, 6.422616289332565 ], "wc_reply_authors_avg": [ 56.5, 58.29451089082059 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8703882797784892, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=915247707952947223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";ict.ac.cn;ucas.ac.cb;ucas.ac.cn;ict.ac.cn;ucas.ac.cn;ucas.ac.cn", "author_num": 7, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Poisson Variational Autoencoder", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94249", "id": "ektPEcqGLb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ektPEcqGLb", "openreview": "https://openreview.net/forum?id=ektPEcqGLb", "poster": "/media/PosterPDFs/NeurIPS%202024/94249.png?t=1733757896.0187545", "project": "", "author_site": "Hadi Vafaii, Dekel Galor, Jacob Yates", "tldr": "", "abstract": "Variational autoencoders (VAE) employ Bayesian inference to interpret sensory inputs, mirroring processes that occur in primate vision across both ventral (Higgins et al., 2021) and dorsal (Vafaii et al., 2023) pathways. Despite their success, traditional VAEs rely on continuous latent variables, which significantly deviates from the discrete nature of biological neurons. Here, we developed the Poisson VAE (P-VAE), a novel architecture that combines principles of predictive coding with a VAE that encodes inputs into discrete spike counts. Combining Poisson-distributed latent variables with predictive coding introduces a metabolic cost term in the model loss function, suggesting a relationship with sparse coding which we verify empirically. Additionally, we analyze the geometry of learned representations, contrasting the P-VAE to alternative VAE models. We find that the P-VAE encodes its inputs in relatively higher dimensions, facilitating linear separability of categories in a downstream classification task with a much better (5x) sample efficiency. Our work provides an interpretable computational framework to study brain-like sensory processing and paves the way for a deeper understanding of perception as an inferential process.", "keywords": "NeuroAI;Bayesian Inference;Predictive Coding;Sparse Coding;Variational Autoencoder", "primary_area": "generative_models", "supplementary_material": "", "author": "Hadi Vafaii;Dekel Galor;Jacob L. Yates", "authorids": "~Hadi_Vafaii1;~Dekel_Galor1;~Jacob_L._Yates1", "gender": "M;M;Not Specified", "homepage": ";https://www.dekelgalor.com/;https://jake.vision/", "dblp": ";336/4228;91/11540", "google_scholar": "caQ3wQIAAAAJ;As1WihkAAAAJ;UJm-TkYAAAAJ", "orcid": "0000-0002-4153-5373;0000-0001-9243-6925;0000-0001-8322-5982", "linkedin": ";dekelgalor/;", "or_profile": "~Hadi_Vafaii1;~Dekel_Galor1;~Jacob_L._Yates1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nvafaii2024poisson,\ntitle={Poisson Variational Autoencoder},\nauthor={Hadi Vafaii and Dekel Galor and Jacob L. Yates},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ektPEcqGLb}\n}", "github": "", "reviewers": "gEE3;tb4S;w8Jn;2Wt3", "pdf_size": 6091207, "rating": "7;7;7;7", "confidence": "4;3;3;3", "soundness": "4;3;4;4", "novelty": "3;3;3;3", "presentation": "4;3;4;4", "wc_summary": "122;38;67;58", "wc_strengths": "132;58;140;105", "wc_weaknesses": "152;320;210;112", "wc_questions": "121;2;140;33", "wc_limitations": "6;16;10;53", "wc_review": "533;434;567;361", "wc_reply_reviewers": "33;51;34;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 31.123744954616242 ], "wc_strengths_avg": [ 108.75, 32.041964671349355 ], "wc_weaknesses_avg": [ 198.5, 78.32464490822795 ], "wc_questions_avg": [ 74.0, 57.94393842327254 ], "wc_limitations_avg": [ 21.25, 18.673175948402566 ], "wc_review_avg": [ 473.75, 81.39218328562025 ], "wc_reply_reviewers_avg": [ 37.0, 8.215838362577491 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4267187149938533710&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 7, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Functional Bilevel Optimization for Machine Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94248", "id": "enlxHLwwFf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=enlxHLwwFf", "openreview": "https://openreview.net/forum?id=enlxHLwwFf", "poster": "/media/PosterPDFs/NeurIPS%202024/94248.png?t=1734029681.8445308", "project": "", "author_site": "Ieva Petrulionyt\u0117, Julien Mairal, Michael Arbel", "tldr": "", "abstract": "In this paper, we introduce a new functional point of view on bilevel optimization problems for machine learning, where the inner objective is minimized over a function space. These types of problems are most often solved by using methods developed in the parametric setting, where the inner objective is strongly convex with respect to the parameters of the prediction function. The functional point of view does not rely on this assumption and notably allows using over-parameterized neural networks as the inner prediction function. We propose scalable and efficient algorithms for the functional bilevel optimization problem and illustrate the benefits of our approach on instrumental regression and reinforcement learning tasks.", "keywords": "bilevel optimization;functional optimization;adjoint method;neural networks", "primary_area": "optimization", "supplementary_material": "", "author": "Ieva Petrulionyt\u0117;Julien Mairal;Michael Arbel", "authorids": "~Ieva_Petrulionyt\u01171;~Julien_Mairal1;~Michael_Arbel1", "gender": ";;M", "homepage": ";http://julien.mairal.org;https://michaelarbel.github.io/", "dblp": ";49/6555;200/8609", "google_scholar": ";https://scholar.google.fr/citations?user=Bx9WGD6lBFEC;NsOqVtkAAAAJ", "orcid": ";;", "linkedin": "ieva-petrulionyte-0247761a3/;;michael-arbel-0a38a655/", "or_profile": "~Ieva_Petrulionyt\u01171;~Julien_Mairal1;~Michael_Arbel1", "aff": "INRIA Rhone-Alpes;Inria;INRIA", "aff_domain": "inrialpes.fr;inria.fr;inria.fr", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\npetrulionyt{\\.{e}}2024functional,\ntitle={Functional Bilevel Optimization for Machine Learning},\nauthor={Ieva Petrulionyt{\\.{e}} and Julien Mairal and Michael Arbel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=enlxHLwwFf}\n}", "github": "", "reviewers": "98ub;QfTd;MXrv;p8LC", "pdf_size": 3865923, "rating": "6;7;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "47;44;74;319", "wc_strengths": "24;104;107;85", "wc_weaknesses": "126;231;212;108", "wc_questions": "52;79;3;149", "wc_limitations": "5;41;3;24", "wc_review": "254;499;399;685", "wc_reply_reviewers": "18;51;21;68", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.0, 114.9108349982716 ], "wc_strengths_avg": [ 80.0, 33.414068893207244 ], "wc_weaknesses_avg": [ 169.25, 53.063052117268946 ], "wc_questions_avg": [ 70.75, 52.75592383799188 ], "wc_limitations_avg": [ 18.25, 15.481844205391036 ], "wc_review_avg": [ 459.25, 156.76475209689198 ], "wc_reply_reviewers_avg": [ 39.5, 20.910523666326483 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16532863543200051716&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "inrialpes.fr;inria.fr;inria.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "0", "aff_campus_unique": "Rhone-Alpes;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Mission Impossible: A Statistical Perspective on Jailbreaking LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94247", "id": "eowkjKVPoH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eowkjKVPoH", "openreview": "https://openreview.net/forum?id=eowkjKVPoH", "poster": "", "project": "", "author_site": "Jingtong Su, Julia Kempe, Karen Ullrich", "tldr": "", "abstract": "Large language models (LLMs) are trained on a deluge of text data with limited quality control. As a result, LLMs can exhibit unintended or even harmful behaviours, such as leaking information, fake news or hate speech. Countermeasures, commonly referred to as preference alignment, include fine-tuning the pretrained LLMs with carefully crafted text examples of desired behaviour. Even then, empirical evidence shows preference aligned LLMs can be enticed to harmful behaviour. This so called jailbreaking of LLMs is typically achieved by adversarially modifying the input prompt to the LLM. Our paper provides theoretical insights into the phenomenon of preference alignment and jailbreaking from a statistical perspective. Under our framework, we first show that pretrained LLMs will mimic harmful behaviour if present in the training corpus. \\textbf{Under that same framework, we then introduce a statistical notion of alignment, and lower-bound the jailbreaking probability, showing that it is unpreventable under reasonable assumptions.} Based on our insights, we propose an alteration to the currently prevalent alignment strategy RLHF. Specifically, we introduce a simple modification to the RLHF objective, we call \\emph{E-RLHF}, that aims to increase the likelihood of safe responses. \\emph{E-RLHF} brings no additional training cost, and is compatible with other methods. Empirically, we demonstrate that \\emph{E-RLHF} outperforms RLHF on all alignment problems put forward by the AdvBench \\citep{zou2023universal} and HarmBench project \\citep{mazeika2024harmbench} without sacrificing model performance as measured by the MT-Bench project \\citep{zheng2024judging}.", "keywords": "large language models;jailbreak;safety alignment;theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jingtong Su;Julia Kempe;Karen Ullrich", "authorids": "~Jingtong_Su1;~Julia_Kempe1;~Karen_Ullrich1", "gender": "M;;F", "homepage": "https://cims.nyu.edu/~js12196/;;https://www.karenullrich.info", "dblp": "275/3776;;155/8025", "google_scholar": "i0OY_LAAAAAJ;;TMIPmNAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jingtong_Su1;~Julia_Kempe1;~Karen_Ullrich1", "aff": "New York University;;Meta AI", "aff_domain": "nyu.edu;;fb.com", "position": "PhD student;;Research Scientist", "bibtex": "@inproceedings{\nsu2024mission,\ntitle={Mission Impossible: A Statistical Perspective on Jailbreaking {LLM}s},\nauthor={Jingtong Su and Julia Kempe and Karen Ullrich},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eowkjKVPoH}\n}", "github": "", "reviewers": "T1QG;UXGw;sD4q;qvRD", "pdf_size": 1004408, "rating": "4;6;7;7", "confidence": "2;2;3;4", "soundness": "3;2;3;3", "novelty": "2;3;4;3", "presentation": "3;3;2;3", "wc_summary": "96;74;111;139", "wc_strengths": "58;66;84;86", "wc_weaknesses": "140;221;26;112", "wc_questions": "17;96;28;6", "wc_limitations": "22;5;1;8", "wc_review": "333;462;250;351", "wc_reply_reviewers": "120;35;12;10", "wc_reply_authors": "140;33;17;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.0, 23.632604596192948 ], "wc_strengths_avg": [ 73.5, 11.863810517704673 ], "wc_weaknesses_avg": [ 124.75, 69.66123383920213 ], "wc_questions_avg": [ 36.75, 35.08115591026043 ], "wc_limitations_avg": [ 9.0, 7.905694150420948 ], "wc_review_avg": [ 349.0, 75.54799798803407 ], "wc_reply_reviewers_avg": [ 44.25, 44.82396122611209 ], "wc_reply_authors_avg": [ 51.75, 51.368156478503295 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7385489458759963, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1231440186153208177&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "nyu.edu;;fb.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "New York University;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "https://www.nyu.edu;https://meta.com", "aff_unique_abbr": "NYU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "MKGL: Mastery of a Three-Word Language", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94246", "id": "eqMNwXvOqn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eqMNwXvOqn", "openreview": "https://openreview.net/forum?id=eqMNwXvOqn", "poster": "/media/PosterPDFs/NeurIPS%202024/94246.png?t=1730877744.9069674", "project": "", "author_site": "Lingbing Guo, Zhongpu Bo, Zhuo Chen, Yichi Zhang, Jiaoyan Chen, Lan Yarong, Mengshu Sun, Zhiqiang Zhang, Yangyifei Luo, Qian Li, Qiang Zhang, Wen Zhang, Huajun Chen", "tldr": "", "abstract": "Large language models (LLMs) have significantly advanced performance across a spectrum of natural language processing (NLP) tasks. Yet, their application to knowledge graphs (KGs), which describe facts in the form of triplets and allow minimal hallucinations, remains an underexplored frontier. In this paper, we investigate the integration of LLMs with KGs by introducing a specialized KG Language (KGL), where a sentence precisely consists of an entity noun, a relation verb, and ends with another entity noun. Despite KGL's unfamiliar vocabulary to the LLM, we facilitate its learning through a tailored dictionary and illustrative sentences, and enhance context understanding via real-time KG context retrieval and KGL token embedding augmentation. Our results reveal that LLMs can achieve fluency in KGL, drastically reducing errors compared to conventional KG embedding methods on KG completion. Furthermore, our enhanced LLM shows exceptional competence in generating accurate three-word sentences from an initial entity and interpreting new unseen terms out of KGs.", "keywords": "Knowledge Graph;Large Language Model;Knowledge Graph Completion;Knowledge Graph Embedding;Low-Rank Adaption", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/4034fc1c8312aaf48b7194cc9d04a5ffe0fdccff.zip", "author": "Lingbing Guo;Zhongpu Bo;Zhuo Chen;Yichi Zhang;Jiaoyan Chen;Lan Yarong;Mengshu Sun;Zhiqiang Zhang;Yangyifei Luo;Qian Li;Qiang Zhang;Wen Zhang;Huajun Chen", "authorids": "~Lingbing_Guo1;~Zhongpu_Bo1;~Zhuo_Chen3;~Yichi_Zhang13;~Jiaoyan_Chen1;~Lan_Yarong1;~Mengshu_Sun2;~Zhiqiang_Zhang4;~Yangyifei_Luo1;~Qian_Li8;~Qiang_Zhang6;~Wen_Zhang4;~Huajun_Chen1", "gender": "M;M;;M;M;F;F;M;M;;;;M", "homepage": "https://guolingbing.github.io/;;;https://zhang-each.github.io/CV/;https://chenjiaoyan.github.io/;https://github.com/victttt;;;https://github.com/lyyf2002;;https://qiangairesearcher.github.io;https://person.zju.edu.cn/en/wenzhang;", "dblp": "228/2586;235/9794.html;;86/7054-9;56/8110-1;;;67/2010-12;;;72/3527-26;43/2368-15;94/5089", "google_scholar": "og4v8cMAAAAJ;https://scholar.google.com/citations?view_op=list_works;;-ys4Y-EAAAAJ;https://scholar.google.ch/citations?user=5Cy4z8wAAAAJ;;https://scholar.google.com.hk/citations?view_op=list_works;TMx0g8kAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;Ig9ho4kAAAAJ;", "orcid": ";;;0009-0007-4046-1003;0000-0003-4643-6750;;;0000-0002-2321-7259;;;;;", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Lingbing_Guo1;~Zhongpu_Bo1;~Zhuo_Chen3;~Yichi_Zhang13;~Jiaoyan_Chen1;~Lan_Yarong1;~Mengshu_Sun2;~Zhiqiang_Zhang4;~Yangyifei_Luo1;~Qian_Li8;~Qiang_Zhang6;~Wen_Zhang4;~Huajun_Chen1", "aff": "Zhejiang University;Alibaba Group;;Zhejiang University;University of Oxford;Zhejiang University;antgroup;Ant Group;The Insititute of Advanced Computing Technology, Beijing University of Aeronautics and Astronautics;;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;antgroup.com;;zju.edu.cn;cs.ox.ac.uk;zju.edu.cn;antgroup.com;antfin.com;act.buaa.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Researcher;;MS student;Senior Researcher;Undergrad student;Researcher;Researcher;MS student;;Principal Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nguo2024mkgl,\ntitle={{MKGL}: Mastery of a Three-Word Language},\nauthor={Lingbing Guo and Zhongpu Bo and Zhuo Chen and Yichi Zhang and Jiaoyan Chen and Lan Yarong and Mengshu Sun and Zhiqiang Zhang and Yangyifei Luo and Qian Li and Qiang Zhang and Wen Zhang and Huajun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eqMNwXvOqn}\n}", "github": "", "reviewers": "dtBb;NCjt;XXej;4rXf", "pdf_size": 1813956, "rating": "6;7;7;8", "confidence": "2;3;2;4", "soundness": "2;2;3;4", "novelty": "3;3;2;4", "presentation": "2;3;3;4", "wc_summary": "128;128;98;44", "wc_strengths": "21;40;50;82", "wc_weaknesses": "170;187;121;14", "wc_questions": "65;120;57;1", "wc_limitations": "13;63;3;9", "wc_review": "397;538;329;150", "wc_reply_reviewers": "31;113;79;0", "wc_reply_authors": "28;163;152;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 99.5, 34.30378987808781 ], "wc_strengths_avg": [ 48.25, 22.094965489902897 ], "wc_weaknesses_avg": [ 123.0, 67.4351540370451 ], "wc_questions_avg": [ 60.75, 42.16856056352884 ], "wc_limitations_avg": [ 22.0, 23.93741840717165 ], "wc_review_avg": [ 353.5, 139.59315885816181 ], "wc_reply_reviewers_avg": [ 55.75, 43.41298768801797 ], "wc_reply_authors_avg": [ 85.75, 72.53404373120252 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10284091059340573457&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "zju.edu.cn;antgroup.com;;zju.edu.cn;cs.ox.ac.uk;zju.edu.cn;antgroup.com;antfin.com;act.buaa.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 13, "aff_unique_index": "0;1;0;2;0;3;3;4;0;0;0", "aff_unique_norm": "Zhejiang University;Alibaba Group;University of Oxford;Ant Group;Beijing University of Aeronautics and Astronautics", "aff_unique_dep": ";;;;The Insititute of Advanced Computing Technology", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.ox.ac.uk;https://www.antgroup.com;http://www.buaa.edu.cn", "aff_unique_abbr": "ZJU;Alibaba;Oxford;Ant Group;BUAA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Frozen-DETR: Enhancing DETR with Image Understanding from Frozen Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94245", "id": "erQDc72vyi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=erQDc72vyi", "openreview": "https://openreview.net/forum?id=erQDc72vyi", "poster": "/media/PosterPDFs/NeurIPS%202024/94245.png?t=1731377285.7275624", "project": "", "author_site": "Shenghao Fu, Junkai Yan, Qize Yang, Xihan Wei, Xiaohua Xie, Wei-Shi Zheng", "tldr": "", "abstract": "Recent vision foundation models can extract universal representations and show impressive abilities in various tasks. However, their application on object detection is largely overlooked, especially without fine-tuning them. In this work, we show that frozen foundation models can be a versatile feature enhancer, even though they are not pre-trained for object detection. Specifically, we explore directly transferring the high-level image understanding of foundation models to detectors in the following two ways. First, the class token in foundation models provides an in-depth understanding of the complex scene, which facilitates decoding object queries in the detector's decoder by providing a compact context. Additionally, the patch tokens in foundation models can enrich the features in the detector's encoder by providing semantic details. Utilizing frozen foundation models as plug-and-play modules rather than the commonly used backbone can significantly enhance the detector's performance while preventing the problems caused by the architecture discrepancy between the detector's backbone and the foundation model. With such a novel paradigm, we boost the SOTA query-based detector DINO from 49.0% AP to 51.9% AP (+2.9% AP) and further to 53.8% AP (+4.8% AP) by integrating one or two foundation models respectively, on the COCO validation set after training for 12 epochs with R50 as the detector's backbone. Code will be available.", "keywords": "Object Detection;DETR;Foundation Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shenghao Fu;Junkai Yan;Qize Yang;Xihan Wei;Xiaohua Xie;Wei-Shi Zheng", "authorids": "~Shenghao_Fu1;~Junkai_Yan1;~Qize_Yang1;~Xihan_Wei1;~Xiaohua_Xie1;~Wei-Shi_Zheng3", "gender": "M;M;;F;M;M", "homepage": "https://github.com/fushh;https://yanjk3.github.io/;;https://www.zhihu.com/people/HannahW;https://cse.sysu.edu.cn/content/2478;http://www.isee-ai.cn/~zhwshi", "dblp": "355/0567;;246/5729;;22/5763;30/8399", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=QMm29SwAAAAJ;8ptUJjwAAAAJ;;5YZ3kvoAAAAJ;AwqDDGoAAAAJ", "orcid": ";0009-0009-6531-0070;;;0000-0002-0310-4679;", "linkedin": ";;;;;", "or_profile": "~Shenghao_Fu1;~Junkai_Yan1;~Qize_Yang1;~Xihan_Wei1;~Xiaohua_Xie1;~Wei-Shi_Zheng3", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Alibaba Group;Alibaba Group;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;sysu.edu.cn;sysu.edu.cn", "position": "PhD student;MS student;Researcher;Senior Algorithm Expert;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfu2024frozendetr,\ntitle={Frozen-{DETR}: Enhancing {DETR} with Image Understanding from Frozen Foundation Models},\nauthor={Shenghao Fu and Junkai Yan and Qize Yang and Xihan Wei and Xiaohua Xie and Wei-Shi Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=erQDc72vyi}\n}", "github": "", "reviewers": "V83J;C9T7;QbuY;RksL", "pdf_size": 8627730, "rating": "5;6;6;6", "confidence": "4;5;5;5", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;2;3;3", "wc_summary": "107;59;48;104", "wc_strengths": "38;43;49;78", "wc_weaknesses": "374;199;218;114", "wc_questions": "4;88;44;54", "wc_limitations": "57;56;6;1", "wc_review": "580;445;365;351", "wc_reply_reviewers": "26;39;738;25", "wc_reply_authors": "0;0;1729;0", "reply_reviewers": "1;1;4;1", "reply_authors": "1;1;5;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 26.31064423384574 ], "wc_strengths_avg": [ 52.0, 15.508062419270823 ], "wc_weaknesses_avg": [ 226.25, 93.86259904775703 ], "wc_questions_avg": [ 47.5, 29.94578434437809 ], "wc_limitations_avg": [ 30.0, 26.56124997058685 ], "wc_review_avg": [ 435.25, 90.94057125397883 ], "wc_reply_reviewers_avg": [ 207.0, 306.6227323601432 ], "wc_reply_authors_avg": [ 432.25, 748.6789615716472 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9020119726633865616&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "sysu.edu.cn;sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;sysu.edu.cn;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Sun Yat-sen University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SYSU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Discovering Preference Optimization Algorithms with and for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94244", "id": "erjQDJ0z9L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=erjQDJ0z9L", "openreview": "https://openreview.net/forum?id=erjQDJ0z9L", "poster": "/media/PosterPDFs/NeurIPS%202024/94244.png?t=1731926922.6291456", "project": "", "author_site": "Chris Lu, Samuel Holt, Claudio Fanconi, Alex Chan, Jakob Foerster, Mihaela van der Schaar, Robert Lange", "tldr": "", "abstract": "Offline preference optimization is a key method for enhancing and controlling the quality of Large Language Model (LLM) outputs.\nTypically, preference optimization is approached as an offline supervised learning task using manually crafted convex loss functions. While these methods are based on theoretical insights, they are inherently constrained by human creativity, so the large search space of possible loss functions remains under-explored. We address this by performing LLM-driven *objective discovery* to automatically discover new state-of-the-art preference optimization algorithms without (expert) human intervention. Specifically, we iteratively prompt an LLM to propose and implement new preference optimization loss functions based on previously evaluated performance metrics. This process leads to the discovery of previously unknown and performant preference optimization algorithms. The best performing of these we call *Discovered Preference Optimization* (DiscoPOP), a novel algorithm that adaptively blends logistic and exponential losses. Experiments demonstrate the state-of-the-art performance of DiscoPOP and its successful transfer to held-out tasks.", "keywords": "Preference optimization;RLHF;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Chris Lu;Samuel Holt;Claudio Fanconi;Alex James Chan;Jakob Nicolaus Foerster;Mihaela van der Schaar;Robert Tjarko Lange", "authorids": "~Chris_Lu1;~Samuel_Holt1;~Claudio_Fanconi1;~Alex_James_Chan1;~Jakob_Nicolaus_Foerster1;~Mihaela_van_der_Schaar2;~Robert_Tjarko_Lange1", "gender": ";;M;M;M;F;", "homepage": ";https://samholt.github.io/;;https://alexjchan.com;https://www.jakobfoerster.com;https://www.vanderschaar-lab.com;https://roberttlange.github.io/", "dblp": "77/9579;322/3656;;268/6948;176/5095;;245/9152", "google_scholar": "4WLoIRsAAAAJ;Ey5aInIAAAAJ;Uko0QY4AAAAJ;yfy_BGIAAAAJ;6z4lQzMAAAAJ;DZ3S--MAAAAJ;https://scholar.google.es/citations?user=cTrc3x4AAAAJ", "orcid": ";;;;;;", "linkedin": ";;;alex-chan-040081131/;;;", "or_profile": "~Chris_Lu1;~Samuel_Holt1;~Claudio_Fanconi1;~Alex_James_Chan1;~Jakob_Nicolaus_Foerster1;~Mihaela_van_der_Schaar2;~Robert_Tjarko_Lange1", "aff": "University of Oxford;Google DeepMind;University of Cambridge;Spotify;University of Oxford, University of Oxford;University of California, Los Angeles;TU Berlin", "aff_domain": "ox.ac.uk;google.com;cam.ac.uk;spotify.com;eng.ox.ac.uk;ucla.edu;tu-berlin.de", "position": "PhD student;Intern;PhD student;Researcher;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nlu2024discovering,\ntitle={Discovering Preference Optimization Algorithms with and for Large Language Models},\nauthor={Chris Lu and Samuel Holt and Claudio Fanconi and Alex James Chan and Jakob Nicolaus Foerster and Mihaela van der Schaar and Robert Tjarko Lange},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=erjQDJ0z9L}\n}", "github": "", "reviewers": "gRRA;663E;MbcT;ePhP", "pdf_size": 2139265, "rating": "6;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "69;41;50;79", "wc_strengths": "62;46;67;56", "wc_weaknesses": "298;17;120;103", "wc_questions": "11;4;3;78", "wc_limitations": "23;4;3;7", "wc_review": "463;112;243;323", "wc_reply_reviewers": "85;14;0;53", "wc_reply_authors": "72;0;0;54", "reply_reviewers": "1;1;0;2", "reply_authors": "2;1;1;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.75, 15.022899187573616 ], "wc_strengths_avg": [ 57.75, 7.8222439235810075 ], "wc_weaknesses_avg": [ 134.5, 102.15307141735876 ], "wc_questions_avg": [ 24.0, 31.32890039564108 ], "wc_limitations_avg": [ 9.25, 8.073877630977572 ], "wc_review_avg": [ 285.25, 127.29959740706174 ], "wc_reply_reviewers_avg": [ 38.0, 33.36914742692717 ], "wc_reply_authors_avg": [ 31.5, 32.13642792844283 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12314960506809251884&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;google.com;cam.ac.uk;spotify.com;eng.ox.ac.uk;ucla.edu;tu-berlin.de", "author_num": 7, "aff_unique_index": "0;1;2;3;0;4;5", "aff_unique_norm": "University of Oxford;Google;University of Cambridge;Spotify;University of California, Los Angeles;Technische Universit\u00e4t Berlin", "aff_unique_dep": ";Google DeepMind;;;;", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com;https://www.cam.ac.uk;https://www.spotify.com;https://www.ucla.edu;https://www.tu-berlin.de", "aff_unique_abbr": "Oxford;DeepMind;Cambridge;Spotify;UCLA;TU Berlin", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Cambridge;Los Angeles;Berlin", "aff_country_unique_index": "0;0;0;1;0;2;3", "aff_country_unique": "United Kingdom;Sweden;United States;Germany" }, { "title": "Mixture of Experts Meets Prompt-Based Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94243", "id": "erwatqQ4p8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=erwatqQ4p8", "openreview": "https://openreview.net/forum?id=erwatqQ4p8", "poster": "/media/PosterPDFs/NeurIPS%202024/94243.png?t=1733245318.154181", "project": "", "author_site": "Minh Le, An Nguyen The, Huy Nguyen, Trang Nguyen, Trang Pham, Linh Ngo, Nhat Ho", "tldr": "", "abstract": "Exploiting the power of pre-trained models, prompt-based approaches stand out compared to other continual learning solutions in effectively preventing catastrophic forgetting, even with very few learnable parameters and without the need for a memory buffer. While existing prompt-based continual learning methods excel in leveraging prompts for state-of-the-art performance, they often lack a theoretical explanation for the effectiveness of prompting. This paper conducts a theoretical analysis to unravel how prompts bestow such advantages in continual learning, thus offering a new perspective on prompt design. We first show that the attention block of pre-trained models like Vision Transformers inherently encodes a special mixture of experts architecture, characterized by linear experts and quadratic gating score functions. This realization drives us to provide a novel view on prefix tuning, reframing it as the addition of new task-specific experts, thereby inspiring the design of a novel gating mechanism termed Non-linear Residual Gates (NoRGa). Through the incorporation of non-linear activation and residual connection, NoRGa enhances continual learning performance while preserving parameter efficiency. The effectiveness of NoRGa is substantiated both theoretically and empirically across diverse benchmarks and pretraining paradigms. Our code is publicly available at https://github.com/Minhchuyentoancbn/MoE_PromptCL.", "keywords": "Continual Learning;Pre-training;Attention;Prompt Tuning;Mixture of Experts", "primary_area": "online_learning", "supplementary_material": "/attachment/1325e9e620248cb5534e9e2572ae88233b0184b1.zip", "author": "Minh Le;An Nguyen The;Huy Nguyen;Thien Trang Nguyen Vu;Huyen Trang Pham;Linh Ngo Van;Nhat Ho", "authorids": "~Minh_Le2;~An_Nguyen_The1;~Huy_Nguyen5;~Thien_Trang_Nguyen_Vu1;~Huyen_Trang_Pham2;~Linh_Ngo_Van1;~Nhat_Ho1", "gender": "M;M;F;F;;M;M", "homepage": "https://minhchuyentoancbn.github.io/;https://huynm99.github.io/;https://nguyenvuthientrang.github.io/;https://phamvuhuyentrang.github.io/;https://users.soict.hust.edu.vn/linhnv/;https://nhatptnk8912.github.io/;https://sg-nta.github.io/", "dblp": ";48/6075;;;125/3578;203/4479;", "google_scholar": "GOE6jNAAAAAJ;_YYwzhQAAAAJ;TrSX1n8AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.vn/citations?user=tZ78MoQAAAAJ;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ;h0qziUcAAAAJ", "orcid": ";;;;;;", "linkedin": "itgaming/;huy-nguyen-081199/;thientrangnguyenvu/;huyen-trang-04a111251/;;nhat-pham-minh-ho-267b8164/;an-nguyen-the-130515170/", "or_profile": "~Minh_Le2;~Huy_Nguyen5;~Thien_Trang_Nguyen_Vu1;~Huyen_Trang_Pham2;~Linh_Ngo_Van1;~Nhat_Ho1;~An_Nguyen_The2", "aff": "Hanoi University of Science and Technology;Microsoft AI;VinAI Research;Hanoi University of Science and Technology;Hanoi University of Science and Technology;University of Texas, Austin;Hanoi University of Science and Technology", "aff_domain": "hust.edu.vn;microsoft.com;vinai.io;hust.edu.vn;hust.edu.vn;utexas.edu;hust.edu.vn", "position": "Undergrad student;Intern;Researcher;Undergrad student;Assistant Professor;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nle2024mixture,\ntitle={Mixture of Experts Meets Prompt-Based Continual Learning},\nauthor={Minh Le and An Nguyen The and Huy Nguyen and Thien Trang Nguyen Vu and Huyen Trang Pham and Linh Ngo Van and Nhat Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=erwatqQ4p8}\n}", "github": "", "reviewers": "dPHR;boEM;MKaE;cCSa", "pdf_size": 912627, "rating": "5;6;6;6", "confidence": "5;2;4;5", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "71;51;70;60", "wc_strengths": "86;40;83;31", "wc_weaknesses": "697;60;64;203", "wc_questions": "132;7;89;9", "wc_limitations": "10;14;38;6", "wc_review": "996;172;344;309", "wc_reply_reviewers": "312;18;105;21", "wc_reply_authors": "463;29;186;29", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 8.154753215150045 ], "wc_strengths_avg": [ 60.0, 24.728526037756478 ], "wc_weaknesses_avg": [ 256.0, 261.04118449011065 ], "wc_questions_avg": [ 59.25, 53.46201922860752 ], "wc_limitations_avg": [ 17.0, 12.449899597988733 ], "wc_review_avg": [ 455.25, 318.75019607837106 ], "wc_reply_reviewers_avg": [ 114.0, 119.5303308788192 ], "wc_reply_authors_avg": [ 176.75, 177.26022537501186 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3456624676935485221&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "hust.edu.vn;microsoft.com;vinai.io;hust.edu.vn;hust.edu.vn;utexas.edu;hust.edu.vn", "author_num": 7, "aff_unique_index": "0;1;2;0;0;3;0", "aff_unique_norm": "Hanoi University of Science and Technology;Microsoft;VinAI Research;University of Texas at Austin", "aff_unique_dep": ";Microsoft AI;;", "aff_unique_url": "https://www.hust.edu.vn;https://www.microsoft.com;https://www.vinai.io/;https://www.utexas.edu", "aff_unique_abbr": "HUST;Microsoft;VinAI;UT Austin", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Hanoi;;Austin", "aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "Vietnam;United States" }, { "title": "A Simple Image Segmentation Framework via In-Context Examples", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94242", "id": "esDvZi2Cf3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=esDvZi2Cf3", "openreview": "https://openreview.net/forum?id=esDvZi2Cf3", "poster": "/media/PosterPDFs/NeurIPS%202024/94242.png?t=1730799670.7310658", "project": "", "author_site": "Yang Liu, Chenchen Jing, Hengtao Li, Muzhi Zhu, Hao Chen, Xinlong Wang, Chunhua Shen", "tldr": "", "abstract": "Recently, there have been explorations of generalist segmentation models that can effectively tackle a variety of image segmentation tasks within a unified in-context learning framework. However, these methods still struggle with task ambiguity in in-context segmentation, as not all in-context examples can accurately convey the task information. In order to address this issue, we present SINE, a simple image $\\textbf{S}$egmentation framework utilizing $\\textbf{in}$-context $\\textbf{e}$xamples. Our approach leverages a Transformer encoder-decoder structure, where the encoder provides high-quality image representations, and the decoder is designed to yield multiple task-specific output masks to eliminate task ambiguity effectively. Specifically, we introduce an In-context Interaction module to complement in-context information and produce correlations between the target image and the in-context example and a Matching Transformer that uses fixed matching and a Hungarian algorithm to eliminate differences between different tasks. In addition, we have further perfected the current evaluation system for in-context image segmentation, aiming to facilitate a holistic appraisal of these models. Experiments on various segmentation tasks show the effectiveness of the proposed method.", "keywords": "Image Segmentation; In-Context Learning; Task Ambiguity", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yang Liu;Chenchen Jing;Hengtao Li;Muzhi Zhu;Hao Chen;Xinlong Wang;Chunhua Shen", "authorids": "~Yang_Liu83;~Chenchen_Jing2;~Hengtao_Li1;~Muzhi_Zhu1;~Hao_Chen17;~Xinlong_Wang2;~Chunhua_Shen2", "gender": "M;M;M;M;;M;", "homepage": "https://github.com/yangliu96;https://jingchenchen.github.io/;https://github.com/lihengtao;https://z-mu-z.github.io/;;;", "dblp": "51/3710-98.html;219/1679.html;;157/1679;;;", "google_scholar": "9JcQ2hwAAAAJ;0TKm3fgAAAAJ;;https://scholar.google.com.hk/citations?user=064gBH4AAAAJ;;DPz0DjYAAAAJ;", "orcid": "0009-0003-8540-9154;;;;;;", "linkedin": ";;;;;;", "or_profile": "~Yang_Liu83;~Chenchen_Jing2;~Hengtao_Li1;~Muzhi_Zhu1;~Hao_Chen17;~Xinlong_Wang2;~Chunhua_Shen2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Beijing Academy of Artificial Intelligence;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;baai.ac.cn;", "position": "PhD student;Postdoc;MS student;PhD student;;Researcher;", "bibtex": "@inproceedings{\nliu2024a,\ntitle={A Simple Image Segmentation Framework via In-Context Examples},\nauthor={Yang Liu and Chenchen Jing and Hengtao Li and Muzhi Zhu and Hao Chen and Xinlong Wang and Chunhua Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=esDvZi2Cf3}\n}", "github": "", "reviewers": "2YmX;rhP3;sZmi;nRMP", "pdf_size": 17132253, "rating": "3;5;5;7", "confidence": "5;3;4;4", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "63;70;182;75", "wc_strengths": "26;33;65;87", "wc_weaknesses": "321;99;266;113", "wc_questions": "252;1;47;42", "wc_limitations": "28;1;1;2", "wc_review": "690;204;561;319", "wc_reply_reviewers": "1434;192;27;55", "wc_reply_authors": "2872;737;16;16", "reply_reviewers": "4;2;1;1", "reply_authors": "7;3;2;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 97.5, 48.971930735881756 ], "wc_strengths_avg": [ 52.75, 24.641174890820444 ], "wc_weaknesses_avg": [ 199.75, 95.87328877221225 ], "wc_questions_avg": [ 85.5, 97.77141709109058 ], "wc_limitations_avg": [ 8.0, 11.554220008291344 ], "wc_review_avg": [ 443.5, 191.98242107026363 ], "wc_reply_reviewers_avg": [ 427.0, 584.7345551615707 ], "wc_reply_authors_avg": [ 910.25, 1170.239799143748 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17219581958849249454&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;baai.ac.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Zhejiang University;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.baaic.cn", "aff_unique_abbr": "ZJU;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Overcoming Brittleness in Pareto-Optimal Learning Augmented Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94241", "id": "esTPCUJZhe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=esTPCUJZhe", "openreview": "https://openreview.net/forum?id=esTPCUJZhe", "poster": "/media/PosterPDFs/NeurIPS%202024/94241.png?t=1731574085.1014202", "project": "", "author_site": "Alex Elenter, Spyros Angelopoulos, Christoph D\u00fcrr, Yanni LEFKI", "tldr": "", "abstract": "The study of online algorithms with machine-learned predictions has gained considerable prominence in recent years. One of the common objectives in the design and analysis of such algorithms is to attain (Pareto) optimal tradeoffs between the {\\em consistency} of the algorithm, i.e., its performance assuming perfect predictions, and its {\\em robustness}, i.e., the performance of the algorithm under adversarial predictions. In this work, we demonstrate that this optimization criterion can be extremely brittle, in that the performance of Pareto-optimal algorithms may degrade dramatically even in the presence of imperceptive prediction error. To remedy this drawback, we propose a new framework in which the smoothness in the performance of the algorithm is enforced by means of a {\\em user-specified profile}. This allows us to regulate the performance of the algorithm as a function of the prediction error, while simultaneously\nmaintaining the analytical notion of consistency/robustness tradeoffs, adapted to the profile setting. We apply this new approach to a well-studied online problem, namely the {\\em one-way trading} problem. For this problem, we further address another limitation of the state-of-the-art Pareto-optimal algorithms, namely the fact that they are tailored to worst-case, and extremely pessimistic inputs. We propose a new Pareto-optimal algorithm that leverages any deviation from the worst-case input to its benefit, and introduce a new metric that allows us to compare any two Pareto-optimal algorithms via a {\\em dominance} relation.", "keywords": "Learning-augmented algorithms;online algorithms;competitive analysis;one-way trading;Pareto-optimality", "primary_area": "optimization", "supplementary_material": "/attachment/5b881454f92cfbfa328fa10ba7feeebc3e668f9e.zip", "author": "Alex Elenter;Spyros Angelopoulos;Christoph D\u00fcrr;Yanni LEFKI", "authorids": "~Alex_Elenter1;~Spyros_Angelopoulos1;~Christoph_D\u00fcrr1;~Yanni_LEFKI1", "gender": "M;;M;M", "homepage": ";http://lip6.fr/Spyros.Angelopoulos;http://www.lip6.fr/Christoph.Durr;", "dblp": ";00/4199;d/ChristophDurr;", "google_scholar": ";uH4c-KcAAAAJ;https://scholar.google.com/citations?hl=fr;", "orcid": ";0000-0001-9819-9158;0000-0001-8103-5333;", "linkedin": "alex-elenter-4ab43a192/;;;yanni-lefki-495a12212?original_referer=https%3A%2F%2Fwww%2Egoogle%2Ecom%2F&originalSubdomain=fr", "or_profile": "~Alex_Elenter1;~Spyros_Angelopoulos1;~Christoph_D\u00fcrr1;~Yanni_LEFKI1", "aff": "T\u00e9l\u00e9com ParisTech;CNRS;Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);\u00c9cole Polytechnique", "aff_domain": "telecom-paristech.fr;cnrs.fr;sorbonne-universite.fr;polytechnique.edu", "position": "MS student;Researcher;Researcher;MS student", "bibtex": "@inproceedings{\nelenter2024overcoming,\ntitle={Overcoming Brittleness in Pareto-Optimal Learning Augmented Algorithms},\nauthor={Alex Elenter and Spyros Angelopoulos and Christoph D{\\\"u}rr and Yanni LEFKI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=esTPCUJZhe}\n}", "github": "", "reviewers": "czee;9nZL;jsKn;f8Kk", "pdf_size": 511234, "rating": "6;6;6;8", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "210;405;153;246", "wc_strengths": "80;155;66;101", "wc_weaknesses": "177;319;62;45", "wc_questions": "154;1;4;77", "wc_limitations": "1;9;8;2", "wc_review": "622;889;293;471", "wc_reply_reviewers": "83;22;12;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 253.5, 93.5427709660132 ], "wc_strengths_avg": [ 100.5, 33.84154251803543 ], "wc_weaknesses_avg": [ 150.75, 109.60924915352719 ], "wc_questions_avg": [ 59.0, 62.72559286288173 ], "wc_limitations_avg": [ 5.0, 3.5355339059327378 ], "wc_review_avg": [ 568.75, 218.5112983348916 ], "wc_reply_reviewers_avg": [ 32.75, 29.252136674096132 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5721177826288934264&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "telecom-paristech.fr;cnrs.fr;sorbonne-universite.fr;polytechnique.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;Centre National de la Recherche Scientifique;Sorbonne Universit\u00e9;Ecole Polytechnique", "aff_unique_dep": ";;Facult\u00e9 des Sciences;", "aff_unique_url": "https://www.telecom-paristech.fr;https://www.cnrs.fr;https://www.sorbonne-universite.fr;https://www.polytechnique.edu", "aff_unique_abbr": "TP;CNRS;Sorbonne U;X", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris VI", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Neur2BiLO: Neural Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94240", "id": "esVleaqkRc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=esVleaqkRc", "openreview": "https://openreview.net/forum?id=esVleaqkRc", "poster": "/media/PosterPDFs/NeurIPS%202024/94240.png?t=1733342270.1392128", "project": "", "author_site": "Justin Dumouchelle, Esther Julien, Jannis Kurtz, Elias Khalil", "tldr": "", "abstract": "Bilevel optimization deals with nested problems in which *leader* takes the first decision to minimize their objective function while accounting for a *follower*'s best-response reaction. Constrained bilevel problems with integer variables are particularly notorious for their hardness. While exact solvers have been proposed for mixed-integer *linear* bilevel optimization, they tend to scale poorly with problem size and are hard to generalize to the non-linear case. On the other hand, problem-specific algorithms (exact and heuristic) are limited in scope. Under a data-driven setting in which similar instances of a bilevel problem are solved routinely, our proposed framework, Neur2BiLO, embeds a neural network approximation of the leader's or follower's value function, trained via supervised regression, into an easy-to-solve mixed-integer program. Neur2BiLO serves as a heuristic that produces high-quality solutions extremely fast for four applications with linear and non-linear objectives and pure and mixed-integer variables.", "keywords": "bilevel optimization;machine learning;discrete optimization;integer programming", "primary_area": "optimization", "supplementary_material": "", "author": "Justin Dumouchelle;Esther Julien;Jannis Kurtz;Elias Boutros Khalil", "authorids": "~Justin_Dumouchelle1;~Esther_Julien1;~Jannis_Kurtz1;~Elias_Boutros_Khalil1", "gender": "M;F;;M", "homepage": ";;;http://www.ekhalil.com", "dblp": ";;181/0912;151/3240", "google_scholar": ";qyOapq8AAAAJ;https://scholar.google.de/citations?user=rBbMHgwAAAAJ;juqDWQMAAAAJ", "orcid": ";0000-0002-7337-1086;0000-0003-1570-7044;", "linkedin": "justin-dumouchelle-676bbb84/;esther-julien/;;", "or_profile": "~Justin_Dumouchelle1;~Esther_Julien1;~Jannis_Kurtz1;~Elias_Bouros_Khalil1", "aff": "University of Toronto;Delft University of Technology;University of Amsterdam;Polytechnique Montreal", "aff_domain": "utoronto.ca;tudelft.nl;uva.nl;polymtl.ca", "position": "PhD student;PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ndumouchelle2024neurbilo,\ntitle={Neur2Bi{LO}: Neural Bilevel Optimization},\nauthor={Justin Dumouchelle and Esther Julien and Jannis Kurtz and Elias Boutros Khalil},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=esVleaqkRc}\n}", "github": "", "reviewers": "qVCc;Rwxw;saS9;otky", "pdf_size": 585299, "rating": "4;4;6;8", "confidence": "3;5;4;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "37;38;145;192", "wc_strengths": "34;83;55;115", "wc_weaknesses": "156;153;110;99", "wc_questions": "2;35;5;106", "wc_limitations": "2;13;26;4", "wc_review": "231;322;341;516", "wc_reply_reviewers": "0;0;9;127", "wc_reply_authors": "0;0;0;30", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 103.0, 67.57588327206682 ], "wc_strengths_avg": [ 71.75, 30.425112982534674 ], "wc_weaknesses_avg": [ 129.5, 25.3229145242012 ], "wc_questions_avg": [ 37.0, 41.87481343242021 ], "wc_limitations_avg": [ 11.25, 9.470348462437906 ], "wc_review_avg": [ 352.5, 103.14674013268669 ], "wc_reply_reviewers_avg": [ 34.0, 53.819141576208736 ], "wc_reply_authors_avg": [ 7.5, 12.99038105676658 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11882381119212113785&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "utoronto.ca;tudelft.nl;uva.nl;polymtl.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Toronto;Delft University of Technology;University of Amsterdam;Polytechnique Montreal", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.utoronto.ca;https://www.tudelft.nl;https://www.uva.nl;https://www.polymtl.ca", "aff_unique_abbr": "U of T;TU Delft;UvA;PolyMTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;Netherlands" }, { "title": "In-Context Symmetries: Self-Supervised Learning through Contextual World Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94239", "id": "etPAH4xSUn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=etPAH4xSUn", "openreview": "https://openreview.net/forum?id=etPAH4xSUn", "poster": "", "project": "", "author_site": "Sharut Gupta, Chenyu Wang, Yifei Wang, Tommi Jaakkola, Stefanie Jegelka", "tldr": "", "abstract": "At the core of self-supervised learning for vision is the idea of learning invariant or equivariant representations with respect to a set of data transformations. This approach, however, introduces strong inductive biases, which can render the representations fragile in downstream tasks that do not conform to these symmetries. In this work, drawing insights from world models, we propose to instead learn a general representation that can adapt to be invariant or equivariant to different transformations by paying attention to context --- a memory module that tracks task-specific states, actions and future states. Here, the action is the transformation, while the current and future states respectively represent the input's representation before and after the transformation. Our proposed algorithm, Contextual Self Supervised Learning (ContextSSL), learns equivariance to all transformations (as opposed to invariance). In this way, the model can learn to encode all relevant features as general representations while having the versatility to tail down to task-wise symmetries when given a few examples as the context. Empirically, we demonstrate significant performance gains over existing methods on equivariance-related tasks, supported by both qualitative and quantitative evaluations.", "keywords": "Self-Supervised Learning; Context; Equivariance", "primary_area": "machine_vision", "supplementary_material": "", "author": "Sharut Gupta;Chenyu Wang;Yifei Wang;Tommi Jaakkola;Stefanie Jegelka", "authorids": "~Sharut_Gupta1;~Chenyu_Wang7;~Yifei_Wang1;~Tommi_S._Jaakkola1;~Stefanie_Jegelka3", "gender": "F;;M;;F", "homepage": "https://www.mit.edu/~sharut/;;https://yifeiwang77.com;;http://people.csail.mit.edu/stefje/", "dblp": ";;00/555-1;;38/7003", "google_scholar": "https://scholar.google.com/citations?hl=en;;-CLy6YsAAAAJ;;gTWUZlsAAAAJ", "orcid": ";;;;", "linkedin": "sharut-gupta/;;;;", "or_profile": "~Sharut_Gupta1;~Chenyu_Wang7;~Yifei_Wang1;~Tommi_S._Jaakkola1;~Stefanie_Jegelka3", "aff": "Google;;Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "google.com;;mit.edu;;mit.edu", "position": "Student Researcher;;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\ngupta2024incontext,\ntitle={In-Context Symmetries: Self-Supervised Learning through Contextual World Models},\nauthor={Sharut Gupta and Chenyu Wang and Yifei Wang and Tommi Jaakkola and Stefanie Jegelka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=etPAH4xSUn}\n}", "github": "", "reviewers": "iSU1;WVFD;y5eJ", "pdf_size": 16254157, "rating": "5;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;2;3", "wc_summary": "72;84;88", "wc_strengths": "33;86;107", "wc_weaknesses": "83;280;221", "wc_questions": "58;15;50", "wc_limitations": "4;25;5", "wc_review": "250;490;471", "wc_reply_reviewers": "212;0;134", "wc_reply_authors": "1016;241;73", "reply_reviewers": "2;0;1", "reply_authors": "5;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 81.33333333333333, 6.79869268479038 ], "wc_strengths_avg": [ 75.33333333333333, 31.13768706175132 ], "wc_weaknesses_avg": [ 194.66666666666666, 82.5523402992846 ], "wc_questions_avg": [ 41.0, 18.672618098881223 ], "wc_limitations_avg": [ 11.333333333333334, 9.672412085697939 ], "wc_review_avg": [ 403.6666666666667, 108.93525090111505 ], "wc_reply_reviewers_avg": [ 115.33333333333333, 87.5493511620097 ], "wc_reply_authors_avg": [ 443.3333333333333, 410.7037320935318 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q4IGSvV_MJIJ:scholar.google.com/&scioq=In-Context+Symmetries:+Self-Supervised+Learning+through+Contextual+World+Models&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "google.com;;mit.edu;;mit.edu", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://web.mit.edu", "aff_unique_abbr": "Google;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LibMOON: A Gradient-based MultiObjective OptimizatioN Library in PyTorch", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97588", "id": "etdXLAMZoc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=etdXLAMZoc", "openreview": "https://openreview.net/forum?id=etdXLAMZoc", "poster": "", "project": "", "author_site": "Xiaoyuan Zhang, Liang ZHAO, Yingying Yu, Xi Lin, Yifan Chen, Han Zhao, Qingfu Zhang", "tldr": "", "abstract": "Multiobjective optimization problems (MOPs) are prevalent in machine learning, with applications in multi-task learning, learning under fairness or robustness constraints, etc. Instead of reducing multiple objective functions into a scalar objective, MOPs aim to optimize for the so-called Pareto optimality or Pareto set learning, which involves optimizing more than one objective function simultaneously, over models with thousands to millions of parameters. Existing benchmark libraries for MOPs mainly focus on evolutionary algorithms, most of which are zeroth-order or meta-heuristic methods that do not effectively utilize higher-order information from objectives and cannot scale to large-scale models with millions of parameters. In light of the above challenges, this paper introduces \\algoname, the first multiobjective optimization library that supports state-of-the-art gradient-based methods, provides a fair and comprehensive benchmark, and is open-sourced for the community.", "keywords": "Mathematical tools; Multiobjective optimization; Pareto set learning; Bayesian optimization; Pareto Machine Learning", "primary_area": "", "supplementary_material": "", "author": "Xiaoyuan Zhang;Liang Zhao;Yingying Yu;Xi Lin;Yifan Chen;Han Zhao;Qingfu Zhang", "authorids": "~Xiaoyuan_Zhang2;~Liang_Zhao21;~Yingying_Yu3;~Xi_Lin2;~Yifan_Chen3;~Han_Zhao1;~Qingfu_Zhang1", "gender": "M;;F;M;;M;M", "homepage": ";https://liazhao5.github.io/;;https://xi-l.github.io/;;https://hanzhaoml.github.io/;https://www.cs.cityu.edu.hk/~qzhan7/index.html", "dblp": ";;;43/489-1;;03/3520-2;98/1240.html", "google_scholar": "KQj18L8AAAAJ;https://scholar.google.com.hk/citations?user=DDGCxNkAAAAJ;https://scholar.google.com/citations?hl=en;QB_MUboAAAAJ;;x942ipYAAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ", "orcid": "0000-0002-3852-645X;0000-0003-3735-936X;0000-0002-9452-0298;;;0000-0002-8579-1600;", "linkedin": ";;;;;;", "or_profile": "~Xiaoyuan_Zhang2;~Liang_Zhao21;~Yingying_Yu3;~Xi_Lin2;~Yifan_Chen3;~Han_Zhao1;~Qingfu_Zhang1", "aff": "City University of Hong Kong;City University of Hong Kong;City University;City University of Hong Kong;;University of Illinois, Urbana Champaign;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu;cityu.edu.hk;;illinois.edu;cityu.edu.hk", "position": "PhD student;PhD student;PhD student;Postdoc;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024libmoon,\ntitle={Lib{MOON}: A Gradient-based MultiObjective OptimizatioN Library in PyTorch},\nauthor={Xiaoyuan Zhang and Liang Zhao and Yingying Yu and Xi Lin and Yifan Chen and Han Zhao and Qingfu Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=etdXLAMZoc}\n}", "github": "", "reviewers": "jTTe;PsFx;f2ft;vVnF", "pdf_size": 917416, "rating": "4;6;7;7", "confidence": "4;3;4;3", "wc_summary_and_contributions": "108;59;59;28", "wc_strengths": "77;99;134;284", "wc_improvement": "158;124;140;322", "wc_limitations": "44;16;1;24", "wc_correctness": "14;10;1;10", "wc_clarity": "65;8;5;102", "wc_relation_to_prior_work": "141;11;1;4", "wc_documentation": "53;4;1;21", "wc_additional_feedback": "1;1;1;1", "wc_review": "661;332;343;796", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "558;0;26;211", "reply_reviewers": "0;0;0;0", "reply_authors": "4;4;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 63.5, 28.64000698323937 ], "wc_strengths_avg": [ 148.5, 80.82852219359204 ], "wc_improvement_avg": [ 186.0, 79.435508432942 ], "wc_limitations_avg": [ 21.25, 15.514106484100203 ], "wc_correctness_avg": [ 8.75, 4.763139720814412 ], "wc_clarity_avg": [ 45.0, 40.675545478825484 ], "wc_relation_to_prior_work_avg": [ 39.25, 58.8573487340366 ], "wc_documentation_avg": [ 19.75, 20.656415468323637 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 533.0, 201.27965620002433 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 198.75, 222.7974135846285 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6331942781442268723&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cityu.edu.hk;cityu.edu.hk;cityu.edu;cityu.edu.hk;;illinois.edu;cityu.edu.hk", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "City University of Hong Kong;City University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.cityuniversity.edu;https://illinois.edu", "aff_unique_abbr": "CityU;CityU;UIUC", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;Urbana-Champaign", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Leveraging Drift to Improve Sample Complexity of Variance Exploding Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94238", "id": "euQ0C4iS7O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=euQ0C4iS7O", "openreview": "https://openreview.net/forum?id=euQ0C4iS7O", "poster": "/media/PosterPDFs/NeurIPS%202024/94238.png?t=1731676184.0208926", "project": "", "author_site": "Ruofeng Yang, Zhijie Wang, Bo Jiang, Shuai Li", "tldr": "", "abstract": "Variance exploding (VE) based diffusion models, an important class of diffusion models, have shown state-of-the-art (SOTA) performance. However, only a few theoretical works analyze VE-based models, and those works suffer from a worse forward convergence rate $1/\\text{poly}(T)$ than the $\\exp{(-T)}$ of variance preserving (VP) based models, where $T$ is the forward diffusion time and the rate measures the distance between forward marginal distribution $q_T$ and pure Gaussian noise. The slow rate is due to the Brownian Motion without a drift term. In this work, we design a new drifted VESDE forward process, which allows a faster $\\exp{(-T)}$ forward convergence rate. With this process, we achieve the first efficient polynomial sample complexity for a series of VE-based models with reverse SDE under the manifold hypothesis. Furthermore, unlike previous works, we allow the diffusion coefficient to be unbounded instead of a constant, which is closer to the SOTA models. Besides the reverse SDE, the other common reverse process is the probability flow ODE (PFODE) process, which is deterministic and enjoys faster sample speed. To deepen the understanding of VE-based models, we consider a more general setting considering reverse SDE and PFODE simultaneously, propose a unified tangent-based analysis framework, and prove the first quantitative convergence guarantee for SOTA VE-based models with reverse PFODE.\nWe also show that the drifted VESDE can balance different error terms and improve generated samples without training through synthetic and real-world experiments.", "keywords": "Variance exploding diffusion models;Convergence guarantee;Manifold hypothesis", "primary_area": "generative_models", "supplementary_material": "", "author": "Ruofeng Yang;Zhijie Wang;Bo Jiang;Shuai Li", "authorids": "~Ruofeng_Yang1;~Zhijie_Wang7;~Bo_Jiang2;~Shuai_Li3", "gender": "M;M;M;F", "homepage": "https://github.com/wanshuiyin;https://github.com/Vincent-Zhijie;https://jhc.sjtu.edu.cn/~bjiang/;http://shuaili8.github.io", "dblp": "350/4546;;34/2005-3.html;57/2281-10", "google_scholar": "https://scholar.google.com.hk/citations?user=Cw9HDacAAAAJ;x_9XRb4AAAAJ;WxAIZtMAAAAJ;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Ruofeng_Yang1;~Zhijie_Wang7;~Bo_Jiang2;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024leveraging,\ntitle={Leveraging Drift to Improve Sample Complexity of Variance Exploding Diffusion Models},\nauthor={Ruofeng Yang and Zhijie Wang and Bo Jiang and Shuai Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=euQ0C4iS7O}\n}", "github": "", "reviewers": "bHCt;a6C8;ZwJR;Duca", "pdf_size": 3134164, "rating": "4;6;6;7", "confidence": "3;2;4;2", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "111;74;142;38", "wc_strengths": "52;34;80;61", "wc_weaknesses": "149;71;193;57", "wc_questions": "102;77;220;156", "wc_limitations": "8;10;32;11", "wc_review": "422;266;667;323", "wc_reply_reviewers": "0;105;227;31", "wc_reply_authors": "184;298;608;35", "reply_reviewers": "0;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.25, 39.04724702203729 ], "wc_strengths_avg": [ 56.75, 16.57369904396722 ], "wc_weaknesses_avg": [ 117.5, 55.9352304008842 ], "wc_questions_avg": [ 138.75, 54.91527565259051 ], "wc_limitations_avg": [ 15.25, 9.730750228014282 ], "wc_review_avg": [ 419.5, 153.40876767642715 ], "wc_reply_reviewers_avg": [ 90.75, 87.42532527820528 ], "wc_reply_authors_avg": [ 281.25, 210.441648682004 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xkkD14eXv3sJ:scholar.google.com/&scioq=Leveraging+Drift+to+Improve+Sample+Complexity+of+Variance+Exploding+Diffusion+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Are We on the Right Way for Evaluating Large Vision-Language Models?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94237", "id": "evP9mxNNxJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=evP9mxNNxJ", "openreview": "https://openreview.net/forum?id=evP9mxNNxJ", "poster": "/media/PosterPDFs/NeurIPS%202024/94237.png?t=1731645839.3994298", "project": "", "author_site": "Lin Chen, Jinsong Li, Xiaoyi Dong, Pan Zhang, Yuhang Zang, Zehui Chen, Haodong Duan, Jiaqi Wang, Yu Qiao, Dahua Lin, Feng Zhao", "tldr": "", "abstract": "Large vision-language models (LVLMs) have recently achieved rapid progress, sparking numerous studies to evaluate their multi-modal capabilities. However, we dig into current evaluation works and identify two primary issues: 1) Visual content is unnecessary for many samples. The answers can be directly inferred from the questions and options, or the world knowledge embedded in LLMs. This phenomenon is prevalent across current benchmarks. For instance, GeminiPro achieves 42.7% on the MMMU benchmark without any visual input, and outperforms the random choice baseline across six benchmarks near 24% on average. 2) Unintentional data leakage exists in LLM and LVLM training. LLM and LVLM could still answer some visual-necessary questions without visual content, indicating the memorizing of these samples within large-scale training data. For example, Sphinx-X-MoE gets 43.6% on MMMU without accessing images, surpassing its LLM backbone with 17.9%. Both problems lead to misjudgments of actual multi-modal gains and potentially misguide the study of LVLM. To this end, we present MMStar, an elite vision-indispensable multi-modal benchmark comprising 1,500 samples meticulously selected by humans. MMStar benchmarks 6 core capabilities and 18 detailed axes, aiming to evaluate LVLMs' multi-modal capacities with carefully balanced and purified samples. These samples are first roughly selected from current benchmarks with an automated pipeline, human review is then involved to ensure each curated sample exhibits visual dependency, minimal data leakage, and requires advanced multi-modal capabilities. Moreover, two metrics are developed to measure data leakage and actual performance gain in multi-modal training. We evaluate 16 leading LVLMs on MMStar to assess their multi-modal capabilities, and on 7 benchmarks with the proposed metrics to investigate their data leakage and actual multi-modal gain.", "keywords": "large vision-language model;data leakage;multi-modal benchmark", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lin Chen;Jinsong Li;Xiaoyi Dong;Pan Zhang;Yuhang Zang;Zehui Chen;Haodong Duan;Jiaqi Wang;Yu Qiao;Dahua Lin;Feng Zhao", "authorids": "~Lin_Chen18;~Jinsong_Li1;~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Zehui_Chen1;~Haodong_Duan1;~Jiaqi_Wang1;~Yu_Qiao1;~Dahua_Lin1;~Feng_Zhao6", "gender": "M;M;M;M;M;M;M;M;;M;M", "homepage": "https://lin-chen.site;https://li-jinsong.github.io/;;https://panzhang0212.github.io/;https://yuhangzang.github.io;https://lovesnowbest.site;https://kennymckormick.github.io;https://myownskyw7.github.io/;;http://dahua.site;https://bivlab123.github.io/", "dblp": "13/3479-19;29/3923-1;230/3711;;230/4433;;211/7919;44/740-3;;53/6088;181/2734-4", "google_scholar": "https://scholar.google.com/citations?hl=en;4yD2aTkAAAAJ;FscToE0AAAAJ;moHH480AAAAJ;hW23VKIAAAAJ;NfSsLncAAAAJ;vi3W-m8AAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;;GMzzRRUAAAAJ;https://scholar.google.co.uk/citations?hl=en", "orcid": "0000-0002-1546-791X;;;;0000-0003-1110-5062;0000-0002-1843-4478;0000-0002-3052-4177;;;;0000-0001-6767-8105", "linkedin": ";;;;yuhang-zang/;;haodong-duan-bb9349166/;;;;", "or_profile": "~Lin_Chen18;~Jinsong_Li1;~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Zehui_Chen1;~Haodong_Duan1;~Jiaqi_Wang1;~Yu_Qiao1;~Dahua_Lin1;~Feng_Zhao6", "aff": "University of Science and Technology of China;Xi'an Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;University of Science and Technology of China;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;;The Chinese University of Hong Kong;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;xjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ustc.edu.cn;pjlab.org.cn;pjlab.org.cn;;cuhk.edu.hk;ustc.edu.cn", "position": "MS student;Undergrad student;Researcher;Researcher;Researcher;PhD student;Postdoc;Research Scientist;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024are,\ntitle={Are We on the Right Way for Evaluating Large Vision-Language Models?},\nauthor={Lin Chen and Jinsong Li and Xiaoyi Dong and Pan Zhang and Yuhang Zang and Zehui Chen and Haodong Duan and Jiaqi Wang and Yu Qiao and Dahua Lin and Feng Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=evP9mxNNxJ}\n}", "github": "", "reviewers": "sNEM;btYn;JQaJ;bz7V", "pdf_size": 4844032, "rating": "6;6;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "65;141;87;81", "wc_strengths": "62;162;102;41", "wc_weaknesses": "523;115;87;55", "wc_questions": "88;22;52;10", "wc_limitations": "101;9;1;1", "wc_review": "839;449;329;188", "wc_reply_reviewers": "23;0;6;0", "wc_reply_authors": "46;0;20;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.5, 28.578838324886476 ], "wc_strengths_avg": [ 91.75, 46.09975596464693 ], "wc_weaknesses_avg": [ 195.0, 190.55707806324068 ], "wc_questions_avg": [ 43.0, 30.14962686336267 ], "wc_limitations_avg": [ 28.0, 42.2729227756965 ], "wc_review_avg": [ 451.25, 242.17800787850246 ], "wc_reply_reviewers_avg": [ 7.25, 9.41740410091868 ], "wc_reply_authors_avg": [ 16.5, 18.887826767524103 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 243, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4459148544604166048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ustc.edu.cn;xjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ustc.edu.cn;pjlab.org.cn;pjlab.org.cn;;cuhk.edu.hk;ustc.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;2;2;0;2;3;4;0", "aff_unique_norm": "University of Science and Technology of China;Xi'an Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.xjtu.edu.cn;http://www.shailab.org/;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "USTC;XJTU;Shanghai AI Lab;SAIL;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A scalable generative model for dynamical system reconstruction from neuroimaging data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94236", "id": "exATQD4HSv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=exATQD4HSv", "openreview": "https://openreview.net/forum?id=exATQD4HSv", "poster": "/media/PosterPDFs/NeurIPS%202024/94236.png?t=1731717082.0041466", "project": "", "author_site": "Eric Volkmann, Alena Br\u00e4ndle, Daniel Durstewitz, Georgia Koppe", "tldr": "", "abstract": "Data-driven inference of the generative dynamics underlying a set of observed time series is of growing interest in machine learning and the natural sciences. In neuroscience, such methods promise to alleviate the need to handcraft models based on biophysical principles and allow to automatize the inference of inter-individual differences in brain dynamics. \nRecent breakthroughs in training techniques for state space models (SSMs) specifically geared toward dynamical systems (DS) reconstruction (DSR) enable to recover the underlying system including its geometrical (attractor) and long-term statistical invariants from even short time series. These techniques are based on control-theoretic ideas, like modern variants of teacher forcing (TF), to ensure stable loss gradient propagation while training. \nHowever, as it currently stands, these techniques are not directly applicable to data modalities where current observations depend on an entire history of previous states due to a signal\u2019s filtering properties, as common in neuroscience (and physiology more generally). \nProminent examples are the blood oxygenation level dependent (BOLD) signal in functional magnetic resonance imaging (fMRI) or Ca$^{2+}$ imaging data. \nSuch types of signals render the SSM's decoder model non-invertible, a requirement for previous TF-based methods.\nHere, exploiting the recent success of control techniques for training SSMs, we propose a novel algorithm that solves this problem and scales exceptionally well with model dimensionality and filter length. We demonstrate its efficiency in reconstructing dynamical systems, including their state space geometry and long-term temporal properties, from just short BOLD time series.", "keywords": "Dynamical Systems Reconstruction;Recurrent Neural Networks;Nonlinear Dynamics;Neuroscience;fMRI", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/9dc50e6190ba7c467c570658b4ff356672257b64.zip", "author": "Eric Volkmann;Alena Br\u00e4ndle;Daniel Durstewitz;Georgia Koppe", "authorids": "~Eric_Volkmann1;~Alena_Br\u00e4ndle1;~Daniel_Durstewitz1;~Georgia_Koppe1", "gender": "M;F;;F", "homepage": ";https://www.zi-mannheim.de/forschung/personen/person/24981.html;https://durstewitzlab.github.io;https://www.zi-mannheim.de/en/research/people/person/7111.html", "dblp": "392/9484;393/1612;98/2120;152/2270", "google_scholar": "BTpkGgEAAAAJ;;https://scholar.google.de/citations?user=2bcbKU0AAAAJ;https://scholar.google.de/citations?user=5EVBcowAAAAJ", "orcid": ";;0000-0002-9340-3786;", "linkedin": "eric-volkmann-34b027205/;;;", "or_profile": "~Eric_Volkmann1;~Alena_Br\u00e4ndle1;~Daniel_Durstewitz1;~Georgia_Koppe1", "aff": "Johannes Kepler Universit\u00e4t Linz;Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg University;Central Institute of Mental Health", "aff_domain": "jku.at;uni-heidelberg.de;uni-heidelberg.de;zi-mannheim.de", "position": "PhD student;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nvolkmann2024a,\ntitle={A scalable generative model for dynamical system reconstruction from neuroimaging data},\nauthor={Eric Volkmann and Alena Br{\\\"a}ndle and Daniel Durstewitz and Georgia Koppe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=exATQD4HSv}\n}", "github": "", "reviewers": "9P9G;oqqk;qvr3;6CX8", "pdf_size": 11851871, "rating": "5;5;7;7", "confidence": "3;3;3;3", "soundness": "3;2;4;4", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "130;50;80;111", "wc_strengths": "84;55;43;92", "wc_weaknesses": "1122;115;161;83", "wc_questions": "88;1;33;59", "wc_limitations": "4;1;21;10", "wc_review": "1428;222;338;355", "wc_reply_reviewers": "605;32;23;43", "wc_reply_authors": "1219;41;20;44", "reply_reviewers": "3;1;1;1", "reply_authors": "5;3;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.75, 30.457962834043908 ], "wc_strengths_avg": [ 68.5, 20.155644370746373 ], "wc_weaknesses_avg": [ 370.25, 434.90767698443767 ], "wc_questions_avg": [ 45.25, 32.11210830823788 ], "wc_limitations_avg": [ 9.0, 7.648529270389178 ], "wc_review_avg": [ 585.75, 488.95929022772435 ], "wc_reply_reviewers_avg": [ 175.75, 247.9287952215313 ], "wc_reply_authors_avg": [ 331.0, 512.7704164633525 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13947701724876954776&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "jku.at;uni-heidelberg.de;uni-heidelberg.de;zi-mannheim.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Johannes Kepler University Linz;Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg University;Central Institute of Mental Health", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.jku.at;https://www.uni-heidelberg.de/;https://www.uni-heidelberg.de;https://www.zi-mannheim.de", "aff_unique_abbr": "JKU;Uni Heidelberg;Uni Heidelberg;", "aff_campus_unique_index": "0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Austria;Germany" }, { "title": "Grid4D: 4D Decomposed Hash Encoding for High-Fidelity Dynamic Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94235", "id": "eyfYC19gOd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eyfYC19gOd", "openreview": "https://openreview.net/forum?id=eyfYC19gOd", "poster": "/media/PosterPDFs/NeurIPS%202024/94235.png?t=1732534008.6392593", "project": "", "author_site": "Jiawei Xu, Zexin Fan, Jian Yang, Jin Xie", "tldr": "", "abstract": "Recently, Gaussian splatting has received more and more attention in the field of static scene rendering. Due to the low computational overhead and inherent flexibility of explicit representations, plane-based explicit methods are popular ways to predict deformations for Gaussian-based dynamic scene rendering models. However, plane-based methods rely on the inappropriate low-rank assumption and excessively decompose the space-time 4D encoding, resulting in overmuch feature overlap and unsatisfactory rendering quality. To tackle these problems, we propose Grid4D, a dynamic scene rendering model based on Gaussian splatting and employing a novel explicit encoding method for the 4D input through the hash encoding. Different from plane-based explicit representations, we decompose the 4D encoding into one spatial and three temporal 3D hash encodings without the low-rank assumption. Additionally, we design a novel attention module that generates the attention scores in a directional range to aggregate the spatial and temporal features. The directional attention enables Grid4D to more accurately fit the diverse deformations across distinct scene components based on the spatial encoded features. Moreover, to mitigate the inherent lack of smoothness in explicit representation methods, we introduce a smooth regularization term that keeps our model from the chaos of deformation prediction. Our experiments demonstrate that Grid4D significantly outperforms the state-of-the-art models in visual quality and rendering speed.", "keywords": "Dynamic Scene Rendering;Gaussian Splatting;Hash Encoding;Explicit Representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiawei Xu;Zexin Fan;Jian Yang;Jin Xie", "authorids": "~Jiawei_Xu4;~Zexin_Fan1;~Jian_Yang1;~Jin_Xie3", "gender": "M;F;M;M", "homepage": "https://github.com/JiaweiXu8;https://github.com/zexinfan;;https://csjinxie.github.io/", "dblp": ";;y/JianYang3.html;80/1949-1.html", "google_scholar": "d4xKI4sAAAAJ;;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ;https://scholar.google.ae/citations?user=Q7QqJPEAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jiawei_Xu4;~Zexin_Fan1;~Jian_Yang1;~Jin_Xie3", "aff": "College of Computer Science;Nankai University;Nanjing University of Science and Technology;Nanjing University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;njust.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024gridd,\ntitle={Grid4D: 4D Decomposed Hash Encoding for High-fidelity Dynamic Gaussian Splatting},\nauthor={Jiawei Xu and Zexin Fan and Jian Yang and Jin Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eyfYC19gOd}\n}", "github": "", "reviewers": "KYXG;V3gV;2Pr7;T422", "pdf_size": 3166516, "rating": "5;5;5;6", "confidence": "5;4;4;5", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "62;102;81;103", "wc_strengths": "39;69;57;219", "wc_weaknesses": "172;175;67;316", "wc_questions": "104;90;64;32", "wc_limitations": "6;8;51;6", "wc_review": "383;444;320;676", "wc_reply_reviewers": "24;41;0;349", "wc_reply_authors": "0;0;0;701", "reply_reviewers": "1;1;0;2", "reply_authors": "1;1;1;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 87.0, 16.896745248715803 ], "wc_strengths_avg": [ 96.0, 71.81225522151495 ], "wc_weaknesses_avg": [ 182.5, 88.5 ], "wc_questions_avg": [ 72.5, 27.436289836637897 ], "wc_limitations_avg": [ 17.75, 19.2142525225417 ], "wc_review_avg": [ 455.75, 134.50720240938773 ], "wc_reply_reviewers_avg": [ 103.5, 142.4859642210418 ], "wc_reply_authors_avg": [ 175.25, 303.54190402644576 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9045399776483744455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;nankai.edu.cn;njust.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "College of Computer Science;Nankai University;Nanjing University of Science and Technology;Nanjing University", "aff_unique_dep": "Computer Science;;;", "aff_unique_url": ";http://www.nankai.edu.cn;http://www.nust.edu.cn/;https://www.nju.edu.cn", "aff_unique_abbr": ";NKU;NUST;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "title": "Bayesian Optimisation with Unknown Hyperparameters: Regret Bounds Logarithmically Closer to Optimal", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94234", "id": "eygv0JRvTL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=eygv0JRvTL", "openreview": "https://openreview.net/forum?id=eygv0JRvTL", "poster": "", "project": "", "author_site": "Juliusz Ziomek, Masaki Adachi, Michael A Osborne", "tldr": "", "abstract": "Bayesian Optimization (BO) is widely used for optimising black-box functions but requires us to specify the length scale hyperparameter, which defines the smoothness of the functions the optimizer will consider. Most current BO algorithms choose this hyperparameter by maximizing the marginal likelihood of the observed data, albeit risking misspecification if the objective function is less smooth in regions we have not yet explored. The only prior solution addressing this problem with theoretical guarantees was A-GP-UCB, proposed by Berkenkamp et al. (2019). This algorithm progressively decreases the length scale, expanding the class of functions considered by the optimizer. However, A-GP-UCB lacks a stopping mechanism, leading to over-exploration and slow convergence. To overcome this, we introduce Length scale Balancing (LB) - a novel approach, aggregating multiple base surrogate models with varying length scales. LB intermittently adds smaller length scale candidate values while retaining longer scales, balancing exploration and exploitation. We formally derive a cumulative regret bound of LB and compare it with the regret of an oracle BO algorithm using the optimal length scale. Denoting the factor by which the regret bound of A-GP-UCB was away from oracle as $g(T)$, we show that LB is only $\\log g(T)$ away from oracle regret. We also empirically evaluate our algorithm on synthetic and real-world benchmarks and show it outperforms A-GP-UCB and maximum likelihood estimation.", "keywords": "Bayesian Optimisation; Unknown Hyperparameters; Regret Bounds", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Juliusz Ziomek;Masaki Adachi;Michael A Osborne", "authorids": "~Juliusz_Ziomek1;~Masaki_Adachi1;~Michael_A_Osborne1", "gender": "M;;M", "homepage": "https://www.masaki-adachi.com;;https://www.robots.ox.ac.uk/~mosb/", "dblp": "317/2023;305/3383;59/6403", "google_scholar": ";aOHCQ-AAAAAJ;https://scholar.google.co.uk/citations?user=iTNcAakAAAAJ", "orcid": ";;0000-0003-1959-012X", "linkedin": "masaki-adachi-b349311a2/;juliusz-ziomek-73a30b186/;", "or_profile": "~Masaki_Adachi1;~Juliusz_Krzysztof_Ziomek1;~Michael_Osborne1", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nziomek2024bayesian,\ntitle={Bayesian Optimisation with Unknown Hyperparameters: Regret Bounds Logarithmically Closer to Optimal},\nauthor={Juliusz Ziomek and Masaki Adachi and Michael A Osborne},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=eygv0JRvTL}\n}", "github": "", "reviewers": "YCMG;NyJf;k1W2;5KYZ", "pdf_size": 2536035, "rating": "6;6;7;7", "confidence": "3;3;4;3", "soundness": "3;2;3;3", "novelty": "3;3;4;3", "presentation": "2;1;3;3", "wc_summary": "80;47;83;139", "wc_strengths": "56;22;144;50", "wc_weaknesses": "442;204;86;109", "wc_questions": "189;120;1;42", "wc_limitations": "27;12;23;57", "wc_review": "794;405;337;397", "wc_reply_reviewers": "22;20;22;25", "wc_reply_authors": "27;26;27;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 87.25, 33.04826016600571 ], "wc_strengths_avg": [ 68.0, 45.71651780264984 ], "wc_weaknesses_avg": [ 210.25, 140.92262948157048 ], "wc_questions_avg": [ 88.0, 72.30145226757206 ], "wc_limitations_avg": [ 29.75, 16.663958113245485 ], "wc_review_avg": [ 483.25, 181.32619088261904 ], "wc_reply_reviewers_avg": [ 22.25, 1.7853571071357126 ], "wc_reply_authors_avg": [ 26.5, 0.5 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18317012427517734442&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "How JEPA Avoids Noisy Features: The Implicit Bias of Deep Linear Self Distillation Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94233", "id": "ez7w0Ss4g9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ez7w0Ss4g9", "openreview": "https://openreview.net/forum?id=ez7w0Ss4g9", "poster": "/media/PosterPDFs/NeurIPS%202024/94233.png?t=1733429349.638819", "project": "", "author_site": "Etai Littwin, Omid Saremi, Madhu Advani, Vimal Thilak, Preetum Nakkiran, Chen Huang, Joshua Susskind", "tldr": "", "abstract": "Two competing paradigms exist for self-supervised learning of data representations. \n Joint Embedding Predictive Architectures (JEPAs) is a class of architectures in which semantically similar inputs are encoded into representations that are predictive of each other. A recent successful approach that falls under the JEPA framework is self-distillation, where an online encoder is trained to predict the output of the target encoder, sometimes with a lightweight predictor network. This is contrasted with the Masked Auto Encoder (MAE) paradigm, where an encoder and decoder are trained to reconstruct missing parts of the input in ambient space rather than its latent representation. A common motivation for using the JEPA approach over MAE is that the JEPA objective prioritizes abstract features over fine-grained pixel information (which can be unpredictable and uninformative).\n In this work, we seek to understand the mechanism behind this empirical observation by analyzing deep linear models. We uncover a surprising mechanism: in a simplified linear setting where both approaches learn similar representations, JEPAs are biased to learn high influence features, or features characterized by having high regression coefficients. Our results point to a distinct implicit bias of predicting in latent space that may shed light on its success in practice.", "keywords": "SSL;JEPA", "primary_area": "learning_theory", "supplementary_material": "/attachment/34c65e3fecd18bfae65eed5e2f2dd5acecb816c9.zip", "author": "Etai Littwin;Omid Saremi;Madhu Advani;Vimal Thilak;Preetum Nakkiran;Chen Huang;Joshua M. Susskind", "authorids": "~Etai_Littwin1;~Omid_Saremi1;~Madhu_Advani2;~Vimal_Thilak2;~Preetum_Nakkiran1;~Chen_Huang6;~Joshua_M._Susskind1", "gender": "M;;M;M;;M;M", "homepage": ";;;;http://preetum.nakkiran.org;;http://www.apple.com", "dblp": ";;;14/4821;151/6343;05/8125-1;132/7797", "google_scholar": "NOVS7vwAAAAJ;;https://scholar.google.com/citations?hl=en;KyBnuqsAAAAJ;zithBbUAAAAJ;QZ-JKOUAAAAJ;Sv2TGqsAAAAJ", "orcid": ";;;;;;", "linkedin": ";omidsaremi/;;;;;joshua-susskind-8ab2ab5/", "or_profile": "~Etai_Littwin1;~Omid_Saremi1;~Madhu_Advani2;~Vimal_Thilak2;~Preetum_Nakkiran1;~Chen_Huang6;~Joshua_M._Susskind1", "aff": ";Apple;Apple;Apple;Apple;Apple;Apple", "aff_domain": ";apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "position": ";ML;Researcher;Engineer;Principal Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nlittwin2024how,\ntitle={How {JEPA} Avoids Noisy Features: The Implicit Bias of Deep Linear Self Distillation Networks},\nauthor={Etai Littwin and Omid Saremi and Madhu Advani and Vimal Thilak and Preetum Nakkiran and Chen Huang and Joshua M. Susskind},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ez7w0Ss4g9}\n}", "github": "", "reviewers": "U4uU;5fn3;Nsp4;Mqg6;MHjv", "pdf_size": 3221449, "rating": "6;7;7;7;8", "confidence": "2;5;4;3;2", "soundness": "3;3;4;4;3", "novelty": "3;3;3;4;3", "presentation": "3;3;4;3;3", "wc_summary": "83;66;38;82;138", "wc_strengths": "68;71;131;81;69", "wc_weaknesses": "165;71;137;47;112", "wc_questions": "5;134;116;142;41", "wc_limitations": "6;49;59;1;5", "wc_review": "327;391;481;353;365", "wc_reply_reviewers": "28;80;91;101;92", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 81.4, 32.64107841355736 ], "wc_strengths_avg": [ 84.0, 23.949947807876327 ], "wc_weaknesses_avg": [ 106.4, 42.85603808099857 ], "wc_questions_avg": [ 87.6, 54.61355143185618 ], "wc_limitations_avg": [ 24.0, 24.75479751482528 ], "wc_review_avg": [ 383.4, 52.966404446592364 ], "wc_reply_reviewers_avg": [ 78.4, 26.0660698993922 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11874363805710044356&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Art of Saying No: Contextual Noncompliance in Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97587", "id": "f1UL4wNlw6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f1UL4wNlw6", "openreview": "https://openreview.net/forum?id=f1UL4wNlw6", "poster": "", "project": "", "author_site": "Faeze Brahman, Sachin Kumar, Vidhisha Balachandran, Pradeep Dasigi, Valentina Pyatkin, Abhilasha Ravichander, Sarah Wiegreffe, Nouha Dziri, Khyathi Chandu, Jack Hessel, Yulia Tsvetkov, Noah Smith, Yejin Choi, Hannaneh Hajishirzi", "tldr": "", "abstract": "Chat-based language models are designed to be helpful, yet they should not comply with every user request.\n While most existing work primarily focuses on refusal of ``unsafe'' queries, we posit that the scope of noncompliance should be broadened. We introduce a comprehensive taxonomy of contextual noncompliance describing when and how models should *not* comply with user requests. Our taxonomy spans a wide range of categories including *incomplete*, *unsupported*, *indeterminate*, and *humanizing* requests (in addition to *unsafe* requests). To test noncompliance capabilities of language models, we use this taxonomy to develop a new evaluation suite of 1000 noncompliance prompts. We find that most existing models show significantly high compliance rates in certain previously understudied categories with models like GPT-4 incorrectly complying with as many as 30\\% of requests.\nTo address these gaps, we explore different training strategies using a synthetically-generated training set of requests and expected noncompliant responses. \nOur experiments demonstrate that while direct finetuning of instruction-tuned models can lead to both over-refusal and a decline in general capabilities, using parameter efficient methods like low rank adapters helps to strike a good balance between appropriate noncompliance and other capabilities.", "keywords": "LLM;Safety;Refusal;Noncompliance;Evaluation;Reliable AI", "primary_area": "", "supplementary_material": "/attachment/a30d78dc00905fef70c8da65b744265cf1d31049.zip", "author": "Faeze Brahman;Sachin Kumar;Vidhisha Balachandran;Pradeep Dasigi;Valentina Pyatkin;Abhilasha Ravichander;Sarah Wiegreffe;Nouha Dziri;Khyathi Chandu;Jack Hessel;Yulia Tsvetkov;Noah A. Smith;Yejin Choi;Hannaneh Hajishirzi", "authorids": "~Faeze_Brahman1;~Sachin_Kumar1;~Vidhisha_Balachandran1;~Pradeep_Dasigi1;~Valentina_Pyatkin1;~Abhilasha_Ravichander2;~Sarah_Wiegreffe1;~Nouha_Dziri2;~Khyathi_Chandu1;~Jack_Hessel1;~Yulia_Tsvetkov1;~Noah_A._Smith2;~Yejin_Choi1;~Hannaneh_Hajishirzi1", "gender": "F;M;F;M;;;;;;M;F;;F;F", "homepage": "https://fabrahman.github.io;https://shocheen.com;https://vidhishanair.github.io/;https://pdasigi.github.io/;;https://www.cs.cmu.edu/~aravicha/;;;;https://www.jmhessel.com;https://homes.cs.washington.edu/~yuliats/;;https://yejinc.github.io/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "276/6005;31/4484-9;234/4867;27/7184;;170/4795.html;;;;https://dblp.uni-trier.de/pid/132/5250.html;75/8157;;89/579-1;52/1296", "google_scholar": "viCG2ikAAAAJ;qO38fRIAAAAJ;LgitgaIAAAAJ;https://scholar.google.com/citations?authorid=Bpd76vcAAAAJ;;6vLsKGsAAAAJ;;;;SxQQ1msAAAAJ;SEDPkrsAAAAJ;;vhP-tlcAAAAJ;LOV6_WIAAAAJ", "orcid": ";;;0000-0001-7127-1316;;;;;;0000-0002-4012-8979;0000-0002-4634-7128;;;", "linkedin": ";;;;;abhilasha-ravichander-57524958;;;;;;;;", "or_profile": "~Faeze_Brahman1;~Sachin_Kumar1;~Vidhisha_Balachandran1;~Pradeep_Dasigi1;~Valentina_Pyatkin1;~Abhilasha_Ravichander2;~Sarah_Wiegreffe1;~Nouha_Dziri2;~Khyathi_Chandu1;~Jack_Hessel1;~Yulia_Tsvetkov1;~Noah_A._Smith2;~Yejin_Choi1;~Hannaneh_Hajishirzi1", "aff": "Allen Institute for AI;Allen Institute for Artificial Intelligence;Carnegie Mellon University;Allen Institute for Artificial Intelligence;;Allen Institute for Artificial Intelligence;;;;Samaya AI;Department of Computer Science, University of Washington;;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "allenai.org;allenai.org;cmu.edu;allenai.org;;allenai.org;;;;samaya.ai;cs.washington.edu;;cs.washington.edu;uw.edu", "position": "Postdoc;Postdoc;PhD student;Research Scientist;;Postdoc;;;;Researcher;Associate Professor;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbrahman2024the,\ntitle={The Art of Saying No: Contextual Noncompliance in Language Models},\nauthor={Faeze Brahman and Sachin Kumar and Vidhisha Balachandran and Pradeep Dasigi and Valentina Pyatkin and Abhilasha Ravichander and Sarah Wiegreffe and Nouha Dziri and Khyathi Chandu and Jack Hessel and Yulia Tsvetkov and Noah A. Smith and Yejin Choi and Hannaneh Hajishirzi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=f1UL4wNlw6}\n}", "github": "", "reviewers": "C58Z;TLyZ;g9Hn;mfkY", "pdf_size": 1563125, "rating": "4;7;7;9", "confidence": "4;4;5;4", "wc_summary_and_contributions": "98;160;140;112", "wc_strengths": "3;10;2;54", "wc_improvement": "3;124;2;63", "wc_limitations": "1;1;2;1", "wc_correctness": "28;7;1;1", "wc_clarity": "1;1;1;39", "wc_relation_to_prior_work": "1;1;1;41", "wc_documentation": "1;1;9;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "137;306;159;313", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "261;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.75, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 127.5, 24.09875515457178 ], "wc_strengths_avg": [ 17.25, 21.44032415799724 ], "wc_improvement_avg": [ 48.0, 50.35374861914453 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_correctness_avg": [ 9.25, 11.098986440211556 ], "wc_clarity_avg": [ 10.5, 16.454482671904334 ], "wc_relation_to_prior_work_avg": [ 11.0, 17.320508075688775 ], "wc_documentation_avg": [ 3.0, 3.4641016151377544 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 228.75, 81.16149025245902 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 65.25, 113.01631519386925 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.08084520834544431, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6005550001494382933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "allenai.org;allenai.org;cmu.edu;allenai.org;;allenai.org;;;;samaya.ai;cs.washington.edu;;cs.washington.edu;uw.edu", "author_num": 14, "aff_unique_index": "0;1;2;1;1;3;4;4;4", "aff_unique_norm": "Allen Institute for AI;Allen Institute for Artificial Intelligence;Carnegie Mellon University;Samaya AI;University of Washington", "aff_unique_dep": ";;;;Department of Computer Science", "aff_unique_url": "https://allenai.org;https://allenai.org;https://www.cmu.edu;;https://www.washington.edu", "aff_unique_abbr": "AI2;AI2;CMU;;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Rethinking Transformer for Long Contextual Histopathology Whole Slide Image Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94232", "id": "f3oHNyqd83", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f3oHNyqd83", "openreview": "https://openreview.net/forum?id=f3oHNyqd83", "poster": "/media/PosterPDFs/NeurIPS%202024/94232.png?t=1731338953.271936", "project": "", "author_site": "Honglin Li, Yunlong Zhang, Pingyi Chen, Zhongyi Shui, Chenglu Zhu, Lin Yang", "tldr": "", "abstract": "Histopathology Whole Slide Image (WSI) analysis serves as the gold standard for clinical cancer diagnosis in the daily routines of doctors. To develop computer-aided diagnosis model for histopathology WSIs, previous methods typically employ Multi-Instance Learning to enable slide-level prediction given only slide-level labels.\nAmong these models, vanilla attention mechanisms without pairwise interactions have traditionally been employed but are unable to model contextual information. More recently, self-attention models have been utilized to address this issue. To alleviate the computational complexity of long sequences in large WSIs, methods like HIPT use region-slicing, and TransMIL employs Nystr\\\"{o}mformer as an approximation of full self-attention. Both approaches suffer from suboptimal performance due to the loss of key information. Moreover, their use of absolute positional embedding struggles to effectively handle long contextual dependencies in shape-varying WSIs.\nIn this paper, we first analyze how the low-rank nature of the long-sequence attention matrix constrains the representation ability of WSI modelling. Then, we demonstrate that the rank of attention matrix can be improved by focusing on local interactions via a local attention mask. Our analysis shows that the local mask aligns with the attention patterns in the lower layers of the Transformer. Furthermore, the local attention mask can be implemented during chunked attention calculation, reducing the quadratic computational complexity to linear with a small local bandwidth. Additionally, this locality helps the model generalize to unseen or under-fitted positions more easily.\nBuilding on this, we propose a local-global hybrid Transformer for both computational acceleration and local-global information interactions modelling. Our method, Long-contextual MIL (LongMIL), is evaluated through extensive experiments on various WSI tasks to validate its superiority in: 1) overall performance, 2) memory usage and speed, and 3) extrapolation ability compared to previous methods.", "keywords": "Digital Pathology;Efficient Transformer", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Honglin Li;Yunlong Zhang;Pingyi Chen;Zhongyi Shui;Chenglu Zhu;Lin Yang", "authorids": "~Honglin_Li2;~Yunlong_Zhang1;~Pingyi_Chen1;~Zhongyi_Shui1;~Chenglu_Zhu1;~Lin_Yang13", "gender": ";M;M;M;M;M", "homepage": ";https://www.researchgate.net/profile/Zhang-Yunlong-3;;https://github.com/windyzy;https://hzzcl.github.io/resume.io/;https://www.yanglinlab.com", "dblp": "67/5566-1;;;;296/3987;", "google_scholar": "VJYS9dMAAAAJ;;CTGCfsgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;;0000-0001-5705-3718;", "linkedin": ";;;;;", "or_profile": "~Honglin_Li2;~Yunlong_Zhang1;~Pingyi_Chen1;~Zhongyi_Shui1;~Chenglu_Zhu1;~Lin_Yang13", "aff": "Westlake University;Westlake University;Zhejiang University;Zhejiang University;Westlake University;Westlake University ", "aff_domain": "westlake.edu.cn;westlake.edu;zju.edu.cn;zju.edu.cn;westlake.edu;westlake.edu.cn", "position": "Researcher;PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nli2024rethinking,\ntitle={Rethinking Transformer for Long Contextual Histopathology Whole Slide Image Analysis},\nauthor={Honglin Li and Yunlong Zhang and Pingyi Chen and Zhongyi Shui and Chenglu Zhu and Lin Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=f3oHNyqd83}\n}", "github": "", "reviewers": "Ghwr;UesU;UUFY;GgQd", "pdf_size": 1248378, "rating": "3;6;6;7", "confidence": "5;5;5;5", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;2;2;4", "wc_summary": "75;49;59;153", "wc_strengths": "74;95;135;49", "wc_weaknesses": "143;561;389;92", "wc_questions": "2;54;87;45", "wc_limitations": "1;5;1;15", "wc_review": "295;764;671;354", "wc_reply_reviewers": "246;31;20;0", "wc_reply_authors": "492;29;74;0", "reply_reviewers": "3;1;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 84.0, 40.90232267243512 ], "wc_strengths_avg": [ 88.25, 31.522809202226885 ], "wc_weaknesses_avg": [ 296.25, 189.6699435862203 ], "wc_questions_avg": [ 47.0, 30.32325840011261 ], "wc_limitations_avg": [ 5.5, 5.722761571129799 ], "wc_review_avg": [ 521.0, 200.3209924096823 ], "wc_reply_reviewers_avg": [ 74.25, 99.78069703103903 ], "wc_reply_authors_avg": [ 148.75, 199.92170342411552 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14158053202341298707&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "westlake.edu.cn;westlake.edu;zju.edu.cn;zju.edu.cn;westlake.edu;westlake.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Westlake University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.westlake.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "WU;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Foundation Inference Models for Markov Jump Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94231", "id": "f4v7cmm5sC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f4v7cmm5sC", "openreview": "https://openreview.net/forum?id=f4v7cmm5sC", "poster": "/media/PosterPDFs/NeurIPS%202024/94231.png?t=1730211269.1888523", "project": "", "author_site": "David Berghaus, Kostadin Cvejoski, Patrick Seifner, C\u00e9sar Ali Ojeda Marin, Rams\u00e9s J. S\u00e1nchez", "tldr": "", "abstract": "Markov jump processes are continuous-time stochastic processes which describe dynamical systems evolving in discrete state spaces. These processes find wide application in the natural sciences and machine learning, but their inference is known to be far from trivial. In this work we introduce a methodology for *zero-shot inference* of Markov jump processes (MJPs), on bounded state spaces, from noisy and sparse observations, which consists of two components. First, a broad probability distribution over families of MJPs, as well as over possible observation times and noise mechanisms, with which we simulate a synthetic dataset of hidden MJPs and their noisy observations. Second, a neural recognition model that processes subsets of the simulated observations, and that is trained to output the initial condition and rate matrix of the target MJP in a supervised way. We empirically demonstrate that *one and the same* (pretrained) recognition model can infer, *in a zero-shot fashion*, hidden MJPs evolving in state spaces of different dimensionalities. Specifically, we infer MJPs which describe (i) discrete flashing ratchet systems, which are a type of Brownian motors, and the conformational dynamics in (ii) molecular simulations, (iii) experimental ion channel data and (iv) simple protein folding models. What is more, we show that our model performs on par with state-of-the-art models which are trained on the target datasets.\n\nOur pretrained model is available online.", "keywords": "Zero-shot inference;Markov jump process;Inference of Markov processes;Foundation models;Foundation models for time series;time series", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "David Berghaus;Kostadin Cvejoski;Patrick Seifner;Cesar Ojeda;Ramses J Sanchez", "authorids": "~David_Berghaus1;~Kostadin_Cvejoski1;~Patrick_Seifner1;~Cesar_Ojeda1;~Ramses_J_Sanchez1", "gender": "M;M;M;M;M", "homepage": ";;https://mlai.cs.uni-bonn.de/people/copy_of_florian-seiffarth;https://dblp.org/pid/167/6031.html;", "dblp": ";185/2496;;;243/6057", "google_scholar": ";k1WSOfoAAAAJ;;_-oU1lEAAAAJ;https://scholar.google.de/citations?user=32AoF2wAAAAJ", "orcid": ";;;;", "linkedin": "david-berghaus/;;;;", "or_profile": "~David_Berghaus1;~Kostadin_Cvejoski1;~Patrick_Seifner1;~Cesar_Ojeda1;~Ramses_J_Sanchez1", "aff": "Fraunhofer Institute IAIS, Fraunhofer IAIS;Fraunhofer IAIS;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Universit\u00e4t Potsdam;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn", "aff_domain": "iais.fraunhofer.de;iais.fraunhofer.de;uni-bonn.de;uni-potsdam.de;bit.uni-bonn.de", "position": "Postdoc;Researcher;PhD student;Postdoc;Postdoc", "bibtex": "@inproceedings{\nberghaus2024foundation,\ntitle={Foundation Inference Models for Markov Jump Processes},\nauthor={David Berghaus and Kostadin Cvejoski and Patrick Seifner and Cesar Ojeda and Ramses J Sanchez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=f4v7cmm5sC}\n}", "github": "", "reviewers": "sVKK;pfZc;EGs8;5km2", "pdf_size": 2031795, "rating": "6;7;7;8", "confidence": "4;2;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "57;109;54;76", "wc_strengths": "39;104;20;45", "wc_weaknesses": "95;43;45;68", "wc_questions": "324;117;46;208", "wc_limitations": "1;8;4;6", "wc_review": "516;381;169;403", "wc_reply_reviewers": "156;104;33;40", "wc_reply_authors": "134;473;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "3;4;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 21.89748844045819 ], "wc_strengths_avg": [ 52.0, 31.40859754907882 ], "wc_weaknesses_avg": [ 62.75, 21.05201890555868 ], "wc_questions_avg": [ 173.75, 104.02974334294976 ], "wc_limitations_avg": [ 4.75, 2.5860201081971503 ], "wc_review_avg": [ 367.25, 125.39612234834058 ], "wc_reply_reviewers_avg": [ 83.25, 50.29599884682677 ], "wc_reply_authors_avg": [ 151.75, 193.37318195654743 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14520557621111860686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "iais.fraunhofer.de;iais.fraunhofer.de;uni-bonn.de;uni-potsdam.de;bit.uni-bonn.de", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Fraunhofer Institute for Intelligent Analysis and Information Systems;Fraunhofer Institute for Applied Information Technology;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Potsdam", "aff_unique_dep": "Intelligent Analysis and Information Systems;;;", "aff_unique_url": "https://www.iais.fraunhofer.de/;https://www.iais.fraunhofer.de/;https://www.uni-bonn.de/;https://www.uni-potsdam.de", "aff_unique_abbr": "Fraunhofer IAIS;Fraunhofer IAIS;Uni Bonn;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "f5XZEROoGb", "title": "SubjECTive-QA: Measuring Subjectivity in Earnings Call Transcripts' QA Through Six-Dimensional Feature Analysis", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Fact-checking is extensively studied in the context of misinformation and disinformation, addressing objective inaccuracies. However, a softer form of misinformation involves responses that are factually correct but lack certain features such as clarity and relevance. This challenge is prevalent in formal Question-Answer (QA) settings such as press conferences in finance, politics, sports, and other domains, where subjective answers can obscure transparency. Despite this, there is a lack of manually annotated datasets for subjective features across multiple dimensions. To address this gap, we introduce SubjECTive-QA, a human annotated dataset on Earnings Call Transcripts' (ECTs) QA sessions as the answers given by company representatives are often open to subjective interpretations and scrutiny. The dataset includes 49,446 annotations for long-form QA pairs across six features: Assertive, Cautious, Optimistic, Specific, Clear, and Relevant. These features are carefully selected to encompass the key attributes that reflect the tone of the answers provided during QA sessions across different domains. Our findings are that the best-performing Pre-trained Language Model (PLM), RoBERTa-base, has similar weighted F1 scores to Llama-3-70b-Chat on features with lower subjectivity, such as Relevant and Clear, with a mean difference of 2.17% in their weighted F1 scores. The models perform significantly better on features with higher subjectivity, such as Specific and Assertive, with a mean difference of 10.01% in their weighted F1 scores. Furthermore, testing SubjECTive-QA's generalizability using QAs from White House Press Briefings and Gaggles yields an average weighted F1 score of 65.97% using our best models for each feature, demonstrating broader applicability beyond the financial domain. SubjECTive-QA is publicly available under the CC BY 4.0 license.", "keywords": "FinNLP;Subjectivity;Tone Analysis;Earnings Calls;Transfer Learning;Misinformation", "primary_area": "", "supplementary_material": "", "author": "Huzaifa Pardawala;Siddhant Sukhani;Agam Shah;Veer Kejriwal;Abhishek Pillai;Rohan Bhasin;Andrew DiBiasio;Tarun Mandapati;Dhruv Adha;Sudheer Chava", "authorids": "~Huzaifa_Pardawala1;~Siddhant_Sukhani1;~Agam_Shah1;~Veer_Kejriwal1;~Abhishek_Pillai1;~Rohan_Bhasin2;~Andrew_DiBiasio1;~Tarun_Mandapati1;~Dhruv_Adha1;~Sudheer_Chava1", "gender": "M;M;M;M;M;M;;M;M;Not Specified", "homepage": ";;https://shahagam4.github.io/;;https://github.com/abhipi/;;https://adibiasio.github.io/;;;https://fintech.gatech.edu", "dblp": ";391/4704.html;206/6806;;;;;;;61/9926", "google_scholar": ";VAwr1v8AAAAJ;https://scholar.google.co.in/citations?user=wGA2umEAAAAJ;;;;;;;AXYf-i8AAAAJ", "orcid": ";;0000-0002-9062-2430;;;;;;;0000-0001-8330-682X", "linkedin": "huzaifa-pardawala-75106a219/;siddhantsukhani/;agam-shah/;veerkejriwal;abhipi/;rohan-bhasin-356aa41a0/;;tmandapati/;dhruv-adha-ba5142215/;", "or_profile": "~Huzaifa_Pardawala1;~Siddhant_Sukhani1;~Agam_Shah1;~Veer_Kejriwal1;~Abhishek_Pillai1;~Rohan_Bhasin2;~Andrew_DiBiasio1;~Tarun_Mandapati1;~Dhruv_Adha1;~Sudheer_Chava1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "Undergrad student;Undergrad student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\npardawala2024subjectiveqa,\ntitle={Subj{ECT}ive-{QA}: Measuring Subjectivity in Earnings Call Transcripts' {QA} Through Six-Dimensional Feature Analysis},\nauthor={Huzaifa Pardawala and Siddhant Sukhani and Agam Shah and Veer Kejriwal and Abhishek Pillai and Rohan Bhasin and Andrew DiBiasio and Tarun Mandapati and Dhruv Adha and Sudheer Chava},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=f5XZEROoGb}\n}", "github": "", "project": "", "reviewers": "eJKB;xL6k;nn6k;wUf8", "site": "https://openreview.net/forum?id=f5XZEROoGb", "pdf_size": 4910573, "rating": "6;6;7;8", "confidence": "3;3;5;4", "wc_summary_and_contributions": "110;78;130;130", "wc_strengths": "196;73;69;39", "wc_improvement": "2;260;17;121", "wc_limitations": "1;12;40;1", "wc_correctness": "1;20;50;7", "wc_clarity": "1;4;12;19", "wc_relation_to_prior_work": "5;13;42;3", "wc_documentation": "1;6;9;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "318;467;370;325", "wc_reply_reviewers": "0;42;0;41", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 112.0, 21.2602916254693 ], "wc_strengths_avg": [ 94.25, 60.197072187939504 ], "wc_improvement_avg": [ 100.0, 103.11886345378328 ], "wc_limitations_avg": [ 13.5, 15.945218719101975 ], "wc_correctness_avg": [ 19.5, 18.9010581714358 ], "wc_clarity_avg": [ 9.0, 7.035623639735144 ], "wc_relation_to_prior_work_avg": [ 15.75, 15.610493265749165 ], "wc_documentation_avg": [ 5.0, 2.9154759474226504 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 370.0, 59.451661036509314 ], "wc_reply_reviewers_avg": [ 20.75, 20.753011829611623 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17049929619383993542&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Self-Healing Machine Learning: A Framework for Autonomous Adaptation in Real-World Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94230", "id": "f63DKIpx0I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f63DKIpx0I", "openreview": "https://openreview.net/forum?id=f63DKIpx0I", "poster": "", "project": "", "author_site": "Paulius Rauba, Nabeel Seedat, Krzysztof Kacprzyk, Mihaela van der Schaar", "tldr": "", "abstract": "Real-world machine learning systems often encounter model performance degradation due to distributional shifts in the underlying data generating process (DGP). Existing approaches to addressing shifts, such as concept drift adaptation, are limited by their *reason-agnostic* nature. By choosing from a pre-defined set of actions, such methods implicitly assume that the causes of model degradation are irrelevant to what actions should be taken, limiting their ability to select appropriate adaptations. In this paper, we propose an alternative paradigm to overcome these limitations, called *self-healing machine learning* (SHML). Contrary to previous approaches, SHML autonomously diagnoses the reason for degradation and proposes diagnosis-based corrective actions. We formalize SHML as an optimization problem over a space of adaptation actions to minimize the expected risk under the shifted DGP. We introduce a theoretical framework for self-healing systems and build an agentic self-healing solution *$\\mathcal{H}$-LLM* which uses large language models to perform self-diagnosis by reasoning about the structure underlying the DGP, and self-adaptation by proposing and evaluating corrective actions. Empirically, we analyze different components of *$\\mathcal{H}$-LLM* to understand *why* and *when* it works, demonstrating the potential of self-healing ML.", "keywords": "Model performance degradation;autonomous adaptation;large language models", "primary_area": "other", "supplementary_material": "/attachment/209dca384709ce1915893ec971d2547ed6c9bd49.zip", "author": "Paulius Rauba;Nabeel Seedat;Krzysztof Kacprzyk;Mihaela van der Schaar", "authorids": "~Paulius_Rauba1;~Nabeel_Seedat1;~Krzysztof_Kacprzyk1;~Mihaela_van_der_Schaar2", "gender": "M;;;F", "homepage": ";;;https://www.vanderschaar-lab.com", "dblp": ";227/8368;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;DZ3S--MAAAAJ", "orcid": ";;;", "linkedin": "paulius-rauba/;nabeel-seedat/;;", "or_profile": "~Paulius_Rauba1;~Nabeel_Seedat1;~Krzysztof_Kacprzyk1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;AstraZeneca;;University of California, Los Angeles", "aff_domain": "cam.ac.uk;astrazeneca.com;;ucla.edu", "position": "PhD student;Intern;;Full Professor", "bibtex": "@inproceedings{\nrauba2024selfhealing,\ntitle={Self-Healing Machine Learning: A Framework for Autonomous Adaptation in Real-World Environments},\nauthor={Paulius Rauba and Nabeel Seedat and Krzysztof Kacprzyk and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=f63DKIpx0I}\n}", "github": "", "reviewers": "LnCp;xkmy;4TYN;vweT", "pdf_size": 1048526, "rating": "5;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "53;86;162;80", "wc_strengths": "45;89;108;43", "wc_weaknesses": "45;109;91;221", "wc_questions": "5;64;30;75", "wc_limitations": "7;64;35;77", "wc_review": "155;412;426;496", "wc_reply_reviewers": "0;20;0;27", "wc_reply_authors": "157;110;157;38", "reply_reviewers": "0;1;0;1", "reply_authors": "3;3;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.25, 40.49305496007926 ], "wc_strengths_avg": [ 71.25, 28.07467720206236 ], "wc_weaknesses_avg": [ 116.5, 64.6896436842869 ], "wc_questions_avg": [ 43.5, 27.735356496717326 ], "wc_limitations_avg": [ 45.75, 27.049722734253674 ], "wc_review_avg": [ 372.25, 129.40319741026494 ], "wc_reply_reviewers_avg": [ 11.75, 12.007809958522827 ], "wc_reply_authors_avg": [ 115.5, 48.685213360937425 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9200384265753570580&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cam.ac.uk;astrazeneca.com;;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Cambridge;AstraZeneca;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.astrazeneca.com;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;AZ;UCLA", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Los Angeles", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "The Factorization Curse: Which Tokens You Predict Underlie the Reversal Curse and More", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94229", "id": "f70e6YYFHF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f70e6YYFHF", "openreview": "https://openreview.net/forum?id=f70e6YYFHF", "poster": "/media/PosterPDFs/NeurIPS%202024/94229.png?t=1731601766.0749376", "project": "", "author_site": "Ouail Kitouni, Niklas S Nolte, Adina Williams, Michael Rabbat, Diane Bouchacourt, Mark Ibrahim", "tldr": "", "abstract": "Today's best language models still struggle with \"hallucinations\", factually incorrect generations, which impede their ability to reliably retrieve information seen during training. The *reversal curse*, where models cannot recall information when probed in a different order than was encountered during training, exemplifies limitations in information retrieval. \nTo better understand these limitations, we reframe the reversal curse as a *factorization curse* --- a failure of models to learn the same joint distribution under different factorizations.\nWe more closely simulate finetuning workflows which train pretrained models on specialized knowledge by introducing\n*WikiReversal*, a realistic testbed based on Wikipedia knowledge graphs. Through a series of controlled experiments with increasing levels of realism, including non-reciprocal relations, we find that reliable information retrieval is an inherent failure of the next-token prediction objective used in popular large language models. Moreover, we demonstrate reliable information retrieval cannot be solved with scale, reversed tokens, or even naive bidirectional-attention training. Consequently, various approaches to finetuning on specialized data would necessarily provide mixed results on downstream tasks, unless the model has already seen the right sequence of tokens. \nAcross five tasks of varying levels of complexity, our results uncover a promising path forward: factorization-agnostic objectives can significantly mitigate the reversal curse and hint at improved knowledge storage and planning capabilities.", "keywords": "Reversal curse;reliability;safety;language models;interpretability;learning objectives", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ouail Kitouni;Niklas Nolte;Adina Williams;Michael Rabbat;Diane Bouchacourt;Mark Ibrahim", "authorids": "~Ouail_Kitouni1;~Niklas_Nolte1;~Adina_Williams1;~Michael_Rabbat1;~Diane_Bouchacourt3;~Mark_Ibrahim1", "gender": "M;Not Specified;F;M;;F", "homepage": "https://okitouni.github.io/;https://nolte.dev/about;http://www.adinawilliams.com;;https://markibrahim.me/;https://dianebouchacourt.github.io/", "dblp": "285/7983.html;;199/2104;47/1744;180/5660;176/1498", "google_scholar": ";5elJ_uIAAAAJ;MUtbKt0AAAAJ;https://scholar.google.ch/citations?user=cMPKe9UAAAAJ;AqYyoCMAAAAJ;", "orcid": ";0000-0003-2536-4209;0000-0001-5281-3343;;;", "linkedin": "ouail-kitouni-645804187/;;;;;", "or_profile": "~Ouail_Kitouni1;~Niklas_Nolte1;~Adina_Williams1;~Michael_Rabbat1;~Mark_Ibrahim1;~Diane_Nicole_Bouchacourt1", "aff": "Massachusetts Institute of Technology;Meta Facebook;FAIR (Meta Platforms Inc.);Mila;Facebook AI Research (FAIR) Meta;Meta AI Research", "aff_domain": "mit.edu;meta.com;facebook.com;mila.quebec;ai.facebook.com;meta.com", "position": "PhD student;Researcher;Research Scientist;Associate Member;Researcher;Researcher", "bibtex": "@inproceedings{\nkitouni2024the,\ntitle={The Factorization Curse: Which Tokens You Predict Underlie the Reversal Curse and More},\nauthor={Ouail Kitouni and Niklas Nolte and Adina Williams and Michael Rabbat and Diane Bouchacourt and Mark Ibrahim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=f70e6YYFHF}\n}", "github": "", "reviewers": "Z95k;kVoQ;bZEp", "pdf_size": 1539942, "rating": "6;7;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "176;80;132", "wc_strengths": "221;86;74", "wc_weaknesses": "303;72;243", "wc_questions": "271;58;235", "wc_limitations": "14;54;3", "wc_review": "985;350;687", "wc_reply_reviewers": "32;41;181", "wc_reply_authors": "7;4;59", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 129.33333333333334, 39.2371705854991 ], "wc_strengths_avg": [ 127.0, 66.64833081180653 ], "wc_weaknesses_avg": [ 206.0, 97.86725703727473 ], "wc_questions_avg": [ 188.0, 93.09135298189624 ], "wc_limitations_avg": [ 23.666666666666668, 21.913973218524802 ], "wc_review_avg": [ 674.0, 259.40059110701094 ], "wc_reply_reviewers_avg": [ 84.66666666666667, 68.21697410143281 ], "wc_reply_authors_avg": [ 23.333333333333332, 25.249862485874168 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2770402055970698852&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;meta.com;facebook.com;mila.quebec;ai.facebook.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Meta;Mila", "aff_unique_dep": ";Meta Platforms, Inc.;Quebec Artificial Intelligence Institute", "aff_unique_url": "https://web.mit.edu;https://meta.com;https://mila.quebec", "aff_unique_abbr": "MIT;Meta;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Boundary Decomposition for Nadir Objective Vector Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94228", "id": "f829mkQMUg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f829mkQMUg", "openreview": "https://openreview.net/forum?id=f829mkQMUg", "poster": "/media/PosterPDFs/NeurIPS%202024/94228.png?t=1731564411.2691352", "project": "", "author_site": "Ruihao Zheng, Zhenkun Wang", "tldr": "", "abstract": "The nadir objective vector plays a key role in solving multi-objective optimization problems (MOPs), where it is often used to normalize the objective space and guide the search. The current methods for estimating the nadir objective vector perform effectively only on specific MOPs. This paper reveals the limitations of these methods: exact methods can only work on discrete MOPs, while heuristic methods cannot deal with the MOP with a complicated feasible objective region. To fill this gap, we propose a general and rigorous method, namely boundary decomposition for nadir objective vector estimation (BDNE). BDNE scalarizes the MOP into a set of boundary subproblems. By utilizing bilevel optimization, boundary subproblems are optimized and adjusted alternately, thereby refining their optimal solutions to align with the nadir objective vector. We prove that the bilevel optimization identifies the nadir objective vector under mild conditions. We compare BDNE with existing methods on various black-box MOPs. The results conform to the theoretical analysis and show the significant potential of BDNE for real-world application.", "keywords": "multi-objective optimization;nadir objective vector estimation;black-box optimization;evolutionary computation", "primary_area": "optimization", "supplementary_material": "/attachment/a71ecb03162c6e2bc42e3258b4d6ea95352b5e66.zip", "author": "Ruihao Zheng;Zhenkun Wang", "authorids": "~Ruihao_Zheng1;~Zhenkun_Wang1", "gender": "M;M", "homepage": ";https://faculty.sustech.edu.cn/wangzk3/en/", "dblp": "335/1822;96/9114", "google_scholar": "dRYH_D4AAAAJ;https://scholar.google.com.sg/citations?user=r9ezy2gAAAAJ", "orcid": ";0000-0003-1152-6780", "linkedin": ";", "or_profile": "~Ruihao_Zheng1;~Zhenkun_Wang1", "aff": "Southern University of Science and Technology;Southern University of Science and Technology", "aff_domain": "mail.sustech.edu.cn;sustech.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzheng2024boundary,\ntitle={Boundary Decomposition for Nadir Objective Vector Estimation},\nauthor={Ruihao Zheng and Zhenkun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=f829mkQMUg}\n}", "github": "", "reviewers": "2guY;wp71;NNkD;3RPX", "pdf_size": 1965243, "rating": "6;7;7;7", "confidence": "4;5;4;5", "soundness": "2;3;3;3", "novelty": "2;3;4;3", "presentation": "2;3;4;3", "wc_summary": "41;76;67;39", "wc_strengths": "45;51;72;64", "wc_weaknesses": "648;76;134;50", "wc_questions": "4;59;5;102", "wc_limitations": "2;20;9;26", "wc_review": "740;282;287;281", "wc_reply_reviewers": "348;124;17;72", "wc_reply_authors": "1640;402;24;17", "reply_reviewers": "4;2;1;1", "reply_authors": "7;3;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.75, 16.08376510646683 ], "wc_strengths_avg": [ 58.0, 10.606601717798213 ], "wc_weaknesses_avg": [ 227.0, 244.95918027295895 ], "wc_questions_avg": [ 42.5, 40.929817981515626 ], "wc_limitations_avg": [ 14.25, 9.33742469849155 ], "wc_review_avg": [ 397.5, 197.75553089610415 ], "wc_reply_reviewers_avg": [ 140.25, 125.77037608276441 ], "wc_reply_authors_avg": [ 520.75, 664.7079715935412 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 3.5, 2.0615528128088303 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5033724766823422097&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "mail.sustech.edu.cn;sustech.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Southern University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.sustech.edu.cn", "aff_unique_abbr": "SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Adaptive Important Region Selection with Reinforced Hierarchical Search for Dense Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94227", "id": "f8MrWxlnRz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=f8MrWxlnRz", "openreview": "https://openreview.net/forum?id=f8MrWxlnRz", "poster": "/media/PosterPDFs/NeurIPS%202024/94227.png?t=1731358362.7632473", "project": "", "author_site": "Dingrong Wang, Hitesh Sapkota, Qi Yu", "tldr": "", "abstract": "Existing state-of-the-art dense object detection techniques tend to produce a large number of false positive detections on difficult images with complex scenes because they focus on ensuring a high recall. To improve the detection accuracy, we propose an Adaptive Important Region Selection (AIRS) framework guided by Evidential Q-learning coupled with a uniquely designed reward function. Inspired by human visual attention, our detection model conducts object search in a top-down, hierarchical fashion. It starts from the top of the hierarchy with the coarsest granularity and then identifies the potential patches likely to contain objects of interest. It then discards non-informative patches and progressively moves downward on the selected ones for a fine-grained search. The proposed evidential Q-learning systematically encodes epistemic uncertainty in its evidential-Q value to encourage the exploration of unknown patches, especially in the early phase of model training. In this way, the proposed model dynamically balances exploration-exploitation to cover both highly valuable and informative patches. Theoretical analysis and extensive experiments on multiple datasets demonstrate that our proposed framework outperforms the SOTA models.", "keywords": "Dense Object Detection;Reinforced Hierarchical Search", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dingrong Wang;Hitesh Sapkota;Qi Yu", "authorids": "~Dingrong_Wang1;~Hitesh_Sapkota1;~Qi_Yu1", "gender": "M;M;M", "homepage": "https://wdr123.github.io;https://hiteshsapkota.github.io/;https://www.rit.edu/mining/", "dblp": "276/3229;251/4284;58/6957-1", "google_scholar": "v--3Zr0AAAAJ;0FKsBXYAAAAJ;L3gWdfEAAAAJ", "orcid": "0009-0005-2407-2337;;0000-0002-0426-5407", "linkedin": "dingrong-wang-56a203179/;hitesh-sapkota-2226051ba/;", "or_profile": "~Dingrong_Wang1;~Hitesh_Sapkota1;~Qi_Yu1", "aff": "Rochester Institute of Technology;Amazon;Rochester Institute of Technology", "aff_domain": "rit.edu;amazon.com;rit.edu", "position": "PhD student;Researcher;Professor", "bibtex": "@inproceedings{\nwang2024adaptive,\ntitle={Adaptive Important Region Selection with Reinforced Hierarchical Search for Dense Object Detection},\nauthor={Dingrong Wang and Hitesh Sapkota and Qi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=f8MrWxlnRz}\n}", "github": "", "reviewers": "mhHL;8nSu;Q21q;A3eZ;4PiS", "pdf_size": 19503180, "rating": "5;5;6;7;8", "confidence": "4;4;3;3;2", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "130;109;62;80;58", "wc_strengths": "96;63;46;38;83", "wc_weaknesses": "214;246;80;132;159", "wc_questions": "1124;2;46;2;63", "wc_limitations": "21;11;27;5;13", "wc_review": "1585;431;261;257;376", "wc_reply_reviewers": "0;0;14;12;31", "wc_reply_authors": "115;147;158;114;160", "reply_reviewers": "0;0;1;1;1", "reply_authors": "2;2;3;3;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 87.8, 27.730128019899222 ], "wc_strengths_avg": [ 65.2, 21.81192334481304 ], "wc_weaknesses_avg": [ 166.2, 58.82992435827195 ], "wc_questions_avg": [ 247.4, 438.96131948042984 ], "wc_limitations_avg": [ 15.4, 7.735631842325486 ], "wc_review_avg": [ 582.0, 505.94703280086543 ], "wc_reply_reviewers_avg": [ 11.4, 11.412274094149685 ], "wc_reply_authors_avg": [ 138.8, 20.33125672456083 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9625334218796219, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3tp_IUTaO5cJ:scholar.google.com/&scioq=Adaptive+Important+Region+Selection+with+Reinforced+Hierarchical+Search+for+Dense+Object+Detection&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "rit.edu;amazon.com;rit.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Rochester Institute of Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.rit.edu;https://www.amazon.com", "aff_unique_abbr": "RIT;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Tactile DreamFusion: Exploiting Tactile Sensing for 3D Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94226", "id": "fA3RMMl8ii", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fA3RMMl8ii", "openreview": "https://openreview.net/forum?id=fA3RMMl8ii", "poster": "/media/PosterPDFs/NeurIPS%202024/94226.png?t=1731717332.2910688", "project": "", "author_site": "Ruihan Gao, Kangle Deng, Gengshan Yang, Wenzhen Yuan, Jun-Yan Zhu", "tldr": "", "abstract": "3D generation methods have shown visually compelling results powered by diffusion image priors. However, they often fail to produce realistic geometric details, resulting in overly smooth surfaces or geometric details inaccurately baked in albedo maps. To address this, we introduce a new method that incorporates touch as an additional modality to improve the geometric details of generated 3D assets. We design a lightweight 3D texture field to synthesize visual and tactile textures, guided by 2D diffusion model priors on both visual and tactile domains. We condition the visual texture generation on high-resolution tactile normals and guide the patch-based tactile texture refinement with a customized TextureDreambooth. We further present a multi-part generation pipeline that enables us to synthesize different textures across various regions. To our knowledge, we are the first to leverage high-resolution tactile sensing to enhance geometric details for 3D generation tasks. We evaluate our method in both text-to-3D and image-to-3D settings. Our experiments demonstrate that our method provides customized and realistic fine geometric textures while maintaining accurate alignment between two modalities of vision and touch.", "keywords": "3D neural rendering;diffusion model;texture synthesis;multi-modal generation", "primary_area": "generative_models", "supplementary_material": "/attachment/ca826a2a69d6e2504de46f90d39e92fa859718e3.zip", "author": "Ruihan Gao;Kangle Deng;Gengshan Yang;Wenzhen Yuan;Jun-Yan Zhu", "authorids": "~Ruihan_Gao1;~Kangle_Deng1;~Gengshan_Yang1;~Wenzhen_Yuan1;~Jun-Yan_Zhu1", "gender": "F;M;;;M", "homepage": "https://ruihangao.github.io/;https://dunbar12138.github.io;http://gengshan-y.github.io/;;https://www.cs.cmu.edu/~junyanz/", "dblp": "244/7481;246/3131;180/7347;59/8714;117/4782.html", "google_scholar": "rbfajPIAAAAJ;;yRaFnrcAAAAJ;SNqm6doAAAAJ;UdpacsMAAAAJ", "orcid": ";;;0000-0001-8014-356X;0000-0001-8504-3410", "linkedin": "ruihan-gao/;;;;jun-yan-zhu-99b18814", "or_profile": "~Ruihan_Gao1;~Kangle_Deng1;~Gengshan_Yang1;~Wenzhen_Yuan1;~Jun-Yan_Zhu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;World Labs;University of Illinois, Urbana Champaign;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;worldlabs.ai;illinois.edu;cmu.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2024tactile,\ntitle={Tactile DreamFusion: Exploiting Tactile Sensing for 3D Generation},\nauthor={Ruihan Gao and Kangle Deng and Gengshan Yang and Wenzhen Yuan and Jun-Yan Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fA3RMMl8ii}\n}", "github": "", "reviewers": "ecYi;MUQX;zBVq;a9oZ", "pdf_size": 17321134, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "2;2;3;2", "novelty": "1;3;3;3", "presentation": "2;3;3;3", "wc_summary": "32;78;101;73", "wc_strengths": "41;30;59;60", "wc_weaknesses": "165;209;125;54", "wc_questions": "4;183;104;28", "wc_limitations": "33;132;1;15", "wc_review": "275;632;390;230", "wc_reply_reviewers": "13;25;29;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.0, 24.869660230891775 ], "wc_strengths_avg": [ 47.5, 12.619429464123963 ], "wc_weaknesses_avg": [ 138.25, 56.997258706011465 ], "wc_questions_avg": [ 79.75, 70.11552966354886 ], "wc_limitations_avg": [ 45.25, 51.353553917913025 ], "wc_review_avg": [ 381.75, 155.81780225635325 ], "wc_reply_reviewers_avg": [ 19.5, 7.664854858377946 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4076962791971710259&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.cmu.edu;cmu.edu;worldlabs.ai;illinois.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;World Labs;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;;https://illinois.edu", "aff_unique_abbr": "CMU;;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "AdjointDEIS: Efficient Gradients for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94225", "id": "fAlcxvrOEX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fAlcxvrOEX", "openreview": "https://openreview.net/forum?id=fAlcxvrOEX", "poster": "/media/PosterPDFs/NeurIPS%202024/94225.png?t=1733696507.3260584", "project": "", "author_site": "Zander W. Blasingame, Chen Liu", "tldr": "", "abstract": "The optimization of the latents and parameters of diffusion models with respect to some differentiable metric defined on the output of the model is a challenging and complex problem. The sampling for diffusion models is done by solving either the *probability flow* ODE or diffusion SDE wherein a neural network approximates the score function allowing a numerical ODE/SDE solver to be used. However, naive backpropagation techniques are memory intensive, requiring the storage of all intermediate states, and face additional complexity in handling the injected noise from the diffusion term of the diffusion SDE. We propose a novel family of bespoke ODE solvers to the continuous adjoint equations for diffusion models, which we call *AdjointDEIS*. We exploit the unique construction of diffusion SDEs to further simplify the formulation of the continuous adjoint equations using *exponential integrators*. Moreover, we provide convergence order guarantees for our bespoke solvers. Significantly, we show that continuous adjoint equations for diffusion SDEs actually simplify to a simple ODE. Lastly, we demonstrate the effectiveness of AdjointDEIS for guided generation with an adversarial attack in the form of the face morphing problem. Our code will be released on our project page [https://zblasingame.github.io/AdjointDEIS/](https://zblasingame.github.io/AdjointDEIS/)", "keywords": "continuous adjoint equations;neural differential equations;neural ODEs;adjoint sensitivity method;diffusion models;guided generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zander W. Blasingame;Chen Liu", "authorids": "~Zander_W._Blasingame1;~Chen_Liu3", "gender": "M;Not Specified", "homepage": ";https://www.clarkson.edu/people/chen-liu", "dblp": "250/5790;10/2639-1", "google_scholar": "gBBtH3AAAAAJ;dpKWP8oAAAAJ", "orcid": "0000-0002-9508-8425;0000-0003-1558-6836", "linkedin": ";chenliu/", "or_profile": "~Zander_W._Blasingame1;~Chen_Liu3", "aff": "Clarkson University;Clarkson University", "aff_domain": "clarkson.edu;clarkson.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nblasingame2024adjointdeis,\ntitle={Adjoint{DEIS}: Efficient Gradients for Diffusion Models},\nauthor={Zander W. Blasingame and Chen Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fAlcxvrOEX}\n}", "github": "", "reviewers": "oW3N;vPC5;1FE9;jxa4", "pdf_size": 3162291, "rating": "5;6;7;7", "confidence": "3;2;4;2", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "264;59;94;81", "wc_strengths": "20;50;79;50", "wc_weaknesses": "33;65;118;26", "wc_questions": "135;33;81;22", "wc_limitations": "25;1;5;1", "wc_review": "477;208;377;180", "wc_reply_reviewers": "113;27;0;46", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 124.5, 81.50613473843549 ], "wc_strengths_avg": [ 49.75, 20.8611480987984 ], "wc_weaknesses_avg": [ 60.5, 36.30771267926417 ], "wc_questions_avg": [ 67.75, 44.71786555729153 ], "wc_limitations_avg": [ 8.0, 9.9498743710662 ], "wc_review_avg": [ 310.5, 122.14847522584962 ], "wc_reply_reviewers_avg": [ 46.5, 41.728287767412645 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7355246494665551262&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "clarkson.edu;clarkson.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Clarkson University", "aff_unique_dep": "", "aff_unique_url": "https://www.clarkson.edu", "aff_unique_abbr": "Clarkson", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A PID Controller Approach for Adaptive Probability-dependent Gradient Decay in Model Calibration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94224", "id": "fAnubdSFpn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fAnubdSFpn", "openreview": "https://openreview.net/forum?id=fAnubdSFpn", "poster": "/media/PosterPDFs/NeurIPS%202024/94224.png?t=1729425592.4638784", "project": "", "author_site": "Siyuan Zhang, Linbo Xie", "tldr": "", "abstract": "Modern deep learning models often exhibit overconfident predictions, inadequately capturing uncertainty. During model optimization, the expected calibration error tends to overfit earlier than classification accuracy, indicating distinct optimization objectives for classification error and calibration error. To ensure consistent optimization of both model accuracy and model calibration, we propose a novel method incorporating a probability-dependent gradient decay coefficient into loss function. This coefficient exhibits a strong correlation with the overall confidence level. To maintain model calibration during optimization, we utilize a proportional-integral-derivative (PID) controller to dynamically adjust this gradient decay rate, where the adjustment relies on the proposed relative calibration error feedback in each epoch, thereby preventing the model from exhibiting over-confidence or under-confidence. Within the PID control system framework, the proposed relative calibration error serves as the control system output, providing an indication of the overall confidence level, while the gradient decay rate functions as the controlled variable. Moreover, recognizing the impact of gradient amplitude of adaptive decay rates, we implement an adaptive learning rate mechanism for gradient compensation to prevent inadequate learning of over-small or over-large gradient. Empirical experiments validate the efficacy of our PID-based adaptive gradient decay rate approach, ensuring consistent optimization of model calibration and model accuracy.", "keywords": "Model calibration;Softmax loss;Gradient decay;PID controller;Supervised learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Siyuan Zhang;Linbo Xie", "authorids": "~Siyuan_Zhang6;~Linbo_Xie1", "gender": "M;M", "homepage": "https://www.researchgate.net/profile/Siyuan-Zhang-35;http://iot.jiangnan.edu.cn/info/1059/3394.htm", "dblp": "15/7930;68/9211", "google_scholar": ";", "orcid": "0000-0001-5134-3044;", "linkedin": ";", "or_profile": "~Siyuan_Zhang6;~Linbo_Xie1", "aff": "Jiangnan University;School of IoT Engineering", "aff_domain": "jiangnan.edu.cn;jiangnan.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024a,\ntitle={A {PID} Controller Approach for Adaptive Probability-dependent Gradient Decay in Model Calibration},\nauthor={Siyuan Zhang and Linbo Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fAnubdSFpn}\n}", "github": "", "reviewers": "3YTY;dGQ5;iW8b", "pdf_size": 4280796, "rating": "6;6;6", "confidence": "3;5;3", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;3;2", "wc_summary": "60;61;114", "wc_strengths": "63;65;65", "wc_weaknesses": "19;82;96", "wc_questions": "450;55;95", "wc_limitations": "5;71;10", "wc_review": "597;334;380", "wc_reply_reviewers": "17;376;19", "wc_reply_authors": "17;17;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.33333333333333, 25.223445883190152 ], "wc_strengths_avg": [ 64.33333333333333, 0.9428090415820634 ], "wc_weaknesses_avg": [ 65.66666666666667, 33.48963355361709 ], "wc_questions_avg": [ 200.0, 177.52934029806642 ], "wc_limitations_avg": [ 28.666666666666668, 30.003703475108235 ], "wc_review_avg": [ 437.0, 114.68507604159603 ], "wc_reply_reviewers_avg": [ 137.33333333333334, 168.76479359023776 ], "wc_reply_authors_avg": [ 11.333333333333334, 8.013876853447538 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IcD6k4ueRl4J:scholar.google.com/&scioq=A+PID+Controller+Approach+for+Adaptive+Probability-dependent+Gradient+Decay+in+Model+Calibration&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "jiangnan.edu.cn;jiangnan.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Jiangnan University;School of IoT Engineering", "aff_unique_dep": ";IoT Engineering", "aff_unique_url": "https://www.jnu.edu.cn;", "aff_unique_abbr": "JNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "China;" }, { "title": "LaKD: Length-agnostic Knowledge Distillation for Trajectory Prediction with Any Length Observations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94223", "id": "fC2SV2sQ8J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fC2SV2sQ8J", "openreview": "https://openreview.net/forum?id=fC2SV2sQ8J", "poster": "/media/PosterPDFs/NeurIPS%202024/94223.png?t=1729754799.4364057", "project": "", "author_site": "Yuhang Li, Changsheng Li, Ruilin Lv, Rongqing Li, Ye Yuan, Guoren Wang", "tldr": "", "abstract": "Trajectory prediction is a crucial technology to help systems avoid traffic accidents, ensuring safe autonomous driving. Previous methods typically use a fixed-length and sufficiently long trajectory of an agent as observations to predict its future trajectory. However, in real-world scenarios, we often lack the time to gather enough trajectory points before making predictions, e.g., when a car suddenly appears due to an obstruction, the system must make immediate predictions to prevent a collision. This poses a new challenge for trajectory prediction systems, requiring them to be capable of making accurate predictions based on observed trajectories of arbitrary lengths, leading to the failure of existing methods. In this paper, we propose a Length-agnostic Knowledge Distillation framework, named LaKD, which can make accurate trajectory predictions, regardless of the length of observed data. Specifically, considering the fact that long trajectories, containing richer temporal information but potentially additional interference, may perform better or worse than short trajectories, we devise a dynamic length-agnostic knowledge distillation mechanism for exchanging information among trajectories of arbitrary lengths, dynamically determining the transfer direction based on prediction performance. In contrast to traditional knowledge distillation, LaKD employs a unique model that simultaneously serves as both the teacher and the student, potentially causing knowledge collision during the distillation process. Therefore, we design a dynamic soft-masking mechanism, where we first calculate the importance of neuron units and then apply soft-masking to them, so as to safeguard critical units from disruption during the knowledge distillation process. In essence, LaKD is a general and principled framework that can be naturally compatible with existing trajectory prediction models of different architectures. Extensive experiments on three benchmark datasets, Argoverse 1, nuScenes and Argoverse 2, demonstrate the effectiveness of our approach.", "keywords": "Trajectory Prediction", "primary_area": "robotics", "supplementary_material": "", "author": "Yuhang Li;Changsheng Li;Ruilin Lv;Rongqing Li;Ye Yuan;Guoren Wang", "authorids": "~Yuhang_Li5;~Changsheng_Li4;~Ruilin_Lv2;~Rongqing_Li1;~Ye_Yuan15;~Guoren_Wang1", "gender": "M;M;F;;;M", "homepage": "https://github.com/bit-lyh;;https://github.com/Lvrlin;;;https://guorenwang.github.io/", "dblp": ";;;;;64/146", "google_scholar": ";FfJnUioAAAAJ;;;;", "orcid": ";0000-0001-9789-7632;;;;0000-0002-0181-8379", "linkedin": ";;;;;", "or_profile": "~Yuhang_Li5;~Changsheng_Li4;~Ruilin_Lv2;~Rongqing_Li1;~Ye_Yuan15;~Guoren_Wang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;;;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;;;bit.edu.cn", "position": "MS student;Full Professor;MS student;;;Full Professor", "bibtex": "@inproceedings{\nli2024lakd,\ntitle={La{KD}: Length-agnostic Knowledge Distillation for Trajectory Prediction with Any Length Observations},\nauthor={Yuhang Li and Changsheng Li and Ruilin Lv and Rongqing Li and Ye Yuan and Guoren Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fC2SV2sQ8J}\n}", "github": "", "reviewers": "4u3J;G7Yk;Rudm", "pdf_size": 4045431, "rating": "6;7;7", "confidence": "4;5;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "147;246;43", "wc_strengths": "41;52;38", "wc_weaknesses": "145;28;16", "wc_questions": "289;1;73", "wc_limitations": "5;1;13", "wc_review": "627;328;183", "wc_reply_reviewers": "30;10;19", "wc_reply_authors": "207;16;24", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 145.33333333333334, 82.88278202109021 ], "wc_strengths_avg": [ 43.666666666666664, 6.018490028422596 ], "wc_weaknesses_avg": [ 63.0, 58.18934610390462 ], "wc_questions_avg": [ 121.0, 122.37646832622684 ], "wc_limitations_avg": [ 6.333333333333333, 4.988876515698588 ], "wc_review_avg": [ 379.3333333333333, 184.86090867340113 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 8.178562764256865 ], "wc_reply_authors_avg": [ 82.33333333333333, 88.21312575549942 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14230936729035714588&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "bit.edu.cn;bit.edu.cn;bit.edu.cn;;;bit.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Non-Stationary Learning of Neural Networks with Automatic Soft Parameter Reset", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94222", "id": "fDiZJ7mmOV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fDiZJ7mmOV", "openreview": "https://openreview.net/forum?id=fDiZJ7mmOV", "poster": "", "project": "", "author_site": "Alexandre Galashov, Michalis Titsias, Andr\u00e1s Gy\u00f6rgy, Clare Lyle, Razvan Pascanu, Yee Whye Teh, Maneesh Sahani", "tldr": "", "abstract": "Neural networks are most often trained under the assumption that data come from a stationary distribution. However, settings in which this assumption is violated are of increasing importance; examples include supervised learning with distributional shifts, reinforcement learning, continual learning and non-stationary contextual bandits. Here, we introduce a novel learning approach that automatically models and adapts to non-stationarity by linking parameters through an Ornstein-Uhlenbeck process with an adaptive drift parameter. The adaptive drift draws the parameters towards the distribution used at initialisation, so the approach can be understood as a form of soft parameter reset. We show empirically that our approach performs well in non-stationary supervised, and off-policy reinforcement learning settings.", "keywords": "Non-stationarity;plasticity loss;online learning;deep learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Alexandre Galashov;Michalis Titsias;Andr\u00e1s Gy\u00f6rgy;Clare Lyle;Razvan Pascanu;Yee Whye Teh;Maneesh Sahani", "authorids": "~Alexandre_Galashov1;~Michalis_Titsias1;~Andr\u00e1s_Gy\u00f6rgy2;~Clare_Lyle1;~Razvan_Pascanu1;~Yee_Whye_Teh2;~Maneesh_Sahani1", "gender": "M;M;;M;;;M", "homepage": "https://galashov.com;https://mtitsias.github.io/;;https://razp.info;http://www.gatsby.ucl.ac.uk/~maneesh;http://www.cs.bme.hu/~gya;http://csml.stats.ox.ac.uk/people/teh/", "dblp": ";19/5385;192/1910;65/8368.html;44/3197;72/251-1;88/2483", "google_scholar": ";https://scholar.google.gr/citations?user=B-SbkAwAAAAJ;;https://scholar.google.ca/citations?user=eSPY8LwAAAAJ;https://scholar.google.co.uk/citations?user=rwxX83UAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=y-nUzMwAAAAJ", "orcid": ";;;;0000-0001-5560-3341;0000-0003-0586-4337;", "linkedin": ";;;;;;", "or_profile": "~Alexandre_Galashov1;~Michalis_Titsias1;~Clare_Lyle1;~Razvan_Pascanu1;~Maneesh_Sahani1;~Andras_Gyorgy1;~Yee_Whye_Teh1", "aff": "Ecole Polytechnique;Google DeepMind;Google DeepMind;Google DeepMind;University College London;Google DeepMind;University of Oxford", "aff_domain": "polytechnique.edu;google.com;google.com;google.com;ucl.ac.uk;deepmind.com;ox.ac.uk", "position": "MS student;Research Scientist;Researcher;Research Scientist;Full Professor;Research Scientist;Full Professor", "bibtex": "@inproceedings{\ngalashov2024nonstationary,\ntitle={Non-Stationary Learning of Neural Networks with Automatic Soft Parameter Reset},\nauthor={Alexandre Galashov and Michalis Titsias and Andr{\\'a}s Gy{\\\"o}rgy and Clare Lyle and Razvan Pascanu and Yee Whye Teh and Maneesh Sahani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fDiZJ7mmOV}\n}", "github": "", "reviewers": "4koq;Hmj6;DfC5;6GY1", "pdf_size": 4153759, "rating": "4;4;5;7", "confidence": "3;4;3;3", "soundness": "2;3;2;4", "novelty": "2;3;2;3", "presentation": "1;2;3;4", "wc_summary": "112;57;31;128", "wc_strengths": "35;68;28;105", "wc_weaknesses": "503;139;259;75", "wc_questions": "57;2;107;28", "wc_limitations": "23;16;1;9", "wc_review": "730;282;426;345", "wc_reply_reviewers": "919;0;86;0", "wc_reply_authors": "1423;0;172;0", "reply_reviewers": "2;0;1;0", "reply_authors": "4;1;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 82.0, 39.50316443020736 ], "wc_strengths_avg": [ 59.0, 30.553232234904378 ], "wc_weaknesses_avg": [ 244.0, 163.4717100907677 ], "wc_questions_avg": [ 48.5, 38.977557645393844 ], "wc_limitations_avg": [ 12.25, 8.166241485530538 ], "wc_review_avg": [ 445.75, 171.8667725303527 ], "wc_reply_reviewers_avg": [ 251.25, 387.1210243580165 ], "wc_reply_authors_avg": [ 398.75, 595.5054050972166 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2775542219441770803&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 3, "email": "polytechnique.edu;google.com;google.com;google.com;ucl.ac.uk;deepmind.com;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;1;1;1;2;1;3", "aff_unique_norm": "Ecole Polytechnique;Google;University College London;University of Oxford", "aff_unique_dep": ";Google DeepMind;;", "aff_unique_url": "https://www.polytechnique.edu;https://deepmind.com;https://www.ucl.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "X;DeepMind;UCL;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "France;United Kingdom" }, { "title": "Metric Flow Matching for Smooth Interpolations on the Data Manifold", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94221", "id": "fE3RqiF4Nx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fE3RqiF4Nx", "openreview": "https://openreview.net/forum?id=fE3RqiF4Nx", "poster": "", "project": "", "author_site": "Kacper Kapusniak, Peter Potaptchik, Teodora Reu, Leo Zhang, Alexander Tong, Michael Bronstein, Joey Bose, Francesco Di Giovanni", "tldr": "", "abstract": "Matching objectives underpin the success of modern generative models and rely on constructing conditional paths that transform a source distribution into a target distribution. Despite being a fundamental building block, conditional paths have been designed principally under the assumption of $\\textit{Euclidean geometry}$, resulting in straight interpolations. However, this can be particularly restrictive for tasks such as trajectory inference, where straight paths might lie outside the data manifold, thus failing to capture the underlying dynamics giving rise to the observed marginals. In this paper, we propose Metric Flow Matching (MFM), a novel simulation-free framework for conditional flow matching where interpolants are approximate geodesics learned by minimizing the kinetic energy of a data-induced Riemannian metric. This way, the generative model matches vector fields on the data manifold, which corresponds to lower uncertainty and more meaningful interpolations. We prescribe general metrics to instantiate MFM, independent of the task, and test it on a suite of challenging problems including LiDAR navigation, unpaired image translation, and modeling cellular dynamics. We observe that MFM outperforms the Euclidean baselines, particularly achieving SOTA on single-cell trajectory prediction.", "keywords": "Flow Matching; Riemannian Geometry; single-cell RNA sequencing;", "primary_area": "generative_models", "supplementary_material": "/attachment/658104beb2581247cde62c8fc8ebecc2b9c11eec.zip", "author": "Kacper Kapusniak;Peter Potaptchik;Teodora Reu;Leo Zhang;Alexander Tong;Michael M. Bronstein;Joey Bose;Francesco Di Giovanni", "authorids": "~Kacper_Kapusniak1;~Peter_Potaptchik1;~Teodora_Reu1;~Leo_Zhang1;~Alexander_Tong1;~Michael_M._Bronstein1;~Joey_Bose1;~Francesco_Di_Giovanni1", "gender": ";M;F;M;;M;M;M", "homepage": ";https://peterpotaptchik.github.io;https://github.com/TeoReu;https://leozhangml.github.io/;https://alextong.net;http://www.inf.usi.ch/bronstein/;https://joeybose.github.io/;https://francescodgv.github.io/", "dblp": "363/7484;;;252/8493;153/9296;07/2668;174/3372;", "google_scholar": "FO80TZ8AAAAJ;AwzdzqEAAAAJ;;7EJ5TecAAAAJ;CS80pt4AAAAJ;UU3N6-UAAAAJ;ybPyI7IAAAAJ;yzjjeqsAAAAJ", "orcid": "0009-0005-5261-0657;;;0009-0006-0126-9337;0000-0002-2031-4096;;;", "linkedin": "kacper-kapusniak/;peterpotaptchik/;;;atong01/;mbronstein/;;", "or_profile": "~Kacper_Kapusniak1;~Peter_Potaptchik1;~Teodora_Reu1;~Leo_Zhang1;~Alexander_Tong1;~Michael_M._Bronstein1;~Joey_Bose1;~Francesco_Di_Giovanni1", "aff": "Valence Labs powered by recursion;University of Oxford;Department of Computer Science, University of Oxford;University of Oxford;Universit\u00e9 de Montr\u00e9al;University of Oxford;University of Oxford;Valence Labs powered by recursion", "aff_domain": "valencelabs.com;stats.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;umontreal.ca;ox.ac.uk;oxford.ac.uk;valencelabs.com", "position": "Intern;PhD student;PhD student;PhD student;Postdoc;Full Professor;Postdoc;Researcher", "bibtex": "@inproceedings{\nkapusniak2024metric,\ntitle={Metric Flow Matching for Smooth Interpolations on the Data Manifold},\nauthor={Kacper Kapusniak and Peter Potaptchik and Teodora Reu and Leo Zhang and Alexander Tong and Michael M. Bronstein and Joey Bose and Francesco Di Giovanni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fE3RqiF4Nx}\n}", "github": "", "reviewers": "hBAX;CNRE;mgNS;q2sv", "pdf_size": 20315407, "rating": "5;6;6;7", "confidence": "4;4;5;4", "soundness": "2;3;4;4", "novelty": "1;3;3;3", "presentation": "2;3;4;4", "wc_summary": "74;35;89;65", "wc_strengths": "17;60;68;98", "wc_weaknesses": "1081;340;132;193", "wc_questions": "163;110;242;88", "wc_limitations": "43;1;10;43", "wc_review": "1378;546;541;487", "wc_reply_reviewers": "963;30;177;38", "wc_reply_authors": "3354;66;277;20", "reply_reviewers": "2;1;1;1", "reply_authors": "7;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 65.75, 19.715159142142372 ], "wc_strengths_avg": [ 60.75, 28.960101864461734 ], "wc_weaknesses_avg": [ 436.5, 379.70547796943885 ], "wc_questions_avg": [ 150.75, 59.31852577399408 ], "wc_limitations_avg": [ 24.25, 19.01808349965895 ], "wc_review_avg": [ 738.0, 370.22763268022015 ], "wc_reply_reviewers_avg": [ 302.0, 386.07835992192054 ], "wc_reply_authors_avg": [ 929.25, 1403.2799747377571 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11811847690843542556&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "valencelabs.com;stats.ox.ac.uk;cs.ox.ac.uk;ox.ac.uk;umontreal.ca;ox.ac.uk;oxford.ac.uk;valencelabs.com", "author_num": 8, "aff_unique_index": "0;1;1;1;2;1;1;0", "aff_unique_norm": "Valence Labs;University of Oxford;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.ox.ac.uk;https://www.umontreal.ca", "aff_unique_abbr": ";Oxford;UdeM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "1;1;1;2;1;1", "aff_country_unique": ";United Kingdom;Canada" }, { "title": "Attractor Memory for Long-Term Time Series Forecasting: A Chaos Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94220", "id": "fEYHZzN7kX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fEYHZzN7kX", "openreview": "https://openreview.net/forum?id=fEYHZzN7kX", "poster": "/media/PosterPDFs/NeurIPS%202024/94220.png?t=1731757005.0944839", "project": "", "author_site": "Jiaxi Hu, Yuehong HU, Wei Chen, Ming Jin, Shirui Pan, Qingsong Wen, Yuxuan Liang", "tldr": "", "abstract": "In long-term time series forecasting (LTSF) tasks, an increasing number of works have acknowledged that discrete time series originate from continuous dynamic systems and have attempted to model their underlying dynamics. Recognizing the chaotic nature of real-world data, our model, Attraos, incorporates chaos theory into LTSF, perceiving real-world time series as low-dimensional observations from unknown high-dimensional chaotic dynamical systems. Under the concept of attractor invariance, Attraos utilizes non-parametric Phase Space Reconstruction embedding along with a novel multi-resolution dynamic memory unit to memorize historical dynamical structures, and evolves by a frequency-enhanced local evolution strategy. Detailed theoretical analysis and abundant empirical evidence consistently show that Attraos outperforms various LTSF methods on mainstream LTSF datasets and chaotic datasets with only one-twelfth of the parameters compared to PatchTST.", "keywords": "State Space Model; Time Series; Chaos Theory", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Jiaxi Hu;Yuehong HU;Wei Chen;Ming Jin;Shirui Pan;Qingsong Wen;Yuxuan Liang", "authorids": "~Jiaxi_Hu1;~Yuehong_HU1;~Wei_Chen50;~Ming_Jin3;~Shirui_Pan1;~Qingsong_Wen2;~Yuxuan_Liang1", "gender": "M;M;M;M;;M;M", "homepage": ";https://dejavu2021.github.io/;https://onedean.github.io/;https://mingjin.dev/;;https://yuxuanliang.com;https://sites.google.com/site/qingsongwen8/", "dblp": "71/2455;;;34/3870-5;91/8171;183/0977;27/561", "google_scholar": "TUzje_sAAAAJ;;RCfQIcQAAAAJ;I2xvKaIAAAAJ;https://scholar.google.com.au/citations?user=frWRJN4AAAAJ;n9cODgcAAAAJ;vjPJvwYAAAAJ", "orcid": ";;;0000-0002-6833-4811;0000-0003-0794-527X;0000-0003-2817-7337;0000-0003-4516-2524", "linkedin": ";;;;;yoshall/;qingsong-wen-22814156/", "or_profile": "~Jiaxi_Hu1;~Yuehong_HU1;~Wei_Chen50;~Ming_Jin3;~Shirui_Pan1;~Yuxuan_Liang1;~Qingsong_Wen1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (GuangZhou);Hong Kong University of Science and Technology, Guangzhou;Griffith University;Griffith University;The Hong Kong University of Science and Technology (Guangzhou);Squirrel Ai Learning", "aff_domain": "connect.hkust-gz.edu.cn;hkust.edu;hkust-gz.edu.cn;griffith.edu.au;griffith.edu.au;hkust-gz.edu.cn;squirrelai.com", "position": "PhD student;MS student;PhD student;Assistant Professor;Full Professor;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nhu2024attractor,\ntitle={Attractor Memory for Long-Term Time Series Forecasting: A Chaos Perspective},\nauthor={Jiaxi Hu and Yuehong HU and Wei Chen and Ming Jin and Shirui Pan and Qingsong Wen and Yuxuan Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fEYHZzN7kX}\n}", "github": "", "reviewers": "qpQ4;zUbZ;zqBV", "pdf_size": 5393687, "rating": "6;6;7", "confidence": "2;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "1;2;2", "wc_summary": "86;92;96", "wc_strengths": "45;77;146", "wc_weaknesses": "182;110;342", "wc_questions": "180;92;50", "wc_limitations": "1;19;7", "wc_review": "494;390;641", "wc_reply_reviewers": "15;92;57", "wc_reply_authors": "24;700;19", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 91.33333333333333, 4.109609335312651 ], "wc_strengths_avg": [ 89.33333333333333, 42.14525147892966 ], "wc_weaknesses_avg": [ 211.33333333333334, 96.9581811343885 ], "wc_questions_avg": [ 107.33333333333333, 54.16846150872501 ], "wc_limitations_avg": [ 9.0, 7.483314773547883 ], "wc_review_avg": [ 508.3333333333333, 102.97033013877778 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 31.47838764754143 ], "wc_reply_authors_avg": [ 247.66666666666666, 319.85448080164343 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17954328167184227310&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "connect.hkust-gz.edu.cn;hkust.edu;hkust-gz.edu.cn;griffith.edu.au;griffith.edu.au;hkust-gz.edu.cn;squirrelai.com", "author_num": 7, "aff_unique_index": "0;0;0;1;1;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Griffith University;Squirrel Ai Learning", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.griffith.edu.au;https://www.squirrelai.com/", "aff_unique_abbr": "HKUST;Griffith;", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Hong Kong SAR;;Guangzhou", "aff_country_unique_index": "0;0;0;1;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "TARP-VP: Towards Evaluation of Transferred Adversarial Robustness and Privacy on Label Mapping Visual Prompting Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94219", "id": "fEvUEBbEjb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fEvUEBbEjb", "openreview": "https://openreview.net/forum?id=fEvUEBbEjb", "poster": "/media/PosterPDFs/NeurIPS%202024/94219.png?t=1731363136.8658583", "project": "", "author_site": "Zhen Chen, Yi Zhang, Fu Wang, Xingyu Zhao, Xiaowei Huang, Wenjie Ruan", "tldr": "", "abstract": "Adversarial robustness and privacy of deep learning (DL) models are two widely studied topics in AI security. Adversarial training (AT) is \nan effective approach to improve the robustness of DL models against adversarial attacks. However, while models with AT demonstrate enhanced robustness, they become more susceptible to membership inference attacks (MIAs), thus increasing the risk of privacy leakage. This indicates a negative trade-off between adversarial robustness and privacy in general deep learning models. Visual prompting is a novel model reprogramming (MR) technique used for fine-tuning pre-trained models, achieving good performance in vision tasks, especially when combined with the label mapping technique. However, the performance of label-mapping-based visual prompting (LM-VP) under adversarial attacks and MIAs lacks evaluation. In this work, we regard the MR of LM-VP as a unified entity, referred to as the LM-VP model, and take a step toward jointly evaluating the adversarial robustness and privacy of LM-VP models. Experimental results show that \nthe choice of pre-trained models significantly affects the white-box adversarial robustness of LM-VP, and standard AT even substantially degrades its performance. In contrast, transfer AT-trained LM-VP achieves a good trade-off between transferred adversarial robustness and privacy, a finding that has been consistently validated across various pre-trained models.", "keywords": "Visual Prompting; Adversarial Robustness; Membership Inference Attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zhen Chen;Yi Zhang;Fu Wang;Xingyu Zhao;Xiaowei Huang;Wenjie Ruan", "authorids": "~Zhen_Chen17;~Yi_Zhang84;~Fu_Wang1;~Xingyu_Zhao1;~Xiaowei_Huang1;~Wenjie_Ruan2", "gender": ";F;M;M;M;", "homepage": ";;;https://www.xzhao.me;https://cgi.csc.liv.ac.uk/~xiaowei/;", "dblp": ";;;83/504-1;60/5414-1.html;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;;SzEBdA8AAAAJ;https://scholar.google.co.uk/citations?user=X4fLCCIAAAAJ;", "orcid": ";;0000-0002-5039-7021;0000-0002-3474-349X;;", "linkedin": ";yi-zhang-8224132a4/;;xingyu-zhao-29877485/;;", "or_profile": "~Zhen_Chen17;~Yi_Zhang84;~Fu_Wang1;~Xingyu_Zhao1;~Xiaowei_Huang1;~Wenjie_Ruan2", "aff": "University of Liverpool;University of Warwick;University of Exeter;University of Warwick;University of Liverpool;", "aff_domain": "liverpool.ac.uk;warwick.ac.uk;exeter.ac.uk;warwick.ac.uk;liverpool.ac.uk;", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nchen2024tarpvp,\ntitle={{TARP}-{VP}: Towards Evaluation of Transferred Adversarial Robustness and Privacy on Label Mapping Visual Prompting Models},\nauthor={Zhen Chen and Yi Zhang and Fu Wang and Xingyu Zhao and Xiaowei Huang and Wenjie Ruan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fEvUEBbEjb}\n}", "github": "", "reviewers": "J3te;Jw71;RUkH;bbnn", "pdf_size": 602462, "rating": "4;5;7;7", "confidence": "5;3;4;3", "soundness": "1;3;3;3", "novelty": "1;3;3;3", "presentation": "2;2;3;3", "wc_summary": "42;94;41;54", "wc_strengths": "35;70;107;18", "wc_weaknesses": "196;56;99;1", "wc_questions": "2;32;33;1", "wc_limitations": "14;58;70;1", "wc_review": "289;310;350;75", "wc_reply_reviewers": "494;135;38;0", "wc_reply_authors": "1485;583;26;0", "reply_reviewers": "3;2;1;0", "reply_authors": "4;3;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.75, 21.545011023436494 ], "wc_strengths_avg": [ 57.5, 34.1796723214252 ], "wc_weaknesses_avg": [ 88.0, 71.37576619553727 ], "wc_questions_avg": [ 17.0, 15.508062419270823 ], "wc_limitations_avg": [ 35.75, 28.93419257556706 ], "wc_review_avg": [ 256.0, 106.77312395916867 ], "wc_reply_reviewers_avg": [ 166.75, 195.2451984044678 ], "wc_reply_authors_avg": [ 523.5, 601.9927325142721 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k2FmA9oKD3YJ:scholar.google.com/&scioq=TARP-VP:+Towards+Evaluation+of+Transferred+Adversarial+Robustness+and+Privacy+on+Label+Mapping+Visual+Prompting+Models&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "liverpool.ac.uk;warwick.ac.uk;exeter.ac.uk;warwick.ac.uk;liverpool.ac.uk;", "author_num": 6, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "University of Liverpool;University of Warwick;University of Exeter", "aff_unique_dep": ";;", "aff_unique_url": "https://www.liverpool.ac.uk;https://www.warwick.ac.uk;https://www.exeter.ac.uk", "aff_unique_abbr": "Liv Uni;Warwick;Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "How Transformers Utilize Multi-Head Attention in In-Context Learning? A Case Study on Sparse Linear Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94218", "id": "fG8TukiXa5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fG8TukiXa5", "openreview": "https://openreview.net/forum?id=fG8TukiXa5", "poster": "", "project": "", "author_site": "Xingwu Chen, Lei Zhao, Difan Zou", "tldr": "", "abstract": "Despite the remarkable success of transformer-based models in various real-world tasks, their underlying mechanisms remain poorly understood. Recent studies have suggested that transformers can implement gradient descent as an in-context learner for linear regression problems and have developed various theoretical analyses accordingly. However, these works mostly focus on the expressive power of transformers by designing specific parameter constructions, lacking a comprehensive understanding of their inherent working mechanisms post-training. In this study, we consider a sparse linear regression problem and investigate how a trained multi-head transformer performs in-context learning. We experimentally discover that the utilization of multi-heads exhibits different patterns across layers: multiple heads are utilized and essential in the first layer, while usually only a single head is sufficient for subsequent layers. We provide a theoretical explanation for this observation: the first layer preprocesses the context data, and the following layers execute simple optimization steps based on the preprocessed context. Moreover, we demonstrate that such a preprocess-then-optimize algorithm can significantly outperform naive gradient descent and ridge regression algorithms. Further experimental results support our explanations. Our findings offer insights into the benefits of multi-head attention and contribute to understanding the more intricate mechanisms hidden within trained transformers.", "keywords": "in-context learning;transformers;deep learning theory;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xingwu Chen;Lei Zhao;Difan Zou", "authorids": "~Xingwu_Chen1;~Lei_Zhao18;~Difan_Zou1", "gender": "M;M;M", "homepage": "https://github.com;https://difanzou.github.io/;", "dblp": ";161/8923;", "google_scholar": ";Cp4fcTQAAAAJ;", "orcid": ";;", "linkedin": ";;https://www.linkedin.com/", "or_profile": "~Xingwu_Chen1;~Difan_Zou1;~Zhao_Lei2", "aff": "University of Hong Kong;University of Hong Kong;University of Science and Technology of China", "aff_domain": "hku.hk;hku.hk;ustc.edu.cn", "position": "PhD student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nchen2024how,\ntitle={How Transformers Utilize Multi-Head Attention in In-Context Learning? A Case Study on Sparse Linear Regression},\nauthor={Xingwu Chen and Lei Zhao and Difan Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fG8TukiXa5}\n}", "github": "", "reviewers": "Bazq;fd5H;Swt3;uBeS", "pdf_size": 1228740, "rating": "5;5;7;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "40;85;104;52", "wc_strengths": "11;48;69;60", "wc_weaknesses": "421;43;118;97", "wc_questions": "29;53;19;25", "wc_limitations": "1;25;25;102", "wc_review": "502;254;335;336", "wc_reply_reviewers": "17;0;25;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 70.25, 25.518375732009275 ], "wc_strengths_avg": [ 47.0, 22.079402165819616 ], "wc_weaknesses_avg": [ 169.75, 147.6166911294248 ], "wc_questions_avg": [ 31.5, 12.913171570144957 ], "wc_limitations_avg": [ 38.25, 38.08789177678386 ], "wc_review_avg": [ 356.75, 90.22021669226915 ], "wc_reply_reviewers_avg": [ 14.0, 9.027735042633894 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11634058554029488322&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "hku.hk;hku.hk;ustc.edu.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Hong Kong;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;http://www.ustc.edu.cn", "aff_unique_abbr": "HKU;USTC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "AlphaPruning: Using Heavy-Tailed Self Regularization Theory for Improved Layer-wise Pruning of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94217", "id": "fHq4x2YXVv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fHq4x2YXVv", "openreview": "https://openreview.net/forum?id=fHq4x2YXVv", "poster": "/media/PosterPDFs/NeurIPS%202024/94217.png?t=1733538065.8645315", "project": "", "author_site": "Haiquan Lu, Yefan Zhou, Shiwei Liu, Zhangyang "Atlas" Wang, Michael Mahoney, Yaoqing Yang", "tldr": "", "abstract": "Recent work on pruning large language models (LLMs) has shown that one can eliminate a large number of parameters without compromising performance, making pruning a promising strategy to reduce LLM model size. Existing LLM pruning strategies typically assign uniform pruning ratios across layers, limiting overall pruning ability; and recent work on layerwise pruning of LLMs is often based on heuristics that can easily lead to suboptimal performance. In this paper, we leverage Heavy-Tailed Self-Regularization (HT-SR) Theory, in particular the shape of empirical spectral densities (ESDs) of weight matrices, to design improved layerwise pruning ratios for LLMs. Our analysis reveals a wide variability in how well-trained, and thus relatedly how prunable, different layers of an LLM are. Based on this, we propose AlphaPruning, which uses shape metrics to allocate layerwise sparsity ratios in a more theoretically-principled manner. AlphaPruning can be used in conjunction with multiple existing LLM pruning methods. Our empirical results show that AlphaPruning prunes LLaMA-7B to 80% sparsity while maintaining reasonable perplexity, marking a first in the literature on LLMs.", "keywords": "Pruning;large language models;heavy tails", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/01b2444766d306292401d62bea47670067636f2a.zip", "author": "Haiquan Lu;Yefan Zhou;Shiwei Liu;Zhangyang Wang;Michael W. Mahoney;Yaoqing Yang", "authorids": "~Haiquan_Lu1;~Yefan_Zhou1;~Shiwei_Liu2;~Zhangyang_Wang1;~Michael_W._Mahoney1;~Yaoqing_Yang1", "gender": "M;M;M;M;;M", "homepage": "https://github.com/haiquanlu;https://yefanzhou.github.io/;https://shiweiliuiiiiiii.github.io/;https://vita-group.github.io;;https://sites.google.com/site/yangyaoqingcmu/", "dblp": "236/2803;237/4333;234/8697-3.html;119/4026;;04/4176", "google_scholar": "S1brcdYAAAAJ;TAeVaicAAAAJ;73IbXtsAAAAJ;pxFyKAIAAAAJ;;LYvugWgAAAAJ", "orcid": ";;;;;0000-0001-9908-5531", "linkedin": ";yefan-zhou/;;;;", "or_profile": "~Haiquan_Lu1;~Yefan_Zhou1;~Shiwei_Liu2;~Zhangyang_Wang1;~Michael_W._Mahoney1;~Yaoqing_Yang1", "aff": "Nankai University;Dartmouth College;University of Oxford;University of Texas at Austin;;Dartmouth College", "aff_domain": "nankai.edu.cn;dartmouth.edu;ox.ac.uk;utexas.edu;;dartmouth.edu", "position": "Undergrad student;PhD student;Postdoc;Associate Professor;;Assistant Professor", "bibtex": "@inproceedings{\nlu2024alphapruning,\ntitle={AlphaPruning: Using Heavy-Tailed Self Regularization Theory for Improved Layer-wise Pruning of Large Language Models},\nauthor={Haiquan Lu and Yefan Zhou and Shiwei Liu and Zhangyang Wang and Michael W. Mahoney and Yaoqing Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fHq4x2YXVv}\n}", "github": "", "reviewers": "hyQK;N9yi;GYDB;nY3j", "pdf_size": 1855047, "rating": "5;5;7;7", "confidence": "3;3;3;3", "soundness": "2;2;4;3", "novelty": "3;2;3;4", "presentation": "2;3;3;2", "wc_summary": "65;44;81;81", "wc_strengths": "59;22;85;108", "wc_weaknesses": "105;24;41;79", "wc_questions": "37;84;78;83", "wc_limitations": "10;5;5;11", "wc_review": "276;179;290;362", "wc_reply_reviewers": "59;0;49;0", "wc_reply_authors": "298;0;21;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.75, 15.188400179084036 ], "wc_strengths_avg": [ 68.5, 31.957002362549588 ], "wc_weaknesses_avg": [ 62.25, 31.712576369636068 ], "wc_questions_avg": [ 70.5, 19.474342094150447 ], "wc_limitations_avg": [ 7.75, 2.7726341266023544 ], "wc_review_avg": [ 276.75, 65.18962724237653 ], "wc_reply_reviewers_avg": [ 27.0, 27.230497608380205 ], "wc_reply_authors_avg": [ 79.75, 126.29801067316936 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12783901745263489759&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;dartmouth.edu;ox.ac.uk;utexas.edu;;dartmouth.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Nankai University;Dartmouth College;University of Oxford;University of Texas at Austin", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nankai.edu.cn;https://www.dartmouth.edu;https://www.ox.ac.uk;https://www.utexas.edu", "aff_unique_abbr": "NKU;Dartmouth;Oxford;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;1;2;1;1", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "MaVEn: An Effective Multi-granularity Hybrid Visual Encoding Framework for Multimodal Large Language Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94216", "id": "fHxmoekQBh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fHxmoekQBh", "openreview": "https://openreview.net/forum?id=fHxmoekQBh", "poster": "", "project": "", "author_site": "Chaoya Jiang, Hongrui Jia, Haiyang Xu, Wei Ye, Mengfan Dong, Ming Yan, Ji Zhang, Fei Huang, Shikun Zhang", "tldr": "", "abstract": "This paper presents MaVEn, an innovative Multi-granularity Visual Encoding framework designed to enhance the capabilities of Multimodal Large Language Models (MLLMs) in multi-image reasoning. Current MLLMs primarily focus on single-image visual understanding, limiting their ability to interpret and integrate information across multiple images. MaVEn addresses this limitation by combining discrete visual symbol sequences, which abstract coarse-grained semantic concepts, with traditional continuous representation sequences that model fine-grained features. This dual approach bridges the semantic gap between visual and textual data, thereby improving the model's ability to process and interpret information from multiple images effectively. Additionally, we design a dynamic reduction mechanism by for long-sequence continuous features to enhance multi-image processing efficiency. Experimental results demonstrate that MaVEn significantly enhances MLLMs' understanding in complex multi-image scenarios, while also improving performance in single-image contexts.", "keywords": "Large Vision Language Model;Multi-image;Multi-granularity Visual Encoding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chaoya Jiang;Jia Hongrui;Haiyang Xu;Wei Ye;Mengfan Dong;Ming Yan;Ji Zhang;Fei Huang;Shikun Zhang", "authorids": "~Chaoya_Jiang1;~Jia_Hongrui1;~Haiyang_Xu1;~Wei_Ye2;~Mengfan_Dong1;~Ming_Yan2;~Ji_Zhang3;~Fei_Huang2;~Shikun_Zhang2", "gender": "M;M;M;M;;M;M;F;M", "homepage": "https://github.com/APiaoG;;https://se.pku.edu.cn/kcl/weiye/;;;https://sites.google.com/view/fei-huang;;https://se.pku.edu.cn/kcl/;https://github.com/Darren-greenhand?tab=stars", "dblp": ";;09/5394-4;51/5332-4.html;86/1953-11;h/FeiHuang.html;83/3715.html;270/4680;", "google_scholar": ";qZYvce8AAAAJ;RgLGFMIAAAAJ;uIUfGxYAAAAJ;cgnuJDUAAAAJ;9r98PpoAAAAJ;uiklLscAAAAJ;861HG_0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-4959-8878;;;;0009-0009-7282-159X;0009-0005-2526-9166", "linkedin": ";;;;;fei-huang-cas-cmu;;;%E5%AD%9F%E5%B8%86-%E8%91%A3-56464a25a/", "or_profile": "~Jia_Hongrui1;~Haiyang_Xu1;~Wei_Ye2;~Ming_Yan2;~Ji_Zhang3;~Fei_Huang2;~Shikun_Zhang2;~Chao_Ya_Jiang1;~Darren_Dong1", "aff": "South China University of Technology;Alibaba Group;Peking University;Alibaba Group;Alibaba Group;Alibaba Group US;Peking University;Peking University;University of Electronic Science and Technology of China", "aff_domain": "scut.edu.cn;alibaba-inc.com;pku.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;pku.edu.cn;pku.edu.cn;uestc.edu.cn", "position": "Undergrad student;Researcher;Associate Professor;Instructor;Senior Staff Engineer;Senior Research Director;Full Professor;PhD student;Undergrad student", "bibtex": "@inproceedings{\njiang2024maven,\ntitle={Ma{VE}n: An Effective Multi-granularity Hybrid Visual Encoding Framework for Multimodal Large Language Model},\nauthor={Chaoya Jiang and Jia Hongrui and Haiyang Xu and Wei Ye and Mengfan Dong and Ming Yan and Ji Zhang and Fei Huang and Shikun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fHxmoekQBh}\n}", "github": "", "reviewers": "9o1r;TdkD;od3A;Rxew;HJUS", "pdf_size": 5233626, "rating": "4;6;6;7;8", "confidence": "4;4;2;4;4", "soundness": "3;2;2;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "87;50;49;63;67", "wc_strengths": "45;56;128;42;121", "wc_weaknesses": "148;134;2;50;48", "wc_questions": "6;25;63;20;14", "wc_limitations": "1;1;1;10;1", "wc_review": "287;266;243;185;251", "wc_reply_reviewers": "0;15;11;15;176", "wc_reply_authors": "98;130;160;859;562", "reply_reviewers": "0;1;1;1;2", "reply_authors": "2;3;3;3;3", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 63.2, 13.833293172632464 ], "wc_strengths_avg": [ 78.4, 37.992630864418956 ], "wc_weaknesses_avg": [ 76.4, 55.64746175702896 ], "wc_questions_avg": [ 25.6, 19.744366285095097 ], "wc_limitations_avg": [ 2.8, 3.6 ], "wc_review_avg": [ 246.4, 34.16196715647388 ], "wc_reply_reviewers_avg": [ 43.4, 66.52698700527478 ], "wc_reply_authors_avg": [ 361.8, 300.44393819812706 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 0.39999999999999997 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.07537783614444088, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17230382939491222328&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "scut.edu.cn;alibaba-inc.com;pku.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;pku.edu.cn;pku.edu.cn;uestc.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;1;1;1;2;2;3", "aff_unique_norm": "South China University of Technology;Alibaba Group;Peking University;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.scut.edu.cn;https://www.alibaba.com;http://www.pku.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "SCUT;Alibaba;Peking U;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Rethinking the Diffusion Models for Missing Data Imputation: A Gradient Flow Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94215", "id": "fIz8K4DJ7w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fIz8K4DJ7w", "openreview": "https://openreview.net/forum?id=fIz8K4DJ7w", "poster": "/media/PosterPDFs/NeurIPS%202024/94215.png?t=1731085398.2660687", "project": "", "author_site": "Zhichao Chen, Haoxuan Li, Fangyikang Wang, Odin Zhang, Hu Xu, Xiaoyu Jiang, Zhihuan Song, Hao Wang", "tldr": "", "abstract": "Diffusion models have demonstrated competitive performance in missing data imputation (MDI) task. However, directly applying diffusion models to MDI produces suboptimal performance due to two primary defects. First, the sample diversity promoted by diffusion models hinders the accurate inference of missing values. Second, data masking reduces observable indices for model training, obstructing imputation performance. To address these challenges, we introduce $\\underline{\\text{N}}$egative $\\underline{\\text{E}}$ntropy-regularized $\\underline{\\text{W}}$asserstein gradient flow for $\\underline{\\text{Imp}}$utation (NewImp), enhancing diffusion models for MDI from a gradient flow perspective. To handle the first defect, we incorporate a negative entropy regularization term into the cost functional to suppress diversity and improve accuracy. To handle the second defect, we demonstrate that the imputation procedure of NewImp, induced by the conditional distribution-related cost functional, can equivalently be replaced by that induced by the joint distribution, thereby naturally eliminating the need for data masking. Extensive experiments validate the effectiveness of our method. Code is available at [https://github.com/JustusvLiebig/NewImp](https://github.com/JustusvLiebig/NewImp).", "keywords": "Missing Data Imputation;Gradient Flow;Reproducing Kernel Hilbert Space;Functional Optimization", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Zhichao Chen;Haoxuan Li;Fangyikang Wang;Odin Zhang;Hu Xu;Xiaoyu Jiang;Zhihuan Song;Hao Wang", "authorids": "~Zhichao_Chen2;~Haoxuan_Li6;~Fangyikang_Wang1;~Odin_Zhang1;~Hu_Xu3;~Xiaoyu_Jiang2;~Zhihuan_Song1;~Hao_Wang28", "gender": ";M;M;;;M;M;", "homepage": ";https://haoxuanli-pku.github.io/;;https://haotianzhangai4science.github.io/;;;https://person.zju.edu.cn/zhsong;", "dblp": ";145/4965-1.html;365/8666;;;;;", "google_scholar": ";gtDqiucAAAAJ;j80akcEAAAAJ;ypnp3YwAAAAJ;;https://scholar.google.com.hk/citations?hl=en;;", "orcid": ";0000-0003-3620-3769;;;0009-0004-3001-2472;;0000-0003-4098-6479;", "linkedin": ";;;;;;;", "or_profile": "~Zhichao_Chen2;~Haoxuan_Li6;~Fangyikang_Wang1;~Odin_Zhang1;~Hu_Xu3;~Xiaoyu_Jiang2;~Zhihuan_Song1;~Hao_Wang28", "aff": ";Peking University;Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University;", "aff_domain": ";pku.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "position": ";PhD student;MS student;;MS student;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nchen2024rethinking,\ntitle={Rethinking the Diffusion Models for Missing Data Imputation: A Gradient Flow Perspective},\nauthor={Zhichao Chen and Haoxuan Li and Fangyikang Wang and Odin Zhang and Hu Xu and Xiaoyu Jiang and Zhihuan Song and Hao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fIz8K4DJ7w}\n}", "github": "", "reviewers": "EhUg;fS2D;833r;9XXU;Gtbe", "pdf_size": 3468986, "rating": "3;5;5;6;6", "confidence": "3;3;4;3;3", "soundness": "2;3;2;3;4", "novelty": "2;3;2;3;3", "presentation": "3;3;2;3;3", "wc_summary": "56;190;85;57;90", "wc_strengths": "11;24;33;191;78", "wc_weaknesses": "70;125;56;137;137", "wc_questions": "209;106;69;42;46", "wc_limitations": "1;20;197;5;7", "wc_review": "347;465;440;432;358", "wc_reply_reviewers": "0;102;237;35;18", "wc_reply_authors": "1187;475;1349;51;282", "reply_reviewers": "0;1;3;1;1", "reply_authors": "5;4;5;2;2", "rating_avg": [ 5.0, 1.0954451150103321 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 95.6, 49.22032100667366 ], "wc_strengths_avg": [ 67.4, 65.783280550608 ], "wc_weaknesses_avg": [ 105.0, 34.85398112124352 ], "wc_questions_avg": [ 94.4, 61.64608665600762 ], "wc_limitations_avg": [ 46.0, 75.76806715233008 ], "wc_review_avg": [ 408.4, 47.051461188787755 ], "wc_reply_reviewers_avg": [ 78.4, 86.474504913298 ], "wc_reply_authors_avg": [ 668.8, 509.9123061860735 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 3.6, 1.3564659966250536 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15926588906274464075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";pku.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;", "author_num": 8, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Peking University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "Peking U;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "R$^2$-Gaussian: Rectifying Radiative Gaussian Splatting for Tomographic Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94214", "id": "fMWrTAe5Iy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fMWrTAe5Iy", "openreview": "https://openreview.net/forum?id=fMWrTAe5Iy", "poster": "/media/PosterPDFs/NeurIPS%202024/94214.png?t=1730183797.0827496", "project": "", "author_site": "Ruyi Zha, Tao Jun Lin, Yuanhao Cai, Jiwen Cao, Yanhao Zhang, Hongdong Li", "tldr": "", "abstract": "3D Gaussian splatting (3DGS) has shown promising results in image rendering and surface reconstruction. However, its potential in volumetric reconstruction tasks, such as X-ray computed tomography, remains under-explored. This paper introduces R$^2$-Gaussian, the first 3DGS-based framework for sparse-view tomographic reconstruction. By carefully deriving X-ray rasterization functions, we discover a previously unknown \\emph{integration bias} in the standard 3DGS formulation, which hampers accurate volume retrieval. To address this issue, we propose a novel rectification technique via refactoring the projection from 3D to 2D Gaussians. Our new method presents three key innovations: (1) introducing tailored Gaussian kernels, (2) extending rasterization to X-ray imaging, and (3) developing a CUDA-based differentiable voxelizer. Experiments on synthetic and real-world datasets demonstrate that our method outperforms state-of-the-art approaches in accuracy and efficiency. Crucially, it delivers high-quality results in 4 minutes, which is 12$\\times$ faster than NeRF-based methods and on par with traditional algorithms.", "keywords": "3D Gaussian Splatting;3D Reconstruction;CT Reconstruction;Tomographic Reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/858df0e6b52d5d7a511f4acf305b032db520b67e.zip", "author": "Ruyi Zha;Tao Jun Lin;Yuanhao Cai;Jiwen Cao;Yanhao Zhang;Hongdong Li", "authorids": "~Ruyi_Zha1;~Tao_Jun_Lin1;~Yuanhao_Cai1;~Jiwen_Cao1;~Yanhao_Zhang4;~Hongdong_Li1", "gender": "M;;;M;M;M", "homepage": "https://ruyi-zha.github.io/;;;;https://sites.google.com/view/yanhaozhang/home;http://users.cecs.anu.edu.au/~hongdong/", "dblp": "280/0677;;;;84/10486-3;59/4859.html", "google_scholar": "_5W6W7oAAAAJ;;;https://scholar.google.com.au/citations?user=3es2SHsAAAAJ;9W32tDUAAAAJ;https://scholar.google.com.tw/citations?hl=en", "orcid": "0009-0005-0410-1807;;;0009-0009-8796-7970;0000-0002-2722-8019;", "linkedin": "ruyi-zha-208a17223/;;;jiwen-cao-87a541235;yanhao-zhang-027622229/;", "or_profile": "~Ruyi_Zha1;~Tao_Jun_Lin1;~Yuanhao_Cai1;~Jiwen_Cao1;~Yanhao_Zhang4;~Hongdong_Li1", "aff": "Australian National University;;;Australian National University;University of Technology Sydney;Australian National University", "aff_domain": "anu.edu.au;;;anu.edu.au;uts.edu.au;anu.edu.au", "position": "PhD student;;;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzha2024rgaussian,\ntitle={R\\${\\textasciicircum}2\\$-Gaussian: Rectifying Radiative Gaussian Splatting for Tomographic Reconstruction},\nauthor={Ruyi Zha and Tao Jun Lin and Yuanhao Cai and Jiwen Cao and Yanhao Zhang and Hongdong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fMWrTAe5Iy}\n}", "github": "", "reviewers": "DGKr;P8A3;w7Ci;sKx6", "pdf_size": 47610420, "rating": "5;7;7;8", "confidence": "4;4;5;5", "soundness": "2;4;3;4", "novelty": "2;3;3;4", "presentation": "2;4;4;4", "wc_summary": "35;52;198;58", "wc_strengths": "47;42;380;89", "wc_weaknesses": "123;116;720;9", "wc_questions": "23;34;20;116", "wc_limitations": "1;14;12;29", "wc_review": "229;258;1330;301", "wc_reply_reviewers": "16;37;377;0", "wc_reply_authors": "37;44;56;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 85.75, 65.35432273384829 ], "wc_strengths_avg": [ 139.5, 140.04731343371068 ], "wc_weaknesses_avg": [ 242.0, 279.6470990373403 ], "wc_questions_avg": [ 48.25, 39.4612151358774 ], "wc_limitations_avg": [ 14.0, 9.974968671630002 ], "wc_review_avg": [ 529.5, 462.8782237262842 ], "wc_reply_reviewers_avg": [ 107.5, 156.14816681600846 ], "wc_reply_authors_avg": [ 34.25, 20.90902915010642 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14592517772403928711&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "anu.edu.au;;;anu.edu.au;uts.edu.au;anu.edu.au", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Australian National University;University of Technology Sydney", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.uts.edu.au", "aff_unique_abbr": "ANU;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Expected Probabilistic Hierarchies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94213", "id": "fMdrBucZnj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fMdrBucZnj", "openreview": "https://openreview.net/forum?id=fMdrBucZnj", "poster": "/media/PosterPDFs/NeurIPS%202024/94213.png?t=1731872956.0265043", "project": "", "author_site": "Marcel Kollovieh, Bertrand Charpentier, Daniel Z\u00fcgner, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Hierarchical clustering has usually been addressed by discrete optimization using heuristics or continuous optimization of relaxed scores for hierarchies. In this work, we propose to optimize expected scores under a probabilistic model over hierarchies. (1) We show theoretically that the global optimal values of the expected Dasgupta cost and Tree-Sampling divergence (TSD), two unsupervised metrics for hierarchical clustering, are equal to the optimal values of their discrete counterparts contrary to some relaxed scores. (2) We propose Expected Probabilistic Hierarchies (EPH), a probabilistic model to learn hierarchies in data by optimizing expected scores. EPH uses differentiable hierarchy sampling enabling end-to-end gradient descent based optimization, and an unbiased subgraph sampling approach to scale to large datasets. (3) We evaluate EPH on synthetic and real-world datasets including vector and graph datasets. EPH outperforms all other approaches quantitatively and provides meaningful hierarchies in qualitative evaluations.", "keywords": "hierarchical clustering;graph clustering;clustering;unsupervised learning;probabilistic models", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Marcel Kollovieh;Bertrand Charpentier;Daniel Z\u00fcgner;Stephan G\u00fcnnemann", "authorids": "~Marcel_Kollovieh1;~Bertrand_Charpentier2;~Daniel_Z\u00fcgner1;~Stephan_G\u00fcnnemann1", "gender": "M;;M;M", "homepage": "https://marcelkollovieh.de;https://sharpenb.github.io/;;http://www.daml.in.tum.de", "dblp": "299/1647;222/1875;172/6951;43/3011", "google_scholar": "4oq7nmIAAAAJ;0rqI-ycAAAAJ;;", "orcid": ";;;", "linkedin": "marcel-kollovieh-8a2ab21b7/;bertrand-charpentier-76995ab6/;;", "or_profile": "~Marcel_Kollovieh1;~Bertrand_Charpentier2;~Daniel_Z\u00fcgner1;~Stephan_G\u00fcnnemann1", "aff": "Department of Informatics, Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;Microsoft;Technical University Munich", "aff_domain": "in.tum.de;tum.de;microsoft.com;tum.de", "position": "PhD student;PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nkollovieh2024expected,\ntitle={Expected Probabilistic Hierarchies},\nauthor={Marcel Kollovieh and Bertrand Charpentier and Daniel Z{\\\"u}gner and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fMdrBucZnj}\n}", "github": "", "reviewers": "KKzQ;AAtX;xKdT;YnBf;f9v5", "pdf_size": 6882198, "rating": "5;5;5;6;6", "confidence": "4;3;4;3;3", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "3;4;3;3;3", "wc_summary": "125;100;78;122;39", "wc_strengths": "40;27;121;31;39", "wc_weaknesses": "48;127;247;28;50", "wc_questions": "111;74;66;51;14", "wc_limitations": "10;6;1;9;2", "wc_review": "334;334;513;241;144", "wc_reply_reviewers": "16;50;67;24;0", "wc_reply_authors": "7;12;15;12;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 92.8, 31.795597179483828 ], "wc_strengths_avg": [ 51.6, 35.04054794092124 ], "wc_weaknesses_avg": [ 100.0, 80.90241034728199 ], "wc_questions_avg": [ 63.2, 31.555665101531293 ], "wc_limitations_avg": [ 5.6, 3.6110940170535577 ], "wc_review_avg": [ 313.2, 122.16939060173789 ], "wc_reply_reviewers_avg": [ 31.4, 24.046621384302618 ], "wc_reply_authors_avg": [ 9.2, 5.2687759489277965 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HtTTiWxtClYJ:scholar.google.com/&scioq=Expected+Probabilistic+Hierarchies&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "in.tum.de;tum.de;microsoft.com;tum.de", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich;Microsoft", "aff_unique_dep": "Department of Informatics;;Microsoft Corporation", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.microsoft.com", "aff_unique_abbr": "TUM;TUM;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Trajectory Flow Matching with Applications to Clinical Time Series Modelling", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94212", "id": "fNakQltI1N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fNakQltI1N", "openreview": "https://openreview.net/forum?id=fNakQltI1N", "poster": "", "project": "", "author_site": "Xi (Nicole) Zhang, Yuan Pu, Yuki Kawamura, Andrew Loza, Yoshua Bengio, Dennis Shung, Alexander Tong", "tldr": "", "abstract": "Modeling stochastic and irregularly sampled time series is a challenging problem found in a wide range of applications, especially in medicine. Neural stochastic differential equations (Neural SDEs) are an attractive modeling technique for this problem, which parameterize the drift and diffusion terms of an SDE with neural networks. However, current algorithms for training Neural SDEs require backpropagation through the SDE dynamics, greatly limiting their scalability and stability. \nTo address this, we propose **Trajectory Flow Matching** (TFM), which trains a Neural SDE in a *simulation-free* manner, bypassing backpropagation through the dynamics. TFM leverages the flow matching technique from generative modeling to model time series. In this work we first establish necessary conditions for TFM to learn time series data. Next, we present a reparameterization trick which improves training stability. Finally, we adapt TFM to the clinical time series setting, demonstrating improved performance on four clinical time series datasets both in terms of absolute performance and uncertainty prediction, a crucial parameter in this setting.", "keywords": "Flow matching;stochastic differential equations;ODE;SDE;uncertainty;time series;EHR", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/9a65e5a6dc9facb16ecb8ad142fc1d41e42c21eb.zip", "author": "Xi Zhang;Yuan Pu;Yuki Kawamura;Andrew Loza;Yoshua Bengio;Dennis Shung;Alexander Tong", "authorids": "~Xi_Zhang18;~Yuan_Pu4;~Yuki_Kawamura1;~Andrew_Loza1;~Yoshua_Bengio1;~Dennis_Shung1;~Alexander_Tong1", "gender": "F;;;M;M;M;", "homepage": ";https://yuan-pu.github.io/;;https://medicine.yale.edu/profile/andrew-loza/;http://yoshuabengio.org;;https://alextong.net", "dblp": ";;;;56/953;;153/9296", "google_scholar": "CblgXekAAAAJ;https://scholar.google.com/citations?view_op=list_works;TOmnEFgAAAAJ;;kukA0LcAAAAJ;N-LePdMAAAAJ;CS80pt4AAAAJ", "orcid": "0000-0001-6485-4564;0009-0008-4227-1833;0000-0003-2137-6464;;;0000-0001-8226-1842;0000-0002-2031-4096", "linkedin": ";yuan-pu/;yuki-kawamura-2021/;andrew-loza-37b3252bb/;yoshuabengio/?originalSubdomain=ca;;atong01/", "or_profile": "~Xi_Zhang18;~Yuan_Pu4;~Yuki_Kawamura1;~Andrew_Loza1;~Yoshua_Bengio1;~Dennis_Shung1;~Alexander_Tong1", "aff": "McGill University;Yale University;University of Cambridge;Yale University;University of Montreal;Yale University;Universit\u00e9 de Montr\u00e9al", "aff_domain": "mcgill.ca;yale.edu;cam.ac.uk;yale.edu;umontreal.ca;yale.edu;umontreal.ca", "position": "PhD student;Researcher;MD Student;Lecturer;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2024trajectory,\ntitle={Trajectory Flow Matching with Applications to Clinical Time Series Modelling},\nauthor={Xi Zhang and Yuan Pu and Yuki Kawamura and Andrew Loza and Yoshua Bengio and Dennis Shung and Alexander Tong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fNakQltI1N}\n}", "github": "", "reviewers": "4EUb;TMfk;s476", "pdf_size": 2099937, "rating": "7;7;10", "confidence": "4;3;4", "soundness": "4;4;4", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "74;69;50", "wc_strengths": "30;102;41", "wc_weaknesses": "22;58;4", "wc_questions": "8;32;39", "wc_limitations": "4;27;6", "wc_review": "138;288;140", "wc_reply_reviewers": "14;262;17", "wc_reply_authors": "0;40;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 64.33333333333333, 10.338708279513883 ], "wc_strengths_avg": [ 57.666666666666664, 31.668421004036322 ], "wc_weaknesses_avg": [ 28.0, 22.44994432064365 ], "wc_questions_avg": [ 26.333333333333332, 13.274871834493252 ], "wc_limitations_avg": [ 12.333333333333334, 10.402991022884823 ], "wc_review_avg": [ 188.66666666666666, 70.24401912064985 ], "wc_reply_reviewers_avg": [ 97.66666666666667, 116.20766851728082 ], "wc_reply_authors_avg": [ 13.333333333333334, 18.856180831641264 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13442218324709368428&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mcgill.ca;yale.edu;cam.ac.uk;yale.edu;umontreal.ca;yale.edu;umontreal.ca", "author_num": 7, "aff_unique_index": "0;1;2;1;3;1;4", "aff_unique_norm": "McGill University;Yale University;University of Cambridge;University of Montreal;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.mcgill.ca;https://www.yale.edu;https://www.cam.ac.uk;https://wwwumontreal.ca;https://www.umontreal.ca", "aff_unique_abbr": "McGill;Yale;Cambridge;UM;UdeM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;1;0;1;0", "aff_country_unique": "Canada;United States;United Kingdom" }, { "title": "Advancing Video Anomaly Detection: A Concise Review and a New Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97585", "id": "fNcyFhTw2f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fNcyFhTw2f", "openreview": "https://openreview.net/forum?id=fNcyFhTw2f", "poster": "/media/PosterPDFs/NeurIPS%202024/97585.png?t=1731228911.4444528", "project": "", "author_site": "Liyun Zhu, Lei Wang, Arjun Raj, Tom Gedeon, Chen Chen", "tldr": "", "abstract": "Video Anomaly Detection (VAD) finds widespread applications in security surveillance, traffic monitoring, industrial monitoring, and healthcare. Despite extensive research efforts, there remains a lack of concise reviews that provide insightful guidance for researchers. Such reviews would serve as quick references to grasp current challenges, research trends, and future directions. In this paper, we present such a review, examining models and datasets from various perspectives. We emphasize the critical relationship between model and dataset, where the quality and diversity of datasets profoundly influence model performance, and dataset development adapts to the evolving needs of emerging approaches. Our review identifies practical issues, including the absence of comprehensive datasets with diverse scenarios. To address this, we introduce a new dataset, Multi-Scenario Anomaly Detection (MSAD), comprising 14 distinct scenarios captured from various camera views. Our dataset has diverse motion patterns and challenging variations, such as different lighting and weather conditions, providing a robust foundation for training superior models. We conduct an in-depth analysis of recent representative models using MSAD and highlight its potential in addressing the challenges of detecting anomalies across diverse and evolving surveillance scenarios.", "keywords": "new dataset;a concise review;video anomaly detection", "primary_area": "", "supplementary_material": "", "author": "Liyun Zhu;Lei Wang;Arjun Raj;Tom Gedeon;Chen Chen", "authorids": "~Liyun_Zhu1;~Lei_Wang20;~Arjun_Raj2;~Tom_Gedeon1;~Chen_Chen18", "gender": "M;M;M;M;M", "homepage": "https://tom-roujiang.github.io/liyun_zhu/;https://leiwangr.github.io/;;https://cs.anu.edu.au/people/Tom.Gedeon/;https://www.crcv.ucf.edu/chenchen/", "dblp": "22/7697;181/2817-108;;g/TamasDGedeon.html;65/4423-1", "google_scholar": "3PmmHjUAAAAJ;VWCZLXgAAAAJ;;https://scholar.google.com.tw/citations?user=lPTjWIkAAAAJ;TuEwcZ0AAAAJ", "orcid": ";0000-0002-8600-7099;0009-0003-8717-6307;0000-0001-8356-4909;0000-0003-3957-7061", "linkedin": "https://au.linkedin.com/in/liyun-zhu-b8755328b;lei-l-wang/;;tom-gedeon;dennychen/", "or_profile": "~Liyun_Zhu1;~Lei_Wang20;~Arjun_Raj2;~Tom_Gedeon1;~Chen_Chen18", "aff": "Australian National University;Australian National University;Australian National University;Curtin University of Technology;University of Central Florida", "aff_domain": "anu.edu.au;anu.edu.au;anu.edu.au;curtin.edu.au;ucf.edu", "position": "MS student;Postdoc;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2024advancing,\ntitle={Advancing Video Anomaly Detection: A Concise Review and a New Dataset},\nauthor={Liyun Zhu and Lei Wang and Arjun Raj and Tom Gedeon and Chen Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=fNcyFhTw2f}\n}", "github": "", "reviewers": "wmDs;veG2;sasR", "pdf_size": 3683732, "rating": "5;5;9", "confidence": "5;5;3", "wc_summary_and_contributions": "61;178;112", "wc_strengths": "37;64;80", "wc_improvement": "68;376;19", "wc_limitations": "1;63;25", "wc_correctness": "1;54;1", "wc_clarity": "1;45;1", "wc_relation_to_prior_work": "13;6;25", "wc_documentation": "5;74;1", "wc_additional_feedback": "1;1;1", "wc_review": "188;861;265", "wc_reply_reviewers": "60;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "3;3;1", "rating_avg": [ 6.333333333333333, 1.8856180831641267 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 117.0, 47.89572005931219 ], "wc_strengths_avg": [ 60.333333333333336, 17.745108872274887 ], "wc_improvement_avg": [ 154.33333333333334, 158.01336089781213 ], "wc_limitations_avg": [ 29.666666666666668, 25.525586292102197 ], "wc_correctness_avg": [ 18.666666666666668, 24.984439601924677 ], "wc_clarity_avg": [ 15.666666666666666, 20.741798914805393 ], "wc_relation_to_prior_work_avg": [ 14.666666666666666, 7.84573486395988 ], "wc_documentation_avg": [ 26.666666666666668, 33.50953429829917 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 438.0, 300.7534981786025 ], "wc_reply_reviewers_avg": [ 20.0, 28.284271247461902 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11533546418405843733&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "anu.edu.au;anu.edu.au;anu.edu.au;curtin.edu.au;ucf.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Australian National University;Curtin University;University of Central Florida", "aff_unique_dep": ";;", "aff_unique_url": "https://www.anu.edu.au;https://www.curtin.edu.au;https://www.ucf.edu", "aff_unique_abbr": "ANU;Curtin;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Australia;United States" }, { "title": "The Unmet Promise of Synthetic Training Images: Using Retrieved Real Images Performs Better", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94211", "id": "fNoleQa9RX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fNoleQa9RX", "openreview": "https://openreview.net/forum?id=fNoleQa9RX", "poster": "", "project": "", "author_site": "Scott Geng, Cheng-Yu Hsieh, Vivek Ramanujan, Matthew Wallingford, Chun-Liang Li, Pang Wei Koh, Ranjay Krishna", "tldr": "", "abstract": "Generative text-to-image models enable us to synthesize unlimited amounts of images in a controllable manner, spurring many recent efforts to train vision models with synthetic data. However, every synthetic image ultimately originates from the upstream data used to train the generator. Does the intermediate generator provide additional information over directly training on relevant parts of the upstream data? \nGrounding this question in the setting of image classification, we compare finetuning on task-relevant, targeted synthetic data generated by Stable Diffusion---a generative model trained on the LAION-2B dataset---against finetuning on targeted real images retrieved directly from LAION-2B. We show that while synthetic data can benefit some downstream tasks, it is universally matched or outperformed by real data from the simple retrieval baseline. Our analysis suggests that this underperformance is partially due to generator artifacts and inaccurate task-relevant visual details in the synthetic images. Overall, we argue that targeted retrieval is a critical baseline to consider when training with synthetic data---a baseline that current methods do not yet surpass. We release code, data, and models at [https://github.com/scottgeng00/unmet-promise/](https://github.com/scottgeng00/unmet-promise).", "keywords": "Synthetic training data;task adaptation;data-centric machine learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Scott Geng;Cheng-Yu Hsieh;Vivek Ramanujan;Matthew Wallingford;Chun-Liang Li;Pang Wei Koh;Ranjay Krishna", "authorids": "~Scott_Geng1;~Cheng-Yu_Hsieh1;~Vivek_Ramanujan1;~Matthew_Wallingford1;~Chun-Liang_Li1;~Pang_Wei_Koh1;~Ranjay_Krishna1", "gender": ";M;M;M;M;M;M", "homepage": "https://www.scottgeng.com/;https://chengyuhsieh.github.io/;https://vkramanuj.github.io;http://chunliangli.github.io;http://cs.stanford.edu/~pangwei;http://ranjaykrishna.com;https://mattwallingford.github.io/", "dblp": "330/4056.html;40/4421;225/4845;;10/10453;167/3785;263/1795", "google_scholar": "jCg1gRoAAAAJ;WXX6ZwwAAAAJ;yXFPyNMAAAAJ;https://scholar.google.com.tw/citations?user=vqHIt_sAAAAJ;Nn990CkAAAAJ;IcqahyAAAAAJ;", "orcid": ";;;;;0000-0001-8784-2531;", "linkedin": ";;;;;ranjay-krishna-1a344444/;", "or_profile": "~Scott_Geng1;~Cheng-Yu_Hsieh1;~Vivek_Ramanujan1;~Chun-Liang_Li1;~Pang_Wei_Koh1;~Ranjay_Krishna1;~Matthew_C_Wallingford2", "aff": "University of Washington;Google;Meta Facebook;Apple;University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;google.com;meta.com;apple.com;cs.washington.edu;cs.washington.edu;washington.edu", "position": "PhD student;Intern;Intern;Researcher;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ngeng2024the,\ntitle={The Unmet Promise of Synthetic Training Images: Using Retrieved Real Images Performs Better},\nauthor={Scott Geng and Cheng-Yu Hsieh and Vivek Ramanujan and Matthew Wallingford and Chun-Liang Li and Pang Wei Koh and Ranjay Krishna},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fNoleQa9RX}\n}", "github": "", "reviewers": "rgD8;ZHX8;Etje;6uYi", "pdf_size": 31557351, "rating": "5;5;6;8", "confidence": "3;3;4;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;4;3;4", "wc_summary": "155;86;77;95", "wc_strengths": "90;130;39;170", "wc_weaknesses": "684;155;233;19", "wc_questions": "2;65;4;57", "wc_limitations": "1;34;7;7", "wc_review": "932;470;360;348", "wc_reply_reviewers": "349;287;79;85", "wc_reply_authors": "915;691;542;43", "reply_reviewers": "3;2;2;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 103.25, 30.548117781624452 ], "wc_strengths_avg": [ 107.25, 48.50451009957734 ], "wc_weaknesses_avg": [ 272.75, 249.47983385436186 ], "wc_questions_avg": [ 32.0, 29.146183283579344 ], "wc_limitations_avg": [ 12.25, 12.794041581923986 ], "wc_review_avg": [ 527.5, 238.32907921611243 ], "wc_reply_reviewers_avg": [ 200.0, 120.03749414245534 ], "wc_reply_authors_avg": [ 547.75, 320.23380130773205 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4510928534885338380&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.washington.edu;google.com;meta.com;apple.com;cs.washington.edu;cs.washington.edu;washington.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0;0;0", "aff_unique_norm": "University of Washington;Google;Meta;Apple", "aff_unique_dep": ";Google;Meta Platforms, Inc.;Apple Inc.", "aff_unique_url": "https://www.washington.edu;https://www.google.com;https://meta.com;https://www.apple.com", "aff_unique_abbr": "UW;Google;Meta;Apple", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SimVG: A Simple Framework for Visual Grounding with Decoupled Multi-modal Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94210", "id": "fOLNl52Q5U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fOLNl52Q5U", "openreview": "https://openreview.net/forum?id=fOLNl52Q5U", "poster": "/media/PosterPDFs/NeurIPS%202024/94210.png?t=1729832938.587653", "project": "", "author_site": "Ming Dai, Lingfeng Yang, Yihao Xu, Zhenhua Feng, Wankou Yang", "tldr": "", "abstract": "Visual grounding is a common vision task that involves grounding descriptive sentences to the corresponding regions of an image. Most existing methods use independent image-text encoding and apply complex hand-crafted modules or encoder-decoder architectures for modal interaction and query reasoning. However, their performance significantly drops when dealing with complex textual expressions. This is because the former paradigm only utilizes limited downstream data to fit the multi-modal feature fusion. Therefore, it is only effective when the textual expressions are relatively simple. In contrast, given the wide diversity of textual expressions and the uniqueness of downstream training data, the existing fusion module, which extracts multimodal content from a visual-linguistic context, has not been fully investigated. In this paper, we present a simple yet robust transformer-based framework, SimVG, for visual grounding. Specifically, we decouple visual-linguistic feature fusion from downstream tasks by leveraging existing multimodal pre-trained models and incorporating additional object tokens to facilitate deep integration of downstream and pre-training tasks. Furthermore, we design a dynamic weight-balance distillation method in the multi-branch synchronous learning process to enhance the representation capability of the simpler branch. This branch only consists of a lightweight MLP, which simplifies the structure and improves reasoning speed. Experiments on six widely used VG datasets, i.e., RefCOCO/+/g, ReferIt, Flickr30K, and GRefCOCO, demonstrate the superiority of SimVG. Finally, the proposed method not only achieves improvements in efficiency and convergence speed but also attains new state-of-the-art performance on these benchmarks. Codes and models are available at https://github.com/Dmmm1997/SimVG.", "keywords": "visual grounding;vison-language pre-training;referring expression comprehension;knowledge distillation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "dai ming;Lingfeng Yang;Yihao Xu;Zhenhua Feng;Wankou Yang", "authorids": "~dai_ming1;~Lingfeng_Yang1;~Yihao_Xu1;~Zhenhua_Feng1;~Wankou_Yang1", "gender": "M;M;M;M;M", "homepage": "https://dmmm1997.github.io/;;http://www.xuyihao.top;https://ai.jiangnan.edu.cn/info/1013/3771.htm;https://automation.seu.edu.cn/ywk/list.htm", "dblp": ";45/7593;;348/7584-1;99/3602", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;RLhH0jwAAAAJ;;Y6KtijIAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0004-6133-0035;0000-0002-2725-8947;;0000-0002-4485-4249;", "linkedin": ";;;;", "or_profile": "~dai_ming1;~Lingfeng_Yang1;~Yihao_Xu1;~Zhenhua_Feng1;~Wankou_Yang1", "aff": "Southeast University;Baidu, Inc.;Southeast University;University of Surrey;Southeast University", "aff_domain": "seu.edu.cn;baidu.com;seu.edu.cn;surrey.ac.uk;seu.edu.cn", "position": "PhD student;PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nming2024simvg,\ntitle={Sim{VG}: A Simple Framework for Visual Grounding with Decoupled Multi-modal Fusion},\nauthor={dai ming and Lingfeng Yang and Yihao Xu and Zhenhua Feng and Wankou Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fOLNl52Q5U}\n}", "github": "", "reviewers": "3YFV;TBfU;qCaP;MFGp", "pdf_size": 7496655, "rating": "5;5;6;8", "confidence": "4;3;4;5", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "49;128;85;58", "wc_strengths": "44;65;44;47", "wc_weaknesses": "120;96;78;8", "wc_questions": "2;38;23;21", "wc_limitations": "1;6;8;3", "wc_review": "216;333;238;137", "wc_reply_reviewers": "57;0;0;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.0, 30.71644510681534 ], "wc_strengths_avg": [ 50.0, 8.74642784226795 ], "wc_weaknesses_avg": [ 75.5, 41.7222961975968 ], "wc_questions_avg": [ 21.0, 12.786711852544421 ], "wc_limitations_avg": [ 4.5, 2.692582403567252 ], "wc_review_avg": [ 231.0, 69.8462597423799 ], "wc_reply_reviewers_avg": [ 21.5, 23.66960075708925 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6728976427144768414&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "seu.edu.cn;baidu.com;seu.edu.cn;surrey.ac.uk;seu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Southeast University;Baidu;University of Surrey", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.baidu.com;https://www.surrey.ac.uk", "aff_unique_abbr": "SEU;Baidu;Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Compositional Generalization Across Distributional Shifts with Sparse Tree Operations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94209", "id": "fOQunr2E0T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fOQunr2E0T", "openreview": "https://openreview.net/forum?id=fOQunr2E0T", "poster": "/media/PosterPDFs/NeurIPS%202024/94209.png?t=1733345930.9003246", "project": "", "author_site": "Paul Soulos, Henry Conklin, Mattia Opper, Paul Smolensky, Jianfeng Gao, Roland Fernandez", "tldr": "", "abstract": "Neural networks continue to struggle with compositional generalization, and this issue is exacerbated by a lack of massive pre-training. One successful approach for developing neural systems which exhibit human-like compositional generalization is $\\textit{hybrid}$ neurosymbolic techniques. However, these techniques run into the core issues that plague symbolic approaches to AI: scalability and flexibility. The reason for this failure is that at their core, hybrid neurosymbolic models perform symbolic computation and relegate the scalable and flexible neural computation to parameterizing a symbolic system. We investigate a $\\textit{unified}$ neurosymbolic system where transformations in the network can be interpreted simultaneously as both symbolic and neural computation. We extend a unified neurosymbolic architecture called the Differentiable Tree Machine in two central ways. First, we significantly increase the model\u2019s efficiency through the use of sparse vector representations of symbolic structures. Second, we enable its application beyond the restricted set of tree2tree problems to the more general class of seq2seq problems. The improved model retains its prior generalization capabilities and, since there is a fully neural path through the network, avoids the pitfalls of other neurosymbolic techniques that elevate symbolic computation over neural computation.", "keywords": "compositionality;compositional generalization;systematicity;neurosymbolic;differentiable computing;vector symbolic;VSA;trees", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Paul Soulos;Henry Conklin;Mattia Opper;Paul Smolensky;Jianfeng Gao;Roland Fernandez", "authorids": "~Paul_Soulos1;~Henry_Conklin1;~Mattia_Opper1;~Paul_Smolensky1;~Jianfeng_Gao1;~Roland_Fernandez1", "gender": "M;M;M;M;M;M", "homepage": "https://hconklin.com/;https://mopper97.github.io/;http://cogsci.jhu.edu/directory/paul-smolensky/;https://www.microsoft.com/en-us/research/people/jfgao/;https://www.microsoft.com/en-us/research/people/rfernand/;http://paulsoulos.com", "dblp": ";;48/1105;92/5339;http://dblp.uni-trier.de/pers/hd/f/Fernandez:Roland;", "google_scholar": ";02E6E3EAAAAJ;PRtkZzYAAAAJ;https://scholar.google.com/citations?hl=en;4__jyWsAAAAJ;", "orcid": ";;0000-0003-2420-182X;;0000-0002-8032-6646;", "linkedin": ";mattia-opper-45243b105/;paul-smolensky-b1871183/;;fernandezroland/;", "or_profile": "~Henry_Conklin1;~Mattia_Opper1;~Paul_Smolensky1;~Jianfeng_Gao1;~Roland_Fernandez1;~Paul_Michael_Soulos1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh, University of Edinburgh;Johns Hopkins University;Microsoft Research;Microsoft Research AI;Johns Hopkins University", "aff_domain": "ed.ac.uk;ed.ac.uk;jhu.edu;microsoft.com;microsoft.com;jhu.edu", "position": "PhD student;PhD student;Professor;Principal Researcher;Senior Reseacher;PhD student", "bibtex": "@inproceedings{\nsoulos2024compositional,\ntitle={Compositional Generalization Across Distributional Shifts with Sparse Tree Operations},\nauthor={Paul Soulos and Henry Conklin and Mattia Opper and Paul Smolensky and Jianfeng Gao and Roland Fernandez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fOQunr2E0T}\n}", "github": "", "reviewers": "mbkf;Hmrt;bgti", "pdf_size": 914040, "rating": "7;7;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "4;4;3", "wc_summary": "106;259;109", "wc_strengths": "39;155;111", "wc_weaknesses": "446;752;281", "wc_questions": "61;521;60", "wc_limitations": "4;33;29", "wc_review": "656;1720;590", "wc_reply_reviewers": "946;250;19", "wc_reply_authors": "481;29;21", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 158.0, 71.42828571371429 ], "wc_strengths_avg": [ 101.66666666666667, 47.81445620544295 ], "wc_weaknesses_avg": [ 493.0, 195.13585011473418 ], "wc_questions_avg": [ 214.0, 217.0821657038336 ], "wc_limitations_avg": [ 22.0, 12.832251036613439 ], "wc_review_avg": [ 988.6666666666666, 517.8322336647481 ], "wc_reply_reviewers_avg": [ 405.0, 393.99746192075906 ], "wc_reply_authors_avg": [ 177.0, 214.98527081329703 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3512587851074296251&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ed.ac.uk;ed.ac.uk;jhu.edu;microsoft.com;microsoft.com;jhu.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;2;1", "aff_unique_norm": "University of Edinburgh;Johns Hopkins University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.ed.ac.uk;https://www.jhu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Edinburgh;JHU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94208", "id": "fPBACAbqSN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fPBACAbqSN", "openreview": "https://openreview.net/forum?id=fPBACAbqSN", "poster": "/media/PosterPDFs/NeurIPS%202024/94208.png?t=1731742821.8861883", "project": "", "author_site": "Huiqiang Jiang, Yucheng LI, Chengruidong Zhang, Qianhui Wu, Xufang Luo, Surin Ahn, Zhenhua Han, Amir Abdi, Dongsheng Li, Chin-Yew Lin, Yuqing Yang, Lili Qiu", "tldr": "", "abstract": "The computational challenges of Large Language Model (LLM) inference remain a significant barrier to their widespread deployment, especially as prompt lengths continue to increase. Due to the quadratic complexity of the attention computation, it takes 30 minutes for an 8B LLM to process a prompt of 1M tokens (i.e., the pre-filling stage) on a single A100 GPU. Existing methods for speeding up prefilling often fail to maintain acceptable accuracy or efficiency when applied to long-context LLMs. To address this gap, we introduce MInference (Milliontokens Inference), a sparse calculation method designed to accelerate pre-filling of long-sequence processing. Specifically, we identify three unique patterns in long-context attention matrices-the A-shape, Vertical-Slash, and Block-Sparse-that can be leveraged for efficient sparse computation on GPUs. We determine the optimal pattern for each attention head offline and dynamically build sparse\nindices based on the assigned pattern during inference. With the pattern and sparse indices, we perform efficient sparse attention calculations via our optimized GPU kernels to significantly reduce the latency in the pre-filling stage of longcontext LLMs. Our proposed technique can be directly applied to existing LLMs without any modifications to the pre-training setup or additional fine-tuning. By\nevaluating on a wide range of downstream tasks, including InfiniteBench, RULER, PG-19, and Needle In A Haystack, and models including LLaMA-3-1M, GLM-4-1M, Yi-200K, Phi-3-128K, and Qwen2-128K, we demonstrate that MInference effectively reduces inference latency by up to 10x for pre-filling on an A100, while maintaining accuracy. Our code is available at https://aka.ms/MInference.", "keywords": "LLMs Inference;Long-Context LLMs;Dynamic Sparse Attention;Efficient Inference", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/52ca2530d967c614495c87354cf6d45c8686f03f.zip", "author": "Huiqiang Jiang;YUCHENG LI;Chengruidong Zhang;Qianhui Wu;Xufang Luo;Surin Ahn;Zhenhua Han;Amir H. Abdi;Dongsheng Li;Chin-Yew Lin;Yuqing Yang;Lili Qiu", "authorids": "~Huiqiang_Jiang2;~YUCHENG_LI2;~Chengruidong_Zhang1;~Qianhui_Wu1;~Xufang_Luo1;~Surin_Ahn1;~Zhenhua_Han1;~Amir_H._Abdi1;~Dongsheng_Li2;~Chin-Yew_Lin1;~Yuqing_Yang1;~Lili_Qiu3", "gender": "M;M;M;F;F;;M;;M;M;;", "homepage": "https://hqjiang.com;;https://github.com/Starmys/;https://qianhuiwu.github.io/;;;https://hzhua.github.io/;;http://recmind.cn;https://www.microsoft.com/en-us/research/people/cyl/;;https://www.microsoft.com/en-us/research/people/liliqiu/", "dblp": "204/2497;72/7816-1;;204/2307;218/7350;;147/1606.html;;254/0830-2.html;64/6843;91/9064-1.html;", "google_scholar": "99KtvpYAAAAJ;;WbvQ5JEAAAAJ;BLZieokAAAAJ;;;https://scholar.google.com.hk/citations?user=NJr2G5AAAAAJ;;VNg5rA8AAAAJ;cDF07aYAAAAJ;4BtNQAEAAAAJ;", "orcid": "0000-0002-1327-4882;;;;;;0000-0002-2880-7100;;0000-0003-3103-8442;;0000-0003-3518-5212;", "linkedin": ";yuchengli09/;;qianhui-wu-2b1608b7?originalSubdomain=cn;;;;;;chin-yew-lin-32585a4;;", "or_profile": "~Huiqiang_Jiang2;~YUCHENG_LI2;~Chengruidong_Zhang1;~Qianhui_Wu1;~Xufang_Luo1;~Surin_Ahn1;~Zhenhua_Han1;~Amir_H._Abdi1;~Dongsheng_Li2;~Chin-Yew_Lin1;~Yuqing_Yang1;~Lili_Qiu3", "aff": "Microsoft;University of Surrey;Microsoft;Microsoft;Microsoft Research;;Microsoft;;Microsoft Research Asia;Microsoft;Microsoft Research;University of Texas at Austin", "aff_domain": "microsoft.com;surrey.ac.uk;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;;microsoft.com;microsoft.com;research.microsoft.com;utexas.edu", "position": "RSDE;PhD student;Researcher;Researcher;Researcher;;Researcher;;Principal Researcher;Senior Principal Research Manager;Researcher;Full Professor", "bibtex": "@inproceedings{\njiang2024minference,\ntitle={{MI}nference 1.0: Accelerating Pre-filling for Long-Context {LLM}s via Dynamic Sparse Attention},\nauthor={Huiqiang Jiang and YUCHENG LI and Chengruidong Zhang and Qianhui Wu and Xufang Luo and Surin Ahn and Zhenhua Han and Amir H. Abdi and Dongsheng Li and Chin-Yew Lin and Yuqing Yang and Lili Qiu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fPBACAbqSN}\n}", "github": "", "reviewers": "nzY9;MtKP;9cfV;gjr6", "pdf_size": 4691541, "rating": "7;7;7;7", "confidence": "3;5;5;3", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "178;84;58;82", "wc_strengths": "61;133;25;33", "wc_weaknesses": "249;111;59;59", "wc_questions": "209;37;212;69", "wc_limitations": "11;1;1;21", "wc_review": "708;366;355;264", "wc_reply_reviewers": "155;45;564;0", "wc_reply_authors": "75;0;1269;0", "reply_reviewers": "1;1;6;0", "reply_authors": "2;1;6;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 45.89934640057525 ], "wc_strengths_avg": [ 63.0, 42.567593307585526 ], "wc_weaknesses_avg": [ 119.5, 77.7222619331167 ], "wc_questions_avg": [ 131.75, 79.56561757442721 ], "wc_limitations_avg": [ 8.5, 8.2915619758885 ], "wc_review_avg": [ 423.25, 169.09963778790302 ], "wc_reply_reviewers_avg": [ 191.0, 222.61064664566248 ], "wc_reply_authors_avg": [ 336.0, 539.5373017688397 ], "reply_reviewers_avg": [ 2.0, 2.345207879911715 ], "reply_authors_avg": [ 2.5, 2.0615528128088303 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4349982442220269228&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;surrey.ac.uk;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;;microsoft.com;microsoft.com;research.microsoft.com;utexas.edu", "author_num": 12, "aff_unique_index": "0;1;0;0;0;0;0;0;0;2", "aff_unique_norm": "Microsoft;University of Surrey;University of Texas at Austin", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.surrey.ac.uk;https://www.utexas.edu", "aff_unique_abbr": "Microsoft;Surrey;UT Austin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Austin", "aff_country_unique_index": "0;1;0;0;0;0;2;0;0;0", "aff_country_unique": "United States;United Kingdom;China" }, { "title": "Found in the Middle: How Language Models Use Long Contexts Better via Plug-and-Play Positional Encoding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94207", "id": "fPmScVB1Td", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fPmScVB1Td", "openreview": "https://openreview.net/forum?id=fPmScVB1Td", "poster": "/media/PosterPDFs/NeurIPS%202024/94207.png?t=1731618679.951212", "project": "", "author_site": "Zhenyu Zhang, Runjin Chen, Shiwei Liu, Zhewei Yao, Olatunji Ruwase, Beidi Chen, Xiaoxia Wu, Zhangyang "Atlas" Wang", "tldr": "", "abstract": "This paper aims to overcome the ``lost-in-the-middle'' challenge of large language models (LLMs). While recent advancements have successfully enabled LLMs to perform stable language modeling with up to 4 million tokens, the persistent difficulty faced by most LLMs in identifying relevant information situated in the middle of the context has not been adequately tackled. To address this problem, this paper introduces Multi-scale Positional Encoding (Ms-PoE) which is a simple yet effective plug-and-play approach to enhance the capacity of LLMs to handle the relevant information located in the middle of the context, without fine-tuning or introducing any additional overhead. Ms-PoE leverages the position indice rescaling to relieve the long-term decay effect introduced by RoPE, while meticulously assigning distinct scaling ratios to different attention heads to preserve essential knowledge learned during the pre-training step, forming a multi-scale context fusion from short to long distance. Extensive experiments with a wide range of LLMs demonstrate the efficacy of our approach. Notably, Ms-PoE achieves an average accuracy gain of up to 3.8 on the Zero-SCROLLS benchmark over the original LLMs. Code will be made public upon acceptence.", "keywords": "Positional encoding; Large language model; long context reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhenyu Zhang;Runjin Chen;Shiwei Liu;Zhewei Yao;Olatunji Ruwase;Beidi Chen;Xiaoxia Wu;Zhangyang Wang", "authorids": "~Zhenyu_Zhang4;~Runjin_Chen1;~Shiwei_Liu2;~Zhewei_Yao1;~Olatunji_Ruwase1;~Beidi_Chen1;~Xiaoxia_Wu1;~Zhangyang_Wang1", "gender": "M;;M;M;M;F;F;M", "homepage": "https://zhenyu.gallery;;https://shiweiliuiiiiiii.github.io/;;https://www.microsoft.com/en-us/research/people/olruwase/;https://www.andrew.cmu.edu/user/beidic/;https://sites.google.com/view/xwu/home;https://vita-group.github.io", "dblp": "01/1844-15;;234/8697-3.html;195/2887;72/2838;192/1339;63/1016;119/4026", "google_scholar": "ZLyJRxoAAAAJ;;73IbXtsAAAAJ;gpSeMjYAAAAJ;HZDafzgAAAAJ;;Ry0Bdt8AAAAJ;pxFyKAIAAAAJ", "orcid": ";;;;;;;", "linkedin": "zhenyu-allen-zhang-a9b1391a3/;;;;;;;", "or_profile": "~Zhenyu_Zhang4;~Runjin_Chen1;~Shiwei_Liu2;~Zhewei_Yao1;~Olatunji_Ruwase1;~Beidi_Chen1;~Xiaoxia_Wu1;~Zhangyang_Wang1", "aff": "University of Texas at Austin;;University of Oxford;Snowflake;Microsoft;Meta Facebook;Microsoft;University of Texas at Austin", "aff_domain": "utexas.edu;;ox.ac.uk;snowflake.com;microsoft.com;fb.com;microsoft.com;utexas.edu", "position": "PhD student;;Postdoc;Researcher;Researcher;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2024found,\ntitle={Found in the Middle: How Language Models Use Long Contexts Better via Plug-and-Play Positional Encoding},\nauthor={Zhenyu Zhang and Runjin Chen and Shiwei Liu and Zhewei Yao and Olatunji Ruwase and Beidi Chen and Xiaoxia Wu and Zhangyang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fPmScVB1Td}\n}", "github": "", "reviewers": "qA6u;LVQT;xsrg", "pdf_size": 1353546, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "4;3;3", "wc_summary": "70;81;53", "wc_strengths": "27;17;69", "wc_weaknesses": "239;120;139", "wc_questions": "70;46;33", "wc_limitations": "1;5;5", "wc_review": "407;269;299", "wc_reply_reviewers": "21;0;61", "wc_reply_authors": "30;118;116", "reply_reviewers": "1;0;2", "reply_authors": "2;3;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 68.0, 11.51810169544733 ], "wc_strengths_avg": [ 37.666666666666664, 22.528993664954402 ], "wc_weaknesses_avg": [ 166.0, 52.19833969262496 ], "wc_questions_avg": [ 49.666666666666664, 15.326085243430198 ], "wc_limitations_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_review_avg": [ 325.0, 59.262129560116215 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 25.30261295246446 ], "wc_reply_authors_avg": [ 88.0, 41.02032016777376 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1411148366807228817&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;;ox.ac.uk;snowflake.com;microsoft.com;fb.com;microsoft.com;utexas.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;3;0", "aff_unique_norm": "University of Texas at Austin;University of Oxford;Snowflake Inc.;Microsoft;Meta", "aff_unique_dep": ";;;Microsoft Corporation;Meta Platforms, Inc.", "aff_unique_url": "https://www.utexas.edu;https://www.ox.ac.uk;https://www.snowflake.com;https://www.microsoft.com;https://meta.com", "aff_unique_abbr": "UT Austin;Oxford;Snowflake;Microsoft;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Over-parameterized Student Model via Tensor Decomposition Boosted Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94206", "id": "fT1RkAgrC3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fT1RkAgrC3", "openreview": "https://openreview.net/forum?id=fT1RkAgrC3", "poster": "/media/PosterPDFs/NeurIPS%202024/94206.png?t=1731379272.5579567", "project": "", "author_site": "Yu-Liang Zhan, Zhong-Yi Lu, Hao Sun, Ze-Feng Gao", "tldr": "", "abstract": "Increased training parameters have enabled large pre-trained models to excel in various downstream tasks. Nevertheless, the extensive computational requirements associated with these models hinder their widespread adoption within the community. We focus on Knowledge Distillation (KD), where a compact student model is trained to mimic a larger teacher model, facilitating the transfer of knowledge of large models. In contrast to much of the previous work, we scale up the parameters of the student model during training, to benefit from over-parameterization without increasing the inference latency. In particular, we propose a tensor decomposition strategy that effectively over-parameterizes the relatively small student model through an efficient and nearly lossless decomposition of its parameter matrices into higher-dimensional tensors. To ensure efficiency, we further introduce a tensor constraint loss to align the high-dimensional tensors between the student and teacher models. Comprehensive experiments validate the significant performance enhancement by our approach in various KD tasks, covering computer vision and natural language processing areas. Our code is available at https://github.com/intell-sci-comput/OPDF.", "keywords": "Knowledge Distillation;Over-parameterization;Tensor Decomposition;Model Compression", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yu-Liang Zhan;Zhong-Yi Lu;Hao Sun;Ze-Feng Gao", "authorids": "~Yu-Liang_Zhan1;~Zhong-Yi_Lu1;~Hao_Sun4;~Ze-Feng_Gao1", "gender": ";M;;M", "homepage": ";;;https://zfgao66.github.io/homepage/", "dblp": ";;;239/5268.html", "google_scholar": ";;;vB64k4IAAAAJ", "orcid": ";0000-0001-8866-3180;;0000-0002-6695-8209", "linkedin": ";;;", "or_profile": "~Yu-Liang_Zhan1;~Zhong-Yi_Lu1;~Hao_Sun4;~Ze-Feng_Gao1", "aff": ";Renmin University of China, Tsinghua University;;Renmin University of China", "aff_domain": ";ruc.edu.cn;;ruc.edu.cn", "position": ";Full Professor;;Postdoc", "bibtex": "@inproceedings{\nzhan2024overparameterized,\ntitle={Over-parameterized Student Model via Tensor Decomposition Boosted Knowledge Distillation},\nauthor={Yu-Liang Zhan and Zhong-Yi Lu and Hao Sun and Ze-Feng Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fT1RkAgrC3}\n}", "github": "", "reviewers": "XV5e;Qtmi;Dajm;fkms", "pdf_size": 506154, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;2", "wc_summary": "88;67;70;36", "wc_strengths": "82;79;162;29", "wc_weaknesses": "43;102;98;298", "wc_questions": "162;4;5;38", "wc_limitations": "31;1;1;10", "wc_review": "406;253;336;411", "wc_reply_reviewers": "36;75;26;340", "wc_reply_authors": "203;220;34;741", "reply_reviewers": "1;1;1;3", "reply_authors": "5;3;2;5", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.25, 18.699933154960743 ], "wc_strengths_avg": [ 88.0, 47.62877281643944 ], "wc_weaknesses_avg": [ 135.25, 96.81264122003903 ], "wc_questions_avg": [ 52.25, 64.82428171603601 ], "wc_limitations_avg": [ 10.75, 12.255100978776143 ], "wc_review_avg": [ 351.5, 64.1346240341362 ], "wc_reply_reviewers_avg": [ 119.25, 128.75825216272548 ], "wc_reply_authors_avg": [ 299.5, 265.06838740219473 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17020851806466312505&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";ruc.edu.cn;;ruc.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Label Noise: Ignorance Is Bliss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94205", "id": "fTKcqr4xuX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fTKcqr4xuX", "openreview": "https://openreview.net/forum?id=fTKcqr4xuX", "poster": "/media/PosterPDFs/NeurIPS%202024/94205.png?t=1731653216.5165215", "project": "", "author_site": "Yilun Zhu, Jianxin Zhang, Aditya Gangrade, Clay Scott", "tldr": "", "abstract": "We establish a new theoretical framework for learning under multi-class, instance-dependent label noise. \n This framework casts learning with label\n noise as a form of domain adaptation, in particular, domain adaptation\n under posterior drift. \n We introduce the concept of \\emph{relative signal strength} (RSS), a pointwise measure that quantifies the transferability from noisy to clean posterior. \n Using RSS, we establish nearly matching upper and lower bounds on the excess risk. \n Our theoretical findings support \n the simple\n \\emph{Noise Ignorant Empirical Risk Minimization (NI-ERM)} principle,\n which minimizes empirical risk while ignoring label noise.\n Finally, we translate this theoretical insight into practice: by\n using NI-ERM to fit a linear classifier on top of a self-supervised\n feature extractor, we achieve state-of-the-art performance on the\n CIFAR-N data challenge.", "keywords": "label noise; multi-class classification; learning theory; domain adaptation; minimax rate; self-supervised learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/7c8db7a6e33f93dfa49bb53262c1f3dbe0aeb579.zip", "author": "Yilun Zhu;Jianxin Zhang;Aditya Gangrade;Clayton Scott", "authorids": "~Yilun_Zhu2;~Jianxin_Zhang1;~Aditya_Gangrade1;~Clayton_Scott1", "gender": ";;;", "homepage": "https://sites.google.com/umich.edu/allanzhu;;;", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0003-1821-2004;;;", "linkedin": ";;;", "or_profile": "~Yilun_Zhu2;~Jianxin_Zhang1;~Aditya_Gangrade1;~Clayton_Scott1", "aff": "University of Michigan - Ann Arbor;;;", "aff_domain": "umich.edu;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nzhu2024label,\ntitle={Label Noise: Ignorance Is Bliss},\nauthor={Yilun Zhu and Jianxin Zhang and Aditya Gangrade and Clayton Scott},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fTKcqr4xuX}\n}", "github": "", "reviewers": "rXjd;V7Mw;JX6n;G7Jo;1ugH", "pdf_size": 760154, "rating": "4;5;6;7;8", "confidence": "4;2;3;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;4;3;4;3", "wc_summary": "59;79;139;83;70", "wc_strengths": "81;49;48;240;170", "wc_weaknesses": "303;166;86;266;229", "wc_questions": "163;75;18;211;12", "wc_limitations": "1;1;12;31;1", "wc_review": "607;370;303;831;482", "wc_reply_reviewers": "584;205;0;89;25", "wc_reply_authors": "751;132;0;24;0", "reply_reviewers": "1;3;0;1;1", "reply_authors": "2;2;1;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 86.0, 27.756080414928906 ], "wc_strengths_avg": [ 117.6, 75.65342027958816 ], "wc_weaknesses_avg": [ 210.0, 76.7567586600685 ], "wc_questions_avg": [ 95.8, 79.11358922460794 ], "wc_limitations_avg": [ 9.2, 11.702991070662234 ], "wc_review_avg": [ 518.6, 187.25020694247576 ], "wc_reply_reviewers_avg": [ 180.6, 213.7967258869976 ], "wc_reply_authors_avg": [ 181.4, 288.95369871313295 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.22360679774997896, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:IVjiPb6hDdkJ:scholar.google.com/&scioq=Label+Noise:+Ignorance+Is+Bliss&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "umich.edu;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "fTOw3BzcWs", "title": "ExID: Offline RL with Intuitive Expert Insights in Limited-Data Settings", "track": "main", "status": "Reject", "tldr": "", "abstract": "With the ability to learn from static datasets, Offline Reinforcement Learning (RL) emerges as a compelling avenue for real-world applications. However, state-of-the-art offline RL algorithms perform sub-optimally when confronted with limited data confined to specific regions within the state space. The performance degradation is attributed to the inability of offline RL algorithms to learn appropriate actions for rare or unseen observations. This paper proposes a novel domain knowledge-based regularization technique and adaptively refines the initial domain knowledge to considerably boost performance in limited data with partially omitted states. The key insight is that the regularization term mitigates erroneous actions for sparse samples and unobserved states covered by domain knowledge. Empirical evaluations on standard discrete environment datasets demonstrate a substantial average performance increase compared to ensemble of domain knowledge and existing offline RL algorithms operating on limited data.", "keywords": "Offline Reinforcement Learning;Domain knowledge", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/38daaa896826514b2f6398c26fb8652f041d6393.zip", "author": "Briti Gangopadhyay;Zhao Wang;Jia-Fong Yeh;Shingo Takamatsu", "authorids": "~Briti_Gangopadhyay1;~Zhao_Wang6;~Jia-Fong_Yeh1;~Shingo_Takamatsu2", "gender": "F;M;;M", "homepage": "https://sites.google.com/view/britigangopadhyay/home;https://www.joewang.info;https://www.cmlab.csie.ntu.edu.tw/~jiafongyeh/;https://scholar.google.co.jp/citations?user=oCVG8wQAAAAJ&hl=en", "dblp": "254/6480;86/981-7;198/7831;17/1070.html", "google_scholar": "JSHb52gAAAAJ;;kS-oZ20AAAAJ;https://scholar.google.co.jp/citations?user=oCVG8wQAAAAJ", "orcid": "0000-0002-6488-9326;0000-0002-9169-5391;;0009-0008-1640-2406", "linkedin": "briti-gangopadhyay-a988008a/;;;shingotakamatsu/", "or_profile": "~Briti_Gangopadhyay1;~Zhao_Wang6;~Jia-Fong_Yeh1;~Shingo_Takamatsu2", "aff": "Sony Group Coorporation;Waseda University;Sony Group Corporation;Sony Group Corporation", "aff_domain": "sony.co.jp;waseda.jp;sony.com;sony.com", "position": "Researcher;Researcher;Intern;Researcher", "bibtex": "@misc{\nanonymous2024exid,\ntitle={Ex{ID}: Offline {RL} with Intuitive Expert Insights in Limited-Data Settings},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=fTOw3BzcWs}\n}", "github": "", "project": "", "reviewers": "UWfE;NyjZ;Cy8u", "site": "https://openreview.net/forum?id=fTOw3BzcWs", "pdf_size": 26724066, "rating": "6;6;7", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "3;2;4", "presentation": "2;2;4", "wc_summary": "33;43;119", "wc_strengths": "58;29;215", "wc_weaknesses": "198;177;107", "wc_questions": "104;36;64", "wc_limitations": "120;9;67", "wc_review": "513;294;572", "wc_reply_reviewers": "70;15;21", "wc_reply_authors": "436;109;39", "reply_reviewers": "2;1;1", "reply_authors": "3;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 65.0, 38.40138886377245 ], "wc_strengths_avg": [ 100.66666666666667, 81.70815272800029 ], "wc_weaknesses_avg": [ 160.66666666666666, 38.90444133457716 ], "wc_questions_avg": [ 68.0, 27.9045993819418 ], "wc_limitations_avg": [ 65.33333333333333, 45.330882286680556 ], "wc_review_avg": [ 459.6666666666667, 119.59468587227816 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 24.63511495586917 ], "wc_reply_authors_avg": [ 194.66666666666666, 173.02472527230194 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kp9jHxdItFoJ:scholar.google.com/&scioq=ExID:+Offline+RL+with+Intuitive+Expert+Insights+in+Limited-Data+Settings&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Sony Group Corporation;Waseda University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sony.com;https://www.waseda.jp/top", "aff_unique_abbr": "Sony;Waseda", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "TrajCLIP: Pedestrian trajectory prediction method using contrastive learning and idempotent networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94204", "id": "fUBFy8tb3z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fUBFy8tb3z", "openreview": "https://openreview.net/forum?id=fUBFy8tb3z", "poster": "/media/PosterPDFs/NeurIPS%202024/94204.png?t=1733101450.5994444", "project": "", "author_site": "Pengfei Yao, Yinglong Zhu, Huikun Bi, Tianlu Mao, Zhaoqi Wang", "tldr": "", "abstract": "The distribution of pedestrian trajectories is highly complex and influenced by the scene, nearby pedestrians, and subjective intentions. This complexity presents challenges for modeling and generalizing trajectory prediction. Previous methods modeled the feature space of future trajectories based on the high-dimensional feature space of historical trajectories, but this approach is suboptimal because it overlooks the similarity between historical and future trajectories. Our proposed method, TrajCLIP, utilizes contrastive learning and idempotent generative networks to address this issue. By pairing historical and future trajectories and applying contrastive learning on the encoded feature space, we enforce same-space consistency constraints. To manage complex distributions, we use idempotent loss and tightness loss to control over-expansion in the latent space. Additionally, we have developed a trajectory interpolation algorithm and synthetic trajectory data to enhance model capacity and improve generalization. Experimental results on public datasets demonstrate that TrajCLIP achieves state-of-the-art performance and excels in scene-to-scene transfer, few-shot transfer, and online learning tasks.", "keywords": "trajectory prediction;constrative learning", "primary_area": "generative_models", "supplementary_material": "", "author": "Pengfei Yao;Yinglong Zhu;Huikun Bi;Tianlu Mao;Zhaoqi Wang", "authorids": "~Pengfei_Yao1;~Yinglong_Zhu1;~Huikun_Bi1;~Tianlu_Mao1;~Zhaoqi_Wang2", "gender": "M;M;F;F;M", "homepage": ";https://www.zhihu.com/people/shi-mo-zhi-mao;;http://sourcedb.ict.cas.cn/cn/jssrck/200910/t20091030_2643412.html;https://people.ucas.edu.cn/~zqwang", "dblp": ";;;;", "google_scholar": "v4E9uF4AAAAJ;;Nzqvb7kAAAAJ;;", "orcid": "0000-0002-9399-2488;;;0000-0003-2537-9873;", "linkedin": ";;;;", "or_profile": "~Pengfei_Yao1;~Yinglong_Zhu1;~Huikun_Bi1;~Tianlu_Mao1;~Zhaoqi_Wang2", "aff": "Institute of Computing Technology, CAS;Beijing University of Posts and Telecommunications;;, Chinese Academy of Sciences;, Chinese Academy of Sciences", "aff_domain": "ict.cas.cn;bupt.edu.cn;;ict.ac.cn;ict.ac.cn", "position": "Intern;MS student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyao2024trajclip,\ntitle={Traj{CLIP}: Pedestrian trajectory prediction method using contrastive learning and idempotent networks},\nauthor={Pengfei Yao and Yinglong Zhu and Huikun Bi and Tianlu Mao and Zhaoqi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fUBFy8tb3z}\n}", "github": "", "reviewers": "NFeW;Xn2A;iNsW;AYPL", "pdf_size": 16320807, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "60;42;136;88", "wc_strengths": "30;81;79;45", "wc_weaknesses": "110;39;112;188", "wc_questions": "125;47;84;141", "wc_limitations": "13;19;16;5", "wc_review": "338;228;427;467", "wc_reply_reviewers": "0;56;30;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.5, 35.47886694921358 ], "wc_strengths_avg": [ 58.75, 21.9131809648896 ], "wc_weaknesses_avg": [ 112.25, 52.69902750525858 ], "wc_questions_avg": [ 99.25, 36.635877224382114 ], "wc_limitations_avg": [ 13.25, 5.2141634036535525 ], "wc_review_avg": [ 365.0, 91.85042188253682 ], "wc_reply_reviewers_avg": [ 27.0, 20.024984394500787 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TO8SRLgcPbAJ:scholar.google.com/&scioq=TrajCLIP:+Pedestrian+trajectory+prediction+method+using+contrastive+learning+and+idempotent+networks&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ict.cas.cn;bupt.edu.cn;;ict.ac.cn;ict.ac.cn", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Beijing University of Posts and Telecommunications", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "CAS;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "PrefPaint: Aligning Image Inpainting Diffusion Model with Human Preference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94203", "id": "fVRCsK4EoM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fVRCsK4EoM", "openreview": "https://openreview.net/forum?id=fVRCsK4EoM", "poster": "/media/PosterPDFs/NeurIPS%202024/94203.png?t=1733731948.8747785", "project": "", "author_site": "Kendong Liu, Zhiyu Zhu, Chuanhao Li, Hui LIU, Huanqiang Zeng, Junhui Hou", "tldr": "", "abstract": "In this paper, we make the first attempt to align diffusion models for image inpainting with human aesthetic standards via a reinforcement learning framework, significantly improving the quality and visual appeal of inpainted images. Specifically, instead of directly measuring the divergence with paired images, we train a reward model with the dataset we construct, consisting of nearly 51,000 images annotated with human preferences. Then, we adopt a reinforcement learning process to fine-tune the distribution of a pre-trained diffusion model for image inpainting in the direction of higher reward. Moreover, we theoretically deduce the upper bound on the error of the reward model, which illustrates the potential confidence of reward estimation throughout the reinforcement alignment process, thereby facilitating accurate regularization.\nExtensive experiments on inpainting comparison and downstream tasks, such as image extension and 3D reconstruction, demonstrate the effectiveness of our approach, showing significant improvements in the alignment of inpainted images with human preference compared with state-of-the-art methods. This research not only advances the field of image inpainting but also provides a framework for incorporating human preference into the iterative refinement of generative models based on modeling reward accuracy, with broad implications for the design of visually driven AI applications. Our code and dataset are publicly available at \\url{https://prefpaint.github.io}.", "keywords": "diffusion model;image inpainting;human feedback reinforcement learning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/6f857bd4c057c73730e6e8a0064b5570eab7b301.zip", "author": "Kendong Liu;Zhiyu Zhu;Chuanhao Li;Hui LIU;Huanqiang Zeng;Junhui Hou", "authorids": "~Kendong_Liu1;~Zhiyu_Zhu1;~Chuanhao_Li1;~Hui_LIU14;~Huanqiang_Zeng1;~Junhui_Hou2", "gender": "F;M;;F;M;M", "homepage": ";;https://cyrilli.github.io/;https://heidiliu.github.io/;http://smartviplab.org/members/hqzeng-en.html;http://www.cityu.edu.hk/stfprofile/csjhhou.htm", "dblp": ";;195/9947;;25/8798;122/2673.html", "google_scholar": "Uh2I5RwAAAAJ;d1L0KkoAAAAJ;w2ShljkAAAAJ;U66txqwAAAAJ;-QvoSUkAAAAJ;j6eefhwAAAAJ", "orcid": ";0000-0002-0726-4522;;;;0000-0003-3431-2021", "linkedin": ";;;;;", "or_profile": "~Kendong_Liu1;~Zhiyu_Zhu1;~Chuanhao_Li1;~Hui_LIU14;~Huanqiang_Zeng1;~Junhui_Hou2", "aff": "City University;City University of Hong Kong;Yale University;Saint Francis University;Huaqiao University, China;City University of Hong Kong", "aff_domain": "cityu.edu;cityu.edu.hk;yale.edu;sfu.edu.hk;hqu.edu.cn;cityu.edu.hk", "position": "PhD student;Postdoc;Postdoc;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024prefpaint,\ntitle={PrefPaint: Aligning Image Inpainting Diffusion Model with Human Preference},\nauthor={Kendong Liu and Zhiyu Zhu and Chuanhao Li and Hui LIU and Huanqiang Zeng and Junhui Hou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fVRCsK4EoM}\n}", "github": "", "reviewers": "G8Cx;85gD;Hftn;yfx6", "pdf_size": 51101980, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "19;55;62;76", "wc_strengths": "72;48;67;56", "wc_weaknesses": "82;122;234;13", "wc_questions": "2;2;5;138", "wc_limitations": "9;23;10;1", "wc_review": "184;250;378;284", "wc_reply_reviewers": "0;29;41;8", "wc_reply_authors": "59;88;69;69", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 53.0, 21.03568396796263 ], "wc_strengths_avg": [ 60.75, 9.364160400164021 ], "wc_weaknesses_avg": [ 112.75, 80.12919255801846 ], "wc_questions_avg": [ 36.75, 58.46954335378377 ], "wc_limitations_avg": [ 10.75, 7.8859051477937525 ], "wc_review_avg": [ 274.0, 69.98571282769076 ], "wc_reply_reviewers_avg": [ 19.5, 16.31716887208072 ], "wc_reply_authors_avg": [ 71.25, 10.497023387608508 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5235769747471135281&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cityu.edu;cityu.edu.hk;yale.edu;sfu.edu.hk;hqu.edu.cn;cityu.edu.hk", "author_num": 6, "aff_unique_index": "0;1;2;3;4;1", "aff_unique_norm": "City University;City University of Hong Kong;Yale University;Saint Francis University;Huaqiao University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cityuniversity.edu;https://www.cityu.edu.hk;https://www.yale.edu;https://www.saintfrancis.edu;https://www.hqu.edu.cn", "aff_unique_abbr": "CityU;CityU;Yale;SFU;HQU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;1;1", "aff_country_unique": "United States;China" }, { "title": "Pretrained Optimization Model for Zero-Shot Black Box Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94202", "id": "fWQhXdeuSG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fWQhXdeuSG", "openreview": "https://openreview.net/forum?id=fWQhXdeuSG", "poster": "", "project": "", "author_site": "Xiaobin Li, Kai Wu, yujian li, Xiaoyu Zhang, Handing Wang, Jing Liu", "tldr": "", "abstract": "Zero-shot optimization involves optimizing a target task that was not seen during training, aiming to provide the optimal solution without or with minimal adjustments to the optimizer. It is crucial to ensure reliable and robust performance in various applications. Current optimizers often struggle with zero-shot optimization and require intricate hyperparameter tuning to adapt to new tasks. To address this, we propose a Pretrained Optimization Model (POM) that leverages knowledge gained from optimizing diverse tasks, offering efficient solutions to zero-shot optimization through direct application or fine-tuning with few-shot samples. Evaluation on the BBOB benchmark and two robot control tasks demonstrates that POM outperforms state-of-the-art black-box optimization methods, especially for high-dimensional tasks. Fine-tuning POM with a small number of samples and budget yields significant performance improvements. Moreover, POM demonstrates robust generalization across diverse task distributions, dimensions, population sizes, and optimization horizons. For code implementation, see https://github.com/ninja-wm/POM/.", "keywords": "Pretrained Optimization Models;Black-box Optimization;Zero-shot Optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Xiaobin Li;Kai Wu;Yujian Betterest Li;Xiaoyu Zhang;Handing Wang;Jing Liu", "authorids": "~Xiaobin_Li1;~Kai_Wu3;~Yujian_Betterest_Li1;~Xiaoyu_Zhang6;~Handing_Wang1;~Jing_Liu20", "gender": "M;;;F;F;F", "homepage": ";;;https://scholar.google.com.hk/citations?user=XtfE1f0AAAAJ&hl=zh-CN;;https://faculty.xidian.edu.cn/LJ22/zh_CN/index.htm", "dblp": "91/1888;;;12/5927-10.html;125/6067;72/2590-6", "google_scholar": ";;;https://scholar.google.com.hk/citations?user=XtfE1f0AAAAJ;https://scholar.google.com.hk/citations?user=JtYr_7cAAAAJ;kqRxf3MAAAAJ", "orcid": ";;;;0000-0002-4805-3780;0000-0002-6834-5350", "linkedin": ";;;;;", "or_profile": "~Xiaobin_Li1;~Kai_Wu3;~Yujian_Betterest_Li1;~Xiaoyu_Zhang6;~Handing_Wang1;~Jing_Liu20", "aff": "Xidian University;;;Xidian University;Xidian University ;Xidian University, China", "aff_domain": "xidian.edu.cn;;;xidian.edu.cn;xidian.edu.cn;mail.xidian.edu.cn", "position": "MS student;;;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024pretrained,\ntitle={Pretrained Optimization Model for Zero-Shot Black Box Optimization},\nauthor={Xiaobin Li and Kai Wu and Yujian Betterest Li and Xiaoyu Zhang and Handing Wang and Jing Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fWQhXdeuSG}\n}", "github": "", "reviewers": "eUpS;G2ar;Se7V", "pdf_size": 3215391, "rating": "5;6;7", "confidence": "1;4;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "1;2;3", "wc_summary": "97;36;64", "wc_strengths": "42;24;88", "wc_weaknesses": "79;355;179", "wc_questions": "24;84;85", "wc_limitations": "11;2;6", "wc_review": "253;501;422", "wc_reply_reviewers": "0;299;96", "wc_reply_authors": "58;1684;432", "reply_reviewers": "0;2;2", "reply_authors": "2;6;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 1.699673171197595 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 65.66666666666667, 24.931015935086872 ], "wc_strengths_avg": [ 51.333333333333336, 26.948510575210317 ], "wc_weaknesses_avg": [ 204.33333333333334, 114.0915811481675 ], "wc_questions_avg": [ 64.33333333333333, 28.522895287041873 ], "wc_limitations_avg": [ 6.333333333333333, 3.6817870057290873 ], "wc_review_avg": [ 392.0, 103.44402673265705 ], "wc_reply_reviewers_avg": [ 131.66666666666666, 124.64438303518624 ], "wc_reply_authors_avg": [ 724.6666666666666, 695.3221475610344 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9607689228305226, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8458001346648878415&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "xidian.edu.cn;;;xidian.edu.cn;xidian.edu.cn;mail.xidian.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Xidian University", "aff_unique_dep": "", "aff_unique_url": "http://www.xidian.edu.cn/", "aff_unique_abbr": "Xidian", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DeepStack: Deeply Stacking Visual Tokens is Surprisingly Simple and Effective for LMMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94201", "id": "fXDpDzHTDV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fXDpDzHTDV", "openreview": "https://openreview.net/forum?id=fXDpDzHTDV", "poster": "/media/PosterPDFs/NeurIPS%202024/94201.png?t=1731741580.572668", "project": "", "author_site": "Lingchen Meng, Jianwei Yang, Rui Tian, Xiyang Dai, Zuxuan Wu, Jianfeng Gao, Yu-Gang Jiang", "tldr": "", "abstract": "Most large multimodal models (LMMs) are implemented by feeding visual tokens as a sequence into the first layer of a large language model (LLM). \nThe resulting architecture is simple but significantly increases computation and memory costs, as it has to handle a large number of additional tokens in its input layer. \nThis paper presents a new architecture *DeepStack* for LMMs. \nConsidering $N$ layers in the language and vision transformer of LMMs, we stack the visual tokens into $N$ groups and feed each group to its aligned transformer layer from bottom to top. Surprisingly, this simple method greatly enhances the power of LMMs to model interactions among visual tokens across layers but with minimal additional cost. We apply *DeepStack* to both language and vision transformer in LMMs, and \nvalidate the effectiveness of *DeepStack* LMMs with extensive empirical results. Using the same context length, our DeepStack 7B and 13B parameters surpass their counterparts by 2.7 and 2.9 on average across 9 benchmarks, respectively. Using only one-fifth of the context length, DeepStack rivals closely to the counterparts that use the full context length. These gains are particularly pronounced on high-resolution tasks, *e.g.*, 4.2, 11.0, and 4.0 improvements on TextVQA, DocVQA, and InfoVQA compared to LLaVA-1.5-7B, respectively. We further apply *DeepStack* to vision transformer layers, which brings us a similar amount of improvements, 3.8 on average compared with LLaVA-1.5-7B.", "keywords": "Large Multi-modal Models;High-resolution Visual Token", "primary_area": "machine_vision", "supplementary_material": "/attachment/67060167bbfc60f0415e6e58cc857a1e25c888b6.zip", "author": "Lingchen Meng;Jianwei Yang;Rui Tian;Xiyang Dai;Zuxuan Wu;Jianfeng Gao;Yu-Gang Jiang", "authorids": "~Lingchen_Meng1;~Jianwei_Yang1;~Rui_Tian2;~Xiyang_Dai4;~Zuxuan_Wu1;~Jianfeng_Gao1;~Yu-Gang_Jiang1", "gender": "M;F;M;M;M;M;M", "homepage": "https://menglcool.github.io;https://github.com/ruitian12;https://zxwu.azurewebsites.net/;https://www.microsoft.com/en-us/research/people/jfgao/;https://fvl.fudan.edu.cn/people/yugangjiang/;https://jwyang.github.io/;https://sites.google.com/site/xiyangdai/", "dblp": "300/8453;;150/8447;92/5339;24/5818;;176/5470", "google_scholar": "https://scholar.google.com/citations?hl=en;zTI-OFoAAAAJ;7t12hVkAAAAJ;https://scholar.google.com/citations?hl=en;f3_FP8AAAAAJ;Cl9byD8AAAAJ;QC8RwcoAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Lingchen_Meng1;~Rui_Tian2;~Zuxuan_Wu1;~Jianfeng_Gao1;~Yu-Gang_Jiang1;~Jianwei_Yang2;~Xiyang_Dai2", "aff": "Fudan University;Microsoft;Fudan University;Microsoft Research;Fudan University;Microsoft;Microsoft", "aff_domain": "fudan.edu.cn;microsoft.com;fudan.edu;microsoft.com;fudan.edu.cn;microsoft.com;microsoft.com", "position": "PhD student;Intern;Associate Professor;Principal Researcher;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nmeng2024deepstack,\ntitle={DeepStack: Deeply Stacking Visual Tokens is Surprisingly Simple and Effective for {LMM}s},\nauthor={Lingchen Meng and Jianwei Yang and Rui Tian and Xiyang Dai and Zuxuan Wu and Jianfeng Gao and Yu-Gang Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fXDpDzHTDV}\n}", "github": "", "reviewers": "REjR;SQxC;QJM9", "pdf_size": 1809072, "rating": "6;7;7", "confidence": "3;5;4", "soundness": "2;4;3", "novelty": "3;4;3", "presentation": "2;2;4", "wc_summary": "67;36;53", "wc_strengths": "103;32;26", "wc_weaknesses": "88;62;20", "wc_questions": "63;22;1", "wc_limitations": "176;24;1", "wc_review": "497;176;101", "wc_reply_reviewers": "31;14;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 52.0, 12.675435561221029 ], "wc_strengths_avg": [ 53.666666666666664, 34.96982826507572 ], "wc_weaknesses_avg": [ 56.666666666666664, 28.015868519267595 ], "wc_questions_avg": [ 28.666666666666668, 25.74662868977002 ], "wc_limitations_avg": [ 67.0, 77.64448896519744 ], "wc_review_avg": [ 258.0, 171.74981804939415 ], "wc_reply_reviewers_avg": [ 15.0, 12.675435561221029 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8051497452900656735&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;microsoft.com;fudan.edu;microsoft.com;fudan.edu.cn;microsoft.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;0;1;0;1;1", "aff_unique_norm": "Fudan University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "Fudan;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Referring Human Pose and Mask Estimation In the Wild", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94200", "id": "fXEi3LVflp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fXEi3LVflp", "openreview": "https://openreview.net/forum?id=fXEi3LVflp", "poster": "/media/PosterPDFs/NeurIPS%202024/94200.png?t=1730294730.1306872", "project": "", "author_site": "Bo Miao, Mingtao Feng, Zijie Wu, Mohammed Bennamoun, Yongsheng Gao, Ajmal Mian", "tldr": "", "abstract": "We introduce Referring Human Pose and Mask Estimation (R-HPM) in the wild, where either a text or positional prompt specifies the person of interest in an image. This new task holds significant potential for human-centric applications such as assistive robotics and sports analysis. In contrast to previous works, R-HPM (i) ensures high-quality, identity-aware results corresponding to the referred person, and (ii) simultaneously predicts human pose and mask for a comprehensive representation. To achieve this, we introduce a large-scale dataset named RefHuman, which substantially extends the MS COCO dataset with additional text and positional prompt annotations. RefHuman includes over 50,000 annotated instances in the wild, each equipped with keypoint, mask, and prompt annotations. To enable prompt-conditioned estimation, we propose the first end-to-end promptable approach named UniPHD for R-HPM. UniPHD extracts multimodal representations and employs a proposed pose-centric hierarchical decoder to process (text or positional) instance queries and keypoint queries, producing results specific to the referred person. Extensive experiments demonstrate that UniPHD produces quality results based on user-friendly prompts and achieves top-tier performance on RefHuman val and MS COCO val2017.", "keywords": "Referring Human Pose and Mask Estimation;Object Segmentation;Human-AI Interaction;Vision-and-Language;Multimodal Reasoning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Bo Miao;Mingtao Feng;Zijie Wu;Mohammed Bennamoun;Yongsheng Gao;Ajmal Saeed Mian", "authorids": "~Bo_Miao1;~Mingtao_Feng2;~Zijie_Wu1;~Mohammed_Bennamoun1;~Yongsheng_Gao1;~Ajmal_Saeed_Mian1", "gender": "M;M;M;M;;M", "homepage": ";https://scholar.google.com/citations?user=0JTE5YAAAAAJ&hl=zh-CN;https://scholar.google.com/citations?user=AFsLiBcAAAAJ&hl=zh-CN;https://research-repository.uwa.edu.au/en/persons/mohammed-bennamoun;;https://ajmalsaeed.net/", "dblp": "242/5030;184/6596;243/9714;00/3214.html;;63/807", "google_scholar": "AJnEdGoAAAAJ;0JTE5YAAAAAJ;AFsLiBcAAAAJ;https://scholar.google.com.au/citations?user=ylX5MEAAAAAJ;;X589yaIAAAAJ", "orcid": "0000-0002-3025-4429;;0000-0002-0675-6525;0000-0002-6603-3257;;0000-0002-5206-3842", "linkedin": ";;;mohammed-bennamoun-b3147174/;;", "or_profile": "~Bo_Miao1;~Mingtao_Feng2;~Zijie_Wu1;~Mohammed_Bennamoun1;~Yongsheng_Gao1;~Ajmal_Saeed_Mian1", "aff": "University of Western Australia;Xidian University;Hunan University;University of Western Australia;;University of Western Australia", "aff_domain": "uwa.edu.au;xidian.edu.cn;hnu.edu.cn;uwa.edu.au;;uwa.edu.au", "position": "PhD student;Associate Professor;PhD student;Full Professor;;Professor", "bibtex": "@inproceedings{\nmiao2024referring,\ntitle={Referring Human Pose and Mask Estimation In the Wild},\nauthor={Bo Miao and Mingtao Feng and Zijie Wu and Mohammed Bennamoun and Yongsheng Gao and Ajmal Saeed Mian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fXEi3LVflp}\n}", "github": "", "reviewers": "TbW6;DJJG;apkP", "pdf_size": 3104089, "rating": "5;6;6", "confidence": "4;5;4", "soundness": "3;3;3", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "53;65;99", "wc_strengths": "32;67;60", "wc_weaknesses": "169;253;132", "wc_questions": "3;29;19", "wc_limitations": "1;7;9", "wc_review": "258;421;319", "wc_reply_reviewers": "31;0;39", "wc_reply_authors": "89;48;88", "reply_reviewers": "1;0;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.33333333333333, 19.48218559493661 ], "wc_strengths_avg": [ 53.0, 15.121728296285006 ], "wc_weaknesses_avg": [ 184.66666666666666, 50.62498285322069 ], "wc_questions_avg": [ 17.0, 10.708252269472673 ], "wc_limitations_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_review_avg": [ 332.6666666666667, 67.24251300743865 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 16.81930108205715 ], "wc_reply_authors_avg": [ 75.0, 19.096247449870006 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3185304835915881695&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uwa.edu.au;xidian.edu.cn;hnu.edu.cn;uwa.edu.au;;uwa.edu.au", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Western Australia;Xidian University;Hunan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uwa.edu.au;http://www.xidian.edu.cn/;http://www.hunu.edu.cn/", "aff_unique_abbr": "UWA;Xidian;HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Australia;China" }, { "title": "MatFormer: Nested Transformer for Elastic Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94199", "id": "fYa6ezMxD5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fYa6ezMxD5", "openreview": "https://openreview.net/forum?id=fYa6ezMxD5", "poster": "", "project": "", "author_site": "Fnu Devvrit, Sneha Kudugunta, Aditya Kusupati, Tim Dettmers, Kaifeng Chen, Inderjit Dhillon, Yulia Tsvetkov, Hanna Hajishirzi, Sham Kakade, Ali Farhadi, Prateek Jain", "tldr": "", "abstract": "Foundation models are applied in a broad spectrum of settings with different inference constraints, from massive multi-accelerator clusters to resource-constrained standalone mobile devices. However, the substantial costs associated with training these models often limit the number of unique model sizes that can be offered. Consequently, practitioners are compelled to select a model that may not be optimally aligned with their specific latency and cost requirements. We present MatFormer, a novel Transformer architecture designed to provide elastic inference across diverse deployment constraints. MatFormer achieves this by incorporating a nested Feed Forward Network (FFN) block structure within a standard Transformer model. During training, we optimize the parameters of multiple nested FFN blocks with varying sizes, enabling the extraction of hundreds of accurate smaller models without incurring additional computational costs. We empirically validate the efficacy of MatFormer across different model classes (decoders and encoders) and modalities (language and vision), demonstrating its potential for real-world deployment. We show that a 850M decoder-only MatFormer language model (MatLM) allows us to extract multiple smaller models spanning from 582M to 850M parameters, each exhibiting better validation loss and one-shot downstream evaluations than independently trained counterparts. Furthermore, we observe that smaller encoders extracted from a universal MatFormer-based ViT (MatViT) encoder preserve the metric-space structure for adaptive large-scale retrieval. Finally, we showcase that speculative decoding with the accurate and consistent submodels extracted from MatFormer can lead to significant reduction in inference latency.", "keywords": "Transformer;Large-scale deployment;Efficiency", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Fnu Devvrit;Sneha Kudugunta;Aditya Kusupati;Tim Dettmers;Kaifeng Chen;Inderjit S Dhillon;Yulia Tsvetkov;Hannaneh Hajishirzi;Sham M. Kakade;Ali Farhadi;Prateek Jain", "authorids": "~Fnu_Devvrit1;~Sneha_Kudugunta1;~Aditya_Kusupati1;~Tim_Dettmers2;~Kaifeng_Chen2;~Inderjit_S_Dhillon1;~Yulia_Tsvetkov1;~Hannaneh_Hajishirzi1;~Sham_M._Kakade1;~Ali_Farhadi3;~Prateek_Jain1", "gender": "M;F;M;M;M;M;F;F;M;M;M", "homepage": ";;http://www.adityakusupati.com/;https://timdettmers.com/;;http://www.cs.utexas.edu/users/inderjit/;https://homes.cs.washington.edu/~yuliats/;https://homes.cs.washington.edu/~hannaneh/;https://shamulent.github.io;https://homes.cs.washington.edu/~ali/;http://prateekjain.org", "dblp": ";;231/7662;172/1045;186/7404;d/InderjitSDhillon;75/8157;52/1296;s/SMKakade;37/5826;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": "c86HtPoAAAAJ;LeEwxtgAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;lHI3w5kAAAAJ;xjEcoNQAAAAJ;xBv5ZfkAAAAJ;SEDPkrsAAAAJ;LOV6_WIAAAAJ;https://scholar.google.com.tw/citations?user=wb-DKCIAAAAJ;jeOFRDsAAAAJ;qYhRbJoAAAAJ", "orcid": ";;0000-0001-8455-1851;;;;0000-0002-4634-7128;;;;", "linkedin": "devvrit/;;adityakusupati/;;kaifeng-chen-b37a2b69/;inderjit-dhillon-a20888b0/;;;;;", "or_profile": "~Fnu_Devvrit1;~Sneha_Kudugunta1;~Aditya_Kusupati1;~Tim_Dettmers2;~Kaifeng_Chen2;~Inderjit_S_Dhillon1;~Yulia_Tsvetkov1;~Hannaneh_Hajishirzi1;~Sham_M._Kakade1;~Ali_Farhadi3;~Prateek_Jain1", "aff": ", University of Texas at Austin;Google DeepMind;Department of Computer Science, University of Washington;University of Washington;Google;University of Texas, Austin;Department of Computer Science, University of Washington;University of Washington;Harvard University;University of Washington;Google", "aff_domain": "cs.utexas.edu;google.com;cs.washington.edu;cs.washington.edu;google.com;utexas.edu;cs.washington.edu;uw.edu;harvard.edu;cs.uw.edu;google.com", "position": "PhD student;Researcher;PhD student;PhD student;Researcher;Full Professor;Associate Professor;Associate Professor;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ndevvrit2024matformer,\ntitle={MatFormer: Nested Transformer for Elastic Inference},\nauthor={Fnu Devvrit and Sneha Kudugunta and Aditya Kusupati and Tim Dettmers and Kaifeng Chen and Inderjit S Dhillon and Yulia Tsvetkov and Hannaneh Hajishirzi and Sham M. Kakade and Ali Farhadi and Prateek Jain},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fYa6ezMxD5}\n}", "github": "", "reviewers": "6C9L;51tp;vEVM;zcbm", "pdf_size": 949617, "rating": "5;6;7;9", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "3;3;3;4", "wc_summary": "79;90;167;63", "wc_strengths": "87;96;178;45", "wc_weaknesses": "105;135;340;12", "wc_questions": "657;1;355;29", "wc_limitations": "21;1;17;4", "wc_review": "949;323;1057;153", "wc_reply_reviewers": "288;18;45;33", "wc_reply_authors": "360;0;0;19", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.75, 39.99609355924651 ], "wc_strengths_avg": [ 101.5, 48.17935242404157 ], "wc_weaknesses_avg": [ 148.0, 119.76852675056165 ], "wc_questions_avg": [ 260.5, 267.8969018111258 ], "wc_limitations_avg": [ 10.75, 8.437268515343103 ], "wc_review_avg": [ 620.5, 389.0716514987953 ], "wc_reply_reviewers_avg": [ 96.0, 111.26320146391618 ], "wc_reply_authors_avg": [ 94.75, 153.33847364572273 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.29277002188455997, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15483101880052662391&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.utexas.edu;google.com;cs.washington.edu;cs.washington.edu;google.com;utexas.edu;cs.washington.edu;uw.edu;harvard.edu;cs.uw.edu;google.com", "author_num": 11, "aff_unique_index": "0;1;2;2;1;0;2;2;3;2;1", "aff_unique_norm": "University of Texas at Austin;Google;University of Washington;Harvard University", "aff_unique_dep": ";Google DeepMind;Department of Computer Science;", "aff_unique_url": "https://www.utexas.edu;https://deepmind.com;https://www.washington.edu;https://www.harvard.edu", "aff_unique_abbr": "UT Austin;DeepMind;UW;Harvard", "aff_campus_unique_index": "0;2;3;0;2;3", "aff_campus_unique": "Austin;;Seattle;Mountain View", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Learning predictable and robust neural representations by straightening image sequences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94198", "id": "fYfliutfHX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fYfliutfHX", "openreview": "https://openreview.net/forum?id=fYfliutfHX", "poster": "/media/PosterPDFs/NeurIPS%202024/94198.png?t=1733768142.835843", "project": "", "author_site": "Julie Xueyan Niu, Cristina Savin, Eero Simoncelli", "tldr": "", "abstract": "Prediction is a fundamental capability of all living organisms, and has been proposed as an objective for learning sensory representations. Recent work demonstrates that in primate visual systems, prediction is facilitated by neural representations that follow straighter temporal trajectories than their initial photoreceptor encoding, which allows for prediction by linear extrapolation. Inspired by these experimental findings, we develop a self-supervised learning (SSL) objective that explicitly quantifies and promotes straightening. We demonstrate the power of this objective in training deep feedforward neural networks on smoothly-rendered synthetic image sequences that mimic commonly-occurring properties of natural videos. The learned model contains neural embeddings that are predictive, but also factorize the geometric, photometric, and semantic attributes of objects. The representations also prove more robust to noise and adversarial attacks compared to previous SSL methods that optimize for invariance to random augmentations. Moreover, these beneficial properties can be transferred to other training procedures by using the straightening objective as a regularizer, suggesting a broader utility for straightening as a principle for robust unsupervised learning.", "keywords": "straightening; prediction; self-supervised learning; robustness", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Xueyan Niu;Cristina Savin;Eero P Simoncelli", "authorids": "~Xueyan_Niu2;~Cristina_Savin1;~Eero_P_Simoncelli1", "gender": ";F;M", "homepage": ";http://csavin.wixsite.com/savinlab;https://www.cns.nyu.edu/~eero/", "dblp": ";23/10829;30/5604", "google_scholar": "_D9pHloAAAAJ;muNtwgcAAAAJ;MplR7_cAAAAJ", "orcid": ";0000-0002-3414-8244;0000-0002-1206-527X", "linkedin": ";cristina-savin-1889199b/;eero-simoncelli-445782123", "or_profile": "~Xueyan_Niu2;~Cristina_Savin1;~Eero_Peter_Simoncelli1", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nniu2024learning,\ntitle={Learning predictable and robust neural representations by straightening image sequences},\nauthor={Xueyan Niu and Cristina Savin and Eero P Simoncelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fYfliutfHX}\n}", "github": "", "reviewers": "jxcp;Z5Vh;etrK", "pdf_size": 8259701, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "2;4;3", "novelty": "3;2;3", "presentation": "3;4;4", "wc_summary": "71;113;118", "wc_strengths": "59;232;75", "wc_weaknesses": "151;340;160", "wc_questions": "6;118;63", "wc_limitations": "120;25;70", "wc_review": "407;828;486", "wc_reply_reviewers": "53;92;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.66666666666667, 21.076579946049648 ], "wc_strengths_avg": [ 122.0, 78.05553578489271 ], "wc_weaknesses_avg": [ 217.0, 87.05170877128145 ], "wc_questions_avg": [ 62.333333333333336, 45.72623851673008 ], "wc_limitations_avg": [ 71.66666666666667, 38.80148908940939 ], "wc_review_avg": [ 573.6666666666666, 182.70984891047578 ], "wc_reply_reviewers_avg": [ 48.333333333333336, 37.70352179247392 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5720684179840094149&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nyu.edu;nyu.edu;nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Higher-Rank Irreducible Cartesian Tensors for Equivariant Message Passing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94197", "id": "faBXeVBNqz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=faBXeVBNqz", "openreview": "https://openreview.net/forum?id=faBXeVBNqz", "poster": "/media/PosterPDFs/NeurIPS%202024/94197.png?t=1733492037.047673", "project": "", "author_site": "Viktor Zaverkin, Francesco Alesiani, Takashi Maruyama, Federico Errica, Henrik Christiansen, Makoto Takamoto, Nicolas Weber, Mathias Niepert", "tldr": "", "abstract": "The ability to perform fast and accurate atomistic simulations is crucial for advancing the chemical sciences. By learning from high-quality data, machine-learned interatomic potentials achieve accuracy on par with ab initio and first-principles methods at a fraction of their computational cost. The success of machine-learned interatomic potentials arises from integrating inductive biases such as equivariance to group actions on an atomic system, e.g., equivariance to rotations and reflections. In particular, the field has notably advanced with the emergence of equivariant message passing. Most of these models represent an atomic system using spherical tensors, tensor products of which require complicated numerical coefficients and can be computationally demanding. Cartesian tensors offer a promising alternative, though state-of-the-art methods lack flexibility in message-passing mechanisms, restricting their architectures and expressive power. This work explores higher-rank irreducible Cartesian tensors to address these limitations. We integrate irreducible Cartesian tensor products into message-passing neural networks and prove the equivariance and traceless property of the resulting layers. Through empirical evaluations on various benchmark data sets, we consistently observe on-par or better performance than that of state-of-the-art spherical and Cartesian models.", "keywords": "equivariance;graph neural networks;interatomic potentials;irreducible Cartesian tensors;many-body interactions;molecules;materials", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/f300a1017bf6f9a3e732120205ec14006d3a9c2f.zip", "author": "Viktor Zaverkin;Francesco Alesiani;Takashi Maruyama;Federico Errica;Henrik Christiansen;Makoto Takamoto;Nicolas Weber;Mathias Niepert", "authorids": "~Viktor_Zaverkin1;~Francesco_Alesiani1;~Takashi_Maruyama2;~Federico_Errica1;~Henrik_Christiansen1;~Makoto_Takamoto1;~Nicolas_Weber1;~Mathias_Niepert1", "gender": "M;;M;M;M;M;M;M", "homepage": "https://viktorzaverkin.github.io/;https://falesiani.github.io/;https://sites.google.com/view/tmaruyama/home;http://pages.di.unipi.it/errica/;https://henrik-christiansen.de/;https://www.neclab.eu/;;http://www.matlog.net", "dblp": ";122/8256;15/1541;203/9424;;;31/515;n/MathiasNiepert", "google_scholar": "OTOXTzgAAAAJ;0puEQdgAAAAJ;;https://scholar.google.co.uk/citations?user=VJ0n2gQAAAAJ;https://scholar.google.de/citations?user=ARDarRoAAAAJ;;;https://scholar.google.de/citations?user=p5vLzq0AAAAJ", "orcid": "0000-0001-9940-8548;0000-0003-4413-7247;;0000-0001-5181-2904;0000-0003-1718-6334;;;", "linkedin": ";francesco-alesiani-2b48b74;;https://it.linkedin.com/in/federicoerrica;henrik-christiansen-835ba1200/;;;", "or_profile": "~Viktor_Zaverkin1;~Francesco_Alesiani1;~Takashi_Maruyama2;~Federico_Errica1;~Henrik_Christiansen1;~Makoto_Takamoto1;~Nicolas_Weber1;~Mathias_Niepert1", "aff": "NEC Laboratories Europe;NEC;NEC;NEC;NEC;NEC;NEC Laboratories Europe;NEC", "aff_domain": "neclab.eu;neclab.eu;nec.com;neclab.eu;neclab.eu;neclab.eu;neclab.eu;neclab.eu", "position": "Researcher;Senior Researcher;Researcher;Researcher;Researcher;Researcher;Postdoc;Research Scientist", "bibtex": "@inproceedings{\nzaverkin2024higherrank,\ntitle={Higher-Rank Irreducible Cartesian Tensors for Equivariant Message Passing},\nauthor={Viktor Zaverkin and Francesco Alesiani and Takashi Maruyama and Federico Errica and Henrik Christiansen and Makoto Takamoto and Nicolas Weber and Mathias Niepert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=faBXeVBNqz}\n}", "github": "", "reviewers": "qF29;69Cm;Prcd", "pdf_size": 1950367, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "2;2;3", "novelty": "2;3;2", "presentation": "3;3;3", "wc_summary": "50;73;62", "wc_strengths": "66;43;83", "wc_weaknesses": "98;319;105", "wc_questions": "120;39;137", "wc_limitations": "2;6;7", "wc_review": "336;480;394", "wc_reply_reviewers": "200;20;82", "wc_reply_authors": "2540;1826;1831", "reply_reviewers": "3;1;1", "reply_authors": "7;5;5", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.666666666666664, 9.392668535736913 ], "wc_strengths_avg": [ 64.0, 16.391054470858997 ], "wc_weaknesses_avg": [ 174.0, 102.57030109474509 ], "wc_questions_avg": [ 98.66666666666667, 42.7577153531643 ], "wc_limitations_avg": [ 5.0, 2.160246899469287 ], "wc_review_avg": [ 403.3333333333333, 59.157041470610714 ], "wc_reply_reviewers_avg": [ 100.66666666666667, 74.6607140484353 ], "wc_reply_authors_avg": [ 2065.6666666666665, 335.4105278941348 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 5.666666666666667, 0.9428090415820634 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7406207955432091475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "neclab.eu;neclab.eu;nec.com;neclab.eu;neclab.eu;neclab.eu;neclab.eu;neclab.eu", "author_num": 8, "aff_unique_index": "0;1;1;1;1;1;0;1", "aff_unique_norm": "NEC Laboratories Europe;NEC Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nec-labs.eu;https://www.nec.com", "aff_unique_abbr": "NEC LE;NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0;1", "aff_country_unique": "Unknown;Japan" }, { "title": "Graph Neural Networks Need Cluster-Normalize-Activate Modules", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94196", "id": "faj2EBhdHC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=faj2EBhdHC", "openreview": "https://openreview.net/forum?id=faj2EBhdHC", "poster": "/media/PosterPDFs/NeurIPS%202024/94196.png?t=1733306088.736497", "project": "", "author_site": "Arseny Skryagin, Felix Divo, Mohammad Amin Ali, Devendra S Dhami, Kristian Kersting", "tldr": "", "abstract": "Graph Neural Networks (GNNs) are non-Euclidean deep learning models for graph-structured data. Despite their successful and diverse applications, oversmoothing prohibits deep architectures due to node features converging to a single fixed point. This severely limits their potential to solve complex tasks. To counteract this tendency, we propose a plug-and-play module consisting of three steps: Cluster\u2192Normalize\u2192Activate (CNA). By applying CNA modules, GNNs search and form super nodes in each layer, which are normalized and activated individually. We demonstrate in node classification and property prediction tasks that CNA significantly improves the accuracy over the state-of-the-art. Particularly, CNA reaches 94.18% and 95.75% accuracy on Cora and CiteSeer, respectively. It further benefits GNNs in regression tasks as well, reducing the mean squared error compared to all baselines. At the same time, GNNs with CNA require substantially fewer learnable parameters than competing architectures.", "keywords": "Graph Neural Networks;Deep Geometric Learning;Learnable Activation Functions;Oversmoothing", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Arseny Skryagin;Felix Divo;Mohammad Amin Ali;Devendra Singh Dhami;Kristian Kersting", "authorids": "~Arseny_Skryagin1;~Felix_Divo1;~Mohammad_Amin_Ali1;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "gender": "M;;M;M;M", "homepage": "https://ml-research.github.io/people/skryagin/;;;https://sites.google.com/view/devendradhami;http://www.ml.informatik.tu-darmstadt.de/", "dblp": ";;;201/2130;40/3793", "google_scholar": ";;;aVlaHfkAAAAJ;QY-earAAAAAJ", "orcid": ";;;;0000-0002-2873-9152", "linkedin": ";;amin-ali-9bb3b9188/;;", "or_profile": "~Arseny_Skryagin1;~Felix_Divo1;~Mohammad_Amin_Ali1;~Devendra_Singh_Dhami1;~Kristian_Kersting1", "aff": "CS Department, TU Darmstadt, TU Darmstadt;;Technische Universit\u00e4t Darmstadt;Eindhoven University of Technology;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;;tu-darmstadt.de;tue.nl;tu-darmstadt.de", "position": "PhD student;;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nskryagin2024graph,\ntitle={Graph Neural Networks Need Cluster-Normalize-Activate Modules},\nauthor={Arseny Skryagin and Felix Divo and Mohammad Amin Ali and Devendra Singh Dhami and Kristian Kersting},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=faj2EBhdHC}\n}", "github": "", "reviewers": "v2xE;swbi;gBSJ;jRzY", "pdf_size": 6293321, "rating": "5;5;5;6", "confidence": "3;5;4;4", "soundness": "3;2;2;3", "novelty": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "98;135;116;40", "wc_strengths": "59;47;81;51", "wc_weaknesses": "120;146;233;52", "wc_questions": "2;5;14;3", "wc_limitations": "1;7;14;1", "wc_review": "280;340;458;147", "wc_reply_reviewers": "17;44;359;11", "wc_reply_authors": "57;43;1054;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;4;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.25, 35.54838252297846 ], "wc_strengths_avg": [ 59.5, 13.143439428094915 ], "wc_weaknesses_avg": [ 137.75, 64.82428171603601 ], "wc_questions_avg": [ 6.0, 4.743416490252569 ], "wc_limitations_avg": [ 5.75, 5.356071321407137 ], "wc_review_avg": [ 306.25, 112.04547068043402 ], "wc_reply_reviewers_avg": [ 107.75, 145.590822169531 ], "wc_reply_authors_avg": [ 288.5, 442.46045020995945 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RJ39zP_qtccJ:scholar.google.com/&scioq=Graph+Neural+Networks+Need+Cluster-Normalize-Activate+Modules&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cs.tu-darmstadt.de;;tu-darmstadt.de;tue.nl;tu-darmstadt.de", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Eindhoven University of Technology", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.tue.nl", "aff_unique_abbr": "TU Darmstadt;TU/e", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "title": "Searching for Efficient Linear Layers over a Continuous Space of Structured Matrices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94195", "id": "fc88ANWvdF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fc88ANWvdF", "openreview": "https://openreview.net/forum?id=fc88ANWvdF", "poster": "", "project": "", "author_site": "Andres Potapczynski, Shikai Qiu, Marc Finzi, Christopher Ferri, Charlie Chen, Micah Goldblum, C. Bayan Bruss, Christopher De Sa, Andrew Wilson", "tldr": "", "abstract": "Dense linear layers are the dominant computational bottleneck in large neural networks, presenting a critical need for more efficient alternatives. Previous efforts to develop alternatives have focused on a small number of hand-crafted structured matrices, and have neglected to investigate whether these structures can surpass dense layers in terms of compute-optimal scaling laws when both the model size and training examples are optimally allocated. In this work, we present a unifying framework that enables searching among all linear operators expressible via an Einstein summation. This framework encompasses many previously proposed structures, such as low-rank, Kronecker, Tensor-Train, and Monarch, along with many novel structures. We develop a taxonomy of all such operators based on their computational and algebraic properties, which provides insights into their scaling laws. Combining these insights with empirical evaluation, we identify a subset of structures that achieve equal or better performance than dense layers as a function of training compute. To further improve their compute efficiency, we develop a natural extension of these performant structures that convert them into a sparse Mixture-of-Experts layer. The resulting layer significantly outperforms dense layers in compute-optimal training efficiency for GPT-2 language models.", "keywords": "Structured Matrices;Scaling Laws;Linear Layers;Optimization;Deep Learning;Einsum", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/62e86afdfd37996bf83076b0e51267c092198af7.zip", "author": "Andres Potapczynski;Shikai Qiu;Marc Anton Finzi;Christopher Ferri;Zixi Chen;Micah Goldblum;C. Bayan Bruss;Christopher De Sa;Andrew Gordon Wilson", "authorids": "~Andres_Potapczynski3;~Shikai_Qiu1;~Marc_Anton_Finzi1;~Christopher_Ferri1;~Zixi_Chen4;~Micah_Goldblum1;~C._Bayan_Bruss1;~Christopher_De_Sa2;~Andrew_Gordon_Wilson1", "gender": ";M;M;M;;;M;Not Specified;M", "homepage": "https://andpotap.com/;https://shikaiqiu.github.io/;https://mfinzi.github.io;;https://charliezchen.github.io/;;https://www.cbbruss.com;https://cims.nyu.edu/~andrewgw;http://cs.cornell.edu/~cdesa", "dblp": "255/7271;;222/3062;;;241/7231;;65/10453;154/6336", "google_scholar": ";pK0OAsQAAAAJ;ysMAhlwAAAAJ;;;pGDKzuUAAAAJ;ClqvGRQAAAAJ;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ;", "orcid": ";;;0000-0002-9435-3274;;;;;", "linkedin": ";;;;;;bayan-bruss/;;", "or_profile": "~Andres_Potapczynski3;~Shikai_Qiu1;~Marc_Anton_Finzi1;~Christopher_Ferri1;~Zixi_Chen4;~Micah_Goldblum1;~C._Bayan_Bruss1;~Andrew_Gordon_Wilson1;~Christopher_De_Sa1", "aff": "New York University;New York University;Carnegie Mellon University;Capital One;New York University;New York University;;New York University;Cornell University", "aff_domain": "nyu.edu;nyu.edu;cmu.edu;capitalone.com;nyu.edu;nyu.edu;;nyu.edu;cornell.edu", "position": "PhD student;PhD student;Postdoc;Researcher;PhD student;Postdoc;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\npotapczynski2024searching,\ntitle={Searching for Efficient Linear Layers over a Continuous Space of Structured Matrices},\nauthor={Andres Potapczynski and Shikai Qiu and Marc Anton Finzi and Christopher Ferri and Zixi Chen and Micah Goldblum and C. Bayan Bruss and Christopher De Sa and Andrew Gordon Wilson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fc88ANWvdF}\n}", "github": "", "reviewers": "bZBq;XAQT;Mhte", "pdf_size": 844946, "rating": "5;7;7", "confidence": "3;5;3", "soundness": "2;4;2", "novelty": "2;4;3", "presentation": "2;2;3", "wc_summary": "41;76;71", "wc_strengths": "9;44;55", "wc_weaknesses": "26;83;81", "wc_questions": "345;60;71", "wc_limitations": "12;40;1", "wc_review": "433;303;279", "wc_reply_reviewers": "0;26;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.666666666666664, 15.456030825826172 ], "wc_strengths_avg": [ 36.0, 19.61292091114087 ], "wc_weaknesses_avg": [ 63.333333333333336, 26.411277052720408 ], "wc_questions_avg": [ 158.66666666666666, 131.83407079439755 ], "wc_limitations_avg": [ 17.666666666666668, 16.418147141366333 ], "wc_review_avg": [ 338.3333333333333, 67.65270791985262 ], "wc_reply_reviewers_avg": [ 14.0, 10.708252269472673 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ISy_lhNEgAIJ:scholar.google.com/&scioq=Searching+for+Efficient+Linear+Layers+over+a+Continuous+Space+of+Structured+Matrices&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "nyu.edu;nyu.edu;cmu.edu;capitalone.com;nyu.edu;nyu.edu;;nyu.edu;cornell.edu", "author_num": 9, "aff_unique_index": "0;0;1;2;0;0;0;3", "aff_unique_norm": "New York University;Carnegie Mellon University;Capital One;Cornell University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nyu.edu;https://www.cmu.edu;https://www.capitalone.com;https://www.cornell.edu", "aff_unique_abbr": "NYU;CMU;Capital One;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploiting Descriptive Completeness Prior for Cross Modal Hashing with Incomplete Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94194", "id": "ferj6WqShv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ferj6WqShv", "openreview": "https://openreview.net/forum?id=ferj6WqShv", "poster": "/media/PosterPDFs/NeurIPS%202024/94194.png?t=1731308101.0786195", "project": "", "author_site": "Haoyang Luo, Zheng Zhang, Yadan Luo", "tldr": "", "abstract": "In this paper, we tackle the challenge of generating high-quality hash codes for cross-modal retrieval in the presence of incomplete labels, which creates uncertainty in distinguishing between positive and negative pairs. Vision-language models such as CLIP offer a potential solution by providing generic knowledge for missing label recovery, yet their zero-shot performance remains insufficient. To address this, we propose a novel Prompt Contrastive Recovery approach, \\textbf{PCRIL}, which progressively identifies promising positive classes from unknown label sets and recursively searches for other relevant labels. Identifying unknowns is nontrivial due to the fixed and long-tailed patterns of positive label sets in training data, which hampers the discovery of new label combinations. Therefore, we consider each subset of positive labels and construct three types of negative prompts through deletion, addition, and replacement for prompt learning. The augmented supervision guides the model to measure the completeness of label sets, thus facilitating the subsequent greedy tree search for label completion. We also address extreme cases of significant unknown labels and lack of negative pairwise supervision by deriving two augmentation strategies: seeking unknown-complementary samples for mixup and random flipping for negative labels. Extensive experiments reveal the vulnerability of current methods and demonstrate the effectiveness of PCRIL, achieving an average 12\\% mAP improvement to the current SOTA across all datasets. Our code is available at https://github.com/E-Galois/PCRIL.", "keywords": "cross-modal hashing;incomplete labels;prompt contrastive learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/7e6eb76d2edd48275d0880607693401331253620.zip", "author": "Haoyang Luo;Zheng Zhang;Yadan Luo", "authorids": "~Haoyang_Luo1;~Zheng_Zhang7;~Yadan_Luo1", "gender": ";M;F", "homepage": ";;https://sites.google.com/view/yadanluo/home", "dblp": "308/6966;181/2621-6.html;182/2414", "google_scholar": "hzUL6JcAAAAJ;tpVOb2EAAAAJ;3IfL11AAAAAJ", "orcid": ";0000-0003-1470-6998;0000-0001-6272-2971", "linkedin": ";;", "or_profile": "~Haoyang_Luo1;~Zheng_Zhang7;~Yadan_Luo1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;The University of Queensland", "aff_domain": "hit.edu.cn;hit.edu.cn;uq.edu.au", "position": "MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nluo2024exploiting,\ntitle={Exploiting Descriptive Completeness Prior for Cross Modal Hashing with Incomplete Labels},\nauthor={Haoyang Luo and Zheng Zhang and Yadan Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ferj6WqShv}\n}", "github": "", "reviewers": "MwQQ;Sq2X;FiXs;zzeX;MbLT", "pdf_size": 0, "rating": "4;5;5;6;7", "confidence": "3;4;4;5;4", "soundness": "2;3;3;3;2", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "84;141;36;81;89", "wc_strengths": "69;97;31;109;50", "wc_weaknesses": "37;109;188;114;55", "wc_questions": "29;32;4;19;57", "wc_limitations": "57;16;23;1;24", "wc_review": "276;395;282;324;275", "wc_reply_reviewers": "0;29;11;0;100", "wc_reply_authors": "80;39;55;30;60", "reply_reviewers": "0;1;1;0;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.2, 33.3550595862157 ], "wc_strengths_avg": [ 71.2, 28.861046412075915 ], "wc_weaknesses_avg": [ 100.6, 52.939965999233515 ], "wc_questions_avg": [ 28.2, 17.405746177627663 ], "wc_limitations_avg": [ 24.2, 18.345571672749802 ], "wc_review_avg": [ 310.4, 46.01130295916428 ], "wc_reply_reviewers_avg": [ 28.0, 37.52865571799768 ], "wc_reply_authors_avg": [ 52.8, 17.359723500102184 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6201736729460423, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14215966934823169931&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "hit.edu.cn;hit.edu.cn;uq.edu.au", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harbin Institute of Technology;University of Queensland", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.uq.edu.au", "aff_unique_abbr": "HIT;UQ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Graph Convolutions Enrich the Self-Attention in Transformers!", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94193", "id": "ffNrpcBpi6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ffNrpcBpi6", "openreview": "https://openreview.net/forum?id=ffNrpcBpi6", "poster": "/media/PosterPDFs/NeurIPS%202024/94193.png?t=1733675592.777139", "project": "", "author_site": "Jeongwhan Choi, Hyowon Wi, Jayoung Kim, Yehjin Shin, Kookjin Lee, Nathaniel Trask, Noseong Park", "tldr": "", "abstract": "Transformers, renowned for their self-attention mechanism, have achieved state-of-the-art performance across various tasks in natural language processing, computer vision, time-series modeling, etc. However, one of the challenges with deep Transformer models is the oversmoothing problem, where representations across layers converge to indistinguishable values, leading to significant performance degradation. We interpret the original self-attention as a simple graph filter and redesign it from a graph signal processing (GSP) perspective. We propose a graph-filter-based self-attention (GFSA) to learn a general yet effective one, whose complexity, however, is slightly larger than that of the original self-attention mechanism. We demonstrate that GFSA improves the performance of Transformers in various fields, including computer vision, natural language processing, graph-level tasks, speech recognition, and code classification.", "keywords": "Transformer;self-attention;oversmoothing;graph filter", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Jeongwhan Choi;Hyowon Wi;Jayoung Kim;Yehjin Shin;Kookjin Lee;Nathaniel Trask;Noseong Park", "authorids": "~Jeongwhan_Choi1;~Hyowon_Wi1;~Jayoung_Kim1;~Yehjin_Shin1;~Kookjin_Lee1;~Nathaniel_Trask2;~Noseong_Park1", "gender": "M;;F;F;M;M;", "homepage": "https://www.jeongwhanchoi.com;;;http://yehjin-shin.github.io/;https://scholar.google.com/citations?hl=en&user=KL89hVQAAAAJ&view_op=list_works;https://www.sandia.gov/ccr/staff/nathaniel-albert-trask/;", "dblp": "39/11215-2;332/6458;26/9969-2;322/5257;122/5103;188/8236;", "google_scholar": "3MNElkYAAAAJ;https://scholar.google.com/citations?view_op=list_works;3qbSHGwAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;6iLMZkwAAAAJ;", "orcid": "0000-0002-6530-2662;;;0009-0001-7600-2585;;;", "linkedin": "jeongwhanchoi/;;;yehjin-shin-528987217/;;;", "or_profile": "~Jeongwhan_Choi1;~Hyowon_Wi1;~Jayoung_Kim1;~Yehjin_Shin1;~Kookjin_Lee1;~Nathaniel_Trask2;~Noseong_Park1", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University;Arizona State University;;", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;asu.edu;;", "position": "PhD student;MS student;MS student;MS student;Assistant Professor;;", "bibtex": "@inproceedings{\nchoi2024graph,\ntitle={Graph Convolutions Enrich the Self-Attention in Transformers!},\nauthor={Jeongwhan Choi and Hyowon Wi and Jayoung Kim and Yehjin Shin and Kookjin Lee and Nathaniel Trask and Noseong Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ffNrpcBpi6}\n}", "github": "", "reviewers": "k2id;Py4c;bzWT;XKs2", "pdf_size": 1852630, "rating": "4;4;5;5", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "2;2;1;2", "presentation": "3;3;3;3", "wc_summary": "62;65;86;77", "wc_strengths": "43;99;48;25", "wc_weaknesses": "275;82;76;85", "wc_questions": "56;85;78;19", "wc_limitations": "37;9;1;3", "wc_review": "473;340;289;209", "wc_reply_reviewers": "205;0;757;92", "wc_reply_authors": "1469;246;2036;939", "reply_reviewers": "1;0;4;1", "reply_authors": "5;3;6;3", "rating_avg": [ 4.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.5, 9.604686356149273 ], "wc_strengths_avg": [ 53.75, 27.48977082479954 ], "wc_weaknesses_avg": [ 129.5, 84.06693761521232 ], "wc_questions_avg": [ 59.5, 25.71478174124758 ], "wc_limitations_avg": [ 12.5, 14.44818327679989 ], "wc_review_avg": [ 327.75, 95.98274584528201 ], "wc_reply_reviewers_avg": [ 263.5, 294.0276347556467 ], "wc_reply_authors_avg": [ 1172.5, 660.7701945457286 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 4.25, 1.299038105676658 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17784763847977253745&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;asu.edu;;", "author_num": 7, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Yonsei University;Arizona State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.asu.edu", "aff_unique_abbr": "Yonsei;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "Persistent Test-time Adaptation in Recurring Testing Scenarios", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94192", "id": "ffeUBoTcdS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ffeUBoTcdS", "openreview": "https://openreview.net/forum?id=ffeUBoTcdS", "poster": "/media/PosterPDFs/NeurIPS%202024/94192.png?t=1732247022.4040537", "project": "", "author_site": "Trung Hieu Hoang, MinhDuc Vo, Minh Do", "tldr": "", "abstract": "Current test-time adaptation (TTA) approaches aim to adapt a machine learning model to environments that change continuously. Yet, it is unclear whether TTA methods can maintain their adaptability over prolonged periods. To answer this question, we introduce a diagnostic setting - **recurring TTA** where environments not only change but also recur over time, creating an extensive data stream. This setting allows us to examine the error accumulation of TTA models, in the most basic scenario, when they are regularly exposed to previous testing environments. Furthermore, we simulate a TTA process on a simple yet representative $\\epsilon$-**perturbed Gaussian Mixture Model Classifier**, deriving theoretical insights into the dataset- and algorithm-dependent factors contributing to gradual performance degradation. Our investigation leads us to propose **persistent TTA (PeTTA)**, which senses when the model is diverging towards collapse and adjusts the adaptation strategy, striking a balance between the dual objectives of adaptation and model collapse prevention. The supreme stability of PeTTA over existing approaches, in the face of lifelong TTA scenarios, has been demonstrated over comprehensive experiments on various benchmarks. Our project page is available at [https://hthieu166.github.io/petta](https://hthieu166.github.io/petta).", "keywords": "domain adaptation;test-time adaptation;continual adaptation;performance degradation;self-supervised learning", "primary_area": "other", "supplementary_material": "", "author": "Trung-Hieu Hoang;MinhDuc Vo;Minh N. Do", "authorids": "~Trung-Hieu_Hoang1;~MinhDuc_Vo1;~Minh_N._Do1", "gender": "M;;M", "homepage": "http://hthieu.web.illinois.edu/;https://vmdlab.github.io/;http://minhdo.ece.illinois.edu/", "dblp": "232/1616;138/9093;d/MinhNDo", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=pfKTAYwAAAAJ;https://scholar.google.com/citations?sortby=pubdate", "orcid": "0000-0002-4143-5401;0000-0003-4839-032X;0000-0001-5132-4986", "linkedin": "trunghieuhoang/;;profminhdo/", "or_profile": "~Trung-Hieu_Hoang1;~MinhDuc_Vo1;~Minh_N._Do1", "aff": "University of Illinois, Urbana Champaign;The University of Tokyo, Tokyo Institute of Technology;University of Illinois at Urbana Champaign (UIUC)", "aff_domain": "illinois.edu;u-tokyo.ac.jp;illinois.edu", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nhoang2024persistent,\ntitle={Persistent Test-time Adaptation in Recurring Testing Scenarios},\nauthor={Trung-Hieu Hoang and MinhDuc Vo and Minh N. Do},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ffeUBoTcdS}\n}", "github": "", "reviewers": "JUpp;MNYK;oiWU;rAs4", "pdf_size": 4834925, "rating": "5;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "111;130;76;30", "wc_strengths": "74;53;106;27", "wc_weaknesses": "87;206;99;107", "wc_questions": "4;41;119;43", "wc_limitations": "1;26;32;1", "wc_review": "277;456;432;208", "wc_reply_reviewers": "99;86;21;14", "wc_reply_authors": "338;68;40;32", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.75, 38.06162765831225 ], "wc_strengths_avg": [ 65.0, 28.939592256975562 ], "wc_weaknesses_avg": [ 124.75, 47.446680600438214 ], "wc_questions_avg": [ 51.75, 41.81731100871982 ], "wc_limitations_avg": [ 15.0, 14.159802258506296 ], "wc_review_avg": [ 343.25, 104.00811266434941 ], "wc_reply_reviewers_avg": [ 55.0, 37.8615900352851 ], "wc_reply_authors_avg": [ 119.5, 126.85720318531384 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8049821154504085336&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "illinois.edu;u-tokyo.ac.jp;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UIUC;UTokyo", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Urbana-Champaign;Tokyo", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "Re-assembling the past: The RePAIR dataset and benchmark for real world 2D and 3D puzzle solving", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97584", "id": "fgJ9OvJPZB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fgJ9OvJPZB", "openreview": "https://openreview.net/forum?id=fgJ9OvJPZB", "poster": "/media/PosterPDFs/NeurIPS%202024/97584.png?t=1731596222.8742807", "project": "", "author_site": "Theodore Tsesmelis, Luca Palmieri, Marina Khoroshiltseva, Adeela Islam, Gur Elkin, Ofir I Shahar, Gianluca Scarpellini, Stefano Fiorini, Yaniv Ohayon, Nadav Alali, Sinem Aslan, Pietro Morerio, Sebastiano Vascon, Elena gravina, Maria Napolitano, Giuseppe Scarpati, Gabriel zuchtriegel, Alexandra Sp\u00fchler, Michel Fuchs, Stuart James, Ohad Ben-Shahar, Marcello Pelillo, Alessio Del Bue", "tldr": "", "abstract": "This paper proposes the RePAIR dataset that represents a challenging benchmark to test modern computational and data driven methods for puzzle-solving and reassembly tasks. Our dataset has unique properties that are uncommon to current benchmarks for 2D and 3D puzzle solving. The fragments and fractures are realistic, caused by a collapse of a fresco during a World War II bombing at the Pompeii archaeological park. The fragments are also eroded and have missing pieces with irregular shapes and different dimensions, challenging further the reassembly algorithms. The dataset is multi-modal providing high resolution images with characteristic pictorial elements, detailed 3D scans of the fragments and meta-data annotated by the archaeologists. Ground truth has been generated through several years of unceasing fieldwork, including the excavation and cleaning of each fragment, followed by manual puzzle solving by archaeologists of a subset of approx. 1000 pieces among the 16000 available. After digitizing all the fragments in 3D, a benchmark was prepared to challenge current reassembly and puzzle-solving methods that often solve more simplistic synthetic scenarios. The tested baselines show that there clearly exists a gap to fill in solving this computationally complex problem.", "keywords": "Dataset;Repair;Cultural Heritage;Frescoes;Fragments;Assembly;Puzzle Solving;Reconstruction;Digitization", "primary_area": "", "supplementary_material": "/attachment/03c28267dd1dbdae543dc7dc7f0af08ffedc60de.zip", "author": "Theodore Tsesmelis;Luca Palmieri;Marina Khoroshiltseva;Adeela Islam;Gur Elkin;Ofir Itzhak Shahar;Gianluca Scarpellini;Stefano Fiorini;Yaniv Ohayon;Nadav Alali;Sinem Aslan;Pietro Morerio;Sebastiano Vascon;Elena gravina;Maria Cristina Napolitano;Giuseppe Scarpati;Gabriel zuchtriegel;Alexandra Sp\u00fchler;Michel E. Fuchs;Stuart James;Ohad Ben-Shahar;Marcello Pelillo;Alessio Del Bue", "authorids": "~Theodore_Tsesmelis1;~Luca_Palmieri1;~Marina_Khoroshiltseva1;~Adeela_Islam1;~Gur_Elkin1;~Ofir_Itzhak_Shahar1;~Gianluca_Scarpellini1;~Stefano_Fiorini1;~Yaniv_Ohayon1;~Nadav_Alali1;~Sinem_Aslan1;~Pietro_Morerio1;~Sebastiano_Vascon1;~Elena_gravina1;~Maria_Cristina_Napolitano2;~Giuseppe_Scarpati1;~Gabriel_zuchtriegel1;~Alexandra_Sp\u00fchler1;~Michel_E._Fuchs1;~Stuart_James1;~Ohad_Ben-Shahar2;~Marcello_Pelillo1;~Alessio_Del_Bue2", "gender": ";M;F;F;;M;M;M;M;M;F;M;M;F;M;M;F;M;M;M;M;F;Unspecified", "homepage": ";;https://www.unive.it/data/persone/5338882;;https://github.com/gurelkin;;https://scarpellini.dev/;;;;https://www.researchgate.net/profile/Sinem_Aslan;https://iit.it/people/pietro-morerio;http://www.sebastianovascon.it;http://intranet.pompei.local/;https://pompeiisites.org/;https://pompeiisites.org/;https://unil.academia.edu/AlexandraSp%C3%BChler;;https://stuart-james.com;;https://www.iit.it/people/alessio-delbue;https://independent.academia.edu/MariaCristinaNapolitano;https://www.cs.bgu.ac.il/~ben-shahar", "dblp": "136/0193;65/9664-2.html;;;;384/8510;290/7952;210/9032;;;158/0491;119/8523;133/9513;;;;;;19/10673;42/1745;73/6117;;59/3818", "google_scholar": ";chn5eyoAAAAJ;;;https://scholar.google.co.il/citations?user=Wgc0ymMAAAAJ;Vkb7730AAAAJ;Wlye2XUAAAAJ;2O-BN9YAAAAJ;;;https://scholar.google.com.tr/citations?hl=en;https://scholar.google.it/citations?hl=en;https://scholar.google.it/citations?user=oiaAgVEAAAAJ;;;;;;VbRT0CwAAAAJ;Hz9A7M0AAAAJ;LUzvbGIAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-9701-1915;;;0009-0001-0464-7712;0009-0001-3688-1159;;0000-0001-5432-7584;;;0000-0003-0068-6551;0000-0001-5259-1496;0000-0002-7855-1641;;;;;;0000-0002-2649-2133;0000-0001-8992-9243;0000-0002-2262-4872;;", "linkedin": ";;;adeela-islam-647504b6/;https://il.linkedin.com/in/gur-elkin;ofir-itzhak-shahar?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app;;;yaniv-ohayon/;https://il.linkedin.com/in/nadav-alali-698407206?original_referer=https%3A%2F%2Fwww.google.com%2F;sinem-aslan-b4010b4/;;;;;;;fuchs-michel-a5856b140/;stuartajames/;marcello-pelillo-bb185585/?originalSubdomain=it;alessiodelbue/;;", "or_profile": "~Theodore_Tsesmelis1;~Luca_Palmieri1;~Marina_Khoroshiltseva1;~Adeela_Islam1;~Gur_Elkin1;~Ofir_Itzhak_Shahar1;~Gianluca_Scarpellini1;~Stefano_Fiorini1;~Yaniv_Ohayon1;~Nadav_Alali1;~Sinem_Aslan1;~Pietro_Morerio1;~Sebastiano_Vascon1;~Elena_gravina1;~Giuseppe_Scarpati1;~Gabriel_zuchtriegel1;~Alexandra_Sp\u00fchler1;~Michel_E._Fuchs1;~Stuart_James1;~Marcello_Pelillo1;~Alessio_Del_Bue1;~MARIA_CRISTINA_NAPOLITANO1;~Ohad_Ben-shahar1", "aff": "Universit\u00e0 degli Studi di Genova, Istituto Italiano di Tecnologia;University of Venice;University Ca' Foscari of Venice;Universit\u00e0 degli Studi di Genova, Istituto Italiano di Tecnologia;Ben Gurion University of the Negev;;Istituto Italiano di Tecnologia;Istituto Italiano di Tecnologia;Ben Gurion University of the Negev;Ben Gurion University of the Negev;Ca' Foscari University of Venice;Istituto Italiano di Tecnologia;University Ca' Foscari of Venice;University of Naples Federico II;Pompeii Archaeological Park;St. Johns River State College;Universit\u00e9 de Lausanne;Universit\u00e9 de Lausanne;University College London;Ca' Foscari University of Venice;Istituto Italiano di Tecnologia;University of Lecce;Ben Gurion University of the Negev", "aff_domain": "iit.it;unive.it;unive.it;iit.it;post.bgu.ac.il;;iit.it;iit.it;bgu.ac.il;bgu.ac.il;unive.it;iit.it;unive.it;unina.it;pompeiisites.org;sjrstate.edu;unil.ch;unil.ch;ucl.ac.uk;unive.it;iit.it;unile.it;bgu.ac.il", "position": "Postdoc;Postdoc;Postdoc;PhD student;MS student;;PhD student;Postdoc;MS student;MS student;Assistant Professor;Researcher;Assistant Professor;Intern;Intern;Principal Researcher;PhD student;Emeritus;Honorary Research Associate;Professor;Senior Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\ntsesmelis2024reassembling,\ntitle={Re-assembling the past: The Re{PAIR} dataset and benchmark for real world 2D and 3D puzzle solving},\nauthor={Theodore Tsesmelis and Luca Palmieri and Marina Khoroshiltseva and Adeela Islam and Gur Elkin and Ofir Itzhak Shahar and Gianluca Scarpellini and Stefano Fiorini and Yaniv Ohayon and Nadav Alali and Sinem Aslan and Pietro Morerio and Sebastiano Vascon and Elena gravina and Maria Cristina Napolitano and Giuseppe Scarpati and Gabriel zuchtriegel and Alexandra Sp{\\\"u}hler and Michel E. Fuchs and Stuart James and Ohad Ben-Shahar and Marcello Pelillo and Alessio Del Bue},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=fgJ9OvJPZB}\n}", "github": "", "reviewers": "ziZJ;85Yr;oFfJ;JiwA", "pdf_size": 9308742, "rating": "6;6;7;7", "confidence": "4;5;5;4", "wc_summary_and_contributions": "80;43;37;105", "wc_strengths": "82;23;2;56", "wc_improvement": "307;71;3;5", "wc_limitations": "37;74;1;1", "wc_correctness": "15;8;1;2", "wc_clarity": "7;1;1;3", "wc_relation_to_prior_work": "120;43;1;1", "wc_documentation": "11;118;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "660;382;48;175", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 66.25, 27.779263849137543 ], "wc_strengths_avg": [ 40.75, 30.621683493890405 ], "wc_improvement_avg": [ 96.5, 124.57427503300993 ], "wc_limitations_avg": [ 28.25, 30.22726418318403 ], "wc_correctness_avg": [ 6.5, 5.5901699437494745 ], "wc_clarity_avg": [ 3.0, 2.449489742783178 ], "wc_relation_to_prior_work_avg": [ 41.25, 48.59205181920187 ], "wc_documentation_avg": [ 32.75, 49.38813116529112 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 316.25, 231.51498331641517 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 23, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14590634606772503484&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "iit.it;unive.it;unive.it;iit.it;post.bgu.ac.il;;iit.it;iit.it;bgu.ac.il;bgu.ac.il;unive.it;iit.it;unive.it;unina.it;pompeiisites.org;sjrstate.edu;unil.ch;unil.ch;ucl.ac.uk;unive.it;iit.it;unile.it;bgu.ac.il", "author_num": 23, "aff_unique_index": "0;1;2;0;3;4;4;3;3;2;4;2;5;6;7;8;8;9;2;4;10;3", "aff_unique_norm": "Universit\u00e0 degli Studi di Genova;University of Venice;Ca' Foscari University of Venice;Ben Gurion University of the Negev;Istituto Italiano di Tecnologia;University of Naples Federico II;Pompeii Archaeological Park;St. Johns River State College;Universit\u00e9 de Lausanne;University College London;University of Lecce", "aff_unique_dep": ";;;;;;;;;;", "aff_unique_url": "https://www.unige.it;https://www.unive.it;https://www.unive.it;https://www.bgu.ac.il;https://www.iit.it;https://www.unina.it;http://pompeiisites.org/;https://www.sjrstate.edu/;https://www.unil.ch;https://www.ucl.ac.uk;https://www.unile.it", "aff_unique_abbr": "UniGe;Unive;Ca' Foscari;BGU;IIT;UNINA;;SJRSC;UNIL;UCL;UNILE", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Venice", "aff_country_unique_index": "0;0;0;0;1;0;0;1;1;0;0;0;0;0;2;3;3;4;0;0;0;1", "aff_country_unique": "Italy;Israel;United States;Switzerland;United Kingdom" }, { "title": "Efficient Leverage Score Sampling for Tensor Train Decomposition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94191", "id": "fi3aKVnBQo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fi3aKVnBQo", "openreview": "https://openreview.net/forum?id=fi3aKVnBQo", "poster": "/media/PosterPDFs/NeurIPS%202024/94191.png?t=1731648738.6269429", "project": "", "author_site": "Vivek Bharadwaj, Beheshteh Toloueirakhshan, Osman Asif Malik, Guillaume Rabusseau", "tldr": "", "abstract": "Tensor Train~(TT) decomposition is widely used in the machine learning and quantum physics communities as a popular tool to efficiently compress high-dimensional tensor data. In this paper, we propose an efficient algorithm to accelerate computing the TT decomposition with the Alternating Least Squares (ALS) algorithm relying on exact leverage scores sampling. For this purpose, we propose a data structure that allows us to efficiently sample from the tensor with time complexity logarithmic in the product of the tensor dimensions. Our contribution specifically leverages the canonical form of the TT decomposition. By maintaining the canonical form through each iteration of ALS, we can efficiently compute (and sample from) the leverage scores, thus achieving significant speed-up in solving each sketched least-square problem. Experiments on synthetic and real data on dense and sparse tensors demonstrate that our method outperforms SVD-based and ALS-based algorithms.", "keywords": "leverage score sampling;tensor train decomposition;alternating least square", "primary_area": "optimization", "supplementary_material": "/attachment/7411be1d17270bd51d8bcce0a11f6f136c89644a.zip", "author": "Vivek Bharadwaj;Beheshteh T. Rakhshan;Osman Asif Malik;Guillaume Rabusseau", "authorids": "~Vivek_Bharadwaj1;~Beheshteh_T._Rakhshan1;~Osman_Asif_Malik1;~Guillaume_Rabusseau1", "gender": "M;F;;M", "homepage": "https://vbharadwaj-bk.github.io/;;https://osmanmalik.github.io/;https://www-labs.iro.umontreal.ca/~grabus/", "dblp": "64/10881;;231/7644;143/7327", "google_scholar": "0cKXP7cAAAAJ;AWGU-v8AAAAJ;WAleKq0AAAAJ;https://scholar.google.fr/citations?user=t2i4V4EAAAAJ", "orcid": "0000-0003-0483-9578;;0000-0003-4477-481X;", "linkedin": ";;;", "or_profile": "~Vivek_Bharadwaj1;~Beheshteh_T._Rakhshan1;~Osman_Asif_Malik1;~Guillaume_Rabusseau1", "aff": "University of California, Berkeley;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Lawrence Berkeley National Lab;Universit\u00e9 de Montr\u00e9al", "aff_domain": "berkeley.edu;mila.umontreal.ca;lbl.gov;umontreal.ca", "position": "PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nbharadwaj2024efficient,\ntitle={Efficient Leverage Score Sampling for Tensor Train Decomposition},\nauthor={Vivek Bharadwaj and Beheshteh T. Rakhshan and Osman Asif Malik and Guillaume Rabusseau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fi3aKVnBQo}\n}", "github": "", "reviewers": "BTzr;oeCx;8ErW;yjyo", "pdf_size": 428809, "rating": "4;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;4;3", "wc_summary": "52;27;44;92", "wc_strengths": "31;25;30;44", "wc_weaknesses": "240;71;45;35", "wc_questions": "162;113;40;278", "wc_limitations": "3;1;1;34", "wc_review": "488;237;160;483", "wc_reply_reviewers": "212;10;63;0", "wc_reply_authors": "518;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 53.75, 23.85765076448224 ], "wc_strengths_avg": [ 32.5, 7.0178344238090995 ], "wc_weaknesses_avg": [ 97.75, 83.17263672626953 ], "wc_questions_avg": [ 148.25, 86.58052610142768 ], "wc_limitations_avg": [ 9.75, 14.02453207775575 ], "wc_review_avg": [ 342.0, 146.07018860807977 ], "wc_reply_reviewers_avg": [ 71.25, 84.71533214241681 ], "wc_reply_authors_avg": [ 129.5, 224.3005795801696 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6462085498324136686&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "berkeley.edu;mila.umontreal.ca;lbl.gov;umontreal.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, Berkeley;University of Montreal;Lawrence Berkeley National Laboratory;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;;", "aff_unique_url": "https://www.berkeley.edu;https://www.mila.quebec;https://www.lbl.gov;https://www.umontreal.ca", "aff_unique_abbr": "UC Berkeley;MILA;LBNL;UdeM", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Berkeley;Montreal;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Long-range Brain Graph Transformer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94190", "id": "fjLCqicn64", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fjLCqicn64", "openreview": "https://openreview.net/forum?id=fjLCqicn64", "poster": "/media/PosterPDFs/NeurIPS%202024/94190.png?t=1731494620.8212986", "project": "", "author_site": "Shuo Yu, Shan Jin, Ming Li, Tabinda Sarwar, Feng Xia", "tldr": "", "abstract": "Understanding communication and information processing among brain regions of interest (ROIs) is highly dependent on long-range connectivity, which plays a crucial role in facilitating diverse functional neural integration across the entire brain. However, previous studies generally focused on the short-range dependencies within brain networks while neglecting the long-range dependencies, limiting an integrated understanding of brain-wide communication. To address this limitation, we propose Adaptive Long-range aware TransformER (ALTER), a brain graph transformer to capture long-range dependencies between brain ROIs utilizing biased random walk. Specifically, we present a novel long-range aware strategy to explicitly capture long-range dependencies between brain ROIs. By guiding the walker towards the next hop with higher correlation value, our strategy simulates the real-world brain-wide communication. Furthermore, by employing the transformer framework, ALERT adaptively integrates both short- and long-range dependencies between brain ROIs, enabling an integrated understanding of multi-level communication across the entire brain. Extensive experiments on ABIDE and ADNI datasets demonstrate that ALTER consistently outperforms generalized state-of-the-art graph learning methods (including SAN, Graphormer, GraphTrans, and LRGNN) and other graph learning based brain network analysis methods (including FBNETGEN, BrainNetGNN, BrainGNN, and BrainNETTF) in neurological disease diagnosis.", "keywords": "long-range dependencies;brain graph transformer;brain disease prediction;graph learning;graph neural networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Shuo Yu;Shan Jin;Ming Li;Tabinda Sarwar;Feng Xia", "authorids": "~Shuo_Yu1;~Shan_Jin2;~Ming_Li15;~Tabinda_Sarwar1;~Feng_Xia1", "gender": "F;F;M;F;M", "homepage": "https://shuo-yu.com;http://www.ubinec.org/2634.html;https://mingli-ai.github.io;;http://xia.ai", "dblp": ";;181/2821-65;;62/3147", "google_scholar": "ev3C1hkAAAAJ;;Z7yEoOQAAAAJ;QxZMm-MAAAAJ;HDFA2VYAAAAJ", "orcid": "0000-0003-1124-9509;0000-0003-2544-0531;0000-0002-1218-2804;0000-0001-7313-5350;0000-0002-8324-1859", "linkedin": ";;;;fxia61", "or_profile": "~Shuo_Yu1;~Shan_Jin2;~Ming_Li15;~Tabinda_Sarwar1;~Feng_Xia1", "aff": "Dalian University of Technology;Dalian University of Technology;Zhejiang Normal University;Royal Melbourne Institute of Technology;Royal Melbourne Institute of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;zjnu.edu.cn;rmit.edu.au;rmit.edu.au", "position": "Associate Professor;PhD student;Full Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nyu2024longrange,\ntitle={Long-range Brain Graph Transformer},\nauthor={Shuo Yu and Shan Jin and Ming Li and Tabinda Sarwar and Feng Xia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fjLCqicn64}\n}", "github": "", "reviewers": "s7Az;dPgM;xvKE;u6uE", "pdf_size": 20398732, "rating": "5;5;7;7", "confidence": "5;4;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "35;23;79;128", "wc_strengths": "32;47;207;187", "wc_weaknesses": "340;77;177;179", "wc_questions": "24;62;100;92", "wc_limitations": "8;1;6;54", "wc_review": "439;210;569;640", "wc_reply_reviewers": "129;0;73;33", "wc_reply_authors": "250;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 41.299969733645085 ], "wc_strengths_avg": [ 118.25, 79.24447930297731 ], "wc_weaknesses_avg": [ 193.25, 94.22944072846872 ], "wc_questions_avg": [ 69.5, 29.845435161846776 ], "wc_limitations_avg": [ 17.25, 21.370248009791556 ], "wc_review_avg": [ 464.5, 163.66199925456124 ], "wc_reply_reviewers_avg": [ 58.75, 48.095607907583414 ], "wc_reply_authors_avg": [ 62.5, 108.25317547305482 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11839638856779225767&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "dlut.edu.cn;dlut.edu.cn;zjnu.edu.cn;rmit.edu.au;rmit.edu.au", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Dalian University of Technology;Zhejiang Normal University;Royal Melbourne Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.dlut.edu.cn/;http://www.zjnu.edu.cn;https://www.rmit.edu.au", "aff_unique_abbr": "DUT;ZJNU;RMIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;Australia" }, { "title": "Reconstruct and Match: Out-of-Distribution Robustness via Topological Homogeneity", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94189", "id": "fkbMlfDBxm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fkbMlfDBxm", "openreview": "https://openreview.net/forum?id=fkbMlfDBxm", "poster": "/media/PosterPDFs/NeurIPS%202024/94189.png?t=1731692515.4104776", "project": "", "author_site": "Chaoqi Chen, Luyao Tang, Hui Huang", "tldr": "", "abstract": "Since deep learning models are usually deployed in non-stationary environments, it is imperative to improve their robustness to out-of-distribution (OOD) data. A common approach to mitigate distribution shift is to regularize internal representations or predictors learned from in-distribution (ID) data to be domain invariant. Past studies have primarily learned pairwise invariances, ignoring the intrinsic structure and high-order dependencies of the data. Unlike machines, human recognizes objects by first dividing them into major components and then identifying the topological relation of these components. Motivated by this, we propose Reconstruct and Match (REMA), a general learning framework for object recognition tasks to endow deep models with the capability of capturing the topological homogeneity of objects without human prior knowledge or fine-grained annotations. To identify major components from objects, REMA introduces a selective slot-based reconstruction module to dynamically map dense pixels into a sparse and discrete set of slot vectors in an unsupervised manner. Then, to model high-order dependencies among these components, we propose a hypergraph-based relational reasoning module that models the intricate relations of nodes (slots) with structural constraints. Experiments on standard benchmarks show that REMA outperforms state-of-the-art methods in OOD generalization and test-time adaptation settings.", "keywords": "Out-of-Distribution;Domain Generalization;Test-Time Adaptation;Topological Homogeneity", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0cd313ffc8ca7b64595d8f9054c807743cde3621.zip", "author": "Chaoqi Chen;Luyao Tang;Hui Huang", "authorids": "~Chaoqi_Chen2;~Luyao_Tang1;~Hui_Huang3", "gender": "M;M;", "homepage": ";https://lytang63.github.io/;https://vcc.tech/~huihuang", "dblp": "230/4601;163/8474;33/5763-4", "google_scholar": "https://scholar.google.com.hk/citations?user=MmUZ_AQAAAAJ;kKaYkMcAAAAJ;https://scholar.google.com.hk/citations?user=wjzkl3YAAAAJ", "orcid": ";;0000-0003-3212-0544", "linkedin": ";;", "or_profile": "~Chaoqi_Chen2;~Luyao_Tang1;~Hui_Huang3", "aff": "The University of Hong Kong;Xiamen University;Shenzhen University", "aff_domain": "hku.hk;xmu.edu.cn;szu.edu", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nchen2024reconstruct,\ntitle={Reconstruct and Match: Out-of-Distribution Robustness via Topological Homogeneity},\nauthor={Chaoqi Chen and Luyao Tang and Hui Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fkbMlfDBxm}\n}", "github": "", "reviewers": "rhAB;EaaQ;M7aK;jWEa", "pdf_size": 2034953, "rating": "5;6;6;7", "confidence": "2;2;3;2", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;2", "wc_summary": "103;49;45;138", "wc_strengths": "78;50;166;94", "wc_weaknesses": "89;1303;80;486", "wc_questions": "73;185;500;274", "wc_limitations": "1;96;31;8", "wc_review": "344;1683;822;1000", "wc_reply_reviewers": "27;66;43;0", "wc_reply_authors": "21;52;29;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.75, 38.80318930191177 ], "wc_strengths_avg": [ 97.0, 42.83689998120779 ], "wc_weaknesses_avg": [ 489.5, 497.46482287695477 ], "wc_questions_avg": [ 258.0, 156.8231488014445 ], "wc_limitations_avg": [ 34.0, 37.476659402887016 ], "wc_review_avg": [ 962.25, 480.3146754993022 ], "wc_reply_reviewers_avg": [ 34.0, 24.031229681395832 ], "wc_reply_authors_avg": [ 25.5, 18.607794065928395 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wKiHL8e2YCwJ:scholar.google.com/&scioq=Reconstruct+and+Match:+Out-of-Distribution+Robustness+via+Topological+Homogeneity&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "hku.hk;xmu.edu.cn;szu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Hong Kong;Xiamen University;Shenzhen University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;https://www.xmu.edu.cn;https://www.szu.edu.cn", "aff_unique_abbr": "HKU;XMU;SZU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Private Online Learning via Lazy Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94188", "id": "fkf0OquD3Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fkf0OquD3Q", "openreview": "https://openreview.net/forum?id=fkf0OquD3Q", "poster": "", "project": "", "author_site": "Hilal Asi, Tomer Koren, Daogao Liu, Kunal Talwar", "tldr": "", "abstract": "We study the problem of private online learning, specifically, online prediction from experts (OPE) and online convex optimization (OCO). \n We propose a new transformation that transforms lazy online learning algorithms into private algorithms. We apply our transformation for differentially private OPE and OCO using existing lazy algorithms for these problems. Our final algorithms obtain regret which significantly improves the regret in the high privacy regime $\\varepsilon \\ll 1$, obtaining $\\sqrt{T \\log d} + T^{1/3} \\log(d)/\\varepsilon^{2/3}$ for DP-OPE and $\\sqrt{T} + T^{1/3} \\sqrt{d}/\\varepsilon^{2/3}$ for DP-OCO. We also complement our results with a lower bound for DP-OPE, showing that these rates are optimal for a natural family of low-switching private algorithms.", "keywords": "Differential Privacy;Online Optimization", "primary_area": "privacy", "supplementary_material": "", "author": "Hilal Asi;Tomer Koren;Daogao Liu;Kunal Talwar", "authorids": "~Hilal_Asi1;~Tomer_Koren1;~Daogao_Liu1;~Kunal_Talwar1", "gender": "M;M;M;M", "homepage": "http://web.stanford.edu/~asi/;https://tomerkoren.github.io;https://daogaoliu.github.io/;http://www.kunaltalwar.org", "dblp": ";12/10044;245/4078;06/3696", "google_scholar": "QGcz9-kAAAAJ;wGG1voYAAAAJ;auA3AaQAAAAJ;XD_01h8AAAAJ", "orcid": ";;;", "linkedin": ";;;kunal-talwar-128a6159", "or_profile": "~Hilal_Asi1;~Tomer_Koren1;~Daogao_Liu1;~Kunal_Talwar1", "aff": "Apple;Tel Aviv University;University of Washington, Seattle;Apple", "aff_domain": "apple.com;tau.ac.il;uw.edu;apple.com", "position": "Researcher;Associate Professor;PhD student;Research Scientist", "bibtex": "@inproceedings{\nasi2024private,\ntitle={Private Online Learning via Lazy Algorithms},\nauthor={Hilal Asi and Tomer Koren and Daogao Liu and Kunal Talwar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fkf0OquD3Q}\n}", "github": "", "reviewers": "MEJp;rLS6;FJx4;djv7", "pdf_size": 452876, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "80;102;57;68", "wc_strengths": "35;71;87;41", "wc_weaknesses": "48;20;96;63", "wc_questions": "142;51;107;60", "wc_limitations": "2;13;1;1", "wc_review": "307;257;348;233", "wc_reply_reviewers": "16;16;0;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 16.69393602479655 ], "wc_strengths_avg": [ 58.5, 21.37171027316251 ], "wc_weaknesses_avg": [ 56.75, 27.416919958303122 ], "wc_questions_avg": [ 90.0, 36.78994427829431 ], "wc_limitations_avg": [ 4.25, 5.0682837331783235 ], "wc_review_avg": [ 286.25, 44.53860684844105 ], "wc_reply_reviewers_avg": [ 11.5, 6.689544080129826 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3312437923797300011&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "apple.com;tau.ac.il;uw.edu;apple.com", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Apple;Tel Aviv University;University of Washington", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://www.tau.ac.il;https://www.washington.edu", "aff_unique_abbr": "Apple;TAU;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Active Sequential Posterior Estimation for Sample-Efficient Simulation-Based Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94187", "id": "fkuseU0nJs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fkuseU0nJs", "openreview": "https://openreview.net/forum?id=fkuseU0nJs", "poster": "/media/PosterPDFs/NeurIPS%202024/94187.png?t=1733894949.4268935", "project": "", "author_site": "Sam Griesemer, Defu Cao, Zijun Cui, Carolina Osorio, Yan Liu", "tldr": "", "abstract": "Computer simulations have long presented the exciting possibility of scientific insight into complex real-world processes. Despite the power of modern computing, however, it remains challenging to systematically perform inference under simulation models. This has led to the rise of simulation-based inference (SBI), a class of machine learning-enabled techniques for approaching inverse problems with stochastic simulators. Many such methods, however, require large numbers of simulation samples and face difficulty scaling to high-dimensional settings, often making inference prohibitive under resource-intensive simulators. To mitigate these drawbacks, we introduce active sequential neural posterior estimation (ASNPE). ASNPE brings an active learning scheme into the inference loop to estimate the utility of simulation parameter candidates to the underlying probabilistic model. The proposed acquisition scheme is easily integrated into existing posterior estimation pipelines, allowing for improved sample efficiency with low computational overhead. We further demonstrate the effectiveness of the proposed method in the travel demand calibration setting, a high-dimensional inverse problem commonly requiring computationally expensive traffic simulators. Our method outperforms well-tuned benchmarks and state-of-the-art posterior estimation methods on a large-scale real-world traffic network, as well as demonstrates a performance advantage over non-active counterparts on a suite of SBI benchmark environments.", "keywords": "simulation-based inference;likelihood-free inference;active learning;Bayesian methods", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Sam Griesemer;Defu Cao;Zijun Cui;Carolina Osorio;Yan Liu", "authorids": "~Sam_Griesemer1;~Defu_Cao1;~Zijun_Cui1;~Carolina_Osorio1;~Yan_Liu1", "gender": "M;M;;;F", "homepage": "https://samgriesemer.com;https://idevede.github.io/;https://zijunjkl.github.io/;;http://www-bcf.usc.edu/~liu32/", "dblp": "317/5153;274/1535;266/4675;;150/4295", "google_scholar": "fBZ3_FsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;UUKLPMYAAAAJ", "orcid": "0009-0009-0192-9405;0000-0003-0240-3818;0000-0002-4362-197X;;0000-0002-7055-9518", "linkedin": ";;;;", "or_profile": "~Sam_Griesemer1;~Defu_Cao1;~Zijun_Cui1;~Carolina_Osorio1;~Yan_Liu1", "aff": "University of Southern California;University of Southern California;University of Southern California;;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;;usc.edu", "position": "PhD student;PhD student;Postdoc;;Professor", "bibtex": "@inproceedings{\ngriesemer2024active,\ntitle={Active Sequential Posterior Estimation for Sample-Efficient Simulation-Based Inference},\nauthor={Sam Griesemer and Defu Cao and Zijun Cui and Carolina Osorio and Yan Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fkuseU0nJs}\n}", "github": "", "reviewers": "JwDn;S6Jd;s4rF;GNuF;jL6p;garc", "pdf_size": 4265070, "rating": "4;5;6;6;7;8", "confidence": "3;4;4;3;3;3", "soundness": "2;2;2;3;3;3", "novelty": "2;2;2;3;3;3", "presentation": "2;1;3;4;3;3", "wc_summary": "98;48;97;184;64;50", "wc_strengths": "136;39;85;53;168;88", "wc_weaknesses": "516;395;154;114;245;230", "wc_questions": "10;102;201;202;18;84", "wc_limitations": "30;11;1;64;13;50", "wc_review": "790;595;538;617;508;502", "wc_reply_reviewers": "156;335;66;157;583;145", "wc_reply_authors": "349;347;26;79;422;168", "reply_reviewers": "1;2;1;1;1;1", "reply_authors": "4;2;2;2;2;2", "rating_avg": [ 6.0, 1.2909944487358056 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 90.16666666666667, 46.52747097742962 ], "wc_strengths_avg": [ 94.83333333333333, 44.83085495008494 ], "wc_weaknesses_avg": [ 275.6666666666667, 139.02357913517966 ], "wc_questions_avg": [ 102.83333333333333, 77.07229650716845 ], "wc_limitations_avg": [ 28.166666666666668, 22.46045908306912 ], "wc_review_avg": [ 591.6666666666666, 98.2423307722061 ], "wc_reply_reviewers_avg": [ 240.33333333333334, 173.16626563957414 ], "wc_reply_authors_avg": [ 231.83333333333334, 148.8594601927902 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.3726779962499649 ], "reply_authors_avg": [ 2.3333333333333335, 0.7453559924999298 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.2738612787525831, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10189814048571382538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "usc.edu;usc.edu;usc.edu;;usc.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Localized Adaptive Risk Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94186", "id": "fogJgrozu1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fogJgrozu1", "openreview": "https://openreview.net/forum?id=fogJgrozu1", "poster": "/media/PosterPDFs/NeurIPS%202024/94186.png?t=1731079902.3976994", "project": "", "author_site": "Matteo Zecchin, Osvaldo Simeone", "tldr": "", "abstract": "Adaptive Risk Control (ARC) is an online calibration strategy based on set prediction that offers worst-case deterministic long-term risk control, as well as statistical marginal coverage guarantees. ARC adjusts the size of the prediction set by varying a single scalar threshold based on feedback from past decisions. In this work, we introduce Localized Adaptive Risk Control (L-ARC), an online calibration scheme that targets statistical localized risk guarantees ranging from conditional risk to marginal risk, while preserving the worst-case performance of ARC. L-ARC updates a threshold function within a reproducing kernel Hilbert space (RKHS), with the kernel determining the level of localization of the statistical risk guarantee. The theoretical results highlight a trade-off between localization of the statistical risk and convergence speed to the long-term risk target. Thanks to localization, L-ARC is demonstrated via experiments to produce prediction sets with risk guarantees across different data subpopulations, significantly improving the fairness of the calibrated model for tasks such as image segmentation and beam selection in wireless networks.", "keywords": "online conformal risk control;uncertainty quantification;conformal predictions", "primary_area": "online_learning", "supplementary_material": "/attachment/3380b95f265545a4a05d7df405a7820dfd6b3797.zip", "author": "Matteo Zecchin;Osvaldo Simeone", "authorids": "~Matteo_Zecchin1;~Osvaldo_Simeone2", "gender": "M;M", "homepage": "https://www.kcl.ac.uk/people/matteo-zecchin;https://nms.kcl.ac.uk/osvaldo.simeone/index.htm", "dblp": "271/4669;", "google_scholar": "cV3oJxgAAAAJ;https://scholar.google.co.uk/citations?user=m1xeKH4AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Matteo_Zecchin1;~Osvaldo_Simeone2", "aff": "King's College London, University of London;King's College London", "aff_domain": "kcl.ac.uk;kcl.ac.uk", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nzecchin2024localized,\ntitle={Localized Adaptive Risk Control},\nauthor={Matteo Zecchin and Osvaldo Simeone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fogJgrozu1}\n}", "github": "", "reviewers": "tGoZ;gQ5F;ydRn;LXXA;nFrT", "pdf_size": 3317455, "rating": "4;5;5;6;7", "confidence": "4;3;3;4;3", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;2;2;3", "wc_summary": "87;39;52;352;84", "wc_strengths": "33;48;41;202;59", "wc_weaknesses": "341;201;71;234;58", "wc_questions": "4;2;3;209;73", "wc_limitations": "1;1;1;12;21", "wc_review": "466;291;168;1009;295", "wc_reply_reviewers": "217;9;9;21;9", "wc_reply_authors": "223;41;25;20;15", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 122.8, 116.06446484605009 ], "wc_strengths_avg": [ 76.6, 63.27906446843221 ], "wc_weaknesses_avg": [ 181.0, 105.86595297828288 ], "wc_questions_avg": [ 58.2, 80.12839696387293 ], "wc_limitations_avg": [ 7.2, 8.109253973085318 ], "wc_review_avg": [ 445.8, 297.1460247083915 ], "wc_reply_reviewers_avg": [ 53.0, 82.13160171334783 ], "wc_reply_authors_avg": [ 64.8, 79.57989695896822 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.32025630761017426, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13022533543130451960&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "kcl.ac.uk;kcl.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "King's College London", "aff_unique_dep": "", "aff_unique_url": "https://www.kcl.ac.uk", "aff_unique_abbr": "KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Theoretical Characterisation of the Gauss Newton Conditioning in Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94185", "id": "fpOnUMjLiO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fpOnUMjLiO", "openreview": "https://openreview.net/forum?id=fpOnUMjLiO", "poster": "/media/PosterPDFs/NeurIPS%202024/94185.png?t=1733805711.2879822", "project": "", "author_site": "Jim Zhao, Sidak Pal Singh, Aurelien Lucchi", "tldr": "", "abstract": "The Gauss-Newton (GN) matrix plays an important role in machine learning, most evident in its use as a preconditioning matrix for a wide family of popular adaptive methods to speed up optimization. Besides, it can also provide key insights into the optimization landscape of neural networks. \nIn the context of deep neural networks, understanding the GN matrix involves studying the interaction between different weight matrices as well as the dependencies introduced by the data, thus rendering its analysis challenging.\nIn this work, we take a first step towards theoretically characterizing the conditioning of the GN matrix in neural networks. We establish tight bounds on the condition number of the GN in deep linear networks of arbitrary depth and width, which we also extend to two-layer ReLU networks.\nWe expand the analysis to further architectural components, such as residual connections and convolutional layers. \nFinally, we empirically validate the bounds and uncover valuable insights into the influence of the analyzed architectural components.", "keywords": "conditioning;gradient outer product;Gauss-Newton matrix;optimization landscape", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/63ace01bcbd675573ad4e57666e767021b2d634a.zip", "author": "Jim Zhao;Sidak Pal Singh;Aurelien Lucchi", "authorids": "~Jim_Zhao2;~Sidak_Pal_Singh1;~Aurelien_Lucchi1", "gender": "M;;M", "homepage": "https://jydzh.altervista.org;http://sidakpal.com/;http://people.inf.ethz.ch/alucchi/", "dblp": ";189/9168;14/5780", "google_scholar": "GAxXyUUAAAAJ;c59mPS4AAAAJ;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jim_Zhao2;~Sidak_Pal_Singh1;~Aurelien_Lucchi1", "aff": "University of Basel;Max Planck Institute for Intelligent Systems;University of Basel", "aff_domain": "unibas.ch;tuebingen.mpg.de;unibas.ch", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024theoretical,\ntitle={Theoretical Characterisation of the Gauss Newton Conditioning in Neural Networks},\nauthor={Jim Zhao and Sidak Pal Singh and Aurelien Lucchi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fpOnUMjLiO}\n}", "github": "", "reviewers": "oXX9;6bdM;NTTB;ewWf", "pdf_size": 1043002, "rating": "5;6;6;8", "confidence": "4;4;3;2", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "58;164;72;103", "wc_strengths": "67;75;65;96", "wc_weaknesses": "263;222;128;97", "wc_questions": "19;169;72;69", "wc_limitations": "38;23;5;23", "wc_review": "445;653;342;388", "wc_reply_reviewers": "137;51;34;196", "wc_reply_authors": "0;745;511;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.25, 40.776065283447835 ], "wc_strengths_avg": [ 75.75, 12.275483697190918 ], "wc_weaknesses_avg": [ 177.5, 67.49259218610588 ], "wc_questions_avg": [ 82.25, 54.32943493172002 ], "wc_limitations_avg": [ 22.25, 11.691342951089922 ], "wc_review_avg": [ 457.0, 118.8970142602412 ], "wc_reply_reviewers_avg": [ 104.5, 65.69056248807739 ], "wc_reply_authors_avg": [ 314.0, 324.71602978602704 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sBDS409RutoJ:scholar.google.com/&scioq=Theoretical+Characterisation+of+the+Gauss+Newton+Conditioning+in+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "unibas.ch;tuebingen.mpg.de;unibas.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Basel;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.unibas.ch;https://www.mpi-is.mpg.de", "aff_unique_abbr": "UniBas;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Differentiable Modal Synthesis for Physical Modeling of Planar String Sound and Motion Simulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94184", "id": "fpxRpPbF1t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fpxRpPbF1t", "openreview": "https://openreview.net/forum?id=fpxRpPbF1t", "poster": "/media/PosterPDFs/NeurIPS%202024/94184.png?t=1732832880.2528358", "project": "", "author_site": "Jin Woo Lee, Jaehyun Park, Min Jun Choi, Kyogu Lee", "tldr": "", "abstract": "While significant advancements have been made in music generation and differentiable sound synthesis within machine learning and computer audition, the simulation of instrument vibration guided by physical laws has been underexplored. To address this gap, we introduce a novel model for simulating the spatio-temporal motion of nonlinear strings, integrating modal synthesis and spectral modeling within a neural network framework. Our model leverages mechanical properties and fundamental frequencies as inputs, outputting string states across time and space that solve the partial differential equation characterizing the nonlinear string. Empirical evaluations demonstrate that the proposed architecture achieves superior accuracy in string motion simulation compared to existing baseline architectures. The code and demo are available online.", "keywords": "Differentiable Audio Signal Processing;Physical Modeling;Musical Sound Synthesis;Physical Simulation", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Jin Woo Lee;Jaehyun Park;Min Jun Choi;Kyogu Lee", "authorids": "~Jin_Woo_Lee1;~Jaehyun_Park5;~Min_Jun_Choi1;~Kyogu_Lee1", "gender": "M;M;M;M", "homepage": "http://jnwoo.com;https://snu-marg.notion.site/snu-marg/MARG-091390162ca941f4b88f64d47d2c4e87;https://marg.snu.ac.kr;http://marg.snu.ac.kr", "dblp": "06/3971-1.html;;253/2295;85/6128", "google_scholar": "ioNhgC8AAAAJ;;nJwn5S0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0006-5716-3611;;0009-0005-0012-9685;", "linkedin": "jin-woo-lee-8867101b9/;;;", "or_profile": "~Jin_Woo_Lee1;~Jaehyun_Park5;~Min_Jun_Choi1;~Kyogu_Lee1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Undergrad student;Professor", "bibtex": "@inproceedings{\nlee2024differentiable,\ntitle={Differentiable Modal Synthesis for Physical Modeling of Planar String Sound and Motion Simulation},\nauthor={Jin Woo Lee and Jaehyun Park and Min Jun Choi and Kyogu Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fpxRpPbF1t}\n}", "github": "", "reviewers": "ygxN;Hh9k;gawP", "pdf_size": 2628506, "rating": "5;6;7", "confidence": "3;2;2", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "1;2;4", "wc_summary": "48;67;29", "wc_strengths": "41;34;64", "wc_weaknesses": "355;89;158", "wc_questions": "3;4;5", "wc_limitations": "29;7;11", "wc_review": "476;201;267", "wc_reply_reviewers": "18;25;17", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_avg": [ 48.0, 15.513435037626794 ], "wc_strengths_avg": [ 46.333333333333336, 12.814921857827391 ], "wc_weaknesses_avg": [ 200.66666666666666, 112.70709333883511 ], "wc_questions_avg": [ 4.0, 0.816496580927726 ], "wc_limitations_avg": [ 15.666666666666666, 9.568466729604882 ], "wc_review_avg": [ 314.6666666666667, 117.21869399640241 ], "wc_reply_reviewers_avg": [ 20.0, 3.559026084010437 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7598659574691505314&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Value Imprint: A Technique for Auditing the Human Values Embedded in RLHF Datasets", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97583", "id": "fq7WmnJ3iV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fq7WmnJ3iV", "openreview": "https://openreview.net/forum?id=fq7WmnJ3iV", "poster": "/media/PosterPDFs/NeurIPS%202024/97583.png?t=1734044927.6326592", "project": "", "author_site": "Ike Obi, Rohan Pant, Srishti Shekhar Agrawal, Maham Ghazanfar, Aaron Basiletti", "tldr": "", "abstract": "LLMs are increasingly fine-tuned using RLHF datasets to align them with human preferences and values. However, very limited research has investigated which specific human values are operationalized through these datasets. In this paper, we introduce Value Imprint, a framework for auditing and classifying the human values embedded within RLHF datasets. To investigate the viability of this framework, we conducted three case study experiments by auditing the Anthropic/hh-rlhf, OpenAI WebGPT Comparisons, and Alpaca GPT-4-LLM datasets to examine the human values embedded within them. Our analysis involved a two-phase process. During the first phase, we developed a taxonomy of human values through an integrated review of prior works from philosophy, axiology, and ethics. Then, we applied this taxonomy to annotate 6,501 RLHF preferences. During the second phase, we employed the labels generated from the annotation as ground truth data for training a transformer-based machine learning model to audit and classify the three RLHF datasets. Through this approach, we discovered that information-utility values, including Wisdom/Knowledge and Information Seeking, were the most dominant human values within all three RLHF datasets. In contrast, prosocial and democratic values, including Well-being, Justice, and Human/Animal Rights, were the least represented human values. These findings have significant implications for developing language models that align with societal values and norms. We contribute our datasets to support further research in this area. https://github.com/hv-rsrch/valueimprint", "keywords": "Datasets;RLHF;Human Values;LLMs;AI", "primary_area": "", "supplementary_material": "", "author": "Ike Obi;Rohan Pant;Srishti Shekhar Agrawal;Maham Ghazanfar;Aaron Basiletti", "authorids": "~Ike_Obi1;~Rohan_Pant1;~Srishti_Shekhar_Agrawal1;~Maham_Ghazanfar1;~Aaron_Basiletti1", "gender": ";M;F;F;M", "homepage": ";;https://www.srishtisagrawal.com;;https://aaronbasiletti.weebly.com/", "dblp": ";;;;", "google_scholar": ";;;;", "orcid": ";;0009-0006-8745-6271;;", "linkedin": ";rohan-pant/;srishti-s-agrawal;maham-ghazanfar/;aaron-basiletti/", "or_profile": "~Ike_Obi1;~Rohan_Pant1;~Srishti_Shekhar_Agrawal1;~Maham_Ghazanfar1;~Aaron_Basiletti1", "aff": ";Purdue University;Purdue University;Purdue University;Purdue University", "aff_domain": ";purdue.edu;purdue.edu;purdue.edu;purdue.edu", "position": ";MS student;MS student;MS student;Undergrad student", "bibtex": "@inproceedings{\nobi2024value,\ntitle={Value Imprint: A Technique for Auditing the Human Values Embedded in {RLHF} Datasets},\nauthor={Ike Obi and Rohan Pant and Srishti Shekhar Agrawal and Maham Ghazanfar and Aaron Basiletti},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=fq7WmnJ3iV}\n}", "github": "", "reviewers": "rChu;rTbf;fZFM;zqWr", "pdf_size": 1223462, "rating": "3;6;7;8", "confidence": "4;4;4;5", "wc_summary_and_contributions": "59;48;118;37", "wc_strengths": "2;38;52;53", "wc_improvement": "14;133;386;192", "wc_limitations": "1;1;29;15", "wc_correctness": "1;12;18;23", "wc_clarity": "2;6;26;10", "wc_relation_to_prior_work": "1;1;14;9", "wc_documentation": "1;1;51;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "82;241;695;350", "wc_reply_reviewers": "0;7;70;0", "wc_reply_authors": "0;0;60;0", "reply_reviewers": "0;1;2;0", "reply_authors": "3;1;5;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 65.5, 31.2929704566377 ], "wc_strengths_avg": [ 36.25, 20.64430914319973 ], "wc_improvement_avg": [ 181.25, 134.47931997151085 ], "wc_limitations_avg": [ 11.5, 11.6081867662439 ], "wc_correctness_avg": [ 13.5, 8.200609733428363 ], "wc_clarity_avg": [ 11.0, 9.1104335791443 ], "wc_relation_to_prior_work_avg": [ 6.25, 5.539629951540085 ], "wc_documentation_avg": [ 15.75, 20.680606857633556 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 342.0, 224.98555509187696 ], "wc_reply_reviewers_avg": [ 19.25, 29.439556722206262 ], "wc_reply_authors_avg": [ 15.0, 25.98076211353316 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6172133998483676, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8480822050812950833&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";purdue.edu;purdue.edu;purdue.edu;purdue.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Harmonizing Visual Text Comprehension and Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94183", "id": "fqjeKsHOVR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fqjeKsHOVR", "openreview": "https://openreview.net/forum?id=fqjeKsHOVR", "poster": "/media/PosterPDFs/NeurIPS%202024/94183.png?t=1731421337.0399978", "project": "", "author_site": "Zhen Zhao, Jingqun Tang, Binghong Wu, Chunhui Lin, Shu Wei, Hao Liu, Xin Tan, zhizhong zhang, Can Huang, Yuan Xie", "tldr": "", "abstract": "In this work, we present TextHarmony, a unified and versatile multimodal generative model proficient in comprehending and generating visual text. Simultaneously generating images and texts typically results in performance degradation due to the inherent inconsistency between vision and language modalities. To overcome this challenge, existing approaches resort to modality-specific data for supervised fine-tuning, necessitating distinct model instances. We propose Slide-LoRA, which dynamically aggregates modality-specific and modality-agnostic LoRA experts, partially decoupling the multimodal generation space. Slide-LoRA harmonizes the generation of vision and language within a singular model instance, thereby facilitating a more unified generative process. Additionally, we develop a high-quality image caption dataset, DetailedTextCaps-100K, synthesized with a sophisticated closed-source MLLM to enhance visual text generation capabilities further. Comprehensive experiments across various benchmarks demonstrate the effectiveness of the proposed approach. Empowered by Slide-LoRA, TextHarmony achieves comparable performance to modality-specific fine-tuning results with only a 2% increase in parameters and shows an average improvement of 2.5% in visual text comprehension tasks and 4.0% in visual text generation tasks. Our work delineates the viability of an integrated approach to multimodal generation within the visual text domain, setting a foundation for subsequent inquiries. Code is available at https://github.com/bytedance/TextHarmony.", "keywords": "multimodal generative model;visual text comprehension and generation;LoRA expert", "primary_area": "generative_models", "supplementary_material": "", "author": "Zhen Zhao;Jingqun Tang;Binghong Wu;Chunhui Lin;Shu Wei;Hao Liu;Xin Tan;zhizhong zhang;Can Huang;Yuan Xie", "authorids": "~Zhen_Zhao6;~Jingqun_Tang1;~Binghong_Wu1;~Chunhui_Lin1;~Shu_Wei2;~Hao_Liu15;~Xin_Tan2;~zhizhong_zhang1;~Can_Huang1;~Yuan_Xie5", "gender": "M;;M;M;F;M;M;M;M;", "homepage": "https://www.ecnu.edu.cn/;;;https://github.com/Linchunhui;https://github.com/weishu27;;https://tanxincs.github.io/;;;", "dblp": ";317/5539;236/1328;;;09/3214-3;89/6413-2;20/1541;;", "google_scholar": "mYW60xMAAAAJ;OxQXSioAAAAJ;66ry4nMAAAAJ;;;wFOk3PQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;CXZciFAAAAAJ;;", "orcid": "0009-0004-7976-2629;;;;;;0000-0001-9346-1196;;0009-0006-9126-3069;", "linkedin": ";;%E7%A7%89%E6%B3%93-%E6%AD%A6-211300134/;;;;;;https://www.linkedin.cn/incareer/in/can-huang-3175b256;", "or_profile": "~Zhen_Zhao6;~Jingqun_Tang1;~Binghong_Wu1;~Chunhui_Lin1;~Shu_Wei2;~Hao_Liu15;~Xin_Tan2;~zhizhong_zhang1;~Can_Huang1;~Yuan_Xie5", "aff": "East China Normal University;Bytedance;Bytedance;Bytedance;ByteDance Inc.;Bytedance;East China Normal University;East China Normal University;Bytedance;", "aff_domain": "ecnu.edu.cn;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;ecnu.edu.cn;ecnu.edu.cn;bytedance.com;", "position": "MS student;Researcher;Researcher;Researcher;Researcher;Researcher;Associate Professor;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nzhao2024harmonizing,\ntitle={Harmonizing Visual Text Comprehension and Generation},\nauthor={Zhen Zhao and Jingqun Tang and Binghong Wu and Chunhui Lin and Shu Wei and Hao Liu and Xin Tan and zhizhong zhang and Can Huang and Yuan Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fqjeKsHOVR}\n}", "github": "", "reviewers": "2avc;SovZ;LmXP;r95i", "pdf_size": 4762188, "rating": "5;6;8;8", "confidence": "4;4;5;4", "soundness": "2;3;3;4", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "80;38;65;51", "wc_strengths": "43;80;51;60", "wc_weaknesses": "204;50;72;63", "wc_questions": "6;2;38;15", "wc_limitations": "10;2;4;2", "wc_review": "343;172;230;191", "wc_reply_reviewers": "73;14;167;34", "wc_reply_authors": "165;106;94;31", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.5, 15.660459763365825 ], "wc_strengths_avg": [ 58.5, 13.793114224133722 ], "wc_weaknesses_avg": [ 97.25, 62.126383284398585 ], "wc_questions_avg": [ 15.25, 13.953046262375826 ], "wc_limitations_avg": [ 4.5, 3.278719262151 ], "wc_review_avg": [ 234.0, 66.31364867054143 ], "wc_reply_reviewers_avg": [ 72.0, 58.80901291468851 ], "wc_reply_authors_avg": [ 99.0, 47.576254581461114 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11509549809684930458&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ecnu.edu.cn;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;ecnu.edu.cn;ecnu.edu.cn;bytedance.com;", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;0;0;1", "aff_unique_norm": "East China Normal University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "ECNU;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Learning for Computing Convergence Rates of Markov Chains", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94182", "id": "fqmSGK8C0B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fqmSGK8C0B", "openreview": "https://openreview.net/forum?id=fqmSGK8C0B", "poster": "/media/PosterPDFs/NeurIPS%202024/94182.png?t=1733358038.6173801", "project": "", "author_site": "Yanlin Qu, Jose Blanchet, Peter W Glynn", "tldr": "", "abstract": "Convergence rate analysis for general state-space Markov chains is fundamentally important in operations research (stochastic systems) and machine learning (stochastic optimization). This problem, however, is notoriously difficult because traditional analytical methods often do not generate practically useful convergence bounds for realistic Markov chains. We propose the Deep Contractive Drift Calculator (DCDC), the first general-purpose sample-based algorithm for bounding the convergence of Markov chains to stationarity in Wasserstein distance. The DCDC has two components. First, inspired by the new convergence analysis framework in (Qu et.al, 2023), we introduce the Contractive Drift Equation (CDE), the solution of which leads to an explicit convergence bound. Second, we develop an efficient neural-network-based CDE solver. Equipped with these two components, DCDC solves the CDE and converts the solution into a convergence bound. We analyze the sample complexity of the algorithm and further demonstrate the effectiveness of the DCDC by generating convergence bounds for realistic Markov chains arising from stochastic processing networks as well as constant step-size stochastic optimization.", "keywords": "Markov chains;convergence analysis;Wasserstein distance;neural networks;stochastic optimization", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/9a1d744102cb22a58b1be86386aca31bb1ed8ebe.zip", "author": "Yanlin Qu;Jose Blanchet;Peter Glynn", "authorids": "~Yanlin_Qu1;~Jose_Blanchet1;~Peter_Glynn2", "gender": "M;M;M", "homepage": "https://quyanlin.github.io;https://web.stanford.edu/~jblanche/;https://web.stanford.edu/~glynn/", "dblp": "285/8931;75/5093.html;", "google_scholar": ";https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;", "orcid": ";;", "linkedin": ";jose-blanchet;", "or_profile": "~Yanlin_Qu1;~Jose_Blanchet1;~Peter_Glynn2", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nqu2024deep,\ntitle={Deep Learning for Computing Convergence Rates of Markov Chains},\nauthor={Yanlin Qu and Jose Blanchet and Peter Glynn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fqmSGK8C0B}\n}", "github": "", "reviewers": "3Pb4;wP6n;ituE", "pdf_size": 1743982, "rating": "6;8;8", "confidence": "3;3;3", "soundness": "3;4;3", "novelty": "3;4;3", "presentation": "2;4;3", "wc_summary": "67;77;60", "wc_strengths": "58;142;90", "wc_weaknesses": "156;10;99", "wc_questions": "133;1;214", "wc_limitations": "11;12;1", "wc_review": "425;242;464", "wc_reply_reviewers": "113;15;0", "wc_reply_authors": "77;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 68.0, 6.97614984548545 ], "wc_strengths_avg": [ 96.66666666666667, 34.61534662865912 ], "wc_weaknesses_avg": [ 88.33333333333333, 60.07957685899223 ], "wc_questions_avg": [ 116.0, 87.78382538941898 ], "wc_limitations_avg": [ 8.0, 4.96655480858378 ], "wc_review_avg": [ 377.0, 96.77809669548166 ], "wc_reply_reviewers_avg": [ 42.666666666666664, 50.108770578501414 ], "wc_reply_authors_avg": [ 25.666666666666668, 36.29814810090944 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0KuJySXKs8oJ:scholar.google.com/&scioq=Deep+Learning+for+Computing+Convergence+Rates+of+Markov+Chains&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SpikedAttention: Training-Free and Fully Spike-Driven Transformer-to-SNN Conversion with Winner-Oriented Spike Shift for Softmax Operation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94181", "id": "fs28jccJj5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fs28jccJj5", "openreview": "https://openreview.net/forum?id=fs28jccJj5", "poster": "/media/PosterPDFs/NeurIPS%202024/94181.png?t=1730693847.2541857", "project": "", "author_site": "Sangwoo Hwang, Seunghyun Lee, Dahoon Park, Donghun Lee, Jaeha Kung", "tldr": "", "abstract": "Event-driven spiking neural networks(SNNs) are promising neural networks that reduce the energy consumption of continuously growing AI models. Recently, keeping pace with the development of transformers, transformer-based SNNs were presented. Due to the incompatibility of self-attention with spikes, however, existing transformer-based SNNs limit themselves by either restructuring self-attention architecture or conforming to non-spike computations. In this work, we propose a novel transformer-to-SNN conversion method that outputs an end-to-end spike-based transformer, named SpikedAttention. Our method directly converts the well-trained transformer without modifying its attention architecture. For the vision task, the proposed method converts Swin Transformer into an SNN without post-training or conversion-aware training, achieving state-of-the-art SNN accuracy on ImageNet dataset, i.e., 80.0\\% with 28.7M parameters. Considering weight accumulation, neuron potential update, and on-chip data movement, SpikedAttention reduces energy consumption by 42\\% compared to the baseline ANN, i.e., Swin-T. Furthermore, for the first time, we demonstrate that SpikedAttention successfully converts a BERT model to an SNN with only 0.3\\% accuracy loss on average consuming 58\\% less energy on GLUE benchmark. Our code is available at Github ( https://github.com/sangwoohwang/SpikedAttention ).", "keywords": "Spiking Neural Network;ANN-to-SNN conversion;Transformer;Neuromorphic", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/606f73a136a3b977fa098cd9b1651a7c6328a19f.zip", "author": "Sangwoo Hwang;Seunghyun Lee;Dahoon Park;Donghun Lee;Jaeha Kung", "authorids": "~Sangwoo_Hwang1;~Seunghyun_Lee4;~Dahoon_Park1;~Donghun_Lee6;~Jaeha_Kung1", "gender": "M;M;M;M;M", "homepage": "https://idslab.korea.ac.kr/;http://r3c0d3r.com;https://github.com/pdh930105;https://sites.google.com/view/kuidslab;https://idslab.korea.ac.kr", "dblp": "296/1157.html;;;;", "google_scholar": "-K2oND4AAAAJ;https://scholar.google.com/citations?hl=en;;;dQvmSAoAAAAJ", "orcid": "0000-0001-8716-5390;0000-0002-1917-0873;;;", "linkedin": "sangwoo-hwang-070857223/;r3c0d3r/;;;", "or_profile": "~Sangwoo_Hwang1;~Seunghyun_Lee4;~Dahoon_Park1;~Donghun_Lee6;~Jaeha_Kung1", "aff": "Daegu Gyeongbuk Institute of Science and Technology;Korea University;Korea University;Chungnam National University;Korea University", "aff_domain": "dgist.ac.kr;korea.ac.kr;korea.ac.kr;cnu.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nhwang2024spikedattention,\ntitle={SpikedAttention: Training-Free and Fully Spike-Driven Transformer-to-{SNN} Conversion with Winner-Oriented Spike Shift for Softmax Operation},\nauthor={Sangwoo Hwang and Seunghyun Lee and Dahoon Park and Donghun Lee and Jaeha Kung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fs28jccJj5}\n}", "github": "", "reviewers": "uL88;xJgY;XrJp", "pdf_size": 1616679, "rating": "6;6;7", "confidence": "2;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "72;92;48", "wc_strengths": "81;119;50", "wc_weaknesses": "58;123;55", "wc_questions": "40;78;144", "wc_limitations": "58;8;1", "wc_review": "309;420;298", "wc_reply_reviewers": "9;20;51", "wc_reply_authors": "0;0;25", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.66666666666667, 17.987650084309387 ], "wc_strengths_avg": [ 83.33333333333333, 28.21740991342441 ], "wc_weaknesses_avg": [ 78.66666666666667, 31.372316175606517 ], "wc_questions_avg": [ 87.33333333333333, 42.967688118192044 ], "wc_limitations_avg": [ 22.333333333333332, 25.381533094401966 ], "wc_review_avg": [ 342.3333333333333, 55.10192575783738 ], "wc_reply_reviewers_avg": [ 26.666666666666668, 17.78263822446552 ], "wc_reply_authors_avg": [ 8.333333333333334, 11.785113019775793 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14505546012572775458&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "dgist.ac.kr;korea.ac.kr;korea.ac.kr;cnu.ac.kr;korea.ac.kr", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Daegu Gyeongbuk Institute of Science and Technology;Korea University;Chungnam National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dgist.ac.kr;https://www.korea.ac.kr;http://www.cnu.ac.kr", "aff_unique_abbr": "DGIST;KU;CNU", "aff_campus_unique_index": "0", "aff_campus_unique": "Daegu;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "DEX: Data Channel Extension for Efficient CNN Inference on Tiny AI Accelerators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94180", "id": "ftqjwZQz10", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ftqjwZQz10", "openreview": "https://openreview.net/forum?id=ftqjwZQz10", "poster": "/media/PosterPDFs/NeurIPS%202024/94180.png?t=1732551577.314756", "project": "", "author_site": "Taesik Gong, Fahim Kawsar, Chulhong Min", "tldr": "", "abstract": "Tiny machine learning (TinyML) aims to run ML models on small devices and is increasingly favored for its enhanced privacy, reduced latency, and low cost. Recently, the advent of tiny AI accelerators has revolutionized the TinyML field by significantly enhancing hardware processing power. These accelerators, equipped with multiple parallel processors and dedicated per-processor memory instances, offer substantial performance improvements over traditional microcontroller units (MCUs). However, their limited data memory often necessitates downsampling input images, resulting in accuracy degradation. To address this challenge, we propose Data channel EXtension (DEX), a novel approach for efficient CNN execution on tiny AI accelerators. DEX incorporates additional spatial information from original images into input images through patch-wise even sampling and channel-wise stacking, effectively extending data across input channels. By leveraging underutilized processors and data memory for channel extension, DEX facilitates parallel execution without increasing inference latency. Our evaluation with four models and four datasets on tiny AI accelerators demonstrates that this simple idea improves accuracy on average by 3.5%p while keeping the inference latency the same on the AI accelerator. The source code is available at https://github.com/Nokia-Bell-Labs/data-channel-extension.", "keywords": "TinyML;On-device ML;CNN;AI accelerator;Microcontroller;MCU", "primary_area": "machine_vision", "supplementary_material": "", "author": "Taesik Gong;Fahim Kawsar;Chulhong Min", "authorids": "~Taesik_Gong1;~Fahim_Kawsar2;~Chulhong_Min1", "gender": "M;M;M", "homepage": "https://taesikgong.com/;http://www.fahim-kawsar.net/;https://chulhongmin.com", "dblp": "206/1779;;", "google_scholar": "bcmJw7AAAAAJ;;_tKsRSMAAAAJ", "orcid": "0000-0002-8967-3652;;", "linkedin": "taesik-gong-70a507a6;;", "or_profile": "~Taesik_Gong1;~Fahim_Kawsar2;~Chulhong_Min1", "aff": "Bell Labs;Bell Labs;Nokia Bell Labs", "aff_domain": "bell-labs.com;bell-labs.com;nokia-bell-labs.com", "position": "Researcher;Research Director;Principal Researcher", "bibtex": "@inproceedings{\ngong2024dex,\ntitle={{DEX}: Data Channel Extension for Efficient {CNN} Inference on Tiny {AI} Accelerators},\nauthor={Taesik Gong and Fahim Kawsar and Chulhong Min},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ftqjwZQz10}\n}", "github": "", "reviewers": "UAVo;ySD7;Zbte;k2BH", "pdf_size": 5322484, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "94;76;72;122", "wc_strengths": "44;26;97;62", "wc_weaknesses": "43;95;33;111", "wc_questions": "203;50;37;85", "wc_limitations": "1;1;35;52", "wc_review": "385;248;274;432", "wc_reply_reviewers": "12;0;15;0", "wc_reply_authors": "40;0;20;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.0, 19.72308292331602 ], "wc_strengths_avg": [ 57.25, 26.242856170775315 ], "wc_weaknesses_avg": [ 70.5, 33.17755265235819 ], "wc_questions_avg": [ 93.75, 65.47279969575152 ], "wc_limitations_avg": [ 22.25, 22.083647796503186 ], "wc_review_avg": [ 334.75, 76.15567936798936 ], "wc_reply_reviewers_avg": [ 6.75, 6.832825184358224 ], "wc_reply_authors_avg": [ 15.0, 16.583123951777 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8269725433859397254&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "bell-labs.com;bell-labs.com;nokia-bell-labs.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Bell Labs;Nokia Bell Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.bell-labs.com;https://www.nokialabs.com", "aff_unique_abbr": "Bell Labs;Nokia Bell Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bigger, Regularized, Optimistic: scaling for compute and sample efficient continuous control", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94179", "id": "fu0xdh4aEJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fu0xdh4aEJ", "openreview": "https://openreview.net/forum?id=fu0xdh4aEJ", "poster": "/media/PosterPDFs/NeurIPS%202024/94179.png?t=1730061681.1516771", "project": "", "author_site": "Michal Nauman, Mateusz Ostaszewski, Krzysztof Jankowski, Piotr Mi\u0142o\u015b, Marek Cygan", "tldr": "", "abstract": "Sample efficiency in Reinforcement Learning (RL) has traditionally been driven by algorithmic enhancements. In this work, we demonstrate that scaling can also lead to substantial improvements. We conduct a thorough investigation into the interplay of scaling model capacity and domain-specific RL enhancements. These empirical findings inform the design choices underlying our proposed BRO (Bigger, Regularized, Optimistic) algorithm. The key innovation behind BRO is that strong regularization allows for effective scaling of the critic networks, which, paired with optimistic exploration, leads to superior performance. BRO achieves state-of-the-art results, significantly outperforming the leading model-based and model-free algorithms across 40 complex tasks from the DeepMind Control, MetaWorld, and MyoSuite benchmarks. BRO is the first model-free algorithm to achieve near-optimal policies in the notoriously challenging Dog and Humanoid tasks.", "keywords": "Machine Learning;Reinforcement Learning;Scaling", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Michal Nauman;Mateusz Ostaszewski;Krzysztof Jankowski;Piotr Mi\u0142o\u015b;Marek Cygan", "authorids": "~Michal_Nauman1;~Mateusz_Ostaszewski1;~Krzysztof_Jankowski1;~Piotr_Mi\u0142o\u015b1;~Marek_Cygan1", "gender": ";;M;;", "homepage": ";;;;", "dblp": ";;;208/0989.html;76/819", "google_scholar": ";;;Se68XecAAAAJ;df8TSy4AAAAJ", "orcid": ";;0009-0009-4111-2379;;", "linkedin": "michal-nauman/;;krzysztof-jankowski-b876571a7/;piotr-milos-4b02151/;marek-cygan-b9a316140/", "or_profile": "~Michal_Nauman1;~Mateusz_Ostaszewski1;~Krzysztof_Jankowski1;~Piotr_Mi\u0142o\u015b1;~Marek_Cygan1", "aff": "University of Warsaw;;University of Warsaw;IDEAS NCBR;Nomagic", "aff_domain": "mimuw.edu.pl;;mimuw.edu.pl;ideas-ncbr.pl;nomagic.ai", "position": "PhD student;;MS student;Researcher;Founder / CTO", "bibtex": "@inproceedings{\nnauman2024bigger,\ntitle={Bigger, Regularized, Optimistic: scaling for compute and sample efficient continuous control},\nauthor={Michal Nauman and Mateusz Ostaszewski and Krzysztof Jankowski and Piotr Mi{\\l}o{\\'s} and Marek Cygan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fu0xdh4aEJ}\n}", "github": "", "reviewers": "TNcA;7YXC;zd6E", "pdf_size": 4388895, "rating": "7;7;8", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;4;4", "presentation": "2;3;4", "wc_summary": "72;102;112", "wc_strengths": "96;46;64", "wc_weaknesses": "353;36;68", "wc_questions": "42;78;19", "wc_limitations": "17;10;9", "wc_review": "580;272;272", "wc_reply_reviewers": "324;123;14", "wc_reply_authors": "46;177;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 95.33333333333333, 16.996731711975947 ], "wc_strengths_avg": [ 68.66666666666667, 20.677416559027762 ], "wc_weaknesses_avg": [ 152.33333333333334, 142.4928848126187 ], "wc_questions_avg": [ 46.333333333333336, 24.280765135299085 ], "wc_limitations_avg": [ 12.0, 3.559026084010437 ], "wc_review_avg": [ 374.6666666666667, 145.19259240363778 ], "wc_reply_reviewers_avg": [ 153.66666666666666, 128.40128071358512 ], "wc_reply_authors_avg": [ 74.33333333333333, 74.98592460514764 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7814067493029205847&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "mimuw.edu.pl;;mimuw.edu.pl;ideas-ncbr.pl;nomagic.ai", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Warsaw;Institute for Development, Economic Analysis, and Simulation (IDEAS);Nomagic", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uw.edu.pl;https://www.ideas-ncbr.gov.pl;", "aff_unique_abbr": "UW;IDEAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Poland;" }, { "title": "Time-MMD: Multi-Domain Multimodal Dataset for Time Series Analysis", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97582", "id": "fuD0h4R1IL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fuD0h4R1IL", "openreview": "https://openreview.net/forum?id=fuD0h4R1IL", "poster": "/media/PosterPDFs/NeurIPS%202024/97582.png?t=1731375341.9370136", "project": "", "author_site": "Haoxin Liu, Shangqing Xu, Zhiyuan Zhao, Lingkai Kong, Harshavardhan Prabhakar Kamarthi, Aditya Sasanur, Megha Sharma, Jiaming Cui, Qingsong Wen, Chao Zhang, B. Aditya Prakash", "tldr": "", "abstract": "Time series data are ubiquitous across a wide range of real-world domains. While\nreal-world time series analysis (TSA) requires human experts to integrate numerical series data with multimodal domain-specific knowledge, most existing TSA\nmodels rely solely on numerical data, overlooking the significance of information beyond numerical series. This oversight is due to the untapped potential\nof textual series data and the absence of a comprehensive, high-quality multimodal dataset. To overcome this obstacle, we introduce Time-MMD, the first\nmulti-domain, multimodal time series dataset covering 9 primary data domains.\nTime-MMD ensures fine-grained modality alignment, eliminates data contamination, and provides high usability. Additionally, we develop MM-TSFlib, the\nfirst-cut multimodal time-series forecasting (TSF) library, seamlessly pipelining\nmultimodal TSF evaluations based on Time-MMD for in-depth analyses. Extensive\nexperiments conducted on Time-MMD through MM-TSFlib demonstrate significant performance enhancements by extending unimodal TSF to multimodality,\nevidenced by over 15% mean squared error reduction in general, and up to 40%\nin domains with rich textual data. More importantly, our datasets and library\nrevolutionize broader applications, impacts, research topics to advance TSA. The\ndataset is available at https://github.com/AdityaLab/Time-MMD.", "keywords": "Time Series Analysis;Multimodality;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Haoxin Liu;Shangqing Xu;Zhiyuan Zhao;Lingkai Kong;Harshavardhan Kamarthi;Aditya B. Sasanur;Megha Sharma;Jiaming Cui;Qingsong Wen;Chao Zhang;B. Aditya Prakash", "authorids": "~Haoxin_Liu3;~Shangqing_Xu1;~Zhiyuan_Zhao1;~Lingkai_Kong1;~Harshavardhan_Kamarthi1;~Aditya_B._Sasanur1;~Megha_Sharma2;~Jiaming_Cui1;~Qingsong_Wen2;~Chao_Zhang15;~B._Aditya_Prakash2", "gender": ";M;M;M;M;M;F;;;;", "homepage": ";https://sigmatsing.github.io;https://leozhao1997.github.io/;https://lingkai-kong.com/;https://www.harsha-pk.com;https://www.linkedin.com/in/aditya-sasanur;https://sites.google.com/view/meghasharmaa;https://sites.cc.gatech.edu/~jcui75/index.html;;http://chaozhang.org/;https://www.cc.gatech.edu/~badityap/", "dblp": ";275/7602;;20/10253;245/8927;;;;;94/3019-14;06/3956", "google_scholar": ";LxyfYAUAAAAJ;TzWPFmwAAAAJ;https://scholar.google.com/citations?hl=en;LNXEjT8AAAAJ;;;;;https://scholar.google.com/citations?hl=en;C-NftTgAAAAJ", "orcid": ";;0009-0005-6671-705x;0000-0001-6480-513X;0000-0002-2901-7127;;;;;0000-0003-3009-598X;0000-0002-3252-455X", "linkedin": ";shangqing-xu-08735a258/;;;harshavardhan-kamarthi-462928112/;;meghasharma97/;;;;", "or_profile": "~Haoxin_Liu3;~Shangqing_Xu1;~Zhiyuan_Zhao1;~Lingkai_Kong1;~Harshavardhan_Kamarthi1;~Aditya_B._Sasanur1;~Megha_Sharma2;~Jiaming_Cui1;~Qingsong_Wen2;~Chao_Zhang15;~B._Aditya_Prakash2", "aff": ";College of Computing, Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": ";cc.gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;;gatech.edu;gatech.edu", "position": ";MS student;PhD student;PhD student;PhD student;MS student;MS student;PhD student;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024timemmd,\ntitle={Time-{MMD}: Multi-Domain Multimodal Dataset for Time Series Analysis},\nauthor={Haoxin Liu and Shangqing Xu and Zhiyuan Zhao and Lingkai Kong and Harshavardhan Kamarthi and Aditya B. Sasanur and Megha Sharma and Jiaming Cui and Qingsong Wen and Chao Zhang and B. Aditya Prakash},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=fuD0h4R1IL}\n}", "github": "", "reviewers": "otUR;HR7m;mAU8;BWHX", "pdf_size": 1166587, "rating": "6;6;7;8", "confidence": "4;4;3;5", "wc_summary_and_contributions": "46;66;47;115", "wc_strengths": "25;76;19;203", "wc_improvement": "219;2;27;73", "wc_limitations": "15;253;7;35", "wc_correctness": "54;4;13;16", "wc_clarity": "9;6;8;1", "wc_relation_to_prior_work": "14;65;8;1", "wc_documentation": "18;24;17;63", "wc_additional_feedback": "1;1;1;1", "wc_review": "401;497;147;508", "wc_reply_reviewers": "35;84;15;12", "wc_reply_authors": "0;0;39;0", "reply_reviewers": "1;1;1;1", "reply_authors": "4;4;5;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 68.5, 28.00446392988089 ], "wc_strengths_avg": [ 80.75, 73.97423537962389 ], "wc_improvement_avg": [ 80.25, 84.05764391178235 ], "wc_limitations_avg": [ 77.5, 101.8368793708841 ], "wc_correctness_avg": [ 21.75, 19.13602623325961 ], "wc_clarity_avg": [ 6.0, 3.082207001484488 ], "wc_relation_to_prior_work_avg": [ 22.0, 25.248762345905195 ], "wc_documentation_avg": [ 30.5, 18.953891421024867 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 388.25, 145.37086193594644 ], "wc_reply_reviewers_avg": [ 36.5, 28.81405906844782 ], "wc_reply_authors_avg": [ 9.75, 16.887495373796554 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 1.0897247358851685 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15972762123576209371&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";cc.gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu;;gatech.edu;gatech.edu", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "College of Computing", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Atlanta;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Back to the Continuous Attractor", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94178", "id": "fvG6ZHrH0B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fvG6ZHrH0B", "openreview": "https://openreview.net/forum?id=fvG6ZHrH0B", "poster": "/media/PosterPDFs/NeurIPS%202024/94178.png?t=1733761731.3556828", "project": "", "author_site": "\u00c1bel S\u00e1godi, Guillermo Mart\u00edn-S\u00e1nchez, Piotr Sokol, Memming Park", "tldr": "", "abstract": "Continuous attractors offer a unique class of solutions for storing continuous-valued variables in recurrent system states for indefinitely long time intervals.\nUnfortunately, continuous attractors suffer from severe structural instability in general---they are destroyed by most infinitesimal changes of the dynamical law that defines them.\nThis fragility limits their utility especially in biological systems as their recurrent dynamics are subject to constant perturbations.\nWe observe that the bifurcations from continuous attractors in theoretical neuroscience models display various structurally stable forms.\nAlthough their asymptotic behaviors to maintain memory are categorically distinct, their finite-time behaviors are similar.\nWe build on the persistent manifold theory to explain the commonalities between bifurcations from and approximations of continuous attractors.\nFast-slow decomposition analysis uncovers the existence of a persistent slow manifold that survives the seemingly destructive bifurcation, relating the flow within the manifold to the size of the perturbation. Moreover, this allows the bounding of the memory error of these approximations of continuous attractors.\nFinally, we train recurrent neural networks on analog memory tasks to support the appearance of these systems as solutions and their generalization capabilities.\nTherefore, we conclude that continuous attractors are functionally robust and remain useful as a universal analogy for understanding analog memory.", "keywords": "continuous attractors;robustness;fast-slow decomposition;generalization", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "\u00c1bel S\u00e1godi;Guillermo Mart\u00edn-S\u00e1nchez;Piotr A Sokol;Il Memming Park", "authorids": "~\u00c1bel_S\u00e1godi1;~Guillermo_Mart\u00edn-S\u00e1nchez1;~Piotr_A_Sokol2;~Il_Memming_Park1", "gender": "M;Non-Binary;M;M", "homepage": ";;https://scholar.google.com/citations?user=MwySeOEAAAAJ&hl=en;http://catniplab.github.io/", "dblp": ";;228/9266;00/4652-2", "google_scholar": "-nSEXC0AAAAJ;;MwySeOEAAAAJ;CsmltusAAAAJ", "orcid": "0000-0002-1414-5062;0000-0002-5387-9579;;0000-0002-4255-7750", "linkedin": "asagodi/;;;memming/", "or_profile": "~\u00c1bel_S\u00e1godi1;~Guillermo_Mart\u00edn-S\u00e1nchez1;~Piotr_A_Sokol2;~Il_Memming_Park1", "aff": "Champalimaud Research;Champalimaud Foundation;;Champalimaud Centre for the Unknown", "aff_domain": "fchampalimaud.org;research.fchampalimaud.org;;fchampalimaud.org", "position": "PhD student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\ns{\\'a}godi2024back,\ntitle={Back to the Continuous Attractor},\nauthor={{\\'A}bel S{\\'a}godi and Guillermo Mart{\\'\\i}n-S{\\'a}nchez and Piotr A Sokol and Il Memming Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fvG6ZHrH0B}\n}", "github": "", "reviewers": "TfFn;xmvZ;CAVv;8M6u", "pdf_size": 9401934, "rating": "6;6;7;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;2;2;2", "wc_summary": "31;70;167;89", "wc_strengths": "131;44;93;43", "wc_weaknesses": "822;375;277;37", "wc_questions": "12;52;73;312", "wc_limitations": "5;87;79;1", "wc_review": "1001;628;689;482", "wc_reply_reviewers": "143;430;136;313", "wc_reply_authors": "38;740;45;758", "reply_reviewers": "1;3;2;3", "reply_authors": "2;3;2;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 89.25, 49.51956683978567 ], "wc_strengths_avg": [ 77.75, 36.79249244071404 ], "wc_weaknesses_avg": [ 377.75, 284.44100882256765 ], "wc_questions_avg": [ 112.25, 117.38904335584306 ], "wc_limitations_avg": [ 43.0, 40.124805295477756 ], "wc_review_avg": [ 700.0, 189.36076679185686 ], "wc_reply_reviewers_avg": [ 255.5, 123.17974671186818 ], "wc_reply_authors_avg": [ 395.25, 353.81589492276913 ], "reply_reviewers_avg": [ 2.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16718134011186498879&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "fchampalimaud.org;research.fchampalimaud.org;;fchampalimaud.org", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Champalimaud Foundation;Champalimaud Centre for the Unknown", "aff_unique_dep": "Champalimaud Research;", "aff_unique_url": "https://www.champalimaud.org;https://www.champalimaud.org", "aff_unique_abbr": "CF;CCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Portugal" }, { "title": "Diffusion Twigs with Loop Guidance for Conditional Graph Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94177", "id": "fvOCJAAYLx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fvOCJAAYLx", "openreview": "https://openreview.net/forum?id=fvOCJAAYLx", "poster": "/media/PosterPDFs/NeurIPS%202024/94177.png?t=1731690041.6526217", "project": "", "author_site": "Giangiacomo Mercatali, Yogesh Verma, Andre Freitas, Vikas Garg", "tldr": "", "abstract": "We introduce a novel score-based diffusion framework named Twigs that incorporates multiple co-evolving flows for enriching conditional generation tasks. Specifically, a central or trunk diffusion process is associated with a primary variable (e.g., graph structure), and additional offshoot or stem processes are dedicated to dependent variables (e.g., graph properties or labels). A new strategy, which we call loop guidance, effectively orchestrates the flow of information between the trunk and the stem processes during sampling. This approach allows us to uncover intricate interactions and dependencies, and unlock new generative capabilities. We provide extensive experiments to demonstrate strong performance gains of the proposed method over contemporary baselines in the context of conditional graph generation, underscoring the potential of Twigs in challenging generative tasks such as inverse molecular design and molecular optimization. \nCode is available at https://github.com/Aalto-QuML/Diffusion_twigs.", "keywords": "graph network;conditional generation;diffusion;generative models;molecule design;molecule optimization", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Giangiacomo Mercatali;Yogesh Verma;Andre Freitas;Vikas Garg", "authorids": "~Giangiacomo_Mercatali1;~Yogesh_Verma1;~Andre_Freitas1;~Vikas_Garg2", "gender": ";M;;", "homepage": ";https://yoverma.github.io/yoerma.github.io/;http://andrefreitas.org;", "dblp": ";284/2155;47/9409.html;", "google_scholar": ";9W9u4owAAAAJ;ExmHmMoAAAAJ;", "orcid": ";;;", "linkedin": ";yogeshverma1998/;andrefreitas/;", "or_profile": "~Giangiacomo_Mercatali1;~Yogesh_Verma1;~Andre_Freitas1;~Vikas_Garg2", "aff": ";Aalto University;University of Manchester;", "aff_domain": ";aalto.fi;manchester.ac.uk;", "position": ";PhD student;Associate Professor;", "bibtex": "@inproceedings{\nmercatali2024diffusion,\ntitle={Diffusion Twigs with Loop Guidance for Conditional Graph Generation},\nauthor={Giangiacomo Mercatali and Yogesh Verma and Andre Freitas and Vikas Garg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fvOCJAAYLx}\n}", "github": "", "reviewers": "Gpe5;WGvA;GMeF;8xx4", "pdf_size": 3724602, "rating": "5;5;5;6", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "56;77;69;41", "wc_strengths": "19;27;81;55", "wc_weaknesses": "60;242;80;86", "wc_questions": "74;40;17;215", "wc_limitations": "25;16;2;31", "wc_review": "234;402;249;428", "wc_reply_reviewers": "144;64;13;26", "wc_reply_authors": "848;119;58;58", "reply_reviewers": "3;2;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.75, 13.645054048995188 ], "wc_strengths_avg": [ 45.5, 24.469368606484313 ], "wc_weaknesses_avg": [ 117.0, 72.80796659706958 ], "wc_questions_avg": [ 86.5, 76.91066245976562 ], "wc_limitations_avg": [ 18.5, 10.920164833920778 ], "wc_review_avg": [ 328.25, 87.39672476700714 ], "wc_reply_reviewers_avg": [ 61.75, 51.050832510351874 ], "wc_reply_authors_avg": [ 270.75, 334.2045593644707 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9976239036771124464&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";aalto.fi;manchester.ac.uk;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Aalto University;University of Manchester", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.manchester.ac.uk", "aff_unique_abbr": "Aalto;UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Finland;United Kingdom" }, { "title": "HC-GAE: The Hierarchical Cluster-based Graph Auto-Encoder for Graph Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94176", "id": "fx6aSBMu6z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fx6aSBMu6z", "openreview": "https://openreview.net/forum?id=fx6aSBMu6z", "poster": "/media/PosterPDFs/NeurIPS%202024/94176.png?t=1731341412.612012", "project": "", "author_site": "Lu Bai, Zhuo Xu, Lixin Cui, Ming Li, Yue Wang, Edwin Hancock", "tldr": "", "abstract": "Graph Auto-Encoders (GAEs) are powerful tools for graph representation learning. In this paper, we develop a novel Hierarchical Cluster-based GAE (HC-GAE), that can learn effective structural characteristics for graph data analysis. To this end, during the encoding process, we commence by utilizing the hard node assignment to decompose a sample graph into a family of separated subgraphs. We compress each subgraph into a coarsened node, transforming the original graph into a coarsened graph. On the other hand, during the decoding process, we adopt the soft node assignment to reconstruct the original graph structure by expanding the coarsened nodes. By hierarchically performing the above compressing procedure during the decoding process as well as the expanding procedure during the decoding process, the proposed HC-GAE can effectively extract bidirectionally hierarchical structural features of the original sample graph. Furthermore, we re-design the loss function that can integrate the information from either the encoder or the decoder. Since the associated graph convolution operation of the proposed HC-GAE is restricted in each individual separated subgraph and cannot propagate the node information between different subgraphs, the proposed HC-GAE can significantly reduce the over-smoothing problem arising in the classical convolution-based GAEs. The proposed HC-GAE can generate effective representations for either node classification or graph classification, and the experiments demonstrate the effectiveness on real-world datasets.", "keywords": "Graph Auto-encoder;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Lu Bai;Zhuo Xu;Lixin Cui;Ming Li;Yue Wang;Edwin Hancock", "authorids": "~Lu_Bai3;~Zhuo_Xu3;~Lixin_Cui1;~Ming_Li15;~Yue_Wang41;~Edwin_Hancock1", "gender": "M;;F;M;;M", "homepage": ";;https://ie.cufe.edu.cn/info/1388/6214.htm;https://mingli-ai.github.io;;https://pure.york.ac.uk/portal/en/persons/edwin-r-hancock", "dblp": "26/1137-1;;https://dblp.uni-trier.de/pid/72/5436.html;181/2821-65;;h/EdwinRHancock", "google_scholar": ";;;Z7yEoOQAAAAJ;;EjDU2ncAAAAJ", "orcid": ";;;0000-0002-1218-2804;;0000-0003-4496-2028", "linkedin": ";;;;;edwin-hancock-11913117/?originalSubdomain=uk", "or_profile": "~Lu_Bai3;~Zhuo_Xu3;~Lixin_Cui1;~Ming_Li15;~Yue_Wang41;~Edwin_Hancock1", "aff": ";;Central University of Finance and Economics;Zhejiang Normal University;;Anhui University", "aff_domain": ";;cufe.edu.cn;zjnu.edu.cn;;ahu.edu.cn", "position": ";;Associate Professor;Full Professor;;Honorary Professor", "bibtex": "@inproceedings{\nbai2024hcgae,\ntitle={{HC}-{GAE}: The Hierarchical Cluster-based Graph Auto-Encoder for Graph Representation Learning},\nauthor={Lu Bai and Zhuo Xu and Lixin Cui and Ming Li and Yue Wang and Edwin Hancock},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fx6aSBMu6z}\n}", "github": "", "reviewers": "M4BE;Udb8;DgD7;94wD", "pdf_size": 577422, "rating": "3;4;7;8", "confidence": "4;4;4;4", "soundness": "1;2;3;3", "novelty": "1;2;3;4", "presentation": "3;3;3;3", "wc_summary": "27;65;129;102", "wc_strengths": "26;27;57;58", "wc_weaknesses": "320;175;183;101", "wc_questions": "4;25;7;5", "wc_limitations": "5;1;1;1", "wc_review": "382;293;377;267", "wc_reply_reviewers": "328;146;17;22", "wc_reply_authors": "495;372;50;50", "reply_reviewers": "2;1;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.75, 38.46020670771284 ], "wc_strengths_avg": [ 42.0, 15.508062419270823 ], "wc_weaknesses_avg": [ 194.75, 79.06445155694182 ], "wc_questions_avg": [ 10.25, 8.584142356694699 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 329.75, 50.622993787408504 ], "wc_reply_reviewers_avg": [ 128.25, 126.37320720785716 ], "wc_reply_authors_avg": [ 241.75, 196.61939756799174 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6778194528713800644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;cufe.edu.cn;zjnu.edu.cn;;ahu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Central University of Finance and Economics;Zhejiang Normal University;Anhui University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.cufe.edu.cn;http://www.zjnu.edu.cn;http://www.ahu.edu.cn/", "aff_unique_abbr": "CUFE;ZJNU;AHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking Imbalance in Image Super-Resolution for Efficient Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94175", "id": "fyYrZbWtNz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fyYrZbWtNz", "openreview": "https://openreview.net/forum?id=fyYrZbWtNz", "poster": "/media/PosterPDFs/NeurIPS%202024/94175.png?t=1731326374.1099484", "project": "", "author_site": "Wei Yu, Bowen Yang, Liu Qinglin, Jianing Li, Shengping Zhang, Xiangyang Ji", "tldr": "", "abstract": "Existing super-resolution (SR) methods optimize all model weights equally using $\\mathcal{L}_1$ or $\\mathcal{L}_2$ losses by uniformly sampling image patches without considering dataset imbalances or parameter redundancy, which limits their performance. To address this, we formulate the image SR task as an imbalanced distribution transfer learning problem from a statistical probability perspective, proposing a plug-and-play Weight-Balancing framework (WBSR) to achieve balanced model learning without changing the original model structure and training data. Specifically, we develop a Hierarchical Equalization Sampling (HES) strategy to address data distribution imbalances, enabling better feature representation from texture-rich samples. To tackle model optimization imbalances, we propose a Balanced Diversity Loss (BDLoss) function, focusing on learning texture regions while disregarding redundant computations in smooth regions. After joint training of HES and BDLoss to rectify these imbalances, we present a gradient projection dynamic inference strategy to facilitate accurate and efficient inference. Extensive experiments across various models, datasets, and scale factors demonstrate that our method achieves comparable or superior performance to existing approaches with about 34\\% reduction in computational cost.", "keywords": "Efficient Image Super-Resolution; Dynamic Network; Imbalanced Data Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Wei Yu;Bowen Yang;Qinglin Liu;Jianing Li;Shengping Zhang;Xiangyang Ji", "authorids": "~Wei_Yu14;~Bowen_Yang8;~Qinglin_Liu1;~Jianing_Li4;~Shengping_Zhang1;~Xiangyang_Ji1", "gender": "M;M;M;M;M;", "homepage": ";https://www.hitwh.edu.cn/;;https://jianing-li.github.io/;http://homepage.hit.edu.cn/zhangshengping;", "dblp": ";;227/7900;;60/1866;", "google_scholar": "9-EijjUAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;hsu1cSIAAAAJ;https://scholar.google.com.hk/citations?user=xrYnfwcAAAAJ;hMNsT8sAAAAJ;", "orcid": "0000-0002-4805-3115;;0000-0002-2408-3344;0000-0002-7468-0622;;", "linkedin": ";;%E9%9D%92%E6%9E%97-%E6%9F%B3-a7354377/;;;", "or_profile": "~Wei_Yu14;~Bowen_Yang8;~Qinglin_Liu1;~Jianing_Li4;~Shengping_Zhang1;~Xiangyang_Ji1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Peking University;Harbin Institute of Technology;", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;pku.edu.cn;hit.edu.cn;", "position": "PhD student;Undergrad student;Lecturer;Researcher;Full Professor;", "bibtex": "@inproceedings{\nyu2024rethinking,\ntitle={Rethinking Imbalance in Image Super-Resolution for Efficient Inference},\nauthor={Wei Yu and Bowen Yang and Qinglin Liu and Jianing Li and Shengping Zhang and Xiangyang Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fyYrZbWtNz}\n}", "github": "", "reviewers": "SYab;FS1Z;Cw8X;D78d", "pdf_size": 7116351, "rating": "3;5;7;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "49;37;25;73", "wc_strengths": "35;33;105;71", "wc_weaknesses": "142;158;107;47", "wc_questions": "89;5;50;143", "wc_limitations": "4;5;57;93", "wc_review": "319;238;344;427", "wc_reply_reviewers": "0;0;122;0", "wc_reply_authors": "44;44;570;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 46.0, 17.74823934929885 ], "wc_strengths_avg": [ 61.0, 29.563490998188964 ], "wc_weaknesses_avg": [ 113.5, 42.59401366389413 ], "wc_questions_avg": [ 71.75, 50.751231512151506 ], "wc_limitations_avg": [ 39.75, 37.47916087641237 ], "wc_review_avg": [ 332.0, 67.4054893907017 ], "wc_reply_reviewers_avg": [ 30.5, 52.827549630850754 ], "wc_reply_authors_avg": [ 164.5, 234.8036413686977 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ReGbp9j7facJ:scholar.google.com/&scioq=Rethinking+Imbalance+in+Image+Super-Resolution+for+Efficient+Inference&hl=en&as_sdt=0,7", "gs_version_total": 2, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;pku.edu.cn;hit.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "HIT;Peking U", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ReFT: Representation Finetuning for Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94174", "id": "fykjplMc0V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fykjplMc0V", "openreview": "https://openreview.net/forum?id=fykjplMc0V", "poster": "", "project": "", "author_site": "Zhengxuan Wu, Aryaman Arora, Zheng Wang, Atticus Geiger, Dan Jurafsky, Christopher D Manning, Christopher Potts", "tldr": "", "abstract": "Parameter-efficient finetuning (PEFT) methods seek to adapt large neural models via updates to a small number of *weights*. However, much prior interpretability work has shown that *representations* encode rich semantic information, suggesting that editing representations might be a more powerful alternative. We pursue this hypothesis by developing a family of **Representation Finetuning (ReFT)** methods. ReFT methods operate on a frozen base model and learn task-specific interventions on hidden representations. We define a strong instance of the ReFT family, Low-rank Linear Subspace ReFT (LoReFT), and we identify an ablation of this method that trades some performance for increased efficiency. Both are drop-in replacements for existing PEFTs and learn interventions that are 15x--65x more parameter-efficient than LoRA. We showcase LoReFT on eight commonsense reasoning tasks, four arithmetic reasoning tasks, instruction-tuning, and GLUE. In all these evaluations, our ReFTs deliver the best balance of efficiency and performance, and almost always outperform state-of-the-art PEFTs. Upon publication, we will publicly release our generic ReFT training library.", "keywords": "Representation finetuning;Interpretability;Parameter-efficient finetuning;Activation intervention", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/13c898c42fb43981f4bb038f167d361b4eecbba1.zip", "author": "Zhengxuan Wu;Aryaman Arora;Zheng Wang;Atticus Geiger;Dan Jurafsky;Christopher D Manning;Christopher Potts", "authorids": "~Zhengxuan_Wu1;~Aryaman_Arora1;~Zheng_Wang41;~Atticus_Geiger1;~Dan_Jurafsky1;~Christopher_D_Manning1;~Christopher_Potts1", "gender": "M;M;;M;M;M;M", "homepage": "https://cs.stanford.edu/~wuzhengx/;https://aryaman.io/;;https://atticusg.github.io/;http://web.stanford.edu/~jurafsky/;https://nlp.stanford.edu/~manning/;http://web.stanford.edu/~cgpotts/", "dblp": "234/4650;263/6933;;229/4086;31/985;m/ChristopherDManning;13/2617", "google_scholar": "CBvE6lwAAAAJ;0-4GKw8AAAAJ;;;uZg9l58AAAAJ;1zmDOdwAAAAJ;3j08YoAAAAAJ", "orcid": ";0000-0002-4977-8206;;;;0000-0001-6155-649X;0000-0002-7978-6055", "linkedin": ";aryaman-arora2020/;;;;christopher-manning-011575/;", "or_profile": "~Zhengxuan_Wu1;~Aryaman_Arora1;~Zheng_Wang41;~Atticus_Geiger1;~Dan_Jurafsky1;~Christopher_D_Manning1;~Christopher_Potts1", "aff": "Stanford University;Stanford University;; Pr(Ai)\u00b2R Group;Stanford University;Computer Science Department, Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;prair.group;stanford.edu;cs.stanford.edu;stanford.edu", "position": "PhD student;PhD student;;Principal Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024reft,\ntitle={Re{FT}: Representation Finetuning for Language Models},\nauthor={Zhengxuan Wu and Aryaman Arora and Zheng Wang and Atticus Geiger and Dan Jurafsky and Christopher D Manning and Christopher Potts},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fykjplMc0V}\n}", "github": "", "reviewers": "d959;Pcei;333n;ddW1", "pdf_size": 2138554, "rating": "6;6;7;9", "confidence": "4;4;3;4", "soundness": "4;3;3;4", "novelty": "3;3;4;4", "presentation": "4;4;3;4", "wc_summary": "34;57;43;81", "wc_strengths": "56;192;70;172", "wc_weaknesses": "171;171;379;76", "wc_questions": "37;80;55;52", "wc_limitations": "6;1;34;9", "wc_review": "304;501;581;390", "wc_reply_reviewers": "516;42;34;41", "wc_reply_authors": "362;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.75, 17.73943347460679 ], "wc_strengths_avg": [ 122.5, 60.12279101971232 ], "wc_weaknesses_avg": [ 199.25, 110.78893220895307 ], "wc_questions_avg": [ 56.0, 15.443445211480501 ], "wc_limitations_avg": [ 12.5, 12.737739202856996 ], "wc_review_avg": [ 444.0, 105.51540171936986 ], "wc_reply_reviewers_avg": [ 158.25, 206.570054702999 ], "wc_reply_authors_avg": [ 90.5, 156.7505980849834 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1515800201922300185&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "stanford.edu;stanford.edu;;prair.group;stanford.edu;cs.stanford.edu;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Stanford University;Pr(Ai)\u00b2R Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;", "aff_unique_abbr": "Stanford;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "SLIM: Style-Linguistics Mismatch Model for Generalized Audio Deepfake Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94173", "id": "fymr0CBDHZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fymr0CBDHZ", "openreview": "https://openreview.net/forum?id=fymr0CBDHZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94173.png?t=1731310515.0225308", "project": "", "author_site": "Yi Zhu, Surya Koppisetti, Trang Tran, Gaurav Bharaj", "tldr": "", "abstract": "Audio deepfake detection (ADD) is crucial to combat the misuse of speech synthesized by generative AI models. Existing ADD models suffer from generalization issues to unseen attacks, with a large performance discrepancy between in-domain and out-of-domain data. Moreover, the black-box nature of existing models limits their use in real-world scenarios, where explanations are required for model decisions. To alleviate these issues, we introduce a new ADD model that explicitly uses the Style-LInguistics Mismatch (SLIM) in fake speech to separate them from real speech. SLIM first employs self-supervised pretraining on only real samples to learn the style-linguistics dependency in the real class. The learned features are then used in complement with standard pretrained acoustic features (e.g., Wav2vec) to learn a classifier on the real and fake classes. When the feature encoders are frozen, SLIM outperforms benchmark methods on out-of-domain datasets while achieving competitive results on in-domain data. The features learned by SLIM allow us to quantify the (mis)match between style and linguistic content in a sample, hence facilitating an explanation of the model decision.", "keywords": "Deepfake detection;anti-spoofing;synthesized speech;generalization;explainability;self-supervised learning", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/a5660a1345572ba95403fb2853840c6ea9c0c2da.zip", "author": "Yi Zhu;Surya Koppisetti;Trang Tran;Gaurav Bharaj", "authorids": "~Yi_Zhu7;~Surya_Koppisetti1;~Trang_Tran1;~Gaurav_Bharaj1", "gender": "M;M;F;", "homepage": "https://zhu00121.github.io/;https://scholar.google.com/citations?user=gex4pLEAAAAJ&hl=en;https://ttmt001.github.io/;", "dblp": ";;135/3569-1.html;", "google_scholar": ";;pJXKrrkAAAAJ;", "orcid": ";;0009-0009-1116-4442;", "linkedin": "zhu-yi-00121/;;trang-tran-540a8035/;", "or_profile": "~Yi_Zhu7;~Surya_Koppisetti1;~Trang_Tran1;~Gaurav_Bharaj1", "aff": "nstitut national de la recherche scientifique;Reality Defender Inc;Reality Defender;", "aff_domain": "inrs.ca;realitydefender.ai;realitydefender.ai;", "position": "PhD student;Researcher;Researcher;", "bibtex": "@inproceedings{\nzhu2024slim,\ntitle={{SLIM}: Style-Linguistics Mismatch Model for Generalized Audio Deepfake Detection},\nauthor={Yi Zhu and Surya Koppisetti and Trang Tran and Gaurav Bharaj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fymr0CBDHZ}\n}", "github": "", "reviewers": "1UM9;DiH3;chQg;4aqZ", "pdf_size": 4104277, "rating": "6;6;7;7", "confidence": "5;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "194;38;90;92", "wc_strengths": "54;50;66;116", "wc_weaknesses": "52;103;52;491", "wc_questions": "85;4;261;489", "wc_limitations": "1;22;6;48", "wc_review": "386;217;475;1236", "wc_reply_reviewers": "11;0;0;56", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 103.5, 56.55749287229766 ], "wc_strengths_avg": [ 71.5, 26.358110706194402 ], "wc_weaknesses_avg": [ 174.5, 183.9137025890132 ], "wc_questions_avg": [ 209.75, 186.07978799429023 ], "wc_limitations_avg": [ 19.25, 18.32177666057525 ], "wc_review_avg": [ 578.5, 390.7547184615945 ], "wc_reply_reviewers_avg": [ 16.75, 23.101677428273472 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=69467650872774100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "inrs.ca;realitydefender.ai;realitydefender.ai;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Institut National de la Recherche Scientifique;Reality Defender Inc;Reality Defender", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inrs.ca;;", "aff_unique_abbr": "INRS;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States;" }, { "id": "fzdFPqkAHD", "title": "Agent-to-Sim: Learning Interactive Behavior from Casual Videos", "track": "main", "status": "Reject", "tldr": "", "abstract": "Agent behavior simulation empowers robotics, gaming, movies, and VR applications, but building such simulators often requires laborious effort of manually crafting the agent's decision process and motion patterns. Recent advance in visual tracking and motion capture enables learning of agent behavior from real-world data, but these methods are limited to a few scenarios due to the dependence on specialized sensors (e.g., synchronized multi-camera systems). Towards better scalability, we present a framework, Agent-to-Sim, that learns simulatable 3D agents in a 3D environment from monocular videos. To deal with partial views, our framework fuses observations in a canonical space for both the agent and the scene, resulting in a dense 4D spatiotemporal reconstruction. We then learn an interactive behavior generator by querying paired data of agents' perception and actions from the 4D reconstruction. Agent-to-Sim enables real-to-sim transfer of agents in their familiar environments given longitudinal video recordings captured with a smartphone over a month. We show results on pets (e.g., cat, dog, bunny) and a person, and analyse how the observer's motion and 3D scene affect an agent's behavior.", "keywords": "Embodied Agents;Animal Reconstruction and Tracking;Motion Generation;4D Reconstruction;Scene Reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/9e7b28f76983e6f33d46b681f0027d723cee04f8.zip", "author": "Gengshan Yang;Andrea Bajcsy;Angjoo Kanazawa;Shunsuke Saito", "authorids": "~Gengshan_Yang1;~Andrea_Bajcsy1;~Angjoo_Kanazawa1;~Shunsuke_Saito1", "gender": ";F;M;F", "homepage": "http://gengshan-y.github.io/;https://people.eecs.berkeley.edu/~kanazawa/;http://www-scf.usc.edu/~saitos/;https://www.cs.cmu.edu/~abajcsy/", "dblp": "180/7347;119/1305;21/5061;208/0997", "google_scholar": "yRaFnrcAAAAJ;Ci-_QYIAAAAJ;IolN_okAAAAJ;LUe32ToAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Gengshan_Yang1;~Angjoo_Kanazawa1;~Shunsuke_Saito1;~Andrea_Victoria_Bajcsy1", "aff": "World Labs;University of California, Berkeley;Codec Avatars Lab;Carnegie Mellon University", "aff_domain": "worldlabs.ai;berkeley.edu;meta.com;cmu.edu", "position": "Researcher;Assistant Professor;Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024agenttosim,\ntitle={Agent-to-Sim: Learning Interactive Behavior from Casual Videos},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=fzdFPqkAHD}\n}", "github": "", "project": "", "reviewers": "m6Ge;C9Gr;Kj9h;xDUi", "site": "https://openreview.net/forum?id=fzdFPqkAHD", "pdf_size": 46387704, "rating": "4;6;6;7", "confidence": "4;2;1;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;4", "wc_summary": "65;71;97;251", "wc_strengths": "166;66;93;83", "wc_weaknesses": "348;172;105;75", "wc_questions": "52;8;111;199", "wc_limitations": "9;11;13;11", "wc_review": "640;328;419;619", "wc_reply_reviewers": "145;502;64;25", "wc_reply_authors": "0;661;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 121.0, 76.01315675591957 ], "wc_strengths_avg": [ 102.0, 38.19031290785662 ], "wc_weaknesses_avg": [ 175.0, 105.87492621012777 ], "wc_questions_avg": [ 92.5, 71.52796655854269 ], "wc_limitations_avg": [ 11.0, 1.4142135623730951 ], "wc_review_avg": [ 501.5, 132.1902038730556 ], "wc_reply_reviewers_avg": [ 184.0, 188.6305913684204 ], "wc_reply_authors_avg": [ 165.25, 286.22139595075697 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5129891760425771, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YhaHyFWg4kIJ:scholar.google.com/&scioq=Agent-to-Sim:+Learning+Interactive+Behavior+from+Casual+Videos&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "World Labs;University of California, Berkeley;Codec Avatars Lab;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.berkeley.edu;;https://www.cmu.edu", "aff_unique_abbr": ";UC Berkeley;;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "title": "GraphTrail: Translating GNN Predictions into Human-Interpretable Logical Rules", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94172", "id": "fzlMza6dRZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=fzlMza6dRZ", "openreview": "https://openreview.net/forum?id=fzlMza6dRZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94172.png?t=1733902484.0645955", "project": "", "author_site": "Burouj Armgaan, Manthan Dalmia, Sourav Medya, Sayan Ranu", "tldr": "", "abstract": "Instance-level explanation of graph neural networks (GNNs) is a well-studied area. These explainers, however, only explain an instance (e.g., a graph) and fail to uncover the combinatorial reasoning learned by a GNN from the training data towards making its predictions. In this work, we introduce GraphTrail, the first end-to-end, global, post-hoc GNN explainer that translates the functioning of a black-box GNN model to a boolean formula over the (sub)graph level concepts without relying on local explainers. GraphTrail is unique in automatically mining the discriminative subgraph-level concepts using Shapley values. Subsequently, the GNN predictions are mapped to a human-interpretable boolean formula over these concepts through symbolic regression. Extensive experiments across diverse datasets and GNN architectures demonstrate significant improvement over existing global explainers in mapping GNN predictions to faithful logical formulae. The robust and accurate performance of GraphTrail makes it invaluable for improving GNNs and facilitates adoption in domains with strict transparency requirements.", "keywords": "Graph Neural Network;Explainability;Global Factual Explanation;Symbolic Regression;Computation Trees", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Burouj Armgaan;Manthan Dalmia;Sourav Medya;Sayan Ranu", "authorids": "~Burouj_Armgaan1;~Manthan_Dalmia1;~Sourav_Medya1;~Sayan_Ranu2", "gender": "M;M;M;M", "homepage": "https://armagaan.github.io/;;https://souravmedya.github.io/;https://www.cse.iitd.ac.in/~sayan/index.html", "dblp": "349/0229;;178/3021;38/768", "google_scholar": "GaOJrMYAAAAJ;;RCFhOM4AAAAJ;K4w5qYUAAAAJ", "orcid": "0009-0007-2423-1523;;0000-0003-0996-2807;0000-0003-4147-9372", "linkedin": "burouj-armgaan/;manthan-dalmia-b6a6b520b;sourav-medya-35987a49/;", "or_profile": "~Burouj_Armgaan1;~Manthan_Dalmia1;~Sourav_Medya1;~Sayan_Ranu2", "aff": "Indian Institute of Technology Delhi;Indian Institute of Technology, Delhi;University of Illinois at Chicago;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;uic.edu;iitd.ac.in", "position": "PhD student;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\narmgaan2024graphtrail,\ntitle={GraphTrail: Translating {GNN} Predictions into Human-Interpretable Logical Rules},\nauthor={Burouj Armgaan and Manthan Dalmia and Sourav Medya and Sayan Ranu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=fzlMza6dRZ}\n}", "github": "", "reviewers": "AvDp;Sg1i;x6B5;8Xmp", "pdf_size": 8457956, "rating": "4;6;6;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "141;120;57;48", "wc_strengths": "68;477;35;36", "wc_weaknesses": "89;206;423;36", "wc_questions": "34;95;163;163", "wc_limitations": "1;51;7;10", "wc_review": "333;949;685;293", "wc_reply_reviewers": "55;10;370;34", "wc_reply_authors": "167;48;869;0", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;5;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 91.5, 39.82775414205526 ], "wc_strengths_avg": [ 154.0, 186.95587714752378 ], "wc_weaknesses_avg": [ 188.5, 148.7052453681443 ], "wc_questions_avg": [ 113.75, 53.76511415406834 ], "wc_limitations_avg": [ 17.25, 19.753164303473 ], "wc_review_avg": [ 565.0, 269.1022110648666 ], "wc_reply_reviewers_avg": [ 117.25, 146.79130594146235 ], "wc_reply_authors_avg": [ 271.0, 350.56739722912056 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16669716107211479680&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "iitd.ac.in;iitd.ac.in;uic.edu;iitd.ac.in", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Indian Institute of Technology Delhi;University of Illinois at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitd.ac.in;https://www.uic.edu", "aff_unique_abbr": "IIT Delhi;UIC", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Delhi;Chicago", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "India;United States" }, { "title": "GFlowNet Assisted Biological Sequence Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94171", "id": "g0G8DQSBcj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g0G8DQSBcj", "openreview": "https://openreview.net/forum?id=g0G8DQSBcj", "poster": "", "project": "", "author_site": "Pouya M. Ghari, Alex Tseng, Gokcen Eraslan, Romain Lopez, Tommaso Biancalani, Gabriele Scalia, Ehsan Hajiramezanali", "tldr": "", "abstract": "Editing biological sequences has extensive applications in synthetic biology and medicine, such as designing regulatory elements for nucleic-acid therapeutics and treating genetic disorders. The primary objective in biological-sequence editing is to determine the optimal modifications to a sequence which augment certain biological properties while adhering to a minimal number of alterations to ensure predictability and potentially support safety. In this paper, we propose GFNSeqEditor, a novel biological-sequence editing algorithm which builds on the recently proposed area of generative flow networks (GFlowNets). Our proposed GFNSeqEditor identifies elements within a starting seed sequence that may compromise a desired biological property. Then, using a learned stochastic policy, the algorithm makes edits at these identified locations, offering diverse modifications for each sequence to enhance the desired property. The number of edits can be regulated through specific hyperparameters. We conducted extensive experiments on a range of real-world datasets and biological applications, and our results underscore the superior performance of our proposed algorithm compared to existing state-of-the-art sequence editing methods.", "keywords": "GFlowNet;Sequence Editing", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Pouya M. Ghari;Alex M Tseng;G\u00f6kcen Eraslan;Romain Lopez;Tommaso Biancalani;Gabriele Scalia;Ehsan Hajiramezanali", "authorids": "~Pouya_M._Ghari1;~Alex_M_Tseng1;~G\u00f6kcen_Eraslan1;~Romain_Lopez1;~Tommaso_Biancalani1;~Gabriele_Scalia1;~Ehsan_Hajiramezanali1", "gender": "M;;M;;M;Non-Binary;", "homepage": ";https://romain-lopez.github.io/;;;http://ehsanhajiramezanali.github.io/;https://alextseng.net/;", "dblp": ";132/4587;;201/9258;225/3486;;280/1628", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=Z8RR17oAAAAJ;https://scholar.google.it/citations?user=s_qd9x0AAAAJ;MxeFvewAAAAJ;20I_DMoAAAAJ;;", "orcid": ";0000-0003-0495-738X;;0000-0003-3305-9220;;0000-0002-5191-1926;", "linkedin": ";;;gabriele-scalia;ehsan-hajiramezanali-978a3b52/;;", "or_profile": "~G\u00f6kcen_Eraslan1;~Romain_Lopez1;~Tommaso_Biancalani1;~Gabriele_Scalia1;~Ehsan_Hajiramezanali1;~Alex_Tseng1;~Pouya_M_Gari1", "aff": "Genentech;Stanford University;Genentech;Genentech;Genentech;Genentech;University of California, Irvine", "aff_domain": "gene.com;stanford.edu;gene.com;gene.com;gene.come;gene.com;uci.edu", "position": "Principal Researcher;Postdoc;Director;Researcher;Principal Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\nghari2024gflownet,\ntitle={{GF}lowNet Assisted Biological Sequence Editing},\nauthor={Pouya M. Ghari and Alex M Tseng and G{\\\"o}kcen Eraslan and Romain Lopez and Tommaso Biancalani and Gabriele Scalia and Ehsan Hajiramezanali},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g0G8DQSBcj}\n}", "github": "", "reviewers": "Ns1N;6Uc5;a1PN", "pdf_size": 845693, "rating": "6;6;7", "confidence": "4;4;3", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "35;52;87", "wc_strengths": "46;42;85", "wc_weaknesses": "36;264;204", "wc_questions": "190;42;94", "wc_limitations": "1;7;1", "wc_review": "308;407;471", "wc_reply_reviewers": "21;12;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.0, 21.64871050817269 ], "wc_strengths_avg": [ 57.666666666666664, 19.39644870130154 ], "wc_weaknesses_avg": [ 168.0, 96.49870465451855 ], "wc_questions_avg": [ 108.66666666666667, 61.30434097372079 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 395.3333333333333, 67.05387551978052 ], "wc_reply_reviewers_avg": [ 11.0, 8.602325267042627 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16127868925693529536&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "email": "gene.com;stanford.edu;gene.com;gene.com;gene.come;gene.com;uci.edu", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0;2", "aff_unique_norm": "Genentech;Stanford University;University of California, Irvine", "aff_unique_dep": ";;", "aff_unique_url": "https://www.genentech.com;https://www.stanford.edu;https://www.uci.edu", "aff_unique_abbr": "Genentech;Stanford;UCI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Irvine", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Speculative Monte-Carlo Tree Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94170", "id": "g1HxCIc0wi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g1HxCIc0wi", "openreview": "https://openreview.net/forum?id=g1HxCIc0wi", "poster": "/media/PosterPDFs/NeurIPS%202024/94170.png?t=1731286157.5964158", "project": "", "author_site": "Scott Cheng, Mahmut T Kandemir, Ding-Yong Hong", "tldr": "", "abstract": "Monte-Carlo tree search (MCTS) is an influential sequential decision-making algorithm notably employed in AlphaZero. Despite its success, the primary challenge in AlphaZero training lies in its prolonged time-to-solution due to the high latency imposed by the sequential MCTS process. To address this challenge, this paper proposes and evaluates an inter-decision parallelization strategy called speculative MCTS, a new type of parallelism in AlphaZero which implements speculative execution. This approach allows for the parallel execution of future moves before the current MCTS computations are completed, thus reducing the latency. Additionally, we analyze factors contributing to the overall speedup by studying the synergistic effects of speculation and neural network caching in MCTS. We also provide an analytical model that can be used to evaluate the potential of different speculation strategies before they are implemented and deployed. Our empirical findings indicate that the proposed speculative MCTS can reduce training latency by 5.81$\\times$ in 9x9 Go games. Moreover, our study shows that speculative execution can enhance the NN cache hit rate by 26\\% during midgame. Overall, our end-to-end evaluation indicates 1.91$\\times$ speedup in 19x19 Go training time, compared to the state-of-the-art KataGo program.", "keywords": "Monte-Carlo Tree Search;MCTS", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/bb2be6d39692d6031100b74f27d98ab7c13f9df2.zip", "author": "Scott Cheng;Mahmut Kandemir;Ding-Yong Hong", "authorids": "~Scott_Cheng1;~Mahmut_Kandemir1;~Ding-Yong_Hong1", "gender": "Not Specified;M;M", "homepage": ";http://www.cse.psu.edu/hpcl/kandemir/;https://www.iis.sinica.edu.tw/pages/dyhong/", "dblp": "226/4169.html;k/MahmutTKandemir.html;43/4596.html", "google_scholar": ";j67v24EAAAAJ;fMPVShUAAAAJ", "orcid": "0000-0001-9954-7986;;0000-0002-7649-7581", "linkedin": ";;", "or_profile": "~Scott_Cheng1;~Mahmut_Kandemir1;~Ding-Yong_Hong1", "aff": "Pennsylvania State University;Pennsylvania State University;", "aff_domain": "psu.edu;psu.edu;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\ncheng2024speculative,\ntitle={Speculative Monte-Carlo Tree Search},\nauthor={Scott Cheng and Mahmut Kandemir and Ding-Yong Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g1HxCIc0wi}\n}", "github": "", "reviewers": "j3Ju;Ty7h;wL1P;BfQS", "pdf_size": 844378, "rating": "4;4;5;6", "confidence": "4;2;2;3", "soundness": "2;3;2;3", "novelty": "2;3;2;2", "presentation": "2;4;2;4", "wc_summary": "71;28;85;101", "wc_strengths": "21;24;36;57", "wc_weaknesses": "61;66;40;52", "wc_questions": "193;2;202;1", "wc_limitations": "12;1;25;53", "wc_review": "358;121;388;264", "wc_reply_reviewers": "31;42;17;87", "wc_reply_authors": "0;133;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 71.25, 27.13277538328875 ], "wc_strengths_avg": [ 34.5, 14.150971698084906 ], "wc_weaknesses_avg": [ 54.75, 9.883698700385398 ], "wc_questions_avg": [ 99.5, 98.0522819724253 ], "wc_limitations_avg": [ 22.75, 19.421315609401955 ], "wc_review_avg": [ 282.75, 103.98888161721906 ], "wc_reply_reviewers_avg": [ 44.25, 26.223796445213647 ], "wc_reply_authors_avg": [ 33.25, 57.59068935166517 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=602860876797701327&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "psu.edu;psu.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "UniBench: Visual Reasoning Requires Rethinking Vision-Language Beyond Scaling", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97581", "id": "g1Zn0XPUFF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g1Zn0XPUFF", "openreview": "https://openreview.net/forum?id=g1Zn0XPUFF", "poster": "/media/PosterPDFs/NeurIPS%202024/97581.png?t=1731446111.1223269", "project": "", "author_site": "Haider Al-Tahan, Quentin Garrido, Randall Balestriero, Diane Bouchacourt, Caner Hazirbas, Mark Ibrahim", "tldr": "", "abstract": "Significant research efforts have been made to scale and improve vision-language model (VLM) training approaches. \nYet, with an ever-growing number of benchmarks,\nresearchers are tasked with the heavy burden of implementing each protocol, bearing a non-trivial computational cost, and making sense of how all these benchmarks translate into meaningful axes of progress.\nTo facilitate a systematic evaluation of VLM progress, we introduce UniBench: a unified implementation of 50+ VLM benchmarks spanning a range of carefully categorized vision-centric capabilities from object recognition to spatial awareness, counting, and much more. We showcase the utility of UniBench for measuring progress by evaluating nearly 60 publicly available vision-language models, trained on scales of up to 12.8B samples. We find that while scaling training data or model size can boost many vision-language model capabilities, scaling offers little benefit for reasoning or relations. Surprisingly, we also discover today's best VLMs struggle on simple digit recognition and counting tasks, e.g. MNIST, which much simpler networks can solve. Where scale falls short, we find that more precise interventions, such as data quality or tailored-learning objectives offer more promise. For practitioners, we also offer guidance on selecting a suitable VLM for a given application. Finally, we release an easy-to-run UniBench code-base with the full set of 50+ benchmarks and comparisons across 59 models as well as a distilled, representative set of benchmarks that runs in 5 minutes on a single GPU. UniBench with model evaluations on all benchmarks are provided as a toolbox at: https://github.com/facebookresearch/unibench", "keywords": "vision-language model;visual reasoning;zero-shot;VLM scaling", "primary_area": "", "supplementary_material": "/attachment/cfce2128b17117b8f97bbf54f0f4c1f41418a6dc.pdf", "author": "Haider Al-Tahan;Quentin Garrido;Randall Balestriero;Diane Bouchacourt;Caner Hazirbas;Mark Ibrahim", "authorids": "~Haider_Al-Tahan2;~Quentin_Garrido1;~Randall_Balestriero1;~Diane_Bouchacourt3;~Caner_Hazirbas2;~Mark_Ibrahim1", "gender": ";M;M;M;;F", "homepage": "https://haideraltahan.com;https://garridoq.com;https://randallbalestriero.github.io/;https://hazirbas.com;https://markibrahim.me/;https://dianebouchacourt.github.io/", "dblp": "276/6441;285/6628;175/5364;161/7891;180/5660;176/1498", "google_scholar": "https://scholar.google.ca/citations?user=kfpoNrEAAAAJ;RQaZUNsAAAAJ;S1x_xqcAAAAJ;JEiXKpcAAAAJ;AqYyoCMAAAAJ;", "orcid": ";;;;;", "linkedin": ";;randallbalestriero/;;;", "or_profile": "~Haider_Al-Tahan2;~Quentin_Garrido1;~Randall_Balestriero1;~Caner_Hazirbas2;~Mark_Ibrahim1;~Diane_Nicole_Bouchacourt1", "aff": "Georgia Institute of Technology;Research, Facebook;Citadel;Meta AI;Facebook AI Research (FAIR) Meta;Meta AI Research", "aff_domain": "gatech.edu;research.facebook.com;citadel.com;meta.com;ai.facebook.com;meta.com", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nal-tahan2024unibench,\ntitle={UniBench: Visual Reasoning Requires Rethinking Vision-Language Beyond Scaling},\nauthor={Haider Al-Tahan and Quentin Garrido and Randall Balestriero and Diane Bouchacourt and Caner Hazirbas and Mark Ibrahim},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=g1Zn0XPUFF}\n}", "github": "", "reviewers": "1p61;qzJ1;GrCg;eN9n", "pdf_size": 3138271, "rating": "7;7;7;9", "confidence": "4;3;4;4", "wc_summary_and_contributions": "49;102;92;55", "wc_strengths": "4;100;184;73", "wc_improvement": "4;123;83;57", "wc_limitations": "5;45;10;27", "wc_correctness": "8;86;23;5", "wc_clarity": "12;12;27;17", "wc_relation_to_prior_work": "12;84;42;5", "wc_documentation": "1;5;37;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "96;558;499;241", "wc_reply_reviewers": "59;134;67;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 74.5, 22.874658467395747 ], "wc_strengths_avg": [ 90.25, 64.46074386787667 ], "wc_improvement_avg": [ 66.75, 43.18781656902789 ], "wc_limitations_avg": [ 21.75, 15.706288549495072 ], "wc_correctness_avg": [ 30.5, 32.76049450176233 ], "wc_clarity_avg": [ 17.0, 6.123724356957945 ], "wc_relation_to_prior_work_avg": [ 35.75, 31.13177637077589 ], "wc_documentation_avg": [ 11.0, 15.0996688705415 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 348.5, 188.3168871875276 ], "wc_reply_reviewers_avg": [ 65.0, 47.502631506054485 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1848289486011150588&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;research.facebook.com;citadel.com;meta.com;ai.facebook.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Georgia Institute of Technology;Meta;Citadel", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.gatech.edu;https://www.facebook.com;https://www.citadel.edu", "aff_unique_abbr": "Georgia Tech;FB;Citadel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "To Learn or Not to Learn, That is the Question \u2014 A Feature-Task Dual Learning Model of Perceptual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94169", "id": "g3MbZOw0qO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g3MbZOw0qO", "openreview": "https://openreview.net/forum?id=g3MbZOw0qO", "poster": "/media/PosterPDFs/NeurIPS%202024/94169.png?t=1733757726.3513227", "project": "", "author_site": "Xiao Liu, Muyang Lyu, Cong Yu, Si Wu", "tldr": "", "abstract": "Perceptual learning refers to the practices through which participants learn to improve their performance in perceiving sensory stimuli. Two seemingly conflicting phenomena of specificity and transfer have been widely observed in perceptual learning. \n\nHere, we propose a dual-learning model to reconcile these two phenomena. The model consists of two learning processes. One is task-based learning, which is fast and enables the brain to adapt to a task rapidly by using existing feature representations. The other is feature-based learning, which is slow and enables the brain to improve feature representations to match the statistical change of the environment. Associated with different training paradigms, the interactions between these two learning processes induce the rich phenomena of perceptual learning. Specifically, in the training paradigm where the same stimulus condition is presented excessively, feature-based learning is triggered, which incurs specificity, while in the paradigm where the stimulus condition varies during the training, task-based learning dominates to induce the transfer effect. As the number of training sessions under the same stimulus condition increases, a transition from transfer to specificity occurs. \n\nWe demonstrate that the dual-learning model can account for both the specificity and transfer phenomena observed in classical psychophysical experiments. We hope that this study gives us insight into understanding how the brain balances the accomplishment of a new task and the consumption of learning effort.", "keywords": "perceptual learning;specificity;transfer;plasticity", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Xiao Liu;Muyang Lyu;Cong Yu;Si Wu", "authorids": "~Xiao_Liu11;~Muyang_Lyu1;~Cong_Yu5;~Si_Wu1", "gender": "F;F;M;M", "homepage": ";;http://www.psy.pku.edu.cn;https://mgv.pku.edu.cn/english/people/lbd/soeeace/267528.htm", "dblp": "82/1364;;;25/437-1", "google_scholar": ";;;", "orcid": ";0000-0002-5770-6747;;", "linkedin": ";;;", "or_profile": "~Xiao_Liu11;~Muyang_Lyu1;~Cong_Yu5;~Si_Wu1", "aff": ";Peking University;Peking University;Peking University", "aff_domain": ";stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": ";PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024to,\ntitle={To Learn or Not to Learn, That is the Question {\\textemdash} A Feature-Task Dual Learning Model of Perceptual Learning},\nauthor={Xiao Liu and Muyang Lyu and Cong Yu and Si Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g3MbZOw0qO}\n}", "github": "", "reviewers": "VB2J;n6Xq;Wypk;UGym", "pdf_size": 1709637, "rating": "5;5;7;8", "confidence": "5;2;4;4", "soundness": "2;3;3;4", "novelty": "2;2;4;3", "presentation": "2;3;4;4", "wc_summary": "56;162;128;82", "wc_strengths": "21;47;31;206", "wc_weaknesses": "377;64;133;374", "wc_questions": "269;84;497;18", "wc_limitations": "19;4;9;1", "wc_review": "742;361;798;681", "wc_reply_reviewers": "385;44;13;0", "wc_reply_authors": "279;32;32;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 107.0, 40.90232267243512 ], "wc_strengths_avg": [ 76.25, 75.48302789369276 ], "wc_weaknesses_avg": [ 237.0, 140.63605512101086 ], "wc_questions_avg": [ 217.0, 186.00672030870282 ], "wc_limitations_avg": [ 8.25, 6.832825184358224 ], "wc_review_avg": [ 645.5, 169.38786851483786 ], "wc_reply_reviewers_avg": [ 110.5, 159.28669122057875 ], "wc_reply_authors_avg": [ 85.75, 112.33515700794653 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.22075539284417395, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cxoX-dA4RUoJ:scholar.google.com/&scioq=To+Learn+or+Not+to+Learn,+That+is+the+Question+%E2%80%94+A+Feature-Task+Dual+Learning+Model+of+Perceptual+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "SPARKLE: A Unified Single-Loop Primal-Dual Framework for Decentralized Bilevel Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94168", "id": "g5DyqerUpX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g5DyqerUpX", "openreview": "https://openreview.net/forum?id=g5DyqerUpX", "poster": "/media/PosterPDFs/NeurIPS%202024/94168.png?t=1731738956.0308454", "project": "", "author_site": "Shuchen Zhu, Boao Kong, Songtao Lu, Xinmeng Huang, Kun Yuan", "tldr": "", "abstract": "This paper studies decentralized bilevel optimization, in which multiple agents collaborate to solve problems involving nested optimization structures with neighborhood communications. Most existing literature primarily utilizes gradient tracking to mitigate the influence of data heterogeneity, without exploring other well-known heterogeneity-correction techniques such as EXTRA or Exact Diffusion. Additionally, these studies often employ identical decentralized strategies for both upper- and lower-level problems, neglecting to leverage distinct mechanisms across different levels. To address these limitations, this paper proposes SPARKLE, a unified single-loop primal-dual algorithm framework for decentralized bilevel optimization. SPARKLE offers the flexibility to incorporate various heterogeneity-correction strategies into the algorithm. Moreover, SPARKLE allows for different strategies to solve upper- and lower-level problems. We present a unified convergence analysis for SPARKLE, applicable to all its variants, with state-of-the-art convergence rates compared to existing decentralized bilevel algorithms. Our results further reveal that EXTRA and Exact Diffusion are more suitable for decentralized bilevel optimization, and using mixed strategies in bilevel algorithms brings more benefits than relying solely on gradient tracking.", "keywords": "non-convex optimization;decentralized bilevel optimization;transient iteration complexity", "primary_area": "optimization", "supplementary_material": "", "author": "Shuchen Zhu;Boao Kong;Songtao Lu;Xinmeng Huang;Kun Yuan", "authorids": "~Shuchen_Zhu1;~Boao_Kong1;~Songtao_Lu1;~Xinmeng_Huang1;~Kun_Yuan4", "gender": "M;M;M;M;", "homepage": ";;https://songtaogithub.github.io/;;", "dblp": ";368/4976;05/2887;256/1617;", "google_scholar": ";_kcK_BwAAAAJ;LRsjX7kAAAAJ;vM2nHxEAAAAJ;", "orcid": "0009-0008-4613-6452;;;;", "linkedin": ";;;xinmeng-huang-8032221b3/;", "or_profile": "~Shuchen_Zhu1;~Boao_Kong1;~Songtao_Lu1;~Xinmeng_Huang1;~Kun_Yuan4", "aff": "Nankai University;Peking University;IBM Thomas J. Watson Research Center;University of Pennsylvania;", "aff_domain": "nankai.edu.cn;stu.pku.edu.cn;ibm.com;upenn.edu;", "position": "Undergrad student;PhD student;Researcher;PhD student;", "bibtex": "@inproceedings{\nzhu2024sparkle,\ntitle={{SPARKLE}: A Unified Single-Loop Primal-Dual Framework for Decentralized Bilevel Optimization},\nauthor={Shuchen Zhu and Boao Kong and Songtao Lu and Xinmeng Huang and Kun Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g5DyqerUpX}\n}", "github": "", "reviewers": "E8rC;ehMq;AnxT;4NJf", "pdf_size": 1430551, "rating": "5;6;6;6", "confidence": "4;5;3;3", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "58;79;64;123", "wc_strengths": "63;59;72;117", "wc_weaknesses": "108;156;194;146", "wc_questions": "2;14;67;141", "wc_limitations": "1;69;25;124", "wc_review": "232;377;422;651", "wc_reply_reviewers": "19;25;20;0", "wc_reply_authors": "456;116;116;877", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;6", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 25.42636427018224 ], "wc_strengths_avg": [ 77.75, 23.14492384951828 ], "wc_weaknesses_avg": [ 151.0, 30.610455730027933 ], "wc_questions_avg": [ 56.0, 54.83156025502101 ], "wc_limitations_avg": [ 54.75, 46.83147979724749 ], "wc_review_avg": [ 420.5, 150.46345071146015 ], "wc_reply_reviewers_avg": [ 16.0, 9.513148795220223 ], "wc_reply_authors_avg": [ 391.25, 312.91802041429315 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 1.299038105676658 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6616016331787873571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;stu.pku.edu.cn;ibm.com;upenn.edu;", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Nankai University;Peking University;IBM;University of Pennsylvania", "aff_unique_dep": ";;Research;", "aff_unique_url": "http://www.nankai.edu.cn;http://www.pku.edu.cn;https://www.ibm.com/research;https://www.upenn.edu", "aff_unique_abbr": "NKU;Peking U;IBM;UPenn", "aff_campus_unique_index": "1", "aff_campus_unique": ";Yorktown Heights", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "CODE: Contrasting Self-generated Description to Combat Hallucination in Large Multi-modal Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94167", "id": "g6nn2AijDp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g6nn2AijDp", "openreview": "https://openreview.net/forum?id=g6nn2AijDp", "poster": "/media/PosterPDFs/NeurIPS%202024/94167.png?t=1731674054.2829561", "project": "", "author_site": "Junho Kim, Hyunjun Kim, Kim Yeonju, Yong Man Ro", "tldr": "", "abstract": "Large Multi-modal Models (LMMs) have recently demonstrated remarkable abilities in visual context understanding and coherent response generation. However, alongside these advancements, the issue of hallucinations has emerged as a significant challenge, producing erroneous responses that are unrelated to the visual contents. In this paper, we introduce a novel contrastive-based decoding method, COuntering DEscription Contrastive Decoding (CODE), which leverages self-generated descriptions as contrasting references during the decoding phase of LMMs to address hallucination issues. CODE utilizes the comprehensive descriptions from model itself as visual counterpart to correct and improve response alignment with actual visual content. By dynamically adjusting the information flow and distribution of next-token predictions in the LMM's vocabulary, CODE enhances the coherence and informativeness of generated responses. Extensive experiments demonstrate that our method significantly reduces hallucinations and improves cross-modal consistency across various benchmarks and cutting-edge LMMs. Our method provides a simple yet effective decoding strategy that can be integrated to existing LMM frameworks without additional training.", "keywords": "Large Vision-Language Models;Large Multi-modal Models;Hallucination", "primary_area": "machine_vision", "supplementary_material": "", "author": "Junho Kim;Hyunjun Kim;KIM YEONJU;Yong Man Ro", "authorids": "~Junho_Kim4;~Hyunjun_Kim2;~KIM_YEONJU1;~Yong_Man_Ro3", "gender": "M;M;F;M", "homepage": "https://jhkim0911.github.io/;;;https://www.ivllab.kaist.ac.kr/people/professor", "dblp": ";;250/5224.html;02/1221", "google_scholar": "ZxE16ZUAAAAJ;i1HI8WoAAAAJ;M-kZaToAAAAJ;https://scholar.google.co.kr/citations?user=IPzfF7cAAAAJ", "orcid": "0000-0002-7769-910X;;;0000-0001-5306-6853", "linkedin": "junhokim-kaist/;hyunjun-kim-b199b3254;;", "or_profile": "~Junho_Kim4;~Hyunjun_Kim2;~KIM_YEONJU1;~Yong_Man_Ro1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nkim2024code,\ntitle={{CODE}: Contrasting Self-generated Description to Combat Hallucination in Large Multi-modal Models},\nauthor={Junho Kim and Hyunjun Kim and KIM YEONJU and Yong Man Ro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g6nn2AijDp}\n}", "github": "", "reviewers": "Z6jD;9CKq;FLJA", "pdf_size": 6837495, "rating": "4;5;6", "confidence": "4;5;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "95;61;108", "wc_strengths": "37;38;61", "wc_weaknesses": "46;68;85", "wc_questions": "2;2;88", "wc_limitations": "1;6;8", "wc_review": "181;175;350", "wc_reply_reviewers": "58;27;22", "wc_reply_authors": "260;280;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 88.0, 19.8158185969358 ], "wc_strengths_avg": [ 45.333333333333336, 11.08552609887726 ], "wc_weaknesses_avg": [ 66.33333333333333, 15.965240019770729 ], "wc_questions_avg": [ 30.666666666666668, 40.54078878802872 ], "wc_limitations_avg": [ 5.0, 2.943920288775949 ], "wc_review_avg": [ 235.33333333333334, 81.11856891133017 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 15.923427883328248 ], "wc_reply_authors_avg": [ 180.0, 127.54084313139327 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7220813164554123622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Point-PRC: A Prompt Learning Based Regulation Framework for Generalizable Point Cloud Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94166", "id": "g7lYP11Erv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g7lYP11Erv", "openreview": "https://openreview.net/forum?id=g7lYP11Erv", "poster": "", "project": "", "author_site": "Hongyu Sun, Qiuhong Ke, Yongcai Wang, Wang Chen, Kang Yang, Deying Li, Jianfei Cai", "tldr": "", "abstract": "This paper investigates the 3D domain generalization (3DDG) ability of large 3D models based on prevalent prompt learning. Recent works demonstrate the performances of 3D point cloud recognition can be boosted remarkably by parameter-efficient prompt tuning. However, we observe that the improvement on downstream tasks comes at the expense of a severe drop in 3D domain generalization. To resolve this challenge, we present a comprehensive regulation framework that allows the learnable prompts to actively interact with the well-learned general knowledge in large 3D models to maintain good generalization. Specifically, the proposed framework imposes multiple explicit constraints on the prompt learning trajectory by maximizing the mutual agreement between task-specific predictions and task-agnostic knowledge. We design the regulation framework as a plug-and-play module to embed into existing representative large 3D models. Surprisingly, our method not only realizes consistently increasing generalization ability but also enhances task-specific 3D recognition performances across various 3DDG benchmarks by a clear margin. Considering the lack of study and evaluation on 3DDG, we also create three new benchmarks, namely base-to-new, cross-dataset and few-shot generalization benchmarks, to enrich the field and inspire future research. Code and benchmarks are available at \\url{https://github.com/auniquesun/Point-PRC}.", "keywords": "point cloud analysis;point cloud recognition;3d domain generalization;large 3d models;prompt learning;multi-modal learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hongyu Sun;Qiuhong Ke;Yongcai Wang;Wang Chen;Kang Yang;Deying Li;Jianfei Cai", "authorids": "~Hongyu_Sun3;~Qiuhong_Ke6;~Yongcai_Wang1;~Wang_Chen5;~Kang_Yang7;~Deying_Li1;~Jianfei_Cai1", "gender": "M;F;M;M;F;M;M", "homepage": "https://auniquesun.github.io;https://research.monash.edu/en/persons/qiuhong-ke;https://yongcaiwang.github.io;;http://info.ruc.edu.cn/jsky/szdw/ajxjgcx/jsjkxyjsx1/js2/c2523870862c49758d02a6705c1e1556.htm;https://jianfei-cai.github.io/;https://github.com/sidiangongyuan", "dblp": "33/2079-6;151/3574;04/2124;;63/1296;83/6096;", "google_scholar": "https://scholar.google.com/citations?hl=en;84qxdhsAAAAJ;ZOHWbl8AAAAJ;;;https://scholar.google.com.tw/citations?user=N6czCoUAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-1991-6476;;;0000-0002-9606-365X;;;0009-0000-5489-516X", "linkedin": "auniquesun/;;;;;;", "or_profile": "~Hongyu_Sun3;~Qiuhong_Ke6;~Yongcai_Wang1;~Wang_Chen5;~Deying_Li1;~Jianfei_Cai1;~yang_kang2", "aff": "Renmin University of China;Monash University;Renmin University of China;Renmin University of China;Renmin University of China;Monash University;Renmin University of China", "aff_domain": "ruc.edu.cn;monash.edu;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;monash.edu;ruc.edu.cn", "position": "PhD student;Lecturer;Associate Professor;PhD student;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nsun2024pointprc,\ntitle={Point-{PRC}: A Prompt Learning Based Regulation Framework for Generalizable Point Cloud Analysis},\nauthor={Hongyu Sun and Qiuhong Ke and Yongcai Wang and Wang Chen and Kang Yang and Deying Li and Jianfei Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g7lYP11Erv}\n}", "github": "", "reviewers": "UDVe;d557;kpdQ", "pdf_size": 902288, "rating": "5;6;6", "confidence": "5;3;3", "soundness": "2;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "116;90;86", "wc_strengths": "67;29;90", "wc_weaknesses": "127;71;33", "wc_questions": "4;116;33", "wc_limitations": "36;1;45", "wc_review": "350;307;287", "wc_reply_reviewers": "239;44;14", "wc_reply_authors": "1445;70;170", "reply_reviewers": "2;1;1", "reply_authors": "5;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.33333333333333, 13.299958228840003 ], "wc_strengths_avg": [ 62.0, 25.152865973217974 ], "wc_weaknesses_avg": [ 77.0, 38.60915262818736 ], "wc_questions_avg": [ 51.0, 47.462265713582056 ], "wc_limitations_avg": [ 27.333333333333332, 18.979521127315678 ], "wc_review_avg": [ 314.6666666666667, 26.284765338288434 ], "wc_reply_reviewers_avg": [ 99.0, 99.74968671630002 ], "wc_reply_authors_avg": [ 561.6666666666666, 625.9437319404641 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17379761902270823432&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;monash.edu;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;monash.edu;ruc.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;1;0", "aff_unique_norm": "Renmin University of China;Monash University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.monash.edu", "aff_unique_abbr": "RUC;Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Decision-Focused Learning with Directional Gradients", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94165", "id": "g8kFlZDcaX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g8kFlZDcaX", "openreview": "https://openreview.net/forum?id=g8kFlZDcaX", "poster": "/media/PosterPDFs/NeurIPS%202024/94165.png?t=1731659673.1089213", "project": "", "author_site": "Michael Huang, Vishal Gupta", "tldr": "", "abstract": "We propose a novel family of decision-aware surrogate losses, called Perturbation Gradient (PG) losses, for the predict-then-optimize framework. These losses directly approximate the downstream decision loss and can be optimized using off-the-shelf gradient-based methods. Importantly, unlike existing surrogate losses, the approximation error of our PG losses vanishes as the number of samples grows. This implies that optimizing our surrogate loss yields a best-in-class policy asymptotically, even in misspecified settings. This is the first such result in misspecified settings and we provide numerical evidence confirming our PG losses substantively outperform existing proposals when the underlying model is misspecified and the noise is not centrally symmetric. Insofar as misspecification is commonplace in practice -- especially when we might prefer a simpler, more interpretable model -- PG losses offer a novel, theoretically justified, method for computationally tractable decision-aware learning.", "keywords": "decision-focused learning", "primary_area": "optimization", "supplementary_material": "/attachment/7e0ad1a9cc8bd230bbf3486b1a0bb84d542ae0ec.zip", "author": "Michael Huang;Vishal Gupta", "authorids": "~Michael_Huang2;~Vishal_Gupta1", "gender": "M;M", "homepage": "https://mh3166.github.io/;https://faculty.marshall.usc.edu/Vishal-Gupta/", "dblp": "87/6759-3;66/6170-4", "google_scholar": "lX0NYw8AAAAJ;KZcnoikAAAAJ", "orcid": ";0000-0003-4371-9114", "linkedin": ";", "or_profile": "~Michael_Huang2;~Vishal_Gupta1", "aff": "CUNY Baruch College;University of Southern California", "aff_domain": "baruch.cuny.edu;usc.edu", "position": "Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024decisionfocused,\ntitle={Decision-Focused Learning with Directional Gradients},\nauthor={Michael Huang and Vishal Gupta},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g8kFlZDcaX}\n}", "github": "", "reviewers": "oPp7;LgZs;CTUs;TiWp", "pdf_size": 829424, "rating": "5;5;5;7", "confidence": "3;3;3;4", "soundness": "2;3;2;4", "novelty": "3;3;2;3", "presentation": "2;3;2;3", "wc_summary": "45;64;64;105", "wc_strengths": "68;10;46;48", "wc_weaknesses": "164;108;54;380", "wc_questions": "18;7;46;9", "wc_limitations": "2;10;9;14", "wc_review": "297;199;219;556", "wc_reply_reviewers": "246;26;0;1069", "wc_reply_authors": "994;0;0;1557", "reply_reviewers": "4;1;0;2", "reply_authors": "3;1;1;5", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.5, 21.914607000811127 ], "wc_strengths_avg": [ 43.0, 20.904544960366874 ], "wc_weaknesses_avg": [ 176.5, 123.76085810950084 ], "wc_questions_avg": [ 20.0, 15.572411502397436 ], "wc_limitations_avg": [ 8.75, 4.322904116447646 ], "wc_review_avg": [ 317.75, 142.3435544729722 ], "wc_reply_reviewers_avg": [ 335.25, 434.2760498807182 ], "wc_reply_authors_avg": [ 637.75, 668.0914514495752 ], "reply_reviewers_avg": [ 1.75, 1.479019945774904 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7930673154538754452&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "baruch.cuny.edu;usc.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Baruch College, City University of New York;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.baruch.cuny.edu;https://www.usc.edu", "aff_unique_abbr": "Baruch;USC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "New York;Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Fully Explicit Dynamic Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94164", "id": "g8pyTkxyIV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g8pyTkxyIV", "openreview": "https://openreview.net/forum?id=g8pyTkxyIV", "poster": "/media/PosterPDFs/NeurIPS%202024/94164.png?t=1733448308.6407514", "project": "", "author_site": "Junoh Lee, Changyeon Won, Hyunjun Jung, Inhwan Bae, Hae-Gon Jeon", "tldr": "", "abstract": "3D Gaussian Splatting has shown fast and high-quality rendering results in static scenes by leveraging dense 3D prior and explicit representations. Unfortunately, the benefits of the prior and representation do not involve novel view synthesis for dynamic motions. Ironically, this is because the main barrier is the reliance on them, which requires increasing training and rendering times to account for dynamic motions. \nIn this paper, we design Explicit 4D Gaussian Splatting (Ex4DGS).\nOur key idea is to firstly separate static and dynamic Gaussians during training, and to explicitly sample positions and rotations of the dynamic Gaussians at sparse timestamps. The sampled positions and rotations are then interpolated to represent both spatially and temporally continuous motions of objects in dynamic scenes as well as reducing computational cost. \nAdditionally, we introduce a progressive training scheme and a point-backtracking technique that improves Ex4DGS's convergence. We initially train Ex4DGS using short timestamps and progressively extend timestamps, which makes it work well with a few point clouds. The point-backtracking is used to quantify the cumulative error of each Gaussian over time, enabling the detection and removal of erroneous Gaussians in dynamic scenes. Comprehensive experiments on various scenes demonstrate the state-of-the-art rendering quality from our method, achieving fast rendering of 62 fps on a single 2080Ti GPU.", "keywords": "Gaussian Splatting;Neural Rendering;Computer Vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/186fa8ec1bfc5a6d523e64be4e42f8f4354de11a.zip", "author": "Junoh Lee;Changyeon Won;Hyunjun Jung;Inhwan Bae;Hae-Gon Jeon", "authorids": "~Junoh_Lee1;~Changyeon_Won1;~Hyunjun_Jung2;~Inhwan_Bae1;~Hae-Gon_Jeon3", "gender": "M;M;M;M;M", "homepage": "https://juno181.github.io/;;https://github.com/hj-jung-01;https://ihbae.com/;https://sites.google.com/site/hgjeoncv/", "dblp": ";;;294/0205;142/2427", "google_scholar": "OZcCxNQAAAAJ;;;og4c800AAAAJ;https://scholar.google.co.kr/citations?user=Ei00xroAAAAJ", "orcid": ";0000-0001-5335-2606;;0000-0003-1884-2268;0000-0003-1105-1666", "linkedin": ";;;;", "or_profile": "~Junoh_Lee1;~Changyeon_Won1;~Hyunjun_Jung2;~Inhwan_Bae1;~Hae-Gon_Jeon3", "aff": "Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology", "aff_domain": "gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2024fully,\ntitle={Fully Explicit Dynamic Gaussian Splatting},\nauthor={Junoh Lee and Changyeon Won and Hyunjun Jung and Inhwan Bae and Hae-Gon Jeon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g8pyTkxyIV}\n}", "github": "", "reviewers": "MZ9L;1Xsu;Tqxz;Vpue", "pdf_size": 42125430, "rating": "5;5;5;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "95;86;91;72", "wc_strengths": "70;59;48;188", "wc_weaknesses": "122;164;117;280", "wc_questions": "67;45;67;318", "wc_limitations": "4;27;9;19", "wc_review": "358;381;332;877", "wc_reply_reviewers": "187;64;125;117", "wc_reply_authors": "906;613;2029;2554", "reply_reviewers": "1;1;2;1", "reply_authors": "3;3;5;7", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 8.689073598491383 ], "wc_strengths_avg": [ 91.25, 56.39758416811841 ], "wc_weaknesses_avg": [ 170.75, 65.66344112213432 ], "wc_questions_avg": [ 124.25, 112.22159997077212 ], "wc_limitations_avg": [ 14.75, 8.898735865278843 ], "wc_review_avg": [ 487.0, 225.83290282861796 ], "wc_reply_reviewers_avg": [ 123.25, 43.63699691775317 ], "wc_reply_authors_avg": [ 1525.5, 794.946696326238 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.5, 1.6583123951777 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2536576276975903830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Gwangju Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gist.ac.kr", "aff_unique_abbr": "GIST", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Gwangju", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Parameter Disparities Dissection for Backdoor Defense in Heterogeneous Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94163", "id": "g8wnC1E1OS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g8wnC1E1OS", "openreview": "https://openreview.net/forum?id=g8wnC1E1OS", "poster": "/media/PosterPDFs/NeurIPS%202024/94163.png?t=1731228881.985771", "project": "", "author_site": "Wenke Huang, Mang Ye, Zekun Shi, Guancheng Wan, He Li, Bo Du", "tldr": "", "abstract": "Backdoor attacks pose a serious threat to federated systems, where malicious clients optimize on the triggered distribution to mislead the global model towards a predefined target. Existing backdoor defense methods typically require either homogeneous assumption, validation datasets, or client optimization conflicts. In our work, we observe that benign heterogeneous distributions and malicious triggered distributions exhibit distinct parameter importance degrees. We introduce the Fisher Discrepancy Cluster and Rescale (FDCR) method, which utilizes Fisher Information to calculate the degree of parameter importance for local distributions. This allows us to reweight client parameter updates and identify those with large discrepancies as backdoor attackers. Furthermore, we prioritize rescaling important parameters to expedite adaptation to the target distribution, encouraging significant elements to contribute more while diminishing the influence of trivial ones. This approach enables FDCR to handle backdoor attacks in heterogeneous federated learning environments. Empirical results on various heterogeneous federated scenarios under backdoor attacks demonstrate the effectiveness of our method.", "keywords": "Federated Learning;Backdoor Attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/8a83f88b33ea9472f4aee96b5c60ff7fbe3640ff.zip", "author": "Wenke Huang;Mang Ye;Zekun Shi;Guancheng Wan;He Li;Bo Du", "authorids": "~Wenke_Huang1;~Mang_Ye1;~Zekun_Shi1;~Guancheng_Wan1;~He_Li4;~Bo_Du3", "gender": "M;M;M;;M;", "homepage": "https://wenkehuang.github.io/;https://marswhu.github.io/;https://github.com/Szkqwer;;https://marswhu.github.io/team/index.htm;", "dblp": "330/1664;156/0610;234/8649.html;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;j-HxRy0AAAAJ;y6_df_4AAAAJ;;;", "orcid": "0000-0003-4819-293X;0000-0003-3989-7655;;;0000-0002-8469-8260;", "linkedin": ";;;;;", "or_profile": "~Wenke_Huang1;~Mang_Ye1;~Zekun_Shi1;~Guancheng_Wan1;~He_Li4;~Bo_Du3", "aff": "Wuhan University;Wuhan University;Xiaomi Corporation;;Wuhan University;", "aff_domain": "whu.edu.cn;whu.edu.cn;xiaomi.com;;whu.edu.cn;", "position": "PhD student;Professor;Researcher;;PhD student;", "bibtex": "@inproceedings{\nhuang2024parameter,\ntitle={Parameter Disparities Dissection for Backdoor Defense in Heterogeneous Federated Learning},\nauthor={Wenke Huang and Mang Ye and Zekun Shi and Guancheng Wan and He Li and Bo Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g8wnC1E1OS}\n}", "github": "", "reviewers": "7qoM;M9TV;AqVq;oCkz", "pdf_size": 822028, "rating": "6;6;7;9", "confidence": "4;4;4;5", "soundness": "3;4;4;3", "novelty": "3;4;3;4", "presentation": "4;4;3;4", "wc_summary": "75;75;54;60", "wc_strengths": "89;41;93;102", "wc_weaknesses": "82;22;54;29", "wc_questions": "13;4;1;2", "wc_limitations": "19;10;1;25", "wc_review": "278;152;203;218", "wc_reply_reviewers": "0;12;16;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 9.246621004453464 ], "wc_strengths_avg": [ 81.25, 23.710493457539005 ], "wc_weaknesses_avg": [ 46.75, 23.573024837725004 ], "wc_questions_avg": [ 5.0, 4.743416490252569 ], "wc_limitations_avg": [ 13.75, 9.093266739736606 ], "wc_review_avg": [ 212.75, 44.91867651656714 ], "wc_reply_reviewers_avg": [ 9.25, 5.889609494694874 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5244434670505631561&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "whu.edu.cn;whu.edu.cn;xiaomi.com;;whu.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Wuhan University;Xiaomi Corporation", "aff_unique_dep": ";", "aff_unique_url": "http://www.whu.edu.cn/;https://www.xiaomi.com", "aff_unique_abbr": "WHU;Xiaomi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DHA: Learning Decoupled-Head Attention from Transformer Checkpoints via Adaptive Heads Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94162", "id": "g92nu7knRq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=g92nu7knRq", "openreview": "https://openreview.net/forum?id=g92nu7knRq", "poster": "/media/PosterPDFs/NeurIPS%202024/94162.png?t=1731647830.3444955", "project": "", "author_site": "Yilong Chen, Linhao Zhang, Junyuan Shang, Zhenyu Zhang, Tingwen Liu, Shuohuan Wang, YU SUN", "tldr": "", "abstract": "Large language models (LLMs) with billions of parameters demonstrate impressive performance. However, the widely used Multi-Head Attention (MHA) in LLMs incurs substantial computational and memory costs during inference. While some efforts have optimized attention mechanisms by pruning heads or sharing parameters among heads, these methods often lead to performance degradation or necessitate substantial continued pre-training costs to restore performance. Based on the analysis of attention redundancy, we design a Decoupled-Head Attention (DHA) mechanism. DHA adaptively configures group sharing for key heads and value heads across various layers, achieving a better balance between performance and efficiency. Inspired by the observation of clustering similar heads, we propose to progressively transform the MHA checkpoint into the DHA model through linear fusion of similar head parameters step by step, retaining the parametric knowledge of the MHA checkpoint. We construct DHA models by transforming various scales of MHA checkpoints given target head budgets. Our experiments show that DHA remarkably requires a mere 0.25\\% of the original model's pre-training budgets to achieve 96.1\\% of performance while saving 75\\% of KV cache. Compared to Group-Query Attention (GQA), DHA achieves a 5$\\times$ training acceleration, a maximum of 13.93\\% performance improvement under 0.01\\% pre-training budget, and 5\\% relative improvement under 0.05\\% pre-training budget.", "keywords": "Large Language Models;Multi-Head Attention;Pre-training Acceleration;Efficient Inference;Model Fusion", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yilong Chen;Linhao Zhang;Junyuan Shang;Zhenyu Zhang;Tingwen Liu;Shuohuan Wang;Yu Sun", "authorids": "~Yilong_Chen6;~Linhao_Zhang1;~Junyuan_Shang1;~Zhenyu_Zhang5;~Tingwen_Liu1;~Shuohuan_Wang1;~Yu_Sun13", "gender": "M;;M;M;M;M;", "homepage": "https://chenyilong.cn/;;;https://zhenyu.ac.cn/;http://liutingwen.ac.cn;;", "dblp": ";;225/9295;01/1844-6.html;56/7911;239/5773.html;", "google_scholar": "v8f7-yEAAAAJ;;hU5tOV4AAAAJ;Q3j7owkAAAAJ;JqOOSuIAAAAJ;fUkS6pAAAAAJ;02fM4YEAAAAJ", "orcid": ";;0000-0002-8558-7108;;0000-0002-0750-6923;;", "linkedin": ";;;;;;", "or_profile": "~Yilong_Chen6;~Linhao_Zhang1;~Junyuan_Shang1;~Zhenyu_Zhang5;~Tingwen_Liu1;~Shuohuan_Wang1;~Yu_Sun13", "aff": "Institute of Information Engineering, Chinese Academy of Sciences;;Baidu;Baidu Inc.;Institute of Information Engineering, Chinese Academy of Sciences;Baidu;", "aff_domain": "iie.ac.cn;;baidu.com;baidu.com;iie.ac.cn;baidu.com;", "position": "MS student;;Researcher;Researcher;Full Professor;Researcher;", "bibtex": "@inproceedings{\nchen2024dha,\ntitle={{DHA}: Learning Decoupled-Head Attention from Transformer Checkpoints via Adaptive Heads Fusion},\nauthor={Yilong Chen and Linhao Zhang and Junyuan Shang and Zhenyu Zhang and Tingwen Liu and Shuohuan Wang and Yu Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=g92nu7knRq}\n}", "github": "", "reviewers": "Txec;aEta;FLzu;wgtQ", "pdf_size": 3253528, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "83;92;72;152", "wc_strengths": "45;58;53;58", "wc_weaknesses": "79;52;152;253", "wc_questions": "3;44;7;136", "wc_limitations": "1;7;1;6", "wc_review": "211;253;285;605", "wc_reply_reviewers": "0;17;145;65", "wc_reply_authors": "0;39;317;73", "reply_reviewers": "0;1;2;1", "reply_authors": "1;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 99.75, 30.986892390170397 ], "wc_strengths_avg": [ 53.5, 5.315072906367325 ], "wc_weaknesses_avg": [ 134.0, 77.8363668216856 ], "wc_questions_avg": [ 47.5, 53.53737012592232 ], "wc_limitations_avg": [ 3.75, 2.7726341266023544 ], "wc_review_avg": [ 338.5, 156.0857136319657 ], "wc_reply_reviewers_avg": [ 56.75, 56.25111110013739 ], "wc_reply_authors_avg": [ 107.25, 123.82321066746735 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3154395523761915463&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "iie.ac.cn;;baidu.com;baidu.com;iie.ac.cn;baidu.com;", "author_num": 7, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Baidu", "aff_unique_dep": "Institute of Information Engineering;Baidu, Inc.", "aff_unique_url": "http://www.cas.cn;https://www.baidu.com", "aff_unique_abbr": "CAS;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DINTR: Tracking via Diffusion-based Interpolation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94161", "id": "gAgwqHOBIg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gAgwqHOBIg", "openreview": "https://openreview.net/forum?id=gAgwqHOBIg", "poster": "/media/PosterPDFs/NeurIPS%202024/94161.png?t=1731203876.5670764", "project": "", "author_site": "Pha Nguyen, Ngan Le, Jackson Cothren, Alper Yilmaz, Khoa Luu", "tldr": "", "abstract": "Object tracking is a fundamental task in computer vision, requiring the localization of objects of interest across video frames. Diffusion models have shown remarkable capabilities in visual generation, making them well-suited for addressing several requirements of the tracking problem. This work proposes a novel diffusion-based methodology to formulate the tracking task. Firstly, their conditional process allows for injecting indications of the target object into the generation process. Secondly, diffusion mechanics can be developed to inherently model temporal correspondences, enabling the reconstruction of actual frames in video. However, existing diffusion models rely on extensive and unnecessary mapping to a Gaussian noise domain, which can be replaced by a more efficient and stable interpolation process. Our proposed interpolation mechanism draws inspiration from classic image-processing techniques, offering a more interpretable, stable, and faster approach tailored specifically for the object tracking task. By leveraging the strengths of diffusion models while circumventing their limitations, our Diffusion-based INterpolation TrackeR (DINTR) presents a promising new paradigm and achieves a superior multiplicity on seven benchmarks across five indicator representations.", "keywords": "Visual Tracking;Diffusion;Unification", "primary_area": "machine_vision", "supplementary_material": "", "author": "Pha Nguyen;Ngan Hoang Le;Jackson Cothren;Alper Yilmaz;Khoa Luu", "authorids": "~Pha_Nguyen1;~Ngan_Hoang_Le1;~Jackson_Cothren1;~Alper_Yilmaz2;~Khoa_Luu2", "gender": ";F;M;M;M", "homepage": ";https://computer-science-and-computer-engineering.uark.edu/directory/index/uid/thile/name/Thi+Hoang+Ngan+Le/;https://cast.uark.edu/directory/index/uid/jcothre/name/Jackson+David+Cothren/;https://u.osu.edu/pcvlab/publications;https://uark-cviu.github.io", "dblp": ";37/245;229/6846.html;11/1315;43/8092", "google_scholar": ";8ck0k_UAAAAJ;_WB9fo4AAAAJ;MeQC1XYAAAAJ;JPAl8-gAAAAJ", "orcid": ";0000-0003-2571-0511;0000-0002-5548-6955;0000-0003-0755-2628;0000-0003-2104-0901", "linkedin": ";;jcothren/;alper-yilmaz/;khoa-luu-90900215/", "or_profile": "~Pha_Nguyen1;~Ngan_Hoang_Le1;~Jackson_Cothren1;~Alper_Yilmaz2;~Khoa_Luu2", "aff": ";University of Arkansas, Fayetteville;University of Arkansas - Fayetteville;Ohio State University, Columbus;University of Arkansas, Fayetteville", "aff_domain": ";uark.edu;uark.edu;osu.edu;uark.edu", "position": ";Assistant Professor;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2024dintr,\ntitle={{DINTR}: Tracking via Diffusion-based Interpolation},\nauthor={Pha Nguyen and Ngan Hoang Le and Jackson Cothren and Alper Yilmaz and Khoa Luu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gAgwqHOBIg}\n}", "github": "", "reviewers": "ddR4;kgMr;oiCF", "pdf_size": 32085708, "rating": "6;7;7", "confidence": "4;4;5", "soundness": "3;4;4", "novelty": "3;4;3", "presentation": "3;3;3", "wc_summary": "66;40;85", "wc_strengths": "165;47;31", "wc_weaknesses": "145;48;71", "wc_questions": "115;2;5", "wc_limitations": "9;1;1", "wc_review": "500;138;193", "wc_reply_reviewers": "32;14;35", "wc_reply_authors": "30;34;34", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.666666666666664, 18.445113776342563 ], "wc_strengths_avg": [ 81.0, 59.75505557412416 ], "wc_weaknesses_avg": [ 88.0, 41.38437708443449 ], "wc_questions_avg": [ 40.666666666666664, 52.57587110283787 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 277.0, 159.27544276085584 ], "wc_reply_reviewers_avg": [ 27.0, 9.273618495495704 ], "wc_reply_authors_avg": [ 32.666666666666664, 1.8856180831641267 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Zcuf9BgsL4AJ:scholar.google.com/&scioq=DINTR:+Tracking+via+Diffusion-based+Interpolation&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": ";uark.edu;uark.edu;osu.edu;uark.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Arkansas;Ohio State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uark.edu;https://www.osu.edu", "aff_unique_abbr": "UARK;OSU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Fayetteville;Columbus", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "gBOQ0ACqoO", "title": "DH-Fusion: Depth-Aware Hybrid Feature Fusion for Multimodal 3D Object Detection", "track": "main", "status": "Reject", "tldr": "", "abstract": "State-of-the-art LiDAR-camera 3D object detectors usually focus on feature fusion. However, they neglect the factor of depth while designing the fusion strategy. In this work, we for the first time point out that different modalities play different roles as depth varies via statistical analysis and visualization. Based on this finding, we propose a Depth-Aware Hybrid Feature Fusion (DH-Fusion) strategy that guides the weights of point cloud and RGB image modalities by introducing depth encoding at both global and local levels. Specifically, the Depth-Aware Global Feature Fusion (DGF) module adaptively adjusts the weights of image Bird's-Eye-View (BEV) features in multi-modal global features via depth encoding. Furthermore, to compensate for the information lost when transferring raw features to the BEV space, we propose a Depth-Aware Local Feature Fusion (DLF) module, which adaptively adjusts the weights of original voxel features and multi-view image features in multi-modal local features via depth encoding. Extensive experiments on the nuScenes dataset demonstrate that our DH-Fusion method surpasses previous state-of-the-art methods w.r.t. NDS. Moreover, our DH-Fusion is more robust to various kinds of corruptions, outperforming previous methods on nuScenes-C w.r.t. both NDS and mAP.", "keywords": "Depth Aware;Multi Modality;3D Object Detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mingqian Ji;Jian Yang;Shanshan Zhang", "authorids": "~Mingqian_Ji1;~Jian_Yang1;~Shanshan_Zhang1", "gender": "M;M;F", "homepage": "https://github.com/Mingqj;;https://sites.google.com/site/shanshanzhangshomepage/", "dblp": ";y/JianYang3.html;34/3535-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ;pOSMWfQAAAAJ", "orcid": "0009-0006-1855-1945;;", "linkedin": ";;", "or_profile": "~Mingqian_Ji1;~Jian_Yang1;~Shanshan_Zhang1", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024dhfusion,\ntitle={{DH}-Fusion: Depth-Aware Hybrid Feature Fusion for Multimodal 3D Object Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=gBOQ0ACqoO}\n}", "github": "", "project": "", "reviewers": "kgkY;tCBR;kR8p;qiTv", "site": "https://openreview.net/forum?id=gBOQ0ACqoO", "pdf_size": 20125801, "rating": "4;4;5;7", "confidence": "3;5;3;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "39;114;99;114", "wc_strengths": "30;55;62;85", "wc_weaknesses": "113;76;150;28", "wc_questions": "61;25;29;37", "wc_limitations": "1;73;11;34", "wc_review": "244;343;351;298", "wc_reply_reviewers": "71;0;23;64", "wc_reply_authors": "92;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.5, 30.923292192132454 ], "wc_strengths_avg": [ 58.0, 19.60867155112758 ], "wc_weaknesses_avg": [ 91.75, 45.157363740590526 ], "wc_questions_avg": [ 38.0, 13.96424004376894 ], "wc_limitations_avg": [ 29.75, 27.6891224129621 ], "wc_review_avg": [ 309.0, 42.62041764225217 ], "wc_reply_reviewers_avg": [ 39.5, 29.261749776799064 ], "wc_reply_authors_avg": [ 23.0, 39.83716857408418 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9_9rpFoAM9gJ:scholar.google.com/&scioq=DH-Fusion:+Depth-Aware+Hybrid+Feature+Fusion+for+Multimodal+3D+Object+Detection&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Online Learning of Delayed Choices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94160", "id": "gC3BzNwqQp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gC3BzNwqQp", "openreview": "https://openreview.net/forum?id=gC3BzNwqQp", "poster": "", "project": "", "tldr": "", "abstract": "Choice models are essential for understanding decision-making processes in domains like online advertising, product recommendations, and assortment optimization. The Multinomial Logit (MNL) model is particularly versatile in selecting products or advertisements for display. However, challenges arise with unknown MNL parameters and delayed feedback, requiring sellers to learn customers\u2019 choice behavior and make dynamic decisions with biased knowledge due to delays. We address these challenges by developing an algorithm that handles delayed feedback, balancing exploration and exploitation using confidence bounds and optimism. We first consider a censored setting where a threshold for considering feedback is imposed by business requirements. Our algorithm demonstrates a $\\tilde{O}(\\sqrt{NT})$ regret, with a matching lower bound up to a logarithmic term. Furthermore, we extend our analysis to environments with non-thresholded delays, achieving a $\\tilde{O}(\\sqrt{NT})$ regret. To validate our approach, we conduct experiments that confirm the effectiveness of our algorithm.", "keywords": "choice models;multinomial logit;delayed feedback", "primary_area": "online_learning", "supplementary_material": "/attachment/4dd5b7aca9e24e6310e6277321350c1f9c243c0c.zip", "author": "Recep Yusuf Bekci", "authorids": "~Recep_Yusuf_Bekci1", "gender": "", "homepage": "https://rybekci.com", "dblp": "", "google_scholar": "MBDhaAIAAAAJ", "orcid": "0009-0000-5315-6955", "linkedin": "", "or_profile": "~Recep_Yusuf_Bekci1", "aff": "University of Waterloo", "aff_domain": "uwaterloo.ca", "position": "Assistant Professor", "bibtex": "@inproceedings{\nbekci2024online,\ntitle={Online Learning of Delayed Choices},\nauthor={Recep Yusuf Bekci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gC3BzNwqQp}\n}", "github": "", "reviewers": "rHM8;KQSR;9pwP;3Qos;dJHq", "pdf_size": 566329, "rating": "4;5;5;6;7", "confidence": "4;2;3;3;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "4;3;3;3;3", "wc_summary": "123;256;236;64;98", "wc_strengths": "53;5;22;48;82", "wc_weaknesses": "258;5;92;113;12", "wc_questions": "119;5;2;242;55", "wc_limitations": "2;5;1;1;9", "wc_review": "555;276;353;468;256", "wc_reply_reviewers": "0;19;0;10;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;0;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.4, 1.0198039027185568 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 155.4, 76.5704903993699 ], "wc_strengths_avg": [ 42.0, 26.55560204551951 ], "wc_weaknesses_avg": [ 96.0, 91.52704518337735 ], "wc_questions_avg": [ 84.6, 89.4351161457288 ], "wc_limitations_avg": [ 3.6, 3.0724582991474434 ], "wc_review_avg": [ 381.6, 114.26040434026129 ], "wc_reply_reviewers_avg": [ 5.8, 7.652450587883596 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.15724272550828772, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Kt9Ms5368CoJ:scholar.google.com/&scioq=Online+Learning+of+Delayed+Choices&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "uwaterloo.ca", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "TrAct: Making First-layer Pre-Activations Trainable", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94159", "id": "gCCMzedgbo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gCCMzedgbo", "openreview": "https://openreview.net/forum?id=gCCMzedgbo", "poster": "/media/PosterPDFs/NeurIPS%202024/94159.png?t=1733861685.7850916", "project": "", "author_site": "Felix Petersen, Christian Borgelt, Stefano Ermon", "tldr": "", "abstract": "We consider the training of the first layer of vision models and notice the clear relationship between pixel values and gradient update magnitudes: the gradients arriving at the weights of a first layer are by definition directly proportional to (normalized) input pixel values. Thus, an image with low contrast has a smaller impact on learning than an image with higher contrast, and a very bright or very dark image has a stronger impact on the weights than an image with moderate brightness. In this work, we propose performing gradient descent on the embeddings produced by the first layer of the model. However, switching to discrete inputs with an embedding layer is not a reasonable option for vision models. Thus, we propose the conceptual procedure of (i) a gradient descent step on first layer activations to construct an activation proposal, and (ii) finding the optimal weights of the first layer, i.e., those weights which minimize the squared distance to the activation proposal. We provide a closed form solution of the procedure and adjust it for robust stochastic training while computing everything efficiently. Empirically, we find that TrAct (Training Activations) speeds up training by factors between 1.25x and 4x while requiring only a small computational overhead. We demonstrate the utility of TrAct with different optimizers for a range of different vision models including convolutional and transformer architectures.", "keywords": "computer vision;convolution;second-order;optimization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Felix Petersen;Christian Borgelt;Stefano Ermon", "authorids": "~Felix_Petersen1;~Christian_Borgelt1;~Stefano_Ermon1", "gender": "Not Specified;M;M", "homepage": "http://www.petersen.ai/;https://www.borgelt.net/;http://cs.stanford.edu/~ermon/", "dblp": "230/3983;b/ChristianBorgelt.html;47/8135", "google_scholar": "v8Kat6YAAAAJ;https://scholar.google.de/citations?user=T50Bxb8AAAAJ;", "orcid": ";;", "linkedin": ";christian-borgelt-a2429071/;", "or_profile": "~Felix_Petersen1;~Christian_Borgelt1;~Stefano_Ermon1", "aff": "Stanford University;Paris-Lodron-University of Salzburg;Stanford University", "aff_domain": "stanford.edu;sbg.ac.at;stanford.edu", "position": "Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\npetersen2024tract,\ntitle={TrAct: Making First-layer Pre-Activations Trainable},\nauthor={Felix Petersen and Christian Borgelt and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gCCMzedgbo}\n}", "github": "", "reviewers": "yoFY;juWi;v9Bj", "pdf_size": 2253518, "rating": "7;7;7", "confidence": "4;4;4", "soundness": "4;3;3", "novelty": "4;3;3", "presentation": "4;3;3", "wc_summary": "54;51;86", "wc_strengths": "58;34;65", "wc_weaknesses": "116;15;97", "wc_questions": "3;38;52", "wc_limitations": "8;3;46", "wc_review": "239;141;346", "wc_reply_reviewers": "44;18;110", "wc_reply_authors": "72;72;504", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.666666666666664, 15.839472494022296 ], "wc_strengths_avg": [ 52.333333333333336, 13.27487183449325 ], "wc_weaknesses_avg": [ 76.0, 43.825411197918804 ], "wc_questions_avg": [ 31.0, 20.607442021431645 ], "wc_limitations_avg": [ 19.0, 19.200694431886227 ], "wc_review_avg": [ 242.0, 83.71777987182094 ], "wc_reply_reviewers_avg": [ 57.333333333333336, 38.724095283886946 ], "wc_reply_authors_avg": [ 216.0, 203.64675298172568 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:db4q7vK7EikJ:scholar.google.com/&scioq=TrAct:+Making+First-layer+Pre-Activations+Trainable&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "stanford.edu;sbg.ac.at;stanford.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Paris-Lodron-University of Salzburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.uni-salzburg.at", "aff_unique_abbr": "Stanford;PLUS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Austria" }, { "title": "Pandora's Box: Towards Building Universal Attackers against Real-World Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94158", "id": "gDpWYpocE1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gDpWYpocE1", "openreview": "https://openreview.net/forum?id=gDpWYpocE1", "poster": "/media/PosterPDFs/NeurIPS%202024/94158.png?t=1729060248.1715941", "project": "", "author_site": "Daizong Liu, Mingyu Yang, Xiaoye Qu, Pan Zhou, Xiang Fang, Keke Tang, Yao Wan, Lichao Sun", "tldr": "", "abstract": "Large Vision-Language Models (LVLMs) have demonstrated remarkable capabilities across a wide range of multimodal understanding tasks. Nevertheless, these models are susceptible to adversarial examples. In real-world applications, existing LVLM attackers generally rely on the detailed prior knowledge of the model to generate effective perturbations. Moreover, these attacks are task-specific, leading to significant costs for designing perturbation. Motivated by the research gap and practical demands, in this paper, we make the first attempt to build a universal attacker against real-world LVLMs, focusing on two critical aspects: (i) restricting access to only the LVLM inputs and outputs. (ii) devising a universal adversarial patch, which is task-agnostic and can deceive any LVLM-driven task when applied to various inputs. Specifically, we start by initializing the location and the pattern of the adversarial patch through random sampling, guided by the semantic distance between their output and the target label. Subsequently, we maintain a consistent patch location while refining the pattern to enhance semantic resemblance to the target. In particular, our approach incorporates a diverse set of LVLM task inputs as query samples to approximate the patch gradient, capitalizing on the importance of distinct inputs. In this way, the optimized patch is universally adversarial against different tasks and prompts, leveraging solely gradient estimates queried from the model. Extensive experiments are conducted to verify the strong universal adversarial capabilities of our proposed attack with prevalent LVLMs including LLaVA, MiniGPT-4, Flamingo, and BLIP-2, spanning a spectrum of tasks, all achieved without delving into the details of the model structures.", "keywords": "Large vision-language models;adversarial attack", "primary_area": "evaluation", "supplementary_material": "", "author": "Daizong Liu;Mingyu Yang;Xiaoye Qu;Pan Zhou;Xiang Fang;Keke Tang;Yao Wan;Lichao Sun", "authorids": "~Daizong_Liu1;~Mingyu_Yang7;~Xiaoye_Qu1;~Pan_Zhou5;~Xiang_Fang1;~Keke_Tang2;~Yao_Wan2;~Lichao_Sun1", "gender": ";M;M;M;;M;M;M", "homepage": "https://liudaizong.github.io/HomePage/;https://github.com/whyn0tdance;;http://faculty.hust.edu.cn/pzhou/zh_CN/index.htm;;https://tangbohu.github.io/;http://wanyao.me;https://lichao-sun.github.io/", "dblp": "239/6021;;229/8206;84/6614-1;;162/3984;167/0275.html;121/0780-1.html", "google_scholar": "lUw7tVIAAAAJ;;rT3hqdcAAAAJ;cTpFPJgAAAAJ;;9Lk6HpQAAAAJ;c3MtqtMAAAAJ;WhGUE7AAAAAJ", "orcid": "0000-0001-8179-4508;;;;;0000-0003-0377-1022;0000-0001-6937-4180;", "linkedin": ";;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;;;;lichao-sun-b273a290/", "or_profile": "~Daizong_Liu1;~Mingyu_Yang7;~Xiaoye_Qu1;~Pan_Zhou5;~Xiang_Fang1;~Keke_Tang2;~Yao_Wan2;~Lichao_Sun1", "aff": "Peking University;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;Huazhong University of Science and Technology;;Guangzhou University;Huazhong University of Science and Technology;Lehigh University", "aff_domain": "pku.edu.cn;hust.edu.cn;pjlab.org.cn;hust.edu.cn;;gzhu.edu.cn;hust.edu.cn;lehigh.edu", "position": "PhD student;MS student;Researcher;Professor;;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024pandoras,\ntitle={Pandora's Box: Towards Building Universal Attackers against Real-World Large Vision-Language Models},\nauthor={Daizong Liu and Mingyu Yang and Xiaoye Qu and Pan Zhou and Xiang Fang and Keke Tang and Yao Wan and Lichao Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gDpWYpocE1}\n}", "github": "", "reviewers": "K8hM;3jB6;8diJ;i8wm", "pdf_size": 5752162, "rating": "5;5;5;6", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "89;79;59;97", "wc_strengths": "57;44;69;114", "wc_weaknesses": "306;22;193;249", "wc_questions": "2;83;5;27", "wc_limitations": "2;1;1;12", "wc_review": "456;229;327;499", "wc_reply_reviewers": "16;0;9;32", "wc_reply_authors": "20;0;23;18", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.0, 14.212670403551895 ], "wc_strengths_avg": [ 71.0, 26.353367906208877 ], "wc_weaknesses_avg": [ 192.5, 106.23676388143608 ], "wc_questions_avg": [ 29.25, 32.4990384473141 ], "wc_limitations_avg": [ 4.0, 4.636809247747852 ], "wc_review_avg": [ 377.75, 106.68499191545173 ], "wc_reply_reviewers_avg": [ 14.25, 11.712706775122479 ], "wc_reply_authors_avg": [ 15.25, 8.98262211161084 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6377829546555349255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;hust.edu.cn;pjlab.org.cn;hust.edu.cn;;gzhu.edu.cn;hust.edu.cn;lehigh.edu", "author_num": 8, "aff_unique_index": "0;1;2;1;3;1;4", "aff_unique_norm": "Peking University;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;Guangzhou University;Lehigh University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.hust.edu.cn;http://www.shailab.org/;http://www.gzhu.edu.cn;https://www.lehigh.edu", "aff_unique_abbr": "Peking U;HUST;Shanghai AI Lab;GU;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Exponential Quantum Communication Advantage in Distributed Inference and Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94157", "id": "gGR9dJbe3r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gGR9dJbe3r", "openreview": "https://openreview.net/forum?id=gGR9dJbe3r", "poster": "/media/PosterPDFs/NeurIPS%202024/94157.png?t=1733509194.6720362", "project": "", "author_site": "Dar Gilboa, Hagay Michaeli, Daniel Soudry, Jarrod McClean", "tldr": "", "abstract": "Training and inference with large machine learning models that far exceed the memory capacity of individual devices necessitates the design of distributed architectures, forcing one to contend with communication constraints. We present a framework for distributed computation over a quantum network in which data is encoded into specialized quantum states. We prove that for models within this framework, inference and training using gradient descent can be performed with exponentially less communication compared to their classical analogs, and with relatively modest overhead relative to standard gradient-based methods. We show that certain graph neural networks are particularly amenable to implementation within this framework, and moreover present empirical evidence that they perform well on standard benchmarks.\nTo our knowledge, this is the first example of exponential quantum advantage for a generic class of machine learning problems that hold regardless of the data encoding cost. \nMoreover, we show that models in this class can encode highly nonlinear features of their inputs, and their expressivity increases exponentially with model depth.\nWe also delineate the space of models for which exponential communication advantages hold by showing that they cannot hold for linear classification. \nCommunication of quantum states that potentially limit the amount of information that can be extracted from them about the data and model parameters may also lead to improved privacy guarantees for distributed computation. Taken as a whole, these findings form a promising foundation for distributed machine learning over quantum networks.", "keywords": "Quantum computing;Communication complexity;distributed computation;graph neural networks", "primary_area": "infrastructure", "supplementary_material": "", "author": "Dar Gilboa;Hagay Michaeli;Daniel Soudry;Jarrod Ryan McClean", "authorids": "~Dar_Gilboa1;~Hagay_Michaeli1;~Daniel_Soudry1;~Jarrod_Ryan_McClean1", "gender": ";M;M;M", "homepage": ";;https://soudry.github.io/;https://jarrodmcclean.com", "dblp": "203/4469;342/7736;126/1779;217/3528", "google_scholar": ";O9I8GFgAAAAJ;https://scholar.google.co.il/citations?user=AEBWEm8AAAAJ;yVy2ZIwAAAAJ", "orcid": ";0009-0001-5797-1719;0000-0001-9368-6352;", "linkedin": ";hagay-michaeli-a3b319152/;daniel-soudry-2aa3a88/;", "or_profile": "~Dar_Gilboa1;~Hagay_Michaeli1;~Daniel_Soudry1;~Jarrod_Ryan_McClean1", "aff": "Google;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Research, Google", "aff_domain": "google.com;campus.technion.ac.il;technion.ac.il;research.google.com", "position": "Researcher;PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\ngilboa2024exponential,\ntitle={Exponential Quantum Communication Advantage in Distributed Inference and Learning},\nauthor={Dar Gilboa and Hagay Michaeli and Daniel Soudry and Jarrod Ryan McClean},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gGR9dJbe3r}\n}", "github": "", "reviewers": "K89H;W5VQ;mkaP", "pdf_size": 638163, "rating": "6;6;7", "confidence": "4;2;4", "soundness": "4;3;3", "novelty": "2;2;3", "presentation": "4;3;3", "wc_summary": "93;231;80", "wc_strengths": "39;29;24", "wc_weaknesses": "34;51;112", "wc_questions": "10;18;8", "wc_limitations": "2;71;10", "wc_review": "178;400;234", "wc_reply_reviewers": "10;37;25", "wc_reply_authors": "0;142;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 134.66666666666666, 68.32438965861475 ], "wc_strengths_avg": [ 30.666666666666668, 6.236095644623236 ], "wc_weaknesses_avg": [ 65.66666666666667, 33.48963355361709 ], "wc_questions_avg": [ 12.0, 4.320493798938574 ], "wc_limitations_avg": [ 27.666666666666668, 30.814859330452176 ], "wc_review_avg": [ 270.6666666666667, 94.2667609617633 ], "wc_reply_reviewers_avg": [ 24.0, 11.045361017187261 ], "wc_reply_authors_avg": [ 47.333333333333336, 66.9394419523265 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XdSzPGhAGeMJ:scholar.google.com/&scioq=Exponential+Quantum+Communication+Advantage+in+Distributed+Inference+and+Learning&hl=en&as_sdt=0,44", "gs_version_total": 7, "email": "google.com;campus.technion.ac.il;technion.ac.il;research.google.com", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Google;Technion - Israel Institute of Technology", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Google;Technion", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Selective Explanations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94156", "id": "gHCFduRo7o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gHCFduRo7o", "openreview": "https://openreview.net/forum?id=gHCFduRo7o", "poster": "/media/PosterPDFs/NeurIPS%202024/94156.png?t=1733339985.651257", "project": "", "author_site": "Lucas Monteiro Paes, Dennis Wei, Flavio Calmon", "tldr": "", "abstract": "Feature attribution methods explain black-box machine learning (ML) models by assigning importance scores to input features. \nThese methods can be computationally expensive for large ML models. To address this challenge, there have been increasing efforts to develop amortized explainers, where a ML model is trained to efficiently approximate computationally expensive feature attribution scores. Despite their efficiency, amortized explainers can produce misleading explanations. In this paper, we propose selective explanations to (i) detect when amortized explainers generate inaccurate explanations and (ii) improve the approximation of the explanation using a technique we call explanations with initial guess. Selective explanations allow practitioners to specify the fraction of samples that receive explanations with initial guess, offering a principled way to bridge the gap between amortized explainers (one inference) and more computationally costly approximations (multiple inferences). Our experiments on various models and datasets demonstrate that feature attributions via selective explanations strike a favorable balance between explanation quality and computational efficiency.", "keywords": "explainability;selective prediction;amortization;interpretability;shapley", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/a37cc729ca194d3489d8a7e8b3a0f8c7f42cfa58.zip", "author": "Lucas Monteiro Paes;Dennis Wei;Flavio Calmon", "authorids": "~Lucas_Monteiro_Paes1;~Dennis_Wei1;~Flavio_Calmon1", "gender": "M;M;", "homepage": ";https://sites.google.com/site/dennislwei/;http://people.seas.harvard.edu/~flavio/", "dblp": ";59/8761;89/4611", "google_scholar": "ruB-9hwAAAAJ;r4ldy4AAAAAJ;P8N_YH4AAAAJ", "orcid": "0000-0003-0129-1420;;", "linkedin": "lucas-monteiro-paes-201125141;dennis-wei-4886036b/;", "or_profile": "~Lucas_Monteiro_Paes1;~Dennis_Wei1;~Flavio_Calmon1", "aff": "Harvard University;International Business Machines;Harvard University", "aff_domain": "g.harvard.edu;ibm.com;harvard.edu", "position": "PhD student;Research Staff Member;Assistant Professor", "bibtex": "@inproceedings{\npaes2024selective,\ntitle={Selective Explanations},\nauthor={Lucas Monteiro Paes and Dennis Wei and Flavio Calmon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gHCFduRo7o}\n}", "github": "", "reviewers": "xNyJ;XcVA;kYJk;tCbB", "pdf_size": 1339177, "rating": "4;4;5;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "95;84;117;120", "wc_strengths": "22;36;85;83", "wc_weaknesses": "275;79;91;88", "wc_questions": "201;2;81;101", "wc_limitations": "1;17;1;69", "wc_review": "594;218;375;461", "wc_reply_reviewers": "27;86;18;24", "wc_reply_authors": "40;308;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 15.049916943292411 ], "wc_strengths_avg": [ 56.5, 27.95084971874737 ], "wc_weaknesses_avg": [ 133.25, 81.95844983892754 ], "wc_questions_avg": [ 96.25, 70.90618802333123 ], "wc_limitations_avg": [ 22.0, 27.910571473905726 ], "wc_review_avg": [ 412.0, 136.50091574784398 ], "wc_reply_reviewers_avg": [ 38.75, 27.471576219794887 ], "wc_reply_authors_avg": [ 87.0, 128.635142943132 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AmfzX9dX8RAJ:scholar.google.com/&scioq=Selective+Explanations&hl=en&as_sdt=0,47", "gs_version_total": 2, "email": "g.harvard.edu;ibm.com;harvard.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.ibm.com", "aff_unique_abbr": "Harvard;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Voxel Mamba: Group-Free State Space Models for Point Cloud based 3D Object Detection", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94155", "id": "gHYhVSCtDH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gHYhVSCtDH", "openreview": "https://openreview.net/forum?id=gHYhVSCtDH", "poster": "/media/PosterPDFs/NeurIPS%202024/94155.png?t=1731595461.5170295", "project": "", "author_site": "Guowen Zhang, Lue Fan, Chenhang HE, Zhen Lei, ZHAO-XIANG ZHANG, Lei Zhang", "tldr": "", "abstract": "Serialization-based methods, which serialize the 3D voxels and group them into multiple sequences before inputting to Transformers, have demonstrated their effectiveness in 3D object detection. However, serializing 3D voxels into 1D sequences will inevitably sacrifice the voxel spatial proximity. Such an issue is hard to be addressed by enlarging the group size with existing serialization-based methods due to the quadratic complexity of Transformers with feature sizes. Inspired by the recent advances of state space models (SSMs), we present a Voxel SSM, termed as Voxel Mamba, which employs a group-free strategy to serialize the whole space of voxels into a single sequence. The linear complexity of SSMs encourages our group-free design, alleviating the loss of spatial proximity of voxels. To further enhance the spatial proximity, we propose a Dual-scale SSM Block to establish a hierarchical structure, enabling a larger receptive field in the 1D serialization curve, as well as more complete local regions in 3D space. Moreover, we implicitly apply window partition under the group-free framework by positional encoding, which further enhances spatial proximity by encoding voxel positional information. Our experiments on Waymo Open Dataset and nuScenes dataset show that Voxel Mamba not only achieves higher accuracy than state-of-the-art methods, but also demonstrates significant advantages in computational efficiency. The source code is available at https://github.com/gwenzhang/Voxel-Mamba.", "keywords": "Point Cloud based 3D Object Detection;State Space Models;Group-free", "primary_area": "machine_vision", "supplementary_material": "", "author": "Guowen Zhang;Lue Fan;Chenhang HE;Zhen Lei;Zhaoxiang Zhang;Lei Zhang", "authorids": "~Guowen_Zhang1;~Lue_Fan1;~Chenhang_HE1;~Zhen_Lei2;~Zhaoxiang_Zhang3;~Lei_Zhang2", "gender": "M;;M;M;M;M", "homepage": ";;https://github.com/skyhehe123;http://www.cbsr.ia.ac.cn/users/zlei/;http://zhaoxiangzhang.net;http://www4.comp.polyu.edu.hk/~cslzhang/", "dblp": "211/5894;287/9792;272/1145;55/112-1.html;55/2285-1.html;64/5666-6.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com/citations?hl=en;dU6hpFUAAAAJ;cuJ3QG8AAAAJ;qxWfV6cAAAAJ;tAK5l1IAAAAJ", "orcid": ";;0000-0001-5069-3587;0000-0002-0791-189X;;0000-0002-2078-4215", "linkedin": ";;;;;", "or_profile": "~Guowen_Zhang1;~Lue_Fan1;~Chenhang_HE1;~Zhen_Lei2;~Zhaoxiang_Zhang3;~Lei_Zhang2", "aff": "Hong Kong Polytechnic University;Institute of Automation, Chinese Academy of Sciences;The Hong Kong Polytechnic University;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;ia.ac.cn;polyu.edu.hk;ia.ac.cn;ia.ac.cn;polyu.edu.hk", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Full Professor;Chair Professor", "bibtex": "@inproceedings{\nzhang2024voxel,\ntitle={Voxel Mamba: Group-Free State Space Models for Point Cloud based 3D Object Detection},\nauthor={Guowen Zhang and Lue Fan and Chenhang HE and Zhen Lei and Zhaoxiang Zhang and Lei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gHYhVSCtDH}\n}", "github": "", "reviewers": "rV2B;vX3v;2cxV;kB9g;dTn9", "pdf_size": 664560, "rating": "5;5;7;7;7", "confidence": "4;5;5;5;4", "soundness": "2;2;3;3;3", "novelty": "3;2;3;2;3", "presentation": "2;2;3;3;4", "wc_summary": "69;90;91;91;45", "wc_strengths": "76;151;116;29;91", "wc_weaknesses": "122;149;342;190;5", "wc_questions": "79;187;40;51;130", "wc_limitations": "7;180;4;6;6", "wc_review": "353;757;593;367;277", "wc_reply_reviewers": "0;74;43;106;11", "wc_reply_authors": "0;301;39;100;17", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;3;2;3;2", "rating_avg": [ 6.2, 0.9797958971132712 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 77.2, 18.15929514050587 ], "wc_strengths_avg": [ 92.6, 40.69692863104045 ], "wc_weaknesses_avg": [ 161.6, 109.17985162107522 ], "wc_questions_avg": [ 97.4, 54.54759389743969 ], "wc_limitations_avg": [ 40.6, 69.70681458795833 ], "wc_review_avg": [ 469.4, 178.3497687130544 ], "wc_reply_reviewers_avg": [ 46.8, 39.32124107909108 ], "wc_reply_authors_avg": [ 91.4, 110.12828882716738 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.1666666666666667, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2229543488492934840&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "polyu.edu.hk;ia.ac.cn;polyu.edu.hk;ia.ac.cn;ia.ac.cn;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;1;0;1;1;0", "aff_unique_norm": "Hong Kong Polytechnic University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "https://www.polyu.edu.hk;http://www.ia.cas.cn", "aff_unique_abbr": "PolyU;CAS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Approximating the Top Eigenvector in Random Order Streams", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94154", "id": "gITGmIEinf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gITGmIEinf", "openreview": "https://openreview.net/forum?id=gITGmIEinf", "poster": "", "project": "", "author_site": "Praneeth Kacham, David Woodruff", "tldr": "", "abstract": "When rows of an $n \\times d$ matrix $A$ are given in a stream, we study algorithms for approximating the top eigenvector of $A^T A$ (equivalently, the top right singular vector of $A$). We consider worst case inputs $A$ but assume that the rows are presented to the streaming algorithm in a uniformly random order. We show that when the gap parameter $R = \\sigma_1(A)^2/\\sigma_2(A)^2 = \\Omega(1)$, then there is a randomized algorithm that uses $O(h \\cdot d \\cdot \\text{polylog}(d))$ bits of space and outputs a unit vector $v$ that has a correlation $1 - O(1/\\sqrt{R})$ with the top eigenvector $v_1$. Here $h$ denotes the number of ``heavy rows'' in the matrix, defined as the rows with Euclidean norm at least $\\|{A}\\|_F/\\sqrt{d \\cdot \\text{polylog}(d)}$. We also provide a lower bound showing that any algorithm using $O(hd/R)$ bits of space can obtain at most $1 - \\Omega(1/R^2)$ correlation with the top eigenvector. Thus, parameterizing the space complexity in terms of the number of heavy rows is necessary for high accuracy solutions.\n\nOur results improve upon the $R = \\Omega(\\log n \\cdot \\log d)$ requirement in a recent work of Price. We note that Price's algorithm works for arbitrary order streams whereas our algorithm requires a stronger assumption that the rows are presented in a uniformly random order. We additionally show that the gap requirements in Price's analysis can be brought down to $R = \\Omega(\\log^2 d)$ for arbitrary order streams and $R = \\Omega(\\log d)$ for random order streams. The requirement of $R = \\Omega(\\log d)$ for random order streams is nearly tight for Price's analysis as we obtain a simple instance with $R = \\Omega(\\log d/\\log\\log d)$ for which Price's algorithm, with any fixed learning rate, cannot output a vector approximating the top eigenvector $v_1$.", "keywords": "Streaming Algorithms;Top Singular Vector;PCA;Random Order Streams", "primary_area": "optimization", "supplementary_material": "", "author": "Praneeth Kacham;David Woodruff", "authorids": "~Praneeth_Kacham1;~David_Woodruff1", "gender": "M;M", "homepage": "https://www.praneethkacham.com;http://www.cs.cmu.edu/~dwoodruf/", "dblp": "255/5684;w/DPWoodruff", "google_scholar": "hKhPmTkAAAAJ;https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Praneeth_Kacham1;~David_Woodruff1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkacham2024approximating,\ntitle={Approximating the Top Eigenvector in Random Order Streams},\nauthor={Praneeth Kacham and David Woodruff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gITGmIEinf}\n}", "github": "", "reviewers": "S42D;CKd4;Yww1;yHif", "pdf_size": 416290, "rating": "6;7;7;7", "confidence": "4;3;3;3", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "3;4;4;4", "wc_summary": "305;208;453;159", "wc_strengths": "24;165;60;209", "wc_weaknesses": "128;52;68;179", "wc_questions": "1;70;1;116", "wc_limitations": "1;1;1;4", "wc_review": "459;496;583;667", "wc_reply_reviewers": "0;77;0;0", "wc_reply_authors": "0;18;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;0;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 281.25, 112.2193722135354 ], "wc_strengths_avg": [ 114.5, 75.23463298242373 ], "wc_weaknesses_avg": [ 106.75, 50.42506817050424 ], "wc_questions_avg": [ 47.0, 48.79036790187178 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 551.25, 80.5741118474166 ], "wc_reply_reviewers_avg": [ 19.25, 33.34197804570089 ], "wc_reply_authors_avg": [ 4.5, 7.794228634059948 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4U5LHIp6X7IJ:scholar.google.com/&scioq=Approximating+the+Top+Eigenvector+in+Random+Order+Streams&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "HyperLogic: Enhancing Diversity and Accuracy in Rule Learning with HyperNets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94153", "id": "gJbZyKGfd6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gJbZyKGfd6", "openreview": "https://openreview.net/forum?id=gJbZyKGfd6", "poster": "/media/PosterPDFs/NeurIPS%202024/94153.png?t=1731751018.5207467", "project": "", "author_site": "Yang Yang, Wendi Ren, Shuang Li", "tldr": "", "abstract": "Exploring the integration of if-then logic rules within neural network architectures presents an intriguing area. This integration seamlessly transforms the rule learning task into neural network training using backpropagation and stochastic gradient descent. From a well-trained sparse and shallow neural network, one can interpret each layer and neuron through the language of logic rules, and a global explanatory rule set can be directly extracted. However, ensuring interpretability may impose constraints on the flexibility, depth, and width of neural networks. In this paper, we propose HyperLogic: a novel framework leveraging hypernetworks to generate weights of the main network. HyperLogic can unveil multiple diverse rule sets, each capable of capturing heterogeneous patterns in data. This provides a simple yet effective method to increase model flexibility and preserve interpretability. We theoretically analyzed the benefits of the HyperLogic by examining the approximation error and generalization capabilities under two types of regularization terms: sparsity and diversity regularizations. Experiments on real data demonstrate that our method can learn more diverse, accurate, and concise rules.", "keywords": "Differentiable rule learning;Heterogeneous logic rule;Hypernet", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yang Yang;Wendi Ren;Shuang Li", "authorids": "~Yang_Yang56;~Wendi_Ren1;~Shuang_Li3", "gender": "M;F;F", "homepage": "https://www.linkedin.cn/incareer/in/ACoAAENYKa0ByOPUp5MsyVPXTbQkJiv80QkfF94;;https://shuangli01.github.io", "dblp": ";218/8905;43/6294-2", "google_scholar": ";V0vQt1YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAENYKa0ByOPUp5MsyVPXTbQkJiv80QkfF94;wendiren-gatech/;", "or_profile": "~Yang_Yang56;~Wendi_Ren1;~Shuang_Li3", "aff": "The Chinese University of Hong Kong,Shenzhen;The Chinese University of Hong Kong;The Chinese University of Hong Kong (Shenzhen)", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2024hyperlogic,\ntitle={HyperLogic: Enhancing Diversity and Accuracy in Rule Learning with HyperNets},\nauthor={Yang Yang and Wendi Ren and Shuang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gJbZyKGfd6}\n}", "github": "", "reviewers": "hMMc;F7a8;Q4Et", "pdf_size": 677501, "rating": "5;6;6", "confidence": "3;5;2", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "65;83;98", "wc_strengths": "92;20;75", "wc_weaknesses": "79;213;77", "wc_questions": "16;94;2", "wc_limitations": "5;27;1", "wc_review": "257;437;253", "wc_reply_reviewers": "0;266;18", "wc_reply_authors": "95;805;28", "reply_reviewers": "0;2;1", "reply_authors": "2;3;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.0, 13.490737563232042 ], "wc_strengths_avg": [ 62.333333333333336, 30.728199137310703 ], "wc_weaknesses_avg": [ 123.0, 63.64484791926733 ], "wc_questions_avg": [ 37.333333333333336, 40.47495796442811 ], "wc_limitations_avg": [ 11.0, 11.430952132988164 ], "wc_review_avg": [ 315.6666666666667, 85.81116218508846 ], "wc_reply_reviewers_avg": [ 94.66666666666667, 121.37361968548007 ], "wc_reply_authors_avg": [ 309.3333333333333, 351.5549585610888 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7094404926659344616&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Chinese University of Hong Kong, Shenzhen;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK;CUHK", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Biologically Inspired Learning Model for Instructed Vision", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94152", "id": "gJxEiRcnao", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gJxEiRcnao", "openreview": "https://openreview.net/forum?id=gJxEiRcnao", "poster": "/media/PosterPDFs/NeurIPS%202024/94152.png?t=1733403640.2277977", "project": "", "author_site": "Roy Abel, Shimon Ullman", "tldr": "", "abstract": "As part of the effort to understand how the brain learns, ongoing research seeks to combine biological knowledge with current artificial intelligence (AI) modeling in an attempt to find an efficient biologically plausible learning scheme. Current models often use a cortical-like combination of bottom-up (BU) and top-down (TD) processing, where the TD part carries feedback signals for learning. However, in the visual cortex, the TD pathway plays a second major role in visual attention, by guiding the visual process toward locations and tasks of interest. A biological model should therefore integrate both learning and visual guidance. We introduce a model that uses a cortical-like combination of BU and TD processing that naturally integrates the two major functions of the TD stream. This integration is achieved through an appropriate connectivity pattern between the BU and TD streams, a novel processing cycle that uses the TD stream twice, and a 'Counter-Hebb' learning mechanism that operates across both streams. We show that the 'Counter-Hebb' mechanism can provide an exact backpropagation synaptic modification. Additionally, our model can effectively guide the visual stream to perform a task of interest, achieving competitive performance on standard multi-task learning benchmarks compared to AI models. The successful combination of learning and visual guidance could provide a new view on combining BU and TD processing in human vision and suggests possible directions for both biologically plausible models and artificial instructed models, such as vision-language models (VLMs).", "keywords": "Biologically Plausible Deep Networks;Neuroscience;Synaptic Modulation;Instructed Vision", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Roy Abel;Shimon Ullman", "authorids": "~Roy_Abel1;~Shimon_Ullman1", "gender": "M;M", "homepage": ";http://www.weizmann.ac.il/math/shimon/", "dblp": ";93/2158", "google_scholar": ";XOfA8ckAAAAJ", "orcid": ";0000-0003-4331-298X", "linkedin": "roy-abel-641481169/;", "or_profile": "~Roy_Abel1;~Shimon_Ullman1", "aff": "Weizmann Institute of Science;Weizmann Institute of Science", "aff_domain": "weizmann.ac.il;weizmann.ac.il", "position": "PhD student;Emeritus", "bibtex": "@inproceedings{\nabel2024biologicallyinspired,\ntitle={Biologically-Inspired Learning Model for Instructed Vision},\nauthor={Roy Abel and Shimon Ullman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gJxEiRcnao}\n}", "github": "", "reviewers": "LFXA;MScR;n2CE;jamG", "pdf_size": 3830261, "rating": "3;6;6;8", "confidence": "3;2;3;4", "soundness": "2;3;2;4", "novelty": "2;3;2;4", "presentation": "2;3;2;3", "wc_summary": "50;51;73;290", "wc_strengths": "53;10;21;205", "wc_weaknesses": "102;1;204;189", "wc_questions": "46;1;70;92", "wc_limitations": "5;7;89;7", "wc_review": "256;70;457;783", "wc_reply_reviewers": "67;0;0;50", "wc_reply_authors": "257;0;0;311", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 116.0, 100.87863995911127 ], "wc_strengths_avg": [ 72.25, 78.2539935083188 ], "wc_weaknesses_avg": [ 124.0, 80.99074021145874 ], "wc_questions_avg": [ 52.25, 33.766662553471285 ], "wc_limitations_avg": [ 27.0, 35.805027579936315 ], "wc_review_avg": [ 391.5, 264.237109430148 ], "wc_reply_reviewers_avg": [ 29.25, 29.86113695089321 ], "wc_reply_authors_avg": [ 142.0, 143.27770238247123 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.39605901719066966, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5999983455658640318&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "weizmann.ac.il;weizmann.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "An Information Theoretic Perspective on Conformal Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94151", "id": "gKLgY3m9zj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gKLgY3m9zj", "openreview": "https://openreview.net/forum?id=gKLgY3m9zj", "poster": "/media/PosterPDFs/NeurIPS%202024/94151.png?t=1731492574.095238", "project": "", "author_site": "Alvaro Correia, Fabio Valerio Massoli, Christos Louizos, Arash Behboodi", "tldr": "", "abstract": "Conformal Prediction (CP) is a distribution-free uncertainty estimation framework that constructs prediction sets guaranteed to contain the true answer with a user-specified probability. Intuitively, the size of the prediction set encodes a general notion of uncertainty, with larger sets associated with higher degrees of uncertainty. In this work, we leverage information theory to connect conformal prediction to other notions of uncertainty. More precisely, we prove three different ways to upper bound the intrinsic uncertainty, as described by the conditional entropy of the target variable given the inputs, by combining CP with information theoretical inequalities. Moreover, we demonstrate two direct and useful applications of such connection between conformal prediction and information theory: (i) more principled and effective conformal training objectives that generalize previous approaches and enable end-to-end training of machine learning models from scratch, and (ii) a natural mechanism to incorporate side information into conformal prediction. We empirically validate both applications in centralized and federated learning settings, showing our theoretical results translate to lower inefficiency (average prediction set size) for popular CP methods.", "keywords": "conformal prediction;information theory;uncertainty quantification", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Alvaro Correia;Fabio Valerio Massoli;Christos Louizos;Arash Behboodi", "authorids": "~Alvaro_Correia1;~Fabio_Valerio_Massoli1;~Christos_Louizos1;~Arash_Behboodi1", "gender": ";M;;M", "homepage": ";;;https://arashbehboodi.github.io/", "dblp": ";244/4976;;97/7718", "google_scholar": ";https://scholar.google.it/citations?user=b5kxczMAAAAJ;;", "orcid": ";;;", "linkedin": ";fvmassoli17/;;", "or_profile": "~Alvaro_Correia1;~Fabio_Valerio_Massoli1;~Christos_Louizos1;~Arash_Behboodi1", "aff": ";Qualcomm Inc, QualComm;;QualComm", "aff_domain": ";qti.qualcomm.com;;qualcomm.com", "position": ";Researcher;;Machine Learning Researcher", "bibtex": "@inproceedings{\ncorreia2024an,\ntitle={An Information Theoretic Perspective on Conformal Prediction},\nauthor={Alvaro Correia and Fabio Valerio Massoli and Christos Louizos and Arash Behboodi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gKLgY3m9zj}\n}", "github": "", "reviewers": "XnUM;aseq;T2Vh;oiJy", "pdf_size": 643342, "rating": "5;5;6;8", "confidence": "3;3;2;4", "soundness": "3;3;2;4", "novelty": "2;2;2;3", "presentation": "2;2;2;4", "wc_summary": "50;54;110;68", "wc_strengths": "61;55;22;56", "wc_weaknesses": "151;116;155;32", "wc_questions": "92;38;34;225", "wc_limitations": "28;1;94;7", "wc_review": "382;264;415;388", "wc_reply_reviewers": "43;24;437;130", "wc_reply_authors": "89;27;884;22", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 70.5, 23.76446927663229 ], "wc_strengths_avg": [ 48.5, 15.46770829825802 ], "wc_weaknesses_avg": [ 113.5, 49.439356791932475 ], "wc_questions_avg": [ 97.25, 77.23138934397076 ], "wc_limitations_avg": [ 32.5, 36.89512162874653 ], "wc_review_avg": [ 362.25, 58.07053900214807 ], "wc_reply_reviewers_avg": [ 158.5, 165.68418753761628 ], "wc_reply_authors_avg": [ 255.5, 363.8231026199408 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1457852775845632807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";qti.qualcomm.com;;qualcomm.com", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Qualcomm Incorporated", "aff_unique_dep": "", "aff_unique_url": "https://www.qualcomm.com", "aff_unique_abbr": "Qualcomm", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Multi-Fidelity Best-Arm Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94150", "id": "gKMTM1i8Ew", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gKMTM1i8Ew", "openreview": "https://openreview.net/forum?id=gKMTM1i8Ew", "poster": "", "project": "", "author_site": "Riccardo Poiani, R\u00e9my Degenne, Emilie Kaufmann, Alberto Maria Metelli, Marcello Restelli", "tldr": "", "abstract": "In bandit best-arm identification, an algorithm is tasked with finding the arm with highest mean reward with a specified accuracy as fast as possible. We study multi-fidelity best-arm identification, in which the algorithm can choose to sample an arm at a lower fidelity (less accurate mean estimate) for a lower cost. Several methods have been proposed for tackling this problem, but their optimality remain elusive, notably due to loose lower bounds on the total cost needed to identify the best arm. Our first contribution is a tight, instance-dependent lower bound on the cost complexity. The study of the optimization problem featured in the lower bound provides new insights to devise computationally efficient algorithms, and leads us to propose a gradient-based approach with asymptotically optimal cost complexity. We demonstrate the benefits of the new algorithm compared to existing methods in experiments. Our theoretical and empirical findings also shed light on an intriguing concept of optimal fidelity for each arm.", "keywords": "Multi-Fidelity;Best-Arm Identification;Fixed Confidence", "primary_area": "bandits", "supplementary_material": "/attachment/e27322f9c28c231dbe752a50c524fbd0041e2fd8.zip", "author": "Riccardo Poiani;R\u00e9my Degenne;Emilie Kaufmann;Alberto Maria Metelli;Marcello Restelli", "authorids": "~Riccardo_Poiani3;~R\u00e9my_Degenne1;~Emilie_Kaufmann1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "gender": "M;M;F;M;M", "homepage": ";https://remydegenne.github.io/;https://emiliekaufmann.github.io/;https://albertometelli.github.io/;http://home.deib.polimi.it/restelli/", "dblp": "268/8198;157/1070;67/11350;209/4941;64/1011", "google_scholar": "WQWOAkkAAAAJ;https://scholar.google.fr/citations?user=H-uIBOwAAAAJ;9GE1vx4AAAAJ;R31IsPwAAAAJ;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ", "orcid": ";;;0000-0002-3424-5212;0000-0002-6322-1076", "linkedin": ";;;;", "or_profile": "~Riccardo_Poiani3;~R\u00e9my_Degenne1;~Emilie_Kaufmann1;~Alberto_Maria_Metelli2;~Marcello_Restelli1", "aff": "Polytechnic Institute of Milan;INRIA;CNRS;Politecnico di Milano;Politecnico di Milano", "aff_domain": "polimi.it;inria.fr;cnrs.fr;polimi.it;polimi.it", "position": "PhD student;Researcher;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\npoiani2024optimal,\ntitle={Optimal Multi-Fidelity Best-Arm Identification},\nauthor={Riccardo Poiani and R{\\'e}my Degenne and Emilie Kaufmann and Alberto Maria Metelli and Marcello Restelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gKMTM1i8Ew}\n}", "github": "", "reviewers": "7qou;97EK;bSBL", "pdf_size": 2857531, "rating": "4;7;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "89;93;63", "wc_strengths": "57;116;29", "wc_weaknesses": "194;14;58", "wc_questions": "191;15;67", "wc_limitations": "89;5;5", "wc_review": "620;243;222", "wc_reply_reviewers": "0;8;0", "wc_reply_authors": "244;0;0", "reply_reviewers": "0;1;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 81.66666666666667, 13.299958228840003 ], "wc_strengths_avg": [ 67.33333333333333, 36.261396675926434 ], "wc_weaknesses_avg": [ 88.66666666666667, 76.61737545897942 ], "wc_questions_avg": [ 91.0, 73.82863039950469 ], "wc_limitations_avg": [ 33.0, 39.59797974644666 ], "wc_review_avg": [ 361.6666666666667, 182.87032442568574 ], "wc_reply_reviewers_avg": [ 2.6666666666666665, 3.7712361663282534 ], "wc_reply_authors_avg": [ 81.33333333333333, 115.02270307301174 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11038123574213462075&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "polimi.it;inria.fr;cnrs.fr;polimi.it;polimi.it", "author_num": 5, "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Polytechnic Institute of Milan;INRIA;Centre National de la Recherche Scientifique;Politecnico di Milano", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.polimi.it/;https://www.inria.fr;https://www.cnrs.fr;https://www.polimi.it", "aff_unique_abbr": "Politecnico di Milano;INRIA;CNRS;Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Italy;France" }, { "title": "Panacea: Pareto Alignment via Preference Adaptation for LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94149", "id": "gL5nT4y8fn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gL5nT4y8fn", "openreview": "https://openreview.net/forum?id=gL5nT4y8fn", "poster": "/media/PosterPDFs/NeurIPS%202024/94149.png?t=1731415171.7406142", "project": "", "author_site": "Yifan Zhong, Chengdong Ma, Xiaoyuan Zhang, Ziran Yang, Haojun Chen, Qingfu Zhang, Siyuan Qi, Yaodong Yang", "tldr": "", "abstract": "Current methods for large language model alignment typically use scalar human preference labels. However, this convention tends to oversimplify the multi-dimensional and heterogeneous nature of human preferences, leading to reduced expressivity and even misalignment. This paper presents Panacea, an innovative approach that reframes alignment as a multi-dimensional preference optimization problem. Panacea trains a single model capable of adapting online and Pareto-optimally to diverse sets of preferences without the need for further tuning. A major challenge here is using a low-dimensional preference vector to guide the model's behavior, despite it being governed by an overwhelmingly large number of parameters. To address this, Panacea is designed to use singular value decomposition (SVD)-based low-rank adaptation, which allows the preference vector to be simply injected online as singular values. Theoretically, we prove that Panacea recovers the entire Pareto front with common loss aggregation methods under mild conditions. Moreover, our experiments demonstrate, for the first time, the feasibility of aligning a single LLM to represent an exponentially vast spectrum of human preferences through various optimization methods. Our work marks a step forward in effectively and efficiently aligning models to diverse and intricate human preferences in a controllable and Pareto-optimal manner.", "keywords": "large language models;alignment;multi-dimensional preference optimization;RLHF", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yifan Zhong;Chengdong Ma;Xiaoyuan Zhang;Ziran Yang;Haojun Chen;Qingfu Zhang;Siyuan Qi;Yaodong Yang", "authorids": "~Yifan_Zhong2;~Chengdong_Ma1;~Xiaoyuan_Zhang2;~Ziran_Yang1;~Haojun_Chen2;~Qingfu_Zhang1;~Siyuan_Qi1;~Yaodong_Yang1", "gender": ";;M;M;M;M;;M", "homepage": ";;;https://ziranyang0.github.io;https://chjchjchjchjchj.github.io/;https://www.cs.cityu.edu.hk/~qzhan7/index.html;;https://www.yangyaodong.com", "dblp": ";;;358/4441;;98/1240.html;177/5178;170/1496-1", "google_scholar": ";;KQj18L8AAAAJ;_k1opxYAAAAJ;SysuqWgAAAAJ;https://scholar.google.co.uk/citations?user=nhL9PHwAAAAJ;ePclJR4AAAAJ;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ", "orcid": ";;0000-0002-3852-645X;;;;0000-0002-4070-733X;0000-0001-8132-5613", "linkedin": ";;;;;;;yaodong-yang", "or_profile": "~Yifan_Zhong2;~Chengdong_Ma1;~Xiaoyuan_Zhang2;~Ziran_Yang1;~Haojun_Chen2;~Qingfu_Zhang1;~Siyuan_Qi1;~Yaodong_Yang1", "aff": ";;City University of Hong Kong;Peking University;Beijing Jiaotong University;City University of Hong Kong;Beijing Institute for General Artificial Intelligence;Peking University", "aff_domain": ";;cityu.edu.hk;pku.edu.cn;bjtu.edu.cn;cityu.edu.hk;bigai.ai;pku.edu.cn", "position": ";;PhD student;Undergrad student;Undergrad student;Full Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhong2024panacea,\ntitle={Panacea: Pareto Alignment via Preference Adaptation for {LLM}s},\nauthor={Yifan Zhong and Chengdong Ma and Xiaoyuan Zhang and Ziran Yang and Haojun Chen and Qingfu Zhang and Siyuan Qi and Yaodong Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gL5nT4y8fn}\n}", "github": "", "reviewers": "ZtuD;Chm5;wdga", "pdf_size": 12385403, "rating": "6;7;7", "confidence": "4;3;3", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "59;62;107", "wc_strengths": "85;52;76", "wc_weaknesses": "141;41;127", "wc_questions": "98;26;56", "wc_limitations": "55;64;8", "wc_review": "438;245;374", "wc_reply_reviewers": "18;174;46", "wc_reply_authors": "132;1732;181", "reply_reviewers": "1;3;1", "reply_authors": "3;6;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.0, 21.95449840010015 ], "wc_strengths_avg": [ 71.0, 13.92838827718412 ], "wc_weaknesses_avg": [ 103.0, 44.21161235090467 ], "wc_questions_avg": [ 60.0, 29.5296461204668 ], "wc_limitations_avg": [ 42.333333333333336, 24.553795814270526 ], "wc_review_avg": [ 352.3333333333333, 80.26760796790252 ], "wc_reply_reviewers_avg": [ 79.33333333333333, 67.90843508398316 ], "wc_reply_authors_avg": [ 681.6666666666666, 742.9671743907818 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14938363172192926682&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": ";;cityu.edu.hk;pku.edu.cn;bjtu.edu.cn;cityu.edu.hk;bigai.ai;pku.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;3;1", "aff_unique_norm": "City University of Hong Kong;Peking University;Beijing Jiao Tong University;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cityu.edu.hk;http://www.pku.edu.cn;http://www.njtu.edu.cn/en;http://www.bigaiai.org/", "aff_unique_abbr": "CityU;Peking U;BJTU;BIGAI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fine-Grained Dynamic Framework for Bias-Variance Joint Optimization on Data Missing Not at Random", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94148", "id": "gLoe70Tn8V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gLoe70Tn8V", "openreview": "https://openreview.net/forum?id=gLoe70Tn8V", "poster": "/media/PosterPDFs/NeurIPS%202024/94148.png?t=1731076653.1141465", "project": "", "author_site": "Mingming Ha, Taoxuewen, Wenfang Lin, QIONGXU MA, Wujiang Xu, Linxun Chen", "tldr": "", "abstract": "In most practical applications such as recommendation systems, display advertising, and so forth, the collected data often contains missing values and those missing values are generally missing-not-at-random, which deteriorates the prediction performance of models. Some existing estimators and regularizers attempt to achieve unbiased estimation to improve the predictive performance. However, variances and generalization bound of these methods are generally unbounded when the propensity scores tend to zero, compromising their stability and robustness. In this paper, we first theoretically reveal that limitations of regularization techniques. Besides, we further illustrate that, for more general estimators, unbiasedness will inevitably lead to unbounded variance. These general laws inspire us that the estimator designs is not merely about eliminating bias, reducing variance, or simply achieve a bias-variance trade-off. Instead, it involves a quantitative joint optimization of bias and variance. Then, we develop a systematic fine-grained dynamic learning framework to jointly optimize bias and variance, which adaptively selects an appropriate estimator for each user-item pair according to the predefined objective function. With this operation, the generalization bounds and variances of models are reduced and bounded with theoretical guarantees. Extensive experiments are conducted to verify the theoretical results and the effectiveness of the proposed dynamic learning framework.", "keywords": "Advertising recommendation systems;Quantifying bias-variance joint optimization;Limitations of regularization", "primary_area": "causal_inference", "supplementary_material": "", "author": "Mingming Ha;Taoxuewen;Wenfang Lin;QIONGXU MA;Wujiang Xu;Linxun Chen", "authorids": "~Mingming_Ha1;~Taoxuewen1;~Wenfang_Lin1;~QIONGXU_MA1;~Wujiang_Xu1;~Linxun_Chen1", "gender": "M;;M;M;M;F", "homepage": ";https://orcid.org/0000-0003-2465-4941;https://wujiangxu.github.io/;https://github.com/LinxunChen;https://github.com/taoxuewen;", "dblp": "231/1063;297/3889.html;283/0798;;;", "google_scholar": "J66ui5YAAAAJ;;cucjW6wAAAAJ;;;", "orcid": "0000-0002-2901-9608;0000-0003-2465-4941;;;;0009-0004-7398-4706", "linkedin": ";;;;;", "or_profile": "~Mingming_Ha1;~QIONGXU_MA1;~Wujiang_Xu1;~Linxun_Chen1;~Taoxuewen2;~linwenfang1", "aff": "Ant Group;;MYbank, Ant Group;Ant Group;Alibaba Group;Alibaba Group", "aff_domain": "mybank.cn;;mybank.cn;antgroup.com;antgroup.com;antgroup.com", "position": "Researcher;;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nha2024finegrained,\ntitle={Fine-Grained Dynamic Framework for Bias-Variance Joint Optimization on Data Missing Not at Random},\nauthor={Mingming Ha and Taoxuewen and Wenfang Lin and QIONGXU MA and Wujiang Xu and Linxun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gLoe70Tn8V}\n}", "github": "", "reviewers": "rrSW;AciS;aCF5;h92D", "pdf_size": 2583280, "rating": "5;5;6;7", "confidence": "3;4;5;4", "soundness": "2;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "33;69;107;117", "wc_strengths": "43;28;272;92", "wc_weaknesses": "269;135;253;181", "wc_questions": "102;123;217;8", "wc_limitations": "6;1;186;12", "wc_review": "453;356;1035;410", "wc_reply_reviewers": "16;14;22;39", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.5, 33.237779709240506 ], "wc_strengths_avg": [ 108.75, 97.17863705568215 ], "wc_weaknesses_avg": [ 209.5, 54.302394054037805 ], "wc_questions_avg": [ 112.5, 74.26472917879659 ], "wc_limitations_avg": [ 51.25, 77.89536250637775 ], "wc_review_avg": [ 563.5, 274.3815773699102 ], "wc_reply_reviewers_avg": [ 22.75, 9.832980219648569 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6164669697891245771&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mybank.cn;;mybank.cn;antgroup.com;antgroup.com;antgroup.com", "author_num": 6, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Ant Group;MYbank;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.antgroup.com;;https://www.alibaba.com", "aff_unique_abbr": "Ant Group;;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Understanding the Gains from Repeated Self-Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94147", "id": "gMqaKJCOCB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gMqaKJCOCB", "openreview": "https://openreview.net/forum?id=gMqaKJCOCB", "poster": "/media/PosterPDFs/NeurIPS%202024/94147.png?t=1731631618.0880268", "project": "", "author_site": "Divyansh Pareek, Simon Du, Sewoong Oh", "tldr": "", "abstract": "Self-Distillation is a special type of knowledge distillation where the student model has the same architecture as the teacher model. Despite using the same architecture and the same training data, self-distillation has been empirically observed to improve performance, especially when applied repeatedly. For such a process, there is a fundamental question of interest: How much gain is possible by applying multiple steps of self-distillation? To investigate this relative gain, we propose using the simple but canonical task of linear regression. Our analysis shows that the excess risk achieved by multi-step self-distillation can significantly improve upon a single step of self-distillation, reducing the excess risk by a factor of $d$, where $d$ is the input dimension. Empirical results on regression tasks from the UCI repository show a reduction in the learnt model's risk (MSE) by up to $47$%.", "keywords": "Self-Distillation Theory;Linear Regression", "primary_area": "learning_theory", "supplementary_material": "", "author": "Divyansh Pareek;Simon Shaolei Du;Sewoong Oh", "authorids": "~Divyansh_Pareek2;~Simon_Shaolei_Du1;~Sewoong_Oh3", "gender": "M;M;M", "homepage": "https://pareek21.github.io/;http://simonshaoleidu.com;https://homes.cs.washington.edu/~sewoong/", "dblp": ";176/5602;80/4366", "google_scholar": "R1lrFWgAAAAJ;OttawxUAAAAJ;55TAOdgAAAAJ", "orcid": ";;", "linkedin": "divyansh-pareek-4a0a18106/;;", "or_profile": "~Divyansh_Pareek2;~Simon_Shaolei_Du1;~Sewoong_Oh1", "aff": "Department of Computer Science, University of Washington;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;washington.edu;uw.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npareek2024understanding,\ntitle={Understanding the Gains from Repeated Self-Distillation},\nauthor={Divyansh Pareek and Simon Shaolei Du and Sewoong Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gMqaKJCOCB}\n}", "github": "", "reviewers": "2wSn;tBMq;Uvvg;BMMQ", "pdf_size": 2985538, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;4", "novelty": "3;2;1;3", "presentation": "3;3;3;4", "wc_summary": "39;91;99;288", "wc_strengths": "43;67;24;65", "wc_weaknesses": "191;125;130;241", "wc_questions": "14;50;4;109", "wc_limitations": "5;40;4;1", "wc_review": "292;373;261;704", "wc_reply_reviewers": "12;30;31;11", "wc_reply_authors": "23;33;24;216", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 129.25, 94.50496018728329 ], "wc_strengths_avg": [ 49.75, 17.597940220378067 ], "wc_weaknesses_avg": [ 171.75, 47.683199347359235 ], "wc_questions_avg": [ 44.25, 41.111890007636475 ], "wc_limitations_avg": [ 12.5, 15.945218719101975 ], "wc_review_avg": [ 407.5, 176.00071022583973 ], "wc_reply_reviewers_avg": [ 21.0, 9.513148795220223 ], "wc_reply_authors_avg": [ 74.0, 82.07618412182671 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13208776464655762926&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;washington.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dual Lagrangian Learning for Conic Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94146", "id": "gN1iKwxlL5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gN1iKwxlL5", "openreview": "https://openreview.net/forum?id=gN1iKwxlL5", "poster": "/media/PosterPDFs/NeurIPS%202024/94146.png?t=1733961597.3540547", "project": "", "author_site": "Mathieu Tanneau, Pascal Van Hentenryck", "tldr": "", "abstract": "This paper presents Dual Lagrangian Learning (DLL), a principled learning methodology for dual conic optimization proxies.\nDLL leverages conic duality and the representation power of ML models to provide high-duality, dual-feasible solutions, and therefore valid Lagrangian dual bounds, for linear and nonlinear conic optimization problems.\nThe paper introduces a systematic dual completion procedure, differentiable conic projection layers, and a self-supervised learning framework based on Lagrangian duality.\nIt also provides closed-form dual completion formulae for broad classes of conic problems, which eliminate the need for costly implicit layers.\nThe effectiveness of DLL is demonstrated on linear and nonlinear conic optimization problems.\nThe proposed methodology significantly outperforms a state-of-the-art learning-based method, and achieves 1000x speedups over commercial interior-point solvers with optimality gaps under 0.5\\% on average.", "keywords": "Conic optimization;optimization proxies;duality;self-supervised learning", "primary_area": "optimization", "supplementary_material": "", "author": "Mathieu Tanneau;Pascal Van Hentenryck", "authorids": "~Mathieu_Tanneau1;~Pascal_Van_Hentenryck2", "gender": ";M", "homepage": ";https://sites.gatech.edu/pascal-van-hentenryck/", "dblp": "251/3115.html;h/PVHentenryck.html", "google_scholar": "69NakqoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9967-0578;0000-0001-7085-9994", "linkedin": ";", "or_profile": "~Mathieu_Tanneau1;~Pascal_Van_Hentenryck2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\ntanneau2024dual,\ntitle={Dual Lagrangian Learning for Conic Optimization},\nauthor={Mathieu Tanneau and Pascal Van Hentenryck},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gN1iKwxlL5}\n}", "github": "", "reviewers": "yA4S;H5sy;vMXP;9KhZ", "pdf_size": 1104905, "rating": "3;7;7;7", "confidence": "5;3;4;4", "soundness": "2;4;4;3", "novelty": "2;4;3;3", "presentation": "3;4;4;3", "wc_summary": "88;28;208;152", "wc_strengths": "78;23;170;48", "wc_weaknesses": "172;18;197;70", "wc_questions": "89;71;137;128", "wc_limitations": "21;1;21;30", "wc_review": "448;141;733;428", "wc_reply_reviewers": "111;18;73;211", "wc_reply_authors": "273;0;0;64", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 119.0, 67.54998149518622 ], "wc_strengths_avg": [ 79.75, 55.6254213467188 ], "wc_weaknesses_avg": [ 114.25, 73.15181132412239 ], "wc_questions_avg": [ 106.25, 27.19719654670312 ], "wc_limitations_avg": [ 18.25, 10.615436872781073 ], "wc_review_avg": [ 437.5, 209.42361375928934 ], "wc_reply_reviewers_avg": [ 103.25, 70.44989354143837 ], "wc_reply_authors_avg": [ 84.25, 112.0633191548421 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10605105324591920471&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Domain Learning for Cross-domain Image Denoising", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94145", "id": "gOtt78AQk4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gOtt78AQk4", "openreview": "https://openreview.net/forum?id=gOtt78AQk4", "poster": "/media/PosterPDFs/NeurIPS%202024/94145.png?t=1731646686.056448", "project": "", "author_site": "Zian Qian, Chenyang Qi, Ka Law, Hao Fu, Chenyang Lei, Qifeng Chen", "tldr": "", "abstract": "Different camera sensors have different noise patterns, and thus an image denoising model trained on one sensor often does not generalize well to a different sensor. One plausible solution is to collect a large dataset for each sensor for training or fine-tuning, which is inevitably time-consuming. To address this cross-domain challenge, we present a novel adaptive domain learning (ADL) scheme for cross-domain RAW image denoising by utilizing existing data from different sensors (source domain) plus a small amount of data from the new sensor (target domain). The ADL training scheme automatically removes the data in the source domain that are harmful to fine-tuning a model for the target domain (some data are harmful as adding them during training lowers the performance due to domain gaps). Also, we introduce a modulation module to adopt sensor-specific information (sensor type and ISO) to understand input data for image denoising. We conduct extensive experiments on public datasets with various smartphone and DSLR cameras, which show our proposed model outperforms prior work on cross-domain image denoising, given a small amount of image data from the target domain sensor.", "keywords": "RAW denoising; Cross-Domain Learning;", "primary_area": "other", "supplementary_material": "", "author": "Zian Qian;Chenyang Qi;Ka Lung Law;Hao Fu;Chenyang Lei;Qifeng Chen", "authorids": "~Zian_Qian1;~Chenyang_Qi1;~Ka_Lung_Law1;~Hao_Fu4;~Chenyang_Lei1;~Qifeng_Chen1", "gender": "M;M;M;M;M;M", "homepage": ";https://chenyangqiqi.github.io/;;;https://chenyanglei.github.io/;http://cqf.io/", "dblp": ";299/1389;;64/3069.html;153/6769;117/4819", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;qNweIR4AAAAJ;;;CuGF_pEAAAAJ;lLMX9hcAAAAJ", "orcid": "0000-0001-5147-9689;;;;;", "linkedin": ";chenyang-qi-5196a6137;ka-lung-law-74047073;;;", "or_profile": "~Zian_Qian1;~Chenyang_Qi1;~Ka_Lung_Law1;~Hao_Fu4;~Chenyang_Lei1;~Qifeng_Chen1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Sensetime;Sensetime;Princeton University;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;sensetime.com;sensetime.com;princeton.edu;hkust.edu", "position": "PhD student;PhD student;senior researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqian2024adaptive,\ntitle={Adaptive Domain Learning for Cross-domain Image Denoising},\nauthor={Zian Qian and Chenyang Qi and Ka Lung Law and Hao Fu and Chenyang Lei and Qifeng Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gOtt78AQk4}\n}", "github": "", "reviewers": "G2YE;e1pc;4VKx;Xk6Q", "pdf_size": 36036298, "rating": "4;5;5;6", "confidence": "4;4;4;5", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "57;68;95;83", "wc_strengths": "25;55;54;66", "wc_weaknesses": "191;287;124;90", "wc_questions": "34;69;71;10", "wc_limitations": "1;14;25;1", "wc_review": "308;493;369;250", "wc_reply_reviewers": "429;107;0;68", "wc_reply_authors": "548;221;37;92", "reply_reviewers": "2;2;0;1", "reply_authors": "3;4;2;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 75.75, 14.446020213193666 ], "wc_strengths_avg": [ 50.0, 15.182226450688976 ], "wc_weaknesses_avg": [ 173.0, 75.18310980532794 ], "wc_questions_avg": [ 46.0, 25.465663156493687 ], "wc_limitations_avg": [ 10.25, 10.034316120194738 ], "wc_review_avg": [ 355.0, 90.10271915985666 ], "wc_reply_reviewers_avg": [ 151.0, 165.00757558366828 ], "wc_reply_authors_avg": [ 224.5, 198.35385047938948 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AkR6uovc118J:scholar.google.com/&scioq=Adaptive+Domain+Learning+for+Cross-domain+Image+Denoising&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ust.hk;ust.hk;sensetime.com;sensetime.com;princeton.edu;hkust.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;SenseTime;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.sensetime.com;https://www.princeton.edu", "aff_unique_abbr": "HKUST;SenseTime;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "CausalChaos! Dataset for Comprehensive Causal Action Question Answering Over Longer Causal Chains Grounded in Dynamic Visual Scenes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97580", "id": "gP4aAi7q8S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gP4aAi7q8S", "openreview": "https://openreview.net/forum?id=gP4aAi7q8S", "poster": "", "project": "", "author_site": "Paritosh Parmar, Eric Peh, Ruirui Chen, Ting En Lam, Yuhan Chen, Elston Tan, Basura Fernando", "tldr": "", "abstract": "Causal video question answering (QA) has garnered increasing interest, yet existing datasets often lack depth in causal reasoning. To address this gap, we capitalize on the unique properties of cartoons and construct CausalChaos!, a novel, challenging causal Why-QA dataset built upon the iconic \"Tom and Jerry\" cartoon series. Cartoons use the principles of animation that allow animators to create expressive, unambiguous causal relationships between events to form a coherent storyline. Utilizing these properties, along with thought-provoking questions and multi-level answers (answer and detailed causal explanation), our questions involve causal chains that interconnect multiple dynamic interactions between characters and visual scenes. These factors demand models to solve more challenging, yet well-defined causal relationships. We also introduce hard incorrect answer mining, including a causally confusing version that is even more challenging. While models perform well, there is much room for improvement, especially, on open-ended answers. We identify more advanced/explicit causal relationship modeling \\& joint modeling of vision and language as the immediate areas for future efforts to focus upon. Along with the other complementary datasets, our new challenging dataset will pave the way for these developments in the field. Dataset and Code: https://github.com/LUNAProject22/CausalChaos", "keywords": "Video Question Answering;Visual Question Answering;Video Understanding;Visual Reasoning;Causal Reasoning;Causal Video Question Answering;Causal Question Answering;CausalChaos!QA;Visually Grounded Video Question Answering;Vision Language;Multimodal Large Language Models;Vision Language Models", "primary_area": "", "supplementary_material": "/attachment/dde66f9eee22f7080ab4831016038a0dbbbfe6d3.zip", "author": "Paritosh Parmar;Eric Peh;Ruirui Chen;Ting En Lam;Yuhan Chen;Elston Tan;Basura Fernando", "authorids": "~Paritosh_Parmar2;~Eric_Peh1;~Ruirui_Chen1;~Ting_En_Lam1;~Yuhan_Chen3;~Elston_Tan1;~Basura_Fernando1", "gender": ";M;;F;F;M;M", "homepage": "https://paritoshparmar.github.io/;;;;;https://elston.netlify.app;https://basurafernando.github.io/", "dblp": "146/0742;367/3267;;;;;01/9558", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;;G8leC1IAAAAJ;https://scholar.google.com.au/citations?user=GyvseMkAAAAJ", "orcid": ";;;;;;0000-0002-6920-9916", "linkedin": ";eric-peh-2308301b9/;;tingenlam;yuhan-chen-4054b61b2/;https://linkedin.com/in/Elstuhn;", "or_profile": "~Paritosh_Parmar2;~Eric_Peh1;~Ruirui_Chen1;~Ting_En_Lam1;~Yuhan_Chen3;~Elston_Tan1;~Basura_Fernando1", "aff": "Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;;Nanyang Technological University;National University of Singapore;Institute of High Performance Computing, Singapore, A*STAR;A*STAR", "aff_domain": "ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;;ntu.edu.sg;nus.edu;ihpc.a-star.edu.sg;astar.edu.sg", "position": "Scientist;Researcher;;Undergrad student;Undergrad student;Research Intern;Principal Researcher", "bibtex": "@inproceedings{\nparmar2024causalchaos,\ntitle={CausalChaos! Dataset for Comprehensive Causal Action Question Answering Over Longer Causal Chains Grounded in Dynamic Visual Scenes},\nauthor={Paritosh Parmar and Eric Peh and Ruirui Chen and Ting En Lam and Yuhan Chen and Elston Tan and Basura Fernando},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=gP4aAi7q8S}\n}", "github": "", "reviewers": "387i;sN8y;nU6m", "pdf_size": 14994310, "rating": "6;7;8", "confidence": "4;4;3", "wc_summary_and_contributions": "65;157;69", "wc_strengths": "44;91;50", "wc_improvement": "326;6;145", "wc_limitations": "43;17;33", "wc_correctness": "21;29;10", "wc_clarity": "34;8;8", "wc_relation_to_prior_work": "1;11;7", "wc_documentation": "10;10;10", "wc_additional_feedback": "1;1;1", "wc_review": "545;330;333", "wc_reply_reviewers": "0;91;27", "wc_reply_authors": "0;110;81", "reply_reviewers": "0;1;1", "reply_authors": "5;4;3", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 97.0, 42.45782220824175 ], "wc_strengths_avg": [ 61.666666666666664, 20.885933597094056 ], "wc_improvement_avg": [ 159.0, 131.0139941634735 ], "wc_limitations_avg": [ 31.0, 10.708252269472673 ], "wc_correctness_avg": [ 20.0, 7.788880963698615 ], "wc_clarity_avg": [ 16.666666666666668, 12.256517540566824 ], "wc_relation_to_prior_work_avg": [ 6.333333333333333, 4.109609335312651 ], "wc_documentation_avg": [ 10.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 402.6666666666667, 100.6523168580281 ], "wc_reply_reviewers_avg": [ 39.333333333333336, 38.16047984790315 ], "wc_reply_authors_avg": [ 63.666666666666664, 46.549853800940014 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2566930150855245573&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;;ntu.edu.sg;nus.edu;ihpc.a-star.edu.sg;astar.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Institute of High Performance Computing;Nanyang Technological University;National University of Singapore;Agency for Science, Technology and Research", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ihpc.a-star.edu.sg;https://www.ntu.edu.sg;https://www.nus.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "IHPC;NTU;NUS;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "GOMAA-Geo: GOal Modality Agnostic Active Geo-localization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94144", "id": "gPCesxD4B4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gPCesxD4B4", "openreview": "https://openreview.net/forum?id=gPCesxD4B4", "poster": "", "project": "", "author_site": "Anindya Sarkar, Srikumar Sastry, Aleksis Pirinen, Chongjie Zhang, Nathan Jacobs, Yevgeniy Vorobeychik", "tldr": "", "abstract": "We consider the task of active geo-localization (AGL) in which an agent uses a sequence of visual cues observed during aerial navigation to find a target specified through multiple possible modalities. This could emulate a UAV involved in a search-and-rescue operation navigating through an area, observing a stream of aerial images as it goes. The AGL task is associated with two important challenges. Firstly, an agent must deal with a goal specification in one of multiple modalities (e.g., through a natural language description) while the search cues are provided in other modalities (aerial imagery). The second challenge is limited localization time (e.g., limited battery life, urgency) so that the goal must be localized as efficiently as possible, i.e. the agent must effectively leverage its sequentially observed aerial views when searching for the goal. To address these challenges, we propose GOMAA-Geo -- a goal modality agnostic active geo-localization agent -- for zero-shot generalization between different goal modalities. Our approach combines cross-modality contrastive learning to align representations across modalities with supervised foundation model pretraining and reinforcement learning to obtain highly effective navigation and localization policies. Through extensive evaluations, we show that GOMAA-Geo outperforms alternative learnable approaches and that it generalizes across datasets -- e.g., to disaster-hit areas without seeing a single disaster scenario during training -- and goal modalities -- e.g., to ground-level imagery or textual descriptions, despite only being trained with goals specified as aerial views. Our code is available at: https://github.com/mvrl/GOMAA-Geo.", "keywords": "Active Geo-localization;Representation Learning;Remote Sensing;Deep Reinforcement Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Anindya Sarkar;Srikumar Sastry;Aleksis Pirinen;Chongjie Zhang;Nathan Jacobs;Yevgeniy Vorobeychik", "authorids": "~Anindya_Sarkar2;~Srikumar_Sastry1;~Aleksis_Pirinen1;~Chongjie_Zhang1;~Nathan_Jacobs1;~Yevgeniy_Vorobeychik1", "gender": "M;M;M;;M;M", "homepage": "https://sites.google.com/view/anindya-sarkar/home;https://vishu26.github.io/;https://aleksispi.github.io/;;https://jacobsn.github.io/;http://vorobeychik.com", "dblp": ";353/2310;;29/6693;82/3140;70/2217", "google_scholar": "2hQyYz0AAAAJ;A-nQa6EAAAAJ;paBGTgsAAAAJ;LjxqXycAAAAJ;ZBgGyh8AAAAJ;https://scholar.google.com.tw/citations?user=ptI-HHkAAAAJ", "orcid": ";;;;0000-0002-4242-8967;", "linkedin": ";srikumar-sastry/;;;jacobsn/;", "or_profile": "~Anindya_Sarkar2;~Srikumar_Sastry1;~Aleksis_Pirinen1;~Chongjie_Zhang1;~Nathan_Jacobs1;~Yevgeniy_Vorobeychik1", "aff": "Washington University, Saint Louis;Washington University, Saint Louis;RISE Research Institutes of Sweden;Washington University, Saint Louis;Washington University, Saint Louis;Washington University, St. Louis", "aff_domain": "wustl.edu;wustl.edu;ri.se;wustl.edu;wustl.edu;wustl.edu", "position": "PhD student;PhD student;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsarkar2024gomaageo,\ntitle={{GOMAA}-Geo: {GO}al Modality Agnostic Active Geo-localization},\nauthor={Anindya Sarkar and Srikumar Sastry and Aleksis Pirinen and Chongjie Zhang and Nathan Jacobs and Yevgeniy Vorobeychik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gPCesxD4B4}\n}", "github": "", "reviewers": "noHa;DjP3;9JwT;4muN", "pdf_size": 25494469, "rating": "4;5;5;6", "confidence": "4;4;3;4", "soundness": "2;4;2;3", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "50;76;77;144", "wc_strengths": "26;93;66;86", "wc_weaknesses": "174;92;112;21", "wc_questions": "4;1;22;3", "wc_limitations": "4;1;7;18", "wc_review": "258;263;284;272", "wc_reply_reviewers": "0;86;347;16", "wc_reply_authors": "116;606;503;71", "reply_reviewers": "0;2;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 86.75, 34.78056210011563 ], "wc_strengths_avg": [ 67.75, 26.06122598804592 ], "wc_weaknesses_avg": [ 99.75, 54.600251830921074 ], "wc_questions_avg": [ 7.5, 8.440971508067067 ], "wc_limitations_avg": [ 7.5, 6.422616289332565 ], "wc_review_avg": [ 269.25, 9.883698700385398 ], "wc_reply_reviewers_avg": [ 112.25, 139.3383920533031 ], "wc_reply_authors_avg": [ 324.0, 233.90061992222252 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3977599648790594312&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "wustl.edu;wustl.edu;ri.se;wustl.edu;wustl.edu;wustl.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Washington University in St. Louis;RISE Research Institutes of Sweden", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;https://www.rise.se", "aff_unique_abbr": "WUSTL;RISE", "aff_campus_unique_index": "0;0;0;0;2", "aff_campus_unique": "Saint Louis;;St. Louis", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Sweden" }, { "title": "A Retrospective on the Robot Air Hockey Challenge: Benchmarking Robust, Reliable, and Safe Learning Techniques for Real-world Robotics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97579", "id": "gPLE4siNjO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gPLE4siNjO", "openreview": "https://openreview.net/forum?id=gPLE4siNjO", "poster": "/media/PosterPDFs/NeurIPS%202024/97579.png?t=1730131328.1169274", "project": "", "author_site": "Puze Liu, Jonas G\u00fcnster, Niklas Funk, Simon Gr\u00f6ger, Dong Chen, Haitham Bou Ammar, Julius Jankowski, Ante Mari\u0107, Sylvain Calinon, Andrej Orsula, Miguel Olivares, Hongyi Zhou, Rudolf Lioutikov, Gerhard Neumann, Amarildo Likmeta, Amirhossein Zhalehmehrabi, Thomas Bonenfant, Marcello Restelli, Davide Tateo, Ziyuan Liu, Jan Peters", "tldr": "", "abstract": "Machine learning methods have a groundbreaking impact in many application domains, but their application on real robotic platforms is still limited.\nDespite the many challenges associated with combining machine learning technology with robotics, robot learning remains one of the most promising directions for enhancing the capabilities of robots. \nWhen deploying learning-based approaches on real robots, extra effort is required to address the challenges posed by various real-world factors. To investigate the key factors influencing real-world deployment and to encourage original solutions from different researchers, we organized the Robot Air Hockey Challenge at the NeurIPS 2023 conference. \nWe selected the air hockey task as a benchmark, encompassing low-level robotics problems and high-level tactics. Different from other machine learning-centric benchmarks, participants need to tackle practical challenges in robotics, such as the sim-to-real gap, low-level control issues, safety problems, real-time requirements, and the limited availability of real-world data. Furthermore, we focus on a dynamic environment, removing the typical assumption of quasi-static motions of other real-world benchmarks.\nThe competition's results show that solutions combining learning-based approaches with prior knowledge outperform those relying solely on data when real-world deployment is challenging.\nOur ablation study reveals which real-world factors may be overlooked when building a learning-based solution.\nThe successful real-world air hockey deployment of best-performing agents sets the foundation for future competitions and follow-up research directions.", "keywords": "robotics;reinforcement learning;imitation learinng;planning;challenge", "primary_area": "", "supplementary_material": "", "author": "Puze Liu;Jonas G\u00fcnster;Niklas Funk;Simon Gr\u00f6ger;Dong Chen;Haitham Bou Ammar;Julius Jankowski;Ante Mari\u0107;Sylvain Calinon;Andrej Orsula;Miguel Olivares-Mendez;Hongyi Zhou;Rudolf Lioutikov;Gerhard Neumann;Amarildo Likmeta;Amirhossein Zhalehmehrabi;Thomas Bonenfant;Marcello Restelli;Davide Tateo;Ziyuan Liu;Jan Peters", "authorids": "~Puze_Liu1;~Jonas_G\u00fcnster1;~Niklas_Funk1;~Simon_Gr\u00f6ger1;~Dong_Chen17;~Haitham_Bou_Ammar1;~Julius_Jankowski1;~Ante_Mari\u01071;~Sylvain_Calinon1;~Andrej_Orsula1;~Miguel_Olivares-Mendez1;~Hongyi_Zhou1;~Rudolf_Lioutikov1;~Gerhard_Neumann2;~Amarildo_Likmeta1;~Amirhossein_Zhalehmehrabi1;~Thomas_Bonenfant1;~Marcello_Restelli1;~Davide_Tateo2;~Ziyuan_Liu2;~Jan_Peters3", "gender": "M;M;M;M;M;M;;M;M;M;M;M;M;;M;M;M;M;M;;M", "homepage": "https://puzeliu.github.io/;;https://niklasfunk.de;https://www.ias.informatik.tu-darmstadt.de/;;;https://jujankowski.github.io/;https://www.idiap.ch/~amaric/;https://calinon.ch/;https://github.com/AndrejOrsula;https://www.spacer.lu/;https://hongyizhoucn.github.io/;https://rudolf.intuitive-robots.net;;https://www.unibo.it/sitoweb/amarildo.likmeta2;;;http://home.deib.polimi.it/restelli/;https://www.ias.informatik.tu-darmstadt.de/Team/DavideTateo;;https://www.jan-peters.net", "dblp": "292/4069;;209/9556;;;;;;59/6334;;;;151/9451;;255/6990;;;64/1011;214/0808;;p/JanPeters1", "google_scholar": "zg-FMloAAAAJ;;zhsWjy8AAAAJ;;https://scholar.google.de/citations?hl=en;https://scholar.google.co.uk/citations?user=AE5suDoAAAAJ;EFuKzdMAAAAJ;;t7VnipMAAAAJ;sbQC2dAAAAAJ;Aw-nkicAAAAJ;W35-J2sAAAAJ;hvjV43MAAAAJ;;AfEypgsAAAAJ;;;https://scholar.google.com.tw/citations?user=xdgxRiEAAAAJ;https://scholar.google.it/citations?user=LGnu3SEAAAAJ;;https://scholar.google.de/citations?user=-kIVAcAAAAAJ", "orcid": "0000-0001-6887-7704;;;;;;0000-0003-0890-1965;;0000-0002-9036-6799;0000-0003-0706-1191;0000-0001-8824-3231;;;;0000-0002-4227-0741;;;0000-0002-6322-1076;0000-0002-7193-923X;;0000-0002-5266-8091", "linkedin": ";jonas-g%C3%BCnster-6b49a2186/;;;;;;;sylvaincalinon;AndrejOrsula;miguel-a-olivares-mendez-8a952533/;hongyi-zhou-9413b9242/;rudolf-lioutikov-74830730a/;;amarildolikmeta;https://it.linkedin.com/in/amirhossein-zhalehmehrabi-5146a2186;thomas-bonenfant-5679a7162/;;;;janrpeters/", "or_profile": "~Puze_Liu1;~Jonas_G\u00fcnster1;~Niklas_Funk1;~Simon_Gr\u00f6ger1;~Dong_Chen17;~Haitham_Bou_Ammar1;~Julius_Jankowski1;~Ante_Mari\u01071;~Sylvain_Calinon1;~Andrej_Orsula1;~Miguel_Olivares-Mendez1;~Hongyi_Zhou1;~Rudolf_Lioutikov1;~Gerhard_Neumann2;~Amarildo_Likmeta1;~Amirhossein_Zhalehmehrabi1;~Thomas_Bonenfant1;~Marcello_Restelli1;~Davide_Tateo2;~Ziyuan_Liu2;~Jan_Peters3", "aff": "TU Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt;Technische Universit\u00e4t Darmstadt;Huawei Technologies Ltd.;Huawei R&D UK;EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;University of Luxemburg;University of Luxemburg;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;;Universita' di Bologna;Polytechnic Institute of Milan;Polytechnic Institute of Milan;Politecnico di Milano;Technische Universit\u00e4t Darmstadt;;TU Darmstadt", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;huawei.com;huawei.com;epfl.ch;epfl.ch;epfl.ch;uni.lu;uni.lu;kit.edu;kit.edu;;unibo.it;mail.polimi.it;polimi.it;polimi.it;tu-darmstadt.de;;tu-darmstadt.de", "position": "PhD student;MS student;PhD student;MS student;Researcher;Principal Researcher;PhD student;PhD student;Lecturer;PhD student;Assistant Professor;PhD student;Tenure-Track Professor;;PhD student;MS student;MS student;Associate Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nliu2024a,\ntitle={A Retrospective on the Robot Air Hockey Challenge: Benchmarking Robust, Reliable, and Safe Learning Techniques for Real-world Robotics},\nauthor={Puze Liu and Jonas G{\\\"u}nster and Niklas Funk and Simon Gr{\\\"o}ger and Dong Chen and Haitham Bou Ammar and Julius Jankowski and Ante Mari{\\'c} and Sylvain Calinon and Andrej Orsula and Miguel Olivares-Mendez and Hongyi Zhou and Rudolf Lioutikov and Gerhard Neumann and Amarildo Likmeta and Amirhossein Zhalehmehrabi and Thomas Bonenfant and Marcello Restelli and Davide Tateo and Ziyuan Liu and Jan Peters},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=gPLE4siNjO}\n}", "github": "", "reviewers": "uZwi;w1oz;qeQ2;3fJX;zeVZ;neoz", "pdf_size": 5780632, "rating": "3;5;6;6;7;9", "confidence": "3;3;3;4;5;4", "wc_summary_and_contributions": "349;85;81;89;49;79", "wc_strengths": "58;83;5;4;15;8", "wc_improvement": "479;21;5;28;7;45", "wc_limitations": "269;11;5;1;7;11", "wc_correctness": "7;1;5;1;1;3", "wc_clarity": "11;9;5;1;7;18", "wc_relation_to_prior_work": "1;1;5;1;1;3", "wc_documentation": "1;1;5;1;1;45", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "1176;213;117;127;89;213", "wc_reply_reviewers": "0;0;0;0;0;43", "wc_reply_authors": "920;0;0;0;0;0", "reply_reviewers": "0;0;0;0;0;1", "reply_authors": "3;1;1;1;1;1", "rating_avg": [ 6.0, 1.8257418583505538 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999299 ], "wc_summary_and_contributions_avg": [ 122.0, 102.34419052068043 ], "wc_strengths_avg": [ 28.833333333333332, 30.536408142121466 ], "wc_improvement_avg": [ 97.5, 171.13712825295002 ], "wc_limitations_avg": [ 50.666666666666664, 97.70306488994544 ], "wc_correctness_avg": [ 3.0, 2.309401076758503 ], "wc_clarity_avg": [ 8.5, 5.283622494715786 ], "wc_relation_to_prior_work_avg": [ 2.0, 1.5275252316519468 ], "wc_documentation_avg": [ 9.0, 16.165807537309522 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 322.5, 384.58104910842053 ], "wc_reply_reviewers_avg": [ 7.166666666666667, 16.025153838748494 ], "wc_reply_authors_avg": [ 153.33333333333334, 342.8637565499678 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.3333333333333333, 0.74535599249993 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 21, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:fUqOlec4RMoJ:scholar.google.com/&scioq=A+Retrospective+on+the+Robot+Air+Hockey+Challenge:+Benchmarking+Robust,+Reliable,+and+Safe+Learning+Techniques+for+Real-world+Robotics&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;huawei.com;huawei.com;epfl.ch;epfl.ch;epfl.ch;uni.lu;uni.lu;kit.edu;kit.edu;;unibo.it;mail.polimi.it;polimi.it;polimi.it;tu-darmstadt.de;;tu-darmstadt.de", "author_num": 21, "aff_unique_index": "0;0;0;0;1;1;2;2;2;3;3;4;4;5;6;6;7;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Huawei;EPFL;University of Luxembourg;Karlsruher Institut f\u00fcr Technologie;University of Bologna;Polytechnic Institute of Milan;Politecnico di Milano", "aff_unique_dep": ";Huawei Technologies;;;;;;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.huawei.com;https://www.epfl.ch;https://wwwen.uniluxembourg.lu;https://www.kit.edu;https://www.unibo.it;https://www.polimi.it/;https://www.polimi.it", "aff_unique_abbr": "TU Darmstadt;Huawei;EPFL;Uni Lu;KIT;Unibo;Politecnico di Milano;Polimi", "aff_campus_unique_index": "0;0;2;2;2;0", "aff_campus_unique": "Darmstadt;;Lausanne", "aff_country_unique_index": "0;0;0;0;1;2;3;3;3;4;4;0;0;5;5;5;5;0;0", "aff_country_unique": "Germany;China;United Kingdom;Switzerland;Luxembourg;Italy" }, { "title": "Wormhole Loss for Partial Shape Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94143", "id": "gPhBvrPdEs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gPhBvrPdEs", "openreview": "https://openreview.net/forum?id=gPhBvrPdEs", "poster": "/media/PosterPDFs/NeurIPS%202024/94143.png?t=1733147956.529442", "project": "", "author_site": "Amit Bracha, Thomas Dag\u00e8s, Ron Kimmel", "tldr": "", "abstract": "When matching parts of a surface to its whole, a fundamental question arises: Which points should be included in the matching process? The issue is intensified when using isometry to measure similarity, as it requires the validation of whether distances measured between pairs of surface points should influence the matching process. The approach we propose treats surfaces as manifolds equipped with geodesic distances, and addresses the partial shape matching challenge by introducing a novel criterion to meticulously search for consistent distances between pairs of points. The new criterion explores the relation between intrinsic geodesic distances between the points, geodesic distances between the points and surface boundaries, and extrinsic distances between boundary points measured in the embedding space. It is shown to be less restrictive compared to previous measures and achieves state-of-the-art results when used as a loss function in training networks for partial shape matching.", "keywords": "Partial surfaces;Shape correspondence;Geodesic distances", "primary_area": "machine_vision", "supplementary_material": "", "author": "Amit Bracha;Thomas Dag\u00e8s;Ron Kimmel", "authorids": "~Amit_Bracha1;~Thomas_Dag\u00e8s1;~Ron_Kimmel1", "gender": "M;;M", "homepage": ";;https://ron.cs.technion.ac.il/", "dblp": ";245/9167;k/RonKimmel", "google_scholar": "vocN0JsAAAAJ;xZE3P4kAAAAJ;https://scholar.google.com.tw/citations?user=yV7LaW8AAAAJ", "orcid": ";0000-0002-0803-9300;", "linkedin": ";;", "or_profile": "~Amit_Bracha1;~Thomas_Dag\u00e8s1;~Ron_Kimmel1", "aff": "Computer Science Department, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Computer Science Departmen, Technion-Israel Institute of Technology", "aff_domain": "cs.technion.ac.il;technion.ac.il;cs.technion.ac.il", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbracha2024wormhole,\ntitle={Wormhole Loss for Partial Shape Matching},\nauthor={Amit Bracha and Thomas Dag{\\`e}s and Ron Kimmel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gPhBvrPdEs}\n}", "github": "", "reviewers": "a2Wg;NPnj;1VSx;tftc", "pdf_size": 40600323, "rating": "5;6;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "80;81;61;56", "wc_strengths": "48;29;33;37", "wc_weaknesses": "170;86;55;164", "wc_questions": "34;26;86;105", "wc_limitations": "18;9;31;1", "wc_review": "350;231;266;363", "wc_reply_reviewers": "123;9;27;24", "wc_reply_authors": "222;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 11.146748404803978 ], "wc_strengths_avg": [ 36.75, 7.084313657652377 ], "wc_weaknesses_avg": [ 118.75, 49.52461509189143 ], "wc_questions_avg": [ 62.75, 33.55126674210677 ], "wc_limitations_avg": [ 14.75, 11.143944544011335 ], "wc_review_avg": [ 302.5, 55.59001708940194 ], "wc_reply_reviewers_avg": [ 45.75, 45.1185937280851 ], "wc_reply_authors_avg": [ 55.5, 96.12881982007269 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17439797631086949137&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.technion.ac.il;technion.ac.il;cs.technion.ac.il", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Technion-Israel Institute of Technology", "aff_unique_dep": "Computer Science Department;Computer Science Department", "aff_unique_url": "https://www.technion.ac.il;https://www.technion.ac.il", "aff_unique_abbr": "Technion;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Road Network Representation Learning with the Third Law of Geography", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94142", "id": "gPtiGRaVcE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gPtiGRaVcE", "openreview": "https://openreview.net/forum?id=gPtiGRaVcE", "poster": "/media/PosterPDFs/NeurIPS%202024/94142.png?t=1732867802.394169", "project": "", "author_site": "Haicang Zhou, Weiming Huang, Yile Chen, Tiantian He, Gao Cong, Yew Soon Ong", "tldr": "", "abstract": "Road network representation learning aims to learn compressed and effective vectorized representations for road segments that are applicable to numerous tasks. In this paper, we identify the limitations of existing methods, particularly their overemphasis on the distance effect as outlined in the First Law of Geography. In response, we propose to endow road network representation with the principles of the recent Third Law of Geography. To this end, we propose a novel graph contrastive learning framework that employs geographic configuration-aware graph augmentation and spectral negative sampling, ensuring that road segments with similar geographic configurations yield similar representations, and vice versa, aligning with the principles stated in the Third Law. The framework further fuses the Third Law with the First Law through a dual contrastive learning objective to effectively balance the implications of both laws. We evaluate our framework on two real-world datasets across three downstream tasks. The results show that the integration of the Third Law significantly improves the performance of road segment representations in downstream tasks.", "keywords": "Road network;the third law of geography;geographic laws;representation learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/ac1f9051bea24f8cd65559ca8f9922b2329b2479.zip", "author": "Haicang Zhou;Weiming Huang;Yile Chen;Tiantian He;Gao Cong;Yew-Soon Ong", "authorids": "~Haicang_Zhou1;~Weiming_Huang1;~Yile_Chen2;~Tiantian_He1;~Gao_Cong1;~Yew-Soon_Ong1", "gender": "M;M;M;Unspecified;M;M", "homepage": "https://openreview.net/profile?id=~Haicang_Zhou1;https://www.nateko.lu.se/weiming-huang;https://yileccc.github.io/;https://he-tiantian.github.io/;https://personal.ntu.edu.sg/gaocong/;http://www.ntu.edu.sg/home/asysong/", "dblp": "268/6822;13/8593-1.html;256/9091.html;151/4420;33/3180;64/4136", "google_scholar": ";https://scholar.google.se/citations?user=gDM3WGwAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=WFFamJkAAAAJ;https://scholar.google.com.tw/citations?user=h9oWOsEAAAAJ", "orcid": "0000-0001-8749-9446;0000-0002-3208-4208;0000-0002-8502-1016;;0000-0002-4430-6373;0000-0002-4480-169X", "linkedin": ";;;;;", "or_profile": "~Haicang_Zhou1;~Weiming_Huang1;~Yile_Chen2;~Tiantian_He1;~Gao_Cong1;~Yew_Soon_Ong1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;A*STAR;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;a-star.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Postdoc;Postdoc;Senior Scientist;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024road,\ntitle={Road Network Representation Learning with the Third Law of Geography},\nauthor={Haicang Zhou and Weiming Huang and Yile Chen and Tiantian He and Gao Cong and Yew-Soon Ong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gPtiGRaVcE}\n}", "github": "", "reviewers": "u7JA;MjEv;ggbs;cFij;UWD1", "pdf_size": 3944248, "rating": "3;4;5;6;6", "confidence": "4;3;3;4;4", "soundness": "2;2;3;3;4", "novelty": "2;2;3;3;3", "presentation": "2;3;3;2;3", "wc_summary": "105;84;86;71;62", "wc_strengths": "94;48;68;134;45", "wc_weaknesses": "463;67;74;112;118", "wc_questions": "211;21;118;115;72", "wc_limitations": "2;2;43;11;19", "wc_review": "875;222;389;443;316", "wc_reply_reviewers": "33;142;9;17;16", "wc_reply_authors": "2586;793;35;35;401", "reply_reviewers": "1;3;1;1;1", "reply_authors": "8;4;2;2;3", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 81.6, 14.62326912834473 ], "wc_strengths_avg": [ 77.8, 33.10830711468045 ], "wc_weaknesses_avg": [ 166.8, 149.45955974777925 ], "wc_questions_avg": [ 107.4, 62.64375467674331 ], "wc_limitations_avg": [ 15.4, 15.186836405255704 ], "wc_review_avg": [ 449.0, 225.53491969094276 ], "wc_reply_reviewers_avg": [ 43.4, 49.92233968876058 ], "wc_reply_authors_avg": [ 770.0, 950.2627005202298 ], "reply_reviewers_avg": [ 1.4, 0.8000000000000002 ], "reply_authors_avg": [ 3.8, 2.2271057451320084 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.21004201260420152, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18319414564970082201&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;a-star.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NTU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Personalizing Reinforcement Learning from Human Feedback with Variational Preference Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94141", "id": "gRG6SzbW9p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gRG6SzbW9p", "openreview": "https://openreview.net/forum?id=gRG6SzbW9p", "poster": "", "project": "", "author_site": "Sriyash Poddar, Yanming Wan, Hamish Ivison, Abhishek Gupta, Natasha Jaques", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF) is a powerful paradigm for aligning foundation models to human values and preferences. However, current RLHF techniques cannot account for the naturally occurring differences in individual human preferences across a diverse population. When these differences arise, traditional RLHF frameworks simply average over them, leading to inaccurate rewards and poor performance for individual subgroups. To address the need for pluralistic alignment, we develop a class of multimodal RLHF methods. Our proposed techniques are based on a latent variable formulation - inferring a novel user-specific latent and learning reward models and policies conditioned on this latent without additional user-specific data. While conceptually simple, we show that in practice, this reward modeling requires careful algorithmic considerations around model architecture and reward scaling. To empirically validate our proposed technique, we first show that it can provide a way to combat underspecification in simulated control problems, inferring and optimizing user-specific reward functions. Next, we conduct experiments on pluralistic language datasets representing diverse user preferences and demonstrate improved reward function accuracy. We additionally show the benefits of this probabilistic framework in terms of measuring uncertainty, and actively learning user preferences. This work enables learning from diverse populations of users with divergent preferences, an important challenge that naturally occurs in problems from robot learning to foundation model alignment.", "keywords": "reinforcement learning from human feedback;preference based reward learning;pluralistic alignment;multimodal reward modeling;variational inference", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Sriyash Poddar;Yanming Wan;Hamish Ivison;Abhishek Gupta;Natasha Jaques", "authorids": "~Sriyash_Poddar1;~Yanming_Wan1;~Hamish_Ivison1;~Abhishek_Gupta1;~Natasha_Jaques1", "gender": ";;;M;F", "homepage": "https://sriya.sh;;https://hamishivi.github.io;https://homes.cs.washington.edu/~abhgupta/;https://natashajaques.ai/", "dblp": ";346/1162;288/1956;18/6404-4;145/7732", "google_scholar": "kQVM154AAAAJ;;;1wLVDP4AAAAJ;8iCb2TwAAAAJ", "orcid": ";;0000-0002-0069-7659;;", "linkedin": ";yanming-wan;;;natashajaques", "or_profile": "~Sriyash_Poddar1;~Yanming_Wan1;~Hamish_Ivison1;~Abhishek_Gupta1;~Natasha_Jaques1", "aff": "Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Washington;University of Washington;Google", "aff_domain": "cs.washington.edu;cs.washington.edu;uw.edu;uw.edu;google.com", "position": "PhD student;PhD student;PhD student;Assistant Professor;Senior Research Scientist", "bibtex": "@inproceedings{\npoddar2024personalizing,\ntitle={Personalizing Reinforcement Learning from Human Feedback with Variational Preference Learning},\nauthor={Sriyash Poddar and Yanming Wan and Hamish Ivison and Abhishek Gupta and Natasha Jaques},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gRG6SzbW9p}\n}", "github": "", "reviewers": "Z9tQ;KQbQ;hKQW;PuPi", "pdf_size": 4476985, "rating": "4;5;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "57;99;139;48", "wc_strengths": "61;32;109;41", "wc_weaknesses": "210;304;833;125", "wc_questions": "63;4;35;36", "wc_limitations": "2;4;196;7", "wc_review": "393;443;1312;257", "wc_reply_reviewers": "78;83;255;0", "wc_reply_authors": "1401;608;1149;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 36.27240686803124 ], "wc_strengths_avg": [ 60.75, 29.768901558505647 ], "wc_weaknesses_avg": [ 368.0, 275.8323766347961 ], "wc_questions_avg": [ 34.5, 20.886598574205422 ], "wc_limitations_avg": [ 52.25, 83.01317666491266 ], "wc_review_avg": [ 601.25, 415.95815594840786 ], "wc_reply_reviewers_avg": [ 104.0, 93.18529926978826 ], "wc_reply_authors_avg": [ 789.5, 538.3829956452934 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1596582017704522071&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.washington.edu;cs.washington.edu;uw.edu;uw.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Washington;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Seattle;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Automated Label Unification for Multi-Dataset Semantic Segmentation with GNNs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94140", "id": "gSGLkCX9sc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gSGLkCX9sc", "openreview": "https://openreview.net/forum?id=gSGLkCX9sc", "poster": "/media/PosterPDFs/NeurIPS%202024/94140.png?t=1731554505.800011", "project": "", "author_site": "Ma Rong, Jie Chen, Xiangyang Xue, Jian Pu", "tldr": "", "abstract": "Deep supervised models possess significant capability to assimilate extensive training data, thereby presenting an opportunity to enhance model performance through training on multiple datasets. However, conflicts arising from different label spaces among datasets may adversely affect model performance. In this paper, we propose a novel approach to automatically construct a unified label space across multiple datasets using graph neural networks. This enables semantic segmentation models to be trained simultaneously on multiple datasets, resulting in performance improvements. Unlike existing methods, our approach facilitates seamless training without the need for additional manual reannotation or taxonomy reconciliation. This significantly enhances the efficiency and effectiveness of multi-dataset segmentation model training. The results demonstrate that our method significantly outperforms other multi-dataset training methods when trained on seven datasets simultaneously, and achieves state-of-the-art performance on the WildDash 2 benchmark. Our code can be found in https://github.com/Mrhonor/AutoUniSeg.", "keywords": "Semantic Segmentation;Multi-dataset Training;Graph Neural Networks", "primary_area": "machine_vision", "supplementary_material": "/attachment/af84ee2d9d43586e7f8e2e8ff104fde091d80c62.zip", "author": "Rong Ma;Jie Chen;Xiangyang Xue;Jian Pu", "authorids": "~Rong_Ma4;~Jie_Chen19;~Xiangyang_Xue2;~Jian_Pu1", "gender": "M;M;M;M", "homepage": "https://github.com/Mrhonor;https://www.researchgate.net/profile/Jie_Chen364;http://homepage.fudan.edu.cn//xyxue;", "dblp": ";;84/3791;43/6295", "google_scholar": ";vuRdFLsAAAAJ;;9pUCoOkAAAAJ", "orcid": ";;0000-0002-4897-9209;", "linkedin": ";;;", "or_profile": "~Rong_Ma4;~Jie_Chen19;~Xiangyang_Xue2;~Jian_Pu1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nma2024automated,\ntitle={Automated Label Unification for Multi-Dataset Semantic Segmentation with {GNN}s},\nauthor={Rong Ma and Jie Chen and Xiangyang Xue and Jian Pu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gSGLkCX9sc}\n}", "github": "", "reviewers": "pNQc;dye9;kdwE;GmR6;M8bi", "pdf_size": 36957235, "rating": "4;4;5;6;7", "confidence": "4;3;5;4;4", "soundness": "2;3;2;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "154;56;164;69;59", "wc_strengths": "26;46;57;90;83", "wc_weaknesses": "140;215;299;147;259", "wc_questions": "166;3;266;4;45", "wc_limitations": "17;6;9;31;126", "wc_review": "503;326;795;341;572", "wc_reply_reviewers": "195;282;0;21;120", "wc_reply_authors": "402;426;0;0;162", "reply_reviewers": "1;3;0;1;1", "reply_authors": "3;3;1;1;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.4, 48.143950814198874 ], "wc_strengths_avg": [ 60.4, 23.618636709175235 ], "wc_weaknesses_avg": [ 212.0, 61.96127823084349 ], "wc_questions_avg": [ 96.8, 103.45124455510431 ], "wc_limitations_avg": [ 37.8, 44.94174006422092 ], "wc_review_avg": [ 507.4, 171.75633903876735 ], "wc_reply_reviewers_avg": [ 123.6, 105.83496586667377 ], "wc_reply_authors_avg": [ 198.0, 186.1741120564296 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2711630722733202, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8911932370042273857&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Alias-Free Mamba Neural Operator", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94139", "id": "gUEBXGV8JM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gUEBXGV8JM", "openreview": "https://openreview.net/forum?id=gUEBXGV8JM", "poster": "/media/PosterPDFs/NeurIPS%202024/94139.png?t=1731408232.8021576", "project": "", "author_site": "Jianwei Zheng, Wei Li, Ni Xu, Junwei Zhu, XiaoxuLin, Xiaoqin Zhang", "tldr": "", "abstract": "Benefiting from the booming deep learning techniques, neural operators (NO) are considered as an ideal alternative to break the traditions of solving Partial Differential Equations (PDE) with expensive cost.\nYet with the remarkable progress, current solutions concern little on the holistic function features--both global and local information-- during the process of solving PDEs.\nBesides, a meticulously designed kernel integration to meet desirable performance often suffers from a severe computational burden, such as GNO with $O(N(N-1))$, FNO with $O(NlogN)$, and Transformer-based NO with $O(N^2)$.\nTo counteract the dilemma, we propose a mamba neural operator with $O(N)$ computational complexity, namely MambaNO.\nFunctionally, MambaNO achieves a clever balance between global integration, facilitated by state space model of Mamba that scans the entire function, and local integration, engaged with an alias-free architecture. We prove a property of continuous-discrete equivalence to show the capability of\nMambaNO in approximating operators arising from universal PDEs to desired accuracy. MambaNOs are evaluated on a diverse set of benchmarks with possibly multi-scale solutions and set new state-of-the-art scores, yet with fewer parameters and better efficiency.", "keywords": "Mamba;Neural Operator", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/b716cee883d8506aa057cb8752c87763b208ea82.zip", "author": "Jianwei Zheng;LiweiNo;Ni Xu;Junwei Zhu;XiaoxuLin;Xiaoqin Zhang", "authorids": "~Jianwei_Zheng2;~LiweiNo1;~Ni_Xu1;~Junwei_Zhu3;~XiaoxuLin1;~Xiaoqin_Zhang4", "gender": "M;M;F;M;M;", "homepage": ";https://github.com/weili419;https://blog.csdn.net/cangzhexingxing?type=blog;https://jwzhu0.github.io/;https://pion6.github.io/;", "dblp": "60/4818-1;;;;;", "google_scholar": "X0wntOEAAAAJ;;;;;", "orcid": ";0009-0002-8566-6073;;0009-0005-8305-506X;;", "linkedin": ";;;;;", "or_profile": "~Jianwei_Zheng2;~LiweiNo1;~Ni_Xu1;~Junwei_Zhu3;~XiaoxuLin1;~Xiaoqin_Zhang4", "aff": "Zhejiang University of Technology;Zhejiang University of Technology;Zhejiang University of Technology;Zhejiang University of Technology;Zhejiang University of Technology;", "aff_domain": "zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;", "position": "Full Professor;MS student;MS student;MS student;MS student;", "bibtex": "@inproceedings{\nzheng2024aliasfree,\ntitle={Alias-Free Mamba Neural Operator},\nauthor={Jianwei Zheng and LiweiNo and Ni Xu and Junwei Zhu and XiaoxuLin and Xiaoqin Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gUEBXGV8JM}\n}", "github": "", "reviewers": "oeYD;eofX;Jj9b;5UBc", "pdf_size": 9953184, "rating": "5;6;6;7", "confidence": "1;5;3;4", "soundness": "3;4;3;2", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "66;58;38;105", "wc_strengths": "29;66;57;173", "wc_weaknesses": "223;82;172;119", "wc_questions": "74;1;169;126", "wc_limitations": "76;1;28;5", "wc_review": "468;208;464;528", "wc_reply_reviewers": "28;0;0;110", "wc_reply_authors": "14;37;37;7", "reply_reviewers": "1;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.75, 24.324627438051337 ], "wc_strengths_avg": [ 81.25, 54.70089121760266 ], "wc_weaknesses_avg": [ 149.0, 53.37134062397159 ], "wc_questions_avg": [ 92.5, 62.62786919575023 ], "wc_limitations_avg": [ 27.5, 29.837057495671385 ], "wc_review_avg": [ 417.0, 123.30044606569759 ], "wc_reply_reviewers_avg": [ 34.5, 45.06384359994163 ], "wc_reply_authors_avg": [ 23.75, 13.47915056670857 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7171371656006361, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NO_4uRjaPIMJ:scholar.google.com/&scioq=Alias-Free+Mamba+Neural+Operator&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;zjut.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.zjut.edu.cn", "aff_unique_abbr": "ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Generalizable and Animatable Gaussian Head Avatar", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94138", "id": "gVM2AZ5xA6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gVM2AZ5xA6", "openreview": "https://openreview.net/forum?id=gVM2AZ5xA6", "poster": "/media/PosterPDFs/NeurIPS%202024/94138.png?t=1730384415.9711165", "project": "", "author_site": "Xuangeng Chu, Tatsuya Harada", "tldr": "", "abstract": "In this paper, we propose Generalizable and Animatable Gaussian head Avatar (GAGA) for one-shot animatable head avatar reconstruction.\nExisting methods rely on neural radiance fields, leading to heavy rendering consumption and low reenactment speeds.\nTo address these limitations, we generate the parameters of 3D Gaussians from a single image in a single forward pass.\nThe key innovation of our work is the proposed dual-lifting method, which produces high-fidelity 3D Gaussians that capture identity and facial details.\nAdditionally, we leverage global image features and the 3D morphable model to construct 3D Gaussians for controlling expressions.\nAfter training, our model can reconstruct unseen identities without specific optimizations and perform reenactment rendering at real-time speeds.\nExperiments show that our method exhibits superior performance compared to previous methods in terms of reconstruction quality and expression accuracy.\nWe believe our method can establish new benchmarks for future research and advance applications of digital avatars.", "keywords": "3D Gaussian Splatting;Head Avatar;Dynamic 3D Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "/attachment/cdb64706d422bca56c3f222590dbb209a243e6bd.zip", "author": "Xuangeng Chu;Tatsuya Harada", "authorids": "~Xuangeng_Chu1;~Tatsuya_Harada1", "gender": "M;M", "homepage": "https://xg-chu.site;https://www.mi.t.u-tokyo.ac.jp/harada/", "dblp": ";14/5849", "google_scholar": "yr4kSUsAAAAJ;https://scholar.google.com/citations?hl=ja", "orcid": ";", "linkedin": "xuangeng-chu-544188165/;", "or_profile": "~Xuangeng_Chu1;~Tatsuya_Harada1", "aff": "Princeton University;The University of Tokyo", "aff_domain": "princeton.edu;u-tokyo.ac.jp", "position": "Student Researcher;Full Professor", "bibtex": "@inproceedings{\nchu2024generalizable,\ntitle={Generalizable and Animatable Gaussian Head Avatar},\nauthor={Xuangeng Chu and Tatsuya Harada},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gVM2AZ5xA6}\n}", "github": "", "reviewers": "d3t7;65B2;PdVU;3QbB", "pdf_size": 13487391, "rating": "4;5;5;6", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "102;54;85;130", "wc_strengths": "64;51;75;65", "wc_weaknesses": "330;107;57;55", "wc_questions": "32;34;49;48", "wc_limitations": "24;20;98;63", "wc_review": "552;266;364;361", "wc_reply_reviewers": "0;28;25;18", "wc_reply_authors": "0;41;41;43", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.75, 27.544282528321553 ], "wc_strengths_avg": [ 63.75, 8.525696452489967 ], "wc_weaknesses_avg": [ 137.25, 113.21743461145903 ], "wc_questions_avg": [ 40.75, 7.790218225441442 ], "wc_limitations_avg": [ 51.25, 31.7913117061879 ], "wc_review_avg": [ 385.75, 103.76024045847234 ], "wc_reply_reviewers_avg": [ 17.75, 10.871407452579449 ], "wc_reply_authors_avg": [ 31.25, 18.06066167115701 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12061532816793365171&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "princeton.edu;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Princeton University;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Princeton;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Japan" }, { "title": "Amortizing intractable inference in diffusion models for vision, language, and control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94137", "id": "gVTkMsaaGI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gVTkMsaaGI", "openreview": "https://openreview.net/forum?id=gVTkMsaaGI", "poster": "", "project": "", "author_site": "Siddarth Venkatraman, Moksh Jain, Luca Scimeca, Minsu Kim, Marcin Sendera, Mohsin Hasan, Luke Rowe, Sarthak Mittal, Pablo Lemos, Emmanuel Bengio, Alexandre Adam, Jarrid Rector-Brooks, Yoshua Bengio, Glen Berseth, Nikolay Malkin", "tldr": "", "abstract": "Diffusion models have emerged as effective distribution estimators in vision, language, and reinforcement learning, but their use as priors in downstream tasks poses an intractable posterior inference problem. This paper studies *amortized* sampling of the posterior over data, $\\mathbf{x}\\sim p^{\\rm post}(\\mathbf{x})\\propto p(\\mathbf{x})r(\\mathbf{x})$, in a model that consists of a diffusion generative model prior $p(\\mathbf{x})$ and a black-box constraint or likelihood function $r(\\mathbf{x})$. We state and prove the asymptotic correctness of a data-free learning objective, *relative trajectory balance*, for training a diffusion model that samples from this posterior, a problem that existing methods solve only approximately or in restricted cases. Relative trajectory balance arises from the generative flow network perspective on diffusion models, which allows the use of deep reinforcement learning techniques to improve mode coverage. Experiments illustrate the broad potential of unbiased inference of arbitrary posteriors under diffusion priors: in vision (classifier guidance), language (infilling under a discrete diffusion LLM), and multimodal data (text-to-image generation). Beyond generative modeling, we apply relative trajectory balance to the problem of continuous control with a score-based behavior prior, achieving state-of-the-art results on benchmarks in offline reinforcement learning. Code is available at [this link](https://github.com/GFNOrg/diffusion-finetuning).", "keywords": "diffusion;inverse problems;conditional generation;language models;infilling;discrete diffusion;offline RL;planning;GFlowNet", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Siddarth Venkatraman;Moksh Jain;Luca Scimeca;Minsu Kim;Marcin Sendera;Mohsin Hasan;Luke Rowe;Sarthak Mittal;Pablo Lemos;Emmanuel Bengio;Alexandre Adam;Jarrid Rector-Brooks;Yoshua Bengio;Glen Berseth;Nikolay Malkin", "authorids": "~Siddarth_Venkatraman1;~Moksh_Jain1;~Luca_Scimeca1;~Minsu_Kim2;~Marcin_Sendera1;~Mohsin_Hasan1;~Luke_Rowe1;~Sarthak_Mittal1;~Pablo_Lemos1;~Emmanuel_Bengio1;~Alexandre_Adam1;~Jarrid_Rector-Brooks2;~Yoshua_Bengio1;~Glen_Berseth1;~Nikolay_Malkin1", "gender": "M;M;M;M;M;Not Specified;M;M;M;M;;M;M;M;", "homepage": ";https://mj10.github.io;https://lucascimeca.com;https://minsuukim.github.io/;;https://hasanmohsin.github.io/;https://rluke22.github.io/;https://sarthmit.github.io/;https://pablo-lemos.github.io;http://folinoid.com;;;http://yoshuabengio.org;http://fracturedplane.com/;", "dblp": "261/9239;249/9368;223/6396;;220/9876;322/9329.html;330/2836;228/8275;313/2645;137/8040;;230/4010;56/953;147/5478;", "google_scholar": "j9l0rg4AAAAJ;TD07G_wAAAAJ;fKJvAvMAAAAJ;https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;https://scholar.google.pl/citations?user=ScNBRmQAAAAJ;https://scholar.google.ca/citations?user=eY7J71sAAAAJ;https://scholar.google.ca/citations?user=J-CXTg8AAAAJ;FGGgTrcAAAAJ;AklQTTsAAAAJ;https://scholar.google.ca/citations?user=yVtSOt8AAAAJ;Mmlh_CEAAAAJ;gxRPZh4AAAAJ;kukA0LcAAAAJ;https://scholar.google.ca/citations?user=-WZcuuwAAAAJ;", "orcid": "0000-0002-3607-2781;;0000-0002-2821-0072;;0000-0002-8741-6919;;;;0000-0002-4728-8473;;0000-0001-8806-7936;;;0000-0001-7351-8028;", "linkedin": "siddarth-venkatraman-59b863157/;;luca-scimeca/;;marcin-sendera-976516123/;;rluke22/;;;;alexandre-adam-31b131160/;;yoshuabengio/?originalSubdomain=ca;glen-berseth-0523278b?trk=hp-identity-name;", "or_profile": "~Siddarth_Venkatraman1;~Moksh_Jain1;~Luca_Scimeca1;~Minsu_Kim2;~Marcin_Sendera1;~Mohsin_Hasan1;~Luke_Rowe1;~Sarthak_Mittal1;~Pablo_Lemos1;~Emmanuel_Bengio1;~Alexandre_Adam1;~Jarrid_Rector-Brooks2;~Yoshua_Bengio1;~Glen_Berseth1;~Nikolay_Malkin1", "aff": "Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Korea Advanced Institute of Science & Technology;Jagiellonian University;Mila - Quebec Artificial Intelligence Institute;Universit\u00e9 de Montr\u00e9al;University of Montreal;Universit\u00e9 de Montr\u00e9al;Valence Labs powered by recursion;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;University of Montreal;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;", "aff_domain": "umontreal.ca;umontreal.ca;mila.umontreal.ca;kaist.ac.kr;uj.edu.pl;mila.quebec;umontreal.ca;umontreal.ca;umontreal.ca;valencelabs.com;umontreal.ca;mila.umontreal.ca;umontreal.ca;mila.umontreal.ca;", "position": "PhD student;PhD student;Postdoc;PhD student;PhD student;PhD student;PhD student;PhD student;Postdoc;Researcher;PhD student;PhD student;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nvenkatraman2024amortizing,\ntitle={Amortizing intractable inference in diffusion models for vision, language, and control},\nauthor={Siddarth Venkatraman and Moksh Jain and Luca Scimeca and Minsu Kim and Marcin Sendera and Mohsin Hasan and Luke Rowe and Sarthak Mittal and Pablo Lemos and Emmanuel Bengio and Alexandre Adam and Jarrid Rector-Brooks and Yoshua Bengio and Glen Berseth and Nikolay Malkin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gVTkMsaaGI}\n}", "github": "", "reviewers": "VfXT;4zDq;Z1Kk", "pdf_size": 17387734, "rating": "5;5;7", "confidence": "2;4;3", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "101;112;105", "wc_strengths": "141;141;41", "wc_weaknesses": "253;331;91", "wc_questions": "143;164;18", "wc_limitations": "31;12;4", "wc_review": "669;760;259", "wc_reply_reviewers": "325;0;11", "wc_reply_authors": "235;59;0", "reply_reviewers": "2;0;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 106.0, 4.546060565661952 ], "wc_strengths_avg": [ 107.66666666666667, 47.14045207910317 ], "wc_weaknesses_avg": [ 225.0, 99.9599919967984 ], "wc_questions_avg": [ 108.33333333333333, 64.44808418840358 ], "wc_limitations_avg": [ 15.666666666666666, 11.323525167642018 ], "wc_review_avg": [ 562.6666666666666, 217.91486614934948 ], "wc_reply_reviewers_avg": [ 112.0, 150.68067781459794 ], "wc_reply_authors_avg": [ 98.0, 99.82317700146929 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6277091480718856611&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "umontreal.ca;umontreal.ca;mila.umontreal.ca;kaist.ac.kr;uj.edu.pl;mila.quebec;umontreal.ca;umontreal.ca;umontreal.ca;valencelabs.com;umontreal.ca;mila.umontreal.ca;umontreal.ca;mila.umontreal.ca;", "author_num": 15, "aff_unique_index": "0;0;1;2;3;4;0;1;0;5;0;1;1;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Korea Advanced Institute of Science and Technology;Jagiellonian University;Quebec Artificial Intelligence Institute;Valence Labs", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;;;Artificial Intelligence;", "aff_unique_url": "https://www.umontreal.ca;https://www.mila.quebec;https://www.kaist.ac.kr;https://www.uj.edu.pl;https://mila.quebec;", "aff_unique_abbr": "UdeM;MILA;KAIST;UJ;Mila;", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;1;2;0;0;0;0;0;0;0;0", "aff_country_unique": "Canada;South Korea;Poland;" }, { "title": "Retrospective for the Dynamic Sensorium Competition for predicting large-scale mouse primary visual cortex activity from videos", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97578", "id": "gViJjwRUlM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gViJjwRUlM", "openreview": "https://openreview.net/forum?id=gViJjwRUlM", "poster": "", "project": "", "author_site": "Polina Turishcheva, Paul Fahey, Michaela Vystr\u010dilov\u00e1, Laura Hansel, Rachel Froebe, Kayla Ponder, Yongrong Qiu, Konstantin Willeke, Mohammad Bashiri, Ruslan Baikulov, Yu Zhu, Lei Ma, Shan Yu, Tiejun Huang, Bryan Li, Wolf De Wulf, Nina Kudryashova, Matthias Hennig, Nathalie Rochefort, Arno Onken, Eric Y. Wang, Zhiwei Ding, Andreas Tolias, Fabian Sinz, Alexander Ecker", "tldr": "", "abstract": "Understanding how biological visual systems process information is challenging because of the nonlinear relationship between visual input and neuronal responses. \nArtificial neural networks allow computational neuroscientists to create predictive models that connect biological and machine vision.\nMachine learning has benefited tremendously from benchmarks that compare different models on the same task under standardized conditions. \nHowever, there was no standardized benchmark to identify state-of-the-art dynamic models of the mouse visual system.\nTo address this gap, we established the SENSORIUM 2023 Benchmark Competition with dynamic input, featuring a new large-scale dataset from the primary visual cortex of ten mice. \nThis dataset includes responses from 78,853 neurons to 2 hours of dynamic stimuli per neuron, together with behavioral measurements such as running speed, pupil dilation, and eye movements.\nThe competition ranked models in two tracks based on predictive performance for neuronal responses on a held-out test set: one focusing on predicting in-domain natural stimuli and another on out-of-distribution (OOD) stimuli to assess model generalization.\nAs part of the NeurIPS 2023 Competition Track, we received more than 160 model submissions from 22 teams. \nSeveral new architectures for predictive models were proposed, and the winning teams improved the previous state-of-the-art model by 50\\%. \nAccess to the dataset as well as the benchmarking infrastructure will remain online at www.sensorium-competition.net.", "keywords": "benchmark;neuroscience;predictive models;visual cortex;competition retrospective", "primary_area": "", "supplementary_material": "/attachment/c08aa7dd8ea9144093a6223179a2171992fc980e.pdf", "author": "Polina Turishcheva;Paul G. Fahey;Michaela Vystr\u010dilov\u00e1;Laura Hansel;Rachel E Froebe;Kayla Ponder;Yongrong Qiu;Konstantin Friedrich Willeke;Mohammad Bashiri;Ruslan Baikulov;Yu Zhu;Lei Ma;Shan Yu;Tiejun Huang;Bryan M. Li;Wolf De Wulf;Nina Kudryashova;Matthias H. Hennig;Nathalie Rochefort;Arno Onken;Eric Wang;Zhiwei Ding;Andreas S. Tolias;Fabian H. Sinz;Alexander S Ecker", "authorids": "~Polina_Turishcheva1;~Paul_G._Fahey1;~Michaela_Vystr\u010dilov\u00e11;~Laura_Hansel1;~Rachel_E_Froebe1;~Kayla_Ponder1;~Yongrong_Qiu1;~Konstantin_Friedrich_Willeke1;~Mohammad_Bashiri3;~Ruslan_Baikulov1;~Yu_Zhu13;~Lei_Ma3;~Shan_Yu1;~Tiejun_Huang1;~Bryan_M._Li1;~Wolf_De_Wulf1;~Nina_Kudryashova1;~Matthias_H._Hennig2;~Nathalie_Rochefort1;~Arno_Onken1;~Eric_Wang1;~Zhiwei_Ding1;~Andreas_S._Tolias1;~Fabian_H._Sinz1;~Alexander_S_Ecker1", "gender": "F;;;;;;;M;M;M;M;Not Specified;M;M;M;M;F;M;F;M;M;;;M;M", "homepage": ";;;;;;;https://sinzlab.org/team.html;https://mohammadbashiri.github.io/;https://github.com/lRomul;https://github.com/zhuyu-cs;https://nbic.pku.edu.cn/rcdw/kyry/02c5f5ce8e254b1e82a48bebd0a24c33.htm;https://people.ucas.ac.cn/~yushan?language=en;https://idm.pku.edu.cn/~tjhuang/;https://bryanli.io;https://www.wolfdewulf.eu;;https://homepages.inf.ed.ac.uk/mhennig/;https://www.ed.ac.uk/discovery-brain-sciences/our-staff/research-groups/nathalie-rochefort;https://homepages.inf.ed.ac.uk/aonken/;;;;https://sinzlab.org;http://eckerlab.org", "dblp": ";;;;;;;;229/0971;;;20/6534-8;;h/TiejunHuang;213/8145;274/7026.html;;;;15/2035;02/5887;;32/3057;53/5834;26/7228", "google_scholar": "VvnKLRYAAAAJ;;;;;;;sc3jZTsAAAAJ;https://scholar.google.com/citations?view_op=list_works;Gbgo-mIAAAAJ;;;YdaRHiIAAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ;QQrzFdAAAAAJ;EphKDJ4AAAAJ;rEt5StMAAAAJ;QMvayicAAAAJ;;JQh31ekAAAAJ;;;;https://scholar.google.com/citations?hl=de;VgYU_m8AAAAJ", "orcid": ";;;;0009-0004-0260-9036;0000-0001-7776-1914;;0000-0003-4445-6408;;0009-0003-4400-0619;0000-0001-8836-7939;0000-0001-6024-3854;;0000-0002-4234-6099;0000-0003-3144-4838;0000-0002-0219-3120;0000-0001-8529-7250;0000-0001-7270-5817;;0000-0001-7387-5535;0000-0002-1084-7059;;;0000-0002-1348-9736;0000-0003-2392-5105", "linkedin": "polina-turishcheva-a63860197/;;michaela-vystr\u010dilov\u00e1-4b7a601ba/;;;;;;;;;maleiwhat/;;;;wolf-de-wulf/;ninakudryashova/;;;;https://linkedin.com/in/eric-wang-77476a303;;;;alexecker/", "or_profile": "~Polina_Turishcheva1;~Paul_G._Fahey1;~Michaela_Vystr\u010dilov\u00e11;~Laura_Hansel1;~Rachel_E_Froebe1;~Kayla_Ponder1;~Yongrong_Qiu1;~Konstantin_Friedrich_Willeke1;~Mohammad_Bashiri3;~Ruslan_Baikulov1;~Yu_Zhu13;~Lei_Ma3;~Shan_Yu1;~Tiejun_Huang1;~Bryan_M._Li1;~Wolf_De_Wulf1;~Nina_Kudryashova1;~Matthias_H._Hennig2;~Nathalie_Rochefort1;~Arno_Onken1;~Eric_Wang1;~Zhiwei_Ding1;~Andreas_S._Tolias1;~Fabian_H._Sinz1;~Alexander_S_Ecker1", "aff": "Georg-August Universit\u00e4t G\u00f6ttingen;;Georg-August Universit\u00e4t G\u00f6ttingen;;Baylor College of Medicine;Baylor College of Medicine;;University of Tuebingen;Noselab;Moscow Power Engineering Institute;Institute of automation, Chinese academy of science;Beijing Academy of Artifical Intelligence;Institute of Automation, Chinese Academy of Sciences;Peking University;Alan Turing Institute;Edinburgh University, University of Edinburgh;University of Edinburgh;University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh;Baylor College of Medicine;;Baylor College of Medicine;Baylor College of Medicine;Max Planck Institute for Dynamics and Self-Organization", "aff_domain": "uni-goettingen.de;;uni-goettingen.de;;bcm.edu;bcm.edu;;uni-tuebingen.de;noselab.com;mpei.ru;ia.ac.cn;baai.ac.cn;ia.ac.cn;pku.edu.cn;turing.ac.uk;inf.ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;bcm.edu;;bcm.edu;bcm.edu;ds.mpg.de", "position": "PhD student;;PhD student;;Research Technician;PhD student;;PhD student;Senior Data Scientist;Undergrad student;PhD student;Principal Researcher;Full Professor;Full Professor;Researcher;PhD student;Postdoc;Full Professor;Full Professor;Assistant Professor;PhD student;;Professor;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nturishcheva2024retrospective,\ntitle={Retrospective for the Dynamic Sensorium Competition for predicting large-scale mouse primary visual cortex activity from videos},\nauthor={Polina Turishcheva and Paul G. Fahey and Michaela Vystr{\\v{c}}ilov{\\'a} and Laura Hansel and Rachel E Froebe and Kayla Ponder and Yongrong Qiu and Konstantin Friedrich Willeke and Mohammad Bashiri and Ruslan Baikulov and Yu Zhu and Lei Ma and Shan Yu and Tiejun Huang and Bryan M. Li and Wolf De Wulf and Nina Kudryashova and Matthias H. Hennig and Nathalie Rochefort and Arno Onken and Eric Wang and Zhiwei Ding and Andreas S. Tolias and Fabian H. Sinz and Alexander S Ecker},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=gViJjwRUlM}\n}", "github": "", "reviewers": "XZTg;H4Rf;d2mU", "pdf_size": 2523437, "rating": "7;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "41;85;64", "wc_strengths": "150;24;25", "wc_improvement": "251;15;23", "wc_limitations": "1;19;12", "wc_correctness": "1;14;1", "wc_clarity": "1;5;1", "wc_relation_to_prior_work": "1;23;1", "wc_documentation": "21;22;1", "wc_additional_feedback": "1;1;1", "wc_review": "468;208;129", "wc_reply_reviewers": "122;0;4", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 63.333333333333336, 17.96910929592474 ], "wc_strengths_avg": [ 66.33333333333333, 59.162675921751735 ], "wc_improvement_avg": [ 96.33333333333333, 109.41460394095277 ], "wc_limitations_avg": [ 10.666666666666666, 7.408703590297622 ], "wc_correctness_avg": [ 5.333333333333333, 6.128258770283412 ], "wc_clarity_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_documentation_avg": [ 14.666666666666666, 9.672412085697939 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 268.3333333333333, 144.8224967177253 ], "wc_reply_reviewers_avg": [ 42.0, 56.592107812544555 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 25, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11680072027991301102&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "uni-goettingen.de;;uni-goettingen.de;;bcm.edu;bcm.edu;;uni-tuebingen.de;noselab.com;mpei.ru;ia.ac.cn;baai.ac.cn;ia.ac.cn;pku.edu.cn;turing.ac.uk;inf.ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;bcm.edu;;bcm.edu;bcm.edu;ds.mpg.de", "author_num": 25, "aff_unique_index": "0;0;1;1;2;3;4;5;6;5;7;8;9;9;9;9;9;1;1;1;10", "aff_unique_norm": "Georg-August Universit\u00e4t G\u00f6ttingen;Baylor College of Medicine;University of Tuebingen;Noselab;Moscow Power Engineering Institute;Chinese Academy of Sciences;Beijing Academy of Artificial Intelligence;Peking University;Alan Turing Institute;University of Edinburgh;Max Planck Institute for Dynamics and Self-Organization", "aff_unique_dep": ";;;;;Institute of Automation;;;;;", "aff_unique_url": "https://www.uni-goettingen.de;https://www.bcm.edu;https://www.uni-tuebingen.de/;;http://www.mpei.ru;http://www.ia.cas.cn;https://www.baaic.cn;http://www.pku.edu.cn;https://www.turing.ac.uk;https://www.ed.ac.uk;https://www.mpids.org", "aff_unique_abbr": "GAU;BCM;Uni T\u00fcbingen;;MPEI;CAS;BAAI;Peking U;ATI;Edinburgh;MPIDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;3;4;4;4;4;5;5;5;5;5;5;1;1;1;0", "aff_country_unique": "Germany;United States;;Russian Federation;China;United Kingdom" }, { "title": "Gene-Gene Relationship Modeling Based on Genetic Evidence for Single-Cell RNA-Seq Data Imputation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94136", "id": "gW0znG5JCG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gW0znG5JCG", "openreview": "https://openreview.net/forum?id=gW0znG5JCG", "poster": "/media/PosterPDFs/NeurIPS%202024/94136.png?t=1732189107.532392", "project": "", "author_site": "Daeho Um, Ji Won Yoon, Seong Jin Ahn, Yunha Yeo", "tldr": "", "abstract": "Single-cell RNA sequencing (scRNA-seq) technologies enable the exploration of cellular heterogeneity and facilitate the construction of cell atlases. However, scRNA-seq data often contain a large portion of missing values (false zeros) or noisy values, hindering downstream analyses. To recover these false zeros, propagation-based imputation methods have been proposed using $k$-NN graphs. However they model only associating relationships among genes within a cell, while, according to well-known genetic evidence, there are both associating and dissociating relationships among genes. To apply this genetic evidence to gene-gene relationship modeling, this paper proposes a novel imputation method that newly employs dissociating relationships in addition to associating relationships. Our method constructs a $k$-NN graph to additionally model dissociating relationships via the negation of a given cell-gene matrix. Moreover, our method standardizes the value distribution (mean and variance) of each gene to have standard distributions regardless of the gene. Through extensive experiments, we demonstrate that the proposed method achieves exceptional performance gains over state-of-the-art methods in both cell clustering and gene expression recovery across six scRNA-seq datasets, validating the significance of using complete gene-gene relationships in accordance with genetic evidence. The source code is available at https://github.com/daehoum1/scCR.", "keywords": "scRNA-seq;imputation;bioinformatics", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Daeho Um;Ji Won Yoon;Seong Jin Ahn;Yunha Yeo", "authorids": "~Daeho_Um1;~Ji_Won_Yoon3;~Seong_Jin_Ahn1;~Yunha_Yeo1", "gender": ";F;M;F", "homepage": ";;https://github.com/SeongJinAhn;", "dblp": ";;12/3610-2;", "google_scholar": ";-3sBeKoAAAAJ;JWggrigAAAAJ;", "orcid": ";0000-0001-8631-4489;;", "linkedin": ";;;serinahyeo/", "or_profile": "~Daeho_Um1;~Ji_Won_Yoon3;~Seong_Jin_Ahn1;~Yunha_Yeo1", "aff": ";Seoul National University;Korea Advanced Institute of Science & Technology;Korea University", "aff_domain": ";snu.ac.kr;kaist.edu;korea.ac.kr", "position": ";PhD student;PhD student;MS student", "bibtex": "@inproceedings{\num2024genegene,\ntitle={Gene-Gene Relationship Modeling Based on Genetic Evidence for Single-Cell {RNA}-Seq Data Imputation},\nauthor={Daeho Um and Ji Won Yoon and Seong Jin Ahn and Yunha Yeo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gW0znG5JCG}\n}", "github": "", "reviewers": "49oE;NnmD;wZRo", "pdf_size": 4963583, "rating": "4;6;7", "confidence": "4;4;3", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "53;299;78", "wc_strengths": "78;103;69", "wc_weaknesses": "64;84;70", "wc_questions": "26;28;74", "wc_limitations": "1;1;1", "wc_review": "222;515;292", "wc_reply_reviewers": "0;21;0", "wc_reply_authors": "303;114;260", "reply_reviewers": "0;1;0", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 143.33333333333334, 110.54511396810908 ], "wc_strengths_avg": [ 83.33333333333333, 14.383632673594278 ], "wc_weaknesses_avg": [ 72.66666666666667, 8.379870059984357 ], "wc_questions_avg": [ 42.666666666666664, 22.17105219775452 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.0, 124.93464958395916 ], "wc_reply_reviewers_avg": [ 7.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 225.66666666666666, 80.88812575952596 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10912902207974071518&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";snu.ac.kr;kaist.edu;korea.ac.kr", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Seoul National University;Korea Advanced Institute of Science and Technology;Korea University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.kaist.ac.kr;https://www.korea.ac.kr", "aff_unique_abbr": "SNU;KAIST;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Rough Transformers: Lightweight and Continuous Time Series Modelling through Signature Patching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94135", "id": "gXWmhzeVmh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gXWmhzeVmh", "openreview": "https://openreview.net/forum?id=gXWmhzeVmh", "poster": "/media/PosterPDFs/NeurIPS%202024/94135.png?t=1733324238.262192", "project": "", "author_site": "Fernando Moreno-Pino, Alvaro Arroyo, Harrison Waldon, Xiaowen Dong, Alvaro Cartea", "tldr": "", "abstract": "Time-series data in real-world settings typically exhibit long-range dependencies and are observed at non-uniform intervals. In these settings, traditional sequence-based recurrent models struggle. To overcome this, researchers often replace recurrent models with Neural ODE-based architectures to account for irregularly sampled data and use Transformer-based architectures to account for long-range dependencies. Despite the success of these two approaches, both incur very high computational costs for input sequences of even moderate length. To address this challenge, we introduce the Rough Transformer, a variation of the Transformer model that operates on continuous-time representations of input sequences and incurs significantly lower computational costs. In particular, we propose multi-view signature attention, which uses path signatures to augment vanilla attention and to capture both local and global (multi-scale) dependencies in the input data, while remaining robust to changes in the sequence length and sampling frequency and yielding improved spatial processing. We find that, on a variety of time-series-related tasks, Rough Transformers consistently outperform their vanilla attention counterparts while obtaining the representational benefits of Neural ODE-based models, all at a fraction of the computational time and memory resources.", "keywords": "efficient time series modelling;efficient sequence modelling;continuous-time temporal modelling;patching;self-attention;transformers;computational efficiency;spatial processing", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/75eb45051f6e8229ace1583c829e5331b1739f8a.zip", "author": "Fernando Moreno-Pino;Alvaro Arroyo;Harrison Waldon;Xiaowen Dong;Alvaro Cartea", "authorids": "~Fernando_Moreno-Pino1;~Alvaro_Arroyo1;~Harrison_Waldon1;~Xiaowen_Dong1;~Alvaro_Cartea1", "gender": "M;M;;;", "homepage": "https://fmorenopino.github.io;https://scholar.google.co.uk/citations?user=P1qHzNYAAAAJ&hl=en;https://oxford-man.ox.ac.uk/who-we-are/people/;https://web.media.mit.edu/~xdong/;", "dblp": "285/5886;;;91/9827-1;", "google_scholar": "20lxrFsAAAAJ;;;_8tUq8kAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-3654-9236;;;;", "linkedin": "fernando-moreno-pino/;;;;", "or_profile": "~Fernando_Moreno-Pino1;~Alvaro_Arroyo1;~Harrison_Waldon1;~Xiaowen_Dong1;~Alvaro_Cartea1", "aff": "University of Oxford;University of Oxford;;Massachusetts Institute of Technology;University of Oxford", "aff_domain": "oxford.ac.uk;ox.ac.uk;;mit.edu;oxford.ac.uk", "position": "Postdoc;PhD student;;Research Affiliate;Full Professor", "bibtex": "@inproceedings{\nmoreno-pino2024rough,\ntitle={Rough Transformers: Lightweight Continuous-Time Sequence Modelling with Path Signatures},\nauthor={Fernando Moreno-Pino and Alvaro Arroyo and Harrison Waldon and Xiaowen Dong and Alvaro Cartea},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gXWmhzeVmh}\n}", "github": "", "reviewers": "hdr3;tNoo;wNkv", "pdf_size": 1301636, "rating": "4;6;7", "confidence": "3;4;4", "soundness": "2;3;2", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "57;81;82", "wc_strengths": "25;73;64", "wc_weaknesses": "157;382;293", "wc_questions": "65;42;164", "wc_limitations": "1;7;10", "wc_review": "305;585;613", "wc_reply_reviewers": "0;31;27", "wc_reply_authors": "0;15;40", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 11.55662388223981 ], "wc_strengths_avg": [ 54.0, 20.83266665599966 ], "wc_weaknesses_avg": [ 277.3333333333333, 92.52146898002766 ], "wc_questions_avg": [ 90.33333333333333, 52.92972279877871 ], "wc_limitations_avg": [ 6.0, 3.7416573867739413 ], "wc_review_avg": [ 501.0, 139.06353464034584 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 13.767917618708921 ], "wc_reply_authors_avg": [ 18.333333333333332, 16.49915822768611 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5916677363372826078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "oxford.ac.uk;ox.ac.uk;;mit.edu;oxford.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Oxford;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://web.mit.edu", "aff_unique_abbr": "Oxford;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Towards Unsupervised Model Selection for Domain Adaptive Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94134", "id": "gYa94o5Gmq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gYa94o5Gmq", "openreview": "https://openreview.net/forum?id=gYa94o5Gmq", "poster": "/media/PosterPDFs/NeurIPS%202024/94134.png?t=1731745523.7068343", "project": "", "author_site": "Hengfu Yu, Jinhong Deng, Wen Li, Lixin Duan", "tldr": "", "abstract": "Evaluating the performance of deep models in new scenarios has drawn increasing attention in recent years due to the wide application of deep learning techniques in various fields. However, while it is possible to collect data from new scenarios, the annotations are not always available. Existing Domain Adaptive Object Detection (DAOD) works usually report their performance by selecting the best model on the validation set or even the test set of the target domain, which is highly impractical in real-world applications. In this paper, we propose a novel unsupervised model selection approach for domain adaptive object detection, which is able to select almost the optimal model for the target domain without using any target labels. Our approach is based on the flat minima principle, i.e., models located in the flat minima region in the parameter space usually exhibit excellent generalization ability. However, traditional methods require labeled data to evaluate how well a model is located in the flat minima region, which is unrealistic for the DAOD task. Therefore, we design a Detection Adaptation Score (DAS) approach to approximately measure the flat minima without using target labels. We show via a generalization bound that the flatness can be deemed as model variance, while the minima depend on the domain distribution distance for the DAOD task. Accordingly, we propose a Flatness Index Score (FIS) to assess the flatness by measuring the classification and localization fluctuation before and after perturbations of model parameters and a Prototypical Distance Ratio (PDR) score to seek the minima by measuring the transferability and discriminability of the models. In this way, the proposed DAS approach can effectively represent the degree of flat minima and evaluate the model generalization ability on the target domain. We have conducted extensive experiments on various DAOD benchmarks and approaches, and the experimental results show that the proposed DAS correlates well with the performance of DAOD models and can be used as an effective tool for model selection after training. The code will be released at https://github.com/HenryYu23/DAS.", "keywords": "Domain Adaptive Object Detection;Unsupervised Model Selection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hengfu Yu;Jinhong Deng;Wen Li;Lixin Duan", "authorids": "~Hengfu_Yu1;~Jinhong_Deng1;~Wen_Li2;~Lixin_Duan1", "gender": "M;M;M;M", "homepage": "https://github.com/HenryYu23;https://jhdeng-vision.github.io/;http://wenli-vision.github.io;http://lxduan.info/", "dblp": ";260/0959;06/721-1;54/7057.html", "google_scholar": ";XrtJ8mEAAAAJ;https://scholar.google.ch/citations?user=yjG4Eg4AAAAJ;inRIcS0AAAAJ", "orcid": ";;0000-0002-5559-8594;0000-0002-0723-4016", "linkedin": ";;;lxduan/", "or_profile": "~Hengfu_Yu1;~Jinhong_Deng1;~Wen_Li2;~Lixin_Duan1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyu2024towards,\ntitle={Towards Unsupervised Model Selection for Domain Adaptive Object Detection},\nauthor={Hengfu Yu and Jinhong Deng and Wen Li and Lixin Duan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gYa94o5Gmq}\n}", "github": "", "reviewers": "9Lzv;jxDH;ood2;XtC2;4YNt", "pdf_size": 1015019, "rating": "3;5;5;7;8", "confidence": "4;5;4;5;4", "soundness": "2;2;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "51;139;83;98;75", "wc_strengths": "34;112;28;78;92", "wc_weaknesses": "158;312;229;46;70", "wc_questions": "31;50;7;231;105", "wc_limitations": "38;7;4;5;10", "wc_review": "312;620;351;458;352", "wc_reply_reviewers": "286;24;26;44;24", "wc_reply_authors": "565;118;129;33;29", "reply_reviewers": "2;1;1;1;1", "reply_authors": "5;3;3;2;2", "rating_avg": [ 5.6, 1.7435595774162693 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.2, 29.178073959739017 ], "wc_strengths_avg": [ 68.8, 32.756068140117186 ], "wc_weaknesses_avg": [ 163.0, 98.91410415102591 ], "wc_questions_avg": [ 84.8, 79.92596574330521 ], "wc_limitations_avg": [ 12.8, 12.765578717786358 ], "wc_review_avg": [ 418.6, 111.78837148827243 ], "wc_reply_reviewers_avg": [ 80.8, 102.87545868670524 ], "wc_reply_authors_avg": [ 174.8, 199.4717022537282 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.18731716231633877, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6OXqvB8xF5UJ:scholar.google.com/&scioq=Towards+Unsupervised+Model+Selection+for+Domain+Adaptive+Object+Detection&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Diffeomorphic interpolation for efficient persistence-based topological optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94133", "id": "gYjM1BZzdX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gYjM1BZzdX", "openreview": "https://openreview.net/forum?id=gYjM1BZzdX", "poster": "/media/PosterPDFs/NeurIPS%202024/94133.png?t=1730367068.0220041", "project": "", "author_site": "Mathieu Carri\u00e8re, Marc Theveneau, Th\u00e9o Lacombe", "tldr": "", "abstract": "Topological Data Analysis (TDA) provides a pipeline to extract quantitative and powerful topological descriptors from structured objects. \nThis enables the definition of topological loss functions, which assert to which extent a given object exhibits some topological properties. \nOne can then use these losses to perform topological optimization via gradient descent routines. \nWhile theoretically sounded, topological optimization faces an important challenge: gradients tend to be extremely sparse, in the sense that the loss function typically depends (locally) on only very few coordinates of the input object, yielding dramatically slow optimization schemes in practice. \n\nIn this work, focusing on the central case of topological optimization for point clouds, we propose to overcome this limitation using diffeomorphic interpolation, turning sparse gradients into smooth vector fields defined on the whole space. \nIn particular, this approach combines efficiently with subsampling techniques routinely used in TDA, as the diffeomorphism derived from the gradient computed on the subsample can be used to update the coordinates of the full and possibly large input object. We then illustrate the usefulness of our approach on black-box autoencoder (AE) regularization, where we aim at applying some topological priors on the latent spaces associated to fixed, black-box AE models without modifying their (unknown) architectures and parameters. We empirically show that, while vanilla topological optimization has to be re-run every time that new data comes out of the black-box models, learning a diffeomorphic flow can be done once and then re-applied to new data in linear time. Moreover, reverting the flow allows us to generate data by sampling the topologically-optimized latent space directly, allowing for better interpretability of the model.", "keywords": "Persistent Homology;Persistence Diagrams;Optimization;Topological Data Analysis", "primary_area": "optimization", "supplementary_material": "/attachment/7921d934b34940599d464d204696fb7c3d582470.zip", "author": "Mathieu Carri\u00e8re;Marc Theveneau;Th\u00e9o Lacombe", "authorids": "~Mathieu_Carri\u00e8re1;~Marc_Theveneau1;~Th\u00e9o_Lacombe1", "gender": ";M;M", "homepage": "https://mathieucarriere.github.io/website/;;https://tlacombe.github.io", "dblp": "167/1015;;220/5549", "google_scholar": ";;https://scholar.google.fr/citations?user=e9Xx4ZEAAAAJ", "orcid": ";;", "linkedin": ";marc-theveneau-aa0746206/;", "or_profile": "~Mathieu_Carri\u00e8re1;~Marc_Theveneau1;~Th\u00e9o_Lacombe1", "aff": "INRIA;McGill University;Universit\u00e9 Gustave Eiffel", "aff_domain": "inria.fr;mcgill.ca;univ-eiffel.fr", "position": "Researcher;PhD student;Lecturer", "bibtex": "@inproceedings{\ncarri{\\`e}re2024diffeomorphic,\ntitle={Diffeomorphic interpolation for efficient persistence-based topological optimization},\nauthor={Mathieu Carri{\\`e}re and Marc Theveneau and Th{\\'e}o Lacombe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gYjM1BZzdX}\n}", "github": "", "reviewers": "uH7P;v7wD;GJy4;fiZD", "pdf_size": 2763204, "rating": "7;7;7;7", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "111;111;35;121", "wc_strengths": "55;112;28;74", "wc_weaknesses": "74;210;12;177", "wc_questions": "44;428;274;9", "wc_limitations": "2;57;1;6", "wc_review": "286;918;350;387", "wc_reply_reviewers": "34;402;24;23", "wc_reply_authors": "0;414;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.5, 34.59407463713981 ], "wc_strengths_avg": [ 67.25, 30.572659354397025 ], "wc_weaknesses_avg": [ 118.25, 79.24132444627614 ], "wc_questions_avg": [ 188.75, 171.5887161208452 ], "wc_limitations_avg": [ 16.5, 23.4574082114798 ], "wc_review_avg": [ 485.25, 252.44739551043105 ], "wc_reply_reviewers_avg": [ 120.75, 162.43671844752345 ], "wc_reply_authors_avg": [ 103.5, 179.2672585833788 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7920032710468906743&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "inria.fr;mcgill.ca;univ-eiffel.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "INRIA;McGill University;Universit\u00e9 Gustave Eiffel", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inria.fr;https://www.mcgill.ca;https://www.univ-gustave-eiffel.fr", "aff_unique_abbr": "INRIA;McGill;UGE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;Canada" }, { "title": "TALoS: Enhancing Semantic Scene Completion via Test-time Adaptation on the Line of Sight", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94132", "id": "gZWYdJ3c26", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gZWYdJ3c26", "openreview": "https://openreview.net/forum?id=gZWYdJ3c26", "poster": "/media/PosterPDFs/NeurIPS%202024/94132.png?t=1730356651.0604572", "project": "", "author_site": "Hyun-Kurl Jang, Jihun Kim, Hyeokjun Kweon, Kuk-Jin Yoon", "tldr": "", "abstract": "Semantic Scene Completion (SSC) aims to perform geometric completion and semantic segmentation simultaneously. Despite the promising results achieved by existing studies, the inherently ill-posed nature of the task presents significant challenges in diverse driving scenarios. This paper introduces TALoS, a novel test-time adaptation approach for SSC that excavates the information available in driving environments. Specifically, we focus on that observations made at a certain moment can serve as Ground Truth (GT) for scene completion at another moment. Given the characteristics of the LiDAR sensor, an observation of an object at a certain location confirms both 1) the occupation of that location and 2) the absence of obstacles along the line of sight from the LiDAR to that point. TALoS utilizes these observations to obtain self-supervision about occupancy and emptiness, guiding the model to adapt to the scene in test time. In a similar manner, we aggregate reliable SSC predictions among multiple moments and leverage them as semantic pseudo-GT for adaptation. Further, to leverage future observations that are not accessible at the current time, we present a dual optimization scheme using the model in which the update is delayed until the future observation is available. Evaluations on the SemanticKITTI validation and test sets demonstrate that TALoS significantly improves the performance of the pre-trained SSC model.", "keywords": "semantic scene completion;test-time adaptation;point cloud", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hyun-Kurl Jang;Jihun Kim;Hyeokjun Kweon;Kuk-Jin Yoon", "authorids": "~Hyun-Kurl_Jang1;~Jihun_Kim2;~Hyeokjun_Kweon1;~Kuk-Jin_Yoon1", "gender": "M;M;M;M", "homepage": "https://blue-531.github.io/;https://jihun1998.github.io/;https://sangrockeg.github.io/;", "dblp": "355/1755;;308/6809;42/5677", "google_scholar": "https://scholar.google.com/citations?hl=ko;;em3aymgAAAAJ;1NvBj_gAAAAJ", "orcid": ";0009-0007-8764-195X;;", "linkedin": "janghk531;jihun1998/;;", "or_profile": "~Hyun-Kurl_Jang1;~Jihun_Kim2;~Hyeokjun_Kweon1;~Kuk-Jin_Yoon1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\njang2024talos,\ntitle={{TAL}oS: Enhancing Semantic Scene Completion via Test-time Adaptation on the Line of Sight},\nauthor={Hyun-Kurl Jang and Jihun Kim and Hyeokjun Kweon and Kuk-Jin Yoon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gZWYdJ3c26}\n}", "github": "", "reviewers": "9T82;xDnF;MQ39", "pdf_size": 782786, "rating": "6;6;6", "confidence": "5;3;4", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "97;108;94", "wc_strengths": "99;27;205", "wc_weaknesses": "625;195;143", "wc_questions": "74;98;12", "wc_limitations": "318;6;14", "wc_review": "1213;434;468", "wc_reply_reviewers": "28;286;177", "wc_reply_authors": "20;612;487", "reply_reviewers": "1;2;2", "reply_authors": "2;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.66666666666667, 6.018490028422597 ], "wc_strengths_avg": [ 110.33333333333333, 73.10874700669468 ], "wc_weaknesses_avg": [ 321.0, 216.00617275130512 ], "wc_questions_avg": [ 61.333333333333336, 36.23380864453651 ], "wc_limitations_avg": [ 112.66666666666667, 145.22932057802774 ], "wc_review_avg": [ 705.0, 359.47832572585884 ], "wc_reply_reviewers_avg": [ 163.66666666666666, 105.74917914049684 ], "wc_reply_authors_avg": [ 373.0, 254.77179331053637 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17394425119337138546&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.edu;kaist.edu;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Slice-100K: A Multimodal Dataset for Extrusion-based 3D Printing", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97577", "id": "gad19kaPzb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gad19kaPzb", "openreview": "https://openreview.net/forum?id=gad19kaPzb", "poster": "/media/PosterPDFs/NeurIPS%202024/97577.png?t=1733461564.2391806", "project": "", "author_site": "Anushrut Nirmal Jignasu, Kelly Marshall, Ankush Kumar Mishra, Lucas Nerone Rillo, Baskar Ganapathysubramanian, Aditya Balu, Chinmay Hegde, Adarsh Krishnamurthy", "tldr": "", "abstract": "G-code (Geometric code) or RS-274 is the most widely used computer numerical control (CNC) and 3D printing programming language. G-code provides machine instructions for the movement of the 3D printer, especially for the nozzle, stage, and extrusion of material for extrusion-based additive manufacturing. Currently, there does not exist a large repository of curated CAD models along with their corresponding G-code files for additive manufacturing. To address this issue, we present Slice-100K, a first-of-its-kind dataset of over 100,000 G-code files, along with their tessellated CAD model, LVIS (Large Vocabulary Instance Segmentation) categories, geometric properties, and renderings. We build our dataset from triangulated meshes derived from Objaverse-XL and Thingi10K datasets. We demonstrate the utility of this dataset by finetuning GPT-2 on a subset of the dataset for G-code translation from a legacy G-code format (Sailfish) to a more modern, widely used format (Marlin). Our dataset can be found here. Slice-100K will be the first step in developing a multimodal foundation model for digital manufacturing.", "keywords": "3D;G-code translation;Large Language Model;3D Printing;Multimodal", "primary_area": "", "supplementary_material": "/attachment/ebce731e5ee6ac0f8cb42b360bd08c3beb0ce455.pdf", "author": "Anushrut Jignasu;Kelly O. Marshall;Ankush Kumar Mishra;Lucas Nerone Rillo;Baskar Ganapathysubramanian;Aditya Balu;Chinmay Hegde;Adarsh Krishnamurthy", "authorids": "~Anushrut_Jignasu1;~Kelly_O._Marshall1;~Ankush_Kumar_Mishra1;~Lucas_Nerone_Rillo1;~Baskar_Ganapathysubramanian1;~Aditya_Balu1;~Chinmay_Hegde1;~Adarsh_Krishnamurthy1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://ajignasu.github.io/;;;;;;https://chinmayhegde.github.io/;https://web.me.iastate.edu/idealab/p-krishnamurthy.html", "dblp": "332/4850;;;;;192/1502;39/2056;", "google_scholar": "Gb_YM5oAAAAJ;;GudYxOIAAAAJ;;R1JIs4cAAAAJ;;eJAV17IAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6931-4532;;;;;", "linkedin": "anushrutjignasu/;kelly-marshall-031947151/;ankush-kumar-mishra-b5bb47111/;lucas-rillo/;baskar-ganapathysubramanian-5b22a51a6/?original_referer=;;;", "or_profile": "~Anushrut_Jignasu1;~Kelly_O._Marshall1;~Ankush_Kumar_Mishra1;~Lucas_Nerone_Rillo1;~Baskar_Ganapathysubramanian1;~Aditya_Balu1;~Chinmay_Hegde1;~Adarsh_Krishnamurthy1", "aff": "Iowa State University;New York University;Iowa State University;Iowa State University;Iowa State University;Iowa State University;New York University;Iowa State University", "aff_domain": "iastate.edu;nyu.edu;iastate.edu;iastate.edu;iastate.edu;iastate.edu;nyu.edu;iastate.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;Professor;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\njignasu2024slicek,\ntitle={Slice-100K: A Multimodal Dataset for Extrusion-based 3D Printing},\nauthor={Anushrut Jignasu and Kelly O. Marshall and Ankush Kumar Mishra and Lucas Nerone Rillo and Baskar Ganapathysubramanian and Aditya Balu and Chinmay Hegde and Adarsh Krishnamurthy},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=gad19kaPzb}\n}", "github": "", "reviewers": "YCzd;YQbc;CqL5;9nHc", "pdf_size": 5481436, "rating": "6;6;7;7", "confidence": "3;3;4;3", "wc_summary_and_contributions": "38;98;67;62", "wc_strengths": "26;73;21;3", "wc_improvement": "60;59;102;39", "wc_limitations": "1;40;1;1", "wc_correctness": "1;29;1;1", "wc_clarity": "1;47;1;1", "wc_relation_to_prior_work": "1;45;1;1", "wc_documentation": "1;81;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "130;473;196;110", "wc_reply_reviewers": "0;4;5;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 66.25, 21.358546298847212 ], "wc_strengths_avg": [ 30.75, 25.849323008543184 ], "wc_improvement_avg": [ 65.0, 22.94558781116753 ], "wc_limitations_avg": [ 10.75, 16.887495373796554 ], "wc_correctness_avg": [ 8.0, 12.12435565298214 ], "wc_clarity_avg": [ 12.5, 19.91858428704209 ], "wc_relation_to_prior_work_avg": [ 12.0, 19.05255888325765 ], "wc_documentation_avg": [ 21.0, 34.64101615137755 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 227.25, 145.40869128081718 ], "wc_reply_reviewers_avg": [ 2.25, 2.277608394786075 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:api9S2dTre0J:scholar.google.com/&scioq=Slice-100K:+A+Multimodal+Dataset+for+Extrusion-based+3D+Printing&hl=en&as_sdt=0,22", "gs_version_total": 8, "email": "iastate.edu;nyu.edu;iastate.edu;iastate.edu;iastate.edu;iastate.edu;nyu.edu;iastate.edu", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;1;0", "aff_unique_norm": "Iowa State University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iastate.edu;https://www.nyu.edu", "aff_unique_abbr": "ISU;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Quasi-Bayes meets Vines", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94131", "id": "gcpeEg88R3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gcpeEg88R3", "openreview": "https://openreview.net/forum?id=gcpeEg88R3", "poster": "/media/PosterPDFs/NeurIPS%202024/94131.png?t=1732728770.4311583", "project": "", "author_site": "David Huk, Yuanhe Zhang, Ritabrata Dutta, Mark Steel", "tldr": "", "abstract": "Recently developed quasi-Bayesian (QB) methods \\cite{fong2023martingale} proposed a stimulating change of paradigm in Bayesian computation by directly constructing the Bayesian predictive distribution through recursion, removing the need for expensive computations involved in sampling the Bayesian posterior distribution. This has proved to be data-efficient for univariate predictions, however, existing constructions for higher dimensional densities are only possible by relying on restrictive assumptions on the model's multivariate structure. Here, we propose a wholly different approach to extend Quasi-Bayesian prediction to high dimensions through the use of Sklar's theorem, by decomposing the predictive distribution into one-dimensional predictive marginals and a high-dimensional copula. We use the efficient recursive QB construction for the one-dimensional marginals and model the dependence using highly expressive vine copulas. Further, we tune hyperparameters using robust divergences (eg. energy score) and show that our proposed Quasi-Bayesian Vine (QB-Vine) is a fully non-parametric density estimator with \\emph{an analytical form} and convergence rate independent of the dimension of the data in some situations. Our experiments illustrate that the QB-Vine is appropriate for high dimensional distributions ($\\sim$64), needs very few samples to train ($\\sim$200) and outperforms state-of-the-art methods with analytical forms for density estimation and supervised tasks by a considerable margin.", "keywords": "Quasi-Bayesian;Copula;Vine Copula;Nonparametric Bayesian;density estimation", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/0961dee3598506f4add8c8f0d55c09d8b8f08647.zip", "author": "David Huk;Yuanhe Zhang;Ritabrata Dutta;Mark Steel", "authorids": "~David_Huk1;~Yuanhe_Zhang1;~Ritabrata_Dutta1;~Mark_Steel1", "gender": ";M;;M", "homepage": ";https://warwick.ac.uk/fac/sci/statistics/staff/research_students/yuanhezhang/;https://warwick.ac.uk/fac/sci/statistics/staff/academic-research/dutta/;https://warwick.ac.uk/fac/sci/statistics/staff/academic-research/steel/", "dblp": ";;;", "google_scholar": ";;;METrs9QAAAAJ", "orcid": ";;;0000-0001-9858-9279", "linkedin": ";yuanhe-zhang;;", "or_profile": "~David_Huk1;~Yuanhe_Zhang1;~Ritabrata_Dutta1;~Mark_Steel1", "aff": ";University of Warwick;The university of Warwick;University of Warwick", "aff_domain": ";warwick.ac.uk;warwick.ac.uk;warwick.ac.uk", "position": ";Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuk2024quasibayes,\ntitle={Quasi-Bayes meets Vines},\nauthor={David Huk and Yuanhe Zhang and Ritabrata Dutta and Mark Steel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gcpeEg88R3}\n}", "github": "", "reviewers": "qqFA;iVqh;2HTp;QJH9", "pdf_size": 1742667, "rating": "5;5;6;7", "confidence": "4;3;4;2", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "53;116;96;75", "wc_strengths": "53;163;28;69", "wc_weaknesses": "1119;238;11;37", "wc_questions": "56;174;62;119", "wc_limitations": "12;1;2;12", "wc_review": "1293;692;199;312", "wc_reply_reviewers": "57;0;19;17", "wc_reply_authors": "777;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 23.484037131634757 ], "wc_strengths_avg": [ 78.25, 51.06552163642314 ], "wc_weaknesses_avg": [ 351.25, 451.8818291323518 ], "wc_questions_avg": [ 102.75, 47.92376758978784 ], "wc_limitations_avg": [ 6.75, 5.261891294962297 ], "wc_review_avg": [ 624.0, 427.2452457313013 ], "wc_reply_reviewers_avg": [ 23.25, 20.837166314064877 ], "wc_reply_authors_avg": [ 194.25, 336.4508693702544 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2912057260320973925&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": ";warwick.ac.uk;warwick.ac.uk;warwick.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Warwick", "aff_unique_dep": "", "aff_unique_url": "https://www.warwick.ac.uk", "aff_unique_abbr": "Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Achieving Optimal Clustering in Gaussian Mixture Models with Anisotropic Covariance Structures", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94130", "id": "ge8GZn8Gtu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ge8GZn8Gtu", "openreview": "https://openreview.net/forum?id=ge8GZn8Gtu", "poster": "", "project": "", "author_site": "Xin Chen, Anderson Ye Zhang", "tldr": "", "abstract": "We study clustering under anisotropic Gaussian Mixture Models (GMMs), where covariance matrices from different clusters are unknown and are not necessarily the identity matrix. We analyze two anisotropic scenarios: homogeneous, with identical covariance matrices, and heterogeneous, with distinct matrices per cluster. For these models, we derive minimax lower bounds that illustrate the critical influence of covariance structures on clustering accuracy. To solve the clustering problem, we consider a variant of Lloyd's algorithm, adapted to estimate and utilize covariance information iteratively. We prove that the adjusted algorithm not only achieves the minimax optimality but also converges within a logarithmic number of iterations, thus bridging the gap between theoretical guarantees and practical efficiency.", "keywords": "Minimax rates;Mixture model;Lloyd\u2019s algoirhtm;Clustering", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xin Chen;Anderson Ye Zhang", "authorids": "~Xin_Chen29;~Anderson_Ye_Zhang1", "gender": ";", "homepage": "https://orfe.princeton.edu/people/xin-chen;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Xin_Chen29;~Anderson_Ye_Zhang1", "aff": "Princeton University;", "aff_domain": "princeton.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nchen2024achieving,\ntitle={Achieving Optimal Clustering in Gaussian Mixture Models with Anisotropic Covariance Structures},\nauthor={Xin Chen and Anderson Ye Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ge8GZn8Gtu}\n}", "github": "", "reviewers": "M7TA;LL3K;yjVp", "pdf_size": 1440636, "rating": "6;8;8", "confidence": "3;4;5", "soundness": "4;4;4", "novelty": "3;4;3", "presentation": "3;3;4", "wc_summary": "49;30;59", "wc_strengths": "43;107;36", "wc_weaknesses": "74;142;518", "wc_questions": "3;22;389", "wc_limitations": "5;66;120", "wc_review": "174;367;1122", "wc_reply_reviewers": "13;0;143", "wc_reply_authors": "0;0;31", "reply_reviewers": "1;0;1", "reply_authors": "1;1;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 46.0, 12.027745701779143 ], "wc_strengths_avg": [ 62.0, 31.94787421201396 ], "wc_weaknesses_avg": [ 244.66666666666666, 195.2593716629812 ], "wc_questions_avg": [ 138.0, 177.65322025414193 ], "wc_limitations_avg": [ 63.666666666666664, 46.97753600274166 ], "wc_review_avg": [ 554.3333333333334, 409.0609843151616 ], "wc_reply_reviewers_avg": [ 52.0, 64.56521251158914 ], "wc_reply_authors_avg": [ 10.333333333333334, 14.613540144521982 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5749172227463021304&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "princeton.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "In-N-Out: Lifting 2D Diffusion Prior for 3D Object Removal via Tuning-Free Latents Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94129", "id": "gffaYDu9mM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gffaYDu9mM", "openreview": "https://openreview.net/forum?id=gffaYDu9mM", "poster": "/media/PosterPDFs/NeurIPS%202024/94129.png?t=1731393278.6303225", "project": "", "author_site": "Dongting Hu, Huan Fu, Jiaxian Guo, Liuhua Peng, Tingjin Chu, Feng Liu, Tongliang Liu, Mingming Gong", "tldr": "", "abstract": "Neural representations for 3D scenes have made substantial advancements recently, yet object removal remains a challenging yet practical issue, due to the absence of multi-view supervision over occluded areas. Diffusion Models (DMs), trained on extensive 2D images, show diverse and high-fidelity generative capabilities in the 2D domain. However, due to not being specifically trained on 3D data, their application to multi-view data often exacerbates inconsistency, hence impacting the overall quality of the 3D output. To address these issues, we introduce \"In-N-Out\", a novel approach that begins by inpainting a prior, i.e., the occluded area from a single view using DMs, followed by outstretching it to create multi-view inpaintings via latents alignments. Our analysis identifies that the variability in DMs' outputs mainly arises from initially sampled latents and intermediate latents predicted in the denoising process. We explicitly align of initial latents using a Neural Radiance Field (NeRF) to establish a consistent foundational structure in the inpainted area, complemented by an implicit alignment of intermediate latents through cross-view attention during the denoising phases, enhancing appearance consistency across views. To further enhance rendering results, we apply a patch-based hybrid loss to optimize NeRF. We demonstrate that our techniques effectively mitigate the challenges posed by inconsistencies in DMs and substantially improve the fidelity and coherence of inpainted 3D representations.", "keywords": "Neural Radiance Fields;3D Inpainting;Neural Representation Editing;Diffusion Models;Object Removal", "primary_area": "machine_vision", "supplementary_material": "/attachment/f201b9e199442f4e1c3d891bb2fc92ad5d6fd75d.zip", "author": "Dongting Hu;Huan Fu;Jiaxian Guo;Liuhua Peng;Tingjin Chu;Feng Liu;Tongliang Liu;Mingming Gong", "authorids": "~Dongting_Hu1;~Huan_Fu1;~Jiaxian_Guo2;~Liuhua_Peng1;~Tingjin_Chu1;~Feng_Liu2;~Tongliang_Liu1;~Mingming_Gong1", "gender": ";M;M;M;M;M;M;M", "homepage": ";https://huan-fu.github.io/;;https://sites.google.com/view/liuhua-peng;;https://fengliu90.github.io/index.html;https://tongliang-liu.github.io/;https://mingming-gong.github.io/", "dblp": ";139/8082;206/6264;180/7474.html;;77/1318-3;150/6667;98/8479", "google_scholar": ";https://scholar.google.com/citations?hl=en;wQgPocEAAAAJ;https://scholar.google.com.au/citations?user=SstmL7wAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ", "orcid": ";;;0000-0002-5431-8079; 0000-0001-9849-8369;0000-0002-5005-9129;;0000-0001-7147-5589", "linkedin": ";;;;;alexfengliu;;", "or_profile": "~Dongting_Hu1;~Huan_Fu1;~Jiaxian_Guo2;~Liuhua_Peng1;~Tingjin_Chu1;~Feng_Liu2;~Tongliang_Liu1;~Mingming_Gong1", "aff": ";Futurise;Google Research;University of Melbourne;University of Melbourne;University of Melbourne;Mohamed bin Zayed University of Artificial Intelligence;University of Melbourne", "aff_domain": ";agibang.ai;google.com;unimelb.edu.au;unimelb.edu.au;unimelb.edu.au;mbzuai.ac.ae;unimelb.edu.au", "position": ";Principal Researcher;Research Scientist;Associate Professor;Lecturer;Assistant Professor;Affiliated Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2024innout,\ntitle={In-N-Out: Lifting 2D Diffusion Prior for 3D Object Removal via Tuning-Free Latents Alignment},\nauthor={Dongting Hu and Huan Fu and Jiaxian Guo and Liuhua Peng and Tingjin Chu and Feng Liu and Tongliang Liu and Mingming Gong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gffaYDu9mM}\n}", "github": "", "reviewers": "uTYn;S5oa;P7tn;f6e3", "pdf_size": 32400949, "rating": "4;4;6;7", "confidence": "4;5;5;3", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;1;3", "wc_summary": "37;140;38;77", "wc_strengths": "34;31;43;126", "wc_weaknesses": "62;363;98;89", "wc_questions": "56;2;53;70", "wc_limitations": "13;20;23;1", "wc_review": "202;556;255;363", "wc_reply_reviewers": "0;0;146;52", "wc_reply_authors": "94;94;302;141", "reply_reviewers": "0;0;3;2", "reply_authors": "2;2;4;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 73.0, 41.91061917939175 ], "wc_strengths_avg": [ 58.5, 39.22053033807676 ], "wc_weaknesses_avg": [ 153.0, 121.96515895943398 ], "wc_questions_avg": [ 45.25, 25.781534089343868 ], "wc_limitations_avg": [ 14.25, 8.46684711093805 ], "wc_review_avg": [ 344.0, 135.4529438587438 ], "wc_reply_reviewers_avg": [ 49.5, 59.621724228673564 ], "wc_reply_authors_avg": [ 157.75, 85.46453943010516 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wIsV40RLyHIJ:scholar.google.com/&scioq=In-N-Out:+Lifting+2D+Diffusion+Prior+for+3D+Object+Removal+via+Tuning-Free+Latents+Alignment&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": ";agibang.ai;google.com;unimelb.edu.au;unimelb.edu.au;unimelb.edu.au;mbzuai.ac.ae;unimelb.edu.au", "author_num": 8, "aff_unique_index": "0;1;2;2;2;3;2", "aff_unique_norm": "Futurise;Google;University of Melbourne;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";Google Research;;", "aff_unique_url": "https://www.futurise.sg;https://research.google;https://www.unimelb.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": ";Google Research;UniMelb;MBZUAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;2;2;3;2", "aff_country_unique": "Singapore;United States;Australia;United Arab Emirates" }, { "id": "gg3POFjqq8", "title": "Benchmarking Vision Models Under Generative Continuous Nuisance Shifts", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "One important challenge in evaluating the robustness of vision models is controlling individual nuisance factors independently.\nWhile some simple synthetic corruptions are commonly applied to existing models, they do not fully capture all realistic and relevant distribution shifts of real-world images.\nTo overcome this challenge, we apply LoRA adapters to diffusion models to realize a wide range of individual nuisance shifts in a continuous manner. \nWhile existing generative benchmarks perform manipulations in one step, we argue for gradual and continuous nuisance shifts, as they allow evaluating the sensitivity and failure points of vision models.\nWith this in mind, we perform a comprehensive large-scale study to evaluate the robustness and generalization of various classifiers under various nuisance shifts. Through carefully-designed comparisons and analysis, we reveal multiple valuable observations: 1) More modern and larger architectures trained on larger datasets tend to be more robust to various nuisance shifts and fail later for larger scales. \n2) Pre-training strategy influences the robustness and fine-tuning a CLIP classifier improves the standard accuracy but deteriorates the robustness.\n3) The accuracy drops only account for one dimension of robustness and the failure point analysis should be considered as an additional dimension for robustness evaluation.\nWe hope our continuous nuisance shift benchmark can provide a new perspective on assessing the robustness of vision models.", "keywords": "Generative models;benchmarking;computer vision", "primary_area": "", "supplementary_material": "/attachment/624684380b85e4d335fbb036076cf91b8be7c456.pdf", "author": "Olaf D\u00fcnkel;Artur Jesslen;Jiahao Xie;Christian Theobalt;Christian Rupprecht;Adam Kortylewski", "authorids": "~Olaf_D\u00fcnkel1;~Artur_Jesslen1;~Jiahao_Xie2;~Christian_Theobalt2;~Christian_Rupprecht1;~Adam_Kortylewski1", "gender": ";M;;M;M;", "homepage": ";https://arturjssln.github.io;https://jiahao000.github.io/;https://www.mpi-inf.mpg.de/~theobalt/;http://chrirupp.github.io;https://gvrl.mpi-inf.mpg.de/", "dblp": ";345/3057;217/4325-2;55/3346;https://dblp.uni-trier.de/pid/76/744-1;161/0772", "google_scholar": ";;yA9qseUAAAAJ;https://scholar.google.com.tw/citations?user=eIWg8NMAAAAJ;https://scholar.google.de/citations?user=IrYlproAAAAJ;https://scholar.google.ch/citations?user=tRLUOBIAAAAJ", "orcid": ";0000-0002-4837-8163;0000-0001-9237-2802;;;0000-0002-9146-4403", "linkedin": ";artur-jesslen/;;;;", "or_profile": "~Olaf_D\u00fcnkel1;~Artur_Jesslen1;~Jiahao_Xie2;~Christian_Theobalt2;~Christian_Rupprecht1;~Adam_Kortylewski1", "aff": ";University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;Saarland Informatics Campus, Max-Planck Institute;Max-Planck-Institute for Informatics, Saarland Informatics Campus;University of Oxford;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": ";cs.uni-freiburg.de;mpi-inf.mpg.de;mpi-inf.mpg.de;ox.ac.uk;uni-freiburg.de", "position": ";PhD student;Postdoc;Director;Associate Professor;Research Group Leader", "bibtex": "@misc{\nanonymous2024benchmarking,\ntitle={Benchmarking Vision Models Under Generative Continuous Nuisance Shifts},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=gg3POFjqq8}\n}", "github": "", "project": "", "reviewers": "Hf4n;nzJ3;Y3Jb", "site": "https://openreview.net/forum?id=gg3POFjqq8", "pdf_size": 2655419, "rating": "3;6;9", "confidence": "4;3;3", "wc_summary_and_contributions": "161;178;102", "wc_strengths": "23;3;2", "wc_improvement": "46;3;18", "wc_limitations": "22;42;1", "wc_correctness": "26;23;7", "wc_clarity": "20;19;1", "wc_relation_to_prior_work": "9;6;1", "wc_documentation": "5;12;1", "wc_additional_feedback": "1;1;1", "wc_review": "313;287;134", "wc_reply_reviewers": "0;199;0", "wc_reply_authors": "64;65;64", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 2.449489742783178 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 147.0, 32.567877834864625 ], "wc_strengths_avg": [ 9.333333333333334, 9.672412085697939 ], "wc_improvement_avg": [ 22.333333333333332, 17.82008853949821 ], "wc_limitations_avg": [ 21.666666666666668, 16.73983937265296 ], "wc_correctness_avg": [ 18.666666666666668, 8.339997335464536 ], "wc_clarity_avg": [ 13.333333333333334, 8.73053390247253 ], "wc_relation_to_prior_work_avg": [ 5.333333333333333, 3.2998316455372216 ], "wc_documentation_avg": [ 6.0, 4.546060565661952 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 244.66666666666666, 78.96975511056257 ], "wc_reply_reviewers_avg": [ 66.33333333333333, 93.80949963741531 ], "wc_reply_authors_avg": [ 64.33333333333333, 0.4714045207910317 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zF0CNSWWODcJ:scholar.google.com/&scioq=Benchmarking+Vision+Models+Under+Generative+Continuous+Nuisance+Shifts&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Freiburg;Max-Planck Institute;Max-Planck-Institute for Informatics;University of Oxford;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";Informatics;;;", "aff_unique_url": "https://www.uni-freiburg.de;https://www.mpi-sws.org;https://mpi-inf.mpg.de;https://www.ox.ac.uk;https://www.uni-freiburg.de", "aff_unique_abbr": "UoF;MPI-SWS;MPII;Oxford;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Freiburg;Saarland;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Test-Time Adaptation Induces Stronger Accuracy and Agreement-on-the-Line", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94128", "id": "giXUx4VH9t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=giXUx4VH9t", "openreview": "https://openreview.net/forum?id=giXUx4VH9t", "poster": "/media/PosterPDFs/NeurIPS%202024/94128.png?t=1732312877.3372128", "project": "", "author_site": "Eungyeup Kim, Mingjie Sun, Christina Baek, Aditi Raghunathan, J. Zico Kolter", "tldr": "", "abstract": "Recently, Miller et al. (2021) and Baek et al. (2022) empirically demonstrated strong linear correlations between in-distribution (ID) versus out-of-distribution (OOD) accuracy and agreement. These trends, coined accuracy-on-the-line (ACL) and agreement-on-the-line (AGL), enable OOD model selection and performance estimation without labeled data. However, these phenomena also break for certain shifts, such as CIFAR10-C Gaussian Noise, posing a critical bottleneck. In this paper, we make a key finding that recent test-time adaptation (TTA) methods not only improve OOD performance, but it drastically strengthen the ACL and AGL trends in models, even in shifts where models showed very weak correlations before. To analyze this, we revisit the theoretical conditions from Miller et al. (2021) that outline the types of distribution shifts needed for perfect ACL in linear models. Surprisingly, these conditions are satisfied after applying TTA to deep models in the penultimate feature embedding space. In particular, TTA causes the data distribution to collapse complex shifts into those can be expressed by a singular \"scaling\" variable in the feature space. Our results show that by combining TTA with AGL-based estimation methods, we can estimate the OOD performance of models with high precision for a broader set of distribution shifts. This lends us a simple system for selecting the best hyperparameters and adaptation strategy without any OOD labeled data. Code is available at https://github.com/EungyeupKim/TTALine.", "keywords": "accuracy-on-the-line;agreement-on-the-line;test-time-adaptation;distribution-shift", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Eungyeup Kim;Mingjie Sun;Christina Baek;Aditi Raghunathan;J Zico Kolter", "authorids": "~Eungyeup_Kim1;~Mingjie_Sun1;~Christina_Baek2;~Aditi_Raghunathan1;~J_Zico_Kolter1", "gender": "M;M;;F;M", "homepage": "https://www.linkedin.com/in/eungyeup-kim-815718165/;https://eric-mingjie.github.io/;https://kebaek.github.io;https://www.cs.cmu.edu/~aditirag/;http://www.zicokolter.com", "dblp": ";54/3913;202/7238;166/1409;67/2526", "google_scholar": ";wCZbouUAAAAJ;;Ch9iRwQAAAAJ;UXh1I6UAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Eungyeup_Kim1;~Mingjie_Sun1;~Christina_Baek2;~Aditi_Raghunathan1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Computer Science Department, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkim2024testtime,\ntitle={Test-Time Adaptation Induces Stronger Accuracy and Agreement-on-the-Line},\nauthor={Eungyeup Kim and Mingjie Sun and Christina Baek and Aditi Raghunathan and J Zico Kolter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=giXUx4VH9t}\n}", "github": "", "reviewers": "JQG9;puv1;1nU3;ZucK", "pdf_size": 2998591, "rating": "5;5;6;7", "confidence": "3;2;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "2;3;3;4", "wc_summary": "38;62;26;41", "wc_strengths": "47;60;36;40", "wc_weaknesses": "246;287;48;503", "wc_questions": "27;5;112;2", "wc_limitations": "1;9;5;12", "wc_review": "359;423;227;598", "wc_reply_reviewers": "15;46;13;231", "wc_reply_authors": "30;115;30;503", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 41.75, 12.968712349342937 ], "wc_strengths_avg": [ 45.75, 9.12071817347735 ], "wc_weaknesses_avg": [ 271.0, 161.58124891211853 ], "wc_questions_avg": [ 36.5, 44.64582847254601 ], "wc_limitations_avg": [ 6.75, 4.14578098794425 ], "wc_review_avg": [ 401.75, 133.53908603850783 ], "wc_reply_reviewers_avg": [ 76.25, 90.29777129032587 ], "wc_reply_authors_avg": [ 169.5, 195.64828136224452 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16267932493121464837&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "andrew.cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Conditional Synthesis of 3D Molecules with Time Correction Sampler", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94127", "id": "gipFTlvfF1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gipFTlvfF1", "openreview": "https://openreview.net/forum?id=gipFTlvfF1", "poster": "/media/PosterPDFs/NeurIPS%202024/94127.png?t=1731743529.8868418", "project": "", "author_site": "Hojung Jung, Youngrok Park, Laura Schmid, Jaehyeong Jo, Dongkyu Lee, Bongsang Kim, Se-Young Yun, Jinwoo Shin", "tldr": "", "abstract": "Diffusion models have demonstrated remarkable success in various domains, including molecular generation. However, conditional molecular generation remains a fundamental challenge due to an intrinsic trade-off between targeting specific chemical properties and generating meaningful samples from the data distribution. In this work, we present Time-Aware Conditional Synthesis (TACS), a novel approach to conditional generation on diffusion models. It integrates adaptively controlled plug-and-play \"online\" guidance into a diffusion model, driving samples toward the desired properties while maintaining validity and stability. A key component of our algorithm is our new type of diffusion sampler, Time Correction Sampler (TCS), which is used to control guidance and ensure that the generated molecules remain on the correct manifold at each reverse step of the diffusion process at the same time. Our proposed method demonstrates significant performance in conditional 3D molecular generation and offers a promising approach towards inverse molecular design, potentially facilitating advancements in drug discovery, materials science, and other related fields.", "keywords": "Diffusion model;Molecular generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Hojung Jung;Youngrok Park;Laura Schmid;Jaehyeong Jo;Dongkyu Lee;Bongsang Kim;Se-Young Yun;Jinwoo Shin", "authorids": "~Hojung_Jung1;~Youngrok_Park1;~Laura_Schmid1;~Jaehyeong_Jo1;~Dongkyu_Lee3;~Bongsang_Kim1;~Se-Young_Yun1;~Jinwoo_Shin1", "gender": "M;M;;M;M;;M;M", "homepage": ";https://osi.kaist.ac.kr;;https://github.com/harryjo97;https://www.linkedin.com/in/dongkyu-lee-5ba66764/;;https://fbsqkd.github.io;https://sites.google.com/site/mijirim/", "dblp": ";;242/7866;296/2037;;;23/8862;31/7062", "google_scholar": ";;https://scholar.google.at/citations?user=eOUSYYoAAAAJ;https://scholar.google.com/citations?hl=ko;;;X_IAjb8AAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;;;;;;", "linkedin": "hojung-jung-a722702a0;youngrok-park-7ba48a201/;;;;kaiststeve;seyoung-yun-395130ab/;", "or_profile": "~Hojung_Jung1;~Youngrok_Park1;~Laura_Schmid1;~Jaehyeong_Jo1;~Dongkyu_Lee3;~Bongsang_Kim1;~Se-Young_Yun1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Korea University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;LG Corporation;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.ac.kr;lge.com;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;Undergrad student;Postdoc;MS student;Researcher;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\njung2024conditional,\ntitle={Conditional Synthesis of 3D Molecules with Time Correction Sampler},\nauthor={Hojung Jung and Youngrok Park and Laura Schmid and Jaehyeong Jo and Dongkyu Lee and Bongsang Kim and Se-Young Yun and Jinwoo Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gipFTlvfF1}\n}", "github": "", "reviewers": "ByqK;VutT;1ecU;9Yep", "pdf_size": 4803114, "rating": "5;6;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;2;2;3", "wc_summary": "77;86;72;102", "wc_strengths": "56;77;72;102", "wc_weaknesses": "143;209;35;23", "wc_questions": "108;164;203;9", "wc_limitations": "41;31;7;27", "wc_review": "425;567;389;263", "wc_reply_reviewers": "33;47;16;0", "wc_reply_authors": "49;39;39;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 11.409973707244026 ], "wc_strengths_avg": [ 76.75, 16.513252253871737 ], "wc_weaknesses_avg": [ 102.5, 77.231793971136 ], "wc_questions_avg": [ 121.0, 72.94861204985328 ], "wc_limitations_avg": [ 26.5, 12.359207094308275 ], "wc_review_avg": [ 411.0, 108.30512453249847 ], "wc_reply_reviewers_avg": [ 24.0, 17.67766952966369 ], "wc_reply_authors_avg": [ 31.75, 18.779976038323372 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8058187316909615244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;korea.ac.kr;kaist.ac.kr;kaist.ac.kr;lge.com;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 8, "aff_unique_index": "0;1;0;0;2;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Korea University;LG", "aff_unique_dep": ";;LG Corporation", "aff_unique_url": "https://www.kaist.ac.kr;https://www.korea.ac.kr;https://www.lg.com", "aff_unique_abbr": "KAIST;KU;LG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "MimicTalk: Mimicking a personalized and expressive 3D talking face in minutes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94126", "id": "gjEzL0bamb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gjEzL0bamb", "openreview": "https://openreview.net/forum?id=gjEzL0bamb", "poster": "", "project": "", "author_site": "Zhenhui Ye, Tianyun Zhong, Yi Ren, Ziyue Jiang, Jiawei Huang, Rongjie Huang, Jinglin Liu, Jinzheng He, Chen Zhang, Zehan Wang, Xize Cheng, Xiang Yin, Zhou Zhao", "tldr": "", "abstract": "Talking face generation (TFG) aims to animate a target identity's face to create realistic talking videos. Personalized TFG is a variant that emphasizes the perceptual identity similarity of the synthesized result (from the perspective of appearance and talking style). While previous works typically solve this problem by learning an individual neural radiance field (NeRF) for each identity to implicitly store its static and dynamic information, we find it inefficient and non-generalized due to the per-identity-per-training framework and the limited training data. To this end, we propose MimicTalk, the first attempt that exploits the rich knowledge from a NeRF-based person-agnostic generic model for improving the efficiency and robustness of personalized TFG. To be specific, (1) we first come up with a person-agnostic 3D TFG model as the base model and propose to adapt it into a specific identity; (2) we propose a static-dynamic-hybrid adaptation pipeline to help the model learn the personalized static appearance and facial dynamic features; (3) To generate the facial motion of the personalized talking style, we propose an in-context stylized audio-to-motion model that mimics the implicit talking style provided in the reference video without information loss by an explicit style representation. The adaptation process to an unseen identity can be performed in 15 minutes, which is 47 times faster than previous person-dependent methods. Experiments show that our MimicTalk surpasses previous baselines regarding video quality, efficiency, and expressiveness. Video samples are available at https://mimictalk.github.io .", "keywords": "Talking Face Generation; Personalization; Talking Style Control", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhenhui Ye;Tianyun Zhong;Yi Ren;Ziyue Jiang;Jiawei Huang;Rongjie Huang;Jinglin Liu;Jinzheng He;Chen Zhang;Zehan Wang;Xize Cheng;Xiang Yin;Zhou Zhao", "authorids": "~Zhenhui_Ye1;~Tianyun_Zhong3;~Yi_Ren2;~Ziyue_Jiang1;~Jiawei_Huang5;~Rongjie_Huang1;~Jinglin_Liu1;~Jinzheng_He1;~Chen_Zhang3;~Zehan_Wang2;~Xize_Cheng1;~Xiang_Yin2;~Zhou_Zhao3", "gender": "M;M;M;M;M;M;M;;F;M;M;M;M", "homepage": "https://yerfor.github.io;;https://rayeren.github.io/;;;;;;https://actuy.github.io/;https://github.com/12zehan17;https://exgc.github.io/;;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "265/6375;;75/6568-6;258/6865;13/4208-8;212/8936-1;;272/8857;94/4084-20;126/7826-1;334/2167;18/1022-6.html;75/7785", "google_scholar": ";https://scholar.google.co.jp/scholar?hl=zh-CN;4FA6C0AAAAAJ;wDgSBssAAAAJ;https://scholar.google.com/citations?hl=zh-CN;iRHBUsgAAAAJ;Ri8x0jEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;eBBFeVcAAAAJ;euXK0lkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;e6_J-lEAAAAJ;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": ";;;;;;;;;0009-0007-7509-7563;0000-0001-9708-3225;;0000-0001-6121-0384", "linkedin": ";;;;;;;;;;;;", "or_profile": "~Zhenhui_Ye1;~Tianyun_Zhong3;~Yi_Ren2;~Ziyue_Jiang1;~Jiawei_Huang5;~Rongjie_Huang1;~Jinglin_Liu1;~Jinzheng_He1;~Chen_Zhang3;~Zehan_Wang2;~Xize_Cheng1;~Xiang_Yin2;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University;ByteDance;Zhejiang University;Zhejiang University;Zhejiang University;ByteDance;Zhejiang University;Bytedance;Zhejiang University;Zhejiang University;ByteDance Inc.;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn;bytedance.com;zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn", "position": "PhD student;MS student;Researcher;PhD student;MS student;MS student;Research Scientist;MS student;Research Scientist;PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nye2024mimictalk,\ntitle={MimicTalk: Mimicking a personalized and expressive 3D talking face in minutes},\nauthor={Zhenhui Ye and Tianyun Zhong and Yi Ren and Ziyue Jiang and Jiawei Huang and Rongjie Huang and Jinglin Liu and Jinzheng He and Chen Zhang and Zehan Wang and Xize Cheng and Xiang Yin and Zhou Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gjEzL0bamb}\n}", "github": "", "reviewers": "dHKC;TKxi;fWzj;iWSN", "pdf_size": 1302574, "rating": "5;5;5;7", "confidence": "3;5;3;4", "soundness": "3;3;2;4", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "81;77;74;63", "wc_strengths": "97;33;64;117", "wc_weaknesses": "123;49;250;26", "wc_questions": "3;100;5;7", "wc_limitations": "8;26;5;5", "wc_review": "312;285;398;218", "wc_reply_reviewers": "0;209;0;45", "wc_reply_authors": "103;1527;521;67", "reply_reviewers": "0;2;0;1", "reply_authors": "3;4;4;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 6.684870978560469 ], "wc_strengths_avg": [ 77.75, 32.02635633349507 ], "wc_weaknesses_avg": [ 112.0, 87.36418030291362 ], "wc_questions_avg": [ 28.75, 41.16050898616294 ], "wc_limitations_avg": [ 11.0, 8.74642784226795 ], "wc_review_avg": [ 303.25, 64.52664178461482 ], "wc_reply_reviewers_avg": [ 63.5, 85.98982497947068 ], "wc_reply_authors_avg": [ 554.5, 589.1491746578281 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6584164374916365642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn;bytedance.com;zju.edu.cn;zju.edu.cn;bytedance.com;zju.edu.cn", "author_num": 13, "aff_unique_index": "0;0;1;0;0;0;1;0;1;0;0;1;0", "aff_unique_norm": "Zhejiang University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "ZJU;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Improving the Worst-Case Bidirectional Communication Complexity for Nonconvex Distributed Optimization under Function Similarity", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94125", "id": "gkJ5nBIOU4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gkJ5nBIOU4", "openreview": "https://openreview.net/forum?id=gkJ5nBIOU4", "poster": "/media/PosterPDFs/NeurIPS%202024/94125.png?t=1731349201.6054716", "project": "", "author_site": "Kaja Gruntkowska, Alexander Tyurin, Peter Richtarik", "tldr": "", "abstract": "Effective communication between the server and workers plays a key role in distributed optimization. In this paper, we focus on optimizing communication, uncovering inefficiencies in prevalent downlink compression approaches. Considering first the pure setup where the uplink communication costs are negligible, we introduce MARINA-P, a novel method for downlink compression, employing a collection of correlated compressors. Theoretical analysis demonstrates that MARINA-P with permutation compressors can achieve a server-to-worker communication complexity improving with the number of workers, thus being provably superior to existing algorithms. We further show that MARINA-P can serve as a starting point for extensions such as methods supporting bidirectional compression: we introduce M3, a method combining MARINA-P with uplink compression and a momentum step, achieving bidirectional compression with provable improvements in total communication complexity as the number of workers increases. Theoretical findings align closely with empirical experiments, underscoring the efficiency of the proposed algorithms.", "keywords": "Nonconvex Optimization;Distributed Optimization;Federated Learning;Compressed Communication", "primary_area": "optimization", "supplementary_material": "/attachment/7a8ab4108108e510981554ecc25d0720318c781e.zip", "author": "Kaja Gruntkowska;Alexander Tyurin;Peter Richt\u00e1rik", "authorids": "~Kaja_Gruntkowska1;~Alexander_Tyurin1;~Peter_Richt\u00e1rik1", "gender": "F;M;M", "homepage": ";https://k3nfalt.github.io/;https://richtarik.org", "dblp": ";203/8919;62/8001", "google_scholar": "H0W8ADAAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-4380-5848", "linkedin": "kaja-gruntkowska-a240a924b;;richtarik/", "or_profile": "~Kaja_Gruntkowska1;~Alexander_Tyurin1;~Peter_Richtarik1", "aff": "King Abdullah University of Science and Technology;KAUST;King Abdullah University of Science and Technology (KAUST)", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ngruntkowska2024improving,\ntitle={Improving the Worst-Case Bidirectional Communication Complexity for Nonconvex Distributed Optimization under Function Similarity},\nauthor={Kaja Gruntkowska and Alexander Tyurin and Peter Richt{\\'a}rik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gkJ5nBIOU4}\n}", "github": "", "reviewers": "r7n3;vddK;K3Nf;1G9G", "pdf_size": 3169443, "rating": "5;6;7;7", "confidence": "4;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "3;4;4;3", "wc_summary": "95;87;76;227", "wc_strengths": "67;35;85;80", "wc_weaknesses": "172;60;109;75", "wc_questions": "32;66;86;204", "wc_limitations": "2;35;7;12", "wc_review": "368;283;363;598", "wc_reply_reviewers": "277;13;87;55", "wc_reply_authors": "676;0;190;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 121.25, 61.42627695050385 ], "wc_strengths_avg": [ 66.75, 19.472737352514155 ], "wc_weaknesses_avg": [ 104.0, 43.087121045621046 ], "wc_questions_avg": [ 97.0, 64.72248450113763 ], "wc_limitations_avg": [ 14.0, 12.62933094031509 ], "wc_review_avg": [ 403.0, 117.52659273543158 ], "wc_reply_reviewers_avg": [ 108.0, 101.03959619871806 ], "wc_reply_authors_avg": [ 216.5, 276.3996201155132 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=709086109870291946&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Federated Fine-tuning of Large Language Models under Heterogeneous Tasks and Client Resources", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94124", "id": "gkOzoHBXUw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gkOzoHBXUw", "openreview": "https://openreview.net/forum?id=gkOzoHBXUw", "poster": "/media/PosterPDFs/NeurIPS%202024/94124.png?t=1732643668.6698241", "project": "", "author_site": "Jiamu Bai, Daoyuan Chen, Bingchen Qian, Liuyi Yao, Yaliang Li", "tldr": "", "abstract": "Federated Learning (FL) has recently been applied to the parameter-efficient fine-tuning of Large Language Models (LLMs). While promising, it raises significant challenges due to the heterogeneous resources and data distributions of clients.This study introduces FlexLoRA, a simple yet effective aggregation scheme for LLM fine-tuning, which mitigates the \"buckets effect\" in traditional FL that restricts the potential of clients with ample resources by tying them to the capabilities of the least-resourced participants. FlexLoRA allows for dynamic adjustment of local LoRA ranks, fostering the development of a global model imbued with broader, less task-specific knowledge. By synthesizing a full-size LoRA weight from individual client contributions and employing Singular Value Decomposition (SVD) for weight redistribution, FlexLoRA fully leverages heterogeneous client resources. Involving thousands of clients performing heterogeneous NLP tasks and client resources, our experiments validate the efficacy of FlexLoRA, with the federated global model achieving consistently better improvement over SOTA FL methods in downstream NLP task performance across various heterogeneous distributions. FlexLoRA's practicality is further underscored by our theoretical analysis and its seamless integration with existing LoRA-based FL methods, offering a path toward cross-device, privacy-preserving federated tuning for LLMs.", "keywords": "Federated Learning;Large Language Models;Parameter Efficient Fine-tuning", "primary_area": "other", "supplementary_material": "", "author": "Jiamu Bai;Daoyuan Chen;Bingchen Qian;Liuyi Yao;Yaliang Li", "authorids": "~Jiamu_Bai1;~Daoyuan_Chen1;~Bingchen_Qian1;~Liuyi_Yao1;~Yaliang_Li1", "gender": "F;M;M;F;M", "homepage": ";https://yxdyc.github.io/;;;https://sites.google.com/site/yaliangli/", "dblp": "331/3706;217/4891;294/3682.html;219/1767;https://dblp.org/pers/hd/l/Li:Yaliang", "google_scholar": ";https://scholar.google.com.hk/citations?user=1GdfinUAAAAJ;;0c5is-gAAAAJ;CCPBcdYAAAAJ", "orcid": ";0000-0002-8015-2121;;;0000-0002-4204-6096", "linkedin": "jiamu-bai-4b86511b5/;;;;", "or_profile": "~Jiamu_Bai1;~Daoyuan_Chen1;~Bingchen_Qian1;~Liuyi_Yao1;~Yaliang_Li1", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Intern;Staff;Researcher;Staff Engineer;Staff Engineer", "bibtex": "@inproceedings{\nbai2024federated,\ntitle={Federated Fine-tuning of Large Language Models under Heterogeneous Tasks and Client Resources},\nauthor={Jiamu Bai and Daoyuan Chen and Bingchen Qian and Liuyi Yao and Yaliang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gkOzoHBXUw}\n}", "github": "", "reviewers": "ug2H;V4JD;18q4;XizQ", "pdf_size": 750338, "rating": "5;6;7;7", "confidence": "4;4;2;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "2;2;3;3", "wc_summary": "134;73;34;63", "wc_strengths": "36;63;27;51", "wc_weaknesses": "178;104;112;91", "wc_questions": "17;12;4;4", "wc_limitations": "63;15;31;1", "wc_review": "428;267;208;210", "wc_reply_reviewers": "14;13;0;21", "wc_reply_authors": "63;31;0;30", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.0, 36.42114770294862 ], "wc_strengths_avg": [ 44.25, 13.808964479641476 ], "wc_weaknesses_avg": [ 121.25, 33.61082414937188 ], "wc_questions_avg": [ 9.25, 5.539629951540085 ], "wc_limitations_avg": [ 27.5, 23.08137777516758 ], "wc_review_avg": [ 278.25, 89.64478512440085 ], "wc_reply_reviewers_avg": [ 12.0, 7.582875444051551 ], "wc_reply_authors_avg": [ 31.0, 22.282279955157193 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14976702556702806172&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CigTime: Corrective Instruction Generation Through Inverse Motion Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94123", "id": "gktA1Qycj9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gktA1Qycj9", "openreview": "https://openreview.net/forum?id=gktA1Qycj9", "poster": "/media/PosterPDFs/NeurIPS%202024/94123.png?t=1731743975.0240004", "project": "", "author_site": "Qihang Fang, Chengcheng Tang, Bugra Tekin, Yanchao Yang", "tldr": "", "abstract": "Recent advancements in models linking natural language with human motions have shown significant promise in motion generation and editing based on instructional text. Motivated by applications in sports coaching and motor skill learning, we investigate the inverse problem: generating corrective instructional text, leveraging motion editing and generation models. We introduce a novel approach that, given a user\u2019s current motion (source) and the desired motion (target), generates text instructions to guide the user towards achieving the target motion. We leverage large language models to generate corrective texts and utilize existing motion generation and editing frameworks to compile datasets of triplets (source motion, target motion, and corrective text). Using this data, we propose a new motion-language model for generating corrective instructions. We present both qualitative and quantitative results across a diverse range of applications that largely improve upon baselines. Our approach demonstrates its effectiveness in instructional scenarios, offering text-based guidance to correct and enhance user performance.", "keywords": "Correctional Instruction Generation", "primary_area": "machine_vision", "supplementary_material": "/attachment/7dc300066130a9e950763f91112511d688fd1730.zip", "author": "Qihang Fang;Chengcheng Tang;Bugra Tekin;Yanchao Yang", "authorids": "~Qihang_Fang4;~Chengcheng_Tang1;~Bugra_Tekin4;~Yanchao_Yang1", "gender": "M;M;M;M", "homepage": "https://qhfang.github.io/;;https://btekin.github.io/;https://yanchaoyang.github.io/", "dblp": ";130/8880.html;136/4930;84/8637-1", "google_scholar": "ulI2Fe8AAAAJ;WbG27wQAAAAJ;3fa02HAAAAAJ;r2tKnV4AAAAJ", "orcid": "0009-0003-5439-6821;0000-0002-4875-6670;;", "linkedin": ";;;", "or_profile": "~Qihang_Fang4;~Chengcheng_Tang1;~Bugra_Tekin4;~Yanchao_Yang1", "aff": "Meta Facebook;Meta;Meta;University of Hong Kong", "aff_domain": "meta.com;meta.com;fb.com;hku.hk", "position": "Intern;Research Scientist;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nfang2024cigtime,\ntitle={CigTime: Corrective Instruction Generation Through Inverse Motion Editing},\nauthor={Qihang Fang and Chengcheng Tang and Bugra Tekin and Yanchao Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gktA1Qycj9}\n}", "github": "", "reviewers": "cTdo;cjte;A1J6;bYSe", "pdf_size": 3196161, "rating": "5;5;5;6", "confidence": "4;4;5;5", "soundness": "2;3;3;3", "novelty": "3;2;2;2", "presentation": "3;3;3;2", "wc_summary": "120;53;35;97", "wc_strengths": "48;52;51;38", "wc_weaknesses": "177;82;232;50", "wc_questions": "48;27;47;240", "wc_limitations": "8;7;6;8", "wc_review": "401;221;371;433", "wc_reply_reviewers": "84;48;0;78", "wc_reply_authors": "65;94;0;99", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.25, 33.862774546690645 ], "wc_strengths_avg": [ 47.25, 5.539629951540085 ], "wc_weaknesses_avg": [ 135.25, 72.81268776799824 ], "wc_questions_avg": [ 90.5, 86.71937499774776 ], "wc_limitations_avg": [ 7.25, 0.82915619758885 ], "wc_review_avg": [ 356.5, 81.24499984614438 ], "wc_reply_reviewers_avg": [ 52.5, 33.237779709240506 ], "wc_reply_authors_avg": [ 64.5, 39.436658073421995 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:taVEGUsskqAJ:scholar.google.com/&scioq=CigTime:+Corrective+Instruction+Generation+Through+Inverse+Motion+Editing&hl=en&as_sdt=0,21", "gs_version_total": 5, "email": "meta.com;meta.com;fb.com;hku.hk", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Meta;University of Hong Kong", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.hku.hk", "aff_unique_abbr": "Meta;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Learning to Understand: Identifying Interactions via the M\u00f6bius Transform", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94122", "id": "glGeXu1zG4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=glGeXu1zG4", "openreview": "https://openreview.net/forum?id=glGeXu1zG4", "poster": "/media/PosterPDFs/NeurIPS%202024/94122.png?t=1732746284.1310208", "project": "", "author_site": "Justin Kang, Yigit Efe Erginbas, Landon Butler, Ramtin Pedarsani, Kannan Ramchandran", "tldr": "", "abstract": "One of the key challenges in machine learning is to find interpretable representations of learned functions. The M\u00f6bius transform is essential for this purpose, as its coefficients correspond to unique *importance scores* for *sets of input variables*. This transform is closely related to widely used game-theoretic notions of importance like the *Shapley* and *Bhanzaf value*, but it also captures crucial higher-order interactions. Although computing the M\u00f6bius Transform of a function with $n$ inputs involves $2^n$ coefficients, it becomes tractable when the function is *sparse* and of *low-degree* as we show is the case for many real-world functions. Under these conditions, the complexity of the transform computation is significantly reduced. When there are $K$ non-zero coefficients, our algorithm recovers the M\u00f6bius transform in $O(Kn)$ samples and $O(Kn^2)$ time asymptotically under certain assumptions, the first non-adaptive algorithm to do so. We also uncover a surprising connection between group testing and the M\u00f6bius transform. For functions where all interactions involve at most $t$ inputs, we use group testing results to compute the M\u00f6bius transform with $O(Kt\\log n)$ sample complexity and $O(K\\mathrm{poly}(n))$ time. A robust version of this algorithm withstands noise and maintains this complexity. This marks the first $n$ sub-linear query complexity, noise-tolerant algorithm for the M\u00f6bius transform. While our algorithms are conceptualized in an idealized setting, they indicate that the M\u00f6bius transform is a potent tool for interpreting deep learning models.", "keywords": "Shapley Value;Importance Scores;Transforms;Signal Processing;Interactions;Group Testing", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Justin Singh Kang;Yigit Efe Erginbas;Landon Butler;Ramtin Pedarsani;Kannan Ramchandran", "authorids": "~Justin_Singh_Kang1;~Yigit_Efe_Erginbas1;~Landon_Butler1;~Ramtin_Pedarsani1;~Kannan_Ramchandran1", "gender": "Not Specified;M;M;M;M", "homepage": "https://justinkang221.github.io/;https://erginbas.github.io/;https://landonbutler.github.io/;https://web.ece.ucsb.edu/~ramtin/index.html;https://www.eecs.berkeley.edu/~kannanr/", "dblp": "271/4534;;287/4521.html;;53/5765", "google_scholar": "0I2yD4EAAAAJ;;ldjHUeEAAAAJ;gKUEEY4AAAAJ;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ", "orcid": "0000-0002-3977-6848;0000-0001-5010-9766;;;0000-0002-4567-328X", "linkedin": "justin-kang-uoft/;;;;", "or_profile": "~Justin_Singh_Kang1;~Yigit_Efe_Erginbas1;~Landon_Butler1;~Ramtin_Pedarsani1;~Kannan_Ramchandran1", "aff": "University of California, Berkeley;Uber;University of California, Berkeley;University of California, Santa Barbara;University of California, Berkeley", "aff_domain": "berkeley.edu;uber.com;berkeley.edu;ucsb.edu;berkeley.edu", "position": "PhD student;Intern;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkang2024learning,\ntitle={Learning to Understand: Identifying Interactions via the M\\\"obius Transform},\nauthor={Justin Singh Kang and Yigit Efe Erginbas and Landon Butler and Ramtin Pedarsani and Kannan Ramchandran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=glGeXu1zG4}\n}", "github": "", "reviewers": "vNmZ;kQft;jcxh", "pdf_size": 1006479, "rating": "5;7;7", "confidence": "5;3;5", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "1;3;3", "wc_summary": "12;202;45", "wc_strengths": "26;73;90", "wc_weaknesses": "302;147;906", "wc_questions": "33;58;38", "wc_limitations": "17;12;1", "wc_review": "390;492;1080", "wc_reply_reviewers": "95;116;60", "wc_reply_authors": "380;46;933", "reply_reviewers": "1;1;1", "reply_authors": "2;2;5", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 86.33333333333333, 82.89082511888085 ], "wc_strengths_avg": [ 63.0, 27.067816067549053 ], "wc_weaknesses_avg": [ 451.6666666666667, 327.43481115415256 ], "wc_questions_avg": [ 43.0, 10.801234497346433 ], "wc_limitations_avg": [ 10.0, 6.683312551921141 ], "wc_review_avg": [ 654.0, 304.0920913144569 ], "wc_reply_reviewers_avg": [ 90.33333333333333, 23.098821518760552 ], "wc_reply_authors_avg": [ 453.0, 365.7767989726339 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1717967738963991903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "berkeley.edu;uber.com;berkeley.edu;ucsb.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of California, Berkeley;Uber Technologies Inc.;University of California, Santa Barbara", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.uber.com;https://www.ucsb.edu", "aff_unique_abbr": "UC Berkeley;Uber;UCSB", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Berkeley;;Santa Barbara", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Selective Generation for Controllable Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94121", "id": "glfYOAzh2f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=glfYOAzh2f", "openreview": "https://openreview.net/forum?id=glfYOAzh2f", "poster": "/media/PosterPDFs/NeurIPS%202024/94121.png?t=1733661782.4180472", "project": "", "author_site": "Minjae Lee, Kyungmin Kim, Taesoo Kim, Sangdon Park", "tldr": "", "abstract": "Trustworthiness of generative language models (GLMs) is crucial in their deployment to critical decision making systems. Hence, certified risk control methods such as selective prediction and conformal prediction have been applied to mitigating the hallucination problem in various supervised downstream tasks. However, the lack of appropriate correctness metric hinders applying such principled methods to language generation tasks. In this paper, we circumvent this problem by leveraging the concept of textual entailment to evaluate the correctness of the generated sequence, and propose two selective generation algorithms which control the false discovery rate with respect to the textual entailment relation (FDR-E) with a theoretical guarantee: $\\texttt{SGen}^{\\texttt{Sup}}$ and $\\texttt{SGen}^{\\texttt{Semi}}$. $\\texttt{SGen}^{\\texttt{Sup}}$, a direct modification of the selective prediction, is a supervised learning algorithm which exploits entailment-labeled data, annotated by humans. Since human annotation is costly, we further propose a semi-supervised version, $\\texttt{SGen}^{\\texttt{Semi}}$, which fully utilizes the unlabeled data by pseudo-labeling, leveraging an entailment set function learned via conformal prediction. Furthermore, $\\texttt{SGen}^{\\texttt{Semi}}$ enables to use more general class of selection functions, neuro-selection functions, and provides users with an optimal selection function class given multiple candidates. Finally, we demonstrate the efficacy of the $\\texttt{SGen}$ family in achieving a desired FDR-E level with comparable selection efficiency to those from baselines on both open and closed source GLMs. Code and datasets are provided at https://github.com/ml-postech/selective-generation.", "keywords": "trustworthy AI;selective classification;conformal prediction;hallucination;LLM", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Minjae Lee;Kyungmin Kim;Taesoo Kim;Sangdon Park", "authorids": "~Minjae_Lee5;~Kyungmin_Kim3;~Taesoo_Kim1;~Sangdon_Park1", "gender": "M;M;M;M", "homepage": "https://ml.postech.ac.kr/;https://kmkim95.github.io;https://taesoo.kim/;https://sangdon.github.io/", "dblp": ";;;119/1530-1", "google_scholar": ";O7m-xYgAAAAJ;;Vi2E2F4AAAAJ", "orcid": ";0009-0005-4944-2381;;", "linkedin": ";;;", "or_profile": "~Minjae_Lee5;~Kyungmin_Kim3;~Taesoo_Kim1;~Sangdon_Park1", "aff": "Pohang University of Science and Technology;Pohang University of Science and Technology;Georgia Institute of Technology;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr;gatech.edu;postech.ac.kr", "position": "PhD student;MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2024selective,\ntitle={Selective Generation for Controllable Language Models},\nauthor={Minjae Lee and Kyungmin Kim and Taesoo Kim and Sangdon Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=glfYOAzh2f}\n}", "github": "", "reviewers": "abKP;UCLF;7eme;KeFc", "pdf_size": 808237, "rating": "3;6;7;8", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "158;99;215;136", "wc_strengths": "53;99;40;71", "wc_weaknesses": "264;116;103;143", "wc_questions": "83;1;41;70", "wc_limitations": "10;1;34;34", "wc_review": "568;316;433;454", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "37;0;30;0", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 152.0, 42.04164601915581 ], "wc_strengths_avg": [ 65.75, 22.128883839904805 ], "wc_weaknesses_avg": [ 156.5, 63.7200910231616 ], "wc_questions_avg": [ 48.75, 31.483130403439873 ], "wc_limitations_avg": [ 19.75, 14.600941750448838 ], "wc_review_avg": [ 442.75, 89.40742418837488 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 16.75, 16.931848688197046 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6172133998483676, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7082816536308921007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "postech.ac.kr;postech.ac.kr;gatech.edu;postech.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Pohang University of Science and Technology;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.gatech.edu", "aff_unique_abbr": "POSTECH;Georgia Tech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pohang;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Metric Space Magnitude for Evaluating the Diversity of Latent Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94120", "id": "glgZZAfssH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=glgZZAfssH", "openreview": "https://openreview.net/forum?id=glgZZAfssH", "poster": "/media/PosterPDFs/NeurIPS%202024/94120.png?t=1732182867.6047618", "project": "", "author_site": "Katharina Limbeck, Rayna Andreeva, Rik Sarkar, Bastian Rieck", "tldr": "", "abstract": "The *magnitude* of a metric space is a novel\ninvariant that provides a measure of the 'effective size' of a space across\nmultiple scales, while also capturing numerous geometrical properties, such as curvature, density, or entropy.\nWe develop a family of magnitude-based measures of the intrinsic\ndiversity of latent representations, formalising a novel notion of\ndissimilarity between magnitude functions of finite metric spaces.\nOur measures are provably stable under perturbations of the data, can be\nefficiently calculated, and enable a rigorous multi-scale characterisation and comparison of\nlatent representations. \nWe show their utility and superior performance across different domains and tasks, including\nthe automated estimation of diversity,\nthe detection of mode collapse, and\nthe evaluation of generative models for text, image, and graph data.", "keywords": "diversity evaluation;generative model evaluation;metric space magnitude;geometric machine learning", "primary_area": "evaluation", "supplementary_material": "/attachment/7ac8774683cedb98b79d83340ef63954f7ef9572.zip", "author": "Katharina Limbeck;Rayna Andreeva;Rik Sarkar;Bastian Rieck", "authorids": "~Katharina_Limbeck1;~Rayna_Andreeva1;~Rik_Sarkar1;~Bastian_Rieck1", "gender": ";;M;M", "homepage": ";https://rorondre.github.io/about/;https://homepages.inf.ed.ac.uk/rsarkar/;https://bastian.rieck.me", "dblp": ";;82/4961;119/8860", "google_scholar": ";scwihDYAAAAJ;rmMWizEAAAAJ;https://scholar.google.ch/citations?user=La7zuKQAAAAJ", "orcid": ";;;0000-0003-4335-0302", "linkedin": ";;;br-ml/", "or_profile": "~Katharina_Limbeck1;~Rayna_Andreeva1;~Rik_Sarkar1;~Bastian_Rieck1", "aff": ";University of Edinburgh;University of Edinburgh;Helmholtz Zentrum M\u00fcnchen", "aff_domain": ";ed.ac.uk;ed.ac.uk;helmholtz-munich.de", "position": ";PhD student;Associate Professor;Principal Investigator", "bibtex": "@inproceedings{\nlimbeck2024metric,\ntitle={Metric Space Magnitude for Evaluating the Diversity of Latent Representations},\nauthor={Katharina Limbeck and Rayna Andreeva and Rik Sarkar and Bastian Rieck},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=glgZZAfssH}\n}", "github": "", "reviewers": "cXLQ;vep5;WoGi", "pdf_size": 12706627, "rating": "6;6;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "54;35;77", "wc_strengths": "28;51;58", "wc_weaknesses": "67;59;70", "wc_questions": "118;54;41", "wc_limitations": "51;8;2", "wc_review": "318;207;248", "wc_reply_reviewers": "0;19;14", "wc_reply_authors": "0;0;1562", "reply_reviewers": "0;1;1", "reply_authors": "1;1;5", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.333333333333336, 17.172329163188344 ], "wc_strengths_avg": [ 45.666666666666664, 12.81492185782739 ], "wc_weaknesses_avg": [ 65.33333333333333, 4.642796092394706 ], "wc_questions_avg": [ 71.0, 33.65511352924941 ], "wc_limitations_avg": [ 20.333333333333332, 21.82251640444388 ], "wc_review_avg": [ 257.6666666666667, 45.828181528642645 ], "wc_reply_reviewers_avg": [ 11.0, 8.04155872120988 ], "wc_reply_authors_avg": [ 520.6666666666666, 736.3338614755916 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13276047666130597067&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ed.ac.uk;ed.ac.uk;helmholtz-munich.de", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Edinburgh;Helmholtz Zentrum M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.helmholtz-muenchen.de", "aff_unique_abbr": "Edinburgh;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Germany" }, { "title": "SARAD: Spatial Association-Aware Anomaly Detection and Diagnosis for Multivariate Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94119", "id": "gmf5Aj01Hz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gmf5Aj01Hz", "openreview": "https://openreview.net/forum?id=gmf5Aj01Hz", "poster": "", "project": "", "author_site": "Zhihao Dai, Ligang He, Shuanghua Yang, Matthew Leeke", "tldr": "", "abstract": "Anomaly detection in time series data is fundamental to the design, deployment, and evaluation of industrial control systems. Temporal modeling has been the natural focus of anomaly detection approaches for time series data. However, the focus on temporal modeling can obscure or dilute the spatial information that can be used to capture complex interactions in multivariate time series. In this paper, we propose SARAD, an approach that leverages spatial information beyond data autoencoding errors to improve the detection and diagnosis of anomalies. SARAD trains a Transformer to learn the spatial associations, the pairwise inter-feature relationships which ubiquitously characterize such feedback-controlled systems. As new associations form and old ones dissolve, SARAD applies subseries division to capture their changes over time. Anomalies exhibit association descending patterns, a key phenomenon we exclusively observe and attribute to the disruptive nature of anomalies detaching anomalous features from others. To exploit the phenomenon and yet dismiss non-anomalous descent, SARAD performs anomaly detection via autoencoding in the association space. We present experimental results to demonstrate that SARAD achieves state-of-the-art performance, providing robust anomaly detection and a nuanced understanding of anomalous events.", "keywords": "multivariate time series;anomaly detection;anomaly diagnosis;spatial associations", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/8f8439107f3a1240e886c5399f259a6715fe3d16.zip", "author": "Zhihao Dai;Ligang He;Shuanghua Yang;Matthew Leeke", "authorids": "~Zhihao_Dai1;~Ligang_He1;~Shuanghua_Yang1;~Matthew_Leeke2", "gender": "M;M;M;M", "homepage": "https://warwick.ac.uk/fac/sci/dcs/people/zhihao_dai/;https://www.dcs.warwick.ac.uk/~liganghe/;;https://www.birmingham.ac.uk/staff/profiles/computer-science/academic-staff/leeke-matthew.aspx", "dblp": ";36/5655;87/4298;", "google_scholar": "9Cskrc0AAAAJ;https://scholar.google.co.uk/citations?user=g3lthZYAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0003-4007-6862;;;", "linkedin": ";;;", "or_profile": "~Zhihao_Dai1;~Ligang_He1;~Shuanghua_Yang1;~Matthew_Leeke2", "aff": "University of Warwick;The University of Warwick;Reading University;University of Birmingham", "aff_domain": "warwick.ac.uk;warwick.ac.uk;reading.ac.uk;bham.ac.uk", "position": "PhD student;Assistant professor, Associate professor, Reader;Full Professor;Professor", "bibtex": "@inproceedings{\ndai2024sarad,\ntitle={{SARAD}: Spatial Association-Aware Anomaly Detection and Diagnosis for Multivariate Time Series},\nauthor={Zhihao Dai and Ligang He and Shuanghua Yang and Matthew Leeke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gmf5Aj01Hz}\n}", "github": "", "reviewers": "KV18;BjR8;RKBC;7FD6", "pdf_size": 2144164, "rating": "5;5;6;7", "confidence": "3;4;3;5", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "2;2;2;3", "wc_summary": "86;64;56;162", "wc_strengths": "24;1;37;51", "wc_weaknesses": "182;151;65;78", "wc_questions": "105;4;61;31", "wc_limitations": "16;4;1;30", "wc_review": "413;224;220;352", "wc_reply_reviewers": "113;16;16;16", "wc_reply_authors": "711;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 41.88078318274385 ], "wc_strengths_avg": [ 28.25, 18.403464347779742 ], "wc_weaknesses_avg": [ 119.0, 48.964272689380365 ], "wc_questions_avg": [ 50.25, 37.49249924984996 ], "wc_limitations_avg": [ 12.75, 11.431863365173676 ], "wc_review_avg": [ 302.25, 83.10949103441797 ], "wc_reply_reviewers_avg": [ 40.25, 42.00223208354527 ], "wc_reply_authors_avg": [ 177.75, 307.8720310453679 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16510964907492233593&as_sdt=10005&sciodt=0,8&hl=en", "gs_version_total": 4, "email": "warwick.ac.uk;warwick.ac.uk;reading.ac.uk;bham.ac.uk", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Warwick;University of Reading;University of Birmingham", "aff_unique_dep": ";;", "aff_unique_url": "https://www.warwick.ac.uk;https://www.reading.ac.uk;https://www.birmingham.ac.uk", "aff_unique_abbr": "Warwick;Reading;Birmingham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "PIVOT-R: Primitive-Driven Waypoint-Aware World Model for Robotic Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94118", "id": "gnXTDQyxlU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gnXTDQyxlU", "openreview": "https://openreview.net/forum?id=gnXTDQyxlU", "poster": "/media/PosterPDFs/NeurIPS%202024/94118.png?t=1731383946.0071616", "project": "", "author_site": "Kaidong Zhang, Pengzhen Ren, Bingqian Lin, Junfan Lin, Shikui Ma, Hang Xu, Xiaodan Liang", "tldr": "", "abstract": "Language-guided robotic manipulation is a challenging task that requires an embodied agent to follow abstract user instructions to accomplish various complex manipulation tasks. Previous work generally maps instructions and visual perceptions directly to low-level executable actions, neglecting the modeling of critical waypoints (e.g., key states of \u201cclose to/grab/move up\u201d in action trajectories) in manipulation tasks.\nTo address this issue, we propose a PImitive-driVen waypOinT-aware world model for Robotic manipulation (PIVOT-R) that focuses solely on the prediction of task-relevant waypoints. Specifically, PIVOT-R consists of a Waypoint-aware World Model (WAWM) and a lightweight action prediction module. The former performs primitive action parsing and primitive-driven waypoint prediction, while the latter focuses on decoding low-level actions. Additionally, we also design an asynchronous hierarchical executor (AHE) for PIVOT-R, which can use different execution frequencies for different modules of the model, thereby helping the model reduce computational redundancy and improve model execution efficiency. Our PIVOT-R outperforms state-of-the-art (SoTA) open-source models on the SeaWave benchmark, achieving an average relative improvement of 19.45% across four levels of instruction tasks. Moreover, compared to the synchronously executed PIVOT-R, the execution efficiency of PIVOT-R with AHE is increased by 28-fold, with only a 2.9% drop in performance. These results provide compelling evidence that our PIVOT-R can significantly improve both the performance and efficiency of robotic manipulation.", "keywords": "Robot manipulation; World model", "primary_area": "robotics", "supplementary_material": "/attachment/d05e98e7d4eb4305c7ec7a7591cce04fb8327f25.zip", "author": "Kaidong Zhang;Pengzhen Ren;Bingqian Lin;Junfan Lin;Shikui Ma;Hang Xu;Xiaodan Liang", "authorids": "~Kaidong_Zhang2;~Pengzhen_Ren2;~Bingqian_Lin1;~Junfan_Lin1;~Shikui_Ma1;~Hang_Xu1;~Xiaodan_Liang2", "gender": "M;;;M;M;F;M", "homepage": "https://github.com/abliao;;https://expectorlin.github.io/;https://github.com/junfanlin;;https://www.sysu-hcp.net/;https://www.linkedin.cn/in/mas2902", "dblp": ";222/7912;225/5474;260/6800;;;227/3175", "google_scholar": ";;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;voxznZAAAAAJ;", "orcid": ";;0000-0002-8763-9530;;0000-0003-3645-8972;;0000-0002-8340-9762", "linkedin": ";;;;;;https://www.linkedin.cn/in/mas2902", "or_profile": "~Kaidong_Zhang2;~Pengzhen_Ren2;~Bingqian_Lin1;~Junfan_Lin1;~Hang_Xu1;~Xiaodan_Liang2;~Mas_Ma1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Pengcheng Laboratory;Huawei Noah\u2018s Ark Lab;SUN YAT-SEN UNIVERSITY;Dataa Robotics", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;pcl.ac.cn;huawei.com;sysu.edu.cn;dataarobotics.com", "position": "Undergrad student;Postdoc;PhD student;Postdoc;Researcher;Associate Professor;VP", "bibtex": "@inproceedings{\nzhang2024pivotr,\ntitle={{PIVOT}-R: Primitive-Driven Waypoint-Aware World Model for Robotic Manipulation},\nauthor={Kaidong Zhang and Pengzhen Ren and Bingqian Lin and Junfan Lin and Shikui Ma and Hang Xu and Xiaodan Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gnXTDQyxlU}\n}", "github": "", "reviewers": "9FHi;eMP4;XYkg;F1Qx", "pdf_size": 4534673, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;4", "wc_summary": "83;52;89;76", "wc_strengths": "29;44;71;47", "wc_weaknesses": "477;224;175;71", "wc_questions": "30;72;21;163", "wc_limitations": "10;18;33;21", "wc_review": "629;410;389;378", "wc_reply_reviewers": "309;564;0;0", "wc_reply_authors": "872;603;0;0", "reply_reviewers": "2;2;0;0", "reply_authors": "4;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 75.0, 14.053469322555197 ], "wc_strengths_avg": [ 47.75, 15.056144924913548 ], "wc_weaknesses_avg": [ 236.75, 149.30568475446606 ], "wc_questions_avg": [ 71.5, 56.224994441973934 ], "wc_limitations_avg": [ 20.5, 8.261355820929152 ], "wc_review_avg": [ 451.5, 103.12249996969624 ], "wc_reply_reviewers_avg": [ 218.25, 236.13806872251666 ], "wc_reply_authors_avg": [ 368.75, 380.81713131107955 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18294883714306024225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;pcl.ac.cn;huawei.com;sysu.edu.cn;dataarobotics.com", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;3", "aff_unique_norm": "Sun Yat-sen University;Pengcheng Laboratory;Huawei;Dataa Robotics", "aff_unique_dep": ";;Noah's Ark Lab;", "aff_unique_url": "http://www.sysu.edu.cn;;https://www.huawei.com;", "aff_unique_abbr": "SYSU;;Huawei;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;Unknown" }, { "title": "In-Context Learning State Vector with Inner and Momentum Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94117", "id": "gnnmB7y0Xx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gnnmB7y0Xx", "openreview": "https://openreview.net/forum?id=gnnmB7y0Xx", "poster": "", "project": "", "author_site": "Dongfang Li, zhenyu liu, Xinshuo Hu, Zetian Sun, Baotian Hu, Min Zhang", "tldr": "", "abstract": "Large Language Models (LLMs) have exhibited an impressive ability to perform In-Context Learning (ICL) from only a few examples. Recent works have indicated that the functions learned by ICL can be represented through compressed vectors derived from the transformer. However, the working mechanisms and optimization of these vectors are yet to be thoroughly explored. In this paper, we address this gap by presenting a comprehensive analysis of these compressed vectors, drawing parallels to the parameters trained with gradient descent, and introducing the concept of state vector. Inspired by the works on model soup and momentum-based gradient descent, we propose inner and momentum optimization methods that are applied to refine the state vector progressively as test-time adaptation. Moreover, we simulate state vector aggregation in the multiple example setting, where demonstrations comprising numerous examples are usually too lengthy for regular ICL, and further propose a divide-and-conquer aggregation method to address this challenge. We conduct extensive experiments using Llama-2 and GPT-J in both zero-shot setting and few-shot setting. The experimental results show that our optimization method effectively enhances the state vector and achieves the state-of-the-art performance on diverse tasks.", "keywords": "in-context learning;language models;task representations;mechanistic interpretability", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9fc9b507eff83444f3c765bf888863ca5309b68a.zip", "author": "Dongfang Li;zhenyu liu;Xinshuo Hu;Zetian Sun;Baotian Hu;Min Zhang", "authorids": "~Dongfang_Li2;~zhenyu_liu4;~Xinshuo_Hu2;~Zetian_Sun1;~Baotian_Hu1;~Min_Zhang9", "gender": "M;M;M;M;M;M", "homepage": ";;https://github.com/zetian1025;;https://zhangmin-nlp-ai.github.io/;http://crazyofapple.github.io", "dblp": ";;;155/1902;83/5342-5;https://dblp.uni-trier.de/pid/98/6118.html", "google_scholar": ";Z3Cq8-4AAAAJ;;5NiJ1VoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;_OOzj40AAAAJ", "orcid": ";;;0000-0001-7490-684X;;", "linkedin": "foggyforest/;;;;;", "or_profile": "~zhenyu_liu4;~Xinshuo_Hu2;~Zetian_Sun1;~Baotian_Hu1;~Min_Zhang9;~dongfang_li1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;stu.hit.edu.cn;hhit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "MS student;MS student;MS student;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nli2024incontext,\ntitle={In-Context Learning State Vector with Inner and Momentum Optimization},\nauthor={Dongfang Li and zhenyu liu and Xinshuo Hu and Zetian Sun and Baotian Hu and Min Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gnnmB7y0Xx}\n}", "github": "", "reviewers": "4gb3;yzeU;pJQh;MvM2;FZSw", "pdf_size": 5206288, "rating": "4;4;6;7;7", "confidence": "4;4;3;5;4", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;2;2;2;3", "wc_summary": "33;52;87;90;82", "wc_strengths": "63;41;48;50;74", "wc_weaknesses": "59;124;91;79;87", "wc_questions": "22;2;55;211;189", "wc_limitations": "1;2;43;1;2", "wc_review": "178;221;324;431;434", "wc_reply_reviewers": "192;0;295;55;0", "wc_reply_authors": "1018;118;478;147;118", "reply_reviewers": "1;0;1;1;0", "reply_authors": "4;2;2;3;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 68.8, 22.444598459317557 ], "wc_strengths_avg": [ 55.2, 11.7881296226331 ], "wc_weaknesses_avg": [ 88.0, 21.109239683134017 ], "wc_questions_avg": [ 95.8, 87.02505386381556 ], "wc_limitations_avg": [ 9.8, 16.60602300371766 ], "wc_review_avg": [ 317.6, 105.13724363896935 ], "wc_reply_reviewers_avg": [ 108.4, 116.75718393315248 ], "wc_reply_authors_avg": [ 375.8, 348.75114336730144 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.23312620206007845, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13467769071575363789&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hit.edu.cn;hit.edu.cn;stu.hit.edu.cn;hhit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;1;1;0", "aff_campus_unique": "Harbin;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Boosting Vision-Language Models with Transduction", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94116", "id": "go4zzXBWVs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=go4zzXBWVs", "openreview": "https://openreview.net/forum?id=go4zzXBWVs", "poster": "/media/PosterPDFs/NeurIPS%202024/94116.png?t=1731679301.332827", "project": "", "author_site": "Maxime Zanella, Beno\u00eet G\u00e9rin, Ismail Ayed", "tldr": "", "abstract": "Transduction is a powerful paradigm that leverages the structure of unlabeled data to boost predictive accuracy. We present TransCLIP, a novel and computationally efficient transductive approach designed for Vision-Language Models (VLMs). TransCLIP is applicable as a plug-and-play module on top of popular inductive zero- and few-shot models, consistently improving their performances. Our new objective function can be viewed as a regularized maximum-likelihood estimation, constrained by a KL divergence penalty that integrates the text-encoder knowledge and guides the transductive learning process. We further derive an iterative Block Majorize-Minimize (BMM) procedure for optimizing our objective, with guaranteed convergence and decoupled sample-assignment updates, yielding computationally efficient transduction for large-scale datasets. We report comprehensive evaluations, comparisons, and ablation studies that demonstrate: (i) Transduction can greatly enhance the generalization capabilities of inductive pretrained zero- and few-shot VLMs; (ii) TransCLIP substantially outperforms standard transductive few-shot learning methods relying solely on vision features, notably due to the KL-based language constraint.", "keywords": "Vision-Language;zero-shot;transduction;unsupervised learning;few-shot", "primary_area": "machine_vision", "supplementary_material": "", "author": "Maxime Zanella;Beno\u00eet G\u00e9rin;Ismail Ben Ayed", "authorids": "~Maxime_Zanella1;~Beno\u00eet_G\u00e9rin1;~Ismail_Ben_Ayed1", "gender": "M;M;M", "homepage": ";https://pilab.be/about-me/?p=benoit_gerin;https://profs.etsmtl.ca/ibenayed/", "dblp": "334/0835;;68/4478", "google_scholar": "FIoE9YIAAAAJ;MgT_oMAAAAAJ;https://scholar.google.ca/citations?user=29vyUccAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Maxime_Zanella1;~Beno\u00eet_G\u00e9rin1;~Ismail_Ben_Ayed1", "aff": "Universit\u00e9 Catholique de Louvain;Universit\u00e9 Catholique de Louvain;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec", "aff_domain": "uclouvain.be;ucl.ac.be;etsmtl.ca", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzanella2024boosting,\ntitle={Boosting Vision-Language Models with Transduction},\nauthor={Maxime Zanella and Beno{\\^\\i}t G{\\'e}rin and Ismail Ben Ayed},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=go4zzXBWVs}\n}", "github": "", "reviewers": "bXXg;uSPt;AvCq;aJGT;NaRY", "pdf_size": 565331, "rating": "5;5;5;6;7", "confidence": "4;4;4;4;2", "soundness": "2;2;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "177;67;81;160;73", "wc_strengths": "123;80;64;117;32", "wc_weaknesses": "284;259;162;151;231", "wc_questions": "5;3;2;89;3", "wc_limitations": "9;3;95;19;8", "wc_review": "598;412;404;536;347", "wc_reply_reviewers": "25;0;25;30;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 111.6, 46.97914430893777 ], "wc_strengths_avg": [ 83.2, 33.84316770043845 ], "wc_weaknesses_avg": [ 217.4, 52.59125402574082 ], "wc_questions_avg": [ 20.4, 34.313845602030675 ], "wc_limitations_avg": [ 26.8, 34.49289781969616 ], "wc_review_avg": [ 459.4, 92.7655108324209 ], "wc_reply_reviewers_avg": [ 16.0, 13.19090595827292 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.875, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3194689844176400835&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "uclouvain.be;ucl.ac.be;etsmtl.ca", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Universit\u00e9 catholique de Louvain;Universit\u00e9 du Qu\u00e9bec", "aff_unique_dep": ";", "aff_unique_url": "https://www.uclouvain.be;https://www.etsmtl.ca", "aff_unique_abbr": "UCLouvain;ETS", "aff_campus_unique_index": "1", "aff_campus_unique": ";\u00c9cole de technologie sup\u00e9rieure", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Belgium;Canada" }, { "title": "Visual Autoregressive Modeling: Scalable Image Generation via Next-Scale Prediction", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94115", "id": "gojL67CfS8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gojL67CfS8", "openreview": "https://openreview.net/forum?id=gojL67CfS8", "poster": "/media/PosterPDFs/NeurIPS%202024/94115.png?t=1733113950.6477468", "project": "", "author_site": "Keyu Tian, Yi Jiang, Zehuan Yuan, BINGYUE PENG, Liwei Wang", "tldr": "", "abstract": "We present Visual AutoRegressive modeling (VAR), a new generation paradigm that redefines the autoregressive learning on images as coarse-to-fine \"next-scale prediction\" or \"next-resolution prediction\", diverging from the standard raster-scan \"next-token prediction\". This simple, intuitive methodology allows autoregressive (AR) transformers to learn visual distributions fast and generalize well: VAR, for the first time, makes GPT-style AR models surpass diffusion transformers in image generation. On ImageNet 256x256 benchmark, VAR significantly improve AR baseline by improving Frechet inception distance (FID) from 18.65 to 1.73, inception score (IS) from 80.4 to 350.2, with around 20x faster inference speed. It is also empirically verified that VAR outperforms the Diffusion Transformer (DiT) in multiple dimensions including image quality, inference speed, data efficiency, and scalability. Scaling up VAR models exhibits clear power-law scaling laws similar to those observed in LLMs, with linear correlation coefficients near -0.998 as solid evidence. VAR further showcases zero-shot generalization ability in downstream tasks including image in-painting, out-painting, and editing. These results suggest VAR has initially emulated the two important properties of LLMs: Scaling Laws and zero-shot task generalization. We have released all models and codes to promote the exploration of AR/VAR models for visual generation and unified learning.", "keywords": "Language Models;Autoregressive Modeling;Scaling Laws;Generative Model;Image Generation;Image Synthesis", "primary_area": "generative_models", "supplementary_material": "/attachment/9962551f028543b6cadc22c25b0c79992f6422b8.zip", "author": "Keyu Tian;Yi Jiang;Zehuan Yuan;BINGYUE PENG;Liwei Wang", "authorids": "~Keyu_Tian1;~Yi_Jiang2;~Zehuan_Yuan1;~BINGYUE_PENG1;~Liwei_Wang1", "gender": "M;M;M;M;M", "homepage": ";https://enjoyyi.github.io/;https://shallowyuan.github.io/;https://www.linkedin.com/in/bingyp/;http://www.liweiwang-pku.com/", "dblp": "265/5609;;227/3298;57/11335.html;", "google_scholar": "6FdkbygAAAAJ;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;;;VZHxoh8AAAAJ", "orcid": "0000-0001-5909-2091;0000-0002-2133-8719;;;", "linkedin": "keyu-tian/;;;;", "or_profile": "~Keyu_Tian1;~Yi_Jiang2;~Zehuan_Yuan1;~BINGYUE_PENG1;~Liwei_Wang1", "aff": "Peking University;Bytedance;ByteDance Inc.;ByteDance Inc.;Peking University", "aff_domain": "pku.edu.cn;bytedance.com;bytedance.com;bytedance.com;pku.edu.cn", "position": "MS student;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\ntian2024visual,\ntitle={Visual Autoregressive Modeling: Scalable Image Generation via Next-Scale Prediction},\nauthor={Keyu Tian and Yi Jiang and Zehuan Yuan and BINGYUE PENG and Liwei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gojL67CfS8}\n}", "github": "", "reviewers": "Hisd;dFai;yx9H;oCJa", "pdf_size": 6208706, "rating": "7;8;8;8", "confidence": "4;4;5;4", "soundness": "3;4;3;3", "novelty": "4;4;4;4", "presentation": "3;3;4;4", "wc_summary": "73;84;63;63", "wc_strengths": "61;117;95;78", "wc_weaknesses": "131;220;85;301", "wc_questions": "21;173;328;56", "wc_limitations": "9;11;24;2", "wc_review": "295;605;595;500", "wc_reply_reviewers": "12;43;288;361", "wc_reply_authors": "30;412;976;596", "reply_reviewers": "1;1;3;3", "reply_authors": "2;3;4;4", "rating_avg": [ 7.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 70.75, 8.671072598012312 ], "wc_strengths_avg": [ 87.75, 20.72890493972125 ], "wc_weaknesses_avg": [ 184.25, 83.05833793184162 ], "wc_questions_avg": [ 144.5, 119.96770398736487 ], "wc_limitations_avg": [ 11.5, 7.952986860293433 ], "wc_review_avg": [ 498.75, 124.56800351615178 ], "wc_reply_reviewers_avg": [ 176.0, 151.1241211719691 ], "wc_reply_authors_avg": [ 503.5, 340.7297316055645 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 256, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2627565997597441780&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;bytedance.com;bytedance.com;bytedance.com;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Peking University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Peking U;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Diffusion4D: Fast Spatial-temporal Consistent 4D generation via Video Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94114", "id": "grrefkWEES", "proceeding": "", "pdf": "https://openreview.net/pdf?id=grrefkWEES", "openreview": "https://openreview.net/forum?id=grrefkWEES", "poster": "/media/PosterPDFs/NeurIPS%202024/94114.png?t=1733499485.6725283", "project": "", "author_site": "HANWEN LIANG, Yuyang Yin, Dejia Xu, hanxue liang, Zhangyang "Atlas" Wang, Konstantinos N Plataniotis, Yao Zhao, Yunchao Wei", "tldr": "", "abstract": "The availability of large-scale multimodal datasets and advancements in diffusion models have significantly accelerated progress in 4D content generation. Most prior approaches rely on multiple images or video diffusion models, utilizing score distillation sampling for optimization or generating pseudo novel views for direct supervision. However, these methods are hindered by slow optimization speeds and multi-view inconsistency issues. Spatial and temporal consistency in 4D geometry has been extensively explored respectively in 3D-aware diffusion models and traditional monocular video diffusion models. Building on this foundation, we propose a strategy to migrate the temporal consistency in video diffusion models to the spatial-temporal consistency required for 4D generation. Specifically, we present a novel framework, \\textbf{Diffusion4D}, for efficient and scalable 4D content generation. Leveraging a meticulously curated dynamic 3D dataset, we develop a 4D-aware video diffusion model capable of synthesizing orbital views of dynamic 3D assets. To control the dynamic strength of these assets, we introduce a 3D-to-4D motion magnitude metric as guidance. Additionally, we propose a novel motion magnitude reconstruction loss and 3D-aware classifier-free guidance to refine the learning and generation of motion dynamics. After obtaining orbital views of the 4D asset, we perform explicit 4D construction with Gaussian splatting in a coarse-to-fine manner. Extensive experiments demonstrate that our method surpasses prior state-of-the-art techniques in terms of generation efficiency and 4D geometry consistency across various prompt modalities.", "keywords": "4D generation;diffusion model;spatial-temporal consistency", "primary_area": "generative_models", "supplementary_material": "/attachment/3d05b7a01b8f1fc9a8ccd286920dd0ac712616ea.zip", "author": "HANWEN LIANG;Yuyang Yin;Dejia Xu;hanxue liang;Zhangyang Wang;Konstantinos N Plataniotis;Yao Zhao;Yunchao Wei", "authorids": "~HANWEN_LIANG1;~Yuyang_Yin1;~Dejia_Xu1;~hanxue_liang1;~Zhangyang_Wang1;~Konstantinos_N_Plataniotis1;~Yao_Zhao1;~Yunchao_Wei1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://yuyangyin.github.io/;https://ir1d.github.io;https://hanxuel.github.io/;https://vita-group.github.io;http://mepro.bjtu.edu.cn;https://weiyc.github.io/;https://www.comm.utoronto.ca/~kostas/", "dblp": "248/9332;354/8945;264/5685;295/9018;119/4026;45/2091-1.html;118/5394;p/KonstantinosNPlataniotis", "google_scholar": "https://scholar.google.ca/citations?user=mrOHvI8AAAAJ;;ET0e93cAAAAJ;https://scholar.google.com/citations?view_op=list_works;pxFyKAIAAAAJ;474TbQYAAAAJ;https://scholar.google.com.sg/citations?user=qL9Csv0AAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;;;;;;0000-0003-3647-5473", "linkedin": ";;;hanxue-charles-liang-78b581177/;;;;", "or_profile": "~HANWEN_LIANG1;~Yuyang_Yin1;~Dejia_Xu1;~hanxue_liang1;~Zhangyang_Wang1;~Yao_Zhao1;~Yunchao_Wei1;~Kostantinos_Plataniotis1", "aff": "University of Toronto;Beijing Jiaotong University;University of Texas at Austin;University of Cambridge;University of Texas at Austin;Beijing Jiaotong University;Beijing Jiaotong University;Toronto University", "aff_domain": "utoronto.ca;bjtu.edu.cn;utexas.edu;cam.ac.uk;utexas.edu;bjtu.edu.cn;bjtu.edu.cn;utoronto.ca", "position": "Researcher;PhD student;PhD student;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliang2024diffusiond,\ntitle={Diffusion4D: Fast Spatial-temporal Consistent 4D generation via Video Diffusion Models},\nauthor={HANWEN LIANG and Yuyang Yin and Dejia Xu and hanxue liang and Zhangyang Wang and Konstantinos N Plataniotis and Yao Zhao and Yunchao Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=grrefkWEES}\n}", "github": "", "reviewers": "mXZW;BwxH;G7WS;fabU", "pdf_size": 35057300, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;2;3", "wc_summary": "46;68;111;79", "wc_strengths": "52;65;118;38", "wc_weaknesses": "144;143;379;28", "wc_questions": "2;1;135;28", "wc_limitations": "9;1;15;1", "wc_review": "253;278;758;174", "wc_reply_reviewers": "38;0;26;10", "wc_reply_authors": "64;0;318;69", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 23.441416339462084 ], "wc_strengths_avg": [ 68.25, 30.26858932953434 ], "wc_weaknesses_avg": [ 173.5, 127.67243241984544 ], "wc_questions_avg": [ 41.5, 55.056788863863105 ], "wc_limitations_avg": [ 6.5, 5.894913061275798 ], "wc_review_avg": [ 365.75, 229.6958586914444 ], "wc_reply_reviewers_avg": [ 18.5, 14.585952145814822 ], "wc_reply_authors_avg": [ 112.75, 121.58407584877223 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4619043377812920774&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "utoronto.ca;bjtu.edu.cn;utexas.edu;cam.ac.uk;utexas.edu;bjtu.edu.cn;bjtu.edu.cn;utoronto.ca", "author_num": 8, "aff_unique_index": "0;1;2;3;2;1;1;0", "aff_unique_norm": "University of Toronto;Beijing Jiao Tong University;University of Texas at Austin;University of Cambridge", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.utoronto.ca;http://www.njtu.edu.cn/en;https://www.utexas.edu;https://www.cam.ac.uk", "aff_unique_abbr": "U of T;BJTU;UT Austin;Cambridge", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Austin;Cambridge", "aff_country_unique_index": "0;1;2;3;2;1;1;0", "aff_country_unique": "Canada;China;United States;United Kingdom" }, { "id": "gsott1UXlq", "title": "Transparent Networks for Multivariate Time Series", "track": "main", "status": "Reject", "tldr": "", "abstract": "Transparent models, which are machine learning models that produce inherently interpretable predictions, are receiving significant attention in high-stakes domains. However, despite much real-world data being collected as time series, there is a lack of studies on transparent time series models. To address this gap, we propose a novel transparent neural network model for time series called Generalized Additive Time Series Model (GATSM). GATSM consists of two parts: 1) independent feature networks to learn feature representations, and 2) a transparent temporal module to learn temporal patterns across different time steps using the feature representations. This structure allows GATSM to effectively capture temporal patterns and handle dynamic-length time series while preserving transparency. Empirical experiments show that GATSM significantly outperforms existing generalized additive models and achieves comparable performance to black-box time series models, such as recurrent neural networks and Transformer. In addition, we demonstrate that GATSM finds interesting patterns in time series. The source code is available at https://anonymous.4open.science/r/GATSM-78F4/.", "keywords": "Generalized Additive Models;Interpretability;Time Series;Transparent Model", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Minkyu Kim;Suan Lee;Jinho Kim", "authorids": "~Minkyu_Kim3;~Suan_Lee1;~Jinho_Kim1", "gender": "M;M;M", "homepage": ";http://suanlab.com;https://cs.kangwon.ac.kr/~jhkim", "dblp": ";14/3121.html;", "google_scholar": "yRw4m2gAAAAJ;mK5U7hgAAAAJ;V1N15zMAAAAJ", "orcid": "my-orcid?orcid=0000-0003-1122-5363;0000-0002-3047-1167;", "linkedin": ";suan-lee-46aaa15b/;", "or_profile": "~Minkyu_Kim3;~Suan_Lee1;~Jinho_Kim1", "aff": "ZIOVISION;Semyung University;", "aff_domain": "ziovision.co.kr;semyung.ac.kr;", "position": "Researcher;Assistant Professor;", "bibtex": "@misc{\nanonymous2024transparent,\ntitle={Transparent Networks for Multivariate Time Series},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=gsott1UXlq}\n}", "github": "", "project": "", "reviewers": "JRxn;1uUQ;f5AT;cPMr", "site": "https://openreview.net/forum?id=gsott1UXlq", "pdf_size": 554648, "rating": "4;5;5;6", "confidence": "2;4;4;1", "soundness": "2;2;2;3", "novelty": "2;2;3;2", "presentation": "3;3;2;3", "wc_summary": "88;27;68;46", "wc_strengths": "23;32;72;76", "wc_weaknesses": "126;208;75;104", "wc_questions": "54;3;18;59", "wc_limitations": "1;38;1;46", "wc_review": "292;308;234;331", "wc_reply_reviewers": "61;22;32;52", "wc_reply_authors": "326;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 22.92787604642 ], "wc_strengths_avg": [ 50.75, 23.509306667785847 ], "wc_weaknesses_avg": [ 128.25, 49.4690559845243 ], "wc_questions_avg": [ 33.5, 23.66960075708925 ], "wc_limitations_avg": [ 21.5, 20.694202086574876 ], "wc_review_avg": [ 291.25, 35.84253757757673 ], "wc_reply_reviewers_avg": [ 41.75, 15.497983739828868 ], "wc_reply_authors_avg": [ 81.5, 141.1621408168635 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2721655269759087, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:weFyagcZO-8J:scholar.google.com/&scioq=Transparent+Networks+for+Multivariate+Time+Series&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Ziovision;Semyung University", "aff_unique_dep": ";", "aff_unique_url": ";http://www.semyung.ac.kr", "aff_unique_abbr": ";Semyung U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";South Korea" }, { "title": "Brain-JEPA: Brain Dynamics Foundation Model with Gradient Positioning and Spatiotemporal Masking", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94113", "id": "gtU2eLSAmO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gtU2eLSAmO", "openreview": "https://openreview.net/forum?id=gtU2eLSAmO", "poster": "/media/PosterPDFs/NeurIPS%202024/94113.png?t=1730451883.7057018", "project": "", "author_site": "Zijian Dong, Ruilin Li, Yilei Wu, Thuan Tinh Nguyen, Joanna Chong, Fang Ji, Nathanael Tong, Christopher Chen, Juan Helen Zhou", "tldr": "", "abstract": "We introduce *Brain-JEPA*, a brain dynamics foundation model with the Joint-Embedding Predictive Architecture (JEPA). This pioneering model achieves state-of-the-art performance in demographic prediction, disease diagnosis/prognosis, and trait prediction through fine-tuning. Furthermore, it excels in off-the-shelf evaluations (e.g., linear probing) and demonstrates superior generalizability across different ethnic groups, surpassing the previous large model for brain activity significantly. Brain-JEPA incorporates two innovative techniques: **Brain Gradient Positioning** and **Spatiotemporal Masking**. Brain Gradient Positioning introduces a functional coordinate system for brain functional parcellation, enhancing the positional encoding of different Regions of Interest (ROIs). Spatiotemporal Masking, tailored to the unique characteristics of fMRI data, addresses the challenge of heterogeneous time-series patches. These methodologies enhance model performance and advance our understanding of the neural circuits underlying cognition. Overall, Brain-JEPA is paving the way to address pivotal questions of building brain functional coordinate system and masking brain activity at the AI-neuroscience interface, and setting a potentially new paradigm in brain activity analysis through downstream adaptation.", "keywords": "foundation model;fMRI", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Zijian Dong;Li Ruilin;Yilei Wu;Thuan Tinh Nguyen;Joanna Su Xian Chong;Fang Ji;Nathanael Ren Jie Tong;Christopher Li Hsian Chen;Juan Helen Zhou", "authorids": "~Zijian_Dong2;~Li_Ruilin1;~Yilei_Wu1;~Thuan_Tinh_Nguyen1;~Joanna_Su_Xian_Chong1;~Fang_Ji1;~Nathanael_Ren_Jie_Tong1;~Christopher_Li_Hsian_Chen1;~Juan_Helen_Zhou1", "gender": "M;M;M;;;M;;M;F", "homepage": "https://zijiand.github.io/;;https://github.com/yilei-wu;;;;;;https://discovery.nus.edu.sg/3278-juan-helen-zhou/about", "dblp": ";;;;220/2613;;;;247/7929.html", "google_scholar": "xKUU2_EAAAAJ;vEYtdJoAAAAJ;;;44SVVLUAAAAJ;gkA7PnQAAAAJ;;;https://scholar.google.com.sg/citations?user=4Z1S3_oAAAAJ", "orcid": "0009-0008-4690-137X;0000-0002-1979-8767;;0009-0007-1216-7747;0009-0008-5077-5206;;;0000-0002-1047-9225;0000-0002-0180-8648", "linkedin": ";;;;;;nathanael-tong-0754951aa/;;juan-helen-zhou2011", "or_profile": "~Zijian_Dong2;~Li_Ruilin1;~Yilei_Wu1;~Thuan_Tinh_Nguyen1;~Joanna_Su_Xian_Chong1;~Fang_Ji1;~Nathanael_Ren_Jie_Tong1;~Christopher_Li_Hsian_Chen1;~Juan_Helen_Zhou1", "aff": "National University of Singapore;Nanyang Technological University;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;ntu.edu.sg;u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ndong2024brainjepa,\ntitle={Brain-{JEPA}: Brain Dynamics Foundation Model with Gradient Positioning and Spatiotemporal Masking},\nauthor={Zijian Dong and Li Ruilin and Yilei Wu and Thuan Tinh Nguyen and Joanna Su Xian Chong and Fang Ji and Nathanael Ren Jie Tong and Christopher Li Hsian Chen and Juan Helen Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gtU2eLSAmO}\n}", "github": "", "reviewers": "iLSi;M77o;GJHn;fytG", "pdf_size": 1162154, "rating": "6;6;7;8", "confidence": "3;5;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;3;4", "wc_summary": "106;43;209;145", "wc_strengths": "92;37;117;58", "wc_weaknesses": "93;256;474;93", "wc_questions": "648;108;134;2", "wc_limitations": "1;7;89;5", "wc_review": "940;451;1023;303", "wc_reply_reviewers": "0;64;253;51", "wc_reply_authors": "0;179;140;65", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 125.75, 60.28836952514141 ], "wc_strengths_avg": [ 76.0, 30.748983723043597 ], "wc_weaknesses_avg": [ 229.0, 156.32178351080825 ], "wc_questions_avg": [ 223.0, 250.30581295687082 ], "wc_limitations_avg": [ 25.5, 36.72533185690771 ], "wc_review_avg": [ 679.25, 308.1463734980504 ], "wc_reply_reviewers_avg": [ 92.0, 95.98176910226233 ], "wc_reply_authors_avg": [ 96.0, 68.92387104624927 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14500624398403414080&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "u.nus.edu;ntu.edu.sg;u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 9, "aff_unique_index": "0;1;0;0;0;0;0;0;0", "aff_unique_norm": "National University of Singapore;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "NUS;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Nonparametric Classification on Low Dimensional Manifolds using Overparameterized Convolutional Residual Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94112", "id": "guzWIg7ody", "proceeding": "", "pdf": "https://openreview.net/pdf?id=guzWIg7ody", "openreview": "https://openreview.net/forum?id=guzWIg7ody", "poster": "", "project": "", "author_site": "Zixuan Zhang, Kaiqi Zhang, Minshuo Chen, Yuma Takeda, Mengdi Wang, Tuo Zhao, Yu-Xiang Wang", "tldr": "", "abstract": "Convolutional residual neural networks (ConvResNets), though overparametersized, can achieve remarkable prediction performance in practice, which cannot be well explained by conventional wisdom. To bridge this gap, we study the performance of ConvResNeXts trained with weight decay, which cover ConvResNets as a special case, from the perspective of nonparametric classification. Our analysis allows for infinitely many building blocks in ConvResNeXts, and shows that weight decay implicitly enforces sparsity on these blocks. Specifically, we consider a smooth target function supported on a low-dimensional manifold, then prove that ConvResNeXts can adapt to the function smoothness and low-dimensional structures and efficiently learn the function without suffering from the curse of dimensionality. Our findings partially justify the advantage of overparameterized ConvResNeXts over conventional machine learning models.", "keywords": "Nonparametric Classification;Low Dimensional Manifolds;Overparameterized ResNets;Function Approximation.", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zixuan Zhang;Kaiqi Zhang;Minshuo Chen;Yuma Takeda;Mengdi Wang;Tuo Zhao;Yu-Xiang Wang", "authorids": "~Zixuan_Zhang5;~Kaiqi_Zhang2;~Minshuo_Chen1;~Yuma_Takeda1;~Mengdi_Wang1;~Tuo_Zhao2;~Yu-Xiang_Wang1", "gender": "F;M;M;M;F;;M", "homepage": "https://www.isye.gatech.edu/users/zixuan-zhang;;https://minshuochen.github.io;;http://mwang.princeton.edu;http://www.cs.ucsb.edu/~yuxiangw/publications.html;http://www2.isye.gatech.edu/~tzhao80", "dblp": ";;217/1509;;;62/1637-3.html;", "google_scholar": ";XA7np8gAAAAJ;qU9WvTgAAAAJ;;;HGNZ1fkAAAAJ;EJXN6tYAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;yuma-takeda-61207a293/;;;", "or_profile": "~Zixuan_Zhang5;~Kaiqi_Zhang2;~Minshuo_Chen1;~Yuma_Takeda1;~Mengdi_Wang1;~Yu-Xiang_Wang1;~Tuo_Zhao1", "aff": "Georgia Institute of Technology;UC Santa Barbara;Princeton University;the University of Tokyo;Princeton University;UC Santa Barbara;Georgia Institute of Technology", "aff_domain": "gatech.edu;ucsb.edu;princeton.edu;g.ecc.u-tokyo.ac.jp;princeton.edu;ucsb.edu;gatech.edu", "position": "PhD student;PhD student;Postdoc;MS student;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024nonparametric,\ntitle={Nonparametric Classification on Low Dimensional Manifolds using Overparameterized Convolutional Residual Networks},\nauthor={Zixuan Zhang and Kaiqi Zhang and Minshuo Chen and Yuma Takeda and Mengdi Wang and Tuo Zhao and Yu-Xiang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=guzWIg7ody}\n}", "github": "", "reviewers": "xShA;UzhH;QKy6;giSF", "pdf_size": 972872, "rating": "4;5;7;8", "confidence": "4;3;3;5", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "137;17;40;55", "wc_strengths": "53;25;28;45", "wc_weaknesses": "136;30;142;36", "wc_questions": "191;69;64;137", "wc_limitations": "22;22;4;1", "wc_review": "539;163;278;274", "wc_reply_reviewers": "193;0;25;63", "wc_reply_authors": "363;63;63;18", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 45.22927702274269 ], "wc_strengths_avg": [ 37.75, 11.648497757221744 ], "wc_weaknesses_avg": [ 86.0, 53.08483775994799 ], "wc_questions_avg": [ 115.25, 52.38499308007972 ], "wc_limitations_avg": [ 12.25, 9.807522622966516 ], "wc_review_avg": [ 313.5, 138.1312781378642 ], "wc_reply_reviewers_avg": [ 70.25, 74.33496821819459 ], "wc_reply_authors_avg": [ 126.75, 137.63061977626927 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.38138503569823695, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=884794093389877077&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "gatech.edu;ucsb.edu;princeton.edu;g.ecc.u-tokyo.ac.jp;princeton.edu;ucsb.edu;gatech.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;2;1;0", "aff_unique_norm": "Georgia Institute of Technology;University of California, Santa Barbara;Princeton University;University of Tokyo", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.gatech.edu;https://www.ucsb.edu;https://www.princeton.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Georgia Tech;UCSB;Princeton;UTokyo", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "Diversify, Contextualize, and Adapt: Efficient Entropy Modeling for Neural Image Codec", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94111", "id": "gvg8pExqdd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gvg8pExqdd", "openreview": "https://openreview.net/forum?id=gvg8pExqdd", "poster": "", "project": "", "author_site": "Jun-Hyuk Kim, Seungeon Kim, Won-Hee Lee, Dokwan Oh", "tldr": "", "abstract": "Designing a fast and effective entropy model is challenging but essential for practical application of neural codecs. Beyond spatial autoregressive entropy models, more efficient backward adaptation-based entropy models have been recently developed. They not only reduce decoding time by using smaller number of modeling steps but also maintain or even improve rate--distortion performance by leveraging more diverse contexts for backward adaptation. Despite their significant progress, we argue that their performance has been limited by the simple adoption of the design convention for forward adaptation: using only a single type of hyper latent representation, which does not provide sufficient contextual information, especially in the first modeling step. In this paper, we propose a simple yet effective entropy modeling framework that leverages sufficient contexts for forward adaptation without compromising on bit-rate. Specifically, we introduce a strategy of diversifying hyper latent representations for forward adaptation, i.e., using two additional types of contexts along with the existing single type of context. In addition, we present a method to effectively use the diverse contexts for contextualizing the current elements to be encoded/decoded. By addressing the limitation of the previous approach, our proposed framework leads to significant performance improvements. Experimental results on popular datasets show that our proposed framework consistently improves rate-distortion performance across various bit-rate regions, e.g., 3.73\\% BD-rate gain over the state-of-the-art baseline on the Kodak dataset.", "keywords": "Neural codec;entropy model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Jun-Hyuk Kim;Seungeon Kim;Won-Hee Lee;Dokwan Oh", "authorids": "~Jun-Hyuk_Kim1;~Seungeon_Kim1;~Won-Hee_Lee1;~Dokwan_Oh1", "gender": "M;M;;M", "homepage": "https://junhyukk.github.io/;;;", "dblp": "193/6547;;https://dblp.org/rec/journals/tip/LeeCR14.html;274/9571", "google_scholar": "A0io6mQAAAAJ;https://scholar.google.com/citations?hl=ko;Ilt6x_sAAAAJ;", "orcid": ";0000-0002-8370-8631;0000-0002-4053-0917;", "linkedin": ";;;dokwan-oh-18a26572/", "or_profile": "~Jun-Hyuk_Kim1;~Seungeon_Kim1;~Won-Hee_Lee1;~Dokwan_Oh1", "aff": "Samsung Advanced Institute of Technology;Samsung;SAIT;Samsung Advanced Institute of Technology", "aff_domain": "samsung.com;samsung.com;samsung.com;samsung.com", "position": "Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nkim2024diversify,\ntitle={Diversify, Contextualize, and Adapt: Efficient Entropy Modeling for Neural Image Codec},\nauthor={Jun-Hyuk Kim and Seungeon Kim and Won-Hee Lee and Dokwan Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gvg8pExqdd}\n}", "github": "", "reviewers": "YQWe;hsbA;TpTZ;295Z", "pdf_size": 5959619, "rating": "5;5;6;6", "confidence": "1;4;4;1", "soundness": "3;4;3;3", "novelty": "2;2;2;3", "presentation": "3;4;3;3", "wc_summary": "54;195;73;74", "wc_strengths": "52;215;44;27", "wc_weaknesses": "28;144;303;22", "wc_questions": "6;2;174;61", "wc_limitations": "1;2;3;1", "wc_review": "141;558;597;185", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 1.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 55.995535536326464 ], "wc_strengths_avg": [ 84.5, 75.88313383091133 ], "wc_weaknesses_avg": [ 124.25, 114.08412466246125 ], "wc_questions_avg": [ 60.75, 69.41676670661059 ], "wc_limitations_avg": [ 1.75, 0.82915619758885 ], "wc_review_avg": [ 370.25, 208.28991214170694 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:z7ZpW-GFEDMJ:scholar.google.com/&scioq=Diversify,+Contextualize,+and+Adapt:+Efficient+Entropy+Modeling+for+Neural+Image+Codec&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "samsung.com;samsung.com;samsung.com;samsung.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Samsung;Southern Alberta Institute of Technology", "aff_unique_dep": "Samsung Advanced Institute of Technology;", "aff_unique_url": "https://www.sait.samsung.com;https://www.sait.ca", "aff_unique_abbr": "SAIT;SAIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;Canada" }, { "title": "Image Copy Detection for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94110", "id": "gvlOQC6oP1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gvlOQC6oP1", "openreview": "https://openreview.net/forum?id=gvlOQC6oP1", "poster": "/media/PosterPDFs/NeurIPS%202024/94110.png?t=1727961210.5975814", "project": "", "author_site": "Wenhao Wang, Yifan Sun, Zhentao Tan, Yi Yang", "tldr": "", "abstract": "Images produced by diffusion models are increasingly popular in digital artwork and visual marketing. However, such generated images might replicate content from existing ones and pose the challenge of content originality. Existing Image Copy Detection (ICD) models, though accurate in detecting hand-crafted replicas, overlook the challenge from diffusion models. This motivates us to introduce ICDiff, the first ICD specialized for diffusion models. To this end, we construct a Diffusion-Replication (D-Rep) dataset and correspondingly propose a novel deep embedding method. D-Rep uses a state-of-the-art diffusion model (Stable Diffusion V1.5) to generate 40, 000 image-replica pairs, which are manually annotated into 6 replication levels ranging from 0 (no replication) to 5 (total replication). Our method, PDF-Embedding, transforms the replication level of each image-replica pair into a probability density function (PDF) as the supervision signal. The intuition is that the probability of neighboring replication levels should be continuous and smooth. Experimental results show that PDF-Embedding surpasses protocol-driven methods and non-PDF choices on the D-Rep test set. Moreover, by utilizing PDF-Embedding, we find that the replication ratios of well-known diffusion models against an open-source gallery range from 10% to 20%. The project is publicly available at https://icdiff.github.io/.", "keywords": "Image Copy Detection;Diffusion Models;Replicated Content", "primary_area": "machine_vision", "supplementary_material": "/attachment/8bccaacbf0f8c999ec0f0f29465703e5c77ad748.zip", "author": "Wenhao Wang;Yifan Sun;Zhentao Tan;Yi Yang", "authorids": "~Wenhao_Wang2;~Yifan_Sun2;~Zhentao_Tan2;~Yi_Yang22", "gender": "M;M;M;M", "homepage": "http://wangwenhao0716.github.io/;https://yifansun-reid.github.io;https://github.com/PKUTAN;https://person.zju.edu.cn/yiyang", "dblp": ";99/10261-3.html;;33/4854-1.html", "google_scholar": "k3mq3XMAAAAJ;uUZEL7UAAAAJ;jDsfBUwAAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0001-8727-1572;0000-0003-3532-6521;;", "linkedin": ";;;", "or_profile": "~Wenhao_Wang2;~Yifan_Sun2;~Zhentao_Tan2;~Yi_Yang22", "aff": "University of Technology Sydney;Baidu;Peking University;Zhejiang University", "aff_domain": "uts.edu.au;baidu.com;pku.edu.cn;zju.edu.cn", "position": "PhD student;Senior Expert;MS student;Full Professor", "bibtex": "@inproceedings{\nwang2024image,\ntitle={Image Copy Detection for Diffusion Models},\nauthor={Wenhao Wang and Yifan Sun and Zhentao Tan and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gvlOQC6oP1}\n}", "github": "", "reviewers": "vdEZ;2jDa;dJ32;2Ky2", "pdf_size": 30489368, "rating": "6;7;7;7", "confidence": "4;5;3;4", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "3;4;3;3", "wc_summary": "296;51;74;65", "wc_strengths": "85;71;77;32", "wc_weaknesses": "337;82;2;43", "wc_questions": "299;40;285;82", "wc_limitations": "1;14;2;1", "wc_review": "1018;258;440;223", "wc_reply_reviewers": "106;46;249;30", "wc_reply_authors": "29;21;349;20", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 121.5, 101.08041353298867 ], "wc_strengths_avg": [ 66.25, 20.38841582860228 ], "wc_weaknesses_avg": [ 116.0, 130.69238692441118 ], "wc_questions_avg": [ 176.5, 116.55578063742699 ], "wc_limitations_avg": [ 4.5, 5.5 ], "wc_review_avg": [ 484.75, 318.7031338095062 ], "wc_reply_reviewers_avg": [ 107.75, 86.33184522527014 ], "wc_reply_authors_avg": [ 104.75, 141.0609354144513 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HjsXrpz1s0YJ:scholar.google.com/&scioq=Image+Copy+Detection+for+Diffusion+Models&hl=en&as_sdt=0,48", "gs_version_total": 6, "email": "uts.edu.au;baidu.com;pku.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Technology Sydney;Baidu;Peking University;Zhejiang University", "aff_unique_dep": ";Baidu, Inc.;;", "aff_unique_url": "https://www.uts.edu.au;https://www.baidu.com;http://www.pku.edu.cn;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;Baidu;Peking U;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Australia;China" }, { "title": "Dual Cone Gradient Descent for Training Physics-Informed Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94109", "id": "gvtCR7dHJ3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gvtCR7dHJ3", "openreview": "https://openreview.net/forum?id=gvtCR7dHJ3", "poster": "/media/PosterPDFs/NeurIPS%202024/94109.png?t=1729491516.6503792", "project": "", "author_site": "Youngsik Hwang, Dongyoung Lim", "tldr": "", "abstract": "Physics-informed neural networks (PINNs) have emerged as a prominent approach for solving partial differential equations (PDEs) by minimizing a combined loss function that incorporates both boundary loss and PDE residual loss. Despite their remarkable empirical performance in various scientific computing tasks, PINNs often fail to generate reasonable solutions, and such pathological behaviors remain difficult to explain and resolve. In this paper, we identify that PINNs can be adversely trained when gradients of each loss function exhibit a significant imbalance in their magnitudes and present a negative inner product value. To address these issues, we propose a novel optimization framework, *Dual Cone Gradient Descent* (DCGD), which adjusts the direction of the updated gradient to ensure it falls within a dual cone region. This region is defined as a set of vectors where the inner products with both the gradients of the PDE residual loss and the boundary loss are non-negative. Theoretically, we analyze the convergence properties of DCGD algorithms in a non-convex setting. On a variety of benchmark equations, we demonstrate that DCGD outperforms other optimization algorithms in terms of various evaluation metrics. In particular, DCGD achieves superior predictive accuracy and enhances the stability of training for failure modes of PINNs and complex PDEs, compared to existing optimally tuned models. Moreover, DCGD can be further improved by combining it with popular strategies for PINNs, including learning rate annealing and the Neural Tangent Kernel (NTK).", "keywords": "physics-informed neural networks;multi-objective optimization;scientific machine learning;gradient descent", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/1bcae2435f8b0896336b02767d58b5658ab481bf.zip", "author": "Youngsik Hwang;Dongyoung Lim", "authorids": "~Youngsik_Hwang1;~Dongyoung_Lim1", "gender": ";M", "homepage": "https://sites.google.com/view/dlim/research?authuser=0;https://sites.google.com/view/dlim/", "dblp": "387/9649;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Youngsik_Hwang1;~Dongyoung_Lim1", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;unist.ac.kr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhwang2024dual,\ntitle={Dual Cone Gradient Descent for Training Physics-Informed Neural Networks},\nauthor={Youngsik Hwang and Dongyoung Lim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gvtCR7dHJ3}\n}", "github": "", "reviewers": "NFZX;Bm5Z;SdDT;Nfs2", "pdf_size": 3928315, "rating": "6;6;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "98;141;174;179", "wc_strengths": "30;29;48;275", "wc_weaknesses": "281;29;2;475", "wc_questions": "95;7;312;264", "wc_limitations": "212;18;2;95", "wc_review": "716;224;538;1288", "wc_reply_reviewers": "176;17;180;49", "wc_reply_authors": "407;18;112;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 148.0, 32.349652239243625 ], "wc_strengths_avg": [ 95.5, 103.90981666810889 ], "wc_weaknesses_avg": [ 196.75, 194.02883162045788 ], "wc_questions_avg": [ 169.5, 123.68609461050987 ], "wc_limitations_avg": [ 81.75, 83.01317666491266 ], "wc_review_avg": [ 691.5, 386.8239263540972 ], "wc_reply_reviewers_avg": [ 105.5, 73.39107575175609 ], "wc_reply_authors_avg": [ 138.0, 160.1296349836594 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3Sg6J--0dTAJ:scholar.google.com/&scioq=Dual+Cone+Gradient+Descent+for+Training+Physics-Informed+Neural+Networks&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "unist.ac.kr;unist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "KptLLM: Unveiling the Power of Large Language Model for Keypoint Comprehension", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94108", "id": "gwd3MQufGP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gwd3MQufGP", "openreview": "https://openreview.net/forum?id=gwd3MQufGP", "poster": "/media/PosterPDFs/NeurIPS%202024/94108.png?t=1731318396.82413", "project": "", "author_site": "Jie Yang, Wang ZENG, Sheng Jin, Lumin Xu, Wentao Liu, Chen Qian, Ruimao Zhang", "tldr": "", "abstract": "Recent advancements in Multimodal Large Language Models (MLLMs) have greatly improved their abilities in image understanding. However, these models often struggle with grasping pixel-level semantic details, e.g., the keypoints of an object. To bridge this gap, we introduce the novel challenge of Semantic Keypoint Comprehension, which aims to comprehend keypoints across different task scenarios, including keypoint semantic understanding, visual prompt-based keypoint detection, and textual prompt-based keypoint detection. Moreover, we introduce KptLLM, a unified multimodal model that utilizes an identify-then-detect strategy to effectively address these challenges. KptLLM underscores the initial discernment of semantics in keypoints, followed by the precise determination of their positions through a chain-of-thought process. With several carefully designed modules, KptLLM adeptly handles various modality inputs, facilitating the interpretation of both semantic contents and keypoint locations. Our extensive experiments demonstrate KptLLM's superiority in various keypoint detection benchmarks and its unique semantic capabilities in interpreting keypoints.", "keywords": "Keypoint Detection;Pose Estimation;Multimodal Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jie Yang;Wang ZENG;Sheng Jin;Lumin Xu;Wentao Liu;Chen Qian;Ruimao Zhang", "authorids": "~Jie_Yang20;~Wang_ZENG1;~Sheng_Jin1;~Lumin_Xu1;~Wentao_Liu1;~Chen_Qian1;~Ruimao_Zhang1", "gender": "M;M;M;;M;M;M", "homepage": "https://yangjie-cv.github.io/;;https://jin-s13.github.io/;http://mmlab.ie.cuhk.edu.hk;;;http://zhangruimao.site/#", "dblp": ";;70/6780-7;212/1288;30/3943-2;;54/10697", "google_scholar": "UVzG9IcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;wrNd--oAAAAJ;;KZn9NWEAAAAJ;AerkT0YAAAAJ;ZJwZdtgAAAAJ", "orcid": ";;0000-0001-5736-7434;;;;", "linkedin": ";;;;;;", "or_profile": "~Jie_Yang20;~Wang_ZENG1;~Sheng_Jin1;~Lumin_Xu1;~Wentao_Liu1;~Chen_Qian1;~Ruimao_Zhang1", "aff": "The Chinese University of Hong Kong, Shenzhen;Sensetime;The University of Hong Kong;The Chinese University of Hong Kong;Sensetime;Tsinghua University;The Chinese University of Hong Kong (Shenzhen)", "aff_domain": "cuhk.edu.cn;sensetime.com;hku.hk;cuhk.edu.hk;sensetime.com;mails.tsinghua.edu.cn;cuhk.edu.cn", "position": "PhD student;Researcher;PhD student;PhD student;Senior Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2024kptllm,\ntitle={Kpt{LLM}: Unveiling the Power of Large Language Model for Keypoint Comprehension},\nauthor={Jie Yang and Wang ZENG and Sheng Jin and Lumin Xu and Wentao Liu and Chen Qian and Ruimao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gwd3MQufGP}\n}", "github": "", "reviewers": "j2Jb;wNmk;Hwow;7RyA", "pdf_size": 971921, "rating": "4;6;6;7", "confidence": "4;5;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "68;38;94;220", "wc_strengths": "73;25;27;70", "wc_weaknesses": "274;150;111;68", "wc_questions": "9;2;93;36", "wc_limitations": "1;7;9;14", "wc_review": "425;222;334;408", "wc_reply_reviewers": "238;0;154;81", "wc_reply_authors": "106;0;85;0", "reply_reviewers": "1;0;2;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.0, 69.28924880528002 ], "wc_strengths_avg": [ 48.75, 22.78568629644497 ], "wc_weaknesses_avg": [ 150.75, 76.84196444651842 ], "wc_questions_avg": [ 35.0, 35.812009158939965 ], "wc_limitations_avg": [ 7.75, 4.656984002549289 ], "wc_review_avg": [ 347.25, 79.99804685115755 ], "wc_reply_reviewers_avg": [ 118.25, 88.01810893219644 ], "wc_reply_authors_avg": [ 47.75, 48.32377779106265 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18269084168384938884&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "cuhk.edu.cn;sensetime.com;hku.hk;cuhk.edu.hk;sensetime.com;mails.tsinghua.edu.cn;cuhk.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;1;3;0", "aff_unique_norm": "Chinese University of Hong Kong;SenseTime;University of Hong Kong;Tsinghua University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.sensetime.com;https://www.hku.hk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CUHK;SenseTime;HKU;THU", "aff_campus_unique_index": "0;2;2;0", "aff_campus_unique": "Shenzhen;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Q-VLM: Post-training Quantization for Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94107", "id": "gxMfNArldP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gxMfNArldP", "openreview": "https://openreview.net/forum?id=gxMfNArldP", "poster": "/media/PosterPDFs/NeurIPS%202024/94107.png?t=1731720158.433936", "project": "", "author_site": "Changyuan Wang, Ziwei Wang, Xiuwei Xu, Yansong Tang, Jie Zhou, Jiwen Lu", "tldr": "", "abstract": "In this paper, we propose a post-training quantization framework of large vision-language models (LVLMs) for efficient multi-modal inference. Conventional quantization methods sequentially search the layer-wise rounding functions by minimizing activation discretization errors, which fails to acquire optimal quantization strategy without considering cross-layer dependency. On the contrary, we mine the cross-layer dependency that significantly influences discretization errors of the entire vision-language model, and embed this dependency into optimal quantization strategy searching with low search cost. Specifically, we observe the strong correlation between the activation entropy and the cross-layer dependency concerning output discretization errors. Therefore, we employ the entropy as the proxy to partition blocks optimally, which aims to achieve satisfying trade-offs between discretization errors and the search cost. Moreover, we optimize the visual encoder to disentangle the cross-layer dependency for fine-grained decomposition of search space, so that the search cost is further reduced without harming the quantization accuracy. Experimental results demonstrate that our method compresses the memory by 2.78x and increase generate speed by 1.44x about 13B LLaVA model without performance degradation on diverse multi-modal reasoning tasks.", "keywords": "Post-training quantization;Large vision-language models;Rounding function search", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Changyuan Wang;Ziwei Wang;Xiuwei Xu;Yansong Tang;Jie Zhou;Jiwen Lu", "authorids": "~Changyuan_Wang1;~Ziwei_Wang2;~Xiuwei_Xu1;~Yansong_Tang1;~Jie_Zhou3;~Jiwen_Lu1", "gender": "M;M;M;M;M;M", "homepage": "https://changyuanwang17.github.io/;https://ziweiwangthu.github.io/;https://xuxw98.github.io/;https://andytang15.github.io/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": ";136/5574-1;315/9374;214/9568;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": "otkdEAEAAAAJ;cMTW09EAAAAJ;4G627acAAAAJ;TIbistUAAAAJ;;TN8uDQoAAAAJ", "orcid": "0009-0000-4971-3683;0000-0001-9225-8495;;;;0000-0002-6121-5529", "linkedin": ";;;;;", "or_profile": "~Changyuan_Wang1;~Ziwei_Wang2;~Xiuwei_Xu1;~Yansong_Tang1;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;Carnegie Mellon University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;andrew.cmu.edu;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Postdoc;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024qvlm,\ntitle={Q-{VLM}: Post-training Quantization for Large Vision-Language Models},\nauthor={Changyuan Wang and Ziwei Wang and Xiuwei Xu and Yansong Tang and Jie Zhou and Jiwen Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gxMfNArldP}\n}", "github": "", "reviewers": "rcqj;ff1U;TNuV;EGvn", "pdf_size": 1606454, "rating": "5;5;5;6", "confidence": "2;4;4;2", "soundness": "3;2;2;3", "novelty": "3;3;2;2", "presentation": "4;2;3;3", "wc_summary": "73;61;44;96", "wc_strengths": "50;62;60;64", "wc_weaknesses": "141;196;165;156", "wc_questions": "61;2;9;19", "wc_limitations": "9;1;1;2", "wc_review": "334;322;279;337", "wc_reply_reviewers": "26;0;84;51", "wc_reply_authors": "0;0;131;28", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.5, 18.9274932307477 ], "wc_strengths_avg": [ 59.0, 5.385164807134504 ], "wc_weaknesses_avg": [ 164.5, 20.1059692628831 ], "wc_questions_avg": [ 22.75, 22.895141405983935 ], "wc_limitations_avg": [ 3.25, 3.344772040064913 ], "wc_review_avg": [ 318.0, 23.205602771744587 ], "wc_reply_reviewers_avg": [ 40.25, 31.03526220285564 ], "wc_reply_authors_avg": [ 39.75, 53.9090669553833 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10426676085875068089&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;andrew.cmu.edu;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;Carnegie Mellon University", "aff_unique_dep": "Electronic Engineering;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cmu.edu", "aff_unique_abbr": "THU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "End-To-End Causal Effect Estimation from Unstructured Natural Language Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94106", "id": "gzQARCgIsI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gzQARCgIsI", "openreview": "https://openreview.net/forum?id=gzQARCgIsI", "poster": "", "project": "", "author_site": "Nikita Dhawan, Leonardo Cotta, Karen Ullrich, Rahul Krishnan, Chris Maddison", "tldr": "", "abstract": "Knowing the effect of an intervention is critical for human decision-making, but current approaches for causal effect estimation rely on manual data collection and structuring, regardless of the causal assumptions. This increases both the cost and time-to-completion for studies. We show how large, diverse observational text data can be mined with large language models (LLMs) to produce inexpensive causal effect estimates under appropriate causal assumptions. We introduce _NATURAL_, a novel family of causal effect estimators built with LLMs that operate over datasets of unstructured text. Our estimators use LLM conditional distributions (over variables of interest, given the text data) to assist in the computation of classical estimators of causal effect. We overcome a number of technical challenges to realize this idea, such as automating data curation and using LLMs to impute missing information. We prepare six (two synthetic and four real) observational datasets, paired with corresponding ground truth in the form of randomized trials, which we used to systematically evaluate each step of our pipeline. NATURAL estimators demonstrate remarkable performance, yielding causal effect estimates that fall within 3 percentage points of their ground truth counterparts, including on real-world Phase 3/4 clinical trials. Our results suggest that unstructured text data is a rich source of causal effect information, and NATURAL is a first step towards an automated pipeline to tap this resource.", "keywords": "treatment effect estimation;unstructured reports;real-world effects", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Nikita Dhawan;Leonardo Cotta;Karen Ullrich;Rahul Krishnan;Chris J. Maddison", "authorids": "~Nikita_Dhawan1;~Leonardo_Cotta1;~Karen_Ullrich1;~Rahul_G_Krishnan1;~Chris_J._Maddison1", "gender": ";M;F;M;M", "homepage": "https://www.cs.toronto.edu/~nikita/;https://cottascience.github.io/;https://www.karenullrich.info;http://www.cs.toronto.edu/~rahulgk/index.html;http://www.cs.toronto.edu/~cmaddis/", "dblp": "255/4863;183/1858.html;155/8025;172/0880;139/1388", "google_scholar": "4D_8pTEAAAAJ;https://scholar.google.com.br/citations?user=0GI4MyoAAAAJ;TMIPmNAAAAAJ;ilJgXHkAAAAJ;https://scholar.google.ca/citations?user=WjCG3owAAAAJ", "orcid": ";;;;", "linkedin": "nikita-dhawan-7a4a29149/;cotta/;;rahulgk/;", "or_profile": "~Nikita_Dhawan1;~Leonardo_Cotta1;~Karen_Ullrich1;~Rahul_G_Krishnan1;~Chris_J_Maddison1", "aff": "Meta AI;Vector Institute;Meta AI;Department of Computer Science, University of Toronto;Google", "aff_domain": "meta.com;vectorinstitute.ai;fb.com;cs.toronto.edu;google.com", "position": "Intern;Postdoc;Research Scientist;Assistant Professor;Researcher", "bibtex": "@inproceedings{\ndhawan2024endtoend,\ntitle={End-To-End Causal Effect Estimation from Unstructured Natural Language Data},\nauthor={Nikita Dhawan and Leonardo Cotta and Karen Ullrich and Rahul Krishnan and Chris J. Maddison},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gzQARCgIsI}\n}", "github": "", "reviewers": "p9SY;7S1Z;oMqj;oSFK", "pdf_size": 975643, "rating": "4;7;7;8", "confidence": "4;3;3;4", "soundness": "3;3;2;4", "novelty": "3;3;3;3", "presentation": "1;3;3;4", "wc_summary": "50;56;59;88", "wc_strengths": "34;55;66;63", "wc_weaknesses": "77;55;184;211", "wc_questions": "333;54;217;76", "wc_limitations": "6;1;47;58", "wc_review": "500;221;573;496", "wc_reply_reviewers": "0;23;414;12", "wc_reply_authors": "250;117;595;236", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 63.25, 14.652218262092603 ], "wc_strengths_avg": [ 54.5, 12.5 ], "wc_weaknesses_avg": [ 131.75, 66.8931050258545 ], "wc_questions_avg": [ 170.0, 112.9933626369266 ], "wc_limitations_avg": [ 28.0, 24.869660230891775 ], "wc_review_avg": [ 447.5, 134.3139977813184 ], "wc_reply_reviewers_avg": [ 112.25, 174.40523931350228 ], "wc_reply_authors_avg": [ 299.5, 178.26174575606512 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.33333333333333337, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1602794119117514128&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "meta.com;vectorinstitute.ai;fb.com;cs.toronto.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "Meta;Vector Institute;University of Toronto;Google", "aff_unique_dep": "Meta AI;;Department of Computer Science;Google", "aff_unique_url": "https://meta.com;https://vectorinstitute.ai/;https://www.utoronto.ca;https://www.google.com", "aff_unique_abbr": "Meta;Vector Institute;U of T;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Toronto;Mountain View", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Least Squares Regression Can Exhibit Under-Parameterized Double Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94105", "id": "gzh9nTUtsY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=gzh9nTUtsY", "openreview": "https://openreview.net/forum?id=gzh9nTUtsY", "poster": "", "project": "", "author_site": "Xinyue Li, Rishi Sonthalia", "tldr": "", "abstract": "The relationship between the number of training data points, the number of parameters, and the generalization capabilities of models has been widely studied. Previous work has shown that double descent can occur in the over-parameterized regime and that the standard bias-variance trade-off holds in the under-parameterized regime. These works provide multiple reasons for the existence of the peak. We postulate that the location of the peak depends on the technical properties of both the spectrum as well as the eigenvectors of the sample covariance. We present two simple examples that provably exhibit double descent in the under-parameterized regime and do not seem to occur for reasons provided in prior work.", "keywords": "Learning Theory;Generalization;Random Matrix Theory;High Dimensional Statistics", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xinyue Li;Rishi Sonthalia", "authorids": "~Xinyue_Li4;~Rishi_Sonthalia1", "gender": "F;M", "homepage": "https://xinyue097.github.io;https://sites.google.com/umich.edu/rsonthal/home", "dblp": ";223/5758", "google_scholar": ";HYozgRsAAAAJ", "orcid": ";", "linkedin": ";rishi-sonthalia-53b44795/", "or_profile": "~Xinyue_Li4;~Rishi_Sonthalia1", "aff": "University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu", "position": "Undergrad student;Postdoc", "bibtex": "@inproceedings{\nli2024least,\ntitle={Least Squares Regression Can Exhibit Under-Parameterized Double Descent},\nauthor={Xinyue Li and Rishi Sonthalia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=gzh9nTUtsY}\n}", "github": "", "reviewers": "LfUR;4cXR;4Q1q;SLpr;RQj2", "pdf_size": 1177887, "rating": "5;6;6;7;7", "confidence": "3;3;2;3;3", "soundness": "3;3;3;4;3", "novelty": "2;2;2;3;3", "presentation": "2;3;3;2;3", "wc_summary": "109;23;78;56;105", "wc_strengths": "32;84;45;135;42", "wc_weaknesses": "117;110;26;285;144", "wc_questions": "336;43;180;77;40", "wc_limitations": "12;2;60;1;50", "wc_review": "606;262;389;554;381", "wc_reply_reviewers": "47;21;25;17;40", "wc_reply_authors": "13;0;0;0;22", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 74.2, 32.021242949017456 ], "wc_strengths_avg": [ 67.6, 38.06625802466011 ], "wc_weaknesses_avg": [ 136.4, 84.14416200783035 ], "wc_questions_avg": [ 135.2, 112.49782220114308 ], "wc_limitations_avg": [ 25.0, 24.99599967994879 ], "wc_review_avg": [ 438.4, 125.14407696730996 ], "wc_reply_reviewers_avg": [ 30.0, 11.523888232710346 ], "wc_reply_authors_avg": [ 7.0, 9.033271832508971 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ijHBcH6ZmOkJ:scholar.google.com/&scioq=Least+Squares+Regression+Can+Exhibit+Under-Parameterized+Double+Descent&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ucla.edu;ucla.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Weak-eval-Strong: Evaluating and Eliciting Lateral Thinking of LLMs with Situation Puzzles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94104", "id": "h024LpF3bZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h024LpF3bZ", "openreview": "https://openreview.net/forum?id=h024LpF3bZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94104.png?t=1731592922.638178", "project": "", "author_site": "Qi Chen, Bowen Zhang, Gang Wang, Qi Wu", "tldr": "", "abstract": "While advancements in NLP have significantly improved the performance of Large Language Models (LLMs) on tasks requiring vertical thinking, their lateral thinking capabilities remain under-explored and challenging to measure due to the complexity of assessing creative thought processes and the scarcity of relevant data. To address these challenges, we introduce SPLAT, a benchmark leveraging Situation Puzzles to evaluate and elicit LAteral Thinking of LLMs. This benchmark, containing 975 graded situation puzzles across three difficulty levels, employs a new multi-turn player-judge framework instead of the traditional model-based evaluation, which often necessitates a stronger evaluation model. This framework simulates an interactive game where the model (player) asks the evaluation model (judge) questions about an incomplete story to infer the full scenario. The judge answers based on a detailed reference scenario or evaluates if the player's predictions align with the reference one. This approach lessens dependence on more robust evaluation models, enabling the assessment of state-of-the-art LLMs. The experiments demonstrate that a robust evaluation model, such as WizardLM-2, closely matches human judgements in both intermediate question-answering and final scenario accuracy, achieving over 80% agreement--similar to the agreement levels among humans. Furthermore, applying data and reasoning processes from our benchmark to other lateral thinking-related benchmarks, e.g., RiddleSense and BrainTeaser, leads to performance enhancements. This suggests that our benchmark effectively evaluates and elicits the lateral thinking abilities of LLMs.", "keywords": "large language models;lateral thinking;situation puzzles", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Qi Chen;Bowen Zhang;Gang Wang;Qi Wu", "authorids": "~Qi_Chen4;~Bowen_Zhang3;~Gang_Wang26;~Qi_Wu3", "gender": "M;M;M;M", "homepage": "https://chenqi008.github.io/;;https://theb.ai;http://qi-wu.me/", "dblp": "66/6320-14;;;96/3446-1", "google_scholar": "OgKU77kAAAAJ;;;https://scholar.google.co.uk/citations?user=aKXe1FEAAAAJ", "orcid": "0000-0001-8732-8049;0000-0001-6180-6815;;", "linkedin": "qi-chen-4b1a72287;;gang-wang-4ab381279;", "or_profile": "~Qi_Chen4;~Bowen_Zhang3;~Gang_Wang26;~Qi_Wu3", "aff": "University of Adelaide;University of Adelaide;The BAI Limited;The University of Adelaide", "aff_domain": "adelaide.edu.au;adelaide.edu.au;theb.ai;adelaide.edu.au", "position": "PhD student;PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\nchen2024weakevalstrong,\ntitle={Weak-eval-Strong: Evaluating and Eliciting Lateral Thinking of {LLM}s with Situation Puzzles},\nauthor={Qi Chen and Bowen Zhang and Gang Wang and Qi Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h024LpF3bZ}\n}", "github": "", "reviewers": "ZJ8a;eDsP;6oC8;s9Dk", "pdf_size": 532502, "rating": "5;6;7;8", "confidence": "3;4;4;5", "soundness": "2;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "117;87;101;95", "wc_strengths": "40;68;37;182", "wc_weaknesses": "206;210;29;80", "wc_questions": "4;137;105;76", "wc_limitations": "1;26;1;1", "wc_review": "368;528;273;434", "wc_reply_reviewers": "31;0;75;17", "wc_reply_authors": "19;0;27;29", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.0, 11.0 ], "wc_strengths_avg": [ 81.75, 59.12856754564582 ], "wc_weaknesses_avg": [ 131.25, 78.85231448727424 ], "wc_questions_avg": [ 80.5, 49.1553659329274 ], "wc_limitations_avg": [ 7.25, 10.825317547305483 ], "wc_review_avg": [ 400.75, 93.12726507312453 ], "wc_reply_reviewers_avg": [ 30.75, 27.806249297594956 ], "wc_reply_authors_avg": [ 18.75, 11.453711188955307 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9486832980505139, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9684475681051865688&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "adelaide.edu.au;adelaide.edu.au;theb.ai;adelaide.edu.au", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Adelaide;BAI Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.adelaide.edu.au;", "aff_unique_abbr": "Adelaide;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;Unknown" }, { "title": "Loss Landscape Characterization of Neural Networks without Over-Parametrization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94103", "id": "h0a3p5WtXU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h0a3p5WtXU", "openreview": "https://openreview.net/forum?id=h0a3p5WtXU", "poster": "/media/PosterPDFs/NeurIPS%202024/94103.png?t=1732734602.4856005", "project": "", "author_site": "Rustem Islamov, Niccol\u00f2 Ajroldi, Antonio Orvieto, Aurelien Lucchi", "tldr": "", "abstract": "Modern machine learning heavily depends on the effectiveness of optimization techniques. While deep learning models have achieved remarkable empirical results in training, their theoretical underpinnings remain somewhat elusive. Ensuring the convergence of optimization methods requires imposing specific structures on the objective function which often do not hold in practice. One prominent example is the widely recognized Polyak-Lojasiewicz (PL) inequality, which has garnered considerable attention in recent years. However, validating such assumptions for deep neural networks entails substantial and often impractical levels of over-parametrization. In order to address this limitation, we propose a novel class of functions that can characterize the loss landscape of modern deep models without requiring extensive over-parametrization and can also include saddle points. Crucially, we prove that gradient-based optimizers possess theoretical guarantees of convergence under this assumption. Finally, we validate the soundness of our assumption through both theoretical analysis and empirical experimentation across a diverse range of deep learning models.", "keywords": "optimization;deep learning;neural networks;convergence", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/6448b838bbcecb1c34f5ba53fdd172910b737da0.zip", "author": "Rustem Islamov;Niccol\u00f2 Ajroldi;Antonio Orvieto;Aurelien Lucchi", "authorids": "~Rustem_Islamov1;~Niccol\u00f2_Ajroldi1;~Antonio_Orvieto3;~Aurelien_Lucchi1", "gender": "M;M;M;M", "homepage": "https://rustem-islamov.github.io/;https://github.com/Niccolo-Ajroldi;http://orvi.altervista.org/;http://people.inf.ethz.ch/alucchi/", "dblp": "285/5128;;;14/5780", "google_scholar": "-dlYjUsAAAAJ;JjYMet4AAAAJ;xkuLyHoAAAAJ;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ", "orcid": ";;;", "linkedin": "rustem-islamov-053345228/;;antonio-orvieto-947ab0130/;", "or_profile": "~Rustem_Islamov1;~Niccol\u00f2_Ajroldi1;~Antonio_Orvieto3;~Aurelien_Lucchi1", "aff": "University of Basel;Max Planck Institut, Max-Planck Institute;ELLIS Institute T\u00fcbingen, Max Planck Institute for Intelligent Systems, T\u00fcbingen AI Center, T\u00fcbingen, Germany;University of Basel", "aff_domain": "unibas.ch;mpip.mpg.de;tue.ellis.eu;unibas.ch", "position": "PhD student;Research Engineer;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nislamov2024loss,\ntitle={Loss Landscape Characterization of Neural Networks without Over-Parametrization},\nauthor={Rustem Islamov and Niccol{\\`o} Ajroldi and Antonio Orvieto and Aurelien Lucchi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h0a3p5WtXU}\n}", "github": "", "reviewers": "W37Q;MYcC;mWKG;qMQA", "pdf_size": 14304294, "rating": "4;7;7;8", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;4;3;4", "presentation": "3;4;4;4", "wc_summary": "79;87;59;203", "wc_strengths": "40;63;26;265", "wc_weaknesses": "364;112;189;293", "wc_questions": "22;240;29;110", "wc_limitations": "35;5;1;222", "wc_review": "540;507;304;1093", "wc_reply_reviewers": "243;41;16;70", "wc_reply_authors": "326;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.0, 56.356011214421486 ], "wc_strengths_avg": [ 98.5, 97.03221114660842 ], "wc_weaknesses_avg": [ 239.5, 96.3963173570443 ], "wc_questions_avg": [ 100.25, 87.78489334731802 ], "wc_limitations_avg": [ 65.75, 91.16297219814632 ], "wc_review_avg": [ 611.0, 292.5875937219485 ], "wc_reply_reviewers_avg": [ 92.5, 88.96769076468154 ], "wc_reply_authors_avg": [ 81.5, 141.1621408168635 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10530134090870297584&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "unibas.ch;mpip.mpg.de;tue.ellis.eu;unibas.ch", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Basel;Max Planck Institute;ELLIS Institute T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unibas.ch;https://www.mpg.de;", "aff_unique_abbr": "UniBas;MPI;", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Generalize or Detect? Towards Robust Semantic Segmentation Under Multiple Distribution Shifts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94102", "id": "h0rbjHyWoa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h0rbjHyWoa", "openreview": "https://openreview.net/forum?id=h0rbjHyWoa", "poster": "", "project": "", "author_site": "Zhitong Gao, Bingnan Li, Mathieu Salzmann, Xuming He", "tldr": "", "abstract": "In open-world scenarios, where both novel classes and domains may exist, an ideal segmentation model should detect anomaly classes for safety and generalize to new domains. However, existing methods often struggle to distinguish between domain-level and semantic-level distribution shifts, leading to poor OOD detection or domain generalization performance. In this work, we aim to equip the model to generalize effectively to covariate-shift regions while precisely identifying semantic-shift regions. To achieve this, we design a novel generative augmentation method to produce coherent images that incorporate both anomaly (or novel) objects and various covariate shifts at both image and object levels. Furthermore, we introduce a training strategy that recalibrates uncertainty specifically for semantic shifts and enhances the feature extractor to align features associated with domain shifts. We validate the effectiveness of our method across benchmarks featuring both semantic and domain shifts. Our method achieves state-of-the-art performance across all benchmarks for both OOD detection and domain generalization. Code is available at https://github.com/gaozhitong/MultiShiftSeg.", "keywords": "Anomaly Segmentation;Out-of-distribution Detection;Domain Generalization;Semantic Segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhitong Gao;Bingnan Li;Mathieu Salzmann;Xuming He", "authorids": "~Zhitong_Gao1;~Bingnan_Li1;~Mathieu_Salzmann1;~Xuming_He3", "gender": "F;M;M;M", "homepage": "https://gaozhitong.github.io/;https://cuttle-fish-my.github.io/libingnan.github.io/;https://people.epfl.ch/mathieu.salzmann;https://faculty.sist.shanghaitech.edu.cn/faculty/hexm/index.html", "dblp": ";;18/4533;03/4230", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.ch/citations?user=n-B0jr4AAAAJ;0KyeZ2QAAAAJ", "orcid": "0000-0002-3707-4850;;;", "linkedin": ";;;", "or_profile": "~Zhitong_Gao1;~Bingnan_Li1;~Mathieu_Salzmann1;~Xuming_He3", "aff": "ShanghaiTech University;ShanghaiTech University;CSIRO;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;data61.csiro.au;shanghaitech.edu.cn", "position": "MS student;Undergrad student;Collaborator;Associate Professor", "bibtex": "@inproceedings{\ngao2024generalize,\ntitle={Generalize or Detect? Towards Robust Semantic Segmentation Under Multiple Distribution Shifts},\nauthor={Zhitong Gao and Bingnan Li and Mathieu Salzmann and Xuming He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h0rbjHyWoa}\n}", "github": "", "reviewers": "UbMA;LCzB;ua5t;gYbq", "pdf_size": 19576599, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;3;2;3", "novelty": "4;2;3;3", "presentation": "3;3;2;3", "wc_summary": "86;124;82;92", "wc_strengths": "43;29;93;59", "wc_weaknesses": "135;104;174;535", "wc_questions": "3;8;42;39", "wc_limitations": "1;12;17;24", "wc_review": "268;277;408;749", "wc_reply_reviewers": "56;15;23;406", "wc_reply_authors": "863;26;1103;1973", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 16.55294535724685 ], "wc_strengths_avg": [ 56.0, 23.853720883753127 ], "wc_weaknesses_avg": [ 237.0, 173.82893890258893 ], "wc_questions_avg": [ 23.0, 17.621010186706094 ], "wc_limitations_avg": [ 13.5, 8.381527307120106 ], "wc_review_avg": [ 425.5, 194.81850528119756 ], "wc_reply_reviewers_avg": [ 125.0, 162.96165193075333 ], "wc_reply_authors_avg": [ 991.25, 693.6275567622728 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13863541672932498076&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;data61.csiro.au;shanghaitech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ShanghaiTech University;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.csiro.au", "aff_unique_abbr": "ShanghaiTech;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Provable Benefits of Complex Parameterizations for Structured State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94101", "id": "h15RyEj151", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h15RyEj151", "openreview": "https://openreview.net/forum?id=h15RyEj151", "poster": "", "project": "", "author_site": "Yuval Ran-Milo, Eden Lumbroso, Edo Cohen-Karlik, Raja Giryes, Amir Globerson, Nadav Cohen", "tldr": "", "abstract": "Structured state space models (SSMs), the core engine behind prominent neural networks such as S4 and Mamba, are linear dynamical systems adhering to a specified structure, most notably diagonal. In contrast to typical neural network modules, whose parameterizations are real, SSMs often use complex parameterizations. Theoretically explaining the benefits of complex parameterizations for SSMs is an open problem. The current paper takes a step towards its resolution, by establishing formal gaps between real and complex diagonal SSMs. Firstly, we prove that while a moderate dimension suffices in order for a complex SSM to express all mappings of a real SSM, a much higher dimension is needed for a real SSM to express mappings of a complex SSM. Secondly, we prove that even if the dimension of a real SSM is high enough to express a given mapping, typically, doing so requires the parameters of the real SSM to hold exponentially large values, which cannot be learned in practice. In contrast, a complex SSM can express any given mapping with moderate parameter values. Experiments corroborate our theory, and suggest a potential extension of the theory that accounts for selectivity, a new architectural feature yielding state of the art performance.", "keywords": "Neural Networks;Theory;Structured State Space Models;Mamba;S4;Complex parametrization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yuval Ran-Milo;Eden Lumbroso;Edo Cohen-Karlik;Raja Giryes;Amir Globerson;Nadav Cohen", "authorids": "~Yuval_Ran-Milo1;~Eden_Lumbroso1;~Edo_Cohen-Karlik1;~Raja_Giryes1;~Amir_Globerson1;~Nadav_Cohen1", "gender": "M;M;M;M;M;M", "homepage": "https://www.yuvalrm.com/;;;https://www.giryes.sites.tau.ac.il/;http://www.cs.tau.ac.il/~gamir/;http://www.cohennadav.com", "dblp": "391/0259;;242/8962.html;50/7998;08/4162.html;119/7155", "google_scholar": ";;tWI9Pw8AAAAJ;https://scholar.google.co.il/citations?user=9aQUYVQAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;AfLwLQ0AAAAJ", "orcid": ";;;0000-0002-2830-0297;;", "linkedin": "yuval-ran-milo-35b720141/;eden-lumbroso-a63147182/;;raja-giryes-0818935/;;cohennadav/", "or_profile": "~Yuval_Ran-Milo1;~Eden_Lumbroso1;~Edo_Cohen-Karlik1;~Raja_Giryes1;~Amir_Globerson1;~Nadav_Cohen1", "aff": "Tel Aviv University;Tel Aviv University;;Tel Aviv University;Tel Aviv University;School of Computer Science, Tel Aviv University", "aff_domain": "mail.tau.ac.il;tau.ac.il;;tauex.tau.ac.il;tau.ac.il;cs.tau.ac.il", "position": "PhD student;MS student;;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nran-milo2024provable,\ntitle={Provable Benefits of Complex Parameterizations for Structured State Space Models},\nauthor={Yuval Ran-Milo and Eden Lumbroso and Edo Cohen-Karlik and Raja Giryes and Amir Globerson and Nadav Cohen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h15RyEj151}\n}", "github": "", "reviewers": "UNm3;SDv4;9K2D;sPCd", "pdf_size": 562953, "rating": "5;7;7;8", "confidence": "4;3;5;3", "soundness": "3;4;4;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "72;49;159;67", "wc_strengths": "31;25;113;195", "wc_weaknesses": "112;29;445;120", "wc_questions": "175;2;153;153", "wc_limitations": "1;12;2;1", "wc_review": "391;117;872;536", "wc_reply_reviewers": "265;14;132;59", "wc_reply_authors": "229;13;119;18", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.75, 42.581539427315214 ], "wc_strengths_avg": [ 91.0, 69.38299503480663 ], "wc_weaknesses_avg": [ 176.5, 159.06052307219412 ], "wc_questions_avg": [ 120.75, 69.14613148976593 ], "wc_limitations_avg": [ 4.0, 4.636809247747852 ], "wc_review_avg": [ 479.0, 272.2526400239307 ], "wc_reply_reviewers_avg": [ 117.5, 95.0013157803617 ], "wc_reply_authors_avg": [ 94.75, 88.29602199419858 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3458572319330373, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18433896469808030847&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "mail.tau.ac.il;tau.ac.il;;tauex.tau.ac.il;tau.ac.il;cs.tau.ac.il", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tel Aviv", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Israel" }, { "title": "BLURD: Benchmarking and Learning using a Unified Rendering and Diffusion Model", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97576", "id": "h18O23kQzD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h18O23kQzD", "openreview": "https://openreview.net/forum?id=h18O23kQzD", "poster": "/media/PosterPDFs/NeurIPS%202024/97576.png?t=1731653938.8177342", "project": "", "author_site": "Boris Repasky, Ehsan Abbasnejad, Anthony Dick", "tldr": "", "abstract": "Recent advancements in pre-trained vision models have made them pivotal in computer vision, emphasizing the need for their thorough evaluation and benchmarking. This evaluation needs to consider various factors of variation, their potential biases, shortcuts, and inaccuracies that ultimately lead to disparate performance in models. Such evaluations are conventionally done using either synthetic data from 2D or 3D rendering software or real-world images in controlled settings. Synthetic methods offer full control and flexibility, while real-world methods are limited by high costs and less adaptability. Moreover, 3D rendering can't yet fully replicate real photography, creating a realism gap.\nIn this paper, we introduce BLURD--Benchmarking and Learning using a Unified Rendering and Diffusion Model--a novel method combining 3D rendering and Stable Diffusion to bridge this gap in representation learning. With BLURD we create a new family of datasets that allow for the creation of both 3D rendered and photo-realistic images with identical factors. BLURD, therefore, provides deeper insights into the representations learned by various CLIP backbones. The source code for creating the BLURD datasets is available at https://github.com/squaringTheCircle/BLURD", "keywords": "Representation learning;Synthetic Dataset;Photorealism;3D scene;disentanglement;Large pre-trained models", "primary_area": "", "supplementary_material": "/attachment/3d20cd95f9181b552e039fa03295e72cf80bfa70.pdf", "author": "Boris Repasky;Ehsan Abbasnejad;Anthony Dick", "authorids": "~Boris_Repasky1;~Ehsan_Abbasnejad3;~Anthony_Dick1", "gender": ";;M", "homepage": "https://www.adelaide.edu.au/aiml/our-people#aiml-students;;https://ehsanabb.github.io/", "dblp": "255/8063;;30/11191", "google_scholar": ";https://scholar.google.com.au/citations?user=Y6wo5UwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Boris_Repasky1;~Anthony_Dick1;~Ehsan_M_Abbasnejad1", "aff": "University of Adelaide;University of Adelaide;University of Adelaide", "aff_domain": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nrepasky2024blurd,\ntitle={{BLURD}: Benchmarking and Learning using a Unified Rendering and Diffusion Model},\nauthor={Boris Repasky and Ehsan Abbasnejad and Anthony Dick},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=h18O23kQzD}\n}", "github": "", "reviewers": "jBso;H6Ka;SNeq;bS5K", "pdf_size": 33819719, "rating": "6;6;7;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "68;78;45;175", "wc_strengths": "76;60;109;46", "wc_improvement": "251;13;66;55", "wc_limitations": "1;13;98;32", "wc_correctness": "14;11;1;1", "wc_clarity": "9;26;1;5", "wc_relation_to_prior_work": "37;1;1;41", "wc_documentation": "1;30;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "458;233;323;357", "wc_reply_reviewers": "129;30;0;0", "wc_reply_authors": "96;251;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "4;3;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 91.5, 49.671420354163416 ], "wc_strengths_avg": [ 72.75, 23.466731770743024 ], "wc_improvement_avg": [ 96.25, 91.50785485410529 ], "wc_limitations_avg": [ 36.0, 37.46331538985838 ], "wc_correctness_avg": [ 6.75, 5.84700778176325 ], "wc_clarity_avg": [ 10.25, 9.522998477370455 ], "wc_relation_to_prior_work_avg": [ 20.0, 19.05255888325765 ], "wc_documentation_avg": [ 8.25, 12.55736835487436 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 342.75, 80.49961180030621 ], "wc_reply_reviewers_avg": [ 39.75, 52.964020806581516 ], "wc_reply_authors_avg": [ 86.75, 102.6093928449048 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PbZdAbZv6soJ:scholar.google.com/&scioq=BLURD:+Benchmarking+and+Learning+using+a+Unified+Rendering+and+Diffusion+Model&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "adelaide.edu.au;adelaide.edu.au;adelaide.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Adelaide", "aff_unique_dep": "", "aff_unique_url": "https://www.adelaide.edu.au", "aff_unique_abbr": "Adelaide", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "The Price of Implicit Bias in Adversarially Robust Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94100", "id": "h1grUs6CjN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h1grUs6CjN", "openreview": "https://openreview.net/forum?id=h1grUs6CjN", "poster": "", "project": "", "author_site": "Nikolaos Tsilivis, Natalie Frank, Nati Srebro, Julia Kempe", "tldr": "", "abstract": "We study the implicit bias of optimization in robust empirical risk minimization (robust ERM) and its connection with robust generalization. In classification settings under adversarial perturbations with linear models, we study what type of regularization should ideally be applied for a given perturbation set to improve (robust) generalization. We then show that the implicit bias of optimization in robust ERM can significantly affect the robustness of the model and identify two ways this can happen; either through the optimization algorithm or the architecture. We verify our predictions in simulations with synthetic data and experimentally study the importance of implicit bias in robust ERM with deep neural networks.", "keywords": "Adversarial Robustness;Robust Generalization Gap;Implicit Bias;Optimisation;Generalization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Nikolaos Tsilivis;Natalie Frank;Nathan Srebro;Julia Kempe", "authorids": "~Nikolaos_Tsilivis1;~Natalie_Frank1;~Nathan_Srebro1;~Julia_Kempe1", "gender": ";F;M;", "homepage": "https://tsili42.github.io;https://natalie-frank.github.io/;http://ttic.uchicago.edu/~nati/;", "dblp": "312/6719;263/9872;50/3633;", "google_scholar": "uQ83NcQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ZnT-QpMAAAAJ;", "orcid": ";0009-0007-5582-4487;;", "linkedin": ";;;", "or_profile": "~Nikolaos_Tsilivis1;~Natalie_Frank1;~Nathan_Srebro1;~Julia_Kempe1", "aff": "Toyota Technological Institute at Chicago;New York University;University of Chicago;", "aff_domain": "ttic.edu;nyu.edu;uchicago.edu;", "position": "Intern;PhD student;Full Professor;", "bibtex": "@inproceedings{\ntsilivis2024the,\ntitle={The Price of Implicit Bias in Adversarially Robust Generalization},\nauthor={Nikolaos Tsilivis and Natalie Frank and Nathan Srebro and Julia Kempe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h1grUs6CjN}\n}", "github": "", "reviewers": "ZNCk;Pp2c;mpQX;ey2u", "pdf_size": 1855188, "rating": "5;5;7;7", "confidence": "4;3;2;3", "soundness": "3;4;4;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "73;74;56;45", "wc_strengths": "33;34;43;69", "wc_weaknesses": "195;103;9;95", "wc_questions": "2;23;2;2", "wc_limitations": "2;1;2;4", "wc_review": "305;235;112;215", "wc_reply_reviewers": "0;63;13;12", "wc_reply_authors": "0;2;2;2", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.0, 12.144957801491119 ], "wc_strengths_avg": [ 44.75, 14.53229162933362 ], "wc_weaknesses_avg": [ 100.5, 65.83881833690516 ], "wc_questions_avg": [ 7.25, 9.093266739736606 ], "wc_limitations_avg": [ 2.25, 1.0897247358851685 ], "wc_review_avg": [ 216.75, 69.09549551164677 ], "wc_reply_reviewers_avg": [ 22.0, 24.21776207662467 ], "wc_reply_authors_avg": [ 1.5, 0.8660254037844386 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3197255480694471589&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ttic.edu;nyu.edu;uchicago.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Toyota Technological Institute at Chicago;New York University;University of Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tti-chicago.org;https://www.nyu.edu;https://www.uchicago.edu", "aff_unique_abbr": "TTI Chicago;NYU;UChicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A-FedPD: Aligning Dual-Drift is All Federated Primal-Dual Learning Needs", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94099", "id": "h1iMVi2iEM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h1iMVi2iEM", "openreview": "https://openreview.net/forum?id=h1iMVi2iEM", "poster": "/media/PosterPDFs/NeurIPS%202024/94099.png?t=1729596605.1213686", "project": "", "author_site": "Yan Sun, Li Shen, Dacheng Tao", "tldr": "", "abstract": "As a popular paradigm for juggling data privacy and collaborative training, federated learning (FL) is flourishing to distributively process the large scale of heterogeneous datasets on edged clients. Due to bandwidth limitations and security considerations, it ingeniously splits the original problem into multiple subproblems to be solved in parallel, which empowers primal dual solutions to great application values in FL. In this paper, we review the recent development of classical federated primal dual methods and point out a serious common defect of such methods in non-convex scenarios, which we say is a ``dual drift'' caused by dual hysteresis of those longstanding inactive clients under partial participation training. To further address this problem, we propose a novel Aligned Federated Primal Dual (A-FedPD) method, which constructs virtual dual updates to align global consensus and local dual variables for those protracted unparticipated local clients. Meanwhile, we provide a comprehensive analysis of the optimization and generalization efficiency for the A-FedPD method on smooth non-convex objectives, which confirms its high efficiency and practicality. Extensive experiments are conducted on several classical FL setups to validate the effectiveness of our proposed method.", "keywords": "Federated primal dual methods;dual drift;virtual dual update;generalization", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yan Sun;Li Shen;Dacheng Tao", "authorids": "~Yan_Sun3;~Li_Shen1;~Dacheng_Tao1", "gender": "M;M;", "homepage": ";https://sites.google.com/site/mathshenli/home;", "dblp": ";91/3680-8;", "google_scholar": "_-hoDQkAAAAJ;yVhgENIAAAAJ;", "orcid": "0000-0003-2271-252X;;", "linkedin": ";;", "or_profile": "~Yan_Sun3;~Li_Shen1;~Dacheng_Tao1", "aff": "University of Sydney;JD Explore Academy;", "aff_domain": "uni.sydney.edu.au;jd.com;", "position": "PhD student;Researcher;", "bibtex": "@inproceedings{\nsun2024afedpd,\ntitle={A-Fed{PD}: Aligning Dual-Drift is All Federated Primal-Dual Learning Needs},\nauthor={Yan Sun and Li Shen and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h1iMVi2iEM}\n}", "github": "", "reviewers": "YYQB;c2hH;b9A4;6Q4p", "pdf_size": 1654124, "rating": "5;6;7;7", "confidence": "3;4;5;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "74;23;58;108", "wc_strengths": "139;14;151;52", "wc_weaknesses": "131;528;98;47", "wc_questions": "115;116;6;20", "wc_limitations": "11;1;6;1", "wc_review": "470;682;319;228", "wc_reply_reviewers": "75;305;13;10", "wc_reply_authors": "66;1057;26;13", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 65.75, 30.58083550199373 ], "wc_strengths_avg": [ 89.0, 57.745129664760476 ], "wc_weaknesses_avg": [ 201.0, 191.15046429449237 ], "wc_questions_avg": [ 64.25, 51.489683432703295 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_review_avg": [ 424.75, 171.8420422946608 ], "wc_reply_reviewers_avg": [ 100.75, 120.74430628398177 ], "wc_reply_authors_avg": [ 290.5, 442.9698070975041 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mGQ9p8hcXxcJ:scholar.google.com/&scioq=A-FedPD:+Aligning+Dual-Drift+is+All+Federated+Primal-Dual+Learning+Needs&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "uni.sydney.edu.au;jd.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Sydney;JD", "aff_unique_dep": ";JD Explore Academy", "aff_unique_url": "https://www.sydney.edu.au;", "aff_unique_abbr": "USYD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Australia;" }, { "title": "Action Imitation in Common Action Space for Customized Action Image Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94098", "id": "h2e4G2YiwR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h2e4G2YiwR", "openreview": "https://openreview.net/forum?id=h2e4G2YiwR", "poster": "", "project": "", "author_site": "wang lin, Jingyuan Chen, Jiaxin Shi, Zirun Guo, Yichen Zhu, Zehan Wang, Tao Jin, Zhou Zhao, Fei Wu, Shuicheng Yan, Hanwang Zhang", "tldr": "", "abstract": "We propose a novel method, \\textbf{TwinAct}, to tackle the challenge of decoupling actions and actors in order to customize the text-guided diffusion models (TGDMs) for few-shot action image generation. TwinAct addresses the limitations of existing methods that struggle to decouple actions from other semantics (e.g., the actor's appearance) due to the lack of an effective inductive bias with few exemplar images. Our approach introduces a common action space, which is a textual embedding space focused solely on actions, enabling precise customization without actor-related details. Specifically, TwinAct involves three key steps: 1) Building common action space based on a set of representative action phrases; 2) Imitating the customized action within the action space; and 3) Generating highly adaptable customized action images in diverse contexts with action similarity loss. To comprehensively evaluate TwinAct, we construct a novel benchmark, which provides sample images with various forms of actions. Extensive experiments demonstrate TwinAct's superiority in generating accurate, context-independent customized actions while maintaining the identity consistency of different subjects, including animals, humans, and even customized actors.", "keywords": "customized action; diffusion model; text to image generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Wang Lin;Jingyuan Chen;Jiaxin Shi;Zirun Guo;Yichen Zhu;Zehan Wang;Tao Jin;Zhou Zhao;Fei Wu;Shuicheng YAN;Hanwang Zhang", "authorids": "~Wang_Lin2;~Jingyuan_Chen3;~Jiaxin_Shi3;~Zirun_Guo1;~Yichen_Zhu2;~Zehan_Wang2;~Tao_Jin2;~Zhou_Zhao3;~Fei_Wu1;~Shuicheng_YAN3;~Hanwang_Zhang3", "gender": ";;M;;M;M;M;;M;M;M", "homepage": ";;;https://openreview.net/profile?id=~Zirun_Guo1;https://github.com/Echen-Zhu;https://github.com/12zehan17;https://hugddygff.github.io/;;https://person.zju.edu.cn/wufei;https://yanshuicheng.ai/;https://mreallab.github.io/index.html", "dblp": ";;;382/7932;;126/7826-1;88/4850-4.html;;84/3254-1;y/ShuichengYan;79/8116.html", "google_scholar": ";;8XcQHUEAAAAJ;;9K3a7T8AAAAJ;euXK0lkAAAAJ;;;XJLn4MYAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;YG0DFyYAAAAJ", "orcid": ";;;;;0009-0007-7509-7563;0000-0003-3564-1628;;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Wang_Lin2;~Jingyuan_Chen3;~Jiaxin_Shi3;~Zirun_Guo1;~Yichen_Zhu2;~Zehan_Wang2;~Tao_Jin2;~Zhou_Zhao3;~Fei_Wu1;~Shuicheng_YAN3;~Hanwang_Zhang3", "aff": ";;Huawei Technologies Ltd.;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;sea Group;Nanyang Technological University", "aff_domain": ";;huawei.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;sea.com;ntu.edu.sg", "position": ";;Researcher;MS student;MS student;PhD student;Assistant Professor;;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nlin2024action,\ntitle={Action Imitation in Common Action Space for Customized Action Image Synthesis},\nauthor={Wang Lin and Jingyuan Chen and Jiaxin Shi and Zirun Guo and Yichen Zhu and Zehan Wang and Tao Jin and Zhou Zhao and Fei Wu and Shuicheng YAN and Hanwang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h2e4G2YiwR}\n}", "github": "", "reviewers": "3NDQ;Q179;BXKD;2ZgR", "pdf_size": 35953563, "rating": "5;6;6;7", "confidence": "2;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "67;60;173;85", "wc_strengths": "30;62;102;81", "wc_weaknesses": "152;189;160;91", "wc_questions": "7;5;234;9", "wc_limitations": "8;12;26;1", "wc_review": "264;328;695;267", "wc_reply_reviewers": "16;35;164;40", "wc_reply_authors": "88;59;657;28", "reply_reviewers": "1;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.25, 45.240330458563186 ], "wc_strengths_avg": [ 68.75, 26.47050245084139 ], "wc_weaknesses_avg": [ 148.0, 35.67211796347394 ], "wc_questions_avg": [ 63.75, 98.30405637612316 ], "wc_limitations_avg": [ 11.75, 9.12071817347735 ], "wc_review_avg": [ 388.5, 178.7910791958033 ], "wc_reply_reviewers_avg": [ 63.75, 58.5678025881115 ], "wc_reply_authors_avg": [ 208.0, 260.09709725408317 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17557358939610406413&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": ";;huawei.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;sea.com;ntu.edu.sg", "author_num": 11, "aff_unique_index": "0;1;1;1;1;1;2;3", "aff_unique_norm": "Huawei;Zhejiang University;Sea Group;Nanyang Technological University", "aff_unique_dep": "Huawei Technologies;;;", "aff_unique_url": "https://www.huawei.com;https://www.zju.edu.cn;;https://www.ntu.edu.sg", "aff_unique_abbr": "Huawei;ZJU;;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;2", "aff_country_unique": "China;;Singapore" }, { "title": "Doubly Hierarchical Geometric Representations for Strand-based Human Hairstyle Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94097", "id": "h34jVnPo1c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h34jVnPo1c", "openreview": "https://openreview.net/forum?id=h34jVnPo1c", "poster": "/media/PosterPDFs/NeurIPS%202024/94097.png?t=1733212735.8094823", "project": "", "author_site": "Yunlu Chen, Francisco Vicente Carrasco, Christian H\u00e4ne, Giljoo Nam, Jean-Charles Bazin, Fernando D De la Torre", "tldr": "", "abstract": "We introduce a doubly hierarchical generative representation for strand-based 3D hairstyle geometry that progresses from coarse, low-pass filtered guide hair to densely populated hair strands rich in high-frequency details. We employ the Discrete Cosine Transform (DCT) to separate low-frequency structural curves from high-frequency curliness and noise, avoiding the Gibbs' oscillation issues associated with the standard Fourier transform in open curves. Unlike the guide hair sampled from the scalp UV map grids which may lose capturing details of the hairstyle in existing methods, our method samples optimal sparse guide strands by utilising $k$-medoids clustering centres from low-pass filtered dense strands, which more accurately retain the hairstyle's inherent characteristics. The proposed variational autoencoder-based generation network, with an architecture inspired by geometric deep learning and implicit neural representations, facilitates flexible, off-the-grid guide strand modelling and enables the completion of dense strands in any quantity and density, drawing on principles from implicit neural representations. Empirical evaluations confirm the capacity of the model to generate convincing guide hair and dense strands, complete with nuanced high-frequency details.", "keywords": "strand-based hair;geometry;generative models;3D;implicit neural representations;graph neural networks;frequency decomposition;clustering", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yunlu Chen;Francisco Vicente Carrasco;Christian H\u00e4ne;Giljoo Nam;Jean-Charles Bazin;Fernando De la Torre", "authorids": "~Yunlu_Chen1;~Francisco_Vicente_Carrasco1;~Christian_H\u00e4ne2;~Giljoo_Nam1;~Jean-Charles_Bazin2;~Fernando_De_la_Torre2", "gender": ";Not Specified;;M;;", "homepage": ";https://www.linkedin.com/in/francisco-vicente-carrasco-32a508144/;;https://sites.google.com/view/gjnam/;;", "dblp": ";;;154/1756;;", "google_scholar": ";3elKp9wAAAAJ;;https://scholar.google.co.kr/citations?user=jNScip4AAAAJ;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yunlu_Chen1;~Francisco_Vicente_Carrasco1;~Christian_H\u00e4ne2;~Giljoo_Nam1;~Jean-Charles_Bazin2;~Fernando_De_la_Torre2", "aff": ";Carnegie Mellon University;;Meta;;", "aff_domain": ";andrew.cmu.edu;;meta.com;;", "position": ";Researcher;;Researcher;;", "bibtex": "@inproceedings{\nchen2024doubly,\ntitle={Doubly Hierarchical Geometric Representations for Strand-based Human Hairstyle Generation},\nauthor={Yunlu Chen and Francisco Vicente Carrasco and Christian H{\\\"a}ne and Giljoo Nam and Jean-Charles Bazin and Fernando De la Torre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h34jVnPo1c}\n}", "github": "", "reviewers": "VBS2;mnz6;Sftt;vBP9", "pdf_size": 5192062, "rating": "4;4;5;6", "confidence": "2;1;3;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "63;46;80;143", "wc_strengths": "54;22;118;124", "wc_weaknesses": "89;151;157;127", "wc_questions": "24;10;68;50", "wc_limitations": "20;1;1;22", "wc_review": "250;230;424;466", "wc_reply_reviewers": "0;54;13;153", "wc_reply_authors": "0;527;38;264", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.0, 36.66742423459821 ], "wc_strengths_avg": [ 79.5, 43.066808565297706 ], "wc_weaknesses_avg": [ 131.0, 26.720778431774775 ], "wc_questions_avg": [ 38.0, 22.494443758403985 ], "wc_limitations_avg": [ 11.0, 10.024968827881711 ], "wc_review_avg": [ 342.5, 103.81112657128811 ], "wc_reply_reviewers_avg": [ 55.0, 59.98749869764533 ], "wc_reply_authors_avg": [ 207.25, 210.39174769938103 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8181818181818182, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jdSloOnnJ70J:scholar.google.com/&scioq=Doubly+Hierarchical+Geometric+Representations+for+Strand-based+Human+Hairstyle+Generation&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";andrew.cmu.edu;;meta.com;;", "author_num": 6, "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://meta.com", "aff_unique_abbr": "CMU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Breaking Determinism: Fuzzy Modeling of Sequential Recommendation Using Discrete State Space Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94096", "id": "h3BdT2UMWQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h3BdT2UMWQ", "openreview": "https://openreview.net/forum?id=h3BdT2UMWQ", "poster": "", "project": "", "author_site": "Wenjia Xie, Hao Wang, Luankang Zhang, Rui Zhou, Defu Lian, Enhong Chen", "tldr": "", "abstract": "Sequential recommendation (SR) aims to predict items that users may be interested in based on their historical behavior sequences. We revisit SR from a novel information-theoretic perspective and find that conventional sequential modeling methods fail to adequately capture the randomness and unpredictability of user behavior. Inspired by fuzzy information processing theory, this paper introduces the DDSR model, which uses fuzzy sets of interaction sequences to overcome the limitations and better capture the evolution of users' real interests. Formally based on diffusion transition processes in discrete state spaces, which is unlike common diffusion models such as DDPM that operate in continuous domains. It is better suited for discrete data, using structured transitions instead of arbitrary noise introduction to avoid information loss. Additionally, to address the inefficiency of matrix transformations due to the vast discrete space, we use semantic labels derived from quantization or RQ-VAE to replace item IDs, enhancing efficiency and improving cold start issues. Testing on three public benchmark datasets shows that DDSR outperforms existing state-of-the-art methods in various settings, demonstrating its potential and effectiveness in handling SR tasks.", "keywords": "Sequential Recommendation;Discrete Diffusion Model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/b95f21da89f221572420ffd6ad9684e2c1e8f862.zip", "author": "Wenjia Xie;Hao Wang;Luankang Zhang;Rui Zhou;Defu Lian;Enhong Chen", "authorids": "~Wenjia_Xie1;~Hao_Wang32;~Luankang_Zhang1;~Rui_Zhou10;~Defu_Lian1;~Enhong_Chen1", "gender": "F;M;Not Specified;M;M;M", "homepage": "https://home.ustc.edu.cn/~xiaohulu;http://staff.ustc.edu.cn/~wanghao3/;https://scholar.google.com/citations?user=O5Ib6NYAAAAJ&hl=zh-CN&oi=ao;https://faculty.ustc.edu.cn/liandefu/en/index.htm;http://staff.ustc.edu.cn/~cheneh;https://github.com/Aquarius-zr", "dblp": ";181/2812-76;374/6044;87/10734;07/258;", "google_scholar": ";ou4Miu4AAAAJ;O5Ib6NYAAAAJ;QW0ad4sAAAAJ;Q9h02J0AAAAJ;", "orcid": "0009-0009-6392-2559;0000-0001-9921-2078;0009-0006-5833-5999;0000-0002-3507-9607;0000-0002-4835-4102;0009-0005-9561-7496", "linkedin": ";;;;;", "or_profile": "~Wenjia_Xie1;~Hao_Wang32;~Luankang_Zhang1;~Defu_Lian1;~Enhong_Chen1;~Zhou_rui1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu", "position": "MS student;Associate Researcher;PhD student;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nxie2024breaking,\ntitle={Breaking Determinism: Fuzzy Modeling of Sequential Recommendation Using Discrete State Space Diffusion Model},\nauthor={Wenjia Xie and Hao Wang and Luankang Zhang and Rui Zhou and Defu Lian and Enhong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h3BdT2UMWQ}\n}", "github": "", "reviewers": "ukuQ;qbmo;LQPH;6uVb", "pdf_size": 477788, "rating": "5;5;7;8", "confidence": "4;3;4;4", "soundness": "2;2;4;4", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "57;127;55;30", "wc_strengths": "61;21;81;127", "wc_weaknesses": "69;103;66;2", "wc_questions": "29;14;3;79", "wc_limitations": "6;4;5;32", "wc_review": "222;269;210;270", "wc_reply_reviewers": "36;59;19;40", "wc_reply_authors": "203;125;39;98", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.25, 36.09968836430586 ], "wc_strengths_avg": [ 72.5, 38.1673944617654 ], "wc_weaknesses_avg": [ 60.0, 36.50342449688796 ], "wc_questions_avg": [ 31.25, 29.07210862665452 ], "wc_limitations_avg": [ 11.75, 11.712706775122479 ], "wc_review_avg": [ 242.75, 27.086666461563706 ], "wc_reply_reviewers_avg": [ 38.5, 14.221462653327892 ], "wc_reply_authors_avg": [ 116.25, 58.9549616232595 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16710605988499833808&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DiffusionBlend: Learning 3D Image Prior through Position-aware Diffusion Score Blending for 3D Computed Tomography Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94095", "id": "h3Kv6sdTWO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h3Kv6sdTWO", "openreview": "https://openreview.net/forum?id=h3Kv6sdTWO", "poster": "/media/PosterPDFs/NeurIPS%202024/94095.png?t=1733775822.4468694", "project": "", "author_site": "Bowen Song, Jason Hu, Zhaoxu Luo, Jeffrey Fessler, Liyue Shen", "tldr": "", "abstract": "Diffusion models face significant challenges when employed for large-scale medical image reconstruction in real practice such as 3D Computed Tomography (CT).\nDue to the demanding memory, time, and data requirements, it is difficult to train a diffusion model directly on the entire volume of high-dimensional data to obtain an efficient 3D diffusion prior. \nExisting works utilizing diffusion priors on single 2D image slice with hand-crafted cross-slice regularization would sacrifice the z-axis consistency, which results in severe artifacts along the z-axis. \nIn this work, we propose a novel framework that enables learning the 3D image prior through position-aware 3D-patch diffusion score blending for reconstructing large-scale 3D medical images. To the best of our knowledge, we are the first to utilize a 3D-patch diffusion prior for 3D medical image reconstruction. \nExtensive experiments on sparse view and limited angle CT reconstruction\nshow that our DiffusionBlend method significantly outperforms previous methods\nand achieves state-of-the-art performance on real-world CT reconstruction problems with high-dimensional 3D image (i.e., $256 \\times 256 \\times 500$). Our algorithm also comes with better or comparable computational efficiency than previous state-of-the-art methods. Code is available at https://github.com/efzero/DiffusionBlend.", "keywords": "diffusion models;CT reconstruction;inverse problems", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Bowen Song;Jason Hu;Zhaoxu Luo;Jeffrey A Fessler;Liyue Shen", "authorids": "~Bowen_Song3;~Jason_Hu1;~Zhaoxu_Luo1;~Jeffrey_A_Fessler2;~Liyue_Shen1", "gender": ";M;F;F;M", "homepage": "https://web.stanford.edu/~bowens18/;https://jasonhu4.github.io/;;https://liyueshen.engin.umich.edu/;https://web.eecs.umich.edu/~fessler/", "dblp": ";;;159/2036;f/JeffreyAFessler", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;n1bmg7YAAAAJ;Ho4qk9wAAAAJ;J5f4Gq8AAAAJ", "orcid": ";;;0000-0001-5942-3196;0000-0001-9998-3315", "linkedin": ";jason-hu-7115221a2/;;;jeff-fessler-578ab746", "or_profile": "~Bowen_Song3;~Jason_Hu1;~Zhaoxu_Luo1;~Liyue_Shen1;~Jeffrey_Fessler1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsong2024diffusionblend,\ntitle={DiffusionBlend: Learning 3D Image Prior through Position-aware Diffusion Score Blending for 3D Computed Tomography Reconstruction},\nauthor={Bowen Song and Jason Hu and Zhaoxu Luo and Jeffrey A Fessler and Liyue Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h3Kv6sdTWO}\n}", "github": "", "reviewers": "YiqS;Vnw7;FN1q", "pdf_size": 20889363, "rating": "5;6;7", "confidence": "4;4;5", "soundness": "3;1;3", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "58;46;58", "wc_strengths": "32;27;28", "wc_weaknesses": "56;341;201", "wc_questions": "7;217;63", "wc_limitations": "6;32;35", "wc_review": "159;663;385", "wc_reply_reviewers": "23;36;32", "wc_reply_authors": "466;1039;375", "reply_reviewers": "1;1;1", "reply_authors": "2;4;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.0, 5.656854249492381 ], "wc_strengths_avg": [ 29.0, 2.160246899469287 ], "wc_weaknesses_avg": [ 199.33333333333334, 116.35673117138042 ], "wc_questions_avg": [ 95.66666666666667, 88.78938875538876 ], "wc_limitations_avg": [ 24.333333333333332, 13.021349989749739 ], "wc_review_avg": [ 402.3333333333333, 206.12186255276808 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 5.436502143433364 ], "wc_reply_authors_avg": [ 626.6666666666666, 293.9210022362396 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16554588620235227280&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Certified Machine Unlearning via Noisy Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94094", "id": "h3k2NXu5bJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h3k2NXu5bJ", "openreview": "https://openreview.net/forum?id=h3k2NXu5bJ", "poster": "/media/PosterPDFs/NeurIPS%202024/94094.png?t=1730472117.7758937", "project": "", "author_site": "Eli Chien, Haoyu Wang, Ziang Chen, Pan Li", "tldr": "", "abstract": "``The right to be forgotten'' ensured by laws for user data privacy becomes increasingly important. Machine unlearning aims to efficiently remove the effect of certain data points on the trained model parameters so that it can be approximately the same as if one retrains the model from scratch. We propose to leverage projected noisy stochastic gradient descent for unlearning and establish its first approximate unlearning guarantee under the convexity assumption. Our approach exhibits several benefits, including provable complexity saving compared to retraining, and supporting sequential and batch unlearning. Both of these benefits are closely related to our new results on the infinite Wasserstein distance tracking of the adjacent (un)learning processes. Extensive experiments show that our approach achieves a similar utility under the same privacy constraint while using $2\\%$ and $10\\%$ of the gradient computations compared with the state-of-the-art gradient-based approximate unlearning methods for mini-batch and full-batch settings, respectively.", "keywords": "Machine unlearning;privacy;stochastic gradient descent", "primary_area": "privacy", "supplementary_material": "/attachment/7516e756a98bdbfdd483e2bd4852adf83541724f.zip", "author": "Eli Chien;Haoyu Peter Wang;Ziang Chen;Pan Li", "authorids": "~Eli_Chien1;~Haoyu_Peter_Wang1;~Ziang_Chen1;~Pan_Li2", "gender": "M;M;;M", "homepage": ";https://sites.duke.edu/ziangchen/;;https://sites.google.com/view/eli-chien/home", "dblp": ";;https://dblp.org/pers/hd/l/Li_0005:Pan;222/3243", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;odvrFvIAAAAJ;IroP0EwAAAAJ;N3BuEnYAAAAJ", "orcid": ";0000-0002-8298-5223;;", "linkedin": ";;pan-li-b951105a/;", "or_profile": "~Haoyu_Peter_Wang1;~Ziang_Chen1;~Pan_Li2;~I_Chien2", "aff": "Georgia Institute of Technology;Massachusetts Institute of Technology;Purdue University;Georgia Institute of Technology", "aff_domain": "gatech.edu;mit.edu;purdue.edu;gatech.edu", "position": "PhD student;Instructor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nchien2024certified,\ntitle={Certified Machine Unlearning via Noisy Stochastic Gradient Descent},\nauthor={Eli Chien and Haoyu Peter Wang and Ziang Chen and Pan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h3k2NXu5bJ}\n}", "github": "", "reviewers": "MVBT;SMx2;DXXZ", "pdf_size": 1451819, "rating": "4;6;7", "confidence": "2;4;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "75;90;52", "wc_strengths": "41;4;69", "wc_weaknesses": "306;38;42", "wc_questions": "1;173;41", "wc_limitations": "23;1;1", "wc_review": "446;306;205", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.33333333333333, 15.627610892974722 ], "wc_strengths_avg": [ 38.0, 26.620793877468543 ], "wc_weaknesses_avg": [ 128.66666666666666, 125.40423526429329 ], "wc_questions_avg": [ 71.66666666666667, 73.49074015744357 ], "wc_limitations_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_review_avg": [ 319.0, 98.81632793555256 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12363560973958182715&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "gatech.edu;mit.edu;purdue.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Georgia Institute of Technology;Massachusetts Institute of Technology;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://web.mit.edu;https://www.purdue.edu", "aff_unique_abbr": "Georgia Tech;MIT;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SPIQA: A Dataset for Multimodal Question Answering on Scientific Papers", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97575", "id": "h3lddsY5nf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h3lddsY5nf", "openreview": "https://openreview.net/forum?id=h3lddsY5nf", "poster": "/media/PosterPDFs/NeurIPS%202024/97575.png?t=1733938107.4986053", "project": "", "author_site": "Shraman Pramanick, Rama Chellappa, Subhashini Venugopalan", "tldr": "", "abstract": "Seeking answers to questions within long scientific research articles is a crucial area of study that aids readers in quickly addressing their inquiries. However, existing question-answering (QA) datasets based on scientific papers are limited in scale and focus solely on textual content. We introduce SPIQA (Scientific Paper Image Question Answering), the first large-scale QA dataset specifically designed to interpret complex figures and tables within the context of scientific research articles across various domains of computer science. Leveraging the breadth of expertise and ability of multimodal large language models (MLLMs) to understand figures, we employ automatic and manual curation to create the dataset. We craft an information-seeking task on interleaved images and text that involves multiple images covering a wide variety of plots, charts, tables, schematic diagrams, and result visualizations. SPIQA comprises 270K questions divided into training, validation, and three different evaluation splits. Through extensive experiments with 12 prominent foundational models, we evaluate the ability of current multimodal systems to comprehend the nuanced aspects of research articles. Additionally, we propose a Chain-of-Thought (CoT) evaluation strategy with in-context retrieval that allows fine-grained, step-by-step assessment and improves model performance. We further explore the upper bounds of performance enhancement with additional textual information, highlighting its promising potential for future research and the dataset\u2019s impact on revolutionizing how we interact with scientific literature.", "keywords": "multimodal;QA;dataset;scientific papers;figures and tables", "primary_area": "", "supplementary_material": "", "author": "Shraman Pramanick;Rama Chellappa;Subhashini Venugopalan", "authorids": "~Shraman_Pramanick1;~Rama_Chellappa1;~Subhashini_Venugopalan2", "gender": "M;;", "homepage": "https://shramanpramanick.github.io/;;https://vsubhashini.github.io", "dblp": "289/0043;;21/11044", "google_scholar": "20SubC8AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-3729-8456", "linkedin": "shramanpramanick/;;", "or_profile": "~Shraman_Pramanick1;~Rama_Chellappa1;~Subhashini_Venugopalan2", "aff": "Google;;Google", "aff_domain": "google.com;;google.com", "position": "Intern;;Staff Research Scientist ", "bibtex": "@inproceedings{\npramanick2024spiqa,\ntitle={{SPIQA}: A Dataset for Multimodal Question Answering on Scientific Papers},\nauthor={Shraman Pramanick and Rama Chellappa and Subhashini Venugopalan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=h3lddsY5nf}\n}", "github": "", "reviewers": "oGaX;m6KR;yhyx;HCve", "pdf_size": 23043574, "rating": "5;6;6;7", "confidence": "4;4;4;3", "wc_summary_and_contributions": "92;84;110;119", "wc_strengths": "2;2;117;89", "wc_improvement": "17;2;380;45", "wc_limitations": "3;2;19;1", "wc_correctness": "1;11;7;1", "wc_clarity": "1;10;26;1", "wc_relation_to_prior_work": "1;1;11;1", "wc_documentation": "1;1;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "119;114;676;259", "wc_reply_reviewers": "0;12;72;0", "wc_reply_authors": "93;28;107;0", "reply_reviewers": "0;1;2;0", "reply_authors": "3;2;4;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 101.25, 13.91716565971678 ], "wc_strengths_avg": [ 52.5, 51.46115039522533 ], "wc_improvement_avg": [ 111.0, 156.07209872363478 ], "wc_limitations_avg": [ 6.25, 7.39509972887452 ], "wc_correctness_avg": [ 5.0, 4.242640687119285 ], "wc_clarity_avg": [ 9.5, 10.21028892833107 ], "wc_relation_to_prior_work_avg": [ 3.5, 4.330127018922194 ], "wc_documentation_avg": [ 2.0, 1.7320508075688772 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 292.0, 229.21496460746187 ], "wc_reply_reviewers_avg": [ 21.0, 29.8496231131986 ], "wc_reply_authors_avg": [ 57.0, 44.401576548586654 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16567716802494152816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;;google.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PaGoDA: Progressive Growing of a One-Step Generator from a Low-Resolution Diffusion Teacher", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94093", "id": "h5zYGF68KH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h5zYGF68KH", "openreview": "https://openreview.net/forum?id=h5zYGF68KH", "poster": "", "project": "", "author_site": "Dongjun Kim, Chieh-Hsin Lai, Wei-Hsiang Liao, Yuhta Takida, Naoki Murata, Toshimitsu Uesaka, Yuki Mitsufuji, Stefano Ermon", "tldr": "", "abstract": "The diffusion model performs remarkable in generating high-dimensional content but is computationally intensive, especially during training. We propose Progressive Growing of Diffusion Autoencoder (PaGoDA), a novel pipeline that reduces the training costs through three stages: training diffusion on downsampled data, distilling the pretrained diffusion, and progressive super-resolution. With the proposed pipeline, PaGoDA achieves a $64\\times$ reduced cost in training its diffusion model on $8\\times$ downsampled data; while at the inference, with the single-step, it performs state-of-the-art on ImageNet across all resolutions from $64\\times64$ to $512\\times512$, and text-to-image. PaGoDA's pipeline can be applied directly in the latent space, adding compression alongside the pre-trained autoencoder in Latent Diffusion Models (e.g., Stable Diffusion). The code is available at https://github.com/sony/pagoda.", "keywords": "Diffusion Models; Distillation Models; Generative Models; Generative AI; Text-to-Image Generation; Generative Adversarial Networks (GAN); Variational Autoencoders (VAE)", "primary_area": "generative_models", "supplementary_material": "", "author": "Dongjun Kim;Chieh-Hsin Lai;Wei-Hsiang Liao;Yuhta Takida;Naoki Murata;Toshimitsu Uesaka;Yuki Mitsufuji;Stefano Ermon", "authorids": "~Dongjun_Kim1;~Chieh-Hsin_Lai2;~Wei-Hsiang_Liao1;~Yuhta_Takida1;~Naoki_Murata1;~Toshimitsu_Uesaka1;~Yuki_Mitsufuji1;~Stefano_Ermon1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://sites.google.com/view/dongjun-kim?pli=1;https://chiehhsinjesselai.github.io/;;;;;https://www.yukimitsufuji.com/;http://cs.stanford.edu/~ermon/", "dblp": "03/4394;239/4021;18/4227;225/9928;166/6626;204/3391;136/5043;47/8135", "google_scholar": "https://scholar.google.com/citations?hl=ko;KDnKGu8AAAAJ;zhV53JEAAAAJ;https://scholar.google.co.jp/citations?user=ahqdEYUAAAAJ;https://scholar.google.co.jp/citations?user=oyuTmwoAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;0000-0001-7418-5173;0009-0007-0747-8305;0000-0002-6806-6140;", "linkedin": ";;wei-hsiang-liao-66283154/;;;;mittu1204;", "or_profile": "~Dongjun_Kim1;~Chieh-Hsin_Lai2;~Wei-Hsiang_Liao1;~Yuhta_Takida1;~Naoki_Murata1;~Toshimitsu_Uesaka1;~Yuki_Mitsufuji1;~Stefano_Ermon1", "aff": "Sony AI;Sony AI;Sony Research Inc.;Sony AI;Sony Group Corporation;Sony AI;Tokyo Institute of Technology, Tokyo Institute of Technology;Stanford University", "aff_domain": "sony.com;sony.com;sony.com;sony.com;sony.com;sony.com;titech.ac.jp;stanford.edu", "position": "Intern;Researcher;Staff Research Scientist;AI Engineer;Researcher;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2024pagoda,\ntitle={PaGo{DA}: Progressive Growing of a One-Step Generator from a Low-Resolution Diffusion Teacher},\nauthor={Dongjun Kim and Chieh-Hsin Lai and Wei-Hsiang Liao and Yuhta Takida and Naoki Murata and Toshimitsu Uesaka and Yuki Mitsufuji and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h5zYGF68KH}\n}", "github": "", "reviewers": "TnXh;FHu2;1Kk6", "pdf_size": 4442769, "rating": "5;6;7", "confidence": "5;5;4", "soundness": "3;3;2", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "114;19;94", "wc_strengths": "29;65;158", "wc_weaknesses": "398;435;193", "wc_questions": "48;89;55", "wc_limitations": "1;13;13", "wc_review": "590;621;513", "wc_reply_reviewers": "279;197;233", "wc_reply_authors": "454;70;761", "reply_reviewers": "1;1;2", "reply_authors": "2;2;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.66666666666667, 40.89281382128433 ], "wc_strengths_avg": [ 84.0, 54.35071296680477 ], "wc_weaknesses_avg": [ 342.0, 106.4362093775735 ], "wc_questions_avg": [ 64.0, 17.90716802475106 ], "wc_limitations_avg": [ 9.0, 5.656854249492381 ], "wc_review_avg": [ 574.6666666666666, 45.404356129732264 ], "wc_reply_reviewers_avg": [ 236.33333333333334, 33.55923452974192 ], "wc_reply_authors_avg": [ 428.3333333333333, 282.68278255945404 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13092026626282050774&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sony.com;sony.com;sony.com;sony.com;sony.com;sony.com;titech.ac.jp;stanford.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;0;3;4", "aff_unique_norm": "Sony;Sony Research Inc.;Sony Group Corporation;Tokyo Institute of Technology;Stanford University", "aff_unique_dep": "Sony AI;;;;", "aff_unique_url": "https://www.sony.com;https://www.sony.com;https://www.sony.com;https://www.titech.ac.jp;https://www.stanford.edu", "aff_unique_abbr": "Sony AI;SRI;Sony;Titech;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Tokyo;Stanford", "aff_country_unique_index": "0;0;1;0;0;0;0;1", "aff_country_unique": "Japan;United States" }, { "title": "TPC: Test-time Procrustes Calibration for Diffusion-based Human Image Animation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94092", "id": "h6nSE8AWCT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h6nSE8AWCT", "openreview": "https://openreview.net/forum?id=h6nSE8AWCT", "poster": "/media/PosterPDFs/NeurIPS%202024/94092.png?t=1730095399.7196531", "project": "", "author_site": "Sunjae Yoon, Gwanhyeong Koo, Younghwan Lee, Chang Yoo", "tldr": "", "abstract": "Human image animation aims to generate a human motion video from the inputs of a reference human image and a target motion video. Current diffusion-based image animation systems exhibit high precision in transferring human identity into targeted motion, yet they still exhibit irregular quality in their outputs. Their optimal precision is achieved only when the physical compositions (i.e., scale and rotation) of the human shapes in the reference image and target pose frame are aligned. In the absence of such alignment, there is a noticeable decline in fidelity and consistency. Especially, in real-world environments, this compositional misalignment commonly occurs, posing significant challenges to the practical usage of current systems. To this end, we propose Test-time Procrustes Calibration (TPC), which enhances the robustness of diffusion-based image animation systems by maintaining optimal performance even when faced with compositional misalignment, effectively addressing real-world scenarios. The TPC provides a calibrated reference image for the diffusion model, enhancing its capability to understand the correspondence between human shapes in the reference and target images. Our method is simple and can be applied to any diffusion-based image animation system in a model-agnostic manner, improving the effectiveness at test time without additional training.", "keywords": "Human Image Animation;Diffusion model;Procrustes Analysis", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/518112c47a81b46838cae1ef815510758955d40c.zip", "author": "Sunjae Yoon;Gwanhyeong Koo;Younghwan Lee;Chang D. Yoo", "authorids": "~Sunjae_Yoon1;~Gwanhyeong_Koo2;~Younghwan_Lee1;~Chang_D._Yoo1", "gender": "M;M;M;M", "homepage": "https://dbstjswo505.github.io/;http://sanctusfactory.com/u-aim/;https://sanctusfactory.com/family.php;https://kookie12.github.io/", "dblp": "273/3911;;31/7819;358/7119", "google_scholar": "2A2lRoUAAAAJ;;gFWgUQEAAAAJ;https://scholar.google.co.kr/citations?user=qDCTLZgAAAAJ", "orcid": "0000-0001-7458-5273;0009-0007-1145-4023;0000-0002-0756-7179;", "linkedin": "sunjae-yoon-133294333/;younghwan-%E2%80%8Dlee-722530216/;;", "or_profile": "~Sunjae_Yoon1;~Younghwan_Lee1;~Chang_D._Yoo1;~GwanHyeong_Koo1", "aff": "Korea Advanced Institute of Science and Technology (KAIST);Yonsei University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science and Technology (KAIST)", "aff_domain": "kaist.ac.kr;yonsei.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Undergrad student;Full Professor;MS student", "bibtex": "@inproceedings{\nyoon2024tpc,\ntitle={{TPC}: Test-time Procrustes Calibration for Diffusion-based Human Image Animation},\nauthor={Sunjae Yoon and Gwanhyeong Koo and Younghwan Lee and Chang D. Yoo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h6nSE8AWCT}\n}", "github": "", "reviewers": "KKQ2;c8u6;mDg5;rFbR", "pdf_size": 6044119, "rating": "4;6;6;8", "confidence": "5;5;5;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;4", "wc_summary": "39;110;56;94", "wc_strengths": "55;48;57;75", "wc_weaknesses": "268;133;137;73", "wc_questions": "4;95;26;112", "wc_limitations": "28;15;1;10", "wc_review": "394;401;277;364", "wc_reply_reviewers": "87;20;75;22", "wc_reply_authors": "138;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.75, 28.472574523565655 ], "wc_strengths_avg": [ 58.75, 9.959292143521045 ], "wc_weaknesses_avg": [ 152.75, 71.20524910426197 ], "wc_questions_avg": [ 59.25, 45.328660911171866 ], "wc_limitations_avg": [ 13.5, 9.759610647971568 ], "wc_review_avg": [ 359.0, 49.34065261019558 ], "wc_reply_reviewers_avg": [ 51.0, 30.306764921383476 ], "wc_reply_authors_avg": [ 34.5, 59.75575286112627 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18333227288194246572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;yonsei.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Yonsei University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.yonsei.ac.kr", "aff_unique_abbr": "KAIST;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Dissect Black Box: Interpreting for Rule-Based Explanations in Unsupervised Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94091", "id": "h6o6qXLmHZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h6o6qXLmHZ", "openreview": "https://openreview.net/forum?id=h6o6qXLmHZ", "poster": "/media/PosterPDFs/NeurIPS%202024/94091.png?t=1730367442.2158825", "project": "", "author_site": "Yu Zhang, Ruoyu Li, Nengwu Wu, Qing Li, Xinhan Lin, Yang Hu, Tao Li, Yong Jiang", "tldr": "", "abstract": "In high-stakes sectors such as network security, IoT security, accurately distinguishing between normal and anomalous data is critical due to the significant implications for operational success and safety in decision-making. The complexity is exacerbated by the presence of unlabeled data and the opaque nature of black-box anomaly detection models, which obscure the rationale behind their predictions. In this paper, we present a novel method to interpret the decision-making processes of these models, which are essential for detecting malicious activities without labeled attack data. We put forward the Segmentation Clustering Decision Tree (SCD-Tree), designed to dissect and understand the structure of normal data distributions. The SCD-Tree integrates predictions from the anomaly detection model into its splitting criteria, enhancing the clustering process with the model's insights into anomalies. To further refine these segments, the Gaussian Boundary Delineation (GBD) algorithm is employed to define boundaries within each segmented distribution, effectively delineating normal from anomalous data points. At this point, this approach addresses the curse of dimensionality by segmenting high-dimensional data and ensures resilience to data drift and perturbations through flexible boundary fitting. We transform the intricate operations of anomaly detection into an interpretable rule's format, constructing a comprehensive set of rules for understanding. Our method's evaluation on diverse datasets and models demonstrates superior explanation accuracy, fidelity, and robustness over existing method, proving its efficacy in environments where interpretability is paramount.", "keywords": "Machine Learning;Anomaly Detection;Rule Extraction", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/c8a1e2279ab4981fe986994a8cda751d466e7403.zip", "author": "Yu Zhang;Ruoyu Li;Nengwu Wu;Qing Li;Xinhan Lin;Yang Hu;Tao Li;Yong Jiang", "authorids": "~Yu_Zhang61;~Ruoyu_Li4;~Nengwu_Wu1;~Qing_Li15;~Xinhan_Lin1;~Yang_Hu15;~Tao_Li28;~Yong_Jiang3", "gender": "M;M;M;M;;;Not Specified;M", "homepage": "http://iloveeli.top;https://ruoyu-li.github.io;https://github.com/wunengwu;https://smartinternet.group/qing-li/;;;http://www.metaverse.hnust.edu.cn;", "dblp": ";08/10026-3;;181/2689-6;https://dblp.uni-trier.de/pid/177/5557.html;43/4685-1.html;;74/1552-1.html", "google_scholar": "INA7I98AAAAJ;https://scholar.google.com/citations?hl=en;;54AuaywAAAAJ;;2W3uYmQAAAAJ;;", "orcid": "0000-0001-8968-6683;0000-0003-0754-2817;;0000-0002-6071-473X;;0000-0001-6942-4395;;", "linkedin": ";;;;;;;", "or_profile": "~Yu_Zhang61;~Ruoyu_Li4;~Nengwu_Wu1;~Qing_Li15;~Xinhan_Lin1;~Yang_Hu15;~Tao_Li28;~Yong_Jiang3", "aff": "Tsinghua University;Tsinghua University;Hunan University of Science and Technology;Pengcheng Laboratory;Shanghai Artificial Intelligence Laboratory;Tsinghua University;Hunan University of Science and Technology;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;hnust.edu.cn;pcl.ac.cn;pjlab.org.cn;tsinghua.edu.cn;hnust.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;Undergrad student;Associate Professor;Researcher;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024dissect,\ntitle={Dissect Black Box: Interpreting for Rule-Based Explanations in Unsupervised Anomaly Detection},\nauthor={Yu Zhang and Ruoyu Li and Nengwu Wu and Qing Li and Xinhan Lin and Yang Hu and Tao Li and Yong Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=h6o6qXLmHZ}\n}", "github": "", "reviewers": "dX38;7Q5n;kdm5;ZxG9", "pdf_size": 702012, "rating": "6;6;6;7", "confidence": "2;4;4;4", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "56;61;41;119", "wc_strengths": "38;46;21;59", "wc_weaknesses": "59;192;255;49", "wc_questions": "35;16;18;20", "wc_limitations": "30;21;1;8", "wc_review": "218;336;336;255", "wc_reply_reviewers": "62;27;85;15", "wc_reply_authors": "424;41;534;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.25, 29.65109610115619 ], "wc_strengths_avg": [ 41.0, 13.765899897936205 ], "wc_weaknesses_avg": [ 138.75, 87.69941561948974 ], "wc_questions_avg": [ 22.25, 7.495832175282475 ], "wc_limitations_avg": [ 15.0, 11.247221879201993 ], "wc_review_avg": [ 286.25, 51.44110710317187 ], "wc_reply_reviewers_avg": [ 47.25, 27.806249297594956 ], "wc_reply_authors_avg": [ 249.75, 232.9767960548861 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yd06Vmj241cJ:scholar.google.com/&scioq=Dissect+Black+Box:+Interpreting+for+Rule-Based+Explanations+in+Unsupervised+Anomaly+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;hnust.edu.cn;pcl.ac.cn;pjlab.org.cn;tsinghua.edu.cn;hnust.edu.cn;tsinghua.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0;1;0", "aff_unique_norm": "Tsinghua University;Hunan University of Science and Technology;Pengcheng Laboratory;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hust.edu.cn;;http://www.shailab.org/", "aff_unique_abbr": "THU;HUST;;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SynRS3D: A Synthetic Dataset for Global 3D Semantic Understanding from Monocular Remote Sensing Imagery", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97574", "id": "h7Z2Q36sPk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=h7Z2Q36sPk", "openreview": "https://openreview.net/forum?id=h7Z2Q36sPk", "poster": "/media/PosterPDFs/NeurIPS%202024/97574.png?t=1730122562.2405648", "project": "", "author_site": "JIAN SONG, Hongruixuan Chen, Weihao Xuan, Junshi Xia, Naoto YOKOYA", "tldr": "", "abstract": "Global semantic 3D understanding from single-view high-resolution remote sensing (RS) imagery is crucial for Earth observation (EO). However, this task faces significant challenges due to the high costs of annotations and data collection, as well as geographically restricted data availability. To address these challenges, synthetic data offer a promising solution by being unrestricted and automatically annotatable, thus enabling the provision of large and diverse datasets. We develop a specialized synthetic data generation pipeline for EO and introduce SynRS3D, the largest synthetic RS dataset. SynRS3D comprises 69,667 high-resolution optical images that cover six different city styles worldwide and feature eight land cover types, precise height information, and building change masks. To further enhance its utility, we develop a novel multi-task unsupervised domain adaptation (UDA) method, RS3DAda, coupled with our synthetic dataset, which facilitates the RS-specific transition from synthetic to real scenarios for land cover mapping and height estimation tasks, ultimately enabling global monocular 3D semantic understanding based on synthetic data. Extensive experiments on various real-world datasets demonstrate the adaptability and effectiveness of our synthetic dataset and the proposed RS3DAda method. SynRS3D and related codes are available at https://github.com/JTRNEO/SynRS3D.", "keywords": "Remote Sensing;Synthetic Data;Land Cover Mapping;Height Estimation;3D Semantic Understanding;Domain Adaptation", "primary_area": "", "supplementary_material": "/attachment/f560bcbfcc5042433cc1609a19d2db79d9595ef0.pdf", "author": "Jian Song;Hongruixuan Chen;Weihao Xuan;Junshi Xia;Naoto Yokoya", "authorids": "~Jian_Song10;~Hongruixuan_Chen1;~Weihao_Xuan1;~Junshi_Xia1;~Naoto_Yokoya1", "gender": "M;M;M;M;M", "homepage": ";https://chrx97.com/;https://weihaoxuan.com/;https://researchmap.jp/xiajunshi;https://naotoyokoya.com/", "dblp": ";243/6879;249/5453;97/8958;79/8993", "google_scholar": "CgcMFJsAAAAJ;XOk4Cf0AAAAJ;;n1aKdTkAAAAJ;DJ2KOn8AAAAJ", "orcid": ";0000-0003-0100-4786;;;0000-0002-7321-4590", "linkedin": ";hongruixuan-chen-993b06130/;;;naoto-yokoya-a8780540/", "or_profile": "~Jian_Song10;~Hongruixuan_Chen1;~Weihao_Xuan1;~Junshi_Xia1;~Naoto_Yokoya1", "aff": "The University of Tokyo;The University of Tokyo;Waseda University;RIKEN;The University of Tokyo", "aff_domain": "mns.k.u-tokyo.ac.jp;u-tokyo.ac.jp;waseda.jp;riken.jp;u-tokyo.ac.jp", "position": "PhD student;PhD student;MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsong2024synrsd,\ntitle={Syn{RS}3D: A Synthetic Dataset for Global 3D Semantic Understanding from Monocular Remote Sensing Imagery},\nauthor={Jian Song and Hongruixuan Chen and Weihao Xuan and Junshi Xia and Naoto Yokoya},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=h7Z2Q36sPk}\n}", "github": "", "reviewers": "jAqv;7f97;ezGZ", "pdf_size": 31370876, "rating": "6;7;8", "confidence": "3;4;4", "wc_summary_and_contributions": "84;151;129", "wc_strengths": "62;29;98", "wc_improvement": "8;23;164", "wc_limitations": "1;1;96", "wc_correctness": "1;1;116", "wc_clarity": "1;1;361", "wc_relation_to_prior_work": "1;1;134", "wc_documentation": "15;1;140", "wc_additional_feedback": "1;1;1", "wc_review": "174;209;1239", "wc_reply_reviewers": "26;0;69", "wc_reply_authors": "39;0;87", "reply_reviewers": "1;0;1", "reply_authors": "3;2;3", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 121.33333333333333, 27.884683171152503 ], "wc_strengths_avg": [ 63.0, 28.178005607210743 ], "wc_improvement_avg": [ 65.0, 70.27090436304346 ], "wc_limitations_avg": [ 32.666666666666664, 44.78342947514801 ], "wc_correctness_avg": [ 39.333333333333336, 54.21151989096864 ], "wc_clarity_avg": [ 121.0, 169.7056274847714 ], "wc_relation_to_prior_work_avg": [ 45.333333333333336, 62.69680126520721 ], "wc_documentation_avg": [ 52.0, 62.487332049517576 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 540.6666666666666, 494.00292396795476 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 28.452689777164395 ], "wc_reply_authors_avg": [ 42.0, 35.58089374931439 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15224838950595506586&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": "mns.k.u-tokyo.ac.jp;u-tokyo.ac.jp;waseda.jp;riken.jp;u-tokyo.ac.jp", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Tokyo;Waseda University;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.waseda.jp/top;https://www.riken.jp", "aff_unique_abbr": "UTokyo;Waseda;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "id": "h8LuywKj6N", "title": "GUI-World: A Dataset for GUI-Orientated Multimodal Large Language Models", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Recently, Multimodal Large Language Models (MLLMs) have been used as agents to control keyboard and mouse inputs by directly perceiving the Graphical User Interface (GUI) and generating corresponding code.\nHowever, current agents primarily exhibit excellent understanding capabilities in static environments and are predominantly applied in relatively simple domains, such as Web or mobile interfaces. We argue that a robust GUI agent should be capable of perceiving temporal information on the GUI, including dynamic Web content and multi-step tasks. Additionally, it should possess a comprehensive understanding across various GUI scenarios, including desktop software and multi-window interactions.\nTo this end, this paper introduces a new dataset, termed GUI-World, which features meticulously crafted Human-MLLM annotations, extensively covering six GUI scenarios and eight types of GUI-orientated questions in three formats.\nWe evaluate the capabilities of current state-of-the-art MLLMs, including ImageLLMs and VideoLLMs, in understanding various types of GUI content, especially dynamic and sequential content. Our findings reveal that ImageLLMs struggle with dynamic GUI content without manually annotated keyframes or operation history. On the other hand, VideoLLMs fall short in all GUI-orientated tasks given the sparse of GUI video dataset. Based on GUI-World, we take the initial step of leveraging a fine-tuned VideoLLM as a GUI agent, demonstrating an improved understanding of various GUI tasks. However, due to the limitations in the performance of base LLMs, we conclude that using VideoLLMs as GUI agents remains a significant challenge. We believe our work provides valuable insights for future research in dynamic GUI content understanding.", "keywords": "GUI Agent;Multimodal Large Language Model;Benchmark;Dataset;Video LLM;Instruction Tuning", "primary_area": "", "supplementary_material": "/attachment/ace28d56f68c619f004fa839b89892ffc8d6d95e.zip", "author": "Dongping Chen;Yue Huang;Siyuan Wu;Jingyu Tang;Huichi Zhou;Qihui Zhang;Zhigang He;Yilin Bai;Chujie Gao;Liuyi Chen;Yiqiang Li;Chenlong Wang;Yue Yu;Tianshuo Zhou;Zhen Li;Yi Gui;Yao Wan;Pan Zhou;Jianfeng Gao;Lichao Sun", "authorids": "~Dongping_Chen1;~Yue_Huang9;~Siyuan_Wu6;~Jingyu_Tang1;~Huichi_Zhou2;~Qihui_Zhang1;~Zhigang_He3;~Yilin_Bai1;~Chujie_Gao1;~Liuyi_Chen1;~Yiqiang_Li3;~Chenlong_Wang3;~Yue_Yu15;~Tianshuo_Zhou2;~Zhen_Li24;~Yi_Gui3;~Yao_Wan2;~Pan_Zhou5;~Jianfeng_Gao1;~Lichao_Sun1", "gender": "M;;;F;M;M;M;F;F;F;F;M;M;M;M;M;M;M;M;M", "homepage": "https://dongping-chen.github.io;;https://github.com/nauyisu022;https://jingyuhhh.github.io/;;https://github.com/Mask-Hui;https://github.com/ZhigangHe-hust;https://github.com/baiyilin1005;;https://github.com/NancyCLY;https://scholar.google.com/scholar?scilib=1&hl=zh-CN;https://lucky-wang-chenlong.github.io/;;https://94753233.github.io/;https://github.com/LZpenguin;https://github.com/gystar;http://wanyao.me;http://faculty.hust.edu.cn/pzhou/zh_CN/index.htm;https://www.microsoft.com/en-us/research/people/jfgao/;https://lichao-sun.github.io/", "dblp": "151/7051;;44/3983-1;;368/3214;160/4750;;;366/6075;;;;;;74/2397-50;311/5499;167/0275.html;84/6614-1;92/5339;121/0780-1.html", "google_scholar": ";;v8qD1HsAAAAJ;fehpu2oAAAAJ;1IJyxpUAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;1AqAngQAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;;;https://scholar.google.cz/citations?user=YGS6-hEAAAAJ;https://scholar.google.com.hk/citations?user=zImBrG4AAAAJ;c3MtqtMAAAAJ;cTpFPJgAAAAJ;https://scholar.google.com/citations?hl=en;WhGUE7AAAAAJ", "orcid": "0009-0009-9848-2557;;;;;;;;;;;;;;0009-0007-0873-6126;0009-0006-2841-7942;0000-0001-6937-4180;;;", "linkedin": ";;;;;;;;;;;;yue-yu-198344310/;;;;;;;lichao-sun-b273a290/", "or_profile": "~Dongping_Chen1;~Yue_Huang9;~Siyuan_Wu6;~Jingyu_Tang1;~Huichi_Zhou2;~Qihui_Zhang1;~Zhigang_He3;~Yilin_Bai1;~Chujie_Gao1;~Liuyi_Chen1;~Yiqiang_Li3;~Chenlong_Wang3;~Yue_Yu15;~Tianshuo_Zhou2;~Zhen_Li24;~Yi_Gui3;~Yao_Wan2;~Pan_Zhou5;~Jianfeng_Gao1;~Lichao_Sun1", "aff": "Huazhong University of Science and Technology;;University of Waterloo;Huazhong University of Science and Technology;Imperial College London;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;;Huazhong University of Science and Technology;Royal College of Art;Huazhong University of Science and Technology;Sichuan University;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Microsoft Research;Lehigh University", "aff_domain": "hust.edu.cn;;uwaterloo.ca;hust.edu.cn;imperial.ac.uk;hust.edu.cn;hust.edu.cn;hust.edu.cn;;hust.edu.cn;network.rca.ac.uk;hust.edu.cn;scu.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;microsoft.com;lehigh.edu", "position": "Undergrad student;;Intern;Undergrad student;Researcher;Intern;Undergrad student;Undergrad student;;Undergrad student;MS student;Undergrad student;Undergrad student;Intern;Undergrad student;PhD student;Assistant Professor;Professor;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024guiworld,\ntitle={{GUI}-World: A Dataset for {GUI}-Orientated Multimodal Large Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=h8LuywKj6N}\n}", "github": "", "project": "", "reviewers": "6RQG;pKiX;Mxkp;XMtM", "site": "https://openreview.net/forum?id=h8LuywKj6N", "pdf_size": 5634700, "rating": "6;6;6;7", "confidence": "3;3;3;3", "wc_summary_and_contributions": "64;92;52;70", "wc_strengths": "11;41;65;93", "wc_improvement": "264;214;136;56", "wc_limitations": "16;24;1;123", "wc_correctness": "27;1;1;19", "wc_clarity": "157;1;1;52", "wc_relation_to_prior_work": "1;11;1;25", "wc_documentation": "46;7;1;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "587;392;259;458", "wc_reply_reviewers": "29;378;0;205", "wc_reply_authors": "0;1034;35;534", "reply_reviewers": "1;2;0;2", "reply_authors": "2;5;2;4", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 69.5, 14.517231140957975 ], "wc_strengths_avg": [ 52.5, 30.211752680041588 ], "wc_improvement_avg": [ 167.5, 78.89708486376414 ], "wc_limitations_avg": [ 41.0, 48.05725751642513 ], "wc_correctness_avg": [ 12.0, 11.357816691600547 ], "wc_clarity_avg": [ 52.75, 63.68820534447489 ], "wc_relation_to_prior_work_avg": [ 9.5, 9.836157786453 ], "wc_documentation_avg": [ 18.25, 17.282577932704367 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 424.0, 118.29412495977981 ], "wc_reply_reviewers_avg": [ 153.0, 151.75144150880413 ], "wc_reply_authors_avg": [ 400.75, 422.2365302765738 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:i2TsUp-tIYoJ:scholar.google.com/&scioq=GUI-World:+A+Dataset+for+GUI-Orientated+Multimodal+Large+Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;0;2;0;0;0;0;3;0;4;0;0;0;0;0;5;6", "aff_unique_norm": "Huazhong University of Science and Technology;University of Waterloo;Imperial College London;Royal College of Art;Sichuan University;Microsoft;Lehigh University", "aff_unique_dep": ";;;;;Microsoft Research;", "aff_unique_url": "http://www.hust.edu.cn;https://uwaterloo.ca;https://www.imperial.ac.uk;https://www.rca.ac.uk;https://www.scu.edu.cn;https://www.microsoft.com/en-us/research;https://www.lehigh.edu", "aff_unique_abbr": "HUST;UW;ICL;RCA;SCU;MSR;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0;0;0;2;0;0;0;0;0;0;0;3;3", "aff_country_unique": "China;Canada;United Kingdom;United States" }, { "id": "h8goI8uPXM", "title": "decoupleQ: Towards 2-bit Post-Training Uniform Quantization via decoupling Parameters into Integer and Floating Points", "track": "main", "status": "Reject", "tldr": "", "abstract": "Quantization emerges as one of the most promising compression technologies for deploying efficient large models in recent years. \nHowever, existing quantization schemes suffer from significant accuracy degradation at very low bits, or require some additional computational overhead when deployed, making it difficult to be applied to large-scale applications in industry.\nIn this paper, we propose decoupleQ, achieving a substantial increase in model accuracy, especially at very low bits.\n\ndecoupleQ abandons the traditional heuristic quantization paradigm and decouples the model parameters into integer and floating-point parts, then transforming the quantization problem into a mathematical constrained optimization problem, which is then solved alternatively by off-the-shelf solution methods. decoupleQ gets rid of any tricks for dealing with outliers, sensitive channels, etc., and focuses only on the basic optimization objective to achieve high model accuracy on extreme low bit quantization.\n Quantization via decoupleQ is linear and uniform, making it hardware-friendlier than non-uniform counterpart, and enabling the idea to be migrated to high-bit quantization to enhance its robustness. \n \ndecoupleQ has achieved comparable accuracy as fp16/bf16 for 2-bit quantization of large speech models in our company.\nThe code (including the W2 CUDA kernels) is attached and will be made public.", "keywords": "quantization; large language model; optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/1d817c70ea7f3d1c262ffb99315ca71708f04a3c.zip", "author": "Yi Guo;Fanliu Kong;Xiaoyang Li;Hui Li;WeiChen;Xiaogang Tian;jinping cai;Yang Zhang;Shouda Liu", "authorids": "~Yi_Guo5;~Fanliu_Kong1;~Xiaoyang_Li4;~Hui_Li34;~WeiChen1;~Xiaogang_Tian2;~jinping_cai1;~Yang_Zhang21;~Shouda_Liu1", "gender": "M;M;M;M;M;;M;M;M", "homepage": "https://scholar.google.com/citations?hl=zh-TW&user=QJsDc4cAAAAJ&scilu=&scisig=AMD79ooAAAAAYPLWWnKq-cGkSB4FgGPptJ1acLO3qlTm&gmla=AJsN-F4AELcMHiSujlmlMcuAxp2jJcY1gzBiaBH_8Z1aeJHxzUmtdU_EkCgDU9bAP14zmfOqUCfD8bTmJ0tQ6msxfOTw29VqVqk7BQQ4PgfZSImX8K15aJh8R1-mcf78PztZmF-iBajj&sciund=11717972921654323345;https://github.com/KongFanliu;;;https://github.com/gavinchen430;https://github.com/MyPandaShaoxiang;;https://scholar.google.com/citations?view_op=list_works&hl=en&user=Zyko2wwAAAAJ;https://vsooda.github.io/", "dblp": ";;;;;;;;", "google_scholar": ";;IbRfaRAAAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?view_op=list_works;;;h3aJn_YAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;;%E6%99%96-%E6%9D%8E-938b1a9a/;;;jinpingcai-588863172;;", "or_profile": "~Yi_Guo5;~Fanliu_Kong1;~Xiaoyang_Li4;~Hui_Li34;~WeiChen1;~Xiaogang_Tian2;~jinping_cai1;~Yang_Zhang21;~Shouda_Liu1", "aff": "bytedance technology;ByteDance Inc.;ByteDance Inc.;ByteDance Inc.;ByteDance Inc.;ByteDance Inc.;ByteDance Inc.;;Xiamen University", "aff_domain": "bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;;xmu.edu.cn", "position": "Researcher;Researcher;Researcher;Researcher;Engineer;Researcher;Researcher;;MS student", "bibtex": "@misc{\nanonymous2024decoupleq,\ntitle={decoupleQ: Towards 2-bit Post-Training Uniform Quantization via decoupling Parameters into Integer and Floating Points},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=h8goI8uPXM}\n}", "github": "", "project": "", "reviewers": "jfxa;G89P;6SKU;4m2b", "site": "https://openreview.net/forum?id=h8goI8uPXM", "pdf_size": 496996, "rating": "3;5;5;6", "confidence": "5;4;4;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;1;2", "wc_summary": "50;59;62;57", "wc_strengths": "13;68;61;66", "wc_weaknesses": "69;169;94;213", "wc_questions": "18;5;138;5", "wc_limitations": "1;5;18;9", "wc_review": "151;306;373;350", "wc_reply_reviewers": "91;71;54;72", "wc_reply_authors": "167;642;848;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 57.0, 4.415880433163924 ], "wc_strengths_avg": [ 52.0, 22.66053838724932 ], "wc_weaknesses_avg": [ 136.25, 57.59937065628409 ], "wc_questions_avg": [ 41.5, 55.96650784174407 ], "wc_limitations_avg": [ 8.25, 6.299801584177076 ], "wc_review_avg": [ 295.0, 86.55345169315895 ], "wc_reply_reviewers_avg": [ 72.0, 13.095800853708795 ], "wc_reply_authors_avg": [ 418.75, 338.40609849705726 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5236346582355836940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0;0;1", "aff_unique_norm": "ByteDance;Xiamen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bytedance.com;https://www.xmu.edu.cn", "aff_unique_abbr": "ByteDance;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Detecting Bugs with Substantial Monetary Consequences by LLM and Rule-based Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94090", "id": "hB5NkiET32", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hB5NkiET32", "openreview": "https://openreview.net/forum?id=hB5NkiET32", "poster": "/media/PosterPDFs/NeurIPS%202024/94090.png?t=1731537202.4153192", "project": "", "author_site": "Brian Zhang, Zhuo Zhang", "tldr": "", "abstract": "Financial transactions are increasingly being handled by automated programs called *smart contracts*. \nHowever, one challenge in the adaptation of smart contracts is the presence of vulnerabilities, which can cause significant monetary loss.\nIn 2024, $247.88 M was lost in 20 smart contract exploits.\nAccording to a recent study, accounting bugs (i.e., incorrect implementations of domain-specific financial models) are the most prevalent type of vulnerability, \nand are one of the most difficult to find, requiring substantial human efforts.\nWhile Large Language Models (LLMs) have shown promise in identifying these bugs, they often suffer from lack of generalization of vulnerability types, hallucinations, and problems with representing smart contracts in limited token context space.\nThis paper proposes a hybrid system combining LLMs and rule-based reasoning to detect accounting error vulnerabilities in smart contracts. \nIn particular, it utilizes the understanding capabilities of LLMs to annotate the financial meaning of variables in smart contracts, and employs rule-based reasoning to propagate the information throughout a contract's logic and to validate potential vulnerabilities.\nTo remedy hallucinations, we propose a feedback loop where validation is performed by providing the reasoning trace of vulnerabilities to the LLM for iterative self-reflection. \nWe achieve 75.6% accuracy on the labelling of financial meanings against human annotations. \nFurthermore, we achieve a recall of 90.5% from running on 23 real-world smart contract projects containing 21 accounting error vulnerabilities.\nFinally, we apply the automated technique on 8 recent projects, finding 4 known and 2 unknown bugs.", "keywords": "LLM;rule based reasoning;smart contract;accounting bugs", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/28f91ec74291f0870e601c4886c55ab051fc419b.zip", "author": "Brian Zhang;ZHUO ZHANG", "authorids": "~Brian_Zhang1;~ZHUO_ZHANG1", "gender": "M;", "homepage": "https://niothefirst.github.io/;https://www.cs.purdue.edu/homes/zhan3299/index.html", "dblp": ";16/1234-2.html", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Brian_Zhang1;~ZHUO_ZHANG1", "aff": "University of Texas at Austin;Purdue University", "aff_domain": "utexas.edu;purdue.edu", "position": "Undergrad student;Postdoc", "bibtex": "@inproceedings{\nzhang2024detecting,\ntitle={Detecting Bugs with Substantial Monetary Consequences by {LLM} and Rule-based Reasoning},\nauthor={Brian Zhang and ZHUO ZHANG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hB5NkiET32}\n}", "github": "", "reviewers": "uXwt;F88y;zmms;2yGP", "pdf_size": 3926342, "rating": "5;5;5;7", "confidence": "3;4;5;5", "soundness": "3;2;3;4", "novelty": "2;2;2;3", "presentation": "3;3;2;4", "wc_summary": "92;34;146;80", "wc_strengths": "71;34;78;48", "wc_weaknesses": "115;87;291;116", "wc_questions": "245;48;88;199", "wc_limitations": "8;45;72;11", "wc_review": "531;248;675;454", "wc_reply_reviewers": "0;89;109;75", "wc_reply_authors": "354;517;262;445", "reply_reviewers": "0;1;1;1", "reply_authors": "4;4;4;5", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.0, 39.87480407475377 ], "wc_strengths_avg": [ 57.75, 17.640507362318125 ], "wc_weaknesses_avg": [ 152.25, 80.94867200887239 ], "wc_questions_avg": [ 145.0, 79.95936467981721 ], "wc_limitations_avg": [ 34.0, 26.315394733881533 ], "wc_review_avg": [ 477.0, 154.18333243252982 ], "wc_reply_reviewers_avg": [ 68.25, 41.215136782497765 ], "wc_reply_authors_avg": [ 394.5, 95.85535978754658 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 0.4330127018922193 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15640781379487794774&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 2, "email": "utexas.edu;purdue.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Texas at Austin;Purdue University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.purdue.edu", "aff_unique_abbr": "UT Austin;Purdue", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Scalable and Stable Parallelization of Nonlinear RNNs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94089", "id": "hBCxxVQDBw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hBCxxVQDBw", "openreview": "https://openreview.net/forum?id=hBCxxVQDBw", "poster": "", "project": "", "author_site": "Xavier Gonzalez, Andrew Warrington, Jimmy Smith, Scott Linderman", "tldr": "", "abstract": "Transformers and linear state space models can be evaluated in parallel on modern hardware, but evaluating nonlinear RNNs appears to be an inherently sequential problem. Recently, however, Lim et al. '24 developed an approach called DEER, which evaluates nonlinear RNNs in parallel by posing the states as the solution to a fixed-point problem. They derived a parallel form of Newton's method to solve the fixed-point problem and achieved significant speedups over sequential evaluation. However, the computational complexity of DEER is cubic in the state size, and the algorithm can suffer from numerical instability. We address these limitations with two novel contributions. To reduce the computational complexity, we apply quasi-Newton approximations and show they converge comparably to Newton, use less memory, and are faster. To stabilize DEER, we leverage a connection between the Levenberg-Marquardt algorithm and Kalman smoothing, which we call ELK. This connection allows us to stabilize Newton's method while using efficient parallelized Kalman smoothing algorithms to retain performance. Through several experiments, we show that these innovations allow for parallel evaluation of nonlinear RNNs at larger scales and with greater stability.", "keywords": "RNNs;Newton's method;Parallel algorithms;Scalability;Numerical Stability", "primary_area": "optimization", "supplementary_material": "", "author": "Xavier Gonzalez;Andrew Warrington;Jimmy T.H. Smith;Scott Linderman", "authorids": "~Xavier_Gonzalez1;~Andrew_Warrington2;~Jimmy_T.H._Smith1;~Scott_Linderman1", "gender": "M;M;M;M", "homepage": ";;https://jimmysmith1919.github.io/;https://web.stanford.edu/~swl1/", "dblp": ";207/8575;305/3641;142/2484", "google_scholar": "5sj7cH8AAAAJ;https://scholar.google.co.uk/citations?hl=en;GC9Vv1wAAAAJ;6mD3I24AAAAJ", "orcid": "0000-0002-7558-0893;;0000-0003-2016-2480;", "linkedin": "xavier-gonzalez-517b5262/;;jimmy-t-h-smith-1679b122/;", "or_profile": "~Xavier_Gonzalez1;~Andrew_Warrington2;~Jimmy_T.H._Smith1;~Scott_W_Linderman1", "aff": "Stanford University;;Stanford University;Stanford University", "aff_domain": "stanford.edu;;stanford.edu;stanford.edu", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngonzalez2024towards,\ntitle={Towards Scalable and Stable Parallelization of Nonlinear {RNN}s},\nauthor={Xavier Gonzalez and Andrew Warrington and Jimmy T.H. Smith and Scott Linderman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hBCxxVQDBw}\n}", "github": "", "reviewers": "jU7g;VSf9;4jAU;KXRw;S8Ch;5uzK", "pdf_size": 1675009, "rating": "3;5;6;6;6;7", "confidence": "4;3;2;3;3;4", "soundness": "2;3;3;3;3;4", "novelty": "2;2;2;3;3;3", "presentation": "3;3;3;4;3;4", "wc_summary": "47;47;75;134;349;68", "wc_strengths": "18;19;132;25;51;90", "wc_weaknesses": "56;85;75;73;145;267", "wc_questions": "64;28;121;3;586;1", "wc_limitations": "1;6;8;7;60;4", "wc_review": "186;185;411;242;1191;430", "wc_reply_reviewers": "0;167;25;13;177;26", "wc_reply_authors": "0;425;31;8;272;12", "reply_reviewers": "0;2;1;1;2;1", "reply_authors": "1;3;2;2;3;2", "rating_avg": [ 5.5, 1.2583057392117916 ], "confidence_avg": [ 3.1666666666666665, 0.6871842709362768 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 120.0, 106.4769145558479 ], "wc_strengths_avg": [ 55.833333333333336, 42.29033249127065 ], "wc_weaknesses_avg": [ 116.83333333333333, 72.71959081904194 ], "wc_questions_avg": [ 133.83333333333334, 206.3406703057403 ], "wc_limitations_avg": [ 14.333333333333334, 20.548046676563256 ], "wc_review_avg": [ 440.8333333333333, 349.68624826009705 ], "wc_reply_reviewers_avg": [ 68.0, 74.0990328321947 ], "wc_reply_authors_avg": [ 124.66666666666667, 164.58398734047273 ], "reply_reviewers_avg": [ 1.1666666666666667, 0.6871842709362768 ], "reply_authors_avg": [ 2.1666666666666665, 0.6871842709362768 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.28912165479145596, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16995150618706623506&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "stanford.edu;;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Continuous Partitioning for Graph-Based Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94088", "id": "hCOuip5Ona", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hCOuip5Ona", "openreview": "https://openreview.net/forum?id=hCOuip5Ona", "poster": "", "project": "", "author_site": "Chester Holtz, Pengwen Chen, Zhengchao Wan, Chung-Kuan Cheng, Gal Mishne", "tldr": "", "abstract": "Laplace learning algorithms for graph-based semi-supervised learning have been shown to produce degenerate predictions at low label rates and in imbalanced class regimes, particularly near class boundaries. We propose CutSSL: a framework for graph-based semi-supervised learning based on continuous nonconvex quadratic programming, which provably obtains \\emph{integer} solutions. Our framework is naturally motivated by an \\emph{exact} quadratic relaxation of a cardinality-constrained minimum-cut graph partitioning problem. Furthermore, we show our formulation is related to an optimization problem whose approximate solution is the mean-shifted Laplace learning heuristic, thus providing new insight into the performance of this heuristic. We demonstrate that CutSSL significantly surpasses the current state-of-the-art on k-nearest neighbor graphs and large real-world graph benchmarks across a variety of label rates, class imbalance, and label imbalance regimes. Our implementation is available on Colab\\footnote{\\url{https://colab.research.google.com/drive/1tGU5rxE1N5d0KGcNzlvZ0BgRc7_vob7b?usp=sharing}}.", "keywords": "graph learning;semi-supervised learning", "primary_area": "optimization", "supplementary_material": "/attachment/b0ab2fdf56838a38666adb961149a3d5e2ae2551.zip", "author": "Chester Holtz;Pengwen Chen;Zhengchao Wan;Chung-Kuan Cheng;Gal Mishne", "authorids": "~Chester_Holtz1;~Pengwen_Chen1;~Zhengchao_Wan1;~Chung-Kuan_Cheng1;~Gal_Mishne1", "gender": "M;M;;M;F", "homepage": "https://cseweb.ucsd.edu/~chholtz/;http://www.amath.nchu.edu.tw/en/member_detail.php?Key=20;https://zhengchaow.github.io;http://cseweb.ucsd.edu/~kuan/;http://mishne.ucsd.edu/", "dblp": "161/9916;;228/7893;c/ChungKuanCheng.html;125/3214", "google_scholar": "YqC8p9sAAAAJ;SOCoAN8AAAAJ;kmTKYtoAAAAJ;OogCTloAAAAJ;KrwpdXYAAAAJ", "orcid": "0000-0002-8548-4539;0000-0002-4586-0552;0000-0003-4388-6991;0000-0002-9865-8390;0000-0002-5287-3626", "linkedin": "choltz95/;;;;", "or_profile": "~Chester_Holtz1;~Pengwen_Chen1;~Zhengchao_Wan1;~Chung-Kuan_Cheng1;~Gal_Mishne1", "aff": "University of California, San Diego;National Chung Hsing University, Taichung;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;nchu.edu.tw;ucsd.edu;ucsd.edu;ucsd.edu", "position": "Postdoc;Full Professor;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nholtz2024continuous,\ntitle={Continuous Partitioning for Graph-Based Semi-Supervised Learning},\nauthor={Chester Holtz and Pengwen Chen and Zhengchao Wan and Chung-Kuan Cheng and Gal Mishne},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hCOuip5Ona}\n}", "github": "", "reviewers": "8RfG;5yWx;Gcf5;aiM6", "pdf_size": 1127220, "rating": "4;5;6;8", "confidence": "3;4;4;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "1;3;2;3", "wc_summary": "17;62;100;143", "wc_strengths": "51;117;64;42", "wc_weaknesses": "294;139;161;87", "wc_questions": "9;101;179;3", "wc_limitations": "6;62;25;20", "wc_review": "377;481;529;295", "wc_reply_reviewers": "19;0;0;25", "wc_reply_authors": "70;0;286;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 80.5, 46.53224688320993 ], "wc_strengths_avg": [ 68.5, 29.073183520213263 ], "wc_weaknesses_avg": [ 170.25, 76.33274199188708 ], "wc_questions_avg": [ 73.0, 72.48448109768049 ], "wc_limitations_avg": [ 28.25, 20.69269194667528 ], "wc_review_avg": [ 420.5, 90.93266739736606 ], "wc_reply_reviewers_avg": [ 11.0, 11.20267825120404 ], "wc_reply_authors_avg": [ 89.0, 117.27318534089538 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6831300510639732, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4088280915351489819&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "ucsd.edu;nchu.edu.tw;ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, San Diego;", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;", "aff_unique_abbr": "UCSD;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "An Equivalence Between Static and Dynamic Regret Minimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94087", "id": "hD8Et4uZ1o", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hD8Et4uZ1o", "openreview": "https://openreview.net/forum?id=hD8Et4uZ1o", "poster": "", "project": "", "author_site": "Andrew Jacobsen, Francesco Orabona", "tldr": "", "abstract": "We study the problem of dynamic regret minimization in online convex optimization, in which the objective is to minimize the difference between the cumulative loss of an algorithm and that of an arbitrary sequence of comparators. While the literature on this topic is very rich, a unifying framework for the analysis and design of these algorithms is still missing. In this paper we show that *for linear losses, dynamic regret minimization is equivalent to static regret minimization in an extended decision space*. Using this simple observation, we show that there is a frontier of lower bounds trading off penalties due to the variance of the losses and penalties due to variability of the comparator sequence, and provide a framework for achieving any of the guarantees along this frontier. As a result, we also prove for the first time that adapting to the squared path-length of an arbitrary sequence of comparators to achieve regret $R_{T}(u_{1},\\dots,u_{T})\\le O(\\sqrt{T\\sum_{t} \\\\|u_{t}-u_{t+1}\\\\|^{2}})$ is impossible. However, using our framework we introduce an alternative notion of variability based on a locally-smoothed comparator sequence $\\bar u_{1}, \\dots, \\bar u_{T}$, and provide an algorithm guaranteeing dynamic regret of the form $R_{T}(u_{1},\\dots,u_{T})\\le \\tilde O(\\sqrt{T\\sum_{i}\\\\|\\bar u_{i}-\\bar u_{i+1}\\\\|^{2}})$, while still matching in the worst case the usual path-length dependencies up to polylogarithmic terms.", "keywords": "online learning;dynamic regret;non-stationary;parameter-free", "primary_area": "online_learning", "supplementary_material": "", "author": "Andrew Jacobsen;Francesco Orabona", "authorids": "~Andrew_Jacobsen1;~Francesco_Orabona1", "gender": ";M", "homepage": ";https://francesco.orabona.com/", "dblp": "245/2567;80/3790.html", "google_scholar": "H2iBC18AAAAJ;g1ha-iYAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Andrew_Jacobsen1;~Francesco_Orabona1", "aff": "University of Alberta;Boston University", "aff_domain": "ualberta.ca;bu.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\njacobsen2024an,\ntitle={An Equivalence Between Static and Dynamic Regret Minimization},\nauthor={Andrew Jacobsen and Francesco Orabona},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hD8Et4uZ1o}\n}", "github": "", "reviewers": "YDMs;QTaB;bfmX;Rnvn;28Fn;yAJf", "pdf_size": 566360, "rating": "4;5;6;6;7;8", "confidence": "4;3;3;3;3;3", "soundness": "3;2;4;3;3;3", "novelty": "2;3;4;2;3;3", "presentation": "3;2;2;2;3;3", "wc_summary": "155;72;88;98;207;148", "wc_strengths": "57;74;124;86;39;39", "wc_weaknesses": "405;99;233;448;65;29", "wc_questions": "22;2;33;6;55;26", "wc_limitations": "17;9;1;16;8;8", "wc_review": "656;256;479;654;374;250", "wc_reply_reviewers": "847;5;11;625;13;45", "wc_reply_authors": "1339;0;0;1002;0;0", "reply_reviewers": "3;1;1;3;1;1", "reply_authors": "4;1;1;4;1;1", "rating_avg": [ 6.0, 1.2909944487358056 ], "confidence_avg": [ 3.1666666666666665, 0.3726779962499649 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 2.8333333333333335, 0.6871842709362768 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 128.0, 46.55820729653008 ], "wc_strengths_avg": [ 69.83333333333333, 29.66151640687906 ], "wc_weaknesses_avg": [ 213.16666666666666, 163.93333672224477 ], "wc_questions_avg": [ 24.0, 17.59734828508735 ], "wc_limitations_avg": [ 9.833333333333334, 5.398044913567215 ], "wc_review_avg": [ 444.8333333333333, 167.4041184944053 ], "wc_reply_reviewers_avg": [ 257.6666666666667, 344.4854455883763 ], "wc_reply_authors_avg": [ 390.1666666666667, 560.2893349769286 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.692820323027551, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13685195328346970357&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ualberta.ca;bu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Alberta;Boston University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ualberta.ca;https://www.bu.edu", "aff_unique_abbr": "UAlberta;BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "Surge Phenomenon in Optimal Learning Rate and Batch Size Scaling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94086", "id": "hD9TUV4xdz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hD9TUV4xdz", "openreview": "https://openreview.net/forum?id=hD9TUV4xdz", "poster": "/media/PosterPDFs/NeurIPS%202024/94086.png?t=1730967380.9820917", "project": "", "author_site": "Shuaipeng Li, Penghao Zhao, Hailin Zhang, Xingwu Sun, Hao Wu, Dian Jiao, Weiyan Wang, Chengjun Liu, Zheng Fang, Jinbao Xue, Yangyu Tao, Bin CUI, Di Wang", "tldr": "", "abstract": "In current deep learning tasks, Adam-style optimizers\u2014such as Adam, Adagrad, RMSprop, Adafactor, and Lion\u2014have been widely used as alternatives to SGD-style optimizers. These optimizers typically update model parameters using the sign of gradients, resulting in more stable convergence curves. \nThe learning rate and the batch size are the most critical hyperparameters for optimizers, which require careful tuning to enable effective convergence. Previous research has shown that the optimal learning rate increases linearly (or follows similar rules) with batch size for SGD-style optimizers. However, this conclusion is not applicable to Adam-style optimizers. \nIn this paper, we elucidate the connection between optimal learning rates and batch sizes for Adam-style optimizers through both theoretical analysis and extensive experiments. \nFirst, we raise the scaling law between batch sizes and optimal learning rates in the \u201csign of gradient\u201d case, in which we prove that the optimal learning rate first rises and then falls as the batch size increases. Moreover, the peak value of the surge will gradually move toward the larger batch size as training progresses.\nSecond, we conduct experiments on various CV and NLP tasks and verify the correctness of the scaling law.", "keywords": "Optimal learning rate;Scaling law", "primary_area": "learning_theory", "supplementary_material": "", "author": "Shuaipeng Li;Penghao Zhao;Hailin Zhang;Samm Sun;Hao Wu;Dian Jiao;Weiyan Wang;Chengjun Liu;Zheng Fang;Jinbao Xue;Yangyu Tao;Bin CUI;Di Wang", "authorids": "~Shuaipeng_Li1;~Penghao_Zhao1;~Hailin_Zhang2;~Samm_Sun1;~Hao_Wu39;~Dian_Jiao3;~Weiyan_Wang1;~Chengjun_Liu3;~Zheng_Fang17;~Jinbao_Xue2;~Yangyu_Tao2;~Bin_CUI2;~Di_Wang25", "gender": "M;M;;M;M;M;M;M;M;M;M;M;M", "homepage": ";https://hymie122.github.io/hymiezhao.github.io;https://hugozhl.github.io/;https://llm.hunyuan.tencent.com/;https://easylearningscores.github.io/;https://to.be.add;https://weiyan-wang.github.io/;https://github.com/Kite0011;https://www.google.com/;;https://cuibinpku.github.io/index.html;;https://github.com/liuhatry", "dblp": "151/9206;;04/1131-4;228/5449;111;;17/2902;;https://dblp.org;47/208.html;55/5031.html;;", "google_scholar": "omssCtIAAAAJ;https://scholar.google.com.hk/citations?user=3S5Ua1YAAAAJ;https://scholar.google.com/citations?hl=zh-CN;rjC51OsAAAAJ;HdXMhfcAAAAJ;;https://scholar.google.com.hk/citations?hl=en;;;;IJAU8KoAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": "0009-0009-6325-7114;0009-0003-1436-6660;0009-0000-4188-7742;0009-0008-3222-0901;0009-0008-4084-1409;;0000-0002-4105-0691;;0000-0000-0000-0000;;0000-0003-1681-4677;;", "linkedin": ";;;;;;;;https://www.linkedin;yangyutao/;;;", "or_profile": "~Shuaipeng_Li1;~Penghao_Zhao1;~Hailin_Zhang2;~Samm_Sun1;~Hao_Wu39;~Dian_Jiao3;~Weiyan_Wang1;~Zheng_Fang17;~Jinbao_Xue2;~Yangyu_Tao2;~Bin_CUI2;~Di_Wang25;~Chengjunliu1", "aff": "Tencent;Peking University;Peking University;Tencent Hunyuan;University of Science and Technology of China;Tencent;Tencent;Tencent AI Lab;Shandong University of Science and Technology;;Peking University;Tencent Hunyuan;Tencent AI Platform", "aff_domain": "tencent.com;stu.pku.edu.cn;pku.edu.cn;tencent.com;ustc.edu.cn;tencent.com;tencent.com;tencent.com;sdust.edu.cn;;pku.edu.cn;tencent.com;tencent.com", "position": "Researcher;MS student;PhD student;Researcher;MS student;Researcher;Researcher;Researcher;MS student;;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nli2024surge,\ntitle={Surge Phenomenon in Optimal Learning Rate and Batch Size Scaling},\nauthor={Shuaipeng Li and Penghao Zhao and Hailin Zhang and Samm Sun and Hao Wu and Dian Jiao and Weiyan Wang and Chengjun Liu and Zheng Fang and Jinbao Xue and Yangyu Tao and Bin CUI and Di Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hD9TUV4xdz}\n}", "github": "", "reviewers": "PmLA;ASLs;JT4e;ytaJ", "pdf_size": 3796061, "rating": "5;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;1;2;3", "wc_summary": "83;107;30;49", "wc_strengths": "64;49;46;68", "wc_weaknesses": "152;103;63;94", "wc_questions": "131;107;21;25", "wc_limitations": "162;1;1;19", "wc_review": "592;367;161;255", "wc_reply_reviewers": "98;13;43;19", "wc_reply_authors": "287;49;614;55", "reply_reviewers": "2;1;2;1", "reply_authors": "4;2;3;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 67.25, 29.785692874264313 ], "wc_strengths_avg": [ 56.75, 9.41740410091868 ], "wc_weaknesses_avg": [ 103.0, 31.945265689926575 ], "wc_questions_avg": [ 71.0, 48.76474136094644 ], "wc_limitations_avg": [ 45.75, 67.51805314136361 ], "wc_review_avg": [ 343.75, 160.81258501746683 ], "wc_reply_reviewers_avg": [ 43.25, 33.54381463101655 ], "wc_reply_authors_avg": [ 251.25, 230.37184615312697 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15074248267314722695&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "tencent.com;stu.pku.edu.cn;pku.edu.cn;tencent.com;ustc.edu.cn;tencent.com;tencent.com;tencent.com;sdust.edu.cn;;pku.edu.cn;tencent.com;tencent.com", "author_num": 13, "aff_unique_index": "0;1;1;0;2;0;0;0;3;1;0;0", "aff_unique_norm": "Tencent;Peking University;University of Science and Technology of China;Shandong University of Science and Technology", "aff_unique_dep": "Tencent Holdings Limited;;;", "aff_unique_url": "https://www.tencent.com;http://www.pku.edu.cn;http://www.ustc.edu.cn;http://www.sdstu.edu.cn/", "aff_unique_abbr": "Tencent;Peking U;USTC;SDUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Understanding Multi-Granularity for Open-Vocabulary Part Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94085", "id": "hE6ZxU0N3c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hE6ZxU0N3c", "openreview": "https://openreview.net/forum?id=hE6ZxU0N3c", "poster": "/media/PosterPDFs/NeurIPS%202024/94085.png?t=1733481549.8738742", "project": "", "author_site": "Jiho Choi, Seonho Lee, Seungho Lee, Minhyun Lee, Hyunjung Shim", "tldr": "", "abstract": "Open-vocabulary part segmentation (OVPS) is an emerging research area focused on segmenting fine-grained entities using diverse and previously unseen vocabularies.\nOur study highlights the inherent complexities of part segmentation due to intricate boundaries and diverse granularity, reflecting the knowledge-based nature of part identification.\nTo address these challenges, we propose PartCLIPSeg, a novel framework utilizing generalized parts and object-level contexts to mitigate the lack of generalization in fine-grained parts.\nPartCLIPSeg integrates competitive part relationships and attention control, alleviating ambiguous boundaries and underrepresented parts.\nExperimental results demonstrate that PartCLIPSeg outperforms existing state-of-the-art OVPS methods, offering refined segmentation and an advanced understanding of part relationships within images.\nThrough extensive experiments, our model demonstrated a significant improvement over the state-of-the-art models on the Pascal-Part-116, ADE20K-Part-234, and PartImageNet datasets.", "keywords": "part segmentation;open-vocabulary;multi-granularity", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiho Choi;Seonho Lee;Seungho Lee;Minhyun Lee;Hyunjung Shim", "authorids": "~Jiho_Choi2;~Seonho_Lee2;~Seungho_Lee2;~Minhyun_Lee1;~Hyunjung_Shim1", "gender": "M;M;M;M;F", "homepage": "https://jihochoi.github.io/;http://glanceyes.github.io;;;https://sites.google.com/view/cvml-kaist", "dblp": ";225/0239;;192/0530;72/4620", "google_scholar": "https://scholar.google.co.kr/citations?user=uvwpFpIAAAAJ;DFKGTG0AAAAJ/;vUM0nAgAAAAJ;2hUlCnQAAAAJ;KB5XZGIAAAAJ", "orcid": "0000-0002-7140-7962;0009-0006-4183-4214;0000-0001-5254-9685;;", "linkedin": ";glanceyes?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;seungho-lee-1aba99151/;;", "or_profile": "~Jiho_Choi2;~Seonho_Lee2;~Seungho_Lee2;~Minhyun_Lee1;~Hyunjung_Shim1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Yonsei University;Yonsei University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;yonsei.ac.kr;yonsei.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchoi2024understanding,\ntitle={Understanding Multi-Granularity for Open-Vocabulary Part Segmentation},\nauthor={Jiho Choi and Seonho Lee and Seungho Lee and Minhyun Lee and Hyunjung Shim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hE6ZxU0N3c}\n}", "github": "", "reviewers": "LDit;KLx2;Vaxb", "pdf_size": 27376161, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "135;43;40", "wc_strengths": "76;41;30", "wc_weaknesses": "168;406;129", "wc_questions": "2;135;16", "wc_limitations": "3;13;44", "wc_review": "384;638;259", "wc_reply_reviewers": "0;377;61", "wc_reply_authors": "0;1231;220", "reply_reviewers": "0;2;1", "reply_authors": "1;5;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 72.66666666666667, 44.09333534925911 ], "wc_strengths_avg": [ 49.0, 19.61292091114087 ], "wc_weaknesses_avg": [ 234.33333333333334, 122.42639512058756 ], "wc_questions_avg": [ 51.0, 59.67132197854063 ], "wc_limitations_avg": [ 20.0, 17.45470328211473 ], "wc_review_avg": [ 427.0, 157.68534068411898 ], "wc_reply_reviewers_avg": [ 146.0, 165.22913383137572 ], "wc_reply_authors_avg": [ 483.6666666666667, 536.0225948803112 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.0, 1.632993161855452 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=898559212846525744&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;yonsei.ac.kr;yonsei.ac.kr;kaist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Yonsei University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.yonsei.ac.kr", "aff_unique_abbr": "KAIST;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "DALD: Improving Logits-based Detector without Logits from Black-box LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94084", "id": "hEKSSsv5Q9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hEKSSsv5Q9", "openreview": "https://openreview.net/forum?id=hEKSSsv5Q9", "poster": "/media/PosterPDFs/NeurIPS%202024/94084.png?t=1731223117.3132324", "project": "", "author_site": "Cong Zeng, Shengkun Tang, Xianjun Yang, Yuanzhou Chen, Yiyou Sun, Zhiqiang Xu, Yao Li, Haifeng Chen, Wei Cheng, Dongkuan (DK) Xu", "tldr": "", "abstract": "The advent of Large Language Models (LLMs) has revolutionized text generation, producing outputs that closely mimic human writing. This blurring of lines between machine- and human-written text presents new challenges in distinguishing one from the other \u2013 a task further complicated by the frequent updates and closed nature of leading proprietary LLMs. Traditional logits-based detection methods leverage surrogate models for identifying LLM-generated content when the exact logits are unavailable from black-box LLMs. However, these methods grapple with the misalignment between the distributions of the surrogate and the often undisclosed target models, leading to performance degradation, particularly with the introduction of new, closed-source models. Furthermore, while current methodologies are generally effective when the source model is identified, they falter in scenarios where the model version remains unknown, or the test set comprises outputs from various source models. To address these limitations, we present \\textbf{D}istribution-\\textbf{A}ligned \\textbf{L}LMs \\textbf{D}etection (DALD), an innovative framework that redefines the state-of-the-art performance in black-box text detection even without logits from source LLMs. DALD is designed to align the surrogate model's distribution with that of unknown target LLMs, ensuring enhanced detection capability and resilience against rapid model iterations with minimal training investment. By leveraging corpus samples from publicly accessible outputs of advanced models such as ChatGPT, GPT-4 and Claude-3, DALD fine-tunes surrogate models to synchronize with unknown source model distributions effectively. Our approach achieves SOTA performance in black-box settings on different advanced closed-source and open-source models. The versatility of our method enriches widely adopted zero-shot detection frameworks (DetectGPT, DNA-GPT, Fast-DetectGPT) with a `plug-and-play' enhancement feature. \nExtensive experiments validate that our methodology reliably secures high detection precision for LLM-generated text and effectively detects text from diverse model origins through a singular detector.\nOur method is also robust under the revised text attack and non-English texts.", "keywords": "LLM Detection", "primary_area": "fairness", "supplementary_material": "/attachment/6715df4a51a2f2b103729107e6c91740a13c6046.zip", "author": "Cong Zeng;Shengkun Tang;Xianjun Yang;Yuanzhou Chen;Yiyou Sun;zhiqiang xu;Yao Li;Haifeng Chen;Wei Cheng;Dongkuan Xu", "authorids": "~Cong_Zeng1;~Shengkun_Tang1;~Xianjun_Yang1;~Yuanzhou_Chen1;~Yiyou_Sun1;~zhiqiang_xu1;~Yao_Li1;~Haifeng_Chen1;~Wei_Cheng1;~Dongkuan_Xu2", "gender": "F;;M;M;M;M;F;;M;M", "homepage": "https://github.com/cong-zeng;;;;https://sunyiyou.github.io/;https://scholar.google.com/citations?user=0R20iBMAAAAJ&hl=en;https://liyao880.github.io/yaoli/;https://haifengchen.gitlab.io/intro/;https://chengw07.github.io/;https://dongkuanx27.github.io/", "dblp": "183/9716;;37/10237;298/1088;211/5630;72/51-3.html;;08/57-1.html;89/2506-2.html;142/8139", "google_scholar": "dXZ_XVsAAAAJ;;Tunh15sAAAAJ;mQ0FosEAAAAJ;IKqlQo4AAAAJ;;bQ6YhCwAAAAJ;QzakB68AAAAJ;PRrGVmoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0008-5247-7480;;0000-0003-3318-8444;0009-0003-0826-2697;;0000-0002-5693-8933;0000-0002-7195-5774;;;0000-0002-1456-9658", "linkedin": "cong-zeng-179597278/;;xianjun-yang-0062aa1a6/;yuanzhou-chen-215032253/;;;yao-li-b189574a/;;wei-cheng-ml/;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/", "or_profile": "~Cong_Zeng1;~Shengkun_Tang1;~Xianjun_Yang1;~Yuanzhou_Chen1;~Yiyou_Sun1;~zhiqiang_xu1;~Yao_Li1;~Haifeng_Chen1;~Wei_Cheng1;~Dongkuan_Xu2", "aff": "Mohamed bin Zayed University of Artificial Intelligence;;University of California, Santa Barbara;University of California, Los Angeles;University of California, Berkeley;Mohamed bin Zayed University of Artificial Intelligence;University of North Carolina, Chapel Hill;NEC-Labs;NEC-Labs;North Carolina State University", "aff_domain": "mbzuai.ac.ae;;ucsb.edu;cs.ucla.edu;berkeley.edu;mbzuai.ac.ae;unc.edu;nec-labs.com;nec-labs.com;ncsu.edu", "position": "MS student;;PhD student;PhD student;Postdoc;Assistant Professor;Assistant Professor;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzeng2024dlad,\ntitle={{DLAD}: Improving Logits-based Detector without Logits from Black-box {LLM}s},\nauthor={Cong Zeng and Shengkun Tang and Xianjun Yang and Yuanzhou Chen and Yiyou Sun and zhiqiang xu and Yao Li and Haifeng Chen and Wei Cheng and Dongkuan Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hEKSSsv5Q9}\n}", "github": "", "reviewers": "iaeF;bnRm;yZjY;3RMB", "pdf_size": 1314087, "rating": "4;5;5;8", "confidence": "3;4;3;3", "soundness": "3;2;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "88;77;115;110", "wc_strengths": "21;82;58;136", "wc_weaknesses": "137;170;132;147", "wc_questions": "5;53;104;31", "wc_limitations": "5;27;1;14", "wc_review": "256;409;410;438", "wc_reply_reviewers": "0;68;45;23", "wc_reply_authors": "63;236;254;22", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.5, 15.596473960482221 ], "wc_strengths_avg": [ 74.25, 41.75149697915034 ], "wc_weaknesses_avg": [ 146.5, 14.603081866510234 ], "wc_questions_avg": [ 48.25, 36.396256675652786 ], "wc_limitations_avg": [ 11.75, 9.98436277385793 ], "wc_review_avg": [ 378.25, 71.53451963912248 ], "wc_reply_reviewers_avg": [ 34.0, 25.268557536986556 ], "wc_reply_authors_avg": [ 143.75, 102.4801810107691 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3540974184925796969&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "mbzuai.ac.ae;;ucsb.edu;cs.ucla.edu;berkeley.edu;mbzuai.ac.ae;unc.edu;nec-labs.com;nec-labs.com;ncsu.edu", "author_num": 10, "aff_unique_index": "0;1;2;3;0;4;5;5;6", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of California, Santa Barbara;University of California, Los Angeles;University of California, Berkeley;University of North Carolina;NEC Laboratories;North Carolina State University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.ucsb.edu;https://www.ucla.edu;https://www.berkeley.edu;https://www.unc.edu;https://www.nec-labs.com;https://www.ncsu.edu", "aff_unique_abbr": "MBZUAI;UCSB;UCLA;UC Berkeley;UNC;NEC-Labs;NCSU", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";Santa Barbara;Los Angeles;Berkeley;Chapel Hill", "aff_country_unique_index": "0;1;1;1;0;1;1;1;1", "aff_country_unique": "United Arab Emirates;United States" }, { "title": "Transformers are Minimax Optimal Nonparametric In-Context Learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94083", "id": "hF6vatntqc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hF6vatntqc", "openreview": "https://openreview.net/forum?id=hF6vatntqc", "poster": "/media/PosterPDFs/NeurIPS%202024/94083.png?t=1730449141.3417861", "project": "", "author_site": "Juno Kim, Tai Nakamaki, Taiji Suzuki", "tldr": "", "abstract": "In-context learning (ICL) of large language models has proven to be a surprisingly effective method of learning a new task from only a few demonstrative examples. In this paper, we shed light on the efficacy of ICL from the viewpoint of statistical learning theory. We develop approximation and generalization error analyses for a transformer model composed of a deep neural network and one linear attention layer, pretrained on nonparametric regression tasks sampled from general function spaces including the Besov space and piecewise $\\gamma$-smooth class. In particular, we show that sufficiently trained transformers can achieve -- and even improve upon -- the minimax optimal estimation risk in context by encoding the most relevant basis representations during pretraining. Our analysis extends to high-dimensional or sequential data and distinguishes the \\emph{pretraining} and \\emph{in-context} generalization gaps, establishing upper and lower bounds w.r.t. both the number of tasks and in-context examples. These findings shed light on the effectiveness of few-shot prompting and the roles of task diversity and representation learning for ICL.", "keywords": "in-context learning;transformers;nonparametric regression;minimax optimality;meta-learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Juno Kim;Tai Nakamaki;Taiji Suzuki", "authorids": "~Juno_Kim1;~Tai_Nakamaki1;~Taiji_Suzuki1", "gender": "M;;M", "homepage": "https://junokim1.github.io/;;http://ibis.t.u-tokyo.ac.jp/suzuki/", "dblp": "59/8200;;08/312", "google_scholar": "PEHQlgkAAAAJ;;x8osrBsAAAAJ", "orcid": ";;", "linkedin": ";tai-nakamaki/;", "or_profile": "~Juno_Kim1;~Tai_Nakamaki1;~Taiji_Suzuki1", "aff": "RIKEN;The University of Tokyo;The University of Tokyo", "aff_domain": "riken.jp;u-tokyo.ac.jp;tokyo.ac.jp", "position": "Researcher;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nkim2024transformers,\ntitle={Transformers are Minimax Optimal Nonparametric In-Context Learners},\nauthor={Juno Kim and Tai Nakamaki and Taiji Suzuki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hF6vatntqc}\n}", "github": "", "reviewers": "YwVZ;1n5m;KRWK;eg5H", "pdf_size": 769245, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "3;3;4;3", "novelty": "3;3;2;3", "presentation": "2;2;4;4", "wc_summary": "87;117;457;73", "wc_strengths": "60;40;32;72", "wc_weaknesses": "81;119;88;93", "wc_questions": "65;146;86;102", "wc_limitations": "1;1;1;45", "wc_review": "294;423;664;385", "wc_reply_reviewers": "14;13;127;10", "wc_reply_authors": "54;353;453;46", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 183.5, 158.70333959939217 ], "wc_strengths_avg": [ 51.0, 15.84297951775486 ], "wc_weaknesses_avg": [ 95.25, 14.359230480774379 ], "wc_questions_avg": [ 99.75, 29.752100766164396 ], "wc_limitations_avg": [ 12.0, 19.05255888325765 ], "wc_review_avg": [ 441.5, 136.74520101268635 ], "wc_reply_reviewers_avg": [ 41.0, 49.67393682807917 ], "wc_reply_authors_avg": [ 226.5, 180.02846997072436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13649698741532954096&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "riken.jp;u-tokyo.ac.jp;tokyo.ac.jp", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "RIKEN;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "RIKEN;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Do Counterfactually Fair Image Classifiers Satisfy Group Fairness? -- A Theoretical and Empirical Study", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97573", "id": "hFDdSd6hSM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hFDdSd6hSM", "openreview": "https://openreview.net/forum?id=hFDdSd6hSM", "poster": "", "project": "", "author_site": "Sangwon Jung, Sumin Yu, Sanghyuk Chun, Taesup Moon", "tldr": "", "abstract": "The notion of algorithmic fairness has been actively explored from various aspects of fairness, such as counterfactual fairness (CF) and group fairness (GF). However, the exact relationship between CF and GF remains to be unclear, especially in image classification tasks; the reason is because we often cannot collect counterfactual samples regarding a sensitive attribute, essential for evaluating CF, from the existing images (e.g., a photo of the same person but with different secondary sex characteristics). In this paper, we construct new image datasets for evaluating CF by using a high-quality image editing method and carefully labeling with human annotators. Our datasets, CelebA-CF and LFW-CF, build upon the popular image GF benchmarks; hence, we can evaluate CF and GF simultaneously. We empirically observe that CF does not imply GF in image classification, whereas previous studies on tabular datasets observed the opposite. We theoretically show that it could be due to the existence of a latent attribute $G$ that is correlated with, but not caused by, the sensitive attribute (e.g., secondary sex characteristics are highly correlated with hair length). From this observation, we propose a simple baseline, Counterfactual Knowledge Distillation (CKD), to mitigate such correlation with the sensitive attributes. Extensive experimental results on CelebA-CF and LFW-CF demonstrate that CF-achieving models satisfy GF if we successfully reduce the reliance on $G$ (e.g., using CKD).", "keywords": "Counterfactual fairness;Group fairness;Image editing", "primary_area": "", "supplementary_material": "/attachment/82f0b0afc22f2d92752d9834ef94fb53351714a9.pdf", "author": "Sangwon Jung;Sumin Yu;Sanghyuk Chun;Taesup Moon", "authorids": "~Sangwon_Jung1;~Sumin_Yu1;~Sanghyuk_Chun1;~Taesup_Moon1", "gender": "M;;M;", "homepage": "https://successful-humor-4db.notion.site/Sangwon-Jung-70109a49767a470092a6ee0d02c78313;https://sites.google.com/view/sumin-yu;https://sanghyukchun.github.io/home/;https://mindlab-snu.github.io/people/pi/", "dblp": "236/3698;;213/1095.html;05/4084", "google_scholar": "WdC_a5IAAAAJ;;https://scholar.google.co.kr/citations?user=4_uj0xcAAAAJ;lQlioBoAAAAJ", "orcid": ";;0000-0002-4533-2610;0000-0002-9257-6503", "linkedin": ";;https://kr.linkedin.com/in/sanghyukchun/en;", "or_profile": "~Sangwon_Jung1;~Sumin_Yu1;~Sanghyuk_Chun1;~Taesup_Moon1", "aff": "Seoul National University;Seoul National University;NAVER AI Lab;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;navercorp.com;snu.ac.kr", "position": "PhD student;MS student;Lead research scientist;Associate Professor", "bibtex": "@inproceedings{\njung2024do,\ntitle={Do Counterfactually Fair Image Classifiers Satisfy Group Fairness? -- A Theoretical and Empirical Study},\nauthor={Sangwon Jung and Sumin Yu and Sanghyuk Chun and Taesup Moon},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hFDdSd6hSM}\n}", "github": "", "reviewers": "sLrQ;dGaB;pwn9", "pdf_size": 781335, "rating": "5;5;9", "confidence": "3;3;3", "wc_summary_and_contributions": "78;91;166", "wc_strengths": "82;49;74", "wc_improvement": "197;155;129", "wc_limitations": "6;1;68", "wc_correctness": "18;1;35", "wc_clarity": "31;20;116", "wc_relation_to_prior_work": "18;1;94", "wc_documentation": "102;1;39", "wc_additional_feedback": "1;1;1", "wc_review": "533;320;722", "wc_reply_reviewers": "42;253;107", "wc_reply_authors": "155;63;0", "reply_reviewers": "1;2;1", "reply_authors": "4;4;2", "rating_avg": [ 6.333333333333333, 1.8856180831641267 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 111.66666666666667, 38.784303812524755 ], "wc_strengths_avg": [ 68.33333333333333, 14.055445761538678 ], "wc_improvement_avg": [ 160.33333333333334, 28.015868519267592 ], "wc_limitations_avg": [ 25.0, 30.474032661705056 ], "wc_correctness_avg": [ 18.0, 13.880441875771343 ], "wc_clarity_avg": [ 55.666666666666664, 42.89781139198388 ], "wc_relation_to_prior_work_avg": [ 37.666666666666664, 40.43375927228247 ], "wc_documentation_avg": [ 47.333333333333336, 41.651997417757634 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 525.0, 164.21327595538676 ], "wc_reply_reviewers_avg": [ 134.0, 88.230758053338 ], "wc_reply_authors_avg": [ 72.66666666666667, 63.646593694312415 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4712159277985347996&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "snu.ac.kr;snu.ac.kr;navercorp.com;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;NAVER Corporation", "aff_unique_dep": ";NAVER AI Lab", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Fixed Confidence Best Arm Identification in the Bayesian Setting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94082", "id": "hFTye9Ge40", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hFTye9Ge40", "openreview": "https://openreview.net/forum?id=hFTye9Ge40", "poster": "/media/PosterPDFs/NeurIPS%202024/94082.png?t=1731672377.0701728", "project": "", "author_site": "Kyoungseok Jang, Junpei Komiyama, Kazutoshi Yamazaki", "tldr": "", "abstract": "We consider the fixed-confidence best arm identification (FC-BAI) problem in the Bayesian setting. This problem aims to find the arm of the largest mean with a fixed confidence level when the bandit model has been sampled from the known prior. \nMost studies on the FC-BAI problem have been conducted in the frequentist setting, where the bandit model is predetermined before the game starts. \nWe show that the traditional FC-BAI algorithms studied in the frequentist setting, such as track-and-stop and top-two algorithms, result in arbitrarily suboptimal performances in the Bayesian setting. \nWe also obtain a lower bound of the expected number of samples in the Bayesian setting and introduce a variant of successive elimination that has a matching performance with the lower bound up to a logarithmic factor. Simulations verify the theoretical results.", "keywords": "Multi-armed bandit;Best arm identification", "primary_area": "bandits", "supplementary_material": "/attachment/7410f41c04216acf5137c12dd6cd295b5c43efe2.zip", "author": "Kyoungseok Jang;Junpei Komiyama;Kazutoshi Yamazaki", "authorids": "~Kyoungseok_Jang1;~Junpei_Komiyama1;~Kazutoshi_Yamazaki1", "gender": "M;M;M", "homepage": "https://jajajang.github.io;https://sites.google.com/view/junpeikomiyama/home;https://sites.google.com/site/kyamazak/", "dblp": "296/8698;137/4226;", "google_scholar": "hDqIvzAAAAAJ;https://scholar.google.co.jp/citations?user=1uFfImMAAAAJ;cCRDTGYAAAAJ", "orcid": "0009-0002-6689-4601;;0000-0002-7401-2157", "linkedin": "kyoungseok-jang-856440219/;;", "or_profile": "~Kyoungseok_Jang1;~Junpei_Komiyama1;~Kazutoshi_Yamazaki1", "aff": "New York University;RIKEN;University of Queensland", "aff_domain": "nyu.edu;riken.jp;uq.edu.au", "position": "Postdoc;Researcher;Senior Lecturer", "bibtex": "@inproceedings{\njang2024fixed,\ntitle={Fixed Confidence Best Arm Identification in the Bayesian Setting},\nauthor={Kyoungseok Jang and Junpei Komiyama and Kazutoshi Yamazaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hFTye9Ge40}\n}", "github": "", "reviewers": "4HmL;bY8m;EP71;jd9J", "pdf_size": 610839, "rating": "5;6;6;7", "confidence": "3;4;3;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "4;3;2;4", "wc_summary": "178;100;111;109", "wc_strengths": "49;35;60;54", "wc_weaknesses": "197;71;133;85", "wc_questions": "125;49;74;56", "wc_limitations": "4;7;9;1", "wc_review": "553;262;387;305", "wc_reply_reviewers": "24;9;87;0", "wc_reply_authors": "155;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 124.5, 31.164884084494844 ], "wc_strengths_avg": [ 49.5, 9.233092656309694 ], "wc_weaknesses_avg": [ 121.5, 49.28234978164089 ], "wc_questions_avg": [ 76.0, 29.723727895403698 ], "wc_limitations_avg": [ 5.25, 3.031088913245535 ], "wc_review_avg": [ 376.75, 111.22584007324916 ], "wc_reply_reviewers_avg": [ 30.0, 34.007352146263905 ], "wc_reply_authors_avg": [ 38.75, 67.11696879329399 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6nb0X2polrQJ:scholar.google.com/&scioq=Fixed+Confidence+Best+Arm+Identification+in+the+Bayesian+Setting&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "nyu.edu;riken.jp;uq.edu.au", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "New York University;RIKEN;University of Queensland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nyu.edu;https://www.riken.jp;https://www.uq.edu.au", "aff_unique_abbr": "NYU;RIKEN;UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;Japan;Australia" }, { "title": "Web2Code: A Large-scale Webpage-to-Code Dataset and Evaluation Framework for Multimodal LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97572", "id": "hFVpqkRRH1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hFVpqkRRH1", "openreview": "https://openreview.net/forum?id=hFVpqkRRH1", "poster": "/media/PosterPDFs/NeurIPS%202024/97572.png?t=1732000565.2280135", "project": "", "author_site": "Sukmin Yun, haokun lin, Rusiru Thushara, Mohammad Bhat, Yongxin Wang, zutao jiang, Mingkai Deng, Jinhong Wang, Tianhua Tao, Junbo Li, Haonan Li, Preslav Nakov, Timothy Baldwin, Zhengzhong Liu, Eric Xing, Xiaodan Liang, Zhiqiang Shen", "tldr": "", "abstract": "Multimodal large language models (MLLMs) have shown impressive success across modalities such as image, video, and audio in a variety of understanding and generation tasks.\n However, current MLLMs are surprisingly poor at understanding webpage screenshots and generating their corresponding HTML code.\n To address this problem, \n we propose Web2Code, a benchmark consisting of a new large-scale webpage-to-code dataset for instruction tuning and an evaluation framework for the webpage understanding and HTML code translation abilities of MLLMs. \n For dataset construction, we leverage pretrained LLMs to enhance existing webpage-to-code datasets as well as generate a diverse pool of new webpages rendered into images.\n Specifically, the inputs are webpage images and instructions, while the responses are the webpage's HTML code.\n We further include diverse natural language QA pairs about the webpage content in the responses to enable a more comprehensive understanding of the web content.\n To evaluate model performance in these tasks, we develop an evaluation framework for testing MLLMs' abilities in webpage understanding and web-to-code generation.\n Extensive experiments show that our proposed dataset is beneficial not only to our proposed tasks but also in the general visual domain.\n We hope our work will contribute to the development of general MLLMs suitable for web-based content generation and task automation.\n Our data and code are available at https://github.com/MBZUAI-LLM/web2code.", "keywords": "Web2Code Dataset;Webpage Code Generation and Understanding Benchmark;Multi-modal Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Sukmin Yun;Haokun Lin;Rusiru Thushara;Mohammad Qazim Bhat;Yongxin Wang;Zutao Jiang;Mingkai Deng;Jinhong Wang;Tianhua Tao;Junbo Li;Haonan Li;Preslav Nakov;Timothy Baldwin;Zhengzhong Liu;Eric P. Xing;Xiaodan Liang;Zhiqiang Shen", "authorids": "~Sukmin_Yun1;~Haokun_Lin3;~Rusiru_Thushara1;~Mohammad_Qazim_Bhat1;~Yongxin_Wang4;~Zutao_Jiang1;~Mingkai_Deng1;~Jinhong_Wang2;~Tianhua_Tao1;~Junbo_Li3;~Haonan_Li2;~Preslav_Nakov2;~Timothy_Baldwin1;~Zhengzhong_Liu1;~Eric_Xing1;~Xiaodan_Liang2;~Zhiqiang_Shen1", "gender": ";M;;M;M;M;;M;M;M;M;M;;M;M;F;", "homepage": "https://sites.google.com/view/sukmin-yun;https://blog.csdn.net/qq_46192381;;;https://yongxinwang-ai.github.io/;https://jiangzt.github.io/;https://mingkaid.github.io/;;http://www.taotianhua.com/;https://ljb121002.github.io/;https://haonan-li.github.io/;https://mbzuai.ac.ae/study/faculty/preslav-nakov/;https://eltimster.github.io/www/;https://hunterhector.github.io/;http://www.cs.cmu.edu/~epxing/;https://www.sysu-hcp.net/;", "dblp": "234/9078;;;;95/9248-2;199/6895;301/9617;;296/1990.html;74/8487-3;218/7270.html;https://dblp.uni-trier.de/pid/19/1947;65/4863;166/0352;36/3855;;", "google_scholar": "fQcZ_hQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;4oRTmTgAAAAJ;BCR0RI4AAAAJ;;https://scholar.google.com.hk/citations?user=UjQOIXoAAAAJ;;XVSPtCoAAAAJ;IqfgexsAAAAJ;DfXsKZ4AAAAJ;wjBD1dkAAAAJ;S9E-hMwAAAAJ;https://scholar.google.com.tw/citations?user=5pKTRxEAAAAJ;voxznZAAAAAJ;", "orcid": ";0009-0008-6831-2859;;;;;;0009-0001-1266-1186;;;0000-0001-6623-5089;0000-0002-3600-1510;0000-0003-4525-6950;;;;", "linkedin": "sukmin-yun-975b67129/;;;https://ae.linkedin.com/in/mqazim?original_referer=;;;mingkaideng/;jinhong-wang-531498289/;;;haonan-li-809709b9/;preslavnakov/;;hunterhector/;;;", "or_profile": "~Sukmin_Yun1;~Haokun_Lin3;~Rusiru_Thushara1;~Mohammad_Qazim_Bhat1;~Yongxin_Wang4;~Zutao_Jiang1;~Mingkai_Deng1;~Jinhong_Wang2;~Tianhua_Tao1;~Junbo_Li3;~Haonan_Li2;~Preslav_Nakov2;~Timothy_Baldwin1;~Zhengzhong_Liu1;~Eric_Xing1;~Xiaodan_Liang2;~Zhiqiang_Shen1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;University of Illinois, Urbana Champaign;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;The University of Melbourne;Mohamed bin Zayed University of Artificial Intelligence;School of Computer Science, Carnegie Mellon University;SUN YAT-SEN UNIVERSITY;", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;cmu.edu;mbzuai.ac.ae;illinois.edu;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;unimelb.edu.au;mbzuai.ac.ae;cs.cmu.edu;sysu.edu.cn;", "position": "Postdoc;MS student;;Researcher;MS student;Postdoc;PhD student;MS student;MS student;Researcher;Postdoc;Full Professor;Full Professor;Researcher;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nyun2024webcode,\ntitle={Web2Code: A Large-scale Webpage-to-Code Dataset and Evaluation Framework for Multimodal {LLM}s},\nauthor={Sukmin Yun and Haokun Lin and Rusiru Thushara and Mohammad Qazim Bhat and Yongxin Wang and Zutao Jiang and Mingkai Deng and Jinhong Wang and Tianhua Tao and Junbo Li and Haonan Li and Preslav Nakov and Timothy Baldwin and Zhengzhong Liu and Eric P. Xing and Xiaodan Liang and Zhiqiang Shen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hFVpqkRRH1}\n}", "github": "", "reviewers": "r3wQ;iHjX", "pdf_size": 8093474, "rating": "5;7", "confidence": "5;3", "wc_summary_and_contributions": "109;48", "wc_strengths": "95;100", "wc_improvement": "274;47", "wc_limitations": "60;4", "wc_correctness": "31;6", "wc_clarity": "32;8", "wc_relation_to_prior_work": "8;8", "wc_documentation": "44;5", "wc_additional_feedback": "1;1", "wc_review": "654;227", "wc_reply_reviewers": "0;0", "wc_reply_authors": "0;0", "reply_reviewers": "0;0", "reply_authors": "6;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.0, 1.0 ], "wc_summary_and_contributions_avg": [ 78.5, 30.5 ], "wc_strengths_avg": [ 97.5, 2.5 ], "wc_improvement_avg": [ 160.5, 113.5 ], "wc_limitations_avg": [ 32.0, 28.0 ], "wc_correctness_avg": [ 18.5, 12.5 ], "wc_clarity_avg": [ 20.0, 12.0 ], "wc_relation_to_prior_work_avg": [ 8.0, 0.0 ], "wc_documentation_avg": [ 24.5, 19.5 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 440.5, 213.5 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 4.0, 2.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.9999999999999999, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13964157541291930293&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mbzuai.ac.ae;mbzuai.ac.ae;;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;cmu.edu;mbzuai.ac.ae;illinois.edu;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;unimelb.edu.au;mbzuai.ac.ae;cs.cmu.edu;sysu.edu.cn;", "author_num": 17, "aff_unique_index": "0;0;0;0;0;1;0;2;0;0;0;3;0;1;4", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;University of Illinois Urbana-Champaign;University of Melbourne;Sun Yat-sen University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.cmu.edu;https://illinois.edu;https://www.unimelb.edu.au;http://www.sysu.edu.cn", "aff_unique_abbr": "MBZUAI;CMU;UIUC;UniMelb;SYSU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Urbana-Champaign;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;1;0;1;0;0;0;2;0;1;3", "aff_country_unique": "United Arab Emirates;United States;Australia;China" }, { "title": "Low-Rank Optimal Transport through Factor Relaxation with Latent Coupling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94081", "id": "hGgkdFF2hR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hGgkdFF2hR", "openreview": "https://openreview.net/forum?id=hGgkdFF2hR", "poster": "", "project": "", "author_site": "Peter Halmos, Xinhao Liu, Julian Gold, Benjamin Raphael", "tldr": "", "abstract": "Optimal transport (OT) is a general framework for finding a minimum-cost transport plan, or coupling, between probability distributions, and has many applications in machine learning. A key challenge in applying OT to massive datasets is the quadratic scaling of the coupling matrix with the size of the dataset. [Forrow et al. 2019] introduced a factored coupling for the k-Wasserstein barycenter problem, which [Scetbon et al. 2021] adapted to solve the primal low-rank OT problem. We derive an alternative parameterization of the low-rank problem based on the _latent coupling_ (LC) factorization previously introduced by [Lin et al. 2021] generalizing [Forrow et al. 2019]. The LC factorization has multiple advantages for low-rank OT including decoupling the problem into three OT problems and greater flexibility and interpretability. We leverage these advantages to derive a new algorithm _Factor Relaxation with Latent Coupling_ (FRLC), which uses _coordinate_ mirror descent to compute the LC factorization. FRLC handles multiple OT objectives (Wasserstein, Gromov-Wasserstein, Fused Gromov-Wasserstein), and marginal constraints (balanced, unbalanced, and semi-relaxed) with linear space complexity. We provide theoretical results on FRLC, and demonstrate superior performance on diverse applications -- including graph clustering and spatial transcriptomics -- while demonstrating its interpretability.", "keywords": "Optimal Transport;Sinkhorn;Low-Rank;Matrix Factorization", "primary_area": "other", "supplementary_material": "/attachment/b4542b17debf51e3664ebe447b311b62c2bbbae1.zip", "author": "Peter Halmos;Xinhao Liu;Julian Gold;Benjamin Raphael", "authorids": "~Peter_Halmos1;~Xinhao_Liu3;~Julian_Gold1;~Benjamin_Raphael1", "gender": "M;;M;M", "homepage": ";;https://the-ninth-wave.github.io;https://www.cs.princeton.edu/~braphael/", "dblp": ";;;", "google_scholar": ";ZiK_z9EAAAAJ;;GhvZjJUAAAAJ", "orcid": ";;;", "linkedin": "peter-halmos-680937124/;;;", "or_profile": "~Peter_Halmos1;~Xinhao_Liu3;~Julian_Gold1;~Benjamin_Raphael1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nhalmos2024lowrank,\ntitle={Low-Rank Optimal Transport through Factor Relaxation with Latent Coupling},\nauthor={Peter Halmos and Xinhao Liu and Julian Gold and Benjamin Raphael},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hGgkdFF2hR}\n}", "github": "", "reviewers": "CGKE;pnTm;jx3E;1xw8", "pdf_size": 7382922, "rating": "6;6;7;9", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "40;78;96;150", "wc_strengths": "223;66;48;63", "wc_weaknesses": "66;55;282;105", "wc_questions": "61;77;122;118", "wc_limitations": "1;1;22;31", "wc_review": "391;277;570;467", "wc_reply_reviewers": "14;13;100;13", "wc_reply_authors": "0;0;72;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 91.0, 39.61060464067672 ], "wc_strengths_avg": [ 100.0, 71.34073170356469 ], "wc_weaknesses_avg": [ 127.0, 91.39748355397975 ], "wc_questions_avg": [ 94.5, 26.158172719056658 ], "wc_limitations_avg": [ 13.75, 13.141061600951424 ], "wc_review_avg": [ 426.25, 107.05460055504388 ], "wc_reply_reviewers_avg": [ 35.0, 37.52998800959041 ], "wc_reply_authors_avg": [ 18.0, 31.176914536239792 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9558244931145069210&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "hGhLd2ByoR", "title": "Challenges with unsupervised LLM knowledge discovery", "track": "main", "status": "Reject", "tldr": "", "abstract": "We reveal novel pathologies in existing unsupervised methods seeking to discover latent knowledge from large language model (LLM) activations---instead of knowledge they seem to discover whatever feature of the activations is most prominent. These methods search for hypothesised consistency structures of latent knowledge. We first prove theoretically that arbitrary features (not just knowledge) satisfy the consistency structure of a popular unsupervised knowledge-elicitation method: contrast-consistent search. We then present a series of experiments showing settings in which this and other unsupervised methods result in classifiers that do not predict knowledge, but instead predict a different prominent feature. We conclude that existing unsupervised methods for discovering latent knowledge are insufficient, and we contribute sanity checks to apply to evaluating future knowledge elicitation methods. We offer conceptual arguments grounded in identification issues such as distinguishing a model's knowledge from that of a simulated character's that are likely to persist in future unsupervised methods.", "keywords": "Eliciting latent knowledge;large language models;deception", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Sebastian Farquhar;Vikrant Varma;Zachary Kenton;Johannes Gasteiger;Vladimir Mikulik;Rohin Shah", "authorids": "~Sebastian_Farquhar1;~Vikrant_Varma1;~Zachary_Kenton2;~Johannes_Gasteiger1;~Vladimir_Mikulik1;~Rohin_Shah1", "gender": ";;M;M;M;M", "homepage": "https://sebastianfarquhar.com/;;https://zackenton.github.io/;http://rohinshah.com/;;", "dblp": "215/5432;281/7099;209/9980;145/1009;228/7897;", "google_scholar": "bvShhTEAAAAJ;EPYHbToAAAAJ;https://scholar.google.co.uk/citations?hl=en;odFQXSYAAAAJ;;xGF4YZcAAAAJ", "orcid": ";;;;;", "linkedin": ";;zac-kenton-824429124/;rohin-shah-76405832/;;", "or_profile": "~Sebastian_Farquhar1;~Vikrant_Varma1;~Zachary_Kenton2;~Rohin_Shah1;~Johannes_Klicpera1;~Vladimir_Mikulik2", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind;Google;Google", "aff_domain": "google.com;deepmind.com;google.com;deepmind.com;google.com;deepmind.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024challenges,\ntitle={Challenges with unsupervised {LLM} knowledge discovery},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=hGhLd2ByoR}\n}", "github": "", "project": "", "reviewers": "1zMZ;cjB1;DA2a", "site": "https://openreview.net/forum?id=hGhLd2ByoR", "pdf_size": 3782833, "rating": "3;4;6", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "61;93;100", "wc_strengths": "74;60;21", "wc_weaknesses": "114;238;28", "wc_questions": "142;143;29", "wc_limitations": "1;21;10", "wc_review": "392;555;188", "wc_reply_reviewers": "223;329;104", "wc_reply_authors": "0;156;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 4.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.66666666666667, 16.97710877099579 ], "wc_strengths_avg": [ 51.666666666666664, 22.425184255405547 ], "wc_weaknesses_avg": [ 126.66666666666667, 86.19873677857595 ], "wc_questions_avg": [ 104.66666666666667, 53.50597059103674 ], "wc_limitations_avg": [ 10.666666666666666, 8.178562764256865 ], "wc_review_avg": [ 378.3333333333333, 150.1384546195796 ], "wc_reply_reviewers_avg": [ 218.66666666666666, 91.90695778279732 ], "wc_reply_authors_avg": [ 52.0, 73.53910524340094 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17204021088878162096&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Identifying Selections for Unsupervised Subtask Discovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94080", "id": "hH4bPkOhhh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hH4bPkOhhh", "openreview": "https://openreview.net/forum?id=hH4bPkOhhh", "poster": "/media/PosterPDFs/NeurIPS%202024/94080.png?t=1733600437.4840477", "project": "", "author_site": "Yiwen Qiu, Yujia Zheng, Kun Zhang", "tldr": "", "abstract": "When solving long-horizon tasks, it is intriguing to decompose the high-level task into subtasks. Decomposing experiences into reusable subtasks can improve data efficiency, accelerate policy generalization, and in general provide promising solutions to multi-task reinforcement learning and imitation learning problems. However, the concept of subtasks is not sufficiently understood and modeled yet, and existing works often overlook the true structure of the data generation process: subtasks are the results of a *selection* mechanism on actions, rather than possible underlying confounders or intermediates. Specifically, we provide a theory to identify, and experiments to verify the existence of selection variables in such data. These selections serve as subgoals that indicate subtasks and guide policy. In light of this idea, we develop a sequential non-negative matrix factorization (seq- NMF) method to learn these subgoals and extract meaningful behavior patterns as subtasks. Our empirical results on a challenging Kitchen environment demonstrate that the learned subtasks effectively enhance the generalization to new tasks in multi-task imitation learning scenarios. The codes are provided at this [*link*](https://anonymous.4open.science/r/Identifying\\_Selections\\_for\\_Unsupervised\\_Subtask\\_Discovery/README.md).", "keywords": "Imitation Learning;Causality;Selection", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/5434582d90ac603d59ab3b8d46a36db683bf41fd.zip", "author": "Yiwen Qiu;Yujia Zheng;Kun Zhang", "authorids": "~Yiwen_Qiu1;~Yujia_Zheng1;~Kun_Zhang1", "gender": "F;M;M", "homepage": "https://evieq01.github.io/evieqiu.github.io/;https://yjzheng.com;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "159/9832;245/6109-1.html;96/3115-1", "google_scholar": "tumZYG0AAAAJ;https://scholar.google.co.uk/citations?user=ioiW248AAAAJ;RGoypN4AAAAJ", "orcid": ";0009-0003-5225-6366;", "linkedin": ";;", "or_profile": "~Yiwen_Qiu1;~Yujia_Zheng1;~Kun_Zhang1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nqiu2024identifying,\ntitle={Identifying Selections for Unsupervised Subtask Discovery},\nauthor={Yiwen Qiu and Yujia Zheng and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hH4bPkOhhh}\n}", "github": "", "reviewers": "1Ld6;imJV;WHCR", "pdf_size": 7053790, "rating": "6;8;8", "confidence": "3;2;3", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "37;48;89", "wc_strengths": "34;78;143", "wc_weaknesses": "90;86;53", "wc_questions": "5;2;59", "wc_limitations": "1;8;16", "wc_review": "167;222;360", "wc_reply_reviewers": "17;100;50", "wc_reply_authors": "0;41;19", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 58.0, 22.37558192911788 ], "wc_strengths_avg": [ 85.0, 44.773504069557326 ], "wc_weaknesses_avg": [ 76.33333333333333, 16.579773487261185 ], "wc_questions_avg": [ 22.0, 26.19160170741759 ], "wc_limitations_avg": [ 8.333333333333334, 6.128258770283412 ], "wc_review_avg": [ 249.66666666666666, 81.18428966794555 ], "wc_reply_reviewers_avg": [ 55.666666666666664, 34.120700787384514 ], "wc_reply_authors_avg": [ 20.0, 16.753109164172084 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:L5s-L6Q62Z4J:scholar.google.com/&scioq=Identifying+Selections+for+Unsupervised+Subtask+Discovery&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "andrew.cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HARMONIC: Harnessing LLMs for Tabular Data Synthesis and Privacy Protection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97571", "id": "hHA9qrGZBe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hHA9qrGZBe", "openreview": "https://openreview.net/forum?id=hHA9qrGZBe", "poster": "/media/PosterPDFs/NeurIPS%202024/97571.png?t=1733748360.9648511", "project": "", "author_site": "Yuxin Wang, Duanyu Feng, Yongfu Dai, Zhengyu Chen, Jimin Huang, Sophia Ananiadou, Qianqian Xie, Hao Wang", "tldr": "", "abstract": "Data serves as the fundamental basis for advancing deep learning. The tabular data presented in a structured format is highly valuable for modeling and training.\nHowever, even in the era of LLM, obtaining tabular data from sensitive domains remains a challenge due to privacy or copyright concerns. \nTherefore, exploring the methods for effectively using models like LLMs to generate synthetic tabular data, which is privacy-preserving but similar to original one, is urgent.\nIn this paper, we introduce a new framework HARMONIC for tabular data generation and evaluation by LLMs. In the data generation part of our framework, we employ fine-tuning to generate tabular data and enhance privacy rather than continued pre-training which is often used by previous small-scale LLM-based methods. In particular, we construct an instruction fine-tuning dataset based on the idea of the k-nearest neighbors algorithm to inspire LLMs to discover inter-row relationships. By such fine-tuning, LLMs are trained to remember the format and connections of the data rather than the data itself, which reduces the risk of privacy leakage. The experiments find that our tabular data generation achieves equivalent performance as existing methods but with better privacy by the metric of MLE, DCR, etc.\nIn the evaluation part of our framework, we develop a specific privacy risk metric DLT for LLM synthetic data generation, which quantifies the extent to which the generator itself leaks data. We also developed LLE, a performance evaluation metric for downstream LLM tasks, which is more practical and credible than previous metrics.\nThe experiments show that our data generation method outperform the previous methods in the metrics DLT and LLE.", "keywords": "tabular synthetic data;LLMs;privacy;kNN;instruction fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Yuxin Wang;Duanyu Feng;Yongfu Dai;Zhengyu Chen;Jimin Huang;Sophia Ananiadou;Qianqian Xie;Hao Wang", "authorids": "~Yuxin_Wang10;~Duanyu_Feng1;~Yongfu_Dai1;~Zhengyu_Chen7;~Jimin_Huang1;~Sophia_Ananiadou1;~Qianqian_Xie1;~Hao_Wang80", "gender": "F;M;M;M;M;F;F;M", "homepage": ";https://colfeng.github.io/;;;;http://www.manchester.ac.uk/research/Sophia.ananiadou/;;https://math.scu.edu.cn/info/1013/9318.htm", "dblp": ";273/0679.html;;;163/4119;47/4142;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=aRXvKmwAAAAJ;https://scholar.google.com.hk/citations?user=TI1KZkwAAAAJ;https://scholar.google.com/citations?hl=en;SnQ_CycAAAAJ;https://scholar.google.com.tw/citations?user=quhi-K0AAAAJ;UYW7X_0AAAAJ;", "orcid": "0009-0005-2563-029X;0000-0002-8288-1002;0009-0005-9710-7561;;0000-0002-3501-3907;0000-0002-4097-9191;0000-0002-9588-7454;0000-0003-2480-4436", "linkedin": ";;;;;sophia-ananiadou-ba98b63/;;", "or_profile": "~Yuxin_Wang10;~Duanyu_Feng1;~Yongfu_Dai1;~Zhengyu_Chen7;~Jimin_Huang1;~Sophia_Ananiadou1;~Qianqian_Xie1;~Hao_Wang80", "aff": "Sichuan University;Sichuan University;Sichuan University;Wuhan University;The Fin AI;University of Manchester;Yale University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;scu.edu.cn;whu.edu.cn;thefin.ai;manchester.ac.uk;yale.edu;scu.edu.cn", "position": "MS student;PhD student;MS student;MS student;Principal Researcher;Full Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2024harmonic,\ntitle={{HARMONIC}: Harnessing {LLM}s for Tabular Data Synthesis and Privacy Protection},\nauthor={Yuxin Wang and Duanyu Feng and Yongfu Dai and Zhengyu Chen and Jimin Huang and Sophia Ananiadou and Qianqian Xie and Hao Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hHA9qrGZBe}\n}", "github": "", "reviewers": "povU;N5C1;YZek", "pdf_size": 425702, "rating": "6;7;7", "confidence": "5;4;5", "wc_summary_and_contributions": "47;85;30", "wc_strengths": "3;98;21", "wc_improvement": "97;97;68", "wc_limitations": "60;50;23", "wc_correctness": "13;1;33", "wc_clarity": "5;14;21", "wc_relation_to_prior_work": "11;24;1", "wc_documentation": "7;7;16", "wc_additional_feedback": "1;1;1", "wc_review": "244;377;214", "wc_reply_reviewers": "27;20;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "5;4;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 54.0, 22.992752481307377 ], "wc_strengths_avg": [ 40.666666666666664, 41.20140234938072 ], "wc_improvement_avg": [ 87.33333333333333, 13.67073110293992 ], "wc_limitations_avg": [ 44.333333333333336, 15.627610892974722 ], "wc_correctness_avg": [ 15.666666666666666, 13.199326582148888 ], "wc_clarity_avg": [ 13.333333333333334, 6.548960901462833 ], "wc_relation_to_prior_work_avg": [ 12.0, 9.41629792788369 ], "wc_documentation_avg": [ 10.0, 4.242640687119285 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 278.3333333333333, 70.83470586905514 ], "wc_reply_reviewers_avg": [ 15.666666666666666, 11.440668201153676 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11330644948454737145&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "scu.edu.cn;scu.edu.cn;scu.edu.cn;whu.edu.cn;thefin.ai;manchester.ac.uk;yale.edu;scu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;2;3;4;0", "aff_unique_norm": "Sichuan University;Wuhan University;Fin AI;University of Manchester;Yale University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.scu.edu.cn;http://www.whu.edu.cn/;https://www.thefinai.com;https://www.manchester.ac.uk;https://www.yale.edu", "aff_unique_abbr": "SCU;WHU;Fin AI;UoM;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2;1;0", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Fast Last-Iterate Convergence of Learning in Games Requires Forgetful Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94079", "id": "hK7XTpCtBi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hK7XTpCtBi", "openreview": "https://openreview.net/forum?id=hK7XTpCtBi", "poster": "", "project": "", "author_site": "Yang Cai, Gabriele Farina, Julien Grand-Cl\u00e9ment, Christian Kroer, Chung-Wei Lee, Haipeng Luo, Weiqiang Zheng", "tldr": "", "abstract": "Self play via online learning is one of the premier ways to solve large-scale zero-sum games, both in theory and practice. Particularly popular algorithms include optimistic multiplicative weights update (OMWU) and optimistic gradient-descent-ascent (OGDA). While both algorithms enjoy $O(1/T)$ ergodic convergence to Nash equilibrium in two-player zero-sum games, OMWU offers several advantages, including logarithmic dependence on the size of the payoff matrix and $\\tilde{O}(1/T)$ convergence to coarse correlated equilibria even in general-sum games. However, in terms of last-iterate convergence in two-player zero-sum games, an increasingly popular topic in this area, OGDA guarantees that the duality gap shrinks at a rate of $(1/\\sqrt{T})$, while the best existing last-iterate convergence for OMWU depends on some game-dependent constant that could be arbitrarily large. This begs the question: is this potentially slow last-iterate convergence an inherent disadvantage of OMWU, or is the current analysis too loose? Somewhat surprisingly, we show that the former is true. More generally, we prove that a broad class of algorithms that do not forget the past quickly all suffer the same issue: for any arbitrarily small $\\delta>0$, there exists a $2\\times 2$ matrix game such that the algorithm admits a constant duality gap even after $1/\\delta$ rounds. This class of algorithms includes OMWU and other standard optimistic follow-the-regularized-leader algorithms.", "keywords": "Last-Iterate Convergence;Zero-Sum Games;Optimistic Multicaptive Weights Update;Optimistic Gradient", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/02d22bbd0123cf6d88ce1148b7f1ea57f70dba06.zip", "author": "Yang Cai;Gabriele Farina;Julien Grand-Cl\u00e9ment;Christian Kroer;Chung-Wei Lee;Haipeng Luo;Weiqiang Zheng", "authorids": "~Yang_Cai1;~Gabriele_Farina1;~Julien_Grand-Cl\u00e9ment1;~Christian_Kroer1;~Chung-Wei_Lee1;~Haipeng_Luo1;~Weiqiang_Zheng1", "gender": ";M;M;M;;M;M", "homepage": ";http://www.cs.cmu.edu/~gfarina/about/;https://julien-grand-clement.fr/;http://www.columbia.edu/~ck2945/;https://chungwei.net/;https://haipeng-luo.net/;https://weiqiang-zheng.com/", "dblp": ";;197/0112;64/10660;80/2550;62/2576;277/5088", "google_scholar": ";sktDNcEAAAAJ;https://scholar.google.fr/citations?user=K_ZLzdoAAAAJ;https://scholar.google.ch/citations?user=ckHwjPAAAAAJ;VVVc6BIAAAAJ;ct2hw4UAAAAJ;YrfhnIwAAAAJ", "orcid": ";;;0000-0002-9009-8683;;;", "linkedin": ";;;;;;", "or_profile": "~Yang_Cai1;~Gabriele_Farina1;~Julien_Grand-Cl\u00e9ment1;~Christian_Kroer1;~Chung-Wei_Lee1;~Haipeng_Luo1;~Weiqiang_Zheng1", "aff": ";Massachusetts Institute of Technology;HEC Paris;Columbia University;University of Southern California;University of Southern California;Yale University", "aff_domain": ";mit.edu;hec.fr;columbia.edu;usc.edu;usc.edu;yale.edu", "position": ";Assistant Professor;Assistant Professor;Associate Professor;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\ncai2024fast,\ntitle={Fast Last-Iterate Convergence of Learning in Games Requires Forgetful Algorithms},\nauthor={Yang Cai and Gabriele Farina and Julien Grand-Cl{\\'e}ment and Christian Kroer and Chung-Wei Lee and Haipeng Luo and Weiqiang Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hK7XTpCtBi}\n}", "github": "", "reviewers": "FqEz;7DUi;DNGf;RnhV", "pdf_size": 1338934, "rating": "6;7;7;8", "confidence": "2;3;4;3", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "2;4;3;3", "wc_summary": "50;192;62;91", "wc_strengths": "46;44;66;153", "wc_weaknesses": "188;49;69;79", "wc_questions": "68;27;54;76", "wc_limitations": "2;3;1;4", "wc_review": "354;315;252;403", "wc_reply_reviewers": "48;11;21;6", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.75, 55.86311394829329 ], "wc_strengths_avg": [ 77.25, 44.572272771309294 ], "wc_weaknesses_avg": [ 96.25, 54.061885834661744 ], "wc_questions_avg": [ 56.25, 18.632968094214082 ], "wc_limitations_avg": [ 2.5, 1.118033988749895 ], "wc_review_avg": [ 331.0, 55.24943438624508 ], "wc_reply_reviewers_avg": [ 21.5, 16.224980739587952 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7803811056403373006&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";mit.edu;hec.fr;columbia.edu;usc.edu;usc.edu;yale.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;3;4", "aff_unique_norm": "Massachusetts Institute of Technology;HEC Paris;Columbia University;University of Southern California;Yale University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://web.mit.edu;https://www.hec.edu;https://www.columbia.edu;https://www.usc.edu;https://www.yale.edu", "aff_unique_abbr": "MIT;HEC;Columbia;USC;Yale", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;France" }, { "title": "DFA-GNN: Forward Learning of Graph Neural Networks by Direct Feedback Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94078", "id": "hKVTwQQu76", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hKVTwQQu76", "openreview": "https://openreview.net/forum?id=hKVTwQQu76", "poster": "/media/PosterPDFs/NeurIPS%202024/94078.png?t=1731312700.5175755", "project": "", "author_site": "Gongpei Zhao, Tao Wang, Congyan Lang, Yi Jin, Yidong Li, Haibin Ling", "tldr": "", "abstract": "Graph neural networks (GNNs) are recognized for their strong performance across various applications, with the backpropagation (BP) algorithm playing a central role in the development of most GNN models. However, despite its effectiveness, BP has limitations that challenge its biological plausibility and affect the efficiency, scalability and parallelism of training neural networks for graph-based tasks. While several non-backpropagation (non-BP) training algorithms, such as the direct feedback alignment (DFA), have been successfully applied to fully-connected and convolutional network components for handling Euclidean data, directly adapting these non-BP frameworks to manage non-Euclidean graph data in GNN models presents significant challenges. These challenges primarily arise from the violation of the independent and identically distributed (i.i.d.) assumption in graph data and the difficulty in accessing prediction errors for all samples (nodes) within the graph. To overcome these obstacles, in this paper we propose DFA-GNN, a novel forward learning framework tailored for GNNs with a case study of semi-supervised learning. The proposed method breaks the limitations of BP by using a dedicated forward training mechanism. Specifically, DFA-GNN extends the principles of DFA to adapt to graph data and unique architecture of GNNs, which incorporates the information of graph topology into the feedback links to accommodate the non-Euclidean characteristics of graph data. Additionally, for semi-supervised graph learning tasks, we developed a pseudo error generator that spreads residual errors from training data to create a pseudo error for each unlabeled node. These pseudo errors are then utilized to train GNNs using DFA. Extensive experiments on 10 public benchmarks reveal that our learning framework outperforms not only previous non-BP methods but also the standard BP methods, and it exhibits excellent robustness against various types of noise and attacks.", "keywords": "Graph neural network;direct feedback alignment;graph learning;non-BP training approach", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/a7f00344581a34d86ecaa202dbed46be9a6de943.zip", "author": "Gongpei Zhao;Tao Wang;Congyan Lang;Yi Jin;Yidong Li;Haibin Ling", "authorids": "~Gongpei_Zhao1;~Tao_Wang1;~Congyan_Lang2;~Yi_Jin2;~Yidong_Li1;~Haibin_Ling1", "gender": "M;M;F;F;M;M", "homepage": "https://github.com/liberty1213/;;http://faculty.bjtu.edu.cn/7968/;http://faculty.bjtu.edu.cn/8497/;;https://www3.cs.stonybrook.edu/~hling/", "dblp": "303/9682;12/5838-11;89/4275;38/4674-1.html;40/7652.html;93/3488", "google_scholar": ";https://scholar.google.com.hk/citations?user=F3C5oAcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0001-8408-3816;;", "linkedin": ";;;;;", "or_profile": "~Gongpei_Zhao1;~Tao_Wang1;~Congyan_Lang2;~Yi_Jin2;~Yidong_Li1;~Haibin_Ling1", "aff": "Beijing jiaotong univercity;Beijing Jiaotong University;Beijing jiaotong university;Beijing Jiaotong University;Beijing Jiaotong University;State University of New York, Stony Brook", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;stonybrook.edu", "position": "PhD student;Full Professor;Full Professor;Full Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nzhao2024dfagnn,\ntitle={{DFA}-{GNN}: Forward Learning of Graph Neural Networks by Direct Feedback Alignment},\nauthor={Gongpei Zhao and Tao Wang and Congyan Lang and Yi Jin and Yidong Li and Haibin Ling},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hKVTwQQu76}\n}", "github": "", "reviewers": "mUbj;buRu;k6XA", "pdf_size": 0, "rating": "4;6;6", "confidence": "4;4;2", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "61;88;125", "wc_strengths": "56;89;73", "wc_weaknesses": "105;144;200", "wc_questions": "35;76;138", "wc_limitations": "3;62;28", "wc_review": "260;459;564", "wc_reply_reviewers": "135;12;78", "wc_reply_authors": "1032;42;135", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.33333333333333, 26.233989826601334 ], "wc_strengths_avg": [ 72.66666666666667, 13.474255287605157 ], "wc_weaknesses_avg": [ 149.66666666666666, 38.9900272149459 ], "wc_questions_avg": [ 83.0, 42.339894504670966 ], "wc_limitations_avg": [ 31.0, 24.179881444429512 ], "wc_review_avg": [ 427.6666666666667, 126.06964565491393 ], "wc_reply_reviewers_avg": [ 75.0, 50.25932749251625 ], "wc_reply_authors_avg": [ 403.0, 446.38772384553766 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10202704241708923469&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;stonybrook.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Beijing Jiao Tong University;State University of New York", "aff_unique_dep": ";", "aff_unique_url": "http://www.bjtu.edu.cn;https://www.stonybrook.edu", "aff_unique_abbr": "BJTU;SUNY Stony Brook", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "On the Target-kernel Alignment: a Unified Analysis with Kernel Complexity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94077", "id": "hKcx2wa3P0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hKcx2wa3P0", "openreview": "https://openreview.net/forum?id=hKcx2wa3P0", "poster": "/media/PosterPDFs/NeurIPS%202024/94077.png?t=1731227139.827205", "project": "", "author_site": "Chao Wang, Xin HE, Yuwen Wang, Junhui Wang", "tldr": "", "abstract": "This paper investigates the impact of alignment between the target function of interest and the kernel matrix on a variety of kernel-based methods based on a general loss belonging to a rich loss function family, which covers many commonly used methods in regression and classification problems. We consider the truncated kernel-based method (TKM) which is estimated within a reduced function space constructed by using the spectral truncation of the kernel matrix and compare its theoretical behavior to that of the standard kernel-based method (KM) under various settings. By using the kernel complexity function that quantifies the complexity of the induced function space, we derive the upper bounds for both TKM and KM, and further reveal their dependencies on the degree of target-kernel alignment. Specifically, for the alignment with polynomial decay, the established results indicate that under the just-aligned and weakly-aligned regimes, TKM and KM share the same learning rate. Yet, under the strongly-aligned regime, KM suffers the saturation effect, while TKM can be continuously improved as the alignment becomes stronger. This further implies that TKM has a strong ability to capture the strong alignment and provide a theoretically guaranteed solution to eliminate the phenomena of saturation effect. The minimax lower bound is also established for the squared loss to confirm the optimality of TKM. Extensive numerical experiments further support our theoretical findings. The Python code for reproducing the numerical experiments is available at https://github.com/wywangen.", "keywords": "Learning theory;kernel method;reproducing kernel Hilbert space;saturation effect", "primary_area": "learning_theory", "supplementary_material": "/attachment/f3ea3c249ff382e49a176aa2034ebcf9c7cb335d.zip", "author": "Chao Wang;Xin HE;Yuwen Wang;Junhui Wang", "authorids": "~Chao_Wang39;~Xin_HE6;~Yuwen_Wang4;~Junhui_Wang3", "gender": ";;M;", "homepage": "https://sites.google.com/view/guoqinghe;https://sites.google.com/site/junhuiwang;https://github.com/wangchao-afk;https://www.sta.cuhk.edu.hk/peoples/wang-yuwen/", "dblp": ";;;", "google_scholar": "aduqO4EAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xin_HE6;~Junhui_Wang3;~Wang_Chao2;~Wang_Yuwen1", "aff": "Shanghai University of Finance and Economics;The Chinese University of Hong Kong;Shanghai University of Finance and Economics;Chinese University of Hong Kong", "aff_domain": "shufe.edu;cuhk.edu.hk;sufe.edu;cuhk.hk", "position": "Associate Professor;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nwang2024on,\ntitle={On the Target-kernel Alignment: a Unified Analysis with Kernel Complexity},\nauthor={Chao Wang and Xin HE and Yuwen Wang and Junhui Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hKcx2wa3P0}\n}", "github": "", "reviewers": "4evc;Pn9U;rK2F;ikTW;Twpu", "pdf_size": 899815, "rating": "5;6;6;7;7", "confidence": "3;2;2;4;3", "soundness": "3;3;3;4;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;4;4", "wc_summary": "130;85;70;108;31", "wc_strengths": "139;42;33;90;37", "wc_weaknesses": "200;25;1;35;21", "wc_questions": "63;40;31;191;1", "wc_limitations": "1;2;9;13;1", "wc_review": "533;194;144;437;91", "wc_reply_reviewers": "0;14;0;21;15", "wc_reply_authors": "234;108;159;90;43", "reply_reviewers": "0;1;0;1;1", "reply_authors": "2;3;2;3;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 84.8, 33.74848144731848 ], "wc_strengths_avg": [ 68.2, 40.955585699633204 ], "wc_weaknesses_avg": [ 56.4, 72.64599094237754 ], "wc_questions_avg": [ 65.2, 65.96483911903371 ], "wc_limitations_avg": [ 5.2, 4.915282290977803 ], "wc_review_avg": [ 279.8, 173.36135670904284 ], "wc_reply_reviewers_avg": [ 10.0, 8.508818954473059 ], "wc_reply_authors_avg": [ 126.8, 65.20552123861906 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4285714285714286, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MIb_JoqylsQJ:scholar.google.com/&scioq=On+the+Target-kernel+Alignment:+a+Unified+Analysis+with+Kernel+Complexity&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "shufe.edu;cuhk.edu.hk;sufe.edu;cuhk.hk", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Shanghai University of Finance and Economics;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "SUFE;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking Optimal Transport in Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94076", "id": "hKloKv7pR2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hKloKv7pR2", "openreview": "https://openreview.net/forum?id=hKloKv7pR2", "poster": "", "project": "", "author_site": "Arip Asadulaev, Rostislav Korst, Aleksandr Korotin, Vage Egiazarian, Andrey Filchenkov, Evgeny Burnaev", "tldr": "", "abstract": "We propose a novel algorithm for offline reinforcement learning using optimal transport. Typically, in offline reinforcement learning, the data is provided by various experts and some of them can be sub-optimal. To extract an efficient policy, it is necessary to \\emph{stitch} the best behaviors from the dataset. To address this problem, we rethink offline reinforcement learning as an optimal transportation problem. And based on this, we present an algorithm that aims to find a policy that maps states to a \\emph{partial} distribution of the best expert actions for each given state. We evaluate the performance of our algorithm on continuous control problems from the D4RL suite and demonstrate improvements over existing methods.", "keywords": "Optimal Transport;Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c43808d8d1893edcd2dd37f962ed3aa96889221d.zip", "author": "Arip Asadulaev;Rostislav Korst;Alexander Korotin;Vage Egiazarian;Andrey Filchenkov;Evgeny Burnaev", "authorids": "~Arip_Asadulaev1;~Rostislav_Korst1;~Alexander_Korotin2;~Vage_Egiazarian1;~Andrey_Filchenkov2;~Evgeny_Burnaev1", "gender": "M;;M;M;M;M", "homepage": ";https://github.com/RostislavKorst;;https://research.itmo.ru/en/person/188779/andrey_aleksandrovich_filchenkov.htm;http://faculty.skoltech.ru/people/evgenyburnaev;https://akorotin.netlify.app", "dblp": "243/2822;;232/3274;169/0448.html;144/7845;209/9906", "google_scholar": "wcdrgdYAAAAJ;;Bktg6JEAAAAJ;ry63T9QAAAAJ;https://scholar.google.ru/citations?user=pCRdcOwAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": ";;0000-0003-4444-9769;0000-0002-1133-8432;0000-0001-8424-0690;0000-0003-4286-925X", "linkedin": ";;;;;", "or_profile": "~Arip_Asadulaev1;~Rostislav_Korst1;~Vage_Egiazarian1;~Andrey_Filchenkov2;~Evgeny_Burnaev1;~Alexander_Andreevich_Korotin1", "aff": "ITMO University;MIPT;Yandex;ITMO University;Skolkovo Institute of Science and Technology;Skolkovo Institute of Science and Technology", "aff_domain": "itmo.ru;phystech.edu;yandex-team.ru;itmo.ru;skoltech.ru;skoltech.ru", "position": "PhD student;PhD student;Researcher;Associate Professor;Full Professor;Head of Research Group", "bibtex": "@inproceedings{\nasadulaev2024rethinking,\ntitle={Rethinking Optimal Transport in Offline Reinforcement Learning},\nauthor={Arip Asadulaev and Rostislav Korst and Alexander Korotin and Vage Egiazarian and Andrey Filchenkov and Evgeny Burnaev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hKloKv7pR2}\n}", "github": "", "reviewers": "iRBG;qCSm;7ePM", "pdf_size": 2778477, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "4;2;3", "presentation": "2;2;3", "wc_summary": "97;55;46", "wc_strengths": "30;58;78", "wc_weaknesses": "163;134;84", "wc_questions": "24;48;29", "wc_limitations": "9;3;1", "wc_review": "323;298;238", "wc_reply_reviewers": "646;11;35", "wc_reply_authors": "1290;0;0", "reply_reviewers": "5;1;1", "reply_authors": "5;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 66.0, 22.22611077089287 ], "wc_strengths_avg": [ 55.333333333333336, 19.68643074697787 ], "wc_weaknesses_avg": [ 127.0, 32.629230249374054 ], "wc_questions_avg": [ 33.666666666666664, 10.338708279513883 ], "wc_limitations_avg": [ 4.333333333333333, 3.39934634239519 ], "wc_review_avg": [ 286.3333333333333, 35.6682242650545 ], "wc_reply_reviewers_avg": [ 230.66666666666666, 293.8484114112052 ], "wc_reply_authors_avg": [ 430.0, 608.1118318204309 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.8856180831641267 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11971310469757434203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "itmo.ru;phystech.edu;yandex-team.ru;itmo.ru;skoltech.ru;skoltech.ru", "author_num": 6, "aff_unique_index": "0;1;2;0;3;3", "aff_unique_norm": "ITMO University;Moscow Institute of Physics and Technology;Yandex;Skolkovo Institute of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.itmo.ru;https://mipt.ru;https://yandex.com;https://www.skoltech.ru", "aff_unique_abbr": "ITMO;MIPT;Yandex;Skoltech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Contrastive losses as generalized models of global epistasis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94075", "id": "hLoiXOzoly", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hLoiXOzoly", "openreview": "https://openreview.net/forum?id=hLoiXOzoly", "poster": "/media/PosterPDFs/NeurIPS%202024/94075.png?t=1733526043.0783021", "project": "", "author_site": "David Brookes, Jakub Otwinowski, Sam Sinai", "tldr": "", "abstract": "Fitness functions map large combinatorial spaces of biological sequences to properties of interest. Inferring these multimodal functions from experimental data is a central task in modern protein engineering. Global epistasis models are an effective and physically-grounded class of models for estimating fitness functions from observed data. These models assume that a sparse latent function is transformed by a monotonic nonlinearity to emit measurable fitness. Here we demonstrate that minimizing supervised contrastive loss functions, such as the Bradley-Terry loss, is a simple and flexible technique for extracting the sparse latent function implied by global epistasis. We argue by way of a fitness-epistasis uncertainty principle that the nonlinearities in global epistasis models can produce observed fitness functions that do not admit sparse representations, and thus may be inefficient to learn from observations when using a Mean Squared Error (MSE) loss (a common practice). We show that contrastive losses are able to accurately estimate a ranking function from limited data even in regimes where MSE is ineffective and validate the practical utility of this insight by demonstrating that contrastive loss functions result in consistently improved performance on empirical benchmark tasks.", "keywords": "Computational Biology;Ranking losses;Protein engineering", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "David H Brookes;Jakub Otwinowski;Sam Sinai", "authorids": "~David_H_Brookes1;~Jakub_Otwinowski1;~Sam_Sinai1", "gender": ";M;M", "homepage": ";;", "dblp": ";;", "google_scholar": ";vYBWfCEAAAAJ;4k0EcsIAAAAJ", "orcid": ";;", "linkedin": ";;sam-sinai-710a0221/", "or_profile": "~David_H_Brookes1;~Jakub_Otwinowski1;~Sam_Sinai1", "aff": ";Dyno Therapeutics;Dyno Therapeutics", "aff_domain": ";dynotx.com;dynotx.com", "position": ";Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbrookes2024contrastive,\ntitle={Contrastive losses as generalized models of global epistasis},\nauthor={David H Brookes and Jakub Otwinowski and Sam Sinai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hLoiXOzoly}\n}", "github": "", "reviewers": "NoUX;GFMs;pZCs;5WR7;iMMK", "pdf_size": 3037346, "rating": "5;5;5;6;7", "confidence": "2;3;5;3;3", "soundness": "2;3;2;3;3", "novelty": "3;3;3;2;3", "presentation": "2;3;3;3;4", "wc_summary": "87;54;70;153;167", "wc_strengths": "117;53;21;51;98", "wc_weaknesses": "360;229;124;144;65", "wc_questions": "212;36;37;18;1", "wc_limitations": "25;29;13;5;1", "wc_review": "801;401;265;371;332", "wc_reply_reviewers": "132;132;19;34;0", "wc_reply_authors": "26;94;63;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;1;1", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 106.2, 45.36694832143772 ], "wc_strengths_avg": [ 68.0, 34.71022903986662 ], "wc_weaknesses_avg": [ 184.4, 102.33396308166708 ], "wc_questions_avg": [ 60.8, 76.74737780536869 ], "wc_limitations_avg": [ 14.6, 10.910545357588685 ], "wc_review_avg": [ 434.0, 189.056605280006 ], "wc_reply_reviewers_avg": [ 63.4, 57.03893407138671 ], "wc_reply_authors_avg": [ 36.6, 36.832594261061764 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.15309310892394865, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=127676217459626854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";dynotx.com;dynotx.com", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Dyno Therapeutics", "aff_unique_dep": "", "aff_unique_url": "https://www.dynotherapeutics.com", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Empowering and Assessing the Utility of Large Language Models in Crop Science", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97570", "id": "hMj6jZ6JWU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hMj6jZ6JWU", "openreview": "https://openreview.net/forum?id=hMj6jZ6JWU", "poster": "/media/PosterPDFs/NeurIPS%202024/97570.png?t=1729740823.9720955", "project": "", "author_site": "Hang Zhang, Jiawei SUN, Renqi Chen, Wei Liu, Zhonghang Yuan, Xinzhe Zheng, Zhefan Wang, Zhiyuan Yang, Hang Yan, Han-Sen Zhong, Xiqing Wang, Fan Yang, Nanqing Dong, Wanli Ouyang", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated remarkable efficacy across knowledge-intensive tasks. Nevertheless, their untapped potential in crop science presents an opportunity for advancement. To narrow this gap, we introduce CROP, which includes a novel instruction tuning dataset specifically designed to enhance LLMs\u2019 professional capabilities in the crop science sector, along with a benchmark that serves as a comprehensive evaluation of LLMs\u2019 understanding of the domain knowledge. The CROP dataset is curated through a task-oriented and LLM-human integrated pipeline, comprising 210,038 single-turn and 1,871 multi-turn dialogues related to crop science scenarios. The CROP benchmark includes 5,045 multiple-choice questions covering three difficulty levels. Our experiments based on the CROP benchmark demonstrate notable enhancements in crop science-related tasks when LLMs are fine-tuned with the CROP dataset. To the best of our knowledge, CROP dataset is the first-ever instruction tuning dataset in the crop science domain. We anticipate that CROP will accelerate the adoption of LLMs in the domain of crop science, ultimately contributing to global food production.", "keywords": "crop science;instruction tuning dataset;benchmark;conversation;open-source", "primary_area": "", "supplementary_material": "/attachment/658b534696e19eb55c2350edd6b378ea7cb53a59.pdf", "author": "Hang Zhang;Jiawei Sun;Renqi Chen;Wei Liu;Zhonghang Yuan;Xinzhe Zheng;Zhefan Wang;Zhiyuan Yang;Hang Yan;Han-Sen Zhong;Xiqing Wang;Wanli Ouyang;Fan Yang;Nanqing Dong", "authorids": "~Hang_Zhang21;~Jiawei_Sun7;~Renqi_Chen1;~Wei_Liu31;~Zhonghang_Yuan1;~Xinzhe_Zheng1;~Zhefan_Wang1;~Zhiyuan_Yang8;~Hang_Yan2;~Han-Sen_Zhong1;~Xiqing_Wang1;~Wanli_Ouyang1;~Fan_Yang74;~Nanqing_Dong1", "gender": ";M;M;M;;M;;M;;M;;;M;", "homepage": "https://github.com/xukuncai200;;https://renqichen.github.io/;http;;;https://github.com/wanggzf;https://auto.hdu.edu.cn/2021/0416/c3803a133771/page.htm;;;https://scholar.google.com/citations?hl=zh-CN&user=fnrt8KsAAAAJ;;;", "dblp": ";;;49/3283-123;;;;;;;;;;198/1455", "google_scholar": ";90dSO_EAAAAJ;;;;qGlH0FUAAAAJ;;;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;qGlH0FUAAABJ;0DX2YsQAAAAJ", "orcid": ";;0009-0001-6855-7900;0009-0004-6819-4427;;0000-0002-6215-0069;;;;;;;;", "linkedin": ";;;;;;;;;;;;;", "or_profile": "~Hang_Zhang21;~Jiawei_Sun7;~Renqi_Chen1;~Wei_Liu31;~Zhonghang_Yuan1;~Xinzhe_Zheng1;~Zhefan_Wang1;~Zhiyuan_Yang8;~Hang_Yan2;~Han-Sen_Zhong1;~Xiqing_Wang1;~Wanli_Ouyang1;~Fan_Yang74;~Nanqing_Dong1", "aff": "Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Fudan University;Shanghai Jiaotong University;;The University of Hong Kong;Liaoning University;Hangzhou Dianzi University;;Shanghai Artificial Intelligence Laboratory;China Agricultural University;;Yazhouwan Lab;Shanghai Artificial Intelligence Laboratory", "aff_domain": "pjlab.org.cn;pjlab.org.cn;m.fudan.edu.cn;sjtu.edu.cn;;cs.hku.hk;lnu.edu.cn;hdu.edu.cn;;pjlab.org;cau.edu.cn;;yzwlab.cn;pjlab.org.cn", "position": "Researcher;Intern;MS student;PhD student;;PhD student;Undergrad student;Associate Professor;;Researcher;Full Professor;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024empowering,\ntitle={Empowering and Assessing the Utility of Large Language Models in Crop Science},\nauthor={Hang Zhang and Jiawei Sun and Renqi Chen and Wei Liu and Zhonghang Yuan and Xinzhe Zheng and Zhefan Wang and Zhiyuan Yang and Hang Yan and Han-Sen Zhong and Xiqing Wang and Wanli Ouyang and Fan Yang and Nanqing Dong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hMj6jZ6JWU}\n}", "github": "", "reviewers": "P1Fn;ejg3;UDLV", "pdf_size": 14183693, "rating": "6;7;7", "confidence": "3;3;4", "wc_summary_and_contributions": "29;62;110", "wc_strengths": "6;20;2", "wc_improvement": "14;13;30", "wc_limitations": "1;76;2", "wc_correctness": "37;1;1", "wc_clarity": "1;1;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;12;1", "wc_additional_feedback": "1;1;1", "wc_review": "91;187;149", "wc_reply_reviewers": "0;21;45", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 67.0, 33.25657829663178 ], "wc_strengths_avg": [ 9.333333333333334, 7.717224601860151 ], "wc_improvement_avg": [ 19.0, 7.788880963698615 ], "wc_limitations_avg": [ 26.333333333333332, 35.122009560324926 ], "wc_correctness_avg": [ 13.0, 16.97056274847714 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 142.33333333333334, 39.474323581566566 ], "wc_reply_reviewers_avg": [ 22.0, 18.384776310850235 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16526556491903664420&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "email": "pjlab.org.cn;pjlab.org.cn;m.fudan.edu.cn;sjtu.edu.cn;;cs.hku.hk;lnu.edu.cn;hdu.edu.cn;;pjlab.org;cau.edu.cn;;yzwlab.cn;pjlab.org.cn", "author_num": 14, "aff_unique_index": "0;0;1;2;3;4;5;0;6;7;0", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Fudan University;Shanghai Jiao Tong University;University of Hong Kong;Liaoning University;Hangzhou Dianzi University;China Agricultural University;Yazhouwan Lab", "aff_unique_dep": ";;;;;;;", "aff_unique_url": "http://www.shailab.org/;https://www.fudan.edu.cn;https://www.sjtu.edu.cn;https://www.hku.hk;http://www.lnu.edu.cn/;http://www.hdu.edu.cn/;http://www.cau.edu.cn/;", "aff_unique_abbr": "Shanghai AI Lab;Fudan;SJTU;HKU;LNNU;HGHDU;CAU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Faster Algorithms for User-Level Private Stochastic Convex Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94074", "id": "hNlk9cIGo9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hNlk9cIGo9", "openreview": "https://openreview.net/forum?id=hNlk9cIGo9", "poster": "", "project": "", "author_site": "Andrew Lowy, Daogao Liu, Hilal Asi", "tldr": "", "abstract": "We study private stochastic convex optimization (SCO) under user-level differential privacy (DP) constraints. In this setting, there are $n$ users (e.g., cell phones), each possessing $m$ data items (e.g., text messages), and we need to protect the privacy of each user's entire collection of data items. Existing algorithms for user-level DP SCO are impractical in many large-scale machine learning scenarios because: (i) they make restrictive assumptions on the smoothness parameter of the loss function and require the number of users to grow polynomially with the dimension of the parameter space; or (ii) they are prohibitively slow, requiring at least $(mn)^{3/2}$ gradient computations for smooth losses and $(mn)^3$ computations for non-smooth losses. To address these limitations, we provide novel user-level DP algorithms with state-of-the-art excess risk and runtime guarantees, without stringent assumptions. First, we develop a linear-time algorithm with state-of-the-art excess risk (for a non-trivial linear-time algorithm) under a mild smoothness assumption. Our second algorithm applies to arbitrary smooth losses and achieves optimal excess risk in $\\approx (mn)^{9/8}$ gradient computations. Third, for non-smooth loss functions, we obtain optimal excess risk in $n^{11/8} m^{5/4}$ gradient computations. Moreover, our algorithms do not require the number of users to grow polynomially with the dimension.", "keywords": "differential privacy;user-level privacy;stochastic convex optimization;private optimization", "primary_area": "privacy", "supplementary_material": "", "author": "Andrew Lowy;Daogao Liu;Hilal Asi", "authorids": "~Andrew_Lowy1;~Daogao_Liu1;~Hilal_Asi1", "gender": ";M;M", "homepage": "https://sites.google.com/view/andrewlowy;https://daogaoliu.github.io/;http://web.stanford.edu/~asi/", "dblp": "285/5314;245/4078;", "google_scholar": "https://scholar.google.com/citations?hl=en;auA3AaQAAAAJ;QGcz9-kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Andrew_Lowy1;~Daogao_Liu1;~Hilal_Asi1", "aff": "University of Wisconsin - Madison;University of Washington, Seattle;Apple", "aff_domain": "wisc.edu;uw.edu;apple.com", "position": "Postdoc;PhD student;Researcher", "bibtex": "@inproceedings{\nlowy2024faster,\ntitle={Faster Algorithms for User-Level Private Stochastic Convex Optimization},\nauthor={Andrew Lowy and Daogao Liu and Hilal Asi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hNlk9cIGo9}\n}", "github": "", "reviewers": "jGa8;JSLp;Xb14;W7xk", "pdf_size": 1258990, "rating": "4;4;7;7", "confidence": "3;4;4;3", "soundness": "2;4;4;3", "novelty": "3;3;3;3", "presentation": "2;3;4;3", "wc_summary": "106;59;171;117", "wc_strengths": "43;45;59;53", "wc_weaknesses": "184;237;20;40", "wc_questions": "113;92;46;8", "wc_limitations": "6;9;2;1", "wc_review": "452;442;298;219", "wc_reply_reviewers": "930;522;5;11", "wc_reply_authors": "1039;951;0;0", "reply_reviewers": "4;4;1;1", "reply_authors": "4;5;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 113.25, 39.8269695056001 ], "wc_strengths_avg": [ 50.0, 6.4031242374328485 ], "wc_weaknesses_avg": [ 120.25, 92.44559210692525 ], "wc_questions_avg": [ 64.75, 40.751533713468994 ], "wc_limitations_avg": [ 4.5, 3.2015621187164243 ], "wc_review_avg": [ 352.75, 98.36507256135178 ], "wc_reply_reviewers_avg": [ 367.0, 386.90244248389024 ], "wc_reply_authors_avg": [ 497.5, 498.4719149560986 ], "reply_reviewers_avg": [ 2.5, 1.5 ], "reply_authors_avg": [ 2.75, 1.7853571071357126 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8561978569392777901&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "wisc.edu;uw.edu;apple.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wisconsin-Madison;University of Washington;Apple", "aff_unique_dep": ";;Apple Inc.", "aff_unique_url": "https://www.wisc.edu;https://www.washington.edu;https://www.apple.com", "aff_unique_abbr": "UW-Madison;UW;Apple", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Madison;Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RoleAgent: Building, Interacting, and Benchmarking High-quality Role-Playing Agents from Scripts", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97569", "id": "hORTHzt2cE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hORTHzt2cE", "openreview": "https://openreview.net/forum?id=hORTHzt2cE", "poster": "/media/PosterPDFs/NeurIPS%202024/97569.png?t=1731420462.209207", "project": "", "author_site": "Jiaheng Liu, Zehao Ni, Haoran Que, Sun, Noah Wang, Jian Yang, JiakaiWang, Hongcheng Guo, Z.Y. Peng, Ge Zhang, Jiayi Tian, Xingyuan Bu, Ke Xu, Wenge Rong, Junran Peng, ZHAO-XIANG ZHANG", "tldr": "", "abstract": "Believable agents can empower interactive applications ranging from immersive environments to rehearsal spaces for interpersonal communication. Recently, generative agents have been proposed to simulate believable human behavior by using Large Language Models. However, the existing method heavily relies on human-annotated agent profiles (e.g., name, age, personality, relationships with others, and so on) for the initialization of each agent, which cannot be scaled up easily. In this paper, we propose a scalable RoleAgent framework to generate high-quality role-playing agents from raw scripts, which includes building and interacting stages. Specifically, in the building stage, we use a hierarchical memory system to extract and summarize the structure and high-level information of each agent for the raw script. In the interacting stage, we propose a novel innovative mechanism with four steps to achieve a high-quality interaction between agents. Finally, we introduce a systematic and comprehensive evaluation benchmark called RoleAgentBench to evaluate the effectiveness of our RoleAgent, which includes 100 and 28 roles for 20 English and 5 Chinese scripts, respectively. Extensive experimental results on RoleAgentBench demonstrate the effectiveness of RoleAgent.", "keywords": "Large Language Models;Agent;Role-playing", "primary_area": "", "supplementary_material": "", "author": "Jiaheng Liu;Zehao Ni;Haoran Que;Tao Sun;Noah Wang;Jian Yang;JiakaiWang;Hongcheng Guo;Z.Y. Peng;Ge Zhang;Jiayi Tian;Xingyuan Bu;Ke Xu;Wenge Rong;Junran Peng;Zhaoxiang Zhang", "authorids": "~Jiaheng_Liu1;~Zehao_Ni1;~Haoran_Que1;~Tao_Sun18;~Noah_Wang4;~Jian_Yang10;~JiakaiWang1;~Hongcheng_Guo1;~Z.Y._Peng2;~Ge_Zhang5;~Jiayi_Tian1;~Xingyuan_Bu1;~Ke_Xu4;~Wenge_Rong1;~Junran_Peng1;~Zhaoxiang_Zhang3", "gender": "M;;M;;;M;;M;;M;F;M;;M;M;M", "homepage": "https://liujiaheng.github.io/;;;;;https://www.researchgate.net/scientific-contributions/Jian-Yang-2146089927;https://github.com/BlcDec;;;;https://liujiaheng.github.io/;https://xingyuanbu.github.io/;;;https://jrpeng.github.io/;http://zhaoxiangzhang.net", "dblp": "225/1962;;358/4966;;;181/2854-30;;84/8542;;;;186/6825;x/KeXu;18/5572.html;;55/2285-1.html", "google_scholar": "yFI_RjUAAAAJ;;onEik5gAAAAJ;;;i9opWEgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;qyTrq4kAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;;https://scholar.google.com.hk/citations?hl=en;qxWfV6cAAAAJ", "orcid": ";0009-0007-7288-6217;;;;0000-0003-1983-012X;;;;;;0000-0002-6445-4306;;;;", "linkedin": ";;;;;;;;;ge-zhang-792797169/;;;;;;", "or_profile": "~Jiaheng_Liu1;~Zehao_Ni1;~Haoran_Que1;~Tao_Sun18;~Noah_Wang4;~Jian_Yang10;~JiakaiWang1;~Hongcheng_Guo1;~Z.Y._Peng2;~Ge_Zhang5;~Jiayi_Tian1;~Xingyuan_Bu1;~Ke_Xu4;~Wenge_Rong1;~Junran_Peng1;~Zhaoxiang_Zhang3", "aff": "Alibaba Group;University of Chinese Academy of Sciences;Beihang University;;;Alibaba Group;;Beihang University;;University of Waterloo;;Alibaba Group;Beihang University;Beihang University;University of Science and Technology Beijing;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "alibaba-inc.com;ucas.ac.cn;buaa.edu.cn;;;alibaba-inc.com;;buaa.edu.cn;;cs.uwaterloo.ca;;alibaba-inc.com;buaa.edu.cn;buaa.edu.cn;ustb.edu.cn;ia.ac.cn", "position": "Researcher;MS student;MS student;;;Researcher;;PhD student;;PhD student;;Researcher;Full Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024roleagent,\ntitle={RoleAgent: Building, Interacting, and Benchmarking High-quality Role-Playing Agents from Scripts},\nauthor={Jiaheng Liu and Zehao Ni and Haoran Que and Tao Sun and Noah Wang and Jian Yang and JiakaiWang and Hongcheng Guo and Z.Y. Peng and Ge Zhang and Jiayi Tian and Xingyuan Bu and Ke Xu and Wenge Rong and Junran Peng and Zhaoxiang Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hORTHzt2cE}\n}", "github": "", "reviewers": "dVmX;M7P2;HBPU;rkjx", "pdf_size": 8934131, "rating": "6;7;7;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "48;108;80;84", "wc_strengths": "38;153;22;58", "wc_improvement": "30;168;35;42", "wc_limitations": "27;8;4;45", "wc_correctness": "1;8;1;29", "wc_clarity": "8;1;5;1", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "3;8;1;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "157;456;150;275", "wc_reply_reviewers": "0;17;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 80.0, 21.354156504062622 ], "wc_strengths_avg": [ 67.75, 50.8447391575569 ], "wc_improvement_avg": [ 68.75, 57.4603123903795 ], "wc_limitations_avg": [ 21.0, 16.355427233796124 ], "wc_correctness_avg": [ 9.75, 11.4755174175285 ], "wc_clarity_avg": [ 3.75, 2.947456530637899 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 6.5, 5.024937810560445 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 259.5, 123.84365143195674 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16068084125303276908&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "alibaba-inc.com;ucas.ac.cn;buaa.edu.cn;;;alibaba-inc.com;;buaa.edu.cn;;cs.uwaterloo.ca;;alibaba-inc.com;buaa.edu.cn;buaa.edu.cn;ustb.edu.cn;ia.ac.cn", "author_num": 16, "aff_unique_index": "0;1;2;0;2;3;0;2;2;4;5", "aff_unique_norm": "Alibaba Group;University of Chinese Academy of Sciences;Beihang University;University of Waterloo;University of Science and Technology Beijing;Chinese Academy of Sciences", "aff_unique_dep": ";;;;;Institute of Automation", "aff_unique_url": "https://www.alibaba.com;http://www.ucas.ac.cn;http://www.buaa.edu.cn/;https://uwaterloo.ca;http://www.ustb.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "Alibaba;UCAS;BUAA;UW;USTB;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0;0", "aff_country_unique": "China;Canada" }, { "title": "Attack-Aware Noise Calibration for Differential Privacy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94073", "id": "hOcsUrOY0D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hOcsUrOY0D", "openreview": "https://openreview.net/forum?id=hOcsUrOY0D", "poster": "/media/PosterPDFs/NeurIPS%202024/94073.png?t=1734022970.3484688", "project": "", "author_site": "Bogdan Kulynych, Juan Gomez, Georgios Kaissis, Flavio Calmon, Carmela Troncoso", "tldr": "", "abstract": "Differential privacy (DP) is a widely used approach for mitigating privacy risks when training machine learning models on sensitive data. DP mechanisms add noise during training to limit the risk of information leakage. The scale of the added noise is critical, as it determines the trade-off between privacy and utility. The standard practice is to select the noise scale to satisfy a given privacy budget \u03b5. This privacy budget is in turn interpreted in terms of operational attack risks, such as accuracy, sensitivity, and specificity of inference attacks aimed to recover\ninformation about the training data records. We show that first calibrating the noise scale to a privacy budget \u03b5, and then translating \u03b5 to attack risk leads to overly conservative risk assessments and unnecessarily low utility. Instead, we propose methods to directly calibrate the noise scale to a desired attack risk level, bypassing the step of choosing \u03b5. For a given notion of attack risk, our approach significantly\ndecreases noise scale, leading to increased utility at the same level of privacy. We empirically demonstrate that calibrating noise to attack sensitivity/specificity, rather than \u03b5, when training privacy-preserving ML models substantially improves model accuracy for the same risk level. Our work provides a principled and practical way to improve the utility of privacy-preserving ML without compromising on privacy.", "keywords": "differential privacy;DP-SGD", "primary_area": "privacy", "supplementary_material": "", "author": "Bogdan Kulynych;Juan Felipe Gomez;Georgios Kaissis;Flavio Calmon;Carmela Troncoso", "authorids": "~Bogdan_Kulynych1;~Juan_Felipe_Gomez1;~Georgios_Kaissis1;~Flavio_Calmon1;~Carmela_Troncoso1", "gender": "Not Specified;M;;;F", "homepage": "https://kulyny.ch;https://www.krellinst.org/csgf/fellows/profile?n=gomez2021;;http://people.seas.harvard.edu/~flavio/;http://carmelatroncoso.com/", "dblp": "203/9056;;;89/4611;01/4825", "google_scholar": "https://scholar.google.com/citations?hl=en;;;P8N_YH4AAAAJ;sMkt3SgAAAAJ", "orcid": ";;;;0000-0002-2374-2248", "linkedin": ";;;;carmela-troncoso-b497975/?originalSubdomain=ch", "or_profile": "~Bogdan_Kulynych1;~Juan_Felipe_Gomez1;~Georgios_Kaissis1;~Flavio_Calmon1;~Carmela_Troncoso1", "aff": "CHUV - University Hospital Lausanne;Harvard University;;Harvard University;EPFL - EPF Lausanne", "aff_domain": "chuv.ch;harvard.edu;;harvard.edu;epfl.ch", "position": "Postdoc;PhD student;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkulynych2024attackaware,\ntitle={Attack-Aware Noise Calibration for Differential Privacy},\nauthor={Bogdan Kulynych and Juan Felipe Gomez and Georgios Kaissis and Flavio Calmon and Carmela Troncoso},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hOcsUrOY0D}\n}", "github": "", "reviewers": "tsqL;JyJW;pcXQ;Bdkd", "pdf_size": 566363, "rating": "5;5;6;7", "confidence": "4;3;3;2", "soundness": "4;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;2", "wc_summary": "116;96;56;82", "wc_strengths": "37;23;24;91", "wc_weaknesses": "318;78;40;78", "wc_questions": "313;14;29;3", "wc_limitations": "5;36;1;3", "wc_review": "789;247;150;257", "wc_reply_reviewers": "241;0;9;0", "wc_reply_authors": "997;25;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 21.834605560898048 ], "wc_strengths_avg": [ 43.75, 27.83320858255476 ], "wc_weaknesses_avg": [ 128.5, 110.50226242027807 ], "wc_questions_avg": [ 89.75, 129.22340151845563 ], "wc_limitations_avg": [ 11.25, 14.359230480774379 ], "wc_review_avg": [ 360.75, 250.7572282108733 ], "wc_reply_reviewers_avg": [ 62.5, 103.12249996969624 ], "wc_reply_authors_avg": [ 255.5, 428.2268674429478 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12041886279801146891&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "chuv.ch;harvard.edu;;harvard.edu;epfl.ch", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University Hospital Lausanne;Harvard University;EPFL", "aff_unique_dep": ";;", "aff_unique_url": "https://www.chuv.ch;https://www.harvard.edu;https://www.epfl.ch", "aff_unique_abbr": "CHUV;Harvard;EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Autobidder's Dilemma: Why More Sophisticated Autobidders Lead to Worse Auction Efficiency", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94072", "id": "hQJksiskaa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hQJksiskaa", "openreview": "https://openreview.net/forum?id=hQJksiskaa", "poster": "/media/PosterPDFs/NeurIPS%202024/94072.png?t=1733457803.196862", "project": "", "author_site": "Yuan Deng, Jieming Mao, Vahab Mirrokni, Hanrui Zhang, Song Zuo", "tldr": "", "abstract": "The recent increasing adoption of autobidding has inspired the growing interest in analyzing the performance of classic mechanism with value-maximizing autobidders both theoretically and empirically. It is known that optimal welfare can be obtained in first-price auctions if autobidders are restricted to uniform bid-scaling and the price of anarchy is $2$ when non-uniform bid-scaling strategies are allowed. \n\nIn this paper, we provide a fine-grained price of anarchy analysis for non-uniform bid-scaling strategies in first-price auctions, demonstrating the reason why more powerful (individual) non-uniform bid-scaling strategies may lead to worse (aggregated) performance in social welfare. Our theoretical results match recent empirical findings that a higher level of non-uniform bid-scaling leads to lower welfare performance in first-price auctions.", "keywords": "ad auctions;autobidding;non-uniform bidding;price of anarchy", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Yuan Deng;Jieming Mao;Vahab Mirrokni;Hanrui Zhang;Song Zuo", "authorids": "~Yuan_Deng1;~Jieming_Mao1;~Vahab_Mirrokni2;~Hanrui_Zhang1;~Song_Zuo1", "gender": "M;M;M;;", "homepage": "https://sites.google.com/view/yuandeng;https://sites.google.com/seas.upenn.edu/jiemingmao/;https://people.csail.mit.edu/mirrokni/Welcome.html;;https://sites.google.com/corp/view/songzuo", "dblp": "58/366;123/4948;m/VahabSMirrokni;168/8847;123/4898", "google_scholar": "OoTMmy8AAAAJ;;opbZfw0AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": "yuandeng/yuandeng/;;;;", "or_profile": "~Yuan_Deng1;~Jieming_Mao1;~Vahab_Mirrokni2;~Hanrui_Zhang1;~Song_Zuo1", "aff": "Google Research;Google;Google Research;Google Research;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Research Scientist;VP, Google Fellow;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndeng2024autobidders,\ntitle={Autobidder's Dilemma: Why More Sophisticated Autobidders Lead to Worse Auction Efficiency},\nauthor={Yuan Deng and Jieming Mao and Vahab Mirrokni and Hanrui Zhang and Song Zuo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hQJksiskaa}\n}", "github": "", "reviewers": "DtQM;uz4Y;TSHw;DAvE", "pdf_size": 1209479, "rating": "4;5;5;7", "confidence": "2;4;3;3", "soundness": "2;4;3;3", "novelty": "3;3;3;3", "presentation": "1;3;2;3", "wc_summary": "40;40;87;129", "wc_strengths": "12;46;45;49", "wc_weaknesses": "44;57;77;57", "wc_questions": "41;1;31;1", "wc_limitations": "3;1;1;3", "wc_review": "140;145;241;239", "wc_reply_reviewers": "47;4;74;0", "wc_reply_authors": "22;0;392;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 74.0, 37.1012129181783 ], "wc_strengths_avg": [ 38.0, 15.083103128998356 ], "wc_weaknesses_avg": [ 58.75, 11.797775213996918 ], "wc_questions_avg": [ 18.5, 17.853571071357123 ], "wc_limitations_avg": [ 2.0, 1.0 ], "wc_review_avg": [ 191.25, 48.78716532039959 ], "wc_reply_reviewers_avg": [ 31.25, 30.80077109424373 ], "wc_reply_authors_avg": [ 103.5, 166.8075238111279 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1RGfxMK9JzkJ:scholar.google.com/&scioq=Autobidder%27s+Dilemma:+Why+More+Sophisticated+Autobidders+Lead+to+Worse+Auction+Efficiency&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "google.com;google.com;google.com;google.com;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "hQQyetmOxs", "title": "A Systematic Review of NeurIPS Dataset Management Practices", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "As new machine learning methods demand larger training datasets, researchers and developers face significant challenges in dataset management. Although ethics reviews, documentation, and checklists have been established, it remains uncertain whether consistent dataset management practices exist across the community. This lack of a comprehensive overview hinders our ability to diagnose and address fundamental tensions and ethical issues related to managing large datasets. We present a systematic review of datasets published at the NeurIPS Datasets and Benchmarks track, focusing on four key aspects: provenance, distribution, ethical disclosure, and licensing. Our findings reveal that dataset provenance is often unclear due to ambiguous filtering and curation processes. Additionally, a variety of sites are used for dataset hosting, but only a few offer structured metadata and version control. These inconsistencies underscore the urgent need for standardized data infrastructures for the publication and management of datasets.", "keywords": "data governance;datasheet;data management", "primary_area": "", "supplementary_material": "", "author": "Yiwei Wu;Leah Hope Ajmani;Shayne Longpre;Hanlin Li", "authorids": "~Yiwei_Wu2;~Leah_Hope_Ajmani1;~Shayne_Longpre1;~Hanlin_Li2", "gender": "F;F;M;", "homepage": "https://www.yiweiwu.net/;;https://www.shaynelongpre.com;", "dblp": ";;190/7024;", "google_scholar": ";Ng3PD7MAAAAJ;ADd_YfkAAAAJ;", "orcid": ";;;", "linkedin": ";;shayne-redford-longpre/;", "or_profile": "~Yiwei_Wu2;~Leah_Hope_Ajmani1;~Shayne_Longpre1;~Hanlin_Li2", "aff": "University of Texas at Austin;University of Minnesota - Twin Cities;Massachusetts Institute of Technology;", "aff_domain": "utexas.edu;umn.edu;mit.edu;", "position": "MS student;PhD student;PhD student;", "bibtex": "@inproceedings{\nwu2024a,\ntitle={A Systematic Review of Neur{IPS} Dataset Management Practices},\nauthor={Yiwei Wu and Leah Hope Ajmani and Shayne Longpre and Hanlin Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hQQyetmOxs}\n}", "github": "", "project": "", "reviewers": "rjqg;55ZR;5eR4;jDR7", "site": "https://openreview.net/forum?id=hQQyetmOxs", "pdf_size": 188735, "rating": "6;6;7;7", "confidence": "5;4;4;3", "wc_summary_and_contributions": "29;71;93;82", "wc_strengths": "2;98;71;73", "wc_improvement": "2;202;110;161", "wc_limitations": "69;163;85;26", "wc_correctness": "248;12;45;18", "wc_clarity": "109;17;7;5", "wc_relation_to_prior_work": "83;72;61;14", "wc_documentation": "66;9;39;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "609;645;512;395", "wc_reply_reviewers": "0;53;0;0", "wc_reply_authors": "0;537;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "0;1;0;0", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 68.75, 24.23195204683271 ], "wc_strengths_avg": [ 61.0, 35.68613176011096 ], "wc_improvement_avg": [ 118.75, 74.87113929946572 ], "wc_limitations_avg": [ 85.75, 49.54480295651604 ], "wc_correctness_avg": [ 80.75, 97.35855124230228 ], "wc_clarity_avg": [ 34.5, 43.25216757574122 ], "wc_relation_to_prior_work_avg": [ 57.5, 26.291633650269812 ], "wc_documentation_avg": [ 32.25, 22.487496525847426 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 540.25, 96.94682821010701 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 134.25, 232.52782091612178 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 0.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ArZpHiS-xR4J:scholar.google.com/&scioq=A+Systematic+Review+of+NeurIPS+Dataset+Management+Practices&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;University of Minnesota;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.minnesota.edu;https://web.mit.edu", "aff_unique_abbr": "UT Austin;UMN;MIT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Austin;Twin Cities;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "An engine not a camera: Measuring performative power of online search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94071", "id": "hQfcrTBHeD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hQfcrTBHeD", "openreview": "https://openreview.net/forum?id=hQfcrTBHeD", "poster": "", "project": "", "author_site": "Celestine Mendler-D\u00fcnner, Gabriele Carovano, Moritz Hardt", "tldr": "", "abstract": "The power of digital platforms is at the center of major ongoing policy and regulatory efforts. To advance existing debates, we designed and executed an experiment to measure the performative power of online search providers. Instantiated in our setting, performative power quantifies the ability of a search engine to steer web traffic by rearranging results. To operationalize this definition we developed a browser extension that performs unassuming randomized experiments in the background. These randomized experiments emulate updates to the search algorithm and identify the causal effect of different content arrangements on clicks. Analyzing tens of thousands of clicks, we discuss what our robust quantitative findings say about the power of online search engines, using the Google Shopping antitrust investigation as a case study. More broadly, we envision our work to serve as a blueprint for how the recent definition of performative power can help integrate quantitative insights from online experiments with future investigations into the economic power of digital platforms.", "keywords": "Performativity;Power;Digital Markets;Search Engine;Ranking;Online Experiment", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Celestine Mendler-D\u00fcnner;Gabriele Carovano;Moritz Hardt", "authorids": "~Celestine_Mendler-D\u00fcnner1;~Gabriele_Carovano1;~Moritz_Hardt1", "gender": ";;Not Specified", "homepage": "http://celestine.ai/;;http://mrtz.org/", "dblp": "176/5511;;26/4683", "google_scholar": "UqtDdZUAAAAJ;;adnTgaAAAAAJ", "orcid": ";0000-0001-5406-9589;", "linkedin": ";;", "or_profile": "~Celestine_Mendler-D\u00fcnner1;~Gabriele_Carovano1;~Moritz_Hardt1", "aff": "Max Planck Institute for Intelligent Systems;;Max-Planck-Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "tuebingen.mpg.de;;is.mpg.de", "position": "Group Lead;;Principal Researcher", "bibtex": "@inproceedings{\nmendler-d{\\\"u}nner2024an,\ntitle={An engine not a camera: Measuring performative power of online search},\nauthor={Celestine Mendler-D{\\\"u}nner and Gabriele Carovano and Moritz Hardt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hQfcrTBHeD}\n}", "github": "", "reviewers": "iJky;7fp1;6Zv1;9NWC", "pdf_size": 1526216, "rating": "4;6;7;7", "confidence": "4;2;5;4", "soundness": "2;3;4;4", "novelty": "2;3;3;2", "presentation": "3;3;4;4", "wc_summary": "95;177;85;103", "wc_strengths": "103;98;144;70", "wc_weaknesses": "494;102;244;114", "wc_questions": "255;2;82;68", "wc_limitations": "43;10;8;9", "wc_review": "990;389;563;364", "wc_reply_reviewers": "1123;31;35;583", "wc_reply_authors": "769;0;0;445", "reply_reviewers": "3;1;1;2", "reply_authors": "4;1;1;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 115.0, 36.359317925395686 ], "wc_strengths_avg": [ 103.75, 26.423237878806603 ], "wc_weaknesses_avg": [ 238.5, 157.6729209471303 ], "wc_questions_avg": [ 101.75, 93.49431800917101 ], "wc_limitations_avg": [ 17.5, 14.739402972983676 ], "wc_review_avg": [ 576.5, 250.73741244576965 ], "wc_reply_reviewers_avg": [ 443.0, 452.2742530810261 ], "wc_reply_authors_avg": [ 303.5, 324.39828914468706 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.18731716231633877, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15946435139640878938&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tuebingen.mpg.de;;is.mpg.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Autoregressive Policy Optimization for Constrained Allocation Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94070", "id": "hRKsahifqj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hRKsahifqj", "openreview": "https://openreview.net/forum?id=hRKsahifqj", "poster": "", "project": "", "author_site": "David Winkel, Niklas Strau\u00df, Maximilian Bernhard, Zongyue Li, Thomas Seidl, Matthias Schubert", "tldr": "", "abstract": "Allocation tasks represent a class of problems where a limited amount of resources must be allocated to a set of entities at each time step. Prominent examples of this task include portfolio optimization or distributing computational workloads across servers.\nAllocation tasks are typically bound by linear constraints describing practical requirements that have to be strictly fulfilled at all times. In portfolio optimization, for example, investors may be obligated to allocate less than 30\\% of the funds into a certain industrial sector in any investment period. \nSuch constraints restrict the action space of allowed allocations in intricate ways, which makes learning a policy that avoids constraint violations difficult.\nIn this paper, we propose a new method for constrained allocation tasks based on an autoregressive process to sequentially sample allocations for each entity. In addition, we introduce a novel de-biasing mechanism to counter the initial bias caused by sequential sampling. We demonstrate the superior performance of our approach compared to a variety of Constrained Reinforcement Learning (CRL) methods on three distinct constrained allocation tasks: portfolio optimization, computational workload distribution, and a synthetic allocation benchmark. Our code is available at: https://github.com/niklasdbs/paspo", "keywords": "Reinforcement learning;Constraint Reinforcement Learning;Allocation Tasks", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "David Winkel;Niklas Alexander Strau\u00df;Maximilian Bernhard;Zongyue Li;Thomas Seidl;Matthias Schubert", "authorids": "~David_Winkel1;~Niklas_Alexander_Strau\u00df1;~Maximilian_Bernhard1;~Zongyue_Li1;~Thomas_Seidl2;~Matthias_Schubert1", "gender": "M;M;;M;;M", "homepage": ";https://www.dbs.ifi.lmu.de/cms/personen/mitarbeiter/strauss/index.html;;;https://www.dbs.ifi.lmu.de/cms/personen/professoren/seidl/index.html;https://www.dbs.ifi.lmu.de/cms/personen/professoren/schubert/index.html", "dblp": "288/3552.html;256/6733.html;;;s/ThomasSeidl1;77/4813", "google_scholar": ";;;3_6s1VoAAAAJ;0fOi9KkAAAAJ;GEnUYjQAAAAJ", "orcid": ";;;;0000-0002-4861-1412;0000-0002-6566-6343", "linkedin": ";;;;;", "or_profile": "~David_Winkel1;~Niklas_Alexander_Strau\u00df1;~Maximilian_Bernhard1;~Zongyue_Li1;~Thomas_Seidl2;~Matthias_Schubert1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;LMU Munich", "aff_domain": "lmu.de;lmu.de;;lmu.de;lmu.de;lmu.de", "position": "PhD student;Postdoc;;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwinkel2024autoregressive,\ntitle={Autoregressive Policy Optimization for Constrained Allocation Tasks},\nauthor={David Winkel and Niklas Alexander Strau{\\ss} and Maximilian Bernhard and Zongyue Li and Thomas Seidl and Matthias Schubert},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hRKsahifqj}\n}", "github": "", "reviewers": "2peK;3WTq;3hEE;QtCd", "pdf_size": 903611, "rating": "5;5;5;7", "confidence": "2;2;3;1", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "33;64;57;110", "wc_strengths": "37;32;34;75", "wc_weaknesses": "117;149;131;66", "wc_questions": "2;1;215;27", "wc_limitations": "11;1;1;9", "wc_review": "200;247;438;287", "wc_reply_reviewers": "31;10;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 27.883686987197372 ], "wc_strengths_avg": [ 44.5, 17.698870020427858 ], "wc_weaknesses_avg": [ 115.75, 30.8818312280862 ], "wc_questions_avg": [ 61.25, 89.3766608237296 ], "wc_limitations_avg": [ 5.5, 4.55521678957215 ], "wc_review_avg": [ 293.0, 89.19921524318474 ], "wc_reply_reviewers_avg": [ 10.25, 12.65652005884714 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-PBS00vqzm4J:scholar.google.com/&scioq=Autoregressive+Policy+Optimization+for+Constrained+Allocation+Tasks&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "lmu.de;lmu.de;;lmu.de;lmu.de;lmu.de", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "LESS: Label-Efficient and Single-Stage Referring 3D Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94069", "id": "hRqaot0NZF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hRqaot0NZF", "openreview": "https://openreview.net/forum?id=hRqaot0NZF", "poster": "/media/PosterPDFs/NeurIPS%202024/94069.png?t=1729999090.2083595", "project": "", "author_site": "Xuexun Liu, Xiaoxu Xu, Jinlong Li, Qiudan Zhang, Xu Wang, Nicu Sebe, Lin Ma", "tldr": "", "abstract": "Referring 3D Segmentation is a visual-language task that segments all points of the specified object from a 3D point cloud described by a sentence of query. Previous works perform a two-stage paradigm, first conducting language-agnostic instance segmentation then matching with given text query. However, the semantic concepts from text query and visual cues are separately interacted during the training, and both instance and semantic labels for each object are required, which is time consuming and human-labor intensive. To mitigate these issues, we propose a novel Referring 3D Segmentation pipeline, Label-Efficient and Single-Stage, dubbed LESS, which is only under the supervision of efficient binary mask. Specifically, we design a Point-Word Cross-Modal Alignment module for aligning the fine-grained features of points and textual embedding. Query Mask Predictor module and Query-Sentence Alignment module are introduced for coarse-grained alignment between masks and query. Furthermore, we propose an area regularization loss, which coarsely reduces irrelevant background predictions on a large scale. Besides, a point-to-point contrastive loss is proposed concentrating on distinguishing points with subtly similar features. Through extensive experiments, we achieve state-of-the-art performance on ScanRefer dataset by surpassing the previous methods about 3.7% mIoU using only binary labels. Code is available at https://github.com/mellody11/LESS.", "keywords": "Referring 3d segmentation;label-efficient;single-stage;cross-modal", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xuexun Liu;Xiaoxu Xu;Jinlong Li;Qiudan Zhang;Xu Wang;Nicu Sebe;Lin Ma", "authorids": "~Xuexun_Liu1;~Xiaoxu_Xu1;~Jinlong_Li3;~Qiudan_Zhang1;~Xu_Wang17;~Nicu_Sebe1;~Lin_Ma2", "gender": "M;M;M;F;M;M;M", "homepage": "https://liuxuexun.github.io/;https://xuxiaoxxxx.github.io/;https://tyroneli.github.io/;;https://csse.szu.edu.cn/pages/user/index?id=556;http://disi.unitn.it/~sebe/;http://forestlinma.com", "dblp": "390/5600;223/5153;34/1296-3;190/4336;w/XuWang6;20/3519;74/3608-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=BrdJ_bMAAAAJ;hpEAymEAAAAJ;42KrEAsAAAAJ;XNDHhIEAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;DAn1pA4AAAAJ", "orcid": ";0009-0007-9001-413X;0000-0002-8746-4566;;;0000-0002-6597-7248;", "linkedin": ";;;;;;", "or_profile": "~Xuexun_Liu1;~Xiaoxu_Xu1;~Jinlong_Li3;~Qiudan_Zhang1;~Xu_Wang17;~Nicu_Sebe1;~Lin_Ma2", "aff": "Shenzhen University;Shenzhen University;University of Trento;Shenzhen University;Shenzhen University;University of Trento;Meituan", "aff_domain": "szu.edu.cn;szu.edu.cn;unitn.it;szu.edu.cn;szu.edu.cn;unitn.it;meituan.com", "position": "MS student;Undergrad student;PhD student;Assistant Professor;Associate Professor;Full Professor;Principal Researcher and Research Manager ", "bibtex": "@inproceedings{\nliu2024less,\ntitle={{LESS}: Label-Efficient and Single-Stage Referring 3D Segmentation},\nauthor={Xuexun Liu and Xiaoxu Xu and Jinlong Li and Qiudan Zhang and Xu Wang and Nicu Sebe and Lin Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hRqaot0NZF}\n}", "github": "", "reviewers": "pLC3;6zjS;NB5W;Monv", "pdf_size": 3211755, "rating": "5;5;5;7", "confidence": "3;5;4;5", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "39;43;96;96", "wc_strengths": "43;35;26;112", "wc_weaknesses": "111;119;168;35", "wc_questions": "2;3;52;18", "wc_limitations": "1;1;9;4", "wc_review": "196;201;351;265", "wc_reply_reviewers": "45;300;164;171", "wc_reply_authors": "11;833;1137;41", "reply_reviewers": "1;1;3;1", "reply_authors": "2;3;5;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 27.536339626028727 ], "wc_strengths_avg": [ 54.0, 34.022051672408 ], "wc_weaknesses_avg": [ 108.25, 47.58873291021731 ], "wc_questions_avg": [ 18.75, 20.216020874544032 ], "wc_limitations_avg": [ 3.75, 3.2691742076555053 ], "wc_review_avg": [ 253.25, 62.65131682574597 ], "wc_reply_reviewers_avg": [ 170.0, 90.2247194509354 ], "wc_reply_authors_avg": [ 505.5, 491.51271601048126 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:O8FmAn1A0CcJ:scholar.google.com/&scioq=LESS:+Label-Efficient+and+Single-Stage+Referring+3D+Segmentation&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "szu.edu.cn;szu.edu.cn;unitn.it;szu.edu.cn;szu.edu.cn;unitn.it;meituan.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;1;2", "aff_unique_norm": "Shenzhen University;University of Trento;Meituan", "aff_unique_dep": ";;", "aff_unique_url": "https://www.szu.edu.cn;https://www.unitn.it;https://www.meituan.com", "aff_unique_abbr": "SZU;UniTN;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1;0", "aff_country_unique": "China;Italy" }, { "title": "Localized Zeroth-Order Prompt Optimization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94068", "id": "hS1jvV3Dk3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hS1jvV3Dk3", "openreview": "https://openreview.net/forum?id=hS1jvV3Dk3", "poster": "/media/PosterPDFs/NeurIPS%202024/94068.png?t=1731747718.3893604", "project": "", "author_site": "Wenyang Hu, Yao Shu, Zongmin Yu, Zhaoxuan Wu, Xiaoqiang Lin, Zhongxiang Dai, See-Kiong Ng, Bryan Kian Hsiang Low", "tldr": "", "abstract": "The efficacy of large language models (LLMs) in understanding and generating natural language has aroused a wide interest in developing prompt-based methods to harness the power of black-box LLMs. Existing methodologies usually prioritize a global optimization for finding the global optimum, which however will perform poorly in certain tasks. This thus motivates us to re-think the necessity of finding a global optimum in prompt optimization. To answer this, we conduct a thorough empirical study on prompt optimization and draw two major insights. Contrasting with the rarity of global optimum, local optima are usually prevalent and well-performed, which can be more worthwhile for efficient prompt optimization (**Insight I**). The choice of the input domain, covering both the generation and the representation of prompts, affects the identification of well-performing local optima (**Insight II**). Inspired by these insights, we propose a novel algorithm, namely localized zeroth-order prompt optimization (ZOPO), which incorporates a Neural Tangent Kernel-based derived Gaussian process into standard zeroth-order optimization for an efficient search of well-performing local optima in prompt optimization. Remarkably, ZOPO outperforms existing baselines in terms of both the optimization performance and the query efficiency, which we demonstrate through extensive experiments.", "keywords": "Prompt Optimization;Large Language Models;LLM;Instruction Optimization", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/64d61badcc22f0c71186f3685a895442c662bef8.zip", "author": "Wenyang Hu;Yao Shu;Zongmin Yu;Zhaoxuan Wu;Xiaoqiang Lin;Zhongxiang Dai;See-Kiong Ng;Bryan Kian Hsiang Low", "authorids": "~Wenyang_Hu1;~Yao_Shu1;~Zongmin_Yu1;~Zhaoxuan_Wu1;~Xiaoqiang_Lin1;~Zhongxiang_Dai1;~See-Kiong_Ng1;~Bryan_Kian_Hsiang_Low1", "gender": ";M;M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=EecZzYsAAAAJ;https://yao.notion.site;;https://zhaoxuanwu.github.io/;https://xqlin98.github.io/;https://daizhongxiang.github.io/;https://www.comp.nus.edu.sg/~ngsk/;http://www.comp.nus.edu.sg/~lowkh", "dblp": "258/0545;44/1338;;298/5083;269/4573;172/4968;00/5480;97/4877", "google_scholar": "EecZzYsAAAAJ;https://scholar.google.com.au/citations?hl=en;;Th_mPm8AAAAJ;nqKwA60AAAAJ;1v8xOIYAAAAJ;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ;https://scholar.google.com.tw/citations?user=2P-Q09UAAAAJ", "orcid": "0009-0008-6189-7890;;;0009-0002-5659-6387;;;0000-0001-6565-7511;", "linkedin": ";yao-shu-a5640514b;zongmin-yu;zhaoxuanwu/;;;seekiong/?originalSubdomain=sg;", "or_profile": "~Wenyang_Hu1;~Yao_Shu1;~Zongmin_Yu1;~Zhaoxuan_Wu1;~Xiaoqiang_Lin1;~Zhongxiang_Dai1;~See-Kiong_Ng1;~Bryan_Kian_Hsiang_Low1", "aff": "National University of Singapore;Guangming Lab;National University of Singapore;National University of Singapore;National University of Singapore;Massachusetts Institute of Technology;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;gml.ac.cn;nus.edu.sg;u.nus.edu;u.nus.edu;mit.edu;nus.edu.sg;nus.edu.sg", "position": "PhD student;Researcher;Undergrad student;PhD student;PhD student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nhu2024localized,\ntitle={Localized Zeroth-Order Prompt Optimization},\nauthor={Wenyang Hu and Yao Shu and Zongmin Yu and Zhaoxuan Wu and Xiaoqiang Lin and Zhongxiang Dai and See-Kiong Ng and Bryan Kian Hsiang Low},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hS1jvV3Dk3}\n}", "github": "", "reviewers": "TMXX;TQjw;bDUw;GMgT", "pdf_size": 1877899, "rating": "3;5;6;8", "confidence": "4;3;3;4", "soundness": "2;2;2;3", "novelty": "2;2;2;3", "presentation": "3;2;2;4", "wc_summary": "79;77;79;120", "wc_strengths": "76;45;31;72", "wc_weaknesses": "197;35;28;162", "wc_questions": "19;28;238;126", "wc_limitations": "12;1;8;15", "wc_review": "383;186;384;495", "wc_reply_reviewers": "0;0;11;145", "wc_reply_authors": "62;62;47;301", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 88.75, 18.06066167115701 ], "wc_strengths_avg": [ 56.0, 18.721645226849056 ], "wc_weaknesses_avg": [ 105.5, 75.068302232034 ], "wc_questions_avg": [ 102.75, 88.64923857541022 ], "wc_limitations_avg": [ 9.0, 5.244044240850758 ], "wc_review_avg": [ 362.0, 111.34406135937381 ], "wc_reply_reviewers_avg": [ 39.0, 61.363670033660796 ], "wc_reply_authors_avg": [ 118.0, 105.83241469417581 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8828024884808257227&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u.nus.edu;gml.ac.cn;nus.edu.sg;u.nus.edu;u.nus.edu;mit.edu;nus.edu.sg;nus.edu.sg", "author_num": 8, "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "National University of Singapore;Guangming Lab;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;;https://web.mit.edu", "aff_unique_abbr": "NUS;;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2;0;0", "aff_country_unique": "Singapore;China;United States" }, { "title": "ViLCo-Bench: VIdeo Language COntinual learning Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97567", "id": "hSAu90mDkC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hSAu90mDkC", "openreview": "https://openreview.net/forum?id=hSAu90mDkC", "poster": "/media/PosterPDFs/NeurIPS%202024/97567.png?t=1731888159.3812642", "project": "", "author_site": "Tianqi Tang, Shohreh Deldari, Hao Xue, Celso de Melo, Flora Salim", "tldr": "", "abstract": "Video language continual learning involves continuously adapting to information from video and text inputs, enhancing a model\u2019s ability to handle new tasks while retaining prior knowledge. This field is a relatively under-explored area, and establishing appropriate datasets is crucial for facilitating communication and research in this field. In this study, we present the first dedicated benchmark, ViLCo-Bench, designed to evaluate continual learning models across a range of video-text tasks. The dataset comprises ten-minute-long videos and corresponding language queries collected from publicly available datasets. Additionally, we introduce a novel memory-efficient framework that incorporates self-supervised learning and mimics long-term and short-term memory effects. This framework addresses challenges including memory complexity from long video clips, natural language complexity from open queries, and text-video misalignment. We posit that ViLCo-Bench, with greater complexity compared to existing continual learning benchmarks, would serve as a critical tool for exploring the video-language domain, extending beyond conventional class-incremental tasks, and addressing complex and limited annotation issues. The curated data, evaluations, and our novel method are available at https://github.com/cruiseresearchgroup/ViLCo.", "keywords": "Continual learning;Multimodal;Video-text;Computer vision", "primary_area": "", "supplementary_material": "/attachment/1a41e73c305dbf06f6afe900b30ca8d9d9625106.pdf", "author": "Tianqi Tang;Shohreh Deldari;Hao Xue;Celso M de Melo;Flora D. Salim", "authorids": "~Tianqi_Tang1;~Shohreh_Deldari1;~Hao_Xue1;~Celso_M_de_Melo1;~Flora_D._Salim1", "gender": "M;F;M;M;F", "homepage": "http://reler.net/people/tianqi_tang/index.html;;https://haoxue01.github.io/;http://www.celsodemelo.net/;http://florasalim.com/", "dblp": ";;156/1517-1;;08/1554", "google_scholar": ";250tnREAAAAJ;KwhLl7IAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=Yz35RSYAAAAJ", "orcid": ";;;;0000-0002-1237-1664", "linkedin": ";;;;https://au.linkedin.com/in/flora-salim-6958986", "or_profile": "~Tianqi_Tang1;~Shohreh_Deldari1;~Hao_Xue1;~Celso_M_de_Melo1;~Flora_Salim1", "aff": "University of New South Wales;University of New South Wales;University of New South Wales;Army Research Laboratory;University of New South Wales", "aff_domain": "unsw.edu.au;unsw.edu.au;unsw.edu.au;army.mil;unsw.edu.au", "position": "Postdoc;Postdoc;Lecturer;Researcher;Full Professor", "bibtex": "@inproceedings{\ntang2024vilcobench,\ntitle={Vi{LC}o-Bench: {VI}deo Language {CO}ntinual learning Benchmark},\nauthor={Tianqi Tang and Shohreh Deldari and Hao Xue and Celso M de Melo and Flora D. Salim},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hSAu90mDkC}\n}", "github": "", "reviewers": "ifvZ;NAFV;EZFs", "pdf_size": 824952, "rating": "6;7;8", "confidence": "3;4;3", "wc_summary_and_contributions": "41;75;114", "wc_strengths": "14;65;114", "wc_improvement": "11;99;49", "wc_limitations": "8;100;47", "wc_correctness": "2;17;8", "wc_clarity": "2;133;3", "wc_relation_to_prior_work": "1;47;21", "wc_documentation": "3;72;33", "wc_additional_feedback": "1;1;1", "wc_review": "83;609;390", "wc_reply_reviewers": "0;159;0", "wc_reply_authors": "58;490;85", "reply_reviewers": "0;1;0", "reply_authors": "2;3;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 76.66666666666667, 29.825417944356715 ], "wc_strengths_avg": [ 64.33333333333333, 40.827550610940264 ], "wc_improvement_avg": [ 53.0, 36.03701800463888 ], "wc_limitations_avg": [ 51.666666666666664, 37.703521792473914 ], "wc_correctness_avg": [ 9.0, 6.164414002968976 ], "wc_clarity_avg": [ 46.0, 61.51964455900787 ], "wc_relation_to_prior_work_avg": [ 23.0, 18.83259585576738 ], "wc_documentation_avg": [ 36.0, 28.24889378365107 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 360.6666666666667, 215.7380098380647 ], "wc_reply_reviewers_avg": [ 53.0, 74.95331880577403 ], "wc_reply_authors_avg": [ 211.0, 197.59048560090133 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15353426006094862193&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "unsw.edu.au;unsw.edu.au;unsw.edu.au;army.mil;unsw.edu.au", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of New South Wales;Army Research Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.unsw.edu.au;https://www.arl.army.mil", "aff_unique_abbr": "UNSW;ARL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "Propensity Score Alignment of Unpaired Multimodal Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94067", "id": "hT4y7D2o2T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hT4y7D2o2T", "openreview": "https://openreview.net/forum?id=hT4y7D2o2T", "poster": "/media/PosterPDFs/NeurIPS%202024/94067.png?t=1733804226.6163125", "project": "", "author_site": "Johnny Xi, Jana Osea, Zuheng Xu, Jason Hartford", "tldr": "", "abstract": "Multimodal representation learning techniques typically require paired samples to learn shared representations, but collecting paired samples can be challenging in fields like biology, where measurement devices often destroy the samples. This paper presents an approach to address the challenge of aligning unpaired samples across disparate modalities in multimodal representation learning. We draw an analogy between potential outcomes in causal inference and potential views in multimodal observations, allowing us to leverage Rubin's framework to estimate a common space for matching samples. Our approach assumes experimentally perturbed samples by treatments, and uses this to estimate a propensity score from each modality. We show that the propensity score encapsulates all shared information between a latent state and treatment, and can be used to define a distance between samples. We experiment with two alignment techniques that leverage this distance---shared nearest neighbours (SNN) and optimal transport (OT) matching---and find that OT matching results in significant improvements over state-of-the-art alignment approaches in on synthetic multi-modal tasks, in real-world data from NeurIPS Multimodal Single-Cell Integration Challenge, and on a single cell microscopy to expression prediction task.", "keywords": "unpaired data;multimodal;causal representations;propensity score", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/af8b3be66d08d70d238089cee34ad892448f7ece.zip", "author": "Johnny Xi;Jana Osea;Zuheng Xu;Jason Hartford", "authorids": "~Johnny_Xi1;~Jana_Osea2;~Zuheng_Xu1;~Jason_Hartford1", "gender": "F;M;M;M", "homepage": ";https://zuhengxu.github.io/;https://jhartford.github.io;https://xijohnny.github.io", "dblp": ";278/8104;191/6716;266/4683.html", "google_scholar": ";lkMkblkAAAAJ;https://scholar.google.ca/citations?user=eBNK7SsAAAAJ;", "orcid": ";;;", "linkedin": "jana-osea/;zuheng-david-xu-29825624b/;jasonhartford1/;", "or_profile": "~Jana_Osea2;~Zuheng_Xu1;~Jason_Hartford1;~Quanhan_Xi1", "aff": "Valence Labs powered by recursion;University of British Columbia;Valence Labs powered by Recursion;University of British Columbia", "aff_domain": "valencelabs.com;ubc.ca;valencelabs.com;ubc.ca", "position": "Intern;PhD student;Researcher;PhD student", "bibtex": "@inproceedings{\nxi2024propensity,\ntitle={Propensity Score Alignment of Unpaired Multimodal Data},\nauthor={Johnny Xi and Jana Osea and Zuheng Xu and Jason Hartford},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hT4y7D2o2T}\n}", "github": "", "reviewers": "XqRN;qEyT;iHqN", "pdf_size": 1604505, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "68;330;60", "wc_strengths": "61;104;74", "wc_weaknesses": "204;256;52", "wc_questions": "105;162;81", "wc_limitations": "7;39;27", "wc_review": "445;891;294", "wc_reply_reviewers": "99;62;0", "wc_reply_authors": "40;4;0", "reply_reviewers": "2;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 152.66666666666666, 125.43612805815644 ], "wc_strengths_avg": [ 79.66666666666667, 18.00617178142601 ], "wc_weaknesses_avg": [ 170.66666666666666, 86.55377262462656 ], "wc_questions_avg": [ 116.0, 33.97057550292606 ], "wc_limitations_avg": [ 24.333333333333332, 13.199326582148887 ], "wc_review_avg": [ 543.3333333333334, 253.44865796098077 ], "wc_reply_reviewers_avg": [ 53.666666666666664, 40.843876189977635 ], "wc_reply_authors_avg": [ 14.666666666666666, 17.98765008430939 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16426946414747195002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "valencelabs.com;ubc.ca;valencelabs.com;ubc.ca", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Valence Labs;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ubc.ca", "aff_unique_abbr": ";UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";Canada" }, { "title": "Assouad, Fano, and Le Cam with Interaction: A Unifying Lower Bound Framework and Characterization for Bandit Learnability", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94066", "id": "hUGD1aNMrp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hUGD1aNMrp", "openreview": "https://openreview.net/forum?id=hUGD1aNMrp", "poster": "", "project": "", "author_site": "Fan Chen, Dylan J Foster, Yanjun Han, Jian Qian, Alexander Rakhlin, Yunbei Xu", "tldr": "", "abstract": "We develop a unifying framework for information-theoretic lower bound in statistical estimation and interactive decision making. Classical lower bound techniques---such as Fano's method, Le Cam's method, and Assouad's lemma---are central to the study of minimax risk in statistical estimation, yet are insufficient to provide tight lower bounds for \\emph{interactive decision making} algorithms that collect data interactively (e.g., algorithms for bandits and reinforcement learning). Recent work of Foster et al. provides minimax lower bounds for interactive decision making using seemingly different analysis techniques from the classical methods. These results---which are proven using a complexity measure known as the \\emph{Decision-Estimation Coefficient} (DEC)---capture difficulties unique to interactive learning, yet do not recover the tightest known lower bounds for passive estimation. We propose a unified view of these distinct methodologies through a new lower bound approach called \\emph{interactive Fano method}. As an application, we introduce a novel complexity measure, the \\emph{Fractional Covering Number}, which facilitates the new lower bounds for interactive decision making that extend the DEC methodology by incorporating the complexity of estimation. Using the fractional covering number, we (i) provide a unified characterization of learnability for \\emph{any} stochastic bandit problem, (ii) close the remaining gap between the upper and lower bounds in Foster et al. (up to polynomial factors) for any interactive decision making problem in which the underlying model class is convex.", "keywords": "Information-theoretic lower bound;online learning;interactive decision making;statistical learning;minimax lower bound", "primary_area": "learning_theory", "supplementary_material": "", "author": "Fan Chen;Dylan J Foster;Yanjun Han;Jian Qian;Alexander Rakhlin;Yunbei Xu", "authorids": "~Fan_Chen4;~Dylan_J_Foster1;~Yanjun_Han1;~Jian_Qian2;~Alexander_Rakhlin1;~Yunbei_Xu1", "gender": "M;;M;;M;M", "homepage": "https://sites.google.com/view/chen-fan;http://dylanfoster.net;https://yanjunhan2021.github.io;https://sites.google.com/view/jianqian/about;http://www.mit.edu/~rakhlin/;https://yunbeixu.github.io/", "dblp": ";167/4271;35/7252;;59/407;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;RqwU8xsAAAAJ;hdTDzlQAAAAJ;;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;jianQ/;;", "or_profile": "~Fan_Chen4;~Dylan_J_Foster1;~Yanjun_Han1;~Jian_Qian2;~Alexander_Rakhlin1;~Yunbei_Xu1", "aff": "Massachusetts Institute of Technology;Microsoft Research;New York University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;microsoft.com;nyu.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Principal Researcher;Assistant Professor;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nchen2024assouad,\ntitle={Assouad, Fano, and Le Cam with Interaction: A Unifying Lower Bound Framework and Characterization for Bandit Learnability},\nauthor={Fan Chen and Dylan J Foster and Yanjun Han and Jian Qian and Alexander Rakhlin and Yunbei Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hUGD1aNMrp}\n}", "github": "", "reviewers": "zJ4c;uqGG;Fyj4;3RHd", "pdf_size": 630399, "rating": "6;6;7;8", "confidence": "4;2;3;3", "soundness": "2;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "96;70;91;78", "wc_strengths": "108;56;99;62", "wc_weaknesses": "440;426;338;2", "wc_questions": "2;45;2;134", "wc_limitations": "1;1;5;19", "wc_review": "647;598;535;295", "wc_reply_reviewers": "16;244;40;0", "wc_reply_authors": "0;110;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 10.304731922762475 ], "wc_strengths_avg": [ 81.25, 22.576259654778955 ], "wc_weaknesses_avg": [ 301.5, 177.2815557242208 ], "wc_questions_avg": [ 45.75, 53.89051400756909 ], "wc_limitations_avg": [ 6.5, 7.399324293474371 ], "wc_review_avg": [ 518.75, 135.14506095303668 ], "wc_reply_reviewers_avg": [ 75.0, 98.60527369263775 ], "wc_reply_authors_avg": [ 27.5, 47.63139720814412 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:H4crLdDRFq0J:scholar.google.com/&scioq=Assouad,+Fano,+and+Le+Cam+with+Interaction:+A+Unifying+Lower+Bound+Framework+and+Characterization+for+Bandit+Learnability&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "mit.edu;microsoft.com;nyu.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft;New York University", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com/en-us/research;https://www.nyu.edu", "aff_unique_abbr": "MIT;MSR;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "NeuRodin: A Two-stage Framework for High-Fidelity Neural Surface Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94065", "id": "hVGAGU4TKk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hVGAGU4TKk", "openreview": "https://openreview.net/forum?id=hVGAGU4TKk", "poster": "/media/PosterPDFs/NeurIPS%202024/94065.png?t=1731598335.0545514", "project": "", "author_site": "Yifan Wang, Di Huang, Weicai Ye, Guofeng Zhang, Wanli Ouyang, Tong He", "tldr": "", "abstract": "Signed Distance Function (SDF)-based volume rendering has demonstrated significant capabilities in surface reconstruction. Although promising, SDF-based methods often fail to capture detailed geometric structures, resulting in visible defects. By comparing SDF-based volume rendering to density-based volume rendering, we identify two main factors within the SDF-based approach that degrade surface quality: SDF-to-density representation and geometric regularization. These factors introduce challenges that hinder the optimization of the SDF field. To address these issues, we introduce NeuRodin, a novel two-stage neural surface reconstruction framework that not only achieves high-fidelity surface reconstruction but also retains the flexible optimization characteristics of density-based methods. \n NeuRodin incorporates innovative strategies that facilitate transformation of arbitrary topologies and reduce artifacts associated with density bias.\n Extensive evaluations on the Tanks and Temples and ScanNet++ datasets demonstrate the superiority of NeuRodin, showing strong reconstruction capabilities for both indoor and outdoor environments using solely posed RGB captures. Project website:\nhttps://open3dvlab.github.io/NeuRodin/", "keywords": "SDF representation; Volume Rendering; High-Fidelity Surface Reconstruction", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yifan Wang;Di Huang;Weicai Ye;Guofeng Zhang;Wanli Ouyang;Tong He", "authorids": "~Yifan_Wang42;~Di_Huang6;~Weicai_Ye3;~Guofeng_Zhang3;~Wanli_Ouyang1;~Tong_He2", "gender": "M;;M;M;;M", "homepage": "https://github.com/yyfz;;https://ywcmaike.github.io/;http://www.cad.zju.edu.cn/home/gfzhang;;http://tonghe90.github.io/", "dblp": ";;02/10372;78/5389-1.html;;02/1554-1", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;F0xfpXAAAAAJ;;kWADCMUAAAAJ", "orcid": ";;0000-0001-6215-1347;0000-0001-5661-8430;;0000-0003-2772-9320", "linkedin": ";;weicai-ye-b9b36b129/;;;", "or_profile": "~Yifan_Wang42;~Di_Huang6;~Weicai_Ye3;~Guofeng_Zhang3;~Wanli_Ouyang1;~Tong_He2", "aff": "Xi'an Jiaotong University;;Zhejiang University;Zhejiang University;;Shanghai AI lab", "aff_domain": "xjtu.edu.cn;;zju.edu.cn;zju.edu.cn;;pjlab.org.cn", "position": "Undergrad student;;PhD student;Full Professor;;Researcher", "bibtex": "@inproceedings{\nwang2024neurodin,\ntitle={NeuRodin: A Two-stage Framework for High-Fidelity Neural Surface Reconstruction},\nauthor={Yifan Wang and Di Huang and Weicai Ye and Guofeng Zhang and Wanli Ouyang and Tong He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hVGAGU4TKk}\n}", "github": "", "reviewers": "EWQD;JYLp;9bjU;o1gj", "pdf_size": 15405574, "rating": "5;5;6;6", "confidence": "3;5;4;2", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "112;33;41;127", "wc_strengths": "74;34;61;47", "wc_weaknesses": "355;34;213;89", "wc_questions": "53;74;5;38", "wc_limitations": "7;17;9;28", "wc_review": "601;192;329;329", "wc_reply_reviewers": "0;19;53;0", "wc_reply_authors": "53;15;13;53", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.25, 41.68557904119841 ], "wc_strengths_avg": [ 54.0, 14.983324063771697 ], "wc_weaknesses_avg": [ 172.75, 123.59282948456193 ], "wc_questions_avg": [ 42.5, 25.144581921360317 ], "wc_limitations_avg": [ 15.25, 8.257572282456872 ], "wc_review_avg": [ 362.75, 148.48968819416385 ], "wc_reply_reviewers_avg": [ 18.0, 21.644860821913362 ], "wc_reply_authors_avg": [ 33.5, 19.512816301087856 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3499041717513974878&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 3, "email": "xjtu.edu.cn;;zju.edu.cn;zju.edu.cn;;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Xi'an Jiao Tong University;Zhejiang University;Shanghai AI Lab", "aff_unique_dep": ";;AI Research", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.zju.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "XJTU;ZJU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Optimizing Automatic Differentiation with Deep Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94064", "id": "hVmi98a0ki", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hVmi98a0ki", "openreview": "https://openreview.net/forum?id=hVmi98a0ki", "poster": "/media/PosterPDFs/NeurIPS%202024/94064.png?t=1731536817.3482757", "project": "", "author_site": "Jamie Lohoff, Emre Neftci", "tldr": "", "abstract": "Computing Jacobians with automatic differentiation is ubiquitous in many scientific domains such as machine learning, computational fluid dynamics, robotics and finance. \nEven small savings in the number of computations or memory usage in Jacobian computations can already incur massive savings in energy consumption and runtime. \nWhile there exist many methods that allow for such savings, they generally trade computational efficiency for approximations of the exact Jacobian.\n\nIn this paper, we present a novel method to optimize the number of necessary multiplications for Jacobian computation by leveraging deep reinforcement learning (RL) and a concept called cross-country elimination while still computing the exact Jacobian. \nCross-country elimination is a framework for automatic differentiation that phrases Jacobian accumulation as ordered elimination of all vertices on the computational graph where every elimination incurs a certain computational cost.\nFinding the optimal elimination order that minimizes the number of necessary multiplications can be seen as a single player game which in our case is played by an RL agent.\nWe demonstrate that this method achieves up to 33% improvements over state-of-the-art methods on several relevant tasks taken from relevant domains.\nFurthermore, we show that these theoretical gains translate into actual runtime improvements by providing a cross-country elimination interpreter in JAX that can execute the obtained elimination orders.", "keywords": "Reinforcement Learning;Automatic Differentiation;AlphaZero;Deep Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/11b75e5e32d5316bf9bd3888a9fb1792442d40c2.zip", "author": "Jamie Lohoff;Emre Neftci", "authorids": "~Jamie_Lohoff1;~Emre_Neftci1", "gender": "M;M", "homepage": ";https://nmi-lab.org/", "dblp": ";62/5283", "google_scholar": ";yYT6jtkAAAAJ", "orcid": "0009-0005-6790-2556;", "linkedin": ";", "or_profile": "~Jamie_Lohoff1;~Emre_Neftci1", "aff": "Forschungszentrum Juelich GmbH;Foschungszentrum Juelich and RWTH Aachen", "aff_domain": "fz-juelich.de;fz-juelich.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nlohoff2024optimizing,\ntitle={Optimizing Automatic Differentiation with Deep Reinforcement Learning},\nauthor={Jamie Lohoff and Emre Neftci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hVmi98a0ki}\n}", "github": "", "reviewers": "ErgV;N91J;ZG2Q;tWYs;PBG1;kjpN", "pdf_size": 3778422, "rating": "5;7;7;7;7;8", "confidence": "3;3;3;3;3;4", "soundness": "2;3;4;3;3;4", "novelty": "2;3;3;3;3;4", "presentation": "3;3;3;3;3;4", "wc_summary": "86;103;223;154;116;162", "wc_strengths": "21;152;70;102;86;51", "wc_weaknesses": "72;63;203;149;229;24", "wc_questions": "125;27;52;163;95;85", "wc_limitations": "1;101;1;1;15;21", "wc_review": "305;446;549;569;541;343", "wc_reply_reviewers": "64;11;11;310;49;90", "wc_reply_authors": "0;0;0;1276;0;20", "reply_reviewers": "1;1;1;3;1;1", "reply_authors": "1;1;1;3;1;2", "rating_avg": [ 6.833333333333333, 0.8975274678557505 ], "confidence_avg": [ 3.1666666666666665, 0.3726779962499649 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 3.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 140.66666666666666, 45.51068250666235 ], "wc_strengths_avg": [ 80.33333333333333, 41.07986800801039 ], "wc_weaknesses_avg": [ 123.33333333333333, 75.62333561070213 ], "wc_questions_avg": [ 91.16666666666667, 44.73781646685746 ], "wc_limitations_avg": [ 23.333333333333332, 35.598064866256735 ], "wc_review_avg": [ 458.8333333333333, 103.49302177227001 ], "wc_reply_reviewers_avg": [ 89.16666666666667, 102.66030824466137 ], "wc_reply_authors_avg": [ 216.0, 474.1026611751229 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.7453559924999299 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5813183589761799, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13123428198425161656&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "fz-juelich.de;fz-juelich.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Forschungszentrum Juelich;Forschungszentrum J\u00fclich", "aff_unique_dep": ";", "aff_unique_url": "https://www.fz-juelich.de;https://www.fz-juelich.de", "aff_unique_abbr": "FZJ;FZJ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "GraphMorph: Tubular Structure Extraction by Morphing Predicted Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94063", "id": "hW5QWiCctl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hW5QWiCctl", "openreview": "https://openreview.net/forum?id=hW5QWiCctl", "poster": "/media/PosterPDFs/NeurIPS%202024/94063.png?t=1731592218.11415", "project": "", "author_site": "Zhao Zhang, Ziwei Zhao, Dong Wang, Liwei Wang", "tldr": "", "abstract": "Accurately restoring topology is both challenging and crucial in tubular structure extraction tasks, such as blood vessel segmentation and road network extraction. Diverging from traditional approaches based on pixel-level classification, our proposed method, named GraphMorph, focuses on branch-level features of tubular structures to achieve more topologically accurate predictions. GraphMorph comprises two main components: a Graph Decoder and a Morph Module. Utilizing multi-scale features extracted from an image patch by the segmentation network, the Graph Decoder facilitates the learning of branch-level features and generates a graph that accurately represents the tubular structure in this patch. The Morph Module processes two primary inputs: the graph and the centerline probability map, provided by the Graph Decoder and the segmentation network, respectively. Employing a novel SkeletonDijkstra algorithm, the Morph Module produces a centerline mask that aligns with the predicted graph. Furthermore, we observe that employing centerline masks predicted by GraphMorph significantly reduces false positives in the segmentation task, which is achieved by a simple yet effective post-processing strategy. The efficacy of our method in the centerline extraction and segmentation tasks has been substantiated through experimental evaluations across various datasets. Source code will be released soon.", "keywords": "Image Segmentation;Tubular Structure Extraction;Branch-level Features;Graph Representation", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Zhao Zhang;Ziwei Zhao;Dong Wang;Liwei Wang", "authorids": "~Zhao_Zhang10;~Ziwei_Zhao1;~Dong_Wang16;~Liwei_Wang1", "gender": "M;M;M;M", "homepage": ";http;;http://www.liweiwang-pku.com/", "dblp": ";;;", "google_scholar": "https://scholar.google.com/citations?hl=en;oW1KZZQAAAAJ;CHMpZBIAAAAJ;VZHxoh8AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhao_Zhang10;~Ziwei_Zhao1;~Dong_Wang16;~Liwei_Wang1", "aff": "Peking University;Yizhun co. ltd;Yizhun co. ltd;Peking University", "aff_domain": "stu.pku.edu.cn;yizhun-ai.com;yizhun-ai.com;pku.edu.cn", "position": "MS student;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2024graphmorph,\ntitle={GraphMorph: Tubular Structure Extraction by Morphing Predicted Graphs},\nauthor={Zhao Zhang and Ziwei Zhao and Dong Wang and Liwei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hW5QWiCctl}\n}", "github": "", "reviewers": "Ysem;4DfY;S6G1;CJeh", "pdf_size": 10193403, "rating": "4;5;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "88;67;73;54", "wc_strengths": "23;36;13;63", "wc_weaknesses": "291;86;46;35", "wc_questions": "326;3;12;22", "wc_limitations": "2;23;18;5", "wc_review": "730;215;162;179", "wc_reply_reviewers": "124;25;10;0", "wc_reply_authors": "356;143;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.5, 12.216791722870616 ], "wc_strengths_avg": [ 33.75, 18.7533330370897 ], "wc_weaknesses_avg": [ 114.5, 103.65447409542918 ], "wc_questions_avg": [ 90.75, 135.98782114586587 ], "wc_limitations_avg": [ 12.0, 8.74642784226795 ], "wc_review_avg": [ 321.5, 236.62258979226814 ], "wc_reply_reviewers_avg": [ 39.75, 49.4488371956308 ], "wc_reply_authors_avg": [ 124.75, 145.7178352158719 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JcjSv8M6bUYJ:scholar.google.com/&scioq=GraphMorph:+Tubular+Structure+Extraction+by+Morphing+Predicted+Graphs&hl=en&as_sdt=0,34", "gs_version_total": 0, "email": "stu.pku.edu.cn;yizhun-ai.com;yizhun-ai.com;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Peking University;Yizhun Company Limited", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;", "aff_unique_abbr": "Peking U;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "title": "Entropy-regularized Diffusion Policy with Q-Ensembles for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94062", "id": "hWRVbdAWiS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hWRVbdAWiS", "openreview": "https://openreview.net/forum?id=hWRVbdAWiS", "poster": "/media/PosterPDFs/NeurIPS%202024/94062.png?t=1731507333.1969619", "project": "", "author_site": "Ruoqi Zhang, Ziwei Luo, Jens Sj\u00f6lund, Thomas Sch\u00f6n, Per Mattsson", "tldr": "", "abstract": "Diffusion policy has shown a strong ability to express complex action distributions in offline reinforcement learning (RL). However, it suffers from overestimating Q-value functions on out-of-distribution (OOD) data points due to the offline dataset limitation. To address it, this paper proposes a novel entropy-regularized diffusion policy and takes into account the confidence of the Q-value prediction with Q-ensembles. At the core of our diffusion policy is a mean-reverting stochastic differential equation (SDE) that transfers the action distribution into a standard Gaussian form and then samples actions conditioned on the environment state with a corresponding reverse-time process. We show that the entropy of such a policy is tractable and that can be used to increase the exploration of OOD samples in offline RL training. Moreover, we propose using the lower confidence bound of Q-ensembles for pessimistic Q-value function estimation. The proposed approach demonstrates state-of-the-art performance across a range of tasks in the D4RL benchmarks, significantly improving upon existing diffusion-based policies. The code is available at https://github.com/ruoqizzz/entropy-offlineRL.", "keywords": "Offline Reinforcement Learning; Diffusion Model; Stochastic Differential Equation", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/1fe546914abc1fe98587ed1c758109252acf48be.zip", "author": "Ruoqi Zhang;Ziwei Luo;Jens Sj\u00f6lund;Thomas B. Sch\u00f6n;Per Mattsson", "authorids": "~Ruoqi_Zhang1;~Ziwei_Luo1;~Jens_Sj\u00f6lund1;~Thomas_B._Sch\u00f6n1;~Per_Mattsson1", "gender": "F;M;M;M;M", "homepage": ";https://algolzw.github.io;https://jsjol.github.io;http://user.it.uu.se/~thosc112/index.html;https://katalog.uu.se/empinfo/?id=N10-1806", "dblp": ";166/7005;155/3118;85/4891;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.se/citations?user=AlF2g-YAAAAJ;https://scholar.google.se/citations?user=FUqUC2oAAAAJ;", "orcid": ";0000-0003-3334-8655;0000-0002-9099-3522;0000-0001-5183-234X;", "linkedin": "ruoqi-zhang/;;jens-sj%C3%B6lund/;thomas-sch%C3%B6n-2b587b1/;", "or_profile": "~Ruoqi_Zhang1;~Ziwei_Luo1;~Jens_Sj\u00f6lund1;~Thomas_B._Sch\u00f6n1;~Per_Mattsson1", "aff": "Uppsala University;Uppsala University;Uppsala University;Uppsala University;", "aff_domain": "uu.se;it.uu.se;uu.se;uu.se;", "position": "PhD student;PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nzhang2024entropyregularized,\ntitle={Entropy-regularized Diffusion Policy with Q-Ensembles for Offline Reinforcement Learning},\nauthor={Ruoqi Zhang and Ziwei Luo and Jens Sj{\\\"o}lund and Thomas B. Sch{\\\"o}n and Per Mattsson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hWRVbdAWiS}\n}", "github": "", "reviewers": "Jzch;jdmN;T2t7;zXRk", "pdf_size": 1196672, "rating": "5;5;7;7", "confidence": "4;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "2;3;3;4", "wc_summary": "61;46;17;65", "wc_strengths": "89;43;33;48", "wc_weaknesses": "85;48;279;80", "wc_questions": "98;500;289;47", "wc_limitations": "19;37;12;27", "wc_review": "352;674;630;267", "wc_reply_reviewers": "150;230;0;13", "wc_reply_authors": "388;423;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 47.25, 18.846418757949746 ], "wc_strengths_avg": [ 53.25, 21.33512362279628 ], "wc_weaknesses_avg": [ 123.0, 91.17839656409845 ], "wc_questions_avg": [ 233.5, 178.35708564562273 ], "wc_limitations_avg": [ 23.75, 9.310612224768036 ], "wc_review_avg": [ 480.75, 174.5614146940841 ], "wc_reply_reviewers_avg": [ 98.25, 96.12069236121846 ], "wc_reply_authors_avg": [ 202.75, 203.12726921809391 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10633359430018982591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uu.se;it.uu.se;uu.se;uu.se;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Uppsala University", "aff_unique_dep": "", "aff_unique_url": "https://www.uu.se", "aff_unique_abbr": "UU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Sweden" }, { "title": "CoMERA: Computing- and Memory-Efficient Training via Rank-Adaptive Tensor Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94061", "id": "hXgLvYsG2c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hXgLvYsG2c", "openreview": "https://openreview.net/forum?id=hXgLvYsG2c", "poster": "/media/PosterPDFs/NeurIPS%202024/94061.png?t=1729879039.1085572", "project": "", "author_site": "Zi Yang, Ziyue Liu, Samridhi Choudhary, Xinfeng Xie, Cao Gao, Siegfried Kunzmann, Zheng Zhang", "tldr": "", "abstract": "Training large AI models such as LLMs and DLRMs costs massive GPUs and computing time. The high training cost has become only affordable to big tech companies, meanwhile also causing increasing concerns about the environmental impact. This paper presents CoMERA, a **Co**mputing- and **M**emory-**E**fficient training method via **R**ank-**A**daptive tensor optimization. CoMERA achieves end-to-end rank-adaptive tensor-compressed training via a multi-objective optimization formulation, and improves the training to provide both a high compression ratio and excellent accuracy in the training process. Our optimized numerical computation (e.g., optimized tensorized embedding and tensor-vector contractions) and GPU implementation eliminate part of the run-time overhead in the tensorized training on GPU. This leads to, for the first time, $2-3\\times$ speedup per training epoch compared with standard training. CoMERA also outperforms the recent GaLore in terms of both memory and computing efficiency. Specifically, CoMERA is $2\\times$ faster per training epoch and $9\\times$ more memory-efficient than GaLore on a tested six-encoder transformer with single-batch training. Our method also shows $\\sim 2\\times$ speedup than standard pre-training on a BERT-like code-generation LLM while achieving $4.23\\times$ compression ratio in pre-training.\nWith further HPC optimization, CoMERA may reduce the pre-training cost of many other LLMs. An implementation of CoMERA is available at .", "keywords": "Compressed Training;Memory-efficient;Tensor", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Zi Yang;Ziyue Liu;Samridhi Choudhary;Xinfeng Xie;Cao Gao;Siegfried Kunzmann;Zheng Zhang", "authorids": "~Zi_Yang3;~Ziyue_Liu1;~Samridhi_Choudhary1;~Xinfeng_Xie1;~Cao_Gao1;~Siegfried_Kunzmann1;~Zheng_Zhang2", "gender": "M;M;F;M;Not Specified;M;M", "homepage": "https://sites.google.com/view/ziyangjoy;;;;;;https://web.ece.ucsb.edu/~zhengzhang/", "dblp": "45/3180;150/8404-3;;145/6282;;;181/2621-5", "google_scholar": "EQE26QsAAAAJ;7hVIDywAAAAJ;GUgyCUUAAAAJ;;;1LNIIfMAAAAJ;qeahx5QAAAAJ", "orcid": "0000-0003-1048-9869;0000-0001-9538-5350;;;0000-0001-6858-1655;;", "linkedin": ";ziyue-liu-54b660244/;samridhishree/;;;;", "or_profile": "~Zi_Yang3;~Ziyue_Liu1;~Samridhi_Choudhary1;~Xinfeng_Xie1;~Cao_Gao1;~Siegfried_Kunzmann1;~Zheng_Zhang2", "aff": "State University of New York at Albany;University of California, Santa Barbara;Amazon;Meta Facebook;Meta Platforms;Amazon;University of California, Santa Barbara", "aff_domain": "albany.edu;ucsb.edu;amazon.com;meta.com;meta.com;amazon.com;ucsb.edu", "position": "Assistant Professor;PhD student;Researcher;Researcher;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nyang2024comera,\ntitle={Co{MERA}: Computing- and Memory-Efficient Training via Rank-Adaptive Tensor Optimization},\nauthor={Zi Yang and Ziyue Liu and Samridhi Choudhary and Xinfeng Xie and Cao Gao and Siegfried Kunzmann and Zheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hXgLvYsG2c}\n}", "github": "", "reviewers": "Rz1r;1f6L;hvAM;6ExQ", "pdf_size": 2623451, "rating": "5;5;6;6", "confidence": "5;4;3;4", "soundness": "3;3;2;2", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "70;51;55;82", "wc_strengths": "70;33;66;46", "wc_weaknesses": "23;86;96;115", "wc_questions": "320;3;138;25", "wc_limitations": "7;1;1;1", "wc_review": "490;174;356;269", "wc_reply_reviewers": "633;430;17;18", "wc_reply_authors": "1182;1799;309;460", "reply_reviewers": "3;3;1;1", "reply_authors": "6;6;3;5", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 12.338962679253067 ], "wc_strengths_avg": [ 53.75, 15.039531242695032 ], "wc_weaknesses_avg": [ 80.0, 34.51811118818641 ], "wc_questions_avg": [ 121.5, 125.52788534823647 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 322.25, 116.28924068889606 ], "wc_reply_reviewers_avg": [ 274.5, 266.8337497394211 ], "wc_reply_authors_avg": [ 937.5, 596.8628401902736 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 5.0, 1.224744871391589 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6438166007824450154&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "albany.edu;ucsb.edu;amazon.com;meta.com;meta.com;amazon.com;ucsb.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;3;2;1", "aff_unique_norm": "State University of New York;University of California, Santa Barbara;Amazon;Meta", "aff_unique_dep": ";;Amazon.com, Inc.;Meta Platforms, Inc.", "aff_unique_url": "https://www.albany.edu;https://www.ucsb.edu;https://www.amazon.com;https://meta.com", "aff_unique_abbr": "SUNY Albany;UCSB;Amazon;Meta", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Albany;Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is O(log N) practical? Near-Equivalence Between Delay Robustness and Bounded Regret in Bandits and RL", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94060", "id": "hYJOfWfw1P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hYJOfWfw1P", "openreview": "https://openreview.net/forum?id=hYJOfWfw1P", "poster": "", "project": "", "author_site": "Enoch H. Kang, P. R. Kumar", "tldr": "", "abstract": "Interactive decision making, encompassing bandits, contextual bandits, and reinforcement learning, has recently been of interest to theoretical studies of experimentation design and recommender system algorithm research. One recent finding in this area is that the well-known Graves-Lai constant being zero is a necessary and sufficient condition for achieving bounded (or constant) regret in interactive decision-making. As this condition may be a strong requirement for many applications, the practical usefulness of pursuing bounded regret has been questioned. In this paper, we show that the condition of the Graves-Lai constant being zero is also necessary for a consistent algorithm to achieve delay model robustness when reward delays are unknown (i.e., when feedback is anonymous). Here, model robustness is measured in terms of $\\epsilon$-robustness, one of the most widely used and one of the least adversarial robustness concepts in the robust statistics literature. In particular, we show that $\\epsilon$-robustness cannot be achieved for a consistent (i.e., uniformly sub-polynomial regret) algorithm, however small the nonzero $\\epsilon$ value is, when the Grave-Lai constant is not zero. While this is a strongly negative result, we also provide a positive result for linear rewards models (contextual linear bandits, reinforcement learning with linear MDP) that the Grave-Lai constant being zero is also sufficient for achieving bounded regret without any knowledge of delay models, i.e., the best of both the efficiency world and the delay robustness world.", "keywords": "Delayed anonymous reward;reward attribution;robustness;consistent algorithm;bounded regret;bandits;RL", "primary_area": "learning_theory", "supplementary_material": "", "author": "Enoch H. Kang;Panganamala Kumar", "authorids": "~Enoch_H._Kang1;~Panganamala_Kumar1", "gender": "M;M", "homepage": "https://cesg.tamu.edu/faculty/p-r-kumar/;https://sites.google.com/view/hyunwookkang", "dblp": "https://dblp.org/pers/k/Kumar:P=_R=.html;", "google_scholar": "qGUpTVwAAAAJ;UXvZAZMAAAAJ", "orcid": "0000-0003-0389-5367;0000-0002-9617-0893", "linkedin": ";", "or_profile": "~Panganamala_Kumar1;~HYUNWOOK_KANG1", "aff": "Texas A&M;University of Washington", "aff_domain": "tamu.edu;uw.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nkang2024is,\ntitle={Is O(log N) practical? Near-Equivalence Between Delay Robustness and Bounded Regret in Bandits and {RL}},\nauthor={Enoch H. Kang and Panganamala Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hYJOfWfw1P}\n}", "github": "", "reviewers": "f9VR;uxdM;ykEi;3wWt;bj5z", "pdf_size": 671578, "rating": "5;5;5;6;6", "confidence": "3;4;3;1;3", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;2;2;3;2", "wc_summary": "77;71;100;145;172", "wc_strengths": "31;36;6;87;18", "wc_weaknesses": "112;51;28;47;133", "wc_questions": "21;65;77;61;83", "wc_limitations": "1;9;4;35;1", "wc_review": "242;232;215;375;407", "wc_reply_reviewers": "140;17;24;21;0", "wc_reply_authors": "508;203;403;127;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 113.0, 39.329378332234036 ], "wc_strengths_avg": [ 35.6, 27.7459907013608 ], "wc_weaknesses_avg": [ 74.2, 40.74015218430093 ], "wc_questions_avg": [ 61.4, 21.70345594600086 ], "wc_limitations_avg": [ 10.0, 12.83744522870497 ], "wc_review_avg": [ 294.2, 80.14836242868597 ], "wc_reply_reviewers_avg": [ 40.4, 50.48801838060194 ], "wc_reply_authors_avg": [ 248.2, 184.27957021873044 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6666666666666665, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6hc8IyQZd2gJ:scholar.google.com/&scioq=Is+O(log+N)+practical%3F+Near-Equivalence+Between+Delay+Robustness+and+Bounded+Regret+in+Bandits+and+RL&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "tamu.edu;uw.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Texas A&M University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.washington.edu", "aff_unique_abbr": "TAMU;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Embedding Trajectory for Out-of-Distribution Detection in Mathematical Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94059", "id": "hYMxyeyEc5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hYMxyeyEc5", "openreview": "https://openreview.net/forum?id=hYMxyeyEc5", "poster": "", "project": "", "author_site": "Yiming Wang, Pei Zhang, Baosong Yang, Derek Wong, Zhuosheng Zhang, Rui Wang", "tldr": "", "abstract": "Real-world data deviating from the independent and identically distributed (\\textit{i.i.d.}) assumption of in-distribution training data poses security threats to deep networks, thus advancing out-of-distribution (OOD) detection algorithms. Detection methods in generative language models (GLMs) mainly focus on uncertainty estimation and embedding distance measurement, with the latter proven to be most effective in traditional linguistic tasks like summarization and translation. However, another complex generative scenario mathematical reasoning poses significant challenges to embedding-based methods due to its high-density feature of output spaces, but this feature causes larger discrepancies in the embedding shift trajectory between different samples in latent spaces. Hence, we propose a trajectory-based method TV score, which uses trajectory volatility for OOD detection in mathematical reasoning. Experiments show that our method outperforms all traditional algorithms on GLMs under mathematical reasoning scenarios and can be extended to more applications with high-density features in output spaces, such as multiple-choice questions.", "keywords": "Out-of-Distribution Detection;Mathematical Reasoning;Generative Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yiming Wang;Pei Zhang;Baosong Yang;Derek F. Wong;Zhuosheng Zhang;Rui Wang", "authorids": "~Yiming_Wang13;~Pei_Zhang7;~Baosong_Yang1;~Derek_F._Wong1;~Zhuosheng_Zhang1;~Rui_Wang10", "gender": ";F;M;M;M;M", "homepage": "https://alsace08.github.io/yiming.wang.cv/;;https://baosongyang.site/;https://www.fst.um.edu.mo/personal/derek-wong/;https://bcmi.sjtu.edu.cn/~zhangzs/;https://wangruinlp.github.io/", "dblp": ";78/5323-11;203/8245;123/0533;06/9708;w/RuiWang15", "google_scholar": "2C1VDq8AAAAJ;JqhgnkwAAAAJ;https://scholar.google.com.tw/citations?user=fXsHJXkAAAAJ;KjQBe8oAAAAJ;https://scholar.google.co.jp/citations?user=63LTQhgAAAAJ;oTU0v5IAAAAJ", "orcid": "0000-0001-5821-8895;;;0000-0002-5307-7322;0000-0002-4183-3645;0000-0001-8007-2503", "linkedin": ";;;derek-wong-6209445/;;", "or_profile": "~Yiming_Wang13;~Pei_Zhang7;~Baosong_Yang1;~Derek_F._Wong1;~Zhuosheng_Zhang1;~Rui_Wang7", "aff": "Shanghai Jiaotong University;Alibaba Group;Alibaba Group;University of Macau;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;alibaba-inc.com;alibaba-inc.com;um.edu.mo;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Researcher;Researcher;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024embedding,\ntitle={Embedding Trajectory for Out-of-Distribution Detection in Mathematical Reasoning},\nauthor={Yiming Wang and Pei Zhang and Baosong Yang and Derek F. Wong and Zhuosheng Zhang and Rui Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hYMxyeyEc5}\n}", "github": "", "reviewers": "A7h4;SAjz;KF5B;oehe", "pdf_size": 2140134, "rating": "3;5;6;7", "confidence": "3;3;4;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "92;75;55;40", "wc_strengths": "38;136;19;39", "wc_weaknesses": "170;153;60;36", "wc_questions": "2;146;20;120", "wc_limitations": "2;1;5;9", "wc_review": "304;511;159;244", "wc_reply_reviewers": "59;0;33;14", "wc_reply_authors": "746;109;19;6", "reply_reviewers": "2;0;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 19.704060495238032 ], "wc_strengths_avg": [ 58.0, 45.732920309116494 ], "wc_weaknesses_avg": [ 104.75, 57.69477879323223 ], "wc_questions_avg": [ 72.0, 62.01612693485461 ], "wc_limitations_avg": [ 4.25, 3.112474899497183 ], "wc_review_avg": [ 304.5, 129.8778272069563 ], "wc_reply_reviewers_avg": [ 26.5, 22.118996360594664 ], "wc_reply_authors_avg": [ 220.0, 306.2654077756742 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.23904572186687872, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15127512255085250641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sjtu.edu.cn;alibaba-inc.com;alibaba-inc.com;um.edu.mo;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;2;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Alibaba Group;University of Macau", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.alibaba.com;https://www.um.edu.mo", "aff_unique_abbr": "SJTU;Alibaba;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Macau SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A2PO: Towards Effective Offline Reinforcement Learning from an Advantage-aware Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94058", "id": "hYjRmGqq5e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hYjRmGqq5e", "openreview": "https://openreview.net/forum?id=hYjRmGqq5e", "poster": "/media/PosterPDFs/NeurIPS%202024/94058.png?t=1731068959.5022116", "project": "", "author_site": "Yunpeng Qing, Shunyu Liu, Jingyuan Cong, Kaixuan Chen, Yihe Zhou, Mingli Song", "tldr": "", "abstract": "Offline reinforcement learning endeavors to leverage offline datasets to craft effective agent policy without online interaction, which imposes proper conservative constraints with the support of behavior policies to tackle the out-of-distribution problem. However, existing works often suffer from the constraint conflict issue when offline datasets are collected from multiple behavior policies, i.e., different behavior policies may exhibit inconsistent actions with distinct returns across the state space. To remedy this issue, recent advantage-weighted methods prioritize samples with high advantage values for agent training while inevitably ignoring the diversity of behavior policy. In this paper, we introduce a novel Advantage-Aware Policy Optimization (A2PO) method to explicitly construct advantage-aware policy constraints for offline learning under mixed-quality datasets. Specifically, A2PO employs a conditional variational auto-encoder to disentangle the action distributions of intertwined behavior policies by modeling the advantage values of all training data as conditional variables. Then the agent can follow such disentangled action distribution constraints to optimize the advantage-aware policy towards high advantage values. Extensive experiments conducted on both the single-quality and mixed-quality datasets of the D4RL benchmark demonstrate that A2PO yields results superior to the counterparts. Our code is available at https://github.com/Plankson/A2PO.", "keywords": "Deep Learning;Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/39afde5a78a23269706b14d12a8777aedcf77139.zip", "author": "Yunpeng Qing;Shunyu Liu;Jingyuan Cong;Kaixuan Chen;Yihe Zhou;Mingli Song", "authorids": "~Yunpeng_Qing1;~Shunyu_Liu1;~Jingyuan_Cong1;~Kaixuan_Chen2;~Yihe_Zhou1;~Mingli_Song1", "gender": "M;;M;M;M;M", "homepage": "https://plankson.github.io/;https://liushunyu.github.io/;;;https://person.zju.edu.cn/msong;https://chenchkx.github.io/", "dblp": "333/0812;235/0752-1;;324/2516;71/5333;220/5629", "google_scholar": "-RvDl44AAAAJ;4U-X6d4AAAAJ;;;7oLbhAwAAAAJ;", "orcid": "0000-0001-7376-9847;0000-0003-0584-9129;0000-0002-8974-4609;0009-0004-2595-6743;0000-0003-2621-6048;0000-0002-2492-5230", "linkedin": ";;;;;", "or_profile": "~Yunpeng_Qing1;~Shunyu_Liu1;~Jingyuan_Cong1;~Yihe_Zhou1;~Mingli_Song1;~Kai-Xuan_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "MS student;PhD student;Undergrad student;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\nqing2024apo,\ntitle={A2{PO}: Towards Effective Offline Reinforcement Learning from an Advantage-aware Perspective},\nauthor={Yunpeng Qing and Shunyu Liu and Jingyuan Cong and Kaixuan Chen and Yihe Zhou and Mingli Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hYjRmGqq5e}\n}", "github": "", "reviewers": "W3XG;xTq4;M64c;3MzD;e3Zq", "pdf_size": 1665921, "rating": "6;6;7;7;8", "confidence": "4;4;3;3;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "117;52;69;61;100", "wc_strengths": "42;61;16;3;163", "wc_weaknesses": "2;16;99;3;23", "wc_questions": "167;276;2;105;1", "wc_limitations": "1;21;4;3;38", "wc_review": "329;426;190;175;325", "wc_reply_reviewers": "20;90;23;19;0", "wc_reply_authors": "648;549;485;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "4;4;3;1;1", "rating_avg": [ 6.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 79.8, 24.63655820117737 ], "wc_strengths_avg": [ 57.0, 56.69920634365176 ], "wc_weaknesses_avg": [ 28.6, 36.081019941237805 ], "wc_questions_avg": [ 110.2, 104.2830762875741 ], "wc_limitations_avg": [ 13.4, 14.235167719419396 ], "wc_review_avg": [ 289.0, 94.29952279836839 ], "wc_reply_reviewers_avg": [ 30.4, 30.88430021871954 ], "wc_reply_authors_avg": [ 336.4, 279.53718893914635 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.6, 1.3564659966250536 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SC82tiqs7WkJ:scholar.google.com/&scioq=A2PO:+Towards+Effective+Offline+Reinforcement+Learning+from+an+Advantage-aware+Perspective&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "BoNBoN Alignment for Large Language Models and the Sweetness of Best-of-n Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94057", "id": "haSKMlrbX5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=haSKMlrbX5", "openreview": "https://openreview.net/forum?id=haSKMlrbX5", "poster": "/media/PosterPDFs/NeurIPS%202024/94057.png?t=1731356548.7221413", "project": "", "author_site": "Lin Gui, Cristina Garbacea, Victor Veitch", "tldr": "", "abstract": "This paper concerns the problem of aligning samples from large language models to human preferences using *best-of-$n$* sampling, where we draw $n$ samples, rank them, and return the best one. We consider two fundamental problems. First: what is the relationship between best-of-$n$ and other (RLHF-type) approaches to aligning LLMs? In particular, when should one be preferred to the other? We show that the best-of-$n$ sampling distribution is essentially equivalent to the policy learned by RLHF if we apply a particular monotone transformation to the reward function. Moreover, we show that this transformation yields the best possible trade-off between win-rate against the base model vs KL distance from the base model. Then, best-of-$n$ is a Pareto-optimal win-rate vs KL solution.\nThe second problem we consider is how to fine-tune a model to mimic the best-of-$n$ sampling distribution, to avoid drawing $n$ samples for each inference. We derive *BonBon Alignment* as a method for achieving this. Experiments show that BonBon alignment yields a model that achieves high win rates while minimally affecting off-target aspects of the generations.", "keywords": "LLM alignment; LLM fine tuning; Best of n; RLHF; Optimal Policy", "primary_area": "generative_models", "supplementary_material": "/attachment/cb666c7970b409d9628cf59b2b2bb7db3e96fc68.zip", "author": "Lin Gui;Cristina Garbacea;Victor Veitch", "authorids": "~Lin_Gui5;~Cristina_Garbacea1;~Victor_Veitch1", "gender": "F;;", "homepage": ";https://cristinagarbacea.com/;http://victorveitch.com", "dblp": ";150/5069.html;167/5650", "google_scholar": "88eaL8UAAAAJ;302eGI0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";cristinagarbacea;", "or_profile": "~Lin_Gui5;~Cristina_Garbacea1;~Victor_Veitch1", "aff": "University of Chicago;University of Chicago;Google", "aff_domain": "uchicago.edu;uchicago.edu;google.com", "position": "PhD student;Postdoc;Research Scientist", "bibtex": "@inproceedings{\ngui2024bonbon,\ntitle={Bo{NB}oN Alignment for Large Language Models and the Sweetness of Best-of-n Sampling},\nauthor={Lin Gui and Cristina Garbacea and Victor Veitch},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=haSKMlrbX5}\n}", "github": "", "reviewers": "ySG3;PJN3;mr5a;Jgyb", "pdf_size": 1651482, "rating": "5;5;7;7", "confidence": "2;2;3;4", "soundness": "3;2;4;3", "novelty": "2;3;3;3", "presentation": "3;1;3;4", "wc_summary": "78;43;62;150", "wc_strengths": "75;29;93;66", "wc_weaknesses": "2;330;74;37", "wc_questions": "310;326;70;36", "wc_limitations": "87;20;1;1", "wc_review": "552;748;300;290", "wc_reply_reviewers": "17;628;0;21", "wc_reply_authors": "55;621;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 83.25, 40.48070528041724 ], "wc_strengths_avg": [ 65.75, 23.33854108550918 ], "wc_weaknesses_avg": [ 110.75, 129.11888901318815 ], "wc_questions_avg": [ 185.5, 133.16437211206306 ], "wc_limitations_avg": [ 27.25, 35.35799061032739 ], "wc_review_avg": [ 472.5, 190.58003568055076 ], "wc_reply_reviewers_avg": [ 166.5, 266.563782236072 ], "wc_reply_authors_avg": [ 169.0, 261.9265164125236 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12511600876626376365&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uchicago.edu;uchicago.edu;google.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Chicago;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uchicago.edu;https://www.google.com", "aff_unique_abbr": "UChicago;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Vision-Language Models are Strong Noisy Label Detectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94056", "id": "haUnEiXgQ7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=haUnEiXgQ7", "openreview": "https://openreview.net/forum?id=haUnEiXgQ7", "poster": "/media/PosterPDFs/NeurIPS%202024/94056.png?t=1731047562.534391", "project": "", "author_site": "Tong Wei, Hao-Tian Li, ChunShu Li, Jiang-Xin Shi, Yu-Feng Li, Min-Ling Zhang", "tldr": "", "abstract": "Recent research on fine-tuning vision-language models has demonstrated impressive performance in various downstream tasks. However, the challenge of obtaining accurately labeled data in real-world applications poses a significant obstacle during the fine-tuning process. To address this challenge, this paper presents a Denoising Fine-Tuning framework, called DeFT, for adapting vision-language models. DeFT utilizes the robust alignment of textual and visual features pre-trained on millions of auxiliary image-text pairs to sieve out noisy labels. The proposed framework establishes a noisy label detector by learning positive and negative textual prompts for each class. The positive prompt seeks to reveal distinctive features of the class, while the negative prompt serves as a learnable threshold for separating clean and noisy samples. We employ parameter-efficient fine-tuning for the adaptation of a pre-trained visual encoder to promote its alignment with the learned textual prompts. As a general framework, DeFT can seamlessly fine-tune many pre-trained models to downstream tasks by utilizing carefully selected clean samples. Experimental results on seven synthetic and real-world noisy datasets validate the effectiveness of DeFT in both noisy label detection and image classification. Our source code can be found in the supplementary material.", "keywords": "label-noise learning;sample selection;semi-supervised learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/0cd97bdd37f60cd8270c140e16610b39e7e3e062.zip", "author": "Tong Wei;Hao-Tian Li;Chun-Shu Li;Jiang-Xin Shi;Yu-Feng Li;Min-Ling Zhang", "authorids": "~Tong_Wei1;~Hao-Tian_Li1;~Chun-Shu_Li1;~Jiang-Xin_Shi1;~Yu-Feng_Li1;~Min-Ling_Zhang2", "gender": "M;;M;M;M;M", "homepage": "https://palm.seu.edu.cn/weit/;http://www.lamda.nju.edu.cn/shijx;http://palm.seu.edu.cn/zhangml/;;http://palm.seu.edu.cn/homepage/lihaotian/index.html;https://cs.nju.edu.cn/liyf/index.htm", "dblp": "49/933-1;299/5485.html;84/271.html;40/8572;353/4983;57/413", "google_scholar": "EFCZuW4AAAAJ;KEgtGncAAAAJ;uFHCIM0AAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2766-8209;0000-0002-0318-0911;0000-0003-1880-5918;0000-0002-0705-7304;;0000-0002-2220-5248", "linkedin": ";;;;;", "or_profile": "~Tong_Wei1;~Jiang-Xin_Shi1;~Min-Ling_Zhang2;~Chunshu_Li1;~Haotian_Li1;~Yu-feng_Li2", "aff": "Southeast University;Nanjing University;Southeast University;Southeast University;Southeast University;Nanjing University", "aff_domain": "seu.edu.cn;nju.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;nju.edu.cn", "position": "Associate Professor;PhD student;Full Professor;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nwei2024visionlanguage,\ntitle={Vision-Language Models are Strong Noisy Label Detectors},\nauthor={Tong Wei and Hao-Tian Li and Chun-Shu Li and Jiang-Xin Shi and Yu-Feng Li and Min-Ling Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=haUnEiXgQ7}\n}", "github": "", "reviewers": "6CXy;g1PL;FyAK;mmEa", "pdf_size": 1917746, "rating": "5;5;6;7", "confidence": "4;2;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "121;114;91;64", "wc_strengths": "41;80;73;121", "wc_weaknesses": "350;57;65;22", "wc_questions": "52;72;75;11", "wc_limitations": "15;16;11;15", "wc_review": "579;339;315;233", "wc_reply_reviewers": "57;0;24;0", "wc_reply_authors": "111;63;16;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 97.5, 22.299103120977758 ], "wc_strengths_avg": [ 78.75, 28.481353549296074 ], "wc_weaknesses_avg": [ 123.5, 131.76589088227652 ], "wc_questions_avg": [ 52.5, 25.53918557824427 ], "wc_limitations_avg": [ 14.25, 1.920286436967152 ], "wc_review_avg": [ 366.5, 128.82837420382205 ], "wc_reply_reviewers_avg": [ 20.25, 23.370654676324325 ], "wc_reply_authors_avg": [ 47.5, 43.36184959154764 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RCU7533qnr0J:scholar.google.com/&scioq=Vision-Language+Models+are+Strong+Noisy+Label+Detectors&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "seu.edu.cn;nju.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Southeast University;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.nju.edu.cn", "aff_unique_abbr": "SEU;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GraphVis: Boosting LLMs with Visual Knowledge Graph Integration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94055", "id": "haVPmN8UGi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=haVPmN8UGi", "openreview": "https://openreview.net/forum?id=haVPmN8UGi", "poster": "", "project": "", "author_site": "Yihe Deng, Chenchen Ye, Zijie Huang, Mingyu Derek Ma, Yiwen Kou, Wei Wang", "tldr": "", "abstract": "The rapid evolution of large language models (LLMs) has expanded their capabilities across various data modalities, extending from well-established image data to increasingly popular graph data. Given the limitation of LLMs in hallucinations and inaccuracies in recalling factual knowledge, Knowledge Graph (KG) has emerged as a crucial data modality to support more accurate reasoning by LLMs. However, integrating structured knowledge from KGs into LLMs remains challenging, as most current KG-enhanced LLM methods directly convert the KG into linearized text triples, which is not as expressive as the original structured data. To address this, we introduce GraphVis, which conserves the intricate graph structure through the visual modality to enhance the comprehension of KGs with the aid of Large Vision Language Models (LVLMs). Our approach incorporates a unique curriculum fine-tuning scheme which first instructs LVLMs to recognize basic graphical features from the images, and subsequently incorporates reasoning on QA tasks with the visual graphs. This cross-modal methodology not only markedly enhances performance on standard textual QA but also shows improved zero-shot VQA performance by utilizing synthetic graph images to augment the data for VQA tasks. We present comprehensive evaluations across commonsense reasoning QA benchmarks, where GraphVis provides an average improvement of 11.1% over its base model and outperforms existing KG-enhanced LLM approaches. Across VQA benchmarks such as ScienceQA that share similar scientific diagram images, GraphVis provides a notable gain of 4.32%.", "keywords": "Large Language Models;Knowledge Graphs;Multi-modal learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/0ed680fcddf8e405c9f6e600f31592be83a77f47.zip", "author": "Yihe Deng;Chenchen Ye;Zijie Huang;Mingyu Derek Ma;Yiwen Kou;Wei Wang", "authorids": "~Yihe_Deng1;~Chenchen_Ye1;~Zijie_Huang1;~Mingyu_Derek_Ma1;~Yiwen_Kou1;~Wei_Wang13", "gender": "F;F;F;M;F;F", "homepage": ";https://yecchen.github.io/;https://zijieh.github.io/;https://derek.ma;https://evankou.github.io/;http://www.cs.ucla.edu/~weiwang", "dblp": "230/8011;263/7265-1;246/8147-2;232/6526;323/9058;w/WeiWang.html", "google_scholar": "7Lix1poAAAAJ;;SejA1zsAAAAJ;6tXXg6AAAAAJ;https://scholar.google.com/citations?hl=en;UedS9LQAAAAJ", "orcid": ";0009-0007-5318-2265;;;;0000-0002-8180-2886", "linkedin": ";;zijie-huang-62514a177/;;yiwen-kou-5a444916b/;wei-wang-8800845/", "or_profile": "~Yihe_Deng1;~Chenchen_Ye1;~Zijie_Huang1;~Mingyu_Derek_Ma1;~Yiwen_Kou1;~Wei_Wang13", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndeng2024graphvis,\ntitle={GraphVis: Boosting {LLM}s with Visual Knowledge Graph Integration},\nauthor={Yihe Deng and Chenchen Ye and Zijie Huang and Mingyu Derek Ma and Yiwen Kou and Wei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=haVPmN8UGi}\n}", "github": "", "reviewers": "GUVR;Tf8h;omgG;2Am7", "pdf_size": 3637867, "rating": "5;6;6;6", "confidence": "3;3;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;2", "presentation": "3;3;3;3", "wc_summary": "155;28;79;51", "wc_strengths": "191;18;69;67", "wc_weaknesses": "245;93;132;99", "wc_questions": "147;12;4;4", "wc_limitations": "6;8;1;1", "wc_review": "744;159;285;222", "wc_reply_reviewers": "58;14;10;9", "wc_reply_authors": "327;12;185;11", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.25, 47.8506792010312 ], "wc_strengths_avg": [ 86.25, 63.83327893818396 ], "wc_weaknesses_avg": [ 142.25, 61.15298439160594 ], "wc_questions_avg": [ 41.75, 60.85382075104241 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 352.5, 230.38066325106368 ], "wc_reply_reviewers_avg": [ 22.75, 20.437404434027332 ], "wc_reply_authors_avg": [ 133.75, 132.15781286023162 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17441902318322479499&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "ucla.edu;ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu;ucla.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Piecewise-Stationary Bandits with Knapsacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94054", "id": "haa457jwjw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=haa457jwjw", "openreview": "https://openreview.net/forum?id=haa457jwjw", "poster": "/media/PosterPDFs/NeurIPS%202024/94054.png?t=1731235221.678999", "project": "", "author_site": "Xilin Zhang, Wang Chi Cheung", "tldr": "", "abstract": "We study Bandits with Knapsacks (Bwk) in a piecewise-stationary environment. We propose a novel inventory reserving algorithm which draws new insights into the problem. Suppose parameters $\\eta_{\\min}, \\eta_{\\max} \\in (0,1]$ respectively lower and upper bound the reward earned and the resources consumed in a time round. Our algorithm achieves a provably near-optimal competitive ratio of $O(\\log(\\eta_{\\max}/\\eta_{\\min}))$, with a matching lower bound provided. Our performance guarantee is based on a dynamic benchmark, distinguishing our work from existing works on adversarial Bwk who compare with the static benchmark. Furthermore, different from existing non-stationary Bwk work, we do not require a bounded global variation.", "keywords": "non-stationary bandits;bandits with knapsacks;competitive ratio", "primary_area": "bandits", "supplementary_material": "", "author": "Xilin Zhang;Wang Chi Cheung", "authorids": "~Xilin_Zhang2;~Wang_Chi_Cheung1", "gender": "Not Specified;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": "0009-0008-8351-7262;", "linkedin": ";", "or_profile": "~Xilin_Zhang2;~Wang_Chi_Cheung1", "aff": "National University of Singapore;", "aff_domain": "nus.edu.sg;", "position": "Researcher;", "bibtex": "@inproceedings{\nzhang2024piecewisestationary,\ntitle={Piecewise-Stationary Bandits with Knapsacks},\nauthor={Xilin Zhang and Wang Chi Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=haa457jwjw}\n}", "github": "", "reviewers": "engA;ifdP;betX;SDDb;BBCY", "pdf_size": 511182, "rating": "5;5;6;6;6", "confidence": "4;4;3;2;3", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;4;2;2", "wc_summary": "60;70;45;39;114", "wc_strengths": "24;62;52;63;75", "wc_weaknesses": "29;153;2;34;105", "wc_questions": "306;67;33;49;44", "wc_limitations": "1;2;1;13;1", "wc_review": "420;354;133;198;339", "wc_reply_reviewers": "56;0;0;31;200", "wc_reply_authors": "2084;667;252;627;1441", "reply_reviewers": "1;0;0;1;2", "reply_authors": "6;2;2;2;4", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 65.6, 26.552589327596657 ], "wc_strengths_avg": [ 55.2, 17.22091751330341 ], "wc_weaknesses_avg": [ 64.6, 55.80179208591782 ], "wc_questions_avg": [ 99.8, 103.68297835228306 ], "wc_limitations_avg": [ 3.6, 4.715930449020639 ], "wc_review_avg": [ 288.8, 106.30409211314492 ], "wc_reply_reviewers_avg": [ 57.4, 74.32792207508561 ], "wc_reply_authors_avg": [ 1014.2, 660.0076969248162 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 3.2, 1.6 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8728715609439693, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7609963414713753361&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "nus.edu.sg;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "title": "Long-range Meta-path Search on Large-scale Heterogeneous Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94053", "id": "hbOWLtJNMK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hbOWLtJNMK", "openreview": "https://openreview.net/forum?id=hbOWLtJNMK", "poster": "/media/PosterPDFs/NeurIPS%202024/94053.png?t=1730079168.7830799", "project": "", "author_site": "Chao Li, Zijie Guo, qiuting he, Kun He", "tldr": "", "abstract": "Utilizing long-range dependency, a concept extensively studied in homogeneous graphs, remains underexplored in heterogeneous graphs, especially on large ones, posing two significant challenges: Reducing computational costs while maximizing effective information utilization in the presence of heterogeneity, and overcoming the over-smoothing issue in graph neural networks. To address this gap, we investigate the importance of different meta-paths and introduce \nan automatic framework for utilizing long-range dependency on heterogeneous graphs, denoted as Long-range Meta-path Search through Progressive Sampling (LMSPS). Specifically, we develop a search space with all meta-paths related to the target node type. By employing a progressive sampling algorithm, LMSPS dynamically shrinks the search space with hop-independent time complexity. Through a sampling evaluation strategy, LMSPS conducts a specialized and effective meta-path selection, leading to retraining with only effective meta-paths, thus mitigating costs and over-smoothing. Extensive experiments across diverse heterogeneous datasets validate LMSPS's capability in discovering effective long-range meta-paths, surpassing state-of-the-art methods. Our code is available at https://github.com/JHL-HUST/LMSPS.", "keywords": "Heterogeneous graph;long-range dependency;meta-path;sampling", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Chao Li;Zijie Guo;Qiuting He;Kun He", "authorids": "~Chao_Li14;~Zijie_Guo1;~Qiuting_He1;~Kun_He1", "gender": "M;;M;F", "homepage": "https://github.com/lichaoaaron;;;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm", "dblp": ";;https://dblp.uni-trier.de/pid/322/9335.html;59/1028-1", "google_scholar": "Wik8bkIAAAAJ;;;YTQnGJsAAAAJ", "orcid": "0000-0001-9066-1440;;;0000-0001-7627-4604", "linkedin": ";;;", "or_profile": "~Chao_Li14;~Zijie_Guo1;~Qiuting_He1;~Kun_He1", "aff": "Huazhong University of Science and Technology;;;Huazhong University of Sceince and Technology", "aff_domain": "hust.edu.cn;;;hust.edu.cn", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nli2024longrange,\ntitle={Long-range Meta-path Search on Large-scale Heterogeneous Graphs},\nauthor={Chao Li and Zijie Guo and Qiuting He and Kun He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hbOWLtJNMK}\n}", "github": "", "reviewers": "r1gQ;dJZo;71Wo;msas", "pdf_size": 801729, "rating": "3;5;7;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "48;37;65;140", "wc_strengths": "30;34;80;74", "wc_weaknesses": "37;207;80;185", "wc_questions": "255;29;2;5", "wc_limitations": "1;19;1;10", "wc_review": "371;326;228;414", "wc_reply_reviewers": "322;177;0;31", "wc_reply_authors": "1134;886;0;15", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 72.5, 40.22747817102136 ], "wc_strengths_avg": [ 54.5, 22.643983748448505 ], "wc_weaknesses_avg": [ 127.25, 70.83916642649037 ], "wc_questions_avg": [ 72.75, 105.74113438014555 ], "wc_limitations_avg": [ 7.75, 7.46240577829965 ], "wc_review_avg": [ 334.75, 69.04120146695016 ], "wc_reply_reviewers_avg": [ 132.5, 128.20783907390376 ], "wc_reply_authors_avg": [ 508.75, 508.8886788876326 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1180950756383037870&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "hust.edu.cn;;;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "hc9GkYw9kR", "title": "LORA-MOO: Learning Ordinal Relations and Angles for Expensive Many-Objective Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Many-objective optimization (MOO) simultaneously optimizes many conflicting objectives to identify the Pareto front - a set of diverse solutions that represent different optimal balances between conflicting objectives. For expensive MOO problems, due to their costly function evaluations, computationally cheap surrogates have been widely used in MOO to save evaluation budget. However, as the number of objectives increases, the cost of learning and surrogation, as well as the difficulty of maintaining solution diversity, increases rapidly. In this paper, we propose LORA-MOO, a surrogate-assisted MOO algorithm that learns surrogates from spherical coordinates. This includes an ordinal-regression-based surrogate for convergence and $M-1$ regression-based surrogates for diversity. $M$ is the number of objectives. Such a surrogate modeling method makes it possible to use a single ordinal surrogate to do the surrogate-assisted search, and the remaining surrogates are used to select solution for expensive evaluations, which enhances the optimization efficiency. The ordinal regression surrogate is developed to predict ordinal relation values as radial coordinates, estimating how desirable the candidate solutions are in terms of convergence. The solution diversity is maintained via angles between solutions, which is a parameter-free. Experimental results show that LORA-MOO significantly outperforms other surrogate-assisted MOO methods on most MOO benchmark problems and real-world applications.", "keywords": "Expensive optimization;many-objective optimization;Gaussian Processes.", "primary_area": "optimization", "supplementary_material": "", "author": "Xunzhao Yu;Miqing Li", "authorids": "~Xunzhao_Yu1;~Miqing_Li1", "gender": ";M", "homepage": ";https://sites.google.com/view/miqing-li", "dblp": ";05/3393", "google_scholar": ";h8UksmEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Xunzhao_Yu1;~Miqing_Li1", "aff": ";University of Birmingham", "aff_domain": ";bham.ac.uk", "position": ";Associate Professor", "bibtex": "@misc{\nanonymous2024loramoo,\ntitle={{LORA}-{MOO}: Learning Ordinal Relations and Angles for Expensive Many-Objective Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=hc9GkYw9kR}\n}", "github": "", "project": "", "reviewers": "wux8;97CT;pAi2;TSjm", "site": "https://openreview.net/forum?id=hc9GkYw9kR", "pdf_size": 4717084, "rating": "3;4;4;4", "confidence": "4;5;4;4", "soundness": "2;3;2;3", "novelty": "2;2;2;2", "presentation": "1;3;3;4", "wc_summary": "37;31;43;95", "wc_strengths": "17;6;63;82", "wc_weaknesses": "56;134;354;234", "wc_questions": "480;2;287;24", "wc_limitations": "2;2;24;1", "wc_review": "592;175;771;436", "wc_reply_reviewers": "27;11;437;45", "wc_reply_authors": "541;0;683;309", "reply_reviewers": "1;1;4;1", "reply_authors": "2;1;4;2", "rating_avg": [ 3.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 51.5, 25.470571253900058 ], "wc_strengths_avg": [ 42.0, 31.47220996371243 ], "wc_weaknesses_avg": [ 194.5, 111.62772952989772 ], "wc_questions_avg": [ 198.25, 197.57071518825862 ], "wc_limitations_avg": [ 7.25, 9.67923034130297 ], "wc_review_avg": [ 493.5, 218.77899807796908 ], "wc_reply_reviewers_avg": [ 130.0, 177.65415840897168 ], "wc_reply_authors_avg": [ 383.25, 258.4224980530913 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CyLfKZl_DCIJ:scholar.google.com/&scioq=LORA-MOO:+Learning+Ordinal+Relations+and+Angles+for+Expensive+Many-Objective+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0", "aff_unique_norm": "University of Birmingham", "aff_unique_dep": "", "aff_unique_url": "https://www.birmingham.ac.uk", "aff_unique_abbr": "Birmingham", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "BetterBench: Assessing AI Benchmarks, Uncovering Issues, and Establishing Best Practices", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97566", "id": "hcOq2buakM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hcOq2buakM", "openreview": "https://openreview.net/forum?id=hcOq2buakM", "poster": "", "project": "", "author_site": "Anka Reuel-Lamparth, Amelia Hardy`, Chandler Smith, Max Lamparth, Mykel J Kochenderfer", "tldr": "", "abstract": "AI models are increasingly prevalent in high-stakes environments, necessitating thorough assessment of their capabilities and risks. Benchmarks are popular for measuring these attributes and for comparing model performance, tracking progress, and identifying weaknesses in foundation and non-foundation models. They can inform model selection for downstream tasks and influence policy initiatives. However, not all benchmarks are the same: their quality depends on their design and usability. In this paper, we develop an assessment framework considering 40 best practices across a benchmark's life cycle and evaluate 25 AI benchmarks against it. We find that there exist large quality differences and that commonly used benchmarks suffer from significant issues. We further find that most benchmarks do not report statistical significance of their results nor can results be easily replicated. To support benchmark developers in aligning with best practices, we provide a checklist for minimum quality assurance based on our assessment. We also develop a living repository of benchmark assessments to support benchmark comparability.", "keywords": "benchmarking;assessment;best practices;evaluation;benchmark", "primary_area": "", "supplementary_material": "/attachment/a2de234c3cdeaf4a37b8ad7c4712bcdbc5839518.zip", "author": "Anka Reuel;Amelia Hardy;Chandler Smith;Max Lamparth;Malcolm Hardy;Mykel Kochenderfer", "authorids": "~Anka_Reuel1;~Amelia_Hardy1;~Chandler_Smith1;~Max_Lamparth1;~Malcolm_Hardy1;~Mykel_Kochenderfer1", "gender": "F;M;;;M;F", "homepage": "https://www.linkedin.com/in/ameliahardy/;https://chandlersmith.me/;https://www.maxlamparth.com;;https://mykel.kochenderfer.com;http://www.ankareuel.com", "dblp": ";;326/6752;;34/2029.html;", "google_scholar": "KPkdQb4AAAAJ;MW32guUAAAAJ;snC9WF4AAAAJ;;cAy9G6oAAAAJ;", "orcid": ";;0000-0001-6405-513X;;0000-0002-7238-9663;", "linkedin": ";chandlerdsmith;maxlamparth;malcolm-hardy-8a2461329/;mykel-kochenderfer;", "or_profile": "~Amelia_Hardy1;~Chandler_Smith1;~Max_Lamparth1;~Malcolm_Hardy1;~Mykel_Kochenderfer1;~Ann-Katrin_Reuel1", "aff": "Stanford University;Machine Learning Alignment and Theory Scholars;Stanford University;University of Southern California;Stanford University;Stanford University", "aff_domain": "stanford.edu;matsprogram.com;stanford.edu;usc.edu;stanford.edu;stanford.edu", "position": "MS student;Researcher;Postdoc;Undergrad student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nreuel2024betterbench,\ntitle={BetterBench: Assessing {AI} Benchmarks, Uncovering Issues, and Establishing Best Practices},\nauthor={Anka Reuel and Amelia Hardy and Chandler Smith and Max Lamparth and Malcolm Hardy and Mykel Kochenderfer},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hcOq2buakM}\n}", "github": "", "reviewers": "X3U4;2Wd9;fLHh", "pdf_size": 883722, "rating": "4;7;7", "confidence": "5;5;3", "wc_summary_and_contributions": "107;37;39", "wc_strengths": "3;36;77", "wc_improvement": "3;16;354", "wc_limitations": "3;1;18", "wc_correctness": "3;1;12", "wc_clarity": "3;1;12", "wc_relation_to_prior_work": "3;1;71", "wc_documentation": "30;1;70", "wc_additional_feedback": "1;1;1", "wc_review": "156;95;654", "wc_reply_reviewers": "0;0;40", "wc_reply_authors": "70;70;143", "reply_reviewers": "0;0;1", "reply_authors": "3;2;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 61.0, 32.53715824509981 ], "wc_strengths_avg": [ 38.666666666666664, 30.26916289265731 ], "wc_improvement_avg": [ 124.33333333333333, 162.48555491352317 ], "wc_limitations_avg": [ 7.333333333333333, 7.586537784494029 ], "wc_correctness_avg": [ 5.333333333333333, 4.784233364802441 ], "wc_clarity_avg": [ 5.333333333333333, 4.784233364802441 ], "wc_relation_to_prior_work_avg": [ 25.0, 32.53715824509981 ], "wc_documentation_avg": [ 33.666666666666664, 28.288199345702832 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 301.6666666666667, 250.37882409572012 ], "wc_reply_reviewers_avg": [ 13.333333333333334, 18.856180831641264 ], "wc_reply_authors_avg": [ 94.33333333333333, 34.41253001774531 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3277356671138817320&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;matsprogram.com;stanford.edu;usc.edu;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Stanford University;Machine Learning Alignment and Theory Scholars;University of Southern California", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;;https://www.usc.edu", "aff_unique_abbr": "Stanford;;USC", "aff_campus_unique_index": "0;0;2;0;0", "aff_campus_unique": "Stanford;;Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Indoor Air Quality Dataset with Activities of Daily Living in Low to Middle-income Communities", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97565", "id": "hceKrY4dfC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hceKrY4dfC", "openreview": "https://openreview.net/forum?id=hceKrY4dfC", "poster": "/media/PosterPDFs/NeurIPS%202024/97565.png?t=1731606168.943651", "project": "", "author_site": "Prasenjit Karmakar, Swadhin Pradhan, Sandip Chakraborty", "tldr": "", "abstract": "In recent years, indoor air pollution has posed a significant threat to our society, claiming over 3.2 million lives annually. Developing nations, such as India, are most affected since lack of knowledge, inadequate regulation, and outdoor air pollution lead to severe daily exposure to pollutants. However, only a limited number of studies have attempted to understand how indoor air pollution affects developing countries like India. To address this gap, we present spatiotemporal measurements of air quality from 30 indoor sites over six months during summer and winter seasons. The sites are geographically located across four regions of type: rural, suburban, and urban, covering the typical low to middle-income population in India. The dataset contains various types of indoor environments (e.g., studio apartments, classrooms, research laboratories, food canteens, and residential households), and can provide the basis for data-driven learning model research aimed at coping with unique pollution patterns in developing countries. This unique dataset demands advanced data cleaning and imputation techniques for handling missing data due to power failure or network outages during data collection. Furthermore, through a simple speech-to-text application, we provide real-time indoor activity labels annotated by occupants. Therefore, environmentalists and ML enthusiasts can utilize this dataset to understand the complex patterns of the pollutants under different indoor activities, identify recurring sources of pollution, forecast exposure, improve floor plans and room structures of modern indoor designs, develop pollution-aware recommender systems, etc.", "keywords": "Indoor Air Quality Dataset;Developing Countries;Activities of Daily Living", "primary_area": "", "supplementary_material": "", "author": "Prasenjit Karmakar;Swadhin Pradhan;Sandip Chakraborty", "authorids": "~Prasenjit_Karmakar2;~Swadhin_Pradhan1;~Sandip_Chakraborty1", "gender": "M;M;M", "homepage": "https://prasenjit52282.github.io/;https://www.swadhinpradhan.com/;http://cse.iitkgp.ac.in/~sandipc/", "dblp": "160/8336.html;;28/9571", "google_scholar": "uK96UkwAAAAJ;https://scholar.google.co.in/citations?user=YT4IhEIAAAAJ;https://scholar.google.com.tw/citations?user=dEpbTokAAAAJ", "orcid": "0000-0002-7345-1406;;", "linkedin": "prasenjit52282/;;", "or_profile": "~Prasenjit_Karmakar2;~Swadhin_Pradhan1;~Sandip_Chakraborty1", "aff": "Indian Institute of Technology Kharagpur;Cisco Systems ;Indian Institute of Technology Kharagpur", "aff_domain": "iitkgp.ac.in;cisco.com;iitkgp.ac.in", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkarmakar2024indoor,\ntitle={Indoor Air Quality Dataset with Activities of Daily Living in Low to Middle-income Communities},\nauthor={Prasenjit Karmakar and Swadhin Pradhan and Sandip Chakraborty},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hceKrY4dfC}\n}", "github": "", "reviewers": "cXtY;97sg;2kUv;TNmg", "pdf_size": 13584747, "rating": "6;7;7;7", "confidence": "3;4;4;4", "wc_summary_and_contributions": "87;50;139;71", "wc_strengths": "6;85;74;82", "wc_improvement": "284;179;241;90", "wc_limitations": "32;24;8;17", "wc_correctness": "14;89;1;20", "wc_clarity": "1;17;1;9", "wc_relation_to_prior_work": "2;30;10;7", "wc_documentation": "5;4;1;23", "wc_additional_feedback": "1;1;1;1", "wc_review": "432;479;476;320", "wc_reply_reviewers": "109;23;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 86.75, 32.89661836724255 ], "wc_strengths_avg": [ 61.75, 32.437439787998066 ], "wc_improvement_avg": [ 198.5, 72.91947613635195 ], "wc_limitations_avg": [ 20.25, 8.842369591913696 ], "wc_correctness_avg": [ 31.0, 34.18332927027442 ], "wc_clarity_avg": [ 7.0, 6.6332495807108 ], "wc_relation_to_prior_work_avg": [ 12.25, 10.638961415476606 ], "wc_documentation_avg": [ 8.25, 8.642193008721803 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 426.75, 64.37924743269372 ], "wc_reply_reviewers_avg": [ 33.0, 44.87204029236914 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16497287580629152449&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "iitkgp.ac.in;cisco.com;iitkgp.ac.in", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Kharagpur;Cisco Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.cisco.com", "aff_unique_abbr": "IIT Kharagpur;Cisco", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kharagpur;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "India;United States" }, { "title": "Happy: A Debiased Learning Framework for Continual Generalized Category Discovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94052", "id": "hdUCZiMkFO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hdUCZiMkFO", "openreview": "https://openreview.net/forum?id=hdUCZiMkFO", "poster": "/media/PosterPDFs/NeurIPS%202024/94052.png?t=1731217376.7110276", "project": "", "author_site": "Shijie Ma, Fei Zhu, Zhun Zhong, Wenzhuo Liu, Xu-yao Zhang, Cheng-lin Liu", "tldr": "", "abstract": "Constantly discovering novel concepts is crucial in evolving environments. This paper explores the underexplored task of Continual Generalized Category Discovery (C-GCD), which aims to incrementally discover new classes from *unlabeled* data while maintaining the ability to recognize previously learned classes. Although several settings are proposed to study the C-GCD task, they have limitations that do not reflect real-world scenarios. We thus study a more practical C-GCD setting, which includes more new classes to be discovered over a longer period, without storing samples of past classes. In C-GCD, the model is initially trained on labeled data of known classes, followed by multiple incremental stages where the model is fed with unlabeled data containing both old and new classes. The core challenge involves two conflicting objectives: discover new classes and prevent forgetting old ones. We delve into the conflicts and identify that models are susceptible to *prediction bias* and *hardness bias*. To address these issues, we introduce a debiased learning framework, namely **Happy**, characterized by **H**ardness-**a**ware **p**rototype sampling and soft entro**py** regularization. For the *prediction bias*, we first introduce clustering-guided initialization to provide robust features. In addition, we propose soft entropy regularization to assign appropriate probabilities to new classes, which can significantly enhance the clustering performance of new classes. For the *harness bias*, we present the hardness-aware prototype sampling, which can effectively reduce the forgetting issue for previously seen classes, especially for difficult classes. Experimental results demonstrate our method proficiently manages the conflicts of C-GCD and achieves remarkable performance across various datasets, e.g., 7.5% overall gains on ImageNet-100. Our code is publicly available at https://github.com/mashijie1028/Happy-CGCD.", "keywords": "novel category discovery;generalized category discovery;continual category discovery;continual generalized category discovery", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shijie Ma;Fei Zhu;Zhun Zhong;Wenzhuo Liu;Xu-Yao Zhang;Cheng-Lin Liu", "authorids": "~Shijie_Ma1;~Fei_Zhu1;~Zhun_Zhong1;~Wenzhuo_Liu5;~Xu-Yao_Zhang3;~Cheng-Lin_Liu2", "gender": "M;M;M;;;", "homepage": "https://mashijie1028.github.io/;http://www.nlpr.ia.ac.cn/pal/People/ZhuFei.html;http://zhunzhong.site;;;", "dblp": "191/4553;;32/6525;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;fjZ1CBwAAAAJ;nZizkQ0AAAAJ;;;", "orcid": "0009-0005-1131-5686;;;;;", "linkedin": ";;;;;", "or_profile": "~Shijie_Ma1;~Fei_Zhu1;~Zhun_Zhong1;~Wenzhuo_Liu5;~Xu-Yao_Zhang3;~Cheng-Lin_Liu2", "aff": "Institute of Automation, Chinese Academy of Sciences;Centre for Artificial Intelligence and Robotics Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;University of Nottingham;;;", "aff_domain": "ia.ac.cn;cair-cas.org.hk;nottingham.ac.uk;;;", "position": "PhD student;Postdoc;Assistant Professor;;;", "bibtex": "@inproceedings{\nma2024happy,\ntitle={Happy: A Debiased Learning Framework for Continual Generalized Category Discovery},\nauthor={Shijie Ma and Fei Zhu and Zhun Zhong and Wenzhuo Liu and Xu-Yao Zhang and Cheng-Lin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hdUCZiMkFO}\n}", "github": "", "reviewers": "cVL6;TLvc;TDQJ;jUDg", "pdf_size": 1242079, "rating": "4;5;5;6", "confidence": "3;4;5;5", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;4;3;4", "wc_summary": "86;48;111;56", "wc_strengths": "76;29;251;61", "wc_weaknesses": "102;192;228;108", "wc_questions": "61;19;198;4", "wc_limitations": "1;1;194;1", "wc_review": "326;289;982;230", "wc_reply_reviewers": "69;13;21;106", "wc_reply_authors": "1199;540;542;412", "reply_reviewers": "2;1;1;2", "reply_authors": "5;4;4;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 75.25, 25.033727249452888 ], "wc_strengths_avg": [ 104.25, 86.40999652817953 ], "wc_weaknesses_avg": [ 157.5, 54.0624638728203 ], "wc_questions_avg": [ 70.5, 76.51960533092156 ], "wc_limitations_avg": [ 49.25, 83.57145146519834 ], "wc_review_avg": [ 456.75, 305.1797625990295 ], "wc_reply_reviewers_avg": [ 52.25, 37.7052715147365 ], "wc_reply_authors_avg": [ 673.25, 308.0774050461994 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.0, 0.7071067811865476 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16696623051778876310&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ia.ac.cn;cair-cas.org.hk;nottingham.ac.uk;;;", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Chinese Academy of Sciences;Hong Kong Institute of Science & Innovation;University of Nottingham", "aff_unique_dep": "Institute of Automation;Centre for Artificial Intelligence and Robotics;", "aff_unique_url": "http://www.ia.cas.cn;http://www.hkisi.org;https://www.nottingham.ac.uk", "aff_unique_abbr": "CAS;HKISI;UoN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "DenseFusion-1M: Merging Vision Experts for Comprehensive Multimodal Perception", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97564", "id": "hej9QGCHT6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hej9QGCHT6", "openreview": "https://openreview.net/forum?id=hej9QGCHT6", "poster": "/media/PosterPDFs/NeurIPS%202024/97564.png?t=1731726232.586316", "project": "", "author_site": "Xiaotong Li, Fan Zhang, Haiwen Diao, Yueze Wang, Xinlong Wang, LINGYU DUAN", "tldr": "", "abstract": "Existing Multimodal Large Language Models (MLLMs) increasingly emphasize complex understanding of various visual elements, including multiple objects, text information, spatial relations. Their development for comprehensive visual perception hinges on the availability of high-quality image-text datasets that offer diverse visual elements and throughout image descriptions. However, the scarcity of such hyper-detailed datasets currently hinders progress within the MLLM community. The bottleneck stems from the limited perceptual capabilities of current caption engines, which fall short in providing complete and accurate annotations. To facilitate the cutting-edge research of MLLMs on comprehensive vision perception, we thereby propose Perceptual Fusion, using a low-budget but highly effective caption engine for complete and accurate image descriptions. Specifically, Perceptual Fusion integrates diverse perception experts as image priors to provide explicit information on visual elements and adopts an efficient MLLM as a centric pivot to mimic advanced MLLMs' perception abilities. We carefully select 1M highly representative images from uncurated LAION dataset and generate dense descriptions using our engine, dubbed DenseFusion-1M. Extensive experiments validate that our engine outperforms its counterparts, where the resulting dataset significantly improves the perception and cognition abilities of existing MLLMs across diverse vision-language benchmarks, especially with high-resolution images as inputs. The code and dataset are available at https://huggingface.co/datasets/BAAI/DenseFusion-1M.", "keywords": "Multimodal Large Language Models;Hyper-detailed Image Descriptions;Perception Models", "primary_area": "", "supplementary_material": "/attachment/87fe97c29dff9e71b8d02f44f57aca66cfd32552.pdf", "author": "Xiaotong Li;Fan Zhang;Haiwen Diao;Yueze Wang;Xinlong Wang;LINGYU DUAN", "authorids": "~Xiaotong_Li2;~Fan_Zhang15;~Haiwen_Diao2;~Yueze_Wang1;~Xinlong_Wang2;~LINGYU_DUAN1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/lixiaotong97;;https://paranioar.github.io/;https://yuezewang.github.io/;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6096", "dblp": ";;283/4467;;;d/LingyuDuan", "google_scholar": "cpCE_T4AAAAJ;VsJ39HMAAAAJ;46eCjHQAAAAJ;;DPz0DjYAAAAJ;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Xiaotong_Li2;~Fan_Zhang15;~Haiwen_Diao2;~Yueze_Wang1;~Xinlong_Wang2;~LINGYU_DUAN1", "aff": "Peking University;Beijing Academy of Artificial Intelligence;Dalian University of Technology;Beijing Academy of Artificial Intelligence;Beijing Academy of Artificial Intelligence;Peking University", "aff_domain": "pku.edu.cn;baai.ac.cn;dlut.edu.cn;baai.ac.cn;baai.ac.cn;pku.edu.cn", "position": "PhD student;Researcher;PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nli2024densefusionm,\ntitle={DenseFusion-1M: Merging Vision Experts for Comprehensive Multimodal Perception},\nauthor={Xiaotong Li and Fan Zhang and Haiwen Diao and Yueze Wang and Xinlong Wang and LINGYU DUAN},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hej9QGCHT6}\n}", "github": "", "reviewers": "r1id;TaN1;BJnX", "pdf_size": 31660785, "rating": "6;7;8", "confidence": "3;3;3", "wc_summary_and_contributions": "55;79;48", "wc_strengths": "11;190;105", "wc_improvement": "3;123;67", "wc_limitations": "5;2;60", "wc_correctness": "2;1;1", "wc_clarity": "2;3;1", "wc_relation_to_prior_work": "2;9;1", "wc_documentation": "5;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "86;409;285", "wc_reply_reviewers": "35;0;0", "wc_reply_authors": "173;0;0", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 60.666666666666664, 13.27487183449325 ], "wc_strengths_avg": [ 102.0, 73.10722718491427 ], "wc_improvement_avg": [ 64.33333333333333, 49.026070162267295 ], "wc_limitations_avg": [ 22.333333333333332, 26.662499674428297 ], "wc_correctness_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_clarity_avg": [ 2.0, 0.816496580927726 ], "wc_relation_to_prior_work_avg": [ 4.0, 3.559026084010437 ], "wc_documentation_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 260.0, 133.043852419669 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 57.666666666666664, 81.55298209684848 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4695488418957794109&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;baai.ac.cn;dlut.edu.cn;baai.ac.cn;baai.ac.cn;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;1;0", "aff_unique_norm": "Peking University;Beijing Academy of Artificial Intelligence;Dalian University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.baaic.cn;http://www.dlut.edu.cn/", "aff_unique_abbr": "Peking U;BAAI;DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Policy-shaped prediction: avoiding distractions in model-based reinforcement learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94051", "id": "hgdh4foghu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hgdh4foghu", "openreview": "https://openreview.net/forum?id=hgdh4foghu", "poster": "", "project": "", "author_site": "Miles Hutson, Isaac Kauvar, Nick Haber", "tldr": "", "abstract": "Model-based reinforcement learning (MBRL) is a promising route to sample-efficient policy optimization. However, a known vulnerability of reconstruction-based MBRL consists of scenarios in which detailed aspects of the world are highly predictable, but irrelevant to learning a good policy. Such scenarios can lead the model to exhaust its capacity on meaningless content, at the cost of neglecting important environment dynamics. While existing approaches attempt to solve this problem, we highlight its continuing impact on leading MBRL methods ---including DreamerV3 and DreamerPro--- with a novel environment where background distractions are intricate, predictable, and useless for planning future actions. To address this challenge we develop a method for focusing the capacity of the world model through a synergy of a pretrained segmentation model, a task-aware reconstruction loss, and adversarial learning. Our method outperforms a variety of other approaches designed to reduce the impact of distractors, and is an advance towards robust model-based reinforcement learning.", "keywords": "machine learning;model based reinforcement learning;reinforcement learning;segment anything model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Miles Richard Hutson;Isaac Kauvar;Nick Haber", "authorids": "~Miles_Richard_Hutson1;~Isaac_Kauvar1;~Nick_Haber1", "gender": "M;M;", "homepage": "https://www.mileshutson.com;https://www.ivk.io;", "dblp": ";171/6800;179/4983", "google_scholar": "4IFn6D4AAAAJ;;euNCoVYAAAAJ", "orcid": ";;0000-0001-8804-7804", "linkedin": ";;", "or_profile": "~Miles_Richard_Hutson1;~Isaac_Kauvar1;~Nick_Haber1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "MS student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nhutson2024policyshaped,\ntitle={Policy-shaped prediction: avoiding distractions in model-based reinforcement learning},\nauthor={Miles Richard Hutson and Isaac Kauvar and Nick Haber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hgdh4foghu}\n}", "github": "", "reviewers": "7m6w;1AxW;8n3B;aBfS", "pdf_size": 4205542, "rating": "4;5;5;6", "confidence": "4;3;4;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "54;74;471;113", "wc_strengths": "43;60;2;95", "wc_weaknesses": "417;80;2;122", "wc_questions": "5;82;2;181", "wc_limitations": "6;42;2;15", "wc_review": "525;338;479;526", "wc_reply_reviewers": "24;473;84;91", "wc_reply_authors": "15;169;182;712", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 178.0, 170.4890025778789 ], "wc_strengths_avg": [ 50.0, 33.45893004864322 ], "wc_weaknesses_avg": [ 155.25, 157.13588864419228 ], "wc_questions_avg": [ 67.5, 72.95375247374189 ], "wc_limitations_avg": [ 16.25, 15.594470173750693 ], "wc_review_avg": [ 467.0, 76.86026281505939 ], "wc_reply_reviewers_avg": [ 168.0, 178.00702233338998 ], "wc_reply_authors_avg": [ 269.5, 263.786371899687 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OY7BxDA_Nw0J:scholar.google.com/&scioq=Policy-shaped+prediction:+avoiding+distractions+in+model-based+reinforcement+learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;stanford.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "BuckTales: A multi-UAV dataset for multi-object tracking and re-identification of wild antelopes", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97563", "id": "hgl4dYE76J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hgl4dYE76J", "openreview": "https://openreview.net/forum?id=hgl4dYE76J", "poster": "/media/PosterPDFs/NeurIPS%202024/97563.png?t=1732805977.2113132", "project": "", "author_site": "Hemal Naik, Junran Yang, Dipin Das, Margaret Crofoot, Akanksha Rathore, Vivek Hari Sridhar", "tldr": "", "abstract": "Understanding animal behaviour is central to predicting, understanding, and miti-\ngating impacts of natural and anthropogenic changes on animal populations and\necosystems. However, the challenges of acquiring and processing long-term, eco-\nlogically relevant data in wild settings have constrained the scope of behavioural\nresearch. The increasing availability of Unmanned Aerial Vehicles (UAVs), cou-\npled with advances in machine learning, has opened new opportunities for wildlife\nmonitoring using aerial tracking. However, the limited availability of datasets with wild\nanimals in natural habitats has hindered progress in automated computer vision\nsolutions for long-term animal tracking. Here, we introduce the first large-scale\nUAV dataset designed to solve multi-object tracking (MOT) and re-identification\n(Re-ID) problem in wild animals, specifically the mating behaviour (or lekking) of\nblackbuck antelopes. Collected in collaboration with biologists, the MOT dataset\nincludes over 1.2 million annotations including 680 tracks across 12 high-resolution\n(5.4K) videos, each averaging 66 seconds and featuring 30 to 130 individuals. The\nRe-ID dataset includes 730 individuals captured with two UAVs simultaneously.\nThe dataset is designed to drive scalable, long-term animal behavior tracking using\nmultiple camera sensors. By providing baseline performance with two detectors,\nand benchmarking several state-of-the-art tracking methods, our dataset reflects the\nreal-world challenges of tracking wild animals in socially and ecologically relevant\ncontexts. In making these data widely available, we hope to catalyze progress in\nMOT and Re-ID for wild animals, fostering insights into animal behaviour, conser-\nvation efforts, and ecosystem dynamics through automated, long-term monitoring.", "keywords": "Dataset;MOT;Re-Identification;UAVs;Animals;Wild;Antelopes", "primary_area": "", "supplementary_material": "/attachment/300b118cb6365c27ca5f852a3cc3dfd032beb4a0.pdf", "author": "Hemal Naik;Junran Yang;Dipin Das;Margaret C Crofoot;Akanksha Rathore;Vivek H Sridhar", "authorids": "~Hemal_Naik1;~Junran_Yang1;~Dipin_Das1;~Margaret_C_Crofoot1;~Akanksha_Rathore1;~Vivek_H_Sridhar1", "gender": "M;;M;F;F;M", "homepage": "https://www.hemalnaik.com/;https://jyang.cc;https://dipindas.carrd.co/;;https://www.akanksharathore.in/;https://www.vivekhsridhar.com/", "dblp": "153/6278;;;;;", "google_scholar": "https://scholar.google.de/citations?user=iWIresYAAAAJ;;;https://scholar.google.de/citations?view_op=list_works;VX3vCvQAAAAJ;Lw6qdu8AAAAJ", "orcid": ";;;;;0000-0001-6658-2232", "linkedin": ";;;;akanksha-rathore-76a7818b/;", "or_profile": "~Hemal_Naik1;~Junran_Yang1;~Dipin_Das1;~Margaret_C_Crofoot1;~Akanksha_Rathore1;~Vivek_Hari_Sridhar1", "aff": "Max Planck Institute of Animal Behavior;Max Planck Institute of Animal Behavior;Max Planck Schools;Universit\u00e4t Konstanz;Max-Planck Institute;Universit\u00e4t Konstanz", "aff_domain": "ab.mpg.de;ab.mpg.de;maxplanckschools.de;uni-konstanz.de;mpg.de;uni-konstanz.de", "position": "Postdoc;Research Assistant;Researcher;Full Professor;Postdoc;Postdoc", "bibtex": "@inproceedings{\nnaik2024bucktales,\ntitle={BuckTales: A multi-{UAV} dataset for multi-object tracking and re-identification of wild antelopes},\nauthor={Hemal Naik and Junran Yang and Dipin Das and Margaret C Crofoot and Akanksha Rathore and Vivek H Sridhar},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hgl4dYE76J}\n}", "github": "", "reviewers": "TLDH;odky;GyVH;PYY4", "pdf_size": 9457964, "rating": "5;5;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "44;99;53;103", "wc_strengths": "38;48;28;26", "wc_improvement": "104;210;12;54", "wc_limitations": "28;1;12;2", "wc_correctness": "21;6;34;1", "wc_clarity": "11;1;37;1", "wc_relation_to_prior_work": "43;1;27;1", "wc_documentation": "11;1;37;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "301;368;241;190", "wc_reply_reviewers": "0;0;60;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 74.75, 26.479945241635225 ], "wc_strengths_avg": [ 35.0, 8.774964387392123 ], "wc_improvement_avg": [ 95.0, 73.95268757793728 ], "wc_limitations_avg": [ 10.75, 10.848386976873567 ], "wc_correctness_avg": [ 15.5, 12.971121771072847 ], "wc_clarity_avg": [ 12.5, 14.722431864335457 ], "wc_relation_to_prior_work_avg": [ 18.0, 17.916472867168917 ], "wc_documentation_avg": [ 12.5, 14.722431864335457 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 275.0, 66.53194721335007 ], "wc_reply_reviewers_avg": [ 15.0, 25.98076211353316 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10263416561610124830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ab.mpg.de;ab.mpg.de;maxplanckschools.de;uni-konstanz.de;mpg.de;uni-konstanz.de", "author_num": 6, "aff_unique_index": "0;0;1;2;3;2", "aff_unique_norm": "Max Planck Institute of Animal Behavior;Max Planck Society;Universit\u00e4t Konstanz;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.awesomeanimalscience.org/;https://www.mpg.de;https://www.uni-konstanz.de;https://www.mpg.de", "aff_unique_abbr": "MPIAB;MPG;Uni Konstanz;MPG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Inverse M-Kernels for Linear Universal Approximators of Non-Negative Functions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94050", "id": "hgsS4onO4s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hgsS4onO4s", "openreview": "https://openreview.net/forum?id=hgsS4onO4s", "poster": "/media/PosterPDFs/NeurIPS%202024/94050.png?t=1732609575.5024526", "project": "", "tldr": "", "abstract": "Kernel methods are widely utilized in machine learning field to learn, from training data, a latent function in a reproducing kernel Hilbert space. It is well known that the approximator thus obtained usually achieves a linear representation, which brings various computational benefits, while maintaining great representation power (i.e., universal approximation). However, when non-negativity constraints are imposed on the function's outputs, the literature usually takes the kernel method-based approximators as offering linear representations at the expense of limited model flexibility or good representation power by allowing for their nonlinear forms. The main contribution of this paper is to derive a sufficient condition for a positive definite kernel so that it may construct flexible and linear approximators of non-negative functions. We call a kernel function that offers these attributes an *inverse M-kernel*; it is reminiscent of the inverse M-matrix. Furthermore, we show that for a one-dimensional input space, universal exponential/Abel kernels are inverse M-kernels and construct linear universal approximators of non-negative functions. To the best of our knowledge, it is the first time that the existence of linear universal approximators of non-negative functions has been elucidated. We confirm the effectiveness of our results by experiments on the problems of non-negativity-constrained regression, density estimation, and intensity estimation. Finally, we discuss issues and perspectives on multi-dimensional input settings.", "keywords": "kernel method;m-matrix;non-negative function;intensity estimation;density estimation", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hideaki Kim", "authorids": "~Hideaki_Kim1", "gender": "M", "homepage": "", "dblp": "41/10856", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Hideaki_Kim1", "aff": "NTT", "aff_domain": "ntt.co.jp", "position": "Researcher", "bibtex": "@inproceedings{\nkim2024inverse,\ntitle={Inverse M-Kernels for Linear Universal Approximators of Non-Negative Functions},\nauthor={Hideaki Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hgsS4onO4s}\n}", "github": "", "reviewers": "WaA4;dTaH;wAiw", "pdf_size": 393028, "rating": "4;7;7", "confidence": "3;5;4", "soundness": "3;4;4", "novelty": "3;4;3", "presentation": "3;2;2", "wc_summary": "46;42;59", "wc_strengths": "20;40;133", "wc_weaknesses": "141;33;201", "wc_questions": "1;137;3", "wc_limitations": "2;1;1", "wc_review": "210;253;397", "wc_reply_reviewers": "421;30;27", "wc_reply_authors": "613;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 49.0, 7.2571803523590805 ], "wc_strengths_avg": [ 64.33333333333333, 49.23639123882072 ], "wc_weaknesses_avg": [ 125.0, 69.51258878793107 ], "wc_questions_avg": [ 47.0, 63.64484791926733 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 286.6666666666667, 79.96804917521052 ], "wc_reply_reviewers_avg": [ 159.33333333333334, 185.0303278444434 ], "wc_reply_authors_avg": [ 204.33333333333334, 288.97097124490244 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=999438371566434944&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "ntt.co.jp", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "NTT Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt.co.jp", "aff_unique_abbr": "NTT", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Precipitation Downscaling with Spatiotemporal Video Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94049", "id": "hhnkH8ex5d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hhnkH8ex5d", "openreview": "https://openreview.net/forum?id=hhnkH8ex5d", "poster": "/media/PosterPDFs/NeurIPS%202024/94049.png?t=1731576876.7595973", "project": "", "author_site": "Prakhar Srivastava, Ruihan Yang, Gavin Kerrigan, Gideon Dresdner, Jeremy McGibbon, Christopher S. Bretherton, Stephan Mandt", "tldr": "", "abstract": "In climate science and meteorology, high-resolution local precipitation (rain and snowfall) predictions are limited by the computational costs of simulation-based methods. Statistical downscaling, or super-resolution, is a common workaround where a low-resolution prediction is improved using statistical approaches. Unlike traditional computer vision tasks, weather and climate applications require capturing the accurate conditional distribution of high-resolution given low-resolution patterns to assure reliable ensemble averages and unbiased estimates of extreme events, such as heavy rain. This work extends recent video diffusion models to precipitation super-resolution, employing a deterministic downscaler followed by a temporally-conditioned diffusion model to capture noise characteristics and high-frequency patterns. We test our approach on FV3GFS output, an established large-scale global atmosphere model, and compare it against six state-of-the-art baselines. Our analysis, capturing CRPS, MSE, precipitation distributions, and qualitative aspects using California and the Himalayas as examples, establishes our method as a new standard for data-driven precipitation downscaling.", "keywords": "precipitation downscaling;diffusion models", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/362ee8c62ef9bef569587f0e7e59ad2b24b08f1f.zip", "author": "Prakhar Srivastava;Ruihan Yang;Gavin Kerrigan;Gideon Dresdner;Jeremy J McGibbon;Christopher S. Bretherton;Stephan Mandt", "authorids": "~Prakhar_Srivastava1;~Ruihan_Yang1;~Gavin_Kerrigan1;~Gideon_Dresdner1;~Jeremy_J_McGibbon1;~Christopher_S._Bretherton1;~Stephan_Mandt1", "gender": ";M;M;;M;M;M", "homepage": ";;https://gavinkerrigan.github.io/;;;https://atmos.uw.edu/~breth/;https://www.stephanmandt.com", "dblp": ";225/4834;274/1893;133/2729;;;147/5018", "google_scholar": "E8RKhwYAAAAJ;mWEXfLwAAAAJ;2F2XCy8AAAAJ;zCsVb4IAAAAJ;;;HOrGe7wAAAAJ", "orcid": ";;;;0000-0003-4955-9772;0000-0002-6712-8856;", "linkedin": ";;;;;;stephan-mandt-8702795a/", "or_profile": "~Prakhar_Srivastava1;~Ruihan_Yang1;~Gavin_Kerrigan1;~Gideon_Dresdner1;~Jeremy_J_McGibbon1;~Christopher_S._Bretherton1;~Stephan_M_Mandt1", "aff": "University of California, Irvine;Microsoft;University of California, Irvine;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;University of California, Irvine", "aff_domain": "uci.edu;microsoft.com;uci.edu;allenai.org;allenai.org;;uci.edu", "position": "PhD student;Intern;PhD student;Researcher;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nsrivastava2024precipitation,\ntitle={Precipitation Downscaling with Spatiotemporal Video Diffusion},\nauthor={Prakhar Srivastava and Ruihan Yang and Gavin Kerrigan and Gideon Dresdner and Jeremy J McGibbon and Christopher S. Bretherton and Stephan Mandt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hhnkH8ex5d}\n}", "github": "", "reviewers": "giVy;EgC8;St3G", "pdf_size": 22584960, "rating": "5;5;7", "confidence": "5;2;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;4;3", "wc_summary": "38;49;122", "wc_strengths": "74;69;46", "wc_weaknesses": "20;87;202", "wc_questions": "37;5;96", "wc_limitations": "54;18;71", "wc_review": "223;228;537", "wc_reply_reviewers": "15;0;0", "wc_reply_authors": "0;47;47", "reply_reviewers": "1;0;0", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 1.4142135623730951 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.66666666666667, 37.276742823851386 ], "wc_strengths_avg": [ 63.0, 12.192894105447921 ], "wc_weaknesses_avg": [ 103.0, 75.1576121671429 ], "wc_questions_avg": [ 46.0, 37.69173207305107 ], "wc_limitations_avg": [ 47.666666666666664, 22.095751225568733 ], "wc_review_avg": [ 329.3333333333333, 146.85669507682954 ], "wc_reply_reviewers_avg": [ 5.0, 7.0710678118654755 ], "wc_reply_authors_avg": [ 31.333333333333332, 22.15601247717849 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.49999999999999994, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1996345094090044189&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "uci.edu;microsoft.com;uci.edu;allenai.org;allenai.org;;uci.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;2;0", "aff_unique_norm": "University of California, Irvine;Microsoft;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.uci.edu;https://www.microsoft.com;https://allenai.org", "aff_unique_abbr": "UCI;Microsoft;AI2", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Bayesian Approach for Personalized Federated Learning in Heterogeneous Settings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94048", "id": "hilGwNabqB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hilGwNabqB", "openreview": "https://openreview.net/forum?id=hilGwNabqB", "poster": "", "project": "", "author_site": "Disha Makhija, Joydeep Ghosh, Nhat Ho", "tldr": "", "abstract": "Federated learning (FL), through its privacy-preserving collaborative learning approach, has significantly empowered decentralized devices. However, constraints in either data and/or computational resources among participating clients introduce several challenges in learning, including the inability to train large model architectures, heightened risks of overfitting, and more. In this work, we present a novel FL framework grounded in Bayesian learning to address these challenges. Our approach involves training personalized Bayesian models at each client tailored to the unique complexities of the clients' datasets and efficiently collaborating across these clients. By leveraging Bayesian neural networks and their uncertainty quantification capabilities, our local training procedure robustly learns from small datasets. And the novel collaboration procedure utilizing priors in the functional (output) space of the networks facilitates collaboration across models of varying sizes, enabling the framework to adapt well in heterogeneous data and computational settings. Furthermore, we present a differentially private version of the algorithm, accompanied by formal differential privacy guarantees that apply without any assumptions on the learning algorithm. Through experiments on popular FL datasets, we demonstrate that our approach outperforms strong baselines in both homogeneous and heterogeneous settings, and under strict privacy constraints.", "keywords": "Federated Learning;Bayesian Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Disha Makhija;Joydeep Ghosh;Nhat Ho", "authorids": "~Disha_Makhija1;~Joydeep_Ghosh1;~Nhat_Ho1", "gender": ";M;M", "homepage": ";http://ideal.ece.utexas.edu/ghosh/;https://nhatptnk8912.github.io/", "dblp": "169/9962.html;51/2272;203/4479", "google_scholar": ";;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ", "orcid": ";;", "linkedin": ";;nhat-pham-minh-ho-267b8164/", "or_profile": "~Disha_Makhija1;~Joydeep_Ghosh1;~Nhat_Ho1", "aff": "University of Texas at Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmakhija2024a,\ntitle={A Bayesian Approach for Personalized Federated Learning in Heterogeneous Settings},\nauthor={Disha Makhija and Joydeep Ghosh and Nhat Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hilGwNabqB}\n}", "github": "", "reviewers": "PtiT;4fxY;yjRB;rtXt", "pdf_size": 812636, "rating": "4;5;6;6", "confidence": "3;3;3;4", "soundness": "2;3;2;3", "novelty": "2;3;3;2", "presentation": "2;3;3;2", "wc_summary": "51;131;46;151", "wc_strengths": "22;127;35;48", "wc_weaknesses": "312;141;136;144", "wc_questions": "34;3;32;48", "wc_limitations": "18;1;1;10", "wc_review": "437;403;250;401", "wc_reply_reviewers": "222;0;24;0", "wc_reply_authors": "685;115;142;123", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.75, 46.82080200082011 ], "wc_strengths_avg": [ 58.0, 40.88398219351926 ], "wc_weaknesses_avg": [ 183.25, 74.38875923148605 ], "wc_questions_avg": [ 29.25, 16.361158271956175 ], "wc_limitations_avg": [ 7.5, 7.088723439378913 ], "wc_review_avg": [ 372.75, 72.29929114451953 ], "wc_reply_reviewers_avg": [ 61.5, 93.18127494298412 ], "wc_reply_authors_avg": [ 266.25, 241.96422772798462 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4fQrlryzU0kJ:scholar.google.com/&scioq=A+Bayesian+Approach+for+Personalized+Federated+Learning+in+Heterogeneous+Settings&hl=en&as_sdt=0,14", "gs_version_total": 2, "email": "utexas.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "hiwHaqFXGi", "title": "Disentangled Generative Graph Representation Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, generative graph models have shown promising results in learning graph representations through self-supervised methods. However, most existing generative graph representation learning (GRL) approaches rely on random masking across the entire graph, which overlooks the entanglement of learned representations. This oversight results in non-robustness and a lack of explainability. Furthermore, disentangling the learned representations remains a significant challenge and has not been sufficiently explored in GRL research.\nBased on these insights, this paper introduces **DiGGR** (**Di**sentangled **G**enerative **G**raph **R**epresentation Learning), a self-supervised learning framework. DiGGR aims to learn latent disentangled factors and utilizes them to guide graph mask modeling, thereby enhancing the disentanglement of learned representations and enabling end-to-end joint learning.\nExtensive experiments on 11 public datasets for two different graph learning tasks demonstrate that DiGGR consistently outperforms many previous self-supervised methods, verifying the effectiveness of the proposed approach.", "keywords": "Probabilistic Inference;disentangled representation learning;graph representation learning", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/2ed44b170436078159e4e08f706a666bb81f729d.zip", "author": "Xinyue Hu;Zhibin Duan;Bo Chen;Yuxin Li;Xinyang Liu;QuChunhui;Mingyuan Zhou", "authorids": "~Xinyue_Hu1;~Zhibin_Duan1;~Bo_Chen1;~Yuxin_Li3;~Xinyang_Liu4;~QuChunhui1;~Mingyuan_Zhou1", "gender": "F;M;M;M;M;M;M", "homepage": "https://www.researchgate.net/profile/Xinyue-Hu-46;;http://web.xidian.edu.cn/bchen/en/index.html;https://liyuxin321.github.io/YuxinLi.github.io/;https://xinyangatk.github.io;;http://mingyuanzhou.github.io", "dblp": ";268/2560;89/5615-1;;;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=bITyHaEAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN;;LXwCIisAAAAJ", "orcid": ";;0000-0001-5151-9388;0000-0002-5935-0684;;0009-0001-2263-3613;", "linkedin": ";;;;;;", "or_profile": "~Xinyue_Hu1;~Zhibin_Duan1;~Bo_Chen1;~Yuxin_Li3;~Xinyang_Liu4;~QuChunhui1;~Mingyuan_Zhou1", "aff": "Xidian University;Xidian University;Xidian University;Xidian University;Xidian University;;Google", "aff_domain": "stu.xidian.edu.cn;xidian.edu;xidian.edu.cn;xidian.edu;xidian.edu;;google.com", "position": "MS student;PhD student;Full Professor;PhD student;MS student;;Researcher", "bibtex": "@misc{\nanonymous2024disentangled,\ntitle={Disentangled Generative Graph Representation Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=hiwHaqFXGi}\n}", "github": "", "project": "", "reviewers": "efGr;DL9g;wD15;Vvqv", "site": "https://openreview.net/forum?id=hiwHaqFXGi", "pdf_size": 9263937, "rating": "4;4;4;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "53;26;75;68", "wc_strengths": "59;42;58;41", "wc_weaknesses": "65;137;118;62", "wc_questions": "2;247;18;6", "wc_limitations": "2;8;1;6", "wc_review": "181;460;270;183", "wc_reply_reviewers": "0;268;0;0", "wc_reply_authors": "0;1050;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;4;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.5, 18.7949461292125 ], "wc_strengths_avg": [ 50.0, 8.514693182963201 ], "wc_weaknesses_avg": [ 95.5, 32.71467560591118 ], "wc_questions_avg": [ 68.25, 103.36918061008319 ], "wc_limitations_avg": [ 4.25, 2.8613807855648994 ], "wc_review_avg": [ 273.5, 113.51321508969782 ], "wc_reply_reviewers_avg": [ 67.0, 116.04740410711477 ], "wc_reply_authors_avg": [ 262.5, 454.6633369868303 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jiepOVkAK6gJ:scholar.google.com/&scioq=Disentangled+Generative+Graph+Representation+Learning&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Xidian University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.google.com", "aff_unique_abbr": "Xidian;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Interpretable Image Classification with Adaptive Prototype-based Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94047", "id": "hjhpCJfbFG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hjhpCJfbFG", "openreview": "https://openreview.net/forum?id=hjhpCJfbFG", "poster": "/media/PosterPDFs/NeurIPS%202024/94047.png?t=1731891389.8349195", "project": "", "author_site": "Chiyu Ma, Jon Donnelly, Wenjun Liu, Soroush Vosoughi, Cynthia Rudin, Chaofan Chen", "tldr": "", "abstract": "We present ProtoViT, a method for interpretable image classification combining deep learning and case-based reasoning. This method classifies an image by comparing it to a set of learned prototypes, providing explanations of the form ``this looks like that.'' In our model, a prototype consists of **parts**, which can deform over irregular geometries to create a better comparison between images. Unlike existing models that rely on Convolutional Neural Network (CNN) backbones and spatially rigid prototypes, our model integrates Vision Transformer (ViT) backbones into prototype based models, while offering spatially deformed prototypes that not only accommodate geometric variations of objects but also provide coherent and clear prototypical feature representations with an adaptive number of prototypical parts. Our experiments show that our model can generally achieve higher performance than the existing prototype based models. Our comprehensive analyses ensure that the prototypes are consistent and the interpretations are faithful.", "keywords": "deep learning;interpretability;prototype-based neural network;case-based reasoning", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Chiyu Ma;Jon Donnelly;Wenjun Liu;Soroush Vosoughi;Cynthia Rudin;Chaofan Chen", "authorids": "~Chiyu_Ma1;~Jon_Donnelly1;~Wenjun_Liu3;~Soroush_Vosoughi1;~Cynthia_Rudin1;~Chaofan_Chen1", "gender": "M;M;F;;;M", "homepage": "https://henrymachiyu.github.io/;;;https://www.cs.dartmouth.edu/~soroush/;;https://umaine.edu/scis/people/chaofan-chen/", "dblp": ";307/5438;;01/1709;;", "google_scholar": "h_3TRv0AAAAJ;https://scholar.google.com/citations?hl=en;;45DAXkwAAAAJ;;pJ0vTRUAAAAJ", "orcid": ";0000-0002-3971-1075;;0000-0002-2564-8909;;", "linkedin": "henry-chiyu-ma-3b7b30203/;;wenjun-l-747799196/;;;", "or_profile": "~Chiyu_Ma1;~Jon_Donnelly1;~Wenjun_Liu3;~Soroush_Vosoughi1;~Cynthia_Rudin1;~Chaofan_Chen1", "aff": "Dartmouth College;Duke University;Dartmouth College;Dartmouth College;;University of Maine", "aff_domain": "dartmouth.edu;duke.edu;dartmouth.edu;dartmouth.edu;;maine.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nma2024interpretable,\ntitle={Interpretable Image Classification with Adaptive Prototype-based Vision Transformers},\nauthor={Chiyu Ma and Jon Donnelly and Wenjun Liu and Soroush Vosoughi and Cynthia Rudin and Chaofan Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hjhpCJfbFG}\n}", "github": "", "reviewers": "GK9W;Mc2e;dybc;w1UG", "pdf_size": 19619218, "rating": "5;6;6;7", "confidence": "5;3;4;5", "soundness": "2;2;3;4", "novelty": "3;2;2;4", "presentation": "3;3;3;3", "wc_summary": "93;165;93;48", "wc_strengths": "47;156;41;56", "wc_weaknesses": "165;113;233;284", "wc_questions": "43;383;4;1", "wc_limitations": "21;40;20;1", "wc_review": "369;857;391;390", "wc_reply_reviewers": "157;252;10;54", "wc_reply_authors": "152;493;34;113", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 99.75, 41.91285602294361 ], "wc_strengths_avg": [ 75.0, 47.069098143049224 ], "wc_weaknesses_avg": [ 198.75, 65.06295028662626 ], "wc_questions_avg": [ 107.75, 159.77699302465297 ], "wc_limitations_avg": [ 20.5, 13.793114224133722 ], "wc_review_avg": [ 501.75, 205.29171317907597 ], "wc_reply_reviewers_avg": [ 118.25, 93.85727196120715 ], "wc_reply_authors_avg": [ 198.0, 175.54344191680872 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7889223759526287347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "dartmouth.edu;duke.edu;dartmouth.edu;dartmouth.edu;;maine.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Dartmouth College;Duke University;University of Maine", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dartmouth.edu;https://www.duke.edu;https://www.umaine.edu", "aff_unique_abbr": "Dartmouth;Duke;UMaine", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GENOT: Entropic (Gromov) Wasserstein Flow Matching with Applications to Single-Cell Genomics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94046", "id": "hjspWd7jvg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hjspWd7jvg", "openreview": "https://openreview.net/forum?id=hjspWd7jvg", "poster": "", "project": "", "author_site": "Dominik Klein, Th\u00e9o Uscidda, Fabian Theis, Marco Cuturi", "tldr": "", "abstract": "Single-cell genomics has significantly advanced our understanding of cellular behavior, catalyzing innovations in treatments and precision medicine. However,\nsingle-cell sequencing technologies are inherently destructive and can only measure a limited array of data modalities simultaneously. This limitation underscores\nthe need for new methods capable of realigning cells. Optimal transport (OT)\nhas emerged as a potent solution, but traditional discrete solvers are hampered by\nscalability, privacy, and out-of-sample estimation issues. These challenges have\nspurred the development of neural network-based solvers, known as neural OT\nsolvers, that parameterize OT maps. Yet, these models often lack the flexibility\nneeded for broader life science applications. To address these deficiencies, our\napproach learns stochastic maps (i.e. transport plans), allows for any cost function,\nrelaxes mass conservation constraints and integrates quadratic solvers to tackle the\ncomplex challenges posed by the (Fused) Gromov-Wasserstein problem. Utilizing\nflow matching as a backbone, our method offers a flexible and effective framework.\nWe demonstrate its versatility and robustness through applications in cell development studies, cellular drug response modeling, and cross-modality cell translation,\nillustrating significant potential for enhancing therapeutic strategies.", "keywords": "single-cell genomics;optimal transport;flow matching", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Dominik Klein;Th\u00e9o Uscidda;Fabian J Theis;marco cuturi", "authorids": "~Dominik_Klein1;~Th\u00e9o_Uscidda1;~Fabian_J_Theis1;~marco_cuturi2", "gender": "M;M;M;", "homepage": "https://scholar.google.com/citations?user=Zs1w-ukAAAAJ&hl=en&oi=ao;https://theouscidda6.github.io/;http://marcocuturi.net;https://www.helmholtz-munich.de/en/icb/pi/fabian-theis", "dblp": ";;85/5102;t/FabianJTheis", "google_scholar": "Zs1w-ukAAAAJ;xnQZonMAAAAJ;https://scholar.google.fr/citations?user=kQEydDMAAAAJ;sqWpn2AAAAAJ", "orcid": ";;;0000-0002-2419-1943", "linkedin": ";th\u00e9o-uscidda-926335174/;;", "or_profile": "~Dominik_Klein1;~Th\u00e9o_Uscidda1;~marco_cuturi2;~Fabian_J._Theis1", "aff": ";Ecole Nationale de la Statistique et de l'Administration Economique;Ensae ParisTech;Technical University Munich", "aff_domain": ";ensae.fr;ensae.fr;tum.de", "position": ";PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nklein2024genot,\ntitle={{GENOT}: Entropic (Gromov) Wasserstein Flow Matching with Applications to Single-Cell Genomics},\nauthor={Dominik Klein and Th{\\'e}o Uscidda and Fabian J Theis and marco cuturi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hjspWd7jvg}\n}", "github": "", "reviewers": "NoBU;GAfL;GBWj", "pdf_size": 11856884, "rating": "6;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "95;76;86", "wc_strengths": "53;74;115", "wc_weaknesses": "377;323;91", "wc_questions": "99;44;267", "wc_limitations": "21;10;1", "wc_review": "645;527;560", "wc_reply_reviewers": "17;19;18", "wc_reply_authors": "1226;17;340", "reply_reviewers": "1;1;1", "reply_authors": "4;2;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.66666666666667, 7.760297817881877 ], "wc_strengths_avg": [ 80.66666666666667, 25.746628689770024 ], "wc_weaknesses_avg": [ 263.6666666666667, 124.06808166844883 ], "wc_questions_avg": [ 136.66666666666666, 94.85544557670664 ], "wc_limitations_avg": [ 10.666666666666666, 8.178562764256865 ], "wc_review_avg": [ 577.3333333333334, 49.70803646181258 ], "wc_reply_reviewers_avg": [ 18.0, 0.816496580927726 ], "wc_reply_authors_avg": [ 527.6666666666666, 511.0996858626396 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13782514708155876089&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";ensae.fr;ensae.fr;tum.de", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;ENSAE ParisTech;Technical University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://ensae.fr;https://www.ensae.fr;https://www.tum.de", "aff_unique_abbr": "ENSAE;Ensae;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;Germany" }, { "title": "Multi-Agent Domain Calibration with a Handful of Offline Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94045", "id": "hkBhX5ABjk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hkBhX5ABjk", "openreview": "https://openreview.net/forum?id=hkBhX5ABjk", "poster": "/media/PosterPDFs/NeurIPS%202024/94045.png?t=1731677064.9531028", "project": "", "author_site": "Tao Jiang, Lei Yuan, Lihe Li, Cong Guan, Zongzhang Zhang, Yang Yu", "tldr": "", "abstract": "The shift in dynamics results in significant performance degradation of policies trained in the source domain when deployed in a different target domain, posing a challenge for the practical application of reinforcement learning (RL) in real-world scenarios. Domain transfer methods aim to bridge this dynamics gap through techniques such as domain adaptation or domain calibration. While domain adaptation involves refining the policy through extensive interactions in the target domain, it may not be feasible for sensitive fields like healthcare and autonomous driving. On the other hand, offline domain calibration utilizes only static data from the target domain to adjust the physics parameters of the source domain (e.g., a simulator) to align with the target dynamics, enabling the direct deployment of the trained policy without sacrificing performance, which emerges as the most promising for policy deployment. However, existing techniques primarily rely on evolution algorithms for calibration, resulting in low sample efficiency.\nTo tackle this issue, we propose a novel framework Madoc (\\textbf{M}ulti-\\textbf{a}gent \\textbf{do}main \\textbf{c}alibration). Firstly, we formulate a bandit RL objective to match the target trajectory distribution by learning a couple of classifiers. We then address the challenge of a large domain parameter space by modeling domain calibration as a cooperative multi-agent reinforcement learning (MARL) problem. Specifically, we utilize a Variational Autoencoder (VAE) to automatically cluster physics parameters with similar effects on the dynamics, grouping them into distinct agents. These grouped agents train calibration policies coordinately to adjust multiple parameters using MARL.\nOur empirical evaluation on 21 offline locomotion tasks in D4RL and NeoRL benchmarks showcases the superior performance of our method compared to strong existing offline model-based RL, offline domain calibration, and hybrid offline-and-online RL baselines.", "keywords": "Multi-agent reinforcement learning;domain transfer", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/f7147abb1fcca64f57370a364827a07957e2179a.zip", "author": "Tao Jiang;Lei Yuan;Lihe Li;Cong Guan;Zongzhang Zhang;Yang Yu", "authorids": "~Tao_Jiang21;~Lei_Yuan2;~Lihe_Li1;~Cong_Guan1;~Zongzhang_Zhang1;~Yang_Yu5", "gender": "M;M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/jiangt/;http://www.lamda.nju.edu.cn/yuanl/;http://lilh.site;http://www.lamda.nju.edu.cn/guanc/;http://www.lamda.nju.edu.cn/zhangzz;http://www.lamda.nju.edu.cn/yuy", "dblp": ";23/6750-1;347/1926;191/7206;90/8724;46/2181-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=zh-CN;q0tENOAAAAAJ;;sG7WEAgAAAAJ;PG2lDSwAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Tao_Jiang21;~Lei_Yuan2;~Lihe_Li1;~Cong_Guan1;~Zongzhang_Zhang1;~Yang_Yu2", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;Researcher;MS student;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\njiang2024multiagent,\ntitle={Multi-Agent Domain Calibration with a Handful of Offline Data},\nauthor={Tao Jiang and Lei Yuan and Lihe Li and Cong Guan and Zongzhang Zhang and Yang Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hkBhX5ABjk}\n}", "github": "", "reviewers": "dRio;EfDo;Cdsr", "pdf_size": 8692746, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;2;3", "wc_summary": "62;70;69", "wc_strengths": "25;4;126", "wc_weaknesses": "150;141;206", "wc_questions": "41;168;105", "wc_limitations": "6;6;6", "wc_review": "284;389;512", "wc_reply_reviewers": "281;170;45", "wc_reply_authors": "447;614;0", "reply_reviewers": "2;3;1", "reply_authors": "3;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 67.0, 3.559026084010437 ], "wc_strengths_avg": [ 51.666666666666664, 53.25619421459088 ], "wc_weaknesses_avg": [ 165.66666666666666, 28.75567576825293 ], "wc_questions_avg": [ 104.66666666666667, 51.84806864505391 ], "wc_limitations_avg": [ 6.0, 0.0 ], "wc_review_avg": [ 395.0, 93.17725044236924 ], "wc_reply_reviewers_avg": [ 165.33333333333334, 96.40308893160818 ], "wc_reply_authors_avg": [ 353.6666666666667, 259.2069100587834 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BTPpvzTq2SoJ:scholar.google.com/&scioq=Multi-Agent+Domain+Calibration+with+a+Handful+of+Offline+Data&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DA-Ada: Learning Domain-Aware Adapter for Domain Adaptive Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94044", "id": "hkEwwAqmCk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hkEwwAqmCk", "openreview": "https://openreview.net/forum?id=hkEwwAqmCk", "poster": "/media/PosterPDFs/NeurIPS%202024/94044.png?t=1731683372.7917697", "project": "", "author_site": "Haochen Li, Rui Zhang, Hantao Yao, Xin Zhang, Yifan Hao, Xinkai Song, Xiaqing Li, Yongwei Zhao, Yunji Chen, Ling Li", "tldr": "", "abstract": "Domain adaptive object detection (DAOD) aims to generalize detectors trained on an annotated source domain to an unlabelled target domain.\nAs the visual-language models (VLMs) can provide essential general knowledge on unseen images, freezing the visual encoder and inserting a domain-agnostic adapter can learn domain-invariant knowledge for DAOD.\nHowever, the domain-agnostic adapter is inevitably biased to the source domain.\nIt discards some beneficial knowledge discriminative on the unlabelled domain, \\ie domain-specific knowledge of the target domain.\nTo solve the issue, we propose a novel Domain-Aware Adapter (DA-Ada) tailored for the DAOD task.\nThe key point is exploiting domain-specific knowledge between the essential general knowledge and domain-invariant knowledge.\nDA-Ada consists of the Domain-Invariant Adapter (DIA) for learning domain-invariant knowledge and the Domain-Specific Adapter (DSA) for injecting the domain-specific knowledge from the information discarded by the visual encoder.\nComprehensive experiments over multiple DAOD tasks show that DA-Ada can efficiently infer a domain-aware visual encoder for boosting domain adaptive object detection.\nOur code is available at https://github.com/Therock90421/DA-Ada.", "keywords": "Unsupervised domain adaptation; Object detection; Visual-language model", "primary_area": "machine_vision", "supplementary_material": "/attachment/ccb246d4c31ff5185aa13d57a09ec7811da153c3.zip", "author": "Haochen Li;Rui Zhang;Hantao Yao;Xin Zhang;Yifan Hao;Xinkai Song;Xiaqing Li;Yongwei Zhao;Yunji Chen;Ling Li", "authorids": "~Haochen_Li2;~Rui_Zhang1;~Hantao_Yao2;~Xin_Zhang46;~Yifan_Hao3;~Xinkai_Song1;~Xiaqing_Li1;~Yongwei_Zhao1;~Yunji_Chen1;~Ling_Li6", "gender": "M;F;M;F;M;;M;;M;F", "homepage": "https://github.com/Therock90421;;http://www.hantaoyao.com/;;https://www.ict.ac.cn/sourcedb_2018_ict_cas/cn/jssrck/202311/t20231108_6923467.html;;https://sciprofiles.com/profile/1301031;;;", "dblp": "49/11531-2;60/2536-40;167/3478;76/1584-62;;;;;48/474;92/5001-1", "google_scholar": "QxfHHQcAAAAJ;dse6jAsAAAAJ;;;;;;;;", "orcid": "0000-0003-0813-6351;;;0000-0002-4026-4284;;;;;;0000-0001-8877-9052", "linkedin": ";;;;;;;;;", "or_profile": "~Haochen_Li2;~Rui_Zhang1;~Hantao_Yao2;~Xin_Zhang46;~Yifan_Hao3;~Xinkai_Song1;~Xiaqing_Li1;~Yongwei_Zhao1;~Yunji_Chen1;~Ling_Li6", "aff": "Institute of Software, Chinese Academy of Sciences;Institute of Computing Technology, CAS;Institute of automation, Chinese academy of science;Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;;Institute of computing, Chinese Academy of Sciences;;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Software, CAS", "aff_domain": "iscas.ac.cn;ict.ac.cn;nlpr.ia.ac.cn;ict.ac.cn;ict.ac.cn;;ict.ac.cn;;ict.ac.cn;iscas.ac.cn", "position": "PhD student;Assistant Professor;Associate Professor;Assistant Professor;Associate Professor;;Assistant Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024daada,\ntitle={{DA}-Ada: Learning Domain-Aware Adapter for Domain Adaptive Object Detection},\nauthor={Haochen Li and Rui Zhang and Hantao Yao and Xin Zhang and Yifan Hao and Xinkai Song and Xiaqing Li and Yongwei Zhao and Yunji Chen and Ling Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hkEwwAqmCk}\n}", "github": "", "reviewers": "N5kf;nqrX;DyVN;xAKd", "pdf_size": 9828046, "rating": "5;5;5;6", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "90;69;69;122", "wc_strengths": "39;71;42;78", "wc_weaknesses": "52;163;270;151", "wc_questions": "238;26;2;161", "wc_limitations": "5;1;2;8", "wc_review": "424;330;385;520", "wc_reply_reviewers": "19;17;48;73", "wc_reply_authors": "14;13;28;67", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.5, 21.68524844220144 ], "wc_strengths_avg": [ 57.5, 17.211914478058507 ], "wc_weaknesses_avg": [ 159.0, 77.2172260574025 ], "wc_questions_avg": [ 106.75, 97.03446552643035 ], "wc_limitations_avg": [ 4.0, 2.7386127875258306 ], "wc_review_avg": [ 414.75, 69.33748985938271 ], "wc_reply_reviewers_avg": [ 39.25, 23.025800746119558 ], "wc_reply_authors_avg": [ 30.5, 21.891779278989635 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16242311761821143756&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "iscas.ac.cn;ict.ac.cn;nlpr.ia.ac.cn;ict.ac.cn;ict.ac.cn;;ict.ac.cn;;ict.ac.cn;iscas.ac.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Software", "aff_unique_url": "http://www.ios.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "HippoRAG: Neurobiologically Inspired Long-Term Memory for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94043", "id": "hkujvAPVsg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hkujvAPVsg", "openreview": "https://openreview.net/forum?id=hkujvAPVsg", "poster": "/media/PosterPDFs/NeurIPS%202024/94043.png?t=1733155303.7007272", "project": "", "author_site": "Bernal Jimenez Gutierrez, Yiheng Shu, Yu Gu, Michihiro Yasunaga, Yu Su", "tldr": "", "abstract": "In order to thrive in hostile and ever-changing natural environments, mammalian brains evolved to store large amounts of knowledge about the world and continually integrate new information while avoiding catastrophic forgetting. Despite the impressive accomplishments, large language models (LLMs), even with retrieval-augmented generation (RAG), still struggle to efficiently and effectively integrate a large amount of new experiences after pre-training. In this work, we introduce HippoRAG, a novel retrieval framework inspired by the hippocampal indexing theory of human long-term memory to enable deeper and more efficient knowledge integration over new experiences. HippoRAG synergistically orchestrates LLMs, knowledge graphs, and the Personalized PageRank algorithm to mimic the different roles of neocortex and hippocampus in human memory. We compare HippoRAG with existing RAG methods on multi-hop question answering (QA) and show that our method outperforms the state-of-the-art methods remarkably, by up to 20%. Single-step retrieval with HippoRAG achieves comparable or better performance than iterative retrieval like IRCoT while being 10-20 times cheaper and 6-13 times faster, and integrating HippoRAG into IRCoT brings further substantial gains. Finally, we show that our method can tackle new types of scenarios that are out of reach of existing methods.", "keywords": "retrieval-augmented generation;RAG;long-term memory;neurobiological inspired;hippocampal memory indexing theory", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9248ae70b029c3517afae0a2730969c01443c5ad.zip", "author": "Bernal Jimenez Gutierrez;Yiheng Shu;Yu Gu;Michihiro Yasunaga;Yu Su", "authorids": "~Bernal_Jimenez_Gutierrez1;~Yiheng_Shu1;~Yu_Gu5;~Michihiro_Yasunaga1;~Yu_Su2", "gender": "M;M;;M;M", "homepage": "http://yihengshu.github.io;http://entslscheia.github.io;;http://ysu1989.github.io;https://bernaljg.github.io/", "dblp": "239/6116;15/4208-16;202/1809;38/1070-1;264/4620", "google_scholar": "H63aXLcAAAAJ;c5RwjjcAAAAJ;SieJYoEAAAAJ;rIh5OqoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-7536-2503;;;;", "linkedin": "shuyh/?locale=en_US;;;;bernal-jimenez/", "or_profile": "~Yiheng_Shu1;~Yu_Gu5;~Michihiro_Yasunaga1;~Yu_Su2;~Bernal_Jimenez1", "aff": "The Ohio State University;Ohio State University;Stanford University;Microsoft;The Ohio State University", "aff_domain": "osu.edu;osu.edu;stanford.edu;microsoft.com;osu.edu", "position": "PhD student;PhD student;PhD student;Senior Researcher;PhD student", "bibtex": "@inproceedings{\ngutierrez2024hipporag,\ntitle={Hippo{RAG}: Neurobiologically Inspired Long-Term Memory for Large Language Models},\nauthor={Bernal Jimenez Gutierrez and Yiheng Shu and Yu Gu and Michihiro Yasunaga and Yu Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hkujvAPVsg}\n}", "github": "", "reviewers": "bGvw;oPeR;fKDL", "pdf_size": 3256579, "rating": "5;7;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "77;31;71", "wc_strengths": "64;66;74", "wc_weaknesses": "141;97;37", "wc_questions": "20;2;46", "wc_limitations": "14;7;1", "wc_review": "316;203;229", "wc_reply_reviewers": "90;11;18", "wc_reply_authors": "1097;0;12", "reply_reviewers": "1;1;1", "reply_authors": "4;1;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 59.666666666666664, 20.417857108151406 ], "wc_strengths_avg": [ 68.0, 4.320493798938574 ], "wc_weaknesses_avg": [ 91.66666666666667, 42.62497963505541 ], "wc_questions_avg": [ 22.666666666666668, 18.06162291219209 ], "wc_limitations_avg": [ 7.333333333333333, 5.312459150169743 ], "wc_review_avg": [ 249.33333333333334, 48.32068800098865 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 35.705586242055865 ], "wc_reply_authors_avg": [ 369.6666666666667, 514.3256642331675 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12568182236807328696&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "osu.edu;osu.edu;stanford.edu;microsoft.com;osu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Ohio State University;Stanford University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;Stanford;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Convergence of $\\text{log}(1/\\epsilon)$ for Gradient-Based Algorithms in Zero-Sum Games without the Condition Number: A Smoothed Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94042", "id": "hoVXLC8vQU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hoVXLC8vQU", "openreview": "https://openreview.net/forum?id=hoVXLC8vQU", "poster": "/media/PosterPDFs/NeurIPS%202024/94042.png?t=1731712979.91624", "project": "", "author_site": "Ioannis Anagnostides, Tuomas Sandholm", "tldr": "", "abstract": "Gradient-based algorithms have shown great promise in solving large (two-player) zero-sum games. However, their success has been mostly confined to the low-precision regime since the number of iterations grows polynomially in $1/\\epsilon$, where $\\epsilon > 0$ is the duality gap. While it has been well-documented that linear convergence---an iteration complexity scaling as $\\text{log}(1/\\epsilon)$---can be attained even with gradient-based algorithms, that comes at the cost of introducing a dependency on certain condition number-like quantities which can be exponentially large in the description of the game. To address this shortcoming, we examine the iteration complexity of several gradient-based algorithms in the celebrated framework of smoothed analysis, and we show that they have polynomial smoothed complexity, in that their number of iterations grows as a polynomial in the dimensions of the game, $\\text{log}(1/\\epsilon)$, and $1/\\sigma$, where $\\sigma$ measures the magnitude of the smoothing perturbation. Our result applies to optimistic gradient and extra-gradient descent/ascent, as well as a certain iterative variant of Nesterov's smoothing technique. From a technical standpoint, the proof proceeds by characterizing and performing a smoothed analysis of a certain error bound, the key ingredient driving linear convergence in zero-sum games. En route, our characterization also makes a natural connection between the convergence rate of such algorithms and perturbation-stability properties of the equilibrium, which is of interest beyond the model of smoothed complexity.", "keywords": "smoothed complexity;zero-sum games;optimistic gradient descent;linear convergence", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Ioannis Anagnostides;Tuomas Sandholm", "authorids": "~Ioannis_Anagnostides1;~Tuomas_Sandholm1", "gender": "M;M", "homepage": ";http://www.cs.cmu.edu/~sandholm", "dblp": "273/7648;s/TuomasSandholm", "google_scholar": "QVwDo_sAAAAJ;0DpK1EMAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ioannis_Anagnostides1;~Tuomas_Sandholm1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nanagnostides2024convergence,\ntitle={Convergence of \\${\\textbackslash}text\\{log\\}(1/{\\textbackslash}epsilon)\\$ for Gradient-Based Algorithms in Zero-Sum Games without the Condition Number: A Smoothed Analysis},\nauthor={Ioannis Anagnostides and Tuomas Sandholm},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hoVXLC8vQU}\n}", "github": "", "reviewers": "RDh1;La2m;uKXc;L1q2;NLN1", "pdf_size": 718673, "rating": "6;6;6;6;7", "confidence": "1;3;3;4;1", "soundness": "3;3;3;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "116;77;192;86;70", "wc_strengths": "45;55;89;63;34", "wc_weaknesses": "61;305;126;89;19", "wc_questions": "51;3;38;48;2", "wc_limitations": "11;1;1;1;3", "wc_review": "284;441;446;287;128", "wc_reply_reviewers": "9;9;14;15;0", "wc_reply_authors": "0;38;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.4, 1.2 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 108.2, 44.74103262107391 ], "wc_strengths_avg": [ 57.2, 18.63759641155479 ], "wc_weaknesses_avg": [ 120.0, 98.89792717746919 ], "wc_questions_avg": [ 28.4, 21.58332689832594 ], "wc_limitations_avg": [ 3.4, 3.8781438859330635 ], "wc_review_avg": [ 317.2, 118.09047379022576 ], "wc_reply_reviewers_avg": [ 9.4, 5.314132102234569 ], "wc_reply_authors_avg": [ 7.6, 15.200000000000001 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5833333333333334, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N0PkHd-ACDYJ:scholar.google.com/&scioq=Convergence+of+%24%5Ctext%7Blog%7D(1/%5Cepsilon)%24+for+Gradient-Based+Algorithms+in+Zero-Sum+Games+without+the+Condition+Number:+A+Smoothed+Analysis&hl=en&as_sdt=0,4", "gs_version_total": 3, "email": "cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Flexible mapping of abstract domains by grid cells via self-supervised extraction and projection of generalized velocity signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94041", "id": "hocAc3Qit7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hocAc3Qit7", "openreview": "https://openreview.net/forum?id=hocAc3Qit7", "poster": "/media/PosterPDFs/NeurIPS%202024/94041.png?t=1731449711.212003", "project": "", "author_site": "Abhiram Iyer, Sarthak Chandra, Sugandha Sharma, Ila Fiete", "tldr": "", "abstract": "Grid cells in the medial entorhinal cortex create remarkable periodic maps of explored space during navigation. Recent studies show that they form similar maps of abstract cognitive spaces. Examples of such abstract environments include auditory tone sequences in which the pitch is continuously varied or images in which abstract features are continuously deformed (e.g., a cartoon bird whose legs stretch and shrink). Here, we hypothesize that the brain generalizes how it maps spatial domains to mapping abstract spaces. \nTo sidestep the computational cost of learning representations for each high-dimensional sensory input, the brain extracts self-consistent, low-dimensional descriptions of displacements across abstract spaces, leveraging the spatial velocity integration of grid cells to efficiently build maps of different domains.\nOur neural network model for abstract velocity extraction factorizes the content of these abstract domains from displacements within the domains to generate content-independent and self-consistent, low-dimensional velocity estimates. \nCrucially, it uses a self-supervised geometric consistency constraint that requires displacements along closed loop trajectories to sum to zero, an integration that is itself performed by the downstream grid cell circuit over learning. This process results in high fidelity estimates of velocities and allowed transitions in abstract domains, a crucial prerequisite for efficient map generation in these high-dimensional environments. We also show how our method outperforms traditional dimensionality reduction and deep-learning based motion extraction networks on the same set of tasks.\nThis is the first neural network model to explain how grid cells can flexibly represent different abstract spaces and makes the novel prediction that they should do so while maintaining their population correlation and manifold structure across domains. Fundamentally, our model sheds light on the mechanistic origins of cognitive flexibility and transfer of representations across vastly different domains in brains, providing a potential self-supervised learning (SSL) framework for leveraging similar ideas in transfer learning and data-efficient generalization in machine learning and robotics.", "keywords": "grid cells;cognitive mapping;cognitive maps;self-supervised learning;entorhinal cortex;path integration;neuroscience;dimensionality reduction", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Abhiram Iyer;Sarthak Chandra;Sugandha Sharma;Ila R Fiete", "authorids": "~Abhiram_Iyer1;~Sarthak_Chandra1;~Sugandha_Sharma1;~Ila_R_Fiete1", "gender": ";;F;F", "homepage": ";;https://www.sugandhasharma.com/;https://fietelab.mit.edu/", "dblp": ";;;", "google_scholar": "pXIRasMAAAAJ;Mx8q1rgAAAAJ;FsXCQc8AAAAJ;uE-CihIAAAAJ", "orcid": ";;;0000-0003-4738-2539", "linkedin": ";;sugandhasharma17/;", "or_profile": "~Abhiram_Iyer1;~Sarthak_Chandra1;~Sugandha_Sharma1;~Ila_R_Fiete1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Postdoc;PhD student;Professor", "bibtex": "@inproceedings{\niyer2024flexible,\ntitle={Flexible mapping of abstract domains by grid cells via self-supervised extraction and projection of generalized velocity signals},\nauthor={Abhiram Iyer and Sarthak Chandra and Sugandha Sharma and Ila R Fiete},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hocAc3Qit7}\n}", "github": "", "reviewers": "AXig;SMXS;sK54;SEuj", "pdf_size": 27242631, "rating": "4;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;4;4", "wc_summary": "70;57;178;60", "wc_strengths": "57;61;146;73", "wc_weaknesses": "881;139;423;135", "wc_questions": "22;4;2;149", "wc_limitations": "6;1;18;20", "wc_review": "1036;262;767;437", "wc_reply_reviewers": "604;20;123;95", "wc_reply_authors": "2719;97;784;306", "reply_reviewers": "2;1;1;2", "reply_authors": "8;3;3;4", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 91.25, 50.3158772158451 ], "wc_strengths_avg": [ 84.25, 36.134298111351214 ], "wc_weaknesses_avg": [ 394.5, 304.1853875517363 ], "wc_questions_avg": [ 44.25, 60.97694236348687 ], "wc_limitations_avg": [ 11.25, 7.980444849756184 ], "wc_review_avg": [ 625.5, 298.4112095749756 ], "wc_reply_reviewers_avg": [ 210.5, 230.28732053675904 ], "wc_reply_authors_avg": [ 976.5, 1036.3943506214225 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.5, 2.0615528128088303 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VCoNgslpuWIJ:scholar.google.com/&scioq=Flexible+mapping+of+abstract+domains+by+grid+cells+via+self-supervised+extraction+and+projection+of+generalized+velocity+signals&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "RGFN: Synthesizable Molecular Generation Using GFlowNets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94040", "id": "hpvJwmzEHX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hpvJwmzEHX", "openreview": "https://openreview.net/forum?id=hpvJwmzEHX", "poster": "/media/PosterPDFs/NeurIPS%202024/94040.png?t=1731735360.2795541", "project": "", "author_site": "Micha\u0142 Koziarski, Andrei Rekesh, Dmytro Shevchuk, Almer van der Sloot, Piotr Gai\u0144ski, Yoshua Bengio, Chenghao Liu, Mike Tyers, Robert Batey", "tldr": "", "abstract": "Generative models hold great promise for small molecule discovery, significantly increasing the size of search space compared to traditional in silico screening libraries. However, most existing machine learning methods for small molecule generation suffer from poor synthesizability of candidate compounds, making experimental validation difficult. In this paper we propose Reaction-GFlowNet (RGFN), an extension of the GFlowNet framework that operates directly in the space of chemical reactions, thereby allowing out-of-the-box synthesizability while maintaining comparable quality of generated candidates. We demonstrate that with the proposed set of reactions and building blocks, it is possible to obtain a search space of molecules orders of magnitude larger than existing screening libraries coupled with low cost of synthesis. We also show that the approach scales to very large fragment libraries, further increasing the number of potential molecules. We demonstrate the effectiveness of the proposed approach across a range of oracle models, including pretrained proxy models and GPU-accelerated docking.", "keywords": "drug discovery;generative models;GFlowNets;synthesizability", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Micha\u0142 Koziarski;Andrei Rekesh;Dmytro Shevchuk;Almer M. van der Sloot;Piotr Gai\u0144ski;Yoshua Bengio;Cheng-Hao Liu;Mike Tyers;Robert A. Batey", "authorids": "~Micha\u0142_Koziarski1;~Andrei_Rekesh1;~Dmytro_Shevchuk1;~Almer_M._van_der_Sloot1;~Piotr_Gai\u0144ski1;~Yoshua_Bengio1;~Cheng-Hao_Liu1;~Mike_Tyers1;~Robert_A._Batey1", "gender": ";M;M;;M;M;M;;M", "homepage": ";http://github.com/andreirekesh;;https://mila.quebec/en/;https://github.com/panpiort8/;http://yoshuabengio.org;https://pchliu.github.io/;https://www.sickkids.ca/en/staff/t/mike-tyers/;https://sites.chem.utoronto.ca/bateylab/", "dblp": ";;;;;56/953;;;", "google_scholar": "https://scholar.google.pl/citations?user=hEVRtosAAAAJ;;;https://scholar.google.ca/citations?user=IP20cIoAAAAJ;;kukA0LcAAAAJ;iVJGx0cAAAAJ;9zEuqoIAAAAJ;NkQTwSMAAAAJ", "orcid": ";;;0000-0003-0049-2044;;;0000-0001-7923-6806;;0000-0001-8808-7646", "linkedin": ";;dmytro-shevchuk-160434268/;;;yoshuabengio/?originalSubdomain=ca;chenghao-peter-liu/;;robert-batey-156221a2/", "or_profile": "~Micha\u0142_Koziarski1;~Andrei_Rekesh1;~Dmytro_Shevchuk1;~Almer_M._van_der_Sloot1;~Piotr_Gai\u0144ski1;~Yoshua_Bengio1;~Cheng-Hao_Liu1;~Mike_Tyers1;~Robert_A._Batey1", "aff": "Mila;University of Toronto;University of Toronto;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Mila - Quebec Artificial Intelligence Institute;University of Montreal;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;University of Toronto;University of Toronto", "aff_domain": "mila.quebec;utoronto.ca;utoronto.ca;mila.umontreal.ca;mila.quebec;umontreal.ca;mila.umontreal.ca;utoronto.ca;utoronto.ca", "position": "Postdoc;PhD student;PhD student;Researcher;Intern;Full Professor;PhD student intern;Full Professor;Full Professor", "bibtex": "@inproceedings{\nkoziarski2024rgfn,\ntitle={{RGFN}: Synthesizable Molecular Generation Using {GF}lowNets},\nauthor={Micha{\\l} Koziarski and Andrei Rekesh and Dmytro Shevchuk and Almer M. van der Sloot and Piotr Gai{\\'n}ski and Yoshua Bengio and Cheng-Hao Liu and Mike Tyers and Robert A. Batey},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hpvJwmzEHX}\n}", "github": "", "reviewers": "FKST;sx7b;5soV;rpKJ", "pdf_size": 23628197, "rating": "4;6;6;6", "confidence": "4;3;4;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "38;62;155;47", "wc_strengths": "61;85;480;89", "wc_weaknesses": "130;132;479;43", "wc_questions": "94;2;392;56", "wc_limitations": "59;9;184;2", "wc_review": "382;290;1690;237", "wc_reply_reviewers": "242;34;730;0", "wc_reply_authors": "744;0;907;0", "reply_reviewers": "1;1;3;0", "reply_authors": "2;1;3;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 46.69314724882014 ], "wc_strengths_avg": [ 178.75, 174.25609745429284 ], "wc_weaknesses_avg": [ 196.0, 167.2946502432161 ], "wc_questions_avg": [ 136.0, 151.37370973851438 ], "wc_limitations_avg": [ 63.5, 72.9606058088884 ], "wc_review_avg": [ 649.75, 602.8251715879157 ], "wc_reply_reviewers_avg": [ 251.5, 291.3807646362402 ], "wc_reply_authors_avg": [ 412.75, 416.75374923328525 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6508203719053833245&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mila.quebec;utoronto.ca;utoronto.ca;mila.umontreal.ca;mila.quebec;umontreal.ca;mila.umontreal.ca;utoronto.ca;utoronto.ca", "author_num": 9, "aff_unique_index": "0;1;1;2;3;2;2;1;1", "aff_unique_norm": "Mila;University of Toronto;University of Montreal;Quebec Artificial Intelligence Institute", "aff_unique_dep": "Quebec Artificial Intelligence Institute;;Montreal Institute for Learning Algorithms;Artificial Intelligence", "aff_unique_url": "https://mila.quebec;https://www.utoronto.ca;https://www.mila.quebec;https://mila.quebec", "aff_unique_abbr": "Mila;U of T;MILA;Mila", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "FT-AED: Benchmark Dataset for Early Freeway Traffic Anomalous Event Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97562", "id": "hqvWcQ3uzF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hqvWcQ3uzF", "openreview": "https://openreview.net/forum?id=hqvWcQ3uzF", "poster": "/media/PosterPDFs/NeurIPS%202024/97562.png?t=1732569476.2043777", "project": "", "author_site": "Austin Coursey, Junyi Ji, Marcos Quinones Grueiro, William Barbour, Yuhang Zhang, Tyler Derr, Gautam Biswas, Daniel Work", "tldr": "", "abstract": "Early and accurate detection of anomalous events on the freeway, such as accidents, can improve emergency response and clearance. However, existing delays and mistakes from manual crash reporting records make it a difficult problem to solve. Current large-scale freeway traffic datasets are not designed for anomaly detection and ignore these challenges. In this paper, we introduce the first large-scale lane-level freeway traffic dataset for anomaly detection. Our dataset consists of a month of weekday radar detection sensor data collected in 4 lanes along an 18-mile stretch of Interstate 24 heading toward Nashville, TN, comprising over 3.7 million sensor measurements. We also collect official crash reports from the Tennessee Department of Transportation Traffic Management Center and manually label all other potential anomalies in the dataset. To show the potential for our dataset to be used in future machine learning and traffic research, we benchmark numerous deep learning anomaly detection models on our dataset. We find that unsupervised graph neural network autoencoders are a promising solution for this problem and that ignoring spatial relationships leads to decreased performance. We demonstrate that our methods can reduce reporting delays by over 10 minutes on average while detecting 75% of crashes. Our dataset and all preprocessing code needed to get started are publicly released at https://vu.edu/ft-aed/ to facilitate future research.", "keywords": "anomaly detection;traffic data;graph neural network", "primary_area": "", "supplementary_material": "/attachment/1197f34f5d1845522135961f55eb62634bd25861.pdf", "author": "Austin Coursey;JUNYI JI;Marcos Quinones Grueiro;William Barbour;Yuhang Zhang;Tyler Derr;Gautam Biswas;Daniel Work", "authorids": "~Austin_Coursey1;~JUNYI_JI1;~Marcos_Quinones_Grueiro1;~William_Barbour1;~Yuhang_Zhang11;~Tyler_Derr1;~Gautam_Biswas2;~Daniel_Work1", "gender": "M;M;M;M;M;;M;", "homepage": ";https://www.jijunyi.com/;;https://barbourww.github.io;;http://www.tylerderr.com;https://my.vanderbilt.edu/gautambiswas/;https://engineering.vanderbilt.edu/bio/daniel-work", "dblp": ";338/9388;;;;207/7927.html;15/6242;", "google_scholar": "vLlw5KcAAAAJ;VJz-xKsAAAAJ;D6ENug8AAAAJ;rFHdBcQAAAAJ;nVhLFiQAAAAJ;et6IhFcAAAAJ;-m5wrTkAAAAJ;", "orcid": "0000-0003-1774-6442;0000-0003-1085-1573;;0000-0002-1368-3192;0000-0002-8408-2095;;0000-0002-2752-3878;", "linkedin": "austin-coursey/;;;william-barbour-887431165/;;tylersnetwork/;gautam-biswas-66a39a2/;", "or_profile": "~Austin_Coursey1;~JUNYI_JI1;~Marcos_Quinones_Grueiro1;~William_Barbour1;~Yuhang_Zhang11;~Tyler_Derr1;~Gautam_Biswas2;~Daniel_Work1", "aff": "Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;", "aff_domain": "vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;", "position": "PhD student;PhD student;Researcher;Senior Research Scientist;PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\ncoursey2024ftaed,\ntitle={{FT}-{AED}: Benchmark Dataset for Early Freeway Traffic Anomalous Event Detection},\nauthor={Austin Coursey and JUNYI JI and Marcos Quinones Grueiro and William Barbour and Yuhang Zhang and Tyler Derr and Gautam Biswas and Daniel Work},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hqvWcQ3uzF}\n}", "github": "", "reviewers": "z8qJ;LEDx;CYVt;DJ9r;DxHH;9FVR", "pdf_size": 16121754, "rating": "6;6;7;7;7;7", "confidence": "4;2;4;5;3;4", "wc_summary_and_contributions": "71;74;118;42;232;159", "wc_strengths": "94;94;171;6;93;80", "wc_improvement": "146;42;120;29;127;97", "wc_limitations": "1;25;15;34;55;6", "wc_correctness": "1;5;10;23;35;1", "wc_clarity": "1;1;11;13;23;5", "wc_relation_to_prior_work": "1;1;25;23;75;8", "wc_documentation": "1;7;27;14;31;1", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "317;250;498;185;672;358", "wc_reply_reviewers": "0;29;54;0;11;21", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;1;0;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 116.0, 63.99739578034927 ], "wc_strengths_avg": [ 89.66666666666667, 47.8841193809481 ], "wc_improvement_avg": [ 93.5, 43.59950305527193 ], "wc_limitations_avg": [ 22.666666666666668, 18.190351532856337 ], "wc_correctness_avg": [ 12.5, 12.539936203984453 ], "wc_clarity_avg": [ 9.0, 7.745966692414834 ], "wc_relation_to_prior_work_avg": [ 22.166666666666668, 25.48474489223351 ], "wc_documentation_avg": [ 13.5, 11.856784274554942 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 380.0, 162.56588407986058 ], "wc_reply_reviewers_avg": [ 19.166666666666668, 18.77424358588708 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14114897188899830284&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Vanderbilt University", "aff_unique_dep": "", "aff_unique_url": "https://www.vanderbilt.edu", "aff_unique_abbr": "Vanderbilt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Constant Acceleration Flow", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94039", "id": "hsgNvC5YM9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hsgNvC5YM9", "openreview": "https://openreview.net/forum?id=hsgNvC5YM9", "poster": "", "project": "", "author_site": "Dogyun Park, Sojin Lee, Sihyeon Kim, Taehoon Lee, Youngjoon Hong, Hyunwoo Kim", "tldr": "", "abstract": "Rectified flow and reflow procedures have significantly advanced fast generation by progressively straightening ordinary differential equation (ODE) flows under the assumption that image and noise pairs, known as coupling, can be approximated by straight trajectories with constant velocity. However, we observe that the constant velocity modeling and reflow procedures have limitations in accurately learning to couple with flow crossing, leading to suboptimal few-step generation. To overcome the limitations, we introduce the Constant Acceleration Flow (CAF), a novel framework based on a simple constant acceleration equation. Additionally, we propose two techniques to improve estimation accuracy: initial velocity conditioning for the acceleration model and a reflow process for the initial velocity. Our comparative studies show that CAF not only outperforms rectified flow with reflow procedures in terms of speed and accuracy but also demonstrates substantial improvements in preserving coupling for fast generation.", "keywords": "Generative model;Rectified flow;Fast generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Dogyun Park;Sojin Lee;Sihyeon Kim;Taehoon Lee;Youngjoon Hong;Hyunwoo J. Kim", "authorids": "~Dogyun_Park2;~Sojin_Lee1;~Sihyeon_Kim1;~Taehoon_Lee3;~Youngjoon_Hong1;~Hyunwoo_J._Kim3", "gender": "M;F;F;M;M;M", "homepage": "https://dogyunpark.github.io/;;;https://github.com/LTH9898;https://www.youngjoonhong.com;https://hyunwoojkim.com/publications", "dblp": "323/9575;342/6155;304/2362;;119/1276;150/4259", "google_scholar": "Cgc-2roAAAAJ;roblxQYAAAAJ;;VXyBknIAAAAJ;;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "orcid": ";0000-0001-9198-8437;;;;0000-0002-2181-9264", "linkedin": "dogyunpark/;sojin-lee-4b5138262/;sihyeon-kim-a91aaa212/;;;", "or_profile": "~Dogyun_Park2;~Sojin_Lee1;~Sihyeon_Kim1;~Taehoon_Lee3;~Youngjoon_Hong1;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Korea University;Korea University;KAIST;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;kaist.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;Undergrad student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\npark2024constant,\ntitle={Constant Acceleration Flow},\nauthor={Dogyun Park and Sojin Lee and Sihyeon Kim and Taehoon Lee and Youngjoon Hong and Hyunwoo J. Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hsgNvC5YM9}\n}", "github": "", "reviewers": "rYLJ;oUYA;GaYE", "pdf_size": 13089778, "rating": "4;6;6", "confidence": "4;3;5", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "89;208;136", "wc_strengths": "49;122;97", "wc_weaknesses": "445;300;136", "wc_questions": "65;82;18", "wc_limitations": "34;24;6", "wc_review": "682;736;393", "wc_reply_reviewers": "216;25;27", "wc_reply_authors": "691;35;34", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 144.33333333333334, 48.93760199364993 ], "wc_strengths_avg": [ 89.33333333333333, 30.291179500896884 ], "wc_weaknesses_avg": [ 293.6666666666667, 126.22818843489578 ], "wc_questions_avg": [ 55.0, 27.067816067549053 ], "wc_limitations_avg": [ 21.333333333333332, 11.585431464655178 ], "wc_review_avg": [ 603.6666666666666, 150.5862617313486 ], "wc_reply_reviewers_avg": [ 89.33333333333333, 89.5705804876182 ], "wc_reply_authors_avg": [ 253.33333333333334, 309.47733717062744 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2438251271800596258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;kaist.ac.kr;korea.ac.kr", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Korea University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "KU;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Unlocking the Potential of Global Human Expertise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94038", "id": "hw76X5uWrc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hw76X5uWrc", "openreview": "https://openreview.net/forum?id=hw76X5uWrc", "poster": "/media/PosterPDFs/NeurIPS%202024/94038.png?t=1733524439.604349", "project": "", "author_site": "Elliot Meyerson, Olivier Francon, Darren Sargent, Babak Hodjat, Risto Miikkulainen", "tldr": "", "abstract": "Solving societal problems on a global scale requires the collection and processing of ideas and methods from diverse sets of international experts. As the number and diversity of human experts increase, so does the likelihood that elements in this collective knowledge can be combined and refined to discover novel and better solutions. However, it is difficult to identify, combine, and refine complementary information in an increasingly large and diverse knowledge base. This paper argues that artificial intelligence (AI) can play a crucial role in this process. An evolutionary AI framework, termed RHEA, fills this role by distilling knowledge from diverse models created by human experts into equivalent neural networks, which are then recombined and refined in a population-based search. The framework was implemented in a formal synthetic domain, demonstrating that it is transparent and systematic. It was then applied to the results of the XPRIZE Pandemic Response Challenge, in which over 100 teams of experts across 23 countries submitted models based on diverse methodologies to predict COVID-19 cases and suggest non-pharmaceutical intervention policies for 235 nations, states, and regions across the globe. Building upon this expert knowledge, by recombining and refining the 169 resulting policy suggestion models, RHEA discovered a broader and more effective set of policies than either AI or human experts alone, as evaluated based on real-world data. The results thus suggest that AI can play a crucial role in realizing the potential of human expertise in global problem-solving.", "keywords": "Human-AI collaboration;Evolution;Distillation;Neural Networks", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/2dda65f86322c29b664410b71f6de83831e9cef9.zip", "author": "Elliot Meyerson;Olivier Francon;Darren Sargent;Babak Hodjat;Risto Miikkulainen", "authorids": "~Elliot_Meyerson1;~Olivier_Francon1;~Darren_Sargent1;~Babak_Hodjat1;~Risto_Miikkulainen1", "gender": "M;;;M;", "homepage": ";;;;http://www.cs.utexas.edu/users/risto", "dblp": "149/2699;;;;m/RistoMiikkulainen", "google_scholar": "RT_LBQ4AAAAJ;1Wg6HFYAAAAJ;;letSloQAAAAJ;", "orcid": ";0009-0009-0006-4913;;0000-0002-4547-4731;", "linkedin": "ekmeyerson/;olivier-francon;;babakhodjat/;", "or_profile": "~Elliot_Meyerson1;~Olivier_Francon1;~Darren_Sargent1;~Babak_Hodjat1;~Risto_Miikkulainen1", "aff": "Cognizant AI Labs;Cognizant;;Cognizant;The University of Texas, Austin", "aff_domain": "cognizant.com;cognizant.com;;cognizant.com;cs.utexas.edu", "position": "Research Scientist;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\nmeyerson2024unlocking,\ntitle={Unlocking the Potential of Global Human Expertise},\nauthor={Elliot Meyerson and Olivier Francon and Darren Sargent and Babak Hodjat and Risto Miikkulainen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hw76X5uWrc}\n}", "github": "", "reviewers": "CmcD;R6Y1;fdTu", "pdf_size": 3302105, "rating": "5;5;6", "confidence": "2;3;2", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "53;201;50", "wc_strengths": "37;255;44", "wc_weaknesses": "43;393;2", "wc_questions": "84;287;70", "wc_limitations": "16;134;1", "wc_review": "233;1270;167", "wc_reply_reviewers": "20;0;31", "wc_reply_authors": "31;0;39", "reply_reviewers": "1;0;2", "reply_authors": "2;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 101.33333333333333, 70.48561713017928 ], "wc_strengths_avg": [ 112.0, 101.15664420425712 ], "wc_weaknesses_avg": [ 146.0, 175.45559742187385 ], "wc_questions_avg": [ 147.0, 99.15980368408697 ], "wc_limitations_avg": [ 50.333333333333336, 59.47735329985317 ], "wc_review_avg": [ 556.6666666666666, 505.1219874666141 ], "wc_reply_reviewers_avg": [ 17.0, 12.832251036613439 ], "wc_reply_authors_avg": [ 23.333333333333332, 16.81930108205715 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3D8MmHwWitMJ:scholar.google.com/&scioq=Unlocking+the+Potential+of+Global+Human+Expertise&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "cognizant.com;cognizant.com;;cognizant.com;cs.utexas.edu", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Cognizant;University of Texas at Austin", "aff_unique_dep": "AI Labs;", "aff_unique_url": "https://www.cognizant.com;https://www.utexas.edu", "aff_unique_abbr": "Cognizant;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Visual Riddles: a Commonsense and World Knowledge Challenge for Large Vision and Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97561", "id": "hwbRjslR5N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hwbRjslR5N", "openreview": "https://openreview.net/forum?id=hwbRjslR5N", "poster": "/media/PosterPDFs/NeurIPS%202024/97561.png?t=1731367421.9679456", "project": "", "author_site": "Nitzan Bitton Guetta, Aviv Slobodkin, Aviya Maimon, Eliya Habba, Royi Rassin, Yonatan Bitton, Idan Szpektor, Amir Globerson, Yuval Elovici", "tldr": "", "abstract": "Imagine observing someone scratching their arm; to understand why, additional context would be necessary. However, spotting a mosquito nearby would immediately offer a likely explanation for the person\u2019s discomfort, thereby alleviating the need for further information. This example illustrates how subtle visual cues can challenge our cognitive skills and demonstrates the complexity of interpreting visual scenarios. To study these skills, we present Visual Riddles, a benchmark aimed to test vision and language models on visual riddles requiring commonsense and world knowledge. The benchmark comprises 400 visual riddles, each featuring a unique image created by a variety of text-to-image models, question, ground-truth answer, textual hint, and attribution. Human evaluation reveals that existing models lag significantly behind human performance, which is at 82% accuracy, with Gemini-Pro-1.5 leading with 40% accuracy. Our benchmark comes with automatic evaluation tasks to make assessment scalable. These findings underscore the potential of Visual Riddles as a valuable resource for enhancing vision and language models\u2019 capabilities in interpreting complex visual scenarios. Data, code, and leaderboard are available at https://visual-riddles.github.io/.", "keywords": "Visual Riddles Challenge;Vision and Language Benchmark;Commonsense;World knowledge", "primary_area": "", "supplementary_material": "/attachment/ec39c08810d9d9b6080c7ababa6308107273aedf.zip", "author": "Nitzan Bitton Guetta;Aviv Slobodkin;Aviya Maimon;Eliya Habba;Royi Rassin;Yonatan Bitton;Idan Szpektor;Amir Globerson;Yuval Elovici", "authorids": "~Nitzan_Bitton_Guetta1;~Aviv_Slobodkin2;~Aviya_Maimon1;~Eliya_Habba1;~Royi_Rassin1;~Yonatan_Bitton1;~Idan_Szpektor1;~Amir_Globerson1;~Yuval_Elovici1", "gender": "F;M;F;M;M;M;;M;M", "homepage": ";https://lovodkin93.github.io/;;;https://royi-rassin.netlify.app/;https://yonatanbitton.github.io/;;http://www.cs.tau.ac.il/~gamir/;https://cyber.bgu.ac.il/yuval/", "dblp": "301/9349;290/2100.html;;;331/5719;277/7042;15/6513;08/4162.html;38/4086", "google_scholar": ";oAy77cgAAAAJ;;CbtVxHMAAAAJ;_6g4hxsAAAAJ;P9Fpf4sAAAAJ;XI2CP68AAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;https://scholar.google.co.il/citations?user=ruZDm9QAAAAJ", "orcid": ";;;;;;;;0000-0002-9641-128X", "linkedin": "https://www.linkedin.com/mwlite/in/nitzan-guetta-34a0021b5;aviv-slobodkin-73926515a/;aviya-maimon;;royi-rassin-4b8085163/;yonatanbitton/;;;yuval-elovici-0baa4a4/?originalSubdomain=il", "or_profile": "~Nitzan_Bitton_Guetta1;~Aviv_Slobodkin2;~Aviya_Maimon1;~Eliya_Habba1;~Royi_Rassin1;~Yonatan_Bitton1;~Idan_Szpektor1;~Amir_Globerson1;~Yuval_Elovici1", "aff": "Ben-Gurion University of the Negev;Bar-Ilan University;Bar-Ilan University;Hebrew University, Hebrew University of Jerusalem;Bar-Ilan University;Google;Google;Tel Aviv University;Ben Gurion University of the Negev, Technion", "aff_domain": "bgu.ac.il;biu.ac.il;biu.ac.il;cs.huji.ac.il;biu.ac.il;google.com;google.com;tau.ac.il;bgu.ac.il", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Research Scientist;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguetta2024visual,\ntitle={Visual Riddles: a Commonsense and World Knowledge Challenge for Large Vision and Language Models},\nauthor={Nitzan Bitton Guetta and Aviv Slobodkin and Aviya Maimon and Eliya Habba and Royi Rassin and Yonatan Bitton and Idan Szpektor and Amir Globerson and Yuval Elovici},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=hwbRjslR5N}\n}", "github": "", "reviewers": "5gUA;g9pM;WVhG;zua6", "pdf_size": 21698724, "rating": "7;7;7;9", "confidence": "4;3;3;3", "wc_summary_and_contributions": "51;71;99;63", "wc_strengths": "28;28;89;44", "wc_improvement": "2;79;443;9", "wc_limitations": "23;27;111;4", "wc_correctness": "2;1;146;11", "wc_clarity": "9;2;29;6", "wc_relation_to_prior_work": "6;2;110;32", "wc_documentation": "1;19;13;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "123;230;1041;178", "wc_reply_reviewers": "13;0;73;0", "wc_reply_authors": "48;48;80;0", "reply_reviewers": "1;0;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 71.0, 17.663521732655695 ], "wc_strengths_avg": [ 47.25, 24.973736204260668 ], "wc_improvement_avg": [ 133.25, 181.35100633853676 ], "wc_limitations_avg": [ 41.25, 41.19693556564614 ], "wc_correctness_avg": [ 40.0, 61.32291578194892 ], "wc_clarity_avg": [ 11.5, 10.404326023342406 ], "wc_relation_to_prior_work_avg": [ 37.5, 43.41370751271999 ], "wc_documentation_avg": [ 10.25, 6.609652033201143 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 393.0, 376.0312487014876 ], "wc_reply_reviewers_avg": [ 21.5, 30.20347662107791 ], "wc_reply_authors_avg": [ 44.0, 28.5657137141714 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10880605018839294550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "bgu.ac.il;biu.ac.il;biu.ac.il;cs.huji.ac.il;biu.ac.il;google.com;google.com;tau.ac.il;bgu.ac.il", "author_num": 9, "aff_unique_index": "0;1;1;2;1;3;3;4;5", "aff_unique_norm": "Ben-Gurion University of the Negev;Bar-Ilan University;Hebrew University of Jerusalem;Google;Tel Aviv University;Ben Gurion University of the Negev", "aff_unique_dep": ";;;Google;;", "aff_unique_url": "https://www.bgu.ac.il;https://www.biu.ac.il;https://www.huji.ac.il;https://www.google.com;https://www.tau.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BIU;HUJI;Google;TAU;BGU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;1;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "CuMo: Scaling Multimodal LLM with Co-Upcycled Mixture-of-Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94037", "id": "hwuUBsMlBf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=hwuUBsMlBf", "openreview": "https://openreview.net/forum?id=hwuUBsMlBf", "poster": "/media/PosterPDFs/NeurIPS%202024/94037.png?t=1729466047.79761", "project": "", "author_site": "Jiachen Li, Xinyao Wang, Sijie Zhu, Chia-Wen Kuo, Lu XU, Fan Chen, Jitesh Jain, Humphrey Shi, Longyin Wen", "tldr": "", "abstract": "Recent advancements in Multimodal Large Language Models (LLMs) have focused primarily on scaling by increasing text-image pair data and enhancing LLMs to improve performance on multimodal tasks. However, these scaling approaches are computationally expensive and overlook the significance of efficiently improving model capabilities from the vision side. \nInspired by the successful applications of Mixture-of-Experts (MoE) in LLMs, which improves model scalability during training while keeping inference costs similar to those of smaller models, we propose CuMo, which incorporates Co-upcycled Top-K sparsely-gated Mixture-of-experts blocks into both the vision encoder and the MLP connector, thereby enhancing the multimodal LLMs with neglectable additional activated parameters during inference.\nCuMo first pre-trains the MLP blocks and then initializes each expert in the MoE block from the pre-trained MLP block during the visual instruction tuning stage, with auxiliary losses to ensure a balanced loading of experts.\nCuMo outperforms state-of-the-art multimodal LLMs across various VQA and visual-instruction-following benchmarks within each model size group, all while training exclusively on open-sourced datasets.", "keywords": "Mixture-of-Experts;Multimodal LLM", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiachen Li;Xinyao Wang;Sijie Zhu;Chia-Wen Kuo;Lu XU;Fan Chen;Jitesh Jain;Humphrey Shi;Longyin Wen", "authorids": "~Jiachen_Li3;~Xinyao_Wang2;~Sijie_Zhu1;~Chia-Wen_Kuo1;~Lu_XU1;~Fan_Chen5;~Jitesh_Jain1;~Humphrey_Shi1;~Longyin_Wen1", "gender": "M;M;;M;M;M;M;M;M", "homepage": "https://chrisjuniorli.github.io/;;https://jeff-zilence.github.io/;https://sites.google.com/view/chiawen-kuo/home;;;https://praeclarumjj3.github.io/;;https://www.humphreyshi.com", "dblp": "137/8316-3;;216/1192;;;38/4539-2;92/2515;119/1468;176/5516", "google_scholar": "mTHic3EAAAAJ;qnA2TdEAAAAJ;https://scholar.google.com/citations?hl=en;iip65VkAAAAJ;gA9q5ugAAAAJ;yNgP-0oAAAAJ;https://scholar.google.com/citations?view_op=list_works;5HDWtHsAAAAJ;WBvt5A8AAAAJ", "orcid": ";;;;;;;0000-0001-5525-492X;0000-0002-2922-5663", "linkedin": "jiachen-li-59a64a15b/;;;;lu-xu-34b200160/;fan-chen-42732438;jitesh-jain-1451b9192/;longyin-wen-16934689/;humphreyshi", "or_profile": "~Jiachen_Li3;~Xinyao_Wang2;~Sijie_Zhu1;~Chia-Wen_Kuo1;~Lu_XU1;~Fan_Chen5;~Jitesh_Jain1;~Longyin_Wen1;~Honghui_Shi1", "aff": "University of Illinois, Urbana-Champaign;ByteDance Inc.;ByteDance;ByteDance Inc.;ByteDance Inc.;Bytedance;Georgia Institute of Technology;Bytedance Inc.;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;gatech.edu;bytedance.com;illinois.edu", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;PhD student;Research Manager;Adjunct Assistant Professor", "bibtex": "@inproceedings{\nli2024cumo,\ntitle={CuMo: Scaling Multimodal {LLM} with Co-Upcycled Mixture-of-Experts},\nauthor={Jiachen Li and Xinyao Wang and Sijie Zhu and Chia-Wen Kuo and Lu XU and Fan Chen and Jitesh Jain and Humphrey Shi and Longyin Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=hwuUBsMlBf}\n}", "github": "", "reviewers": "rTFY;5mox;NGUV", "pdf_size": 1131660, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "105;63;165", "wc_strengths": "77;42;286", "wc_weaknesses": "221;158;182", "wc_questions": "2;2;270", "wc_limitations": "2;24;104", "wc_review": "407;289;1007", "wc_reply_reviewers": "0;21;47", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.0, 41.8568990729127 ], "wc_strengths_avg": [ 135.0, 107.72495842035246 ], "wc_weaknesses_avg": [ 187.0, 25.96150997149434 ], "wc_questions_avg": [ 91.33333333333333, 126.33641157199649 ], "wc_limitations_avg": [ 43.333333333333336, 43.827946437049604 ], "wc_review_avg": [ 567.6666666666666, 314.3685028045201 ], "wc_reply_reviewers_avg": [ 22.666666666666668, 19.22382780706162 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15996204880838085035&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "uiuc.edu;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com;gatech.edu;bytedance.com;illinois.edu", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;2;3;4", "aff_unique_norm": "University of Illinois;ByteDance;Georgia Institute of Technology;Bytedance Inc.;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;;", "aff_unique_url": "https://illinois.edu;https://www.bytedance.com;https://www.gatech.edu;https://www.bytedance.com;https://illinois.edu", "aff_unique_abbr": "UIUC;ByteDance;Georgia Tech;Bytedance;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1;1;1;1;0;1;0", "aff_country_unique": "United States;China" }, { "title": "PCP-MAE: Learning to Predict Centers for Point Masked Autoencoders", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94036", "id": "i1xjK5a0X8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i1xjK5a0X8", "openreview": "https://openreview.net/forum?id=i1xjK5a0X8", "poster": "/media/PosterPDFs/NeurIPS%202024/94036.png?t=1731332156.2684648", "project": "", "author_site": "Xiangdong Zhang, Shaofeng Zhang, Junchi Yan", "tldr": "", "abstract": "Masked autoencoder has been widely explored in point cloud self-supervised learning, whereby the point cloud is generally divided into visible and masked parts. These methods typically include an encoder accepting visible patches (normalized) and corresponding patch centers (position) as input, with the decoder accepting the output of the encoder and the centers (position) of the masked parts to reconstruct each point in the masked patches. Then, the pre-trained encoders are used for downstream tasks. In this paper, we show a motivating empirical result that when directly feeding the centers of masked patches to the decoder without information from the encoder, it still reconstructs well. In other words, the centers of patches are important and the reconstruction objective does not necessarily rely on representations of the encoder, thus preventing the encoder from learning semantic representations. Based on this key observation, we propose a simple yet effective method, $i.e.$, learning to \\textbf{P}redict \\textbf{C}enters for \\textbf{P}oint \\textbf{M}asked \\textbf{A}uto\\textbf{E}ncoders (\\textbf{PCP-MAE}) which guides the model to learn to predict the significant centers and use the predicted centers to replace the directly provided centers. Specifically, we propose a Predicting Center Module (PCM) that shares parameters with the original encoder with extra cross-attention to predict centers. Our method is of high pre-training efficiency compared to other alternatives and achieves great improvement over Point-MAE, particularly surpassing it by \\textbf{5.50\\% on OBJ-BG, 6.03\\% on OBJ-ONLY, and 5.17\\% on PB-T50-RS} for 3D object classification on the ScanObjectNN dataset. The code is available at \\url{https://github.com/aHapBean/PCP-MAE}.", "keywords": "Self-supervised Learning;Point Cloud;Transfer Learning;Masked Autoencoders", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiangdong Zhang;Shaofeng Zhang;Junchi Yan", "authorids": "~Xiangdong_Zhang3;~Shaofeng_Zhang1;~Junchi_Yan2", "gender": "M;M;M", "homepage": "https://github.com/aHapBean;https://sherrylone.github.io;http://thinklab.sjtu.edu.cn/", "dblp": ";132/2540;60/7949.html", "google_scholar": ";VoVVJIgAAAAJ;ga230VoAAAAJ", "orcid": ";;0000-0001-9639-7679", "linkedin": ";;", "or_profile": "~Xiangdong_Zhang3;~Shaofeng_Zhang1;~Junchi_Yan1", "aff": "SUN YAT-SEN UNIVERSITY;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "mail2.sysu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024pcpmae,\ntitle={{PCP}-{MAE}: Learning to Predict Centers for Point Masked Autoencoders},\nauthor={Xiangdong Zhang and Shaofeng Zhang and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i1xjK5a0X8}\n}", "github": "", "reviewers": "GmbZ;DT8p;MoXR;rD3v;nJYR", "pdf_size": 1863464, "rating": "4;4;5;6;6", "confidence": "5;5;4;4;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;4;2", "presentation": "3;4;2;3;2", "wc_summary": "66;39;117;95;97", "wc_strengths": "58;39;78;124;167", "wc_weaknesses": "459;132;237;31;102", "wc_questions": "63;5;11;12;40", "wc_limitations": "37;1;8;1;1", "wc_review": "683;216;451;263;407", "wc_reply_reviewers": "578;0;210;0;0", "wc_reply_authors": "2199;344;361;0;0", "reply_reviewers": "3;0;2;0;0", "reply_authors": "7;3;3;1;1", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 82.8, 27.27929617860402 ], "wc_strengths_avg": [ 93.2, 46.481824404814404 ], "wc_weaknesses_avg": [ 192.2, 148.9528784548993 ], "wc_questions_avg": [ 26.2, 22.03088740836374 ], "wc_limitations_avg": [ 9.6, 13.965672200076872 ], "wc_review_avg": [ 404.0, 164.48951334355635 ], "wc_reply_reviewers_avg": [ 157.6, 225.38642372600884 ], "wc_reply_authors_avg": [ 580.8, 824.3318263903196 ], "reply_reviewers_avg": [ 1.0, 1.2649110640673518 ], "reply_authors_avg": [ 3.0, 2.1908902300206643 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8964214570007952, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5437604445288897863&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "mail2.sysu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Sun Yat-sen University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "SYSU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Belief-State Query Policies for User-Aligned POMDPs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94035", "id": "i2oacRDF5L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i2oacRDF5L", "openreview": "https://openreview.net/forum?id=i2oacRDF5L", "poster": "/media/PosterPDFs/NeurIPS%202024/94035.png?t=1733781988.356423", "project": "", "author_site": "Daniel Bramblett, Siddharth Srivastava", "tldr": "", "abstract": "Planning in real-world settings often entails addressing partial observability while aligning with users' requirements. We present a novel framework for expressing users' constraints and preferences about agent behavior in a partially observable setting using parameterized belief-state query (BSQ) policies in the setting of goal-oriented partially observable Markov decision processes (gPOMDPs). We present the first formal analysis of such constraints and prove that while the expected cost function of a parameterized BSQ policy w.r.t its parameters is not convex, it is piecewise constant and yields an implicit discrete parameter search space that is finite for finite horizons. This theoretical result leads to novel algorithms that optimize gPOMDP agent behavior with guaranteed user alignment. Analysis proves that our algorithms converge to the optimal user-aligned behavior in the limit. Empirical results show that parameterized BSQ policies provide a computationally feasible approach for user-aligned planning in partially observable settings.", "keywords": "POMDPs;sequential decision making;user-preferences in POMDPs", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/a3187a54e56d243c701eb69ea3e569a9ce918909.zip", "author": "Daniel Richard Bramblett;Siddharth Srivastava", "authorids": "~Daniel_Richard_Bramblett1;~Siddharth_Srivastava2", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "daniel-bramblett-060973186/;", "or_profile": "~Daniel_Richard_Bramblett1;~Siddharth_Srivastava2", "aff": "Arizona State University;", "aff_domain": "asu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nbramblett2024beliefstate,\ntitle={Belief-State Query Policies for User-Aligned Planning under Partial Observability},\nauthor={Daniel Richard Bramblett and Siddharth Srivastava},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i2oacRDF5L}\n}", "github": "", "reviewers": "Mo3a;aWyU;NodM;isnx", "pdf_size": 1666568, "rating": "5;6;6;7", "confidence": "2;3;4;3", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "2;2;2;2", "wc_summary": "23;205;224;76", "wc_strengths": "43;103;177;23", "wc_weaknesses": "70;371;995;332", "wc_questions": "1;114;313;1", "wc_limitations": "1;14;26;25", "wc_review": "138;807;1735;457", "wc_reply_reviewers": "16;41;172;10", "wc_reply_authors": "12;34;117;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 132.0, 84.86754385511578 ], "wc_strengths_avg": [ 86.5, 59.9729105513481 ], "wc_weaknesses_avg": [ 442.0, 339.6078620998048 ], "wc_questions_avg": [ 107.25, 127.43307066848857 ], "wc_limitations_avg": [ 16.5, 10.111874208078342 ], "wc_review_avg": [ 784.25, 597.740485076927 ], "wc_reply_reviewers_avg": [ 59.75, 65.84214076106578 ], "wc_reply_authors_avg": [ 45.25, 42.19819308927812 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:81d59TJq0HIJ:scholar.google.com/&scioq=Belief-State+Query+Policies+for+User-Aligned+POMDPs&hl=en&as_sdt=0,14", "gs_version_total": 2, "email": "asu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Set-based Neural Network Encoding Without Weight Tying", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94034", "id": "i3me9bCSCy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i3me9bCSCy", "openreview": "https://openreview.net/forum?id=i3me9bCSCy", "poster": "/media/PosterPDFs/NeurIPS%202024/94034.png?t=1731570581.2103937", "project": "", "author_site": "Bruno Andreis, Bedionita Soro, Philip Torr, Sung Ju Hwang", "tldr": "", "abstract": "We propose a neural network weight encoding method for network property prediction that utilizes set-to-set and set-to-vector functions\nto efficiently encode neural network parameters. Our approach is capable of encoding neural networks in a model zoo of mixed architecture and different parameter sizes as opposed to previous approaches that require custom encoding models for different architectures. Furthermore, our \\textbf{S}et-based \\textbf{N}eural network \\textbf{E}ncoder (SNE) takes into consideration the hierarchical computational structure of neural networks. To respect symmetries inherent in network weight space, we utilize Logit Invariance to learn the required minimal invariance properties. Additionally, we introduce a \\textit{pad-chunk-encode} pipeline to efficiently encode neural network layers that is adjustable to computational and memory constraints. We also introduce two new tasks for neural network property prediction: cross-dataset and cross-architecture. In cross-dataset property prediction, we evaluate how well property predictors generalize across model zoos trained on different datasets but of the same architecture. In cross-architecture property prediction, we evaluate how well property predictors transfer to model zoos of different architecture not seen during training. We show that SNE outperforms the relevant baselines on standard benchmarks.", "keywords": "Neural Network Encoding", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Bruno Andreis;Bedionita Soro;Philip Torr;Sung Ju Hwang", "authorids": "~Bruno_Andreis1;~Bedionita_Soro1;~Philip_Torr1;~Sung_Ju_Hwang1", "gender": "M;;;", "homepage": "https://andreisbruno.github.io/;;http://www.robots.ox.ac.uk/~tvg/;", "dblp": "225/0404;;;", "google_scholar": "WzQ_v4IAAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Bruno_Andreis1;~Bedionita_Soro1;~Philip_Torr1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;;University of Oxford;", "aff_domain": "kaist.ac.kr;;ox.ac.uk;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nandreis2024setbased,\ntitle={Set-based Neural Network Encoding Without Weight Tying},\nauthor={Bruno Andreis and Bedionita Soro and Philip Torr and Sung Ju Hwang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i3me9bCSCy}\n}", "github": "", "reviewers": "bv1s;uB2K;nYWS;EgRw", "pdf_size": 3766552, "rating": "4;6;6;7", "confidence": "4;4;3;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;2", "wc_summary": "100;101;103;65", "wc_strengths": "52;84;72;23", "wc_weaknesses": "101;188;157;41", "wc_questions": "49;187;57;44", "wc_limitations": "10;34;13;6", "wc_review": "312;594;402;179", "wc_reply_reviewers": "298;64;47;0", "wc_reply_authors": "609;43;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "4;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 15.769828787910159 ], "wc_strengths_avg": [ 57.75, 23.09085316743407 ], "wc_weaknesses_avg": [ 121.75, 56.08642883978262 ], "wc_questions_avg": [ 84.25, 59.503676357011756 ], "wc_limitations_avg": [ 15.75, 10.825317547305483 ], "wc_review_avg": [ 371.75, 150.85817014666458 ], "wc_reply_reviewers_avg": [ 102.25, 115.42178087345559 ], "wc_reply_authors_avg": [ 163.0, 258.09591240467176 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ENsk7EJWHuAJ:scholar.google.com/&scioq=Set-based+Neural+Network+Encoding+Without+Weight+Tying&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "kaist.ac.kr;;ox.ac.uk;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.ox.ac.uk", "aff_unique_abbr": "KAIST;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United Kingdom" }, { "title": "Pre-trained Large Language Models Use Fourier Features to Compute Addition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94033", "id": "i4MutM2TZb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i4MutM2TZb", "openreview": "https://openreview.net/forum?id=i4MutM2TZb", "poster": "/media/PosterPDFs/NeurIPS%202024/94033.png?t=1731699245.7617617", "project": "", "author_site": "Tianyi Zhou, Deqing Fu, Vatsal Sharan, Robin Jia", "tldr": "", "abstract": "Pre-trained large language models (LLMs) exhibit impressive mathematical reasoning capabilities, yet how they compute basic arithmetic, such as addition, remains unclear. \nThis paper shows that pre-trained LLMs add numbers using Fourier features---dimensions in the hidden state that represent numbers via a set of features sparse in the frequency domain. \nWithin the model, MLP and attention layers use Fourier features in complementary ways: MLP layers primarily approximate the magnitude of the answer using low-frequency features, while attention layers primarily perform modular addition (e.g., computing whether the answer is even or odd) using high-frequency features.\nPre-training is crucial for this mechanism: models trained from scratch to add numbers only exploit low-frequency features, leading to lower accuracy.\nIntroducing pre-trained token embeddings to a randomly initialized model rescues its performance.\nOverall, our analysis demonstrates that appropriate pre-trained representations (e.g., Fourier features) can unlock the ability of Transformers to learn precise mechanisms for algorithmic tasks.", "keywords": "large language models;arithmetic task;Fourier analysis", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Tianyi Zhou;Deqing Fu;Vatsal Sharan;Robin Jia", "authorids": "~Tianyi_Zhou4;~Deqing_Fu1;~Vatsal_Sharan1;~Robin_Jia1", "gender": ";M;M;M", "homepage": ";https://deqingfu.github.io/;https://vatsalsharan.github.io/;https://robinjia.github.io/", "dblp": ";304/3030;126/2543;182/2556", "google_scholar": ";fsbgfqEAAAAJ;Ize17HEAAAAJ;ajZ-_O0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tianyi_Zhou4;~Deqing_Fu1;~Vatsal_Sharan1;~Robin_Jia1", "aff": ";University of Southern California;University of Southern California;University of Southern California", "aff_domain": ";usc.edu;usc.edu;usc.edu", "position": ";PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024pretrained,\ntitle={Pre-trained Large Language Models Use Fourier Features to Compute Addition},\nauthor={Tianyi Zhou and Deqing Fu and Vatsal Sharan and Robin Jia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i4MutM2TZb}\n}", "github": "", "reviewers": "mHEL;wYe3;XwiY;YRn2", "pdf_size": 6865078, "rating": "5;6;7;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "110;110;118;113", "wc_strengths": "87;37;32;59", "wc_weaknesses": "75;99;80;86", "wc_questions": "527;38;477;195", "wc_limitations": "68;3;2;56", "wc_review": "867;287;709;509", "wc_reply_reviewers": "0;0;264;0", "wc_reply_authors": "1278;0;969;0", "reply_reviewers": "0;0;1;0", "reply_authors": "4;1;3;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 112.75, 3.2691742076555053 ], "wc_strengths_avg": [ 53.75, 21.718367802392518 ], "wc_weaknesses_avg": [ 85.0, 8.972179222463181 ], "wc_questions_avg": [ 309.25, 201.36083904274932 ], "wc_limitations_avg": [ 32.25, 30.053078045351693 ], "wc_review_avg": [ 593.0, 217.49942528659702 ], "wc_reply_reviewers_avg": [ 66.0, 114.3153532995459 ], "wc_reply_authors_avg": [ 561.75, 572.2745735221861 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15114131838372969396&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";usc.edu;usc.edu;usc.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Reconstruction Attacks on Machine Unlearning: Simple Models are Vulnerable", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94032", "id": "i4gqCM1r3z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i4gqCM1r3z", "openreview": "https://openreview.net/forum?id=i4gqCM1r3z", "poster": "", "project": "", "author_site": "Martin Bertran, Shuai Tang, Michael Kearns, Jamie Morgenstern, Aaron Roth, Steven Wu", "tldr": "", "abstract": "Machine unlearning is motivated by principles of data autonomy. The premise is that a person can request to have their data's influence removed from deployed models, and those models should be updated as if they were retrained without the person's data. We show that these updates expose individuals to high-accuracy reconstruction attacks which allow the attacker to recover their data in its entirety, even when the original models are so simple that privacy risk might not otherwise have been a concern. We show how to mount a near-perfect attack on the deleted data point from linear regression models. We then generalize our attack to other loss functions and architectures, and empirically demonstrate the effectiveness of our attacks across a wide range of datasets (capturing both tabular and image data). Our work highlights that privacy risk is significant even for extremely simple model classes when individuals can request deletion of their data from the model.", "keywords": "machine unlearning;reconstruction attack", "primary_area": "privacy", "supplementary_material": "", "author": "Martin Andres Bertran;Shuai Tang;Michael Kearns;Jamie Heather Morgenstern;Aaron Roth;Steven Wu", "authorids": "~Martin_Andres_Bertran1;~Shuai_Tang1;~Michael_Kearns2;~Jamie_Heather_Morgenstern1;~Aaron_Roth1;~Steven_Wu1", "gender": "M;M;;M;;M", "homepage": ";http://shuaitang.github.io;http://jamiemorgenstern.com;http://www.cis.upenn.edu/~aaroth/;https://www.cis.upenn.edu/~mkearns/;https://zstevenwu.com/", "dblp": "154/1944;;64/8610;80/3311;78/6858;137/8350", "google_scholar": "1kki_voAAAAJ;fJVeBrAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=kLUQrrYAAAAJ;8iQk0DIAAAAJ;MbF6rTEAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;zstevenwu/", "or_profile": "~Martin_Andres_Bertran1;~Shuai_Tang1;~Jamie_Heather_Morgenstern1;~Aaron_Roth1;~Michael_J._Kearns1;~Zhiwei_Steven_Wu1", "aff": ";Amazon Web Services;;University of Pennsylvania;University of Pennsylvania;Carnegie Mellon University", "aff_domain": ";amazon.com;;upenn.edu;upenn.edu;cmu.edu", "position": ";Applied Scientist;;Full Professor;Professor;Assistant Professor", "bibtex": "@inproceedings{\nbertran2024reconstruction,\ntitle={Reconstruction Attacks on Machine Unlearning: Simple Models are Vulnerable},\nauthor={Martin Andres Bertran and Shuai Tang and Michael Kearns and Jamie Heather Morgenstern and Aaron Roth and Steven Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i4gqCM1r3z}\n}", "github": "", "reviewers": "zg9a;b41V;YZsT;VcL6", "pdf_size": 1977854, "rating": "4;5;6;6", "confidence": "4;3;5;3", "soundness": "2;2;4;3", "novelty": "2;3;2;3", "presentation": "3;3;4;2", "wc_summary": "90;104;55;60", "wc_strengths": "20;55;67;58", "wc_weaknesses": "166;146;207;83", "wc_questions": "3;27;56;116", "wc_limitations": "1;1;17;3", "wc_review": "280;333;402;320", "wc_reply_reviewers": "0;90;210;31", "wc_reply_authors": "0;6;14;16", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 20.437404434027332 ], "wc_strengths_avg": [ 50.0, 17.874562931719478 ], "wc_weaknesses_avg": [ 150.5, 44.746508243660756 ], "wc_questions_avg": [ 50.5, 42.21670285562339 ], "wc_limitations_avg": [ 5.5, 6.689544080129826 ], "wc_review_avg": [ 333.75, 43.97939858615622 ], "wc_reply_reviewers_avg": [ 82.75, 80.26635347391833 ], "wc_reply_authors_avg": [ 9.0, 6.4031242374328485 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15314155343869425781&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";amazon.com;;upenn.edu;upenn.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Amazon;University of Pennsylvania;Carnegie Mellon University", "aff_unique_dep": "Amazon Web Services;;", "aff_unique_url": "https://aws.amazon.com;https://www.upenn.edu;https://www.cmu.edu", "aff_unique_abbr": "AWS;UPenn;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Predict Structural Vibrations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94031", "id": "i4jZ6fCDdy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i4jZ6fCDdy", "openreview": "https://openreview.net/forum?id=i4jZ6fCDdy", "poster": "/media/PosterPDFs/NeurIPS%202024/94031.png?t=1733147815.2286057", "project": "", "author_site": "Jan van Delden, Julius Schultz, Christopher Blech, Sabine Langer, Timo L\u00fcddecke", "tldr": "", "abstract": "In mechanical structures like airplanes, cars and houses, noise is generated and transmitted through vibrations. To take measures to reduce this noise, vibrations need to be simulated with expensive numerical computations. Deep learning surrogate models present a promising alternative to classical numerical simulations as they can be evaluated magnitudes faster, while trading-off accuracy. To quantify such trade-offs systematically and foster the development of methods, we present a benchmark on the task of predicting the vibration of harmonically excited plates. The benchmark features a total of 12,000 plate geometries with varying forms of beadings, material, boundary conditions, load position and sizes with associated numerical solutions. \nTo address the benchmark task, we propose a new network architecture, named \\modelname, which predicts vibration patterns of plate geometries given a specific excitation frequency. Applying principles from operator learning and implicit models for shape encoding, our approach effectively addresses the prediction of highly variable frequency response functions occurring in dynamic systems. To quantify the prediction quality, we introduce a set of evaluation metrics and evaluate the method on our vibrating-plates benchmark. Our method outperforms DeepONets, Fourier Neural Operators and more traditional neural network architectures and can be used for design optimization.\nCode, dataset and visualizations: https://github.com/ecker-lab/Learning_Vibrating_Plates", "keywords": "Vibrations;Engineering;Acoustics;Vibroacoustics;Surrogate Modeling", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Jan van Delden;Julius Schultz;Christopher Blech;Sabine C. Langer;Timo L\u00fcddecke", "authorids": "~Jan_van_Delden1;~Julius_Schultz1;~Christopher_Blech1;~Sabine_C._Langer1;~Timo_L\u00fcddecke1", "gender": "M;M;M;F;", "homepage": "https://eckerlab.org/people/;;https://www.tu-braunschweig.de/ina/institut/team/christopher-blech;https://www.tu-braunschweig.de/;https://eckerlab.org/people/", "dblp": "357/1642;;;;142/3944", "google_scholar": ";;;;dgGelc4AAAAJ", "orcid": ";0000-0002-1486-9976;;0000-0002-5814-044X;", "linkedin": ";;;;", "or_profile": "~Jan_van_Delden1;~Julius_Schultz1;~Christopher_Blech1;~Sabine_C._Langer1;~Timo_L\u00fcddecke1", "aff": "Georg-August Universit\u00e4t G\u00f6ttingen;;TU Braunschweig;TU Braunschweig;University of Goettingen", "aff_domain": "uni-goettingen.de;;tu-braunschweig.de;tu-braunschweig.de;uni-goettingen.de", "position": "PhD student;;Postdoc;Full Professor;Postdoc", "bibtex": "@inproceedings{\ndelden2024learning,\ntitle={Learning to Predict Structural Vibrations},\nauthor={Jan van Delden and Julius Schultz and Christopher Blech and Sabine C. Langer and Timo L{\\\"u}ddecke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i4jZ6fCDdy}\n}", "github": "", "reviewers": "75jo;YeHT;ifbc;1y6b", "pdf_size": 2101443, "rating": "5;5;7;7", "confidence": "4;5;3;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "115;90;75;29", "wc_strengths": "112;10;74;40", "wc_weaknesses": "179;99;11;43", "wc_questions": "88;28;46;125", "wc_limitations": "288;42;33;51", "wc_review": "782;269;239;288", "wc_reply_reviewers": "283;11;18;184", "wc_reply_authors": "82;0;0;66", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 31.307946275666183 ], "wc_strengths_avg": [ 59.0, 38.06573262134856 ], "wc_weaknesses_avg": [ 83.0, 63.74950980203691 ], "wc_questions_avg": [ 71.75, 37.67210506462308 ], "wc_limitations_avg": [ 103.5, 106.71105847099447 ], "wc_review_avg": [ 394.5, 224.404211190432 ], "wc_reply_reviewers_avg": [ 124.0, 114.98478160174066 ], "wc_reply_authors_avg": [ 37.0, 37.429934544425805 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11604574847193490038&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "uni-goettingen.de;;tu-braunschweig.de;tu-braunschweig.de;uni-goettingen.de", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Georg-August Universit\u00e4t G\u00f6ttingen;Technische Universit\u00e4t Braunschweig;University of G\u00f6ttingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-goettingen.de;https://tu-braunschweig.de;https://www.uni-goettingen.de", "aff_unique_abbr": "GAU;TU Braunschweig;UG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Causal language modeling can elicit search and reasoning capabilities on logic puzzles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94030", "id": "i5PoejmWoC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i5PoejmWoC", "openreview": "https://openreview.net/forum?id=i5PoejmWoC", "poster": "", "project": "", "author_site": "Kulin Shah, Nishanth Dikkala, Xin Wang, Rina Panigrahy", "tldr": "", "abstract": "Causal language modeling using the Transformer architecture has yielded remarkable capabilities in Large Language Models (LLMs) over the last few years. However, the extent to which fundamental search and reasoning capabilities emerged within LLMs remains a topic of ongoing debate. In this work, we study if causal language modeling can learn a complex task such as solving Sudoku puzzles. To solve a Sudoku, the model is first required to search over all empty cells of the puzzle to decide on a cell to fill and then apply an appropriate strategy to fill the decided cell. Sometimes, the application of a strategy only results in thinning down the possible values in a cell rather than concluding the exact value of the cell. In such cases, multiple strategies are applied one after the other to fill a single cell. We observe that Transformer models trained on this synthetic task can indeed learn to solve Sudokus (our model solves $94.21\\%$ of the puzzles fully correctly) when trained on a logical sequence of steps taken by a solver. We find that training Transformers with the logical sequence of steps is necessary and without such training, they fail to learn Sudoku. We also extend our analysis to Zebra puzzles (known as Einstein puzzles) and show that the model solves $92.04 \\%$ of the puzzles fully correctly. In addition, we study the internal representations of the trained Transformer and find that through linear probing, we can decode information about the set of possible values in any given cell from them, pointing to the presence of a strong reasoning engine implicit in the Transformer weights.", "keywords": "reasoning;search;planning;sudoku;world model;transformers", "primary_area": "generative_models", "supplementary_material": "", "author": "Kulin Shah;Nishanth Dikkala;Xin Wang;Rina Panigrahy", "authorids": "~Kulin_Shah1;~Nishanth_Dikkala1;~Xin_Wang30;~Rina_Panigrahy1", "gender": "M;M;M;", "homepage": "https://kulinshah98.github.io/;http://people.csail.mit.edu/nishanthd/;;", "dblp": "215/3581;138/8092;;p/RinaPanigrahy", "google_scholar": "https://scholar.google.co.in/citations?user=67OmLg4AAAAJ;CMZoOTIAAAAJ;7BjA8ccAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kulin_Shah1;~Nishanth_Dikkala1;~Xin_Wang30;~Rina_Panigrahy1", "aff": "University of Texas, Austin;Google;Google;Google", "aff_domain": "cs.utexas.edu;google.com;google.com;google.com", "position": "PhD student;Google Research;Software Engineer;Research Scientist", "bibtex": "@inproceedings{\nshah2024causal,\ntitle={Causal language modeling can elicit search and reasoning capabilities on logic puzzles},\nauthor={Kulin Shah and Nishanth Dikkala and Xin Wang and Rina Panigrahy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i5PoejmWoC}\n}", "github": "", "reviewers": "Qf5M;xEc8;W1Mz;Uxpn", "pdf_size": 1213460, "rating": "5;5;7;8", "confidence": "3;4;3;4", "soundness": "2;3;2;3", "novelty": "2;2;3;3", "presentation": "2;3;2;4", "wc_summary": "89;139;135;76", "wc_strengths": "30;42;81;61", "wc_weaknesses": "258;51;129;213", "wc_questions": "90;30;58;35", "wc_limitations": "1;1;12;29", "wc_review": "468;263;415;414", "wc_reply_reviewers": "49;9;189;80", "wc_reply_authors": "0;31;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 109.75, 27.67105888830422 ], "wc_strengths_avg": [ 53.5, 19.345542122153102 ], "wc_weaknesses_avg": [ 162.75, 79.41150735252417 ], "wc_questions_avg": [ 53.25, 23.699947257325277 ], "wc_limitations_avg": [ 10.75, 11.453711188955307 ], "wc_review_avg": [ 390.0, 76.50816949842677 ], "wc_reply_reviewers_avg": [ 81.75, 66.84076226375639 ], "wc_reply_authors_avg": [ 7.75, 13.423393758658799 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1545547239578282248&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "cs.utexas.edu;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.utexas.edu;https://www.google.com", "aff_unique_abbr": "UT Austin;Google", "aff_campus_unique_index": "0;1;1;1", "aff_campus_unique": "Austin;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "How to Use Diffusion Priors under Sparse Views?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94029", "id": "i6BBclCymR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i6BBclCymR", "openreview": "https://openreview.net/forum?id=i6BBclCymR", "poster": "/media/PosterPDFs/NeurIPS%202024/94029.png?t=1730785728.4090235", "project": "", "author_site": "Qisen Wang, Yifan Zhao, Jiawei Ma, Jia Li", "tldr": "", "abstract": "Novel view synthesis under sparse views has been a long-term important challenge in 3D reconstruction. Existing works mainly rely on introducing external semantic or depth priors to supervise the optimization of 3D representations. However, the diffusion model, as an external prior that can directly provide visual supervision, has always underperformed in sparse-view 3D reconstruction using Score Distillation Sampling (SDS) due to the low information entropy of sparse views compared to text, leading to optimization challenges caused by mode deviation. To this end, we present a thorough analysis of SDS from the mode-seeking perspective and propose Inline Prior Guided Score Matching (IPSM), which leverages visual inline priors provided by pose relationships between viewpoints to rectify the rendered image distribution and decomposes the original optimization objective of SDS, thereby offering effective diffusion visual guidance without any fine-tuning or pre-training. Furthermore, we propose the IPSM-Gaussian pipeline, which adopts 3D Gaussian Splatting as the backbone and supplements depth and geometry consistency regularization based on IPSM to further improve inline priors and rectified distribution. Experimental results on different public datasets show that our method achieves state-of-the-art reconstruction quality. The code is released at https://github.com/iCVTEAM/IPSM.", "keywords": "Novel View Synthesis;Sparse View;Diffusion Model;3D Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "/attachment/bb47d8c8dc4f6fe9e5e3a199a7f65f832c552da8.zip", "author": "Qisen Wang;Yifan Zhao;Jiawei Ma;Jia Li", "authorids": "~Qisen_Wang1;~Yifan_Zhao2;~Jiawei_Ma4;~Jia_Li1", "gender": "M;M;M;M", "homepage": "http://cvteam.buaa.edu.cn/members.html;https://zhao1f.github.io/;;http://cvteam.net/", "dblp": "185/7976;13/7050-2.html;;23/6950-3", "google_scholar": ";bUzykm0AAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;0009-0003-1728-6543;", "linkedin": ";;;", "or_profile": "~Qisen_Wang1;~Yifan_Zhao2;~Jiawei_Ma4;~Jia_Li1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Associate Professor;MS student;Professor", "bibtex": "@inproceedings{\nwang2024how,\ntitle={How to Use Diffusion Priors under Sparse Views?},\nauthor={Qisen Wang and Yifan Zhao and Jiawei Ma and Jia Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i6BBclCymR}\n}", "github": "", "reviewers": "Ky7s;y1nW;xf5Q", "pdf_size": 6771934, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "89;104;187", "wc_strengths": "36;42;84", "wc_weaknesses": "71;137;139", "wc_questions": "114;196;27", "wc_limitations": "6;9;9", "wc_review": "316;488;446", "wc_reply_reviewers": "0;569;0", "wc_reply_authors": "0;1642;0", "reply_reviewers": "0;2;0", "reply_authors": "1;4;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 126.66666666666667, 43.099368389287974 ], "wc_strengths_avg": [ 54.0, 21.354156504062622 ], "wc_weaknesses_avg": [ 115.66666666666667, 31.594654962860762 ], "wc_questions_avg": [ 112.33333333333333, 69.00402564746173 ], "wc_limitations_avg": [ 8.0, 1.4142135623730951 ], "wc_review_avg": [ 416.6666666666667, 73.21809126772487 ], "wc_reply_reviewers_avg": [ 189.66666666666666, 268.229172330097 ], "wc_reply_authors_avg": [ 547.3333333333334, 774.0462231388739 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14357384674319169588&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "SkiLD: Unsupervised Skill Discovery Guided by Factor Interactions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94028", "id": "i816TeqgVh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i816TeqgVh", "openreview": "https://openreview.net/forum?id=i816TeqgVh", "poster": "/media/PosterPDFs/NeurIPS%202024/94028.png?t=1731721034.6501749", "project": "", "author_site": "Zizhao Wang, Jiaheng Hu, Caleb Chuck, Stephen Chen, Roberto Mart\u00edn-Mart\u00edn, Amy Zhang, Scott Niekum, Peter Stone", "tldr": "", "abstract": "Unsupervised skill discovery carries the promise that an intelligent agent can learn reusable skills through autonomous, reward-free interactions with environments. Existing unsupervised skill discovery methods learn skills by encouraging distinguishable behaviors that cover diverse states. However, in complex environments with many state factors (e.g., household environments with many objects), learning skills that cover all possible states is impossible, and naively encouraging state diversity often leads to simple skills that are not ideal for solving downstream tasks. This work introduces Skill Discovery from Local Dependencies (SkiLD), which leverages state factorization as a natural inductive bias to guide the skill learning process. The key intuition guiding SkiLD is that skills that induce \\textbf{diverse interactions} between state factors are often more valuable for solving downstream tasks. To this end, SkiLD develops a novel skill learning objective that explicitly encourages the mastering of skills that effectively induce different interactions within an environment. We evaluate SkiLD in several domains with challenging, long-horizon sparse reward tasks including a realistic simulated household robot domain, where SkiLD successfully learns skills with clear semantic meaning and shows superior performance compared to existing unsupervised reinforcement learning methods that only maximize state coverage.", "keywords": "unsupervised skill discovery", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zizhao Wang;Jiaheng Hu;Caleb Chuck;Stephen Chen;Roberto Mart\u00edn-Mart\u00edn;Amy Zhang;Scott Niekum;Peter Stone", "authorids": "~Zizhao_Wang3;~Jiaheng_Hu1;~Caleb_Chuck1;~Stephen_Chen1;~Roberto_Mart\u00edn-Mart\u00edn1;~Amy_Zhang1;~Scott_Niekum1;~Peter_Stone1", "gender": "M;M;M;;M;;M;M", "homepage": ";https://jiahenghu.github.io/;http://calcharles.github.io;;https://robertomartinmartin.com/;;https://people.cs.umass.edu/~sniekum/index.php;http://www.cs.utexas.edu/~pstone", "dblp": "245/5008;;;;153/7670;;62/8399;s/PeterStone", "google_scholar": "https://scholar.google.ca/citations?user=V4KQIWsAAAAJ;;gELTaB4AAAAJ;;XOJE8OEAAAAJ;;4wXYfSUAAAAJ;qnwjcfAAAAAJ", "orcid": ";;;;0000-0002-9586-2759;;;0000-0002-6795-420X", "linkedin": ";;;;;;;", "or_profile": "~Zizhao_Wang3;~Jiaheng_Hu1;~Caleb_Chuck1;~Stephen_Chen1;~Roberto_Mart\u00edn-Mart\u00edn1;~Amy_Zhang1;~Scott_Niekum1;~Peter_Stone1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;;University of Texas at Austin;;University of Massachusetts at Amherst;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;;utexas.edu;;umass.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;;Assistant Professor;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024skild,\ntitle={Ski{LD}: Unsupervised Skill Discovery Guided by Factor Interactions},\nauthor={Zizhao Wang and Jiaheng Hu and Caleb Chuck and Stephen Chen and Roberto Mart{\\'\\i}n-Mart{\\'\\i}n and Amy Zhang and Scott Niekum and Peter Stone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i816TeqgVh}\n}", "github": "", "reviewers": "c2kz;dKfG;WEDj;jfpH", "pdf_size": 2029318, "rating": "5;5;6;8", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "132;49;27;89", "wc_strengths": "139;50;50;56", "wc_weaknesses": "414;96;295;46", "wc_questions": "216;4;58;242", "wc_limitations": "52;4;2;7", "wc_review": "953;203;432;440", "wc_reply_reviewers": "167;93;249;11", "wc_reply_authors": "284;428;42;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.25, 40.07103068302586 ], "wc_strengths_avg": [ 73.75, 37.7516555928346 ], "wc_weaknesses_avg": [ 212.75, 148.9150344995427 ], "wc_questions_avg": [ 130.0, 101.24228365658294 ], "wc_limitations_avg": [ 16.25, 20.716840975399702 ], "wc_review_avg": [ 507.0, 274.5204910384651 ], "wc_reply_reviewers_avg": [ 130.0, 88.11923740024082 ], "wc_reply_authors_avg": [ 188.5, 175.695048308141 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1791339620768329456&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 5, "email": "utexas.edu;utexas.edu;utexas.edu;;utexas.edu;;umass.edu;utexas.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Texas at Austin;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.umass.edu", "aff_unique_abbr": "UT Austin;UMass Amherst", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Austin;Amherst", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "$\\textit{Read-ME}$: Refactorizing LLMs as Router-Decoupled Mixture of Experts with System Co-Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94027", "id": "i8JaxY7tDI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i8JaxY7tDI", "openreview": "https://openreview.net/forum?id=i8JaxY7tDI", "poster": "/media/PosterPDFs/NeurIPS%202024/94027.png?t=1731742141.6920156", "project": "", "author_site": "Ruisi Cai, Yeonju Ro, Geon-Woo Kim, Peihao Wang, Babak Ehteshami Bejnordi, Aditya Akella, Zhangyang "Atlas" Wang", "tldr": "", "abstract": "The proliferation of large language models (LLMs) has led to the adoption of Mixture-of-Experts (MoE) architectures that dynamically leverage specialized subnetworks for improved efficiency and performance. Despite their benefits, MoE models face significant challenges during inference, including inefficient memory management and suboptimal batching, due to misaligned design choices between the model architecture and the system policies. Furthermore, the conventional approach of training MoEs from scratch is increasingly prohibitive in terms of cost. \nIn this paper, we propose a novel framework $\\textit{Read-ME}$ that transforms pre-trained dense LLMs into smaller MoE models (in contrast to ``upcycling\" generalist MoEs), avoiding the high costs of ground-up training. Our approach employs activation sparsity to extract experts. \nTo compose experts, we examine the widely-adopted layer-wise router design and show its redundancy, and thus we introduce the pre-gating router decoupled from the MoE backbone that facilitates system-friendly pre-computing and lookahead scheduling, enhancing expert-aware batching and caching.\nOur codesign therefore addresses critical gaps on both the algorithmic and system fronts, establishing a scalable and efficient alternative for LLM inference in resource-constrained settings.\n$\\textit{Read-ME}$ outperforms other popular open-source dense models of similar scales, achieving improvements of up to 10.1\\% on MMLU, and improving mean end-to-end latency up to 6.1\\%. \nCodes are available at: \\url{https://github.com/VITA-Group/READ-ME}.", "keywords": "MoE;Machine Learning;Efficient Inference;Mixture-of-experts;LLM;MoE Inference;Efficient serving;LLM serving", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ruisi Cai;Yeonju Ro;Geon-Woo Kim;Peihao Wang;Babak Ehteshami Bejnordi;Aditya Akella;Zhangyang Wang", "authorids": "~Ruisi_Cai1;~Yeonju_Ro1;~Geon-Woo_Kim1;~Peihao_Wang1;~Babak_Ehteshami_Bejnordi1;~Aditya_Akella1;~Zhangyang_Wang1", "gender": "F;F;M;M;M;M;M", "homepage": "https://cairuisi.github.io;https://sites.google.com/view/hey-yeonju;;https://peihaowang.github.io/;http://babakint.com/;https://www.cs.utexas.edu/~akella;https://vita-group.github.io", "dblp": "341/1491;232/0146;;239/4075;175/5607;a/AdityaAkella.html;119/4026", "google_scholar": "B0chY1AAAAAJ;https://scholar.google.com/citations?hl=en;lNpT22YAAAAJ;fqf2tBsAAAAJ;https://scholar.google.se/citations?user=Qk-AMk0AAAAJ;d_rxnzAAAAAJ;pxFyKAIAAAAJ", "orcid": ";0009-0002-9034-0377;;;;;", "linkedin": ";yeonju-ro-a938728b/;;peihao-wang-25a411162/;babakint/;aditya-akella-5327148;", "or_profile": "~Ruisi_Cai1;~Yeonju_Ro1;~Geon-Woo_Kim1;~Peihao_Wang1;~Babak_Ehteshami_Bejnordi1;~Aditya_Akella1;~Zhangyang_Wang1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;Qualcomm AI Research, Qualcomm Inc, QualComm;Google;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;qti.qualcomm.com;google.com;utexas.edu", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;software engineer ;Associate Professor", "bibtex": "@inproceedings{\ncai2024textitreadme,\ntitle={\\${\\textbackslash}textit\\{Read-{ME}\\}\\$: Refactorizing {LLM}s as Router-Decoupled Mixture of Experts with System Co-Design},\nauthor={Ruisi Cai and Yeonju Ro and Geon-Woo Kim and Peihao Wang and Babak Ehteshami Bejnordi and Aditya Akella and Zhangyang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i8JaxY7tDI}\n}", "github": "", "reviewers": "AX77;wHVe;EGvf;ZdPD", "pdf_size": 895725, "rating": "4;6;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "88;141;64;94", "wc_strengths": "8;38;158;26", "wc_weaknesses": "87;97;138;391", "wc_questions": "3;146;34;5", "wc_limitations": "1;1;4;5", "wc_review": "187;423;398;521", "wc_reply_reviewers": "16;23;34;48", "wc_reply_authors": "89;29;43;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.75, 27.904972675134445 ], "wc_strengths_avg": [ 57.5, 58.997881317891405 ], "wc_weaknesses_avg": [ 178.25, 124.30883918692186 ], "wc_questions_avg": [ 47.0, 58.45938761225608 ], "wc_limitations_avg": [ 2.75, 1.7853571071357126 ], "wc_review_avg": [ 382.25, 121.74024601585131 ], "wc_reply_reviewers_avg": [ 30.25, 12.090802289343747 ], "wc_reply_authors_avg": [ 46.0, 25.865034312755125 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:f5MOOk3j_EYJ:scholar.google.com/&scioq=%24%5Ctextit%7BRead-ME%7D%24:+Refactorizing+LLMs+as+Router-Decoupled+Mixture+of+Experts+with+System+Co-Design&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;qti.qualcomm.com;google.com;utexas.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "University of Texas at Austin;Qualcomm;Google", "aff_unique_dep": ";Qualcomm AI Research;Google", "aff_unique_url": "https://www.utexas.edu;https://www.qualcomm.com/research;https://www.google.com", "aff_unique_abbr": "UT Austin;Qualcomm;Google", "aff_campus_unique_index": "0;0;0;0;2;0", "aff_campus_unique": "Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Interpretable Lightweight Transformer via Unrolling of Learned Graph Smoothness Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94026", "id": "i8LoWBJf7j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i8LoWBJf7j", "openreview": "https://openreview.net/forum?id=i8LoWBJf7j", "poster": "/media/PosterPDFs/NeurIPS%202024/94026.png?t=1731622127.578805", "project": "", "author_site": "VIET HO TAM THUC DO, Parham Eftekhar, Seyed Alireza Hosseini, Gene Cheung, Philip Chou", "tldr": "", "abstract": "We build interpretable and lightweight transformer-like neural networks by unrolling iterative optimization algorithms that minimize graph smoothness priors---the quadratic graph Laplacian regularizer (GLR) and the $\\ell_1$-norm graph total variation (GTV)---subject to an interpolation constraint. The crucial insight is that a normalized signal-dependent graph learning module amounts to a variant of the basic self-attention mechanism in conventional transformers. Unlike \"black-box\" transformers that require learning of large key, query and value matrices to compute scaled dot products as affinities and subsequent output embeddings, resulting in huge parameter sets, our unrolled networks employ shallow CNNs to learn low-dimensional features per node to establish pairwise Mahalanobis distances and construct sparse similarity graphs. At each layer, given a learned graph, the target interpolated signal is simply a low-pass filtered output derived from the minimization of an assumed graph smoothness prior, leading to a dramatic reduction in parameter count. Experiments for two image interpolation applications verify the restoration performance, parameter efficiency and robustness to covariate shift of our graph-based unrolled networks compared to conventional transformers.", "keywords": "Algorithm Unrolling;Graph Smoothness Prior;White-box Transformer", "primary_area": "optimization", "supplementary_material": "", "author": "VIET HO TAM THUC DO;Parham Eftekhar;Seyed Alireza Hosseini;Gene Cheung;Philip Chou", "authorids": "~VIET_HO_TAM_THUC_DO1;~Parham_Eftekhar1;~Seyed_Alireza_Hosseini1;~Gene_Cheung1;~Philip_Chou1", "gender": ";M;M;M;", "homepage": ";;https://ee.sharif.edu/~hosseini.sayyedalireza/;https://www.eecs.yorku.ca/~genec/index.html;https://packet.media", "dblp": ";;;17/6315;c/PhilipAChou.html", "google_scholar": "https://scholar.google.ca/citations?user=8JcaCWcAAAAJ;;;https://scholar.google.co.jp/citations?user=bYvl2kwAAAAJ;BI4MThAAAAAJ", "orcid": ";;;0000-0002-5571-4137;0000-0002-7242-0210", "linkedin": "tam-thuc-do-b7a1b3145/;parham-eftekhar/;;gene-cheung-b6bab/;phchou/", "or_profile": "~VIET_HO_TAM_THUC_DO1;~Parham_Eftekhar1;~Seyed_Alireza_Hosseini1;~Gene_Cheung1;~Philip_Chou1", "aff": "York University;York University;York University;York University;", "aff_domain": "yorku.ca;yorku.ca;yorku.ca;yorku.ca;", "position": "Researcher;PhD student;MS student;Full Professor;", "bibtex": "@inproceedings{\ndo2024interpretable,\ntitle={Interpretable Lightweight Transformer via Unrolling of Learned Graph Smoothness Priors},\nauthor={VIET HO TAM THUC DO and Parham Eftekhar and Seyed Alireza Hosseini and Gene Cheung and Philip Chou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i8LoWBJf7j}\n}", "github": "", "reviewers": "1kQ9;oH21;rsiC", "pdf_size": 1187768, "rating": "6;6;7", "confidence": "3;2;5", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "61;61;21", "wc_strengths": "44;17;24", "wc_weaknesses": "202;59;14", "wc_questions": "3;22;156", "wc_limitations": "9;1;7", "wc_review": "319;160;222", "wc_reply_reviewers": "17;29;67", "wc_reply_authors": "0;311;216", "reply_reviewers": "1;1;2", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 47.666666666666664, 18.856180831641264 ], "wc_strengths_avg": [ 28.333333333333332, 11.440668201153676 ], "wc_weaknesses_avg": [ 91.66666666666667, 80.15124591809052 ], "wc_questions_avg": [ 60.333333333333336, 68.08980997346241 ], "wc_limitations_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_review_avg": [ 233.66666666666666, 65.43359653538506 ], "wc_reply_reviewers_avg": [ 37.666666666666664, 21.31248981752771 ], "wc_reply_authors_avg": [ 175.66666666666666, 130.1289958293522 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6681427994822453677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "yorku.ca;yorku.ca;yorku.ca;yorku.ca;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "York University", "aff_unique_dep": "", "aff_unique_url": "https://www.yorku.ca", "aff_unique_abbr": "York U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "WildVision: Evaluating Vision-Language Models in the Wild with Human Preferences", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97560", "id": "i92eyFCQHC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i92eyFCQHC", "openreview": "https://openreview.net/forum?id=i92eyFCQHC", "poster": "", "project": "", "author_site": "Yujie Lu, Dongfu Jiang, Wenhu Chen, William Yang Wang, Yejin Choi, Bill Yuchen Lin", "tldr": "", "abstract": "Recent breakthroughs in vision-language models (VLMs) emphasize the necessity of benchmarking human preferences in real-world multimodal interactions. To address this gap, we launched WildVision-Arena (WV-Arena), an online platform that collects human preferences to evaluate VLMs. We curated WV-Bench by selecting 500 high-quality samples from 8,000 user submissions in WV-Arena. WV-Bench uses GPT-4 as the judge to compare each VLM with Claude-3-Sonnet, achieving a Spearman correlation of 0.94 with the WV-Arena Elo. This significantly outperforms other benchmarks like MMVet, MMMU, and MMStar.\n\nOur comprehensive analysis of 20K real-world interactions reveals important insights into the failure cases of top-performing VLMs. For example, we find that although GPT-4V surpasses many other models like Reka-Flash, Opus, and Yi-VL-Plus in simple visual recognition and reasoning tasks, it still faces challenges with subtle contextual cues, spatial reasoning, visual imagination, and expert domain knowledge. Additionally, current VLMs exhibit issues with hallucinations and safety when intentionally provoked. We are releasing our chat and feedback data to further advance research in the field of VLMs.", "keywords": "Vision and Language Models;Evaluation Platform;Benchmark", "primary_area": "", "supplementary_material": "/attachment/24c5d39110088b81b0b8039c47f138d97da09956.pdf", "author": "Yujie Lu;Dongfu Jiang;Wenhu Chen;William Yang Wang;Yejin Choi;Bill Yuchen Lin", "authorids": "~Yujie_Lu1;~Dongfu_Jiang1;~Wenhu_Chen3;~William_Yang_Wang2;~Yejin_Choi1;~Bill_Yuchen_Lin1", "gender": ";M;F;M;M;M", "homepage": "https://yujielu10.github.io/;https://jdf-prog.github.io/;https://yejinc.github.io/;http://yuchenlin.xyz/;https://wenhuchen.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": ";336/6970;89/579-1;190/4518;136/0957.html;08/9282", "google_scholar": "pcmr6GMAAAAJ;kciKEPUAAAAJ;vhP-tlcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=U8ShbhUAAAAJ;gf8Ms_8AAAAJ", "orcid": ";0009-0007-9442-6721;;;;", "linkedin": ";dongfu-jiang-a76a15222/;;;;", "or_profile": "~Yujie_Lu1;~Dongfu_Jiang1;~Yejin_Choi1;~Bill_Yuchen_Lin1;~wenhu_chen1;~William_Wang1", "aff": "UC Santa Barbara;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;University of Waterloo;UC Santa Barbara", "aff_domain": "ucsb.edu;allenai.org;cs.washington.edu;allenai.org;uwaterloo.ca;ucsb.edu", "position": "PhD student;Intern;Full Professor;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlu2024wildvision,\ntitle={WildVision: Evaluating Vision-Language Models in the Wild with Human Preferences},\nauthor={Yujie Lu and Dongfu Jiang and Wenhu Chen and William Yang Wang and Yejin Choi and Bill Yuchen Lin},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=i92eyFCQHC}\n}", "github": "", "reviewers": "dzdh;WVHU;4AmE;f9Sh", "pdf_size": 24352503, "rating": "4;6;8;8", "confidence": "5;4;3;3", "wc_summary_and_contributions": "55;94;71;39", "wc_strengths": "54;55;88;44", "wc_improvement": "86;28;115;17", "wc_limitations": "1;53;14;46", "wc_correctness": "1;1;2;11", "wc_clarity": "1;1;1;5", "wc_relation_to_prior_work": "1;1;5;13", "wc_documentation": "1;1;1;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "201;235;298;186", "wc_reply_reviewers": "0;185;0;0", "wc_reply_authors": "748;832;331;211", "reply_reviewers": "0;2;0;0", "reply_authors": "2;4;1;1", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 64.75, 20.327014045353536 ], "wc_strengths_avg": [ 60.25, 16.588776326179094 ], "wc_improvement_avg": [ 61.5, 40.512343797909296 ], "wc_limitations_avg": [ 28.5, 21.639085008382402 ], "wc_correctness_avg": [ 3.75, 4.205650960315181 ], "wc_clarity_avg": [ 2.0, 1.7320508075688772 ], "wc_relation_to_prior_work_avg": [ 5.0, 4.898979485566356 ], "wc_documentation_avg": [ 3.25, 3.897114317029974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 230.0, 43.087121045621046 ], "wc_reply_reviewers_avg": [ 46.25, 80.10734985006057 ], "wc_reply_authors_avg": [ 530.5, 264.61717631325445 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11840427840945496858&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucsb.edu;allenai.org;cs.washington.edu;allenai.org;uwaterloo.ca;ucsb.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;3;0", "aff_unique_norm": "University of California, Santa Barbara;Allen Institute for Artificial Intelligence;University of Washington;University of Waterloo", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://www.ucsb.edu;https://allenai.org;https://www.washington.edu;https://uwaterloo.ca", "aff_unique_abbr": "UCSB;AI2;UW;UW", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Santa Barbara;;Seattle", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "HiCo: Hierarchical Controllable Diffusion Model for Layout-to-image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94025", "id": "i9QpRjUAhv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=i9QpRjUAhv", "openreview": "https://openreview.net/forum?id=i9QpRjUAhv", "poster": "/media/PosterPDFs/NeurIPS%202024/94025.png?t=1730425693.0597887", "project": "", "author_site": "Bocheng, Yuhang Ma, wuliebucha, Shanyuan Liu, Ao Ma, Xiaoyu Wu, Dawei Leng, Yuhui Yin", "tldr": "", "abstract": "The task of layout-to-image generation involves synthesizing images based on the captions of objects and their spatial positions. Existing methods still struggle in complex layout generation, where common bad cases include object missing, inconsistent lighting, conflicting view angles, etc. To effectively address these issues, we propose a \\textbf{Hi}erarchical \\textbf{Co}ntrollable (HiCo) diffusion model for layout-to-image generation, featuring object seperable conditioning branch structure. Our key insight is to achieve spatial disentanglement through hierarchical modeling of layouts. We use a multi branch structure to represent hierarchy and aggregate them in fusion module. To evaluate the performance of multi-objective controllable layout generation in natural scenes, we introduce the HiCo-7K benchmark, derived from the GRIT-20M dataset and manually cleaned. https://github.com/360CVGroup/HiCo_T2I.", "keywords": "Text to Image;Latent Diffusion Model;Layout to Image", "primary_area": "generative_models", "supplementary_material": "", "author": "Bocheng;YuhangMa;wuliebucha;Shanyuan Liu;Ao Ma;Xiaoyu Wu;Dawei Leng;Yuhui Yin", "authorids": "~Bocheng1;~YuhangMa1;~wuliebucha1;~Shanyuan_Liu2;~Ao_Ma2;~Xiaoyu_Wu2;~Dawei_Leng1;~Yuhui_Yin1", "gender": "M;M;F;;M;;M;M", "homepage": "https://github.com/cbstyle;https://github.com/JohnnMa;https://github.com/wuliebucha;;https://github.com/MaAo;https://github.com/123wxy123;;", "dblp": ";;;;;;;193/4950", "google_scholar": "https://scholar.google.com.hk/citations?user=_zz3R38AAAAJ;;;vutZ7LcAAAAJ;https://scholar.google.com/citations?hl=en;;;pv6QIggAAAAJ", "orcid": "0009-0008-4920-4852;;;;;;;", "linkedin": ";;;;;;%E5%AE%87%E8%BE%89-%E6%AE%B7-7147bb251/;", "or_profile": "~Bocheng1;~YuhangMa1;~wuliebucha1;~Shanyuan_Liu2;~Ao_Ma2;~Xiaoyu_Wu2;~Yuhui_Yin1;~Leng_DaWei1", "aff": "Qihoo 360;Shopee;Qihoo 360;360 AI Institute;Qihoo 360;qihoo,inc;360 AI Lab;360 AI Institute", "aff_domain": "360.cn;shopee.com;360.com;ai.360.cn;360.cn;360.cn;lab.ai.360.cn;ai.360.cn", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nbocheng2024hico,\ntitle={HiCo: Hierarchical Controllable Diffusion Model for Layout-to-image Generation},\nauthor={Bocheng and YuhangMa and wuliebucha and Shanyuan Liu and Ao Ma and Xiaoyu Wu and Dawei Leng and Yuhui Yin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=i9QpRjUAhv}\n}", "github": "", "reviewers": "6Tmf;72RR;iE3j", "pdf_size": 8825231, "rating": "5;6;7", "confidence": "5;4;5", "soundness": "2;2;4", "novelty": "2;3;4", "presentation": "2;3;3", "wc_summary": "115;51;56", "wc_strengths": "41;37;70", "wc_weaknesses": "195;83;199", "wc_questions": "30;75;3", "wc_limitations": "8;13;9", "wc_review": "389;259;337", "wc_reply_reviewers": "58;50;55", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.0, 29.06314963431642 ], "wc_strengths_avg": [ 49.333333333333336, 14.704496666741854 ], "wc_weaknesses_avg": [ 159.0, 53.7649204097492 ], "wc_questions_avg": [ 36.0, 29.698484809834994 ], "wc_limitations_avg": [ 10.0, 2.160246899469287 ], "wc_review_avg": [ 328.3333333333333, 53.42492135906446 ], "wc_reply_reviewers_avg": [ 54.333333333333336, 3.299831645537222 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14715630029806688920&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "360.cn;shopee.com;360.com;ai.360.cn;360.cn;360.cn;lab.ai.360.cn;ai.360.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;0;3;4;2", "aff_unique_norm": "Qihoo 360;Shopee;360 AI Institute;Qihoo Inc.;360 AI Lab", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.qihoo.net;https://shopee.sg;;https://www.qihoo.net;", "aff_unique_abbr": "Qihoo;Shopee;;Qihoo;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "ConceptFactory: Facilitate 3D Object Knowledge Annotation with Object Conceptualization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97559", "id": "iACMjECRjV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iACMjECRjV", "openreview": "https://openreview.net/forum?id=iACMjECRjV", "poster": "/media/PosterPDFs/NeurIPS%202024/97559.png?t=1730608245.5498989", "project": "", "author_site": "Jianhua Sun, Yuxuan Li, Longfei Xu, Nange Wang, Jiude Wei, Yining Zhang, Cewu Lu", "tldr": "", "abstract": "We present ConceptFactory, a novel scope to facilitate more efficient annotation of 3D object knowledge by recognizing 3D objects through generalized concepts (i.e. object conceptualization), aiming at promoting machine intelligence to learn comprehensive object knowledge from both vision and robotics aspects. This idea originates from the findings in human cognition research that the perceptual recognition of objects can be explained as a process of arranging generalized geometric components (e.g. cuboids and cylinders). ConceptFactory consists of two critical parts: i) ConceptFactory Suite, a unified toolbox that adopts Standard Concept Template Library (STL-C) to drive a web-based platform for object conceptualization, and ii) ConceptFactory Asset, a large collection of conceptualized objects acquired using ConceptFactory suite. Our approach enables researchers to effortlessly acquire or customize extensive varieties of object knowledge to comprehensively study different object understanding tasks. We validate our idea on a wide range of benchmark tasks from both vision and robotics aspects with state-of-the-art algorithms, demonstrating the high quality and versatility of annotations provided by our approach. Our website is available at https://apeirony.github.io/ConceptFactory.", "keywords": "3D object;articulated object;object manipulation;affordance;knowledge annotation", "primary_area": "", "supplementary_material": "/attachment/95ca78018c9c2595c9765d736daf5af6379fc6ec.pdf", "author": "Jianhua Sun;Yuxuan Li;Longfei Xu;Nange Wang;Jiude Wei;Yining Zhang;Cewu Lu", "authorids": "~Jianhua_Sun1;~Yuxuan_Li2;~Longfei_Xu2;~Nange_Wang1;~Jiude_Wei1;~Yining_Zhang5;~Cewu_Lu3", "gender": "M;M;M;M;M;M;M", "homepage": "https://gothicai.github.io/;https://github.com/ApeironY;;https://github.com/wngnbr;https://kizunawl.github.io/;https://github.com/zyns1748;https://www.mvig.org/", "dblp": "36/1447-3;;;;;;", "google_scholar": "L0hoY3kAAAAJ;;7daNLEAAAAAJ;;;;https://scholar.google.com.tw/citations?user=QZVQEWAAAAAJ", "orcid": ";0000-0001-9918-4310;;;;;", "linkedin": ";;;;;;", "or_profile": "~Jianhua_Sun1;~Yuxuan_Li2;~Longfei_Xu2;~Nange_Wang1;~Jiude_Wei1;~Yining_Zhang5;~Cewu_Lu3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;PhD student;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nsun2024conceptfactory,\ntitle={ConceptFactory: Facilitate 3D Object Knowledge Annotation with Object Conceptualization},\nauthor={Jianhua Sun and Yuxuan Li and Longfei Xu and Nange Wang and Jiude Wei and Yining Zhang and Cewu Lu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iACMjECRjV}\n}", "github": "", "reviewers": "mVG2;S87Q;r1o8;Eyxu", "pdf_size": 3616356, "rating": "6;6;6;7", "confidence": "4;4;4;4", "wc_summary_and_contributions": "66;111;151;46", "wc_strengths": "15;79;132;2", "wc_improvement": "50;28;53;28", "wc_limitations": "7;99;9;11", "wc_correctness": "5;1;1;5", "wc_clarity": "12;1;1;6", "wc_relation_to_prior_work": "9;1;1;7", "wc_documentation": "1;1;14;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "166;322;363;119", "wc_reply_reviewers": "0;50;0;0", "wc_reply_authors": "0;75;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 93.5, 40.697051490249265 ], "wc_strengths_avg": [ 57.0, 52.19674319342156 ], "wc_improvement_avg": [ 39.75, 11.797775213996918 ], "wc_limitations_avg": [ 31.5, 38.99679474008088 ], "wc_correctness_avg": [ 3.0, 2.0 ], "wc_clarity_avg": [ 5.0, 4.527692569068709 ], "wc_relation_to_prior_work_avg": [ 4.5, 3.570714214271425 ], "wc_documentation_avg": [ 7.25, 6.2599920127744575 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 242.5, 102.40239255017434 ], "wc_reply_reviewers_avg": [ 12.5, 21.650635094610966 ], "wc_reply_authors_avg": [ 18.75, 32.47595264191645 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3507706935699386881&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Scaling Retrieval-Based Language Models with a Trillion-Token Datastore", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94024", "id": "iAkhPz7Qt3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iAkhPz7Qt3", "openreview": "https://openreview.net/forum?id=iAkhPz7Qt3", "poster": "", "project": "", "author_site": "Rulin Shao, Jacqueline He, Akari Asai, Weijia Shi, Tim Dettmers, Sewon Min, Luke Zettlemoyer, Pang Wei Koh", "tldr": "", "abstract": "Scaling laws with respect to the amount of training data and the number of parameters allow us to predict the cost-benefit trade-offs of pretraining language models (LMs) in different configurations. In this paper, we consider another dimension of scaling: the amount of data available at inference time. Specifically, we find that increasing the size of the datastore used by a retrieval-based LM monotonically improves language modeling and several downstream tasks without obvious saturation, such that a smaller model augmented with a large datastore outperforms a larger LM-only model on knowledge-intensive tasks. By plotting compute-optimal scaling curves with varied datastore, model, and pretraining data sizes, we show that using larger datastores can significantly improve model performance for the same training compute budget. We carry out our study by constructing a 1.4 trillion-token datastore named MassiveDS, which is the largest and the most diverse open-sourced datastore for retrieval-based LMs to date, and designing an efficient pipeline for studying datastore scaling in an accessible manner. Finally, we analyze the effect of improving the retriever, datastore quality filtering, and other design choices on our observed scaling trends. Overall, our results show that datastore size should be considered as an integral part of LM efficiency and performance trade-offs. To facilitate future research, we open-source our datastore and code at https://github.com/RulinShao/retrieval-scaling.", "keywords": "retrieval-augmented language model;scaling;trillion-token datastore;RAG", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Rulin Shao;Jacqueline He;Akari Asai;Weijia Shi;Tim Dettmers;Sewon Min;Luke Zettlemoyer;Pang Wei Koh", "authorids": "~Rulin_Shao1;~Jacqueline_He1;~Akari_Asai2;~Weijia_Shi1;~Tim_Dettmers2;~Sewon_Min1;~Luke_Zettlemoyer1;~Pang_Wei_Koh1", "gender": ";F;F;;M;F;M;M", "homepage": "https://rulinshao.github.io/;http://jacqueline-he.github.io;https://akariasai.github.io/;https://weijiashi.notion.site/;https://timdettmers.com/;https://www.sewonmin.com;https://www.cs.washington.edu/people/faculty/lsz/;http://cs.stanford.edu/~pangwei", "dblp": ";319/3146.html;;132/80601;172/1045;203/9401;21/6793;10/10453", "google_scholar": "Vdwh6bcAAAAJ;;gqB4u_wAAAAJ;https://scholar.google.com/citations?hl=en;lHI3w5kAAAAJ;https://scholar.google.ca/citations?user=jU4IZs4AAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;Nn990CkAAAAJ", "orcid": ";;;0000-3200-0000-0011;;;;", "linkedin": ";;;weijia-shi-773768112;;;luke-zettlemoyer-a0109b226/;", "or_profile": "~Rulin_Shao1;~Jacqueline_He1;~Akari_Asai2;~Weijia_Shi1;~Tim_Dettmers2;~Sewon_Min1;~Luke_Zettlemoyer1;~Pang_Wei_Koh1", "aff": "University of Washington;University of Washington;Paul G. Allen School of Computer Science & Engineering, University of Washington;University of Washington, Seattle;University of Washington;Department of Computer Science, University of Washington;Meta;University of Washington", "aff_domain": "uw.edu;uw.edu;cs.washington.edu;uw.edu;cs.washington.edu;cs.washington.edu;meta.com;cs.washington.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshao2024scaling,\ntitle={Scaling Retrieval-Based Language Models with a Trillion-Token Datastore},\nauthor={Rulin Shao and Jacqueline He and Akari Asai and Weijia Shi and Tim Dettmers and Sewon Min and Luke Zettlemoyer and Pang Wei Koh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iAkhPz7Qt3}\n}", "github": "", "reviewers": "F6AB;AKfc;vyTD;omwP;TsAT;cEKQ", "pdf_size": 12115990, "rating": "5;6;6;7;7;7", "confidence": "4;4;4;3;3;4", "soundness": "2;3;3;3;4;4", "novelty": "3;3;2;3;3;2", "presentation": "3;4;4;4;4;4", "wc_summary": "37;75;69;74;67;131", "wc_strengths": "40;58;75;95;188;106", "wc_weaknesses": "259;433;118;48;85;203", "wc_questions": "148;186;13;176;6;25", "wc_limitations": "1;70;8;67;11;44", "wc_review": "485;822;283;460;357;509", "wc_reply_reviewers": "0;216;0;19;0;59", "wc_reply_authors": "87;745;0;0;0;0", "reply_reviewers": "0;2;0;1;0;1", "reply_authors": "2;3;1;1;1;1", "rating_avg": [ 6.333333333333333, 0.7453559924999298 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.1666666666666665, 0.6871842709362768 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 75.5, 27.926988141700253 ], "wc_strengths_avg": [ 93.66666666666667, 47.527769660366864 ], "wc_weaknesses_avg": [ 191.0, 129.32259405584676 ], "wc_questions_avg": [ 92.33333333333333, 78.69067430275472 ], "wc_limitations_avg": [ 33.5, 28.217902119044926 ], "wc_review_avg": [ 486.0, 169.28870803059095 ], "wc_reply_reviewers_avg": [ 49.0, 77.55858001106863 ], "wc_reply_authors_avg": [ 138.66666666666666, 273.0150585997451 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.7453559924999299 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6324555320336761, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17366483891427227375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uw.edu;uw.edu;cs.washington.edu;uw.edu;cs.washington.edu;cs.washington.edu;meta.com;cs.washington.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "University of Washington;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;https://meta.com", "aff_unique_abbr": "UW;Meta", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Similarity-Navigated Conformal Prediction for Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94023", "id": "iBZSOh027z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iBZSOh027z", "openreview": "https://openreview.net/forum?id=iBZSOh027z", "poster": "/media/PosterPDFs/NeurIPS%202024/94023.png?t=1731156288.7931197", "project": "", "author_site": "Jianqing Song, Jianguo Huang, Wenyu Jiang, Baoming Zhang, Shuangjie Li, Chongjun Wang", "tldr": "", "abstract": "Graph Neural Networks have achieved remarkable accuracy in semi-supervised node classification tasks. However, these results lack reliable uncertainty estimates. Conformal prediction methods provide a theoretical guarantee for node classification tasks, ensuring that the conformal prediction set contains the ground-truth label with a desired probability (e.g., 95\\%). In this paper, we empirically show that for each node, aggregating the non-conformity scores of nodes with the same label can improve the efficiency of conformal prediction sets while maintaining valid marginal coverage. This observation motivates us to propose a novel algorithm named $\\textit{Similarity-Navigated Adaptive Prediction Sets}$ (SNAPS), which aggregates the non-conformity scores based on feature similarity and structural neighborhood. The key idea behind SNAPS is that nodes with high feature similarity or direct connections tend to have the same label. By incorporating adaptive similar nodes information, SNAPS can generate compact prediction sets and increase the singleton hit ratio (correct prediction sets of size one). Moreover, we theoretically provide a finite-sample coverage guarantee of SNAPS. Extensive experiments demonstrate the superiority of SNAPS, improving the efficiency of prediction sets and singleton hit ratio while maintaining valid coverage.", "keywords": "Graph Neural Networks;Node Classification;Conformal Prediction;Uncertainty Estimation", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/ca853b9563893198fdd0a432c5cdfdf00e23c0b7.zip", "author": "Jianqing Song;Jianguo Huang;Wenyu Jiang;Baoming Zhang;Shuangjie Li;Chongjun Wang", "authorids": "~Jianqing_Song1;~Jianguo_Huang2;~Wenyu_Jiang1;~Baoming_Zhang1;~Shuangjie_Li2;~Chongjun_Wang1", "gender": "M;M;M;F;M;M", "homepage": "https://jianguo99.github.io/;;;https://github.com/LiShuangjie;;http://iip.nju.edu.cn", "dblp": ";;79/7838;30/10212.html;07/146;", "google_scholar": "r-BhZGwAAAAJ;mdOH9HQAAAAJ;ftfHnfQAAAAJ;;;", "orcid": ";;;;;", "linkedin": "jianguo-huang-1471b0232/;;;;;", "or_profile": "~Jianguo_Huang2;~Wenyu_Jiang1;~Baoming_Zhang1;~Shuangjie_Li2;~Chongjun_Wang1;~Song_Jianqing1", "aff": "ShanghaiTech University;Nanjing University;Nanjing University;Nanjing university;Nanjing University;Nanjing University", "aff_domain": "shanghaitech.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nsong2024similaritynavigated,\ntitle={Similarity-Navigated Conformal Prediction for Graph Neural Networks},\nauthor={Jianqing Song and Jianguo Huang and Wenyu Jiang and Baoming Zhang and Shuangjie Li and Chongjun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iBZSOh027z}\n}", "github": "", "reviewers": "xCn2;3cJc;aWFN;8yy7", "pdf_size": 797494, "rating": "6;6;6;7", "confidence": "2;3;5;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "74;47;67;55", "wc_strengths": "66;59;50;40", "wc_weaknesses": "97;101;201;33", "wc_questions": "9;89;195;53", "wc_limitations": "13;3;34;1", "wc_review": "259;299;547;182", "wc_reply_reviewers": "18;18;14;20", "wc_reply_authors": "19;28;32;26", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.75, 10.449282272003183 ], "wc_strengths_avg": [ 53.75, 9.756408150543928 ], "wc_weaknesses_avg": [ 108.0, 60.091596750294464 ], "wc_questions_avg": [ 86.5, 68.75136362284024 ], "wc_limitations_avg": [ 12.75, 13.083864108129525 ], "wc_review_avg": [ 321.75, 136.67731157730606 ], "wc_reply_reviewers_avg": [ 17.5, 2.179449471770337 ], "wc_reply_authors_avg": [ 26.25, 4.710360920354193 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6321386396973565193&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "shanghaitech.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu;nju.edu.cn;nju.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "ShanghaiTech University;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.nju.edu.cn", "aff_unique_abbr": "ShanghaiTech;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ProEdit: Simple Progression is All You Need for High-Quality 3D Scene Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94022", "id": "iC869BBmc5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iC869BBmc5", "openreview": "https://openreview.net/forum?id=iC869BBmc5", "poster": "", "project": "", "author_site": "Junkun Chen, Yu-Xiong Wang", "tldr": "", "abstract": "This paper proposes ProEdit - a simple yet effective framework for high-quality 3D scene editing guided by diffusion distillation in a novel progressive manner. Inspired by the crucial observation that multi-view inconsistency in scene editing is rooted in the diffusion model\u2019s large feasible output space (FOS), our framework controls the size of FOS and reduces inconsistency by decomposing the overall editing task into several subtasks, which are then executed progressively on the scene. Within this framework, we design a difficulty-aware subtask decomposition scheduler and an adaptive 3D Gaussian splatting (3DGS) training strategy, ensuring high efficiency in performing each subtask. Extensive evaluation shows that our ProEdit achieves state-of-the-art results in various scenes and challenging editing tasks, all through a simple framework without any expensive or sophisticated add-ons like distillation losses, components, or training procedures. Notably, ProEdit also provides a new way to preview, control, and select the aggressivity of editing operation during the editing process.", "keywords": "3D Scene Editing;Diffusion Model;3D Gaussian Splatting", "primary_area": "generative_models", "supplementary_material": "/attachment/36126a03ad7fed9b3278fdac67dc8bbfdbd102dd.zip", "author": "Jun-Kun Chen;Yu-Xiong Wang", "authorids": "~Jun-Kun_Chen1;~Yu-Xiong_Wang1", "gender": ";M", "homepage": "https://yxw.cs.illinois.edu/;https://scholar.google.com/citations?user=_m5__wUAAAAJ", "dblp": "35/10700;333/0859", "google_scholar": "T_Q-xDkAAAAJ;_m5__wUAAAAJ", "orcid": ";0000-0002-3431-0870", "linkedin": ";junkun-chen-a24520167/", "or_profile": "~Yu-Xiong_Wang1;~Junkun_Chen2", "aff": "Department of Computer Science, University of Illinois Urbana-Champaign;SpreeAI", "aff_domain": "cs.illinois.edu;spreeai.com", "position": "Assistant Professor;Intern", "bibtex": "@inproceedings{\nchen2024proedit,\ntitle={ProEdit: Simple Progression is All You Need for High-Quality 3D Scene Editing},\nauthor={Jun-Kun Chen and Yu-Xiong Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iC869BBmc5}\n}", "github": "", "reviewers": "PcMD;49ym;Mz9K", "pdf_size": 38140853, "rating": "6;6;6", "confidence": "4;4;5", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "55;84;48", "wc_strengths": "30;62;56", "wc_weaknesses": "42;43;151", "wc_questions": "117;192;20", "wc_limitations": "14;23;10", "wc_review": "258;404;285", "wc_reply_reviewers": "0;121;26", "wc_reply_authors": "0;500;27", "reply_reviewers": "0;2;1", "reply_authors": "1;4;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 62.333333333333336, 15.584892970081281 ], "wc_strengths_avg": [ 49.333333333333336, 13.888444437333106 ], "wc_weaknesses_avg": [ 78.66666666666667, 51.14901975817545 ], "wc_questions_avg": [ 109.66666666666667, 70.40991091853161 ], "wc_limitations_avg": [ 15.666666666666666, 5.436502143433364 ], "wc_review_avg": [ 315.6666666666667, 63.42624763367993 ], "wc_reply_reviewers_avg": [ 49.0, 52.00640986134946 ], "wc_reply_authors_avg": [ 175.66666666666666, 229.60303908170633 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7629694373709983020&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "cs.illinois.edu;spreeai.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;SpreeAI", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://illinois.edu;", "aff_unique_abbr": "UIUC;", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0", "aff_country_unique": "United States;" }, { "title": "S-SOS: Stochastic Sum-Of-Squares for Parametric Polynomial Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94021", "id": "iChQIJtjHB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iChQIJtjHB", "openreview": "https://openreview.net/forum?id=iChQIJtjHB", "poster": "", "project": "", "author_site": "Licheng Zhu, Mathias Oster, Yuehaw Khoo", "tldr": "", "abstract": "Global polynomial optimization is an important tool across applied mathematics, with many applications in operations research, engineering, and the physical sciences. In various settings, the polynomials depend on external parameters that may be random. We discuss a stochastic sum-of-squares (S-SOS) algorithm based on the sum-of-squares hierarchy that constructs a series of semidefinite programs to jointly find strict lower bounds on the global minimum and extracts candidates for parameterized global minimizers. We prove quantitative convergence of the hierarchy as the degree increases and use it to solve unconstrained and constrained polynomial optimization problems parameterized by random variables. By employing n-body priors from condensed matter physics to induce sparsity, we can use S-SOS to produce solutions and uncertainty intervals for sensor network localization problems containing up to 40 variables and semidefinite matrix sizes surpassing 800 x 800.", "keywords": "convex optimization;semidefinite programming;sum-of-squares;uncertainty quantification;polynomial optimization", "primary_area": "optimization", "supplementary_material": "/attachment/7cf808f3cffd9270497b58bac627517dfa3c6822.zip", "author": "Richard Licheng Zhu;Mathias Oster;Yuehaw Khoo", "authorids": "~Richard_Licheng_Zhu1;~Mathias_Oster1;~Yuehaw_Khoo1", "gender": ";M;M", "homepage": "https://richardzhu.info;;https://www.stat.uchicago.edu/~ykhoo/", "dblp": ";;", "google_scholar": "BfD_MmcAAAAJ;;iGW44-4AAAAJ", "orcid": "0000-0002-0483-979X;0000-0001-6603-7883;", "linkedin": "therichardzhu;;", "or_profile": "~Richard_Licheng_Zhu1;~Mathias_Oster1;~Yuehaw_Khoo1", "aff": "University of Chicago;Rheinisch Westf\u00e4lische Technische Hochschule Aachen;University of Chicago", "aff_domain": "uchicago.edu;rwth-aachen.de;uchicago.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nzhu2024ssos,\ntitle={S-{SOS}: Stochastic Sum-Of-Squares for Parametric Polynomial Optimization},\nauthor={Richard Licheng Zhu and Mathias Oster and Yuehaw Khoo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iChQIJtjHB}\n}", "github": "", "reviewers": "pRMv;iKyk;Cxce;MyRu;Dyr8", "pdf_size": 723678, "rating": "3;5;6;6;7", "confidence": "4;3;2;3;4", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "264;114;103;71;81", "wc_strengths": "16;47;119;96;29", "wc_weaknesses": "4;150;69;144;87", "wc_questions": "108;10;123;24;61", "wc_limitations": "1;13;28;49;26", "wc_review": "393;334;442;384;284", "wc_reply_reviewers": "0;0;132;0;32", "wc_reply_authors": "0;0;343;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 126.6, 70.37783742059712 ], "wc_strengths_avg": [ 61.4, 39.5808034279245 ], "wc_weaknesses_avg": [ 90.8, 53.58880480100298 ], "wc_questions_avg": [ 65.2, 44.57532949962344 ], "wc_limitations_avg": [ 23.4, 16.0822883943797 ], "wc_review_avg": [ 367.4, 53.97629109155241 ], "wc_reply_reviewers_avg": [ 32.8, 51.124944987745465 ], "wc_reply_authors_avg": [ 68.6, 137.2 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.2758386421836853, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Btrf8D1rY_0J:scholar.google.com/&scioq=S-SOS:+Stochastic+Sum-Of-Squares+for+Parametric+Polynomial+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "uchicago.edu;rwth-aachen.de;uchicago.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Chicago;RWTH Aachen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uchicago.edu;https://www.rwth-aachen.de", "aff_unique_abbr": "UChicago;RWTH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Aachen", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Germany" }, { "title": "$C^2M^3$: Cycle-Consistent Multi-Model Merging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94020", "id": "iD18l6prA7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iD18l6prA7", "openreview": "https://openreview.net/forum?id=iD18l6prA7", "poster": "/media/PosterPDFs/NeurIPS%202024/94020.png?t=1731170026.1073942", "project": "", "author_site": "Donato Crisostomi, Marco Fumero, Daniele Baieri, Florian Bernard, Emanuele Rodol\u00e0", "tldr": "", "abstract": "In this paper, we present a novel data-free method for merging neural networks in weight space. Our method optimizes for the permutations of network neurons while ensuring global coherence across all layers, and it outperforms recent layer-local approaches in a set of challenging scenarios. We then generalize the formulation to the $N$-models scenario to enforce cycle consistency of the permutations with guarantees, allowing circular compositions of permutations to be computed without accumulating error along the path. \n We qualitatively and quantitatively motivate the need for such a constraint, showing its benefits when merging homogeneous sets of models in scenarios spanning varying architectures and datasets. We finally show that, when coupled with activation renormalization, the approach yields the best results in the task.", "keywords": "model merging;linear mode connectivity;deep learning", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/02395ddb821770d2b6a9baa3fe7e6e07607fc71d.zip", "author": "Donato Crisostomi;Marco Fumero;Daniele Baieri;Florian Bernard;Emanuele Rodol\u00e0", "authorids": "~Donato_Crisostomi1;~Marco_Fumero1;~Daniele_Baieri1;~Florian_Bernard3;~Emanuele_Rodol\u00e01", "gender": "M;;M;;M", "homepage": "https://crisostomi.com/;;;https://florianbernard.net;", "dblp": "322/1085;273/9625;322/8620;134/8112;54/8401", "google_scholar": "qi08d7kAAAAJ;VYEljYEAAAAJ;DgUEOm4AAAAJ;https://scholar.google.de/citations?user=9GrQ2KYAAAAJ;-EH4wBYAAAAJ", "orcid": "0000-0001-8180-5203;0000-0001-5614-5004;0000-0002-0704-5960;;0000-0003-0091-7241", "linkedin": "donato-crisostomi/;;;;", "or_profile": "~Donato_Crisostomi1;~Marco_Fumero1;~Daniele_Baieri1;~Florian_Bernard3;~Emanuele_Rodol\u00e01", "aff": "University of Roma \"La Sapienza\";Institute of Science and Technology Austria(ISTA);University of Roma \"La Sapienza\";Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Sapienza University of Rome", "aff_domain": "uniroma1.it;ista.ac.at;uniroma1.it;uni-bonn.de;uniroma1.it", "position": "PhD student;Postdoc;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ncrisostomi2024cm,\ntitle={\\$C{\\textasciicircum}2M{\\textasciicircum}3\\$: Cycle-Consistent Multi-Model Merging},\nauthor={Donato Crisostomi and Marco Fumero and Daniele Baieri and Florian Bernard and Emanuele Rodol{\\`a}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iD18l6prA7}\n}", "github": "", "reviewers": "dfjE;TBfo;mu2n", "pdf_size": 1661740, "rating": "5;6;7", "confidence": "2;4;3", "soundness": "2;4;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "67;49;57", "wc_strengths": "51;68;113", "wc_weaknesses": "197;68;80", "wc_questions": "5;253;175", "wc_limitations": "1;11;1", "wc_review": "321;449;426", "wc_reply_reviewers": "0;12;0", "wc_reply_authors": "1199;192;199", "reply_reviewers": "0;1;0", "reply_authors": "4;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 57.666666666666664, 7.363574011458174 ], "wc_strengths_avg": [ 77.33333333333333, 26.157641755751268 ], "wc_weaknesses_avg": [ 115.0, 58.18934610390462 ], "wc_questions_avg": [ 144.33333333333334, 103.54172535209604 ], "wc_limitations_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_review_avg": [ 398.6666666666667, 55.715547401261546 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 530.0, 473.0630683816553 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15962849261430716532&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uniroma1.it;ista.ac.at;uniroma1.it;uni-bonn.de;uniroma1.it", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of Rome La Sapienza;Institute of Science and Technology Austria;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Sapienza University of Rome", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uniroma1.it;https://www.ista.ac.at;https://www.uni-bonn.de/;https://www.uniroma1.it", "aff_unique_abbr": "La Sapienza;ISTA;Uni Bonn;Sapienza", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "Italy;Austria;Germany" }, { "title": "PROSPECT PTMs: Rich Labeled Tandem Mass Spectrometry Dataset of Modified Peptides for Machine Learning in Proteomics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97558", "id": "iDg6ktCf6W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iDg6ktCf6W", "openreview": "https://openreview.net/forum?id=iDg6ktCf6W", "poster": "/media/PosterPDFs/NeurIPS%202024/97558.png?t=1733864305.8832982", "project": "", "author_site": "Wassim Gabriel, Omar Shouman, Eva Ayla Schr\u00f6der, Florian B\u00f6\u00dfl, Mathias Wilhelm", "tldr": "", "abstract": "Post-Translational Modifications (PTMs) are changes that occur in proteins after synthesis, influencing their structure, function, and cellular behavior. PTMs are essential in cell biology; they regulate protein function and stability, are involved in various cellular processes, and are linked to numerous diseases. A particularly interesting class of PTMs are chemical modifications such as phosphorylation introduced on amino acid side chains because they can drastically alter the physicochemical properties of the peptides once they are present. One or more PTMs can be attached to each amino acid of the peptide sequence. The most commonly applied technique to detect PTMs on proteins is bottom-up Mass Spectrometry-based proteomics (MS), where proteins are digested into peptides and subsequently analyzed using Tandem Mass Spectrometry (MS/MS). While an increasing number of machine learning models are published focusing on MS/MS-related property prediction of unmodified peptides, high-quality reference data for modified peptides is missing, impeding model development for this important class of peptides. To enable researchers to train machine learning models that can accurately predict the properties of modified peptides, we introduce four high-quality labeled datasets for applying machine and deep learning to tasks in MS-based proteomics. The four datasets comprise several subgroups of peptides with 1.2 million unique modified peptide sequences and 30 unique pairs of (amino-acid, PTM), covering both experimentally introduced and naturally occurring modifications on various amino acids. We evaluate the utility and importance of the dataset by providing benchmarking results on models trained with and without modifications and highlighting the impact of including modified sequences on downstream tasks. We demonstrate that predicting the properties of modified peptides is more challenging but has a broad impact since they are often the core of protein functionality and its regulation, and they have a potential role as biomarkers in clinical applications. Our datasets contribute to applied machine learning in proteomics by enabling the research community to experiment with methods to encode PTMs as model inputs and to benchmark against reference data for model comparison. With a proper data split for three common tasks in proteomics, we provide a robust way to evaluate model performance and assess generalization on unseen modified sequences.", "keywords": "Proteomics;Deep Learning;Machine Learning;Dataset;Mass Spectrometry;Retention Time;Annotated Spectra;Neutral Losses;ProteomeTools;Fragment Ion Intensity;Precursor Charge;modifications;PTMs;Peptides", "primary_area": "", "supplementary_material": "/attachment/9bf10995463a51017c7b1d9290866315ba79a35b.pdf", "author": "Wassim Gabriel;Omar Shouman;Eva Ayla Schr\u00f6der;Florian B\u00f6\u00dfl;Mathias Wilhelm", "authorids": "~Wassim_Gabriel1;~Omar_Shouman1;~Eva_Ayla_Schr\u00f6der1;~Florian_B\u00f6\u00dfl1;~Mathias_Wilhelm1", "gender": "M;M;F;M;M", "homepage": ";https://github.com/omsh;;https://www.bio.ifi.lmu.de/bim/;https://www1.ls.tum.de/en/compms/group-members/people/mathias-wilhelm/", "dblp": ";;;;", "google_scholar": "m-JawDMAAAAJ;pbnOwM0AAAAJ;;;VNA2wwsAAAAJ", "orcid": "0000-0001-6440-9794;0000-0002-9077-3036;;;0000-0002-9224-3258", "linkedin": "wassim-gabriel-93b91737;omar-shouman;eva-ayla-schr%C3%B6der-563433235;;mathias-wilhelm-9567b7181/", "or_profile": "~Wassim_Gabriel1;~Omar_Shouman1;~Eva_Ayla_Schr\u00f6der1;~Florian_B\u00f6\u00dfl1;~Mathias_Wilhelm1", "aff": ";Technical University of Munich;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": ";tum.de;tum.de;tum.de;tum.de", "position": ";PhD student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\ngabriel2024prospect,\ntitle={{PROSPECT} {PTM}s: Rich Labeled Tandem Mass Spectrometry Dataset of Modified Peptides for Machine Learning in Proteomics},\nauthor={Wassim Gabriel and Omar Shouman and Eva Ayla Schr{\\\"o}der and Florian B{\\\"o}{\\ss}l and Mathias Wilhelm},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iDg6ktCf6W}\n}", "github": "", "reviewers": "LgtK;9C1C;56xj", "pdf_size": 2049804, "rating": "5;7;7", "confidence": "5;5;4", "wc_summary_and_contributions": "115;20;75", "wc_strengths": "39;75;2", "wc_improvement": "40;61;2", "wc_limitations": "41;54;1", "wc_correctness": "1;81;1", "wc_clarity": "1;141;6", "wc_relation_to_prior_work": "1;19;16", "wc_documentation": "1;13;1", "wc_additional_feedback": "1;1;1", "wc_review": "240;465;105", "wc_reply_reviewers": "0;18;0", "wc_reply_authors": "606;493;233", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 70.0, 38.94440481849308 ], "wc_strengths_avg": [ 38.666666666666664, 29.80305726300948 ], "wc_improvement_avg": [ 34.333333333333336, 24.417662095749915 ], "wc_limitations_avg": [ 32.0, 22.55363976538303 ], "wc_correctness_avg": [ 27.666666666666668, 37.71236166328253 ], "wc_clarity_avg": [ 49.333333333333336, 64.85025486114573 ], "wc_relation_to_prior_work_avg": [ 12.0, 7.874007874011811 ], "wc_documentation_avg": [ 5.0, 5.656854249492381 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 270.0, 148.49242404917499 ], "wc_reply_reviewers_avg": [ 6.0, 8.48528137423857 ], "wc_reply_authors_avg": [ 444.0, 156.16871218866686 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4073304756162963064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";tum.de;tum.de;tum.de;tum.de", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Technical University of Munich;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "II-Bench: An Image Implication Understanding Benchmark for Multimodal Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97557", "id": "iEN2linUr8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iEN2linUr8", "openreview": "https://openreview.net/forum?id=iEN2linUr8", "poster": "/media/PosterPDFs/NeurIPS%202024/97557.png?t=1729507061.3839457", "project": "", "author_site": "Ziqiang Liu, Feiteng Fang, Xi Feng, Xeron Du, Chenhao Zhang, Noah Wang, yuelin bai, Qixuan Zhao, Liyang Fan, CHENGGUANG GAN, Hongquan Lin, Jiaming Li, Yuansheng Ni, Haihong Wu, Yaswanth Narsupalli, Zhigang Zheng, Chengming Li, Xiping Hu, Ruifeng Xu, Xiaojun Chen, Min Yang, Jiaheng Liu, Ruibo Liu, Wenhao Huang, Ge Zhang, Shiwen Ni", "tldr": "", "abstract": "The rapid advancements in the development of multimodal large language models (MLLMs) have consistently led to new breakthroughs on various benchmarks. In response, numerous challenging and comprehensive benchmarks have been proposed to more accurately assess the capabilities of MLLMs. However, there is a dearth of exploration of the higher-order perceptual capabilities of MLLMs. To fill this gap, we propose the Image Implication understanding Benchmark, II-Bench, which aims to evaluate the model's higher-order perception of images. Through extensive experiments on II-Bench across multiple MLLMs, we have made significant findings. Initially, a substantial gap is observed between the performance of MLLMs and humans on II-Bench. The pinnacle accuracy of MLLMs attains 74.8%, whereas human accuracy averages 90%, peaking at an impressive 98%. Subsequently, MLLMs perform worse on abstract and complex images, suggesting limitations in their ability to understand high-level semantics and capture image details. Finally, it is observed that most models exhibit enhanced accuracy when image sentiment polarity hints are incorporated into the prompts. This observation underscores a notable deficiency in their inherent understanding of image sentiment. We believe that II-Bench will inspire the community to develop the next generation of MLLMs, advancing the journey towards expert artificial general intelligence (AGI). II-Bench is publicly available at https://huggingface.co/datasets/m-a-p/II-Bench.", "keywords": "Multimodal;Image Understanding;Image Implication;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/03d997462b65086c03765fc74e4867c9e456ef91.pdf", "author": "Ziqiang Liu;Feiteng Fang;Xi Feng;Xeron Du;Chenhao Zhang;Noah Wang;yuelin bai;Qixuan Zhao;Liyang Fan;CHENGGUANG GAN;Hongquan Lin;Jiaming Li;Yuansheng Ni;Haihong Wu;Yaswanth Narsupalli;Zhigang Zheng;Chengming Li;Xiping Hu;Ruifeng Xu;Xiaojun Chen;Min Yang;Jiaheng Liu;Ruibo Liu;Wenhao Huang;Ge Zhang;Shiwen Ni", "authorids": "~Ziqiang_Liu2;~Feiteng_Fang1;~Xi_Feng1;~Xeron_Du1;~Chenhao_Zhang5;~Noah_Wang4;~yuelin_bai2;~Qixuan_Zhao2;~Liyang_Fan1;~CHENGGUANG_GAN1;~Hongquan_Lin1;~Jiaming_Li7;~Yuansheng_Ni1;~Haihong_Wu2;~Yaswanth_Narsupalli1;~Zhigang_Zheng3;~Chengming_Li1;~Xiping_Hu1;~Ruifeng_Xu1;~Xiaojun_Chen4;~Min_Yang6;~Jiaheng_Liu1;~Ruibo_Liu1;~Wenhao_Huang1;~Ge_Zhang5;~Shiwen_Ni1", "gender": "M;M;M;;M;;Not Specified;M;M;M;M;M;;M;M;;;;M;M;F;M;M;;M;", "homepage": ";;https://github.com/NozomiX1;;https://ming-zch.github.io/;;https://github.com/paralym;https://github.com/MeaKagura;https://relic-yuexi.github.io/;https://ganchengguang.github.io/;http://lhqvq.com;https://www.geaming.cn/;;https://blog.csdn.net/dortorwu?type=blog;;;;https://scholar.google.com/citations?hl=en&user=Mc93YSEAAAAJ&view_op=list_works;http://faculty.hitsz.edu.cn/xuruifeng;https://bruce-xjchen.github.io/HomePage/;https://minyang.me/;https://liujiaheng.github.io/;https://www.cs.dartmouth.edu/~rbliu/;;;", "dblp": ";351/9853.html;;;337/1091-5;;371/4009;;;326/1475.html;;;;;359/0633.html;;;81/10098.html;93/5407-1;20/3215-6;02/1640-7;225/1962;;;;279/5319.html", "google_scholar": "JT40MWMAAAAJ;F260lzsAAAAJ;https://scholar.google.com.hk/citations?user=DGuZvecAAAAJ;;NfYxy-MAAAAJ;;TbtRAakAAAAJ;;8DZLKyUAAAAJ;FepMPoMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;;0xLN_4gAAAAJ;;-f1AUBcAAAAJ;https://scholar.google.com/citations?hl=en;mObXnNIAAAAJ;yAjyrwkAAAAJ;_wop6KgAAAAJ;yFI_RjUAAAAJ;5lgfeo4AAAAJ;;qyTrq4kAAAAJ;https://scholar.google.com.tw/citations?user=ln4hmCwAAAAJ", "orcid": ";0009-0002-2971-9572;;;0009-0002-6782-8745;;;;;0000-0001-8034-0993;;;;;0009-0001-7364-8188;;0000-0002-4592-3875;0000-0002-4952-699X;0000-0002-4009-5679;0000-0002-2818-4652;;;;;;0000-0002-4986-4446", "linkedin": ";;;;chen-hao-zhang;;;;;;;;;;narsupalli-yaswanth/;;;;;;;;;;ge-zhang-792797169/;", "or_profile": "~Ziqiang_Liu2;~Feiteng_Fang1;~Xi_Feng1;~Xeron_Du1;~Chenhao_Zhang5;~Noah_Wang4;~yuelin_bai2;~Qixuan_Zhao2;~Liyang_Fan1;~CHENGGUANG_GAN1;~Hongquan_Lin1;~Jiaming_Li7;~Yuansheng_Ni1;~Haihong_Wu2;~Yaswanth_Narsupalli1;~Zhigang_Zheng3;~Chengming_Li1;~Xiping_Hu1;~Ruifeng_Xu1;~Xiaojun_Chen4;~Min_Yang6;~Jiaheng_Liu1;~Ruibo_Liu1;~Wenhao_Huang1;~Ge_Zhang5;~Shiwen_Ni1", "aff": "Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;University of Science and Technology of China;University of Science and Technology of China;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences;;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;University of Science and Technology of China;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Yokohama National University;Microsoft;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;University of Science and Technology of China;IIT Kharagpur;; Shenzhen MSU-BIT University;Beijing Institute of Technology;Harbin Institute of Technology;Shenzhen University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Alibaba Group;Google DeepMind;;University of Waterloo;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences", "aff_domain": "siat.ac.cn;mail.ustc.edu.cn;ustc.edu.cn;;siat.ac.cn;;siat.ac.cn;mail.ustc.edu.cn;siat.ac.cn;ynu.ac.jp;microsoft.com;siat.ac.cn;;mail.ustc.edu.cn;iitkgp.ac.in;;smbu.edu.cn;bit.edu.cn;hit.edu.cn;szu.edu.cn;siat.ac.cn;alibaba-inc.com;google.com;;cs.uwaterloo.ca;siat.ac.cn", "position": "MS student;MS student;MS student;;Intern;;MS student;MS student;MS student;PhD student;Intern;Undergrad student;;MS student;MS student;;Full Professor;Full Professor;Full Professor;Full Professor;Associate Professor;Researcher;Researcher;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024iibench,\ntitle={{II}-Bench: An Image Implication Understanding Benchmark for Multimodal Large Language Models},\nauthor={Ziqiang Liu and Feiteng Fang and Xi Feng and Xeron Du and Chenhao Zhang and Noah Wang and yuelin bai and Qixuan Zhao and Liyang Fan and CHENGGUANG GAN and Hongquan Lin and Jiaming Li and Yuansheng Ni and Haihong Wu and Yaswanth Narsupalli and Zhigang Zheng and Chengming Li and Xiping Hu and Ruifeng Xu and Xiaojun Chen and Min Yang and Jiaheng Liu and Ruibo Liu and Wenhao Huang and Ge Zhang and Shiwen Ni},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iEN2linUr8}\n}", "github": "", "reviewers": "SyRn;472N;C8Pk", "pdf_size": 10966796, "rating": "5;5;8", "confidence": "5;5;4", "wc_summary_and_contributions": "66;111;42", "wc_strengths": "198;2;116", "wc_improvement": "1;2;41", "wc_limitations": "1;1;36", "wc_correctness": "1;1;48", "wc_clarity": "1;1;28", "wc_relation_to_prior_work": "1;1;54", "wc_documentation": "1;1;59", "wc_additional_feedback": "1;1;1", "wc_review": "271;121;425", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "1813;2696;330", "reply_reviewers": "0;0;0", "reply_authors": "5;8;3", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 73.0, 28.600699292150182 ], "wc_strengths_avg": [ 105.33333333333333, 80.3713602942961 ], "wc_improvement_avg": [ 14.666666666666666, 18.624953392931992 ], "wc_limitations_avg": [ 12.666666666666666, 16.49915822768611 ], "wc_correctness_avg": [ 16.666666666666668, 22.15601247717849 ], "wc_clarity_avg": [ 10.0, 12.727922061357855 ], "wc_relation_to_prior_work_avg": [ 18.666666666666668, 24.984439601924677 ], "wc_documentation_avg": [ 20.333333333333332, 27.34146220587984 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 272.3333333333333, 124.11106137470405 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 1613.0, 976.213432947256 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 5.333333333333333, 2.0548046676563256 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 26, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6294844404206029569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "siat.ac.cn;mail.ustc.edu.cn;ustc.edu.cn;;siat.ac.cn;;siat.ac.cn;mail.ustc.edu.cn;siat.ac.cn;ynu.ac.jp;microsoft.com;siat.ac.cn;;mail.ustc.edu.cn;iitkgp.ac.in;;smbu.edu.cn;bit.edu.cn;hit.edu.cn;szu.edu.cn;siat.ac.cn;alibaba-inc.com;google.com;;cs.uwaterloo.ca;siat.ac.cn", "author_num": 26, "aff_unique_index": "0;1;1;0;0;1;0;2;3;0;1;4;5;6;7;8;0;9;10;11;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Science and Technology of China;Yokohama National University;Microsoft;Indian Institute of Technology Kharagpur;Shenzhen MSU-BIT University;Beijing Institute of Technology;Harbin Institute of Technology;Shenzhen University;Alibaba Group;Google;University of Waterloo", "aff_unique_dep": "Shenzhen Institutes of Advanced Technology;;;Microsoft Corporation;;;;;;;Google DeepMind;", "aff_unique_url": "http://www.cas.cn;http://www.ustc.edu.cn;https://www.yokohama-nu.ac.jp;https://www.microsoft.com;https://www.iitkgp.ac.in;http://www.msubit.edu.cn;http://www.bit.edu.cn/;http://www.hit.edu.cn/;https://www.szu.edu.cn;https://www.alibaba.com;https://deepmind.com;https://uwaterloo.ca", "aff_unique_abbr": "CAS;USTC;YNU;Microsoft;IIT KGP;;BIT;HIT;SZU;Alibaba;DeepMind;UW", "aff_campus_unique_index": "0;0;0;0;0;2;0;3;0;0", "aff_campus_unique": "Shenzhen;;Kharagpur;Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0;1;2;0;0;3;0;0;0;0;0;0;4;5;0", "aff_country_unique": "China;Japan;United States;India;United Kingdom;Canada" }, { "title": "No Regrets: Investigating and Improving Regret Approximations for Curriculum Discovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94019", "id": "iEeiZlTbts", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iEeiZlTbts", "openreview": "https://openreview.net/forum?id=iEeiZlTbts", "poster": "/media/PosterPDFs/NeurIPS%202024/94019.png?t=1733441190.180636", "project": "", "author_site": "Alexander Rutherford, Michael Beukman, Timon Willi, Bruno Lacerda, Nick Hawes, Jakob Foerster", "tldr": "", "abstract": "What data or environments to use for training to improve downstream performance is a longstanding and very topical question in reinforcement learning. \nIn particular, Unsupervised Environment Design (UED) methods have gained recent attention as their adaptive curricula promise to enable agents to be robust to in- and out-of-distribution tasks.\nThis work investigates how existing UED methods select training environments, focusing on task prioritisation metrics.\nSurprisingly, despite methods aiming to maximise regret in theory, the practical approximations do not correlate with regret but with success rate.\nAs a result, a significant portion of an agent's experience comes from environments it has already mastered, offering little to no contribution toward enhancing its abilities. Put differently, current methods fail to predict intuitive measures of *learnability*. Specifically, they are unable to consistently identify those scenarios that the agent can sometimes solve, but not always.\nBased on our analysis, we develop a method that directly trains on scenarios with high learnability. This simple and intuitive approach outperforms existing UED methods in several binary-outcome environments, including the standard domain of Minigrid and a novel setting closely inspired by a real-world robotics problem. \nWe further introduce a new adversarial evaluation procedure for directly measuring robustness, closely mirroring the conditional value at risk (CVaR).\nWe open-source all our code and present visualisations of final policies here: https://github.com/amacrutherford/sampling-for-learnability.", "keywords": "MARL;UED;Robotics", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Alexander Rutherford;Michael Beukman;Timon Willi;Bruno Lacerda;Nick Hawes;Jakob Nicolaus Foerster", "authorids": "~Alexander_Rutherford1;~Michael_Beukman1;~Timon_Willi1;~Bruno_Lacerda1;~Nick_Hawes1;~Jakob_Nicolaus_Foerster1", "gender": "M;;;M;M;M", "homepage": "https://amacrutherford.com/;;https://www.timonwilli.com;https://bfalacerda.github.io/;https://www.robots.ox.ac.uk/~nickh/;https://www.jakobfoerster.com", "dblp": ";;243/3437;87/10333;35/1190;176/5095", "google_scholar": "https://scholar.google.com/citations?hl=en;;Dn-udzAAAAAJ;https://scholar.google.co.uk/citations?user=k9XjG_MAAAAJ;bRsi4zoAAAAJ;6z4lQzMAAAAJ", "orcid": "0000-0002-2662-5602;;0000-0003-4405-5700;0000-0003-0862-331X;0000-0002-7556-6098;", "linkedin": ";;;;;", "or_profile": "~Alexander_Rutherford1;~Michael_Beukman1;~Timon_Willi1;~Bruno_Lacerda1;~Nick_Hawes1;~Jakob_Nicolaus_Foerster1", "aff": "University of Oxford;;University of Oxford, University of Oxford;University of Oxford;University of Oxford;University of Oxford, University of Oxford", "aff_domain": "ox.ac.uk;;eng.ox.ac.uk;ox.ac.uk;ox.ac.uk;eng.ox.ac.uk", "position": "PhD student;;PhD student;Senior Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nrutherford2024no,\ntitle={No Regrets: Investigating and Improving Regret Approximations for Curriculum Discovery},\nauthor={Alexander Rutherford and Michael Beukman and Timon Willi and Bruno Lacerda and Nick Hawes and Jakob Nicolaus Foerster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iEeiZlTbts}\n}", "github": "", "reviewers": "ycoo;Ns9r;gAKk;Su34", "pdf_size": 2725182, "rating": "5;7;7;7", "confidence": "4;3;4;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "1;3;3;3", "wc_summary": "226;106;162;90", "wc_strengths": "112;119;105;97", "wc_weaknesses": "175;3;177;452", "wc_questions": "230;198;30;101", "wc_limitations": "3;25;101;63", "wc_review": "746;451;575;803", "wc_reply_reviewers": "563;12;38;211", "wc_reply_authors": "1491;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 146.0, 53.36665625650534 ], "wc_strengths_avg": [ 108.25, 8.166241485530538 ], "wc_weaknesses_avg": [ 201.75, 160.82191237514868 ], "wc_questions_avg": [ 139.75, 79.19082964586241 ], "wc_limitations_avg": [ 48.0, 37.376463182061514 ], "wc_review_avg": [ 643.75, 139.36889000060236 ], "wc_reply_reviewers_avg": [ 206.0, 219.84881168657702 ], "wc_reply_authors_avg": [ 372.75, 645.6219385212991 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17064287103787540054&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ox.ac.uk;;eng.ox.ac.uk;ox.ac.uk;ox.ac.uk;eng.ox.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Causal Effect Identification in a Sub-Population with Latent Variables", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94018", "id": "iEsyRsg6t1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iEsyRsg6t1", "openreview": "https://openreview.net/forum?id=iEsyRsg6t1", "poster": "/media/PosterPDFs/NeurIPS%202024/94018.png?t=1733732304.5591483", "project": "", "author_site": "Amir Mohammad Abouei, Ehsan Mokhtarian, Negar Kiyavash, Matthias Grossglauser", "tldr": "", "abstract": "The s-ID problem seeks to compute a causal effect in a specific sub-population from the observational data pertaining to the same sub population (Abouei et al., 2023). This problem has been addressed when all the variables in the system are observable. In this paper, we consider an extension of the s-ID problem that allows for the presence of latent variables. To tackle the challenges induced by the presence of latent variables in a sub-population, we first extend the classical relevant graphical definitions, such as c-components and Hedges, initially defined for the so-called ID problem (Pearl, 1995; Tian & Pearl, 2002), to their new counterparts. Subsequently, we propose a sound algorithm for the s-ID problem with latent variables.", "keywords": "Causal Effect Identification;Selection Bias;Latent Variables", "primary_area": "causal_inference", "supplementary_material": "", "author": "Amir Mohammad Abouei;Ehsan Mokhtarian;Negar Kiyavash;Matthias Grossglauser", "authorids": "~Amir_Mohammad_Abouei1;~Ehsan_Mokhtarian1;~Negar_Kiyavash1;~Matthias_Grossglauser1", "gender": "M;M;F;", "homepage": "https://people.epfl.ch/amir.abouei;;https://people.epfl.ch/negar.kiyavash?lang=en;", "dblp": ";276/5445;85/4976;", "google_scholar": ";https://scholar.google.com/scholar?hl=en;7tBDvOwAAAAJ;", "orcid": ";;0000-0002-8545-7709;", "linkedin": ";https://linkedin.com/in/ehsanmokhtarian/;;", "or_profile": "~Amir_Mohammad_Abouei1;~Ehsan_Mokhtarian1;~Negar_Kiyavash1;~Matthias_Grossglauser1", "aff": "School of Computer and Communication Sciences, EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;", "aff_domain": "ic.epfl.ch;epfl.ch;epfl.ch;", "position": "PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\nabouei2024causal,\ntitle={Causal Effect Identification in a Sub-Population with Latent Variables},\nauthor={Amir Mohammad Abouei and Ehsan Mokhtarian and Negar Kiyavash and Matthias Grossglauser},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iEsyRsg6t1}\n}", "github": "", "reviewers": "YSpJ;4gRi;APnQ;rYoX", "pdf_size": 716584, "rating": "6;7;7;7", "confidence": "3;3;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "107;38;85;107", "wc_strengths": "21;30;22;110", "wc_weaknesses": "154;48;5;129", "wc_questions": "61;3;61;82", "wc_limitations": "27;3;10;1", "wc_review": "370;122;183;429", "wc_reply_reviewers": "25;18;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.25, 28.172459956489423 ], "wc_strengths_avg": [ 45.75, 37.25838831726354 ], "wc_weaknesses_avg": [ 84.0, 60.12902793160721 ], "wc_questions_avg": [ 51.75, 29.422567868899545 ], "wc_limitations_avg": [ 10.25, 10.231690964840562 ], "wc_review_avg": [ 276.0, 127.09248600920512 ], "wc_reply_reviewers_avg": [ 10.75, 11.031205736455105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lgzGsem5XrsJ:scholar.google.com/&scioq=Causal+Effect+Identification+in+a+Sub-Population+with+Latent+Variables&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ic.epfl.ch;epfl.ch;epfl.ch;", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "School of Computer and Communication Sciences;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Understanding the Differences in Foundation Models: Attention, State Space Models, and Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94017", "id": "iF7MnXnxRw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iF7MnXnxRw", "openreview": "https://openreview.net/forum?id=iF7MnXnxRw", "poster": "/media/PosterPDFs/NeurIPS%202024/94017.png?t=1733632863.7309318", "project": "", "author_site": "Jerome Sieber, Carmen Amo Alonso, Alexandre Didier, Melanie Zeilinger, Antonio Orvieto", "tldr": "", "abstract": "Softmax attention is the principle backbone of foundation models for various artificial intelligence applications, yet its quadratic complexity in sequence length can limit its inference throughput in long-context settings. To address this challenge, alternative architectures such as linear attention, State Space Models (SSMs), and Recurrent Neural Networks (RNNs) have been considered as more efficient alternatives. While connections between these approaches exist, such models are commonly developed in isolation and there is a lack of theoretical understanding of the shared principles underpinning these architectures and their subtle differences, greatly influencing performance and scalability. In this paper, we introduce the Dynamical Systems Framework (DSF), which allows a principled investigation of all these architectures in a common representation. Our framework facilitates rigorous comparisons, providing new insights on the distinctive characteristics of each model class. For instance, we compare linear attention and selective SSMs, detailing their differences and conditions under which both are equivalent. We also provide principled comparisons between softmax attention and other model classes, discussing the theoretical conditions under which softmax attention can be approximated. Additionally, we substantiate these new insights with empirical validations and mathematical arguments. This shows the DSF's potential to guide the systematic development of future more efficient and scalable foundation models.", "keywords": "deep learning architectures;softmax attention;linear attention;state space models;recurrent neural networks;dynamical system models", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/477b9fea44e7cd97ea957e64806a11ced418ad14.zip", "author": "Jerome Sieber;Carmen Amo Alonso;Alexandre Didier;Melanie Zeilinger;Antonio Orvieto", "authorids": "~Jerome_Sieber1;~Carmen_Amo_Alonso3;~Alexandre_Didier1;~Melanie_Zeilinger1;~Antonio_Orvieto3", "gender": "M;;;F;M", "homepage": "https://jerome.sieber.io;https://camoalon.github.io;;;http://orvi.altervista.org/", "dblp": "268/7789;;;41/7142;", "google_scholar": "JHmSvaMAAAAJ;2snI7NsAAAAJ;https://scholar.google.ch/citations?user=m_wFCFcAAAAJ;;xkuLyHoAAAAJ", "orcid": "0000-0002-8937-7749;;0000-0001-5396-8996;0000-0003-4570-7571;", "linkedin": "https://ch.linkedin.com/in/jsie;;;;antonio-orvieto-947ab0130/", "or_profile": "~Jerome_Sieber1;~Carmen_Amo_Alonso3;~Alexandre_Didier1;~Melanie_Zeilinger1;~Antonio_Orvieto3", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;ELLIS Institute T\u00fcbingen, Max Planck Institute for Intelligent Systems, T\u00fcbingen AI Center, T\u00fcbingen, Germany", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;tue.ellis.eu", "position": "PhD student;Postdoc;PhD student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nsieber2024understanding,\ntitle={Understanding the Differences in Foundation Models: Attention, State Space Models, and Recurrent Neural Networks},\nauthor={Jerome Sieber and Carmen Amo Alonso and Alexandre Didier and Melanie Zeilinger and Antonio Orvieto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iF7MnXnxRw}\n}", "github": "", "reviewers": "US2F;BUkP;GS8T;BcKL", "pdf_size": 1380570, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;2;3", "wc_summary": "58;91;36;110", "wc_strengths": "34;107;85;61", "wc_weaknesses": "180;378;435;63", "wc_questions": "2;269;28;88", "wc_limitations": "1;2;5;49", "wc_review": "275;847;589;371", "wc_reply_reviewers": "11;33;32;128", "wc_reply_authors": "47;47;47;41", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.75, 28.656369274560934 ], "wc_strengths_avg": [ 71.75, 27.19719654670312 ], "wc_weaknesses_avg": [ 264.0, 149.74478288074013 ], "wc_questions_avg": [ 96.75, 104.22421743529668 ], "wc_limitations_avg": [ 14.25, 20.116846174288852 ], "wc_review_avg": [ 520.5, 220.17890452992992 ], "wc_reply_reviewers_avg": [ 51.0, 45.3155602414888 ], "wc_reply_authors_avg": [ 45.5, 2.598076211353316 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11785388280162295028&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch;tue.ellis.eu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "ETH Zurich;ELLIS Institute T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;", "aff_unique_abbr": "ETHZ;", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "ReFIR: Grounding Large Restoration Models with Retrieval Augmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94016", "id": "iFKmFUxQDh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iFKmFUxQDh", "openreview": "https://openreview.net/forum?id=iFKmFUxQDh", "poster": "/media/PosterPDFs/NeurIPS%202024/94016.png?t=1730186024.9318438", "project": "", "author_site": "Hang Guo, Tao Dai, Zhihao Ouyang, Taolin Zhang, Yaohua Zha, Bin Chen, Shu-Tao Xia", "tldr": "", "abstract": "Recent advances in diffusion-based Large Restoration Models (LRMs) have significantly improved photo-realistic image restoration by leveraging the internal knowledge embedded within model weights. However, existing LRMs often suffer from the hallucination dilemma, i.e., producing incorrect contents or textures when dealing with severe degradations, due to their heavy reliance on limited internal knowledge. In this paper, we propose an orthogonal solution called the Retrieval-augmented Framework for Image Restoration (ReFIR), which incorporates retrieved images as external knowledge to extend the knowledge boundary of existing LRMs in generating details faithful to the original scene. Specifically, we first introduce the nearest neighbor lookup to retrieve content-relevant high-quality images as reference, after which we propose the cross-image injection to modify existing LRMs to utilize high-quality textures from retrieved images. Thanks to the additional external knowledge, our ReFIR can well handle the hallucination challenge and facilitate faithfully results. Extensive experiments demonstrate that ReFIR can achieve not only high-fidelity but also realistic restoration results. Importantly, our ReFIR requires no training and is adaptable to various LRMs.", "keywords": "Image Restoration;Diffusion Model;Retrieval Augmented Generation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hang Guo;Tao Dai;Zhihao Ouyang;Taolin Zhang;Yaohua Zha;Bin Chen;Shu-Tao Xia", "authorids": "~Hang_Guo3;~Tao_Dai3;~Zhihao_Ouyang1;~Taolin_Zhang3;~Yaohua_Zha1;~Bin_Chen4;~Shu-Tao_Xia1", "gender": "M;M;M;;M;M;M", "homepage": "https://github.com/csguoh;https://csse.szu.edu.cn/pages/user/index?id=1204;;https://github.com/taolinzhang;https://github.com/zyh16143998882;https://binchen17tsinghua.wixsite.com/website;https://www.sigs.tsinghua.edu.cn/xst/list.htm", "dblp": ";54/875-1;;270/2482-3;344/5717;22/5523-11;03/6195", "google_scholar": "https://scholar.google.com.hk/citations?user=fRwhfpoAAAAJ;MqJNdaAAAAAJ;wjCvLM8AAAAJ;DWnu_G0AAAAJ;https://scholar.google.com.hk/citations?user=-zUO4_QAAAAJ;Yl0wv7AAAAAJ;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ", "orcid": "0000-0003-1746-2693;0000-0003-0594-6404;;0009-0006-2441-2861;0000-0001-9789-452X;0000-0002-4798-230X;0000-0002-8639-982X", "linkedin": ";;zhihao-ouyang-46416a129;;;;", "or_profile": "~Hang_Guo3;~Tao_Dai3;~Zhihao_Ouyang1;~Taolin_Zhang3;~Yaohua_Zha1;~Bin_Chen4;~Shu-Tao_Xia1", "aff": "Tsinghua University;Department of Software Engineering, Shenzhen University;Bytedance;Tsinghua University;Computer Science, Tsinghua University, Tsinghua University;Harbin Institute of Technology, Shenzhen;Shenzhen International Graduate School, Tsinghua University", "aff_domain": "tsinghua.edu.cn;szu.edu;bytedance.com;tsinghua.edu.cn;mails.tsinghua.edu.cn;hit.edu.cn;sz.tsinghua.edu.cn", "position": "MS student;Assistant Professor;Researcher;MS student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nguo2024refir,\ntitle={Re{FIR}: Grounding Large Restoration Models with Retrieval Augmentation},\nauthor={Hang Guo and Tao Dai and Zhihao Ouyang and Taolin Zhang and Yaohua Zha and Bin Chen and Shu-Tao Xia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iFKmFUxQDh}\n}", "github": "", "reviewers": "SJc6;NDC8;qg3y;UnRT", "pdf_size": 6270831, "rating": "5;5;6;6", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "56;161;70;61", "wc_strengths": "43;221;134;103", "wc_weaknesses": "219;217;77;125", "wc_questions": "147;163;38;36", "wc_limitations": "2;68;5;24", "wc_review": "467;830;324;349", "wc_reply_reviewers": "15;0;0;0", "wc_reply_authors": "33;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 43.01743832447488 ], "wc_strengths_avg": [ 125.25, 64.23540690304687 ], "wc_weaknesses_avg": [ 159.5, 60.915925668087816 ], "wc_questions_avg": [ 96.0, 59.27478384608416 ], "wc_limitations_avg": [ 24.75, 26.356925086208367 ], "wc_review_avg": [ 492.5, 202.20101384513382 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 8.25, 14.289419162443238 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7379853921213327698&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;szu.edu;bytedance.com;tsinghua.edu.cn;mails.tsinghua.edu.cn;hit.edu.cn;sz.tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;0;3;0", "aff_unique_norm": "Tsinghua University;Shenzhen University;ByteDance;Harbin Institute of Technology", "aff_unique_dep": ";Department of Software Engineering;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.szu.edu.cn;https://www.bytedance.com;http://en.hhit.edu.cn/", "aff_unique_abbr": "THU;SZU;Bytedance;HIT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "iGeJxHqnbx", "title": "QCircuitNet: A Large-Scale Hierarchical Dataset for Quantum Algorithm Design", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Quantum computing is an emerging field recognized for the significant speedup it offers over classical computing through quantum algorithms. However, designing and implementing quantum algorithms pose challenges due to the complex nature of quantum mechanics and the necessity for precise control over quantum states. To address these challenges, we leverage AI to simplify and enhance the process. Despite the significant advancements in AI, there has been a lack of datasets specifically tailored for this purpose. \n\nIn this work, we introduce QCircuitNet, a benchmark and test dataset designed to evaluate AI\u2019s capability in designing and implementing quantum algorithms in the form of quantum circuit codes. Unlike traditional AI code writing, this task is fundamentally different and significantly more complicated due to the highly flexible design space and the extreme demands for intricate manipulation of qubits.\n\nOur key contributions include: \n1. The first comprehensive, structured universal quantum algorithm dataset.\n2. A framework which formulates the task of quantum algorithm design for Large Language Models (LLMs), providing guidelines for expansion and potential evolution into a training dataset.\n3. Automatic validation and verification functions, allowing for scalable and efficient evaluation methodologies.\n4. A fair and stable benchmark that avoids data contamination, a particularly critical issue in quantum computing datasets.\n\nOur work aims to bridge the gap in available resources for AI-driven quantum algorithm design, offering a robust and scalable method for evaluating and improving AI models in this field. As we expand the dataset to include more algorithms and explore novel fine-tuning methods, we hope it will significantly contribute to both quantum algorithm design and implementation.", "keywords": "AI for science;quantum computing;quantum oracle construction;quantum algorithm design", "primary_area": "", "supplementary_material": "/attachment/deda2433172dceceb619b045799e38663d17d90e.pdf", "author": "Rui Yang;Yuntian Gu;Ziruo Wang;Yitao Liang;Tongyang Li", "authorids": "~Rui_Yang12;~Yuntian_Gu1;~Ziruo_Wang1;~Yitao_Liang1;~Tongyang_Li1", "gender": "F;;F;M;M", "homepage": "https://www.pku.edu.cn;https://github.com/guyuntian;https://zerowang030221.github.io/;https://web.cs.ucla.edu/~yliang/;https://www.tongyangli.com/", "dblp": ";;;173/4969;142/1312", "google_scholar": ";qdyC5XsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;KVzR1XEAAAAJ;ny0ZgiQAAAAJ", "orcid": ";;;;0000-0002-0338-413X", "linkedin": ";;;;", "or_profile": "~Rui_Yang12;~Yuntian_Gu1;~Ziruo_Wang1;~Yitao_Liang1;~Tongyang_Li1", "aff": "Peking University;Peking University;Beijing Institute of Technology;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;bit.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Undergrad student;Intern;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024qcircuitnet,\ntitle={{QC}ircuitNet: A Large-Scale Hierarchical Dataset for Quantum Algorithm Design},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=iGeJxHqnbx}\n}", "github": "", "project": "", "reviewers": "oAQd;7rUP;DaGg", "site": "https://openreview.net/forum?id=iGeJxHqnbx", "pdf_size": 3497505, "rating": "4;6;7", "confidence": "3;3;4", "wc_summary_and_contributions": "51;123;89", "wc_strengths": "16;6;55", "wc_improvement": "59;6;192", "wc_limitations": "3;6;170", "wc_correctness": "1;6;10", "wc_clarity": "19;6;37", "wc_relation_to_prior_work": "1;6;7", "wc_documentation": "1;6;2", "wc_additional_feedback": "1;1;1", "wc_review": "152;166;563", "wc_reply_reviewers": "0;0;101", "wc_reply_authors": "63;63;1", "reply_reviewers": "0;0;2", "reply_authors": "2;2;4", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 87.66666666666667, 29.4089933334837 ], "wc_strengths_avg": [ 25.666666666666668, 21.139746660943903 ], "wc_improvement_avg": [ 85.66666666666667, 78.24037036949375 ], "wc_limitations_avg": [ 59.666666666666664, 78.02706083290734 ], "wc_correctness_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_clarity_avg": [ 20.666666666666668, 12.710450643291745 ], "wc_relation_to_prior_work_avg": [ 4.666666666666667, 2.6246692913372702 ], "wc_documentation_avg": [ 3.0, 2.160246899469287 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 293.6666666666667, 190.53317004891534 ], "wc_reply_reviewers_avg": [ 33.666666666666664, 47.6118565998942 ], "wc_reply_authors_avg": [ 42.333333333333336, 29.227080289043965 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6128561910290460494&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Peking University;Beijing Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "Peking U;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CTIBench: A Benchmark for Evaluating LLMs in Cyber Threat Intelligence", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97556", "id": "iJAOpsXo2I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iJAOpsXo2I", "openreview": "https://openreview.net/forum?id=iJAOpsXo2I", "poster": "/media/PosterPDFs/NeurIPS%202024/97556.png?t=1731459521.5624025", "project": "", "author_site": "Md Tanvirul Alam, Dipkamal Bhusal, Le Nguyen, Nidhi Rastogi", "tldr": "", "abstract": "Cyber threat intelligence (CTI) is crucial in today's cybersecurity landscape, providing essential insights to understand and mitigate the ever-evolving cyber threats. The recent rise of Large Language Models (LLMs) have shown potential in this domain, but concerns about their reliability, accuracy, and hallucinations persist. While existing benchmarks provide general evaluations of LLMs, there are no benchmarks that address the practical and applied aspects of CTI-specific tasks. To bridge this gap, we introduce CTIBench, a benchmark designed to assess LLMs' performance in CTI applications. CTIBench includes multiple datasets focused on evaluating knowledge acquired by LLMs in the cyber-threat landscape. Our evaluation of several state-of-the-art models on these tasks provides insights into their strengths and weaknesses in CTI contexts, contributing to a better understanding of LLM capabilities in CTI.", "keywords": "Cyber Threat Intelligence;Large Language Model;Benchmark", "primary_area": "", "supplementary_material": "/attachment/54c0aac4aa52ce8a1ddf8a4fec104611f1cafc63.pdf", "author": "Md Tanvirul Alam;Dipkamal Bhusal;Le Nguyen;Nidhi Rastogi", "authorids": "~Md_Tanvirul_Alam1;~Dipkamal_Bhusal1;~Le_Nguyen2;~Nidhi_Rastogi1", "gender": ";;M;F", "homepage": ";;;https://nidhirastogi.github.io", "dblp": ";;;160/7318", "google_scholar": ";;;zZnJdNUAAAAJ", "orcid": ";;;0000-0002-2002-3213", "linkedin": ";;le-nguyen-a15152235/;https://linkedin.com/in/rastoginidhi", "or_profile": "~Md_Tanvirul_Alam1;~Dipkamal_Bhusal1;~Le_Nguyen2;~Nidhi_Rastogi1", "aff": ";;Rochester Institute of Technology;Rensselaer Polytechnic Institute", "aff_domain": ";;rit.edu;rpi.edu", "position": ";;PhD student;Research Scientist", "bibtex": "@inproceedings{\nalam2024ctibench,\ntitle={{CTIB}ench: A Benchmark for Evaluating {LLM}s in Cyber Threat Intelligence},\nauthor={Md Tanvirul Alam and Dipkamal Bhusal and Le Nguyen and Nidhi Rastogi},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iJAOpsXo2I}\n}", "github": "", "reviewers": "svyM;e7WG;NgSY", "pdf_size": 3133558, "rating": "6;8;9", "confidence": "3;3;5", "wc_summary_and_contributions": "106;129;68", "wc_strengths": "87;71;11", "wc_improvement": "118;211;21", "wc_limitations": "160;13;4", "wc_correctness": "23;23;7", "wc_clarity": "1;24;6", "wc_relation_to_prior_work": "1;20;6", "wc_documentation": "1;14;5", "wc_additional_feedback": "1;1;1", "wc_review": "498;506;129", "wc_reply_reviewers": "0;62;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 101.0, 25.152865973217974 ], "wc_strengths_avg": [ 56.333333333333336, 32.71425105702746 ], "wc_improvement_avg": [ 116.66666666666667, 77.57290477709053 ], "wc_limitations_avg": [ 59.0, 71.5122367151245 ], "wc_correctness_avg": [ 17.666666666666668, 7.542472332656507 ], "wc_clarity_avg": [ 10.333333333333334, 9.877021593352703 ], "wc_relation_to_prior_work_avg": [ 9.0, 8.04155872120988 ], "wc_documentation_avg": [ 6.666666666666667, 5.436502143433363 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 377.6666666666667, 175.86421529754773 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 29.227080289043965 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184546, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5899779720452572262&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": ";;rit.edu;rpi.edu", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Rochester Institute of Technology;Rensselaer Polytechnic Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.rit.edu;https://www.rpi.edu", "aff_unique_abbr": "RIT;RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Beyond Primal-Dual Methods in Bandits with Stochastic and Adversarial Constraints", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94015", "id": "iJgwd5mWYg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iJgwd5mWYg", "openreview": "https://openreview.net/forum?id=iJgwd5mWYg", "poster": "", "project": "", "author_site": "Martino Bernasconi, Matteo Castiglioni, Andrea Celli, Federico Fusco", "tldr": "", "abstract": "We address a generalization of the bandit with knapsacks problem, where a learner aims to maximize rewards while satisfying an arbitrary set of long-term constraints. Our goal is to design best-of-both-worlds algorithms that perform optimally under both stochastic and adversarial constraints. Previous works address this problem via primal-dual methods, and require some stringent assumptions, namely the Slater's condition, and in adversarial settings, they either assume knowledge of a lower bound on the Slater's parameter, or impose strong requirements on the primal and dual regret minimizers such as requiring weak adaptivity. We propose an alternative and more natural approach based on optimistic estimations of the constraints. Surprisingly, we show that estimating the constraints with an UCB-like approach guarantees optimal performances.\nOur algorithm consists of two main components: (i) a regret minimizer working on moving strategy sets and (ii) an estimate of the feasible set as an optimistic weighted empirical mean of previous samples. The key challenge in this approach is designing adaptive weights that meet the different requirements for stochastic and adversarial constraints. Our algorithm is significantly simpler than previous approaches, and has a cleaner analysis. Moreover, ours is the first best-of-both-worlds algorithm providing bounds logarithmic in the number of constraints. Additionally, in stochastic settings, it provides $\\widetilde O(\\sqrt{T})$ regret without Slater's condition.", "keywords": "bandits with knapsacks;long-term constraints", "primary_area": "online_learning", "supplementary_material": "", "author": "Martino Bernasconi;Matteo Castiglioni;Andrea Celli;Federico Fusco", "authorids": "~Martino_Bernasconi1;~Matteo_Castiglioni1;~Andrea_Celli1;~Federico_Fusco1", "gender": ";;M;M", "homepage": "https://sites.google.com/view/martinobernasconi/home;https://castiglionimatteo.github.io;https://andcelli.github.io/;https://sites.google.com/uniroma1.it/federicofusco/home", "dblp": "301/6372.html;225/7720;190/7301.html;243/5755", "google_scholar": "dtmoCekAAAAJ;https://scholar.google.it/citations?user=NPE3HAYAAAAJ;9wQscqEAAAAJ;https://scholar.google.co.il/citations?user=oaS8iAQAAAAJ", "orcid": ";0000-0002-1070-6766;;0000-0001-6250-945X", "linkedin": ";;;", "or_profile": "~Martino_Bernasconi1;~Matteo_Castiglioni1;~Andrea_Celli1;~Federico_Fusco1", "aff": "Bocconi University;Politecnico di Milano;Bocconi University;University of Roma \"La Sapienza\"", "aff_domain": "unibocconi.it;polimi.it;unibocconi.it;uniroma1.it", "position": "Postdoc;Assistant Professor;Assistant Professor;Lecturer", "bibtex": "@inproceedings{\nbernasconi2024beyond,\ntitle={Beyond Primal-Dual Methods in Bandits with Stochastic and Adversarial Constraints},\nauthor={Martino Bernasconi and Matteo Castiglioni and Andrea Celli and Federico Fusco},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iJgwd5mWYg}\n}", "github": "", "reviewers": "RfbD;EbNg;EcmS;Q9L2", "pdf_size": 395011, "rating": "4;5;7;7", "confidence": "4;4;3;4", "soundness": "4;3;4;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "178;62;96;682", "wc_strengths": "34;41;130;176", "wc_weaknesses": "215;210;76;88", "wc_questions": "76;33;69;77", "wc_limitations": "1;17;17;3", "wc_review": "504;363;388;1026", "wc_reply_reviewers": "14;291;0;0", "wc_reply_authors": "37;348;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 254.5, 250.39319080198646 ], "wc_strengths_avg": [ 95.25, 60.04737712839754 ], "wc_weaknesses_avg": [ 147.25, 65.41167709209114 ], "wc_questions_avg": [ 63.75, 18.019087102292392 ], "wc_limitations_avg": [ 9.5, 7.533259586659682 ], "wc_review_avg": [ 570.25, 268.4514620932432 ], "wc_reply_reviewers_avg": [ 76.25, 124.11763573320272 ], "wc_reply_authors_avg": [ 96.25, 146.1307205894777 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2480726548556871279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "unibocconi.it;polimi.it;unibocconi.it;uniroma1.it", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Bocconi University;Politecnico di Milano;University of Rome La Sapienza", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bocconi.edu;https://www.polimi.it;https://www.uniroma1.it", "aff_unique_abbr": "Bocconi;Polimi;La Sapienza", "aff_campus_unique_index": "1", "aff_campus_unique": ";Rome", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "MaskFactory: Towards High-quality Synthetic Data Generation for Dichotomous Image Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94014", "id": "iM5i289eqt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iM5i289eqt", "openreview": "https://openreview.net/forum?id=iM5i289eqt", "poster": "/media/PosterPDFs/NeurIPS%202024/94014.png?t=1731686960.5187662", "project": "", "author_site": "Haotian Qian, Yinda Chen, Shengtao Lou, Fahad Shahbaz Khan, Xiaogang Jin, Deng-Ping Fan", "tldr": "", "abstract": "Dichotomous Image Segmentation (DIS) tasks require highly precise annotations, and traditional dataset creation methods are labor intensive, costly, and require extensive domain expertise. Although using synthetic data for DIS is a promising solution to these challenges, current generative models and techniques struggle with the issues of scene deviations, noise-induced errors, and limited training sample variability. To address these issues, we introduce a novel approach, Mask Factory, which provides a scalable solution for generating diverse and precise datasets, markedly reducing preparation time and costs. We first introduce a general mask editing method that combines rigid and non-rigid editing techniques to generate high-quality synthetic masks. Specially, rigid editing leverages geometric priors from diffusion models to achieve precise viewpoint transformations under zero-shot conditions, while non-rigid editing employs adversarial training and self-attention mechanisms for complex, topologically consistent modifications. Then, we generate pairs of high-resolution image and accurate segmentation mask using a multi-conditional control generation method. Finally, our experiments on the widely-used DIS5K dataset benchmark demonstrate superior performance in quality and efficiency compared to existing methods. The code is available at https://qian-hao-tian.github.io/MaskFactory/.", "keywords": "Dichotomous Image Segmentation; Diffusion Model; Synthetic Data", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haotian Qian;Yinda Chen;Shengtao Lou;Fahad Khan;Xiaogang Jin;Deng-Ping Fan", "authorids": "~Haotian_Qian2;~Yinda_Chen1;~Shengtao_Lou1;~Fahad_Khan1;~Xiaogang_Jin1;~Deng-Ping_Fan1", "gender": ";M;M;M;M;M", "homepage": ";https://ydchen0806.github.io/;https://github.com/loushengtao;https://sites.google.com/view/fahadkhans/home;http://www.cad.zju.edu.cn/home/jin/;https://dengpingfan.github.io/", "dblp": ";313/5179;359/4243.html;05/8618;36/3676-1;205/3148", "google_scholar": ";hCvlj5cAAAAJ;https://scholar.google.com/citations?hl=en;zvaeYnUAAAAJ;yryOvLwAAAAJ;kakwJ5QAAAAJ", "orcid": ";;;;;0000-0002-5245-7518", "linkedin": ";;;;;deng-ping-fan-584b25198/", "or_profile": "~Haotian_Qian2;~Yinda_Chen1;~Shengtao_Lou1;~Fahad_Khan1;~Xiaogang_Jin1;~Deng-Ping_Fan1", "aff": ";University of Science and Technology of China;Zhejiang University;Link\u00f6ping University;Zhejiang University;Nankai University", "aff_domain": ";ustc.edu.cn;zju.edu.cn;liu.se;zju.edu.cn;nankai.edu.cn", "position": ";MS student;Intern;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqian2024maskfactory,\ntitle={MaskFactory: Towards High-quality Synthetic Data Generation for Dichotomous Image Segmentation},\nauthor={Haotian Qian and Yinda Chen and Shengtao Lou and Fahad Khan and Xiaogang Jin and Deng-Ping Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iM5i289eqt}\n}", "github": "", "reviewers": "nBLG;49D1;h8fH;AsEU", "pdf_size": 0, "rating": "4;5;5;7", "confidence": "4;4;5;3", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "2;1;4;4", "wc_summary": "118;39;77;53", "wc_strengths": "135;34;40;50", "wc_weaknesses": "132;318;101;30", "wc_questions": "36;382;34;4", "wc_limitations": "87;4;2;4", "wc_review": "508;777;254;141", "wc_reply_reviewers": "0;62;20;0", "wc_reply_authors": "69;54;19;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.299038105676658 ], "wc_summary_avg": [ 71.75, 29.961433543807612 ], "wc_strengths_avg": [ 64.75, 40.959583738119214 ], "wc_weaknesses_avg": [ 145.25, 106.37051988215532 ], "wc_questions_avg": [ 114.0, 155.24818839522734 ], "wc_limitations_avg": [ 24.25, 36.237929024711114 ], "wc_review_avg": [ 420.0, 245.2498725789679 ], "wc_reply_reviewers_avg": [ 20.5, 25.31304011769428 ], "wc_reply_authors_avg": [ 35.5, 27.37243138634199 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10979683144764337221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";ustc.edu.cn;zju.edu.cn;liu.se;zju.edu.cn;nankai.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "University of Science and Technology of China;Zhejiang University;Link\u00f6ping University;Nankai University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.zju.edu.cn;https://www.liu.se;http://www.nankai.edu.cn", "aff_unique_abbr": "USTC;ZJU;LiU;NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Sweden" }, { "title": "Improved Algorithms for Contextual Dynamic Pricing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94013", "id": "iMEAHXDiNP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iMEAHXDiNP", "openreview": "https://openreview.net/forum?id=iMEAHXDiNP", "poster": "", "project": "", "author_site": "Matilde Tullii, Solenne Gaucher, Nadav Merlis, Vianney Perchet", "tldr": "", "abstract": "In contextual dynamic pricing, a seller sequentially prices goods based on contextual information. Buyers will purchase products only if the prices are below their valuations.\nThe goal of the seller is to design a pricing strategy that collects as much revenue as possible. We focus on two different valuation models. The first assumes that valuations linearly depend on the context and are further distorted by noise. Under minor regularity assumptions, our algorithm achieves an optimal regret bound of $\\tilde{\\mathcal{O}}(T^{2/3})$, improving the existing results. The second model removes the linearity assumption, requiring only that the expected buyer valuation is $\\beta$-H\\\"older in the context. For this model, our algorithm obtains a regret $\\tilde{\\mathcal{O}}(T^{d+2\\beta/d+3\\beta})$, where $d$ is the dimension of the context space.", "keywords": "Dynamic Pricing;Bandits", "primary_area": "bandits", "supplementary_material": "", "author": "Matilde Tullii;Solenne Gaucher;Nadav Merlis;Vianney Perchet", "authorids": "~Matilde_Tullii1;~Solenne_Gaucher1;~Nadav_Merlis1;~Vianney_Perchet3", "gender": "F;F;M;M", "homepage": "https://crest.science/user/Matilde-TULLII/;https://solennegaucher.github.io/;;", "dblp": "381/5025;255/9225;227/2875;83/7398", "google_scholar": ";cxgVVJcAAAAJ;https://scholar.google.co.il/citations?user=pX2zzp0AAAAJ;", "orcid": ";;0000-0002-9906-0577;", "linkedin": ";solenne-gaucher-450800199/;;", "or_profile": "~Matilde_Tullii1;~Solenne_Gaucher1;~Nadav_Merlis1;~Vianney_Perchet1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;Ecole Nationale de la Statistique et de l'Administration Economique;Ecole Nationale de la Statistique et de l'Administration Economique;", "aff_domain": "ensae.fr;ensae.fr;ensae.fr;", "position": "PhD student;Postdoc;Postdoc;", "bibtex": "@inproceedings{\ntullii2024improved,\ntitle={Improved Algorithms for Contextual Dynamic Pricing},\nauthor={Matilde Tullii and Solenne Gaucher and Nadav Merlis and Vianney Perchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iMEAHXDiNP}\n}", "github": "", "reviewers": "iWzp;EQLG;B94q;beqc", "pdf_size": 542688, "rating": "6;6;7;8", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "75;67;54;72", "wc_strengths": "55;42;44;127", "wc_weaknesses": "41;22;151;392", "wc_questions": "75;68;52;80", "wc_limitations": "1;2;2;43", "wc_review": "247;201;303;714", "wc_reply_reviewers": "0;16;15;178", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 8.031189202104505 ], "wc_strengths_avg": [ 67.0, 34.99285641384538 ], "wc_weaknesses_avg": [ 151.5, 147.32701721001482 ], "wc_questions_avg": [ 68.75, 10.568230693924125 ], "wc_limitations_avg": [ 12.0, 17.902513789968157 ], "wc_review_avg": [ 366.25, 203.99678306287086 ], "wc_reply_reviewers_avg": [ 52.25, 72.87789445366819 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3124455118829410212&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ensae.fr;ensae.fr;ensae.fr;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": "", "aff_unique_url": "https://ensae.fr", "aff_unique_abbr": "ENSAE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "BenchX: A Unified Benchmark Framework for Medical Vision-Language Pretraining on Chest X-Rays", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97555", "id": "iMtAjdGh1U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iMtAjdGh1U", "openreview": "https://openreview.net/forum?id=iMtAjdGh1U", "poster": "", "project": "", "author_site": "Yang Zhou, Tan Faith, Yanyu Xu, Sicong Leng, Xinxing Xu, Yong Liu, Rick Siow Mong Goh", "tldr": "", "abstract": "Medical Vision-Language Pretraining (MedVLP) shows promise in learning generalizable and transferable visual representations from paired and unpaired medical images and reports. MedVLP can provide useful features to downstream tasks and facilitate adapting task-specific models to new setups using fewer examples. However, existing MedVLP methods often differ in terms of datasets, preprocessing, and finetuning implementations. This pose great challenges in evaluating how well a MedVLP method generalizes to various clinically-relevant tasks due to the lack of unified, standardized, and comprehensive benchmark. To fill this gap, we propose BenchX, a unified benchmark framework that enables head-to-head comparison and systematical analysis between MedVLP methods using public chest X-ray datasets. Specifically, BenchX is composed of three components: 1) Comprehensive datasets covering nine datasets and four medical tasks; 2) Benchmark suites to standardize data preprocessing, train-test splits, and parameter selection; 3) Unified finetuning protocols that accommodate heterogeneous MedVLP methods for consistent task adaptation in classification, segmentation, and report generation, respectively. Utilizing BenchX, we establish baselines for nine state-of-the-art MedVLP methods and found that the performance of some early MedVLP methods can be enhanced to surpass more recent ones, prompting a revisiting of the developments and conclusions from prior works in MedVLP. Our code are available at https://github.com/yangzhou12/BenchX.", "keywords": "Benchmarking;Vision-Language Pretraining;Medical Vision-Language Models;Chest X-Rays", "primary_area": "", "supplementary_material": "", "author": "Yang Zhou;Tan Li Hui Faith;Yanyu Xu;Sicong Leng;Xinxing Xu;Yong Liu;Rick Siow Mong Goh", "authorids": "~Yang_Zhou6;~Tan_Li_Hui_Faith1;~Yanyu_Xu1;~Sicong_Leng1;~Xinxing_Xu1;~Yong_Liu10;~Rick_Siow_Mong_Goh1", "gender": "M;;M;M;M;M;", "homepage": "https://yangzhou.netlify.app/;;https://svip-lab.github.io/team/xuyy.html;https://lengsicong.github.io/;https://sites.google.com/site/xinxingxu666/;;https://sites.google.com/view/rickgoh/home", "dblp": "07/4580-17.html;;188/7560;295/8971;15/10654;29/4867-26;https://dblp.uni-trier.de/pers/g/Goh:Rick_Siow_Mong", "google_scholar": "_-cbldUAAAAJ;;Y3hU1AYAAAAJ;xQsBP6YAAAAJ;https://scholar.google.com.sg/citations?user=neFbpuEAAAAJ;QujHYk0AAAAJ;https://scholar.google.com.sg/citations?user=fBsBJjoAAAAJ", "orcid": "0000-0002-0873-619X;;;;0000-0003-1449-3072;;0000-0001-9116-1595", "linkedin": ";;;sicong-leng-203516179/;;liuyongsg;rickgoh/", "or_profile": "~Yang_Zhou6;~Tan_Li_Hui_Faith1;~Yanyu_Xu1;~Sicong_Leng1;~Xinxing_Xu1;~Yong_Liu10;~Rick_Siow_Mong_Goh1", "aff": "Institute of High Performance Computing, Singapore, A*STAR;;Institute of High Performance Computing, Singapore, A*STAR;Nanyang Technological University;Institute of High Performance Computing;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "ihpc.a-star.edu.sg;;ihpc.a-star.edu.sg;ntu.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg", "position": "Researcher;;Researcher;PhD student;Scientist;Senior Scientist, Adjunct Assistant Professor;Director", "bibtex": "@inproceedings{\nzhou2024benchx,\ntitle={BenchX: A Unified Benchmark Framework for Medical Vision-Language Pretraining on Chest X-Rays},\nauthor={Yang Zhou and Tan Li Hui Faith and Yanyu Xu and Sicong Leng and Xinxing Xu and Yong Liu and Rick Siow Mong Goh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iMtAjdGh1U}\n}", "github": "", "reviewers": "magQ;LeEn;CLKA", "pdf_size": 744714, "rating": "6;6;8", "confidence": "4;4;4", "wc_summary_and_contributions": "153;23;99", "wc_strengths": "29;36;133", "wc_improvement": "20;104;61", "wc_limitations": "1;1;90", "wc_correctness": "1;1;11", "wc_clarity": "1;1;19", "wc_relation_to_prior_work": "1;1;27", "wc_documentation": "1;1;7", "wc_additional_feedback": "1;1;1", "wc_review": "208;169;448", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 91.66666666666667, 53.324999348856586 ], "wc_strengths_avg": [ 66.0, 47.462265713582056 ], "wc_improvement_avg": [ 61.666666666666664, 34.29609631171195 ], "wc_limitations_avg": [ 30.666666666666668, 41.95500235040182 ], "wc_correctness_avg": [ 4.333333333333333, 4.714045207910317 ], "wc_clarity_avg": [ 7.0, 8.48528137423857 ], "wc_relation_to_prior_work_avg": [ 9.666666666666666, 12.256517540566826 ], "wc_documentation_avg": [ 3.0, 2.8284271247461903 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 275.0, 123.36125809993995 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:s2PfIltHDdUJ:scholar.google.com/&scioq=BenchX:+A+Unified+Benchmark+Framework+for+Medical+Vision-Language+Pretraining+on+Chest+X-Rays&hl=en&as_sdt=0,10", "gs_version_total": 5, "email": "ihpc.a-star.edu.sg;;ihpc.a-star.edu.sg;ntu.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Institute of High Performance Computing;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ihpc.a-star.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "IHPC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Are Self-Attentions Effective for Time Series Forecasting?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94012", "id": "iN43sJoib7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iN43sJoib7", "openreview": "https://openreview.net/forum?id=iN43sJoib7", "poster": "/media/PosterPDFs/NeurIPS%202024/94012.png?t=1730198916.7449195", "project": "", "author_site": "Dongbin Kim, Jinseong Park, Jaewook Lee, Hoki Kim", "tldr": "", "abstract": "Time series forecasting is crucial for applications across multiple domains and various scenarios. Although Transformers have dramatically advanced the landscape of forecasting, their effectiveness remains debated. Recent findings have indicated that simpler linear models might outperform complex Transformer-based approaches, highlighting the potential for more streamlined architectures. In this paper, we shift the focus from evaluating the overall Transformer architecture to specifically examining the effectiveness of self-attention for time series forecasting. To this end, we introduce a new architecture, Cross-Attention-only Time Series transformer (CATS), that rethinks the traditional transformer framework by eliminating self-attention and leveraging cross-attention mechanisms instead. \nBy establishing future horizon-dependent parameters as queries and enhanced parameter sharing, our model not only improves long-term forecasting accuracy but also reduces the number of parameters and memory usage. Extensive experiment across various datasets demonstrates that our model achieves superior performance with the lowest mean squared error and uses fewer parameters compared to existing models.\nThe implementation of our model is available at: https://github.com/dongbeank/CATS.", "keywords": "time series forecasting;learnable query;parameter sharing", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/b919864c8005f48898c8d5a5a68fd315a5561d61.zip", "author": "Dongbin Kim;Jinseong Park;Jaewook Lee;Hoki Kim", "authorids": "~Dongbin_Kim1;~Jinseong_Park1;~Jaewook_Lee1;~Hoki_Kim1", "gender": "M;M;M;M", "homepage": ";https://github.com/JinseongP;http://slcf.snu.ac.kr;", "dblp": ";178/8948-1;39/4985-1;75/6518", "google_scholar": "https://scholar.google.co.kr/citations?user=1KykjwYAAAAJ;o4-E5z0AAAAJ;teMdzbwAAAAJ;X4sz0QsAAAAJ", "orcid": "0009-0001-4684-4853;0000-0003-1931-8441;;0000-0001-5361-459X", "linkedin": "dongbin-kim-13866a31b/;jinseong-park-a84740226/;;", "or_profile": "~Dongbin_Kim1;~Jinseong_Park1;~Jaewook_Lee1;~Hoki_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nkim2024are,\ntitle={Are Self-Attentions Effective for Time Series Forecasting?},\nauthor={Dongbin Kim and Jinseong Park and Jaewook Lee and Hoki Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iN43sJoib7}\n}", "github": "", "reviewers": "Q7Js;ztsM;sgNU", "pdf_size": 1151022, "rating": "3;6;6", "confidence": "5;4;3", "soundness": "2;3;3", "novelty": "2;2;2", "presentation": "1;3;2", "wc_summary": "110;12;48", "wc_strengths": "71;23;38", "wc_weaknesses": "229;94;65", "wc_questions": "2;3;2", "wc_limitations": "2;2;1", "wc_review": "414;134;154", "wc_reply_reviewers": "206;19;35", "wc_reply_authors": "1063;454;73", "reply_reviewers": "1;1;1", "reply_authors": "4;3;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 56.666666666666664, 40.47495796442811 ], "wc_strengths_avg": [ 44.0, 20.049937655763422 ], "wc_weaknesses_avg": [ 129.33333333333334, 71.46249987852993 ], "wc_questions_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_limitations_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_review_avg": [ 234.0, 127.54084313139327 ], "wc_reply_reviewers_avg": [ 86.66666666666667, 84.63385190861213 ], "wc_reply_authors_avg": [ 530.0, 407.722945147805 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11331535960680046681&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Easy2Hard-Bench: Standardized Difficulty Labels for Profiling LLM Performance and Generalization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97554", "id": "iNB4uoFQJb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iNB4uoFQJb", "openreview": "https://openreview.net/forum?id=iNB4uoFQJb", "poster": "/media/PosterPDFs/NeurIPS%202024/97554.png?t=1731726188.5022786", "project": "", "author_site": "Mucong Ding, Chenghao Deng, Jocelyn Choo, Zichu Wu, Aakriti Agrawal, Avi Schwarzschild, Tianyi Zhou, Tom Goldstein, John Langford, Animashree Anandkumar, Furong Huang", "tldr": "", "abstract": "Despite the abundance of datasets available for assessing large language models (LLMs), the scarcity of continuous and reliable difficulty labels for individual data points, in most cases, curtails their capacity to benchmark model generalization performance across different levels of complexity. Addressing this limitation, we present Easy2Hard, an innovative collection of 6 benchmark datasets featuring standardized difficulty labels spanning a wide range of domains, such as mathematics and programming problems, chess puzzles, and reasoning questions, providing a much-needed tool for those in demand of a dataset with varying degrees of difficulty for LLM assessment. We estimate the difficulty of individual problems by leveraging the performance data of many human subjects and LLMs on prominent leaderboards. Harnessing the rich human performance data, we employ widely recognized difficulty ranking systems, including the Item Response Theory (IRT) and Glicko-2 models, to uniformly assign difficulty scores to problems. The Easy2Hard datasets distinguish themselves from previous collections by incorporating a significantly higher proportion of challenging problems, presenting a novel and demanding test for state-of-the-art LLMs. Through extensive experiments conducted with six state-of-the-art LLMs on the Easy2Hard datasets, we offer valuable insights into their performance and generalization capabilities across varying degrees of difficulty, setting the stage for future research in LLM generalization.", "keywords": "easy to hard;difficulty levels;large language models;weak to strong", "primary_area": "", "supplementary_material": "/attachment/23c6bbb2b9ca729211bd595d6c30a931c22968b5.zip", "author": "Mucong Ding;Chenghao Deng;Jocelyn Choo;Zichu Wu;Aakriti Agrawal;Avi Schwarzschild;Tianyi Zhou;Tom Goldstein;John Langford;Anima Anandkumar;Furong Huang", "authorids": "~Mucong_Ding1;~Chenghao_Deng1;~Jocelyn_Choo1;~Zichu_Wu1;~Aakriti_Agrawal1;~Avi_Schwarzschild1;~Tianyi_Zhou1;~Tom_Goldstein1;~John_Langford1;~Anima_Anandkumar1;~Furong_Huang1", "gender": "M;M;F;M;F;M;M;M;M;F;F", "homepage": "http://www.cs.umd.edu/~mcding/;https://deng-chenghao.com;;;https://sites.google.com/umd.edu/aakriti-agrawal/;https://cs.umd.edu/~avi1;https://tianyizhou.github.io/;https://www.cs.umd.edu/~tomg/;http://hunch.net/~jl;https://furong-huang.com;http://tensorlab.cms.caltech.edu/users/anima/", "dblp": "232/1754.html;;;;259/2330;249/9334.html;88/8205-1;25/8184;77/4488;72/8513;", "google_scholar": "_bVao2MAAAAJ;AcGw1hcAAAAJ;;;2RRnCRMAAAAJ;WNvQ7AcAAAAJ;OKvgizMAAAAJ;KmSuVtgAAAAJ;LFiqVpwAAAAJ;13yyuCcAAAAJ;bEcLezcAAAAJ", "orcid": "0000-0002-6173-8055;;;;;;0000-0001-5348-0632;;;;", "linkedin": "mucong-ding-489296104;chenghao-deng-326444182/;jchoo-;zichu-wu-9a5495227/;aakriti-agrawal05/;;tianyizhou;;;;anima-anandkumar-35171b1/", "or_profile": "~Mucong_Ding1;~Chenghao_Deng1;~Jocelyn_Choo1;~Zichu_Wu1;~Aakriti_Agrawal1;~Avi_Schwarzschild1;~Tianyi_Zhou1;~Tom_Goldstein1;~John_Langford1;~Furong_Huang1;~anima_anandkumar1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Waterloo;University of Maryland;Carnegie Mellon University;University of Maryland, College Park;University of Maryland, College Park;Microsoft;University of Maryland;California Institute of Technology", "aff_domain": "cs.umd.edu;umd.edu;umd.edu;uwaterloo.ca;umd.edu;cmu.edu;umd.edu;umd.edu;microsoft.com;cs.umd.edu;caltech.edu", "position": "PhD student;PhD student;Undergrad student;Undergrad student;PhD student;Postdoc;Assistant Professor;Full Professor;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nding2024easyhardbench,\ntitle={Easy2Hard-Bench: Standardized Difficulty Labels for Profiling {LLM} Performance and Generalization},\nauthor={Mucong Ding and Chenghao Deng and Jocelyn Choo and Zichu Wu and Aakriti Agrawal and Avi Schwarzschild and Tianyi Zhou and Tom Goldstein and John Langford and Anima Anandkumar and Furong Huang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iNB4uoFQJb}\n}", "github": "", "reviewers": "2GjG;H1Vy;q7Pi;WtDm", "pdf_size": 3280840, "rating": "7;7;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "54;46;60;71", "wc_strengths": "12;19;54;3", "wc_improvement": "8;122;208;3", "wc_limitations": "1;3;8;1", "wc_correctness": "1;9;26;1", "wc_clarity": "1;1;6;1", "wc_relation_to_prior_work": "3;1;6;1", "wc_documentation": "1;1;37;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "82;203;406;83", "wc_reply_reviewers": "0;0;0;19", "wc_reply_authors": "100;92;92;0", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 57.75, 9.12071817347735 ], "wc_strengths_avg": [ 22.0, 19.32614809008769 ], "wc_improvement_avg": [ 85.25, 85.36795358915428 ], "wc_limitations_avg": [ 3.25, 2.8613807855648994 ], "wc_correctness_avg": [ 9.25, 10.207227831296802 ], "wc_clarity_avg": [ 2.25, 2.165063509461097 ], "wc_relation_to_prior_work_avg": [ 2.75, 2.0463381929681126 ], "wc_documentation_avg": [ 10.0, 15.588457268119896 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 193.5, 132.18263879950348 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 71.0, 41.12177038990418 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2d9Dhida8eoJ:scholar.google.com/&scioq=Easy2Hard-Bench:+Standardized+Difficulty+Labels+for+Profiling+LLM+Performance+and+Generalization&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cs.umd.edu;umd.edu;umd.edu;uwaterloo.ca;umd.edu;cmu.edu;umd.edu;umd.edu;microsoft.com;cs.umd.edu;caltech.edu", "author_num": 11, "aff_unique_index": "0;1;1;2;1;3;1;1;4;1;5", "aff_unique_norm": "University of Maryland, College Park;University of Maryland;University of Waterloo;Carnegie Mellon University;Microsoft;California Institute of Technology", "aff_unique_dep": "Department of Computer Science;;;;Microsoft Corporation;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://uwaterloo.ca;https://www.cmu.edu;https://www.microsoft.com;https://www.caltech.edu", "aff_unique_abbr": "UMD;UMD;UW;CMU;Microsoft;Caltech", "aff_campus_unique_index": "0;0;0;0;0;2", "aff_campus_unique": "College Park;;Pasadena", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Sm: enhanced localization in Multiple Instance Learning for medical imaging classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94011", "id": "iNS3SC949v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iNS3SC949v", "openreview": "https://openreview.net/forum?id=iNS3SC949v", "poster": "/media/PosterPDFs/NeurIPS%202024/94011.png?t=1732223295.05881", "project": "", "author_site": "Francisco M. Castro-Mac\u00edas, Pablo Morales Alvarez, Yunan Wu, Rafael Molina, Aggelos Katsaggelos", "tldr": "", "abstract": "Multiple Instance Learning (MIL) is widely used in medical imaging classification to reduce the labeling effort. \nWhile only bag labels are available for training, one typically seeks predictions at both bag and instance levels (classification and localization tasks, respectively). Early MIL methods treated the instances in a bag independently. Recent methods account for global and local dependencies among instances. Although they have yielded excellent results in classification, their performance in terms of localization is comparatively limited. We argue that these models have been designed to target the classification task, while implications at the instance level have not been deeply investigated. Motivated by a simple observation -- that neighboring instances are likely to have the same label -- we propose a novel, principled, and flexible mechanism to model local dependencies. It can be used alone or combined with any mechanism to model global dependencies (e.g., transformers). A thorough empirical validation shows that our module leads to state-of-the-art performance in localization while being competitive or superior in classification. Our code is at https://github.com/Franblueee/SmMIL.", "keywords": "Multiple Instance Learning;Transformers;Graph Neural Networks;Medical Imaging;Weakly Supervised Learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/7b6163989a7b13448f4c06acc6eee232ae841539.zip", "author": "Francisco M Castro-Mac\u00edas;Pablo Morales-Alvarez;Yunan Wu;Rafael Molina;Aggelos Katsaggelos", "authorids": "~Francisco_M_Castro-Mac\u00edas1;~Pablo_Morales-Alvarez1;~Yunan_Wu2;~Rafael_Molina2;~Aggelos_Katsaggelos1", "gender": ";;F;;M", "homepage": ";;https://yunanwu2168.github.io/Bio/;https://ccia.ugr.es/~rms/;http://www.mccormick.northwestern.edu/research-faculty/directory/profiles/katsaggelos-aggelos.html", "dblp": ";;365/5983.html;30/4210-1;k/AggelosKAggelos", "google_scholar": ";;odHrXBQAAAAJ;u6K_sU8AAAAJ;https://scholar.google.com.tw/citations?user=aucB85kAAAAJ", "orcid": ";;0000-0001-6980-9746;0000-0003-4694-8588;0000-0003-4554-0070", "linkedin": ";;yunan-wu-53513b1b9/;;", "or_profile": "~Francisco_M_Castro-Mac\u00edas1;~Pablo_Morales-Alvarez1;~Yunan_Wu2;~Rafael_Molina2;~Aggelos_Katsaggelos1", "aff": ";;Northwestern University;Universidad de Granada;", "aff_domain": ";;northwestern.edu;ugr.es;", "position": ";;PhD student;Full Professor;", "bibtex": "@inproceedings{\ncastro-mac{\\'\\i}as2024sm,\ntitle={Sm: enhanced localization in Multiple Instance Learning for medical imaging classification},\nauthor={Francisco M Castro-Mac{\\'\\i}as and Pablo Morales-Alvarez and Yunan Wu and Rafael Molina and Aggelos Katsaggelos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iNS3SC949v}\n}", "github": "", "reviewers": "uFQt;YAE1;jUey", "pdf_size": 9229397, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "84;52;68", "wc_strengths": "51;11;45", "wc_weaknesses": "406;89;260", "wc_questions": "26;108;44", "wc_limitations": "5;25;1", "wc_review": "572;285;418", "wc_reply_reviewers": "270;28;0", "wc_reply_authors": "1040;0;34", "reply_reviewers": "2;1;0", "reply_authors": "3;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.0, 13.063945294843617 ], "wc_strengths_avg": [ 35.666666666666664, 17.613126418163876 ], "wc_weaknesses_avg": [ 251.66666666666666, 129.5487896079654 ], "wc_questions_avg": [ 59.333333333333336, 35.188381921057726 ], "wc_limitations_avg": [ 10.333333333333334, 10.498677165349081 ], "wc_review_avg": [ 425.0, 117.27176414920459 ], "wc_reply_reviewers_avg": [ 99.33333333333333, 121.2197270341021 ], "wc_reply_authors_avg": [ 358.0, 482.44654280724893 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2695159041066549692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";;northwestern.edu;ugr.es;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Northwestern University;Universidad de Granada", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.ugr.es", "aff_unique_abbr": "NU;UGr", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Spain" }, { "title": "Your contrastive learning problem is secretly a distribution alignment problem", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94010", "id": "iNUKoLU8xb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iNUKoLU8xb", "openreview": "https://openreview.net/forum?id=iNUKoLU8xb", "poster": "/media/PosterPDFs/NeurIPS%202024/94010.png?t=1732737706.7977214", "project": "", "author_site": "Zihao Chen, Chi-Heng Lin, Ran Liu, Jingyun Xiao, Eva Dyer", "tldr": "", "abstract": "Despite the success of contrastive learning (CL) in vision and language, its theoretical foundations and mechanisms for building representations remain poorly understood. In this work, we build connections between noise contrastive estimation losses widely used in CL and distribution alignment with entropic optimal transport (OT). This connection allows us to develop a family of different losses and multistep iterative variants for existing CL methods. Intuitively, by using more information from the distribution of latents, our approach allows a more distribution-aware manipulation of the relationships within augmented sample sets.\nWe provide theoretical insights and experimental evidence demonstrating the benefits of our approach for generalized contrastive alignment. Through this framework, it is possible to leverage tools in OT to build unbalanced losses to handle noisy views and customize the representation space by changing the constraints on alignment.\nBy reframing contrastive learning as an alignment problem and leveraging existing optimization tools for OT, our work provides new insights and connections between different self-supervised learning models in addition to new tools that can be more easily adapted to incorporate domain knowledge into learning.", "keywords": "Optimal transport;Distribution alignment;Noise contrastive estimation", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ac0f8972f8ccc0fd106026e5f4cfb924252c73bc.zip", "author": "Zihao Chen;Chi-Heng Lin;Ran Liu;Jingyun Xiao;Eva L Dyer", "authorids": "~Zihao_Chen16;~Chi-Heng_Lin1;~Ran_Liu2;~Jingyun_Xiao1;~Eva_L_Dyer1", "gender": "M;M;F;M;F", "homepage": "https://www.zihaoiz.com/;https://www.chihenglin.com/;https://ranliu98.github.io/;;http://dyerlab.gatech.edu", "dblp": ";128/4282;;;64/8509.html", "google_scholar": "28FzfuAAAAAJ;OqSt2wMAAAAJ;vBEAxZgAAAAJ;LEHq9a8AAAAJ;Sb_jcHcAAAAJ", "orcid": ";;;;", "linkedin": "zihao-chen-804647140/;chi-heng-lin-986727217;;;", "or_profile": "~Zihao_Chen16;~Chi-Heng_Lin1;~Ran_Liu2;~Jingyun_Xiao1;~Eva_Dyer1", "aff": "Georgia Institute of Technology;Samsung Research America;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;samsung.com;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Researcher;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2024your,\ntitle={Your contrastive learning problem is secretly a distribution alignment problem},\nauthor={Zihao Chen and Chi-Heng Lin and Ran Liu and Jingyun Xiao and Eva L Dyer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iNUKoLU8xb}\n}", "github": "", "reviewers": "TBTc;96Mt;VXuY;ih3d", "pdf_size": 496012, "rating": "4;4;6;6", "confidence": "4;4;4;3", "soundness": "4;2;4;2", "novelty": "3;1;4;2", "presentation": "1;2;3;2", "wc_summary": "138;89;109;219", "wc_strengths": "102;46;202;259", "wc_weaknesses": "722;365;93;460", "wc_questions": "378;2;170;333", "wc_limitations": "18;15;1;242", "wc_review": "1358;517;575;1513", "wc_reply_reviewers": "851;282;24;0", "wc_reply_authors": "1096;355;35;53", "reply_reviewers": "3;2;1;0", "reply_authors": "4;3;2;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 138.75, 49.499368682842814 ], "wc_strengths_avg": [ 152.25, 83.19367463936186 ], "wc_weaknesses_avg": [ 410.0, 224.921097276356 ], "wc_questions_avg": [ 220.75, 148.11714114173282 ], "wc_limitations_avg": [ 69.0, 100.0874617522095 ], "wc_review_avg": [ 990.75, 448.5824199631546 ], "wc_reply_reviewers_avg": [ 289.25, 342.6509703765626 ], "wc_reply_authors_avg": [ 384.75, 429.8676395124434 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11093842536675861450&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "email": "gatech.edu;samsung.com;gatech.edu;gatech.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Georgia Institute of Technology;Samsung", "aff_unique_dep": ";Samsung Research America", "aff_unique_url": "https://www.gatech.edu;https://www.samsung.com/us/careers/research/", "aff_unique_abbr": "Georgia Tech;SRA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Superconductivity from Ordered and Disordered Material Structures", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97553", "id": "iNYrB3ip9F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iNYrB3ip9F", "openreview": "https://openreview.net/forum?id=iNYrB3ip9F", "poster": "/media/PosterPDFs/NeurIPS%202024/97553.png?t=1731478909.6947308", "project": "", "author_site": "Pin Chen, Luoxuan Peng, Rui Jiao, Qing Mo, Zhen Wang, Wenbing Huang, Yang Liu, Yutong Lu", "tldr": "", "abstract": "Superconductivity is a fascinating phenomenon observed in certain materials under certain conditions. However, some critical aspects of it, such as the relationship between superconductivity and materials' chemical/structural features, still need to be understood. Recent successes of data-driven approaches in material science strongly inspire researchers to study this relationship with them, but a corresponding dataset is still lacking. Hence, we present a new dataset for data-driven approaches, namely SuperCon3D, containing both 3D crystal structures and experimental superconducting transition temperature (Tc) for the first time. Based on SuperCon3D, we propose two deep learning methods for designing high Tc superconductors. The first is SODNet, a novel equivariant graph attention model for screening known structures, which differs from existing models in incorporating both ordered and disordered geometric content. The second is a diffusion generative model DiffCSP-SC for creating new structures, which enables high Tc-targeted generation. Extensive experiments demonstrate that both our proposed dataset and models are advantageous for designing new high Tc superconducting candidates.", "keywords": "Superconducting material design;equivariant graph neural networks;diffusion generative models", "primary_area": "", "supplementary_material": "/attachment/f7e54edd82e8a4bab416dbdf8957e4506ce65c67.pdf", "author": "Pin Chen;Luoxuan Peng;Rui Jiao;Qing Mo;Zhen WANG;Wenbing Huang;Yang Liu;Yutong Lu", "authorids": "~Pin_Chen1;~Luoxuan_Peng1;~Rui_Jiao1;~Qing_Mo1;~Zhen_WANG2;~Wenbing_Huang1;~Yang_Liu19;~Yutong_Lu1", "gender": "M;;M;;M;M;M;F", "homepage": ";https://github.com/Xxxuan11;https://jiaor17.github.io/;https://github.com/qingmo-nscc-gz;https://joneswong.github.io/;https://gsai.ruc.edu.cn/english/wenbing_huang;http://nlp.csai.tsinghua.edu.cn/~ly/;http://www.sysu.edu.cn", "dblp": "78/5412;;223/1073;;78/6727-36;155/3181-1.html;51/3710-5;", "google_scholar": ";;buW16-AAAAAJ;;e5CqTBMAAAAJ;0yNkmO4AAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ;", "orcid": "0000-0001-8746-9917;;;;0000-0002-8140-8782;;0000-0002-3087-242X;", "linkedin": ";;;;;;;", "or_profile": "~Pin_Chen1;~Luoxuan_Peng1;~Rui_Jiao1;~Qing_Mo1;~Zhen_WANG2;~Wenbing_Huang1;~Yang_Liu19;~Yutong_Lu1", "aff": "SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University;Tsinghua University;;SUN YAT-SEN UNIVERSITY;Renmin University of China;Tsinghua University;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;tsinghua.edu.cn;;sysu.edu.cn;ruc.edu.cn;tsinghua.edu.cn;sysu.edu.cn", "position": "Researcher;Intern;PhD student;;Associate Professor;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024learning,\ntitle={Learning Superconductivity from Ordered and Disordered Material Structures},\nauthor={Pin Chen and Luoxuan Peng and Rui Jiao and Qing Mo and Zhen WANG and Wenbing Huang and Yang Liu and Yutong Lu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iNYrB3ip9F}\n}", "github": "", "reviewers": "MswQ;zdb5;ZXok", "pdf_size": 24815719, "rating": "4;7;7", "confidence": "3;4;5", "wc_summary_and_contributions": "66;42;53", "wc_strengths": "59;43;71", "wc_improvement": "103;2;114", "wc_limitations": "5;66;36", "wc_correctness": "5;1;10", "wc_clarity": "13;1;4", "wc_relation_to_prior_work": "12;1;17", "wc_documentation": "35;1;28", "wc_additional_feedback": "1;1;1", "wc_review": "299;158;334", "wc_reply_reviewers": "0;9;26", "wc_reply_authors": "72;89;66", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 53.666666666666664, 9.809292646374775 ], "wc_strengths_avg": [ 57.666666666666664, 11.469767022723502 ], "wc_improvement_avg": [ 73.0, 50.40502620440412 ], "wc_limitations_avg": [ 35.666666666666664, 24.9042611258038 ], "wc_correctness_avg": [ 5.333333333333333, 3.6817870057290873 ], "wc_clarity_avg": [ 6.0, 5.0990195135927845 ], "wc_relation_to_prior_work_avg": [ 10.0, 6.683312551921141 ], "wc_documentation_avg": [ 21.333333333333332, 14.65908895153068 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 263.6666666666667, 76.07160369605 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 10.780641085864152 ], "wc_reply_authors_avg": [ 75.66666666666667, 9.741092797468305 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16846668468656437919&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sysu.edu.cn;sysu.edu.cn;tsinghua.edu.cn;;sysu.edu.cn;ruc.edu.cn;tsinghua.edu.cn;sysu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;1;0", "aff_unique_norm": "Sun Yat-sen University;Tsinghua University;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "SYSU;THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Fast and Memory-Efficient Video Diffusion Using Streamlined Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94009", "id": "iNvXYQrkpi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iNvXYQrkpi", "openreview": "https://openreview.net/forum?id=iNvXYQrkpi", "poster": "/media/PosterPDFs/NeurIPS%202024/94009.png?t=1731815246.5419972", "project": "", "author_site": "Zheng Zhan, Yushu Wu, Yifan Gong, Zichong Meng, Zhenglun Kong, Changdi Yang, Geng Yuan, Pu Zhao, Wei Niu, Yanzhi Wang", "tldr": "", "abstract": "The rapid progress in artificial intelligence-generated content (AIGC), especially with diffusion models, has significantly advanced development of high-quality video generation. However, current video diffusion models exhibit demanding computational requirements and high peak memory usage, especially for generating longer and higher-resolution videos. These limitations greatly hinder the practical application of video diffusion models on standard hardware platforms. To tackle this issue, we present a novel, training-free framework named Streamlined Inference, which leverages the temporal and spatial properties of video diffusion models. Our approach integrates three core components: Feature Slicer, Operator Grouping, and Step Rehash. Specifically, Feature Slicer effectively partitions input features into sub-features and Operator Grouping processes each sub-feature with a group of consecutive operators, resulting in significant memory reduction without sacrificing the quality or speed. Step Rehash further exploits the similarity between adjacent steps in diffusion, and accelerates inference through skipping unnecessary steps. Extensive experiments demonstrate that our approach significantly reduces peak memory and computational overhead, making it feasible to generate high-quality videos on a single consumer GPU (e.g., reducing peak memory of Animatediff from 42GB to 11GB, featuring faster inference on 2080Ti).", "keywords": "video diffusion models;training free;inference framework;Efficiency", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Zheng Zhan;Yushu Wu;Yifan Gong;Zichong Meng;Zhenglun Kong;Changdi Yang;Geng Yuan;Pu Zhao;Wei Niu;Yanzhi Wang", "authorids": "~Zheng_Zhan3;~Yushu_Wu1;~Yifan_Gong2;~Zichong_Meng1;~Zhenglun_Kong1;~Changdi_Yang1;~Geng_Yuan1;~Pu_Zhao1;~Wei_Niu3;~Yanzhi_Wang3", "gender": ";M;F;M;M;M;M;M;M;M", "homepage": ";https://wuyushuwys.github.io;https://yifanfanfanfan.github.io/;https://cr8br0ze.github.io/;https://sites.google.com/husky.neu.edu/zlk/home?authuser=1;;;https://puzhao.info/;https://www.niuwei.info;https://web.northeastern.edu/yanzhiwang/", "dblp": "156/4008-1.html;166/4244;49/3073-4.html;372/0186;211/6323;;205/3007;75/8475-1;68/828-2.html;", "google_scholar": "hwTuEX0AAAAJ;https://scholar.google.com/citations?hl=en;U_gevVgAAAAJ;HMQjIWoAAAAJ;XYa4NVYAAAAJ;aZvLu6wAAAAJ;tBIAgtgAAAAJ;rWZLnpwAAAAJ;w1RoaOMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-9883-7973;0000-0002-3912-097X;0009-0003-2051-2572;0000-0002-8120-4456;0000-0002-8848-3806;0000-0001-9844-992X;0000-0001-5018-2859;;", "linkedin": ";;yifan-gong-3059b8132/;;zhenglun-kong-35b527150/;;;;;", "or_profile": "~Zheng_Zhan3;~Yushu_Wu1;~Yifan_Gong2;~Zichong_Meng1;~Zhenglun_Kong1;~Changdi_Yang1;~Geng_Yuan1;~Pu_Zhao1;~Wei_Niu3;~Yanzhi_Wang3", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;University of Georgia;Northeastern University;University of Georgia;Northeastern University", "aff_domain": "neu.edu;northeastern.edu;neu.edu;northeastern.edu;northeastern.edu;northeastern.edu;uga.edu;neu.edu;uga.edu;northeastern.edu", "position": "PhD student;PhD student;PhD student;MS student;PhD student;PhD student;Assistant Professor;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhan2024fast,\ntitle={Fast and Memory-Efficient Video Diffusion Using Streamlined Inference},\nauthor={Zheng Zhan and Yushu Wu and Yifan Gong and Zichong Meng and Zhenglun Kong and Changdi Yang and Geng Yuan and Pu Zhao and Wei Niu and Yanzhi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iNvXYQrkpi}\n}", "github": "", "reviewers": "5EX5;M5Qw;2uzE;mL5N", "pdf_size": 14488506, "rating": "6;6;6;6", "confidence": "5;2;4;3", "soundness": "4;3;2;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "49;37;66;97", "wc_strengths": "56;36;13;45", "wc_weaknesses": "128;104;353;95", "wc_questions": "65;84;8;37", "wc_limitations": "1;66;4;6", "wc_review": "299;327;444;280", "wc_reply_reviewers": "10;16;21;0", "wc_reply_authors": "104;107;99;79", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;3;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 62.25, 22.554101622543072 ], "wc_strengths_avg": [ 37.5, 15.819292019556375 ], "wc_weaknesses_avg": [ 170.0, 106.3414312485966 ], "wc_questions_avg": [ 48.5, 28.74456470360962 ], "wc_limitations_avg": [ 19.25, 27.049722734253674 ], "wc_review_avg": [ 337.5, 63.7200910231616 ], "wc_reply_reviewers_avg": [ 11.75, 7.8222439235810075 ], "wc_reply_authors_avg": [ 97.25, 10.917302780449024 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7883140150346874911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "neu.edu;northeastern.edu;neu.edu;northeastern.edu;northeastern.edu;northeastern.edu;uga.edu;neu.edu;uga.edu;northeastern.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;0;1;0", "aff_unique_norm": "Northeastern University;University of Georgia", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.uga.edu", "aff_unique_abbr": "NEU;UGA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Epipolar-Free 3D Gaussian Splatting for Generalizable Novel View Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94008", "id": "iO6tcLJEwA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iO6tcLJEwA", "openreview": "https://openreview.net/forum?id=iO6tcLJEwA", "poster": "/media/PosterPDFs/NeurIPS%202024/94008.png?t=1731069793.6821089", "project": "", "author_site": "Zhiyuan Min, Yawei Luo, Jianwen Sun, Yi Yang", "tldr": "", "abstract": "Generalizable 3D Gaussian splitting (3DGS) can reconstruct new scenes from sparse-view observations in a feed-forward inference manner, eliminating the need for scene-specific retraining required in conventional 3DGS. However, existing methods rely heavily on epipolar priors, which can be unreliable in complex real-world scenes, particularly in non-overlapping and occluded regions. In this paper, we propose eFreeSplat, an efficient feed-forward 3DGS-based model for generalizable novel view synthesis that operates independently of epipolar line constraints. To enhance multiview feature extraction with 3D perception, we employ a self-supervised Vision Transformer (ViT) with cross-view completion pre-training on large-scale datasets. Additionally, we introduce an Iterative Cross-view Gaussians Alignment method to ensure consistent depth scales across different views. Our eFreeSplat represents a new paradigm for generalizable novel view synthesis. We evaluate eFreeSplat on wide-baseline novel view synthesis tasks using the RealEstate10K and ACID datasets. Extensive experiments demonstrate that eFreeSplat surpasses state-of-the-art baselines that rely on epipolar priors, achieving superior geometry reconstruction and novel view synthesis quality.", "keywords": "epipolar-free;3DGS;generalizable novel view synthesis", "primary_area": "machine_vision", "supplementary_material": "/attachment/e1006f473d3df4276ce7066da359cbbfd8aabc2a.zip", "author": "Zhiyuan Min;Yawei Luo;Jianwen Sun;Yi Yang", "authorids": "~Zhiyuan_Min1;~Yawei_Luo3;~Jianwen_Sun1;~Yi_Yang22", "gender": "M;M;M;M", "homepage": "https://github.com/tatakai1;;http://faculty.ccnu.edu.cn/2012980010;https://person.zju.edu.cn/yiyang", "dblp": "361/2278;160/7852;;33/4854-1.html", "google_scholar": ";pnVwaGsAAAAJ;;RMSuNFwAAAAJ", "orcid": ";;0000-0002-0951-1072;", "linkedin": ";;;", "or_profile": "~Zhiyuan_Min1;~Yawei_Luo3;~Jianwen_Sun1;~Yi_Yang22", "aff": "Zhejiang University;Zhejiang University;Central China Normal University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;ccnu.edu.cn;zju.edu.cn", "position": "MS student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmin2024epipolarfree,\ntitle={Epipolar-Free 3D Gaussian Splatting for Generalizable Novel View Synthesis},\nauthor={Zhiyuan Min and Yawei Luo and Jianwen Sun and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iO6tcLJEwA}\n}", "github": "", "reviewers": "MtQr;1Kz3;AnyG;w4rj", "pdf_size": 14837255, "rating": "4;5;5;7", "confidence": "3;5;3;5", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "69;51;33;68", "wc_strengths": "48;69;55;50", "wc_weaknesses": "103;239;19;205", "wc_questions": "84;4;61;5", "wc_limitations": "21;46;14;5", "wc_review": "325;409;182;333", "wc_reply_reviewers": "0;651;0;0", "wc_reply_authors": "50;1708;50;50", "reply_reviewers": "0;4;0;0", "reply_authors": "2;4;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.25, 14.703315952532613 ], "wc_strengths_avg": [ 55.5, 8.200609733428363 ], "wc_weaknesses_avg": [ 141.5, 86.64150275705056 ], "wc_questions_avg": [ 38.5, 34.96069221282668 ], "wc_limitations_avg": [ 21.5, 15.239750654128171 ], "wc_review_avg": [ 312.25, 82.03467254764902 ], "wc_reply_reviewers_avg": [ 162.75, 281.89126893183476 ], "wc_reply_authors_avg": [ 464.5, 717.9350597372996 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jWivQHl6XkEJ:scholar.google.com/&scioq=Epipolar-Free+3D+Gaussian+Splatting+for+Generalizable+Novel+View+Synthesis&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;ccnu.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Zhejiang University;Central China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.ccnu.edu.cn", "aff_unique_abbr": "ZJU;CCNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Expectation Alignment: Handling Reward Misspecification in the Presence of Expectation Mismatch", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94007", "id": "iO7viYaAt7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iO7viYaAt7", "openreview": "https://openreview.net/forum?id=iO7viYaAt7", "poster": "", "project": "", "author_site": "Malek Mechergui, Sarath Sreedharan", "tldr": "", "abstract": "Detecting and handling misspecified objectives, such as reward functions, has been widely recognized as one of the central challenges within the domain of Artificial Intelligence (AI) safety research. However, even with the recognition of the importance of this problem, we are unaware of any works that attempt to provide a clear definition for what constitutes (a) misspecified objectives and (b) successfully resolving such misspecifications. In this work, we use the theory of mind, i.e., the human user's beliefs about the AI agent, as a basis to develop a formal explanatory framework, called Expectation Alignment (EAL), to understand the objective misspecification and its causes.\nOur EAL framework not only acts as an explanatory framework for existing works but also provides us with concrete insights into the limitations of existing methods to handle reward misspecification and novel solution strategies. We use these insights to propose a new interactive algorithm that uses the specified reward to infer potential user expectations about the system behavior. We show how one can efficiently implement this algorithm by mapping the inference problem into linear programs. We evaluate our method on a set of standard Markov Decision Process (MDP) benchmarks.", "keywords": "Reward Design", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Malek Mechergui;Sarath Sreedharan", "authorids": "~Malek_Mechergui1;~Sarath_Sreedharan1", "gender": "F;", "homepage": "https://www.linkedin.com/in/malekmechergui/;", "dblp": ";162/5110", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Malek_Mechergui1;~Sarath_Sreedharan1", "aff": "Colorado State University;Colorado State University", "aff_domain": "colostate.edu;colostate.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nmechergui2024expectation,\ntitle={Expectation Alignment: Handling Reward Misspecification in the Presence of Expectation Mismatch},\nauthor={Malek Mechergui and Sarath Sreedharan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iO7viYaAt7}\n}", "github": "", "reviewers": "RJbh;ahA3;pegd", "pdf_size": 538382, "rating": "5;7;7", "confidence": "3;4;3", "soundness": "2;4;2", "novelty": "3;4;2", "presentation": "3;3;4", "wc_summary": "77;147;87", "wc_strengths": "41;94;69", "wc_weaknesses": "255;226;32", "wc_questions": "110;170;121", "wc_limitations": "58;5;3", "wc_review": "541;642;312", "wc_reply_reviewers": "1507;126;100", "wc_reply_authors": "1066;111;21", "reply_reviewers": "3;2;1", "reply_authors": "5;3;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.66666666666667, 30.912061651652344 ], "wc_strengths_avg": [ 68.0, 21.64871050817269 ], "wc_weaknesses_avg": [ 171.0, 98.99831648400222 ], "wc_questions_avg": [ 133.66666666666666, 26.08107018935807 ], "wc_limitations_avg": [ 22.0, 25.468935326524086 ], "wc_review_avg": [ 498.3333333333333, 138.05876365599622 ], "wc_reply_reviewers_avg": [ 577.6666666666666, 657.2236216759779 ], "wc_reply_authors_avg": [ 399.3333333333333, 472.8342439187566 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4880703283749264463&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "colostate.edu;colostate.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Colorado State University", "aff_unique_dep": "", "aff_unique_url": "https://www.colostate.edu", "aff_unique_abbr": "CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "3DET-Mamba: Causal Sequence Modelling for End-to-End 3D Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94006", "id": "iOleSlC80F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iOleSlC80F", "openreview": "https://openreview.net/forum?id=iOleSlC80F", "poster": "", "project": "", "author_site": "Mingsheng Li, Jiakang Yuan, Sijin Chen, Lin Zhang, Anyu Zhu, Xin Chen, Tao Chen", "tldr": "", "abstract": "Transformer-based architectures have been proven successful in detecting 3D objects from point clouds. However, the quadratic complexity of the attention mechanism struggles to encode rich information as point cloud resolution increases. Recently, state space models (SSM) such as Mamba have gained great attention due to their linear complexity and long sequence modeling ability for language understanding. To exploit the potential of Mamba on 3D scene-level perception, for the first time, we propose 3DET-Mamba, which is a novel SSM-based model designed for indoor 3d object detection. Specifically, we divide the point cloud into different patches and use a lightweight yet effective Inner Mamba to capture local geometric information. To observe the scene from a global perspective, we introduce a novel Dual Mamba module that models the point cloud in terms of spatial distribution and continuity. Additionally, we design a Query-aware Mamba module that decodes context features into object sets under the guidance of learnable queries. Extensive experiments demonstrate that 3DET-Mamba surpasses previous 3DETR on indoor 3D detection benchmarks such as ScanNet, improving AP25/AP50 from 65.0\\%/47.0\\% to 70.4\\%/54.4\\%, respectively.", "keywords": "3D Object Detection;Mamba", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mingsheng Li;Jiakang Yuan;Sijin Chen;Lin Zhang;Anyu Zhu;Xin Chen;Tao Chen", "authorids": "~Mingsheng_Li2;~Jiakang_Yuan1;~Sijin_Chen1;~Lin_Zhang15;~Anyu_Zhu1;~Xin_Chen16;~Tao_Chen6", "gender": "M;M;M;M;M;M;M", "homepage": "https://jiakangyuan.github.io/;https://ch3cook-fdu.github.io/;https://github.com/zl2048;;https://chenxin.tech/;https://eetchen.github.io/;https://github.com/MSheng-Lee", "dblp": "323/7363;96/9616;;;24/1518-40;69/510-3;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;;7qeAJZ4AAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;", "orcid": ";;;0000-0002-7825-5113;0000-0002-9347-1367;;", "linkedin": ";;;;xin-chen-cs/;;", "or_profile": "~Jiakang_Yuan1;~Sijin_Chen1;~Lin_Zhang15;~Anyu_Zhu1;~Xin_Chen16;~Tao_Chen6;~Li_Mingsheng1", "aff": "Shanghai AI Laboratory;Fudan University;StepFun;Central South University;Tencent;Fudan University;Fudan University", "aff_domain": "pjlab.org.cn;fudan.edu.cn;stepfun.com;csu.edu.cn;tencent.com;fudan.edu.cn;fudan.edu.cn", "position": "Intern;MS student;Intern;Undergrad student;Researcher;Full Professor;MS student", "bibtex": "@inproceedings{\nli2024detmamba,\ntitle={3{DET}-Mamba: Causal Sequence Modelling for End-to-End 3D Object Detection},\nauthor={Mingsheng Li and Jiakang Yuan and Sijin Chen and Lin Zhang and Anyu Zhu and Xin Chen and Tao Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iOleSlC80F}\n}", "github": "", "reviewers": "2ZE6;m4H4;eAYh", "pdf_size": 4288251, "rating": "5;5;6", "confidence": "5;4;4", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "2;3;3", "wc_summary": "45;104;86", "wc_strengths": "70;70;63", "wc_weaknesses": "152;173;151", "wc_questions": "3;75;11", "wc_limitations": "1;1;1", "wc_review": "271;423;312", "wc_reply_reviewers": "0;80;120", "wc_reply_authors": "0;164;34", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.33333333333333, 24.689178916188272 ], "wc_strengths_avg": [ 67.66666666666667, 3.2998316455372216 ], "wc_weaknesses_avg": [ 158.66666666666666, 10.143416036468626 ], "wc_questions_avg": [ 29.666666666666668, 32.221455929585524 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 335.3333333333333, 64.20972581228554 ], "wc_reply_reviewers_avg": [ 66.66666666666667, 49.88876515698588 ], "wc_reply_authors_avg": [ 66.0, 70.67295569499458 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KmrM-S5iEV8J:scholar.google.com/&scioq=3DET-Mamba:+Causal+Sequence+Modelling+for+End-to-End+3D+Object+Detection&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "pjlab.org.cn;fudan.edu.cn;stepfun.com;csu.edu.cn;tencent.com;fudan.edu.cn;fudan.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4;1;1", "aff_unique_norm": "Shanghai AI Laboratory;Fudan University;StepFun;Central South University;Tencent", "aff_unique_dep": ";;;;Tencent Holdings Limited", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.fudan.edu.cn;;https://www.csu.edu.cn;https://www.tencent.com", "aff_unique_abbr": "SAIL;Fudan;;CSU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "UDON: Universal Dynamic Online distillatioN for generic image representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94005", "id": "iQUxHrCna0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iQUxHrCna0", "openreview": "https://openreview.net/forum?id=iQUxHrCna0", "poster": "/media/PosterPDFs/NeurIPS%202024/94005.png?t=1733494714.862858", "project": "", "author_site": "Nikolaos-Antonios Ypsilantis, Kaifeng Chen, Andre Araujo, Ondrej Chum", "tldr": "", "abstract": "Universal image representations are critical in enabling real-world fine-grained and instance-level recognition applications, where objects and entities from any domain must be identified at large scale.\nDespite recent advances, existing methods fail to capture important domain-specific knowledge, while also ignoring differences in data distribution across different domains.\nThis leads to a large performance gap between efficient universal solutions and expensive approaches utilising a collection of specialist models, one for each domain.\nIn this work, we make significant strides towards closing this gap, by introducing a new learning technique, dubbed UDON (Universal Dynamic Online distillatioN).\nUDON employs multi-teacher distillation, where each teacher is specialized in one domain, to transfer detailed domain-specific knowledge into the student universal embedding.\nUDON's distillation approach is not only effective, but also very efficient, by sharing most model parameters between the student and all teachers, where all models are jointly trained in an online manner.\nUDON also comprises a sampling technique which adapts the training process to dynamically allocate batches to domains which are learned slower and require more frequent processing.\nThis boosts significantly the learning of complex domains which are characterised by a large number of classes and long-tail distributions.\nWith comprehensive experiments, we validate each component of UDON, and showcase significant improvements over the state of the art in the recent UnED benchmark.\nCode: https://github.com/nikosips/UDON.", "keywords": "Universal Image Embeddings;Image Retrieval;Fine-grained recognition;Instance-level recognition", "primary_area": "machine_vision", "supplementary_material": "", "author": "Nikolaos-Antonios Ypsilantis;Kaifeng Chen;Andre Araujo;Ondrej Chum", "authorids": "~Nikolaos-Antonios_Ypsilantis1;~Kaifeng_Chen2;~Andre_Araujo1;~Ondrej_Chum1", "gender": "M;M;M;M", "homepage": "https://nikosips.github.io/;;https://andrefaraujo.github.io/;http://cmp.felk.cvut.cz/~chum/", "dblp": "313/1452;186/7404;177/1567;96/63", "google_scholar": "srRJhYEAAAAJ;xjEcoNQAAAAJ;_ASUnDcAAAAJ;4T42Ke0AAAAJ", "orcid": ";;;", "linkedin": "nikolaos-antonios-ypsilantis-252744135/;kaifeng-chen-b37a2b69/;andrefaraujo;", "or_profile": "~Nikolaos-Antonios_Ypsilantis1;~Kaifeng_Chen2;~Andre_Araujo1;~Ondrej_Chum1", "aff": "Czech Technical University in Prague;Google;Google Research;Czech Technical Univeresity in Prague, Czech Technical University of Prague", "aff_domain": "fel.cvut.cz;google.com;google.com;fel.cvut.cz", "position": "PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nypsilantis2024udon,\ntitle={{UDON}: Universal Dynamic Online distillatioN for generic image representations},\nauthor={Nikolaos-Antonios Ypsilantis and Kaifeng Chen and Andre Araujo and Ondrej Chum},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iQUxHrCna0}\n}", "github": "", "reviewers": "uq4A;Mmnc;xwWg;CCei", "pdf_size": 4938573, "rating": "3;5;6;6", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;4", "wc_summary": "75;33;65;97", "wc_strengths": "58;20;49;78", "wc_weaknesses": "174;12;281;136", "wc_questions": "1;26;2;124", "wc_limitations": "1;3;2;1", "wc_review": "309;94;399;436", "wc_reply_reviewers": "47;0;19;26", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 67.5, 23.038012067016545 ], "wc_strengths_avg": [ 51.25, 20.873128658636684 ], "wc_weaknesses_avg": [ 150.75, 96.14409758274296 ], "wc_questions_avg": [ 38.25, 50.509281325316834 ], "wc_limitations_avg": [ 1.75, 0.82915619758885 ], "wc_review_avg": [ 309.5, 132.71492003539015 ], "wc_reply_reviewers_avg": [ 23.0, 16.80773631397161 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3259460302053208215&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "fel.cvut.cz;google.com;google.com;fel.cvut.cz", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Czech Technical University;Google;Czech Technical University in Prague", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.ctu.cz;https://www.google.com;https://www.ctu.cz", "aff_unique_abbr": "CTU;Google;CTU", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Prague;Mountain View", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Czech Republic;United States" }, { "id": "iRHxp1ibFj", "title": "Approximate Size Targets Are Sufficient for Accurate Semantic Segmentation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a new general form of image-level supervision for semantic segmentation based on approximate targets for the relative size of segments. At each training image, such targets are represented by a categorical distribution for the \"expected\" average prediction over the image pixels. We motivate the zero-avoiding variant of KL divergence as a general training loss for any segmentation architecture leading to quality on par with the full pixel-level supervision. However, our image-level supervision is significantly less expensive, it needs to know only an approximate fraction of an image occupied by each class. Such estimates are easy for a human annotator compared to pixel-accurate labeling. Our loss shows significant robustness to size target errors, which may even improve the generalization quality. The proposed size targets can be seen as an extension of the standard class tags, which correspond to non-zero size targets in each image. Using only a minimal amount of extra information, our supervision improves and simplifies the training. It works on standard segmentation architectures as is, unlike tag-based methods requiring complex specialized modifications and multi-stage training.", "keywords": "Cardinality of segments;KL divergence", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xingye Fan;Zhongwen Zhang;Yuri Boykov", "authorids": "~Xingye_Fan1;~Zhongwen_Zhang1;~Yuri_Boykov1", "gender": ";M;M", "homepage": ";;https://cs.uwaterloo.ca/~yboykov/", "dblp": ";02/10655;b/YuriBoykov", "google_scholar": ";;h6_PdYsAAAAJ", "orcid": ";;0000-0001-6374-1736", "linkedin": ";;", "or_profile": "~Xingye_Fan1;~Zhongwen_Zhang1;~Yuri_Boykov1", "aff": ";University of Waterloo;University of Waterloo", "aff_domain": ";uwaterloo.ca;uwaterloo.ca", "position": ";PhD student;Professor", "bibtex": "@misc{\nanonymous2024approximate,\ntitle={Approximate Size Targets Are Sufficient for Accurate Semantic Segmentation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=iRHxp1ibFj}\n}", "github": "", "project": "", "reviewers": "GY7b;AEBs;FTCz;thaL", "site": "https://openreview.net/forum?id=iRHxp1ibFj", "pdf_size": 6422470, "rating": "4;5;5;6", "confidence": "5;2;4;5", "soundness": "2;2;2;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "74;70;93;44", "wc_strengths": "24;64;91;68", "wc_weaknesses": "281;58;77;150", "wc_questions": "7;48;3;25", "wc_limitations": "18;58;9;4", "wc_review": "404;298;273;291", "wc_reply_reviewers": "192;0;0;67", "wc_reply_authors": "1135;0;0;533", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 17.469616481193857 ], "wc_strengths_avg": [ 61.75, 24.107830678018296 ], "wc_weaknesses_avg": [ 141.5, 87.5571242104262 ], "wc_questions_avg": [ 20.75, 17.781661902083282 ], "wc_limitations_avg": [ 22.25, 21.241174637952582 ], "wc_review_avg": [ 316.5, 51.33468612936092 ], "wc_reply_reviewers_avg": [ 64.75, 78.39443538925451 ], "wc_reply_authors_avg": [ 417.0, 468.1767828502392 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vw3YrLfX1cIJ:scholar.google.com/&scioq=Approximate+Size+Targets+Are+Sufficient+for+Accurate+Semantic+Segmentation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "SubgDiff: A Subgraph Diffusion Model to Improve Molecular Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94004", "id": "iSMTo0toDO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iSMTo0toDO", "openreview": "https://openreview.net/forum?id=iSMTo0toDO", "poster": "/media/PosterPDFs/NeurIPS%202024/94004.png?t=1731396085.9842536", "project": "", "author_site": "JIYING ZHANG, Zijing Liu, Yu Wang, Bin Feng, Yu Li", "tldr": "", "abstract": "Molecular representation learning has shown great success in advancing AI-based drug discovery. A key insight of many recent works is that the 3D geometric structure of molecules provides essential information about their physicochemical properties. Recently, denoising diffusion probabilistic models have achieved impressive performance in molecular 3D conformation generation. However, most existing molecular diffusion models treat each atom as an independent entity, overlooking the dependency among atoms within the substructures. This paper introduces a novel approach that enhances molecular representation learning by incorporating substructural information in the diffusion model framework. We propose a novel diffusion model termed SubgDiff for involving the molecular subgraph information in diffusion. Specifically, SubgDiff adopts three vital techniques: i) subgraph prediction, ii) expectation state, and iii) k-step same subgraph diffusion, to enhance the perception of molecular substructure in the denoising network. Experiments on extensive downstream tasks, especially the molecular force predictions, demonstrate the superior performance of our approach.", "keywords": "Diffusion model;Subgraph;Molecular representation learning", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jiying Zhang;Zijing Liu;Yu Wang;Bin Feng;Yu Li", "authorids": "~Jiying_Zhang1;~Zijing_Liu1;~Yu_Wang43;~Bin_Feng4;~Yu_Li4", "gender": "M;;F;M;M", "homepage": "https://youjibiying.github.io/;https://github.com/zj-liu;https://github.com/rain305f;https://github.com/BFeng14;https://yu-li.github.io/", "dblp": "287/9432;205/3211;;;34/2997-3", "google_scholar": "j90eZ0MAAAAJ;;lzsu-5MAAAAJ;;j9lwU7kAAAAJ", "orcid": ";0000-0002-0189-7409;;;", "linkedin": ";;;;", "or_profile": "~Jiying_Zhang1;~Zijing_Liu1;~Yu_Wang43;~Bin_Feng4;~Yu_Li4", "aff": "IDEA;International Digital Economy Academy;Peking University;International Digital Economy Academy, International Digital Economy Academy;International Digital Economy Academy", "aff_domain": "idea.edu.cn;idea.edu.cn;pku.edu.cn;idea.edu.cn;idea.edu.cn", "position": "Researcher;Researcher;MS student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2024subgdiff,\ntitle={SubgDiff: A Subgraph Diffusion Model to Improve Molecular Representation Learning},\nauthor={Jiying Zhang and Zijing Liu and Yu Wang and Bin Feng and Yu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iSMTo0toDO}\n}", "github": "", "reviewers": "aMXv;VBzc;BpDA;Djew", "pdf_size": 5097046, "rating": "4;5;5;6", "confidence": "4;3;3;4", "soundness": "2;2;2;3", "novelty": "3;3;2;3", "presentation": "2;2;3;3", "wc_summary": "52;37;52;90", "wc_strengths": "7;22;43;29", "wc_weaknesses": "53;91;290;73", "wc_questions": "46;4;2;154", "wc_limitations": "1;1;1;33", "wc_review": "159;155;388;379", "wc_reply_reviewers": "0;0;43;27", "wc_reply_authors": "52;52;52;30", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 57.75, 19.60070151805797 ], "wc_strengths_avg": [ 25.25, 12.968712349342937 ], "wc_weaknesses_avg": [ 126.75, 95.20602659495879 ], "wc_questions_avg": [ 51.5, 61.731272463800714 ], "wc_limitations_avg": [ 9.0, 13.856406460551018 ], "wc_review_avg": [ 270.25, 113.3035193628159 ], "wc_reply_reviewers_avg": [ 17.5, 18.39157415774952 ], "wc_reply_authors_avg": [ 46.5, 9.526279441628825 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=339696470561768881&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "idea.edu.cn;idea.edu.cn;pku.edu.cn;idea.edu.cn;idea.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Institute of Electrical and Electronics Engineers;International Digital Economy Academy;Peking University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ieee.org;;http://www.pku.edu.cn", "aff_unique_abbr": "IEEE;;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2", "aff_country_unique": "United States;;China" }, { "title": "Learn To be Efficient: Build Structured Sparsity in Large Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94003", "id": "iSfCWhvEGA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iSfCWhvEGA", "openreview": "https://openreview.net/forum?id=iSfCWhvEGA", "poster": "/media/PosterPDFs/NeurIPS%202024/94003.png?t=1733796951.599831", "project": "", "author_site": "Haizhong Zheng, Xiaoyan Bai, Xueshen Liu, Zhuoqing Morley Mao, Beidi Chen, Fan Lai, Atul Prakash", "tldr": "", "abstract": "Large Language Models (LLMs) have achieved remarkable success with their billion-level parameters, yet they incur high inference overheads. The emergence of activation sparsity in LLMs provides a natural approach to reduce this cost by involving only parts of the parameters for inference. However, existing methods only focus on utilizing this naturally formed activation sparsity in a post-training setting, overlooking the potential for further amplifying this inherent sparsity. In this paper, we hypothesize that LLMs can learn to be efficient by achieving more structured activation sparsity. To achieve this, we introduce a novel training algorithm, Learn-To-be-Efficient (LTE), designed to train efficiency-aware LLMs to learn to activate fewer neurons and achieve a better trade-off between sparsity and performance. Furthermore, unlike SOTA MoEfication methods, which mainly focus on ReLU-based models, LTE can also be applied to LLMs like LLaMA using non-ReLU activations. Extensive evaluation on language understanding, language generation, and instruction tuning tasks show that LTE consistently outperforms SOTA baselines. Along with our hardware-aware custom kernel implementation, LTE reduces LLaMA2-7B inference latency by 25% at 50% sparsity.", "keywords": "LLM inference efficiency;Moefication;Contexual Sparsity", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/74a24bcc97b701b3719f9f18e757e9218889ee44.zip", "author": "Haizhong Zheng;Xiaoyan Bai;Xueshen Liu;Zhuoqing Mao;Beidi Chen;Fan Lai;Atul Prakash", "authorids": "~Haizhong_Zheng1;~Xiaoyan_Bai1;~Xueshen_Liu1;~Zhuoqing_Mao1;~Beidi_Chen1;~Fan_Lai1;~Atul_Prakash1", "gender": "M;F;M;F;F;;", "homepage": "http://zhenghaizhong.com/;https://elena-baixy.github.io/;https://xenshinu.github.io/;https://web.eecs.umich.edu/~zmao/;https://www.andrew.cmu.edu/user/beidic/;https://fanlai.me/;https://www.eecs.umich.edu/~aprakash", "dblp": "158/4817;63/3140;;;192/1339;179/2330;p/AtulPrakash", "google_scholar": "Zx6pKsQAAAAJ;ic3BUhMAAAAJ;3AxUf6QAAAAJ;Ba_Ci9UAAAAJ;;PlWcEMsAAAAJ;kIkHa2IAAAAJ", "orcid": "0000-0003-3723-8701;;0009-0001-0227-7463;;;;0000-0002-4907-3687", "linkedin": "haizhong-zheng-1093a0a7/;elenabai/;xueshen-liu-a75718205/;;;;atul-prakash-8729a44/", "or_profile": "~Haizhong_Zheng1;~Xiaoyan_Bai1;~Xueshen_Liu1;~Zhuoqing_Mao1;~Beidi_Chen1;~Fan_Lai1;~Atul_Prakash1", "aff": "University of Michigan;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan;Meta Facebook;Google;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;fb.com;google.com;umich.edu", "position": "PhD student;Undergrad student;PhD student;Professor;Researcher;Researcher;Professor", "bibtex": "@inproceedings{\nzheng2024learn,\ntitle={Learn To be Efficient: Build Structured Sparsity in Large Language Models},\nauthor={Haizhong Zheng and Xiaoyan Bai and Xueshen Liu and Zhuoqing Mao and Beidi Chen and Fan Lai and Atul Prakash},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iSfCWhvEGA}\n}", "github": "", "reviewers": "zH4X;aQHS;BZ5G;Jye3", "pdf_size": 1776474, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "60;99;85;36", "wc_strengths": "46;57;46;54", "wc_weaknesses": "272;49;72;69", "wc_questions": "21;31;132;10", "wc_limitations": "19;56;8;6", "wc_review": "418;292;343;175", "wc_reply_reviewers": "27;31;11;19", "wc_reply_authors": "50;73;45;52", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 24.093567606313517 ], "wc_strengths_avg": [ 50.75, 4.866980583482946 ], "wc_weaknesses_avg": [ 115.5, 90.78683825313006 ], "wc_questions_avg": [ 48.5, 48.77755631435425 ], "wc_limitations_avg": [ 22.25, 20.104414938017968 ], "wc_review_avg": [ 307.0, 88.41097216974825 ], "wc_reply_reviewers_avg": [ 22.0, 7.681145747868608 ], "wc_reply_authors_avg": [ 55.0, 10.700467279516348 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14826115114835184752&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "umich.edu;umich.edu;umich.edu;umich.edu;fb.com;google.com;umich.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2;0", "aff_unique_norm": "University of Michigan;Meta;Google", "aff_unique_dep": ";Meta Platforms, Inc.;Google", "aff_unique_url": "https://www.umich.edu;https://meta.com;https://www.google.com", "aff_unique_abbr": "UM;Meta;Google", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Ann Arbor;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Concept Bottleneck Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94002", "id": "iSjqTQ5S1f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iSjqTQ5S1f", "openreview": "https://openreview.net/forum?id=iSjqTQ5S1f", "poster": "/media/PosterPDFs/NeurIPS%202024/94002.png?t=1732293994.9811182", "project": "", "author_site": "Moritz Vandenhirtz, Sonia Laguna, Ri\u010dards Marcinkevi\u010ds, Julia Vogt", "tldr": "", "abstract": "Concept Bottleneck Models (CBMs) have emerged as a promising interpretable method whose final prediction is based on intermediate, human-understandable concepts rather than the raw input. Through time-consuming manual interventions, a user can correct wrongly predicted concept values to enhance the model's downstream performance. We propose *Stochastic Concept Bottleneck Models* (SCBMs), a novel approach that models concept dependencies. In SCBMs, a single-concept intervention affects all correlated concepts, thereby improving intervention effectiveness. Unlike previous approaches that model the concept relations via an autoregressive structure, we introduce an explicit, distributional parameterization that allows SCBMs to retain the CBMs' efficient training and inference procedure. \nAdditionally, we leverage the parameterization to derive an effective intervention strategy based on the confidence region. We show empirically on synthetic tabular and natural image datasets that our approach improves intervention effectiveness significantly. Notably, we showcase the versatility and usability of SCBMs by examining a setting with CLIP-inferred concepts, alleviating the need for manual concept annotations.", "keywords": "Concept Bottleneck Models;Interventions;Interpretability;Concepts", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Moritz Vandenhirtz;Sonia Laguna;Ri\u010dards Marcinkevi\u010ds;Julia E Vogt", "authorids": "~Moritz_Vandenhirtz1;~Sonia_Laguna1;~Ri\u010dards_Marcinkevi\u010ds1;~Julia_E_Vogt1", "gender": "M;F;F;M", "homepage": ";https://mds.inf.ethz.ch/team/detail/sonia-laguna;http://mds.inf.ethz.ch;https://rmarcinkevics.github.io/", "dblp": ";313/3156;13/8412;234/8553", "google_scholar": "H2cG0BwAAAAJ;PljVnCQAAAAJ;UoeV-8kAAAAJ;https://scholar.google.ch/citations?user=XcxXOJsAAAAJ", "orcid": ";0000-0003-3504-2051;;0000-0001-8901-5062", "linkedin": "moritz-simon-vandenhirtz-488b0b16b/;;julia-vogt-50b53895;ri%C4%8Dards-m-668568106?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3Byeq5%2FsReRoWG3HN7r6A5Lw%3D%3D", "or_profile": "~Moritz_Vandenhirtz1;~Sonia_Laguna1;~Julia_E_Vogt1;~Ricards_Marcinkevics1", "aff": "ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch;inf.ethz.ch", "position": "PhD student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nvandenhirtz2024stochastic,\ntitle={Stochastic Concept Bottleneck Models},\nauthor={Moritz Vandenhirtz and Sonia Laguna and Ri{\\v{c}}ards Marcinkevi{\\v{c}}s and Julia E Vogt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iSjqTQ5S1f}\n}", "github": "", "reviewers": "9xQm;sfUo;rLqE", "pdf_size": 2034754, "rating": "5;5;7", "confidence": "4;4;3", "soundness": "3;2;4", "novelty": "2;3;4", "presentation": "3;3;3", "wc_summary": "37;62;121", "wc_strengths": "21;25;103", "wc_weaknesses": "19;408;96", "wc_questions": "121;5;63", "wc_limitations": "4;18;56", "wc_review": "202;518;439", "wc_reply_reviewers": "9;111;18", "wc_reply_authors": "39;39;39", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.33333333333333, 35.216788925485844 ], "wc_strengths_avg": [ 49.666666666666664, 37.74770044504551 ], "wc_weaknesses_avg": [ 174.33333333333334, 168.1910289588069 ], "wc_questions_avg": [ 63.0, 47.35680169380811 ], "wc_limitations_avg": [ 26.0, 21.96967607104544 ], "wc_review_avg": [ 386.3333333333333, 134.27418052461002 ], "wc_reply_reviewers_avg": [ 46.0, 46.10856753359401 ], "wc_reply_authors_avg": [ 39.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11772638938481831897&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ethz.ch;inf.ethz.ch;ethz.ch;inf.ethz.ch", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Embodied Agent Interface: Benchmarking LLMs for Embodied Decision Making", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97552", "id": "iSwK1YqO7v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iSwK1YqO7v", "openreview": "https://openreview.net/forum?id=iSwK1YqO7v", "poster": "/media/PosterPDFs/NeurIPS%202024/97552.png?t=1733762523.9838076", "project": "", "author_site": "Manling Li, Shiyu Zhao, Qineng Wang, Kangrui Wang, Yu Zhou, Sanjana Srivastava, Cem Gokmen, Tony Lee, Erran Li Li, Ruohan Zhang, Weiyu Liu, Percy Liang, Li Fei-Fei, Jiayuan Mao, Jiajun Wu", "tldr": "", "abstract": "We aim to evaluate Large Language Models (LLMs) for embodied decision making. While a significant body of work has been leveraging LLMs for decision making in embodied environments, we still lack a systematic understanding of their performance because they are usually applied in different domains, for different purposes, and built based on different inputs and outputs. Furthermore, existing evaluations tend to rely solely on a final success rate, making it difficult to pinpoint what ability is missing in LLMs and where the problem lies, which in turn blocks embodied agents from leveraging LLMs effectively and selectively. To address these limitations, we propose a generalized interface (Embodied Agent Interface) that supports the formalization of various types of tasks and input-output specifications of LLM-based modules. Specifically, it allows us to unify 1) a broad set of embodied decision-making tasks involving both state and temporally extended goals, 2) four commonly-used LLM-based modules for decision making: goal interpretation, subgoal decomposition, action sequencing, and transition modeling, and 3) a collection of fine-grained metrics that break down evaluation into error types, such as hallucination errors, affordance errors, and various types of planning errors. Overall, our benchmark offers a comprehensive assessment of LLMs\u2019 performance for different subtasks, pinpointing the strengths and weaknesses in LLM-powered embodied AI systems and providing insights into the effective and selective use of LLMs in embodied decision making.", "keywords": "Embodied Agent;LLMs;Embodied Decision Making;Physical State Change", "primary_area": "", "supplementary_material": "", "author": "Manling Li;Shiyu Zhao;Qineng Wang;Kangrui Wang;Yu Zhou;Sanjana Srivastava;Cem Gokmen;Tony Lee;Li Erran Li;Ruohan Zhang;Weiyu Liu;Percy Liang;Li Fei-Fei;Jiayuan Mao;Jiajun Wu", "authorids": "~Manling_Li1;~Shiyu_Zhao5;~Qineng_Wang1;~Kangrui_Wang2;~Yu_Zhou20;~Sanjana_Srivastava2;~Cem_Gokmen1;~Tony_Lee1;~Li_Erran_Li1;~Ruohan_Zhang1;~Weiyu_Liu1;~Percy_Liang1;~Li_Fei-Fei1;~Jiayuan_Mao1;~Jiajun_Wu1", "gender": "F;M;M;M;M;;M;M;;M;M;;F;F;M", "homepage": "https://limanling.github.io/;https://shiyu-zhao.netlify.app/;https://qinengwang-aiden.github.io/;https://jameskrw.github.io/;https://yu-bryan-zhou.github.io/;;https://www.cemgokmen.com;;http://www.cs.columbia.edu/~lierranli/;https://ai.stanford.edu/~zharu/;http://weiyuliu.com/;https://cs.stanford.edu/~pliang/;https://profiles.stanford.edu/fei-fei-li;http://jiayuanm.com;https://jiajunwu.com", "dblp": "178/3620;;360/6561;216/9159;36/2728-30.html;;220/3187;46/4265;l/ErranLLi.html;;133/0311.html;04/1701;79/2528;200/8283;117/4768", "google_scholar": "6U4SXnUAAAAJ;;;;61rJc-YAAAAJ;sqTh_dwAAAAJ;wCiI8oUAAAAJ;OYNdx48AAAAJ;GkMfzy4AAAAJ;-bqvNWoAAAAJ;PHi0YEQAAAAJ;pouyVyUAAAAJ;rDfyQnIAAAAJ;-xaOIZIAAAAJ;2efgcS0AAAAJ", "orcid": ";;;;0000-0002-2593-9167;;0000-0001-9446-6052;;;;;;;0000-0003-4798-3748;0000-0002-4176-343X", "linkedin": ";shiyu-zhao-1124a0266/;;wang-kangrui-8b9a37257/;yu-zhou-997359178/;sanjana-srivastava5/;cgokmen/;tonyhlee/;;;;;fei-fei-li-4541247/;;jiajunwu/", "or_profile": "~Manling_Li1;~Shiyu_Zhao5;~Qineng_Wang1;~Kangrui_Wang2;~Yu_Zhou20;~Sanjana_Srivastava2;~Cem_Gokmen1;~Tony_Lee1;~Li_Erran_Li1;~Ruohan_Zhang1;~Weiyu_Liu1;~Percy_Liang1;~Li_Fei-Fei1;~Jiayuan_Mao1;~Jiajun_Wu1", "aff": "Stanford University;Stanford University;Zhejiang University;Northwestern University;Stanford University;Stanford University;Stanford University;Stanford University;Columbia University;Stanford University;Stanford University;Stanford University;Stanford University;Massachusetts Institute of Technology;Stanford University", "aff_domain": "stanford.edu;stanford.edu;zju.edu.cn;northwestern.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;columbia.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;mit.edu;stanford.edu", "position": "Postdoc;MS student;Undergrad student;PhD student;Research Intern;PhD student;PhD student;Researcher;Adjunct Professor;Postdoc;Postdoc;Associate Professor;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2024embodied,\ntitle={Embodied Agent Interface: Benchmarking {LLM}s for Embodied Decision Making},\nauthor={Manling Li and Shiyu Zhao and Qineng Wang and Kangrui Wang and Yu Zhou and Sanjana Srivastava and Cem Gokmen and Tony Lee and Li Erran Li and Ruohan Zhang and Weiyu Liu and Percy Liang and Li Fei-Fei and Jiayuan Mao and Jiajun Wu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iSwK1YqO7v}\n}", "github": "", "reviewers": "tF6x;hSVb;yhYD;1xw9", "pdf_size": 25309611, "rating": "7;8;8;9", "confidence": "4;4;4;4", "wc_summary_and_contributions": "160;64;117;97", "wc_strengths": "142;78;66;97", "wc_improvement": "454;158;22;104", "wc_limitations": "9;17;16;17", "wc_correctness": "7;1;19;1", "wc_clarity": "4;1;17;4", "wc_relation_to_prior_work": "1;1;17;12", "wc_documentation": "15;6;27;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "793;327;302;337", "wc_reply_reviewers": "13;37;0;0", "wc_reply_authors": "20;49;52;52", "reply_reviewers": "1;1;0;0", "reply_authors": "4;3;2;2", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 109.5, 34.7598906787694 ], "wc_strengths_avg": [ 95.75, 28.899610724021873 ], "wc_improvement_avg": [ 184.5, 162.95628248091572 ], "wc_limitations_avg": [ 14.75, 3.344772040064913 ], "wc_correctness_avg": [ 7.0, 7.3484692283495345 ], "wc_clarity_avg": [ 6.5, 6.18465843842649 ], "wc_relation_to_prior_work_avg": [ 7.75, 6.977642868476432 ], "wc_documentation_avg": [ 13.0, 9.082951062292475 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 439.75, 204.3469782012937 ], "wc_reply_reviewers_avg": [ 12.5, 15.107944929738128 ], "wc_reply_authors_avg": [ 43.25, 13.47915056670857 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4195683906488898012&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;zju.edu.cn;northwestern.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;columbia.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;mit.edu;stanford.edu", "author_num": 15, "aff_unique_index": "0;0;1;2;0;0;0;0;3;0;0;0;0;4;0", "aff_unique_norm": "Stanford University;Zhejiang University;Northwestern University;Columbia University;Massachusetts Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.stanford.edu;https://www.zju.edu.cn;https://www.northwestern.edu;https://www.columbia.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;ZJU;NU;Columbia;MIT", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "iTUlYblV0K", "title": "MQuAKE-Remastered: Multi-Hop Knowledge Editing Can Only Be Advanced With Reliable Evaluations", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) can give out erroneous answers to factually rooted questions either as a result of undesired training outcomes or simply because the world has moved on after a certain knowledge cutoff date. Under such scenarios, *knowledge editing* often comes to the rescue by delivering efficient patches for such erroneous answers without significantly altering the rests, where many editing methods have seen reasonable success when the editing targets are simple and direct (e.g., *\"what club does Lionel Messi currently play for?\"*).\n\nHowever, knowledge fragments like this are often deeply intertwined in the real world, making effectively propagating the editing effect to non-directly related questions a practical challenge (e.g., *\"who is the offspring of the owner of the club that Messi currently plays for?\"*). Prior arts have coined this task as *multi-hop knowledge editing* with the most popular dataset being MQuAKE, serving as the sole evaluation benchmark for many later proposed editing methods due to the expensive nature of making knowledge editing datasets at scale. \n\nIn this work, we reveal that **up to 33\\% or 76\\% of MQuAKE's questions and ground truth labels are, in fact, corrupted in various fashions due to some unintentional clerical or procedural oversights**. Our work provides a detailed audit of MQuAKE's error pattern and a comprehensive fix without sacrificing its dataset capacity. Additionally, we benchmarked almost all proposed \\mquake{}-evaluated editing methods on our post-fix dataset, \\mquaker{}. It is our observation that many methods try to overfit the original \\mquake{} by exploiting some data-specific properties of \\mquake{}. We provide a guideline on how to faithfully approach such datasets and show that a simple, minimally invasive approach can bring excellent editing performance without such exploitation. Please refer to [`https://github.com/henryzhongsc/MQuAKE-Remastered`](https://github.com/henryzhongsc/MQuAKE-Remastered) and supplemental material for assets.", "keywords": "knowledge edit;model edit;multi-hop;question answering;natural language processing;dataset audit", "primary_area": "", "supplementary_material": "/attachment/b4f87abe5315f69aa9fa5cbdedb6f60c85167924.zip", "author": "Shaochen Zhong;Yifan Lu;Lize Shao;Bhargav Bhushanam;Xiaocong Du;Louis Feng;Yixin Wan;Yucheng Shi;Daochen Zha;Yiwei Wang;Ninghao Liu;Kaixiong Zhou;Shuai Xu;Vipin Chaudhary;Xia Hu", "authorids": "~Shaochen_Zhong1;~Yifan_Lu8;~Lize_Shao1;~Bhargav_Bhushanam1;~Xiaocong_Du2;~Louis_Feng1;~Yixin_Wan1;~Yucheng_Shi2;~Daochen_Zha1;~Yiwei_Wang2;~Ninghao_Liu2;~Kaixiong_Zhou1;~Shuai_Xu2;~Vipin_Chaudhary2;~Xia_Hu4", "gender": "M;M;;M;F;M;F;M;;M;;M;M;M;", "homepage": "https://openreview.net/profile?id=~Shaochen_Zhong1;;;;;;https://scholar.google.com/citations?user=hZPIICQAAAAJ&hl=en;https://sycny.github.io/;http://dczha.com/;;;https://kaixiong-zhou.github.io/;https://engineering.case.edu/profiles/sxx214;https://engineering.case.edu/profiles/vxc204;", "dblp": "326/7286.html;;;292/2704;;;320/5376;;167/0903;50/5889-1;;178/7315;;c/VipinChaudhary.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;TVqeW7gAAAAJ;;nYaiiJQAAAAJ;wctRExYAAAAJ;IcwWyQEAAAAJ;hZPIICQAAAAJ;https://scholar.google.co.uk/citations?hl=en;jK0NgMcAAAAJ;https://scholar.google.com.hk/citations?user=Sh9QvBkAAAAJ;;zMspIjIAAAAJ;wu-vtI4AAAAJ;vJbjqpIAAAAJ;", "orcid": ";;;;;;;0009-0007-4192-1315;0000-0002-6677-7504;;;0000-0001-5226-8736;;0000-0001-9672-6225;", "linkedin": "shaochen-henry-zhong-96a941249/;;;;;louisfeng/;elaine-yixin-wan-8032b8136/;;daochen-zha;;;;;vipin-chaudhary-379529/;", "or_profile": "~Shaochen_Zhong1;~Yifan_Lu8;~Lize_Shao1;~Bhargav_Bhushanam1;~Xiaocong_Du2;~Louis_Feng1;~Yixin_Wan1;~Yucheng_Shi2;~Daochen_Zha1;~Yiwei_Wang2;~Ninghao_Liu2;~Kaixiong_Zhou1;~Shuai_Xu2;~Vipin_Chaudhary2;~Xia_Hu4", "aff": "Rice University;Rice University;;Meta Facebook;;;University of California, Los Angeles;University of Georgia;Airbnb;UCLA Computer Science Department, University of California, Los Angeles;;Massachusetts Institute of Technology;Case Western Reserve University;Case Western Reserve University;", "aff_domain": "rice.edu;rice.edu;;fb.com;;;ucla.edu;uga.edu;airbnb.com;cs.ucla.edu;;mit.edu;case.edu;case.edu;", "position": "PhD student;Undergrad student;;Researcher;;;PhD student;PhD student;Researcher;Postdoc;;Postdoc;Assistant Professor;Full Professor;", "bibtex": "@misc{\nanonymous2024mquakeremastered,\ntitle={{MQ}u{AKE}-Remastered: Multi-Hop Knowledge Editing Can Only Be Advanced With Reliable Evaluations},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=iTUlYblV0K}\n}", "github": "", "project": "", "reviewers": "wp4u;LCmw;ujS8;jSuo", "site": "https://openreview.net/forum?id=iTUlYblV0K", "pdf_size": 297620, "rating": "3;4;6;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "87;94;58;104", "wc_strengths": "150;34;6;122", "wc_improvement": "755;5;6;50", "wc_limitations": "1;5;1;1", "wc_correctness": "1;5;1;1", "wc_clarity": "1;5;9;1", "wc_relation_to_prior_work": "10;34;1;1", "wc_documentation": "1;5;9;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "1007;188;92;282", "wc_reply_reviewers": "648;0;0;19", "wc_reply_authors": "4063;1188;668;2514", "reply_reviewers": "8;0;0;1", "reply_authors": "10;5;3;6", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 85.75, 17.122718826167766 ], "wc_strengths_avg": [ 78.0, 59.665735560705194 ], "wc_improvement_avg": [ 204.0, 318.63850991366377 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_correctness_avg": [ 2.0, 1.7320508075688772 ], "wc_clarity_avg": [ 4.0, 3.3166247903554 ], "wc_relation_to_prior_work_avg": [ 11.5, 13.5 ], "wc_documentation_avg": [ 4.0, 3.3166247903554 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 392.25, 361.22733492912744 ], "wc_reply_reviewers_avg": [ 166.75, 277.95806788075066 ], "wc_reply_authors_avg": [ 2108.25, 1314.0453521473298 ], "reply_reviewers_avg": [ 2.25, 3.344772040064913 ], "reply_authors_avg": [ 6.0, 2.5495097567963922 ], "replies_avg": [ 38, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nlzD_xCaI-4J:scholar.google.com/&scioq=MQuAKE-Remastered:+Multi-Hop+Knowledge+Editing+Can+Only+Be+Advanced+With+Reliable+Evaluations&hl=en&as_sdt=0,31", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;3;4;2;5;6;6", "aff_unique_norm": "Rice University;Meta;University of California, Los Angeles;University of Georgia;Airbnb;Massachusetts Institute of Technology;Case Western Reserve University", "aff_unique_dep": ";Meta Platforms, Inc.;;;;;", "aff_unique_url": "https://www.rice.edu;https://meta.com;https://www.ucla.edu;https://www.uga.edu;https://www.airbnb.com;https://web.mit.edu;https://www.case.edu", "aff_unique_abbr": "Rice;Meta;UCLA;UGA;Airbnb;MIT;CWRU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "STimage-1K4M: A histopathology image-gene expression dataset for spatial transcriptomics", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97551", "id": "iTyOWtcCU2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iTyOWtcCU2", "openreview": "https://openreview.net/forum?id=iTyOWtcCU2", "poster": "", "project": "", "author_site": "Jiawen Chen, Muqing Zhou, Wenrong Wu, Jinwei Zhang, Yun Li, Didong Li", "tldr": "", "abstract": "Recent advances in multi-modal algorithms have driven and been driven by the increasing availability of large image-text datasets, leading to significant strides in various fields, including computational pathology. However, in most existing medical image-text datasets, the text typically provides high-level summaries that may not sufficiently describe sub-tile regions within a large pathology image. For example, an image might cover an extensive tissue area containing cancerous and healthy regions, but the accompanying text might only specify that this image is a cancer slide, lacking the nuanced details needed for in-depth analysis. In this study, we introduce STimage-1K4M, a novel dataset designed to bridge this gap by providing genomic features for sub-tile images. STimage-1K4M contains 1,149 images derived from spatial transcriptomics data, which captures gene expression information at the level of individual spatial spots within a pathology image. Specifically, each image in the dataset is broken down into smaller sub-image tiles, with each tile paired with $15,000-30,000$ dimensional gene expressions. With $4,293,195$ pairs of sub-tile images and gene expressions, STimage-1K4M offers unprecedented granularity, paving the way for a wide range of advanced research in multi-modal data analysis an innovative applications in computational pathology, and beyond.", "keywords": "Vision-Language;Dataset;Histopathology;Gene expression;Spatial transcriptomics", "primary_area": "", "supplementary_material": "/attachment/33854171a6e377ed242d41646fcff1d4f521b05b.pdf", "author": "Jiawen Chen;Muqing Zhou;Wenrong Wu;Jinwei Zhang;Yun Li;Didong Li", "authorids": "~Jiawen_Chen4;~Muqing_Zhou1;~Wenrong_Wu1;~Jinwei_Zhang4;~Yun_Li7;~Didong_Li1", "gender": "F;M;M;M;Not Specified;", "homepage": "https://github.com/JiawenChenn;;https://www.linkedin.com/in/wenrong-wu-35;;https://yunliweb.its.unc.edu;https://sites.google.com/view/didongli/", "dblp": "04/6087;;;;;211/6299", "google_scholar": "_5IUKh0AAAAJ;;;;https://scholar.google.com/citations?hl=en;YBVhMxoAAAAJ", "orcid": "0000-0002-6193-534X;0009-0006-7926-7243;;;0000-0002-9275-4189;0000-0001-9146-705X", "linkedin": ";;;jinwei-zhang-b1a02b2b2/;;", "or_profile": "~Jiawen_Chen4;~Muqing_Zhou1;~Wenrong_Wu1;~Jinwei_Zhang4;~Yun_Li7;~Didong_Li1", "aff": "University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "unc.edu;unc.edu;ad.unc.edu;unc.edu;unc.edu;unc.edu", "position": "PhD student;PhD student;MS student;MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024stimagekm,\ntitle={{ST}image-1K4M: A histopathology image-gene expression dataset for spatial transcriptomics},\nauthor={Jiawen Chen and Muqing Zhou and Wenrong Wu and Jinwei Zhang and Yun Li and Didong Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iTyOWtcCU2}\n}", "github": "", "reviewers": "8sup;LBXw;8uje;cGjP", "pdf_size": 18898705, "rating": "5;6;7;7", "confidence": "4;2;4;4", "wc_summary_and_contributions": "68;92;30;122", "wc_strengths": "88;51;64;143", "wc_improvement": "65;88;129;77", "wc_limitations": "90;34;22;119", "wc_correctness": "53;37;19;16", "wc_clarity": "21;29;24;8", "wc_relation_to_prior_work": "7;3;9;1", "wc_documentation": "41;3;64;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "434;338;362;495", "wc_reply_reviewers": "28;11;26;68", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 78.0, 33.67491648096547 ], "wc_strengths_avg": [ 86.5, 35.21718330588067 ], "wc_improvement_avg": [ 89.75, 24.076700355322778 ], "wc_limitations_avg": [ 66.25, 39.8269695056001 ], "wc_correctness_avg": [ 31.25, 14.905955185763842 ], "wc_clarity_avg": [ 20.5, 7.762087348130012 ], "wc_relation_to_prior_work_avg": [ 5.0, 3.1622776601683795 ], "wc_documentation_avg": [ 29.0, 24.92990172463582 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 407.25, 61.76315649317156 ], "wc_reply_reviewers_avg": [ 33.25, 21.111312133545844 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2483257642772913876&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "unc.edu;unc.edu;ad.unc.edu;unc.edu;unc.edu;unc.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Induced Model Matching: Restricted Models Help Train Full-Featured Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94001", "id": "iW0wXE0VyR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iW0wXE0VyR", "openreview": "https://openreview.net/forum?id=iW0wXE0VyR", "poster": "/media/PosterPDFs/NeurIPS%202024/94001.png?t=1733704136.779684", "project": "", "author_site": "Usama Muneeb, Mesrob I Ohannessian", "tldr": "", "abstract": "We consider scenarios where a very accurate (often small) predictive model using restricted features is available when training a full-featured (often larger) model. This restricted model may be thought of as ``side-information'', and can come either from an auxiliary dataset or from the same dataset by forcing the restriction. How can the restricted model be useful to the full model? To answer this, we introduce a methodology called Induced Model Matching (IMM). IMM aligns the context-restricted, or induced, version of the large model with the restricted model. We relate IMM to approaches such as noising, which is implicit in addressing the problem, and reverse knowledge distillation from weak teachers, which is explicit but does not exploit restriction being the nature of the weakness. We show that these prior methods can be thought of as approximations to IMM and can be problematic in terms of consistency. Experimentally, we first motivate IMM using logistic regression as a toy example. We then explore it in language modeling, the application that initially inspired it, and demonstrate it on both LSTM and transformer full models, using bigrams as restricted models. We lastly give a simple RL example, which shows that POMDP policies can help learn better MDP policies. The IMM principle is thus generally applicable in common scenarios where restricted data is cheaper to collect or restricted models are easier to learn.", "keywords": "restricted;model;learning", "primary_area": "other", "supplementary_material": "", "author": "Usama Muneeb;Mesrob I Ohannessian", "authorids": "~Usama_Muneeb1;~Mesrob_I_Ohannessian1", "gender": "M;M", "homepage": "https://usamamuneeb.github.io;https://sites.google.com/site/mesrob/", "dblp": "241/5125;77/7823", "google_scholar": ";pswXDJgAAAAJ", "orcid": ";0000-0002-6479-9769", "linkedin": ";mesrob-ohannessian/", "or_profile": "~Usama_Muneeb1;~Mesrob_I_Ohannessian1", "aff": "University of Illinois at Chicago;University of Illinois, Chicago", "aff_domain": "uic.edu;uic.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmuneeb2024induced,\ntitle={Induced Model Matching: Restricted Models Help Train Full-Featured Models},\nauthor={Usama Muneeb and Mesrob I Ohannessian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iW0wXE0VyR}\n}", "github": "", "reviewers": "vLY4;ADbo;2S2s;vmaB", "pdf_size": 856208, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "4;2;3;4", "wc_summary": "87;111;129;232", "wc_strengths": "55;48;101;35", "wc_weaknesses": "308;135;72;110", "wc_questions": "191;208;165;29", "wc_limitations": "1;2;1;1", "wc_review": "642;504;468;407", "wc_reply_reviewers": "143;40;0;233", "wc_reply_authors": "277;143;133;1606", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;2;4", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 139.75, 55.30540208695711 ], "wc_strengths_avg": [ 59.75, 24.873429598670143 ], "wc_weaknesses_avg": [ 156.25, 90.43886056336623 ], "wc_questions_avg": [ 148.25, 70.5314646097754 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 505.25, 86.23043256298787 ], "wc_reply_reviewers_avg": [ 104.0, 90.93129274347748 ], "wc_reply_authors_avg": [ 539.75, 618.2197728154608 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zeqnoelSV98J:scholar.google.com/&scioq=Induced+Model+Matching:+Restricted+Models+Help+Train+Full-Featured+Models&hl=en&as_sdt=0,48", "gs_version_total": 2, "email": "uic.edu;uic.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "APEBench: A Benchmark for Autoregressive Neural Emulators of PDEs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97550", "id": "iWc0qE116u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iWc0qE116u", "openreview": "https://openreview.net/forum?id=iWc0qE116u", "poster": "/media/PosterPDFs/NeurIPS%202024/97550.png?t=1732889001.6506903", "project": "", "author_site": "Felix Koehler, Simon Niedermayr, r\u00fcdiger westermann, Nils Thuerey", "tldr": "", "abstract": "We introduce the **A**utoregressive **P**DE **E**mulator Benchmark (APEBench), a comprehensive benchmark suite to evaluate autoregressive neural emulators for solving partial differential equations. APEBench is based on JAX and provides a seamlessly integrated differentiable simulation framework employing efficient pseudo-spectral methods, enabling 46 distinct PDEs across 1D, 2D, and 3D. Facilitating systematic analysis and comparison of learned emulators, we propose a novel taxonomy for unrolled training and introduce a unique identifier for PDE dynamics that directly relates to the stability criteria of classical numerical methods. APEBench enables the evaluation of diverse neural architectures, and unlike existing benchmarks, its tight integration of the solver enables support for differentiable physics training and neural-hybrid emulators. Moreover, APEBench emphasizes rollout metrics to understand temporal generalization, providing insights into the long-term behavior of emulating PDE dynamics. In several experiments, we highlight the similarities between neural emulators and numerical simulators. The code is available at [github.com/tum-pbs/apebench](https://github.com/tum-pbs/apebench) and APEBench can be installed via `pip install apebench`.", "keywords": "PDEs;Neural Operators;Neural Solvers;Numerical Methods;Autoregressive Emulators;Scientific Machine Learning", "primary_area": "", "supplementary_material": "/attachment/ea90ece0be3e15c4a3d19a6eeb9909d0613c93e4.zip", "author": "Felix Koehler;Simon Niedermayr;r\u00fcdiger westermann;Nils Thuerey", "authorids": "~Felix_Koehler1;~Simon_Niedermayr1;~r\u00fcdiger_westermann1;~Nils_Thuerey1", "gender": "M;M;M;M", "homepage": "https://fkoehler.site/;https://campus.tum.de/tumonline/ee/ui/ca2/app/desktop/#/pl/ui/$ctx/visitenkarte.show_vcard?$ctx=design=ca2;header=max;lang=de&pPersonenGruppe=3&pPersonenId=0E06ACAED2418A81;https://www.cs.cit.tum.de/cg/cover-page/;https://ge.in.tum.de", "dblp": "392/3781;366/4088;w/RudigerWestermann;42/478", "google_scholar": "GLNLfF4AAAAJ;7VzeWK4AAAAJ;a8YUuWwAAAAJ;https://scholar.google.com.tw/citations?user=GEehwv8AAAAJ", "orcid": "0009-0007-3479-3079;;0000-0002-3394-0731;", "linkedin": "felix-koehler;;;", "or_profile": "~Felix_Koehler1;~Simon_Niedermayr1;~r\u00fcdiger_westermann1;~Nils_Thuerey1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "tum.de;tum.de;tum.de;tum.de", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkoehler2024apebench,\ntitle={{APEB}ench: A Benchmark for Autoregressive Neural Emulators of {PDE}s},\nauthor={Felix Koehler and Simon Niedermayr and r{\\\"u}diger westermann and Nils Thuerey},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iWc0qE116u}\n}", "github": "", "reviewers": "rrDt;oS3v;psus;HkPL", "pdf_size": 4553899, "rating": "6;7;7;9", "confidence": "2;4;3;3", "wc_summary_and_contributions": "55;33;66;96", "wc_strengths": "126;44;36;27", "wc_improvement": "249;47;216;26", "wc_limitations": "121;7;1;33", "wc_correctness": "220;8;2;17", "wc_clarity": "348;10;9;4", "wc_relation_to_prior_work": "119;47;1;17", "wc_documentation": "20;23;1;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "1259;220;333;245", "wc_reply_reviewers": "237;17;0;25", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "4;1;2;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 62.5, 22.699118925632334 ], "wc_strengths_avg": [ 58.25, 39.57508686029634 ], "wc_improvement_avg": [ 134.5, 98.9709553353912 ], "wc_limitations_avg": [ 40.5, 48.00781186432058 ], "wc_correctness_avg": [ 61.75, 91.52151386422757 ], "wc_clarity_avg": [ 92.75, 147.38618490211354 ], "wc_relation_to_prior_work_avg": [ 46.0, 45.2658811910251 ], "wc_documentation_avg": [ 17.0, 9.354143466934854 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 514.25, 432.0251005439383 ], "wc_reply_reviewers_avg": [ 69.75, 96.98292375464868 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7272032536467862136&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "tum.de;tum.de;tum.de;tum.de", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Physics-Informed Regularization for Domain-Agnostic Dynamical System Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/94000", "id": "iWlqbNE8P7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iWlqbNE8P7", "openreview": "https://openreview.net/forum?id=iWlqbNE8P7", "poster": "", "project": "", "author_site": "Zijie Huang, Wanjia Zhao, Jingdong Gao, Ziniu Hu, Xiao Luo, Yadi Cao, Yuanzhou Chen, Yizhou Sun, Wei Wang", "tldr": "", "abstract": "Learning complex physical dynamics purely from data is challenging due to the intrinsic properties of systems to be satisfied. Incorporating physics-informed priors, such as in Hamiltonian Neural Networks (HNNs), achieves high-precision modeling for energy-conservative systems. However, real-world systems often deviate from strict energy conservation and follow different physical priors. To address this, we present a framework that achieves high-precision modeling for a wide range of dynamical systems from the numerical aspect, by enforcing Time-Reversal Symmetry (TRS) via a novel regularization term. It helps preserve energies for conservative systems while serving as a strong inductive bias for non-conservative, reversible systems. While TRS is a domain-specific physical prior, we present the first theoretical proof that TRS loss can universally improve modeling accuracy by minimizing higher-order Taylor terms in ODE integration, which is numerically beneficial to various systems regardless of their properties, even for irreversible systems. By integrating the TRS loss within neural ordinary differential equation models, the proposed model TREAT demonstrates superior performance on diverse physical systems. It achieves a significant 11.5% MSE improvement in a challenging chaotic triple-pendulum scenario, underscoring TREAT\u2019s broad applicability and effectiveness.", "keywords": "Physics-informed Neural Networks; Dynamical Systems ; Graph Neural Networks", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/68b0b5639a7de67172cb97e45c0c570f83823b2f.zip", "author": "Zijie Huang;Wanjia Zhao;Jingdong Gao;Ziniu Hu;Xiao Luo;Yadi Cao;Yuanzhou Chen;Yizhou Sun;Wei Wang", "authorids": "~Zijie_Huang1;~Wanjia_Zhao1;~Jingdong_Gao1;~Ziniu_Hu1;~Xiao_Luo3;~Yadi_Cao1;~Yuanzhou_Chen1;~Yizhou_Sun1;~Wei_Wang13", "gender": "F;F;;M;M;M;M;F;F", "homepage": "https://zijieh.github.io/;https://wanjiazhao1203.github.io/;https://github.com/mxuan0;http://acbull.github.io;http://luoxiao12.github.io;https://eydcao.github.io/;;http://web.cs.ucla.edu/~yzsun/;http://www.cs.ucla.edu/~weiwang", "dblp": "246/8147-2;;;180/5436;50/1585-1;330/3851;298/1088;37/3868;w/WeiWang.html", "google_scholar": "SejA1zsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;x6ct1CsAAAAJ;https://scholar.google.com.hk/citations?;hkVMfjcAAAAJ;mQ0FosEAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ;UedS9LQAAAAJ", "orcid": ";;;;;0000-0001-8872-5759;0009-0003-0826-2697;;0000-0002-8180-2886", "linkedin": "zijie-huang-62514a177/;;;;%E9%9C%84-%E7%BD%97-303548214/;https://www.linkedin.com/mwlite/in/elliott-cao;yuanzhou-chen-215032253/;;wei-wang-8800845/", "or_profile": "~Zijie_Huang1;~Wanjia_Zhao1;~Jingdong_Gao1;~Ziniu_Hu1;~Xiao_Luo3;~Yadi_Cao1;~Yuanzhou_Chen1;~Yizhou_Sun1;~Wei_Wang13", "aff": "University of California, Los Angeles;Zhejiang University;;Deepmind;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;zju.edu.cn;;deepmind.com;cs.ucla.edu;ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;Undergrad student;;Visiting Researcher;Postdoc;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024physicsinformed,\ntitle={Physics-Informed Regularization for Domain-Agnostic Dynamical System Modeling},\nauthor={Zijie Huang and Wanjia Zhao and Jingdong Gao and Ziniu Hu and Xiao Luo and Yadi Cao and Yuanzhou Chen and Yizhou Sun and Wei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iWlqbNE8P7}\n}", "github": "", "reviewers": "D3dA;SfDm;edxC;hqsc", "pdf_size": 1506009, "rating": "6;6;7;8", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "21;74;77;68", "wc_strengths": "14;62;56;52", "wc_weaknesses": "24;121;74;32", "wc_questions": "76;106;86;10", "wc_limitations": "21;1;1;20", "wc_review": "156;364;294;182", "wc_reply_reviewers": "24;98;98;0", "wc_reply_authors": "62;339;349;0", "reply_reviewers": "1;2;2;0", "reply_authors": "2;4;3;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 60.0, 22.74862633215465 ], "wc_strengths_avg": [ 46.0, 18.81488772222678 ], "wc_weaknesses_avg": [ 62.75, 38.62237046065402 ], "wc_questions_avg": [ 69.5, 36.010415160061676 ], "wc_limitations_avg": [ 10.75, 9.756408150543928 ], "wc_review_avg": [ 249.0, 84.2436941260294 ], "wc_reply_reviewers_avg": [ 55.0, 43.829214001622255 ], "wc_reply_authors_avg": [ 187.5, 158.0672325309708 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2475058880004453918&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.ucla.edu;zju.edu.cn;;deepmind.com;cs.ucla.edu;ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu", "author_num": 9, "aff_unique_index": "0;1;2;0;0;0;0;0", "aff_unique_norm": "University of California, Los Angeles;Zhejiang University;DeepMind", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.zju.edu.cn;https://deepmind.com", "aff_unique_abbr": "UCLA;ZJU;DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;2;0;0;0;0;0", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "Spiking Token Mixer: An event-driven friendly Former structure for spiking neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93999", "id": "iYcY7KAkSy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iYcY7KAkSy", "openreview": "https://openreview.net/forum?id=iYcY7KAkSy", "poster": "/media/PosterPDFs/NeurIPS%202024/93999.png?t=1731311872.8342083", "project": "", "author_site": "Shikuang Deng, Yuhang Wu, KANGRUI DU, Shi Gu", "tldr": "", "abstract": "Spiking neural networks (SNNs), inspired by biological processes, use spike signals for inter-layer communication, presenting an energy-efficient alternative to traditional neural networks. To realize the theoretical advantages of SNNs in energy efficiency, it is essential to deploy them onto neuromorphic chips. On clock-driven synchronous chips, employing shorter time steps can enhance energy efficiency but reduce SNN performance. Compared to the clock-driven synchronous chip, the event-driven asynchronous chip achieves much lower energy consumption but only supports some specific network operations. Recently, a series of SNN projects have achieved tremendous success, significantly improving the SNN's performance. However, event-driven asynchronous chips do not support some of the proposed structures, making it impossible to integrate these SNNs into asynchronous hardware. In response to these problems, we propose the Spiking Token Mixer (STMixer) architecture, which consists exclusively of operations supported by asynchronous scenarios, including convolutional, fully connected layers and residual paths. Our series of experiments also demonstrates that STMixer achieves performance on par with spiking transformers in synchronous scenarios with very low timesteps. This indicates its ability to achieve the same level of performance with lower power consumption in synchronous scenarios. The codes are available at \\url{https://github.com/brain-intelligence-lab/STMixer_demo}.", "keywords": "spiking neural network; event-driven friendly; low energy consumption", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/4feb23203a64d3dc26455a99cc4c627efdd09b05.zip", "author": "Shikuang Deng;Yuhang Wu;Kangrui Du;Shi Gu", "authorids": "~Shikuang_Deng1;~Yuhang_Wu5;~Kangrui_Du3;~Shi_Gu1", "gender": "M;M;;M", "homepage": "https://www.guslab.org/;https://github.com/WHYwyh;https://nangongwubu.github.io/;https://rockdu.github.io", "dblp": "286/8188;;175/1269;", "google_scholar": "rtlmA3gAAAAJ;;9_jlOXUAAAAJ;", "orcid": ";;0000-0003-2303-6770;", "linkedin": ";;;", "or_profile": "~Shikuang_Deng1;~Yuhang_Wu5;~Shi_Gu1;~KANGRUI_DU2", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China, Tsinghua University;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "PhD student;MS student;Full Professor;Undergrad student", "bibtex": "@inproceedings{\ndeng2024spiking,\ntitle={Spiking Token Mixer: A event-driven friendly Former structure for spiking neural networks},\nauthor={Shikuang Deng and Yuhang Wu and Kangrui Du and Shi Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iYcY7KAkSy}\n}", "github": "", "reviewers": "2hoo;DoAS;ESkb", "pdf_size": 30308240, "rating": "5;7;7", "confidence": "4;5;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;2;4", "wc_summary": "51;48;79", "wc_strengths": "29;46;55", "wc_weaknesses": "103;77;46", "wc_questions": "57;184;47", "wc_limitations": "4;1;1", "wc_review": "244;356;228", "wc_reply_reviewers": "11;41;10", "wc_reply_authors": "117;12;11", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 59.333333333333336, 13.960261060914616 ], "wc_strengths_avg": [ 43.333333333333336, 10.780641085864152 ], "wc_weaknesses_avg": [ 75.33333333333333, 23.299976156401723 ], "wc_questions_avg": [ 96.0, 62.35917467916543 ], "wc_limitations_avg": [ 2.0, 1.4142135623730951 ], "wc_review_avg": [ 276.0, 56.94441734416699 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 14.383632673594278 ], "wc_reply_authors_avg": [ 46.666666666666664, 49.73485252371375 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AZ-Zj1wl67wJ:scholar.google.com/&scioq=Spiking+Token+Mixer:+An+event-driven+friendly+Former+structure+for+spiking+neural+networks&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Soft-Label Integration for Robust Toxicity Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93998", "id": "iYkhThIXG1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iYkhThIXG1", "openreview": "https://openreview.net/forum?id=iYkhThIXG1", "poster": "", "project": "", "author_site": "Zelei Cheng, Xian Wu, Jiahao Yu, Shuo Han, Xin-Qiang Cai, Xinyu Xing", "tldr": "", "abstract": "Toxicity classification in textual content remains a significant problem. Data with labels from a single annotator fall short of capturing the diversity of human perspectives. Therefore, there is a growing need to incorporate crowdsourced annotations for training an effective toxicity classifier. Additionally, the standard approach to training a classifier using empirical risk minimization (ERM) may fail to address the potential shifts between the training set and testing set due to exploiting spurious correlations. This work introduces a novel bi-level optimization framework that integrates crowdsourced annotations with the soft-labeling technique and optimizes the soft-label weights by Group Distributionally Robust Optimization (GroupDRO) to enhance the robustness against out-of-distribution (OOD) risk. We theoretically prove the convergence of our bi-level optimization algorithm. Experimental results demonstrate that our approach outperforms existing baseline methods in terms of both average and worst-group accuracy, confirming its effectiveness in leveraging crowdsourced annotations to achieve more effective and robust toxicity classification.", "keywords": "toxicity;LLM safety;crowdsourcing;OOD", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Zelei Cheng;Xian Wu;Jiahao Yu;Shuo Han;Xin-Qiang Cai;Xinyu Xing", "authorids": "~Zelei_Cheng1;~Xian_Wu8;~Jiahao_Yu1;~Shuo_Han5;~Xin-Qiang_Cai1;~Xinyu_Xing3", "gender": ";M;M;;M;M", "homepage": ";https://nuwuxian.github.io/;https://sherdencooper.github.io/;;https://caixq1996.github.io/;http://xinyuxing.org/", "dblp": "258/0335;03/5595-7.html;238/6241-1;;248/8034.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;ptWUm0EAAAAJ;mB4eowUAAAAJ;;rtMUMooAAAAJ;71rdofMAAAAJ", "orcid": "0000-0001-7478-933X;;;;;", "linkedin": ";;;;;", "or_profile": "~Zelei_Cheng1;~Xian_Wu8;~Jiahao_Yu1;~Shuo_Han5;~Xin-Qiang_Cai1;~Xinyu_Xing3", "aff": "Northwestern University;Northwestern University;Northwestern University;;The University of Tokyo;Northwestern University", "aff_domain": "northwestern.edu;northwestern.edu;northwestern.edu;;u-tokyo.ac.jp;northwestern.edu", "position": "PhD student;PhD student;PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\ncheng2024softlabel,\ntitle={Soft-Label Integration for Robust Toxicity Classification},\nauthor={Zelei Cheng and Xian Wu and Jiahao Yu and Shuo Han and Xin-Qiang Cai and Xinyu Xing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iYkhThIXG1}\n}", "github": "", "reviewers": "8fSW;ZkTs;jUAS;KPSa", "pdf_size": 1135761, "rating": "5;5;7;8", "confidence": "3;3;3;4", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "57;146;73;27", "wc_strengths": "30;166;93;68", "wc_weaknesses": "32;175;66;67", "wc_questions": "112;371;141;101", "wc_limitations": "1;1;1;8", "wc_review": "232;859;374;271", "wc_reply_reviewers": "35;21;39;27", "wc_reply_authors": "16;0;18;20", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.75, 43.79140897482062 ], "wc_strengths_avg": [ 89.25, 49.6657578216622 ], "wc_weaknesses_avg": [ 85.0, 53.83771911959124 ], "wc_questions_avg": [ 181.25, 110.52233937082585 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 434.0, 250.79772726242956 ], "wc_reply_reviewers_avg": [ 30.5, 6.98212002188447 ], "wc_reply_authors_avg": [ 13.5, 7.92148975887743 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5681573141908334656&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "northwestern.edu;northwestern.edu;northwestern.edu;;u-tokyo.ac.jp;northwestern.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Northwestern University;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NU;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "CooHOI: Learning Cooperative Human-Object Interaction with Manipulated Object Dynamics", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93997", "id": "iYzyTmd3Jd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iYzyTmd3Jd", "openreview": "https://openreview.net/forum?id=iYzyTmd3Jd", "poster": "/media/PosterPDFs/NeurIPS%202024/93997.png?t=1733955169.9292989", "project": "", "author_site": "Jiawei Gao, Ziqin Wang, Zeqi Xiao, Jingbo Wang, Tai WANG, Jinkun Cao, Xiaolin Hu, Si Liu, Jifeng Dai, Jiangmiao Pang", "tldr": "", "abstract": "Enabling humanoid robots to clean rooms has long been a pursued dream within humanoid research communities. However, many tasks require multi-humanoid collaboration, such as carrying large and heavy furniture together. Given the scarcity of motion capture data on multi-humanoid collaboration and the efficiency challenges associated with multi-agent learning, these tasks cannot be straightforwardly addressed using training paradigms designed for single-agent scenarios. In this paper, we introduce **Coo**perative **H**uman-**O**bject **I**nteraction (**CooHOI**), a framework designed to tackle the challenge of multi-humanoid object transportation problem through a two-phase learning paradigm: individual skill learning and subsequent policy transfer. First, a single humanoid character learns to interact with objects through imitation learning from human motion priors. Then, the humanoid learns to collaborate with others by considering the shared dynamics of the manipulated object using centralized training and decentralized execution (CTDE) multi-agent RL algorithms. When one agent interacts with the object, resulting in specific object dynamics changes, the other agents learn to respond appropriately, thereby achieving implicit communication and coordination between teammates. Unlike previous approaches that relied on tracking-based methods for multi-humanoid HOI, CooHOI is inherently efficient, does not depend on motion capture data of multi-humanoid interactions, and can be seamlessly extended to include more participants and a wide range of object types.", "keywords": "Physics-based Character Animation;Human-Object Interactions;Multi-Agent Cooperation", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/75ad5950dd8b5f0d43ac2b8aaf07a53453b3efd3.zip", "author": "Jiawei Gao;Ziqin Wang;Zeqi Xiao;Jingbo Wang;Tai Wang;Jinkun Cao;Xiaolin Hu;Si Liu;Jifeng Dai;Jiangmiao Pang", "authorids": "~Jiawei_Gao1;~Ziqin_Wang2;~Zeqi_Xiao2;~Jingbo_Wang3;~Tai_Wang2;~Jinkun_Cao1;~Xiaolin_Hu1;~Si_Liu5;~Jifeng_Dai1;~Jiangmiao_Pang1", "gender": "M;M;M;M;M;M;F;M;M;M", "homepage": "https://gao-jiawei.com/;https://github.com/wtt0213;https://github.com/xizaoqu;https://scholar.google.com/citations?user=GStTsxAAAAAJ&hl=en;https://www.jinkuncao.com;http://www.xlhu.cn/;https://colalab.net;https://jifengdai.org/;https://oceanpang.github.io/;https://tai-wang.github.io/", "dblp": "124/9335-4;;344/1615;10/1491-3.html;224/0126;60/6028-1;60/7642;14/9399;231/7630;", "google_scholar": "NJxUNrcAAAAJ;BF-xXsIAAAAJ;;GStTsxAAAAAJ;xDtTbmQAAAAJ;PksdgoUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;SH_-B_AAAAAJ;https://scholar.google.com/citations?authuser=0;JmbbZWIAAAAJ", "orcid": ";;;;;0000-0002-4907-7354;0000-0002-9180-2935;;0000-0002-6711-9319;", "linkedin": ";;;;;;;;;%E6%B3%B0-%E7%8E%8B-2b2738147/", "or_profile": "~Jiawei_Gao1;~Ziqin_Wang2;~Zeqi_Xiao2;~Jingbo_Wang3;~Jinkun_Cao1;~Xiaolin_Hu1;~Si_Liu5;~Jifeng_Dai1;~Jiangmiao_Pang1;~Tai_WANG1", "aff": "Tsinghua University;Beihang University;Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;Carnegie Mellon University;Tsinghua University;Beihang University;Tsinghua University;Shanghai AI Laboratory ;Shanghai AI Laboratory", "aff_domain": "tsinghua.edu.cn;buaa.edu.cn;ntu.edu.sg;pjlab.org.cn;andrew.cmu.edu;tsinghua.edu.cn;buaa.edu.cn;tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn", "position": "Undergrad student;PhD student;PhD student;Researcher;PhD student;Associate Professor;Full Professor;Associate Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\ngao2024coohoi,\ntitle={Coo{HOI}: Learning Cooperative Human-Object Interaction with Manipulated Object Dynamics},\nauthor={Jiawei Gao and Ziqin Wang and Zeqi Xiao and Jingbo Wang and Tai Wang and Jinkun Cao and Xiaolin Hu and Si Liu and Jifeng Dai and Jiangmiao Pang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iYzyTmd3Jd}\n}", "github": "", "reviewers": "kufk;ehnN;TU4S;Zw8u", "pdf_size": 7764496, "rating": "6;6;7;7", "confidence": "4;4;5;3", "soundness": "3;3;4;4", "novelty": "3;2;4;3", "presentation": "3;3;4;3", "wc_summary": "90;86;94;79", "wc_strengths": "92;50;82;106", "wc_weaknesses": "270;42;138;202", "wc_questions": "7;177;80;231", "wc_limitations": "6;10;29;12", "wc_review": "465;365;423;630", "wc_reply_reviewers": "98;0;114;23", "wc_reply_authors": "7;0;51;7", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.25, 5.539629951540085 ], "wc_strengths_avg": [ 82.5, 20.60946384552495 ], "wc_weaknesses_avg": [ 163.0, 84.01785524517987 ], "wc_questions_avg": [ 123.75, 86.4331388993828 ], "wc_limitations_avg": [ 14.25, 8.78564169540279 ], "wc_review_avg": [ 470.75, 98.56057781892312 ], "wc_reply_reviewers_avg": [ 58.75, 48.27719440895463 ], "wc_reply_authors_avg": [ 16.25, 20.26542622300355 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13719471994674657815&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;buaa.edu.cn;ntu.edu.sg;pjlab.org.cn;andrew.cmu.edu;tsinghua.edu.cn;buaa.edu.cn;tsinghua.edu.cn;pjlab.org.cn;pjlab.org.cn", "author_num": 10, "aff_unique_index": "0;1;2;3;4;0;1;0;5;5", "aff_unique_norm": "Tsinghua University;Beihang University;Nanyang Technological University;Shanghai Artificial Intelligence Laboratory;Carnegie Mellon University;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.buaa.edu.cn/;https://www.ntu.edu.sg;http://www.shailab.org/;https://www.cmu.edu;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "THU;BUAA;NTU;Shanghai AI Lab;CMU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0;0;0;0;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Theoretical guarantees in KL for Diffusion Flow Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93996", "id": "ia4WUCwHA9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ia4WUCwHA9", "openreview": "https://openreview.net/forum?id=ia4WUCwHA9", "poster": "/media/PosterPDFs/NeurIPS%202024/93996.png?t=1729763536.6674628", "project": "", "author_site": "Marta Gentiloni Silveri, Alain Durmus, Giovanni Conforti", "tldr": "", "abstract": "Flow Matching (FM) (also referred to as stochastic interpolants or rectified flows) stands out as a class of generative models that aims to bridge in finite time the target distribution $\\nu^\\star$ with an auxiliary distribution $\\mu$ leveraging a fixed coupling $\\pi$ and a bridge which can either be deterministic or stochastic. These two ingredients define a path measure which can then be approximated by learning the drift of its Markovian projection. The main contribution of this paper is to provide relatively mild assumption on $\\nu^\\star$, $\\mu$ and $\\pi$ to obtain non-asymptotics guarantees for Diffusion Flow Matching (DFM) models using as bridge the conditional distribution associated with the Brownian motion. More precisely, it establishes bounds on the Kullback-Leibler divergence between the target distribution and the one generated by such DFM models under moment conditions on the score of $\\nu^\\star$, $\\mu$ and $\\pi$, and a standard $\\mathrm{L}^2$-drift-approximation error assumption.", "keywords": "Flow Matching;Stochastic interpolant;Theoretical guarantees", "primary_area": "generative_models", "supplementary_material": "", "author": "Marta Gentiloni Silveri;Alain Oliviero Durmus;Giovanni Conforti", "authorids": "~Marta_Gentiloni_Silveri2;~Alain_Oliviero_Durmus1;~Giovanni_Conforti1", "gender": "F;M;M", "homepage": ";https://sites.google.com/site/giovanniconfort/home;", "dblp": ";;01/11275", "google_scholar": "kr7pa_AAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Marta_Gentiloni_Silveri2;~Giovanni_Conforti1;~Alain_Durmus1", "aff": "\u00c9cole Polytechnique;Ecole Polytechnique;\u00c9cole Polytechnique", "aff_domain": "polytechnique.edu;polytechnique.edu;polytechnique.fr", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsilveri2024theoretical,\ntitle={Theoretical guarantees in {KL} for Diffusion Flow Matching},\nauthor={Marta Gentiloni Silveri and Alain Oliviero Durmus and Giovanni Conforti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ia4WUCwHA9}\n}", "github": "", "reviewers": "83K8;igTf;NqRW;TMKS", "pdf_size": 541379, "rating": "5;7;7;8", "confidence": "3;3;3;2", "soundness": "3;3;3;4", "novelty": "2;3;4;4", "presentation": "2;3;3;4", "wc_summary": "45;50;504;67", "wc_strengths": "60;29;3;54", "wc_weaknesses": "60;48;3;66", "wc_questions": "152;24;3;90", "wc_limitations": "1;1;3;7", "wc_review": "318;152;516;284", "wc_reply_reviewers": "0;21;11;38", "wc_reply_authors": "0;8;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 166.5, 195.02628028037657 ], "wc_strengths_avg": [ 36.5, 22.566568192793515 ], "wc_weaknesses_avg": [ 44.25, 24.681724007856502 ], "wc_questions_avg": [ 67.25, 58.52082962501471 ], "wc_limitations_avg": [ 3.0, 2.449489742783178 ], "wc_review_avg": [ 317.5, 130.30253259242508 ], "wc_reply_reviewers_avg": [ 17.5, 13.97318861248212 ], "wc_reply_authors_avg": [ 2.0, 3.4641016151377544 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3590188373994313367&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "polytechnique.edu;polytechnique.edu;polytechnique.fr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Ecole Polytechnique", "aff_unique_dep": "", "aff_unique_url": "https://www.polytechnique.edu", "aff_unique_abbr": "X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Map It Anywhere: Empowering BEV Map Prediction using Large-scale Public Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97549", "id": "iaahkRzA9f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iaahkRzA9f", "openreview": "https://openreview.net/forum?id=iaahkRzA9f", "poster": "", "project": "", "author_site": "Cherie Ho, Jiaye Zou, Omar Alama, Sai Mitheran Jagadesh Kumar, Cheng-Yu Chiang, Taneesh Gupta, Chen Wang, Nikhil Keetha, Katia Sycara, Sebastian Scherer", "tldr": "", "abstract": "Top-down Bird's Eye View (BEV) maps are a popular perception representation for ground robot navigation due to their richness and flexibility for downstream tasks. While recent methods have shown promise for predicting BEV maps from First-Person View (FPV) images, their generalizability is limited to small regions captured by current autonomous vehicle-based datasets. In this context, we show that a more scalable approach towards generalizable map prediction can be enabled by using two large-scale crowd-sourced mapping platforms, Mapillary for FPV images and OpenStreetMap for BEV semantic maps.\nWe introduce Map It Anywhere (MIA), a data engine that enables seamless curation and modeling of labeled map prediction data from existing open-source map platforms. Using our MIA data engine, we display the ease of automatically collecting a 1.2 million FPV & BEV pair dataset encompassing diverse geographies, landscapes, environmental factors, camera models & capture scenarios. We further train a simple camera model-agnostic model on this data for BEV map prediction.\nExtensive evaluations using established benchmarks and our dataset show that the data curated by MIA enables effective pretraining for generalizable BEV map prediction, with zero-shot performance far exceeding baselines trained on existing datasets by 35%. Our analysis highlights the promise of using large-scale public maps for developing & testing generalizable BEV perception, paving the way for more robust autonomous navigation.\n\nWebsite: mapitanywhere.github.io", "keywords": "map prediction;bird's eye view mapping", "primary_area": "", "supplementary_material": "/attachment/bfd3236236576df162c23f703c6227a9ff54dba0.pdf", "author": "Cherie Ho;Jiaye Zou;Omar Alama;Sai Mitheran;Benjamin Chiang;Taneesh Gupta;Chen Wang;Nikhil Varma Keetha;Katia P. Sycara;Sebastian Scherer", "authorids": "~Cherie_Ho2;~Jiaye_Zou1;~Omar_Alama1;~Sai_Mitheran1;~Benjamin_Chiang1;~Taneesh_Gupta1;~Chen_Wang2;~Nikhil_Varma_Keetha1;~Katia_P._Sycara1;~Sebastian_Scherer1", "gender": "F;M;M;M;;M;M;M;F;M", "homepage": "https://cherieho.com/;;;https://smj007.github.io/;;;https://sairlab.org/chenw/;https://nik-v9.github.io/;;https://theairlab.org", "dblp": ";;;;;;82/4206-33;261/3637;s/KatiaPSycara;253/5743", "google_scholar": "j8lsq7sAAAAJ;;p5W2kBYAAAAJ;https://scholar.google.com/citations?hl=en;;WArg3JAAAAAJ;vZfmKl4AAAAJ;ZTm5H50AAAAJ;VWv6a9kAAAAJ;gxoPfIYAAAAJ", "orcid": ";;;;;;0000-0002-4630-0805;0000-0003-2770-0835;;0000-0002-8373-4688", "linkedin": ";tonyjzou;omaralama/;sai-mitheran/;chengyuchiang/;taneesh-gupta;wang-chen/;nik-v9/;;sebastian-scherer-a026961a/", "or_profile": "~Cherie_Ho2;~Jiaye_Zou1;~Omar_Alama1;~Sai_Mitheran1;~Benjamin_Chiang1;~Taneesh_Gupta1;~Chen_Wang2;~Nikhil_Varma_Keetha1;~Katia_P._Sycara1;~Sebastian_Scherer1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;National University of Singapore;Carnegie Mellon University;Microsoft;University at Buffalo;Carnegie Mellon University;Carnegie Mellon University;Near Earth Autonomy Inc.", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;nus.edu.sg;cmu.edu;microsoft.com;buffalo.edu;cmu.edu;cmu.edu;nearearth.aero", "position": "PhD student;Undergrad student;MS student;Researcher;MS student;Researcher;Assistant Professor;PhD student;Full Professor;Senior Scientist", "bibtex": "@inproceedings{\nho2024map,\ntitle={Map It Anywhere: Empowering {BEV} Map Prediction using Large-scale Public Datasets},\nauthor={Cherie Ho and Jiaye Zou and Omar Alama and Sai Mitheran and Benjamin Chiang and Taneesh Gupta and Chen Wang and Nikhil Varma Keetha and Katia P. Sycara and Sebastian Scherer},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=iaahkRzA9f}\n}", "github": "", "reviewers": "NDfz;GXHK;Eqmy", "pdf_size": 15263079, "rating": "6;6;7", "confidence": "3;4;3", "wc_summary_and_contributions": "75;111;44", "wc_strengths": "42;2;29", "wc_improvement": "3;2;53", "wc_limitations": "3;2;33", "wc_correctness": "19;18;6", "wc_clarity": "9;4;8", "wc_relation_to_prior_work": "48;9;18", "wc_documentation": "7;4;4", "wc_additional_feedback": "1;1;1", "wc_review": "207;153;196", "wc_reply_reviewers": "7;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 76.66666666666667, 27.378012264508094 ], "wc_strengths_avg": [ 24.333333333333332, 16.659998666133067 ], "wc_improvement_avg": [ 19.333333333333332, 23.809428571238097 ], "wc_limitations_avg": [ 12.666666666666666, 14.38363267359428 ], "wc_correctness_avg": [ 14.333333333333334, 5.90668171555645 ], "wc_clarity_avg": [ 7.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 25.0, 16.673332000533065 ], "wc_documentation_avg": [ 5.0, 1.4142135623730951 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 185.33333333333334, 23.299976156401726 ], "wc_reply_reviewers_avg": [ 2.3333333333333335, 3.2998316455372216 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rjoeN8y0yO8J:scholar.google.com/&scioq=Map+It+Anywhere:+Empowering+BEV+Map+Prediction+using+Large-scale+Public+Datasets&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "cmu.edu;cmu.edu;cmu.edu;nus.edu.sg;cmu.edu;microsoft.com;buffalo.edu;cmu.edu;cmu.edu;nearearth.aero", "author_num": 10, "aff_unique_index": "0;0;0;1;0;2;3;0;0;4", "aff_unique_norm": "Carnegie Mellon University;National University of Singapore;Microsoft;University at Buffalo;Near Earth Autonomy", "aff_unique_dep": ";;Microsoft Corporation;;", "aff_unique_url": "https://www.cmu.edu;https://www.nus.edu.sg;https://www.microsoft.com;https://www.buffalo.edu;https://www.nearearthautonomy.com", "aff_unique_abbr": "CMU;NUS;Microsoft;UB;NEA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;Singapore" }, { "title": "DeepDRK: Deep Dependency Regularized Knockoff for Feature Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93995", "id": "ibKpPabHVn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ibKpPabHVn", "openreview": "https://openreview.net/forum?id=ibKpPabHVn", "poster": "", "project": "", "author_site": "Hongyu Shen, Yici Yan, Zhizhen Jane Zhao", "tldr": "", "abstract": "Model-X knockoff has garnered significant attention among various feature selection methods due to its guarantees for controlling the false discovery rate (FDR). Since its introduction in parametric design, knockoff techniques have evolved to handle arbitrary data distributions using deep learning-based generative models. However, we have observed limitations in the current implementations of the deep Model-X knockoff framework. Notably, the \"swap property\" that knockoffs require often faces challenges at the sample level, resulting in diminished selection power. To address these issues, we develop \"Deep Dependency Regularized Knockoff (DeepDRK),\" a distribution-free deep learning method that effectively balances FDR and power. In DeepDRK, we introduce a novel formulation of the knockoff model as a learning problem under multi-source adversarial attacks. By employing an innovative perturbation technique, we achieve lower FDR and higher power. Our model outperforms existing benchmarks across synthetic, semi-synthetic, and real-world datasets, particularly when sample sizes are small and data distributions are non-Gaussian.", "keywords": "Feature Selection;Deep Learning;Model-X Knockoff;FDR Control;Boosting Power", "primary_area": "generative_models", "supplementary_material": "", "author": "Hongyu Shen;Yici Yan;Zhizhen Zhao", "authorids": "~Hongyu_Shen1;~Yici_Yan1;~Zhizhen_Zhao1", "gender": "M;M;", "homepage": ";;", "dblp": "35/7565;;122/3155", "google_scholar": "CVCKr-EAAAAJ;;", "orcid": ";;", "linkedin": "hongyu-shen-61943267/;yici-yan-418b36117;", "or_profile": "~Hongyu_Shen1;~Yici_Yan1;~Zhizhen_Zhao1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Washington", "aff_domain": "illinois.edu;illinois.edu;uw.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nshen2024deepdrk,\ntitle={Deep{DRK}: Deep Dependency Regularized Knockoff for Feature Selection},\nauthor={Hongyu Shen and Yici Yan and Zhizhen Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ibKpPabHVn}\n}", "github": "", "reviewers": "FDZg;Znex;WTXG", "pdf_size": 1022857, "rating": "3;6;7", "confidence": "3;4;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "1;3;4", "wc_summary": "48;61;395", "wc_strengths": "17;61;65", "wc_weaknesses": "830;15;142", "wc_questions": "2;115;2", "wc_limitations": "1;7;77", "wc_review": "898;259;681", "wc_reply_reviewers": "0;20;28", "wc_reply_authors": "399;42;42", "reply_reviewers": "0;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 168.0, 160.6009547501716 ], "wc_strengths_avg": [ 47.666666666666664, 21.746008573733455 ], "wc_weaknesses_avg": [ 329.0, 358.0344489943205 ], "wc_questions_avg": [ 39.666666666666664, 53.268710849386586 ], "wc_limitations_avg": [ 28.333333333333332, 34.49959742116163 ], "wc_review_avg": [ 612.6666666666666, 265.30778771499 ], "wc_reply_reviewers_avg": [ 16.0, 11.775681155103795 ], "wc_reply_authors_avg": [ 161.0, 168.2914139223983 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9707253433941508, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5293816254708543531&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "email": "illinois.edu;illinois.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.washington.edu", "aff_unique_abbr": "UIUC;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Lumina-Next : Making Lumina-T2X Stronger and Faster with Next-DiT", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93994", "id": "ieYdf9TZ2u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ieYdf9TZ2u", "openreview": "https://openreview.net/forum?id=ieYdf9TZ2u", "poster": "/media/PosterPDFs/NeurIPS%202024/93994.png?t=1731298502.9884877", "project": "", "author_site": "Le Zhuo, Ruoyi Du, Han Xiao, Yangguang Li, Dongyang Liu, Rongjie Huang, Wenze Liu, Xiangyang Zhu, Fu-Yun Wang, Zhanyu Ma, Xu Luo, Zehan Wang, Kaipeng Zhang, Lirui Zhao, Si Liu, Xiangyu Yue, Wanli Ouyang, Yu Qiao, Hongsheng Li, Peng Gao", "tldr": "", "abstract": "Lumina-T2X is a nascent family of Flow-based Large Diffusion Transformers (Flag-DiT) that establishes a unified framework for transforming noise into various modalities, such as images and videos, conditioned on text instructions. Despite its promising capabilities, Lumina-T2X still encounters challenges including training instability, slow inference, and extrapolation artifacts. In this paper, we present Lumina-Next, an improved version of Lumina-T2X, showcasing stronger generation performance with increased training and inference efficiency. We begin with a comprehensive analysis of the Flag-DiT architecture and identify several suboptimal components, which we address by introducing the Next-DiT architecture with 3D RoPE and sandwich normalizations. To enable better resolution extrapolation, we thoroughly compare different context extrapolation methods applied to text-to-image generation with 3D RoPE, and propose Frequency- and Time-Aware Scaled RoPE tailored for diffusion transformers. Additionally, we introduce a sigmoid time discretization schedule for diffusion sampling, which achieves high-quality generation in 5-10 steps combined with higher-order ODE solvers. Thanks to these improvements, Lumina-Next not only improves the basic text-to-image generation but also demonstrates superior resolution extrapolation capabilities as well as multilingual generation using decoder-based LLMs as the text encoder, all in a zero-shot manner. To further validate Lumina-Next as a versatile generative framework, we instantiate it on diverse tasks including visual recognition, multi-views, audio, music, and point cloud generation, showcasing strong performance across these domains. By releasing all codes and model weights at https://github.com/Alpha-VLLM/Lumina-T2X, we aim to advance the development of next-generation generative AI capable of universal modeling.", "keywords": "Generative Models;Text-to-Image Generation;Diffusion Models;Flow Matching", "primary_area": "generative_models", "supplementary_material": "/attachment/767025bf3db767693412a02856d145d1d742ec8c.zip", "author": "Le Zhuo;Ruoyi Du;Han Xiao;Yangguang Li;Dongyang Liu;Rongjie Huang;Wenze Liu;Xiangyang Zhu;Fu-Yun Wang;Zhanyu Ma;Xu Luo;Zehan Wang;Kaipeng Zhang;Lirui Zhao;Si Liu;Xiangyu Yue;Wanli Ouyang;Yu Qiao;Hongsheng Li;Peng Gao", "authorids": "~Le_Zhuo2;~Ruoyi_Du1;~Han_Xiao6;~Yangguang_Li1;~Dongyang_Liu1;~Rongjie_Huang1;~Wenze_Liu1;~Xiangyang_Zhu2;~Fu-Yun_Wang1;~Zhanyu_Ma1;~Xu_Luo1;~Zehan_Wang2;~Kaipeng_Zhang1;~Lirui_Zhao1;~Si_Liu5;~Xiangyu_Yue1;~Wanli_Ouyang1;~Yu_Qiao1;~Hongsheng_Li3;~Peng_Gao3", "gender": "M;M;;M;M;M;M;M;M;M;M;M;M;M;F;M;;;M;", "homepage": "https://le-zhuo.com/;https://ruoyidu.github.io/;;https://yg256li.github.io/;https://chrisliu6.github.io/;;;;https://g-u-n.github.io/;https://zhanyuma.cn/;https://frankluox.github.io/;https://github.com/12zehan17;http://kpzhang93.github.io/;https://github.com/Lirui-Zhao;https://colalab.net;http://xyue.io/;;;http://www.ee.cuhk.edu.hk/~hsli;", "dblp": "334/1111;260/0418;;132/4829-1.html;;212/8936-1;324/8251;;309/6410;;06/2622-3;126/7826-1;179/2126;;60/7642;207/7518;;;27/7402-1;", "google_scholar": "NxNC8qgAAAAJ;DbRZSaoAAAAJ;;a7AMvgkAAAAJ;VxQGEOcAAAAJ;iRHBUsgAAAAJ;;k7YfbnEAAAAJ;R15m3J4AAAAJ;5GAAs7IAAAAJ;https://scholar.google.com/citations?hl=en;euXK0lkAAAAJ;4OqZBmYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;-xQ-C1sAAAAJ;;;BN2Ze-QAAAAJ;", "orcid": ";0000-0001-8372-5637;;;;;0000-0002-1510-6196;0000-0002-5853-6057;;0000-0003-2950-2488;0000-0001-9827-1244;0009-0007-7509-7563;;;0000-0002-9180-2935;;;;;", "linkedin": ";ruoyi-du-2136171a3/;;;;;;;;;;;;;;;;;;", "or_profile": "~Le_Zhuo2;~Ruoyi_Du1;~Han_Xiao6;~Yangguang_Li1;~Dongyang_Liu1;~Rongjie_Huang1;~Wenze_Liu1;~Xiangyang_Zhu2;~Fu-Yun_Wang1;~Zhanyu_Ma1;~Xu_Luo1;~Zehan_Wang2;~Kaipeng_Zhang1;~Lirui_Zhao1;~Si_Liu5;~Xiangyu_Yue1;~Wanli_Ouyang1;~Yu_Qiao1;~Hongsheng_Li3;~Peng_Gao3", "aff": "Beihang University;Beijing University of Posts and Telecommunications;;Shanghai AI Laboratory;Institute of Computing Technology , Chinese Academy of Sciences;Zhejiang University;Huazhong University of Science and Technology;City University of Hong Kong;The Chinese University of Hong Kong;Beijing University of Post and Telecommunication;University of Electronic Science and Technology of China;Zhejiang University;Shanghai AI Laboratory;Xiamen University;Beihang University;The Chinese University of Hong Kong;;;The Chinese University of Hong Kong;", "aff_domain": "buaa.edu.cn;bupt.edu.cn;;pjlab.org.cn;ict.ac.cn;zju.edu.cn;hust.edu.cn;cityu.edu.hk;cuhk.edu.hk;bupt.edu.cn;uestc.edu.cn;zju.edu.cn;pjlab.org.cn;xmu.edu.cn;buaa.edu.cn;ie.cuhk.edu;;;cuhk.edu.hk;", "position": "Undergrad student;PhD student;;Researcher;MS student;MS student;MS student;PhD student;PhD student;Full Professor;PhD student;PhD student;Researcher;MS student;Full Professor;Assistant Professor;;;Associate Professor;", "bibtex": "@inproceedings{\nzhuo2024luminanext,\ntitle={Lumina-Next : Making Lumina-T2X Stronger and Faster with Next-DiT},\nauthor={Le Zhuo and Ruoyi Du and Han Xiao and Yangguang Li and Dongyang Liu and Rongjie Huang and Wenze Liu and Xiangyang Zhu and Fu-Yun Wang and Zhanyu Ma and Xu Luo and Zehan Wang and Kaipeng Zhang and Lirui Zhao and Si Liu and Xiangyu Yue and Wanli Ouyang and Yu Qiao and Hongsheng Li and Peng Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ieYdf9TZ2u}\n}", "github": "", "reviewers": "ki5b;FoVd;NAeC;Nfk5;XpYM", "pdf_size": 39435374, "rating": "4;5;5;6;6", "confidence": "4;3;4;4;3", "soundness": "3;2;3;4;3", "novelty": "2;2;2;3;3", "presentation": "3;2;3;4;3", "wc_summary": "70;59;213;49;129", "wc_strengths": "66;22;52;162;50", "wc_weaknesses": "111;32;185;81;54", "wc_questions": "5;9;20;2;1", "wc_limitations": "7;1;16;32;1", "wc_review": "259;123;486;326;235", "wc_reply_reviewers": "0;0;0;74;0", "wc_reply_authors": "0;0;0;49;0", "reply_reviewers": "0;0;0;2;0", "reply_authors": "1;1;1;3;1", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 104.0, 61.17515835696709 ], "wc_strengths_avg": [ 70.4, 47.97332592180784 ], "wc_weaknesses_avg": [ 92.6, 53.24133732354964 ], "wc_questions_avg": [ 7.4, 6.887670143089026 ], "wc_limitations_avg": [ 11.4, 11.672189169131899 ], "wc_review_avg": [ 285.8, 119.58160393639147 ], "wc_reply_reviewers_avg": [ 14.8, 29.6 ], "wc_reply_authors_avg": [ 9.8, 19.6 ], "reply_reviewers_avg": [ 0.4, 0.8 ], "reply_authors_avg": [ 1.4, 0.8 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15035790787388684947&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;bupt.edu.cn;;pjlab.org.cn;ict.ac.cn;zju.edu.cn;hust.edu.cn;cityu.edu.hk;cuhk.edu.hk;bupt.edu.cn;uestc.edu.cn;zju.edu.cn;pjlab.org.cn;xmu.edu.cn;buaa.edu.cn;ie.cuhk.edu;;;cuhk.edu.hk;", "author_num": 20, "aff_unique_index": "0;1;2;3;4;5;6;7;1;8;4;2;9;0;7;7", "aff_unique_norm": "Beihang University;Beijing University of Posts and Telecommunications;Shanghai AI Laboratory;Chinese Academy of Sciences;Zhejiang University;Huazhong University of Science and Technology;City University of Hong Kong;Chinese University of Hong Kong;University of Electronic Science and Technology of China;Xiamen University", "aff_unique_dep": ";;;Institute of Computing Technology;;;;;;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.bupt.edu.cn/;https://www.shanghai-ai-lab.com;http://www.ict.ac.cn;https://www.zju.edu.cn;http://www.hust.edu.cn;https://www.cityu.edu.hk;https://www.cuhk.edu.hk;https://www.uestc.edu.cn;https://www.xmu.edu.cn", "aff_unique_abbr": "BUAA;BUPT;SAIL;CAS;ZJU;HUST;CityU;CUHK;UESTC;XMU", "aff_campus_unique_index": "1;2;2;1;2;2", "aff_campus_unique": ";Beijing;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Flex-MoE: Modeling Arbitrary Modality Combination via the Flexible Mixture-of-Experts", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93993", "id": "ihEHCbqZEx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ihEHCbqZEx", "openreview": "https://openreview.net/forum?id=ihEHCbqZEx", "poster": "", "project": "", "author_site": "Sukwon Yun, Inyoung Choi, Jie Peng, Yangfan Wu, Jingxuan Bao, Qiyiwen Zhang, Jiayi Xin, Qi Long, Tianlong Chen", "tldr": "", "abstract": "Multimodal learning has gained increasing importance across various fields, offering the ability to integrate data from diverse sources such as images, text, and personalized records, which are frequently observed in medical domains. However, in scenarios where some modalities are missing, many existing frameworks struggle to accommodate arbitrary modality combinations, often relying heavily on a single modality or complete data. This oversight of potential modality combinations limits their applicability in real-world situations. To address this challenge, we propose Flex-MoE (Flexible Mixture-of-Experts), a new framework designed to flexibly incorporate arbitrary modality combinations while maintaining robustness to missing data. The core idea of Flex-MoE is to first address missing modalities using a new missing modality bank that integrates observed modality combinations with the corresponding missing ones. This is followed by a uniquely designed Sparse MoE framework. Specifically, Flex-MoE first trains experts using samples with all modalities to inject generalized knowledge through the generalized router ($\\mathcal{G}$-Router). The $\\mathcal{S}$-Router then specializes in handling fewer modality combinations by assigning the top-1 gate to the expert corresponding to the observed modality combination. We evaluate Flex-MoE on the ADNI dataset, which encompasses four modalities in the Alzheimer's Disease domain, as well as on the MIMIC-IV dataset. The results demonstrate the effectiveness of Flex-MoE, highlighting its ability to model arbitrary modality combinations in diverse missing modality scenarios. Code is available at: \\url{https://github.com/UNITES-Lab/flex-moe}.", "keywords": "Multi-modal learning;MoE", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Sukwon Yun;Inyoung Choi;Jie Peng;Yangfan Wu;Jingxuan Bao;Qiyiwen Zhang;Jiayi Xin;Qi Long;Tianlong Chen", "authorids": "~Sukwon_Yun1;~Inyoung_Choi1;~Jie_Peng4;~Yangfan_Wu1;~Jingxuan_Bao1;~Qiyiwen_Zhang1;~Jiayi_Xin1;~Qi_Long1;~Tianlong_Chen1", "gender": "M;F;M;;;F;F;M;M", "homepage": "https://sukwonyun.github.io/;;http://home.ustc.edu.cn/~pengjieb/;http://home.ustc.edu.cn/~ustc_wyf/;;;;https://www.med.upenn.edu/long-lab/;https://tianlong-chen.github.io", "dblp": "327/3464;390/0940;;;;;128/0543;47/7320;", "google_scholar": "AgqvtZkAAAAJ;;;;CiYAo8cAAAAJ;;ltQ26LQAAAAJ;gfklepYAAAAJ;LE3ctn0AAAAJ", "orcid": "0000-0002-5186-6563;0009-0008-3152-5041;;;;;0000-0003-3693-3809;0000-0003-0660-5230;0000-0001-7774-8197", "linkedin": ";;;;;qiyiwen-zhang-438354133/;jiayi-xin-978511334/;qi-long-9652a0125/;tianlong-chen-783862167/", "or_profile": "~Sukwon_Yun1;~Inyoung_Choi1;~Jie_Peng4;~Yangfan_Wu1;~Jingxuan_Bao1;~Qiyiwen_Zhang1;~Jiayi_Xin1;~Qi_Long1;~Tianlong_Chen1", "aff": "University of North Carolina at Chapel Hill;University of Pennsylvania;University of Science and Technology of China;University of Science and Technology of China;University of Pennsylvania;University of Pennsylvania;University of Hong Kong;University of Pennsylvania;Harvard University", "aff_domain": "cs.unc.edu;seas.upenn.edu;ustc.edu.cn;mail.ustc.edu.cn;upenn.edu;upenn.edu;hku.hk;upenn.edu;harvard.edu", "position": "PhD student;PhD student;PhD student;MS student;PhD student;Postdoc;Undergrad student;Professor;Postdoc", "bibtex": "@inproceedings{\nyun2024flexmoe,\ntitle={Flex-MoE: Modeling Arbitrary Modality Combination via the Flexible Mixture-of-Experts},\nauthor={Sukwon Yun and Inyoung Choi and Jie Peng and Yangfan Wu and Jingxuan Bao and Qiyiwen Zhang and Jiayi Xin and Qi Long and Tianlong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ihEHCbqZEx}\n}", "github": "", "reviewers": "qdnn;6yyS;BwQM", "pdf_size": 1093378, "rating": "5;7;7", "confidence": "3;4;4", "soundness": "2;4;3", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "61;72;29", "wc_strengths": "25;86;38", "wc_weaknesses": "158;225;240", "wc_questions": "123;16;52", "wc_limitations": "41;13;6", "wc_review": "408;412;365", "wc_reply_reviewers": "33;97;84", "wc_reply_authors": "633;220;173", "reply_reviewers": "1;2;2", "reply_authors": "3;4;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 54.0, 18.239152027072603 ], "wc_strengths_avg": [ 49.666666666666664, 26.23398982660133 ], "wc_weaknesses_avg": [ 207.66666666666666, 35.64952859280034 ], "wc_questions_avg": [ 63.666666666666664, 44.4547210341289 ], "wc_limitations_avg": [ 20.0, 15.121728296285006 ], "wc_review_avg": [ 395.0, 21.275964529643932 ], "wc_reply_reviewers_avg": [ 71.33333333333333, 27.6204433144888 ], "wc_reply_authors_avg": [ 342.0, 206.66075260355234 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5170887688442609719&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 3, "email": "cs.unc.edu;seas.upenn.edu;ustc.edu.cn;mail.ustc.edu.cn;upenn.edu;upenn.edu;hku.hk;upenn.edu;harvard.edu", "author_num": 9, "aff_unique_index": "0;1;2;2;1;1;3;1;4", "aff_unique_norm": "University of North Carolina;University of Pennsylvania;University of Science and Technology of China;University of Hong Kong;Harvard University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.unc.edu;https://www.upenn.edu;http://www.ustc.edu.cn;https://www.hku.hk;https://www.harvard.edu", "aff_unique_abbr": "UNC;UPenn;USTC;HKU;Harvard", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Chapel Hill;;Hong Kong SAR", "aff_country_unique_index": "0;0;1;1;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Variational Distillation of Diffusion Policies into Mixture of Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93992", "id": "iiYadgKHwo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iiYadgKHwo", "openreview": "https://openreview.net/forum?id=iiYadgKHwo", "poster": "/media/PosterPDFs/NeurIPS%202024/93992.png?t=1733066981.6129336", "project": "", "author_site": "Hongyi Zhou, Denis Blessing, Ge Li, Onur Celik, Xiaogang Jia, Gerhard Neumann, Rudolf Lioutikov", "tldr": "", "abstract": "This work introduces Variational Diffusion Distillation (VDD), a novel method that distills denoising diffusion policies into Mixtures of Experts (MoE) through variational inference. Diffusion Models are the current state-of-the-art in generative modeling due to their exceptional ability to accurately learn and represent complex, multi-modal distributions. This ability allows Diffusion Models to replicate the inherent diversity in human behavior, making them the preferred models in behavior learning such as Learning from Human Demonstrations (LfD).\nHowever, diffusion models come with some drawbacks, including the intractability of likelihoods and long inference times due to their iterative sampling process. The inference times, in particular, pose a significant challenge to real-time applications such as robot control.\nIn contrast, MoEs effectively address the aforementioned issues while retaining the ability to represent complex distributions but are notoriously difficult to train.\nVDD is the first method that distills pre-trained diffusion models into MoE models, and hence, combines the expressiveness of Diffusion Models with the benefits of Mixture Models.\nSpecifically, VDD leverages a decompositional upper bound of the variational objective that allows the training of each expert separately, resulting in a robust optimization scheme for MoEs.\nVDD demonstrates across nine complex behavior learning tasks, that it is able to: i) accurately distill complex distributions learned by the diffusion model, ii) outperform existing state-of-the-art distillation methods, and iii) surpass conventional methods for training MoE. The code and videos are available at https://intuitive-robots.github.io/vdd-website.", "keywords": "Diverse Behavior Learning;Model Distillation;Diffusion Models;Variational Inference", "primary_area": "robotics", "supplementary_material": "", "author": "Hongyi Zhou;Denis Blessing;Ge Li;Onur Celik;Xiaogang Jia;Gerhard Neumann;Rudolf Lioutikov", "authorids": "~Hongyi_Zhou1;~Denis_Blessing1;~Ge_Li3;~Onur_Celik1;~Xiaogang_Jia1;~Gerhard_Neumann2;~Rudolf_Lioutikov1", "gender": "M;M;M;M;M;M;M", "homepage": "https://hongyizhoucn.github.io/;;;https://alr.anthropomatik.kit.edu/21_69.php;https://xiaogangjia.github.io/Personal_Website/;https://rudolf.intuitive-robots.net;https://alr.anthropomatik.kit.edu/", "dblp": ";219/1435;;243/5913;23/10777;151/9451;60/4878", "google_scholar": "W35-J2sAAAAJ;https://scholar.google.de/citations?view_op=list_works;;9jqaTcAAAAAJ;E7Tja9gAAAAJ;hvjV43MAAAAJ;https://scholar.google.com.tw/citations?user=GL360kMAAAAJ", "orcid": ";;;;;;", "linkedin": "hongyi-zhou-9413b9242/;;geli-bruce/;;;rudolf-lioutikov-74830730a/;", "or_profile": "~Hongyi_Zhou1;~Denis_Blessing1;~Ge_Li3;~Onur_Celik1;~Xiaogang_Jia1;~Rudolf_Lioutikov1;~Gerhard_Neumann1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology;Karlsruhe Institute of Technology;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_domain": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Tenure-Track Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024variational,\ntitle={Variational Distillation of Diffusion Policies into Mixture of Experts},\nauthor={Hongyi Zhou and Denis Blessing and Ge Li and Onur Celik and Xiaogang Jia and Gerhard Neumann and Rudolf Lioutikov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iiYadgKHwo}\n}", "github": "", "reviewers": "8iR5;5fPT;MHge;YkuG;14hC", "pdf_size": 3093274, "rating": "5;6;6;6;7", "confidence": "4;4;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "68;47;93;81;90", "wc_strengths": "49;32;47;71;78", "wc_weaknesses": "228;110;122;261;202", "wc_questions": "2;50;4;45;55", "wc_limitations": "2;1;21;12;23", "wc_review": "349;240;287;470;448", "wc_reply_reviewers": "31;80;30;12;300", "wc_reply_authors": "296;48;35;28;798", "reply_reviewers": "1;1;1;1;2", "reply_authors": "3;2;2;2;3", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 75.8, 16.821414922651424 ], "wc_strengths_avg": [ 55.4, 16.81190054693401 ], "wc_weaknesses_avg": [ 184.6, 59.172966800727515 ], "wc_questions_avg": [ 31.2, 23.249946236496978 ], "wc_limitations_avg": [ 11.8, 9.19565114605812 ], "wc_review_avg": [ 358.8, 89.0918627036162 ], "wc_reply_reviewers_avg": [ 90.6, 107.1104103250473 ], "wc_reply_authors_avg": [ 241.0, 296.08377192949973 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12513494865134574456&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "author_num": 7, "aff_unique_index": "0;0;1;1;0;0;1", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie;Karlsruhe Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kit.edu;https://www.kit.edu", "aff_unique_abbr": "KIT;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "In-Context Learning with Representations: Contextual Generalization of Trained Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93991", "id": "ik37kKxKBm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ik37kKxKBm", "openreview": "https://openreview.net/forum?id=ik37kKxKBm", "poster": "/media/PosterPDFs/NeurIPS%202024/93991.png?t=1731693932.9800644", "project": "", "author_site": "Tong Yang, Yu Huang, Yingbin Liang, Yuejie Chi", "tldr": "", "abstract": "In-context learning (ICL) refers to a remarkable capability of pretrained large language models, which can learn a new task given a few examples during inference. However, theoretical understanding of ICL is largely under-explored, particularly whether transformers can be trained to generalize to unseen examples in a prompt, which will require the model to acquire contextual knowledge of the prompt for generalization. This paper investigates the training dynamics of transformers by gradient descent through the lens of non-linear regression tasks. The contextual generalization here can be attained via learning the template function for each task in-context, where all template functions lie in a linear space with $m$ basis functions. We analyze the training dynamics of one-layer multi-head transformers to {in-contextly} predict unlabeled inputs given partially labeled prompts, where the labels contain Gaussian noise and the number of examples in each prompt are not sufficient to determine the template. Under mild assumptions, we show that the training loss for a one-layer multi-head transformer converges linearly to a global minimum. Moreover, the transformer effectively learns to perform ridge regression over the basis functions. To our knowledge, this study is the first provable demonstration that transformers can learn contextual (i.e., template) information to generalize to both unseen examples and tasks when prompts contain only a small number of query-answer pairs.", "keywords": "training dynamics;optimization;transformer;multi-head softmax attention;in-context learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Tong Yang;Yu Huang;Yingbin Liang;Yuejie Chi", "authorids": "~Tong_Yang4;~Yu_Huang3;~Yingbin_Liang1;~Yuejie_Chi1", "gender": "F;F;F;", "homepage": "https://pptmiao.github.io;https://yuhuang42.org/;https://sites.google.com/view/yingbinliang/home;", "dblp": ";39/6301-23;51/332;", "google_scholar": ";;lGgLAiIAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Tong_Yang4;~Yu_Huang3;~Yingbin_Liang1;~Yuejie_Chi1", "aff": "Carnegie Mellon University;The Wharton School, University of Pennsylvania;The Ohio State University;", "aff_domain": "cmu.edu;wharton.upenn.edu;osu.edu;", "position": "PhD student;PhD student;Professor;", "bibtex": "@inproceedings{\nyang2024incontext,\ntitle={In-Context Learning with Representations: Contextual Generalization of Trained Transformers},\nauthor={Tong Yang and Yu Huang and Yingbin Liang and Yuejie Chi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ik37kKxKBm}\n}", "github": "", "reviewers": "TyrV;vG3D;MzSs;dubr", "pdf_size": 837253, "rating": "4;5;6;7", "confidence": "2;4;4;4", "soundness": "2;2;3;3", "novelty": "3;3;3;2", "presentation": "2;3;3;3", "wc_summary": "161;73;90;218", "wc_strengths": "151;27;117;212", "wc_weaknesses": "96;215;103;138", "wc_questions": "45;76;67;18", "wc_limitations": "4;10;33;68", "wc_review": "457;401;410;654", "wc_reply_reviewers": "0;176;130;28", "wc_reply_authors": "198;425;139;69", "reply_reviewers": "0;3;1;1", "reply_authors": "4;4;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 135.5, 57.95040983461636 ], "wc_strengths_avg": [ 126.75, 66.89684222741758 ], "wc_weaknesses_avg": [ 138.0, 47.217581471312144 ], "wc_questions_avg": [ 51.5, 22.38861317723811 ], "wc_limitations_avg": [ 28.75, 25.113492389550284 ], "wc_review_avg": [ 480.5, 102.40239255017434 ], "wc_reply_reviewers_avg": [ 83.5, 72.06073826987897 ], "wc_reply_authors_avg": [ 207.75, 133.4829108912448 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6792039888499265367&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": "cmu.edu;wharton.upenn.edu;osu.edu;", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;University of Pennsylvania;Ohio State University", "aff_unique_dep": ";The Wharton School;", "aff_unique_url": "https://www.cmu.edu;https://www.wharton.upenn.edu;https://www.osu.edu", "aff_unique_abbr": "CMU;UPenn Wharton;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Generalizing Weather Forecast to Fine-grained Temporal Scales via Physics-AI Hybrid Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93990", "id": "ioAlzcELTf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ioAlzcELTf", "openreview": "https://openreview.net/forum?id=ioAlzcELTf", "poster": "/media/PosterPDFs/NeurIPS%202024/93990.png?t=1730915425.074249", "project": "", "author_site": "Wanghan Xu, Fenghua Ling, zhangwenlong, Tao Han, Hao Chen, Wanli Ouyang, LEI BAI", "tldr": "", "abstract": "Data-driven artificial intelligence (AI) models have made significant advancements in weather forecasting, particularly in medium-range and nowcasting. However, most data-driven weather forecasting models are black-box systems that focus on learning data mapping rather than fine-grained physical evolution in the time dimension. Consequently, the limitations in the temporal scale of datasets prevent these models from forecasting at finer time scales. This paper proposes a physics-AI hybrid model (i.e., WeatherGFT) which generalizes weather forecasts to finer-grained temporal scales beyond training dataset. Specifically, we employ a carefully designed PDE kernel to simulate physical evolution on a small time scale (e.g., 300 seconds) and use a parallel neural networks with a learnable router for bias correction. Furthermore, we introduce a lead time-aware training framework to promote the generalization of the model at different lead times. The weight analysis of physics-AI modules indicates that physics conducts major evolution while AI performs corrections adaptively. Extensive experiments show that WeatherGFT trained on an hourly dataset, effectively generalizes forecasts across multiple time scales, including 30-minute, which is even smaller than the dataset's temporal resolution.", "keywords": "weather forecast;physics-AI hybrid model;partial differential equation;machine learning", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Wanghan Xu;Fenghua Ling;Wenlong Zhang;Tao Han;Hao Chen;Wanli Ouyang;LEI BAI", "authorids": "~Wanghan_Xu1;~Fenghua_Ling1;~Wenlong_Zhang3;~Tao_Han4;~Hao_Chen14;~Wanli_Ouyang1;~LEI_BAI1", "gender": "M;M;M;M;;;M", "homepage": "https://scholar.google.com/citations?user=lmCL5xQAAAAJ&hl=zh-CN;https://github.com/LingFH;https://wenlongzhang0517.github.io/;https://taohan10200.github.io/;;;http://leibai.site/", "dblp": "367/7191;344/4099;;78/744-3;;;119/1223-1", "google_scholar": "https://scholar.google.com.hk/citations?user=lmCL5xQAAAAJ;;https://scholar.google.com.hk/citations?user=UnMImiUAAAAJ;a3OxwlMAAAAJ;;;https://scholar.google.com.au/citations?user=sakOO04AAAAJ", "orcid": ";;;;;;0000-0003-3378-7201", "linkedin": ";;;;;;lei-bai-641370153/", "or_profile": "~Wanghan_Xu1;~Fenghua_Ling1;~Wenlong_Zhang3;~Tao_Han4;~Hao_Chen14;~Wanli_Ouyang1;~LEI_BAI1", "aff": "Xi'an Jiaotong University;Nanjing University of Information Science and Technology;Shanghai Artificial Intelligence Laboratory;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;;;Shanghai AI Laboratory", "aff_domain": "xjtu.edu.cn;nuist.edu.cn;pjlab.org.cn;cse.ust.hk;;;pjlab.org.cn", "position": "Undergrad student;PhD student;Researcher;PhD student;;;Researcher", "bibtex": "@inproceedings{\nxu2024generalizing,\ntitle={Generalizing Weather Forecast to Fine-grained Temporal Scales via Physics-{AI} Hybrid Modeling},\nauthor={Wanghan Xu and Fenghua Ling and Wenlong Zhang and Tao Han and Hao Chen and Wanli Ouyang and LEI BAI},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ioAlzcELTf}\n}", "github": "", "reviewers": "jiN5;oEsH;CYo5", "pdf_size": 8885932, "rating": "5;7;7", "confidence": "3;5;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "49;87;71", "wc_strengths": "27;66;49", "wc_weaknesses": "471;78;62", "wc_questions": "12;122;105", "wc_limitations": "3;140;1", "wc_review": "562;493;288", "wc_reply_reviewers": "17;39;15", "wc_reply_authors": "15;28;16", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.0, 15.57776192739723 ], "wc_strengths_avg": [ 47.333333333333336, 15.965240019770729 ], "wc_weaknesses_avg": [ 203.66666666666666, 189.14603411708697 ], "wc_questions_avg": [ 79.66666666666667, 48.3482735529983 ], "wc_limitations_avg": [ 48.0, 65.05894762956643 ], "wc_review_avg": [ 447.6666666666667, 116.36246053698856 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 10.873004286866728 ], "wc_reply_authors_avg": [ 19.666666666666668, 5.90668171555645 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4687700074124234658&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "xjtu.edu.cn;nuist.edu.cn;pjlab.org.cn;cse.ust.hk;;;pjlab.org.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Xi'an Jiao Tong University;Nanjing University of Information Science and Technology;Shanghai Artificial Intelligence Laboratory;Hong Kong University of Science and Technology;Shanghai AI Laboratory", "aff_unique_dep": ";;;Department of Computer Science and Engineering;", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.nuist.edu.cn;http://www.shailab.org/;https://www.ust.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "XJTU;;Shanghai AI Lab;HKUST;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Guided Trajectory Generation with Diffusion Models for Offline Model-based Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93989", "id": "ioKQzb8SMr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ioKQzb8SMr", "openreview": "https://openreview.net/forum?id=ioKQzb8SMr", "poster": "/media/PosterPDFs/NeurIPS%202024/93989.png?t=1731320174.2698638", "project": "", "author_site": "Taeyoung Yun, Sujin Yun, Jaewoo Lee, Jinkyoo Park", "tldr": "", "abstract": "Optimizing complex and high-dimensional black-box functions is ubiquitous in science and engineering fields. Unfortunately, the online evaluation of these functions is restricted due to time and safety constraints in most cases. In offline model-based optimization (MBO), we aim to find a design that maximizes the target function using only a pre-existing offline dataset. While prior methods consider forward or inverse approaches to address the problem, these approaches are limited by conservatism and the difficulty of learning highly multi-modal mappings. Recently, there has been an emerging paradigm of learning to improve solutions with synthetic trajectories constructed from the offline dataset. In this paper, we introduce a novel conditional generative modeling approach to produce trajectories toward high-scoring regions. First, we construct synthetic trajectories toward high-scoring regions using the dataset while injecting locality bias for consistent improvement directions. Then, we train a conditional diffusion model to generate trajectories conditioned on their scores. Lastly, we sample multiple trajectories from the trained model with guidance to explore high-scoring regions beyond the dataset and select high-fidelity designs among generated trajectories with the proxy function. Extensive experiment results demonstrate that our method outperforms competitive baselines on Design-Bench and its practical variants. The code is publicly available in \\url{https://github.com/dbsxodud-11/GTG}.", "keywords": "Offline Model-based Optimization;Diffusion Models;Decision Making", "primary_area": "generative_models", "supplementary_material": "", "author": "Taeyoung Yun;Sujin Yun;Jaewoo Lee;Jinkyoo Park", "authorids": "~Taeyoung_Yun1;~Sujin_Yun1;~Jaewoo_Lee3;~Jinkyoo_Park1", "gender": "M;F;M;M", "homepage": "https://dbsxodud-11.github.io;https://github.com/sujinyun999;;http://silab.kaist.ac.kr/", "dblp": "358/5797.html;;;156/7535", "google_scholar": "_51PhLQAAAAJ;wpTfpnIAAAAJ;;sH2a0nkAAAAJ", "orcid": "0009-0001-4602-6367;0009-0008-2263-9944;;0000-0003-2620-1479", "linkedin": ";;%EC%9E%AC%EC%9A%B0-%EC%9D%B4-2a7b2a1b7;", "or_profile": "~Taeyoung_Yun1;~Sujin_Yun1;~Jaewoo_Lee3;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.ac.kr", "position": "MS student;MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nyun2024guided,\ntitle={Guided Trajectory Generation with Diffusion Models for Offline Model-based Optimization},\nauthor={Taeyoung Yun and Sujin Yun and Jaewoo Lee and Jinkyoo Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ioKQzb8SMr}\n}", "github": "", "reviewers": "FGy7;MZpe;nEhC;gJu1", "pdf_size": 7740241, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "40;62;64;50", "wc_strengths": "47;101;50;63", "wc_weaknesses": "94;97;119;99", "wc_questions": "3;4;133;99", "wc_limitations": "1;4;12;15", "wc_review": "185;268;378;326", "wc_reply_reviewers": "0;147;10;0", "wc_reply_authors": "0;51;38;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.0, 9.695359714832659 ], "wc_strengths_avg": [ 65.25, 21.498546462493692 ], "wc_weaknesses_avg": [ 102.25, 9.832980219648569 ], "wc_questions_avg": [ 59.75, 57.52119174704224 ], "wc_limitations_avg": [ 8.0, 5.70087712549569 ], "wc_review_avg": [ 289.25, 71.67068787168155 ], "wc_reply_reviewers_avg": [ 39.25, 62.34330356983018 ], "wc_reply_authors_avg": [ 22.25, 22.71976012197312 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15294650491717318251&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Time Makes Space: Emergence of Place Fields in Networks Encoding Temporally Continuous Sensory Experiences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93988", "id": "ioe66JeCMF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ioe66JeCMF", "openreview": "https://openreview.net/forum?id=ioe66JeCMF", "poster": "/media/PosterPDFs/NeurIPS%202024/93988.png?t=1734054835.7614598", "project": "", "author_site": "Zhaoze Wang, Ronald Di Tullio, Spencer Rooke, Vijay Balasubramanian", "tldr": "", "abstract": "The vertebrate hippocampus is thought to use recurrent connectivity in area CA3 to support episodic memory recall from partial cues. This brain area also contains place cells, whose location-selective firing fields implement maps supporting spatial memory. Here we show that place cells emerge in networks trained to remember temporally continuous sensory episodes. We model CA3 as a recurrent autoencoder that recalls and reconstructs sensory experiences from noisy and partially occluded observations by agents traversing simulated arenas. The agents move in realistic trajectories modeled from rodents and environments are modeled as continuously varying, high-dimensional, sensory experience maps (spatially smoothed Gaussian random fields). Training our autoencoder to accurately pattern-complete and reconstruct sensory experiences with a constraint on total activity causes spatially localized firing fields, i.e., place cells, to emerge in the encoding layer. The emergent place fields reproduce key aspects of hippocampal phenomenology: a) remapping (maintenance of and reversion to distinct learned maps in different environments), implemented via repositioning of experience manifolds in the network\u2019s hidden layer, b) orthogonality of spatial representations in different arenas, c) robust place field emergence in differently shaped rooms, with single units showing multiple place fields in large or complex spaces, and (d) slow representational drift of place fields. We argue that these results arise because continuous traversal of space makes sensory experience temporally continuous. We make testable predictions: a) rapidly changing sensory context will disrupt place fields, b) place fields will form even if recurrent connections are blocked, but reversion to previously learned representations upon remapping will be abolished, c) the dimension of temporally smooth experience sets the dimensionality of place fields, including during virtual navigation of abstract spaces.", "keywords": "Place Cells/Fields;Recurrent Neural Networks;Episodic Memory;Hippocampus", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Zhaoze Wang;Ronald W Di Tullio;Spencer Rooke;Vijay Balasubramanian", "authorids": "~Zhaoze_Wang2;~Ronald_W_Di_Tullio1;~Spencer_Rooke1;~Vijay_Balasubramanian2", "gender": ";M;M;M", "homepage": ";;http://spencer.roo.ke;https://www.sas.upenn.edu/~vbalasub", "dblp": ";;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;ltj3BwwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zhaoze_Wang2;~Ronald_W_Di_Tullio1;~Spencer_Rooke1;~Vijay_Balasubramanian2", "aff": ";University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": ";upenn.edu;upenn.edu;upenn.edu", "position": ";Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024time,\ntitle={Time Makes Space: Emergence of Place Fields in Networks Encoding Temporally Continuous Sensory Experiences},\nauthor={Zhaoze Wang and Ronald W Di Tullio and Spencer Rooke and Vijay Balasubramanian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ioe66JeCMF}\n}", "github": "", "reviewers": "kdgt;KNH2;6VUu;hM85", "pdf_size": 13556419, "rating": "3;6;7;8", "confidence": "4;3;4;5", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "94;70;100;125", "wc_strengths": "84;47;93;47", "wc_weaknesses": "73;45;155;89", "wc_questions": "398;67;95;55", "wc_limitations": "45;8;87;34", "wc_review": "694;237;530;350", "wc_reply_reviewers": "0;12;57;10", "wc_reply_authors": "190;22;117;17", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.25, 19.562400159489634 ], "wc_strengths_avg": [ 67.75, 20.99255820523073 ], "wc_weaknesses_avg": [ 90.5, 40.432041749088064 ], "wc_questions_avg": [ 153.75, 141.76278601946282 ], "wc_limitations_avg": [ 43.5, 28.48245073725223 ], "wc_review_avg": [ 452.75, 174.12262202252757 ], "wc_reply_reviewers_avg": [ 19.75, 21.98152633462927 ], "wc_reply_authors_avg": [ 86.5, 71.82095794404304 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.37796447300922725, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9266678277669963453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": ";upenn.edu;upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Customizing Language Models with Instance-wise LoRA for Sequential Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93987", "id": "isZ8XRe3De", "proceeding": "", "pdf": "https://openreview.net/pdf?id=isZ8XRe3De", "openreview": "https://openreview.net/forum?id=isZ8XRe3De", "poster": "", "project": "", "author_site": "Xiaoyu Kong, Jiancan Wu, An Zhang, Leheng Sheng, Hui Lin, Xiang Wang, Xiangnan He", "tldr": "", "abstract": "Sequential recommendation systems predict the next interaction item based on users' past interactions, aligning recommendations with individual preferences. Leveraging the strengths of Large Language Models (LLMs) in knowledge comprehension and reasoning, recent approaches are eager to apply LLMs to sequential recommendation. A common paradigm is converting user behavior sequences into instruction data, and fine-tuning the LLM with parameter-efficient fine-tuning (PEFT) methods like Low-Rank Adaption (LoRA). However, the uniform application of LoRA across diverse user behaviors is insufficient to capture individual variability, resulting in negative transfer between disparate sequences.\nTo address these challenges, we propose Instance-wise LoRA (iLoRA). We innovatively treat the sequential recommendation task as a form of multi-task learning, integrating LoRA with the Mixture of Experts (MoE) framework. This approach encourages different experts to capture various aspects of user behavior. Additionally, we introduce a sequence representation guided gate function that generates customized expert participation weights for each user sequence, which allows dynamic parameter adjustment for instance-wise recommendations. \nIn sequential recommendation, iLoRA achieves an average relative improvement of 11.4\\% over basic LoRA in the hit ratio metric, with less than a 1\\% relative increase in trainable parameters.\nExtensive experiments on three benchmark datasets demonstrate the effectiveness of iLoRA, highlighting its superior performance compared to existing methods in mitigating negative transfer and improving recommendation accuracy.\nOur data and code are available at https://github.com/AkaliKong/iLoRA.", "keywords": "Sequential Recommendation;Large Language Models;Mixture of Experts", "primary_area": "other", "supplementary_material": "/attachment/5061e4c34609a33659dcea3e0616927fdd8a3330.zip", "author": "Xiaoyu Kong;Jiancan Wu;An Zhang;Leheng Sheng;Hui Lin;Xiang Wang;Xiangnan He", "authorids": "~Xiaoyu_Kong2;~Jiancan_Wu1;~An_Zhang2;~Leheng_Sheng2;~Hui_Lin7;~Xiang_Wang6;~Xiangnan_He1", "gender": "M;M;M;M;M;M;F", "homepage": "https://github.com/AkaliKong?tab=repositories;https://wujcan.github.io/;https://lehengthu.github.io/;https://baike.baidu.com/item/%E6%9E%97%E6%99%96/54130577;https://github.com/xiangwang1223;http://staff.ustc.edu.cn/~hexn;https://github.com/anzhang314", "dblp": ";257/4945;359/0347.html;36/3545.html;31/2864-10;59/1007;78/5581-3", "google_scholar": ";z9zW1UgAAAAJ;https://scholar.google.com.hk/citations?user=s8bNbU0AAAAJ;aLbznmQAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ", "orcid": ";0000-0002-6941-5218;0000-0002-5764-6596;0000-0003-0190-969X;0000-0002-6148-6329;0000-0001-8472-7992;", "linkedin": ";;;;;;", "or_profile": "~Xiaoyu_Kong2;~Jiancan_Wu1;~Leheng_Sheng2;~Hui_Lin7;~Xiang_Wang6;~Xiangnan_He1;~AN_ZHANG1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Tsinghua University;Electronic Science Research Institute of China Electronics Technology Group Corporation;University of Science and Technology of China;University of Science and Technology of China;National University of Singapore", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;mails.tsinghua.edu.cn;caeit.cetc.com.cn;ustc.edu.cn;ustc.edu.cn;nus.edu.sg", "position": "MS student;Postdoc;MS student;Researcher;Full Professor;Professor;Postdoc", "bibtex": "@inproceedings{\nkong2024customizing,\ntitle={Customizing Language Models with Instance-wise Lo{RA} for Sequential Recommendation},\nauthor={Xiaoyu Kong and Jiancan Wu and An Zhang and Leheng Sheng and Hui Lin and Xiang Wang and Xiangnan He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=isZ8XRe3De}\n}", "github": "", "reviewers": "7niz;cnYP;wwLq;59LV", "pdf_size": 12741505, "rating": "4;5;7;7", "confidence": "4;3;4;3", "soundness": "3;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "46;37;217;38", "wc_strengths": "83;146;152;116", "wc_weaknesses": "4;189;141;126", "wc_questions": "152;2;56;6", "wc_limitations": "25;2;28;1", "wc_review": "310;376;594;287", "wc_reply_reviewers": "0;22;0;27", "wc_reply_authors": "177;83;0;135", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 84.5, 76.57839120796415 ], "wc_strengths_avg": [ 124.25, 27.444261695297982 ], "wc_weaknesses_avg": [ 115.0, 68.17990906418107 ], "wc_questions_avg": [ 54.0, 60.44832503882965 ], "wc_limitations_avg": [ 14.0, 12.549900398011133 ], "wc_review_avg": [ 391.75, 121.25257729219615 ], "wc_reply_reviewers_avg": [ 12.25, 12.376893794486563 ], "wc_reply_authors_avg": [ 98.75, 66.02414331136755 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5575025681155003721&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mail.ustc.edu.cn;ustc.edu.cn;mails.tsinghua.edu.cn;caeit.cetc.com.cn;ustc.edu.cn;ustc.edu.cn;nus.edu.sg", "author_num": 7, "aff_unique_index": "0;0;1;2;0;0;3", "aff_unique_norm": "University of Science and Technology of China;Tsinghua University;China Electronics Technology Group Corporation;National University of Singapore", "aff_unique_dep": ";;Electronic Science Research Institute;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.tsinghua.edu.cn;http://www.cetc.com.cn;https://www.nus.edu.sg", "aff_unique_abbr": "USTC;THU;CETC;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "NYU CTF Bench: A Scalable Open-Source Benchmark Dataset for Evaluating LLMs in Offensive Security", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97547", "id": "itBDglVylS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=itBDglVylS", "openreview": "https://openreview.net/forum?id=itBDglVylS", "poster": "/media/PosterPDFs/NeurIPS%202024/97547.png?t=1731453043.2891166", "project": "", "author_site": "Minghao Shao, Sofija Jancheska, Meet Udeshi, Brendan Dolan-Gavitt, haoran xi, Kimberly Milner, Boyuan Chen, Max Yin, Siddharth Garg, Prashanth Krishnamurthy, Farshad Khorrami, Ramesh Karri, Muhammad Shafique", "tldr": "", "abstract": "Large Language Models (LLMs) are being deployed across various domains today. However, their capacity to solve Capture the Flag (CTF) challenges in cybersecurity has not been thoroughly evaluated. To address this, we develop a novel method to assess LLMs in solving CTF challenges by creating a scalable, open-source benchmark database specifically designed for these applications. This database includes metadata for LLM testing and adaptive learning, compiling a diverse range of CTF challenges from popular competitions. Utilizing the advanced function calling capabilities of LLMs, we build a fully automated system with an enhanced workflow and support for external tool calls. Our benchmark dataset and automated framework allow us to evaluate the performance of five LLMs, encompassing both black-box and open-source models. This work lays the foundation for future research into improving the efficiency of LLMs in interactive cybersecurity tasks and automated task planning. By providing a specialized benchmark, our project offers an ideal platform for developing, testing, and refining LLM-based approaches to vulnerability detection and resolution. Evaluating LLMs on these challenges \nand comparing with human performance yields insights into their potential for AI-driven cybersecurity solutions to perform real-world threat management. We make our benchmark dataset open source to public https://github.com/NYU-LLM-CTF/NYU_CTF_Bench along with our playground automated framework https://github.com/NYU-LLM-CTF/llm_ctf_automation.", "keywords": "Capture the Flag;automation;large language models;agent systems;cybersecurity", "primary_area": "", "supplementary_material": "/attachment/22de5ca219f870a45f61f91cd92696e30fe426ee.pdf", "author": "Minghao Shao;Sofija Jancheska;Meet Udeshi;Brendan Dolan-Gavitt;Haoran Xi;Kimberly Milner;Boyuan Chen;Max Yin;Siddharth Garg;Prashanth Krishnamurthy;Farshad Khorrami;Ramesh Karri;Muhammad Shafique", "authorids": "~Minghao_Shao3;~Sofija_Jancheska1;~Meet_Udeshi1;~Brendan_Dolan-Gavitt1;~Haoran_Xi1;~Kimberly_Milner1;~Boyuan_Chen3;~Max_Yin2;~Siddharth_Garg1;~Prashanth_Krishnamurthy1;~Farshad_Khorrami1;~Ramesh_Karri1;~Muhammad_Shafique1", "gender": "M;;M;M;F;;M;;M;;M;M;M", "homepage": "https://www.linkedin.com/in/shao-minghao/;;https://mudeshi.in;http://engineering.nyu.edu/people/brendan-dolan-gavitt;;https://github.com/rollingcoconut;;https://bbtc33.com;http://engineering.nyu.edu/people/siddharth-garg/;;https://engineering.nyu.edu/faculty/farshad-khorrami;http://engineering.nyu.edu/people/ramesh-karri/;https://nyuad.nyu.edu/en/academics/divisions/engineering/faculty/muhammad-shafique.html", "dblp": "307/9631;;285/4879;94/7563.html;226/2733;;;;94/3807;24/3420;94/5644;;s/MuhammadShafique", "google_scholar": ";;YlIc4EQAAAAJ;https://scholar.google.com.tw/citations?user=MzAxRscAAAAJ;;;;;https://scholar.google.com.tw/citations?user=Yf8OqQQAAAAJ;W-_zgGgAAAAJ;NdOqlPQAAAAJ;https://scholar.google.com.tw/citations?user=o60TaTEAAAAJ;xSR276UAAAAJ", "orcid": "0009-0002-4467-6224;;0000-0001-7297-0880;0000-0002-8867-4282;;;;;;;;;0000-0002-2607-8135", "linkedin": ";sofija-jancheska/;;;haoran-xi-22841327b;;boyuan-jack-chen-6466b2142/;;;;;;mshafiquewahlah", "or_profile": "~Minghao_Shao3;~Sofija_Jancheska1;~Meet_Udeshi1;~Brendan_Dolan-Gavitt1;~Haoran_Xi1;~Kimberly_Milner1;~Boyuan_Chen3;~Max_Yin2;~Siddharth_Garg1;~Prashanth_Krishnamurthy1;~Farshad_Khorrami1;~Ramesh_Karri1;~Muhammad_Shafique1", "aff": "New York University;New York University;New York University;New York University;New York University;New York University;New York University;New York University;New York University;New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;PhD student;MS student;PhD student;Undergrad student;Associate Professor;Research Scientist and Adjunct Faculty;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshao2024nyu,\ntitle={{NYU} {CTF} Bench: A Scalable Open-Source Benchmark Dataset for Evaluating {LLM}s in Offensive Security},\nauthor={Minghao Shao and Sofija Jancheska and Meet Udeshi and Brendan Dolan-Gavitt and Haoran Xi and Kimberly Milner and Boyuan Chen and Max Yin and Siddharth Garg and Prashanth Krishnamurthy and Farshad Khorrami and Ramesh Karri and Muhammad Shafique},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=itBDglVylS}\n}", "github": "", "reviewers": "82Xy;rkZ5;RHcs", "pdf_size": 811754, "rating": "6;6;8", "confidence": "5;3;4", "wc_summary_and_contributions": "53;97;184", "wc_strengths": "31;149;83", "wc_improvement": "69;160;79", "wc_limitations": "44;18;113", "wc_correctness": "1;41;67", "wc_clarity": "1;62;49", "wc_relation_to_prior_work": "29;75;111", "wc_documentation": "4;98;69", "wc_additional_feedback": "1;1;1", "wc_review": "233;701;756", "wc_reply_reviewers": "0;148;11", "wc_reply_authors": "46;0;0", "reply_reviewers": "0;2;1", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 111.33333333333333, 54.43242497711166 ], "wc_strengths_avg": [ 87.66666666666667, 48.28618389928485 ], "wc_improvement_avg": [ 102.66666666666667, 40.74582459862878 ], "wc_limitations_avg": [ 58.333333333333336, 40.086018621071474 ], "wc_correctness_avg": [ 36.333333333333336, 27.145697428669777 ], "wc_clarity_avg": [ 37.333333333333336, 26.23398982660133 ], "wc_relation_to_prior_work_avg": [ 71.66666666666667, 33.559234529741914 ], "wc_documentation_avg": [ 57.0, 39.30224760324359 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 563.3333333333334, 234.65767028209888 ], "wc_reply_reviewers_avg": [ 53.0, 67.32508200267317 ], "wc_reply_authors_avg": [ 15.333333333333334, 21.684607956387456 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14339120283558658861&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "COSMIC: Compress Satellite Image Efficiently via Diffusion Compensation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93986", "id": "itbKmreqUZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=itbKmreqUZ", "openreview": "https://openreview.net/forum?id=itbKmreqUZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93986.png?t=1730087975.9614525", "project": "", "author_site": "Ziyuan Zhang, Han Qiu, Maosen Zhang, Jun Liu, Bin Chen, Tianwei Zhang, Hewu Li", "tldr": "", "abstract": "With the rapidly increasing number of satellites in space and their enhanced capabilities, the amount of earth observation images collected by satellites is exceeding the transmission limits of satellite-to-ground links. Although existing learned image compression solutions achieve remarkable performance by using a sophisticated encoder to extract fruitful features as compression and using a decoder to reconstruct. It is still hard to directly deploy those complex encoders on current satellites' embedded GPUs with limited computing capability and power supply to compress images in orbit. In this paper, we propose COSMIC, a simple yet effective learned compression solution to transmit satellite images. We first design a lightweight encoder (i.e. reducing FLOPs by 2.5~5X) on satellite to achieve a high image compression ratio to save satellite-to-ground links. Then, for reconstructions on the ground, to deal with the feature extraction ability degradation due to simplifying encoders, we propose a diffusion-based model to compensate image details when decoding. Our insight is that satellite's earth observation photos are not just images but indeed multi-modal data with a nature of Text-to-Image pairing since they are collected with rich sensor data (e.g. coordinates, timestep, etc.) that can be used as the condition for diffusion generation. Extensive experiments show that COSMIC outperforms state-of-the-art baselines on both perceptual and distortion metrics.", "keywords": "Image compression;Satellite images;Generative model;Probabilistic diffusion models", "primary_area": "generative_models", "supplementary_material": "", "author": "Ziyuan Zhang;Han Qiu;Zhang Maosen;Jun Liu;Bin Chen;Tianwei Zhang;Hewu Li", "authorids": "~Ziyuan_Zhang2;~Han_Qiu3;~Zhang_Maosen1;~Jun_Liu28;~Bin_Chen4;~Tianwei_Zhang1;~Hewu_Li1", "gender": "F;M;M;F;M;M;", "homepage": ";https://qiuhan.info;https://MS-chang.github.io;https://www.insc.tsinghua.edu.cn/info/1157/2459.htm;https://binchen17tsinghua.wixsite.com/website;https://personal.ntu.edu.sg/tianwei.zhang/index.html;", "dblp": ";15/4507-1;;95/3736-63;22/5523-11;77/7902-4;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.fr/citations?user=6JWNv6gAAAAJ;;;Yl0wv7AAAAAJ;9vpiYDIAAAAJ;", "orcid": ";;0009-0003-2174-5757;;0000-0002-4798-230X;;", "linkedin": ";;;;;;", "or_profile": "~Ziyuan_Zhang2;~Han_Qiu3;~Zhang_Maosen1;~Jun_Liu28;~Bin_Chen4;~Tianwei_Zhang1;~Hewu_Li1", "aff": "Tsinghua University;Tsinghua University;Beijing University of Posts and Telecommunications;Tsinghua University;Harbin Institute of Technology, Shenzhen;Nanyang Technological University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;tsinghua.edu.cn;hit.edu.cn;ntu.edu.sg;", "position": "PhD student;Assistant Professor;Undergrad student;Assistant Professor;Assistant Professor;Assistant Professor;", "bibtex": "@inproceedings{\nzhang2024cosmic,\ntitle={{COSMIC}: Compress Satellite Image Efficiently via Diffusion Compensation},\nauthor={Ziyuan Zhang and Han Qiu and Zhang Maosen and Jun Liu and Bin Chen and Tianwei Zhang and Hewu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=itbKmreqUZ}\n}", "github": "", "reviewers": "upSj;AGki;GEfY;gvA5", "pdf_size": 34754646, "rating": "3;5;6;6", "confidence": "5;4;4;5", "soundness": "3;2;3;3", "novelty": "1;2;2;3", "presentation": "3;3;2;1", "wc_summary": "67;111;37;71", "wc_strengths": "90;64;49;74", "wc_weaknesses": "136;228;137;174", "wc_questions": "132;2;69;3", "wc_limitations": "53;2;1;3", "wc_review": "478;407;293;325", "wc_reply_reviewers": "51;0;37;19", "wc_reply_authors": "251;117;1141;73", "reply_reviewers": "1;0;2;1", "reply_authors": "2;3;5;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 71.5, 26.320144376503713 ], "wc_strengths_avg": [ 69.25, 14.922717580923388 ], "wc_weaknesses_avg": [ 168.75, 37.47916087641237 ], "wc_questions_avg": [ 51.5, 53.82610890636625 ], "wc_limitations_avg": [ 14.75, 22.094965489902897 ], "wc_review_avg": [ 375.75, 72.20586887504366 ], "wc_reply_reviewers_avg": [ 26.75, 19.162137145944865 ], "wc_reply_authors_avg": [ 395.5, 435.3788580075978 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=584569366116208236&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;tsinghua.edu.cn;hit.edu.cn;ntu.edu.sg;", "author_num": 7, "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "Tsinghua University;Beijing University of Posts and Telecommunications;Harbin Institute of Technology;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/;http://en.hhit.edu.cn/;https://www.ntu.edu.sg", "aff_unique_abbr": "THU;BUPT;HIT;NTU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "A Universal Growth Rate for Learning with Smooth Surrogate Losses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93985", "id": "itztwTAcN6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=itztwTAcN6", "openreview": "https://openreview.net/forum?id=itztwTAcN6", "poster": "", "project": "", "author_site": "Anqi Mao, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "This paper presents a comprehensive analysis of the growth rate of $H$-consistency bounds (and excess error bounds) for various surrogate losses used in classification. We prove a square-root growth rate near zero for smooth margin-based surrogate losses in binary classification, providing both upper and lower bounds under mild assumptions. This result also translates to excess error bounds. Our lower bound requires weaker conditions than those in previous work for excess error bounds, and our upper bound is entirely novel. Moreover, we extend this analysis to multi-class classification with a series of novel results, demonstrating a universal square-root growth rate for smooth *comp-sum* and *constrained losses*, covering common choices for training neural networks in multi-class classification. Given this universal rate, we turn to the question of choosing among different surrogate losses. We first examine how $H$-consistency bounds vary across surrogates based on the number of classes. Next, ignoring constants and focusing on behavior near zero, we identify *minimizability gaps* as the key differentiating factor in these bounds. Thus, we thoroughly analyze these gaps, to guide surrogate loss selection, covering: comparisons across different comp-sum losses, conditions where gaps become zero, and general conditions leading to small gaps. Additionally, we demonstrate the key role of minimizability gaps in comparing excess error bounds and $H$-consistency bounds.", "keywords": "surrogate loss functions;Bayes-consistency;H-consistency bounds;excess error bounds;estimation error bounds;generalization bounds;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Anqi Mao;Mehryar Mohri;Yutao Zhong", "authorids": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;M;", "homepage": "https://anqi-mao.github.io;https://cs.nyu.edu/~mohri/;", "dblp": "241/6864;03/5448;51/3178-2", "google_scholar": "nkjIZ-oAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;", "linkedin": ";mehryar-mohri-3737b981/;", "or_profile": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Courant Institute of Mathematical Sciences, NYU;Google Research;Google", "aff_domain": "cims.nyu.edu;google.com;google.com", "position": "PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmao2024a,\ntitle={A Universal Growth Rate for Learning with Smooth Surrogate Losses},\nauthor={Anqi Mao and Mehryar Mohri and Yutao Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=itztwTAcN6}\n}", "github": "", "reviewers": "ZEpV;s9tD;cQ3k;jALT", "pdf_size": 497835, "rating": "5;7;7;8", "confidence": "2;3;2;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;3", "wc_summary": "99;69;63;96", "wc_strengths": "129;52;47;179", "wc_weaknesses": "196;28;24;7", "wc_questions": "2;44;43;26", "wc_limitations": "2;10;1;7", "wc_review": "428;203;178;315", "wc_reply_reviewers": "0;0;9;31", "wc_reply_authors": "37;0;13;22", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.75, 15.927570436196476 ], "wc_strengths_avg": [ 101.75, 55.187747734438304 ], "wc_weaknesses_avg": [ 63.75, 76.7605855892202 ], "wc_questions_avg": [ 28.75, 17.020208576865326 ], "wc_limitations_avg": [ 5.0, 3.6742346141747673 ], "wc_review_avg": [ 281.0, 99.32018928697227 ], "wc_reply_reviewers_avg": [ 10.0, 12.668859459319927 ], "wc_reply_authors_avg": [ 18.0, 13.47219358530748 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7608859102526822, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10746044362098325739&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cims.nyu.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": "Courant Institute of Mathematical Sciences;Google Research", "aff_unique_url": "https://www.courant.nyu.edu;https://research.google", "aff_unique_abbr": "NYU;Google Research", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "New York;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Identifiable Shared Component Analysis of Unpaired Multimodal Mixtures", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93984", "id": "ivCX2cjwcT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ivCX2cjwcT", "openreview": "https://openreview.net/forum?id=ivCX2cjwcT", "poster": "/media/PosterPDFs/NeurIPS%202024/93984.png?t=1733434399.378483", "project": "", "author_site": "Subash Timilsina, Sagar Shrestha, Xiao Fu", "tldr": "", "abstract": "A core task in multi-modal learning is to integrate information from multiple feature spaces (e.g., text and audio), offering modality-invariant essential representations of data. Recent research showed that, classical tools such as canonical correlation analysis (CCA) provably identify the shared components up to minor ambiguities, when samples in each modality are generated from a linear mixture of shared and private components. Such identifiability results were obtained under the condition that the cross-modality samples are aligned/paired according to their shared information. This work takes a step further, investigating shared component identifiability from multi-modal linear mixtures where cross-modality samples are unaligned. A distribution divergence minimization-based loss is proposed, under which a suite of sufficient conditions ensuring identifiability of the shared components are derived. Our conditions are based on cross-modality distribution discrepancy characterization and density-preserving transform removal, which are much milder than existing studies relying on independent component analysis. More relaxed conditions are also provided via adding reasonable structural constraints, motivated by available side information in various applications. The identifiability claims are thoroughly validated using synthetic and real-world data.", "keywords": "Identifiability;Multi-modal Learning;Canonical Correlation Analysis;Shared Component Analysis;Independent Component Analysis", "primary_area": "other", "supplementary_material": "/attachment/5b4de823c9051b63b01d0c3529d325ca920b2c30.zip", "author": "Subash Timilsina;Sagar Shrestha;Xiao Fu", "authorids": "~Subash_Timilsina1;~Sagar_Shrestha1;~Xiao_Fu1", "gender": "M;M;M", "homepage": ";;https://web.engr.oregonstate.edu/~fuxia/", "dblp": "287/5033;292/3689;60/4601-1", "google_scholar": "qjQqWjAAAAAJ;qIBTvlAAAAAJ;pDnpH1MAAAAJ", "orcid": "0000-0002-1177-9051;;", "linkedin": ";;", "or_profile": "~Subash_Timilsina1;~Sagar_Shrestha1;~Xiao_Fu1", "aff": "Oregon State University;Oregon State University;Oregon State University", "aff_domain": "oregonstate.edu;oregonstate.edu;oregonstate.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ntimilsina2024identifiable,\ntitle={Identifiable Shared Component Analysis of Unpaired Multimodal Mixtures},\nauthor={Subash Timilsina and Sagar Shrestha and Xiao Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ivCX2cjwcT}\n}", "github": "", "reviewers": "YjhB;tDhK;1Nhs;DxMx", "pdf_size": 4125460, "rating": "5;5;6;7", "confidence": "2;3;3;3", "soundness": "3;2;2;4", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "299;45;97;97", "wc_strengths": "86;40;37;158", "wc_weaknesses": "470;74;435;228", "wc_questions": "386;60;2;113", "wc_limitations": "53;6;2;61", "wc_review": "1294;225;573;657", "wc_reply_reviewers": "422;13;392;209", "wc_reply_authors": "557;0;578;686", "reply_reviewers": "2;1;2;2", "reply_authors": "3;1;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 134.5, 97.31777843744688 ], "wc_strengths_avg": [ 80.25, 48.90999386628463 ], "wc_weaknesses_avg": [ 301.75, 160.75816464491004 ], "wc_questions_avg": [ 140.25, 147.21476658270393 ], "wc_limitations_avg": [ 30.5, 26.688012290165037 ], "wc_review_avg": [ 687.25, 385.93676619363436 ], "wc_reply_reviewers_avg": [ 259.0, 163.76354905778027 ], "wc_reply_authors_avg": [ 455.25, 267.35685422296547 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=919149306570882004&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "oregonstate.edu;oregonstate.edu;oregonstate.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Oregon State University", "aff_unique_dep": "", "aff_unique_url": "https://oregonstate.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "iwC19lVBoq", "title": "AVSET-10M: An Open Large-Scale Audio-Visual Dataset with High Correspondence", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Groundbreaking research from initiatives such as ChatGPT and Sora underscores the crucial role of large-scale data in advancing generative and comprehension tasks. However, the scarcity of comprehensive and large-scale audio-visual correspondence datasets poses a significant challenge to research in the audio-visual fields. To address this gap, we introduce **AVSET-10M**, a audio-visual high-corresponding dataset comprising 10 million samples, featuring the following key attributes: (1) **High Audio-Visual Correspondence**: Through meticulous sample filtering, we ensure robust correspondence between the audio and visual components of each entry. (2) **Comprehensive Categories**: Encompassing 527 unique audio categories, AVSET-10M offers the most extensive range of audio categories available. (3) **Large Scale**: With 10 million samples, AVSET-10M is the largest publicly available audio-visual corresponding dataset. We have benchmarked two critical tasks on AVSET-10M: audio-video retrieval and vision-queried sound separation. These tasks highlight the essential role of precise audio-visual correspondence in advancing audio-visual research. For more information, please visit https://avset-10m.github.io/.", "keywords": "audio-visual corresponding dataset;sound separation;audio-video retrieval", "primary_area": "", "supplementary_material": "/attachment/b96e9c1145d2a80161863de4c1e4cbdbb11fa0d8.zip", "author": "Xize Cheng;Ziang Zhang;Zehan Wang;Minghui Fang;Rongjie Huang;Siqi Zheng;Ruofan Hu;Bai Jionghao;Tao Jin;Zhou Zhao", "authorids": "~Xize_Cheng1;~Ziang_Zhang1;~Zehan_Wang2;~Minghui_Fang1;~Rongjie_Huang1;~Siqi_Zheng1;~Ruofan_Hu2;~Bai_Jionghao1;~Tao_Jin2;~Zhou_Zhao3", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://exgc.github.io/;;https://github.com/12zehan17;https://scholar.google.com/citations?user=8c3I0RwAAAAJ&hl=zh-CN;;;https://www.researchgate.net/profile/Bai-Jionghao;https://hugddygff.github.io/;https://github.com/2811668688;https://dblp.uni-trier.de/pid/75/7785.html?", "dblp": "334/2167;;126/7826-1;124/1993-2;212/8936-1;;356/8993;88/4850-4.html;;75/7785", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;DptGMnYAAAAJ;euXK0lkAAAAJ;8c3I0RwAAAAJ;iRHBUsgAAAAJ;https://scholar.google.com.hk/citations?user=BsrS95gAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.hk/citations?user=IIoFY90AAAAJ", "orcid": "0000-0001-9708-3225;;0009-0007-7509-7563;0009-0000-6488-9695;;;0009-0005-7106-513X;0000-0003-3564-1628;0009-0000-4618-1140;0000-0001-6121-0384", "linkedin": ";;;;;;;;;", "or_profile": "~Xize_Cheng1;~Ziang_Zhang1;~Zehan_Wang2;~Minghui_Fang1;~Rongjie_Huang1;~Siqi_Zheng1;~Bai_Jionghao1;~Tao_Jin2;~Hu_Ruofan1;~Zhou_Zhao2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Undergrad student;PhD student;MS student;MS student;Researcher;Undergrad student;Assistant Professor;Undergrad student;Associate Professor", "bibtex": "@misc{\nanonymous2024avsetm,\ntitle={{AVSET}-10M: An Open Large-Scale Audio-Visual Dataset with High Correspondence},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=iwC19lVBoq}\n}", "github": "", "project": "", "reviewers": "T2Qw;zYSb;hCbp;aXDx", "site": "https://openreview.net/forum?id=iwC19lVBoq", "pdf_size": 14769828, "rating": "3;6;7;7", "confidence": "5;3;4;4", "wc_summary_and_contributions": "71;73;95;40", "wc_strengths": "27;47;92;38", "wc_improvement": "224;26;4;61", "wc_limitations": "11;86;119;8", "wc_correctness": "27;24;28;2", "wc_clarity": "8;70;16;2", "wc_relation_to_prior_work": "15;23;21;1", "wc_documentation": "66;16;31;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "450;366;407;155", "wc_reply_reviewers": "42;600;21;55", "wc_reply_authors": "318;0;0;37", "reply_reviewers": "1;3;1;2", "reply_authors": "6;4;2;4", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 69.75, 19.587942719948924 ], "wc_strengths_avg": [ 51.0, 24.708298201211672 ], "wc_improvement_avg": [ 78.75, 86.28839725015177 ], "wc_limitations_avg": [ 56.0, 47.953102089437344 ], "wc_correctness_avg": [ 20.25, 10.638961415476606 ], "wc_clarity_avg": [ 24.0, 27.018512172212592 ], "wc_relation_to_prior_work_avg": [ 15.0, 8.602325267042627 ], "wc_documentation_avg": [ 28.75, 23.826193569263218 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 344.5, 113.36776437771012 ], "wc_reply_reviewers_avg": [ 179.5, 243.07869096241242 ], "wc_reply_authors_avg": [ 88.75, 133.21669377371592 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.6469966392206306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Mgl5JzEmSPYJ:scholar.google.com/&scioq=AVSET-10M:+An+Open+Large-Scale+Audio-Visual+Dataset+with+High+Correspondence&hl=en&as_sdt=0,5", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;1;0;0;0;0", "aff_unique_norm": "Zhejiang University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Reinforcement Learning with LTL and $\\omega$-Regular Objectives via Optimality-Preserving Translation to Average Rewards", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93983", "id": "iykao97YXf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=iykao97YXf", "openreview": "https://openreview.net/forum?id=iykao97YXf", "poster": "/media/PosterPDFs/NeurIPS%202024/93983.png?t=1732237065.2570188", "project": "", "author_site": "Xuan Bach Le, Dominik Wagner, Leon Witzman, Alexander Rabinovich, Luke Ong", "tldr": "", "abstract": "Linear temporal logic (LTL) and, more generally, $\\omega$-regular objectives are alternatives to the traditional discount sum and average reward objectives in reinforcement learning (RL), offering the advantage of greater comprehensibility and hence explainability. In this work, we study the relationship between these objectives. Our main result is that each RL problem for $\\omega$-regular objectives can be reduced to a limit-average reward problem in an optimality-preserving fashion, via (finite-memory) reward machines. Furthermore, we demonstrate the efficacy of this approach by showing that optimal policies for limit-average problems can be found asymptotically by solving a sequence of discount-sum problems approximately. Consequently, we resolve an open problem: optimal policies for LTL and $\\omega$-regular objectives can be learned asymptotically.", "keywords": "Reinforcement Learning;LTL;limit average rewards;translation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Xuan-Bach Le;Dominik Wagner;Leon Witzman;Alexander Rabinovich;Luke Ong", "authorids": "~Xuan-Bach_Le2;~Dominik_Wagner1;~Leon_Witzman1;~Alexander_Rabinovich2;~Luke_Ong1", "gender": "M;;;M;M", "homepage": "https://lexuanbach.github.io/;https://domwagner.github.io/;;https://www.cs.tau.ac.il//~rabinoa/;", "dblp": "125/2923;180/8364;258/0901;r/AlexanderMosheRabinovich;o/CHLukeOng", "google_scholar": "https://scholar.google.com.sg/citations?user=FnjHppEAAAAJ;;;;", "orcid": ";0000-0002-2807-8462;0000-0003-2074-7307;0000-0002-1460-2358;", "linkedin": ";;;;", "or_profile": "~Xuan-Bach_Le2;~Dominik_Wagner1;~Leon_Witzman1;~Alexander_Rabinovich2;~Luke_Ong1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Tel Aviv University;Nanyang Technological University", "aff_domain": "ntu.sg;ntu.edu.sg;ntu.edu.sg;tau.ac.il;ntu.edu.sg", "position": "Postdoc;Postdoc;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nle2024reinforcement,\ntitle={Reinforcement Learning with {LTL} and \\${\\textbackslash}omega\\$-Regular Objectives via Optimality-Preserving Translation to Average Rewards},\nauthor={Xuan-Bach Le and Dominik Wagner and Leon Witzman and Alexander Rabinovich and Luke Ong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=iykao97YXf}\n}", "github": "", "reviewers": "EhuA;VjJR;eqsH;FqP1", "pdf_size": 439291, "rating": "6;6;7;7", "confidence": "4;3;4;3", "soundness": "4;3;4;4", "novelty": "3;2;4;3", "presentation": "3;3;4;3", "wc_summary": "172;64;96;47", "wc_strengths": "84;53;301;15", "wc_weaknesses": "207;114;47;19", "wc_questions": "366;2;6;104", "wc_limitations": "10;6;15;12", "wc_review": "839;239;465;197", "wc_reply_reviewers": "104;43;9;12", "wc_reply_authors": "362;36;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 47.94462952198087 ], "wc_strengths_avg": [ 113.25, 111.1178990982101 ], "wc_weaknesses_avg": [ 96.75, 72.40985775431409 ], "wc_questions_avg": [ 119.5, 148.06333104452298 ], "wc_limitations_avg": [ 10.75, 3.2691742076555053 ], "wc_review_avg": [ 435.0, 254.54665584132115 ], "wc_reply_reviewers_avg": [ 42.0, 38.19031290785662 ], "wc_reply_authors_avg": [ 99.5, 152.26539331049588 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9111296627572588542&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ntu.sg;ntu.edu.sg;ntu.edu.sg;tau.ac.il;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Nanyang Technological University;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.tau.ac.il", "aff_unique_abbr": "NTU;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Singapore;Israel" }, { "title": "Public-data Assisted Private Stochastic Optimization: Power and Limitations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93982", "id": "j14wStqZni", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j14wStqZni", "openreview": "https://openreview.net/forum?id=j14wStqZni", "poster": "", "project": "", "author_site": "Enayat Ullah, Michael Menart, Raef Bassily, Crist\u00f3bal Guzm\u00e1n, Raman Arora", "tldr": "", "abstract": "We study the limits and capability of public-data assisted differentially private (PA-DP) algorithms. Specifically, we focus on the problem of stochastic convex optimization (SCO) with either labeled or unlabeled public data. For complete/labeled public data, we show that any $(\\epsilon,\\delta)$-PA-DP has excess risk $\\tilde{\\Omega}\\big(\\min(\\frac{1}{\\sqrt{n_{\\text{pub}}}},\\frac{1}{\\sqrt{n}}+\\frac{\\sqrt{d}}{n\\epsilon} ) \\big)$, where $d$ is the dimension, ${n_{\\text{pub}}}$ is the number of public samples, ${n_{\\text{priv}}}$ is the number of private samples, and $n={n_{\\text{pub}}}+{n_{\\text{priv}}}$. These lower bounds are established via our new lower bounds for PA-DP mean estimation, which are of a similar form. Up to constant factors, these lower bounds show that the simple strategy of either treating all data as private or discarding the private data, is optimal. We also study PA-DP supervised learning with \\textit{unlabeled} public samples. In contrast to our previous result, we here show novel methods for leveraging public data in private supervised learning. For generalized linear models (GLM) with unlabeled public data, we show an efficient algorithm which, given $\\tilde{O}({n_{\\text{priv}}}\\epsilon)$ unlabeled public samples, achieves the dimension independent rate $\\tilde{O}\\big(\\frac{1}{\\sqrt{{n_{\\text{priv}}}}} + \\frac{1}{\\sqrt{{n_{\\text{priv}}}\\epsilon}}\\big)$. We develop new lower bounds for this setting which shows that this rate cannot be improved with more public samples, and any fewer public samples leads to a worse rate. Finally, we provide extensions of this result to general hypothesis classes with finite \\textit{fat-shattering dimension} with applications to neural networks and non-Euclidean geometries.", "keywords": "Differential Privacy;Public Data;Stochastic Optimization;Generalized Linear Model", "primary_area": "privacy", "supplementary_material": "", "author": "Enayat Ullah;Michael Menart;Raef Bassily;Crist\u00f3bal A Guzm\u00e1n;Raman Arora", "authorids": "~Enayat_Ullah1;~Michael_Menart1;~Raef_Bassily2;~Crist\u00f3bal_A_Guzm\u00e1n1;~Raman_Arora1", "gender": ";M;;M;M", "homepage": "https://enayatullah.github.io;https://mikemenart.github.io/;https://sites.google.com/view/cguzman/;http://www.cs.jhu.edu/~raman/Home.html;https://sites.google.com/view/rbassily", "dblp": "223/5999;297/3184.html;21/9888;;88/8656", "google_scholar": ";https://scholar.google.com/citations?hl=en;Rt9fMGEAAAAJ;Spe0xdkAAAAJ;C8qMVQUAAAAJ", "orcid": ";;0000-0002-1498-2055;;", "linkedin": ";;;;", "or_profile": "~Enayat_Ullah1;~Michael_Menart1;~Crist\u00f3bal_A_Guzm\u00e1n1;~Raman_Arora1;~RAEF_BASSILY1", "aff": "Johns Hopkins University;The Ohio State University;Pontificia Universidad Catolica de Chile;Johns Hopkins University;Google", "aff_domain": "jhu.edu;osu.edu;uc.cl;jhu.edu;google.com", "position": "PhD student;Graduate Student;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nullah2024publicdata,\ntitle={Public-data Assisted Private Stochastic Optimization: Power and Limitations},\nauthor={Enayat Ullah and Michael Menart and Raef Bassily and Crist{\\'o}bal A Guzm{\\'a}n and Raman Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j14wStqZni}\n}", "github": "", "reviewers": "9k4B;42Zi;VXJL", "pdf_size": 557443, "rating": "5;6;7", "confidence": "3;2;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "56;283;79", "wc_strengths": "50;52;154", "wc_weaknesses": "367;115;1", "wc_questions": "6;77;50", "wc_limitations": "19;7;6", "wc_review": "498;534;290", "wc_reply_reviewers": "177;10;11", "wc_reply_authors": "228;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 139.33333333333334, 102.02069506831553 ], "wc_strengths_avg": [ 85.33333333333333, 48.56153027059817 ], "wc_weaknesses_avg": [ 161.0, 152.9182788289222 ], "wc_questions_avg": [ 44.333333333333336, 29.26127512980633 ], "wc_limitations_avg": [ 10.666666666666666, 5.90668171555645 ], "wc_review_avg": [ 440.6666666666667, 107.54637242707084 ], "wc_reply_reviewers_avg": [ 66.0, 78.48991442641957 ], "wc_reply_authors_avg": [ 76.0, 107.48023074035522 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16695340928368727269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jhu.edu;osu.edu;uc.cl;jhu.edu;google.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Johns Hopkins University;Ohio State University;Pontificia Universidad Catolica de Chile;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.jhu.edu;https://www.osu.edu;https://www.puc.cl;https://www.google.com", "aff_unique_abbr": "JHU;OSU;PUC;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Chile" }, { "title": "Generalizable Person Re-identification via Balancing Alignment and Uniformity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93981", "id": "j25WK4GEGH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j25WK4GEGH", "openreview": "https://openreview.net/forum?id=j25WK4GEGH", "poster": "/media/PosterPDFs/NeurIPS%202024/93981.png?t=1733412714.5961092", "project": "", "author_site": "Yoonki Cho, Jaeyoon Kim, Woo Jae Kim, Junsik Jung, Sung-eui Yoon", "tldr": "", "abstract": "Domain generalizable person re-identification (DG re-ID) aims to learn discriminative representations that are robust to distributional shifts. While data augmentation is a straightforward solution to improve generalization, certain augmentations exhibit a polarized effect in this task, enhancing in-distribution performance while deteriorating out-of-distribution performance. In this paper, we investigate this phenomenon and reveal that it leads to sparse representation spaces with reduced uniformity. To address this issue, we propose a novel framework, Balancing Alignment and Uniformity (BAU), which effectively mitigates this effect by maintaining a balance between alignment and uniformity. Specifically, BAU incorporates alignment and uniformity losses applied to both original and augmented images and integrates a weighting strategy to assess the reliability of augmented samples, further improving the alignment loss. Additionally, we introduce a domain-specific uniformity loss that promotes uniformity within each source domain, thereby enhancing the learning of domain-invariant features. Extensive experimental results demonstrate that BAU effectively exploits the advantages of data augmentation, which previous studies could not fully utilize, and achieves state-of-the-art performance without requiring complex training procedures. The code is available at https://github.com/yoonkicho/BAU.", "keywords": "re-identification;domain generalization;image retrieval", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yoonki Cho;Jaeyoon Kim;Woo Jae Kim;Junsik Jung;Sung-eui Yoon", "authorids": "~Yoonki_Cho1;~Jaeyoon_Kim1;~Woo_Jae_Kim1;~Junsik_Jung1;~Sung-eui_Yoon1", "gender": ";;M;M;M", "homepage": "https://yoonkicho.github.io/;;https://wkim97.github.io/;;http://sglab.kaist.ac.kr/~sungeui/", "dblp": "308/7049;;317/4885;254/8149;96/4574", "google_scholar": "BaNgLL0AAAAJ;;TlCQ1DYAAAAJ;https://scholar.google.co.kr/citations?user=FWJg--IAAAAJ;https://scholar.google.com.tw/citations?user=uLQzQW4AAAAJ", "orcid": ";;;;", "linkedin": ";;woo-jae-kim-ba4861273/;;", "or_profile": "~Yoonki_Cho1;~Jaeyoon_Kim1;~Woo_Jae_Kim1;~Junsik_Jung1;~Sung-eui_Yoon1", "aff": "KAIST;;Korea Advanced Institute of Science & Technology;KAIST;KAIST", "aff_domain": "kaist.ac.kr;;kaist.ac.kr;kaist.ac.kr;kaist.edu", "position": "PhD student;;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\ncho2024generalizable,\ntitle={Generalizable Person Re-identification via Balancing Alignment and Uniformity},\nauthor={Yoonki Cho and Jaeyoon Kim and Woo Jae Kim and Junsik Jung and Sung-eui Yoon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j25WK4GEGH}\n}", "github": "", "reviewers": "fzqs;Ywj9;L6wV;4LRb", "pdf_size": 980731, "rating": "4;5;5;6", "confidence": "5;5;3;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "49;70;69;98", "wc_strengths": "26;30;57;143", "wc_weaknesses": "183;124;36;392", "wc_questions": "3;4;65;58", "wc_limitations": "38;13;78;16", "wc_review": "299;241;305;707", "wc_reply_reviewers": "75;25;21;20", "wc_reply_authors": "936;502;130;454", "reply_reviewers": "1;1;1;1", "reply_authors": "3;4;3;4", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 71.5, 17.44276354251241 ], "wc_strengths_avg": [ 64.0, 47.14339826529267 ], "wc_weaknesses_avg": [ 183.75, 131.11898222606825 ], "wc_questions_avg": [ 32.5, 29.107559155655768 ], "wc_limitations_avg": [ 36.25, 25.96512083545925 ], "wc_review_avg": [ 388.0, 185.86285266292455 ], "wc_reply_reviewers_avg": [ 35.25, 23.025800746119558 ], "wc_reply_authors_avg": [ 505.5, 286.790428710583 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nc3m8y2YGIMJ:scholar.google.com/&scioq=Generalizable+Person+Re-identification+via+Balancing+Alignment+and+Uniformity&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "kaist.ac.kr;;kaist.ac.kr;kaist.ac.kr;kaist.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Accurate and Steady Inertial Pose Estimation through Sequence Structure Learning and Modulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93980", "id": "j2hzCTqbF0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j2hzCTqbF0", "openreview": "https://openreview.net/forum?id=j2hzCTqbF0", "poster": "", "project": "", "author_site": "Yinghao Wu, chaoran wang, Lu Yin, Shihui Guo, Yipeng Qin", "tldr": "", "abstract": "Transformer models excel at capturing long-range dependencies in sequential data, but lack explicit mechanisms to leverage structural patterns inherent in fixed-length input sequences. \nIn this paper, we propose a novel sequence structure learning and modulation approach that endows Transformers with the ability to model and utilize such fixed-sequence structural properties for improved performance on inertial pose estimation tasks.\nSpecifically, our method introduces a Sequence Structure Module (SSM) that utilizes structural information of fixed-length inertial sensor readings to adjust the input features of transformers.\nSuch structural information can either be acquired by learning or specified based on users' prior knowledge.\nTo justify the prospect of our approach, we show that i) injecting spatial structural information of IMUs/joints learned from data improves accuracy, while ii) injecting temporal structural information based on smooth priors reduces jitter (i.e., improves steadiness), in a spatial-temporal transformer solution for inertial pose estimation.\nExtensive experiments across multiple benchmark datasets demonstrate the superiority of our approach against state-of-the-art methods and has the potential to advance the design of the transformer architecture for fixed-length sequences.", "keywords": "Inertial Measurement Units (IMUs);Pose Estimation;Transformer", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/842d7f94fd172788c9bb024744d4c0f3bbc75c81.zip", "author": "Yinghao Wu;chaoran wang;Lu Yin;Shihui Guo;Yipeng Qin", "authorids": "~Yinghao_Wu3;~chaoran_wang1;~Lu_Yin5;~Shihui_Guo1;~Yipeng_Qin1", "gender": "M;M;M;M;", "homepage": "https://www.humanplus.xyz/;;https://www.humanplus.xyz/;http://www.guoshihui.net;https://profiles.cardiff.ac.uk/staff/qiny16", "dblp": ";;;;169/5516", "google_scholar": ";;;https://scholar.google.jp/citations?user=RPAVxiAAAAAJ;ojgWPpgAAAAJ", "orcid": ";0009-0007-7891-5699;;;0000-0002-1551-9126", "linkedin": ";;;;", "or_profile": "~Yinghao_Wu3;~chaoran_wang1;~Lu_Yin5;~Shihui_Guo1;~Yipeng_Qin1", "aff": "Xiamen University;Xiamen University;Chongqing University of Post and Telecommunications;Xiamen University;Cardiff University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;cqupt.edu.cn;xmu.edu.cn;cardiff.ac.uk", "position": "MS student;MS student;Undergrad student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024accurate,\ntitle={Accurate and Steady Inertial Pose Estimation through Sequence Structure Learning and Modulation},\nauthor={Yinghao Wu and chaoran wang and Lu Yin and Shihui Guo and Yipeng Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j2hzCTqbF0}\n}", "github": "", "reviewers": "bDZ5;eYY8;8LBr;2q9D", "pdf_size": 5264160, "rating": "4;5;6;7", "confidence": "5;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;4", "wc_summary": "138;49;111;92", "wc_strengths": "140;14;83;43", "wc_weaknesses": "69;97;115;128", "wc_questions": "48;149;46;30", "wc_limitations": "8;6;1;17", "wc_review": "403;315;356;310", "wc_reply_reviewers": "0;33;98;0", "wc_reply_authors": "0;24;38;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 97.5, 32.42298567374695 ], "wc_strengths_avg": [ 70.0, 47.25991959366838 ], "wc_weaknesses_avg": [ 102.25, 22.128883839904805 ], "wc_questions_avg": [ 68.25, 47.14008379288268 ], "wc_limitations_avg": [ 8.0, 5.787918451395113 ], "wc_review_avg": [ 346.0, 37.43661309466977 ], "wc_reply_reviewers_avg": [ 32.75, 40.00859282704154 ], "wc_reply_authors_avg": [ 15.5, 16.27114009527298 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.40451991747794525, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4038176974189819577&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "xmu.edu.cn;xmu.edu.cn;cqupt.edu.cn;xmu.edu.cn;cardiff.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Xiamen University;Chongqing University of Post and Telecommunications;Cardiff University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xmu.edu.cn;http://www.cqupt.edu.cn;https://www.cardiff.ac.uk", "aff_unique_abbr": "XMU;CQUPT;Cardiff", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Kernel Language Entropy: Fine-grained Uncertainty Quantification for LLMs from Semantic Similarities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93979", "id": "j2wCrWmgMX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j2wCrWmgMX", "openreview": "https://openreview.net/forum?id=j2wCrWmgMX", "poster": "/media/PosterPDFs/NeurIPS%202024/93979.png?t=1733734207.6523933", "project": "", "author_site": "Alexander Nikitin, Jannik Kossen, Yarin Gal, Pekka Marttinen", "tldr": "", "abstract": "Uncertainty quantification in Large Language Models (LLMs) is crucial for applications where safety and reliability are important. In particular, uncertainty can be used to improve the trustworthiness of LLMs by detecting factually incorrect model responses, commonly called hallucinations. Critically, one should seek to capture the model's semantic uncertainty, i.e., the uncertainty over the meanings of LLM outputs, rather than uncertainty over lexical or syntactic variations that do not affect answer correctness.\nTo address this problem, we propose Kernel Language Entropy (KLE), a novel method for uncertainty estimation in white- and black-box LLMs. KLE defines positive semidefinite unit trace kernels to encode the semantic similarities of LLM outputs and quantifies uncertainty using the von Neumann entropy. It considers pairwise semantic dependencies between answers (or semantic clusters), providing more fine-grained uncertainty estimates than previous methods based on hard clustering of answers. We theoretically prove that KLE generalizes the previous state-of-the-art method called semantic entropy and empirically demonstrate that it improves uncertainty quantification performance across multiple natural language generation datasets and LLM architectures.", "keywords": "uncertainty quantification;LLMs", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Alexander V Nikitin;Jannik Kossen;Yarin Gal;Pekka Marttinen", "authorids": "~Alexander_V_Nikitin1;~Jannik_Kossen2;~Yarin_Gal1;~Pekka_Marttinen1", "gender": "Not Specified;Unspecified;;M", "homepage": "https://anikitin.me;https://jlko.eu;http://www.cs.ox.ac.uk/people/yarin.gal/website//;https://users.ics.aalto.fi/~pemartti/", "dblp": "320/6937;250/2339;67/9076;32/894", "google_scholar": ";i1FIOV0AAAAJ;https://scholar.google.co.uk/citations?user=SIayDoQAAAAJ;id47-5cAAAAJ", "orcid": ";;;0000-0001-7078-7927", "linkedin": ";;;", "or_profile": "~Alexander_V_Nikitin1;~Jannik_Kossen2;~Yarin_Gal1;~Pekka_Marttinen1", "aff": "Aalto University;University of Oxford;University of Oxford;Aalto University", "aff_domain": "aalto.fi;oxford.ac.uk;ox.ac.uk;aalto.fi", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nnikitin2024kernel,\ntitle={Kernel Language Entropy: Fine-grained Uncertainty Quantification for {LLM}s from Semantic Similarities},\nauthor={Alexander V Nikitin and Jannik Kossen and Yarin Gal and Pekka Marttinen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j2wCrWmgMX}\n}", "github": "", "reviewers": "S5q9;Raee;tEe6;DkpB", "pdf_size": 1558766, "rating": "4;6;7;7", "confidence": "4;2;5;4", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;2", "wc_summary": "171;62;96;26", "wc_strengths": "111;88;65;28", "wc_weaknesses": "217;73;47;237", "wc_questions": "271;156;27;61", "wc_limitations": "1;25;6;5", "wc_review": "771;404;241;357", "wc_reply_reviewers": "89;11;21;34", "wc_reply_authors": "417;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 88.75, 53.55079364491249 ], "wc_strengths_avg": [ 73.0, 30.651264247988205 ], "wc_weaknesses_avg": [ 143.5, 84.30154209740175 ], "wc_questions_avg": [ 128.75, 94.76385123030828 ], "wc_limitations_avg": [ 9.25, 9.283722313813572 ], "wc_review_avg": [ 443.25, 198.3083142482937 ], "wc_reply_reviewers_avg": [ 38.75, 30.13614938906429 ], "wc_reply_authors_avg": [ 104.25, 180.56629668905546 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.18731716231633877, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11268977261436486694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "aalto.fi;oxford.ac.uk;ox.ac.uk;aalto.fi", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Aalto University;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.ox.ac.uk", "aff_unique_abbr": "Aalto;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Finland;United Kingdom" }, { "title": "Are Large Language Models Good Statisticians?", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97546", "id": "j4CRWz418M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j4CRWz418M", "openreview": "https://openreview.net/forum?id=j4CRWz418M", "poster": "/media/PosterPDFs/NeurIPS%202024/97546.png?t=1730448650.753446", "project": "", "author_site": "Yizhang Zhu, Shiyin Du, Boyan Li, Yuyu Luo, Nan Tang", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated impressive capabilities across a range of scientific tasks including mathematics, physics, and chemistry. Despite their successes, the effectiveness of LLMs in handling complex statistical tasks remains systematically under-explored. To bridge this gap, we introduce StatQA, a new benchmark designed for statistical analysis tasks. StatQA comprises 11,623 examples tailored to evaluate LLMs' proficiency in specialized statistical tasks and their applicability assessment capabilities, particularly for hypothesis testing methods. We systematically experiment with representative LLMs using various prompting strategies and show that even state-of-the-art models such as GPT-4o achieve a best performance of only 64.83%, indicating significant room for improvement. Notably, while open-source LLMs (e.g. LLaMA-3) show limited capability, those fine-tuned ones exhibit marked improvements, outperforming all in-context learning-based methods (e.g. GPT-4o). Moreover, our comparative human experiments highlight a striking contrast in error types between LLMs and humans: LLMs primarily make applicability errors, whereas humans mostly make statistical task confusion errors. This divergence highlights distinct areas of proficiency and deficiency, suggesting that combining LLM and human expertise could lead to complementary strengths, inviting further investigation into their collaborative potential. Our source code and data are available at https://statqa.github.io/.", "keywords": "statistical question answering;statistical reasoning;benchmark evaluation;large language model", "primary_area": "", "supplementary_material": "", "author": "Yizhang Zhu;Shiyin Du;Boyan Li;Yuyu Luo;Nan Tang", "authorids": "~Yizhang_Zhu1;~Shiyin_Du2;~Boyan_Li2;~Yuyu_Luo1;~Nan_Tang3", "gender": "M;;M;M;M", "homepage": "https://derrickzhuyz.github.io/;https://github.com/dushiyin;https://github.com/BugMaker-Boyan;https://luoyuyu.vip/;https://nantang.github.io/", "dblp": "359/8583;;;185/9921.html;27/104-1", "google_scholar": "eAkUamUAAAAJ;;https://scholar.google.cz/citations?user=RZ2oElwAAAAJ;FAjYJkQAAAAJ;", "orcid": "0009-0004-5496-5008;;0009-0009-8391-4687;0000-0001-9530-3327;", "linkedin": ";;;;", "or_profile": "~Yizhang_Zhu1;~Shiyin_Du2;~Boyan_Li2;~Yuyu_Luo1;~Nan_Tang3", "aff": "The Hong Kong University of Science and Technology (Guangzhou);The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;HKUST(GZ)", "aff_domain": "hkust-gz.edu.cn;hkust-gz.edu.cn;hkust-gz.edu.cn;ust.hk;hkust-gz.edu.cn", "position": "MS student;MS student;Intern;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nzhu2024are,\ntitle={Are Large Language Models Good Statisticians?},\nauthor={Yizhang Zhu and Shiyin Du and Boyan Li and Yuyu Luo and Nan Tang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=j4CRWz418M}\n}", "github": "", "reviewers": "F2bD;jPG1;8PyK;f7Y3;u58N", "pdf_size": 1158388, "rating": "5;6;7;7;9", "confidence": "3;4;3;3;4", "wc_summary_and_contributions": "63;88;67;132;233", "wc_strengths": "14;54;80;85;147", "wc_improvement": "19;74;42;181;171", "wc_limitations": "27;6;65;11;67", "wc_correctness": "10;4;90;14;38", "wc_clarity": "12;4;93;23;47", "wc_relation_to_prior_work": "32;1;122;26;1", "wc_documentation": "3;1;49;15;4", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "181;233;609;488;709", "wc_reply_reviewers": "0;47;50;23;36", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 6.8, 1.32664991614216 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 116.6, 63.14617961523879 ], "wc_strengths_avg": [ 76.0, 43.51091817004095 ], "wc_improvement_avg": [ 97.4, 66.58708583501759 ], "wc_limitations_avg": [ 35.2, 26.095210288480146 ], "wc_correctness_avg": [ 31.2, 31.587339235839412 ], "wc_clarity_avg": [ 35.8, 32.05869616812262 ], "wc_relation_to_prior_work_avg": [ 36.4, 44.63451579215349 ], "wc_documentation_avg": [ 14.4, 17.97331355092878 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 444.0, 206.43449324180298 ], "wc_reply_reviewers_avg": [ 31.2, 18.258148865643527 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.43082021842766455, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5474692788141686046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hkust-gz.edu.cn;hkust-gz.edu.cn;hkust-gz.edu.cn;ust.hk;hkust-gz.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Guangzhou;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Stress-Testing Long-Context Language Models with Lifelong ICL and Task Haystack", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97545", "id": "j6PTT6NB2O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j6PTT6NB2O", "openreview": "https://openreview.net/forum?id=j6PTT6NB2O", "poster": "/media/PosterPDFs/NeurIPS%202024/97545.png?t=1731704802.3293543", "project": "", "author_site": "Xiaoyue Xu, Qinyuan Ye, Xiang Ren", "tldr": "", "abstract": "We introduce Lifelong ICL, a problem setting that challenges long-context language models (LMs) to learn a sequence of language tasks through in-context learning (ICL). We further introduce Task Haystack, an evaluation suite dedicated to assessing and diagnosing how long-context LMs utilizes contexts in Lifelong ICL. When given a task instruction and test inputs, long-context LMs are expected\nto leverage the relevant demonstrations in the Lifelong ICL prompt, avoid distraction and interference from other tasks, and achieve test accuracies that are not significantly worse than those of the Single-task ICL baseline.\n\nTask Haystack draws inspiration from the widely-adopted \u201cneedle-in-a-haystack\u201d (NIAH) evaluation, but presents distinct new challenges. It requires models (1) to utilize the contexts at a deeper level, rather than resorting to simple copying and pasting; (2) to navigate through long streams of evolving topics and tasks, proxying the complexities and dynamism of contexts in real-world scenarios. Additionally, Task Haystack inherits the controllability of NIAH, providing model developers with tools and visualizations to identify model vulnerabilities effectively.\n\nWe benchmark 14 long-context LMs using Task Haystack, finding that frontier models like GPT-4o still struggle with the setting, failing on 15% of cases on average. Most open-weight models further lack behind by a large margin, with failure rates reaching up to 61%. In our controlled analysis, we identify factors such as distraction and recency bias as contributors to these failure cases. Further, performance declines when task instructions are paraphrased at test time or when ICL demonstrations are repeated excessively, raising concerns about the robustness, instruction understanding, and true context utilization of long-context LMs. We release our code and data to encourage future research that investigates and addresses these limitations.", "keywords": "long-context models;lifelong learning;evaluation;benchmark;robustness;in-context learning", "primary_area": "", "supplementary_material": "/attachment/d84991acf01152399ab8d90c3732040f008f33b3.zip", "author": "Xiaoyue Xu;Qinyuan Ye;Xiang Ren", "authorids": "~Xiaoyue_Xu1;~Qinyuan_Ye1;~Xiang_Ren1", "gender": ";F;M", "homepage": "https://xiaoyue2002.github.io;http://yeqy.xyz/;https://shanzhenren.github.io/", "dblp": ";239/5731;36/360-1", "google_scholar": ";g230ERwAAAAJ;_moJlrIAAAAJ", "orcid": ";;", "linkedin": ";;xren7", "or_profile": "~Xiaoyue_Xu1;~Qinyuan_Ye1;~Xiang_Ren1", "aff": "Tsinghua University;University of Southern California;University of Southern California", "aff_domain": "tsinghua.edu.cn;usc.edu;usc.edu", "position": "Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nxu2024stresstesting,\ntitle={Stress-Testing Long-Context Language Models with Lifelong {ICL} and Task Haystack},\nauthor={Xiaoyue Xu and Qinyuan Ye and Xiang Ren},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=j6PTT6NB2O}\n}", "github": "", "reviewers": "PEya;seb7;6Dct;mKcD", "pdf_size": 1152284, "rating": "6;6;7;7", "confidence": "3;4;4;3", "wc_summary_and_contributions": "53;47;57;52", "wc_strengths": "6;27;43;59", "wc_improvement": "6;76;348;43", "wc_limitations": "1;5;12;31", "wc_correctness": "1;3;14;5", "wc_clarity": "1;6;5;5", "wc_relation_to_prior_work": "1;1;190;19", "wc_documentation": "1;1;1;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "71;167;671;230", "wc_reply_reviewers": "233;0;104;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;0;1;0", "reply_authors": "2;1;4;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 52.25, 3.5619517121937516 ], "wc_strengths_avg": [ 33.75, 19.613452016409553 ], "wc_improvement_avg": [ 118.25, 134.93771711423014 ], "wc_limitations_avg": [ 12.25, 11.519006033508273 ], "wc_correctness_avg": [ 5.75, 4.968651728587948 ], "wc_clarity_avg": [ 4.25, 1.920286436967152 ], "wc_relation_to_prior_work_avg": [ 52.75, 79.58132632722327 ], "wc_documentation_avg": [ 4.5, 6.06217782649107 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 284.75, 230.07648184897127 ], "wc_reply_reviewers_avg": [ 84.25, 95.802857473042 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=667885094496878429&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;usc.edu;usc.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.usc.edu", "aff_unique_abbr": "THU;USC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "Does Worst-Performing Agent Lead the Pack? Analyzing Agent Dynamics in Unified Distributed SGD", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93978", "id": "j6Zsoj544N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j6Zsoj544N", "openreview": "https://openreview.net/forum?id=j6Zsoj544N", "poster": "", "project": "", "author_site": "Jie Hu, Yi-Ting Ma, Do-Young Eun", "tldr": "", "abstract": "Distributed learning is essential to train machine learning algorithms across *heterogeneous* agents while maintaining data privacy. We conduct an asymptotic analysis of Unified Distributed SGD (UD-SGD), exploring a variety of communication patterns, including decentralized SGD and local SGD within Federated Learning (FL), as well as the increasing communication interval in the FL setting. In this study, we assess how different sampling strategies, such as *i.i.d.* sampling, shuffling, and Markovian sampling, affect the convergence speed of UD-SGD by considering the impact of agent dynamics on the limiting covariance matrix as described in the Central Limit Theorem (CLT). Our findings not only support existing theories on linear speedup and asymptotic network independence, but also theoretically and empirically show how efficient sampling strategies employed by individual agents contribute to overall convergence in UD-SGD. Simulations reveal that a few agents using highly efficient sampling can achieve or surpass the performance of the majority employing moderately improved strategies, providing new insights beyond traditional analyses focusing on the worst-performing agent.", "keywords": "Distributed Optimization;Agent Dynamics;Federated Learning;Central Limit Theorem;Efficient Sampling", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jie Hu;Yi-Ting Ma;Do Young Eun", "authorids": "~Jie_Hu7;~Yi-Ting_Ma1;~Do_Young_Eun1", "gender": "M;M;M", "homepage": ";;https://people.engr.ncsu.edu/dyeun/", "dblp": ";;", "google_scholar": "8uBqtwEAAAAJ;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-0165-3738;;", "linkedin": "jie-hu-ncsu;;", "or_profile": "~Jie_Hu7;~Yi-Ting_Ma1;~Do_Young_Eun1", "aff": "North Carolina State University;North Carolina State University;North Carolina State University", "aff_domain": "ncsu.edu;ncsu.edu;ncsu.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhu2024does,\ntitle={Does Worst-Performing Agent Lead the Pack? Analyzing Agent Dynamics in Unified Distributed {SGD}},\nauthor={Jie Hu and Yi-Ting Ma and Do Young Eun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j6Zsoj544N}\n}", "github": "", "reviewers": "WSiF;ds2S;Evbf", "pdf_size": 1289534, "rating": "6;7;7", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "62;97;39", "wc_strengths": "77;145;52", "wc_weaknesses": "70;158;136", "wc_questions": "58;4;5", "wc_limitations": "1;4;4", "wc_review": "268;408;236", "wc_reply_reviewers": "13;21;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.0, 23.84673283002656 ], "wc_strengths_avg": [ 91.33333333333333, 39.296593010364425 ], "wc_weaknesses_avg": [ 121.33333333333333, 37.392809766347085 ], "wc_questions_avg": [ 22.333333333333332, 25.223445883190152 ], "wc_limitations_avg": [ 3.0, 1.4142135623730951 ], "wc_review_avg": [ 304.0, 74.69047239552489 ], "wc_reply_reviewers_avg": [ 16.666666666666668, 3.2998316455372216 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XbXXUelI3XYJ:scholar.google.com/&scioq=Does+Worst-Performing+Agent+Lead+the+Pack%3F+Analyzing+Agent+Dynamics+in+Unified+Distributed+SGD&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "ncsu.edu;ncsu.edu;ncsu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "North Carolina State University", "aff_unique_dep": "", "aff_unique_url": "https://www.ncsu.edu", "aff_unique_abbr": "NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Agent Planning with World Knowledge Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93977", "id": "j6kJSS9O6I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j6kJSS9O6I", "openreview": "https://openreview.net/forum?id=j6kJSS9O6I", "poster": "/media/PosterPDFs/NeurIPS%202024/93977.png?t=1729585588.839695", "project": "", "author_site": "Shuofei Qiao, Runnan Fang, Ningyu Zhang, Yuqi Zhu, Xiang Chen, Shumin Deng, Yong Jiang, Pengjun Xie, Fei Huang, Huajun Chen", "tldr": "", "abstract": "Recent endeavors towards directly using large language models (LLMs) as agent models to execute interactive planning tasks have shown commendable results. Despite their achievements, however, they still struggle with brainless trial-and-error in global planning and generating hallucinatory actions in local planning due to their poor understanding of the \"real\" physical world. Imitating humans' mental world knowledge model which provides global prior knowledge before the task and maintains local dynamic knowledge during the task, in this paper, we introduce parametric World Knowledge Model (WKM) to facilitate agent planning. Concretely, we steer the agent model to self-synthesize knowledge from both expert and sampled trajectories. Then we develop WKM, providing prior task knowledge to guide the global planning and dynamic state knowledge to assist the local planning. Experimental results on three real-world simulated datasets with Mistral-7B, Gemma-7B, and Llama-3-8B demonstrate that our method can achieve superior performance compared to various strong baselines. Besides, we analyze to illustrate that our WKM can effectively alleviate the blind trial-and-error and hallucinatory action issues, providing strong support for the agent's understanding of the world. Other interesting findings include: 1) our instance-level task knowledge can generalize better to unseen tasks, 2) weak WKM can guide strong agent model planning, and 3) unified WKM training has promising potential for further development.", "keywords": "world knowledge model;agent planning;large language models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/4146761db80a315a40ddd9af105151035f272294.zip", "author": "Shuofei Qiao;Runnan Fang;Ningyu Zhang;Yuqi Zhu;Xiang Chen;Shumin Deng;Yong Jiang;Pengjun Xie;Fei Huang;Huajun Chen", "authorids": "~Shuofei_Qiao1;~Runnan_Fang1;~Ningyu_Zhang1;~Yuqi_Zhu2;~Xiang_Chen5;~Shumin_Deng1;~Yong_Jiang1;~Pengjun_Xie2;~Fei_Huang2;~Huajun_Chen1", "gender": ";;M;F;M;F;M;M;M;M", "homepage": ";;https://person.zju.edu.cn/en/ningyu;https://github.com/LesileZ;https://faculty.nuaa.edu.cn/ChenXiang/zh_CN/index.htm;https://231sm.github.io/;http://jiangyong.site/;;https://sites.google.com/view/fei-huang;", "dblp": ";;139/4181-1.html;;64/3062-16;213/1853;;212/1755.html;h/FeiHuang.html;94/5089", "google_scholar": ";;xQDOPvsAAAAJ;2DIYcgMAAAAJ;pXivdn8AAAAJ;3am3hL4AAAAJ;sxXZWQQAAAAJ;;9r98PpoAAAAJ;", "orcid": ";;0000-0002-1970-0678;0009-0009-4041-2601;0000-0002-2594-0600;;;;;", "linkedin": ";;ningyuzhang/;yuqi-zhu/;;;;;fei-huang-cas-cmu;", "or_profile": "~Shuofei_Qiao1;~Runnan_Fang1;~Ningyu_Zhang1;~Yuqi_Zhu2;~Xiang_Chen5;~Shumin_Deng1;~Yong_Jiang1;~Pengjun_Xie2;~Fei_Huang2;~Huajun_Chen1", "aff": ";;Zhejiang University;Zhejiang University;Zhejiang University;National University of Singapore;Tongyi Lab;Alibaba Group;Alibaba Group US;Zhejiang University", "aff_domain": ";;zju.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;zju.edu.cn", "position": ";;Associate Professor;PhD student;PhD student;Postdoc;Researcher;Researcher;Senior Research Director;Full Professor", "bibtex": "@inproceedings{\nqiao2024agent,\ntitle={Agent Planning with World Knowledge Model},\nauthor={Shuofei Qiao and Runnan Fang and Ningyu Zhang and Yuqi Zhu and Xiang Chen and Shumin Deng and Yong Jiang and Pengjun Xie and Fei Huang and Huajun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j6kJSS9O6I}\n}", "github": "", "reviewers": "vJYv;8XFL;pvgu;TdHL", "pdf_size": 4995138, "rating": "4;6;6;7", "confidence": "3;4;4;2", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "71;86;89;62", "wc_strengths": "20;44;117;53", "wc_weaknesses": "26;250;84;35", "wc_questions": "296;4;2;69", "wc_limitations": "1;1;5;7", "wc_review": "414;385;297;226", "wc_reply_reviewers": "763;24;0;80", "wc_reply_authors": "1351;27;0;60", "reply_reviewers": "3;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.0, 11.022703842524301 ], "wc_strengths_avg": [ 58.5, 35.864327680858594 ], "wc_weaknesses_avg": [ 98.75, 90.07045853108554 ], "wc_questions_avg": [ 92.75, 120.40219059468976 ], "wc_limitations_avg": [ 3.5, 2.598076211353316 ], "wc_review_avg": [ 330.5, 74.13669806512831 ], "wc_reply_reviewers_avg": [ 216.75, 316.7107315832541 ], "wc_reply_authors_avg": [ 359.5, 572.8370187060191 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4717588484739326620&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";;zju.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;zju.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;1;2;3;3;0", "aff_unique_norm": "Zhejiang University;National University of Singapore;Tongyi Lab;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg;;https://www.alibaba.com", "aff_unique_abbr": "ZJU;NUS;;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;3;0", "aff_country_unique": "China;Singapore;;United States" }, { "title": "Instructor-inspired Machine Learning for Robust Molecular Property Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93976", "id": "j7sw0nXLjZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=j7sw0nXLjZ", "openreview": "https://openreview.net/forum?id=j7sw0nXLjZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93976.png?t=1729394103.3490574", "project": "", "author_site": "Fang Wu, Shuting Jin, Siyuan Li, Stan Z. Li", "tldr": "", "abstract": "Machine learning catalyzes a revolution in chemical and biological science. However, its efficacy is heavily dependent on the availability of labeled data, and annotating biochemical data is extremely laborious. To surmount this data sparsity challenge, we present an instructive learning algorithm named InstructMol to measure pseudo-labels' reliability and help the target model leverage large-scale unlabeled data. InstructMol does not require transferring knowledge between multiple domains, which avoids the potential gap between the pretraining and fine-tuning stages. We demonstrated the high accuracy of InstructMol on several real-world molecular datasets and out-of-distribution (OOD) benchmarks.", "keywords": "Molecular Representations;Semi-supervised Learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Fang Wu;Shuting Jin;Siyuan Li;Stan Z. Li", "authorids": "~Fang_Wu1;~Shuting_Jin1;~Siyuan_Li6;~Stan_Z._Li2", "gender": ";F;M;", "homepage": ";;https://lupin1998.github.io/;", "dblp": ";;63/9705-2;", "google_scholar": ";https://scholar.google.com/citations?view_op=search_authors;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";0000-0002-8113-9367;0000-0001-6806-2468;", "linkedin": ";;https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;", "or_profile": "~Fang_Wu1;~Shuting_Jin1;~Siyuan_Li6;~Stan_Z._Li2", "aff": ";Wuhan University of Science and Technology;Alibaba Group;", "aff_domain": ";wust.edu.cn;alibaba-inc.com;", "position": ";Lecturer;Intern;", "bibtex": "@inproceedings{\nwu2024instructorinspired,\ntitle={Instructor-inspired Machine Learning for Robust Molecular Property Prediction},\nauthor={Fang Wu and Shuting Jin and Siyuan Li and Stan Z. Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=j7sw0nXLjZ}\n}", "github": "", "reviewers": "dbD2;pmns;nboj;73Xa;UbNR", "pdf_size": 3326690, "rating": "5;6;6;6;7", "confidence": "4;4;4;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;2;3", "presentation": "2;3;3;3;4", "wc_summary": "78;80;68;35;87", "wc_strengths": "33;39;40;15;38", "wc_weaknesses": "182;78;35;22;101", "wc_questions": "105;113;62;14;108", "wc_limitations": "1;6;34;22;3", "wc_review": "399;316;239;108;337", "wc_reply_reviewers": "64;74;47;29;23", "wc_reply_authors": "0;0;0;41;65", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;2;2", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 69.6, 18.33684814792335 ], "wc_strengths_avg": [ 33.0, 9.316651759081692 ], "wc_weaknesses_avg": [ 83.6, 56.873895593672856 ], "wc_questions_avg": [ 80.4, 37.88720100508878 ], "wc_limitations_avg": [ 13.2, 12.765578717786358 ], "wc_review_avg": [ 279.8, 99.97079573555469 ], "wc_reply_reviewers_avg": [ 47.4, 19.5816240388789 ], "wc_reply_authors_avg": [ 21.2, 27.051062825700583 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5503926270098793721&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";wust.edu.cn;alibaba-inc.com;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Wuhan University of Science and Technology;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.wust.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "WUST;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Query-Based Adversarial Prompt Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93975", "id": "jBf3eIyD2x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jBf3eIyD2x", "openreview": "https://openreview.net/forum?id=jBf3eIyD2x", "poster": "", "project": "", "author_site": "Jonathan Hayase, Ema Borevkovi\u0107, Nicholas Carlini, Florian Tramer, Milad Nasr", "tldr": "", "abstract": "Recent work has shown it is possible to construct adversarial examples that cause aligned language models to emit harmful strings or perform harmful behavior.\nExisting attacks work either in the white-box setting (with full access to the model weights), or through _transferability_: the phenomenon that adversarial examples crafted on one model often remain effective on other models.\nWe improve on prior work with a _query-based_ attack that leverages API access to a remote language model to construct adversarial examples that cause the model to emit harmful strings with (much) higher probability than with transfer-only attacks.\nWe validate our attack on GPT-3.5 and OpenAI's safety classifier; we can cause GPT-3.5 to emit harmful strings that current transfer attacks fail at, and we can evade the OpenAI and Llama Guard safety classifiers with nearly 100% probability.", "keywords": "adversarial examples;large language models;black box", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/b9f4dcaafa711ee2e4e643d493bd3755b9601af4.zip", "author": "Jonathan Hayase;Ema Borevkovi\u0107;Nicholas Carlini;Florian Tram\u00e8r;Milad Nasr", "authorids": "~Jonathan_Hayase2;~Ema_Borevkovi\u01071;~Nicholas_Carlini1;~Florian_Tram\u00e8r1;~Milad_Nasr2", "gender": "M;F;;;M", "homepage": "https://jhayase.github.io/;;http://nicholas.carlini.com;https://people.cs.umass.edu/~milad/;http://floriantramer.com", "dblp": "244/9599;;145/1806;;158/7224", "google_scholar": "Zw-l1d8AAAAJ;;;k6-nvDAAAAAJ;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ", "orcid": "0000-0002-3757-6586;;;;", "linkedin": "jonathan-hayase-5ab849128;ema-borevkovic-0a97b32b2/;;;", "or_profile": "~Jonathan_Hayase2;~Ema_Borevkovi\u01071;~Nicholas_Carlini1;~Milad_Nasr2;~Florian_Tramer1", "aff": "University of Washington;University of Zagreb;Google;Google;ETHZ - ETH Zurich", "aff_domain": "washington.edu;unizg.hr;google.com;google.com;ethz.ch", "position": "PhD student;Undergrad student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhayase2024querybased,\ntitle={Query-Based Adversarial Prompt Generation},\nauthor={Jonathan Hayase and Ema Borevkovi{\\'c} and Nicholas Carlini and Florian Tram{\\`e}r and Milad Nasr},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jBf3eIyD2x}\n}", "github": "", "reviewers": "67w9;CNWy;A7Jt;ReRa", "pdf_size": 408600, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "2;1;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "39;157;71;82", "wc_strengths": "63;61;56;175", "wc_weaknesses": "343;120;166;75", "wc_questions": "2;26;9;38", "wc_limitations": "1;2;1;20", "wc_review": "448;366;303;390", "wc_reply_reviewers": "142;14;0;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.25, 43.25722483007896 ], "wc_strengths_avg": [ 88.75, 49.861683685972736 ], "wc_weaknesses_avg": [ 176.0, 101.64398654126077 ], "wc_questions_avg": [ 18.75, 14.13108276106258 ], "wc_limitations_avg": [ 6.0, 8.093207028119323 ], "wc_review_avg": [ 376.75, 51.977759666996036 ], "wc_reply_reviewers_avg": [ 44.5, 56.83968683939066 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5649596156528356126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "washington.edu;unizg.hr;google.com;google.com;ethz.ch", "author_num": 5, "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "University of Washington;University of Zagreb;Google;ETH Zurich", "aff_unique_dep": ";;Google;", "aff_unique_url": "https://www.washington.edu;https://www.unizg.hr;https://www.google.com;https://www.ethz.ch", "aff_unique_abbr": "UW;UNIZG;Google;ETHZ", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "United States;Croatia;Switzerland" }, { "title": "INDICT: Code Generation with Internal Dialogues of Critiques for Both Security and Helpfulness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93974", "id": "jCMYIUwprx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jCMYIUwprx", "openreview": "https://openreview.net/forum?id=jCMYIUwprx", "poster": "/media/PosterPDFs/NeurIPS%202024/93974.png?t=1731655437.5131545", "project": "", "author_site": "Hung Le, Doyen Sahoo, Yingbo Zhou, Caiming Xiong, Silvio Savarese", "tldr": "", "abstract": "Large language models (LLMs) for code are typically trained to align with natural language instructions to closely follow their intentions and requirements. However, in many practical scenarios, it becomes increasingly challenging for these models to navigate the intricate boundary between helpfulness and safety, especially against highly complex yet potentially malicious instructions. In this work, we introduce INDICT: a new framework that empowers LLMs with Internal Dialogues of Critiques for both safety and helpfulness guidance. The internal dialogue is a dual cooperative system between a safety-driven critic and a helpfulness-driven critic. Each critic provides analysis against the given task and corresponding generated response, equipped with external knowledge queried through relevant code snippets and tools like web search and code interpreter. We engage the dual critic system in both code generation stage as well as code execution stage, providing preemptive and post-hoc guidance respectively to LLMs. We evaluated INDICT on 8 diverse tasks across 8 programming languages from 5 benchmarks, using LLMs from 7B to 70B parameters. We observed that our approach can provide an advanced level of critiques of both safety and helpfulness analysis, significantly improving the quality of output codes (+10% absolute improvements in all models).", "keywords": "code generation;safety;helpfulness;code security;large language model;critic;autonomous agent", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Hung Le;Doyen Sahoo;Yingbo Zhou;Caiming Xiong;Silvio Savarese", "authorids": "~Hung_Le2;~Doyen_Sahoo1;~Yingbo_Zhou1;~Caiming_Xiong1;~Silvio_Savarese1", "gender": "M;;M;M;M", "homepage": "https://www.linkedin.com/in/doyensahoo/?originalSubdomain=sg;;http://cmxiong.com/;;https://sites.google.com/view/henryle2018/home", "dblp": "151/3155;72/8614;80/7282;50/3578;", "google_scholar": "https://scholar.google.com.sg/citations?hl=en;H_6RQ7oAAAAJ;vaSdahkAAAAJ;ImpbxLsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": "doyensahoo/?originalSubdomain=sg;yingbozhou/;caiming-xiong-150a1417;;hungle2012", "or_profile": "~Doyen_Sahoo1;~Yingbo_Zhou1;~Caiming_Xiong1;~Silvio_Savarese1;~Henry_Le1", "aff": "SalesForce.com;Salesforce Research;Salesforce Research;Stanford University;Salesforce Research", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;stanford.edu;salesforce.com", "position": "Researcher;Research Scientist;Research Scientist;Adjunct Professor;Researcher", "bibtex": "@inproceedings{\nle2024indict,\ntitle={{INDICT}: Code Generation with Internal Dialogues of Critiques for Both Security and Helpfulness},\nauthor={Hung Le and Doyen Sahoo and Yingbo Zhou and Caiming Xiong and Silvio Savarese},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jCMYIUwprx}\n}", "github": "", "reviewers": "ZHvf;GLux;rV62;SdG3", "pdf_size": 3488830, "rating": "5;5;6;6", "confidence": "5;5;3;2", "soundness": "2;2;3;4", "novelty": "2;2;2;2", "presentation": "3;2;3;3", "wc_summary": "92;213;171;68", "wc_strengths": "64;101;33;46", "wc_weaknesses": "220;421;17;181", "wc_questions": "70;50;151;58", "wc_limitations": "57;18;5;1", "wc_review": "503;803;377;354", "wc_reply_reviewers": "52;122;29;31", "wc_reply_authors": "24;161;66;73", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;3;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 136.0, 58.553394436189606 ], "wc_strengths_avg": [ 61.0, 25.583197610932064 ], "wc_weaknesses_avg": [ 209.75, 143.79738349497185 ], "wc_questions_avg": [ 82.25, 40.32601517631019 ], "wc_limitations_avg": [ 20.25, 22.128883839904805 ], "wc_review_avg": [ 509.25, 178.83005200468963 ], "wc_reply_reviewers_avg": [ 58.5, 37.75248336202534 ], "wc_reply_authors_avg": [ 81.0, 49.84475900232641 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18275703701613764637&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "salesforce.com;salesforce.com;salesforce.com;stanford.edu;salesforce.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Salesforce;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.salesforce.com;https://www.stanford.edu", "aff_unique_abbr": "Salesforce;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Con4m: Context-aware Consistency Learning Framework for Segmented Time Series Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93973", "id": "jCPufQaHvb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jCPufQaHvb", "openreview": "https://openreview.net/forum?id=jCPufQaHvb", "poster": "/media/PosterPDFs/NeurIPS%202024/93973.png?t=1732180648.1711104", "project": "", "author_site": "Junru Chen, Tianyu Cao, Jing Xu, Jiahe Li, Zhilong Chen, Tao Xiao, YANG YANG", "tldr": "", "abstract": "Time Series Classification (TSC) encompasses two settings: classifying entire sequences or classifying segmented subsequences. The raw time series for segmented TSC usually contain Multiple classes with Varying Duration of each class (MVD). Therefore, the characteristics of MVD pose unique challenges for segmented TSC, yet have been largely overlooked by existing works. Specifically, there exists a natural temporal dependency between consecutive instances (segments) to be classified within MVD. However, mainstream TSC models rely on the assumption of independent and identically distributed (i.i.d.), focusing on independently modeling each segment. Additionally, annotators with varying expertise may provide inconsistent boundary labels, leading to unstable performance of noise-free TSC models. To address these challenges, we first formally demonstrate that valuable contextual information enhances the discriminative power of classification instances. Leveraging the contextual priors of MVD at both the data and label levels, we propose a novel consistency learning framework Con4m, which effectively utilizes contextual information more conducive to discriminating consecutive segments in segmented TSC tasks, while harmonizing inconsistent boundary labels for training. Extensive experiments across multiple datasets validate the effectiveness of Con4m in handling segmented TSC tasks on MVD. The source code is available at https://github.com/MrNobodyCali/Con4m.", "keywords": "Time series classification;Segmented time series;Label consistency learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Junru Chen;Tianyu Cao;Jing Xu;Jiahe Li;Zhilong Chen;Tao Xiao;Yang Yang", "authorids": "~Junru_Chen1;~Tianyu_Cao2;~Jing_Xu13;~Jiahe_Li5;~Zhilong_Chen2;~Tao_Xiao1;~Yang_Yang35", "gender": "M;;;F;M;;M", "homepage": "https://mrnobodycali.github.io/;https://springcty.github.io/;;https://erikaqvq.github.io/;https://cnblogs.com/Apocrypha;;http://yangy.org", "dblp": "212/6753;;;;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": ";;0009-0005-3420-4356;;;0009-0000-5146-3265;0000-0002-5058-4417", "linkedin": ";tianyu-cao-98234b267/;;;;;", "or_profile": "~Junru_Chen1;~Tianyu_Cao2;~Jing_Xu13;~Jiahe_Li5;~Zhilong_Chen2;~Tao_Xiao1;~Yang_Yang35", "aff": "Zhejiang University;Zhejiang University;State Grid Power Supply Co. Ltd.;Zhejiang University;Zhejiang University;State Grid Power Supply Co. Ltd.;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zj.sgcc.com.cn;zju.edu.cn;zju.edu.cn;zj.sgcc.com.cn;zju.edu.cn", "position": "PhD student;Undergrad student;Researcher;Undergrad student;Undergrad student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nchen2024conm,\ntitle={Con4m: Context-aware Consistency Learning Framework for Segmented Time Series Classification},\nauthor={Junru Chen and Tianyu Cao and Jing Xu and Jiahe Li and Zhilong Chen and Tao Xiao and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jCPufQaHvb}\n}", "github": "", "reviewers": "Gi9s;7tXB;uhXL;gPgT", "pdf_size": 9782538, "rating": "5;5;6;8", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "2;3;4;3", "wc_summary": "24;98;72;73", "wc_strengths": "44;91;134;136", "wc_weaknesses": "505;143;167;129", "wc_questions": "2;110;173;140", "wc_limitations": "37;82;24;62", "wc_review": "612;524;570;540", "wc_reply_reviewers": "236;124;16;12", "wc_reply_authors": "550;650;55;30", "reply_reviewers": "2;2;1;1", "reply_authors": "3;5;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.75, 26.789690181112583 ], "wc_strengths_avg": [ 101.25, 37.62562291843153 ], "wc_weaknesses_avg": [ 236.0, 155.90060936378666 ], "wc_questions_avg": [ 106.25, 64.18089669052623 ], "wc_limitations_avg": [ 51.25, 22.398381637966615 ], "wc_review_avg": [ 561.5, 33.507461855533016 ], "wc_reply_reviewers_avg": [ 97.0, 91.97282207261013 ], "wc_reply_authors_avg": [ 321.25, 281.12219318296445 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9691363371746288754&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;zj.sgcc.com.cn;zju.edu.cn;zju.edu.cn;zj.sgcc.com.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;1;0", "aff_unique_norm": "Zhejiang University;State Grid Corporation of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.sgcc.com.cn", "aff_unique_abbr": "ZJU;SGCC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MV2Cyl: Reconstructing 3D Extrusion Cylinders from Multi-View Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93972", "id": "jDF2ZXI8AX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jDF2ZXI8AX", "openreview": "https://openreview.net/forum?id=jDF2ZXI8AX", "poster": "/media/PosterPDFs/NeurIPS%202024/93972.png?t=1732004468.4359403", "project": "", "author_site": "Eunji Hong, Minh Hieu Nguyen, Mikaela Angelina Uy, Minhyuk Sung", "tldr": "", "abstract": "We present MV2Cyl, a novel method for reconstructing 3D from 2D multi-view images, not merely as a field or raw geometry but as a sketch-extrude CAD. Extracting extrusion cylinders from raw 3D geometry has been extensively researched in computer vision, while the processing of 3D data through neural networks has remained a bottleneck. Since 3D scans are generally accompanied by multi-view images, leveraging 2D convolutional neural networks allows these images to be exploited as a rich source for extracting extrusion cylinder information. However, we observe that extracting only the surface information of the extrudes and utilizing it results in suboptimal outcomes due to the challenges in the occlusion and surface segmentation. By synergizing with the extracted base curve information, we achieve the optimal reconstruction result with the best accuracy in 2D sketch and extrude parameter estimation. Our experiments, comparing our method with previous work that takes a raw 3D point cloud as input, demonstrate the effectiveness of our approach by taking advantage of multi-view images.", "keywords": "Extrusion;Multi-View Images;3D Reconstruction;Computer-Aided Design", "primary_area": "machine_vision", "supplementary_material": "", "author": "Eunji Hong;Nguyen Minh Hieu;Mikaela Angelina Uy;Minhyuk Sung", "authorids": "~Eunji_Hong1;~Nguyen_Minh_Hieu1;~Mikaela_Angelina_Uy1;~Minhyuk_Sung1", "gender": ";;F;M", "homepage": "https://github.com/eunzihong;http://min-hieu.github.io;http://mikacuy.github.io;https://mhsung.github.io/", "dblp": ";;218/5350;171/6792", "google_scholar": ";;PcX1zXwAAAAJ;PcIYMp4AAAAJ", "orcid": ";0000-0003-3446-0315;;", "linkedin": ";;;mhsung", "or_profile": "~Eunji_Hong1;~Nguyen_Minh_Hieu1;~Mikaela_Angelina_Uy1;~Minhyuk_Sung1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Stanford University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;stanford.edu;kaist.ac.kr", "position": "MS student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhong2024mvcyl,\ntitle={{MV}2Cyl: Reconstructing 3D Extrusion Cylinders from Multi-View Images},\nauthor={Eunji Hong and Nguyen Minh Hieu and Mikaela Angelina Uy and Minhyuk Sung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jDF2ZXI8AX}\n}", "github": "", "reviewers": "7GUw;mRwh;AtoY;Ajxc;dRdb", "pdf_size": 41022019, "rating": "5;5;5;5;7", "confidence": "3;4;4;4;5", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;4", "presentation": "3;3;4;4;4", "wc_summary": "55;172;109;425;165", "wc_strengths": "38;57;160;43;211", "wc_weaknesses": "231;231;269;312;238", "wc_questions": "3;114;38;166;200", "wc_limitations": "28;34;9;24;80", "wc_review": "355;608;585;970;894", "wc_reply_reviewers": "0;72;78;111;41", "wc_reply_authors": "0;105;82;179;21", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 185.2, 127.15722551235538 ], "wc_strengths_avg": [ 101.8, 70.49368766066931 ], "wc_weaknesses_avg": [ 256.2, 31.237157361066004 ], "wc_questions_avg": [ 104.2, 74.45374402943078 ], "wc_limitations_avg": [ 35.0, 23.96664348631239 ], "wc_review_avg": [ 682.4, 223.47313037589106 ], "wc_reply_reviewers_avg": [ 60.4, 37.49453293481598 ], "wc_reply_authors_avg": [ 77.4, 63.68233664054735 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7905694150420948, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14071778381588232025&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;stanford.edu;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.stanford.edu", "aff_unique_abbr": "KAIST;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Multi-Object 3D Grounding with Dynamic Modules and Language-Informed Spatial Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93971", "id": "jFWl9EWZ7z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jFWl9EWZ7z", "openreview": "https://openreview.net/forum?id=jFWl9EWZ7z", "poster": "/media/PosterPDFs/NeurIPS%202024/93971.png?t=1731300129.4725697", "project": "", "author_site": "Haomeng Zhang, Chiao-An Yang, Raymond A. Yeh", "tldr": "", "abstract": "Multi-object 3D Grounding involves locating 3D boxes based on a given query phrase from a point cloud. It is a challenging and significant task that has numerous applications in visual understanding, human-computer interaction, and robotics. To tackle this challenge, we introduce D-LISA, a two-stage approach that incorporates three innovations. First, a dynamic vision module that enables a variable and learnable number of box proposals. Second, a dynamic camera positioning that extracts features for each proposal. Third, a language-informed spatial attention module that better reasons over the proposals to output the final prediction. Empirically, experiments show that our method outperforms the state-of-the-art methods on multi-object 3D grounding by 12.8% (absolute) and is competitive in single-object 3D grounding.", "keywords": "3D grounding;vision and language", "primary_area": "machine_vision", "supplementary_material": "/attachment/ca568c48e4ed9a856f096acff29913e6e90c1dc7.zip", "author": "Haomeng Zhang;Chiao An Yang;Raymond A. Yeh", "authorids": "~Haomeng_Zhang1;~Chiao_An_Yang1;~Raymond_A._Yeh1", "gender": "M;M;", "homepage": "https://haomengz.github.io/;https://ca-joe-yang.github.io/;", "dblp": "274/9115;312/7959;", "google_scholar": "K3FNat4AAAAJ;f1fIHBYAAAAJ;", "orcid": ";0000-0003-1947-1331;", "linkedin": ";;", "or_profile": "~Haomeng_Zhang1;~Chiao_An_Yang1;~Raymond_A._Yeh1", "aff": "Purdue University;Purdue University;", "aff_domain": "purdue.edu;purdue.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nzhang2024multiobject,\ntitle={Multi-Object 3D Grounding with Dynamic Modules and Language-Informed Spatial Attention},\nauthor={Haomeng Zhang and Chiao An Yang and Raymond A. Yeh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jFWl9EWZ7z}\n}", "github": "", "reviewers": "WVQA;rjTU;W21s;su8L", "pdf_size": 2872506, "rating": "3;4;5;5", "confidence": "5;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;2;4;2", "wc_summary": "147;83;79;70", "wc_strengths": "29;28;42;52", "wc_weaknesses": "111;143;133;112", "wc_questions": "17;25;5;2", "wc_limitations": "4;19;15;1", "wc_review": "308;298;274;237", "wc_reply_reviewers": "0;168;54;0", "wc_reply_authors": "0;323;52;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 94.75, 30.531745773866255 ], "wc_strengths_avg": [ 37.75, 9.908960591303208 ], "wc_weaknesses_avg": [ 124.75, 13.718144918318949 ], "wc_questions_avg": [ 12.25, 9.256754290786809 ], "wc_limitations_avg": [ 9.75, 7.46240577829965 ], "wc_review_avg": [ 279.25, 27.343874999714288 ], "wc_reply_reviewers_avg": [ 55.5, 68.59118019104206 ], "wc_reply_authors_avg": [ 93.75, 134.04919805802643 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8c1of-4csNgJ:scholar.google.com/&scioq=Multi-Object+3D+Grounding+with+Dynamic+Modules+and+Language-Informed+Spatial+Attention&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "purdue.edu;purdue.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generalization Bound and Learning Methods for Data-Driven Projections in Linear Programming", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93970", "id": "jHh804fZ5l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jHh804fZ5l", "openreview": "https://openreview.net/forum?id=jHh804fZ5l", "poster": "/media/PosterPDFs/NeurIPS%202024/93970.png?t=1730942716.4598696", "project": "", "author_site": "Shinsaku Sakaue, Taihei Oki", "tldr": "", "abstract": "How to solve high-dimensional linear programs (LPs) efficiently is a fundamental question.\nRecently, there has been a surge of interest in reducing LP sizes using *random projections*, which can accelerate solving LPs independently of improving LP solvers. \nThis paper explores a new direction of *data-driven projections*, which use projection matrices learned from data instead of random projection matrices.\nGiven training data of $n$-dimensional LPs, we learn an $n\\times k$ projection matrix with $n > k$. \nWhen addressing a future LP instance, we reduce its dimensionality from $n$ to $k$ via the learned projection matrix, solve the resulting LP to obtain a $k$-dimensional solution, and apply the learned matrix to it to recover an $n$-dimensional solution.\n\nOn the theoretical side, a natural question is: how much data is sufficient to ensure the quality of recovered solutions? We address this question based on the framework of *data-driven algorithm design*, which connects the amount of data sufficient for establishing generalization bounds to the *pseudo-dimension* of performance metrics. We obtain an $\\tilde{\\mathrm{O}}(nk^2)$ upper bound on the pseudo-dimension, where $\\tilde{\\mathrm{O}}$ compresses logarithmic factors. We also provide an $\\Omega(nk)$ lower bound, implying our result is tight up to an $\\tilde{\\mathrm{O}}(k)$ factor. \n\nOn the practical side, we explore two simple methods for learning projection matrices: PCA- and gradient-based methods. While the former is relatively efficient, the latter can sometimes achieve better solution quality. Experiments demonstrate that learning projection matrices from data is indeed beneficial: it leads to significantly higher solution quality than the existing random projection while greatly reducing the time for solving LPs.", "keywords": "data-driven algorithm design;linear programming;dimensionality reduction;generalization bound", "primary_area": "learning_theory", "supplementary_material": "/attachment/d94d6ac77fe398144aed4300c3c318bfe999c829.zip", "author": "Shinsaku Sakaue;Taihei Oki", "authorids": "~Shinsaku_Sakaue1;~Taihei_Oki1", "gender": "M;M", "homepage": "https://ssakaue.github.io/;https://www.opt.mist.i.u-tokyo.ac.jp/~oki/", "dblp": "183/6350;185/4435", "google_scholar": "https://scholar.google.co.jp/citations?user=9oTbrmEAAAAJ;o-QxZEYAAAAJ", "orcid": ";0000-0002-6862-9484", "linkedin": ";", "or_profile": "~Shinsaku_Sakaue1;~Taihei_Oki1", "aff": "NTT;The University of Tokyo", "aff_domain": "ntt.co.jp;u-tokyo.ac.jp", "position": "Researcher;Project Research Associate", "bibtex": "@inproceedings{\nsakaue2024generalization,\ntitle={Generalization Bound and Learning Methods for Data-Driven Projections in Linear Programming},\nauthor={Shinsaku Sakaue and Taihei Oki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jHh804fZ5l}\n}", "github": "", "reviewers": "nj7Z;hECG;E5r3;vPup", "pdf_size": 621845, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "119;167;86;81", "wc_strengths": "14;87;52;105", "wc_weaknesses": "101;59;62;372", "wc_questions": "6;77;44;1", "wc_limitations": "6;1;1;1", "wc_review": "246;391;245;560", "wc_reply_reviewers": "17;17;14;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.25, 34.29559009552103 ], "wc_strengths_avg": [ 64.5, 34.831738400487566 ], "wc_weaknesses_avg": [ 148.5, 130.09707913708132 ], "wc_questions_avg": [ 32.0, 30.84639363037436 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 360.5, 129.59648914997658 ], "wc_reply_reviewers_avg": [ 16.25, 1.299038105676658 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w53ZYp2jTEwJ:scholar.google.com/&scioq=Generalization+Bound+and+Learning+Methods+for+Data-Driven+Projections+in+Linear+Programming&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": "ntt.co.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "NTT Corporation;University of Tokyo", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntt.co.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "NTT;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Sparsity-Agnostic Linear Bandits with Adaptive Adversaries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93969", "id": "jIabKyXOTt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jIabKyXOTt", "openreview": "https://openreview.net/forum?id=jIabKyXOTt", "poster": "/media/PosterPDFs/NeurIPS%202024/93969.png?t=1731756748.3189445", "project": "", "author_site": "Tianyuan Jin, Kyoungseok Jang, Nicol\u00f2 Cesa-Bianchi", "tldr": "", "abstract": "We study stochastic linear bandits where, in each round, the learner receives a set of actions (i.e., feature vectors), from which it chooses an element and obtains a stochastic reward. The expected reward is a fixed but unknown linear function of the chosen action. We study \\emph{sparse} regret bounds, that depend on the number $S$ of non-zero coefficients in the linear reward function. Previous works focused on the case where $S$ is known, or the action sets satisfy additional assumptions. In this work, we obtain the first sparse regret bounds that hold when $S$ is unknown and the action sets are adversarially generated. Our techniques combine online to confidence set conversions with a novel randomized model selection approach over a hierarchy of nested confidence sets. When $S$ is known, our analysis recovers state-of-the-art bounds for adversarial action sets. We also show that a variant of our approach, using Exp3 to dynamically select the confidence sets, can be used to improve the empirical performance of stochastic linear bandits while enjoying a regret bound with optimal dependence on the time horizon.", "keywords": "Regret bounds;online learning;sparse linear regression;model selection", "primary_area": "bandits", "supplementary_material": "/attachment/00020db68aeeb4da45d6507e03589a3de8f0ff8b.zip", "author": "Tianyuan Jin;Kyoungseok Jang;Nicol\u00f2 Cesa-Bianchi", "authorids": "~Tianyuan_Jin1;~Kyoungseok_Jang1;~Nicol\u00f2_Cesa-Bianchi1", "gender": "M;M;M", "homepage": "https://tianyuanjin.github.io/;https://jajajang.github.io;http://cesa-bianchi.di.unimi.it/", "dblp": "208/2335;296/8698;c/NicoloCesaBianchi", "google_scholar": "3e5kmjsAAAAJ;hDqIvzAAAAAJ;https://scholar.google.it/citations?user=BWADJUkAAAAJ", "orcid": ";0009-0002-6689-4601;0000-0001-8477-4748", "linkedin": ";kyoungseok-jang-856440219/;", "or_profile": "~Tianyuan_Jin1;~Kyoungseok_Jang1;~Nicol\u00f2_Cesa-Bianchi1", "aff": "National University of Singapore;New York University;University of Milan", "aff_domain": "nus.edu.sg;nyu.edu;unimi.it", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\njin2024sparsityagnostic,\ntitle={Sparsity-Agnostic Linear Bandits with Adaptive Adversaries},\nauthor={Tianyuan Jin and Kyoungseok Jang and Nicol{\\`o} Cesa-Bianchi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jIabKyXOTt}\n}", "github": "", "reviewers": "muo1;zJ6Y;9XZb;KeoW", "pdf_size": 740560, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;1;3;3", "wc_summary": "66;58;52;127", "wc_strengths": "80;65;59;114", "wc_weaknesses": "62;174;135;96", "wc_questions": "69;2;20;149", "wc_limitations": "4;9;1;28", "wc_review": "281;308;267;514", "wc_reply_reviewers": "10;32;16;29", "wc_reply_authors": "47;44;25;25", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 75.75, 30.003124837256536 ], "wc_strengths_avg": [ 79.5, 21.33658829335187 ], "wc_weaknesses_avg": [ 116.75, 41.94862929822618 ], "wc_questions_avg": [ 60.0, 56.934172515282945 ], "wc_limitations_avg": [ 10.5, 10.5 ], "wc_review_avg": [ 342.5, 100.10619361458112 ], "wc_reply_reviewers_avg": [ 21.75, 9.065732182234372 ], "wc_reply_authors_avg": [ 35.25, 10.304731922762475 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16374742300657781549&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "nus.edu.sg;nyu.edu;unimi.it", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "National University of Singapore;New York University;University of Milan", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.nyu.edu;https://www.unimi.it", "aff_unique_abbr": "NUS;NYU;UniMi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Singapore;United States;Italy" }, { "title": "Autoregressive Image Diffusion: Generation of Image Sequence and Application in MRI", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93968", "id": "jIh4W7r0rn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jIh4W7r0rn", "openreview": "https://openreview.net/forum?id=jIh4W7r0rn", "poster": "/media/PosterPDFs/NeurIPS%202024/93968.png?t=1731247459.1041472", "project": "", "author_site": "Guanxiong Luo, Shoujin Huang, Martin Uecker", "tldr": "", "abstract": "Magnetic resonance imaging (MRI) is a widely used non-invasive imaging modality. However, a persistent challenge lies in balancing image quality with imaging speed. This trade-off is primarily constrained by k-space measurements, which traverse specific trajectories in the spatial Fourier domain (k-space). These measurements are often undersampled to shorten acquisition times, resulting in image artifacts and compromised quality. Generative models learn image distributions and can be used to reconstruct high-quality images from undersampled k-space data. In this work, we present the autoregressive image diffusion (AID) model for image sequences and use it to sample the posterior for accelerated MRI reconstruction. The algorithm incorporates both undersampled k-space and pre-existing information. Models trained with fastMRI dataset are evaluated comprehensively. The results show that the AID model can robustly generate sequentially coherent image sequences. In MRI applications, the AID can outperform the standard diffusion model and reduce hallucinations, due to the learned inter-image dependencies. The project code is available at https://github.com/mrirecon/aid.", "keywords": "Autoregressive models;Diffusion models;Inverse problems;Medical imaging;MRI", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Guanxiong Luo;Shoujin Huang;Martin Uecker", "authorids": "~Guanxiong_Luo1;~Shoujin_Huang1;~Martin_Uecker1", "gender": "M;M;M", "homepage": ";https://www.tugraz.at/institute/ibi/home;https://ggluo.github.io/", "dblp": ";;313/2198", "google_scholar": "pKeq354AAAAJ;r7BG-ZYAAAAJ;HuXdcKkAAAAJ", "orcid": ";0000-0002-8850-809X;", "linkedin": ";;guanxiong-luo/", "or_profile": "~Shoujin_Huang1;~Martin_Uecker1;~Luo_Guanxiong1", "aff": ";Technische Universit\u00e4t Graz;Georg-August Universit\u00e4t G\u00f6ttingen", "aff_domain": ";tugraz.at;uni-goettingen.de", "position": ";Full Professor;Postdoc", "bibtex": "@inproceedings{\nluo2024autoregressive,\ntitle={Autoregressive Image Diffusion: Generation of Image Sequence and Application in {MRI}},\nauthor={Guanxiong Luo and Shoujin Huang and Martin Uecker},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jIh4W7r0rn}\n}", "github": "", "reviewers": "YPrH;oZEE;NC3G", "pdf_size": 19941060, "rating": "4;5;7", "confidence": "4;4;2", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "48;71;237", "wc_strengths": "19;53;237", "wc_weaknesses": "81;235;272", "wc_questions": "56;91;174", "wc_limitations": "76;12;295", "wc_review": "280;462;1215", "wc_reply_reviewers": "50;155;52", "wc_reply_authors": "240;115;68", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.66666666666667, 84.19949854693647 ], "wc_strengths_avg": [ 103.0, 95.76359781601079 ], "wc_weaknesses_avg": [ 196.0, 82.70832283795063 ], "wc_questions_avg": [ 107.0, 49.48400414948922 ], "wc_limitations_avg": [ 127.66666666666667, 121.17297095016235 ], "wc_review_avg": [ 652.3333333333334, 404.74381472180096 ], "wc_reply_reviewers_avg": [ 85.66666666666667, 49.032868794536405 ], "wc_reply_authors_avg": [ 141.0, 72.58558167202814 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16847769509697482148&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": ";tugraz.at;uni-goettingen.de", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t Graz;Georg-August Universit\u00e4t G\u00f6ttingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tugraz.at;https://www.uni-goettingen.de", "aff_unique_abbr": "TU Graz;GAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Austria;Germany" }, { "title": "AMOR: A Recipe for Building Adaptable Modular Knowledge Agents Through Process Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93967", "id": "jImXgQEmX3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jImXgQEmX3", "openreview": "https://openreview.net/forum?id=jImXgQEmX3", "poster": "/media/PosterPDFs/NeurIPS%202024/93967.png?t=1729666752.685714", "project": "", "author_site": "Jian Guan, Wei Wu, zujie wen, Peng Xu, Hongning Wang, Minlie Huang", "tldr": "", "abstract": "The notable success of large language models (LLMs) has sparked an upsurge in building language agents to complete various complex tasks. We present AMOR, an agent framework based on open-source LLMs, which reasons with external knowledge bases and adapts to specific domains through human supervision to the reasoning process. AMOR builds reasoning logic over a finite state machine (FSM)\nthat solves problems through autonomous executions and transitions over disentangled modules. This allows humans to provide direct feedback to the individual modules, and thus naturally forms process supervision. Based on this reasoning and feedback framework, we develop AMOR through two-stage fine-tuning: warm-up and adaptation. The former fine-tunes the LLM with examples automatically constructed from various public datasets, enabling AMOR to generalize across different knowledge environments, while the latter tailors AMOR to specific domains using process feedback. Extensive experiments across multiple domains demonstrate the advantage of AMOR to strong baselines, thanks to its FSM-based reasoning and process feedback mechanism. The code and data are publicly available at\nhttps://github.com/JianGuanTHU/AMOR.", "keywords": "Agent;Knowledge;Feedback-Driven Adaptation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jian Guan;Wei Wu;zujie wen;Peng Xu;Hongning Wang;Minlie Huang", "authorids": "~Jian_Guan1;~Wei_Wu1;~zujie_wen1;~Peng_Xu12;~Hongning_Wang1;~Minlie_Huang1", "gender": "M;M;M;M;M;M", "homepage": "https://jianguanthu.github.io/;https://sites.google.com/view/wei-wu-homepage;;;http://www.cs.virginia.edu/~hw5x/;http://coai.cs.tsinghua.edu.cn/hml", "dblp": "58/2489-2;95/6985-14;260/0351;;05/6545;", "google_scholar": "BWCDa8YAAAAJ;https://scholar.google.co.jp/citations?hl=en;vsZ4dK8AAAAJ;;qkdvKNoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-3597-0176;0000-0001-6079-7697;;;0000-0002-6524-9195;", "linkedin": ";;;peng-xu-5b0a5b11/;;", "or_profile": "~Jian_Guan1;~Wei_Wu1;~zujie_wen1;~Peng_Xu12;~Hongning_Wang1;~Minlie_Huang1", "aff": "Tsinghua University;Ant Research;Ant Group;Ant Technologies, US;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;antgroup.com;antgroup.com;antgroup.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Researcher;Researcher;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguan2024amor,\ntitle={{AMOR}: A Recipe for Building Adaptable Modular Knowledge Agents Through Process Feedback},\nauthor={Jian Guan and Wei Wu and zujie wen and Peng Xu and Hongning Wang and Minlie Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jImXgQEmX3}\n}", "github": "", "reviewers": "bbCe;CkwG;4AJ6;QRnu", "pdf_size": 2186049, "rating": "6;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;2;3", "presentation": "3;2;2;3", "wc_summary": "84;103;84;171", "wc_strengths": "74;74;90;46", "wc_weaknesses": "70;218;224;32", "wc_questions": "5;46;14;15", "wc_limitations": "27;19;16;1", "wc_review": "260;460;428;265", "wc_reply_reviewers": "0;0;21;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 110.5, 35.78058132562969 ], "wc_strengths_avg": [ 71.0, 15.84297951775486 ], "wc_weaknesses_avg": [ 136.0, 86.08135686662938 ], "wc_questions_avg": [ 20.0, 15.508062419270823 ], "wc_limitations_avg": [ 15.75, 9.41740410091868 ], "wc_review_avg": [ 353.25, 91.46959877467485 ], "wc_reply_reviewers_avg": [ 5.25, 9.093266739736606 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5272594705972743742&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;antgroup.com;antgroup.com;antgroup.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Tsinghua University;Ant Research;Ant Group;Ant Technologies", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.antgroup.com;https://www.antgroup.com;", "aff_unique_abbr": "THU;Ant Research;Ant Group;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "MOTE-NAS: Multi-Objective Training-based Estimate for Efficient Neural Architecture Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93966", "id": "jKLyKeZfzv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jKLyKeZfzv", "openreview": "https://openreview.net/forum?id=jKLyKeZfzv", "poster": "/media/PosterPDFs/NeurIPS%202024/93966.png?t=1731658993.1790655", "project": "", "author_site": "Yuming Zhang, Jun Hsieh, Xin Li, Ming-Ching Chang, Chun-Chieh Lee, Kuo-Chin Fan", "tldr": "", "abstract": "Neural Architecture Search (NAS) methods seek effective optimization toward performance metrics regarding model accuracy and generalization while facing challenges regarding search costs and GPU resources. Recent Neural Tangent Kernel (NTK) NAS methods achieve remarkable search efficiency based on a training-free model estimate; however, they overlook the non-convex nature of the DNNs in the search process. In this paper, we develop Multi-Objective Training-based Estimate (MOTE) for efficient NAS, retaining search effectiveness and achieving the new state-of-the-art in the accuracy and cost trade-off. To improve NTK and inspired by the Training Speed Estimation (TSE) method, MOTE is designed to model the actual performance of DNNs from macro to micro perspective by draw loss landscape and convergence speed simultaneously. Using two reduction strategies, the MOTE is generated based on a reduced architecture and a reduced dataset. Inspired by evolutionary search, our iterative ranking-based, coarse-to-fine architecture search is highly effective. Experiments on NASBench-201 show MOTE-NAS achieves 94.32% accuracy on CIFAR-10, 72.81% on CIFAR-100, and 46.38% on ImageNet-16-120, outperforming NTK-based NAS approaches. An evaluation-free (EF) version of MOTE-NAS delivers high efficiency in only 5 minutes, delivering a model more accurate than KNAS.", "keywords": "neural architecture search;few-cost;training-related estimate.", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/f9bf130f363005a1ba3fd5fa1dc3b831d87902b0.zip", "author": "Yuming Zhang;Jun Wei Hsieh;Xin Li;Ming-Ching Chang;Chun-Chieh Lee;Kuo-Chin Fan", "authorids": "~Yuming_Zhang2;~Jun_Wei_Hsieh1;~Xin_Li2;~Ming-Ching_Chang1;~Chun-Chieh_Lee1;~Kuo-Chin_Fan1", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/D0352276;;;https://www.albany.edu/faculty/mchang2/;;http://fox1.csie.ncu.edu.tw/personnel/kcfan.htm", "dblp": ";83/5722.html;09/1365-5;21/4361;25/4934;f/KuoChinFan", "google_scholar": "https://scholar.google.com.tw/citations?user=9Ui4VOIAAAAJ;https://scholar.google.com.tw/citations?user=JldKbbEAAAAJ;gMBvzGoAAAAJ;a3-Gl8YAAAAJ;;", "orcid": "0009-0007-6675-0436;;0000-0003-2067-2763;0000-0001-9325-5341;;", "linkedin": ";;;mingchingchang/edit/forms/intro/new/?profileFormEntryPoint=PROFILE_SECTION;%E4%BF%8A%E5%82%91-%E6%9D%8E-7a580710a/;", "or_profile": "~Yuming_Zhang2;~Jun_Wei_Hsieh1;~Xin_Li2;~Ming-Ching_Chang1;~Chun-Chieh_Lee1;~Kuo-Chin_Fan1", "aff": "National Central University;National Chiao Tung University;State University of New York at Albany;State University of New York at Albany;National Central University;National Central University", "aff_domain": "ncu.edu.tw;nctu.edu.tw;albany.edu;albany.edu;ncu.edu.tw;ncu.edu.tw", "position": "PhD student;Full Professor;Full Professor;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2024motenas,\ntitle={{MOTE}-{NAS}: Multi-Objective Training-based Estimate for Efficient Neural Architecture Search},\nauthor={Yuming Zhang and Jun Wei Hsieh and Xin Li and Ming-Ching Chang and Chun-Chieh Lee and Kuo-Chin Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jKLyKeZfzv}\n}", "github": "", "reviewers": "Ndgx;NBd3;33pQ;XpJH", "pdf_size": 2693575, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "93;224;47;57", "wc_strengths": "53;43;33;42", "wc_weaknesses": "391;315;133;41", "wc_questions": "76;401;47;4", "wc_limitations": "57;1;32;11", "wc_review": "670;984;292;155", "wc_reply_reviewers": "40;330;190;0", "wc_reply_authors": "40;77;352;0", "reply_reviewers": "1;1;4;0", "reply_authors": "2;2;5;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 105.25, 70.66249004953052 ], "wc_strengths_avg": [ 42.75, 7.084313657652377 ], "wc_weaknesses_avg": [ 220.0, 139.5313584826006 ], "wc_questions_avg": [ 132.0, 157.40552722188633 ], "wc_limitations_avg": [ 25.25, 21.47527648250425 ], "wc_review_avg": [ 525.25, 325.15179147591977 ], "wc_reply_reviewers_avg": [ 140.0, 130.57564857200595 ], "wc_reply_authors_avg": [ 117.25, 138.24141022139494 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-Ra4ywBNlSgJ:scholar.google.com/&scioq=MOTE-NAS:+Multi-Objective+Training-based+Estimate+for+Efficient+Neural+Architecture+Search&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ncu.edu.tw;nctu.edu.tw;albany.edu;albany.edu;ncu.edu.tw;ncu.edu.tw", "author_num": 6, "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "National Central University;National Chiao Tung University;State University of New York", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ncu.edu.tw;https://www.nctu.edu.tw;https://www.albany.edu", "aff_unique_abbr": "NCU;NCTU;SUNY Albany", "aff_campus_unique_index": "0;0;1;1;0;0", "aff_campus_unique": "Taiwan;Albany", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "jKzLukkKZO", "title": "Learning to Control the Smoothness of GCN Features", "track": "main", "status": "Reject", "tldr": "", "abstract": "The pioneering work of Oono \\& Suzuki [ICLR, 2020] and Cai \\& Wang [arXiv:2006.13318] analyze the smoothness of graph convolutional network (GCN) features. Their results reveal an intricate empirical correlation between node classification accuracy and the ratio of smooth to non-smooth feature components. However, the optimal ratio that favors node classification is unknown, and the non-smooth features of deep GCN with ReLU or leaky ReLU activation function diminish. In this paper, we propose a new strategy to let GCN learn node features with a desired smoothness to enhance node classification. Our approach has three key steps: (1) We establish a geometric relationship between the input and output of ReLU or leaky ReLU. (2) Building on our geometric insights, we augment the message-passing process of graph convolutional layers (GCLs) with a learnable term to modulate the smoothness of node features with computational efficiency. (3) We investigate the achievable ratio between smooth and non-smooth feature components for GCNs with the augmented message passing scheme. Our extensive numerical results show that the augmented message passing remarkably improves node classification for GCN and some related models.", "keywords": "graph neural networks;activation function;smoothness of node features", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/5b0a26f56b7625359bbee229c9e16df45fd3d0fd.zip", "author": "Shih-Hsin Wang;Justin Baker;Cory D Hauck;Bao Wang", "authorids": "~Shih-Hsin_Wang1;~Justin_Baker1;~Cory_D_Hauck1;~Bao_Wang1", "gender": "M;;M;M", "homepage": ";;https://www.ornl.gov/staff-profile/cory-d-hauck;https://www.math.utah.edu/~bwang/index.html", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": "shih-hsin-sam-wang-9803671a5/;;;", "or_profile": "~Shih-Hsin_Wang1;~Justin_Baker1;~Cory_D_Hauck1;~Bao_Wang1", "aff": "University of Utah;;Oak Ridge National Laboratory;University of Utah", "aff_domain": "utah.edu;;ornl.gov;utah.edu", "position": "PhD student;;Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024learning,\ntitle={Learning to Control the Smoothness of {GCN} Features},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=jKzLukkKZO}\n}", "github": "", "project": "", "reviewers": "6k42;RBEx;5MTH;DP5C;pS2J;49R3", "site": "https://openreview.net/forum?id=jKzLukkKZO", "pdf_size": 983623, "rating": "4;5;5;5;6;8", "confidence": "4;3;4;4;3;3", "soundness": "3;3;2;3;3;3", "novelty": "3;3;2;3;3;4", "presentation": "2;2;2;2;2;3", "wc_summary": "109;102;102;60;121;63", "wc_strengths": "18;219;42;76;44;35", "wc_weaknesses": "117;289;388;413;82;31", "wc_questions": "105;78;51;2;15;1", "wc_limitations": "41;42;102;1;61;1", "wc_review": "390;730;685;552;323;131", "wc_reply_reviewers": "0;28;98;73;23;0", "wc_reply_authors": "0;22;207;152;6;0", "reply_reviewers": "0;1;2;1;1;0", "reply_authors": "1;2;4;2;2;1", "rating_avg": [ 5.5, 1.2583057392117916 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.8333333333333335, 0.3726779962499649 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.1666666666666665, 0.3726779962499649 ], "wc_summary_avg": [ 92.83333333333333, 23.06090968621046 ], "wc_strengths_avg": [ 72.33333333333333, 67.81510811676768 ], "wc_weaknesses_avg": [ 220.0, 150.3373983190255 ], "wc_questions_avg": [ 42.0, 39.48839491968917 ], "wc_limitations_avg": [ 41.333333333333336, 34.93167935015754 ], "wc_review_avg": [ 468.5, 209.5493179818695 ], "wc_reply_reviewers_avg": [ 37.0, 36.58779395736598 ], "wc_reply_authors_avg": [ 64.5, 83.17802193688748 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.6871842709362768 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6622661785325218, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l_pZEAVVvSMJ:scholar.google.com/&scioq=Learning+to+Control+the+Smoothness+of+GCN+Features&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Utah;Oak Ridge National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.utah.edu;https://www.ornl.gov", "aff_unique_abbr": "Utah;ORNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring Behavior-Relevant and Disentangled Neural Dynamics with Generative Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93965", "id": "jL0EsbfbAV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jL0EsbfbAV", "openreview": "https://openreview.net/forum?id=jL0EsbfbAV", "poster": "", "project": "", "author_site": "Yule Wang, Chengrui Li, Weihan Li, Anqi Wu", "tldr": "", "abstract": "Understanding the neural basis of behavior is a fundamental goal in neuroscience. Current research in large-scale neuro-behavioral data analysis often relies on decoding models, which quantify behavioral information in neural data but lack details on behavior encoding. This raises an intriguing scientific question: \"how can we enable in-depth exploration of neural representations in behavioral tasks, revealing interpretable neural dynamics associated with behaviors\". However, addressing this issue is challenging due to the varied behavioral encoding across different brain regions and mixed selectivity at the population level. To tackle this limitation, our approach, named (\"BeNeDiff\"), first identifies a fine-grained and disentangled neural subspace using a behavior-informed latent variable model. It then employs state-of-the-art generative diffusion models to synthesize behavior videos that interpret the neural dynamics of each latent factor. We validate the method on multi-session datasets containing widefield calcium imaging recordings across the dorsal cortex. Through guiding the diffusion model to activate individual latent factors, we verify that the neural dynamics of latent factors in the disentangled neural subspace provide interpretable quantifications of the behaviors of interest. At the same time, the neural subspace in BeNeDiff demonstrates high disentanglement and neural reconstruction quality.", "keywords": "Neural Latent Discovery;Neural Behavior Analysis;Diffusion Models;Neuroscience", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/75253245a700906c938e57dff00083353bc76606.zip", "author": "Yule Wang;Chengrui Li;Weihan Li;Anqi Wu", "authorids": "~Yule_Wang1;~Chengrui_Li1;~Weihan_Li1;~Anqi_Wu3", "gender": "M;M;M;F", "homepage": "https://yulewang97.github.io/;https://jerrysoybean.github.io/;https://weihanlikk.github.io/;https://sites.google.com/view/brainml/home", "dblp": ";174/4237;24/8923;15/9453", "google_scholar": "vqsl1YYAAAAJ;https://scholar.google.com/citations?h;qW4_NR4AAAAJ;ptGYJiEAAAAJ", "orcid": ";0000-0001-5947-2393;;0000-0002-7866-9455", "linkedin": "yule-wang-a8002b195/;;;", "or_profile": "~Yule_Wang1;~Chengrui_Li1;~Weihan_Li1;~Anqi_Wu3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024exploring,\ntitle={Exploring Behavior-Relevant and Disentangled Neural Dynamics with Generative Diffusion Models},\nauthor={Yule Wang and Chengrui Li and Weihan Li and Anqi Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jL0EsbfbAV}\n}", "github": "", "reviewers": "o3cJ;CLKJ;HCYS", "pdf_size": 4555382, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;2;4", "wc_summary": "72;68;66", "wc_strengths": "81;30;90", "wc_weaknesses": "238;67;122", "wc_questions": "1;111;4", "wc_limitations": "13;23;3", "wc_review": "405;299;285", "wc_reply_reviewers": "178;90;30", "wc_reply_authors": "96;370;99", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 68.66666666666667, 2.494438257849294 ], "wc_strengths_avg": [ 67.0, 26.419689627245813 ], "wc_weaknesses_avg": [ 142.33333333333334, 71.27567763425489 ], "wc_questions_avg": [ 38.666666666666664, 51.162051909159736 ], "wc_limitations_avg": [ 13.0, 8.16496580927726 ], "wc_review_avg": [ 329.6666666666667, 53.57445494097184 ], "wc_reply_reviewers_avg": [ 99.33333333333333, 60.7801137068879 ], "wc_reply_authors_avg": [ 188.33333333333334, 128.46357027923864 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4227847658487853018&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Certified Adversarial Robustness via Randomized $\\alpha$-Smoothing for Regression Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93964", "id": "jLUbLxa4XV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jLUbLxa4XV", "openreview": "https://openreview.net/forum?id=jLUbLxa4XV", "poster": "", "project": "", "author_site": "Aref Miri Rekavandi, Farhad Farokhi, Olga Ohrimenko, Benjamin Rubinstein", "tldr": "", "abstract": "Certified adversarial robustness of large-scale deep networks has progressed substantially after the introduction of randomized smoothing. Deep net classifiers are now provably robust in their predictions against a large class of threat models, including $\\ell_1$, $\\ell_2$, and $\\ell_\\infty$ norm-bounded attacks. Certified robustness analysis by randomized smoothing has not been performed for deep regression networks where the output variable is continuous and unbounded. In this paper, we extend the existing results for randomized smoothing into regression models using powerful tools from robust statistics, in particular, $\\alpha$-trimming filter as the smoothing function. Adjusting the hyperparameter $\\alpha$ achieves a smooth trade-off between desired certified robustness and utility. For the first time, we propose a benchmark for certified robust regression in visual positioning systems using the Cambridge Landmarks dataset where robustness analysis is essential for autonomous navigation of AI agents and self-driving cars. Code is publicly available at \\url{https://github.com/arekavandi/Certified_adv_RRegression/}.", "keywords": "Certified robustness;alpha-trimming;Probabilistic certificates;Robust regression;Randomized smoothing", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Aref Miri Rekavandi;Farhad Farokhi;Olga Ohrimenko;Benjamin I. P. Rubinstein", "authorids": "~Aref_Miri_Rekavandi1;~Farhad_Farokhi1;~Olga_Ohrimenko1;~Benjamin_I._P._Rubinstein1", "gender": ";M;;M", "homepage": ";https://farokhi.xyz/;;http://www.bipr.net/", "dblp": ";;;90/1092", "google_scholar": ";npKNOcwAAAAJ;;https://scholar.google.com.au/citations?user=hMG_gR4AAAAJ", "orcid": ";0000-0002-5102-7073;;0000-0002-2947-6980", "linkedin": ";;;benjaminrubinstein/", "or_profile": "~Aref_Miri_Rekavandi1;~Farhad_Farokhi1;~Olga_Ohrimenko1;~Benjamin_I._P._Rubinstein1", "aff": ";The University of Melbourne;;The University of Melbourne", "aff_domain": ";unimelb.edu.au;;unimelb.edu.au", "position": ";Lecturer;;Associate Professor", "bibtex": "@inproceedings{\nrekavandi2024certified,\ntitle={Certified Adversarial Robustness via Randomized \\${\\textbackslash}alpha\\$-Smoothing for Regression Models},\nauthor={Aref Miri Rekavandi and Farhad Farokhi and Olga Ohrimenko and Benjamin I. P. Rubinstein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jLUbLxa4XV}\n}", "github": "", "reviewers": "ZCs7;pBXb;wW8d", "pdf_size": 2373516, "rating": "5;5;5", "confidence": "4;3;3", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "2;1;2", "wc_summary": "39;60;56", "wc_strengths": "30;32;26", "wc_weaknesses": "91;79;135", "wc_questions": "472;90;46", "wc_limitations": "1;29;3", "wc_review": "633;290;266", "wc_reply_reviewers": "613;244;28", "wc_reply_authors": "1563;715;125", "reply_reviewers": "4;2;1", "reply_authors": "3;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 51.666666666666664, 9.104333522498441 ], "wc_strengths_avg": [ 29.333333333333332, 2.494438257849294 ], "wc_weaknesses_avg": [ 101.66666666666667, 24.073960113690383 ], "wc_questions_avg": [ 202.66666666666666, 191.29267860764796 ], "wc_limitations_avg": [ 11.0, 12.754084313139327 ], "wc_review_avg": [ 396.3333333333333, 167.63518591141764 ], "wc_reply_reviewers_avg": [ 295.0, 241.53260649444414 ], "wc_reply_authors_avg": [ 801.0, 590.2022252301889 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.247219128924647 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hMwhciUyf34J:scholar.google.com/&scioq=Certified+Adversarial+Robustness+via+Randomized+%24%5Calpha%24-Smoothing+for+Regression+Models&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";unimelb.edu.au;;unimelb.edu.au", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": "", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Kermut: Composite kernel regression for protein variant effects", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93963", "id": "jM9atrvUii", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jM9atrvUii", "openreview": "https://openreview.net/forum?id=jM9atrvUii", "poster": "/media/PosterPDFs/NeurIPS%202024/93963.png?t=1733930689.8580725", "project": "", "author_site": "Peter M\u00f8rch Groth, Mads Kerrn, Lars Olsen, Jesper Salomon, Wouter Boomsma", "tldr": "", "abstract": "Reliable prediction of protein variant effects is crucial for both protein optimization and for advancing biological understanding. For practical use in protein engineering, it is important that we can also provide reliable uncertainty estimates for our predictions, and while prediction accuracy has seen much progress in recent years, uncertainty metrics are rarely reported. We here provide a Gaussian process regression model, Kermut, with a novel composite kernel for modeling mutation similarity, which obtains state-of-the-art performance for supervised protein variant effect prediction while also offering estimates of uncertainty through its posterior. An analysis of the quality of the uncertainty estimates demonstrates that our model provides meaningful levels of overall calibration, but that instance-specific uncertainty calibration remains more challenging.", "keywords": "Gaussian process;uncertainty quantification;proteins;protein;enzyme;variant effects;kernel;protein structure;uncertainty calibration;few-shot learning;transfer learning", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/45d6ccf4aaf062cabd2df63c034ce18f65ac64bd.zip", "author": "Peter M\u00f8rch Groth;Mads Herbert Kerrn;Lars Olsen;Jesper Salomon;Wouter Boomsma", "authorids": "~Peter_M\u00f8rch_Groth1;~Mads_Herbert_Kerrn1;~Lars_Olsen1;~Jesper_Salomon1;~Wouter_Boomsma1", "gender": ";M;M;M;M", "homepage": ";;;;", "dblp": ";;;;06/5945", "google_scholar": ";R2MFVjYAAAAJ;o0XjBEYAAAAJ;;EwqU_jsAAAAJ", "orcid": ";;0000-0002-7607-7130;0000-0003-2445-6582;0000-0002-8257-3827", "linkedin": ";mads-kerrn-627093103/;larsolsensprofile/;;", "or_profile": "~Peter_M\u00f8rch_Groth1;~Mads_Herbert_Kerrn1;~Lars_Olsen1;~Jesper_Salomon1;~Wouter_Boomsma1", "aff": ";Copenhagen University;;Novozymes;University of Copenhagen", "aff_domain": ";ku.dk;;novozymes.com;ku.dk", "position": ";PhD student;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ngroth2024kermut,\ntitle={Kermut: Composite kernel regression for protein variant effects},\nauthor={Peter M{\\o}rch Groth and Mads Herbert Kerrn and Lars Olsen and Jesper Salomon and Wouter Boomsma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jM9atrvUii}\n}", "github": "", "reviewers": "Fowf;RDFU;1cc6", "pdf_size": 4267378, "rating": "7;7;7", "confidence": "4;5;3", "soundness": "3;4;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "68;59;57", "wc_strengths": "122;41;138", "wc_weaknesses": "439;149;32", "wc_questions": "88;74;68", "wc_limitations": "96;15;3", "wc_review": "813;338;298", "wc_reply_reviewers": "33;170;0", "wc_reply_authors": "1493;38;0", "reply_reviewers": "1;1;0", "reply_authors": "5;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 61.333333333333336, 4.784233364802441 ], "wc_strengths_avg": [ 100.33333333333333, 42.460439103816256 ], "wc_weaknesses_avg": [ 206.66666666666666, 171.08737209066277 ], "wc_questions_avg": [ 76.66666666666667, 8.379870059984357 ], "wc_limitations_avg": [ 38.0, 41.30375285612676 ], "wc_review_avg": [ 483.0, 233.91593931723992 ], "wc_reply_reviewers_avg": [ 67.66666666666667, 73.60404578252172 ], "wc_reply_authors_avg": [ 510.3333333333333, 695.0234208683586 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9419666853523074225&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": ";ku.dk;;novozymes.com;ku.dk", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Copenhagen;Novozymes", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.novozymes.com", "aff_unique_abbr": "UCPH;Novozymes", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "Towards Neuron Attributions in Multi-Modal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93962", "id": "jMJVFP4BH6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jMJVFP4BH6", "openreview": "https://openreview.net/forum?id=jMJVFP4BH6", "poster": "", "project": "", "author_site": "Junfeng Fang, Zac Bi, Ruipeng Wang, Houcheng Jiang, Yuan Gao, Kun Wang, An Zhang, Jie Shi, Xiang Wang, Tat-Seng Chua", "tldr": "", "abstract": "As Large Language Models (LLMs) demonstrate impressive capabilities, demystifying their internal mechanisms becomes increasingly vital. Neuron attribution, which attributes LLM outputs to specific neurons to reveal the semantic properties they learn, has emerged as a key interpretability approach. However, while neuron attribution has made significant progress in deciphering text-only LLMs, its application to Multimodal LLMs (MLLMs) remains less explored. To address this gap, we propose a novel Neuron Attribution method tailored for MLLMs, termed NAM. Specifically, NAM not only reveals the modality-specific semantic knowledge learned by neurons within MLLMs, but also highlights several intriguing properties of neurons, such as cross-modal invariance and semantic sensitivity. These properties collectively elucidate the inner workings mechanism of MLLMs, providing a deeper understanding of how MLLMs process and generate multi-modal content. Through theoretical analysis and empirical validation, we demonstrate the efficacy of NAM and the valuable insights it offers. Furthermore, leveraging NAM, we introduce a multi-modal knowledge editing paradigm, underscoring the practical significance of our approach for downstream applications of MLLMs.", "keywords": "Neuron Attribution;Large Language Models;Multimodality", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Junfeng Fang;Zac Bi;Ruipeng Wang;Houcheng Jiang;Yuan Gao;Kun Wang;An Zhang;Jie Shi;Xiang Wang;Tat-Seng Chua", "authorids": "~Junfeng_Fang1;~Zac_Bi1;~Ruipeng_Wang1;~Houcheng_Jiang1;~Yuan_Gao18;~Kun_Wang15;~An_Zhang2;~Jie_Shi2;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;M;;M;;M;F;M;M", "homepage": "https://scholar.google.com/citations?user=beNNywsAAAAJ&hl=zh-CN;https://github.com/ZacBi;https://github.com/littlelittlenine;https://github.com/blacksingular;http://home.ustc.edu.cn/~wk520529/#home;;https://github.com/xiangwang1223;https://github.com/anzhang314;https://github.com/jianghoucheng;http://www.comp.nus.edu.sg/~chuats/", "dblp": "340/7929;;;76/2452-20;;;31/2864-10;78/5581-3;389/5053;", "google_scholar": "beNNywsAAAAJ;;;;UnyqjWQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;ncYK9KkAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": ";;;;0000-0003-0602-169X;;0000-0002-6148-6329;;;0000-0001-6097-7807", "linkedin": ";;;;;;;;;", "or_profile": "~Junfeng_Fang1;~Zac_Bi1;~Ruipeng_Wang1;~Yuan_Gao18;~Kun_Wang15;~Jie_Shi2;~Xiang_Wang6;~AN_ZHANG1;~Jianghoucheng2;~Tat-seng_Chua1", "aff": ";Alibaba Group;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Huawei International.;University of Science and Technology of China;National University of Singapore;University of Science and Technology of China;National University of Singapore", "aff_domain": ";alibaba-inc.com;mail.ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;huawei.com;ustc.edu.cn;nus.edu.sg;ustc.edu.cn;nus.edu.sg", "position": ";Researcher;Undergrad student;PhD student;PhD student;Principal Researcher;Full Professor;Postdoc;MS student;Full Professor", "bibtex": "@inproceedings{\nfang2024towards,\ntitle={Towards Neuron Attributions in Multi-Modal Large Language Models},\nauthor={Junfeng Fang and Zac Bi and Ruipeng Wang and Houcheng Jiang and Yuan Gao and Kun Wang and An Zhang and Jie Shi and Xiang Wang and Tat-Seng Chua},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jMJVFP4BH6}\n}", "github": "", "reviewers": "VhwX;kLxz;VfTi;gq3m", "pdf_size": 5651582, "rating": "5;5;6;8", "confidence": "3;3;4;4", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "2;2;3;2", "wc_summary": "55;104;103;68", "wc_strengths": "67;80;44;66", "wc_weaknesses": "40;115;189;87", "wc_questions": "1;1;188;304", "wc_limitations": "1;1;4;12", "wc_review": "164;301;528;537", "wc_reply_reviewers": "12;19;101;63", "wc_reply_authors": "251;277;1195;131", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;4;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.5, 21.5 ], "wc_strengths_avg": [ 64.25, 12.93010054098575 ], "wc_weaknesses_avg": [ 107.75, 54.02487852832249 ], "wc_questions_avg": [ 123.5, 129.18300971877068 ], "wc_limitations_avg": [ 4.5, 4.5 ], "wc_review_avg": [ 382.5, 157.6586502542756 ], "wc_reply_reviewers_avg": [ 48.75, 35.947009611371016 ], "wc_reply_authors_avg": [ 463.5, 425.90697341086116 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.8164965809277259, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3467592600282305929&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";alibaba-inc.com;mail.ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;huawei.com;ustc.edu.cn;nus.edu.sg;ustc.edu.cn;nus.edu.sg", "author_num": 10, "aff_unique_index": "0;1;1;1;2;1;3;1;3", "aff_unique_norm": "Alibaba Group;University of Science and Technology of China;Huawei;National University of Singapore", "aff_unique_dep": ";;Huawei;", "aff_unique_url": "https://www.alibaba.com;http://www.ustc.edu.cn;https://www.huawei.com;https://www.nus.edu.sg", "aff_unique_abbr": "Alibaba;USTC;Huawei;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Kraken: Inherently Parallel Transformers For Efficient Multi-Device Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93961", "id": "jRtxzzk0a6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jRtxzzk0a6", "openreview": "https://openreview.net/forum?id=jRtxzzk0a6", "poster": "/media/PosterPDFs/NeurIPS%202024/93961.png?t=1733632985.369992", "project": "", "author_site": "Rohan Baskar Prabhakar, Hengrui Zhang, David Wentzlaff", "tldr": "", "abstract": "Large Transformer networks are increasingly used in settings where low inference latency is necessary to enable new applications and improve the end-user experience.\nHowever, autoregressive inference is resource intensive and requires parallelism for efficiency.\nParallelism introduces collective communication that is both expensive and represents a phase when hardware resources are underutilized.\nTowards mitigating this, Kraken is an evolution of the standard Transformer architecture that is designed to complement existing tensor parallelism schemes for efficient inference on multi-device systems.\nBy introducing a fixed degree of intra-layer model parallelism, the architecture allows collective operations to be overlapped with compute, decreasing latency and increasing hardware utilization.\nWhen trained on OpenWebText, Kraken models reach a similar perplexity as standard Transformers while also preserving their language modeling capabilities as evaluated on the SuperGLUE benchmark.\nImportantly, when tested on multi-GPU systems using TensorRT-LLM engines, Kraken speeds up Time To First Token by a mean of 35.6% across a range of model sizes, context lengths, and degrees of tensor parallelism.", "keywords": "Neural Networks", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Rohan Baskar Prabhakar;Hengrui Zhang;David Wentzlaff", "authorids": "~Rohan_Baskar_Prabhakar1;~Hengrui_Zhang4;~David_Wentzlaff1", "gender": "M;;", "homepage": ";https://nicsefc.ee.tsinghua.edu.cn/people/HengruiZhang;", "dblp": "294/3826;;", "google_scholar": "GUo1-cEAAAAJ;HPzhDi0AAAAJ;", "orcid": ";;", "linkedin": "rohan-baskar/;;", "or_profile": "~Rohan_Baskar_Prabhakar1;~Hengrui_Zhang4;~David_Wentzlaff1", "aff": "Microsoft;Princeton University;", "aff_domain": "microsoft.com;princeton.edu;", "position": "Intern;PhD student;", "bibtex": "@inproceedings{\nprabhakar2024kraken,\ntitle={Kraken: Inherently Parallel Transformers For Efficient Multi-Device Inference},\nauthor={Rohan Baskar Prabhakar and Hengrui Zhang and David Wentzlaff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jRtxzzk0a6}\n}", "github": "", "reviewers": "ptrK;6qbN;vwQM", "pdf_size": 463596, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "3;3;2", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "41;99;67", "wc_strengths": "57;18;95", "wc_weaknesses": "144;76;66", "wc_questions": "152;3;38", "wc_limitations": "7;1;37", "wc_review": "401;197;303", "wc_reply_reviewers": "71;9;14", "wc_reply_authors": "117;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.0, 23.72059583287626 ], "wc_strengths_avg": [ 56.666666666666664, 31.436002007606216 ], "wc_weaknesses_avg": [ 95.33333333333333, 34.65384378231207 ], "wc_questions_avg": [ 64.33333333333333, 63.61516241344424 ], "wc_limitations_avg": [ 15.0, 15.748015748023622 ], "wc_review_avg": [ 300.3333333333333, 83.30399483551527 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 28.122746823325937 ], "wc_reply_authors_avg": [ 39.0, 55.154328932550705 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4269128365604843094&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "microsoft.com;princeton.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Princeton University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.princeton.edu", "aff_unique_abbr": "Microsoft;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Nonstationary Sparse Spectral Permanental Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93960", "id": "jS34QpqdWs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jS34QpqdWs", "openreview": "https://openreview.net/forum?id=jS34QpqdWs", "poster": "/media/PosterPDFs/NeurIPS%202024/93960.png?t=1731550875.4295924", "project": "", "author_site": "Zicheng Sun, Yixuan Zhang, Zenan Ling, Xuhui Fan, Feng Zhou", "tldr": "", "abstract": "Existing permanental processes often impose constraints on kernel types or stationarity, limiting the model's expressiveness. To overcome these limitations, we propose a novel approach utilizing the sparse spectral representation of nonstationary kernels. \nThis technique relaxes the constraints on kernel types and stationarity, allowing for more flexible modeling while reducing computational complexity to the linear level. \nAdditionally, we introduce a deep kernel variant by hierarchically stacking multiple spectral feature mappings, further enhancing the model's expressiveness to capture complex patterns in data. Experimental results on both synthetic and real-world datasets demonstrate the effectiveness of our approach, particularly in scenarios with pronounced data nonstationarity. Additionally, ablation studies are conducted to provide insights into the impact of various hyperparameters on model performance.", "keywords": "permanental process;nonstationary kernel;Bayesian inference", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Zicheng Sun;Yixuan Zhang;Zenan Ling;Xuhui Fan;Feng Zhou", "authorids": "~Zicheng_Sun2;~Yixuan_Zhang12;~Zenan_Ling1;~Xuhui_Fan1;~Feng_Zhou9", "gender": "M;;M;M;", "homepage": ";;https://scholar.google.com/citations?user=BabePTkAAAAJ&hl=zh-CN;https://xuhuifan.github.io/;", "dblp": ";;183/7798;117/4874;", "google_scholar": ";;BabePTkAAAAJ;https://scholar.google.com.au/citations?user=NSc42eUAAAAJ;", "orcid": "0009-0002-4088-8289;;;0000-0002-7558-7200;", "linkedin": ";;;;", "or_profile": "~Zicheng_Sun2;~Yixuan_Zhang12;~Zenan_Ling1;~Xuhui_Fan1;~Feng_Zhou9", "aff": "Renmin University of China;;Huazhong University of Science and Technology;Macquarie University;", "aff_domain": "ruc.edu.cn;;hust.edu.cn;mq.edu.au;", "position": "Undergrad student;;Researcher;Lecturer;", "bibtex": "@inproceedings{\nsun2024nonstationary,\ntitle={Nonstationary Sparse Spectral Permanental Process},\nauthor={Zicheng Sun and Yixuan Zhang and Zenan Ling and Xuhui Fan and Feng Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jS34QpqdWs}\n}", "github": "", "reviewers": "vqtT;WJmT;QimU", "pdf_size": 10375388, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "4;3;3", "wc_summary": "225;107;193", "wc_strengths": "65;97;103", "wc_weaknesses": "213;54;218", "wc_questions": "39;13;103", "wc_limitations": "8;1;46", "wc_review": "550;272;663", "wc_reply_reviewers": "41;0;71", "wc_reply_authors": "31;0;85", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 175.0, 49.82636517614612 ], "wc_strengths_avg": [ 88.33333333333333, 16.679994670929073 ], "wc_weaknesses_avg": [ 161.66666666666666, 76.15919000502974 ], "wc_questions_avg": [ 51.666666666666664, 37.81827894315422 ], "wc_limitations_avg": [ 18.333333333333332, 19.770910168449223 ], "wc_review_avg": [ 495.0, 164.29445111344043 ], "wc_reply_reviewers_avg": [ 37.333333333333336, 29.101355447622865 ], "wc_reply_authors_avg": [ 38.666666666666664, 35.122009560324926 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8620644843071696644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ruc.edu.cn;;hust.edu.cn;mq.edu.au;", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Renmin University of China;Huazhong University of Science and Technology;Macquarie University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.hust.edu.cn;https://www.mq.edu.au", "aff_unique_abbr": "RUC;HUST;MQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Australia" }, { "title": "VideoGUI: A Benchmark for GUI Automation from Instructional Videos", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97544", "id": "jSKtxmxc0M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jSKtxmxc0M", "openreview": "https://openreview.net/forum?id=jSKtxmxc0M", "poster": "/media/PosterPDFs/NeurIPS%202024/97544.png?t=1733578117.8970912", "project": "", "author_site": "Kevin Qinghong Lin, Linjie Li, Difei Gao, Qinchen WU, Mingyi Yan, Zhengyuan Yang, Lijuan Wang, Mike Zheng Shou", "tldr": "", "abstract": "Graphical User Interface (GUI) automation holds significant promise for enhancing human productivity by assisting with computer tasks. Existing task formulations primarily focus on simple tasks that can be specified by a single, language-only instruction, such as \u201cInsert a new slide.\u201d In this work, we introduce VideoGUI, a novel multi-modal benchmark designed to evaluate GUI assistants on visual-centric GUI tasks. Sourced from high-quality web instructional videos, our benchmark focuses on tasks involving professional and novel software (e.g., Adobe Pho- toshop or Stable Diffusion WebUI) and complex activities (e.g., video editing). VideoGUI evaluates GUI assistants through a hierarchical process, allowing for identification of the specific levels at which they may fail: (i) high-level planning: reconstruct procedural subtasks from visual conditions without language descrip- tions; (ii) middle-level planning: generate sequences of precise action narrations based on visual state (i.e., screenshot) and goals; (iii) atomic action execution: perform specific actions such as accurately clicking designated elements. For each level, we design evaluation metrics across individual dimensions to provide clear signals, such as individual performance in clicking, dragging, typing, and scrolling for atomic action execution. Our evaluation on VideoGUI reveals that even the SoTA large multimodal model GPT4o performs poorly on visual-centric GUI tasks, especially for high-level planning. The data and code are available at https://github.com/showlab/videogui.", "keywords": "Graphical User Interface; Language Agent; Vision-Language Models; Computer Software task", "primary_area": "", "supplementary_material": "/attachment/6402fd38335161d60c99a626054814db246a9f45.zip", "author": "Kevin Qinghong Lin;Linjie Li;Difei Gao;Qinchen WU;Mingyi Yan;Zhengyuan Yang;Lijuan Wang;Mike Zheng Shou", "authorids": "~Kevin_Qinghong_Lin1;~Linjie_Li1;~Difei_Gao1;~Qinchen_WU1;~Mingyi_Yan1;~Zhengyuan_Yang1;~Lijuan_Wang1;~Mike_Zheng_Shou1", "gender": ";F;;M;M;M;F;", "homepage": ";;;https://frank6200db.github.io/;;http://zhengyuan.info/;https://www.microsoft.com/en-us/research/people/lijuanw/;", "dblp": ";200/8256;;233/9118;;163/9713;51/2527.html;", "google_scholar": ";WR875gYAAAAJ;;;X_kJ5-4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;cDcWXuIAAAAJ;", "orcid": ";;;0009-0005-1722-2579;;;;", "linkedin": ";;;;;;;", "or_profile": "~Kevin_Qinghong_Lin1;~Linjie_Li1;~Difei_Gao1;~Qinchen_WU1;~Mingyi_Yan1;~Zhengyuan_Yang1;~Lijuan_Wang1;~Mike_Zheng_Shou1", "aff": ";Microsoft;;National University of Singapore;National University of Singapore;Microsoft;Microsoft;", "aff_domain": ";microsoft.com;;nus.edu;u.nus.edu;microsoft.com;microsoft.com;", "position": ";Researcher;;MS student;MS student;Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nlin2024videogui,\ntitle={Video{GUI}: A Benchmark for {GUI} Automation from Instructional Videos},\nauthor={Kevin Qinghong Lin and Linjie Li and Difei Gao and Qinchen WU and Mingyi Yan and Zhengyuan Yang and Lijuan Wang and Mike Zheng Shou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=jSKtxmxc0M}\n}", "github": "", "reviewers": "ymcv;YcXa;Pf3N;pM4s", "pdf_size": 38951222, "rating": "6;6;6;8", "confidence": "3;5;3;4", "wc_summary_and_contributions": "21;92;58;101", "wc_strengths": "26;72;166;79", "wc_improvement": "135;35;217;58", "wc_limitations": "21;1;43;18", "wc_correctness": "7;1;29;19", "wc_clarity": "1;1;10;6", "wc_relation_to_prior_work": "1;1;15;16", "wc_documentation": "20;32;14;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "233;236;553;306", "wc_reply_reviewers": "88;0;16;22", "wc_reply_authors": "126;47;75;27", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 68.0, 31.51983502494897 ], "wc_strengths_avg": [ 85.75, 50.60817621689207 ], "wc_improvement_avg": [ 111.25, 71.40859542099956 ], "wc_limitations_avg": [ 20.75, 14.939461168328663 ], "wc_correctness_avg": [ 14.0, 10.816653826391969 ], "wc_clarity_avg": [ 4.5, 3.774917217635375 ], "wc_relation_to_prior_work_avg": [ 8.25, 7.258615570478987 ], "wc_documentation_avg": [ 18.5, 8.874119674649425 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 332.0, 130.89499608464794 ], "wc_reply_reviewers_avg": [ 31.5, 33.596874854664684 ], "wc_reply_authors_avg": [ 68.75, 37.19122880465231 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16647995736732959718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";microsoft.com;;nus.edu;u.nus.edu;microsoft.com;microsoft.com;", "author_num": 8, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Microsoft;National University of Singapore", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.nus.edu.sg", "aff_unique_abbr": "Microsoft;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;Singapore" }, { "title": "Stochastic Zeroth-Order Optimization under Strongly Convexity and Lipschitz Hessian: Minimax Sample Complexity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93959", "id": "jTyjwRpLZ5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jTyjwRpLZ5", "openreview": "https://openreview.net/forum?id=jTyjwRpLZ5", "poster": "/media/PosterPDFs/NeurIPS%202024/93959.png?t=1733799543.840622", "project": "", "author_site": "Qian Yu, Yining Wang, Baihe Huang, Qi Lei, Jason Lee", "tldr": "", "abstract": "Optimization of convex functions under stochastic zeroth-order feedback has been a major and challenging question in online learning. In this work, we consider the problem of optimizing second-order smooth and strongly convex functions where the algorithm is only accessible to noisy evaluations of the objective function it queries. \nWe provide the first tight characterization for the rate of the minimax simple regret by developing matching upper and lower bounds. \nWe propose an algorithm that features a combination of a bootstrapping stage and a mirror-descent stage. \nOur main technical innovation consists of a sharp characterization for the spherical-sampling gradient estimator under higher-order smoothness conditions, which allows the algorithm to optimally balance the bias-variance tradeoff, \nand a new iterative method for the bootstrapping stage, which maintains the performance for unbounded Hessian.", "keywords": "Stochastic Optimization;Sample Complexity;Optimality", "primary_area": "optimization", "supplementary_material": "", "author": "Qian Yu;Yining Wang;Baihe Huang;Qi Lei;Jason D. Lee", "authorids": "~Qian_Yu5;~Yining_Wang1;~Baihe_Huang1;~Qi_Lei1;~Jason_D._Lee1", "gender": ";M;;F;M", "homepage": "https://scholar.princeton.edu/qyu;https://yining-wang.com;;https://cecilialeiqi.github.io/;https://jasondlee88.github.io/", "dblp": "16/3790-1;04/7235;279/4131;;88/3262", "google_scholar": "SxUNhucAAAAJ;HpQGq54AAAAJ;chICXXMAAAAJ;kGOgaowAAAAJ;GR_DsT0AAAAJ", "orcid": "0000-0002-2034-5941;;;;", "linkedin": ";;;;", "or_profile": "~Qian_Yu5;~Yining_Wang1;~Baihe_Huang1;~Qi_Lei1;~Jason_D._Lee1", "aff": "University of California, Santa Barbara;University of Texas at Dallas;University of California, Berkeley;New York University;Princeton University", "aff_domain": "ucsb.edu;cs.utdallas.edu;berkeley.edu;nyu.edu;princeton.edu", "position": "Assistant Professor;Associate Professor;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2024stochastic,\ntitle={Stochastic Zeroth-Order Optimization under Strongly Convexity and Lipschitz Hessian: Minimax Sample Complexity},\nauthor={Qian Yu and Yining Wang and Baihe Huang and Qi Lei and Jason D. Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jTyjwRpLZ5}\n}", "github": "", "reviewers": "cqSV;QNuA;X3iq;vYTV", "pdf_size": 509882, "rating": "5;5;6;6", "confidence": "3;3;3;3", "soundness": "2;3;3;2", "novelty": "2;3;3;3", "presentation": "1;2;3;2", "wc_summary": "65;54;51;110", "wc_strengths": "40;34;111;79", "wc_weaknesses": "394;112;3;149", "wc_questions": "98;103;128;306", "wc_limitations": "1;42;1;6", "wc_review": "598;345;294;650", "wc_reply_reviewers": "104;15;0;127", "wc_reply_authors": "98;0;0;0", "reply_reviewers": "2;1;0;2", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.0, 23.67488120350343 ], "wc_strengths_avg": [ 66.0, 31.20096152364539 ], "wc_weaknesses_avg": [ 164.5, 142.95891018051304 ], "wc_questions_avg": [ 158.75, 85.77113442178552 ], "wc_limitations_avg": [ 12.5, 17.153716798408443 ], "wc_review_avg": [ 471.75, 154.4123942564197 ], "wc_reply_reviewers_avg": [ 61.5, 54.865745233250955 ], "wc_reply_authors_avg": [ 24.5, 42.4352447854375 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12026496598151491674&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsb.edu;cs.utdallas.edu;berkeley.edu;nyu.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of California, Santa Barbara;University of Texas at Dallas;University of California, Berkeley;New York University;Princeton University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ucsb.edu;https://www.utdallas.edu;https://www.berkeley.edu;https://www.nyu.edu;https://www.princeton.edu", "aff_unique_abbr": "UCSB;UT Dallas;UC Berkeley;NYU;Princeton", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Santa Barbara;Dallas;Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "An Expectation-Maximization Algorithm for Training Clean Diffusion Models from Corrupted Observations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93958", "id": "jURBh4V9N4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jURBh4V9N4", "openreview": "https://openreview.net/forum?id=jURBh4V9N4", "poster": "/media/PosterPDFs/NeurIPS%202024/93958.png?t=1731744397.7670414", "project": "", "author_site": "Weimin Bai, Yifei Wang, Wenzheng Chen, He Sun", "tldr": "", "abstract": "Diffusion models excel in solving imaging inverse problems due to their ability to model complex image priors. However, their reliance on large, clean datasets for training limits their practical use where clean data is scarce. In this paper, we propose EMDiffusion, an expectation-maximization (EM) approach to train diffusion models from corrupted observations. Our method alternates between reconstructing clean images from corrupted data using a known diffusion model (E-step) and refining diffusion model weights based on these reconstructions (M-step). This iterative process leads the learned diffusion model to gradually converge to a local optimum, that is, to approximate the true clean data distribution. We validate our method through extensive experiments on diverse computational imaging tasks, including random inpainting, denoising, and deblurring, achieving new state-of-the-art performance.", "keywords": "Diffusion model;Inverse Problem;Computational Imaging", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "WeiminBai;Yifei Wang;Wenzheng Chen;He Sun", "authorids": "~WeiminBai1;~Yifei_Wang9;~Wenzheng_Chen1;~He_Sun3", "gender": ";M;M;M", "homepage": ";https://a-little-hoof.github.io/;https://wenzhengchen.github.io/;http://users.cms.caltech.edu/~hesun/", "dblp": ";;165/6329;93/2604-10", "google_scholar": ";;KzhR_TsAAAAJ;c9V5HkYAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~WeiminBai1;~Yifei_Wang9;~Wenzheng_Chen1;~He_Sun3", "aff": ";Peking University;Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": ";Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nweiminbai2024an,\ntitle={An Expectation-Maximization Algorithm for Training Clean Diffusion Models from Corrupted Observations},\nauthor={WeiminBai and Yifei Wang and Wenzheng Chen and He Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jURBh4V9N4}\n}", "github": "", "reviewers": "HYFc;qpvz;bEKL;Kirh", "pdf_size": 8052676, "rating": "2;6;7;7", "confidence": "3;5;3;3", "soundness": "2;4;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "42;60;62;25", "wc_strengths": "115;166;98;23", "wc_weaknesses": "60;344;143;19", "wc_questions": "36;19;77;42", "wc_limitations": "44;10;64;13", "wc_review": "297;599;444;122", "wc_reply_reviewers": "149;308;6;8", "wc_reply_authors": "213;63;13;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 2.0615528128088303 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 47.25, 15.022899187573616 ], "wc_strengths_avg": [ 100.5, 51.266460771151344 ], "wc_weaknesses_avg": [ 141.5, 125.15690152764249 ], "wc_questions_avg": [ 43.5, 21.10094784600919 ], "wc_limitations_avg": [ 32.75, 22.42069356643545 ], "wc_review_avg": [ 365.5, 176.5424878039278 ], "wc_reply_reviewers_avg": [ 117.75, 124.20220408672303 ], "wc_reply_authors_avg": [ 76.5, 81.22037921605636 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.14002800840280097, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7883583070979278810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "The Implicit Bias of Gradient Descent toward Collaboration between Layers: A Dynamic Analysis of Multilayer Perceptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93957", "id": "jV6z08u7y0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jV6z08u7y0", "openreview": "https://openreview.net/forum?id=jV6z08u7y0", "poster": "/media/PosterPDFs/NeurIPS%202024/93957.png?t=1731758607.4856567", "project": "", "author_site": "Zheng Wang, Geyong Min, Wenjie Ruan", "tldr": "", "abstract": "The implicit bias of gradient descent has long been considered the primary mechanism explaining the superior generalization of over-parameterized neural networks without overfitting, even when the training error is zero. However, the implicit bias toward adversarial robustness has rarely been considered in the research community, although it is crucial for the trustworthiness of machine learning models. To fill this gap, in this paper, we explore whether consecutive layers collaborate to strengthen adversarial robustness during gradient descent. By quantifying this collaboration between layers using our proposed concept, co-correlation, we demonstrate a monotonically increasing trend in co-correlation, which implies a decreasing trend in adversarial robustness during gradient descent. Additionally, we observe different behaviours between narrow and wide neural networks during gradient descent. We conducted extensive experiments that verified our proposed theorems.", "keywords": "Implicit Bias;Adversarial Robustness;Dynamic Analysis", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zheng Wang;Geyong Min;Wenjie Ruan", "authorids": "~Zheng_Wang36;~Geyong_Min1;~Wenjie_Ruan2", "gender": "M;M;", "homepage": ";;", "dblp": "181/2834-74;96/3727;", "google_scholar": "p0b4pLoAAAAJ;;", "orcid": "0000-0001-7146-7503;;", "linkedin": ";;", "or_profile": "~Zheng_Wang36;~Geyong_Min1;~Wenjie_Ruan2", "aff": "University of Exeter;University of Exeter;", "aff_domain": "exeter.ac.uk;exeter.ac.uk;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nwang2024the,\ntitle={The Implicit Bias of Gradient Descent toward Collaboration between Layers: A Dynamic Analysis of Multilayer Perceptions},\nauthor={Zheng Wang and Geyong Min and Wenjie Ruan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jV6z08u7y0}\n}", "github": "", "reviewers": "DyJc;yLC9;KqkQ;G4as", "pdf_size": 1033923, "rating": "5;5;5;6", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;2;4", "presentation": "2;2;2;3", "wc_summary": "54;80;93;203", "wc_strengths": "81;52;20;81", "wc_weaknesses": "358;114;177;142", "wc_questions": "59;44;5;182", "wc_limitations": "34;26;9;1", "wc_review": "586;316;304;609", "wc_reply_reviewers": "152;438;63;58", "wc_reply_authors": "649;584;55;57", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.5, 56.89683646741706 ], "wc_strengths_avg": [ 58.5, 25.184320518926057 ], "wc_weaknesses_avg": [ 197.75, 95.17451076837747 ], "wc_questions_avg": [ 72.5, 66.22122016393234 ], "wc_limitations_avg": [ 17.5, 13.124404748406688 ], "wc_review_avg": [ 453.75, 144.04231149214456 ], "wc_reply_reviewers_avg": [ 177.75, 154.83923113991492 ], "wc_reply_authors_avg": [ 336.25, 281.1915494818434 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PaWgQmwWQqoJ:scholar.google.com/&scioq=The+Implicit+Bias+of+Gradient+Descent+toward+Collaboration+between+Layers:+A+Dynamic+Analysis+of+Multilayer+Perceptions&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "exeter.ac.uk;exeter.ac.uk;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Exeter", "aff_unique_dep": "", "aff_unique_url": "https://www.exeter.ac.uk", "aff_unique_abbr": "Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "DARNet: Dual Attention Refinement Network with Spatiotemporal Construction for Auditory Attention Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93956", "id": "jWGGEDYORs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jWGGEDYORs", "openreview": "https://openreview.net/forum?id=jWGGEDYORs", "poster": "/media/PosterPDFs/NeurIPS%202024/93956.png?t=1731340980.9846892", "project": "", "author_site": "Sheng Yan, Cunhang Fan, Hongyu Zhang, Xiaoke Yang, Jianhua Tao, Zhao Lv", "tldr": "", "abstract": "At a cocktail party, humans exhibit an impressive ability to direct their attention. The auditory attention detection (AAD) approach seeks to identify the attended speaker by analyzing brain signals, such as EEG signals. \nHowever, current AAD algorithms overlook the spatial distribution information within EEG signals and lack the ability to capture long-range latent dependencies, limiting the model's ability to decode brain activity.\nTo address these issues, this paper proposes a dual attention refinement network with spatiotemporal construction for AAD, named DARNet, which consists of the spatiotemporal construction module, dual attention refinement module, and feature fusion \\& classifier module. Specifically, the spatiotemporal construction module aims to construct more expressive spatiotemporal feature representations, by capturing the spatial distribution characteristics of EEG signals. The dual attention refinement module aims to extract different levels of temporal patterns in EEG signals and enhance the model's ability to capture long-range latent dependencies. The feature fusion \\& classifier module aims to aggregate temporal patterns and dependencies from different levels and obtain the final classification results.\nThe experimental results indicate that DARNet achieved excellent classification performance, particularly under short decision windows. While maintaining excellent classification performance, DARNet significantly reduces the number of required parameters. Compared to the state-of-the-art models, DARNet reduces the parameter count by 91\\%. Code is available at: https://github.com/fchest/DARNet.git.", "keywords": "auditory attention decoding (AAD);electroencephalography (EEG);brain-computer interface (BCI)", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/e597a42260a0dc2529ed64cb2355c85598f65108.zip", "author": "Sheng Yan;Cunhang Fan;Hongyu Zhang;Xiaoke Yang;Jianhua Tao;Zhao Lv", "authorids": "~Sheng_Yan2;~Cunhang_Fan1;~Hongyu_Zhang2;~Xiaoke_Yang3;~Jianhua_Tao1;~Zhao_Lv1", "gender": "M;M;F;;;M", "homepage": "https://www.researchgate.net/profile/Yan-Sheng-9;;https://github.com/amber0220;https://github.com/Ybekk;;https://ieeexplore.ieee.org/author/37532072800", "dblp": ";240/7820.html;;;;", "google_scholar": ";QbnlF74AAAAJ;;;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Sheng_Yan2;~Cunhang_Fan1;~Hongyu_Zhang2;~Xiaoke_Yang3;~Jianhua_Tao1;~Zhao_Lv1", "aff": "Anhui University;School of Computer Science and Technology, Anhui University, Hefei 230601, China ;Anhui University;Anhui University;;School of Computer Science and Technology, Anhui University, Hefei 230601, China", "aff_domain": "ahu.edu.cn;ahu.edu.cn;ahu.edu.cn;ahu.edu.cn;;ahu.edu.cn", "position": "Undergrad student;Associate Professor;MS student;MS student;;Full Professor", "bibtex": "@inproceedings{\nyan2024darnet,\ntitle={{DARN}et: Dual Attention Refinement Network with Spatiotemporal Construction for Auditory Attention Detection},\nauthor={Sheng Yan and Cunhang Fan and Hongyu Zhang and Xiaoke Yang and Jianhua Tao and Zhao Lv},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jWGGEDYORs}\n}", "github": "", "reviewers": "eWL2;vVL1;FMXQ;XzJY", "pdf_size": 489221, "rating": "4;6;6;8", "confidence": "3;3;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "55;42;73;89", "wc_strengths": "18;84;50;62", "wc_weaknesses": "45;76;131;69", "wc_questions": "63;1;45;37", "wc_limitations": "1;6;16;20", "wc_review": "182;209;315;277", "wc_reply_reviewers": "0;15;25;83", "wc_reply_authors": "0;16;16;54", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.75, 17.80975856096876 ], "wc_strengths_avg": [ 53.5, 23.84848003542364 ], "wc_weaknesses_avg": [ 80.25, 31.47518864121389 ], "wc_questions_avg": [ 36.5, 22.555487137279922 ], "wc_limitations_avg": [ 10.75, 7.595228765481656 ], "wc_review_avg": [ 245.75, 52.88371677558226 ], "wc_reply_reviewers_avg": [ 30.75, 31.45135132232 ], "wc_reply_authors_avg": [ 21.5, 19.868316486305527 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1166499381673944523&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ahu.edu.cn;ahu.edu.cn;ahu.edu.cn;ahu.edu.cn;;ahu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Anhui University", "aff_unique_dep": "", "aff_unique_url": "http://www.ahu.edu.cn/", "aff_unique_abbr": "AHU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hefei", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Identifying General Mechanism Shifts in Linear Causal Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93955", "id": "jWaXhCYTV1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jWaXhCYTV1", "openreview": "https://openreview.net/forum?id=jWaXhCYTV1", "poster": "/media/PosterPDFs/NeurIPS%202024/93955.png?t=1731748886.204772", "project": "", "author_site": "Tianyu Chen, Kevin Bello, Francesco Locatello, Bryon Aragam, Pradeep Ravikumar", "tldr": "", "abstract": "We consider the linear causal representation learning setting where we observe a linear mixing of $d$ unknown latent factors, which follow a linear structural causal model. \nRecent work has shown that it is possible to recover the latent factors as well as the underlying structural causal model over them, up to permutation and scaling, provided that we have at least $d$ environments, each of which corresponds to perfect interventions on a single latent node (factor). \nAfter this powerful result, a key open problem faced by the community has been to relax these conditions: allow for coarser than perfect single-node interventions, and allow for fewer than $d$ of them, since the number of latent factors $d$ could be very large. \nIn this work, we consider precisely such a setting, where we allow a smaller than $d$ number of environments, and also allow for very coarse interventions that can very coarsely \\textit{change the entire causal graph over the latent factors}. \nOn the flip side, we relax what we wish to extract to simply the \\textit{list of nodes that have shifted between one or more environments}. \nWe provide a surprising identifiability result that it is indeed possible, under some very mild standard assumptions, to identify the set of shifted nodes. \nOur identifiability proof moreover is a constructive one: we explicitly provide necessary and sufficient conditions for a node to be a shifted node, and show that we can check these conditions given observed data. \nOur algorithm lends itself very naturally to the sample setting where instead of just interventional distributions, we are provided datasets of samples from each of these distributions. \nWe corroborate our results on both synthetic experiments as well as an interesting psychometric dataset. The code can be found at https://github.com/TianyuCodings/iLCS.", "keywords": "latent variable modeling;distribution shifts;causal representation learning;heterogeneous data;root cause analysis", "primary_area": "causal_inference", "supplementary_material": "/attachment/e43aa5e3d64b844cb799e2f7555a0672edc5722e.zip", "author": "Tianyu Chen;Kevin Bello;Francesco Locatello;Bryon Aragam;Pradeep Kumar Ravikumar", "authorids": "~Tianyu_Chen3;~Kevin_Bello1;~Francesco_Locatello1;~Bryon_Aragam1;~Pradeep_Kumar_Ravikumar1", "gender": "M;M;M;;M", "homepage": ";https://www.cs.cmu.edu/~kbello;https://twitter.com/FrancescoLocat8;http://bryonaragam.com/;http://www.cs.cmu.edu/~pradeepr/", "dblp": ";202/2531;195/6074;140/7564;94/3594", "google_scholar": ";pCS09UsAAAAJ;;u-W3_9QAAAAJ;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";;;;", "linkedin": "tianyu-chen-1a056a160/;;;;", "or_profile": "~Tianyu_Chen3;~Kevin_Bello1;~Francesco_Locatello1;~Bryon_Aragam1;~Pradeep_Kumar_Ravikumar1", "aff": "University of Texas at Austin;University of Chicago;Institute of Science and Technology;Booth School of Business;Carnegie Mellon University", "aff_domain": "utexas.edu;uchicago.edu;ist.ac.at;chicagobooth.edu;cmu.edu", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024identifying,\ntitle={Identifying General Mechanism Shifts in Linear Causal Representations},\nauthor={Tianyu Chen and Kevin Bello and Francesco Locatello and Bryon Aragam and Pradeep Kumar Ravikumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jWaXhCYTV1}\n}", "github": "", "reviewers": "Eg5F;JGz2;33zq;tbD8", "pdf_size": 5659119, "rating": "3;5;6;7", "confidence": "3;5;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;4;3;4", "wc_summary": "80;112;155;54", "wc_strengths": "50;40;150;132", "wc_weaknesses": "88;235;113;292", "wc_questions": "173;110;117;179", "wc_limitations": "1;1;6;26", "wc_review": "392;498;541;683", "wc_reply_reviewers": "277;220;70;39", "wc_reply_authors": "456;325;34;34", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 100.25, 37.69864055904404 ], "wc_strengths_avg": [ 93.0, 48.54894437575342 ], "wc_weaknesses_avg": [ 182.0, 84.41859984624242 ], "wc_questions_avg": [ 144.75, 31.419540098480116 ], "wc_limitations_avg": [ 8.5, 10.307764064044152 ], "wc_review_avg": [ 528.5, 104.38989414689527 ], "wc_reply_reviewers_avg": [ 151.5, 99.67572422611235 ], "wc_reply_authors_avg": [ 212.25, 184.1689102427443 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.47809144373375745, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9500962905117182263&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "utexas.edu;uchicago.edu;ist.ac.at;chicagobooth.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Texas at Austin;University of Chicago;Institute of Science and Technology;University of Chicago Booth School of Business;Carnegie Mellon University", "aff_unique_dep": ";;;Booth School of Business;", "aff_unique_url": "https://www.utexas.edu;https://www.uchicago.edu;;https://www.chicagobooth.edu;https://www.cmu.edu", "aff_unique_abbr": "UT Austin;UChicago;;Booth;CMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Austin;;Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "High-Resolution Image Harmonization with Adaptive-Interval Color Transformation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93954", "id": "jXgHEwtXs8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jXgHEwtXs8", "openreview": "https://openreview.net/forum?id=jXgHEwtXs8", "poster": "/media/PosterPDFs/NeurIPS%202024/93954.png?t=1731334031.7954488", "project": "", "author_site": "Quanling Meng, Liu Qinglin, Zonglin Li, Xiangyuan Lan, Shengping Zhang, Liqiang Nie", "tldr": "", "abstract": "Existing high-resolution image harmonization methods typically rely on global color adjustments or the upsampling of parameter maps. However, these methods ignore local variations, leading to inharmonious appearances. To address this problem, we propose an Adaptive-Interval Color Transformation method (AICT), which predicts pixel-wise color transformations and adaptively adjusts the sampling interval to model local non-linearities of the color transformation at high resolution. Specifically, a parameter network is first designed to generate multiple position-dependent 3-dimensional lookup tables (3D LUTs), which use the color and position of each pixel to perform pixel-wise color transformations. Then, to enhance local variations adaptively, we separate a color transform into a cascade of sub-transformations using two 3D LUTs to achieve the non-uniform sampling intervals of the color transform. Finally, a global consistent weight learning method is proposed to predict an image-level weight for each color transform, utilizing global information to enhance the overall harmony. Extensive experiments demonstrate that our AICT achieves state-of-the-art performance with a lightweight architecture. The code is available at https://github.com/aipixel/AICT.", "keywords": "Image Composition;Image Harmonization;Lookup Table", "primary_area": "machine_vision", "supplementary_material": "", "author": "Quanling Meng;Qinglin Liu;Zonglin Li;Xiangyuan Lan;Shengping Zhang;Liqiang Nie", "authorids": "~Quanling_Meng1;~Qinglin_Liu1;~Zonglin_Li1;~Xiangyuan_Lan4;~Shengping_Zhang1;~Liqiang_Nie2", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/lingtianxia123;;https://lzl.sd.cn/;http://homepage.hit.edu.cn/zhangshengping;https://liqiangnie.github.io/index.html;https://www.comp.hkbu.edu.hk/v1/?page=profile&id=lanxiangyuan", "dblp": ";227/7900;;60/1866;92/8277;151/8902", "google_scholar": ";hsu1cSIAAAAJ;e3cfNyMAAAAJ;hMNsT8sAAAAJ;yywVMhUAAAAJ;https://scholar.google.com.hk/citations?user=c3iwWRcAAAAJ", "orcid": ";0000-0002-2408-3344;;;0000-0003-1476-0273;", "linkedin": ";%E9%9D%92%E6%9E%97-%E6%9F%B3-a7354377/;;;;", "or_profile": "~Quanling_Meng1;~Qinglin_Liu1;~Zonglin_Li1;~Shengping_Zhang1;~Liqiang_Nie2;~xiangyuan_lan1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Shandong University;Pengcheng Laboratory", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;sdu.edu.cn;pcl.ac.cn", "position": "Lecturer;Lecturer;PhD student;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nmeng2024highresolution,\ntitle={High-Resolution Image Harmonization with Adaptive-Interval Color Transformation},\nauthor={Quanling Meng and Qinglin Liu and Zonglin Li and Xiangyuan Lan and Shengping Zhang and Liqiang Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jXgHEwtXs8}\n}", "github": "", "reviewers": "M5vB;dAiu;pwQP;4Tt1", "pdf_size": 1599246, "rating": "5;5;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;2;2;3", "wc_summary": "92;83;91;35", "wc_strengths": "74;19;206;23", "wc_weaknesses": "497;316;446;94", "wc_questions": "194;44;3;6", "wc_limitations": "19;1;3;3", "wc_review": "876;463;749;161", "wc_reply_reviewers": "12;44;33;7", "wc_reply_authors": "43;26;18;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 75.25, 23.498670175139697 ], "wc_strengths_avg": [ 80.5, 75.63233435508916 ], "wc_weaknesses_avg": [ 338.25, 155.69581722063057 ], "wc_questions_avg": [ 61.75, 78.04606011836856 ], "wc_limitations_avg": [ 6.5, 7.262919523166975 ], "wc_review_avg": [ 562.25, 275.75657290443684 ], "wc_reply_reviewers_avg": [ 24.0, 15.116216457830975 ], "wc_reply_authors_avg": [ 27.5, 9.394147114027968 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lo1w68g0k2QJ:scholar.google.com/&scioq=High-Resolution+Image+Harmonization+with+Adaptive-Interval+Color+Transformation&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;sdu.edu.cn;pcl.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Harbin Institute of Technology;Shandong University;Pengcheng Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;http://www.sdu.edu.cn;", "aff_unique_abbr": "HIT;SDU;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Robust Prompt Optimization for Defending Language Models Against Jailbreaking Attacks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93953", "id": "jXs6Cvpe7k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jXs6Cvpe7k", "openreview": "https://openreview.net/forum?id=jXs6Cvpe7k", "poster": "", "project": "", "author_site": "Andy Zhou, Bo Li, Haohan Wang", "tldr": "", "abstract": "Despite advances in AI alignment, large language models (LLMs) remain vulnerable to adversarial attacks or jailbreaking, in which adversaries can modify prompts to induce unwanted behavior. While some defenses have been proposed, they have not been adapted to newly proposed attacks and more challenging threat models. To address this, we propose an optimization-based objective for defending LLMs against jailbreaking attacks and an algorithm, Robust Prompt Optimization (RPO), to create robust system-level defenses. Our approach directly incorporates the adversary into the defensive objective and optimizes a lightweight and transferable suffix, enabling RPO to adapt to worst-case adaptive attacks. Our theoretical and experimental results show improved robustness to both jailbreaks seen during optimization and unknown jailbreaks, reducing the attack success rate (ASR) on GPT-4 to 6% and Llama-2 to 0% on JailbreakBench, setting the state-of-the-art.", "keywords": "large language models;AI safety;jailbreaking;red-teaming", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/8bc86a30288a1cca66905eeedba9078985e6a8ff.zip", "author": "Andy Zhou;Bo Li;Haohan Wang", "authorids": "~Andy_Zhou2;~Bo_Li19;~Haohan_Wang1", "gender": "M;F;M", "homepage": "https://www.andyzhou.ai;http://boli.cs.illinois.edu/;http://cs.cmu.edu/~haohanw", "dblp": ";50/3402-26;132/4066", "google_scholar": "https://scholar.google.com/citations?hl=en;K8vJkTcAAAAJ;nZxJGeUAAAAJ", "orcid": ";;", "linkedin": "andy-zhou-679376206/;;haohanwang/", "or_profile": "~Andy_Zhou2;~Bo_Li19;~Haohan_Wang1", "aff": "Department of Computer Science;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;illinois.edu;illinois.edu", "position": "Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024robust,\ntitle={Robust Prompt Optimization for Defending Language Models Against Jailbreaking Attacks},\nauthor={Andy Zhou and Bo Li and Haohan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jXs6Cvpe7k}\n}", "github": "", "reviewers": "yUwN;YYJE;Md6V;EVpE", "pdf_size": 1199758, "rating": "6;7;7;8", "confidence": "5;2;4;5", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "80;99;35;170", "wc_strengths": "51;59;29;280", "wc_weaknesses": "75;106;148;558", "wc_questions": "6;34;2;552", "wc_limitations": "1;7;62;7", "wc_review": "213;305;276;1567", "wc_reply_reviewers": "0;0;179;0", "wc_reply_authors": "36;36;510;36", "reply_reviewers": "0;0;3;0", "reply_authors": "2;2;5;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 96.0, 48.63640611722869 ], "wc_strengths_avg": [ 104.75, 101.77518115925906 ], "wc_weaknesses_avg": [ 221.75, 195.85501653008532 ], "wc_questions_avg": [ 148.5, 233.28684060615163 ], "wc_limitations_avg": [ 19.25, 24.80297361204902 ], "wc_review_avg": [ 590.25, 564.9067954096498 ], "wc_reply_reviewers_avg": [ 44.75, 77.50927363870726 ], "wc_reply_authors_avg": [ 154.5, 205.24802069691196 ], "reply_reviewers_avg": [ 0.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4510265351549314483&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.illinois.edu;illinois.edu;illinois.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": ";https://illinois.edu", "aff_unique_abbr": ";UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "title": "Star-Agents: Automatic Data Optimization with LLM Agents for Instruction Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93952", "id": "jXsxGt80sv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jXsxGt80sv", "openreview": "https://openreview.net/forum?id=jXsxGt80sv", "poster": "", "project": "", "author_site": "Hang Zhou, Yehui Tang, Haochen Qin, Yujie Yang, Renren Jin, Deyi Xiong, Kai Han, Yunhe Wang", "tldr": "", "abstract": "The efficacy of large language models (LLMs) on downstream tasks usually hinges on instruction tuning, which relies critically on the quality of training data. Unfortunately, collecting high-quality and diverse data is both expensive and time-consuming. To mitigate this issue, we propose a novel Star-Agents framework, which automates the enhancement of data quality across datasets through multi-agent collaboration and assessment. The framework adopts a three-pronged strategy. It initially generates diverse instruction data with multiple LLM agents through a bespoke sampling method. Subsequently, the generated data undergo a rigorous evaluation using a dual-model method that assesses both difficulty and quality. Finaly, the above process evolves in a dynamic refinement phase, where more effective LLMs are prioritized, enhancing the overall data quality. Our empirical studies, including instruction tuning experiments with models such as Pythia and LLaMA, demonstrate the effectiveness of the proposed framework. Optimized datasets have achieved substantial improvements, with an average increase of 12\\% and notable gains in specific metrics, such as a 40\\% improvement in Fermi, as evidenced by benchmarks like MT-bench, Vicuna bench, and WizardLM testset. Codes will be released soon.", "keywords": "Large language models;Data;Instruction-Tuning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/3d9e6bc15390fb3a8bde56396baf68d453c522b9.zip", "author": "Hang Zhou;Yehui Tang;Haochen Qin;Yujie Yang;Renren Jin;Deyi Xiong;Kai Han;Yunhe Wang", "authorids": "~Hang_Zhou19;~Yehui_Tang1;~Haochen_Qin1;~Yujie_Yang4;~Renren_Jin1;~Deyi_Xiong2;~Kai_Han2;~Yunhe_Wang1", "gender": ";M;M;;M;M;M;M", "homepage": ";;;;;https://dyxiong.github.io;https://iamhankai.github.io;https://www.wangyunhe.site/", "dblp": ";244/9659;;;329/4176;55/6548;51/4757-2;63/8217-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;TkSZQ6gAAAAJ;xB9-qwEAAAAJ;;qW3oQDUAAAAJ;QPLO3myO5PkC;vThoBVcAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;;;;0000-0002-2353-5038;0000-0002-9761-2702;0000-0002-0142-509X", "linkedin": ";;;;renren-jin-222a861b3/;;;", "or_profile": "~Hang_Zhou19;~Yehui_Tang1;~Haochen_Qin1;~Yujie_Yang4;~Renren_Jin1;~Deyi_Xiong2;~Kai_Han2;~Yunhe_Wang1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Tianjin University;Tianjin University;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": "huawei.com;huawei.com;huawei.com;;tju.edu.cn;tju.edu.cn;huawei.com;huawei.com", "position": "Researcher;Researcher;Researcher;;PhD student;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhou2024staragents,\ntitle={Star-Agents: Automatic Data Optimization with {LLM} Agents for Instruction Tuning},\nauthor={Hang Zhou and Yehui Tang and Haochen Qin and Yujie Yang and Renren Jin and Deyi Xiong and Kai Han and Yunhe Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jXsxGt80sv}\n}", "github": "", "reviewers": "piXU;4rks;b1DV;kHqZ", "pdf_size": 511031, "rating": "5;6;6;6", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "186;82;51;109", "wc_strengths": "166;78;29;75", "wc_weaknesses": "239;47;27;13", "wc_questions": "16;33;5;50", "wc_limitations": "6;23;1;47", "wc_review": "613;263;113;294", "wc_reply_reviewers": "0;47;24;171", "wc_reply_authors": "66;297;90;139", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.0, 50.014997750674745 ], "wc_strengths_avg": [ 87.0, 49.57317823178175 ], "wc_weaknesses_avg": [ 81.5, 91.73194645269444 ], "wc_questions_avg": [ 26.0, 17.073371078963874 ], "wc_limitations_avg": [ 19.25, 17.977416388346796 ], "wc_review_avg": [ 320.75, 182.0856597868157 ], "wc_reply_reviewers_avg": [ 60.5, 65.9260949852181 ], "wc_reply_authors_avg": [ 148.0, 89.95832368380371 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13472077997693616006&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "huawei.com;huawei.com;huawei.com;;tju.edu.cn;tju.edu.cn;huawei.com;huawei.com", "author_num": 8, "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "Huawei;Tianjin University", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;http://www.tju.edu.cn", "aff_unique_abbr": "Huawei;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Statistical Multicriteria Benchmarking via the GSD-Front", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93951", "id": "jXxvSkb9HD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jXxvSkb9HD", "openreview": "https://openreview.net/forum?id=jXxvSkb9HD", "poster": "/media/PosterPDFs/NeurIPS%202024/93951.png?t=1731873816.2084208", "project": "", "author_site": "Christoph Jansen, Georg Schollmeyer, Julian Rodemann, Hannah Blocher, Thomas Augustin", "tldr": "", "abstract": "Given the vast number of classifiers that have been (and continue to be) proposed, reliable methods for comparing them are becoming increasingly important. The desire for reliability is broken down into three main aspects: (1) Comparisons should allow for different quality metrics simultaneously. (2) Comparisons should take into account the statistical uncertainty induced by the choice of benchmark suite. (3) The robustness of the comparisons under small deviations in the underlying assumptions should be verifiable. To address (1), we propose to compare classifiers using a generalized stochastic dominance ordering (GSD) and present the GSD-front as an information-efficient alternative to the classical Pareto-front. For (2), we propose a consistent statistical estimator for the GSD-front and construct a statistical test for whether a (potentially new) classifier lies in the GSD-front of a set of state-of-the-art classifiers. For (3), we relax our proposed test using techniques from robust statistics and imprecise probabilities. We illustrate our concepts on the benchmark suite PMLB and on the platform OpenML.", "keywords": "multicriteria benchmarking;robust statistics;statistical test;imprecise probabilities;reliability;non-standard scales of measurement;decision theory", "primary_area": "evaluation", "supplementary_material": "", "author": "Christoph Jansen;Georg Schollmeyer;Julian Rodemann;Hannah Blocher;Thomas Augustin", "authorids": "~Christoph_Jansen1;~Georg_Schollmeyer1;~Julian_Rodemann1;~Hannah_Blocher1;~Thomas_Augustin1", "gender": "M;M;F;M;M", "homepage": "https://chjansen.weebly.com/;https://georgschollmeyer.weebly.com;https://www.statistik.uni-muenchen.de/personen/mitarbeiter/blocher/index.html;https://www.foundstat.statistik.uni-muenchen.de/index.html;https://www.julian-rodemann.de", "dblp": ";;;34/2415.html;306/7585", "google_scholar": "https://scholar.google.de/citations?user=Lrh5XU8AAAAJ;54Hl7LAAAAAJ;;https://scholar.google.de/citations?hl=de;hIcjYocAAAAJ", "orcid": ";;;0000-0002-1854-6226;", "linkedin": ";;;;", "or_profile": "~Christoph_Jansen1;~Georg_Schollmeyer1;~Hannah_Blocher1;~Thomas_Augustin1;~Julian_Martin_Rodemann1", "aff": "Lancaster University Leipzig;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Department of Statistics;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "lancaster.ac.uk;lmu.de;lmu.de;lmu.de;lmu.de", "position": "Assistant Professor;Postdoc;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\njansen2024statistical,\ntitle={Statistical Multicriteria Benchmarking via the {GSD}-Front},\nauthor={Christoph Jansen and Georg Schollmeyer and Julian Rodemann and Hannah Blocher and Thomas Augustin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jXxvSkb9HD}\n}", "github": "", "reviewers": "n7Tx;1fNM;3sEe", "pdf_size": 1739759, "rating": "7;7;8", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "75;286;59", "wc_strengths": "32;63;91", "wc_weaknesses": "30;137;51", "wc_questions": "209;107;83", "wc_limitations": "2;9;1", "wc_review": "348;602;285", "wc_reply_reviewers": "0;102;0", "wc_reply_authors": "0;40;0", "reply_reviewers": "0;1;0", "reply_authors": "1;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 140.0, 103.44402673265705 ], "wc_strengths_avg": [ 62.0, 24.097026095903757 ], "wc_weaknesses_avg": [ 72.66666666666667, 46.29134788369085 ], "wc_questions_avg": [ 133.0, 54.62600113499065 ], "wc_limitations_avg": [ 4.0, 3.559026084010437 ], "wc_review_avg": [ 411.6666666666667, 137.02149060964447 ], "wc_reply_reviewers_avg": [ 34.0, 48.08326112068523 ], "wc_reply_authors_avg": [ 13.333333333333334, 18.856180831641264 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8447500421513541530&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "lancaster.ac.uk;lmu.de;lmu.de;lmu.de;lmu.de", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Lancaster University;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University Affiliation Not Specified", "aff_unique_dep": ";;Department of Statistics", "aff_unique_url": "https://www.lancaster.ac.uk;https://www.lmu.de;", "aff_unique_abbr": "Lancaster;LMU;", "aff_campus_unique_index": "0", "aff_campus_unique": "Leipzig;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;Germany;" }, { "title": "Gated Slot Attention for Efficient Linear-Time Sequence Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93950", "id": "jY4PhQibmg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jY4PhQibmg", "openreview": "https://openreview.net/forum?id=jY4PhQibmg", "poster": "/media/PosterPDFs/NeurIPS%202024/93950.png?t=1733893560.952345", "project": "", "author_site": "Yu Zhang, Songlin Yang, Rui-Jie Zhu, Yue Zhang, Leyang Cui, Yiqiao Wang, Bolun Wang, Freda Shi, Bailin Wang, Wei Bi, Peng Zhou, Guohong Fu", "tldr": "", "abstract": "Linear attention Transformers and their gated variants, celebrated for enabling parallel training and efficient recurrent inference, still fall short in recall-intensive tasks compared to traditional Transformers and demand significant resources for training from scratch.\nThis paper introduces Gated Slot Attention (GSA), which enhances Attention with Bounded-memory-Control (ABC) by incorporating a gating mechanism inspired by Gated Linear Attention (GLA).\nEssentially, GSA comprises a two-layer GLA linked via $\\operatorname{softmax}$, utilizing context-aware memory reading and adaptive forgetting to improve memory capacity while maintaining compact recurrent state size.\nThis design greatly enhances both training and inference efficiency through GLA's hardware-efficient training algorithm and reduced state size.\nAdditionally, retaining the $\\operatorname{softmax}$ operation is particularly beneficial in ``finetuning pretrained Transformers to RNNs'' (T2R) settings, reducing the need for extensive training from scratch.\nExtensive experiments confirm GSA's superior performance in scenarios requiring in-context recall and in T2R settings.", "keywords": "sequence modeling;language modeling;hardware-efficient;linear attention", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yu Zhang;Songlin Yang;Rui-Jie Zhu;Yue Zhang;Leyang Cui;Yiqiao Wang;Bolun Wang;Freda Shi;Bailin Wang;Wei Bi;Peng Zhou;Guohong Fu", "authorids": "~Yu_Zhang36;~Songlin_Yang1;~Rui-Jie_Zhu2;~Yue_Zhang12;~Leyang_Cui1;~Yiqiao_Wang3;~Bolun_Wang1;~Freda_Shi1;~Bailin_Wang3;~Wei_Bi1;~Peng_Zhou10;~Guohong_Fu1", "gender": "M;F;M;M;M;;F;F;F;;M;M", "homepage": "https://yzhang.site;https://sustcsonglin.github.io;https://ruijie-zhu.github.io;https://hillzhang1999.github.io/;https://github.com/Nealcly;https://www.luxitech.cn;http://www.luxitech.cn;http://ttic.uchicago.edu/~freda;https://scholar.google.com.hk/citations?hl=en&user=aSJcgQMAAAAJ&view_op=list_works&sortby=pubdate#d=gsc_md_iad&u=%2Fcitations%3Fview_op%3Dimport_lookup%26hl%3Den%26imq%3DWei%2BBi%26json%3D%26btnA%3D1;https://pengzhouzp.github.io/;http://web.suda.edu.cn/ghfu/;https://berlino.github.io/", "dblp": "50/671-92;;317/4836;;247/6181;;;194/2512;38/1163;;23/5204;218/7334", "google_scholar": "y3JK-1oAAAAJ;1chlis0AAAAJ;08ITzJsAAAAJ;wYEAchYAAAAJ;6YVwZgkAAAAJ;;;jkDd-3QAAAAJ;https://scholar.google.com.hk/citations?hl=en;wry71v4AAAAJ;ueOZz5QAAAAJ;", "orcid": "0000-0002-8345-3835;;;;;;;0009-0009-5697-449X;0000-0001-8457-0630;0000-0003-1686-6354;0000-0001-6882-6181;", "linkedin": ";;;;;;;;;peng-zhou-891336105/;;", "or_profile": "~Yu_Zhang36;~Songlin_Yang1;~Rui-Jie_Zhu2;~Yue_Zhang12;~Leyang_Cui1;~Yiqiao_Wang3;~Bolun_Wang1;~Freda_Shi1;~Wei_Bi1;~Peng_Zhou10;~Guohong_Fu1;~bailin_wang1", "aff": "Soochow University, China;Massachusetts Institute of Technology;University of California, Santa Cruz;Suzhou University;Tencent AI Lab;Luxi Technology;LuxiTech Co., Ltd;Toyota Technological Institute at Chicago;Hong Kong University of Science and Technology;LuxiTech Co., Ltd;Soochow University, China,;Massachusetts Institute of Technology", "aff_domain": "suda.edu.cn;mit.edu;ucsc.edu;suda.edu.cn;tencent.com;luxitech.cn;luxitech.cn;ttic.edu;ust.hk;luxitech.cn;suda.edu.cn;mit.edu", "position": "PhD student;PhD student;PhD student;MS student;Researcher;Intern;CEO;PhD student;PhD student;CTO;Full Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2024gated,\ntitle={Gated Slot Attention for Efficient Linear-Time Sequence Modeling},\nauthor={Yu Zhang and Songlin Yang and Rui-Jie Zhu and Yue Zhang and Leyang Cui and Yiqiao Wang and Bolun Wang and Freda Shi and Bailin Wang and Wei Bi and Peng Zhou and Guohong Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jY4PhQibmg}\n}", "github": "", "reviewers": "AbD2;eYuC;LU5r", "pdf_size": 729526, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "56;81;144", "wc_strengths": "18;70;145", "wc_weaknesses": "58;116;255", "wc_questions": "104;19;89", "wc_limitations": "8;1;62", "wc_review": "244;287;695", "wc_reply_reviewers": "16;0;37", "wc_reply_authors": "86;259;431", "reply_reviewers": "1;0;2", "reply_authors": "4;3;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 93.66666666666667, 37.025516726831626 ], "wc_strengths_avg": [ 77.66666666666667, 52.13017893270227 ], "wc_weaknesses_avg": [ 143.0, 82.65994596336648 ], "wc_questions_avg": [ 70.66666666666667, 37.04351795148812 ], "wc_limitations_avg": [ 23.666666666666668, 27.255988129012838 ], "wc_review_avg": [ 408.6666666666667, 203.2278414872223 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 15.15109090315135 ], "wc_reply_authors_avg": [ 258.6666666666667, 140.84585743129102 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13887551512377176758&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "suda.edu.cn;mit.edu;ucsc.edu;suda.edu.cn;tencent.com;luxitech.cn;luxitech.cn;ttic.edu;ust.hk;luxitech.cn;suda.edu.cn;mit.edu", "author_num": 12, "aff_unique_index": "0;1;2;3;4;5;6;7;8;6;0;1", "aff_unique_norm": "Soochow University;Massachusetts Institute of Technology;University of California, Santa Cruz;Suzhou University;Tencent;Luxi Technology;LuxiTech Co., Ltd;Toyota Technological Institute at Chicago;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;;Tencent AI Lab;;;;", "aff_unique_url": "https://www.soochow.edu.cn;https://web.mit.edu;https://www.ucsc.edu;https://www.suda.edu.cn;https://ai.tencent.com;;;https://www.tti-chicago.org;https://www.ust.hk", "aff_unique_abbr": "Soochow U;MIT;UCSC;Suda;Tencent AI Lab;;;TTI Chicago;HKUST", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Santa Cruz;Chicago;Hong Kong SAR", "aff_country_unique_index": "0;1;1;0;0;1;0;0;1", "aff_country_unique": "China;United States;" }, { "title": "Bridge the Points: Graph-based Few-shot Segment Anything Semantically", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93949", "id": "jYypS5VIPj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jYypS5VIPj", "openreview": "https://openreview.net/forum?id=jYypS5VIPj", "poster": "/media/PosterPDFs/NeurIPS%202024/93949.png?t=1730253963.7750373", "project": "", "author_site": "Anqi Zhang, Guangyu Gao, Jianbo Jiao, Chi Liu, Yunchao Wei", "tldr": "", "abstract": "The recent advancements in large-scale pre-training techniques have significantly enhanced the capabilities of vision foundation models, notably the Segment Anything Model (SAM), which can generate precise masks based on point and box prompts. Recent studies extend SAM to Few-shot Semantic Segmentation (FSS), focusing on prompt generation for SAM-based automatic semantic segmentation. However, these methods struggle with selecting suitable prompts, require specific hyperparameter settings for different scenarios, and experience prolonged one-shot inference times due to the overuse of SAM, resulting in low efficiency and limited automation ability. To address these issues, we propose a simple yet effective approach based on graph analysis. In particular, a Positive-Negative Alignment module dynamically selects the point prompts for generating masks, especially uncovering the potential of the background context as the negative reference. Another subsequent Point-Mask Clustering module aligns the granularity of masks and selected points as a directed graph, based on mask coverage over points. These points are then aggregated by decomposing the weakly connected components of the directed graph in an efficient manner, constructing distinct natural clusters. Finally, the positive and overshooting gating, benefiting from graph-based granularity alignment, aggregates high-confident masks and filters the false-positive masks for final prediction, reducing the usage of additional hyperparameters and redundant mask generation. Extensive experimental analysis across standard FSS, One-shot Part Segmentation, and Cross Domain FSS datasets validate the effectiveness and efficiency of the proposed approach, surpassing state-of-the-art generalist models with a mIoU of 58.7% on COCO-20i and 35.2% on LVIS-92i. The project page of this work is https://andyzaq.github.io/GF-SAM/.", "keywords": "semantic segmentation; few shot; Segment Anything Model; Parameter-Free;", "primary_area": "machine_vision", "supplementary_material": "/attachment/20eadc6d7334a84f672a90132653a38350f99998.zip", "author": "Anqi Zhang;Guangyu Gao;Jianbo Jiao;Chi Harold Liu;Yunchao Wei", "authorids": "~Anqi_Zhang2;~Guangyu_Gao2;~Jianbo_Jiao2;~Chi_Harold_Liu1;~Yunchao_Wei1", "gender": "M;M;;M;M", "homepage": "https://github.com/ANDYZAQ;https://guangyugao.weebly.com/;https://jianbojiao.com/;;https://weiyc.github.io/", "dblp": ";33/7626;150/6622;45/4723.html;118/5394", "google_scholar": "DmRZv5sAAAAJ;snmRfqMAAAAJ;HkEiMMwAAAAJ;3IgFTEkAAAAJ;https://scholar.google.com.sg/citations?user=qL9Csv0AAAAJ", "orcid": "0000-0002-2112-8199;0000-0002-0083-3016;;;", "linkedin": ";;;;", "or_profile": "~Anqi_Zhang2;~Guangyu_Gao2;~Jianbo_Jiao2;~Chi_Harold_Liu1;~Yunchao_Wei1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;University of Birmingham;Beijing Institute of Technology;Beijing Jiaotong University", "aff_domain": "bit.edu.cn;bit.edu.cn;bham.ac.uk;bit.edu.cn;bjtu.edu.cn", "position": "MS student;Associate Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024bridge,\ntitle={Bridge the Points: Graph-based Few-shot Segment Anything Semantically},\nauthor={Anqi Zhang and Guangyu Gao and Jianbo Jiao and Chi Harold Liu and Yunchao Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jYypS5VIPj}\n}", "github": "", "reviewers": "zdFP;DDH8;XvpJ", "pdf_size": 5735582, "rating": "6;6;7", "confidence": "4;3;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "62;72;76", "wc_strengths": "52;38;135", "wc_weaknesses": "181;79;132", "wc_questions": "4;2;15", "wc_limitations": "30;5;12", "wc_review": "329;196;370", "wc_reply_reviewers": "18;0;19", "wc_reply_authors": "0;0;49", "reply_reviewers": "1;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.0, 5.887840577551898 ], "wc_strengths_avg": [ 75.0, 42.80965623158713 ], "wc_weaknesses_avg": [ 130.66666666666666, 41.651997417757634 ], "wc_questions_avg": [ 7.0, 5.715476066494082 ], "wc_limitations_avg": [ 15.666666666666666, 10.530379332620877 ], "wc_review_avg": [ 298.3333333333333, 74.27127454286901 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 8.73053390247253 ], "wc_reply_authors_avg": [ 16.333333333333332, 23.098821518760552 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9275671105275430446&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "bit.edu.cn;bit.edu.cn;bham.ac.uk;bit.edu.cn;bjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Beijing Institute of Technology;University of Birmingham;Beijing Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.birmingham.ac.uk;http://www.njtu.edu.cn/en", "aff_unique_abbr": "BIT;Birmingham;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Data-faithful Feature Attribution: Mitigating Unobservable Confounders via Instrumental Variables", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93948", "id": "jZv9A8Tg9p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jZv9A8Tg9p", "openreview": "https://openreview.net/forum?id=jZv9A8Tg9p", "poster": "", "project": "", "author_site": "Qiheng Sun, Haocheng Xia, Jinfei Liu", "tldr": "", "abstract": "The state-of-the-art feature attribution methods often neglect the influence of unobservable confounders, posing a risk of misinterpretation, especially when it is crucial for the interpretation to remain faithful to the data. To counteract this, we propose a new approach, data-faithful feature attribution, which trains a confounder-free model using instrumental variables. The cluttered effects of unobservable confounders in a model trained as such are decoupled from input features, thereby aligning the output of the model with the contribution of input features to the target feature in the data generation. Furthermore, feature attribution results produced by our method are more robust when focusing on attributions from the perspective of data generation. Our experiments on both synthetic and real-world datasets demonstrate the effectiveness of our approaches.", "keywords": "feature attribution;Shapley value;integrated gradients", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Qiheng Sun;Haocheng Xia;Jinfei Liu", "authorids": "~Qiheng_Sun1;~Haocheng_Xia1;~Jinfei_Liu1", "gender": "M;M;M", "homepage": ";https://haochengxia.com;https://person.zju.edu.cn/jinfeiliu#947644", "dblp": "175/8831;349/2976;89/9935", "google_scholar": ";UD4iBqwAAAAJ;", "orcid": ";0000-0002-8317-6217;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAEDMhmwBa614-gItudMzmV665riW50x3O9I;;", "or_profile": "~Qiheng_Sun1;~Haocheng_Xia1;~Jinfei_Liu1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nsun2024datafaithful,\ntitle={Data-faithful Feature Attribution: Mitigating Unobservable Confounders via Instrumental Variables},\nauthor={Qiheng Sun and Haocheng Xia and Jinfei Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jZv9A8Tg9p}\n}", "github": "", "reviewers": "Ye8R;p3Si;6T6P", "pdf_size": 848502, "rating": "5;6;7", "confidence": "2;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "49;66;112", "wc_strengths": "114;80;108", "wc_weaknesses": "63;79;107", "wc_questions": "99;101;350", "wc_limitations": "6;10;10", "wc_review": "331;336;687", "wc_reply_reviewers": "0;118;256", "wc_reply_authors": "0;48;382", "reply_reviewers": "0;1;2", "reply_authors": "1;2;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 75.66666666666667, 26.612444874949432 ], "wc_strengths_avg": [ 100.66666666666667, 14.817407180595245 ], "wc_weaknesses_avg": [ 83.0, 18.184242262647807 ], "wc_questions_avg": [ 183.33333333333334, 117.85395859094236 ], "wc_limitations_avg": [ 8.666666666666666, 1.8856180831641267 ], "wc_review_avg": [ 451.3333333333333, 166.6539995186301 ], "wc_reply_reviewers_avg": [ 124.66666666666667, 104.61782299822956 ], "wc_reply_authors_avg": [ 143.33333333333334, 169.89670064156306 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11359940508335386797&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "DC-Gaussian: Improving 3D Gaussian Splatting for Reflective Dash Cam Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93947", "id": "ja20BpFAPa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ja20BpFAPa", "openreview": "https://openreview.net/forum?id=ja20BpFAPa", "poster": "/media/PosterPDFs/NeurIPS%202024/93947.png?t=1731020556.3325315", "project": "", "author_site": "Linhan Wang, Kai Cheng, Shuo Lei, Shengkun Wang, Wei Yin, Chenyang Lei, Xiaoxiao Long, Chang-Tien Lu", "tldr": "", "abstract": "We present DC-Gaussian, a new method for generating novel views from in-vehicle dash cam videos. While neural rendering techniques have made significant strides in driving scenarios, existing methods are primarily designed for videos collected by autonomous vehicles. However, these videos are limited in both quantity and diversity compared to dash cam videos, which are more widely used across various types of vehicles and capture a broader range of scenarios. Dash cam videos often suffer from severe obstructions such as reflections and occlusions on the windshields, which significantly impede the application of neural rendering techniques. To address this challenge, we develop DC-Gaussian based on the recent real-time neural rendering technique 3D Gaussian Splatting (3DGS). Our approach includes an adaptive image decomposition module to model reflections and occlusions in a unified manner. Additionally, we introduce illumination-aware obstruction modeling to manage reflections and occlusions under varying lighting conditions. Lastly, we employ a geometry-guided Gaussian enhancement strategy to improve rendering details by incorporating additional geometry priors. Experiments on self-captured and public dash cam videos show that our method not only achieves state-of-the-art performance in novel view synthesis, but also accurately reconstructing captured scenes getting rid of obstructions.", "keywords": "Neural rendering; Novel view synthesis; Reflection removal", "primary_area": "machine_vision", "supplementary_material": "/attachment/2d360cc9d126fe8a7fe815964b59f63963d2ed8a.zip", "author": "Linhan Wang;Kai Cheng;Shuo Lei;Shengkun Wang;Wei Yin;Chenyang Lei;Xiaoxiao Long;Chang-Tien Lu", "authorids": "~Linhan_Wang3;~Kai_Cheng1;~Shuo_Lei1;~Shengkun_Wang1;~Wei_Yin2;~Chenyang_Lei1;~Xiaoxiao_Long2;~Chang-Tien_Lu1", "gender": "M;M;F;M;M;M;M;M", "homepage": ";https://cklibra.github.io/;https://slei109.github.io/;;https://yvanyin.net/;https://chenyanglei.github.io/;http://people.cs.vt.edu/~ctlu/;https://xxlong0.github.io/", "dblp": ";23/2177;183/5433;;67/4051-6;153/6769;08/4367;262/3688", "google_scholar": "Zr2Tr3UAAAAJ;LeDSFrAAAAAJ;vm368LkAAAAJ;;ZIf_rtcAAAAJ;CuGF_pEAAAAJ;https://scholar.google.com/citations?hl=en;W3G5kZEAAAAJ", "orcid": "0009-0000-8057-1767;;;0009-0004-1378-0197;;;0000-0003-3675-0199;0000-0002-3386-8805", "linkedin": "linhan-wang-a663b9139/;;;;;;chang-tien-lu-00b2381/;", "or_profile": "~Linhan_Wang3;~Kai_Cheng1;~Shuo_Lei1;~Shengkun_Wang1;~Wei_Yin2;~Chenyang_Lei1;~Chang-Tien_Lu1;~XIAOXIAO_LONG1", "aff": "Virginia Polytechnic Institute and State University;University of Science and Technology of China;Sony Coporation of America;Virginia Polytechnic Institute and State University; Shenzhen DJI Sciences and Technologies Ltd.;Princeton University;Virginia Tech;University of Hong Kong", "aff_domain": "vt.edu;ustc.edu.cn;sony.com;vt.edu;dji.com;princeton.edu;vt.edu;hku.hk", "position": "PhD student;PhD student;Researcher;PhD student;Researcher;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nwang2024dcgaussian,\ntitle={{DC}-Gaussian: Improving 3D Gaussian Splatting for Reflective Dash Cam Videos},\nauthor={Linhan Wang and Kai Cheng and Shuo Lei and Shengkun Wang and Wei Yin and Chenyang Lei and Xiaoxiao Long and Chang-Tien Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ja20BpFAPa}\n}", "github": "", "reviewers": "N7Lh;zoS8;si9q;QCPG", "pdf_size": 13290236, "rating": "4;5;5;6", "confidence": "4;5;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;4;3;3", "wc_summary": "160;90;91;88", "wc_strengths": "35;86;44;79", "wc_weaknesses": "146;181;125;51", "wc_questions": "26;23;2;23", "wc_limitations": "13;23;16;35", "wc_review": "380;403;278;276", "wc_reply_reviewers": "0;90;358;101", "wc_reply_authors": "47;126;673;43", "reply_reviewers": "0;1;2;1", "reply_authors": "3;2;3;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.25, 30.474374480865066 ], "wc_strengths_avg": [ 61.0, 21.874642854227357 ], "wc_weaknesses_avg": [ 125.75, 47.567714891510185 ], "wc_questions_avg": [ 18.5, 9.604686356149273 ], "wc_limitations_avg": [ 21.75, 8.46684711093805 ], "wc_review_avg": [ 334.25, 57.82895036225368 ], "wc_reply_reviewers_avg": [ 137.25, 133.3367447480251 ], "wc_reply_authors_avg": [ 222.25, 262.3369731852527 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7552374123757356289&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "vt.edu;ustc.edu.cn;sony.com;vt.edu;dji.com;princeton.edu;vt.edu;hku.hk", "author_num": 8, "aff_unique_index": "0;1;2;0;3;4;0;5", "aff_unique_norm": "Virginia Tech;University of Science and Technology of China;Sony Corporation of America;DJI Sciences and Technologies;Princeton University;University of Hong Kong", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.vt.edu;http://www.ustc.edu.cn;https://www.sony.com;https://www.dji.com;https://www.princeton.edu;https://www.hku.hk", "aff_unique_abbr": "VT;USTC;SCA;DJI;Princeton;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;1;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Revisiting Score Propagation in Graph Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93946", "id": "jb5qN3212b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jb5qN3212b", "openreview": "https://openreview.net/forum?id=jb5qN3212b", "poster": "", "project": "", "author_site": "Longfei Ma, Yiyou Sun, Kaize Ding, Zemin Liu, Fei Wu", "tldr": "", "abstract": "The field of graph learning has been substantially advanced by the development of deep learning models, in particular graph neural networks. However, one salient yet largely under-explored challenge is detecting Out-of-Distribution (OOD) nodes on graphs. Prevailing OOD detection techniques developed in other domains like computer vision, do not cater to the interconnected nature of graphs. This work aims to fill this gap by exploring the potential of a simple yet effective method -- OOD score propagation, which propagates OOD scores among neighboring nodes along the graph structure. This post hoc solution can be easily integrated with existing OOD scoring functions, showcasing its excellent flexibility and effectiveness in most scenarios. However, the conditions under which score propagation proves beneficial remain not fully elucidated. Our study meticulously derives these conditions and, inspired by this discovery, introduces an innovative edge augmentation strategy with theoretical guarantee. Empirical evaluations affirm the superiority of our proposed method, outperforming strong OOD detection baselines in various scenarios and settings.", "keywords": "Graph Neural Network;Out-of-distribution Detection", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Longfei Ma;Yiyou Sun;Kaize Ding;Zemin Liu;Fei Wu", "authorids": "~Longfei_Ma1;~Yiyou_Sun1;~Kaize_Ding1;~Zemin_Liu1;~Fei_Wu1", "gender": ";M;M;M;M", "homepage": "https://github.com/ma-longfei;https://sunyiyou.github.io/;https://kaize0409.github.io/;https://zemin-liu.github.io/;https://person.zju.edu.cn/wufei", "dblp": ";211/5630;234/6878;17/964.html;84/3254-1", "google_scholar": ";IKqlQo4AAAAJ;PI3myr8AAAAJ;IxHO1nkAAAAJ;XJLn4MYAAAAJ", "orcid": ";;;0000-0001-6262-9435;", "linkedin": ";;;;", "or_profile": "~Longfei_Ma1;~Yiyou_Sun1;~Kaize_Ding1;~Zemin_Liu1;~Fei_Wu1", "aff": "Zhejiang University;University of California, Berkeley;Northwestern University;National University of Singapore;Zhejiang University", "aff_domain": "cs.zju.edu.cn;berkeley.edu;northwestern.edu;nus.edu;zju.edu.cn", "position": "PhD student;Postdoc;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nma2024revisiting,\ntitle={Revisiting Score Propagation in Graph Out-of-Distribution Detection},\nauthor={Longfei Ma and Yiyou Sun and Kaize Ding and Zemin Liu and Fei Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jb5qN3212b}\n}", "github": "", "reviewers": "hXZy;xmGd;hQev;aWhs", "pdf_size": 1275025, "rating": "5;5;5;7", "confidence": "4;5;4;3", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "83;97;44;57", "wc_strengths": "35;77;45;66", "wc_weaknesses": "203;114;63;42", "wc_questions": "137;39;66;58", "wc_limitations": "13;19;1;41", "wc_review": "471;346;219;264", "wc_reply_reviewers": "18;46;118;0", "wc_reply_authors": "621;915;1918;825", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;4;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 20.873128658636684 ], "wc_strengths_avg": [ 55.75, 16.60383991732033 ], "wc_weaknesses_avg": [ 105.5, 62.082606259724635 ], "wc_questions_avg": [ 75.0, 37.11468711979127 ], "wc_limitations_avg": [ 18.5, 14.517231140957975 ], "wc_review_avg": [ 325.0, 95.80448841259997 ], "wc_reply_reviewers_avg": [ 45.5, 44.95275297465106 ], "wc_reply_authors_avg": [ 1069.75, 501.1872778712564 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15444035569551641287&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "cs.zju.edu.cn;berkeley.edu;northwestern.edu;nus.edu;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Zhejiang University;University of California, Berkeley;Northwestern University;National University of Singapore", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.berkeley.edu;https://www.northwestern.edu;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;UC Berkeley;NU;NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;2;0", "aff_country_unique": "China;United States;Singapore" }, { "title": "INQUIRE: A Natural World Text-to-Image Retrieval Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97543", "id": "jbrMS0DNaD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jbrMS0DNaD", "openreview": "https://openreview.net/forum?id=jbrMS0DNaD", "poster": "", "project": "", "author_site": "Edward Vendrow, Omiros Pantazis, Alexander Shepard, Gabriel Brostow, Kate Jones, Oisin Mac Aodha, Sara Beery, Grant Van Horn", "tldr": "", "abstract": "We introduce INQUIRE, a text-to-image retrieval benchmark designed to challenge multimodal vision-language models on expert-level queries. INQUIRE includes iNaturalist 2024 (iNat24), a new dataset of five million natural world images, along with 250 expert-level retrieval queries. These queries are paired with all relevant images comprehensively labeled within iNat24, comprising 33,000 total matches. Queries span categories such as species identification, context, behavior, and appearance, emphasizing tasks that require nuanced image understanding and domain expertise. Our benchmark evaluates two core retrieval tasks: (1) INQUIRE-Fullrank, a full dataset ranking task, and (2) INQUIRE-Rerank, a reranking task for refining top-100 retrievals. Detailed evaluation of a range of recent multimodal models demonstrates that INQUIRE poses a significant challenge, with the best models failing to achieve an mAP@50 above 50%. In addition, we show that reranking with more powerful multimodal models can enhance retrieval performance, yet there remains a significant margin for improvement. By focusing on scientifically-motivated ecological challenges, INQUIRE aims to bridge the gap between AI capabilities and the needs of real-world scientific inquiry, encouraging the development of retrieval systems that can assist with accelerating ecological and biodiversity research.", "keywords": "image retrieval;expert;benchmarks;computer vision;ecology;reranking;text-to-image;t2i;retrieval", "primary_area": "", "supplementary_material": "/attachment/c916484d9463cab1907b6f34addf7e90e5ad08fd.pdf", "author": "Edward Vendrow;Omiros Pantazis;Alexander Shepard;Gabriel Brostow;Kate E. Jones;Oisin Mac Aodha;Sara Beery;Grant Van Horn", "authorids": "~Edward_Vendrow1;~Omiros_Pantazis1;~Alexander_Shepard1;~Gabriel_Brostow1;~Kate_E._Jones1;~Oisin_Mac_Aodha4;~Sara_Beery1;~Grant_Van_Horn1", "gender": "M;M;M;M;F;F;M;M", "homepage": "https://edwardv.com;https://omipan.github.io/;;http://www0.cs.ucl.ac.uk/staff/G.Brostow/;https://profiles.ucl.ac.uk/35481-kate-jones;https://beerys.github.io/;https://gvh.codes/;https://homepages.inf.ed.ac.uk/omacaod/", "dblp": ";251/8619;04/11130;41/281;155/3043;191/2643;144/8033;90/8653", "google_scholar": "uz4U3TsAAAAJ;1-A7q8kAAAAJ;;https://scholar.google.com.tw/citations?user=CZiTv0gAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;PxYY_nsAAAAJ;IfZBjkUAAAAJ", "orcid": "0000-0001-8501-6834;;;0000-0001-8472-3828;0000-0001-5231-3293;;0000-0003-2953-9651;0000-0002-5787-5073", "linkedin": "edward-vendrow-51231b89/;opantazis/;;gabriel-brostow-b126071/?originalSubdomain=uk;kate-jones-991688234/;;;oisin-mac-aodha-406273273/", "or_profile": "~Edward_Vendrow1;~Omiros_Pantazis1;~Alexander_Shepard1;~Gabriel_Brostow1;~Kate_E._Jones1;~Sara_Beery1;~Grant_Van_Horn1;~Oisin_Mac_Aodha2", "aff": "Massachusetts Institute of Technology;University College London, University of London;iNaturalist;Niantic;University College London, University of London;Massachusetts Institute of Technology;University of Massachusetts at Amherst;University of Edinburgh, University of Edinburgh", "aff_domain": "mit.edu;ucl.ac.uk;inaturalist.org;nianticlabs.com;ucl.ac.uk;mit.edu;umass.edu;ed.ac.uk", "position": "PhD student;PhD student;Software Developer;Chief Research Scientist;Full Professor;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nvendrow2024inquire,\ntitle={{INQUIRE}: A Natural World Text-to-Image Retrieval Benchmark},\nauthor={Edward Vendrow and Omiros Pantazis and Alexander Shepard and Gabriel Brostow and Kate E. Jones and Oisin Mac Aodha and Sara Beery and Grant Van Horn},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=jbrMS0DNaD}\n}", "github": "", "reviewers": "efky;aBR1;gN6o", "pdf_size": 23928119, "rating": "4;7;7", "confidence": "4;3;4", "wc_summary_and_contributions": "104;68;136", "wc_strengths": "112;118;66", "wc_improvement": "45;407;151", "wc_limitations": "64;21;16", "wc_correctness": "29;52;40", "wc_clarity": "8;8;1", "wc_relation_to_prior_work": "7;26;1", "wc_documentation": "24;6;30", "wc_additional_feedback": "1;1;1", "wc_review": "394;707;442", "wc_reply_reviewers": "0;32;0", "wc_reply_authors": "31;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 102.66666666666667, 27.776888874666213 ], "wc_strengths_avg": [ 98.66666666666667, 23.22833518691246 ], "wc_improvement_avg": [ 201.0, 151.95613402119267 ], "wc_limitations_avg": [ 33.666666666666664, 21.545816814923082 ], "wc_correctness_avg": [ 40.333333333333336, 9.392668535736913 ], "wc_clarity_avg": [ 5.666666666666667, 3.299831645537222 ], "wc_relation_to_prior_work_avg": [ 11.333333333333334, 10.656244908763853 ], "wc_documentation_avg": [ 20.0, 10.198039027185569 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 514.3333333333334, 137.63801154558368 ], "wc_reply_reviewers_avg": [ 10.666666666666666, 15.084944665313014 ], "wc_reply_authors_avg": [ 10.333333333333334, 14.613540144521982 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3601981809671823625&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "mit.edu;ucl.ac.uk;inaturalist.org;nianticlabs.com;ucl.ac.uk;mit.edu;umass.edu;ed.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;1;0;4;5", "aff_unique_norm": "Massachusetts Institute of Technology;University College London;iNaturalist;Niantic;University of Massachusetts Amherst;University of Edinburgh", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://web.mit.edu;https://www.ucl.ac.uk;https://www.inaturalist.org;https://www.nianticlabs.com;https://www.umass.edu;https://www.ed.ac.uk", "aff_unique_abbr": "MIT;UCL;iNaturalist;Niantic;UMass Amherst;Edinburgh", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;1;0;0;1;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Small coresets via negative dependence: DPPs, linear statistics, and concentration", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93945", "id": "jd3msHMtTL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jd3msHMtTL", "openreview": "https://openreview.net/forum?id=jd3msHMtTL", "poster": "/media/PosterPDFs/NeurIPS%202024/93945.png?t=1731366792.3106194", "project": "", "author_site": "R\u00e9mi Bardenet, Subhroshekhar Ghosh, Hugo Simon-Onfroy, Hoang Son Tran", "tldr": "", "abstract": "Determinantal point processes (DPPs) are random configurations of points with tunable negative dependence. \nBecause sampling is tractable, DPPs are natural candidates for subsampling tasks, such as minibatch selection or coreset construction. \nA \\emph{coreset} is a subset of a (large) training set, such that minimizing an empirical loss averaged over the coreset is a controlled replacement for the intractable minimization of the original empirical loss.\nTypically, the control takes the form of a guarantee that the average loss over the coreset approximates the total loss uniformly across the parameter space.\nRecent work has provided significant empirical support in favor of using DPPs to build randomized coresets, coupled with interesting theoretical results that are suggestive but leave some key questions unanswered.\nIn particular, the central question of whether the cardinality of a DPP-based coreset is fundamentally smaller than one based on independent sampling remained open.\nIn this paper, we answer this question in the affirmative, demonstrating that \\emph{DPPs can provably outperform independently drawn coresets}. \nIn this vein, we contribute a conceptual understanding of coreset loss as a \\emph{linear statistic} of the (random) coreset. \nWe leverage this structural observation to connect the coresets problem to a more general problem of concentration phenomena for linear statistics of DPPs, wherein we obtain \\emph{effective concentration inequalities that extend well-beyond the state-of-the-art}, encompassing general non-projection, even non-symmetric kernels. \nThe latter have been recently shown to be of interest in machine learning beyond coresets, but come with a limited theoretical toolbox, to the extension of which our result contributes. Finally, we are also able to address the coresets problem for vector-valued objective functions, a novelty in the coresets literature.", "keywords": "Coresets;determinantal point processes;concentration inequalities", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "R\u00e9mi Bardenet;Subhroshekhar Ghosh;Hugo Simon-Onfroy;Hoang-Son Tran", "authorids": "~R\u00e9mi_Bardenet1;~Subhroshekhar_Ghosh1;~Hugo_Simon-Onfroy1;~Hoang-Son_Tran1", "gender": "M;;;", "homepage": "http://rbardenet.github.io;https://subhro-ghosh.github.io/;;https://sites.google.com/view/hoangson-tran", "dblp": "09/8412;;;", "google_scholar": "https://scholar.google.ca/citations?user=Nj74Gv4AAAAJ;RpGHEzsAAAAJ;;", "orcid": "0000-0002-1094-9493;;0009-0004-3707-9282;", "linkedin": ";;hsimonfroy/;", "or_profile": "~R\u00e9mi_Bardenet1;~Subhroshekhar_Ghosh1;~Hugo_Simon-Onfroy1;~Hoang-Son_Tran1", "aff": "CNRS & Univ. Lille;National University of Singapore;CEA Paris-Saclay;National University of Singapore", "aff_domain": "univ-lille.fr;nus.edu.sg;cea.fr;u.nus.edu", "position": "Researcher;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nbardenet2024small,\ntitle={Small coresets via negative dependence: {DPP}s, linear statistics, and concentration},\nauthor={R{\\'e}mi Bardenet and Subhroshekhar Ghosh and Hugo Simon-Onfroy and Hoang-Son Tran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jd3msHMtTL}\n}", "github": "", "reviewers": "818N;koaH;329X;5TaP", "pdf_size": 591120, "rating": "6;6;7;8", "confidence": "5;3;2;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "34;114;68;73", "wc_strengths": "32;51;27;91", "wc_weaknesses": "78;3;179;169", "wc_questions": "50;27;4;52", "wc_limitations": "1;1;4;12", "wc_review": "195;196;282;397", "wc_reply_reviewers": "18;20;19;15", "wc_reply_authors": "0;32;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.25, 28.39344114403888 ], "wc_strengths_avg": [ 50.25, 25.17315037892556 ], "wc_weaknesses_avg": [ 107.25, 71.91096926060725 ], "wc_questions_avg": [ 33.25, 19.536824204563032 ], "wc_limitations_avg": [ 4.5, 4.5 ], "wc_review_avg": [ 267.5, 82.68766534375003 ], "wc_reply_reviewers_avg": [ 18.0, 1.8708286933869707 ], "wc_reply_authors_avg": [ 8.0, 13.856406460551018 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.48420012470625223, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qdxL286cGWgJ:scholar.google.com/&scioq=Small+coresets+via+negative+dependence:+DPPs,+linear+statistics,+and+concentration&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "univ-lille.fr;nus.edu.sg;cea.fr;u.nus.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "CNRS;National University of Singapore;CEA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cnrs.fr;https://www.nus.edu.sg;https://www.cea.fr", "aff_unique_abbr": "CNRS;NUS;CEA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris-Saclay", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "France;Singapore" }, { "title": "LT-Defense: Searching-free Backdoor Defense via Exploiting the Long-tailed Effect", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93944", "id": "jdCMwF06c6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jdCMwF06c6", "openreview": "https://openreview.net/forum?id=jdCMwF06c6", "poster": "/media/PosterPDFs/NeurIPS%202024/93944.png?t=1730550517.5138202", "project": "", "author_site": "Yixiao Xu, Binxing Fang, Mohan Li, Keke Tang, Zhihong Tian", "tldr": "", "abstract": "Language models have shown vulnerability against backdoor attacks, threatening the security of services based on them. To mitigate the threat, existing solutions attempted to search for backdoor triggers, which can be time-consuming when handling a large search space. Looking into the attack process, we observe that poisoned data will create a long-tailed effect in the victim model, causing the decision boundary to shift towards the attack targets. Inspired by this observation, we introduce LT-Defense, the first searching-free backdoor defense via exploiting the long-tailed effect. Specifically, LT-Defense employs a small set of clean examples and two metrics to distinguish backdoor-related features in the target model. Upon detecting a backdoor model, LT-Defense additionally provides test-time backdoor freezing and attack target prediction. Extensive experiments demonstrate the effectiveness of LT-Defense in both detection accuracy and efficiency, e.g., in task-agnostic scenarios, LT-Defense achieves 98% accuracy across 1440 models with less than 1% of the time cost of state-of-the-art solutions.", "keywords": "backdoor defense;natural language processing;deep long-tailed learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yixiao Xu;Binxing Fang;Mohan Li;Keke Tang;Zhihong Tian", "authorids": "~Yixiao_Xu1;~Binxing_Fang1;~Mohan_Li2;~Keke_Tang2;~Zhihong_Tian2", "gender": "M;M;F;M;M", "homepage": ";;;https://tangbohu.github.io/;", "dblp": "231/1772;87/254;50/8279;162/3984;73/5444", "google_scholar": "1TLNZQEAAAAJ;;;9Lk6HpQAAAAJ;", "orcid": ";;0000-0002-2290-046X;0000-0003-0377-1022;", "linkedin": ";;;;", "or_profile": "~Yixiao_Xu1;~Binxing_Fang1;~Mohan_Li2;~Keke_Tang2;~Zhihong_Tian2", "aff": "Beijing University of Posts and Telecommunications;Harbin Institute of Technology;Guangzhou University;Guangzhou University;Guangzhou University", "aff_domain": "bupt.edu.cn;hit.edu.cn;gzhu.edu.cn;gzhu.edu.cn;gzhu.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024ltdefense,\ntitle={{LT}-Defense: Searching-free Backdoor Defense via Exploiting the Long-tailed Effect},\nauthor={Yixiao Xu and Binxing Fang and Mohan Li and Keke Tang and Zhihong Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jdCMwF06c6}\n}", "github": "", "reviewers": "FiYe;Qza2;gJDd", "pdf_size": 2175847, "rating": "4;4;6", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "3;2;4", "presentation": "2;2;3", "wc_summary": "91;97;69", "wc_strengths": "66;67;99", "wc_weaknesses": "95;220;132", "wc_questions": "51;21;10", "wc_limitations": "5;12;2", "wc_review": "308;417;312", "wc_reply_reviewers": "0;130;10", "wc_reply_authors": "0;214;0", "reply_reviewers": "0;1;1", "reply_authors": "1;2;1", "rating_avg": [ 4.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 85.66666666666667, 12.036980056845191 ], "wc_strengths_avg": [ 77.33333333333333, 15.326085243430198 ], "wc_weaknesses_avg": [ 149.0, 52.42772803266099 ], "wc_questions_avg": [ 27.333333333333332, 17.326921891156037 ], "wc_limitations_avg": [ 6.333333333333333, 4.189935029992179 ], "wc_review_avg": [ 345.6666666666667, 50.46671070011289 ], "wc_reply_reviewers_avg": [ 46.666666666666664, 59.0668171555645 ], "wc_reply_authors_avg": [ 71.33333333333333, 100.88056744928079 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2680811368123287640&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 2, "email": "bupt.edu.cn;hit.edu.cn;gzhu.edu.cn;gzhu.edu.cn;gzhu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Harbin Institute of Technology;Guangzhou University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.hit.edu.cn/;http://www.gzhu.edu.cn", "aff_unique_abbr": "BUPT;HIT;GU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Beijing;Harbin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Reinforcement Learning Policy as Macro Regulator Rather than Macro Placer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93943", "id": "jeWZStUavo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jeWZStUavo", "openreview": "https://openreview.net/forum?id=jeWZStUavo", "poster": "", "project": "", "author_site": "Ke Xue, Ruo-Tong Chen, Xi Lin, Yunqi Shi, Shixiong Kai, Siyuan Xu, Chao Qian", "tldr": "", "abstract": "In modern chip design, placement aims at placing millions of circuit modules, which is an essential step that significantly influences power, performance, and area (PPA) metrics. Recently, reinforcement learning (RL) has emerged as a promising technique for improving placement quality, especially macro placement. However, current RL-based placement methods suffer from long training times, low generalization ability, and inability to guarantee PPA results. A key issue lies in the problem formulation, i.e., using RL to place from scratch, which results in limits useful information and inaccurate rewards during the training process. In this work, we propose an approach that utilizes RL for the refinement stage, which allows the RL policy to learn how to adjust existing placement layouts, thereby receiving sufficient information for the policy to act and obtain relatively dense and precise rewards. Additionally, we introduce the concept of regularity during training, which is considered an important metric in the chip design industry but is often overlooked in current RL placement methods. We evaluate our approach on the ISPD 2005 and ICCAD 2015 benchmark, comparing the global half-perimeter wirelength and regularity of our proposed method against several competitive approaches. Besides, we test the PPA performance using commercial software, showing that RL as a regulator can achieve significant PPA improvements. Our RL regulator can fine-tune placements from any method and enhance their quality. Our work opens up new possibilities for the application of RL in placement, providing a more effective and efficient approach to optimizing chip design. Our code is available at \\url{https://github.com/lamda-bbo/macro-regulator}.", "keywords": "Reinforcement learning;EDA;Placement;Chip design", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/0492155617665292195fe6dcf02078fc1198392c.zip", "author": "Ke Xue;Ruo-Tong Chen;Xi Lin;Yunqi Shi;Shixiong Kai;Siyuan Xu;Chao Qian", "authorids": "~Ke_Xue1;~Ruo-Tong_Chen1;~Xi_Lin7;~Yunqi_Shi1;~Shixiong_Kai1;~Siyuan_Xu5;~Chao_Qian1", "gender": "M;M;M;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/xuek/;https://github.com/StudyForest;https://www.lamda.nju.edu.cn/shiyq/;;;http://www.lamda.nju.edu.cn/qianc/;https://git.nju.edu.cn/DukeTomlist", "dblp": "93/2469-1;;;;;84/8508-1;", "google_scholar": "78bZVOwAAAAJ;;2oMUbPYAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;", "orcid": "0000-0001-6789-2670;;;;0000-0001-6239-6774;;", "linkedin": ";;;;siyuanxu1991/;;", "or_profile": "~Ke_Xue1;~Xi_Lin7;~Yunqi_Shi1;~Shixiong_Kai1;~Siyuan_Xu5;~Chao_Qian1;~Ruotong_Chen1", "aff": "Nanjing University;Nanjing University;Nanjing university;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Nanjing university;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;huawei.com;huawei.com;nju.edu;smail.nju.edu.cn", "position": "PhD student;Undergrad student;MS student;Researcher;Researcher;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nxue2024reinforcement,\ntitle={Reinforcement Learning Policy as Macro Regulator Rather than Macro Placer},\nauthor={Ke Xue and Ruo-Tong Chen and Xi Lin and Yunqi Shi and Shixiong Kai and Siyuan Xu and Chao Qian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jeWZStUavo}\n}", "github": "", "reviewers": "fJAy;ChiY;XyiM;KS3m;NxAR", "pdf_size": 4442009, "rating": "5;5;6;7;7", "confidence": "5;3;3;4;5", "soundness": "2;3;4;3;3", "novelty": "3;2;4;3;3", "presentation": "1;3;2;3;2", "wc_summary": "76;83;56;78;201", "wc_strengths": "77;84;113;77;262", "wc_weaknesses": "208;78;70;67;288", "wc_questions": "7;59;85;54;177", "wc_limitations": "77;9;37;1;1", "wc_review": "445;313;361;277;929", "wc_reply_reviewers": "0;48;0;13;13", "wc_reply_authors": "0;96;0;0;0", "reply_reviewers": "0;1;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 4.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 98.8, 51.92070877790479 ], "wc_strengths_avg": [ 122.6, 70.95519713171122 ], "wc_weaknesses_avg": [ 142.2, 90.08529291732364 ], "wc_questions_avg": [ 76.4, 56.240910376699986 ], "wc_limitations_avg": [ 25.0, 29.175332046096752 ], "wc_review_avg": [ 465.0, 238.72997298202839 ], "wc_reply_reviewers_avg": [ 14.8, 17.58863269273652 ], "wc_reply_authors_avg": [ 19.2, 38.400000000000006 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.25, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14085191703491322950&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;huawei.com;huawei.com;nju.edu;smail.nju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "Nanjing University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.nju.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Nanjing U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "FuseMoE: Mixture-of-Experts Transformers for Fleximodal Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93942", "id": "jfE7XCE89y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jfE7XCE89y", "openreview": "https://openreview.net/forum?id=jfE7XCE89y", "poster": "/media/PosterPDFs/NeurIPS%202024/93942.png?t=1733594243.644216", "project": "", "author_site": "Xing Han, Huy Nguyen, Carl Harris, Nhat Ho, Suchi Saria", "tldr": "", "abstract": "As machine learning models in critical fields increasingly grapple with multimodal data, they face the dual challenges of handling a wide array of modalities, often incomplete due to missing elements, and the temporal irregularity and sparsity of collected samples. Successfully leveraging this complex data, while overcoming the scarcity of high-quality training samples, is key to improving these models' predictive performance. We introduce ``FuseMoE'', a mixture-of-experts framework incorporated with an innovative gating function. Designed to integrate a diverse number of modalities, FuseMoE is effective in managing scenarios with missing modalities and irregularly sampled data trajectories. Theoretically, our unique gating function contributes to enhanced convergence rates, leading to better performance in multiple downstream tasks. The practical utility of FuseMoE in the real world is validated by a diverse set of challenging prediction tasks.", "keywords": "Multimodal Fusion; Mixture-of-Experts; MoE Theory; Missingness and Irregularity", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Xing Han;Huy Nguyen;Carl William Harris;Nhat Ho;Suchi Saria", "authorids": "~Xing_Han1;~Huy_Nguyen5;~Carl_William_Harris1;~Nhat_Ho1;~Suchi_Saria1", "gender": "M;M;M;M;F", "homepage": "https://aaronhan223.github.io/;https://huynm99.github.io/;;https://nhatptnk8912.github.io/;https://suchisaria.jhu.edu/", "dblp": "05/2143;48/6075;;203/4479;72/2433", "google_scholar": "Vejou24AAAAJ;_YYwzhQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=Xs7cKMwAAAAJ;", "orcid": "0000-0003-0857-5506;;0000-0002-0680-5833;;", "linkedin": "xing-han-628653b6/;huy-nguyen-081199/;carlwharris/;nhat-pham-minh-ho-267b8164/;", "or_profile": "~Xing_Han1;~Huy_Nguyen5;~Carl_William_Harris1;~Nhat_Ho1;~Suchi_Saria1", "aff": "Johns Hopkins University;Microsoft AI;Johns Hopkins University;University of Texas, Austin;Department of Computer Science, Whiting School of Engineering", "aff_domain": "cs.jhu.edu;microsoft.com;jh.edu;utexas.edu;cs.jhu.edu", "position": "Postdoc;Intern;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhan2024fusemoe,\ntitle={FuseMoE: Mixture-of-Experts Transformers for Fleximodal Fusion},\nauthor={Xing Han and Huy Nguyen and Carl William Harris and Nhat Ho and Suchi Saria},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jfE7XCE89y}\n}", "github": "", "reviewers": "P6x6;sZSN;6SUH;6Ay7", "pdf_size": 3032302, "rating": "5;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;2", "novelty": "2;2;3;3", "presentation": "3;3;2;3", "wc_summary": "41;143;53;190", "wc_strengths": "71;89;94;87", "wc_weaknesses": "127;107;151;39", "wc_questions": "36;35;9;149", "wc_limitations": "1;90;1;18", "wc_review": "276;464;308;483", "wc_reply_reviewers": "0;0;0;42", "wc_reply_authors": "0;123;0;40", "reply_reviewers": "0;0;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 106.75, 62.162589231787955 ], "wc_strengths_avg": [ 85.25, 8.613216588476108 ], "wc_weaknesses_avg": [ 106.0, 41.701318923986086 ], "wc_questions_avg": [ 57.25, 54.0665099668917 ], "wc_limitations_avg": [ 27.5, 36.745748053346254 ], "wc_review_avg": [ 382.75, 91.69889584940486 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 40.75, 50.2164066814821 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12583590489407811251&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.jhu.edu;microsoft.com;jh.edu;utexas.edu;cs.jhu.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Johns Hopkins University;Microsoft;University of Texas at Austin", "aff_unique_dep": ";Microsoft AI;", "aff_unique_url": "https://www.jhu.edu;https://www.microsoft.com;https://www.utexas.edu", "aff_unique_abbr": "JHU;Microsoft;UT Austin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Baltimore", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "LocCa: Visual Pretraining with Location-aware Captioners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93941", "id": "jfHkAEgKwH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jfHkAEgKwH", "openreview": "https://openreview.net/forum?id=jfHkAEgKwH", "poster": "/media/PosterPDFs/NeurIPS%202024/93941.png?t=1731723332.340597", "project": "", "author_site": "Bo Wan, Michael Tschannen, Yongqin Xian, Filip Pavetic, Ibrahim Alabdulmohsin, Xiao Wang, Andr\u00e9 Susano Pinto, Andreas Steiner, Lucas Beyer, Xiaohua Zhai", "tldr": "", "abstract": "Image captioning was recently found to be an effective pretraining method similar to contrastive pretraining. This opens up the largely-unexplored potential of using natural language as a flexible and powerful interface for handling diverse pretraining tasks. In this paper, we demonstrate this with a novel visual pretraining paradigm, LocCa, that incorporates location-aware tasks into captioners to teach models to extract rich information from images. Specifically, LocCa employs two tasks, bounding box prediction and location-dependent captioning, conditioned on the image pixel input. Thanks to the multitask capabilities of an encoder-decoder architecture, we show that an image captioner can effortlessly handle multiple tasks during pretraining. LocCa significantly outperforms standard captioners on downstream localization tasks, achieving state-of-the-art results on RefCOCO/+/g, while maintaining comparable performance on holistic tasks. Our work paves the way for further exploration of natural language interfaces in visual pretraining.", "keywords": "Vision Language Models;Visual Pretraining;Location-aware Generation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Bo Wan;Michael Tschannen;Yongqin Xian;Filip Pavetic;Ibrahim Alabdulmohsin;Xiao Wang;Andr\u00e9 Susano Pinto;Andreas Peter Steiner;Lucas Beyer;Xiaohua Zhai", "authorids": "~Bo_Wan1;~Michael_Tschannen1;~Yongqin_Xian1;~Filip_Pavetic1;~Ibrahim_Alabdulmohsin1;~Xiao_Wang5;~Andr\u00e9_Susano_Pinto1;~Andreas_Peter_Steiner1;~Lucas_Beyer1;~Xiaohua_Zhai2", "gender": "M;;M;M;M;M;M;M;;", "homepage": "https://bobwan.w3spaces.com/;https://mitscha.github.io/;https://xianyongqin.github.io/;;http://ibomohsin.com;;;;http://lucasb.eyer.be;", "dblp": "86/4321.html;134/9824;177/9313.html;149/2329;153/5393;49/67-38;73/10264;s/AndreasSteiner;126/4720;66/636", "google_scholar": "_7KkpE4AAAAJ;https://scholar.google.ch/citations?user=TSj_8nYAAAAJ;https://scholar.google.de/citations?user=JmdnBzcAAAAJ;aA76AEQAAAAJ;8WNMsPYAAAAJ;ukyXqzMAAAAJ;pTYo1vYAAAAJ;;p2gwhK4AAAAJ;", "orcid": ";;;;;;;;;", "linkedin": ";;;filip-pavetic/;;;;andreas-steiner-1859223b/;;", "or_profile": "~Bo_Wan1;~Michael_Tschannen1;~Yongqin_Xian1;~Filip_Pavetic1;~Ibrahim_Alabdulmohsin1;~Xiao_Wang5;~Andr\u00e9_Susano_Pinto1;~Andreas_Peter_Steiner1;~Lucas_Beyer1;~Xiaohua_Zhai2", "aff": "KU Leuven;Google DeepMind;Google;Google;Google;Google DeepMind;Google DeepMind;Google DeepMind;Google Brain;Google DeepMind", "aff_domain": "kuleuven.be;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Software Engineer;Research Scientist;Researcher;Software Engineer;Research Engineer;Researcher;Researcher", "bibtex": "@inproceedings{\nwan2024locca,\ntitle={LocCa: Visual Pretraining with Location-aware Captioners},\nauthor={Bo Wan and Michael Tschannen and Yongqin Xian and Filip Pavetic and Ibrahim Alabdulmohsin and Xiao Wang and Andr{\\'e} Susano Pinto and Andreas Peter Steiner and Lucas Beyer and Xiaohua Zhai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jfHkAEgKwH}\n}", "github": "", "reviewers": "gCZj;88rJ;HN67;ris8", "pdf_size": 5107863, "rating": "5;5;7;7", "confidence": "5;3;5;4", "soundness": "3;3;4;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "72;52;59;91", "wc_strengths": "66;19;37;30", "wc_weaknesses": "345;58;358;109", "wc_questions": "30;58;1;113", "wc_limitations": "7;58;1;5", "wc_review": "520;245;456;348", "wc_reply_reviewers": "0;0;35;20", "wc_reply_authors": "62;62;22;62", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.5, 14.84082207965583 ], "wc_strengths_avg": [ 38.0, 17.392527130926087 ], "wc_weaknesses_avg": [ 217.5, 135.28580856837868 ], "wc_questions_avg": [ 50.5, 41.33098111586513 ], "wc_limitations_avg": [ 17.75, 23.33854108550918 ], "wc_review_avg": [ 392.25, 104.91037841891526 ], "wc_reply_reviewers_avg": [ 13.75, 14.737282653189496 ], "wc_reply_authors_avg": [ 52.0, 17.320508075688775 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6370127227244833474&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 5, "email": "kuleuven.be;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;google.com", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;1;1;1", "aff_unique_norm": "Katholieke Universiteit Leuven;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.kuleuven.be;https://deepmind.com", "aff_unique_abbr": "KU Leuven;DeepMind", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;2;2;1;1;1;2;1", "aff_country_unique": "Belgium;United Kingdom;United States" }, { "title": "Medformer: A Multi-Granularity Patching Transformer for Medical Time-Series Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93940", "id": "jfkid2HwNr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jfkid2HwNr", "openreview": "https://openreview.net/forum?id=jfkid2HwNr", "poster": "/media/PosterPDFs/NeurIPS%202024/93940.png?t=1731739086.9506214", "project": "", "author_site": "Yihe Wang, Nan Huang, Taida Li, Yujun Yan, Xiang Zhang", "tldr": "", "abstract": "Medical time series (MedTS) data, such as Electroencephalography (EEG) and Electrocardiography (ECG), play a crucial role in healthcare, such as diagnosing brain and heart diseases. Existing methods for MedTS classification primarily rely on handcrafted biomarkers extraction and CNN-based models, with limited exploration of transformer-based models. In this paper, we introduce Medformer, a multi-granularity patching transformer tailored specifically for MedTS classification. Our method incorporates three novel mechanisms to leverage the unique characteristics of MedTS: cross-channel patching to leverage inter-channel correlations, multi-granularity embedding for capturing features at different scales, and two-stage (intra- and inter-granularity) multi-granularity self-attention for learning features and correlations within and among granularities. We conduct extensive experiments on five public datasets under both subject-dependent and challenging subject-independent setups. Results demonstrate Medformer's superiority over 10 baselines, achieving top averaged ranking across five datasets on all six evaluation metrics. These findings underscore the significant impact of our method on healthcare applications, such as diagnosing Myocardial Infarction, Alzheimer's, and Parkinson's disease. We release the source code at https://github.com/DL4mHealth/Medformer.", "keywords": "Transformer;Time Series;Healthcare", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Yihe Wang;Nan Huang;Taida Li;Yujun Yan;Xiang Zhang", "authorids": "~Yihe_Wang2;~Nan_Huang2;~Taida_Li1;~Yujun_Yan1;~Xiang_Zhang10", "gender": "M;M;M;F;M", "homepage": "https://webpages.charlotte.edu/ywang145/;https://teddyhuang-00.github.io/;https://webpages.charlotte.edu/tli14/;https://sites.google.com/umich.edu/yujunyan/home;http://xiangzhang.info/", "dblp": ";;;219/1736;https://dblp.uni-trier.de/pers/hd/z/Zhang_0012:Xiang", "google_scholar": "1JRRcVkAAAAJ;;;5TQUP58AAAAJ;0hCzMi4AAAAJ", "orcid": "0009-0000-5220-5452;;;0000-0003-3776-4293;", "linkedin": "yihe-wang-4a8a641b5/;;;;", "or_profile": "~Yihe_Wang2;~Nan_Huang2;~Taida_Li1;~Yujun_Yan1;~Xiang_Zhang10", "aff": "University of North Carolina at Charlotte;University of North Carolina at Charlotte;University of North Carolina at Charlotte;Dartmouth College;University of North Carolina at Charlotte", "aff_domain": "uncc.edu;uncc.edu;uncc.edu;dartmouth.edu;uncc.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024medformer,\ntitle={Medformer: A Multi-Granularity Patching Transformer for Medical Time-Series Classification},\nauthor={Yihe Wang and Nan Huang and Taida Li and Yujun Yan and Xiang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jfkid2HwNr}\n}", "github": "", "reviewers": "vj5L;zGJA;AkiE;ApjC", "pdf_size": 1780802, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "54;35;78;60", "wc_strengths": "34;51;24;169", "wc_weaknesses": "88;107;34;57", "wc_questions": "42;36;21;39", "wc_limitations": "1;33;25;66", "wc_review": "219;262;182;391", "wc_reply_reviewers": "22;61;40;83", "wc_reply_authors": "61;237;17;19", "reply_reviewers": "1;2;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 56.75, 15.35211711784404 ], "wc_strengths_avg": [ 69.5, 58.25160941982633 ], "wc_weaknesses_avg": [ 71.5, 28.0579756931964 ], "wc_questions_avg": [ 34.5, 8.077747210701755 ], "wc_limitations_avg": [ 31.25, 23.263436977368585 ], "wc_review_avg": [ 263.5, 78.868561543875 ], "wc_reply_reviewers_avg": [ 51.5, 22.830900113661748 ], "wc_reply_authors_avg": [ 83.5, 90.34793854870182 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14075625917756522065&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uncc.edu;uncc.edu;uncc.edu;dartmouth.edu;uncc.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of North Carolina at Charlotte;Dartmouth College", "aff_unique_dep": ";", "aff_unique_url": "https://www.uncc.edu;https://www.dartmouth.edu", "aff_unique_abbr": "UNCC;Dartmouth", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Charlotte;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unified Graph Augmentations for Generalized Contrastive Learning on Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93939", "id": "jgkKroLxeC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jgkKroLxeC", "openreview": "https://openreview.net/forum?id=jgkKroLxeC", "poster": "", "project": "", "author_site": "Jiaming Zhuo, Yintong Lu, Hui Ning, Kun Fu, bingxin niu, Dongxiao He, Chuan Wang, Yuanfang Guo, Zhen Wang, Xiaochun Cao, Liang Yang", "tldr": "", "abstract": "In real-world scenarios, networks (graphs) and their tasks possess unique characteristics, requiring the development of a versatile graph augmentation (GA) to meet the varied demands of network analysis. Unfortunately, most Graph Contrastive Learning (GCL) frameworks are hampered by the specificity, complexity, and incompleteness of their GA techniques. Firstly, GAs designed for specific scenarios may compromise the universality of models if mishandled. Secondly, the process of identifying and generating optimal augmentations generally involves substantial computational overhead. Thirdly, the effectiveness of the GCL, even the learnable ones, is constrained by the finite selection of GAs available. To overcome the above limitations, this paper introduces a novel unified GA module dubbed UGA after reinterpreting the mechanism of GAs in GCLs from a message-passing perspective. Theoretically, this module is capable of unifying any explicit GAs, including node, edge, attribute, and subgraph augmentations. Based on the proposed UGA, a novel generalized GCL framework dubbed Graph cOntrastive UnifieD Augmentations (GOUDA) is proposed. It seamlessly integrates widely adopted contrastive losses and an introduced independence loss to fulfill the common requirements of consistency and diversity of augmentation across diverse scenarios. Evaluations across various datasets and tasks demonstrate the generality and efficiency of the proposed GOUDA over existing state-of-the-art GCLs.", "keywords": "Graph Neural Networks;Graph Contrastive Learning;Graph Augmentation", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/4c49216db33bc8377638d5a35ad7540e99a3e814.zip", "author": "Jiaming Zhuo;Yintong Lu;Hui Ning;Kun Fu;Bingxin Niu;Dongxiao He;Chuan Wang;Yuanfang Guo;Zhen Wang;Xiaochun Cao;Liang Yang", "authorids": "~Jiaming_Zhuo1;~Yintong_Lu4;~Hui_Ning1;~Kun_Fu5;~Bingxin_Niu2;~Dongxiao_He1;~Chuan_Wang1;~Yuanfang_Guo1;~Zhen_Wang11;~Xiaochun_Cao3;~Liang_Yang2", "gender": "M;M;M;F;F;M;M;M;M;M;F", "homepage": "https://github.com/Jiamingz1996;https://hanyang-shu.github.io/;https://github.com/ninghui048;http://cic.tju.edu.cn/faculty/hedongxiao/index.htm;https://chuanwang-cv.github.io/;https://irip.buaa.edu.cn/andyguo/index.html;http://iopen.nwpu.edu.cn/info/1015/1351.htm?ivk_sa=1024320u;https://scst.sysu.edu.cn/members/caoxiaochun.htm;http://yangliang.github.io/;;https://ai.hebut.edu.cn/szdw/jsml/js/55e694ad86274c9aa2213a8ccef435f5.htm", "dblp": "359/4143;;;48/8875;68/363-2;78/8545;;39/3695;05/3933-2;169/1022.html;", "google_scholar": ";;;JyqwTr4AAAAJ;;;https://scholar.google.co.uk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;7agkJogAAAAJ;;", "orcid": "0009-0008-1229-7987;;;;;;;0000-0001-7141-708X;;;", "linkedin": ";;;;;;;;;;", "or_profile": "~Jiaming_Zhuo1;~Yintong_Lu4;~Hui_Ning1;~Dongxiao_He1;~Chuan_Wang1;~Yuanfang_Guo1;~Zhen_Wang11;~Xiaochun_Cao3;~Liang_Yang2;~bingxin_niu1;~kun_fu4", "aff": "Hebei University of Technology;Hebei University of Technology;Hebei University of Technology;Tianjin University;Institute of Information Engineering, Chinese Academy of Sciences;Beihang University;Northwestern Polytechnical University;SUN YAT-SEN UNIVERSITY;Hebei University of Technology;Hebei University of Techonology;Hebei University of Technology", "aff_domain": "hebut.edu.cn;hebut.edu.cn;hebut.edu.cn;tju.edu.cn;iie.ac.cn;buaa.edu.cn;nwpu.edu.cn;sysu.edu.cn;hebut.edu.cn;hebut.edu.cn;hebut.edu.cn", "position": "PhD student;Undergrad student;MS student;Full Professor;Associate Professor;Associate Professor;Full Professor;Full Professor;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhuo2024unified,\ntitle={Unified Graph Augmentations for Generalized Contrastive Learning on Graphs},\nauthor={Jiaming Zhuo and Yintong Lu and Hui Ning and Kun Fu and Bingxin Niu and Dongxiao He and Chuan Wang and Yuanfang Guo and Zhen Wang and Xiaochun Cao and Liang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jgkKroLxeC}\n}", "github": "", "reviewers": "i3ih;9GrB;BtAD;sGNH", "pdf_size": 1332237, "rating": "5;7;7;8", "confidence": "3;4;4;5", "soundness": "2;3;4;3", "novelty": "2;3;4;3", "presentation": "2;3;4;3", "wc_summary": "61;85;58;109", "wc_strengths": "37;46;74;56", "wc_weaknesses": "56;106;48;90", "wc_questions": "39;2;36;4", "wc_limitations": "1;1;11;1", "wc_review": "194;240;227;260", "wc_reply_reviewers": "13;20;19;29", "wc_reply_authors": "26;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 78.25, 20.60794749605113 ], "wc_strengths_avg": [ 53.25, 13.736356867816154 ], "wc_weaknesses_avg": [ 75.0, 23.853720883753127 ], "wc_questions_avg": [ 20.25, 17.297037318569906 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 230.25, 24.00390593216029 ], "wc_reply_reviewers_avg": [ 20.25, 5.717298313014636 ], "wc_reply_authors_avg": [ 6.5, 11.258330249197702 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11483293279187766188&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "hebut.edu.cn;hebut.edu.cn;hebut.edu.cn;tju.edu.cn;iie.ac.cn;buaa.edu.cn;nwpu.edu.cn;sysu.edu.cn;hebut.edu.cn;hebut.edu.cn;hebut.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;1;2;3;4;5;0;0;0", "aff_unique_norm": "Hebei University of Technology;Tianjin University;Chinese Academy of Sciences;Beihang University;Northwestern Polytechnical University;Sun Yat-sen University", "aff_unique_dep": ";;Institute of Information Engineering;;;", "aff_unique_url": "http://www.hbut.edu.cn;http://www.tju.edu.cn;http://www.cas.cn;http://www.buaa.edu.cn/;https://www.nwpu.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "HUT;TJU;CAS;BUAA;NWPU;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Advection Augmented Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93938", "id": "jgpWXnXdME", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jgpWXnXdME", "openreview": "https://openreview.net/forum?id=jgpWXnXdME", "poster": "/media/PosterPDFs/NeurIPS%202024/93938.png?t=1731558525.7977912", "project": "", "author_site": "Niloufar Zakariaei, Siddharth Rout, Eldad Haber, Moshe Eliasof", "tldr": "", "abstract": "Many problems in physical sciences are characterized by the prediction of space-time sequences. Such problems range from weather prediction to the analysis of disease propagation and video prediction. Modern techniques for the solution of these problems typically combine Convolution Neural Networks (CNN) architecture with a time prediction mechanism. However, oftentimes, such approaches underperform in the long-range propagation of information and lack explainability. In this work, we introduce a physically inspired architecture for the solution of such problems. Namely, we propose to augment CNNs with advection by designing a novel semi-Lagrangian push operator. We show that the proposed operator allows for the non-local transformation of information compared with standard convolutional kernels. We then complement it with Reaction and Diffusion neural components to form a network that mimics the Reaction-Advection-Diffusion equation, in high dimensions. We demonstrate the effectiveness of our network on a number of spatio-temporal datasets that show their merit. Our code is available at https://github.com/Siddharth-Rout/deepADRnet.", "keywords": "Reaction-Advection-Diffusion System;Partial Differential Equation;Semi-Lagrangian Scheme;Spatio-temporal Prediction", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Niloufar Zakariaei;Siddharth Rout;Eldad Haber;Moshe Eliasof", "authorids": "~Niloufar_Zakariaei1;~Siddharth_Rout2;~Eldad_Haber4;~Moshe_Eliasof1", "gender": "F;M;M;M", "homepage": ";;https://eldad-haber.webnode.com/;", "dblp": ";239/6004;;", "google_scholar": "https://scholar.google.ca/citations?user=9oDr0OIAAAAJ;44LKqBsAAAAJ;https://scholar.google.ca/citations?user=NZmEIS8AAAAJ;2r63M5kAAAAJ", "orcid": ";;;", "linkedin": "niloufarz/;;;", "or_profile": "~Niloufar_Zakariaei1;~Moshe_Eliasof1;~Eldad_Haber3;~SIDDHARTH_ROUT1", "aff": "University of British Columbia;University of Cambridge;University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;cam.ac.uk;ubc.ca;ubc.ca", "position": "PhD student;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nzakariaei2024advection,\ntitle={Advection Augmented Convolutional Neural Networks},\nauthor={Niloufar Zakariaei and Siddharth Rout and Eldad Haber and Moshe Eliasof},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jgpWXnXdME}\n}", "github": "", "reviewers": "Lq8m;2i2R;wodQ;zzNS", "pdf_size": 5354245, "rating": "5;5;6;7", "confidence": "2;4;3;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "1;2;2;2", "wc_summary": "102;43;54;153", "wc_strengths": "107;27;38;45", "wc_weaknesses": "140;198;90;454", "wc_questions": "2;91;45;130", "wc_limitations": "1;5;50;48", "wc_review": "352;364;277;830", "wc_reply_reviewers": "28;198;50;24", "wc_reply_authors": "0;680;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.0, 43.59472445147463 ], "wc_strengths_avg": [ 54.25, 31.123744954616242 ], "wc_weaknesses_avg": [ 220.5, 140.12405218234306 ], "wc_questions_avg": [ 67.0, 48.09885653526495 ], "wc_limitations_avg": [ 26.0, 23.054283766797006 ], "wc_review_avg": [ 455.75, 218.63025293860866 ], "wc_reply_reviewers_avg": [ 75.0, 71.7007670809734 ], "wc_reply_authors_avg": [ 170.0, 294.44863728670913 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3789404759428227050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ubc.ca;cam.ac.uk;ubc.ca;ubc.ca", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of British Columbia;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.cam.ac.uk", "aff_unique_abbr": "UBC;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Canada;United Kingdom" }, { "title": "LucidAction: A Hierarchical and Multi-model Dataset for Comprehensive Action Quality Assessment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97542", "id": "ji5isUwL3r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ji5isUwL3r", "openreview": "https://openreview.net/forum?id=ji5isUwL3r", "poster": "/media/PosterPDFs/NeurIPS%202024/97542.png?t=1732857232.4347286", "project": "", "author_site": "Linfeng Dong, Wei Wang, Yu Qiao, Xiao Sun", "tldr": "", "abstract": "Action Quality Assessment (AQA) research confronts formidable obstacles due to limited, mono-modal datasets sourced from one-shot competitions, which hinder the generalizability and comprehensiveness of AQA models. To address these limitations, we present LucidAction, the first systematically collected multi-view AQA dataset structured on curriculum learning principles. LucidAction features a three-tier hierarchical structure, encompassing eight diverse sports events with four curriculum levels, facilitating sequential skill mastery and supporting a wide range of athletic abilities. The dataset encompasses multi-modal data, including multi-view RGB video, 2D and 3D pose sequences, enhancing the richness of information available for analysis. Leveraging a high-precision multi-view Motion Capture (MoCap) system ensures precise capture of complex movements. Meticulously annotated data, incorporating detailed penalties from professional gymnasts, ensures the establishment of robust and comprehensive ground truth annotations. Experimental evaluations employing diverse contrastive regression baselines on LucidAction elucidate the dataset's complexities. Through ablation studies, we investigate the advantages conferred by multi-modal data and fine-grained annotations, offering insights into improving AQA performance. The data and code will be openly released to support advancements in the AI sports field.", "keywords": "Action Quality Assessment;Sports;Human body pose / motion;Dataset creation", "primary_area": "", "supplementary_material": "/attachment/3db0b81ffd9133031b9ec9dab583b0735bf306eb.zip", "author": "Linfeng Dong;Wei Wang;Yu Qiao;Xiao Sun", "authorids": "~Linfeng_Dong1;~Wei_Wang45;~Yu_Qiao1;~Xiao_Sun8", "gender": "F;M;;", "homepage": ";;;", "dblp": "134/4260.html;35/7092-315;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Linfeng_Dong1;~Wei_Wang45;~Yu_Qiao1;~Xiao_Sun8", "aff": "Zhejiang University;Shanghai Artificial Intelligence Laboratory;;", "aff_domain": "zju.edu.cn;pjlab.org.cn;;", "position": "PhD student;Researcher;;", "bibtex": "@inproceedings{\ndong2024lucidaction,\ntitle={LucidAction: A Hierarchical and Multi-model Dataset for Comprehensive Action Quality Assessment},\nauthor={Linfeng Dong and Wei Wang and Yu Qiao and Xiao Sun},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ji5isUwL3r}\n}", "github": "", "reviewers": "FFMn;oQZi;573E;XN6L", "pdf_size": 13443872, "rating": "6;6;7;8", "confidence": "3;5;5;4", "wc_summary_and_contributions": "134;186;49;47", "wc_strengths": "206;38;49;22", "wc_improvement": "149;2;5;43", "wc_limitations": "37;2;5;5", "wc_correctness": "11;1;1;48", "wc_clarity": "1;1;16;19", "wc_relation_to_prior_work": "1;1;52;1", "wc_documentation": "3;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "543;233;179;187", "wc_reply_reviewers": "0;0;39;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 104.0, 58.94488951554664 ], "wc_strengths_avg": [ 78.75, 74.09242538883446 ], "wc_improvement_avg": [ 49.75, 59.53727823809214 ], "wc_limitations_avg": [ 12.25, 14.341809509263467 ], "wc_correctness_avg": [ 15.25, 19.343926695477318 ], "wc_clarity_avg": [ 9.25, 8.317902379807062 ], "wc_relation_to_prior_work_avg": [ 13.75, 22.083647796503186 ], "wc_documentation_avg": [ 1.5, 0.8660254037844386 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 285.5, 150.08914018009432 ], "wc_reply_reviewers_avg": [ 9.75, 16.887495373796554 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8394978286184826721&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "zju.edu.cn;pjlab.org.cn;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Zhejiang University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "ZJU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Efficient Temporal Action Segmentation via Boundary-aware Query Voting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93937", "id": "jij4vOVU7i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jij4vOVU7i", "openreview": "https://openreview.net/forum?id=jij4vOVU7i", "poster": "/media/PosterPDFs/NeurIPS%202024/93937.png?t=1731209269.9936044", "project": "", "author_site": "Peiyao Wang, Yuewei Lin, Erik Blasch, jie wei, Haibin Ling", "tldr": "", "abstract": "Although the performance of Temporal Action Segmentation (TAS) has been improved in recent years, achieving promising results often comes with a high computational cost due to dense inputs, complex model structures, and resource-intensive post-processing requirements. To improve the efficiency while keeping the high performance, we present a novel perspective centered on per-segment classification. By harnessing the capabilities of Transformers, we tokenize each video segment as an instance token, endowed with intrinsic instance segmentation. To realize efficient action segmentation, we introduce BaFormer, a boundary-aware Transformer network. It employs instance queries for instance segmentation and a global query for class-agnostic boundary prediction, yielding continuous segment proposals. During inference, BaFormer employs a simple yet effective voting strategy to classify boundary-wise segments based on instance segmentation. Remarkably, as a single-stage approach, BaFormer significantly reduces the computational costs, utilizing only 6% of the running time compared to the state-of-the-art method DiffAct, while producing better or comparable accuracy over several popular benchmarks. The code for this project is publicly available at https://github.com/peiyao-w/BaFormer.", "keywords": "efficiency;action understanding;action segmentation;action detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Peiyao Wang;Yuewei Lin;Erik Blasch;Jie Wei;Haibin Ling", "authorids": "~Peiyao_Wang2;~Yuewei_Lin1;~Erik_Blasch1;~Jie_Wei1;~Haibin_Ling1", "gender": "F;M;M;M;M", "homepage": ";https://ywlincq.github.io/;https://sites.google.com/site/erikblasch/;https://www.ccny.cuny.edu/profiles/jie-wei/;https://www3.cs.stonybrook.edu/~hling/", "dblp": ";41/1100;01/4960;;93/3488", "google_scholar": "ZwRTlyYAAAAJ;wOFhljYAAAAJ;Po7s1TsAAAAJ;https://scholar.google.com.tw/citations?user=VOuKF7sAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6894-6108;;", "linkedin": ";;erik-blasch-76a0429/;;", "or_profile": "~Peiyao_Wang2;~Yuewei_Lin1;~Erik_Blasch1;~Jie_Wei1;~Haibin_Ling1", "aff": "State University of New York at Stony Brook;Brookhaven national laboratory;Air Force Research Laboratory;CUNY City College of NY;State University of New York, Stony Brook", "aff_domain": "stonybrook.edu;bnl.gov;us.af.mil;ccny.cuny.edu;stonybrook.edu", "position": "PhD student;Scientist;Principal Researcher;full Professor;Professor", "bibtex": "@inproceedings{\nwang2024efficient,\ntitle={Efficient Temporal Action Segmentation via Boundary-aware Query Voting},\nauthor={Peiyao Wang and Yuewei Lin and Erik Blasch and Jie Wei and Haibin Ling},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jij4vOVU7i}\n}", "github": "", "reviewers": "W81q;rrtF;KTbs;KZs3", "pdf_size": 2462694, "rating": "4;4;5;6", "confidence": "3;4;3;4", "soundness": "2;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "43;62;86;71", "wc_strengths": "48;43;68;39", "wc_weaknesses": "238;255;149;146", "wc_questions": "37;8;9;70", "wc_limitations": "7;1;7;21", "wc_review": "373;369;319;347", "wc_reply_reviewers": "184;125;33;99", "wc_reply_authors": "505;347;31;239", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 15.56438241627338 ], "wc_strengths_avg": [ 49.5, 11.146748404803978 ], "wc_weaknesses_avg": [ 197.0, 49.87484335815001 ], "wc_questions_avg": [ 31.0, 25.347583711273153 ], "wc_limitations_avg": [ 9.0, 7.3484692283495345 ], "wc_review_avg": [ 352.0, 21.470910553583888 ], "wc_reply_reviewers_avg": [ 110.25, 54.2004381901106 ], "wc_reply_authors_avg": [ 280.5, 172.3332527401488 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16984181288659989342&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "stonybrook.edu;bnl.gov;us.af.mil;ccny.cuny.edu;stonybrook.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "State University of New York at Stony Brook;Brookhaven National Laboratory;Air Force Research Laboratory;City College of New York;State University of New York", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.stonybrook.edu;https://www.bnl.gov;https://www.afrl.af.mil/;https://www.ccny.cuny.edu;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;BNL;AFRL;CCNY;SUNY Stony Brook", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Stony Brook;;New York", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Canonicalization Perspective on Invariant and Equivariant Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93936", "id": "jjcY92FX4R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jjcY92FX4R", "openreview": "https://openreview.net/forum?id=jjcY92FX4R", "poster": "", "project": "", "author_site": "George Ma, Yifei Wang, Derek Lim, Stefanie Jegelka, Yisen Wang", "tldr": "", "abstract": "In many applications, we desire neural networks to exhibit invariance or equivariance to certain groups due to symmetries inherent in the data. Recently, frame-averaging methods emerged to be a unified framework for attaining symmetries efficiently by averaging over input-dependent subsets of the group, i.e., frames. What we currently lack is a principled understanding of the design of frames. In this work, we introduce a canonicalization perspective that provides an essential and complete view of the design of frames. Canonicalization is a classic approach for attaining invariance by mapping inputs to their canonical forms. We show that there exists an inherent connection between frames and canonical forms. Leveraging this connection, we can efficiently compare the complexity of frames as well as determine the optimality of certain frames. Guided by this principle, we design novel frames for eigenvectors that are strictly superior to existing methods --- some are even optimal --- both theoretically and empirically. The reduction to the canonicalization perspective further uncovers equivalences between previous methods. These observations suggest that canonicalization provides a fundamental understanding of existing frame-averaging methods and unifies existing equivariant and invariant learning methods. Code is available at https://github.com/PKU-ML/canonicalization.", "keywords": "Equivariance;Canonicalization;Invariance", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "George Ma;Yifei Wang;Derek Lim;Stefanie Jegelka;Yisen Wang", "authorids": "~George_Ma1;~Yifei_Wang1;~Derek_Lim1;~Stefanie_Jegelka3;~Yisen_Wang1", "gender": "M;M;M;F;M", "homepage": "https://github.com/GeorgeMLP;https://yifeiwang77.com;https://cptq.github.io/;http://people.csail.mit.edu/stefje/;https://yisenwang.github.io/", "dblp": "86/8408;00/555-1;267/5433;38/7003;172/1346-1", "google_scholar": "kiYSRMkAAAAJ;-CLy6YsAAAAJ;y9YTBIsAAAAJ;gTWUZlsAAAAJ;uMWPDboAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~George_Ma1;~Yifei_Wang1;~Derek_Lim1;~Stefanie_Jegelka3;~Yisen_Wang1", "aff": "Peking University;Massachusetts Institute of Technology;Liquid AI;Massachusetts Institute of Technology;Peking University", "aff_domain": "pku.edu.cn;mit.edu;liquid.ai;mit.edu;pku.edu.cn", "position": "Undergrad student;Postdoc;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nma2024a,\ntitle={A Canonicalization Perspective on Invariant and Equivariant Learning},\nauthor={George Ma and Yifei Wang and Derek Lim and Stefanie Jegelka and Yisen Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jjcY92FX4R}\n}", "github": "", "reviewers": "TcMZ;8tUB;MgpB;y3Wh", "pdf_size": 698349, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "1;2;4;3", "wc_summary": "64;72;102;129", "wc_strengths": "81;52;29;70", "wc_weaknesses": "149;259;82;169", "wc_questions": "435;17;55;67", "wc_limitations": "2;23;5;1", "wc_review": "731;423;273;436", "wc_reply_reviewers": "105;28;4;55", "wc_reply_authors": "572;0;0;50", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 91.75, 25.752427070084092 ], "wc_strengths_avg": [ 58.0, 19.685019685029527 ], "wc_weaknesses_avg": [ 164.75, 63.23912950065015 ], "wc_questions_avg": [ 143.5, 169.30667441066817 ], "wc_limitations_avg": [ 7.75, 8.926785535678562 ], "wc_review_avg": [ 465.75, 165.99905873227112 ], "wc_reply_reviewers_avg": [ 48.0, 37.52998800959041 ], "wc_reply_authors_avg": [ 155.5, 241.33120395009013 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15563182889195269812&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "pku.edu.cn;mit.edu;liquid.ai;mit.edu;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Peking University;Massachusetts Institute of Technology;Liquid AI", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://web.mit.edu;", "aff_unique_abbr": "Peking U;MIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;0", "aff_country_unique": "China;United States;Unknown" }, { "id": "jkJDNG468g", "title": "CY-Bench: A comprehensive benchmark dataset for subnational crop yield forecasting", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "In-season or pre-harvest crop yield forecasts are essential for enhancing transparency in commodity markets and planning towards achieving the United Nations\u2019 Sustainable Development Goal 2 of zero hunger, especially in the context of climate change and extreme events leading to crop failures. Pre-harvest crop yield forecasting is a difficult problem, as several interacting factors contribute to yield formation, including in-season weather variability, extreme events, long-term climate change, pests, diseases and farmer management decisions. Machine learning methods provide ways to capture complex interactions among such predictors and crop yields. Prior research in agricultural applications, including crop yield forecasting, has primarily been case-study based, which makes it difficult to compare modeling approaches and measure progress. To address this gap, we introduce CY-Bench (Crop Yield Benchmark), a comprehensive dataset and benchmark to forecast crop yields. We standardized data source selection, preprocessing and spatio-temporal harmonization of public subnational yield statistics with relevant predictors such as weather, soil, and remote sensing indicators, in collaboration with domain experts such as agronomists, climate scientists, and machine learning researchers. With CY-Bench we aim to: (i) standardize machine learning model evaluation in a framework that covers multiple farming systems in more than twenty five countries across the globe, (ii) facilitate robust and reproducible model comparison through a benchmark addressing real-world operational needs, (iii) share a dataset with the machine learning community to facilitate research efforts related to time series forecasting, domain adaptation and online learning. The dataset and code used will be openly available, supporting the further development of advanced machine learning models for crop yield forecasting that can be used to aid decision-makers in improving global and regional food security.", "keywords": "benchmark dataset; crop yield forecasts; agriculture; climate change; food security", "primary_area": "", "supplementary_material": "/attachment/b1ba42d246eebc0cd0070759b56d4b17c9c052c5.pdf", "author": "Dilli Paudel;Hilmy Baja;Ron van Bree;Michiel Kallenberg;Stella Ofori-Ampofo;Aike Potze;Pratishtha Poudel;Abdelrahman Saleh;Weston Anderson;Malte von Bloh;Andres Castellano;Oumnia Ennaji;Raed Hamed;Rahel Laudien;Donghoon Lee;Inti Luna;Dainius Masili\u016bnas;Michele Meroni;Janet Mumo Mutuku;Siyabusa Mkuhlani;Jonathan Richetti;Alex C. Ruane;Ritvik Sahajpal;Guanyuan Shuai;Vasileios Sitokonstantinou;Rogerio de Souza Noia Junior;Amit Kumar Srivastava;Robert Strong;Lily-belle Sweet;Petar Vojnovi\u0107;Allard de Wit;Maximilian Zachow;Ioannis N. Athanasiadis", "authorids": "~Dilli_Paudel1;~Hilmy_Baja1;~Ron_van_Bree1;~Michiel_Kallenberg1;~Stella_Ofori-Ampofo1;~Aike_Potze1;~Pratishtha_Poudel1;~Abdelrahman_Saleh1;~Weston_Anderson1;~Malte_von_Bloh1;~Andres_Castellano1;~Oumnia_Ennaji1;~Raed_Hamed1;~Rahel_Laudien1;~Donghoon_Lee11;~Inti_Luna1;~Dainius_Masili\u016bnas1;~Michele_Meroni1;~Janet_Mumo_Mutuku1;~Siyabusa_Mkuhlani1;~Jonathan_Richetti1;~Alex_C._Ruane1;~Ritvik_Sahajpal1;~Guanyuan_Shuai1;~Vasileios_Sitokonstantinou1;~Rogerio_de_Souza_Noia_Junior1;~Amit_Kumar_Srivastava1;~Robert_Strong1;~Lily-belle_Sweet1;~Petar_Vojnovi\u01071;~Allard_de_Wit1;~Maximilian_Zachow1;~Ioannis_N._Athanasiadis1", "gender": "M;;;M;F;M;F;M;;M;M;F;M;;M;M;M;;F;M;M;M;M;M;M;M;M;M;F;M;M;M;M", "homepage": "https://krsnapaudel.github.io/;;https://www.wur.nl/en/Persons/Ron-RJ-Ron-van-Bree-MSc.htm;https://www.wur.nl/en/persons/michiel-mgj-michiel-kallenberg-phd.htm;;;;;;;;;;https://www.pik-potsdam.de/de/members/laudien;https://umanitoba.ca/engineering/civil/donghoon-lee;;;;;;;;https://nasaharvest.org/partner/ritvik-sahajpal;https://geog.umd.edu/facultyprofile/shuai/guanyuan;;;;https://alec.tamu.edu/people/strong-robert/;https://www.ufz.de/index.php?en=48902;;;;https://www.athanasiadis.info", "dblp": ";;;25/6501;;;;;;;;;;;;;;;;;;;;;;;;;;;;;66/775", "google_scholar": ";;;3XSFQJ0AAAAJ;;;https://scholar.google.com/scholar?hl=en;;RP8M1g8AAAAJ;https://scholar.google.de/citations?hl=de;;https://scholar.google.com/citations?hl=fr;;;https://scholar.google.ca/citations?user=je8BbywAAAAJ;Hnz_uZ0AAAAJ;sobi1d0AAAAJ;iQk-wj8AAAAJ;;;DWQtw2wAAAAJ;BVFLmtwAAAAJ;-wcLMN0AAAAJ;7eF20VgAAAAJ;https://scholar.google.gr/citations?user=8WCaLYQAAAAJ;LIj-Xs4AAAAJ;qRFKdHwAAAAJ;GoNJb_kAAAAJ;wl5hJ14AAAAJ;;https://scholar.google.nl/citations?user=UZUQ1eUAAAAJ;--80v1wAAAAJ;effzbRYAAAAJ", "orcid": ";;;0000-0002-4661-6674;0009-0007-5374-2031;;0000-0002-4261-310X;0000-0003-4304-1075;;0000-0003-2762-5089;0009-0006-3364-8614;0009-0009-8216-2962;0000-0003-2243-3109;;;0000-0002-2212-9767;0000-0001-5654-1277;;;0000-0001-7872-2036;0000-0003-2675-3425;0000-0002-5582-9217;;;;0000-0002-4096-7588;;0000-0001-5279-4808;0000-0001-9971-6102;0009-0008-4363-7278;0000-0002-5517-6404;0000-0002-0525-7900;0000-0003-2764-0078", "linkedin": "https://linkedin.com/in/drpaudel;;;michiel-kallenberg-61154112/;ellaampy;aikepotze;;a3saleh/;;;;oumnia-ennaji-418868123/;;;;inti-luna-aviles/;https://www.linkedin.com/mynetwork/invitation-manager/;;janetmutuku;siyabusa-mkuhlani-3011b826/;jonathan-richetti/;;;;;;;;lily-belle-sweet/;;allarddewit/;;", "or_profile": "~Dilli_Paudel1;~Hilmy_Baja1;~Ron_van_Bree1;~Michiel_Kallenberg1;~Stella_Ofori-Ampofo1;~Aike_Potze1;~Pratishtha_Poudel1;~Abdelrahman_Saleh1;~Weston_Anderson1;~Malte_von_Bloh1;~Andres_Castellano1;~Oumnia_Ennaji1;~Raed_Hamed1;~Rahel_Laudien1;~Donghoon_Lee11;~Inti_Luna1;~Dainius_Masili\u016bnas1;~Michele_Meroni1;~Janet_Mumo_Mutuku1;~Siyabusa_Mkuhlani1;~Jonathan_Richetti1;~Alex_C._Ruane1;~Ritvik_Sahajpal1;~Guanyuan_Shuai1;~Vasileios_Sitokonstantinou1;~Rogerio_de_Souza_Noia_Junior1;~Amit_Kumar_Srivastava1;~Robert_Strong1;~Lily-belle_Sweet1;~Petar_Vojnovi\u01071;~Allard_de_Wit1;~Maximilian_Zachow1;~Ioannis_N._Athanasiadis1", "aff": "Wageningen University & Research;;Wageningen University & Research ;Wageningen University;Technische Universit\u00e4t M\u00fcnchen;Wageningen University & Research;Purdue University;Ankara University;University of Maryland, College Park;Technische Universit\u00e4t M\u00fcnchen;NASA GISS;University Mohammed VI Polytechnic;Vrije Universiteit Amsterdam;Potsdam Institute for Climate Impact Research;University of Manitoba;Universidad de Valencia;Wageningen University & Research;Seidor Consulting;The International Crops Research Institute for the Semi-Arid Tropics (ICRISAT);International Institute of Tropical Agriculture;CSIRO;NASA Goddard Institute for Space Studies;University of Maryland, College Park;University of Maryland, College Park;Universidad de Valencia;INRAE, National Research Institute in Agriculture and Environment;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Texas A&M University - College Station;Helmholtz Centre for Environmental Research - UFZ;Joint Research Center;Wageningen University & Research;Technische Universit\u00e4t M\u00fcnchen;Wageningen University & Research", "aff_domain": "wur.nl;;wur.nl;wageningenuniversity.nl;tum.de;wur.nl;purdue.edu;ankara.edu.tr;umd.edu;tum.de;nasa.gov;um6p.ma;vu.nl;pik-potsdam.de;umanitoba.ca;uv.es;wur.nl;seidor.com;icrisat.org;iita.org;csiro.au;nasa.gov;umd.edu;umd.edu;uv.es;inrae.fr;uni-bonn.de;tamu.edu;ufz.de;ext.ec.europa.eu;wur.nl;tum.de;wur.nl", "position": "Postdoc;;PhD student;Postdoc;PhD student;PhD student;Assistant Professor;PhD student;Researcher;PhD student;Researcher;PhD student;Postdoc;Postdoc;Assistant Professor;PhD student;Lecturer;Researcher;Researcher;Researcher;Researcher;Researcher;Associate Professor;Postdoc;Postdoc;Postdoc;Postdoc;Associate Professor;PhD student;Researcher;Principal Researcher;PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024cybench,\ntitle={{CY}-Bench: A comprehensive benchmark dataset for subnational crop yield forecasting},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=jkJDNG468g}\n}", "github": "", "project": "", "reviewers": "ZsHV;DYtA;nRmX", "site": "https://openreview.net/forum?id=jkJDNG468g", "pdf_size": 3309149, "rating": "5;5;8", "confidence": "3;4;4", "wc_summary_and_contributions": "132;122;54", "wc_strengths": "64;41;74", "wc_improvement": "198;67;128", "wc_limitations": "1;47;4", "wc_correctness": "1;140;8", "wc_clarity": "1;63;6", "wc_relation_to_prior_work": "1;97;5", "wc_documentation": "1;32;4", "wc_additional_feedback": "1;1;1", "wc_review": "400;610;284", "wc_reply_reviewers": "0;58;0", "wc_reply_authors": "305;887;0", "reply_reviewers": "0;1;0", "reply_authors": "2;3;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 102.66666666666667, 34.65384378231207 ], "wc_strengths_avg": [ 59.666666666666664, 13.816254517375139 ], "wc_improvement_avg": [ 131.0, 53.522580904387134 ], "wc_limitations_avg": [ 17.333333333333332, 21.013223349965983 ], "wc_correctness_avg": [ 49.666666666666664, 63.93920723798679 ], "wc_clarity_avg": [ 23.333333333333332, 28.122746823325937 ], "wc_relation_to_prior_work_avg": [ 34.333333333333336, 44.34210439550904 ], "wc_documentation_avg": [ 12.333333333333334, 13.960261060914616 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 431.3333333333333, 134.9205527544101 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 27.34146220587984 ], "wc_reply_authors_avg": [ 397.3333333333333, 367.95500932707637 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 33, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8BRudVZZAisJ:scholar.google.com/&scioq=CY-Bench:+A+comprehensive+benchmark+dataset+for+subnational+crop+yield+forecasting&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;0;3;4;5;2;6;7;8;9;10;11;0;12;13;14;15;6;5;5;11;16;17;18;19;20;0;2;0", "aff_unique_norm": "Wageningen University & Research;Wageningen University;Technische Universit\u00e4t M\u00fcnchen;Purdue University;Ankara University;University of Maryland;NASA Goddard Institute for Space Studies;Mohammed VI Polytechnic University;Vrije Universiteit Amsterdam;Potsdam Institute for Climate Impact Research;University of Manitoba;Universidad de Valencia;Seidor Consulting;International Crops Research Institute for the Semi-Arid Tropics;International Institute of Tropical Agriculture;Commonwealth Scientific and Industrial Research Organisation;INRAE;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Texas A&M University;Helmholtz Centre for Environmental Research;Joint Research Center", "aff_unique_dep": ";;;;;;;;;;;;;;;;National Research Institute in Agriculture and Environment;;;;", "aff_unique_url": "https://www.wur.nl;https://www.wageningenur.nl;https://www.tum.de;https://www.purdue.edu;https://www.ankara.edu.tr;https://www/umd.edu;https://www.giss.nasa.gov;https://ump.ac.ma;https://www.vu.nl;https://www.pik-potsdam.de;https://umanitoba.ca;https://www.uv.es;https://www.seidor.com;https://www.icrisat.org;https://www.iita.org;https://www.csiro.au;https://www.inrae.fr;https://www.uni-bonn.de/;https://www.tamu.edu;https://www.ufz.de;", "aff_unique_abbr": "WUR;WU;TUM;Purdue;Ankara U;UMD;GISS;UM6P;VU Amsterdam;PIK;U of M;UV;;ICRISAT;IITA;CSIRO;INRAE;Uni Bonn;TAMU;UFZ;", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";College Park;College Station", "aff_country_unique_index": "0;0;0;1;0;2;3;2;1;2;4;0;1;5;6;0;6;7;8;9;2;2;2;6;10;1;2;1;0;1;0", "aff_country_unique": "Netherlands;Germany;United States;T\u00fcrkiye;Morocco;Canada;Spain;Unknown;Nigeria;Australia;France;" }, { "title": "Improved Bayes Regret Bounds for Multi-Task Hierarchical Bayesian Bandit Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93935", "id": "joNPMCzVIi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=joNPMCzVIi", "openreview": "https://openreview.net/forum?id=joNPMCzVIi", "poster": "/media/PosterPDFs/NeurIPS%202024/93935.png?t=1730362763.9844148", "project": "", "author_site": "Jiechao Guan, Hui Xiong", "tldr": "", "abstract": "Hierarchical Bayesian bandit refers to the multi-task bandit problem in which bandit tasks are assumed to be drawn from the same distribution. In this work, we provide improved Bayes regret bounds for hierarchical Bayesian bandit algorithms in the multi-task linear bandit and semi-bandit settings. For the multi-task linear bandit, we first analyze the preexisting hierarchical Thompson sampling (HierTS) algorithm, and improve its gap-independent Bayes regret bound from $O(m\\sqrt{n\\log{n}\\log{(mn)}})$ to $O(m\\sqrt{n\\log{n}})$ in the case of infinite action set, with $m$ being the number of tasks and $n$ the number of iterations per task. In the case of finite action set, we propose a novel hierarchical Bayesian bandit algorithm, named hierarchical BayesUCB (HierBayesUCB), that achieves the logarithmic but gap-dependent regret bound $O(m\\log{(mn)}\\log{n})$ under mild assumptions. All of the above regret bounds hold in many variants of hierarchical Bayesian linear bandit problem, including when the tasks are solved sequentially or concurrently. Furthermore, we extend the aforementioned HierTS and HierBayesUCB algorithms to the multi-task combinatorial semi-bandit setting. Concretely, our combinatorial HierTS algorithm attains comparable Bayes regret bound $O(m\\sqrt{n}\\log{n})$ with respect to the latest one. Moreover, our combinatorial HierBayesUCB yields a sharper Bayes regret bound $O(m\\log{(mn)}\\log{n})$. Experiments are conducted to validate the soundness of our theoretical results for multi-task bandit algorithms.", "keywords": "hierarchical Bayesian bandi/semi-bandit;multi-task bandit;Bayes regret bound;Thompson sampling algorithm;BayesUCB algorithm", "primary_area": "bandits", "supplementary_material": "/attachment/3cc73a91f5df0ba093c65773f11fd76d69d3dc0c.zip", "author": "Jiechao Guan;Hui Xiong", "authorids": "~Jiechao_Guan1;~Hui_Xiong1", "gender": "M;M", "homepage": ";https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "228/8337;262/1686-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;cVDF1tkAAAAJ", "orcid": ";0000-0001-6016-6465", "linkedin": ";", "or_profile": "~Jiechao_Guan1;~Hui_Xiong1", "aff": "AI Thrust, The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust-gz.edu.cn;hkust.edu", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nguan2024improved,\ntitle={Improved Bayes Regret Bounds for Multi-Task Hierarchical Bayesian Bandit Algorithms},\nauthor={Jiechao Guan and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=joNPMCzVIi}\n}", "github": "", "reviewers": "rxXv;diEN;wPds;yLZF", "pdf_size": 1010368, "rating": "6;6;6;8", "confidence": "2;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "181;74;229;75", "wc_strengths": "50;51;4;49", "wc_weaknesses": "239;94;4;26", "wc_questions": "45;708;4;2", "wc_limitations": "7;3;38;1", "wc_review": "522;930;279;153", "wc_reply_reviewers": "13;41;0;0", "wc_reply_authors": "618;1614;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;5;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 139.75, 67.42171386133698 ], "wc_strengths_avg": [ 38.5, 19.93113142799475 ], "wc_weaknesses_avg": [ 90.75, 91.79699069141645 ], "wc_questions_avg": [ 189.75, 299.70349931223694 ], "wc_limitations_avg": [ 12.25, 15.022899187573616 ], "wc_review_avg": [ 471.0, 296.34017614896567 ], "wc_reply_reviewers_avg": [ 13.5, 16.740669042783207 ], "wc_reply_authors_avg": [ 558.0, 659.8227034590428 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.6583123951777 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:60UGp5mm44QJ:scholar.google.com/&scioq=Improved+Bayes+Regret+Bounds+for+Multi-Task+Hierarchical+Bayesian+Bandit+Algorithms&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "hkust-gz.edu.cn;hkust.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "AI Thrust", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Sequential Harmful Shift Detection Without Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93934", "id": "jps9KkuSD3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jps9KkuSD3", "openreview": "https://openreview.net/forum?id=jps9KkuSD3", "poster": "/media/PosterPDFs/NeurIPS%202024/93934.png?t=1731511433.0187762", "project": "", "author_site": "Salim I. Amoukou, Tom Bewley, Saumitra Mishra, Freddy Lecue, Daniele Magazzeni, Manuela Veloso", "tldr": "", "abstract": "We introduce a novel approach for detecting distribution shifts that negatively impact the performance of machine learning models in continuous production environments, which requires no access to ground truth data labels. It builds upon the work of Podkopaev and Ramdas [2022], who address scenarios where labels are available for tracking model errors over time. Our solution extends this framework to work in the absence of labels, by employing a proxy for the true error. This proxy is derived using the predictions of a trained error estimator. Experiments show that our method has high power and false alarm control under various distribution shifts, including covariate and label shifts and natural shifts over geography and time.", "keywords": "distribution shift;sequential testing;confidence sequences;uncertainty quantification", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Salim I. Amoukou;Tom Bewley;Saumitra Mishra;Freddy Lecue;Daniele Magazzeni;Manuela Veloso", "authorids": "~Salim_I._Amoukou1;~Tom_Bewley1;~Saumitra_Mishra1;~Freddy_Lecue1;~Daniele_Magazzeni1;~Manuela_Veloso1", "gender": "M;;M;;M;F", "homepage": "https://salimamoukou.github.io/;http://tombewley.com;https://sites.google.com/site/saumitramishrac4dm/;http://www-sop.inria.fr/members/Freddy.Lecue/;https://nms.kcl.ac.uk/daniele.magazzeni/;https://www.cs.cmu.edu/~mmv/", "dblp": "289/1335;;208/1387;02/3657.html;14/4672;v/ManuelaMVeloso", "google_scholar": "JrHnICMAAAAJ;OqPzZ08AAAAJ;https://scholar.google.co.uk/citations?user=On6E6ogAAAAJ;https://scholar.google.ca/citations?user=GLByS4gAAAAJ;;https://scholar.google.com.tw/citations?user=2FbkAzYAAAAJ", "orcid": ";;;;;", "linkedin": "slim-amk/;;;freddylecue/;;", "or_profile": "~Salim_I._Amoukou1;~Tom_Bewley1;~Saumitra_Mishra1;~Freddy_Lecue1;~Daniele_Magazzeni1;~Manuela_Veloso1", "aff": "J.P. Morgan Chase;J.P. Morgan;J.P. Morgan Chase;INRIA;;School of Computer Science, Carnegie Mellon University", "aff_domain": "jpmorgan.com;jpmorgan.com;jpmorgan.com;inria.fr;;cs.cmu.edu", "position": "Researcher;Researcher;Researcher;Full Professor;;Full Professor", "bibtex": "@inproceedings{\namoukou2024sequential,\ntitle={Sequential Harmful Shift Detection Without Labels},\nauthor={Salim I. Amoukou and Tom Bewley and Saumitra Mishra and Freddy Lecue and Daniele Magazzeni and Manuela Veloso},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jps9KkuSD3}\n}", "github": "", "reviewers": "MiPF;FDg9;4vCN;KyAy", "pdf_size": 1975577, "rating": "5;6;6;7", "confidence": "1;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "2;4;3;4", "wc_summary": "44;120;73;34", "wc_strengths": "19;210;34;34", "wc_weaknesses": "277;309;92;215", "wc_questions": "1;486;84;50", "wc_limitations": "1;24;56;3", "wc_review": "342;1149;339;336", "wc_reply_reviewers": "19;290;12;101", "wc_reply_authors": "0;588;0;692", "reply_reviewers": "1;1;1;2", "reply_authors": "1;2;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 67.75, 33.39442318711314 ], "wc_strengths_avg": [ 74.25, 78.6141685703029 ], "wc_weaknesses_avg": [ 223.25, 82.97100397126698 ], "wc_questions_avg": [ 155.25, 193.22444850484112 ], "wc_limitations_avg": [ 21.0, 22.124646889837585 ], "wc_review_avg": [ 541.5, 350.7467034770249 ], "wc_reply_reviewers_avg": [ 105.5, 112.12158578971312 ], "wc_reply_authors_avg": [ 320.0, 322.1055727552692 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9733285267845752, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16902775055214732209&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jpmorgan.com;jpmorgan.com;jpmorgan.com;inria.fr;;cs.cmu.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "JPMorgan Chase & Co.;J.P. Morgan;INRIA;Carnegie Mellon University", "aff_unique_dep": ";;;School of Computer Science", "aff_unique_url": "https://www.jpmorganchase.com;https://www.jpmorganchase.com;https://www.inria.fr;https://www.cmu.edu", "aff_unique_abbr": "JPM;JPM;INRIA;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Kronecker-Factored Approximate Curvature for Physics-Informed Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93933", "id": "jrNlWfor7q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jrNlWfor7q", "openreview": "https://openreview.net/forum?id=jrNlWfor7q", "poster": "", "project": "", "author_site": "Felix Dangel, Johannes M\u00fcller, Marius Zeinhofer", "tldr": "", "abstract": "Physics-Informed Neural Networks (PINNs) are infamous for being hard to train.\nRecently, second-order methods based on natural gradient and Gauss-Newton methods have shown promising performance, improving the accuracy achieved by first-order methods by several orders of magnitude. \nWhile promising, the proposed methods only scale to networks with a few thousand parameters due to the high computational cost to evaluate, store, and invert the curvature matrix.\nWe propose Kronecker-factored approximate curvature (KFAC) for PINN losses that greatly reduces the computational cost and allows scaling to much larger networks.\nOur approach goes beyond the popular KFAC for traditional deep learning problems as it captures contributions from a PDE's differential operator that are crucial for optimization. \nTo establish KFAC for such losses, we use Taylor-mode automatic differentiation to describe the differential operator's computation graph as a forward network with shared weights which allows us to apply a variant of KFAC for networks with weight-sharing. \nEmpirically, we find that our KFAC-based optimizers are competitive with expensive second-order methods on small problems, scale more favorably to higher-dimensional neural networks and PDEs, and consistently outperform first-order methods.", "keywords": "KFAC;PINNs;Gauss-Newton;PDEs;Taylor mode automatic differentiation;Forward Laplacian;Second-order optimization;Higher-order derivatives", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Felix Dangel;Johannes M\u00fcller;Marius Zeinhofer", "authorids": "~Felix_Dangel1;~Johannes_M\u00fcller1;~Marius_Zeinhofer1", "gender": "M;M;M", "homepage": "https://f-dangel.com;https://math.ethz.ch/sam/the-institute/people.ethz_search.html?u=mzeinhofer;https://muellerjohannes.github.io/", "dblp": "236/4218;255/5011;", "google_scholar": "9hlJ9W0AAAAJ;;https://scholar.google.de/citations?user=Wfww-P8AAAAJ", "orcid": "0000-0002-1414-8554;;0000-0001-8729-0466", "linkedin": ";;", "or_profile": "~Felix_Dangel1;~Marius_Zeinhofer1;~Johannes_Christoph_M\u00fcller1", "aff": "Vector Institute, Toronto;Simula Research Laboratory;RWTH Aachen University", "aff_domain": "vectorinstitute.ai;simula.no;mathc.rwth-aachen.de", "position": "Postdoc;Postdoc;Postdoc", "bibtex": "@inproceedings{\ndangel2024kroneckerfactored,\ntitle={Kronecker-Factored Approximate Curvature for Physics-Informed Neural Networks},\nauthor={Felix Dangel and Johannes M{\\\"u}ller and Marius Zeinhofer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jrNlWfor7q}\n}", "github": "", "reviewers": "YHUU;TQnM;aiiA", "pdf_size": 2395074, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "65;112;155", "wc_strengths": "93;32;96", "wc_weaknesses": "400;134;400", "wc_questions": "127;1;246", "wc_limitations": "3;1;24", "wc_review": "688;280;921", "wc_reply_reviewers": "88;118;154", "wc_reply_authors": "99;207;204", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 110.66666666666667, 36.75444039689475 ], "wc_strengths_avg": [ 73.66666666666667, 29.48822740612863 ], "wc_weaknesses_avg": [ 311.3333333333333, 125.39360253041443 ], "wc_questions_avg": [ 124.66666666666667, 100.03443851438807 ], "wc_limitations_avg": [ 9.333333333333334, 10.402991022884823 ], "wc_review_avg": [ 629.6666666666666, 264.91801666846965 ], "wc_reply_reviewers_avg": [ 120.0, 26.981475126464083 ], "wc_reply_authors_avg": [ 170.0, 50.21951811795888 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2853566301774205931&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "vectorinstitute.ai;simula.no;mathc.rwth-aachen.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Vector Institute;Simula Research Laboratory;RWTH Aachen University", "aff_unique_dep": ";;", "aff_unique_url": "https://vectorinstitute.ai;https://www.simula.no;https://www.rwth-aachen.de", "aff_unique_abbr": "Vector Institute;Simula;RWTH", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Toronto;;Aachen", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Canada;Norway;Germany" }, { "title": "Towards Croppable Implicit Neural Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93932", "id": "jrVoZLF20h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jrVoZLF20h", "openreview": "https://openreview.net/forum?id=jrVoZLF20h", "poster": "/media/PosterPDFs/NeurIPS%202024/93932.png?t=1730667407.9174376", "project": "", "author_site": "Maor Ashkenazi, Eran Treister", "tldr": "", "abstract": "Implicit Neural Representations (INRs) have peaked interest in recent years due to their ability to encode natural signals using neural networks. While INRs allow for useful applications such as interpolating new coordinates and signal compression, their black-box nature makes it difficult to modify them post-training. In this paper we explore the idea of editable INRs, and specifically focus on the widely used cropping operation. To this end, we present Local-Global SIRENs - a novel INR architecture that supports cropping by design. Local-Global SIRENs are based on combining local and global feature extraction for signal encoding. What makes their design unique is the ability to effortlessly remove specific portions of an encoded signal, with a proportional weight decrease. This is achieved by eliminating the corresponding weights from the network, without the need for retraining. We further show how this architecture can be used to support the straightforward extension of previously encoded signals. Beyond signal editing, we examine how the Local-Global approach can accelerate training, enhance encoding of various signals, improve downstream performance, and be applied to modern INRs such as INCODE, highlighting its potential and flexibility. Code is available at https://github.com/maorash/Local-Global-INRs.", "keywords": "Implicit Neural Representations;Implicit Functions;Deep Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/8228fee8aa8d64c6b5427c55c23116e082a21be5.zip", "author": "Maor Ashkenazi;Eran Treister", "authorids": "~Maor_Ashkenazi1;~Eran_Treister1", "gender": "M;M", "homepage": ";https://www.cs.bgu.ac.il/~erant/", "dblp": "190/8394;22/10384", "google_scholar": ";https://scholar.google.co.il/citations?user=5nNoFlEAAAAJ", "orcid": ";0000-0002-5351-0966", "linkedin": ";", "or_profile": "~Maor_Ashkenazi1;~Eran_Treister1", "aff": "Ben Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_domain": "bgu.ac.il;bgu.ac.il", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nashkenazi2024towards,\ntitle={Towards Croppable Implicit Neural Representations},\nauthor={Maor Ashkenazi and Eran Treister},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jrVoZLF20h}\n}", "github": "", "reviewers": "45rR;XqfY;AXyi", "pdf_size": 14106966, "rating": "5;6;8", "confidence": "3;4;5", "soundness": "3;2;4", "novelty": "2;2;3", "presentation": "4;3;4", "wc_summary": "111;64;101", "wc_strengths": "85;52;142", "wc_weaknesses": "275;233;106", "wc_questions": "4;29;73", "wc_limitations": "1;13;1", "wc_review": "476;391;423", "wc_reply_reviewers": "153;76;22", "wc_reply_authors": "136;25;11", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 92.0, 20.215505600075073 ], "wc_strengths_avg": [ 93.0, 37.17526059088221 ], "wc_weaknesses_avg": [ 204.66666666666666, 71.84396673037726 ], "wc_questions_avg": [ 35.333333333333336, 28.522895287041877 ], "wc_limitations_avg": [ 5.0, 5.656854249492381 ], "wc_review_avg": [ 430.0, 35.05234181430203 ], "wc_reply_reviewers_avg": [ 83.66666666666667, 53.754586367635234 ], "wc_reply_authors_avg": [ 57.333333333333336, 55.918591621113706 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cWt8KkOfED4J:scholar.google.com/&scioq=Towards+Croppable+Implicit+Neural+Representations&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "bgu.ac.il;bgu.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Ben Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_unique_dep": ";", "aff_unique_url": "https://www.bgu.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "EGODE: An Event-attended Graph ODE Framework for Modeling Rigid Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93931", "id": "js5vZtyoIQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=js5vZtyoIQ", "openreview": "https://openreview.net/forum?id=js5vZtyoIQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93931.png?t=1731732383.2924132", "project": "", "author_site": "Jingyang Yuan, Gongbo Sun, Zhiping Xiao, Hang Zhou, Xiao Luo, Junyu Luo, Yusheng Zhao, Wei Ju, Ming Zhang", "tldr": "", "abstract": "This paper studies the problem of rigid dynamics modeling, which has a wide range of applications in robotics, graphics, and mechanical design. The problem is partly solved by graph neural network (GNN) simulators. However, these approaches cannot effectively handle the relationship between intrinsic continuity and instantaneous changes in rigid dynamics. Moreover, they usually neglect hierarchical structures across mesh nodes and objects in systems. In this paper, we propose a novel approach named Event-attend Graph ODE (EGODE) for effective rigid dynamics modeling. In particular, we describe the rigid system using both mesh node representations and object representations. To model continuous dynamics across hierarchical structures, we use a coupled graph ODE framework for the evolution of both types of representations over a long period. In addition, to capture instantaneous changes during the collision, we introduce an event module, which can effectively estimate the occurrence of the collision and update the states of both mesh node and object representations during evolution. Extensive experiments on a range of benchmark datasets validate the superiority of the proposed EGODE compared to various state-of-the-art baselines. The source code can be found at https://github.com/yuanjypku/EGODE.", "keywords": "Keywords: Interacting Dynamical System;Rigid Simulation;Neural ODE;Graph Neural Network", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Jingyang Yuan;Gongbo Sun;Zhiping Xiao;Hang Zhou;Xiao Luo;Junyu Luo;Yusheng Zhao;Wei Ju;Ming Zhang", "authorids": "~Jingyang_Yuan1;~Gongbo_Sun1;~Zhiping_Xiao1;~Hang_Zhou13;~Xiao_Luo3;~Junyu_Luo2;~Yusheng_Zhao1;~Wei_Ju1;~Ming_Zhang5", "gender": "M;M;F;M;M;M;;;F", "homepage": "https://github.com/yuanjypku;https://github.com/qingfengtommy;https://patriciaxiao.github.io/www/;https://hg-zh.github.io;http://luoxiao12.github.io;http://luojunyu.com/;;;https://cs.pku.edu.cn/info/1080/1371.htm", "dblp": "244/7491;;176/5397-1.html;;50/1585-1;198/0850-2;;;73/1844-4", "google_scholar": ";;tF8GQawAAAAJ;hVUydFIAAAAJ;https://scholar.google.com.hk/citations?;https://scholar.google.com/citations?hl=en;;;LbzoQBsAAAAJ", "orcid": ";;0000-0002-8583-4789;0009-0003-9535-6287;;0009-0001-6894-1144;;;0000-0002-9809-3430", "linkedin": ";;zpxiao/;;%E9%9C%84-%E7%BD%97-303548214/;luojunyu/;;;", "or_profile": "~Jingyang_Yuan1;~Gongbo_Sun1;~Zhiping_Xiao1;~Hang_Zhou13;~Xiao_Luo3;~Junyu_Luo2;~Yusheng_Zhao1;~Wei_Ju1;~Ming_Zhang5", "aff": "Peking University;University of Wisconsin - Madison;University of California, Los Angeles;University of California, Davis;University of California, Los Angeles;Sensetime;;;Peking University", "aff_domain": "pku.edu.cn;uwm.edu;cs.ucla.edu;ucdavis.edu;cs.ucla.edu;sensetime.com;;;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Postdoc;Postdoc;Intern;;;Full Professor", "bibtex": "@inproceedings{\nyuan2024egode,\ntitle={{EGODE}: An Event-attended Graph {ODE} Framework for Modeling Rigid Dynamics},\nauthor={Jingyang Yuan and Gongbo Sun and Zhiping Xiao and Hang Zhou and Xiao Luo and Junyu Luo and Yusheng Zhao and Wei Ju and Ming Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=js5vZtyoIQ}\n}", "github": "", "reviewers": "KaXG;ZzdH;rwMo;uFcU;Py8V", "pdf_size": 3874366, "rating": "5;6;6;7;7", "confidence": "3;3;3;2;2", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;3;3;3;3", "wc_summary": "59;95;56;91;88", "wc_strengths": "90;56;57;106;61", "wc_weaknesses": "551;68;242;36;73", "wc_questions": "2;56;12;96;56", "wc_limitations": "2;7;5;1;41", "wc_review": "704;282;372;330;319", "wc_reply_reviewers": "293;119;13;24;13", "wc_reply_authors": "228;808;38;239;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;3;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 77.8, 16.749925372968082 ], "wc_strengths_avg": [ 74.0, 20.307634032550418 ], "wc_weaknesses_avg": [ 194.0, 192.47545298037357 ], "wc_questions_avg": [ 44.4, 33.997646977401246 ], "wc_limitations_avg": [ 11.2, 15.051910177781423 ], "wc_review_avg": [ 401.4, 153.99688308534041 ], "wc_reply_reviewers_avg": [ 92.4, 107.92145291831461 ], "wc_reply_authors_avg": [ 262.6, 289.3507214437178 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8728715609439696, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4920163669633710051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "pku.edu.cn;uwm.edu;cs.ucla.edu;ucdavis.edu;cs.ucla.edu;sensetime.com;;;pku.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;2;4;0", "aff_unique_norm": "Peking University;University of Wisconsin-Madison;University of California, Los Angeles;University of California, Davis;SenseTime", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.wisc.edu;https://www.ucla.edu;https://www.ucdavis.edu;https://www.sensetime.com", "aff_unique_abbr": "Peking U;UW-Madison;UCLA;UC Davis;SenseTime", "aff_campus_unique_index": "1;2;3;2", "aff_campus_unique": ";Madison;Los Angeles;Davis", "aff_country_unique_index": "0;1;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "RFLPA: A Robust Federated Learning Framework against Poisoning Attacks with Secure Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93930", "id": "js74ZCddxG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=js74ZCddxG", "openreview": "https://openreview.net/forum?id=js74ZCddxG", "poster": "/media/PosterPDFs/NeurIPS%202024/93930.png?t=1731329130.4689102", "project": "", "author_site": "Peihua Mai, Ran Yan, Yan Pang", "tldr": "", "abstract": "Federated learning (FL) allows multiple devices to train a model collaboratively without sharing their data. Despite its benefits, FL is vulnerable to privacy leakage and poisoning attacks. To address the privacy concern, secure aggregation (SecAgg) is often used to obtain the aggregation of gradients on sever without inspecting individual user updates. Unfortunately, existing defense strategies against poisoning attacks rely on the analysis of local updates in plaintext, making them incompatible with SecAgg. To reconcile the conflicts, we propose a robust federated learning framework against poisoning attacks (RFLPA) based on SecAgg protocol. Our framework computes the cosine similarity between local updates and server updates to conduct robust aggregation. Furthermore, we leverage verifiable packed Shamir secret sharing to achieve reduced communication cost of $O(M+N)$ per user, and design a novel dot product aggregation algorithm to resolve the issue of increased information leakage. Our experimental results show that RFLPA significantly reduces communication and computation overhead by over $75\\%$ compared to the state-of-the-art secret sharing method, BREA, while maintaining competitive accuracy.", "keywords": "federated learning;poisoning attack;privacy protection", "primary_area": "privacy", "supplementary_material": "", "author": "Peihua Mai;Ran Yan;Yan Pang", "authorids": "~Peihua_Mai1;~Ran_Yan3;~Yan_Pang1", "gender": "F;F;M", "homepage": ";;https://discovery.nus.edu.sg/10509-yan-james-pang", "dblp": "331/2673;;", "google_scholar": "ZtDss4cAAAAJ;;", "orcid": "0000-0002-5851-2290;0009-0009-3405-5361;", "linkedin": ";;", "or_profile": "~Peihua_Mai1;~Ran_Yan3;~Yan_Pang1", "aff": "National University of Singapore;;National University of Singapore", "aff_domain": "nus.edu;;nus.edu.sg", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nmai2024rflpa,\ntitle={{RFLPA}: A Robust Federated Learning Framework against Poisoning Attacks with Secure Aggregation},\nauthor={Peihua Mai and Ran Yan and Yan Pang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=js74ZCddxG}\n}", "github": "", "reviewers": "VWie;aBFW;WU56;4mMR", "pdf_size": 1906964, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;2", "wc_summary": "44;75;66;87", "wc_strengths": "76;58;31;48", "wc_weaknesses": "358;104;66;43", "wc_questions": "3;24;97;53", "wc_limitations": "1;7;1;4", "wc_review": "482;268;261;235", "wc_reply_reviewers": "700;16;16;28", "wc_reply_authors": "1797;49;57;48", "reply_reviewers": "3;1;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 15.732132722552274 ], "wc_strengths_avg": [ 53.25, 16.29992331270304 ], "wc_weaknesses_avg": [ 142.75, 126.16928112658802 ], "wc_questions_avg": [ 44.25, 35.25177300505607 ], "wc_limitations_avg": [ 3.25, 2.48746859276655 ], "wc_review_avg": [ 311.5, 99.20307454912877 ], "wc_reply_reviewers_avg": [ 190.0, 294.4893886033926 ], "wc_reply_authors_avg": [ 487.75, 755.9038877397046 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15991410091909303403&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 5, "email": "nus.edu;;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Dual Prototype Evolving for Test-Time Generalization of Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93929", "id": "jsgYYXaSiS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jsgYYXaSiS", "openreview": "https://openreview.net/forum?id=jsgYYXaSiS", "poster": "/media/PosterPDFs/NeurIPS%202024/93929.png?t=1733349097.132244", "project": "", "author_site": "Ce Zhang, Simon Stepputtis, Katia Sycara, Yaqi Xie", "tldr": "", "abstract": "Test-time adaptation, which enables models to generalize to diverse data with unlabeled test samples, holds significant value in real-world scenarios. Recently, researchers have applied this setting to advanced pre-trained vision-language models (VLMs), developing approaches such as test-time prompt tuning to further extend their practical applicability. However, these methods typically focus solely on adapting VLMs from a single modality and fail to accumulate task-specific knowledge as more samples are processed. To address this, we introduce Dual Prototype Evolving (DPE), a novel test-time adaptation approach for VLMs that effectively accumulates task-specific knowledge from multi-modalities. Specifically, we create and evolve two sets of prototypes\u2014textual and visual\u2014to progressively capture more accurate multi-modal representations for target classes during test time. Moreover, to promote consistent multi-modal representations, we introduce and optimize learnable residuals for each test sample to align the prototypes from both modalities. Extensive experimental results on 15 benchmark datasets demonstrate that our proposed DPE consistently outperforms previous state-of-the-art methods while also exhibiting competitive computational efficiency.", "keywords": "Test-Time Adaptation;Vision-Language Models;CLIP;Transfer Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ce Zhang;Simon Stepputtis;Katia P. Sycara;Yaqi Xie", "authorids": "~Ce_Zhang5;~Simon_Stepputtis1;~Katia_P._Sycara1;~Yaqi_Xie1", "gender": "M;;F;F", "homepage": "https://zhangce01.github.io/;https://simonstepputtis.com/;;https://yaqi-xie.me/", "dblp": "97/919-9;192/7092;s/KatiaPSycara;237/8691", "google_scholar": "_z-zaU0AAAAJ;WUQgzsAAAAAJ;VWv6a9kAAAAJ;lBCCo0EAAAAJ", "orcid": "0000-0001-6789-0130;0009-0003-0519-3454;;0009-0005-0458-9419", "linkedin": "ce-zhang-64263a275/;simon-stepputtis/;;yaqi-xie/", "or_profile": "~Ce_Zhang5;~Simon_Stepputtis1;~Katia_P._Sycara1;~Yaqi_Xie1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "MS student;Postdoc;Full Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2024dual,\ntitle={Dual Prototype Evolving for Test-Time Generalization of Vision-Language Models},\nauthor={Ce Zhang and Simon Stepputtis and Katia P. Sycara and Yaqi Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jsgYYXaSiS}\n}", "github": "", "reviewers": "owvr;Sqqz;vqJd;SVvN", "pdf_size": 3564043, "rating": "5;5;6;7", "confidence": "4;5;5;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "83;53;48;75", "wc_strengths": "27;89;25;100", "wc_weaknesses": "173;185;120;83", "wc_questions": "43;129;2;70", "wc_limitations": "1;35;1;67", "wc_review": "327;491;196;395", "wc_reply_reviewers": "55;107;16;98", "wc_reply_authors": "44;720;20;24", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 14.635146053251399 ], "wc_strengths_avg": [ 60.25, 34.47734763580284 ], "wc_weaknesses_avg": [ 140.25, 41.117970523847596 ], "wc_questions_avg": [ 61.0, 46.12483062299525 ], "wc_limitations_avg": [ 26.0, 27.440845468024488 ], "wc_review_avg": [ 352.25, 107.39035105632163 ], "wc_reply_reviewers_avg": [ 69.0, 36.36619309193636 ], "wc_reply_authors_avg": [ 202.0, 299.20561492057595 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17681331167303109940&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "cmu.edu;cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "RL in Latent MDPs is Tractable: Online Guarantees via Off-Policy Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93928", "id": "juJl2uSq4D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=juJl2uSq4D", "openreview": "https://openreview.net/forum?id=juJl2uSq4D", "poster": "", "project": "", "author_site": "Jeongyeol Kwon, Shie Mannor, Constantine Caramanis, Yonathan Efroni", "tldr": "", "abstract": "In many real-world decision problems there is partially observed, hidden or latent information that remains fixed throughout an interaction. \nSuch decision problems can be modeled as Latent Markov Decision Processes (LMDPs), where a latent variable is selected at the beginning of an interaction and is not disclosed to the agent initially. \nIn last decade, there has been significant progress in designing learning algorithms for solving LMDPs under different structural assumptions. However, for general LMDPs, there is no known learning algorithm that provably matches the existing lower bound. We effectively resolve this open question, introducing the first sample-efficient algorithm for LMDPs without *any additional structural assumptions*. \nOur result builds off a new perspective on the role off-policy evaluation guarantees and coverage coefficient in LMDPs, a perspective, which has been overlooked in the context of exploration in partially observed environments. Specifically, we establish a novel off-policy evaluation lemma and introduce a new coverage coefficient for LMDPs. Then, we show how these can be used to derive near-optimal guarantees of an optimistic exploration algorithm. \nThese results, we believe, can be valuable for a wide range of interactive learning problems beyond the LMDP class, and especially, for partially observed environments.", "keywords": "Latent Markov Decision Process;Online Exploration;Off-Policy Evaluation;Partially Observed Systems;Sample Complexity;Information Theory", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jeongyeol Kwon;Shie Mannor;Constantine Caramanis;Yonathan Efroni", "authorids": "~Jeongyeol_Kwon1;~Shie_Mannor2;~Constantine_Caramanis1;~Yonathan_Efroni2", "gender": "M;M;M;M", "homepage": "https://kwonchungli.github.io/;https://shie.net.technion.ac.il;http://users.ece.utexas.edu/~cmcaram/constantine_caramanis/Home.html;https://sites.google.com/view/yonathan-efroni/", "dblp": "https://dblp.uni-trier.de/pid/228/9224;20/1669;96/5760;215/3475", "google_scholar": "cnyMCYMAAAAJ;https://scholar.google.com.tw/citations?user=q1HlbIUAAAAJ;47YTUrEAAAAJ;pfTInEgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Jeongyeol_Kwon1;~Shie_Mannor2;~Constantine_Caramanis1;~Yonathan_Efroni2", "aff": "University of Wisconsin - Madison;Technion - Israel Institute of Technology, Technion;University of Texas, Austin;Meta", "aff_domain": "wisc.edu;technion.il;utexas.edu;meta.com", "position": "Postdoc;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nkwon2024rl,\ntitle={{RL} in Latent {MDP}s is Tractable: Online Guarantees via Off-Policy Evaluation},\nauthor={Jeongyeol Kwon and Shie Mannor and Constantine Caramanis and Yonathan Efroni},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=juJl2uSq4D}\n}", "github": "", "reviewers": "8u1Y;Ch5D;jHm3;AN8Q", "pdf_size": 711531, "rating": "3;6;6;8", "confidence": "3;3;3;3", "soundness": "3;3;3;4", "novelty": "1;3;2;4", "presentation": "2;3;3;3", "wc_summary": "48;165;99;95", "wc_strengths": "18;54;63;35", "wc_weaknesses": "261;125;103;9", "wc_questions": "6;78;2;9", "wc_limitations": "6;11;1;11", "wc_review": "339;433;268;159", "wc_reply_reviewers": "0;11;47;10", "wc_reply_authors": "0;5;23;7", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.7853571071357126 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.75, 41.66158302321217 ], "wc_strengths_avg": [ 42.5, 17.38533865071371 ], "wc_weaknesses_avg": [ 124.5, 90.04859799019638 ], "wc_questions_avg": [ 23.75, 31.419540098480116 ], "wc_limitations_avg": [ 7.25, 4.14578098794425 ], "wc_review_avg": [ 299.75, 100.14333477571036 ], "wc_reply_reviewers_avg": [ 17.0, 17.84656829757475 ], "wc_reply_authors_avg": [ 8.75, 8.613216588476108 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1857800086047917010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "wisc.edu;technion.il;utexas.edu;meta.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Wisconsin-Madison;Technion - Israel Institute of Technology;University of Texas at Austin;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.wisc.edu;https://www.technion.ac.il;https://www.utexas.edu;https://meta.com", "aff_unique_abbr": "UW-Madison;Technion;UT Austin;Meta", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Madison;;Austin", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Israel" }, { "id": "jwE1dgOox1", "title": "Node-Level Topological Representation Learning on Point Clouds", "track": "main", "status": "Reject", "tldr": "", "abstract": "Topological Data Analysis (TDA) allows us to extract powerful topological, and higher-order information on the global shape of a data set or point cloud. Tools like Persistent Homology or the Euler Transform give a single complex description of the global structure of the point cloud. However, common machine learning applications like classification require point-level information and features to be available. In this paper, we bridge this gap and propose a novel method to extract node-level topological features from complex point clouds using discrete variants of concepts from algebraic topology and differential geometry. We verify the effectiveness of these topological point features (TOPF) on both synthetic and real-world data and study their robustness under noise.", "keywords": "Topological Data Analysis;TDA;Hodge Laplacian;Higher-Order Networks;Simplicial Complexes;Algebraic Topology;Differential Geometry;Point Clouds;Persistent Homology", "primary_area": "other", "supplementary_material": "", "author": "Vincent Peter Grande;Michael T Schaub", "authorids": "~Vincent_Peter_Grande1;~Michael_T_Schaub1", "gender": "M;", "homepage": "https://vincent-grande.github.io;https://michaelschaub.github.io/", "dblp": "344/1170;72/10263", "google_scholar": "48g4gIIAAAAJ#%20your%20Google%20Scholar%20ID;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-2426-6404", "linkedin": ";", "or_profile": "~Vincent_Peter_Grande1;~Michael_T_Schaub1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;RWTH Aachen University", "aff_domain": "rwth-aachen.de;rwth-aachen.de", "position": "PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024nodelevel,\ntitle={Node-Level Topological Representation Learning on Point Clouds},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=jwE1dgOox1}\n}", "github": "", "project": "", "reviewers": "Zmis;gsQy;qf14;mNMA", "site": "https://openreview.net/forum?id=jwE1dgOox1", "pdf_size": 14682728, "rating": "5;6;6;6", "confidence": "4;4;4;2", "soundness": "3;2;4;2", "novelty": "2;2;3;1", "presentation": "3;3;3;3", "wc_summary": "115;97;57;90", "wc_strengths": "63;24;43;101", "wc_weaknesses": "303;157;33;129", "wc_questions": "86;12;11;35", "wc_limitations": "6;6;13;12", "wc_review": "573;296;157;367", "wc_reply_reviewers": "292;22;0;221", "wc_reply_authors": "1178;0;148;600", "reply_reviewers": "2;1;0;1", "reply_authors": "3;1;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 89.75, 20.99255820523073 ], "wc_strengths_avg": [ 57.75, 28.525208149985513 ], "wc_weaknesses_avg": [ 155.5, 96.78197146163122 ], "wc_questions_avg": [ 36.0, 30.422031490352513 ], "wc_limitations_avg": [ 9.25, 3.2691742076555053 ], "wc_review_avg": [ 348.25, 150.14222424088436 ], "wc_reply_reviewers_avg": [ 133.75, 125.5316195227322 ], "wc_reply_authors_avg": [ 481.5, 458.86245215750654 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9595900054186840673&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "RWTH Aachen University", "aff_unique_dep": "", "aff_unique_url": "https://www.rwth-aachen.de", "aff_unique_abbr": "RWTH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Regularizing Hidden States Enables Learning Generalizable Reward Model for LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93927", "id": "jwh9MHEfmY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jwh9MHEfmY", "openreview": "https://openreview.net/forum?id=jwh9MHEfmY", "poster": "/media/PosterPDFs/NeurIPS%202024/93927.png?t=1731220200.1638646", "project": "", "author_site": "Rui Yang, Ruomeng Ding, Yong Lin, Huan Zhang, Tong Zhang", "tldr": "", "abstract": "Reward models trained on human preference data have been proven to effectively align Large Language Models (LLMs) with human intent within the framework of reinforcement learning from human feedback (RLHF). However, current reward models have limited generalization capabilities to unseen prompts and responses, which can lead to an unexpected phenomenon known as reward over-optimization, resulting in a decline in actual performance due to excessive optimization of rewards. While previous research has advocated for constraining policy optimization, our study introduces a novel approach to enhance the reward model's generalization ability against distribution shifts by regularizing the hidden states. Specifically, we retain the base model's language model head and incorporate a suite of text-generation losses to preserve the hidden states' text-generation capabilities, while concurrently learning a reward head behind the same hidden states. Our experimental results demonstrate that the introduced regularization technique markedly improves the accuracy of learned reward models across a variety of out-of-distribution (OOD) tasks and effectively alleviates the over-optimization issue in RLHF, offering a more reliable and robust preference learning paradigm.", "keywords": "Large Language Model;Reward Model;Out-of-distribution Generalization;Alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Rui Yang;Ruomeng Ding;Yong Lin;Huan Zhang;Tong Zhang", "authorids": "~Rui_Yang8;~Ruomeng_Ding1;~Yong_Lin2;~Huan_Zhang1;~Tong_Zhang2", "gender": "M;F;;M;M", "homepage": "https://yangrui2015.github.io;;;http://huan-zhang.com;http://tongzhang-ml.org", "dblp": "92/1942-10;;;23/1797-1.html;07/4227-1", "google_scholar": "QHSUy3MAAAAJ;https://scholar.google.com/citations?hl=en;;LTa3GzEAAAAJ;LurWtuYAAAAJ", "orcid": "0000-0003-3525-1726;;;;0000-0002-5511-2558", "linkedin": ";;;;", "or_profile": "~Rui_Yang8;~Ruomeng_Ding1;~Yong_Lin2;~Huan_Zhang1;~Tong_Zhang2", "aff": "Hong Kong University of Science and Technology;Georgia Institute of Technology;;University of Illinois, Urbana Champaign;UIUC", "aff_domain": "ust.hk;gatech.edu;;uiuc.edu;illinois.edu", "position": "PhD student;MS student;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024regularizing,\ntitle={Regularizing Hidden States Enables Learning Generalizable Reward Model for {LLM}s},\nauthor={Rui Yang and Ruomeng Ding and Yong Lin and Huan Zhang and Tong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jwh9MHEfmY}\n}", "github": "", "reviewers": "tSHJ;SDk2;VYhZ;NAqK", "pdf_size": 772774, "rating": "4;5;7;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;3;4;3", "presentation": "3;3;3;3", "wc_summary": "68;25;117;68", "wc_strengths": "17;33;34;112", "wc_weaknesses": "139;46;34;73", "wc_questions": "11;82;277;1", "wc_limitations": "1;60;6;7", "wc_review": "236;246;468;261", "wc_reply_reviewers": "0;48;9;48", "wc_reply_authors": "239;108;31;35", "reply_reviewers": "0;2;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.5, 32.56148031032987 ], "wc_strengths_avg": [ 49.0, 36.99324262618783 ], "wc_weaknesses_avg": [ 73.0, 40.6386515524322 ], "wc_questions_avg": [ 92.75, 110.86562812702591 ], "wc_limitations_avg": [ 18.5, 24.06761309311748 ], "wc_review_avg": [ 302.75, 95.82112241045812 ], "wc_reply_reviewers_avg": [ 26.25, 21.98152633462927 ], "wc_reply_authors_avg": [ 103.25, 84.1557336133433 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4583452762644537432&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "ust.hk;gatech.edu;;uiuc.edu;illinois.edu", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Georgia Institute of Technology;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.gatech.edu;https://illinois.edu", "aff_unique_abbr": "HKUST;Georgia Tech;UIUC", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Hong Kong SAR;;Urbana-Champaign", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "A New Multi-Source Light Detection Benchmark and Semi-Supervised Focal Light Detection", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97541", "id": "jz2CTTCABH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jz2CTTCABH", "openreview": "https://openreview.net/forum?id=jz2CTTCABH", "poster": "/media/PosterPDFs/NeurIPS%202024/97541.png?t=1733301140.9720614", "project": "", "author_site": "Jae-Yong Baek, Yong-Sang Yoo, Seung-Hwan Bae", "tldr": "", "abstract": "This paper addresses a multi-source light detection (LD) problem from vehicles, traffic signals, and streetlights under driving scenarios. Albeit it is crucial for autonomous driving and night vision, this problem has not been yet focused on as much as other object detection (OD). One of the main reasons is the absence of a public available LD benchmark dataset. Therefore, we construct a new large LD dataset consisting of different light sources via heavy annotation: YouTube Driving Light Detection dataset (YDLD). Compared to the existing LD datasets, our dataset has much more images and box annotations for multi-source lights. We also provide rigorous statistical analysis and transfer learning comparison of other well-known detection benchmark datasets to prove the generality of our YDLD.\n\nFor the recent object detectors, we achieve the extensive comparison results on YDLD. However, they tend to yield the low mAP scores due to the intrinsic challenges of LD caused by very tiny size and similar appearance. To resolve those, we design a novel lightness focal loss which penalizes miss-classified samples more and a lightness spatial attention prior by reflecting a global scene context. In addition, we develop a semi-supervised focal light detection (SS-FLD) by embedding our lightness focal loss into the semi-supervised object detection (SSOD). We prove that our methods can consistently boost mAP to the variety of types of recent detectors on YDLD. We will open both YDLD and SS-FLD code at https://github.com/YDLD-dataset/YDLD.", "keywords": "Multi-source light detection;Lightness focal loss;Light detection", "primary_area": "", "supplementary_material": "/attachment/d83b9716df9b84da3d16abf5c1a6c754a854bb4c.zip", "author": "Jae-Yong Baek;Yong-Sang Yoo;Seung-Hwan Bae", "authorids": "~Jae-Yong_Baek4;~Yong-Sang_Yoo1;~Seung-Hwan_Bae1", "gender": "M;M;M", "homepage": ";;https://cvl.inha.ac.kr/", "dblp": "240/6943;240/6988;211/6993", "google_scholar": "https://scholar.google.co.kr/citations?user=0yV2qjgAAAAJ;MgTJ6r4AAAAJ;https://scholar.google.co.kr/citations?user=Jd2Jp0wAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jae-Yong_Baek4;~Yong-Sang_Yoo1;~Seung-Hwan_Bae1", "aff": "Inha University;Inha University;Inha University", "aff_domain": "inha.ac.kr;inha.ac.kr;inha.ac.kr", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbaek2024a,\ntitle={A New Multi-Source Light Detection Benchmark and Semi-Supervised Focal Light Detection},\nauthor={Jae-Yong Baek and Yong-Sang Yoo and Seung-Hwan Bae},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=jz2CTTCABH}\n}", "github": "", "reviewers": "zKq3;CuTW;RwvL", "pdf_size": 13540293, "rating": "6;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "76;56;82", "wc_strengths": "66;26;59", "wc_improvement": "209;22;30", "wc_limitations": "16;33;38", "wc_correctness": "1;1;38", "wc_clarity": "1;1;60", "wc_relation_to_prior_work": "1;27;14", "wc_documentation": "1;18;34", "wc_additional_feedback": "1;1;1", "wc_review": "372;185;356", "wc_reply_reviewers": "59;0;0", "wc_reply_authors": "220;0;0", "reply_reviewers": "2;0;0", "reply_authors": "4;7;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 71.33333333333333, 11.115554667022044 ], "wc_strengths_avg": [ 50.333333333333336, 17.441967269268172 ], "wc_improvement_avg": [ 87.0, 86.3288287113098 ], "wc_limitations_avg": [ 29.0, 9.41629792788369 ], "wc_correctness_avg": [ 13.333333333333334, 17.441967269268172 ], "wc_clarity_avg": [ 20.666666666666668, 27.812866726670865 ], "wc_relation_to_prior_work_avg": [ 14.0, 10.614455552060438 ], "wc_documentation_avg": [ 17.666666666666668, 13.474255287605157 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 304.3333333333333, 84.63385190861212 ], "wc_reply_reviewers_avg": [ 19.666666666666668, 27.812866726670865 ], "wc_reply_authors_avg": [ 73.33333333333333, 103.70899457402697 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.449489742783178 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FkUIBp7nKRAJ:scholar.google.com/&scioq=A+New+Multi-Source+Light+Detection+Benchmark+and+Semi-Supervised+Focal+Light+Detection&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "inha.ac.kr;inha.ac.kr;inha.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Inha University", "aff_unique_dep": "", "aff_unique_url": "https://www.inha.edu/", "aff_unique_abbr": "Inha", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "DRIP: Unleashing Diffusion Priors for Joint Foreground and Alpha Prediction in Image Matting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93926", "id": "jz5ZMeN9He", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jz5ZMeN9He", "openreview": "https://openreview.net/forum?id=jz5ZMeN9He", "poster": "", "project": "", "author_site": "Xiaodi Li, Zongxin Yang, Ruijie Quan, Yi Yang", "tldr": "", "abstract": "Recovering the foreground color and opacity/alpha matte from a single image (i.e., image matting) is a challenging and ill-posed problem where data priors play a critical role in achieving precise results. Traditional methods generally predict the alpha matte and then extract the foreground through post-processing, often failing to produce high-fidelity foreground color. This failure stems from the models' difficulty in learning robust color predictions from limited matting datasets. To address this, we explore the potential of leveraging vision priors embedded in pre-trained latent diffusion models (LDM) for estimating foreground RGBA values in challenging scenarios and rare objects. We introduce Drip, a novel approach for image matting that harnesses the rich prior knowledge of LDM models. Our method incorporates a switcher and a cross-domain attention mechanism to extend the original LDM for joint prediction of the foreground color and opacity. This setup facilitates mutual information exchange and ensures high consistency across both modalities. To mitigate the inherent reconstruction errors of the LDM's VAE decoder, we propose a latent transparency decoder to align the RGBA prediction with the input image, thereby reducing discrepancies. Comprehensive experimental results demonstrate that our approach achieves state-of-the-art performance in foreground and alpha predictions and shows remarkable generalizability across various benchmarks.", "keywords": "Image Matting;Diffusion Model;Foreground Estimation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xiaodi Li;Zongxin Yang;Ruijie Quan;Yi Yang", "authorids": "~Xiaodi_Li2;~Zongxin_Yang1;~Ruijie_Quan1;~Yi_Yang22", "gender": ";M;M;M", "homepage": ";https://z-x-yang.github.io/;;https://person.zju.edu.cn/yiyang", "dblp": ";;238/0204;33/4854-1.html", "google_scholar": ";8IE0CfwAAAAJ;WKLRPsAAAAAJ;RMSuNFwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiaodi_Li2;~Zongxin_Yang1;~Ruijie_Quan1;~Yi_Yang22", "aff": ";Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": ";Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nli2024drip,\ntitle={{DRIP}: Unleashing Diffusion Priors for Joint Foreground and Alpha Prediction in Image Matting},\nauthor={Xiaodi Li and Zongxin Yang and Ruijie Quan and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jz5ZMeN9He}\n}", "github": "", "reviewers": "L9RY;TjXY;YFFw;8UU8", "pdf_size": 18478810, "rating": "4;5;5;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "53;74;93;65", "wc_strengths": "8;96;32;97", "wc_weaknesses": "58;131;116;172", "wc_questions": "1;60;95;2", "wc_limitations": "6;20;39;1", "wc_review": "126;381;375;337", "wc_reply_reviewers": "0;62;0;63", "wc_reply_authors": "0;133;82;52", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 14.600941750448838 ], "wc_strengths_avg": [ 58.25, 39.18146883413127 ], "wc_weaknesses_avg": [ 119.25, 40.874044331335746 ], "wc_questions_avg": [ 39.5, 39.96561021678513 ], "wc_limitations_avg": [ 16.5, 14.739402972983676 ], "wc_review_avg": [ 304.75, 104.5714468676799 ], "wc_reply_reviewers_avg": [ 31.25, 31.251999936004097 ], "wc_reply_authors_avg": [ 66.75, 48.204641892664235 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=929864353646161335&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Efficient multi-prompt evaluation of LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93925", "id": "jzkpwcj200", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jzkpwcj200", "openreview": "https://openreview.net/forum?id=jzkpwcj200", "poster": "", "project": "", "author_site": "Felipe Maia Polo, Ronald Xu, Lucas Weber, M\u00edrian Silva, Onkar Bhardwaj, Leshem Choshen, Allysson de Oliveira, Yuekai Sun, Mikhail Yurochkin", "tldr": "", "abstract": "Most popular benchmarks for comparing LLMs rely on a limited set of prompt templates, which may not fully capture the LLMs\u2019 abilities and can affect the reproducibility of results on leaderboards. Many recent works empirically verify prompt sensitivity and advocate for changes in LLM evaluation. In this paper, we consider the problem of estimating the performance distribution across many prompt variants instead of finding a single prompt to evaluate with. We introduce PromptEval, a method for estimating performance across a large set of prompts borrowing strength across prompts and examples to produce accurate estimates under practical evaluation budgets. The resulting distribution can be used to obtain performance quantiles to construct various robust performance metrics (e.g., top 95% quantile or median). We prove that PromptEval consistently estimates the performance distribution and demonstrate its efficacy empirically on three prominent LLM benchmarks: MMLU, BIG-bench Hard, and LMentry; for example, PromptEval can accurately estimate performance quantiles across 100 prompt templates on MMLU with a budget equivalent to two single-prompt evaluations. Moreover, we show how PromptEval can be useful in LLM-as-a-judge and best prompt identification applications.", "keywords": "llm;multi-prompt evaluation;efficient evaluation;evaluation", "primary_area": "evaluation", "supplementary_material": "", "author": "Felipe Maia Polo;Ronald Xu;Lucas Weber;M\u00edrian Silva;Onkar Bhardwaj;Leshem Choshen;Allysson Flavio Melo de Oliveira;Yuekai Sun;Mikhail Yurochkin", "authorids": "~Felipe_Maia_Polo1;~Ronald_Xu1;~Lucas_Weber1;~M\u00edrian_Silva1;~Onkar_Bhardwaj1;~Leshem_Choshen1;~Allysson_Flavio_Melo_de_Oliveira1;~Yuekai_Sun1;~Mikhail_Yurochkin1", "gender": "M;M;M;F;;Not Specified;M;;M", "homepage": "https://felipemaiapolo.github.io/;;https://lucweber.github.io/;https://mirianfsilva.github.io/;;https://ktilana.wixsite.com/leshem-choshen;;https://yuekai.github.io/;https://moonfolk.github.io/", "dblp": "261/9581;;;;;218/5237;;;191/6719", "google_scholar": "CJbgmnkAAAAJ;cAIyTFAAAAAJ;LGnAvXkAAAAJ;6vP2segAAAAJ;;https://scholar.google.com/citations?hl=en;;6T1XtW8AAAAJ;QjBF9sUAAAAJ", "orcid": "0000-0002-4950-2795;;;0009-0006-1950-4120;;0000-0002-0085-6496;;;", "linkedin": ";;lucasweber0/;mirianfsilva/;;leshemchoshen/;allysson-oliveira/;;mikhail-yurochkin-a45659114/", "or_profile": "~Felipe_Maia_Polo1;~Ronald_Xu1;~Lucas_Weber1;~M\u00edrian_Silva1;~Onkar_Bhardwaj1;~Leshem_Choshen1;~Allysson_Flavio_Melo_de_Oliveira1;~Yuekai_Sun1;~Mikhail_Yurochkin1", "aff": "University of Michigan - Ann Arbor;Massachusetts Institute of Technology;Universitat Pompeu Fabra;Universidade Federal de Minas Gerais, Universidade Federal de Minas Gerais;;International Business Machines;International Business Machines;University of Michigan - Ann Arbor;IBM Research", "aff_domain": "umich.edu;mit.edu;upf.es;dcc.ufmg.br;;ibm.com;ibm.com;umich.edu;ibm.com", "position": "PhD student;Undergrad student;PhD student;MS student;;Researcher;Researcher;Assistant \u2192 Associate Professor of Statistics;Researcher", "bibtex": "@inproceedings{\npolo2024efficient,\ntitle={Efficient multi-prompt evaluation of {LLM}s},\nauthor={Felipe Maia Polo and Ronald Xu and Lucas Weber and M{\\'\\i}rian Silva and Onkar Bhardwaj and Leshem Choshen and Allysson Flavio Melo de Oliveira and Yuekai Sun and Mikhail Yurochkin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jzkpwcj200}\n}", "github": "", "reviewers": "hMGK;PhKA;emjz", "pdf_size": 864320, "rating": "5;6;8", "confidence": "3;4;1", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "53;30;25", "wc_strengths": "39;64;32", "wc_weaknesses": "56;132;1", "wc_questions": "38;7;1", "wc_limitations": "15;9;1", "wc_review": "201;242;60", "wc_reply_reviewers": "30;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 36.0, 12.192894105447921 ], "wc_strengths_avg": [ 45.0, 13.73559851869101 ], "wc_weaknesses_avg": [ 63.0, 53.70909296075169 ], "wc_questions_avg": [ 15.333333333333334, 16.21384867602041 ], "wc_limitations_avg": [ 8.333333333333334, 5.734883511361751 ], "wc_review_avg": [ 167.66666666666666, 77.95012650549211 ], "wc_reply_reviewers_avg": [ 10.0, 14.142135623730951 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7857142857142859, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17365611858523186794&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 6, "email": "umich.edu;mit.edu;upf.es;dcc.ufmg.br;;ibm.com;ibm.com;umich.edu;ibm.com", "author_num": 9, "aff_unique_index": "0;1;2;3;4;4;0;5", "aff_unique_norm": "University of Michigan;Massachusetts Institute of Technology;Universitat Pompeu Fabra;Universidade Federal de Minas Gerais;International Business Machines Corporation;IBM", "aff_unique_dep": ";;;;;IBM Research", "aff_unique_url": "https://www.umich.edu;https://web.mit.edu;https://www.upf.edu/;https://www.ufmg.br;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "UM;MIT;UPF;UFMG;IBM;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;1;2;0;0;0;0", "aff_country_unique": "United States;Spain;Brazil" }, { "title": "Solving Minimum-Cost Reach Avoid using Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93924", "id": "jzngdJQ2lY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=jzngdJQ2lY", "openreview": "https://openreview.net/forum?id=jzngdJQ2lY", "poster": "/media/PosterPDFs/NeurIPS%202024/93924.png?t=1731660267.3989549", "project": "", "author_site": "Oswin So, Cheng Ge, Chuchu Fan", "tldr": "", "abstract": "Current reinforcement-learning methods are unable to directly learn policies that solve the minimum cost reach-avoid problem to minimize cumulative costs subject to the constraints of reaching the goal and avoiding unsafe states, as the structure of this new optimization problem is incompatible with current methods. Instead, a surrogate problem is solved where all objectives are combined with a weighted sum. However, this surrogate objective results in suboptimal policies that do not directly minimize the cumulative cost. In this work, we propose RC-PPO, a reinforcement-learning-based method for solving the minimum-cost reach-avoid problem by using connections to Hamilton-Jacobi reachability. Empirical results demonstrate that RC-PPO learns policies with comparable goal-reaching rates to while achieving up to 57% lower cumulative costs compared to existing methods on a suite of minimum-cost reach-avoid benchmarks on the Mujoco simulator. The project page can be found at https://oswinso.xyz/rcppo.", "keywords": "Reinforcement Learning;Optimal Control;Reachability Analysis", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c131b5a4d8faabfd95b6491e9d829f51f5403cca.zip", "author": "Oswin So;Cheng Ge;Chuchu Fan", "authorids": "~Oswin_So1;~Cheng_Ge4;~Chuchu_Fan2", "gender": "M;M;F", "homepage": "https://oswinso.xyz;https://github.com/mepear;https://chuchu.mit.edu", "dblp": "274/3208;;127/1756", "google_scholar": "AwlxGQgAAAAJ;;J-dq_8EAAAAJ", "orcid": "0000-0002-5411-3663;;", "linkedin": "oswinso/;;chuchu-fan/", "or_profile": "~Oswin_So1;~Cheng_Ge4;~Chuchu_Fan2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nso2024solving,\ntitle={Solving Minimum-Cost Reach Avoid using Reinforcement Learning},\nauthor={Oswin So and Cheng Ge and Chuchu Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=jzngdJQ2lY}\n}", "github": "", "reviewers": "186A;M8co;zsEG", "pdf_size": 4656505, "rating": "5;6;7", "confidence": "3;4;3", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "63;85;66", "wc_strengths": "11;37;93", "wc_weaknesses": "110;247;66", "wc_questions": "66;187;36", "wc_limitations": "44;3;4", "wc_review": "294;559;265", "wc_reply_reviewers": "234;175;28", "wc_reply_authors": "1064;867;0", "reply_reviewers": "2;2;1", "reply_authors": "3;3;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 71.33333333333333, 9.741092797468305 ], "wc_strengths_avg": [ 47.0, 34.215006454283575 ], "wc_weaknesses_avg": [ 141.0, 77.07572034477957 ], "wc_questions_avg": [ 96.33333333333333, 65.27037783115877 ], "wc_limitations_avg": [ 17.0, 19.096247449870006 ], "wc_review_avg": [ 372.6666666666667, 132.28840547161425 ], "wc_reply_reviewers_avg": [ 145.66666666666666, 86.61921778040302 ], "wc_reply_authors_avg": [ 643.6666666666666, 462.19211974627564 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4496835855757736250&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "k0qTnbQxzR", "title": "CogCoM: Train Large Vision-Language Models Diving into Details through Chain of Manipulations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Vision-Language Models (VLMs) have demonstrated their broad effectiveness thanks to extensive training in aligning visual instructions to responses. However, such training of conclusive alignment leads models to ignore essential visual reasoning, further resulting in failures in meticulous visual problems and unfaithful responses. Drawing inspiration from human cognition in solving visual problems (e.g., marking, zoom in), this paper introduces Chain of Manipulations, a mechanism that enables VLMs to solve problems step-by-step with evidence. After training, models can solve various visual problems by eliciting intrinsic manipulations (e.g., grounding, zoom in) and results (e.g., boxes, image) actively without involving external tools, while also allowing users to trace error causes. We study the roadmap to implement this mechanism, including (1) a flexible design of manipulations upon extensive analysis, (2) an efficient automated data generation pipeline, (3) a compatible VLM architecture capable of multi-turn multi-image, and (4) a model training process for versatile capabilities. With the design, we also manually annotate 6K high-quality samples for the challenging graphical mathematical problems. Our trained model, CogCoM, equipped with this mechanism with 17B parameters achieves state-of-the-art performance across 9 benchmarks from 4 categories, demonstrating the effectiveness while preserving the interpretability. Our code, model weights, and collected data will be publicly available.", "keywords": "Vision-Language Models;Multimodal Large Language Models;Visual Reasoning;Chain of Manipulations", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ji Qi;Ming Ding;Weihan Wang;Yushi Bai;Qingsong Lv;Wenyi Hong;Bin Xu;Lei Hou;Juanzi Li;Yuxiao Dong;Jie Tang", "authorids": "~Ji_Qi3;~Ming_Ding1;~Weihan_Wang2;~Yushi_Bai1;~Qingsong_Lv4;~Wenyi_Hong1;~Bin_Xu1;~Lei_Hou2;~Juanzi_Li1;~Yuxiao_Dong1;~Jie_Tang1", "gender": "M;M;M;M;;F;M;M;;M;", "homepage": "https://qijimrc.github.io;;https://github.com/mactavish91;https://bys0318.github.io/;;;http://keg.cs.tsinghua.edu.cn/persons/xubin/;https://www.cs.tsinghua.edu.cn/csen/info/1305/4466.htm;;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": ";48/3462-4;;302/4421;;216/5413;;32/5685-1;;17/9267;", "google_scholar": "vv6bZjMAAAAJ;Va50YzkAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;tJNCPogAAAAJ;JSEzrlwAAAAJ;https://scholar.google.com.tw/citations?user=h-BY2wgAAAAJ;YnIq4hsAAAAJ;;https://scholar.google.com.hk/citations?hl=en;", "orcid": ";;;;;;0000-0003-3040-4391;0000-0002-8907-3526;;0000-0002-6092-2002;", "linkedin": ";;;;;wenyi-hong;;;;;", "or_profile": "~Ji_Qi3;~Ming_Ding1;~Weihan_Wang2;~Yushi_Bai1;~Qingsong_Lv4;~Wenyi_Hong1;~Bin_Xu1;~Lei_Hou2;~Juanzi_Li1;~Yuxiao_Dong1;~Jie_Tang1", "aff": "Tsinghua University;ZHIPU AI;Tsinghua University;Tsinghua University;Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Department of Computer Science, Tsinghua University;Tsinghua University;;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;zhipuai.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;cs.tsinghuae.du.cn;tsinghua.edu.cn;;tsinghua.edu.cn;", "position": "PhD student;Principal Researcher;MS student;PhD student;MS student;PhD student;Full Professor;Assistant Professor;;Associate Professor;", "bibtex": "@misc{\nanonymous2024cogcom,\ntitle={CogCoM: Train Large Vision-Language Models Diving into Details through\u000b Chain of Manipulations},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=k0qTnbQxzR}\n}", "github": "", "project": "", "reviewers": "jU6F;GCaR;XZTe", "site": "https://openreview.net/forum?id=k0qTnbQxzR", "pdf_size": 6488335, "rating": "4;5;6", "confidence": "4;4;5", "soundness": "2;3;2", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "48;85;90", "wc_strengths": "50;125;25", "wc_weaknesses": "198;131;179", "wc_questions": "2;4;60", "wc_limitations": "2;4;5", "wc_review": "300;349;359", "wc_reply_reviewers": "0;0;79", "wc_reply_authors": "90;28;334", "reply_reviewers": "0;0;2", "reply_authors": "3;2;3", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.33333333333333, 18.732028424302822 ], "wc_strengths_avg": [ 66.66666666666667, 42.49182927993987 ], "wc_weaknesses_avg": [ 169.33333333333334, 28.193773938387338 ], "wc_questions_avg": [ 22.0, 26.8824602048746 ], "wc_limitations_avg": [ 3.6666666666666665, 1.247219128924647 ], "wc_review_avg": [ 336.0, 25.78113005022601 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 37.2409571424915 ], "wc_reply_authors_avg": [ 150.66666666666666, 132.08414826247025 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3152660253766448416&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University;Zhipu AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.zhipu.ai", "aff_unique_abbr": "THU;ZHIPU AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Multi-Label Open Set Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93923", "id": "k1VrxRS6WZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k1VrxRS6WZ", "openreview": "https://openreview.net/forum?id=k1VrxRS6WZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93923.png?t=1732191671.7177472", "project": "", "author_site": "Yibo Wang, Jun-Yi Hang, Min-Ling Zhang", "tldr": "", "abstract": "In multi-label learning, each training instance is associated with multiple labels simultaneously. Traditional multi-label learning studies primarily focus on closed set scenario, i.e. the class label set of test data is identical to those used in training phase. Nevertheless, in numerous real-world scenarios, the environment is open and dynamic where unknown labels may emerge gradually during testing. In this paper, the problem of multi-label open set recognition (MLOSR) is investigated, which poses significant challenges in classifying and recognizing instances with unknown labels in multi-label setting. To enable open set multi-label prediction, a novel approach named SLAN is proposed by leveraging sub-labeling information enriched by structural information in the feature space. Accordingly, unknown labels are recognized by differentiating the sub-labeling information from holistic supervision. Experimental results on various datasets validate the effectiveness of the proposed approach in dealing with the MLOSR problem.", "keywords": "machine learning;multi-label learning;open set recognition", "primary_area": "other", "supplementary_material": "", "author": "Yibo Wang;Jun-Yi Hang;Min-Ling Zhang", "authorids": "~Yibo_Wang3;~Jun-Yi_Hang1;~Min-Ling_Zhang2", "gender": "M;M;M", "homepage": "http://palm.seu.edu.cn/homepage/wangyibo/demo/index.html;;http://palm.seu.edu.cn/zhangml/", "dblp": ";https://dblp.uni-trier.de/pid/299/4577;84/271.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=s-4VLP0AAAAJ;uFHCIM0AAAAJ", "orcid": ";;0000-0003-1880-5918", "linkedin": ";;", "or_profile": "~Yibo_Wang3;~Jun-Yi_Hang1;~Min-Ling_Zhang2", "aff": "Southeast University;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024multilabel,\ntitle={Multi-Label Open Set Recognition},\nauthor={Yibo Wang and Jun-Yi Hang and Min-Ling Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k1VrxRS6WZ}\n}", "github": "", "reviewers": "wpso;zY3w;vnjs", "pdf_size": 622077, "rating": "5;6;7", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "54;98;51", "wc_strengths": "51;153;102", "wc_weaknesses": "80;127;118", "wc_questions": "88;95;24", "wc_limitations": "7;1;1", "wc_review": "280;474;296", "wc_reply_reviewers": "13;0;0", "wc_reply_authors": "99;46;46", "reply_reviewers": "1;0;0", "reply_authors": "3;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 67.66666666666667, 21.483844059096022 ], "wc_strengths_avg": [ 102.0, 41.641325627314025 ], "wc_weaknesses_avg": [ 108.33333333333333, 20.368821489936252 ], "wc_questions_avg": [ 69.0, 31.94787421201396 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 350.0, 87.92420978699022 ], "wc_reply_reviewers_avg": [ 4.333333333333333, 6.128258770283412 ], "wc_reply_authors_avg": [ 63.666666666666664, 24.98443960192468 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n7Go_J6CaswJ:scholar.google.com/&scioq=Multi-Label+Open+Set+Recognition&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Physically Compatible 3D Object Modeling from a Single Image", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93922", "id": "k29Iv0XrBF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k29Iv0XrBF", "openreview": "https://openreview.net/forum?id=k29Iv0XrBF", "poster": "/media/PosterPDFs/NeurIPS%202024/93922.png?t=1731721132.7191567", "project": "", "author_site": "Minghao Guo, Bohan Wang, Pingchuan Ma, Tianyuan Zhang, Crystal Owens, Chuang Gan, Josh Tenenbaum, Kaiming He, Wojciech Matusik", "tldr": "", "abstract": "We present a computational framework that transforms single images into 3D physical objects. The visual geometry of a physical object in an image is determined by three orthogonal attributes: mechanical properties, external forces, and rest-shape geometry. Existing single-view 3D reconstruction methods often overlook this underlying composition, presuming rigidity or neglecting external forces. Consequently, the reconstructed objects fail to withstand real-world physical forces, resulting in instability or undesirable deformation -- diverging from their intended designs as depicted in the image. Our optimization framework addresses this by embedding physical compatibility into the reconstruction process. We explicitly decompose the three physical attributes and link them through static equilibrium, which serves as a hard constraint, ensuring that the optimized physical shapes exhibit desired physical behaviors. Evaluations on a dataset collected from Objaverse demonstrate that our framework consistently enhances the physical realism of 3D models over existing methods. The utility of our framework extends to practical applications in dynamic simulations and 3D printing, where adherence to physical compatibility is paramount.", "keywords": "single-image modeling;physical compatibility", "primary_area": "machine_vision", "supplementary_material": "/attachment/322115a3d0d994fc79dbb38fc527c4c6761c7eea.zip", "author": "Minghao Guo;Bohan Wang;Pingchuan Ma;Tianyuan Zhang;Crystal Elaine Owens;Chuang Gan;Joshua B. Tenenbaum;Kaiming He;Wojciech Matusik", "authorids": "~Minghao_Guo1;~Bohan_Wang9;~Pingchuan_Ma3;~Tianyuan_Zhang2;~Crystal_Elaine_Owens1;~Chuang_Gan1;~Joshua_B._Tenenbaum1;~Kaiming_He2;~Wojciech_Matusik2", "gender": "M;M;M;;F;M;;;M", "homepage": "https://www.minghaoguo.com/;https://bohanwang123.com;https://people.csail.mit.edu/pcma;;https://www.crystalowens.com/;http://people.csail.mit.edu/ganchuang/;;;https://cdfg.mit.edu/wojciech", "dblp": "145/0008/;;215/4446-2;;;139/6993;t/JoshuaBTenenbaum;;", "google_scholar": "Hq2unJcAAAAJ;;EtCZmkwAAAAJ;;LvkKN_oAAAAJ;PTeSCbIAAAAJ;;;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-1439-1455;;;0000-0002-2433-7025;;;;0000-0003-0212-5643", "linkedin": ";;;;crystaleowens/;;;;wojciech-matusik-67238126/", "or_profile": "~Minghao_Guo1;~Bohan_Wang9;~Pingchuan_Ma3;~Tianyuan_Zhang2;~Crystal_Elaine_Owens1;~Chuang_Gan1;~Joshua_B._Tenenbaum1;~Kaiming_He2;~Wojciech_Matusik2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;University of Massachusetts at Amherst;Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;;mit.edu;umass.edu;mit.edu;;mit.edu", "position": "PhD student;Postdoc;PhD student;;Postdoc;Assistant Professor;Professor;;Full Professor", "bibtex": "@inproceedings{\nguo2024physically,\ntitle={Physically Compatible 3D Object Modeling from a Single Image},\nauthor={Minghao Guo and Bohan Wang and Pingchuan Ma and Tianyuan Zhang and Crystal Elaine Owens and Chuang Gan and Joshua B. Tenenbaum and Kaiming He and Wojciech Matusik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k29Iv0XrBF}\n}", "github": "", "reviewers": "H9KU;91LB;XLVo;cqis", "pdf_size": 17185822, "rating": "5;6;7;8", "confidence": "4;3;4;3", "soundness": "4;4;3;3", "novelty": "4;3;4;3", "presentation": "3;4;4;3", "wc_summary": "59;90;54;103", "wc_strengths": "87;61;28;80", "wc_weaknesses": "66;49;34;33", "wc_questions": "65;48;118;20", "wc_limitations": "11;1;15;10", "wc_review": "288;249;249;246", "wc_reply_reviewers": "58;0;0;0", "wc_reply_authors": "67;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.5, 20.59732992404598 ], "wc_strengths_avg": [ 64.0, 22.85825890132492 ], "wc_weaknesses_avg": [ 45.5, 13.425721582097552 ], "wc_questions_avg": [ 62.75, 35.716767770894386 ], "wc_limitations_avg": [ 9.25, 5.11737237261468 ], "wc_review_avg": [ 258.0, 17.363755354185336 ], "wc_reply_reviewers_avg": [ 14.5, 25.11473670974872 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.4472135954999579, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4323901317068019883&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "mit.edu;mit.edu;mit.edu;;mit.edu;umass.edu;mit.edu;;mit.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.umass.edu", "aff_unique_abbr": "MIT;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Off-Dynamics Reinforcement Learning via Domain Adaptation and Reward Augmented Imitation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93921", "id": "k2hS5Rt1N0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k2hS5Rt1N0", "openreview": "https://openreview.net/forum?id=k2hS5Rt1N0", "poster": "/media/PosterPDFs/NeurIPS%202024/93921.png?t=1731733862.833467", "project": "", "author_site": "Yihong Guo, Yixuan Wang, Yuanyuan Shi, Pan Xu, Anqi Liu", "tldr": "", "abstract": "Training a policy in a source domain for deployment in the target domain under a dynamics shift can be challenging, often resulting in performance degradation. Previous work tackles this challenge by training on the source domain with modified rewards derived by matching distributions between the source and the target optimal trajectories. However, pure modified rewards only ensure the behavior of the learned policy in the source domain resembles trajectories produced by the target optimal policies, which does not guarantee optimal performance when the learned policy is actually deployed to the target domain. In this work, we propose to utilize imitation learning to transfer the policy learned from the reward modification to the target domain so that the new policy can generate the same trajectories in the target domain. Our approach, Domain Adaptation and Reward Augmented Imitation Learning (DARAIL), utilizes the reward modification for domain adaptation and follows the general framework of generative adversarial imitation learning from observation (GAIfO) by applying a reward augmented estimator for the policy optimization step. Theoretically, we present an error bound for our method under a mild assumption regarding the dynamics shift to justify the motivation of our method. Empirically, our method outperforms the pure modified reward method without imitation learning and also outperforms other baselines in benchmark off-dynamics environments.", "keywords": "off-dynamics reinforcement learning;domain adaptation;imitation learning;dynamics mismatch", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yihong Guo;Yixuan Wang;Yuanyuan Shi;Pan Xu;Anqi Liu", "authorids": "~Yihong_Guo4;~Yixuan_Wang7;~Yuanyuan_Shi1;~Pan_Xu1;~Anqi_Liu2", "gender": "M;;;M;F", "homepage": "https://scholar.google.com/citations?user=tr1UChkAAAAJ&hl=en;;;https://panxulab.github.io/;https://anqiliu-ai.github.io/", "dblp": ";;;11/9718-2;", "google_scholar": "tr1UChkAAAAJ;;;UkYBx6YAAAAJ;Q8yp6zQAAAAJ", "orcid": ";;;0000-0002-2559-8622;0000-0002-0468-5698", "linkedin": ";yixuan-wang-b81200293;;pan-xu-0931a2a6/;", "or_profile": "~Yihong_Guo4;~Yixuan_Wang7;~Yuanyuan_Shi1;~Pan_Xu1;~Anqi_Liu2", "aff": "Johns Hopkins University;Johns Hopkins University;;Duke University;University of Illinois, Chicago", "aff_domain": "jh.edu;jh.edu;;duke.edu;uic.edu", "position": "PhD student;MS student;;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nguo2024offdynamics,\ntitle={Off-Dynamics Reinforcement Learning via Domain Adaptation and Reward Augmented Imitation},\nauthor={Yihong Guo and Yixuan Wang and Yuanyuan Shi and Pan Xu and Anqi Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k2hS5Rt1N0}\n}", "github": "", "reviewers": "JwpF;VFUe;mGy8;LdFt", "pdf_size": 2467210, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;4;2", "wc_summary": "22;175;95;76", "wc_strengths": "55;59;51;37", "wc_weaknesses": "102;998;75;70", "wc_questions": "451;51;15;85", "wc_limitations": "26;50;8;18", "wc_review": "656;1333;244;286", "wc_reply_reviewers": "55;610;0;19", "wc_reply_authors": "20;1005;0;9", "reply_reviewers": "1;1;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 92.0, 54.895354994753426 ], "wc_strengths_avg": [ 50.5, 8.2915619758885 ], "wc_weaknesses_avg": [ 311.25, 396.68209878944623 ], "wc_questions_avg": [ 150.5, 175.25053494925487 ], "wc_limitations_avg": [ 25.5, 15.5161206491829 ], "wc_review_avg": [ 629.75, 436.52512814269926 ], "wc_reply_reviewers_avg": [ 171.0, 254.2252937848632 ], "wc_reply_authors_avg": [ 258.5, 431.0501710938067 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11220087856659629707&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "jh.edu;jh.edu;;duke.edu;uic.edu", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Johns Hopkins University;Duke University;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jhu.edu;https://www.duke.edu;https://www.uic.edu", "aff_unique_abbr": "JHU;Duke;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unveiling the Potential of Robustness in Selecting Conditional Average Treatment Effect Estimators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93920", "id": "k4EP46Q9X2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k4EP46Q9X2", "openreview": "https://openreview.net/forum?id=k4EP46Q9X2", "poster": "/media/PosterPDFs/NeurIPS%202024/93920.png?t=1731833026.8172648", "project": "", "author_site": "Yiyan Huang, Cheuk Hang LEUNG, Siyi WANG, YIJUN LI, Qi WU", "tldr": "", "abstract": "The growing demand for personalized decision-making has led to a surge of interest in estimating the Conditional Average Treatment Effect (CATE). Various types of CATE estimators have been developed with advancements in machine learning and causal inference. However, selecting the desirable CATE estimator through a conventional model validation procedure remains impractical due to the absence of counterfactual outcomes in observational data. Existing approaches for CATE estimator selection, such as plug-in and pseudo-outcome metrics, face two challenges. First, they must determine the metric form and the underlying machine learning models for fitting nuisance parameters (e.g., outcome function, propensity function, and plug-in learner). Second, they lack a specific focus on selecting a robust CATE estimator. To address these challenges, this paper introduces a Distributionally Robust Metric (DRM) for CATE estimator selection. The proposed DRM is nuisance-free, eliminating the need to fit models for nuisance parameters, and it effectively prioritizes the selection of a distributionally robust CATE estimator. The experimental results validate the effectiveness of the DRM method in selecting CATE estimators that are robust to the distribution shift incurred by covariate shift and hidden confounders.", "keywords": "Causal inference;Treatment effect;CATE estimator selection;Model Validation;Robustness", "primary_area": "causal_inference", "supplementary_material": "/attachment/0c6112a4515039f4e353318941196115867a1891.zip", "author": "Yiyan HUANG;Cheuk Hang LEUNG;WANG Siyi;YIJUN LI;Qi WU", "authorids": "~Yiyan_HUANG2;~Cheuk_Hang_LEUNG2;~WANG_Siyi1;~YIJUN_LI6;~Qi_WU5", "gender": "F;M;;M;M", "homepage": "https://scholars.cityu.edu.hk/en/persons/siyi-wang(18f03378-7ea9-4fe8-ba6b-b9b069369b6b).html;;;http://www.cityu.edu.hk/stfprofile/qiwu55.htm;", "dblp": ";;;;", "google_scholar": ";lfJisoYAAAAJ;;60AO2VAAAAAJ;", "orcid": ";0000-0001-7237-1378;0000-0002-3911-9055;0000-0002-4028-981X;0000-0003-1268-2208", "linkedin": ";;;qi-wu-27802817;", "or_profile": "~WANG_Siyi1;~YIJUN_LI6;~Cheuk_Hang_Leung1;~Qi_Wu2;~Yiyan_Huang1", "aff": "City University of Hong Kong;City University Hong Kong ;City University of Hong Kong;City University of Hong Kong;Hong Kong Polytechnic University", "aff_domain": "cityu.edu.hk;cityu.edu;cityu.edu.hk;cityu.edu.hk;polyu.edu.hk", "position": "PhD student;PhD student;Research Assistant;Associate Professor, SDSc;Postdoc", "bibtex": "@inproceedings{\nhuang2024unveiling,\ntitle={Unveiling the Potential of Robustness in Selecting Conditional Average Treatment Effect Estimators},\nauthor={Yiyan HUANG and Cheuk Hang LEUNG and WANG Siyi and YIJUN LI and Qi WU},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k4EP46Q9X2}\n}", "github": "", "reviewers": "e7g2;T6Zv;1tzr;9qbS", "pdf_size": 556017, "rating": "6;6;8;8", "confidence": "3;4;4;4", "soundness": "3;2;4;3", "novelty": "2;2;4;3", "presentation": "3;3;2;3", "wc_summary": "42;127;31;141", "wc_strengths": "41;44;75;246", "wc_weaknesses": "157;249;311;315", "wc_questions": "128;10;29;143", "wc_limitations": "4;17;12;11", "wc_review": "372;447;458;856", "wc_reply_reviewers": "117;365;44;20", "wc_reply_authors": "152;1076;63;48", "reply_reviewers": "1;3;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.25, 49.15473018947414 ], "wc_strengths_avg": [ 101.5, 84.48224665573235 ], "wc_weaknesses_avg": [ 258.0, 63.914004725099176 ], "wc_questions_avg": [ 77.5, 58.62806495186414 ], "wc_limitations_avg": [ 11.0, 4.636809247747852 ], "wc_review_avg": [ 533.25, 189.2556141835692 ], "wc_reply_reviewers_avg": [ 136.5, 136.67571108284017 ], "wc_reply_authors_avg": [ 334.75, 429.80307990985824 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_368WZWEHdkJ:scholar.google.com/&scioq=Unveiling+the+Potential+of+Robustness+in+Selecting+Conditional+Average+Treatment+Effect+Estimators&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cityu.edu.hk;cityu.edu;cityu.edu.hk;cityu.edu.hk;polyu.edu.hk", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "City University of Hong Kong;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;https://www.polyu.edu.hk", "aff_unique_abbr": "CityU;PolyU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MLLMGuard: A Multi-dimensional Safety Evaluation Suite for Multimodal Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97540", "id": "k4tuZmvSnl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k4tuZmvSnl", "openreview": "https://openreview.net/forum?id=k4tuZmvSnl", "poster": "/media/PosterPDFs/NeurIPS%202024/97540.png?t=1731699901.9420524", "project": "", "author_site": "Tianle Gu, Zeyang Zhou, Kexin Huang, Liang Dandan, Yixu Wang, Haiquan Zhao, Yuanqi Yao, xingge qiao, Keqing wang, Yujiu Yang, Yan Teng, Yu Qiao, Yingchun Wang", "tldr": "", "abstract": "Powered by remarkable advancements in Large Language Models (LLMs), Multimodal Large Language Models (MLLMs) demonstrate impressive capabilities in manifold tasks.\nHowever, the practical application scenarios of MLLMs are intricate, exposing them to potential malicious instructions and thereby posing safety risks.\nWhile current benchmarks do incorporate certain safety considerations, they often lack comprehensive coverage and fail to exhibit the necessary rigor and robustness.\nFor instance, the common practice of employing GPT-4V as both the evaluator and a model to be evaluated lacks credibility, as it tends to exhibit a bias toward its own responses.\nIn this paper, we present MLLMGuard, a multi-dimensional safety evaluation suite for MLLMs, including a bilingual image-text evaluation dataset, inference utilities, and a lightweight evaluator.\nMLLMGuard's assessment comprehensively covers two languages (English and Chinese) and five important safety dimensions (Privacy, Bias, Toxicity, Truthfulness, and Legality), each with corresponding rich subtasks.\nFocusing on these dimensions, our evaluation dataset is primarily sourced from platforms such as social media, and \nit integrates text-based and image-based red teaming techniques with meticulous annotation by human experts.\nThis can prevent inaccurate evaluation caused by data leakage when using open-source datasets and ensures the quality and challenging nature of our benchmark.\nAdditionally, a fully automated lightweight evaluator termed GuardRank is developed, which achieves significantly higher evaluation accuracy than GPT-4.\nOur evaluation results across 13 advanced models indicate that MLLMs still have a substantial journey ahead before they can be considered safe and responsible.", "keywords": "Multimodal Large Language Models;MLLM Safety;Dataset", "primary_area": "", "supplementary_material": "/attachment/dce44c88a1c503c6f05d2a5706faf4ab0422b1b0.zip", "author": "Tianle Gu;Zeyang Zhou;Kexin Huang;Liang Dandan;Yixu Wang;Haiquan Zhao;Yuanqi Yao;xingge qiao;Keqing wang;Yujiu Yang;Yan Teng;Yu Qiao;Yingchun Wang", "authorids": "~Tianle_Gu1;~Zeyang_Zhou3;~Kexin_Huang3;~Liang_Dandan1;~Yixu_Wang1;~Haiquan_Zhao1;~Yuanqi_Yao1;~xingge_qiao1;~Keqing_wang1;~Yujiu_Yang2;~Yan_Teng1;~Yu_Qiao1;~Yingchun_Wang2", "gender": "F;F;;F;;M;F;;;M;F;;", "homepage": "https://carol-gutianle.top;https://honey-lily-f34.notion.site/profile-4adb7c3d552246e0b93de39e7aa8f73c?pvs=4;;;;https://haidequanbu.github.io;https://ceciliayao.github.io/;https://myaccount.google.com/?hl=zh-CN;https://myaccount.google.com/u/1/?hl=zh-CN;https://sites.google.com/view/iigroup-thu;;;", "dblp": "362/4238;;;;;;353/1986;;;30/3847;;;", "google_scholar": "wlW9_7QAAAAJ;;;;EYP7wNIAAAAJ;V9-CrQ0AAAAJ;s482QHoAAAAJ;;;4gH3sxsAAAAJ;;;", "orcid": "0009-0005-1546-8196;;;0009-0002-8320-6153;;0009-0002-8098-403X;0009-0005-3012-9395;;;0000-0002-6427-1024;0000-0002-7069-4728;;", "linkedin": ";;;;;;;;;;;;yingchun-wang-6b178927b/", "or_profile": "~Tianle_Gu1;~Zeyang_Zhou3;~Kexin_Huang3;~Liang_Dandan1;~Yixu_Wang1;~Haiquan_Zhao1;~Yuanqi_Yao1;~xingge_qiao1;~Keqing_wang1;~Yujiu_Yang2;~Yan_Teng1;~Yu_Qiao1;~Yingchun_Wang2", "aff": "Tsinghua University;Fudan University;;Shanghai University;Fudan University;Fudan University;Harbin Institute of Technology;East China University of Science and Technology;East China University of Science and Technology;Tsinghua University;Shanghai Artificial Intelligence Laboratory;;Shanghai Artificial Intelligence Laboratory", "aff_domain": "mails.tsinghua.edu.cn;fudan.edu.cn;;shu.edu.cn;fudan.edu.cn;fudan.edu.cn;hit.edu.cn;ecust.edu.cn;ecust.edu.cn;tsinghua.edu.cn;org.cn;;org.cn", "position": "MS student;Undergrad student;;Undergrad student;PhD student;MS student;MS student;MS student;MS student;Full Professor;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ngu2024mllmguard,\ntitle={{MLLMG}uard: A Multi-dimensional Safety Evaluation Suite for Multimodal Large Language Models},\nauthor={Tianle Gu and Zeyang Zhou and Kexin Huang and Liang Dandan and Yixu Wang and Haiquan Zhao and Yuanqi Yao and xingge qiao and Keqing wang and Yujiu Yang and Yan Teng and Yu Qiao and Yingchun Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=k4tuZmvSnl}\n}", "github": "", "reviewers": "41rD;NEZo;GFXL;JUin", "pdf_size": 1146216, "rating": "6;6;6;7", "confidence": "4;2;3;4", "wc_summary_and_contributions": "103;49;109;43", "wc_strengths": "35;44;61;60", "wc_improvement": "153;21;240;19", "wc_limitations": "29;31;20;3", "wc_correctness": "1;2;12;1", "wc_clarity": "1;2;31;1", "wc_relation_to_prior_work": "37;43;16;1", "wc_documentation": "14;14;5;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "374;207;495;130", "wc_reply_reviewers": "20;84;115;13", "wc_reply_authors": "56;56;56;56", "reply_reviewers": "1;1;1;1", "reply_authors": "4;3;4;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 76.0, 30.14962686336267 ], "wc_strengths_avg": [ 50.0, 10.977249200050075 ], "wc_improvement_avg": [ 108.25, 93.45955007381535 ], "wc_limitations_avg": [ 20.75, 11.053845484717073 ], "wc_correctness_avg": [ 4.0, 4.636809247747852 ], "wc_clarity_avg": [ 8.75, 12.852528934026953 ], "wc_relation_to_prior_work_avg": [ 24.25, 16.753730927766508 ], "wc_documentation_avg": [ 8.5, 5.678908345800274 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 301.5, 142.33850498020556 ], "wc_reply_reviewers_avg": [ 58.0, 42.99418565341132 ], "wc_reply_authors_avg": [ 56.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2382938533515898959&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;fudan.edu.cn;;shu.edu.cn;fudan.edu.cn;fudan.edu.cn;hit.edu.cn;ecust.edu.cn;ecust.edu.cn;tsinghua.edu.cn;org.cn;;org.cn", "author_num": 13, "aff_unique_index": "0;1;2;1;1;3;4;4;0;5;5", "aff_unique_norm": "Tsinghua University;Fudan University;Shanghai University;Harbin Institute of Technology;East China University of Science and Technology;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.fudan.edu.cn;https://www.shu.edu.cn;http://www.hit.edu.cn/;http://www.ecust.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "THU;Fudan;SHU;HIT;ECUST;Shanghai AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Beyond Optimism: Exploration With Partially Observable Rewards", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93919", "id": "k6ZHvF1vkg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k6ZHvF1vkg", "openreview": "https://openreview.net/forum?id=k6ZHvF1vkg", "poster": "/media/PosterPDFs/NeurIPS%202024/93919.png?t=1731361427.8422883", "project": "", "author_site": "Simone Parisi, Alireza Kazemipour, Michael Bowling", "tldr": "", "abstract": "Exploration in reinforcement learning (RL) remains an open challenge.\nRL algorithms rely on observing rewards to train the agent, and if informative rewards are sparse the agent learns slowly or may not learn at all. \nTo improve exploration and reward discovery, popular algorithms rely on optimism. \nBut what if sometimes rewards are unobservable, e.g., situations of partial monitoring in bandits and the recent formalism of monitored Markov decision process? \nIn this case, optimism can lead to suboptimal behavior that does not explore further to collapse uncertainty.\nWith this paper, we present a novel exploration strategy that overcomes the limitations of existing methods and guarantees convergence to an optimal policy even when rewards are not always observable. \nWe further propose a collection of tabular environments for benchmarking exploration in RL (with and without unobservable rewards) and show that our method outperforms existing ones.", "keywords": "reinforcement learning;partial observability;exploration;successor representations", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Simone Parisi;Alireza Kazemipour;Michael Bowling", "authorids": "~Simone_Parisi1;~Alireza_Kazemipour1;~Michael_Bowling1", "gender": "M;;M", "homepage": "https://sparisi.github.io/;https://alirezakazemipour.github.io;https://webdocs.cs.ualberta.ca/~bowling/", "dblp": "147/4917;;71/5161", "google_scholar": "https://scholar.google.it/citations?user=HWbCBfcAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=PYtPCHoAAAAJ", "orcid": ";;", "linkedin": ";alireza-kazemipour-1b7a511a0;", "or_profile": "~Simone_Parisi1;~Alireza_Kazemipour1;~Michael_Bowling1", "aff": "University of Alberta;University of Alberta;Department of Computing Science, University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;cs.ualberta.ca", "position": "Postdoc;MS student;Full Professor", "bibtex": "@inproceedings{\nparisi2024beyond,\ntitle={Beyond Optimism: Exploration With Partially Observable Rewards},\nauthor={Simone Parisi and Alireza Kazemipour and Michael Bowling},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k6ZHvF1vkg}\n}", "github": "", "reviewers": "Y2Vd;zrqd;ajhV;DPys", "pdf_size": 35929119, "rating": "6;6;6;6", "confidence": "3;3;5;4", "soundness": "3;2;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;4", "wc_summary": "99;129;57;97", "wc_strengths": "49;60;26;159", "wc_weaknesses": "405;100;273;415", "wc_questions": "4;98;117;38", "wc_limitations": "5;11;16;16", "wc_review": "562;398;489;725", "wc_reply_reviewers": "15;121;184;179", "wc_reply_authors": "0;281;691;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 25.588083163847973 ], "wc_strengths_avg": [ 73.5, 50.865017448144066 ], "wc_weaknesses_avg": [ 298.25, 127.44287936169678 ], "wc_questions_avg": [ 64.25, 45.38928838393482 ], "wc_limitations_avg": [ 12.0, 4.527692569068709 ], "wc_review_avg": [ 543.5, 119.81756966321758 ], "wc_reply_reviewers_avg": [ 124.75, 68.03078347336594 ], "wc_reply_authors_avg": [ 243.0, 282.95140925607706 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:moTJPbC_3uQJ:scholar.google.com/&scioq=Beyond+Optimism:+Exploration+With+Partially+Observable+Rewards&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ualberta.ca;ualberta.ca;cs.ualberta.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "To Believe or Not to Believe Your LLM: Iterative Prompting for Estimating Epistemic Uncertainty", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93918", "id": "k6iyUfwdI9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k6iyUfwdI9", "openreview": "https://openreview.net/forum?id=k6iyUfwdI9", "poster": "", "project": "", "author_site": "Yasin Abbasi Yadkori, Ilja Kuzborskij, Andr\u00e1s Gy\u00f6rgy, Csaba Szepesvari", "tldr": "", "abstract": "We explore uncertainty quantification in large language models (LLMs), with the goal to identify when uncertainty in responses given a query is large. We simultaneously consider both epistemic and aleatoric uncertainties, where the former comes from the lack of knowledge about the ground truth (such as about facts or the language), and the latter comes from irreducible randomness (such as multiple possible answers). In particular, we derive an information-theoretic metric that allows to reliably detect when only epistemic uncertainty is large, in which case the output of the model is unreliable. This condition can be computed based solely on the output of the model obtained simply by some special iterative prompting based on the previous responses. Such quantification, for instance, allows to detect hallucinations (cases when epistemic uncertainty is high) in both single- and multi-answer responses. This is in contrast to many standard uncertainty quantification strategies (such as thresholding the log-likelihood of a response) where hallucinations in the multi-answer case cannot be detected. We conduct a series of experiments which demonstrate the advantage of our formulation. Further, our investigations shed some light on how the probabilities assigned to a given output by an LLM can be amplified by iterative prompting, which might be of independent interest.", "keywords": "uncertainty quantification;large language models", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Yasin Abbasi-Yadkori;Ilja Kuzborskij;Andr\u00e1s Gy\u00f6rgy;Csaba Szepesvari", "authorids": "~Yasin_Abbasi-Yadkori1;~Ilja_Kuzborskij1;~Andr\u00e1s_Gy\u00f6rgy2;~Csaba_Szepesvari1", "gender": ";M;M;", "homepage": "https://yasin-abbasi.github.io;https://iljaku.github.io/;https://sites.ualberta.ca/~szepesva/;http://www.cs.bme.hu/~gya", "dblp": "https://dblp.org/pers/a/Abbasi=Yadkori:Yasin.html;135/4924.html;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;72/251-1", "google_scholar": "https://scholar.google.com.au/citations?user=-D0EgMIAAAAJ;4Io_CtIAAAAJ;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-0586-4337", "linkedin": ";;csaba-szepesvari-09376b1?trk=hp-identity-name;", "or_profile": "~Yasin_Abbasi-Yadkori1;~Ilja_Kuzborskij1;~Csaba_Szepesvari1;~Andras_Gyorgy1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;deepmind.com;google.com;deepmind.com", "position": "Researcher;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nabbasi-yadkori2024to,\ntitle={To Believe or Not to Believe Your {LLM}: IterativePrompting for Estimating Epistemic Uncertainty},\nauthor={Yasin Abbasi-Yadkori and Ilja Kuzborskij and Andr{\\'a}s Gy{\\\"o}rgy and Csaba Szepesvari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k6iyUfwdI9}\n}", "github": "", "reviewers": "DdzV;1raE;DRXq;zm61", "pdf_size": 1628633, "rating": "3;5;6;7", "confidence": "4;4;4;3", "soundness": "1;3;3;4", "novelty": "2;3;2;3", "presentation": "1;2;3;4", "wc_summary": "103;46;72;44", "wc_strengths": "20;45;51;34", "wc_weaknesses": "103;148;223;146", "wc_questions": "271;417;2;1", "wc_limitations": "10;9;3;10", "wc_review": "507;665;351;235", "wc_reply_reviewers": "37;74;145;0", "wc_reply_authors": "46;14;62;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 66.25, 23.920441049445557 ], "wc_strengths_avg": [ 37.5, 11.800423721205947 ], "wc_weaknesses_avg": [ 155.0, 43.17985641476822 ], "wc_questions_avg": [ 172.75, 178.86080481760112 ], "wc_limitations_avg": [ 8.0, 2.9154759474226504 ], "wc_review_avg": [ 439.5, 162.06403055582692 ], "wc_reply_reviewers_avg": [ 64.0, 53.58637886627534 ], "wc_reply_authors_avg": [ 30.5, 24.672859582950654 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6831300510639732, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6661822518513758023&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "deepmind.com;deepmind.com;google.com;deepmind.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "IllumiNeRF: 3D Relighting Without Inverse Rendering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93917", "id": "k6m3y6qnSj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k6m3y6qnSj", "openreview": "https://openreview.net/forum?id=k6m3y6qnSj", "poster": "/media/PosterPDFs/NeurIPS%202024/93917.png?t=1731713098.2230718", "project": "", "author_site": "Xiaoming Zhao, Pratul Srinivasan, Dor Verbin, Keunhong Park, Ricardo Martin Brualla, Philipp Henzler", "tldr": "", "abstract": "Existing methods for relightable view synthesis --- using a set of images of an object under unknown lighting to recover a 3D representation that can be rendered from novel viewpoints under a target illumination --- are based on inverse rendering, and attempt to disentangle the object geometry, materials, and lighting that explain the input images. Furthermore, this typically involves optimization through differentiable Monte Carlo rendering, which is brittle and computationally-expensive. In this work, we propose a simpler approach: we first relight each input image using an image diffusion model conditioned on target environment lighting and estimated object geometry. We then reconstruct a Neural Radiance Field (NeRF) with these relit images, from which we render novel views under the target lighting. We demonstrate that this strategy is surprisingly competitive and achieves state-of-the-art results on multiple relighting benchmarks. Please see our project page at [illuminerf.github.io](illuminerf.github.io).", "keywords": "3D Relighting; NeRF; Diffusion Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiaoming Zhao;Pratul P. Srinivasan;Dor Verbin;Keunhong Park;Ricardo Martin Brualla;Philipp Henzler", "authorids": "~Xiaoming_Zhao1;~Pratul_P._Srinivasan1;~Dor_Verbin1;~Keunhong_Park1;~Ricardo_Martin_Brualla1;~Philipp_Henzler1", "gender": ";M;;;M;M", "homepage": "https://xiaoming-zhao.com/;http://dorverbin.github.io;https://keunhong.com;http://ricardomartinbrualla.com;https://henzler.github.io/;https://pratulsrinivasan.github.io/", "dblp": "64/3046-1;188/5667;227/2601;16/7968;179/4982;169/4719", "google_scholar": "tDyRAbkAAAAJ;r2CioPsAAAAJ;;9F59OCYAAAAJ;H181jygAAAAJ;aYyDsZ0AAAAJ", "orcid": ";;;0000-0003-3247-9522;;", "linkedin": ";;;;;", "or_profile": "~Xiaoming_Zhao1;~Dor_Verbin1;~Keunhong_Park1;~Ricardo_Martin_Brualla1;~Philipp_Henzler1;~Pratul_Srinivasan1", "aff": "University of Illinois Urbana Champaign;Google;Google;Google;Google;Google DeepMind", "aff_domain": "illinois.edu;google.com;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\nzhao2024illuminerf,\ntitle={IllumiNe{RF}: 3D Relighting Without Inverse Rendering},\nauthor={Xiaoming Zhao and Pratul P. Srinivasan and Dor Verbin and Keunhong Park and Ricardo Martin Brualla and Philipp Henzler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k6m3y6qnSj}\n}", "github": "", "reviewers": "q4ex;hgwb;Ma7D;mZi1", "pdf_size": 43053124, "rating": "4;5;5;7", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "4;4;4;4", "wc_summary": "122;125;106;49", "wc_strengths": "181;125;42;58", "wc_weaknesses": "340;416;93;67", "wc_questions": "175;50;15;52", "wc_limitations": "1;30;1;5", "wc_review": "819;746;257;231", "wc_reply_reviewers": "633;77;0;0", "wc_reply_authors": "575;99;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 100.5, 30.598202561588483 ], "wc_strengths_avg": [ 101.5, 55.46395225729952 ], "wc_weaknesses_avg": [ 229.0, 151.68223363334283 ], "wc_questions_avg": [ 73.0, 60.700082372267005 ], "wc_limitations_avg": [ 9.25, 12.090802289343747 ], "wc_review_avg": [ 513.25, 270.6403286651862 ], "wc_reply_reviewers_avg": [ 177.5, 264.8551490909701 ], "wc_reply_authors_avg": [ 168.5, 238.1475383034643 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9619504124783939844&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "illinois.edu;google.com;google.com;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://illinois.edu;https://www.google.com", "aff_unique_abbr": "UIUC;Google", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "k73M4XEvFX", "title": "SORRY-Bench: Systematically Evaluating Large Language Model Safety Refusal Behaviors", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Evaluating aligned large language models' (LLMs) ability to recognize and reject unsafe user requests is crucial for safe, policy-compliant deployments. Existing evaluation efforts, however, face three limitations that we address with **SORRY-Bench**, our proposed benchmark. **First**, existing methods often use coarse-grained taxonomies of unsafe topics, and are over-representing some fine-grained topics. For example, among the ten existing datasets that we evaluated, tests for refusals of self-harm instructions are over 3x less represented than tests for fraudulent activities. SORRY-Bench improves on this by using a fine-grained taxonomy of 45 potentially unsafe topics, and 450 class-balanced unsafe instructions, compiled through human-in-the-loop methods. **Second**, evaluations often overlook the linguistic formatting of prompts, like different languages, dialects, and more --- which are only implicitly considered in many evaluations. We supplement SORRY-bench with 20 diverse linguistic augmentations to systematically examine these effects. **Third**, existing evaluations rely on large LLMs (e.g., GPT-4) for evaluation, which can be computationally expensive. We investigate design choices for creating a fast, accurate automated safety evaluator. By collecting 7K+ human annotations and conducting a meta-evaluation of diverse LLM-as-a-judge designs, we show that fine-tuned 7B LLMs can achieve accuracy comparable to GPT-4 scale LLMs, with lower computational cost. Putting these together, we evaluate over 40 proprietary and open-source LLMs on SORRY-Bench, analyzing their distinctive refusal behaviors. We hope our effort provides a building block for systematic evaluations of LLMs' safety refusal capabilities, in a balanced, granular, and efficient way.", "keywords": "LLM;safety;alignment;benchmark;refusal", "primary_area": "", "supplementary_material": "/attachment/49edec2daba6ed040f97fa3853c85607dd01c38a.zip", "author": "Tinghao Xie;Xiangyu Qi;Yi Zeng;Yangsibo Huang;Udari Madhushani Sehwag;Boyi Wei;Luxi He;Kaixuan Huang;Dacheng Li;Ying Sheng;Ruoxi Jia;Bo Li;Danqi Chen;Kai Li;Peter Henderson;Prateek Mittal", "authorids": "~Tinghao_Xie1;~Xiangyu_Qi2;~Yi_Zeng3;~Yangsibo_Huang2;~Udari_Madhushani_Sehwag1;~Boyi_Wei2;~Luxi_He1;~Kaixuan_Huang1;~Dacheng_Li1;~Ying_Sheng1;~Ruoxi_Jia1;~Bo_Li19;~Danqi_Chen1;~Kai_Li8;~Peter_Henderson1;~Prateek_Mittal1", "gender": "M;M;M;F;;M;F;M;;F;;F;F;M;M;", "homepage": "https://tinghaoxie.com;https://unispac.github.io;https://yizeng623.github.io/;https://hazelsuko07.github.io/yangsibo/;;https://www.boyiwei.com/;;https://hackyhuang.github.io/;;https://sites.google.com/view/yingsheng;https://ruoxijia.info/;http://boli.cs.illinois.edu/;https://www.cs.princeton.edu/~danqic/;https://www.cs.princeton.edu/~li/;http://www.peterhenderson.co/;http://www.princeton.edu/~pmittal/", "dblp": "307/5298;274/2321;75/148;;;;338/9240;;;262/6232.html;147/5355-1;50/3402-26;87/7949;l/KaiLi1.html;h/PeterHenderson2;", "google_scholar": "gFT5XpMAAAAJ;9Za3rmkAAAAJ;slUNmHQAAAAJ;NMPUDa0AAAAJ;;;;EfxwV6oAAAAJ;;xMhGYpgAAAAJ;JCrug-YAAAAJ;K8vJkTcAAAAJ;sVR8ktkAAAAJ;9MSpWOUAAAAJ;dy_JBs0AAAAJ;https://scholar.google.com.tw/citations?user=xTKD8J4AAAAJ", "orcid": ";;0000-0002-6901-9194;;;;;;;0000-0002-1883-2126;;;;;;0000-0002-4057-0118", "linkedin": ";;chnyizeng/;;;;lucy-he-3051111a9/;;;;;;;;phende/;", "or_profile": "~Tinghao_Xie1;~Xiangyu_Qi2;~Yi_Zeng3;~Yangsibo_Huang2;~Udari_Madhushani_Sehwag1;~Boyi_Wei2;~Luxi_He1;~Kaixuan_Huang1;~Dacheng_Li1;~Ying_Sheng1;~Ruoxi_Jia1;~Bo_Li19;~Danqi_Chen1;~Kai_Li8;~Peter_Henderson1;~Prateek_Mittal1", "aff": "Meta Facebook;Princeton University;Virginia Tech;Princeton University;;Princeton University;Department of Computer Science, Princeton University;Princeton University;;Stanford University;Virginia Tech;University of Illinois, Urbana Champaign;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "meta.com;princeton.edu;vt.edu;princeton.edu;;princeton.edu;cs.princeton.edu;princeton.edu;;stanford.edu;vt.edu;illinois.edu;cs.princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "Intern;PhD student;PhD student;PhD student;;PhD student;PhD student;PhD student;;PhD student;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024sorrybench,\ntitle={{SORRY}-Bench: Systematically Evaluating Large Language Model Safety Refusal Behaviors},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=k73M4XEvFX}\n}", "github": "", "project": "", "reviewers": "2i7Q;Duy6;iAGQ;KBhd", "site": "https://openreview.net/forum?id=k73M4XEvFX", "pdf_size": 3385301, "rating": "6;6;6;7", "confidence": "4;3;5;3", "wc_summary_and_contributions": "90;18;163;144", "wc_strengths": "72;30;118;46", "wc_improvement": "144;40;203;42", "wc_limitations": "1;1;44;30", "wc_correctness": "25;1;32;11", "wc_clarity": "1;1;8;12", "wc_relation_to_prior_work": "1;1;5;1", "wc_documentation": "15;1;4;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "350;94;578;298", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 103.75, 56.286654723833074 ], "wc_strengths_avg": [ 66.5, 33.29789783154486 ], "wc_improvement_avg": [ 107.25, 69.45997048660473 ], "wc_limitations_avg": [ 19.0, 18.66815470259447 ], "wc_correctness_avg": [ 17.25, 12.04937757728589 ], "wc_clarity_avg": [ 5.5, 4.716990566028302 ], "wc_relation_to_prior_work_avg": [ 2.0, 1.7320508075688772 ], "wc_documentation_avg": [ 7.75, 5.539629951540085 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 330.0, 172.20917513303408 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11593894196166008515&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;1;1;1;1;3;2;4;1;1;1;1", "aff_unique_norm": "Meta;Princeton University;Virginia Tech;Stanford University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Meta Platforms, Inc.;;;;", "aff_unique_url": "https://meta.com;https://www.princeton.edu;https://www.vt.edu;https://www.stanford.edu;https://illinois.edu", "aff_unique_abbr": "Meta;Princeton;VT;Stanford;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding and Improving Adversarial Collaborative Filtering for Robust Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93916", "id": "k8AYft5ED1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k8AYft5ED1", "openreview": "https://openreview.net/forum?id=k8AYft5ED1", "poster": "/media/PosterPDFs/NeurIPS%202024/93916.png?t=1731413248.4350545", "project": "", "author_site": "Kaike Zhang, Qi Cao, Yunfan Wu, Fei Sun, Huawei Shen, Xueqi Cheng", "tldr": "", "abstract": "Adversarial Collaborative Filtering (ACF), which typically applies adversarial perturbations at user and item embeddings through adversarial training, is widely recognized as an effective strategy for enhancing the robustness of Collaborative Filtering (CF) recommender systems against poisoning attacks. Besides, numerous studies have empirically shown that ACF can also improve recommendation performance compared to traditional CF. Despite these empirical successes, the theoretical understanding of ACF's effectiveness in terms of both performance and robustness remains unclear. To bridge this gap, in this paper, we first theoretically show that ACF can achieve a lower recommendation error compared to traditional CF with the same training epochs in both clean and poisoned data contexts. Furthermore, by establishing bounds for reductions in recommendation error during ACF's optimization process, we find that applying personalized magnitudes of perturbation for different users based on their embedding scales can further improve ACF's effectiveness. Building on these theoretical understandings, we propose Personalized Magnitude Adversarial Collaborative Filtering (PamaCF). Extensive experiments demonstrate that PamaCF effectively defends against various types of poisoning attacks while significantly enhancing recommendation performance.", "keywords": "Adversarial Collaborative Filtering;Robust Recommender System;Poisoning Attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/be37d397e929c4fada1a4c6fda6f8b2ebcb22655.zip", "author": "Kaike Zhang;Qi Cao;Yunfan Wu;Fei Sun;Huawei Shen;Xueqi Cheng", "authorids": "~Kaike_Zhang1;~Qi_Cao1;~Yunfan_Wu1;~Fei_Sun3;~Huawei_Shen1;~Xueqi_Cheng1", "gender": "M;F;M;M;M;M", "homepage": "https://kaike-zhang.github.io/;https://caoqi92.github.io/biography/;https://profile.yunfan.info;http://ofey.me;https://www.ict.ac.cn/sourcedb/cn/jssrck/201402/t20140221_4037648.html;https://people.ucas.ac.cn/~cxq?language=en", "dblp": "139/1710;40/5905;289/3320;51/394-1;;44/912", "google_scholar": ";FflWb1gAAAAJ;L6Pwc_kAAAAJ;OlRxBhcAAAAJ;;hY8aLqAAAAAJ", "orcid": ";;0000-0001-6994-6791;0000-0002-6146-148X;0000-0002-1081-8119;", "linkedin": ";;;;;", "or_profile": "~Kaike_Zhang1;~Qi_Cao1;~Yunfan_Wu1;~Fei_Sun3;~Huawei_Shen1;~Xueqi_Cheng1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences, China;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024understanding,\ntitle={Understanding and Improving Adversarial Collaborative Filtering for Robust Recommendation},\nauthor={Kaike Zhang and Qi Cao and Yunfan Wu and Fei Sun and Huawei Shen and Xueqi Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k8AYft5ED1}\n}", "github": "", "reviewers": "6kCb;svg8;gsZx;MDNE", "pdf_size": 4612607, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "2;2;4;3", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "60;44;113;77", "wc_strengths": "46;25;39;101", "wc_weaknesses": "317;24;169;47", "wc_questions": "4;121;7;6", "wc_limitations": "6;78;10;9", "wc_review": "433;292;338;240", "wc_reply_reviewers": "60;232;15;10", "wc_reply_authors": "618;985;119;116", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;3;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 73.5, 25.617376914898998 ], "wc_strengths_avg": [ 52.75, 28.86498744153546 ], "wc_weaknesses_avg": [ 139.25, 116.48256307276209 ], "wc_questions_avg": [ 34.5, 49.95247741604014 ], "wc_limitations_avg": [ 25.75, 30.202441954252638 ], "wc_review_avg": [ 325.75, 70.96610106240867 ], "wc_reply_reviewers_avg": [ 79.25, 90.31438146829109 ], "wc_reply_authors_avg": [ 459.5, 365.78853180492143 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1195073837311918922&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Metric Transforms and Low Rank Representations of Kernels for Fast Attention", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93915", "id": "k9PXsryuWG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k9PXsryuWG", "openreview": "https://openreview.net/forum?id=k9PXsryuWG", "poster": "/media/PosterPDFs/NeurIPS%202024/93915.png?t=1733866688.180001", "project": "", "author_site": "Timothy Chu, Josh Alman, Gary L. Miller, Shyam Narayanan, Mark Sellke, Zhao Song", "tldr": "", "abstract": "We introduce a new linear-algebraic tool based on group representation theory, and use it to address three key problems in machine learning.\n\n1. Past researchers have proposed fast attention algorithms for LLMs by approximating or replace softmax attention with other functions, such as low-degree polynomials. The key property of these functions is that, when applied entry-wise to the matrix $QK^{\\top}$, the result is a low rank matrix when $Q$ and $K$ are $n \\times d$ matrices and $n \\gg d$. This suggests a natural question: what are all functions $f$ with this property? If other $f$ exist and are quickly computable, they can be used in place of softmax for fast subquadratic attention algorithms. It was previously known that low-degree polynomials have this property. We prove that low-degree polynomials are the only piecewise continuous functions with this property. This suggests that the low-rank fast attention only works for functions approximable by polynomials. Our work gives a converse to the polynomial method in algorithm design.\n\n2. We prove the first full classification of all positive definite kernels that are functions of Manhattan or $\\ell_1$ distance. Our work generalizes an existing theorem at the heart of all kernel methods in machine learning: the classification of all positive definite kernels that are functions of Euclidean distance. \n\n3. The key problem in metric transforms, a mathematical theory used in geometry and machine learning, asks what functions transform pairwise distances in semi-metric space $M$ to semi-metric space $N$ for specified $M$ and $N$. We provide the first full classification of functions that transform Manhattan distances to Manhattan distances. Our work generalizes the foundational work of Schoenberg, which fully classifies functions that transform Euclidean to Euclidean distances.\n \nWe additionally prove results about stable-rank preserving functions that are potentially useful in algorithmic design, and more. Our core new tool is called the representation theory of the hyperrectangle.", "keywords": "hardness;impossibility;low rank transform;kernel method;LLM;attention", "primary_area": "learning_theory", "supplementary_material": "", "author": "Timothy Zer-An Chu;Josh Alman;Gary Miller;Shyam Narayanan;Mark Sellke;Zhao Song", "authorids": "~Timothy_Zer-An_Chu1;~Josh_Alman1;~Gary_Miller1;~Shyam_Narayanan1;~Mark_Sellke1;~Zhao_Song3", "gender": "M;M;M;M;M;M", "homepage": ";http://joshalman.com;https://www.cs.cmu.edu/~glmiller/;https://sites.google.com/view/shyamnarayanan/home;https://msellke.com/;https://www.youtube.com/@zhaosong2031", "dblp": ";166/1624;m/GaryLMiller;222/2805;207/8338;76/4051-2", "google_scholar": "https://scholar.google.com/citations?hl=en;yyDMlesAAAAJ;https://scholar.google.com.tw/citations?user=umm-i20AAAAJ;CTT44Y0AAAAJ;lXCP2cMAAAAJ;yDZct7UAAAAJ", "orcid": ";;;;0000-0001-9166-8185;", "linkedin": ";;;;mark-sellke-a40b19100/;", "or_profile": "~Timothy_Zer-An_Chu1;~Josh_Alman1;~Gary_Miller1;~Shyam_Narayanan1;~Mark_Sellke1;~Zhao_Song3", "aff": "AAAS;Columbia University;;Massachusetts Institute of Technology;Harvard University;Adobe", "aff_domain": "aaas.org;columbia.edu;;mit.edu;harvard.edu;adobe.com", "position": "Researcher;Assistant Professor;;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nchu2024metric,\ntitle={Metric Transforms and Low Rank Representations of Kernels for Fast Attention},\nauthor={Timothy Zer-An Chu and Josh Alman and Gary Miller and Shyam Narayanan and Mark Sellke and Zhao Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k9PXsryuWG}\n}", "github": "", "reviewers": "voKg;iDXR;MC98", "pdf_size": 583777, "rating": "6;7;8", "confidence": "4;4;4", "soundness": "3;4;4", "novelty": "4;3;4", "presentation": "2;3;4", "wc_summary": "263;274;67", "wc_strengths": "165;229;141", "wc_weaknesses": "261;157;1", "wc_questions": "278;59;18", "wc_limitations": "1;22;9", "wc_review": "968;741;236", "wc_reply_reviewers": "35;87;33", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 201.33333333333334, 95.09410543713469 ], "wc_strengths_avg": [ 178.33333333333334, 37.14236873915765 ], "wc_weaknesses_avg": [ 139.66666666666666, 106.84984271812893 ], "wc_questions_avg": [ 118.33333333333333, 114.1353971776017 ], "wc_limitations_avg": [ 10.666666666666666, 8.65383665716478 ], "wc_review_avg": [ 648.3333333333334, 305.93717583117547 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 24.997777679003566 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8241963701767998240&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "aaas.org;columbia.edu;;mit.edu;harvard.edu;adobe.com", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "American Association for the Advancement of Science;Columbia University;Massachusetts Institute of Technology;Harvard University;Adobe", "aff_unique_dep": ";;;;Adobe Inc.", "aff_unique_url": "https://www.aaas.org;https://www.columbia.edu;https://web.mit.edu;https://www.harvard.edu;https://www.adobe.com", "aff_unique_abbr": "AAAS;Columbia;MIT;Harvard;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion-Reward Adversarial Imitation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93914", "id": "k9SH68MvJs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k9SH68MvJs", "openreview": "https://openreview.net/forum?id=k9SH68MvJs", "poster": "", "project": "", "author_site": "Chun-Mao Lai, Hsiang-Chun Wang, Ping-Chun Hsieh, Frank Wang, Min-Hung Chen, Shao-Hua Sun", "tldr": "", "abstract": "Imitation learning aims to learn a policy from observing expert demonstrations without access to reward signals from environments. Generative adversarial imitation learning (GAIL) formulates imitation learning as adversarial learning, employing a generator policy learning to imitate expert behaviors and discriminator learning to distinguish the expert demonstrations from agent trajectories. Despite its encouraging results, GAIL training is often brittle and unstable. Inspired by the recent dominance of diffusion models in generative modeling, we propose Diffusion-Reward Adversarial Imitation Learning (DRAIL), which integrates a diffusion model into GAIL, aiming to yield more robust and smoother rewards for policy learning. Specifically, we propose a diffusion discriminative classifier to construct an enhanced discriminator, and design diffusion rewards based on the classifier\u2019s output for policy learning. Extensive experiments are conducted in navigation, manipulation, and locomotion, verifying DRAIL\u2019s effectiveness compared to prior imitation learning methods. Moreover, additional experimental results demonstrate the generalizability and data efficiency of DRAIL. Visualized learned reward functions of GAIL and DRAIL suggest that DRAIL can produce more robust and smoother rewards. Project page: https://nturobotlearninglab.github.io/DRAIL/", "keywords": "Imitation Learning;Adversarial Imitation Learning;Diffusion Model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Chun-Mao Lai;Hsiang-Chun Wang;Ping-Chun Hsieh;Yu-Chiang Frank Wang;Min-Hung Chen;Shao-Hua Sun", "authorids": "~Chun-Mao_Lai1;~Hsiang-Chun_Wang1;~Ping-Chun_Hsieh1;~Yu-Chiang_Frank_Wang2;~Min-Hung_Chen2;~Shao-Hua_Sun1", "gender": "M;;M;M;M;M", "homepage": "https://mecoli1219.github.io/;https://hsiangchun0205.github.io/;https://pinghsieh.github.io/;http://vllab.ee.ntu.edu.tw/ycwang.html;https://minhungchen.netlify.app/;http://shaohua0116.github.io", "dblp": "325/4767;;163/7352;30/1690;04/6305;158/9680", "google_scholar": ";https://scholar.google.com.tw/citations?user=vpJMSjMAAAAJ;ix38JgoAAAAJ;HSGvdtoAAAAJ;ovzuxi8AAAAJ;uXsfnaQAAAAJ", "orcid": ";;;0000-0002-2333-157X;0000-0002-4046-3937;0000-0001-7579-6734", "linkedin": ";https://tw.linkedin.com/in/hsiang-chun-wang-8a4798269;;;chensteven/;shaohua0116/", "or_profile": "~Chun-Mao_Lai1;~Hsiang-Chun_Wang1;~Ping-Chun_Hsieh1;~Yu-Chiang_Frank_Wang2;~Min-Hung_Chen2;~Shao-Hua_Sun1", "aff": "National Taiwan University;National Taiwan University;National Yang Ming Chiao Tung University;National Taiwan University;NVIDIA;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw;nycu.edu.tw;ntu.edu.tw;nvidia.com;ntu.edu.tw", "position": "Undergrad student;MS student;Associate Professor;Full Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nlai2024diffusionreward,\ntitle={Diffusion-Reward Adversarial Imitation Learning},\nauthor={Chun-Mao Lai and Hsiang-Chun Wang and Ping-Chun Hsieh and Yu-Chiang Frank Wang and Min-Hung Chen and Shao-Hua Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k9SH68MvJs}\n}", "github": "", "reviewers": "PZWW;uNxe;5b3J;3pMZ", "pdf_size": 6718096, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;2;3", "wc_summary": "59;145;269;101", "wc_strengths": "32;47;170;119", "wc_weaknesses": "165;70;574;120", "wc_questions": "125;150;2;114", "wc_limitations": "20;1;77;3", "wc_review": "401;413;1092;457", "wc_reply_reviewers": "22;0;0;0", "wc_reply_authors": "17;214;127;177", "reply_reviewers": "1;0;0;0", "reply_authors": "2;3;2;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 143.5, 78.57957749950047 ], "wc_strengths_avg": [ 92.0, 55.76289088632332 ], "wc_weaknesses_avg": [ 232.25, 200.15041219043243 ], "wc_questions_avg": [ 97.75, 56.79953785023255 ], "wc_limitations_avg": [ 25.25, 30.776411421736615 ], "wc_review_avg": [ 590.75, 290.1468378252639 ], "wc_reply_reviewers_avg": [ 5.5, 9.526279441628825 ], "wc_reply_authors_avg": [ 133.75, 74.13964863687985 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9449103192016508964&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.tw;ntu.edu.tw;nycu.edu.tw;ntu.edu.tw;nvidia.com;ntu.edu.tw", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "National Taiwan University;National Yang Ming Chiao Tung University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.ntu.edu.tw;https://www.nycu.edu.tw;https://www.nvidia.com", "aff_unique_abbr": "NTU;NYCU;NVIDIA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "UQ-Guided Hyperparameter Optimization for Iterative Learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93913", "id": "k9uZfaeerK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=k9uZfaeerK", "openreview": "https://openreview.net/forum?id=k9uZfaeerK", "poster": "/media/PosterPDFs/NeurIPS%202024/93913.png?t=1734053780.6491346", "project": "", "author_site": "Jiesong Liu, Feng Zhang, Jiawei Guan, Xipeng Shen", "tldr": "", "abstract": "Hyperparameter Optimization (HPO) plays a pivotal role in unleashing the potential of iterative machine learning models. This paper addresses a crucial aspect that has largely been overlooked in HPO: the impact of uncertainty in ML model training. The paper introduces the concept of uncertainty-aware HPO and presents a novel approach called the UQ-guided scheme for quantifying uncertainty. This scheme offers a principled and versatile method to empower HPO techniques in handling model uncertainty during their exploration of the candidate space.\nBy constructing a probabilistic model and implementing probability-driven candidate selection and budget allocation, this approach enhances the quality of the resulting model hyperparameters. It achieves a notable performance improvement of over 50\\% in terms of accuracy regret and exploration time.", "keywords": "Uncertainty quantification; Hyperparameter Optimization; iterative learners", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/cfb4339619a9df8a4603a78d9ae286cc7525f52c.zip", "author": "Jiesong Liu;Feng Zhang;Jiawei Guan;Xipeng Shen", "authorids": "~Jiesong_Liu1;~Feng_Zhang10;~Jiawei_Guan1;~Xipeng_Shen1", "gender": ";M;;M", "homepage": "https://fred1031.github.io;https://fengzhangcs.github.io/;https://guan-jw.github.io/;https://research.csc.ncsu.edu/picture/xshen5/index.htm", "dblp": "337/2891;48/1294-7;;36/4172.html", "google_scholar": ";6dDocYkAAAAJ;;0DC5oGQAAAAJ", "orcid": ";0000-0003-1983-7321;;0000-0003-3599-8010", "linkedin": ";;;", "or_profile": "~Jiesong_Liu1;~Feng_Zhang10;~Jiawei_Guan1;~Xipeng_Shen1", "aff": "North Carolina State University;Renmin University of China;;North Carolina State University", "aff_domain": "ncsu.edu;ruc.edu.cn;;ncsu.edu", "position": "PhD student;Full Professor;;Professor", "bibtex": "@inproceedings{\nliu2024uqguided,\ntitle={{UQ}-Guided Hyperparameter Optimization for Iterative Learners},\nauthor={Jiesong Liu and Feng Zhang and Jiawei Guan and Xipeng Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=k9uZfaeerK}\n}", "github": "", "reviewers": "gRaV;P7Zr;BPGo;ffRZ", "pdf_size": 1312653, "rating": "6;6;6;7", "confidence": "3;4;3;5", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "69;72;72;85", "wc_strengths": "403;61;35;47", "wc_weaknesses": "6;286;16;52", "wc_questions": "214;134;236;67", "wc_limitations": "41;45;22;44", "wc_review": "733;598;381;295", "wc_reply_reviewers": "5;248;6;9", "wc_reply_authors": "0;689;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.5, 6.18465843842649 ], "wc_strengths_avg": [ 136.5, 154.13873620865067 ], "wc_weaknesses_avg": [ 90.0, 114.44649404852908 ], "wc_questions_avg": [ 162.75, 67.05734486243844 ], "wc_limitations_avg": [ 38.0, 9.354143466934854 ], "wc_review_avg": [ 501.75, 173.2532467228248 ], "wc_reply_reviewers_avg": [ 67.0, 104.51076499576492 ], "wc_reply_authors_avg": [ 172.25, 298.34575160373913 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2614307199794749651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ncsu.edu;ruc.edu.cn;;ncsu.edu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "North Carolina State University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncsu.edu;http://www.ruc.edu.cn", "aff_unique_abbr": "NCSU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "Nuclear Fusion Diamond Polishing Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97539", "id": "kBvwv92E1S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kBvwv92E1S", "openreview": "https://openreview.net/forum?id=kBvwv92E1S", "poster": "", "project": "", "author_site": "Antonios Alexos, Junze Liu, Shashank Galla, Sean Hayes, Kshitij Bhardwaj, Alexander Schwartz, Monika Biener, Pierre Baldi, Satish Bukkapatnam, Suhas Bhandarkar", "tldr": "", "abstract": "In the Inertial Confinement Fusion (ICF) process, roughly a 2mm spherical shell made of high-density carbon is used as a target for laser beams, which compress and heat it to energy levels needed for high fusion yield in nuclear fusion. These shells are polished meticulously to meet the standards for a fusion shot. However, the polishing of these shells involves multiple stages, with each stage taking several hours. To make sure that the polishing process is advancing in the right direction, we are able to measure the shell surface roughness. This measurement, however, is very labor-intensive, time-consuming, and requires a human operator. To help improve the polishing process we have released the first dataset to the public that consists of raw vibration signals with the corresponding polishing surface roughness changes. We show that this dataset can be used with a variety of neural network based methods for prediction of the change of polishing surface roughness, hence eliminating the need for the time-consuming manual process. This is the first dataset of its kind to be released in public and its use will allow the operator to make any necessary changes to the ICF polishing process for optimal results. This dataset contains the raw vibration data of multiple polishing runs with their extracted statistical features and the corresponding surface roughness values. Additionally, to generalize the prediction models to different polishing conditions, we also apply domain adaptation techniques to improve prediction accuracy for conditions unseen by the trained model. The dataset is available in \\url{https://junzeliu.github.io/Diamond-Polishing-Dataset/}.", "keywords": "domain adaptation;polishing surface roughness;nuclear fusion", "primary_area": "", "supplementary_material": "", "author": "Antonios Alexos;Junze Liu;Shashank Galla;Sean Hayes;Kshitij Bhardwaj;Alexander Schwartz;Monika Biener;Pierre Baldi;Satish Bukkapatnam;Suhas Bhandarkar", "authorids": "~Antonios_Alexos1;~Junze_Liu1;~Shashank_Galla1;~Sean_Hayes1;~Kshitij_Bhardwaj1;~Alexander_Schwartz1;~Monika_Biener1;~Pierre_Baldi1;~Satish_Bukkapatnam1;~Suhas_Bhandarkar1", "gender": "M;M;M;M;;M;F;;M;M", "homepage": "https://antonyalexos.github.io;https://junzeliu.github.io/;;;https://scholar.google.com/citations?user=WZJ2k8MAAAAJ&hl=en&oi=ao;;https://www.researchgate.net/profile/Monika-Biener;;https://engineering.tamu.edu/industrial/profiles/bukkapatnam-satish.html;https://www.llnl.gov/", "dblp": "267/9750;245/3563;;;;;;;;", "google_scholar": "G33kETkAAAAJ;https://scholar.google.com/citations?hl=en;8NIvDL4AAAAJ;;WZJ2k8MAAAAJ;;;;;", "orcid": ";;0009-0000-9482-6206;0000-0001-7727-7145;;;;;;0000-0002-4978-1181", "linkedin": "antonios-alexos-861446122/;junzel/;shashankgalla/;;;alexanderschwartz91/;;;;", "or_profile": "~Antonios_Alexos1;~Junze_Liu1;~Shashank_Galla1;~Sean_Hayes1;~Kshitij_Bhardwaj1;~Alexander_Schwartz1;~Monika_Biener1;~Pierre_Baldi1;~Satish_Bukkapatnam1;~Suhas_Bhandarkar1", "aff": "University of California, Irvine;University of California, Irvine;Texas A&M University - College Station;Lawrence Livermore National Labs;Lawrence Livermore National Labs;Lawrence University;Lawrence Livermore National Labs;;Texas A&M University - College Station;Lawrence Livermore National Labs", "aff_domain": "uci.edu;uci.edu;tamu.edu;llnl.gov;llnl.gov;lawrence.edu;llnl.gov;;tamu.edu;llnl.gov", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Researcher;Researcher;;Full Professor;Researcher", "bibtex": "@inproceedings{\nalexos2024nuclear,\ntitle={Nuclear Fusion Diamond Polishing Dataset},\nauthor={Antonios Alexos and Junze Liu and Shashank Galla and Sean Hayes and Kshitij Bhardwaj and Alexander Schwartz and Monika Biener and Pierre Baldi and Satish Bukkapatnam and Suhas Bhandarkar},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kBvwv92E1S}\n}", "github": "", "reviewers": "Gqfh;oCrs;yAxf", "pdf_size": 2290182, "rating": "5;6;7", "confidence": "3;3;3", "wc_summary_and_contributions": "109;132;64", "wc_strengths": "51;91;33", "wc_improvement": "276;230;92", "wc_limitations": "32;8;1", "wc_correctness": "21;8;1", "wc_clarity": "19;31;1", "wc_relation_to_prior_work": "21;7;1", "wc_documentation": "1;1;10", "wc_additional_feedback": "1;1;1", "wc_review": "531;509;204", "wc_reply_reviewers": "0;110;0", "wc_reply_authors": "0;23;0", "reply_reviewers": "0;2;0", "reply_authors": "3;3;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "wc_summary_and_contributions_avg": [ 101.66666666666667, 28.241026106633512 ], "wc_strengths_avg": [ 58.333333333333336, 24.239545283597124 ], "wc_improvement_avg": [ 199.33333333333334, 78.18496587508938 ], "wc_limitations_avg": [ 13.666666666666666, 13.274871834493252 ], "wc_correctness_avg": [ 10.0, 8.286535263104035 ], "wc_clarity_avg": [ 17.0, 12.328828005937952 ], "wc_relation_to_prior_work_avg": [ 9.666666666666666, 8.379870059984357 ], "wc_documentation_avg": [ 4.0, 4.242640687119285 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 414.6666666666667, 149.23434219002303 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 51.85449728701349 ], "wc_reply_authors_avg": [ 7.666666666666667, 10.842303978193728 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4408347178585266488&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "uci.edu;uci.edu;tamu.edu;llnl.gov;llnl.gov;lawrence.edu;llnl.gov;;tamu.edu;llnl.gov", "author_num": 10, "aff_unique_index": "0;0;1;2;2;3;2;1;2", "aff_unique_norm": "University of California, Irvine;Texas A&M University;Lawrence Livermore National Laboratory;Lawrence University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uci.edu;https://www.tamu.edu;https://www.llnl.gov;https://www.lawrence.edu", "aff_unique_abbr": "UCI;TAMU;LLNL;LU", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "Irvine;College Station;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Isometries: Taming Transformations for Equivariant ML", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93912", "id": "kCabCEhQWv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kCabCEhQWv", "openreview": "https://openreview.net/forum?id=kCabCEhQWv", "poster": "", "project": "", "author_site": "Thomas Mitchel, Michael Taylor, Vincent Sitzmann", "tldr": "", "abstract": "Real-world geometry and 3D vision tasks are replete with challenging symmetries that defy tractable analytical expression. In this paper, we introduce Neural Isometries, an autoencoder framework which learns to map the observation space to a general-purpose latent space wherein encodings are related by isometries whenever their corresponding observations are geometrically related in world space. Specifically, we regularize the latent space such that maps between encodings preserve a learned inner product and commute with a learned functional operator, in the same manner as rigid-body transformations commute with the Laplacian. This approach forms an effective backbone for self-supervised representation learning, and we demonstrate that a simple off-the-shelf equivariant network operating in the pre-trained latent space can achieve results on par with meticulously-engineered, handcrafted networks designed to handle complex, nonlinear symmetries. Furthermore, isometric maps capture information about the respective transformations in world space, and we show that this allows us to regress camera poses directly from the coefficients of the maps between encodings of adjacent views of a scene.", "keywords": "Equivariance;Geometric Deep Learning;Representation Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Thomas Mitchel;Michael Taylor;Vincent Sitzmann", "authorids": "~Thomas_Mitchel1;~Michael_Taylor5;~Vincent_Sitzmann1", "gender": ";M;M", "homepage": ";;https://vsitzmann.github.io", "dblp": ";;192/1958", "google_scholar": ";KC6FJ14AAAAJ;X44QVV4AAAAJ", "orcid": ";;0000-0002-0107-5704", "linkedin": ";;vincentsitzmann/", "or_profile": "~Thomas_Mitchel1;~Michael_Taylor5;~Vincent_Sitzmann1", "aff": ";Sony Interactive Entertainment;Massachusetts Institute of Technology", "aff_domain": ";sony.com;mit.edu", "position": ";Researcher;Assistant Professor", "bibtex": "@inproceedings{\nmitchel2024neural,\ntitle={Neural Isometries: Taming Transformations for Equivariant {ML}},\nauthor={Thomas Mitchel and Michael Taylor and Vincent Sitzmann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kCabCEhQWv}\n}", "github": "", "reviewers": "Sfsw;FFcP;2Npe;JQqD", "pdf_size": 41078636, "rating": "4;4;4;6", "confidence": "3;2;4;3", "soundness": "3;3;2;2", "novelty": "3;2;2;3", "presentation": "3;3;2;4", "wc_summary": "129;45;103;168", "wc_strengths": "75;38;31;61", "wc_weaknesses": "235;84;330;210", "wc_questions": "113;42;3;58", "wc_limitations": "59;4;16;84", "wc_review": "611;213;483;581", "wc_reply_reviewers": "305;165;0;128", "wc_reply_authors": "329;1322;60;230", "reply_reviewers": "1;1;0;1", "reply_authors": "3;4;2;3", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.25, 44.70109059072273 ], "wc_strengths_avg": [ 51.25, 17.640507362318125 ], "wc_weaknesses_avg": [ 214.75, 87.76495599041795 ], "wc_questions_avg": [ 54.0, 39.50316443020736 ], "wc_limitations_avg": [ 40.75, 32.275183965393595 ], "wc_review_avg": [ 472.0, 156.84705926475002 ], "wc_reply_reviewers_avg": [ 149.5, 108.66577197995696 ], "wc_reply_authors_avg": [ 485.25, 492.5836857834413 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17219884338876563808&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": ";sony.com;mit.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Sony Interactive Entertainment;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sonyinteractive.com;https://web.mit.edu", "aff_unique_abbr": "SIE;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Image Textualization: An Automatic Framework for Generating Rich and Detailed Image Descriptions", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97538", "id": "kChaL3rZxi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kChaL3rZxi", "openreview": "https://openreview.net/forum?id=kChaL3rZxi", "poster": "/media/PosterPDFs/NeurIPS%202024/97538.png?t=1731252964.8566878", "project": "", "author_site": "Renjie Pi, Jianshu Zhang, Jipeng Zhang, Rui Pan, Zhekai Chen, Tong Zhang", "tldr": "", "abstract": "Image description datasets play a crucial role in the advancement of various applications such as image understanding, text-to-image generation, and text-image retrieval. Currently, image description datasets primarily originate from two sources. One source is the scraping of image-text pairs from the web. Despite their abundance, these descriptions are often of low quality and noisy. Another way is through human labeling. Datasets such as COCO are generally very short and lack details. Although detailed image descriptions can be annotated by humans, the high cost limits their quantity and feasibility. These limitations underscore the need for more efficient and scalable methods to generate accurate and detailed image descriptions. In this paper, we propose an innovative framework termed Image Textualization, which automatically produces high-quality image descriptions by leveraging existing mult-modal large language models (MLLMs) and multiple vision expert models in a collaborative manner. We conduct various experiments to validate the high quality of the descriptions constructed by our framework. Furthermore, we show that MLLMs fine-tuned on our dataset acquire an unprecedented capability of generating richer image descriptions, substantially increasing the length and detail of their output with even less hallucinations.", "keywords": "Multimodal large language model;image captioning", "primary_area": "", "supplementary_material": "/attachment/5ce6e672d799fc560d792d04905f3dd01f8ae769.pdf", "author": "Renjie Pi;Jianshu Zhang;Jipeng Zhang;Rui Pan;Zhekai Chen;Tong Zhang", "authorids": "~Renjie_Pi1;~Jianshu_Zhang3;~Jipeng_Zhang1;~Rui_Pan4;~Zhekai_Chen1;~Tong_Zhang2", "gender": "M;;M;M;M;M", "homepage": ";;https://2003pro.github.io/;;https://zhekai-chen.github.io/;http://tongzhang-ml.org", "dblp": "67/2156;;;74/9957;;07/4227-1", "google_scholar": "XUq0HwcAAAAJ;;q0De288AAAAJ;;https://scholar.google.com/citations?hl=en;LurWtuYAAAAJ", "orcid": ";;;0000-0001-7217-0656;;0000-0002-5511-2558", "linkedin": ";;;;;", "or_profile": "~Renjie_Pi1;~Jianshu_Zhang3;~Jipeng_Zhang1;~Rui_Pan4;~Zhekai_Chen1;~Tong_Zhang2", "aff": "Hong Kong University of Science and Technology;;Department of Computer Science and Engineering, The Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Zhejiang University;UIUC", "aff_domain": "ust.hk;;cse.ust.hk;ust.hk;zju.edu.cn;illinois.edu", "position": "PhD student;;PhD student;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\npi2024image,\ntitle={Image Textualization: An Automatic Framework for Generating Rich and Detailed Image Descriptions},\nauthor={Renjie Pi and Jianshu Zhang and Jipeng Zhang and Rui Pan and Zhekai Chen and Tong Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kChaL3rZxi}\n}", "github": "", "reviewers": "MZYA;g394;aJpW;QK76", "pdf_size": 3078335, "rating": "6;6;6;8", "confidence": "5;3;4;4", "wc_summary_and_contributions": "73;74;253;88", "wc_strengths": "21;166;24;12", "wc_improvement": "205;201;70;135", "wc_limitations": "49;115;14;160", "wc_correctness": "22;1;44;1", "wc_clarity": "6;1;1;1", "wc_relation_to_prior_work": "1;1;1;1", "wc_documentation": "16;1;13;15", "wc_additional_feedback": "1;1;1;1", "wc_review": "394;561;421;414", "wc_reply_reviewers": "40;9;40;107", "wc_reply_authors": "0;347;498;255", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;5;3", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 122.0, 75.86501169841075 ], "wc_strengths_avg": [ 55.75, 63.80585788154564 ], "wc_improvement_avg": [ 152.75, 55.27375055123363 ], "wc_limitations_avg": [ 84.5, 56.7031745143074 ], "wc_correctness_avg": [ 17.0, 17.790446874657196 ], "wc_clarity_avg": [ 2.25, 2.165063509461097 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 11.25, 6.015604707757983 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 447.5, 66.27405223765935 ], "wc_reply_reviewers_avg": [ 49.0, 35.7980446393375 ], "wc_reply_authors_avg": [ 275.0, 180.92678077056476 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CYNuNC476IAJ:scholar.google.com/&scioq=Image+Textualization:+An+Automatic+Framework+for+Generating+Rich+and+Detailed+Image+Descriptions&hl=en&as_sdt=0,31", "gs_version_total": 0, "email": "ust.hk;;cse.ust.hk;ust.hk;zju.edu.cn;illinois.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Zhejiang University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.zju.edu.cn;https://www illinois.edu", "aff_unique_abbr": "HKUST;ZJU;UIUC", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Hong Kong SAR;;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Benchmarking Out-of-Distribution Generalization Capabilities of DNN-based Encoding Models for the Ventral Visual Cortex.", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97537", "id": "kD1kpLtrmX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kD1kpLtrmX", "openreview": "https://openreview.net/forum?id=kD1kpLtrmX", "poster": "", "project": "", "author_site": "Spandan Madan, Will Xiao, Mingran Cao, Hanspeter Pfister, Margaret Livingstone, Gabriel Kreiman", "tldr": "", "abstract": "We characterized the generalization capabilities of deep neural network encoding models when predicting neuronal responses from the visual cortex to flashed images. We collected MacaqueITBench, a large-scale dataset of neuronal population responses from the macaque inferior temporal (IT) cortex to over $300,000$ images, comprising $8,233$ unique natural images presented to seven monkeys over $109$ sessions. Using MacaqueITBench, we investigated the impact of distribution shifts on models predicting neuronal activity by dividing the images into Out-Of-Distribution (OOD) train and test splits. The OOD splits included variations in image contrast, hue, intensity, temperature, and saturation. Compared to the performance on in-distribution test images---the conventional way in which these models have been evaluated---models performed worse at predicting neuronal responses to out-of-distribution images, retaining as little as $20\\\\%$ of the performance on in-distribution test images. Additionally, the relative ranking of different models in terms of their ability to predict neuronal responses changed drastically across OOD shifts. The generalization performance under OOD shifts can be well accounted by a simple image similarity metric---the cosine distance between image representations extracted from a pre-trained object recognition model is a strong predictor of neuronal predictivity under different distribution shifts. The dataset of images, neuronal firing rate recordings, and computational benchmarks are hosted publicly at: https://github.com/Spandan-Madan/benchmarking_ood_generalization_visual_cortex.", "keywords": "Generalization;OOD;ElectroPhysiology;Visual Cortex", "primary_area": "", "supplementary_material": "", "author": "Spandan Madan;Will Xiao;Mingran Cao;Hanspeter Pfister;Margaret Livingstone;Gabriel Kreiman", "authorids": "~Spandan_Madan1;~Will_Xiao1;~Mingran_Cao1;~Hanspeter_Pfister1;~Margaret_Livingstone1;~Gabriel_Kreiman1", "gender": "M;;;M;F;M", "homepage": ";;;https://vcg.seas.harvard.edu;;http://klab.tch.harvard.edu", "dblp": "205/2937;;;p/HanspeterPfister;;12/1367", "google_scholar": "QY5OAIMAAAAJ;;;tvBEoaMAAAAJ;P_3rGrsAAAAJ;WxZ_6nsAAAAJ", "orcid": ";0000-0001-5555-3217;;0000-0002-3620-2582;;0000-0003-3505-8475", "linkedin": ";;mingrancao/;hpfister/;;kreiman/", "or_profile": "~Spandan_Madan1;~Will_Xiao1;~Mingran_Cao1;~Hanspeter_Pfister1;~Margaret_Livingstone1;~Gabriel_Kreiman1", "aff": "Harvard University;Harvard University, Harvard University;The Francis Crick Institute;Harvard University;Harvard University;Harvard Medical School", "aff_domain": "harvard.edu;fas.harvard.edu;crick.ac.uk;harvard.edu;harvard.edu;harvard.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmadan2024benchmarking,\ntitle={Benchmarking Out-of-Distribution Generalization Capabilities of {DNN}-based Encoding Models for the Ventral Visual Cortex.},\nauthor={Spandan Madan and Will Xiao and Mingran Cao and Hanspeter Pfister and Margaret Livingstone and Gabriel Kreiman},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kD1kpLtrmX}\n}", "github": "", "reviewers": "VCLi;hU8b;daFw", "pdf_size": 17108916, "rating": "6;6;8", "confidence": "4;3;4", "wc_summary_and_contributions": "106;98;90", "wc_strengths": "48;52;37", "wc_improvement": "97;76;17", "wc_limitations": "1;15;21", "wc_correctness": "1;8;7", "wc_clarity": "1;8;1", "wc_relation_to_prior_work": "1;1;7", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "257;260;182", "wc_reply_reviewers": "65;0;57", "wc_reply_authors": "0;38;0", "reply_reviewers": "1;0;1", "reply_authors": "2;2;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 98.0, 6.531972647421808 ], "wc_strengths_avg": [ 45.666666666666664, 6.342099196813483 ], "wc_improvement_avg": [ 63.333333333333336, 33.86574801903671 ], "wc_limitations_avg": [ 12.333333333333334, 8.379870059984357 ], "wc_correctness_avg": [ 5.333333333333333, 3.0912061651652345 ], "wc_clarity_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_relation_to_prior_work_avg": [ 3.0, 2.8284271247461903 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 233.0, 36.08323710533743 ], "wc_reply_reviewers_avg": [ 40.666666666666664, 28.94055209486432 ], "wc_reply_authors_avg": [ 12.666666666666666, 17.913371790059205 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14740751292435428484&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "harvard.edu;fas.harvard.edu;crick.ac.uk;harvard.edu;harvard.edu;harvard.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Harvard University;Francis Crick Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.crick.ac.uk", "aff_unique_abbr": "Harvard;Crick", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "kDp8Eq76dm", "title": "AlleNoise - large-scale text classification benchmark dataset with real-world label noise", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Label noise remains a challenge for training robust classification models. Most methods for mitigating label noise have been benchmarked using primarily datasets with synthetic noise. While the need for datasets with realistic noise distribution has partially been addressed by web-scraped benchmarks such as WebVision and Clothing1M, those benchmarks are restricted to the computer vision domain. With the growing importance of Transformer-based models, it is crucial to establish text classification benchmarks for learning with noisy labels. In this paper, we present AlleNoise, a new curated text classification dataset with real-world instance-dependent label noise, containing over 500,000 examples across approximately 5600 classes, complemented with a meaningful, hierarchical taxonomy of categories. The noise distribution comes from actual users of a major e-commerce marketplace, so it realistically reflects the semantics of human mistakes. In addition to the noisy labels, we provide human-verified clean labels, which help to get a deeper insight into the noise distribution, unlike web-scraped datasets typically used in the field. We demonstrate that a representative selection of established methods for learning with noisy labels is inadequate to handle such real-world noise. In addition, we show evidence that these algorithms do not alleviate excessive memorization. As such, with AlleNoise, we set a high bar for the development of label noise methods that can handle real-world label noise in text classification tasks. The code and dataset are available for download at https://github.com/allegro/AlleNoise.", "keywords": "label noise;real-world noise;instance-dependent noise;text classification;benchmark;dataset", "primary_area": "", "supplementary_material": "/attachment/7e04a0fc1f295f842992a38f05b739f5f62edcd2.zip", "author": "Alicja R\u0105czkowska;Aleksandra Osowska-Kurczab;Jacek Szczerbi\u0144ski;Kalina Jasinska-Kobus;Klaudia Nazarko", "authorids": "~Alicja_R\u0105czkowska1;~Aleksandra_Osowska-Kurczab1;~Jacek_Szczerbi\u0144ski1;~Kalina_Jasinska-Kobus1;~Klaudia_Nazarko1", "gender": "F;F;M;F;F", "homepage": "https://pl.linkedin.com/in/alicjar%C4%85czkowska/pl;;;;http://www.cs.put.poznan.pl/kjasinska/", "dblp": "383/8196;;;;182/8947", "google_scholar": "uoxkPlwAAAAJ;zkC4zlAAAAAJ;OJjMoPoAAAAJ;;LrKNC6UAAAAJ", "orcid": "0000-0001-5901-4595;0000-0001-5764-522X;;;0000-0002-6214-4734", "linkedin": "https://pl.linkedin.com/in/alicjar%C4%85czkowska/pl;aleksandra-osowska-kurczab?trk=contact-info;szcz/;https://linkedin.com/in/klaudianazarko/;kalina-jasinska-65687983/", "or_profile": "~Alicja_R\u0105czkowska1;~Aleksandra_Osowska-Kurczab1;~Jacek_Szczerbi\u0144ski1;~Klaudia_Nazarko1;~Kalina_Jasinska1", "aff": "IDEAS NCBR Sp.;IDEAS NCBR Sp.;Allegro ;Allegro Sp. z o.o.;Allegro.pl", "aff_domain": "ideas-ncbr.pl;ideas-ncbr.pl;ml.allegro.tech;allegro.pl;allegro.pl", "position": "Postdoc;Postdoc;Researcher;Researcher;Research Engineer", "bibtex": "@misc{\nanonymous2024allenoise,\ntitle={AlleNoise - large-scale text classification benchmark dataset with real-world label noise},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=kDp8Eq76dm}\n}", "github": "", "project": "", "reviewers": "7uLQ;exsb;kKUV", "site": "https://openreview.net/forum?id=kDp8Eq76dm", "pdf_size": 3733174, "rating": "5;5;6", "confidence": "3;5;4", "wc_summary_and_contributions": "107;74;79", "wc_strengths": "68;71;150", "wc_improvement": "41;130;98", "wc_limitations": "1;14;14", "wc_correctness": "1;31;90", "wc_clarity": "1;26;1", "wc_relation_to_prior_work": "1;33;1", "wc_documentation": "1;23;18", "wc_additional_feedback": "1;1;1", "wc_review": "222;403;452", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "34;34;34", "reply_reviewers": "0;0;0", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 86.66666666666667, 14.522013940527977 ], "wc_strengths_avg": [ 96.33333333333333, 37.96782263385084 ], "wc_improvement_avg": [ 89.66666666666667, 36.80881536926839 ], "wc_limitations_avg": [ 9.666666666666666, 6.128258770283412 ], "wc_correctness_avg": [ 40.666666666666664, 36.971460464646086 ], "wc_clarity_avg": [ 9.333333333333334, 11.785113019775793 ], "wc_relation_to_prior_work_avg": [ 11.666666666666666, 15.084944665313014 ], "wc_documentation_avg": [ 14.0, 9.41629792788369 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 359.0, 98.91747402085572 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 34.0, 0.0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11102746671323878692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "IDEAS NCBR;Allegro", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Poland;" }, { "title": "Large Language Models Play StarCraft II:Benchmarks and A Chain of Summarization Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93911", "id": "kEPpD7yETM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kEPpD7yETM", "openreview": "https://openreview.net/forum?id=kEPpD7yETM", "poster": "", "project": "", "author_site": "Weiyu Ma, Qirui Mi, Yongcheng Zeng, Xue Yan, Runji Lin, Yuqiao Wu, Jun Wang, Haifeng Zhang", "tldr": "", "abstract": "With the continued advancement of Large Language Models (LLMs) Agents in reasoning, planning, and decision-making, benchmarks have become crucial in evaluating these skills. However, there is a notable gap in benchmarks for real-time strategic decision-making. StarCraft II (SC2), with its complex and dynamic nature, serves as an ideal setting for such evaluations. To this end, we have developed TextStarCraft II, a specialized environment for assessing LLMs in real-time strategic scenarios within SC2. Addressing the limitations of traditional Chain of Thought (CoT) methods, we introduce the Chain of Summarization (CoS) method, enhancing LLMs' capabilities in rapid and effective decision-making. Our key experiments included:\n1. LLM Evaluation: Tested 10 LLMs in TextStarCraft II, most of them defeating LV5 build-in AI, showcasing effective strategy skills.\n2. Commercial Model Knowledge: Evaluated four commercial models on SC2 knowledge; GPT-4 ranked highest by Grandmaster-level experts.\n3. Human-AI Matches: Experimental results showed that fine-tuned LLMs performed on par with Gold-level players in real-time matches, demonstrating comparable strategic abilities.\n\nAll code and data from this\nstudy have been made pulicly available at https://github.com/histmeisah/Large-Language-Models-play-StarCraftII", "keywords": "LLM Agent;StarCraft2", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/89cde7ab347ffb6e79d01f016701ab029bc42f44.zip", "author": "Weiyu Ma;Qirui Mi;Yongcheng Zeng;Xue Yan;Runji Lin;Yuqiao Wu;Jun Wang;Haifeng Zhang", "authorids": "~Weiyu_Ma1;~Qirui_Mi1;~Yongcheng_Zeng1;~Xue_Yan2;~Runji_Lin1;~Yuqiao_Wu1;~Jun_Wang2;~Haifeng_Zhang3", "gender": "M;F;M;F;;M;;M", "homepage": "http://marl.ia.ac.cn/people/maweiyu.html;https://miracle1207.github.io/miqirui.github.io/;http://marl.ia.ac.cn/people/zengyongcheng.html;;;http://www0.cs.ucl.ac.uk/staff/jun.wang/;https://pkuzhf.github.io;https://linprophet.github.io/", "dblp": ";;;;;w/JunWang12;93/7133-2;", "google_scholar": ";PaRrsDcAAAAJ;;5d0Upv8AAAAJ;;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ;;", "orcid": ";;;;0000-0002-1904-5896;;;", "linkedin": ";;;\u96ea-\u95eb-48926a188/;;;;", "or_profile": "~Weiyu_Ma1;~Qirui_Mi1;~Yongcheng_Zeng1;~Xue_Yan2;~Yuqiao_Wu1;~Jun_Wang2;~Haifeng_Zhang3;~Lin_Runji1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;University College London;Institute of Automation, Chinese Academy of Sciences;School of Artificial Intelligence, University of Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ucas.edu.cn;ucl.ac.uk;ia.ac.cn;ia.ac.cn", "position": "MS student;PhD student;PhD student;PhD student;MS student;Professor;Associate Professor;MS student", "bibtex": "@inproceedings{\nma2024large,\ntitle={Large Language Models Play StarCraft {II}:Benchmarks and A Chain of Summarization Approach},\nauthor={Weiyu Ma and Qirui Mi and Yongcheng Zeng and Xue Yan and Runji Lin and Yuqiao Wu and Jun Wang and Haifeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kEPpD7yETM}\n}", "github": "", "reviewers": "gmAe;B3oZ;Q8qA", "pdf_size": 12384217, "rating": "5;6;8", "confidence": "4;4;3", "soundness": "2;3;3", "novelty": "2;3;4", "presentation": "2;3;3", "wc_summary": "87;90;64", "wc_strengths": "62;29;84", "wc_weaknesses": "95;33;121", "wc_questions": "1;208;48", "wc_limitations": "1;1;46", "wc_review": "246;361;363", "wc_reply_reviewers": "21;16;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 80.33333333333333, 11.61416759345623 ], "wc_strengths_avg": [ 58.333333333333336, 22.60285134421958 ], "wc_weaknesses_avg": [ 83.0, 36.914315199752345 ], "wc_questions_avg": [ 85.66666666666667, 88.60524188155512 ], "wc_limitations_avg": [ 16.0, 21.213203435596427 ], "wc_review_avg": [ 323.3333333333333, 54.68901982015118 ], "wc_reply_reviewers_avg": [ 15.666666666666666, 4.4969125210773475 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11934872791193537299&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ucas.edu.cn;ucl.ac.uk;ia.ac.cn;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;0;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;University College London", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;https://www.ucl.ac.uk", "aff_unique_abbr": "CAS;UCAS;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Distributed-Order Fractional Graph Operating Network", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93910", "id": "kEQFjKqiqM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kEQFjKqiqM", "openreview": "https://openreview.net/forum?id=kEQFjKqiqM", "poster": "", "project": "", "author_site": "Kai Zhao, Xuhao Li, Qiyu Kang, Feng Ji, Qinxu Ding, Yanan Zhao, WENFEI LIANG, Wee Peng Tay", "tldr": "", "abstract": "We introduce the Distributed-order fRActional Graph Operating Network (DRAGON), a novel continuous Graph Neural Network (GNN) framework that incorporates distributed-order fractional calculus. \nUnlike traditional continuous GNNs that utilize integer-order or single fractional-order differential equations, DRAGON uses a learnable probability distribution over a range of real numbers for the derivative orders. \nBy allowing a flexible and learnable superposition of multiple derivative orders, our framework captures complex graph feature updating dynamics beyond the reach of conventional models.\nWe provide a comprehensive interpretation of our framework's capability to capture intricate dynamics through the lens of a non-Markovian graph random walk with node feature updating driven by an anomalous diffusion process over the graph. \nFurthermore, to highlight the versatility of the DRAGON framework, we conduct empirical evaluations across a range of graph learning tasks. The results consistently demonstrate superior performance when compared to traditional continuous GNN models. The implementation code is available at \\url{https://github.com/zknus/NeurIPS-2024-DRAGON}.", "keywords": "Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/8e577907221cc2158198adc675c35a2bdd03147e.zip", "author": "Kai Zhao;Xuhao Li;Qiyu Kang;Feng Ji;Qinxu Ding;Yanan Zhao;Wenfei Liang;Wee Peng Tay", "authorids": "~Kai_Zhao7;~Xuhao_Li2;~Qiyu_Kang2;~Feng_Ji2;~Qinxu_Ding1;~Yanan_Zhao1;~Wenfei_Liang1;~Wee_Peng_Tay1", "gender": ";;;M;M;F;;", "homepage": ";http://math.ahu.edu.cn/lxh/list.htm;;;;;;https://personal.ntu.edu.sg/wptay/", "dblp": ";194/5914;;;232/3684;00/4709-3;;45/3753", "google_scholar": ";Yl5Bc0UAAAAJ;;EA0VBD8AAAAJ;orksudIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;BkCI7rEAAAAJ", "orcid": ";;;0000-0003-3442-1471;;0000-0002-2761-696X;;0000-0002-1543-195X", "linkedin": ";;;;;;;", "or_profile": "~Kai_Zhao7;~Xuhao_Li2;~Qiyu_Kang2;~Feng_Ji2;~Qinxu_Ding1;~Yanan_Zhao1;~Wenfei_Liang1;~Wee_Peng_Tay1", "aff": ";Anhui University;;Nanyang Technological University;Singapore University of Social Sciences;Nanyang Technological University;;Nanyang Technological University", "aff_domain": ";ahu.edu.cn;;ntu.edu.sg;suss.edu.sg;ntu.edu.sg;;ntu.edu.sg", "position": ";Lecturer;;Researcher;Lecturer;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhao2024distributedorder,\ntitle={Distributed-Order Fractional Graph Operating Network},\nauthor={Kai Zhao and Xuhao Li and Qiyu Kang and Feng Ji and Qinxu Ding and Yanan Zhao and Wenfei Liang and Wee Peng Tay},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kEQFjKqiqM}\n}", "github": "", "reviewers": "smsh;uHnh;mRZs", "pdf_size": 509899, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "92;82;116", "wc_strengths": "60;73;141", "wc_weaknesses": "42;130;64", "wc_questions": "97;37;4", "wc_limitations": "44;5;10", "wc_review": "335;327;335", "wc_reply_reviewers": "19;59;0", "wc_reply_authors": "60;638;62", "reply_reviewers": "1;1;0", "reply_authors": "3;4;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 96.66666666666667, 14.2672897060218 ], "wc_strengths_avg": [ 91.33333333333333, 35.51838334659329 ], "wc_weaknesses_avg": [ 78.66666666666667, 37.392809766347085 ], "wc_questions_avg": [ 46.0, 38.49675310984031 ], "wc_limitations_avg": [ 19.666666666666668, 17.326921891156037 ], "wc_review_avg": [ 332.3333333333333, 3.7712361663282534 ], "wc_reply_reviewers_avg": [ 26.0, 24.589970855343985 ], "wc_reply_authors_avg": [ 253.33333333333334, 272.00163398202017 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13784169488299730779&as_sdt=805&sciodt=0,3&hl=en", "gs_version_total": 4, "email": ";ahu.edu.cn;;ntu.edu.sg;suss.edu.sg;ntu.edu.sg;;ntu.edu.sg", "author_num": 8, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Anhui University;Nanyang Technological University;Singapore University of Social Sciences", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ahu.edu.cn/;https://www.ntu.edu.sg;https://www.suss.edu.sg", "aff_unique_abbr": "AHU;NTU;SUSS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Nesterov acceleration despite very noisy gradients", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93909", "id": "kHXUb494SY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kHXUb494SY", "openreview": "https://openreview.net/forum?id=kHXUb494SY", "poster": "/media/PosterPDFs/NeurIPS%202024/93909.png?t=1733724537.5923471", "project": "", "author_site": "Kanan Gupta, Jonathan W. Siegel, Stephan Wojtowytsch", "tldr": "", "abstract": "We present a generalization of Nesterov's accelerated gradient descent algorithm. Our algorithm (AGNES) provably achieves acceleration for smooth convex and strongly convex minimization tasks with noisy gradient estimates if the noise intensity is proportional to the magnitude of the gradient at every point. Nesterov's method converges at an accelerated rate if the constant of proportionality is below 1, while AGNES accommodates any signal-to-noise ratio. The noise model is motivated by applications in overparametrized machine learning. AGNES requires only two parameters in convex and three in strongly convex minimization tasks, improving on existing methods. We further provide clear geometric interpretations and heuristics for the choice of parameters.", "keywords": "Stochastic optimization;stochastic acceleration;smooth convex optimization;deep learning;accelerated gradient descent", "primary_area": "optimization", "supplementary_material": "/attachment/f49fa8b536ef571a70e687b220d38f52b5925d56.zip", "author": "Kanan Gupta;Jonathan W. Siegel;Stephan Wojtowytsch", "authorids": "~Kanan_Gupta1;~Jonathan_W._Siegel1;~Stephan_Wojtowytsch1", "gender": ";M;", "homepage": ";https://jwsiegel2510.github.io;http://www.swojtowytsch.com", "dblp": ";239/6028;252/1157", "google_scholar": ";oI42qIIAAAAJ;vnluGycAAAAJ", "orcid": ";;0000-0003-3766-5332", "linkedin": ";;", "or_profile": "~Kanan_Gupta1;~Jonathan_W._Siegel1;~Stephan_Wojtowytsch1", "aff": ";Texas A&M University - College Station;University of Pittsburgh", "aff_domain": ";tamu.edu;pitt.edu", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngupta2024nesterov,\ntitle={Nesterov acceleration despite very noisy gradients},\nauthor={Kanan Gupta and Jonathan W. Siegel and Stephan Wojtowytsch},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kHXUb494SY}\n}", "github": "", "reviewers": "AMLK;e67y;x6ut;tWyF;2pRK", "pdf_size": 2220141, "rating": "3;6;6;7;7", "confidence": "4;4;3;3;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;4", "presentation": "1;2;3;4;4", "wc_summary": "65;102;51;168;73", "wc_strengths": "57;190;39;95;38", "wc_weaknesses": "151;339;77;183;55", "wc_questions": "183;128;75;1;92", "wc_limitations": "6;23;6;1;1", "wc_review": "462;782;248;448;259", "wc_reply_reviewers": "0;492;11;0;34", "wc_reply_authors": "0;51;0;0;0", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 1.16619037896906 ], "wc_summary_avg": [ 91.8, 41.58557442190742 ], "wc_strengths_avg": [ 83.8, 56.968061227322806 ], "wc_weaknesses_avg": [ 161.0, 100.55844071981228 ], "wc_questions_avg": [ 95.8, 60.124537420257965 ], "wc_limitations_avg": [ 7.4, 8.114185110040317 ], "wc_review_avg": [ 439.8, 193.4615207218221 ], "wc_reply_reviewers_avg": [ 107.4, 192.70038920562666 ], "wc_reply_authors_avg": [ 10.2, 20.4 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7222222222222223, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10041211087435910060&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";tamu.edu;pitt.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Texas A&M University;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.pitt.edu", "aff_unique_abbr": "TAMU;Pitt", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Flexible Visual Relationship Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93908", "id": "kJkp2ECJT7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kJkp2ECJT7", "openreview": "https://openreview.net/forum?id=kJkp2ECJT7", "poster": "/media/PosterPDFs/NeurIPS%202024/93908.png?t=1731633619.3498943", "project": "", "author_site": "Fangrui Zhu, Jianwei Yang, Huaizu Jiang", "tldr": "", "abstract": "Visual relationship understanding has been studied separately in human-object interaction(HOI) detection, scene graph generation(SGG), and referring relationships(RR) tasks. \nGiven the complexity and interconnectedness of these tasks, it is crucial to have a flexible framework that can effectively address these tasks in a cohesive manner.\nIn this work, we propose FleVRS, a single model that seamlessly integrates the above three aspects in standard and promptable visual relationship segmentation, and further possesses the capability for open-vocabulary segmentation to adapt to novel scenarios. \nFleVRS leverages the synergy between text and image modalities, \nto ground various types of relationships from images and use textual features from vision-language models to visual conceptual understanding.\nEmpirical validation across various datasets demonstrates that our framework outperforms existing models in standard, promptable, and open-vocabulary tasks, e.g., +1.9 $mAP$ on HICO-DET, +11.4 $Acc$ on VRD, +4.7 $mAP$ on unseen HICO-DET.\nOur FleVRS represents a significant step towards a more intuitive, comprehensive, and scalable understanding of visual relationships.", "keywords": "Visual relationship segmentation;relationship understanding;human-object interaction;scene graph generation.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Fangrui Zhu;Jianwei Yang;Huaizu Jiang", "authorids": "~Fangrui_Zhu1;~Jianwei_Yang1;~Huaizu_Jiang1", "gender": "F;M;M", "homepage": "https://fangruizhu.github.io/;http://jianghz.me;https://jwyang.github.io/", "dblp": "213/8030;128/7890;", "google_scholar": "https://scholar.google.com.au/citations?user=Xq0De8EAAAAJ;0hHqYoAAAAAJ;Cl9byD8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Fangrui_Zhu1;~Huaizu_Jiang1;~Jianwei_Yang2", "aff": "Northeastern University;Northeastern University;Microsoft", "aff_domain": "neu.edu;northeastern.edu;microsoft.com", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzhu2024towards,\ntitle={Towards Flexible Visual Relationship Segmentation},\nauthor={Fangrui Zhu and Jianwei Yang and Huaizu Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kJkp2ECJT7}\n}", "github": "", "reviewers": "k2zH;W5TQ;LLLd;EDLE", "pdf_size": 4415609, "rating": "5;5;5;7", "confidence": "4;5;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "84;40;40;49", "wc_strengths": "73;14;29;58", "wc_weaknesses": "165;121;98;100", "wc_questions": "6;2;19;8", "wc_limitations": "17;2;3;10", "wc_review": "345;179;189;225", "wc_reply_reviewers": "373;14;0;28", "wc_reply_authors": "512;127;117;116", "reply_reviewers": "2;1;0;1", "reply_authors": "4;3;3;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 18.129740759315894 ], "wc_strengths_avg": [ 43.5, 23.243278598338918 ], "wc_weaknesses_avg": [ 121.0, 26.953663943887108 ], "wc_questions_avg": [ 8.75, 6.299801584177076 ], "wc_limitations_avg": [ 8.0, 6.041522986797286 ], "wc_review_avg": [ 234.5, 66.051116568912 ], "wc_reply_reviewers_avg": [ 103.75, 155.76645177957928 ], "wc_reply_authors_avg": [ 218.0, 169.7954651926841 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10901532541954078669&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "neu.edu;northeastern.edu;microsoft.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Northeastern University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.northeastern.edu;https://www.microsoft.com", "aff_unique_abbr": "NEU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On the Saturation Effects of Spectral Algorithms in Large Dimensions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93907", "id": "kJzecLYsRi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kJzecLYsRi", "openreview": "https://openreview.net/forum?id=kJzecLYsRi", "poster": "/media/PosterPDFs/NeurIPS%202024/93907.png?t=1731342865.0563118", "project": "", "author_site": "Weihao Lu, haobo Zhang, Yicheng Li, Qian Lin", "tldr": "", "abstract": "The saturation effects, which originally refer to the fact that kernel ridge regression (KRR) fails to achieve the information-theoretical lower bound when the regression function is over-smooth, have been observed for almost 20 years and were rigorously proved recently for kernel ridge regression and some other spectral algorithms over a fixed dimensional domain. The main focus of this paper is to explore the saturation effects for a large class of spectral algorithms (including the KRR, gradient descent, etc.) in large dimensional settings where $n \\asymp d^{\\gamma}$. More precisely, we first propose an improved minimax lower bound for the kernel regression problem in large dimensional settings and show that the gradient flow with early stopping strategy will result in an estimator achieving this lower bound (up to a logarithmic factor). Similar to the results in KRR, we can further determine the exact convergence rates (both upper and lower bounds) of a large class of (optimal tuned) spectral algorithms with different qualification $\\tau$'s. In particular, we find that these exact rate curves (varying along $\\gamma$) exhibit the periodic plateau behavior and the polynomial approximation barrier. Consequently, we can fully depict the saturation effects of the spectral algorithms and reveal a new phenomenon in large dimensional settings (i.e., the saturation effect occurs in large dimensional setting as long as the source condition $s>\\tau$ while it occurs in fixed dimensional setting as long as $s>2\\tau$).", "keywords": "reproducing kernel Hilbert space;spectral algorithm;high-dimensional statistics;minimax rates", "primary_area": "learning_theory", "supplementary_material": "", "author": "Weihao Lu;Haobo Zhang;Yicheng Li;Qian Lin", "authorids": "~Weihao_Lu2;~Haobo_Zhang2;~Yicheng_Li2;~Qian_Lin2", "gender": "M;M;M;M", "homepage": "https://luweihao.github.io/;;;https://sites.google.com/site/qianlincd/", "dblp": "44/9720-2;;;79/3108", "google_scholar": "OWRzIyIAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;kHPrqdgAAAAJ", "orcid": ";0000-0003-3478-140X;0000-0002-9497-0379;", "linkedin": ";;;", "or_profile": "~Weihao_Lu2;~Haobo_Zhang2;~Yicheng_Li2;~Qian_Lin2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlu2024on,\ntitle={On the Saturation Effects of Spectral Algorithms in Large Dimensions},\nauthor={Weihao Lu and Haobo Zhang and Yicheng Li and Qian Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kJzecLYsRi}\n}", "github": "", "reviewers": "JAP1;XV4b;QpMc", "pdf_size": 1130810, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "3;4;4", "novelty": "2;3;4", "presentation": "3;4;3", "wc_summary": "55;117;81", "wc_strengths": "70;65;28", "wc_weaknesses": "388;9;21", "wc_questions": "116;306;43", "wc_limitations": "1;10;1", "wc_review": "630;507;174", "wc_reply_reviewers": "35;31;49", "wc_reply_authors": "37;31;30", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 84.33333333333333, 25.42090128658349 ], "wc_strengths_avg": [ 54.333333333333336, 18.732028424302822 ], "wc_weaknesses_avg": [ 139.33333333333334, 175.90211924691403 ], "wc_questions_avg": [ 155.0, 110.85425867627579 ], "wc_limitations_avg": [ 4.0, 4.242640687119285 ], "wc_review_avg": [ 437.0, 192.62917743685665 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 7.717224601860151 ], "wc_reply_authors_avg": [ 32.666666666666664, 3.091206165165235 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QcX6hE9KWe4J:scholar.google.com/&scioq=On+the+Saturation+Effects+of+Spectral+Algorithms+in+Large+Dimensions&hl=en&as_sdt=0,23", "gs_version_total": 2, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Immiscible Diffusion: Accelerating Diffusion Training with Noise Assignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93906", "id": "kK23oMGe9g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kK23oMGe9g", "openreview": "https://openreview.net/forum?id=kK23oMGe9g", "poster": "/media/PosterPDFs/NeurIPS%202024/93906.png?t=1732683888.72574", "project": "", "author_site": "Yiheng Li, Heyang Jiang, Akio Kodaira, Masayoshi TOMIZUKA, Kurt Keutzer, Chenfeng Xu", "tldr": "", "abstract": "In this paper, we point out that suboptimal noise-data mapping leads to slow training of diffusion models. During diffusion training, current methods diffuse each image across the entire noise space, resulting in a mixture of all images at every point in the noise layer. We emphasize that this random mixture of noise-data mapping complicates the optimization of the denoising function in diffusion models. Drawing inspiration from the immiscibility phenomenon in physics, we propose *Immiscible Diffusion*, a simple and effective method to improve the random mixture of noise-data mapping. In physics, miscibility can vary according to various intermolecular forces. Thus, immiscibility means that the mixing of molecular sources is distinguishable. Inspired by this concept, we propose an assignment-then-diffusion training strategy to achieve *Immiscible Diffusion*. As one example, prior to diffusing the image data into noise, we assign diffusion target noise for the image data by minimizing the total image-noise pair distance in a mini-batch. The assignment functions analogously to external forces to expel the diffuse-able areas of images, thus mitigating the inherent difficulties in diffusion training. Our approach is remarkably simple, requiring only *one line of code* to restrict the diffuse-able area for each image while preserving the Gaussian distribution of noise. In this way, each image is preferably projected to nearby noise. To address the high complexity of the assignment algorithm, we employ a quantized assignment strategy, which significantly reduces the computational overhead to a negligible level (e.g. 22.8ms for a large batch size of 1024 on an A6000). Experiments demonstrate that our method can achieve up to 3x faster training for unconditional Consistency Models on the CIFAR dataset, as well as for DDIM and Stable Diffusion on CelebA and ImageNet dataset, and in class-conditional training and fine-tuning. In addition, we conducted a thorough analysis that sheds light on how it improves diffusion training speed while improving fidelity. The code is available at https://yhli123.github.io/immiscible-diffusion", "keywords": "Diffusion Model;Training Efficiency", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yiheng Li;Heyang Jiang;Akio Kodaira;Masayoshi Tomizuka;Kurt Keutzer;Chenfeng Xu", "authorids": "~Yiheng_Li2;~Heyang_Jiang2;~Akio_Kodaira1;~Masayoshi_Tomizuka2;~Kurt_Keutzer1;~Chenfeng_Xu1", "gender": "M;M;M;M;M;M", "homepage": "https://yhli123.github.io/;;;https://people.eecs.berkeley.edu/~keutzer/;;https://me.berkeley.edu/people/masayoshi-tomizuka/", "dblp": ";;238/1433;k/KurtKeutzer.html;65/1881;10/4434", "google_scholar": "Jd2A44wAAAAJ;;https://scholar.google.com/citations?hl=ja;ID9QePIAAAAJ;RpqvaTUAAAAJ;", "orcid": ";;;0000-0003-3868-8501;0000-0002-4941-6985;", "linkedin": ";heyang-jiang-b64a522b2/;;kurtkeutzer/;;", "or_profile": "~Yiheng_Li2;~Heyang_Jiang2;~Akio_Kodaira1;~Kurt_Keutzer1;~Chenfeng_Xu1;~Masayoshi_Tomizuka1", "aff": "University of California, Berkeley;Tsinghua University;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;mails.tsinghua.edu.cn;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Undergrad student;PhD student;Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024immiscible,\ntitle={Immiscible Diffusion: Accelerating Diffusion Training with Noise Assignment},\nauthor={Yiheng Li and Heyang Jiang and Akio Kodaira and Masayoshi Tomizuka and Kurt Keutzer and Chenfeng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kK23oMGe9g}\n}", "github": "", "reviewers": "Da24;nEkA;y1v3;h6mk", "pdf_size": 27560240, "rating": "2;5;6;6", "confidence": "5;3;5;4", "soundness": "1;2;3;3", "novelty": "1;3;3;4", "presentation": "1;2;3;4", "wc_summary": "102;61;58;107", "wc_strengths": "33;11;44;105", "wc_weaknesses": "1670;114;203;243", "wc_questions": "82;95;3;5", "wc_limitations": "241;6;1;1", "wc_review": "2128;287;309;461", "wc_reply_reviewers": "765;12;144;496", "wc_reply_authors": "671;23;355;610", "reply_reviewers": "2;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 4.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 82.0, 22.594247055390007 ], "wc_strengths_avg": [ 48.25, 34.85236720798173 ], "wc_weaknesses_avg": [ 557.5, 643.9970885027354 ], "wc_questions_avg": [ 46.25, 42.50514674718816 ], "wc_limitations_avg": [ 62.25, 103.22154571599866 ], "wc_review_avg": [ 796.25, 771.7996420704015 ], "wc_reply_reviewers_avg": [ 354.25, 295.8668408253956 ], "wc_reply_authors_avg": [ 414.75, 255.35502246871903 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.32186034291019194, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7264169939484572238&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "berkeley.edu;mails.tsinghua.edu.cn;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of California, Berkeley;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UC Berkeley;THU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Benchmarking Structural Inference Methods for Interacting Dynamical Systems with Synthetic Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97536", "id": "kKtalvwqBZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kKtalvwqBZ", "openreview": "https://openreview.net/forum?id=kKtalvwqBZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97536.png?t=1730397544.6577065", "project": "", "author_site": "Aoran Wang, Tsz Pan Tong, Andrzej Mizera, Jun Pang", "tldr": "", "abstract": "Understanding complex dynamical systems begins with identifying their topological structures, which expose the organization of the systems. This requires robust structural inference methods that can deduce structure from observed behavior. However, existing methods are often domain-specific and lack a standardized, objective comparison framework. We address this gap by benchmarking 13 structural inference methods from various disciplines on simulations representing two types of dynamics and 11 interaction graph models, supplemented by a biological experimental dataset to mirror real-world application. We evaluated the methods for accuracy, scalability, robustness, and sensitivity to graph properties. Our findings indicate that deep learning methods excel with multi-dimensional data, while classical statistics and information theory based approaches are notably accurate and robust. Additionally, performance correlates positively with the graph's average shortest path length. This benchmark should aid researchers in selecting suitable methods for their specific needs and stimulate further methodological innovation.", "keywords": "Structural Inference;Benchmark;AI4Science;Interaction Graphs;Dynamical Systems", "primary_area": "", "supplementary_material": "", "author": "Aoran Wang;Tsz Pan Tong;Andrzej Mizera;Jun Pang", "authorids": "~Aoran_Wang1;~Tsz_Pan_Tong1;~Andrzej_Mizera1;~Jun_Pang1", "gender": ";M;M;M", "homepage": ";https://satoss.uni.lu/members/tszpan/;https://www.mimuw.edu.pl/~amizera/;https://satoss.uni.lu/members/jun/", "dblp": ";;;p/JunPang", "google_scholar": ";;6O_oWFUAAAAJ;0JjOM_gAAAAJ", "orcid": ";;0000-0002-6351-2877;0000-0002-4521-4112", "linkedin": ";;;", "or_profile": "~Aoran_Wang1;~Tsz_Pan_Tong1;~Andrzej_Mizera1;~Jun_Pang1", "aff": ";University of Luxemburg;University of Warsaw;University of Luxembourg", "aff_domain": ";uni.lu;mimuw.edu.pl;uni.lu", "position": ";PhD student;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nwang2024benchmarking,\ntitle={Benchmarking Structural Inference Methods for Interacting Dynamical Systems with Synthetic Data},\nauthor={Aoran Wang and Tsz Pan Tong and Andrzej Mizera and Jun Pang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kKtalvwqBZ}\n}", "github": "", "reviewers": "DD7x;3UMV;hQrd", "pdf_size": 801339, "rating": "6;7;9", "confidence": "4;4;4", "wc_summary_and_contributions": "52;130;93", "wc_strengths": "25;48;75", "wc_improvement": "18;57;57", "wc_limitations": "82;1;45", "wc_correctness": "1;1;28", "wc_clarity": "1;1;28", "wc_relation_to_prior_work": "1;1;41", "wc_documentation": "7;1;35", "wc_additional_feedback": "1;1;1", "wc_review": "188;241;403", "wc_reply_reviewers": "41;0;20", "wc_reply_authors": "155;0;53", "reply_reviewers": "1;0;1", "reply_authors": "4;1;2", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 91.66666666666667, 31.857320805254304 ], "wc_strengths_avg": [ 49.333333333333336, 20.434176165325468 ], "wc_improvement_avg": [ 44.0, 18.384776310850235 ], "wc_limitations_avg": [ 42.666666666666664, 33.10924677823738 ], "wc_correctness_avg": [ 10.0, 12.727922061357855 ], "wc_clarity_avg": [ 10.0, 12.727922061357855 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 18.856180831641264 ], "wc_documentation_avg": [ 14.333333333333334, 14.817407180595245 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 277.3333333333333, 91.45612184114425 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 16.73983937265296 ], "wc_reply_authors_avg": [ 69.33333333333333, 64.32383349549026 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10058773427544546483&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";uni.lu;mimuw.edu.pl;uni.lu", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Luxembourg;University of Warsaw", "aff_unique_dep": ";", "aff_unique_url": "https://wwwen.uniluxembourg.lu;https://www.uw.edu.pl", "aff_unique_abbr": "Uni Lu;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Luxembourg;Poland" }, { "title": "On the Robustness of Spectral Algorithms for Semirandom Stochastic Block Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93905", "id": "kLen1XyW6P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kLen1XyW6P", "openreview": "https://openreview.net/forum?id=kLen1XyW6P", "poster": "", "project": "", "author_site": "Aditya Bhaskara, Agastya Jha, Michael Kapralov, Naren Manoj, Davide Mazzali, Weronika Wrzos-Kaminska", "tldr": "", "abstract": "In a graph bisection problem, we are given a graph $G$ with two equally-sized unlabeled communities, and the goal is to recover the vertices in these communities. A popular heuristic, known as spectral clustering, is to output an estimated community assignment based on the eigenvector corresponding to the second-smallest eigenvalue of the Laplacian of $G$. Spectral algorithms can be shown to provably recover the cluster structure for graphs generated from probabilistic models, such as the Stochastic Block Model (SBM). However, spectral clustering is known to be non-robust to model mis-specification. Techniques based on semidefinite programming have been shown to be more robust, but they incur significant computational overheads. \n\nIn this work, we study the robustness of spectral algorithms against semirandom adversaries. Informally, a semirandom adversary is allowed to ``helpfully'' change the specification of the model in a way that is consistent with the ground-truth solution. Our semirandom adversaries in particular are allowed to add edges inside clusters or increase the probability that an edge appears inside a cluster. Semirandom adversaries are a useful tool to determine the extent to which an algorithm has overfit to statistical assumptions on the input. \n\nOn the positive side, we identify a wide range of semirandom adversaries under which spectral bisection using the _unnormalized_ Laplacian is strongly consistent, i.e., it exactly recovers the planted partitioning. On the negative side, we show that in many of these settings, _normalized_ spectral bisection outputs a partitioning that makes a classification mistake on a constant fraction of the vertices. Finally, we demonstrate numerical experiments that complement our theoretical findings.", "keywords": "stochastic block model;clustering;spectral algorithms;random matrices;semirandom model;monotone adversary", "primary_area": "learning_theory", "supplementary_material": "/attachment/0399ab625dad65f487cf574fec96aad101c9c293.zip", "author": "Aditya Bhaskara;Agastya Vibhuti Jha;Michael Kapralov;Naren Sarayu Manoj;Davide Mazzali;Weronika Wrzos-Kaminska", "authorids": "~Aditya_Bhaskara1;~Agastya_Vibhuti_Jha1;~Michael_Kapralov1;~Naren_Sarayu_Manoj1;~Davide_Mazzali1;~Weronika_Wrzos-Kaminska1", "gender": "M;M;;M;;", "homepage": "http://www.cs.utah.edu/~bhaskara/;;;https://home.ttic.edu/~nsm/;https://people.epfl.ch/davide.mazzali;", "dblp": "47/7801.html;307/4257.html;76/6407;236/5698;;", "google_scholar": "tqxTaiAAAAAJ;;;WQSMHvkAAAAJ;;", "orcid": ";;;0000-0002-9353-4882;;", "linkedin": ";;;narenmanoj/;;", "or_profile": "~Aditya_Bhaskara1;~Agastya_Vibhuti_Jha1;~Michael_Kapralov1;~Naren_Sarayu_Manoj1;~Davide_Mazzali1;~Weronika_Wrzos-Kaminska1", "aff": "University of Utah;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Toyota Technological Institute at Chicago;EPFL - EPF Lausanne;", "aff_domain": "utah.edu;epfl.ch;epfl.ch;ttic.edu;epfl.ch;", "position": "Associate Professor;MS student;Assistant Professor;PhD student;PhD student;", "bibtex": "@inproceedings{\nbhaskara2024on,\ntitle={On the Robustness of Spectral Algorithms for Semirandom Stochastic Block Models},\nauthor={Aditya Bhaskara and Agastya Vibhuti Jha and Michael Kapralov and Naren Sarayu Manoj and Davide Mazzali and Weronika Wrzos-Kaminska},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kLen1XyW6P}\n}", "github": "", "reviewers": "hmYB;e2Qn;eQb7;q18A", "pdf_size": 643725, "rating": "6;6;6;7", "confidence": "4;4;5;3", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "291;88;149;65", "wc_strengths": "45;70;79;63", "wc_weaknesses": "119;75;52;5", "wc_questions": "1;49;131;19", "wc_limitations": "1;1;1;1", "wc_review": "457;283;412;153", "wc_reply_reviewers": "30;10;27;5", "wc_reply_authors": "0;0;43;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 148.25, 87.94707215137977 ], "wc_strengths_avg": [ 64.25, 12.47747971346778 ], "wc_weaknesses_avg": [ 62.75, 41.12405014100629 ], "wc_questions_avg": [ 50.0, 49.80963762164909 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 326.25, 118.67471297626972 ], "wc_reply_reviewers_avg": [ 18.0, 10.700467279516348 ], "wc_reply_authors_avg": [ 10.75, 18.619546181365433 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4852827186270194706&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "utah.edu;epfl.ch;epfl.ch;ttic.edu;epfl.ch;", "author_num": 6, "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "University of Utah;EPFL;Swiss Federal Institute of Technology Lausanne;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.utah.edu;https://www.epfl.ch;https://www.epfl.ch;https://www.tti-chicago.org", "aff_unique_abbr": "Utah;EPFL;EPFL;TTI Chicago", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Lausanne;Chicago", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "An Analysis of Elo Rating Systems via Markov Chains", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93904", "id": "kLiWXUdCEw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kLiWXUdCEw", "openreview": "https://openreview.net/forum?id=kLiWXUdCEw", "poster": "/media/PosterPDFs/NeurIPS%202024/93904.png?t=1730142768.9356005", "project": "", "author_site": "Sam Olesker-Taylor, Luca Zanetti", "tldr": "", "abstract": "We present a theoretical analysis of the Elo rating system, a popular method for ranking skills of players in an online setting. In particular, we study Elo under the Bradley-Terry-Luce model and, using techniques from Markov chain theory, show that Elo learns the model parameters at a rate competitive with the state-of-the-art. We apply our results to the problem of efficient tournament design and discuss a connection with the fastest-mixing Markov chain problem.", "keywords": "Elo ratings;Bradley\u2013Terry\u2013Luce model;tournament design;concentration", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/df2c1946551193c0450aec01235eedd4c20cc536.zip", "author": "Sam Olesker-Taylor;Luca Zanetti", "authorids": "~Sam_Olesker-Taylor1;~Luca_Zanetti1", "gender": "M;", "homepage": "https://mathematicalsam.wordpress.com;", "dblp": "306/1033;", "google_scholar": "J3YGj0oAAAAJ;", "orcid": "0000-0001-9764-1645;", "linkedin": ";", "or_profile": "~Sam_Olesker-Taylor1;~Luca_Zanetti1", "aff": "University of Warwick;", "aff_domain": "warwick.ac.uk;", "position": "Assistant Professor;", "bibtex": "@inproceedings{\nolesker-taylor2024an,\ntitle={An Analysis of Elo Rating Systems via Markov Chains},\nauthor={Sam Olesker-Taylor and Luca Zanetti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kLiWXUdCEw}\n}", "github": "", "reviewers": "EfRM;ZKzd;ZrEV;M8kb", "pdf_size": 915590, "rating": "6;6;6;7", "confidence": "2;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;2;3", "wc_summary": "138;64;104;114", "wc_strengths": "48;45;116;67", "wc_weaknesses": "152;35;234;11", "wc_questions": "234;15;42;914", "wc_limitations": "109;1;4;14", "wc_review": "681;160;500;1120", "wc_reply_reviewers": "50;4;184;58", "wc_reply_authors": "0;0;315;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.0, 26.70205984563738 ], "wc_strengths_avg": [ 69.0, 28.416544476765644 ], "wc_weaknesses_avg": [ 108.0, 90.20809276334357 ], "wc_questions_avg": [ 301.25, 363.7082450261473 ], "wc_limitations_avg": [ 32.0, 44.715769030622745 ], "wc_review_avg": [ 615.25, 346.2768942623807 ], "wc_reply_reviewers_avg": [ 74.0, 66.76825593049439 ], "wc_reply_authors_avg": [ 78.75, 136.3990010960491 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14392143369696321405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "warwick.ac.uk;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Warwick", "aff_unique_dep": "", "aff_unique_url": "https://www.warwick.ac.uk", "aff_unique_abbr": "Warwick", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Fairness and Efficiency in Online Class Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93903", "id": "kMAXN7HF6d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kMAXN7HF6d", "openreview": "https://openreview.net/forum?id=kMAXN7HF6d", "poster": "", "project": "", "author_site": "MohammadTaghi Hajiaghayi, Shayan Jahan, Mohammad Sharifi, Suho Shin, Max Springer", "tldr": "", "abstract": "The online bipartite matching problem, extensively studied in the literature, deals with the allocation of online arriving vertices (items) to a predetermined set of offline vertices (agents). However, little attention has been given to the concept of class fairness, where agents are categorized into different classes, and the matching algorithm must ensure equitable distribution across these classes.\n\nWe here focus on randomized algorithms for the fair matching of indivisible items, subject to various definitions of fairness. Our main contribution is the first (randomized) non-wasteful algorithm that simultaneously achieves a $1/2$ approximation to class envy-freeness (CEF) while simultaneously ensuring an equivalent approximation to the class proportionality (CPROP) and utilitarian social welfare (USW) objectives. We supplement this result by demonstrating that no non-wasteful algorithm can achieve an $\\alpha$-CEF guarantee for $\\alpha > 0.761$. In a similar vein, we provide a novel input instance for deterministic divisible matching that demonstrates a nearly tight CEF approximation.\n\nLastly, we define the ``price of fairness,\" which represents the trade-off between optimal and fair matching. We demonstrate that increasing the level of fairness in the approximation of the solution leads to a decrease in the objective of maximizing USW, following an inverse proportionality relationship.", "keywords": "online algorithm;matching;envy-free;fair allocation;fairness", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "MohammadTaghi Hajiaghayi;Shayan Chashm Jahan;Mohammad Sharifi;Suho Shin;Max Springer", "authorids": "~MohammadTaghi_Hajiaghayi1;~Shayan_Chashm_Jahan1;~Mohammad_Sharifi1;~Suho_Shin1;~Max_Springer1", "gender": "M;M;M;M;M", "homepage": "http://www.cs.umd.edu/~hajiagha/;;;https://suhoshin.github.io/;https://www.maxspringer.me", "dblp": "334/4488;336/6086.html;;218/5505;292/2716", "google_scholar": "https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ;wVy-X9EAAAAJ;xjGWx2sAAAAJ;-p5eVQsAAAAJ;x9NBFhwAAAAJ", "orcid": "0000-0003-4842-0533;;;;0000-0001-9291-6574", "linkedin": "mohammad-hajiaghayi-2139a913a&ved=2ahUKEwjMyeH-5-_-AhV3K1kFHeeBDKwQjjh6BAgSEAE&usg=AOvVaw1NSVoT5FCGtOTi4eT8nr4b;shayan-chashm-jahan-32b4a2216/;;;mss423/", "or_profile": "~MohammadTaghi_Hajiaghayi1;~Shayan_Chashm_Jahan1;~Mohammad_Sharifi1;~Suho_Shin1;~Max_Springer1", "aff": "University of Maryland, College Park;University of Maryland, College Park;Sharif University of Technology, Sharif University of Technology;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;ce.sharif.edu;umd.edu;umd.edu", "position": "Full Professor;PhD student;Undergrad student;PhD student;PhD student", "bibtex": "@inproceedings{\nhajiaghayi2024fairness,\ntitle={Fairness and Efficiency in Online Class Matching},\nauthor={MohammadTaghi Hajiaghayi and Shayan Chashm Jahan and Mohammad Sharifi and Suho Shin and Max Springer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kMAXN7HF6d}\n}", "github": "", "reviewers": "h2C8;CL5G;gpQ6;yvhP", "pdf_size": 448894, "rating": "4;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "443;56;137;167", "wc_strengths": "120;68;99;84", "wc_weaknesses": "273;26;92;87", "wc_questions": "66;1;32;51", "wc_limitations": "2;1;1;4", "wc_review": "904;152;361;393", "wc_reply_reviewers": "117;0;92;10", "wc_reply_authors": "194;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 200.75, 145.63717760242403 ], "wc_strengths_avg": [ 92.75, 19.17517926904466 ], "wc_weaknesses_avg": [ 119.5, 92.35393873571392 ], "wc_questions_avg": [ 37.5, 24.27447218787671 ], "wc_limitations_avg": [ 2.0, 1.224744871391589 ], "wc_review_avg": [ 452.5, 276.61570815844857 ], "wc_reply_reviewers_avg": [ 54.75, 50.65261592455023 ], "wc_reply_authors_avg": [ 48.5, 84.00446416709055 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cklP9WGXnx4J:scholar.google.com/&scioq=Fairness+and+Efficiency+in+Online+Class+Matching&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "umd.edu;umd.edu;ce.sharif.edu;umd.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Maryland;Sharif University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.sharif.edu", "aff_unique_abbr": "UMD;SUT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Iran" }, { "title": "DenseFormer: Enhancing Information Flow in Transformers via Depth Weighted Averaging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93902", "id": "kMnoh7CXrq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kMnoh7CXrq", "openreview": "https://openreview.net/forum?id=kMnoh7CXrq", "poster": "", "project": "", "author_site": "Matteo Pagliardini, Amirkeivan Mohtashami, Fran\u00e7ois Fleuret, Martin Jaggi", "tldr": "", "abstract": "The transformer architecture by Vaswani et al. (2017) is now ubiquitous across application domains, from natural language processing to speech processing and image understanding. We propose DenseFormer, a simple modification to the standard architecture that improves the perplexity of the model without increasing its size---adding a few thousand parameters for large-scale models in the 100B parameters range. Our approach relies on an additional averaging step after each transformer block, which computes a weighted average of current and past representations---we refer to this operation as Depth-Weighted-Average (DWA). The learned DWA weights exhibit coherent patterns of information flow, revealing the strong and structured reuse of activations from distant layers. Experiments demonstrate that DenseFormer is more data efficient, reaching the same perplexity of much deeper transformer models, and that for the same perplexity, these new models outperform transformer baselines in terms of memory efficiency and inference time.", "keywords": "Transformer architecture;Large language models", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Matteo Pagliardini;Amirkeivan Mohtashami;Fran\u00e7ois Fleuret;Martin Jaggi", "authorids": "~Matteo_Pagliardini1;~Amirkeivan_Mohtashami1;~Fran\u00e7ois_Fleuret2;~Martin_Jaggi1", "gender": "M;M;M;M", "homepage": ";;https://mlo.epfl.ch;https://fleuret.org/francois/", "dblp": "140/7789;271/7873;17/4402;90/5265", "google_scholar": "https://scholar.google.ch/citations?user=FXacC3oAAAAJ;YT1udC0AAAAJ;https://scholar.google.ch/citations?user=r1TJBr8AAAAJ;https://scholar.google.ch/citations?user=Bj1tRlsAAAAJ", "orcid": ";;0000-0003-1579-5558;0000-0001-9457-7393", "linkedin": ";;;francois-fleuret/", "or_profile": "~Matteo_Pagliardini1;~Amirkeivan_Mohtashami1;~Martin_Jaggi1;~Francois_Fleuret1", "aff": "Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL;University of Geneva", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;unige.ch", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npagliardini2024denseformer,\ntitle={DenseFormer: Enhancing Information Flow in Transformers via Depth Weighted Averaging},\nauthor={Matteo Pagliardini and Amirkeivan Mohtashami and Fran{\\c{c}}ois Fleuret and Martin Jaggi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kMnoh7CXrq}\n}", "github": "", "reviewers": "Ljsg;EGgo;1YUF;VTZd", "pdf_size": 1203998, "rating": "4;7;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;4;4", "presentation": "3;4;4;4", "wc_summary": "75;86;80;86", "wc_strengths": "10;40;190;106", "wc_weaknesses": "13;168;622;46", "wc_questions": "102;69;70;102", "wc_limitations": "1;18;9;1", "wc_review": "201;381;971;341", "wc_reply_reviewers": "252;42;288;0", "wc_reply_authors": "109;21;222;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 4.602988159880492 ], "wc_strengths_avg": [ 86.5, 69.11403620104964 ], "wc_weaknesses_avg": [ 212.25, 243.51219168657656 ], "wc_questions_avg": [ 85.75, 16.253845698787718 ], "wc_limitations_avg": [ 7.25, 7.013380069552769 ], "wc_review_avg": [ 473.5, 294.90464560599924 ], "wc_reply_reviewers_avg": [ 145.5, 126.02678286776981 ], "wc_reply_authors_avg": [ 88.0, 87.50714256562146 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1738013171565504162&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "epfl.ch;epfl.ch;epfl.ch;unige.ch", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL;University of Geneva", "aff_unique_dep": ";;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.unige.ch", "aff_unique_abbr": "EPFL;EPFL;UNIGE", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Rethinking 3D Convolution in $\\ell_p$-norm Space", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93901", "id": "kMxdV4Blhn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kMxdV4Blhn", "openreview": "https://openreview.net/forum?id=kMxdV4Blhn", "poster": "/media/PosterPDFs/NeurIPS%202024/93901.png?t=1731124041.183772", "project": "", "author_site": "Li Zhang, Yan Zhong, Jianan Wang, Zhe Min, RujingWang, Liu Liu", "tldr": "", "abstract": "Convolution is a fundamental operation in the 3D backbone. However, under certain conditions, the feature extraction ability of traditional convolution methods may be weakened. In this paper, we introduce a new convolution method based on $\\ell_p$-norm. \nFor theoretical support, we prove the universal approximation theorem for $\\ell_p$-norm based convolution, and analyze the robustness and feasibility of $\\ell_p$-norms in 3D point cloud tasks. Concretely, $\\ell_{\\infty}$-norm based convolution is prone to feature loss. $\\ell_2$-norm based convolution is essentially a linear transformation of the traditional convolution. $\\ell_1$-norm based convolution is an economical and effective feature extractor. We propose customized optimization strategies to accelerate the training process of $\\ell_1$-norm based Nets and enhance the performance. Besides, a theoretical guarantee is given for the convergence by \\textit{regret} argument. We apply our methods to classic networks and conduct related experiments. Experimental results indicate that our approach exhibits competitive performance with traditional CNNs, with lower energy consumption and instruction latency.", "keywords": "$\\ell_p$-norm; 3D Convolution", "primary_area": "learning_theory", "supplementary_material": "", "author": "Li Zhang;Yan Zhong;Jianan Wang;Zhe Min;RujingWang;Liu Liu", "authorids": "~Li_Zhang25;~Yan_Zhong2;~Jianan_Wang2;~Zhe_Min2;~RujingWang1;~Liu_Liu13", "gender": ";M;F;;M;M", "homepage": ";;https://scholar.google.com/citations?user=mt5mvZ8AAAAJ&hl=en;https://zhemincuhk.wixsite.com/minzhe;https://www.researchgate.net/scientific-contributions/Rujing-Wang-69690845;http://home.ustc.edu.cn/~liuliu66/", "dblp": ";81/5094-1.html;49/6053,;195/8919.html;99/263.html;74/7037-12", "google_scholar": ";;mt5mvZ8AAAAJ;https://scholar.google.com/citations?hl=en;;-_aPWUIAAAAJ", "orcid": ";0000-0003-0005-2620;;;;", "linkedin": ";;;;;", "or_profile": "~Li_Zhang25;~Yan_Zhong2;~Jianan_Wang2;~Zhe_Min2;~RujingWang1;~Liu_Liu13", "aff": ";Peking University;International Digital Economy Academy (IDEA);Shandong University;Chinese Academy Sciences;Hefei University of Technology", "aff_domain": ";pku.edu.cn;idea.edu.cn;sdu.edu.cn;iim.ac.cn;hfut.edu.cn", "position": ";PhD student;Researcher;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024rethinking,\ntitle={Rethinking 3D Convolution in \\${\\textbackslash}ell\\_p\\$-norm Space},\nauthor={Li Zhang and Yan Zhong and Jianan Wang and Zhe Min and RujingWang and Liu Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kMxdV4Blhn}\n}", "github": "", "reviewers": "uudN;VAmn;dtmU;4SD1", "pdf_size": 1912010, "rating": "5;7;7;8", "confidence": "3;5;5;4", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "2;4;4;3", "wc_summary": "56;101;159;92", "wc_strengths": "31;51;67;94", "wc_weaknesses": "56;48;64;51", "wc_questions": "448;152;108;65", "wc_limitations": "15;9;1;4", "wc_review": "606;361;399;306", "wc_reply_reviewers": "49;26;0;60", "wc_reply_authors": "553;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 102.0, 36.966200778549045 ], "wc_strengths_avg": [ 60.75, 23.047505287991584 ], "wc_weaknesses_avg": [ 54.75, 6.057020719792859 ], "wc_questions_avg": [ 193.25, 150.26206274372782 ], "wc_limitations_avg": [ 7.25, 5.3091901453988255 ], "wc_review_avg": [ 418.0, 113.46585389446466 ], "wc_reply_reviewers_avg": [ 33.75, 23.025800746119558 ], "wc_reply_authors_avg": [ 138.25, 239.45602414639728 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6225430174794673, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7259551735597560311&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";pku.edu.cn;idea.edu.cn;sdu.edu.cn;iim.ac.cn;hfut.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Peking University;International Digital Economy Academy;Shandong University;Chinese Academy of Sciences;Hefei University of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.pku.edu.cn;;http://www.sdu.edu.cn;http://www.cas.cn;http://www.hfut.edu.cn/", "aff_unique_abbr": "Peking U;IDEA;SDU;CAS;HUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "This Too Shall Pass: Removing Stale Observations in Dynamic Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93900", "id": "kN7GTUss0l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kN7GTUss0l", "openreview": "https://openreview.net/forum?id=kN7GTUss0l", "poster": "", "project": "", "author_site": "Anthony Bardou, Patrick Thiran, Giovanni Ranieri", "tldr": "", "abstract": "Bayesian Optimization (BO) has proven to be very successful at optimizing a static, noisy, costly-to-evaluate black-box function $f : \\mathcal{S} \\to \\mathbb{R}$. However, optimizing a black-box which is also a function of time (*i.e.*, a *dynamic* function) $f : \\mathcal{S} \\times \\mathcal{T} \\to \\mathbb{R}$ remains a challenge, since a dynamic Bayesian Optimization (DBO) algorithm has to keep track of the optimum over time. This changes the nature of the optimization problem in at least three aspects: (i) querying an arbitrary point in $\\mathcal{S} \\times \\mathcal{T}$ is impossible, (ii) past observations become less and less relevant for keeping track of the optimum as time goes by and (iii) the DBO algorithm must have a high sampling frequency so it can collect enough relevant observations to keep track of the optimum through time. In this paper, we design a Wasserstein distance-based criterion able to quantify the relevancy of an observation with respect to future predictions. Then, we leverage this criterion to build W-DBO, a DBO algorithm able to remove irrelevant observations from its dataset on the fly, thus maintaining simultaneously a good predictive performance and a high sampling frequency, even in continuous-time optimization tasks with unknown horizon. Numerical experiments establish the superiority of W-DBO, which outperforms state-of-the-art methods by a comfortable margin.", "keywords": "Dynamic Bayesian Optimization;Black-Box Optimization;Zeroth-Order Optimization", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/f61db0f508e21c0cfe72b8d3153a626e0e1a9856.zip", "author": "Anthony Bardou;Patrick Thiran;Giovanni Ranieri", "authorids": "~Anthony_Bardou1;~Patrick_Thiran1;~Giovanni_Ranieri1", "gender": "M;;M", "homepage": "https://abardou.github.io/;https://people.epfl.ch/patrick.thiran;https://flxinxout.github.io", "dblp": "306/6397;t/PThiran;", "google_scholar": "yVYolcEAAAAJ;https://scholar.google.ch/citations?user=7Ek7pqgAAAAJ;https://scholar.google.fr/citations?hl=fr", "orcid": ";;", "linkedin": ";;giovanni-ranieri-9010a3222/", "or_profile": "~Anthony_Bardou1;~Patrick_Thiran1;~Giovanni_Ranieri1", "aff": "EPFL - EPF Lausanne;EPFL;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "position": "Postdoc;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nbardou2024this,\ntitle={This Too Shall Pass: Removing Stale Observations in Dynamic Bayesian Optimization},\nauthor={Anthony Bardou and Patrick Thiran and Giovanni Ranieri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kN7GTUss0l}\n}", "github": "", "reviewers": "iNzn;JgwK;mGHg;NVX3", "pdf_size": 3261184, "rating": "3;6;6;7", "confidence": "5;3;3;3", "soundness": "1;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "57;179;114;510", "wc_strengths": "41;168;99;123", "wc_weaknesses": "627;195;139;271", "wc_questions": "7;63;70;2", "wc_limitations": "4;1;4;10", "wc_review": "736;606;426;916", "wc_reply_reviewers": "450;30;51;80", "wc_reply_authors": "1806;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 215.0, 175.70287419390726 ], "wc_strengths_avg": [ 107.75, 45.811434162226355 ], "wc_weaknesses_avg": [ 308.0, 190.03946958461023 ], "wc_questions_avg": [ 35.5, 31.148836254345042 ], "wc_limitations_avg": [ 4.75, 3.2691742076555053 ], "wc_review_avg": [ 671.0, 179.2344832893492 ], "wc_reply_reviewers_avg": [ 152.75, 172.53314898882476 ], "wc_reply_authors_avg": [ 451.5, 782.0209396173481 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:R_b3eVVIPAkJ:scholar.google.com/&scioq=This+Too+Shall+Pass:+Removing+Stale+Observations+in+Dynamic+Bayesian+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 7, "email": "epfl.ch;epfl.ch;epfl.ch", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Global Lyapunov functions: a long-standing open problem in mathematics, with symbolic transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93899", "id": "kOMrm4ZJ3m", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kOMrm4ZJ3m", "openreview": "https://openreview.net/forum?id=kOMrm4ZJ3m", "poster": "/media/PosterPDFs/NeurIPS%202024/93899.png?t=1731676061.5897448", "project": "", "author_site": "Alberto Alfarano, Francois Charton, Amaury Hayat", "tldr": "", "abstract": "Despite their spectacular progress, language models still struggle on complex reasoning tasks, such as advanced mathematics.\nWe consider a long-standing open problem in mathematics: discovering a Lyapunov function that ensures the global stability of a dynamical system. This problem has no known general solution, and algorithmic solvers only exist for some small polynomial systems.\nWe propose a new method for generating synthetic training samples from random solutions, and show that sequence-to-sequence transformers trained on such datasets perform better than algorithmic solvers and humans on polynomial systems, and can discover new Lyapunov functions for non-polynomial systems.", "keywords": "mathematics;Lyapunov;transformers;control;AI for science;AI for maths;reasoning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Alberto Alfarano;Francois Charton;Amaury Hayat", "authorids": "~Alberto_Alfarano1;~Francois_Charton1;~Amaury_Hayat1", "gender": ";M;", "homepage": "https://www.linkedin.com/in/alberto-alfarano/;;http://cermics.enpc.fr/~hayata/index_en.html", "dblp": ";255/5318;", "google_scholar": "nVZQq1gAAAAJ;;0Zx6u9cAAAAJ", "orcid": ";;", "linkedin": "alberto-alfarano/;fran%C3%A7ois-charton-214187120/;", "or_profile": "~Alberto_Alfarano1;~Francois_Charton1;~Amaury_Hayat1", "aff": "Meta;Meta Facebook;Ecole des Ponts Paristech", "aff_domain": "facebook.com;fb.com;enpc.fr", "position": "Researcher;Research Engineer;Full Professor", "bibtex": "@inproceedings{\nalfarano2024global,\ntitle={Global Lyapunov functions: a long-standing open problem in mathematics, with symbolic transformers},\nauthor={Alberto Alfarano and Francois Charton and Amaury Hayat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kOMrm4ZJ3m}\n}", "github": "", "reviewers": "S4kD;f8Nq;uYdz;LBjn", "pdf_size": 465764, "rating": "3;5;6;8", "confidence": "5;4;4;3", "soundness": "2;3;3;4", "novelty": "1;3;3;4", "presentation": "2;3;2;4", "wc_summary": "97;98;122;103", "wc_strengths": "43;23;55;240", "wc_weaknesses": "291;28;570;149", "wc_questions": "327;36;31;251", "wc_limitations": "39;37;12;9", "wc_review": "797;222;790;752", "wc_reply_reviewers": "416;19;81;15", "wc_reply_authors": "394;0;54;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 1.8027756377319946 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 105.0, 10.074720839804943 ], "wc_strengths_avg": [ 90.25, 87.21059281990921 ], "wc_weaknesses_avg": [ 259.5, 201.9931929546142 ], "wc_questions_avg": [ 161.25, 130.557219256539 ], "wc_limitations_avg": [ 24.25, 13.808964479641476 ], "wc_review_avg": [ 640.25, 242.08301778522178 ], "wc_reply_reviewers_avg": [ 132.75, 165.6145751436147 ], "wc_reply_authors_avg": [ 116.0, 161.69724796668618 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9805806756909202, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6879397419809633794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "facebook.com;fb.com;enpc.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Meta;Ecole des Ponts ParisTech", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ponts.org", "aff_unique_abbr": "Meta;ENPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;France" }, { "id": "kOp0kiXZ3a", "title": "Nearly Lossless Adaptive Bit Switching", "track": "main", "status": "Reject", "tldr": "", "abstract": "Model quantization is widely applied for compressing and accelerating deep neural networks (DNNs). However, conventional Quantization-aware training (QAT) focuses on training DNNs with uniform bit-width. The bit-width settings vary across different hardware and transmission demands, which induces considerable training and storage costs. Hence, the scheme of one-shot joint training multiple precisions is proposed to address this issue. Previous works either store a larger FP32 model to switch between different precision models for higher accuracy or store a smaller INT8 model but compromise accuracy due to using shared quantization parameters. In this paper, we introduce the ${\\bf Double Rounding}$ quantization method, which fully utilizes the quantized representation range to accomplish nearly lossless bit-switching while reducing storage by using the highest integer precision instead of full precision. Furthermore, we observe a competitive interference among different precisions during one-shot joint training, primarily due to inconsistent gradients of quantization scales during backward propagation. To tackle this problem, we propose an Adaptive Learning Rate Scaling (${\\bf ALRS}$) technique that dynamically adapts learning rates for various precisions to optimize the training process. Additionally, we extend our \\emph{Double Rounding} to one-shot mixed precision training and develop a Hessian-aware Stochastic Bit-switching (${\\bf HASB}$) strategy. Experimental results on the ImageNet-1K classification demonstrate that our methods have enough advantages to state-of-the-art one-shot joint QAT in both multi-precision and mixed-precision. Our codes are available at https://anonymous.4open.science/r/Double-Rounding-EF78/README.md.", "keywords": "Model Quantization;One-Shot Mixed-Precision;Multi-Preciison;Quantization-Aware Training", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Haiduo Huang;Zhenhua Liu;Tian Xia;Wenzhe zhao;Pengju Ren", "authorids": "~Haiduo_Huang1;~Zhenhua_Liu2;~Tian_Xia14;~Wenzhe_zhao2;~Pengju_Ren1", "gender": "M;M;M;M;M", "homepage": ";;;https://venturezhao.github.io;https://gr.xjtu.edu.cn/web/pengjuren", "dblp": "315/7717;02/1825-3.html;;;99/2460.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;bihqxP4AAAAJ;IHUKbtAAAAAJ;https://scholar.google.com.hk/citations?hl=en;qVYDbvIAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Haiduo_Huang1;~Zhenhua_Liu2;~Tian_Xia14;~Wenzhe_zhao2;~Pengju_Ren1", "aff": "AMD;Huawei Technologies Ltd.;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University", "aff_domain": "amd.com;huawei.com;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn", "position": "Intern;Researcher;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024nearly,\ntitle={Nearly Lossless Adaptive Bit Switching},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=kOp0kiXZ3a}\n}", "github": "", "project": "", "reviewers": "T2Zq;Aa14;bT7w;rFvS", "site": "https://openreview.net/forum?id=kOp0kiXZ3a", "pdf_size": 1325471, "rating": "5;5;5;5", "confidence": "5;3;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "121;29;126;65", "wc_strengths": "25;22;26;35", "wc_weaknesses": "243;102;207;149", "wc_questions": "5;46;27;169", "wc_limitations": "20;1;1;5", "wc_review": "414;200;387;423", "wc_reply_reviewers": "56;26;23;44", "wc_reply_authors": "174;208;171;171", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 85.25, 40.350805444253524 ], "wc_strengths_avg": [ 27.0, 4.847679857416329 ], "wc_weaknesses_avg": [ 175.25, 53.97395205096621 ], "wc_questions_avg": [ 61.75, 63.59785766832087 ], "wc_limitations_avg": [ 6.75, 7.8222439235810075 ], "wc_review_avg": [ 356.0, 91.03570728016562 ], "wc_reply_reviewers_avg": [ 37.25, 13.47915056670857 ], "wc_reply_authors_avg": [ 181.0, 15.636495771111889 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RWaoDOhHHLUJ:scholar.google.com/&scioq=Nearly+Lossless+Adaptive+Bit+Switching&hl=en&as_sdt=0,31", "gs_version_total": 4, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Advanced Micro Devices, Inc.;Huawei;Xi'an Jiao Tong University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.amd.com;https://www.huawei.com;https://www.xjtu.edu.cn", "aff_unique_abbr": "AMD;Huawei;XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;China" }, { "id": "kP92Fyc6ry", "title": "CryoBench: Diverse and challenging datasets for the heterogeneity problem in cryo-EM", "track": "Datasets & Benchmarks", "status": "Spotlight", "tldr": "", "abstract": "Cryo-electron microscopy (cryo-EM) is a powerful technique for determining high-resolution 3D biomolecular structures from imaging data. Its unique ability to capture structural variability has spurred the development of heterogeneous reconstruction algorithms that can infer distributions of 3D structures from noisy, unlabeled imaging data. Despite the growing number of advanced methods, progress in the field is hindered by the lack of standardized benchmarks with ground truth information and reliable validation metrics. Here, we introduce CryoBench, a suite of datasets, metrics, and benchmarks for heterogeneous reconstruction in cryo-EM. CryoBench includes five datasets representing different sources of heterogeneity and degrees of difficulty. These include conformational heterogeneity generated from designed motions of antibody complexes or sampled from a molecular dynamics simulation, as well as {compositional heterogeneity from mixtures of ribosome assembly states or 100 common complexes present in cells. We then analyze state-of-the-art heterogeneous reconstruction tools, including neural and non-neural methods, assess their sensitivity to noise, and propose new metrics for quantitative evaluation. We hope that CryoBench will be a foundational resource for accelerating algorithmic development and evaluation in the cryo-EM and machine learning communities. Project page: https://cryobench.cs.princeton.edu.", "keywords": "Cryo-EM;proteins;structural biology;3D reconstruction;datasets and benchmarks", "primary_area": "", "supplementary_material": "/attachment/7a1b77d36323db0930330f07bfad4ab1d77a6c6f.pdf", "author": "Minkyu Jeon;Rishwanth Raghu;Miro A. Astore;Geoffrey Woollard;J. Ryan Feathers;Alkin Kaz;Sonya M Hanson;Pilar Cossio;Ellen D Zhong", "authorids": "~Minkyu_Jeon3;~Rishwanth_Raghu1;~Miro_A._Astore1;~Geoffrey_Woollard1;~J._Ryan_Feathers1;~Alkin_Kaz1;~Sonya_M_Hanson1;~Pilar_Cossio1;~Ellen_D_Zhong1", "gender": "M;M;M;;M;;;;F", "homepage": "https://minkyujeon.github.io;https://rish-raghu.github.io/;https://miro-astore.github.io/;;;;;https://www.simonsfoundation.org/people/pilar-cossio/;https://www.cs.princeton.edu/~zhonge/", "dblp": "303/0461;385/3039;;;;;;;193/8335", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;_eIEG2EAAAAJ;;;;;;17tGlqAAAAAJ", "orcid": "0000-0003-0572-6065;0009-0006-6651-142X;;;0000-0002-4402-4659;;;;", "linkedin": ";rishwanth-raghu/;;;;alkin-kaz;;;", "or_profile": "~Minkyu_Jeon3;~Rishwanth_Raghu1;~Miro_A._Astore1;~Geoffrey_Woollard1;~J._Ryan_Feathers1;~Alkin_Kaz1;~Sonya_M_Hanson1;~Pilar_Cossio1;~Ellen_D_Zhong1", "aff": "Princeton University;Princeton University;Simons Foundation;;Princeton University;;;Flatiron institute;Princeton University", "aff_domain": "princeton.edu;princeton.edu;simonsfoundation.org;;princeton.edu;;;flatironinstitute.org;princeton.edu", "position": "PhD student;MS student;Postdoc;;Postdoc;;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\njeon2024cryobench,\ntitle={CryoBench: Datasets and Benchmarks for Heterogeneous Cryo-{EM} Reconstruction},\nauthor={Minkyu Jeon and Rishwanth Raghu and Miro A. Astore and Geoffrey Woollard and J. Ryan Feathers and Alkin Kaz and Sonya M Hanson and Pilar Cossio and Ellen D Zhong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kP92Fyc6ry}\n}", "github": "", "project": "", "reviewers": "8cUz;iw7S;eH5Y;D6Nq;1HSM", "site": "https://openreview.net/forum?id=kP92Fyc6ry", "pdf_size": 40120731, "rating": "6;7;8;8;9", "confidence": "4;4;4;5;3", "wc_summary_and_contributions": "24;17;110;121;27", "wc_strengths": "22;2;42;146;16", "wc_improvement": "41;2;16;118;19", "wc_limitations": "4;1;53;153;19", "wc_correctness": "1;1;59;2;1", "wc_clarity": "1;1;1;1;1", "wc_relation_to_prior_work": "1;1;1;1;14", "wc_documentation": "18;1;12;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "113;27;295;544;99", "wc_reply_reviewers": "0;0;10;23;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.6, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "wc_summary_and_contributions_avg": [ 59.8, 45.72701608458615 ], "wc_strengths_avg": [ 45.6, 51.82123117024527 ], "wc_improvement_avg": [ 39.2, 41.33473115915961 ], "wc_limitations_avg": [ 46.0, 56.59681969863678 ], "wc_correctness_avg": [ 12.8, 23.103246525109842 ], "wc_clarity_avg": [ 1.0, 0.0 ], "wc_relation_to_prior_work_avg": [ 3.6, 5.2 ], "wc_documentation_avg": [ 6.6, 7.116178749862878 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 215.6, 186.46350849428958 ], "wc_reply_reviewers_avg": [ 6.6, 9.068627239003707 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.31008683647302115, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17575292287030253962&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Princeton University;Simons Foundation;Flatiron Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.simonsfoundation.org;https://flatironinstitute.org", "aff_unique_abbr": "Princeton;Simons Foundation;Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Chain of Thoughtlessness? An Analysis of CoT in Planning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93898", "id": "kPBEAZU5Nm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kPBEAZU5Nm", "openreview": "https://openreview.net/forum?id=kPBEAZU5Nm", "poster": "", "project": "", "author_site": "Kaya Stechly, Karthik Valmeekam, Subbarao Kambhampati", "tldr": "", "abstract": "Large language model (LLM) performance on reasoning problems typically does not generalize out of distribution. Previous work has claimed that this can be mitigated with chain of thought prompting--a method of demonstrating solution procedures--with the intuition that it is possible to in-context teach an LLM an algorithm for solving the problem.\nThis paper presents a case study of chain of thought on problems from Blocksworld, a classical planning domain, and examines the performance of two state-of-the-art LLMs across two axes: generality of examples given in prompt, and complexity of problems queried with each prompt. While our problems are very simple, we only find meaningful performance improvements from chain of thought prompts when those prompts are exceedingly specific to their problem class, and that those improvements quickly deteriorate as the size n of the query-specified stack grows past the size of stacks shown in the examples.\nWe also create scalable variants of three domains commonly studied in previous CoT papers and demonstrate the existence of similar failure modes.\nOur results hint that, contrary to previous claims in the literature, CoT's performance improvements do not stem from the model learning general algorithmic procedures via demonstrations but depend on carefully engineering highly problem specific prompts. This spotlights drawbacks of chain of thought, especially the sharp tradeoff between possible performance gains and the amount of human labor necessary to generate examples with correct reasoning traces.", "keywords": "LLMs;Planning;Reasoning;Chain of Thought", "primary_area": "other", "supplementary_material": "", "author": "Kaya Stechly;Karthik Valmeekam;Subbarao Kambhampati", "authorids": "~Kaya_Stechly1;~Karthik_Valmeekam1;~Subbarao_Kambhampati1", "gender": ";M;M", "homepage": "https://kstechly.github.io/;;http://rakaposhi.eas.asu.edu", "dblp": ";279/2957;k/SKambhampati", "google_scholar": "BUT7cR0AAAAJ;CrYLDt4AAAAJ;yl3L07sAAAAJ", "orcid": ";;", "linkedin": "kaya-stechly-327729236;;", "or_profile": "~Kaya_Stechly1;~Karthik_Valmeekam1;~Subbarao_Kambhampati1", "aff": "Arizona State University;Arizona State University;Arizona State University", "aff_domain": "asu.edu;asu.edu;asu.edu", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nstechly2024chain,\ntitle={Chain of Thoughtlessness? An Analysis of CoT in Planning},\nauthor={Kaya Stechly and Karthik Valmeekam and Subbarao Kambhampati},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kPBEAZU5Nm}\n}", "github": "", "reviewers": "g5xe;4aaa;x4qq;zEQi", "pdf_size": 2869003, "rating": "4;5;7;8", "confidence": "4;5;4;4", "soundness": "1;2;3;4", "novelty": "1;3;3;3", "presentation": "1;3;4;4", "wc_summary": "108;36;134;164", "wc_strengths": "97;41;65;136", "wc_weaknesses": "458;72;206;17", "wc_questions": "1;185;167;109", "wc_limitations": "1;7;33;1", "wc_review": "665;341;605;427", "wc_reply_reviewers": "54;264;55;122", "wc_reply_authors": "174;545;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 110.5, 47.35768153108849 ], "wc_strengths_avg": [ 84.75, 35.639689953758015 ], "wc_weaknesses_avg": [ 188.25, 170.2356822173307 ], "wc_questions_avg": [ 115.5, 71.82443873780011 ], "wc_limitations_avg": [ 10.5, 13.219304066402286 ], "wc_review_avg": [ 509.5, 130.86156807863796 ], "wc_reply_reviewers_avg": [ 123.75, 85.53471517460031 ], "wc_reply_authors_avg": [ 179.75, 222.52008336327756 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.36514837167011077, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11997973918885536848&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "asu.edu;asu.edu;asu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Solving Sparse \\& High-Dimensional-Output Regression via Compression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93897", "id": "kPGNE4CrTq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kPGNE4CrTq", "openreview": "https://openreview.net/forum?id=kPGNE4CrTq", "poster": "/media/PosterPDFs/NeurIPS%202024/93897.png?t=1731465004.1455724", "project": "", "author_site": "Renyuan Li, Zhehui Chen, Guanyi Wang", "tldr": "", "abstract": "Multi-Output Regression (MOR) has been widely used in scientific data analysis for decision-making. Unlike traditional regression models, MOR aims to simultaneously predict multiple real-valued outputs given an input. However, the increasing dimensionality of the outputs poses significant challenges regarding interpretability and computational scalability for modern MOR applications. As a first step to address these challenges, this paper proposes a Sparse \\& High-dimensional-Output REgression (SHORE) model by incorporating additional sparsity requirements to resolve the output interpretability, and then designs a computationally efficient two-stage optimization framework capable of solving SHORE with provable accuracy via compression on outputs. Theoretically, we show that the proposed framework is computationally scalable while maintaining the same order of training loss and prediction loss before-and-after compression under arbitrary or relatively weak sample set conditions. Empirically, numerical results further validate the theoretical findings, showcasing the efficiency and accuracy of the proposed framework.", "keywords": "multi-output regression;sparsity;compression;non-convex optimization", "primary_area": "learning_theory", "supplementary_material": "/attachment/16bcb72acc677a16093f604b8b666e0bc4f7f043.zip", "author": "Renyuan Li;Zhehui Chen;Guanyi Wang", "authorids": "~Renyuan_Li1;~Zhehui_Chen1;~Guanyi_Wang1", "gender": ";M;M", "homepage": "https://renyuanli.github.io/;https://sites.google.com/view/zhehuichen/home;https://sites.google.com/view/guanyiwang", "dblp": "119/4639;195/6300;", "google_scholar": ";2lvIrNAAAAAJ;EmqEodUAAAAJ", "orcid": "0009-0002-0021-1601;;", "linkedin": ";zhehui-chen-366551105/;", "or_profile": "~Renyuan_Li1;~Zhehui_Chen1;~Guanyi_Wang1", "aff": "National University of Singapore;Google;National University of Singapore", "aff_domain": "u.nus.edu;google.com;nus.edu.sg", "position": "PhD student;Software Engineer;Assistant Professor", "bibtex": "@inproceedings{\nli2024solving,\ntitle={Solving Sparse {\\textbackslash}\\& High-Dimensional-Output Regression via Compression},\nauthor={Renyuan Li and Zhehui Chen and Guanyi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kPGNE4CrTq}\n}", "github": "", "reviewers": "hQ2t;PtcT;vYuH", "pdf_size": 1270226, "rating": "6;7;7", "confidence": "2;4;3", "soundness": "4;3;3", "novelty": "3;3;2", "presentation": "3;3;3", "wc_summary": "80;60;52", "wc_strengths": "9;70;45", "wc_weaknesses": "129;197;251", "wc_questions": "2;66;145", "wc_limitations": "1;100;1", "wc_review": "221;493;494", "wc_reply_reviewers": "12;47;22", "wc_reply_authors": "33;23;28", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.0, 11.775681155103795 ], "wc_strengths_avg": [ 41.333333333333336, 25.037749277618563 ], "wc_weaknesses_avg": [ 192.33333333333334, 49.91548412622635 ], "wc_questions_avg": [ 71.0, 58.48646567084274 ], "wc_limitations_avg": [ 34.0, 46.66904755831214 ], "wc_review_avg": [ 402.6666666666667, 128.4583806357357 ], "wc_reply_reviewers_avg": [ 27.0, 14.719601443879744 ], "wc_reply_authors_avg": [ 28.0, 4.08248290463863 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FLoJkRf42fMJ:scholar.google.com/&scioq=Solving+Sparse+%5C%26+High-Dimensional-Output+Regression+via+Compression&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "u.nus.edu;google.com;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.nus.edu.sg;https://www.google.com", "aff_unique_abbr": "NUS;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Vitron: A Unified Pixel-level Vision LLM for Understanding, Generating, Segmenting, Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93896", "id": "kPmSfhCM5s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kPmSfhCM5s", "openreview": "https://openreview.net/forum?id=kPmSfhCM5s", "poster": "/media/PosterPDFs/NeurIPS%202024/93896.png?t=1729877662.5402112", "project": "", "author_site": "Hao Fei, Shengqiong Wu, Hanwang Zhang, Tat-Seng Chua, Shuicheng Yan", "tldr": "", "abstract": "Recent developments of vision large language models (LLMs) have seen remarkable progress, yet still encounter challenges towards multimodal generalists, such as coarse-grained instance-level understanding, lack of unified support for both images and videos, and insufficient coverage across various vision tasks. In this paper we present Vitron, a universal pixel-level vision LLM designed for comprehensive understanding, generating, segmenting, and editing of both static images and dynamic videos. Building on top of an LLM backbone, Vitron incorporates encoders for images, videos, and pixel-level regional visuals within its frontend modules, while employing state-of-the-art visual specialists as its backend, via which Vitron supports a spectrum of vision end tasks, spanning visual comprehension to visual generation, from low level to high level. To ensure an effective and precise message passing from LLM to backend modules for function invocation, we propose a novel hybrid method by simultaneously integrating discrete textual instructions and continuous signal embeddings. Further, we design various pixel-level spatiotemporal vision-language alignment learning for Vitron to reach the best fine-grained visual capability. Finally, a cross-task synergy module is advised to learn to maximize the task-invariant fine-grained visual features, enhancing the synergy between different visual tasks. Demonstrated over 12 visual tasks and evaluated across 22 datasets, Vitron showcases its extensive capabilities in the four main vision task clusters. Overall, this work illuminates the great potential of developing a more unified multimodal generalist.", "keywords": "Multimodal Large Language Model;Unified Large Language Model", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Hao Fei;Shengqiong Wu;Hanwang Zhang;Tat-Seng Chua;Shuicheng YAN", "authorids": "~Hao_Fei1;~Shengqiong_Wu2;~Hanwang_Zhang3;~Tat-Seng_Chua2;~Shuicheng_YAN3", "gender": "M;F;M;M;M", "homepage": "https://haofei.vip/;https://chocowu.github.io/;https://mreallab.github.io/index.html;https://yanshuicheng.ai/;http://www.comp.nus.edu.sg/~chuats/", "dblp": "81/3569-1;274/7191;79/8116.html;y/ShuichengYan;", "google_scholar": "YGDX46AAAAAJ;RJJLKR0AAAAJ;YG0DFyYAAAAJ;https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0000-0003-3026-6347;0000-0001-6192-1194;;;0000-0001-6097-7807", "linkedin": ";;;;", "or_profile": "~Hao_Fei1;~Shengqiong_Wu2;~Hanwang_Zhang3;~Shuicheng_YAN3;~Tat-seng_Chua1", "aff": "National University of Singapore;National University of Singapore;Nanyang Technological University;sea Group;National University of Singapore", "aff_domain": "nus.edu.sg;u.nus.edu;ntu.edu.sg;sea.com;nus.edu.sg", "position": "Postdoc;PhD student;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nfei2024vitron,\ntitle={Vitron: A Unified Pixel-level Vision {LLM} for Understanding, Generating, Segmenting, Editing},\nauthor={Hao Fei and Shengqiong Wu and Hanwang Zhang and Tat-Seng Chua and Shuicheng YAN},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kPmSfhCM5s}\n}", "github": "", "reviewers": "BLKJ;FR5p;vBVg;d2kF", "pdf_size": 11641433, "rating": "5;6;8;8", "confidence": "5;4;4;5", "soundness": "2;4;3;4", "novelty": "2;4;4;4", "presentation": "2;4;3;4", "wc_summary": "88;59;49;151", "wc_strengths": "79;40;50;246", "wc_weaknesses": "607;49;81;146", "wc_questions": "4;3;67;112", "wc_limitations": "4;7;36;1", "wc_review": "782;158;283;656", "wc_reply_reviewers": "94;0;47;24", "wc_reply_authors": "730;0;246;34", "reply_reviewers": "1;0;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 86.75, 39.76414842543469 ], "wc_strengths_avg": [ 103.75, 83.36778454535062 ], "wc_weaknesses_avg": [ 220.75, 225.72369724953558 ], "wc_questions_avg": [ 46.5, 45.85029988996801 ], "wc_limitations_avg": [ 12.0, 14.017845768876187 ], "wc_review_avg": [ 469.75, 257.0276006579838 ], "wc_reply_reviewers_avg": [ 41.25, 34.694199803425356 ], "wc_reply_authors_avg": [ 252.5, 291.3533078583458 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7827261160887712440&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nus.edu.sg;u.nus.edu;ntu.edu.sg;sea.com;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "National University of Singapore;Nanyang Technological University;Sea Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ntu.edu.sg;", "aff_unique_abbr": "NUS;NTU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore;" }, { "title": "QGFN: Controllable Greediness with Action Values", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93895", "id": "kQ9LgM2JQT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kQ9LgM2JQT", "openreview": "https://openreview.net/forum?id=kQ9LgM2JQT", "poster": "", "project": "", "author_site": "Elaine Lau, Stephen Lu, Ling Pan, Doina Precup, Emmanuel Bengio", "tldr": "", "abstract": "Generative Flow Networks (GFlowNets; GFNs) are a family of energy-based generative methods for combinatorial objects, capable of generating diverse and high-utility samples. However, consistently biasing GFNs towards producing high-utility samples is non-trivial. In this work, we leverage connections between GFNs and reinforcement learning (RL) and propose to combine the GFN policy with an action-value estimate, $Q$, to create greedier sampling policies which can be controlled by a mixing parameter. We show that several variants of the proposed method, QGFN, are able to improve on the number of high-reward samples generated in a variety of tasks without sacrificing diversity.", "keywords": "GFlowNets;generative models;molecule design", "primary_area": "generative_models", "supplementary_material": "/attachment/c53465f098891b8062244cda47c3e3f40cef60e5.zip", "author": "Elaine Lau;Stephen Zhewen Lu;Ling Pan;Doina Precup;Emmanuel Bengio", "authorids": "~Elaine_Lau1;~Stephen_Zhewen_Lu1;~Ling_Pan1;~Doina_Precup1;~Emmanuel_Bengio1", "gender": "F;M;F;F;M", "homepage": ";https://matrixmaster.me/;https://ling-pan.github.io/;http://cs.mcgill.ca/~dprecup/;http://folinoid.com", "dblp": ";;199/9303/;p/DoinaPrecup;137/8040", "google_scholar": "jC63xPkAAAAJ;cwBga44AAAAJ;qZ_zlacAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;https://scholar.google.ca/citations?user=yVtSOt8AAAAJ", "orcid": ";0009-0005-2401-0793;;;", "linkedin": "yunglau/;stephenzlu/;;;", "or_profile": "~Elaine_Lau1;~Stephen_Zhewen_Lu1;~Ling_Pan1;~Doina_Precup1;~Emmanuel_Bengio1", "aff": "Mila - Quebec Artificial Intelligence Institute;McGill University;Montreal Institute for Learning Algorithms (MILA);McGill University;Valence Labs powered by recursion", "aff_domain": "mila.quebec;mcgill.ca;mila.umontreal.ca;mcgill.ca;valencelabs.com", "position": "MS student;Undergrad student;Postdoc;Associate Professor;Researcher", "bibtex": "@inproceedings{\nlau2024qgfn,\ntitle={{QGFN}: Controllable Greediness with Action Values},\nauthor={Elaine Lau and Stephen Zhewen Lu and Ling Pan and Doina Precup and Emmanuel Bengio},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kQ9LgM2JQT}\n}", "github": "", "reviewers": "bgQq;eVPz;Fo8k", "pdf_size": 1178596, "rating": "4;5;7", "confidence": "5;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "109;91;80", "wc_strengths": "124;76;73", "wc_weaknesses": "452;227;6", "wc_questions": "145;2;82", "wc_limitations": "29;2;9", "wc_review": "859;398;250", "wc_reply_reviewers": "357;639;11", "wc_reply_authors": "47;1291;0", "reply_reviewers": "1;3;1", "reply_authors": "2;3;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 93.33333333333333, 11.953614051360738 ], "wc_strengths_avg": [ 91.0, 23.366642891095847 ], "wc_weaknesses_avg": [ 228.33333333333334, 182.08117847695175 ], "wc_questions_avg": [ 76.33333333333333, 58.516854172299986 ], "wc_limitations_avg": [ 13.333333333333334, 11.440668201153676 ], "wc_review_avg": [ 502.3333333333333, 259.33804622966954 ], "wc_reply_reviewers_avg": [ 335.6666666666667, 256.82332881228336 ], "wc_reply_authors_avg": [ 446.0, 597.8132372795593 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16895341509739628246&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "mila.quebec;mcgill.ca;mila.umontreal.ca;mcgill.ca;valencelabs.com", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Quebec Artificial Intelligence Institute;McGill University;Montreal Institute for Learning Algorithms;Valence Labs", "aff_unique_dep": "Artificial Intelligence;;Artificial Intelligence;", "aff_unique_url": "https://mila.quebec;https://www.mcgill.ca;https://mila.quebec;", "aff_unique_abbr": "Mila;McGill;MILA;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada;" }, { "title": "Advancing Spiking Neural Networks for Sequential Modeling with Central Pattern Generators", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93894", "id": "kQMyiDWbOG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kQMyiDWbOG", "openreview": "https://openreview.net/forum?id=kQMyiDWbOG", "poster": "/media/PosterPDFs/NeurIPS%202024/93894.png?t=1729752577.3460462", "project": "", "author_site": "Changze Lv, Dongqi Han, Yansen Wang, Xiaoqing Zheng, Xuanjing Huang, Dongsheng Li", "tldr": "", "abstract": "Spiking neural networks (SNNs) represent a promising approach to developing artificial neural networks that are both energy-efficient and biologically plausible.\nHowever, applying SNNs to sequential tasks, such as text classification and time-series forecasting, has been hindered by the challenge of creating an effective and hardware-friendly spike-form positional encoding (PE) strategy.\nDrawing inspiration from the central pattern generators (CPGs) in the human brain, which produce rhythmic patterned outputs without requiring rhythmic inputs, we propose a novel PE technique for SNNs, termed CPG-PE.\nWe demonstrate that the commonly used sinusoidal PE is mathematically a specific solution to the membrane potential dynamics of a particular CPG.\nMoreover, extensive experiments across various domains, including time-series forecasting, natural language processing, and image classification, show that SNNs with CPG-PE outperform their conventional counterparts.\nAdditionally, we perform analysis experiments to elucidate the mechanism through which SNNs encode positional information and to explore the function of CPGs in the human brain.\nThis investigation may offer valuable insights into the fundamental principles of neural computation.", "keywords": "Spiking Neural Networks;Central Pattern Generators;Positional Encoding", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/f7c9aa40426d0a22968f925000e80da246c8f0f8.zip", "author": "Changze Lv;Dongqi Han;Yansen Wang;Xiaoqing Zheng;Xuanjing Huang;Dongsheng Li", "authorids": "~Changze_Lv1;~Dongqi_Han1;~Yansen_Wang2;~Xiaoqing_Zheng2;~Xuanjing_Huang1;~Dongsheng_Li2", "gender": "M;M;;;F;M", "homepage": "https://lvchangze.github.io;https://frosthan.github.io/;;;https://xuanjing-huang.github.io/;http://recmind.cn", "dblp": "350/4445;;;;05/6735-1;254/0830-2.html", "google_scholar": "t3-viUwAAAAJ;3V_9fRUAAAAJ;;;RGsMgZA4H78C;VNg5rA8AAAAJ", "orcid": ";0000-0002-6872-7121;;;0000-0001-9197-9426;0000-0003-3103-8442", "linkedin": ";;;;;", "or_profile": "~Changze_Lv1;~Dongqi_Han1;~Yansen_Wang2;~Xiaoqing_Zheng2;~Xuanjing_Huang1;~Dongsheng_Li2", "aff": "Fudan University;Microsoft;;;Fudan University;Microsoft Research Asia", "aff_domain": "fudan.edu.cn;microsoft.com;;;fudan.edu.cn;microsoft.com", "position": "PhD student;Researcher;;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nlv2024advancing,\ntitle={Advancing Spiking Neural Networks for Sequential Modeling with Central Pattern Generators},\nauthor={Changze Lv and Dongqi Han and Yansen Wang and Xiaoqing Zheng and Xuanjing Huang and Dongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kQMyiDWbOG}\n}", "github": "", "reviewers": "s3xa;P6YU;Pjez;3Lmi", "pdf_size": 1297472, "rating": "6;6;7;7", "confidence": "5;4;5;5", "soundness": "3;2;4;3", "novelty": "3;2;4;3", "presentation": "3;2;3;3", "wc_summary": "63;57;54;64", "wc_strengths": "79;76;63;69", "wc_weaknesses": "47;178;129;48", "wc_questions": "4;7;208;24", "wc_limitations": "30;1;6;9", "wc_review": "223;319;460;214", "wc_reply_reviewers": "20;28;51;38", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.5, 4.153311931459037 ], "wc_strengths_avg": [ 71.75, 6.219927652312364 ], "wc_weaknesses_avg": [ 100.5, 55.76064920712455 ], "wc_questions_avg": [ 60.75, 85.3562387878004 ], "wc_limitations_avg": [ 11.5, 11.05667219374799 ], "wc_review_avg": [ 304.0, 99.02272466459404 ], "wc_reply_reviewers_avg": [ 34.25, 11.583932838203095 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SZB3pZHs1w0J:scholar.google.com/&scioq=Advancing+Spiking+Neural+Networks+for+Sequential+Modeling+with+Central+Pattern+Generators&hl=en&as_sdt=0,33", "gs_version_total": 7, "email": "fudan.edu.cn;microsoft.com;;;fudan.edu.cn;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Fudan University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.fudan.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "Fudan;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Synthetic Programming Elicitation for Text-to-Code in Very Low-Resource Programming and Formal Languages", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93893", "id": "kQPzFiwVIu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kQPzFiwVIu", "openreview": "https://openreview.net/forum?id=kQPzFiwVIu", "poster": "", "project": "", "author_site": "Federico Mora, Justin Wong, Haley Lepe, Sahil Bhatia, Karim Elmaaroufi, George Varghese, Joseph Gonzalez, Elizabeth Polgreen, Sanjit Seshia", "tldr": "", "abstract": "Recent advances in large language models (LLMs) for code applications have demonstrated remarkable zero-shot fluency and instruction following on challenging code related tasks ranging from test case generation to self-repair. Unsurprisingly, however, models struggle to compose syntactically valid programs in programming languages unrepresented in pre-training, referred to as very low-resource Programming Languages (VLPLs). VLPLs appear in crucial settings, including domain-specific languages for internal tools, tool-chains for legacy languages, and formal verification frameworks. Inspired by a technique called natural programming elicitation, we propose designing an intermediate language that LLMs ``naturally'' know how to use and which can be automatically compiled to a target VLPL. When LLMs generate code that lies outside of this intermediate language, we use compiler techniques to repair the code into programs in the intermediate language. Overall, we introduce _synthetic programming elicitation and compilation_ (SPEAC), an approach that enables LLMs to generate syntactically valid code even for VLPLs. We empirically evaluate the performance of SPEAC in a case study for the UCLID5 formal verification language and find that, compared to existing retrieval and fine-tuning baselines, SPEAC produces syntactically correct programs more frequently and without sacrificing semantic correctness.", "keywords": "Text-to-Code;Low-Resource Programming Languages;MAX-SAT;Parsing;Program Repair", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/4ee03c6902936a232b37e15b803ce88c0751d69c.zip", "author": "Federico Mora;Justin Wong;Haley Lepe;Sahil Bhatia;Karim Elmaaroufi;George Varghese;Joseph E. Gonzalez;Elizabeth Polgreen;Sanjit A. Seshia", "authorids": "~Federico_Mora1;~Justin_Wong1;~Haley_Lepe1;~Sahil_Bhatia3;~Karim_Elmaaroufi1;~George_Varghese1;~Joseph_E._Gonzalez1;~Elizabeth_Polgreen2;~Sanjit_A._Seshia1", "gender": "M;M;F;;M;M;M;F;", "homepage": "https://federico.morarocha.ca/;https://people.eecs.berkeley.edu/~wong.justin/;;;https://kael.tech.blog/;http://web.cs.ucla.edu/~varghese/;http://eecs.berkeley.edu/~jegonzal;https://polgreen.github.io/;", "dblp": "82/5093-2;;;;229/1913.html;;61/8262;https://dblp.uni-trier.de/pid/183/7353.html;", "google_scholar": "mm91pq0AAAAJ;l49M8zUAAAAJ;;;;SEnY0XQAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;wFYhUkIAAAAJ;", "orcid": ";;;;;;0000-0003-2921-956X;0000-0001-9032-7661;", "linkedin": ";justin-wong-23155411b;haleylepe;;;;;;", "or_profile": "~Federico_Mora1;~Justin_Wong1;~Haley_Lepe1;~Sahil_Bhatia3;~Karim_Elmaaroufi1;~George_Varghese1;~Joseph_E._Gonzalez1;~Elizabeth_Polgreen2;~Sanjit_A._Seshia1", "aff": "University of California, Berkeley;Meta Facebook;MiraCosta College;;University of California, Berkeley;University of California, Los Angeles;University of California, Berkeley;University of Edinburgh, University of Edinburgh;", "aff_domain": "berkeley.edu;meta.com;miracosta.edu;;berkeley.edu;ucla.edu;berkeley.edu;ed.ac.uk;", "position": "PhD student;Researcher;Undergrad student;;PhD student;Full Professor;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nmora2024synthetic,\ntitle={Synthetic Programming Elicitation for Text-to-Code in Very Low-Resource Programming and Formal Languages},\nauthor={Federico Mora and Justin Wong and Haley Lepe and Sahil Bhatia and Karim Elmaaroufi and George Varghese and Joseph E. Gonzalez and Elizabeth Polgreen and Sanjit A. Seshia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kQPzFiwVIu}\n}", "github": "", "reviewers": "9mNj;9eZc;6oM2;tGyy", "pdf_size": 341900, "rating": "4;5;7;8", "confidence": "4;2;4;4", "soundness": "2;3;2;4", "novelty": "2;2;3;3", "presentation": "1;3;3;4", "wc_summary": "93;131;170;127", "wc_strengths": "32;65;9;48", "wc_weaknesses": "31;92;21;42", "wc_questions": "95;159;207;37", "wc_limitations": "1;3;3;36", "wc_review": "252;450;410;290", "wc_reply_reviewers": "67;0;52;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 130.25, 27.288962970402523 ], "wc_strengths_avg": [ 38.5, 20.645822822062577 ], "wc_weaknesses_avg": [ 46.5, 27.299267389437397 ], "wc_questions_avg": [ 124.5, 64.27091099401035 ], "wc_limitations_avg": [ 10.75, 14.600941750448838 ], "wc_review_avg": [ 350.5, 81.8581089446855 ], "wc_reply_reviewers_avg": [ 38.0, 25.029982021567655 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.36514837167011077, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11931233105124385006&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "berkeley.edu;meta.com;miracosta.edu;;berkeley.edu;ucla.edu;berkeley.edu;ed.ac.uk;", "author_num": 9, "aff_unique_index": "0;1;2;0;3;0;4", "aff_unique_norm": "University of California, Berkeley;Meta;MiraCosta College;University of California, Los Angeles;University of Edinburgh", "aff_unique_dep": ";Meta Platforms, Inc.;;;", "aff_unique_url": "https://www.berkeley.edu;https://meta.com;https://www.miracosta.edu;https://www.ucla.edu;https://www.ed.ac.uk", "aff_unique_abbr": "UC Berkeley;Meta;;UCLA;Edinburgh", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Berkeley;;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Identifying Latent State-Transition Processes for Individualized Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93892", "id": "kREpCQtHdN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kREpCQtHdN", "openreview": "https://openreview.net/forum?id=kREpCQtHdN", "poster": "/media/PosterPDFs/NeurIPS%202024/93892.png?t=1731264782.8035228", "project": "", "author_site": "Yuewen Sun, Biwei Huang, Yu Yao, Donghuo Zeng, Xinshuai Dong, Songyao Jin, Boyang Sun, Roberto Legaspi, Kazushi Ikeda, Peter Spirtes, Kun Zhang", "tldr": "", "abstract": "The application of reinforcement learning (RL) involving interactions with individuals has grown significantly in recent years. These interactions, influenced by factors such as personal preferences and physiological differences, causally influence state transitions, ranging from health conditions in healthcare to learning progress in education. As a result, different individuals may exhibit different state-transition processes. Understanding individualized state-transition processes is essential for optimizing individualized policies. In practice, however, identifying these state-transition processes is challenging, as individual-specific factors often remain latent. In this paper, we establish the identifiability of these latent factors and introduce a practical method that effectively learns these processes from observed state-action trajectories. Experiments on various datasets show that the proposed method can effectively identify latent state-transition processes and facilitate the learning of individualized RL policies.", "keywords": "Reinforcement Learning;Identifiability;Individualization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yuewen Sun;Biwei Huang;Yu Yao;Donghuo Zeng;Xinshuai Dong;Songyao Jin;Boyang Sun;Roberto Legaspi;Kazushi Ikeda;Peter Spirtes;Kun Zhang", "authorids": "~Yuewen_Sun1;~Biwei_Huang1;~Yu_Yao3;~Donghuo_Zeng1;~Xinshuai_Dong1;~Songyao_Jin1;~Boyang_Sun1;~Roberto_Legaspi1;~Kazushi_Ikeda2;~Peter_Spirtes1;~Kun_Zhang1", "gender": "F;F;M;M;M;M;M;M;M;M;M", "homepage": "https://yuewen-sun.github.io/;;https://a5507203.github.io/;https://scholar.google.com/citations?user=S2tvWCUAAAAJ&hl=en&authuser=1;https://dongxinshuai.github.io/;https://github.com/Songyao-Jin;;https://www.researchgate.net/profile/Roberto-Legaspi;https://scholar.google.co.jp/citations?user=_2Ns9dAAAAAJ&hl=ja;https://www.cmu.edu/dietrich/philosophy/people/faculty/spirtes.html;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "219/9893;165/3288;230/9625;;279/6151.html;365/4229;;296/0450.html;;87/3550;96/3115-1", "google_scholar": "https://scholar.google.com/citations?hl=en;;OkcaMKAAAAAJ;S2tvWCUAAAAJ;A7JyL1sAAAAJ;IPNzHfgAAAAJ;;zE7Zhk0AAAAJ;;mar1eCwAAAAJ;RGoypN4AAAAJ", "orcid": ";;;0000-0003-1320-8815;;;0000-0002-0118-4425;0000-0001-8909-635X;;;", "linkedin": ";;yu-yao-150377134/;donghuo-zeng-205a14223/?originalSubdomain=jp;;songyao-jin-b97466223/;;roberto-legaspi-5a3a4361/;;;", "or_profile": "~Yuewen_Sun1;~Biwei_Huang1;~Yu_Yao3;~Donghuo_Zeng1;~Xinshuai_Dong1;~Songyao_Jin1;~Boyang_Sun1;~Roberto_Legaspi1;~Kazushi_Ikeda2;~Peter_Spirtes1;~Kun_Zhang1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;University of California, San Diego;University of Sydney;KDDI Corporation;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;KDDI Research, Inc.;;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "mbzuai.ac.ae;ucsd.edu;sydney.edu.au;kddi.com;cmu.edu;mbzuai.ac.ae;mbzuai.ac.ae;kddi-research.jp;;cmu.edu;cmu.edu", "position": "Postdoc;Assistant Professor;Lecturer;Researcher;PhD student;MS student;PhD student;Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsun2024identifying,\ntitle={Identifying Latent State-Transition Processes for Individualized Reinforcement Learning},\nauthor={Yuewen Sun and Biwei Huang and Yu Yao and Donghuo Zeng and Xinshuai Dong and Songyao Jin and Boyang Sun and Roberto Legaspi and Kazushi Ikeda and Peter Spirtes and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kREpCQtHdN}\n}", "github": "", "reviewers": "pu6U;t6CB;ZPqL;cEn6", "pdf_size": 7609162, "rating": "5;6;6;7", "confidence": "3;3;4;4", "soundness": "3;2;4;3", "novelty": "3;3;3;3", "presentation": "4;3;3;3", "wc_summary": "83;75;91;219", "wc_strengths": "35;48;102;148", "wc_weaknesses": "39;35;88;286", "wc_questions": "49;515;37;55", "wc_limitations": "51;20;6;91", "wc_review": "257;693;324;799", "wc_reply_reviewers": "0;150;0;29", "wc_reply_authors": "71;361;0;40", "reply_reviewers": "0;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.0, 59.16079783099616 ], "wc_strengths_avg": [ 83.25, 45.04095358670818 ], "wc_weaknesses_avg": [ 112.0, 102.60360617444204 ], "wc_questions_avg": [ 164.0, 202.75354497517424 ], "wc_limitations_avg": [ 42.0, 32.64199748789893 ], "wc_review_avg": [ 518.25, 232.02518721035437 ], "wc_reply_reviewers_avg": [ 44.75, 61.9087029423166 ], "wc_reply_authors_avg": [ 118.0, 142.53596037491732 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14326577476552009830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mbzuai.ac.ae;ucsd.edu;sydney.edu.au;kddi.com;cmu.edu;mbzuai.ac.ae;mbzuai.ac.ae;kddi-research.jp;;cmu.edu;cmu.edu", "author_num": 11, "aff_unique_index": "0;1;2;3;4;0;0;5;4;4", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of California, San Diego;University of Sydney;KDDI Corporation;Carnegie Mellon University;KDDI Research", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.ucsd.edu;https://www.sydney.edu.au;https://www.kddi.com;https://www.cmu.edu;https://www.kddi-research.com", "aff_unique_abbr": "MBZUAI;UCSD;USYD;KDDI;CMU;KDDI", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;2;3;1;0;0;3;1;1", "aff_country_unique": "United Arab Emirates;United States;Australia;Japan" }, { "title": "Dimension-free Private Mean Estimation for Anisotropic Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93891", "id": "kRwQCAIA7z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kRwQCAIA7z", "openreview": "https://openreview.net/forum?id=kRwQCAIA7z", "poster": "/media/PosterPDFs/NeurIPS%202024/93891.png?t=1732441123.8337934", "project": "", "author_site": "Yuval Dagan, Michael Jordan, Xuelin Yang, Lydia Zakynthinou, Nikita Zhivotovskiy", "tldr": "", "abstract": "We present differentially private algorithms for high-dimensional mean estimation. Previous private estimators on distributions over $\\mathbb{R}^d$ suffer from a curse of dimensionality, as they require $\\Omega(d^{1/2})$ samples to achieve non-trivial error, even in cases where $O(1)$ samples suffice without privacy. This rate is unavoidable when the distribution is isotropic, namely, when the covariance is a multiple of the identity matrix. Yet, real-world data is often highly anisotropic, with signals concentrated on a small number of principal components. We develop estimators that are appropriate for such signals---our estimators are $(\\varepsilon,\\delta)$-differentially private and have sample complexity that is dimension-independent for anisotropic subgaussian distributions. Given $n$ samples from a distribution with known covariance-proxy $\\Sigma$ and unknown mean $\\mu$, we present an estimator $\\hat{\\mu}$ that achieves error, $\\|\\hat{\\mu}-\\mu\\|_2\\leq \\alpha$, as long as $n\\gtrsim \\text{tr}(\\Sigma)/\\alpha^2+ \\text{tr}(\\Sigma^{1/2})/(\\alpha\\varepsilon)$. We show that this is the optimal sample complexity for this task up to logarithmic factors. Moreover, for the case of unknown covariance, we present an algorithm whose sample complexity has improved dependence on the dimension, from $d^{1/2}$ to $d^{1/4}$.", "keywords": "differential privacy;mean estimation;anisotropic;covariance-adaptive error", "primary_area": "privacy", "supplementary_material": "", "author": "Yuval Dagan;Michael Jordan;Xuelin Yang;Lydia Zakynthinou;Nikita Zhivotovskiy", "authorids": "~Yuval_Dagan1;~Michael_Jordan1;~Xuelin_Yang1;~Lydia_Zakynthinou1;~Nikita_Zhivotovskiy1", "gender": "M;M;;F;M", "homepage": "https://yuvaldagan.wordpress.com/;http://www.cs.berkeley.edu/~jordan/;;https://lydiazakynthinou.com/;", "dblp": "190/7292;j/MichaelIJordan;;220/4255;", "google_scholar": ";https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-8935-817X;;;", "linkedin": ";;;lydia-zakynthinou-a9408a94/;", "or_profile": "~Yuval_Dagan1;~Michael_Jordan1;~Xuelin_Yang1;~Lydia_Zakynthinou1;~Nikita_Zhivotovskiy1", "aff": ";University of California, Berkeley;;University of California, Berkeley;University of California, Berkeley", "aff_domain": ";berkeley.edu;;berkeley.edu;berkeley.edu", "position": ";Full Professor;;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ndagan2024dimensionfree,\ntitle={Dimension-free Private Mean Estimation for Anisotropic Distributions},\nauthor={Yuval Dagan and Michael Jordan and Xuelin Yang and Lydia Zakynthinou and Nikita Zhivotovskiy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kRwQCAIA7z}\n}", "github": "", "reviewers": "PrhZ;Tkrg;HUbD;XFCk;JTxB", "pdf_size": 559336, "rating": "5;6;7;7;7", "confidence": "4;3;4;1;4", "soundness": "3;3;4;3;4", "novelty": "2;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "168;73;201;233;367", "wc_strengths": "33;22;63;71;111", "wc_weaknesses": "37;27;11;65;87", "wc_questions": "127;48;7;36;70", "wc_limitations": "4;4;1;9;1", "wc_review": "369;174;283;414;636", "wc_reply_reviewers": "26;0;0;42;7", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 208.4, 95.68615364826826 ], "wc_strengths_avg": [ 60.0, 31.317726609701413 ], "wc_weaknesses_avg": [ 45.4, 27.22939588018801 ], "wc_questions_avg": [ 57.6, 40.22238182902649 ], "wc_limitations_avg": [ 3.8, 2.9257477676655586 ], "wc_review_avg": [ 375.2, 153.9758422610508 ], "wc_reply_reviewers_avg": [ 15.0, 16.516658257650064 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30012252399939054, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10872849549494867269&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";berkeley.edu;;berkeley.edu;berkeley.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Text-Infused Attention and Foreground-Aware Modeling for Zero-Shot Temporal Action Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93890", "id": "kS9dciADtY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kS9dciADtY", "openreview": "https://openreview.net/forum?id=kS9dciADtY", "poster": "/media/PosterPDFs/NeurIPS%202024/93890.png?t=1731502670.8848512", "project": "", "author_site": "Yearang Lee, Ho-Joong Kim, Seong-Whan Lee", "tldr": "", "abstract": "Zero-Shot Temporal Action Detection (ZSTAD) aims to classify and localize action segments in untrimmed videos for unseen action categories. Most existing ZSTAD methods utilize a foreground-based approach, limiting the integration of text and visual features due to their reliance on pre-extracted proposals. In this paper, we introduce a cross-modal ZSTAD baseline with mutual cross-attention, integrating both text and visual information throughout the detection process. Our simple approach results in superior performance compared to previous methods. Despite this improvement, we further identify a common-action bias issue that the cross-modal baseline over-focus on common sub-actions due to a lack of ability to discriminate text-related visual parts. To address this issue, we propose Text-infused attention and Foreground-aware Action Detection (Ti-FAD), which enhances the ability to focus on text-related sub-actions and distinguish relevant action segments from the background. Our extensive experiments demonstrate that Ti-FAD outperforms the state-of-the-art methods on ZSTAD benchmarks by a large margin: 41.2\\% (+ 11.0\\%) on THUMOS14 and 32.0\\% (+ 5.4\\%) on ActivityNet v1.3. Code is available at: https://github.com/YearangLee/Ti-FAD.", "keywords": "Temporal Action Detection;Vision and Language;Zero-Shot Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yearang Lee;Ho-Joong Kim;Seong-Whan Lee", "authorids": "~Yearang_Lee1;~Ho-Joong_Kim1;~Seong-Whan_Lee3", "gender": "F;M;", "homepage": ";;http://pr.korea.ac.kr/sub2_1.php?code=LSW", "dblp": ";57/9132;l/SeongWhanLee", "google_scholar": "MIh2bOkAAAAJ;https://scholar.google.co.kr/citations?user=magC6DgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-6249-4996", "linkedin": ";;", "or_profile": "~Yearang_Lee1;~HoJoong_Kim1;~Seong-whan_Lee1", "aff": "Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2024textinfused,\ntitle={Text-Infused Attention and Foreground-Aware Modeling for Zero-Shot Temporal Action Detection},\nauthor={Yearang Lee and Ho-Joong Kim and Seong-Whan Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kS9dciADtY}\n}", "github": "", "reviewers": "QEJb;EXtU;mst5", "pdf_size": 10562886, "rating": "5;5;6", "confidence": "5;4;3", "soundness": "3;2;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "56;52;58", "wc_strengths": "52;46;69", "wc_weaknesses": "169;318;126", "wc_questions": "2;116;27", "wc_limitations": "7;9;4", "wc_review": "286;541;284", "wc_reply_reviewers": "33;41;28", "wc_reply_authors": "0;258;0", "reply_reviewers": "1;2;1", "reply_authors": "1;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 55.333333333333336, 2.494438257849294 ], "wc_strengths_avg": [ 55.666666666666664, 9.741092797468305 ], "wc_weaknesses_avg": [ 204.33333333333334, 82.26920579549935 ], "wc_questions_avg": [ 48.333333333333336, 48.923977307201376 ], "wc_limitations_avg": [ 6.666666666666667, 2.0548046676563256 ], "wc_review_avg": [ 370.3333333333333, 120.68231942675872 ], "wc_reply_reviewers_avg": [ 34.0, 5.354126134736337 ], "wc_reply_authors_avg": [ 86.0, 121.62236636408618 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6LcfqC5Pw9cJ:scholar.google.com/&scioq=Text-Infused+Attention+and+Foreground-Aware+Modeling+for+Zero-Shot+Temporal+Action+Detection&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "ODGEN: Domain-specific Object Detection Data Generation with Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93889", "id": "kTtK65vKvD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kTtK65vKvD", "openreview": "https://openreview.net/forum?id=kTtK65vKvD", "poster": "/media/PosterPDFs/NeurIPS%202024/93889.png?t=1731548374.4901655", "project": "", "author_site": "Jingyuan Zhu, Shiyu Li, Yuxuan (Andy) Liu, Jian Yuan, Ping Huang, Jiulong Shan, Huimin Ma", "tldr": "", "abstract": "Modern diffusion-based image generative models have made significant progress and become promising to enrich training data for the object detection task. However, the generation quality and the controllability for complex scenes containing multi-class objects and dense objects with occlusions remain limited. This paper presents ODGEN, a novel method to generate high-quality images conditioned on bounding boxes, thereby facilitating data synthesis for object detection. Given a domain-specific object detection dataset, we first fine-tune a pre-trained diffusion model on both cropped foreground objects and entire images to fit target distributions. Then we propose to control the diffusion model using synthesized visual prompts with spatial constraints and object-wise textual descriptions. ODGEN exhibits robustness in handling complex scenes and specific domains. Further, we design a dataset synthesis pipeline to evaluate ODGEN on 7 domain-specific benchmarks to demonstrate its effectiveness. Adding training data generated by ODGEN improves up to 25.3% mAP@.50:.95 with object detectors like YOLOv5 and YOLOv7, outperforming prior controllable generative methods. In addition, we design an evaluation protocol based on COCO-2014 to validate ODGEN in general domains and observe an advantage up to 5.6% in mAP@.50:.95 against existing methods.", "keywords": "Object Detection Dataset Generation;Complex Scene Synthesis;Domain-Specific;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "JingYuan Zhu;Shiyu Li;Yuxuan Liu;Jian Yuan;Ping Huang;Jiulong Shan;Huimin Ma", "authorids": "~JingYuan_Zhu1;~Shiyu_Li2;~Yuxuan_Liu11;~Jian_Yuan1;~Ping_Huang1;~Jiulong_Shan2;~Huimin_Ma1", "gender": "M;M;;;M;;F", "homepage": ";;https://andy9999678.me/;http://bdktzweb.tsinghua.edu.cn/yuanjian/zh_CN/index.htm;;;http://server.3dimagelab.cn:5000", "dblp": "302/9530;;;64/4192;;;69/7694-1", "google_scholar": "a3ErJwkAAAAJ;;;;;;32hwVLEAAAAJ", "orcid": ";;;;;;", "linkedin": ";shiyu-li-a43360a0/;;;ping-huang-82845138/;;", "or_profile": "~JingYuan_Zhu1;~Shiyu_Li2;~Yuxuan_Liu11;~Jian_Yuan1;~Ping_Huang1;~Jiulong_Shan2;~Huimin_Ma1", "aff": "Electronic Engineering, Tsinghua University, Tsinghua University;Apple;Apple;Tsinghua University;Apple;;University of Science and Technology Beijing", "aff_domain": "mails.tsinghua.edu.cn;apple.com;apple.com;tsinghua.edu.cn;apple.com;;ustb.edu.cn", "position": "PhD student;Researcher;Researcher;Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhu2024odgen,\ntitle={{ODGEN}: Domain-specific Object Detection Data Generation with Diffusion Models},\nauthor={JingYuan Zhu and Shiyu Li and Yuxuan Liu and Jian Yuan and Ping Huang and Jiulong Shan and Huimin Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kTtK65vKvD}\n}", "github": "", "reviewers": "PYXX;FcFR;2vbs;N8et;Fyyh", "pdf_size": 41754937, "rating": "5;6;6;6;6", "confidence": "4;5;4;4;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "42;127;219;89;96", "wc_strengths": "63;128;71;30;108", "wc_weaknesses": "142;178;427;16;191", "wc_questions": "6;21;169;197;102", "wc_limitations": "6;8;8;44;1", "wc_review": "259;462;894;376;498", "wc_reply_reviewers": "13;18;24;27;0", "wc_reply_authors": "84;540;77;84;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;3;3;3;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 114.6, 58.86459037485948 ], "wc_strengths_avg": [ 80.0, 34.51955967274206 ], "wc_weaknesses_avg": [ 190.8, 133.33476665896256 ], "wc_questions_avg": [ 99.0, 76.48006276148053 ], "wc_limitations_avg": [ 13.4, 15.512575543732254 ], "wc_review_avg": [ 497.8, 214.53055726399444 ], "wc_reply_reviewers_avg": [ 16.4, 9.520504188329525 ], "wc_reply_authors_avg": [ 157.0, 194.11130827440218 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.6, 0.8 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17765650897045693545&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;apple.com;apple.com;tsinghua.edu.cn;apple.com;;ustb.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;0;1;2", "aff_unique_norm": "Tsinghua University;Apple;University of Science and Technology Beijing", "aff_unique_dep": "Electronic Engineering;Apple Inc.;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.apple.com;http://www.ustb.edu.cn", "aff_unique_abbr": "THU;Apple;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Adaptive Passive-Aggressive Framework for Online Regression with Side Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93888", "id": "kV80nC1afE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kV80nC1afE", "openreview": "https://openreview.net/forum?id=kV80nC1afE", "poster": "/media/PosterPDFs/NeurIPS%202024/93888.png?t=1731587191.169153", "project": "", "author_site": "Runhao Shi, Jiaxi Ying, Daniel Palomar", "tldr": "", "abstract": "The Passive-Aggressive (PA) method is widely used in online regression problems for handling large-scale streaming data, typically updating model parameters in a passive-aggressive manner based on whether the error exceeds a predefined threshold. However, this approach struggles with determining optimal thresholds and adapting to complex scenarios with side information, where tracking accuracy is not the sole metric in the regression model. To address these challenges, we introduce a novel adaptive framework that allows finer adjustments to the weight vector in PA using side information. This framework adaptively selects the threshold parameter in PA, theoretically ensuring convergence to the optimal setting. Additionally, we present an efficient implementation of our algorithm that significantly reduces computational complexity. Numerical experiments show that our model achieves outstanding performance associated with the side information while maintaining low tracking error, demonstrating marked improvements over traditional PA methods across various scenarios.", "keywords": "passive-aggressive;online learning;adaptive method;online regression problem;side information;financial engineering", "primary_area": "online_learning", "supplementary_material": "", "author": "Runhao Shi;Jiaxi Ying;Daniel P. Palomar", "authorids": "~Runhao_Shi1;~Jiaxi_Ying1;~Daniel_P._Palomar1", "gender": "F;M;M", "homepage": "https://jennysrh.github.io/;https://jxying.github.io/;https://www.danielppalomar.com/", "dblp": ";179/2448;", "google_scholar": ";_IzItlcAAAAJ;qlReqq8AAAAJ", "orcid": "0000-0003-2632-6212;;0000-0001-5250-4874", "linkedin": ";;", "or_profile": "~Runhao_Shi1;~Jiaxi_Ying1;~Daniel_P._Palomar1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "connect.ust.hk;ust.hk;ust.hk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nshi2024adaptive,\ntitle={Adaptive Passive-Aggressive Framework for Online Regression with Side Information},\nauthor={Runhao Shi and Jiaxi Ying and Daniel P. Palomar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kV80nC1afE}\n}", "github": "", "reviewers": "WzCc;sZCN;JuTC;77Tr", "pdf_size": 638602, "rating": "5;6;6;7", "confidence": "1;3;4;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "164;101;79;115", "wc_strengths": "55;57;94;99", "wc_weaknesses": "210;48;12;62", "wc_questions": "94;63;125;21", "wc_limitations": "16;23;1;43", "wc_review": "539;292;311;340", "wc_reply_reviewers": "0;21;28;17", "wc_reply_authors": "0;31;23;35", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 114.75, 31.19595326320387 ], "wc_strengths_avg": [ 76.25, 20.339309231141552 ], "wc_weaknesses_avg": [ 83.0, 75.55792479945436 ], "wc_questions_avg": [ 75.75, 38.46670638357279 ], "wc_limitations_avg": [ 20.75, 15.105876340020794 ], "wc_review_avg": [ 370.5, 98.77373132569205 ], "wc_reply_reviewers_avg": [ 16.5, 10.307764064044152 ], "wc_reply_authors_avg": [ 22.25, 13.5531361684298 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2SpzYzmNRREJ:scholar.google.com/&scioq=Adaptive+Passive-Aggressive+Framework+for+Online+Regression+with+Side+Information&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "connect.ust.hk;ust.hk;ust.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Repurposing Language Models into Embedding Models: Finding the Compute-Optimal Recipe", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93887", "id": "kVL5rvkqGG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kVL5rvkqGG", "openreview": "https://openreview.net/forum?id=kVL5rvkqGG", "poster": "/media/PosterPDFs/NeurIPS%202024/93887.png?t=1731755479.3546174", "project": "", "author_site": "Albert Q. Jiang, Alicja Ziarko, Bartosz Piotrowski, Wenda Li, Mateja Jamnik, Piotr Mi\u0142o\u015b", "tldr": "", "abstract": "Text embeddings are essential for tasks such as document retrieval, clustering, and semantic similarity assessment. In this paper, we study how to contrastively train text embedding models in a compute-optimal fashion, given a suite of pretrained decoder-only language models. Our innovation is an algorithm that produces optimal configurations of model sizes, data quantities, and fine-tuning methods for text-embedding models at different computational budget levels. The resulting recipe, which we obtain through extensive experiments, can be used by practitioners to make informed design choices for their embedding models. Specifically, our findings suggest that full fine-tuning and Low-Rank Adaptation fine-tuning produce optimal models at lower and higher computational budgets respectively.", "keywords": "text embedding;embedding models;scaling laws", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/ad32bcb230e70841be3aaada050827e3ffe367e7.tgz", "author": "Albert Q. Jiang;Alicja Ziarko;Bartosz Piotrowski;Wenda Li;Mateja Jamnik;Piotr Mi\u0142o\u015b", "authorids": "~Albert_Q._Jiang1;~Alicja_Ziarko1;~Bartosz_Piotrowski1;~Wenda_Li1;~Mateja_Jamnik1;~Piotr_Mi\u0142o\u015b1", "gender": ";F;;M;F;", "homepage": ";https://github.com/Podkamienna;;https://wenda302.github.io;http://www.cl.cam.ac.uk/~mj201;", "dblp": ";;;132/9868.html;41/1392;208/0989.html", "google_scholar": ";;;ufYxQkEAAAAJ;d5QiyJkAAAAJ;Se68XecAAAAJ", "orcid": ";;;;0000-0003-2772-2532;", "linkedin": ";;;;;piotr-milos-4b02151/", "or_profile": "~Albert_Q._Jiang1;~Alicja_Ziarko1;~Bartosz_Piotrowski1;~Wenda_Li1;~Mateja_Jamnik1;~Piotr_Mi\u0142o\u015b1", "aff": ";University of Warsaw;;University of Edinburgh;University of Cambridge;IDEAS NCBR", "aff_domain": ";uw.edu.pl;;ed.ac.uk;cam.ac.uk;ideas-ncbr.pl", "position": ";PhD student;;Lecturer;Professor in Artificial Intelligence;Researcher", "bibtex": "@inproceedings{\njiang2024repurposing,\ntitle={Repurposing Language Models into Embedding Models: Finding the Compute-Optimal Recipe},\nauthor={Albert Q. Jiang and Alicja Ziarko and Bartosz Piotrowski and Wenda Li and Mateja Jamnik and Piotr Mi{\\l}o{\\'s}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kVL5rvkqGG}\n}", "github": "", "reviewers": "4Bw9;GCkf;xguk;zJJS", "pdf_size": 1132431, "rating": "5;6;7;7", "confidence": "3;3;3;4", "soundness": "2;2;3;3", "novelty": "2;4;3;3", "presentation": "3;3;2;3", "wc_summary": "63;83;63;139", "wc_strengths": "66;84;39;107", "wc_weaknesses": "247;84;168;119", "wc_questions": "6;6;216;2", "wc_limitations": "10;8;17;9", "wc_review": "392;265;503;376", "wc_reply_reviewers": "0;0;25;21", "wc_reply_authors": "43;30;30;17", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.0, 31.11269837220809 ], "wc_strengths_avg": [ 74.0, 24.889756929307286 ], "wc_weaknesses_avg": [ 154.5, 61.173932356846244 ], "wc_questions_avg": [ 57.5, 91.5245868605808 ], "wc_limitations_avg": [ 11.0, 3.5355339059327378 ], "wc_review_avg": [ 384.0, 84.335638967165 ], "wc_reply_reviewers_avg": [ 11.5, 11.586630226256467 ], "wc_reply_authors_avg": [ 30.0, 9.192388155425117 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1447665922440996225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";uw.edu.pl;;ed.ac.uk;cam.ac.uk;ideas-ncbr.pl", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Warsaw;University of Edinburgh;University of Cambridge;Institute for Development, Economic Analysis, and Simulation (IDEAS)", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uw.edu.pl;https://www.ed.ac.uk;https://www.cam.ac.uk;https://www.ideas-ncbr.gov.pl", "aff_unique_abbr": "UW;Edinburgh;Cambridge;IDEAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Poland;United Kingdom" }, { "title": "Data Attribution for Text-to-Image Models by Unlearning Synthesized Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93886", "id": "kVr3L73pNH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kVr3L73pNH", "openreview": "https://openreview.net/forum?id=kVr3L73pNH", "poster": "/media/PosterPDFs/NeurIPS%202024/93886.png?t=1731748122.9745927", "project": "", "author_site": "Sheng-Yu Wang, Aaron Hertzmann, Alexei Efros, Jun-Yan Zhu, Richard Zhang", "tldr": "", "abstract": "The goal of data attribution for text-to-image models is to identify the training images that most influence the generation of a new image. Influence is defined such that, for a given output, if a model is retrained from scratch without the most influential images, the model would fail to reproduce the same output. Unfortunately, directly searching for these influential images is computationally infeasible, since it would require repeatedly retraining models from scratch. In our work, we propose an efficient data attribution method by simulating unlearning the synthesized image. We achieve this by increasing the training loss on the output image, without catastrophic forgetting of other, unrelated concepts. We then identify training images with significant loss deviations after the unlearning process and label these as influential. We evaluate our method with a computationally intensive but \"gold-standard\" retraining from scratch and demonstrate our method's advantages over previous methods.", "keywords": "Data Attribution;Influence Estimation;Text-to-Image models;Machine Unlearning", "primary_area": "generative_models", "supplementary_material": "", "author": "Sheng-Yu Wang;Aaron Hertzmann;Alexei A Efros;Jun-Yan Zhu;Richard Zhang", "authorids": "~Sheng-Yu_Wang1;~Aaron_Hertzmann1;~Alexei_A_Efros1;~Jun-Yan_Zhu1;~Richard_Zhang1", "gender": ";M;M;M;M", "homepage": "https://peterwang512.github.io/;http://www.dgp.toronto.edu/~hertzman/;https://www.cs.cmu.edu/~junyanz/;http://richzhang.github.io;http://www.eecs.berkeley.edu/~efros/", "dblp": "30/11438;91/3132;117/4782.html;;40/6158", "google_scholar": "0TpaABgAAAAJ;ZcWO2AEAAAAJ;UdpacsMAAAAJ;LW8ze_UAAAAJ;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ", "orcid": ";0000-0001-9667-0292;0000-0001-8504-3410;;0000-0001-5720-8070", "linkedin": ";aaron-hertzmann-97844a153/;jun-yan-zhu-99b18814;;alexei-efros-890736a3/", "or_profile": "~Sheng-Yu_Wang1;~Aaron_Hertzmann1;~Jun-Yan_Zhu1;~Richard_Zhang1;~Alyosha_Efros1", "aff": "Carnegie Mellon University;Adobe;Carnegie Mellon University;Adobe Systems;University of California, Berkeley", "aff_domain": "andrew.cmu.edu;adobe.com;cmu.edu;adobe.com;berkeley.edu", "position": "PhD student;Principal Scientist;Assistant Professor;Research Scientist;Professor", "bibtex": "@inproceedings{\nwang2024data,\ntitle={Data Attribution for Text-to-Image Models by Unlearning Synthesized Images},\nauthor={Sheng-Yu Wang and Aaron Hertzmann and Alexei A Efros and Jun-Yan Zhu and Richard Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kVr3L73pNH}\n}", "github": "", "reviewers": "YhLa;UBLR;8wwb;ZhUS", "pdf_size": 20825765, "rating": "6;6;6;7", "confidence": "3;3;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "32;107;105;178", "wc_strengths": "32;37;63;162", "wc_weaknesses": "71;224;57;474", "wc_questions": "3;53;32;334", "wc_limitations": "8;8;1;8", "wc_review": "146;429;258;1156", "wc_reply_reviewers": "0;188;26;287", "wc_reply_authors": "0;187;13;21", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.5, 51.62605931116571 ], "wc_strengths_avg": [ 73.5, 52.43329095145564 ], "wc_weaknesses_avg": [ 206.5, 167.75950047612804 ], "wc_questions_avg": [ 105.5, 133.1136732270581 ], "wc_limitations_avg": [ 6.25, 3.031088913245535 ], "wc_review_avg": [ 497.25, 393.45480998457754 ], "wc_reply_reviewers_avg": [ 125.25, 117.9393382209685 ], "wc_reply_authors_avg": [ 55.25, 76.43420373105224 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4943356947271291983&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "andrew.cmu.edu;adobe.com;cmu.edu;adobe.com;berkeley.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Carnegie Mellon University;Adobe;University of California, Berkeley", "aff_unique_dep": ";Adobe Inc.;", "aff_unique_url": "https://www.cmu.edu;https://www.adobe.com;https://www.berkeley.edu", "aff_unique_abbr": "CMU;Adobe;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SkipPredict: When to Invest in Predictions for Scheduling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93885", "id": "kVuw8vzsqZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kVuw8vzsqZ", "openreview": "https://openreview.net/forum?id=kVuw8vzsqZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93885.png?t=1731705156.8142703", "project": "", "author_site": "Rana Shahout, Michael Mitzenmacher", "tldr": "", "abstract": "Expanding on recent work on scheduling with predicted job sizes, we consider the effect of the cost of predictions in queueing systems, removing the assumption in prior research that predictions are external to the system\u2019s resources and/or cost-free. Additionally, we introduce a novel approach to utilizing predictions, SkipPredict, designed to address their inherent cost. Rather than uniformly applying predictions to all jobs, we propose a tailored approach that categorizes jobs to improve the effectiveness of prediction on performance. To achieve this, we employ one-bit \u201ccheap predictions\u201d to classify jobs as either short or long. SkipPredict prioritizes predicted short jobs over long jobs, and for the long jobs, SkipPredict applies a second round of more detailed \u201cexpensive predictions\u201d to approximate Shortest Remaining Processing Time for these jobs. Importantly, our analyses take into account the cost of prediction. We derive closed-form formulas that calculate the mean response time of jobs with size predictions accounting for the prediction cost. We examine the effect of this cost for two distinct models in real-world and synthetic datasets. In the external cost model, predictions are generated by external method without impacting job service times but incur a cost. In the server time cost model, predictions themselves require server processing time and are scheduled on the same server as the jobs.", "keywords": "Algorithms with predictions; scheduling", "primary_area": "optimization", "supplementary_material": "/attachment/c818a54f5e0e172cf0f465f090c3bbe2114a3c44.zip", "author": "Rana Shahout;Michael Mitzenmacher", "authorids": "~Rana_Shahout1;~Michael_Mitzenmacher1", "gender": "F;M", "homepage": "https://sites.google.com/view/ranash/home;", "dblp": "218/6048;74/838", "google_scholar": "M0EIKRoAAAAJ;e8aRmAsAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Rana_Shahout1;~Michael_Mitzenmacher1", "aff": "Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nshahout2024skippredict,\ntitle={SkipPredict: When to Invest in Predictions for Scheduling},\nauthor={Rana Shahout and Michael Mitzenmacher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kVuw8vzsqZ}\n}", "github": "", "reviewers": "oree;kmdF;oTX6;ckzA", "pdf_size": 1500924, "rating": "4;5;6;7", "confidence": "2;3;4;4", "soundness": "2;3;4;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "194;289;207;128", "wc_strengths": "74;103;59;57", "wc_weaknesses": "95;127;51;99", "wc_questions": "1;10;1;64", "wc_limitations": "1;1;1;1", "wc_review": "365;530;319;349", "wc_reply_reviewers": "0;79;0;102", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 204.5, 57.24727067729954 ], "wc_strengths_avg": [ 73.25, 18.38987493160299 ], "wc_weaknesses_avg": [ 93.0, 27.202941017470888 ], "wc_questions_avg": [ 19.0, 26.239283526803852 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 390.75, 82.07428038064056 ], "wc_reply_reviewers_avg": [ 45.25, 45.97485725915851 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7353053671604661718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "harvard.edu;harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Improving Adversarial Robust Fairness via Anti-Bias Soft Label Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93884", "id": "kW30LbNwdV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kW30LbNwdV", "openreview": "https://openreview.net/forum?id=kW30LbNwdV", "poster": "/media/PosterPDFs/NeurIPS%202024/93884.png?t=1731330214.1355836", "project": "", "author_site": "Shiji Zhao, Ranjie Duan, xizhewang, Xingxing Wei", "tldr": "", "abstract": "Adversarial Training (AT) has been widely proved to be an effective method to improve the adversarial robustness against adversarial examples for Deep Neural Networks (DNNs). As a variant of AT, Adversarial Robustness Distillation (ARD) has demonstrated its superior performance in improving the robustness of small student models with the guidance of large teacher models. However, both AT and ARD encounter the robust fairness problem: these models exhibit strong robustness when facing part of classes (easy class), but weak robustness when facing others (hard class). In this paper, we give an in-depth analysis of the potential factors and argue that the smoothness degree of samples' soft labels for different classes (i.e., hard class or easy class) will affect the robust fairness of DNNs from both empirical observation and theoretical analysis. Based on the above finding, we propose an Anti-Bias Soft Label Distillation (ABSLD) method to mitigate the adversarial robust fairness problem within the framework of Knowledge Distillation (KD). Specifically, ABSLD adaptively reduces the student's error risk gap between different classes to achieve fairness by adjusting the class-wise smoothness degree of samples' soft labels during the training process, and the smoothness degree of soft labels is controlled by assigning different temperatures in KD to different classes. Extensive experiments demonstrate that ABSLD outperforms state-of-the-art AT, ARD, and robust fairness methods in the comprehensive metric (Normalized Standard Deviation) of robustness and fairness.", "keywords": "Adversarial Robustness;Robust Fairness;Knowledge Distillation", "primary_area": "machine_vision", "supplementary_material": "/attachment/25db253ede48e8e9d957ed7118a00214544864fc.zip", "author": "Shiji Zhao;Ranjie Duan;xizhewang;Xingxing Wei", "authorids": "~Shiji_Zhao1;~Ranjie_Duan1;~xizhewang1;~Xingxing_Wei1", "gender": "M;Not Specified;F;M", "homepage": ";;https://github.com/xizhe-21;https://sites.google.com/site/xingxingwei1988/", "dblp": "287/0559;261/3330.html;147/0434;57/4066", "google_scholar": "https://scholar.google.com/citations?hl=en;;;ak8D_cQAAAAJ", "orcid": "0000-0001-6033-6049;;;", "linkedin": ";;;", "or_profile": "~Shiji_Zhao1;~Ranjie_Duan1;~xizhewang1;~Xingxing_Wei1", "aff": "Beihang University;, Tsinghua University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;cs.tsinghua.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Postdoc;MS student;Associate Professor", "bibtex": "@inproceedings{\nzhao2024improving,\ntitle={Improving Adversarial Robust Fairness via Anti-Bias Soft Label Distillation},\nauthor={Shiji Zhao and Ranjie Duan and xizhewang and Xingxing Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kW30LbNwdV}\n}", "github": "", "reviewers": "6NLr;Dt8R;Vte2;H27W", "pdf_size": 586095, "rating": "3;5;5;5", "confidence": "4;5;5;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "165;70;97;146", "wc_strengths": "15;19;58;62", "wc_weaknesses": "95;177;146;253", "wc_questions": "5;70;4;14", "wc_limitations": "3;6;1;21", "wc_review": "283;342;306;496", "wc_reply_reviewers": "0;297;0;0", "wc_reply_authors": "0;2437;0;0", "reply_reviewers": "0;4;0;0", "reply_authors": "1;6;1;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.5, 37.84507894033252 ], "wc_strengths_avg": [ 38.5, 21.592822881689184 ], "wc_weaknesses_avg": [ 167.75, 57.26855594477654 ], "wc_questions_avg": [ 23.25, 27.270634389394026 ], "wc_limitations_avg": [ 7.75, 7.854139036202504 ], "wc_review_avg": [ 356.75, 83.10046630434753 ], "wc_reply_reviewers_avg": [ 74.25, 128.60477246198914 ], "wc_reply_authors_avg": [ 609.25, 1055.2519545113385 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10158748557381461148&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;cs.tsinghua.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Beihang University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "BUAA;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Multi-Scale Representation Learning for Protein Fitness Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93883", "id": "kWMVzIdCEn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kWMVzIdCEn", "openreview": "https://openreview.net/forum?id=kWMVzIdCEn", "poster": "", "project": "", "author_site": "Zuobai Zhang, Pascal Notin, Yining Huang, Aurelie Lozano, Vijil Chenthamarakshan, Debora Marks, Payel Das, Jian Tang", "tldr": "", "abstract": "Designing novel functional proteins crucially depends on accurately modeling their fitness landscape. Given the limited availability of functional annotations from wet-lab experiments, previous methods have primarily relied on self-supervised models trained on vast, unlabeled protein sequence or structure datasets. While initial protein representation learning studies solely focused on either sequence or structural features, recent hybrid architectures have sought to merge these modalities to harness their respective strengths. However, these sequence-structure models have so far achieved only incremental improvements when compared to the leading sequence-only approaches, highlighting unresolved challenges effectively leveraging these modalities together. Moreover, the function of certain proteins is highly dependent on the granular aspects of their surface topology, which have been overlooked by prior models.\nTo address these limitations, we introduce the Sequence-Structure-Surface Fitness (**S3F**) model \u2014 a novel multimodal representation learning framework that integrates protein features across several scales. Our approach combines sequence representations from a protein language model with Geometric Vector Perceptron networks encoding protein backbone and detailed surface topology. The proposed method achieves state-of-the-art fitness prediction on the ProteinGym benchmark encompassing 217 substitution deep mutational scanning assays, and provides insights into the determinants of protein function.\nOur code is at https://github.com/DeepGraphLearning/S3F.", "keywords": "Protein representation learning; protein fitness prediction; self-supervised pre-training", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/229c096c8d5f97f10a461f44a15e6a3715d40575.zip", "author": "Zuobai Zhang;Pascal Notin;Yining Huang;Aurelie Lozano;Vijil Chenthamarakshan;Debora Susan Marks;Payel Das;Jian Tang", "authorids": "~Zuobai_Zhang1;~Pascal_Notin1;~Yining_Huang1;~Aurelie_Lozano1;~Vijil_Chenthamarakshan1;~Debora_Susan_Marks1;~Payel_Das1;~Jian_Tang1", "gender": "M;M;F;M;F;F;;M", "homepage": "https://oxer11.github.io/;https://yiningsamhuang.com/;https://research.ibm.com/people/aurelie-lozano;https://researcher.watson.ibm.com/researcher/view.php?person=us-ecvijil;https://www.deboramarkslab.com/;;http://www.jian-tang.com;https://www.pascalnotin.com", "dblp": "256/9098.html;;06/274;;;56/7926;181/2667-5;270/9032", "google_scholar": "UCDbNccAAAAJ;YBBQLMcAAAAJ;4wTGaDsAAAAJ;g9hboJ0AAAAJ;qFmoeNkAAAAJ;;https://scholar.google.ca/citations?user=1ir6WUEAAAAJ;soxv0s0AAAAJ", "orcid": ";;;;0000-0001-9388-2281;;;0000-0002-1877-8983", "linkedin": ";yining-huang-83b45a169;;;debora-marks-3932a97/;;;", "or_profile": "~Zuobai_Zhang1;~Yining_Huang1;~Aurelie_Lozano1;~Vijil_Chenthamarakshan1;~Debora_Susan_Marks1;~Payel_Das1;~Jian_Tang1;~Pascal_M_Notin1", "aff": "NVIDIA;Harvard University;IBM Research;International Business Machines;Harvard Medical School, Harvard University;IBM, International Business Machines;Mila, HEC Montreal;Department of Computer Science, University of Oxford", "aff_domain": "nvidia.com;harvard.edu;us.ibm.com;ibm.com;hms.harvard.edu;us.ibm.com;hec.ca;cs.ox.ac.uk", "position": "Intern;MS student;Principal Researcher;Senior Technical Staff member;Full Professor;Principal Researcher;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024multiscale,\ntitle={Multi-Scale Representation Learning for Protein Fitness Prediction},\nauthor={Zuobai Zhang and Pascal Notin and Yining Huang and Aurelie Lozano and Vijil Chenthamarakshan and Debora Susan Marks and Payel Das and Jian Tang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kWMVzIdCEn}\n}", "github": "", "reviewers": "6ssw;rjUq;ChRY;6FTz", "pdf_size": 1789989, "rating": "5;5;5;6", "confidence": "5;3;4;4", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;4;3", "wc_summary": "90;146;39;136", "wc_strengths": "176;114;80;74", "wc_weaknesses": "354;134;210;123", "wc_questions": "208;104;125;71", "wc_limitations": "1;57;36;51", "wc_review": "829;555;490;455", "wc_reply_reviewers": "76;58;55;54", "wc_reply_authors": "52;108;189;200", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 102.75, 42.43450836288786 ], "wc_strengths_avg": [ 111.0, 40.50925820105819 ], "wc_weaknesses_avg": [ 205.25, 92.18290242772788 ], "wc_questions_avg": [ 127.0, 50.57173123396113 ], "wc_limitations_avg": [ 36.25, 21.741377601246892 ], "wc_review_avg": [ 582.25, 146.91047443936733 ], "wc_reply_reviewers_avg": [ 60.75, 8.926785535678562 ], "wc_reply_authors_avg": [ 137.25, 60.70162683157676 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MRLzCJw11CcJ:scholar.google.com/&scioq=Multi-Scale+Representation+Learning+for+Protein+Fitness+Prediction&hl=en&as_sdt=0,14", "gs_version_total": 7, "email": "nvidia.com;harvard.edu;us.ibm.com;ibm.com;hms.harvard.edu;us.ibm.com;hec.ca;cs.ox.ac.uk", "author_num": 8, "aff_unique_index": "0;1;2;3;1;4;5;6", "aff_unique_norm": "NVIDIA;Harvard University;IBM;International Business Machines Corporation;International Business Machines;HEC Montreal;University of Oxford", "aff_unique_dep": "NVIDIA Corporation;;IBM Research;;;HEC Business School;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;https://www.harvard.edu;https://www.ibm.com/research;https://www.ibm.com;https://www.ibm.com;https://www.hec.ca;https://www.ox.ac.uk", "aff_unique_abbr": "NVIDIA;Harvard;IBM;IBM;IBM;HEC;Oxford", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Cambridge;Montreal;Oxford", "aff_country_unique_index": "0;0;0;0;0;0;1;2", "aff_country_unique": "United States;Canada;United Kingdom" }, { "title": "A Data-Centric Perspective on Evaluating Machine Learning Models for Tabular Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97534", "id": "kWTvdSSH5W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kWTvdSSH5W", "openreview": "https://openreview.net/forum?id=kWTvdSSH5W", "poster": "/media/PosterPDFs/NeurIPS%202024/97534.png?t=1733244488.090357", "project": "", "author_site": "Andrej Tschalzev, Sascha Marton, Stefan L\u00fcdtke, Christian Bartelt, Heiner Stuckenschmidt", "tldr": "", "abstract": "Tabular data is prevalent in real-world machine learning applications, and new models for supervised learning of tabular data are frequently proposed. Comparative studies assessing performance differences typically have model-centered evaluation setups with overly standardized data preprocessing. This limits the external validity of these studies, as in real-world modeling pipelines, models are typically applied after dataset-specific preprocessing and feature engineering. We address this gap by proposing a data-centric evaluation framework. We select 10 relevant datasets from Kaggle competitions and implement expert-level preprocessing pipelines for each dataset. We conduct experiments with different preprocessing pipelines and hyperparameter optimization (HPO) regimes to quantify the impact of model selection, HPO, feature engineering, and test-time adaptation. Our main findings reveal: 1) After dataset-specific feature engineering, model rankings change considerably, performance differences decrease, and the importance of model selection reduces. 2) Recent models, despite their measurable progress, still significantly benefit from manual feature engineering. This holds true for both tree-based models and neural networks. 3) While tabular data is typically considered static, samples are often collected over time, and adapting to distribution shifts can be important even in supposedly static data. These insights suggest that research efforts should be directed toward a data-centric perspective, acknowledging that tabular data requires feature engineering and often exhibits temporal characteristics.", "keywords": "Machine Learning for tabular data;Deep Learning for tabular data;Feature Engineering;Test-time adaptation", "primary_area": "", "supplementary_material": "", "author": "Andrej Tschalzev;Sascha Marton;Stefan L\u00fcdtke;Christian Bartelt;Heiner Stuckenschmidt", "authorids": "~Andrej_Tschalzev1;~Sascha_Marton1;~Stefan_L\u00fcdtke1;~Christian_Bartelt1;~Heiner_Stuckenschmidt2", "gender": "M;M;;M;M", "homepage": "https://www.uni-mannheim.de/en/ines/about-us/researchers/andrej-tschalzev/;https://s-marton.github.io/;;;https://www.uni-mannheim.de/dws/people/professors/prof-dr-heiner-stuckenschmidt/", "dblp": "322/3939.html;281/6606;;15/73.html;https://dblp.uni-trier.de/pers/s/Stuckenschmidt:Heiner", "google_scholar": "N7yih0oAAAAJ;https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.de/citations?user=9FcF1gwAAAAJ;oqpT1YUAAAAJ", "orcid": "0000-0002-0638-5744;0000-0001-8151-9223;;0000-0003-0426-6714;0000-0002-0209-3859", "linkedin": "andrej-tschalzev-8a9718145/;sascha-marton-a19630185/;;christian-bartelt-digital/?originalSubdomain=de;", "or_profile": "~Andrej_Tschalzev1;~Sascha_Marton1;~Stefan_L\u00fcdtke1;~Christian_Bartelt1;~Heiner_Stuckenschmidt2", "aff": "University of Mannheim;University of Mannheim;;Universit\u00e4t Mannheim;University of Mannheim", "aff_domain": "uni-mannheim.de;uni-mannheim.de;;uni-mannheim.de;uni-mannheim.de", "position": "PhD student;PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\ntschalzev2024a,\ntitle={A Data-Centric Perspective on Evaluating Machine Learning Models for Tabular Data},\nauthor={Andrej Tschalzev and Sascha Marton and Stefan L{\\\"u}dtke and Christian Bartelt and Heiner Stuckenschmidt},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kWTvdSSH5W}\n}", "github": "", "reviewers": "Rzfd;VLQP;wbvk", "pdf_size": 578970, "rating": "6;8;8", "confidence": "4;5;4", "wc_summary_and_contributions": "79;127;140", "wc_strengths": "5;5;36", "wc_improvement": "202;609;29", "wc_limitations": "18;6;25", "wc_correctness": "19;36;48", "wc_clarity": "24;8;1", "wc_relation_to_prior_work": "77;8;1", "wc_documentation": "41;24;30", "wc_additional_feedback": "1;1;1", "wc_review": "466;824;311", "wc_reply_reviewers": "184;666;67", "wc_reply_authors": "2836;2129;0", "reply_reviewers": "1;1;1", "reply_authors": "4;4;1", "rating_avg": [ 7.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 115.33333333333333, 26.233989826601334 ], "wc_strengths_avg": [ 15.333333333333334, 14.613540144521982 ], "wc_improvement_avg": [ 280.0, 243.12273992094336 ], "wc_limitations_avg": [ 16.333333333333332, 7.845734863959881 ], "wc_correctness_avg": [ 34.333333333333336, 11.897712198383164 ], "wc_clarity_avg": [ 11.0, 9.626352718795768 ], "wc_relation_to_prior_work_avg": [ 28.666666666666668, 34.29609631171195 ], "wc_documentation_avg": [ 31.666666666666668, 7.039570693980958 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 533.6666666666666, 214.8275794419536 ], "wc_reply_reviewers_avg": [ 305.6666666666667, 259.23262826186743 ], "wc_reply_authors_avg": [ 1655.0, 1205.3301069278352 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6014376245174600960&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uni-mannheim.de;uni-mannheim.de;;uni-mannheim.de;uni-mannheim.de", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Mannheim", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-mannheim.de", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "General Detection-based Text Line Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93882", "id": "kXErlJSZ84", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kXErlJSZ84", "openreview": "https://openreview.net/forum?id=kXErlJSZ84", "poster": "/media/PosterPDFs/NeurIPS%202024/93882.png?t=1731693569.342826", "project": "", "author_site": "Raphael Baena, Syrine Kalleli, Mathieu Aubry", "tldr": "", "abstract": "We introduce a general detection-based approach to text line recognition, be it printed (OCR) or handwritten text (HTR), with latin, chinese or ciphered characters. Detection-based approaches have until now largely been discarded for HTR because reading characters separately is often challenging, and character-level annotation is difficult and expensive. We overcome these challenges thanks to three main insights: (i) synthetic pre-training with diverse enough data to learn reasonable character localization in any script; (ii) modern transformer-based detectors can jointly detect a large number of instances and, if trained with an adequate masking strategy, leverage consistency between the different detections; (iii) once a pre-trained detection model with approximate character localization is available, it is possible to fine-tune it with line-level annotation on real data, even with a different alphabet. Our approach thus builds on a completely different paradigm than most state-of-the-art methods, which rely on autoregressive decoding, predicting character values one by one, while we treat a complete line in parallel. Remarkably, our method demonstrates good performance on range of scripts, usually tackled with specialized approaches: latin script, chinese script, and ciphers, for which we significantly improve state-of-the-art performances. \nOur code and models are available at [https://github.com/raphael-baena/DTLR](https://github.com/raphael-baena/DTLR).", "keywords": "Text Recognition;Handwritten Text Recognition;Optical Character Recognition;Transformer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Raphael Baena;syrine kalleli;Mathieu Aubry", "authorids": "~Raphael_Baena1;~syrine_kalleli1;~Mathieu_Aubry3", "gender": "M;F;", "homepage": ";;http://imagine.enpc.fr/~aubrym/", "dblp": ";;57/10067", "google_scholar": "8MU98WQAAAAJ;;https://scholar.google.fr/citations?user=0MiPsosAAAAJ", "orcid": ";;0000-0002-3804-0193", "linkedin": ";syrine-kalleli/;", "or_profile": "~Raphael_Baena1;~syrine_kalleli1;~Mathieu_Aubry3", "aff": "Ecole Nationale des Ponts et Chausees;Ecole Nationale des Ponts et Chausees;ENPC", "aff_domain": "enpc.fr;enpc.fr;enpc.fr", "position": "Postdoc;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nbaena2024general,\ntitle={General Detection-based Text Line Recognition},\nauthor={Raphael Baena and syrine kalleli and Mathieu Aubry},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kXErlJSZ84}\n}", "github": "", "reviewers": "jpUH;KLkw;9JMV;hupU", "pdf_size": 34962509, "rating": "6;6;6;6", "confidence": "4;4;5;4", "soundness": "2;3;2;3", "novelty": "2;2;2;2", "presentation": "2;3;2;3", "wc_summary": "49;79;113;80", "wc_strengths": "33;51;139;21", "wc_weaknesses": "158;62;147;13", "wc_questions": "117;74;180;42", "wc_limitations": "17;17;51;12", "wc_review": "374;283;630;168", "wc_reply_reviewers": "10;52;57;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.25, 22.64260364887395 ], "wc_strengths_avg": [ 61.0, 46.281745861624536 ], "wc_weaknesses_avg": [ 95.0, 60.1788999567124 ], "wc_questions_avg": [ 103.25, 51.6883690978928 ], "wc_limitations_avg": [ 24.25, 15.578430601315397 ], "wc_review_avg": [ 363.75, 170.17105364896815 ], "wc_reply_reviewers_avg": [ 29.75, 25.063668925358872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NmZA5wG9YU8J:scholar.google.com/&scioq=General+Detection-based+Text+Line+Recognition&hl=en&as_sdt=0,10", "gs_version_total": 3, "email": "enpc.fr;enpc.fr;enpc.fr", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Ecole Nationale des Ponts et Chaussees;\u00c9cole Nationale des Ponts et Chauss\u00e9es", "aff_unique_dep": ";", "aff_unique_url": "https://www.enpc.fr;https://www.enpc.fr", "aff_unique_abbr": "ENPC;ENPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Input-to-State Stable Coupled Oscillator Networks for Closed-form Model-based Control in Latent Space", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93881", "id": "kXKrLsR4aJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kXKrLsR4aJ", "openreview": "https://openreview.net/forum?id=kXKrLsR4aJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93881.png?t=1733884633.445531", "project": "", "author_site": "Maximilian St\u00f6lzle, Cosimo Della Santina", "tldr": "", "abstract": "Even though a variety of methods have been proposed in the literature, efficient and effective latent-space control (i.e., control in a learned low-dimensional space) of physical systems remains an open challenge.\nWe argue that a promising avenue is to leverage powerful and well-understood closed-form strategies from control theory literature in combination with learned dynamics, such as potential-energy shaping.\nWe identify three fundamental shortcomings in existing latent-space models that have so far prevented this powerful combination: (i) they lack the mathematical structure of a physical system, (ii) they do not inherently conserve the stability properties of the real systems, (iii) these methods do not have an invertible mapping between input and latent-space forcing.\nThis work proposes a novel Coupled Oscillator Network (CON) model that simultaneously tackles all these issues. \nMore specifically, (i) we show analytically that CON is a Lagrangian system - i.e., it possesses well-defined potential and kinetic energy terms. Then, (ii) we provide formal proof of global Input-to-State stability using Lyapunov arguments.\nMoving to the experimental side, we demonstrate that CON reaches SoA performance when learning complex nonlinear dynamics of mechanical systems directly from images.\nAn additional methodological innovation contributing to achieving this third goal is an approximated closed-form solution for efficient integration of network dynamics, which eases efficient training.\nWe tackle (iii) by approximating the forcing-to-input mapping with a decoder that is trained to reconstruct the input based on the encoded latent space force.\nFinally, we leverage these three properties and show that they enable latent-space control. We use an integral-saturated PID with potential force compensation and demonstrate high-quality performance on a soft robot using raw pixels as the only feedback information.", "keywords": "Dynamical Systems;Control Theory;Robotics;Decision and Control;Deep Autoencoders", "primary_area": "robotics", "supplementary_material": "", "author": "Maximilian St\u00f6lzle;Cosimo Della Santina", "authorids": "~Maximilian_St\u00f6lzle1;~Cosimo_Della_Santina1", "gender": "M;Not Specified", "homepage": "https://maximilian.stoelzle.ch;https://cosimodellasantina.eu/", "dblp": "302/1303;", "google_scholar": "yHmOzBsAAAAJ;https://scholar.google.it/citations?hl=en", "orcid": "0000-0002-2608-9758;0000-0002-6693-1022", "linkedin": "maximilian-stoelzle-501a0657/;cosimodellasantina/", "or_profile": "~Maximilian_St\u00f6lzle1;~Cosimo_Della_Santina1", "aff": "Massachusetts Institute of Technology;German Aerospace Center", "aff_domain": "mit.edu;dlr.de", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nst{\\\"o}lzle2024inputtostate,\ntitle={Input-to-State Stable Coupled Oscillator Networks for Closed-form Model-based Control in Latent Space},\nauthor={Maximilian St{\\\"o}lzle and Cosimo Della Santina},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kXKrLsR4aJ}\n}", "github": "", "reviewers": "Ys99;W9L3", "pdf_size": 3921659, "rating": "7;8", "confidence": "4;3", "soundness": "3;4", "novelty": "3;3", "presentation": "3;3", "wc_summary": "134;231", "wc_strengths": "83;49", "wc_weaknesses": "146;29", "wc_questions": "70;33", "wc_limitations": "1;8", "wc_review": "434;350", "wc_reply_reviewers": "323;124", "wc_reply_authors": "1169;1407", "reply_reviewers": "2;1", "reply_authors": "4;4", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 182.5, 48.5 ], "wc_strengths_avg": [ 66.0, 17.0 ], "wc_weaknesses_avg": [ 87.5, 58.5 ], "wc_questions_avg": [ 51.5, 18.5 ], "wc_limitations_avg": [ 4.5, 3.5 ], "wc_review_avg": [ 392.0, 42.0 ], "wc_reply_reviewers_avg": [ 223.5, 99.5 ], "wc_reply_authors_avg": [ 1288.0, 119.0 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11596950576256519073&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;dlr.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;German Aerospace Center", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.dlr.de", "aff_unique_abbr": "MIT;DLR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Germany" }, { "title": "Mitigating Reward Overoptimization via Lightweight Uncertainty Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93880", "id": "kYio3xH6eb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kYio3xH6eb", "openreview": "https://openreview.net/forum?id=kYio3xH6eb", "poster": "", "project": "", "author_site": "Xiaoying Zhang, Jean-Francois Ton, Wei Shen, Hongning Wang, Yang Liu", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF) has been pivotal in aligning Large Language Models with human values but often suffers from overoptimization due to its reliance on a proxy reward model. To mitigate this limitation, we first propose a lightweight uncertainty quantification method that assesses the reliability of the proxy reward using only the last layer embeddings of the reward model. Enabled by this efficient uncertainty quantification method, we formulate AdvPO, a distributionally robust optimization procedure to tackle the reward overoptimization problem in RLHF. Through extensive experiments on the Anthropic HH and TL;DR summarization datasets, we verify the effectiveness of AdvPO in mitigating the overoptimization problem, resulting in enhanced RLHF performance as evaluated through human-assisted evaluation.", "keywords": "Overoptimization in RLHF;Lightweight Uncertainty Estimation;Adversarial Policy Optimization", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xiaoying Zhang;Jean-Francois Ton;Wei Shen;Hongning Wang;Yang Liu", "authorids": "~Xiaoying_Zhang3;~Jean-Francois_Ton2;~Wei_Shen12;~Hongning_Wang1;~Yang_Liu3", "gender": "F;Not Specified;;M;M", "homepage": "https://github.com/Xiaoyinggit;https://savior287.github.io/JFT-webpage/;http://github.com/fakerbaby;http://www.cs.virginia.edu/~hw5x/;http://www.yliuu.com", "dblp": "46/7725;;;05/6545;51/3710-18", "google_scholar": "lwKg4C4AAAAJ;WWVOu4kAAAAJ;-DlGT8IAAAAJ;qkdvKNoAAAAJ;jKrIVCIAAAAJ", "orcid": ";;;0000-0002-6524-9195;0000-0001-8420-6011", "linkedin": ";;;;", "or_profile": "~Xiaoying_Zhang3;~Jean-Francois_Ton2;~Wei_Shen12;~Hongning_Wang1;~Yang_Liu3", "aff": "ByteDance AILab;Bytedance;Fudan University;Tsinghua University;University of California, Santa Cruz", "aff_domain": "bytedance.com;bytedance.com;fudan.edu.cn;tsinghua.edu.cn;ucsc.edu", "position": "Researcher;Researcher;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024mitigating,\ntitle={Mitigating Reward Overoptimization via Lightweight Uncertainty Estimation},\nauthor={Xiaoying Zhang and Jean-Francois Ton and Wei Shen and Hongning Wang and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kYio3xH6eb}\n}", "github": "", "reviewers": "fmPe;NjZD;2q4b;JbJ9", "pdf_size": 1563014, "rating": "6;6;6;6", "confidence": "4;3;3;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "57;136;52;32", "wc_strengths": "27;123;37;29", "wc_weaknesses": "154;368;45;5", "wc_questions": "61;45;90;140", "wc_limitations": "9;30;2;5", "wc_review": "308;702;226;211", "wc_reply_reviewers": "17;202;21;14", "wc_reply_authors": "31;406;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.25, 39.65712420234226 ], "wc_strengths_avg": [ 54.0, 40.01249804748511 ], "wc_weaknesses_avg": [ 143.0, 140.88470463467635 ], "wc_questions_avg": [ 84.0, 36.13170353027933 ], "wc_limitations_avg": [ 11.5, 10.965856099730654 ], "wc_review_avg": [ 361.75, 199.88293448916542 ], "wc_reply_reviewers_avg": [ 63.5, 80.00156248474151 ], "wc_reply_authors_avg": [ 109.25, 171.79548160530882 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VjSFRZsC7xIJ:scholar.google.com/&scioq=Mitigating+Reward+Overoptimization+via+Lightweight+Uncertainty+Estimation&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "bytedance.com;bytedance.com;fudan.edu.cn;tsinghua.edu.cn;ucsc.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "ByteDance;Fudan University;Tsinghua University;University of California, Santa Cruz", "aff_unique_dep": "AILab;;;", "aff_unique_url": "https://ailab.bytedance.com/;https://www.fudan.edu.cn;https://www.tsinghua.edu.cn;https://www.ucsc.edu", "aff_unique_abbr": "ByteDance AILab;Fudan;THU;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Rethinking Model-based, Policy-based, and Value-based Reinforcement Learning via the Lens of Representation Complexity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93879", "id": "kZ4Kc5GhGB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kZ4Kc5GhGB", "openreview": "https://openreview.net/forum?id=kZ4Kc5GhGB", "poster": "", "project": "", "author_site": "Guhao Feng, Han Zhong", "tldr": "", "abstract": "Reinforcement Learning (RL) encompasses diverse paradigms, including model-based RL, policy-based RL, and value-based RL, each tailored to approximate the model, optimal policy, and optimal value function, respectively. This work investigates the potential hierarchy of representation complexity among these RL paradigms. By utilizing computational complexity measures, including time complexity and circuit complexity, we theoretically unveil a potential representation complexity hierarchy within RL. We find that representing the model emerges as the easiest task, followed by the optimal policy, while representing the optimal value function presents the most intricate challenge. Additionally, we reaffirm this hierarchy from the perspective of the expressiveness of Multi-Layer Perceptrons (MLPs), which align more closely with practical deep RL and contribute to a completely new perspective in theoretical studying representation complexity in RL. Finally, we conduct deep RL experiments to validate our theoretical findings.", "keywords": "RL theory;representation complexity;model-based RL;policy-based RL;value-based RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Guhao Feng;Han Zhong", "authorids": "~Guhao_Feng1;~Han_Zhong1", "gender": "M;", "homepage": ";https://hanzhong-ml.github.io/", "dblp": ";137/8096.html", "google_scholar": "wmDqYvUAAAAJ;Bk5q_pAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Guhao_Feng1;~Han_Zhong1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn", "position": "Undergrad student;PhD student", "bibtex": "@inproceedings{\nfeng2024rethinking,\ntitle={Rethinking Model-based, Policy-based, and Value-based Reinforcement Learning via the Lens of Representation Complexity},\nauthor={Guhao Feng and Han Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kZ4Kc5GhGB}\n}", "github": "", "reviewers": "4u4n;UVj9;YjQh;eFNz", "pdf_size": 914802, "rating": "5;5;6;7", "confidence": "2;3;4;2", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "45;71;51;29", "wc_strengths": "41;55;136;66", "wc_weaknesses": "46;103;106;2", "wc_questions": "54;8;236;118", "wc_limitations": "21;7;2;11", "wc_review": "207;244;531;226", "wc_reply_reviewers": "35;17;238;27", "wc_reply_authors": "44;19;28;20", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 49.0, 15.033296378372908 ], "wc_strengths_avg": [ 74.5, 36.595764782280476 ], "wc_weaknesses_avg": [ 64.25, 43.16465568031326 ], "wc_questions_avg": [ 104.0, 85.63877626402656 ], "wc_limitations_avg": [ 10.25, 6.977642868476432 ], "wc_review_avg": [ 302.0, 132.85894776039737 ], "wc_reply_reviewers_avg": [ 79.25, 91.87593536938822 ], "wc_reply_authors_avg": [ 27.75, 10.0093706095838 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10547499706356102737&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;stu.pku.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Multi-Winner Reconfiguration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93878", "id": "kZfxICBXd1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kZfxICBXd1", "openreview": "https://openreview.net/forum?id=kZfxICBXd1", "poster": "/media/PosterPDFs/NeurIPS%202024/93878.png?t=1733407702.788558", "project": "", "author_site": "Jiehua Chen, Christian Hatschka, Sofia Simola", "tldr": "", "abstract": "We introduce a multi-winner reconfiguration model to examine how to transition between subsets of alternatives (aka. committees) through a sequence of minor yet impactful modifications, called reconfiguration path. We analyze this model under four approval-based voting rules: Chamberlin-Courant (CC), Proportional Approval Voting (PAV), Approval Voting (AV), and Satisfaction Approval Voting (SAV). The problem exhibits computational intractability for CC and PAV, and polynomial solvability for AV and SAV. We provide a detailed multivariate complexity analysis for CC and PAV, demonstrating that although the problem remains challenging in many scenarios, there are specific cases that allow for efficient parameterized algorithms.", "keywords": "computational social choice;computational complexity;algorithmic design", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/f072147f62f0c10914e1d111458b0468b8f69e39.zip", "author": "Jiehua Chen;Christian Hatschka;Sofia Simola", "authorids": "~Jiehua_Chen1;~Christian_Hatschka1;~Sofia_Simola1", "gender": "F;M;F", "homepage": "https://www.ac.tuwien.ac.at/jchen/about/;;https://informatics.tuwien.ac.at/people/sofia-simola", "dblp": "72/4415-1;314/7275.html;303/4421.html", "google_scholar": "guvQn0EAAAAJ;;", "orcid": "0000-0002-8163-1327;0000-0002-0881-8259;0000-0001-7941-0018", "linkedin": ";;", "or_profile": "~Jiehua_Chen1;~Christian_Hatschka1;~Sofia_Simola1", "aff": "Technische Universit\u00e4t Wien;Technische Universit\u00e4t Wien;Technische Universit\u00e4t Wien", "aff_domain": "tuwien.ac.at;tuwien.ac.at;tuwien.ac.at", "position": "Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nchen2024multiwinner,\ntitle={Multi-Winner Reconfiguration},\nauthor={Jiehua Chen and Christian Hatschka and Sofia Simola},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kZfxICBXd1}\n}", "github": "", "reviewers": "go2g;tnVW;wJyv;q2Mc", "pdf_size": 1034934, "rating": "6;6;7;7", "confidence": "3;3;4;4", "soundness": "3;4;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "53;79;178;186", "wc_strengths": "48;30;38;163", "wc_weaknesses": "129;25;126;775", "wc_questions": "1;87;33;109", "wc_limitations": "1;1;16;29", "wc_review": "232;222;391;1262", "wc_reply_reviewers": "9;16;31;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 124.0, 58.79200625935468 ], "wc_strengths_avg": [ 69.75, 54.214273950685715 ], "wc_weaknesses_avg": [ 263.75, 298.12361110787583 ], "wc_questions_avg": [ 57.5, 42.7638866334668 ], "wc_limitations_avg": [ 11.75, 11.691342951089922 ], "wc_review_avg": [ 526.75, 429.75887134531615 ], "wc_reply_reviewers_avg": [ 14.0, 11.335784048754634 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8228107347751728078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "tuwien.ac.at;tuwien.ac.at;tuwien.ac.at", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Wien", "aff_unique_dep": "", "aff_unique_url": "https://www.tuwien.ac.at", "aff_unique_abbr": "TU Wien", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Austria" }, { "title": "GTA: Generative Trajectory Augmentation with Guidance for Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93877", "id": "kZpNDbZrzy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kZpNDbZrzy", "openreview": "https://openreview.net/forum?id=kZpNDbZrzy", "poster": "/media/PosterPDFs/NeurIPS%202024/93877.png?t=1731243279.368128", "project": "", "author_site": "Jaewoo Lee, Sujin Yun, Taeyoung Yun, Jinkyoo Park", "tldr": "", "abstract": "Offline Reinforcement Learning (Offline RL) presents challenges of learning effective decision-making policies from static datasets without any online interactions. Data augmentation techniques, such as noise injection and data synthesizing, aim to improve Q-function approximation by smoothing the learned state-action region. However, these methods often fall short of directly improving the quality of offline datasets, leading to suboptimal results. In response, we introduce GTA, Generative Trajectory Augmentation, a novel generative data augmentation approach designed to enrich offline data by augmenting trajectories to be both high-rewarding and dynamically plausible. GTA applies a diffusion model within the data augmentation framework. GTA partially noises original trajectories and then denoises them with classifier-free guidance via conditioning on amplified return value. Our results show that GTA, as a general data augmentation strategy, enhances the performance of widely used offline RL algorithms across various tasks with unique challenges. Furthermore, we conduct a quality analysis of data augmented by GTA and demonstrate that GTA improves the quality of the data. Our code is available at https://github.com/Jaewoopudding/GTA", "keywords": "Offline Reinforcement Learning;Data Augmentation;Diffusion Models.", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jaewoo Lee;Sujin Yun;Taeyoung Yun;Jinkyoo Park", "authorids": "~Jaewoo_Lee3;~Sujin_Yun1;~Taeyoung_Yun1;~Jinkyoo_Park1", "gender": "M;F;M;M", "homepage": ";https://github.com/sujinyun999;https://dbsxodud-11.github.io;http://silab.kaist.ac.kr/", "dblp": ";;358/5797.html;156/7535", "google_scholar": ";wpTfpnIAAAAJ;_51PhLQAAAAJ;sH2a0nkAAAAJ", "orcid": ";0009-0008-2263-9944;0009-0001-4602-6367;0000-0003-2620-1479", "linkedin": "%EC%9E%AC%EC%9A%B0-%EC%9D%B4-2a7b2a1b7;;;", "or_profile": "~Jaewoo_Lee3;~Sujin_Yun1;~Taeyoung_Yun1;~Jinkyoo_Park1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nlee2024gta,\ntitle={{GTA}: Generative Trajectory Augmentation with Guidance for Offline Reinforcement Learning},\nauthor={Jaewoo Lee and Sujin Yun and Taeyoung Yun and Jinkyoo Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kZpNDbZrzy}\n}", "github": "", "reviewers": "UiuM;hVee;WvGz;cNng;ABdH", "pdf_size": 6276287, "rating": "6;6;6;6;7", "confidence": "4;4;4;4;4", "soundness": "3;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "93;61;70;110;63", "wc_strengths": "42;59;120;140;142", "wc_weaknesses": "33;111;148;257;93", "wc_questions": "223;119;158;74;1", "wc_limitations": "9;40;11;87;24", "wc_review": "400;390;507;668;323", "wc_reply_reviewers": "100;25;5;82;4", "wc_reply_authors": "64;54;75;70;24", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.4, 19.06410239166796 ], "wc_strengths_avg": [ 100.6, 41.969512744371954 ], "wc_weaknesses_avg": [ 128.4, 74.26870134854924 ], "wc_questions_avg": [ 115.0, 75.10792235177325 ], "wc_limitations_avg": [ 34.2, 28.631451238105274 ], "wc_review_avg": [ 457.6, 120.59286877755251 ], "wc_reply_reviewers_avg": [ 43.2, 40.14673087562672 ], "wc_reply_authors_avg": [ 57.4, 18.10635247641004 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15308095431067933050&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "kaist.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Prior-itizing Privacy: A Bayesian Approach to Setting the Privacy Budget in Differential Privacy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93876", "id": "kamAXSJxGV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kamAXSJxGV", "openreview": "https://openreview.net/forum?id=kamAXSJxGV", "poster": "", "project": "", "author_site": "Zeki Kazan, Jerome Reiter", "tldr": "", "abstract": "When releasing outputs from confidential data, agencies need to balance the analytical usefulness of the released data with the obligation to protect data subjects' confidentiality. For releases satisfying differential privacy, this balance is reflected by the privacy budget, $\\varepsilon$. We provide a framework for setting $\\varepsilon$ based on its relationship with Bayesian posterior probabilities of disclosure. The agency responsible for the data release decides how much posterior risk it is willing to accept at various levels of prior risk, which implies a unique $\\varepsilon$. Agencies can evaluate different risk profiles to determine one that leads to an acceptable trade-off in risk and utility.", "keywords": "confidentiality;disclosure;risk;semantics;utility", "primary_area": "privacy", "supplementary_material": "/attachment/8517c8fcd53056ebd33c9625649cc973ca97481a.zip", "author": "Zeki Kazan;Jerome Reiter", "authorids": "~Zeki_Kazan1;~Jerome_Reiter1", "gender": "M;M", "homepage": ";https://www2.stat.duke.edu/~jerry/", "dblp": ";", "google_scholar": "IURUXKYAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Zeki_Kazan1;~Jerome_Reiter1", "aff": "Duke University;Duke University", "aff_domain": "duke.edu;duke.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkazan2024prioritizing,\ntitle={Prior-itizing Privacy: A Bayesian Approach to Setting the Privacy Budget in Differential Privacy},\nauthor={Zeki Kazan and Jerome Reiter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kamAXSJxGV}\n}", "github": "", "reviewers": "E851;QVWW;XDrZ;HWnJ", "pdf_size": 1210722, "rating": "4;6;6;7", "confidence": "4;4;3;2", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "182;158;48;45", "wc_strengths": "111;102;55;74", "wc_weaknesses": "709;181;22;57", "wc_questions": "72;3;17;14", "wc_limitations": "156;28;12;3", "wc_review": "1230;472;154;193", "wc_reply_reviewers": "98;201;12;0", "wc_reply_authors": "132;90;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 108.25, 62.3392933870765 ], "wc_strengths_avg": [ 85.5, 22.276669409945463 ], "wc_weaknesses_avg": [ 242.25, 275.8780301147592 ], "wc_questions_avg": [ 26.5, 26.781523481684157 ], "wc_limitations_avg": [ 49.75, 61.99344723436502 ], "wc_review_avg": [ 512.25, 432.15990963993875 ], "wc_reply_reviewers_avg": [ 77.75, 80.5741118474166 ], "wc_reply_authors_avg": [ 55.5, 57.452154006616674 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1691987389638589960&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "duke.edu;duke.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Operator World Models for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93875", "id": "kbBjVMcJ7G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kbBjVMcJ7G", "openreview": "https://openreview.net/forum?id=kbBjVMcJ7G", "poster": "/media/PosterPDFs/NeurIPS%202024/93875.png?t=1732707626.0451183", "project": "", "author_site": "Pietro Novelli, Marco Prattic\u00f2, Massimiliano Pontil, Carlo Ciliberto", "tldr": "", "abstract": "Policy Mirror Descent (PMD) is a powerful and theoretically sound methodology for sequential decision-making. However, it is not directly applicable to Reinforcement Learning (RL) due to the inaccessibility of explicit action-value functions. We address this challenge by introducing a novel approach based on learning a world model of the environment using conditional mean embeddings. Leveraging tools from operator theory we derive a closed-form expression of the action-value function in terms of the world model via simple matrix operations. Combining these estimators with PMD leads to POWR, a new RL algorithm for which we prove convergence rates to the global optimum. Preliminary experiments in finite and infinite state settings support the effectiveness of our method.", "keywords": "Reinforcement Learning;Transfer Operators;World Models;Policy Gradient;Conditional Mean Embeddings;Mirror Descent", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Pietro Novelli;Marco Prattic\u00f2;Massimiliano Pontil;Carlo Ciliberto", "authorids": "~Pietro_Novelli1;~Marco_Prattic\u00f21;~Massimiliano_Pontil4;~Carlo_Ciliberto1", "gender": "M;M;Not Specified;M", "homepage": ";;https://www.iit.it/web/computational-statistics-and-machine-learning;https://cciliber.github.io/", "dblp": "318/3513;;;88/10332", "google_scholar": ";;lcOacs8AAAAJ;XUcUAisAAAAJ", "orcid": "0000-0003-1623-5659;;0000-0001-9415-098X;", "linkedin": ";marco-prattic%C3%B2-538498245/;;", "or_profile": "~Pietro_Novelli1;~Marco_Prattic\u00f21;~Massimiliano_Pontil4;~Carlo_Ciliberto1", "aff": "Istituto Italiano di Tecnologia;Universit\u00e0 degli Studi di Genova, Istituto Italiano di Tecnologia;University College London, University of London;University College London", "aff_domain": "iit.it;iit.it;ucl.ac.uk;ucl.ac.uk", "position": "Postdoc;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nnovelli2024operator,\ntitle={Operator World Models for Reinforcement Learning},\nauthor={Pietro Novelli and Marco Prattic{\\`o} and Massimiliano Pontil and Carlo Ciliberto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kbBjVMcJ7G}\n}", "github": "", "reviewers": "68rN;41Mf;vreF;Tuc7", "pdf_size": 774272, "rating": "5;7;7;8", "confidence": "2;3;3;4", "soundness": "3;2;3;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "10;57;118;88", "wc_strengths": "21;62;17;78", "wc_weaknesses": "33;143;69;78", "wc_questions": "1;97;33;9", "wc_limitations": "1;49;6;20", "wc_review": "66;408;243;273", "wc_reply_reviewers": "19;87;88;5", "wc_reply_authors": "149;119;33;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.25, 39.95231532714969 ], "wc_strengths_avg": [ 44.5, 26.158172719056658 ], "wc_weaknesses_avg": [ 80.75, 39.68863187362346 ], "wc_questions_avg": [ 35.0, 37.68288736283355 ], "wc_limitations_avg": [ 19.0, 18.66815470259447 ], "wc_review_avg": [ 247.5, 121.83287733612795 ], "wc_reply_reviewers_avg": [ 49.75, 38.07476198218447 ], "wc_reply_authors_avg": [ 75.25, 60.829166523962826 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9733285267845754, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zeP8KNELS1YJ:scholar.google.com/&scioq=Operator+World+Models+for+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "iit.it;iit.it;ucl.ac.uk;ucl.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Istituto Italiano di Tecnologia;Universit\u00e0 degli Studi di Genova;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iit.it;https://www.unige.it;https://www.ucl.ac.uk", "aff_unique_abbr": "IIT;UniGe;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Italy;United Kingdom" }, { "title": "Localize, Understand, Collaborate: Semantic-Aware Dragging via Intention Reasoner", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93874", "id": "kcQKIzQPZj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kcQKIzQPZj", "openreview": "https://openreview.net/forum?id=kcQKIzQPZj", "poster": "/media/PosterPDFs/NeurIPS%202024/93874.png?t=1731496881.9790905", "project": "", "author_site": "Xing Cui, Peipei Li, Zekun Li, Xuannan Liu, Yueying Zou, Zhaofeng He", "tldr": "", "abstract": "Flexible and accurate drag-based editing is a challenging task that has recently garnered significant attention. Current methods typically model this problem as automatically learning \"how to drag\" through point dragging and often produce one deterministic estimation, which presents two key limitations: 1) Overlooking the inherently ill-posed nature of drag-based editing, where multiple results may correspond to a given input, as illustrated in Fig.1; 2) Ignoring the constraint of image quality, which may lead to unexpected distortion.\nTo alleviate this, we propose LucidDrag, which shifts the focus from \"how to drag\" to \"what-then-how\" paradigm. LucidDrag comprises an intention reasoner and a collaborative guidance sampling mechanism. The former infers several optimal editing strategies, identifying what content and what semantic direction to be edited. Based on the former, the latter addresses \"how to drag\" by collaboratively integrating existing editing guidance with the newly proposed semantic guidance and quality guidance.\nSpecifically, semantic guidance is derived by establishing a semantic editing direction based on reasoned intentions, while quality guidance is achieved through classifier guidance using an image fidelity discriminator.\nBoth qualitative and quantitative comparisons demonstrate the superiority of LucidDrag over previous methods.", "keywords": "Image editing;Diffusion model;Large Language Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xing Cui;Pei Pei Li;Zekun Li;Xuannan Liu;Yueying Zou;Zhaofeng He", "authorids": "~Xing_Cui1;~Pei_Pei_Li2;~Zekun_Li2;~Xuannan_Liu1;~Yueying_Zou1;~Zhaofeng_He1", "gender": "M;F;;M;F;M", "homepage": ";;;;;https://teacher.bupt.edu.cn/zhaofenghe/zh_CN/index.htm", "dblp": ";;;296/7706;;13/3992", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;ddbkQoIAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": "0000-0001-6788-1920;;;;0009-0008-4762-989X;0000-0002-3433-8435", "linkedin": ";;;;;", "or_profile": "~Xing_Cui1;~Pei_Pei_Li2;~Zekun_Li2;~Xuannan_Liu1;~Yueying_Zou1;~Zhaofeng_He1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Assistant Professor;;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\ncui2024localize,\ntitle={Localize, Understand, Collaborate: Semantic-Aware Dragging via Intention Reasoner},\nauthor={Xing Cui and Pei Pei Li and Zekun Li and Xuannan Liu and Yueying Zou and Zhaofeng He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kcQKIzQPZj}\n}", "github": "", "reviewers": "7sLK;6UnY;17V7;ND9N", "pdf_size": 0, "rating": "4;5;5;6", "confidence": "3;2;4;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;4;3", "wc_summary": "87;52;103;66", "wc_strengths": "72;40;36;46", "wc_weaknesses": "222;109;205;54", "wc_questions": "4;84;94;6", "wc_limitations": "1;7;20;8", "wc_review": "386;292;458;180", "wc_reply_reviewers": "0;33;32;14", "wc_reply_authors": "98;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.0, 19.50640920313116 ], "wc_strengths_avg": [ 48.5, 14.026760139105537 ], "wc_weaknesses_avg": [ 147.5, 69.06699645995909 ], "wc_questions_avg": [ 47.0, 42.15447781671598 ], "wc_limitations_avg": [ 9.0, 6.892024376045111 ], "wc_review_avg": [ 329.0, 104.23531071570709 ], "wc_reply_reviewers_avg": [ 19.75, 13.681648292512127 ], "wc_reply_authors_avg": [ 24.5, 42.4352447854375 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16636905311400000850&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Unified Multimodal Editing with Enhanced Knowledge Collaboration", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93873", "id": "kf80ZS3fVy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kf80ZS3fVy", "openreview": "https://openreview.net/forum?id=kf80ZS3fVy", "poster": "", "project": "", "author_site": "Kaihang Pan, Zhaoyu Fan, Juncheng Li, Qifan Yu, Hao Fei, Siliang Tang, Richang Hong, Hanwang Zhang, QIANRU SUN", "tldr": "", "abstract": "The swift advancement in Multimodal LLMs (MLLMs) also presents significant challenges for effective knowledge editing. Current methods, including intrinsic knowledge editing and external knowledge resorting, each possess strengths and weaknesses, struggling to balance the desired properties of reliability, generality, and locality when applied to MLLMs. In this paper, we propose \\textbf{UniKE}, a novel multimodal editing method that establishes a unified perspective and paradigm for intrinsic knowledge editing and external knowledge resorting. Both types of knowledge are conceptualized as vectorized key-value memories, with the corresponding editing processes resembling the assimilation and accommodation phases of human cognition, conducted at the same semantic levels. Within such a unified framework, we further promote knowledge collaboration by disentangling the knowledge representations into the semantic and truthfulness spaces. Extensive experiments validate the effectiveness of our method, which ensures that the post-edit MLLM simultaneously maintains excellent reliability, generality, and locality. The code for UniKE is available at https://github.com/beepkh/UniKE.", "keywords": "vision and language;multimodal understanding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kaihang Pan;Zhaoyu Fan;Juncheng Li;Qifan Yu;Hao Fei;Siliang Tang;Richang Hong;Hanwang Zhang;Qianru Sun", "authorids": "~Kaihang_Pan1;~Zhaoyu_Fan1;~Juncheng_Li3;~Qifan_Yu1;~Hao_Fei1;~Siliang_Tang1;~Richang_Hong1;~Hanwang_Zhang3;~Qianru_Sun2", "gender": "M;M;M;M;M;M;M;M;F", "homepage": "https://github.com/1308024507pkh;https://github.com/FanBB2333;;https://github.com/Yuqifan1117;https://haofei.vip/;https://person.zju.edu.cn/en/siliang;https://sites.google.com/site/homeofrichanghong/;https://mreallab.github.io/index.html;https://qianrusun.com/", "dblp": "344/0647.html;;182/7674-6;;81/3569-1;44/5693;59/1501;79/8116.html;127/6132.html", "google_scholar": "https://scholar.google.com.hk/citations?user=lMQADDUAAAAJ;;lm9s-QgAAAAJ;uodH3cwAAAAJ;YGDX46AAAAAJ;8e7H3PcAAAAJ;https://scholar.google.com/scholar?hl=en;YG0DFyYAAAAJ;https://scholar.google.de/citations?user=fNfrGMIAAAAJ", "orcid": ";;0000-0003-2258-1291;;0000-0003-3026-6347;0000-0002-7356-9711;;;0000-0003-2689-317X", "linkedin": ";;;;;siliang-tang-4734272a/;;;", "or_profile": "~Kaihang_Pan1;~Zhaoyu_Fan1;~Juncheng_Li3;~Qifan_Yu1;~Hao_Fei1;~Siliang_Tang1;~Richang_Hong1;~Hanwang_Zhang3;~Qianru_Sun2", "aff": "Zhejiang University;Zhejiang University;National University of Singapore;Zhejiang University;National University of Singapore;Zhejiang University;Hefei University of Technology;Nanyang Technological University;Singapore Management University", "aff_domain": "zju.edu.cn;zju.edu.cn;nus.edu;zju.edu.cn;nus.edu.sg;zju.edu.cn;hfut.edu;ntu.edu.sg;smu.edu.sg", "position": "PhD student;MS student;Postdoc;PhD student;Postdoc;Full Professor;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\npan2024towards,\ntitle={Towards Unified Multimodal Editing with Enhanced Knowledge Collaboration},\nauthor={Kaihang Pan and Zhaoyu Fan and Juncheng Li and Qifan Yu and Hao Fei and Siliang Tang and Richang Hong and Hanwang Zhang and Qianru Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kf80ZS3fVy}\n}", "github": "", "reviewers": "4G2a;w8aR;mp35;V1xr", "pdf_size": 2566650, "rating": "6;6;7;9", "confidence": "4;3;4;5", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "64;103;66;93", "wc_strengths": "27;115;54;265", "wc_weaknesses": "109;155;100;236", "wc_questions": "32;8;6;3", "wc_limitations": "1;11;5;12", "wc_review": "233;392;231;609", "wc_reply_reviewers": "15;37;18;64", "wc_reply_authors": "404;376;912;20", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.5, 16.889345754054535 ], "wc_strengths_avg": [ 115.25, 92.14763968762304 ], "wc_weaknesses_avg": [ 150.0, 53.85629025471398 ], "wc_questions_avg": [ 12.25, 11.54068888758379 ], "wc_limitations_avg": [ 7.25, 4.493050188902857 ], "wc_review_avg": [ 366.25, 154.62757677723596 ], "wc_reply_reviewers_avg": [ 33.5, 19.525624189766635 ], "wc_reply_authors_avg": [ 428.0, 317.80497164141406 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8367299329339889643&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "zju.edu.cn;zju.edu.cn;nus.edu;zju.edu.cn;nus.edu.sg;zju.edu.cn;hfut.edu;ntu.edu.sg;smu.edu.sg", "author_num": 9, "aff_unique_index": "0;0;1;0;1;0;2;3;4", "aff_unique_norm": "Zhejiang University;National University of Singapore;Hefei University of Technology;Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg;http://www.hfut.edu.cn/;https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "ZJU;NUS;HUT;NTU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "A generalized neural tangent kernel for surrogate gradient learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93872", "id": "kfdEXQu6MC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kfdEXQu6MC", "openreview": "https://openreview.net/forum?id=kfdEXQu6MC", "poster": "/media/PosterPDFs/NeurIPS%202024/93872.png?t=1732634488.4781232", "project": "", "author_site": "Luke Eilers, Raoul-Martin Memmesheimer, Sven Goedeke", "tldr": "", "abstract": "State-of-the-art neural network training methods depend on the gradient of the network function. Therefore, they cannot be applied to networks whose activation functions do not have useful derivatives, such as binary and discrete-time spiking neural networks. To overcome this problem, the activation function's derivative is commonly substituted with a surrogate derivative, giving rise to surrogate gradient learning (SGL). This method works well in practice but lacks theoretical foundation.\n\nThe neural tangent kernel (NTK) has proven successful in the analysis of gradient descent. Here, we provide a generalization of the NTK, which we call the surrogate gradient NTK, that enables the analysis of SGL. First, we study a naive extension of the NTK to activation functions with jumps, demonstrating that gradient descent for such activation functions is also ill-posed in the infinite-width limit. To address this problem, we generalize the NTK to gradient descent with surrogate derivatives, i.e., SGL. We carefully define this generalization and expand the existing key theorems on the NTK with mathematical rigor. Further, we illustrate our findings with numerical experiments. Finally, we numerically compare SGL in networks with sign activation function and finite width to kernel regression with the surrogate gradient NTK; the results confirm that the surrogate gradient NTK provides a good characterization of SGL.", "keywords": "Neural Tangent Kernel;Surrogate Gradient Descent;Binary Neural Networks;Infinite Width", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/d8c7546b617ea1918b78a3ceb36fa344c44a23b9.zip", "author": "Luke Eilers;Raoul-Martin Memmesheimer;Sven Goedeke", "authorids": "~Luke_Eilers1;~Raoul-Martin_Memmesheimer1;~Sven_Goedeke1", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": "https://de.linkedin.com/in/luke-eilers-a963321a8;;", "or_profile": "~Luke_Eilers1;~Raoul-Martin_Memmesheimer1;~Sven_Goedeke1", "aff": "Universit\u00e4t Bern;;", "aff_domain": "unibe.ch;;", "position": "PhD student;;", "bibtex": "@inproceedings{\neilers2024a,\ntitle={A generalized neural tangent kernel for surrogate gradient learning},\nauthor={Luke Eilers and Raoul-Martin Memmesheimer and Sven Goedeke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kfdEXQu6MC}\n}", "github": "", "reviewers": "TYqA;Ljsu;L5ST;PShm", "pdf_size": 1520838, "rating": "6;8;8;8", "confidence": "3;3;3;3", "soundness": "3;4;4;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "61;112;70;20", "wc_strengths": "67;84;48;104", "wc_weaknesses": "26;72;209;46", "wc_questions": "44;3;6;46", "wc_limitations": "15;16;4;9", "wc_review": "213;287;337;225", "wc_reply_reviewers": "0;13;47;40", "wc_reply_authors": "0;17;30;49", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.75, 32.683137854251385 ], "wc_strengths_avg": [ 75.75, 20.69269194667528 ], "wc_weaknesses_avg": [ 88.25, 71.59739869576268 ], "wc_questions_avg": [ 24.75, 20.29008378494283 ], "wc_limitations_avg": [ 11.0, 4.847679857416329 ], "wc_review_avg": [ 265.5, 49.92744736114595 ], "wc_reply_reviewers_avg": [ 25.0, 19.222382786741086 ], "wc_reply_authors_avg": [ 24.0, 17.930421077041107 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17244991395876374488&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "unibe.ch;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Bern", "aff_unique_dep": "", "aff_unique_url": "https://www.unibe.ch", "aff_unique_abbr": "UniBE", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "title": "HydraViT: Stacking Heads for a Scalable ViT", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93871", "id": "kk0Eaunc58", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kk0Eaunc58", "openreview": "https://openreview.net/forum?id=kk0Eaunc58", "poster": "", "project": "", "author_site": "Janek Haberer, Ali Hojjat, Olaf Landsiedel", "tldr": "", "abstract": "The architecture of Vision Transformers (ViTs), particularly the Multi-head Attention (MHA) mechanism, imposes substantial hardware demands. Deploying ViTs on devices with varying constraints, such as mobile phones, requires multiple models of different sizes. However, this approach has limitations, such as training and storing each required model separately. This paper introduces HydraViT, a novel approach that addresses these limitations by stacking attention heads to achieve a scalable ViT. By repeatedly changing the size of the embedded dimensions throughout each layer and their corresponding number of attention heads in MHA during training, HydraViT induces multiple subnetworks. Thereby, HydraViT achieves adaptability across a wide spectrum of hardware environments while maintaining performance. Our experimental results demonstrate the efficacy of HydraViT in achieving a scalable ViT with up to 10 subnetworks, covering a wide range of resource constraints. HydraViT achieves up to 5 p.p. more accuracy with the same GMACs and up to 7 p.p. more accuracy with the same throughput on ImageNet-1K compared to the baselines, making it an effective solution for scenarios where hardware availability is diverse or varies over time. The source code is available at https://github.com/ds-kiel/HydraViT.", "keywords": "Deep Learning;Transformers;Vision Transformers;Scalable Transformers", "primary_area": "machine_vision", "supplementary_material": "/attachment/3ab7715caebceea6466b92c61f3aa38afe9b9d78.zip", "author": "Janek Haberer;Ali Hojjat;Olaf Landsiedel", "authorids": "~Janek_Haberer1;~Ali_Hojjat1;~Olaf_Landsiedel1", "gender": "M;;M", "homepage": ";;https://www.tuhh.de/ncps/team/landsiedel/", "dblp": "346/0118;;27/481.html", "google_scholar": "s-sj0tIAAAAJ;;_kenE7EAAAAJ", "orcid": "0000-0001-6115-4263;;0000-0001-6432-300X", "linkedin": ";;olaflandsiedel/?originalSubdomain=de", "or_profile": "~Janek_Haberer1;~Ali_Hojjat1;~Olaf_Landsiedel1", "aff": "Christian-Albrechts-Universit\u00e4t Kiel;;Christian-Albrechts-Universit\u00e4t Kiel", "aff_domain": "uni-kiel.de;;uni-kiel.de", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nhaberer2024hydravit,\ntitle={HydraViT: Stacking Heads for a Scalable ViT},\nauthor={Janek Haberer and Ali Hojjat and Olaf Landsiedel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kk0Eaunc58}\n}", "github": "", "reviewers": "aEuL;Yk3V;1s2T;2Apv", "pdf_size": 2949975, "rating": "6;6;7;7", "confidence": "3;4;4;4", "soundness": "2;3;4;3", "novelty": "3;4;3;3", "presentation": "3;4;4;3", "wc_summary": "76;81;66;87", "wc_strengths": "52;102;109;49", "wc_weaknesses": "249;94;51;371", "wc_questions": "73;67;19;168", "wc_limitations": "8;20;16;158", "wc_review": "458;364;261;833", "wc_reply_reviewers": "48;91;34;49", "wc_reply_authors": "0;0;0;187", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 77.5, 7.697402159170326 ], "wc_strengths_avg": [ 78.0, 27.631503759296198 ], "wc_weaknesses_avg": [ 191.25, 127.25245577198108 ], "wc_questions_avg": [ 81.75, 54.01562274009252 ], "wc_limitations_avg": [ 50.5, 62.215351803232615 ], "wc_review_avg": [ 479.0, 215.93170216529114 ], "wc_reply_reviewers_avg": [ 55.5, 21.33658829335187 ], "wc_reply_authors_avg": [ 46.75, 80.97337525384502 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_M_LzTxzYnQJ:scholar.google.com/&scioq=HydraViT:+Stacking+Heads+for+a+Scalable+ViT&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "uni-kiel.de;;uni-kiel.de", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Christian-Albrechts-Universit\u00e4t", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-kiel.de", "aff_unique_abbr": "CAU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kiel", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Robust Conformal Prediction Using Privileged Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93870", "id": "kkmPe0rzY1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kkmPe0rzY1", "openreview": "https://openreview.net/forum?id=kkmPe0rzY1", "poster": "/media/PosterPDFs/NeurIPS%202024/93870.png?t=1731696130.5687747", "project": "", "author_site": "Shai Feldman, Yaniv Romano", "tldr": "", "abstract": "We develop a method to generate prediction sets with a guaranteed coverage rate that is robust to corruptions in the training data, such as missing or noisy variables. \nOur approach builds on conformal prediction, a powerful framework to construct prediction sets that are valid under the i.i.d assumption. Importantly, naively applying conformal prediction does not provide reliable predictions in this setting, due to the distribution shift induced by the corruptions. \nTo account for the distribution shift, we assume access to privileged information (PI). The PI is formulated as additional features that explain the distribution shift, however, they are only available during training and absent at test time.\nWe approach this problem by introducing a novel generalization of weighted conformal prediction and support our method with theoretical coverage guarantees. \nEmpirical experiments on both real and synthetic datasets indicate that our approach achieves a valid coverage rate and constructs more informative predictions compared to existing methods, which are not supported by theoretical guarantees.", "keywords": "Conformal Prediction;Uncertainty Quantification;Distribution Shift;Corrupted Data;Privileged Information", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/94a8cd10f96199c5cbd95fa62606eb31e618d6d0.zip", "author": "Shai Feldman;Yaniv Romano", "authorids": "~Shai_Feldman1;~Yaniv_Romano1", "gender": "M;M", "homepage": ";https://sites.google.com/view/yaniv-romano/", "dblp": ";142/0021", "google_scholar": ";L_m67ywAAAAJ", "orcid": ";", "linkedin": "shai-feldman-444485212/;", "or_profile": "~Shai_Feldman1;~Yaniv_Romano1", "aff": "Computer Science Departmen, Technion-Israel Institute of Technology;Technion, Technion", "aff_domain": "cs.technion.ac.il;technion.ac.il", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfeldman2024robust,\ntitle={Robust Conformal Prediction Using Privileged Information},\nauthor={Shai Feldman and Yaniv Romano},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kkmPe0rzY1}\n}", "github": "", "reviewers": "R7TW;hB3A;UnMc;3YMV", "pdf_size": 4199480, "rating": "6;6;7;8", "confidence": "4;4;2;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "36;64;190;92", "wc_strengths": "21;79;118;119", "wc_weaknesses": "48;247;19;69", "wc_questions": "151;23;1;15", "wc_limitations": "6;42;1;3", "wc_review": "262;455;329;298", "wc_reply_reviewers": "60;151;0;0", "wc_reply_authors": "1413;223;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 58.0409338312195 ], "wc_strengths_avg": [ 84.25, 39.921015768639954 ], "wc_weaknesses_avg": [ 95.75, 89.11053529184976 ], "wc_questions_avg": [ 47.5, 60.27229877812858 ], "wc_limitations_avg": [ 13.0, 16.837458240482736 ], "wc_review_avg": [ 336.0, 72.6808090213641 ], "wc_reply_reviewers_avg": [ 52.75, 61.787438043667095 ], "wc_reply_authors_avg": [ 409.0, 586.7652852717175 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16391156885119109477&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "cs.technion.ac.il;technion.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Technion-Israel Institute of Technology;Technion - Israel Institute of Technology", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.technion.ac.il;https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "On the Scalability of GNNs for Molecular Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93869", "id": "klqhrq7fvB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=klqhrq7fvB", "openreview": "https://openreview.net/forum?id=klqhrq7fvB", "poster": "", "project": "", "author_site": "Maciej Sypetkowski, Frederik Wenkel, Farimah Poursafaei, Nia Dickson, Karush Suri, Philip Fradkin, Dominique Beaini", "tldr": "", "abstract": "Scaling deep learning models has been at the heart of recent revolutions in language modelling and image generation. Practitioners have observed a strong relationship between model size, dataset size, and performance. However, structure-based architectures such as Graph Neural Networks (GNNs) are yet to show the benefits of scale mainly due to lower efficiency of sparse operations, large data requirements, and lack of clarity about the effectiveness of various architectures. We address this drawback of GNNs by studying their scaling behavior. Specifically, we analyze message-passing networks, graph Transformers, and hybrid architectures on the largest public collection of 2D molecular graphs for supervised pretraining. For the first time, we observe that GNNs benefit tremendously from the increasing scale of depth, width, number of molecules and associated labels. A major factor is the diversity of the pretraining data that comprises thousands of labels per molecule derived from bio-assays, quantum simulations, transcriptomics and phenomic imaging. We further demonstrate strong finetuning scaling behavior on 38 highly competitive downstream tasks, outclassing previous large models. This gives rise to MolGPS, a new graph foundation model that allows to navigate the chemical space, outperforming the previous state-of-the-arts on 26 out the 38 downstream tasks. We hope that our work paves the way for an era where foundational GNNs drive pharmaceutical drug discovery.", "keywords": "Molecular Biology;Graph Neural Networks;Graph Transformers;Scaling Laws;Pretraining;Finetuning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Maciej Sypetkowski;Frederik Wenkel;Farimah Poursafaei;Nia Dickson;Karush Suri;Philip Fradkin;Dominique Beaini", "authorids": "~Maciej_Sypetkowski1;~Frederik_Wenkel1;~Farimah_Poursafaei1;~Nia_Dickson1;~Karush_Suri1;~Philip_Fradkin1;~Dominique_Beaini1", "gender": ";;F;F;M;M;M", "homepage": ";;;https://orcid.org/0000-0001-9079-8666;https://karush17.github.io/;https://phil-fradkin.github.io/;", "dblp": ";261/3715;277/0215;;252/3260;;201/8526", "google_scholar": ";;https://scholar.google.ca/citations?user=gZ7HEsMAAAAJ;;https://scholar.google.co.in/citations?user=ZFCHp9gAAAAJ;siqpD8oAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;;0000-0001-9079-8666;;;0000-0002-4613-9388", "linkedin": ";https://www.linkedin.com/mwlite/in/frederik-wenkel-669388185;farimah-poursafaei-133195167/?originalSubdomain=ca;;;phil-fradkin-330a417b/;dbeaini/", "or_profile": "~Maciej_Sypetkowski1;~Frederik_Wenkel1;~Farimah_Poursafaei1;~Nia_Dickson1;~Karush_Suri1;~Philip_Fradkin1;~Dominique_Beaini1", "aff": ";University of Montreal;Mila, Quebec AI Institute;NVIDIA Corporation;Recursion;University of Toronto;Mila - Institut Qu\u00e9b\u00e9cois d'intelligence artificielle", "aff_domain": ";umontreal.ca;mila.quebec;nvidia.com;valencelabs.com;toronto.edu;mila.quebec", "position": ";PhD student;Postdoc;Senior Software Engineer;Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsypetkowski2024on,\ntitle={On the Scalability of {GNN}s for Molecular Graphs},\nauthor={Maciej Sypetkowski and Frederik Wenkel and Farimah Poursafaei and Nia Dickson and Karush Suri and Philip Fradkin and Dominique Beaini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=klqhrq7fvB}\n}", "github": "", "reviewers": "Lxfs;YrHv;FQ72;kaW1", "pdf_size": 3695611, "rating": "3;6;7;7", "confidence": "5;3;4;4", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "52;66;49;78", "wc_strengths": "39;27;166;48", "wc_weaknesses": "172;62;85;62", "wc_questions": "2;2;169;16", "wc_limitations": "50;26;1;1", "wc_review": "315;183;470;205", "wc_reply_reviewers": "176;10;36;11", "wc_reply_authors": "1167;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.25, 11.60549438843516 ], "wc_strengths_avg": [ 70.0, 55.924055646921744 ], "wc_weaknesses_avg": [ 95.25, 45.29555717727733 ], "wc_questions_avg": [ 47.25, 70.52437521878517 ], "wc_limitations_avg": [ 19.5, 20.35313243704762 ], "wc_review_avg": [ 293.25, 113.64060673896458 ], "wc_reply_reviewers_avg": [ 58.25, 68.77635858345512 ], "wc_reply_authors_avg": [ 291.75, 505.32582310821994 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6469966392206306, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2299910863944292764&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";umontreal.ca;mila.quebec;nvidia.com;valencelabs.com;toronto.edu;mila.quebec", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Montreal;Quebec AI Institute;NVIDIA;Recursion;University of Toronto;Mila - Quebec Artificial Intelligence Institute", "aff_unique_dep": ";AI Institute;NVIDIA Corporation;;;Artificial Intelligence", "aff_unique_url": "https://wwwumontreal.ca;https://www.mila.quebec;https://www.nvidia.com;;https://www.utoronto.ca;https://mila.quebec", "aff_unique_abbr": "UM;Mila;NVIDIA;;U of T;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";Quebec", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Canada;United States;" }, { "title": "Group-wise oracle-efficient algorithms for online multi-group learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93868", "id": "klsyhjLlX5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=klsyhjLlX5", "openreview": "https://openreview.net/forum?id=klsyhjLlX5", "poster": "", "project": "", "author_site": "Samuel Deng, Jingwen Liu, Daniel Hsu", "tldr": "", "abstract": "We study the problem of online multi-group learning, a learning model in which an online learner must simultaneously achieve small prediction regret on a large collection of (possibly overlapping) subsequences corresponding to a family of groups. Groups are subsets of the context space, and in fairness applications, they may correspond to subpopulations defined by expressive functions of demographic attributes. In this paper, we design such oracle-efficient algorithms with sublinear regret under a variety of settings, including: (i) the i.i.d. setting, (ii) the adversarial setting with smoothed context distributions, and (iii) the adversarial transductive setting.", "keywords": "multi-group learning;online learning;oracle-efficient", "primary_area": "learning_theory", "supplementary_material": "", "author": "Samuel Deng;Jingwen Liu;Daniel Hsu", "authorids": "~Samuel_Deng1;~Jingwen_Liu1;~Daniel_Hsu1", "gender": "M;F;M", "homepage": "https://samuel-deng.github.io/;https://simons.berkeley.edu/people/jingwen-liu;https://www.cs.columbia.edu/~djhsu/", "dblp": "251/9512;;h/DanielHsu.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;Bp6tvy0AAAAJ", "orcid": ";;0000-0002-3495-7113", "linkedin": ";;", "or_profile": "~Samuel_Deng1;~Jingwen_Liu1;~Daniel_Hsu1", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;cs.columbia.edu;columbia.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndeng2024groupwise,\ntitle={Group-wise oracle-efficient algorithms for online multi-group learning},\nauthor={Samuel Deng and Jingwen Liu and Daniel Hsu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=klsyhjLlX5}\n}", "github": "", "reviewers": "sHAP;K3De;TY8p;wskr", "pdf_size": 491238, "rating": "6;6;7;7", "confidence": "4;3;3;5", "soundness": "3;3;3;4", "novelty": "2;3;4;3", "presentation": "2;2;4;4", "wc_summary": "48;74;66;113", "wc_strengths": "40;149;101;61", "wc_weaknesses": "147;88;92;109", "wc_questions": "5;2;9;18", "wc_limitations": "9;2;1;7", "wc_review": "249;315;269;308", "wc_reply_reviewers": "29;22;15;55", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 75.25, 23.742103950576915 ], "wc_strengths_avg": [ 87.75, 41.601532423698046 ], "wc_weaknesses_avg": [ 109.0, 23.313086453749534 ], "wc_questions_avg": [ 8.5, 6.020797289396148 ], "wc_limitations_avg": [ 4.75, 3.344772040064913 ], "wc_review_avg": [ 285.25, 27.2981226460722 ], "wc_reply_reviewers_avg": [ 30.25, 15.122417134836613 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4212311335600479395&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "columbia.edu;cs.columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Normal-GS: 3D Gaussian Splatting with Normal-Involved Rendering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93867", "id": "kngLs5H6l1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kngLs5H6l1", "openreview": "https://openreview.net/forum?id=kngLs5H6l1", "poster": "", "project": "", "author_site": "Meng Wei, Qianyi Wu, Jianmin Zheng, Hamid Rezatofighi, Jianfei Cai", "tldr": "", "abstract": "Rendering and reconstruction are long-standing topics in computer vision and graphics. Achieving both high rendering quality and accurate geometry is a challenge. Recent advancements in 3D Gaussian Splatting (3DGS) have enabled high-fidelity novel view synthesis at real-time speeds. However, the noisy and discrete nature of 3D Gaussian primitives hinders accurate surface estimation. Previous attempts to regularize 3D Gaussian normals often degrade rendering quality due to the fundamental disconnect between normal vectors and the rendering pipeline in 3DGS-based methods. Therefore, we introduce Normal-GS, a novel approach that integrates normal vectors into the 3DGS rendering pipeline. The core idea is to model the interaction between normals and incident lighting using the physically-based rendering equation. Our approach re-parameterizes surface colors as the product of normals and a designed Integrated Directional Illumination Vector (IDIV). To optimize memory usage and simplify optimization, we employ an anchor-based 3DGS to implicitly encode locally-shared IDIVs. Additionally, Normal-GS leverages optimized normals and Integrated Directional Encoding (IDE) to accurately model specular effects, enhancing both rendering quality and surface normal precision. Extensive experiments demonstrate that Normal-GS achieves near state-of-the-art visual quality while obtaining accurate surface normals and preserving real-time rendering performance.", "keywords": "neural rendering;3D Gaussian Splatting;neural radiance field;computer vision;computer graphics", "primary_area": "machine_vision", "supplementary_material": "", "author": "Meng Wei;Qianyi Wu;Jianmin Zheng;Hamid Rezatofighi;Jianfei Cai", "authorids": "~Meng_Wei10;~Qianyi_Wu2;~Jianmin_Zheng1;~Hamid_Rezatofighi1;~Jianfei_Cai1", "gender": "M;M;M;M;M", "homepage": "https://github.com/Meng-Wei;https://wuqianyi.top;https://personal.ntu.edu.sg/asjmzheng/;https://research.monash.edu/en/persons/hamid-rezatofighi;https://jianfei-cai.github.io/", "dblp": ";217/2011;09/5452;37/8192;83/6096", "google_scholar": ";XI0RtesAAAAJ;https://scholar.google.com.sg/citations?user=sGCf2k0AAAAJ;https://scholar.google.com.au/citations?user=VxAuxMwAAAAJ;https://scholar.google.com.tw/citations?user=N6czCoUAAAAJ", "orcid": ";;;;", "linkedin": ";;;s-hamid-rezatofighi-b275a052/;", "or_profile": "~Meng_Wei10;~Qianyi_Wu2;~Jianmin_Zheng1;~Hamid_Rezatofighi1;~Jianfei_Cai1", "aff": "Monash University;Monash University;Nanyang Technological University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;ntu.edu.sg;monash.edu;monash.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwei2024normalgs,\ntitle={Normal-{GS}: 3D Gaussian Splatting with Normal-Involved Rendering},\nauthor={Meng Wei and Qianyi Wu and Jianmin Zheng and Hamid Rezatofighi and Jianfei Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kngLs5H6l1}\n}", "github": "", "reviewers": "5i8r;Gwpd;68er;cnbL", "pdf_size": 38535425, "rating": "3;6;6;7", "confidence": "5;4;4;4", "soundness": "3;3;4;3", "novelty": "2;2;3;3", "presentation": "2;2;4;3", "wc_summary": "77;85;100;141", "wc_strengths": "57;57;72;82", "wc_weaknesses": "344;158;51;212", "wc_questions": "76;12;91;112", "wc_limitations": "1;9;1;46", "wc_review": "555;321;315;593", "wc_reply_reviewers": "208;75;18;99", "wc_reply_authors": "534;127;114;127", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 100.75, 24.661457783350926 ], "wc_strengths_avg": [ 67.0, 10.606601717798213 ], "wc_weaknesses_avg": [ 191.25, 105.52102870992114 ], "wc_questions_avg": [ 72.75, 37.332124236373154 ], "wc_limitations_avg": [ 14.25, 18.619546181365433 ], "wc_review_avg": [ 446.0, 128.7206277175496 ], "wc_reply_reviewers_avg": [ 100.0, 68.94563075351476 ], "wc_reply_authors_avg": [ 225.5, 178.1916103524518 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9028168295576134694&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "monash.edu;monash.edu;ntu.edu.sg;monash.edu;monash.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Monash University;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "Monash;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Australia;Singapore" }, { "title": "shapiq: Shapley Interactions for Machine Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97533", "id": "knxGmi6SJi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=knxGmi6SJi", "openreview": "https://openreview.net/forum?id=knxGmi6SJi", "poster": "", "project": "", "author_site": "Maximilian Muschalik, Hubert Baniecki, Fabian Fumagalli, Patrick Kolpaczki, Barbara Hammer, Eyke H\u00fcllermeier", "tldr": "", "abstract": "Originally rooted in game theory, the Shapley Value (SV) has recently become an important tool in machine learning research. Perhaps most notably, it is used for feature attribution and data valuation in explainable artificial intelligence. Shapley Interactions (SIs) naturally extend the SV and address its limitations by assigning joint contributions to groups of entities, which enhance understanding of black box machine learning models. Due to the exponential complexity of computing SVs and SIs, various methods have been proposed that exploit structural assumptions or yield probabilistic estimates given limited resources. In this work, we introduce shapiq, an open-source Python package that unifies state-of-the-art algorithms to efficiently compute SVs and any-order SIs in an application-agnostic framework. Moreover, it includes a benchmarking suite containing 11 machine learning applications of SIs with pre-computed games and ground-truth values to systematically assess computational performance across domains. For practitioners, shapiq is able to explain and visualize any-order feature interactions in predictions of models, including vision transformers, language models, as well as XGBoost and LightGBM with TreeSHAP-IQ. With shapiq, we extend shap beyond feature attributions and consolidate the application of SVs and SIs in machine learning that facilitates future research. The source code and documentation are available at https://github.com/mmschlk/shapiq.", "keywords": "Explainable Artificial Intelligence;Shapley Interaction;Shapley Value;Game Theory;Feature Interaction;Feature Attribution", "primary_area": "", "supplementary_material": "/attachment/02c21c5e7ad9b382eaa8161b1229dc3152a19385.pdf", "author": "Maximilian Muschalik;Hubert Baniecki;Fabian Fumagalli;Patrick Kolpaczki;Barbara Hammer;Eyke H\u00fcllermeier", "authorids": "~Maximilian_Muschalik1;~Hubert_Baniecki1;~Fabian_Fumagalli1;~Patrick_Kolpaczki1;~Barbara_Hammer4;~Eyke_H\u00fcllermeier1", "gender": "M;M;M;M;F;M", "homepage": "https://maxmuschalik.com/;https://hbaniecki.com;https://hammer-lab.techfak.uni-bielefeld.de/people/316634936/;https://www.kiml.ifi.lmu.de/people/employees/kolpaczki/index.html;https://www.techfak.uni-bielefeld.de/~bhammer/;https://cs.uni-paderborn.de/index.php?id=60202", "dblp": "329/4090;264/5189;329/4508;304/9952;h/BarbaraHammer;h/EykeHullermeier", "google_scholar": "https://scholar.google.de/citations?user=jJBCW74AAAAJ;H72DRC0AAAAJ;anUMB08AAAAJ;PVwqZS8AAAAJ;1d3OxaUAAAAJ;https://scholar.google.de/citations?user=usVJeNN3xFAC", "orcid": "0000-0002-6921-0204;;0000-0003-3955-3510;;0000-0002-2615-8151;0000-0002-9944-4108", "linkedin": "maximilian-muschalik/;hbaniecki;fabian-fumagalli/;;;", "or_profile": "~Maximilian_Muschalik1;~Hubert_Baniecki1;~Fabian_Fumagalli1;~Patrick_Kolpaczki1;~Barbara_Hammer4;~Eyke_H\u00fcllermeier1", "aff": "Institute of Computer Science, Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Universit\u00e4t Bielefeld;Universit\u00e4t Paderborn;Universit\u00e4t Bielefeld;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "ifi.lmu.de;lmu.de;uni-bielefeld.de;uni-paderborn.de;uni-bielefeld.de;lmu.de", "position": "PhD student;Intern;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmuschalik2024shapiq,\ntitle={shapiq: Shapley Interactions for Machine Learning},\nauthor={Maximilian Muschalik and Hubert Baniecki and Fabian Fumagalli and Patrick Kolpaczki and Barbara Hammer and Eyke H{\\\"u}llermeier},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=knxGmi6SJi}\n}", "github": "", "reviewers": "StcH;78Q9;D8y8;Z6Aj", "pdf_size": 4088667, "rating": "6;7;7;9", "confidence": "4;5;4;4", "wc_summary_and_contributions": "43;111;88;51", "wc_strengths": "46;3;65;334", "wc_improvement": "469;37;10;287", "wc_limitations": "13;1;31;35", "wc_correctness": "6;1;8;6", "wc_clarity": "7;1;13;4", "wc_relation_to_prior_work": "29;1;14;27", "wc_documentation": "11;1;29;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "625;157;259;756", "wc_reply_reviewers": "0;0;37;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 73.25, 27.625848403261752 ], "wc_strengths_avg": [ 112.0, 130.12493996156155 ], "wc_improvement_avg": [ 200.75, 188.8099242624709 ], "wc_limitations_avg": [ 20.0, 13.74772708486752 ], "wc_correctness_avg": [ 5.25, 2.5860201081971503 ], "wc_clarity_avg": [ 6.25, 4.437059837324712 ], "wc_relation_to_prior_work_avg": [ 17.75, 11.255554184490428 ], "wc_documentation_avg": [ 13.0, 10.099504938362077 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 449.25, 248.2885166494818 ], "wc_reply_reviewers_avg": [ 9.25, 16.021469970012117 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11988462780313804138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "email": "ifi.lmu.de;lmu.de;uni-bielefeld.de;uni-paderborn.de;uni-bielefeld.de;lmu.de", "author_num": 6, "aff_unique_index": "0;0;1;2;1;0", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Universit\u00e4t Bielefeld;University of Paderborn", "aff_unique_dep": "Institute of Computer Science;;", "aff_unique_url": "https://www.uni-muenchen.de;https://www.uni-bielefeld.de/;https://www.uni-paderborn.de", "aff_unique_abbr": "LMU M\u00fcnchen;Uni Bielefeld;UPB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Functional Gradient Flows for Constrained Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93866", "id": "kpo6ZCgVZH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kpo6ZCgVZH", "openreview": "https://openreview.net/forum?id=kpo6ZCgVZH", "poster": "/media/PosterPDFs/NeurIPS%202024/93866.png?t=1733382236.8512287", "project": "", "author_site": "Shiyue Zhang, Longlin Yu, Ziheng Cheng, Cheng Zhang", "tldr": "", "abstract": "Recently, through a unified gradient flow perspective of Markov chain Monte Carlo (MCMC) and variational inference (VI), particle-based variational inference methods (ParVIs) have been proposed that tend to combine the best of both worlds. While typical ParVIs such as Stein Variational Gradient Descent (SVGD) approximate the gradient flow within a reproducing kernel Hilbert space (RKHS), many attempts have been made recently to replace RKHS with more expressive function spaces, such as neural networks. While successful, these methods are mainly designed for sampling from unconstrained domains. In this paper, we offer a general solution to constrained sampling by introducing a boundary condition for the gradient flow which would confine the particles within the specific domain. This allows us to propose a new functional gradient ParVI method for constrained sampling, called *constrained functional gradient flow* (CFG), with provable continuous-time convergence in total variation (TV). We also present novel numerical strategies to handle the boundary integral term arising from the domain constraints. Our theory and experiments demonstrate the effectiveness of the proposed framework.", "keywords": "particle-based variational inference;constrained sampling;functional gradient flow;boundary integral", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/305109022a9222fb2d5d17ea07f1da1e210e2a51.zip", "author": "Shiyue Zhang;Longlin Yu;Ziheng Cheng;Cheng Zhang", "authorids": "~Shiyue_Zhang3;~Longlin_Yu1;~Ziheng_Cheng4;~Cheng_Zhang3", "gender": "M;M;M;M", "homepage": "https://github.com/ShiyueZhang66;https://github.com/longinYu;https://alexczh1.github.io/;https://zcrabbit.github.io", "dblp": ";;;", "google_scholar": "nu6YfFkAAAAJ;;M8Hz2NSNe3QC;PddDrLgAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shiyue_Zhang3;~Longlin_Yu1;~Ziheng_Cheng4;~Cheng_Zhang3", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024functional,\ntitle={Functional Gradient Flows for Constrained Sampling},\nauthor={Shiyue Zhang and Longlin Yu and Ziheng Cheng and Cheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kpo6ZCgVZH}\n}", "github": "", "reviewers": "VuAH;s6xw;YhS1;SeKQ;2Jzp;PQmA", "pdf_size": 9981853, "rating": "5;5;6;7;7;7", "confidence": "3;4;5;4;3;2", "soundness": "3;3;4;4;3;3", "novelty": "3;2;3;3;3;3", "presentation": "2;3;3;4;4;3", "wc_summary": "195;173;90;104;81;73", "wc_strengths": "111;74;33;106;39;30", "wc_weaknesses": "231;193;77;28;9;23", "wc_questions": "380;138;18;50;7;11", "wc_limitations": "22;5;4;5;1;6", "wc_review": "939;583;222;293;137;143", "wc_reply_reviewers": "68;76;11;30;0;0", "wc_reply_authors": "13;21;17;17;0;0", "reply_reviewers": "1;1;1;1;0;0", "reply_authors": "2;2;2;2;1;1", "rating_avg": [ 6.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.5, 0.9574271077563381 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.8333333333333335, 0.3726779962499649 ], "presentation_avg": [ 3.1666666666666665, 0.6871842709362768 ], "wc_summary_avg": [ 119.33333333333333, 47.11215931747934 ], "wc_strengths_avg": [ 65.5, 33.668729309751704 ], "wc_weaknesses_avg": [ 93.5, 87.06271685782995 ], "wc_questions_avg": [ 100.66666666666667, 132.68718434305887 ], "wc_limitations_avg": [ 7.166666666666667, 6.8170537787391865 ], "wc_review_avg": [ 386.1666666666667, 289.02503736220194 ], "wc_reply_reviewers_avg": [ 30.833333333333332, 30.86754858351333 ], "wc_reply_authors_avg": [ 11.333333333333334, 8.339997335464536 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2909286827258562, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-EfZjiWsNJwJ:scholar.google.com/&scioq=Functional+Gradient+Flows+for+Constrained+Sampling&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Aligner: Efficient Alignment by Learning to Correct", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93865", "id": "kq166jACVP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kq166jACVP", "openreview": "https://openreview.net/forum?id=kq166jACVP", "poster": "/media/PosterPDFs/NeurIPS%202024/93865.png?t=1731600576.4826484", "project": "", "author_site": "Jiaming Ji, Boyuan Chen, Hantao Lou, Donghai Hong, Borong Zhang, Xuehai Pan, Tianyi (Alex) Qiu, Juntao Dai, Yaodong Yang", "tldr": "", "abstract": "With the rapid development of large language models (LLMs) and ever-evolving practical requirements, finding an efficient and effective alignment method has never been more critical. However, the tension between the complexity of current alignment methods and the need for rapid iteration in deployment scenarios necessitates the development of a model-agnostic alignment approach that can operate under these constraints. In this paper, we introduce Aligner, a novel and simple alignment paradigm that learns the correctional residuals between preferred and dispreferred answers using a small model. Designed as a model-agnostic, plug-and-play module, Aligner can be directly applied to various open-source and API-based models with only one-off training, making it suitable for rapid iteration. Notably, Aligner can be applied to any powerful, large-scale upstream models. Moreover, it can even iteratively bootstrap the upstream models using corrected responses as synthetic human preference data, breaking through the model's performance ceiling. Our experiments demonstrate performance improvements by deploying the same Aligner model across 11 different LLMs, evaluated on the 3H dimensions (helpfulness, harmlessness, and honesty). Specifically, Aligner-7B has achieved an average improvement of 68.9% in helpfulness and 22.8% in harmlessness across the tested LLMs while also effectively reducing hallucination. In the Alpaca-Eval leaderboard, stacking Aligner-2B on GPT-4 Turbo improved its LC Win Rate from 55.0% to 58.3%, surpassing GPT-4 Omni's 57.5% Win Rate (community report).", "keywords": "Large Language Models;Alignment;Reinforcement Learning from Human Feedback", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Jiaming Ji;Boyuan Chen;Hantao Lou;Donghai Hong;Borong Zhang;Xuehai Pan;Tianyi Qiu;Juntao Dai;Yaodong Yang", "authorids": "~Jiaming_Ji2;~Boyuan_Chen4;~Hantao_Lou1;~Donghai_Hong1;~Borong_Zhang1;~Xuehai_Pan1;~Tianyi_Qiu1;~Juntao_Dai1;~Yaodong_Yang1", "gender": "M;M;M;M;;M;M;M;M", "homepage": "https://jijiaming.com;https://cby-pku.github.io/;http://htlou.github.io;https://github.com/Kass123777;;https://github.com/XuehaiPan;https://tianyiqiu.net;https://person.zju.edu.cn/jtdai;https://www.yangyaodong.com", "dblp": "313/9356.html;;;367/7553;;;149/9552;;170/1496-1", "google_scholar": "aW8WbYYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;h1s9iX4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;VIwJg4gAAAAJ;teiNc0sAAAAJ;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ", "orcid": ";;;;;;;;0000-0001-8132-5613", "linkedin": ";;;;;;ty-qiu/;;yaodong-yang", "or_profile": "~Jiaming_Ji2;~Boyuan_Chen4;~Hantao_Lou1;~Donghai_Hong1;~Borong_Zhang1;~Xuehai_Pan1;~Tianyi_Qiu1;~Juntao_Dai1;~Yaodong_Yang1", "aff": "Peking University;Peking University;Peking University;Nanjing University;;Peking University;University of California, Berkeley;Zhejiang University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;nju.edu;;pku.edu.cn;berkeley.edu;zju.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;;PhD student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nji2024aligner,\ntitle={Aligner: Efficient Alignment by Learning to Correct},\nauthor={Jiaming Ji and Boyuan Chen and Hantao Lou and Donghai Hong and Borong Zhang and Xuehai Pan and Tianyi Qiu and Juntao Dai and Yaodong Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kq166jACVP}\n}", "github": "", "reviewers": "7osp;JeaM;tcrV;3sZj", "pdf_size": 2934029, "rating": "5;6;7;7", "confidence": "4;5;3;3", "soundness": "3;2;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "72;58;55;232", "wc_strengths": "47;32;47;148", "wc_weaknesses": "74;114;29;375", "wc_questions": "17;34;38;29", "wc_limitations": "8;1;6;15", "wc_review": "218;239;175;799", "wc_reply_reviewers": "31;32;13;34", "wc_reply_authors": "2532;3338;3205;3625", "reply_reviewers": "1;1;1;1", "reply_authors": "8;9;9;10", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.25, 74.03504237859258 ], "wc_strengths_avg": [ 68.5, 46.30604712129939 ], "wc_weaknesses_avg": [ 148.0, 134.46374976178524 ], "wc_questions_avg": [ 29.5, 7.88986691902975 ], "wc_limitations_avg": [ 7.5, 5.024937810560445 ], "wc_review_avg": [ 357.75, 255.79813818712597 ], "wc_reply_reviewers_avg": [ 27.5, 8.440971508067067 ], "wc_reply_authors_avg": [ 3175.0, 401.066702681736 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 9.0, 0.7071067811865476 ], "replies_avg": [ 47, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10809872303572345357&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pku.edu.cn;pku.edu.cn;stu.pku.edu.cn;nju.edu;;pku.edu.cn;berkeley.edu;zju.edu.cn;pku.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;1;0;2;3;0", "aff_unique_norm": "Peking University;Nanjing University;University of California, Berkeley;Zhejiang University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.nju.edu.cn;https://www.berkeley.edu;https://www.zju.edu.cn", "aff_unique_abbr": "Peking U;Nanjing U;UC Berkeley;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Optimal Flow Matching: Learning Straight Trajectories in Just One Step", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93864", "id": "kqmucDKVcU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kqmucDKVcU", "openreview": "https://openreview.net/forum?id=kqmucDKVcU", "poster": "", "project": "", "author_site": "Nikita Kornilov, Petr Mokrov, Alexander Gasnikov, Aleksandr Korotin", "tldr": "", "abstract": "Over the several recent years, there has been a boom in development of Flow Matching (FM) methods for generative modeling. One intriguing property pursued by the community is the ability to learn flows with straight trajectories which realize the Optimal Transport (OT) displacements. Straightness is crucial for the fast integration (inference) of the learned flow's paths. Unfortunately, most existing flow straightening methods are based on non-trivial iterative FM procedures which accumulate the error during training or exploit heuristics based on minibatch OT. To address these issues, we develop and theoretically justify the novel Optimal Flow Matching approach which allows recovering the straight OT displacement for the quadratic transport in just one FM step. The main idea of our approach is the employment of vector field for FM which are parameterized by convex functions. The code of our OFM implementation and the conducted experiments is available at https://github.com/Jhomanik/Optimal-Flow-Matching", "keywords": "Flow Matching;Optimal Transport;Rectified Flow;straight trajectories", "primary_area": "generative_models", "supplementary_material": "/attachment/9c77a7619ed0f9aedd65011b02c018c185b973bb.zip", "author": "Nikita Maksimovich Kornilov;Petr Mokrov;Alexander Gasnikov;Alexander Korotin", "authorids": "~Nikita_Maksimovich_Kornilov1;~Petr_Mokrov1;~Alexander_Gasnikov1;~Alexander_Korotin2", "gender": "Not Specified;M;M;M", "homepage": "https://clck.ru/3469vX;https://github.com/PetrMokrov;https://arxiv.org/search/?query=Gasnikov&searchtype=all&source=header;https://akorotin.netlify.app", "dblp": ";;153/1930;209/9906", "google_scholar": ";CRsi4IkAAAAJ;AmeE8qkAAAAJ;https://scholar.google.ru/citations?user=1rIIvjAAAAAJ", "orcid": ";;;0000-0003-4286-925X", "linkedin": ";;;", "or_profile": "~Nikita_Maksimovich_Kornilov1;~Petr_Mokrov1;~Alexander_Vladimirovich_Gasnikov1;~Alexander_Andreevich_Korotin1", "aff": ";Skolkovo Institute of Science and Technology;Moscow Institute of Physics and Technology;Skolkovo Institute of Science and Technology", "aff_domain": ";skolkovotech.ru;mipt.ru;skoltech.ru", "position": ";PhD student;Associate Professor;Head of Research Group", "bibtex": "@inproceedings{\nkornilov2024optimal,\ntitle={Optimal Flow Matching: Learning Straight Trajectories in Just One Step},\nauthor={Nikita Maksimovich Kornilov and Petr Mokrov and Alexander Gasnikov and Alexander Korotin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kqmucDKVcU}\n}", "github": "", "reviewers": "nvT5;Bhuh;YPMs;qt8g", "pdf_size": 11809211, "rating": "5;5;6;7", "confidence": "4;4;2;3", "soundness": "2;3;2;4", "novelty": "2;2;2;4", "presentation": "2;2;1;3", "wc_summary": "105;91;33;118", "wc_strengths": "62;19;11;100", "wc_weaknesses": "256;148;98;74", "wc_questions": "228;69;128;49", "wc_limitations": "74;47;1;6", "wc_review": "725;374;271;347", "wc_reply_reviewers": "159;37;73;15", "wc_reply_authors": "527;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.75, 32.46825372575495 ], "wc_strengths_avg": [ 48.0, 35.742132001323036 ], "wc_weaknesses_avg": [ 144.0, 69.95712972957081 ], "wc_questions_avg": [ 118.5, 69.57190524917368 ], "wc_limitations_avg": [ 32.0, 30.108138434649195 ], "wc_review_avg": [ 429.25, 174.8776357914299 ], "wc_reply_reviewers_avg": [ 71.0, 54.86346689738081 ], "wc_reply_authors_avg": [ 131.75, 228.19769389719957 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=53374582255542982&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 3, "email": ";skolkovotech.ru;mipt.ru;skoltech.ru", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Moscow Institute of Physics and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.skoltech.ru;https://www.mipt.ru/en", "aff_unique_abbr": "Skoltech;MIPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Tell What You Hear From What You See - Video to Audio Generation Through Text", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93863", "id": "kr7eN85mIT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kr7eN85mIT", "openreview": "https://openreview.net/forum?id=kr7eN85mIT", "poster": "", "project": "", "author_site": "Xiulong Liu, Kun Su, Eli Shlizerman", "tldr": "", "abstract": "The content of visual and audio scenes is multi-faceted such that a video stream can\nbe paired with various audio streams and vice-versa. Thereby, in video-to-audio\ngeneration task, it is imperative to introduce steering approaches for controlling the\ngenerated audio. While Video-to-Audio generation is a well-established generative\ntask, existing methods lack such controllability. In this work, we propose VATT, a\nmulti-modal generative framework that takes a video and an optional text prompt\nas input, and generates audio and optional textual description (caption) of the\naudio. Such a framework has two unique advantages: i) Video-to-Audio generation\nprocess can be refined and controlled via text which complements the context\nof the visual information, and ii) The model can suggest what audio to generate\nfor the video by generating audio captions. VATT consists of two key modules:\nVATT Converter, which is an LLM that has been fine-tuned for instructions and\nincludes a projection layer that maps video features to the LLM vector space, and\nVATT Audio, a bi-directional transformer that generates audio tokens from visual\nframes and from optional text prompt using iterative parallel decoding. The audio\ntokens and the text prompt are used by a pretrained neural codec to convert them\ninto a waveform. Our experiments show that when VATT is compared to existing\nvideo-to-audio generation methods in objective metrics, such as VGGSound audiovisual dataset, it achieves competitive performance when the audio caption is\nnot provided. When the audio caption is provided as a prompt, VATT achieves\neven more refined performance (with lowest KLD score of 1.41). Furthermore,\nsubjective studies asking participants to choose the most compatible generated\naudio for a given silent video, show that VATT Audio has been chosen on average\nas a preferred generated audio than the audio generated by existing methods. VATT\nenables controllable video-to-audio generation through text as well as suggesting\ntext prompts for videos through audio captions, unlocking novel applications such\nas text-guided video-to-audio generation and video-to-audio captioning.", "keywords": "multi-modal learning;audio-visual learning;multi-modal large-language-model;text-guided video-to-audio generation;video-to-audio captioning", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Xiulong Liu;Kun Su;Eli Shlizerman", "authorids": "~Xiulong_Liu1;~Kun_Su1;~Eli_Shlizerman1", "gender": "M;M;", "homepage": ";https://kun-su.netlify.app/;http://faculty.washington.edu/shlizee/", "dblp": ";184/8269;00/9501", "google_scholar": "e5GPhrMAAAAJ;y52GkywAAAAJ;oJnSO50AAAAJ", "orcid": ";0009-0004-8112-9419;0000-0002-3136-4531", "linkedin": "xiulong-liu-33040a130/;;", "or_profile": "~Xiulong_Liu1;~Kun_Su1;~Eli_Shlizerman1", "aff": "University of Washington, Seattle;University of Washington, Seattle;University of Washington", "aff_domain": "uw.edu;uw.edu;u.washington.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2024tell,\ntitle={Tell What You Hear From What You See - Video to Audio Generation Through Text},\nauthor={Xiulong Liu and Kun Su and Eli Shlizerman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kr7eN85mIT}\n}", "github": "", "reviewers": "Gn3y;rzvn;BS9v;5yrj", "pdf_size": 47375462, "rating": "5;5;6;6", "confidence": "4;5;5;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "52;70;129;193", "wc_strengths": "43;32;71;76", "wc_weaknesses": "141;122;80;231", "wc_questions": "46;2;80;92", "wc_limitations": "6;1;8;7", "wc_review": "288;227;368;599", "wc_reply_reviewers": "17;0;18;31", "wc_reply_authors": "28;95;27;28", "reply_reviewers": "1;0;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.0, 55.24943438624508 ], "wc_strengths_avg": [ 55.5, 18.5 ], "wc_weaknesses_avg": [ 143.5, 55.12939324897382 ], "wc_questions_avg": [ 55.0, 34.942810419312295 ], "wc_limitations_avg": [ 5.5, 2.692582403567252 ], "wc_review_avg": [ 370.5, 141.08242271806932 ], "wc_reply_reviewers_avg": [ 16.5, 11.01135777277262 ], "wc_reply_authors_avg": [ 44.5, 29.159046623646667 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1375361734408198844&as_sdt=4005&sciodt=0,6&hl=en", "gs_version_total": 3, "email": "uw.edu;uw.edu;u.washington.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ks0FrTSCnK", "title": "Rethinking Open-set Noise in Learning with Noisy Labels", "track": "main", "status": "Reject", "tldr": "", "abstract": "To reduce reliance on labeled data, learning with noisy labels (LNL) has gained increasing attention. However, prevailing works typically assume that such datasets are primarily affected by closed-set noise (where the true/clean labels of noisy samples come from another known category), and ignore therefore the ubiquitous presence of open-set noise (where the true/clean labels of noisy samples may not belong to any known category).\nIn this paper, we formally refine the LNL problem setting considering the presence of open-set noise. We theoretically analyze and compare the effects of open-set noise and closed-set noise, as well as the effects between different open-set noise modes. We also analyze common open-set noise detection mechanisms based on prediction entropy values. To empirically validate the theoretical results, we construct two open-set noisy datasets - CIFAR100-O/ImageNet-O and introduce a novel open-set test set for the widely used WebVision benchmark. Our work suggests that open-set noise exhibits qualitatively and quantitatively distinct characteristics, and how to fairly and comprehensively evaluate models in this condition requires more exploration.", "keywords": "Open-set noise;Noisy labels", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chen Feng;Nicu Sebe;Ioannis Patras", "authorids": "~Chen_Feng3;~Nicu_Sebe1;~Ioannis_Patras2", "gender": "M;M;M", "homepage": "https://mrchenfeng.github.io/;http://disi.unitn.it/~sebe/;http://www.eecs.qmul.ac.uk/~ioannisp/", "dblp": ";20/3519;18/1556", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=stFCYOAAAAAJ;https://scholar.google.com.tw/citations?user=OBYLxRkAAAAJ", "orcid": "0000-0001-9199-559X;0000-0002-6597-7248;0000-0003-3913-4738", "linkedin": "drchenfeng/;;ioannis-patras-1053767/", "or_profile": "~Chen_Feng3;~Nicu_Sebe1;~Ioannis_Patras2", "aff": "Queen Mary University London;University of Trento;Queen Mary, University of London", "aff_domain": "qmul.ac.uk;unitn.it;qmul.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024rethinking,\ntitle={Rethinking Open-set Noise in Learning with Noisy Labels},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=ks0FrTSCnK}\n}", "github": "", "project": "", "reviewers": "AwyQ;3Ttr;gSVD;6bHU", "site": "https://openreview.net/forum?id=ks0FrTSCnK", "pdf_size": 9815359, "rating": "4;5;6;6", "confidence": "4;3;4;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "111;122;110;49", "wc_strengths": "26;25;113;49", "wc_weaknesses": "23;150;40;124", "wc_questions": "207;9;23;2", "wc_limitations": "1;21;44;2", "wc_review": "368;327;330;226", "wc_reply_reviewers": "186;231;0;33", "wc_reply_authors": "368;70;0;51", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.0, 28.679260799399973 ], "wc_strengths_avg": [ 53.25, 35.80764583158184 ], "wc_weaknesses_avg": [ 84.25, 53.881235138032984 ], "wc_questions_avg": [ 60.25, 85.06284441517342 ], "wc_limitations_avg": [ 17.0, 17.507141400011598 ], "wc_review_avg": [ 312.75, 52.6278205894943 ], "wc_reply_reviewers_avg": [ 112.5, 98.0063773435178 ], "wc_reply_authors_avg": [ 122.25, 144.17415683817956 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4545454545454545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GBoA0XRKpOwJ:scholar.google.com/&scioq=Rethinking+Open-set+Noise+in+Learning+with+Noisy+Labels&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "Queen Mary University of London;University of Trento;Queen Mary, University of London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.qmul.ac.uk;https://www.unitn.it;https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL;UniTN;QMUL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Italy" }, { "title": "TSGM: A Flexible Framework for Generative Modeling of Synthetic Time Series", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97532", "id": "ktYaxX12RN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ktYaxX12RN", "openreview": "https://openreview.net/forum?id=ktYaxX12RN", "poster": "/media/PosterPDFs/NeurIPS%202024/97532.png?t=1733734004.637904", "project": "", "author_site": "Alexander Nikitin, Letizia Iannucci, Samuel Kaski", "tldr": "", "abstract": "Time series data are essential in a wide range of machine learning (ML) applications. However, temporal data are often scarce or highly sensitive, limiting data sharing and the use of data-intensive ML methods. A possible solution to this problem is the generation of synthetic datasets that resemble real data. In this work, we introduce Time Series Generative Modeling (TSGM), an open-source framework for the generative modeling and evaluation of synthetic time series datasets. TSGM includes a broad repertoire of machine learning methods: generative models, probabilistic, simulation-based approaches, and augmentation techniques. The framework enables users to evaluate the quality of the produced data from different angles: similarity, downstream effectiveness, predictive consistency, diversity, fairness, and privacy. TSGM is extensible and user-friendly, which allows researchers to rapidly implement their own methods and compare them in a shareable environment. The framework has been tested on open datasets and in production and proved to be beneficial in both cases. https://github.com/AlexanderVNikitin/tsgm", "keywords": "synthetic data;generative modeling;time series", "primary_area": "", "supplementary_material": "", "author": "Alexander V Nikitin;Letizia Iannucci;Samuel Kaski", "authorids": "~Alexander_V_Nikitin1;~Letizia_Iannucci1;~Samuel_Kaski1", "gender": "Not Specified;F;M", "homepage": "https://anikitin.me;;https://people.aalto.fi/samuel.kaski", "dblp": "320/6937;;64/5826", "google_scholar": ";;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-1925-9154", "linkedin": ";letiziaiannucci/;samuel-kaski-27790/", "or_profile": "~Alexander_V_Nikitin1;~Letizia_Iannucci1;~Samuel_Kaski1", "aff": "Aalto University;Aalto University;Aalto University", "aff_domain": "aalto.fi;aalto.fi;aalto.fi", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nnikitin2024tsgm,\ntitle={{TSGM}: A Flexible Framework for Generative Modeling of Synthetic Time Series},\nauthor={Alexander V Nikitin and Letizia Iannucci and Samuel Kaski},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=ktYaxX12RN}\n}", "github": "", "reviewers": "S8WW;6muW;BaRA;dX7D", "pdf_size": 4077426, "rating": "7;7;7;7", "confidence": "3;3;4;4", "wc_summary_and_contributions": "94;74;63;126", "wc_strengths": "94;65;81;54", "wc_improvement": "24;124;82;88", "wc_limitations": "64;55;12;87", "wc_correctness": "20;20;21;4", "wc_clarity": "7;17;22;5", "wc_relation_to_prior_work": "17;19;22;13", "wc_documentation": "4;23;16;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "325;398;320;382", "wc_reply_reviewers": "15;11;0;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 89.25, 23.951774464536026 ], "wc_strengths_avg": [ 73.5, 15.239750654128171 ], "wc_improvement_avg": [ 79.5, 35.84340943604556 ], "wc_limitations_avg": [ 54.5, 27.17075633838705 ], "wc_correctness_avg": [ 16.25, 7.084313657652377 ], "wc_clarity_avg": [ 12.75, 7.013380069552769 ], "wc_relation_to_prior_work_avg": [ 17.75, 3.2691742076555053 ], "wc_documentation_avg": [ 11.75, 8.13557004763649 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 356.25, 34.266419421935524 ], "wc_reply_reviewers_avg": [ 9.25, 5.584576975922169 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5554847044181951196&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "aalto.fi;aalto.fi;aalto.fi", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Aalto University", "aff_unique_dep": "", "aff_unique_url": "https://www.aalto.fi", "aff_unique_abbr": "Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Finland" }, { "title": "BMRS: Bayesian Model Reduction for Structured Pruning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93862", "id": "ktpG37Dzh5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ktpG37Dzh5", "openreview": "https://openreview.net/forum?id=ktpG37Dzh5", "poster": "", "project": "", "author_site": "Dustin Wright, Christian Igel, Raghavendra Selvan", "tldr": "", "abstract": "Modern neural networks are often massively overparameterized leading to high compute costs during training and at inference. One effective method to improve both the compute and energy efficiency of neural networks while maintaining good performance is structured pruning, where full network structures (e.g. neurons or convolutional filters) that have limited impact on the model output are removed. In this work, we propose Bayesian Model Reduction for Structured pruning (BMRS), a fully end-to-end Bayesian method of structured pruning. BMRS is based on two recent methods: Bayesian structured pruning with multiplicative noise, and Bayesian model reduction (BMR), a method which allows efficient comparison of Bayesian models under a change in prior. We present two realizations of BMRS derived from different priors which yield different structured pruning characteristics: 1) BMRS_N with the truncated log-normal prior, which offers reliable compression rates and accuracy without the need for tuning any thresholds and 2) BMRS_U with the truncated log-uniform prior that can achieve more aggressive compression based on the boundaries of truncation. Overall, we find that BMRS offers a theoretically grounded approach to structured pruning of neural networks yielding both high compression rates and accuracy. Experiments on multiple datasets and neural networks of varying complexity showed that the two BMRS methods offer a competitive performance-efficiency trade-off compared to other pruning methods.", "keywords": "Bayesian model reduction;structured pruning;variational inference;efficient machine learning;deep learning", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Dustin Wright;Christian Igel;Raghavendra Selvan", "authorids": "~Dustin_Wright2;~Christian_Igel1;~Raghavendra_Selvan1", "gender": "M;M;M", "homepage": "https://www.dustinbwright.com;https://christian-igel.github.io/;https://raghavian.github.io/", "dblp": "164/6559-1;38/6146;183/9041", "google_scholar": "OGk5UnYAAAAJ;https://scholar.google.dk/citations?user=d-jF4zIAAAAJ;R9VBQ54AAAAJ", "orcid": "0000-0001-6514-8733;0000-0003-2868-0856;", "linkedin": ";christianigel/;", "or_profile": "~Dustin_Wright2;~Christian_Igel1;~Raghavendra_Selvan1", "aff": "University of Copenhagen;University of Copenhagen;University of Copenhagen", "aff_domain": "ku.dk;ku.dk;ku.dk", "position": "Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwright2024bmrs,\ntitle={{BMRS}: Bayesian Model Reduction for Structured Pruning},\nauthor={Dustin Wright and Christian Igel and Raghavendra Selvan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ktpG37Dzh5}\n}", "github": "", "reviewers": "238H;v8Lj;L897", "pdf_size": 1721471, "rating": "5;6;8", "confidence": "5;3;2", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;3;4", "wc_summary": "22;79;84", "wc_strengths": "8;84;114", "wc_weaknesses": "248;242;180", "wc_questions": "7;57;74", "wc_limitations": "4;11;4", "wc_review": "289;473;456", "wc_reply_reviewers": "87;146;24", "wc_reply_authors": "639;187;8", "reply_reviewers": "2;2;1", "reply_authors": "3;3;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 61.666666666666664, 28.122746823325937 ], "wc_strengths_avg": [ 68.66666666666667, 44.611906731524286 ], "wc_weaknesses_avg": [ 223.33333333333334, 30.73904502239601 ], "wc_questions_avg": [ 46.0, 28.437065014988214 ], "wc_limitations_avg": [ 6.333333333333333, 3.299831645537222 ], "wc_review_avg": [ 406.0, 83.02208541506691 ], "wc_reply_reviewers_avg": [ 85.66666666666667, 49.81521409725703 ], "wc_reply_authors_avg": [ 278.0, 265.51961635002914 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9285714285714286, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kibneYfKuYoJ:scholar.google.com/&scioq=BMRS:+Bayesian+Model+Reduction+for+Structured+Pruning&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "ku.dk;ku.dk;ku.dk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "VB-LoRA: Extreme Parameter Efficient Fine-Tuning with Vector Banks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93861", "id": "kuCY0mW4Q3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kuCY0mW4Q3", "openreview": "https://openreview.net/forum?id=kuCY0mW4Q3", "poster": "/media/PosterPDFs/NeurIPS%202024/93861.png?t=1732243191.199338", "project": "", "author_site": "Yang Li, Shaobo Han, Jonathan Shihao Ji", "tldr": "", "abstract": "As the adoption of large language models increases and the need for per-user or per-task model customization grows, the parameter-efficient fine-tuning (PEFT) methods, such as low-rank adaptation (LoRA) and its variants, incur substantial storage and transmission costs. To further reduce stored parameters, we introduce a \"divide-and-share\" paradigm that breaks the barriers of low-rank decomposition across matrix dimensions, modules, and layers by sharing parameters globally via a vector bank. As an instantiation of the paradigm to LoRA, our proposed VB-LoRA composites all the low-rank matrices of LoRA from a shared vector bank with a differentiable top-$k$ admixture module. VB-LoRA achieves extreme parameter efficiency while maintaining comparable or better performance compared to state-of-the-art PEFT methods. Extensive experiments demonstrate the effectiveness of VB-LoRA on natural language understanding, natural language generation, instruction tuning, and mathematical reasoning tasks. When fine-tuning the Llama2-13B model, VB-LoRA only uses 0.4% of LoRA's stored parameters, yet achieves superior results. Our source code is available at https://github.com/leo-yangli/VB-LoRA. This method has been merged into the Hugging Face PEFT package.", "keywords": "Parameter-efficient fine-tuning;Low-rank adaptation;Transfer learning;Transformer;Top-k admixture module", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/410881688a57341c02da0a1aacc882d5c423b383.zip", "author": "Yang Li;Shaobo Han;Shihao Ji", "authorids": "~Yang_Li101;~Shaobo_Han1;~Shihao_Ji1", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/mkdirleo/;https://shaobohan.net/;https://sji.soc.uconn.edu/", "dblp": "37/4190-146;;35/4137", "google_scholar": "-mw27v4AAAAJ;3L333oYAAAAJ;qMfWf9EAAAAJ", "orcid": "0009-0006-4018-5582;;", "linkedin": "mkdirleo/;;", "or_profile": "~Yang_Li101;~Shaobo_Han1;~Shihao_Ji2", "aff": "Georgia State University;NEC Labs America;Georgia State University", "aff_domain": "gsu.edu;nec-labs.com;gsu.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nli2024vblora,\ntitle={{VB}-Lo{RA}: Extreme Parameter Efficient Fine-Tuning with Vector Banks},\nauthor={Yang Li and Shaobo Han and Shihao Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kuCY0mW4Q3}\n}", "github": "", "reviewers": "eNJC;wW2k;o6ri;e7Nd", "pdf_size": 763943, "rating": "5;7;8;8", "confidence": "5;4;5;4", "soundness": "2;3;4;4", "novelty": "2;3;4;4", "presentation": "2;4;3;4", "wc_summary": "117;160;134;108", "wc_strengths": "24;198;66;297", "wc_weaknesses": "282;322;73;261", "wc_questions": "67;162;50;70", "wc_limitations": "6;73;1;77", "wc_review": "496;915;324;813", "wc_reply_reviewers": "215;29;12;0", "wc_reply_authors": "157;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 129.75, 19.803724397193573 ], "wc_strengths_avg": [ 146.25, 108.15353669667951 ], "wc_weaknesses_avg": [ 234.5, 95.78230525519837 ], "wc_questions_avg": [ 87.25, 43.82564888281747 ], "wc_limitations_avg": [ 39.25, 35.82160660830276 ], "wc_review_avg": [ 637.0, 237.75512612770308 ], "wc_reply_reviewers_avg": [ 64.0, 87.7866732482784 ], "wc_reply_authors_avg": [ 39.25, 67.98299419707844 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5223542161229055516&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "gsu.edu;nec-labs.com;gsu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia State University;NEC Labs America", "aff_unique_dep": ";", "aff_unique_url": "https://www.gsu.edu;https://www.nec-labs.com", "aff_unique_abbr": "GSU;NEC LA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "EvoCodeBench: An Evolving Code Generation Benchmark with Domain-Specific Evaluations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97531", "id": "kvjbFVHpny", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kvjbFVHpny", "openreview": "https://openreview.net/forum?id=kvjbFVHpny", "poster": "/media/PosterPDFs/NeurIPS%202024/97531.png?t=1730278832.8122492", "project": "", "author_site": "Jia Li, Ge Li, Xuanming Zhang, YunFei Zhao, Yihong Dong, Zhi Jin, Binhua Li, Fei Huang, Yongbin Li", "tldr": "", "abstract": "How to evaluate Large Language Models (LLMs) in code generation remains an open question. \nMany benchmarks have been proposed, but they have two limitations, i.e., data leakage and lack of domain-specific evaluation.\nThe former hurts the fairness of benchmarks, and the latter hinders practitioners from selecting superior LLMs for specific programming domains.\n\nTo address these two limitations, we propose a new benchmark - EvoCodeBench, which has the following advances: \n(1) Evolving data. EvoCodeBench will be dynamically updated every period (e.g., 6 months) to avoid data leakage. This paper releases the first version - EvoCodeBench-2403, containing 275 samples from 25 repositories.\n(2) A domain taxonomy and domain labels. Based on the statistics of open-source communities, we design a programming domain taxonomy consisting of 10 popular domains. Based on the taxonomy, we annotate each sample in EvoCodeBench with a domain label. EvoCodeBench provides a broad platform for domain-specific evaluations.\n(3) Domain-specific evaluations. Besides the Pass@k, we compute the Domain-Specific Improvement (DSI) and define LLMs' comfort and strange domains. These evaluations help practitioners select superior LLMs in specific domains and discover the shortcomings of existing LLMs.\nBesides, EvoCodeBench is collected by a rigorous pipeline and aligns with real-world repositories in multiple aspects (e.g., code distributions).\nWe evaluate 8 popular LLMs (e.g., gpt-4, DeepSeek Coder, StarCoder 2) on EvoCodeBench and summarize some insights. EvoCodeBench reveals the actual abilities of these LLMs in real-world repositories. For example, the highest Pass@1 of gpt-4 on EvoCodeBench-2403 is only 20.74%. Besides, we evaluate LLMs in different domains and discover their comfort and strange domains. For example, gpt-4 performs best in most domains but falls behind others in the Internet domain. StarCoder 2-15B unexpectedly performs well in the Database domain and even outperforms 33B LLMs. We release EvoCodeBench, all prompts, and LLMs' completions for further community analysis.", "keywords": "Code Generation Benchmark;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Jia Li;Ge Li;Xuanming Zhang;Yunfei Zhao;Yihong Dong;Zhi Jin;Binhua Li;Fei Huang;Yongbin Li", "authorids": "~Jia_Li14;~Ge_Li4;~Xuanming_Zhang3;~Yunfei_Zhao2;~Yihong_Dong1;~Zhi_Jin1;~Binhua_Li1;~Fei_Huang2;~Yongbin_Li2", "gender": "M;M;M;M;F;M;M;M;M", "homepage": "https://lj2lijia.github.io/;https://ligechina.github.io;http://web.stanford.edu/people/zhangxm;;http://faculty.pku.edu.cn/zhijin/en/index.htm;;https://sites.google.com/view/fei-huang;https://yongbin-li.github.io/;https://github.com/qikahh", "dblp": "23/6950-11.html;24/712-1;;;22/3510;236/5662.html;h/FeiHuang.html;;", "google_scholar": "https://scholar.google.com.sg/citations?user=Us0ZgUcAAAAJ;PPqcVRwAAAAJ;zP5k9lsAAAAJ;-bNMkykAAAAJ;https://scholar.google.com.tw/citations?user=ZC7SObAAAAAJ;;9r98PpoAAAAJ;xF5VrokAAAAJ;", "orcid": "0000-0002-5579-8852;;0009-0006-6357-8266;;0000-0003-1087-226X;;;;", "linkedin": ";;%E8%BD%A9%E9%93%AD-%E5%BC%A0-54b9b5290/;;;;fei-huang-cas-cmu;;", "or_profile": "~Jia_Li14;~Ge_Li4;~Xuanming_Zhang3;~Yihong_Dong1;~Zhi_Jin1;~Binhua_Li1;~Fei_Huang2;~Yongbin_Li2;~YunFei_Zhao1", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University;;Alibaba Group US;Alibaba Group;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;alibaba-inc.com;alibaba-inc.com;pku.edu.cn", "position": "PhD student;Full Professor;Undergrad student;PhD student;Full Professor;;Senior Research Director;Researcher;PhD student", "bibtex": "@inproceedings{\nli2024evocodebench,\ntitle={EvoCodeBench: An Evolving Code Generation Benchmark with Domain-Specific Evaluations},\nauthor={Jia Li and Ge Li and Xuanming Zhang and Yunfei Zhao and Yihong Dong and Zhi Jin and Binhua Li and Fei Huang and Yongbin Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kvjbFVHpny}\n}", "github": "", "reviewers": "CzHa;LgtM;7mRR;ci62", "pdf_size": 840620, "rating": "5;6;6;7", "confidence": "3;4;3;2", "wc_summary_and_contributions": "69;128;71;86", "wc_strengths": "30;109;181;52", "wc_improvement": "39;137;172;53", "wc_limitations": "1;1;92;63", "wc_correctness": "4;172;58;53", "wc_clarity": "3;1;39;49", "wc_relation_to_prior_work": "1;1;39;55", "wc_documentation": "1;1;46;51", "wc_additional_feedback": "1;1;1;1", "wc_review": "149;551;699;463", "wc_reply_reviewers": "0;30;0;0", "wc_reply_authors": "0;20;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 88.5, 23.732888572611635 ], "wc_strengths_avg": [ 93.0, 58.41660722773961 ], "wc_improvement_avg": [ 100.25, 55.86311394829329 ], "wc_limitations_avg": [ 39.25, 39.60034722069997 ], "wc_correctness_avg": [ 71.75, 61.605093133603816 ], "wc_clarity_avg": [ 23.0, 21.307275752662516 ], "wc_relation_to_prior_work_avg": [ 24.0, 23.68543856465402 ], "wc_documentation_avg": [ 24.75, 23.81569860407206 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 465.5, 201.25295028893365 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 5.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5226067679174332295&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;;alibaba-inc.com;alibaba-inc.com;pku.edu.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;1;1;0", "aff_unique_norm": "Peking University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Peking U;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "VRSBench: A Versatile Vision-Language Benchmark Dataset for Remote Sensing Image Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97530", "id": "kwDOxOmGE0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kwDOxOmGE0", "openreview": "https://openreview.net/forum?id=kwDOxOmGE0", "poster": "/media/PosterPDFs/NeurIPS%202024/97530.png?t=1733743959.940501", "project": "", "author_site": "Xiang Li, Jian Ding, Mohamed Elhoseiny", "tldr": "", "abstract": "We introduce a new benchmark designed to advance the development of general-purpose, large-scale vision-language models for remote sensing images. Although several vision-language datasets in remote sensing have been proposed to pursue this goal, existing datasets are typically tailored to single tasks, lack detailed object information, or suffer from inadequate quality control. Exploring these improvement opportunities, we present a Versatile vision-language Benchmark for Remote Sensing image understanding, termed VRSBench. This benchmark comprises 29,614 images, with 29,614 human-verified detailed captions, 52,472 object references, and 123,221 question-answer pairs. It facilitates the training and evaluation of vision-language models across a broad spectrum of remote sensing image understanding tasks. We further evaluated state-of-the-art models on this benchmark for three vision-language tasks: image captioning, visual grounding, and visual question answering. Our work aims to significantly contribute to the development of advanced vision-language models in the field of remote sensing. The data and code can be accessed at https://vrsbench.github.io.", "keywords": "Vision Language Dataset;Remote Sensing;Detailed Captions;Visual Grounding;Visual Question Answering", "primary_area": "", "supplementary_material": "/attachment/b2d35941fc3f06f220a9e373c60edeae4d4eef2f.pdf", "author": "Xiang Li;Jian Ding;Mohamed Elhoseiny", "authorids": "~Xiang_Li18;~Jian_Ding3;~Mohamed_Elhoseiny1", "gender": "M;M;M", "homepage": "http://xiangli.ac.cn;https://dingjiansw101.github.io/;http://www.mohamed-elhoseiny.com", "dblp": "40/1491-46;;125/2894", "google_scholar": "4Apl5FgAAAAJ;https://scholar.google.com.hk/citations?user=5HM8wcgAAAAJ;iRBUTOAAAAAJ", "orcid": "0000-0002-9946-7000;0000-0002-7188-5884;0000-0001-9659-1551", "linkedin": ";;mohamed-elhoseiny-8a836215/", "or_profile": "~Xiang_Li18;~Jian_Ding3;~Mohamed_Elhoseiny1", "aff": "King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;KAUST", "aff_domain": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "position": "Postdoc;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nli2024vrsbench,\ntitle={{VRSB}ench: A Versatile Vision-Language Benchmark Dataset for Remote Sensing Image Understanding},\nauthor={Xiang Li and Jian Ding and Mohamed Elhoseiny},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=kwDOxOmGE0}\n}", "github": "", "reviewers": "YF6C;YFnT;5s1x", "pdf_size": 1194421, "rating": "7;7;7", "confidence": "4;4;4", "wc_summary_and_contributions": "60;109;38", "wc_strengths": "39;60;103", "wc_improvement": "68;148;102", "wc_limitations": "5;30;104", "wc_correctness": "24;39;15", "wc_clarity": "17;12;13", "wc_relation_to_prior_work": "29;13;22", "wc_documentation": "34;5;10", "wc_additional_feedback": "1;1;1", "wc_review": "277;417;408", "wc_reply_reviewers": "0;0;22", "wc_reply_authors": "59;59;59", "reply_reviewers": "0;0;1", "reply_authors": "2;2;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 69.0, 29.676028485406647 ], "wc_strengths_avg": [ 67.33333333333333, 26.637484032009397 ], "wc_improvement_avg": [ 106.0, 32.78210894171799 ], "wc_limitations_avg": [ 46.333333333333336, 42.0343774652235 ], "wc_correctness_avg": [ 26.0, 9.899494936611665 ], "wc_clarity_avg": [ 14.0, 2.160246899469287 ], "wc_relation_to_prior_work_avg": [ 21.333333333333332, 6.548960901462833 ], "wc_documentation_avg": [ 16.333333333333332, 12.657891697365017 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 367.3333333333333, 63.98089992767807 ], "wc_reply_reviewers_avg": [ 7.333333333333333, 10.370899457402697 ], "wc_reply_authors_avg": [ 59.0, 0.0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15586536337418118173&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "kaust.edu.sa;kaust.edu.sa;kaust.edu.sa", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "King Abdullah University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kast.kau.edu.sa", "aff_unique_abbr": "KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Saudi Arabia" }, { "title": "Acceleration Exists! Optimization Problems When Oracle Can Only Compare Objective Function Values", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93860", "id": "kxBsNEWB42", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kxBsNEWB42", "openreview": "https://openreview.net/forum?id=kxBsNEWB42", "poster": "", "project": "", "author_site": "Aleksandr Lobanov, Alexander Gasnikov, Andrey Krasnov", "tldr": "", "abstract": "Frequently, the burgeoning field of black-box optimization encounters challenges due to a limited understanding of the mechanisms of the objective function. To address such problems, in this work we focus on the deterministic concept of Order Oracle, which only utilizes order access between function values (possibly with some bounded noise), but without assuming access to their values. As theoretical results, we propose a new approach to create non-accelerated optimization algorithms (obtained by integrating Order Oracle into existing optimization \u201ctools\u201d) in non-convex, convex, and strongly convex settings that are as good as both SOTA coordinate algorithms with first-order oracle and SOTA algorithms with Order Oracle up to logarithm factor. Moreover, using the proposed approach, _we provide the first accelerated optimization algorithm using the Order Oracle_. And also, using an already different approach we provide the asymptotic convergence of _the first algorithm with the stochastic Order Oracle concept_. Finally, our theoretical results demonstrate effectiveness of proposed algorithms through numerical experiments.", "keywords": "Black Box Optimization;Order Oracle;Accelerated Algorithms;Asymptotic Convergence", "primary_area": "optimization", "supplementary_material": "", "author": "Aleksandr Lobanov;Alexander Gasnikov;Andrey Krasnov", "authorids": "~Aleksandr_Lobanov1;~Alexander_Gasnikov1;~Andrey_Krasnov1", "gender": "M;M;M", "homepage": "https://alexander-lobanov.github.io/;;https://arxiv.org/search/?query=Gasnikov&searchtype=all&source=header", "dblp": "360/8623;;153/1930", "google_scholar": "https://scholar.google.com/citations?hl=ru;vh53HT0AAAAJ;AmeE8qkAAAAJ", "orcid": "0000-0003-1620-9581;;", "linkedin": ";;", "or_profile": "~Aleksandr_Lobanov1;~Andrey_Krasnov1;~Alexander_Vladimirovich_Gasnikov1", "aff": "Moscow Institute of Physics and Technology;Moscow Institute of Physics and Technology;Moscow Institute of Physics and Technology", "aff_domain": "mipt.ru;phystech.edu;mipt.ru", "position": "Researcher;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nlobanov2024acceleration,\ntitle={Acceleration Exists! Optimization Problems When Oracle Can Only Compare Objective Function Values},\nauthor={Aleksandr Lobanov and Alexander Gasnikov and Andrey Krasnov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kxBsNEWB42}\n}", "github": "", "reviewers": "ErqJ;WKzP;uSy5;XqbR;K16N", "pdf_size": 1005208, "rating": "5;5;5;6;7", "confidence": "3;3;3;4;4", "soundness": "3;3;3;3;4", "novelty": "2;3;2;3;3", "presentation": "3;3;3;2;3", "wc_summary": "95;57;97;151;209", "wc_strengths": "47;13;38;113;58", "wc_weaknesses": "191;177;213;94;27", "wc_questions": "204;112;8;41;133", "wc_limitations": "4;19;2;136;47", "wc_review": "541;378;358;535;474", "wc_reply_reviewers": "176;34;95;45;25", "wc_reply_authors": "578;17;477;18;19", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 121.8, 52.893855975907066 ], "wc_strengths_avg": [ 53.8, 33.114347343711906 ], "wc_weaknesses_avg": [ 140.4, 69.54595602908914 ], "wc_questions_avg": [ 99.6, 69.27221665285441 ], "wc_limitations_avg": [ 41.6, 49.86622103187688 ], "wc_review_avg": [ 457.2, 76.77343290487929 ], "wc_reply_reviewers_avg": [ 75.0, 56.00357131469385 ], "wc_reply_authors_avg": [ 221.8, 251.63894770086765 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9185586535436918, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5627783158021693164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "mipt.ru;phystech.edu;mipt.ru", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Moscow Institute of Physics and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.mipt.ru/en", "aff_unique_abbr": "MIPT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "LP-3DGS: Learning to Prune 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93859", "id": "kzJ9P7VPnS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=kzJ9P7VPnS", "openreview": "https://openreview.net/forum?id=kzJ9P7VPnS", "poster": "", "project": "", "author_site": "Zhaoliang Zhang, Tianchen Song, Yongjae Lee, Li Yang, Cheng Peng, Rama Chellappa, Deliang Fan", "tldr": "", "abstract": "Recently, 3D Gaussian Splatting (3DGS) has become one of the mainstream methodologies for novel view synthesis (NVS) due to its high quality and fast rendering speed. However, as a point-based scene representation, 3DGS potentially generates a large number of Gaussians to fit the scene, leading to high memory usage. Improvements that have been proposed require either an empirical pre-set pruning ratio or importance score threshold to prune the point cloud. Such hyperparameters require multiple rounds of training to optimize and achieve the maximum pruning ratio while maintaining the rendering quality for each scene. In this work, we propose learning-to-prune 3DGS (LP-3DGS), where a trainable binary mask is applied to the importance score to automatically find a favorable pruning ratio. Instead of using the traditional straight-through estimator (STE) method to approximate the binary mask gradient, we redesign the masking function to leverage the Gumbel-Sigmoid method, making it differentiable and compatible with the existing training process of 3DGS. Extensive experiments have shown that LP-3DGS consistently achieves a good balance between efficiency and high quality.", "keywords": "Novel view synthesis;Gaussian splatting;Learn to prune", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhaoliang Zhang;Tianchen Song;Yongjae Lee;Li Yang;Cheng Peng;Rama Chellappa;Deliang Fan", "authorids": "~Zhaoliang_Zhang1;~Tianchen_Song1;~Yongjae_Lee4;~Li_Yang6;~Cheng_Peng2;~Rama_Chellappa1;~Deliang_Fan1", "gender": "M;M;M;M;M;;M", "homepage": ";;https://nfyfamr.github.io/;https://lyang-666.github.io/;https://sites.google.com/view/cheng-peng/home;;https://faculty.engineering.asu.edu/dfan/", "dblp": ";;;;82/3044-8.html;;129/1701", "google_scholar": "https://scholar.google.com/citations?hl=en;;VTa2yOsAAAAJ;qpUT1I8AAAAJ;HiQ4T3wAAAAJ;;sAflhJUAAAAJ", "orcid": ";;0000-0003-1692-2117;0000-0002-2839-6196;;;0000-0002-7989-6297", "linkedin": ";tianchen-song-67b318202?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;yong-jae-lee/;li-yang-268710139/;;;", "or_profile": "~Zhaoliang_Zhang1;~Tianchen_Song1;~Yongjae_Lee4;~Li_Yang6;~Cheng_Peng2;~Rama_Chellappa1;~Deliang_Fan1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;University of North Carolina at Charlotte;Department of Computer Science, Whiting School of Engineering;;Johns Hopkins University", "aff_domain": "jh.edu;jh.edu;jh.edu;uncc.edu;cs.jhu.edu;;jhu.edu", "position": "PhD student;MS student;PhD student;Assistant Professor;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\nzhang2024lpdgs,\ntitle={{LP}-3{DGS}: Learning to Prune 3D Gaussian Splatting},\nauthor={Zhaoliang Zhang and Tianchen Song and Yongjae Lee and Li Yang and Cheng Peng and Rama Chellappa and Deliang Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=kzJ9P7VPnS}\n}", "github": "", "reviewers": "CWFJ;1UP2;fvLt;o7SR", "pdf_size": 15007770, "rating": "4;5;6;6", "confidence": "3;4;3;4", "soundness": "2;2;3;3", "novelty": "1;2;2;3", "presentation": "2;2;2;3", "wc_summary": "51;20;96;143", "wc_strengths": "28;12;57;51", "wc_weaknesses": "233;124;259;306", "wc_questions": "6;4;28;101", "wc_limitations": "6;5;9;16", "wc_review": "324;165;449;617", "wc_reply_reviewers": "0;807;83;348", "wc_reply_authors": "133;1868;38;489", "reply_reviewers": "0;5;1;3", "reply_authors": "4;7;2;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 46.47848964843845 ], "wc_strengths_avg": [ 37.0, 18.041618552668716 ], "wc_weaknesses_avg": [ 230.5, 66.82252614201292 ], "wc_questions_avg": [ 34.75, 39.39146481155531 ], "wc_limitations_avg": [ 9.0, 4.301162633521313 ], "wc_review_avg": [ 388.75, 165.81974399931994 ], "wc_reply_reviewers_avg": [ 309.5, 314.6748321680652 ], "wc_reply_authors_avg": [ 632.0, 733.140845949808 ], "reply_reviewers_avg": [ 2.25, 1.920286436967152 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11728503976080079180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jh.edu;jh.edu;jh.edu;uncc.edu;cs.jhu.edu;;jhu.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Johns Hopkins University;University of North Carolina at Charlotte", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.uncc.edu", "aff_unique_abbr": "JHU;UNCC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Charlotte;Baltimore", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bandits with Abstention under Expert Advice", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93858", "id": "l04i6dPMxK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l04i6dPMxK", "openreview": "https://openreview.net/forum?id=l04i6dPMxK", "poster": "", "project": "", "author_site": "Stephen Pasteris, Alberto Rumi, Maximilian Thiessen, Shota Saito, Atsushi Miyauchi, Fabio Vitale, Mark Herbster", "tldr": "", "abstract": "We study the classic problem of prediction with expert advice under bandit feedback. Our model assumes that one action, corresponding to the learner's abstention from play, has no reward or loss on every trial. We propose the CBA (Confidence-rated Bandits with Abstentions) algorithm, which exploits this assumption to obtain reward bounds that can significantly improve those of the classical Exp4 algorithm. Our problem can be construed as the aggregation of confidence-rated predictors, with the learner having the option to abstain from play. We are the first to achieve bounds on the expected cumulative reward for general confidence-rated predictors. In the special case of specialists, we achieve a novel reward bound, significantly improving previous bounds of SpecialistExp (treating abstention as another action). We discuss how CBA can be applied to the problem of adversarial contextual bandits with the option of abstaining from selecting any action. We are able to leverage a wide range of inductive biases, outperforming previous approaches both theoretically and in preliminary experimental analysis. Additionally, we achieve a reduction in runtime from quadratic to almost linear in the number of contexts for the specific case of metric space contexts.", "keywords": "Multi-armed bandits;Expert advice;Abstention;Contextual bandits", "primary_area": "bandits", "supplementary_material": "/attachment/6ea72c832be95585af9cd99066d1152ca9685eaa.zip", "author": "Stephen Pasteris;Alberto Rumi;Maximilian Thiessen;Shota Saito;Atsushi Miyauchi;Fabio Vitale;Mark Herbster", "authorids": "~Stephen_Pasteris1;~Alberto_Rumi1;~Maximilian_Thiessen1;~Shota_Saito2;~Atsushi_Miyauchi1;~Fabio_Vitale1;~Mark_Herbster1", "gender": "M;M;;M;M;;M", "homepage": ";https://albertorumi.github.io/;https://maxthiessen.github.io;http://sites.google.com/site/ssaito1989/;https://sites.google.com/view/miyauchi/home?pli=1;;http://www0.cs.ucl.ac.uk/staff/M.Herbster/", "dblp": "126/1728;255/3722;https://dblp.uni-trier.de/pid/274/6633;;136/5974-1.html;;76/6979", "google_scholar": ";;https://scholar.google.de/citations?user=XO5rGcwAAAAJ;https://scholar.google.co.jp/citations?user=B4jqiWUAAAAJ;yMyLQ2cAAAAJ;;https://scholar.google.com.tw/citations?user=MBwWHlgAAAAJ", "orcid": ";;0000-0001-9333-2685;;0000-0002-6033-6433;;", "linkedin": ";;maximilian-thiessen/;;https://linkedin.com/in/atsushi-miyauchi-75a273188;;", "or_profile": "~Stephen_Pasteris1;~Alberto_Rumi1;~Maximilian_Thiessen1;~Shota_Saito2;~Atsushi_Miyauchi1;~Fabio_Vitale1;~Mark_Herbster1", "aff": "Alan Turing Institute;Imperial College London;TU Wien;University College London;CENTAI Institute;;University College London", "aff_domain": "turing.ac.uk;imperial.ac.uk;tuwien.ac.at;ucl.ac.uk;centai.eu;;ucl.edu", "position": "Senior Reasearch Associate;PhD student;PhD student;PhD student;Researcher;;Professor", "bibtex": "@inproceedings{\npasteris2024bandits,\ntitle={Bandits with Abstention under Expert Advice},\nauthor={Stephen Pasteris and Alberto Rumi and Maximilian Thiessen and Shota Saito and Atsushi Miyauchi and Fabio Vitale and Mark Herbster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l04i6dPMxK}\n}", "github": "", "reviewers": "zGbq;3cAo;Edre;BsH5", "pdf_size": 17373779, "rating": "4;5;5;8", "confidence": "2;3;2;4", "soundness": "2;3;3;4", "novelty": "2;2;2;4", "presentation": "2;3;2;4", "wc_summary": "118;35;90;218", "wc_strengths": "48;11;83;13", "wc_weaknesses": "175;37;214;3", "wc_questions": "26;24;95;1", "wc_limitations": "3;20;17;1", "wc_review": "370;127;499;236", "wc_reply_reviewers": "10;11;18;8", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 115.25, 66.41300700916952 ], "wc_strengths_avg": [ 38.75, 29.4819860253681 ], "wc_weaknesses_avg": [ 107.25, 89.1469993886502 ], "wc_questions_avg": [ 36.5, 35.174564673923115 ], "wc_limitations_avg": [ 10.25, 8.347903928532 ], "wc_review_avg": [ 308.0, 139.88388041515006 ], "wc_reply_reviewers_avg": [ 11.75, 3.766629793329841 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8301968525800298928&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "turing.ac.uk;imperial.ac.uk;tuwien.ac.at;ucl.ac.uk;centai.eu;;ucl.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4;3", "aff_unique_norm": "Alan Turing Institute;Imperial College London;Technische Universit\u00e4t Wien;University College London;CENTAI Institute", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.turing.ac.uk;https://www.imperial.ac.uk;https://www.tuwien.ac.at;https://www.ucl.ac.uk;", "aff_unique_abbr": "ATI;ICL;TU Wien;UCL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;Austria;" }, { "title": "IMPACT: A Large-scale Integrated Multimodal Patent Analysis and Creation Dataset for Design Patents", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97529", "id": "l0Ydsl10ci", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l0Ydsl10ci", "openreview": "https://openreview.net/forum?id=l0Ydsl10ci", "poster": "", "project": "", "author_site": "Homaira Huda Shomee, Zhu Wang, Sathya Ravi, Sourav Medya", "tldr": "", "abstract": "In this paper, we introduce IMPACT (Integrated Multimodal Patent Analysis and Creation Dataset for Design Patents), a large-scale multimodal patent dataset with detailed captions for design patent figures. Our dataset includes half a million design patents comprising 3.61 million figures along with captions from patents granted by the United States Patent and Trademark Office (USPTO) over a 16-year period from 2007 to 2022. We incorporate the metadata of each patent application with elaborate captions that are coherent with multiple viewpoints of designs. Even though patents themselves contain a variety of design figures, titles, and descriptions of viewpoints, we find that they lack detailed descriptions that are necessary to perform multimodal tasks such as classification and retrieval. IMPACT closes this gap thereby providing researchers with necessary ingredients to instantiate a variety of multimodal tasks. Our dataset has a huge potential for novel design inspiration and can be used with advanced computer vision models in tandem. We perform preliminary evaluations on the dataset on the popular patent analysis tasks such as classification and retrieval. Our results indicate that integrating images with generated captions significantly improves the performance of different models on the corresponding tasks. Given that design patents offer various benefits for modeling novel tasks, we propose two standard computer vision tasks that have not been investigated in analyzing patents as future directions using IMPACT as a benchmark viz., 3D Image Construction and Visual Question Answering (VQA). To facilitate research in these directions, we make our IMPACT dataset and the code/models used in this work publicly available at https://github.com/AI4Patents/IMPACT.", "keywords": "Patent;Dataset;Design;Multimodal", "primary_area": "", "supplementary_material": "/attachment/f3178c2986917e4657470c16f6d7f7963bcb03ef.zip", "author": "Homaira Huda Shomee;Zhu Wang;Sathya N. Ravi;Sourav Medya", "authorids": "~Homaira_Huda_Shomee1;~Zhu_Wang2;~Sathya_N._Ravi1;~Sourav_Medya1", "gender": "F;F;M;M", "homepage": "https://hhshomee.github.io/;;http://sathyaravi.com;https://souravmedya.github.io/", "dblp": "310/4183;;159/2123;178/3021", "google_scholar": "0vrRwsgAAAAJ;mMyQX4oAAAAJ;FW-0thoAAAAJ;RCFhOM4AAAAJ", "orcid": ";;0000-0003-3881-6323;0000-0003-0996-2807", "linkedin": "homaira-huda-shomee/;;sathya-narayanan-ravi-74a5a128/;sourav-medya-35987a49/", "or_profile": "~Homaira_Huda_Shomee1;~Zhu_Wang2;~Sathya_N._Ravi1;~Sourav_Medya1", "aff": "University of Illinois at Chicago;University of Illinois at Chicago;University of Illinois, Chicago;University of Illinois at Chicago", "aff_domain": "uic.edu;cs.uic.edu;uic.edu;uic.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshomee2024impact,\ntitle={{IMPACT}: A Large-scale Integrated Multimodal Patent Analysis and Creation Dataset for Design Patents},\nauthor={Homaira Huda Shomee and Zhu Wang and Sathya N. Ravi and Sourav Medya},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=l0Ydsl10ci}\n}", "github": "", "reviewers": "C8Cv;H2SD;pWTK", "pdf_size": 18385630, "rating": "6;7;7", "confidence": "3;3;4", "wc_summary_and_contributions": "61;81;74", "wc_strengths": "15;4;48", "wc_improvement": "295;4;55", "wc_limitations": "3;1;2", "wc_correctness": "3;1;5", "wc_clarity": "3;1;6", "wc_relation_to_prior_work": "3;1;5", "wc_documentation": "34;1;3", "wc_additional_feedback": "1;1;1", "wc_review": "418;95;199", "wc_reply_reviewers": "87;0;20", "wc_reply_authors": "271;25;34", "reply_reviewers": "1;0;1", "reply_authors": "4;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 72.0, 8.286535263104035 ], "wc_strengths_avg": [ 22.333333333333332, 18.696404883173546 ], "wc_improvement_avg": [ 118.0, 126.87789405566282 ], "wc_limitations_avg": [ 2.0, 0.816496580927726 ], "wc_correctness_avg": [ 3.0, 1.632993161855452 ], "wc_clarity_avg": [ 3.3333333333333335, 2.0548046676563256 ], "wc_relation_to_prior_work_avg": [ 3.0, 1.632993161855452 ], "wc_documentation_avg": [ 12.666666666666666, 15.107025591499548 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 237.33333333333334, 134.6212794802103 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 37.205137040766594 ], "wc_reply_authors_avg": [ 110.0, 113.9034679015525 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5030251295446745349&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "uic.edu;cs.uic.edu;uic.edu;uic.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Diffusion Actor-Critic with Entropy Regulator", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93857", "id": "l0c1j4QvTq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l0c1j4QvTq", "openreview": "https://openreview.net/forum?id=l0c1j4QvTq", "poster": "/media/PosterPDFs/NeurIPS%202024/93857.png?t=1730909743.7371237", "project": "", "author_site": "Yinuo Wang, Likun Wang, Yuxuan Jiang, Wenjun Zou, Tong Liu, Xujie Song, Wenxuan Wang, Liming Xiao, Jiang Wu, Jingliang Duan, Shengbo Li", "tldr": "", "abstract": "Reinforcement learning (RL) has proven highly effective in addressing complex decision-making and control tasks. However, in most traditional RL algorithms, the policy is typically parameterized as a diagonal Gaussian distribution with learned mean and variance, which constrains their capability to acquire complex policies. In response to this problem, we propose an online RL algorithm termed diffusion actor-critic with entropy regulator (DACER). This algorithm conceptualizes the reverse process of the diffusion model as a novel policy function and leverages the capability of the diffusion model to fit multimodal distributions, thereby enhancing the representational capacity of the policy. Since the distribution of the diffusion policy lacks an analytical expression, its entropy cannot be determined analytically. To mitigate this, we propose a method to estimate the entropy of the diffusion policy utilizing Gaussian mixture model. Building on the estimated entropy, we can learn a parameter $\\alpha$ that modulates the degree of exploration and exploitation. Parameter $\\alpha$ will be employed to adaptively regulate the variance of the added noise, which is applied to the action output by the diffusion model. Experimental trials on MuJoCo benchmarks and a multimodal task demonstrate that the DACER algorithm achieves state-of-the-art (SOTA) performance in most MuJoCo control tasks while exhibiting a stronger representational capacity of the diffusion policy.", "keywords": "Diffusion model;online reinforcement learning;Gaussian mixture model", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yinuo Wang;Likun Wang;Yuxuan Jiang;Wenjun Zou;Tong Liu;Xujie Song;Wenxuan Wang;Liming Xiao;Jiang WU;Jingliang Duan;Shengbo Eben Li", "authorids": "~Yinuo_Wang5;~Likun_Wang1;~Yuxuan_Jiang1;~Wenjun_Zou1;~Tong_Liu11;~Xujie_Song1;~Wenxuan_Wang5;~Liming_Xiao1;~Jiang_WU9;~Jingliang_Duan1;~Shengbo_Eben_Li2", "gender": "M;M;M;;;M;;M;M;M;M", "homepage": ";https://github.com/ParadoxKKKK/traffic_flow_in_intersection;https://github.com/jjyyxx;https://www.researchgate.net/profile/Wenjun-Zou-6;https://www.researchgate.net/profile/Tong-Liu-94;;;https://sdc-laboratory.github.io;;http://www.idlab-tsinghua.com/thulab/labweb/dpeople.html?11;", "dblp": ";;;;;;;;208/9091;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;ZcwQMfkAAAAJ;;;;;;https://scholar.google.com/citations?hl=zh-CN;Dxiw1K8AAAAJ;", "orcid": ";;0000-0003-4285-0495;;;;0009-0006-4566-1103;;;;", "linkedin": ";;;;;xujie-song/;;;;;https://www.linkedin.cn/incareer/in/%E6%96%87%E8%BD%A9-%E7%8E%8B-b363b9182", "or_profile": "~Yinuo_Wang5;~Likun_Wang1;~Yuxuan_Jiang1;~Wenjun_Zou1;~Tong_Liu11;~Xujie_Song1;~Liming_Xiao1;~Jiang_WU9;~Jingliang_Duan1;~Shengbo_Eben_Li2;~wenxuan_Wang4", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;University of Science and Technology Beijing;University of Science and Technology Beijing;University of Science and Technology Beijing;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;MS student;MS student;PhD student;MS student;MS student;MS student;MS student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2024diffusion,\ntitle={Diffusion Actor-Critic with Entropy Regulator},\nauthor={Yinuo Wang and Likun Wang and Yuxuan Jiang and Wenjun Zou and Tong Liu and Xujie Song and Wenxuan Wang and Liming Xiao and Jiang WU and Jingliang Duan and Shengbo Eben Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l0c1j4QvTq}\n}", "github": "", "reviewers": "3PmU;xVKC;NPyG;QFqf", "pdf_size": 2573330, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "74;74;54;45", "wc_strengths": "82;153;64;48", "wc_weaknesses": "132;265;425;74", "wc_questions": "621;8;37;18", "wc_limitations": "15;16;32;7", "wc_review": "924;516;612;192", "wc_reply_reviewers": "374;22;432;13", "wc_reply_authors": "713;417;2421;0", "reply_reviewers": "3;1;3;1", "reply_authors": "4;3;6;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.75, 12.65652005884714 ], "wc_strengths_avg": [ 86.75, 40.095978601351035 ], "wc_weaknesses_avg": [ 224.0, 135.13511756756643 ], "wc_questions_avg": [ 171.0, 260.01634564003854 ], "wc_limitations_avg": [ 17.5, 9.069178573608527 ], "wc_review_avg": [ 561.0, 261.03448048102763 ], "wc_reply_reviewers_avg": [ 210.25, 193.86383752520737 ], "wc_reply_authors_avg": [ 887.75, 920.746809660506 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 3.5, 1.8027756377319946 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12542451772357749043&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;0;0;0;1;1;1;0;0", "aff_unique_norm": "Tsinghua University;University of Science and Technology Beijing", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ustb.edu.cn", "aff_unique_abbr": "THU;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Improved Sample Complexity for Multiclass PAC Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93856", "id": "l2yvtrz3On", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l2yvtrz3On", "openreview": "https://openreview.net/forum?id=l2yvtrz3On", "poster": "/media/PosterPDFs/NeurIPS%202024/93856.png?t=1733738668.018757", "project": "", "author_site": "Steve Hanneke, Shay Moran, Qian Zhang", "tldr": "", "abstract": "We aim to understand the optimal PAC sample complexity in multiclass learning. While finiteness of the Daniely-Shalev-Shwartz (DS) dimension has been shown to characterize the PAC learnability of a concept class [Brukhim, Carmon, Dinur, Moran, and Yehudayoff, 2022], there exist polylog factor gaps in the leading term of the sample complexity. In this paper, we reduce the gap in terms of the dependence on the error parameter to a single log factor and also propose two possible routes towards completely resolving the optimal sample complexity, each based on a key open question we formulate: one concerning list learning with bounded list size, the other concerning a new type of shifting for multiclass concept classes. We prove that a positive answer to either of the two questions would completely resolve the optimal sample complexity up to log factors of the DS dimension.", "keywords": "Multiclass learning;PAC learning;Statistical learning;List learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Steve Hanneke;Shay Moran;Qian Zhang", "authorids": "~Steve_Hanneke1;~Shay_Moran1;~Qian_Zhang10", "gender": "M;M;M", "homepage": "http://www.stevehanneke.com;http://www.cs.technion.ac.il/~shaymrn/;", "dblp": "40/154;119/5111;04/2024-67.html", "google_scholar": "fEhNO7YAAAAJ;kALYnggAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Steve_Hanneke1;~Shay_Moran1;~Qian_Zhang10", "aff": "Purdue University;Google;Purdue University", "aff_domain": "purdue.edu;google.com;purdue.edu", "position": "Assistant Professor;Visiting Faculty;PhD student", "bibtex": "@inproceedings{\nhanneke2024improved,\ntitle={Improved Sample Complexity for Multiclass {PAC} Learning},\nauthor={Steve Hanneke and Shay Moran and Qian Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l2yvtrz3On}\n}", "github": "", "reviewers": "aJcM;wmES;AhgM;s6iC;jEdN", "pdf_size": 545683, "rating": "5;5;6;7;7", "confidence": "3;4;4;3;3", "soundness": "3;3;3;4;3", "novelty": "3;2;3;3;3", "presentation": "1;2;3;3;3", "wc_summary": "138;23;83;226;185", "wc_strengths": "47;34;87;53;146", "wc_weaknesses": "133;49;132;103;125", "wc_questions": "67;50;7;2;2", "wc_limitations": "1;1;12;1;2", "wc_review": "386;157;321;385;460", "wc_reply_reviewers": "168;10;11;19;0", "wc_reply_authors": "21;6;12;6;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 131.0, 72.05275844823709 ], "wc_strengths_avg": [ 73.4, 40.30186099921442 ], "wc_weaknesses_avg": [ 108.4, 31.607594024221456 ], "wc_questions_avg": [ 25.6, 27.45614685275412 ], "wc_limitations_avg": [ 3.4, 4.31740662898458 ], "wc_review_avg": [ 341.8, 102.34725203931954 ], "wc_reply_reviewers_avg": [ 41.6, 63.4873215689558 ], "wc_reply_authors_avg": [ 9.0, 7.0992957397195395 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.45643546458763845, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:V3RQutmlN38J:scholar.google.com/&scioq=Improved+Sample+Complexity+for+Multiclass+PAC+Learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "purdue.edu;google.com;purdue.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.purdue.edu;https://www.google.com", "aff_unique_abbr": "Purdue;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Parameter Competition Balancing for Model Merging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93855", "id": "l5SbrtvSRS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l5SbrtvSRS", "openreview": "https://openreview.net/forum?id=l5SbrtvSRS", "poster": "/media/PosterPDFs/NeurIPS%202024/93855.png?t=1733492207.923188", "project": "", "author_site": "Guodong DU, Junlin Lee, Jing Li, Runhua Jiang, Yifei Guo, Shuyang Yu, Hanting Liu, Sim Kuan Goh, Ho-Kin Tang, Daojing He, Min Zhang", "tldr": "", "abstract": "While fine-tuning pretrained models has become common practice, these models often underperform outside their specific domains. Recently developed model merging techniques enable the direct integration of multiple models, each fine-tuned for distinct tasks, into a single model. This strategy promotes multitasking capabilities without requiring retraining on the original datasets. However, existing methods fall short in addressing potential conflicts and complex correlations between tasks, especially in parameter-level adjustments, posing a challenge in effectively balancing parameter competition across various tasks. This paper introduces an innovative technique named **PCB-Merging** (Parameter Competition Balancing), a *lightweight* and *training-free* technique that adjusts the coefficients of each parameter for effective model merging. PCB-Merging employs intra-balancing to gauge parameter significance within individual tasks and inter-balancing to assess parameter similarities across different tasks. Parameters with low importance scores are dropped, and the remaining ones are rescaled to form the final merged model. We assessed our approach in diverse merging scenarios, including cross-task, cross-domain, and cross-training configurations, as well as out-of-domain generalization. The experimental results reveal that our approach achieves substantial performance enhancements across multiple modalities, domains, model sizes, number of tasks, fine-tuning forms, and large language models, outperforming existing model merging methods.", "keywords": "Model Merging;Knowledge Fusion;Model Editing;Task Arithmetic;Robust Fine-tuning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/ea14e3ed37db8b74c9bb0d015d397727110dc79c.zip", "author": "Guodong DU;Junlin Lee;Jing Li;Runhua Jiang;Yifei Guo;Shuyang Yu;Hanting Liu;Sim Kuan Goh;Ho-Kin Tang;Daojing He;Min Zhang", "authorids": "~Guodong_DU2;~Junlin_Lee1;~Jing_Li19;~Runhua_Jiang2;~Yifei_Guo2;~Shuyang_Yu2;~Hanting_Liu1;~Sim_Kuan_Goh2;~Ho-Kin_Tang1;~Daojing_He1;~Min_Zhang9", "gender": "M;M;M;M;F;F;F;M;M;M;M", "homepage": "https://duguodong7.github.io;https://github.com/Leejunlin28;https://www.li-jing.com;;;;;https://simkuangoh.github.io/;http://faculty.hitsz.edu.cn/denghaojian;http://faculty.hitsz.edu.cn/hedaojing;https://zhangmin-nlp-ai.github.io/", "dblp": "213/8915-4;;l/JingLi34;;;;;152/7784;;60/7270;83/5342-5", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.sg/citations?user=2QxEwWsAAAAJ;;;sfFLGycAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com.hk/citations?user=a4yjxI4AAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;;;;0000-0002-8378-815X;0000-0002-3820-8128;", "linkedin": "\u56fd\u4e1c-\u675c-30b496169/;;;runhua-jiang-295004261/?originalSubdomain=my;yifeiguo118/;shuyang-yu;hanting-liu-075b44218/;;;;", "or_profile": "~Guodong_DU2;~Junlin_Lee1;~Jing_Li19;~Runhua_Jiang2;~Yifei_Guo2;~Shuyang_Yu2;~Hanting_Liu1;~Sim_Kuan_Goh2;~Ho-Kin_Tang1;~Daojing_He1;~Min_Zhang9", "aff": "Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology;Xiamen University;Xiamen University Malaysia ;Xiamen University Malaysia;Xiamen University;Xiamen University Malaysia;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen", "aff_domain": "hit.edu.cn;stu.hit.edu.cn;hit.edu.cn;xmu.edu.cn;xmu.edu.my;xmu.edu.my;xmu.edu.cn;xmu.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "Researcher;MS student;Full Professor;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Assistant Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndu2024parameter,\ntitle={Parameter Competition Balancing for Model Merging},\nauthor={Guodong DU and Junlin Lee and Jing Li and Runhua Jiang and Yifei Guo and Shuyang Yu and Hanting Liu and Sim Kuan Goh and Ho-Kin Tang and Daojing He and Min Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l5SbrtvSRS}\n}", "github": "", "reviewers": "6PSY;Zv3Q;28gt;s8Ho", "pdf_size": 673806, "rating": "5;6;6;7", "confidence": "5;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "4;3;2;3", "wc_summary": "72;21;27;27", "wc_strengths": "54;12;85;50", "wc_weaknesses": "169;25;6;92", "wc_questions": "4;80;112;14", "wc_limitations": "1;1;44;14", "wc_review": "300;139;274;197", "wc_reply_reviewers": "16;10;32;0", "wc_reply_authors": "27;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 36.75, 20.498475553074673 ], "wc_strengths_avg": [ 50.25, 25.907286619790966 ], "wc_weaknesses_avg": [ 73.0, 63.97265040624782 ], "wc_questions_avg": [ 52.5, 45.08602887813474 ], "wc_limitations_avg": [ 15.0, 17.564168070250297 ], "wc_review_avg": [ 227.5, 63.602279833351886 ], "wc_reply_reviewers_avg": [ 14.5, 11.6081867662439 ], "wc_reply_authors_avg": [ 6.75, 11.691342951089922 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9788361150401734231&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;stu.hit.edu.cn;hit.edu.cn;xmu.edu.cn;xmu.edu.my;xmu.edu.my;xmu.edu.cn;xmu.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 11, "aff_unique_index": "0;0;0;1;1;1;1;1;0;0;0", "aff_unique_norm": "Harbin Institute of Technology;Xiamen University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.xmu.edu.cn", "aff_unique_abbr": "HIT;XMU", "aff_campus_unique_index": "0;1;0;3;3;3;0;0;1", "aff_campus_unique": "Harbin;Shenzhen;;Malaysia", "aff_country_unique_index": "0;0;0;0;1;1;0;1;0;0;0", "aff_country_unique": "China;Malaysia" }, { "title": "Towards the Transferability of Rewards Recovered via Regularized Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93854", "id": "l5wEQPcDab", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l5wEQPcDab", "openreview": "https://openreview.net/forum?id=l5wEQPcDab", "poster": "/media/PosterPDFs/NeurIPS%202024/93854.png?t=1733879500.225345", "project": "", "author_site": "Andreas Schlaginhaufen, Maryam Kamgarpour", "tldr": "", "abstract": "Inverse reinforcement learning (IRL) aims to infer a reward from expert demonstrations, motivated by the idea that the reward, rather than the policy, is the most succinct and transferable description of a task [Ng et al., 2000]. However, the reward corresponding to an optimal policy is not unique, making it unclear if an IRL-learned reward is transferable to new transition laws in the sense that its optimal policy aligns with the optimal policy corresponding to the expert's true reward. Past work has addressed this problem only under the assumption of full access to the expert's policy, guaranteeing transferability when learning from two experts with the same reward but different transition laws that satisfy a specific rank condition [Rolland et al., 2022]. In this work, we show that the conditions developed under full access to the expert's policy cannot guarantee transferability in the more practical scenario where we have access only to demonstrations of the expert. Instead of a binary rank condition, we propose principal angles as a more refined measure of similarity and dissimilarity between transition laws. Based on this, we then establish two key results: 1) a sufficient condition for transferability to any transition laws when learning from at least two experts with sufficiently different transition laws, and 2) a sufficient condition for transferability to local changes in the transition law when learning from a single expert. Furthermore, we also provide a probably approximately correct (PAC) algorithm and an end-to-end analysis for learning transferable rewards from demonstrations of multiple experts.", "keywords": "Inverse reinforcement learning;Transferability;Identifiability;Robustness;Alignment", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3d0f3744530a5a023386fa8947d2547a0d17133c.zip", "author": "Andreas Schlaginhaufen;Maryam Kamgarpour", "authorids": "~Andreas_Schlaginhaufen1;~Maryam_Kamgarpour1", "gender": "M;F", "homepage": ";https://www.epfl.ch/labs/sycamore/", "dblp": ";https://dblp.org/pers/k/Kamgarpour:Maryam.html", "google_scholar": ";https://scholar.google.ch/citations?user=m6YgGqAAAAAJ", "orcid": ";", "linkedin": "andreas-schlaginhaufen/;", "or_profile": "~Andreas_Schlaginhaufen1;~Maryam_Kamgarpour1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nschlaginhaufen2024towards,\ntitle={Towards the Transferability of Rewards Recovered via Regularized Inverse Reinforcement Learning},\nauthor={Andreas Schlaginhaufen and Maryam Kamgarpour},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l5wEQPcDab}\n}", "github": "", "reviewers": "jeG2;hiXS;k5eY;fi4j", "pdf_size": 1386481, "rating": "5;6;7;8", "confidence": "4;4;5;4", "soundness": "3;4;4;3", "novelty": "3;3;3;3", "presentation": "4;2;4;3", "wc_summary": "129;119;89;290", "wc_strengths": "177;117;76;185", "wc_weaknesses": "160;222;39;272", "wc_questions": "179;67;174;74", "wc_limitations": "12;60;15;10", "wc_review": "657;585;393;831", "wc_reply_reviewers": "12;122;55;30", "wc_reply_authors": "0;0;203;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 156.75, 78.32743772140131 ], "wc_strengths_avg": [ 138.75, 44.75698269544094 ], "wc_weaknesses_avg": [ 173.25, 87.07288613569669 ], "wc_questions_avg": [ 123.5, 53.08719242905957 ], "wc_limitations_avg": [ 24.25, 20.716840975399702 ], "wc_review_avg": [ 616.5, 156.99920381963724 ], "wc_reply_reviewers_avg": [ 54.75, 41.72154719087009 ], "wc_reply_authors_avg": [ 50.75, 87.90157848412052 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8994830714902179519&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "epfl.ch;epfl.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Practical $0.385$-Approximation for Submodular Maximization Subject to a Cardinality Constraint", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93853", "id": "l6iICoILGB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l6iICoILGB", "openreview": "https://openreview.net/forum?id=l6iICoILGB", "poster": "/media/PosterPDFs/NeurIPS%202024/93853.png?t=1731623536.999788", "project": "", "author_site": "Morad Tukan, Loay Mualem, Moran Feldman", "tldr": "", "abstract": "Non-monotone constrained submodular maximization plays a crucial role in various machine learning applications. However, existing algorithms often struggle with a trade-off between approximation guarantees and practical efficiency. The current state-of-the-art is a recent $0.401$-approximation algorithm, but its computational complexity makes it highly impractical. The best practical algorithms for the problem only guarantee $1/e$-approximation. In this work, we present a novel algorithm for submodular maximization subject to a cardinality constraint that combines a guarantee of $0.385$-approximation with a low and practical query complexity of $O(n+k^2)$. Furthermore, we evaluate our algorithm's performance through extensive machine learning applications, including Movie Recommendation, Image Summarization, and more. These evaluations demonstrate the efficacy of our approach.", "keywords": "Submodular maximization;Discrete optimization;Machine learning", "primary_area": "optimization", "supplementary_material": "", "author": "Murad Tukan;Loay Mualem;Moran Feldman", "authorids": "~Murad_Tukan1;~Loay_Mualem2;~Moran_Feldman1", "gender": "M;M;", "homepage": ";;https://cs.haifa.ac.il/~moranfe/", "dblp": "259/0724;293/7129.html;41/771", "google_scholar": ";;https://scholar.google.co.il/citations?hl=en", "orcid": ";;0000-0002-1535-2979", "linkedin": ";;", "or_profile": "~Murad_Tukan1;~Loay_Mualem2;~Moran_Feldman1", "aff": "Dataheroes;University of Haifa;University of Haifa", "aff_domain": "dataheroes.ai;haifa.ac.il;haifa.ac.il", "position": "Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\ntukan2024practical,\ntitle={Practical \\$0.385\\$-Approximation for Submodular Maximization Subject to a Cardinality Constraint},\nauthor={Murad Tukan and Loay Mualem and Moran Feldman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l6iICoILGB}\n}", "github": "", "reviewers": "yvAe;2jpf;q8Tb;USjH", "pdf_size": 600101, "rating": "5;6;6;7", "confidence": "4;2;4;5", "soundness": "2;3;2;4", "novelty": "2;3;2;3", "presentation": "2;2;4;3", "wc_summary": "66;70;293;74", "wc_strengths": "52;57;45;160", "wc_weaknesses": "76;66;246;77", "wc_questions": "4;147;30;134", "wc_limitations": "1;31;1;1", "wc_review": "199;371;615;446", "wc_reply_reviewers": "11;62;27;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 125.75, 96.60324787500677 ], "wc_strengths_avg": [ 78.5, 47.24669300596604 ], "wc_weaknesses_avg": [ 116.25, 75.0345753636282 ], "wc_questions_avg": [ 78.75, 62.599420923839226 ], "wc_limitations_avg": [ 8.5, 12.99038105676658 ], "wc_review_avg": [ 407.75, 149.45128805065548 ], "wc_reply_reviewers_avg": [ 27.5, 21.02974084481309 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13107076841472227166&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "dataheroes.ai;haifa.ac.il;haifa.ac.il", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Dataheroes;University of Haifa", "aff_unique_dep": ";", "aff_unique_url": ";https://www.haifa.ac.il", "aff_unique_abbr": ";UoH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";Israel" }, { "title": "MambaLLIE: Implicit Retinex-Aware Low Light Enhancement with Global-then-Local State Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93852", "id": "l6xVqzm72i", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l6xVqzm72i", "openreview": "https://openreview.net/forum?id=l6xVqzm72i", "poster": "/media/PosterPDFs/NeurIPS%202024/93852.png?t=1731563582.1257293", "project": "", "author_site": "Jiangwei Weng, Zhiqiang Yan, Ying Tai, Jianjun Qian, Jian Yang, Jun Li", "tldr": "", "abstract": "Recent advances in low light image enhancement have been dominated by Retinex-based learning framework, leveraging convolutional neural networks (CNNs) and Transformers. However, the vanilla Retinex theory primarily addresses global illumination degradation and neglects local issues such as noise and blur in dark conditions. Moreover, CNNs and Transformers struggle to capture global degradation due to their limited receptive fields. While state space models (SSMs) have shown promise in the long-sequence modeling, they face challenges in combining local invariants and global context in visual data. In this paper, we introduce MambaLLIE, an implicit Retinex-aware low light enhancer featuring a global-then-local state space design. We first propose a Local-Enhanced State Space Module (LESSM) that incorporates an augmented local bias within a 2D selective scan mechanism, enhancing the original SSMs by preserving local 2D dependency. Additionally, an Implicit Retinex-aware Selective Kernel module (IRSK) dynamically selects features using spatially-varying operations, adapting to varying inputs through an adaptive kernel selection process. Our Global-then-Local State Space Block (GLSSB) integrates LESSM and IRSK with layer normalization (LN) as its core. This design enables MambaLLIE to achieve comprehensive global long-range modeling and flexible local feature aggregation. Extensive experiments demonstrate that MambaLLIE significantly outperforms state-of-the-art CNN and Transformer-based methods. Our code is available at https://github.com/wengjiangwei/MambaLLIE.", "keywords": "Low Light Enhancement;State Space Models;Feature Control", "primary_area": "machine_vision", "supplementary_material": "/attachment/7614fe2adeff99c6083bb41f73ca3e8b76498b64.zip", "author": "Jiangwei Weng;Zhiqiang Yan;Ying Tai;Jianjun Qian;Jian Yang;Jun Li", "authorids": "~Jiangwei_Weng1;~Zhiqiang_Yan1;~Ying_Tai1;~Jianjun_Qian2;~Jian_Yang1;~Jun_Li16", "gender": "M;M;M;M;M;M", "homepage": "http://wengjiangwei.github.io;https://yanzq95.github.io/;https://tyshiwo.github.io/;http://www.patternrecognition.asia/qian/;;", "dblp": "349/1730.html;;158/1384;03/3289;y/JianYang3.html;", "google_scholar": "https://scholar.google.com.hk/citations?user=6aOtZU4AAAAJ;hnrkzIEAAAAJ;NKaiUasAAAAJ;;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ;iGPEwQsAAAAJ", "orcid": ";0000-0003-3502-438X;;0000-0002-0968-8556;;", "linkedin": ";;;;;", "or_profile": "~Jiangwei_Weng1;~Zhiqiang_Yan1;~Ying_Tai1;~Jianjun_Qian2;~Jian_Yang1;~Jun_Li16", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University;Nanjing University of Science and Techonology;Nanjing University of Science and Technology;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;nju.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nweng2024mamballie,\ntitle={Mamba{LLIE}: Implicit Retinex-Aware Low Light Enhancement with Global-then-Local State Space},\nauthor={Jiangwei Weng and Zhiqiang Yan and Ying Tai and Jianjun Qian and Jian Yang and Jun Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l6xVqzm72i}\n}", "github": "", "reviewers": "1bAi;ahrr;DUEm;1rUY", "pdf_size": 0, "rating": "4;4;6;8", "confidence": "5;5;5;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "59;54;68;74", "wc_strengths": "27;46;45;176", "wc_weaknesses": "189;151;62;247", "wc_questions": "21;23;59;15", "wc_limitations": "9;11;19;11", "wc_review": "305;285;253;523", "wc_reply_reviewers": "0;92;0;100", "wc_reply_authors": "77;440;0;155", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 5.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.75, 7.75806032459145 ], "wc_strengths_avg": [ 73.5, 59.659450215368224 ], "wc_weaknesses_avg": [ 162.25, 67.22118341713421 ], "wc_questions_avg": [ 29.5, 17.284386017443605 ], "wc_limitations_avg": [ 12.5, 3.840572873934304 ], "wc_review_avg": [ 341.5, 106.41780866001704 ], "wc_reply_reviewers_avg": [ 48.0, 48.08326112068523 ], "wc_reply_authors_avg": [ 168.0, 166.32648616501223 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2050106596750682354&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "njust.edu.cn;njust.edu.cn;nju.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Nanjing University of Science and Technology;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;https://www.nju.edu.cn", "aff_unique_abbr": "NUST;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Toward Conditional Distribution Calibration in Survival Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93851", "id": "l8XnqbQYBK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=l8XnqbQYBK", "openreview": "https://openreview.net/forum?id=l8XnqbQYBK", "poster": "/media/PosterPDFs/NeurIPS%202024/93851.png?t=1732823415.0879714", "project": "", "author_site": "Shi-ang Qi, Yakun Yu, Russell Greiner", "tldr": "", "abstract": "Survival prediction often involves estimating the time-to-event distribution from censored datasets. Previous approaches have focused on enhancing discrimination and marginal calibration. In this paper, we highlight the significance of *conditional calibration* for real-world applications \u2013 especially its role in individual decision-making. We propose a method based on conformal prediction that uses the model\u2019s predicted individual survival probability at that instance\u2019s observed time. This method effectively improves the model\u2019s marginal and conditional calibration, without compromising discrimination. We provide asymptotic theoretical guarantees for both marginal and conditional calibration and test it extensively across 15 diverse real-world datasets, demonstrating the method\u2019s practical effectiveness and\nversatility in various settings.", "keywords": "survival analysis; calibration; conformal prediction; censorship; discrimination", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/94dd887e4b1d9cde90ef738657ea1c65e44f6979.zip", "author": "Shi-ang Qi;Yakun Yu;Russell Greiner", "authorids": "~Shi-ang_Qi1;~Yakun_Yu1;~Russell_Greiner2", "gender": "M;F;M", "homepage": "https://shi-ang.github.io/;;https://webdocs.cs.ualberta.ca/~rgreiner/", "dblp": "229/0946.html;;g/RussellGreiner", "google_scholar": "https://scholar.google.ca/citations?user=EcXcCD4AAAAJ;;https://scholar.google.com.tw/citations?user=Rn7APGIAAAAJ", "orcid": "0000-0002-4319-5501;0000-0003-4571-1570;0000-0001-8327-934X", "linkedin": "shi-ang-qi-236819197/;;", "or_profile": "~Shi-ang_Qi1;~Yakun_Yu1;~Russell_Greiner1", "aff": "University of Alberta;University of Alberta;University of Alberta", "aff_domain": "cs.ualberta.ca;ualberta.ca;ualberta.ca", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nqi2024toward,\ntitle={Toward Conditional Distribution Calibration in Survival Prediction},\nauthor={Shi-ang Qi and Yakun Yu and Russell Greiner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=l8XnqbQYBK}\n}", "github": "", "reviewers": "b9k1;rRGm;a6Gj;kAoa", "pdf_size": 27572108, "rating": "3;5;7;7", "confidence": "4;3;4;4", "soundness": "1;3;3;4", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "15;90;116;98", "wc_strengths": "15;97;112;107", "wc_weaknesses": "704;137;265;130", "wc_questions": "1;62;184;112", "wc_limitations": "10;16;1;17", "wc_review": "745;402;678;464", "wc_reply_reviewers": "0;30;67;99", "wc_reply_authors": "0;0;0;106", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.75, 38.55110244856819 ], "wc_strengths_avg": [ 82.75, 39.486548342441885 ], "wc_weaknesses_avg": [ 309.0, 234.3000213401612 ], "wc_questions_avg": [ 89.75, 67.12814238454689 ], "wc_limitations_avg": [ 11.0, 6.363961030678928 ], "wc_review_avg": [ 572.25, 142.94120294722583 ], "wc_reply_reviewers_avg": [ 49.0, 37.36977388210959 ], "wc_reply_authors_avg": [ 26.5, 45.89934640057525 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sRc2QaMxAkEJ:scholar.google.com/&scioq=Toward+Conditional+Distribution+Calibration+in+Survival+Prediction&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "cs.ualberta.ca;ualberta.ca;ualberta.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "l985bXCatk", "title": "LRVS-Fashion: Extending Visual Search with Referring Instructions", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "This paper introduces a new challenge for image similarity search in the context of fashion, addressing the inherent ambiguity in this domain stemming from complex images. We present Referred Visual Search (RVS), a task allowing users to define more precisely the desired similarity, following recent interest in the industry. We release a new large public dataset, LRVS-Fashion, consisting of 272k fashion products with 842k images extracted from fashion catalogs, designed explicitly for this task. However, unlike traditional visual search methods in the industry, we demonstrate that superior performance can be achieved by bypassing explicit object detection and adopting weakly-supervised conditional contrastive learning on image tuples. Our method is lightweight and demonstrates robustness, reaching Recall at one superior to strong detection-based baselines against 2M distractors.", "keywords": "Visual Search;Image Embedding;Retrieval", "primary_area": "", "supplementary_material": "/attachment/081579b5cc85731f69175475531953a5732174af.pdf", "author": "Simon Lepage;Jeremie Mary;David Picard", "authorids": "~Simon_Lepage1;~Jeremie_Mary1;~David_Picard1", "gender": "M;M;", "homepage": "https://simon-lepage.github.io;;", "dblp": "348/9757;17/3180;", "google_scholar": ";https://scholar.google.fr/citations?user=T3dQRjAAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Simon_Lepage1;~Jeremie_Mary1;~David_Picard1", "aff": "Criteo;Inria;", "aff_domain": "criteo.com;inria.fr;", "position": "PhD student;Associate Professor;", "bibtex": "@misc{\nanonymous2024lrvsfashion,\ntitle={{LRVS}-Fashion: Extending Visual Search with Referring Instructions},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=l985bXCatk}\n}", "github": "", "project": "", "reviewers": "UHox;rUM1;3aj4", "site": "https://openreview.net/forum?id=l985bXCatk", "pdf_size": 2449131, "rating": "5;6;7", "confidence": "3;4;4", "wc_summary_and_contributions": "58;79;125", "wc_strengths": "37;53;40", "wc_improvement": "305;32;180", "wc_limitations": "40;43;16", "wc_correctness": "3;25;40", "wc_clarity": "4;14;1", "wc_relation_to_prior_work": "55;80;1", "wc_documentation": "36;37;30", "wc_additional_feedback": "1;1;1", "wc_review": "539;364;434", "wc_reply_reviewers": "77;0;0", "wc_reply_authors": "275;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 87.33333333333333, 27.980151695244412 ], "wc_strengths_avg": [ 43.333333333333336, 6.944222218666553 ], "wc_improvement_avg": [ 172.33333333333334, 111.58355115736768 ], "wc_limitations_avg": [ 33.0, 12.083045973594572 ], "wc_correctness_avg": [ 22.666666666666668, 15.195028426721976 ], "wc_clarity_avg": [ 6.333333333333333, 5.557777333511022 ], "wc_relation_to_prior_work_avg": [ 45.333333333333336, 32.96799795087486 ], "wc_documentation_avg": [ 34.333333333333336, 3.0912061651652345 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 445.6666666666667, 71.91816336797139 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 36.29814810090944 ], "wc_reply_authors_avg": [ 91.66666666666667, 129.63624321753372 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16667221937806636819&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Criteo;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.criteo.com;https://www.inria.fr", "aff_unique_abbr": "Criteo;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "QUEST: Quadruple Multimodal Contrastive Learning with Constraints and Self-Penalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93850", "id": "lA48H7pW3q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lA48H7pW3q", "openreview": "https://openreview.net/forum?id=lA48H7pW3q", "poster": "/media/PosterPDFs/NeurIPS%202024/93850.png?t=1731490956.0846884", "project": "", "author_site": "Qi Song, Tianxiang Gong, Shiqi Gao, Haoyi Zhou, Jianxin Li", "tldr": "", "abstract": "Multimodal contrastive learning (MCL) has recently demonstrated significant success across various tasks. However, the existing MCL treats all negative samples equally and ignores the potential semantic association with positive samples, which limits the model's ability to achieve fine-grained alignment. In multi-view scenarios, MCL tends to prioritize shared information while neglecting modality-specific unique information across different views, leading to feature suppression and suboptimal performance in downstream tasks. To address these limitations, we propose a novel contrastive framework name *QUEST: Quadruple Multimodal Contrastive Learning with Constraints and Self-Penalization*. In the QUEST framework, we propose quaternion contrastive objectives and orthogonal constraints to extract sufficient unique information. Meanwhile, a shared information-guided penalization is introduced to ensure that shared information does not excessively influence the optimization of unique information. Our method leverages quaternion vector spaces to simultaneously optimize shared and unique information. Experiments on multiple datasets show that our method achieves superior performance in multimodal contrastive learning benchmarks. On public benchmark, our approach achieves state-of-the-art performance, and on synthetic shortcut datasets, we outperform existing baseline methods by an average of 97.95\\% on the CLIP model.", "keywords": "Contrastive Learning;Multi-View Learning;Multimodal Learning;Vision-Language Representation Degeneration", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/8d3204bdc96772a07a1531ee3879d0acdc6f5668.zip", "author": "Qi Song;Tianxiang Gong;Shiqi Gao;Haoyi Zhou;Jianxin Li", "authorids": "~Qi_Song9;~Tianxiang_Gong1;~Shiqi_Gao2;~Haoyi_Zhou1;~Jianxin_Li3", "gender": "M;M;M;M;M", "homepage": "https://github.com/Peterwatwec;https://www.gongtx.org;https://orcid.org/0009-0007-1483-5550;https://www.zhouhaoyi.com/;http://myjianxin.github.io", "dblp": ";;;162/1287;l/JianxinLi-2.html", "google_scholar": ";;;mbrFlN0AAAAJ;EY2lqD0AAAAJ", "orcid": ";0009-0007-1374-1237;0009-0007-1483-5550;0000-0002-2393-3634;0000-0001-5152-0055", "linkedin": ";;;haoyi-zhou-54a7a69a/;", "or_profile": "~Qi_Song9;~Tianxiang_Gong1;~Shiqi_Gao2;~Haoyi_Zhou1;~Jianxin_Li3", "aff": "The Insititute of Advanced Computing Technology, Beijing University of Aeronautics and Astronautics;Beihang University;Beihang University;Beihang University;Beihang University ", "aff_domain": "act.buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "MS student;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsong2024quest,\ntitle={{QUEST}: Quadruple Multimodal Contrastive Learning with Constraints and Self-Penalization},\nauthor={Qi Song and Tianxiang Gong and Shiqi Gao and Haoyi Zhou and Jianxin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lA48H7pW3q}\n}", "github": "", "reviewers": "9n5E;KzFg;zJ9C", "pdf_size": 3440178, "rating": "7;7;8", "confidence": "4;3;4", "soundness": "4;2;3", "novelty": "3;2;3", "presentation": "4;2;3", "wc_summary": "75;57;95", "wc_strengths": "28;70;99", "wc_weaknesses": "42;71;106", "wc_questions": "41;21;20", "wc_limitations": "11;21;16", "wc_review": "197;240;336", "wc_reply_reviewers": "27;21;29", "wc_reply_authors": "37;52;21", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 75.66666666666667, 15.520595635763755 ], "wc_strengths_avg": [ 65.66666666666667, 29.147136318265567 ], "wc_weaknesses_avg": [ 73.0, 26.166135875720485 ], "wc_questions_avg": [ 27.333333333333332, 9.672412085697939 ], "wc_limitations_avg": [ 16.0, 4.08248290463863 ], "wc_review_avg": [ 257.6666666666667, 58.105268454953574 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 3.39934634239519 ], "wc_reply_authors_avg": [ 36.666666666666664, 12.657891697365017 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GPOh7n91HxQJ:scholar.google.com/&scioq=QUEST:+Quadruple+Multimodal+Contrastive+Learning+with+Constraints+and+Self-Penalization&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "act.buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Beijing University of Aeronautics and Astronautics;Beihang University", "aff_unique_dep": "The Insititute of Advanced Computing Technology;", "aff_unique_url": "http://www.buaa.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA;BUAA", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "TurboHopp: Accelerated Molecule Scaffold Hopping with Consistency Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93849", "id": "lBh5kuuY1L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lBh5kuuY1L", "openreview": "https://openreview.net/forum?id=lBh5kuuY1L", "poster": "/media/PosterPDFs/NeurIPS%202024/93849.png?t=1733707191.9100008", "project": "", "author_site": "Kiwoong Yoo, Owen Oertell, Junhyun Lee, Sanghoon Lee, Jaewoo Kang", "tldr": "", "abstract": "Navigating the vast chemical space of druggable compounds is a formidable challenge in drug discovery, where generative models are increasingly employed to identify viable candidates. Conditional 3D structure-based drug design (3D-SBDD) models, which take into account complex three-dimensional interactions and molecular geometries, are particularly promising. Scaffold hopping is an efficient strategy that facilitates the identification of similar active compounds by strategically modifying the core structure of molecules, effectively narrowing the wide chemical space and enhancing the discovery of drug-like products. However, the practical application of 3D-SBDD generative models is hampered by their slow processing speeds. To address this bottleneck, we introduce TurboHopp, an accelerated pocket-conditioned 3D scaffold hopping model that merges the strategic effectiveness of traditional scaffold hopping with rapid generation capabilities of consistency models. This synergy not only enhances efficiency but also significantly boosts generation speeds, achieving up to 30 times faster inference speed as well as superior generation quality compared to existing diffusion-based models, establishing TurboHopp as a powerful tool in drug discovery. Supported by faster inference speed, we further optimize our model, using Reinforcement Learning for Consistency Models (RLCM), to output desirable molecules. We demonstrate the broad applicability of TurboHopp across multiple drug discovery scenarios, underscoring its potential in diverse molecular settings.The code is provided at https://github.com/orgw/TurboHopp", "keywords": "Scaffold Hopping;Consistency Models;Diffusion Models;3D Structure-Based Drug Design;Reinforcement Learning;Drug Discovery;Generative Models", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/03b8b017a135c929c3affa4c62c4efe71aa0603f.zip", "author": "Kiwoong Yoo;Owen Oertell;Junhyun Lee;Sanghoon Lee;Jaewoo Kang", "authorids": "~Kiwoong_Yoo1;~Owen_Oertell1;~Junhyun_Lee1;~Sanghoon_Lee5;~Jaewoo_Kang1", "gender": "M;Not Specified;M;M;M", "homepage": ";https://owenoertell.com;https://junhyunlee.com;;https://dmis.korea.ac.kr", "dblp": "368/1411;368/5285;155/8661;;k/JaewooKang", "google_scholar": ";https://scholar.google.com/citations?hl=en;kyZHNxYAAAAJ;https://scholar.google.co.kr/citations?user=HNtrvdIAAAAJ;https://scholar.google.co.kr/citations?user=RaBZafQAAAAJ", "orcid": ";;0000-0002-2385-4047;;0000-0001-6798-9106", "linkedin": "kiwoong-yoo-793924164/;;;sanghoon-lee-5b45971a7/;", "or_profile": "~Kiwoong_Yoo1;~Owen_Oertell1;~Junhyun_Lee1;~Sanghoon_Lee5;~Jaewoo_Kang1", "aff": "Aigen Sciences;Cornell University;Korea University;Korea University;Korea University", "aff_domain": "aigensciences.com;cornell.edu;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "Principal Researcher;Undergrad student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyoo2024turbohopp,\ntitle={TurboHopp: Accelerated Molecule Scaffold Hopping with Consistency Models},\nauthor={Kiwoong Yoo and Owen Oertell and Junhyun Lee and Sanghoon Lee and Jaewoo Kang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lBh5kuuY1L}\n}", "github": "", "reviewers": "M9sk;VN4j;Cq9Y", "pdf_size": 4436756, "rating": "3;4;7", "confidence": "5;3;2", "soundness": "3;2;4", "novelty": "2;3;3", "presentation": "3;2;4", "wc_summary": "72;57;130", "wc_strengths": "63;44;204", "wc_weaknesses": "75;333;113", "wc_questions": "191;3;73", "wc_limitations": "23;6;11", "wc_review": "424;443;531", "wc_reply_reviewers": "0;69;94", "wc_reply_authors": "0;276;20", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 4.666666666666667, 1.699673171197595 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 86.33333333333333, 31.47838764754143 ], "wc_strengths_avg": [ 103.66666666666667, 71.36914988673716 ], "wc_weaknesses_avg": [ 173.66666666666666, 113.72872206361163 ], "wc_questions_avg": [ 89.0, 77.58006616822821 ], "wc_limitations_avg": [ 13.333333333333334, 7.1336448530109 ], "wc_review_avg": [ 466.0, 46.611872593435535 ], "wc_reply_reviewers_avg": [ 54.333333333333336, 39.75200903378623 ], "wc_reply_authors_avg": [ 98.66666666666667, 125.65915096888973 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8910421112136307, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15579716468265202921&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "aigensciences.com;cornell.edu;korea.ac.kr;korea.ac.kr;korea.ac.kr", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "AIGEN Sciences;Cornell University;Korea University", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.cornell.edu;https://www.korea.ac.kr", "aff_unique_abbr": ";Cornell;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "RMLR: Extending Multinomial Logistic Regression into General Geometries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93848", "id": "lBp2cda7sp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lBp2cda7sp", "openreview": "https://openreview.net/forum?id=lBp2cda7sp", "poster": "/media/PosterPDFs/NeurIPS%202024/93848.png?t=1729086064.6160984", "project": "", "author_site": "Ziheng Chen, Yue Song, Rui Wang, Xiaojun Wu, Nicu Sebe", "tldr": "", "abstract": "Riemannian neural networks, which extend deep learning techniques to Riemannian spaces, have gained significant attention in machine learning. To better classify the manifold-valued features, researchers have started extending Euclidean multinomial logistic regression (MLR) into Riemannian manifolds. However, existing approaches suffer from limited applicability due to their strong reliance on specific geometric properties. This paper proposes a framework for designing Riemannian MLR over general geometries, referred to as RMLR. Our framework only requires minimal geometric properties, thus exhibiting broad applicability and enabling its use with a wide range of geometries. Specifically, we showcase our framework on the Symmetric Positive Definite (SPD) manifold and special orthogonal group, i.e., the set of rotation matrices. On the SPD manifold, we develop five families of SPD MLRs under five types of power-deformed metrics. On rotation matrices we propose Lie MLR based on the popular bi-invariant metric. Extensive experiments on different Riemannian backbone networks validate the effectiveness of our framework.", "keywords": "Riemannian neural networks;Matrix manifolds;SPD manifolds;Special orthogonal groups", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ziheng Chen;Yue Song;Rui Wang;Xiaojun Wu;Nicu Sebe", "authorids": "~Ziheng_Chen2;~Yue_Song1;~Rui_Wang39;~Xiaojun_Wu2;~Nicu_Sebe1", "gender": "M;M;M;M;M", "homepage": "https://gitzh-chen.github.io/;https://kingjamessong.github.io/;https://scholar.google.com/citations?user=5IST34sAAAAJ&hl=en;http://disi.unitn.it/~sebe/;https://github.com/GitWR", "dblp": ";11/1346;13/5168-1.html;20/3519;", "google_scholar": "47Y-sn4AAAAJ;Uza2i10AAAAJ;5IST34sAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;https://scholar.google.com.hk/citations?user=Ow1zCswAAAAJ", "orcid": "0000-0002-5366-7293;;0009-0000-0199-5001;0000-0002-6597-7248;0000-0002-9984-1752", "linkedin": "ziheng-chen-geometrier/;;;;", "or_profile": "~Ziheng_Chen2;~Yue_Song1;~Xiaojun_Wu2;~Nicu_Sebe1;~Rui_Wang14", "aff": "University of Trento;University of Trento, Italy;Jiangnan University;University of Trento;Jiangnan University", "aff_domain": "unitn.it;unitn.it;jiangnan.edu.cn;unitn.it;jiangnan.edu", "position": "PhD student;PhD student;Full Professor;Full Professor;Lecturer", "bibtex": "@inproceedings{\nchen2024rmlr,\ntitle={{RMLR}: Extending Multinomial Logistic Regression into General Geometries},\nauthor={Ziheng Chen and Yue Song and Rui Wang and Xiaojun Wu and Nicu Sebe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lBp2cda7sp}\n}", "github": "", "reviewers": "jvDJ;5QCn;dBzm", "pdf_size": 8727518, "rating": "7;7;8", "confidence": "4;3;4", "soundness": "4;3;4", "novelty": "4;2;4", "presentation": "4;3;3", "wc_summary": "52;26;112", "wc_strengths": "64;36;144", "wc_weaknesses": "54;54;52", "wc_questions": "86;40;29", "wc_limitations": "5;1;1", "wc_review": "261;157;338", "wc_reply_reviewers": "20;19;80", "wc_reply_authors": "28;18;17", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 63.333333333333336, 36.01234356285202 ], "wc_strengths_avg": [ 81.33333333333333, 45.7626728046147 ], "wc_weaknesses_avg": [ 53.333333333333336, 0.9428090415820634 ], "wc_questions_avg": [ 51.666666666666664, 24.689178916188272 ], "wc_limitations_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_review_avg": [ 252.0, 74.16647940051264 ], "wc_reply_reviewers_avg": [ 39.666666666666664, 28.522895287041877 ], "wc_reply_authors_avg": [ 21.0, 4.96655480858378 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14464656675341048391&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "unitn.it;unitn.it;jiangnan.edu.cn;unitn.it;jiangnan.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "University of Trento;Jiangnan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.unitn.it;https://www.jnu.edu.cn", "aff_unique_abbr": "UniTN;JNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Italy;China" }, { "title": "Replicable Uniformity Testing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93847", "id": "lCiqPxcyC0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lCiqPxcyC0", "openreview": "https://openreview.net/forum?id=lCiqPxcyC0", "poster": "/media/PosterPDFs/NeurIPS%202024/93847.png?t=1732469236.6553512", "project": "", "author_site": "Sihan Liu, Christopher Ye", "tldr": "", "abstract": "Uniformity testing is arguably one of the most fundamental distribution testing problems. Given sample access to an unknown distribution $\\mathbf{p}$ on $[n]$, one must decide if $\\mathbf{p}$ is uniform or $\\varepsilon$-far from uniform (in total variation distance). A long line of work established that uniformity testing has sample complexity $\\Theta(\\sqrt{n}\\varepsilon^{-2})$. However, when the input distribution is neither uniform nor far from uniform, known algorithms may have highly non-replicable behavior. \nConsequently, if these algorithms are applied in scientific studies, they may lead to contradictory results that erode public trust in science.\n\nIn this work, we revisit uniformity testing under the framework of algorithmic replicability [STOC '22], requiring the algorithm to be replicable under arbitrary distributions. While replicability typically incurs a $\\rho^{-2}$ factor overhead in sample complexity, we obtain a replicable uniformity tester using only $\\tilde{O}(\\sqrt{n} \\varepsilon^{-2} \\rho^{-1})$ samples. To our knowledge, this is the first replicable learning algorithm with (nearly) linear dependence on $\\rho$.\n\nLastly, we consider a class of ``symmetric\" algorithms [FOCS '00] whose outputs are invariant under relabeling of the domain $[n]$, which includes all existing uniformity testers (including ours). For this natural class of algorithms, we prove a nearly matching sample complexity lower bound for replicable uniformity testing.", "keywords": "Replicability;Uniformity Testing", "primary_area": "learning_theory", "supplementary_material": "", "author": "Sihan Liu;Christopher Ye", "authorids": "~Sihan_Liu2;~Christopher_Ye1", "gender": "M;M", "homepage": "https://lteins.github.io/;https://czye17.github.io", "dblp": ";304/2086", "google_scholar": "eq7JPDgAAAAJ;WmAZ-WIAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sihan_Liu2;~Christopher_Ye1", "aff": "Computer Science and Engineering Department, University of California, San Diego;University of California, San Diego", "aff_domain": "cse.ucsd.edu;ucsd.edu", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\nliu2024replicable,\ntitle={Replicable Uniformity Testing},\nauthor={Sihan Liu and Christopher Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lCiqPxcyC0}\n}", "github": "", "reviewers": "MK1R;H8kM;g6cx;sPzn", "pdf_size": 478489, "rating": "4;5;7;8", "confidence": "5;3;4;4", "soundness": "1;3;4;4", "novelty": "1;2;3;4", "presentation": "1;3;4;3", "wc_summary": "69;123;186;159", "wc_strengths": "1;43;278;90", "wc_weaknesses": "62;15;93;69", "wc_questions": "1;1;34;9", "wc_limitations": "1;1;1;1", "wc_review": "134;183;592;328", "wc_reply_reviewers": "5;20;21;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 1.224744871391589 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 134.25, 43.80282525134652 ], "wc_strengths_avg": [ 103.0, 105.82769013826201 ], "wc_weaknesses_avg": [ 59.75, 28.27874643614883 ], "wc_questions_avg": [ 11.25, 13.534677683639163 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 309.25, 178.1507437537099 ], "wc_reply_reviewers_avg": [ 12.75, 7.75806032459145 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.223606797749979, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KGRn0ktE4o0J:scholar.google.com/&scioq=Replicable+Uniformity+Testing&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cse.ucsd.edu;ucsd.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Computer Science and Engineering Department", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "John Ellipsoids via Lazy Updates", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93846", "id": "lCj0Rvr4D6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lCj0Rvr4D6", "openreview": "https://openreview.net/forum?id=lCj0Rvr4D6", "poster": "", "project": "", "author_site": "David Woodruff, Taisuke Yasuda", "tldr": "", "abstract": "We give a faster algorithm for computing an approximate John ellipsoid around $n$ points in $d$ dimensions. The best known prior algorithms are based on repeatedly computing the leverage scores of the points and reweighting them by these scores (Cohen et al., 2019). We show that this algorithm can be substantially sped up by delaying the computation of high accuracy leverage scores by using sampling, and then later computing multiple batches of high accuracy leverage scores via fast rectangular matrix multiplication. We also give low-space streaming algorithms for John ellipsoids using similar ideas.", "keywords": "John ellipsoid;sketching;sampling;fast matrix multiplication", "primary_area": "optimization", "supplementary_material": "", "author": "David Woodruff;Taisuke Yasuda", "authorids": "~David_Woodruff1;~Taisuke_Yasuda1", "gender": "M;M", "homepage": "http://www.cs.cmu.edu/~dwoodruf/;https://taisukeyasuda.github.io/", "dblp": "w/DPWoodruff;177/9741-2", "google_scholar": "https://scholar.google.com.tw/citations?user=0G2t-6sAAAAJ;c62WqiEAAAAJ", "orcid": ";", "linkedin": ";taisukeyasuda/", "or_profile": "~David_Woodruff1;~Taisuke_Yasuda1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nwoodruff2024john,\ntitle={John Ellipsoids via Lazy Updates},\nauthor={David Woodruff and Taisuke Yasuda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lCj0Rvr4D6}\n}", "github": "", "reviewers": "hiPW;wsbb;VRzA", "pdf_size": 363527, "rating": "3;6;6", "confidence": "2;3;4", "soundness": "2;2;4", "novelty": "2;2;3", "presentation": "1;2;3", "wc_summary": "30;43;118", "wc_strengths": "17;5;139", "wc_weaknesses": "37;35;293", "wc_questions": "2;74;5", "wc_limitations": "1;1;1", "wc_review": "87;158;556", "wc_reply_reviewers": "0;0;229", "wc_reply_authors": "0;0;135", "reply_reviewers": "0;0;2", "reply_authors": "1;1;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 63.666666666666664, 38.784303812524755 ], "wc_strengths_avg": [ 53.666666666666664, 60.53832578531462 ], "wc_weaknesses_avg": [ 121.66666666666667, 121.15371320030691 ], "wc_questions_avg": [ 27.0, 33.25657829663178 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 267.0, 206.3992894044615 ], "wc_reply_reviewers_avg": [ 76.33333333333333, 107.95163526114627 ], "wc_reply_authors_avg": [ 45.0, 63.63961030678928 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9gQkqcoXGcAJ:scholar.google.com/&scioq=John+Ellipsoids+via+Lazy+Updates&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cmu.edu;cs.cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Real-time Core-Periphery Guided ViT with Smart Data Layout Selection on Mobile Devices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93845", "id": "lD7ziaMHbf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lD7ziaMHbf", "openreview": "https://openreview.net/forum?id=lD7ziaMHbf", "poster": "/media/PosterPDFs/NeurIPS%202024/93845.png?t=1732476779.3549318", "project": "", "author_site": "Zhihao Shu, Xiaowei Yu, Zihao Wu, Wenqi Jia, Yinchen Shi, Miao Yin, Tianming Liu, Dajiang Zhu, Wei Niu", "tldr": "", "abstract": "Mobile devices have become essential enablers for AI applications, particularly in scenarios that require real-time performance. Vision Transformer (ViT) has become a fundamental cornerstone in this regard due to its high accuracy. Recent efforts have been dedicated to developing various transformer architectures that offer im- proved accuracy while reducing the computational requirements. However, existing research primarily focuses on reducing the theoretical computational complexity through methods such as local attention and model pruning, rather than considering realistic performance on mobile hardware. Although these optimizations reduce computational demands, they either introduce additional overheads related to data transformation (e.g., Reshape and Transpose) or irregular computation/data-access patterns. These result in significant overhead on mobile devices due to their limited bandwidth, which even makes the latency worse than vanilla ViT on mobile. In this paper, we present ECP-ViT, a real-time framework that employs the core-periphery principle inspired by the brain functional networks to guide self-attention in ViTs and enable the deployment of ViT models on smartphones. We identify the main bottleneck in transformer structures caused by data transformation and propose a hardware-friendly core-periphery guided self-attention to decrease computation demands. Additionally, we design the system optimizations for intensive data transformation in pruned models. ECP-ViT, with the proposed algorithm-system co-optimizations, achieves a speedup of 4.6\u00d7 to 26.9\u00d7 on mobile GPUs across four datasets: STL-10, CIFAR100, TinyImageNet, and ImageNet.", "keywords": "Mobile DNN Acceleration;Model Pruning;ViT", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zhihao Shu;Xiaowei Yu;Zihao Wu;Wenqi Jia;Yinchen Shi;Miao Yin;Tianming Liu;Dajiang Zhu;Wei Niu", "authorids": "~Zhihao_Shu1;~Xiaowei_Yu1;~Zihao_Wu1;~Wenqi_Jia2;~Yinchen_Shi1;~Miao_Yin1;~Tianming_Liu3;~Dajiang_Zhu1;~Wei_Niu3", "gender": "M;M;M;Not Specified;M;;M;M;M", "homepage": ";http://shawey94.github.io/;;https://github.com/BitCalSaul;https://www.linkedin.com/in/justin-shi/;https://noodle-lab.github.io/;https://cobweb.cs.uga.edu/~tliu/;https://mentis.uta.edu/explore/profile/dajiang-zhu;https://www.niuwei.info", "dblp": "369/3951;;185/2651;254/8216-3;;199/1982;96/5013-1.html;https://dblp.uni-trier.de/pers/hd/z/Zhu:Dajiang;68/828-2.html", "google_scholar": "dkxKQq0AAAAJ;Kc1FjToAAAAJ;RMvoE4sAAAAJ;3bXf6H0AAAAJ;;ILDdu98AAAAJ;92RPXm0AAAAJ;cFgudIYAAAAJ;w1RoaOMAAAAJ", "orcid": ";;0000-0001-7483-6570;0009-0007-9473-6703;;;;;", "linkedin": ";shawey94;;;;miao-yin-55ab64170/;;;", "or_profile": "~Zhihao_Shu1;~Xiaowei_Yu1;~Zihao_Wu1;~Wenqi_Jia2;~Yinchen_Shi1;~Miao_Yin1;~Tianming_Liu3;~Dajiang_Zhu1;~Wei_Niu3", "aff": "University of Georgia;University of Texas at Arlington, University of Texas at Arlington;University of Georgia;University of Texas at Arlington;Amazon;The University of Texas at Arlington;University of Georgia;University of Texas at Arlington;University of Georgia", "aff_domain": "uga.edu;mavs.uta.edu;uga.edu;uta.edu;amazon.com;uta.edu;uga.edu;uta.edu;uga.edu", "position": "PhD student;PhD student;PhD student;PhD student;Software Engineer, Machine Learning;Assistant Professor;Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nshu2024realtime,\ntitle={Real-time Core-Periphery Guided ViT with Smart Data Layout Selection on Mobile Devices},\nauthor={Zhihao Shu and Xiaowei Yu and Zihao Wu and Wenqi Jia and Yinchen Shi and Miao Yin and Tianming Liu and Dajiang Zhu and Wei Niu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lD7ziaMHbf}\n}", "github": "", "reviewers": "MoNf;wumZ;gvXM;PnBL;r1Lw", "pdf_size": 1465460, "rating": "3;4;5;6;7", "confidence": "3;3;3;5;3", "soundness": "2;3;3;4;3", "novelty": "3;2;2;3;3", "presentation": "2;3;3;3;2", "wc_summary": "57;69;148;46;109", "wc_strengths": "23;99;36;49;63", "wc_weaknesses": "30;135;61;147;94", "wc_questions": "320;150;67;22;29", "wc_limitations": "4;1;2;5;38", "wc_review": "434;454;314;269;333", "wc_reply_reviewers": "0;0;25;0;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 85.8, 37.690317058894586 ], "wc_strengths_avg": [ 54.0, 26.138094804327267 ], "wc_weaknesses_avg": [ 93.4, 43.98454273946701 ], "wc_questions_avg": [ 117.6, 110.96413835109071 ], "wc_limitations_avg": [ 10.0, 14.071247279470288 ], "wc_review_avg": [ 360.8, 71.32292758994123 ], "wc_reply_reviewers_avg": [ 7.4, 9.951884243699784 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3535533905932738, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AFlrvLUV8hYJ:scholar.google.com/&scioq=Real-time+Core-Periphery+Guided+ViT+with+Smart+Data+Layout+Selection+on+Mobile+Devices&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "uga.edu;mavs.uta.edu;uga.edu;uta.edu;amazon.com;uta.edu;uga.edu;uta.edu;uga.edu", "author_num": 9, "aff_unique_index": "0;1;0;1;2;1;0;1;0", "aff_unique_norm": "University of Georgia;University of Texas at Arlington;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.uga.edu;https://www.uta.edu;https://www.amazon.com", "aff_unique_abbr": "UGA;UTA;Amazon", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Arlington", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Quadratic Quantum Variational Monte Carlo", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93844", "id": "lDtABI541U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lDtABI541U", "openreview": "https://openreview.net/forum?id=lDtABI541U", "poster": "/media/PosterPDFs/NeurIPS%202024/93844.png?t=1733895280.8770635", "project": "", "author_site": "Baiyu Su, Qiang Liu", "tldr": "", "abstract": "This paper introduces the Quadratic Quantum Variational Monte Carlo (Q$^2$VMC) algorithm, an innovative algorithm in quantum chemistry that significantly enhances the efficiency and accuracy of solving the Schr\u00f6dinger equation. Inspired by the discretization of imaginary-time Schr\u00f6dinger evolution, Q$^2$VMC employs a novel quadratic update mechanism that integrates seamlessly with neural network-based ansatzes. Our extensive experiments showcase Q$^2$VMC's superior performance, achieving faster convergence and lower ground state energies in wavefunction optimization across various molecular systems, without additional computational cost. This study not only advances the field of computational quantum chemistry but also highlights the important role of discretized evolution in variational quantum algorithms, offering a scalable and robust framework for future quantum research.", "keywords": "AI for science;Machine learning for physics;Machine learning for chemistry;Quantum physics;Variational Monte Carlo;MCMC;Transformers", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/40c42f218498b9a391b198f109c40a4b2bcef9e6.zip", "author": "Baiyu Su;qiang liu", "authorids": "~Baiyu_Su1;~qiang_liu4", "gender": ";M", "homepage": ";https://www.cs.utexas.edu/~lqiang/", "dblp": "359/3874;61/3234-1", "google_scholar": ";https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";", "linkedin": "baiyu-su;", "or_profile": "~Baiyu_Su1;~Qiang_Liu1", "aff": "University of Cambridge;University of Texas, Austin", "aff_domain": "cam.ac.uk;utexas.edu", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nsu2024quadratic,\ntitle={Quadratic Quantum Variational Monte Carlo},\nauthor={Baiyu Su and qiang liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lDtABI541U}\n}", "github": "", "reviewers": "L9eG;77wD;2zvx;yQzZ", "pdf_size": 919683, "rating": "5;5;5;5", "confidence": "5;4;3;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "1;2;3;3", "wc_summary": "46;73;62;43", "wc_strengths": "36;54;80;22", "wc_weaknesses": "217;139;238;50", "wc_questions": "191;57;32;3", "wc_limitations": "1;31;82;14", "wc_review": "491;354;494;132", "wc_reply_reviewers": "252;43;70;34", "wc_reply_authors": "858;93;252;245", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 56.0, 12.186057606953941 ], "wc_strengths_avg": [ 48.0, 21.6794833886788 ], "wc_weaknesses_avg": [ 161.0, 73.94254526319743 ], "wc_questions_avg": [ 70.75, 72.00824605557338 ], "wc_limitations_avg": [ 32.0, 30.76524012583032 ], "wc_review_avg": [ 367.75, 147.39127348659417 ], "wc_reply_reviewers_avg": [ 99.75, 88.89424897033554 ], "wc_reply_authors_avg": [ 362.0, 293.3283143510016 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kfH9YUq9XdEJ:scholar.google.com/&scioq=Quadratic+Quantum+Variational+Monte+Carlo&hl=en&as_sdt=0,47", "gs_version_total": 0, "email": "cam.ac.uk;utexas.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.utexas.edu", "aff_unique_abbr": "Cambridge;UT Austin", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Austin", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "lEDuaGGiCV", "title": "Lucy: Think and Reason to Solve Text-to-SQL", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large Language Models (LLMs) have made significant progress in assisting users to query databases in natural language. While LLM-based techniques provide state-of-the-art results on many standard benchmarks, their performance significantly drops when applied to large enterprise databases. The reason is that these databases have a large number of tables with complex relationships that are challenging for LLMs to reason about. We analyze challenges that LLMs face in these settings and propose a new solution that combines the power of LLMs in understanding questions with automated reasoning techniques to handle complex database constraints. Based on these ideas, we have developed a new framework that outperforms state-of-the-art techniques in zero-shot text-to-SQL on complex benchmarks", "keywords": "Large Language Models;Automated Reasoning;Text-to-SQL", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/bce0151ee176c89384e3fff583f8342934f3270c.gz", "author": "Nina Narodytska;shay vargaftik", "authorids": "~Nina_Narodytska1;~shay_vargaftik1", "gender": "F;", "homepage": ";", "dblp": "87/3366;", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Nina_Narodytska1;~shay_vargaftik1", "aff": "VMware;", "aff_domain": "vmware.com;", "position": "Researcher;", "bibtex": "@misc{\nanonymous2024lucy,\ntitle={Lucy: Think and Reason to Solve Text-to-{SQL}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=lEDuaGGiCV}\n}", "github": "", "project": "", "reviewers": "P27x;YkDn;ZvFe;rcsp", "site": "https://openreview.net/forum?id=lEDuaGGiCV", "pdf_size": 783553, "rating": "3;3;4;6", "confidence": "4;4;2;4", "soundness": "2;1;3;3", "novelty": "2;2;3;2", "presentation": "2;1;4;2", "wc_summary": "157;69;83;276", "wc_strengths": "25;8;25;100", "wc_weaknesses": "276;121;164;146", "wc_questions": "20;33;14;105", "wc_limitations": "15;16;7;77", "wc_review": "493;247;293;704", "wc_reply_reviewers": "23;52;77;12", "wc_reply_authors": "31;135;82;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;1", "rating_avg": [ 4.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 146.25, 82.03467254764902 ], "wc_strengths_avg": [ 39.5, 35.61249780624774 ], "wc_weaknesses_avg": [ 176.75, 59.30166523800154 ], "wc_questions_avg": [ 43.0, 36.44859393721519 ], "wc_limitations_avg": [ 28.75, 28.07467720206236 ], "wc_review_avg": [ 434.25, 181.12892507824364 ], "wc_reply_reviewers_avg": [ 41.0, 25.406692031825003 ], "wc_reply_authors_avg": [ 62.0, 51.3176382932808 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2131364110082212054&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "VMware, Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.vmware.com", "aff_unique_abbr": "VMware", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "S$^{2}$FT: Efficient, Scalable and Generalizable LLM Fine-tuning by Structured Sparsity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93843", "id": "lEUle8S4xQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lEUle8S4xQ", "openreview": "https://openreview.net/forum?id=lEUle8S4xQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93843.png?t=1733799209.8661017", "project": "", "author_site": "Xinyu Yang, Jixuan Leng, Geyang Guo, Jiawei Zhao, Ryumei Nakada, Linjun Zhang, Huaxiu Yao, Beidi Chen", "tldr": "", "abstract": "Current PEFT methods for LLMs can achieve high quality, efficient training, or scalable serving, but not all three simultaneously. \nTo address this limitation, we investigate sparse fine-tuning and observe a remarkable improvement in generalization ability. \nUtilizing this key insight, we propose a family of Structured Sparse Fine-Tuning (S${^2}$FT) methods for LLMs, which concurrently achieve state-of-the-art fine-tuning performance, training efficiency, and inference scalability. S${^2}$FT accomplishes this by \"selecting sparsely and computing densely\". Based on the coupled structures in LLMs, \\model selects a few attention heads and channels in the MHA and FFN modules for each Transformer block, respectively. Next, it co-permutes the weight matrices on both sides of all coupled structures to connect the selected subsets in each layer into a dense submatrix. Finally, S${^2}$FT performs in-place gradient updates on all selected submatrices.\nThrough theoretical analyses and empirical results, our method prevents forgetting while simplifying optimization, delivers SOTA performance on both commonsense and arithmetic reasoning with 4.6% and 1.3% average improvements compared to LoRA, and surpasses full FT by 11.5% when generalizing to various domains after instruction tuning. \nUsing our partial back-propagation algorithm, S${^2}$FT saves training memory up to 3$\\times$ and improves latency by 1.5-2.7$\\times$ compared to full FT, while achieving an average 10\\% improvement over LoRA on both metrics. We further demonstrate that the weight updates in S${^2}$FT can be decoupled into adapters, enabling effective fusion, fast switch, and efficient parallelism when serving multiple fine-tuned models.", "keywords": "PEFT; LLM Efficiency; LLM Post-training", "primary_area": "generative_models", "supplementary_material": "", "author": "Xinyu Yang;Jixuan Leng;Geyang Guo;Jiawei Zhao;Ryumei Nakada;Linjun Zhang;Huaxiu Yao;Beidi Chen", "authorids": "~Xinyu_Yang4;~Jixuan_Leng1;~Geyang_Guo2;~Jiawei_Zhao2;~Ryumei_Nakada1;~Linjun_Zhang1;~Huaxiu_Yao1;~Beidi_Chen1", "gender": "M;M;;M;;M;M;F", "homepage": "http://xinyuyang.me;https://jixuanleng.com/;https://guochry.github.io;https://jiaweizhao.com/;https://statistics.rutgers.edu/people-pages/faculty/people/135-graduate-students/581-ryumei-nakada;;http://huaxiuyao.mystrikingly.com;https://www.andrew.cmu.edu/user/beidic/", "dblp": "89/473-2.html;261/6970.html;128/5836;;;;197/1635;192/1339", "google_scholar": ";;4zOu3UkAAAAJ;;;TUAzs3sAAAAJ;A20BZnQAAAAJ;", "orcid": ";;;;;;;", "linkedin": ";jixuan-leng-2862b2227/;;;;;huaxiuyao/;", "or_profile": "~Xinyu_Yang4;~Jixuan_Leng1;~Geyang_Guo2;~Jiawei_Zhao2;~Ryumei_Nakada1;~Linjun_Zhang1;~Huaxiu_Yao1;~Beidi_Chen1", "aff": "Carnegie Mellon University;University of Rochester;Renmin University of China;California Institute of Technology;Rutgers University;Rutgers University;Department of Computer Science, University of North Carolina at Chapel Hill;Meta Facebook", "aff_domain": "cmu.edu;rochester.edu;ruc.edu.cn;caltech.edu;rutgers.edu;rutgers.edu;cs.unc.edu;fb.com", "position": "PhD student;Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nyang2024sft,\ntitle={S\\${\\textasciicircum}\\{2\\}\\${FT}: Efficient, Scalable and Generalizable {LLM} Fine-tuning by Structured Sparsity},\nauthor={Xinyu Yang and Jixuan Leng and Geyang Guo and Jiawei Zhao and Ryumei Nakada and Linjun Zhang and Huaxiu Yao and Beidi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lEUle8S4xQ}\n}", "github": "", "reviewers": "d9g6;CFWk;Cfys;8C1L", "pdf_size": 1495904, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "47;109;199;90", "wc_strengths": "80;50;235;32", "wc_weaknesses": "30;134;221;89", "wc_questions": "6;180;180;32", "wc_limitations": "1;7;210;1", "wc_review": "164;480;1045;244", "wc_reply_reviewers": "0;36;11;19", "wc_reply_authors": "101;76;54;54", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.25, 55.41829571540431 ], "wc_strengths_avg": [ 99.25, 80.22896920688935 ], "wc_weaknesses_avg": [ 118.5, 69.72983579501675 ], "wc_questions_avg": [ 99.5, 81.02314484145873 ], "wc_limitations_avg": [ 54.75, 89.66709262600187 ], "wc_review_avg": [ 483.25, 344.5049890785328 ], "wc_reply_reviewers_avg": [ 16.5, 13.124404748406688 ], "wc_reply_authors_avg": [ 71.25, 19.382659776202026 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=757307549368114457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cmu.edu;rochester.edu;ruc.edu.cn;caltech.edu;rutgers.edu;rutgers.edu;cs.unc.edu;fb.com", "author_num": 8, "aff_unique_index": "0;1;2;3;4;4;5;6", "aff_unique_norm": "Carnegie Mellon University;University of Rochester;Renmin University of China;California Institute of Technology;Rutgers University;University of North Carolina at Chapel Hill;Meta", "aff_unique_dep": ";;;;;Department of Computer Science;Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.rochester.edu;http://www.ruc.edu.cn;https://www.caltech.edu;https://www.rutgers.edu;https://www.unc.edu;https://meta.com", "aff_unique_abbr": "CMU;U of R;RUC;Caltech;Rutgers;UNC Chapel Hill;Meta", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Pasadena;Chapel Hill", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Unitary Convolutions for Learning on Graphs and Groups", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93842", "id": "lG1VEQJvUH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lG1VEQJvUH", "openreview": "https://openreview.net/forum?id=lG1VEQJvUH", "poster": "", "project": "", "author_site": "Bobak Kiani, Lukas Fesser, Melanie Weber", "tldr": "", "abstract": "Data with geometric structure is ubiquitous in machine learning often arising from fundamental symmetries in a domain, such as permutation-invariance in graphs and translation-invariance in images. Group-convolutional architectures, which encode symmetries as inductive bias, have shown great success in applications, but can suffer from instabilities as their depth increases and often struggle to learn long range dependencies in data. For instance, graph neural networks experience instability due to the convergence of node representations (over-smoothing), which can occur after only a few iterations of message-passing, reducing their effectiveness in downstream tasks. Here, we propose and study unitary group convolutions, which allow for deeper networks that are more stable during training. The main focus of the paper are graph neural networks, where we show that unitary graph convolutions provably avoid over-smoothing. Our experimental results confirm that unitary graph convolutional networks achieve competitive performance on benchmark datasets compared to state-of-the-art graph neural networks. We complement our analysis of the graph domain with the study of general unitary convolutions and analyze their role in enhancing stability in general group convolutional architectures.", "keywords": "graph neural networks;geometric deep learning;learning stability;unitary;orthogonal", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Bobak Kiani;Lukas Fesser;Melanie Weber", "authorids": "~Bobak_Kiani1;~Lukas_Fesser1;~Melanie_Weber1", "gender": ";;", "homepage": ";;", "dblp": "232/4086;;", "google_scholar": ";;", "orcid": ";;", "linkedin": "bobak-kiani;;", "or_profile": "~Bobak_Kiani1;~Lukas_Fesser1;~Melanie_Weber1", "aff": "Massachusetts Institute of Technology;;", "aff_domain": "mit.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nkiani2024unitary,\ntitle={Unitary Convolutions for Learning on Graphs and Groups},\nauthor={Bobak Kiani and Lukas Fesser and Melanie Weber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lG1VEQJvUH}\n}", "github": "", "reviewers": "vJVB;9k9v;9r1r;EVha;feJe", "pdf_size": 649385, "rating": "4;6;7;8;8", "confidence": "3;3;3;5;4", "soundness": "2;2;3;4;4", "novelty": "2;3;3;4;4", "presentation": "1;2;3;3;3", "wc_summary": "46;140;60;55;338", "wc_strengths": "58;105;28;50;107", "wc_weaknesses": "393;328;58;239;313", "wc_questions": "58;2;2;373;284", "wc_limitations": "1;1;1;12;9", "wc_review": "556;576;149;729;1051", "wc_reply_reviewers": "325;176;0;480;40", "wc_reply_authors": "0;392;0;543;0", "reply_reviewers": "2;2;0;3;1", "reply_authors": "1;2;1;4;1", "rating_avg": [ 6.6, 1.4966629547095764 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 127.8, 110.38188257137128 ], "wc_strengths_avg": [ 69.6, 31.30878470972644 ], "wc_weaknesses_avg": [ 266.2, 115.04329619756207 ], "wc_questions_avg": [ 143.8, 154.76743843586738 ], "wc_limitations_avg": [ 4.8, 4.749736834815167 ], "wc_review_avg": [ 612.2, 291.6198895823123 ], "wc_reply_reviewers_avg": [ 204.2, 178.9484842070477 ], "wc_reply_authors_avg": [ 187.0, 233.95213185607008 ], "reply_reviewers_avg": [ 1.6, 1.019803902718557 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.701560760020114, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9941861621552587658&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 3, "email": "mit.edu;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Mitigating Covariate Shift in Behavioral Cloning via Robust Stationary Distribution Correction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93841", "id": "lHcvjsQFQq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lHcvjsQFQq", "openreview": "https://openreview.net/forum?id=lHcvjsQFQq", "poster": "/media/PosterPDFs/NeurIPS%202024/93841.png?t=1733464460.6867292", "project": "", "author_site": "Seokin Seo, Byung-Jun Lee, Jongmin Lee, HyeongJoo Hwang, Hongseok Yang, Kee-Eung Kim", "tldr": "", "abstract": "We consider offline imitation learning (IL), which aims to train an agent to imitate from the dataset of expert demonstrations without online interaction with the environment. Behavioral Cloning (BC) has been a simple yet effective approach to offline IL, but it is also well-known to be vulnerable to the covariate shift resulting from the mismatch between the state distributions induced by the learned policy and the expert policy. Moreover, as often occurs in practice, when expert datasets are collected from an arbitrary state distribution instead of a stationary one, these shifts become more pronounced, potentially leading to substantial failures in existing IL methods. Specifically, we focus on covariate shift resulting from arbitrary state data distributions, such as biased data collection or incomplete trajectories, rather than shifts induced by changes in dynamics or noisy expert actions. In this paper, to mitigate the effect of the covariate shifts in BC, we propose DrilDICE, which utilizes a distributionally robust BC objective by employing a stationary distribution correction ratio estimation (DICE) to derive a feasible solution. We evaluate the effectiveness of our method through an extensive set of experiments covering diverse covariate shift scenarios. The results demonstrate the efficacy of the proposed approach in improving the robustness against the shifts, outperforming existing offline IL methods in such scenarios.", "keywords": "Imitation Learning;Behavioral Cloning;Robust Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Seokin Seo;Byung-Jun Lee;Jongmin Lee;HyeongJoo Hwang;Hongseok Yang;Kee-Eung Kim", "authorids": "~Seokin_Seo1;~Byung-Jun_Lee1;~Jongmin_Lee1;~HyeongJoo_Hwang1;~Hongseok_Yang2;~Kee-Eung_Kim2", "gender": ";M;M;M;M;M", "homepage": "https://sites.google.com/view/siseo0;https://dmlab.korea.ac.kr/professor.html;https://www.jmlee.kr;https://github.com/gr8joo;http://ailab.kaist.ac.kr;https://sites.google.com/view/hongseokyang/home", "dblp": "231/7699;130/1678-1;68/222-4.html;;35/6703;82/5808", "google_scholar": "https://scholar.google.com/citations?hl=en;FwoohI4AAAAJ;https://scholar.google.co.kr/citations?user=rFcK8EEAAAAJ;IK5bNo0AAAAJ;https://scholar.google.com/citations?hl=ko;cLuwH14AAAAJ", "orcid": ";;;;;", "linkedin": "seokin-seo-026ab4150/;;jmlee123/;;;", "or_profile": "~Seokin_Seo1;~Byung-Jun_Lee1;~Jongmin_Lee1;~HyeongJoo_Hwang1;~Kee-Eung_Kim2;~Hongseok_Yang1", "aff": "Korea Advanced Institute of Science & Technology;Gauss Labs Inc.;University of California, Berkeley;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;gausslabs.ai;berkeley.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Applied Scientist;Postdoc;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nseo2024mitigating,\ntitle={Mitigating Covariate Shift in Behavioral Cloning via Robust Stationary Distribution Correction},\nauthor={Seokin Seo and Byung-Jun Lee and Jongmin Lee and HyeongJoo Hwang and Hongseok Yang and Kee-Eung Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lHcvjsQFQq}\n}", "github": "", "reviewers": "oZLc;k6J7;zQPx", "pdf_size": 988124, "rating": "5;6;6", "confidence": "3;2;4", "soundness": "3;3;2", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "76;80;28", "wc_strengths": "9;33;19", "wc_weaknesses": "102;12;44", "wc_questions": "63;55;441", "wc_limitations": "13;21;1", "wc_review": "263;201;533", "wc_reply_reviewers": "17;43;18", "wc_reply_authors": "1454;1360;1954", "reply_reviewers": "1;1;1", "reply_authors": "5;4;5", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 61.333333333333336, 23.6267268622258 ], "wc_strengths_avg": [ 20.333333333333332, 9.843215373488935 ], "wc_weaknesses_avg": [ 52.666666666666664, 37.249906785863985 ], "wc_questions_avg": [ 186.33333333333334, 180.10614154498515 ], "wc_limitations_avg": [ 11.666666666666666, 8.219218670625303 ], "wc_review_avg": [ 332.3333333333333, 144.1326549475247 ], "wc_reply_reviewers_avg": [ 26.0, 12.027745701779143 ], "wc_reply_authors_avg": [ 1589.3333333333333, 260.69820781040204 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.666666666666667, 0.4714045207910317 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:alul2L7AV3oJ:scholar.google.com/&scioq=Mitigating+Covariate+Shift+in+Behavioral+Cloning+via+Robust+Stationary+Distribution+Correction&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "kaist.ac.kr;gausslabs.ai;berkeley.edu;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 6, "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Gauss Labs Inc.;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kaist.ac.kr;;https://www.berkeley.edu", "aff_unique_abbr": "KAIST;;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "On the Role of Attention Masks and LayerNorm in Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93840", "id": "lIH6oCdppg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lIH6oCdppg", "openreview": "https://openreview.net/forum?id=lIH6oCdppg", "poster": "/media/PosterPDFs/NeurIPS%202024/93840.png?t=1733711163.8637707", "project": "", "author_site": "Xinyi Wu, Amir Ajorlou, Yifei Wang, Stefanie Jegelka, Ali Jadbabaie", "tldr": "", "abstract": "Self-attention is the key mechanism of transformers, which are the essential building blocks of modern foundation models. Recent studies have shown that pure self-attention suffers from an increasing degree of rank collapse as depth increases, limiting model expressivity and further utilization of model depth. The existing literature on rank collapse, however, has mostly overlooked other critical components in transformers that may alleviate the rank collapse issue. In this paper, we provide a general analysis of rank collapse under self-attention, taking into account the effects of attention masks and layer normalization (LayerNorm). In particular, we find that although pure masked attention still suffers from exponential collapse to a rank one subspace, sparse or local masked attention can provably slow down the collapse rate. In the case of self-attention with LayerNorm, we first show that for certain classes of value matrices, collapse to a rank one subspace still happens exponentially. However, through construction of nontrivial counterexamples, we then establish that with proper choice of value matrices, a general class of sequences may not converge to a rank one subspace, and the self-attention dynamics with LayerNorm can simultaneously possess a rich set of equilibria with any possible rank between one and full. Our result refutes the previous hypothesis that LayerNorm plays no role in the rank collapse of self-attention and suggests that self-attention with LayerNorm constitutes a much more expressive, versatile nonlinear dynamical system than what was originally thought.", "keywords": "attention mechanism;transformers;layer normalization;deep learning theory;dynamical systems", "primary_area": "learning_theory", "supplementary_material": "", "author": "Xinyi Wu;Amir Ajorlou;Yifei Wang;Stefanie Jegelka;Ali Jadbabaie", "authorids": "~Xinyi_Wu3;~Amir_Ajorlou1;~Yifei_Wang1;~Stefanie_Jegelka3;~Ali_Jadbabaie1", "gender": "F;M;M;F;M", "homepage": "https://xinyiwu98.github.io;http://www.mit.edu/~ajorlou/;https://yifeiwang77.com;http://people.csail.mit.edu/stefje/;http://www.mit.edu/~jadbabai/www", "dblp": "98/7827;;00/555-1;38/7003;83/3158", "google_scholar": ";_2r1jtYAAAAJ;-CLy6YsAAAAJ;gTWUZlsAAAAJ;ZBc_WwYAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Xinyi_Wu3;~Amir_Ajorlou1;~Yifei_Wang1;~Stefanie_Jegelka3;~Ali_Jadbabaie1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Researcher;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024on,\ntitle={On the Role of Attention Masks and LayerNorm in Transformers},\nauthor={Xinyi Wu and Amir Ajorlou and Yifei Wang and Stefanie Jegelka and Ali Jadbabaie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lIH6oCdppg}\n}", "github": "", "reviewers": "ivrv;7spk;yhKR", "pdf_size": 752080, "rating": "4;6;7", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "96;117;42", "wc_strengths": "85;90;75", "wc_weaknesses": "72;460;32", "wc_questions": "35;180;73", "wc_limitations": "1;17;3", "wc_review": "289;864;225", "wc_reply_reviewers": "0;476;0", "wc_reply_authors": "0;1284;0", "reply_reviewers": "0;3;0", "reply_authors": "1;5;1", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 85.0, 31.591137997862628 ], "wc_strengths_avg": [ 83.33333333333333, 6.236095644623236 ], "wc_weaknesses_avg": [ 188.0, 193.0250415533349 ], "wc_questions_avg": [ 96.0, 61.389467066156115 ], "wc_limitations_avg": [ 7.0, 7.118052168020874 ], "wc_review_avg": [ 459.3333333333333, 287.33294663547065 ], "wc_reply_reviewers_avg": [ 158.66666666666666, 224.38855189653108 ], "wc_reply_authors_avg": [ 428.0, 605.2834046956847 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3044006971845778602&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DisenGCD: A Meta Multigraph-assisted Disentangled Graph Learning Framework for Cognitive Diagnosis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93839", "id": "lJuQxkDbDo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lJuQxkDbDo", "openreview": "https://openreview.net/forum?id=lJuQxkDbDo", "poster": "/media/PosterPDFs/NeurIPS%202024/93839.png?t=1731406661.0161877", "project": "", "author_site": "Shangshang Yang, Mingyang Chen, Ziwen Wang, Xiaoshan Yu, Panpan Zhang, Haiping Ma, Xingyi Zhang", "tldr": "", "abstract": "Existing graph learning-based cognitive diagnosis (CD) methods have made relatively good results, but their student, exercise, and concept representations are learned and exchanged in an implicit unified graph, which makes the interaction-agnostic exercise and concept representations be learned poorly, failing to provide high robustness against noise in students' interactions. Besides, lower-order exercise latent representations obtained in shallow layers are not well explored when learning the student representation. \nTo tackle the issues, this paper suggests a meta multigraph-assisted disentangled graph learning framework for CD (DisenGCD), which learns three types of representations on three disentangled graphs: student-exercise-concept interaction, exercise-concept relation, and concept dependency graphs, respectively. \nSpecifically, the latter two graphs are first disentangled from the interaction graph. \nThen, the student representation is learned from the interaction graph by a devised meta multigraph learning module; multiple learnable propagation paths in this module enable current student latent representation to access lower-order exercise latent representations,\nwhich can lead to more effective nad robust student representations learned; \nthe exercise and concept representations are learned on the relation and dependency graphs by graph attention modules. \nFinally, a novel diagnostic function is devised to handle three disentangled representations for prediction. Experiments show better performance and robustness of DisenGCD than state-of-the-art CD methods and demonstrate the effectiveness of the disentangled learning framework and meta multigraph module.The source code is available at https://github.com/BIMK/Intelligent-Education/tree/main/DisenGCD.", "keywords": "Cognitive Diagnosis;Intelligent Education;Disentanglement Learning;Robustness;Graph Learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/6b4182aca74183a6ff68f78313ea081482a5546c.zip", "author": "Shangshang Yang;Mingyang Chen;Ziwen Wang;Xiaoshan Yu;Panpan Zhang;Haiping Ma;Xingyi Zhang", "authorids": "~Shangshang_Yang1;~Mingyang_Chen7;~Ziwen_Wang3;~Xiaoshan_Yu2;~Panpan_Zhang4;~Haiping_Ma1;~Xingyi_Zhang2", "gender": "M;;;;F;F;M", "homepage": "https://github.com/DevilYangS;;;;https://scholar.google.com/citations?hl=zh-CN&user=A35IbgcAAAAJ&view_op=list_works&gmla=ABOlHizDyQJ_4LkVw40ZIrrW5m309kNV5Johnt7evp1s9MJFDLTcjSYm_okD34Oz06JzhmosSkS65kpHsE9AzzLn1PeAytYdCMnWoKRLfR2ib_aGg8Cz6cjDImZA98xN7w;;https://cs.ahu.edu.cn/2023/0815/c20806a313390/page.htm", "dblp": "232/8063;;;;;83/8129.html;93/1107", "google_scholar": "https://scholar.google.com.hk/citations?user=arzbiNUAAAAJ;;;;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0003-0837-5424;;;;;0000-0002-3115-6855;0000-0002-5052-000X", "linkedin": ";;;;;;", "or_profile": "~Shangshang_Yang1;~Mingyang_Chen7;~Ziwen_Wang3;~Xiaoshan_Yu2;~Panpan_Zhang4;~Haiping_Ma1;~Xingyi_Zhang2", "aff": "Anhui University;;;;Anhui University;Anhui University;Anhui University", "aff_domain": "ahu.edu.cn;;;;ahu.edu.cn;ahu.edu.cn;ahu.edu.cn", "position": "Postdoc;;;;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024disengcd,\ntitle={Disen{GCD}: A Meta Multigraph-assisted Disentangled Graph Learning Framework for Cognitive Diagnosis},\nauthor={Shangshang Yang and Mingyang Chen and Ziwen Wang and Xiaoshan Yu and Panpan Zhang and Haiping Ma and Xingyi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lJuQxkDbDo}\n}", "github": "", "reviewers": "53z4;9V3d;2m1M;7tDb", "pdf_size": 1145995, "rating": "5;5;5;7", "confidence": "4;3;1;4", "soundness": "3;3;3;3", "novelty": "1;2;3;3", "presentation": "2;3;3;3", "wc_summary": "66;113;82;112", "wc_strengths": "42;194;95;77", "wc_weaknesses": "162;133;81;123", "wc_questions": "6;160;45;98", "wc_limitations": "1;1;1;1", "wc_review": "277;601;304;411", "wc_reply_reviewers": "20;36;19;27", "wc_reply_authors": "18;37;12;714", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.25, 20.06707502353046 ], "wc_strengths_avg": [ 102.0, 56.431374252272114 ], "wc_weaknesses_avg": [ 124.75, 29.03769102390891 ], "wc_questions_avg": [ 77.25, 57.8678451300893 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 398.25, 127.33297883894808 ], "wc_reply_reviewers_avg": [ 25.5, 6.800735254367722 ], "wc_reply_authors_avg": [ 195.25, 299.64259960826666 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12949347738472920880&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "ahu.edu.cn;;;;ahu.edu.cn;ahu.edu.cn;ahu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Anhui University", "aff_unique_dep": "", "aff_unique_url": "http://www.ahu.edu.cn/", "aff_unique_abbr": "AHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Efficient and Private Marginal Reconstruction with Local Non-Negativity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93838", "id": "lKnl4CLhhS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lKnl4CLhhS", "openreview": "https://openreview.net/forum?id=lKnl4CLhhS", "poster": "", "project": "", "author_site": "Brett Mullins, Miguel Fuentes, Yingtai Xiao, Daniel Kifer, Cameron Musco, Daniel Sheldon", "tldr": "", "abstract": "Differential privacy is the dominant standard for formal and quantifiable privacy and has been used in major deployments that impact millions of people. Many differentially private algorithms for query release and synthetic data contain steps that reconstruct answers to queries from answers to other queries that have been measured privately. Reconstruction is an important subproblem for such mechanisms to economize the privacy budget, minimize error on reconstructed answers, and allow for scalability to high-dimensional datasets. In this paper, we introduce a principled and efficient postprocessing method ReM (Residuals-to-Marginals) for reconstructing answers to marginal queries. Our method builds on recent work on efficient mechanisms for marginal query release, based on making measurements using a residual query basis that admits efficient pseudoinversion, which is an important primitive used in reconstruction. An extension GReM-LNN (Gaussian Residuals-to-Marginals with Local Non-negativity) reconstructs marginals under Gaussian noise satisfying consistency and non-negativity, which often reduces error on reconstructed answers. We demonstrate the utility of ReM and GReM-LNN by applying them to improve existing private query answering mechanisms.", "keywords": "differential privacy;query release;synthetic data", "primary_area": "privacy", "supplementary_material": "/attachment/9e2c09ea2eb19b4053d2fd887d6f2df2eecf549e.zip", "author": "Brett Mullins;Miguel Fuentes;Yingtai Xiao;Daniel Kifer;Cameron N Musco;Daniel Sheldon", "authorids": "~Brett_Mullins1;~Miguel_Fuentes1;~Yingtai_Xiao1;~Daniel_Kifer1;~Cameron_N_Musco1;~Daniel_Sheldon1", "gender": ";M;M;M;M;M", "homepage": "https://bcmullins.github.io;;https://jackyxiao98.github.io/;http://www.cse.psu.edu/~duk17/;https://people.cs.umass.edu/~cmusco/;https://people.cs.umass.edu/~sheldon/", "dblp": "251/3111;;244/1962;84/114;149/2327;58/766", "google_scholar": "ZwLzI1wAAAAJ;;eCBNsH0AAAAJ;https://scholar.google.com.tw/citations?hl=en;EeYGZCwAAAAJ;https://scholar.google.com.tw/citations?user=P1bHFuoAAAAJ", "orcid": ";;0000-0001-8964-6753;;;", "linkedin": "brettcmullins/;miguel-fuentes-28679980/;yingtai-xiao-2b795515a/;;;", "or_profile": "~Brett_Mullins1;~Miguel_Fuentes1;~Yingtai_Xiao1;~Daniel_Kifer1;~Cameron_N_Musco1;~Dan_Sheldon1", "aff": "University of Massachusetts at Amherst;University of Massachusetts at Amherst;Pennsylvania State University;Pennsylvania State University;University of Massachusetts, Amherst;University of Massachusetts, Amherst", "aff_domain": "umass.edu;umass.edu;psu.edu;psu.edu;umass.edu;umass.edu", "position": "PhD student;PhD student;PhD student;Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmullins2024efficient,\ntitle={Efficient and Private Marginal Reconstruction with Local Non-Negativity},\nauthor={Brett Mullins and Miguel Fuentes and Yingtai Xiao and Daniel Kifer and Cameron N Musco and Daniel Sheldon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lKnl4CLhhS}\n}", "github": "", "reviewers": "JUY8;r5Ym;hTTb;a9Gz", "pdf_size": 540752, "rating": "3;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "139;218;67;152", "wc_strengths": "39;82;19;62", "wc_weaknesses": "111;238;39;227", "wc_questions": "67;41;34;6", "wc_limitations": "1;1;5;13", "wc_review": "357;580;164;460", "wc_reply_reviewers": "0;0;10;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 144.0, 53.6050370767524 ], "wc_strengths_avg": [ 50.5, 23.71181140275875 ], "wc_weaknesses_avg": [ 153.75, 82.85340970654134 ], "wc_questions_avg": [ 37.0, 21.714050750608465 ], "wc_limitations_avg": [ 5.0, 4.898979485566356 ], "wc_review_avg": [ 390.25, 152.61450619125299 ], "wc_reply_reviewers_avg": [ 9.25, 11.031205736455105 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.33333333333333337, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3Bp48Da4loQJ:scholar.google.com/&scioq=Efficient+and+Private+Marginal+Reconstruction+with+Local+Non-Negativity&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "umass.edu;umass.edu;psu.edu;psu.edu;umass.edu;umass.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "University of Massachusetts Amherst;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.psu.edu", "aff_unique_abbr": "UMass Amherst;PSU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Thought of Search: Planning with Language Models Through The Lens of Efficiency", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93837", "id": "lNCsyA5uS1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lNCsyA5uS1", "openreview": "https://openreview.net/forum?id=lNCsyA5uS1", "poster": "/media/PosterPDFs/NeurIPS%202024/93837.png?t=1732293689.7413976", "project": "", "author_site": "Michael Katz, Harsha Kokel, Kavitha Srinivas, Shirin Sohrabi Araghi", "tldr": "", "abstract": "Among the most important properties of algorithms investigated in computer science are soundness, completeness, and complexity. These properties, however, are rarely analyzed for the vast collection of recently proposed methods for planning with large language models. In this work, we alleviate this gap. We analyse these properties of using LLMs for planning and highlight that recent trends abandon both soundness and completeness for the sake of inefficiency. We propose a significantly more efficient approach that can, at the same time, maintain both soundness and completeness. We exemplify on four representative search problems, comparing to the LLM-based solutions from the literature that attempt to solve these problems. We show that by using LLMs to produce the code for the search components we can solve the entire datasets with 100% accuracy with only a few calls to the LLM. In contrast, the compared approaches require hundreds of thousands of calls and achieve significantly lower accuracy. We argue for a responsible use of compute resources; urging research community to investigate sound and complete LLM-based approaches that uphold efficiency.", "keywords": "planning;large language models;search", "primary_area": "generative_models", "supplementary_material": "", "author": "Michael Katz;Harsha Kokel;Kavitha Srinivas;Shirin Sohrabi", "authorids": "~Michael_Katz1;~Harsha_Kokel1;~Kavitha_Srinivas2;~Shirin_Sohrabi1", "gender": ";F;F;F", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Michael.Katz1;http://harshakokel.com;;https://researcher.watson.ibm.com/researcher/view.php?person=us-ssohrab", "dblp": "75/1295-1;164/7457;39/4444;51/410", "google_scholar": "pltkfcMAAAAJ;eiGQoO4AAAAJ;https://scholar.google.com/citations?hl=en;DD1Z2zcAAAAJ", "orcid": ";0000-0002-7548-3719;;", "linkedin": ";https://linkedin.com/in/harshakokel;;", "or_profile": "~Michael_Katz1;~Harsha_Kokel1;~Kavitha_Srinivas2;~Shirin_Sohrabi1", "aff": "International Business Machines;IBM Research;IBM Research;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com;ibm.com", "position": "Principal Researcher;Research Scientist;Research Staff Member;Researcher", "bibtex": "@inproceedings{\nkatz2024thought,\ntitle={Thought of Search: Planning with Language Models Through The Lens of Efficiency},\nauthor={Michael Katz and Harsha Kokel and Kavitha Srinivas and Shirin Sohrabi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lNCsyA5uS1}\n}", "github": "", "reviewers": "uMM4;4XWb;PhwG;BYf6", "pdf_size": 488260, "rating": "3;4;6;7", "confidence": "5;3;3;3", "soundness": "3;3;3;3", "novelty": "1;2;3;4", "presentation": "3;4;2;3", "wc_summary": "72;52;157;76", "wc_strengths": "29;21;157;172", "wc_weaknesses": "130;273;173;256", "wc_questions": "140;39;2;9", "wc_limitations": "18;1;5;15", "wc_review": "389;386;494;528", "wc_reply_reviewers": "159;384;52;61", "wc_reply_authors": "110;970;25;13", "reply_reviewers": "2;2;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 89.25, 40.158280590682665 ], "wc_strengths_avg": [ 94.75, 70.00848162901407 ], "wc_weaknesses_avg": [ 208.0, 58.81751439834907 ], "wc_questions_avg": [ 47.5, 55.1837838499681 ], "wc_limitations_avg": [ 9.75, 6.977642868476432 ], "wc_review_avg": [ 449.25, 62.9181015288923 ], "wc_reply_reviewers_avg": [ 164.0, 133.77032555839878 ], "wc_reply_authors_avg": [ 279.5, 400.41010227016 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7302967433402215, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6146337157100641155&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "ibm.com;ibm.com;ibm.com;ibm.com", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PaCE: Parsimonious Concept Engineering for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93836", "id": "lOMHt16T8R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lOMHt16T8R", "openreview": "https://openreview.net/forum?id=lOMHt16T8R", "poster": "", "project": "", "author_site": "Jinqi Luo, Tianjiao Ding, Kwan Ho Ryan Chan, Darshan Thaker, Aditya Chattopadhyay, Chris Callison-Burch, Rene Vidal", "tldr": "", "abstract": "Large Language Models (LLMs) are being used for a wide variety of tasks. While they are capable of generating human-like responses, they can also produce undesirable output including potentially harmful information, racist or sexist language, and hallucinations. Alignment methods are designed to reduce such undesirable output, via techniques such as fine-tuning, prompt engineering, and representation engineering. However, existing methods face several challenges: some require costly fine-tuning for every alignment task; some do not adequately remove undesirable concepts, failing alignment; some remove benign concepts, lowering the linguistic capabilities of LLMs. To address these issues, we propose Parsimonious Concept Engineering (PaCE), a novel activation engineering framework for alignment. First, to sufficiently model the concepts, we construct a large-scale concept dictionary in the activation space, in which each atom corresponds to a semantic concept. Given any alignment task, we instruct a concept partitioner to efficiently annotate the concepts as benign or undesirable. Then, at inference time, we decompose the LLM activations along the concept dictionary via sparse coding, to accurately represent the activations as linear combinations of benign and undesirable components. By removing the latter ones from the activations, we reorient the behavior of the LLM towards the alignment goal. We conduct experiments on tasks such as response detoxification, faithfulness enhancement, and sentiment revising, and show that PaCE achieves state-of-the-art alignment performance while maintaining linguistic capabilities.", "keywords": "Large Language Model;Sparse Coding;Trustworthy Machine Learning", "primary_area": "generative_models", "supplementary_material": "/attachment/540e3153649547b718e80b9890a73f21d73d0393.zip", "author": "Jinqi Luo;Tianjiao Ding;Kwan Ho Ryan Chan;Darshan Thaker;Aditya Chattopadhyay;Chris Callison-Burch;Rene Vidal", "authorids": "~Jinqi_Luo1;~Tianjiao_Ding1;~Kwan_Ho_Ryan_Chan1;~Darshan_Thaker1;~Aditya_Chattopadhyay1;~Chris_Callison-Burch1;~Rene_Vidal1", "gender": ";M;M;M;M;M;", "homepage": ";https://tianjiaoding.com/;https://ryanchankh.github.io/;https://darshanthaker.github.io;;https://www.cis.upenn.edu/~ccb/;http://www.vision.jhu.edu", "dblp": ";230/1227;267/5496;99/1272;207/8574;;v/ReneVidal", "google_scholar": ";L3wy9QMAAAAJ;DBXWBqcAAAAJ;q-EqG80AAAAJ;aekzv1gAAAAJ;nv-MV58AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;", "linkedin": ";;ryanchankh/;;;chris-callison-burch-40bb87b7/;rene-vidal-74844928/", "or_profile": "~Jinqi_Luo1;~Tianjiao_Ding1;~Kwan_Ho_Ryan_Chan1;~Darshan_Thaker1;~Aditya_Chattopadhyay1;~Chris_Callison-Burch1;~Rene_Vidal1", "aff": ";University of Pennsylvania;University of Pennsylvania ;University of Pennsylvania;Johns Hopkins University;Allen Institute for Artificial Intelligence;Amazon", "aff_domain": ";seas.upenn.edu;seas.upenn.edu;seas.upenn.edu;jhu.edu;allenai.org;amazon.com", "position": ";PhD student;PhD student;PhD student;PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nluo2024pace,\ntitle={Pa{CE}: Parsimonious Concept Engineering for Large Language Models},\nauthor={Jinqi Luo and Tianjiao Ding and Kwan Ho Ryan Chan and Darshan Thaker and Aditya Chattopadhyay and Chris Callison-Burch and Rene Vidal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lOMHt16T8R}\n}", "github": "", "reviewers": "75Pi;s54k;Cdr5;rWAy", "pdf_size": 4009595, "rating": "3;6;6;7", "confidence": "5;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "77;73;79;79", "wc_strengths": "36;22;151;68", "wc_weaknesses": "336;41;176;81", "wc_questions": "155;243;107;66", "wc_limitations": "62;7;80;1", "wc_review": "666;386;593;295", "wc_reply_reviewers": "0;35;0;38", "wc_reply_authors": "61;535;59;61", "reply_reviewers": "0;1;0;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.0, 2.449489742783178 ], "wc_strengths_avg": [ 69.25, 50.05684268908697 ], "wc_weaknesses_avg": [ 158.5, 113.60567767501763 ], "wc_questions_avg": [ 142.75, 65.895276765486 ], "wc_limitations_avg": [ 37.5, 34.16504061171302 ], "wc_review_avg": [ 485.0, 150.27142110195138 ], "wc_reply_reviewers_avg": [ 18.25, 18.280795934531955 ], "wc_reply_authors_avg": [ 179.0, 205.53831759552767 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8449809682579853035&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";seas.upenn.edu;seas.upenn.edu;seas.upenn.edu;jhu.edu;allenai.org;amazon.com", "author_num": 7, "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "University of Pennsylvania;Johns Hopkins University;Allen Institute for Artificial Intelligence;Amazon", "aff_unique_dep": ";;;Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://www.jhu.edu;https://allenai.org;https://www.amazon.com", "aff_unique_abbr": "UPenn;JHU;AI2;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimizing over Multiple Distributions under Generalized Quasar-Convexity Condition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93835", "id": "lOV9kSX3Uo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lOV9kSX3Uo", "openreview": "https://openreview.net/forum?id=lOV9kSX3Uo", "poster": "", "project": "", "author_site": "Ding Shihong, Long Yang, Luo Luo, Cong Fang", "tldr": "", "abstract": "We study a typical optimization model where the optimization variable is composed of multiple probability distributions. Though the model appears frequently in practice, such as for policy problems, it lacks specific analysis in the general setting. For this optimization problem, we propose a new structural condition/landscape description named generalized quasar-convexity (GQC) beyond the realms of convexity. In contrast to original quasar-convexity \\citep{hinder2020near}, GQC allows an individual quasar-convex parameter $\\gamma_i$ for each variable block $i$ and the smaller of $\\gamma_i$ implies less block-convexity. To minimize the objective function, we consider a generalized oracle termed as the internal function that includes the standard gradient oracle as a special case. We provide optimistic mirror descent (OMD) for multiple distributions and prove that the algorithm can achieve an adaptive $\\tilde{\\mathcal{O}}((\\sum_{i=1}^d1/\\gamma_i)\\epsilon^{-1})$ iteration complexity to find an $\\varepsilon$-suboptimal global solution without pre-known the exact values of $\\gamma_i$ when the objective admits ``polynomial-like'' structural. Notably, it achieves iteration complexity that does not explicitly depend on the number of distributions and strictly faster $(\\sum_{i=1}^d 1/\\gamma_i \\text{ v.s. } d\\max_{i\\in[1:d]} 1/\\gamma_i)$ than mirror decent methods. We also extend GQC to the minimax optimization problem proposing the generalized quasar-convexity-concavity (GQCC) condition and a decentralized variant of OMD with regularization. Finally, we show the applications of our algorithmic framework on discounted Markov Decision Processes problem and Markov games, which bring new insights on the landscape analysis of reinforcement learning.", "keywords": "Generalized Quasar-Convexity;Generalized Quasar-Convexity-Concavity", "primary_area": "optimization", "supplementary_material": "/attachment/715738c83f90988002d48e3c28dce727f6e4e390.zip", "author": "Shihong Ding;Long Yang;Luo Luo;Cong Fang", "authorids": "~Shihong_Ding1;~Long_Yang4;~Luo_Luo1;~Cong_Fang1", "gender": "M;M;M;M", "homepage": "https://zero-lab-pku.github.io/personwise/dingshihong/;https://person.zju.edu.cn/longyang;https://luoluo-sds.github.io/;https://congfang-ml.github.io/", "dblp": ";;https://dblp.org/pers/hd/l/Luo:Luo;140/6568", "google_scholar": "SMCv8isAAAAJ;;NggI9EsAAAAJ;N2M9RPoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Shihong_Ding1;~Long_Yang4;~Luo_Luo1;~Cong_Fang1", "aff": "Peking University;;Fudan University;Peking University", "aff_domain": "pku.edu.cn;;fudan.edu.cn;pku.edu.cn", "position": "PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nding2024optimizing,\ntitle={Optimizing over Multiple Distributions under Generalized Quasar-Convexity Condition},\nauthor={Shihong Ding and Long Yang and Luo Luo and Cong Fang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lOV9kSX3Uo}\n}", "github": "", "reviewers": "e1HQ;NA4U;oxwM;hhK8", "pdf_size": 674761, "rating": "4;6;7;7", "confidence": "4;2;2;2", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "37;125;93;254", "wc_strengths": "14;73;34;2", "wc_weaknesses": "37;76;154;2", "wc_questions": "14;1;70;2", "wc_limitations": "1;74;51;2", "wc_review": "103;349;402;262", "wc_reply_reviewers": "0;0;10;0", "wc_reply_authors": "59;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 127.25, 79.6692381035491 ], "wc_strengths_avg": [ 30.75, 26.93858756505248 ], "wc_weaknesses_avg": [ 67.25, 56.51271980713722 ], "wc_questions_avg": [ 21.75, 28.32291475113393 ], "wc_limitations_avg": [ 32.0, 31.567388235329194 ], "wc_review_avg": [ 279.0, 113.24089367361951 ], "wc_reply_reviewers_avg": [ 2.5, 4.330127018922194 ], "wc_reply_authors_avg": [ 14.75, 25.54774941164094 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:s0Taifznc0MJ:scholar.google.com/&scioq=Optimizing+over+Multiple+Distributions+under+Generalized+Quasar-Convexity+Condition&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "pku.edu.cn;;fudan.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.fudan.edu.cn", "aff_unique_abbr": "Peking U;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Contextual Linear Optimization with Bandit Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93834", "id": "lOdBHkqzRH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lOdBHkqzRH", "openreview": "https://openreview.net/forum?id=lOdBHkqzRH", "poster": "/media/PosterPDFs/NeurIPS%202024/93834.png?t=1731368273.4414554", "project": "", "author_site": "Yichun Hu, Nathan Kallus, Xiaojie Mao, Yanchen Wu", "tldr": "", "abstract": "Contextual linear optimization (CLO) uses predictive contextual features to reduce uncertainty in random cost coefficients and thereby improve average-cost performance. An example is the stochastic shortest path problem with random edge costs (e.g., traffic) and contextual features (e.g., lagged traffic, weather). Existing work on CLO assumes the data has fully observed cost coefficient vectors, but in many applications, we can only see the realized cost of a historical decision, that is, just one projection of the random cost coefficient vector, to which we refer as bandit feedback. We study a class of offline learning algorithms for CLO with bandit feedback, which we term induced empirical risk minimization (IERM), where we fit a predictive model to directly optimize the downstream performance of the policy it induces. We show a fast-rate regret bound for IERM that allows for misspecified model classes and flexible choices of the optimization estimate, and we develop computationally tractable surrogate losses. A byproduct of our theory of independent interest is fast-rate regret bound for IERM with full feedback and misspecified policy class. We compare the performance of different modeling choices numerically using a stochastic shortest path example and provide practical insights from the empirical results.", "keywords": "Contextual stochastic optimization;Personalized decision making;Prescriptive analytics", "primary_area": "optimization", "supplementary_material": "", "author": "Yichun Hu;Nathan Kallus;Xiaojie Mao;Yanchen Wu", "authorids": "~Yichun_Hu1;~Nathan_Kallus1;~Xiaojie_Mao1;~Yanchen_Wu1", "gender": "F;;M;M", "homepage": "https://yichunhu.github.io/;http://nathankallus.com/;https://xiaojiemao.github.io/;https://yanchenwu2001.github.io/", "dblp": "248/8980;142/2900;222/3283;", "google_scholar": "PhL__SwAAAAJ;K2WfIlsAAAAJ;XtSSJm0AAAAJ;", "orcid": "0000-0002-5826-9665;0000-0003-1672-0507;;", "linkedin": "%E9%80%B8%E7%BA%AF-%E8%83%A1-636b49a9/;;;", "or_profile": "~Yichun_Hu1;~Nathan_Kallus1;~Xiaojie_Mao1;~Yanchen_Wu1", "aff": "Cornell University;Cornell University;Tsinghua University;Tsinghua University", "aff_domain": "cornell.edu;cornell.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "Assistant Professor;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nhu2024contextual,\ntitle={Contextual Linear Optimization with Bandit Feedback},\nauthor={Yichun Hu and Nathan Kallus and Xiaojie Mao and Yanchen Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lOdBHkqzRH}\n}", "github": "", "reviewers": "h4hQ;wdeG;d35g", "pdf_size": 489500, "rating": "6;6;8", "confidence": "3;1;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;2;3", "wc_summary": "53;184;92", "wc_strengths": "39;12;28", "wc_weaknesses": "87;108;118", "wc_questions": "1;1;71", "wc_limitations": "7;1;18", "wc_review": "187;306;327", "wc_reply_reviewers": "59;35;8", "wc_reply_authors": "55;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 109.66666666666667, 54.92014404771916 ], "wc_strengths_avg": [ 26.333333333333332, 11.08552609887726 ], "wc_weaknesses_avg": [ 104.33333333333333, 12.918548250050733 ], "wc_questions_avg": [ 24.333333333333332, 32.99831645537222 ], "wc_limitations_avg": [ 8.666666666666666, 7.039570693980959 ], "wc_review_avg": [ 273.3333333333333, 61.64594246357357 ], "wc_reply_reviewers_avg": [ 34.0, 20.83266665599966 ], "wc_reply_authors_avg": [ 18.333333333333332, 25.927248643506744 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fx-XJJYD_FUJ:scholar.google.com/&scioq=Contextual+Linear+Optimization+with+Bandit+Feedback&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "cornell.edu;cornell.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Cornell University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Cornell;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;China" }, { "title": "SPEAR: Exact Gradient Inversion of Batches in Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93833", "id": "lPDxPVS6ix", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lPDxPVS6ix", "openreview": "https://openreview.net/forum?id=lPDxPVS6ix", "poster": "/media/PosterPDFs/NeurIPS%202024/93833.png?t=1733852004.3334253", "project": "", "author_site": "Dimitar I. Dimitrov, Maximilian Baader, Mark M\u00fcller, Martin Vechev", "tldr": "", "abstract": "Federated learning is a framework for collaborative machine learning where clients only share gradient updates and not their private data with a server. However, it was recently shown that gradient inversion attacks can reconstruct this data from the shared gradients. In the important honest-but-curious setting, existing attacks enable exact reconstruction only for batch size of $b=1$, with larger batches permitting only approximate reconstruction. In this work, we propose SPEAR, *the first algorithm reconstructing whole batches with $b >1$ exactly*. SPEAR combines insights into the explicit low-rank structure of gradients with a sampling-based algorithm. Crucially, we leverage ReLU-induced gradient sparsity to precisely filter out large numbers of incorrect samples, making a final reconstruction step tractable. We provide an efficient GPU implementation for fully connected networks and show that it recovers high-dimensional ImageNet inputs in batches of up to $b \\lesssim 25$ exactly while scaling to large networks. Finally, we show theoretically that much larger batches can be reconstructed with high probability given exponential time.", "keywords": "Federated Learning;Exact Gradient Inversion;Gradient Leakage;Privacy;Attack", "primary_area": "privacy", "supplementary_material": "/attachment/425190129086719899d40db794d7ff0e4ecdaec9.zip", "author": "Dimitar Iliev Dimitrov;Maximilian Baader;Mark Niklas Mueller;Martin Vechev", "authorids": "~Dimitar_Iliev_Dimitrov2;~Maximilian_Baader1;~Mark_Niklas_Mueller2;~Martin_Vechev1", "gender": "M;;M;M", "homepage": "https://www.sri.inf.ethz.ch/people/dimitadi;https://www.sri.inf.ethz.ch/people/max;https://www.sri.inf.ethz.ch/people/mark;https://www.sri.inf.ethz.ch/people/martin", "dblp": "271/0915;249/8060;287/4254;93/2189.html", "google_scholar": "https://scholar.google.com/citations?hl=en;LKqCkWoAAAAJ;RBpmcCAAAAAJ;https://scholar.google.ch/citations?user=aZ1Rh50AAAAJ", "orcid": "0000-0001-9813-0900;0000-0002-9271-6422;0000-0002-2496-6542;", "linkedin": ";;mark-m%C3%BCller-8bb4b1140/;", "or_profile": "~Dimitar_Iliev_Dimitrov2;~Maximilian_Baader1;~Mark_Niklas_Mueller2;~Martin_Vechev1", "aff": "Swiss Federal Institute of Technology;ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ndimitrov2024spear,\ntitle={{SPEAR}: Exact Gradient Inversion of Batches in Federated Learning},\nauthor={Dimitar Iliev Dimitrov and Maximilian Baader and Mark Niklas Mueller and Martin Vechev},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lPDxPVS6ix}\n}", "github": "", "reviewers": "LPF2;Xnvh;38cL;Brdw", "pdf_size": 2806096, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;4;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "110;58;137;58", "wc_strengths": "76;48;89;47", "wc_weaknesses": "118;37;261;34", "wc_questions": "43;86;2;25", "wc_limitations": "4;7;2;2", "wc_review": "351;236;491;166", "wc_reply_reviewers": "0;0;127;0", "wc_reply_authors": "0;0;282;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 34.11286414243166 ], "wc_strengths_avg": [ 65.0, 18.096961070853858 ], "wc_weaknesses_avg": [ 112.5, 92.12084454671484 ], "wc_questions_avg": [ 39.0, 30.78148794324277 ], "wc_limitations_avg": [ 3.75, 2.0463381929681126 ], "wc_review_avg": [ 311.0, 123.1361035602475 ], "wc_reply_reviewers_avg": [ 31.75, 54.99261314031185 ], "wc_reply_authors_avg": [ 70.5, 122.10958193360585 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16637009199350623198&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETH Zurich;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "The Selective $G$-Bispectrum and its Inversion: Applications to $G$-Invariant Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93832", "id": "lPTWdyIY4O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lPTWdyIY4O", "openreview": "https://openreview.net/forum?id=lPTWdyIY4O", "poster": "/media/PosterPDFs/NeurIPS%202024/93832.png?t=1731348691.515012", "project": "", "author_site": "Simon Mataigne, Johan Mathe, Sophia Sanborn, Christopher Hillar, Nina Miolane", "tldr": "", "abstract": "An important problem in signal processing and deep learning is to achieve *invariance* to nuisance factors not relevant for the task. Since many of these factors are describable as the action of a group $G$ (e.g. rotations, translations, scalings), we want methods to be $G$-invariant. The $G$-Bispectrum extracts every characteristic of a given signal up to group action: for example, the shape of an object in an image, but not its orientation. Consequently, the $G$-Bispectrum has been incorporated into deep neural network architectures as a computational primitive for $G$-invariance\\textemdash akin to a pooling mechanism, but with greater selectivity and robustness. However, the computational cost of the $G$-Bispectrum ($\\mathcal{O}(|G|^2)$, with $|G|$ the size of the group) has limited its widespread adoption. Here, we show that the $G$-Bispectrum computation contains redundancies that can be reduced into a *selective $G$-Bispectrum* with $\\mathcal{O}(|G|)$ complexity. We prove desirable mathematical properties of the selective $G$-Bispectrum and demonstrate how its integration in neural networks enhances accuracy and robustness compared to traditional approaches, while enjoying considerable speeds-up compared to the full $G$-Bispectrum.", "keywords": "CNN;group invariance;bispectrum;Neural Network;AI", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Simon Mataigne;Johan Mathe;Sophia Sanborn;Christopher Hillar;Nina Miolane", "authorids": "~Simon_Mataigne1;~Johan_Mathe1;~Sophia_Sanborn1;~Christopher_Hillar3;~Nina_Miolane2", "gender": "M;M;F;;", "homepage": "https://uclouvain.be/fr/repertoires/simon.mataigne;http://johmathe.name;https://sophiasanborn.com;;https://www.ece.ucsb.edu/people/faculty/nina-miolane", "dblp": ";;212/5137;;", "google_scholar": ";https://scholar.google.fr/citations?user=_OZC7N8AAAAJ;tCUvC4oAAAAJ;;", "orcid": ";;0000-0002-1957-7067;;", "linkedin": ";johmathe/;sophia-sanborn;;", "or_profile": "~Simon_Mataigne1;~Johan_Mathe1;~Sophia_Sanborn1;~Christopher_Hillar3;~Nina_Miolane2", "aff": "UCLouvain;;Science;;University of California, Santa Barbara", "aff_domain": "uclouvain.be;;science.xyz;;ucsb.edu", "position": "PhD student;;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nmataigne2024the,\ntitle={The Selective \\$G\\$-Bispectrum and its Inversion: Applications to \\$G\\$-Invariant Networks},\nauthor={Simon Mataigne and Johan Mathe and Sophia Sanborn and Christopher Hillar and Nina Miolane},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lPTWdyIY4O}\n}", "github": "", "reviewers": "Wppf;ZVp5;UtSC;P7yz", "pdf_size": 1920953, "rating": "4;5;5;7", "confidence": "5;4;3;3", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;2", "wc_summary": "77;63;121;136", "wc_strengths": "44;160;157;173", "wc_weaknesses": "260;100;239;445", "wc_questions": "221;133;138;3", "wc_limitations": "28;1;1;34", "wc_review": "630;457;656;791", "wc_reply_reviewers": "153;0;0;122", "wc_reply_authors": "126;124;153;39", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.25, 30.13614938906429 ], "wc_strengths_avg": [ 133.5, 52.02163011671203 ], "wc_weaknesses_avg": [ 261.0, 122.74159849048732 ], "wc_questions_avg": [ 123.75, 77.98517487317702 ], "wc_limitations_avg": [ 16.0, 15.149257407543116 ], "wc_review_avg": [ 633.5, 118.82445034587789 ], "wc_reply_reviewers_avg": [ 68.75, 69.61815495975169 ], "wc_reply_authors_avg": [ 110.5, 42.83981792678396 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7608859102526822, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CN52rR6DKgIJ:scholar.google.com/&scioq=The+Selective+%24G%24-Bispectrum+and+its+Inversion:+Applications+to+%24G%24-Invariant+Networks&hl=en&as_sdt=0,31", "gs_version_total": 5, "email": "uclouvain.be;;science.xyz;;ucsb.edu", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 catholique de Louvain;Science;University of California, Santa Barbara", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uclouvain.be;;https://www.ucsb.edu", "aff_unique_abbr": "UCL;;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;2", "aff_country_unique": "Belgium;;United States" }, { "title": "Order-Independence Without Fine Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93831", "id": "lQ45aR8L7D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lQ45aR8L7D", "openreview": "https://openreview.net/forum?id=lQ45aR8L7D", "poster": "", "project": "", "author_site": "Reid McIlroy-Young, Katrina Brown, Conlan Olson, Linjun Zhang, Cynthia Dwork", "tldr": "", "abstract": "The development of generative language models that can create long and coherent textual outputs via autoregression has lead to a proliferation of uses and a corresponding sweep of analyses as researches work to determine the limitations of this new paradigm. Unlike humans, these '*Large Language Models*' (LLMs) are highly sensitive to small changes in their inputs, leading to unwanted inconsistency in their behavior. One problematic inconsistency when LLMs are used to answer multiple-choice questions or analyze multiple inputs is *order dependency*: the output of an LLM can (and often does) change significantly when sub-sequences are swapped, despite both orderings being semantically identical. In this paper we present , a technique that *guarantees* the output of an LLM will not have order dependence on a specified set of sub-sequences. We show that this method *provably* eliminates order dependency, and that it can be applied to *any* transformer-based LLM to enable text generation that is unaffected by re-orderings. Delving into the implications of our method, we show that, despite our inputs being out of distribution, the impact on expected accuracy is small, where the expectation is over the order of uniformly chosen shuffling of the candidate responses, and usually significantly less in practice. Thus, can be used as a '*dropped-in*' method on fully trained models. Finally, we discuss how our method's success suggests that other strong guarantees can be obtained on LLM performance via modifying the input representations.\n\nCode is available at [github.com/reidmcy/set-based-prompting](https://github.com/reidmcy/set-based-prompting.).", "keywords": "LLMs;Multiple Choice Questions;Transformers;Positional Encodings;Modified Attention Mask", "primary_area": "generative_models", "supplementary_material": "/attachment/25d494ed09b901ee73225f77c01cd36ed85a5fea.zip", "author": "Reid McIlroy-Young;Katrina Brown;Conlan Olson;Linjun Zhang;Cynthia Dwork", "authorids": "~Reid_McIlroy-Young1;~Katrina_Brown1;~Conlan_Olson1;~Linjun_Zhang1;~Cynthia_Dwork2", "gender": "M;F;Non-Binary;M;F", "homepage": "https://reidmcy.com/;;;;https://dwork.seas.harvard.edu/", "dblp": "196/4704;;;;", "google_scholar": "https://scholar.google.ca/citations?user=7Tclf3kAAAAJ;;;TUAzs3sAAAAJ;", "orcid": "0000-0001-9104-4145;;0000-0001-5227-348X;;", "linkedin": ";katrina-brown-5096bb215/;;;", "or_profile": "~Reid_McIlroy-Young1;~Katrina_Brown1;~Conlan_Olson1;~Linjun_Zhang1;~Cynthia_Dwork2", "aff": "Harvard University;Harvard University;School of Engineering and Applied Sciences, Harvard University;Rutgers University;Harvard University", "aff_domain": "harvard.edu;harvard.edu;seas.harvard.edu;rutgers.edu;fas.harvard.edu", "position": "Postdoc;Undergrad student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmcilroy-young2024orderindependence,\ntitle={Order-Independence Without Fine Tuning},\nauthor={Reid McIlroy-Young and Katrina Brown and Conlan Olson and Linjun Zhang and Cynthia Dwork},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lQ45aR8L7D}\n}", "github": "", "reviewers": "CaKN;bKgM;gPKU", "pdf_size": 511513, "rating": "5;5;9", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "1;2;4", "presentation": "1;2;4", "wc_summary": "174;176;87", "wc_strengths": "30;53;77", "wc_weaknesses": "335;299;26", "wc_questions": "43;20;180", "wc_limitations": "33;6;6", "wc_review": "615;554;376", "wc_reply_reviewers": "0;0;138", "wc_reply_authors": "0;0;14", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.333333333333333, 1.8856180831641267 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "presentation_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_avg": [ 145.66666666666666, 41.49163235588057 ], "wc_strengths_avg": [ 53.333333333333336, 19.189117286165672 ], "wc_weaknesses_avg": [ 220.0, 137.96376335835436 ], "wc_questions_avg": [ 81.0, 70.63049388661152 ], "wc_limitations_avg": [ 15.0, 12.727922061357855 ], "wc_review_avg": [ 515.0, 101.3936224161395 ], "wc_reply_reviewers_avg": [ 46.0, 65.05382386916237 ], "wc_reply_authors_avg": [ 4.666666666666667, 6.599663291074443 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7218214403524366695&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "harvard.edu;harvard.edu;seas.harvard.edu;rutgers.edu;fas.harvard.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Harvard University;Rutgers University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.rutgers.edu", "aff_unique_abbr": "Harvard;Rutgers", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "D2R2: Diffusion-based Representation with Random Distance Matching for Tabular Few-shot Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93830", "id": "lS9e36lkxG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lS9e36lkxG", "openreview": "https://openreview.net/forum?id=lS9e36lkxG", "poster": "/media/PosterPDFs/NeurIPS%202024/93830.png?t=1731223886.6952176", "project": "", "author_site": "Ruoxue Liu, Linjiajie Fang, Wenjia Wang, Bingyi Jing", "tldr": "", "abstract": "Tabular data is widely utilized in a wide range of real-world applications. The challenge of few-shot learning with tabular data stands as a crucial problem in both industry and academia, due to the high cost or even impossibility of annotating additional samples. However, the inherent heterogeneity of tabular features, combined with the scarcity of labeled data, presents a significant challenge in tabular few-shot classification. In this paper, we propose a novel approach named Diffusion-based Representation with Random Distance matching (D2R2) for tabular few-shot learning. D2R2 leverages the powerful expression ability of diffusion models to extract essential semantic knowledge crucial for denoising process. This semantic knowledge proves beneficial in few-shot downstream tasks. During the training process of our designed diffusion model, we introduce a random distance matching to preserve distance information in the embeddings, thereby improving effectiveness for classification. During the classification stage, we introduce an instance-wise iterative prototype scheme to improve performance by accommodating the multimodality of embeddings and increasing clustering robustness. Our experiments reveal the significant efficacy of D2R2 across various tabular few-shot learning benchmarks, demonstrating its state-of-the-art performance in this field.", "keywords": "Classification;Few-shot learning;Tabular data;Representation learning;Diffusion models;Self-supervised learning", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/cc9f2272a1cdaff658dc3a9c1dff800f2d4521c2.zip", "author": "Ruoxue Liu;Linjiajie Fang;Wenjia Wang;Bingyi Jing", "authorids": "~Ruoxue_Liu1;~Linjiajie_Fang1;~Wenjia_Wang2;~Bingyi_Jing1", "gender": "F;M;M;M", "homepage": "https://1234.com;;https://www.wenjia-w.com/;https://www.sustech.edu.cn/en/faculties/jingbing-yi.html", "dblp": ";;;15/8051", "google_scholar": ";;EKS1sO0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-5021-5988;;0000-0002-8876-1570", "linkedin": ";;;", "or_profile": "~Ruoxue_Liu1;~Linjiajie_Fang1;~Wenjia_Wang2;~Bingyi_Jing1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;HKUST (GZ);South University of Science and Technology", "aff_domain": "hkust.edu;ust.hk;hkust-gz.edu.cn;sustech.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024dr,\ntitle={D2R2: Diffusion-based Representation with Random Distance Matching for Tabular Few-shot Learning},\nauthor={Ruoxue Liu and Linjiajie Fang and Wenjia Wang and Bingyi Jing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lS9e36lkxG}\n}", "github": "", "reviewers": "m3dm;rjfR;93Kt;wij1", "pdf_size": 954938, "rating": "6;6;6;6", "confidence": "3;3;4;2", "soundness": "2;3;2;3", "novelty": "3;3;2;2", "presentation": "3;3;3;3", "wc_summary": "154;155;143;69", "wc_strengths": "40;33;124;43", "wc_weaknesses": "133;155;156;61", "wc_questions": "107;38;5;53", "wc_limitations": "16;14;17;7", "wc_review": "450;395;445;233", "wc_reply_reviewers": "20;55;31;32", "wc_reply_authors": "27;26;23;25", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 130.25, 35.67474596966319 ], "wc_strengths_avg": [ 60.0, 37.12815643147395 ], "wc_weaknesses_avg": [ 126.25, 38.77740966078059 ], "wc_questions_avg": [ 50.75, 36.82645109157275 ], "wc_limitations_avg": [ 13.5, 3.905124837953327 ], "wc_review_avg": [ 380.75, 87.97265200049388 ], "wc_reply_reviewers_avg": [ 34.5, 12.737739202856996 ], "wc_reply_authors_avg": [ 25.25, 1.479019945774904 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LPxYX_wJuyIJ:scholar.google.com/&scioq=D2R2:+Diffusion-based+Representation+with+Random+Distance+Matching+for+Tabular+Few-shot+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "hkust.edu;ust.hk;hkust-gz.edu.cn;sustech.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;South University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "HKUST;SUSTech", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;Guangzhou;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Kangaroo: Lossless Self-Speculative Decoding for Accelerating LLMs via Double Early Exiting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93829", "id": "lT3oc04mDp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lT3oc04mDp", "openreview": "https://openreview.net/forum?id=lT3oc04mDp", "poster": "", "project": "", "author_site": "Fangcheng Liu, Yehui Tang, Zhenhua Liu, Yunsheng Ni, Duyu Tang, Kai Han, Yunhe Wang", "tldr": "", "abstract": "Speculative decoding has demonstrated its effectiveness in accelerating the inference of large language models (LLMs) while maintaining an identical sampling distribution. However, the conventional approach of training separate draft model to achieve a satisfactory token acceptance rate can be costly and impractical. In this paper, we propose a novel self-speculative decoding framework \\emph{Kangaroo} with \\emph{double} early exiting strategy, which leverages the shallow sub-network and the \\texttt{LM Head} of the well-trained target LLM to construct a self-drafting model. Then, the self-verification stage only requires computing the remaining layers over the \\emph{early-exited} hidden states in parallel. To bridge the representation gap between the sub-network and the full model, we train a lightweight and efficient adapter module on top of the sub-network. One significant challenge that comes with the proposed method is that the inference latency of the self-draft model may no longer be negligible compared to the big model. To boost the token acceptance rate while minimizing the latency of the self-drafting model, we introduce an additional \\emph{early exiting} mechanism for both single-sequence and the tree decoding scenarios. Specifically, we dynamically halt the small model's subsequent prediction during the drafting phase once the confidence level for the current step falls below a certain threshold. This approach reduces unnecessary computations and improves overall efficiency. Extensive experiments on multiple benchmarks demonstrate our effectiveness, where Kangaroo achieves walltime speedups up to 2.04$\\times$, outperforming Medusa-1 with 88.7\\% fewer additional parameters. The code for Kangaroo is available at https://github.com/Equationliu/Kangaroo.", "keywords": "speculative decoding;LLMs;self-drafting;early exiting", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Fangcheng Liu;Yehui Tang;Zhenhua Liu;Yunsheng Ni;Duyu Tang;Kai Han;Yunhe Wang", "authorids": "~Fangcheng_Liu1;~Yehui_Tang1;~Zhenhua_Liu2;~Yunsheng_Ni1;~Duyu_Tang3;~Kai_Han2;~Yunhe_Wang1", "gender": "M;M;M;Not Specified;M;M;M", "homepage": "https://scholar.google.com/;;;https://niyunsheng.github.io/;;https://iamhankai.github.io;https://www.wangyunhe.site/", "dblp": ";244/9659;02/1825-3.html;;;51/4757-2;63/8217-1", "google_scholar": "https://scholar.google.com/;TkSZQ6gAAAAJ;bihqxP4AAAAJ;CSpaTt8AAAAJ;9uz-D-kAAAAJ;vThoBVcAAAAJ;https://scholar.google.com.sg/citations?user=isizOkYAAAAJ", "orcid": ";;;;;0000-0002-9761-2702;0000-0002-0142-509X", "linkedin": ";;;;;;", "or_profile": "~Fangcheng_Liu1;~Yehui_Tang1;~Zhenhua_Liu2;~Yunsheng_Ni1;~Duyu_Tang3;~Kai_Han2;~Yunhe_Wang1", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": ";huawei.com;huawei.com;;huawei.com;huawei.com;huawei.com", "position": ";Researcher;Researcher;;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2024kangaroo,\ntitle={Kangaroo: Lossless Self-Speculative Decoding for Accelerating {LLM}s via Double Early Exiting},\nauthor={Fangcheng Liu and Yehui Tang and Zhenhua Liu and Yunsheng Ni and Duyu Tang and Kai Han and Yunhe Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lT3oc04mDp}\n}", "github": "", "reviewers": "J1nq;oUCN;xb3n;NQcw;Tuhh", "pdf_size": 747080, "rating": "4;5;6;6;7", "confidence": "5;3;4;3;4", "soundness": "3;4;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;3;3;3;4", "wc_summary": "68;102;42;100;82", "wc_strengths": "104;79;91;68;71", "wc_weaknesses": "203;28;89;173;23", "wc_questions": "473;27;5;75;81", "wc_limitations": "2;8;5;48;1", "wc_review": "850;244;232;464;258", "wc_reply_reviewers": "415;43;27;37;0", "wc_reply_authors": "1584;123;14;234;88", "reply_reviewers": "3;1;1;1;0", "reply_authors": "6;3;2;3;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 78.8, 22.22071105972984 ], "wc_strengths_avg": [ 82.6, 13.335666462535722 ], "wc_weaknesses_avg": [ 103.2, 73.64889680097048 ], "wc_questions_avg": [ 132.2, 172.7916664657182 ], "wc_limitations_avg": [ 12.8, 17.769637024992942 ], "wc_review_avg": [ 409.6, 236.16062330541052 ], "wc_reply_reviewers_avg": [ 104.4, 155.99692304657808 ], "wc_reply_authors_avg": [ 408.6, 591.9647286789983 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 3.2, 1.469693845669907 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.36689969285267154, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5796169666297707641&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": ";huawei.com;huawei.com;;huawei.com;huawei.com;huawei.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "lTUXlmjcva", "title": "From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport", "track": "main", "status": "Reject", "tldr": "", "abstract": "In the last decade, we have witnessed the introduction of several novel deep neural network (DNN) architectures exhibiting ever-increasing performance across diverse tasks. Explaining the upward trend of their performance, however, remains difficult as different DNN architectures of comparable depth and width -- common factors associated with their expressive power -- may exhibit a drastically different performance even when trained on the same dataset. In this paper, we introduce the concept of the non-linearity signature of DNN, the first theoretically sound solution for approximately measuring the non-linearity of deep neural networks. Built upon a score derived from closed-form optimal transport mappings, this signature provides a better understanding of the inner workings of a wide range of DNN architectures and learning paradigms, with a particular emphasis on the computer vision task. We provide extensive experimental results that highlight the practical usefulness of the proposed non-linearity signature and its potential for long-reaching implications.", "keywords": "deep neural networks;optimal transport;activation functions", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/26b4f99a15f2452e28144525adaed9e311b34c7d.zip", "author": "Quentin Bouniot;Ievgen Redko;Anton Mallasto;Charlotte Laclau;Oliver Struckmeier;Karol Arndt;Markus Heinonen;Ville Kyrki;Samuel Kaski", "authorids": "~Quentin_Bouniot1;~Ievgen_Redko2;~Anton_Mallasto2;~Charlotte_Laclau2;~Oliver_Struckmeier1;~Karol_Arndt1;~Markus_Heinonen1;~Ville_Kyrki1;~Samuel_Kaski1", "gender": "M;;M;F;M;M;M;;M", "homepage": "https://qbouniot.github.io/;;;https://laclauc.github.io/index.html;;;https://users.aalto.fi/~heinom10/;https://irobotics.aalto.fi;https://people.aalto.fi/samuel.kaski", "dblp": "271/7069;150/3980;;153/2640;;;22/7709;07/2806;64/5826", "google_scholar": "https://scholar.google.com/citations?hl=fr;https://scholar.google.fr/citations?user=qJ1-XewAAAAJ;OENVuJ4AAAAJ;https://scholar.google.fr/citations?user=47i5TpcAAAAJ;https://scholar.google.fi/citations?user=TSZpN5gAAAAJ;https://scholar.google.fr/citations?user=yBxCckoAAAAJ;hFtfHZoAAAAJ;8OBnyXQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-0982-372X;;;;0000-0003-4536-3190;;;;0000-0003-1925-9154", "linkedin": "quentin-bouniot/;;;;oliverstruckmeier/;;;;samuel-kaski-27790/", "or_profile": "~Quentin_Bouniot1;~Ievgen_Redko2;~Anton_Mallasto2;~Charlotte_Laclau2;~Oliver_Struckmeier1;~Karol_Arndt1;~Markus_Heinonen1;~Ville_Kyrki1;~Samuel_Kaski1", "aff": "T\u00e9l\u00e9com ParisTech;Huawei Technologies Ltd.;Smartly;T\u00e9lecom Paris;Aalto University;Nomagic;Aalto University;Aalto University;Aalto University", "aff_domain": "telecom-paristech.fr;huawei.com;smartly.io;telecom-paris.fr;aalto.fi;nomagic.ai;aalto.fi;aalto.fi;aalto.fi", "position": "Postdoc;Principal Researcher;Senior Data Scientist;Associate Professor;PhD student;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024from,\ntitle={From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=lTUXlmjcva}\n}", "github": "", "project": "", "reviewers": "ctip;8vYn;2p6h", "site": "https://openreview.net/forum?id=lTUXlmjcva", "pdf_size": 8965663, "rating": "4;7;7", "confidence": "4;2;4", "soundness": "2;3;4", "novelty": "1;3;4", "presentation": "2;4;3", "wc_summary": "53;146;156", "wc_strengths": "30;37;177", "wc_weaknesses": "317;120;149", "wc_questions": "5;68;15", "wc_limitations": "143;3;9", "wc_review": "548;374;506", "wc_reply_reviewers": "89;85;0", "wc_reply_authors": "224;29;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 118.33333333333333, 46.37767662236745 ], "wc_strengths_avg": [ 81.33333333333333, 67.70688459988557 ], "wc_weaknesses_avg": [ 195.33333333333334, 86.84213007265284 ], "wc_questions_avg": [ 29.333333333333332, 27.644569488820444 ], "wc_limitations_avg": [ 51.666666666666664, 64.62885492478487 ], "wc_review_avg": [ 476.0, 74.13501197140255 ], "wc_reply_reviewers_avg": [ 58.0, 41.04469108991645 ], "wc_reply_authors_avg": [ 84.33333333333333, 99.46635388690768 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5000000000000001, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9973804074502519971&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;3;4;5;4;4;4", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;Huawei;Smartly;T\u00e9l\u00e9com Paris;Aalto University;Nomagic", "aff_unique_dep": ";Huawei Technologies;;;;", "aff_unique_url": "https://www.telecom-paristech.fr;https://www.huawei.com;;https://www.telecom-paris.fr;https://www.aalto.fi;", "aff_unique_abbr": "TP;Huawei;;T\u00e9l\u00e9com Paris;Aalto;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;3;3;3;3", "aff_country_unique": "France;China;;Finland" }, { "title": "Listenable Maps for Zero-Shot Audio Classifiers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93828", "id": "lV1wGHKd5x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lV1wGHKd5x", "openreview": "https://openreview.net/forum?id=lV1wGHKd5x", "poster": "/media/PosterPDFs/NeurIPS%202024/93828.png?t=1733784786.0388582", "project": "", "author_site": "Francesco Paissan, Luca Della Libera, Mirco Ravanelli, Cem Subakan", "tldr": "", "abstract": "Interpreting the decisions of deep learning models, including audio classifiers, is crucial for ensuring the transparency and trustworthiness of this technology. In this paper, we introduce LMAC-ZS (Listenable Maps for Zero-Shot Audio Classifiers), which, to the best of our knowledge, is the first decoder-based post-hoc explanation method for explaining the decisions of zero-shot audio classifiers. The proposed method utilizes a novel loss function that aims to closely reproduce the original similarity patterns between text-and-audio pairs in the generated explanations. We provide an extensive evaluation using the Contrastive Language-Audio Pretraining (CLAP) model to showcase that our interpreter remains faithful to the decisions in a zero-shot classification context. Moreover, we qualitatively show that our method produces meaningful explanations that correlate well with different text prompts.", "keywords": "Zero shot audio classifiers;Posthoc explanations", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Francesco Paissan;Luca Della Libera;Mirco Ravanelli;Cem Subakan", "authorids": "~Francesco_Paissan1;~Luca_Della_Libera1;~Mirco_Ravanelli1;~Cem_Subakan1", "gender": "M;M;;M", "homepage": "https://francescopaissan.it;https://github.com/lucadellalib;https://ycemsubakan.github.io/;https://sites.google.com/site/mircoravanelli/", "dblp": "246/9581;;275/7062;138/0284", "google_scholar": "QJtF3yQAAAAJ;mylZOd4AAAAJ;zXzV-0UAAAAJ;-6Pj3IYAAAAJ", "orcid": "0000-0002-5553-7935;0000-0001-6088-2410;;", "linkedin": ";luca-della-libera;;mirco-ravanelli-489b692a/", "or_profile": "~Francesco_Paissan1;~Luca_Della_Libera1;~Cem_Subakan1;~Mirco_Ravanellu1", "aff": "Fondazione Bruno Kessler;Meta Facebook;Concordia University;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "fbk.eu;meta.com;concordia.ca;mila.umontreal.ca", "position": "Researcher;Intern;Affiliate Assistant Professor;Postdoc", "bibtex": "@inproceedings{\npaissan2024listenable,\ntitle={Listenable Maps for Zero-Shot Audio Classifiers},\nauthor={Francesco Paissan and Luca Della Libera and Mirco Ravanelli and Cem Subakan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lV1wGHKd5x}\n}", "github": "", "reviewers": "DqWc;vLBn;Tsn7", "pdf_size": 4280325, "rating": "5;6;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "3;3;1", "wc_summary": "86;95;83", "wc_strengths": "67;85;75", "wc_weaknesses": "154;160;558", "wc_questions": "335;67;461", "wc_limitations": "12;6;184", "wc_review": "654;413;1361", "wc_reply_reviewers": "12;20;13", "wc_reply_authors": "28;0;10", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 88.0, 5.0990195135927845 ], "wc_strengths_avg": [ 75.66666666666667, 7.363574011458175 ], "wc_weaknesses_avg": [ 290.6666666666667, 189.04908239807872 ], "wc_questions_avg": [ 287.6666666666667, 164.29512740458523 ], "wc_limitations_avg": [ 67.33333333333333, 82.53214861831452 ], "wc_review_avg": [ 809.3333333333334, 402.3036443064147 ], "wc_reply_reviewers_avg": [ 15.0, 3.559026084010437 ], "wc_reply_authors_avg": [ 12.666666666666666, 11.585431464655176 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13862811543519782084&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "fbk.eu;meta.com;concordia.ca;mila.umontreal.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Fondazione Bruno Kessler;Meta;Concordia University;University of Montreal", "aff_unique_dep": ";Meta Platforms, Inc.;;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.fbk.eu;https://meta.com;https://www.concordia.ca;https://www.umontreal.ca", "aff_unique_abbr": "FBK;Meta;Concordia;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Italy;United States;Canada" }, { "title": "Model Fusion through Bayesian Optimization in Language Model Fine-Tuning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93827", "id": "lV4kTHTgpJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lV4kTHTgpJ", "openreview": "https://openreview.net/forum?id=lV4kTHTgpJ", "poster": "", "project": "", "author_site": "Chaeyun Jang, Hyungi Lee, Jungtaek Kim, Juho Lee", "tldr": "", "abstract": "Fine-tuning pre-trained models for downstream tasks is a widely adopted technique known for its adaptability and reliability across various domains. Despite its conceptual simplicity, fine-tuning entails several troublesome engineering choices, such as selecting hyperparameters and determining checkpoints from an optimization trajectory. To tackle the difficulty of choosing the best model, one effective solution is model fusion, which combines multiple models in a parameter space. However, we observe a large discrepancy between loss and metric landscapes during the fine-tuning of pre-trained language models. Building on this observation, we introduce a novel model fusion technique that optimizes both the desired metric and loss through multi-objective Bayesian optimization. In addition, to effectively select hyperparameters, we establish a two-stage procedure by integrating Bayesian optimization processes into our framework. Experiments across various downstream tasks show considerable performance improvements using our Bayesian optimization-guided method.", "keywords": "fine-tuning;language model;bayesian optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Chaeyun Jang;Hyungi Lee;Jungtaek Kim;Juho Lee", "authorids": "~Chaeyun_Jang1;~Hyungi_Lee1;~Jungtaek_Kim1;~Juho_Lee2", "gender": ";M;M;M", "homepage": "https://siml.kaist.ac.kr/;;https://jungtaekkim.github.io;https://juho.lee.github.io", "dblp": ";221/7959;31/3193-1;55/3410-1", "google_scholar": ";;KXNUYWgAAAAJ;Py4URJUAAAAJ", "orcid": ";;0000-0002-1905-1399;", "linkedin": ";hyungi-lee-a8b161149/;jungtaekkim;", "or_profile": "~Chaeyun_Jang1;~Hyungi_Lee1;~Jungtaek_Kim1;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;University of Pittsburgh;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.edu;kaist.ac.kr;pitt.edu;kaist.ac.kr", "position": "MS student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\njang2024model,\ntitle={Model Fusion through Bayesian Optimization in Language Model Fine-Tuning},\nauthor={Chaeyun Jang and Hyungi Lee and Jungtaek Kim and Juho Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lV4kTHTgpJ}\n}", "github": "", "reviewers": "ut18;JjKU;WU7w;csBg", "pdf_size": 2360090, "rating": "2;7;7;8", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "78;37;89;302", "wc_strengths": "20;16;161;88", "wc_weaknesses": "144;51;105;233", "wc_questions": "40;201;2;1", "wc_limitations": "144;1;10;1", "wc_review": "426;306;367;625", "wc_reply_reviewers": "0;33;25;232", "wc_reply_authors": "0;47;19;23", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 2.345207879911715 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 126.5, 103.16128149649946 ], "wc_strengths_avg": [ 71.25, 59.191954689805605 ], "wc_weaknesses_avg": [ 133.25, 66.38665151971442 ], "wc_questions_avg": [ 61.0, 82.34379150852844 ], "wc_limitations_avg": [ 39.0, 60.73302231899875 ], "wc_review_avg": [ 431.0, 119.77270139727166 ], "wc_reply_reviewers_avg": [ 72.5, 92.888373868854 ], "wc_reply_authors_avg": [ 22.25, 16.723860200324566 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.24618298195866548, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z8IKj9itK1IJ:scholar.google.com/&scioq=Model+Fusion+through+Bayesian+Optimization+in+Language+Model+Fine-Tuning&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "kaist.edu;kaist.ac.kr;pitt.edu;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;University of Pittsburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.pitt.edu", "aff_unique_abbr": "KAIST;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "Accelerated Regularized Learning in Finite N-Person Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93826", "id": "lW2zYQm0ox", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lW2zYQm0ox", "openreview": "https://openreview.net/forum?id=lW2zYQm0ox", "poster": "", "project": "", "author_site": "Kyriakos Lotidis, Angeliki Giannou, Panayotis Mertikopoulos, Nicholas Bambos", "tldr": "", "abstract": "Motivated by the success of Nesterov's accelerated gradient algorithm for convex minimization problems, we examine whether it is possible to achieve similar performance gains in the context of online learning in games.\nTo that end, we introduce a family of accelerated learning methods, which we call \u201cfollow the accelerated leader\u201d (FTXL), and which incorporates the use of momentum within the general framework of regularized learning - and, in particular, the exponential / multiplicative weights algorithm and its variants.\nDrawing inspiration and techniques from the continuous-time analysis of Nesterov's algorithm, we show that FTXL converges locally to strict Nash equilibria at a superlinear rate, achieving in this way an exponential speed-up over vanilla regularized learning methods (which, by comparison, converge to strict equilibria at a geometric, linear rate).\nImportantly, the FTXL maintains its superlinear convergence rate in a broad range of feedback structures, from deterministic, full information models to stochastic, realization-based ones, and even bandit, payoff-based information, where players are only able to observe their individual realized payoffs.", "keywords": "game theory;Nash equilibrium;Nesterov's acceleration;regularized learning", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/cb955a5cd8536ddb4a5190a9ad887724c8b23c77.zip", "author": "Kyriakos Lotidis;Angeliki Giannou;Panayotis Mertikopoulos;Nicholas Bambos", "authorids": "~Kyriakos_Lotidis1;~Angeliki_Giannou1;~Panayotis_Mertikopoulos1;~Nicholas_Bambos1", "gender": ";F;M;M", "homepage": ";https://sites.google.com/view/angeliki-giannou/home;http://polaris.imag.fr/panayotis.mertikopoulos/;", "dblp": ";283/5898.html;49/6721;b/NicholasBambos", "google_scholar": ";;xsusqPYAAAAJ;", "orcid": ";;0000-0003-2026-9616;", "linkedin": ";;;", "or_profile": "~Kyriakos_Lotidis1;~Angeliki_Giannou1;~Panayotis_Mertikopoulos1;~Nicholas_Bambos1", "aff": ";University of Wisconsin - Madison;French National Center for Scientific Research;Stanford University", "aff_domain": ";wisc.edu;imag.fr;stanford.edu", "position": ";PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\nlotidis2024accelerated,\ntitle={Accelerated Regularized Learning in Finite N-Person Games},\nauthor={Kyriakos Lotidis and Angeliki Giannou and Panayotis Mertikopoulos and Nicholas Bambos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lW2zYQm0ox}\n}", "github": "", "reviewers": "yttq;sdz4;3QUL;qtGD", "pdf_size": 748888, "rating": "5;6;7;8", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "89;63;45;55", "wc_strengths": "74;56;87;131", "wc_weaknesses": "110;156;196;2", "wc_questions": "58;4;97;73", "wc_limitations": "11;10;1;22", "wc_review": "342;289;426;283", "wc_reply_reviewers": "16;21;10;0", "wc_reply_authors": "12;12;14;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 16.30950643030009 ], "wc_strengths_avg": [ 87.0, 27.685736399814257 ], "wc_weaknesses_avg": [ 116.0, 72.5120679611332 ], "wc_questions_avg": [ 58.0, 34.13942003022313 ], "wc_limitations_avg": [ 11.0, 7.44983221287567 ], "wc_review_avg": [ 335.0, 57.336724705898575 ], "wc_reply_reviewers_avg": [ 11.75, 7.8222439235810075 ], "wc_reply_authors_avg": [ 9.5, 5.545268253204709 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yAiNWvrVPjgJ:scholar.google.com/&scioq=Accelerated+Regularized+Learning+in+Finite+N-Person+Games&hl=en&as_sdt=0,5", "gs_version_total": 8, "email": ";wisc.edu;imag.fr;stanford.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wisconsin-Madison;French National Center for Scientific Research;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.cnrs.fr;https://www.stanford.edu", "aff_unique_abbr": "UW-Madison;CNRS;Stanford", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Madison;;Stanford", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "title": "From Chaos to Clarity: 3DGS in the Dark", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93825", "id": "lWHe7pmk7C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lWHe7pmk7C", "openreview": "https://openreview.net/forum?id=lWHe7pmk7C", "poster": "", "project": "", "author_site": "Zhihao Li, Yufei Wang, Alex Kot, Bihan Wen", "tldr": "", "abstract": "Novel view synthesis from raw images provides superior high dynamic range (HDR) information compared to reconstructions from low dynamic range RGB images. However, the inherent noise in unprocessed raw images compromises the accuracy of 3D scene representation. Our study reveals that 3D Gaussian Splatting (3DGS) is particularly susceptible to this noise, leading to numerous elongated Gaussian shapes that overfit the noise, thereby significantly degrading reconstruction quality and reducing inference speed, especially in scenarios with limited views. To address these issues, we introduce a novel self-supervised learning framework designed to reconstruct HDR 3DGS from a limited number of noisy raw images. This framework enhances 3DGS by integrating a noise extractor and employing a noise-robust reconstruction loss that leverages a noise distribution prior. Experimental results show that our method outperforms LDR/HDR 3DGS and previous state-of-the-art (SOTA) self-supervised and supervised pre-trained models in both reconstruction quality and inference speed on the RawNeRF dataset across a broad range of training views. We will release the code upon paper acceptance.", "keywords": "Novel view synthesis; Raw images; 3D Gaussian Splatting (3DGS); Denosing; Self-supervised learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhihao Li;Yufei Wang;Alex Kot;Bihan Wen", "authorids": "~Zhihao_Li14;~Yufei_Wang5;~Alex_Kot1;~Bihan_Wen2", "gender": "M;M;;M", "homepage": "https://lizhihao6.github.io;https://github.com/wyf0912/;https://www.ntu.edu.sg/home/eackot/;https://personal.ntu.edu.sg/bihan.wen/", "dblp": ";;;158/9840", "google_scholar": "gWlYsj0AAAAJ;jLd1l_sAAAAJ;;ypkClpwAAAAJ", "orcid": "0000-0002-2066-8775;;;0000-0002-6874-6453", "linkedin": ";;;", "or_profile": "~Zhihao_Li14;~Yufei_Wang5;~Alex_Kot1;~Bihan_Wen2", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "Researcher;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024from,\ntitle={From Chaos to Clarity: 3{DGS} in the Dark},\nauthor={Zhihao Li and Yufei Wang and Alex Kot and Bihan Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lWHe7pmk7C}\n}", "github": "", "reviewers": "m8Xp;h6sN;MTeY;JuHQ;ZDPY", "pdf_size": 6740771, "rating": "3;5;6;6;6", "confidence": "4;4;3;3;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "2;4;3;3;3", "wc_summary": "96;59;122;64;38", "wc_strengths": "32;25;103;51;38", "wc_weaknesses": "300;65;64;86;15", "wc_questions": "3;6;4;38;2", "wc_limitations": "1;21;61;11;6", "wc_review": "432;176;354;250;99", "wc_reply_reviewers": "658;140;16;48;0", "wc_reply_authors": "1263;228;0;70;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "4;2;1;2;1", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 75.8, 29.640512816076576 ], "wc_strengths_avg": [ 49.8, 27.93850389695196 ], "wc_weaknesses_avg": [ 106.0, 99.76171610392436 ], "wc_questions_avg": [ 10.6, 13.763720427268202 ], "wc_limitations_avg": [ 20.0, 21.540659228538015 ], "wc_review_avg": [ 262.2, 119.55985948469494 ], "wc_reply_reviewers_avg": [ 172.4, 247.59450720886358 ], "wc_reply_authors_avg": [ 312.2, 482.64040444206495 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.840168050416806, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13586258365943090305&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Full-Distance Evasion of Pedestrian Detectors in the Physical World", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93824", "id": "lWYwZklSvg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lWYwZklSvg", "openreview": "https://openreview.net/forum?id=lWYwZklSvg", "poster": "/media/PosterPDFs/NeurIPS%202024/93824.png?t=1729578641.1262453", "project": "", "author_site": "Zhi Cheng, Zhanhao Hu, Yuqiu Liu, Jianmin Li, Hang Su, Xiaolin Hu", "tldr": "", "abstract": "Many studies have proposed attack methods to generate adversarial patterns for evading pedestrian detection, alarming the computer vision community about the need for more attention to the robustness of detectors. However, adversarial patterns optimized by these methods commonly have limited performance at medium to long distances in the physical world. To overcome this limitation, we identify two main challenges. First, in existing methods, there is commonly an appearance gap between simulated distant adversarial patterns and their physical world counterparts, leading to incorrect optimization. Second, there exists a conflict between adversarial losses at different distances, which causes difficulties in optimization. To overcome these challenges, we introduce a Full Distance Attack (FDA) method. Our physical world experiments demonstrate the effectiveness of our FDA patterns across various detection models like YOLOv5, Deformable-DETR, and Mask RCNN. Codes available at https://github.com/zhicheng2T0/Full-Distance-Attack.git", "keywords": "Adversarial Attacks;Pedestrian Detection", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0a5dfd787fc79b1e6009ad5c36fb5faa38b4e424.zip", "author": "Zhi Cheng;Zhanhao Hu;Yuqiu Liu;Jianmin Li;Hang Su;Xiaolin Hu", "authorids": "~Zhi_Cheng2;~Zhanhao_Hu1;~Yuqiu_Liu2;~Jianmin_Li1;~Hang_Su3;~Xiaolin_Hu1", "gender": "M;F;M;M;M;M", "homepage": "https://whothu.github.io/;;;http://www.xlhu.cn/;;", "dblp": "208/4826;314/6258;71/5930-1;60/6028-1;26/5371-6;154/6564", "google_scholar": ";;PeF1aPkAAAAJ;PksdgoUAAAAJ;dxN1_X0AAAAJ;", "orcid": "0000-0002-3746-1447;0009-0008-5708-4467;;0000-0002-4907-7354;;0000-0003-0890-7013", "linkedin": ";;;;;", "or_profile": "~Zhanhao_Hu1;~Yuqiu_Liu2;~Jianmin_Li1;~Xiaolin_Hu1;~Hang_Su2;~Zhi_Cheng1", "aff": "University of California, Berkeley;Beijing Forestry University;Tsinghua University;Tsinghua University;Tsinghua University;QiYuan Lab", "aff_domain": "berkeley.edu;bjfu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;qiyuanlab.com", "position": "Postdoc;MS student;Associate Professor;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\ncheng2024fulldistance,\ntitle={Full-Distance Evasion of Pedestrian Detectors in the Physical World},\nauthor={Zhi Cheng and Zhanhao Hu and Yuqiu Liu and Jianmin Li and Hang Su and Xiaolin Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lWYwZklSvg}\n}", "github": "", "reviewers": "ouf1;e7ej;vR4J", "pdf_size": 2903586, "rating": "5;5;6", "confidence": "5;3;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "101;52;52", "wc_strengths": "95;104;43", "wc_weaknesses": "382;20;258", "wc_questions": "7;69;5", "wc_limitations": "7;18;18", "wc_review": "592;263;376", "wc_reply_reviewers": "22;28;212", "wc_reply_authors": "31;14;446", "reply_reviewers": "1;1;2", "reply_authors": "2;2;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.33333333333333, 23.098821518760552 ], "wc_strengths_avg": [ 80.66666666666667, 26.88659310676771 ], "wc_weaknesses_avg": [ 220.0, 150.2087436425279 ], "wc_questions_avg": [ 27.0, 29.709706606876257 ], "wc_limitations_avg": [ 14.333333333333334, 5.185449728701348 ], "wc_review_avg": [ 410.3333333333333, 136.49012988328334 ], "wc_reply_reviewers_avg": [ 87.33333333333333, 88.1866706985182 ], "wc_reply_authors_avg": [ 163.66666666666666, 199.7604120495906 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8503978149545285407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "berkeley.edu;bjfu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;qiyuanlab.com", "author_num": 6, "aff_unique_index": "0;1;2;2;2;3", "aff_unique_norm": "University of California, Berkeley;Beijing Forestry University;Tsinghua University;Qiyuan Lab", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.bfu.edu.cn;https://www.tsinghua.edu.cn;", "aff_unique_abbr": "UC Berkeley;BFU;THU;", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;China;" }, { "title": "Fine-grained Analysis of In-context Linear Estimation: Data, Architecture, and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93823", "id": "lYPAYmfQqm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lYPAYmfQqm", "openreview": "https://openreview.net/forum?id=lYPAYmfQqm", "poster": "", "project": "", "author_site": "Yingcong Li, Ankit Rawat, Samet Oymak", "tldr": "", "abstract": "Recent research has shown that Transformers with linear attention are capable of in-context learning (ICL) by implementing a linear estimator through gradient descent steps. However, the existing results on the optimization landscape apply under stylized settings where task and feature vectors are assumed to be IID and the attention weights are fully parameterized. In this work, we develop a stronger characterization of the optimization and generalization landscape of ICL through contributions on architectures, low-rank parameterization, and correlated designs: (1) We study the landscape of 1-layer linear attention and 1-layer H3, a state-space model. Under a suitable correlated design assumption, we prove that both implement 1-step preconditioned gradient descent. We show that thanks to its native convolution filters, H3 also has the advantage of implementing sample weighting and outperforming linear attention in suitable settings. (2) By studying correlated designs, we provide new risk bounds for retrieval augmented generation (RAG) and task-feature alignment which reveal how ICL sample complexity benefits from distributional alignment. (3) We derive the optimal risk for low-rank parameterized attention weights in terms of covariance spectrum. Through this, we also shed light on how LoRA can adapt to a new distribution by capturing the shift between task covariances. Experimental results corroborate our theoretical findings. Overall, this work explores the optimization and risk landscape of ICL in practically meaningful settings and contributes to a more thorough understanding of its mechanics.", "keywords": "In-context learning;linear attention;state-space model;optimization;RAG;LoRA", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Yingcong Li;Ankit Singh Rawat;Samet Oymak", "authorids": "~Yingcong_Li1;~Ankit_Singh_Rawat1;~Samet_Oymak2", "gender": ";M;M", "homepage": "https://yingcong-li.github.io/;https://ankitsrawat.github.io/home/;https://sota.engin.umich.edu/", "dblp": "244/4435;https://dblp.org/pers/hd/r/Rawat:Ankit_Singh;89/8771", "google_scholar": "9uWgjIUAAAAJ;http://scholar.google.com/citations?user=U0_ab4cAAAAJ;AY6InkoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yingcong_Li1;~Ankit_Singh_Rawat1;~Samet_Oymak1", "aff": "University of Michigan - Ann Arbor;Google;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;google.com;umich.edu", "position": "PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nli2024finegrained,\ntitle={Fine-grained Analysis of In-context Linear Estimation: Data, Architecture, and Beyond},\nauthor={Yingcong Li and Ankit Singh Rawat and Samet Oymak},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lYPAYmfQqm}\n}", "github": "", "reviewers": "LuUw;HGXo;EE6j", "pdf_size": 595712, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "3;3;2", "novelty": "1;3;3", "presentation": "3;3;3", "wc_summary": "46;97;179", "wc_strengths": "4;52;101", "wc_weaknesses": "89;35;140", "wc_questions": "227;156;50", "wc_limitations": "1;39;19", "wc_review": "367;379;489", "wc_reply_reviewers": "131;0;17", "wc_reply_authors": "227;0;27", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.33333333333333, 54.786454124679 ], "wc_strengths_avg": [ 52.333333333333336, 39.60078562632593 ], "wc_weaknesses_avg": [ 88.0, 42.871902220452036 ], "wc_questions_avg": [ 144.33333333333334, 72.72933077896121 ], "wc_limitations_avg": [ 19.666666666666668, 15.520595635763755 ], "wc_review_avg": [ 411.6666666666667, 54.901932773102104 ], "wc_reply_reviewers_avg": [ 49.333333333333336, 58.16260730820867 ], "wc_reply_authors_avg": [ 84.66666666666667, 101.24667347073132 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6840613320008184886&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "umich.edu;google.com;umich.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Michigan;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.umich.edu;https://www.google.com", "aff_unique_abbr": "UM;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Ann Arbor;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "EMR-Merging: Tuning-Free High-Performance Model Merging", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93822", "id": "lYdjzx3DYu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lYdjzx3DYu", "openreview": "https://openreview.net/forum?id=lYdjzx3DYu", "poster": "/media/PosterPDFs/NeurIPS%202024/93822.png?t=1728714550.948095", "project": "", "author_site": "Chenyu Huang, Peng Ye, Tao Chen, Tong He, Xiangyu Yue, Wanli Ouyang", "tldr": "", "abstract": "The success of pretrain-finetune paradigm brings about the release of numerous model weights. In this case, merging models finetuned on different tasks to enable a single model with multi-task capabilities is gaining increasing attention for its practicability. Existing model merging methods usually suffer from (1) significant performance degradation or (2) requiring tuning by additional data or training. In this paper, we rethink and analyze the existing model merging paradigm. We discover that using a single model's weights can hardly simulate all the models' performance. To tackle this issue, we propose Elect, Mask & Rescale-Merging (EMR-Merging). We first (a) elect a unified model from all the model weights and then (b) generate extremely lightweight task-specific modulators, including masks and rescalers, to align the direction and magnitude between the unified model and each specific model, respectively. EMR-Merging is tuning-free, thus requiring no data availability or any additional training while showing impressive performance. We find that EMR-Merging shows outstanding performance compared to existing merging methods under different classical and newly-established settings, including merging different numbers of vision models (up to 30), NLP models, PEFT models, and multi-modal models.", "keywords": "Model Merging;Model Compression;Multi-task Learning;Supervised Finetuning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/f53c2db76033b5b3daebf83830c57f8abd6d0c84.zip", "author": "Chenyu Huang;Peng Ye;Tao Chen;Tong He;Xiangyu Yue;Wanli Ouyang", "authorids": "~Chenyu_Huang2;~Peng_Ye4;~Tao_Chen6;~Tong_He2;~Xiangyu_Yue1;~Wanli_Ouyang1", "gender": "M;M;M;M;M;", "homepage": ";;https://eetchen.github.io/;http://tonghe90.github.io/;http://xyue.io/;", "dblp": ";53/930-6;69/510-3;02/1554-1;207/7518;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;UEZZP5QAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ;kWADCMUAAAAJ;-xQ-C1sAAAAJ;", "orcid": "0000-0003-0100-8488;0000-0002-8486-7562;;0000-0003-2772-9320;;", "linkedin": ";;;;;", "or_profile": "~Chenyu_Huang2;~Peng_Ye4;~Tao_Chen6;~Tong_He2;~Xiangyu_Yue1;~Wanli_Ouyang1", "aff": "Southeast University;Fudan University;Fudan University;Shanghai AI lab;The Chinese University of Hong Kong;", "aff_domain": "seu.edu.cn;fudan.edu.cn;fudan.edu.cn;pjlab.org.cn;ie.cuhk.edu;", "position": "MS student;PhD student;Full Professor;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nhuang2024emrmerging,\ntitle={{EMR}-Merging: Tuning-Free High-Performance Model Merging},\nauthor={Chenyu Huang and Peng Ye and Tao Chen and Tong He and Xiangyu Yue and Wanli Ouyang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lYdjzx3DYu}\n}", "github": "", "reviewers": "b5vt;Qs8J;S3RQ;Lqs2", "pdf_size": 25198467, "rating": "5;5;5;6", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "55;65;46;71", "wc_strengths": "74;28;60;54", "wc_weaknesses": "266;184;195;168", "wc_questions": "1;60;61;5", "wc_limitations": "1;6;58;1", "wc_review": "397;343;420;299", "wc_reply_reviewers": "165;0;0;0", "wc_reply_authors": "443;0;0;0", "reply_reviewers": "2;0;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 59.25, 9.54921462739214 ], "wc_strengths_avg": [ 54.0, 16.673332000533065 ], "wc_weaknesses_avg": [ 203.25, 37.47916087641237 ], "wc_questions_avg": [ 31.75, 28.78693279944913 ], "wc_limitations_avg": [ 16.5, 24.046829312822098 ], "wc_review_avg": [ 364.75, 47.14008379288268 ], "wc_reply_reviewers_avg": [ 41.25, 71.44709581221619 ], "wc_reply_authors_avg": [ 110.75, 191.82462693825315 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4124707410752300868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "seu.edu.cn;fudan.edu.cn;fudan.edu.cn;pjlab.org.cn;ie.cuhk.edu;", "author_num": 6, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Southeast University;Fudan University;Shanghai AI Lab;Chinese University of Hong Kong", "aff_unique_dep": ";;AI Research;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.fudan.edu.cn;https://www.shanghaiailab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "SEU;Fudan;Shanghai AI Lab;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Learning in Medical Image Registration: Magic or Mirage?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93821", "id": "lZJ0WYI5YC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lZJ0WYI5YC", "openreview": "https://openreview.net/forum?id=lZJ0WYI5YC", "poster": "/media/PosterPDFs/NeurIPS%202024/93821.png?t=1733252937.0650697", "project": "", "author_site": "Rohit Jena, Deeksha Sethi, Pratik Chaudhari, James Gee", "tldr": "", "abstract": "Classical optimization and learning-based methods are the two reigning paradigms in deformable image registration. While optimization-based methods boast generalizability across modalities and robust performance, learning-based methods promise peak performance, incorporating weak supervision and amortized optimization. However, the exact conditions for either paradigm to perform well over the other are shrouded and not explicitly outlined in the existing literature. In this paper, we make an explicit correspondence between the mutual information of the distribution of per-pixel intensity and labels, and the performance of classical registration methods. This strong correlation hints to the fact that architectural designs in learning-based methods is unlikely to affect this correlation, and therefore, the performance of learning-based methods. This hypothesis is thoroughly validated with state-of-the-art classical and learning-based methods. However, learning-based methods with weak supervision can perform high-fidelity intensity and label registration, which is not possible with classical methods. Next, we show that this high-fidelity feature learning does not translate to invariance to domain shift, and learning-based methods are sensitive to such changes in the data distribution. We reassess and recalibrate performance expectations from classical and DLIR methods under access to label supervision, training time, and its generalization capabilities under minor domain shifts.", "keywords": "image registration;image alignment;medical image registration;T1-weighed MRI;image alignment;deformable image registration;diffeomorphism;optimization;fairness;evaluation", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/6cdfdae0ea7153dd31505b0c8cea3efa8b4b810e.zip", "author": "Rohit Jena;Deeksha Sethi;Pratik Chaudhari;James Gee", "authorids": "~Rohit_Jena1;~Deeksha_Sethi1;~Pratik_Chaudhari1;~James_Gee1", "gender": "M;F;M;M", "homepage": "https://jenaroh.it;;https://pratikac.github.io/;https://www.med.upenn.edu/apps/faculty/index.php/g5455356/p10656", "dblp": "239/8707;;;30/6904", "google_scholar": "kZQQFE4AAAAJ;P7nTj5EAAAAJ;c_z5hWEAAAAJ;https://scholar.google.com.tw/citations?user=fU8fmEIAAAAJ", "orcid": ";0000-0002-7937-6056;;", "linkedin": "rohitrango/;deeksha-sethi-887484a0/;pratik-chaudhari-59508765;", "or_profile": "~Rohit_Jena1;~Deeksha_Sethi1;~Pratik_Chaudhari1;~James_Gee1", "aff": "University of Pennsylvania;University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu", "position": "PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\njena2024deep,\ntitle={Deep Learning in Medical Image Registration: Magic or Mirage?},\nauthor={Rohit Jena and Deeksha Sethi and Pratik Chaudhari and James Gee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lZJ0WYI5YC}\n}", "github": "", "reviewers": "Xf1e;23gn;Aeaf;tuv2", "pdf_size": 506082, "rating": "4;4;5;6", "confidence": "5;4;3;3", "soundness": "3;3;3;4", "novelty": "1;2;3;3", "presentation": "3;2;3;3", "wc_summary": "23;90;37;76", "wc_strengths": "21;56;49;50", "wc_weaknesses": "102;808;48;121", "wc_questions": "48;16;12;13", "wc_limitations": "1;7;1;31", "wc_review": "195;977;147;291", "wc_reply_reviewers": "32;152;17;18", "wc_reply_authors": "0;0;33;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.5, 27.408940147331492 ], "wc_strengths_avg": [ 44.0, 13.546217184144066 ], "wc_weaknesses_avg": [ 269.75, 311.91054406672436 ], "wc_questions_avg": [ 22.25, 14.939461168328663 ], "wc_limitations_avg": [ 10.0, 12.36931687685298 ], "wc_review_avg": [ 402.5, 335.71528115353937 ], "wc_reply_reviewers_avg": [ 54.75, 56.45960945667265 ], "wc_reply_authors_avg": [ 8.25, 14.289419162443238 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8181818181818182, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1474415323525423662&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "upenn.edu;seas.upenn.edu;seas.upenn.edu;upenn.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Cascade Speculative Drafting for Even Faster LLM Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93820", "id": "lZY9u0ijP7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lZY9u0ijP7", "openreview": "https://openreview.net/forum?id=lZY9u0ijP7", "poster": "", "project": "", "author_site": "Ziyi Chen, Xiaocong Yang, Jiacheng Lin, Chenkai Sun, Kevin Chang, Jie Huang", "tldr": "", "abstract": "Introduced to enhance the efficiency of large language model (LLM) inference, speculative decoding operates by having a smaller model generate a draft. A larger target model then reviews this draft to align with its output, and any acceptance by the target model results in a reduction of the number of the target model runs, ultimately improving efficiency. However, the drafting process in speculative decoding includes slow autoregressive generation and allocates equal time to generating tokens, irrespective of their importance. These inefficiencies collectively contribute to the suboptimal performance of speculative decoding. To further improve LLM inference, we introduce Cascade Speculative Drafting (CS Drafting), a speculative execution algorithm that incorporates two types of cascades. The *Vertical Cascade* eliminates autoregressive generation from neural models, while the *Horizontal Cascade* optimizes time allocation in drafting for improved efficiency. Combining both cascades, CS Drafting achieves greater speedup compared to the baselines in our experiments, while preserving the same output distribution as the target model. Our code is publicly available at https://github.com/lfsszd/CS-Drafting.", "keywords": "Speculative Decoding;Large Language Models;Efficient NLP", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/c530e526cc568eb45379de7175a1c813758c01d4.zip", "author": "Ziyi Chen;Xiaocong Yang;Jiacheng Lin;Chenkai Sun;Kevin Chang;Jie Huang", "authorids": "~Ziyi_Chen8;~Xiaocong_Yang3;~Jiacheng_Lin3;~Chenkai_Sun1;~Kevin_Chang1;~Jie_Huang3", "gender": ";M;M;M;M;", "homepage": "https://www.linkedin.com/in/ziyi-c-7ab9a213a/;;https://linjc16.github.io/;https://chenkaisun.github.io/;https://siebelschool.illinois.edu/about/people/faculty/kcchang;https://jeffhj.github.io/", "dblp": ";;;251/9509;c/KCCChang;29/6643-9", "google_scholar": ";s0K3OecAAAAJ;https://scholar.google.com.tw/citations?user=h9tJLt8AAAAJ;ipzG4asAAAAJ;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ;GIoPkMoAAAAJ", "orcid": ";;;0000-0002-7999-6933;0000-0003-0997-6803;", "linkedin": ";;;chenkaisun/;;jie-huang-4b0104151/", "or_profile": "~Ziyi_Chen8;~Xiaocong_Yang3;~Jiacheng_Lin3;~Chenkai_Sun1;~Kevin_Chang1;~Jie_Huang3", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois;University of Illinois Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;uiuc.edu;cs.illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "MS student;MS student;PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024cascade,\ntitle={Cascade Speculative Drafting for Even Faster {LLM} Inference},\nauthor={Ziyi Chen and Xiaocong Yang and Jiacheng Lin and Chenkai Sun and Kevin Chang and Jie Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lZY9u0ijP7}\n}", "github": "", "reviewers": "qvRP;FGcM;LRu7", "pdf_size": 433372, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;2", "wc_summary": "63;41;79", "wc_strengths": "60;22;113", "wc_weaknesses": "109;54;183", "wc_questions": "69;1;5", "wc_limitations": "16;1;1", "wc_review": "317;119;381", "wc_reply_reviewers": "32;0;25", "wc_reply_authors": "23;34;34", "reply_reviewers": "1;0;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 61.0, 15.57776192739723 ], "wc_strengths_avg": [ 65.0, 37.31844941401862 ], "wc_weaknesses_avg": [ 115.33333333333333, 52.85409686633152 ], "wc_questions_avg": [ 25.0, 31.15552385479446 ], "wc_limitations_avg": [ 6.0, 7.0710678118654755 ], "wc_review_avg": [ 272.3333333333333, 111.52677805003704 ], "wc_reply_reviewers_avg": [ 19.0, 13.73559851869101 ], "wc_reply_authors_avg": [ 30.333333333333332, 5.185449728701348 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6043476729851183426&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.illinois.edu;uiuc.edu;cs.illinois.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "VISA: Variational Inference with Sequential Sample-Average Approximations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93819", "id": "lbLC5OV9GY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lbLC5OV9GY", "openreview": "https://openreview.net/forum?id=lbLC5OV9GY", "poster": "", "project": "", "author_site": "Heiko Zimmermann, Christian Andersson Naesseth, Jan-Willem van de Meent", "tldr": "", "abstract": "We present variational inference with sequential sample-average approximations (VISA), a method for approximate inference in computationally intensive models, such as those based on numerical simulations. VISA extends importance-weighted forward-KL variational inference by employing a sequence of sample-average approximations, which are considered valid inside a trust region. This makes it possible to reuse model evaluations across multiple gradient steps, thereby reducing computational cost. We perform experiments on high-dimensional Gaussians, Lotka-Volterra dynamics, and a Pickover attractor, which demonstrate that VISA can achieve comparable approximation accuracy to standard importance-weighted forward-KL variational inference with computational savings of a factor two or more for conservatively chosen learning rates.", "keywords": "Variational Inference;Sample Average Approximations;Importance Sampling", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Heiko Zimmermann;Christian A. Naesseth;Jan-Willem van de Meent", "authorids": "~Heiko_Zimmermann1;~Christian_A._Naesseth1;~Jan-Willem_van_de_Meent1", "gender": ";M;M", "homepage": ";https://jwvdm.github.io/;https://naesseth.github.io/", "dblp": "96/10433;137/3263;146/0902", "google_scholar": "6etmkQYAAAAJ;CX9Lu38AAAAJ;GQ6rOssAAAAJ", "orcid": ";0000-0001-9465-5398;", "linkedin": ";;", "or_profile": "~Heiko_Zimmermann1;~Jan-Willem_van_de_Meent1;~Christian_A_Naesseth1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzimmermann2024visa,\ntitle={{VISA}: Variational Inference with Sequential Sample-Average Approximations},\nauthor={Heiko Zimmermann and Christian A. Naesseth and Jan-Willem van de Meent},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lbLC5OV9GY}\n}", "github": "", "reviewers": "snbt;sEay;Bkjs;kZyU", "pdf_size": 1579024, "rating": "4;5;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "61;67;76;72", "wc_strengths": "29;27;56;29", "wc_weaknesses": "61;194;90;89", "wc_questions": "2;5;62;132", "wc_limitations": "13;38;10;11", "wc_review": "166;331;294;333", "wc_reply_reviewers": "49;225;15;104", "wc_reply_authors": "60;679;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 5.612486080160912 ], "wc_strengths_avg": [ 35.25, 12.007809958522827 ], "wc_weaknesses_avg": [ 108.5, 50.71735403192876 ], "wc_questions_avg": [ 50.25, 52.9073482608985 ], "wc_limitations_avg": [ 18.0, 11.597413504743201 ], "wc_review_avg": [ 281.0, 68.18724220849528 ], "wc_reply_reviewers_avg": [ 98.25, 79.77272403522397 ], "wc_reply_authors_avg": [ 184.75, 286.40476165734395 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gzxzEP_zYiUJ:scholar.google.com/&scioq=VISA:+Variational+Inference+with+Sequential+Sample-Average+Approximations&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "uva.nl;uva.nl;uva.nl", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Automated Multi-Task Learning for Joint Disease Prediction on Electronic Health Records", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93818", "id": "lbSI1j8m6p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lbSI1j8m6p", "openreview": "https://openreview.net/forum?id=lbSI1j8m6p", "poster": "", "project": "", "author_site": "Suhan Cui, Prasenjit Mitra", "tldr": "", "abstract": "In the realm of big data and digital healthcare, Electronic Health Records (EHR) have become a rich source of information with the potential to improve patient care and medical research. In recent years, machine learning models have proliferated for analyzing EHR data to predict patients' future health conditions. Among them, some studies advocate for multi-task learning (MTL) to jointly predict multiple target diseases for improving the prediction performance over single task learning. Nevertheless, current MTL frameworks for EHR data have significant limitations due to their heavy reliance on human experts to identify task groups for joint training and design model architectures. To reduce human intervention and improve the framework design, we propose an automated approach named AutoDP, which can search for the optimal configuration of task grouping and architectures simultaneously. To tackle the vast joint search space encompassing task combinations and architectures, we employ surrogate model-based optimization, enabling us to efficiently discover the optimal solution. Experimental results on real-world EHR data demonstrate the efficacy of the proposed AutoDP framework. It achieves significant performance improvements over both hand-crafted and automated state-of-the-art methods, also maintains a feasible search cost at the same time.", "keywords": "Electronic Health Records; Multi-Task Learning; Automated Machine Learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/01ffa1d846e770181915f518521c3440cee3fa74.zip", "author": "Suhan Cui;Prasenjit Mitra", "authorids": "~Suhan_Cui1;~Prasenjit_Mitra1", "gender": "M;M", "homepage": ";http://www.personal.psu.edu/pum10/", "dblp": "294/0930;19/3308", "google_scholar": "BYh25MsAAAAJ;8PbgiPkAAAAJ", "orcid": ";", "linkedin": "%E8%8B%8F%E6%99%97-%E5%B4%94-aa067818b/;prasenjit-mitra-962471/", "or_profile": "~Suhan_Cui1;~Prasenjit_Mitra1", "aff": "Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncui2024automated,\ntitle={Automated Multi-Task Learning for Joint Disease Prediction on Electronic Health Records},\nauthor={Suhan Cui and Prasenjit Mitra},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lbSI1j8m6p}\n}", "github": "", "reviewers": "23Pe;Zp9P;WEv3;S7e4;azWt", "pdf_size": 1162819, "rating": "1;4;5;5;8", "confidence": "5;3;4;2;4", "soundness": "1;2;3;3;4", "novelty": "1;3;3;3;3", "presentation": "1;2;3;2;3", "wc_summary": "1;100;50;48;42", "wc_strengths": "1;58;52;126;49", "wc_weaknesses": "22;74;57;373;14", "wc_questions": "1;131;70;1;15", "wc_limitations": "1;142;21;1;31", "wc_review": "26;505;250;549;151", "wc_reply_reviewers": "0;30;104;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "0;1;1;1;1", "rating_avg": [ 4.6, 2.244994432064365 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 1.019803902718557 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.2, 0.7483314773547882 ], "wc_summary_avg": [ 48.2, 31.473163171184428 ], "wc_strengths_avg": [ 57.2, 39.96698637625809 ], "wc_weaknesses_avg": [ 108.0, 134.32349012737868 ], "wc_questions_avg": [ 43.6, 50.56322774507181 ], "wc_limitations_avg": [ 39.2, 52.69686897719826 ], "wc_review_avg": [ 296.2, 201.85678091161566 ], "wc_reply_reviewers_avg": [ 26.8, 40.31079259950119 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 0.8, 0.4000000000000001 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3319568649619408, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=981461861441023937&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "psu.edu;psu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Universal Mesh Movement Networks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93817", "id": "lcALCNF2qe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lcALCNF2qe", "openreview": "https://openreview.net/forum?id=lcALCNF2qe", "poster": "/media/PosterPDFs/NeurIPS%202024/93817.png?t=1731691609.735794", "project": "", "author_site": "Mingrui Zhang, Chunyang Wang, Stephan C. Kramer, Joseph G Wallwork, Siyi Li, Jiancheng Liu, Xiang Chen, Matthew Piggott", "tldr": "", "abstract": "Solving complex Partial Differential Equations (PDEs) accurately and efficiently is an essential and challenging problem in all scientific and engineering disciplines. Mesh movement methods provide the capability to improve the accuracy of the numerical solution without increasing the overall mesh degree of freedom count. Conventional sophisticated mesh movement methods are extremely expensive and struggle to handle scenarios with complex boundary geometries. However, existing learning-based methods require re-training from scratch given a different PDE type or boundary geometry, which limits their applicability, and also often suffer from robustness issues in the form of inverted elements. In this paper, we introduce the Universal Mesh Movement Network (UM2N), which -- once trained -- can be applied in a non-intrusive, zero-shot manner to move meshes with different size distributions and structures, for solvers applicable to different PDE types and boundary geometries. UM2N consists of a Graph Transformer (GT) encoder for extracting features and a Graph Attention Network (GAT) based decoder for moving the mesh. We evaluate our method on advection and Navier-Stokes based examples, as well as a real-world tsunami simulation case. Our method out-performs existing learning-based mesh movement methods in terms of the benchmarks described above. In comparison to the conventional sophisticated Monge-Amp\u00e8re PDE-solver based method, our approach not only significantly accelerates mesh movement, but also proves effective in scenarios where the conventional method fails. Our project page can be found at https://erizmr.github.io/UM2N/.", "keywords": "PDE;Physical Simulation;Mesh Adaptation;Physical Science", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/eb2db1c1e9dd888a1deefbf1ce455329dd87338a.zip", "author": "Mingrui Zhang;Chunyang Wang;Stephan C. Kramer;Joseph Gregory Wallwork;Siyi Li;Jiancheng Liu;Xiang Chen;Matthew D Piggott", "authorids": "~Mingrui_Zhang4;~Chunyang_Wang1;~Stephan_C._Kramer1;~Joseph_Gregory_Wallwork1;~Siyi_Li6;~Jiancheng_Liu2;~Xiang_Chen8;~Matthew_D_Piggott1", "gender": "M;M;;M;M;M;;M", "homepage": "https://erizmr.github.io/;;https://www.imperial.ac.uk/people/s.kramer;https://jwallwork23.github.io/;;https://ljcc0930.github.io/;;https://www.imperial.ac.uk/people/m.d.piggott", "dblp": ";;;284/8889.html;;74/3002;;", "google_scholar": "IN75qZcAAAAJ;;ELR8CUYAAAAJ;7GdIt6YAAAAJ;L9NgwIEAAAAJ;ReWNzl4AAAAJ;2cj3OTIAAAAJ;https://scholar.google.co.uk/citations?user=TTt_pZMAAAAJ", "orcid": ";;0000-0002-9193-5092;0000-0002-3646-091X;;;;0000-0002-7526-6853", "linkedin": ";chunyang-wang-83135b201/;;joseph-wallwork;;;;", "or_profile": "~Mingrui_Zhang4;~Chunyang_Wang1;~Stephan_C._Kramer1;~Joseph_Gregory_Wallwork1;~Siyi_Li6;~Jiancheng_Liu2;~Xiang_Chen8;~Matthew_D_Piggott1", "aff": "Imperial College London;Imperial College London;Imperial College London;Met Office;Imperial College London;Michigan State University;Huawei Technologies Ltd.;", "aff_domain": "imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;metoffice.gov.uk;imperial.ac.uk;msu.edu;huawei.com;", "position": "PhD student;PhD student;Researcher;Scientific Software Engineer;PhD student;MS student;Researcher;", "bibtex": "@inproceedings{\nzhang2024towards,\ntitle={Towards Universal Mesh Movement Networks},\nauthor={Mingrui Zhang and Chunyang Wang and Stephan C. Kramer and Joseph Gregory Wallwork and Siyi Li and Jiancheng Liu and Xiang Chen and Matthew D Piggott},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lcALCNF2qe}\n}", "github": "", "reviewers": "xMNp;Jzyt;jEN5;yaVX", "pdf_size": 13952883, "rating": "5;6;6;6", "confidence": "4;3;2;3", "soundness": "2;3;3;4", "novelty": "4;3;3;3", "presentation": "4;3;3;3", "wc_summary": "91;47;98;66", "wc_strengths": "140;76;51;870", "wc_weaknesses": "219;63;151;1540", "wc_questions": "32;100;54;313", "wc_limitations": "1;25;3;1", "wc_review": "483;311;357;2790", "wc_reply_reviewers": "84;16;16;781", "wc_reply_authors": "109;26;12;749", "reply_reviewers": "1;1;1;4", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 20.303940504246953 ], "wc_strengths_avg": [ 284.25, 339.7369386746163 ], "wc_weaknesses_avg": [ 493.25, 606.8666966476246 ], "wc_questions_avg": [ 124.75, 111.42121656129949 ], "wc_limitations_avg": [ 7.5, 10.136567466356647 ], "wc_review_avg": [ 985.25, 1043.8736453709328 ], "wc_reply_reviewers_avg": [ 224.25, 322.6363084031306 ], "wc_reply_authors_avg": [ 224.0, 305.36781100829864 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6034193084736039460&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;metoffice.gov.uk;imperial.ac.uk;msu.edu;huawei.com;", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;3", "aff_unique_norm": "Imperial College London;Met Office;Michigan State University;Huawei", "aff_unique_dep": ";;;Huawei Technologies", "aff_unique_url": "https://www.imperial.ac.uk;https://www.metoffice.gov.uk;https://www.msu.edu;https://www.huawei.com", "aff_unique_abbr": "ICL;Met Office;MSU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;2", "aff_country_unique": "United Kingdom;United States;China" }, { "title": "Coherent 3D Scene Diffusion From a Single RGB Image", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93816", "id": "lckAdnVzsT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lckAdnVzsT", "openreview": "https://openreview.net/forum?id=lckAdnVzsT", "poster": "/media/PosterPDFs/NeurIPS%202024/93816.png?t=1733846014.1906593", "project": "", "author_site": "Manuel Dahnert, Angela Dai, Norman M\u00fcller, Matthias Niessner", "tldr": "", "abstract": "We present a novel diffusion-based approach for coherent 3D scene reconstruction from a single RGB image. \nOur method utilizes an image-conditioned 3D scene diffusion model to simultaneously denoise the 3D poses and geometries of all objects within the scene.\n\nMotivated by the ill-posed nature of the task and to obtain consistent scene reconstruction results, we learn a generative scene prior by conditioning on all scene objects simultaneously to capture scene context and by allowing the model to learn inter-object relationships throughout the diffusion process.\n\nWe further propose an efficient surface alignment loss to facilitate training even in the absence of full ground-truth annotation, which is common in publicly available datasets. This loss leverages an expressive shape representation, which enables direct point sampling from intermediate shape predictions.\n\nBy framing the task of single RGB image 3D scene reconstruction as a conditional diffusion process, our approach surpasses current state-of-the-art methods, achieving a 12.04\\% improvement in AP3D on SUN RGB-D and a 13.43\\% increase in F-Score on Pix3D.", "keywords": "Single RGB Image 3D Scene Reconstruction;Diffusion Models;Scene Understanding;3D Scene Prior", "primary_area": "machine_vision", "supplementary_material": "", "author": "Manuel Dahnert;Angela Dai;Norman M\u00fcller;Matthias Nie\u00dfner", "authorids": "~Manuel_Dahnert1;~Angela_Dai1;~Norman_M\u00fcller1;~Matthias_Nie\u00dfner2", "gender": "M;F;M;", "homepage": "https://manuel-dahnert.com;https://angeladai.github.io/;https://www.niessnerlab.org/members/norman_mueller/profile.html;https://niessnerlab.org/", "dblp": "230/7786;149/1202;34/3030;84/8221.html", "google_scholar": "R1O631AAAAAJ;g-tGztMAAAAJ;https://scholar.google.com/citations?hl=en;eUtEs6YAAAAJ", "orcid": "0000-0003-2855-3929;;;", "linkedin": ";;;", "or_profile": "~Manuel_Dahnert1;~Angela_Dai1;~Norman_M\u00fcller1;~Matthias_Niessner1", "aff": "Technical University Munich;Technical University of Munich;Meta;Technical University of Munich", "aff_domain": "tum.de;tum.de;meta.com;tum.de", "position": "PhD student;Assistant Professor;Researcher;Professor", "bibtex": "@inproceedings{\ndahnert2024coherent,\ntitle={Coherent 3D Scene Diffusion From a Single {RGB} Image},\nauthor={Manuel Dahnert and Angela Dai and Norman M{\\\"u}ller and Matthias Nie{\\ss}ner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lckAdnVzsT}\n}", "github": "", "reviewers": "U6My;W8nX;BXMX;scCV", "pdf_size": 7415877, "rating": "5;5;6;6", "confidence": "2;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "68;95;97;87", "wc_strengths": "69;15;73;87", "wc_weaknesses": "195;277;117;185", "wc_questions": "72;4;180;106", "wc_limitations": "11;6;1;15", "wc_review": "415;397;468;480", "wc_reply_reviewers": "11;23;11;32", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 86.75, 11.453711188955307 ], "wc_strengths_avg": [ 61.0, 27.386127875258307 ], "wc_weaknesses_avg": [ 193.5, 56.78688228807776 ], "wc_questions_avg": [ 90.5, 63.39361166552983 ], "wc_limitations_avg": [ 8.25, 5.261891294962297 ], "wc_review_avg": [ 440.0, 34.84967718645325 ], "wc_reply_reviewers_avg": [ 19.25, 8.842369591913696 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17643248771151849700&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tum.de;tum.de;meta.com;tum.de", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technical University of Munich;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.tum.de;https://meta.com", "aff_unique_abbr": "TUM;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Optimal Aggregation of Prediction Intervals under Unsupervised Domain Shift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93815", "id": "ldXyNSvXEr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ldXyNSvXEr", "openreview": "https://openreview.net/forum?id=ldXyNSvXEr", "poster": "/media/PosterPDFs/NeurIPS%202024/93815.png?t=1729431029.7983313", "project": "", "author_site": "Jiawei Ge, Debarghya Mukherjee, Jianqing Fan", "tldr": "", "abstract": "As machine learning models are increasingly deployed in dynamic environments, it becomes paramount to assess and quantify uncertainties associated with distribution shifts.\nA distribution shift occurs when the underlying data-generating process changes, leading to a deviation in the model's performance. \nThe prediction interval, which captures the range of likely outcomes for a given prediction, serves as a crucial tool for characterizing uncertainties induced by their underlying distribution. \nIn this paper, we propose methodologies for aggregating prediction intervals to obtain one with minimal width and adequate coverage on the target domain under unsupervised domain shift, under which we have labeled samples from a related source domain and unlabeled covariates from the target domain.\nOur analysis encompasses scenarios where the source and the target domain are related via i) a bounded density ratio, and ii) a measure-preserving transformation.\nOur proposed methodologies are computationally efficient and easy to implement. Beyond illustrating the performance of our method through real-world datasets, we also delve into the theoretical details. This includes establishing rigorous theoretical guarantees, coupled with finite sample bounds, regarding the coverage and width of our prediction intervals. Our approach excels in practical applications and is underpinned by a solid theoretical framework, ensuring its reliability and effectiveness across diverse contexts.", "keywords": "prediction intervals aggregation;unsupervised domain shift;minimal width;adequate coverage;distribution shift", "primary_area": "learning_theory", "supplementary_material": "/attachment/8c78f83257f9fcda0a0d8e6f7cc878c3b8dad560.zip", "author": "Jiawei Ge;Debarghya Mukherjee;Jianqing Fan", "authorids": "~Jiawei_Ge3;~Debarghya_Mukherjee1;~Jianqing_Fan1", "gender": "F;M;M", "homepage": "https://jiaweige0416.github.io/research.html;https://debarghya-mukherjee.github.io;https://fan.princeton.edu", "dblp": "https://dblp.org/rec/journals/corr/abs-2303-01566;;33/2768", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0003-3250-7677", "linkedin": "jiawei-ge-b378491bb/;;", "or_profile": "~Jiawei_Ge3;~Debarghya_Mukherjee1;~Jianqing_Fan1", "aff": "Princeton University;Boston University, Boston University;Princeton University", "aff_domain": "princeton.edu;bu.edu;princeton.edu", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nge2024optimal,\ntitle={Optimal Aggregation of Prediction Intervals under Unsupervised Domain Shift},\nauthor={Jiawei Ge and Debarghya Mukherjee and Jianqing Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ldXyNSvXEr}\n}", "github": "", "reviewers": "zRWP;6ToN;EZSx;88pD", "pdf_size": 879258, "rating": "5;5;6;6", "confidence": "3;4;2;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;2", "wc_summary": "81;61;73;119", "wc_strengths": "32;13;46;57", "wc_weaknesses": "69;332;77;690", "wc_questions": "27;79;166;2", "wc_limitations": "1;1;1;8", "wc_review": "210;486;363;876", "wc_reply_reviewers": "680;23;83;17", "wc_reply_authors": "1711;88;304;23", "reply_reviewers": "5;1;2;1", "reply_authors": "6;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 21.696773953747133 ], "wc_strengths_avg": [ 37.0, 16.446884203398525 ], "wc_weaknesses_avg": [ 292.0, 252.96145951508106 ], "wc_questions_avg": [ 68.5, 62.77141068989927 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 483.75, 246.67019986208305 ], "wc_reply_reviewers_avg": [ 200.75, 277.89600123067623 ], "wc_reply_authors_avg": [ 531.5, 688.8833355510932 ], "reply_reviewers_avg": [ 2.25, 1.6393596310755 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16599843083299334969&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "princeton.edu;bu.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;Boston University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.bu.edu", "aff_unique_abbr": "Princeton;BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Pedestrian-Centric 3D Pre-collision Pose and Shape Estimation from Dashcam Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93814", "id": "ldvfaYzG35", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ldvfaYzG35", "openreview": "https://openreview.net/forum?id=ldvfaYzG35", "poster": "/media/PosterPDFs/NeurIPS%202024/93814.png?t=1732092701.7482152", "project": "", "author_site": "MeiJun Wang, Yu Meng, Zhongwei Qiu, Chao Zheng, Yan Xu, Pengxiaorui, Jian Gao", "tldr": "", "abstract": "Pedestrian pre-collision pose is one of the key factors to determine the degree of pedestrian-vehicle injury in collision. Human pose estimation algorithm is an effective method to estimate pedestrian emergency pose from accident video. However, the pose estimation model trained by the existing daily human pose datasets has poor robustness under specific poses such as pedestrian pre-collision pose, and it is difficult to obtain human pose datasets in the wild scenes, especially lacking scarce data such as pedestrian pre-collision pose in traffic scenes. In this paper, we collect pedestrian-vehicle collision pose from the dashcam perspective of dashcam and construct the first Pedestrian-Vehicle Collision Pose dataset (PVCP) in a semi-automatic way, including 40k+ accident frames and 20K+ pedestrian pre-collision pose annotation (2D, 3D, Mesh). Further, we construct a Pedestrian Pre-collision Pose Estimation Network (PPSENet) to estimate the collision pose and shape sequence of pedestrians from pedestrian-vehicle accident videos. The PPSENet first estimates the 2D pose from the image (Image to Pose, ITP) and then lifts the 2D pose to 3D mesh (Pose to Mesh, PTM). Due to the small size of the dataset, we introduce a pre-training model that learns the human pose prior on a large number of pose datasets, and use iterative regression to estimate the pre-collision pose and shape of pedestrians. Further, we classify the pre-collision pose sequence and introduce pose class loss, which achieves the best accuracy compared with the existing relevant \\textit{state-of-the-art} methods. Code and data are available for research at https://github.com/wmj142326/PVCP.", "keywords": "Pedestrian Pre-collision pose;Human pose and shape estimation;Dashcam Perspective;Pedestrian-Vehicle Collision Pose dataset", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "MeiJun Wang;Yu Meng;Zhongwei Qiu;Chao Zheng;Yan Xu;Pengxiaorui;Jian Gao", "authorids": "~MeiJun_Wang1;~Yu_Meng6;~Zhongwei_Qiu1;~Chao_Zheng7;~Yan_Xu14;~Pengxiaorui1;~Jian_Gao11", "gender": "M;M;M;M;M;M;M", "homepage": "https://me.ustb.edu.cn/shiziduiwu/jiaoshixinxi/2022-03-24/543.html;https://ericzw.github.io/;;;https://github.com/pxrw;https://github.com/jianGao555;https://wmj142326.github.io/", "dblp": ";246/5883;;;;;", "google_scholar": ";uVV3rqcAAAAJ;;woIuooIAAAAJ;;;Ganf8zgAAAAJ", "orcid": ";;0000-0002-9981-4623;;;;0000-0002-6536-0600", "linkedin": ";;;;;;", "or_profile": "~Yu_Meng6;~Zhongwei_Qiu1;~Chao_Zheng7;~Yan_Xu14;~Pengxiaorui1;~Jian_Gao11;~Wang_MeiJun1", "aff": "University of Science and Technology Beijing;Alibaba DAMO Academy;University of Science and Technology Beijing;University of Science and Technology Beijing;University of Science and Technology Beijing;Northwest University Xi'an;University of Science and Technology Beijing", "aff_domain": "ustb.edu.cn;alibaba-inc.com;ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;nwu.edu.cn;ustb.edu.cn", "position": "Full Professor;Researcher;PhD student;PhD student;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nwang2024pedestriancentric,\ntitle={Pedestrian-Centric 3D Pre-collision Pose and Shape Estimation from Dashcam Perspective},\nauthor={MeiJun Wang and Yu Meng and Zhongwei Qiu and Chao Zheng and Yan Xu and Pengxiaorui and Jian Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ldvfaYzG35}\n}", "github": "", "reviewers": "j3Vv;3M7p;SmGM;9csR", "pdf_size": 5677588, "rating": "5;5;5;7", "confidence": "4;3;4;4", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "121;37;107;74", "wc_strengths": "75;37;84;63", "wc_weaknesses": "291;30;286;60", "wc_questions": "58;9;53;37", "wc_limitations": "6;8;2;30", "wc_review": "551;121;532;264", "wc_reply_reviewers": "208;0;176;99", "wc_reply_authors": "754;0;1013;244", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;3;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.75, 32.42202183701689 ], "wc_strengths_avg": [ 64.75, 17.66882848408462 ], "wc_weaknesses_avg": [ 166.75, 122.22392359926923 ], "wc_questions_avg": [ 39.25, 19.109879643786353 ], "wc_limitations_avg": [ 11.5, 10.897247358851684 ], "wc_review_avg": [ 367.0, 181.80071507010086 ], "wc_reply_reviewers_avg": [ 120.75, 80.18533219984812 ], "wc_reply_authors_avg": [ 502.75, 400.99586967947687 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5483228205660874586&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ustb.edu.cn;alibaba-inc.com;ustb.edu.cn;ustb.edu.cn;ustb.edu.cn;nwu.edu.cn;ustb.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;2;0", "aff_unique_norm": "University of Science and Technology Beijing;Alibaba Group;Northwest University", "aff_unique_dep": ";DAMO Academy;", "aff_unique_url": "http://www.ustb.edu.cn;https://www.alibaba-group.com;https://www.nwu.edu.cn", "aff_unique_abbr": "USTB;Alibaba DAMO;NWU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SearchLVLMs: A Plug-and-Play Framework for Augmenting Large Vision-Language Models by Searching Up-to-Date Internet Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93813", "id": "leeosk2RAM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=leeosk2RAM", "openreview": "https://openreview.net/forum?id=leeosk2RAM", "poster": "/media/PosterPDFs/NeurIPS%202024/93813.png?t=1731595213.5824764", "project": "", "author_site": "Chuanhao Li, Zhen Li, Chenchen Jing, Shuo Liu, Wenqi Shao, Yuwei Wu, Ping Luo, Yu Qiao, Kaipeng Zhang", "tldr": "", "abstract": "Large vision-language models (LVLMs) are ignorant of the up-to-date knowledge, such as LLaVA series, because they cannot be updated frequently due to the large amount of resources required, and therefore fail in many cases. For example, if a LVLM was released on January 2024, and it wouldn't know the singer of the theme song for the new Detective Conan movie, which wasn't released until April 2024. To solve the problem, a promising solution motivated by retrieval-augmented generation (RAG) is to provide LVLMs with up-to-date knowledge via internet search during inference, i.e., internet-augmented generation (IAG), which is already integrated in some closed-source commercial LVLMs such as GPT-4V. However, the specific mechanics underpinning them remain a mystery. In this paper, we propose a plug-and-play framework, for augmenting existing LVLMs in handling visual question answering (VQA) about up-to-date knowledge, dubbed SearchLVLMs. A hierarchical filtering model is trained to effectively and efficiently find the most helpful content from the websites returned by a search engine to prompt LVLMs with up-to-date knowledge. To train the model and evaluate our framework's performance, we propose a pipeline to automatically generate news-related VQA samples to construct a dataset, dubbed UDK-VQA. A multi-model voting mechanism is introduced to label the usefulness of website/content for VQA samples to construct the training set. Experimental results demonstrate the effectiveness of our framework, outperforming GPT-4o by $\\sim$30\\% in accuracy.", "keywords": "Internet-Augmented Generation;Large Vision-Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Chuanhao Li;Zhen Li;Chenchen Jing;Shuo Liu;Wenqi Shao;Yuwei Wu;Ping Luo;Yu Qiao;Kaipeng Zhang", "authorids": "~Chuanhao_Li2;~Zhen_Li15;~Chenchen_Jing2;~Shuo_Liu5;~Wenqi_Shao2;~Yuwei_Wu1;~Ping_Luo2;~Yu_Qiao1;~Kaipeng_Zhang1", "gender": "M;;M;F;M;M;;;M", "homepage": "https://nevermorelch.github.io/;;https://jingchenchen.github.io/;;https://wqshao126.github.io/;https://wu-yuwei-bit.github.io/;;;http://kpzhang93.github.io/", "dblp": "195/9947-1;;219/1679.html;07/6773;227/3122;63/5298-1.html;;;179/2126", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=aKvi_rEAAAAJ;0TKm3fgAAAAJ;https://scholar.google.com.tw/citations?hl=zh-CN;Bs9mrwwAAAAJ;MUF2MeAAAAAJ;;;4OqZBmYAAAAJ", "orcid": ";;;;;0000-0002-0263-925X;;;", "linkedin": ";;;;;;;;", "or_profile": "~Chuanhao_Li2;~Zhen_Li15;~Chenchen_Jing2;~Shuo_Liu5;~Wenqi_Shao2;~Yuwei_Wu1;~Ping_Luo2;~Yu_Qiao1;~Kaipeng_Zhang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Zhejiang University;Shanghai AI lab;Shanghai AI Laboratory;Beijing Institute of Technology;;;Shanghai AI Laboratory", "aff_domain": "bit.edu.cn;bit.edu.cn;zju.edu.cn;pjlab.org;pjlab.org.cn;bit.edu.cn;;;pjlab.org.cn", "position": "PhD student;MS student;Postdoc;Researcher;Researcher;Associate Professor;;;Researcher", "bibtex": "@inproceedings{\nli2024searchlvlms,\ntitle={Search{LVLM}s: A Plug-and-Play Framework for Augmenting Large Vision-Language Models by Searching Up-to-Date Internet Knowledge},\nauthor={Chuanhao Li and Zhen Li and Chenchen Jing and Shuo Liu and Wenqi Shao and Yuwei Wu and Ping Luo and Yu Qiao and Kaipeng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=leeosk2RAM}\n}", "github": "", "reviewers": "qhJo;PaHW;jo4p", "pdf_size": 1672434, "rating": "4;6;7", "confidence": "4;1;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "98;57;115", "wc_strengths": "65;28;149", "wc_weaknesses": "251;17;62", "wc_questions": "3;23;9", "wc_limitations": "98;1;1", "wc_review": "515;126;336", "wc_reply_reviewers": "0;36;31", "wc_reply_authors": "1657;32;25", "reply_reviewers": "0;1;1", "reply_authors": "6;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 90.0, 24.34474618201362 ], "wc_strengths_avg": [ 80.66666666666667, 50.624982853220686 ], "wc_weaknesses_avg": [ 110.0, 101.3804714922948 ], "wc_questions_avg": [ 11.666666666666666, 8.379870059984356 ], "wc_limitations_avg": [ 33.333333333333336, 45.726238516730064 ], "wc_review_avg": [ 325.6666666666667, 158.9765880737021 ], "wc_reply_reviewers_avg": [ 22.333333333333332, 15.923427883328248 ], "wc_reply_authors_avg": [ 571.3333333333334, 767.687581130646 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4058514424880651644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "bit.edu.cn;bit.edu.cn;zju.edu.cn;pjlab.org;pjlab.org.cn;bit.edu.cn;;;pjlab.org.cn", "author_num": 9, "aff_unique_index": "0;0;1;2;3;0;3", "aff_unique_norm": "Beijing Institute of Technology;Zhejiang University;Shanghai AI Lab;Shanghai AI Laboratory", "aff_unique_dep": ";;AI Research;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.zju.edu.cn;https://www.shanghaiailab.com;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "BIT;ZJU;Shanghai AI Lab;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "OPEL: Optimal Transport Guided ProcedurE Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93812", "id": "leqD3bJ4Ly", "proceeding": "", "pdf": "https://openreview.net/pdf?id=leqD3bJ4Ly", "openreview": "https://openreview.net/forum?id=leqD3bJ4Ly", "poster": "/media/PosterPDFs/NeurIPS%202024/93812.png?t=1730487406.1119115", "project": "", "author_site": "Sayeed Shafayet Chowdhury, Soumyadeep Chandra, Kaushik Roy", "tldr": "", "abstract": "Procedure learning refers to the task of identifying the key-steps and determining their logical order, given several videos of the same task. For both third-person and first-person (egocentric) videos, state-of-the-art (SOTA) methods aim at finding correspondences across videos in time to accomplish procedure learning. However, to establish temporal relationships within the sequences, these methods often rely on frame-to-frame mapping, or assume monotonic alignment of video pairs, leading to sub-optimal results. To this end, we propose to treat the video frames as samples from an unknown distribution, enabling us to frame their distance calculation as an optimal transport (OT) problem. Notably, the OT-based formulation allows us to relax the previously mentioned assumptions. To further improve performance, we enhance the OT formulation by introducing two regularization terms. The first, inverse difference moment regularization, promotes transportation between instances that are homogeneous in the embedding space as well as being temporally closer. The second, regularization based on the KL-divergence with an exponentially decaying prior smooths the alignment while enforcing conformity to the optimality (alignment obtained from vanilla OT optimization) and temporal priors. The resultant optimal transport guided procedure learning framework (`OPEL') significantly outperforms the SOTA on benchmark datasets. Specifically, we achieve 22.4\\% (IoU) and 26.9\\% (F1) average improvement compared to the current SOTA on large scale egocentric benchmark, EgoProceL. Furthermore, for the third person benchmarks (ProCeL and CrossTask), the proposed approach obtains 46.2\\% (F1) average enhancement over SOTA.", "keywords": "Procedure learning;Egocentric vision;EgoProceL;Optimal Transport", "primary_area": "machine_vision", "supplementary_material": "/attachment/0d4561ad68409abed8bd5e48af9d86b66788ef34.zip", "author": "Sayeed Shafayet Chowdhury;Soumyadeep Chandra;Kaushik Roy", "authorids": "~Sayeed_Shafayet_Chowdhury3;~Soumyadeep_Chandra1;~Kaushik_Roy1", "gender": "M;M;M", "homepage": ";;https://engineering.purdue.edu/NRL/Group", "dblp": ";;r/KaushikRoy", "google_scholar": "646ndV4AAAAJ;xaRULDcAAAAJ;to4P8KgAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Sayeed_Shafayet_Chowdhury3;~Soumyadeep_Chandra1;~Kaushik_Roy1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchowdhury2024opel,\ntitle={{OPEL}: Optimal Transport Guided ProcedurE Learning},\nauthor={Sayeed Shafayet Chowdhury and Soumyadeep Chandra and Kaushik Roy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=leqD3bJ4Ly}\n}", "github": "", "reviewers": "3Xtn;Yy5N;pvnX;XDhK", "pdf_size": 9613430, "rating": "5;5;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;2;3", "wc_summary": "76;35;72;133", "wc_strengths": "58;42;47;127", "wc_weaknesses": "40;65;43;97", "wc_questions": "25;156;16;36", "wc_limitations": "1;1;6;2", "wc_review": "200;299;184;395", "wc_reply_reviewers": "0;0;23;23", "wc_reply_authors": "27;31;38;293", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.0, 35.035696082709705 ], "wc_strengths_avg": [ 68.5, 34.267331381360876 ], "wc_weaknesses_avg": [ 61.25, 22.78568629644497 ], "wc_questions_avg": [ 58.25, 56.87870867029244 ], "wc_limitations_avg": [ 2.5, 2.0615528128088303 ], "wc_review_avg": [ 269.5, 84.7953418531938 ], "wc_reply_reviewers_avg": [ 11.5, 11.5 ], "wc_reply_authors_avg": [ 97.25, 113.08486857223649 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2608552537600304193&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Shuffling Gradient-Based Methods for Nonconvex-Concave Minimax Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93811", "id": "lfY0SUT3m9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lfY0SUT3m9", "openreview": "https://openreview.net/forum?id=lfY0SUT3m9", "poster": "/media/PosterPDFs/NeurIPS%202024/93811.png?t=1731336240.9607644", "project": "", "author_site": "Quoc Tran Dinh, Trang H. Tran, Lam Nguyen", "tldr": "", "abstract": "This paper aims at developing novel shuffling gradient-based methods for tackling two classes of minimax problems: nonconvex-linear and nonconvex-strongly concave settings. The first algorithm addresses the nonconvex-linear minimax model and achieves the state-of-the-art oracle complexity typically observed in nonconvex optimization. It also employs a new shuffling estimator for the ``hyper-gradient'', departing from standard shuffling techniques in optimization. The second method consists of two variants: semi-shuffling and full-shuffling schemes. These variants tackle the nonconvex-strongly concave minimax setting. We establish their oracle complexity bounds under standard assumptions, which, to our best knowledge, are the best-known for this specific setting. Numerical examples demonstrate the performance of our algorithms and compare them with two other methods. Our results show that the new methods achieve comparable performance with SGD, supporting the potential of incorporating shuffling strategies into minimax algorithms.", "keywords": "Shuffling gradient method; nonconvex-concave minimax problem; oracle complexity; sample without replacement", "primary_area": "optimization", "supplementary_material": "/attachment/376adae174f55b456ad423e90e80dbd440a9d085.zip", "author": "Quoc Tran-Dinh;Trang H. Tran;Lam M. Nguyen", "authorids": "~Quoc_Tran-Dinh2;~Trang_H._Tran1;~Lam_M._Nguyen1", "gender": "M;M;F", "homepage": "http://quoctd.web.unc.edu;https://lamnguyen-mltd.github.io/;https://htt-trangtran.github.io/", "dblp": ";181/1428;279/4007", "google_scholar": "Wyxqvt8AAAAJ;DeFL5Q8AAAAJ;EWGuYl4AAAAJ", "orcid": ";;0000-0002-9551-4738", "linkedin": ";lam-m-nguyen-71b54750/;trang-tran-313b49195/", "or_profile": "~Quoc_Tran-Dinh2;~Lam_M_Nguyen1;~Trang_H_Tran1", "aff": "University of North Carolina, Chapel Hill;IBM Research, Thomas J. Watson Research Center;Cornell University", "aff_domain": "unc.edu;ibm.com;cornell.edu", "position": "Associate Professor;Staff Research Scientist;PhD student", "bibtex": "@inproceedings{\ntran-dinh2024shuffling,\ntitle={Shuffling Gradient-Based Methods for Nonconvex-Concave Minimax Optimization},\nauthor={Quoc Tran-Dinh and Trang H. Tran and Lam M. Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lfY0SUT3m9}\n}", "github": "", "reviewers": "jEDb;czC8;4w6E;QEyv", "pdf_size": 860358, "rating": "5;5;6;7", "confidence": "3;2;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "101;62;77;17", "wc_strengths": "84;24;86;31", "wc_weaknesses": "216;77;129;39", "wc_questions": "267;34;2;288", "wc_limitations": "9;1;1;55", "wc_review": "677;198;295;430", "wc_reply_reviewers": "8;15;0;14", "wc_reply_authors": "93;180;0;32", "reply_reviewers": "1;1;0;1", "reply_authors": "3;4;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 30.621683493890405 ], "wc_strengths_avg": [ 56.25, 28.86498744153546 ], "wc_weaknesses_avg": [ 115.25, 66.36405276955288 ], "wc_questions_avg": [ 147.75, 130.45377533824 ], "wc_limitations_avg": [ 16.5, 22.46664193866097 ], "wc_review_avg": [ 400.0, 179.90136186254955 ], "wc_reply_reviewers_avg": [ 9.25, 5.973901572674261 ], "wc_reply_authors_avg": [ 76.25, 68.58707968706642 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iYd3WCR9pQoJ:scholar.google.com/&scioq=Shuffling+Gradient-Based+Methods+for+Nonconvex-Concave+Minimax+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "unc.edu;ibm.com;cornell.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of North Carolina;IBM;Cornell University", "aff_unique_dep": ";IBM Research;", "aff_unique_url": "https://www.unc.edu;https://www.ibm.com/research;https://www.cornell.edu", "aff_unique_abbr": "UNC;IBM;Cornell", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Chapel Hill;Yorktown Heights;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "(FL)$^2$: Overcoming Few Labels in Federated Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93810", "id": "lflwtGE6Vf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lflwtGE6Vf", "openreview": "https://openreview.net/forum?id=lflwtGE6Vf", "poster": "/media/PosterPDFs/NeurIPS%202024/93810.png?t=1731676797.8447814", "project": "", "author_site": "Seungjoo Lee, Thanh-Long V. Le, Jaemin Shin, Sung-Ju Lee", "tldr": "", "abstract": "Federated Learning (FL) is a distributed machine learning framework that trains accurate global models while preserving clients' privacy-sensitive data. However, most FL approaches assume that clients possess labeled data, which is often not the case in practice. Federated Semi-Supervised Learning (FSSL) addresses this label deficiency problem, targeting situations where only the server has a small amount of labeled data while clients do not. However, a significant performance gap exists between Centralized Semi-Supervised Learning (SSL) and FSSL. This gap arises from confirmation bias, which is more pronounced in FSSL due to multiple local training epochs and the separation of labeled and unlabeled data. We propose $(FL)^2$, a robust training method for unlabeled clients using sharpness-aware consistency regularization. We show that regularizing the original pseudo-labeling loss is suboptimal, and hence we carefully select unlabeled samples for regularization. We further introduce client-specific adaptive thresholding and learning status-aware aggregation to adjust the training process based on the learning progress of each client. Our experiments on three benchmark datasets demonstrate that our approach significantly improves performance and bridges the gap with SSL, particularly in scenarios with scarce labeled data.", "keywords": "Federated Learning;Semi-Supervised Learning;Federated Semi-Supervised Learning", "primary_area": "other", "supplementary_material": "", "author": "Seungjoo Lee;Thanh-Long V. Le;Jaemin Shin;Sung-Ju Lee", "authorids": "~Seungjoo_Lee1;~Thanh-Long_V._Le1;~Jaemin_Shin1;~Sung-Ju_Lee1", "gender": "M;M;M;M", "homepage": ";;https://jaemin-shin.github.io/;https://nmsl.kaist.ac.kr/sjlee", "dblp": ";;;28/1552", "google_scholar": ";;d94q-zQAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0003-3359-3842;;;0000-0002-5518-2126", "linkedin": "seungjoolee99/;lvtlong/;;sungjulee/", "or_profile": "~Seungjoo_Lee1;~Thanh-Long_V._Le1;~Jaemin_Shin1;~Sung-Ju_Lee1", "aff": "KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "ee.kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.ac.kr", "position": "MS student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2024fl,\ntitle={({FL})\\${\\textasciicircum}2\\$: Overcoming Few Labels in Federated Semi-Supervised Learning},\nauthor={Seungjoo Lee and Thanh-Long V. Le and Jaemin Shin and Sung-Ju Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lflwtGE6Vf}\n}", "github": "", "reviewers": "LSrU;YhMN;ytwd;dbte", "pdf_size": 2477825, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;4;3", "novelty": "2;2;3;3", "presentation": "3;3;4;4", "wc_summary": "44;78;132;69", "wc_strengths": "8;11;66;103", "wc_weaknesses": "133;136;167;68", "wc_questions": "67;87;164;19", "wc_limitations": "3;27;46;16", "wc_review": "255;339;575;275", "wc_reply_reviewers": "97;22;13;20", "wc_reply_authors": "1112;47;40;19", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 80.75, 32.104322138927024 ], "wc_strengths_avg": [ 47.0, 39.73034105063786 ], "wc_weaknesses_avg": [ 126.0, 36.0347054934545 ], "wc_questions_avg": [ 84.25, 52.25598051897983 ], "wc_limitations_avg": [ 23.0, 15.763882770434446 ], "wc_review_avg": [ 361.0, 127.38916751435343 ], "wc_reply_reviewers_avg": [ 38.0, 34.22718218024966 ], "wc_reply_authors_avg": [ 304.5, 466.32418980790607 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1978306946892338283&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "ee.kaist.ac.kr;kaist.ac.kr;kaist.edu;kaist.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "In-Context Learning with Transformers: Softmax Attention Adapts to Function Lipschitzness", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93809", "id": "lfxIASyLxB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lfxIASyLxB", "openreview": "https://openreview.net/forum?id=lfxIASyLxB", "poster": "", "project": "", "author_site": "Liam Collins, Advait Parulekar, Aryan Mokhtari, Sujay Sanghavi, Sanjay Shakkottai", "tldr": "", "abstract": "A striking property of transformers is their ability to perform in-context learning (ICL), a machine learning framework in which the learner is presented with a novel context during inference implicitly through some data, and tasked with making a prediction in that context. As such, that learner must adapt to the context without additional training. We explore the role of *softmax* attention in an ICL setting where each context encodes a regression task. We show that an attention unit learns a window that it uses to implement a nearest-neighbors predictor adapted to the landscape of the pretraining tasks. Specifically, we show that this window widens with decreasing Lipschitzness and increasing label noise in the pretraining tasks. We also show that on low-rank, linear problems, the attention unit learns to project onto the appropriate subspace before inference. Further, we show that this adaptivity relies crucially on the softmax activation and thus cannot be replicated by the linear activation often studied in prior theoretical analyses.", "keywords": "Transformers;self-attention;in-context learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/0f62b7dca18a67a31c823e4d897b5d40bb3923c2.zip", "author": "Liam Collins;Advait U Parulekar;Aryan Mokhtari;sujay sanghavi;Sanjay Shakkottai", "authorids": "~Liam_Collins1;~Advait_U_Parulekar1;~Aryan_Mokhtari3;~sujay_sanghavi1;~Sanjay_Shakkottai1", "gender": ";M;M;M;M", "homepage": "https://liamc2196.github.io/;https://advaitparulekar.github.io/;https://sites.utexas.edu/mokhtari/;https://sites.utexas.edu/sanghavi;https://sites.google.com/view/sanjay-shakkottai/", "dblp": "170/1157;256/1561;140/7407;69/4911.html;61/4596", "google_scholar": "MRLe02cAAAAJ;9u8SExAAAAAJ;glcep6EAAAAJ;O-DazBUAAAAJ;", "orcid": "0009-0006-3139-3339;;;;", "linkedin": ";advait-parulekar-98150614b/;;;", "or_profile": "~Liam_Collins1;~Advait_U_Parulekar1;~Aryan_Mokhtari3;~sujay_sanghavi1;~Sanjay_Shakkottai1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ncollins2024incontext,\ntitle={In-Context Learning with Transformers: Softmax Attention Adapts to Function Lipschitzness},\nauthor={Liam Collins and Advait U Parulekar and Aryan Mokhtari and sujay sanghavi and Sanjay Shakkottai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lfxIASyLxB}\n}", "github": "", "reviewers": "3LPG;BJxR;hHKc;ftLf", "pdf_size": 2958342, "rating": "6;7;7;8", "confidence": "3;3;5;3", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "3;2;4;4", "wc_summary": "72;80;231;53", "wc_strengths": "15;157;290;105", "wc_weaknesses": "77;121;96;48", "wc_questions": "140;60;39;99", "wc_limitations": "3;59;59;6", "wc_review": "307;477;715;311", "wc_reply_reviewers": "10;22;16;21", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 109.0, 71.11610225539643 ], "wc_strengths_avg": [ 141.75, 99.53234398927817 ], "wc_weaknesses_avg": [ 85.5, 26.688012290165037 ], "wc_questions_avg": [ 84.5, 38.60375629391523 ], "wc_limitations_avg": [ 31.75, 27.270634389394026 ], "wc_review_avg": [ 452.5, 166.35729620308211 ], "wc_reply_reviewers_avg": [ 17.25, 4.763139720814412 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12965230203614789421&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "utexas.edu;utexas.edu;utexas.edu;utexas.edu;utexas.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Clustering with Non-adaptive Subset Queries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93808", "id": "lgtsXxk4dF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lgtsXxk4dF", "openreview": "https://openreview.net/forum?id=lgtsXxk4dF", "poster": "/media/PosterPDFs/NeurIPS%202024/93808.png?t=1731027046.6152463", "project": "", "author_site": "Hadley Black, Euiwoong Lee, Arya Mazumdar, Barna Saha", "tldr": "", "abstract": "Recovering the underlying clustering of a set $U$ of $n$ points by asking pair-wise same-cluster queries has garnered significant interest in the last decade. Given a query $S \\subset U$, $|S|=2$, the oracle returns \"yes\" if the points are in the same cluster and \"no\" otherwise. We study a natural generalization of this problem to subset queries for $|S|>2$, where the oracle returns the number of clusters intersecting $S$. Our aim is to determine the minimum number of queries needed for exactly recovering an arbitrary $k$-clustering. We focus on non-adaptive schemes, where all the queries are asked in one round, thus allowing for the querying process to be parallelized, which is a highly desirable property. \n\nFor adaptive algorithms with pair-wise queries, the complexity is known to be $\\Theta(nk)$, where $k$ is the number of clusters. \nIn contrast, non-adaptive pair-wise query algorithms are extremely limited: even for $k=3$, such algorithms require $\\Omega(n^2)$ queries, which matches the trivial $O(n^2)$ upper bound attained by querying every pair of points. Allowing for subset queries of unbounded size, $O(n)$ queries is possible with an adaptive scheme. However, the realm of non-adaptive algorithms remains completely unknown. Is it possible to attain algorithms that are non-adaptive while still making a near-linear number of queries?\n\nIn this paper, we give the first non-adaptive algorithms for clustering with subset queries. We provide, (i) a non-adaptive algorithm making $O(n \\log^2 n \\log k)$ queries which improves to $O(n \\log k)$ when the cluster sizes are within any constant factor of each other, (ii) for constant $k$, a non-adaptive algorithm making $O(n \\log{\\log{n}})$ queries. In addition to non-adaptivity, we take into account other practical considerations, such as enforcing a bound on query size. For constant $k$, we give an algorithm making $\\smash{\\widetilde{O}(n^2/s^2)}$ queries on subsets of size at most $s \\leq \\sqrt{n}$, which is optimal among all non-adaptive algorithms within a $\\log n$-factor. For arbitrary $k$, the dependence varies as $\\tilde{O}(n^2/s)$.", "keywords": "Clustering;query algorithms", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hadley Black;Euiwoong Lee;Arya Mazumdar;Barna Saha", "authorids": "~Hadley_Black1;~Euiwoong_Lee2;~Arya_Mazumdar1;~Barna_Saha3", "gender": "M;F;M;", "homepage": "https://hablack.github.io/;https://web.eecs.umich.edu/~euiwoong/;http://www.cs.umass.edu/~arya;https://barnasaha.net", "dblp": "207/8123;129/5624.html;77/6050;", "google_scholar": "hR31sa4AAAAJ;9Yxhn6oAAAAJ;https://scholar.google.com.tw/citations?user=9tjQU1EAAAAJ;BsmjRdoAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hadley_Black1;~Euiwoong_Lee2;~Arya_Mazumdar1;~Barna_Saha3", "aff": "University of California, San Diego;University of Michigan;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;umich.edu;ucsd.edu;ucsd.edu", "position": "Postdoc;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nblack2024clustering,\ntitle={Clustering with Non-adaptive Subset Queries},\nauthor={Hadley Black and Euiwoong Lee and Arya Mazumdar and Barna Saha},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lgtsXxk4dF}\n}", "github": "", "reviewers": "MjFn;hP14;WJzB;SHzL", "pdf_size": 508035, "rating": "6;7;7;8", "confidence": "4;2;3;4", "soundness": "4;4;3;4", "novelty": "3;4;3;4", "presentation": "3;3;3;4", "wc_summary": "232;217;317;368", "wc_strengths": "50;84;24;78", "wc_weaknesses": "60;155;41;152", "wc_questions": "51;117;1;44", "wc_limitations": "12;37;1;9", "wc_review": "405;610;384;651", "wc_reply_reviewers": "15;30;94;14", "wc_reply_authors": "0;0;135;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 283.5, 61.92132104533946 ], "wc_strengths_avg": [ 59.0, 23.93741840717165 ], "wc_weaknesses_avg": [ 102.0, 51.94708846509109 ], "wc_questions_avg": [ 53.25, 41.48719682022395 ], "wc_limitations_avg": [ 14.75, 13.460590625971804 ], "wc_review_avg": [ 512.5, 119.11863834010192 ], "wc_reply_reviewers_avg": [ 38.25, 32.80529682840867 ], "wc_reply_authors_avg": [ 33.75, 58.45671475544961 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:525wKFekcN0J:scholar.google.com/&scioq=Clustering+with+Non-adaptive+Subset+Queries&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ucsd.edu;umich.edu;ucsd.edu;ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, San Diego;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.umich.edu", "aff_unique_abbr": "UCSD;UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Maximum Entropy Reinforcement Learning via Energy-Based Normalizing Flow", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93807", "id": "lhlIUxD5eE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lhlIUxD5eE", "openreview": "https://openreview.net/forum?id=lhlIUxD5eE", "poster": "/media/PosterPDFs/NeurIPS%202024/93807.png?t=1731728996.810478", "project": "", "author_site": "Chen-Hao Chao, Chien Feng, Wei-Fang Sun, Cheng-Kuang Lee, Simon See, Chun-Yi Lee", "tldr": "", "abstract": "Existing Maximum-Entropy (MaxEnt) Reinforcement Learning (RL) methods for continuous action spaces are typically formulated based on actor-critic frameworks and optimized through alternating steps of policy evaluation and policy improvement. In the policy evaluation steps, the critic is updated to capture the soft Q-function. In the policy improvement steps, the actor is adjusted in accordance with the updated soft Q-function. In this paper, we introduce a new MaxEnt RL framework modeled using Energy-Based Normalizing Flows (EBFlow). This framework integrates the policy evaluation steps and the policy improvement steps, resulting in a single objective training process. Our method enables the calculation of the soft value function used in the policy evaluation target without Monte Carlo approximation. Moreover, this design supports the modeling of multi-modal action distributions while facilitating efficient action sampling. To evaluate the performance of our method, we conducted experiments on the MuJoCo benchmark suite and a number of high-dimensional robotic tasks simulated by Omniverse Isaac Gym. The evaluation results demonstrate that our method achieves superior performance compared to widely-adopted representative baselines.", "keywords": "Reinforcement Learning;Maximum Entropy Reinforcement Learning;Normalizing Flows;Energy-Based Models;Energy-Based Normalizing Flow", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Chen-Hao Chao;Chien Feng;Wei-Fang Sun;Cheng-Kuang Lee;Simon See;Chun-Yi Lee", "authorids": "~Chen-Hao_Chao2;~Chien_Feng2;~Wei-Fang_Sun1;~Cheng-Kuang_Lee1;~Simon_See1;~Chun-Yi_Lee1", "gender": ";M;M;M;M;M", "homepage": ";;;;https://elsalab.ai;https://chen-hao-chao.github.io/", "dblp": ";275/9039;;62/6547;36/3668;291/4406", "google_scholar": ";TgMlVRUAAAAJ;WWvSfXkAAAAJ;ebIHTEoAAAAJ;https://scholar.google.com.tw/citations?user=5mYNdo0AAAAJ;puKAQDgAAAAJ", "orcid": ";;;0000-0002-4958-9237;0000-0002-4680-4800;0000-0003-1409-7467", "linkedin": "chien-feng-56300320b;;cheng-kuang-ck-lee-b97258157/?originalSubdomain=tw;simonsee/;;", "or_profile": "~Chien_Feng2;~Wei-Fang_Sun1;~Cheng-Kuang_Lee1;~Simon_See1;~Chun-Yi_Lee1;~CHEN-HAO_CHAO1", "aff": "Department of Computer Science, National Tsing Hua University, National Tsinghua University;NVIDIA AI Technology Center;;NVIDIA;National Tsing Hua University;Department of Computer Science, National Tsing Hua University, National Tsing Hua University", "aff_domain": "cs.nthu.edu.tw;nvidia.com;;nvidia.com;nthu.edu.tw;cs.nthu.edu.tw", "position": "MS student;Researcher;;Associate Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nchao2024maximum,\ntitle={Maximum Entropy Reinforcement Learning via Energy-Based Normalizing Flow},\nauthor={Chen-Hao Chao and Chien Feng and Wei-Fang Sun and Cheng-Kuang Lee and Simon See and Chun-Yi Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lhlIUxD5eE}\n}", "github": "", "reviewers": "S3sW;PzAH;HRPY;UfJ6;he3t", "pdf_size": 23460108, "rating": "5;6;6;7;7", "confidence": "3;3;4;3;3", "soundness": "3;3;4;3;3", "novelty": "3;3;3;3;4", "presentation": "3;2;4;3;3", "wc_summary": "114;153;62;90;53", "wc_strengths": "140;114;60;84;73", "wc_weaknesses": "251;177;118;122;78", "wc_questions": "131;23;51;189;156", "wc_limitations": "5;31;10;20;16", "wc_review": "641;498;301;505;376", "wc_reply_reviewers": "51;120;69;20;88", "wc_reply_authors": "16;95;22;16;34", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 94.4, 36.3351069903475 ], "wc_strengths_avg": [ 94.2, 29.026884090442778 ], "wc_weaknesses_avg": [ 149.2, 59.864513695510794 ], "wc_questions_avg": [ 110.0, 63.00476172480934 ], "wc_limitations_avg": [ 16.4, 8.912911982062878 ], "wc_review_avg": [ 464.2, 117.02888532323975 ], "wc_reply_reviewers_avg": [ 69.6, 33.72002372478406 ], "wc_reply_authors_avg": [ 36.6, 29.930586362448697 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.13363062095621223, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8557168275121043072&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.nthu.edu.tw;nvidia.com;;nvidia.com;nthu.edu.tw;cs.nthu.edu.tw", "author_num": 6, "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "National Tsinghua University;NVIDIA;National Tsing Hua University", "aff_unique_dep": "Department of Computer Science;NVIDIA AI Technology Center;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nvidia.com/en-us/research/;https://www.nthu.edu.tw", "aff_unique_abbr": "THU;NVIDIA;NTHU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "M3LEO: A Multi-Modal, Multi-Label Earth Observation Dataset Integrating Interferometric SAR and Multispectral Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97528", "id": "li3iFfkwRL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=li3iFfkwRL", "openreview": "https://openreview.net/forum?id=li3iFfkwRL", "poster": "/media/PosterPDFs/NeurIPS%202024/97528.png?t=1733552425.1236954", "project": "", "author_site": "Matthew Allen, Francisco Dorr, Joseph Alejandro Gallego Mejia, Laura Mart\u00ednez-Ferrer, Anna Jungbluth, Freddie Kalaitzis, Raul Ramos-Poll\u00e1n", "tldr": "", "abstract": "Satellite-based remote sensing has revolutionised the way we address global challenges in a rapidly evolving world. Huge quantities of Earth Observation (EO) data are generated by satellite sensors daily, but processing these large datasets for use in ML pipelines is technically and computationally challenging. Specifically, different types of EO data are often hosted on a variety of platforms, with\ndiffering degrees of availability for Python preprocessing tools. In addition, spatial alignment across data sources and data tiling for easier handling can present significant technical hurdles for novice users. While some preprocessed Earth observation datasets exist, their content is often limited to optical or near-optical wavelength data, which is ineffective at night or in adverse weather conditions.\nSynthetic Aperture Radar (SAR), an active sensing technique based on microwave length radiation, offers a viable alternative. However, the application of machine learning to SAR has been limited due to a lack of ML-ready data and pipelines, particularly for the full diversity of SAR data, including polarimetry, coherence and interferometry. In this work, we introduce M3LEO, a multi-modal, multi-label\nEarth observation dataset that includes polarimetric, interferometric, and coherence SAR data derived from Sentinel-1, alongside multispectral Sentinel-2 imagery and a suite of auxiliary data describing terrain properties such as land use. M3LEO spans approximately 17M data chips, each measuring 4x4 km, across six diverse geographic regions. The dataset is complemented by a flexible PyTorch Lightning framework, with configuration management using Hydra, to accommodate its use across diverse ML applications in Earth observation. Additionally, we provide tools to process any dataset available on popular platforms such as Google Earth Engine for seamless integration with our framework. We show that the distribution shift in self-supervised embeddings is substantial across geographic regions, even when controlling for terrain properties. Data is available at huggingface.co/M3LEO, and code at github.com/spaceml-org/M3LEO.", "keywords": "Earth Observation;Synthetic Aperture Radar;SAR;Remote Sensing;Computer Vision;Machine Learning;Dataset", "primary_area": "", "supplementary_material": "/attachment/a76b6d7a71704e56af7789d320e8179bdb0e3f97.zip", "author": "Matthew J Allen;Francisco Dorr;Joseph Alejandro Gallego Mejia;Laura Mart\u00ednez-Ferrer;Anna Jungbluth;Freddie Kalaitzis;Ra\u00fal Ramos-Poll\u00e1n", "authorids": "~Matthew_J_Allen1;~Francisco_Dorr1;~Joseph_Alejandro_Gallego_Mejia1;~Laura_Mart\u00ednez-Ferrer1;~Anna_Jungbluth1;~Freddie_Kalaitzis1;~Ra\u00fal_Ramos-Poll\u00e1n1", "gender": "Not Specified;;M;;F;M;M", "homepage": ";https://github.com/frandorr;;;;https://scienti.colciencias.gov.co/cvlac/visualizador/generarCurriculoCv.do?cod_rh=0001472262;https://github.com/alkalait", "dblp": ";;;;;25/8671;178/8581", "google_scholar": "EiCZMlQAAAAJ;;https://scholar.google.cl/citations?user=DS0IfX4AAAAJ;https://scholar.google.es/citations?hl=es;;https://scholar.google.com/citations?hl=en;SywTqIIAAAAJ", "orcid": ";;0000-0001-8971-4998;0000-0002-4730-8657;0000-0002-9888-6262;0000-0001-6195-3612;0000-0002-1471-646X", "linkedin": ";;joagalejo/;;;raul-ramos-pollan-9372ba12/;alfredokalaitzis/", "or_profile": "~Matthew_J_Allen1;~Francisco_Dorr1;~Joseph_Alejandro_Gallego_Mejia1;~Laura_Mart\u00ednez-Ferrer1;~Anna_Jungbluth1;~Ra\u00fal_Ramos-Poll\u00e1n1;~Alfredo_A._Kalaitzis1", "aff": "University of Cambridge;;Drexel University;Universidad de Valencia;European Space Agency;Universidad de Antioqu\u00eda;Department of Computer Science, University of Oxford", "aff_domain": "cam.ac.uk;;drexel.edu;uv.es;esa.int;udea.edu.co;cs.ox.ac.uk", "position": "PhD student;;Assistant Professor;PhD student;Postdoc;Full Professor;Senior Research Fellow", "bibtex": "@inproceedings{\nallen2024mleo,\ntitle={M3{LEO}: A Multi-Modal, Multi-Label Earth Observation Dataset Integrating Interferometric {SAR} and Multispectral Data},\nauthor={Matthew J Allen and Francisco Dorr and Joseph Alejandro Gallego Mejia and Laura Mart{\\'\\i}nez-Ferrer and Anna Jungbluth and Freddie Kalaitzis and Ra{\\'u}l Ramos-Poll{\\'a}n},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=li3iFfkwRL}\n}", "github": "", "reviewers": "XVwA;B6VH;jNsP;eCWi", "pdf_size": 24055183, "rating": "4;5;7;8", "confidence": "5;4;4;5", "wc_summary_and_contributions": "124;68;180;102", "wc_strengths": "48;33;156;133", "wc_improvement": "222;129;413;252", "wc_limitations": "68;1;143;15", "wc_correctness": "66;1;257;24", "wc_clarity": "5;1;143;10", "wc_relation_to_prior_work": "36;1;101;7", "wc_documentation": "30;5;214;18", "wc_additional_feedback": "1;1;1;1", "wc_review": "600;240;1608;562", "wc_reply_reviewers": "161;0;185;36", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 118.5, 40.72775466435635 ], "wc_strengths_avg": [ 92.5, 52.89848769104841 ], "wc_improvement_avg": [ 254.0, 102.38896424908302 ], "wc_limitations_avg": [ 56.75, 55.71523579776002 ], "wc_correctness_avg": [ 87.0, 100.87863995911127 ], "wc_clarity_avg": [ 39.75, 59.69662888304498 ], "wc_relation_to_prior_work_avg": [ 36.25, 39.65712420234226 ], "wc_documentation_avg": [ 66.75, 85.47331454904507 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 752.5, 513.3427217756184 ], "wc_reply_reviewers_avg": [ 95.5, 78.99525302193797 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9612449408333463378&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "cam.ac.uk;;drexel.edu;uv.es;esa.int;udea.edu.co;cs.ox.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Cambridge;Drexel University;Universidad de Valencia;European Space Agency;Universidad de Antioqu\u00eda;University of Oxford", "aff_unique_dep": ";;;;;Department of Computer Science", "aff_unique_url": "https://www.cam.ac.uk;https://www.drexel.edu;https://www.uv.es;https://www.esa.int;https://www.udea.edu.co;https://www.ox.ac.uk", "aff_unique_abbr": "Cambridge;Drexel;UV;ESA;UdeA;Oxford", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Oxford", "aff_country_unique_index": "0;1;2;3;4;0", "aff_country_unique": "United Kingdom;United States;Spain;Unknown;Colombia" }, { "title": "FewViewGS: Gaussian Splatting with Few View Matching and Multi-stage Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93806", "id": "liHe9iumIi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=liHe9iumIi", "openreview": "https://openreview.net/forum?id=liHe9iumIi", "poster": "/media/PosterPDFs/NeurIPS%202024/93806.png?t=1730901125.8649776", "project": "", "author_site": "Ruihong Yin, Vladimir Yugay, Yue Li, Sezer Karaoglu, Theo Gevers", "tldr": "", "abstract": "The field of novel view synthesis from images has seen rapid advancements with the introduction of Neural Radiance Fields (NeRF) and more recently with 3D Gaussian Splatting. Gaussian Splatting became widely adopted due to its efficiency and ability to render novel views accurately. While Gaussian Splatting performs well when a sufficient amount of training images are available, its unstructured explicit representation tends to overfit in scenarios with sparse input images, resulting in poor rendering performance. To address this, we present a 3D Gaussian-based novel view synthesis method using sparse input images that can accurately render the scene from the viewpoints not covered by the training images. We propose a multi-stage training scheme with matching-based consistency constraints imposed on the novel views without relying on pre-trained depth estimation or diffusion models. This is achieved by using the matches of the available training images to supervise the generation of the novel views sampled between the training frames with color, geometry, and semantic losses. In addition, we introduce a locality preserving regularization for 3D Gaussians which removes rendering artifacts by preserving the local color structure of the scene. Evaluation on synthetic and real-world datasets demonstrates competitive or superior performance of our method in few-shot novel view synthesis compared to existing state-of-the-art methods.", "keywords": "Few-shot view synthesis;gaussian splatting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ruihong Yin;Vladimir Yugay;Yue Li;Sezer Karaoglu;Theo Gevers", "authorids": "~Ruihong_Yin1;~Vladimir_Yugay1;~Yue_Li12;~Sezer_Karaoglu1;~Theo_Gevers1", "gender": "F;M;;M;M", "homepage": ";https://vladimiryugay.github.io/;;https://karaoglusezer.github.io/;https://staff.science.uva.nl/th.gevers/", "dblp": "273/5999;;;75/9557;12/6600", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;sae4r88AAAAJ;;https://scholar.google.nl/citations?user=EjRB7dEAAAAJ;yqsvxQgAAAAJ", "orcid": ";;;;", "linkedin": ";vladimir-yugay;;sezer-karaoglu-1650a01a/;theo-gevers-a215244/", "or_profile": "~Ruihong_Yin1;~Vladimir_Yugay1;~Yue_Li12;~Sezer_Karaoglu1;~Theo_Gevers1", "aff": "University of Amsterdam;University of Amsterdam;;University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;uva.nl;;uva.nl;ivi.uva.nl", "position": "PhD student;PhD student;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nyin2024fewviewgs,\ntitle={FewView{GS}: Gaussian Splatting with Few View Matching and Multi-stage Training},\nauthor={Ruihong Yin and Vladimir Yugay and Yue Li and Sezer Karaoglu and Theo Gevers},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=liHe9iumIi}\n}", "github": "", "reviewers": "oLUf;8CAh;ZPEG;Eqzk", "pdf_size": 20491344, "rating": "3;5;6;6", "confidence": "5;5;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "63;49;85;54", "wc_strengths": "19;40;230;21", "wc_weaknesses": "281;101;107;25", "wc_questions": "12;20;71;2", "wc_limitations": "36;6;19;1", "wc_review": "411;216;512;103", "wc_reply_reviewers": "1036;0;0;55", "wc_reply_authors": "1623;0;0;164", "reply_reviewers": "3;0;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.75, 13.790848414800301 ], "wc_strengths_avg": [ 77.5, 88.4265231703701 ], "wc_weaknesses_avg": [ 128.5, 93.79099103858536 ], "wc_questions_avg": [ 26.25, 26.61179249881526 ], "wc_limitations_avg": [ 15.5, 13.536986370680884 ], "wc_review_avg": [ 310.5, 160.22562217073772 ], "wc_reply_reviewers_avg": [ 272.75, 441.234277340281 ], "wc_reply_authors_avg": [ 446.75, 682.4006795864143 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8164965809277259, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2030536676171144310&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "uva.nl;uva.nl;;uva.nl;ivi.uva.nl", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "lizRmKCnBp", "title": "NeCGS: Neural Compression for 3D Geometry Sets", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper explores the problem of effectively compressing 3D geometry sets containing diverse categories. We make the first attempt to tackle this fundamental and challenging problem and propose NeCGS, a neural compression paradigm, which can compress hundreds of detailed and diverse 3D mesh models ($\\sim$684 MB) by about 900 times (0.76 MB) with high accuracy and preservation of detailed geometric details. Specifically, we first represent each \\textit{irregular} mesh model/shape in a regular representation that implicitly describes the geometry structure of the model using a 4D regular volume, called TSDF-Def volume. Such a regular representation can not only capture local surfaces more effectively but also facilitate the subsequent process. Then we construct a quantization-aware auto-decoder network architecture to regress these 4D volumes, which can summarize the similarity of local geometric structures within a model and across different models for redundancy elimination, resulting in more compact representations, including an embedded feature of a smaller size associated with each model and a network parameter set shared by all models. We finally quantize and encode the resulting features and network parameters into bitstreams through entropy coding. After decompressing the features and network parameters, we can reconstruct the TSDF-Def volumes, where the 3D surfaces can be extracted through the deformable marching cubes. Extensive experiments and ablation studies demonstrate the significant advantages of our NeCGS over state-of-the-art methods both quantitatively and qualitatively. We have included the source code in the Supplemental Material.", "keywords": "geometry compression", "primary_area": "machine_vision", "supplementary_material": "/attachment/4debf7408e0fa352c2c56a7ee58ddc5d8e81672b.zip", "author": "Siyu Ren;Junhui Hou;Wenping Wang", "authorids": "~Siyu_Ren3;~Junhui_Hou2;~Wenping_Wang1", "gender": "M;M;M", "homepage": "https://scholars.cityu.edu.hk/en/persons/siyu-ren(3eb70e9f-b56c-4ce5-88e6-c7ffbbcbaf36).html;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;https://engineering.tamu.edu/cse/profiles/Wang-Wenping.html", "dblp": ";122/2673.html;", "google_scholar": "xSm7_VwAAAAJ;j6eefhwAAAAJ;28shvv0AAAAJ", "orcid": ";0000-0003-3431-2021;0000-0002-2284-3952", "linkedin": "siyu-ren-22403a227/;;", "or_profile": "~Siyu_Ren3;~Junhui_Hou2;~Wenping_Wang1", "aff": "Tianjin University;City University of Hong Kong;Texas A&M University - College Station", "aff_domain": "tju.edu.cn;cityu.edu.hk;tamu.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@misc{\nanonymous2024necgs,\ntitle={Ne{CGS}: Neural Compression for 3D Geometry Sets},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=lizRmKCnBp}\n}", "github": "", "project": "", "reviewers": "tAuE;18DA;jc1U;qNSe", "site": "https://openreview.net/forum?id=lizRmKCnBp", "pdf_size": 17504937, "rating": "2;5;5;5", "confidence": "5;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;3", "wc_summary": "37;76;100;114", "wc_strengths": "9;105;76;42", "wc_weaknesses": "127;177;45;237", "wc_questions": "3;100;234;11", "wc_limitations": "1;1;1;4", "wc_review": "177;459;456;408", "wc_reply_reviewers": "297;23;391;208", "wc_reply_authors": "951;70;624;261", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;4;3", "rating_avg": [ 4.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 29.192250684042847 ], "wc_strengths_avg": [ 58.0, 36.020827308655754 ], "wc_weaknesses_avg": [ 146.5, 70.36156621338101 ], "wc_questions_avg": [ 87.0, 93.01881530099166 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 375.0, 116.09263542533608 ], "wc_reply_reviewers_avg": [ 229.75, 135.77808180998875 ], "wc_reply_authors_avg": [ 476.5, 338.5959982043497 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wjIjE-WpS7IJ:scholar.google.com/&scioq=NeCGS:+Neural+Compression+for+3D+Geometry+Sets&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tianjin University;City University of Hong Kong;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;https://www.cityu.edu.hk;https://www.tamu.edu", "aff_unique_abbr": "TJU;CityU;TAMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;College Station", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "ProbTS: Benchmarking Point and Distributional Forecasting across Diverse Prediction Horizons", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97527", "id": "lk7SW0bH4x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lk7SW0bH4x", "openreview": "https://openreview.net/forum?id=lk7SW0bH4x", "poster": "/media/PosterPDFs/NeurIPS%202024/97527.png?t=1731568105.9571488", "project": "", "author_site": "Jiawen Zhang, Xumeng Wen, Zhenwei Zhang, Shun Zheng, Jia Li, Jiang Bian", "tldr": "", "abstract": "Delivering precise point and distributional forecasts across a spectrum of prediction horizons represents a significant and enduring challenge in the application of time-series forecasting within various industries.\nPrior research on developing deep learning models for time-series forecasting has often concentrated on isolated aspects, such as long-term point forecasting or short-term probabilistic estimations. This narrow focus may result in skewed methodological choices and hinder the adaptability of these models to uncharted scenarios.\nWhile there is a rising trend in developing universal forecasting models, a thorough understanding of their advantages and drawbacks, especially regarding essential forecasting needs like point and distributional forecasts across short and long horizons, is still lacking.\nIn this paper, we present ProbTS, a benchmark tool designed as a unified platform to evaluate these fundamental forecasting needs and to conduct a rigorous comparative analysis of numerous cutting-edge studies from recent years.\nWe dissect the distinctive data characteristics arising from disparate forecasting requirements and elucidate how these characteristics can skew methodological preferences in typical research trajectories, which often fail to fully accommodate essential forecasting needs.\nBuilding on this, we examine the latest models for universal time-series forecasting and discover that our analyses of methodological strengths and weaknesses are also applicable to these universal models.\nFinally, we outline the limitations inherent in current research and underscore several avenues for future exploration.", "keywords": "time-series forecasting;probabilistic forecasting", "primary_area": "", "supplementary_material": "", "author": "Jiawen Zhang;Xumeng Wen;Zhenwei Zhang;Shun Zheng;Jia Li;Jiang Bian", "authorids": "~Jiawen_Zhang1;~Xumeng_Wen1;~Zhenwei_Zhang2;~Shun_Zheng1;~Jia_Li4;~Jiang_Bian1", "gender": "F;M;M;M;M;M", "homepage": "https://imjiawen.com;https://github.com/xumwen;https://github.com/zhangzw16;;https://sites.google.com/view/lijia;https://sites.google.com/view/jiangbian", "dblp": "59/11040-1;358/9194.html;;179/2615.html;23/6950-9;09/851-2.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;dn70U1wAAAAJ;21Q9To4AAAAJ;1gSbcYoAAAAJ;pZBEnY8AAAAJ", "orcid": "0009-0000-1855-9177;;0009-0001-2464-9248;0009-0005-7355-7090;0000-0002-6362-4385;0000-0002-9472-600X", "linkedin": ";;;;;jbian/", "or_profile": "~Jiawen_Zhang1;~Xumeng_Wen1;~Zhenwei_Zhang2;~Shun_Zheng1;~Jia_Li4;~Jiang_Bian1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Microsoft Research Asia;Tsinghua University;Microsoft;Hong Kong University of Science and Technology (Guangzhou);Microsoft", "aff_domain": "connect.hkust-gz.edu.cn;microsoft.com;tsinghua.edu.cn;microsoft.com;ust.hk;microsoft.com", "position": "PhD student;Researcher;PhD student;Senior Researcher;Assistant Professor;Partner Research Manager", "bibtex": "@inproceedings{\nzhang2024probts,\ntitle={Prob{TS}: Benchmarking Point and Distributional Forecasting across Diverse Prediction Horizons},\nauthor={Jiawen Zhang and Xumeng Wen and Zhenwei Zhang and Shun Zheng and Jia Li and Jiang Bian},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=lk7SW0bH4x}\n}", "github": "", "reviewers": "LXN2;HvVZ;Yg3R", "pdf_size": 12887630, "rating": "5;8;8", "confidence": "3;4;3", "wc_summary_and_contributions": "50;63;64", "wc_strengths": "77;55;26", "wc_improvement": "127;152;22", "wc_limitations": "14;70;14", "wc_correctness": "10;20;17", "wc_clarity": "27;8;6", "wc_relation_to_prior_work": "15;119;10", "wc_documentation": "16;32;7", "wc_additional_feedback": "1;1;1", "wc_review": "337;520;167", "wc_reply_reviewers": "16;0;191", "wc_reply_authors": "214;0;196", "reply_reviewers": "1;0;2", "reply_authors": "7;1;8", "rating_avg": [ 7.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 59.0, 6.377042156569663 ], "wc_strengths_avg": [ 52.666666666666664, 20.885933597094056 ], "wc_improvement_avg": [ 100.33333333333333, 56.322484162386004 ], "wc_limitations_avg": [ 32.666666666666664, 26.398653164297777 ], "wc_correctness_avg": [ 15.666666666666666, 4.189935029992178 ], "wc_clarity_avg": [ 13.666666666666666, 9.46337971105226 ], "wc_relation_to_prior_work_avg": [ 48.0, 50.24606120549815 ], "wc_documentation_avg": [ 18.333333333333332, 10.338708279513881 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 341.3333333333333, 144.14421790538654 ], "wc_reply_reviewers_avg": [ 69.0, 86.51396804370187 ], "wc_reply_authors_avg": [ 136.66666666666666, 96.91691745453365 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 5.333333333333333, 3.0912061651652345 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11896820281162458228&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "connect.hkust-gz.edu.cn;microsoft.com;tsinghua.edu.cn;microsoft.com;ust.hk;microsoft.com", "author_num": 6, "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Microsoft;Tsinghua University", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.ust.hk;https://www.microsoft.com/en-us/research/group/asia;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKUST;MSR Asia;THU", "aff_campus_unique_index": "0;1;3", "aff_campus_unique": "Guangzhou;Asia;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Compressing Large Language Models using Low Rank and Low Precision Decomposition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93805", "id": "lkx3OpcqSZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lkx3OpcqSZ", "openreview": "https://openreview.net/forum?id=lkx3OpcqSZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93805.png?t=1733203586.6059253", "project": "", "author_site": "Rajarshi Saha, Naomi Sagan, Varun Srivastava, Andrea Goldsmith, Mert Pilanci", "tldr": "", "abstract": "The prohibitive sizes of Large Language Models (LLMs) today make it difficult to deploy them on memory-constrained edge devices. This work introduces $\\rm CALDERA$ -- a new post-training LLM compression algorithm that harnesses the inherent low-rank structure of a weight matrix $\\mathbf{W}$ by approximating it via a low-rank, low-precision decomposition as $\\mathbf{W} \\approx \\mathbf{Q} + \\mathbf{L}\\mathbf{R}$. Here, $\\mathbf{L}$ and $\\mathbf{R}$ are low rank factors, and the entries of $\\mathbf{Q}$, $\\mathbf{L}$ and $\\mathbf{R}$ are quantized. The model is compressed by substituting each layer with its $\\mathbf{Q} + \\mathbf{L}\\mathbf{R}$ decomposition, and the zero-shot performance of the compressed model is evaluated. Additionally, $\\mathbf{L}$ and $\\mathbf{R}$ are readily amenable to low-rank adaptation, consequently enhancing the zero-shot performance. $\\rm CALDERA$ obtains this decomposition by formulating it as an optimization problem $\\min_{\\mathbf{Q},\\mathbf{L},\\mathbf{R}}\\lVert(\\mathbf{Q} + \\mathbf{L}\\mathbf{R} - \\mathbf{W})\\mathbf{X}^\\top\\rVert_{\\rm F}^2$, where $\\mathbf{X}$ is the calibration data, and $\\mathbf{Q}, \\mathbf{L}, \\mathbf{R}$ are constrained to be representable using low-precision formats. Theoretical upper bounds on the approximation error of $\\rm CALDERA$ are established using a rank-constrained regression framework, and the tradeoff between compression ratio and model performance is studied by analyzing the impact of target rank and quantization bit budget. Results illustrate that compressing LlaMa-$2$ $7$B/$13$B/$70$B and LlaMa-$3$ $8$B models obtained using $\\rm CALDERA$ outperforms existing post-training LLM compression techniques in the regime of less than $2.5$ bits per parameter.", "keywords": "Large Language Models (LLMs);Model Compression;Post-training Quantization;Low-Rank Decomposition;Low-Precision Formats;Quantization Error Analysis;Rank-Constrained Regression;Randomized Linear Algebra;Sketching", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Rajarshi Saha;Naomi Sagan;Varun Srivastava;Andrea Goldsmith;Mert Pilanci", "authorids": "~Rajarshi_Saha1;~Naomi_Sagan1;~Varun_Srivastava1;~Andrea_Goldsmith1;~Mert_Pilanci3", "gender": "M;F;M;F;M", "homepage": "https://sites.google.com/view/rajarshi-saha/;;https://varunsrivastava.com;https://ece.princeton.edu/people/andrea-goldsmith;https://stanford.edu/~pilanci/", "dblp": "232/9641;;;;45/8056", "google_scholar": "https://scholar.google.com/citations?hl=en;;oNN_GhUAAAAJ;https://scholar.google.com/citations?hl=en;aSAS-aAAAAAJ", "orcid": "0000-0002-7863-9811;0000-0003-2218-0500;0000-0001-5420-3952;;", "linkedin": "rajarshisaha95/;;vsriva/;;mert-pilanci-ba615743/", "or_profile": "~Rajarshi_Saha1;~Naomi_Sagan1;~Varun_Srivastava1;~Andrea_Goldsmith1;~Mert_Pilanci3", "aff": "Stanford University;Stanford University;;Princeton University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;;princeton.edu;stanford.edu", "position": "PhD student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsaha2024compressing,\ntitle={Compressing Large Language Models using Low Rank and Low Precision Decomposition},\nauthor={Rajarshi Saha and Naomi Sagan and Varun Srivastava and Andrea Goldsmith and Mert Pilanci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lkx3OpcqSZ}\n}", "github": "", "reviewers": "32tN;xRtR;23tr;GVVM;ZG6L", "pdf_size": 1173315, "rating": "2;4;7;7;7", "confidence": "5;2;3;2;4", "soundness": "3;2;3;4;3", "novelty": "1;2;3;3;3", "presentation": "1;3;3;3;3", "wc_summary": "55;67;153;76;80", "wc_strengths": "61;84;83;92;55", "wc_weaknesses": "279;143;87;110;67", "wc_questions": "2;2;22;18;36", "wc_limitations": "1;1;1;3;18", "wc_review": "398;297;346;299;256", "wc_reply_reviewers": "929;0;18;14;15", "wc_reply_authors": "730;82;0;0;82", "reply_reviewers": "3;0;1;1;1", "reply_authors": "3;2;1;1;2", "rating_avg": [ 5.4, 2.0591260281974 ], "confidence_avg": [ 3.2, 1.16619037896906 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 86.2, 34.48709903717621 ], "wc_strengths_avg": [ 75.0, 14.352700094407323 ], "wc_weaknesses_avg": [ 137.2, 75.2712428487799 ], "wc_questions_avg": [ 16.0, 12.89961239727768 ], "wc_limitations_avg": [ 4.8, 6.6452990903344595 ], "wc_review_avg": [ 319.2, 48.626741614054296 ], "wc_reply_reviewers_avg": [ 195.2, 366.95253099004503 ], "wc_reply_authors_avg": [ 178.8, 278.0290632290085 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132712 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.44975020814061956, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14868791838572510583&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "stanford.edu;stanford.edu;;princeton.edu;stanford.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu", "aff_unique_abbr": "Stanford;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Personalized Adapter for Large Meteorology Model on Devices: Towards Weather Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93804", "id": "llTroju97T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=llTroju97T", "openreview": "https://openreview.net/forum?id=llTroju97T", "poster": "/media/PosterPDFs/NeurIPS%202024/93804.png?t=1731125726.653033", "project": "", "author_site": "Shengchao Chen, Guodong Long, Jing Jiang, Chengqi Zhang", "tldr": "", "abstract": "This paper demonstrates that pre-trained language models (PLMs) are strong foundation models for on-device meteorological variable modeling. We present LM-Weather, a generic approach to taming PLMs, that have learned massive sequential knowledge from the universe of natural language databases, to acquire an immediate capability to obtain highly customized models for heterogeneous meteorological data on devices while keeping high efficiency. Concretely, we introduce a lightweight personalized adapter into PLMs and endows it with weather pattern awareness. During communication between clients and the server, low-rank-based transmission is performed to effectively fuse the global knowledge among devices while maintaining high communication efficiency and ensuring privacy. Experiments on real-wold dataset show that LM-Weather outperforms the state-of-the-art results by a large margin across various tasks (e.g., forecasting and imputation at different scales). We provide extensive and in-depth analyses experiments, which verify that LM-Weather can (1) indeed leverage sequential knowledge from natural language to accurately handle meteorological sequence, (2) allows each devices obtain highly customized models under significant heterogeneity, and (3) generalize under data-limited and out-of-distribution (OOD) scenarios.", "keywords": "Meteorological Variable Modeling;Federared Learning;On-device Intelligence;Foundation Model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Shengchao Chen;Guodong Long;Jing Jiang;Chengqi Zhang", "authorids": "~Shengchao_Chen1;~Guodong_Long2;~Jing_Jiang6;~Chengqi_Zhang1", "gender": ";M;F;M", "homepage": ";https://www.uts.edu.au/staff/guodong.long;https://www.uts.edu.au/staff/jing.jiang;https://research.polyu.edu.hk/en/persons/chengqi-zhang", "dblp": ";34/10089;68/1974-2;71/964", "google_scholar": ";https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.au/citations?user=B6lBmqEAAAAJ", "orcid": ";0000-0003-3740-9515;;0000-0001-5715-7154", "linkedin": ";;;chengqi-zhang-55aa8910/", "or_profile": "~Shengchao_Chen1;~Guodong_Long2;~Jing_Jiang6;~Chengqi_Zhang1", "aff": ";University of Technology Sydney;University of Technology Sydney;University of Technology Sydney", "aff_domain": ";uts.edu.au;uts.edu.au;uts.edu.au", "position": ";Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024personalized,\ntitle={Personalized Adapter for Large Meteorology Model on Devices: Towards Weather Foundation Models},\nauthor={Shengchao Chen and Guodong Long and Jing Jiang and Chengqi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=llTroju97T}\n}", "github": "", "reviewers": "Jjnz;C54F;tT2F;mBbn", "pdf_size": 24046135, "rating": "6;7;7;8", "confidence": "4;3;4;5", "soundness": "3;3;4;4", "novelty": "3;3;3;4", "presentation": "2;2;3;3", "wc_summary": "66;80;111;37", "wc_strengths": "59;23;94;111", "wc_weaknesses": "226;98;130;134", "wc_questions": "106;97;161;88", "wc_limitations": "117;1;50;1", "wc_review": "574;299;546;371", "wc_reply_reviewers": "17;18;0;20", "wc_reply_authors": "20;23;0;26", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.5, 26.63174797117155 ], "wc_strengths_avg": [ 71.75, 33.818449106959356 ], "wc_weaknesses_avg": [ 147.0, 47.69696007084728 ], "wc_questions_avg": [ 113.0, 28.434134416225863 ], "wc_limitations_avg": [ 42.25, 47.567714891510185 ], "wc_review_avg": [ 447.5, 115.76808713976403 ], "wc_reply_reviewers_avg": [ 13.75, 8.011710179481033 ], "wc_reply_authors_avg": [ 17.25, 10.18270592720815 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7994050741430573170&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";uts.edu.au;uts.edu.au;uts.edu.au", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Human-Object Interaction Detection Collaborated with Large Relation-driven Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93803", "id": "lmsCSDymEP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lmsCSDymEP", "openreview": "https://openreview.net/forum?id=lmsCSDymEP", "poster": "", "project": "", "author_site": "Liulei Li, Wenguan Wang, Yi Yang", "tldr": "", "abstract": "Prevalent human-object interaction (HOI) detection approaches typically leverage large-scale visual-linguistic models to help recognize events involving humans and objects. Though promising, models trained via contrastive learning on text-image pairs often neglect mid/low-level visual cues and struggle at compositional reasoning. In response, we introduce DIFFUSIONHOI, a new HOI detector shedding light on text-to-image diffusion models. Unlike the aforementioned models, diffusion models excel in discerning mid/low-level visual concepts as generative models, and possess strong compositionality to handle novel concepts expressed in text inputs. Considering diffusion models usually emphasize instance objects, we first devise an inversion-based strategy to learn the expression of relation patterns between humans and objects in embedding space. These learned relation embeddings then serve as textual prompts, to steer diffusion models generate images that depict specific interactions, and extract HOI-relevant cues from images without heavy finetuning. Benefited from above, DIFFUSIONHOI achieves SOTA performance on three datasets under both regular and zero-shot setups.", "keywords": "HOI detection;relation understanding;diffusion models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Liulei Li;Wenguan Wang;Yi Yang", "authorids": "~Liulei_Li1;~Wenguan_Wang4;~Yi_Yang22", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/wenguanwang/;https://person.zju.edu.cn/yiyang", "dblp": "295/8925;145/1078;33/4854-1.html", "google_scholar": "eCrBWngAAAAJ;CqAQQkgAAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0002-4637-0328;0000-0002-0802-9567;", "linkedin": ";wenguanwang;", "or_profile": "~Liulei_Li1;~Wenguan_Wang4;~Yi_Yang22", "aff": "University of Technology Sydney;Zhejiang University;Zhejiang University", "aff_domain": "uts.edu.au;zju.edu.cn;zju.edu.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024humanobject,\ntitle={Human-Object Interaction Detection Collaborated with Large Relation-driven Diffusion Models},\nauthor={Liulei Li and Wenguan Wang and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lmsCSDymEP}\n}", "github": "", "reviewers": "Xw3E;MQdd;UENt", "pdf_size": 4519392, "rating": "5;5;7", "confidence": "3;4;4", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "64;53;122", "wc_strengths": "26;56;67", "wc_weaknesses": "86;120;169", "wc_questions": "1;17;4", "wc_limitations": "1;6;4", "wc_review": "178;252;366", "wc_reply_reviewers": "18;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.66666666666667, 30.26916289265731 ], "wc_strengths_avg": [ 49.666666666666664, 17.326921891156037 ], "wc_weaknesses_avg": [ 125.0, 34.068558329736625 ], "wc_questions_avg": [ 7.333333333333333, 6.944222218666553 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 265.3333333333333, 77.32758599332811 ], "wc_reply_reviewers_avg": [ 6.0, 8.48528137423857 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6505394603272686563&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uts.edu.au;zju.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Technology Sydney;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Australia;China" }, { "id": "lnnNPiZtzR", "title": "FungiTastic: A multi-modal dataset and benchmark for image categorization", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "We introduce a new, highly challenging benchmark and a dataset -- FungiTastic -- based on data continuously collected over a twenty-year span.\nThe dataset originates in fungal records labeled and curated by experts. It consists of about 350k multi-modal observations that include more than 650k photographs from 5k fine-grained categories and diverse \naccompanying information, e.g., acquisition metadata, satellite images, and body part segmentation. \nFungiTastic is the only benchmark that includes a test set with partially DNA-sequenced ground truth of unprecedented label reliability.\nThe benchmark is designed to support \n(i) standard close-set classification, \n(ii) open-set classification,\n(iii) multi-modal classification,\n(iv) few-shot learning, \n(v) domain shift, and many more.\nWe provide baseline methods tailored for almost all the use-cases.\nWe provide a multitude of ready-to-use pre-trained models on HuggingFace and a framework for model training.\nA comprehensive documentation describing the dataset features and the baselines are available at \\href{https://sulc.github.io/DanishFungi2024/}{GitHub} and Kaggle.", "keywords": "image classification;fine grained visual categorization;multi-modal;open-set recognition;species identification;domain shift;segmentation;few-shot learning", "primary_area": "", "supplementary_material": "/attachment/738c976e851c7881b7f6addc58aaa28b2cd4a692.zip", "author": "Lukas Picek;Klara Janouskova;Milan \u0160ulc;Jiri Matas", "authorids": "~Lukas_Picek1;~Klara_Janouskova2;~Milan_\u0160ulc1;~Jiri_Matas3", "gender": "M;F;M;M", "homepage": "https://sites.google.com/view/picekl/;https://klarajanouskova.github.io/;https://sulc.github.io/;http://cmp.felk.cvut.cz/~matas/", "dblp": "224/1991;270/0510;140/7921.html;m/JiriMatas", "google_scholar": ";AMQpoa0AAAAJ;lTFaQsgAAAAJ;EJCNY6QAAAAJ", "orcid": "0000-0002-6041-9722;0000-0002-0191-7510;0000-0002-6321-0131;0000-0003-0863-4844", "linkedin": ";kl%C3%A1ra-janou%C5%A1kov%C3%A1-280198159/;milansulc01/;jiri-matas-5627972b/?originalSubdomain=cz", "or_profile": "~Lukas_Picek1;~Klara_Janouskova2;~Milan_\u0160ulc1;~Jiri_Matas3", "aff": "University of West Bohemia;Czech Technical Univeresity in Prague, Czech Technical University of Prague;Rossum;Czech Techical University", "aff_domain": "zcu.cz;fel.cvut.cz;rossum.ai;cvut.cz", "position": "Researcher;PhD student;Principal Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024fungitastic,\ntitle={FungiTastic: A multi-modal dataset and benchmark for image categorization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=lnnNPiZtzR}\n}", "github": "", "project": "", "reviewers": "RwjS;WN3u;3VQA;sU1H", "site": "https://openreview.net/forum?id=lnnNPiZtzR", "pdf_size": 22907539, "rating": "3;5;8;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "36;65;248;149", "wc_strengths": "25;40;54;97", "wc_improvement": "85;105;86;70", "wc_limitations": "6;5;4;45", "wc_correctness": "13;37;30;1", "wc_clarity": "4;15;7;1", "wc_relation_to_prior_work": "13;16;10;1", "wc_documentation": "38;20;29;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "221;304;469;366", "wc_reply_reviewers": "1414;164;26;0", "wc_reply_authors": "350;370;0;0", "reply_reviewers": "5;2;1;0", "reply_authors": "5;2;1;1", "rating_avg": [ 6.0, 2.1213203435596424 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 124.5, 82.5 ], "wc_strengths_avg": [ 54.0, 26.860752037126584 ], "wc_improvement_avg": [ 86.5, 12.419742348374221 ], "wc_limitations_avg": [ 15.0, 17.334935823359714 ], "wc_correctness_avg": [ 20.25, 14.13108276106258 ], "wc_clarity_avg": [ 6.75, 5.2141634036535525 ], "wc_relation_to_prior_work_avg": [ 10.0, 5.612486080160912 ], "wc_documentation_avg": [ 22.0, 13.693063937629153 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 340.0, 90.5179540201832 ], "wc_reply_reviewers_avg": [ 401.0, 588.1674931513982 ], "wc_reply_authors_avg": [ 180.0, 180.13883534651822 ], "reply_reviewers_avg": [ 2.0, 1.8708286933869707 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2721655269759087, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14506037553571480818&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of West Bohemia;Czech Technical University in Prague;Rossum;Czech Technical University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zcu.cz;https://www.ctu.cz;;https://www.ctu.cz", "aff_unique_abbr": "ZCU;CTU;;CTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Prague", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Czech Republic;" }, { "title": "RedPajama: an Open Dataset for Training Large Language Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97526", "id": "lnuXaRpwvw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lnuXaRpwvw", "openreview": "https://openreview.net/forum?id=lnuXaRpwvw", "poster": "/media/PosterPDFs/NeurIPS%202024/97526.png?t=1733936499.7920725", "project": "", "author_site": "Maurice Weber, Dan Fu, Quentin Anthony, Yonatan Oren, Shane Adams, Anton Alexandrov, Xiaozhong Lyu, Huu Nguyen, Xiaozhe Yao, Virginia Adams, Ben Athiwaratkun, Rahul Chalamala, Kezhen Chen, Max Ryabinin, Tri Dao, Percy Liang, Christopher R\u00e9, Irina Rish, Ce Zhang", "tldr": "", "abstract": "Large language models are increasingly becoming a cornerstone technology in artificial intelligence, the sciences, and society as a whole, yet the optimal strategies for dataset composition and filtering remain largely elusive. Many of the top-performing models lack transparency in their dataset curation and model development processes, posing an obstacle to the development of fully open language models. \nIn this paper, we identify three core data-related challenges that must be addressed to advance open-source language models. These include (1) transparency in model development, including the data curation process, (2) access to large quantities of high-quality data, and (3) availability of artifacts and metadata for dataset curation and analysis. \nTo address these challenges, we release RedPajama-V1, an open reproduction of the LLaMA training dataset. In addition, we release RedPajama-V2, a massive web-only dataset consisting of raw, unfiltered text data together with quality signals and metadata.\nTogether, the RedPajama datasets comprise over 100 trillion tokens spanning multiple domains and with their quality signals facilitate the filtering of data, aiming to inspire the development of numerous new datasets. To date, these datasets have already been used in the training of strong language models used in production, such as Snowflake Arctic, Salesforce's XGen and AI2's OLMo. To provide insight into the quality of RedPajama, we present a series of analyses and ablation studies with decoder-only language models with up to 1.6B parameters. Our findings demonstrate how quality signals for web data can be effectively leveraged to curate high-quality subsets of the dataset, underscoring the potential of RedPajama to advance the development of transparent and high-performing language models at scale.", "keywords": "large language models;pretraining data;data quality", "primary_area": "", "supplementary_material": "", "author": "Maurice Weber;Daniel Y Fu;Quentin Gregory Anthony;Yonatan Oren;Shane Adams;Anton Alexandrov;Xiaozhong Lyu;Huu Nguyen;Xiaozhe Yao;Virginia Adams;Ben Athiwaratkun;Rahul Chalamala;Kezhen Chen;Max Ryabinin;Tri Dao;Percy Liang;Christopher Re;Irina Rish;Ce Zhang", "authorids": "~Maurice_Weber1;~Daniel_Y_Fu1;~Quentin_Gregory_Anthony1;~Yonatan_Oren1;~Shane_Adams1;~Anton_Alexandrov1;~Xiaozhong_Lyu1;~Huu_Nguyen2;~Xiaozhe_Yao1;~Virginia_Adams1;~Ben_Athiwaratkun1;~Rahul_Chalamala1;~Kezhen_Chen2;~Max_Ryabinin1;~Tri_Dao1;~Percy_Liang1;~Christopher_Re1;~Irina_Rish1;~Ce_Zhang1", "gender": ";;M;;M;M;M;;M;F;M;M;M;Not Specified;;;;F;", "homepage": ";;https://quentin-anthony.github.io/;;;;https://vlg.inf.ethz.ch/team/Xiaozhong-Lyu.html;;https://about.yao.sh;https://www.linkedin.com/in/virginia-adams-a3b99abb;https://benathi.github.io;https://chalamala.com/;https://www.kezhenchen.net/;https://mryab.github.io/;https://tridao.me/;https://cs.stanford.edu/~pliang/;;http://irina-rish.com;", "dblp": ";;;;;;215/0034;;212/8935;;166/1659;;;276/0192;206/7018;04/1701;;;97/919", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;;;;HLWECfMAAAAJ;;;KZpZTTQAAAAJ;KDtORmwAAAAJ;5Qf2XBIAAAAJ;930PERsAAAAJ;NQRw0bQAAAAJ;pouyVyUAAAAJ;;Avse5gIAAAAJ;", "orcid": ";;0000-0002-6823-9080;;;;;;;;;;;;;;;;", "linkedin": ";;quentin-anthony;;adamsch1/;anton-alexandrov-4624a2182/?originalSubdomain=bg;;huu-ai-machine-learning/;;virginia-adams-a3b99abb;;rchalamala/;;;;;;irina-rish-8b2162;", "or_profile": "~Maurice_Weber1;~Daniel_Y_Fu1;~Quentin_Gregory_Anthony1;~Yonatan_Oren1;~Shane_Adams1;~Anton_Alexandrov1;~Xiaozhong_Lyu1;~Huu_Nguyen2;~Xiaozhe_Yao1;~Virginia_Adams1;~Ben_Athiwaratkun1;~Rahul_Chalamala1;~Kezhen_Chen2;~Max_Ryabinin1;~Tri_Dao1;~Percy_Liang1;~Christopher_Re1;~Irina_Rish1;~Ce_Zhang1", "aff": ";;Ohio State University, Columbus;;together.ai;;Department of Computer Science, ETHZ - ETH Zurich;Ontocord.AI;Department of Computer Science, ETHZ - ETH Zurich;Computer Science Department, Stanford University;Amazon;California Institute of Technology;Google;Together AI;Princeton University;Stanford University;;University of Montreal;University of Chicago", "aff_domain": ";;osu.edu;;together.ai;;inf.ethz.ch;ontocord.ai;inf.ethz.ch;cs.stanford.edu;amazon.com;caltech.edu;google.com;together.ai;princeton.edu;stanford.edu;;mila.quebec;uchicago.edu", "position": ";;PhD student;;Programmer;;PhD student;Principal Researcher;PhD student;MS student;AI Scientist;Undergrad student;Researcher;Researcher;Assistant Professor;Associate Professor;;Professor;Associate Professor", "bibtex": "@inproceedings{\nweber2024redpajama,\ntitle={RedPajama: an Open Dataset for Training Large Language Models},\nauthor={Maurice Weber and Daniel Y Fu and Quentin Gregory Anthony and Yonatan Oren and Shane Adams and Anton Alexandrov and Xiaozhong Lyu and Huu Nguyen and Xiaozhe Yao and Virginia Adams and Ben Athiwaratkun and Rahul Chalamala and Kezhen Chen and Max Ryabinin and Tri Dao and Percy Liang and Christopher Re and Irina Rish and Ce Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=lnuXaRpwvw}\n}", "github": "", "reviewers": "zjih;zDq6;CeY6;TTPJ", "pdf_size": 4840617, "rating": "6;7;8;9", "confidence": "3;4;4;4", "wc_summary_and_contributions": "96;58;240;152", "wc_strengths": "111;41;273;112", "wc_improvement": "341;90;83;78", "wc_limitations": "1;1;227;1", "wc_correctness": "1;1;55;1", "wc_clarity": "9;1;11;1", "wc_relation_to_prior_work": "9;1;42;1", "wc_documentation": "1;4;78;11", "wc_additional_feedback": "1;1;1;1", "wc_review": "570;198;1010;358", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 136.5, 68.47444778893802 ], "wc_strengths_avg": [ 134.25, 85.1216041907106 ], "wc_improvement_avg": [ 148.0, 111.5100892296298 ], "wc_limitations_avg": [ 57.5, 97.86087062764156 ], "wc_correctness_avg": [ 14.5, 23.382685902179844 ], "wc_clarity_avg": [ 5.5, 4.55521678957215 ], "wc_relation_to_prior_work_avg": [ 13.25, 16.917077170717167 ], "wc_documentation_avg": [ 23.5, 31.67412192942371 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 534.0, 304.85406344675806 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 19, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15854208196488568984&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": ";;osu.edu;;together.ai;;inf.ethz.ch;ontocord.ai;inf.ethz.ch;cs.stanford.edu;amazon.com;caltech.edu;google.com;together.ai;princeton.edu;stanford.edu;;mila.quebec;uchicago.edu", "author_num": 19, "aff_unique_index": "0;1;2;3;2;4;5;6;7;1;8;4;9;10", "aff_unique_norm": "Ohio State University;Together AI;ETH Zurich;Ontocord.AI;Stanford University;Amazon;California Institute of Technology;Google;Princeton University;University of Montreal;University of Chicago", "aff_unique_dep": ";;Department of Computer Science;;Computer Science Department;Amazon.com, Inc.;;Google;;;", "aff_unique_url": "https://www.osu.edu;https://www.together.ai;https://www.ethz.ch;https://ontocord.ai;https://www.stanford.edu;https://www.amazon.com;https://www.caltech.edu;https://www.google.com;https://www.princeton.edu;https://wwwumontreal.ca;https://www.uchicago.edu", "aff_unique_abbr": "OSU;Together AI;ETHZ;Ontocord.AI;Stanford;Amazon;Caltech;Google;Princeton;UM;UChicago", "aff_campus_unique_index": "0;2;2;3;4;5;3", "aff_campus_unique": "Columbus;;Zurich;Stanford;Pasadena;Mountain View", "aff_country_unique_index": "0;0;1;0;1;0;0;0;0;0;0;0;2;0", "aff_country_unique": "United States;Switzerland;Canada" }, { "title": "FinBen: A Holistic Financial Benchmark for Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97525", "id": "loDHZstVP6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=loDHZstVP6", "openreview": "https://openreview.net/forum?id=loDHZstVP6", "poster": "/media/PosterPDFs/NeurIPS%202024/97525.png?t=1731393973.8212132", "project": "", "author_site": "Qianqian Xie, Weiguang Han, Zhengyu Chen, Ruoyu Xiang, Xiao Zhang, Yueru He, Mengxi Xiao, Dong Li, Yongfu Dai, Duanyu Feng, Yijing Xu, Haoqiang Kang, Ziyan Kuang, Chenhan Yuan, Kailai Yang, Zheheng Luo, Tianlin Zhang, Zhiwei Liu, GUOJUN XIONG, Zhiyang Deng, Yuechen Jiang, Zhiyuan Yao, Haohang Li, Yangyang Yu, Gang Hu, Huang Jiajia, Xiaoyang Liu, Alejandro Lopez-Lira, Benyou Wang, Yanzhao Lai, Hao Wang, Min Peng, Sophia Ananiadou, Jimin Huang", "tldr": "", "abstract": "LLMs have transformed NLP and shown promise in various fields, yet their potential in finance is underexplored due to a lack of comprehensive benchmarks, the rapid development of LLMs, and the complexity of financial tasks. In this paper, we introduce FinBen, the first extensive open-source evaluation benchmark, including 42 datasets spanning 24 financial tasks, covering eight critical aspects: information extraction (IE), textual analysis, question answering (QA), text generation, risk management, forecasting, decision-making, and bilingual (English and Spanish). FinBen offers several key innovations: a broader range of tasks and datasets, the first evaluation of stock trading, novel agent and Retrieval-Augmented Generation (RAG) evaluation, and two novel datasets for regulations and stock trading. Our evaluation of 21 representative LLMs, including GPT-4, ChatGPT, and the latest Gemini, reveals several key findings: While LLMs excel in IE and textual analysis, they struggle with advanced reasoning and complex tasks like text generation and forecasting. GPT-4 excels in IE and stock trading, while Gemini is better at text generation and forecasting. Instruction-tuned LLMs improve textual analysis but offer limited benefits for complex tasks such as QA. FinBen has been used to host the first financial LLMs shared task at the FinNLP-AgentScen workshop during IJCAI-2024, attracting 12 teams. Their novel solutions outperformed GPT-4, showcasing FinBen's potential to drive innovations in financial LLMs. All datasets and code are publicly available for the research community, with results shared and updated regularly on the Open Financial LLM Leaderboard.", "keywords": "Financial Large language models; Financial NLP; Financial forecasting; Stock-trading", "primary_area": "", "supplementary_material": "/attachment/f87d20980a0cc312e98f943dea79e02f53b9d2eb.pdf", "author": "Qianqian Xie;Weiguang Han;Zhengyu Chen;Ruoyu Xiang;Xiao Zhang;Yueru He;Mengxi Xiao;Dong Li;Yongfu Dai;Duanyu Feng;Yijing Xu;Haoqiang Kang;Ziyan Kuang;Chenhan Yuan;Kailai Yang;Zheheng Luo;Tianlin Zhang;Zhiwei Liu;GUOJUN XIONG;Zhiyang Deng;Yuechen Jiang;Zhiyuan Yao;Haohang Li;Yangyang Yu;Gang Hu;Huang Jiajia;Xiao-Yang Liu;Alejandro Lopez-Lira;Benyou Wang;Yanzhao Lai;Hao Wang;Min Peng;Sophia Ananiadou;Jimin Huang", "authorids": "~Qianqian_Xie1;~Weiguang_Han1;~Zhengyu_Chen7;~Ruoyu_Xiang1;~Xiao_Zhang24;~Yueru_He1;~Mengxi_Xiao1;~Dong_Li27;~Yongfu_Dai1;~Duanyu_Feng1;~Yijing_Xu1;~Haoqiang_Kang1;~Ziyan_Kuang1;~Chenhan_Yuan1;~Kailai_Yang1;~Zheheng_Luo1;~Tianlin_Zhang1;~Zhiwei_Liu5;~GUOJUN_XIONG1;~Zhiyang_Deng1;~Yuechen_Jiang1;~Zhiyuan_Yao2;~Haohang_Li1;~Yangyang_Yu1;~Gang_Hu3;~Huang_Jiajia1;~Xiao-Yang_Liu1;~Alejandro_Lopez-Lira1;~Benyou_Wang2;~Yanzhao_Lai1;~Hao_Wang80;~Min_Peng2;~Sophia_Ananiadou1;~Jimin_Huang1", "gender": "F;M;M;M;;;F;F;M;M;F;M;F;;M;M;M;M;;M;F;M;M;F;M;F;M;;M;M;M;F;F;M", "homepage": ";https://orcid.org/0000-0003-1821-4667;;https://github.com/ASCRX;https://github.com/shinohara-xiao;https://yueruhe-linda.myportfolio.com/work;;https://github.com/ldruth28;;https://colfeng.github.io/;;https://mk322.github.io;https://www.linkedin.com/in/%E7%B4%AB%E5%AB%A3-%E5%86%B5-23800027b?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BQIHhPDWSTrWFRmFJa1cOJg%3D%3D;https://chenhan97.github.io/;https://stevekgyang.github.io/;https://zhehengluok.github.io/;http://www.zhangtianlin.top/;https://lzw108.github.io/;https://xionggj001.github.io/;https://scholar.google.com/citations?user=JuLlgnUAAAAJ;https://www.linkedin.com/in/carolyn-yuechen-jiang-a72764194/;https://theling.github.io/;https://scholar.google.com/citations?user=gtaqrtoAAAAJ&hl=en&oi=ao;;;https://jenny-hjj.github.io/;http://www.tensorlet.org/publications/;;https://wabyking.github.io/old.html;;https://math.scu.edu.cn/info/1013/9318.htm;;http://www.manchester.ac.uk/research/Sophia.ananiadou/;", "dblp": ";258/5894;;;;371/1115;;;;273/0679.html;;;;;277/3317;;;;214/2134.html;277/4880.html;361/6734;;313/5337;59/3689;24/1820-3;;125/9849;;169/1793;;;;47/4142;163/4119", "google_scholar": "UYW7X_0AAAAJ;;https://scholar.google.com/citations?hl=en;;;;;;https://scholar.google.com.hk/citations?user=TI1KZkwAAAAJ;https://scholar.google.com.hk/citations?user=aRXvKmwAAAAJ;;Wt_cl7UAAAAJ;;ztwJYigAAAAJ;df4H1aQAAAAJ;hSSwrCsAAAAJ;Yy88kOoAAAAJ;gfqqbIwAAAAJ;FIBwLnoAAAAJ;JuLlgnUAAAAJ;jlDkIlQAAAAJ;ZLlWPBAAAAAJ;gtaqrtoAAAAJ;;;;https://scholar.google.com/citations?hl=en;;Jk4vJU8AAAAJ;;;;https://scholar.google.com.tw/citations?user=quhi-K0AAAAJ;SnQ_CycAAAAJ", "orcid": "0000-0002-9588-7454;;;;;;0000-0001-9910-160X;0009-0001-1291-7735;0009-0005-9710-7561;0000-0002-8288-1002;;;;;0000-0003-3142-2516;;0000-0003-0843-1916;0000-0002-7015-5054;;0009-0007-6960-2025;0009-0001-7480-1041;0000-0001-5436-2910;0009-0002-3604-7284;0009-0009-4595-1786;;;;;0000-0002-1501-9914;0000-0002-8067-0842;0000-0003-2480-4436;0000-0002-8766-1105;0000-0002-4097-9191;0000-0002-3501-3907", "linkedin": ";;;;;yueruhe/;;;;;yijing-xu/;;;;;;;zhiwei-liu-63727a220/;guojun-%E5%9B%BD%E9%92%A7-xiong-48696aa6/;zhiyang-deng-784587157/;;zhiyuan-yao-74b84113a/;haohang-li-acatsama/;yangyang%EF%BC%88shirley%EF%BC%89-yu-88542174/;;;;;;;;;sophia-ananiadou-ba98b63/;", "or_profile": "~Qianqian_Xie1;~Weiguang_Han1;~Zhengyu_Chen7;~Ruoyu_Xiang1;~Xiao_Zhang24;~Yueru_He1;~Mengxi_Xiao1;~Dong_Li27;~Yongfu_Dai1;~Duanyu_Feng1;~Yijing_Xu1;~Haoqiang_Kang1;~Ziyan_Kuang1;~Chenhan_Yuan1;~Kailai_Yang1;~Zheheng_Luo1;~Tianlin_Zhang1;~Zhiwei_Liu5;~GUOJUN_XIONG1;~Zhiyang_Deng1;~Yuechen_Jiang1;~Zhiyuan_Yao2;~Haohang_Li1;~Yangyang_Yu1;~Gang_Hu3;~Huang_Jiajia1;~Xiao-Yang_Liu1;~Alejandro_Lopez-Lira1;~Benyou_Wang2;~Yanzhao_Lai1;~Hao_Wang80;~Min_Peng2;~Sophia_Ananiadou1;~Jimin_Huang1", "aff": "Yale University;Wuhan University;Wuhan University;New York University;SUN YAT-SEN UNIVERSITY;Columbia University;Wuhan University;Wuhan University;Sichuan University;Sichuan University;;University of California, San Diego;Jiangxi Normal University;The University of Manchester;University of Manchester;University of Manchester;University of Manchester;University of Manchester;State University of New York at Stony Brook;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Stevens Institute of Technology;Yunnan University; NanJing Audit University;Columbia University;;The Chinese University of Hong Kong, Shenzhen;Southwest Jiaotong University;Sichuan University;Wuhan University;University of Manchester;The Fin AI", "aff_domain": "yale.edu;whu.edu.cn;whu.edu.cn;nyu.edu;sysu.edu.cn;columbia.edu;whu.edu.cn;whu.edu.cn;scu.edu.cn;scu.edu.cn;;ucsd.edu;jxnu.edu.cn;manchester.ac.uk;cs.manchester.ac.uk;cs.manchester.ac.uk;manchester.ac.uk;cs.manchester.ac.uk;stonybrook.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;ynu.edu.cn;nau.edu;columbia.edu;;cuhk.edu.cn;swjtu.edu.cn;scu.edu.cn;whu.edu.cn;manchester.ac.uk;thefin.ai", "position": "Postdoc;Postdoc;MS student;MS student;MS student;MS student;MS student;PhD student;MS student;PhD student;;PhD student;MS student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Intern;PhD student;PhD student;PhD student;Lecturer;Assistant Professor;PhD student;;Assistant Professor;Assistant Professor;Associate Professor;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nxie2024finben,\ntitle={FinBen: An Holistic Financial Benchmark for Large Language Models},\nauthor={Qianqian Xie and Weiguang Han and Zhengyu Chen and Ruoyu Xiang and Xiao Zhang and Yueru He and Mengxi Xiao and Dong Li and Yongfu Dai and Duanyu Feng and Yijing Xu and Haoqiang Kang and Ziyan Kuang and Chenhan Yuan and Kailai Yang and Zheheng Luo and Tianlin Zhang and Zhiwei Liu and GUOJUN XIONG and Zhiyang Deng and Yuechen Jiang and Zhiyuan Yao and Haohang Li and Yangyang Yu and Gang Hu and Huang Jiajia and Xiao-Yang Liu and Alejandro Lopez-Lira and Benyou Wang and Yanzhao Lai and Hao Wang and Min Peng and Sophia Ananiadou and Jimin Huang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=loDHZstVP6}\n}", "github": "", "reviewers": "m9nq;wuHu;9EDL;M41M", "pdf_size": 7780237, "rating": "5;5;7;9", "confidence": "5;4;3;4", "wc_summary_and_contributions": "37;27;102;61", "wc_strengths": "22;10;66;15", "wc_improvement": "18;48;74;85", "wc_limitations": "1;1;9;1", "wc_correctness": "1;1;11;1", "wc_clarity": "1;2;6;12", "wc_relation_to_prior_work": "1;4;12;27", "wc_documentation": "1;1;13;35", "wc_additional_feedback": "1;1;1;1", "wc_review": "83;95;294;238", "wc_reply_reviewers": "0;0;0;12", "wc_reply_authors": "113;113;113;67", "reply_reviewers": "0;0;0;1", "reply_authors": "3;3;3;2", "rating_avg": [ 6.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 56.75, 28.899610724021873 ], "wc_strengths_avg": [ 28.25, 22.20782519743885 ], "wc_improvement_avg": [ 56.25, 25.849323008543184 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_correctness_avg": [ 3.5, 4.330127018922194 ], "wc_clarity_avg": [ 5.25, 4.322904116447646 ], "wc_relation_to_prior_work_avg": [ 11.0, 10.074720839804943 ], "wc_documentation_avg": [ 12.5, 13.88344337691482 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 177.5, 90.78683825313006 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 101.5, 19.91858428704209 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 34, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8358143678813812509&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "yale.edu;whu.edu.cn;whu.edu.cn;nyu.edu;sysu.edu.cn;columbia.edu;whu.edu.cn;whu.edu.cn;scu.edu.cn;scu.edu.cn;;ucsd.edu;jxnu.edu.cn;manchester.ac.uk;cs.manchester.ac.uk;cs.manchester.ac.uk;manchester.ac.uk;cs.manchester.ac.uk;stonybrook.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;stevens.edu;ynu.edu.cn;nau.edu;columbia.edu;;cuhk.edu.cn;swjtu.edu.cn;scu.edu.cn;whu.edu.cn;manchester.ac.uk;thefin.ai", "author_num": 34, "aff_unique_index": "0;1;1;2;3;4;1;1;5;5;6;7;8;8;8;8;8;9;10;10;10;10;10;11;12;4;13;14;5;1;8;15", "aff_unique_norm": "Yale University;Wuhan University;New York University;Sun Yat-sen University;Columbia University;Sichuan University;University of California, San Diego;Jiangxi Normal University;University of Manchester;State University of New York at Stony Brook;Stevens Institute of Technology;Yunnan University;Nanjing Audit University;Chinese University of Hong Kong;Southwest Jiao Tong University;Fin AI", "aff_unique_dep": ";;;;;;;;;;;;;;;", "aff_unique_url": "https://www.yale.edu;http://www.whu.edu.cn/;https://www.nyu.edu;http://www.sysu.edu.cn;https://www.columbia.edu;https://www.scu.edu.cn;https://www.ucsd.edu;http://www.jxnu.edu.cn;https://www.manchester.ac.uk;https://www.stonybrook.edu;https://www.stevens.edu;http://www.ynu.edu.cn;http://www.nau.edu.cn/;https://www.cuhk.edu.cn;https://www.swjtu.edu.cn;https://www.thefinai.com", "aff_unique_abbr": "Yale;WHU;NYU;SYSU;Columbia;SCU;UCSD;JXNU;UoM;SUNY Stony Brook;SIT;YNU;NAU;CUHK;SWJTU;Fin AI", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";San Diego;Stony Brook;Shenzhen", "aff_country_unique_index": "0;1;1;0;1;0;1;1;1;1;0;1;2;2;2;2;2;0;0;0;0;0;0;1;1;0;1;1;1;1;2;0", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "MMLONGBENCH-DOC: Benchmarking Long-context Document Understanding with Visualizations", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97524", "id": "loJM1acwzf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=loJM1acwzf", "openreview": "https://openreview.net/forum?id=loJM1acwzf", "poster": "/media/PosterPDFs/NeurIPS%202024/97524.png?t=1731683156.8280988", "project": "", "author_site": "Yubo Ma, Yuhang Zang, Liangyu Chen, Meiqi Chen, Yizhu Jiao, Xinze Li, Xinyuan Lu, Ziyu Liu, Yan Ma, Xiaoyi Dong, Pan Zhang, Liangming Pan, Yu-Gang Jiang, Jiaqi Wang, Yixin Cao, Aixin Sun", "tldr": "", "abstract": "Understanding documents with rich layouts and multi-modal components is a long-standing and practical task. Recent Large Vision-Language Models (LVLMs) have made remarkable strides in various tasks, particularly in single-page document understanding (DU). However, their abilities on long-context DU remain an open problem. This work presents MMLONGBENCH-DOC, a long-context, multi- modal benchmark comprising 1,082 expert-annotated questions. Distinct from previous datasets, it is constructed upon 135 lengthy PDF-formatted documents with an average of 47.5 pages and 21,214 textual tokens. Towards comprehensive evaluation, answers to these questions rely on pieces of evidence from (1) different sources (text, image, chart, table, and layout structure) and (2) various locations (i.e., page number). Moreover, 33.7\\% of the questions are cross-page questions requiring evidence across multiple pages. 20.6\\% of the questions are designed to be unanswerable for detecting potential hallucinations. Experiments on 14 LVLMs demonstrate that long-context DU greatly challenges current models. Notably, the best-performing model, GPT-4o, achieves an F1 score of only 44.9\\%, while the second-best, GPT-4V, scores 30.5\\%. Furthermore, 12 LVLMs (all except GPT-4o and GPT-4V) even present worse performance than their LLM counterparts which are fed with lossy-parsed OCR documents. These results validate the necessity of future research toward more capable long-context LVLMs.", "keywords": "Long-context;multi-modality document understanding", "primary_area": "", "supplementary_material": "/attachment/a2fe71df388f402f589b6c1ce57b6b4903e883b4.pdf", "author": "Yubo Ma;Yuhang Zang;Liangyu Chen;Meiqi Chen;Yizhu Jiao;Xinze Li;Xinyuan Lu;Ziyu Liu;Yan Ma;Xiaoyi Dong;Pan Zhang;Liangming Pan;Yu-Gang Jiang;Jiaqi Wang;Yixin Cao;Aixin Sun", "authorids": "~Yubo_Ma1;~Yuhang_Zang1;~Liangyu_Chen3;~Meiqi_Chen1;~Yizhu_Jiao1;~Xinze_Li3;~Xinyuan_Lu1;~Ziyu_Liu5;~Yan_Ma5;~Xiaoyi_Dong1;~Pan_Zhang1;~Liangming_Pan1;~Yu-Gang_Jiang1;~Jiaqi_Wang1;~Yixin_Cao2;~Aixin_Sun1", "gender": "M;;F;F;F;M;F;M;M;M;M;M;M;M;M;M", "homepage": "https://yuhangzang.github.io;https://www.cliangyu.com/;;https://yzjiao.github.io/;https://xinyuanlu00.github.io/;https://liuziyu77.github.io/;;;https://panzhang0212.github.io/;https://liangmingpan.bio;https://fvl.fudan.edu.cn/people/yugangjiang/;https://myownskyw7.github.io/;https://sites.google.com/view/yixin-homepage;https://personal.ntu.edu.sg/axsun/;;https://mayubo2333.github.io/", "dblp": "230/4433;;217/4084-1.html;https://dblp.uni-trier.de/pid/250/9757;88/422;;;230/3711;;186/9707;24/5818;44/740-3;20/8038-2;78/5155;;229/7323", "google_scholar": "hW23VKIAAAAJ;vi5Zt9oAAAAJ;qo2YMZAAAAAJ;sHgBvMgAAAAJ;-NtdX2sAAAAJ;;;FscToE0AAAAJ;moHH480AAAAJ;JcjjOTUAAAAJ;f3_FP8AAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;https://scholar.google.com.sg/citations?user=wyKGVKUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-1110-5062;;;;;;0009-0007-9087-0577;;;;;;;0000-0003-0764-4258;;", "linkedin": "yuhang-zang/;chen-liangyu/;;;;;;;;;;;;aixin-sun-%E5%AD%99%E7%88%B1%E6%AC%A3-43056622/;xinze-li-1581581b0/;yubo-ma-17054b168/", "or_profile": "~Yuhang_Zang1;~Liangyu_Chen3;~Meiqi_Chen1;~Yizhu_Jiao1;~Xinyuan_Lu1;~Ziyu_Liu5;~Yan_Ma5;~Xiaoyi_Dong1;~Pan_Zhang1;~Liangming_Pan1;~Yu-Gang_Jiang1;~Jiaqi_Wang1;~Yixin_Cao2;~Aixin_Sun1;~XINZE_LI2;~Ma_Yubo1", "aff": "Shanghai Artificial Intelligence Laboratory;Nanyang Technological University;Peking University;UIUC;National University of Singapore;Wuhan University;Xi'an Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;University of California, Santa Barbara;Fudan University;Shanghai AI Laboratory;Singapore Management University;Nanyang Technological University;School of Computer Science and Engineering, Nanyang Technological University;School of Computer Science and Engineering, Nanyang Technological University", "aff_domain": "pjlab.org.cn;ntu.edu.sg;pku.edu.cn;illinois.edu;u.nus.edu;whu.edu.cn;xjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;ucsb.edu;fudan.edu.cn;pjlab.org.cn;smu.edu.sg;ntu.edu.sg;scse.ntu.edu.sg;e.ntu.edu.sg", "position": "Researcher;Researcher;PhD student;PhD student;PhD student;Undergrad student;PhD student;Researcher;Researcher;Postdoc;Full Professor;Research Scientist;Assistant Professor;Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nma2024mmlongbenchdoc,\ntitle={{MMLONGBENCH}-{DOC}: Benchmarking Long-context Document Understanding with Visualizations},\nauthor={Yubo Ma and Yuhang Zang and Liangyu Chen and Meiqi Chen and Yizhu Jiao and Xinze Li and Xinyuan Lu and Ziyu Liu and Yan Ma and Xiaoyi Dong and Pan Zhang and Liangming Pan and Yu-Gang Jiang and Jiaqi Wang and Yixin Cao and Aixin Sun},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=loJM1acwzf}\n}", "github": "", "reviewers": "yYQY;xHZL;ecPu;u61o", "pdf_size": 20958055, "rating": "7;7;7;9", "confidence": "3;3;4;3", "wc_summary_and_contributions": "89;65;49;190", "wc_strengths": "38;212;105;413", "wc_improvement": "73;135;93;410", "wc_limitations": "1;7;37;513", "wc_correctness": "1;21;9;469", "wc_clarity": "3;52;1;1", "wc_relation_to_prior_work": "1;22;14;1", "wc_documentation": "18;13;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "225;528;310;1999", "wc_reply_reviewers": "11;44;0;0", "wc_reply_authors": "0;0;87;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 98.25, 54.851504081474374 ], "wc_strengths_avg": [ 192.0, 141.8855172313228 ], "wc_improvement_avg": [ 177.75, 135.9436923876941 ], "wc_limitations_avg": [ 139.5, 216.07116883101273 ], "wc_correctness_avg": [ 125.0, 198.73600579663466 ], "wc_clarity_avg": [ 14.25, 21.810261346439663 ], "wc_relation_to_prior_work_avg": [ 9.5, 8.958236433584458 ], "wc_documentation_avg": [ 8.25, 7.46240577829965 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 765.5, 720.685264175701 ], "wc_reply_reviewers_avg": [ 13.75, 18.0329559418305 ], "wc_reply_authors_avg": [ 21.75, 37.67210506462308 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16822330691956291853&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 6, "email": "pjlab.org.cn;ntu.edu.sg;pku.edu.cn;illinois.edu;u.nus.edu;whu.edu.cn;xjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;ucsb.edu;fudan.edu.cn;pjlab.org.cn;smu.edu.sg;ntu.edu.sg;scse.ntu.edu.sg;e.ntu.edu.sg", "author_num": 16, "aff_unique_index": "0;1;2;3;4;5;6;0;0;7;8;9;10;1;1;1", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Nanyang Technological University;Peking University;University of Illinois Urbana-Champaign;National University of Singapore;Wuhan University;Xi'an Jiao Tong University;University of California, Santa Barbara;Fudan University;Shanghai AI Laboratory;Singapore Management University", "aff_unique_dep": ";;;;;;;;;;", "aff_unique_url": "http://www.shailab.org/;https://www.ntu.edu.sg;http://www.pku.edu.cn;https://www illinois.edu;https://www.nus.edu.sg;http://www.whu.edu.cn/;https://www.xjtu.edu.cn;https://www.ucsb.edu;https://www.fudan.edu.cn;https://www.shanghai-ai-lab.com;https://www.smu.edu.sg", "aff_unique_abbr": "Shanghai AI Lab;NTU;Peking U;UIUC;NUS;WHU;XJTU;UCSB;Fudan;SAIL;SMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Urbana-Champaign;Santa Barbara", "aff_country_unique_index": "0;1;0;2;1;0;0;0;0;2;0;0;1;1;1;1", "aff_country_unique": "China;Singapore;United States" }, { "title": "Diffusion Models With Learned Adaptive Noise", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93802", "id": "loMa99A4p8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=loMa99A4p8", "openreview": "https://openreview.net/forum?id=loMa99A4p8", "poster": "/media/PosterPDFs/NeurIPS%202024/93802.png?t=1733958752.2373445", "project": "", "author_site": "Subham Sahoo, Aaron Gokaslan, Christopher De Sa, Volodymyr Kuleshov", "tldr": "", "abstract": "Diffusion models have gained traction as powerful algorithms for synthesizing high-quality images. Central to these algorithms is the diffusion process, a set of equations which maps data to noise \nin a way that can significantly affect performance. \nIn this paper, we explore whether the diffusion\nprocess can be learned from data.\nOur work is grounded in Bayesian inference and seeks to improve log-likelihood estimation by casting the learned diffusion process as an approximate variational posterior that yields a tighter lower bound (ELBO) on the likelihood.\nA widely held assumption is that the ELBO is invariant to the noise process: our work dispels this assumption and proposes multivariate learned adaptive noise (MuLAN), a learned diffusion process that applies noise at different rates across an image. Our method consists of three components: a multivariate noise schedule, adaptive input-conditional diffusion, and auxiliary variables; these components ensure that the ELBO is no longer invariant to the choice of the noise schedule as in previous works. Empirically, MuLAN sets a new **state-of-the-art** in density estimation on CIFAR-10 and ImageNet while matching the performance of previous state-of-the-art models with **50%** fewer steps. We provide the code, along with a blog post and video tutorial on the project page: https://s-sahoo.com/MuLAN", "keywords": "Generative Models;Diffusion Models;Image Diffusion", "primary_area": "generative_models", "supplementary_material": "", "author": "Subham Sekhar Sahoo;Aaron Gokaslan;Christopher De Sa;Volodymyr Kuleshov", "authorids": "~Subham_Sekhar_Sahoo1;~Aaron_Gokaslan1;~Christopher_De_Sa2;~Volodymyr_Kuleshov1", "gender": "M;M;;M", "homepage": ";https://skylion007.github.io/;https://www.cs.cornell.edu/~kuleshov/;http://cs.cornell.edu/~cdesa", "dblp": ";220/6816;81/8612;154/6336", "google_scholar": "Z7DoDbAAAAAJ;Mt2wyL4AAAAJ;RY_t8XAAAAAJ;", "orcid": ";0000-0002-3575-2961;;", "linkedin": "shakeh3r/;aarongokaslan/;;", "or_profile": "~Subham_Sekhar_Sahoo1;~Aaron_Gokaslan1;~Volodymyr_Kuleshov1;~Christopher_De_Sa1", "aff": "Department of Computer Science, Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nsahoo2024diffusion,\ntitle={Diffusion Models With Learned Adaptive Noise},\nauthor={Subham Sekhar Sahoo and Aaron Gokaslan and Christopher De Sa and Volodymyr Kuleshov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=loMa99A4p8}\n}", "github": "", "reviewers": "uSJa;22Fq;QQVN;1B1k", "pdf_size": 7927446, "rating": "6;7;7;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "303;49;92;44", "wc_strengths": "2;49;58;86", "wc_weaknesses": "2;239;99;330", "wc_questions": "2;30;6;276", "wc_limitations": "2;50;13;7", "wc_review": "311;417;268;743", "wc_reply_reviewers": "7;34;40;13", "wc_reply_authors": "51;14;728;1792", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;4", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.0, 106.15319119084457 ], "wc_strengths_avg": [ 48.75, 30.243801017729236 ], "wc_weaknesses_avg": [ 167.5, 126.09619343977042 ], "wc_questions_avg": [ 78.5, 114.52838076214995 ], "wc_limitations_avg": [ 18.0, 18.881207588499205 ], "wc_review_avg": [ 434.75, 186.04619721993782 ], "wc_reply_reviewers_avg": [ 23.5, 13.82931668593933 ], "wc_reply_authors_avg": [ 646.25, 719.9806855048266 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=677316914839232121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.cornell.edu;cornell.edu;cornell.edu;cornell.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Be Confident in What You Know: Bayesian Parameter Efficient Fine-Tuning of Vision Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93801", "id": "loQCk0qruU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=loQCk0qruU", "openreview": "https://openreview.net/forum?id=loQCk0qruU", "poster": "/media/PosterPDFs/NeurIPS%202024/93801.png?t=1731174118.7681115", "project": "", "author_site": "Deep Pandey, Spandan Pyakurel, Qi Yu", "tldr": "", "abstract": "Large transformer-based foundation models have been commonly used as pre-trained models that can be adapted to different challenging datasets and settings with state-of-the-art generalization performance. Parameter efficient fine-tuning ($\\texttt{PEFT}$) provides promising generalization performance in adaptation while incurring minimum computational overhead. However, adaptation of these foundation models through $\\texttt{PEFT}$ leads to accurate but severely underconfident models, especially in few-shot learning settings. Moreover, the adapted models lack accurate fine-grained uncertainty quantification capabilities limiting their broader applicability in critical domains. To fill out this critical gap, we develop a novel lightweight {Bayesian Parameter Efficient Fine-Tuning} (referred to as $\\texttt{Bayesian-PEFT}$) framework for large transformer-based foundation models. The framework integrates state-of-the-art $\\texttt{PEFT}$ techniques with two Bayesian components to address the under-confidence issue while ensuring reliable prediction under challenging few-shot settings. The first component performs base rate adjustment to strengthen the prior belief corresponding to the knowledge gained through pre-training, making the model more confident in its predictions; the second component builds an evidential ensemble that leverages belief regularization to ensure diversity among different ensemble components.\nOur thorough theoretical analysis justifies that the Bayesian components can ensure reliable and accurate few-shot adaptations with well-calibrated uncertainty quantification. Extensive experiments across diverse datasets, few-shot learning scenarios, and multiple $\\texttt{PEFT}$ techniques demonstrate the outstanding prediction and calibration performance by $\\texttt{Bayesian-PEFT}$.", "keywords": "Model calibration;uncertainty quantification;few-shot adaptation;Parameter-Efficient Fine-Tuning", "primary_area": "machine_vision", "supplementary_material": "/attachment/37d7a1c90c82bcb36e1ff2d5520c8f53e1bb92a9.zip", "author": "Deep Shankar Pandey;Spandan Pyakurel;Qi Yu", "authorids": "~Deep_Shankar_Pandey1;~Spandan_Pyakurel1;~Qi_Yu1", "gender": "M;F;M", "homepage": "https://pandeydeep9.github.io/;;https://www.rit.edu/mining/", "dblp": "306/7473.html;384/4136.html;58/6957-1", "google_scholar": "EGkWnakAAAAJ;1ZSPfBoAAAAJ;L3gWdfEAAAAJ", "orcid": ";;0000-0002-0426-5407", "linkedin": "pandeydeep9;;", "or_profile": "~Deep_Shankar_Pandey1;~Spandan_Pyakurel1;~Qi_Yu1", "aff": "Rochester Institute of Technology;Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "rit.edu;rit.edu;rit.edu", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\npandey2024be,\ntitle={Be Confident in What You Know: Bayesian Parameter Efficient Fine-Tuning of Vision Foundation Models},\nauthor={Deep Shankar Pandey and Spandan Pyakurel and Qi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=loQCk0qruU}\n}", "github": "", "reviewers": "CbTK;YJfL;DMeg;7WZR", "pdf_size": 1663341, "rating": "6;6;6;6", "confidence": "4;2;3;3", "soundness": "3;3;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "79;32;85;234", "wc_strengths": "41;26;38;244", "wc_weaknesses": "105;42;55;924", "wc_questions": "10;21;44;243", "wc_limitations": "1;1;1;8", "wc_review": "236;122;223;1653", "wc_reply_reviewers": "0;0;0;251", "wc_reply_authors": "68;77;94;284", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.5, 75.86336401715917 ], "wc_strengths_avg": [ 87.25, 90.6735214933224 ], "wc_weaknesses_avg": [ 281.5, 371.69241315905276 ], "wc_questions_avg": [ 79.5, 95.19059827524985 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 558.5, 633.4486956336717 ], "wc_reply_reviewers_avg": [ 62.75, 108.68618817494705 ], "wc_reply_authors_avg": [ 130.75, 88.97014948846608 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7494926007488799124&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "email": "rit.edu;rit.edu;rit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Rochester Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.rit.edu", "aff_unique_abbr": "RIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Black-Box Forgetting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93800", "id": "lpFDhC91Oj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lpFDhC91Oj", "openreview": "https://openreview.net/forum?id=lpFDhC91Oj", "poster": "/media/PosterPDFs/NeurIPS%202024/93800.png?t=1731577503.1606147", "project": "", "author_site": "Yusuke Kuwana, Yuta Goto, Takashi Shibata, Go Irie", "tldr": "", "abstract": "Large-scale pre-trained models (PTMs) provide remarkable zero-shot classification capability covering a wide variety of object classes. However, practical applications do not always require the classification of all kinds of objects, and leaving the model capable of recognizing unnecessary classes not only degrades overall accuracy but also leads to operational disadvantages. To mitigate this issue, we explore the selective forgetting problem for PTMs, where the task is to make the model unable to recognize only the specified classes, while maintaining accuracy for the rest. All the existing methods assume ''white-box'' settings, where model information such as architectures, parameters, and gradients is available for training. However, PTMs are often ''black-box,'' where information on such models is unavailable for commercial reasons or social responsibilities. In this paper, we address a novel problem of selective forgetting for black-box models, named Black-Box Forgetting, and propose an approach to the problem. Given that information on the model is unavailable, we optimize the input prompt to decrease the accuracy of specified classes through derivative-free optimization. To avoid difficult high-dimensional optimization while ensuring high forgetting performance, we propose Latent Context Sharing, which introduces common low-dimensional latent components among multiple tokens for the prompt. Experiments on four standard benchmark datasets demonstrate the superiority of our method with reasonable baselines. The code is available at https://github.com/yusukekwn/Black-Box-Forgetting.", "keywords": "Black-Box Tuning;Vision-Language Models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yusuke Kuwana;Yuta Goto;Takashi Shibata;Go Irie", "authorids": "~Yusuke_Kuwana1;~Yuta_Goto1;~Takashi_Shibata3;~Go_Irie3", "gender": ";M;M;M", "homepage": ";;;", "dblp": ";;37/2234-1;98/7454", "google_scholar": ";;n_Je1nsAAAAJ;2bCSG1AAAAAJ", "orcid": ";;0000-0001-8072-3847;0000-0002-4309-4700", "linkedin": ";yutagoto-520367303/;;", "or_profile": "~Yusuke_Kuwana1;~Yuta_Goto1;~Takashi_Shibata3;~Go_Irie3", "aff": ";;NEC;Keio University", "aff_domain": ";;nec.com;keio.ac.jp", "position": ";;Senior Principal Research Scientist;Visiting Associate Professor", "bibtex": "@inproceedings{\nkuwana2024blackbox,\ntitle={Black-Box Forgetting},\nauthor={Yusuke Kuwana and Yuta Goto and Takashi Shibata and Go Irie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lpFDhC91Oj}\n}", "github": "", "reviewers": "SKod;bJB5;VErt;LjLN", "pdf_size": 700824, "rating": "5;6;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;2", "wc_summary": "68;56;76;148", "wc_strengths": "39;60;113;127", "wc_weaknesses": "234;38;251;220", "wc_questions": "154;454;10;125", "wc_limitations": "10;62;14;161", "wc_review": "505;670;464;781", "wc_reply_reviewers": "485;102;0;829", "wc_reply_authors": "812;219;0;1231", "reply_reviewers": "3;1;0;3", "reply_authors": "4;2;1;4", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.0, 35.93048844644336 ], "wc_strengths_avg": [ 84.75, 36.36189626518397 ], "wc_weaknesses_avg": [ 185.75, 86.00690379266074 ], "wc_questions_avg": [ 185.75, 163.97008111237855 ], "wc_limitations_avg": [ 61.75, 60.84560378531879 ], "wc_review_avg": [ 605.0, 127.5558701118847 ], "wc_reply_reviewers_avg": [ 354.0, 328.48363733982245 ], "wc_reply_authors_avg": [ 565.5, 485.67092768663844 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:62RrDoCiEKQJ:scholar.google.com/&scioq=Black-Box+Forgetting&hl=en&as_sdt=0,33", "gs_version_total": 6, "email": ";;nec.com;keio.ac.jp", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "NEC Corporation;Keio University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nec.com;https://www.keio.ac.jp", "aff_unique_abbr": "NEC;Keio", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Vaccine: Perturbation-aware Alignment for Large Language Models against Harmful Fine-tuning Attack", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93799", "id": "lpXDZKiAnt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lpXDZKiAnt", "openreview": "https://openreview.net/forum?id=lpXDZKiAnt", "poster": "/media/PosterPDFs/NeurIPS%202024/93799.png?t=1733447239.0953732", "project": "", "author_site": "Tiansheng Huang, Sihao Hu, Ling Liu", "tldr": "", "abstract": "The new paradigm of fine-tuning-as-a-service introduces a new attack surface for Large Language Models (LLMs): a few harmful data uploaded by users can easily trick the fine-tuning to produce an alignment-broken model. We conduct an empirical analysis and uncover\na \\textit{harmful embedding drift} phenomenon, showing a probable \ncause of the alignment-broken effect. Inspired by our findings, we propose Vaccine, a perturbation-aware alignment technique to mitigate the security risk of users fine-tuning. The core idea of Vaccine is to produce invariant hidden embeddings by progressively adding crafted perturbation to them in the alignment phase. This enables the embeddings to withstand harmful perturbation from un-sanitized user data in the fine-tuning phase. Our results on open source mainstream LLMs (e.g., Llama2, Opt, Vicuna) demonstrate that Vaccine can boost the robustness of alignment against harmful prompts induced embedding drift while reserving reasoning ability towards benign prompts. Our code is available at https://github.com/git-disl/Vaccine.", "keywords": "Larger language model;safety alignment;perturbation-aware alignment;harmful finetuning attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Tiansheng Huang;Sihao Hu;Ling Liu", "authorids": "~Tiansheng_Huang1;~Sihao_Hu1;~Ling_Liu3", "gender": "M;M;", "homepage": "https://huangtiansheng.github.io/;https://bayi-hu.github.io/;", "dblp": "249/2114;266/4995;", "google_scholar": "zz6Oq8wAAAAJ;WcZbKF4AAAAJ;", "orcid": "0000-0002-4557-1865;0000-0003-3297-6991;", "linkedin": ";;", "or_profile": "~Tiansheng_Huang1;~Sihao_Hu1;~Ling_Liu3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;", "aff_domain": "gatech.edu;gatech.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nhuang2024vaccine,\ntitle={Vaccine: Perturbation-aware Alignment for Large Language Models against Harmful Fine-tuning Attack},\nauthor={Tiansheng Huang and Sihao Hu and Ling Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lpXDZKiAnt}\n}", "github": "", "reviewers": "M5RT;CLDA;Pifs;a2cG", "pdf_size": 1440542, "rating": "6;6;6;6", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "43;130;57;85", "wc_strengths": "61;104;43;18", "wc_weaknesses": "27;136;40;106", "wc_questions": "20;31;8;48", "wc_limitations": "10;11;1;12", "wc_review": "161;412;149;269", "wc_reply_reviewers": "16;103;40;39", "wc_reply_authors": "28;102;35;494", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;4", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.75, 33.22931687531358 ], "wc_strengths_avg": [ 56.5, 31.388692231439016 ], "wc_weaknesses_avg": [ 77.25, 45.25138119439008 ], "wc_questions_avg": [ 26.75, 14.720309100015529 ], "wc_limitations_avg": [ 8.5, 4.387482193696061 ], "wc_review_avg": [ 247.75, 105.71985385914984 ], "wc_reply_reviewers_avg": [ 49.5, 32.34578797927173 ], "wc_reply_authors_avg": [ 164.75, 192.27503088024716 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15722852483233335561&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 2, "email": "gatech.edu;gatech.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "ShowMaker: Creating High-Fidelity 2D Human Video via Fine-Grained Diffusion Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93798", "id": "lpxdG0hk4H", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lpxdG0hk4H", "openreview": "https://openreview.net/forum?id=lpxdG0hk4H", "poster": "/media/PosterPDFs/NeurIPS%202024/93798.png?t=1731595077.9061346", "project": "", "author_site": "Quanwei Yang, Jiazhi Guan, Kaisiyuan Wang, Lingyun Yu, Wenqing Chu, Hang Zhou, ZhiQiang Feng, Haocheng Feng, Errui Ding, Jingdong Wang, Hongtao Xie", "tldr": "", "abstract": "Although significant progress has been made in human video generation, most previous studies focus on either human facial animation or full-body animation, which cannot be directly applied to produce realistic conversational human videos with frequent hand gestures and various facial movements simultaneously.\nTo address these limitations, we propose a 2D human video generation framework, named ShowMaker, capable of generating high-fidelity half-body conversational videos via fine-grained diffusion modeling.\nWe leverage dual-stream diffusion models as the backbone of our framework and carefully design two novel components for crucial local regions (i.e., hands and face) that can be easily integrated into our backbone.\nSpecifically, to handle the challenging hand generation caused by sparse motion guidance, we propose a novel Key Point-based Fine-grained Hand Modeling module by amplifying positional information from raw hand key points and constructing a corresponding key point-based codebook. \nMoreover, to restore richer facial details in generated results, we introduce a Face Recapture module, which extracts facial texture features and global identity features from the aligned human face and integrates them into the diffusion process for face enhancement. \nExtensive quantitative and qualitative experiments demonstrate the superior visual quality and temporal consistency of our method.", "keywords": "human video generation; diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/e67b0b5ed920a7549b42fbab3926f6da635b951b.zip", "author": "Quanwei Yang;Jiazhi Guan;Kaisiyuan Wang;Lingyun Yu;Wenqing Chu;Hang Zhou;ZhiQiang Feng;Haocheng Feng;Errui Ding;Jingdong Wang;Hongtao Xie", "authorids": "~Quanwei_Yang1;~Jiazhi_Guan1;~Kaisiyuan_Wang2;~Lingyun_Yu1;~Wenqing_Chu1;~Hang_Zhou4;~ZhiQiang_Feng1;~Haocheng_Feng1;~Errui_Ding2;~Jingdong_Wang1;~Hongtao_Xie2", "gender": "M;M;M;F;M;M;;;M;M;M", "homepage": "https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/members.html;;https://github.com/uniBruce;http://home.ustc.edu.cn/~yuly/;;https://hangz-nju-cuhk.github.io/;;;;https://jingdongwang2017.github.io/;http://imcc.ustc.edu.cn/main.htm", "dblp": "329/0636;279/0597;278/3589.html;47/3963-2;126/6785;;;;180/5531;49/3441;", "google_scholar": ";;2Pedf3EAAAAJ;M1fL0BwAAAAJ;1Ae0CMgAAAAJ;https://scholar.google.com.hk/citations?user=iqbKD9UAAAAJ;;;1wzEtxcAAAAJ;z5SPCmgAAAAJ;", "orcid": ";;0000-0002-2120-8383;0000-0001-6403-761X;;;;;;0000-0002-4888-4445;0000-0002-0163-9434", "linkedin": ";;;;;;;;;;", "or_profile": "~Quanwei_Yang1;~Jiazhi_Guan1;~Kaisiyuan_Wang2;~Lingyun_Yu1;~Wenqing_Chu1;~Hang_Zhou4;~ZhiQiang_Feng1;~Haocheng_Feng1;~Errui_Ding2;~Jingdong_Wang1;~Hongtao_Xie2", "aff": "University of Science and Technology of China;Tsinghua University, China;Baidu Inc.;University of Science and Technology of China;Baidu;Baidu;;;Baidu;Baidu;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;tsinghua.edu.cn;baidu.com;ustc.edu.cn;baidu.com;baidu.com;;;baidu.com;baidu.com;ustc.edu.cn", "position": "PhD student;PhD student;Researcher;Assistant Professor;Researcher;Researcher;;;Director;Chief Scientist for Computer Vision;Full Professor", "bibtex": "@inproceedings{\nyang2024showmaker,\ntitle={ShowMaker: Creating High-Fidelity 2D Human Video via Fine-Grained Diffusion Modeling},\nauthor={Quanwei Yang and Jiazhi Guan and Kaisiyuan Wang and Lingyun Yu and Wenqing Chu and Hang Zhou and ZhiQiang Feng and Haocheng Feng and Errui Ding and Jingdong Wang and Hongtao Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lpxdG0hk4H}\n}", "github": "", "reviewers": "HRx2;xzB4;R54L;jdWr", "pdf_size": 7029086, "rating": "5;6;7;7", "confidence": "4;5;5;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "39;22;105;108", "wc_strengths": "28;14;71;136", "wc_weaknesses": "134;258;76;64", "wc_questions": "70;150;88;84", "wc_limitations": "7;6;10;10", "wc_review": "278;450;350;402", "wc_reply_reviewers": "22;111;0;27", "wc_reply_authors": "29;56;0;14", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 38.48701079585163 ], "wc_strengths_avg": [ 62.25, 47.478284509868296 ], "wc_weaknesses_avg": [ 133.0, 76.87002016391045 ], "wc_questions_avg": [ 98.0, 30.757112998459398 ], "wc_limitations_avg": [ 8.25, 1.7853571071357126 ], "wc_review_avg": [ 370.0, 63.812224534175265 ], "wc_reply_reviewers_avg": [ 40.0, 42.23150482755735 ], "wc_reply_authors_avg": [ 24.75, 20.753011829611623 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9783942952468689908&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;tsinghua.edu.cn;baidu.com;ustc.edu.cn;baidu.com;baidu.com;;;baidu.com;baidu.com;ustc.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;0;2;2;2;2;0", "aff_unique_norm": "University of Science and Technology of China;Tsinghua University;Baidu", "aff_unique_dep": ";;Baidu Inc.", "aff_unique_url": "http://www.ustc.edu.cn;https://www.tsinghua.edu.cn;https://www.baidu.com", "aff_unique_abbr": "USTC;THU;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CODA: A Correlation-Oriented Disentanglement and Augmentation Modeling Scheme for Better Resisting Subpopulation Shifts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93797", "id": "lrSrJZZCle", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lrSrJZZCle", "openreview": "https://openreview.net/forum?id=lrSrJZZCle", "poster": "/media/PosterPDFs/NeurIPS%202024/93797.png?t=1733029564.266941", "project": "", "author_site": "Ziquan OU, Zijun Zhang", "tldr": "", "abstract": "Data-driven models learned often struggle to generalize due to widespread subpopulation shifts, especially the presence of both spurious correlations and group imbalance (SC-GI). To learn models more powerful for defending against SC-GI, we propose a {\\bf Correlation-Oriented Disentanglement and Augmentation (CODA)} modeling scheme, which includes two unique developments: (1) correlation-oriented disentanglement and (2) strategic sample augmentation with reweighted consistency (RWC) loss. In (1), a bi-branch encoding process is developed to enable the disentangling of variant and invariant correlations by coordinating with a decoy classifier and the decoder reconstruction. In (2), a strategic sample augmentation based on disentangled latent features with RWC loss is designed to reinforce the training of a more generalizable model. The effectiveness of CODA is verified by benchmarking against a set of SOTA models in terms of worst-group accuracy and maximum group accuracy gap based on two famous datasets, ColoredMNIST and CelebA.", "keywords": "deep neural networks;feature engineering;model generalizability;class imbalance;spurious correlations", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ziquan OU;Zijun Zhang", "authorids": "~Ziquan_OU1;~Zijun_Zhang2", "gender": "M;M", "homepage": ";http://www.cityu.edu.hk/stfprofile/00308048.htm", "dblp": ";", "google_scholar": ";B3LrwQsAAAAJ", "orcid": "0000-0001-8262-9387;0000-0002-2717-5033", "linkedin": ";", "or_profile": "~Ziquan_OU1;~Zijun_Zhang2", "aff": "City University;City University of Hong Kong", "aff_domain": "cityu.edu;cityu.edu.hk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nou2024coda,\ntitle={{CODA}: A Correlation-Oriented Disentanglement and Augmentation Modeling Scheme for Better Resisting Subpopulation Shifts},\nauthor={Ziquan OU and Zijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lrSrJZZCle}\n}", "github": "", "reviewers": "CR4y;gJ1P;p3rM", "pdf_size": 14119885, "rating": "7;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "53;56;99", "wc_strengths": "59;72;48", "wc_weaknesses": "69;93;30", "wc_questions": "3;111;111", "wc_limitations": "6;1;1", "wc_review": "190;333;289", "wc_reply_reviewers": "23;35;0", "wc_reply_authors": "20;30;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.33333333333333, 21.013223349965983 ], "wc_strengths_avg": [ 59.666666666666664, 9.809292646374773 ], "wc_weaknesses_avg": [ 64.0, 25.96150997149434 ], "wc_questions_avg": [ 75.0, 50.91168824543142 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 270.6666666666667, 59.801523577767 ], "wc_reply_reviewers_avg": [ 19.333333333333332, 14.522013940527977 ], "wc_reply_authors_avg": [ 16.666666666666668, 12.472191289246473 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w64rt6ukp3QJ:scholar.google.com/&scioq=CODA:+A+Correlation-Oriented+Disentanglement+and+Augmentation+Modeling+Scheme+for+Better+Resisting+Subpopulation+Shifts&hl=en&as_sdt=0,34", "gs_version_total": 0, "email": "cityu.edu;cityu.edu.hk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "City University;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityuniversity.edu;https://www.cityu.edu.hk", "aff_unique_abbr": "CityU;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "Exploiting the Replay Memory Before Exploring the Environment: Enhancing Reinforcement Learning Through Empirical MDP Iteration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93796", "id": "lsd27JUJ8v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lsd27JUJ8v", "openreview": "https://openreview.net/forum?id=lsd27JUJ8v", "poster": "/media/PosterPDFs/NeurIPS%202024/93796.png?t=1731444279.2444925", "project": "", "author_site": "Hongming Zhang, Chenjun Xiao, Chao Gao, Han Wang, bo xu, Martin M\u00fcller", "tldr": "", "abstract": "Reinforcement learning (RL) algorithms are typically based on optimizing a Markov Decision Process (MDP) using the optimal Bellman equation. Recent studies have revealed that focusing the optimization of Bellman equations solely on in-sample actions tends to result in more stable optimization, especially in the presence of function approximation. Upon on these findings, in this paper, we propose an Empirical MDP Iteration (EMIT) framework. EMIT constructs a sequence of empirical MDPs using data from the growing replay memory. For each of these empirical MDPs, it learns an estimated Q-function denoted as $\\widehat{Q}$. The key strength is that by restricting the Bellman update to in-sample bootstrapping, each empirical MDP converges to a unique optimal $\\widehat{Q}$ function. Furthermore, gradually expanding from the empirical MDPs to the original MDP induces a monotonic policy improvement. Instead of creating entirely new algorithms, we demonstrate that EMIT can be seamlessly integrated with existing online RL algorithms, effectively acting as a regularizer for contemporary Q-learning methods. We show this by implementing EMIT for two representative RL algorithms, DQN and TD3. Experimental results on Atari and MuJoCo benchmarks show that EMIT significantly reduces estimation errors and substantially improves the performance of both algorithms.", "keywords": "Deep Reinforcement Learning; Empirical MDP Iteration;", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Hongming Zhang;Chenjun Xiao;Chao Gao;Han Wang;bo xu;Martin M\u00fcller", "authorids": "~Hongming_Zhang3;~Chenjun_Xiao1;~Chao_Gao1;~Han_Wang8;~bo_xu1;~Martin_M\u00fcller2", "gender": "M;;M;;M;M", "homepage": "https://github.com/initial-h;https://chenjun-x.github.io/;http://cgao3.github.io;;;https://webdocs.cs.ualberta.ca/~mmueller/", "dblp": ";178/8641;;;;https://dblp.org/pers/hd/m/M=uuml=ller_0003:Martin", "google_scholar": "https://scholar.google.ca/citations?user=mwbsY3AAAAAJ;;https://scholar.google.ca/citations?user=N2H5y_MAAAAJ;;;J60BcHkAAAAJ", "orcid": ";0000-0002-5493-1500;;;;0000-0002-5639-5318", "linkedin": ";;;han-wang-b68a7a17b/;;", "or_profile": "~Hongming_Zhang3;~Chenjun_Xiao1;~Chao_Gao1;~Han_Wang8;~bo_xu1;~Martin_Mueller1", "aff": "University of Alberta;Huawei Technologies Ltd.;Huawei Technologies Canada;University of Alberta;;University of Alberta", "aff_domain": "ualberta.ca;huawei.com;huawei.com;ualberta.ca;;ualberta.ca", "position": "PhD student;Researcher;Researcher;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhang2024exploiting,\ntitle={Exploiting the Replay Memory Before Exploring the Environment: Enhancing Reinforcement Learning Through Empirical {MDP} Iteration},\nauthor={Hongming Zhang and Chenjun Xiao and Chao Gao and Han Wang and bo xu and Martin M{\\\"u}ller},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lsd27JUJ8v}\n}", "github": "", "reviewers": "eYau;tCA6;4uKn;jQwU", "pdf_size": 5125663, "rating": "5;5;7;7", "confidence": "5;3;4;3", "soundness": "2;2;3;3", "novelty": "1;2;3;4", "presentation": "2;3;3;3", "wc_summary": "56;180;83;104", "wc_strengths": "19;93;25;63", "wc_weaknesses": "152;223;21;243", "wc_questions": "47;74;52;18", "wc_limitations": "1;52;7;28", "wc_review": "275;622;188;456", "wc_reply_reviewers": "12;10;5;0", "wc_reply_authors": "22;47;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.75, 46.121442952275466 ], "wc_strengths_avg": [ 50.0, 30.01666203960727 ], "wc_weaknesses_avg": [ 159.75, 86.95221388785912 ], "wc_questions_avg": [ 47.75, 19.954636052807377 ], "wc_limitations_avg": [ 22.0, 20.0124960961895 ], "wc_review_avg": [ 385.25, 167.42069017896205 ], "wc_reply_reviewers_avg": [ 6.75, 4.656984002549289 ], "wc_reply_authors_avg": [ 17.25, 19.382659776202026 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:S6H0qwHYtUAJ:scholar.google.com/&scioq=Exploiting+the+Replay+Memory+Before+Exploring+the+Environment:+Enhancing+Reinforcement+Learning+Through+Empirical+MDP+Iteration&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ualberta.ca;huawei.com;huawei.com;ualberta.ca;;ualberta.ca", "author_num": 6, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "University of Alberta;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.ualberta.ca;https://www.huawei.com", "aff_unique_abbr": "UAlberta;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Canada;China" }, { "title": "Latent Intrinsics Emerge from Training to Relight", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93795", "id": "ltnDg0EzF9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ltnDg0EzF9", "openreview": "https://openreview.net/forum?id=ltnDg0EzF9", "poster": "/media/PosterPDFs/NeurIPS%202024/93795.png?t=1733931970.9349496", "project": "", "author_site": "Xiao Zhang, William Gao, Seemandhar Jain, Michael Maire, David Forsyth, Anand Bhattad", "tldr": "", "abstract": "Image relighting is the task of showing what a scene from a source image would look like if illuminated differently. Inverse graphic schemes recover an explicit representation of geometry and a set of chosen intrinsics, then relight with some form of renderer. But error control for inverse graphics is difficult, and inverse graphics methods can represent only the effects of the chosen intrinsics. This paper describes a relighting method that is entirely data-driven, where intrinsics and lighting are each represented as latent variables. Our approach produces SOTA relightings of real scenes, as measured by standard metrics. We show that albedo can be recovered from our latent intrinsics without using any example albedos, and that the albedos recovered are competitive with SOTA methods.", "keywords": "Emergent Albedos;Latent Intrinsics;Relighting;Unsupervised Learning;Intrinsic Images", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiao Zhang;William Gao;Seemandhar Jain;Michael Maire;David Forsyth;Anand Bhattad", "authorids": "~Xiao_Zhang11;~William_Gao1;~Seemandhar_Jain1;~Michael_Maire1;~David_Forsyth1;~Anand_Bhattad1", "gender": "M;;M;M;M;", "homepage": "https://computerscience.uchicago.edu/people/profile/xiao-zhang/;;https://seemandhar.github.io/;http://people.cs.uchicago.edu/~mmaire/;https://cs.illinois.edu/directory/profile/daf;https://anandbhattad.github.io/", "dblp": ";;;73/1498.html;f/DavidAForsyth;215/4305", "google_scholar": ";;Fi0JMRYAAAAJ;HXowq5YAAAAJ;https://scholar.google.com.tw/citations?user=5H0arvkAAAAJ;XUsauXIAAAAJ", "orcid": ";;;;0000-0002-2278-0752;", "linkedin": ";;seemandharjain/;;;", "or_profile": "~Xiao_Zhang11;~William_Gao1;~Seemandhar_Jain1;~Michael_Maire1;~David_Forsyth1;~Anand_Bhattad1", "aff": "University of Chicago;;Department of Computer Science;University of Chicago;University of Illinois, Urbana-Champaign;University of Illinois Urbana Champaign", "aff_domain": "uchicago.edu;;cs.illinois.edu;uchicago.edu;uiuc.edu;illinois.edu", "position": "PhD student;;MS student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024latent,\ntitle={Latent Intrinsics Emerge from Training to Relight},\nauthor={Xiao Zhang and William Gao and Seemandhar Jain and Michael Maire and David Forsyth and Anand Bhattad},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ltnDg0EzF9}\n}", "github": "", "reviewers": "t1Cn;VUQQ;n7pv", "pdf_size": 28153596, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "232;77;64", "wc_strengths": "97;75;62", "wc_weaknesses": "116;132;31", "wc_questions": "63;43;4", "wc_limitations": "56;12;10", "wc_review": "564;339;171", "wc_reply_reviewers": "48;156;24", "wc_reply_authors": "31;56;29", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 124.33333333333333, 76.31659205062961 ], "wc_strengths_avg": [ 78.0, 14.445299120013633 ], "wc_weaknesses_avg": [ 93.0, 44.3245605355165 ], "wc_questions_avg": [ 36.666666666666664, 24.499433100017278 ], "wc_limitations_avg": [ 26.0, 21.228911104120876 ], "wc_review_avg": [ 358.0, 161.00310556011024 ], "wc_reply_reviewers_avg": [ 76.0, 57.41080037762929 ], "wc_reply_authors_avg": [ 38.666666666666664, 12.283683848458853 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=302574867951700010&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "uchicago.edu;;cs.illinois.edu;uchicago.edu;uiuc.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of Chicago;Unknown Institution;University of Illinois;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Department of Computer Science;;", "aff_unique_url": "https://www.uchicago.edu;;https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UChicago;;UIUC;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "UniBias: Unveiling and Mitigating LLM Bias through Internal Attention and FFN Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93794", "id": "luQiVmnviX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=luQiVmnviX", "openreview": "https://openreview.net/forum?id=luQiVmnviX", "poster": "/media/PosterPDFs/NeurIPS%202024/93794.png?t=1731686493.0161607", "project": "", "author_site": "Hanzhang Zhou, Zijian Feng, Zixiao Zhu, Junlang Qian, Kezhi Mao", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated impressive capabilities in various tasks using the in-context learning (ICL) paradigm. However, their effectiveness is often compromised by inherent bias, leading to prompt brittleness\u2014sensitivity to design settings such as example selection, order, and prompt formatting. Previous studies have addressed LLM bias through external adjustment of model outputs, but the internal mechanisms that lead to such bias remain unexplored. Our work delves into these mechanisms, particularly investigating how feedforward neural networks (FFNs) and attention heads result in the bias of LLMs. By Interpreting the contribution of individual FFN vectors and attention heads, we identify the biased LLM components that skew LLMs' prediction toward specific labels. To mitigate these biases, we introduce UniBias, an inference-only method that effectively identifies and eliminates biased FFN vectors and attention heads. Extensive experiments across 12 NLP datasets demonstrate that UniBias significantly enhances ICL performance and alleviates prompt brittleness of LLMs.", "keywords": "LLM Bias;In-Context Learning;Attention and FFN Manipulation;Prompt Brittleness", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hanzhang Zhou;Zijian Feng;Zixiao Zhu;Junlang Qian;Kezhi Mao", "authorids": "~Hanzhang_Zhou1;~Zijian_Feng2;~Zixiao_Zhu2;~Junlang_Qian1;~Kezhi_Mao1", "gender": "M;M;;M;F", "homepage": ";;https://github.com/qianjunlang;https://dr.ntu.edu.sg/cris/rp/rp00158;", "dblp": "295/8180;45/10114;;m/KezhiMao;155/4344", "google_scholar": ";;hxwdDa4AAAAJ;jCsRJXUAAAAJ;", "orcid": "0000-0003-3758-636X;0000-0003-1311-988X;;;0000-0002-8834-868X", "linkedin": ";;;;zixiaozhu818/", "or_profile": "~Hanzhang_Zhou1;~Zijian_Feng2;~Junlang_Qian1;~Kezhi_Mao1;~ZIXIAO_ZHU1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nzhou2024unibias,\ntitle={UniBias: Unveiling and Mitigating {LLM} Bias through Internal Attention and {FFN} Manipulation},\nauthor={Hanzhang Zhou and Zijian Feng and Zixiao Zhu and Junlang Qian and Kezhi Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=luQiVmnviX}\n}", "github": "", "reviewers": "LH5i;CnNd;8E5c;BhZE", "pdf_size": 2772252, "rating": "5;6;6;6", "confidence": "4;4;4;3", "soundness": "1;3;4;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "16;58;90;74", "wc_strengths": "16;38;86;91", "wc_weaknesses": "126;175;171;64", "wc_questions": "2;1;191;50", "wc_limitations": "8;5;1;1", "wc_review": "168;277;539;280", "wc_reply_reviewers": "0;431;0;0", "wc_reply_authors": "0;986;0;56", "reply_reviewers": "0;2;0;0", "reply_authors": "1;3;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 59.5, 27.54541704167864 ], "wc_strengths_avg": [ 57.75, 31.767711595266032 ], "wc_weaknesses_avg": [ 134.0, 44.760473634670134 ], "wc_questions_avg": [ 61.0, 77.62409419761367 ], "wc_limitations_avg": [ 3.75, 2.947456530637899 ], "wc_review_avg": [ 316.0, 136.42763649642254 ], "wc_reply_reviewers_avg": [ 107.75, 186.62847451554651 ], "wc_reply_authors_avg": [ 260.5, 419.491060691405 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=623166336966486491&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "EEGPT: Pretrained Transformer for Universal and Reliable Representation of EEG Signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93793", "id": "lvS2b8CjG5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lvS2b8CjG5", "openreview": "https://openreview.net/forum?id=lvS2b8CjG5", "poster": "/media/PosterPDFs/NeurIPS%202024/93793.png?t=1731394068.3297632", "project": "", "author_site": "Guangyu Wang, Wenchao Liu, Yuhong He, Cong Xu, Lin Ma, Haifeng Li", "tldr": "", "abstract": "Electroencephalography (EEG) is crucial for recording brain activity, \n with applications in medicine, neuroscience, and brain-computer interfaces (BCI). \n However, challenges such as low signal-to-noise ratio (SNR), high inter-subject variability, and channel mismatch complicate the extraction of robust, \n universal EEG representations. \n We propose EEGPT, a novel 10-million-parameter pretrained transformer model designed for universal EEG feature extraction. \n In EEGPT, a mask-based dual self-supervised learning method for efficient feature extraction is designed. \n Compared to other mask-based self-supervised learning methods, \n EEGPT introduces spatio-temporal representation alignment. \n This involves constructing a self-supervised task based on \n EEG representations that possess high SNR and rich semantic information, \n rather than on raw signals. \n Consequently, this approach mitigates the issue of poor feature quality typically \n extracted from low SNR signals.\n Additionally, EEGPT's hierarchical structure processes spatial and temporal information separately, \n reducing computational complexity while increasing flexibility and adaptability for BCI applications. \n By training on a large mixed multi-task EEG dataset, we fully exploit EEGPT's capabilities.\n The experiment validates the efficacy and scalability of EEGPT, \n achieving state-of-the-art performance on a range of downstream tasks with linear-probing.\n Our research advances EEG representation learning, offering innovative solutions for bio-signal processing and AI applications.\n The code for this paper is available at: https://github.com/BINE022/EEGPT", "keywords": "Electroencephalography;self-supervised learning;representational learning;masked autoencoder;transformer;brain-computer interfaces", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/920c72c1c6669ec8a289f1d986e76e08db113099.zip", "author": "Guangyu Wang;Wenchao Liu;Yuhong He;Cong Xu;Lin Ma;Haifeng Li", "authorids": "~Guangyu_Wang2;~Wenchao_Liu2;~Yuhong_He1;~Cong_Xu5;~Lin_Ma11;~Haifeng_Li2", "gender": "M;;;M;F;M", "homepage": "https://gitee.com/whiteram;https://github.com/makeitperfect;;;http://homepage.hit.edu.cn/malinhit;http://homepage.hit.edu.cn/lihaifeng", "dblp": ";;;;;", "google_scholar": ";7JQkkD8AAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Guangyu_Wang2;~Wenchao_Liu2;~Yuhong_He1;~Cong_Xu5;~Lin_Ma11;~Haifeng_Li2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;stu.hit.edu.cn;;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;PhD student;;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024eegpt,\ntitle={{EEGPT}: Pretrained Transformer for Universal and Reliable Representation of {EEG} Signals},\nauthor={Guangyu Wang and Wenchao Liu and Yuhong He and Cong Xu and Lin Ma and Haifeng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lvS2b8CjG5}\n}", "github": "", "reviewers": "izki;DgXb;23GR", "pdf_size": 8649213, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;2", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "38;206;68", "wc_strengths": "81;144;222", "wc_weaknesses": "182;597;179", "wc_questions": "22;248;407", "wc_limitations": "6;54;1", "wc_review": "329;1249;877", "wc_reply_reviewers": "33;0;164", "wc_reply_authors": "0;0;337", "reply_reviewers": "1;0;1", "reply_authors": "1;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 104.0, 73.15736463268753 ], "wc_strengths_avg": [ 149.0, 57.67148342118486 ], "wc_weaknesses_avg": [ 319.3333333333333, 196.34380277688646 ], "wc_questions_avg": [ 225.66666666666666, 157.96694450281538 ], "wc_limitations_avg": [ 20.333333333333332, 23.893281249943236 ], "wc_review_avg": [ 818.3333333333334, 377.8724064489964 ], "wc_reply_reviewers_avg": [ 65.66666666666667, 70.82529366139065 ], "wc_reply_authors_avg": [ 112.33333333333333, 158.8633235065777 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18056495735375580778&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;stu.hit.edu.cn;;hit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MotionCraft: Physics-Based Zero-Shot Video Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93792", "id": "lvcWA24dxB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lvcWA24dxB", "openreview": "https://openreview.net/forum?id=lvcWA24dxB", "poster": "", "project": "", "author_site": "Antonio Montanaro, Luca Savant Aira, Emanuele Aiello, Diego Valsesia, Enrico Magli", "tldr": "", "abstract": "Generating videos with realistic and physically plausible motion is one of the main recent challenges in computer vision. \nWhile diffusion models are achieving compelling results in image generation, video diffusion models are limited by heavy training and huge models, resulting in videos that are still biased to the training dataset. In this work we propose MotionCraft, a new zero-shot video generator to craft physics-based and realistic videos. MotionCraft is able to warp the noise latent space of an image diffusion model, such as Stable Diffusion, by applying an optical flow derived from a physics simulation. We show that warping the noise latent space results in coherent application of the desired motion while allowing the model to generate missing elements consistent with the scene evolution, which would otherwise result in artefacts or missing content if the flow was applied in the pixel space.\nWe compare our method with the state-of-the-art Text2Video-Zero reporting qualitative and quantitative improvements, demonstrating the effectiveness of our approach to generate videos with finely-prescribed complex motion dynamics.", "keywords": "Zero-shot video generation;diffusion model;physics-based video generation", "primary_area": "generative_models", "supplementary_material": "/attachment/1c486da7f9ad3348d6c4d46255d7d6ac422054c7.zip", "author": "Antonio Montanaro;Luca Savant Aira;Emanuele Aiello;Diego Valsesia;Enrico Magli", "authorids": "~Antonio_Montanaro1;~Luca_Savant_Aira1;~Emanuele_Aiello1;~Diego_Valsesia1;~Enrico_Magli1", "gender": "M;M;M;;M", "homepage": ";https://github.com/mezzelfo;;;https://ipl.polito.it/enrico-magli", "dblp": ";317/0636;;136/4988;81/5607", "google_scholar": ";-4oI6CEAAAAJ;oZlKgH4AAAAJ;https://scholar.google.it/citations?user=5YXpLhQAAAAJ;", "orcid": " 0000-0002-6913-9161;0009-0002-6728-0855;0000-0001-7133-4137;0000-0003-1997-2910;0000-0002-0901-0251", "linkedin": ";;emanuele-aiello-765945105/;;", "or_profile": "~Antonio_Montanaro1;~Luca_Savant_Aira1;~Emanuele_Aiello1;~Diego_Valsesia1;~Enrico_Magli1", "aff": "Politecnico di Torino;Polytechnic Institute of Turin;Politecnico di Torino;Politecnico di Torino;Politecnico di Torino", "aff_domain": "polito.it;polito.it;polito.it;polito.it;polito.it", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmontanaro2024motioncraft,\ntitle={MotionCraft: Physics-Based Zero-Shot Video Generation},\nauthor={Antonio Montanaro and Luca Savant Aira and Emanuele Aiello and Diego Valsesia and Enrico Magli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lvcWA24dxB}\n}", "github": "", "reviewers": "zFVb;cTbz;ypn7", "pdf_size": 30068538, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "122;226;82", "wc_strengths": "47;3;35", "wc_weaknesses": "117;155;131", "wc_questions": "3;48;24", "wc_limitations": "3;3;21", "wc_review": "292;435;293", "wc_reply_reviewers": "35;18;0", "wc_reply_authors": "0;122;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 143.33333333333334, 60.69230227595222 ], "wc_strengths_avg": [ 28.333333333333332, 18.571184369578823 ], "wc_weaknesses_avg": [ 134.33333333333334, 15.691469727919761 ], "wc_questions_avg": [ 25.0, 18.384776310850235 ], "wc_limitations_avg": [ 9.0, 8.48528137423857 ], "wc_review_avg": [ 340.0, 67.17638473948018 ], "wc_reply_reviewers_avg": [ 17.666666666666668, 14.29063407348401 ], "wc_reply_authors_avg": [ 40.666666666666664, 57.51135153650587 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13287342538606109654&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "polito.it;polito.it;polito.it;polito.it;polito.it", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Politecnico di Torino;Polytechnic Institute of Turin", "aff_unique_dep": ";", "aff_unique_url": "https://www.polito.it;https://www.polito.it", "aff_unique_abbr": "Polito;Polito", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Unifying Generation and Prediction on Graphs with Latent Graph Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93791", "id": "lvibangnAs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lvibangnAs", "openreview": "https://openreview.net/forum?id=lvibangnAs", "poster": "/media/PosterPDFs/NeurIPS%202024/93791.png?t=1733616581.321274", "project": "", "author_site": "Cai Zhou, Xiyuan Wang, Muhan Zhang", "tldr": "", "abstract": "In this paper, we propose the first framework that enables solving graph learning tasks of all levels (node, edge and graph) and all types (generation, regression and classification) using one formulation. We first formulate prediction tasks including regression and classification into a generic (conditional) generation framework, which enables diffusion models to perform deterministic tasks with provable guarantees. We then propose Latent Graph Diffusion (LGD), a generative model that can generate node, edge, and graph-level features of all categories simultaneously. We achieve this goal by embedding the graph structures and features into a latent space leveraging a powerful encoder and decoder, then training a diffusion model in the latent space. LGD is also capable of conditional generation through a specifically designed cross-attention mechanism. Leveraging LGD and the ``all tasks as generation'' formulation, our framework is capable of solving graph tasks of various levels and types. We verify the effectiveness of our framework with extensive experiments, where our models achieve state-of-the-art or highly competitive results across a wide range of generation and regression tasks.", "keywords": "latent graph generation;theory of diffusion model;general purpose generative models", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Cai Zhou;Xiyuan Wang;Muhan Zhang", "authorids": "~Cai_Zhou2;~Xiyuan_Wang1;~Muhan_Zhang1", "gender": "M;;M", "homepage": "http://homepage.zhouc.ai/;;https://muhanzhang.github.io/", "dblp": "235/4068;95/8542;157/5518", "google_scholar": "3Pd27KUAAAAJ;;https://scholar.google.com.hk/citations?user=OBBqkosAAAAJ", "orcid": ";;0000-0002-7680-6401", "linkedin": ";%E5%B8%8C%E5%85%83-%E7%8E%8B-969660221/;jerry-muhan-zhang-a33a1777/", "or_profile": "~Cai_Zhou2;~Xiyuan_Wang1;~Muhan_Zhang1", "aff": "Tsinghua University;Peking University;Peking University", "aff_domain": "tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024unifying,\ntitle={Unifying Generation and Prediction on Graphs with Latent Graph Diffusion},\nauthor={Cai Zhou and Xiyuan Wang and Muhan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lvibangnAs}\n}", "github": "", "reviewers": "PVEb;suUP;a5WK;XBTL", "pdf_size": 600319, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "36;59;34;105", "wc_strengths": "48;37;37;80", "wc_weaknesses": "393;178;67;515", "wc_questions": "19;23;60;35", "wc_limitations": "1;1;46;8", "wc_review": "497;298;244;743", "wc_reply_reviewers": "21;20;39;39", "wc_reply_authors": "242;20;90;18", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.5, 28.587584717845612 ], "wc_strengths_avg": [ 50.5, 17.613914953808536 ], "wc_weaknesses_avg": [ 288.25, 175.70909908140786 ], "wc_questions_avg": [ 34.25, 15.990231392947383 ], "wc_limitations_avg": [ 14.0, 18.694919095839918 ], "wc_review_avg": [ 445.5, 195.9062275681914 ], "wc_reply_reviewers_avg": [ 29.75, 9.256754290786809 ], "wc_reply_authors_avg": [ 92.5, 91.05355566917747 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17738278171376491124&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "THU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Controlling Multiple Errors Simultaneously with a PAC-Bayes Bound", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93790", "id": "lwpfH9wVkO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lwpfH9wVkO", "openreview": "https://openreview.net/forum?id=lwpfH9wVkO", "poster": "/media/PosterPDFs/NeurIPS%202024/93790.png?t=1730987811.5993109", "project": "", "author_site": "Reuben Adams, John Shawe-Taylor, Benjamin Guedj", "tldr": "", "abstract": "Current PAC-Bayes generalisation bounds are restricted to scalar metrics of performance, such as the loss or error rate. However, one ideally wants more information-rich certificates that control the entire distribution of possible outcomes, such as the distribution of the test loss in regression, or the probabilities of different mis-classifications. We provide the first PAC-Bayes bound capable of providing such rich information by bounding the Kullback-Leibler divergence between the empirical and true probabilities of a set of $M$ error types, which can either be discretized loss values for regression, or the elements of the confusion matrix (or a partition thereof) for classification. We transform our bound into a differentiable training objective. Our bound is especially useful in cases where the severity of different mis-classifications may change over time; existing PAC-Bayes bounds can only bound a particular pre-decided weighting of the error types. In contrast our bound implicitly controls all uncountably many weightings simultaneously.", "keywords": "PAC-Bayes;Generalization;Statistical Learning Theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Reuben Adams;John Shawe-Taylor;Benjamin Guedj", "authorids": "~Reuben_Adams1;~John_Shawe-Taylor1;~Benjamin_Guedj1", "gender": "M;M;M", "homepage": ";;https://bguedj.github.io", "dblp": ";59/41;177/7258", "google_scholar": "xXQzg80AAAAJ;;https://scholar.google.fr/citations?user=q-JTC2sAAAAJ", "orcid": ";;0000-0003-1237-7430", "linkedin": ";;benjaminguedj/", "or_profile": "~Reuben_Adams1;~John_Shawe-Taylor1;~Benjamin_Guedj1", "aff": "University College London, University of London;University College London;University College London, University of London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "position": "PhD student;Professor;Principal Researcher", "bibtex": "@inproceedings{\nadams2024controlling,\ntitle={Controlling Multiple Errors Simultaneously with a {PAC}-Bayes Bound},\nauthor={Reuben Adams and John Shawe-Taylor and Benjamin Guedj},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lwpfH9wVkO}\n}", "github": "", "reviewers": "R3U1;cfTA;fvxp;4oQc", "pdf_size": 2459472, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;4;4;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "135;44;82;121", "wc_strengths": "42;27;35;99", "wc_weaknesses": "94;135;80;96", "wc_questions": "30;2;158;29", "wc_limitations": "4;8;258;1", "wc_review": "305;216;613;346", "wc_reply_reviewers": "74;49;27;11", "wc_reply_authors": "49;31;51;35", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 95.5, 35.51408171415952 ], "wc_strengths_avg": [ 50.75, 28.358199872347328 ], "wc_weaknesses_avg": [ 101.25, 20.437404434027332 ], "wc_questions_avg": [ 54.75, 60.660427792754646 ], "wc_limitations_avg": [ 67.75, 109.86895603399533 ], "wc_review_avg": [ 370.0, 147.95776424371923 ], "wc_reply_reviewers_avg": [ 40.25, 23.699947257325277 ], "wc_reply_authors_avg": [ 41.5, 8.645808232895291 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=472750032890478417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Search for Efficient Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93789", "id": "lxSmLxlVks", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lxSmLxlVks", "openreview": "https://openreview.net/forum?id=lxSmLxlVks", "poster": "/media/PosterPDFs/NeurIPS%202024/93789.png?t=1732995815.8887708", "project": "", "author_site": "Xuan Shen, Pu Zhao, Yifan Gong, Zhenglun Kong, Zheng Zhan, Yushu Wu, Ming Lin, Chao Wu, Xue Lin, Yanzhi Wang", "tldr": "", "abstract": "Large Language Models (LLMs) have long held sway in the realms of artificial intelligence research.\nNumerous efficient techniques, including weight pruning, quantization, and distillation, have been embraced to compress LLMs, targeting memory reduction and inference acceleration, which underscore the redundancy in LLMs.\nHowever, most model compression techniques concentrate on weight optimization, overlooking the exploration of optimal architectures.\nBesides, traditional architecture search methods, limited by the elevated complexity with extensive parameters, struggle to demonstrate their effectiveness on LLMs.\nIn this paper, we propose a training-free architecture search framework to identify optimal subnets that preserve the fundamental strengths of the original LLMs while achieving inference acceleration.\nFurthermore, after generating subnets that inherit specific weights from the original LLMs, we introduce a reformation algorithm that utilizes the omitted weights to rectify the inherited weights with a small amount of calibration data.\nCompared with SOTA training-free structured pruning works that can generate smaller networks, our method demonstrates superior performance across standard benchmarks.\nFurthermore, our generated subnets can directly reduce the usage of GPU memory and achieve inference acceleration.", "keywords": "Architecture Search;Large Language Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Xuan Shen;Pu Zhao;Yifan Gong;Zhenglun Kong;Zheng Zhan;Yushu Wu;Ming Lin;Chao Wu;Xue Lin;Yanzhi Wang", "authorids": "~Xuan_Shen1;~Pu_Zhao1;~Yifan_Gong2;~Zhenglun_Kong1;~Zheng_Zhan3;~Yushu_Wu1;~Ming_Lin4;~Chao_Wu5;~Xue_Lin1;~Yanzhi_Wang3", "gender": "M;M;F;M;;M;M;M;F;M", "homepage": "https://shawnricecake.github.io/;https://puzhao.info/;https://yifanfanfanfan.github.io/;https://sites.google.com/husky.neu.edu/zlk/home?authuser=1;;https://wuyushuwys.github.io;https://minglin-home.github.io/;https://juanhair.github.io/wuchao.github.io/;https://coe.northeastern.edu/people/lin-xue/;https://web.northeastern.edu/yanzhiwang/", "dblp": ";75/8475-1;49/3073-4.html;211/6323;156/4008-1.html;166/4244;;;;", "google_scholar": "Pvj14ZUAAAAJ;rWZLnpwAAAAJ;U_gevVgAAAAJ;XYa4NVYAAAAJ;hwTuEX0AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;UH51bhYAAAAJ;p87KNLIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-5018-2859;0000-0002-3912-097X;0000-0002-8120-4456;;0000-0001-9883-7973;;;0000-0001-6210-8883;", "linkedin": ";;yifan-gong-3059b8132/;zhenglun-kong-35b527150/;;;;;;", "or_profile": "~Xuan_Shen1;~Pu_Zhao1;~Yifan_Gong2;~Zhenglun_Kong1;~Zheng_Zhan3;~Yushu_Wu1;~Ming_Lin4;~Chao_Wu5;~Xue_Lin1;~Yanzhi_Wang3", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;Oracle;Northeastern University;Northeastern University;Northeastern University", "aff_domain": "neu.edu;neu.edu;neu.edu;northeastern.edu;neu.edu;northeastern.edu;oracle.com;gmail.com;neu.edu;northeastern.edu", "position": "PhD student;Researcher;PhD student;PhD student;PhD student;PhD student;Principal Researcher;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nshen2024search,\ntitle={Search for Efficient Large Language Models},\nauthor={Xuan Shen and Pu Zhao and Yifan Gong and Zhenglun Kong and Zheng Zhan and Yushu Wu and Ming Lin and Chao Wu and Xue Lin and Yanzhi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lxSmLxlVks}\n}", "github": "", "reviewers": "n31g;BTSA;Xurt;Qp2U", "pdf_size": 2078247, "rating": "4;6;6;6", "confidence": "2;4;4;2", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "1;2;3;3", "wc_summary": "52;49;77;31", "wc_strengths": "72;49;84;28", "wc_weaknesses": "174;184;156;8", "wc_questions": "80;240;80;7", "wc_limitations": "8;48;1;4", "wc_review": "386;570;398;78", "wc_reply_reviewers": "0;84;17;11", "wc_reply_authors": "0;82;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 52.25, 16.391689967785506 ], "wc_strengths_avg": [ 58.25, 21.52179128232592 ], "wc_weaknesses_avg": [ 130.5, 71.43353554178877 ], "wc_questions_avg": [ 101.75, 85.2008656059315 ], "wc_limitations_avg": [ 15.25, 19.070592544543548 ], "wc_review_avg": [ 358.0, 177.29072169744248 ], "wc_reply_reviewers_avg": [ 28.0, 32.901367752724205 ], "wc_reply_authors_avg": [ 20.5, 35.50704155516198 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15248453258136186492&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "neu.edu;neu.edu;neu.edu;northeastern.edu;neu.edu;northeastern.edu;oracle.com;gmail.com;neu.edu;northeastern.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Northeastern University;Oracle Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.oracle.com", "aff_unique_abbr": "NEU;Oracle", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Predictive Attractor Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93788", "id": "lxhoVDf1Sw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lxhoVDf1Sw", "openreview": "https://openreview.net/forum?id=lxhoVDf1Sw", "poster": "/media/PosterPDFs/NeurIPS%202024/93788.png?t=1730305216.3258278", "project": "", "author_site": "Ramy Mounir, Sudeep Sarkar", "tldr": "", "abstract": "Sequential memory, the ability to form and accurately recall a sequence of events or stimuli in the correct order, is a fundamental prerequisite for biological and artificial intelligence as it underpins numerous cognitive functions (e.g., language comprehension, planning, episodic memory formation, etc.) However, existing methods of sequential memory suffer from catastrophic forgetting, limited capacity, slow iterative learning procedures, low-order Markov memory, and, most importantly, the inability to represent and generate multiple valid future possibilities stemming from the same context. Inspired by biologically plausible neuroscience theories of cognition, we propose Predictive Attractor Models (PAM), a novel sequence memory architecture with desirable generative properties. PAM is a streaming model that learns a sequence in an online, continuous manner by observing each input only once. Additionally, we find that PAM avoids catastrophic forgetting by uniquely representing past context through lateral inhibition in cortical minicolumns, which prevents new memories from overwriting previously learned knowledge. PAM generates future predictions by sampling from a union set of predicted possibilities; this generative ability is realized through an attractor model trained alongside the predictor. We show that PAM is trained with local computations through Hebbian plasticity rules in a biologically plausible framework. Other desirable traits (e.g., noise tolerance, CPU-based learning, capacity scaling) are discussed throughout the paper. Our findings suggest that PAM represents a significant step forward in the pursuit of biologically plausible and computationally efficient sequential memory models, with broad implications for cognitive science and artificial intelligence research. Illustration videos and code are available on our project page: https://ramymounir.com/publications/pam.", "keywords": "Sequential Memory;Predictive Models;Fixed-point Attractors;Associative Memory Models;State Space Models;Biologically plausible;Hebbian Plasticity Rules;Local Computations;Hierarchical Temporal Memory;Continual Learning;Multiple Possibilities Generation;Noise Tolerance", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/29633e37d3cd6750a5eab0009bc61db9bd218b2b.zip", "author": "Ramy Mounir;Sudeep Sarkar", "authorids": "~Ramy_Mounir1;~Sudeep_Sarkar1", "gender": "M;Unspecified", "homepage": "https://ramymounir.com/;", "dblp": "264/5593.html;72/3470", "google_scholar": "93KqVdMAAAAJ;xX2D9FQAAAAJ", "orcid": ";0000-0001-7332-4207", "linkedin": "ramy-mounir-2a8b76113;profsudeepsarkar", "or_profile": "~Ramy_Mounir1;~Sudeep_Sarkar1", "aff": "University of South Florida;University of South Florida", "aff_domain": "usf.edu;usf.edu", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nmounir2024predictive,\ntitle={Predictive Attractor Models},\nauthor={Ramy Mounir and Sudeep Sarkar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lxhoVDf1Sw}\n}", "github": "", "reviewers": "E7YU;qn9J;T6nZ;YbwU;sj6N", "pdf_size": 6847300, "rating": "4;4;6;6;7", "confidence": "4;2;3;3;4", "soundness": "3;3;3;3;3", "novelty": "2;2;4;3;3", "presentation": "2;2;2;3;4", "wc_summary": "41;87;99;115;198", "wc_strengths": "63;97;88;54;96", "wc_weaknesses": "241;173;89;42;187", "wc_questions": "78;161;106;78;118", "wc_limitations": "32;12;25;40;24", "wc_review": "455;530;407;329;623", "wc_reply_reviewers": "162;23;0;96;0", "wc_reply_authors": "151;17;26;862;0", "reply_reviewers": "2;1;0;1;0", "reply_authors": "3;2;2;4;1", "rating_avg": [ 5.4, 1.2 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.8 ], "wc_summary_avg": [ 108.0, 51.30302135352264 ], "wc_strengths_avg": [ 79.6, 17.738094598913378 ], "wc_weaknesses_avg": [ 146.4, 71.41316405257507 ], "wc_questions_avg": [ 108.2, 30.701140043978825 ], "wc_limitations_avg": [ 26.6, 9.28654941299512 ], "wc_review_avg": [ 468.8, 101.07106410837872 ], "wc_reply_reviewers_avg": [ 56.2, 63.55595959467531 ], "wc_reply_authors_avg": [ 211.2, 329.78259505316527 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3563483225498991, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6Zo311Qr0xQJ:scholar.google.com/&scioq=Predictive+Attractor+Models&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "usf.edu;usf.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of South Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.usf.edu", "aff_unique_abbr": "USF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Unified Domain Generalization and Adaptation for Multi-View 3D Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93787", "id": "lxuXvJSOcP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lxuXvJSOcP", "openreview": "https://openreview.net/forum?id=lxuXvJSOcP", "poster": "/media/PosterPDFs/NeurIPS%202024/93787.png?t=1731216178.2780092", "project": "", "author_site": "Gyusam Chang, Jiwon Lee, Donghyun Kim, Jinkyu Kim, Dongwook Lee, Daehyun Ji, Sujin Jang, Sangpil Kim", "tldr": "", "abstract": "Recent advances in 3D object detection leveraging multi-view cameras have demonstrated their practical and economical value in various challenging vision tasks.\nHowever, typical supervised learning approaches face challenges in achieving satisfactory adaptation toward unseen and unlabeled target datasets (i.e., direct transfer) due to the inevitable geometric misalignment between the source and target domains.\nIn practice, we also encounter constraints on resources for training models and collecting annotations for the successful deployment of 3D object detectors.\nIn this paper, we propose Unified Domain Generalization and Adaptation (UDGA), a practical solution to mitigate those drawbacks.\nWe first propose Multi-view Overlap Depth Constraint that leverages the strong association between multi-view, significantly alleviating geometric gaps due to perspective view changes.\nThen, we present a Label-Efficient Domain Adaptation approach to handle unfamiliar targets with significantly fewer amounts of labels (i.e., 1$\\%$ and 5$\\%)$, while preserving well-defined source knowledge for training efficiency.\nOverall, UDGA framework enables stable detection performance in both source and target domains, effectively bridging inevitable domain gaps, while demanding fewer annotations.\nWe demonstrate the robustness of UDGA with large-scale benchmarks: nuScenes, Lyft, and Waymo, where our framework outperforms the current state-of-the-art methods.", "keywords": "Domain Generalization.+Domain Adaptation.+Multi-view 3D Object Detection.+Autonomous driving.+Domain Generalization.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Gyusam Chang;Jiwon Lee;Donghyun Kim;Jinkyu Kim;Dongwook Lee;Daehyun Ji;Sujin Jang;Sangpil Kim", "authorids": "~Gyusam_Chang1;~Jiwon_Lee1;~Donghyun_Kim2;~Jinkyu_Kim1;~Dongwook_Lee4;~Daehyun_Ji1;~Sujin_Jang2;~Sangpil_Kim4", "gender": "M;;M;M;M;;M;M", "homepage": "https://kuaicv.com/student-2/?theme=pub/antoniau0026hide_banners=true;https://none.none.mn;https://cs-people.bu.edu/donhk;https://visionai.korea.ac.kr/;;;https://sujinjang.github.io/;https://kuaicv.com/", "dblp": "323/7273;;;;;274/9684;146/6241;182/2231", "google_scholar": "https://scholar.google.com/citations?hl=ko;;https://scholar.google.co.kr/citations?user=UsqNPH4AAAAJ;;;https://scholar.google.co.kr/citations?user=WEZZefcAAAAJ;https://scholar.google.co.kr/citations?hl=en;mzH6yYgAAAAJ", "orcid": ";;;;;;0000-0002-2723-5606;0000-0002-7349-0018", "linkedin": "gyusam-chang-735290231;;;;dongwook-lee-4903644b;captainzone/;sujin-jang-7996b354;spkim921", "or_profile": "~Gyusam_Chang1;~Jiwon_Lee1;~Donghyun_Kim2;~Jinkyu_Kim1;~Dongwook_Lee4;~Daehyun_Ji1;~Sujin_Jang2;~Sangpil_Kim4", "aff": "Korea University;Samsung;Korea University;Korea University;Samsung;Samsung;Samsung Advanced Institute of Technology (SAIT);Korea University", "aff_domain": "korea.ac.kr;samsung.com;korea.ac.kr;korea.ac.kr;samsung.com;samsung.com;samsung.com;korea.ac.kr", "position": "PhD student;Researcher;Assistant Professor;Assistant Professor;Principal Researcher;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchang2024unified,\ntitle={Unified Domain Generalization and Adaptation for Multi-View 3D Object Detection},\nauthor={Gyusam Chang and Jiwon Lee and Donghyun Kim and Jinkyu Kim and Dongwook Lee and Daehyun Ji and Sujin Jang and Sangpil Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lxuXvJSOcP}\n}", "github": "", "reviewers": "2JqY;gahV;LaL5;pgwk", "pdf_size": 3326955, "rating": "4;5;5;7", "confidence": "4;4;3;3", "soundness": "3;2;2;3", "novelty": "2;2;2;3", "presentation": "2;3;2;4", "wc_summary": "73;49;63;67", "wc_strengths": "29;19;65;47", "wc_weaknesses": "221;200;107;8", "wc_questions": "87;10;6;29", "wc_limitations": "4;6;13;5", "wc_review": "414;284;254;156", "wc_reply_reviewers": "362;0;73;0", "wc_reply_authors": "487;79;56;0", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 63.0, 8.831760866327848 ], "wc_strengths_avg": [ 40.0, 17.578395831246947 ], "wc_weaknesses_avg": [ 134.0, 84.45412956155549 ], "wc_questions_avg": [ 33.0, 32.36510466536452 ], "wc_limitations_avg": [ 7.0, 3.5355339059327378 ], "wc_review_avg": [ 277.0, 92.17917335276988 ], "wc_reply_reviewers_avg": [ 108.75, 149.22026504466476 ], "wc_reply_authors_avg": [ 155.5, 193.53617232961903 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13768745379792309840&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "korea.ac.kr;samsung.com;korea.ac.kr;korea.ac.kr;samsung.com;samsung.com;samsung.com;korea.ac.kr", "author_num": 8, "aff_unique_index": "0;1;0;0;1;1;1;0", "aff_unique_norm": "Korea University;Samsung", "aff_unique_dep": ";Samsung", "aff_unique_url": "https://www.korea.ac.kr;https://www.samsung.com", "aff_unique_abbr": "KU;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "FairJob: A Real-World Dataset for Fairness in Online Systems", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97523", "id": "lygceqe21t", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lygceqe21t", "openreview": "https://openreview.net/forum?id=lygceqe21t", "poster": "/media/PosterPDFs/NeurIPS%202024/97523.png?t=1733842338.3998444", "project": "", "author_site": "Mariia Vladimirova, Federico Pavone, Eustache Diemert", "tldr": "", "abstract": "We introduce a fairness-aware dataset for job recommendation in advertising, designed to foster research in algorithmic fairness within real-world scenarios. It was collected and prepared to comply with privacy standards and business confidentiality. An additional challenge is the lack of access to protected user attributes such as gender, for which we propose a pragmatic solution to obtain a proxy estimate. Despite being anonymized and including a proxy for a sensitive attribute, our dataset preserves predictive power and maintains a realistic and challenging benchmark. This dataset addresses a significant gap in the availability of fairness-focused resources for high-impact domains like advertising -- the actual impact being having access or not to precious employment opportunities, where balancing fairness and utility is a common industrial challenge. We also explore various stages in the advertising process where unfairness can occur and introduce a method to compute a fair utility metric for the job recommendations in online systems case from a biased dataset. Experimental evaluations of bias mitigation techniques on the released dataset demonstrate potential improvements in fairness and the associated trade-offs with utility.\n\nThe dataset is hosted at https://huggingface.co/datasets/criteo/FairJob. Source code for the experiments is hosted at https://github.com/criteo-research/FairJob-dataset/.", "keywords": "fairness;ads;tabular data;causal effect", "primary_area": "", "supplementary_material": "", "author": "Mariia Vladimirova;Federico Pavone;Eustache Diemert", "authorids": "~Mariia_Vladimirova2;~Federico_Pavone1;~Eustache_Diemert2", "gender": "F;;M", "homepage": "https://www.mvladimirova.com;https://fpavone.github.io/;", "dblp": "228/9261;;https://dblp.uni-trier.de/pers/hd/d/Diemert:Eustache", "google_scholar": ";Odn9XRkAAAAJ;https://scholar.google.fr/citations?user=33C_LQ0AAAAJ", "orcid": ";;0000-0003-2240-501X", "linkedin": ";;", "or_profile": "~Mariia_Vladimirova2;~Federico_Pavone1;~Eustache_Diemert2", "aff": "Criteo;Universit\u00e9 Paris-Dauphine - PSL;Criteo", "aff_domain": "criteo.com;dauphine.psl.eu;criteo.com", "position": "Researcher;Postdoc;Researcher", "bibtex": "@inproceedings{\nvladimirova2024fairjob,\ntitle={FairJob: A Real-World Dataset for Fairness in Online Systems},\nauthor={Mariia Vladimirova and Federico Pavone and Eustache Diemert},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=lygceqe21t}\n}", "github": "", "reviewers": "Z6gg;xwg7;XLhY;tYXm", "pdf_size": 1063170, "rating": "6;7;7;9", "confidence": "2;4;3;3", "wc_summary_and_contributions": "11;56;69;111", "wc_strengths": "6;18;17;201", "wc_improvement": "41;1;68;54", "wc_limitations": "18;1;30;49", "wc_correctness": "1;1;33;8", "wc_clarity": "1;1;7;23", "wc_relation_to_prior_work": "1;1;26;18", "wc_documentation": "1;1;65;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "81;81;316;484", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "2;0;2;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 61.75, 35.66072769868837 ], "wc_strengths_avg": [ 60.5, 81.2542306590863 ], "wc_improvement_avg": [ 41.0, 24.9899979991996 ], "wc_limitations_avg": [ 24.5, 17.5 ], "wc_correctness_avg": [ 10.75, 13.160072188251856 ], "wc_clarity_avg": [ 8.0, 9.0 ], "wc_relation_to_prior_work_avg": [ 11.5, 10.874281585465774 ], "wc_documentation_avg": [ 21.5, 26.16772821625905 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 240.5, 170.20061692015102 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.82915619758885 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3244428422615251, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=853836351552170695&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "criteo.com;dauphine.psl.eu;criteo.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Criteo;Universit\u00e9 Paris-Dauphine", "aff_unique_dep": ";", "aff_unique_url": "https://www.criteo.com;https://www.univ-paris-dauphine.fr", "aff_unique_abbr": "Criteo;UPD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Do LLMs Build World Representations? Probing Through the Lens of State Abstraction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93786", "id": "lzfzjYuWgY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=lzfzjYuWgY", "openreview": "https://openreview.net/forum?id=lzfzjYuWgY", "poster": "/media/PosterPDFs/NeurIPS%202024/93786.png?t=1731651308.5400028", "project": "", "author_site": "Zichao Li, Yanshuai Cao, Jackie CK Cheung", "tldr": "", "abstract": "How do large language models (LLMs) encode the state of the world, including the status of entities and their relations, as described by a text? While existing work directly probes for a complete state of the world, our research explores whether and how LLMs abstract this world state in their internal representations. We propose a new framework for probing for world representations through the lens of state abstraction theory from reinforcement learning, which emphasizes different levels of abstraction, distinguishing between general abstractions that facilitate predicting future states and goal-oriented abstractions that guide the subsequent actions to accomplish tasks. To instantiate this framework, we design a text-based planning task, where an LLM acts as an agent in an environment and interacts with objects in containers to achieve a specified goal state. Our experiments reveal that fine-tuning as well as advanced pre-training strengthens LLM-built representations' tendency of maintaining goal-oriented abstractions during decoding, prioritizing task completion over recovery of the world's state and dynamics.", "keywords": "Large Language Models;World Models;World Representation;Probing;Reinforcement Learning;State Abstraction", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zichao Li;Yanshuai Cao;Jackie CK Cheung", "authorids": "~Zichao_Li3;~Yanshuai_Cao1;~Jackie_CK_Cheung1", "gender": ";;M", "homepage": ";;http://cs.mcgill.ca/~jcheung/", "dblp": "95/147-3;;00/9012", "google_scholar": ";https://scholar.google.ca/citations?user=RTVRTSsAAAAJ;https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ", "orcid": ";;", "linkedin": ";yanshuai-cao-b59878a4/;", "or_profile": "~Zichao_Li3;~Yanshuai_Cao1;~Jackie_CK_Cheung1", "aff": "McGill University;Borealis AI;Microsoft", "aff_domain": "mcgill.ca;borealisai.com;microsoft.com", "position": "PhD student;Researcher;Consulting Researcher", "bibtex": "@inproceedings{\nli2024do,\ntitle={Do {LLM}s Build World Representations? Probing Through the Lens of State Abstraction},\nauthor={Zichao Li and Yanshuai Cao and Jackie CK Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=lzfzjYuWgY}\n}", "github": "", "reviewers": "nz33;9vdN;CXua;4mUW;ZobB", "pdf_size": 6459400, "rating": "4;6;7;7;7", "confidence": "4;3;4;4;4", "soundness": "3;3;3;4;3", "novelty": "3;2;3;3;2", "presentation": "3;3;3;4;3", "wc_summary": "173;115;91;128;145", "wc_strengths": "103;69;82;74;69", "wc_weaknesses": "244;235;279;125;120", "wc_questions": "6;2;163;86;9", "wc_limitations": "1;12;6;121;11", "wc_review": "527;433;621;534;354", "wc_reply_reviewers": "36;0;1200;50;0", "wc_reply_authors": "573;162;2319;507;159", "reply_reviewers": "1;0;2;1;0", "reply_authors": "4;3;8;4;3", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 130.4, 27.65212469232699 ], "wc_strengths_avg": [ 79.4, 12.7216351150314 ], "wc_weaknesses_avg": [ 200.6, 65.46021692600782 ], "wc_questions_avg": [ 53.2, 63.14237879586103 ], "wc_limitations_avg": [ 30.2, 45.5692879031481 ], "wc_review_avg": [ 493.8, 91.8028322003194 ], "wc_reply_reviewers_avg": [ 257.2, 471.8128442507686 ], "wc_reply_authors_avg": [ 744.0, 805.8515992414484 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 4.4, 1.8547236990991407 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.08574929257125442, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7347309398937080136&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "mcgill.ca;borealisai.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "McGill University;Borealis AI;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.mcgill.ca;https://www.borealisai.com;https://www.microsoft.com", "aff_unique_abbr": "McGill;Borealis AI;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Should We Really Edit Language Models? On the Evaluation of Edited Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93785", "id": "m0DS4OOmSY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m0DS4OOmSY", "openreview": "https://openreview.net/forum?id=m0DS4OOmSY", "poster": "/media/PosterPDFs/NeurIPS%202024/93785.png?t=1731224730.9760196", "project": "", "author_site": "Qi Li, Xiang Liu, Zhenheng Tang, Peijie Dong, Zeyu Li, Xinglin Pan, Xiaowen Chu", "tldr": "", "abstract": "Model editing has become an increasingly popular alternative for efficiently updating knowledge within language models. \nCurrent methods mainly focus on reliability, generalization, and locality, with many methods excelling across these criteria. \nSome recent works disclose the pitfalls of these editing methods such as knowledge distortion or conflict. However, the general abilities of post-edited language models remain unexplored. \nIn this paper, we perform a comprehensive evaluation on various editing methods and different language models, and have following findings.\n(1) Existing editing methods lead to inevitable performance deterioration on general benchmarks, indicating that existing editing methods maintain the general abilities of the model within only a few dozen edits.\nWhen the number of edits is slightly large, the intrinsic knowledge structure of the model is disrupted or even completely damaged. \n(2) Instruction-tuned models are more robust to editing, showing less performance drop on general knowledge after editing. \n(3) Language model with large scale is more resistant to editing compared to small model.\n(4) The safety of the edited model, is significantly weakened, even for those safety-aligned models.\nOur findings indicate that current editing methods are only suitable for small-scale knowledge updates within language models, which motivates further research on more practical and reliable editing methods.", "keywords": "Model Editing;Language Models;Language Model Evaluation", "primary_area": "evaluation", "supplementary_material": "", "author": "Qi Li;Xiang Liu;Zhenheng Tang;Peijie Dong;Zeyu Li;Xinglin Pan;Xiaowen Chu", "authorids": "~Qi_Li23;~Xiang_Liu10;~Zhenheng_Tang2;~Peijie_Dong1;~Zeyu_Li3;~Xinglin_Pan1;~Xiaowen_Chu2", "gender": ";M;;M;M;M;M", "homepage": ";https://dominic789654.github.io/;;https://pprp.github.io;https://zeyuli.cn;http://smilelab.uestc.edu.cn/members/panxinglin/;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/CHU-Xiaowen/xwchu", "dblp": ";31/5736-1;;315/4734;;273/3352;24/2536", "google_scholar": ";VtK5lwUAAAAJ;;TqS6s4gAAAAJ;EWZlQykAAAAJ;e0oE3QIAAAAJ;https://scholar.google.com.hk/citations?user=v4rX24EAAAAJ", "orcid": ";;;0000-0003-1952-4544;;;0000-0001-9745-4372", "linkedin": ";;;;;;", "or_profile": "~Qi_Li23;~Xiang_Liu10;~Zhenheng_Tang2;~Peijie_Dong1;~Zeyu_Li3;~Xinglin_Pan1;~Xiaowen_Chu2", "aff": ";Hong Kong University of Science and Technology (Guang Zhou));;The Hong Kong University of Science and Technology (Guang Zhou);Hong Kong University of Science and Technology;The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": ";hkust-gz.edu.cn;;connect.hkust-gz.edu.cn;connect.hkust-gz.edu.cn;hkust-gz.edu.cn;ust.hk", "position": ";PhD student;;Phd student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2024should,\ntitle={Should We Really Edit Language Models? On the Evaluation of Edited Language Models},\nauthor={Qi Li and Xiang Liu and Zhenheng Tang and Peijie Dong and Zeyu Li and Xinglin Pan and Xiaowen Chu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m0DS4OOmSY}\n}", "github": "", "reviewers": "FFCf;mS2B;2vUP", "pdf_size": 1217822, "rating": "5;6;6", "confidence": "2;4;4", "soundness": "4;3;3", "novelty": "3;2;3", "presentation": "4;3;4", "wc_summary": "71;50;132", "wc_strengths": "53;82;163", "wc_weaknesses": "88;272;267", "wc_questions": "50;3;51", "wc_limitations": "11;1;59", "wc_review": "273;408;672", "wc_reply_reviewers": "23;0;0", "wc_reply_authors": "107;272;133", "reply_reviewers": "1;0;0", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.33333333333333, 34.77866523539332 ], "wc_strengths_avg": [ 99.33333333333333, 46.549853800940014 ], "wc_weaknesses_avg": [ 209.0, 85.58426646683762 ], "wc_questions_avg": [ 34.666666666666664, 22.395436042987765 ], "wc_limitations_avg": [ 23.666666666666668, 25.315783394730033 ], "wc_review_avg": [ 451.0, 165.7045563646335 ], "wc_reply_reviewers_avg": [ 7.666666666666667, 10.842303978193728 ], "wc_reply_authors_avg": [ 170.66666666666666, 72.43541184316473 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5229063436504937593&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";hkust-gz.edu.cn;;connect.hkust-gz.edu.cn;connect.hkust-gz.edu.cn;hkust-gz.edu.cn;ust.hk", "author_num": 7, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Hong Kong SAR;Guangzhou", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AR-Pro: Counterfactual Explanations for Anomaly Repair with Formal Properties", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93784", "id": "m0jZUvlKl7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m0jZUvlKl7", "openreview": "https://openreview.net/forum?id=m0jZUvlKl7", "poster": "/media/PosterPDFs/NeurIPS%202024/93784.png?t=1731688734.8729596", "project": "", "author_site": "Xiayan Ji, Anton Xue, Eric Wong, Oleg Sokolsky, Insup Lee", "tldr": "", "abstract": "Anomaly detection is widely used for identifying critical errors and suspicious behaviors, but current methods lack interpretability.\nWe leverage common properties of existing methods and recent advances in generative models to introduce counterfactual explanations for anomaly detection.\nGiven an input, we generate its counterfactual as a diffusion-based repair that shows what a non-anomalous version $\\textit{should have looked like}$.\nA key advantage of this approach is that it enables a domain-independent formal specification of explainability desiderata, offering a unified framework for generating and evaluating explanations.\nWe demonstrate the effectiveness of our anomaly explainability framework, AR-Pro, on vision (MVTec, VisA) and time-series (SWaT, WADI, HAI) anomaly datasets. The code used for the experiments is accessible at: https://github.com/xjiae/arpro.", "keywords": "anomaly detection;anomaly explanation;anomaly repair;diffusion model", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/c1726cc57ec0c80dc0728f3ad080dc50940110d8.zip", "author": "Xiayan Ji;Anton Xue;Eric Wong;Oleg Sokolsky;Insup Lee", "authorids": "~Xiayan_Ji1;~Anton_Xue1;~Eric_Wong1;~Oleg_Sokolsky1;~Insup_Lee1", "gender": ";M;M;M;", "homepage": ";https://antonxue.github.io/;http://riceric22.github.io/;https://www.cis.upenn.edu/~sokolsky/;https://www.cis.upenn.edu/~lee/", "dblp": ";242/4544;64/1811-1.html;31/4030;l/InsupLee.html", "google_scholar": "dChT9WwAAAAJ;W6e3zzkAAAAJ;pWnTMRkAAAAJ;J0SKz5YAAAAJ;qPlUgrgAAAAJ", "orcid": ";;;0000-0001-5282-0658;0000-0003-2672-1132", "linkedin": "xiayan-ji-b52b7117b/;;;;", "or_profile": "~Xiayan_Ji1;~Anton_Xue1;~Eric_Wong1;~Oleg_Sokolsky1;~Insup_Lee1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;PhD student;Assistant Professor;Research Professor;Full Professor", "bibtex": "@inproceedings{\nji2024arpro,\ntitle={{AR}-Pro: Counterfactual Explanations for Anomaly Repair with Formal Properties},\nauthor={Xiayan Ji and Anton Xue and Eric Wong and Oleg Sokolsky and Insup Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m0jZUvlKl7}\n}", "github": "", "reviewers": "F7R5;k8Af;DUtN", "pdf_size": 9858112, "rating": "3;6;7", "confidence": "4;4;4", "soundness": "2;3;4", "novelty": "1;3;4", "presentation": "2;3;4", "wc_summary": "68;53;114", "wc_strengths": "35;40;43", "wc_weaknesses": "161;172;124", "wc_questions": "9;3;223", "wc_limitations": "37;3;24", "wc_review": "310;271;528", "wc_reply_reviewers": "12;72;11", "wc_reply_authors": "477;36;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 78.33333333333333, 25.952948879762307 ], "wc_strengths_avg": [ 39.333333333333336, 3.299831645537222 ], "wc_weaknesses_avg": [ 152.33333333333334, 20.531818125912658 ], "wc_questions_avg": [ 78.33333333333333, 102.32410381831949 ], "wc_limitations_avg": [ 21.333333333333332, 14.007934259633796 ], "wc_review_avg": [ 369.6666666666667, 113.08502209498047 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 28.522895287041873 ], "wc_reply_authors_avg": [ 171.0, 216.8732348631338 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7853695314100671896&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "seas.upenn.edu;seas.upenn.edu;upenn.edu;upenn.edu;upenn.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GLinSAT: The General Linear Satisfiability Neural Network Layer By Accelerated Gradient Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93783", "id": "m1PVjNHvtP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m1PVjNHvtP", "openreview": "https://openreview.net/forum?id=m1PVjNHvtP", "poster": "/media/PosterPDFs/NeurIPS%202024/93783.png?t=1731483975.7331326", "project": "", "author_site": "Hongtai Zeng, Chao Yang, Yanzhen Zhou, Cheng Yang, Qinglai Guo", "tldr": "", "abstract": "Ensuring that the outputs of neural networks satisfy specific constraints is crucial for applying neural networks to real-life decision-making problems. In this paper, we consider making a batch of neural network outputs satisfy bounded and general linear constraints. We first reformulate the neural network output projection problem as an entropy-regularized linear programming problem. We show that such a problem can be equivalently transformed into an unconstrained convex optimization problem with Lipschitz continuous gradient according to the duality theorem. Then, based on an accelerated gradient descent algorithm with numerical performance enhancement, we present our architecture, GLinSAT, to solve the problem. To the best of our knowledge, this is the first general linear satisfiability layer in which all the operations are differentiable and matrix-factorization-free. Despite the fact that we can explicitly perform backpropagation based on automatic differentiation mechanism, we also provide an alternative approach in GLinSAT to calculate the derivatives based on implicit differentiation of the optimality condition. Experimental results on constrained traveling salesman problems, partial graph matching with outliers, predictive portfolio allocation and power system unit commitment demonstrate the advantages of GLinSAT over existing satisfiability layers. Our implementation is available at https://github.com/HunterTracer/GLinSAT.", "keywords": "Differentiable general linear satisfiability neural network layer;Constraint Satisfaction;Accelerated gradient descent", "primary_area": "optimization", "supplementary_material": "/attachment/b9330e3e9d961dfdf9fb80bfbc1e760d792f4d98.zip", "author": "Hongtai Zeng;Chao Yang;Yanzhen Zhou;Cheng Yang;Qinglai Guo", "authorids": "~Hongtai_Zeng1;~Chao_Yang7;~Yanzhen_Zhou2;~Cheng_Yang3;~Qinglai_Guo1", "gender": ";;F;M;M", "homepage": ";;;;https://www.eea.tsinghua.edu.cn/en/faculties/guoqinglai.htm", "dblp": "374/3993;;;;", "google_scholar": ";;;5QdPzoAAAAAJ;6G8hSssAAAAJ", "orcid": "0000-0002-1080-7567;;0000-0003-4212-8099;;0000-0003-1435-5796", "linkedin": ";%E8%B6%85-%E6%9D%A8-400740153/;;;", "or_profile": "~Hongtai_Zeng1;~Chao_Yang7;~Yanzhen_Zhou2;~Cheng_Yang3;~Qinglai_Guo1", "aff": "Electrical Engineering;;Tsinghua University;Alibaba Group;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;;mail.tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn", "position": "PhD student;;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nzeng2024glinsat,\ntitle={{GL}in{SAT}: The General Linear Satisfiability Neural Network Layer By Accelerated Gradient Descent},\nauthor={Hongtai Zeng and Chao Yang and Yanzhen Zhou and Cheng Yang and Qinglai Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m1PVjNHvtP}\n}", "github": "", "reviewers": "MtC5;6WNJ;TMC5;U49f", "pdf_size": 664715, "rating": "5;6;7;8", "confidence": "2;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;3", "wc_summary": "53;125;73;96", "wc_strengths": "53;64;115;112", "wc_weaknesses": "33;248;198;52", "wc_questions": "7;2;176;90", "wc_limitations": "8;1;8;1", "wc_review": "154;440;570;351", "wc_reply_reviewers": "0;12;16;10", "wc_reply_authors": "0;28;29;25", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.75, 26.81767141270845 ], "wc_strengths_avg": [ 86.0, 27.793884219374593 ], "wc_weaknesses_avg": [ 132.75, 92.21001843617644 ], "wc_questions_avg": [ 68.75, 71.1033578672625 ], "wc_limitations_avg": [ 4.5, 3.5 ], "wc_review_avg": [ 378.75, 151.33633899364688 ], "wc_reply_reviewers_avg": [ 9.5, 5.894913061275798 ], "wc_reply_authors_avg": [ 20.5, 11.926860441876563 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9438798074485388, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WbTyfKL9WqQJ:scholar.google.com/&scioq=GLinSAT:+The+General+Linear+Satisfiability+Neural+Network+Layer+By+Accelerated+Gradient+Descent&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;;mail.tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Electrical Engineering Department;Tsinghua University;Alibaba Group", "aff_unique_dep": "Electrical Engineering;;", "aff_unique_url": ";https://www.tsinghua.edu.cn;https://www.alibaba.com", "aff_unique_abbr": ";THU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "title": "AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97522", "id": "m1YYAQjO3w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m1YYAQjO3w", "openreview": "https://openreview.net/forum?id=m1YYAQjO3w", "poster": "/media/PosterPDFs/NeurIPS%202024/97522.png?t=1733316312.9531724", "project": "", "author_site": "Edoardo Debenedetti, Jie Zhang, Mislav Balunovic, Luca Beurer-Kellner, Marc Fischer, Florian Tramer", "tldr": "", "abstract": "AI agents aim to solve complex tasks by combining text-based reasoning with external tool calls.\nUnfortunately, AI agents are vulnerable to prompt injection attacks where data returned by external tools hijacks the agent to execute malicious tasks.\nTo measure the adversarial robustness of AI agents, we introduce AgentDojo, an evaluation framework for agents that execute tools over untrusted data.\nTo capture the evolving nature of attacks and defenses, AgentDojo is not a static test suite, but rather an extensible environment for designing and evaluating new agent tasks, defenses, and adaptive attacks.\nWe populate the environment with 97 realistic tasks (e.g., managing an email client, navigating an e-banking website, or making travel bookings), 629 security test cases, and various attack and defense paradigms from the literature.\nWe find that AgentDojo poses a challenge for both attacks and defenses: state-of-the-art LLMs fail at many tasks (even in the absence of attacks), and existing prompt injection attacks break some security properties but not all. We hope that AgentDojo can foster research on new design principles for AI agents that solve common tasks in a reliable and robust manner.", "keywords": "large language models;prompt injection;agents;tool calling;security", "primary_area": "", "supplementary_material": "/attachment/cefd4ff1c2b85eb956e15539f7b9c1ae126b9c15.zip", "author": "Edoardo Debenedetti;Jie Zhang;Mislav Balunovic;Luca Beurer-Kellner;Marc Fischer;Florian Tram\u00e8r", "authorids": "~Edoardo_Debenedetti1;~Jie_Zhang14;~Mislav_Balunovic1;~Luca_Beurer-Kellner1;~Marc_Fischer1;~Florian_Tram\u00e8r1", "gender": "M;M;M;M;M;M", "homepage": "https://edoardo.science/;https://zj-jayzhang.github.io/;https://www.sri.inf.ethz.ch/people/mislav;;;http://floriantramer.com", "dblp": "319/7073;84/6889-81;231/7686;314/2627;37/9373-2;158/7224", "google_scholar": "6Urve9wAAAAJ;soDBSE8AAAAJ;fxkgmGwAAAAJ;https://scholar.google.com/citations?hl=de;;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ", "orcid": "0000-0003-3343-9477;;;;;", "linkedin": "https://linkedin.com/in/edoardo-debenedetti/;;;;;", "or_profile": "~Edoardo_Debenedetti1;~Jie_Zhang14;~Mislav_Balunovic1;~Luca_Beurer-Kellner1;~Marc_Fischer1;~Florian_Tramer1", "aff": "Google;Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;ETHZ - ETH Zurich", "aff_domain": "google.com;inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "position": "Intern;PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndebenedetti2024agentdojo,\ntitle={AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for {LLM} Agents},\nauthor={Edoardo Debenedetti and Jie Zhang and Mislav Balunovic and Luca Beurer-Kellner and Marc Fischer and Florian Tram{\\`e}r},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=m1YYAQjO3w}\n}", "github": "", "reviewers": "Evxt;JjxT;Ri9d;kow4", "pdf_size": 900328, "rating": "5;6;6;7", "confidence": "3;3;4;4", "wc_summary_and_contributions": "114;102;116;63", "wc_strengths": "68;2;3;139", "wc_improvement": "7;18;7;58", "wc_limitations": "7;11;61;5", "wc_correctness": "17;1;49;1", "wc_clarity": "5;1;7;1", "wc_relation_to_prior_work": "10;1;1;1", "wc_documentation": "19;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "248;138;246;270", "wc_reply_reviewers": "142;24;7;16", "wc_reply_authors": "333;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 98.75, 21.323402636539974 ], "wc_strengths_avg": [ 53.0, 56.39592183837409 ], "wc_improvement_avg": [ 22.5, 20.982135258357285 ], "wc_limitations_avg": [ 21.0, 23.194827009486403 ], "wc_correctness_avg": [ 17.0, 19.595917942265423 ], "wc_clarity_avg": [ 3.5, 2.598076211353316 ], "wc_relation_to_prior_work_avg": [ 3.25, 3.897114317029974 ], "wc_documentation_avg": [ 5.5, 7.794228634059948 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 225.5, 51.388228223981415 ], "wc_reply_reviewers_avg": [ 47.25, 55.033512517374355 ], "wc_reply_authors_avg": [ 83.25, 144.19322973010904 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18413357951230627732&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 2, "email": "google.com;inf.ethz.ch;ethz.ch;ethz.ch;ethz.ch;ethz.ch", "author_num": 6, "aff_unique_index": "0;1;2;1;2;1", "aff_unique_norm": "Google;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Google;Department of Computer Science;", "aff_unique_url": "https://www.google.com;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "Google;ETHZ;ETH Zurich", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Zurich;", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Generalization Error Bounds for Two-stage Recommender Systems with Tree Structure", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93782", "id": "m1a4CrRJR7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m1a4CrRJR7", "openreview": "https://openreview.net/forum?id=m1a4CrRJR7", "poster": "/media/PosterPDFs/NeurIPS%202024/93782.png?t=1731683541.5991454", "project": "", "author_site": "Jin Zhang, Ze Liu, Defu Lian, Enhong Chen", "tldr": "", "abstract": "Two-stage recommender systems play a crucial role in efficiently identifying relevant items and personalizing recommendations from a vast array of options. This paper, based on an error decomposition framework, analyzes the generalization error for two-stage recommender systems with a tree structure, which consist of an efficient tree-based retriever and a more precise yet time-consuming ranker. We use the Rademacher complexity to establish the generalization upper bound for various tree-based retrievers using beam search, as well as for different ranker models under a shifted training distribution. Both theoretical insights and practical experiments on real-world datasets indicate that increasing the branches in tree-based retrievers and harmonizing distributions across stages can enhance the generalization performance of two-stage recommender systems.", "keywords": "Two-stage Recommender Systems;Recommender Systems;Generalization error bounds;Rademacher complexities;Tree-based Learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/f4a1019fc100b199547ca32632e0acce9ea849da.zip", "author": "Jin Zhang;Ze Liu;Defu Lian;Enhong Chen", "authorids": "~Jin_Zhang18;~Ze_Liu4;~Defu_Lian1;~Enhong_Chen1", "gender": "M;M;M;M", "homepage": "https://jzhang-0.github.io/;https://www.ustc.edu.cn/;https://faculty.ustc.edu.cn/liandefu/en/index.htm;http://staff.ustc.edu.cn/~cheneh", "dblp": ";;87/10734;07/258", "google_scholar": "VNXrlX0AAAAJ;;QW0ad4sAAAAJ;Q9h02J0AAAAJ", "orcid": ";0009-0002-5208-8908;0000-0002-3507-9607;0000-0002-4835-4102", "linkedin": ";;;", "or_profile": "~Jin_Zhang18;~Ze_Liu4;~Defu_Lian1;~Enhong_Chen1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024generalization,\ntitle={Generalization Error Bounds for Two-stage Recommender Systems with Tree Structure},\nauthor={Jin Zhang and Ze Liu and Defu Lian and Enhong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m1a4CrRJR7}\n}", "github": "", "reviewers": "g6F7;homr;1EMB;pcvU", "pdf_size": 489953, "rating": "7;7;7;8", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "4;2;3;3", "wc_summary": "116;41;90;95", "wc_strengths": "169;102;150;178", "wc_weaknesses": "27;37;177;98", "wc_questions": "14;1;2;97", "wc_limitations": "46;18;1;1", "wc_review": "372;199;420;469", "wc_reply_reviewers": "20;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.5, 27.48181216732259 ], "wc_strengths_avg": [ 149.75, 29.363029475856198 ], "wc_weaknesses_avg": [ 84.75, 59.79287164871746 ], "wc_questions_avg": [ 28.5, 39.87793876318083 ], "wc_limitations_avg": [ 16.5, 18.39157415774952 ], "wc_review_avg": [ 365.0, 101.79145347228322 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:wDj7bLtmRU4J:scholar.google.com/&scioq=Generalization+Error+Bounds+for+Two-stage+Recommender+Systems+with+Tree+Structure&hl=en&as_sdt=0,47", "gs_version_total": 2, "email": "mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Scanning Trojaned Models Using Out-of-Distribution Samples", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93781", "id": "m296WJXyzQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m296WJXyzQ", "openreview": "https://openreview.net/forum?id=m296WJXyzQ", "poster": "", "project": "", "author_site": "Hossein Mirzaei, Ali Ansari, Bahar Dibaei Nia, Mojtaba Nafez, Moein Madadi, Sepehr Rezaee, Zeinab Taghavi, Arad Maleki, Kian Shamsaie, Mahdi Hajialilue, Jafar Habibi, Mohammad Sabokrou, Mohammad Hossein Rohban", "tldr": "", "abstract": "Scanning for trojan (backdoor) in deep neural networks is crucial due to their significant real-world applications. There has been an increasing focus on developing effective general trojan scanning methods across various trojan attacks. Despite advancements, there remains a shortage of methods that perform effectively without preconceived assumptions about the backdoor attack method. Additionally, we have observed that current methods struggle to identify classifiers trojaned using adversarial training. Motivated by these challenges, our study introduces a novel scanning method named TRODO (TROjan scanning by Detection of adversarial shifts in Out-of-distribution samples). TRODO leverages the concept of \"blind spots\"\u2014regions where trojaned classifiers erroneously identify out-of-distribution (OOD) samples as in-distribution (ID). We scan for these blind spots by adversarially shifting OOD samples towards in-distribution. The increased likelihood of perturbed OOD samples being classified as ID serves as a signature for trojan detection. TRODO is both trojan and label mapping agnostic, effective even against adversarially trained trojaned classifiers. It is applicable even in scenarios where training data is absent, demonstrating high accuracy and adaptability across various scenarios and datasets, highlighting its potential as a robust trojan scanning strategy.", "keywords": "Trojan Scanning Method;Trojan Post-Training Defense;Backdoor Attacks;Out-of-Distribution Samples;Adversarially Perturbed Out-of-Distribution Samples", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/9f6232153a8708654d58a0fadd512fd893b8f78e.zip", "author": "Hossein Mirzaei;Ali Ansari;Bahar Dibaei Nia;Mojtaba Nafez;Moein Madadi;Sepehr Rezaee;Zeinab Sadat Taghavi;Arad Maleki;Kian Shamsaie;Mahdi Hajialilue;Jafar Habibi;Mohammad Sabokrou;Mohammad Hossein Rohban", "authorids": "~Hossein_Mirzaei1;~Ali_Ansari2;~Bahar_Dibaei_Nia1;~Mojtaba_Nafez1;~Moein_Madadi1;~Sepehr_Rezaee1;~Zeinab_Sadat_Taghavi1;~Arad_Maleki1;~Kian_Shamsaie3;~Mahdi_Hajialilue1;~Jafar_Habibi1;~Mohammad_Sabokrou1;~Mohammad_Hossein_Rohban1", "gender": "M;M;F;M;M;M;F;;M;M;M;M;M", "homepage": ";https://allliance.github.io;https://bhrdbn.github.io;https://mojtaba-nafez.github.io/;https://github.com/moein72002;https://sepehrrezaee.github.io/;https://zeinabtaghavi.github.io/;;https://openreview.net/;;http://sharif.ir/~jhabibi/;https://sabokrou.github.io/;http://sharif.edu/~rohban/", "dblp": ";200/9876-1;;;;;130/3941-1;;;;;163/2030;43/8108", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;WYxYPXUAAAAJ;R3e5mekAAAAJ;;;kYl9IWkAAAAJ;G9So2dEAAAAJ;;;;https://scholar.google.com.tw/citations?user=fKvyreEAAAAJ;jqHXvT0AAAAJ;pRyJ6FkAAAAJ", "orcid": ";0000-0002-9798-6966;;;;;0009-0003-0663-2754;;;;;;", "linkedin": "hossein-mirzaei-6bb2301aa;https://linkedin.com/in/ali-ansari-195999213;bahar-dibaeinia-19a257185;;moein-madadi/;sepehr-rezaee-136815216/;zeinab-taghavi/;;;mahdi-hajialilue-0604a4226/;;;", "or_profile": "~Hossein_Mirzaei1;~Ali_Ansari2;~Bahar_Dibaei_Nia1;~Mojtaba_Nafez1;~Moein_Madadi1;~Sepehr_Rezaee1;~Zeinab_Sadat_Taghavi1;~Arad_Maleki1;~Kian_Shamsaie3;~Mahdi_Hajialilue1;~Jafar_Habibi1;~Mohammad_Sabokrou1;~Mohammad_Hossein_Rohban1", "aff": "EPFL - EPF Lausanne;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;The National University of Iran;Sharif University of Technology, Sharif University of Technology;;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Okinawa Institute of Science and Technology (OIST);Sharif University of Technology", "aff_domain": "epfl.ch;sharif.edu;sharif.edu;sharif.ac.ir;sharif.edu;sbu.ac.ir;ce.sharif.edu;;sharif.edu;sharif.edu;sharif.edu;oist.jp;sharif.edu", "position": "PhD student;Undergrad student;Undergrad student;MS student;Undergrad student;Undergrad student;MS student;;Undergrad student;Undergrad student;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nmirzaei2024scanning,\ntitle={Scanning Trojaned Models Using Out-of-Distribution Samples},\nauthor={Hossein Mirzaei and Ali Ansari and Bahar Dibaei Nia and Mojtaba Nafez and Moein Madadi and Sepehr Rezaee and Zeinab Sadat Taghavi and Arad Maleki and Kian Shamsaie and Mahdi Hajialilue and Jafar Habibi and Mohammad Sabokrou and Mohammad Hossein Rohban},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m296WJXyzQ}\n}", "github": "", "reviewers": "PnTa;d7KJ;3HiF;CctU;T6nS;eToF", "pdf_size": 6089357, "rating": "3;4;5;6;6;6", "confidence": "3;4;4;4;4;4", "soundness": "3;3;3;2;4;3", "novelty": "3;3;2;3;4;3", "presentation": "3;2;3;1;4;2", "wc_summary": "94;95;93;53;188;52", "wc_strengths": "66;77;11;46;74;40", "wc_weaknesses": "212;122;159;229;67;193", "wc_questions": "50;34;16;71;219;2", "wc_limitations": "4;5;4;113;30;8", "wc_review": "426;333;283;512;578;295", "wc_reply_reviewers": "676;0;25;126;254;19", "wc_reply_authors": "1363;0;31;27;476;24", "reply_reviewers": "3;0;1;1;2;1", "reply_authors": "4;1;2;2;2;2", "rating_avg": [ 5.0, 1.1547005383792515 ], "confidence_avg": [ 3.8333333333333335, 0.3726779962499649 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.0, 0.5773502691896257 ], "presentation_avg": [ 2.5, 0.9574271077563381 ], "wc_summary_avg": [ 95.83333333333333, 45.20846774173568 ], "wc_strengths_avg": [ 52.333333333333336, 22.96857756926962 ], "wc_weaknesses_avg": [ 163.66666666666666, 55.62872958782679 ], "wc_questions_avg": [ 65.33333333333333, 72.2349561423615 ], "wc_limitations_avg": [ 27.333333333333332, 39.38555854907002 ], "wc_review_avg": [ 404.5, 111.04466068508953 ], "wc_reply_reviewers_avg": [ 183.33333333333334, 236.88511045558676 ], "wc_reply_authors_avg": [ 320.1666666666667, 495.239139765382 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820632 ], "reply_authors_avg": [ 2.1666666666666665, 0.8975274678557507 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.7745966692414835, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9184727112981187718&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "epfl.ch;sharif.edu;sharif.edu;sharif.ac.ir;sharif.edu;sbu.ac.ir;ce.sharif.edu;;sharif.edu;sharif.edu;sharif.edu;oist.jp;sharif.edu", "author_num": 13, "aff_unique_index": "0;1;1;1;1;2;1;1;1;1;3;1", "aff_unique_norm": "EPFL;Sharif University of Technology;National University of Iran;Okinawa Institute of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.epfl.ch;https://www.sharif.edu;https://www.nui.ac.ir;https://www.oist.jp", "aff_unique_abbr": "EPFL;SUT;;OIST", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;1;2;1", "aff_country_unique": "Switzerland;Iran;Japan" }, { "title": "Practical Shuffle Coding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93780", "id": "m2DaXpCoIi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m2DaXpCoIi", "openreview": "https://openreview.net/forum?id=m2DaXpCoIi", "poster": "", "project": "", "author_site": "Julius Kunze, Daniel Severo, Jan-Willem van de Meent, James Townsend", "tldr": "", "abstract": "We present a general method for lossless compression of unordered data structures, including multisets and graphs. It is a variant of shuffle coding that is many orders of magnitude faster than the original and enables 'one-shot' compression of single unordered objects. Our method achieves state-of-the-art compression rates on various large-scale network graphs at speeds of megabytes per second, efficiently handling even a multi-gigabyte plain graph with one billion edges. We release an implementation that can be easily adapted to different data types and statistical models.", "keywords": "graph compression;entropy coding;bits-back coding;lossless compression;generative models;information theory;probabilistic models;graph neural networks;multiset compression;asymmetric numeral systems;compression;entropy;shuffle coding", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Julius Kunze;Daniel Severo;Jan-Willem van de Meent;James Townsend", "authorids": "~Julius_Kunze1;~Daniel_Severo1;~Jan-Willem_van_de_Meent1;~James_Townsend1", "gender": "M;M;M;M", "homepage": "http://juliuskunze.com;http://dsevero.com;https://jwvdm.github.io/;https://j-towns.github.io", "dblp": "202/2450;249/9390;137/3263;159/2177", "google_scholar": "Nte3grUAAAAJ;5bQjLz4AAAAJ;CX9Lu38AAAAJ;https://scholar.google.co.uk/citations?user=fhYBZTcAAAAJ", "orcid": ";0000-0003-0472-5300;0000-0001-9465-5398;0000-0002-4210-2393", "linkedin": "juliuskunze/;danielsevero/;;", "or_profile": "~Julius_Kunze1;~Daniel_Severo1;~Jan-Willem_van_de_Meent1;~James_Townsend1", "aff": "University College London;Vector Institute;University of Amsterdam;University of Amsterdam", "aff_domain": "ucl.ac.uk;vectorinstitute.ai;uva.nl;uva.nl", "position": "PhD Student;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nkunze2024practical,\ntitle={Practical Shuffle Coding},\nauthor={Julius Kunze and Daniel Severo and Jan-Willem van de Meent and James Townsend},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m2DaXpCoIi}\n}", "github": "", "reviewers": "Htst;NPhs;tkqf;AUZQ", "pdf_size": 505576, "rating": "4;6;7;7", "confidence": "4;2;3;3", "soundness": "3;3;4;3", "novelty": "2;3;3;2", "presentation": "2;4;4;3", "wc_summary": "55;29;134;85", "wc_strengths": "34;37;96;65", "wc_weaknesses": "32;33;119;192", "wc_questions": "29;7;106;156", "wc_limitations": "18;16;19;33", "wc_review": "168;122;474;531", "wc_reply_reviewers": "0;14;44;153", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 75.75, 39.03443992168967 ], "wc_strengths_avg": [ 58.0, 25.0499500997507 ], "wc_weaknesses_avg": [ 94.0, 66.69707639769527 ], "wc_questions_avg": [ 74.5, 59.70971445250764 ], "wc_limitations_avg": [ 21.5, 6.726812023536855 ], "wc_review_avg": [ 323.75, 180.61613300034966 ], "wc_reply_reviewers_avg": [ 52.75, 60.022391655114845 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15488655900709674833&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 0, "email": "ucl.ac.uk;vectorinstitute.ai;uva.nl;uva.nl", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University College London;Vector Institute;University of Amsterdam", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://vectorinstitute.ai/;https://www.uva.nl", "aff_unique_abbr": "UCL;Vector Institute;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United Kingdom;Canada;Netherlands" }, { "title": "Deep Graph Mating", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93779", "id": "m4NI2yIwJA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m4NI2yIwJA", "openreview": "https://openreview.net/forum?id=m4NI2yIwJA", "poster": "/media/PosterPDFs/NeurIPS%202024/93779.png?t=1731499207.1230063", "project": "", "author_site": "Yongcheng Jing, Seok-Hee Hong, Dacheng Tao", "tldr": "", "abstract": "In this paper, we introduce the first learning-free model reuse task within the non-Euclidean domain, termed as Deep Graph Mating (Grama). We strive to create a child Graph Neural Network (GNN) that integrates knowledge from pre-trained parent models without requiring re-training, fine-tuning, or annotated labels. To this end, we begin by investigating the permutation invariance property of GNNs, which leads us to develop two vanilla approaches for Grama: Vanilla Parameter Interpolation (VPI) and Vanilla Alignment Prior to Interpolation (VAPI), both employing topology-independent interpolation in the parameter space. However, neither approach has achieved the anticipated results. Through theoretical analysis of VPI and VAPI, we identify critical challenges unique to Grama, including increased sensitivity to parameter misalignment and further the inherent topology-dependent complexities. Motivated by these findings, we propose the Dual-Message Coordination and Calibration (DuMCC) methodology, comprising the Parent Message Coordination (PMC) scheme to optimise the permutation matrices for parameter interpolation by coordinating aggregated messages, and the Child Message Calibration (CMC) scheme to mitigate over-smoothing identified in PMC by calibrating the message statistics within child GNNs. Experiments across diverse domains, including node and graph property prediction, 3D object recognition, and large-scale semantic parsing, demonstrate that the proposed DuMCC effectively enables training-free knowledge transfer, yielding results on par with those of pre-trained models.", "keywords": "Efficient Learning;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/d30f83567540154fa9838461a3f3c2739e035cab.zip", "author": "Yongcheng Jing;Seok-Hee Hong;Dacheng Tao", "authorids": "~Yongcheng_Jing1;~Seok-Hee_Hong1;~Dacheng_Tao1", "gender": ";F;", "homepage": ";https://www.sydney.edu.au/engineering/about/our-people/academic-staff/seokhee-hong.html;", "dblp": ";h/SeokHeeHong.html;", "google_scholar": ";https://scholar.google.com.au/citations?user=ctnBV1kAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yongcheng_Jing1;~Seok-Hee_Hong1;~Dacheng_Tao1", "aff": ";University of Sydney;", "aff_domain": ";sydney.edu.au;", "position": ";Full Professor;", "bibtex": "@inproceedings{\njing2024deep,\ntitle={Deep Graph Mating},\nauthor={Yongcheng Jing and Seok-Hee Hong and Dacheng Tao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m4NI2yIwJA}\n}", "github": "", "reviewers": "tRzs;856w;rPnJ;Uv5e", "pdf_size": 3008170, "rating": "6;6;6;7", "confidence": "5;4;5;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "257;88;101;177", "wc_strengths": "328;87;91;199", "wc_weaknesses": "211;71;343;696", "wc_questions": "90;5;56;64", "wc_limitations": "14;11;7;32", "wc_review": "900;262;598;1168", "wc_reply_reviewers": "0;163;20;138", "wc_reply_authors": "0;291;736;837", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 155.75, 67.62164963974186 ], "wc_strengths_avg": [ 176.25, 98.46160419168478 ], "wc_weaknesses_avg": [ 330.25, 232.038116480892 ], "wc_questions_avg": [ 53.75, 30.825111516424396 ], "wc_limitations_avg": [ 16.0, 9.565563234854496 ], "wc_review_avg": [ 732.0, 338.07395640599117 ], "wc_reply_reviewers_avg": [ 80.25, 71.15607844731187 ], "wc_reply_authors_avg": [ 466.0, 338.4974150565998 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16586278169077932692&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": ";sydney.edu.au;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.sydney.edu.au", "aff_unique_abbr": "USYD", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "Practical Bayesian Algorithm Execution via Posterior Sampling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93778", "id": "m4ZcDrVvid", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m4ZcDrVvid", "openreview": "https://openreview.net/forum?id=m4ZcDrVvid", "poster": "", "project": "", "author_site": "Chu Xin Cheng, Raul Astudillo, Thomas A Desautels, Yisong Yue", "tldr": "", "abstract": "We consider Bayesian algorithm execution (BAX), a framework for efficiently selecting evaluation points of an expensive function to infer a property of interest encoded as the output of a base algorithm. Since the base algorithm typically requires more evaluations than are feasible, it cannot be directly applied. Instead, BAX methods sequentially select evaluation points using a probabilistic numerical approach. Current BAX methods use expected information gain to guide this selection. However, this approach is computationally intensive. Observing that, in many tasks, the property of interest corresponds to a target set of points defined by the function, we introduce PS-BAX, a simple, effective, and scalable BAX method based on posterior sampling. PS-BAX is applicable to a wide range of problems, including many optimization variants and level set estimation. Experiments across diverse tasks demonstrate that PS-BAX performs competitively with existing baselines while being significantly faster, simpler to implement, and easily parallelizable, setting a strong baseline for future research. Additionally, we establish conditions under which PS-BAX is asymptotically convergent, offering new insights into posterior sampling as an algorithm design paradigm.", "keywords": "Bayesian algorithm execution;Bayesian optimization;posterior sampling;probabilistic numerics", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Chu Xin Cheng;Raul Astudillo;Thomas Desautels;Yisong Yue", "authorids": "~Chu_Xin_Cheng1;~Raul_Astudillo1;~Thomas_Desautels1;~Yisong_Yue1", "gender": "F;M;;M", "homepage": "https://clorischeng.github.io/;https://raulastudillo.netlify.app/;;http://www.yisongyue.com", "dblp": "366/3945;242/3889;124/8980;28/1244", "google_scholar": ";r1Jkj7MAAAAJ;;tEk4qo8AAAAJ", "orcid": ";;;0000-0001-9127-1989", "linkedin": ";;;yisongyue/", "or_profile": "~Chu_Xin_Cheng1;~Raul_Astudillo1;~Thomas_Desautels1;~Yisong_Yue1", "aff": "California Institute of Technology;California Institute of Technology;Lawrence Livermore National Labs;California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu;llnl.gov;caltech.edu", "position": "Undergrad student;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\ncheng2024practical,\ntitle={Practical Bayesian Algorithm Execution via Posterior Sampling},\nauthor={Chu Xin Cheng and Raul Astudillo and Thomas Desautels and Yisong Yue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m4ZcDrVvid}\n}", "github": "", "reviewers": "Ta95;FDTf;qVHf;BTqx", "pdf_size": 867397, "rating": "5;5;5;7", "confidence": "2;4;3;3", "soundness": "3;1;2;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "137;61;77;25", "wc_strengths": "21;9;34;34", "wc_weaknesses": "45;235;149;24", "wc_questions": "196;3;121;60", "wc_limitations": "59;16;1;35", "wc_review": "458;324;382;178", "wc_reply_reviewers": "18;195;214;7", "wc_reply_authors": "36;453;950;19", "reply_reviewers": "1;2;2;1", "reply_authors": "2;3;4;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.0, 40.44749683231337 ], "wc_strengths_avg": [ 24.5, 10.404326023342406 ], "wc_weaknesses_avg": [ 113.25, 84.74188751733112 ], "wc_questions_avg": [ 95.0, 71.70425370924657 ], "wc_limitations_avg": [ 27.75, 21.695333599647643 ], "wc_review_avg": [ 335.5, 102.59995126704496 ], "wc_reply_reviewers_avg": [ 108.5, 96.31329087929662 ], "wc_reply_authors_avg": [ 364.5, 380.1068928604163 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kQv3qCONGnAJ:scholar.google.com/&scioq=Practical+Bayesian+Algorithm+Execution+via+Posterior+Sampling&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "caltech.edu;caltech.edu;llnl.gov;caltech.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "California Institute of Technology;Lawrence Livermore National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;https://www.llnl.gov", "aff_unique_abbr": "Caltech;LLNL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are More LLM Calls All You Need? Towards the Scaling Properties of Compound AI Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93777", "id": "m5106RRLgx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m5106RRLgx", "openreview": "https://openreview.net/forum?id=m5106RRLgx", "poster": "/media/PosterPDFs/NeurIPS%202024/93777.png?t=1731714826.3373487", "project": "", "author_site": "Lingjiao Chen, Jared Quincy Davis, Boris Hanin, Peter Bailis, Ion Stoica, Matei A Zaharia, James Zou", "tldr": "", "abstract": "Many recent state-of-the-art results in language tasks were achieved using compound systems that perform multiple Language Model (LM) calls and aggregate their responses. However, there is little understanding of how the number of LM calls -- e.g., when asking the LM to answer each question multiple times and taking a majority vote -- affects such a compound system's performance. In this paper, we initiate the study of scaling properties of compound inference systems. We analyze, theoretically and empirically, how the number of LM calls affects the performance of Vote and Filter-Vote, two of the simplest compound system designs, which aggregate LM responses via majority voting, optionally applying LM filters. We find, surprisingly, that across multiple language tasks, the performance of both Vote and Filter-Vote can first increase but then decrease as a function of the number of LM calls. Our theoretical results suggest that this non-monotonicity is due to the diversity of query difficulties within a task: more LM calls lead to higher performance on \"easy\" queries, but lower performance on \"hard\" queries, and non-monotone behavior can emerge when a task contains both types of queries. This insight then allows us to compute, from a small number of samples, the number of LM calls that maximizes system performance, and define an analytical scaling model for both systems. Experiments show that our scaling model can accurately predict the performance of Vote and Filter-Vote systems and thus find the optimal number of LM calls to make.", "keywords": "Scaling Laws; Compound AI systems; language models", "primary_area": "generative_models", "supplementary_material": "", "author": "Lingjiao Chen;Jared Quincy Davis;Boris Hanin;Peter Bailis;Ion Stoica;Matei Zaharia;James Zou", "authorids": "~Lingjiao_Chen1;~Jared_Quincy_Davis2;~Boris_Hanin1;~Peter_Bailis2;~Ion_Stoica1;~Matei_Zaharia1;~James_Zou1", "gender": ";M;;M;M;M;", "homepage": ";;https://hanin.princeton.edu;http://www.bailis.org/;http://people.eecs.berkeley.edu/~istoica/;https://cs.stanford.edu/~matei/;", "dblp": "131/6638.html;37/1018;205/2534;47/8816;s/IonStoica;36/2133;", "google_scholar": ";IfUdw2MAAAAJ;;qG1LVpQAAAAJ;vN-is70AAAAJ;I1EvjZsAAAAJ;23ZXZvEAAAAJ", "orcid": ";;;;;0000-0002-7547-7204;", "linkedin": ";;;;ionstoica;mateizaharia/;", "or_profile": "~Lingjiao_Chen1;~Jared_Quincy_Davis2;~Boris_Hanin1;~Peter_Bailis2;~Ion_Stoica1;~Matei_Zaharia1;~James_Zou1", "aff": "Stanford University;Stanford University;Princeton University;Stanford University;University of California, Berkeley;Databricks;Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu;princeton.edu;stanford.edu;berkeley.edu;databricks.com;stanford.edu", "position": "PhD student;PhD student;Assistant Professor;Adjunct Professor;Full Professor;CTO;Assistant Professor", "bibtex": "@inproceedings{\nchen2024are,\ntitle={Are More {LLM} Calls All You Need? Towards the Scaling Properties of Compound {AI} Systems},\nauthor={Lingjiao Chen and Jared Quincy Davis and Boris Hanin and Peter Bailis and Ion Stoica and Matei Zaharia and James Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m5106RRLgx}\n}", "github": "", "reviewers": "NgrA;A9Tt;sDbu;DYM5", "pdf_size": 1466548, "rating": "5;5;7;8", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;4;4", "wc_summary": "105;25;142;124", "wc_strengths": "21;24;136;56", "wc_weaknesses": "128;23;90;67", "wc_questions": "5;11;24;104", "wc_limitations": "2;82;74;10", "wc_review": "261;165;466;361", "wc_reply_reviewers": "59;16;0;113", "wc_reply_authors": "9;10;0;32", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 99.0, 44.68221122549778 ], "wc_strengths_avg": [ 59.25, 46.38628568876797 ], "wc_weaknesses_avg": [ 77.0, 38.03288051147323 ], "wc_questions_avg": [ 36.0, 39.85599076675927 ], "wc_limitations_avg": [ 42.0, 36.22154055254967 ], "wc_review_avg": [ 313.25, 112.16143499438655 ], "wc_reply_reviewers_avg": [ 47.0, 43.78926809162263 ], "wc_reply_authors_avg": [ 12.75, 11.776565713313877 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17514539612962077286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;cs.stanford.edu;princeton.edu;stanford.edu;berkeley.edu;databricks.com;stanford.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;2;3;0", "aff_unique_norm": "Stanford University;Princeton University;University of California, Berkeley;Databricks", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu;https://www.berkeley.edu;https://databricks.com", "aff_unique_abbr": "Stanford;Princeton;UC Berkeley;Databricks", "aff_campus_unique_index": "0;0;0;2;0", "aff_campus_unique": "Stanford;;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Label Delay in Online Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93776", "id": "m5CAnUui0Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m5CAnUui0Z", "openreview": "https://openreview.net/forum?id=m5CAnUui0Z", "poster": "", "project": "", "author_site": "Botos Csaba, Wenxuan Zhang, Matthias M\u00fcller, Ser Nam Lim, Philip Torr, Adel Bibi", "tldr": "", "abstract": "Online continual learning, the process of training models on streaming data, has gained increasing attention in recent years. However, a critical aspect often overlooked is the label delay, where new data may not be labeled due to slow and costly annotation processes. We introduce a new continual learning framework with explicit modeling of the label delay between data and label streams over time steps. In each step, the framework reveals both unlabeled data from the current time step t and labels delayed with d steps, from the time step t\u2212d. In our extensive experiments amounting to 1060 GPU days, we show that merely augmenting the computational resources is insufficient to tackle this challenge. Our findings underline a notable performance decline when solely relying on labeled data when the label delay becomes significant. More surprisingly, when using state-of-the-art SSL and TTA techniques to utilize the newer, unlabeled data, they fail to surpass the performance of a na\u00efve method that simply trains on the delayed supervised stream. To this end, we introduce a simple, efficient baseline that rehearses from the labeled memory samples that are most similar to the new unlabeled samples. This method bridges the accuracy gap caused by label delay without significantly increasing computational complexity. We show experimentally that our method is the least affected by the label delay factor and in some cases successfully recovers the accuracy of the non-delayed counterpart. We conduct various ablations and sensitivity experiments, demonstrating the effectiveness of our approach.", "keywords": "Online continual learning;delayed feedback;computational constraint;efficient learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Botos Csaba;Wenxuan Zhang;Matthias M\u00fcller;Ser-Nam Lim;Philip Torr;Adel Bibi", "authorids": "~Botos_Csaba1;~Wenxuan_Zhang3;~Matthias_M\u00fcller1;~Ser-Nam_Lim3;~Philip_Torr1;~Adel_Bibi1", "gender": "M;F;;;M;M", "homepage": "https://www.linkedin.com/in/botos-csaba/;https://cemse.kaust.edu.sa/vcc/people/person/wenxuan-zhang;https://matthias.pw;http://www.robots.ox.ac.uk/~tvg/;http://adelbibi.com;https://sites.google.com/site/sernam", "dblp": "236/6044;;169/4686-1;;176/0964;04/6633", "google_scholar": "n68BdMgAAAAJ;;AeMLOMEAAAAJ;;Q4j2laYAAAAJ;HX0BfLYAAAAJ", "orcid": ";;;;0000-0002-6169-3918;", "linkedin": "botos-csaba/;;;;adel-bibi-ba3671ab/;", "or_profile": "~Botos_Csaba1;~Wenxuan_Zhang3;~Matthias_M\u00fcller1;~Philip_Torr1;~Adel_Bibi1;~Ser-Nam_Lim1", "aff": "University of Oxford;Samsung Research America;Apple;University of Oxford;University of Oxford;University of Central Florida", "aff_domain": "oxford.ac.uk;samsung.com;apple.com;ox.ac.uk;ox.ac.uk;ucf.edu", "position": "PhD student;Intern;Researcher;Full Professor;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\ncsaba2024label,\ntitle={Label Delay in Online Continual Learning},\nauthor={Botos Csaba and Wenxuan Zhang and Matthias M{\\\"u}ller and Ser-Nam Lim and Philip Torr and Adel Bibi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m5CAnUui0Z}\n}", "github": "", "reviewers": "UAN4;dj2M;HwKE;bdVn", "pdf_size": 5149658, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;4;3", "wc_summary": "80;66;116;82", "wc_strengths": "42;57;76;54", "wc_weaknesses": "295;99;485;190", "wc_questions": "3;3;2;3", "wc_limitations": "1;1;15;3", "wc_review": "421;226;694;332", "wc_reply_reviewers": "84;0;604;32", "wc_reply_authors": "1091;15;2373;30", "reply_reviewers": "2;0;2;1", "reply_authors": "4;2;6;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 18.384776310850235 ], "wc_strengths_avg": [ 57.25, 12.193748398257199 ], "wc_weaknesses_avg": [ 267.25, 143.57989935920696 ], "wc_questions_avg": [ 2.75, 0.4330127018922193 ], "wc_limitations_avg": [ 5.0, 5.830951894845301 ], "wc_review_avg": [ 418.25, 173.52575457262822 ], "wc_reply_reviewers_avg": [ 180.0, 246.62522174343806 ], "wc_reply_authors_avg": [ 877.25, 967.5051356452843 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15019926005459501372&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "oxford.ac.uk;samsung.com;apple.com;ox.ac.uk;ox.ac.uk;ucf.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "University of Oxford;Samsung;Apple;University of Central Florida", "aff_unique_dep": ";Samsung Research America;Apple Inc.;", "aff_unique_url": "https://www.ox.ac.uk;https://www.samsung.com/us/careers/research/;https://www.apple.com;https://www.ucf.edu", "aff_unique_abbr": "Oxford;SRA;Apple;UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "How many classifiers do we need?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93775", "id": "m5dyKArVn8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m5dyKArVn8", "openreview": "https://openreview.net/forum?id=m5dyKArVn8", "poster": "", "project": "", "author_site": "Hyunsuk Kim, Liam Hodgkinson, Ryan Theisen, Michael Mahoney", "tldr": "", "abstract": "As performance gains through scaling data and/or model size experience diminishing returns, it is becoming increasingly popular to turn to ensembling, where the predictions of multiple models are combined to improve accuracy. \nIn this paper, we provide a detailed analysis of how the disagreement and the polarization (a notion we introduce and define in this paper) among classifiers relate to the performance gain achieved by aggregating individual classifiers, for majority vote strategies in classification tasks.\nWe address these questions in the following ways. \n(1) An upper bound for polarization is derived, and we propose what we call a neural polarization law: most interpolating neural network models are 4/3-polarized. Our empirical results not only support this conjecture but also show that polarization is nearly constant for a dataset, regardless of hyperparameters or architectures of classifiers. \n(2) The error rate of the majority vote classifier is considered under restricted entropy conditions, and we present a tight upper bound that indicates that the disagreement is linearly correlated with the error rate, and that the slope is linear in the polarization.\n(3) We prove results for the asymptotic behavior of the disagreement in terms of the number of classifiers, which we show can help in predicting the performance for a larger number of classifiers from that of a smaller number. \nOur theoretical findings are supported by empirical results on several image classification tasks with various types of neural networks.", "keywords": "ensemble;model aggregation;machine learning;computer vision", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hyunsuk Kim;Liam Hodgkinson;Ryan Theisen;Michael W. Mahoney", "authorids": "~Hyunsuk_Kim1;~Liam_Hodgkinson1;~Ryan_Theisen1;~Michael_W._Mahoney1", "gender": ";M;;", "homepage": "https://statistics.berkeley.edu/people/hyunsuk-kim;http://www.liamhodgkinson.com;http://ryantheisen.com;", "dblp": ";238/1555;251/5575;", "google_scholar": ";;T1phq10AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hyunsuk_Kim1;~Liam_Hodgkinson1;~Ryan_Theisen1;~Michael_W._Mahoney1", "aff": "University of California, Berkeley;University of Melbourne;;", "aff_domain": "berkeley.edu;unimelb.edu;;", "position": "PhD student;Lecturer;;", "bibtex": "@inproceedings{\nkim2024how,\ntitle={How many classifiers do we need?},\nauthor={Hyunsuk Kim and Liam Hodgkinson and Ryan Theisen and Michael W. Mahoney},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m5dyKArVn8}\n}", "github": "", "reviewers": "CG1s;2Qk3;ueod;UfPx", "pdf_size": 563757, "rating": "6;7;7;7", "confidence": "4;3;2;3", "soundness": "3;4;3;3", "novelty": "3;3;3;2", "presentation": "2;3;3;3", "wc_summary": "67;124;77;106", "wc_strengths": "50;51;164;37", "wc_weaknesses": "108;172;201;133", "wc_questions": "167;83;112;94", "wc_limitations": "3;15;7;1", "wc_review": "395;445;561;371", "wc_reply_reviewers": "16;24;34;109", "wc_reply_authors": "24;17;29;127", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.5, 22.699118925632334 ], "wc_strengths_avg": [ 75.5, 51.39309292113095 ], "wc_weaknesses_avg": [ 153.5, 35.668613654023616 ], "wc_questions_avg": [ 114.0, 32.30325061042619 ], "wc_limitations_avg": [ 6.5, 5.361902647381804 ], "wc_review_avg": [ 443.0, 73.17103251970687 ], "wc_reply_reviewers_avg": [ 45.75, 37.07003506877219 ], "wc_reply_authors_avg": [ 49.25, 45.090880452703516 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:OZXGqIc7Y_UJ:scholar.google.com/&scioq=How+many+classifiers+do+we+need%3F&hl=en&as_sdt=0,31", "gs_version_total": 3, "email": "berkeley.edu;unimelb.edu;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.unimelb.edu.au", "aff_unique_abbr": "UC Berkeley;UniMelb", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Australia" }, { "title": "Neglected Hessian component explains mysteries in sharpness regularization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93774", "id": "m6pVpdIN0y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m6pVpdIN0y", "openreview": "https://openreview.net/forum?id=m6pVpdIN0y", "poster": "", "project": "", "author_site": "Yann Dauphin, Atish Agarwala, Hossein Mobahi", "tldr": "", "abstract": "Recent work has shown that methods that regularize second order information like SAM can improve generalization in deep learning. Seemingly similar methods like weight noise and gradient penalties often fail to provide such benefits. We investigate this inconsistency and reveal its connection to the the structure of the Hessian of the loss. Specifically, its decomposition into the positive semi-definite Gauss-Newton matrix and an indefinite matrix, which we call the Nonlinear Modeling Error (NME) matrix. Previous studies have largely overlooked the significance of the NME in their analysis for various reasons. However, we provide empirical and theoretical evidence that the NME is important to the performance of gradient penalties and explains their sensitivity to activation functions. We also provide evidence that the difference in regularization performance between gradient penalties and weight noise can be explained by the NME. Our findings emphasize the necessity of considering the NME in both experimental design and theoretical analysis for sharpness regularization.", "keywords": "sharpness;flatness;regularization", "primary_area": "other", "supplementary_material": "", "author": "Yann Dauphin;Atish Agarwala;Hossein Mobahi", "authorids": "~Yann_Dauphin1;~Atish_Agarwala1;~Hossein_Mobahi2", "gender": "M;;M", "homepage": "https://www.dauphin.io;;http://people.csail.mit.edu/hmobahi/", "dblp": "22/9988;265/6410.html;94/1490", "google_scholar": "XSforroAAAAJ;https://scholar.google.com/citations?hl=en;GSHmKZkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yann_Dauphin1;~Atish_Agarwala1;~Hossein_Mobahi1", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndauphin2024neglected,\ntitle={Neglected Hessian component explains mysteries in sharpness regularization},\nauthor={Yann Dauphin and Atish Agarwala and Hossein Mobahi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m6pVpdIN0y}\n}", "github": "", "reviewers": "L6WX;PSYG;ZfRd;NVNp", "pdf_size": 1513997, "rating": "6;7;7;8", "confidence": "4;4;4;2", "soundness": "2;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "73;82;119;173", "wc_strengths": "67;46;73;106", "wc_weaknesses": "80;78;99;18", "wc_questions": "4;174;2;170", "wc_limitations": "7;1;43;21", "wc_review": "231;381;336;488", "wc_reply_reviewers": "22;24;32;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 111.75, 39.34065962843023 ], "wc_strengths_avg": [ 73.0, 21.529050141610984 ], "wc_weaknesses_avg": [ 68.75, 30.425112982534674 ], "wc_questions_avg": [ 87.5, 84.5147916047836 ], "wc_limitations_avg": [ 18.0, 16.15549442140351 ], "wc_review_avg": [ 359.0, 92.24695116913078 ], "wc_reply_reviewers_avg": [ 22.0, 7.874007874011811 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11515413139675986100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "google.com;google.com;google.com", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "m772lelDe3", "title": "Consensus over matrix-weighted networks with time-delays", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper studies consensus conditions for leaderless and leader-follower matrix-weighted consensus networks under the presence of constant time-delays. Several delayed consensus algorithms for networks of single- and double-integrators using only the relative positions are considered. Conditions for the networks to asymptotically converge to a consensus or clustering configuration are derived based on direct eigenvalue evaluation or the Lyapunov-Krasovkii theorem. Furthermore, an application of these algorithms in bearing-based network localization is also considered. The theoretical results are supported by numerical simulations.", "keywords": "optimization and control theory; multi-agent systems; matrix-weighted networks; time-delays; network localization", "primary_area": "optimization", "supplementary_material": "", "author": "Pham Tung Lam;Trinh Hoang Minh;Nguyen Minh Hieu;Chuong Nguyen;Tuynh Van Pham", "authorids": "~Pham_Tung_Lam1;~Trinh_Hoang_Minh1;~Nguyen_Minh_Hieu2;~Chuong_Nguyen2;~Tuynh_Van_Pham1", "gender": "M;M;M;;M", "homepage": ";https://ieeexplore.ieee.org/author/37089454710;https://nguyenvchuong.github.io/?fbclid=IwAR0HESbWH4Dyj061mnFHjwI406ybH7WmP34_Xj8Fh8s4psafGQjqHK-l4q0;;https://sites.google.com/view/minhhoangtrinh", "dblp": ";;;;", "google_scholar": ";;https://scholar.google.co.kr/citations?user=eF7sD5IAAAAJ;;https://scholar.google.co.kr/citations?user=Zp50z80AAAAJ", "orcid": ";;0000-0002-8046-044X;0000-0002-1763-2317;0000-0001-5736-6693", "linkedin": "t%C3%B9ng-l%C3%A2m-ph%E1%BA%A1m-1b21222a9/;hieunm3538/;chuong-nguyen-2aa19040/;;", "or_profile": "~Pham_Tung_Lam1;~Nguyen_Minh_Hieu2;~Chuong_Nguyen2;~Tuynh_Van_Pham1;~Minh_Hoang_Trinh1", "aff": ";;University of Southern California;Hanoi University of Science and Technology;FPT University", "aff_domain": ";;usc.edu;hust.edu.vn;fpt.edu.vn", "position": ";;PhD student;Lecturer;Lecturer", "bibtex": "@misc{\nanonymous2024consensus,\ntitle={Consensus over matrix-weighted networks with time-delays},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=m772lelDe3}\n}", "github": "", "project": "", "reviewers": "L9gF;9VrG;BoCp", "site": "https://openreview.net/forum?id=m772lelDe3", "pdf_size": 4145151, "rating": "3;4;4", "confidence": "4;4;4", "soundness": "3;3;2", "novelty": "2;2;2", "presentation": "3;2;3", "wc_summary": "72;91;85", "wc_strengths": "42;40;54", "wc_weaknesses": "153;332;137", "wc_questions": "7;94;190", "wc_limitations": "1;77;11", "wc_review": "275;634;477", "wc_reply_reviewers": "50;260;266", "wc_reply_authors": "0;287;416", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 3.6666666666666665, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 82.66666666666667, 7.93025150224688 ], "wc_strengths_avg": [ 45.333333333333336, 6.182412330330469 ], "wc_weaknesses_avg": [ 207.33333333333334, 88.39431857057079 ], "wc_questions_avg": [ 97.0, 74.73954776421918 ], "wc_limitations_avg": [ 29.666666666666668, 33.717782977071444 ], "wc_review_avg": [ 462.0, 146.94443394244868 ], "wc_reply_reviewers_avg": [ 192.0, 100.43903623591775 ], "wc_reply_authors_avg": [ 234.33333333333334, 173.86648772996926 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6Da2Xt7SAgkJ:scholar.google.com/&scioq=Consensus+over+matrix-weighted+networks+with+time-delays&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Southern California;Hanoi University of Science and Technology;FPT University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.usc.edu;https://www.hust.edu.vn;https://www.fpt.edu.vn", "aff_unique_abbr": "USC;HUST;FPT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Los Angeles;Hanoi;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Vietnam" }, { "title": "Fetch and Forge: Efficient Dataset Condensation for Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93773", "id": "m8MElyzuwp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m8MElyzuwp", "openreview": "https://openreview.net/forum?id=m8MElyzuwp", "poster": "/media/PosterPDFs/NeurIPS%202024/93773.png?t=1731334072.9391985", "project": "", "author_site": "Ding Qi, Jian Li, Jinlong Peng, Bo Zhao, Shuguang Dou, Jialin Li, Jiangning Zhang, Yabiao Wang, Chengjie Wang, Cairong Zhao", "tldr": "", "abstract": "Dataset condensation (DC) is an emerging technique capable of creating compact synthetic datasets from large originals while maintaining considerable performance. It is crucial for accelerating network training and reducing data storage requirements. \nHowever, current research on DC mainly focuses on image classification, with less exploration of object detection.\nThis is primarily due to two challenges: (i) the multitasking nature of object detection complicates the condensation process, and (ii) Object detection datasets are characterized by large-scale and high-resolution data, which are difficult for existing DC methods to handle.\nAs a remedy, we propose DCOD, the first dataset condensation framework for object detection. It operates in two stages: Fetch and Forge, initially storing key localization and classification information into model parameters, and then reconstructing synthetic images via model inversion. \nFor the complex of multiple objects in an image, we propose Foreground Background Decoupling to centrally update the foreground of multiple instances and Incremental PatchExpand to further enhance the diversity of foregrounds.\nExtensive experiments on various detection datasets demonstrate the superiority of DCOD. Even at an extremely low compression rate of 1\\%, we achieve 46.4\\% and 24.7\\% $\\text{AP}_{50}$ on the VOC and COCO, respectively, significantly reducing detector training duration.", "keywords": "Dataset Condensation;Object Detection", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Ding Qi;Jian Li;Jinlong Peng;Bo Zhao;Shuguang Dou;Jialin Li;Jiangning Zhang;Yabiao Wang;Chengjie Wang;Cairong Zhao", "authorids": "~Ding_Qi1;~Jian_Li12;~Jinlong_Peng1;~Bo_Zhao4;~Shuguang_Dou1;~Jialin_Li3;~Jiangning_Zhang1;~Yabiao_Wang1;~Chengjie_Wang1;~Cairong_Zhao2", "gender": ";M;M;M;M;M;M;;M;", "homepage": ";https://swordlidev.github.io/;https://www.linkedin.com/in/jinlong-peng-893a12125/;;https://shuguang-52.github.io/;https://li-jl16.github.io/;https://www.researchgate.net/profile/Jiangning_Zhang2;;;", "dblp": "75/3714;33/5448-62;215/9880;;224/4114;;241/9593;;;", "google_scholar": ";ACb5C40AAAAJ;;R3_AR5EAAAAJ;https://scholar.google.com.hk/citations?user=n2YT06EAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=2hA4X9wAAAAJ;;fqte5H4AAAAJ;", "orcid": "0000-0002-6754-4817;0000-0002-0242-6481;;;;;;;0000-0003-4216-8090;", "linkedin": ";;jinlong-peng-893a12125/;;;;;;;", "or_profile": "~Ding_Qi1;~Jian_Li12;~Jinlong_Peng1;~Bo_Zhao4;~Shuguang_Dou1;~Jialin_Li3;~Jiangning_Zhang1;~Yabiao_Wang1;~Chengjie_Wang1;~Cairong_Zhao2", "aff": "Tongji University;Tencent Youtu;Tencent Youtu Lab;BAAI;Tongji University;Tencent YouTu Lab;Tencent Youtu Lab;;Tencent YouTu Lab;", "aff_domain": "tongji.edu.cn;tencent.com;tencent.com;baai.ac.cn;tongji.edu.cn;tencent.com;tencent.com;;tencent.com;", "position": "PhD student;Researcher;Researcher;Principal Researcher;PhD student;Researcher;Principal Researcher;;Researcher;", "bibtex": "@inproceedings{\nqi2024fetch,\ntitle={Fetch and Forge: Efficient Dataset Condensation for Object Detection},\nauthor={Ding Qi and Jian Li and Jinlong Peng and Bo Zhao and Shuguang Dou and Jialin Li and Jiangning Zhang and Yabiao Wang and Chengjie Wang and Cairong Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m8MElyzuwp}\n}", "github": "", "reviewers": "SZRX;8mRE;z3Af;8kYh", "pdf_size": 12691176, "rating": "5;6;6;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "62;113;158;54", "wc_strengths": "47;133;41;29", "wc_weaknesses": "17;271;356;72", "wc_questions": "151;66;101;3", "wc_limitations": "47;1;10;3", "wc_review": "324;584;666;161", "wc_reply_reviewers": "0;37;96;117", "wc_reply_authors": "47;35;24;315", "reply_reviewers": "0;1;1;3", "reply_authors": "2;2;2;4", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.75, 41.984372092482225 ], "wc_strengths_avg": [ 62.5, 41.21589499210226 ], "wc_weaknesses_avg": [ 179.0, 139.18153613177287 ], "wc_questions_avg": [ 80.25, 53.867313836871425 ], "wc_limitations_avg": [ 15.25, 18.632968094214082 ], "wc_review_avg": [ 433.75, 201.83703203327184 ], "wc_reply_reviewers_avg": [ 62.5, 46.5 ], "wc_reply_authors_avg": [ 105.25, 121.37210346698289 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4601413935976911074&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "tongji.edu.cn;tencent.com;tencent.com;baai.ac.cn;tongji.edu.cn;tencent.com;tencent.com;;tencent.com;", "author_num": 10, "aff_unique_index": "0;1;1;2;0;1;1;1", "aff_unique_norm": "Tongji University;Tencent;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";Tencent Youtu;", "aff_unique_url": "https://www.tongji.edu.cn;https://www.tencent.com;https://www.baaic.cn", "aff_unique_abbr": "Tongji;Tencent;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Bayesian Adaptive Calibration and Optimal Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93772", "id": "m906PS5G9x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m906PS5G9x", "openreview": "https://openreview.net/forum?id=m906PS5G9x", "poster": "/media/PosterPDFs/NeurIPS%202024/93772.png?t=1733913872.5854695", "project": "", "author_site": "Rafael Oliveira, Dino Sejdinovic, David Howard, Edwin Bonilla", "tldr": "", "abstract": "The process of calibrating computer models of natural phenomena is essential for applications in the physical sciences, where plenty of domain knowledge can be embedded into simulations and then calibrated against real observations. Current machine learning approaches, however, mostly rely on rerunning simulations over a fixed set of designs available in the observed data, potentially neglecting informative correlations across the design space and requiring a large amount of simulations. Instead, we consider the calibration process from the perspective of Bayesian adaptive experimental design and propose a data-efficient algorithm to run maximally informative simulations within a batch-sequential process. At each round, the algorithm jointly estimates the parameters posterior distribution and optimal designs by maximising a variational lower bound of the expected information gain. The simulator is modelled as a sample from a Gaussian process, which allows us to correlate simulations and real data with the unknown calibration parameters. We show the benefits of our method when compared to related approaches across synthetic and real-data problems.", "keywords": "Gaussian processes;Bayesian inference;variational inference;experimental design;active learning;calibration", "primary_area": "active_learning", "supplementary_material": "", "author": "Rafael Oliveira;Dino Sejdinovic;David Howard;Edwin V. Bonilla", "authorids": "~Rafael_Oliveira1;~Dino_Sejdinovic1;~David_Howard1;~Edwin_V._Bonilla1", "gender": "M;M;M;M", "homepage": "https://rafaeloliveira.me;https://sejdino.github.io/;;http://ebonilla.github.io/", "dblp": "62/7803;31/1783;;23/1754", "google_scholar": "vdGqGjQAAAAJ;v8Dg1lIAAAAJ;https://scholar.google.com.au/citations?user=euOvnfFnvo8C;https://scholar.google.com.au/citations?user=uDLRZQMAAAAJ", "orcid": "0000-0002-3586-5026;0000-0001-5547-9213;;0000-0002-9904-2408", "linkedin": "rafael-dos-santos-de-oliveira-34569251/;https://linkedin.com/in/dinosejdinovic;;", "or_profile": "~Rafael_Oliveira1;~Dino_Sejdinovic1;~David_Howard1;~Edwin_V_Bonilla1", "aff": "CSIRO;University of Adelaide;CSIRO;CSIRO's Data61", "aff_domain": "csiro.au;adelaide.edu.au;csiro.au;data61.csiro.au", "position": "Researcher;Full Professor;Lecturer;Principal Research Scientist", "bibtex": "@inproceedings{\noliveira2024bayesian,\ntitle={Bayesian Adaptive Calibration and Optimal Design},\nauthor={Rafael Oliveira and Dino Sejdinovic and David Howard and Edwin V. Bonilla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m906PS5G9x}\n}", "github": "", "reviewers": "bPdt;bRKR;CjgP;Z11Z", "pdf_size": 16962028, "rating": "3;5;7;7", "confidence": "4;4;3;3", "soundness": "3;2;4;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "51;102;81;69", "wc_strengths": "44;42;87;88", "wc_weaknesses": "115;38;83;162", "wc_questions": "136;48;4;126", "wc_limitations": "67;12;7;7", "wc_review": "413;242;262;452", "wc_reply_reviewers": "48;18;18;73", "wc_reply_authors": "209;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 18.538810641462412 ], "wc_strengths_avg": [ 65.25, 22.26404051379713 ], "wc_weaknesses_avg": [ 99.5, 45.27968639467372 ], "wc_questions_avg": [ 78.5, 54.87030162118666 ], "wc_limitations_avg": [ 23.25, 25.341418665891617 ], "wc_review_avg": [ 342.25, 91.57066943077352 ], "wc_reply_reviewers_avg": [ 39.25, 23.01494079940246 ], "wc_reply_authors_avg": [ 52.25, 90.49965469547384 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hA-hNtqaHM0J:scholar.google.com/&scioq=Bayesian+Adaptive+Calibration+and+Optimal+Design&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "csiro.au;adelaide.edu.au;csiro.au;data61.csiro.au", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Commonwealth Scientific and Industrial Research Organisation;University of Adelaide;CSIRO", "aff_unique_dep": ";;Data61", "aff_unique_url": "https://www.csiro.au;https://www.adelaide.edu.au;https://www.csiro.au", "aff_unique_abbr": "CSIRO;Adelaide;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Directional Smoothness and Gradient Methods: Convergence and Adaptivity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93771", "id": "m9WZrEXWl5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=m9WZrEXWl5", "openreview": "https://openreview.net/forum?id=m9WZrEXWl5", "poster": "/media/PosterPDFs/NeurIPS%202024/93771.png?t=1732790909.0779786", "project": "", "author_site": "Aaron Mishkin, Ahmed Khaled, Yuanhao Wang, Aaron Defazio, Robert Gower", "tldr": "", "abstract": "We develop new sub-optimality bounds for gradient descent (GD) that depend on the conditioning of the objective along the path of optimization, rather than on global, worst-case constants. Key to our proofs is directional smoothness, a measure of gradient variation that we use to develop upper-bounds on the objective. Minimizing these upper-bounds requires solving implicit equations to obtain a sequence of strongly adapted step-sizes; we show that these equations are straightforward to solve for convex quadratics and lead to new guarantees for two classical step-sizes. For general functions, we prove that the Polyak step-size and normalized GD obtain fast, path-dependent rates despite using no knowledge of the directional smoothness. Experiments on logistic regression show our convergence guarantees are tighter than the classical theory based on $L$-smoothness.", "keywords": "directional smoothness;gradient descent;exponential search;polyak stepsize;normalized gradient descent", "primary_area": "optimization", "supplementary_material": "", "author": "Aaron Mishkin;Ahmed Khaled;Yuanhao Wang;Aaron Defazio;Robert M. Gower", "authorids": "~Aaron_Mishkin1;~Ahmed_Khaled1;~Yuanhao_Wang1;~Aaron_Defazio1;~Robert_M._Gower1", "gender": "M;M;;M;M", "homepage": "https://www.cs.stanford.edu/~amishkin/;https://www.akhaled.net;;https://www.aarondefazio.com/;https://gowerrobert.github.io/", "dblp": "230/3809;154/3591-1;;116/2969;143/0056", "google_scholar": "j7qgASIAAAAJ;Bc3wOdsAAAAJ;yj2b7pgAAAAJ;KEzJsdkAAAAJ;okKw87MAAAAJ", "orcid": "0000-0002-5072-2314;;;;", "linkedin": ";;;;", "or_profile": "~Aaron_Mishkin1;~Ahmed_Khaled1;~Yuanhao_Wang1;~Aaron_Defazio1;~Robert_M._Gower1", "aff": "INRIA;Princeton University;Princeton University;Meta;Flatiron Institute", "aff_domain": "inria.fr;princeton.edu;princeton.edu;meta.com;simonsfoundation.org", "position": "Intern;PhD student;PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nmishkin2024directional,\ntitle={Directional Smoothness and Gradient Methods: Convergence and Adaptivity},\nauthor={Aaron Mishkin and Ahmed Khaled and Yuanhao Wang and Aaron Defazio and Robert M. Gower},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=m9WZrEXWl5}\n}", "github": "", "reviewers": "Z7fB;vUdK;ETZ9", "pdf_size": 1475257, "rating": "5;6;6", "confidence": "3;4;3", "soundness": "3;3;4", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "90;76;118", "wc_strengths": "26;99;81", "wc_weaknesses": "194;201;51", "wc_questions": "83;692;73", "wc_limitations": "2;19;6", "wc_review": "395;1087;329", "wc_reply_reviewers": "61;855;18", "wc_reply_authors": "35;1456;0", "reply_reviewers": "1;4;1", "reply_authors": "2;5;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 94.66666666666667, 17.46106780494506 ], "wc_strengths_avg": [ 68.66666666666667, 31.051927834229907 ], "wc_weaknesses_avg": [ 148.66666666666666, 69.11986368299316 ], "wc_questions_avg": [ 282.6666666666667, 289.4711653266272 ], "wc_limitations_avg": [ 9.0, 7.2571803523590805 ], "wc_review_avg": [ 603.6666666666666, 342.8287554385652 ], "wc_reply_reviewers_avg": [ 311.3333333333333, 384.83098743330027 ], "wc_reply_authors_avg": [ 497.0, 678.2659262167507 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=618435353652349238&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "inria.fr;princeton.edu;princeton.edu;meta.com;simonsfoundation.org", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "INRIA;Princeton University;Meta;Flatiron Institute", "aff_unique_dep": ";;Meta Platforms, Inc.;", "aff_unique_url": "https://www.inria.fr;https://www.princeton.edu;https://meta.com;https://flatironinstitute.org", "aff_unique_abbr": "INRIA;Princeton;Meta;Flatiron", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "France;United States" }, { "id": "mAG68wdggA", "title": "RedCode: Risky Code Execution and Generation Benchmark for Code Agents", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "With the rapidly increasing capabilities and adoption of code agents for AI-assisted coding and software development, safety and security concerns, such as generating or executing malicious code, have become significant barriers to the real-world deployment of these agents. To provide comprehensive and practical evaluations on the safety of code agents, we propose RedCode, an evaluation platform with benchmarks grounded in four key principles: real interaction with systems, holistic evaluation of unsafe code generation and execution, diverse input formats, and high-quality safety scenarios and tests. RedCode consists of two parts to evaluate agents\u2019 safety in unsafe code execution and generation: (1) RedCode-Exec provides challenging code prompts in Python as inputs, aiming to evaluate code agents\u2019 ability to recognize and handle unsafe code. We then map the Python code to other programming languages (e.g., Bash) and natural text summaries or descriptions for evaluation, leading to a total of over 4,000 testing instances. We provide 25 types of critical vulnerabilities spanning various domains, such as websites, file systems, and operating systems. We provide a Docker sandbox environment to evaluate the execution capabilities of code agents and design corresponding evaluation metrics to assess their execution results. (2) RedCode-Gen provides 160 prompts with function signatures and docstrings as input to assess whether code agents will follow instructions to generate harmful code or software. Our empirical findings, derived from evaluating three agent frameworks based on 19 LLMs, provide insights into code agents\u2019 vulnerabilities. For instance, evaluations on RedCode-Exec show that agents are more likely to reject executing unsafe operations on the operating system, but are less likely to reject executing technically buggy code, indicating high risks. Unsafe operations described in natural text lead to a lower rejection rate than those in code format. Additionally, evaluations on RedCode-Gen reveal that more capable base models and agents with stronger overall coding abilities, such as GPT4, tend to produce more sophisticated and effective harmful software. Our findings highlight the need for stringent safety evaluations for diverse code agents. Our dataset and code are publicly available at https://github.com/AI-secure/RedCode.", "keywords": "Code Agents;LLM;Safety benchmark;Trustworthy machine learning", "primary_area": "", "supplementary_material": "", "author": "Chengquan Guo;Xun Liu;Chulin Xie;Andy Zhou;Yi Zeng;Zinan Lin;Dawn Song;Bo Li", "authorids": "~Chengquan_Guo1;~Xun_Liu6;~Chulin_Xie1;~Andy_Zhou2;~Yi_Zeng3;~Zinan_Lin1;~Dawn_Song1;~Bo_Li19", "gender": "M;M;F;M;M;M;F;F", "homepage": "https://github.com/1mocat;https://antiquality.github.io;;https://www.andyzhou.ai;https://yizeng623.github.io/;https://zinanlin.me/;;http://boli.cs.illinois.edu/", "dblp": ";;245/4284;;75/148;64/237-1;s/DXSong;50/3402-26", "google_scholar": ";;WeJnzAgAAAAJ;https://scholar.google.com/citations?hl=en;slUNmHQAAAAJ;67nE-wQ_g_cC;;K8vJkTcAAAAJ", "orcid": ";;;;0000-0002-6901-9194;;;", "linkedin": ";;;andy-zhou-679376206/;chnyizeng/;;;", "or_profile": "~Chengquan_Guo1;~Xun_Liu6;~Chulin_Xie1;~Andy_Zhou2;~Yi_Zeng3;~Zinan_Lin1;~Dawn_Song1;~Bo_Li19", "aff": "Zhejiang University;University of Chinese Academy of Sciences;University of Illinois, Urbana Champaign;Department of Computer Science;Virginia Tech;Microsoft;University of California, Berkeley;University of Illinois, Urbana Champaign", "aff_domain": "zju.edu.cn;ucas.ac.cn;illinois.edu;cs.illinois.edu;vt.edu;microsoft.com;berkeley.edu;illinois.edu", "position": "Undergrad student;Undergrad student;PhD student;Undergrad student;PhD student;Senior Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nguo2024redcode,\ntitle={RedCode: Risky Code Execution and Generation Benchmark for Code Agents},\nauthor={Chengquan Guo and Xun Liu and Chulin Xie and Andy Zhou and Yi Zeng and Zinan Lin and Dawn Song and Bo Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mAG68wdggA}\n}", "github": "", "project": "", "reviewers": "6LJS;JteB;EFxd", "site": "https://openreview.net/forum?id=mAG68wdggA", "pdf_size": 4972903, "rating": "6;7;8", "confidence": "3;3;4", "wc_summary_and_contributions": "135;93;97", "wc_strengths": "5;96;83", "wc_improvement": "26;213;60", "wc_limitations": "5;39;41", "wc_correctness": "4;36;4", "wc_clarity": "1;56;8", "wc_relation_to_prior_work": "1;18;14", "wc_documentation": "3;41;15", "wc_additional_feedback": "1;1;1", "wc_review": "181;593;323", "wc_reply_reviewers": "0;32;14", "wc_reply_authors": "0;22;19", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 108.33333333333333, 18.926759422104517 ], "wc_strengths_avg": [ 61.333333333333336, 40.18568014714805 ], "wc_improvement_avg": [ 99.66666666666667, 81.3319672016415 ], "wc_limitations_avg": [ 28.333333333333332, 16.519348924485154 ], "wc_correctness_avg": [ 14.666666666666666, 15.084944665313014 ], "wc_clarity_avg": [ 21.666666666666668, 24.444949489732135 ], "wc_relation_to_prior_work_avg": [ 11.0, 7.2571803523590805 ], "wc_documentation_avg": [ 19.666666666666668, 15.86050300449376 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 365.6666666666667, 170.8826758009392 ], "wc_reply_reviewers_avg": [ 15.333333333333334, 13.097921802925667 ], "wc_reply_authors_avg": [ 13.666666666666666, 9.741092797468305 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7212083542053176097&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;3;4;5;6;2", "aff_unique_norm": "Zhejiang University;University of Chinese Academy of Sciences;University of Illinois Urbana-Champaign;Unknown Institution;Virginia Tech;Microsoft;University of California, Berkeley", "aff_unique_dep": ";;;Department of Computer Science;;Microsoft Corporation;", "aff_unique_url": "https://www.zju.edu.cn;http://www.ucas.ac.cn;https://illinois.edu;;https://www.vt.edu;https://www.microsoft.com;https://www.berkeley.edu", "aff_unique_abbr": "ZJU;UCAS;UIUC;;VT;Microsoft;UC Berkeley", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Urbana-Champaign;Berkeley", "aff_country_unique_index": "0;0;1;1;1;1;1", "aff_country_unique": "China;United States;" }, { "title": "START: A Generalized State Space Model with Saliency-Driven Token-Aware Transformation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93769", "id": "mAdGQ1Hh3L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mAdGQ1Hh3L", "openreview": "https://openreview.net/forum?id=mAdGQ1Hh3L", "poster": "/media/PosterPDFs/NeurIPS%202024/93769.png?t=1731408091.5533986", "project": "", "author_site": "Jintao Guo, Lei Qi, Yinghuan Shi, Yang Gao", "tldr": "", "abstract": "Domain Generalization (DG) aims to enable models to generalize to unseen target domains by learning from multiple source domains. Existing DG methods primarily rely on convolutional neural networks (CNNs), which inherently learn texture biases due to their limited receptive fields, making them prone to overfitting source domains. While some works have introduced transformer-based methods (ViTs) for DG to leverage the global receptive field, these methods incur high computational costs due to the quadratic complexity of self-attention. Recently, advanced state space models (SSMs), represented by Mamba, have shown promising results in supervised learning tasks by achieving linear complexity in sequence length during training and fast RNN-like computation during inference. Inspired by this, we investigate the generalization ability of the Mamba model under domain shifts and find that input-dependent matrices within SSMs could accumulate and amplify domain-specific features, thus hindering model generalization. To address this issue, we propose a novel SSM-based architecture with saliency-based token-aware transformation (namely START), which achieves state-of-the-art (SOTA) performances and offers a competitive alternative to CNNs and ViTs. Our START can selectively perturb and suppress domain-specific features in salient tokens within the input-dependent matrices of SSMs, thus effectively reducing the discrepancy between different domains. Extensive experiments on five benchmarks demonstrate that START outperforms existing SOTA DG methods with efficient linear complexity. Our code is available at https://github.com/lingeringlight/START.", "keywords": "Domain generalization;state space models", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/ce883f1aa347d3f4ebb234dc743b97b4275be824.zip", "author": "Jintao Guo;Lei Qi;Yinghuan Shi;Yang Gao", "authorids": "~Jintao_Guo1;~Lei_Qi1;~Yinghuan_Shi3;~Yang_Gao3", "gender": "M;M;M;M", "homepage": "https://lingeringlight.github.io/;http://palm.seu.edu.cn/qilei/;https://cs.nju.edu.cn/shiyh/;https://cs.nju.edu.cn/gaoyang/", "dblp": "239/9938;15/2464-1;30/7184;89/4402-1", "google_scholar": "K4lrdKc_YLUC;7mm8iZwAAAAJ;m6BKDUMAAAAJ;https://scholar.google.com.tw/citations?user=CJwLwzQAAAAJ", "orcid": "0000-0003-2256-9233;0000-0001-7091-0702;;", "linkedin": ";;;", "or_profile": "~Jintao_Guo1;~Lei_Qi1;~Yinghuan_Shi3;~Yang_Gao3", "aff": "Nanjing University;Southeast University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;seu.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nguo2024start,\ntitle={{START}: A Generalized State Space Model with Saliency-Driven Token-Aware Transformation},\nauthor={Jintao Guo and Lei Qi and Yinghuan Shi and Yang Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mAdGQ1Hh3L}\n}", "github": "", "reviewers": "4RVV;3dvs;EPCk;qvXD;AMLN", "pdf_size": 2275880, "rating": "5;5;5;6;6", "confidence": "4;4;3;3;4", "soundness": "4;2;3;3;3", "novelty": "3;2;2;3;3", "presentation": "4;2;2;3;3", "wc_summary": "115;62;79;112;186", "wc_strengths": "50;26;104;174;58", "wc_weaknesses": "228;62;266;11;200", "wc_questions": "60;17;117;5;3", "wc_limitations": "23;8;17;5;7", "wc_review": "476;175;583;307;454", "wc_reply_reviewers": "25;16;74;0;66", "wc_reply_authors": "0;0;491;0;0", "reply_reviewers": "1;1;2;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 110.8, 42.583564904784566 ], "wc_strengths_avg": [ 82.4, 52.31290471766981 ], "wc_weaknesses_avg": [ 153.4, 99.04261708981643 ], "wc_questions_avg": [ 40.4, 43.476890413183874 ], "wc_limitations_avg": [ 12.0, 6.870225614927067 ], "wc_review_avg": [ 399.0, 142.44297104455524 ], "wc_reply_reviewers_avg": [ 36.2, 28.847183571364468 ], "wc_reply_authors_avg": [ 98.2, 196.4 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.16666666666666669, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11176405007861542029&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;seu.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Nanjing University;Southeast University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.seu.edu.cn/", "aff_unique_abbr": "Nanjing U;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Exact Gradients for Stochastic Spiking Neural Networks Driven by Rough Signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93768", "id": "mCWZj7pa0M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mCWZj7pa0M", "openreview": "https://openreview.net/forum?id=mCWZj7pa0M", "poster": "/media/PosterPDFs/NeurIPS%202024/93768.png?t=1733562985.4400332", "project": "", "author_site": "Christian Holberg, Cristopher Salvi", "tldr": "", "abstract": "We introduce a mathematically rigorous framework based on rough path theory to model stochastic spiking neural networks (SSNNs) as stochastic differential equations with event discontinuities (Event SDEs) and driven by c\u00e0dl\u00e0g rough paths. Our formalism is general enough to allow for potential jumps to be present both in the solution trajectories as well as in the driving noise. We then identify a set of sufficient conditions ensuring the existence of pathwise gradients of solution trajectories and event times with respect to the network's parameters and show how these gradients satisfy a recursive relation. Furthermore, we introduce a general-purpose loss function defined by means of a new class of signature kernels indexed on c\u00e0dl\u00e0g rough paths and use it to train SSNNs as generative models. We provide an end-to-end autodifferentiable solver for Event SDEs and make its implementation available as part of the $\\texttt{diffrax}$ library. Our framework is, to our knowledge, the first enabling gradient-based training of SSNNs with noise affecting both the spike timing and the network's dynamics.", "keywords": "stochastic differential equations;spiking neural networks;backpropagation;rough paths;signatures", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/9e08e5f309e846752db3ca8580f4ff48f9d37ef6.zip", "author": "Christian Holberg;Cristopher Salvi", "authorids": "~Christian_Holberg1;~Cristopher_Salvi1", "gender": "M;M", "homepage": "https://github.com/cholberg;https://www.maths.ox.ac.uk/people/cristopher.salvi", "dblp": ";", "google_scholar": "TRN3Ot0AAAAJ;FVxJ4iIAAAAJ", "orcid": "0000-0003-1603-1170;", "linkedin": ";cristopher-salvi/", "or_profile": "~Christian_Holberg1;~Cristopher_Salvi1", "aff": "University of Copenhagen;Imperial College London", "aff_domain": "math.ku.dk;ic.ac.uk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nholberg2024exact,\ntitle={Exact Gradients for Stochastic Spiking Neural Networks Driven by Rough Signals},\nauthor={Christian Holberg and Cristopher Salvi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mCWZj7pa0M}\n}", "github": "", "reviewers": "ywki;sP65;NV97;2eDR", "pdf_size": 824669, "rating": "6;6;6;7", "confidence": "2;2;2;4", "soundness": "4;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "53;72;70;121", "wc_strengths": "192;65;79;78", "wc_weaknesses": "147;16;55;108", "wc_questions": "157;162;9;203", "wc_limitations": "29;7;7;11", "wc_review": "578;322;220;521", "wc_reply_reviewers": "9;33;18;35", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 25.347583711273153 ], "wc_strengths_avg": [ 103.5, 51.39309292113095 ], "wc_weaknesses_avg": [ 81.5, 49.96248592694322 ], "wc_questions_avg": [ 132.75, 73.64229423368069 ], "wc_limitations_avg": [ 13.5, 9.096702699330127 ], "wc_review_avg": [ 410.25, 145.24870911646684 ], "wc_reply_reviewers_avg": [ 23.75, 10.755812382149477 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5659860939972569458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "math.ku.dk;ic.ac.uk", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Copenhagen;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.imperial.ac.uk", "aff_unique_abbr": "UCPH;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Denmark;United Kingdom" }, { "id": "mDRmX8IlBI", "title": "MMWorld: Towards Multi-discipline Multi-faceted World Model Evaluation in Videos", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Multimodal Language Language Models (MLLMs) demonstrate the emerging abilities of \"world models\"---interpreting and reasoning about complex real-world dynamics. \nTo assess these abilities, we posit videos are the ideal medium, as they encapsulate rich representations of real-world dynamics and causalities.\nTo this end, we introduce MMWorld, a new benchmark for multi-discipline, multi-faceted multimodal video understanding.\nMMWorld distinguishes itself from previous video understanding benchmarks with two unique advantages: (1) multi-discipline, covering various disciplines that often require domain expertise for comprehensive understanding; (2) multi-faceted reasoning, including explanation, counterfactual thinking, future prediction, etc.\nMMWorld consists of a human-annotated dataset to evaluate MLLMs with questions about the whole videos and a synthetic dataset to analyze MLLMs within a single modality of perception. \nTogether, MMWorld encompasses 1,910 videos across seven broad disciplines and 69 subdisciplines, complete with 6,627 question-answer pairs and associated captions. \nThe evaluation includes 2 proprietary and 10 open-source MLLMs, which struggle on MMWorld (e.g., GPT-4V performs the best with only 52.3\\% accuracy), showing large room for improvement. Further ablation studies reveal other interesting findings such as models' different skill sets from humans. We hope MMWorld can serve as an essential step towards world model evaluation in videos.", "keywords": "Video Understanding;Benchmark", "primary_area": "", "supplementary_material": "/attachment/411324ab5a3f5562c953684001d61431ea6162b0.pdf", "author": "Xuehai He;Weixi Feng;Kaizhi Zheng;Yujie Lu;Wanrong Zhu;Jiachen Li;Yue Fan;Jianfeng Wang;Linjie Li;Zhengyuan Yang;Kevin Lin;William Yang Wang;Lijuan Wang;Xin Eric Wang", "authorids": "~Xuehai_He1;~Weixi_Feng2;~Kaizhi_Zheng1;~Yujie_Lu1;~Wanrong_Zhu1;~Jiachen_Li6;~Yue_Fan3;~Jianfeng_Wang4;~Linjie_Li1;~Zhengyuan_Yang1;~Kevin_Lin3;~William_Yang_Wang2;~Lijuan_Wang1;~Xin_Eric_Wang2", "gender": "M;M;M;;;M;M;M;F;M;;;F;M", "homepage": ";https://weixi-feng.github.io/;https://kzzheng.github.io/;https://yujielu10.github.io/;;https://sites.google.com/view/jiachenli/;http://www.yfan.site;;;http://zhengyuan.info/;https://sites.google.com/site/kevinlin311tw/;;https://www.microsoft.com/en-us/research/people/lijuanw/;https://eric-xw.github.io", "dblp": "251/0763;322/1026;;;;;;;200/8256;163/9713;;;51/2527.html;10/5630-61", "google_scholar": "kDzxOzUAAAAJ;https://scholar.google.com/citations?hl=en;jLa6wpUAAAAJ;pcmr6GMAAAAJ;;https://scholar.google.com/citations?hl=en;;vJWEw_8AAAAJ;WR875gYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=LKSy1kwAAAAJ;;cDcWXuIAAAAJ;YjqluE0AAAAJ", "orcid": ";0000-0002-7201-5688;0000-0003-3534-3487;;;;;;;;0000-0001-8944-1336;;;0000-0003-2605-5504", "linkedin": ";weixifeng/;;;;;;;;;;;;", "or_profile": "~Xuehai_He1;~Weixi_Feng2;~Kaizhi_Zheng1;~Yujie_Lu1;~Wanrong_Zhu1;~Jiachen_Li6;~Yue_Fan3;~Jianfeng_Wang4;~Linjie_Li1;~Zhengyuan_Yang1;~Kevin_Lin3;~William_Yang_Wang2;~Lijuan_Wang1;~Xin_Eric_Wang2", "aff": "University of California Santa Curz;University of California, Santa Barbara;University of California, Santa Cruz;UC Santa Barbara;;University of California, Santa Barbara;University of California, Santa Cruz;Microsoft;Microsoft;Microsoft;Microsoft;;Microsoft;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsb.edu;ucsc.edu;ucsb.edu;;ucsb.edu;ucsc.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com;ucsc.edu", "position": "PhD student;PhD student;PhD student;PhD student;;PhD student;PhD student;Principal Researcher;Researcher;Researcher;Principal Researcher;;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024mmworld,\ntitle={{MMW}orld: Towards Multi-discipline Multi-faceted World Model Evaluation in Videos},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=mDRmX8IlBI}\n}", "github": "", "project": "", "reviewers": "7qvY;XiDY;YWxW;ipky", "site": "https://openreview.net/forum?id=mDRmX8IlBI", "pdf_size": 3554908, "rating": "4;7;8;8", "confidence": "5;4;4;5", "wc_summary_and_contributions": "63;71;91;83", "wc_strengths": "2;149;23;67", "wc_improvement": "2;18;84;53", "wc_limitations": "1;17;7;41", "wc_correctness": "1;1;2;38", "wc_clarity": "1;1;2;4", "wc_relation_to_prior_work": "1;4;5;4", "wc_documentation": "1;1;2;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "73;263;217;294", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "72;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.75, 1.6393596310755 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 77.0, 10.770329614269007 ], "wc_strengths_avg": [ 60.25, 56.35323859371349 ], "wc_improvement_avg": [ 39.25, 31.744093938873103 ], "wc_limitations_avg": [ 16.5, 15.256146302392358 ], "wc_correctness_avg": [ 10.5, 15.88238017426859 ], "wc_clarity_avg": [ 2.0, 1.224744871391589 ], "wc_relation_to_prior_work_avg": [ 3.5, 1.5 ], "wc_documentation_avg": [ 1.75, 0.82915619758885 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 211.75, 84.66219640429841 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 18.0, 31.176914536239792 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.45749571099781405, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7953547302484503208&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1;1;0;2;2;2;2;2;0", "aff_unique_norm": "University of California, Santa Cruz;University of California, Santa Barbara;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.ucsc.edu;https://www.ucsb.edu;https://www.microsoft.com", "aff_unique_abbr": "UCSC;UCSB;Microsoft", "aff_campus_unique_index": "0;1;0;1;1;0;0", "aff_campus_unique": "Santa Cruz;Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "mEJgnZZyfv", "title": "OpenMixup: Open Mixup Toolbox and Benchmark for Visual Representation Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Mixup augmentation has emerged as a powerful technique for improving the generalization ability of deep neural networks. However, the lack of standardized implementations and benchmarks has hindered progress, resulting in poor reproducibility, unfair comparisons, and conflicting insights. In this paper, we introduce OpenMixup, the \\textit{first} mixup augmentation benchmark for visual representation learning, where 18 representative mixup baselines are trained \\textit{from scratch} and systematically evaluated on 11 image datasets across varying scales and granularity, spanning fine-grained scenarios to complex non-iconic scenes. We also open-source a modular codebase for streamlined mixup method design, training, and evaluations, which comprises a collection of widely-used vision backbones, optimization policies, and analysis toolkits. Notably, the codebase not only underpins all our benchmarking but supports broader mixup applications beyond classification, such as self-supervised learning and regression tasks. Through extensive experiments, we present insights on performance-complexity trade-offs and identify preferred mixup strategies for different needs. To the best of our knowledge, OpenMixup has contributed to a number of studies in the mixup community. We hope this work can further advance reproducible mixup research and fair comparisons, thereby laying a solid foundation for future progress. The source code is publicly available at \\url{https://github.com/Westlake-AI/openmixup}.", "keywords": "Data Augmentation;Image Classification;Mixup;Vision Transformer;Mixup", "primary_area": "", "supplementary_material": "/attachment/88b972e99bb6003984537a633cc7dfb2a8c25590.pdf", "author": "Siyuan Li;Zedong Wang;Zicheng Liu;Di Wu;Cheng Tan;Weiyang Jin;Stan Z. Li", "authorids": "~Siyuan_Li6;~Zedong_Wang1;~Zicheng_Liu2;~Di_Wu10;~Cheng_Tan1;~Weiyang_Jin1;~Stan_Z._Li2", "gender": "M;M;M;M;M;M;M", "homepage": "https://lupin1998.github.io/;https://jacky1128.github.io;;;https://chengtan9907.github.io/;;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": "63/9705-2;179/8811.html;l/ZichengLiu-6;;70/1533-12.html;344/6132;l/StanZLi", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;6kTV6aMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-6806-2468;0009-0000-0112-0491;;;;0000-0001-5351-1400;", "linkedin": "https://www.linkedin.cn/incareer/in/siyuan-li-lupin1998/;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Siyuan_Li6;~Zedong_Wang1;~Zicheng_Liu2;~Di_Wu10;~Cheng_Tan1;~Weiyang_Jin1;~Stan_Z._Li1", "aff": "Alibaba Group;Westlake University;Zhejiang University;Westlake University;Zhejiang University & Westlake University;Beijing Jiaotong University;Westlake University", "aff_domain": "alibaba-inc.com;westlake.edu;zju.edu.cn;westlake.edu.cn;westlake.edu.cn;bjtu.edu.cn;westlake.edu.cn", "position": "Intern;Intern;PhD student;PhD student;PhD student;Undergrad student;Chair Professor", "bibtex": "@misc{\nanonymous2024openmixup,\ntitle={OpenMixup: Open Mixup Toolbox and Benchmark for Visual Representation Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=mEJgnZZyfv}\n}", "github": "", "project": "", "reviewers": "SK4a;TiUv;aWQj;BRDY;pGHJ;r5tK", "site": "https://openreview.net/forum?id=mEJgnZZyfv", "pdf_size": 5987389, "rating": "4;5;6;7;7;9", "confidence": "2;3;3;5;5;4", "wc_summary_and_contributions": "74;50;52;68;33;65", "wc_strengths": "42;30;59;5;45;82", "wc_improvement": "181;79;243;5;109;44", "wc_limitations": "15;21;1;102;24;3", "wc_correctness": "25;20;1;18;7;3", "wc_clarity": "52;1;33;40;16;2", "wc_relation_to_prior_work": "1;1;1;18;5;2", "wc_documentation": "12;1;27;4;4;2", "wc_additional_feedback": "1;1;1;1;1;1", "wc_review": "403;204;418;261;244;204", "wc_reply_reviewers": "0;0;0;19;0;0", "wc_reply_authors": "117;120;116;0;0;0", "reply_reviewers": "0;0;0;1;0;0", "reply_authors": "3;3;2;1;1;1", "rating_avg": [ 6.333333333333333, 1.5986105077709065 ], "confidence_avg": [ 3.6666666666666665, 1.1055415967851332 ], "wc_summary_and_contributions_avg": [ 57.0, 13.686976778431873 ], "wc_strengths_avg": [ 43.833333333333336, 23.77265001822239 ], "wc_improvement_avg": [ 110.16666666666667, 80.69782456106786 ], "wc_limitations_avg": [ 27.666666666666668, 34.310672133058276 ], "wc_correctness_avg": [ 12.333333333333334, 9.086008780292673 ], "wc_clarity_avg": [ 24.0, 19.139836293274122 ], "wc_relation_to_prior_work_avg": [ 4.666666666666667, 6.128258770283411 ], "wc_documentation_avg": [ 8.333333333333334, 9.067647005823629 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 289.0, 88.40814442120137 ], "wc_reply_reviewers_avg": [ 3.1666666666666665, 7.080881928749334 ], "wc_reply_authors_avg": [ 58.833333333333336, 58.84560778474088 ], "reply_reviewers_avg": [ 0.16666666666666666, 0.372677996249965 ], "reply_authors_avg": [ 1.8333333333333333, 0.8975274678557508 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7229988054812212, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13084441875628660621&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1;2;3;1", "aff_unique_norm": "Alibaba Group;Westlake University;Zhejiang University;Beijing Jiao Tong University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.westlake.edu.cn;https://www.zju.edu.cn;http://www.njtu.edu.cn/en", "aff_unique_abbr": "Alibaba;WU;ZJU;BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Interventional Causal Discovery in a Mixture of DAGs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93767", "id": "mFrlCI8sov", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mFrlCI8sov", "openreview": "https://openreview.net/forum?id=mFrlCI8sov", "poster": "/media/PosterPDFs/NeurIPS%202024/93767.png?t=1732771698.6621127", "project": "", "author_site": "Burak Var\u0131c\u0131, Dmitriy Katz, Dennis Wei, Prasanna Sattigeri, Ali Tajer", "tldr": "", "abstract": "Causal interactions among a group of variables are often modeled by a single causal graph. In some domains, however, these interactions are best described by multiple co-existing causal graphs, e.g., in dynamical systems or genomics. This paper addresses the hitherto unknown role of interventions in learning causal interactions among variables governed by a mixture of causal systems, each modeled by one directed acyclic graph (DAG). Causal discovery from mixtures is fundamentally more challenging than single-DAG causal discovery. Two major difficulties stem from (i) an inherent uncertainty about the skeletons of the component DAGs that constitute the mixture and (ii) possibly cyclic relationships across these component DAGs. This paper addresses these challenges and aims to identify edges that exist in at least one component DAG of the mixture, referred to as the *true* edges. First, it establishes matching necessary and sufficient conditions on the size of interventions required to identify the true edges. Next, guided by the necessity results, an adaptive algorithm is designed that learns all true edges using ${\\cal O}(n^2)$ interventions, where $n$ is the number of nodes. Remarkably, the size of the interventions is optimal if the underlying mixture model does not contain cycles across its components. More generally, the gap between the intervention size used by the algorithm and the optimal size is quantified. It is shown to be bounded by the *cyclic complexity number* of the mixture model, defined as the size of the minimal intervention that can break the cycles in the mixture, which is upper bounded by the number of cycles among the ancestors of a node.", "keywords": "causal discovery;intervention design;mixture of DAGs;mixture models", "primary_area": "causal_inference", "supplementary_material": "", "author": "Burak Var\u0131c\u0131;Dmitriy A Katz;Dennis Wei;Prasanna Sattigeri;Ali Tajer", "authorids": "~Burak_Var\u0131c\u01311;~Dmitriy_A_Katz1;~Dennis_Wei1;~Prasanna_Sattigeri1;~Ali_Tajer1", "gender": ";M;;M;M", "homepage": "https://www.linkedin.com/in/dmitriy-katz-a5093b79;https://sites.google.com/site/dennislwei/;;https://www.isg-rpi.com/;https://bvarici.github.io/", "dblp": "41/9879;59/8761;00/7428;65/2830;289/8565", "google_scholar": ";r4ldy4AAAAAJ;m-s38ikAAAAJ;;v_SL5c4AAAAJ", "orcid": ";;0000-0003-4435-0486;;", "linkedin": ";dennis-wei-4886036b/;prasannasattigeri/;;", "or_profile": "~Dmitriy_A_Katz1;~Dennis_Wei1;~Prasanna_Sattigeri1;~Ali_Tajer1;~Burak_Varici1", "aff": "International Business Machines;International Business Machines;IBM Research;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "ibm.com;ibm.com;ibm.com;rpi.edu;rpi.edu", "position": "research staff member;Research Staff Member;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nvar{\\i}c{\\i}2024interventional,\ntitle={Interventional Causal Discovery in a Mixture of {DAG}s},\nauthor={Burak Var{\\i}c{\\i} and Dmitriy A Katz and Dennis Wei and Prasanna Sattigeri and Ali Tajer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mFrlCI8sov}\n}", "github": "", "reviewers": "Tiwj;2inT;bGit;5C9J", "pdf_size": 1307145, "rating": "5;6;7;7", "confidence": "2;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "83;78;100;110", "wc_strengths": "51;30;135;126", "wc_weaknesses": "106;71;45;82", "wc_questions": "34;703;47;6", "wc_limitations": "1;1;1;6", "wc_review": "275;883;328;330", "wc_reply_reviewers": "4;137;12;12", "wc_reply_authors": "0;366;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.75, 12.871965661856 ], "wc_strengths_avg": [ 85.5, 45.71925196238451 ], "wc_weaknesses_avg": [ 76.0, 21.920310216782973 ], "wc_questions_avg": [ 197.5, 292.22636773569906 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 454.0, 248.66342714601197 ], "wc_reply_reviewers_avg": [ 41.25, 55.37768052202981 ], "wc_reply_authors_avg": [ 91.5, 158.48264889255228 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4TrB5OwosYUJ:scholar.google.com/&scioq=Interventional+Causal+Discovery+in+a+Mixture+of+DAGs&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "ibm.com;ibm.com;ibm.com;rpi.edu;rpi.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "International Business Machines Corporation;IBM;Rensselaer Polytechnic Institute", "aff_unique_dep": ";IBM Research;", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research;https://www.rpi.edu", "aff_unique_abbr": "IBM;IBM;RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Long-tailed Object Detection Pretraining: Dynamic Rebalancing Contrastive Learning with Dual Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93766", "id": "mGz3Jux9wS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mGz3Jux9wS", "openreview": "https://openreview.net/forum?id=mGz3Jux9wS", "poster": "/media/PosterPDFs/NeurIPS%202024/93766.png?t=1730815734.1529977", "project": "", "author_site": "Chen-Long Duan, Yong Li, Xiu-Shen Wei, Lin Zhao", "tldr": "", "abstract": "Pre-training plays a vital role in various vision tasks, such as object recognition and detection. Commonly used pre-training methods, which typically rely on randomized approaches like uniform or Gaussian distributions to initialize model parameters, often fall short when confronted with long-tailed distributions, especially in detection tasks. This is largely due to extreme data imbalance and the issue of simplicity bias. In this paper, we introduce a novel pre-training framework for object detection, called Dynamic Rebalancing Contrastive Learning with Dual Reconstruction (2DRCL). Our method builds on a Holistic-Local Contrastive Learning mechanism, which aligns pre-training with object detection by capturing both global contextual semantics and detailed local patterns. To tackle the imbalance inherent in long-tailed data, we design a dynamic rebalancing strategy that adjusts the sampling of underrepresented instances throughout the pre-training process, ensuring better representation of tail classes. Moreover, Dual Reconstruction addresses simplicity bias by enforcing a reconstruction task aligned with the self-consistency principle, specifically benefiting underrepresented tail classes. Experiments on COCO and LVIS v1.0 datasets demonstrate the effectiveness of our method, particularly in improving the mAP/AP scores for tail classes.", "keywords": "Long-tailed object detection;pretraining", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chen-Long Duan;Yong Li;Xiu-Shen Wei;Lin Zhao", "authorids": "~Chen-Long_Duan1;~Yong_Li13;~Xiu-Shen_Wei1;~Lin_Zhao14", "gender": ";M;;", "homepage": ";https://mysee1989.github.io/;;https://sharling-lz.github.io/", "dblp": ";;;72/2195-3.html", "google_scholar": ";HRBTJYYAAAAJ;;https://scholar.google.com.hk/citations?user=OJ4qnYcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Chen-Long_Duan1;~Yong_Li13;~Xiu-Shen_Wei1;~Lin_Zhao14", "aff": ";City University of Hong Kong;;Nanjing University of Science and Technology", "aff_domain": ";cityu.edu;;njust.edu.cn", "position": ";Postdoc;;Associate Professor", "bibtex": "@inproceedings{\nduan2024longtailed,\ntitle={Long-tailed Object Detection Pretraining: Dynamic Rebalancing Contrastive Learning with Dual Reconstruction},\nauthor={Chen-Long Duan and Yong Li and Xiu-Shen Wei and Lin Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mGz3Jux9wS}\n}", "github": "", "reviewers": "kReB;ZXbG;XbTc;gQpU", "pdf_size": 3647667, "rating": "5;6;6;7", "confidence": "3;3;4;5", "soundness": "2;3;2;4", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "57;84;83;82", "wc_strengths": "38;114;82;104", "wc_weaknesses": "104;57;866;45", "wc_questions": "2;18;244;12", "wc_limitations": "1;1;6;1", "wc_review": "202;274;1281;244", "wc_reply_reviewers": "18;0;477;0", "wc_reply_authors": "43;0;347;0", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;3;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.5, 11.280514172678478 ], "wc_strengths_avg": [ 84.5, 29.236107812087436 ], "wc_weaknesses_avg": [ 268.0, 345.95881257745117 ], "wc_questions_avg": [ 69.0, 101.19782606360673 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 500.25, 451.49107134028685 ], "wc_reply_reviewers_avg": [ 123.75, 204.08132570129976 ], "wc_reply_authors_avg": [ 97.5, 145.11460987784793 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=232321782248622089&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": ";cityu.edu;;njust.edu.cn", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "City University of Hong Kong;Nanjing University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cityu.edu.hk;http://www.nust.edu.cn/", "aff_unique_abbr": "CityU;NUST", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "MaNo: Exploiting Matrix Norm for Unsupervised Accuracy Estimation Under Distribution Shifts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93765", "id": "mH1xtt2bJE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mH1xtt2bJE", "openreview": "https://openreview.net/forum?id=mH1xtt2bJE", "poster": "/media/PosterPDFs/NeurIPS%202024/93765.png?t=1733154769.1521683", "project": "", "author_site": "RENCHUNZI XIE, Ambroise Odonnat, Vasilii Feofanov, Weijian Deng, Jianfeng Zhang, Bo An", "tldr": "", "abstract": "Leveraging the model\u2019s outputs, specifically the logits, is a common approach to estimating the test accuracy of a pre-trained neural network on out-of-distribution (OOD) samples without requiring access to the corresponding ground-truth labels.\nDespite their ease of implementation and computational efficiency, current logit-based methods are vulnerable to overconfidence issues, leading to prediction bias, especially under the natural shift. In this work, we first study the relationship between logits and generalization performance from the view of low-density separation assumption. Our findings motivate our proposed method \\method{} that \\textbf{(1)}~applies a data-dependent normalization on the logits to reduce prediction bias, and \\textbf{(2)} takes the $L_p$ norm of the matrix of normalized logits as the estimation score. Our theoretical analysis highlights the connection between the provided score and the model's uncertainty. \nWe conduct an extensive empirical study on common unsupervised accuracy estimation benchmarks and demonstrate that \\method{} achieves state-of-the-art performance across various architectures in the presence of synthetic, natural, or subpopulation shifts. The code is available at https://github.com/Renchunzi-Xie/MaNo.", "keywords": "Unsupervised Learning;Distribution Shifts;Unsupervised Accuracy Estimation;Generalization;Deep Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "RENCHUNZI XIE;Ambroise Odonnat;Vasilii Feofanov;Weijian Deng;Jianfeng Zhang;Bo An", "authorids": "~RENCHUNZI_XIE1;~Ambroise_Odonnat1;~Vasilii_Feofanov1;~Weijian_Deng1;~Jianfeng_Zhang2;~Bo_An2", "gender": ";M;M;M;M;M", "homepage": ";https://ambroiseodt.github.io/;;http://weijiandeng.xyz;;https://personal.ntu.edu.sg/boan/", "dblp": ";359/3799;245/3361;198/1517;74/5065;42/6178-1.html", "google_scholar": ";M_OS-3kAAAAJ;https://scholar.google.ru/citations?user=UIteS6oAAAAJ;https://scholar.google.com.hk/citations?user=lReHnAEAAAAJ;_Wzsb6YAAAAJ;PEEpuNwAAAAJ", "orcid": ";;0000-0002-5777-4205;;;0000-0002-7064-7438", "linkedin": ";ambroise-odonnat/;;;;", "or_profile": "~RENCHUNZI_XIE1;~Ambroise_Odonnat1;~Vasilii_Feofanov1;~Weijian_Deng1;~Jianfeng_Zhang2;~Bo_An2", "aff": ";Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Australian National University;Huawei Technologies Ltd.;Nanyang Technological University", "aff_domain": ";huawei.com;huawei.com;anu.edu.au;huawei.com;ntu.edu.sg", "position": ";Intern;Researcher;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nxie2024mano,\ntitle={MaNo: Exploiting Matrix Norm for Unsupervised Accuracy Estimation Under Distribution Shifts},\nauthor={RENCHUNZI XIE and Ambroise Odonnat and Vasilii Feofanov and Weijian Deng and Jianfeng Zhang and Bo An},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mH1xtt2bJE}\n}", "github": "", "reviewers": "A5Zk;5Gdr;6N1W;Uw9o", "pdf_size": 1658425, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;4;2;3", "presentation": "3;4;3;4", "wc_summary": "79;45;109;92", "wc_strengths": "115;45;83;59", "wc_weaknesses": "144;129;114;123", "wc_questions": "45;53;5;2", "wc_limitations": "15;4;1;1", "wc_review": "398;276;312;277", "wc_reply_reviewers": "51;112;0;25", "wc_reply_authors": "136;61;0;26", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.25, 23.47738273317535 ], "wc_strengths_avg": [ 75.5, 26.547127904916568 ], "wc_weaknesses_avg": [ 127.5, 10.920164833920778 ], "wc_questions_avg": [ 26.25, 22.949673200287624 ], "wc_limitations_avg": [ 5.25, 5.7608593109014565 ], "wc_review_avg": [ 315.75, 49.650654577759596 ], "wc_reply_reviewers_avg": [ 47.0, 41.6353215431321 ], "wc_reply_authors_avg": [ 55.75, 51.13890397730479 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10908447533662018498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";huawei.com;huawei.com;anu.edu.au;huawei.com;ntu.edu.sg", "author_num": 6, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Huawei;Australian National University;Nanyang Technological University", "aff_unique_dep": "Huawei Technologies;;", "aff_unique_url": "https://www.huawei.com;https://www.anu.edu.au;https://www.ntu.edu.sg", "aff_unique_abbr": "Huawei;ANU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "China;Australia;Singapore" }, { "title": "Bounds for the smallest eigenvalue of the NTK for arbitrary spherical data of arbitrary dimension", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93764", "id": "mHVmsy9len", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mHVmsy9len", "openreview": "https://openreview.net/forum?id=mHVmsy9len", "poster": "/media/PosterPDFs/NeurIPS%202024/93764.png?t=1733816247.2194936", "project": "", "author_site": "Kedar Karhadkar, Michael Murray, Guido Montufar", "tldr": "", "abstract": "Bounds on the smallest eigenvalue of the neural tangent kernel (NTK) are a key ingredient in the analysis of neural network optimization and memorization. However, existing results require distributional assumptions on the data and are limited to a high-dimensional setting, where the input dimension $d_0$ scales at least logarithmically in the number of samples $n$. In this work we remove both of these requirements and instead provide bounds in terms of a measure of distance between data points: notably these bounds hold with high probability even when $d_0$ is held constant versus $n$. We prove our results through a novel application of the hemisphere transform.", "keywords": "neural tangent kernel;initialization;minimum eigenvalue;smallest eigenvalue;low-dimensional;hemisphere transform;spherical harmonics;separated", "primary_area": "learning_theory", "supplementary_material": "", "author": "Kedar Karhadkar;Michael Murray;Guido Montufar", "authorids": "~Kedar_Karhadkar1;~Michael_Murray3;~Guido_Montufar1", "gender": "M;M;M", "homepage": "https://www.math.ucla.edu/~kedar/;https://www.math.ucla.edu/people/visiting/mmurray;http://www.math.ucla.edu/~montufar/", "dblp": "278/8407;;", "google_scholar": "WVdm3mQAAAAJ;wplO7UoAAAAJ;https://scholar.google.de/citations?user=pDIuuVwAAAAJ", "orcid": ";;0000-0002-0131-2669", "linkedin": ";;", "or_profile": "~Kedar_Karhadkar1;~Michael_Murray3;~Guido_Montufar1", "aff": "Susquehanna International Group;University of California, Los Angeles;UCLA ", "aff_domain": "sig.com;ucla.edu;math.ucla.edu", "position": "Intern;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nkarhadkar2024bounds,\ntitle={Bounds for the smallest eigenvalue of the {NTK} for arbitrary spherical data of arbitrary dimension},\nauthor={Kedar Karhadkar and Michael Murray and Guido Montufar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mHVmsy9len}\n}", "github": "", "reviewers": "qjpA;k95g;fqQK;NBnj;Hffy", "pdf_size": 578309, "rating": "6;6;6;6;7", "confidence": "5;3;5;3;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "123;64;43;97;27", "wc_strengths": "20;20;57;20;87", "wc_weaknesses": "117;17;143;29;25", "wc_questions": "276;1;4;113;38", "wc_limitations": "9;21;1;1;14", "wc_review": "545;123;248;260;191", "wc_reply_reviewers": "213;0;23;45;48", "wc_reply_authors": "0;0;0;27;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;0;1;2;1", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 70.8, 35.0679340708859 ], "wc_strengths_avg": [ 40.8, 27.18381871628782 ], "wc_weaknesses_avg": [ 66.2, 52.87872918291438 ], "wc_questions_avg": [ 86.4, 103.03513963692193 ], "wc_limitations_avg": [ 9.2, 7.704544113703289 ], "wc_review_avg": [ 273.4, 144.22288306645376 ], "wc_reply_reviewers_avg": [ 65.8, 75.60264545635953 ], "wc_reply_authors_avg": [ 5.4, 10.8 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.6324555320336759 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.408248290463863, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=205172292314551104&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "sig.com;ucla.edu;math.ucla.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Susquehanna International Group;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.susquehannainternational.com;https://www.ucla.edu", "aff_unique_abbr": "SIG;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Image Quality Assessment via Teaching Large Multimodal Model to Compare", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93763", "id": "mHtOyh5taj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mHtOyh5taj", "openreview": "https://openreview.net/forum?id=mHtOyh5taj", "poster": "", "project": "", "author_site": "Hanwei Zhu, Haoning Wu, Yixuan Li, Zicheng Zhang, Baoliang Chen, Lingyu Zhu, Yuming Fang, Guangtao Zhai, Weisi Lin, Shiqi Wang", "tldr": "", "abstract": "While recent advancements in large multimodal models (LMMs) have significantly improved their abilities in image quality assessment (IQA) relying on absolute quality rating, how to transfer reliable relative quality comparison outputs to continuous perceptual quality scores remains largely unexplored. To address this gap, we introduce an all-around LMM-based NR-IQA model, which is capable of producing qualitatively comparative responses and effectively translating these discrete comparison outcomes into a continuous quality score. Specifically, during training, we present to generate scaled-up comparative instructions by comparing images from the same IQA dataset, allowing for more flexible integration of diverse IQA datasets. Utilizing the established large-scale training corpus, we develop a human-like visual quality comparator. During inference, moving beyond binary choices, we propose a soft comparison method that calculates the likelihood of the test image being preferred over multiple predefined anchor images. The quality score is further optimized by maximum a posteriori estimation with the resulting probability matrix. Extensive experiments on nine IQA datasets validate that the Compare2Score effectively bridges text-defined comparative levels during training with converted single image quality scores for inference, surpassing state-of-the-art IQA models across diverse scenarios. Moreover, we verify that the probability-matrix-based inference conversion not only improves the rating accuracy of Compare2Score but also zero-shot general-purpose LMMs, suggesting its intrinsic effectiveness.", "keywords": "Image Quality Assessment;Paired Comparsion;Large Multimodal Model", "primary_area": "evaluation", "supplementary_material": "/attachment/8cf5ec211f31b5c95ff2c2c734218c1922377f95.zip", "author": "Hanwei Zhu;Haoning Wu;Yixuan Li;Zicheng Zhang;Baoliang Chen;Lingyu Zhu;Yuming Fang;Guangtao Zhai;Weisi Lin;Shiqi Wang", "authorids": "~Hanwei_Zhu1;~Haoning_Wu1;~Yixuan_Li12;~Zicheng_Zhang7;~Baoliang_Chen2;~Lingyu_Zhu3;~Yuming_Fang1;~Guangtao_Zhai1;~Weisi_Lin1;~Shiqi_Wang1", "gender": "M;M;;;;M;M;M;M;M", "homepage": "https://h4nwei.github.io/;https://teowu.github.io;;;;;https://scholar.google.com/citations?user=_Tu-eHkAAAAJ&hl=en;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;http://www.ntu.edu.sg/home/wslin/;https://www.cs.cityu.edu.hk/~shiqwang/", "dblp": "214/8898;264/5802-1;;;;282/1603.html;31/9004.html;19/3230;14/3737.html;58/9145-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=wth-VbMAAAAJ;;;;https://scholar.google.com.tw/citations?user=IhyTEDkAAAAJ;_Tu-eHkAAAAJ;E6zbSYgAAAAJ;https://scholar.google.com.tw/citations?user=D_S41X4AAAAJ;Pr7s2VUAAAAJ", "orcid": ";0000-0001-8642-8101;;;;;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Hanwei_Zhu1;~Haoning_Wu1;~Yixuan_Li12;~Zicheng_Zhang7;~Baoliang_Chen2;~Lingyu_Zhu3;~Yuming_Fang1;~Guangtao_Zhai1;~Weisi_Lin1;~Shiqi_Wang1", "aff": "City University of Hong Kong;Nanyang Technological University;;;;City University of Hong Kong;Jiangxi University of Finance and Economics;Shanghai Jiaotong University;Nanyang Technological University;City University of Hong Kong", "aff_domain": "cityu.edu.hk;ntu.edu.sg;;;;cityu.edu;jxufe.edu.cn;sjtu.edu.cn;ntu.edu.sg;cityu.edu.hk", "position": "PhD student;PhD student;;;;PhD student;Full Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhu2024adaptive,\ntitle={Adaptive Image Quality Assessment via Teaching Large Multimodal Model to Compare},\nauthor={Hanwei Zhu and Haoning Wu and Yixuan Li and Zicheng Zhang and Baoliang Chen and Lingyu Zhu and Yuming Fang and Guangtao Zhai and Weisi Lin and Shiqi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mHtOyh5taj}\n}", "github": "", "reviewers": "nq4X;WZ3r;WPaz;qViG", "pdf_size": 6195246, "rating": "7;7;7;7", "confidence": "4;5;4;4", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "4;4;3;4", "wc_summary": "87;73;60;52", "wc_strengths": "52;60;94;45", "wc_weaknesses": "18;112;47;89", "wc_questions": "62;8;6;23", "wc_limitations": "32;33;5;1", "wc_review": "251;286;212;210", "wc_reply_reviewers": "0;0;20;0", "wc_reply_authors": "0;0;29;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.0, 13.285330255586423 ], "wc_strengths_avg": [ 62.75, 18.806581294855267 ], "wc_weaknesses_avg": [ 66.5, 36.43144246389374 ], "wc_questions_avg": [ 24.75, 22.487496525847426 ], "wc_limitations_avg": [ 17.75, 14.821858857781638 ], "wc_review_avg": [ 239.75, 31.307946275666183 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11519501046734869464&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cityu.edu.hk;ntu.edu.sg;;;;cityu.edu;jxufe.edu.cn;sjtu.edu.cn;ntu.edu.sg;cityu.edu.hk", "author_num": 10, "aff_unique_index": "0;1;0;2;3;1;0", "aff_unique_norm": "City University of Hong Kong;Nanyang Technological University;Jiangxi University of Finance and Economics;Shanghai Jiao Tong University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.ntu.edu.sg;http://www.jxufe.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "CityU;NTU;;SJTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "title": "In-Trajectory Inverse Reinforcement Learning: Learn Incrementally Before an Ongoing Trajectory Terminates", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93762", "id": "mJZH9w8qgu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mJZH9w8qgu", "openreview": "https://openreview.net/forum?id=mJZH9w8qgu", "poster": "/media/PosterPDFs/NeurIPS%202024/93762.png?t=1732469538.8465757", "project": "", "author_site": "Shicheng Liu, Minghui Zhu", "tldr": "", "abstract": "Inverse reinforcement learning (IRL) aims to learn a reward function and a corresponding policy that best fit the demonstrated trajectories of an expert. However, current IRL works cannot learn incrementally from an ongoing trajectory because they have to wait to collect at least one complete trajectory to learn. To bridge the gap, this paper considers the problem of learning a reward function and a corresponding policy while observing the initial state-action pair of an ongoing trajectory and keeping updating the learned reward and policy when new state-action pairs of the ongoing trajectory are observed. We formulate this problem as an online bi-level optimization problem where the upper level dynamically adjusts the learned reward according to the newly observed state-action pairs with the help of a meta-regularization term, and the lower level learns the corresponding policy. We propose a novel algorithm to solve this problem and guarantee that the algorithm achieves sub-linear local regret $O(\\sqrt{T}+\\log T+\\sqrt{T}\\log T)$. If the reward function is linear, we prove that the proposed algorithm achieves sub-linear regret $O(\\log T)$. Experiments are used to validate the proposed algorithm.", "keywords": "inverse reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c1baf38d10361bc13924a33244c763faf63aa595.zip", "author": "Shicheng Liu;Minghui Zhu", "authorids": "~Shicheng_Liu1;~Minghui_Zhu1", "gender": ";", "homepage": ";", "dblp": "231/6618;", "google_scholar": ";", "orcid": ";", "linkedin": "shicheng-liu-404a21239/;", "or_profile": "~Shicheng_Liu1;~Minghui_Zhu1", "aff": "Pennsylvania State University;", "aff_domain": "psu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nliu2024intrajectory,\ntitle={In-Trajectory Inverse Reinforcement Learning: Learn Incrementally From An Ongoing Trajectory},\nauthor={Shicheng Liu and Minghui Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mJZH9w8qgu}\n}", "github": "", "reviewers": "L1zi;MWk1;cmJ6", "pdf_size": 1286605, "rating": "4;7;8", "confidence": "4;4;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;3;4", "wc_summary": "77;82;53", "wc_strengths": "14;105;22", "wc_weaknesses": "77;56;32", "wc_questions": "57;18;74", "wc_limitations": "15;9;8", "wc_review": "240;270;189", "wc_reply_reviewers": "97;56;30", "wc_reply_authors": "777;0;0", "reply_reviewers": "2;1;1", "reply_authors": "3;1;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 70.66666666666667, 12.657891697365017 ], "wc_strengths_avg": [ 47.0, 41.14203041497426 ], "wc_weaknesses_avg": [ 55.0, 18.384776310850235 ], "wc_questions_avg": [ 49.666666666666664, 23.442601296689656 ], "wc_limitations_avg": [ 10.666666666666666, 3.0912061651652345 ], "wc_review_avg": [ 233.0, 33.436506994600975 ], "wc_reply_reviewers_avg": [ 61.0, 27.58018612458347 ], "wc_reply_authors_avg": [ 259.0, 366.2813126546316 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8476456791074283225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "psu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "RClicks: Realistic Click Simulation for Benchmarking Interactive Segmentation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97520", "id": "mK1Utjiy8z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mK1Utjiy8z", "openreview": "https://openreview.net/forum?id=mK1Utjiy8z", "poster": "/media/PosterPDFs/NeurIPS%202024/97520.png?t=1731499475.903583", "project": "", "author_site": "Anton Antonov, Andrei Moskalenko, Denis Shepelev, Vlad Shakhuro, Alexander Krapukhin, Konstantin Soshin, Anton Konushin", "tldr": "", "abstract": "The emergence of Segment Anything (SAM) sparked research interest in the field of interactive segmentation, especially in the context of image editing tasks and speeding up data annotation. Unlike common semantic segmentation, interactive segmentation methods allow users to directly influence their output through prompts (e.g. clicks). However, click patterns in real-world interactive segmentation scenarios remain largely unexplored. Most methods rely on the assumption that users would click in the center of the largest erroneous area. Nevertheless, recent studies show that this is not always the case. Thus, methods may have poor performance in real-world deployment despite high metrics in a baseline benchmark. To accurately simulate real-user clicks, we conducted a large crowdsourcing study of click patterns in an interactive segmentation scenario and collected 475K real-user clicks. Drawing on ideas from saliency tasks, we develop a clickability model that enables sampling clicks, which closely resemble actual user inputs. Using our model and dataset, we propose RClicks benchmark for a comprehensive comparison of existing interactive segmentation methods on realistic clicks. Specifically, we evaluate not only the average quality of methods, but also the robustness w.r.t. click patterns. According to our benchmark, in real-world usage interactive segmentation models may perform worse than it has been reported in the baseline benchmark, and most of the methods are not robust. We believe that RClicks is a significant step towards creating interactive segmentation methods that provide the best user experience in real-world cases.", "keywords": "interactive segmentation;benchmark;real user clicks", "primary_area": "", "supplementary_material": "", "author": "Anton Antonov;Andrey Moskalenko;Denis Shepelev;Alexander Krapukhin;Konstantin Soshin;Anton Konushin;Vlad Shakhuro", "authorids": "~Anton_Antonov1;~Andrey_Moskalenko1;~Denis_Shepelev1;~Alexander_Krapukhin1;~Konstantin_Soshin1;~Anton_Konushin1;~Vlad_Shakhuro1", "gender": "M;M;M;M;M;M;", "homepage": ";;;;;;", "dblp": "30/5061;291/4062;368/5241;;;69/4994-1;223/2243.html", "google_scholar": "https://scholar.google.com/citations?hl=en;_13jza8AAAAJ;IUkFdpUAAAAJ;GAS_EXwAAAAJ;XWKhjF4AAAAJ;ZT_k-wMAAAAJ;https://scholar.google.ru/citations?user=MuPLmJsAAAAJ", "orcid": ";0000-0003-4965-0867;0000-0002-9170-3064;;;0000-0002-6152-0021;", "linkedin": ";;;https://linkedin.com/in/akrapukhin;;;", "or_profile": "~Anton_Antonov1;~Andrey_Moskalenko1;~Denis_Shepelev1;~Alexander_Krapukhin1;~Konstantin_Soshin1;~Anton_Konushin1;~Vlad_Shakhuro1", "aff": "Samsung Research;Lomonosov Moscow State University;Samsung Research;Artificial Intelligence Research Institute;Samsung;Samsung;Samsung", "aff_domain": "samsung.com;msu.ru;samsung.com;airi.net;samsung.com;samsung.com;samsung.com", "position": "Researcher;PhD student;Leading Engineer;Researcher;Researcher;Principal Researcher;Senior researcher", "bibtex": "@inproceedings{\nantonov2024rclicks,\ntitle={{RC}licks: Realistic Click Simulation for Benchmarking Interactive Segmentation},\nauthor={Anton Antonov and Andrey Moskalenko and Denis Shepelev and Alexander Krapukhin and Konstantin Soshin and Anton Konushin and Vlad Shakhuro},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mK1Utjiy8z}\n}", "github": "", "reviewers": "yi3z;QS5p;1xhL", "pdf_size": 41555776, "rating": "6;6;8", "confidence": "3;3;5", "wc_summary_and_contributions": "34;49;55", "wc_strengths": "43;62;146", "wc_improvement": "146;73;87", "wc_limitations": "6;73;1", "wc_correctness": "1;73;1", "wc_clarity": "1;5;1", "wc_relation_to_prior_work": "1;1;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "234;338;294", "wc_reply_reviewers": "0;0;55", "wc_reply_authors": "0;0;92", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 46.0, 8.831760866327848 ], "wc_strengths_avg": [ 83.66666666666667, 44.75364665464579 ], "wc_improvement_avg": [ 102.0, 31.63331577098213 ], "wc_limitations_avg": [ 26.666666666666668, 32.82614134429381 ], "wc_correctness_avg": [ 25.0, 33.94112549695428 ], "wc_clarity_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_relation_to_prior_work_avg": [ 1.0, 0.0 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 288.6666666666667, 42.62497963505542 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 25.927248643506744 ], "wc_reply_authors_avg": [ 30.666666666666668, 43.36921591277491 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4ZW2A3l4XNIJ:scholar.google.com/&scioq=RClicks:+Realistic+Click+Simulation+for+Benchmarking+Interactive+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "samsung.com;msu.ru;samsung.com;airi.net;samsung.com;samsung.com;samsung.com", "author_num": 7, "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Samsung;Lomonosov Moscow State University;Artificial Intelligence Research Institute", "aff_unique_dep": "Samsung Research;;", "aff_unique_url": "https://research.samsung.com;https://www.msu.ru;", "aff_unique_abbr": "Samsung;MSU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Moscow", "aff_country_unique_index": "0;1;0;2;0;0;0", "aff_country_unique": "South Korea;Russian Federation;United States" }, { "title": "Can Large Language Models Analyze Graphs like Professionals? A Benchmark, Datasets and Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97519", "id": "mMnL0n7Cwy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mMnL0n7Cwy", "openreview": "https://openreview.net/forum?id=mMnL0n7Cwy", "poster": "/media/PosterPDFs/NeurIPS%202024/97519.png?t=1733138936.7338872", "project": "", "author_site": "Xin Li, Weize Chen, Qizhi Chu, Haopeng Li, Zhaojun Sun, Ran Li, Chen Qian, Yiwei Wei, Chuan Shi, Zhiyuan Liu, Maosong Sun, Cheng Yang", "tldr": "", "abstract": "The need to analyze graphs is ubiquitous across various fields, from social networks to biological research and recommendation systems. Therefore, enabling the ability of large language models (LLMs) to process graphs is an important step toward more advanced general intelligence. However, current LLM benchmarks on graph analysis require models to directly reason over the prompts describing graph\ntopology, and are thus limited to small graphs with only a few dozens of nodes. In contrast, human experts typically write programs based on popular libraries for task solving, and can thus handle graphs with different scales. To this end, a question naturally arises: can LLMs analyze graphs like professionals? In this paper, we introduce ProGraph, a manually crafted benchmark containing 3 categories of graph tasks. The benchmark expects solutions based on programming instead of directly reasoning over raw inputs. Our findings reveal that the performance of current LLMs is unsatisfactory, with the best model achieving only 36% accuracy. To bridge this gap, we propose LLM4Graph datasets, which include crawled documents and auto-generated codes based on 6 widely used graph libraries. By augmenting closed-source LLMs with document retrieval and fine-tuning open-source ones on the codes, we show 11-32% absolute improvements in their accuracies. Our results underscore that the capabilities of LLMs in handling structured data are still under-explored, and show the effectiveness of LLM4Graph in enhancing LLMs\u2019 proficiency of graph analysis. The benchmark, datasets and enhanced open-source\nmodels are available at https://github.com/BUPT-GAMMA/ProGraph.", "keywords": "large language model;graph analysis;graph reasoning", "primary_area": "", "supplementary_material": "/attachment/b4536d43354e8c7d18394305df5bc8f16b632c16.zip", "author": "Xin Li;Weize Chen;Qizhi Chu;Haopeng Li;Zhaojun Sun;Ran Li;Chen Qian;Yiwei Wei;Chuan Shi;Zhiyuan Liu;Maosong Sun;Cheng Yang", "authorids": "~Xin_Li81;~Weize_Chen1;~Qizhi_Chu1;~Haopeng_Li2;~Zhaojun_Sun1;~Ran_Li6;~Chen_Qian8;~Yiwei_Wei1;~Chuan_Shi1;~Zhiyuan_Liu1;~Maosong_Sun1;~Cheng_Yang6", "gender": "M;M;;M;M;M;;M;M;M;M;M", "homepage": "http://lixin4sky.github.io;https://chenweize1998.github.io/;;https://github.com/hp-l33;https://github.com/UNAOUN;https://asl-r.github.io/;;;http://www.shichuan.org/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;https://albertyang33.github.io/", "dblp": ";245/7488;;;;;;;64/3041-1;53/3245-1;95/3291-1;49/1457-2", "google_scholar": "pHPTHHwAAAAJ;0CoGHtIAAAAJ;;;;https://scholar.google.com/citations?hl=en;;ZD3JBK0AAAAJ;tUq_v90AAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;OlLjVUcAAAAJ", "orcid": ";;;;;;;;0000-0002-3734-0266;0000-0002-7709-2543;;0000-0001-7821-0030", "linkedin": ";;;;;;;;;;;", "or_profile": "~Xin_Li81;~Weize_Chen1;~Qizhi_Chu1;~Haopeng_Li2;~Zhaojun_Sun1;~Ran_Li6;~Chen_Qian8;~Yiwei_Wei1;~Chuan_Shi1;~Zhiyuan_Liu1;~Maosong_Sun1;~Cheng_Yang6", "aff": "China University of Petroleum - Beijing at Karamay;Tsinghua University;;Huawei Technologies Ltd.;Beijing University of Posts and Telecommunications;Tsinghua University;;Tianjin University;Beijing University of Post and Telecommunication;Tsinghua University;Tsinghua University;Beijing University of Posts and Telecommunications", "aff_domain": "cupk.edu.cn;tsinghua.edu.cn;;huawei.com;bupt.edu.cn;tsinghua.edu.cn;;tju.edu.cn;bupt.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn", "position": "Undergrad student;PhD student;;Intern;Undergrad student;Research Assistant;;PhD student;Full Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024can,\ntitle={Can Large Language Models Analyze Graphs like Professionals? A Benchmark, Datasets and Models},\nauthor={Xin Li and Weize Chen and Qizhi Chu and Haopeng Li and Zhaojun Sun and Ran Li and Chen Qian and Yiwei Wei and Chuan Shi and Zhiyuan Liu and Maosong Sun and Cheng Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mMnL0n7Cwy}\n}", "github": "", "reviewers": "r4SH;LfTu;vPYW", "pdf_size": 1059019, "rating": "6;6;7", "confidence": "3;4;3", "wc_summary_and_contributions": "74;151;53", "wc_strengths": "104;4;48", "wc_improvement": "243;1;302", "wc_limitations": "1;2;28", "wc_correctness": "1;13;13", "wc_clarity": "1;1;16", "wc_relation_to_prior_work": "1;15;12", "wc_documentation": "1;1;16", "wc_additional_feedback": "1;1;1", "wc_review": "427;189;489", "wc_reply_reviewers": "0;371;192", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;3;2", "reply_authors": "2;6;4", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 92.66666666666667, 42.129430198958175 ], "wc_strengths_avg": [ 52.0, 40.92269134192748 ], "wc_improvement_avg": [ 182.0, 130.23312430663202 ], "wc_limitations_avg": [ 10.333333333333334, 12.498888839501783 ], "wc_correctness_avg": [ 9.0, 5.656854249492381 ], "wc_clarity_avg": [ 6.0, 7.0710678118654755 ], "wc_relation_to_prior_work_avg": [ 9.333333333333334, 6.018490028422597 ], "wc_documentation_avg": [ 6.0, 7.0710678118654755 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 368.3333333333333, 129.3092761130805 ], "wc_reply_reviewers_avg": [ 187.66666666666666, 151.4911071830804 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 4.0, 1.632993161855452 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15809926069109984052&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cupk.edu.cn;tsinghua.edu.cn;;huawei.com;bupt.edu.cn;tsinghua.edu.cn;;tju.edu.cn;bupt.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn", "author_num": 12, "aff_unique_index": "0;1;2;3;1;4;3;1;1;3", "aff_unique_norm": "China University of Petroleum;Tsinghua University;Huawei;Beijing University of Posts and Telecommunications;Tianjin University", "aff_unique_dep": ";;Huawei Technologies;;", "aff_unique_url": "http://www.cup.edu.cn;https://www.tsinghua.edu.cn;https://www.huawei.com;http://www.bupt.edu.cn/;http://www.tju.edu.cn", "aff_unique_abbr": "CUP;THU;Huawei;BUPT;TJU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Quality-Improved and Property-Preserved Polarimetric Imaging via Complementarily Fusing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93761", "id": "mOK4yD8JFd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mOK4yD8JFd", "openreview": "https://openreview.net/forum?id=mOK4yD8JFd", "poster": "/media/PosterPDFs/NeurIPS%202024/93761.png?t=1729478216.6570492", "project": "", "author_site": "Chu Zhou, Yixing Liu, Chao Xu, Boxin Shi", "tldr": "", "abstract": "Polarimetric imaging is a challenging problem in the field of polarization-based vision, since setting a short exposure time reduces the signal-to-noise ratio, making the degree of polarization (DoP) and the angle of polarization (AoP) severely degenerated, while if setting a relatively long exposure time, the DoP and AoP would tend to be over-smoothed due to the frequently-occurring motion blur. This work proposes a polarimetric imaging framework that can produce clean and clear polarized snapshots by complementarily fusing a degraded pair of noisy and blurry ones. By adopting a neural network-based three-phase fusing scheme with specially-designed modules tailored to each phase, our framework can not only improve the image quality but also preserve the polarization properties. Experimental results show that our framework achieves state-of-the-art performance.", "keywords": "Polarimetric Imaging;Exposure fusion;Deep Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/f182b64be525a6ba17ceab047d6561795d8d167b.zip", "author": "Chu Zhou;Yixing Liu;Chao Xu;Boxin Shi", "authorids": "~Chu_Zhou1;~Yixing_Liu7;~Chao_Xu1;~Boxin_Shi3", "gender": "M;M;M;M", "homepage": "https://fourson.github.io/;http://www.cis.pku.edu.cn/faculty/vision/xuchao/xuchao01.htm;http://camera.pku.edu.cn;https://github.com/LuigiNixy", "dblp": "193/1769;;69/783;", "google_scholar": "ud8V49kAAAAJ;https://scholar.google.co.uk/citations?hl=zh-CN;K1LjZxcAAAAJ;", "orcid": ";;0000-0001-6749-0364;", "linkedin": ";;;", "or_profile": "~Chu_Zhou1;~Chao_Xu1;~Boxin_Shi3;~YIXING_LIU6", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu;pku.edu.cn;stu.pku.edu.cn", "position": "PhD student;Full Professor;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nzhou2024qualityimproved,\ntitle={Quality-Improved and Property-Preserved Polarimetric Imaging via Complementarily Fusing},\nauthor={Chu Zhou and Yixing Liu and Chao Xu and Boxin Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mOK4yD8JFd}\n}", "github": "", "reviewers": "WQ4j;4C9V;SSTW", "pdf_size": 13737474, "rating": "6;6;8", "confidence": "4;4;4", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "4;4;3", "wc_summary": "37;76;56", "wc_strengths": "27;71;155", "wc_weaknesses": "20;31;61", "wc_questions": "172;57;47", "wc_limitations": "7;1;7", "wc_review": "263;236;326", "wc_reply_reviewers": "9;31;29", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.333333333333336, 15.923427883328248 ], "wc_strengths_avg": [ 84.33333333333333, 53.099487337973 ], "wc_weaknesses_avg": [ 37.333333333333336, 17.326921891156037 ], "wc_questions_avg": [ 92.0, 56.71566509057852 ], "wc_limitations_avg": [ 5.0, 2.8284271247461903 ], "wc_review_avg": [ 275.0, 37.70941526992961 ], "wc_reply_reviewers_avg": [ 23.0, 9.93310961716756 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6vHHNPCSXVIJ:scholar.google.com/&scioq=Quality-Improved+and+Property-Preserved+Polarimetric+Imaging+via+Complementarily+Fusing&hl=en&as_sdt=0,48", "gs_version_total": 0, "email": "pku.edu.cn;pku.edu;pku.edu.cn;stu.pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Simple yet Scalable Granger Causal Structural Learning Approach for Topological Event Sequences", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93760", "id": "mP084aMFsd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mP084aMFsd", "openreview": "https://openreview.net/forum?id=mP084aMFsd", "poster": "/media/PosterPDFs/NeurIPS%202024/93760.png?t=1731463396.9123807", "project": "", "author_site": "Mingjia Li, Shuo Liu, Hong Qian, Aimin Zhou", "tldr": "", "abstract": "In modern telecommunication networks, faults manifest as alarms, generating thousands of events daily. Network operators need an efficient method to identify the root causes of these alarms to mitigate potential losses. This task is challenging due to the increasing scale of telecommunication networks and the interconnected nature of devices, where one fault can trigger a cascade of alarms across multiple devices within a topological network. Recent years have seen a growing focus on causal approaches to addressing this problem, emphasizing the importance of learning a Granger causal graph from topological event sequences. Such causal graphs delineate the relations among alarms and can significantly aid engineers in identifying and rectifying faults. However, existing methods either ignore the topological relationships among devices or suffer from relatively low scalability and efficiency, failing to deliver high-quality responses in a timely manner. To this end, this paper proposes $S^2GCSL$, a simple yet scalable Granger causal structural learning approach for topological event sequences. $S^2GCSL$ utilizes a linear kernel to model activation interactions among various event types within a topological network, and employs gradient descent to efficiently optimize the likelihood function. Notably, it can seamlessly incorporate expert knowledge as constraints within the optimization process, which enhances the interpretability of the outcomes. Extensive experimental results on both large-scale synthetic and real-world problems verify the scalability and efficacy of $S^2GCSL$.", "keywords": "Telecommunication Network Fault Diagnosis;Topological Hawkes Processes;Causal structure learning;Event Sequences;Scalability", "primary_area": "causal_inference", "supplementary_material": "", "author": "Mingjia Li;Shuo Liu;Hong Qian;Aimin Zhou", "authorids": "~Mingjia_Li3;~Shuo_Liu7;~Hong_Qian1;~Aimin_Zhou1", "gender": "M;M;;", "homepage": ";https://scholar.google.com/citations?user=sRoqbLwAAAAJ&hl=en;;", "dblp": ";07/6773-17;83/1352;", "google_scholar": ";sRoqbLwAAAAJ;https://scholar.google.com.hk/citations?user=hLjvz-EAAAAJ;", "orcid": "0000-0001-6274-0926;0000-0001-7970-3187;0000-0003-2170-5264;", "linkedin": ";;;", "or_profile": "~Mingjia_Li3;~Shuo_Liu7;~Hong_Qian1;~Aimin_Zhou1", "aff": "East China Normal University;East China Normal University;East China Normal University;", "aff_domain": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;", "position": "PhD student;MS student;Associate Professor;", "bibtex": "@inproceedings{\nli2024a,\ntitle={A Simple yet Scalable Granger Causal Structural Learning Approach for Topological Event Sequences},\nauthor={Mingjia Li and Shuo Liu and Hong Qian and Aimin Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mP084aMFsd}\n}", "github": "", "reviewers": "wGGW;6ZcE;Dc6x;B6jL", "pdf_size": 574024, "rating": "7;7;7;8", "confidence": "4;4;4;5", "soundness": "4;4;4;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "131;99;81;68", "wc_strengths": "108;158;174;182", "wc_weaknesses": "131;163;185;196", "wc_questions": "63;138;103;123", "wc_limitations": "1;1;1;1", "wc_review": "434;559;544;570", "wc_reply_reviewers": "12;14;17;0", "wc_reply_authors": "52;52;52;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 23.64714570513744 ], "wc_strengths_avg": [ 155.5, 28.75326068465975 ], "wc_weaknesses_avg": [ 168.75, 24.823124299733102 ], "wc_questions_avg": [ 106.75, 28.145825622994256 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 526.75, 54.338637266681616 ], "wc_reply_reviewers_avg": [ 10.75, 6.456585785072479 ], "wc_reply_authors_avg": [ 39.0, 22.516660498395403 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DU_KT5S6MRsJ:scholar.google.com/&scioq=A+Simple+yet+Scalable+Granger+Causal+Structural+Learning+Approach+for+Topological+Event+Sequences&hl=en&as_sdt=0,14", "gs_version_total": 2, "email": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "East China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.ecnu.edu.cn", "aff_unique_abbr": "ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "AutoGuide: Automated Generation and Selection of Context-Aware Guidelines for Large Language Model Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93759", "id": "mRIQz8Zd6O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mRIQz8Zd6O", "openreview": "https://openreview.net/forum?id=mRIQz8Zd6O", "poster": "/media/PosterPDFs/NeurIPS%202024/93759.png?t=1731710807.5168464", "project": "", "author_site": "Yao Fu, Dong-Ki Kim, Jaekyeom Kim, Sungryull Sohn, Lajanugen Logeswaran, Kyunghoon Bae, Honglak Lee", "tldr": "", "abstract": "Recent advances in large language models (LLMs) have empowered AI agents capable of performing various sequential decision-making tasks. However, effectively guiding LLMs to perform well in unfamiliar domains like web navigation, where they lack sufficient knowledge, has proven to be difficult with the demonstration-based in-context learning paradigm. In this paper, we introduce a novel framework, called AutoGuide, which addresses this limitation by automatically generating context-aware guidelines from offline experiences. Importantly, each context-aware guideline is expressed in concise natural language and follows a conditional structure, clearly describing the context where it is applicable. As a result, our guidelines facilitate the provision of relevant knowledge for the agent's current decision-making process, overcoming the limitations of the conventional demonstration-based learning paradigm. Our evaluation demonstrates that AutoGuide significantly outperforms competitive baselines in complex benchmark domains, including real-world web navigation.", "keywords": "large language model agents;sequential decision-making", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yao Fu;Dong-Ki Kim;Jaekyeom Kim;Sungryull Sohn;Lajanugen Logeswaran;Kyunghoon Bae;Honglak Lee", "authorids": "~Yao_Fu5;~Dong-Ki_Kim1;~Jaekyeom_Kim1;~Sungryull_Sohn1;~Lajanugen_Logeswaran1;~Kyunghoon_Bae2;~Honglak_Lee2", "gender": "F;;M;M;M;M;M", "homepage": "https://www.google.com/?client=safari;https://dkkim93.github.io/;https://jaekyeom.github.io/;;https://sites.google.com/umich.edu/llajan/;https://www.lgresear.ai;http://web.eecs.umich.edu/~honglak", "dblp": ";199/2089;228/6696;172/9884;157/3603;276/0021.html;58/2562", "google_scholar": ";https://scholar.google.com/citations?hl=en;8PR-AaoAAAAJ;https://scholar.google.com/citations?hl=en;dcv4kpIAAAAJ;;fmSHtE8AAAAJ", "orcid": ";;;;;;", "linkedin": ";;jaekyeom-kim-14157428;;;;", "or_profile": "~Yao_Fu5;~Dong-Ki_Kim1;~Jaekyeom_Kim1;~Sungryull_Sohn1;~Lajanugen_Logeswaran1;~Kyunghoon_Bae2;~Honglak_Lee1", "aff": "University of Michigan - Ann Arbor;LG AI Research;LG AI Research;LG AI Research;LG AI Research;LG AI Research;LG AI Research", "aff_domain": "umich.edu;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai", "position": "PhD student;Research Scientist;Researcher;Researcher;Researcher;Principal Researcher;Chief Scientist", "bibtex": "@inproceedings{\nfu2024autoguide,\ntitle={AutoGuide: Automated Generation and Selection of Context-Aware Guidelines for Large Language Model Agents},\nauthor={Yao Fu and Dong-Ki Kim and Jaekyeom Kim and Sungryull Sohn and Lajanugen Logeswaran and Kyunghoon Bae and Honglak Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mRIQz8Zd6O}\n}", "github": "", "reviewers": "rEoJ;zgYy;wFet", "pdf_size": 23425144, "rating": "6;6;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "95;92;29", "wc_strengths": "47;93;17", "wc_weaknesses": "59;51;166", "wc_questions": "12;1;10", "wc_limitations": "7;1;2", "wc_review": "220;238;224", "wc_reply_reviewers": "14;0;58", "wc_reply_authors": "18;0;192", "reply_reviewers": "1;0;2", "reply_authors": "2;1;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.0, 30.430248109405877 ], "wc_strengths_avg": [ 52.333333333333336, 31.25522178594945 ], "wc_weaknesses_avg": [ 92.0, 52.42772803266099 ], "wc_questions_avg": [ 7.666666666666667, 4.784233364802441 ], "wc_limitations_avg": [ 3.3333333333333335, 2.6246692913372702 ], "wc_review_avg": [ 227.33333333333334, 7.71722460186015 ], "wc_reply_reviewers_avg": [ 24.0, 24.711670657134185 ], "wc_reply_authors_avg": [ 70.0, 86.57944328765345 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10527528978060783802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umich.edu;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai;lgresearch.ai", "author_num": 7, "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "University of Michigan;LG", "aff_unique_dep": ";LG AI Research", "aff_unique_url": "https://www.umich.edu;https://www.lgaires.com", "aff_unique_abbr": "UM;LG AI", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "Improving the Training of Rectified Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93758", "id": "mSHs6C7Nfa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mSHs6C7Nfa", "openreview": "https://openreview.net/forum?id=mSHs6C7Nfa", "poster": "/media/PosterPDFs/NeurIPS%202024/93758.png?t=1733058376.2914405", "project": "", "author_site": "Sangyun Lee, Zinan Lin, Giulia Fanti", "tldr": "", "abstract": "Diffusion models have shown great promise for image and video generation, but sampling from state-of-the-art models requires expensive numerical integration of a generative ODE.\n One approach for tackling this problem is rectified flows, which iteratively learn smooth ODE paths that are less susceptible to truncation error.\n However, rectified flows still require a relatively large number of function evaluations (NFEs).\n In this work, we propose improved techniques for training rectified flows, allowing them to compete with knowledge distillation methods even in the low NFE setting.\n Our main insight is that under realistic settings, a single iteration of the Reflow algorithm for training rectified flows is sufficient to learn nearly straight trajectories; hence, the current practice of using multiple Reflow iterations is unnecessary.\n We thus propose techniques to improve one-round training of rectified flows, including a U-shaped timestep distribution and LPIPS-Huber premetric.\n With these techniques, we improve the FID of the previous 2-rectified flow by up to 75\\% in the 1 NFE setting on CIFAR-10.\n On ImageNet 64$\\times$64, our improved rectified flow outperforms the state-of-the-art distillation methods\n such as consistency distillation and progressive distillation in both one-step and two-step settings and rivals the performance of improved consistency training (iCT) in FID.\n Code is available at https://github.com/sangyun884/rfpp.", "keywords": "generative modeling;rectified flow;diffusion model", "primary_area": "generative_models", "supplementary_material": "", "author": "Sangyun Lee;Zinan Lin;Giulia Fanti", "authorids": "~Sangyun_Lee1;~Zinan_Lin1;~Giulia_Fanti1", "gender": ";M;", "homepage": "https://sangyun884.github.io/about/;https://zinanlin.me/;https://www.andrew.cmu.edu/user/gfanti/", "dblp": "87/8208;64/237-1;141/9910", "google_scholar": "CGFkx-IAAAAJ;67nE-wQ_g_cC;Rn_BmTYAAAAJ", "orcid": ";;0000-0002-7671-2624", "linkedin": ";;", "or_profile": "~Sangyun_Lee1;~Zinan_Lin1;~Giulia_Fanti1", "aff": "Carnegie Mellon University;Microsoft;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;microsoft.com;andrew.cmu.edu", "position": "PhD student;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nlee2024improving,\ntitle={Improving the Training of Rectified Flows},\nauthor={Sangyun Lee and Zinan Lin and Giulia Fanti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mSHs6C7Nfa}\n}", "github": "", "reviewers": "eZ8c;aFYc;nW7F;Wooo", "pdf_size": 24729944, "rating": "4;6;6;10", "confidence": "4;5;4;4", "soundness": "2;3;2;4", "novelty": "2;2;2;4", "presentation": "2;3;3;4", "wc_summary": "24;141;56;182", "wc_strengths": "77;58;35;53", "wc_weaknesses": "125;621;169;31", "wc_questions": "2;215;1;76", "wc_limitations": "6;7;5;20", "wc_review": "234;1042;266;362", "wc_reply_reviewers": "0;249;229;20", "wc_reply_authors": "23;230;404;0", "reply_reviewers": "0;1;2;1", "reply_authors": "2;3;3;1", "rating_avg": [ 6.5, 2.179449471770337 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 100.75, 63.4719426203421 ], "wc_strengths_avg": [ 55.75, 14.956186011146023 ], "wc_weaknesses_avg": [ 236.5, 227.51868055172963 ], "wc_questions_avg": [ 73.5, 87.17367721967452 ], "wc_limitations_avg": [ 9.5, 6.103277807866851 ], "wc_review_avg": [ 476.0, 330.1575381541364 ], "wc_reply_reviewers_avg": [ 124.5, 114.93585167387937 ], "wc_reply_authors_avg": [ 164.25, 164.87324676854035 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10582072522373375293&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "andrew.cmu.edu;microsoft.com;andrew.cmu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com", "aff_unique_abbr": "CMU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SeeA*: Efficient Exploration-Enhanced A* Search by Selective Sampling", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93757", "id": "mSaqxZVZW8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mSaqxZVZW8", "openreview": "https://openreview.net/forum?id=mSaqxZVZW8", "poster": "/media/PosterPDFs/NeurIPS%202024/93757.png?t=1731295927.5659387", "project": "", "author_site": "Dengwei Zhao, Shikui Tu, Lei Xu", "tldr": "", "abstract": "Monte-Carlo tree search (MCTS) and reinforcement learning contributed crucially to the success of AlphaGo and AlphaZero, and A$^*$ is a tree search algorithm among the most well-known ones in the classical AI literature. MCTS and A$^*$ both perform heuristic search and are mutually beneficial. Efforts have been made to the renaissance of A$^*$ from three possible aspects, two of which have been confirmed by studies in recent years, while the third is about the OPEN list that consists of open nodes of A$^*$ search, but still lacks deep investigation. This paper aims at the third, i.e., developing the Sampling-exploration enhanced A$^*$ (SeeA$^*$) search by constructing a dynamic subset of OPEN through a selective sampling process, such that the node with the best heuristic value in this subset instead of in the OPEN is expanded. Nodes with the best heuristic values in OPEN are most probably picked into this subset, but sometimes may not be included, which enables SeeA$^*$ to explore other promising branches. Three sampling techniques are presented for comparative investigations. Moreover, under the assumption about the distribution of prediction errors, we have theoretically shown the superior efficiency of SeeA$^*$ over A$^*$ search, particularly when the accuracy of the guiding heuristic function is insufficient. Experimental results on retrosynthetic planning in organic chemistry, logic synthesis in integrated circuit design, and the classical Sokoban game empirically demonstrate the efficiency of SeeA$^*$, in comparison with the state-of-the-art heuristic search algorithms.", "keywords": "search algorithm;reinforcement learning;exploration", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c251dd9ab9e8bc48d54131bc46852e44a599927d.zip", "author": "Dengwei Zhao;Shikui Tu;Lei Xu", "authorids": "~Dengwei_Zhao1;~Shikui_Tu1;~Lei_Xu7", "gender": "M;M;M", "homepage": "http://cmach.sjtu.edu.cn/;http://www.cs.sjtu.edu.cn/~tushikui;http://www.cse.cuhk.edu.hk/~lxu/", "dblp": "323/9550;04/115;19/360-1", "google_scholar": "Va0JP5EAAAAJ;Yewd61kAAAAJ;rN2ny9kAAAAJ", "orcid": "0000-0003-4764-2759;0000-0001-6270-0449;0000-0002-2752-1573", "linkedin": ";;", "or_profile": "~Dengwei_Zhao1;~Shikui_Tu1;~Lei_Xu7", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2024seea,\ntitle={SeeA*: Efficient Exploration-Enhanced A* Search by Selective Sampling},\nauthor={Dengwei Zhao and Shikui Tu and Lei Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mSaqxZVZW8}\n}", "github": "", "reviewers": "BBaT;KUFf;8E9W", "pdf_size": 4394855, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;4", "wc_summary": "80;80;61", "wc_strengths": "42;35;64", "wc_weaknesses": "33;60;41", "wc_questions": "107;244;343", "wc_limitations": "39;18;65", "wc_review": "301;437;574", "wc_reply_reviewers": "0;14;57", "wc_reply_authors": "0;702;262", "reply_reviewers": "0;1;1", "reply_authors": "1;3;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 73.66666666666667, 8.956685895029603 ], "wc_strengths_avg": [ 47.0, 12.355835328567093 ], "wc_weaknesses_avg": [ 44.666666666666664, 11.323525167642018 ], "wc_questions_avg": [ 231.33333333333334, 96.76202193468721 ], "wc_limitations_avg": [ 40.666666666666664, 19.223827807061618 ], "wc_review_avg": [ 437.3333333333333, 111.4520325321865 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 24.253293018108327 ], "wc_reply_authors_avg": [ 321.3333333333333, 289.6449934354506 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ENTohZahLCwJ:scholar.google.com/&scioq=SeeA*:+Efficient+Exploration-Enhanced+A*+Search+by+Selective+Sampling&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "LiteVAE: Lightweight and Efficient Variational Autoencoders for Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93756", "id": "mTAbl8kUzq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mTAbl8kUzq", "openreview": "https://openreview.net/forum?id=mTAbl8kUzq", "poster": "/media/PosterPDFs/NeurIPS%202024/93756.png?t=1733705702.5542848", "project": "", "author_site": "Seyedmorteza Sadat, Jakob Buhmann, Derek Bradley, Otmar Hilliges, Romann M Weber", "tldr": "", "abstract": "Advances in latent diffusion models (LDMs) have revolutionized high-resolution image generation, but the design space of the autoencoder that is central to these systems remains underexplored. In this paper, we introduce LiteVAE, a new autoencoder design for LDMs, which leverages the 2D discrete wavelet transform to enhance scalability and computational efficiency over standard variational autoencoders (VAEs) with no sacrifice in output quality. We investigate the training methodologies and the decoder architecture of LiteVAE and propose several enhancements that improve the training dynamics and reconstruction quality. Our base LiteVAE model matches the quality of the established VAEs in current LDMs with a six-fold reduction in encoder parameters, leading to faster training and lower GPU memory requirements, while our larger model outperforms VAEs of comparable complexity across all evaluated metrics (rFID, LPIPS, PSNR, and SSIM).", "keywords": "variational autoencoders;latent diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Seyedmorteza Sadat;Jakob Buhmann;Derek Bradley;Otmar Hilliges;Romann M. Weber", "authorids": "~Seyedmorteza_Sadat1;~Jakob_Buhmann1;~Derek_Bradley1;~Otmar_Hilliges1;~Romann_M._Weber1", "gender": ";M;M;M;M", "homepage": ";;http://zurich.disneyresearch.com/derekbradley/;https://ait.ethz.ch/people/hilliges/;", "dblp": ";;57/3202.html;82/2289;218/7180", "google_scholar": ";;gRPJPkkAAAAJ;-epU9OsAAAAJ;QoETwl4AAAAJ", "orcid": ";0009-0008-3038-4881;;0000-0002-5068-3474;0000-0003-1196-5425", "linkedin": ";jakob-buhmann-02aa41141/;;;romannmweber/", "or_profile": "~Seyedmorteza_Sadat1;~Jakob_Buhmann1;~Derek_Bradley1;~Otmar_Hilliges1;~Romann_M._Weber1", "aff": ";DisneyResearch|Studios;Disney Research, Disney;ETHZ - ETH Zurich;Disney Research, Disney", "aff_domain": ";disneyresearch.com;disneyresearch.com;ethz.ch;disneyresearch.com", "position": ";Researcher;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nsadat2024litevae,\ntitle={Lite{VAE}: Lightweight and Efficient Variational Autoencoders for Latent Diffusion Models},\nauthor={Seyedmorteza Sadat and Jakob Buhmann and Derek Bradley and Otmar Hilliges and Romann M. Weber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mTAbl8kUzq}\n}", "github": "", "reviewers": "86Z2;xykJ;rH5p;U437", "pdf_size": 25282030, "rating": "5;5;5;6", "confidence": "4;4;2;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;4", "wc_summary": "36;53;74;177", "wc_strengths": "38;71;92;219", "wc_weaknesses": "134;60;168;55", "wc_questions": "64;4;138;55", "wc_limitations": "14;1;5;19", "wc_review": "286;189;477;525", "wc_reply_reviewers": "111;0;219;66", "wc_reply_authors": "230;0;78;0", "reply_reviewers": "3;0;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.0, 54.79507277118993 ], "wc_strengths_avg": [ 105.0, 68.57477670397476 ], "wc_weaknesses_avg": [ 104.25, 48.30307961196677 ], "wc_questions_avg": [ 65.25, 47.82977629050757 ], "wc_limitations_avg": [ 9.75, 7.119515432949071 ], "wc_review_avg": [ 369.25, 137.1939776375042 ], "wc_reply_reviewers_avg": [ 99.0, 79.74020316001207 ], "wc_reply_authors_avg": [ 77.0, 93.89888178247918 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6054976759765328793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";disneyresearch.com;disneyresearch.com;ethz.ch;disneyresearch.com", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Disney Research;ETH Zurich", "aff_unique_dep": "Studios;", "aff_unique_url": "https://www.disneyresearch.com;https://www.ethz.ch", "aff_unique_abbr": "Disney Research;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Unified Mechanism-Specific Amplification by Subsampling and Group Privacy Amplification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93755", "id": "mVfRrMfGdY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mVfRrMfGdY", "openreview": "https://openreview.net/forum?id=mVfRrMfGdY", "poster": "/media/PosterPDFs/NeurIPS%202024/93755.png?t=1732901539.190607", "project": "", "author_site": "Jan Schuchardt, Mihail Stoian, Arthur Kosmala, Stephan G\u00fcnnemann", "tldr": "", "abstract": "Amplification by subsampling is one of the main primitives in machine learning with differential privacy (DP): Training a model on random batches instead of complete datasets results in stronger privacy. This is traditionally formalized via mechanism-agnostic subsampling guarantees that express the privacy parameters of a subsampled mechanism as a function of the original mechanism's privacy parameters. We propose the first general framework for deriving mechanism-specific guarantees, which leverage additional information beyond these parameters to more tightly characterize the subsampled mechanism's privacy. Such guarantees are of particular importance for privacy accounting, i.e., tracking privacy over multiple iterations. Overall, our framework based on conditional optimal transport lets us derive existing and novel guarantees for approximate DP, accounting with Renyi DP, and accounting with dominating pairs in a unified, principled manner. As an application, we analyze how subsampling affects the privacy of groups of multiple users. Our tight mechanism-specific bounds outperform tight mechanism-agnostic bounds and classic group privacy results.", "keywords": "Differential Privacy;Privacy-Preserving Machine Learning;Amplification;Subsampling;Accounting", "primary_area": "privacy", "supplementary_material": "", "author": "Jan Schuchardt;Mihail Stoian;Arthur Kosmala;Stephan G\u00fcnnemann", "authorids": "~Jan_Schuchardt1;~Mihail_Stoian1;~Arthur_Kosmala1;~Stephan_G\u00fcnnemann1", "gender": ";M;;M", "homepage": "https://www.cs.cit.tum.de/daml/team/jan-schuchardt/;https://stoianmihail.github.io/;;http://www.daml.in.tum.de", "dblp": "241/5487;255/9132;;43/3011", "google_scholar": "O-cixlwAAAAJ;7PEMO6UAAAAJ;;", "orcid": ";0000-0002-8843-3374;;", "linkedin": ";mihail-stoian-77023119a/;arthur-kosmala-9219371b2/;", "or_profile": "~Jan_Schuchardt1;~Mihail_Stoian1;~Arthur_Kosmala1;~Stephan_G\u00fcnnemann1", "aff": "Morgan Stanley;University of Technology Nuremberg;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich", "aff_domain": "morganstanley.com;utn.de;tum.de;tum.de", "position": "Intern;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nschuchardt2024unified,\ntitle={Unified Mechanism-Specific Amplification by Subsampling and Group Privacy Amplification},\nauthor={Jan Schuchardt and Mihail Stoian and Arthur Kosmala and Stephan G{\\\"u}nnemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mVfRrMfGdY}\n}", "github": "", "reviewers": "t4hg;cbWU;T1S1;7Wbe;eK4M", "pdf_size": 1780957, "rating": "5;6;6;7;7", "confidence": "3;4;3;2;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "2;2;2;3;3", "wc_summary": "75;192;85;65;75", "wc_strengths": "47;126;24;46;28", "wc_weaknesses": "17;438;52;48;146", "wc_questions": "7;34;20;36;2", "wc_limitations": "1;54;3;69;1", "wc_review": "147;844;184;264;252", "wc_reply_reviewers": "0;17;9;21;10", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 98.4, 47.22541688540187 ], "wc_strengths_avg": [ 54.2, 37.07505900197598 ], "wc_weaknesses_avg": [ 140.2, 155.03341575286277 ], "wc_questions_avg": [ 19.8, 13.746272221951667 ], "wc_limitations_avg": [ 25.6, 29.702525145179155 ], "wc_review_avg": [ 338.2, 256.5559588082101 ], "wc_reply_reviewers_avg": [ 11.4, 7.227724399837061 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8702628144278172591&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "email": "morganstanley.com;utn.de;tum.de;tum.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Morgan Stanley;Nuremberg University of Technology;Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.morganstanley.com;https://www.tu-nuernberg.de;https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "Morgan Stanley;TUN;TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Germany" }, { "title": "A Compositional Atlas for Algebraic Circuits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93754", "id": "mXlR1FLFDc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mXlR1FLFDc", "openreview": "https://openreview.net/forum?id=mXlR1FLFDc", "poster": "/media/PosterPDFs/NeurIPS%202024/93754.png?t=1733528239.9061286", "project": "", "author_site": "Benjie Wang, Denis Mau\u00e1, Guy Van den Broeck, YooJung Choi", "tldr": "", "abstract": "Circuits based on sum-product structure have become a ubiquitous representation to compactly encode knowledge, from Boolean functions to probability distributions. By imposing constraints on the structure of such circuits, certain inference queries become tractable, such as model counting and most probable configuration. Recent works have explored analyzing probabilistic and causal inference queries\nas compositions of basic operators to derive tractability conditions. In this paper, we take an algebraic perspective for compositional inference, and show that a large class of queries\u2014including marginal MAP, probabilistic answer set programming inference, and causal backdoor adjustment\u2014correspond to a combination of basic operators over semirings: aggregation, product, and elementwise mapping. Using this framework, we uncover simple and general sufficient conditions for tractable composition of these operators, in terms of circuit properties (e.g., marginal determinism, compatibility) and conditions on the elementwise mappings. Applying our analysis, we derive novel tractability conditions for many such compositional queries. Our results unify tractability conditions for existing problems on circuits, while providing a blueprint for analysing novel compositional inference queries.", "keywords": "semiring;probabilistic circuits;logic circuits;probabilistic inference;algebraic", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Benjie Wang;Denis Mau\u00e1;Guy Van den Broeck;YooJung Choi", "authorids": "~Benjie_Wang1;~Denis_Mau\u00e11;~Guy_Van_den_Broeck1;~YooJung_Choi1", "gender": ";M;;M", "homepage": "https://web.cs.ucla.edu/~benjiewang/;http://www.ime.usp.br/~ddm;https://yoojungchoi.github.io/;http://web.cs.ucla.edu/~guyvdb/", "dblp": "255/7169;53/10263;204/2937;96/7521.html", "google_scholar": "https://scholar.google.co.uk/citations?user=tOeUlJoAAAAJ;https://scholar.google.com.br/citations?user=5JgMAPIAAAAJ;VCtamFwAAAAJ;d0KQ9z0AAAAJ", "orcid": ";0000-0003-2297-6349;;0000-0003-3434-2503", "linkedin": ";;;guyvdb", "or_profile": "~Benjie_Wang1;~Denis_Mau\u00e11;~YooJung_Choi1;~Guy_Van_den_Broek1", "aff": "University of California, Los Angeles;Universidade de S\u00e3o Paulo;Arizona State University;University of California, Los Angeles", "aff_domain": "ucla.edu;usp.br;asu.edu;ucla.edu", "position": "Postdoc;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A Compositional Atlas for Algebraic Circuits},\nauthor={Benjie Wang and Denis Mau{\\'a} and Guy Van den Broeck and YooJung Choi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mXlR1FLFDc}\n}", "github": "", "reviewers": "41Ra;oPB2;x2Dq;oGJN", "pdf_size": 637968, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "4;4;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "17;129;46;66", "wc_strengths": "26;35;22;195", "wc_weaknesses": "242;89;17;526", "wc_questions": "12;64;80;138", "wc_limitations": "1;11;1;11", "wc_review": "298;328;166;936", "wc_reply_reviewers": "156;18;0;22", "wc_reply_authors": "238;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 41.11265012134343 ], "wc_strengths_avg": [ 69.5, 72.6102609828666 ], "wc_weaknesses_avg": [ 218.5, 195.2440780151859 ], "wc_questions_avg": [ 73.5, 44.93050188902857 ], "wc_limitations_avg": [ 6.0, 5.0 ], "wc_review_avg": [ 432.0, 297.29783046635237 ], "wc_reply_reviewers_avg": [ 49.0, 62.32976816898969 ], "wc_reply_authors_avg": [ 59.5, 103.05702305034819 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6036750614964392510&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 9, "email": "ucla.edu;usp.br;asu.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, Los Angeles;Universidade de S\u00e3o Paulo;Arizona State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.usp.br;https://www.asu.edu", "aff_unique_abbr": "UCLA;USP;ASU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Brazil" }, { "title": "SWE-agent: Agent-Computer Interfaces Enable Automated Software Engineering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93753", "id": "mXpq6ut8J3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mXpq6ut8J3", "openreview": "https://openreview.net/forum?id=mXpq6ut8J3", "poster": "", "project": "", "author_site": "John Yang, Carlos Jimenez, Alexander Wettig, Kilian Lieret, Shunyu Yao, Karthik Narasimhan, Ofir Press", "tldr": "", "abstract": "Language model agents are increasingly being used to automate complicated tasks in digital environments. Just as humans benefit from powerful software applications, such as integrated development environments, for complex tasks like software engineering, we posit that language model agents represent a new category of end users with their own needs and abilities, and would benefit from specially built interfaces to the software they use. We investigate how the role of interface design affects the performance of language model agents. As a result of this exploration, we introduce SWE-agent: a system that facilitates language model agents to autonomously use computers to solve software engineering tasks. SWE-agent's custom agent-computer interface significantly enhances an agent's ability to create and edit code files, navigate entire repositories, and execute tests and other programs. We evaluate SWE-agent on SWE-bench and HumanEvalFix, achieving state-of-the-art performance on both with a pass@1 rate of 12.5% and 87.7%, respectively, far exceeding the previous state-of-the-art achieved with non-interactive language models. Finally, we provide insight on how the design of the agent-computer interface can impact agents' behavior and performance.", "keywords": "Language models;Natural language processing;Software engineering", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/3dfcb29ed523e757fce7d25335349794e197bade.zip", "author": "John Yang;Carlos E Jimenez;Alexander Wettig;Kilian Lieret;Shunyu Yao;Karthik R Narasimhan;Ofir Press", "authorids": "~John_Yang3;~Carlos_E_Jimenez1;~Alexander_Wettig1;~Kilian_Lieret1;~Shunyu_Yao1;~Karthik_R_Narasimhan1;~Ofir_Press1", "gender": "M;M;;M;M;M;M", "homepage": "https://john-b-yang.github.io/;https://www.carlosejimenez.com;https://www.cs.princeton.edu/~awettig/;https://lieret.net;https://ysymyth.github.io;http://www.karthiknarasimhan.com;https://ofir.io/about", "dblp": ";153/0588;302/0235;276/7672;156/1038;147/0322;185/0577", "google_scholar": "71G11ksAAAAJ;Ue4wghAAAAAJ;N_jSE08AAAAJ;byA9yI0AAAAJ;qJBXk9cAAAAJ;euc0GX4AAAAJ;LeHa8psAAAAJ", "orcid": ";0000-0001-9370-3909;;0000-0003-2792-7511;;;", "linkedin": "jyang20/;;alexander-wettig/;kilian-lieret-ph-d-0b0667104/;;;", "or_profile": "~John_Yang3;~Carlos_E_Jimenez1;~Alexander_Wettig1;~Kilian_Lieret1;~Shunyu_Yao1;~Karthik_R_Narasimhan1;~Ofir_Press1", "aff": "Stanford University;Princeton University;Allen Institute for Artificial Intelligence;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "stanford.edu;princeton.edu;allenai.org;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Intern;Postdoc;PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nyang2024sweagent,\ntitle={{SWE}-agent: Agent-Computer Interfaces Enable Automated Software Engineering},\nauthor={John Yang and Carlos E Jimenez and Alexander Wettig and Kilian Lieret and Shunyu Yao and Karthik R Narasimhan and Ofir Press},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mXpq6ut8J3}\n}", "github": "", "reviewers": "3ebt;N4zh;KbN4;BuFF", "pdf_size": 4411168, "rating": "6;6;7;8", "confidence": "5;3;4;4", "soundness": "2;3;4;4", "novelty": "2;3;4;3", "presentation": "3;3;3;4", "wc_summary": "17;97;125;73", "wc_strengths": "57;109;81;81", "wc_weaknesses": "138;98;31;54", "wc_questions": "50;86;29;66", "wc_limitations": "8;2;4;1", "wc_review": "270;392;270;275", "wc_reply_reviewers": "31;0;18;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 39.736632972611055 ], "wc_strengths_avg": [ 82.0, 18.411952639521967 ], "wc_weaknesses_avg": [ 80.25, 41.12405014100629 ], "wc_questions_avg": [ 57.75, 20.932928605429293 ], "wc_limitations_avg": [ 3.75, 2.680951323690902 ], "wc_review_avg": [ 301.75, 52.14582917166051 ], "wc_reply_reviewers_avg": [ 12.25, 13.083864108129525 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 217, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14754089104562008325&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "stanford.edu;princeton.edu;allenai.org;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 7, "aff_unique_index": "0;1;2;1;1;1;1", "aff_unique_norm": "Stanford University;Princeton University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu;https://allenai.org", "aff_unique_abbr": "Stanford;Princeton;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DDGS-CT: Direction-Disentangled Gaussian Splatting for Realistic Volume Rendering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93752", "id": "mY0ZnS2s9u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mY0ZnS2s9u", "openreview": "https://openreview.net/forum?id=mY0ZnS2s9u", "poster": "/media/PosterPDFs/NeurIPS%202024/93752.png?t=1731530396.3709326", "project": "", "author_site": "Zhongpai Gao, Benjamin Planche, Meng Zheng, Xiao Chen, Terrence Chen, Ziyan Wu", "tldr": "", "abstract": "Digitally reconstructed radiographs (DRRs) are simulated 2D X-ray images generated from 3D CT volumes, widely used in preoperative settings but limited in intraoperative applications due to computational bottlenecks. Physics-based Monte Carlo simulations provide accurate representations but are extremely computationally intensity. Analytical DRR renderers are much more efficient, but at the price of ignoring anisotropic X-ray image formation phenomena such as Compton scattering. We propose a novel approach that balances realistic physics-inspired X-ray simulation with efficient, differentiable DRR generation using 3D Gaussian splatting (3DGS). Our direction-disentangled 3DGS (DDGS) method decomposes the radiosity contribution into isotropic and direction-dependent components, able to approximate complex anisotropic interactions without complex runtime simulations. Additionally, we adapt the 3DGS initialization to account for tomography data properties, enhancing accuracy and efficiency. Our method outperforms state-of-the-art techniques in image accuracy and inference speed, demonstrating its potential for intraoperative applications and inverse problems like pose registration.", "keywords": "3D Gaussian splatting;image registration;pose estimation", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Zhongpai Gao;Benjamin Planche;Meng Zheng;Xiao Chen;Terrence Chen;Ziyan Wu", "authorids": "~Zhongpai_Gao1;~Benjamin_Planche1;~Meng_Zheng1;~Xiao_Chen12;~Terrence_Chen4;~Ziyan_Wu2", "gender": "M;M;F;;M;M", "homepage": "https://sites.google.com/site/gaozhongpai/home;https://planche.me;https://mzhengrpi.github.io/;;;http://wuziyan.com", "dblp": "149/4942;153/6611;31/7910-2;;51/4242.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=cP3ahiAAAAAJ;1D5PfMgAAAAJ;DV61tzoAAAAJ;S2BT6ogAAAAJ;CkPUb-4AAAAJ", "orcid": "0000-0003-4344-4501;0000-0002-6110-6437;0000-0002-6677-2017;;;0000-0002-9774-7770", "linkedin": ";benjaminplanche;;;terrencechen/;", "or_profile": "~Zhongpai_Gao1;~Benjamin_Planche1;~Meng_Zheng1;~Xiao_Chen12;~Terrence_Chen4;~Ziyan_Wu2", "aff": "United Imaging Intelligence;United Imaging Intelligence;Rensselaer Polytechnic Institute;United Imaging Intelligence, America;United Imaging Intelligence;United Imaging Intelligence", "aff_domain": "uii-ai.com;uii-ai.com;rpi.edu;uii-ai.com;united-imaging.com;uii-ai.com", "position": "Researcher;Researcher;Researcher;Principal Researcher;CEO;Principal Expert Scientist", "bibtex": "@inproceedings{\ngao2024ddgsct,\ntitle={{DDGS}-{CT}: Direction-Disentangled Gaussian Splatting for Realistic Volume Rendering},\nauthor={Zhongpai Gao and Benjamin Planche and Meng Zheng and Xiao Chen and Terrence Chen and Ziyan Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mY0ZnS2s9u}\n}", "github": "", "reviewers": "ZGY5;g5Vp;Zg8U;YFhr", "pdf_size": 2435174, "rating": "3;4;5;6", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "74;68;80;70", "wc_strengths": "74;27;84;126", "wc_weaknesses": "134;220;111;111", "wc_questions": "57;111;79;8", "wc_limitations": "32;38;7;8", "wc_review": "371;464;361;323", "wc_reply_reviewers": "17;18;17;40", "wc_reply_authors": "66;326;71;89", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 4.58257569495584 ], "wc_strengths_avg": [ 77.75, 35.202095108104004 ], "wc_weaknesses_avg": [ 144.0, 44.87204029236914 ], "wc_questions_avg": [ 63.75, 37.47916087641237 ], "wc_limitations_avg": [ 21.25, 13.91716565971678 ], "wc_review_avg": [ 379.75, 51.83326634507997 ], "wc_reply_reviewers_avg": [ 23.0, 9.82344135219425 ], "wc_reply_authors_avg": [ 138.0, 108.8783725080422 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11219478101842047916&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "uii-ai.com;uii-ai.com;rpi.edu;uii-ai.com;united-imaging.com;uii-ai.com", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "United Imaging Intelligence;Rensselaer Polytechnic Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.united-imaging.com;https://www.rpi.edu", "aff_unique_abbr": ";RPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Towards Robust Multimodal Sentiment Analysis with Incomplete Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93751", "id": "mYEjc7qGRA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mYEjc7qGRA", "openreview": "https://openreview.net/forum?id=mYEjc7qGRA", "poster": "/media/PosterPDFs/NeurIPS%202024/93751.png?t=1730374011.0061004", "project": "", "author_site": "Haoyu Zhang, Wenbin Wang, Tianshu Yu", "tldr": "", "abstract": "The field of Multimodal Sentiment Analysis (MSA) has recently witnessed an emerging direction seeking to tackle the issue of data incompleteness. Recognizing that the language modality typically contains dense sentiment information, we consider it as the dominant modality and present an innovative Language-dominated Noise-resistant Learning Network (LNLN) to achieve robust MSA. The proposed LNLN features a dominant modality correction (DMC) module and dominant modality based multimodal learning (DMML) module, which enhances the model's robustness across various noise scenarios by ensuring the quality of dominant modality representations. Aside from the methodical design, we perform comprehensive experiments under random data missing scenarios, utilizing diverse and meaningful settings on several popular datasets (e.g., MOSI, MOSEI, and SIMS), providing additional uniformity, transparency, and fairness compared to existing evaluations in the literature. Empirically, LNLN consistently outperforms existing baselines, demonstrating superior performance across these challenging and extensive evaluation metrics.", "keywords": "Multimodal Sentiment Analysis;Multimodal Learning;Robust MSA", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Haoyu Zhang;Wenbin Wang;Tianshu Yu", "authorids": "~Haoyu_Zhang5;~Wenbin_Wang2;~Tianshu_Yu2", "gender": ";;M", "homepage": ";https://dreammr.github.io/;https://mypage.cuhk.edu.cn/academics/yutianshu/", "dblp": ";;152/6675", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;MTHO7DsAAAAJ", "orcid": ";0000-0003-2910-1959;0000-0002-6537-1924", "linkedin": ";;", "or_profile": "~Haoyu_Zhang5;~Wenbin_Wang2;~Tianshu_Yu2", "aff": ";Wuhan University;Chinese University of Hong Kong (Shenzhen)", "aff_domain": ";whu.edu.cn;cuhk.edu.cn", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024towards,\ntitle={Towards Robust Multimodal Sentiment Analysis with Incomplete Data},\nauthor={Haoyu Zhang and Wenbin Wang and Tianshu Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mYEjc7qGRA}\n}", "github": "", "reviewers": "ugFZ;LeNg;xa9F", "pdf_size": 3848364, "rating": "4;7;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "90;71;89", "wc_strengths": "78;106;53", "wc_weaknesses": "71;17;204", "wc_questions": "45;12;204", "wc_limitations": "4;20;75", "wc_review": "288;226;625", "wc_reply_reviewers": "0;0;14", "wc_reply_authors": "0;0;33", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.33333333333333, 8.73053390247253 ], "wc_strengths_avg": [ 79.0, 21.64871050817269 ], "wc_weaknesses_avg": [ 97.33333333333333, 78.58046124125825 ], "wc_questions_avg": [ 87.0, 83.8212383587835 ], "wc_limitations_avg": [ 33.0, 30.40833219146796 ], "wc_review_avg": [ 379.6666666666667, 175.31368711224144 ], "wc_reply_reviewers_avg": [ 4.666666666666667, 6.599663291074443 ], "wc_reply_authors_avg": [ 11.0, 15.556349186104045 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=781279584644955381&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";whu.edu.cn;cuhk.edu.cn", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Wuhan University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.whu.edu.cn/;https://www.cuhk.edu.cn", "aff_unique_abbr": "WHU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Stochastic Optimal Control and Estimation with Multiplicative and Internal Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93750", "id": "mZHbkbYWTp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mZHbkbYWTp", "openreview": "https://openreview.net/forum?id=mZHbkbYWTp", "poster": "/media/PosterPDFs/NeurIPS%202024/93750.png?t=1733421029.128642", "project": "", "author_site": "Francesco Damiani, Akiyuki Anzai, Jan Drugowitsch, Gregory DeAngelis, Ruben Moreno Bote", "tldr": "", "abstract": "A pivotal brain computation relies on the ability to sustain perception-action loops. Stochastic optimal control theory offers a mathematical framework to explain these processes at the algorithmic level through optimality principles. However, incorporating a realistic noise model of the sensorimotor system \u2014 accounting for multiplicative noise in feedback and motor output, as well as internal noise in estimation \u2014 makes the problem challenging. Currently, the algorithm that is commonly used is the one proposed in the seminal study in (Todorov, 2005). After discovering some pitfalls in the original derivation, i.e., unbiased estimation does not hold, we improve the algorithm by proposing an efficient gradient descent-based optimization that minimizes the cost-to-go while only imposing linearity of the control law. The optimal solution is obtained by iteratively propagating in closed form the sufficient statistics to compute the expected cost and then minimizing this cost with respect to the filter and control gains. We demonstrate that this approach results in a significantly lower overall cost than current state-of-the-art solutions, particularly in the presence of internal noise, though the improvement is present in other circumstances as well, with theoretical explanations for this enhanced performance. Providing the optimal control law is key for inverse control inference, especially in explaining behavioral data under rationality assumptions.", "keywords": "control theory;stochastic optimal control;sensorimotor system;multiplicative and internal noise;motor control", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/574f9e5b0f869d73c1d6ceb26a93af71ae1d7863.zip", "author": "Francesco Damiani;Akiyuki Anzai;Jan Drugowitsch;Gregory C DeAngelis;Rub\u00e9n Moreno-Bote", "authorids": "~Francesco_Damiani1;~Akiyuki_Anzai1;~Jan_Drugowitsch1;~Gregory_C_DeAngelis1;~Rub\u00e9n_Moreno-Bote1", "gender": "M;;M;M;M", "homepage": ";;https://drugowitschlab.hms.harvard.edu/;;https://www.upf.edu/web/tcn", "dblp": "88/4388;;34/2980;;67/908", "google_scholar": ";;https://scholar.google.fr/citations?user=fCUx98wAAAAJ;;https://scholar.google.es/citations?user=dk4k91QAAAAJ", "orcid": "0009-0009-4486-0647;0000-0001-8564-1701;0000-0002-7846-0408;0000-0002-1635-1273;", "linkedin": ";;;;", "or_profile": "~Francesco_Damiani1;~Akiyuki_Anzai1;~Jan_Drugowitsch1;~Gregory_C_DeAngelis1;~Rub\u00e9n_Moreno-Bote1", "aff": "Universitat Pompeu Fabra;University of Rochester;Harvard University;University of Rochester;Universitat Pompeu Fabra", "aff_domain": "upf.edu;rochester.edu;harvard.edu;ur.rochester.edu;upf.edu", "position": "PhD student;Researcher;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndamiani2024stochastic,\ntitle={Stochastic Optimal Control and Estimation with Multiplicative and Internal Noise},\nauthor={Francesco Damiani and Akiyuki Anzai and Jan Drugowitsch and Gregory C DeAngelis and Rub{\\'e}n Moreno-Bote},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mZHbkbYWTp}\n}", "github": "", "reviewers": "EneD;5RsR;C3X4;HNsh", "pdf_size": 7057402, "rating": "6;6;7;7", "confidence": "4;4;3;4", "soundness": "3;2;4;3", "novelty": "2;3;3;4", "presentation": "2;3;4;4", "wc_summary": "59;57;116;149", "wc_strengths": "108;38;140;123", "wc_weaknesses": "840;44;267;61", "wc_questions": "174;235;184;27", "wc_limitations": "1;40;8;23", "wc_review": "1182;414;715;383", "wc_reply_reviewers": "423;66;91;0", "wc_reply_authors": "1299;126;134;0", "reply_reviewers": "3;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 95.25, 39.04084399702445 ], "wc_strengths_avg": [ 102.25, 38.78385617753861 ], "wc_weaknesses_avg": [ 303.0, 322.2227490417149 ], "wc_questions_avg": [ 155.0, 77.43707122560873 ], "wc_limitations_avg": [ 18.0, 14.983324063771697 ], "wc_review_avg": [ 673.5, 320.94586770980555 ], "wc_reply_reviewers_avg": [ 145.0, 163.91003630040476 ], "wc_reply_authors_avg": [ 389.75, 527.6392588691634 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VH62iIv5O40J:scholar.google.com/&scioq=Stochastic+Optimal+Control+and+Estimation+with+Multiplicative+and+Internal+Noise&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "upf.edu;rochester.edu;harvard.edu;ur.rochester.edu;upf.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Universitat Pompeu Fabra;University of Rochester;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upf.edu/;https://www.rochester.edu;https://www.harvard.edu", "aff_unique_abbr": "UPF;U of R;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Spain;United States" }, { "title": "JourneyBench: A Challenging One-Stop Vision-Language Understanding Benchmark of Generated Images", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97518", "id": "mZLlWaoeKq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mZLlWaoeKq", "openreview": "https://openreview.net/forum?id=mZLlWaoeKq", "poster": "/media/PosterPDFs/NeurIPS%202024/97518.png?t=1734049367.986098", "project": "", "author_site": "Zhecan Wang, Junzhang Liu, Chia-Wei Tang, Hani Alomari, Anushka Sivakumar, Rui Sun, Wenhao Li, Md. Atabuzzaman, Hammad Ayyubi, Haoxuan You, Alvi Md Ishmam, Kai-Wei Chang, Shih-Fu Chang, Christopher Thomas", "tldr": "", "abstract": "Existing vision-language understanding benchmarks largely consist of images of objects in their usual contexts.\nAs a consequence, recent multimodal large language models can perform well with only a shallow visual understanding by relying on background language biases. Thus, strong performance on these benchmarks does not necessarily correlate with strong visual understanding. In this paper, we release JourneyBench, a comprehensive human-annotated benchmark of generated images designed to assess the model's fine-grained multimodal reasoning abilities across five tasks: complementary multimodal chain of thought, multi-image VQA, imaginary image captioning, VQA with hallucination triggers, and fine-grained retrieval with sample-specific distractors.\nUnlike existing benchmarks, JourneyBench explicitly requires fine-grained multimodal reasoning in unusual imaginary scenarios where language bias and holistic image gist are insufficient. We benchmark state-of-the-art models on JourneyBench and analyze performance along a number of fine-grained dimensions. Results across all five tasks show that JourneyBench is exceptionally challenging for even the best models, indicating that models' visual reasoning abilities are not as strong as they first appear. We discuss the implications of our findings and propose avenues for further research.", "keywords": "benchmark;visual language understanding;visual question answering;retrieval;chain of thought;multi-image", "primary_area": "", "supplementary_material": "/attachment/be1c3ec278915e2269c17412fdf11410a9085ba5.zip", "author": "Zhecan Wang;Junzhang Liu;Chia-Wei Tang;Hani Alomari;Anushka Sivakumar;Rui Sun;Wenhao Li;Md. Atabuzzaman;Hammad Ayyubi;Haoxuan You;Alvi Md Ishmam;Kai-Wei Chang;Shih-Fu Chang;Chris Thomas", "authorids": "~Zhecan_Wang2;~Junzhang_Liu1;~Chia-Wei_Tang1;~Hani_Alomari1;~Anushka_Sivakumar1;~Rui_Sun10;~Wenhao_Li5;~Md._Atabuzzaman1;~Hammad_Ayyubi1;~Haoxuan_You1;~Alvi_Md_Ishmam1;~Kai-Wei_Chang1;~Shih-Fu_Chang3;~Chris_Thomas1", "gender": "M;M;M;;M;M;M;;M;M;M;M;M;M", "homepage": "https://www.zhecanwang.com/;;https://hanialomari.github.io/;;https://threesr.github.io/;https://www.linkedin.com/in/benningtonli/;;https://hammad001.github.io/;https://hxyou.github.io/;;http://kwchang.net;http://www.ee.columbia.edu/~sfchang/;https://github.com/Tverous;https://people.cs.vt.edu/chris/", "dblp": "167/4251;;242/4855;388/0844.html;;;291/4477;251/5554;210/2628;218/7656;18/2428;c/ShihFuChang;;21/4235-4", "google_scholar": "uqHPnmgAAAAJ;;Ft_qTcwAAAAJ;https://scholar.google.com/citations?hl=en;;;eroLR7EAAAAJ;;BhysChMAAAAJ;stXv3yAAAAAJ;fqDBtzYAAAAJ;OMVTRscAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0009-0003-7785-4637;;0000-0001-8663-6162;;0000-0001-9927-8392;;0000-0002-0635-7073;;;;0000-0001-5365-0072;;;", "linkedin": "jameszhecanwang/;junzhangliu/;hani-al-omari-50727289/;anushka-sivakumar/;rui-sun-three/;;md-atabuzzaman-9049b2181;hammad-ayyubi-56380688/;;;kai-wei-chang-41239040;;;", "or_profile": "~Zhecan_Wang2;~Junzhang_Liu1;~Hani_Alomari1;~Anushka_Sivakumar1;~Rui_Sun10;~Wenhao_Li5;~Md._Atabuzzaman1;~Hammad_Ayyubi1;~Haoxuan_You1;~Alvi_Md_Ishmam1;~Kai-Wei_Chang1;~Shih-Fu_Chang3;~ChiaWei_Tang1;~Christopher_Thomas1", "aff": "Columbia University;Columbia University;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;University of California, Los Angeles;;Virginia Polytechnic Institute and State University;Adobe Systems;Columbia University;Virginia Polytechnic Institute and State University;Amazon;Columbia University;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University", "aff_domain": "columbia.edu;columbia.edu;vt.edu;vt.edu;ucla.edu;;vt.edu;adobe.com;columbia.edu;vt.edu;amazon.com;ee.columbia.edu;vt.edu;vt.edu", "position": "PhD student;Researcher;PhD student;MS student;Research Assistant;;PhD student;Intern;PhD student;PhD student;Researcher;Full Professor;MS student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024journeybench,\ntitle={JourneyBench: A Challenging One-Stop Vision-Language Understanding Benchmark of Generated Images},\nauthor={Zhecan Wang and Junzhang Liu and Chia-Wei Tang and Hani Alomari and Anushka Sivakumar and Rui Sun and Wenhao Li and Md. Atabuzzaman and Hammad Ayyubi and Haoxuan You and Alvi Md Ishmam and Kai-Wei Chang and Shih-Fu Chang and Chris Thomas},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mZLlWaoeKq}\n}", "github": "", "reviewers": "jgCg;njtE;uNZR;RFCq", "pdf_size": 5018610, "rating": "6;6;6;7", "confidence": "5;2;5;3", "wc_summary_and_contributions": "58;49;166;86", "wc_strengths": "53;67;109;92", "wc_improvement": "73;2;568;42", "wc_limitations": "9;1;73;1", "wc_correctness": "11;10;44;1", "wc_clarity": "6;5;1;1", "wc_relation_to_prior_work": "8;6;3;1", "wc_documentation": "7;1;60;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "226;142;1025;226", "wc_reply_reviewers": "10;10;370;0", "wc_reply_authors": "29;29;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "wc_summary_and_contributions_avg": [ 89.75, 46.088908644054484 ], "wc_strengths_avg": [ 80.25, 21.695333599647643 ], "wc_improvement_avg": [ 171.25, 230.44237349064082 ], "wc_limitations_avg": [ 21.0, 30.199337741083 ], "wc_correctness_avg": [ 16.5, 16.347782724271816 ], "wc_clarity_avg": [ 3.25, 2.277608394786075 ], "wc_relation_to_prior_work_avg": [ 4.5, 2.692582403567252 ], "wc_documentation_avg": [ 17.25, 24.80297361204902 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 404.75, 359.7397496802376 ], "wc_reply_reviewers_avg": [ 97.5, 157.38090735537142 ], "wc_reply_authors_avg": [ 14.5, 14.5 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4665239222256792846&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "columbia.edu;columbia.edu;vt.edu;vt.edu;ucla.edu;;vt.edu;adobe.com;columbia.edu;vt.edu;amazon.com;ee.columbia.edu;vt.edu;vt.edu", "author_num": 14, "aff_unique_index": "0;0;1;1;2;1;3;0;1;4;0;1;1", "aff_unique_norm": "Columbia University;Virginia Tech;University of California, Los Angeles;Adobe;Amazon", "aff_unique_dep": ";;;Adobe Systems Incorporated;Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.vt.edu;https://www.ucla.edu;https://www.adobe.com;https://www.amazon.com", "aff_unique_abbr": "Columbia;VT;UCLA;Adobe;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ECMamba: Consolidating Selective State Space Model with Retinex Guidance for Efficient Multiple Exposure Correction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93749", "id": "mZsvm58FPG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mZsvm58FPG", "openreview": "https://openreview.net/forum?id=mZsvm58FPG", "poster": "/media/PosterPDFs/NeurIPS%202024/93749.png?t=1731457164.6534612", "project": "", "author_site": "Wei Dong, Han Zhou, Yulun Zhang, Xiaohong Liu, Jun Chen", "tldr": "", "abstract": "Exposure Correction (EC) aims to recover proper exposure conditions for images captured under over-exposure or under-exposure scenarios. While existing deep learning models have shown promising results, few have fully embedded Retinex theory into their architecture, highlighting a gap in current methodologies. Additionally, the balance between high performance and efficiency remains an under-explored problem for exposure correction task. Inspired by Mamba which demonstrates powerful and highly efficient sequence modeling, we introduce a novel framework based on \\textbf{Mamba} for \\textbf{E}xposure \\textbf{C}orrection (\\textbf{ECMamba}) with dual pathways, each dedicated to the restoration of reflectance and illumination map, respectively. Specifically, we firstly derive the Retinex theory and we train a Retinex estimator capable of mapping inputs into two intermediary spaces, each approximating the target reflectance and illumination map, respectively. This setup facilitates the refined restoration process of the subsequent \\textbf{E}xposure \\textbf{C}orrection \\textbf{M}amba \\textbf{M}odule (\\textbf{ECMM}). Moreover, we develop a novel \\textbf{2D S}elective \\textbf{S}tate-space layer guided by \\textbf{Retinex} information (\\textbf{Retinex-SS2D}) as the core operator of \\textbf{ECMM}. This architecture incorporates an innovative 2D scanning strategy based on deformable feature aggregation, thereby enhancing both efficiency and effectiveness. Extensive experiment results and comprehensive ablation studies demonstrate the outstanding performance and the importance of each component of our proposed ECMamba. Code is available at \\url{https://github.com/LowlevelAI/ECMamba}.", "keywords": "Retinex Theory;Mamba;Exposure Correction", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Wei Dong;Han Zhou;Yulun Zhang;Xiaohong Liu;Jun Chen", "authorids": "~Wei_Dong9;~Han_Zhou6;~Yulun_Zhang1;~Xiaohong_Liu2;~Jun_Chen8", "gender": ";;M;M;M", "homepage": ";;http://yulunzhang.com/;https://jhc.sjtu.edu.cn/~xiaohongliu/;https://www.ece.mcmaster.ca/~junchen/", "dblp": ";;166/2763-1.html;95/2454-1;85/5901-5.html", "google_scholar": ";;ORmLjWoAAAAJ;https://scholar.google.ca/citations?hl=en;https://scholar.google.ca/citations?user=XI79Mw0AAAAJ", "orcid": ";;0000-0002-2288-5079;;", "linkedin": ";;yulun-zhang-1116b5b9/;xiaohong-liu/;", "or_profile": "~Wei_Dong9;~Han_Zhou6;~Yulun_Zhang1;~Xiaohong_Liu2;~Jun_Chen8", "aff": ";;Swiss Federal Institute of Technology;Shanghai Jiaotong University;McMaster University", "aff_domain": ";;ethz.ch;sjtu.edu.cn;mcmaster.ca", "position": ";;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ndong2024ecmamba,\ntitle={{ECM}amba: Consolidating Selective State Space Model with Retinex Guidance for Efficient Multiple Exposure Correction},\nauthor={Wei Dong and Han Zhou and Yulun Zhang and Xiaohong Liu and Jun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mZsvm58FPG}\n}", "github": "", "reviewers": "EgGV;5Zdz;zooA;fpJd", "pdf_size": 0, "rating": "5;5;7;7", "confidence": "4;5;5;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "4;3;3;3", "wc_summary": "62;97;50;127", "wc_strengths": "84;69;27;153", "wc_weaknesses": "141;134;15;193", "wc_questions": "58;5;122;68", "wc_limitations": "28;16;23;20", "wc_review": "373;321;237;561", "wc_reply_reviewers": "28;45;0;18", "wc_reply_authors": "24;38;0;31", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 30.240701050074883 ], "wc_strengths_avg": [ 83.25, 45.36725140450984 ], "wc_weaknesses_avg": [ 120.75, 65.17044959182037 ], "wc_questions_avg": [ 63.25, 41.51731566467177 ], "wc_limitations_avg": [ 21.75, 4.380353866983808 ], "wc_review_avg": [ 373.0, 118.89491158161479 ], "wc_reply_reviewers_avg": [ 22.75, 16.29992331270304 ], "wc_reply_authors_avg": [ 23.25, 14.306903927824496 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5731052953196511334&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";;ethz.ch;sjtu.edu.cn;mcmaster.ca", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Swiss Federal Institute of Technology;Shanghai Jiao Tong University;McMaster University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.sjtu.edu.cn;https://www.mcmaster.ca", "aff_unique_abbr": "ETH Zurich;SJTU;McMaster", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;China;Canada" }, { "title": "Polynomial-Time Computation of Exact $\\Phi$-Equilibria in Polyhedral Games", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93748", "id": "mZwilh3hd2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mZwilh3hd2", "openreview": "https://openreview.net/forum?id=mZwilh3hd2", "poster": "/media/PosterPDFs/NeurIPS%202024/93748.png?t=1733865798.547033", "project": "", "author_site": "Gabriele Farina, Charilaos Pipis", "tldr": "", "abstract": "It is a well-known fact that correlated equilibria can be computed in polynomial time in a large class of concisely represented games using the celebrated Ellipsoid Against Hope algorithm \\citep{Papadimitriou2008:Computing, Jiang2015:Polynomial}. However, the landscape of efficiently computable equilibria in sequential (extensive-form) games remains unknown. The Ellipsoid Against Hope does not apply directly to these games, because they do not have the required ``polynomial type'' property. Despite this barrier, \\citet{Huang2008:Computing} altered the algorithm to compute exact extensive-form correlated equilibria.\n\nIn this paper, we generalize the Ellipsoid Against Hope and develop a simple algorithmic framework for efficiently computing saddle-points in bilinear zero-sum games, even when one of the dimensions is exponentially large. Moreover, the framework only requires a ``good-enough-response'' oracle, which is a weakened notion of a best-response oracle.\n\nUsing this machinery, we develop a general algorithmic framework for computing exact linear $\\Phi$-equilibria in any polyhedral game (under mild assumptions), including correlated equilibria in normal-form games, and extensive-form correlated equilibria in extensive-form games. This enables us to give the first polynomial-time algorithm for computing exact linear-deviation correlated equilibria in extensive-form games, thus resolving an open question by \\citet{Farina2023:Polynomial}. Furthermore, even for the cases for which a polynomial time algorithm for exact equilibria was already known, our framework provides a conceptually simpler solution.", "keywords": "equilibrium computation;exact equilibria;extensive-form games;polyhedral games;ellipsoid;phi-equilibria", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Gabriele Farina;Charilaos Pipis", "authorids": "~Gabriele_Farina1;~Charilaos_Pipis1", "gender": "M;", "homepage": "http://www.cs.cmu.edu/~gfarina/about/;", "dblp": ";", "google_scholar": "sktDNcEAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Gabriele_Farina1;~Charilaos_Pipis1", "aff": "Massachusetts Institute of Technology;", "aff_domain": "mit.edu;", "position": "Assistant Professor;", "bibtex": "@inproceedings{\nfarina2024polynomialtime,\ntitle={Polynomial-Time Computation of Exact \\${\\textbackslash}Phi\\$-Equilibria in Polyhedral Games},\nauthor={Gabriele Farina and Charilaos Pipis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mZwilh3hd2}\n}", "github": "", "reviewers": "Ht5B;un2H;r2xR;2L7d", "pdf_size": 470261, "rating": "6;7;7;7", "confidence": "5;3;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;2", "presentation": "3;4;3;4", "wc_summary": "78;31;234;332", "wc_strengths": "132;13;101;287", "wc_weaknesses": "133;3;123;2", "wc_questions": "178;124;34;10", "wc_limitations": "1;1;1;3", "wc_review": "522;172;493;634", "wc_reply_reviewers": "0;3;20;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 168.75, 120.53915339009147 ], "wc_strengths_avg": [ 133.25, 98.92010665178238 ], "wc_weaknesses_avg": [ 65.25, 62.85051710208914 ], "wc_questions_avg": [ 86.5, 67.79933627993714 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 455.25, 171.80130238155937 ], "wc_reply_reviewers_avg": [ 10.75, 9.310612224768036 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17696601314909952349&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mit.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Once Read is Enough: Domain-specific Pretraining-free Language Models with Cluster-guided Sparse Experts for Long-tail Domain Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93747", "id": "manHbkpIW6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=manHbkpIW6", "openreview": "https://openreview.net/forum?id=manHbkpIW6", "poster": "/media/PosterPDFs/NeurIPS%202024/93747.png?t=1729964547.5012977", "project": "", "author_site": "Fang Dong, Mengyi Chen, Jixian Zhou, Yubin Shi, Yixuan Chen, Mingzhi Dong, Yujiang Wang, Dongsheng Li, Xiaochen Yang, Rui Zhu, Robert Dick, Qin Lv, Fan Yang, Tun Lu, Ning Gu, Li Shang", "tldr": "", "abstract": "Language models (LMs) only pretrained on a general and massive corpus usually cannot attain satisfying performance on domain-specific downstream tasks, and hence, applying domain-specific pretraining to LMs is a common and indispensable practice.\nHowever, domain-specific pretraining can be costly and time-consuming, hindering LMs' deployment in real-world applications.\nIn this work, we consider the incapability to memorize domain-specific knowledge embedded in the general corpus with rare occurrences and long-tail distributions as the leading cause for pretrained LMs' inferior downstream performance. \nAnalysis of Neural Tangent Kernels (NTKs) reveals that those long-tail data are commonly overlooked in the model's gradient updates and, consequently, are not effectively memorized, leading to poor domain-specific downstream performance.\nBased on the intuition that data with similar semantic meaning are closer in the embedding space, we devise a Cluster-guided Sparse Expert (CSE) layer to actively learn long-tail domain knowledge typically neglected in previous pretrained LMs.\nDuring pretraining, a CSE layer efficiently clusters domain knowledge together and assigns long-tail knowledge to designate extra experts. CSE is also a lightweight structure that only needs to be incorporated in several deep layers.\nWith our training strategy, we found that during pretraining, data of long-tail knowledge gradually formulate isolated, outlier clusters in an LM's representation spaces, especially in deeper layers. Our experimental results show that only pretraining CSE-based LMs is enough to achieve superior performance than regularly pretrained-finetuned LMs on various downstream tasks, implying the prospects of domain-specific-pretraining-free language models.", "keywords": "language model;long-tail;clustering", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Fang Dong;Mengyi Chen;Jixian Zhou;Yubin Shi;Yixuan Chen;Mingzhi Dong;Yujiang Wang;Dongsheng Li;Xiaochen Yang;Rui Zhu;Robert P. Dick;Qin Lv;Fan Yang;Tun Lu;Ning Gu;Li Shang", "authorids": "~Fang_Dong2;~Mengyi_Chen2;~Jixian_Zhou1;~Yubin_Shi1;~Yixuan_Chen1;~Mingzhi_Dong1;~Yujiang_Wang1;~Dongsheng_Li2;~Xiaochen_Yang2;~Rui_Zhu8;~Robert_P._Dick1;~Qin_Lv1;~Fan_Yang31;~Tun_Lu1;~Ning_Gu2;~Li_Shang3", "gender": "M;F;M;M;F;M;M;M;;M;M;F;M;M;M;", "homepage": "https://github.com/Frandium;https://github.com/blesindis;https://github.com/chrisburn123;;;;;http://recmind.cn;;https://ruizhugeographer.com/;http://robertdick.org/;https://home.cs.colorado.edu/~lv/;https://ephonic.github.io;;https://cscw.fudan.edu.cn/;https://cscw.fudan.edu.cn/lishang/list.htm", "dblp": ";;;221/2003;30/7103-3;118/4806;125/0429-1;254/0830-2.html;;;84/523.html;11/808;;41/2472;;", "google_scholar": ";Z99WZjcAAAAJ;;IyLkK_kAAAAJ;cmdWHrIAAAAJ;;https://scholar.google.co.uk/citations?user=3xxDPJUAAAAJ;VNg5rA8AAAAJ;;7ZN8prIAAAAJ;;dTkWR0MAAAAJ;;;https://scholar.google.com.au/citations?user=AUnPpaUAAAAJ;AnBUn0QAAAAJ", "orcid": ";;;;;;;0000-0003-3103-8442;;;;0000-0002-9437-1376;;0000-0002-6633-4826;0000-0002-2915-974X;", "linkedin": ";;;;;;;;;;;;;;;", "or_profile": "~Fang_Dong2;~Mengyi_Chen2;~Jixian_Zhou1;~Yubin_Shi1;~Yixuan_Chen1;~Mingzhi_Dong1;~Yujiang_Wang1;~Dongsheng_Li2;~Xiaochen_Yang2;~Rui_Zhu8;~Robert_P._Dick1;~Qin_Lv1;~Fan_Yang31;~Tun_Lu1;~Ning_Gu2;~Li_Shang3", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;University of Bath;University of Oxford;Microsoft Research Asia;;University of Bristol;University of Michigan;University of Colorado at Boulder;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu;bath.ac.uk;ox.ac.uk;microsoft.com;;bris.ac.uk;umich.edu;colorado.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;MS student;MS student;PhD student;Lecturer;Postdoc;Principal Researcher;;Lecturer;Full Professor;Full Professor;Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndong2024once,\ntitle={Once Read is Enough: Domain-specific Pretraining-free Language Models with Cluster-guided Sparse Experts for Long-tail Domain Knowledge},\nauthor={Fang Dong and Mengyi Chen and Jixian Zhou and Yubin Shi and Yixuan Chen and Mingzhi Dong and Yujiang Wang and Dongsheng Li and Xiaochen Yang and Rui Zhu and Robert P. Dick and Qin Lv and Fan Yang and Tun Lu and Ning Gu and Li Shang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=manHbkpIW6}\n}", "github": "", "reviewers": "P2qX;1S4s;bZcS;4NSY;YnWw", "pdf_size": 1821339, "rating": "4;6;6;7;7", "confidence": "3;4;4;4;3", "soundness": "2;3;3;4;3", "novelty": "1;3;3;3;3", "presentation": "1;3;3;4;3", "wc_summary": "62;134;127;64;69", "wc_strengths": "44;51;66;126;49", "wc_weaknesses": "423;54;151;38;9", "wc_questions": "11;36;51;101;1", "wc_limitations": "3;1;1;11;1", "wc_review": "543;276;396;340;129", "wc_reply_reviewers": "179;110;0;0;0", "wc_reply_authors": "1124;302;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "3;2;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.8000000000000002 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 91.2, 32.24530973645625 ], "wc_strengths_avg": [ 67.2, 30.30115509349437 ], "wc_weaknesses_avg": [ 135.0, 151.68783735026352 ], "wc_questions_avg": [ 40.0, 35.270384177096794 ], "wc_limitations_avg": [ 3.4, 3.8781438859330635 ], "wc_review_avg": [ 336.8, 136.3163966659917 ], "wc_reply_reviewers_avg": [ 57.8, 74.07671698988825 ], "wc_reply_authors_avg": [ 285.2, 435.4043637815313 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:egjuLrqVvWsJ:scholar.google.com/&scioq=Once+Read+is+Enough:+Domain-specific+Pretraining-free+Language+Models+with+Cluster-guided+Sparse+Experts+for+Long-tail+Domain+Knowledge&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu;bath.ac.uk;ox.ac.uk;microsoft.com;;bris.ac.uk;umich.edu;colorado.edu;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 16, "aff_unique_index": "0;0;0;0;0;1;2;3;4;5;6;0;0;0;0", "aff_unique_norm": "Fudan University;University of Bath;University of Oxford;Microsoft;University of Bristol;University of Michigan;University of Colorado", "aff_unique_dep": ";;;Research;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.bath.ac.uk;https://www.ox.ac.uk;https://www.microsoft.com/en-us/research/group/asia;https://www.bristol.ac.uk;https://www.umich.edu;https://www.colorado.edu", "aff_unique_abbr": "Fudan;Bath;Oxford;MSR Asia;Bristol;UM;CU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;Boulder", "aff_country_unique_index": "0;0;0;0;0;1;1;0;1;2;2;0;0;0;0", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "USCILab3D: A Large-scale, Long-term, Semantically Annotated Outdoor Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97517", "id": "mbAgKSfhjM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mbAgKSfhjM", "openreview": "https://openreview.net/forum?id=mbAgKSfhjM", "poster": "", "project": "", "author_site": "Kiran Lekkala, Henghui Bao, Peixu Cai, Wei Lim, Chen Liu, Laurent Itti", "tldr": "", "abstract": "In this paper, we introduce the \\textbf{USCILab3D dataset}, a large-scale, annotated outdoor dataset designed for versatile applications across multiple domains, including computer vision, robotics, and machine learning. The dataset was acquired using a mobile robot equipped with 5 cameras and a 32-beam, $360^{\\circ}$ scanning LIDAR. The robot was teleoperated, over the course of a year and under a variety of weather and lighting conditions, through a rich variety of paths within the USC campus (229 acres = $\\sim 92.7$ hectares). The raw data was annotated using state-of-the-art large foundation models, and processed to provide multi-view imagery, 3D reconstructions, semantically-annotated images and point clouds (267 semantic categories), and text descriptions of images and objects within. The dataset also offers a diverse array of complex analyses using pose-stamping and trajectory data. In sum, the dataset offers 1.4M point clouds and 10M images ($\\sim 6$TB of data). Despite covering a narrower geographical scope compared to a whole-city dataset, our dataset prioritizes intricate intersections along with denser multi-view scene images and semantic point clouds, enabling more precise 3D labelling and facilitating a broader spectrum of 3D vision tasks. For data, code and more details, please visit our website.", "keywords": "benchmark;dataset;computer vision;vision-language;3D computer vision;robotics", "primary_area": "", "supplementary_material": "/attachment/4c03a50b975eedc253fa24058cae344c85b8f411.pdf", "author": "Kiran Lekkala;Henghui Bao;Peixu Cai;Wei Zer Lim;Chen Liu;Laurent Itti", "authorids": "~Kiran_Lekkala1;~Henghui_Bao1;~Peixu_Cai2;~Wei_Zer_Lim1;~Chen_Liu18;~Laurent_Itti1", "gender": "M;M;M;M;M;M", "homepage": "http://ilab.usc.edu/people/;https://github.com/HenghuiB;;;https://www.linkedin.com/in/chen-liu-55656219a/;http://ilab.usc.edu", "dblp": ";;;;;31/3256", "google_scholar": "uDKnFSgAAAAJ;88XEeKAAAAAJ;;;;xhUvqK8AAAAJ", "orcid": ";;;;;0000-0002-0168-2977", "linkedin": ";;;kevin-lim-b01021203/;;", "or_profile": "~Kiran_Lekkala1;~Henghui_Bao1;~Peixu_Cai2;~Wei_Zer_Lim1;~Chen_Liu18;~Laurent_Itti1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California;;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu;;usc.edu", "position": "PhD student;MS student;MS student;MS student;;Professor", "bibtex": "@inproceedings{\nlekkala2024uscilabd,\ntitle={{USCIL}ab3D: A Large-scale, Long-term, Semantically Annotated Outdoor Dataset},\nauthor={Kiran Lekkala and Henghui Bao and Peixu Cai and Wei Zer Lim and Chen Liu and Laurent Itti},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mbAgKSfhjM}\n}", "github": "", "reviewers": "8QQu;2uvC;12vN;RXE8", "pdf_size": 19874508, "rating": "5;6;6;8", "confidence": "5;3;3;4", "wc_summary_and_contributions": "86;84;90;98", "wc_strengths": "73;137;97;88", "wc_improvement": "53;198;393;107", "wc_limitations": "31;15;14;2", "wc_correctness": "14;5;22;1", "wc_clarity": "75;5;5;1", "wc_relation_to_prior_work": "27;9;159;1", "wc_documentation": "40;4;48;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "400;458;829;300", "wc_reply_reviewers": "79;0;131;0", "wc_reply_authors": "0;0;67;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 89.5, 5.361902647381804 ], "wc_strengths_avg": [ 98.75, 23.689396362085716 ], "wc_improvement_avg": [ 187.75, 129.33556162169785 ], "wc_limitations_avg": [ 15.5, 10.307764064044152 ], "wc_correctness_avg": [ 10.5, 8.139410298049853 ], "wc_clarity_avg": [ 21.5, 30.931375656443088 ], "wc_relation_to_prior_work_avg": [ 49.0, 64.20280367709809 ], "wc_documentation_avg": [ 23.25, 20.96872671384698 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 496.75, 199.97671739480074 ], "wc_reply_reviewers_avg": [ 52.5, 55.625983137379244 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.20751433915982243, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dwrnEAtlojYJ:scholar.google.com/&scioq=USCILab3D:+A+Large-scale,+Long-term,+Semantically+Annotated+Outdoor+Dataset&hl=en&as_sdt=0,48", "gs_version_total": 3, "email": "usc.edu;usc.edu;usc.edu;usc.edu;;usc.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Cooperative Trajectory Representations for Motion Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93746", "id": "mcY221BgKi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mcY221BgKi", "openreview": "https://openreview.net/forum?id=mcY221BgKi", "poster": "/media/PosterPDFs/NeurIPS%202024/93746.png?t=1731498948.183345", "project": "", "author_site": "Hongzhi Ruan, Haibao Yu, Wenxian Yang, Siqi Fan, Zaiqing Nie", "tldr": "", "abstract": "Motion forecasting is an essential task for autonomous driving, and utilizing information from infrastructure and other vehicles can enhance forecasting capabilities.\nExisting research mainly focuses on leveraging single-frame cooperative information to enhance the limited perception capability of the ego vehicle, while underutilizing the motion and interaction context of traffic participants observed from cooperative devices. \nIn this paper, we propose a forecasting-oriented representation paradigm to utilize motion and interaction features from cooperative information. \nSpecifically, we present V2X-Graph, a representative framework to achieve interpretable and end-to-end trajectory feature fusion for cooperative motion forecasting. \nV2X-Graph is evaluated on V2X-Seq in vehicle-to-infrastructure (V2I) scenarios.\nTo further evaluate on vehicle-to-everything (V2X) scenario, we construct the first real-world V2X motion forecasting dataset V2X-Traj, which contains multiple autonomous vehicles and infrastructure in every scenario.\nExperimental results on both V2X-Seq and V2X-Traj show the advantage of our method. \nWe hope both V2X-Graph and V2X-Traj will benefit the further development of cooperative motion forecasting.\nFind the project at https://github.com/AIR-THU/V2X-Graph.", "keywords": "Cooperative Autonomous Driving;Motion Forecasting", "primary_area": "robotics", "supplementary_material": "/attachment/36df1818fba82926f5a9bc27ec8ca3641e9a8de3.zip", "author": "Hongzhi Ruan;Haibao Yu;Wenxian Yang;Siqi Fan;Zaiqing Nie", "authorids": "~Hongzhi_Ruan1;~Haibao_Yu2;~Wenxian_Yang1;~Siqi_Fan2;~Zaiqing_Nie2", "gender": "M;M;;M;M", "homepage": "https://github.com/HzRynn;;https://www.researchgate.net/profile/Wenxian-Yang-2;https://leofansq.github.io/;https://air.tsinghua.edu.cn/en/info/1046/1192.htm", "dblp": "347/1558;246/4643;;149/1267;n/ZaiqingNie", "google_scholar": "https://scholar.google.com/citations?hl=en;JW4F5HoAAAAJ;Kiz73xwAAAAJ;Ahy5smMAAAAJ;", "orcid": "0009-0005-2299-8179;;;;0000-0002-1134-2343", "linkedin": ";;;;", "or_profile": "~Hongzhi_Ruan1;~Haibao_Yu2;~Wenxian_Yang1;~Siqi_Fan2;~Zaiqing_Nie2", "aff": "University of Chinese Academy of Sciences;The University of Hong Kong;Tsinghua University;Institute for AI Industry Research, Tsinghua University;Tsinghua University", "aff_domain": "ucas.edu.cn;hku.hk;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nruan2024learning,\ntitle={Learning Cooperative Trajectory Representations for Motion Forecasting},\nauthor={Hongzhi Ruan and Haibao Yu and Wenxian Yang and Siqi Fan and Zaiqing Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mcY221BgKi}\n}", "github": "", "reviewers": "w4tS;vBSp;vdKw;zJGd", "pdf_size": 2327783, "rating": "5;5;5;5", "confidence": "4;3;2;4", "soundness": "2;2;2;2", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "80;177;65;137", "wc_strengths": "89;27;35;239", "wc_weaknesses": "39;97;84;210", "wc_questions": "109;33;33;94", "wc_limitations": "11;1;7;203", "wc_review": "328;335;224;883", "wc_reply_reviewers": "9;10;0;289", "wc_reply_authors": "88;17;0;461", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 114.75, 44.86855803343807 ], "wc_strengths_avg": [ 97.5, 85.1043477150257 ], "wc_weaknesses_avg": [ 107.5, 62.970231061986745 ], "wc_questions_avg": [ 67.25, 34.65815199920504 ], "wc_limitations_avg": [ 55.5, 85.23350280259518 ], "wc_review_avg": [ 442.5, 258.0934908129223 ], "wc_reply_reviewers_avg": [ 77.0, 122.46019761538848 ], "wc_reply_authors_avg": [ 141.5, 187.39330297531978 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3982701750402621310&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ucas.edu.cn;hku.hk;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Chinese Academy of Sciences;University of Hong Kong;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.hku.hk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UCAS;HKU;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "mdK1vhgpa5", "title": "Variational Continual Test-Time Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Continual Test-Time Adaptation (CTTA) task investigates effective domain adaptation under the scenario of continuous domain shifts during testing time. \nDue to the utilization of solely unlabeled samples, there exists significant uncertainty in model updates, leading CTTA to encounter severe error accumulation issues.\nIn this paper, we introduce VCoTTA, a variational Bayesian approach to measure uncertainties in CTTA. \nAt the source stage, we transform a pre-trained deterministic model into a Bayesian Neural Network (BNN) via a variational warm-up strategy, injecting uncertainties into the model. \nDuring the testing time, we employ a mean-teacher update strategy using variational inference for the student model and exponential moving average for the teacher model. \nOur novel approach updates the student model by combining priors from both the source and teacher models. \nThe evidence lower bound is formulated as the cross-entropy between the student and teacher models, along with the Kullback-Leibler (KL) divergence of the prior mixture. \nExperimental results on three datasets demonstrate the method's effectiveness in mitigating error accumulation within the CTTA framework.", "keywords": "Continual test-time adaptation;Continual learning;Domain adaptation;Test-time adaptation", "primary_area": "learning_theory", "supplementary_material": "", "author": "Fan Lyu;Kaile Du;Yuyang Li;hanyu zhao;Fuyuan Hu;Zhang Zhang;Guangcan Liu;Liang Wang", "authorids": "~Fan_Lyu1;~Kaile_Du1;~Yuyang_Li4;~hanyu_zhao3;~Fuyuan_Hu2;~Zhang_Zhang1;~Guangcan_Liu2;~Liang_Wang3", "gender": ";M;M;;M;;M;M", "homepage": ";;;;http://eie.usts.edu.cn/szdw/jsfc.htm;https://zhangzhang80.github.io/;;", "dblp": ";315/8814;;;;94/2468-1;07/3768.html;56/4499-1", "google_scholar": ";p6wgS0EAAAAJ;;;;rnRNwEMAAAAJ;JIfH-5IAAAAJ;", "orcid": ";;0009-0007-3996-9942;;;0000-0001-9425-3065;;", "linkedin": ";;;hanyu-zhao-2486hann/;;;;", "or_profile": "~Fan_Lyu1;~Kaile_Du1;~Yuyang_Li4;~hanyu_zhao3;~Fuyuan_Hu2;~Zhang_Zhang1;~Guangcan_Liu2;~Liang_Wang3", "aff": ";Southeast University;Southeast University;University of Bristol;Suzhou University of Science and Technology;Institute of Automation, Chinese Academy of Sciences;Southeast University;Institute of Automation\uff0c CAS\uff0cChina", "aff_domain": ";seu.edu.cn;seu.edu.cn;bristol.ac.uk;usts.edu.cn;ia.ac.cn;seu.edu.cn;ia.ac.cn", "position": ";PhD student;MS student;MS student;Full Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024variational,\ntitle={Variational Continual Test-Time Adaptation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=mdK1vhgpa5}\n}", "github": "", "project": "", "reviewers": "yBx1;p6Bz;9v84;st3m", "site": "https://openreview.net/forum?id=mdK1vhgpa5", "pdf_size": 577301, "rating": "4;5;6;6", "confidence": "3;3;2;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "135;120;140;110", "wc_strengths": "41;176;165;90", "wc_weaknesses": "241;162;344;90", "wc_questions": "114;178;11;95", "wc_limitations": "67;20;10;50", "wc_review": "598;656;670;435", "wc_reply_reviewers": "177;393;0;148", "wc_reply_authors": "0;491;0;173", "reply_reviewers": "1;2;0;2", "reply_authors": "1;2;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 126.25, 11.92424001771182 ], "wc_strengths_avg": [ 118.0, 55.421115109676386 ], "wc_weaknesses_avg": [ 209.25, 94.36465175053634 ], "wc_questions_avg": [ 99.5, 59.63430220938282 ], "wc_limitations_avg": [ 36.75, 22.84047941703501 ], "wc_review_avg": [ 589.75, 93.33374255862667 ], "wc_reply_reviewers_avg": [ 179.5, 140.3575790614814 ], "wc_reply_authors_avg": [ 166.0, 200.49064816095537 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2984398287283233063&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;2;3;0;3", "aff_unique_norm": "Southeast University;University of Bristol;Suzhou University of Science and Technology;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Automation", "aff_unique_url": "https://www.seu.edu.cn/;https://www.bristol.ac.uk;https://www.sust.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "SEU;Bristol;;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "RGMDT: Return-Gap-Minimizing Decision Tree Extraction in Non-Euclidean Metric Space", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93745", "id": "mdWz5koY5p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mdWz5koY5p", "openreview": "https://openreview.net/forum?id=mdWz5koY5p", "poster": "/media/PosterPDFs/NeurIPS%202024/93745.png?t=1731100437.3162394", "project": "", "author_site": "Jingdi Chen, Hanhan Zhou, Yongsheng Mei, Carlee Joe-Wong, Gina C. Adam, Nathaniel Bastian, Tian Lan", "tldr": "", "abstract": "Deep Reinforcement Learning (DRL) algorithms have achieved great success in solving many challenging tasks while their black-box nature hinders interpretability and real-world applicability, making it difficult for human experts to interpret and understand DRL policies. \nExisting works on interpretable reinforcement learning have shown promise in extracting decision tree (DT) based policies from DRL policies with most focus on the single-agent settings while prior attempts to introduce DT policies in multi-agent scenarios mainly focus on heuristic designs which do not provide any quantitative guarantees on the expected return.\nIn this paper, we establish an upper bound on the return gap between the oracle expert policy and an optimal decision tree policy. This enables us to recast the DT extraction problem into a novel non-euclidean clustering problem over the local observation and action values space of each agent, with action values as cluster labels and the upper bound on the return gap as clustering loss.\nBoth the algorithm and the upper bound are extended to multi-agent decentralized DT extractions by an iteratively-grow-DT procedure guided by an action-value function conditioned on the current DTs of other agents. Further, we propose the Return-Gap-Minimization Decision Tree (RGMDT) algorithm, which is a surprisingly simple design and is integrated with reinforcement learning through the utilization of a novel Regularized Information Maximization loss. Evaluations on tasks like D4RL show that RGMDT significantly outperforms heuristic DT-based baselines and can achieve nearly optimal returns under given DT complexity constraints (e.g., maximum number of DT nodes).", "keywords": "Multi-agent Reinforcement Learning;Decision Tree;Explainable AI", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b2028ea02551eca88ca0f4350cbaeea4901349b4.zip", "author": "Jingdi Chen;Hanhan Zhou;Yongsheng Mei;Carlee Joe-Wong;Gina Adam;Nathaniel D. Bastian;Tian Lan", "authorids": "~Jingdi_Chen1;~Hanhan_Zhou1;~Yongsheng_Mei1;~Carlee_Joe-Wong1;~Gina_Adam1;~Nathaniel_D._Bastian1;~Tian_Lan4", "gender": "F;;M;F;;M;M", "homepage": "https://jingdichen.com/info;https://hanhanzhou.com/;;https://www.andrew.cmu.edu/user/cjoewong/;http://adam.seas.gwu.edu;https://cyber.army.mil/About-Us/ACI-Research-Team/Bastian/;https://www2.seas.gwu.edu/~tlan/", "dblp": "298/4248;307/3083;261/7750;40/9937.html;;132/5837.html;", "google_scholar": "IdRTGbYAAAAJ;https://scholar.google.com/citations?hl=en;y4Qyp24AAAAJ;XEztdZgAAAAJ;pNqSzacAAAAJ;M2aMMxQAAAAJ;", "orcid": "0009-0001-8564-0407;;0000-0001-7606-8931;;;0000-0001-9957-2778;", "linkedin": "jingdi-chen-366001166?original_referer=https%3A%2F%2Fwww.google.com%2F;hanhanzhou/;yongshengmei/;;;nathanielbastian/;", "or_profile": "~Jingdi_Chen1;~Hanhan_Zhou1;~Yongsheng_Mei1;~Carlee_Joe-Wong1;~Gina_Adam1;~Nathaniel_D._Bastian1;~Tian_Lan4", "aff": "George Washington University;George Washington University;George Washington University;Carnegie Mellon University;George Washington University;United States Military Academy;George Washington University", "aff_domain": "gwu.edu;gwu.edu;gwu.edu;cmu.edu;gwu.edu;westpoint.edu;gwu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024rgmdt,\ntitle={{RGMDT}: Return-Gap-Minimizing Decision Tree Extraction in Non-Euclidean Metric Space},\nauthor={Jingdi Chen and Hanhan Zhou and Yongsheng Mei and Carlee Joe-Wong and Gina Adam and Nathaniel D. Bastian and Tian Lan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mdWz5koY5p}\n}", "github": "", "reviewers": "p7eT;uDjD;Cxpi", "pdf_size": 2156400, "rating": "5;6;7", "confidence": "2;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "74;139;53", "wc_strengths": "49;51;54", "wc_weaknesses": "167;144;64", "wc_questions": "68;2;54", "wc_limitations": "1;2;8", "wc_review": "359;338;233", "wc_reply_reviewers": "0;22;18", "wc_reply_authors": "70;9;13", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 88.66666666666667, 36.609045633862436 ], "wc_strengths_avg": [ 51.333333333333336, 2.0548046676563256 ], "wc_weaknesses_avg": [ 125.0, 44.14370472294625 ], "wc_questions_avg": [ 41.333333333333336, 28.394052585395805 ], "wc_limitations_avg": [ 3.6666666666666665, 3.091206165165235 ], "wc_review_avg": [ 310.0, 55.11805511808268 ], "wc_reply_reviewers_avg": [ 13.333333333333334, 9.568466729604882 ], "wc_reply_authors_avg": [ 30.666666666666668, 27.86076492528915 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10467087592386974641&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gwu.edu;gwu.edu;gwu.edu;cmu.edu;gwu.edu;westpoint.edu;gwu.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "George Washington University;Carnegie Mellon University;United States Military Academy", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gwu.edu;https://www.cmu.edu;https://www.usma.edu/", "aff_unique_abbr": "GWU;CMU;USMA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Semantics and Spatiality of Emergent Communication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93744", "id": "me1MpmENpw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=me1MpmENpw", "openreview": "https://openreview.net/forum?id=me1MpmENpw", "poster": "", "project": "", "author_site": "Rotem Ben Zion, Boaz Carmeli, Orr Paradise, Yonatan Belinkov", "tldr": "", "abstract": "When artificial agents are jointly trained to perform collaborative tasks using a communication channel, they develop opaque goal-oriented communication protocols. Good task performance is often considered sufficient evidence that meaningful communication is taking place, but existing empirical results show that communication strategies induced by common objectives can be counterintuitive whilst solving the task nearly perfectly. In this work, we identify a goal-agnostic prerequisite to meaningful communication, which we term semantic consistency, based on the idea that messages should have similar meanings across instances. We provide a formal definition for this idea, and use it to compare the two most common objectives in the field of emergent communication: discrimination and reconstruction. We prove, under mild assumptions, that semantically inconsistent communication protocols can be optimal solutions to the discrimination task, but not to reconstruction. We further show that the reconstruction objective encourages a stricter property, spatial meaningfulness, which also accounts for the distance between messages. Experiments with emergent communication games validate our theoretical results. These findings demonstrate an inherent advantage of distance-based communication goals, and contextualize previous empirical discoveries.", "keywords": "emergent communication;Lewis' games", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Rotem Ben Zion;Boaz Carmeli;Orr Paradise;Yonatan Belinkov", "authorids": "~Rotem_Ben_Zion1;~Boaz_Carmeli1;~Orr_Paradise1;~Yonatan_Belinkov1", "gender": "M;M;M;M", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=il-BOAZC;https://people.eecs.berkeley.edu/~orrp/;https://www.belinkov.com", "dblp": ";68/2688;236/4369;136/8705", "google_scholar": "TGjTqzgAAAAJ;l5xTrKoAAAAJ;9At07_kAAAAJ;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ", "orcid": ";;;", "linkedin": "rotem-ben-zion-a27955274/;boaz-carmeli-5456794/?originalSubdomain=il;;", "or_profile": "~Rotem_Ben_Zion1;~Boaz_Carmeli1;~Orr_Paradise1;~Yonatan_Belinkov1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;International Business Machines;University of California, Berkeley;Technion, Technion", "aff_domain": "campus.technion.ac.il;ibm.com;berkeley.edu;technion.ac.il", "position": "MS student;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzion2024semantics,\ntitle={Semantics and Spatiality of Emergent Communication},\nauthor={Rotem Ben Zion and Boaz Carmeli and Orr Paradise and Yonatan Belinkov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=me1MpmENpw}\n}", "github": "", "reviewers": "cajw;XigD;TFDu;o4TS", "pdf_size": 1044402, "rating": "3;5;6;7", "confidence": "5;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "185;136;163;245", "wc_strengths": "100;43;36;227", "wc_weaknesses": "526;137;29;1446", "wc_questions": "280;2;495;357", "wc_limitations": "26;48;15;323", "wc_review": "1117;366;738;2598", "wc_reply_reviewers": "0;28;28;235", "wc_reply_authors": "0;18;0;191", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 182.25, 40.17072939342775 ], "wc_strengths_avg": [ 101.5, 76.59144860883622 ], "wc_weaknesses_avg": [ 534.5, 557.772579103706 ], "wc_questions_avg": [ 283.5, 179.85341253365198 ], "wc_limitations_avg": [ 103.0, 127.57154855217522 ], "wc_review_avg": [ 1204.75, 847.0836366616935 ], "wc_reply_reviewers_avg": [ 72.75, 94.36995019602374 ], "wc_reply_authors_avg": [ 52.25, 80.44369148665419 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7171371656006362, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3044475509809920533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "campus.technion.ac.il;ibm.com;berkeley.edu;technion.ac.il", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Technion - Israel Institute of Technology;International Business Machines Corporation;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.ibm.com;https://www.berkeley.edu", "aff_unique_abbr": "Technion;IBM;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Israel;United States" }, { "title": "DiffPO: A causal diffusion model for learning distributions of potential outcomes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93743", "id": "merJ77Jipt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=merJ77Jipt", "openreview": "https://openreview.net/forum?id=merJ77Jipt", "poster": "", "project": "", "author_site": "Yuchen Ma, Valentyn Melnychuk, Jonas Schweisthal, Stefan Feuerriegel", "tldr": "", "abstract": "Predicting potential outcomes of interventions from observational data is crucial for decision-making in medicine, but the task is challenging due to the fundamental problem of causal inference. Existing methods are largely limited to point estimates of potential outcomes with no uncertain quantification; thus, the full information about the distributions of potential outcomes is typically ignored. In this paper, we propose a novel causal diffusion model called DiffPO, which is carefully designed for reliable inferences in medicine by learning the distribution of potential outcomes. In our DiffPO, we leverage a tailored conditional denoising diffusion model to learn complex distributions, where we address the selection bias through a novel orthogonal diffusion loss. Another strength of our DiffPO method is that it is highly flexible (e.g., it can also be used to estimate different causal quantities such as CATE). Across a wide range of experiments, we show that our method achieves state-of-the-art performance.", "keywords": "Causal inference;Treatment effect estimation;Diffusion models;Machine Learning for healthcare;CATE", "primary_area": "causal_inference", "supplementary_material": "", "author": "Yuchen Ma;Valentyn Melnychuk;Jonas Schweisthal;Stefan Feuerriegel", "authorids": "~Yuchen_Ma3;~Valentyn_Melnychuk1;~Jonas_Schweisthal1;~Stefan_Feuerriegel1", "gender": "F;M;M;M", "homepage": "https://www.som.lmu.de/ai/en/institute/contact-page/yuchen-ma-df26be0b.html;https://valentyn1997.github.io/;https://www.som.lmu.de/ai/en/institute/contact-page/jonas-schweisthal-0f01481a.html;http://www.ai.bwl.lmu.de", "dblp": "192/2001-5;254/1513;329/4240;125/0630", "google_scholar": "w6hmCEYAAAAJ;EMExrOMAAAAJ;https://scholar.google.de/citations?user=GHpjcEsAAAAJ;https://scholar.google.de/citations?hl=de", "orcid": "0009-0003-9900-2822;0000-0002-2401-6803;0000-0003-3725-3821;0000-0001-7856-8729", "linkedin": ";valentyn-melnychuk/;;", "or_profile": "~Yuchen_Ma3;~Valentyn_Melnychuk1;~Jonas_Schweisthal1;~Stefan_Feuerriegel1", "aff": "University of Cambridge;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;LMU Munich", "aff_domain": "cam.ac.uk;lmu.de;lmu.de;lmu.de", "position": "Researcher;PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nma2024diffpo,\ntitle={Diff{PO}: A causal diffusion model for learning distributions of potential outcomes},\nauthor={Yuchen Ma and Valentyn Melnychuk and Jonas Schweisthal and Stefan Feuerriegel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=merJ77Jipt}\n}", "github": "", "reviewers": "KUb5;PmLo;Cvxf;vD2m", "pdf_size": 949552, "rating": "4;5;6;6", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;4", "presentation": "3;2;3;4", "wc_summary": "73;42;121;71", "wc_strengths": "42;25;95;72", "wc_weaknesses": "60;202;93;142", "wc_questions": "79;33;48;83", "wc_limitations": "19;2;12;56", "wc_review": "273;304;369;424", "wc_reply_reviewers": "33;12;13;6", "wc_reply_authors": "366;35;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.75, 28.340562803162538 ], "wc_strengths_avg": [ 58.5, 26.9675731203236 ], "wc_weaknesses_avg": [ 124.25, 53.53678641831241 ], "wc_questions_avg": [ 60.75, 20.980645843252777 ], "wc_limitations_avg": [ 22.25, 20.40067400847335 ], "wc_review_avg": [ 342.5, 58.431583925134184 ], "wc_reply_reviewers_avg": [ 16.0, 10.173494974687902 ], "wc_reply_authors_avg": [ 100.25, 154.0947354713976 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2498622567883021737&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;lmu.de;lmu.de;lmu.de", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Cambridge;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "Cambridge;LMU;LMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Munich", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;Germany" }, { "id": "mfJifAlRMY", "title": "Revisiting Referring Expression Comprehension Evaluation in the Era of Large Multimodal Models", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Referring expression comprehension (REC) involves localizing a target instance based on a textual description. Recent advancements in REC have been driven by large multimodal models (LMMs) like CogVLM, which achieved 92.44% accuracy on RefCOCO. However, this study questions whether existing benchmarks such as RefCOCO, RefCOCO+, and RefCOCOg, capture LMMs' comprehensive capabilities. We begin with a manual examination of these benchmarks, revealing high labeling error rates: 14% in RefCOCO, 24% in RefCOCO+, and 5% in RefCOCOg, which undermines the authenticity of evaluations. We address this by excluding problematic instances and reevaluating several LMMs capable of handling the REC task, showing significant accuracy improvements, thus highlighting the impact of benchmark noise. In response, we introduce Ref-L4, a comprehensive REC benchmark, specifically designed to evaluate modern REC models. Ref-L4 is distinguished by four key features: 1) a substantial sample size with 45,341 annotations; 2) a diverse range of object categories with 365 distinct types and varying instance scales from 30 to 3,767; 3) lengthy referring expressions averaging 24.2 words; and 4) an extensive vocabulary comprising 22,813 unique words. We evaluate a total of 24 large models on Ref-L4 and provide valuable insights. The cleaned versions of RefCOCO, RefCOCO+, and RefCOCOg, as well as our Ref-L4 benchmark and evaluation code, are available at https://github.com/JierunChen/Ref-L4.", "keywords": "Referring expression comprehension;large multimodal models", "primary_area": "", "supplementary_material": "/attachment/e788d69f3b276c7e860c163fba5a9b7b3f7a6abf.pdf", "author": "Jierun Chen;Fangyun Wei;Jinjing Zhao;Sizhe Song;BOHUAI WU;Zhuoxuan Peng;S.-H. Chan;Hongyang Zhang", "authorids": "~Jierun_Chen1;~Fangyun_Wei1;~Jinjing_Zhao1;~Sizhe_Song1;~BOHUAI_WU1;~Zhuoxuan_Peng1;~S.-H._Chan1;~Hongyang_Zhang1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";;;https://sausage-song.github.io/home/;;https://shimmer93.github.io/pzx/;https://home.cse.ust.hk/~gchan/;https://hongyanz.github.io/", "dblp": "283/5437;161/2636;;278/8561.html;;340/6004;c/ShuengHanGaryChan;23/10537-1", "google_scholar": "8rPHNOsAAAAJ;-ncz2s8AAAAJ;;HbALOMgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=uiCSOycAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0009-0004-5494-7299;0000-0001-8344-830X;;0009-0003-2497-5187;0000-0003-4207-764X;", "linkedin": ";;zhaojingjing713/;%E6%80%9D%E5%93%B2-%E5%AE%8B-900826204/;bohuai-wu-a632611a6/;;;", "or_profile": "~Jierun_Chen1;~Fangyun_Wei1;~Jinjing_Zhao1;~Sizhe_Song1;~BOHUAI_WU1;~Zhuoxuan_Peng1;~S.-H._Chan1;~Hongyang_Zhang1", "aff": "HKUST;Microsoft Research;\tMicrosoft Research Asia;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;School of Computer Science, University of Waterloo", "aff_domain": "cse.ust.hk;microsoft.com;microsoft.com;hkust.edu.hk;hkust.edu;hkust.edu.hk;ust.hk;uwaterloo.ca", "position": "PhD student;Researcher;Intern;PhD student;MS student;MS student;Full Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024revisiting,\ntitle={Revisiting Referring Expression Comprehension Evaluation in the Era of Large Multimodal Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=mfJifAlRMY}\n}", "github": "", "project": "", "reviewers": "mG3P;RbzE;SkZD", "site": "https://openreview.net/forum?id=mfJifAlRMY", "pdf_size": 2206790, "rating": "6;6;6", "confidence": "4;4;3", "wc_summary_and_contributions": "87;57;105", "wc_strengths": "11;83;6", "wc_improvement": "159;98;6", "wc_limitations": "159;16;105", "wc_correctness": "1;2;48", "wc_clarity": "1;1;6", "wc_relation_to_prior_work": "1;16;1", "wc_documentation": "1;20;36", "wc_additional_feedback": "1;1;1", "wc_review": "421;294;314", "wc_reply_reviewers": "0;16;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 83.0, 19.79898987322333 ], "wc_strengths_avg": [ 33.333333333333336, 35.1789078220964 ], "wc_improvement_avg": [ 87.66666666666667, 62.88790733431101 ], "wc_limitations_avg": [ 93.33333333333333, 58.95949645495815 ], "wc_correctness_avg": [ 17.0, 21.924111536540465 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 6.0, 7.0710678118654755 ], "wc_documentation_avg": [ 19.0, 14.30617582258329 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.0, 55.75541827182957 ], "wc_reply_reviewers_avg": [ 5.333333333333333, 7.542472332656507 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5406225254300233921&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;1;0;0;0;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Microsoft;University of Waterloo", "aff_unique_dep": ";Microsoft Research;School of Computer Science", "aff_unique_url": "https://www.ust.hk;https://www.microsoft.com/en-us/research;https://uwaterloo.ca", "aff_unique_abbr": "HKUST;MSR;UWaterloo", "aff_campus_unique_index": "0;2;0;0;0;0;3", "aff_campus_unique": "Hong Kong SAR;;Asia;Waterloo", "aff_country_unique_index": "0;1;0;0;0;0;0;2", "aff_country_unique": "China;United States;Canada" }, { "title": "Dueling over Dessert, Mastering the Art of Repeated Cake Cutting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93742", "id": "mfTvNzhsht", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mfTvNzhsht", "openreview": "https://openreview.net/forum?id=mfTvNzhsht", "poster": "/media/PosterPDFs/NeurIPS%202024/93742.png?t=1733785542.415718", "project": "", "author_site": "Simina Branzei, MohammadTaghi Hajiaghayi, Reed Phillips, Suho Shin, Kun Wang", "tldr": "", "abstract": "We consider the setting of repeated fair division between two players, denoted Alice and Bob, with private valuations over a cake. In each round, a new cake arrives, which is identical to the ones in previous rounds. Alice cuts the cake at a point of her choice, while Bob chooses the left piece or the right piece, leaving the remainder for Alice. \nWe consider two versions: sequential, where Bob observes Alice's cut point before choosing left/right, and simultaneous, where he only observes her cut point after making his choice. The simultaneous version was first considered by Aumann and Maschler.\n \nWe observe that if Bob is almost myopic and chooses his favorite piece too often, then he can be systematically exploited by Alice through a strategy akin to a binary search. This strategy allows Alice to approximate Bob's preferences with increasing precision, thereby securing a disproportionate share of the resource over time.\n\nWe analyze the limits of how much a player can exploit the other one and show that fair utility profiles are in fact achievable. Specifically, the players can enforce the equitable utility profile of $(1/2, 1/2)$ in the limit on every trajectory of play, by keeping the other player's utility to approximately $1/2$ on average while guaranteeing they themselves get at least approximately $1/2$ on average. We show this theorem using a connection with Blackwell approachability.\n\nFinally, we analyze a natural dynamic known as fictitious play, where players best respond to the empirical distribution of the other player. We show that\nfictitious play converges to the equitable utility profile of $(1/2, 1/2)$ at a rate of $O(1/\\sqrt{T})$.", "keywords": "fair division;online learning;fictitious play;repeated games", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/aba829d784d7537a79fbcb6cdb7b0a004f3bc092.zip", "author": "Simina Branzei;MohammadTaghi Hajiaghayi;Reed Phillips;Suho Shin;Kun Wang", "authorids": "~Simina_Branzei1;~MohammadTaghi_Hajiaghayi1;~Reed_Phillips1;~Suho_Shin1;~Kun_Wang9", "gender": "F;M;M;M;M", "homepage": "https://simina.info;http://www.cs.umd.edu/~hajiagha/;https://www.cs.purdue.edu/people/graduate-students/phill289.html;https://suhoshin.github.io/;https://a865143034.github.io", "dblp": "90/7113;334/4488;;218/5505;", "google_scholar": "https://scholar.google.com.tw/citations?user=SJYC6DQAAAAJ;https://scholar.google.com.tw/citations?user=SQ1eGN4AAAAJ;;-p5eVQsAAAAJ;3SGYEokAAAAJ", "orcid": ";0000-0003-4842-0533;;;", "linkedin": ";mohammad-hajiaghayi-2139a913a&ved=2ahUKEwjMyeH-5-_-AhV3K1kFHeeBDKwQjjh6BAgSEAE&usg=AOvVaw1NSVoT5FCGtOTi4eT8nr4b;;;", "or_profile": "~Simina_Branzei1;~MohammadTaghi_Hajiaghayi1;~Reed_Phillips1;~Suho_Shin1;~Kun_Wang9", "aff": "Purdue University;University of Maryland, College Park;Purdue University;University of Maryland, College Park;Purdue University", "aff_domain": "purdue.edu;umd.edu;purdue.edu;umd.edu;purdue.edu", "position": "Assistant Professor;Full Professor;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nbranzei2024dueling,\ntitle={Dueling over Dessert, Mastering the Art of Repeated Cake Cutting},\nauthor={Simina Branzei and MohammadTaghi Hajiaghayi and Reed Phillips and Suho Shin and Kun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mfTvNzhsht}\n}", "github": "", "reviewers": "xPAG;6j8m;zKTZ;17dK", "pdf_size": 1029992, "rating": "7;7;7;8", "confidence": "1;4;4;3", "soundness": "3;4;4;4", "novelty": "3;3;4;4", "presentation": "3;4;3;4", "wc_summary": "158;345;124;73", "wc_strengths": "75;15;49;58", "wc_weaknesses": "53;59;92;22", "wc_questions": "53;16;21;100", "wc_limitations": "11;1;1;1", "wc_review": "350;436;287;254", "wc_reply_reviewers": "0;13;0;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.75, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 175.0, 102.70589077555387 ], "wc_strengths_avg": [ 49.25, 21.867498713844707 ], "wc_weaknesses_avg": [ 56.5, 24.84451649760969 ], "wc_questions_avg": [ 47.5, 33.47013594235912 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 331.75, 69.3699322473361 ], "wc_reply_reviewers_avg": [ 6.75, 6.7592529172978875 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1767395188668151998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "purdue.edu;umd.edu;purdue.edu;umd.edu;purdue.edu", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Purdue University;University of Maryland", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www/umd.edu", "aff_unique_abbr": "Purdue;UMD", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Functional Maps: a spectral framework for representation alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93741", "id": "mfvKEdJ4zW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mfvKEdJ4zW", "openreview": "https://openreview.net/forum?id=mfvKEdJ4zW", "poster": "/media/PosterPDFs/NeurIPS%202024/93741.png?t=1731739475.1296737", "project": "", "author_site": "Marco Fumero, Marco Pegoraro, Valentino Maiorca, Francesco Locatello, Emanuele Rodol\u00e0", "tldr": "", "abstract": "Neural models learn data representations that lie on low-dimensional manifolds, yet modeling the relation between these representational spaces is an ongoing challenge.\nBy integrating spectral geometry principles into neural modeling, we show that this problem can be better addressed in the functional domain, mitigating complexity, while enhancing interpretability and performances on downstream tasks. \nTo this end, we introduce a multi-purpose framework to the representation learning community, which allows to: (i) compare different spaces in an interpretable way and measure their intrinsic similarity; (ii) find correspondences between them, both in unsupervised and weakly supervised settings, and (iii) to effectively transfer representations between distinct spaces.\nWe validate our framework on various applications, ranging from stitching to retrieval tasks, and on multiple modalities, demonstrating that Latent Functional Maps can serve as a swiss-army knife for representation alignment.", "keywords": "Representation alignment; Spectral methods;", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Marco Fumero;Marco Pegoraro;Valentino Maiorca;Francesco Locatello;Emanuele Rodol\u00e0", "authorids": "~Marco_Fumero1;~Marco_Pegoraro1;~Valentino_Maiorca1;~Francesco_Locatello1;~Emanuele_Rodol\u00e01", "gender": ";M;M;M;M", "homepage": ";;https://gladia.di.uniroma1.it/authors/maiorca/;https://twitter.com/FrancescoLocat8;", "dblp": "273/9625;117/4931-2;305/9789;195/6074;54/8401", "google_scholar": "VYEljYEAAAAJ;3YCdMCYAAAAJ;https://scholar.google.it/citations?user=2VUUfFEAAAAJ;;-EH4wBYAAAAJ", "orcid": "0000-0001-5614-5004;0000-0001-5690-8403;0000-0001-5795-3695;;0000-0003-0091-7241", "linkedin": ";;valentino-maiorca;;", "or_profile": "~Marco_Fumero1;~Marco_Pegoraro1;~Valentino_Maiorca1;~Francesco_Locatello1;~Emanuele_Rodol\u00e01", "aff": "Institute of Science and Technology Austria(ISTA);University of Roma \"La Sapienza\";University of Roma \"La Sapienza\";Institute of Science and Technology;Sapienza University of Rome", "aff_domain": "ista.ac.at;uniroma1.it;uniroma1.it;ist.ac.at;uniroma1.it", "position": "Postdoc;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nfumero2024latent,\ntitle={Latent Functional Maps: a spectral framework for representation alignment},\nauthor={Marco Fumero and Marco Pegoraro and Valentino Maiorca and Francesco Locatello and Emanuele Rodol{\\`a}},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mfvKEdJ4zW}\n}", "github": "", "reviewers": "9utW;neES;5Rrs;yAi6", "pdf_size": 4629720, "rating": "5;6;6;7", "confidence": "4;4;3;5", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;1;2", "wc_summary": "152;101;94;99", "wc_strengths": "38;81;17;37", "wc_weaknesses": "242;178;131;258", "wc_questions": "27;132;29;3", "wc_limitations": "69;1;6;140", "wc_review": "528;493;277;537", "wc_reply_reviewers": "145;64;23;48", "wc_reply_authors": "338;209;26;75", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.5, 23.521266972678152 ], "wc_strengths_avg": [ 43.25, 23.34925052330374 ], "wc_weaknesses_avg": [ 202.25, 50.87423218093812 ], "wc_questions_avg": [ 47.75, 49.7060107029321 ], "wc_limitations_avg": [ 54.0, 56.422513237182194 ], "wc_review_avg": [ 458.75, 106.21293471136178 ], "wc_reply_reviewers_avg": [ 70.0, 45.70010940905941 ], "wc_reply_authors_avg": [ 162.0, 121.70661444638085 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14366736165795752347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ista.ac.at;uniroma1.it;uniroma1.it;ist.ac.at;uniroma1.it", "author_num": 5, "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Institute of Science and Technology Austria;University of Rome La Sapienza;Institute of Science and Technology;Sapienza University of Rome", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ista.ac.at;https://www.uniroma1.it;;https://www.uniroma1.it", "aff_unique_abbr": "ISTA;La Sapienza;;Sapienza", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Rome", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Austria;Italy;" }, { "title": "Reciprocal Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93740", "id": "mhhlZeAr67", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mhhlZeAr67", "openreview": "https://openreview.net/forum?id=mhhlZeAr67", "poster": "/media/PosterPDFs/NeurIPS%202024/93740.png?t=1733211963.6633103", "project": "", "author_site": "Julian Rodemann, Christoph Jansen, Georg Schollmeyer", "tldr": "", "abstract": "We demonstrate that numerous machine learning algorithms are specific instances of one single paradigm: reciprocal learning. These instances range from active learning over multi-armed bandits to self-training. We show that all these algorithms not only learn parameters from data but also vice versa: They iteratively alter training data in a way that depends on the current model fit. We introduce reciprocal learning as a generalization of these algorithms using the language of decision theory. This allows us to study under what conditions they converge. The key is to guarantee that reciprocal learning contracts such that the Banach fixed-point theorem applies. In this way, we find that reciprocal learning converges at linear rates to an approximately optimal model under some assumptions on the loss function, if their predictions are probabilistic and the sample adaption is both non-greedy and either randomized or regularized. We interpret these findings and provide corollaries that relate them to active learning, self-training, and bandits.", "keywords": "Convergence;Decision Theory;Bandits;Active Learning;Self-Training;Semi-Supervised Learning;Online Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Julian Rodemann;Christoph Jansen;Georg Schollmeyer", "authorids": "~Julian_Rodemann1;~Christoph_Jansen1;~Georg_Schollmeyer1", "gender": "M;M;M", "homepage": "https://chjansen.weebly.com/;https://georgschollmeyer.weebly.com;https://www.julian-rodemann.de", "dblp": ";;306/7585", "google_scholar": "https://scholar.google.de/citations?user=Lrh5XU8AAAAJ;54Hl7LAAAAAJ;hIcjYocAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Christoph_Jansen1;~Georg_Schollmeyer1;~Julian_Martin_Rodemann1", "aff": "Lancaster University Leipzig;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "lancaster.ac.uk;lmu.de;lmu.de", "position": "Assistant Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nrodemann2024reciprocal,\ntitle={Reciprocal Learning},\nauthor={Julian Rodemann and Christoph Jansen and Georg Schollmeyer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mhhlZeAr67}\n}", "github": "", "reviewers": "y1vN;T1mz;MkM4;Br7x", "pdf_size": 2208953, "rating": "5;6;6;7", "confidence": "2;3;4;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "31;134;29;48", "wc_strengths": "27;205;42;53", "wc_weaknesses": "46;349;28;46", "wc_questions": "70;43;171;70", "wc_limitations": "2;1;1;1", "wc_review": "176;732;271;218", "wc_reply_reviewers": "14;179;22;0", "wc_reply_authors": "24;83;29;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 43.072613108563544 ], "wc_strengths_avg": [ 81.75, 71.75435526851314 ], "wc_weaknesses_avg": [ 117.25, 134.00256527395288 ], "wc_questions_avg": [ 88.5, 48.89018306367854 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 349.25, 223.53005949983552 ], "wc_reply_reviewers_avg": [ 53.75, 72.74054921431375 ], "wc_reply_authors_avg": [ 34.0, 30.33974291255613 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4084694778987454221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "email": "lancaster.ac.uk;lmu.de;lmu.de", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Lancaster University;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://www.lancaster.ac.uk;https://www.lmu.de", "aff_unique_abbr": "Lancaster;LMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Leipzig;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Kingdom;Germany" }, { "title": "NN4SysBench: Characterizing Neural Network Verification for Computer Systems", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97516", "id": "mhjRudcHcB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mhjRudcHcB", "openreview": "https://openreview.net/forum?id=mhjRudcHcB", "poster": "/media/PosterPDFs/NeurIPS%202024/97516.png?t=1731749562.5709763", "project": "", "author_site": "Shuyi Lin, Haoyu He, Tianhao WEI, Kaidi Xu, Huan Zhang, Gagandeep Singh, Changliu Liu, Cheng Tan", "tldr": "", "abstract": "We present NN4SysBench, a benchmark suite for neural network verification that is composed of applications from the domain of computer systems. We call these neural networks for computer systems or NN4Sys. NN4Sys is booming: there are many proposals for using neural networks in computer systems\u2014for example, databases, OSes, and networked systems\u2014many of which are safety critical. Neural network verification is a technique to formally verify whether neural networks satisfy safety properties. We however observe that NN4Sys has some unique characteristics that today\u2019s verification tools overlook and have limited support. Therefore, this benchmark suite aims at bridging the gap between NN4Sys and the verification by using impactful NN4Sys applications as benchmarks to illustrate computer systems\u2019 unique challenges. We also build a compatible version of NN4SysBench, so that today\u2019s verifiers can also work on these benchmarks with approximately the same verification difficulties. The code is available at https://github.com/lydialin1212/NN4Sys_Benchmark.", "keywords": "Neural network verification;NN4Sys;Benchmark", "primary_area": "", "supplementary_material": "/attachment/f1905a5631cc4ce0d72a9aeca5e0e167f319fec6.pdf", "author": "Shuyi Lin;Haoyu He;Tianhao Wei;Kaidi Xu;Huan Zhang;Gagandeep Singh;Changliu Liu;Cheng Tan", "authorids": "~Shuyi_Lin1;~Haoyu_He4;~Tianhao_Wei1;~Kaidi_Xu1;~Huan_Zhang1;~Gagandeep_Singh1;~Changliu_Liu1;~Cheng_Tan3", "gender": "F;M;M;M;M;M;F;M", "homepage": "https://github.com/shuyilinn;;;https://kaidixu.com/;http://huan-zhang.com;https://ggndpsngh.github.io/;http://www.cs.cmu.edu/~cliu6/index.html;http://naizhengtan.github.io/", "dblp": "206/0723;184/4312;222/5386;195/8175;23/1797-1.html;64/3747-1;166/3563;70/1533-5", "google_scholar": ";iCuAM4IAAAAJ;V22j1C0AAAAJ;lYK0wlsAAAAJ;LTa3GzEAAAAJ;https://scholar.google.ch/citations?user=m4b2ruEAAAAJ;;", "orcid": ";;;;;0000-0002-9299-2961;;", "linkedin": ";;;;;gagandeep-singh-1bb01b49/;;", "or_profile": "~Shuyi_Lin1;~Haoyu_He4;~Tianhao_Wei1;~Kaidi_Xu1;~Huan_Zhang1;~Gagandeep_Singh1;~Changliu_Liu1;~Cheng_Tan3", "aff": "Northeastern University;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Carnegie Mellon University;Drexel University;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Carnegie Mellon University;Northeastern University", "aff_domain": "neu.edu;uni-tuebingen.de;andrew.cmu.edu;drexel.edu;uiuc.edu;illinois.edu;cmu.edu;northeastern.edu", "position": "MS student;PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2024nnsysbench,\ntitle={{NN}4SysBench: Characterizing Neural Network Verification for Computer Systems},\nauthor={Shuyi Lin and Haoyu He and Tianhao Wei and Kaidi Xu and Huan Zhang and Gagandeep Singh and Changliu Liu and Cheng Tan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mhjRudcHcB}\n}", "github": "", "reviewers": "7er3;15jd;1WSJ", "pdf_size": 316421, "rating": "5;7;7", "confidence": "5;4;3", "wc_summary_and_contributions": "50;91;68", "wc_strengths": "39;77;53", "wc_improvement": "42;67;42", "wc_limitations": "9;4;63", "wc_correctness": "12;46;1", "wc_clarity": "6;20;1", "wc_relation_to_prior_work": "13;23;6", "wc_documentation": "36;35;1", "wc_additional_feedback": "1;1;1", "wc_review": "208;364;236", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 69.66666666666667, 16.77961726487096 ], "wc_strengths_avg": [ 56.333333333333336, 15.69146972791976 ], "wc_improvement_avg": [ 50.333333333333336, 11.785113019775793 ], "wc_limitations_avg": [ 25.333333333333332, 26.71246067953223 ], "wc_correctness_avg": [ 19.666666666666668, 19.154343864744856 ], "wc_clarity_avg": [ 9.0, 8.04155872120988 ], "wc_relation_to_prior_work_avg": [ 14.0, 6.97614984548545 ], "wc_documentation_avg": [ 24.0, 16.268579122549905 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 269.3333333333333, 67.90843508398316 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7TAN02Tcg-EJ:scholar.google.com/&scioq=NN4SysBench:+Characterizing+Neural+Network+Verification+for+Computer+Systems&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "neu.edu;uni-tuebingen.de;andrew.cmu.edu;drexel.edu;uiuc.edu;illinois.edu;cmu.edu;northeastern.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;4;2;0", "aff_unique_norm": "Northeastern University;Eberhard Karls University of T\u00fcbingen;Carnegie Mellon University;Drexel University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.northeastern.edu;https://www.uni-tuebingen.de/;https://www.cmu.edu;https://www.drexel.edu;https://illinois.edu", "aff_unique_abbr": "NEU;Uni T\u00fcbingen;CMU;Drexel;UIUC", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";T\u00fcbingen;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Online Relational Inference for Evolving Multi-agent Interacting Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93739", "id": "miO8odRzto", "proceeding": "", "pdf": "https://openreview.net/pdf?id=miO8odRzto", "openreview": "https://openreview.net/forum?id=miO8odRzto", "poster": "", "project": "", "author_site": "Beomseok Kang, Priyabrata Saha, Sudarshan Sharma, Biswadeep Chakraborty, Saibal Mukhopadhyay", "tldr": "", "abstract": "We introduce a novel framework, Online Relational Inference (ORI), designed to efficiently identify hidden interaction graphs in evolving multi-agent interacting systems using streaming data. Unlike traditional offline methods that rely on a fixed training set, ORI employs online backpropagation, updating the model with each new data point, thereby allowing it to adapt to changing environments in real-time. A key innovation is the use of an adjacency matrix as a trainable parameter, optimized through a new adaptive learning rate technique called AdaRelation, which adjusts based on the historical sensitivity of the decoder to changes in the interaction graph. Additionally, a data augmentation method named Trajectory Mirror (TM) is introduced to improve generalization by exposing the model to varied trajectory patterns. Experimental results on both synthetic datasets and real-world data (CMU MoCap for human motion) demonstrate that ORI significantly improves the accuracy and adaptability of relational inference in dynamic settings compared to existing methods. This approach is model-agnostic, enabling seamless integration with various neural relational inference (NRI) architectures, and offers a robust solution for real-time applications in complex, evolving systems.", "keywords": "Neural Relational Inference;Online Learning;Multi-agent System", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Beomseok Kang;Priyabrata Saha;Sudarshan Sharma;Biswadeep Chakraborty;Saibal Mukhopadhyay", "authorids": "~Beomseok_Kang1;~Priyabrata_Saha1;~Sudarshan_Sharma1;~Biswadeep_Chakraborty1;~Saibal_Mukhopadhyay2", "gender": "M;M;M;M;M", "homepage": "https://sites.google.com/view/beomseok-kang;https://www.priyabratasaha.com/;https://www.sudarshan-sh.com;;https://greenlab.ece.gatech.edu", "dblp": "309/1053;216/9953;;238/0554;66/1210", "google_scholar": "kbqaf1EAAAAJ;jhzXDhoAAAAJ;xcOv-28AAAAJ;8soIjY8AAAAJ;5KRtMEkAAAAJ", "orcid": ";;;;0000-0002-8894-3390", "linkedin": ";;;;", "or_profile": "~Beomseok_Kang1;~Priyabrata_Saha1;~Sudarshan_Sharma1;~Biswadeep_Chakraborty1;~Saibal_Mukhopadhyay2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Postdoc;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkang2024online,\ntitle={Online Relational Inference for Evolving Multi-agent Interacting Systems},\nauthor={Beomseok Kang and Priyabrata Saha and Sudarshan Sharma and Biswadeep Chakraborty and Saibal Mukhopadhyay},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=miO8odRzto}\n}", "github": "", "reviewers": "arYB;mc8m;fSnF", "pdf_size": 4012646, "rating": "5;5;7", "confidence": "1;3;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;2;3", "wc_summary": "90;77;102", "wc_strengths": "23;29;99", "wc_weaknesses": "77;84;80", "wc_questions": "2;309;134", "wc_limitations": "1;38;1", "wc_review": "193;537;416", "wc_reply_reviewers": "36;510;50", "wc_reply_authors": "145;2327;54", "reply_reviewers": "1;3;1", "reply_authors": "3;7;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 89.66666666666667, 10.208928554075703 ], "wc_strengths_avg": [ 50.333333333333336, 34.49959742116163 ], "wc_weaknesses_avg": [ 80.33333333333333, 2.8674417556808756 ], "wc_questions_avg": [ 148.33333333333334, 125.74135711407321 ], "wc_limitations_avg": [ 13.333333333333334, 17.441967269268172 ], "wc_review_avg": [ 382.0, 142.48040800989682 ], "wc_reply_reviewers_avg": [ 198.66666666666666, 220.2200919282546 ], "wc_reply_authors_avg": [ 842.0, 1050.7105532289409 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 2.160246899469287 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3937418397348676&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "email": "gatech.edu;gatech.edu;gatech.edu;gatech.edu;gatech.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning-Augmented Approximation Algorithms for Maximum Cut and Related Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93738", "id": "mirkQqx6po", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mirkQqx6po", "openreview": "https://openreview.net/forum?id=mirkQqx6po", "poster": "/media/PosterPDFs/NeurIPS%202024/93738.png?t=1732756090.763726", "project": "", "author_site": "Vincent Cohen-Addad, Tommaso d\u2019Orsi, Anupam Gupta, Euiwoong Lee, Debmalya Panigrahi", "tldr": "", "abstract": "In recent years, there has been a surge of interest in the use of machine-learned predictions to bypass worst-case lower bounds for classical problems in combinatorial optimization. So far, the focus has mostly been on online algorithms, where information-theoretic barriers are overcome using predictions about the unknown future. In this paper, we consider the complementary question of using learned information to overcome computational barriers in the form of approximation hardness of polynomial-time algorithms for NP-hard (offline) problems. We show that noisy predictions about the optimal solution can be used to break classical hardness results for maximization problems such as the max-cut problem and more generally, maximization versions of constraint satisfaction problems (CSPs).", "keywords": "learning-augmented algorithm;approximation algorithm;maximization;CSPs;learning with advice", "primary_area": "optimization", "supplementary_material": "", "author": "Vincent Cohen-Addad;Tommaso d'Orsi;Anupam Gupta;Euiwoong Lee;Debmalya Panigrahi", "authorids": "~Vincent_Cohen-Addad1;~Tommaso_d'Orsi1;~Anupam_Gupta2;~Euiwoong_Lee2;~Debmalya_Panigrahi1", "gender": ";;M;F;M", "homepage": ";https://tommasodorsi.github.io;https://cs.nyu.edu/~anupamg/;https://web.eecs.umich.edu/~euiwoong/;https://www.debmalyapanigrahi.org/", "dblp": "136/5814;275/8135;27/2931;129/5624.html;81/6547", "google_scholar": ";;QuwaU-8AAAAJ;9Yxhn6oAAAAJ;https://scholar.google.com.tw/citations?user=syv4e-EAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Vincent_Cohen-Addad1;~Tommaso_d'Orsi1;~Anupam_Gupta2;~Euiwoong_Lee2;~Debmalya_Panigrahi1", "aff": "Google;Bocconi University;New York University;University of Michigan;Department of Computer Science, Duke University", "aff_domain": "google.com;unibocconi.it;cs.nyu.edu;umich.edu;cs.duke.edu", "position": "Researcher;Assistant Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncohen-addad2024learningaugmented,\ntitle={Learning-Augmented Approximation Algorithms for Maximum Cut and Related Problems},\nauthor={Vincent Cohen-Addad and Tommaso d'Orsi and Anupam Gupta and Euiwoong Lee and Debmalya Panigrahi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mirkQqx6po}\n}", "github": "", "reviewers": "PkHv;fPP4;969c;SRjy", "pdf_size": 336399, "rating": "5;7;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;4;3;4", "presentation": "3;3;3;4", "wc_summary": "116;189;144;174", "wc_strengths": "60;63;79;46", "wc_weaknesses": "75;40;2;36", "wc_questions": "99;1;39;287", "wc_limitations": "15;9;6;1", "wc_review": "365;302;270;544", "wc_reply_reviewers": "0;0;5;9", "wc_reply_authors": "533;11;275;764", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 155.75, 28.092481200492063 ], "wc_strengths_avg": [ 62.0, 11.726039399558575 ], "wc_weaknesses_avg": [ 38.25, 25.849323008543184 ], "wc_questions_avg": [ 106.5, 109.91246517115336 ], "wc_limitations_avg": [ 7.75, 5.0682837331783235 ], "wc_review_avg": [ 370.25, 105.97729709706698 ], "wc_reply_reviewers_avg": [ 3.5, 3.774917217635375 ], "wc_reply_authors_avg": [ 395.75, 281.539850642853 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1699933859317241232&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "google.com;unibocconi.it;cs.nyu.edu;umich.edu;cs.duke.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Google;Bocconi University;New York University;University of Michigan;Duke University", "aff_unique_dep": "Google;;;;Department of Computer Science", "aff_unique_url": "https://www.google.com;https://www.bocconi.edu;https://www.nyu.edu;https://www.umich.edu;https://www.duke.edu", "aff_unique_abbr": "Google;Bocconi;NYU;UM;Duke", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Italy" }, { "title": "Yo'LLaVA: Your Personalized Language and Vision Assistant", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93737", "id": "mjGy8g3pgi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mjGy8g3pgi", "openreview": "https://openreview.net/forum?id=mjGy8g3pgi", "poster": "/media/PosterPDFs/NeurIPS%202024/93737.png?t=1729115312.34047", "project": "", "author_site": "Thao Nguyen, Haotian Liu, Yuheng Li, Mu Cai, Utkarsh Ojha, Yong Jae Lee", "tldr": "", "abstract": "Large Multimodal Models (LMMs) have shown remarkable capabilities across a variety of tasks (e.g., image captioning, visual question answering).\nWhile broad, their knowledge remains generic (e.g., recognizing a dog), and they are unable to handle personalized subjects (e.g., recognizing a user's pet dog).\n\nHuman reasoning, in contrast, typically operates within the context of specific subjects in our surroundings. For example, one might ask, \"What should I buy for *my dog*'s birthday?\"; as opposed to a generic inquiry about \"What should I buy for *a dog*'s birthday?\".\nSimilarly, when looking at a friend's image, the interest lies in seeing their activities (e.g., \"*my friend* is holding a cat\"), rather than merely observing generic human actions (e.g., \"*a man* is holding a cat\").\n\nIn this paper, we introduce the novel task of personalizing LMMs, so that they can have conversations about a specific subject. We propose Yo'LLaVA, which learns to embed a personalized subject into a set of latent tokens given a handful of example images of the subject. Our qualitative and quantitative analyses reveal that Yo'LLaVA can learn the concept more efficiently using fewer tokens and more effectively encode the visual attributes compared to strong prompting baselines (e.g., LLaVA).", "keywords": "personalization;multimodal models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Thao Nguyen;Haotian Liu;Yuheng Li;Mu Cai;Utkarsh Ojha;Yong Jae Lee", "authorids": "~Thao_Nguyen4;~Haotian_Liu1;~Yuheng_Li1;~Mu_Cai1;~Utkarsh_Ojha1;~Yong_Jae_Lee2", "gender": "F;;M;M;M;M", "homepage": "https://thaoshibe.github.io/;https://hliu.cc;;https://pages.cs.wisc.edu/~mucai/;https://utkarshojha.github.io/;https://pages.cs.wisc.edu/~yongjaelee/", "dblp": ";66/10511;39/3954;279/6529;194/5532;15/5471", "google_scholar": "P_6-46UAAAAJ;Xo6wfnQAAAAJ;ZphbAXEAAAAJ;euruCPEAAAAJ;QGdSgfoAAAAJ;4GTpCxcAAAAJ", "orcid": ";;;0009-0008-7967-9752;;", "linkedin": ";;;mu-cai/;utkarsh-ojha-16a20b11b/;", "or_profile": "~Thao_Nguyen4;~Haotian_Liu1;~Yuheng_Li1;~Mu_Cai1;~Utkarsh_Ojha1;~Yong_Jae_Lee1", "aff": "Department of Computer Science, University of Wisconsin - Madison;Department of Computer Science, University of Wisconsin - Madison;University of Wisconsin - Madison;Microsoft;University of Wisconsin - Madison;University of Wisconsin - Madison", "aff_domain": "cs.wisc.edu;cs.wisc.edu;wisc.edu;microsoft.com;wisc.edu;cs.wisc.edu", "position": "PhD student;PhD student;PhD student;Intern;PhD student;Associate Professor", "bibtex": "@inproceedings{\nnguyen2024yollava,\ntitle={Yo'{LL}a{VA}: Your Personalized Language and Vision Assistant},\nauthor={Thao Nguyen and Haotian Liu and Yuheng Li and Mu Cai and Utkarsh Ojha and Yong Jae Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mjGy8g3pgi}\n}", "github": "", "reviewers": "VkWL;PZy9;wZcX;ddkC", "pdf_size": 22586996, "rating": "5;5;7;7", "confidence": "5;4;5;4", "soundness": "3;3;4;3", "novelty": "2;3;4;4", "presentation": "4;4;4;4", "wc_summary": "52;64;333;81", "wc_strengths": "51;114;10;114", "wc_weaknesses": "274;166;26;104", "wc_questions": "74;33;12;43", "wc_limitations": "14;2;1;1", "wc_review": "465;379;382;343", "wc_reply_reviewers": "350;0;10;16", "wc_reply_authors": "613;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 4.0, 0.0 ], "wc_summary_avg": [ 132.5, 116.21639299169459 ], "wc_strengths_avg": [ 72.25, 44.19488092528364 ], "wc_weaknesses_avg": [ 142.5, 90.6904074309957 ], "wc_questions_avg": [ 40.5, 22.34390297150433 ], "wc_limitations_avg": [ 4.5, 5.5 ], "wc_review_avg": [ 392.25, 44.71786555729153 ], "wc_reply_reviewers_avg": [ 94.0, 147.9121360808504 ], "wc_reply_authors_avg": [ 153.25, 265.43678625993044 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10311003483729423424&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "cs.wisc.edu;cs.wisc.edu;wisc.edu;microsoft.com;wisc.edu;cs.wisc.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "University of Wisconsin-Madison;Microsoft", "aff_unique_dep": "Department of Computer Science;Microsoft Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.microsoft.com", "aff_unique_abbr": "UW-Madison;Microsoft", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Explanations that reveal all through the de\ufb01nition of encoding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93736", "id": "mkw6x0OExg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mkw6x0OExg", "openreview": "https://openreview.net/forum?id=mkw6x0OExg", "poster": "/media/PosterPDFs/NeurIPS%202024/93736.png?t=1733245386.0283792", "project": "", "author_site": "Aahlad Manas Puli, Nhi Nguyen, Rajesh Ranganath", "tldr": "", "abstract": "Feature attributions attempt to highlight what inputs drive predictive power. Good attributions or explanations are thus those that produce inputs that retain this predictive power; accordingly, evaluations of explanations score their quality of prediction. However, evaluations produce scores better than what appears possible from the values in the explanation for a class of explanations, called encoding explanations. Probing for encoding remains a challenge because there is no general characterization of what gives the extra predictive power. We develop a de\ufb01nition of encoding that identi\ufb01es this extra predictive power via conditional dependence and show that the de\ufb01nition \ufb01ts existing examples of encoding. This de\ufb01nition implies, in contrast to encoding explanations, that non-encoding explanations contain all the informative inputs used to produce the explanation, giving them a \u201cwhat you see is what you get\u201d property, which makes them transparent and simple to use. Next, we prove that existing scores (ROAR, FRESH, EVAL-X) do not rank non-encoding explanations above encoding ones, and develop STRIPE-X which ranks them correctly. After empirically demonstrating the theoretical insights, we use STRIPE-X to show that despite prompting an LLM to produce non-encoding explanations for a sentiment analysis task, the LLM-generated explanations encode.", "keywords": "feature attributions;model explanations;evaluating explanations;encoding the prediction;interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Aahlad Manas Puli;Nhi Nguyen;Rajesh Ranganath", "authorids": "~Aahlad_Manas_Puli1;~Nhi_Nguyen1;~Rajesh_Ranganath2", "gender": "M;F;", "homepage": "http://aahladmanas.github.io;https://www.linkedin.com/in/nhi-nguyen-a427401a9/;", "dblp": "228/9272;;97/7057", "google_scholar": "xWmCmBQAAAAJ;;", "orcid": ";;", "linkedin": ";nhi-nguyen-a427401a9/;", "or_profile": "~Aahlad_Manas_Puli1;~Nhi_Nguyen1;~Rajesh_Ranganath2", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npuli2024explanations,\ntitle={Explanations that reveal all through the definition of encoding},\nauthor={Aahlad Manas Puli and Nhi Nguyen and Rajesh Ranganath},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mkw6x0OExg}\n}", "github": "", "reviewers": "PabQ;Fr3U;A1S9", "pdf_size": 1276297, "rating": "5;5;7", "confidence": "3;3;3", "soundness": "3;2;4", "novelty": "2;2;3", "presentation": "2;1;3", "wc_summary": "141;91;92", "wc_strengths": "83;32;100", "wc_weaknesses": "566;76;71", "wc_questions": "266;169;46", "wc_limitations": "79;10;61", "wc_review": "1135;378;370", "wc_reply_reviewers": "29;174;0", "wc_reply_authors": "30;30;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 108.0, 23.338094752285727 ], "wc_strengths_avg": [ 71.66666666666667, 28.89444391035911 ], "wc_weaknesses_avg": [ 237.66666666666666, 232.17569975248392 ], "wc_questions_avg": [ 160.33333333333334, 90.02345373413654 ], "wc_limitations_avg": [ 50.0, 29.223278392404914 ], "wc_review_avg": [ 627.6666666666666, 358.75370691077495 ], "wc_reply_reviewers_avg": [ 67.66666666666667, 76.11540944878084 ], "wc_reply_authors_avg": [ 20.0, 14.142135623730951 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5890401262602471375&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "nyu.edu;nyu.edu;nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Non-asymptotic Global Convergence Analysis of BFGS with the Armijo-Wolfe Line Search", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93735", "id": "mkzpN2T87C", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mkzpN2T87C", "openreview": "https://openreview.net/forum?id=mkzpN2T87C", "poster": "", "project": "", "author_site": "Qiujiang Jin, Ruichen Jiang, Aryan Mokhtari", "tldr": "", "abstract": "In this paper, we present the first explicit and non-asymptotic global convergence rates of the BFGS method when implemented with an inexact line search scheme satisfying the Armijo-Wolfe conditions. We show that BFGS achieves a global linear convergence rate of $(1 - \\frac{1}{\\kappa})^t$ for $\\mu$-strongly convex functions with $L$-Lipschitz gradients, where $\\kappa = \\frac{L}{\\mu}$ represents the condition number. Additionally, if the objective function's Hessian is Lipschitz, BFGS with the Armijo-Wolfe line search achieves a linear convergence rate that depends solely on the line search parameters, independent of the condition number. We also establish a global superlinear convergence rate of $\\mathcal{O}((\\frac{1}{t})^t)$. These global bounds are all valid for any starting point $x_0$ and any symmetric positive definite initial Hessian approximation matrix $B_0$, though the choice of $B_0$ impacts the number of iterations needed to achieve these rates. By synthesizing these results, we outline the first global complexity characterization of BFGS with the Armijo-Wolfe line search. Additionally, we clearly define a mechanism for selecting the step size to satisfy the Armijo-Wolfe conditions and characterize its overall complexity.", "keywords": "Quasi-Newton method;BFGS Algorithm;Global convergence analysis;Non-asymptotic convergence analysis;Inexact line search", "primary_area": "optimization", "supplementary_material": "", "author": "Qiujiang Jin;Ruichen Jiang;Aryan Mokhtari", "authorids": "~Qiujiang_Jin1;~Ruichen_Jiang1;~Aryan_Mokhtari3", "gender": ";;M", "homepage": "https://qiujiangjin.github.io/;https://ruichen-jiang.github.io/;https://sites.utexas.edu/mokhtari/", "dblp": ";271/7916;140/7407", "google_scholar": ";BGFt1UMAAAAJ;glcep6EAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Qiujiang_Jin1;~Ruichen_Jiang1;~Aryan_Mokhtari3", "aff": "University of Texas, Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\njin2024nonasymptotic,\ntitle={Non-asymptotic Global Convergence Analysis of {BFGS} with the Armijo-Wolfe Line Search},\nauthor={Qiujiang Jin and Ruichen Jiang and Aryan Mokhtari},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mkzpN2T87C}\n}", "github": "", "reviewers": "EiJP;7mqx;GXyX", "pdf_size": 1258425, "rating": "6;6;8", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "2;3;3", "wc_summary": "92;45;32", "wc_strengths": "89;29;2", "wc_weaknesses": "475;90;1", "wc_questions": "5;43;446", "wc_limitations": "33;1;1", "wc_review": "694;208;482", "wc_reply_reviewers": "0;10;56", "wc_reply_authors": "0;0;39", "reply_reviewers": "0;1;2", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.333333333333336, 25.772509040103607 ], "wc_strengths_avg": [ 40.0, 36.359317925395686 ], "wc_weaknesses_avg": [ 188.66666666666666, 205.70259005553518 ], "wc_questions_avg": [ 164.66666666666666, 199.5366855715733 ], "wc_limitations_avg": [ 11.666666666666666, 15.084944665313014 ], "wc_review_avg": [ 461.3333333333333, 198.94611219009926 ], "wc_reply_reviewers_avg": [ 22.0, 24.385788210895843 ], "wc_reply_authors_avg": [ 13.0, 18.384776310850235 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9250639355130200832&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;utexas.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FreeSplat: Generalizable 3D Gaussian Splatting Towards Free View Synthesis of Indoor Scenes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93734", "id": "ml01XyP698", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ml01XyP698", "openreview": "https://openreview.net/forum?id=ml01XyP698", "poster": "/media/PosterPDFs/NeurIPS%202024/93734.png?t=1731395210.1410453", "project": "", "author_site": "Yunsong Wang, Tianxin Huang, Hanlin Chen, Gim Hee Lee", "tldr": "", "abstract": "Empowering 3D Gaussian Splatting with generalization ability is appealing. However, existing generalizable 3D Gaussian Splatting methods are largely confined to narrow-range interpolation between stereo images due to their heavy backbones, thus lacking the ability to accurately localize 3D Gaussian and support free-view synthesis across wide view range. In this paper, we present a novel framework FreeSplat that is capable of reconstructing geometrically consistent 3D scenes from long sequence input towards free-view synthesis.Specifically, we firstly introduce Low-cost Cross-View Aggregation achieved by constructing adaptive cost volumes among nearby views and aggregating features using a multi-scale structure. Subsequently, we present the Pixel-wise Triplet Fusion to eliminate redundancy of 3D Gaussians in overlapping view regions and to aggregate features observed across multiple views. Additionally, we propose a simple but effective free-view training strategy that ensures robust view synthesis across broader view range regardless of the number of views. Our empirical results demonstrate state-of-the-art novel view synthesis peformances in both novel view rendered color maps quality and depth maps accuracy across different numbers of input views. We also show that FreeSplat performs inference more efficiently and can effectively reduce redundant Gaussians, offering the possibility of feed-forward large scene reconstruction without depth priors. Our code will be made open-source upon paper acceptance.", "keywords": "3D Gaussian Splatting;Generalization;3D from multi-view sensors;Novel View Synthesis;3D Computer Vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yunsong Wang;Tianxin Huang;Hanlin Chen;Gim Hee Lee", "authorids": "~Yunsong_Wang1;~Tianxin_Huang1;~Hanlin_Chen2;~Gim_Hee_Lee1", "gender": "M;M;M;", "homepage": "https://wangys16.github.io/;https://tianxinhuang.github.io/;https://hlinchen.github.io/;https://www.comp.nus.edu.sg/~leegh/", "dblp": "182/0203;251/3784;;49/9455", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=Fg7WYfcAAAAJ;fBpYOzAAAAAJ;https://scholar.google.com.sg/citations?user=7hNKrPsAAAAJ", "orcid": ";;0000-0002-3323-8213;0000-0002-1583-0475", "linkedin": "yunsong-wang-32ab98202/;;;", "or_profile": "~Yunsong_Wang1;~Tianxin_Huang1;~Hanlin_Chen2;~Gim_Hee_Lee1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg;nus.edu;nus.edu.sg", "position": "PhD student;Research Fellow;PhD student;Associate Professor", "bibtex": "@inproceedings{\nwang2024freesplat,\ntitle={FreeSplat: Generalizable 3D Gaussian Splatting Towards Free View Synthesis of Indoor Scenes},\nauthor={Yunsong Wang and Tianxin Huang and Hanlin Chen and Gim Hee Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ml01XyP698}\n}", "github": "", "reviewers": "ZuRj;emYN;Hd37;gTxT", "pdf_size": 11119659, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "57;157;51;48", "wc_strengths": "77;77;33;44", "wc_weaknesses": "72;200;154;61", "wc_questions": "86;4;37;127", "wc_limitations": "46;39;1;32", "wc_review": "338;477;276;312", "wc_reply_reviewers": "72;25;225;144", "wc_reply_authors": "419;0;470;274", "reply_reviewers": "2;1;2;2", "reply_authors": "3;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.25, 45.58165749509335 ], "wc_strengths_avg": [ 57.75, 19.638928178492836 ], "wc_weaknesses_avg": [ 121.75, 57.72510285828861 ], "wc_questions_avg": [ 63.5, 46.853495067070504 ], "wc_limitations_avg": [ 29.5, 17.18284027743958 ], "wc_review_avg": [ 350.75, 76.14254723871537 ], "wc_reply_reviewers_avg": [ 116.5, 75.63233435508916 ], "wc_reply_authors_avg": [ 290.75, 182.61623011112675 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8536142410212003075&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "nus.edu.sg;nus.edu.sg;nus.edu;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Multi-Chain Graphs of Graphs: A New Approach to Analyzing Blockchain Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97515", "id": "mlbVgVKwD7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mlbVgVKwD7", "openreview": "https://openreview.net/forum?id=mlbVgVKwD7", "poster": "/media/PosterPDFs/NeurIPS%202024/97515.png?t=1731663437.54348", "project": "", "author_site": "Bingqiao Luo, Zhen Zhang, Qian Wang, Bingsheng He", "tldr": "", "abstract": "Machine learning applied to blockchain graphs offers significant opportunities for enhanced data analysis and applications. However, the potential of this field is constrained by the lack of a large-scale, cross-chain dataset that includes hierarchical graph-level data. To address this issue, we present novel datasets that provide detailed label information at the token level and integrate interactions between tokens across multiple blockchain platforms. We model transactions within each token as local graphs and the relationships between tokens as global graphs, collectively forming a \"Graphs of Graphs\" (GoG) approach. This innovative approach facilitates a deeper understanding of systemic structures and hierarchical interactions, which are essential for applications such as link prediction, anomaly detection, and token classification. We conduct a series of experiments demonstrating that this dataset delivers new insights and challenges for exploring GoG within the blockchain domain. Our work promotes advancements and opens new avenues for research in both the blockchain and graph communities. Source code and datasets are available at https://github.com/Xtra-Computing/Cryptocurrency-Graphs-of-graphs.", "keywords": "Blockchain;Cryptocurrency;Graph", "primary_area": "", "supplementary_material": "/attachment/4c3a092581898326c7a61be1202a2695b54faa4b.pdf", "author": "Bingqiao Luo;Zhen Zhang;Qian Wang;Bingsheng He", "authorids": "~Bingqiao_Luo1;~Zhen_Zhang14;~Qian_Wang25;~Bingsheng_He1", "gender": ";M;;M", "homepage": ";https://cszhangzhen.github.io/;;http://www.comp.nus.edu.sg/~hebs/", "dblp": "344/3342;19/5112-23;;h/BingshengHe.html", "google_scholar": ";8hclVjIAAAAJ;KAGrBdoAAAAJ;https://scholar.google.com.tw/citations?user=RogYLKYAAAAJ", "orcid": ";0000-0001-5769-8786;;0000-0001-8618-4581", "linkedin": "bingqiao-luo-3993031a3/;;;bingsheng-he-7734b131", "or_profile": "~Bingqiao_Luo1;~Zhen_Zhang14;~Qian_Wang25;~Bingsheng_He1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg", "position": "PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nluo2024multichain,\ntitle={Multi-Chain Graphs of Graphs: A New Approach to Analyzing Blockchain Datasets},\nauthor={Bingqiao Luo and Zhen Zhang and Qian Wang and Bingsheng He},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mlbVgVKwD7}\n}", "github": "", "reviewers": "kLuX;vWPB;Rsox;RMtu", "pdf_size": 958175, "rating": "5;6;6;7", "confidence": "5;4;3;5", "wc_summary_and_contributions": "65;78;49;78", "wc_strengths": "46;47;44;109", "wc_improvement": "94;253;119;243", "wc_limitations": "17;174;8;13", "wc_correctness": "10;48;5;5", "wc_clarity": "5;82;12;5", "wc_relation_to_prior_work": "14;25;8;8", "wc_documentation": "20;52;17;18", "wc_additional_feedback": "1;1;1;1", "wc_review": "272;760;263;480", "wc_reply_reviewers": "0;251;11;54", "wc_reply_authors": "1162;796;16;125", "reply_reviewers": "0;2;1;1", "reply_authors": "8;6;3;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 67.5, 11.926860441876563 ], "wc_strengths_avg": [ 61.5, 27.445400343226915 ], "wc_improvement_avg": [ 177.25, 71.38758645590983 ], "wc_limitations_avg": [ 53.0, 69.93210993527937 ], "wc_correctness_avg": [ 17.0, 18.01388353465182 ], "wc_clarity_avg": [ 26.0, 32.45766473423496 ], "wc_relation_to_prior_work_avg": [ 13.75, 6.94172168845741 ], "wc_documentation_avg": [ 26.75, 14.618053906043718 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 443.75, 202.17365679039395 ], "wc_reply_reviewers_avg": [ 79.0, 101.33360745576958 ], "wc_reply_authors_avg": [ 524.75, 473.89100803876835 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 5.0, 2.1213203435596424 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:s2xLfdAQ4ToJ:scholar.google.com/&scioq=Multi-Chain+Graphs+of+Graphs:+A+New+Approach+to+Analyzing+Blockchain+Datasets&hl=en&as_sdt=0,48", "gs_version_total": 0, "email": "u.nus.edu;nus.edu.sg;nus.edu.sg;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "HEST-1k: A Dataset For Spatial Transcriptomics and Histology Image Analysis", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97514", "id": "mlhFJE7PKo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mlhFJE7PKo", "openreview": "https://openreview.net/forum?id=mlhFJE7PKo", "poster": "/media/PosterPDFs/NeurIPS%202024/97514.png?t=1731452883.8307118", "project": "", "author_site": "Guillaume Jaume, Paul Doucet, Andrew Song, Ming Y. Lu, Cristina Almagro P\u00e9rez, Sophia Wagner, Anurag Vaidya, Richard Chen, Drew Williamson, Ahrong Kim, Faisal Mahmood", "tldr": "", "abstract": "Spatial transcriptomics enables interrogating the molecular composition of tissue with ever-increasing resolution and sensitivity. However, costs, rapidly evolving technology, and lack of standards have constrained computational methods in ST to narrow tasks and small cohorts. In addition, the underlying tissue morphology, as reflected by H&E-stained whole slide images (WSIs), encodes rich information often overlooked in ST studies. Here, we introduce HEST-1k, a collection of 1,229 spatial transcriptomic profiles, each linked to a WSI and extensive metadata. HEST-1k was assembled from 153 public and internal cohorts encompassing 26 organs, two species (Homo Sapiens and Mus Musculus), and 367 cancer samples from 25 cancer types. HEST-1k processing enabled the identification of 2.1 million expression-morphology pairs and over 76 million nuclei. To support its development, we additionally introduce the HEST-Library, a Python package designed to perform a range of actions with HEST samples. We test HEST-1k and Library on three use cases: (1) benchmarking foundation models for pathology (HEST-Benchmark), (2) biomarker exploration, and (3) multimodal representation learning. HEST-1k, HEST-Library, and HEST-Benchmark can be freely accessed at https://github.com/mahmoodlab/hest.", "keywords": "Spatial Transcriptomics;Computational Pathology;Whole-slide image", "primary_area": "", "supplementary_material": "/attachment/36e76ca957e60bac969a7eba0846ed97db7ca20f.zip", "author": "Guillaume Jaume;Paul Doucet;Andrew H. Song;Ming Y. Lu;Cristina Almagro P\u00e9rez;Sophia J Wagner;Anurag Jayant Vaidya;Richard J. Chen;Drew FK Williamson;Ahrong Kim;Faisal Mahmood", "authorids": "~Guillaume_Jaume2;~Paul_Doucet1;~Andrew_H._Song1;~Ming_Y._Lu1;~Cristina_Almagro_P\u00e9rez1;~Sophia_J_Wagner1;~Anurag_Jayant_Vaidya1;~Richard_J._Chen1;~Drew_FK_Williamson1;~Ahrong_Kim1;~Faisal_Mahmood1", "gender": "M;;;;;M;M;M;F;M;M", "homepage": "https://guillaumejaume.github.io/;;;;;https://ajv012.github.io;http://richarizardd.me;https://www.williamsonlab.org/;;http://www.mahmoodlab.org;https://andrewhsong.com", "dblp": ";;251/5562;;298/1052;;244/1941;;;;229/0474", "google_scholar": "am5XqsQAAAAJ;;GhzAXmIAAAAJ;YWOImZQAAAAJ;skcoUZMAAAAJ;4Z2Qu_YAAAAJ;yhGqdMgAAAAJ;tAxVl44AAAAJ;https://scholar.google.com/scholar?hl=ko;9MsdbKoAAAAJ;1UNlyTcAAAAJ", "orcid": ";;0000-0003-0009-9699;;0000-0003-3763-2282;;0000-0003-0389-1331;0000-0003-1745-8846;;0000-0001-7587-1562;", "linkedin": ";paul-doucet-66128522b/;ming-yang-max-lu-76b78017a/;;;;richardchen95;drew-williamson-md/;;;", "or_profile": "~Guillaume_Jaume2;~Paul_Doucet1;~Ming_Y._Lu1;~Cristina_Almagro_P\u00e9rez1;~Sophia_J_Wagner1;~Anurag_Jayant_Vaidya1;~Richard_J._Chen1;~Drew_FK_Williamson1;~Ahrong_Kim1;~Faisal_Mahmood1;~Andrew_Song1", "aff": "Harvard University;ETHZ - ETH Zurich;Massachusetts Institute of Technology;;Helmholtz Zentrum M\u00fcnchen;Massachusetts Institute of Technology;Harvard University;Emory University;Pusan National University;Harvard University;Brigham and Women's hospital", "aff_domain": "harvard.edu;ethz.ch;mit.edu;;helmholtz-munich.de;mit.edu;harvard.edu;emory.edu;medicine.pnu.ac.kr;harvard.edu;bwh.harvard.edu", "position": "Postdoc;MS student;PhD student;;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\njaume2024hestk,\ntitle={{HEST}-1k: A Dataset For Spatial Transcriptomics and Histology Image Analysis},\nauthor={Guillaume Jaume and Paul Doucet and Andrew H. Song and Ming Y. Lu and Cristina Almagro P{\\'e}rez and Sophia J Wagner and Anurag Jayant Vaidya and Richard J. Chen and Drew FK Williamson and Ahrong Kim and Faisal Mahmood},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=mlhFJE7PKo}\n}", "github": "", "reviewers": "st9K;edqT;fjb3;2cr7", "pdf_size": 6790959, "rating": "6;7;8;9", "confidence": "4;4;4;5", "wc_summary_and_contributions": "50;82;70;54", "wc_strengths": "38;111;7;113", "wc_improvement": "120;73;19;102", "wc_limitations": "4;73;23;41", "wc_correctness": "7;38;5;14", "wc_clarity": "6;3;5;5", "wc_relation_to_prior_work": "5;11;14;8", "wc_documentation": "6;20;7;34", "wc_additional_feedback": "1;1;1;1", "wc_review": "237;412;151;372", "wc_reply_reviewers": "0;8;24;7", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 64.0, 12.806248474865697 ], "wc_strengths_avg": [ 67.25, 46.07805876987441 ], "wc_improvement_avg": [ 78.5, 38.2262998470948 ], "wc_limitations_avg": [ 35.25, 25.420218331084413 ], "wc_correctness_avg": [ 16.0, 13.133925536563698 ], "wc_clarity_avg": [ 4.75, 1.0897247358851685 ], "wc_relation_to_prior_work_avg": [ 9.5, 3.3541019662496847 ], "wc_documentation_avg": [ 16.75, 11.388041973930374 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 293.0, 104.52511659883476 ], "wc_reply_reviewers_avg": [ 9.75, 8.78564169540279 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2063467461757708276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "harvard.edu;ethz.ch;mit.edu;;helmholtz-munich.de;mit.edu;harvard.edu;emory.edu;medicine.pnu.ac.kr;harvard.edu;bwh.harvard.edu", "author_num": 11, "aff_unique_index": "0;1;2;3;2;0;4;5;0;6", "aff_unique_norm": "Harvard University;ETH Zurich;Massachusetts Institute of Technology;Helmholtz Zentrum M\u00fcnchen;Emory University;Pusan National University;Brigham and Women's Hospital", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.harvard.edu;https://www.ethz.ch;https://web.mit.edu;https://www.helmholtz-muenchen.de;https://www.emory.edu;https://www.pnu.ac.kr;https://www.brighamandwomens.org", "aff_unique_abbr": "Harvard;ETHZ;MIT;;Emory;PNU;BWH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0;0;3;0;0", "aff_country_unique": "United States;Switzerland;Germany;South Korea" }, { "title": "Enhancing Reasoning Capabilities of LLMs via Principled Synthetic Logic Corpus", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93733", "id": "mljDUaQpln", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mljDUaQpln", "openreview": "https://openreview.net/forum?id=mljDUaQpln", "poster": "/media/PosterPDFs/NeurIPS%202024/93733.png?t=1731300617.9290364", "project": "", "author_site": "Terufumi Morishita, Gaku Morio, Atsuki Yamaguchi, Yasuhiro Sogawa", "tldr": "", "abstract": "Large language models (LLMs) are capable of solving a wide range of tasks, yet they have struggled with reasoning.\nTo address this, we propose $\\textbf{Additional Logic Training (ALT)}$, which aims to enhance LLMs' reasoning capabilities by program-generated logical reasoning samples.\nWe first establish principles for designing high-quality samples by integrating symbolic logic theory and previous empirical insights.\nThen, based on these principles, we construct a synthetic corpus named $\\textbf{Formal} \\ \\textbf{Logic} \\ \\textbf{\\textit{D}eduction} \\ \\textbf{\\textit{D}iverse}$ (FLD$ _{\\times2}$), comprising numerous samples of multi-step deduction with unknown facts, diverse reasoning rules, diverse linguistic expressions, and challenging distractors.\nFinally, we empirically show that ALT on FLD$ _{\\times2}$ substantially enhances the reasoning capabilities of state-of-the-art LLMs, including LLaMA-3.1-70B.\nImprovements include gains of up to 30 points on logical reasoning benchmarks, up to 10 points on math and coding benchmarks, and 5 points on the benchmark suite BBH.", "keywords": "large language model;artificial intelligence;reasoning;logical reasoning;math;coding;synthetic corpus", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Terufumi Morishita;Gaku Morio;Atsuki Yamaguchi;Yasuhiro Sogawa", "authorids": "~Terufumi_Morishita1;~Gaku_Morio1;~Atsuki_Yamaguchi1;~Yasuhiro_Sogawa1", "gender": "M;;M;M", "homepage": ";;https://gucci-j.github.io/about/;", "dblp": "249/9379.html;;266/2820;26/8485", "google_scholar": "DBD9bzYAAAAJ;;BaNSQ0cAAAAJ;", "orcid": ";;0000-0001-8327-7598;", "linkedin": ";;atsuki-yamaguchi/;", "or_profile": "~Terufumi_Morishita1;~Gaku_Morio1;~Atsuki_Yamaguchi1;~Yasuhiro_Sogawa1", "aff": "Hitachi, ltd;;University of Sheffield;Hitachi, Ltd.", "aff_domain": "hitachi.com;;sheffield.ac.uk;hitachi.com", "position": "Researcher;;PhD student;Senior Researcher", "bibtex": "@inproceedings{\nmorishita2024enhancing,\ntitle={Enhancing Reasoning Capabilities of {LLM}s via Principled Synthetic Logic Corpus},\nauthor={Terufumi Morishita and Gaku Morio and Atsuki Yamaguchi and Yasuhiro Sogawa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mljDUaQpln}\n}", "github": "", "reviewers": "BW1m;zCCP;aRM4", "pdf_size": 819056, "rating": "4;7;8", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "74;74;53", "wc_strengths": "72;107;106", "wc_weaknesses": "160;104;68", "wc_questions": "29;124;38", "wc_limitations": "8;14;75", "wc_review": "343;423;340", "wc_reply_reviewers": "0;21;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 67.0, 9.899494936611665 ], "wc_strengths_avg": [ 95.0, 16.268579122549905 ], "wc_weaknesses_avg": [ 110.66666666666667, 37.853518844209034 ], "wc_questions_avg": [ 63.666666666666664, 42.82003684673281 ], "wc_limitations_avg": [ 32.333333333333336, 30.26916289265731 ], "wc_review_avg": [ 368.6666666666667, 38.43898484033567 ], "wc_reply_reviewers_avg": [ 7.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2773500981126145, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16250820415118659242&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "hitachi.com;;sheffield.ac.uk;hitachi.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Hitachi, Ltd.;University of Sheffield", "aff_unique_dep": ";", "aff_unique_url": "https://www.hitachi.com;https://www.sheffield.ac.uk", "aff_unique_abbr": "Hitachi;Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Japan;United Kingdom" }, { "title": "Tight Rates for Bandit Control Beyond Quadratics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93732", "id": "mlm3nUwOeQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mlm3nUwOeQ", "openreview": "https://openreview.net/forum?id=mlm3nUwOeQ", "poster": "", "project": "", "author_site": "Y. Jennifer Sun, Zhou Lu", "tldr": "", "abstract": "Unlike classical control theory, such as Linear Quadratic Control (LQC), real-world control problems are highly complex. These problems often involve adversarial perturbations, bandit feedback models, and non-quadratic, adversarially chosen cost functions. A fundamental yet unresolved question is whether optimal regret can be achieved for these general control problems. The standard approach to addressing this problem involves a reduction to bandit convex optimization with memory. In the bandit setting, constructing a gradient estimator with low variance is challenging due to the memory structure and non-quadratic loss functions.\n\nIn this paper, we provide an affirmative answer to this question. Our main contribution is an algorithm that achieves an $\\tilde{O}(\\sqrt{T})$ optimal regret for bandit non-stochastic control with strongly-convex and smooth cost functions in the presence of adversarial perturbations, improving the previously known $\\tilde{O}(T^{2/3})$ regret bound from \\citep{cassel2020bandit}. Our algorithm overcomes the memory issue by reducing the problem to Bandit Convex Optimization (BCO) without memory and addresses general strongly-convex costs using recent advancements in BCO from \\citep{suggala2024second}. Along the way, we develop an improved algorithm for BCO with memory, which may be of independent interest.", "keywords": "Bandit Problem;Online Control", "primary_area": "online_learning", "supplementary_material": "", "author": "Y. Jennifer Sun;Zhou Lu", "authorids": "~Y._Jennifer_Sun1;~Zhou_Lu1", "gender": ";", "homepage": "https://orfe.princeton.edu/people/jennifer-sun;https://leozoroaster.github.io/", "dblp": ";68/11524", "google_scholar": ";17_nX_kAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Y._Jennifer_Sun1;~Zhou_Lu1", "aff": "Princeton University;", "aff_domain": "princeton.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nsun2024tight,\ntitle={Tight Rates for Bandit Control Beyond Quadratics},\nauthor={Y. Jennifer Sun and Zhou Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mlm3nUwOeQ}\n}", "github": "", "reviewers": "iRqM;vynp;r1Ah", "pdf_size": 495773, "rating": "6;6;6", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "38;81;95", "wc_strengths": "24;42;43", "wc_weaknesses": "96;107;125", "wc_questions": "16;2;21", "wc_limitations": "1;2;11", "wc_review": "175;234;295", "wc_reply_reviewers": "19;0;18", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 71.33333333333333, 24.25329301810833 ], "wc_strengths_avg": [ 36.333333333333336, 8.73053390247253 ], "wc_weaknesses_avg": [ 109.33333333333333, 11.953614051360738 ], "wc_questions_avg": [ 13.0, 8.04155872120988 ], "wc_limitations_avg": [ 4.666666666666667, 4.496912521077347 ], "wc_review_avg": [ 234.66666666666666, 48.99206284922306 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 8.73053390247253 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9531877698225836122&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "princeton.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "DMNet: Self-comparison Driven Model for Subject-independent Seizure Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93731", "id": "mlmTxJwVsb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mlmTxJwVsb", "openreview": "https://openreview.net/forum?id=mlmTxJwVsb", "poster": "", "project": "", "author_site": "Shihao Tu, Linfeng Cao, Daoze Zhang, Junru Chen, Lvbin Ma, Yin Zhang, YANG YANG", "tldr": "", "abstract": "Automated seizure detection (ASD) using intracranial electroencephalography (iEEG) is critical for effective epilepsy treatment. However, the significant domain shift of iEEG signals across subjects poses a major challenge, limiting their applicability in real-world clinical scenarios. In this paper, we address this issue by analyzing the primary cause behind the failure of existing iEEG models for subject-independent seizure detection, and identify a critical universal seizure pattern: seizure events consistently exhibit higher average amplitude compared to adjacent normal events. To mitigate the domain shifts and preserve the universal seizure patterns, we propose a novel self-comparison mechanism. This mechanism effectively aligns iEEG signals across subjects and time intervals. Building upon these findings, we propose Difference Matrix-based Neural Network (DMNet), a subject-independent seizure detection model, which leverages self-comparison based on two constructed (contextual, channel-level) references to mitigate shifts of iEEG, and utilize a simple yet effective difference matrix to encode the universal seizure patterns. Extensive experiments show that DMNet significantly outperforms previous SOTAs while maintaining high efficiency on a real-world clinical dataset collected by us and two public datasets for subject-independent seizure detection. Moreover, the visualization results demonstrate that the generated difference matrix can effectively capture the seizure activity changes during the seizure evolution process. Additionally, we deploy our method in an online diagnosis system to illustrate its effectiveness in real clinical applications.", "keywords": "seizure detection;domain generalization", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/c4ae1d4d6b58f349801ececb9bdcbb47c72d2d80.zip", "author": "Shihao Tu;Linfeng Cao;Daoze Zhang;Junru Chen;Lvbin Ma;Yin Zhang;Yang Yang", "authorids": "~Shihao_Tu4;~Linfeng_Cao1;~Daoze_Zhang1;~Junru_Chen1;~Lvbin_Ma1;~Yin_Zhang1;~Yang_Yang35", "gender": ";M;M;M;M;;M", "homepage": ";https://caolinfeng.github.io/homepage/;https://daozezhang.github.io/;https://mrnobodycali.github.io/;https://ieeexplore.ieee.org/author/37088918935;https://person.zju.edu.cn/en/0099160;http://yangy.org", "dblp": ";39/2734;;212/6753;;;", "google_scholar": ";9RH26YEAAAAJ;5beC28QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;", "orcid": ";0000-0001-6930-115X;;;;;0000-0002-5058-4417", "linkedin": ";;;;;;", "or_profile": "~Shihao_Tu4;~Linfeng_Cao1;~Daoze_Zhang1;~Junru_Chen1;~Lvbin_Ma1;~Yin_Zhang1;~Yang_Yang35", "aff": ";The Ohio State University, Columbus;Zhejiang University;Zhejiang University;Zhejiang Huayun Information Technology Co. Ltd;Zhejiang University;Zhejiang University", "aff_domain": ";osu.edu;zju.edu.cn;zju.edu.cn;huayun.com;zju.edu.cn;zju.edu.cn", "position": ";PhD student;MS student;PhD student;manager;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ntu2024dmnet,\ntitle={{DMN}et: Self-comparison Driven Model for Subject-independent Seizure Detection},\nauthor={Shihao Tu and Linfeng Cao and Daoze Zhang and Junru Chen and Lvbin Ma and Yin Zhang and Yang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mlmTxJwVsb}\n}", "github": "", "reviewers": "EpQU;s7PY;sFSG;NPyS", "pdf_size": 2585399, "rating": "5;5;5;9", "confidence": "3;4;5;4", "soundness": "3;3;3;4", "novelty": "2;3;2;4", "presentation": "3;4;3;4", "wc_summary": "84;182;79;115", "wc_strengths": "27;95;60;58", "wc_weaknesses": "177;137;138;189", "wc_questions": "337;164;3;74", "wc_limitations": "4;17;11;1", "wc_review": "629;595;291;437", "wc_reply_reviewers": "242;0;28;21", "wc_reply_authors": "1292;0;13;18", "reply_reviewers": "2;0;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 115.0, 41.06701839676214 ], "wc_strengths_avg": [ 60.0, 24.072806234421446 ], "wc_weaknesses_avg": [ 160.25, 23.14492384951828 ], "wc_questions_avg": [ 144.5, 124.92897982453871 ], "wc_limitations_avg": [ 8.25, 6.219927652312364 ], "wc_review_avg": [ 488.0, 134.8517704741024 ], "wc_reply_reviewers_avg": [ 72.75, 98.25826937209916 ], "wc_reply_authors_avg": [ 330.75, 555.0168353302447 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9545442971239148482&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";osu.edu;zju.edu.cn;zju.edu.cn;huayun.com;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Ohio State University;Zhejiang University;Zhejiang Huayun Information Technology Co. Ltd", "aff_unique_dep": ";;", "aff_unique_url": "https://www.osu.edu;https://www.zju.edu.cn;", "aff_unique_abbr": "OSU;ZJU;", "aff_campus_unique_index": "0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Validating Climate Models with Spherical Convolutional Wasserstein Distance", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93730", "id": "mmSFfib6pI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mmSFfib6pI", "openreview": "https://openreview.net/forum?id=mmSFfib6pI", "poster": "/media/PosterPDFs/NeurIPS%202024/93730.png?t=1733320388.307806", "project": "", "author_site": "Robert Garrett, Trevor Harris, Zhuo Wang, Bo Li", "tldr": "", "abstract": "The validation of global climate models is crucial to ensure the accuracy and efficacy of model output. We introduce the spherical convolutional Wasserstein distance to more comprehensively measure differences between climate models and reanalysis data. This new similarity measure accounts for spatial variability using convolutional projections and quantifies local differences in the distribution of climate variables. We apply this method to evaluate the historical model outputs of the Coupled Model Intercomparison Project (CMIP) members by comparing them to observational and reanalysis data products. Additionally, we investigate the progression from CMIP phase 5 to phase 6 and find modest improvements in the phase 6 models regarding their ability to produce realistic climatologies.", "keywords": "Climate Models;Wasserstein Distance;Convolution;Functional Data", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/1680c763a1dd3abf00496cf670963e7fa4f690b6.zip", "author": "Robert C. Garrett;Trevor Harris;Zhuo Wang;Bo Li", "authorids": "~Robert_C._Garrett1;~Trevor_Harris1;~Zhuo_Wang6;~Bo_Li8", "gender": "M;M;F;", "homepage": ";https://trevor-harris.github.io;https://experts.illinois.edu/en/persons/zhuo-wang;", "dblp": ";;;", "google_scholar": "f_AZDVYAAAAJ;aE9Fu2MAAAAJ;;", "orcid": ";;;", "linkedin": "robert-garrett-586b82105/;;;", "or_profile": "~Robert_C._Garrett1;~Trevor_Harris1;~Zhuo_Wang6;~Bo_Li8", "aff": "University of Illinois, Urbana Champaign;Texas A&M University - College Station;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;tamu.edu;illinois.edu;", "position": "PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\ngarrett2024validating,\ntitle={Validating Climate Models with Spherical Convolutional Wasserstein Distance},\nauthor={Robert C. Garrett and Trevor Harris and Zhuo Wang and Bo Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mmSFfib6pI}\n}", "github": "", "reviewers": "vRQU;qRTk;UJm5;V9JK", "pdf_size": 1236315, "rating": "5;7;7;8", "confidence": "4;4;5;3", "soundness": "3;3;4;4", "novelty": "1;3;4;3", "presentation": "3;3;3;4", "wc_summary": "39;41;133;83", "wc_strengths": "60;65;75;106", "wc_weaknesses": "47;47;49;6", "wc_questions": "183;20;39;57", "wc_limitations": "11;1;39;16", "wc_review": "340;174;335;268", "wc_reply_reviewers": "40;29;13;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 38.3275357934736 ], "wc_strengths_avg": [ 76.5, 17.867568385205637 ], "wc_weaknesses_avg": [ 37.25, 18.06066167115701 ], "wc_questions_avg": [ 74.75, 63.852858197577966 ], "wc_limitations_avg": [ 16.75, 13.935117509371782 ], "wc_review_avg": [ 279.25, 67.08716345173643 ], "wc_reply_reviewers_avg": [ 20.5, 15.239750654128171 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3244428422615251, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4995000742284124160&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "illinois.edu;tamu.edu;illinois.edu;", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.tamu.edu", "aff_unique_abbr": "UIUC;TAMU", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Urbana-Champaign;College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "moMoWj7jLm", "title": "FormulaReasoning: A Dataset for Formula-Based Numerical Reasoning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "The application of formulas is a fundamental ability of humans when addressing numerical reasoning problems. However, existing numerical reasoning datasets seldom explicitly indicate the formulas employed during the reasoning steps. To bridge this gap, we propose a dataset for formula-based numerical reasoning called FormulaReasoning, which consists of 5,420 questions. We further conduct evaluations on LLMs with size ranging from 7B to over 100B parameters utilizing zero-shot and few-shot chain-of-thoughts methods and we explored the approach of using retrieval-augmented LLMs when providing an external formula database. We divide the reasoning process into formula generation, parameter extraction, and calculation, and use the data augmentation method to enhance the model ability of the model with parameters count less than 7B. Our empirical findings underscore the significant potential for improvement in existing models when applied to our complex, formula-driven FormulaReasoning.", "keywords": "formula;numerical reasoning;question answering", "primary_area": "", "supplementary_material": "/attachment/e50191ea563e9ec1d675fab84fac42e1b1f2bf0e.pdf", "author": "Xiao Li;Bolin Zhu;Sichen Liu;Yin Zhu;Yiwei liu;Gong Cheng", "authorids": "~Xiao_Li10;~Bolin_Zhu2;~Sichen_Liu1;~Yin_Zhu2;~Yiwei_liu3;~Gong_Cheng3", "gender": "M;M;M;M;M;M", "homepage": ";;;;https://bsnmldb.github.io/;http://ws.nju.edu.cn/~gcheng", "dblp": "66/2069;156/8264;93/881;;;69/1215-1", "google_scholar": "WAHzzk0AAAAJ;;jSdijbgAAAAJ;;;_ncKAiwAAAAJ", "orcid": "0009-0008-2670-9495;0009-0007-3228-5046;;;0009-0003-4999-4421;0000-0003-3539-7776", "linkedin": ";;;yin-zhu-a09269225/;;gongcheng/", "or_profile": "~Xiao_Li10;~Bolin_Zhu2;~Sichen_Liu1;~Yin_Zhu2;~Yiwei_liu3;~Gong_Cheng3", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;MS student;MS student;Undergrad student;Full Professor", "bibtex": "@misc{\nanonymous2024formulareasoning,\ntitle={FormulaReasoning: A Dataset for Formula-Based Numerical Reasoning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=moMoWj7jLm}\n}", "github": "", "project": "", "reviewers": "CPqs;A79m;Wss8", "site": "https://openreview.net/forum?id=moMoWj7jLm", "pdf_size": 2063193, "rating": "6;7;8", "confidence": "4;3;4", "wc_summary_and_contributions": "97;61;47", "wc_strengths": "5;16;3", "wc_improvement": "28;14;3", "wc_limitations": "5;4;3", "wc_correctness": "1;6;2", "wc_clarity": "3;2;2", "wc_relation_to_prior_work": "13;17;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "154;122;63", "wc_reply_reviewers": "0;12;0", "wc_reply_authors": "408;422;94", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 68.33333333333333, 21.060758665241753 ], "wc_strengths_avg": [ 8.0, 5.715476066494082 ], "wc_improvement_avg": [ 15.0, 10.23067283548187 ], "wc_limitations_avg": [ 4.0, 0.816496580927726 ], "wc_correctness_avg": [ 3.0, 2.160246899469287 ], "wc_clarity_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_relation_to_prior_work_avg": [ 10.333333333333334, 6.79869268479038 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 113.0, 37.69173207305107 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 308.0, 151.42875112298412 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jui33bwvQtgJ:scholar.google.com/&scioq=FormulaReasoning:+A+Dataset+for+Formula-Based+Numerical+Reasoning&hl=en&as_sdt=0,31", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "P$^2$C$^2$Net: PDE-Preserved Coarse Correction Network for efficient prediction of spatiotemporal dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93729", "id": "motImXq3B1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=motImXq3B1", "openreview": "https://openreview.net/forum?id=motImXq3B1", "poster": "/media/PosterPDFs/NeurIPS%202024/93729.png?t=1731640247.9509554", "project": "", "author_site": "Qi Wang, Pu Ren, Hao Zhou, Xin-Yang Liu, Zhiwen Deng, Yi Zhang, Zeruizhi Cheng, Hongsheng Liu, Zidong Wang, Jian-Xun Wang, Ji-Rong Wen, Hao Sun, Yang Liu", "tldr": "", "abstract": "When solving partial differential equations (PDEs), classical numerical methods often require fine mesh grids and small time stepping to meet stability, consistency, and convergence conditions, leading to high computational cost. Recently, machine learning has been increasingly utilized to solve PDE problems, but they often encounter challenges related to interpretability, generalizability, and strong dependency on rich labeled data. Hence, we introduce a new PDE-Preserved Coarse Correction Network (P$^2$C$^2$Net) to efficiently solve spatiotemporal PDE problems on coarse mesh grids in small data regimes. The model consists of two synergistic modules: (1) a trainable PDE block that learns to update the coarse solution (i.e., the system state), based on a high-order numerical scheme with boundary condition encoding, and (2) a neural network block that consistently corrects the solution on the fly. In particular, we propose a learnable symmetric Conv filter, with weights shared over the entire model, to accurately estimate the spatial derivatives of PDE based on the neural-corrected system state. The resulting physics-encoded model is capable of handling limited training data (e.g., 3--5 trajectories) and accelerates the prediction of PDE solutions on coarse spatiotemporal grids while maintaining a high accuracy. P$^2$C$^2$Net achieves consistent state-of-the-art performance with over 50\\% gain (e.g., in terms of relative prediction error) across four datasets covering complex reaction-diffusion processes and turbulent flows.", "keywords": "physics-informed learning;coarse model;spatiotemporal dynamics prediction", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Qi Wang;Pu Ren;Hao Zhou;Xin-Yang Liu;Zhiwen Deng;Yi Zhang;Ruizhi Chengze;Hongsheng Liu;Zidong Wang;Jian-Xun Wang;Ji-Rong Wen;Hao Sun;Yang Liu", "authorids": "~Qi_Wang30;~Pu_Ren1;~Hao_Zhou33;~Xin-Yang_Liu1;~Zhiwen_Deng1;~Yi_Zhang92;~Ruizhi_Chengze1;~Hongsheng_Liu1;~Zidong_Wang1;~Jian-Xun_Wang1;~Ji-Rong_Wen1;~Hao_Sun4;~Yang_Liu52", "gender": "M;M;M;M;M;M;M;M;;M;M;;F", "homepage": ";https://paulpuren.github.io/;https://github.com/Haozhouhao;;https://www.researchgate.net/profile/Zhiwen-Deng-2;https://gitee.com/yi-zhang95;;https://hsliuustc.github.io/;;http://sites.nd.edu/jianxun-wang/;https://gsai.ruc.edu.cn/english/jrwen;;", "dblp": ";;;;;;389/6204;59/9103-2;97/5229-10.html;163/4396;w/JRWen;;", "google_scholar": ";FiuAyGwAAAAJ;;DI9KTLoAAAAJ;;;;M7ag7rIAAAAJ;;1cXHUD4AAAAJ;tbxCHJgAAAAJ;;34upg6YAAAAJ", "orcid": "0009-0009-4712-3474;0000-0002-6354-385X;;;;0000-0003-3487-7073;0009-0002-5736-0165;0000-0003-0509-7967;0009-0002-6145-4594;;0000-0002-9777-9676;;0000-0003-0127-4030", "linkedin": ";;;xin-yang-liu-560147117;;;https://linkedin.com/in/zeruizhi;;%E7%B4%AB%E4%B8%9C-%E7%8E%8B-230849105/;;;;", "or_profile": "~Qi_Wang30;~Pu_Ren1;~Hao_Zhou33;~Xin-Yang_Liu1;~Zhiwen_Deng1;~Yi_Zhang92;~Ruizhi_Chengze1;~Hongsheng_Liu1;~Zidong_Wang1;~Jian-Xun_Wang1;~Ji-Rong_Wen1;~Hao_Sun4;~Yang_Liu52", "aff": "Renmin University of China;Lawrence Berkeley National Lab;Renmin University of China;University of Notre Dame;ByteDance Inc.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Zhejiang University;University of Notre Dame;Renmin University of China;;University of Chinese Academy of Sciences", "aff_domain": "ruc.edu.cn;lbl.gov;ruc.edu.cn;nd.edu;bytedance.com;huawei.com;huawei.com;huawei.com;zju.edu.cn;nd.edu;ruc.edu.cn;;ucas.ac.cn", "position": "PhD student;Postdoc;PhD student;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;Assistant Professor;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nwang2024pcnet,\ntitle={P\\${\\textasciicircum}2\\$C\\${\\textasciicircum}2\\$Net: {PDE}-Preserved Coarse Correction Network for efficient prediction of spatiotemporal dynamics},\nauthor={Qi Wang and Pu Ren and Hao Zhou and Xin-Yang Liu and Zhiwen Deng and Yi Zhang and Ruizhi Chengze and Hongsheng Liu and Zidong Wang and Jian-Xun Wang and Ji-Rong Wen and Hao Sun and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=motImXq3B1}\n}", "github": "", "reviewers": "bvnV;aBLf;WfL9;xwvp", "pdf_size": 9911653, "rating": "6;6;6;6", "confidence": "5;4;4;5", "soundness": "2;3;2;4", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "129;46;59;113", "wc_strengths": "90;32;30;69", "wc_weaknesses": "136;115;185;109", "wc_questions": "52;51;117;621", "wc_limitations": "7;6;1;1", "wc_review": "414;250;392;913", "wc_reply_reviewers": "0;22;9;0", "wc_reply_authors": "105;128;128;105", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;4;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.75, 35.01696017646306 ], "wc_strengths_avg": [ 55.25, 25.370997221236692 ], "wc_weaknesses_avg": [ 136.25, 29.877876430563134 ], "wc_questions_avg": [ 210.25, 238.64971715885187 ], "wc_limitations_avg": [ 3.75, 2.7726341266023544 ], "wc_review_avg": [ 492.25, 250.9425980179531 ], "wc_reply_reviewers_avg": [ 7.75, 9.01041064547005 ], "wc_reply_authors_avg": [ 116.5, 11.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2639643869478942194&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ruc.edu.cn;lbl.gov;ruc.edu.cn;nd.edu;bytedance.com;huawei.com;huawei.com;huawei.com;zju.edu.cn;nd.edu;ruc.edu.cn;;ucas.ac.cn", "author_num": 13, "aff_unique_index": "0;1;0;2;3;4;4;4;5;2;0;6", "aff_unique_norm": "Renmin University of China;Lawrence Berkeley National Laboratory;University of Notre Dame;ByteDance;Huawei;Zhejiang University;University of Chinese Academy of Sciences", "aff_unique_dep": ";;;;Huawei Technologies;;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.lbl.gov;https://www.nd.edu;https://www.bytedance.com;https://www.huawei.com;https://www.zju.edu.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "RUC;LBNL;Notre Dame;ByteDance;Huawei;ZJU;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;0;1;0;0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Autonomous Agents for Collaborative Task under Information Asymmetry", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93728", "id": "mp6OWpDIJC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mp6OWpDIJC", "openreview": "https://openreview.net/forum?id=mp6OWpDIJC", "poster": "/media/PosterPDFs/NeurIPS%202024/93728.png?t=1733814125.425699", "project": "", "author_site": "Wei Liu, Chenxi Wang, YiFei Wang, Zihao Xie, Rennai Qiu, Yufan Dang, Zhuoyun Du, Weize Chen, Cheng Yang, Chen Qian", "tldr": "", "abstract": "Large Language Model Multi-Agent Systems (LLM-MAS) have greatly progressed in solving complex tasks. It communicates among agents within the system to collaboratively solve tasks, under the premise of shared information. However, when agents' collaborations are leveraged to perform multi-person tasks, a new challenge arises due to information asymmetry, since each agent can only access the information of its human user. Previous MAS struggle to complete tasks under this condition. To address this, we propose a new MAS paradigm termed iAgents, which denotes Informative Multi-Agent Systems. In iAgents, the human social network is mirrored in the agent network, where agents proactively exchange human information necessary for task resolution, thereby overcoming information asymmetry. iAgents employs a novel agent reasoning mechanism, InfoNav, to navigate agents' communication towards effective information exchange. Together with InfoNav, iAgents organizes human information in a mixed memory to provide agents with accurate and comprehensive information for exchange. Additionally, we introduce InformativeBench, the first benchmark tailored for evaluating LLM agents' task-solving ability under information asymmetry. Experimental results show that iAgents can collaborate within a social network of 140 individuals and 588 relationships, autonomously communicate over 30 turns, and retrieve information from nearly 70,000 messages to complete tasks within 3 minutes.", "keywords": "autonomous agent;social network;large language model", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "/attachment/6b708535a83900de21332a4c91d5966ab83e1ab4.zip", "author": "Wei Liu;Chenxi Wang;YiFei Wang;Zihao Xie;Rennai Qiu;Yufan Dang;Zhuoyun Du;Weize Chen;Cheng Yang;Chen Qian", "authorids": "~Wei_Liu40;~Chenxi_Wang6;~YiFei_Wang13;~Zihao_Xie1;~Rennai_Qiu1;~Yufan_Dang1;~Zhuoyun_Du1;~Weize_Chen1;~Cheng_Yang6;~Chen_Qian8", "gender": "M;F;M;;;F;M;M;M;", "homepage": "https://thinkwee.top/about/;https://github.com/Aurora-cx;https://github.com/wyifei26;https://github.com/XZH-HZX;https://github.com/Qu-rance;https://na-wen.github.io/;https://xiaodu-flying.github.io/;https://chenweize1998.github.io/;https://albertyang33.github.io/;", "dblp": ";;;;;366/0731;379/6964;245/7488;49/1457-2;", "google_scholar": "QvW2leIAAAAJ;Gtj8924AAAAJ;;;;https://scholar.google.com.sg/citations?user=zBvXAyIAAAAJ;pY1vwigAAAAJ;0CoGHtIAAAAJ;OlLjVUcAAAAJ;", "orcid": "0000-0003-0011-7797;0009-0006-3611-0000;;;;;0009-0002-6050-1889;;0000-0001-7821-0030;", "linkedin": "thinkwee/;;;;;;;;;", "or_profile": "~Wei_Liu40;~Chenxi_Wang6;~YiFei_Wang13;~Zihao_Xie1;~Rennai_Qiu1;~Yufan_Dang1;~Zhuoyun_Du1;~Weize_Chen1;~Cheng_Yang6;~Chen_Qian8", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Jinan University;Tsinghua University;Beijing University of Posts and Telecommunications;", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;jnu.edu.cn;tsinghua.edu.cn;bupt.edu.cn;", "position": "Researcher;Intern;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nliu2024autonomous,\ntitle={Autonomous Agents for Collaborative Task under Information Asymmetry},\nauthor={Wei Liu and Chenxi Wang and YiFei Wang and Zihao Xie and Rennai Qiu and Yufan Dang and Zhuoyun Du and Weize Chen and Cheng Yang and Chen Qian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mp6OWpDIJC}\n}", "github": "", "reviewers": "UPLR;7PRt;Jbnv", "pdf_size": 12679461, "rating": "5;6;6", "confidence": "3;4;2", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "2;3;3", "wc_summary": "43;83;53", "wc_strengths": "39;66;85", "wc_weaknesses": "112;62;72", "wc_questions": "17;216;36", "wc_limitations": "1;9;9", "wc_review": "212;436;255", "wc_reply_reviewers": "0;17;31", "wc_reply_authors": "0;1050;545", "reply_reviewers": "0;1;1", "reply_authors": "1;4;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 59.666666666666664, 16.99673171197595 ], "wc_strengths_avg": [ 63.333333333333336, 18.87385022252275 ], "wc_weaknesses_avg": [ 82.0, 21.602468994692867 ], "wc_questions_avg": [ 89.66666666666667, 89.6672862432126 ], "wc_limitations_avg": [ 6.333333333333333, 3.7712361663282534 ], "wc_review_avg": [ 301.0, 97.06011882676977 ], "wc_reply_reviewers_avg": [ 16.0, 12.675435561221029 ], "wc_reply_authors_avg": [ 531.6666666666666, 428.7643745565726 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3040703402880303067&as_sdt=1005&sciodt=0,4&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;jnu.edu.cn;tsinghua.edu.cn;bupt.edu.cn;", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;1;0;2", "aff_unique_norm": "Tsinghua University;Jinan University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.jnu.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "THU;JNU;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DuQuant: Distributing Outliers via Dual Transformation Makes Stronger Quantized LLMs", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93727", "id": "mp8u2Pcmqz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mp8u2Pcmqz", "openreview": "https://openreview.net/forum?id=mp8u2Pcmqz", "poster": "/media/PosterPDFs/NeurIPS%202024/93727.png?t=1733734195.4992867", "project": "", "author_site": "Haokun Lin, Haobo Xu, Yichen WU, Jingzhi Cui, Yingtao Zhang, Linzhan Mou, Linqi Song, Zhenan Sun, Ying Wei", "tldr": "", "abstract": "Quantization of large language models (LLMs) faces significant challenges, particularly due to the presence of outlier activations that impede efficient low-bit representation. Traditional approaches predominantly address Normal Outliers, which are activations across all tokens with relatively large magnitudes. However, these methods struggle with smoothing Massive Outliers that display significantly larger values, which leads to significant performance degradation in low-bit quantization. In this paper, we introduce DuQuant, a novel approach that utilizes rotation and permutation transformations to more effectively mitigate both massive and normal outliers. First, DuQuant starts by constructing the rotation matrix, using specific outlier dimensions as prior knowledge, to redistribute outliers to adjacent channels by block-wise rotation. Second, We further employ a zigzag permutation to balance the distribution of outliers across blocks, thereby reducing block-wise variance. A subsequent rotation further smooths the activation landscape, enhancing model performance. DuQuant simplifies the quantization process and excels in managing outliers, outperforming the state-of-the-art baselines across various sizes and types of LLMs on multiple tasks, even with 4-bit weight-activation quantization. Our code is available at https://github.com/Hsu1023/DuQuant.", "keywords": "Model compression;Post-training Quantization;PTQ of Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Haokun Lin;Haobo Xu;Yichen Wu;Jingzhi Cui;Yingtao Zhang;Linzhan Mou;Linqi Song;Zhenan Sun;Ying Wei", "authorids": "~Haokun_Lin4;~Haobo_Xu2;~Yichen_Wu2;~Jingzhi_Cui1;~Yingtao_Zhang3;~Linzhan_Mou1;~Linqi_Song1;~Zhenan_Sun1;~Ying_Wei1", "gender": ";;M;M;M;;M;M;F", "homepage": ";;https://wuyichen-97.github.io/;https://github.com/sunlight02;https://brain.tsinghua.edu.cn/en/info/1048/1156.htm;;https://sites.google.com/site/aisquaredlab/;http://cripac.ia.ac.cn/people/znsun/index.html;https://wei-ying.net/", "dblp": ";;;;;;137/7963.html;13/5916;14/4899-1", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;;;;UcGN3MoAAAAJ;PuZGODYAAAAJ;5UpFdKsAAAAJ", "orcid": ";;0000-0003-2859-3285;;;;0000-0003-2756-4984;0000-0003-4029-9935;", "linkedin": ";;;;;;;;", "or_profile": "~Haokun_Lin4;~Haobo_Xu2;~Yichen_Wu2;~Jingzhi_Cui1;~Yingtao_Zhang3;~Linzhan_Mou1;~Linqi_Song1;~Zhenan_Sun1;~Ying_Wei1", "aff": ";;City University of Hong Kong;Tsinghua University;Tsinghua University;;City University of Hong Kong;Institute of Automation, Chinese Academy of Sciences;Nanyang Technological University", "aff_domain": ";;cityu.edu.hk;mails.tsinghua.edu.cn;tsinghua.edu.cn;;cityu.edu.hk;ia.ac.cn;ntu.edu.sg", "position": ";;PhD student;Undergrad student;PhD student;;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2024duquant,\ntitle={DuQuant: Distributing Outliers via Dual Transformation Makes Stronger Quantized {LLM}s},\nauthor={Haokun Lin and Haobo Xu and Yichen Wu and Jingzhi Cui and Yingtao Zhang and Linzhan Mou and Linqi Song and Zhenan Sun and Ying Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mp8u2Pcmqz}\n}", "github": "", "reviewers": "8e4p;x516;Rbrg;XTv7;nwuk", "pdf_size": 23836691, "rating": "7;7;8;8;8", "confidence": "4;3;3;4;3", "soundness": "3;4;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;4;3;2", "wc_summary": "34;130;91;139;52", "wc_strengths": "66;136;79;316;53", "wc_weaknesses": "70;171;209;134;62", "wc_questions": "74;3;59;155;38", "wc_limitations": "2;2;16;50;1", "wc_review": "246;442;454;794;206", "wc_reply_reviewers": "26;28;139;90;22", "wc_reply_authors": "40;49;521;49;34", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;3;2;2", "rating_avg": [ 7.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 89.2, 41.42173342582369 ], "wc_strengths_avg": [ 130.0, 97.21933963980624 ], "wc_weaknesses_avg": [ 129.2, 56.84857078238643 ], "wc_questions_avg": [ 65.8, 50.57034704251099 ], "wc_limitations_avg": [ 14.2, 18.74459922217597 ], "wc_review_avg": [ 428.4, 208.4395355972566 ], "wc_reply_reviewers_avg": [ 61.0, 46.389654018972806 ], "wc_reply_authors_avg": [ 138.6, 191.284709268671 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.16666666666666666, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12670905123174606829&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";;cityu.edu.hk;mails.tsinghua.edu.cn;tsinghua.edu.cn;;cityu.edu.hk;ia.ac.cn;ntu.edu.sg", "author_num": 9, "aff_unique_index": "0;1;1;0;2;3", "aff_unique_norm": "City University of Hong Kong;Tsinghua University;Chinese Academy of Sciences;Nanyang Technological University", "aff_unique_dep": ";;Institute of Automation;", "aff_unique_url": "https://www.cityu.edu.hk;https://www.tsinghua.edu.cn;http://www.ia.cas.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "CityU;THU;CAS;NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "CONTRAST: Continual Multi-source Adaptation to Dynamic Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93726", "id": "mpDbWjLzfT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mpDbWjLzfT", "openreview": "https://openreview.net/forum?id=mpDbWjLzfT", "poster": "/media/PosterPDFs/NeurIPS%202024/93726.png?t=1733786377.8077967", "project": "", "author_site": "Sk Miraj Ahmed, Fahim Faisal Niloy, Xiangyu Chang, Dripta S. Raychaudhuri, Samet Oymak, Amit Roy-Chowdhury", "tldr": "", "abstract": "Adapting to dynamic data distributions is a practical yet challenging task. One effective strategy is to use a model ensemble, which leverages the diverse expertise of different models to transfer knowledge to evolving data distributions. However, this approach faces difficulties when the dynamic test distribution is available only in small batches and without access to the original source data. To address the challenge of adapting to dynamic distributions in such practical settings, we propose continual multi-source adaptation to dynamic distributions (CONTRAST), a novel method that optimally combines multiple source models to adapt to the dynamic test data. CONTRAST has two distinguishing features. First, it efficiently computes the optimal combination weights to combine the source models to adapt to the test data distribution continuously as a function of time. Second, it identifies which of the source model parameters to update so that only the model which is most correlated to the target data is adapted, leaving the less correlated ones untouched; this mitigates the issue of ``forgetting\" the source model parameters by focusing only on the source model that exhibits the strongest correlation with the test batch distribution. Through theoretical analysis we show that the proposed method is able to optimally combine the source models and prioritize updates to the model least prone to forgetting. Experimental analysis on diverse datasets demonstrates that the combination of multiple source models does at least as well as the best source (with hindsight knowledge), and performance does not degrade as the test data distribution changes over time (robust to forgetting).", "keywords": "Multi-source Adaptation;Online learning;Test Time Adaptation", "primary_area": "online_learning", "supplementary_material": "", "author": "Sk Miraj Ahmed;Fahim Faisal Niloy;Xiangyu Chang;Dripta S. Raychaudhuri;Samet Oymak;Amit Roy-Chowdhury", "authorids": "~Sk_Miraj_Ahmed1;~Fahim_Faisal_Niloy1;~Xiangyu_Chang2;~Dripta_S._Raychaudhuri2;~Samet_Oymak2;~Amit_Roy-Chowdhury2", "gender": "M;;M;M;;", "homepage": ";;;https://driptarc.github.io/;;", "dblp": "270/8108;;;209/9843;;", "google_scholar": "GcOJlW8AAAAJ;;mQh2GmoAAAAJ;vT3WBpUAAAAJ;;", "orcid": "0000-0002-9308-6266;;;;;", "linkedin": "miraj-ahmed-94727167;;;dripta-raychaudhuri-236205100/;;", "or_profile": "~Sk_Miraj_Ahmed1;~Fahim_Faisal_Niloy1;~Xiangyu_Chang2;~Dripta_S._Raychaudhuri2;~Samet_Oymak2;~Amit_Roy-Chowdhury2", "aff": "University of California, Riverside;;University of California, Riverside;Amazon;;", "aff_domain": "ucr.edu;;ucr.edu;amazon.com;;", "position": "PhD student;;PhD student;Researcher;;", "bibtex": "@inproceedings{\nahmed2024contrast,\ntitle={{CONTRAST}: Continual Multi-source Adaptation to Dynamic Distributions},\nauthor={Sk Miraj Ahmed and Fahim Faisal Niloy and Xiangyu Chang and Dripta S. Raychaudhuri and Samet Oymak and Amit Roy-Chowdhury},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mpDbWjLzfT}\n}", "github": "", "reviewers": "z72Z;C1aF;f99A;YLKD", "pdf_size": 3598246, "rating": "5;6;6;7", "confidence": "4;4;3;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "73;82;134;81", "wc_strengths": "59;41;25;56", "wc_weaknesses": "167;165;138;49", "wc_questions": "6;113;113;14", "wc_limitations": "7;1;8;12", "wc_review": "312;402;418;212", "wc_reply_reviewers": "9;22;0;0", "wc_reply_authors": "18;8;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.5, 24.212600025606502 ], "wc_strengths_avg": [ 45.25, 13.534677683639163 ], "wc_weaknesses_avg": [ 129.75, 48.0071609241788 ], "wc_questions_avg": [ 61.5, 51.577611422011394 ], "wc_limitations_avg": [ 7.0, 3.9370039370059056 ], "wc_review_avg": [ 336.0, 82.20705565825844 ], "wc_reply_reviewers_avg": [ 7.75, 9.01041064547005 ], "wc_reply_authors_avg": [ 6.5, 7.399324293474371 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5926225343237557865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ucr.edu;;ucr.edu;amazon.com;;", "author_num": 6, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Riverside;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucr.edu;https://www.amazon.com", "aff_unique_abbr": "UCR;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Riverside;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Phased Consistency Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93725", "id": "mtBmKqyqGS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mtBmKqyqGS", "openreview": "https://openreview.net/forum?id=mtBmKqyqGS", "poster": "/media/PosterPDFs/NeurIPS%202024/93725.png?t=1730775002.148292", "project": "", "author_site": "Fu-Yun Wang, Zhaoyang Huang, Alexander Bergman, Dazhong Shen, Peng Gao, Michael Lingelbach, Keqiang Sun, Weikang Bian, Guanglu Song, Yu Liu, Xiaogang Wang, Hongsheng Li", "tldr": "", "abstract": "Consistency Models (CMs) have made significant progress in accelerating the generation of diffusion models. However, their application to high-resolution, text-conditioned image generation in the latent space remains unsatisfactory. In this paper, we identify three key flaws in the current design of Latent Consistency Models~(LCMs). We investigate the reasons behind these limitations and propose Phased Consistency Models (PCMs), which generalize the design space and address the identified limitations. Our evaluations demonstrate that PCMs outperform LCMs across 1--16 step generation settings. While PCMs are specifically designed for multi-step refinement, they achieve comparable 1-step generation results to previously state-of-the-art specifically designed 1-step methods. Furthermore, we show the methodology of PCMs is versatile and applicable to video generation, enabling us to train the state-of-the-art few-step text-to-video generator. Our code is available at https://github.com/G-U-N/Phased-Consistency-Model.", "keywords": "Consistency Models;Diffusion Models;Distillation", "primary_area": "generative_models", "supplementary_material": "", "author": "Fu-Yun Wang;Zhaoyang Huang;Alexander William Bergman;Dazhong Shen;Peng Gao;Michael Lingelbach;Keqiang Sun;Weikang Bian;Guanglu Song;Yu Liu;Xiaogang Wang;Hongsheng Li", "authorids": "~Fu-Yun_Wang1;~Zhaoyang_Huang2;~Alexander_William_Bergman1;~Dazhong_Shen1;~Peng_Gao3;~Michael_Lingelbach1;~Keqiang_Sun1;~Weikang_Bian2;~Guanglu_Song2;~Yu_Liu2;~Xiaogang_Wang2;~Hongsheng_Li3", "gender": "M;;M;M;M;M;M;M;M;M;M;", "homepage": "https://g-u-n.github.io/;https://drinkingcoder.github.io/;https://alexanderbergman7.github.io/;http://www.shendazhong.com/;;https://keqiangsun.github.io/;;http://liuyu.us;http://www.ee.cuhk.edu.hk/~xgwang/;http://www.ee.cuhk.edu.hk/~hsli;;https://wkbian.github.io/", "dblp": "309/6410;;256/1554;222/7906;;;207/4745;97/2274-15;91/6236-1.html;27/7402-1;;252/4248", "google_scholar": "R15m3J4AAAAJ;y2xos7IAAAAJ;-DqNXmAAAAAJ;5vSh09YAAAAJ;d4xUjL8AAAAJ;AJ7qJDEAAAAJ;Bd3v08QAAAAJ;;https://scholar.google.com.hk/citations?user=-B5JgjsAAAAJ;BN2Ze-QAAAAJ;miFIAFMAAAAJ;_PjUeqcAAAAJ", "orcid": ";0000-0001-7688-1471;;0000-0002-3947-4153;;0000-0003-2900-1202;;;;;;0000-0001-9986-3348", "linkedin": ";;;;;;;;;;;", "or_profile": "~Fu-Yun_Wang1;~Zhaoyang_Huang2;~Alexander_William_Bergman1;~Dazhong_Shen1;~Michael_Lingelbach1;~Keqiang_Sun1;~Guanglu_Song2;~Yu_Liu2;~Xiaogang_Wang2;~Hongsheng_Li3;~Gao_Peng1;~Weikang_BIAN1", "aff": "The Chinese University of Hong Kong;Avolution AI;Hedra;Shanghai Artificial Intelligence Laboratory;Stanford University;The Chinese University of Hong Kong, The Chinese University of Hong Kong;Sensetime;SenseTime;The Chinese University of Hong Kong;The Chinese University of Hong Kong;shanghai ai lab ;The Chinese University of Hong Kong, The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;avolutionai.com;hedra.com;pjlab.org.cn;stanford.edu;ee.cuhk.edu.hk;sensetime.com;sensetime.com;cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn;ee.cuhk.edu.hk", "position": "PhD student;Researcher;Principal Researcher;Researcher;PhD student;PhD student;Computer Vision Researcher;Principal Researcher;Full Professor;Associate Professor;Researcher;Intern", "bibtex": "@inproceedings{\nwang2024phased,\ntitle={Phased Consistency Models},\nauthor={Fu-Yun Wang and Zhaoyang Huang and Alexander William Bergman and Dazhong Shen and Peng Gao and Michael Lingelbach and Keqiang Sun and Weikang Bian and Guanglu Song and Yu Liu and Xiaogang Wang and Hongsheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mtBmKqyqGS}\n}", "github": "", "reviewers": "ukQK;sXTg;tuze", "pdf_size": 47885545, "rating": "3;6;8", "confidence": "5;3;4", "soundness": "2;3;4", "novelty": "1;2;3", "presentation": "2;2;3", "wc_summary": "68;74;115", "wc_strengths": "44;70;103", "wc_weaknesses": "369;72;5", "wc_questions": "3;4;57", "wc_limitations": "6;17;14", "wc_review": "490;237;294", "wc_reply_reviewers": "755;13;11", "wc_reply_authors": "949;37;53", "reply_reviewers": "2;1;1", "reply_authors": "6;2;2", "rating_avg": [ 5.666666666666667, 2.0548046676563256 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 85.66666666666667, 20.885933597094056 ], "wc_strengths_avg": [ 72.33333333333333, 24.143091949642425 ], "wc_weaknesses_avg": [ 148.66666666666666, 158.18203297326644 ], "wc_questions_avg": [ 21.333333333333332, 25.223445883190152 ], "wc_limitations_avg": [ 12.333333333333334, 4.642796092394707 ], "wc_review_avg": [ 340.3333333333333, 108.35845862486026 ], "wc_reply_reviewers_avg": [ 259.6666666666667, 350.2545106379392 ], "wc_reply_authors_avg": [ 346.3333333333333, 426.19974451215035 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5960395606792698, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10094469015338156587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cuhk.edu.hk;avolutionai.com;hedra.com;pjlab.org.cn;stanford.edu;ee.cuhk.edu.hk;sensetime.com;sensetime.com;cuhk.edu.hk;cuhk.edu.hk;pjlab.org.cn;ee.cuhk.edu.hk", "author_num": 12, "aff_unique_index": "0;1;2;3;4;0;5;5;0;0;6;0", "aff_unique_norm": "Chinese University of Hong Kong;Avolution AI;Hedra;Shanghai Artificial Intelligence Laboratory;Stanford University;SenseTime;Shanghai AI Lab", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.cuhk.edu.hk;;;http://www.shailab.org/;https://www.stanford.edu;https://www.sensetime.com;https://www.shanghaiailab.com", "aff_unique_abbr": "CUHK;;;Shanghai AI Lab;Stanford;SenseTime;Shanghai AI Lab", "aff_campus_unique_index": "0;2;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;;Stanford", "aff_country_unique_index": "0;1;0;3;0;0;0;0;0;0;0", "aff_country_unique": "China;Unknown;;United States" }, { "title": "A Novel Unified Architecture for Low-Shot Counting by Detection and Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93724", "id": "mtOPyMkSRk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mtOPyMkSRk", "openreview": "https://openreview.net/forum?id=mtOPyMkSRk", "poster": "/media/PosterPDFs/NeurIPS%202024/93724.png?t=1733315910.4605708", "project": "", "author_site": "Jer Pelhan, Alan Lukezic, Vitjan Zavrtanik, Matej Kristan", "tldr": "", "abstract": "Low-shot object counters estimate the number of objects in an image using few or no annotated exemplars. Objects are localized by matching them to prototypes, which are constructed by unsupervised image-wide object appearance aggregation.\nDue to potentially diverse object appearances, the existing approaches often lead to overgeneralization and false positive detections.\nFurthermore, the best-performing methods train object localization by a surrogate loss, that predicts a unit Gaussian at each object center. This loss is sensitive to annotation error, hyperparameters and does not directly optimize the detection task, leading to suboptimal counts.\nWe introduce GeCo, a novel low-shot counter that achieves accurate object detection, segmentation, and count estimation in a unified architecture.\nGeCo robustly generalizes the prototypes across objects appearances through a novel dense object query formulation. \nIn addition, a novel counting loss is proposed, that directly optimizes the detection task and avoids the issues of the standard surrogate loss. \nGeCo surpasses the leading few-shot detection-based counters by $\\sim$25\\% in the total count MAE, achieves superior detection accuracy and sets a new solid state-of-the-art result across all low-shot counting setups. \nThe code will be available on GitHub.", "keywords": "low-shot counting;few-shot counting;object counting;small object detection; small object coutning;small objects;detection;counting;loss function", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jer Pelhan;Alan Lukezic;Vitjan Zavrtanik;Matej Kristan", "authorids": "~Jer_Pelhan2;~Alan_Lukezic1;~Vitjan_Zavrtanik1;~Matej_Kristan1", "gender": "M;M;M;M", "homepage": "https://fri.uni-lj.si/sl/o-fakulteti/osebje/jer-pelhan;;;https://www.vicos.si/people/matej_kristan/", "dblp": "376/8661;160/3679;256/0502;79/1648", "google_scholar": "pDLR7N8AAAAJ;1_5im9oAAAAJ;GO-UpVgAAAAJ;z_8FrEYAAAAJ", "orcid": ";;;0000-0002-4252-4342", "linkedin": ";;;", "or_profile": "~Jer_Pelhan2;~Alan_Lukezic1;~Vitjan_Zavrtanik1;~Matej_Kristan1", "aff": "Faculty of Computer and Information Science;University of Ljubljana;University of Ljubljana;University of Ljubljana", "aff_domain": "fri.uni-lj.si;uni-lj.si;uni-lj.si;uni-lj.si", "position": "Researcher;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\npelhan2024a,\ntitle={A Novel Unified Architecture for Low-Shot Counting by Detection and Segmentation},\nauthor={Jer Pelhan and Alan Lukezic and Vitjan Zavrtanik and Matej Kristan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mtOPyMkSRk}\n}", "github": "", "reviewers": "HCSh;s2FV;QayL", "pdf_size": 17079743, "rating": "5;5;6", "confidence": "4;3;4", "soundness": "3;2;3", "novelty": "2;2;2", "presentation": "2;3;3", "wc_summary": "89;66;66", "wc_strengths": "23;41;92", "wc_weaknesses": "169;92;135", "wc_questions": "15;13;84", "wc_limitations": "1;8;91", "wc_review": "297;220;468", "wc_reply_reviewers": "0;15;84", "wc_reply_authors": "0;0;108", "reply_reviewers": "0;1;2", "reply_authors": "1;1;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 73.66666666666667, 10.842303978193728 ], "wc_strengths_avg": [ 52.0, 29.223278392404914 ], "wc_weaknesses_avg": [ 132.0, 31.506613062445584 ], "wc_questions_avg": [ 37.333333333333336, 33.00841643513901 ], "wc_limitations_avg": [ 33.333333333333336, 40.876507787345155 ], "wc_review_avg": [ 328.3333333333333, 103.64147603906245 ], "wc_reply_reviewers_avg": [ 33.0, 36.578682316343766 ], "wc_reply_authors_avg": [ 36.0, 50.91168824543142 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16780748641308398701&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fri.uni-lj.si;uni-lj.si;uni-lj.si;uni-lj.si", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Faculty of Computer and Information Science;University of Ljubljana", "aff_unique_dep": "Computer and Information Science;", "aff_unique_url": ";https://www.uni-lj.si", "aff_unique_abbr": ";UL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";Slovenia" }, { "title": "S2HPruner: Soft-to-Hard Distillation Bridges the Discretization Gap in Pruning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93723", "id": "mtyy3Myyhz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mtyy3Myyhz", "openreview": "https://openreview.net/forum?id=mtyy3Myyhz", "poster": "", "project": "", "author_site": "Weihao Lin, Shengji Tang, Chong Yu, Peng Ye, Tao Chen", "tldr": "", "abstract": "Recently, differentiable mask pruning methods optimize the continuous relaxation architecture (soft network) as the proxy of the pruned discrete network (hard network) for superior sub-architecture search. However, due to the agnostic impact of the discretization process, the hard network struggles with the equivalent representational capacity as the soft network, namely discretization gap, which severely spoils the pruning performance. In this paper, we first investigate the discretization gap and propose a novel structural differentiable mask pruning framework named S2HPruner to bridge the discretization gap in a one-stage manner. In the training procedure, SH2Pruner forwards both the soft network and its corresponding hard network, then distills the hard network under the supervision of the soft network. To optimize the mask and prevent performance degradation, we propose a decoupled bidirectional knowledge distillation. It blocks the weight updating from the hard to the soft network while maintaining the gradient corresponding to the mask. Compared with existing pruning arts, S2HPruner achieves surpassing pruning performance without fine-tuning on comprehensive benchmarks, including CIFAR-100, Tiny ImageNet, and ImageNet with a variety of network architectures. Besides, investigation and analysis experiments explain the effectiveness of S2HPruner. Codes will be released soon.", "keywords": "model pruning;knowledge distillation;model compression", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Weihao Lin;Shengji Tang;Chong Yu;Peng Ye;Tao Chen", "authorids": "~Weihao_Lin1;~Shengji_Tang1;~Chong_Yu1;~Peng_Ye4;~Tao_Chen6", "gender": "M;M;M;M;M", "homepage": "https://github.com/opposj;;;;https://eetchen.github.io/", "dblp": "295/0536-2.html;330/9865;128/4478;53/930-6;69/510-3", "google_scholar": "k5MQpaIAAAAJ;K7drMDgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;UEZZP5QAAAAJ;https://scholar.google.com.sg/citations?user=w3OoFL0AAAAJ", "orcid": "0009-0009-1894-7626;0009-0001-3456-8310;;0000-0002-8486-7562;", "linkedin": ";;;;", "or_profile": "~Weihao_Lin1;~Shengji_Tang1;~Chong_Yu1;~Peng_Ye4;~Tao_Chen6", "aff": "Fudan University;Fudan University;NVIDIA Corporation;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;nvidia.com;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;Senior architect;PhD student;Full Professor", "bibtex": "@inproceedings{\nlin2024shpruner,\ntitle={S2{HP}runer: Soft-to-Hard Distillation Bridges the Discretization Gap in Pruning},\nauthor={Weihao Lin and Shengji Tang and Chong Yu and Peng Ye and Tao Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mtyy3Myyhz}\n}", "github": "", "reviewers": "aPhZ;ECuu;MZh4", "pdf_size": 700439, "rating": "5;6;6", "confidence": "3;5;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "60;67;90", "wc_strengths": "18;64;33", "wc_weaknesses": "131;51;73", "wc_questions": "5;3;58", "wc_limitations": "1;27;3", "wc_review": "215;212;257", "wc_reply_reviewers": "0;0;31", "wc_reply_authors": "0;0;18", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.33333333333333, 12.81492185782739 ], "wc_strengths_avg": [ 38.333333333333336, 19.154343864744856 ], "wc_weaknesses_avg": [ 85.0, 33.7441352929167 ], "wc_questions_avg": [ 22.0, 25.468935326524086 ], "wc_limitations_avg": [ 10.333333333333334, 11.813363431112899 ], "wc_review_avg": [ 228.0, 20.54263858417414 ], "wc_reply_reviewers_avg": [ 10.333333333333334, 14.613540144521982 ], "wc_reply_authors_avg": [ 6.0, 8.48528137423857 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fj2FOtMOezwJ:scholar.google.com/&scioq=S2HPruner:+Soft-to-Hard+Distillation+Bridges+the+Discretization+Gap+in+Pruning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "fudan.edu.cn;fudan.edu.cn;nvidia.com;fudan.edu.cn;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Fudan University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.fudan.edu.cn;https://www.nvidia.com", "aff_unique_abbr": "Fudan;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "MGF: Mixed Gaussian Flow for Diverse Trajectory Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93722", "id": "muYhNDlxWc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=muYhNDlxWc", "openreview": "https://openreview.net/forum?id=muYhNDlxWc", "poster": "/media/PosterPDFs/NeurIPS%202024/93722.png?t=1731657151.6441517", "project": "", "author_site": "Jiahe Chen, Jinkun Cao, Dahua Lin, Kris Kitani, Jiangmiao Pang", "tldr": "", "abstract": "To predict future trajectories, the normalizing flow with a standard Gaussian prior suffers from weak diversity. \nThe ineffectiveness comes from the conflict between the fact of asymmetric and multi-modal distribution of likely outcomes and symmetric and single-modal original distribution and supervision losses.\nInstead, we propose constructing a mixed Gaussian prior for a normalizing flow model for trajectory prediction.\nThe prior is constructed by analyzing the trajectory patterns in the training samples without requiring extra annotations while showing better expressiveness and being multi-modal and asymmetric.\nBesides diversity, it also provides better controllability for probabilistic trajectory generation.\nWe name our method Mixed Gaussian Flow (MGF). It achieves state-of-the-art performance in the evaluation of both trajectory alignment and diversity on the popular UCY/ETH and SDD datasets. Code is available at https://github.com/mulplue/MGF.", "keywords": "trajectory prediction;trajectory forecasting", "primary_area": "machine_vision", "supplementary_material": "/attachment/7491e7fad72cd61338a3859a4fbe9edce575d7a0.zip", "author": "Jiahe Chen;Jinkun Cao;Dahua Lin;Kris M. Kitani;Jiangmiao Pang", "authorids": "~Jiahe_Chen1;~Jinkun_Cao1;~Dahua_Lin1;~Kris_M._Kitani1;~Jiangmiao_Pang1", "gender": "M;M;M;M;M", "homepage": "https://jhchen.info;https://www.jinkuncao.com;http://dahua.site;http://www.cs.cmu.edu/~kkitani/;https://oceanpang.github.io/", "dblp": ";224/0126;53/6088;42/163;231/7630", "google_scholar": "zrVXBcoAAAAJ;xDtTbmQAAAAJ;GMzzRRUAAAAJ;yv3sH74AAAAJ;https://scholar.google.com/citations?authuser=0", "orcid": ";;;0000-0002-9389-4060;0000-0002-6711-9319", "linkedin": ";;;;", "or_profile": "~Jiahe_Chen1;~Jinkun_Cao1;~Dahua_Lin1;~Kris_M._Kitani1;~Jiangmiao_Pang1", "aff": "Zhejiang University;Carnegie Mellon University;The Chinese University of Hong Kong;Carnegie Mellon University;Shanghai AI Laboratory ", "aff_domain": "zju.edu.cn;andrew.cmu.edu;cuhk.edu.hk;cmu.edu;pjlab.org.cn", "position": "Undergrad student;PhD student;Associate Professor;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2024mgf,\ntitle={{MGF}: Mixed Gaussian Flow for Diverse Trajectory Prediction},\nauthor={Jiahe Chen and Jinkun Cao and Dahua Lin and Kris M. Kitani and Jiangmiao Pang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=muYhNDlxWc}\n}", "github": "", "reviewers": "pmqe;qRNj;1BoH", "pdf_size": 2612175, "rating": "3;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "1;2;3", "presentation": "2;3;2", "wc_summary": "62;117;89", "wc_strengths": "54;54;35", "wc_weaknesses": "297;230;22", "wc_questions": "199;97;412", "wc_limitations": "7;108;1", "wc_review": "619;606;559", "wc_reply_reviewers": "0;964;334", "wc_reply_authors": "398;1521;538", "reply_reviewers": "0;3;2", "reply_authors": "3;4;3", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 89.33333333333333, 22.45489305746572 ], "wc_strengths_avg": [ 47.666666666666664, 8.9566858950296 ], "wc_weaknesses_avg": [ 183.0, 117.08401541912826 ], "wc_questions_avg": [ 236.0, 131.232617896619 ], "wc_limitations_avg": [ 38.666666666666664, 49.08722395446249 ], "wc_review_avg": [ 594.6666666666666, 25.772509040103607 ], "wc_reply_reviewers_avg": [ 432.6666666666667, 399.6876558291765 ], "wc_reply_authors_avg": [ 819.0, 499.6685568120798 ], "reply_reviewers_avg": [ 1.6666666666666667, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uieUYP6jl_wJ:scholar.google.com/&scioq=MGF:+Mixed+Gaussian+Flow+for+Diverse+Trajectory+Prediction&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "zju.edu.cn;andrew.cmu.edu;cuhk.edu.hk;cmu.edu;pjlab.org.cn", "author_num": 5, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Zhejiang University;Carnegie Mellon University;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.cmu.edu;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "ZJU;CMU;CUHK;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "China;United States" }, { "id": "mwIZW97PVQ", "title": "Deep Unlearn: Benchmarking Machine Unlearning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Machine unlearning (MU) aims to remove the influence of particular data points from the learnable parameters of a trained machine learning model. This is a key capability in light of data privacy requirements, trustworthiness, and safety in deployed models. MU is particularly challenging for deep neural networks (DNNs), such as convolutional nets or vision transformers, as such DNNs tend to memorize a notable portion of their training dataset. Nevertheless, the community lacks a rigorous and multifaceted study that looks into the success of MU methods for DNNs. In this paper, we investigate 18 state-of-the-art MU methods across various benchmark datasets and models, with each evaluation conducted over 10 different initializations, a comprehensive evaluation involving MU over 100K models. We show that, with the proper hyperparameters, Masked Small Gradients (MSG) and Convolution Transpose (CT), consistently perform better in terms of model accuracy and run-time efficiency across different models, datasets, and initializations,\nassessed by population-based membership inference attacks (MIA) and per-sample unlearning likelihood ratio attacks (U-LiRA).\nFurthermore, our benchmark highlights the fact that comparing a MU method only with commonly used baselines, such as Gradient Ascent (GA) or Successive Random Relabeling (SRL), is inadequate, \nand we need better baselines like Negative Gradient Plus (NG+) with proper hyperparameter selection.", "keywords": "Machine Unlearning;Deep Learning", "primary_area": "", "supplementary_material": "/attachment/514f6053e6ed88f4805827bff1f620a3fe7b7358.pdf", "author": "Xavier Cadet;Anastasia Borovykh;Mohammad Malekzadeh;Sara Ahmadi-Abhari;Hamed Haddadi", "authorids": "~Xavier_Cadet1;~Anastasia_Borovykh1;~Mohammad_Malekzadeh1;~Sara_Ahmadi-Abhari1;~Hamed_Haddadi1", "gender": "M;;M;;M", "homepage": ";;https://mmalekzadeh.github.io/;https://www.imperial.ac.uk/people/s.ahmadi-abhari;https://haddadi.github.io/about/", "dblp": "253/6617;;153/2838;;33/5454", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?user=xZr9WQMAAAAJ;https://scholar.google.co.uk/citations?hl=en;gkA7zvoAAAAJ", "orcid": "0000-0002-8545-0371;;0000-0002-4247-906X;0000-0003-4440-4050;", "linkedin": "https://linkedin.com/in/xavier-cadet;;mlkzade/;sara-ahmadi-abhari-a3043334/;", "or_profile": "~Xavier_Cadet1;~Anastasia_Borovykh1;~Mohammad_Malekzadeh1;~Sara_Ahmadi-Abhari1;~Hamed_Haddadi1", "aff": "Imperial College London;;Nokia networks GmbH;Imperial College London;Imperial College London", "aff_domain": "ic.ac.uk;;nokia-bell-labs.com;ic.ac.uk;imperial.ac.uk", "position": "PhD student;;Researcher;Lecturer;Full Professor", "bibtex": "@misc{\nanonymous2024deep,\ntitle={Deep Unlearn: Benchmarking Machine Unlearning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=mwIZW97PVQ}\n}", "github": "", "project": "", "reviewers": "sLqq;K9a2;CWjQ;T7vM", "site": "https://openreview.net/forum?id=mwIZW97PVQ", "pdf_size": 522377, "rating": "3;4;6;8", "confidence": "5;4;3;4", "wc_summary_and_contributions": "54;102;87;41", "wc_strengths": "46;65;64;132", "wc_improvement": "3;306;166;164", "wc_limitations": "21;7;61;17", "wc_correctness": "146;1;51;13", "wc_clarity": "14;1;7;7", "wc_relation_to_prior_work": "22;26;21;56", "wc_documentation": "7;1;44;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "314;510;502;455", "wc_reply_reviewers": "49;54;0;0", "wc_reply_authors": "0;150;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 71.0, 24.525496936861444 ], "wc_strengths_avg": [ 76.75, 32.78242669480098 ], "wc_improvement_avg": [ 159.75, 107.25757549003241 ], "wc_limitations_avg": [ 26.5, 20.56088519495209 ], "wc_correctness_avg": [ 52.75, 56.91386035053324 ], "wc_clarity_avg": [ 7.25, 4.602988159880492 ], "wc_relation_to_prior_work_avg": [ 31.25, 14.411367041332339 ], "wc_documentation_avg": [ 19.0, 16.718253497300488 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 445.25, 78.63642603781024 ], "wc_reply_reviewers_avg": [ 25.75, 25.810608284191986 ], "wc_reply_authors_avg": [ 37.5, 64.9519052838329 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.552344770738994, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:T-WR6p5aBC8J:scholar.google.com/&scioq=Deep+Unlearn:+Benchmarking+Machine+Unlearning&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Imperial College London;Nokia Networks", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://networks.nokia.com", "aff_unique_abbr": "ICL;Nokia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Learning De-Biased Representations for Remote-Sensing Imagery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93721", "id": "mwN1bbD5DQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mwN1bbD5DQ", "openreview": "https://openreview.net/forum?id=mwN1bbD5DQ", "poster": "", "project": "", "author_site": "Zichen Tian, Zhaozheng CHEN, QIANRU SUN", "tldr": "", "abstract": "Remote sensing (RS) imagery, which requires specialized satellites to collect and is difficult to annotate, suffers from data scarcity and class imbalance in certain spectrums. Due to their data scarcity, training large-scale RS models from scratch is unrealistic, and the alternative is to transfer pre-trained models by fine-tuning or a more data-efficient method LoRA. Due to class imbalance, transferred models exhibit strong bias, where features of the major class dominate over those of the minor class. In this paper, we propose debLoRA, a generic training approach that works with any LoRA variants to yield debiased features. It is an unsupervised learning approach that can diversify minor class features based on the shared attributes with major classes, where the attributes are obtained by a simple step of clustering. To evaluate it, we conduct extensive experiments in two transfer learning scenarios in the RS domain: from natural to optical RS images, and from optical RS to multi-spectrum RS images. We perform object classification and oriented object detection tasks on the optical RS dataset DOTA and the SAR dataset FUSRS. Results show that our debLoRA consistently surpasses prior arts across these RS adaptation settings, yielding up to 3.3 and 4.7 percentage points gains on the tail classes for natural $\\to$ optical RS and optical RS $\\to$ multi-spectrum RS adaptations, respectively, while preserving the performance on head classes, substantiating its efficacy and adaptability", "keywords": "Adaptation;Long-tailed learning;Remote Sensing", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zichen Tian;Zhaozheng Chen;Qianru Sun", "authorids": "~Zichen_Tian1;~Zhaozheng_Chen1;~Qianru_Sun2", "gender": ";M;F", "homepage": ";;https://qianrusun.com/", "dblp": ";230/7948;127/6132.html", "google_scholar": ";rBW50p4AAAAJ;https://scholar.google.de/citations?user=fNfrGMIAAAAJ", "orcid": ";;0000-0003-2689-317X", "linkedin": ";;", "or_profile": "~Zichen_Tian1;~Zhaozheng_Chen1;~Qianru_Sun2", "aff": ";Singapore Management University;Singapore Management University", "aff_domain": ";smu.edu.sg;smu.edu.sg", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntian2024learning,\ntitle={Learning De-Biased Representations for Remote-Sensing Imagery},\nauthor={Zichen Tian and Zhaozheng Chen and Qianru Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mwN1bbD5DQ}\n}", "github": "", "reviewers": "bDAb;r61t;meTz", "pdf_size": 14309649, "rating": "2;6;7", "confidence": "5;3;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "39;63;120", "wc_strengths": "41;74;86", "wc_weaknesses": "372;58;66", "wc_questions": "39;65;40", "wc_limitations": "10;8;14", "wc_review": "501;268;326", "wc_reply_reviewers": "0;30;152", "wc_reply_authors": "0;40;43", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.0, 2.160246899469287 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.0, 33.97057550292606 ], "wc_strengths_avg": [ 67.0, 19.026297590440446 ], "wc_weaknesses_avg": [ 165.33333333333334, 146.17189272299316 ], "wc_questions_avg": [ 48.0, 12.027745701779143 ], "wc_limitations_avg": [ 10.666666666666666, 2.494438257849294 ], "wc_review_avg": [ 365.0, 99.03871296955886 ], "wc_reply_reviewers_avg": [ 60.666666666666664, 65.73346855969864 ], "wc_reply_authors_avg": [ 27.666666666666668, 19.601587237318874 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9819805060619656, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8060604830756482754&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";smu.edu.sg;smu.edu.sg", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Crafting Interpretable Embeddings for Language Neuroscience by Asking LLMs Questions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93720", "id": "mxMvWwyBWe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=mxMvWwyBWe", "openreview": "https://openreview.net/forum?id=mxMvWwyBWe", "poster": "/media/PosterPDFs/NeurIPS%202024/93720.png?t=1731710005.1708755", "project": "", "author_site": "Vinamra Benara, Chandan Singh, John Morris, Richard Antonello, Ion Stoica, Alexander Huth, Jianfeng Gao", "tldr": "", "abstract": "Large language models (LLMs) have rapidly improved text embeddings for a growing array of natural-language processing tasks. However, their opaqueness and proliferation into scientific domains such as neuroscience have created a growing need for interpretability. Here, we ask whether we can obtain interpretable embeddings through LLM prompting. We introduce question-answering embeddings (QA-Emb), embeddings where each feature represents an answer to a yes/no question asked to an LLM. Training QA-Emb reduces to selecting a set of underlying questions rather than learning model weights.\n\nWe use QA-Emb to flexibly generate interpretable models for predicting fMRI voxel responses to language stimuli. QA-Emb significantly outperforms an established interpretable baseline, and does so while requiring very few questions. This paves the way towards building flexible feature spaces that can concretize and evaluate our understanding of semantic brain representations. We additionally find that QA-Emb can be effectively approximated with an efficient model, and we explore broader applications in simple NLP tasks.", "keywords": "fMRI;Encoding models;Neuroscience;Language neuroscience;Interpretability;Large language models;Explainability;Brain mapping", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Vinamra Benara;Chandan Singh;John Xavier Morris;Richard Antonello;Ion Stoica;Alexander Huth;Jianfeng Gao", "authorids": "~Vinamra_Benara1;~Chandan_Singh1;~John_Xavier_Morris1;~Richard_Antonello1;~Ion_Stoica1;~Alexander_Huth1;~Jianfeng_Gao1", "gender": "M;M;M;M;M;;M", "homepage": ";https://csinva.io/;http://jxmo.io;https://www.cs.utexas.edu/~huth/people.html;http://people.eecs.berkeley.edu/~istoica/;https://www.cs.utexas.edu/~huth/;https://www.microsoft.com/en-us/research/people/jfgao/", "dblp": ";38/2317;263/9958.html;;s/IonStoica;44/8860.html;92/5339", "google_scholar": "https://scholar.google.co.in/citations?user=XcEA0RIAAAAJ;https://scholar.google.com/citations?hl=en;Utsbve4AAAAJ;;vN-is70AAAAJ;JNXWWkIAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0003-0318-2340;;;;;", "linkedin": ";csinva/;;;ionstoica;;", "or_profile": "~Vinamra_Benara1;~Chandan_Singh1;~John_Xavier_Morris1;~Richard_Antonello1;~Ion_Stoica1;~Alexander_Huth1;~Jianfeng_Gao1", "aff": "University of California, Berkeley;Microsoft Research;Cornell University;University of Texas, Austin;University of California, Berkeley;The University of Texas at Austin;Microsoft Research", "aff_domain": "berkeley.edu;microsoft.com;cornell.edu;utexas.edu;berkeley.edu;utexas.edu;microsoft.com", "position": "PhD student;Researcher;PhD student;PhD student;Full Professor;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nbenara2024crafting,\ntitle={Crafting Interpretable Embeddings for Language Neuroscience by Asking {LLM}s Questions},\nauthor={Vinamra Benara and Chandan Singh and John Xavier Morris and Richard Antonello and Ion Stoica and Alexander Huth and Jianfeng Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=mxMvWwyBWe}\n}", "github": "", "reviewers": "Z19r;5mep;wvR8;s4gE", "pdf_size": 49841354, "rating": "3;5;5;7", "confidence": "4;3;3;4", "soundness": "2;2;2;3", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "81;86;69;183", "wc_strengths": "65;42;46;52", "wc_weaknesses": "159;165;190;67", "wc_questions": "42;20;6;123", "wc_limitations": "1;3;12;1", "wc_review": "348;316;323;426", "wc_reply_reviewers": "4;121;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.75, 45.598108513402174 ], "wc_strengths_avg": [ 51.25, 8.699856320652657 ], "wc_weaknesses_avg": [ 145.25, 46.64962486451525 ], "wc_questions_avg": [ 47.75, 45.30107614615794 ], "wc_limitations_avg": [ 4.25, 4.548351349665063 ], "wc_review_avg": [ 353.25, 43.654180784891615 ], "wc_reply_reviewers_avg": [ 31.25, 51.842911762361496 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:erS6_35QGY4J:scholar.google.com/&scioq=Crafting+Interpretable+Embeddings+for+Language+Neuroscience+by+Asking+LLMs+Questions&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "berkeley.edu;microsoft.com;cornell.edu;utexas.edu;berkeley.edu;utexas.edu;microsoft.com", "author_num": 7, "aff_unique_index": "0;1;2;3;0;3;1", "aff_unique_norm": "University of California, Berkeley;Microsoft;Cornell University;University of Texas at Austin", "aff_unique_dep": ";Microsoft Research;;", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com/en-us/research;https://www.cornell.edu;https://www.utexas.edu", "aff_unique_abbr": "UC Berkeley;MSR;Cornell;UT Austin", "aff_campus_unique_index": "0;2;0;2", "aff_campus_unique": "Berkeley;;Austin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Interpreting and Analysing CLIP's Zero-Shot Image Classification via Mutual Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93719", "id": "n01yLUy7Mj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n01yLUy7Mj", "openreview": "https://openreview.net/forum?id=n01yLUy7Mj", "poster": "/media/PosterPDFs/NeurIPS%202024/93719.png?t=1731233927.7931736", "project": "", "author_site": "Fawaz Sammani, Nikos Deligiannis", "tldr": "", "abstract": "Contrastive Language-Image Pretraining (CLIP) performs zero-shot image classification by mapping images and textual class representation into a shared embedding space, then retrieving the class closest to the image. This work provides a new approach for interpreting CLIP models for image classification from the lens of mutual knowledge between the two modalities. Specifically, we ask: what concepts do both vision and language CLIP encoders learn in common that influence the joint embedding space, causing points to be closer or further apart? We answer this question via an approach of textual concept-based explanations, showing their effectiveness, and perform an analysis encompassing a pool of 13 CLIP models varying in architecture, size and pretraining datasets. We explore those different aspects in relation to mutual knowledge, and analyze zero-shot predictions. Our approach demonstrates an effective and human-friendly way of understanding zero-shot classification decisions with CLIP.", "keywords": "CLIP;vision-language;explainability;interpretability;mutual knowledge", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Fawaz Sammani;Nikos Deligiannis", "authorids": "~Fawaz_Sammani1;~Nikos_Deligiannis1", "gender": "M;M", "homepage": "https://github.com/fawazsammani;https://www.etrovub.be/people/member/about-bio/ndeligia/", "dblp": "248/8242;90/5258", "google_scholar": "xxlAF58AAAAJ;https://scholar.google.be/citations?user=iUGMLcYAAAAJ", "orcid": ";0000-0001-9300-5860", "linkedin": "https://linkedin.com/in/fawaz-sammani-2ba4a7184;nikos-deligiannis-a3b70a18/?originalSubdomain=be", "or_profile": "~Fawaz_Sammani1;~Nikos_Deligiannis1", "aff": "Saarland Informatics Campus, Max-Planck Institute;Vrije Universiteit Brussel", "aff_domain": "mpi-inf.mpg.de;vub.be", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nsammani2024interpreting,\ntitle={Interpreting and Analysing {CLIP}'s Zero-Shot Image Classification via Mutual Knowledge},\nauthor={Fawaz Sammani and Nikos Deligiannis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n01yLUy7Mj}\n}", "github": "", "reviewers": "Whov;wbb9;BMwb", "pdf_size": 7899652, "rating": "5;6;6", "confidence": "3;4;4", "soundness": "2;3;3", "novelty": "2;2;2", "presentation": "1;2;3", "wc_summary": "101;66;79", "wc_strengths": "94;21;70", "wc_weaknesses": "1044;44;130", "wc_questions": "65;43;22", "wc_limitations": "85;4;31", "wc_review": "1389;178;332", "wc_reply_reviewers": "0;10;32", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 82.0, 14.445299120013633 ], "wc_strengths_avg": [ 61.666666666666664, 30.379086373505192 ], "wc_weaknesses_avg": [ 406.0, 452.49825045702295 ], "wc_questions_avg": [ 43.333333333333336, 17.55625877635159 ], "wc_limitations_avg": [ 40.0, 33.67491648096547 ], "wc_review_avg": [ 633.0, 538.2570637406134 ], "wc_reply_reviewers_avg": [ 14.0, 13.366625103842281 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4775569665591072040&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mpi-inf.mpg.de;vub.be", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck Institute;Vrije Universiteit Brussel", "aff_unique_dep": "Informatics;", "aff_unique_url": "https://www.mpi-sws.org;https://www.vub.be", "aff_unique_abbr": "MPI-SWS;VUB", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Saarland;Brussels", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Belgium" }, { "title": "BLAST: Block-Level Adaptive Structured Matrices for Efficient Deep Neural Network Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93718", "id": "n0arS0DDot", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n0arS0DDot", "openreview": "https://openreview.net/forum?id=n0arS0DDot", "poster": "/media/PosterPDFs/NeurIPS%202024/93718.png?t=1732314446.3677611", "project": "", "author_site": "Changwoo Lee, Soo Min Kwon, Qing Qu, Hun-Seok Kim", "tldr": "", "abstract": "Large-scale foundation models have demonstrated exceptional performance in language and vision tasks. However, the numerous dense matrix-vector operations involved in these large networks pose significant computational challenges during inference. To address these challenges, we introduce the Block-Level Adaptive STructured (BLAST) matrix, designed to learn and leverage efficient structures prevalent in the weight matrices of linear layers within deep learning models. Compared to existing structured matrices, the BLAST matrix offers substantial flexibility, as it can represent various types of structures that are either learned from data or computed from pre-existing weight matrices. We demonstrate the efficiency of using the BLAST matrix for compressing both language and vision tasks, showing that (i) for medium-sized models such as ViT and GPT-2, training with BLAST weights boosts performance while reducing complexity by 70\\% and 40\\%, respectively; and (ii) for large foundation models such as Llama-7B and DiT-XL, the BLAST matrix achieves a 2x compression while exhibiting the lowest performance degradation among all tested structured matrices. Our code is available at https://github.com/changwoolee/BLAST.", "keywords": "Efficiency;Compression;Low Rank;Pruning;Matrix Factorization;Structured Matrix;Acceleration;Optimization;Transformer;Large Language Model;Diffusion Model;Vision Model;Preconditioned Gradient Descent", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/32ffae24f835ceb9bc516fe294dc933ca5ede97a.zip", "author": "Changwoo Lee;Soo Min Kwon;Qing Qu;Hun-Seok Kim", "authorids": "~Changwoo_Lee2;~Soo_Min_Kwon1;~Qing_Qu2;~Hun-Seok_Kim1", "gender": "M;M;M;", "homepage": "https://changwoolee.github.io;https://soominkwon.github.io/;https://qingqu.engin.umich.edu/;", "dblp": "52/6193-1;256/2093;127/6874-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;_sdoF5IAAAAJ;JfblW3MAAAAJ;", "orcid": "0000-0002-5610-0671;;0000-0001-9136-558X;", "linkedin": "changwoo-lee/;;qing-q-1a0b9746/;", "or_profile": "~Changwoo_Lee2;~Soo_Min_Kwon1;~Qing_Qu2;~Hun-Seok_Kim1", "aff": "Samsung Research America;University of Michigan - Ann Arbor;University of Michigan;", "aff_domain": "samsung.com;umich.edu;umich.edu;", "position": "Intern;PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nlee2024blast,\ntitle={{BLAST}: Block-Level Adaptive Structured Matrices for Efficient Deep Neural Network Inference},\nauthor={Changwoo Lee and Soo Min Kwon and Qing Qu and Hun-Seok Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n0arS0DDot}\n}", "github": "", "reviewers": "2dKz;V5GM;xrxa", "pdf_size": 2898638, "rating": "4;4;5", "confidence": "4;3;3", "soundness": "3;3;2", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "61;41;127", "wc_strengths": "19;34;176", "wc_weaknesses": "129;47;450", "wc_questions": "75;108;120", "wc_limitations": "9;1;40", "wc_review": "293;231;913", "wc_reply_reviewers": "147;5;1604", "wc_reply_authors": "139;0;748", "reply_reviewers": "1;1;2", "reply_authors": "2;1;3", "rating_avg": [ 4.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.33333333333333, 36.745370078721784 ], "wc_strengths_avg": [ 76.33333333333333, 70.74052743811161 ], "wc_weaknesses_avg": [ 208.66666666666666, 173.90099354390003 ], "wc_questions_avg": [ 101.0, 19.026297590440446 ], "wc_limitations_avg": [ 16.666666666666668, 16.81930108205715 ], "wc_review_avg": [ 479.0, 307.9263981321944 ], "wc_reply_reviewers_avg": [ 585.3333333333334, 722.6351469140949 ], "wc_reply_authors_avg": [ 295.6666666666667, 324.84286799757336 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GteMZNKFs40J:scholar.google.com/&scioq=BLAST:+Block-Level+Adaptive+Structured+Matrices+for+Efficient+Deep+Neural+Network+Inference&hl=en&as_sdt=0,48", "gs_version_total": 5, "email": "samsung.com;umich.edu;umich.edu;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Samsung;University of Michigan", "aff_unique_dep": "Samsung Research America;", "aff_unique_url": "https://www.samsung.com/us/careers/research/;https://www.umich.edu", "aff_unique_abbr": "SRA;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Task-oriented Time Series Imputation Evaluation via Generalized Representers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93717", "id": "n2dvAKKQoM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n2dvAKKQoM", "openreview": "https://openreview.net/forum?id=n2dvAKKQoM", "poster": "/media/PosterPDFs/NeurIPS%202024/93717.png?t=1730371742.773131", "project": "", "author_site": "Zhixian Wang, Linxiao Yang, Liang Sun, Qingsong Wen, Yi Wang", "tldr": "", "abstract": "Time series analysis is widely used in many fields such as power energy, economics, and transportation, including different tasks such as forecasting, anomaly detection, classification, etc. Missing values are widely observed in these tasks, and often leading to unpredictable negative effects on existing methods, hindering their further application. In response to this situation, existing time series imputation methods mainly focus on restoring sequences based on their data characteristics, while ignoring the performance of the restored sequences in downstream tasks. Considering different requirements of downstream tasks (e.g., forecasting), this paper proposes an efficient downstream task-oriented time series imputation evaluation approach. By combining time series imputation with neural network models used for downstream tasks, the gain of different imputation strategies on downstream tasks is estimated without retraining, and the most favorable imputation value for downstream tasks is given by combining different imputation strategies according to the estimated gain.", "keywords": "time series imputation;downstream task oriented imputation evaluation", "primary_area": "other", "supplementary_material": "/attachment/c68fae5da8e6fa8be6a7a3d1aad1ec1a4d5424f5.zip", "author": "Zhixian Wang;Linxiao Yang;Liang Sun;Qingsong Wen;Yi Wang", "authorids": "~Zhixian_Wang2;~Linxiao_Yang1;~Liang_Sun2;~Qingsong_Wen2;~Yi_Wang43", "gender": "M;;M;M;M", "homepage": ";https://github.com/DAMO-DI-ML;https://www.linkedin.com/in/liang-sun-a0a87621/;http://www.eeyiwang.com/;https://sites.google.com/site/qingsongwen8/", "dblp": ";160/8447;18/5837-1;;27/561", "google_scholar": ";;D_cOMBgAAAAJ;KYDSElAAAAAJ;vjPJvwYAAAAJ", "orcid": "0009-0000-2734-5074;0000-0001-9558-7163;0009-0002-5835-7259;0000-0003-1143-0666;0000-0003-4516-2524", "linkedin": ";;;;qingsong-wen-22814156/", "or_profile": "~Zhixian_Wang2;~Linxiao_Yang1;~Liang_Sun2;~Yi_Wang43;~Qingsong_Wen1", "aff": " University of Hong Kong;Alibaba Group;Alibaba Group;;Squirrel Ai Learning", "aff_domain": "eee.hku.hk;alibaba-inc.com;alibaba-inc.com;;squirrelai.com", "position": "PhD student;Engineer;Staff Software Engineer;;Principal Researcher", "bibtex": "@inproceedings{\nwang2024taskoriented,\ntitle={Task-oriented Time Series Imputation Evaluation via Generalized Representers},\nauthor={Zhixian Wang and Linxiao Yang and Liang Sun and Qingsong Wen and Yi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n2dvAKKQoM}\n}", "github": "", "reviewers": "z2pu;xbnY;wGbj;aC85;56K9", "pdf_size": 539619, "rating": "4;5;5;5;7", "confidence": "4;4;4;3;4", "soundness": "2;3;3;3;2", "novelty": "2;3;2;3;3", "presentation": "3;3;2;2;3", "wc_summary": "84;46;66;57;66", "wc_strengths": "98;52;75;58;138", "wc_weaknesses": "291;158;73;200;165", "wc_questions": "3;4;46;2;235", "wc_limitations": "1;114;1;10;197", "wc_review": "477;374;261;327;801", "wc_reply_reviewers": "20;1413;62;22;17", "wc_reply_authors": "332;4004;156;169;39", "reply_reviewers": "1;9;1;1;1", "reply_authors": "3;12;2;3;2", "rating_avg": [ 5.2, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 63.8, 12.496399481450648 ], "wc_strengths_avg": [ 84.2, 31.281943673627442 ], "wc_weaknesses_avg": [ 177.4, 70.49141791736068 ], "wc_questions_avg": [ 58.0, 90.05553841935541 ], "wc_limitations_avg": [ 64.6, 78.79238541889691 ], "wc_review_avg": [ 448.0, 190.01894642377113 ], "wc_reply_reviewers_avg": [ 306.8, 553.3452448517111 ], "wc_reply_authors_avg": [ 940.0, 1534.8392749731158 ], "reply_reviewers_avg": [ 2.6, 3.2000000000000006 ], "reply_authors_avg": [ 4.4, 3.826225293941799 ], "replies_avg": [ 42, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.10206207261596575, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:NT86WJu-CrAJ:scholar.google.com/&scioq=Task-oriented+Time+Series+Imputation+Evaluation+via+Generalized+Representers&hl=en&as_sdt=0,30", "gs_version_total": 5, "email": "eee.hku.hk;alibaba-inc.com;alibaba-inc.com;;squirrelai.com", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Hong Kong;Alibaba Group;Squirrel Ai Learning", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hku.hk;https://www.alibaba.com;https://www.squirrelai.com/", "aff_unique_abbr": "HKU;Alibaba;", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "WildTeaming at Scale: From In-the-Wild Jailbreaks to (Adversarially) Safer Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93716", "id": "n5R6TvBVcX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n5R6TvBVcX", "openreview": "https://openreview.net/forum?id=n5R6TvBVcX", "poster": "", "project": "", "author_site": "Liwei Jiang, Kavel Rao, Seungju Han, Allyson Ettinger, Faeze Brahman, Sachin Kumar, Niloofar Mireshghallah, Ximing Lu, Maarten Sap, Yejin Choi, Nouha Dziri", "tldr": "", "abstract": "We introduce WildTeaming, an automatic red-teaming framework that mines in-the-wild user-chatbot interactions to discover 5.7K unique clusters of novel jailbreak tactics, and then composes selections of multiple mined tactics for systematic exploration of novel and even more challenging jailbreaks.\nCompared to prior work that performed red-teaming via recruited human workers, gradient-based optimization, or iterative revision with large language models (LLMs), our work investigates jailbreaks from chatbot users in-the-wild who were not specifically instructed to break the system. WildTeaming reveals previously unidentified vulnerabilities of frontier LLMs, resulting in more diverse and successful adversarial attacks compared to state-of-the-art jailbreaking methods. \n\nWhile there exist many datasets for jailbreak evaluation, very few open-source datasets exist for jailbreak training, as safety training data has been closed among all frontier models even when their weights are open. Therefore, with WildTeaming we create WildJailbreak, a large-scale open-source synthetic safety dataset with 262K vanilla (direct request) and adversarial (complex jailbreak) prompt-response pairs. In order to mitigate exaggerated safety behaviors, WildJailbreak provides two contrastive types of queries: 1) harmful queries (both vanilla and adversarial) and 2) benign queries that resemble harmful queries in form but contain no harmful intent. As WildJailbreak considerably upgrades the quality and scale of existing safety resources, it uniquely enables us to examine the scaling effects of data and the interplay of data properties and model capabilities during safety training. Through extensive model training and evaluations, we identify the training properties that enable an ideal balance of safety behaviors: appropriate safeguarding without over-refusal, effective handling of both vanilla and adversarial queries, and minimal, if any, decrease in general capabilities. All the components of WildJailbreak contribute to achieving balanced safety behaviors of models", "keywords": "Red-teaming;AI Safety;Safety Training;LLM Defense;Safety Training Data;Adversarial Training;Adversarial Attacks;Jailbreak", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Liwei Jiang;Kavel Rao;Seungju Han;Allyson Ettinger;Faeze Brahman;Sachin Kumar;Niloofar Mireshghallah;Ximing Lu;Maarten Sap;Yejin Choi;Nouha Dziri", "authorids": "~Liwei_Jiang2;~Kavel_Rao1;~Seungju_Han2;~Allyson_Ettinger1;~Faeze_Brahman1;~Sachin_Kumar1;~Niloofar_Mireshghallah1;~Ximing_Lu1;~Maarten_Sap1;~Yejin_Choi1;~Nouha_Dziri2", "gender": "F;M;M;F;F;M;;F;M;F;", "homepage": "https://liweijiang.me;http://kavelrao.dev;https://seungjuhan.me;https://aetting.github.io;https://fabrahman.github.io;https://shocheen.com;;https://gloriaximinglu.github.io/;http://maartensap.com;https://yejinc.github.io/;", "dblp": ";;;165/0758;276/6005;31/4484-9;;24/10879;153/9519;89/579-1;", "google_scholar": "lcPsDgUAAAAJ;;g_anRqAAAAAJ;;viCG2ikAAAAJ;qO38fRIAAAAJ;;https://scholar.google.com/citations?hl=en;gFN4QUYAAAAJ;vhP-tlcAAAAJ;", "orcid": ";;;;;;;;;;", "linkedin": ";;seungju-han-66b85017a/;;;;;;;;", "or_profile": "~Liwei_Jiang2;~Kavel_Rao1;~Seungju_Han2;~Allyson_Ettinger1;~Faeze_Brahman1;~Sachin_Kumar1;~Niloofar_Mireshghallah1;~Ximing_Lu1;~Maarten_Sap1;~Yejin_Choi1;~Nouha_Dziri2", "aff": "University of Washington;Department of Computer Science, University of Washington;Seoul National University;Allen Institute for Artificial Intelligence;Allen Institute for AI;Allen Institute for Artificial Intelligence;;University of Washington;Carnegie Mellon University;Department of Computer Science, University of Washington;", "aff_domain": "washington.edu;cs.washington.edu;snu.ac.kr;allenai.org;allenai.org;allenai.org;;cs.washington.edu;cmu.edu;cs.washington.edu;", "position": "PhD student;Undergrad student;Undergrad student;Researcher;Postdoc;Postdoc;;PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\njiang2024wildteaming,\ntitle={WildTeaming at Scale: From In-the-Wild Jailbreaks to (Adversarially) Safer Language Models},\nauthor={Liwei Jiang and Kavel Rao and Seungju Han and Allyson Ettinger and Faeze Brahman and Sachin Kumar and Niloofar Mireshghallah and Ximing Lu and Maarten Sap and Yejin Choi and Nouha Dziri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n5R6TvBVcX}\n}", "github": "", "reviewers": "idQi;e44H;wDdc;ZrdC", "pdf_size": 14314856, "rating": "5;5;6;7", "confidence": "5;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "2;3;3;2", "wc_summary": "114;78;77;76", "wc_strengths": "239;77;87;68", "wc_weaknesses": "448;54;191;244", "wc_questions": "403;83;56;22", "wc_limitations": "331;52;18;14", "wc_review": "1535;344;429;424", "wc_reply_reviewers": "0;0;11;330", "wc_reply_authors": "0;0;58;1464", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;2;5", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 86.25, 16.037066439969625 ], "wc_strengths_avg": [ 117.75, 70.32558211632521 ], "wc_weaknesses_avg": [ 234.25, 141.5492405490047 ], "wc_questions_avg": [ 141.0, 152.80215967060153 ], "wc_limitations_avg": [ 103.75, 132.03100961516577 ], "wc_review_avg": [ 683.0, 493.0572989014563 ], "wc_reply_reviewers_avg": [ 85.25, 141.3778182742965 ], "wc_reply_authors_avg": [ 380.5, 626.0069887788794 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1053039073690687050&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "washington.edu;cs.washington.edu;snu.ac.kr;allenai.org;allenai.org;allenai.org;;cs.washington.edu;cmu.edu;cs.washington.edu;", "author_num": 11, "aff_unique_index": "0;0;1;2;3;2;0;4;0", "aff_unique_norm": "University of Washington;Seoul National University;Allen Institute for Artificial Intelligence;Allen Institute for AI;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.washington.edu;https://www.snu.ac.kr;https://allenai.org;https://allenai.org;https://www.cmu.edu", "aff_unique_abbr": "UW;SNU;AI2;AI2;CMU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Evidential Mixture Machines: Deciphering Multi-Label Correlations for Active Learning Sensitivity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93715", "id": "n5lLSskwtu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n5lLSskwtu", "openreview": "https://openreview.net/forum?id=n5lLSskwtu", "poster": "/media/PosterPDFs/NeurIPS%202024/93715.png?t=1733242957.6265674", "project": "", "author_site": "Dayou Yu, Minghao Li, Weishi Shi, Qi Yu", "tldr": "", "abstract": "Multi-label active learning is a crucial yet challenging area in contemporary machine learning, often complicated by a large and sparse label space. This challenge is further exacerbated in active learning scenarios where labeling resources are constrained. Drawing inspiration from existing mixture of Bernoulli models, which efficiently compress the label space into a more manageable weight coefficient space by learning correlated Bernoulli components, we propose a novel model called Evidential Mixture Machines (EMM). Our model leverages mixture components derived from unsupervised learning in the label space and improves prediction accuracy by predicting weight coefficients following the evidential learning paradigm. These coefficients are aggregated as proxy pseudo counts to enhance component offset predictions. The evidential learning approach provides an uncertainty-aware connection between input features and the predicted coefficients and components. Additionally, our method combines evidential uncertainty with predicted label embedding covariances for active sample selection, creating a richer, multi-source uncertainty metric beyond traditional uncertainty scores. Experiments on synthetic datasets show the effectiveness of evidential uncertainty prediction and EMM's capability to capture label correlations through predicted components. Further testing on real-world datasets demonstrates improved performance compared to existing multi-label active learning methods.", "keywords": "Active learning;multi-label classification", "primary_area": "active_learning", "supplementary_material": "", "author": "Dayou Yu;Minghao Li;Weishi Shi;Qi Yu", "authorids": "~Dayou_Yu1;~Minghao_Li12;~Weishi_Shi2;~Qi_Yu1", "gender": ";M;M;M", "homepage": "https://people.rit.edu/~dy2507/;;http://www.linkedin.com/in/weishi-shi-9b5b89b4;https://www.rit.edu/mining/", "dblp": "319/4611;;202/1055;58/6957-1", "google_scholar": "Obh2NOwAAAAJ;;;L3gWdfEAAAAJ", "orcid": "0009-0002-2373-4907;;;0000-0002-0426-5407", "linkedin": ";minghao-li-72312a30a;;", "or_profile": "~Dayou_Yu1;~Minghao_Li12;~Weishi_Shi2;~Qi_Yu1", "aff": "Rochester Institute of Technology;University of North Texas;University of North Texas;Rochester Institute of Technology", "aff_domain": "rit.edu;unt.edu;unt.edu;rit.edu", "position": "PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nyu2024evidential,\ntitle={Evidential Mixture Machines: Deciphering Multi-Label Correlations for Active Learning Sensitivity},\nauthor={Dayou Yu and Minghao Li and Weishi Shi and Qi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n5lLSskwtu}\n}", "github": "", "reviewers": "9WcB;fLE1;dBCp;FbZQ", "pdf_size": 965022, "rating": "5;5;5;6", "confidence": "3;2;4;2", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "2;2;3;3", "wc_summary": "63;56;54;99", "wc_strengths": "49;26;99;109", "wc_weaknesses": "193;34;99;49", "wc_questions": "59;157;5;81", "wc_limitations": "1;10;13;14", "wc_review": "365;283;270;352", "wc_reply_reviewers": "23;20;13;49", "wc_reply_authors": "26;84;256;34", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.0, 18.207141456033124 ], "wc_strengths_avg": [ 70.75, 34.412025514346 ], "wc_weaknesses_avg": [ 93.75, 62.1505229261991 ], "wc_questions_avg": [ 75.5, 54.577925940805045 ], "wc_limitations_avg": [ 9.5, 5.123475382979799 ], "wc_review_avg": [ 317.5, 41.512046444375635 ], "wc_reply_reviewers_avg": [ 26.25, 13.626720074911644 ], "wc_reply_authors_avg": [ 100.0, 92.76852914647294 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:66Xk_KV_WxIJ:scholar.google.com/&scioq=Evidential+Mixture+Machines:+Deciphering+Multi-Label+Correlations+for+Active+Learning+Sensitivity&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "rit.edu;unt.edu;unt.edu;rit.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Rochester Institute of Technology;University of North Texas", "aff_unique_dep": ";", "aff_unique_url": "https://www.rit.edu;https://www.unt.edu", "aff_unique_abbr": "RIT;UNT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hyperbolic Embeddings of Supervised Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93714", "id": "n60xBFZWrk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n60xBFZWrk", "openreview": "https://openreview.net/forum?id=n60xBFZWrk", "poster": "/media/PosterPDFs/NeurIPS%202024/93714.png?t=1731245060.146072", "project": "", "author_site": "Richard Nock, Ehsan Amid, Frank Nielsen, Alexander Soen, Manfred Warmuth", "tldr": "", "abstract": "Models of hyperbolic geometry have been successfully used in ML for two main tasks: embedding *models* in unsupervised learning (*e.g.* hierarchies) and embedding *data*. \nTo our knowledge, there are no approaches that provide embeddings for supervised models; even when hyperbolic geometry provides convenient properties for expressing popular hypothesis classes, such as decision trees (and ensembles).\nIn this paper, we propose a full-fledged solution to the problem in three independent contributions. The first linking the theory of losses for class probability estimation to hyperbolic embeddings in Poincar\\'e disk model. The second resolving an issue for a clean, unambiguous embedding of (ensembles of) decision trees in this model. The third showing how to smoothly tweak the Poincar\\'e hyperbolic distance to improve its encoding and visualization properties near the border of the disk, a crucial region for our application, while keeping hyperbolicity.\nThis last step has substantial independent interest as it is grounded in a generalization of Leibniz-Newton's fundamental Theorem of calculus.", "keywords": "Hyperbolic geometry;supervised model embedding;decision trees;boosting", "primary_area": "learning_theory", "supplementary_material": "/attachment/029dfc73d375c21608ec7602bc715ab553f0f50b.zip", "author": "Richard Nock;Ehsan Amid;Frank Nielsen;Alexander Soen;Manfred K Warmuth", "authorids": "~Richard_Nock1;~Ehsan_Amid1;~Frank_Nielsen1;~Alexander_Soen1;~Manfred_K_Warmuth1", "gender": ";M;M;M;M", "homepage": "http://users.cecs.anu.edu.au/~rnock/;https://sites.google.com/corp/view/eamid/;https://franknielsen.github.io/;https://alexandersoen.github.io/;https://mwarmuth.bitbucket.io/", "dblp": "n/RichardNock;142/5754;http://dblp.uni-trier.de/pers/hd/n/Nielsen:Frank;245/9661.html;w/ManfredKWarmuth.html", "google_scholar": "https://scholar.google.fr/citations?user=0J2s3YQAAAAJ;https://scholar.google.fi/citations?user=F6omR3gAAAAJ;c-cuO9cAAAAJ;apRX4awAAAAJ;LR6kjO4AAAAJ", "orcid": ";;0000-0001-5728-0726;;", "linkedin": ";ehsan-amid-63aba754;;;", "or_profile": "~Richard_Nock1;~Ehsan_Amid1;~Frank_Nielsen1;~Alexander_Soen1;~Manfred_K_Warmuth1", "aff": "Google Research;Google DeepMind;Sony Computer Science Laboratories Inc (Tokyo);Australian National University;Google Research", "aff_domain": "google.com;google.com;sonycsl.co.jp;anu.edu.au;google.com", "position": "Researcher;Research Scientist;Fellow;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nnock2024hyperbolic,\ntitle={Hyperbolic Embeddings of Supervised Models},\nauthor={Richard Nock and Ehsan Amid and Frank Nielsen and Alexander Soen and Manfred K Warmuth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n60xBFZWrk}\n}", "github": "", "reviewers": "Ry4X;j9dG;t8YB;5SUh", "pdf_size": 7514994, "rating": "4;5;5;6", "confidence": "3;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;1;3;3", "wc_summary": "45;91;91;331", "wc_strengths": "66;76;112;118", "wc_weaknesses": "150;283;91;137", "wc_questions": "41;87;86;1", "wc_limitations": "1;13;17;1", "wc_review": "303;550;397;588", "wc_reply_reviewers": "31;262;25;26", "wc_reply_authors": "247;259;0;266", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 139.5, 112.14611005291268 ], "wc_strengths_avg": [ 93.0, 22.38302928559939 ], "wc_weaknesses_avg": [ 165.25, 71.4295982069058 ], "wc_questions_avg": [ 53.75, 35.67474596966319 ], "wc_limitations_avg": [ 8.0, 7.14142842854285 ], "wc_review_avg": [ 459.5, 115.21827112051282 ], "wc_reply_reviewers_avg": [ 86.0, 101.63906729206049 ], "wc_reply_authors_avg": [ 193.0, 111.63556780883053 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4039489431062588913&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "google.com;google.com;sonycsl.co.jp;anu.edu.au;google.com", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Google;Sony Computer Science Laboratories Inc;Australian National University", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;https://www.sony.net/;https://www.anu.edu.au", "aff_unique_abbr": "Google Research;Sony CSL;ANU", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Mountain View;;Tokyo", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "United States;United Kingdom;Japan;Australia" }, { "title": "The FineWeb Datasets: Decanting the Web for the Finest Text Data at Scale", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97513", "id": "n6SCkn2QaG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n6SCkn2QaG", "openreview": "https://openreview.net/forum?id=n6SCkn2QaG", "poster": "", "project": "", "author_site": "Guilherme Penedo, Hynek Kydl\u00ed\u010dek, Loubna Ben allal, Anton Lozhkov, Margaret Mitchell, Colin Raffel, Leandro Von Werra, Thomas Wolf", "tldr": "", "abstract": "The performance of a large language model (LLM) depends heavily on the quality and size of its pretraining dataset. However, the pretraining datasets for state-of-the-art open LLMs like Llama 3 and Mixtral are not publicly available and very little is known about how they were created. In this work, we introduce FineWeb, a 15-trillion token dataset derived from 96 Common Crawl snapshots that produces better-performing LLMs than other open pretraining datasets. To advance the understanding of how best to curate high-quality pretraining datasets, we carefully document and ablate all of the design choices used in FineWeb, including in-depth investigations of deduplication and filtering strategies. In addition, we introduce FineWeb-Edu, a 1.3-trillion token collection of educational text filtered from FineWeb. LLMs pretrained on FineWeb-Edu exhibit dramatically better performance on knowledge- and reasoning-intensive benchmarks like MMLU and ARC. Along with our datasets, we publicly release our data curation codebase and all of the models trained during our ablation experiments.", "keywords": "dataset;pretraining;web data;machine learning", "primary_area": "", "supplementary_material": "/attachment/9bc80f042e64551556faf3b75e8ad3c1a25ec5d7.pdf", "author": "Guilherme Penedo;Hynek Kydl\u00ed\u010dek;Loubna Ben allal;Anton Lozhkov;Margaret Mitchell;Colin Raffel;Leandro Von Werra;Thomas Wolf", "authorids": "~Guilherme_Penedo1;~Hynek_Kydl\u00ed\u010dek1;~Loubna_Ben_allal1;~Anton_Lozhkov1;~Margaret_Mitchell3;~Colin_Raffel1;~Leandro_Von_Werra1;~Thomas_Wolf1", "gender": "M;M;F;;F;;M;M", "homepage": "https://github.com/guipenedo;https://me.hynky.name/;https://loubnabnl.github.io/;;http://m-mitchell.com;http://colinraffel.com;https://github.com/lvwerra;https://thomwolf.io", "dblp": ";;;;56/2856;149/0082;223/1855;", "google_scholar": "L-jmoJYAAAAJ;;reU1i-sAAAAJ;xlMMVCAAAAAJ;5na92fcAAAAJ;I66ZBYwAAAAJ;https://scholar.google.com/citations?hl=en;D2H5EFEAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;https://www.linkedin.com/mwlite/in/loubna-ben-allal-238690152;anton-lozhkov/;margaret-mitchell-9b13429/;;lvwerra/;", "or_profile": "~Guilherme_Penedo1;~Hynek_Kydl\u00ed\u010dek1;~Loubna_Ben_allal1;~Anton_Lozhkov1;~Margaret_Mitchell3;~Colin_Raffel1;~Leandro_Von_Werra1;~Thomas_Wolf1", "aff": "HuggingFace;Huggingface;Hugging Face;Hugging Face;;Hugging Face;Hugging Face;Hugging Face", "aff_domain": "huggingface.co;huggingface.co;hugggingface.co;huggingface.co;;huggingface.co;hf.co;huggingface.co", "position": "Researcher;Intern;Researcher;Machine Learning Engineer;;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\npenedo2024the,\ntitle={The FineWeb Datasets: Decanting the Web for the Finest Text Data at Scale},\nauthor={Guilherme Penedo and Hynek Kydl{\\'\\i}{\\v{c}}ek and Loubna Ben allal and Anton Lozhkov and Margaret Mitchell and Colin Raffel and Leandro Von Werra and Thomas Wolf},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=n6SCkn2QaG}\n}", "github": "", "reviewers": "RcqR;DjLJ;gcLr;rXCs", "pdf_size": 3214844, "rating": "7;7;7;8", "confidence": "4;5;3;5", "wc_summary_and_contributions": "55;103;72;85", "wc_strengths": "121;65;25;27", "wc_improvement": "150;113;20;61", "wc_limitations": "13;51;1;162", "wc_correctness": "3;14;4;15", "wc_clarity": "6;165;1;5", "wc_relation_to_prior_work": "25;28;6;9", "wc_documentation": "12;16;16;23", "wc_additional_feedback": "1;1;1;1", "wc_review": "386;556;146;388", "wc_reply_reviewers": "0;0;0;126", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 78.75, 17.583728273605686 ], "wc_strengths_avg": [ 59.5, 38.91978931083774 ], "wc_improvement_avg": [ 86.0, 49.512624652708524 ], "wc_limitations_avg": [ 56.75, 63.507381460740454 ], "wc_correctness_avg": [ 9.0, 5.522680508593631 ], "wc_clarity_avg": [ 44.25, 69.7401426726387 ], "wc_relation_to_prior_work_avg": [ 17.0, 9.617692030835672 ], "wc_documentation_avg": [ 16.75, 3.960744879438715 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 369.0, 146.0719001040241 ], "wc_reply_reviewers_avg": [ 31.5, 54.559600438419636 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12077055439257234910&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "huggingface.co;huggingface.co;hugggingface.co;huggingface.co;;huggingface.co;hf.co;huggingface.co", "author_num": 8, "aff_unique_index": "0;0;1;1;1;1;1", "aff_unique_norm": "Huggingface;Hugging Face", "aff_unique_dep": ";", "aff_unique_url": "https://huggingface.co;https://huggingface.co", "aff_unique_abbr": "HuggingFace;Hugging Face", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Few-Shot Adversarial Prompt Learning on Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93713", "id": "n9xVaQMJNK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=n9xVaQMJNK", "openreview": "https://openreview.net/forum?id=n9xVaQMJNK", "poster": "/media/PosterPDFs/NeurIPS%202024/93713.png?t=1731378167.3586323", "project": "", "author_site": "Yiwei Zhou, Xiaobo Xia, Zhiwei Lin, Bo Han, Tongliang Liu", "tldr": "", "abstract": "The vulnerability of deep neural networks to imperceptible adversarial perturbations has attracted widespread attention. Inspired by the success of vision-language foundation models, previous efforts achieved zero-shot adversarial robustness by aligning adversarial visual features with text supervision. However, in practice, they are still unsatisfactory due to several issues, including heavy adaptation cost, suboptimal text supervision, and uncontrolled natural generalization capacity. In this paper, to address these issues, we propose a few-shot adversarial prompt framework where adapting input sequences with limited data makes significant adversarial robustness improvement. Specifically, we achieve this by providing adversarially correlated text supervision that is end-to-end learned from adversarial examples. We also propose a novel training objective that enhances the consistency of multi-modal features while encourages differentiated uni-modal features between natural and adversarial examples. The proposed framework gives access to learn adversarial text supervision, which provides superior cross-modal adversarial alignment and matches state-of-the-art zero-shot adversarial robustness with only 1\\% training data. Code is available at: https://github.com/lionel-w2/FAP.", "keywords": "Adversarial Robustness;Prompt Learning;Vision-Language Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/8e0e03e69725e20fbb54ff650ededbc7423c6235.zip", "author": "Yiwei Zhou;Xiaobo Xia;Zhiwei Lin;Bo Han;Tongliang Liu", "authorids": "~Yiwei_Zhou2;~Xiaobo_Xia1;~Zhiwei_Lin3;~Bo_Han1;~Tongliang_Liu1", "gender": "M;M;F;M;M", "homepage": ";https://xiaoboxia.github.io/;https://ac.bit.edu.cn/szdw/jsml/znxxclykzyjs1/f7b90bc12ff14897bf00a110ab3ee4ab.htm;https://tongliang-liu.github.io/;https://bhanml.github.io/", "dblp": "139/1191;242/8072;;150/6667;241/0472-3", "google_scholar": ";jRsugY0AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;nTNjqHwAAAAJ", "orcid": "0000-0001-8291-1884;;0000-0003-4426-1221;;", "linkedin": ";;;;", "or_profile": "~Yiwei_Zhou2;~Xiaobo_Xia1;~Zhiwei_Lin3;~Tongliang_Liu1;~bo_han2", "aff": "Beijing Institute of Technology;The University of Sydney;Beijing Institute of Technology;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI", "aff_domain": "bit.edu.cn;sydney.edu.au;bit.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae", "position": "MS student;PhD student;Associate Professor;Affiliated Associate Professor;Researcher", "bibtex": "@inproceedings{\nzhou2024fewshot,\ntitle={Few-Shot Adversarial Prompt Learning on Vision-Language Models},\nauthor={Yiwei Zhou and Xiaobo Xia and Zhiwei Lin and Bo Han and Tongliang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=n9xVaQMJNK}\n}", "github": "", "reviewers": "KV22;Rdwb;Lycp;M4S3", "pdf_size": 11360343, "rating": "5;6;6;7", "confidence": "3;2;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "99;90;116;115", "wc_strengths": "21;126;62;95", "wc_weaknesses": "89;263;52;57", "wc_questions": "52;40;16;35", "wc_limitations": "7;2;1;54", "wc_review": "268;521;247;356", "wc_reply_reviewers": "25;89;4;41", "wc_reply_authors": "97;32;12;44", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 105.0, 10.977249200050075 ], "wc_strengths_avg": [ 76.0, 38.99358921669048 ], "wc_weaknesses_avg": [ 115.25, 86.47651415268773 ], "wc_questions_avg": [ 35.75, 12.968712349342937 ], "wc_limitations_avg": [ 16.0, 22.056745000112777 ], "wc_review_avg": [ 348.0, 107.92821688511305 ], "wc_reply_reviewers_avg": [ 39.75, 31.315930450810495 ], "wc_reply_authors_avg": [ 46.25, 31.45135132232 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16289353341082820841&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "bit.edu.cn;sydney.edu.au;bit.edu.cn;mbzuai.ac.ae;mbzuai.ac.ae", "author_num": 5, "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Beijing Institute of Technology;University of Sydney;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.sydney.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": "BIT;USYD;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;2", "aff_country_unique": "China;Australia;United Arab Emirates" }, { "title": "Recurrent Reinforcement Learning with Memoroids", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93712", "id": "nA4Q983a1v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nA4Q983a1v", "openreview": "https://openreview.net/forum?id=nA4Q983a1v", "poster": "/media/PosterPDFs/NeurIPS%202024/93712.png?t=1732983170.3953779", "project": "", "author_site": "Steven Morad, Chris Lu, Ryan Kortvelesy, Stephan Liwicki, Jakob Foerster, Amanda Prorok", "tldr": "", "abstract": "Memory models such as Recurrent Neural Networks (RNNs) and Transformers address Partially Observable Markov Decision Processes (POMDPs) by mapping trajectories to latent Markov states. Neither model scales particularly well to long sequences, especially compared to an emerging class of memory models called Linear Recurrent Models. We discover that the recurrent update of these models resembles a monoid, leading us to reformulate existing models using a novel monoid-based framework that we call memoroids. We revisit the traditional approach to batching in recurrent reinforcement learning, highlighting theoretical and empirical deficiencies. We leverage memoroids to propose a batching method that improves sample efficiency, increases the return, and simplifies the implementation of recurrent loss functions in reinforcement learning.", "keywords": "POMDP;reinforcement learning;memory models;recurrent neural network", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/c1bbdc4e84c73bf23d26b078588310eddfa63161.zip", "author": "Steven Morad;Chris Lu;Ryan Kortvelesy;Stephan Liwicki;Jakob Nicolaus Foerster;Amanda Prorok", "authorids": "~Steven_Morad1;~Chris_Lu1;~Ryan_Kortvelesy1;~Stephan_Liwicki3;~Jakob_Nicolaus_Foerster1;~Amanda_Prorok1", "gender": "M;;M;;M;", "homepage": "http://www.dangersteve.com/home;;;;https://www.jakobfoerster.com;", "dblp": "247/9311;77/9579;289/0863;;176/5095;", "google_scholar": "KvCgriAAAAAJ;4WLoIRsAAAAJ;fMxXjiIAAAAJ;;6z4lQzMAAAAJ;", "orcid": "0000-0002-8413-2953;;0000-0001-6654-0796;;;", "linkedin": ";;;;;", "or_profile": "~Steven_Morad1;~Chris_Lu1;~Ryan_Kortvelesy1;~Stephan_Liwicki3;~Jakob_Nicolaus_Foerster1;~Amanda_Prorok1", "aff": "University of Cambridge;University of Oxford;;;University of Oxford, University of Oxford;", "aff_domain": "cam.ac.uk;ox.ac.uk;;;eng.ox.ac.uk;", "position": "PhD student;PhD student;;;Associate Professor;", "bibtex": "@inproceedings{\nmorad2024recurrent,\ntitle={Recurrent Reinforcement Learning with Memoroids},\nauthor={Steven Morad and Chris Lu and Ryan Kortvelesy and Stephan Liwicki and Jakob Nicolaus Foerster and Amanda Prorok},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nA4Q983a1v}\n}", "github": "", "reviewers": "WNca;hZNa;M71j;KMv5", "pdf_size": 3266236, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;2;2;4", "wc_summary": "110;78;151;93", "wc_strengths": "84;40;117;57", "wc_weaknesses": "520;125;168;57", "wc_questions": "43;74;86;128", "wc_limitations": "1;18;9;9", "wc_review": "758;335;531;344", "wc_reply_reviewers": "522;263;60;52", "wc_reply_authors": "263;344;96;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 108.0, 27.285527299284507 ], "wc_strengths_avg": [ 74.5, 29.124731758421397 ], "wc_weaknesses_avg": [ 217.5, 179.07610114138626 ], "wc_questions_avg": [ 82.75, 30.474374480865066 ], "wc_limitations_avg": [ 9.25, 6.015604707757983 ], "wc_review_avg": [ 492.0, 172.35863772958987 ], "wc_reply_reviewers_avg": [ 224.25, 191.5755399313806 ], "wc_reply_authors_avg": [ 175.75, 135.24861367126837 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sCsg3baikzEJ:scholar.google.com/&scioq=Recurrent+Reinforcement+Learning+with+Memoroids&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cam.ac.uk;ox.ac.uk;;;eng.ox.ac.uk;", "author_num": 6, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Cambridge;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "Cambridge;Oxford", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "MARPLE: A Benchmark for Long-Horizon Inference", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97512", "id": "nAFBHoMpQs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nAFBHoMpQs", "openreview": "https://openreview.net/forum?id=nAFBHoMpQs", "poster": "/media/PosterPDFs/NeurIPS%202024/97512.png?t=1733465818.786301", "project": "", "author_site": "Emily Jin, Zhuoyi Huang, Jan-Philipp Fraenken, Weiyu Liu, Hannah Cha, Erik Brockbank, Sarah Wu, Ruohan Zhang, Jiajun Wu, Tobias Gerstenberg", "tldr": "", "abstract": "Reconstructing past events requires reasoning across long time horizons. To figure out what happened, humans draw on prior knowledge about the world and human behavior and integrate insights from various sources of evidence including visual, language, and auditory cues. We introduce MARPLE, a benchmark for evaluating long-horizon inference capabilities using multi-modal evidence. Our benchmark features agents interacting with simulated households, supporting vision, language, and auditory stimuli, as well as procedurally generated environments and agent behaviors. Inspired by classic ``whodunit'' stories, we ask AI models and human participants to infer which agent caused a change in the environment based on a step-by-step replay of what actually happened. The goal is to correctly identify the culprit as early as possible. Our findings show that human participants outperform both traditional Monte Carlo simulation methods and an LLM baseline (GPT-4) on this task. Compared to humans, traditional inference models are less robust and performant, while GPT-4 has difficulty comprehending environmental changes. We analyze factors influencing inference performance and ablate different modes of evidence, finding that all modes are valuable for performance. Overall, our experiments demonstrate that the long-horizon, multimodal inference tasks in our benchmark present a challenge to current models. Project website: https://marple-benchmark.github.io/.", "keywords": "machine learning;long-horizon inference;benchmark", "primary_area": "", "supplementary_material": "/attachment/66d748f17232272a841f6d0dc256aac4dac84a96.pdf", "author": "Emily Jin;Zhuoyi Huang;Jan-Philipp Fr\u00e4nken;Weiyu Liu;Hannah Cha;Erik Brockbank;Sarah A Wu;Ruohan Zhang;Jiajun Wu;Tobias Gerstenberg", "authorids": "~Emily_Jin1;~Zhuoyi_Huang1;~Jan-Philipp_Fr\u00e4nken1;~Weiyu_Liu1;~Hannah_Cha1;~Erik_Brockbank1;~Sarah_A_Wu1;~Ruohan_Zhang1;~Jiajun_Wu1;~Tobias_Gerstenberg1", "gender": "F;F;;M;F;M;;M;M;M", "homepage": ";;https://janphilippfranken.github.io/;http://weiyuliu.com/;https://www.hannahcha.com/;;;https://ai.stanford.edu/~zharu/;https://jiajunwu.com;http://cicl.stanford.edu/member/tobias_gerstenberg", "dblp": ";;;133/0311.html;389/5581.html;;;;117/4768;", "google_scholar": ";;s2omqQcAAAAJ;PHi0YEQAAAAJ;O3s0H5AAAAAJ;N2ZY9xsAAAAJ;;-bqvNWoAAAAJ;2efgcS0AAAAJ;d0TfP8EAAAAJ", "orcid": ";;0000-0001-5467-1887;;;;;;0000-0002-4176-343X;0000-0002-9162-0779", "linkedin": "emily-jin-020/;zhuoyi-huang/;;;hannah-cha-b15b6b230/;;;;jiajunwu/;", "or_profile": "~Emily_Jin1;~Zhuoyi_Huang1;~Jan-Philipp_Fr\u00e4nken1;~Weiyu_Liu1;~Hannah_Cha1;~Erik_Brockbank1;~Sarah_A_Wu1;~Ruohan_Zhang1;~Jiajun_Wu1;~Tobias_Gerstenberg1", "aff": "Stanford University;Microsoft;Stanford University;Stanford University;Stanford University;Stanford University;;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu", "position": "Undergrad student;Researcher;Postdoc;Postdoc;Undergrad student;Postdoc;;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njin2024marple,\ntitle={{MARPLE}: A Benchmark for Long-Horizon Inference},\nauthor={Emily Jin and Zhuoyi Huang and Jan-Philipp Fr{\\\"a}nken and Weiyu Liu and Hannah Cha and Erik Brockbank and Sarah A Wu and Ruohan Zhang and Jiajun Wu and Tobias Gerstenberg},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=nAFBHoMpQs}\n}", "github": "", "reviewers": "8BjY;shNX;fKzP;AsPM", "pdf_size": 6897638, "rating": "6;7;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "70;96;74;63", "wc_strengths": "10;104;3;28", "wc_improvement": "29;126;3;6", "wc_limitations": "36;167;1;7", "wc_correctness": "1;115;1;20", "wc_clarity": "1;1;9;6", "wc_relation_to_prior_work": "1;99;1;5", "wc_documentation": "8;79;1;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "157;788;94;140", "wc_reply_reviewers": "23;0;0;55", "wc_reply_authors": "100;119;118;115", "reply_reviewers": "1;0;0;1", "reply_authors": "3;3;3;3", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 75.75, 12.336429791475327 ], "wc_strengths_avg": [ 36.25, 40.16450547436131 ], "wc_improvement_avg": [ 41.0, 50.094909921068826 ], "wc_limitations_avg": [ 52.75, 67.2769462743368 ], "wc_correctness_avg": [ 34.25, 47.261903262564445 ], "wc_clarity_avg": [ 4.25, 3.418698582794336 ], "wc_relation_to_prior_work_avg": [ 26.5, 41.88973621306298 ], "wc_documentation_avg": [ 23.0, 32.42684073418192 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 294.75, 285.7090959350087 ], "wc_reply_reviewers_avg": [ 19.5, 22.544400635190993 ], "wc_reply_authors_avg": [ 113.0, 7.648529270389178 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t4FGc1iSgDkJ:scholar.google.com/&scioq=MARPLE:+A+Benchmark+for+Long-Horizon+Inference&hl=en&as_sdt=0,44", "gs_version_total": 6, "email": "stanford.edu;microsoft.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu;stanford.edu;stanford.edu", "author_num": 10, "aff_unique_index": "0;1;0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "Stanford;Microsoft", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Applying Guidance in a Limited Interval Improves Sample and Distribution Quality in Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93711", "id": "nAIhvNy15T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nAIhvNy15T", "openreview": "https://openreview.net/forum?id=nAIhvNy15T", "poster": "/media/PosterPDFs/NeurIPS%202024/93711.png?t=1731409037.95878", "project": "", "author_site": "Tuomas Kynk\u00e4\u00e4nniemi, Miika Aittala, Tero Karras, Samuli Laine, Timo Aila, Jaakko Lehtinen", "tldr": "", "abstract": "Guidance is a crucial technique for extracting the best performance out of image-generating diffusion models. Traditionally, a constant guidance weight has been applied throughout the sampling chain of an image. We show that guidance is clearly harmful toward the beginning of the chain (high noise levels), largely unnecessary toward the end (low noise levels), and only beneficial in the middle. We thus restrict it to a specific range of noise levels, improving both the inference speed and result quality. This limited guidance interval improves the record FID in ImageNet-512 significantly, from 1.81 to 1.40. We show that it is quantitatively and qualitatively beneficial across different sampler parameters, network architectures, and datasets, including the large-scale setting of Stable Diffusion XL. We thus suggest exposing the guidance interval as a hyperparameter in all diffusion models that use guidance.", "keywords": "generative models;diffusion models;classifier-free guidance", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Tuomas Kynk\u00e4\u00e4nniemi;Miika Aittala;Tero Karras;Samuli Laine;Timo Aila;Jaakko Lehtinen", "authorids": "~Tuomas_Kynk\u00e4\u00e4nniemi1;~Miika_Aittala2;~Tero_Karras1;~Samuli_Laine1;~Timo_Aila1;~Jaakko_Lehtinen1", "gender": ";M;M;M;M;M", "homepage": ";https://people.csail.mit.edu/miika/;http://research.nvidia.com/person/tero-karras;https://users.aalto.fi/~laines9/;https://users.aalto.fi/~ailat1/;https://users.aalto.fi/~lehtinj7/", "dblp": "239/6466;;32/7864;51/226;95/2789;71/4075", "google_scholar": "https://scholar.google.fi/citations?user=7sATEtIAAAAJ;-_EKVQ0AAAAJ;https://scholar.google.fi/citations?user=-50qJW8AAAAJ;UCXJOTUAAAAJ;e7abmgkAAAAJ;https://scholar.google.fi/citations?user=Vpr6s3sAAAAJ", "orcid": ";;;0000-0002-0903-3197;;", "linkedin": ";;;;;", "or_profile": "~Tuomas_Kynk\u00e4\u00e4nniemi1;~Miika_Aittala2;~Tero_Karras1;~Samuli_Laine1;~Timo_Aila1;~Jaakko_Lehtinen1", "aff": "Aalto University;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA", "aff_domain": "aalto.fi;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "position": "PhD student;Senior Research Scientist;Distinguished Research Scientist;Distinguished Research Scientist;Distinguished Research Scientist;Distinguished Research Scientist", "bibtex": "@inproceedings{\nkynk{\\\"a}{\\\"a}nniemi2024applying,\ntitle={Applying Guidance in a Limited Interval Improves Sample and Distribution Quality in Diffusion Models},\nauthor={Tuomas Kynk{\\\"a}{\\\"a}nniemi and Miika Aittala and Tero Karras and Samuli Laine and Timo Aila and Jaakko Lehtinen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nAIhvNy15T}\n}", "github": "", "reviewers": "2mKR;HBPT;JNG5", "pdf_size": 23236452, "rating": "3;6;7", "confidence": "5;4;5", "soundness": "3;3;4", "novelty": "1;3;2", "presentation": "3;3;4", "wc_summary": "22;72;94", "wc_strengths": "30;105;86", "wc_weaknesses": "64;186;257", "wc_questions": "2;75;150", "wc_limitations": "1;37;11", "wc_review": "119;475;598", "wc_reply_reviewers": "0;81;53", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.666666666666664, 30.12566274052001 ], "wc_strengths_avg": [ 73.66666666666667, 31.836387288063253 ], "wc_weaknesses_avg": [ 169.0, 79.7036176510619 ], "wc_questions_avg": [ 75.66666666666667, 60.422585916930835 ], "wc_limitations_avg": [ 16.333333333333332, 15.173075568988056 ], "wc_review_avg": [ 397.3333333333333, 203.11627759050288 ], "wc_reply_reviewers_avg": [ 44.666666666666664, 33.58901936976163 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.2773500981126146, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7920181224492512252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "aalto.fi;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Aalto University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.aalto.fi;https://www.nvidia.com", "aff_unique_abbr": "Aalto;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Finland;United States" }, { "title": "Generating Highly Designable Proteins with Geometric Algebra Flow Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93710", "id": "nAnEStxyfy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nAnEStxyfy", "openreview": "https://openreview.net/forum?id=nAnEStxyfy", "poster": "/media/PosterPDFs/NeurIPS%202024/93710.png?t=1731676696.719574", "project": "", "author_site": "Simon Wagner, Leif Seute, Vsevolod Viliuga, Nicolas Wolf, Frauke Gr\u00e4ter, Jan St\u00fchmer", "tldr": "", "abstract": "We introduce a generative model for protein backbone design utilizing geometric products and higher order message passing. In particular, we propose Clifford Frame Attention (CFA), an extension of the invariant point attention (IPA) architecture from AlphaFold2, in which the backbone residue frames and geometric features are represented in the projective geometric algebra. This enables to construct geometrically expressive messages between residues, including higher order terms, using the bilinear operations of the algebra. We evaluate our architecture by incorporating it into the framework of FrameFlow, a state-of-the-art flow matching model for protein backbone generation. The proposed model achieves high designability, diversity and novelty, while also sampling protein backbones that follow the statistical distribution of secondary structure elements found in naturally occurring proteins, a property so far only insufficiently achieved by many state-of-the-art generative models.", "keywords": "Proteins; Flow Matching; Geometric Algebra; Generative models; Equivariant models; Protein design; AlphaFold; Local frames", "primary_area": "generative_models", "supplementary_material": "", "author": "Simon Wagner;Leif Seute;Vsevolod Viliuga;Nicolas Wolf;Frauke Gr\u00e4ter;Jan Stuehmer", "authorids": "~Simon_Wagner1;~Leif_Seute1;~Vsevolod_Viliuga1;~Nicolas_Wolf1;~Frauke_Gr\u00e4ter1;~Jan_Stuehmer1", "gender": "M;M;M;;F;M", "homepage": ";;https://github.com/ncyx;;https://www.h-its.org/research/mbm/;", "dblp": "152/4244;;;;35/9500;91/8483", "google_scholar": "j8LinAoAAAAJ;https://scholar.google.de/citations?user=BPCn0u0AAAAJ;;;https://scholar.google.de/citations?user=ctkiSbUAAAAJ;pGukv5YAAAAJ", "orcid": ";;;;0000-0003-2891-3381;0009-0002-0122-5482", "linkedin": "simon-wagner-91b192236;;ncyx/;nicolas-wolf-670245309/;;", "or_profile": "~Simon_Wagner1;~Leif_Seute1;~Vsevolod_Viliuga1;~Nicolas_Wolf1;~Frauke_Gr\u00e4ter1;~Jan_Stuehmer1", "aff": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg Institute for Theoretical Studies;Ruprecht-Karls-Universit\u00e4t Heidelberg;Ruprecht-Karls-Universit\u00e4t Heidelberg;Ruprecht-Karls-Universit\u00e4t Heidelberg;Karlsruhe Institute of Technology", "aff_domain": "uni-heidelberg.de;h-its.org;uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de;kit.edu", "position": "MS student;PhD student;MS student;MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwagner2024generating,\ntitle={Generating Highly Designable Proteins with Geometric Algebra Flow Matching},\nauthor={Simon Wagner and Leif Seute and Vsevolod Viliuga and Nicolas Wolf and Frauke Gr{\\\"a}ter and Jan Stuehmer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nAnEStxyfy}\n}", "github": "", "reviewers": "zVgj;yVxp;5KkW;ZJDu", "pdf_size": 4059479, "rating": "6;6;6;7", "confidence": "4;4;3;5", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "148;74;73;37", "wc_strengths": "147;35;58;52", "wc_weaknesses": "221;258;191;85", "wc_questions": "37;52;31;37", "wc_limitations": "16;1;6;6", "wc_review": "569;420;359;217", "wc_reply_reviewers": "100;33;16;20", "wc_reply_authors": "37;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.0, 40.37945021913993 ], "wc_strengths_avg": [ 73.0, 43.54882317583335 ], "wc_weaknesses_avg": [ 188.75, 64.42970976187927 ], "wc_questions_avg": [ 39.25, 7.75806032459145 ], "wc_limitations_avg": [ 7.25, 5.448623679425842 ], "wc_review_avg": [ 391.25, 126.31780357495138 ], "wc_reply_reviewers_avg": [ 42.25, 33.92915413033458 ], "wc_reply_authors_avg": [ 9.25, 16.021469970012117 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15719495733995468960&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "uni-heidelberg.de;h-its.org;uni-heidelberg.de;uni-heidelberg.de;uni-heidelberg.de;kit.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Ruprecht-Karls-Universit\u00e4t Heidelberg;Heidelberg Institute for Theoretical Studies;Karlsruhe Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-heidelberg.de/;https://www.hits.org/;https://www.kit.edu", "aff_unique_abbr": "Uni Heidelberg;HITS;KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "UniTS: A Unified Multi-Task Time Series Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93709", "id": "nBOdYBptWW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nBOdYBptWW", "openreview": "https://openreview.net/forum?id=nBOdYBptWW", "poster": "", "project": "", "author_site": "Shanghua Gao, Teddy Koker, Owen Queen, Tom Hartvigsen, Theodoros Tsiligkaridis, Marinka Zitnik", "tldr": "", "abstract": "Although pre-trained transformers and reprogrammed text-based LLMs have shown strong performance on time series tasks, the best-performing architectures vary widely across tasks, with most models narrowly focused on specific areas, such as time series forecasting. Unifying predictive and generative time series tasks within a single model remains challenging. We introduce UniTS, a unified multi-task time series model that utilizes task tokenization to integrate predictive and generative tasks into a single framework. UniTS employs a modified transformer block to capture universal time series representations, enabling transferability from a heterogeneous, multi-domain pre-training dataset\u2014characterized by diverse dynamic patterns, sampling rates, and temporal scales\u2014to a wide range of downstream datasets with varied task specifications and data domains. Tested on 38 datasets across human activity sensors, healthcare, engineering, and finance, UniTS achieves superior performance compared to 12 forecasting models, 20 classification models, 18 anomaly detection models, and 16 imputation models, including adapted text-based LLMs. UniTS also demonstrates strong few-shot and prompt capabilities when applied to new domains and tasks. In single-task settings, UniTS outperforms competitive task-specialized time series models. Code and datasets are available at https://github.com/mims-harvard/UniTS.", "keywords": "Time Series Forecasting;Time Series Classification;Time Series Imputation;Time Series Anomaly Detection;Prompt Learning;Pretraining;Few-Shot Learning;Unified Model;Multi-task;Task Tokenization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Shanghua Gao;Teddy Koker;Owen Queen;Thomas Hartvigsen;Theodoros Tsiligkaridis;Marinka Zitnik", "authorids": "~Shanghua_Gao1;~Teddy_Koker1;~Owen_Queen1;~Thomas_Hartvigsen1;~Theodoros_Tsiligkaridis1;~Marinka_Zitnik1", "gender": ";M;M;M;M;", "homepage": ";https://teddykoker.com;https://owencqueen.github.io/;https://www.tomhartvigsen.com;https://sites.google.com/view/theo-t;https://zitniklab.hms.harvard.edu", "dblp": ";283/5878;;211/5752;64/10412;53/11277.html", "google_scholar": ";br990A8AAAAJ;https://scholar.google.com/citations?hl=en;rIjeeRsAAAAJ;hVUVOTIAAAAJ;YtUDgPIAAAAJ", "orcid": ";;;;;", "linkedin": ";teddykoker/;;;;", "or_profile": "~Shanghua_Gao1;~Teddy_Koker1;~Owen_Queen1;~Thomas_Hartvigsen1;~Theodoros_Tsiligkaridis1;~Marinka_Zitnik1", "aff": ";MIT Lincoln Laboratory, Massachusetts Institute of Technology;Harvard Medical School, Harvard University;University of Virginia, Charlottesville;MIT Lincoln Laboratory, Massachusetts Institute of Technology;Harvard University", "aff_domain": ";ll.mit.edu;hms.harvard.edu;virginia.edu;ll.mit.edu;harvard.edu", "position": ";Researcher;Researcher;Assistant Professor;Senior AI Research Scientist;Associate Professor", "bibtex": "@inproceedings{\ngao2024units,\ntitle={Uni{TS}: A Unified Multi-Task Time Series Model},\nauthor={Shanghua Gao and Teddy Koker and Owen Queen and Thomas Hartvigsen and Theodoros Tsiligkaridis and Marinka Zitnik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nBOdYBptWW}\n}", "github": "", "reviewers": "YrSc;bSMF;EsEH;h6vS", "pdf_size": 2543655, "rating": "5;6;6;6", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;2;3", "wc_summary": "91;58;51;95", "wc_strengths": "44;51;58;19", "wc_weaknesses": "213;21;184;381", "wc_questions": "63;26;222;2", "wc_limitations": "5;1;6;4", "wc_review": "416;157;521;501", "wc_reply_reviewers": "72;19;22;0", "wc_reply_authors": "1347;236;126;2090", "reply_reviewers": "1;1;1;0", "reply_authors": "4;2;2;6", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.75, 19.45989465541887 ], "wc_strengths_avg": [ 43.0, 14.713938969562161 ], "wc_weaknesses_avg": [ 199.75, 127.69764093357402 ], "wc_questions_avg": [ 78.25, 85.79153513022132 ], "wc_limitations_avg": [ 4.0, 1.8708286933869707 ], "wc_review_avg": [ 398.75, 145.03512505596706 ], "wc_reply_reviewers_avg": [ 28.25, 26.630574533794796 ], "wc_reply_authors_avg": [ 949.75, 813.3235441692316 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15628702149476825258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ll.mit.edu;hms.harvard.edu;virginia.edu;ll.mit.edu;harvard.edu", "author_num": 6, "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Harvard University;University of Virginia", "aff_unique_dep": "Lincoln Laboratory;Harvard Medical School;", "aff_unique_url": "https://web.mit.edu;https://www.harvard.edu;https://www.virginia.edu", "aff_unique_abbr": "MIT;Harvard;UVA", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Cambridge;Charlottesville;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Geometric Analysis of Nonlinear Manifold Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93708", "id": "nBQHTBVnfr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nBQHTBVnfr", "openreview": "https://openreview.net/forum?id=nBQHTBVnfr", "poster": "", "project": "", "author_site": "Nimita Shinde, Tianjiao Ding, Daniel Robinson, Rene Vidal", "tldr": "", "abstract": "Manifold clustering is an important problem in motion and video segmentation, natural image clustering, and other applications where high-dimensional data lie on multiple, low-dimensional, nonlinear manifolds. While current state-of-the-art methods on large-scale datasets such as CIFAR provide good empirical performance, they do not have any proof of theoretical correctness. In this work, we propose a method that clusters data belonging to a union of nonlinear manifolds. Furthermore, for a given input data sample $y$ belonging to the $l$th manifold $\\mathcal{M}_l$, we provide geometric conditions that guarantee a manifold-preserving representation of $y$ can be recovered from the solution to the proposed model. The geometric conditions require that (i) $\\mathcal{M}_l$ is well-sampled in the neighborhood of $y$, with the sampling density given as a function of the curvature, and (ii) $\\mathcal{M}_l$ is sufficiently separated from the other manifolds. In addition to providing proof of correctness in this setting, a numerical comparison with state-of-the-art methods on CIFAR datasets shows that our method performs competitively although marginally worse than methods without", "keywords": "Manifold clustering;large scale clustering", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/94c24b54fa0a75f41c505f0dfdb8fa9fd669eb49.zip", "author": "Nimita Shinde;Tianjiao Ding;Daniel Robinson;Rene Vidal", "authorids": "~Nimita_Shinde1;~Tianjiao_Ding1;~Daniel_Robinson2;~Rene_Vidal1", "gender": "M;;;F", "homepage": "https://tianjiaoding.com/;https://coral.ise.lehigh.edu/danielprobinson/;http://www.vision.jhu.edu;https://sites.google.com/view/nimitashinde", "dblp": "230/1227;;v/ReneVidal;304/0987", "google_scholar": "L3wy9QMAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.eg/citations?user=JwVUMxsAAAAJ", "orcid": ";;;", "linkedin": ";;rene-vidal-74844928/;", "or_profile": "~Tianjiao_Ding1;~Daniel_Robinson2;~Rene_Vidal1;~Nimita_Rajendra_Shinde1", "aff": "University of Pennsylvania;Lehigh University;Amazon;Lehigh University", "aff_domain": "seas.upenn.edu;lehigh.edu;amazon.com;lehigh.edu", "position": "PhD student;Associate Professor;Principal Researcher;Postdoc", "bibtex": "@inproceedings{\nshinde2024geometric,\ntitle={Geometric Analysis of Nonlinear Manifold Clustering},\nauthor={Nimita Shinde and Tianjiao Ding and Daniel Robinson and Rene Vidal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nBQHTBVnfr}\n}", "github": "", "reviewers": "yHKx;Mh3Q;XZ5T", "pdf_size": 735107, "rating": "6;7;8", "confidence": "4;3;4", "soundness": "3;4;4", "novelty": "3;3;4", "presentation": "3;4;3", "wc_summary": "93;29;89", "wc_strengths": "56;33;76", "wc_weaknesses": "372;34;12", "wc_questions": "310;20;2", "wc_limitations": "1;11;4", "wc_review": "832;127;183", "wc_reply_reviewers": "513;11;4", "wc_reply_authors": "371;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.33333333333333, 29.272664533466866 ], "wc_strengths_avg": [ 55.0, 17.568911937472585 ], "wc_weaknesses_avg": [ 139.33333333333334, 164.765152734295 ], "wc_questions_avg": [ 110.66666666666667, 141.1413790810083 ], "wc_limitations_avg": [ 5.333333333333333, 4.189935029992178 ], "wc_review_avg": [ 380.6666666666667, 319.95867788756027 ], "wc_reply_reviewers_avg": [ 176.0, 238.31212026807756 ], "wc_reply_authors_avg": [ 123.66666666666667, 174.89107721347276 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kBmboKwDnbgJ:scholar.google.com/&scioq=Geometric+Analysis+of+Nonlinear+Manifold+Clustering&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "seas.upenn.edu;lehigh.edu;amazon.com;lehigh.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Pennsylvania;Lehigh University;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://www.lehigh.edu;https://www.amazon.com", "aff_unique_abbr": "UPenn;Lehigh;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Graph Matching for Correlated Stochastic Block Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93707", "id": "nBhfIcDnRP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nBhfIcDnRP", "openreview": "https://openreview.net/forum?id=nBhfIcDnRP", "poster": "", "project": "", "author_site": "Shuwen Chai, Miklos Z. Racz", "tldr": "", "abstract": "We study learning problems on correlated stochastic block models with two balanced communities. Our main result gives the first efficient algorithm for graph matching in this setting. In the most interesting regime where the average degree is logarithmic in the number of vertices, this algorithm correctly matches all but a vanishing fraction of vertices with high probability, whenever the edge correlation parameter $s$ satisfies $s^2 > \\alpha \\approx 0.338$, where $\\alpha$ is Otter's tree-counting constant. Moreover, we extend this to an efficient algorithm for exact graph matching whenever this is information-theoretically possible, positively resolving an open problem of R\u00e1cz and Sridhar (NeurIPS 2021). Our algorithm generalizes the recent breakthrough work of Mao, Wu, Xu, and Yu (STOC 2023), which is based on centered subgraph counts of a large family of trees termed chandeliers. A major technical challenge that we overcome is dealing with the additional estimation errors that are necessarily present due to the fact that, in relevant parameter regimes, the latent community partition cannot be exactly recovered from a single graph. As an application of our results, we give an efficient algorithm for exact community recovery using multiple correlated graphs in parameter regimes where it is information-theoretically impossible to do so using just a single graph.", "keywords": "Graph matching;correlated random graphs;stochastic block model;community recovery;subgraph counting", "primary_area": "learning_theory", "supplementary_material": "", "author": "Shuwen Chai;Miklos Z. Racz", "authorids": "~Shuwen_Chai1;~Miklos_Z._Racz1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Shuwen_Chai1;~Miklos_Z._Racz1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nchai2024efficient,\ntitle={Efficient Graph Matching for Correlated Stochastic Block Models},\nauthor={Shuwen Chai and Miklos Z. Racz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nBhfIcDnRP}\n}", "github": "", "reviewers": "K457;PrRf;9uZn;xbYm;Nbs5", "pdf_size": 1375041, "rating": "5;6;6;6;7", "confidence": "3;2;4;2;4", "soundness": "3;3;3;3;4", "novelty": "2;4;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "247;84;174;181;463", "wc_strengths": "19;50;62;89;51", "wc_weaknesses": "77;95;100;61;1", "wc_questions": "1;51;22;13;1", "wc_limitations": "1;53;35;7;1", "wc_review": "345;333;393;351;517", "wc_reply_reviewers": "5;15;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 229.8, 127.61724021463557 ], "wc_strengths_avg": [ 54.2, 22.533530571128885 ], "wc_weaknesses_avg": [ 66.8, 35.67856499356441 ], "wc_questions_avg": [ 17.6, 18.478095139921752 ], "wc_limitations_avg": [ 19.4, 20.99142682144308 ], "wc_review_avg": [ 387.8, 67.6886992045201 ], "wc_reply_reviewers_avg": [ 4.0, 5.830951894845301 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.35355339059327373, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5617682480105038820&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";", "author_num": 2 }, { "title": "Fine-Tuning Large Vision-Language Models as Decision-Making Agents via Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93706", "id": "nBjmMF2IZU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nBjmMF2IZU", "openreview": "https://openreview.net/forum?id=nBjmMF2IZU", "poster": "/media/PosterPDFs/NeurIPS%202024/93706.png?t=1731380249.8938909", "project": "", "author_site": "Simon Zhai, Hao Bai, Zipeng Lin, Jiayi Pan, Peter Tong, Yifei Zhou, Alane Suhr, Saining Xie, Yann LeCun, Yi Ma, Sergey Levine", "tldr": "", "abstract": "Large vision-language models (VLMs) fine-tuned on specialized visual instruction-following data have exhibited impressive language reasoning capabilities across various scenarios. However, this fine-tuning paradigm may not be able to efficiently learn optimal decision-making agents in multi-step goal-directed tasks from interactive environments. To address this challenge, we propose an algorithmic framework that fine-tunes VLMs with reinforcement learning (RL). Specifically, our framework provides a task description and then prompts the VLM to generate chain-of-thought (CoT) reasoning, enabling the VLM to efficiently explore intermediate reasoning steps that lead to the final text-based action. Next, the open-ended text output is parsed into an executable action to interact with the environment to obtain goal-directed task rewards. Finally, our framework uses these task rewards to fine-tune the entire VLM with RL. Empirically, we demonstrate that our proposed framework enhances the decision-making capabilities of VLM agents across various tasks, enabling 7b models to outperform commercial models such as GPT4-V or Gemini. Furthermore, we find that CoT reasoning is a crucial component for performance improvement, as removing the CoT reasoning results in a significant decrease in the overall performance of our method.", "keywords": "large vision language model;reinforcement learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/3c38e47903a6d4e66fc398bb8297e61c56b73238.zip", "author": "Yuexiang Zhai;Hao Bai;Zipeng Lin;Jiayi Pan;Shengbang Tong;Yifei Zhou;Alane Suhr;Saining Xie;Yann LeCun;Yi Ma;Sergey Levine", "authorids": "~Yuexiang_Zhai1;~Hao_Bai1;~Zipeng_Lin1;~Jiayi_Pan1;~Shengbang_Tong1;~Yifei_Zhou1;~Alane_Suhr1;~Saining_Xie2;~Yann_LeCun1;~Yi_Ma4;~Sergey_Levine1", "gender": ";M;;M;M;M;Not Specified;M;M;M;M", "homepage": ";https://www.jackgethome.com;https://yuslzp.github.io;https://www.jiayipan.me/;https://tsb0601.github.io/petertongsb/;https://yifeizhou02.github.io/;http://www.alanesuhr.com;http://yann.lecun.com;http://people.eecs.berkeley.edu/~yima/;https://people.eecs.berkeley.edu/~svlevine/;", "dblp": "241/6124.html;53/8975;;39/6476-2;306/1406;50/7699;203/9306;l/YannLeCun;;80/7594;126/0960", "google_scholar": "78WTKm4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;n9Y_sQEAAAAJ;https://scholar.google.com/citations?hl=en;;daslsUkAAAAJ;WLN3QrAAAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;8R35rCwAAAAJ;https://scholar.google.co.uk/citations?user=Y2GtJkAAAAAJ", "orcid": ";0000-0001-9723-7490;;0000-0003-0817-4083;;;;;;;", "linkedin": ";jackgethome/;;;;yifei-zhou-57aa9b222/;;;;;", "or_profile": "~Yuexiang_Zhai1;~Hao_Bai1;~Zipeng_Lin1;~Jiayi_Pan1;~Shengbang_Tong1;~Yifei_Zhou1;~Alane_Suhr1;~Yann_LeCun1;~Yi_Ma4;~Sergey_Levine1;~Saining_Xie1", "aff": "University of California, Berkeley;University of Illinois, Urbana Champaign;University of California, Berkeley;University of California, Berkeley;New York University;University of California, Berkeley;University of California, Berkeley;New York University;University of California, Berkeley;Google;New York University", "aff_domain": "berkeley.edu;illinois.edu;berkeley.edu;berkeley.edu;nyu.edu;berkeley.edu;berkeley.edu;nyu.edu;berkeley.edu;google.com;nyu.edu", "position": "PhD student;MS student;Undergrad student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Full Professor;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nzhai2024finetuning,\ntitle={Fine-Tuning Large Vision-Language Models as Decision-Making Agents via Reinforcement Learning},\nauthor={Yuexiang Zhai and Hao Bai and Zipeng Lin and Jiayi Pan and Shengbang Tong and Yifei Zhou and Alane Suhr and Saining Xie and Yann LeCun and Yi Ma and Sergey Levine},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nBjmMF2IZU}\n}", "github": "", "reviewers": "MKrq;gYxm;QEzb;RYx1", "pdf_size": 3160532, "rating": "3;6;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "58;96;191;84", "wc_strengths": "28;68;124;78", "wc_weaknesses": "19;153;35;28", "wc_questions": "279;16;19;23", "wc_limitations": "1;1;15;2", "wc_review": "385;334;384;215", "wc_reply_reviewers": "295;50;46;47", "wc_reply_authors": "1214;22;45;7", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.25, 50.26616655365714 ], "wc_strengths_avg": [ 74.5, 34.15772240650714 ], "wc_weaknesses_avg": [ 58.75, 54.71003107292117 ], "wc_questions_avg": [ 84.25, 112.46638386646919 ], "wc_limitations_avg": [ 4.75, 5.931905258852336 ], "wc_review_avg": [ 329.5, 69.24774364555137 ], "wc_reply_reviewers_avg": [ 109.5, 107.10858975824488 ], "wc_reply_authors_avg": [ 322.0, 515.1742423685408 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3962050417326837201&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "berkeley.edu;illinois.edu;berkeley.edu;berkeley.edu;nyu.edu;berkeley.edu;berkeley.edu;nyu.edu;berkeley.edu;google.com;nyu.edu", "author_num": 11, "aff_unique_index": "0;1;0;0;2;0;0;2;0;3;2", "aff_unique_norm": "University of California, Berkeley;University of Illinois Urbana-Champaign;New York University;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.berkeley.edu;https://illinois.edu;https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;UIUC;NYU;Google", "aff_campus_unique_index": "0;1;0;0;0;0;0;3", "aff_campus_unique": "Berkeley;Urbana-Champaign;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Rad-NeRF: Ray-decoupled Training of Neural Radiance Field", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93705", "id": "nBrnfYeKf9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nBrnfYeKf9", "openreview": "https://openreview.net/forum?id=nBrnfYeKf9", "poster": "/media/PosterPDFs/NeurIPS%202024/93705.png?t=1730300007.1859934", "project": "", "author_site": "Lidong Guo, Xuefei Ning, Yonggan Fu, Tianchen Zhao, Zhuoliang Kang, Jincheng Yu, Yingyan (Celine) Lin, Yu Wang", "tldr": "", "abstract": "Although the neural radiance field (NeRF) exhibits high-fidelity visualization on the rendering task, it still suffers from rendering defects, especially in complex scenes. In this paper, we delve into the reason for the unsatisfactory performance and conjecture that it comes from interference in the training process. Due to occlusions in complex scenes, a 3D point may be invisible to some rays. On such a point, training with those rays that do not contain valid information about the point might interfere with the NeRF training. Based on the above intuition, we decouple the training process of NeRF in the ray dimension softly and propose a Ray-decoupled Training Framework for neural rendering (Rad-NeRF). Specifically, we construct an ensemble of sub-NeRFs and train a soft gate module to assign the gating scores to these sub-NeRFs based on specific rays. The gate module is jointly optimized with the sub-NeRF ensemble to learn the preference of sub-NeRFs for different rays automatically. Furthermore, we introduce depth-based mutual learning to enhance the rendering consistency among multiple sub-NeRFs and mitigate the depth ambiguity. Experiments on five datasets demonstrate that Rad-NeRF can enhance the rendering performance across a wide range of scene types compared with existing single-NeRF and multi-NeRF methods. With only 0.2% extra parameters, Rad-NeRF improves rendering performance by up to 1.5dB. Code is available at https://github.com/thu-nics/Rad-NeRF.", "keywords": "Neural rendering field;Mutual learning;Novel view synthesis;Soft gate module;Complex scenes with occlusions", "primary_area": "machine_vision", "supplementary_material": "/attachment/3c53f1f3f2d2618e4488169a0b5b36d4e0c5c47b.zip", "author": "Lidong Guo;Xuefei Ning;Yonggan Fu;Tianchen Zhao;Zhuoliang Kang;Jincheng Yu;Yingyan Celine Lin;Yu Wang", "authorids": "~Lidong_Guo1;~Xuefei_Ning1;~Yonggan_Fu1;~Tianchen_Zhao2;~Zhuoliang_Kang3;~Jincheng_Yu2;~Yingyan_Celine_Lin1;~Yu_Wang3", "gender": "M;Not Specified;M;M;M;M;M;F", "homepage": ";https://nics-effalg.com/ningxuefei/;https://www.yongganfu.com/;https://nicsefc.ee.tsinghua.edu.cn/people/tianchen-zhao/;https://zhuoliang.me/;http://nicsefc.ee.tsinghua.edu.cn/people/JinchengYu;https://nicsefc.ee.tsinghua.edu.cn;https://eiclab.scs.gatech.edu/", "dblp": "233/2101;202/9525;244/8166;217/2471;;;w/YuWang2.html;120/6981", "google_scholar": ";oVslpJsAAAAJ;https://scholar.google.com/citations?hl=en;;W1ZXjMkAAAAJ;1UDGpucAAAAJ;https://scholar.google.com.hk/citations?user=j8JGVvoAAAAJ;dio8IesAAAAJ", "orcid": "0000-0003-4162-6360;;;;;;0000-0001-6108-5157;", "linkedin": ";;yonggan-fu-b211831b0;;;;;yingyan-celine-lin-a281211a/", "or_profile": "~Lidong_Guo1;~Xuefei_Ning1;~Yonggan_Fu1;~Tianchen_Zhao2;~Zhuoliang_Kang3;~Jincheng_Yu2;~Yu_Wang3;~Yingyan_Lin1", "aff": "Tsinghua University;Tsinghua University;Georgia Institute of Technology;Infinigence;Meituan;;Tsinghua University;Georgia Institute of Technology", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;gatech.edu;infini-ai.com;meituan.com;;tsinghua.edu.cn;gatech.edu", "position": "PhD student;Research Assistant Professor;PhD student;Intern;Researcher;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nguo2024radnerf,\ntitle={Rad-Ne{RF}: Ray-decoupled Training of Neural Radiance Field},\nauthor={Lidong Guo and Xuefei Ning and Yonggan Fu and Tianchen Zhao and Zhuoliang Kang and Jincheng Yu and Yingyan Celine Lin and Yu Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nBrnfYeKf9}\n}", "github": "", "reviewers": "tZHj;nJqP;5G52;PUvY", "pdf_size": 12681111, "rating": "5;5;6;6", "confidence": "5;5;3;5", "soundness": "3;3;3;3", "novelty": "3;2;2;2", "presentation": "3;3;3;3", "wc_summary": "146;117;138;85", "wc_strengths": "148;57;78;16", "wc_weaknesses": "160;44;18;296", "wc_questions": "2;104;41;2", "wc_limitations": "2;10;6;1", "wc_review": "458;332;281;400", "wc_reply_reviewers": "116;29;78;345", "wc_reply_authors": "733;97;371;1208", "reply_reviewers": "1;1;2;2", "reply_authors": "2;3;3;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 121.5, 23.58495283014151 ], "wc_strengths_avg": [ 74.75, 47.80886424084973 ], "wc_weaknesses_avg": [ 129.5, 109.99431803506943 ], "wc_questions_avg": [ 37.25, 41.69757187175291 ], "wc_limitations_avg": [ 4.75, 3.5619517121937516 ], "wc_review_avg": [ 367.75, 67.06107291119044 ], "wc_reply_reviewers_avg": [ 142.0, 121.19199643540823 ], "wc_reply_authors_avg": [ 602.25, 416.16785976334114 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9457610642050643446&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;tsinghua.edu.cn;gatech.edu;infini-ai.com;meituan.com;;tsinghua.edu.cn;gatech.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0;1", "aff_unique_norm": "Tsinghua University;Georgia Institute of Technology;Infinigence;Meituan", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.gatech.edu;;https://www.meituan.com", "aff_unique_abbr": "THU;Georgia Tech;;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1", "aff_country_unique": "China;United States;" }, { "id": "nEnazjpwOx", "title": "Diffusion Models Meet Contextual Bandits with Large Action Spaces", "track": "main", "status": "Reject", "tldr": "", "abstract": "Efficient exploration in contextual bandits is crucial due to their large action space, where uninformed exploration can lead to computational and statistical inefficiencies. However, the rewards of actions are often correlated, which can be leveraged for more efficient exploration. In this work, we use pre-trained diffusion model priors to capture these correlations and develop diffusion Thompson sampling (dTS). We establish both theoretical and algorithmic foundations for dTS. Specifically, we derive efficient posterior approximations (required by dTS) under a diffusion model prior, which are of independent interest beyond bandits and reinforcement learning. We analyze dTS in linear instances and provide a Bayes regret bound highlighting the benefits of using diffusion models as priors. Our experiments validate our theory and demonstrate dTS's favorable performance.", "keywords": "Diffusion models;Bayesian bandit;Thompson sampling;Contextual bandit", "primary_area": "bandits", "supplementary_material": "/attachment/d46f68d371444868bc65fc60e8d520b81e7e60b0.zip", "author": "Imad Aouali", "authorids": "~Imad_Aouali2", "gender": "M", "homepage": "https://www.iaouali.com/", "dblp": "298/2200", "google_scholar": "cG9L1BwAAAAJ", "orcid": "", "linkedin": "imad-aouali/", "or_profile": "~Imad_AOUALI1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique", "aff_domain": "ensae.fr", "position": "PhD student", "bibtex": "@misc{\nanonymous2024diffusion,\ntitle={Diffusion Models Meet Contextual Bandits with Large Action Spaces},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nEnazjpwOx}\n}", "github": "", "project": "", "reviewers": "89g4;YAin;ByDg;wiX3", "site": "https://openreview.net/forum?id=nEnazjpwOx", "pdf_size": 2203308, "rating": "5;6;7;8", "confidence": "4;4;2;5", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;4;4", "wc_summary": "122;127;85;16", "wc_strengths": "21;99;86;63", "wc_weaknesses": "478;150;51;9", "wc_questions": "113;418;4;34", "wc_limitations": "4;22;12;15", "wc_review": "738;816;238;137", "wc_reply_reviewers": "0;102;24;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.5, 44.35369206729018 ], "wc_strengths_avg": [ 67.25, 29.65109610115619 ], "wc_weaknesses_avg": [ 172.0, 183.9361302191606 ], "wc_questions_avg": [ 142.25, 164.1072438986165 ], "wc_limitations_avg": [ 13.25, 6.456585785072479 ], "wc_review_avg": [ 482.25, 298.1831442251557 ], "wc_reply_reviewers_avg": [ 31.5, 41.865857210858586 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.1025978352085154, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9r9k-mJlwW0J:scholar.google.com/&scioq=Diffusion+Models+Meet+Contextual+Bandits+with+Large+Action+Spaces&hl=en&as_sdt=0,33", "gs_version_total": 10, "aff_unique_index": "0", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": "", "aff_unique_url": "https://ensae.fr", "aff_unique_abbr": "ENSAE", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Self-Distilled Depth Refinement with Noisy Poisson Fusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93704", "id": "nEqU0iCa0s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nEqU0iCa0s", "openreview": "https://openreview.net/forum?id=nEqU0iCa0s", "poster": "/media/PosterPDFs/NeurIPS%202024/93704.png?t=1733216155.656462", "project": "", "author_site": "Jiaqi Li, Yiran Wang, Jinghong Zheng, Zihao Huang, Ke Xian, Zhiguo Cao, Jianming Zhang", "tldr": "", "abstract": "Depth refinement aims to infer high-resolution depth with fine-grained edges and details, refining low-resolution results of depth estimation models. The prevailing methods adopt tile-based manners by merging numerous patches, which lacks efficiency and produces inconsistency. Besides, prior arts suffer from fuzzy depth boundaries and limited generalizability. Analyzing the fundamental reasons for these limitations, we model depth refinement as a noisy Poisson fusion problem with local inconsistency and edge deformation noises. We propose the Self-distilled Depth Refinement (SDDR) framework to enforce robustness against the noises, which mainly consists of depth edge representation and edge-based guidance. With noisy depth predictions as input, SDDR generates low-noise depth edge representations as pseudo-labels by coarse-to-fine self-distillation. Edge-based guidance with edge-guided gradient loss and edge-based fusion loss serves as the optimization objective equivalent to Poisson fusion. When depth maps are better refined, the labels also become more noise-free. Our model can acquire strong robustness to the noises, achieving significant improvements in accuracy, edge quality, efficiency, and generalizability on five different benchmarks. Moreover, directly training another model with edge labels produced by SDDR brings improvements, suggesting that our method could help with training robust refinement models in future works.", "keywords": "Depth refinement;Noisy Poisson fusion;Self-distilled training", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiaqi Li;Yiran Wang;Jinghong Zheng;Zihao Huang;Ke Xian;Zhiguo Cao;Jianming Zhang", "authorids": "~Jiaqi_Li7;~Yiran_Wang3;~Jinghong_Zheng1;~Zihao_Huang2;~Ke_Xian2;~Zhiguo_Cao1;~Jianming_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://raymondwang987.github.io/;https://github.com/DeepZheng;https://inso-13.github.io/;https://sites.google.com/site/kexian1991/;http://faculty.hust.edu.cn/caozhiguo1/zh_CN/index.htm;https://jimmie33.github.io/;https://lijia7.github.io/", "dblp": "181/2894-5;54/5344-2;244/4353-1;178/1416;01/4449-1;;118/4502-7", "google_scholar": "https://scholar.google.com.hk/citations?view_op=list_works;sLTEDCsAAAAJ;5-lExnYAAAAJ;UbK_AGwAAAAJ;396o2BAAAAAJ;TkVHKDgAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-2785-9638;0009-0000-7996-8927;0000-0002-8804-191X;0000-0002-0884-5126;0000-0002-9223-1863;0000-0002-9954-6294;0009-0004-7799-3407", "linkedin": ";;;;;;", "or_profile": "~Yiran_Wang3;~Jinghong_Zheng1;~Zihao_Huang2;~Ke_Xian2;~Zhiguo_Cao1;~Jianming_Zhang1;~jiaqi_li6", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Adobe Systems;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;adobe.com;hust.edu.cn", "position": "PhD student;MS student;MS student;Lecturer;Full Professor;Research Scientist;MS student", "bibtex": "@inproceedings{\nli2024selfdistilled,\ntitle={Self-Distilled Depth Refinement with Noisy Poisson Fusion},\nauthor={Jiaqi Li and Yiran Wang and Jinghong Zheng and Zihao Huang and Ke Xian and Zhiguo Cao and Jianming Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nEqU0iCa0s}\n}", "github": "", "reviewers": "Ca4W;DKoq;9uAR", "pdf_size": 14002234, "rating": "5;5;6", "confidence": "3;5;4", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;2;2", "wc_summary": "93;76;48", "wc_strengths": "105;76;34", "wc_weaknesses": "64;93;188", "wc_questions": "187;44;60", "wc_limitations": "41;86;3", "wc_review": "490;375;333", "wc_reply_reviewers": "21;19;53", "wc_reply_authors": "162;16;58", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 72.33333333333333, 18.553226733434325 ], "wc_strengths_avg": [ 71.66666666666667, 29.147136318265567 ], "wc_weaknesses_avg": [ 115.0, 52.959103718498355 ], "wc_questions_avg": [ 97.0, 63.97395303298575 ], "wc_limitations_avg": [ 43.333333333333336, 33.9247533357118 ], "wc_review_avg": [ 399.3333333333333, 66.36431437317967 ], "wc_reply_reviewers_avg": [ 31.0, 15.57776192739723 ], "wc_reply_authors_avg": [ 78.66666666666667, 61.369554521946974 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16844639046561765399&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;adobe.com;hust.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "http://www.hust.edu.cn;https://www.adobe.com", "aff_unique_abbr": "HUST;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "nEvnCuviMO", "title": "Metric Distortion Under Probabilistic Voting", "track": "main", "status": "Reject", "tldr": "", "abstract": "Metric distortion in social choice provides a framework for assessing how well voting rules minimize social cost in scenarios where voters and candidates exist in a shared metric space, with voters submitting rankings and the rule outputting a single winner. We expand this framework to include probabilistic voting. Our extension encompasses a broad range of probability functions, including widely studied models like Plackett-Luce (PL) and Bradley-Terry, and a novel \"pairwise quantal voting\" model inspired by quantal response theory.\n\nWe demonstrate that distortion results under probabilistic voting better correspond with conventional intuitions regarding popular voting rules such as Plurality, Copeland, and Random Dictator (RD) than those under deterministic voting. For example, in the PL model with candidate strength inversely proportional to the square of their metric distance, we show that Copeland's distortion is at most 2, whereas that of RD is $\\Omega(\\sqrt{m})$ in large elections, where $m$ is the number of candidates. This contrasts sharply with the classical model, where RD beats Copeland with a distortion of 3 versus 5 [1].", "keywords": "Social choice;Metric Distortion;Probabilistic voting", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Sahasrajit Sarmasarkar;Mohak Goyal", "authorids": "~Sahasrajit_Sarmasarkar1;~Mohak_Goyal1", "gender": ";M", "homepage": "https://sahasrajit123.github.io;https://sites.google.com/view/mohakg/home", "dblp": ";", "google_scholar": ";qjLMilkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Sahasrajit_Sarmasarkar1;~Mohak_Goyal1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;PhD student", "bibtex": "@misc{\nanonymous2024metric,\ntitle={Metric Distortion Under Probabilistic Voting},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nEvnCuviMO}\n}", "github": "", "project": "", "reviewers": "iXV7;fsCM;tx3A;2kBY", "site": "https://openreview.net/forum?id=nEvnCuviMO", "pdf_size": 573333, "rating": "4;5;6;7", "confidence": "3;4;4;4", "soundness": "4;3;4;2", "novelty": "3;3;2;3", "presentation": "2;3;3;3", "wc_summary": "116;232;89;98", "wc_strengths": "50;59;54;85", "wc_weaknesses": "237;225;145;152", "wc_questions": "2;1;1;130", "wc_limitations": "1;26;17;16", "wc_review": "406;543;306;481", "wc_reply_reviewers": "0;0;25;27", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 133.75, 57.55160727555747 ], "wc_strengths_avg": [ 62.0, 13.656500283747663 ], "wc_weaknesses_avg": [ 189.75, 41.541395017500314 ], "wc_questions_avg": [ 33.5, 55.71579668280801 ], "wc_limitations_avg": [ 15.0, 8.972179222463181 ], "wc_review_avg": [ 434.0, 88.39966063283275 ], "wc_reply_reviewers_avg": [ 13.0, 13.019216566291536 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jXcMBDHcNI0J:scholar.google.com/&scioq=Metric+Distortion+Under+Probabilistic+Voting&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "FineCLIP: Self-distilled Region-based CLIP for Better Fine-grained Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93703", "id": "nExI4FuKWD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nExI4FuKWD", "openreview": "https://openreview.net/forum?id=nExI4FuKWD", "poster": "/media/PosterPDFs/NeurIPS%202024/93703.png?t=1731812257.5500958", "project": "", "author_site": "Dong Jing, Xiaolong He, Yutian Luo, Nanyi Fei, guoxing Yang, Wei Wei, Huiwen Zhao, Zhiwu Lu", "tldr": "", "abstract": "Contrastive Language-Image Pre-training (CLIP) achieves impressive performance on tasks like image classification and image-text retrieval by learning on large-scale image-text datasets. However, CLIP struggles with dense prediction tasks due to the poor grasp of the fine-grained details. Although existing works pay attention to this issue, they achieve limited improvements and usually sacrifice the important visual-semantic consistency. To overcome these limitations, we propose FineCLIP, which keeps the global contrastive learning to preserve the visual-semantic consistency and further enhances the fine-grained understanding through two innovations: 1) A real-time self-distillation scheme that facilitates the transfer of representation capability from global to local features. 2) A semantically-rich regional contrastive learning paradigm with generated region-text pairs, boosting the local representation capabilities with abundant fine-grained knowledge. \nBoth cooperate to fully leverage diverse semantics and multi-grained complementary information.\nTo validate the superiority of our FineCLIP and the rationality of each design, we conduct extensive experiments on challenging dense prediction and image-level tasks. \nAll the observations demonstrate the effectiveness of FineCLIP.", "keywords": "CLIP;Fine-grained Understanding", "primary_area": "machine_vision", "supplementary_material": "/attachment/65ec4e49350e13daa3d5c3d5f3858057985de330.zip", "author": "Dong Jing;Xiaolong He;Yutian Luo;Nanyi Fei;Guoxing Yang;Wei Wei;Huiwen Zhao;Zhiwu Lu", "authorids": "~Dong_Jing1;~Xiaolong_He1;~Yutian_Luo1;~Nanyi_Fei1;~Guoxing_Yang3;~Wei_Wei28;~Huiwen_Zhao1;~Zhiwu_Lu1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://timsty1.github.io/;https://www.researchgate.net/profile/Xiaolong-He-17;;;https://github.com/viviansmlie;https://github.com/huiwenzhao;https://gsai.ruc.edu.cn/luzhiwu;https://github.com/GuoxingY", "dblp": "206/3646;;;232/2227;;;53/5234;271/9521", "google_scholar": "eDA8Ol8AAAAJ;;;Oz6VqeQAAAAJ;;;OUXS8doAAAAJ;", "orcid": ";;0000-0001-8707-0180;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Dong_Jing1;~Xiaolong_He1;~Yutian_Luo1;~Nanyi_Fei1;~Wei_Wei28;~Huiwen_Zhao1;~Zhiwu_Lu1;~GuoXing_Yang2", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China;AI Vision Department;Honor Device Co., Ltd.;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;honor.com;honor.com;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;MS student;PhD student;PhD student;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\njing2024fineclip,\ntitle={Fine{CLIP}: Self-distilled Region-based {CLIP} for Better Fine-grained Understanding},\nauthor={Dong Jing and Xiaolong He and Yutian Luo and Nanyi Fei and Guoxing Yang and Wei Wei and Huiwen Zhao and Zhiwu Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nExI4FuKWD}\n}", "github": "", "reviewers": "rzQ8;Ask3;oDkS;5tL8", "pdf_size": 2368945, "rating": "5;5;5;5", "confidence": "5;4;4;3", "soundness": "2;3;3;2", "novelty": "3;4;2;3", "presentation": "3;4;4;3", "wc_summary": "118;69;84;170", "wc_strengths": "59;50;84;61", "wc_weaknesses": "306;35;185;138", "wc_questions": "35;198;41;229", "wc_limitations": "4;1;11;44", "wc_review": "522;353;405;642", "wc_reply_reviewers": "61;315;111;159", "wc_reply_authors": "315;508;432;363", "reply_reviewers": "1;2;2;3", "reply_authors": "3;3;3;4", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 110.25, 38.79674599757047 ], "wc_strengths_avg": [ 63.5, 12.539936203984453 ], "wc_weaknesses_avg": [ 166.0, 97.34731634719058 ], "wc_questions_avg": [ 125.75, 88.45726369270078 ], "wc_limitations_avg": [ 15.0, 17.131841699011815 ], "wc_review_avg": [ 480.5, 111.53586867012781 ], "wc_reply_reviewers_avg": [ 161.5, 95.15645012294227 ], "wc_reply_authors_avg": [ 404.5, 72.8028158796073 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6745809902744699819&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;honor.com;honor.com;ruc.edu.cn;ruc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;0;0", "aff_unique_norm": "Renmin University of China;AI Vision Department;Honor Device Co., Ltd.", "aff_unique_dep": ";AI Vision Department;", "aff_unique_url": "http://www.ruc.edu.cn;;", "aff_unique_abbr": "RUC;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Identification of Analytic Nonlinear Dynamical Systems with Non-asymptotic Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93702", "id": "nF34qXcY0b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nF34qXcY0b", "openreview": "https://openreview.net/forum?id=nF34qXcY0b", "poster": "/media/PosterPDFs/NeurIPS%202024/93702.png?t=1733417897.655294", "project": "", "author_site": "Negin Musavi, Ziyao Guo, Geir Dullerud, Yingying Li", "tldr": "", "abstract": "This paper focuses on the system identification of an important class of nonlinear systems: nonlinear systems that are linearly parameterized, which enjoy wide applications in robotics and other mechanical systems. We consider two system identification methods: least-squares estimation (LSE), which is a point estimation method; and set-membership estimation (SME), which estimates an uncertainty set that contains the true parameters. We provide non-asymptotic convergence rates for LSE and SME under i.i.d. control inputs and control policies with i.i.d. random perturbations, both of which are considered as non-active-exploration inputs. Compared with the counter-example based on piecewise-affine systems in the literature, the success of non-active exploration in our setting relies on a key assumption about the system dynamics: we require the system functions to be real-analytic. Our results, together with the piecewise-affine counter-example, reveal the importance of differentiability in nonlinear system identification through non-active exploration. Lastly, we numerically compare our theoretical bounds with the empirical performance of LSE and SME on a pendulum example and a quadrotor example.", "keywords": "set-membership;least-squares;nonlinear systems;non-asymptotic guarantees", "primary_area": "learning_theory", "supplementary_material": "/attachment/ae7d9dab406eeaf52c397476e92c7c51911b0bc1.zip", "author": "Negin Musavi;Ziyao Guo;Geir Dullerud;Yingying Li", "authorids": "~Negin_Musavi1;~Ziyao_Guo2;~Geir_Dullerud1;~Yingying_Li3", "gender": "F;M;M;F", "homepage": ";;https://mechanical.illinois.edu/directory/faculty/dullerud/;https://yingying.li", "dblp": ";;;63/5869", "google_scholar": "9USS81QAAAAJ;apxw1N4AAAAJ;https://scholar.google.com.tw/citations?user=afhaKpYAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Negin_Musavi1;~Ziyao_Guo2;~Geir_Dullerud1;~Yingying_Li3", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nmusavi2024identification,\ntitle={Identification of Analytic Nonlinear Dynamical Systems with Non-asymptotic Guarantees},\nauthor={Negin Musavi and Ziyao Guo and Geir Dullerud and Yingying Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nF34qXcY0b}\n}", "github": "", "reviewers": "QHWP;on2R;wxwx;Dfmt", "pdf_size": 928755, "rating": "5;5;6;8", "confidence": "4;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "161;77;45;85", "wc_strengths": "104;55;88;45", "wc_weaknesses": "219;34;88;44", "wc_questions": "20;30;35;114", "wc_limitations": "42;9;43;3", "wc_review": "546;205;299;291", "wc_reply_reviewers": "56;19;35;0", "wc_reply_authors": "123;23;23;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 92.0, 42.555845661906424 ], "wc_strengths_avg": [ 73.0, 23.947860029656095 ], "wc_weaknesses_avg": [ 96.25, 73.72372413273763 ], "wc_questions_avg": [ 49.75, 37.485830656396026 ], "wc_limitations_avg": [ 24.25, 18.376275465937052 ], "wc_review_avg": [ 335.25, 127.13452520853649 ], "wc_reply_reviewers_avg": [ 27.5, 20.59732992404598 ], "wc_reply_authors_avg": [ 42.25, 47.557202398795496 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8573441898746176821&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "illinois.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DiffSF: Diffusion Models for Scene Flow Estimation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93701", "id": "nIeufGuQ9x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nIeufGuQ9x", "openreview": "https://openreview.net/forum?id=nIeufGuQ9x", "poster": "/media/PosterPDFs/NeurIPS%202024/93701.png?t=1731661674.6550329", "project": "", "author_site": "Yushan Zhang, Bastian Wandt, Maria Magnusson, Michael Felsberg", "tldr": "", "abstract": "Scene flow estimation is an essential ingredient for a variety of real-world applications, especially for autonomous agents, such as self-driving cars and robots. While recent scene flow estimation approaches achieve reasonable accuracy, their applicability to real-world systems additionally benefits from a reliability measure. Aiming at improving accuracy while additionally providing an estimate for uncertainty, we propose DiffSF that combines transformer-based scene flow estimation with denoising diffusion models. In the diffusion process, the ground truth scene flow vector field is gradually perturbed by adding Gaussian noise. In the reverse process, starting from randomly sampled Gaussian noise, the scene flow vector field prediction is recovered by conditioning on a source and a target point cloud. We show that the diffusion process greatly increases the robustness of predictions compared to prior approaches resulting in state-of-the-art performance on standard scene flow estimation benchmarks. Moreover, by sampling multiple times with different initial states, the denoising process predicts multiple hypotheses, which enables measuring the output uncertainty, allowing our approach to detect a majority of the inaccurate predictions. The code is available at https://github.com/ZhangYushan3/DiffSF.", "keywords": "Scene Flow Estimation;Denoising Diffusion Models;Uncertainty", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yushan Zhang;Bastian Wandt;Maria Magnusson;Michael Felsberg", "authorids": "~Yushan_Zhang1;~Bastian_Wandt2;~Maria_Magnusson1;~Michael_Felsberg2", "gender": "F;M;F;", "homepage": "https://liu.se/en/employee/yuszh17;http://bastianwandt.de;https://liu.se/medarbetare/segma96;https://liu.se/en/employee/micfe03", "dblp": ";;36/6944;00/78", "google_scholar": "mvY4rdIAAAAJ;z4aXEBYAAAAJ;;https://scholar.google.se/citations?hl=en", "orcid": ";;0000-0002-9072-2204;0000-0002-6096-3648", "linkedin": "https://se.linkedin.com/in/yushan-zhang-402395294;;;https://linkedin.com/in/michael-felsberg-668a202", "or_profile": "~Yushan_Zhang1;~Bastian_Wandt2;~Maria_Magnusson1;~Michael_Felsberg2", "aff": "NVIDIA;Link\u00f6ping University;Link\u00f6ping University;Link\u00f6ping University", "aff_domain": "nvidia.com;liu.se;liu.se;liu.se", "position": "Intern;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024diffsf,\ntitle={Diff{SF}: Diffusion Models for Scene Flow Estimation},\nauthor={Yushan Zhang and Bastian Wandt and Maria Magnusson and Michael Felsberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nIeufGuQ9x}\n}", "github": "", "reviewers": "gpDD;2g78;qmev;yrBG", "pdf_size": 8443454, "rating": "6;7;7;8", "confidence": "5;3;5;5", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "2;3;2;4", "wc_summary": "142;96;71;73", "wc_strengths": "51;83;43;61", "wc_weaknesses": "292;46;144;72", "wc_questions": "6;39;50;21", "wc_limitations": "22;49;1;15", "wc_review": "513;313;309;242", "wc_reply_reviewers": "91;0;24;0", "wc_reply_authors": "216;0;0;0", "reply_reviewers": "2;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 95.5, 28.587584717845612 ], "wc_strengths_avg": [ 59.5, 14.99166435056495 ], "wc_weaknesses_avg": [ 138.5, 95.61772848169946 ], "wc_questions_avg": [ 29.0, 16.837458240482736 ], "wc_limitations_avg": [ 21.75, 17.455300054711177 ], "wc_review_avg": [ 344.25, 101.428238178527 ], "wc_reply_reviewers_avg": [ 28.75, 37.25167781456293 ], "wc_reply_authors_avg": [ 54.0, 93.53074360871938 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4075955599168149828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nvidia.com;liu.se;liu.se;liu.se", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "NVIDIA;Link\u00f6ping University", "aff_unique_dep": "NVIDIA Corporation;", "aff_unique_url": "https://www.nvidia.com;https://www.liu.se", "aff_unique_abbr": "NVIDIA;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Sweden" }, { "title": "Learning the Latent Causal Structure for Modeling Label Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93700", "id": "nJKfNiEBvq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nJKfNiEBvq", "openreview": "https://openreview.net/forum?id=nJKfNiEBvq", "poster": "/media/PosterPDFs/NeurIPS%202024/93700.png?t=1731593164.056119", "project": "", "author_site": "Yexiong Lin, Yu Yao, Tongliang Liu", "tldr": "", "abstract": "In label-noise learning, the noise transition matrix reveals how an instance transitions from its clean label to its noisy label. Accurately estimating an instance's noise transition matrix is crucial for estimating its clean label. However, when only a noisy dataset is available, noise transition matrices can be estimated only for some \"special\" instances. To leverage these estimated transition matrices to help estimate the transition matrices of other instances, it is essential to explore relations between the matrices of these \"special\" instances and those of others. Existing studies typically build the relation by explicitly defining the similarity between the estimated noise transition matrices of \"special\" instances and those of other instances. However, these similarity-based assumptions are hard to validate and may not align with real-world data. If these assumptions fail, both noise transition matrices and clean labels cannot be accurately estimated. In this paper, we found that by learning the latent causal structure governing the generating process of noisy data, we can estimate noise transition matrices without the need for similarity-based assumptions. Unlike previous generative label-noise learning methods, we consider causal relations between latent causal variables and model them with a learnable graphical model. Utilizing only noisy data, our method can effectively learn the latent causal structure. Experimental results on various noisy datasets demonstrate that our method achieves state-of-the-art performance in estimating noise transition matrices, which leads to improved classification accuracy. The code is available at: https://github.com/tmllab/2024_NeurIPS_CSGN.", "keywords": "Label noise;transition matrices", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yexiong Lin;Yu Yao;Tongliang Liu", "authorids": "~Yexiong_Lin1;~Yu_Yao3;~Tongliang_Liu1", "gender": "M;M;M", "homepage": "https://yexionglin.github.io/;https://a5507203.github.io/;https://tongliang-liu.github.io/", "dblp": "287/6488;230/9625;150/6667", "google_scholar": "OfsQPbwAAAAJ;OkcaMKAAAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ", "orcid": ";;", "linkedin": ";yu-yao-150377134/;", "or_profile": "~Yexiong_Lin1;~Yu_Yao3;~Tongliang_Liu1", "aff": "University of Sydney;University of Sydney;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "usyd.edu.au;sydney.edu.au;mbzuai.ac.ae", "position": "PhD student;Lecturer;Affiliated Associate Professor", "bibtex": "@inproceedings{\nlin2024learning,\ntitle={Learning the Latent Causal Structure for Modeling Label Noise},\nauthor={Yexiong Lin and Yu Yao and Tongliang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nJKfNiEBvq}\n}", "github": "", "reviewers": "pfn2;22VP;afzG;S6tG", "pdf_size": 1514517, "rating": "3;5;5;7", "confidence": "5;4;3;4", "soundness": "3;3;2;4", "novelty": "1;2;3;3", "presentation": "3;2;2;3", "wc_summary": "62;103;55;113", "wc_strengths": "7;50;47;49", "wc_weaknesses": "296;117;191;84", "wc_questions": "95;29;95;33", "wc_limitations": "1;10;1;1", "wc_review": "461;309;389;280", "wc_reply_reviewers": "396;23;274;73", "wc_reply_authors": "2118;0;1506;48", "reply_reviewers": "2;1;2;2", "reply_authors": "6;1;3;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.25, 25.1234452255259 ], "wc_strengths_avg": [ 38.25, 18.07449860992 ], "wc_weaknesses_avg": [ 172.0, 81.40331688573875 ], "wc_questions_avg": [ 63.0, 32.03123475609393 ], "wc_limitations_avg": [ 3.25, 3.897114317029974 ], "wc_review_avg": [ 359.75, 70.78620981518928 ], "wc_reply_reviewers_avg": [ 191.5, 150.88157607872475 ], "wc_reply_authors_avg": [ 918.0, 919.9684777208402 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8706371000076287298&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "usyd.edu.au;sydney.edu.au;mbzuai.ac.ae", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Sydney;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://mbzuai.ac.ae", "aff_unique_abbr": "USYD;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Australia;United Arab Emirates" }, { "title": "Shared Autonomy with IDA: Interventional Diffusion Assistance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93699", "id": "nJvkQSu9Z5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nJvkQSu9Z5", "openreview": "https://openreview.net/forum?id=nJvkQSu9Z5", "poster": "", "project": "", "author_site": "Brandon McMahan, Zhenghao (Mark) Peng, Bolei Zhou, Jonathan Kao", "tldr": "", "abstract": "The rapid development of artificial intelligence (AI) has unearthed the potential to assist humans in controlling advanced technologies. Shared autonomy (SA) facilitates control by combining inputs from a human pilot and an AI copilot. In prior SA studies, the copilot is constantly active in determining the action played at each time step. This limits human autonomy that may have deleterious effects on performance. In general, the amount of helpful copilot assistance varies greatly depending on the task dynamics. We therefore hypothesized that human autonomy and SA performance improves through dynamic and selective copilot intervention. To address this, we develop a goal-agnostic intervention assistance (IA) that dynamically shares control by having the copilot intervene only when the expected value of the copilot\u2019s action exceeds that of the human\u2019s action. We implement IA with a diffusion copilot (termed IDA) trained on expert demonstrations with goal masking. We prove that IDA performance is lower bounded by human performance, so that IDA does not negatively impact human control. In experiments with simulated human pilots, we show that IDA achieves higher performance than both pilot-only and traditional SA control in variants of the Reacher environment and Lunar Lander. We then demonstrate with human-in the-loop experiments that IDA achieves better control in Lunar Lander and that human participants experience greater autonomy and prefer IDA over pilot-only and traditional SA control. We attribute the success of IDA to preserving human autonomy while simultaneously offering assistance to prevent the human from entering universally bad states.", "keywords": "Shared Autonomy;Diffusion Models;copilots;intervention reinforcement learning;reinforcement learning;lunar lander;Mujoco", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/c3f5bcf3c8ecbe9a038aa7366d4ff206b3a3faef.zip", "author": "Brandon J McMahan;Zhenghao Peng;Bolei Zhou;Jonathan Kao", "authorids": "~Brandon_J_McMahan1;~Zhenghao_Peng1;~Bolei_Zhou5;~Jonathan_Kao1", "gender": ";M;M;", "homepage": ";https://pengzhenghao.github.io;https://boleizhou.github.io/;http://seas.ucla.edu/~kao", "dblp": ";220/3963;46/8066;145/1310", "google_scholar": ";JZ8ws6IAAAAJ;9D4aG8AAAAAJ;", "orcid": ";;;0000-0002-9298-0143", "linkedin": "brandon-mcmahan-5b0267139/;;;", "or_profile": "~Brandon_J_McMahan1;~Zhenghao_Peng1;~Bolei_Zhou5;~Jonathan_Kao1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmcmahan2024shared,\ntitle={Shared Autonomy with {IDA}: Interventional Diffusion Assistance},\nauthor={Brandon J McMahan and Zhenghao Peng and Bolei Zhou and Jonathan Kao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nJvkQSu9Z5}\n}", "github": "", "reviewers": "zqQF;yfMr;Y2yZ", "pdf_size": 2304141, "rating": "6;6;6", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "123;127;66", "wc_strengths": "80;60;37", "wc_weaknesses": "302;166;159", "wc_questions": "77;78;85", "wc_limitations": "33;52;42", "wc_review": "615;483;389", "wc_reply_reviewers": "34;32;20", "wc_reply_authors": "41;38;37", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.33333333333333, 27.86076492528915 ], "wc_strengths_avg": [ 59.0, 17.568911937472585 ], "wc_weaknesses_avg": [ 209.0, 65.82299496883036 ], "wc_questions_avg": [ 80.0, 3.559026084010437 ], "wc_limitations_avg": [ 42.333333333333336, 7.760297817881877 ], "wc_review_avg": [ 495.6666666666667, 92.69783648440178 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 6.182412330330469 ], "wc_reply_authors_avg": [ 38.666666666666664, 1.699673171197595 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15030312491087417859&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Text-Aware Diffusion for Policy Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93698", "id": "nK6OnCpd3n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nK6OnCpd3n", "openreview": "https://openreview.net/forum?id=nK6OnCpd3n", "poster": "", "project": "", "author_site": "Calvin Luo, Mandy He, Zilai Zeng, Chen Sun", "tldr": "", "abstract": "Training an agent to achieve particular goals or perform desired behaviors is often accomplished through reinforcement learning, especially in the absence of expert demonstrations. However, supporting novel goals or behaviors through reinforcement learning requires the ad-hoc design of appropriate reward functions, which quickly becomes intractable. To address this challenge, we propose Text-Aware Diffusion for Policy Learning (TADPoLe), which uses a pretrained, frozen text-conditioned diffusion model to compute dense zero-shot reward signals for text-aligned policy learning. We hypothesize that large-scale pretrained generative models encode rich priors that can supervise a policy to behave not only in a text-aligned manner, but also in alignment with a notion of naturalness summarized from internet-scale training data. In our experiments, we demonstrate that TADPoLe is able to learn policies for novel goal-achievement and continuous locomotion behaviors specified by natural language, in both Humanoid and Dog environments. The behaviors are learned zero-shot without ground-truth rewards or expert demonstrations, and are qualitatively more natural according to human evaluation. We further show that TADPoLe performs competitively when applied to robotic manipulation tasks in the Meta-World environment, without having access to any in-domain demonstrations.", "keywords": "diffusion models;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/2716387f28f6530aaef3405e7cf114b39fc09cd6.zip", "author": "Calvin Luo;Mandy He;Zilai Zeng;Chen Sun", "authorids": "~Calvin_Luo2;~Mandy_He1;~Zilai_Zeng1;~Chen_Sun1", "gender": "M;F;M;M", "homepage": "https://calvinyluo.com/;;https://zilaiz.github.io;https://chensun.me", "dblp": ";;306/6661;01/6072-2", "google_scholar": "https://scholar.google.com/citations?hl=en;;nyqMsxQAAAAJ;vQa7heEAAAAJ", "orcid": ";0000-0001-6004-0416;;", "linkedin": ";he-mandy/;;", "or_profile": "~Calvin_Luo2;~Mandy_He1;~Zilai_Zeng1;~Chen_Sun1", "aff": "Brown University;Brown University;Brown University;Google", "aff_domain": "brown.edu;brown.edu;brown.edu;google.com", "position": "PhD student;Undergrad student;MS student;Research Scientist", "bibtex": "@inproceedings{\nluo2024textaware,\ntitle={Text-Aware Diffusion for Policy Learning},\nauthor={Calvin Luo and Mandy He and Zilai Zeng and Chen Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nK6OnCpd3n}\n}", "github": "", "reviewers": "TWnR;z3di;iiYV;KsL9", "pdf_size": 6896305, "rating": "4;4;6;7", "confidence": "4;4;3;4", "soundness": "3;2;2;4", "novelty": "2;2;3;4", "presentation": "1;3;3;4", "wc_summary": "66;52;90;50", "wc_strengths": "75;30;100;68", "wc_weaknesses": "326;162;113;95", "wc_questions": "179;61;98;131", "wc_limitations": "17;46;5;16", "wc_review": "663;351;406;360", "wc_reply_reviewers": "282;69;0;5", "wc_reply_authors": "864;700;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 64.5, 15.960889699512368 ], "wc_strengths_avg": [ 68.25, 25.083610186733488 ], "wc_weaknesses_avg": [ 174.0, 91.11805529092464 ], "wc_questions_avg": [ 117.25, 43.40722866067356 ], "wc_limitations_avg": [ 21.0, 15.182226450688976 ], "wc_review_avg": [ 445.0, 127.57938704978952 ], "wc_reply_reviewers_avg": [ 89.0, 114.70178725721757 ], "wc_reply_authors_avg": [ 391.0, 395.2758530444277 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17572084262700471599&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 4, "email": "brown.edu;brown.edu;brown.edu;google.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Brown University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.brown.edu;https://www.google.com", "aff_unique_abbr": "Brown;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Active learning of neural population dynamics using two-photon holographic optogenetics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93697", "id": "nLQeE8QGGe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nLQeE8QGGe", "openreview": "https://openreview.net/forum?id=nLQeE8QGGe", "poster": "/media/PosterPDFs/NeurIPS%202024/93697.png?t=1733525654.2949111", "project": "", "author_site": "Andrew Wagenmaker, Lu Mi, Marton Rozsa, Matthew Bull, Karel Svoboda, Kayvon Daie, Matthew Golub, Kevin Jamieson", "tldr": "", "abstract": "Recent advances in techniques for monitoring and perturbing neural populations have greatly enhanced our ability to study circuits in the brain. In particular, two-photon holographic optogenetics now enables precise photostimulation of experimenter-specified groups of individual neurons, while simultaneous two-photon calcium imaging enables the measurement of ongoing and induced activity across the neural population. Despite the enormous space of potential photostimulation patterns and the time-consuming nature of photostimulation experiments, very little algorithmic work has been done to determine the most effective photostimulation patterns for identifying the neural population dynamics. Here, we develop methods to efficiently select which neurons to stimulate such that the resulting neural responses will best inform a dynamical model of the neural population activity. Using neural population responses to photostimulation in mouse motor cortex, we demonstrate the efficacy of a low-rank linear dynamical systems model, and develop an active learning procedure which takes advantage of low-rank structure to determine informative photostimulation patterns. We demonstrate our approach on both real and synthetic data, obtaining in some cases as much as a two-fold reduction in the amount of data required to reach a given predictive power. Our active stimulation design method is based on a novel active learning procedure for low-rank regression, which may be of independent interest.", "keywords": "active learning;experiment design;neural system identification;neural behavior", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Andrew Wagenmaker;Lu Mi;Marton Rozsa;Matthew Storm Bull;Karel Svoboda;Kayvon Daie;Matthew D. Golub;Kevin Jamieson", "authorids": "~Andrew_Wagenmaker1;~Lu_Mi1;~Marton_Rozsa1;~Matthew_Storm_Bull1;~Karel_Svoboda1;~Kayvon_Daie1;~Matthew_D._Golub2;~Kevin_Jamieson1", "gender": "M;F;M;;M;M;;M", "homepage": "https://wagenmaker.github.io;https://lumimim.github.io;;;;;https://homes.cs.washington.edu/~mgolub/;", "dblp": "195/1036;185/3258;;;;;137/3298;85/10260", "google_scholar": "ym8AZSIAAAAJ;vokCG-MAAAAJ;https://scholar.google.co.uk/citations?hl=en;79lMvCMRCK8C;;_9DJ3cYAAAAJ;unFTH2EAAAAJ;", "orcid": ";;;;0000-0002-6670-7362;;0000-0003-4508-0537;", "linkedin": ";lu-mi-698899172/;;;;;;", "or_profile": "~Andrew_Wagenmaker1;~Lu_Mi1;~Marton_Rozsa1;~Matthew_Storm_Bull1;~Karel_Svoboda1;~Kayvon_Daie1;~Matthew_D._Golub2;~Kevin_Jamieson1", "aff": "University of Washington, Seattle;Allen Institute;Allen Institute;Allen Institute + University of Washington;;allen institute;University of Washington;University of Washington", "aff_domain": "uw.edu;allen.org;alleninstitute.org;alleninstitute.org;;alleninstitute.org;cs.washington.edu;washington.edu", "position": "PhD student;Postdoc;Postdoc;Postdoc;;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwagenmaker2024active,\ntitle={Active design of two-photon holographic stimulation for identifying neural population dynamics},\nauthor={Andrew Wagenmaker and Lu Mi and Marton Rozsa and Matthew Storm Bull and Karel Svoboda and Kayvon Daie and Matthew D. Golub and Kevin Jamieson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nLQeE8QGGe}\n}", "github": "", "reviewers": "d6y7;YQWs;cE26", "pdf_size": 6444889, "rating": "6;6;7", "confidence": "3;3;2", "soundness": "4;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "44;73;84", "wc_strengths": "47;67;161", "wc_weaknesses": "108;122;130", "wc_questions": "183;191;90", "wc_limitations": "8;49;131", "wc_review": "390;502;596", "wc_reply_reviewers": "0;39;22", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 67.0, 16.87206764645835 ], "wc_strengths_avg": [ 91.66666666666667, 49.701330185642135 ], "wc_weaknesses_avg": [ 120.0, 9.092121131323903 ], "wc_questions_avg": [ 154.66666666666666, 45.84272631023983 ], "wc_limitations_avg": [ 62.666666666666664, 51.13598428591053 ], "wc_review_avg": [ 496.0, 84.20609637470832 ], "wc_reply_reviewers_avg": [ 20.333333333333332, 15.96524001977073 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DV9_wEz9TYYJ:scholar.google.com/&scioq=Active+learning+of+neural+population+dynamics+using+two-photon+holographic+optogenetics&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "uw.edu;allen.org;alleninstitute.org;alleninstitute.org;;alleninstitute.org;cs.washington.edu;washington.edu", "author_num": 8, "aff_unique_index": "0;1;1;1+0;2;0;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence;Allen Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://www.alleninstitute.org", "aff_unique_abbr": "UW;AI2;AI", "aff_campus_unique_index": "0;", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0+0;0;0;0", "aff_country_unique": "United States" }, { "title": "To Err Like Human: Affective Bias-Inspired Measures for Visual Emotion Recognition Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93696", "id": "nLSLbJgL7f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nLSLbJgL7f", "openreview": "https://openreview.net/forum?id=nLSLbJgL7f", "poster": "/media/PosterPDFs/NeurIPS%202024/93696.png?t=1731039769.368892", "project": "", "author_site": "Chenxi Zhao, Jinglei Shi, Liqiang Nie, Jufeng Yang", "tldr": "", "abstract": "Accuracy is a commonly adopted performance metric in various classification tasks, which measures the proportion of correctly classified samples among all samples. It assumes equal importance for all classes, hence equal severity for misclassifications. However, in the task of emotional classification, due to the psychological similarities between emotions, misclassifying a certain emotion into one class may be more severe than another, e.g., misclassifying 'excitement' as 'anger' apparently is more severe than as 'awe'. Albeit high meaningful for many applications, metrics capable of measuring these cases of misclassifications in visual emotion recognition tasks have yet to be explored. In this paper, based on Mikel's emotion wheel from psychology, we propose a novel approach for evaluating the performance in visual emotion recognition, which takes into account the distance on the emotion wheel between different emotions to mimic the psychological nuances of emotions. Experimental results in semi-supervised learning on emotion recognition and user study have shown that our proposed metrics is more effective than the accuracy to assess the performance and conforms to the cognitive laws of human emotions. The code is available at https://github.com/ZhaoChenxi-nku/ECC.", "keywords": "visual emotion recognition;evaluation measure", "primary_area": "evaluation", "supplementary_material": "/attachment/f025452dacf49776acfd5917e7c0423d8e07c6e6.zip", "author": "Chenxi Zhao;Jinglei Shi;Liqiang Nie;Jufeng Yang", "authorids": "~Chenxi_Zhao3;~Jinglei_Shi1;~Liqiang_Nie2;~Jufeng_Yang1", "gender": "M;M;M;", "homepage": "https://cv.nankai.edu.cn/;;https://liqiangnie.github.io/index.html;http://cv.nankai.edu.cn", "dblp": ";;92/8277;71/6229", "google_scholar": ";9tYW9LcAAAAJ;yywVMhUAAAAJ;c5vDJv0AAAAJ", "orcid": "0009-0005-9898-8049;0000-0003-2926-0415;0000-0003-1476-0273;", "linkedin": ";;;", "or_profile": "~Chenxi_Zhao3;~Jinglei_Shi1;~Liqiang_Nie2;~Jufeng_Yang1", "aff": "Nankai University;Nankai University;Shandong University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;sdu.edu.cn;nankai.edu.cn", "position": "MS student;Lecturer;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2024to,\ntitle={To Err Like Human: Affective Bias-Inspired Measures for Visual Emotion Recognition Evaluation},\nauthor={Chenxi Zhao and Jinglei Shi and Liqiang Nie and Jufeng Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nLSLbJgL7f}\n}", "github": "", "reviewers": "jkV6;MhCQ;133Y;Ro51", "pdf_size": 5765971, "rating": "4;5;5;5", "confidence": "4;3;3;3", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "3;2;2;2", "wc_summary": "67;76;45;82", "wc_strengths": "40;56;48;75", "wc_weaknesses": "294;297;134;86", "wc_questions": "5;24;25;99", "wc_limitations": "23;30;4;62", "wc_review": "429;483;256;404", "wc_reply_reviewers": "18;228;26;28", "wc_reply_authors": "18;695;20;35", "reply_reviewers": "1;2;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 14.044571905188139 ], "wc_strengths_avg": [ 54.75, 12.987975207860538 ], "wc_weaknesses_avg": [ 202.75, 94.29574486688145 ], "wc_questions_avg": [ 38.25, 35.967867604293694 ], "wc_limitations_avg": [ 29.75, 20.90902915010642 ], "wc_review_avg": [ 393.0, 84.0922112921286 ], "wc_reply_reviewers_avg": [ 75.0, 88.41379982785493 ], "wc_reply_authors_avg": [ 192.0, 290.4814968289719 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3836876655552724774&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nankai.edu.cn;nankai.edu.cn;sdu.edu.cn;nankai.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nankai University;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "NKU;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "nMFVdphOc9", "title": "Rule Based Learning with Dynamic (Graph) Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "A common problem of classical neural network architectures is that additional information or expert knowledge cannot be naturally integrated into the learning process.\n To overcome this limitation, we propose a two-step approach consisting of (1) generating formal rules from knowledge and (2) using these rules to define rule based layers -- a\n new type of dynamic neural network layer.\n The focus of this work is on the second step, i.e., rule based layers that are designed to dynamically arrange learnable parameters in the weight matrices and bias vectors for each input sample following a formal rule.\n Indeed, we prove that our approach generalizes classical feed-forward layers such as fully connected and convolutional layers by choosing appropriate rules.\n As a concrete application we present rule based graph neural networks (RuleGNNs) that are by definition permutation equivariant and able to handle graphs of arbitrary sizes.\n Our experiments show that RuleGNNs are comparable to state-of-the-art graph classifiers using simple rules based on the Weisfeiler-Leman labeling and pattern counting.\n Moreover, we introduce new synthetic benchmark graph datasets to show how to integrate expert knowledge into RuleGNNs making them more powerful than ordinary graph neural networks.", "keywords": "graph neural networks;dynamic neural networks;rule based learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Florian Seiffarth", "authorids": "~Florian_Seiffarth1", "gender": "M", "homepage": "https://mlai.cs.uni-bonn.de/people/florian-seiffarth", "dblp": "", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Florian_Seiffarth1", "aff": "Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn, Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn", "aff_domain": "cs.uni-bonn.de", "position": "Postdoc", "bibtex": "@misc{\nanonymous2024rule,\ntitle={Rule Based Learning with Dynamic (Graph) Neural Networks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nMFVdphOc9}\n}", "github": "", "project": "", "reviewers": "kGau;hyNs;9LgA;yEHT", "site": "https://openreview.net/forum?id=nMFVdphOc9", "pdf_size": 6525036, "rating": "3;3;3;4", "confidence": "3;4;3;3", "soundness": "2;1;3;3", "novelty": "2;2;1;3", "presentation": "1;1;1;3", "wc_summary": "49;86;140;65", "wc_strengths": "12;83;27;74", "wc_weaknesses": "31;816;144;106", "wc_questions": "315;193;33;46", "wc_limitations": "23;44;33;63", "wc_review": "430;1222;377;354", "wc_reply_reviewers": "158;215;330;53", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 3.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 85.0, 34.35840508521896 ], "wc_strengths_avg": [ 49.0, 30.14133374620307 ], "wc_weaknesses_avg": [ 274.25, 315.4111404183435 ], "wc_questions_avg": [ 146.75, 115.6900492695893 ], "wc_limitations_avg": [ 40.75, 14.8387162517517 ], "wc_review_avg": [ 595.75, 362.61437850697536 ], "wc_reply_reviewers_avg": [ 189.0, 100.01749846901791 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4708094578787220429&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0", "aff_unique_norm": "Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-bonn.de", "aff_unique_abbr": "Uni Bonn", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "UGC: Universal Graph Coarsening", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93695", "id": "nN6NSd1Qds", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nN6NSd1Qds", "openreview": "https://openreview.net/forum?id=nN6NSd1Qds", "poster": "/media/PosterPDFs/NeurIPS%202024/93695.png?t=1730033773.7312088", "project": "", "author_site": "Mohit Kataria, Sandeep Kumar, Jayadeva Dr", "tldr": "", "abstract": "In the era of big data, graphs have emerged as a natural representation of intricate relationships. However, graph sizes often become unwieldy, leading to storage, computation, and analysis challenges. A crucial demand arises for methods that can effectively downsize large graphs while retaining vital insights. Graph coarsening seeks to simplify large graphs while maintaining the basic statistics of the graphs, such as spectral properties and $\\epsilon$-similarity in the coarsened graph. This ensures that downstream processes are more efficient and effective. Most published methods are suitable for homophilic datasets, limiting their universal use. We propose **U**niversal **G**raph **C**oarsening (UGC), a framework equally suitable for homophilic and heterophilic datasets. UGC integrates node attributes and adjacency information, leveraging the dataset's heterophily factor. Results on benchmark datasets demonstrate that UGC preserves spectral similarity while coarsening. In comparison to existing methods, UGC is 4x to 15x faster, has lower eigen-error, and yields superior performance on downstream processing tasks even at 70% coarsening ratios.", "keywords": "Graph Coarsening;Graph Neural Networks;Locality sensitive hashing;Heterophilic Graph;Scaling Graph Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Mohit Kataria;Sandeep Kumar;Jayadeva Jayadeva", "authorids": "~Mohit_Kataria1;~Sandeep_Kumar8;~Jayadeva_Jayadeva1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/sandeepkr/home;", "dblp": ";;58/4288", "google_scholar": ";lycMMW8AAAAJ;", "orcid": ";;", "linkedin": "passenger/;sandeep-kumar-84463332/;", "or_profile": "~Mohit_Kataria1;~Sandeep_Kumar8;~Jayadeva_Jayadeva1", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkataria2024ugc,\ntitle={{UGC}: Universal Graph Coarsening},\nauthor={Mohit Kataria and Sandeep Kumar and Jayadeva Jayadeva},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nN6NSd1Qds}\n}", "github": "", "reviewers": "QBq8;mriD;C55f;FD1i", "pdf_size": 8667049, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "1;3;3;3", "wc_summary": "84;50;75;51", "wc_strengths": "35;68;23;76", "wc_weaknesses": "37;208;16;127", "wc_questions": "463;5;18;2", "wc_limitations": "7;16;7;1", "wc_review": "626;347;139;257", "wc_reply_reviewers": "74;14;15;0", "wc_reply_authors": "0;0;86;103", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 65.0, 14.849242404917497 ], "wc_strengths_avg": [ 50.5, 22.096379793984354 ], "wc_weaknesses_avg": [ 97.0, 76.45586962424795 ], "wc_questions_avg": [ 122.0, 196.9682715566139 ], "wc_limitations_avg": [ 7.75, 5.356071321407137 ], "wc_review_avg": [ 342.25, 179.6627048109874 ], "wc_reply_reviewers_avg": [ 25.75, 28.481353549296074 ], "wc_reply_authors_avg": [ 47.25, 47.63074112377426 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6309238831352609926&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "email": "iitd.ac.in;iitd.ac.in;iitd.ac.in", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "On conditional diffusion models for PDE simulations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93694", "id": "nQl8EjyMzh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nQl8EjyMzh", "openreview": "https://openreview.net/forum?id=nQl8EjyMzh", "poster": "/media/PosterPDFs/NeurIPS%202024/93694.png?t=1731418541.2241497", "project": "", "author_site": "Aliaksandra Shysheya, Cristiana Diaconu, Federico Bergamin, Paris Perdikaris, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Richard Turner, Emile Mathieu", "tldr": "", "abstract": "Modelling partial differential equations (PDEs) is of crucial importance in science and engineering, and it includes tasks ranging from forecasting to inverse problems, such as data assimilation. However, most previous numerical and machine learning approaches that target forecasting cannot be applied out-of-the-box for data assimilation. Recently, diffusion models have emerged as a powerful tool for conditional generation, being able to flexibly incorporate observations without retraining. In this work, we perform a comparative study of score-based diffusion models for forecasting and assimilation of sparse observations. In particular, we focus on diffusion models that are either trained in a conditional manner, or conditioned after unconditional training. We address the shortcomings of existing models by proposing 1) an autoregressive sampling approach, that significantly improves performance in forecasting, 2) a new training strategy for conditional score-based models that achieves stable performance over a range of history lengths, and 3) a hybrid model which employs flexible pre-training conditioning on initial conditions and flexible post-training conditioning to handle data assimilation. We empirically show that these modifications are crucial for successfully tackling the combination of forecasting and data assimilation, a task commonly encountered in real-world scenarios.", "keywords": "neural PDE solver;PDE;partial differential equation;forecasting;data-assimilation;diffusion;denoising;autoregressive;neural surrogate;reconstruction guidance;conditional score", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Aliaksandra Shysheya;Cristiana Diaconu;Federico Bergamin;Paris Perdikaris;Jos\u00e9 Miguel Hern\u00e1ndez-Lobato;Richard E. Turner;Emile Mathieu", "authorids": "~Aliaksandra_Shysheya1;~Cristiana_Diaconu1;~Federico_Bergamin1;~Paris_Perdikaris1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1;~Richard_E_Turner1;~Emile_Mathieu1", "gender": "F;F;;M;;M;M", "homepage": ";https://cddcam.github.io/;;https://directory.seas.upenn.edu/paris-perdikaris/;;https://rich-turner-group.github.io/;http://emilemathieu.fr", "dblp": "241/6203;380/4379.html;;180/9141;;40/5352;223/6084.html", "google_scholar": ";Ws2IoZIAAAAJ;;h_zkt1oAAAAJ;;https://scholar.google.co.uk/citations?user=DgLEyZgAAAAJ;g9BjTqgAAAAJ", "orcid": ";0009-0007-5165-2630;;0000-0002-2816-3229;;;", "linkedin": ";https://linkedin.com/in/cristiana-diaconu-99a3ba161;;paris-perdikaris-093068102/;;;", "or_profile": "~Aliaksandra_Shysheya1;~Cristiana_Diaconu1;~Federico_Bergamin1;~Paris_Perdikaris1;~Jos\u00e9_Miguel_Hern\u00e1ndez-Lobato1;~Richard_E_Turner1;~Emile_Mathieu1", "aff": "University of Cambridge;University of Cambridge;;University of Pennsylvania;;Microsoft Research;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;;upenn.edu;;research.microsoft.com;cam.ac.uk", "position": "PhD student;PhD student;;Associate Professor;;Researcher;Postdoc", "bibtex": "@inproceedings{\nshysheya2024on,\ntitle={On conditional diffusion models for {PDE} simulations},\nauthor={Aliaksandra Shysheya and Cristiana Diaconu and Federico Bergamin and Paris Perdikaris and Jos{\\'e} Miguel Hern{\\'a}ndez-Lobato and Richard E. Turner and Emile Mathieu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nQl8EjyMzh}\n}", "github": "", "reviewers": "4n49;ofj7;D3JV;iiGR;D4vs", "pdf_size": 15446384, "rating": "6;6;7;7;7", "confidence": "3;3;4;3;2", "soundness": "3;3;3;3;3", "novelty": "3;3;3;2;3", "presentation": "3;2;3;2;3", "wc_summary": "54;92;83;42;156", "wc_strengths": "95;73;88;70;87", "wc_weaknesses": "426;157;113;158;190", "wc_questions": "248;115;258;182;34", "wc_limitations": "61;1;10;47;10", "wc_review": "884;438;552;499;477", "wc_reply_reviewers": "100;264;84;218;25", "wc_reply_authors": "43;267;49;207;25", "reply_reviewers": "1;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 85.4, 39.75726348731764 ], "wc_strengths_avg": [ 82.6, 9.520504188329523 ], "wc_weaknesses_avg": [ 208.8, 111.32906179430418 ], "wc_questions_avg": [ 167.4, 84.2130631196847 ], "wc_limitations_avg": [ 25.8, 23.676148335402868 ], "wc_review_avg": [ 570.0, 161.26623949233763 ], "wc_reply_reviewers_avg": [ 138.2, 88.77477119091888 ], "wc_reply_authors_avg": [ 118.2, 99.15321477390434 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6253238347830208429&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;cam.ac.uk;;upenn.edu;;research.microsoft.com;cam.ac.uk", "author_num": 7, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Cambridge;University of Pennsylvania;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.cam.ac.uk;https://www.upenn.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Cambridge;UPenn;MSR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Towards a \"Universal Translator\" for Neural Dynamics at Single-Cell, Single-Spike Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93693", "id": "nRRJsDahEg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nRRJsDahEg", "openreview": "https://openreview.net/forum?id=nRRJsDahEg", "poster": "/media/PosterPDFs/NeurIPS%202024/93693.png?t=1732939832.7037408", "project": "", "author_site": "Yizi Zhang, Yanchen Wang, Donato Jim\u00e9nez-Benet\u00f3, Zixuan Wang, Mehdi Azabou, Blake Richards, Renee Tung, Olivier Winter, Brain Laboratory International, Eva Dyer, Liam Paninski, Cole Hurwitz", "tldr": "", "abstract": "Neuroscience research has made immense progress over the last decade, but our understanding of the brain remains fragmented and piecemeal: the dream of probing an arbitrary brain region and automatically reading out the information encoded in its neural activity remains out of reach. In this work, we build towards a first foundation model for neural spiking data that can solve a diverse set of tasks across multiple brain areas. We introduce a novel self-supervised modeling approach for population activity in which the model alternates between masking out and reconstructing neural activity across different time steps, neurons, and brain regions. To evaluate our approach, we design unsupervised and supervised prediction tasks using the International Brain Laboratory repeated site dataset, which is comprised of Neuropixels recordings targeting the same brain locations across 48 animals and experimental sessions. The prediction tasks include single-neuron and region-level activity prediction, forward prediction, and behavior decoding. We demonstrate that our multi-task-masking (MtM) approach significantly improves the performance of current state-of-the-art population models and enables multi-task learning. We also show that by training on multiple animals, we can improve the generalization ability of the model to unseen animals, paving the way for a foundation model of the brain at single-cell, single-spike resolution.", "keywords": "neural dynamics;transformer;self-supervised learning;pretraining;multi-task learning;masked modeling;brain-computer interfaces", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/56cd6abe93c1d3c010184b45cf190771c8989c61.zip", "author": "Yizi Zhang;Yanchen Wang;Donato M. Jim\u00e9nez-Benet\u00f3;Zixuan Wang;Mehdi Azabou;Blake Aaron Richards;Renee Tung;Olivier Winter;International Brain Laboratory;Eva L Dyer;Liam Paninski;Cole Lincoln Hurwitz", "authorids": "~Yizi_Zhang1;~Yanchen_Wang1;~Donato_M._Jim\u00e9nez-Benet\u00f31;~Zixuan_Wang13;~Mehdi_Azabou2;~Blake_Aaron_Richards1;~Renee_Tung1;~Olivier_Winter1;~International_Brain_Laboratory1;~Eva_L_Dyer1;~Liam_Paninski1;~Cole_Lincoln_Hurwitz1", "gender": ";M;;M;M;M;F;M;;;;", "homepage": ";https://ppwangyc.github.io/;;https://github.com/wdk0082;https://www.mehai.dev;http://linclab.org;;;https://www.internationalbrainlab.com/;;;https://colehurwitz.github.io/", "dblp": ";;;;281/8371;70/10850;;;;;94/2691;", "google_scholar": ";K87vA9sAAAAJ;;;jXxyYCoAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ;ME98XEkAAAAJ;;83-cG94AAAAJ;;;https://scholar.google.co.uk/citations?hl=en", "orcid": ";;;;;0000-0001-9662-2151;0000-0003-4603-8170;0000-0001-9278-2721;;;;", "linkedin": ";;;;;;renee-tung-2021;;;;;", "or_profile": "~Yizi_Zhang1;~Yanchen_Wang1;~Donato_M._Jim\u00e9nez-Benet\u00f31;~Zixuan_Wang13;~Mehdi_Azabou2;~Blake_Aaron_Richards1;~Renee_Tung1;~Olivier_Winter1;~International_Brain_Laboratory1;~Eva_L_Dyer1;~Liam_Paninski1;~Cole_Lincoln_Hurwitz1", "aff": ";Stanford University;;Zhejiang University;Georgia Institute of Technology;Mila - Quebec Artificial Intelligence Institute;Columbia University;;University College London, University of London;;Columbia University;Columbia University", "aff_domain": ";stanford.edu;;zju.edu.cn;gatech.edu;mila.quebec;columbia.edu;;ucl.ac.uk;;columbia.edu;columbia.edu", "position": ";Researcher;;Undergrad student;PhD student;Associate Professor;Medical student;;Emeritus;;Full Professor;Postdoc", "bibtex": "@inproceedings{\nzhang2024towards,\ntitle={Towards a ''Universal Translator'' for Neural Dynamics at Single-Cell, Single-Spike Resolution},\nauthor={Yizi Zhang and Yanchen Wang and Donato M. Jim{\\'e}nez-Benet{\\'o} and Zixuan Wang and Mehdi Azabou and Blake Aaron Richards and Renee Tung and Olivier Winter and International Brain Laboratory and Eva L Dyer and Liam Paninski and Cole Lincoln Hurwitz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nRRJsDahEg}\n}", "github": "", "reviewers": "P8rK;xe9o;JcgB;BBJQ", "pdf_size": 17752834, "rating": "6;6;6;6", "confidence": "4;4;4;4", "soundness": "4;3;3;3", "novelty": "3;2;3;2", "presentation": "3;3;4;3", "wc_summary": "63;182;82;91", "wc_strengths": "37;47;15;88", "wc_weaknesses": "98;149;103;174", "wc_questions": "79;72;31;308", "wc_limitations": "2;40;14;9", "wc_review": "279;490;245;670", "wc_reply_reviewers": "50;508;29;16", "wc_reply_authors": "231;1000;34;22", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 104.5, 45.87210481327405 ], "wc_strengths_avg": [ 46.75, 26.479945241635225 ], "wc_weaknesses_avg": [ 131.0, 31.804087787578503 ], "wc_questions_avg": [ 122.5, 108.65656906050366 ], "wc_limitations_avg": [ 16.25, 14.359230480774379 ], "wc_review_avg": [ 421.0, 171.68430330114631 ], "wc_reply_reviewers_avg": [ 150.75, 206.61482884827024 ], "wc_reply_authors_avg": [ 321.75, 400.28388363760035 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7078770171613776060&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 8, "email": ";stanford.edu;;zju.edu.cn;gatech.edu;mila.quebec;columbia.edu;;ucl.ac.uk;;columbia.edu;columbia.edu", "author_num": 12, "aff_unique_index": "0;1;2;3;4;5;4;4", "aff_unique_norm": "Stanford University;Zhejiang University;Georgia Institute of Technology;Quebec Artificial Intelligence Institute;Columbia University;University College London", "aff_unique_dep": ";;;Artificial Intelligence;;", "aff_unique_url": "https://www.stanford.edu;https://www.zju.edu.cn;https://www.gatech.edu;https://mila.quebec;https://www.columbia.edu;https://www.ucl.ac.uk", "aff_unique_abbr": "Stanford;ZJU;Georgia Tech;Mila;Columbia;UCL", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;2;0;3;0;0", "aff_country_unique": "United States;China;Canada;United Kingdom" }, { "title": "Fight Back Against Jailbreaking via Prompt Adversarial Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93692", "id": "nRdST1qifJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nRdST1qifJ", "openreview": "https://openreview.net/forum?id=nRdST1qifJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93692.png?t=1730202897.0245943", "project": "", "author_site": "Yichuan Mo, Yuji Wang, Zeming Wei, Yisen Wang", "tldr": "", "abstract": "While Large Language Models (LLMs) have achieved tremendous success in various applications, they are also susceptible to jailbreaking attacks. Several primary defense strategies have been proposed to protect LLMs from producing harmful information, mostly focusing on model fine-tuning or heuristical defense designs. However, how to achieve intrinsic robustness through prompt optimization remains an open problem. In this paper, motivated by adversarial training paradigms for achieving reliable robustness, we propose an approach named **Prompt Adversarial Tuning (PAT)** that trains a prompt control attached to the user prompt as a guard prefix. To achieve our defense goal whilst maintaining natural performance, we optimize the control prompt with both adversarial and benign prompts. Comprehensive experiments show that our method is effective against both grey-box and black-box attacks, reducing the success rate of advanced attacks to nearly 0, while maintaining the model's utility on the benign task and incurring only negligible computational overhead, charting a new perspective for future explorations in LLM security. Our code is available at https://github.com/PKU-ML/PAT.", "keywords": "Large Language Model;Jailbreak Defense;Prompt Tuning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yichuan Mo;Yuji Wang;Zeming Wei;Yisen Wang", "authorids": "~Yichuan_Mo1;~Yuji_Wang3;~Zeming_Wei1;~Yisen_Wang1", "gender": "M;M;M;M", "homepage": "https://www.linkedin.com/in/%E6%98%93%E5%B7%9D-%E8%8E%AB-446841212/;https://flower-nutria-c3c.notion.site/CV-a300ba143dbf48f29ec51bb8c04f8251?pvs=4;https://weizeming.github.io;https://yisenwang.github.io/", "dblp": "321/6790;;276/6608;172/1346-1", "google_scholar": "xvSYG1gAAAAJ;;Kyn1zdQAAAAJ;uMWPDboAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yichuan_Mo1;~Yuji_Wang3;~Zeming_Wei1;~Yisen_Wang1", "aff": "Peking University;Wuhan University;University of California, Berkeley;Peking University", "aff_domain": "stu.pku.edu.cn;whu.edu.cn;berkeley.edu;pku.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nmo2024fight,\ntitle={Fight Back Against Jailbreaking via Prompt Adversarial Tuning},\nauthor={Yichuan Mo and Yuji Wang and Zeming Wei and Yisen Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nRdST1qifJ}\n}", "github": "", "reviewers": "jHn4;1UaM;LA5t;DPyJ;zrTY", "pdf_size": 5001495, "rating": "4;4;5;5;6", "confidence": "4;3;4;4;3", "soundness": "3;2;3;2;4", "novelty": "3;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "115;94;61;38;96", "wc_strengths": "50;41;45;49;46", "wc_weaknesses": "108;141;164;88;63", "wc_questions": "68;12;72;46;13", "wc_limitations": "89;21;11;1;1", "wc_review": "430;309;353;222;219", "wc_reply_reviewers": "15;0;51;71;29", "wc_reply_authors": "1290;132;237;127;44", "reply_reviewers": "1;0;1;2;1", "reply_authors": "4;3;2;3;2", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.8, 27.56374430297887 ], "wc_strengths_avg": [ 46.2, 3.1874754901018454 ], "wc_weaknesses_avg": [ 112.8, 36.15190174804086 ], "wc_questions_avg": [ 42.2, 25.81782330096788 ], "wc_limitations_avg": [ 24.6, 33.04300228490141 ], "wc_review_avg": [ 306.6, 80.2710408553421 ], "wc_reply_reviewers_avg": [ 33.2, 25.285569006846572 ], "wc_reply_authors_avg": [ 366.0, 466.0424873335048 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.8, 0.7483314773547882 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.21821789023599233, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16358571454265026813&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stu.pku.edu.cn;whu.edu.cn;berkeley.edu;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Peking University;Wuhan University;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;http://www.whu.edu.cn/;https://www.berkeley.edu", "aff_unique_abbr": "Peking U;WHU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "InternLM-XComposer2-4KHD: A Pioneering Large Vision-Language Model Handling Resolutions from 336 Pixels to 4K HD", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93691", "id": "nRp0XhTf61", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nRp0XhTf61", "openreview": "https://openreview.net/forum?id=nRp0XhTf61", "poster": "/media/PosterPDFs/NeurIPS%202024/93691.png?t=1731747903.1503663", "project": "", "author_site": "Xiaoyi Dong, Pan Zhang, Yuhang Zang, Yuhang Cao, Bin Wang, Linke Ouyang, Songyang Zhang, Haodong Duan, Wenwei Zhang, Yining Li, Hang Yan, Yang Gao, Zhe Chen, xinyue zhang, Wei Li, Li Jingwen, Wenhai Wang, Kai Chen, Conghui He, Xingcheng ZHANG, Jifeng Dai, Yu Qiao, Dahua Lin, Jiaqi Wang", "tldr": "", "abstract": "The Large Vision-Language Model (LVLM) field has seen significant advancements, yet its progression has been hindered by challenges in comprehending fine-grained visual content due to limited resolution. Recent efforts have aimed to enhance the high-resolution understanding capabilities of LVLMs, yet they remain capped at approximately 1500 $\\times$ 1500 pixels and constrained to a relatively narrow resolution range. This paper represents InternLM-XComposer2-4KHD, a groundbreaking exploration into elevating LVLM resolution capabilities up to 4K HD (3840 \u00d7 1600) and beyond. Concurrently, considering the ultra-high resolution may not be necessary in all scenarios, it supports a wide range of diverse resolutions from 336 pixels to 4K standard, significantly broadening its scope of applicability. Specifically, this research advances the patch division paradigm by introducing a novel extension: dynamic resolution with automatic patch configuration. It maintains the training image aspect ratios while automatically varying patch counts and configuring layouts based on a pre-trained Vision Transformer (ViT) (336 $\\times$ 336), leading to dynamic training resolution from 336 pixels to 4K standard. Our research demonstrates that scaling training resolution up to 4K HD leads to consistent performance enhancements without hitting the ceiling of potential improvements. InternLM-XComposer2-4KHD shows superb capability that matches or even surpasses GPT-4V and Gemini Pro in 10 of the 16 benchmarks.", "keywords": "Large Vision Language Model (LVLM)", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xiaoyi Dong;Pan Zhang;Yuhang Zang;Yuhang Cao;Bin Wang;Linke Ouyang;Songyang Zhang;Haodong Duan;Wenwei Zhang;Yining Li;Hang Yan;Yang Gao;Zhe Chen;xinyue zhang;Wei Li;Li Jingwen;Wenhai Wang;Kai Chen;Conghui He;Xingcheng ZHANG;Jifeng Dai;Yu Qiao;Dahua Lin;Jiaqi Wang", "authorids": "~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Yuhang_Cao3;~Bin_Wang21;~Linke_Ouyang1;~Songyang_Zhang1;~Haodong_Duan1;~Wenwei_Zhang1;~Yining_Li1;~Hang_Yan2;~Yang_Gao7;~Zhe_Chen10;~xinyue_zhang8;~Wei_Li86;~Li_Jingwen1;~Wenhai_Wang2;~Kai_Chen4;~Conghui_He2;~Xingcheng_ZHANG2;~Jifeng_Dai1;~Yu_Qiao1;~Dahua_Lin1;~Jiaqi_Wang1", "gender": "M;M;M;M;M;F;M;M;M;M;;M;M;F;F;;;M;;M;M;;M;M", "homepage": ";https://panzhang0212.github.io/;https://yuhangzang.github.io;;https://wangbindl.github.io/;;https://www.zhangsongyang.com/;https://kennymckormick.github.io;https://zhangwenwei.cn;https://liyn.site;;;https://czczup.github.io/;https://www.shlab.org.cn;;;;https://chenkai.site/;;;https://jifengdai.org/;;http://dahua.site;https://myownskyw7.github.io/", "dblp": "230/3711;;230/4433;212/6480;13/1898-65;318/4644;;211/7919;;166/3420;;;06/4240-17;;;;;181/2839-26;;190/7261;14/9399;;53/6088;44/740-3", "google_scholar": "FscToE0AAAAJ;moHH480AAAAJ;hW23VKIAAAAJ;sJkqsqkAAAAJ;WljXYoYAAAAJ;https://scholar.google.com/citations?view_op=list_works;8XQPi7YAAAAJ;vi3W-m8AAAAJ;QDXADSEAAAAJ;https://scholar.google.com.hk/citations?user=y_cp1sUAAAAJ;;D1pRedQAAAAJ;j1rq_lYAAAAJ;;U0UKdakAAAAJ;;;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ;;3L8CsIIAAAAJ;SH_-B_AAAAAJ;;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ", "orcid": ";;0000-0003-1110-5062;;0000-0002-5625-2966;;;0000-0002-3052-4177;0000-0002-2748-4514;;;;;;;;;0000-0002-6820-2325;;0009-0006-8525-0608;;;;", "linkedin": ";;yuhang-zang/;;;;;haodong-duan-bb9349166/;wenweizhang-b9769a124/;;;;;;;;;;;xingchengzhang/;;;;", "or_profile": "~Xiaoyi_Dong1;~Pan_Zhang1;~Yuhang_Zang1;~Yuhang_Cao3;~Bin_Wang21;~Linke_Ouyang1;~Songyang_Zhang1;~Haodong_Duan1;~Wenwei_Zhang1;~Yining_Li1;~Hang_Yan2;~Yang_Gao7;~Zhe_Chen10;~xinyue_zhang8;~Wei_Li86;~Li_Jingwen1;~Wenhai_Wang2;~Kai_Chen4;~Conghui_He2;~Xingcheng_ZHANG2;~Jifeng_Dai1;~Yu_Qiao1;~Dahua_Lin1;~Jiaqi_Wang1", "aff": "Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory;Shanghai AI Lab;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai AI Laboratory;;Shanghai Artificial Intelligence Laboratory;Nanjing University;;Shanghai AI Laboratory;;;Shanghai AI Laboratory;;Sensetime;Tsinghua University;;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;pjlab.org.cn;nju.edu.cn;;pjlab.edu;;;pjlab.org.cn;;sensetime.com;tsinghua.edu.cn;;cuhk.edu.hk;pjlab.org.cn", "position": "Researcher;Researcher;Researcher;PhD student;Researcher;Researcher;Postdoc;Postdoc;Researcher;Researcher;;Researcher;PhD student;;Researcher;;;Researcher;;Researcher;Associate Professor;;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\ndong2024internlmxcomposerkhd,\ntitle={Intern{LM}-{XC}omposer2-4{KHD}: A Pioneering Large Vision-Language Model Handling Resolutions from 336 Pixels to 4K {HD}},\nauthor={Xiaoyi Dong and Pan Zhang and Yuhang Zang and Yuhang Cao and Bin Wang and Linke Ouyang and Songyang Zhang and Haodong Duan and Wenwei Zhang and Yining Li and Hang Yan and Yang Gao and Zhe Chen and xinyue zhang and Wei Li and Li Jingwen and Wenhai Wang and Kai Chen and Conghui He and Xingcheng ZHANG and Jifeng Dai and Yu Qiao and Dahua Lin and Jiaqi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nRp0XhTf61}\n}", "github": "", "reviewers": "kpFC;339k;Lqsg", "pdf_size": 1861146, "rating": "3;5;8", "confidence": "5;3;5", "soundness": "3;4;4", "novelty": "2;3;4", "presentation": "3;3;3", "wc_summary": "16;76;82", "wc_strengths": "23;87;113", "wc_weaknesses": "155;288;233", "wc_questions": "20;78;1", "wc_limitations": "2;29;1", "wc_review": "216;558;430", "wc_reply_reviewers": "359;75;120", "wc_reply_authors": "631;96;55", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.333333333333333, 2.0548046676563256 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 58.0, 29.79932885150268 ], "wc_strengths_avg": [ 74.33333333333333, 37.81827894315422 ], "wc_weaknesses_avg": [ 225.33333333333334, 54.5669822837543 ], "wc_questions_avg": [ 33.0, 32.751590292177674 ], "wc_limitations_avg": [ 10.666666666666666, 12.970050972229147 ], "wc_review_avg": [ 401.3333333333333, 141.08468693975576 ], "wc_reply_reviewers_avg": [ 184.66666666666666, 124.6336854769029 ], "wc_reply_authors_avg": [ 260.6666666666667, 262.39961043331516 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 24, 0 ], "corr_rating_confidence": 0.1147078669352809, "gs_citation": 134, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7354881944323678809&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;pjlab.org.cn;nju.edu.cn;;pjlab.edu;;;pjlab.org.cn;;sensetime.com;tsinghua.edu.cn;;cuhk.edu.hk;pjlab.org.cn", "author_num": 24, "aff_unique_index": "0;0;0;1;2;3;0;0;2;2;0;4;2;2;5;6;1;2", "aff_unique_norm": "Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong;Shanghai AI Laboratory;Shanghai AI Lab;Nanjing University;SenseTime;Tsinghua University", "aff_unique_dep": ";;;;;;", "aff_unique_url": "http://www.shailab.org/;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com;https://www.shanghaiailab.com;https://www.nju.edu.cn;https://www.sensetime.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Shanghai AI Lab;CUHK;SAIL;SAIL;Nanjing U;SenseTime;THU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "RTify: Aligning Deep Neural Networks with Human Behavioral Decisions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93690", "id": "nTJeOXlWyV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nTJeOXlWyV", "openreview": "https://openreview.net/forum?id=nTJeOXlWyV", "poster": "/media/PosterPDFs/NeurIPS%202024/93690.png?t=1731351692.2362742", "project": "", "author_site": "Yu-Ang Cheng, Ivan F Rodriguez Rodriguez, Sixuan Chen, Kohitij Kar, Takeo Watanabe, Thomas Serre", "tldr": "", "abstract": "Current neural network models of primate vision focus on replicating overall levels of behavioral accuracy, often neglecting perceptual decisions' rich, dynamic nature. Here, we introduce a novel computational framework to model the dynamics of human behavioral choices by learning to align the temporal dynamics of a recurrent neural network (RNN) to human reaction times (RTs). We describe an approximation that allows us to constrain the number of time steps an RNN takes to solve a task with human RTs. The approach is extensively evaluated against various psychophysics experiments. We also show that the approximation can be used to optimize an ``ideal-observer'' RNN model to achieve an optimal tradeoff between speed and accuracy without human data. The resulting model is found to account well for human RT data. Finally, we use the approximation to train a deep learning implementation of the popular Wong-Wang decision-making model. The model is integrated with a convolutional neural network (CNN) model of visual processing and evaluated using both artificial and natural image stimuli. Overall, we present a novel framework that helps align current vision models with human behavior, bringing us closer to an integrated model of human vision.", "keywords": "Alignment; Recurrent neural networks; Reaction times; Visual decision making; Speed-accuracy tradeoff", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yu-Ang Cheng;Ivan F Rodriguez Rodriguez;Sixuan Chen;Kohitij Kar;Takeo Watanabe;Thomas Serre", "authorids": "~Yu-Ang_Cheng1;~Ivan_F_Rodriguez_Rodriguez1;~Sixuan_Chen2;~Kohitij_Kar2;~Takeo_Watanabe1;~Thomas_Serre1", "gender": ";M;F;;;M", "homepage": ";;;;;https://serre-lab.clps.brown.edu/", "dblp": ";327/9480;;;;", "google_scholar": "QZZfiVMAAAAJ;https://scholar.google.com/citations?hl=en;;;;kZlPW4wAAAAJ", "orcid": "0009-0004-0145-1660;;;;;", "linkedin": ";ivan-felipe-rodriguez/en;sixuan-chen-08564912a/;;;", "or_profile": "~Yu-Ang_Cheng1;~Ivan_F_Rodriguez_Rodriguez1;~Sixuan_Chen2;~Kohitij_Kar2;~Takeo_Watanabe1;~Thomas_Serre1", "aff": "Brown University;Brown University;Brown University;;;Universit\u00e9 de Toulouse", "aff_domain": "brown.edu;brown.edu;brown.edu;;;univ-toulouse.fr", "position": "PhD student;PhD student;PhD student;;;Full Professor", "bibtex": "@inproceedings{\ncheng2024rtify,\ntitle={{RT}ify: Aligning Deep Neural Networks with Human Behavioral Decisions},\nauthor={Yu-Ang Cheng and Ivan F Rodriguez Rodriguez and Sixuan Chen and Kohitij Kar and Takeo Watanabe and Thomas Serre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nTJeOXlWyV}\n}", "github": "", "reviewers": "QgJJ;imBE;UxtZ;6b9L", "pdf_size": 6888680, "rating": "4;6;7;7", "confidence": "3;3;4;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "77;45;127;77", "wc_strengths": "49;75;51;157", "wc_weaknesses": "122;155;158;315", "wc_questions": "93;49;14;7", "wc_limitations": "9;47;76;11", "wc_review": "350;371;426;567", "wc_reply_reviewers": "133;44;41;0", "wc_reply_authors": "277;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.5, 29.338541204361203 ], "wc_strengths_avg": [ 83.0, 43.93176527297759 ], "wc_weaknesses_avg": [ 187.5, 74.95498649189392 ], "wc_questions_avg": [ 40.75, 34.10553474144629 ], "wc_limitations_avg": [ 35.75, 27.725214156071004 ], "wc_review_avg": [ 428.5, 84.64189270095513 ], "wc_reply_reviewers_avg": [ 54.5, 48.541219597369 ], "wc_reply_authors_avg": [ 69.25, 119.94451842414475 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7385489458759963, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13489417292292874698&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "brown.edu;brown.edu;brown.edu;;;univ-toulouse.fr", "author_num": 6, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Brown University;Universit\u00e9 de Toulouse", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.univ-toulouse.fr", "aff_unique_abbr": "Brown;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;France" }, { "title": "Toward Real Ultra Image Segmentation: Leveraging Surrounding Context to Cultivate General Segmentation Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93689", "id": "nU4lvlMwrt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nU4lvlMwrt", "openreview": "https://openreview.net/forum?id=nU4lvlMwrt", "poster": "/media/PosterPDFs/NeurIPS%202024/93689.png?t=1731823653.4154217", "project": "", "author_site": "Sai Wang, Yutian Lin, Yu Wu, Bo Du", "tldr": "", "abstract": "Existing ultra image segmentation methods suffer from two major challenges, namely the scalability issue (i.e. they lack the stability and generality of standard segmentation models, as they are tailored to specific datasets), and the architectural issue (i.e. they are incompatible with real-world ultra image scenes, as they compromise between image size and computing resources).\nTo tackle these issues, we revisit the classic sliding inference framework, upon which we propose a Surrounding Guided Segmentation framework (SGNet) for ultra image segmentation. \nThe SGNet leverages a larger area around each image patch to refine the general segmentation results of local patches.\nSpecifically, we propose a surrounding context integration module to absorb surrounding context information and extract specific features that are beneficial to local patches. Note that, SGNet can be seamlessly integrated to any general segmentation model.\nExtensive experiments on five datasets demonstrate that SGNet achieves competitive performance and consistent improvements across a variety of general segmentation models, surpassing the traditional ultra image segmentation methods by a large margin.", "keywords": "Ultra Image Segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Sai Wang;Yutian Lin;Yu Wu;Bo Du", "authorids": "~Sai_Wang1;~Yutian_Lin2;~Yu_Wu3;~Bo_Du3", "gender": ";M;M;F", "homepage": "https://github.com/BubbleSai;https://yu-wu.net;;https://vana77.github.io/", "dblp": "89/10686;22/0-11;70/6443-1.html;198/1146", "google_scholar": ";23SZHUwAAAAJ;Shy1gnMAAAAJ;gB6Xq5IAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sai_Wang1;~Yu_Wu3;~Bo_Du1;~Yutian_Lin1", "aff": "Wuhan University;Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "PhD student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024toward,\ntitle={Toward Real Ultra Image Segmentation: Leveraging Surrounding Context to Cultivate General Segmentation Model},\nauthor={Sai Wang and Yutian Lin and Yu Wu and Bo Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nU4lvlMwrt}\n}", "github": "", "reviewers": "HefE;MJRN;xcUS;1ux8", "pdf_size": 24466029, "rating": "3;5;7;7", "confidence": "5;5;4;4", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;2;4;3", "wc_summary": "60;42;26;64", "wc_strengths": "43;65;136;51", "wc_weaknesses": "69;375;47;43", "wc_questions": "2;2;165;96", "wc_limitations": "2;1;54;8", "wc_review": "176;485;428;262", "wc_reply_reviewers": "25;128;0;25", "wc_reply_authors": "120;347;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;4;1;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 48.0, 15.165750888103101 ], "wc_strengths_avg": [ 73.75, 36.79249244071404 ], "wc_weaknesses_avg": [ 133.5, 139.781078834011 ], "wc_questions_avg": [ 66.25, 68.72545016222156 ], "wc_limitations_avg": [ 16.25, 21.958768180387533 ], "wc_review_avg": [ 337.75, 124.22635589922132 ], "wc_reply_reviewers_avg": [ 44.5, 49.277276710467675 ], "wc_reply_authors_avg": [ 116.75, 141.67458311214472 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13297607555090017126&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "SEEV: Synthesis with Efficient Exact Verification for ReLU Neural Barrier Functions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93688", "id": "nWMqQHzI3W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nWMqQHzI3W", "openreview": "https://openreview.net/forum?id=nWMqQHzI3W", "poster": "", "project": "", "author_site": "Hongchao Zhang, Zhizhen Qin, Sicun Gao, Andrew Clark", "tldr": "", "abstract": "Neural Control Barrier Functions (NCBFs) have shown significant promise in enforcing safety constraints on nonlinear autonomous systems. State-of-the-art exact approaches to verifying safety of NCBF-based controllers exploit the piecewise-linear structure of ReLU neural networks, however, such approaches still rely on enumerating all of the activation regions of the network near the safety boundary, thus incurring high computation cost. In this paper, we propose a framework for Synthesis with Efficient Exact Verification (SEEV). Our framework consists of two components, namely (i) an NCBF synthesis algorithm that introduces a novel regularizer to reduce the number of activation regions at the safety boundary, and (ii) a verification algorithm that exploits tight over-approximations of the safety conditions to reduce the cost of verifying each piecewise-linear segment. Our simulations show that SEEV significantly improves verification efficiency while maintaining the CBF quality across various benchmark systems and neural network structures. Our code is available at https://github.com/HongchaoZhang-HZ/SEEV.", "keywords": "Safe Control;Barrier Functions;Control Barrier Functions;Neural Networks", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/30b91db354495f76f642059c3cd273ea85e52ea7.zip", "author": "Hongchao Zhang;Zhizhen Qin;Sicun Gao;Andrew Clark", "authorids": "~Hongchao_Zhang2;~Zhizhen_Qin1;~Sicun_Gao1;~Andrew_Clark1", "gender": "M;M;M;M", "homepage": ";https://zhizhenqin.github.io;;https://awclark587.wixsite.com/mysite", "dblp": ";294/2353;22/8296;", "google_scholar": "J9NlFfgAAAAJ;4OMmbNwAAAAJ;;", "orcid": ";;;", "linkedin": ";zhizhenqin/;;", "or_profile": "~Hongchao_Zhang2;~Zhizhen_Qin1;~Sicun_Gao1;~Andrew_Clark1", "aff": "Washington University, Saint Louis;University of California, San Diego;University of California, San Diego;Washington University, Saint Louis", "aff_domain": "wustl.edu;ucsd.edu;ucsd.edu;wustl.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024seev,\ntitle={{SEEV}: Synthesis with Efficient Exact Verification for Re{LU} Neural Barrier Functions},\nauthor={Hongchao Zhang and Zhizhen Qin and Sicun Gao and Andrew Clark},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nWMqQHzI3W}\n}", "github": "", "reviewers": "bZn2;q91P;nDkY;Ufq9", "pdf_size": 1312101, "rating": "5;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;4;4", "novelty": "2;3;4;3", "presentation": "2;2;2;3", "wc_summary": "76;48;117;222", "wc_strengths": "41;67;86;53", "wc_weaknesses": "278;226;124;175", "wc_questions": "57;2;82;41", "wc_limitations": "4;12;45;10", "wc_review": "456;355;454;501", "wc_reply_reviewers": "79;69;99;52", "wc_reply_authors": "139;58;57;71", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.75, 66.06956561080146 ], "wc_strengths_avg": [ 61.75, 16.753730927766508 ], "wc_weaknesses_avg": [ 200.75, 57.35579744018908 ], "wc_questions_avg": [ 45.5, 29.055980451535273 ], "wc_limitations_avg": [ 17.75, 16.005858302509115 ], "wc_review_avg": [ 441.5, 53.35962893424204 ], "wc_reply_reviewers_avg": [ 74.75, 17.005513811702368 ], "wc_reply_authors_avg": [ 81.25, 33.796264586489436 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hI6RmZvypBoJ:scholar.google.com/&scioq=SEEV:+Synthesis+with+Efficient+Exact+Verification+for+ReLU+Neural+Barrier+Functions&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "wustl.edu;ucsd.edu;ucsd.edu;wustl.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Washington University in St. Louis;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;https://www.ucsd.edu", "aff_unique_abbr": "WUSTL;UCSD", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Saint Louis;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A hierarchical decomposition for explaining ML performance discrepancies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93686", "id": "nXXwYsARXB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nXXwYsARXB", "openreview": "https://openreview.net/forum?id=nXXwYsARXB", "poster": "/media/PosterPDFs/NeurIPS%202024/93686.png?t=1733899094.6122346", "project": "", "author_site": "Harvineet Singh, Fan Xia, Adarsh Subbaswamy, Alexej Gossmann, Jean Feng", "tldr": "", "abstract": "Machine learning (ML) algorithms can often differ in performance across domains. Understanding why their performance differs is crucial for determining what types of interventions (e.g., algorithmic or operational) are most effective at closing the performance gaps. Aggregate decompositions express the total performance gap as the gap due to a shift in the feature distribution $p(X)$ plus the gap due to a shift in the outcome's conditional distribution $p(Y|X)$. While this coarse explanation is helpful for guiding root cause analyses, it provides limited details and can only suggest coarse fixes involving all variables in an ML system. Detailed decompositions quantify the importance of each variable to each term in the aggregate decomposition, which can provide a deeper understanding and suggest more targeted interventions. Although parametric methods exist for conducting a full hierarchical decomposition of an algorithm's performance gap at the aggregate and detailed levels, current nonparametric methods only cover parts of the hierarchy; many also require knowledge of the entire causal graph. We introduce a nonparametric hierarchical framework for explaining why the performance of an ML algorithm differs across domains, without requiring causal knowledge. Furthermore, we derive debiased, computationally-efficient estimators and statistical inference procedures to construct confidence intervals for the explanations.", "keywords": "explainability;distribution shift;double machine learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Harvineet Singh;Fan Xia;Adarsh Subbaswamy;Alexej Gossmann;Jean Feng", "authorids": "~Harvineet_Singh1;~Fan_Xia2;~Adarsh_Subbaswamy1;~Alexej_Gossmann1;~Jean_Feng1", "gender": "M;;;;", "homepage": "http://harvineet.github.io;https://profiles.ucsf.edu/fan.xia;https://asubbaswamy.github.io/;;https://www.jeanfeng.com", "dblp": "161/9986;;151/7371;;222/9902", "google_scholar": "VaaScNkAAAAJ;;rxkuYnwAAAAJ;;", "orcid": ";0000-0002-2435-794X;;;", "linkedin": ";;;;", "or_profile": "~Harvineet_Singh1;~Fan_Xia2;~Adarsh_Subbaswamy1;~Alexej_Gossmann1;~Jean_Feng1", "aff": "University of California, San Francisco;University of California, San Francisco;U.S. Food and Drug Administration;;University of California, San Francisco", "aff_domain": "ucsf.edu;ucsf.edu;fda.hhs.gov;;ucsf.edu", "position": "Postdoctoral scholar;Assistant Professor;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nsingh2024a,\ntitle={A hierarchical decomposition for explaining {ML} performance discrepancies},\nauthor={Harvineet Singh and Fan Xia and Adarsh Subbaswamy and Alexej Gossmann and Jean Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nXXwYsARXB}\n}", "github": "", "reviewers": "tcta;tcwU;7cwp;fcBN", "pdf_size": 2136874, "rating": "5;5;6;8", "confidence": "4;2;3;2", "soundness": "3;3;2;3", "novelty": "2;3;3;4", "presentation": "3;2;3;1", "wc_summary": "350;73;37;46", "wc_strengths": "164;70;53;19", "wc_weaknesses": "613;50;100;189", "wc_questions": "130;134;64;51", "wc_limitations": "2;1;2;5", "wc_review": "1259;328;256;310", "wc_reply_reviewers": "21;0;198;27", "wc_reply_authors": "21;56;436;21", "reply_reviewers": "1;0;2;2", "reply_authors": "2;2;3;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 126.5, 129.71603601714014 ], "wc_strengths_avg": [ 76.5, 53.751744157748035 ], "wc_weaknesses_avg": [ 238.0, 222.15647638545224 ], "wc_questions_avg": [ 94.75, 37.559120064240055 ], "wc_limitations_avg": [ 2.5, 1.5 ], "wc_review_avg": [ 538.25, 416.96784948002886 ], "wc_reply_reviewers_avg": [ 61.5, 79.44337606119217 ], "wc_reply_authors_avg": [ 133.5, 175.23198908875057 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4923659639173309, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13491231054348379269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ucsf.edu;ucsf.edu;fda.hhs.gov;;ucsf.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, San Francisco;Food and Drug Administration", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsf.edu;https://www.fda.gov", "aff_unique_abbr": "UCSF;FDA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Francisco;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Calibrated Self-Rewarding Vision Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93685", "id": "nXYedmTf1T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nXYedmTf1T", "openreview": "https://openreview.net/forum?id=nXYedmTf1T", "poster": "", "project": "", "author_site": "Yiyang Zhou, Zhiyuan Fan, Dongjie Cheng, Sihan Yang, Zhaorun Chen, Chenhang Cui, Xiyao Wang, Yun Li, Linjun Zhang, Huaxiu Yao", "tldr": "", "abstract": "Large Vision-Language Models (LVLMs) have made substantial progress by integrating pre-trained large language models (LLMs) and vision models through instruction tuning. Despite these advancements, LVLMs often exhibit the hallucination phenomenon, where generated text responses appear linguistically plausible but contradict the input image, indicating a misalignment between image and text pairs. This misalignment arises because the model tends to prioritize textual information over visual input, even when both the language model and visual representations are of high quality. Existing methods leverage additional models or human annotations to curate preference data and enhance modality alignment through preference optimization. These approaches are resource-intensive and may not effectively reflect the target LVLM's preferences, making the curated preferences easily distinguishable. Our work addresses these challenges by proposing the Calibrated Self-Rewarding (CSR) approach, which enables the model to self-improve by iteratively generating candidate responses, evaluating the reward for each response, and curating preference data for fine-tuning. In the reward modeling, we employ a step-wise strategy and incorporate visual constraints into the self-rewarding process to place greater emphasis on visual input. Empirical results demonstrate that CSR significantly enhances performance and reduces hallucinations across twelve benchmarks and tasks, achieving substantial improvements over existing methods by 7.62\\%. Our empirical results are further supported by rigorous theoretical analysis, under mild assumptions, verifying the effectiveness of introducing visual constraints into the self-rewarding paradigm. Additionally, CSR shows compatibility with different vision-language models and the ability to incrementally improve performance through iterative fine-tuning.", "keywords": "Calibrated self-rewarding;large Vision-language models;Modality alignment", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yiyang Zhou;Zhiyuan Fan;Dongjie Cheng;Sihan Yang;Zhaorun Chen;Chenhang Cui;Xiyao Wang;Yun Li;Linjun Zhang;Huaxiu Yao", "authorids": "~Yiyang_Zhou1;~Zhiyuan_Fan2;~Dongjie_Cheng2;~Sihan_Yang1;~Zhaorun_Chen1;~Chenhang_Cui1;~Xiyao_Wang1;~Yun_Li7;~Linjun_Zhang1;~Huaxiu_Yao1", "gender": "M;M;M;M;M;M;M;Not Specified;M;M", "homepage": "https://yiyangzhou.github.io/;https://zhiyuan.fan;https://dongjie-cheng.github.io/;https://github.com/sihany077;https://billchan226.github.io/;https://gzcch.github.io;;https://yunliweb.its.unc.edu;;http://huaxiuyao.mystrikingly.com", "dblp": "175/1589.html;210/1532;345/9471;240/7223;302/1064;347/3407;;;;197/1635", "google_scholar": "https://scholar.google.com.hk/citations?user=6KltFMAAAAAJ;;s6x7gQcAAAAJ;z1rj_2gAAAAJ;UZg5N5UAAAAJ;V5X1gdAAAAAJ;puVqfbwAAAAJ;https://scholar.google.com/citations?hl=en;TUAzs3sAAAAJ;A20BZnQAAAAJ", "orcid": ";;0009-0006-5589-3733;;0000-0002-2668-6587;;;0000-0002-9275-4189;;", "linkedin": ";;;;zhaorun-chen-1793b6226/;;;;;huaxiuyao/", "or_profile": "~Yiyang_Zhou1;~Zhiyuan_Fan2;~Dongjie_Cheng2;~Sihan_Yang1;~Zhaorun_Chen1;~Chenhang_Cui1;~Xiyao_Wang1;~Yun_Li7;~Linjun_Zhang1;~Huaxiu_Yao1", "aff": "Xi'an Jiaotong University;Tianjin University;Sichuan University;Shanghai Artificial Intelligence Laboratory;University of Chicago;University of Electronic Science and Technology of China;University of Maryland, College Park;University of North Carolina at Chapel Hill;Rutgers University;Department of Computer Science, University of North Carolina at Chapel Hill", "aff_domain": "xjtu.edu.cn;tju.edu.cn;scu.edu.cn;pjlab.org.cn;uchicago.edu;uestc.edu.cn;umd.edu;unc.edu;rutgers.edu;cs.unc.edu", "position": "MS student;Undergrad student;Undergrad student;Intern;PhD student;Undergrad student;PhD student;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024calibrated,\ntitle={Calibrated Self-Rewarding Vision Language Models},\nauthor={Yiyang Zhou and Zhiyuan Fan and Dongjie Cheng and Sihan Yang and Zhaorun Chen and Chenhang Cui and Xiyao Wang and Yun Li and Linjun Zhang and Huaxiu Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nXYedmTf1T}\n}", "github": "", "reviewers": "bHbK;oVPD;myph;GvTb;4vrA", "pdf_size": 19298892, "rating": "5;5;7;7;8", "confidence": "3;5;4;4;3", "soundness": "3;3;3;3;4", "novelty": "2;2;2;3;4", "presentation": "3;3;3;4;4", "wc_summary": "54;92;113;41;71", "wc_strengths": "41;112;37;26;126", "wc_weaknesses": "91;139;256;38;45", "wc_questions": "6;109;3;33;108", "wc_limitations": "6;102;8;9;1", "wc_review": "198;554;417;147;351", "wc_reply_reviewers": "95;0;17;134;22", "wc_reply_authors": "33;0;22;1145;22", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;2;4;2", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 74.2, 25.85652722234755 ], "wc_strengths_avg": [ 68.4, 41.84065009055189 ], "wc_weaknesses_avg": [ 113.8, 79.81829364249776 ], "wc_questions_avg": [ 51.8, 47.461142000588225 ], "wc_limitations_avg": [ 25.2, 38.49883115108821 ], "wc_review_avg": [ 333.4, 147.67613212702992 ], "wc_reply_reviewers_avg": [ 53.6, 51.74785019689224 ], "wc_reply_authors_avg": [ 244.4, 450.42761904661216 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.3563483225498991, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15461348115330694510&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;tju.edu.cn;scu.edu.cn;pjlab.org.cn;uchicago.edu;uestc.edu.cn;umd.edu;unc.edu;rutgers.edu;cs.unc.edu", "author_num": 10, "aff_unique_index": "0;1;2;3;4;5;6;7;8;9", "aff_unique_norm": "Xi'an Jiao Tong University;Tianjin University;Sichuan University;Shanghai Artificial Intelligence Laboratory;University of Chicago;University of Electronic Science and Technology of China;University of Maryland;University of North Carolina;Rutgers University;University of North Carolina at Chapel Hill", "aff_unique_dep": ";;;;;;;;;Department of Computer Science", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.tju.edu.cn;https://www.scu.edu.cn;http://www.shailab.org/;https://www.uchicago.edu;https://www.uestc.edu.cn;https://www/umd.edu;https://www.unc.edu;https://www.rutgers.edu;https://www.unc.edu", "aff_unique_abbr": "XJTU;TJU;SCU;Shanghai AI Lab;UChicago;UESTC;UMD;UNC;Rutgers;UNC Chapel Hill", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";College Park;Chapel Hill", "aff_country_unique_index": "0;0;0;0;1;0;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Time-Reversal Provides Unsupervised Feedback to LLMs", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93684", "id": "nY0BrZdqLt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nY0BrZdqLt", "openreview": "https://openreview.net/forum?id=nY0BrZdqLt", "poster": "/media/PosterPDFs/NeurIPS%202024/93684.png?t=1733510884.577893", "project": "", "author_site": "Yerram Varun, Rahul Madhavan, Sravanti Addepalli, Arun Suggala, Karthikeyan Shanmugam, Prateek Jain", "tldr": "", "abstract": "Large Language Models (LLMs) are typically trained to predict in the forward direction of time. However, recent works have shown that prompting these models to look back and critique their own generations can produce useful feedback. Motivated by this, we explore the question of whether LLMs can be empowered to think (predict and score) backwards to provide unsupervised feedback that complements forward LLMs. Towards this, we introduce Time Reversed Language Models (TRLMs), which can score and generate queries when conditioned on responses, effectively functioning in the reverse direction of time. Further, to effectively infer in the response to query direction, we pre-train and fine-tune a language model (TRLM-Ba) in the reverse token order from scratch. We show empirically (and theoretically in a stylized setting) that time-reversed models can indeed complement forward model predictions when used to score the query given response for re-ranking multiple forward generations. We obtain up to 5\\% improvement on the widely used AlpacaEval Leaderboard over the competent baseline of best-of-N re-ranking using self log-perplexity scores. We further show that TRLM scoring outperforms conventional forward scoring of response given query, resulting in significant gains in applications such as citation generation and passage retrieval. We next leverage the generative ability of TRLM to augment or provide unsupervised feedback to input safety filters of LLMs, demonstrating a drastic reduction in false negative rate with negligible impact on false positive rates against several attacks published on the popular JailbreakBench leaderboard.", "keywords": "LLMs;Reranking;reverse LLMs;reverse scoring;defenses;generative models;sequence reversal", "primary_area": "generative_models", "supplementary_material": "", "author": "Yerram Varun;Rahul Madhavan;Sravanti Addepalli;Arun Suggala;Karthikeyan Shanmugam;Prateek Jain", "authorids": "~Yerram_Varun1;~Rahul_Madhavan1;~Sravanti_Addepalli1;~Arun_Suggala1;~Karthikeyan_Shanmugam1;~Prateek_Jain1", "gender": "M;M;F;M;M;M", "homepage": "https://varun221.github.io/;;;;https://sites.google.com/corp/view/karthikeyan-shanmugam/;http://prateekjain.org", "dblp": "321/0471;290/2008;127/7715;164/7327;;https://dblp.uni-trier.de/pers/j/Jain_0002:Prateek.html", "google_scholar": ";HrM2xRcAAAAJ;MOO12i0AAAAJ;CKgmfDMAAAAJ;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;qYhRbJoAAAAJ", "orcid": ";;;;0009-0008-2879-5868;", "linkedin": "yvarun25221/;rahul-madhavan/;sravanti-addepalli/;;;", "or_profile": "~Yerram_Varun1;~Rahul_Madhavan1;~Sravanti_Addepalli1;~Arun_Suggala1;~Karthikeyan_Shanmugam1;~Prateek_Jain1", "aff": "Google DeepMind;Indian Institute of Science, Bangalore;Indian Institute of Science;Google;Google Research;Google", "aff_domain": "research.google.com;iisc.ac.in;iisc.ac.in;google.com;google.com;google.com", "position": "Researcher;PhD student;PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nvarun2024timereversal,\ntitle={Time-Reversal Provides Unsupervised Feedback to {LLM}s},\nauthor={Yerram Varun and Rahul Madhavan and Sravanti Addepalli and Arun Suggala and Karthikeyan Shanmugam and Prateek Jain},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nY0BrZdqLt}\n}", "github": "", "reviewers": "4Q1B;xuG3;6ZRs", "pdf_size": 645721, "rating": "7;7;8", "confidence": "1;3;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "110;154;135", "wc_strengths": "187;53;75", "wc_weaknesses": "101;150;86", "wc_questions": "70;27;132", "wc_limitations": "36;1;9", "wc_review": "504;385;437", "wc_reply_reviewers": "12;327;20", "wc_reply_authors": "0;30;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 133.0, 18.01850900231944 ], "wc_strengths_avg": [ 105.0, 58.67424193516833 ], "wc_weaknesses_avg": [ 112.33333333333333, 27.329267990359508 ], "wc_questions_avg": [ 76.33333333333333, 43.099368389287974 ], "wc_limitations_avg": [ 15.333333333333334, 14.974051630144134 ], "wc_review_avg": [ 442.0, 48.71002634639676 ], "wc_reply_reviewers_avg": [ 119.66666666666667, 146.64317993763711 ], "wc_reply_authors_avg": [ 10.0, 14.142135623730951 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jHXdsdcxfYAJ:scholar.google.com/&scioq=Time-Reversal+Provides+Unsupervised+Feedback+to+LLMs&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "research.google.com;iisc.ac.in;iisc.ac.in;google.com;google.com;google.com", "author_num": 6, "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "Google;Indian Institute of Science", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.iisc.ac.in", "aff_unique_abbr": "DeepMind;IISc", "aff_campus_unique_index": "1;2;2;2", "aff_campus_unique": ";Bangalore;Mountain View", "aff_country_unique_index": "0;1;1;2;2;2", "aff_country_unique": "United Kingdom;India;United States" }, { "title": "Graph Neural Networks Do Not Always Oversmooth", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93683", "id": "nY7fGtsspU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nY7fGtsspU", "openreview": "https://openreview.net/forum?id=nY7fGtsspU", "poster": "/media/PosterPDFs/NeurIPS%202024/93683.png?t=1731679091.421702", "project": "", "author_site": "Bastian Epping, Alexandre Ren\u00e9, Moritz Helias, Michael T Schaub", "tldr": "", "abstract": "Graph neural networks (GNNs) have emerged as powerful tools for processing relational data in applications. However, GNNs suffer from the problem of oversmoothing, the property that features of all nodes exponentially converge to the same vector over layers, prohibiting the design of deep GNNs. In this work we study oversmoothing in graph convolutional networks (GCNs) by using their Gaussian process (GP) equivalence in the limit of infinitely many hidden features. By generalizing methods from conventional deep neural networks (DNNs), we can describe the distribution of features at the output layer of deep GCNs in terms of a GP: as expected, we find that typical parameter choices from the literature lead to oversmoothing. The theory, however, allows us to identify a new, non-oversmoothing phase: if the initial weights of the network have sufficiently large variance, GCNs do not oversmooth, and node features remain informative even at large depth. We demonstrate the validity of this prediction in finite-size GCNs by training a linear classifier on their output. Moreover, using the linearization of the GCN GP, we generalize the concept of propagation depth of information from DNNs to GCNs. This propagation depth diverges at the transition between the oversmoothing and non-oversmoothing phase. We test the predictions of our approach and find good agreement with finite-size GCNs. Initializing GCNs near the transition to the non-oversmoothing phase, we obtain networks which are both deep and expressive.", "keywords": "graph neural networks;oversmoothing;Gaussian processes;chaos", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Bastian Epping;Alexandre Ren\u00e9;Moritz Helias;Michael T Schaub", "authorids": "~Bastian_Epping1;~Alexandre_Ren\u00e91;~Moritz_Helias1;~Michael_T_Schaub1", "gender": "M;M;M;", "homepage": ";;https://www.fz-juelich.de/profile/helias_m;https://michaelschaub.github.io/", "dblp": "381/5012;;79/8535;72/10263", "google_scholar": ";;https://scholar.google.de/citations?user=NZQ_gSAAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-5767-7053;0000-0003-3795-5073;0000-0002-0404-8656;0000-0003-2426-6404", "linkedin": ";;;", "or_profile": "~Bastian_Epping1;~Alexandre_Ren\u00e91;~Moritz_Helias1;~Michael_T_Schaub1", "aff": "RWTH Aachen University;University of Ottawa;Forschungszentrum J\u00fclich;RWTH Aachen University", "aff_domain": "rwth-aachen.de;uottawa.ca;fz-juelich.de;rwth-aachen.de", "position": "PhD student;PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nepping2024graph,\ntitle={Graph Neural Networks Do Not Always Oversmooth},\nauthor={Bastian Epping and Alexandre Ren{\\'e} and Moritz Helias and Michael T Schaub},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nY7fGtsspU}\n}", "github": "", "reviewers": "HWT8;WQpY;kVv2", "pdf_size": 10015769, "rating": "5;6;7", "confidence": "3;3;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "85;69;54", "wc_strengths": "76;55;62", "wc_weaknesses": "63;100;184", "wc_questions": "97;80;40", "wc_limitations": "1;49;55", "wc_review": "322;353;395", "wc_reply_reviewers": "0;22;85", "wc_reply_authors": "0;0;20", "reply_reviewers": "0;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.33333333333333, 12.657891697365017 ], "wc_strengths_avg": [ 64.33333333333333, 8.73053390247253 ], "wc_weaknesses_avg": [ 115.66666666666667, 50.624982853220686 ], "wc_questions_avg": [ 72.33333333333333, 23.893281249943232 ], "wc_limitations_avg": [ 35.0, 24.166091947189145 ], "wc_review_avg": [ 356.6666666666667, 29.914693528246097 ], "wc_reply_reviewers_avg": [ 35.666666666666664, 36.02159845919605 ], "wc_reply_authors_avg": [ 6.666666666666667, 9.428090415820632 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8436080119499678035&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 3, "email": "rwth-aachen.de;uottawa.ca;fz-juelich.de;rwth-aachen.de", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "RWTH Aachen University;University of Ottawa;Forschungszentrum J\u00fclich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rwth-aachen.de;https://www.uottawa.ca;https://www.fz-juelich.de", "aff_unique_abbr": "RWTH;U Ottawa;FZJ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Aachen;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;Canada" }, { "title": "Implicit Curriculum in Procgen Made Explicit", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93682", "id": "nZB1FpXUU6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nZB1FpXUU6", "openreview": "https://openreview.net/forum?id=nZB1FpXUU6", "poster": "", "project": "", "author_site": "Zhenxiong Tan, Kaixin Wang, Xinchao Wang", "tldr": "", "abstract": "Procedurally generated environments such as Procgen Benchmark provide a testbed for evaluating the agent's ability to robustly learn a relevant skill, by situating the agent in ever-changing levels. The diverse levels associated with varying contexts are naturally connected to curriculum learning. Existing works mainly focus on arranging the levels to explicitly form a curriculum. In this work, we take a close look at the learning process itself under the multi-level training in Procgen. Interestingly, the learning process exhibits a gradual shift from easy contexts to hard contexts, suggesting an implicit curriculum in multi-level training. Our analysis is made possible through C-Procgen, a benchmark we build upon Procgen that enables explicit control of the contexts. We believe our findings will foster a deeper understanding of learning in diverse contexts, and our benchmark will benefit future research in curriculum reinforcement learning.", "keywords": "Reinforcement Learning;Curriculum Learning;Procedurally Generated Environment", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zhenxiong Tan;Kaixin Wang;Xinchao Wang", "authorids": "~Zhenxiong_Tan1;~Kaixin_Wang1;~Xinchao_Wang1", "gender": "M;M;M", "homepage": "https://github.com/Yuanshi9815;https://kaixin96.github.io;https://sites.google.com/site/sitexinchaowang/", "dblp": "255/5472;;", "google_scholar": ";https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";0000-0001-8237-9285;", "linkedin": ";;", "or_profile": "~Zhenxiong_Tan1;~Kaixin_Wang1;~Xinchao_WANG3", "aff": "National University of Singapore;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;National University of Singapore", "aff_domain": "nus.edu;campus.technion.ac.il;nus.edu", "position": "MS student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ntan2024implicit,\ntitle={Implicit Curriculum in Procgen Made Explicit},\nauthor={Zhenxiong Tan and Kaixin Wang and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nZB1FpXUU6}\n}", "github": "", "reviewers": "2ABJ;h4d8;7pFG", "pdf_size": 2506029, "rating": "6;6;7", "confidence": "5;4;4", "soundness": "3;2;2", "novelty": "3;2;3", "presentation": "2;3;1", "wc_summary": "115;159;171", "wc_strengths": "243;90;63", "wc_weaknesses": "616;221;136", "wc_questions": "96;25;87", "wc_limitations": "135;22;14", "wc_review": "1205;517;471", "wc_reply_reviewers": "142;252;319", "wc_reply_authors": "140;178;933", "reply_reviewers": "1;1;3", "reply_authors": "3;2;6", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 148.33333333333334, 24.073960113690383 ], "wc_strengths_avg": [ 132.0, 79.25906888173743 ], "wc_weaknesses_avg": [ 324.3333333333333, 209.13844431115214 ], "wc_questions_avg": [ 69.33333333333333, 31.562988170042175 ], "wc_limitations_avg": [ 57.0, 55.25094267672423 ], "wc_review_avg": [ 731.0, 335.69430538313674 ], "wc_reply_reviewers_avg": [ 237.66666666666666, 72.96726815649755 ], "wc_reply_authors_avg": [ 417.0, 365.1967506244636 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 3.6666666666666665, 1.699673171197595 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GbrvMN55PJ8J:scholar.google.com/&scioq=Implicit+Curriculum+in+Procgen+Made+Explicit&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "nus.edu;campus.technion.ac.il;nus.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.technion.ac.il/en/", "aff_unique_abbr": "NUS;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;Israel" }, { "id": "nblJSwtdrJ", "title": "Text-to-Model: Text-Conditioned Neural Network Diffusion for Train-Once-for-All Personalization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Generative artificial intelligence (GenAI) has made significant progress in understanding world knowledge and generating content from human languages across various modalities, like text-to-text large language models, text-to-image stable diffusion, and text-to-video Sora. While in this paper, we investigate the capability of GenAI for text-to-model generation, to see whether GenAI can comprehend hyper-level knowledge embedded within AI itself parameters. Specifically, we study a practical scenario termed train-once-for-all personalization, aiming to generate personalized models for diverse end-users and tasks using text prompts. Inspired by the recent emergence of neural network diffusion, we present Tina, a text-conditioned neural network diffusion for train-once-for-all personalization. Tina leverages a diffusion transformer model conditioned on task descriptions embedded using a CLIP model. Despite the astronomical number of potential personalized tasks (e.g., $1.73\\times10^{13}$), by our design, Tina demonstrates remarkable in-distribution and out-of-distribution generalization even trained on small datasets ($\\sim 1000$). \nWe further verify whether and how \\Tina understands world knowledge by analyzing its capabilities under zero-shot/few-shot image prompts, different numbers of personalized classes, prompts of natural language descriptions, and predicting unseen entities.", "keywords": "Generative Model;Parameter Generation;Diffusion Model;Text-to-model", "primary_area": "generative_models", "supplementary_material": "", "author": "Zexi Li;Lingzhi Gao;Chao Wu", "authorids": "~Zexi_Li1;~Lingzhi_Gao1;~Chao_Wu1", "gender": "M;;M", "homepage": "https://zexilee.github.io/about-zexili/;;", "dblp": "151/9187-1;;45/3158-1", "google_scholar": "https://scholar.google.com.hk/citations?user=6lMg5eoAAAAJ;;gpTPt58AAAAJ", "orcid": "0000-0003-0831-3549;;0000-0003-0885-6869", "linkedin": ";;", "or_profile": "~Zexi_Li1;~Lingzhi_Gao1;~Chao_Wu1", "aff": "Zhejiang University;;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn", "position": "PhD student;;Associate Professor", "bibtex": "@misc{\nanonymous2024texttomodel,\ntitle={Text-to-Model: Text-Conditioned Neural Network Diffusion for Train-Once-for-All Personalization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nblJSwtdrJ}\n}", "github": "", "project": "", "reviewers": "htFA;Gsfd;BHPs", "site": "https://openreview.net/forum?id=nblJSwtdrJ", "pdf_size": 2448127, "rating": "4;6;6", "confidence": "4;4;4", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "4;3;3", "wc_summary": "86;76;83", "wc_strengths": "49;114;70", "wc_weaknesses": "68;151;57", "wc_questions": "19;1;85", "wc_limitations": "8;6;23", "wc_review": "230;348;318", "wc_reply_reviewers": "65;66;25", "wc_reply_authors": "322;30;32", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 81.66666666666667, 4.189935029992178 ], "wc_strengths_avg": [ 77.66666666666667, 27.0842307543625 ], "wc_weaknesses_avg": [ 92.0, 41.960298696108765 ], "wc_questions_avg": [ 35.0, 36.11094017053558 ], "wc_limitations_avg": [ 12.333333333333334, 7.586537784494029 ], "wc_review_avg": [ 298.6666666666667, 50.07549855523713 ], "wc_reply_reviewers_avg": [ 52.0, 19.096247449870006 ], "wc_reply_authors_avg": [ 128.0, 137.18114544887962 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16433330079246399690&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Gradient-free Decoder Inversion in Latent Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93681", "id": "nbqvjkOs6S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nbqvjkOs6S", "openreview": "https://openreview.net/forum?id=nbqvjkOs6S", "poster": "/media/PosterPDFs/NeurIPS%202024/93681.png?t=1731161231.3427835", "project": "", "author_site": "Seongmin Hong, Suh Yoon Jeon, Kyeonghyun Lee, Ernest Ryu, Se Young Chun", "tldr": "", "abstract": "In latent diffusion models (LDMs), denoising diffusion process efficiently takes place on latent space whose dimension is lower than that of pixel space. Decoder is typically used to transform the representation in latent space to that in pixel space. While a decoder is assumed to have an encoder as an accurate inverse, exact encoder-decoder pair rarely exists in practice even though applications often require precise inversion of decoder. In other words, encoder is not the left-inverse but the right-inverse of the decoder; decoder inversion seeks the left-inverse. Prior works for decoder inversion in LDMs employed gradient descent inspired by inversions of generative adversarial networks. However, gradient-based methods require larger GPU memory and longer computation time for larger latent space. For example, recent video LDMs can generate more than 16 frames, but GPUs with 24 GB memory can only perform gradient-based decoder inversion for 4 frames. Here, we propose an efficient gradient-free decoder inversion for LDMs, which can be applied to diverse latent models. Theoretical convergence property of our proposed inversion has been investigated not only for the forward step method, but also for the inertial Krasnoselskii-Mann (KM) iterations under mild assumption on cocoercivity that is satisfied by recent LDMs. Our proposed gradient-free method with Adam optimizer and learning rate scheduling significantly reduced computation time and memory usage over prior gradient-based methods and enabled efficient computation in applications such as noise-space watermarking and background-preserving image editing while achieving comparable error levels.", "keywords": "Latent diffusion model;Inversion;Gradient-free inversion;Resource-efficient inversion", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/30e9826032abfaa538b85ba902a9ca61eba6ef58.zip", "author": "Seongmin Hong;Suh Yoon Jeon;Kyeonghyun Lee;Ernest K. Ryu;Se Young Chun", "authorids": "~Seongmin_Hong1;~Suh_Yoon_Jeon1;~Kyeonghyun_Lee1;~Ernest_K._Ryu1;~Se_Young_Chun2", "gender": "M;F;M;M;", "homepage": "https://icl.snu.ac.kr/;;;http://www.math.snu.ac.kr/~ernestryu/;https://icl.snu.ac.kr", "dblp": "218/1199;;362/3110;165/5192;85/2542", "google_scholar": "icHnJqsAAAAJ;aIWPv7YAAAAJ;guC7J3YAAAAJ;CNOqUZoAAAAJ;https://scholar.google.co.kr/citations?user=ntw4vH4AAAAJ", "orcid": ";;;0000-0001-6820-9095;0000-0001-8739-8960", "linkedin": ";SuhYoonJeon;%EA%B2%BD%ED%98%84-%EC%9D%B4-359089212/;;sychun-953593206/", "or_profile": "~Seongmin_Hong1;~Suh_Yoon_Jeon1;~Kyeonghyun_Lee1;~Ernest_K._Ryu1;~Se_Young_Chun1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;MS student;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhong2024gradientfree,\ntitle={Gradient-free Decoder Inversion in Latent Diffusion Models},\nauthor={Seongmin Hong and Suh Yoon Jeon and Kyeonghyun Lee and Ernest K. Ryu and Se Young Chun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nbqvjkOs6S}\n}", "github": "", "reviewers": "DG1n;2mTK;YFNw;9ej4", "pdf_size": 4426019, "rating": "3;4;6;6", "confidence": "4;4;3;3", "soundness": "2;2;1;4", "novelty": "2;2;2;3", "presentation": "1;3;2;3", "wc_summary": "154;75;82;64", "wc_strengths": "52;59;68;69", "wc_weaknesses": "166;179;66;107", "wc_questions": "126;5;66;50", "wc_limitations": "9;4;34;1", "wc_review": "507;322;316;291", "wc_reply_reviewers": "15;15;24;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 1.0897247358851685 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 93.75, 35.3721288587498 ], "wc_strengths_avg": [ 62.0, 6.96419413859206 ], "wc_weaknesses_avg": [ 129.5, 45.609757727924844 ], "wc_questions_avg": [ 61.75, 43.31498008772485 ], "wc_limitations_avg": [ 12.0, 13.019216566291536 ], "wc_review_avg": [ 359.0, 86.23514364805105 ], "wc_reply_reviewers_avg": [ 21.0, 6.363961030678928 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12588758463728224547&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Chimera: Effectively Modeling Multivariate Time Series with 2-Dimensional State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93680", "id": "ncYGjx2vnE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ncYGjx2vnE", "openreview": "https://openreview.net/forum?id=ncYGjx2vnE", "poster": "", "project": "", "author_site": "Ali Behrouz, Michele Santacatterina, Ramin Zabih", "tldr": "", "abstract": "Modeling multivariate time series is a well-established problem with a wide range of applications from healthcare to financial markets. It, however, is challenging as it requires methods to (1) have high expressive power of representing complicated dependencies along the time axis to capture both long-term progression and seasonal patterns, (2) capture the inter-variate dependencies when it is informative, (3) dynamically model the dependencies of variate and time dimensions, and (4) have efficient training and inference for very long sequences. Traditional State Space Models (SSMs) are classical approaches for univariate time series modeling due to their simplicity and expressive power to represent linear dependencies. They, however, have fundamentally limited expressive power to capture non-linear dependencies, are slow in practice, and fail to model the inter-variate information flow. Despite recent attempts to improve the expressive power of SSMs by using deep structured SSMs, the existing methods are either limited to univariate time series, fail to model complex patterns (e.g., seasonal patterns), fail to dynamically model the dependencies of variate and time dimensions, and/or are input-independent. We present Chimera, an expressive variation of the 2-dimensional SSMs with careful design of parameters to maintain high expressive power while keeping the training complexity linear. Using two SSM heads with different discretization processes and input-dependent parameters, Chimera is provably able to learn long-term progression, seasonal patterns, and desirable dynamic autoregressive processes. To improve the efficiency of complex 2D recurrence, we present a fast training using a new 2-dimensional parallel selective scan. Our experimental evaluation shows the superior performance of Chimera on extensive and diverse benchmarks, including ECG and speech time series classification, long-term and short-term time series forecasting, and time series anomaly detection.", "keywords": "Multivariate Time Series;Time Series Modeling;Time Series Forecasting;Time Series Classification", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ali Behrouz;Michele Santacatterina;Ramin Zabih", "authorids": "~Ali_Behrouz1;~Michele_Santacatterina2;~Ramin_Zabih1", "gender": "M;M;M", "homepage": "https://Abehrouz.github.io;https://michelesantacatterina.github.io/;https://www.cs.cornell.edu/~rdz", "dblp": "220/4163;230/3613;z/RaminZabih", "google_scholar": "UbwVuqIAAAAJ;mZPN7H4AAAAJ;8Lp0W54AAAAJ", "orcid": ";;0000-0001-8769-5666", "linkedin": "ali-behrouz-506aa2127;;", "or_profile": "~Ali_Behrouz1;~Michele_Santacatterina2;~Ramin_Zabih1", "aff": "Cornell University;New York University;Cornell", "aff_domain": "cornell.edu;nyu.edu;cornell.edu", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nbehrouz2024chimera,\ntitle={Chimera: Effectively Modeling Multivariate Time Series with 2-Dimensional State Space Models},\nauthor={Ali Behrouz and Michele Santacatterina and Ramin Zabih},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ncYGjx2vnE}\n}", "github": "", "reviewers": "hxax;v8PB;yYuG;gRR3", "pdf_size": 3338453, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;2;3", "wc_summary": "101;88;72;43", "wc_strengths": "83;65;41;36", "wc_weaknesses": "710;51;392;72", "wc_questions": "2;30;10;124", "wc_limitations": "9;1;36;23", "wc_review": "905;235;551;298", "wc_reply_reviewers": "265;25;73;212", "wc_reply_authors": "2262;0;174;861", "reply_reviewers": "2;1;1;2", "reply_authors": "5;1;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 21.644860821913362 ], "wc_strengths_avg": [ 56.25, 18.93904696651867 ], "wc_weaknesses_avg": [ 306.25, 269.4405082759458 ], "wc_questions_avg": [ 41.5, 48.71088174114691 ], "wc_limitations_avg": [ 17.25, 13.386093530227555 ], "wc_review_avg": [ 497.25, 263.4505408990462 ], "wc_reply_reviewers_avg": [ 143.75, 98.06471077813873 ], "wc_reply_authors_avg": [ 824.25, 890.3213956207051 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7102353793362966668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cornell.edu;nyu.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.nyu.edu", "aff_unique_abbr": "Cornell;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neural P$^3$M: A Long-Range Interaction Modeling Enhancer for Geometric GNNs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93679", "id": "ncqauwSyl5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ncqauwSyl5", "openreview": "https://openreview.net/forum?id=ncqauwSyl5", "poster": "/media/PosterPDFs/NeurIPS%202024/93679.png?t=1731315820.2856185", "project": "", "author_site": "Yusong Wang, Chaoran Cheng, Shaoning Li, Yuxuan Ren, Bin Shao, Ge Liu, Pheng-Ann Heng, Nanning Zheng", "tldr": "", "abstract": "Geometric graph neural networks (GNNs) have emerged as powerful tools for modeling molecular geometry. However, they encounter limitations in effectively capturing long-range interactions in large molecular systems. To address this challenge, we introduce **Neural P$^3$M**, a versatile enhancer of geometric GNNs to expand the scope of their capabilities by incorporating mesh points alongside atoms and reimaging traditional mathematical operations in a trainable manner. Neural P$^3$M exhibits flexibility across a wide range of molecular systems and demonstrates remarkable accuracy in predicting energies and forces, outperforming on benchmarks such as the MD22 dataset. \nIt also achieves an average improvement of 22% on the OE62 dataset while integrating with various architectures. Codes are available at https://github.com/OnlyLoveKFC/Neural_P3M.", "keywords": "Molecule geometry modeling;Geometric GNNs;Long-range interactions", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yusong Wang;Chaoran Cheng;Shaoning Li;Yuxuan Ren;Bin Shao;Ge Liu;Pheng-Ann Heng;Nanning Zheng", "authorids": "~Yusong_Wang1;~Chaoran_Cheng2;~Shaoning_Li2;~Yuxuan_Ren2;~Bin_Shao1;~Ge_Liu2;~Pheng-Ann_Heng1;~Nanning_Zheng1", "gender": "M;M;;;;F;M;M", "homepage": "https://yusowa0716.github.io;https://ccr-cheng.github.io/;;;https://www.binshao.info/;http://www.mit.edu/~geliu/;http://www.cse.cuhk.edu.hk/~pheng;", "dblp": ";;;;;;52/2889;07/256-1", "google_scholar": "sCBrMcIAAAAJ;SrGZZ1wAAAAJ;;;h9L4CgIAAAAJ;P6EahzcAAAAJ;https://scholar.google.com/citations?sortby=pubdate;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;0000-0001-9383-5186;;", "linkedin": ";chaoran-cheng-a70638214/;;;;;;", "or_profile": "~Yusong_Wang1;~Chaoran_Cheng2;~Shaoning_Li2;~Yuxuan_Ren2;~Bin_Shao1;~Ge_Liu2;~Pheng-Ann_Heng1;~Nanning_Zheng1", "aff": "Xi'an Jiaotong University;University of Illinois, Urbana Champaign;;;Microsoft;University of Washington;The Chinese University of Hong Kong;Xi'an Jiaotong University", "aff_domain": "stu.xjtu.edu.cn;illinois.edu;;;microsoft.com;uw.edu;cuhk.edu.hk;xjtu.edu.cn", "position": "PhD student;PhD student;;;Principal Research Manager;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024neural,\ntitle={Neural P\\${\\textasciicircum}3\\$M: A Long-Range Interaction Modeling Enhancer for Geometric {GNN}s},\nauthor={Yusong Wang and Chaoran Cheng and Shaoning Li and Yuxuan Ren and Bin Shao and Ge Liu and Pheng-Ann Heng and Nanning Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ncqauwSyl5}\n}", "github": "", "reviewers": "M1Fm;iSFT;7zWV;g8KM", "pdf_size": 1928274, "rating": "3;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "68;40;222;91", "wc_strengths": "63;116;154;85", "wc_weaknesses": "123;50;145;158", "wc_questions": "22;197;98;52", "wc_limitations": "10;11;51;26", "wc_review": "286;414;670;412", "wc_reply_reviewers": "0;22;30;10", "wc_reply_authors": "131;19;173;23", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 105.25, 69.78314624606718 ], "wc_strengths_avg": [ 104.5, 34.2235299172952 ], "wc_weaknesses_avg": [ 119.0, 41.75523919222593 ], "wc_questions_avg": [ 92.25, 66.25849002203415 ], "wc_limitations_avg": [ 24.5, 16.560495161679196 ], "wc_review_avg": [ 445.5, 139.60211316452197 ], "wc_reply_reviewers_avg": [ 15.5, 11.434596626029272 ], "wc_reply_authors_avg": [ 86.5, 67.17700499426869 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.33333333333333337, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2648213514020345990&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "stu.xjtu.edu.cn;illinois.edu;;;microsoft.com;uw.edu;cuhk.edu.hk;xjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Xi'an Jiao Tong University;University of Illinois Urbana-Champaign;Microsoft;University of Washington;Chinese University of Hong Kong", "aff_unique_dep": ";;Microsoft Corporation;;", "aff_unique_url": "https://www.xjtu.edu.cn;https://illinois.edu;https://www.microsoft.com;https://www.washington.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "XJTU;UIUC;Microsoft;UW;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Urbana-Champaign;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "A Geometric View of Data Complexity: Efficient Local Intrinsic Dimension Estimation with Diffusion Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93678", "id": "nd8Q4a8aWl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nd8Q4a8aWl", "openreview": "https://openreview.net/forum?id=nd8Q4a8aWl", "poster": "/media/PosterPDFs/NeurIPS%202024/93678.png?t=1731259954.9363716", "project": "", "author_site": "Hamid Kamkari, Brendan Ross, Rasa Hosseinzadeh, Jesse Cresswell, Gabriel Loaiza-Ganem", "tldr": "", "abstract": "High-dimensional data commonly lies on low-dimensional submanifolds, and estimating the local intrinsic dimension (LID) of a datum -- i.e. the dimension of the submanifold it belongs to -- is a longstanding problem. LID can be understood as the number of local factors of variation: the more factors of variation a datum has, the more complex it tends to be. Estimating this quantity has proven useful in contexts ranging from generalization in neural networks to detection of out-of-distribution data, adversarial examples, and AI-generated text. The recent successes of deep generative models present an opportunity to leverage them for LID estimation, but current methods based on generative models produce inaccurate estimates, require more than a single pre-trained model, are computationally intensive, or do not exploit the best available deep generative models: diffusion models (DMs). In this work, we show that the Fokker-Planck equation associated with a DM can provide an LID estimator which addresses the aforementioned deficiencies. Our estimator, called FLIPD, is easy to implement and compatible with all popular DMs. Applying FLIPD to synthetic LID estimation benchmarks, we find that DMs implemented as fully-connected networks are highly effective LID estimators that outperform existing baselines. We also apply FLIPD to natural images where the true LID is unknown. Despite being sensitive to the choice of network architecture, FLIPD estimates remain a useful measure of relative complexity; compared to competing estimators, FLIPD exhibits a consistently higher correlation with image PNG compression rate and better aligns with qualitative assessments of complexity. Notably, FLIPD is orders of magnitude faster than other LID estimators, and the first to be tractable at the scale of Stable Diffusion.", "keywords": "diffusion models;deep generative modelling;manifold hypothesis;intrinsic dimension", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Hamidreza Kamkari;Brendan Leigh Ross;Rasa Hosseinzadeh;Jesse C. Cresswell;Gabriel Loaiza-Ganem", "authorids": "~Hamidreza_Kamkari1;~Brendan_Leigh_Ross1;~Rasa_Hosseinzadeh2;~Jesse_C._Cresswell1;~Gabriel_Loaiza-Ganem1", "gender": "M;M;M;M;", "homepage": "https://hamidrezakmk.github.io/;;;https://sites.google.com/view/gabriel-loaiza-ganem/about-me;https://jescresswell.github.io/", "dblp": ";295/0098;266/1688;238/1617;279/6764", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=TyY1aSYAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?hl=en", "orcid": ";;;;0000-0002-9284-8804", "linkedin": "hamidreza-kamkari/;brendan-ross;rasa-hosseinzadeh-6204a6208/;;", "or_profile": "~Hamidreza_Kamkari1;~Brendan_Leigh_Ross1;~Rasa_Hosseinzadeh2;~Gabriel_Loaiza-Ganem1;~Jesse_C_Cresswell1", "aff": "Department of Computer Science;Layer 6 AI;Layer6;Layer 6 AI;Layer 6 AI", "aff_domain": "cs.toronto.edu;layer6.ai;layer6.ai;layer6.ai;layer6.ai", "position": "MS student;Senior Machine Learning Scientist;Researcher;Machine Learning Research Scientist;Staff Machine Learning Scientist", "bibtex": "@inproceedings{\nkamkari2024a,\ntitle={A Geometric View of Data Complexity: Efficient Local Intrinsic Dimension Estimation with Diffusion Models},\nauthor={Hamidreza Kamkari and Brendan Leigh Ross and Rasa Hosseinzadeh and Jesse C. Cresswell and Gabriel Loaiza-Ganem},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nd8Q4a8aWl}\n}", "github": "", "reviewers": "g4nR;MdUJ;9X9u;MJca", "pdf_size": 45734532, "rating": "6;7;7;7", "confidence": "4;3;2;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "2;4;3;3", "wc_summary": "124;204;98;148", "wc_strengths": "75;82;73;70", "wc_weaknesses": "130;25;28;96", "wc_questions": "55;149;60;351", "wc_limitations": "2;1;4;90", "wc_review": "386;461;263;755", "wc_reply_reviewers": "16;19;21;1439", "wc_reply_authors": "12;0;0;1291", "reply_reviewers": "1;1;1;4", "reply_authors": "2;1;1;4", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 143.5, 39.1503512117069 ], "wc_strengths_avg": [ 75.0, 4.415880433163924 ], "wc_weaknesses_avg": [ 69.75, 44.90197657119339 ], "wc_questions_avg": [ 153.75, 119.8652889705773 ], "wc_limitations_avg": [ 24.25, 37.97614382740828 ], "wc_review_avg": [ 466.25, 181.07646865343935 ], "wc_reply_reviewers_avg": [ 373.75, 615.0249486809458 ], "wc_reply_authors_avg": [ 325.75, 557.3088797964733 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17224929254729539030&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.toronto.edu;layer6.ai;layer6.ai;layer6.ai;layer6.ai", "author_num": 5, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Unknown Institution;Layer 6 AI;Layer6 AI", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": ";https://layer6.ai;https://layer6.ai", "aff_unique_abbr": ";Layer 6 AI;Layer6", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1", "aff_country_unique": ";Canada" }, { "id": "ndIum4ByTU", "title": "Meta Flow Matching: Integrating Vector Fields on the Wasserstein Manifold", "track": "main", "status": "Reject", "tldr": "", "abstract": "Numerous biological and physical processes can be modeled as systems of interacting samples evolving continuously over time, e.g. the dynamics of communicating cells or physical particles. Flow-based models allow for learning these dynamics at the population level --- they model the evolution of the entire distribution of samples. However, current flow-based models are limited to a single initial population and a set of predefined conditions which describe different dynamics. We argue that multiple processes in natural sciences have to be represented as vector fields on the Wasserstein manifold of probability densities. That is, the change of the population at any moment in time depends on the population itself due to the interactions between samples. In particular, this is crucial for personalized medicine where the development of diseases and their treatments depend on the microenvironment of cells specific to each patient. We propose _Meta Flow Matching_ (MFM), a practical approach to integrating along these vector fields on the Wasserstein manifold by amortizing the flow model over the initial populations. Namely, we embed the population of samples using a Graph Neural Network (GNN) and use these embeddings to train a _Flow Matching_ model. This gives Meta Flow Matching the ability to generalize over the initial distributions unlike previously proposed methods. Finally, we demonstrate the ability of MFM to improve prediction of individual treatment responses on a large scale multi-patient single-cell drug screen dataset.", "keywords": "Flow matching;Diffusion;Dynamics;Cell dynamics", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/c13e67150ec8a4ce83e548b2d471997003c0e3ac.zip", "author": "Xi Zhang;Lazar Atanackovic;Brandon Amos;Mathieu Blanchette;Leo J Lee;Yoshua Bengio;Alexander Tong;Kirill Neklyudov", "authorids": "~Xi_Zhang18;~Lazar_Atanackovic1;~Brandon_Amos1;~Mathieu_Blanchette1;~Leo_J_Lee1;~Yoshua_Bengio1;~Alexander_Tong1;~Kirill_Neklyudov1", "gender": "F;M;;M;;M;;M", "homepage": ";https://lazaratan.github.io/;http://bamos.github.io;http://www.cs.mcgill.ca/~blanchem/;;http://yoshuabengio.org;https://alextong.net;https://necludov.github.io/", "dblp": ";235/6207;133/4801.html;;;56/953;153/9296;195/1093", "google_scholar": "CblgXekAAAAJ;qhTWIh4AAAAJ;d8gdZR4AAAAJ;https://scholar.google.com.tw/citations?user=8vXnOQsAAAAJ;;kukA0LcAAAAJ;CS80pt4AAAAJ;https://scholar.google.ru/citations?user=eOttYWgAAAAJ", "orcid": "0000-0001-6485-4564;;;;;;0000-0002-2031-4096;", "linkedin": ";;bdamos;;;yoshuabengio/?originalSubdomain=ca;atong01/;", "or_profile": "~Xi_Zhang18;~Lazar_Atanackovic1;~Brandon_Amos1;~Mathieu_Blanchette1;~Leo_J_Lee1;~Yoshua_Bengio1;~Alexander_Tong1;~Kirill_Neklyudov1", "aff": "McGill University;Valence Labs powered by recursion;Meta;McGill University;;University of Montreal;Universit\u00e9 de Montr\u00e9al;Vector Institute", "aff_domain": "mcgill.ca;valencelabs.com;meta.com;mcgill.ca;;umontreal.ca;umontreal.ca;vectorinstitute.ai", "position": "PhD student;Intern;Research Scientist;Associate Professor;;Full Professor;Postdoc;Postdoc", "bibtex": "@misc{\nanonymous2024meta,\ntitle={Meta Flow Matching: Integrating Vector Fields on the Wasserstein Manifold},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=ndIum4ByTU}\n}", "github": "", "project": "", "reviewers": "HpVg;btyA;mrP3;BiEg", "site": "https://openreview.net/forum?id=ndIum4ByTU", "pdf_size": 2540367, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "119;64;63;82", "wc_strengths": "75;65;73;109", "wc_weaknesses": "324;129;173;75", "wc_questions": "2;29;252;190", "wc_limitations": "2;6;19;7", "wc_review": "522;293;580;463", "wc_reply_reviewers": "87;15;81;52", "wc_reply_authors": "61;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.0, 22.66053838724932 ], "wc_strengths_avg": [ 80.5, 16.874537030686206 ], "wc_weaknesses_avg": [ 175.25, 92.62930151955157 ], "wc_questions_avg": [ 118.25, 105.49496433479656 ], "wc_limitations_avg": [ 8.5, 6.34428877022476 ], "wc_review_avg": [ 464.5, 107.30913288252776 ], "wc_reply_reviewers_avg": [ 58.75, 28.516442625264464 ], "wc_reply_authors_avg": [ 15.25, 26.413774815425377 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17837634745883063222&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "aff_unique_index": "0;1;2;0;3;4;5", "aff_unique_norm": "McGill University;Valence Labs;Meta;University of Montreal;Universit\u00e9 de Montr\u00e9al;Vector Institute", "aff_unique_dep": ";;Meta Platforms, Inc.;;;", "aff_unique_url": "https://www.mcgill.ca;;https://meta.com;https://wwwumontreal.ca;https://www.umontreal.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "McGill;;Meta;UM;UdeM;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2;0;0;0;0", "aff_country_unique": "Canada;;United States" }, { "title": "One for All: Multi-Domain Joint Training for Point Cloud Based 3D Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93677", "id": "ndoeHX1Acq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ndoeHX1Acq", "openreview": "https://openreview.net/forum?id=ndoeHX1Acq", "poster": "", "project": "", "author_site": "Zhenyu Wang, Ya-Li Li, Hengshuang Zhao, Shengjin Wang", "tldr": "", "abstract": "The current trend in computer vision is to utilize one universal model to address all various tasks. Achieving such a universal model inevitably requires incorporating multi-domain data for joint training to learn across multiple problem scenarios. In point cloud based 3D object detection, however, such multi-domain joint training is highly challenging, because large domain gaps among point clouds from different datasets lead to the severe domain-interference problem. In this paper, we propose OneDet3D, a universal one-for-all model that addresses 3D detection across different domains, including diverse indoor and outdoor scenes, within the same framework and only one set of parameters. We propose the domain-aware partitioning in scatter and context, guided by a routing mechanism, to address the data interference issue, and further incorporate the text modality for a language-guided classification to unify the multi-dataset label spaces and mitigate the category interference issue. The fully sparse structure and anchor-free head further accommodate point clouds with significant scale disparities. Extensive experiments demonstrate the strong universal ability of OneDet3D to utilize only one trained model for addressing almost all 3D object detection tasks (Fig. 1). We will open-source the code for future research and applications.", "keywords": "3D object detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhenyu Wang;Ya-Li Li;Hengshuang Zhao;Shengjin Wang", "authorids": "~Zhenyu_Wang3;~Ya-Li_Li1;~Hengshuang_Zhao2;~Shengjin_Wang1", "gender": "M;F;M;M", "homepage": ";;https://hszhao.github.io;http://www.ee.tsinghua.edu.cn/publish/eeen/8316/index.html", "dblp": "22/1486-5;05/1013-1.html;185/7848;", "google_scholar": "x_-kOjoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4uE10I0AAAAJ;", "orcid": ";;0000-0001-8277-2706;", "linkedin": ";;hengshuang-zhao-347b8391/?originalSubdomain=hk;", "or_profile": "~Zhenyu_Wang3;~Ya-Li_Li1;~Hengshuang_Zhao2;~Shengjin_Wang1", "aff": "Tsinghua University;Tsinghua University;The University of Hong Kong;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;hku.hk;tsinghua.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024one,\ntitle={One for All: Multi-Domain Joint Training for Point Cloud Based 3D Object Detection},\nauthor={Zhenyu Wang and Ya-Li Li and Hengshuang Zhao and Shengjin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ndoeHX1Acq}\n}", "github": "", "reviewers": "PVw6;q3RX;HkhJ", "pdf_size": 1138646, "rating": "4;5;6", "confidence": "4;4;5", "soundness": "2;2;4", "novelty": "2;3;4", "presentation": "2;2;3", "wc_summary": "216;52;48", "wc_strengths": "90;30;36", "wc_weaknesses": "218;348;147", "wc_questions": "194;17;6", "wc_limitations": "119;14;1", "wc_review": "837;461;238", "wc_reply_reviewers": "0;47;18", "wc_reply_authors": "132;0;0", "reply_reviewers": "0;1;1", "reply_authors": "3;1;1", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 105.33333333333333, 78.27018731434225 ], "wc_strengths_avg": [ 52.0, 26.981475126464083 ], "wc_weaknesses_avg": [ 237.66666666666666, 83.22793334505484 ], "wc_questions_avg": [ 72.33333333333333, 86.14845068575264 ], "wc_limitations_avg": [ 44.666666666666664, 52.82886416428891 ], "wc_review_avg": [ 512.0, 247.1854904048105 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 19.362047641943473 ], "wc_reply_authors_avg": [ 44.0, 62.22539674441618 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1118928120467838287&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;hku.hk;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tsinghua University;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.hku.hk", "aff_unique_abbr": "THU;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "AdaSociety: An Adaptive Environment with Social Structures for Multi-Agent Decision-Making", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97511", "id": "nes2rMnbyL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nes2rMnbyL", "openreview": "https://openreview.net/forum?id=nes2rMnbyL", "poster": "/media/PosterPDFs/NeurIPS%202024/97511.png?t=1731648044.487945", "project": "", "author_site": "Yizhe Huang, Xingbo Wang, Hao Liu, Fanqi Kong, Aoyang Qin, Min Tang, Xiaoxi Wang, Song-Chun Zhu, Mingjie Bi, Siyuan Qi, Xue Feng", "tldr": "", "abstract": "Traditional interactive environments limit agents' intelligence growth with fixed tasks. Recently, single-agent environments address this by generating new tasks based on agent actions, enhancing task diversity. We consider the decision-making problem in multi-agent settings, where tasks are further influenced by social connections, affecting rewards and information access. However, existing multi-agent environments lack a combination of adaptive physical surroundings and social connections, hindering the learning of intelligent behaviors.\nTo address this, we introduce AdaSociety, a customizable multi-agent environment featuring expanding state and action spaces, alongside explicit and alterable social structures. As agents progress, the environment adaptively generates new tasks with social structures for agents to undertake. In AdaSociety, we develop three mini-games showcasing distinct social structures and tasks. Initial results demonstrate that specific social structures can promote both individual and collective benefits, though current reinforcement learning and LLM-based algorithms show limited effectiveness in leveraging social structures to enhance performance. Overall, AdaSociety serves as a valuable research platform for exploring intelligence in diverse physical and social settings. The code is available at https://github.com/bigai-ai/AdaSociety.", "keywords": "multi-agent decision making;reinforcement learning;dynamic social relationship", "primary_area": "", "supplementary_material": "/attachment/30322318fde9d17657a964da6e62459962f0d291.pdf", "author": "Yizhe Huang;Xingbo Wang;Hao Liu;Fanqi Kong;Aoyang Qin;Min Tang;Xiaoxi Wang;Song-Chun Zhu;Mingjie Bi;Siyuan Qi;Xue Feng", "authorids": "~Yizhe_Huang2;~Xingbo_Wang2;~Hao_Liu42;~Fanqi_Kong1;~Aoyang_Qin1;~Min_Tang5;~Xiaoxi_Wang2;~Song-Chun_Zhu1;~Mingjie_Bi1;~Siyuan_Qi1;~Xue_Feng3", "gender": ";M;M;M;M;M;M;M;;F;F", "homepage": ";https://github.com/JackSimbol;https://haoliu4391.github.io/Hao_Liu.github.io/;https://github.com/kfq20;https://github.com/qayqaq;;https://zhusongchun.net/;;;;https://github.com/zmsud", "dblp": ";;;377/2991;;;10/10313;;177/5178;;", "google_scholar": "j5AxMFUAAAAJ;TVewRagAAAAJ;;Aj13PkAAAAAJ;;;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;YsGfIU8AAAAJ;ePclJR4AAAAJ;;", "orcid": "0000-0001-8722-7221;;;;;;;0000-0001-6825-1445;0000-0002-4070-733X;0000-0002-7163-7274;", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/vshallc;;mingjie-bi-a81307176/;;;", "or_profile": "~Yizhe_Huang2;~Xingbo_Wang2;~Hao_Liu42;~Fanqi_Kong1;~Aoyang_Qin1;~Xiaoxi_Wang2;~Song-Chun_Zhu1;~Mingjie_Bi1;~Siyuan_Qi1;~Xue_Feng3;~Tangmin1", "aff": "Peking University;Peking University;Tsinghua University;Tsinghua University;Tsinghua University;Tencent Game AI Research Center;Peking University;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence;Wuhan University", "aff_domain": "pku.edu.cn;pku.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;pku.edu.cn;bigai.ai;bigai.ai;bigai.ai;whu.edu", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;PhD student;Researcher;Full Professor;Researcher;Researcher;Researcher;Undergrad student", "bibtex": "@inproceedings{\nhuang2024adasociety,\ntitle={AdaSociety: An Adaptive Environment with Social Structures for Multi-Agent Decision-Making},\nauthor={Yizhe Huang and Xingbo Wang and Hao Liu and Fanqi Kong and Aoyang Qin and Min Tang and Xiaoxi Wang and Song-Chun Zhu and Mingjie Bi and Siyuan Qi and Xue Feng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=nes2rMnbyL}\n}", "github": "", "reviewers": "vq28;nGQ4;CKwz", "pdf_size": 2081890, "rating": "6;7;7", "confidence": "3;4;4", "wc_summary_and_contributions": "97;82;73", "wc_strengths": "27;94;81", "wc_improvement": "256;152;52", "wc_limitations": "2;102;1", "wc_correctness": "1;246;1", "wc_clarity": "1;240;1", "wc_relation_to_prior_work": "1;41;1", "wc_documentation": "3;19;1", "wc_additional_feedback": "1;1;1", "wc_review": "389;977;212", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "2;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 84.0, 9.899494936611665 ], "wc_strengths_avg": [ 67.33333333333333, 29.00957696271277 ], "wc_improvement_avg": [ 153.33333333333334, 83.28798766262088 ], "wc_limitations_avg": [ 35.0, 47.37791327893902 ], "wc_correctness_avg": [ 82.66666666666667, 115.49410759380277 ], "wc_clarity_avg": [ 80.66666666666667, 112.66568046905657 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 18.856180831641264 ], "wc_documentation_avg": [ 7.666666666666667, 8.055363982396381 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 526.0, 326.98929646090863 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10256991300152274949&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;pku.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;pku.edu.cn;bigai.ai;bigai.ai;bigai.ai;whu.edu", "author_num": 11, "aff_unique_index": "0;0;1;1;1;2;0;3;3;3;4", "aff_unique_norm": "Peking University;Tsinghua University;Tencent;Beijing Institute for General Artificial Intelligence;Wuhan University", "aff_unique_dep": ";;Game AI Research Center;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.tsinghua.edu.cn;https://www.tencent.com;http://www.bigaiai.org/;http://www.whu.edu.cn/", "aff_unique_abbr": "Peking U;THU;Tencent;BIGAI;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "nfC1OA6NeE", "title": "SDEs for Adaptive Methods: The Role of Noise", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the vast empirical evidence supporting the efficacy of adaptive optimization methods in deep learning, their theoretical understanding is far from complete. In this work, we introduce novel SDEs for commonly used adaptive optimizers: SignSGD, RMSprop(W), and Adam(W). Our SDEs offer a quantitatively accurate description of these optimizers and help bring to light an intricate relationship between adaptivity, gradient noise, and curvature. Our novel analysis of SignSGD highlights a noteworthy and precise contrast to SGD in terms of convergence speed, stationary distribution, and robustness to heavy-tail noise. We extend this analysis to AdamW and RMSpropW, for which we observe that the role of noise is much more complex. Crucially, we support our theoretical analysis with experimental evidence by verifying our insights: this includes numerically integrating our SDEs using Euler-Maruyama discretization on various neural network architectures such as MLPs, CNNs, ResNets, and Transformers. Our SDEs accurately track the behavior of the respective optimizers, especially when compared to previous SDEs derived for Adam and RMSprop. We believe our approach can provide valuable insights into best training practices and novel scaling rules.", "keywords": "Stochastic Differential Equations;Stochastic Optimization;Adaptive Methods", "primary_area": "optimization", "supplementary_material": "/attachment/2fc484f1a807b7efd1639328bbafc742376cb3ac.zip", "author": "Enea Monzio Compagnoni;Tianlin Liu;Rustem Islamov;Frank Norbert Proske;Antonio Orvieto;Aurelien Lucchi", "authorids": "~Enea_Monzio_Compagnoni1;~Tianlin_Liu2;~Rustem_Islamov1;~Frank_Norbert_Proske1;~Antonio_Orvieto3;~Aurelien_Lucchi1", "gender": "M;M;M;M;M;M", "homepage": "https://eneamc.github.io/;http://www.tianlinliu.com;https://rustem-islamov.github.io/;https://www.mn.uio.no/math/english/people/aca/proske/;http://orvi.altervista.org/;http://people.inf.ethz.ch/alucchi/", "dblp": "310/1851;20/7667;285/5128;;;14/5780", "google_scholar": "6qKgak8AAAAJ;;-dlYjUsAAAAJ;;xkuLyHoAAAAJ;https://scholar.google.ch/citations?user=V1ONSgIAAAAJ", "orcid": "0009-0004-7094-2586;;;;;", "linkedin": "eneamc/;;rustem-islamov-053345228/;;antonio-orvieto-947ab0130/;", "or_profile": "~Enea_Monzio_Compagnoni1;~Tianlin_Liu2;~Rustem_Islamov1;~Frank_Norbert_Proske1;~Antonio_Orvieto3;~Aurelien_Lucchi1", "aff": "University of Basel;University of Basel;University of Basel;University of Oslo;ELLIS Institute T\u00fcbingen, Max Planck Institute for Intelligent Systems, T\u00fcbingen AI Center, T\u00fcbingen, Germany;University of Basel", "aff_domain": "unibas.ch;unibas.ch;unibas.ch;nissen.uio.no;tue.ellis.eu;unibas.ch", "position": "PhD student;PhD student;PhD student;Full Professor;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024sdes,\ntitle={{SDE}s for Adaptive Methods: The Role of Noise},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nfC1OA6NeE}\n}", "github": "", "project": "", "reviewers": "Pw4R;PBh6;ehSv", "site": "https://openreview.net/forum?id=nfC1OA6NeE", "pdf_size": 17777850, "rating": "6;6;7", "confidence": "4;2;4", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;2;2", "wc_summary": "82;23;27", "wc_strengths": "73;10;53", "wc_weaknesses": "109;63;5", "wc_questions": "23;24;148", "wc_limitations": "8;1;9", "wc_review": "295;121;242", "wc_reply_reviewers": "23;27;29", "wc_reply_authors": "55;19;637", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 44.0, 26.919633479426622 ], "wc_strengths_avg": [ 45.333333333333336, 26.284765338288427 ], "wc_weaknesses_avg": [ 59.0, 42.551929059287865 ], "wc_questions_avg": [ 65.0, 58.69128271444292 ], "wc_limitations_avg": [ 6.0, 3.559026084010437 ], "wc_review_avg": [ 219.33333333333334, 72.82093716019376 ], "wc_reply_reviewers_avg": [ 26.333333333333332, 2.494438257849294 ], "wc_reply_authors_avg": [ 237.0, 283.2242927434015 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aSI03LXoxsUJ:scholar.google.com/&scioq=SDEs+for+Adaptive+Methods:+The+Role+of+Noise&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "University of Basel;University of Oslo;ELLIS Institute T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unibas.ch;https://www.uio.no;", "aff_unique_abbr": "UniBas;UiO;", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "Switzerland;Norway;Germany" }, { "title": "HaloScope: Harnessing Unlabeled LLM Generations for Hallucination Detection", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93676", "id": "nfK0ZXFFSn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nfK0ZXFFSn", "openreview": "https://openreview.net/forum?id=nfK0ZXFFSn", "poster": "/media/PosterPDFs/NeurIPS%202024/93676.png?t=1731441405.8926642", "project": "", "author_site": "Xuefeng Du, Chaowei Xiao, Sharon Li", "tldr": "", "abstract": "The surge in applications of large language models (LLMs) has prompted concerns about the generation of misleading or fabricated information, known as hallucinations. Therefore, detecting hallucinations has become critical to maintaining trust in LLM-generated content. A primary challenge in learning a truthfulness classifier is the lack of a large amount of labeled truthful and hallucinated data. To address the challenge, we introduce HaloScope, a novel learning framework that leverages the unlabeled LLM generations in the wild for hallucination detection. Such unlabeled data arises freely upon deploying LLMs in the open world, and consists of both truthful and hallucinated information. To harness the unlabeled data, we present an automated scoring function for distinguishing between truthful and untruthful generations within unlabeled mixture data, thereby enabling the training of a binary classifier on top. Importantly, our framework does not require extra data collection and human annotations, offering strong flexibility and practicality for real-world applications. Extensive experiments show that HaloScope can achieve superior hallucination detection performance, outperforming the competitive rivals by a significant margin.", "keywords": "hallucination detection;LLM safety", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xuefeng Du;Chaowei Xiao;Yixuan Li", "authorids": "~Xuefeng_Du1;~Chaowei_Xiao2;~Yixuan_Li1", "gender": "M;F;M", "homepage": "https://d12306.github.io/;http://pages.cs.wisc.edu/~sharonli/;https://xiaocw11.github.io/", "dblp": "34/3557;144/6087-1;150/3317", "google_scholar": "GE_aEh4AAAAJ;https://scholar.google.com/citations?hl=en;Juoqtj8AAAAJ", "orcid": ";;0000-0002-7043-4926", "linkedin": "xuefeng-du-094723192/;liyixuan;", "or_profile": "~Xuefeng_Du1;~Yixuan_Li1;~chaowei_xiao1", "aff": "University of Wisconsin, Madison;Cornell University;NVIDIA", "aff_domain": "wisc.edu;cornell.edu;nvidia.com", "position": "PhD student;Graduate Student;Researcher", "bibtex": "@inproceedings{\ndu2024haloscope,\ntitle={HaloScope: Harnessing Unlabeled {LLM} Generations for Hallucination Detection},\nauthor={Xuefeng Du and Chaowei Xiao and Yixuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nfK0ZXFFSn}\n}", "github": "", "reviewers": "JMXE;6VqW;smZn;tABH", "pdf_size": 1145035, "rating": "6;6;8;9", "confidence": "5;3;4;4", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "89;62;206;108", "wc_strengths": "44;30;52;81", "wc_weaknesses": "317;18;105;92", "wc_questions": "2;54;105;41", "wc_limitations": "8;1;41;45", "wc_review": "460;165;509;367", "wc_reply_reviewers": "24;8;0;38", "wc_reply_authors": "27;24;0;314", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;1;3", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 116.25, 54.33403629402108 ], "wc_strengths_avg": [ 51.75, 18.632968094214082 ], "wc_weaknesses_avg": [ 133.0, 111.29465395965792 ], "wc_questions_avg": [ 50.5, 36.82729965664059 ], "wc_limitations_avg": [ 23.75, 19.45989465541887 ], "wc_review_avg": [ 375.25, 131.6669567507353 ], "wc_reply_reviewers_avg": [ 17.5, 14.654350889752845 ], "wc_reply_authors_avg": [ 91.25, 129.02979307121282 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16738776022063180526&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 5, "email": "wisc.edu;cornell.edu;nvidia.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wisconsin;Cornell University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.wisc.edu;https://www.cornell.edu;https://www.nvidia.com", "aff_unique_abbr": "UW;Cornell;NVIDIA", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Preference Learning of Latent Decision Utilities with a Human-like Model of Preferential Choice", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93675", "id": "nfq3GKfb4h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nfq3GKfb4h", "openreview": "https://openreview.net/forum?id=nfq3GKfb4h", "poster": "/media/PosterPDFs/NeurIPS%202024/93675.png?t=1733242927.695841", "project": "", "author_site": "Sebastiaan De Peuter, Shibei Zhu, Yujia Guo, Andrew Howes, Samuel Kaski", "tldr": "", "abstract": "Preference learning methods make use of models of human choice in order to infer the latent utilities that underlie human behavior. However, accurate modeling of human choice behavior is challenging due to a range of context effects that arise from how humans contrast and evaluate options. Cognitive science has proposed several models that capture these intricacies but, due to their intractable nature, work on preference learning has, in practice, had to rely on tractable but simplified variants of the well-known Bradley-Terry model. In this paper, we take one state-of-the-art intractable cognitive model and propose a tractable surrogate that is suitable for deployment in preference learning. We then introduce a mechanism for fitting the surrogate to human data and extend it to account for data that cannot be explained by the original cognitive model. We demonstrate on large-scale human data that this model produces significantly better inferences on static and actively elicited data than existing Bradley-Terry variants. We further show in simulation that when using this model for preference learning, we can significantly improve utility in a range of real-world tasks.", "keywords": "preference learning;human-in-the-loop;AI-assistance for decision making;user modeling;cogntitive science;retrosynthesis planning", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Sebastiaan De Peuter;Shibei Zhu;Yujia Guo;Andrew Howes;Samuel Kaski", "authorids": "~Sebastiaan_De_Peuter1;~Shibei_Zhu1;~Yujia_Guo1;~Andrew_Howes1;~Samuel_Kaski1", "gender": "M;F;F;M;M", "homepage": ";;https://www.linkedin.com/in/yujia-guo-6738a0203/;https://computerscience.exeter.ac.uk/people/profile/index.php?username=ah1248;https://people.aalto.fi/samuel.kaski", "dblp": "298/1740;;204/7435;70/4286;64/5826", "google_scholar": "xxp2We8AAAAJ;;;https://scholar.google.co.uk/citations?user=iswtssoAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-0684-0110;;;;0000-0003-1925-9154", "linkedin": "sebastiaan-de-peuter/;www.linkedin.com/in/shibei-zhu;;;samuel-kaski-27790/", "or_profile": "~Sebastiaan_De_Peuter1;~Shibei_Zhu1;~Yujia_Guo1;~Andrew_Howes1;~Samuel_Kaski1", "aff": "Aalto University;Aalto University;Aalto University;University of Exeter;Aalto University", "aff_domain": "aalto.fi;aalto.fi;aalto.fi;exeter.ac.uk;aalto.fi", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\npeuter2024preference,\ntitle={Preference Learning of Latent Decision Utilities with a Human-like Model of Preferential Choice},\nauthor={Sebastiaan De Peuter and Shibei Zhu and Yujia Guo and Andrew Howes and Samuel Kaski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nfq3GKfb4h}\n}", "github": "", "reviewers": "hXMa;CwtL;rsF2;cVZM", "pdf_size": 778298, "rating": "4;4;5;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "69;61;111;54", "wc_strengths": "68;44;87;152", "wc_weaknesses": "203;203;145;126", "wc_questions": "65;211;1;52", "wc_limitations": "2;20;1;21", "wc_review": "407;539;345;405", "wc_reply_reviewers": "329;113;133;0", "wc_reply_authors": "1003;180;180;0", "reply_reviewers": "3;1;1;0", "reply_authors": "4;2;2;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.75, 22.151467220028564 ], "wc_strengths_avg": [ 87.75, 40.10221315588455 ], "wc_weaknesses_avg": [ 169.25, 34.412025514346 ], "wc_questions_avg": [ 82.25, 78.08769109149021 ], "wc_limitations_avg": [ 11.0, 9.513148795220223 ], "wc_review_avg": [ 424.0, 70.9154426059656 ], "wc_reply_reviewers_avg": [ 143.75, 118.36674997650312 ], "wc_reply_authors_avg": [ 340.75, 389.34777192119645 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_g98UnxL0zoJ:scholar.google.com/&scioq=Preference+Learning+of+Latent+Decision+Utilities+with+a+Human-like+Model+of+Preferential+Choice&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "aalto.fi;aalto.fi;aalto.fi;exeter.ac.uk;aalto.fi", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Aalto University;University of Exeter", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.exeter.ac.uk", "aff_unique_abbr": "Aalto;Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Finland;United Kingdom" }, { "title": "On the Power of Decision Trees in Auto-Regressive Language Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93674", "id": "nge5deRsEH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nge5deRsEH", "openreview": "https://openreview.net/forum?id=nge5deRsEH", "poster": "/media/PosterPDFs/NeurIPS%202024/93674.png?t=1733883584.4137683", "project": "", "author_site": "Yulu Gan, Tomer Galanti, Tomaso Poggio, Eran Malach", "tldr": "", "abstract": "Originally proposed for handling time series data, Auto-regressive Decision Trees (ARDTs) have not yet been explored for language modeling. This paper delves into both the theoretical and practical applications of ARDTs in this new context. We theoretically demonstrate that ARDTs can compute complex functions, such as simulating automata, Turing machines, and sparse circuits, by leveraging \"chain-of-thought\" computations. Our analysis provides bounds on the size, depth, and computational efficiency of ARDTs, highlighting their surprising computational power. Empirically, we train ARDTs on simple language generation tasks, showing that they can learn to generate coherent and grammatically correct text on par with a smaller Transformer model. Additionally, we show that ARDTs can be used on top of transformer representations to solve complex reasoning tasks. This research reveals the unique computational abilities of ARDTs, aiming to broaden the architectural diversity in language model development.", "keywords": "Decision Trees;Auto-Regressive;Language Models", "primary_area": "learning_theory", "supplementary_material": "/attachment/3b3b786b76e2b209d6e1b9be46f058ba61a5897c.zip", "author": "Yulu Gan;Tomer Galanti;Tomaso A Poggio;eran malach", "authorids": "~Yulu_Gan1;~Tomer_Galanti1;~Tomaso_A_Poggio2;~eran_malach1", "gender": "M;M;M;M", "homepage": "https://yulugan.com/;https://tomergalanti.github.io;;https://cbmm.mit.edu/about/people/poggio", "dblp": "307/1107;198/1490;202/2566;12/5544", "google_scholar": "https://scholar.google.com/citations?hl=en;;I15dUOwAAAAJ;WgAGy7wAAAAJ", "orcid": "0009-0002-8541-2911;;;", "linkedin": "yulu-g-31a626281/;tomer-galanti-5880b1104/;;", "or_profile": "~Yulu_Gan1;~Tomer_Galanti1;~eran_malach1;~Tomaso_Poggio1", "aff": "Peking University;Texas A&M University - College Station;Harvard University;Massachusetts Institute of Technology", "aff_domain": "pku.edu.cn;tamu.edu;harvard.edu;mit.edu", "position": "MS student;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\ngan2024on,\ntitle={On the Power of Decision Trees in Auto-Regressive Language Modeling},\nauthor={Yulu Gan and Tomer Galanti and Tomaso A Poggio and eran malach},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nge5deRsEH}\n}", "github": "", "reviewers": "qHp8;97Y1;o5To", "pdf_size": 1979285, "rating": "5;6;7", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "135;55;63", "wc_strengths": "164;28;35", "wc_weaknesses": "720;26;27", "wc_questions": "56;31;12", "wc_limitations": "68;50;1", "wc_review": "1143;190;138", "wc_reply_reviewers": "255;24;13", "wc_reply_authors": "295;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 84.33333333333333, 35.97530016861877 ], "wc_strengths_avg": [ 75.66666666666667, 62.52643885234114 ], "wc_weaknesses_avg": [ 257.6666666666667, 326.9192900735933 ], "wc_questions_avg": [ 33.0, 18.01850900231944 ], "wc_limitations_avg": [ 39.666666666666664, 28.311756490114764 ], "wc_review_avg": [ 490.3333333333333, 461.99302544038 ], "wc_reply_reviewers_avg": [ 97.33333333333333, 111.57757640115489 ], "wc_reply_authors_avg": [ 98.33333333333333, 139.06433363335435 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LPjiqMkmrVcJ:scholar.google.com/&scioq=On+the+Power+of+Decision+Trees+in+Auto-Regressive+Language+Modeling&hl=en&as_sdt=0,5", "gs_version_total": 8, "email": "pku.edu.cn;tamu.edu;harvard.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Peking University;Texas A&M University;Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.tamu.edu;https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Peking U;TAMU;Harvard;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "On the Impacts of the Random Initialization in the Neural Tangent Kernel Theory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93673", "id": "ni3Ud2BV3G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ni3Ud2BV3G", "openreview": "https://openreview.net/forum?id=ni3Ud2BV3G", "poster": "/media/PosterPDFs/NeurIPS%202024/93673.png?t=1731326249.2865891", "project": "", "author_site": "Guhan Chen, Yicheng Li, Qian Lin", "tldr": "", "abstract": "This paper aims to discuss the impact of random initialization of neural networks in the neural tangent kernel (NTK) theory, which is ignored by most recent works in the NTK theory. It is well known that as the network's width tends to infinity, the neural network with random initialization converges to a Gaussian process \\(f^{\\mathrm{GP}}\\), which takes values in \\(L^{2}(\\mathcal{X})\\), where \\(\\mathcal{X}\\) is the domain of the data. In contrast, to adopt the traditional theory of kernel regression, most recent works introduced a special mirrored architecture and a mirrored (random) initialization to ensure the network's output is identically zero at initialization. Therefore, it remains a question whether the conventional setting and mirrored initialization would make wide neural networks exhibit different generalization capabilities. In this paper, we first show that the training dynamics of the gradient flow of neural networks with random initialization converge uniformly to that of the corresponding NTK regression with random initialization \\(f^{\\mathrm{GP}}\\). We then show that \\(\\mathbf{P}(f^{\\mathrm{GP}} \\in [\\mathcal{H}^{\\mathrm{NT}}]^{s}) = 1\\) for any \\(s < \\frac{3}{d+1}\\) and \\(\\mathbf{P}(f^{\\mathrm{GP}} \\in [\\mathcal{H}^{\\mathrm{NT}}]^{s}) = 0\\) for any \\(s \\geq \\frac{3}{d+1}\\), where \\([\\mathcal{H}^{\\mathrm{NT}}]^{s}\\) is the real interpolation space of the RKHS \\(\\mathcal{H}^{\\mathrm{NT}}\\) associated with the NTK. Consequently, the generalization error of the wide neural network trained by gradient descent is \\(\\Omega(n^{-\\frac{3}{d+3}})\\), and it still suffers from the curse of dimensionality. Thus, the NTK theory may not explain the superior performance of neural networks.", "keywords": "neural tangent kernel;random initialization;non-parametric regression;reproducing kernel Hilbert space", "primary_area": "learning_theory", "supplementary_material": "", "author": "Guhan Chen;Yicheng Li;Qian Lin", "authorids": "~Guhan_Chen1;~Yicheng_Li2;~Qian_Lin2", "gender": "M;M;M", "homepage": ";https://sites.google.com/site/qianlincd/;", "dblp": ";79/3108;", "google_scholar": ";kHPrqdgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-9497-0379;;", "linkedin": ";;", "or_profile": "~Yicheng_Li2;~Qian_Lin2;~Chen_Guhan1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nchen2024on,\ntitle={On the Impacts of the Random Initialization in the Neural Tangent Kernel Theory},\nauthor={Guhan Chen and Yicheng Li and Qian Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ni3Ud2BV3G}\n}", "github": "", "reviewers": "Yc4R;iQcQ;XKWd;43yC", "pdf_size": 749541, "rating": "5;6;7;7", "confidence": "3;4;3;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "2;4;2;2", "wc_summary": "85;88;147;188", "wc_strengths": "29;110;60;59", "wc_weaknesses": "78;60;211;219", "wc_questions": "169;56;45;211", "wc_limitations": "12;4;1;106", "wc_review": "373;318;464;783", "wc_reply_reviewers": "53;56;12;55", "wc_reply_authors": "349;90;97;1012", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;5", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 127.0, 43.02905994790033 ], "wc_strengths_avg": [ 64.5, 29.073183520213263 ], "wc_weaknesses_avg": [ 142.0, 73.3314393694819 ], "wc_questions_avg": [ 120.25, 71.41909758600987 ], "wc_limitations_avg": [ 30.75, 43.631267458097064 ], "wc_review_avg": [ 484.5, 180.05346428214037 ], "wc_reply_reviewers_avg": [ 44.0, 18.506755523321747 ], "wc_reply_authors_avg": [ 387.0, 375.62547836907976 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 1.299038105676658 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13604665493405635220&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "A Layer-Wise Natural Gradient Optimizer for Training Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93672", "id": "niG3Yyb6oA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=niG3Yyb6oA", "openreview": "https://openreview.net/forum?id=niG3Yyb6oA", "poster": "/media/PosterPDFs/NeurIPS%202024/93672.png?t=1730979625.3301675", "project": "", "author_site": "Xiaolei Liu, Shaoshuai Li, Kaixin Gao, Binfeng Wang", "tldr": "", "abstract": "Second-order optimization algorithms, such as the Newton method and the natural gradient descent (NGD) method exhibit excellent convergence properties for training deep neural networks, but the high computational cost limits its practical application. In this paper, we focus on the NGD method and propose a novel layer-wise natural gradient descent (LNGD) method to further reduce computational costs and accelerate the training process. Specifically, based on the block diagonal approximation of the Fisher information matrix, we first propose the layer-wise sample method to compute each block matrix without performing a complete back-propagation. Then, each block matrix is approximated as a Kronecker product of two smaller matrices, one of which is a diagonal matrix, while keeping the traces equal before and after approximation. By these two steps, we provide a new approximation for the Fisher information matrix, which can effectively reduce the computational cost while preserving the main information of each block matrix. Moreover, we propose a new adaptive layer-wise learning rate to further accelerate training. Based on these new approaches, we propose the LNGD optimizer. The global convergence analysis of LNGD is established under some assumptions. Experiments on image classification and machine translation tasks show that our method is quite competitive compared to the state-of-the-art methods.", "keywords": "Natural gradient method;second-order optimization;deep neural networks;Kronecker factorization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Xiaolei Liu;Shaoshuai Li;Kaixin Gao;Binfeng Wang", "authorids": "~Xiaolei_Liu3;~Shaoshuai_Li1;~Kaixin_Gao1;~Binfeng_Wang1", "gender": "M;M;M;M", "homepage": ";https://orcid.org/my-orcid?orcid=0000-0003-2949-668X;;https://sci-hub.org.cn/citations?user=YfVN5JQAAAAJ&hl=zh-TW&oi=ao", "dblp": ";;;", "google_scholar": ";;;", "orcid": "0000-0002-7082-641X;;0000-0003-3728-2294;", "linkedin": ";;;", "or_profile": "~Xiaolei_Liu3;~Shaoshuai_Li1;~Kaixin_Gao1;~Binfeng_Wang1", "aff": ";Alibaba Group;Tianjin University;Alibaba Group", "aff_domain": ";antgroup.com;tju.edu.cn;antgroup.com", "position": ";Researcher;PhD student;Instructor", "bibtex": "@inproceedings{\nliu2024a,\ntitle={A Layer-Wise Natural Gradient Optimizer for Training Deep Neural Networks},\nauthor={Xiaolei Liu and Shaoshuai Li and Kaixin Gao and Binfeng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=niG3Yyb6oA}\n}", "github": "", "reviewers": "gNTY;HWh2;Mvn4;k9Mw", "pdf_size": 1357137, "rating": "4;4;7;7", "confidence": "3;4;3;4", "soundness": "4;3;3;2", "novelty": "2;2;3;3", "presentation": "4;2;3;3", "wc_summary": "120;42;53;76", "wc_strengths": "50;63;59;67", "wc_weaknesses": "171;398;29;340", "wc_questions": "83;2;25;68", "wc_limitations": "7;2;30;18", "wc_review": "431;507;196;569", "wc_reply_reviewers": "560;0;157;41", "wc_reply_authors": "1024;32;91;216", "reply_reviewers": "2;0;1;1", "reply_authors": "4;2;2;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 72.75, 29.911327285829362 ], "wc_strengths_avg": [ 59.75, 6.299801584177076 ], "wc_weaknesses_avg": [ 234.5, 145.02155012273175 ], "wc_questions_avg": [ 44.5, 32.48461174156157 ], "wc_limitations_avg": [ 14.25, 10.779030568655049 ], "wc_review_avg": [ 425.75, 141.3636710757046 ], "wc_reply_reviewers_avg": [ 189.5, 221.52257221330743 ], "wc_reply_authors_avg": [ 340.75, 400.02960827918724 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Lii132SNrHkJ:scholar.google.com/&scioq=A+Layer-Wise+Natural+Gradient+Optimizer+for+Training+Deep+Neural+Networks&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";antgroup.com;tju.edu.cn;antgroup.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Alibaba Group;Tianjin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;http://www.tju.edu.cn", "aff_unique_abbr": "Alibaba;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "HardCore Generation: Generating Hard UNSAT Problems for Data Augmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93671", "id": "njvPjG0BfK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=njvPjG0BfK", "openreview": "https://openreview.net/forum?id=njvPjG0BfK", "poster": "/media/PosterPDFs/NeurIPS%202024/93671.png?t=1732286524.6512187", "project": "", "author_site": "Joseph Cotnareanu, Zhanguang Zhang, Hui-Ling Zhen, Yingxue Zhang, Mark Coates", "tldr": "", "abstract": "Efficiently determining the satisfiability of a boolean equation --- known as the SAT problem for brevity --- is crucial in various industrial problems. Recently, the advent of deep learning methods has introduced significant potential for enhancing SAT solving. However, a major barrier to the advancement of this field has been the scarcity of large, realistic datasets. The majority of current public datasets are either randomly generated or extremely limited, containing only a few examples from unrelated problem families. These datasets are inadequate for meaningful training of deep learning methods. In light of this, researchers have started exploring generative techniques to create data that more accurately reflect SAT problems encountered in practical situations. These methods have so far suffered from either the inability to produce challenging SAT problems or time-scalability obstacles. In this paper we address both by identifying and manipulating the key contributors to a problem's ``hardness'', known as cores. Although some previous work has addressed cores, the time costs are unacceptably high due to the expense of traditional heuristic core detection techniques. We introduce a fast core detection procedure that uses a graph neural network. Our empirical results demonstrate that we can efficiently generate problems that remain hard to solve and retain key attributes of the original example problems. We show via experiment that the generated synthetic SAT problems can be used in a data augmentation setting to provide improved prediction of solver runtimes.", "keywords": "Graph Learning;Boolean Satisfiability;Circuit Design", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/42770601d4aa0bb3ff5c559ef76ca491c0e16d78.zip", "author": "Joseph Cotnareanu;Zhanguang Zhang;Hui-Ling Zhen;Yingxue Zhang;Mark Coates", "authorids": "~Joseph_Cotnareanu1;~Zhanguang_Zhang1;~Hui-Ling_Zhen1;~Yingxue_Zhang1;~Mark_Coates1", "gender": "M;;F;M;F", "homepage": "https://github.com/joseph-cotnareanu;;;http://www.ece.mcgill.ca/~mcoate/;", "dblp": "357/5120;326/4221;174/0010-1.html;c/MarkCoates;135/7690.html", "google_scholar": ";WisvONUAAAAJ;4bsYpogAAAAJ;https://scholar.google.ca/citations?user=qxWORNoAAAAJ;https://scholar.google.com.hk/citations?user=gq29DtwAAAAJ", "orcid": "0009-0004-4876-2765;0000-0003-1134-045X;;0000-0001-5030-1379;", "linkedin": ";;yingxue-zhang-03971b112/;;hui-ling-zhen-912662158/", "or_profile": "~Joseph_Cotnareanu1;~Zhanguang_Zhang1;~Yingxue_Zhang1;~Mark_Coates1;~Huiling_Zhen1", "aff": "McGill University;Huawei Noah's Ark Lab;Huawei Canada, Huawei Noah's Ark Lab;McGill University;Huawei Technologies Ltd.", "aff_domain": "mcgill.ca;huawei.com;huawei.com;mcgill.ca;huawei.com", "position": "MS student;Researcher;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ncotnareanu2024hardcore,\ntitle={HardCore Generation: Generating Hard {UNSAT} Problems for Data Augmentation},\nauthor={Joseph Cotnareanu and Zhanguang Zhang and Hui-Ling Zhen and Yingxue Zhang and Mark Coates},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=njvPjG0BfK}\n}", "github": "", "reviewers": "vCMn;kFMW;1y8R;RF4y", "pdf_size": 967757, "rating": "4;6;7;7", "confidence": "5;5;5;4", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "2;3;4;4", "wc_summary": "64;110;98;116", "wc_strengths": "56;130;93;113", "wc_weaknesses": "181;118;137;137", "wc_questions": "143;12;29;117", "wc_limitations": "10;37;11;53", "wc_review": "454;407;368;536", "wc_reply_reviewers": "148;0;17;5", "wc_reply_authors": "538;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 97.0, 20.12461179749811 ], "wc_strengths_avg": [ 98.0, 27.55902755904134 ], "wc_weaknesses_avg": [ 143.25, 23.134119823325893 ], "wc_questions_avg": [ 75.25, 55.840733340456765 ], "wc_limitations_avg": [ 27.75, 18.15729880791744 ], "wc_review_avg": [ 441.25, 62.607407708672945 ], "wc_reply_reviewers_avg": [ 42.5, 61.22295321201028 ], "wc_reply_authors_avg": [ 134.5, 232.960833618014 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:hMus04zhLDoJ:scholar.google.com/&scioq=HardCore+Generation:+Generating+Hard+UNSAT+Problems+for+Data+Augmentation&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "mcgill.ca;huawei.com;huawei.com;mcgill.ca;huawei.com", "author_num": 5, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "McGill University;Huawei", "aff_unique_dep": ";Noah's Ark Lab", "aff_unique_url": "https://www.mcgill.ca;https://www.huawei.com", "aff_unique_abbr": "McGill;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "Canada;China" }, { "title": "DistrictNet: Decision-aware learning for geographical districting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93670", "id": "njwYBFau8E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=njwYBFau8E", "openreview": "https://openreview.net/forum?id=njwYBFau8E", "poster": "/media/PosterPDFs/NeurIPS%202024/93670.png?t=1731693053.4036503", "project": "", "author_site": "Cheikh Ahmed, Alexandre Forel, Axel Parmentier, Thibaut Vidal", "tldr": "", "abstract": "Districting is a complex combinatorial problem that consists in partitioning a geographical area into small districts. In logistics, it is a major strategic decision determining operating costs for several years. Solving districting problems using traditional methods is intractable even for small geographical areas and existing heuristics often provide sub-optimal results. We present a structured learning approach to find high-quality solutions to real-world districting problems in a few minutes. It is based on integrating a combinatorial optimization layer, the capacitated minimum spanning tree problem, into a graph neural network architecture. To train this pipeline in a decision-aware fashion, we show how to construct target solutions embedded in a suitable space and learn from target solutions. Experiments show that our approach outperforms existing methods as it can significantly reduce costs on real-world cities.", "keywords": "routing;combinatorial optimization;decision-focused learning", "primary_area": "optimization", "supplementary_material": "", "author": "Cheikh Ahmed;Alexandre Forel;Axel Parmentier;Thibaut Vidal", "authorids": "~Cheikh_Ahmed1;~Alexandre_Forel1;~Axel_Parmentier1;~Thibaut_Vidal1", "gender": "M;M;M;M", "homepage": ";https://alexforel.github.io/;https://cermics.enpc.fr/~parmenta/;https://w1.cirrelt.ca/~vidalt/en/home-thibaut-vidal.html", "dblp": "395/1502;321/1816;150/6250;40/11481", "google_scholar": ";https://scholar.google.ca/citations?user=DcR5I4cAAAAJ;https://scholar.google.fr/citations?hl=fr;https://scholar.google.com.tw/citations?user=qbO0xwUAAAAJ", "orcid": ";0000-0002-9868-4804;0000-0003-1762-4947;0000-0001-5183-8485", "linkedin": "cheikh-ahmed/;alexandre-forel-275267147/;axel-parmentier-466548148/;thibaut-vidal-7a877055/", "or_profile": "~Cheikh_Ahmed1;~Alexandre_Forel1;~Axel_Parmentier1;~Thibaut_Vidal1", "aff": "\u00c9cole Polytechnique de Montr\u00e9al, Universit\u00e9 de Montr\u00e9al;Polytechnique Montr\u00e9al;Ecole Nationale des Ponts et Chausees;Polytechnique Montreal", "aff_domain": "polymtl.ca;polymtl.ca;enpc.fr;polymtl.ca", "position": "MS student;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nahmed2024districtnet,\ntitle={DistrictNet: Decision-aware learning for geographical districting},\nauthor={Cheikh Ahmed and Alexandre Forel and Axel Parmentier and Thibaut Vidal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=njwYBFau8E}\n}", "github": "", "reviewers": "qHFv;VFD5;S4NW;WW6p", "pdf_size": 16422781, "rating": "5;6;6;6", "confidence": "4;4;4;1", "soundness": "3;3;4;2", "novelty": "3;3;3;2", "presentation": "3;4;3;2", "wc_summary": "135;82;56;109", "wc_strengths": "32;80;61;41", "wc_weaknesses": "102;179;73;263", "wc_questions": "4;28;78;39", "wc_limitations": "86;1;14;12", "wc_review": "359;370;282;464", "wc_reply_reviewers": "0;11;0;14", "wc_reply_authors": "0;19;0;9", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 95.5, 29.516944286290883 ], "wc_strengths_avg": [ 53.5, 18.553975315279473 ], "wc_weaknesses_avg": [ 154.25, 73.77457217768192 ], "wc_questions_avg": [ 37.25, 26.714930282521795 ], "wc_limitations_avg": [ 28.25, 33.70738049745189 ], "wc_review_avg": [ 368.75, 64.6040826883255 ], "wc_reply_reviewers_avg": [ 6.25, 6.339361166552983 ], "wc_reply_authors_avg": [ 7.0, 7.842193570679061 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:M5zOzzv1NscJ:scholar.google.com/&scioq=DistrictNet:+Decision-aware+learning+for+geographical+districting&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "polymtl.ca;polymtl.ca;enpc.fr;polymtl.ca", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "\u00c9cole Polytechnique de Montr\u00e9al;Polytechnique Montr\u00e9al;Ecole Nationale des Ponts et Chaussees;Polytechnique Montreal", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.polymtl.ca;https://www.polymtl.ca;https://www.enpc.fr;https://www.polymtl.ca", "aff_unique_abbr": "Polytechnique Montr\u00e9al;PolyMTL;ENPC;PolyMTL", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Montr\u00e9al;;Montreal", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;France" }, { "title": "Visual Fourier Prompt Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93669", "id": "nkHEl4n0JU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nkHEl4n0JU", "openreview": "https://openreview.net/forum?id=nkHEl4n0JU", "poster": "/media/PosterPDFs/NeurIPS%202024/93669.png?t=1733321097.4163384", "project": "", "author_site": "Runjia Zeng, Cheng Han, Qifan Wang, Chunshu Wu, Tong Geng, Lifu Huangg, Ying Nian Wu, Dongfang Liu", "tldr": "", "abstract": "With the scale of vision Transformer-based models continuing to grow, finetuning these large-scale pretrained models for new tasks has become increasingly parameter-intensive. Visual prompt tuning is introduced as a parameter-efficient finetuning (PEFT) method to this trend. Despite its successes, a notable research challenge persists within almost all PEFT approaches: significant performance degradation is observed when there is a substantial disparity between the datasets applied in pretraining and finetuning phases. To address this challenge, we draw inspiration from human visual cognition, and propose the Visual Fourier Prompt Tuning (VFPT) method as a general and effective solution for adapting large-scale transformer-based models. Our approach innovatively incorporates the Fast Fourier Transform into prompt embeddings and harmoniously considers both spatial and frequency domain information. Apart from its inherent simplicity and intuitiveness, VFPT exhibits superior performance across all datasets, offering a general solution to dataset challenges, irrespective of data disparities. Empirical results demonstrate that our approach outperforms current state-of-the-art baselines on two benchmarks, with low parameter usage (e.g., 0.57% of model parameters on VTAB-1k) and notable performance enhancements (e.g., 73.20% of mean accuracy on VTAB-1k). Our code is avaliable at https://github.com/runtsang/VFPT.", "keywords": "Parameter-efficient finetuning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Runjia Zeng;Cheng Han;Qifan Wang;Chunshu Wu;Tong Geng;Lifu Huang;Ying Nian Wu;Dongfang Liu", "authorids": "~Runjia_Zeng1;~Cheng_Han1;~Qifan_Wang2;~Chunshu_Wu1;~Tong_Geng1;~Lifu_Huang1;~Ying_Nian_Wu1;~Dongfang_Liu1", "gender": "M;M;M;M;M;M;;M", "homepage": "http://runjia.tech;https://chenghan111.github.io/;https://wqfcr.github.io/;;https://tonytgeng.com;https://wilburone.github.io/;https://www.rit.edu/directory/dxleec-dongfang-liu;http://www.stat.ucla.edu/~ywu/", "dblp": "392/4342;53/6096-1.html;33/8610;243/1631;188/5531;127/0072;;18/568.html", "google_scholar": "iiLwKSsAAAAJ;VgkEKZwAAAAJ;LrSyLosAAAAJ;ZawigMMAAAAJ;1B_nk28AAAAJ;76IEGtYAAAAJ;uICY0vEAAAAJ;7k_1QFIAAAAJ", "orcid": "0009-0008-2818-4418;0000-0002-8145-3436;0000-0002-7570-5756;0009-0006-2039-0853;0000-0002-3644-2922;;;", "linkedin": "runjiazeng;chenghan-87129219a/;;;;;;", "or_profile": "~Runjia_Zeng1;~Cheng_Han1;~Qifan_Wang2;~Chunshu_Wu1;~Tong_Geng1;~Lifu_Huang1;~Dongfang_Liu1;~Yingnian_Wu1", "aff": "Guangdong University of Technology;Rochester Institute of Technology;Meta AI;University of Rochester;University of Rochester;Virginia Tech;Rochester Institute of Technology;UCLA", "aff_domain": "gdut.edu.cn;rit.edu;fb.com;rochester.edu;rochester.edu;vt.edu;rit.edu;stat.ucla.edu", "position": "Undergrad student;PhD student;Principal Researcher;Postdoc;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzeng2024visual,\ntitle={Visual Fourier Prompt Tuning},\nauthor={Runjia Zeng and Cheng Han and Qifan Wang and Chunshu Wu and Tong Geng and Lifu Huang and Ying Nian Wu and Dongfang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nkHEl4n0JU}\n}", "github": "", "reviewers": "9y4B;PyKu;TrZ7", "pdf_size": 7047151, "rating": "6;6;7", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "108;65;141", "wc_strengths": "193;54;148", "wc_weaknesses": "508;161;171", "wc_questions": "6;22;107", "wc_limitations": "206;9;8", "wc_review": "1021;311;575", "wc_reply_reviewers": "28;18;31", "wc_reply_authors": "667;32;17", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 104.66666666666667, 31.116269413639905 ], "wc_strengths_avg": [ 131.66666666666666, 57.90989169006928 ], "wc_weaknesses_avg": [ 280.0, 161.27202691932246 ], "wc_questions_avg": [ 45.0, 44.3245605355165 ], "wc_limitations_avg": [ 74.33333333333333, 93.10328792380119 ], "wc_review_avg": [ 635.6666666666666, 293.01346195847196 ], "wc_reply_reviewers_avg": [ 25.666666666666668, 5.557777333511022 ], "wc_reply_authors_avg": [ 238.66666666666666, 302.93930451861513 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10868818044604113299&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "gdut.edu.cn;rit.edu;fb.com;rochester.edu;rochester.edu;vt.edu;rit.edu;stat.ucla.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;3;4;1;5", "aff_unique_norm": "Guangdong University of Technology;Rochester Institute of Technology;Meta;University of Rochester;Virginia Tech;University of California, Los Angeles", "aff_unique_dep": ";;Meta AI;;;", "aff_unique_url": "http://www.gdut.edu.cn;https://www.rit.edu;https://meta.com;https://www.rochester.edu;https://www.vt.edu;https://www.ucla.edu", "aff_unique_abbr": "GDUT;RIT;Meta;U of R;VT;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Dual-Personalizing Adapter for Federated Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93668", "id": "nkwPiBSw1f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nkwPiBSw1f", "openreview": "https://openreview.net/forum?id=nkwPiBSw1f", "poster": "/media/PosterPDFs/NeurIPS%202024/93668.png?t=1729760385.7382205", "project": "", "author_site": "yiyuan yang, Guodong Long, Tao Shen, Jing Jiang, Michael Blumenstein", "tldr": "", "abstract": "Recently, foundation models, particularly large language models (LLMs), have demonstrated an impressive ability to adapt to various tasks by fine-tuning diverse instruction data. Notably, federated foundation models (FedFM) emerge as a privacy preservation method to fine-tune models collaboratively under federated learning (FL) settings by leveraging many distributed datasets with non-IID data. To alleviate communication and computation overhead, parameter-efficient methods are introduced for efficiency, and some research adapted personalization methods to FedFM for better user preferences alignment. However, a critical gap in existing research is the neglect of test-time distribution shifts in real-world applications, and conventional methods for test-time distribution shifts in personalized FL are less effective for FedFM due to their failure to adapt to complex distribution shift scenarios and the requirement to train all parameters. To bridge this gap, we refine the setting in FedFM, termed test-time personalization, which aims to learn personalized federated foundation models on clients while effectively handling test-time distribution shifts simultaneously. To address challenges in this setting, we explore a simple yet effective solution, a Federated Dual-Personalizing Adapter (FedDPA) architecture. By co-working with a foundation model, a global adapter and a local adapter jointly tackle the test-time distribution shifts and client-specific personalization. Additionally, we introduce an instance-wise dynamic weighting mechanism that dynamically integrates the global and local adapters for each test instance during inference, facilitating effective test-time personalization. The effectiveness of the proposed method has been evaluated on benchmark datasets across different NLP tasks.", "keywords": "Federated Learning;Foundation Models;Personalization;Test-Time Distribution Shifts", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/a60742d6c6c8c0198a2d04997a248a0565303210.zip", "author": "yiyuan yang;Guodong Long;Tao Shen;Jing Jiang;Michael Blumenstein", "authorids": "~yiyuan_yang2;~Guodong_Long2;~Tao_Shen1;~Jing_Jiang6;~Michael_Blumenstein2", "gender": "F;M;M;F;", "homepage": ";https://www.uts.edu.au/staff/guodong.long;;https://www.uts.edu.au/staff/jing.jiang;https://profiles.uts.edu.au/Michael.Blumenstein", "dblp": ";34/10089;95/4097-1;68/1974-2;45/1824", "google_scholar": ";https://scholar.google.com.au/citations?user=Pl8m7hMAAAAJ;https://scholar.google.com.au/citations?user=SegyX9AAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.au/citations?user=4m2G-H8AAAAJ", "orcid": ";0000-0003-3740-9515;;;0000-0002-9908-3744", "linkedin": "yiyuan-yang/;;;;", "or_profile": "~yiyuan_yang2;~Guodong_Long2;~Tao_Shen1;~Jing_Jiang6;~Michael_Blumenstein2", "aff": "University of Technology Sydney;University of Technology Sydney;University of Technology Sydney;University of Technology Sydney;University of Technology Sydney", "aff_domain": "uts.edu.au;uts.edu.au;uts.edu.au;uts.edu.au;uts.edu.au", "position": "PhD student;Associate Professor;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024dualpersonalizing,\ntitle={Dual-Personalizing Adapter for Federated Foundation Models},\nauthor={yiyuan yang and Guodong Long and Tao Shen and Jing Jiang and Michael Blumenstein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nkwPiBSw1f}\n}", "github": "", "reviewers": "94he;mFUh;D5Ab;vXiN", "pdf_size": 2085807, "rating": "3;6;7;8", "confidence": "4;4;5;5", "soundness": "2;2;4;4", "novelty": "2;3;4;4", "presentation": "2;4;4;4", "wc_summary": "32;39;33;58", "wc_strengths": "16;28;73;127", "wc_weaknesses": "163;348;77;108", "wc_questions": "24;2;123;79", "wc_limitations": "1;1;1;1", "wc_review": "236;418;307;373", "wc_reply_reviewers": "77;46;27;0", "wc_reply_authors": "856;27;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "5;2;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 40.5, 10.452272480183437 ], "wc_strengths_avg": [ 61.0, 43.62911871674696 ], "wc_weaknesses_avg": [ 174.0, 105.07378359990659 ], "wc_questions_avg": [ 57.0, 47.312788968734445 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 333.5, 68.75499981819505 ], "wc_reply_reviewers_avg": [ 37.5, 28.0579756931964 ], "wc_reply_authors_avg": [ 220.75, 366.92735997742113 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16756551041837096899&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uts.edu.au;uts.edu.au;uts.edu.au;uts.edu.au;uts.edu.au", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Enhancing Motion in Text-to-Video Generation with Decomposed Encoding and Conditioning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93667", "id": "nkzSE5KkCA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nkzSE5KkCA", "openreview": "https://openreview.net/forum?id=nkzSE5KkCA", "poster": "/media/PosterPDFs/NeurIPS%202024/93667.png?t=1731651689.7704477", "project": "", "author_site": "PENGHUI RUAN, Pichao WANG, Divya Saxena, Jiannong Cao, Yuhui Shi", "tldr": "", "abstract": "Despite advancements in Text-to-Video (T2V) generation, producing videos with realistic motion remains challenging. Current models often yield static or minimally dynamic outputs, failing to capture complex motions described by text. This issue stems from the internal biases in text encoding which overlooks motions, and inadequate conditioning mechanisms in T2V generation models. To address this, we propose a novel framework called DEcomposed MOtion (DEMO), which enhances motion synthesis in T2V generation by decomposing both text encoding and conditioning into content and motion components. Our method includes a content encoder for static elements and a motion encoder for temporal dynamics, alongside separate content and motion conditioning mechanisms. Crucially, we introduce text-motion and video-motion supervision to improve the model's understanding and generation of motion. Evaluations on benchmarks such as MSR-VTT, UCF-101, WebVid-10M, EvalCrafter, and VBench demonstrate DEMO's superior ability to produce videos with enhanced motion dynamics while maintaining high visual quality. Our approach significantly advances T2V generation by integrating comprehensive motion understanding directly from textual descriptions. Project page: https://PR-Ryan.github.io/DEMO-project/", "keywords": "Text-to-Video Generation;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/2a3a287f34d2907f429a408dd04b398960286edd.zip", "author": "Penghui Ruan;Pichao WANG;Divya Saxena;Jiannong Cao;Yuhui Shi", "authorids": "~Penghui_Ruan1;~Pichao_WANG3;~Divya_Saxena1;~Jiannong_Cao1;~Yuhui_Shi2", "gender": ";M;F;M;M", "homepage": "https://pr-ryan.github.io/;https://wangpichao.github.io/;https://sites.google.com/site/saxenadivyakul1/;https://www4.comp.polyu.edu.hk/~csjcao/;https://www.sustech.edu.cn/en/faculties/shiyuhui.html", "dblp": "359/3774;;162/2376;c/JiannongCao;66/2014.html", "google_scholar": "tTUajvgAAAAJ;;JfLt2VYAAAAJ;q2jH-3sAAAAJ;xSvAHWgAAAAJ", "orcid": "0000-0003-0238-299X;;0000-0002-6847-585X;0000-0002-2725-2529;0000-0002-8840-723X", "linkedin": ";;divya-saxena-1757b831/;;", "or_profile": "~Penghui_Ruan1;~Pichao_WANG3;~Divya_Saxena1;~Jiannong_Cao1;~Yuhui_Shi2", "aff": "Hong Kong Polytechnic University;Amazon;The Hong Kong Polytechnic University, Hong Kong Polytechnic University;Hong Kong Polytechnic University;Southern University of Science and Technology", "aff_domain": "connect.polyu.hk;amazon.com;comp.polyu.edu.hk;polyu.edu.hk;sustech.edu.cn", "position": "PhD student;Researcher;Research Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nruan2024enhancing,\ntitle={Enhancing Motion in Text-to-Video Generation with Decomposed Encoding and Conditioning},\nauthor={Penghui Ruan and Pichao WANG and Divya Saxena and Jiannong Cao and Yuhui Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nkzSE5KkCA}\n}", "github": "", "reviewers": "gMRr;uuEr;5zwJ;WCt2", "pdf_size": 48522810, "rating": "4;5;7;7", "confidence": "3;5;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "102;65;143;106", "wc_strengths": "81;63;43;46", "wc_weaknesses": "128;116;88;25", "wc_questions": "39;5;102;129", "wc_limitations": "2;7;1;6", "wc_review": "352;256;377;312", "wc_reply_reviewers": "0;16;17;79", "wc_reply_authors": "0;20;20;20", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.0, 27.613402542968153 ], "wc_strengths_avg": [ 58.25, 15.188400179084036 ], "wc_weaknesses_avg": [ 89.25, 39.83324616447924 ], "wc_questions_avg": [ 68.75, 49.20556370980827 ], "wc_limitations_avg": [ 4.0, 2.5495097567963922 ], "wc_review_avg": [ 324.25, 45.71856843778029 ], "wc_reply_reviewers_avg": [ 28.0, 30.20761493398643 ], "wc_reply_authors_avg": [ 15.0, 8.660254037844387 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.058025885318565944, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:meS5YhS0fqMJ:scholar.google.com/&scioq=Enhancing+Motion+in+Text-to-Video+Generation+with+Decomposed+Encoding+and+Conditioning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "connect.polyu.hk;amazon.com;comp.polyu.edu.hk;polyu.edu.hk;sustech.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Hong Kong Polytechnic University;Amazon;Southern University of Science and Technology", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.amazon.com;https://www.sustech.edu.cn", "aff_unique_abbr": "PolyU;Amazon;SUSTech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "The Representation Landscape of Few-Shot Learning and Fine-Tuning in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93666", "id": "nmUkwoOHFO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nmUkwoOHFO", "openreview": "https://openreview.net/forum?id=nmUkwoOHFO", "poster": "/media/PosterPDFs/NeurIPS%202024/93666.png?t=1733564510.1068306", "project": "", "author_site": "Diego Doimo, Alessandro Serra, Alessio Ansuini, Alberto Cazzaniga", "tldr": "", "abstract": "In-context learning (ICL) and supervised fine-tuning (SFT) are two common strategies for improving the performance of modern large language models (LLMs) on specific tasks. Despite their different natures, these strategies often lead to comparable performance gains. \nHowever, little is known about whether they induce similar representations inside LLMs. We approach this problem by analyzing the probability landscape of their hidden representations in the two cases. More specifically, we compare how LLMs solve the same question-answering task, finding that ICL and SFT create very different internal structures, in both cases undergoing a sharp transition in the middle of the network. In the first half of the network, ICL shapes interpretable representations hierarchically organized according to their semantic content. In contrast, the probability landscape obtained with SFT is fuzzier and semantically mixed. In the second half of the model, the fine-tuned representations develop probability modes that better encode the identity of answers, while less-defined peaks characterize the landscape of ICL representations. Our approach reveals the diverse computational strategies developed inside LLMs to solve the same task across different conditions, allowing us to make a step towards designing optimal methods to extract information from language models.", "keywords": "LLMs;geometry hidden representations;clustering", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Diego Doimo;Alessandro Pietro Serra;Alessio ansuini;Alberto Cazzaniga", "authorids": "~Diego_Doimo1;~Alessandro_Pietro_Serra2;~Alessio_ansuini1;~Alberto_Cazzaniga1", "gender": ";M;M;M", "homepage": ";https://alexserra98.github.io/;;https://areasciencepark-rit.gitlab.io/lade/alberto.cazzaniga/", "dblp": "270/0353;;232/2196;339/6443", "google_scholar": "yu7h58MAAAAJ;;6lhdu6kAAAAJ;AmafJqIAAAAJ", "orcid": "0000-0002-1553-1504;;0000-0002-3117-3532;0000-0001-6271-3303", "linkedin": "diego-doimo-84575b158;alessandro-pietro-serra-b489531a3;alessioansuini/;alberto-cazzaniga-4155b6164/", "or_profile": "~Diego_Doimo1;~Alessandro_Pietro_Serra2;~Alessio_ansuini1;~Alberto_Cazzaniga1", "aff": "Area Science Park;AREA Area Science Park Ricerca Istituto Innovazione (RIT);AREA Science Park;AREA Science Park", "aff_domain": "areasciencepark.it;areasciencepark.it;areasciencepark.it;areasciencepark.it", "position": "Researcher;Intern;Researcher;Researcher", "bibtex": "@inproceedings{\ndoimo2024the,\ntitle={The Representation Landscape of Few-Shot Learning and Fine-Tuning in Large Language Models},\nauthor={Diego Doimo and Alessandro Pietro Serra and Alessio ansuini and Alberto Cazzaniga},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nmUkwoOHFO}\n}", "github": "", "reviewers": "UJWa;mxnJ;P3MS", "pdf_size": 2000189, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "100;87;120", "wc_strengths": "42;50;153", "wc_weaknesses": "239;2;168", "wc_questions": "1;59;19", "wc_limitations": "1;1;33", "wc_review": "383;199;493", "wc_reply_reviewers": "115;59;60", "wc_reply_authors": "46;314;22", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 102.33333333333333, 13.572848714334887 ], "wc_strengths_avg": [ 81.66666666666667, 50.545908725522864 ], "wc_weaknesses_avg": [ 136.33333333333334, 99.31207826286231 ], "wc_questions_avg": [ 26.333333333333332, 24.239545283597124 ], "wc_limitations_avg": [ 11.666666666666666, 15.084944665313014 ], "wc_review_avg": [ 358.3333333333333, 121.28570493764805 ], "wc_reply_reviewers_avg": [ 78.0, 26.166135875720485 ], "wc_reply_authors_avg": [ 127.33333333333333, 132.3564211597693 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1546516814044260736&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "areasciencepark.it;areasciencepark.it;areasciencepark.it;areasciencepark.it", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Area Science Park", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";Italy" }, { "title": "Understanding and Minimising Outlier Features in Transformer Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93665", "id": "npJQ6qS4bg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=npJQ6qS4bg", "openreview": "https://openreview.net/forum?id=npJQ6qS4bg", "poster": "/media/PosterPDFs/NeurIPS%202024/93665.png?t=1733816721.7828228", "project": "", "author_site": "Bobby He, Lorenzo Noci, Daniele Paliotta, Imanol Schlag, Thomas Hofmann", "tldr": "", "abstract": "Outlier Features (OFs) are neurons whose activation magnitudes significantly exceed the average over a neural network's (NN) width. They are well known to emerge during standard transformer training and have the undesirable effect of hindering quantisation in afflicted models. Despite their practical importance, little is known behind why OFs emerge during training, nor how one can minimise them.\n\nOur work focuses on the above questions, first identifying several quantitative metrics, such as the kurtosis over neuron activation norms, to measure OFs. With these metrics, we study how architectural and optimisation choices influence OFs, and provide practical insights to minimise OFs during training. As highlights, we introduce a novel unnormalised transformer block, the Outlier Protected block, and present a previously unknown benefit of non-diagonal preconditioning optimisers, finding both approaches to significantly reduce OFs and improve quantisation without compromising convergence speed, at scales of up to 7B parameters. Notably, our combination of OP block and non-diagonal preconditioner (SOAP) achieves 14.87 weight-and-activation int8 perplexity (from 14.71 in standard precision), compared to 63.4 int8 perplexity (from 16.00) with a default OF-prone combination of Pre-Norm model and Adam, when quantising OPT-125m models post-training.", "keywords": "Outlier Features;Training Dynamics;Normalisation Layers;Signal Propagation;NN Optimisers;NN Architectures;Transformers;Understanding Deep Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Bobby He;Lorenzo Noci;Daniele Paliotta;Imanol Schlag;Thomas Hofmann", "authorids": "~Bobby_He1;~Lorenzo_Noci1;~Daniele_Paliotta1;~Imanol_Schlag3;~Thomas_Hofmann1", "gender": ";M;M;M;M", "homepage": "http://csml.stats.ox.ac.uk/people/he/;;https://danielepaliotta.com;;http://www.da.inf.ethz.ch/", "dblp": "270/3685;268/6839;314/5880;213/4144;h/ThHofmann", "google_scholar": ";;_xugfIEAAAAJ;https://scholar.google.ch/citations?user=nFQJEskAAAAJ;T3hAyLkAAAAJ", "orcid": ";;;;", "linkedin": ";lorenzo-noci-97aa59130;;;thomas-hofmann-1ab2402/", "or_profile": "~Bobby_He1;~Lorenzo_Noci1;~Daniele_Paliotta1;~Imanol_Schlag3;~Thomas_Hofmann1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich;University of Geneva;ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;unige.ch;ethz.ch;ethz.ch", "position": "Postdoc;PhD student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhe2024understanding,\ntitle={Understanding and Minimising Outlier Features in Transformer Training},\nauthor={Bobby He and Lorenzo Noci and Daniele Paliotta and Imanol Schlag and Thomas Hofmann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=npJQ6qS4bg}\n}", "github": "", "reviewers": "Lb1w;Yyp1;7V85;cUUo", "pdf_size": 6707088, "rating": "3;5;7;10", "confidence": "4;3;3;4", "soundness": "2;3;3;4", "novelty": "1;3;3;4", "presentation": "1;2;4;4", "wc_summary": "98;120;123;87", "wc_strengths": "51;81;136;142", "wc_weaknesses": "311;164;204;46", "wc_questions": "2;2;10;59", "wc_limitations": "4;1;22;5", "wc_review": "466;368;495;339", "wc_reply_reviewers": "565;12;400;238", "wc_reply_authors": "780;0;32;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 2.5860201081971503 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 1.299038105676658 ], "wc_summary_avg": [ 107.0, 15.049916943292411 ], "wc_strengths_avg": [ 102.5, 38.069016273079605 ], "wc_weaknesses_avg": [ 181.25, 94.79286629277543 ], "wc_questions_avg": [ 18.25, 23.75263143316967 ], "wc_limitations_avg": [ 8.0, 8.215838362577491 ], "wc_review_avg": [ 417.0, 65.13447627792827 ], "wc_reply_reviewers_avg": [ 303.75, 204.3017070413265 ], "wc_reply_authors_avg": [ 206.75, 331.1596103089868 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.09667364890456637, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6019027880354913989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "inf.ethz.ch;ethz.ch;unige.ch;ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "ETH Zurich;University of Geneva;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.ethz.ch;https://www.unige.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;UNIGE;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "NeuralFuse: Learning to Recover the Accuracy of Access-Limited Neural Network Inference in Low-Voltage Regimes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93664", "id": "npoHt6WV1F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=npoHt6WV1F", "openreview": "https://openreview.net/forum?id=npoHt6WV1F", "poster": "", "project": "", "author_site": "Hao-Lun Sun, Lei Hsiung, Nandhini Chandramoorthy, Pin-Yu Chen, Tsung-Yi Ho", "tldr": "", "abstract": "Deep neural networks (DNNs) have become ubiquitous in machine learning, but their energy consumption remains problematically high. An effective strategy for reducing such consumption is supply-voltage reduction, but if done too aggressively, it can lead to accuracy degradation. This is due to random bit-flips in static random access memory (SRAM), where model parameters are stored. To address this challenge, we have developed NeuralFuse, a novel add-on module that handles the energy-accuracy tradeoff in low-voltage regimes by learning input transformations and using them to generate error-resistant data representations, thereby protecting DNN accuracy in both nominal and low-voltage scenarios. As well as being easy to implement, NeuralFuse can be readily applied to DNNs with limited access, such cloud-based APIs that are accessed remotely or non-configurable hardware. Our experimental results demonstrate that, at a 1% bit-error rate, NeuralFuse can reduce SRAM access energy by up to 24% while recovering accuracy by up to 57%. To the best of our knowledge, this is the first approach to addressing low-voltage-induced bit errors that requires no model retraining.", "keywords": "machine learning;energy efficient inference;bit error resilience", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Hao-Lun Sun;Lei Hsiung;Nandhini Chandramoorthy;Pin-Yu Chen;Tsung-Yi Ho", "authorids": "~Hao-Lun_Sun1;~Lei_Hsiung1;~Nandhini_Chandramoorthy1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "gender": "M;;F;M;M", "homepage": ";https://hsiung.cc/;;http://www.pinyuchen.com;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": "283/1534;313/9417;29/11469.html;39/8969;63/4181.html", "google_scholar": "g2MolmMAAAAJ;CJaZ2NcAAAAJ;;jxwlCUUAAAAJ;TRDUYkAAAAAJ", "orcid": ";;;0000-0003-1039-8369;0000-0001-7348-5625", "linkedin": "hao-lun-sun-24b910233/;l-hsiung/;;pin-yu-chen-940062a2;", "or_profile": "~Hao-Lun_Sun1;~Lei_Hsiung1;~Nandhini_Chandramoorthy1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "aff": "Mediatek;Dartmouth College;;International Business Machines;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "mediatek.com;dartmouth.edu;;ibm.com;cse.cuhk.edu.hk", "position": "Engineer;PhD student;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nsun2024neuralfuse,\ntitle={NeuralFuse: Learning to Recover the Accuracy of Access-Limited Neural Network Inference in Low-Voltage Regimes},\nauthor={Hao-Lun Sun and Lei Hsiung and Nandhini Chandramoorthy and Pin-Yu Chen and Tsung-Yi Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=npoHt6WV1F}\n}", "github": "", "reviewers": "LgKi;e1Hd;nPtF;aRGH", "pdf_size": 5134030, "rating": "4;5;5;7", "confidence": "4;3;4;4", "soundness": "3;2;2;4", "novelty": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "50;120;87;67", "wc_strengths": "27;100;115;301", "wc_weaknesses": "189;315;186;274", "wc_questions": "7;5;118;58", "wc_limitations": "1;31;44;5", "wc_review": "274;571;550;705", "wc_reply_reviewers": "0;64;21;229", "wc_reply_authors": "241;223;26;55", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.0, 26.04803255526221 ], "wc_strengths_avg": [ 135.75, 101.04794654024396 ], "wc_weaknesses_avg": [ 241.0, 55.43915583772899 ], "wc_questions_avg": [ 47.0, 46.16817085395522 ], "wc_limitations_avg": [ 20.25, 17.907749719046222 ], "wc_review_avg": [ 525.0, 156.63811796622176 ], "wc_reply_reviewers_avg": [ 78.5, 89.90133480655335 ], "wc_reply_authors_avg": [ 136.25, 96.50744789911295 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16994568417590757467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mediatek.com;dartmouth.edu;;ibm.com;cse.cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "MediaTek Inc.;Dartmouth College;International Business Machines Corporation;Chinese University of Hong Kong", "aff_unique_dep": ";;;Department of Computer Science and Engineering", "aff_unique_url": "https://www.mediatek.com/;https://www.dartmouth.edu;https://www.ibm.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "MediaTek;Dartmouth;IBM;CUHK", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Taiwan;;Hong Kong SAR", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Wings: Learning Multimodal LLMs without Text-only Forgetting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93663", "id": "nqWaya7hiX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nqWaya7hiX", "openreview": "https://openreview.net/forum?id=nqWaya7hiX", "poster": "", "project": "", "author_site": "Yi-Kai Zhang, Shiyin Lu, Yang Li, YanQing Ma, Qingguo Chen, Zhao Xu, Weihua Luo, Kaifu Zhang, De-Chuan Zhan, Han-Jia Ye", "tldr": "", "abstract": "Multimodal large language models (MLLMs), initiated with a trained LLM, first align images with text and then fine-tune on multimodal mixed inputs. However, during the continued training, the MLLM catastrophically forgets the text-only instructions that the initial LLM masters. In this paper, we present Wings, a novel MLLM that excels in both text-only and multimodal instructions. By examining attention across layers of MLLM, we find that *text-only forgetting* is related to the attention shifts from pre-image to post-image text. From that, we construct an additional Low-Rank Residual Attention (LoRRA) block that acts as the \"modality learner\" to expand the learnable space and compensate for the attention shift. The complementary learners, like \"wings\" on either side, are connected in parallel to each layer's attention block. The LoRRA mirrors the structure of attention but utilizes low-rank connections to ensure efficiency. Initially, image and text inputs are aligned with visual learners operating alongside the main attention, balancing focus on visual elements. Later, textual learners are integrated with token-wise routing, blending the outputs of both modality learners collaboratively. Our experimental results demonstrate that Wings outperforms equally-scaled MLLMs in both text-only and visual question-answering tasks. Wings with *compensation of learners* addresses text-only forgetting during visual modality expansion in general MLLMs.", "keywords": "Visual and Textual Learners;Large Language Models;Multimodal Large Language Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Yi-Kai Zhang;Shiyin Lu;Yang Li;YanQing Ma;Qing-Guo Chen;Zhao Xu;Weihua Luo;Kaifu Zhang;De-Chuan Zhan;Han-Jia Ye", "authorids": "~Yi-Kai_Zhang2;~Shiyin_Lu1;~Yang_Li104;~YanQing_Ma1;~Qing-Guo_Chen1;~Zhao_Xu7;~Weihua_Luo2;~Kaifu_Zhang2;~De-Chuan_Zhan1;~Han-Jia_Ye1", "gender": "M;M;M;;M;;M;M;M;M", "homepage": "http://www.lamda.nju.edu.cn/zhangyk;http://www.lamda.nju.edu.cn/lusy;;https://github.com/Rabbit19731;;;;;http://www.lamda.nju.edu.cn/zhandc/;http://www.lamda.nju.edu.cn/yehj", "dblp": "330/8964;228/8223;;;63/10301;;22/1116.html;;74/498;165/3014", "google_scholar": ";https://scholar.google.com.hk/citations?user=WFMoc84AAAAJ;g7tY0NAAAAAJ;;GlqRHLcAAAAJ;;tsKl9GUAAAAJ;e3gpYTYAAAAJ;mYJf4TcAAAAJ;mgOYhtoAAAAJ", "orcid": ";;;;;;0000-0002-5002-6933;;0000-0002-3533-2078;", "linkedin": ";;;;;;;;;", "or_profile": "~Yi-Kai_Zhang2;~Shiyin_Lu1;~Yang_Li104;~YanQing_Ma1;~Qing-Guo_Chen1;~Zhao_Xu7;~Weihua_Luo2;~Kaifu_Zhang2;~De-Chuan_Zhan1;~Han-Jia_Ye1", "aff": "Nanjing University;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;;Alibaba International Digital Commerce Group;Alibaba Group;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;nju.edu.cn;nju.edu.cn", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;;Researcher;vice president;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024wings,\ntitle={Wings: Learning Multimodal {LLM}s without Text-only Forgetting},\nauthor={Yi-Kai Zhang and Shiyin Lu and Yang Li and YanQing Ma and Qing-Guo Chen and Zhao Xu and Weihua Luo and Kaifu Zhang and De-Chuan Zhan and Han-Jia Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nqWaya7hiX}\n}", "github": "", "reviewers": "TqCY;LUvX;3jFi;GnKS", "pdf_size": 5374739, "rating": "5;6;6;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;2", "wc_summary": "78;76;101;128", "wc_strengths": "27;33;53;61", "wc_weaknesses": "110;132;206;87", "wc_questions": "83;48;399;137", "wc_limitations": "7;41;53;8", "wc_review": "305;330;812;421", "wc_reply_reviewers": "734;24;258;26", "wc_reply_authors": "1949;20;828;4", "reply_reviewers": "3;1;3;1", "reply_authors": "6;2;6;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 95.75, 21.05201890555868 ], "wc_strengths_avg": [ 43.5, 13.955285736952863 ], "wc_weaknesses_avg": [ 133.75, 44.64512851364637 ], "wc_questions_avg": [ 166.75, 137.78674645988272 ], "wc_limitations_avg": [ 27.25, 20.20365066021485 ], "wc_review_avg": [ 467.0, 203.8099605024249 ], "wc_reply_reviewers_avg": [ 260.5, 289.4525004210535 ], "wc_reply_authors_avg": [ 700.25, 794.2293041055586 ], "reply_reviewers_avg": [ 2.0, 1.0 ], "reply_authors_avg": [ 4.0, 2.0 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7104943861119524461&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com;nju.edu.cn;nju.edu.cn", "author_num": 10, "aff_unique_index": "0;1;1;1;1;1;1;0;0", "aff_unique_norm": "Nanjing University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Nanjing U;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "BLEnD: A Benchmark for LLMs on Everyday Knowledge in Diverse Cultures and Languages", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97510", "id": "nrEqH502eC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nrEqH502eC", "openreview": "https://openreview.net/forum?id=nrEqH502eC", "poster": "/media/PosterPDFs/NeurIPS%202024/97510.png?t=1733807388.8252976", "project": "", "author_site": "Junho Myung, Nayeon Lee, Yi Zhou, Jiho Jin, Rifki Putri, Dimosthenis Antypas, Hsuvas Borkakoty, Eunsu Kim, Carla Perez-Almendros, Abinew Ali Ayele, Victor Gutierrez Basulto, Yazmin Ibanez-Garcia, Hwaran Lee, Shamsuddeen H Muhammad, Kiwoong Park, Anar Rzayev, Nina White, Seid Muhie Yimam, Mohammad Taher Pilehvar, Nedjma Ousidhoum, Jose Camacho-Collados, Alice Oh", "tldr": "", "abstract": "Large language models (LLMs) often lack culture-specific everyday knowledge, especially across diverse regions and non-English languages. Existing benchmarks for evaluating LLMs' cultural sensitivities are usually limited to a single language or online sources like Wikipedia, which may not reflect the daily habits, customs, and lifestyles of different regions. That is, information about the food people eat for their birthday celebrations, spices they typically use, musical instruments youngsters play or the sports they practice in school is not always explicitly written online. To address this issue, we introduce BLEnD, a hand-crafted benchmark designed to evaluate LLMs' everyday knowledge across diverse cultures and languages. The benchmark comprises 52.6k question-answer pairs from 16 countries/regions, in 13 different languages, including low-resource ones such as Amharic, Assamese, Azerbaijani, Hausa, and Sundanese. We evaluate LLMs in two formats: short-answer questions, and multiple-choice questions. We show that LLMs perform better in cultures that are more present online, with a maximum 57.34% difference in GPT-4, the best-performing model, in the short-answer format.\nFurthermore, we find that LLMs perform better in their local languages for mid-to-high-resource languages. Interestingly, for languages deemed to be low-resource, LLMs provide better answers in English. We make our dataset publicly available at: https://github.com/nlee0212/BLEnD.", "keywords": "cross-culture;multilingual;benchmark;cultural nlp", "primary_area": "", "supplementary_material": "/attachment/300a44a99b6c824d109b57b46800dcd6dd618c70.pdf", "author": "Junho Myung;Nayeon Lee;Yi Zhou;Jiho Jin;Rifki Afina Putri;Dimosthenis Antypas;Hsuvas Borkakoty;Eunsu Kim;Carla Perez-Almendros;Abinew Ali Ayele;Victor Gutierrez Basulto;Yazmin Ibanez-Garcia;Hwaran Lee;Shamsuddeen Hassan Muhammad;Kiwoong Park;Anar Sabuhi Rzayev;Nina White;Seid Muhie Yimam;Mohammad Taher Pilehvar;Nedjma Ousidhoum;Jose Camacho-Collados;Alice Oh", "authorids": "~Junho_Myung1;~Nayeon_Lee2;~Yi_Zhou14;~Jiho_Jin1;~Rifki_Afina_Putri1;~Dimosthenis_Antypas1;~Hsuvas_Borkakoty1;~Eunsu_Kim1;~Carla_Perez-Almendros1;~Abinew_Ali_Ayele1;~Victor_Gutierrez_Basulto1;~Yazmin_Ibanez-Garcia1;~Hwaran_Lee1;~Shamsuddeen_Hassan_Muhammad1;~Kiwoong_Park2;~Anar_Sabuhi_Rzayev1;~Nina_White1;~Seid_Muhie_Yimam1;~Mohammad_Taher_Pilehvar2;~Nedjma_Ousidhoum2;~Jose_Camacho-Collados1;~Alice_Oh1", "gender": ";F;F;;;M;;F;F;M;;F;F;;M;M;;M;M;;M;F", "homepage": "https://junhomyung.github.io/;;https://aclanthology.org/people/y/yi-zhou/;https://jinjh0123.github.io/;https://rifkiaputri.github.io/;;;https://www.linkedin.com/in/%EC%9D%80%EC%88%98-%EA%B9%80-93131023a/;;https://www.inf.uni-hamburg.de/en/inst/ab/lt/people/abinew-ali.html;;;https://hwaranlee.github.io;;;;;https://seyyaw.github.io/;http://pilehvar.github.io;;http://www.josecamachocollados.com;http://uilab.kr", "dblp": ";212/6295;01/1901-19;320/5607;243/1733;;;;242/4717.html;;;57/8677;127/9475;;;;;136/8659;;;165/0790;50/7562", "google_scholar": ";C8MUVcEAAAAJ;3BdddIMAAAAJ;-I0ahKwAAAAJ;https://scholar.google.com/citations?hl=en;;;;https://scholar.google.es/citations?user=mFrqGw4AAAAJ;g2m1wH4AAAAJ;;https://scholar.google.co.uk/citations?user=7ag8h6sAAAAJ;https://scholar.google.co.kr/citations?user=Jf6padoAAAAJ;;wdZSCcQAAAAJ;;;https://scholar.google.de/citations?user=rDKEGNgAAAAJ;P-c9CsIAAAAJ;;NP4KdQQAAAAJ;https://scholar.google.co.kr/citations?user=B88-xMEAAAAJ", "orcid": ";;0000-0001-7009-8515;0000-0002-1767-3733;0000-0002-6118-4566;;;;0000-0001-9360-4011;0000-0003-4686-5053;;;0000-0002-3773-4871;;;;;0000-0002-8289-388X;;;;", "linkedin": ";nayeon-lee-1081401bb;yi-zhou-867578210/;jiho-jin;rifki-afina-putri-73692a6b;;;;;;;;hwaranlee/;;kiwoong-park-755816193/;anar-rzayev-2118a0168/;nina-white99;seyaw;;;;alice-oh-4677544/", "or_profile": "~Junho_Myung1;~Nayeon_Lee2;~Yi_Zhou14;~Jiho_Jin1;~Rifki_Afina_Putri1;~Dimosthenis_Antypas1;~Hsuvas_Borkakoty1;~Eunsu_Kim1;~Carla_Perez-Almendros1;~Abinew_Ali_Ayele1;~Victor_Gutierrez_Basulto1;~Yazmin_Ibanez-Garcia1;~Hwaran_Lee1;~Shamsuddeen_Hassan_Muhammad1;~Kiwoong_Park2;~Anar_Sabuhi_Rzayev1;~Nina_White1;~Seid_Muhie_Yimam1;~Mohammad_Taher_Pilehvar2;~Nedjma_Ousidhoum2;~Jose_Camacho-Collados1;~Alice_Oh1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Cardiff University;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Cardiff University;;KAIST;Cardiff University;Universit\u00e4t Hamburg;;Cardiff University;NAVER AI Lab;;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Cardiff University;Universit\u00e4t Hamburg;TeIAS;;Cardiff University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;cardiff.ac.uk;kaist.ac.kr;kaist.ac.kr;cardiff.ac.uk;;kaist.ac.kr;cardiff.ac.uk;uni-hamburg.de;;cardiff.ac.uk;navercorp.com;;kaist.ac.kr;kaist.edu;cardiff.ac;uni-hamburg.de;teias.institute;;cardiff.ac.uk;kaist.ac.kr", "position": "MS student;MS student;Postdoc;PhD student;PhD student;PhD student;;MS student;Reserach Associate;PhD student;;Lecturer;Lead;;PhD student;Undergrad student;MS student;Researcher;Assistant Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmyung2024blend,\ntitle={{BLE}nD: A Benchmark for {LLM}s on Everyday Knowledge in Diverse Cultures and Languages},\nauthor={Junho Myung and Nayeon Lee and Yi Zhou and Jiho Jin and Rifki Afina Putri and Dimosthenis Antypas and Hsuvas Borkakoty and Eunsu Kim and Carla Perez-Almendros and Abinew Ali Ayele and Victor Gutierrez Basulto and Yazmin Ibanez-Garcia and Hwaran Lee and Shamsuddeen Hassan Muhammad and Kiwoong Park and Anar Sabuhi Rzayev and Nina White and Seid Muhie Yimam and Mohammad Taher Pilehvar and Nedjma Ousidhoum and Jose Camacho-Collados and Alice Oh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=nrEqH502eC}\n}", "github": "", "reviewers": "CEVb;17UX;QRj4;a2x3;bPDk", "pdf_size": 2535825, "rating": "4;7;7;7;8", "confidence": "4;4;3;4;3", "wc_summary_and_contributions": "42;59;131;67;184", "wc_strengths": "50;35;131;57;300", "wc_improvement": "139;80;2;114;27", "wc_limitations": "62;9;121;25;20", "wc_correctness": "1;3;1;9;4", "wc_clarity": "1;1;1;7;6", "wc_relation_to_prior_work": "1;15;1;10;5", "wc_documentation": "5;1;1;2;10", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "302;204;390;292;557", "wc_reply_reviewers": "30;4;0;0;0", "wc_reply_authors": "30;0;0;0;0", "reply_reviewers": "1;1;0;0;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_and_contributions_avg": [ 96.6, 53.09086550434077 ], "wc_strengths_avg": [ 114.6, 98.45730038955973 ], "wc_improvement_avg": [ 72.4, 51.461053234460714 ], "wc_limitations_avg": [ 47.4, 40.88324840322745 ], "wc_correctness_avg": [ 3.6, 2.939387691339814 ], "wc_clarity_avg": [ 3.2, 2.7129319932501073 ], "wc_relation_to_prior_work_avg": [ 6.4, 5.425863986500215 ], "wc_documentation_avg": [ 3.8, 3.4292856398964493 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 349.0, 119.52238284103944 ], "wc_reply_reviewers_avg": [ 6.8, 11.702991070662236 ], "wc_reply_authors_avg": [ 6.0, 12.0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 22, 0 ], "corr_rating_confidence": -0.5417363388859615, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14995773709068085580&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr;cardiff.ac.uk;kaist.ac.kr;kaist.ac.kr;cardiff.ac.uk;;kaist.ac.kr;cardiff.ac.uk;uni-hamburg.de;;cardiff.ac.uk;navercorp.com;;kaist.ac.kr;kaist.edu;cardiff.ac;uni-hamburg.de;teias.institute;;cardiff.ac.uk;kaist.ac.kr", "author_num": 22, "aff_unique_index": "0;0;1;0;0;1;0;1;2;1;3;0;0;1;2;4;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Cardiff University;University of Hamburg;NAVER Corporation;TeIAS", "aff_unique_dep": ";;;NAVER AI Lab;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.cardiff.ac.uk;https://www.uni-hamburg.de;https://www.naver.com;", "aff_unique_abbr": "KAIST;Cardiff;UHH;NAVER;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1;0;1;2;1;0;0;0;1;2;1;0", "aff_country_unique": "South Korea;United Kingdom;Germany;" }, { "id": "nrcFNxF57E", "title": "Partial Gromov Wasserstein Metric", "track": "main", "status": "Reject", "tldr": "", "abstract": "The Gromov-Wasserstein (GW) distance has gained increasing interest in the machine learning community in recent years, as it allows for the comparison of measures in different metric spaces. To overcome the limitations imposed by the equal mass requirements of the classical GW problem, researchers have begun exploring its application in unbalanced settings. However, Unbalanced GW (UGW) can only be regarded as a discrepancy rather than a rigorous metric/distance between two metric measure spaces (mm-spaces). In this paper, we propose a particular case of the UGW problem, termed Partial Gromov-Wasserstein (PGW). We establish that PGW is a well-defined metric between mm-spaces and discuss its theoretical properties, including the existence of a minimizer for the PGW problem and the relationship between PGW and GW, among others. We then propose two variants of the Frank-Wolfe algorithm for solving the PGW problem and show that they are mathematically and computationally equivalent. Moreover, based on our PGW metric, we introduce the analogous concept of barycenters for mm-spaces. Finally, we validate the effectiveness of our PGW metric and related solvers in applications such as shape matching, shape retrieval, and shape interpolation, comparing them against existing baselines.", "keywords": "Optimal transport;machine learning", "primary_area": "optimization", "supplementary_material": "/attachment/ec0257ee4d485e2e8f6f2ba8aefafeb97c145a67.zip", "author": "Yikun Bai;Rocio Diaz Martin;Abihith Kothapalli;Hengrong Du;Xinran Liu;Soheil Kolouri", "authorids": "~Yikun_Bai2;~Rocio_Diaz_Martin2;~Abihith_Kothapalli1;~Hengrong_Du1;~Xinran_Liu2;~Soheil_Kolouri1", "gender": "M;M;M;;M;F", "homepage": ";https://abi-kothapalli.github.io/;https://hengrongdu.netlify.app/;;https://skolouri.github.io/;", "dblp": "273/3993.html;;366/8373;;143/9637;", "google_scholar": "zLm6JOAAAAAJ;;OtzsCPcAAAAJ;ZHz5VScAAAAJ;yREBSy0AAAAJ;7RHakmMAAAAJ", "orcid": ";;0000-0003-2392-8963;;0000-0001-8495-5362;0000-0002-3732-6296", "linkedin": "yikun-bai-b70050138/?trk=public_profile_browsemap;abikothapalli/;;xinran-l-5777a0205/;skolouri/;", "or_profile": "~Yikun_Bai2;~Abihith_Kothapalli1;~Hengrong_Du1;~Xinran_Liu2;~Soheil_Kolouri1;~ROCIO_DIAZ_MARTIN1", "aff": "Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Tufts University", "aff_domain": "vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;tufts.edu", "position": "Postdoc;Undergrad student;Postdoc;PhD student;Assistant Professor;Postdoc", "bibtex": "@misc{\nanonymous2024partial,\ntitle={Partial Gromov Wasserstein Metric},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nrcFNxF57E}\n}", "github": "", "project": "", "reviewers": "AmvP;Xvbd;6xzd;qHto", "site": "https://openreview.net/forum?id=nrcFNxF57E", "pdf_size": 15257362, "rating": "4;7;7;7", "confidence": "5;4;3;3", "soundness": "2;4;4;4", "novelty": "2;3;4;3", "presentation": "1;3;4;3", "wc_summary": "85;89;102;80", "wc_strengths": "172;89;45;84", "wc_weaknesses": "208;140;62;99", "wc_questions": "67;48;125;123", "wc_limitations": "1;1;1;24", "wc_review": "533;367;335;410", "wc_reply_reviewers": "1100;23;66;20", "wc_reply_authors": "1722;28;25;19", "reply_reviewers": "6;1;1;1", "reply_authors": "7;2;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 89.0, 8.154753215150045 ], "wc_strengths_avg": [ 97.5, 46.26283605660163 ], "wc_weaknesses_avg": [ 127.25, 54.172756067972024 ], "wc_questions_avg": [ 90.75, 33.92915413033458 ], "wc_limitations_avg": [ 6.75, 9.959292143521045 ], "wc_review_avg": [ 411.25, 75.16107702794046 ], "wc_reply_reviewers_avg": [ 302.25, 460.94054660010113 ], "wc_reply_authors_avg": [ 448.5, 735.262708152671 ], "reply_reviewers_avg": [ 2.25, 2.165063509461097 ], "reply_authors_avg": [ 3.25, 2.165063509461097 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784892, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ryu4XDR0mIsJ:scholar.google.com/&scioq=Partial+Gromov+Wasserstein+Metric&hl=en&as_sdt=0,22", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Vanderbilt University;Tufts University", "aff_unique_dep": ";", "aff_unique_url": "https://www.vanderbilt.edu;https://www.tufts.edu", "aff_unique_abbr": "Vanderbilt;Tufts", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SS1: Accelerating Inference with Fast and Expressive Sketch Structured Transform", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93662", "id": "nrgyOGU7ZP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nrgyOGU7ZP", "openreview": "https://openreview.net/forum?id=nrgyOGU7ZP", "poster": "", "project": "", "author_site": "Aditya Desai, Kimia Saedi, Apoorv Walia, Jihyeong Lee, Keren Zhou, Anshumali Shrivastava", "tldr": "", "abstract": "Tensor multiplication with learned weight matrices is the fundamental building block in deep learning models. These matrices can often be sparsified, decomposed, quantized, or subjected to random parameter sharing without losing accuracy, suggesting the possibility of more efficient transforms. Although many variants of weight matrices exist, unstructured ones are incompatible with modern hardware, slowing inference and training. On the other hand, structured variants often limit expressivity or fail to deliver the promised latency benefits. We present Sketch Structured Transform (SS1), an expressive and GPU-friendly operator that accelerates inference. SS1 leverages parameter sharing in a random yet structured manner to reduce computation while retraining the rich expressive nature of parameter sharing. We confirm empirically that SS1 offers better quality-efficiency tradeoffs than competing variants. Interestingly SS1 can be combined with Quantization to achieve gains unattainable by either method alone, a finding we justify via theoretical analysis. The analysis may be of independent interest.\nMoreover, existing pre-trained models can be projected onto SS1 and finetuned for efficient deployment. Surprisingly, these projected models can perform reasonably well even without finetuning. Our experiments highlight various applications of the SS1:\n(a) Training GPT2 and DLRM models from scratch for faster inference. (b) Finetuning projected BERT models for 1.31\u00d7 faster inference while maintaining GLUE scores. (c) Proof of concept with Llama-3-8b, showing 1.11\u00d7 faster wall clock inference using projected SS1 layers without finetuning. We open source our code :https://github.com/apd10/Sketch-Structured-Linear/", "keywords": "fast linear transform;parameter sharing;latency improvement;deployment", "primary_area": "other", "supplementary_material": "", "author": "Aditya Desai;Kimia Saedi;Apoorv Walia;Jihyeong Lee;Keren Zhou;Anshumali Shrivastava", "authorids": "~Aditya_Desai1;~Kimia_Saedi1;~Apoorv_Walia1;~Jihyeong_Lee1;~Keren_Zhou2;~Anshumali_Shrivastava1", "gender": "F;M;M;M;M;M", "homepage": ";;;https://www.jokeren.tech/;https://www.cs.rice.edu/~as143/;https://apd10.github.io/", "dblp": ";;;;63/9828;18/8339", "google_scholar": ";;;;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ;ymdbDZwAAAAJ", "orcid": ";;;;;0009-0002-9111-9391", "linkedin": "kimia-saedi/;apoorv-walia-788b66120/;jihyeonglee98/;;;aditya-desai-ai/", "or_profile": "~Kimia_Saedi1;~Apoorv_Walia1;~Jihyeong_Lee1;~Keren_Zhou2;~Anshumali_Shrivastava1;~Adity_Desai1", "aff": ", Rice University;Rice University;George Mason University;George Mason University;ThirdAI Corp.;Rice University", "aff_domain": "cs.rice.edu;rice.edu;gmu.edu;gmu.edu;thirdai.com;rice.edu", "position": "PhD student;MS student;PhD student;Assistant Professor;CEO;PhD student", "bibtex": "@inproceedings{\ndesai2024ss,\ntitle={{SS}1: Accelerating Inference with Fast and Expressive Sketch Structured Transform},\nauthor={Aditya Desai and Kimia Saedi and Apoorv Walia and Jihyeong Lee and Keren Zhou and Anshumali Shrivastava},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nrgyOGU7ZP}\n}", "github": "", "reviewers": "ZoDN;983k;xFDi;KgD7;AurA", "pdf_size": 613320, "rating": "5;5;6;6;6", "confidence": "5;3;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "1;2;3;1;3", "wc_summary": "56;55;106;85;98", "wc_strengths": "62;73;73;39;143", "wc_weaknesses": "206;296;21;139;36", "wc_questions": "138;87;1;104;127", "wc_limitations": "23;8;1;41;5", "wc_review": "485;519;202;408;409", "wc_reply_reviewers": "52;98;0;49;168", "wc_reply_authors": "56;82;0;0;195", "reply_reviewers": "1;1;0;1;2", "reply_authors": "2;2;1;1;4", "rating_avg": [ 5.6, 0.48989794855663565 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.0, 0.8944271909999159 ], "wc_summary_avg": [ 80.0, 21.099763031844695 ], "wc_strengths_avg": [ 78.0, 34.790803382503256 ], "wc_weaknesses_avg": [ 139.6, 103.60424701719519 ], "wc_questions_avg": [ 91.4, 48.55759466859948 ], "wc_limitations_avg": [ 15.6, 14.718695594379279 ], "wc_review_avg": [ 404.6, 110.11739190518453 ], "wc_reply_reviewers_avg": [ 73.4, 56.560056577057985 ], "wc_reply_authors_avg": [ 66.6, 71.70383532280543 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 1.0954451150103321 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.40824829046386296, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GUa-oWlIQIwJ:scholar.google.com/&scioq=SS1:+Accelerating+Inference+with+Fast+and+Expressive+Sketch+Structured+Transform&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cs.rice.edu;rice.edu;gmu.edu;gmu.edu;thirdai.com;rice.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Rice University;George Mason University;ThirdAI Corp.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.gmu.edu;", "aff_unique_abbr": "Rice;GMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Estimating Generalization Performance Along the Trajectory of Proximal SGD in Robust Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93661", "id": "ntF7D8tAlQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ntF7D8tAlQ", "openreview": "https://openreview.net/forum?id=ntF7D8tAlQ", "poster": "", "project": "", "author_site": "Kai Tan, Pierre C Bellec", "tldr": "", "abstract": "This paper studies the generalization performance of iterates obtained by Gradient Descent (GD), Stochastic Gradient Descent (SGD) and their proximal variants in high-dimensional robust regression problems. The number of features is comparable to the sample size and errors may be heavy-tailed. We introduce estimators that precisely track the generalization error of the iterates along the trajectory of the iterative algorithm. These estimators are provably consistent under suitable conditions. The results are illustrated through several examples, including Huber regression, pseudo-Huber regression, and their penalized variants with non-smooth regularizer. We provide explicit generalization error estimates for iterates generated from GD and SGD, or from proximal SGD in the presence of a non-smooth regularizer. The proposed risk estimates serve as effective proxies for the actual generalization error, allowing us to determine the optimal stopping iteration that minimizes the generalization error. Extensive simulations confirm the effectiveness of the proposed generalization error estimates.", "keywords": "Robust regression;generalization error;stochastic gradient descent;early stopping;Stein's formula", "primary_area": "learning_theory", "supplementary_material": "/attachment/b58033d62b582794f61261ec29940f7048af8a57.zip", "author": "Kai Tan;Pierre C Bellec", "authorids": "~Kai_Tan1;~Pierre_C_Bellec1", "gender": "M;", "homepage": "https://kaitan365.github.io/;", "dblp": ";", "google_scholar": "Qqqeap8AAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Kai_Tan1;~Pierre_C_Bellec1", "aff": "Rutgers University, New Brunswick;", "aff_domain": "rutgers.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\ntan2024estimating,\ntitle={Estimating Generalization Performance Along the Trajectory of Proximal {SGD} in Robust Regression},\nauthor={Kai Tan and Pierre C Bellec},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ntF7D8tAlQ}\n}", "github": "", "reviewers": "1ZGt;yB9W;boVy;3EXM", "pdf_size": 880552, "rating": "4;6;7;7", "confidence": "4;4;4;3", "soundness": "4;3;3;3", "novelty": "2;3;3;3", "presentation": "4;3;3;3", "wc_summary": "110;83;169;76", "wc_strengths": "26;23;65;95", "wc_weaknesses": "125;186;126;107", "wc_questions": "5;49;265;125", "wc_limitations": "10;10;19;1", "wc_review": "276;351;644;404", "wc_reply_reviewers": "113;89;27;0", "wc_reply_authors": "195;53;11;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 109.5, 36.62308015445998 ], "wc_strengths_avg": [ 52.25, 29.72688177390962 ], "wc_weaknesses_avg": [ 136.0, 29.841246622753548 ], "wc_questions_avg": [ 111.0, 98.73196037757987 ], "wc_limitations_avg": [ 10.0, 6.363961030678928 ], "wc_review_avg": [ 418.75, 137.77041590994781 ], "wc_reply_reviewers_avg": [ 57.25, 45.57617250274534 ], "wc_reply_authors_avg": [ 64.75, 77.7572343901196 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13664771317771859719&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 3, "email": "rutgers.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Rutgers University", "aff_unique_dep": "", "aff_unique_url": "https://www.rutgers.edu", "aff_unique_abbr": "Rutgers", "aff_campus_unique_index": "0", "aff_campus_unique": "New Brunswick", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Constrained Binary Decision Making", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93660", "id": "ntV5xZfzEk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ntV5xZfzEk", "openreview": "https://openreview.net/forum?id=ntV5xZfzEk", "poster": "/media/PosterPDFs/NeurIPS%202024/93660.png?t=1732802702.6962354", "project": "", "author_site": "Daniel Pr\u016f\u0161a, Vojtech Franc", "tldr": "", "abstract": "Binary statistical decision making involves choosing between two states based on statistical evidence. The optimal decision strategy is typically formulated through a constrained optimization problem, where both the objective and constraints are expressed as integrals involving two Lebesgue measurable functions, one of which represents the strategy being optimized. In this work, we present a comprehensive formulation of the binary decision making problem and provide a detailed characterization of the optimal solution. Our framework encompasses a wide range of well-known and recently proposed decision making problems as specific cases. We demonstrate how our generic approach can be used to derive the optimal decision strategies for these diverse instances. Our results offer a robust mathematical tool that simplifies the process of solving both existing and novel formulations of binary decision making problems which are in the core of many Machine Learning algorithms.", "keywords": "binary statistical decision making;constrained optimization;Neyman-Pearson problem;selective classification", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Daniel Pr\u016f\u0161a;Vojtech Franc", "authorids": "~Daniel_Pr\u016f\u0161a1;~Vojtech_Franc1", "gender": "M;M", "homepage": "https://cmp.felk.cvut.cz/~prusapa1/;http://cmp.felk.cvut.cz/~xfrancv/", "dblp": "https://dblp.uni-trier.de/pid/50/6664;60/1691", "google_scholar": "https://scholar.google.cz/citations?user=-ddEK3QAAAAJ;https://scholar.google.cz/citations?user=9a5P_D8AAAAJ", "orcid": "0000-0003-4866-5709;0000-0001-7189-1224", "linkedin": ";vojtech-franc-48139b42/", "or_profile": "~Daniel_Pr\u016f\u0161a1;~Vojtech_Franc1", "aff": "Czech Technical University;Czech Technical University in Prague, Faculty of Electrical Engineering", "aff_domain": "cvut.cz;cvut.cz", "position": "Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\npr{\\r{u}}{\\v{s}}a2024constrained,\ntitle={Constrained Binary Decision Making},\nauthor={Daniel Pr{\\r{u}}{\\v{s}}a and Vojtech Franc},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ntV5xZfzEk}\n}", "github": "", "reviewers": "CrjX;bcuc;aUyF;ALNx;oCzV", "pdf_size": 337747, "rating": "6;6;6;6;8", "confidence": "2;4;3;3;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "26;46;63;99;66", "wc_strengths": "18;63;95;235;28", "wc_weaknesses": "42;889;546;93;52", "wc_questions": "2;34;226;68;21", "wc_limitations": "2;7;26;1;58", "wc_review": "90;1039;956;496;225", "wc_reply_reviewers": "25;49;213;0;11", "wc_reply_authors": "0;0;433;267;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;2;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 60.0, 24.157814470684222 ], "wc_strengths_avg": [ 87.8, 78.46374959177008 ], "wc_weaknesses_avg": [ 324.4, 339.2277111322128 ], "wc_questions_avg": [ 70.2, 80.82177924297386 ], "wc_limitations_avg": [ 18.8, 21.572204337990126 ], "wc_review_avg": [ 561.2, 380.38685571402175 ], "wc_reply_reviewers_avg": [ 59.6, 78.42856622430375 ], "wc_reply_authors_avg": [ 140.0, 179.3198260092843 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jApgeErPvqAJ:scholar.google.com/&scioq=Constrained+Binary+Decision+Making&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cvut.cz;cvut.cz", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Czech Technical University;Czech Technical University in Prague", "aff_unique_dep": ";Faculty of Electrical Engineering", "aff_unique_url": "https://www.cvut.cz;https://www.fel.cvut.cz", "aff_unique_abbr": "CTU;CTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Prague", "aff_country_unique_index": "0;0", "aff_country_unique": "Czech Republic" }, { "title": "ActAnywhere: Subject-Aware Video Background Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93659", "id": "ntlFREw59A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ntlFREw59A", "openreview": "https://openreview.net/forum?id=ntlFREw59A", "poster": "", "project": "", "author_site": "Boxiao Pan, Zhan Xu, Chun-Hao Huang, Krishna Kumar Singh, Yang Zhou, Leonidas Guibas, Jimei Yang", "tldr": "", "abstract": "We study a novel problem to automatically generate video background that tailors to foreground subject motion. It is an important problem for the movie industry and visual effects community, which traditionally requires tedious manual efforts to solve. To this end, we propose ActAnywhere, a video diffusion model that takes as input a sequence of foreground subject segmentation and an image of a novel background and generates a video of the subject interacting in this background. We train our model on a large-scale dataset of 2.4M videos of human-scene interactions. Through extensive evaluation, we show that our model produces videos with realistic foreground-background interaction while strictly following the guidance of the condition image. Our model generalizes to diverse scenarios including non-human subjects, gaming and animation clips, as well as videos with multiple moving subjects. Both quantitative and qualitative comparisons demonstrate that our model significantly outperforms existing methods, which fail to accomplish the studied task. Please visit our project webpage at https://actanywhere.github.io.", "keywords": "Video Background Generation;Video Generation;Video Synthesis;Video Editing", "primary_area": "machine_vision", "supplementary_material": "/attachment/25ef7faf3ede09b16187f03b68bf377e404cb2fd.zip", "author": "Boxiao Pan;Zhan Xu;Chun-Hao Paul Huang;Krishna Kumar Singh;Yang Zhou;Leonidas Guibas;Jimei Yang", "authorids": "~Boxiao_Pan1;~Zhan_Xu1;~Chun-Hao_Paul_Huang2;~Krishna_Kumar_Singh4;~Yang_Zhou10;~Leonidas_Guibas1;~Jimei_Yang1", "gender": "M;;Not Specified;;M;M;", "homepage": "https://cs.stanford.edu/~bxpan/;;https://research.adobe.com/person/paulchhuang;;https://yzhou359.github.io/;http://geometry.stanford.edu/;https://jimeiyang.github.io/", "dblp": "231/8774;;289/7223;;07/4580-9;g/LeonidasJGuibas;73/3039", "google_scholar": "eW8dJ4EAAAAJ;;https://scholar.google.com.tw/citations?user=LphRgywAAAAJ;;UuwugFEAAAAJ;https://scholar.google.com.tw/citations?user=5JlEyTAAAAAJ;GwKF9rMAAAAJ", "orcid": ";;0000-0002-1268-6527;;;;", "linkedin": "boxiao-leo-pan-245339105/;;;;;;", "or_profile": "~Boxiao_Pan1;~Zhan_Xu1;~Chun-Hao_Paul_Huang2;~Krishna_Kumar_Singh4;~Yang_Zhou10;~Leonidas_Guibas1;~Jimei_Yang1", "aff": "Stanford University;;Adobe Systems;;Adobe Research;Stanford University;Adobe Research", "aff_domain": "stanford.edu;;adobe.com;;adobe.com;stanford.edu;adobe.com", "position": "PhD student;;Researcher;;Research Scientist;Full Professor;Researcher", "bibtex": "@inproceedings{\npan2024actanywhere,\ntitle={ActAnywhere: Subject-Aware Video Background Generation},\nauthor={Boxiao Pan and Zhan Xu and Chun-Hao Paul Huang and Krishna Kumar Singh and Yang Zhou and Leonidas Guibas and Jimei Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ntlFREw59A}\n}", "github": "", "reviewers": "ghM9;Pq5G;Aquh;gAcX", "pdf_size": 7914827, "rating": "5;5;5;7", "confidence": "5;4;5;4", "soundness": "3;3;4;4", "novelty": "2;3;2;3", "presentation": "4;2;3;4", "wc_summary": "179;209;69;59", "wc_strengths": "80;55;19;48", "wc_weaknesses": "126;172;77;50", "wc_questions": "106;3;2;54", "wc_limitations": "1;1;2;16", "wc_review": "492;440;169;227", "wc_reply_reviewers": "65;28;0;0", "wc_reply_authors": "40;67;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 129.0, 65.95452979136459 ], "wc_strengths_avg": [ 50.5, 21.73131381210073 ], "wc_weaknesses_avg": [ 106.25, 46.72459202604128 ], "wc_questions_avg": [ 41.25, 42.89157842747222 ], "wc_limitations_avg": [ 5.0, 6.363961030678928 ], "wc_review_avg": [ 332.0, 136.80095028909705 ], "wc_reply_reviewers_avg": [ 23.25, 26.677471769266294 ], "wc_reply_authors_avg": [ 26.75, 28.4022446296063 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1390406393310300474&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "stanford.edu;;adobe.com;;adobe.com;stanford.edu;adobe.com", "author_num": 7, "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Stanford University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.stanford.edu;https://www.adobe.com", "aff_unique_abbr": "Stanford;Adobe", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "nu2Sqrsnr7", "title": "Compute-Optimal Solutions for Acoustic Wave Equation Using Hard-Constraint PINNs", "track": "main", "status": "Reject", "tldr": "", "abstract": "This paper explores the optimal imposition of hard constraints, strategic sampling of PDEs, and computational domain scaling for solving the acoustic wave equation within a specified computational budget. First, we derive a formula to systematically enforce hard boundary and initial conditions in Physics-Informed Neural Networks (PINNs), employing continuous functions within the PINN ansatz to ensure that these conditions are satisfied. We demonstrate that optimally selecting these functions significantly enhances the convergence of the solution. Secondly, we introduce an importance sampling strategy that optimizes the efficiency of hard-constraint PINNs under a fixed number of sampling points. Leveraging these strategies, we develop an algorithm to determine the optimal computational domain size, given a computational budget. Our approach offers a practical framework for domain decomposition in large-scale implementation of acoustic wave equation systems.", "keywords": "compute-optimal;acoustic wave equation;hard-constraint;PINN", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Chen Gu;ZHUOYU CHEN", "authorids": "~Chen_Gu1;~ZHUOYU_CHEN1", "gender": "F;M", "homepage": ";https://nekko.moe/", "dblp": ";", "google_scholar": "51ViiJcAAAAJ;", "orcid": "0000-0003-0003-8992;", "linkedin": ";", "or_profile": "~Chen_Gu1;~ZHUOYU_CHEN1", "aff": "Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": "Assistant Professor;Undergrad student", "bibtex": "@misc{\nanonymous2024computeoptimal,\ntitle={Compute-Optimal Solutions for Acoustic Wave Equation Using Hard-Constraint {PINN}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nu2Sqrsnr7}\n}", "github": "", "project": "", "reviewers": "U3hW;AwP6;cEZM;QJWv", "site": "https://openreview.net/forum?id=nu2Sqrsnr7", "pdf_size": 33126443, "rating": "2;3;3;3", "confidence": "5;4;5;3", "soundness": "2;2;2;3", "novelty": "1;2;1;2", "presentation": "2;1;2;2", "wc_summary": "58;42;48;48", "wc_strengths": "19;19;24;45", "wc_weaknesses": "115;124;181;220", "wc_questions": "15;109;2;81", "wc_limitations": "16;9;8;42", "wc_review": "223;303;263;436", "wc_reply_reviewers": "19;25;38;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 2.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 49.0, 5.744562646538029 ], "wc_strengths_avg": [ 26.75, 10.732543966832841 ], "wc_weaknesses_avg": [ 160.0, 42.90104893822527 ], "wc_questions_avg": [ 51.75, 44.60591328512398 ], "wc_limitations_avg": [ 18.75, 13.77270852083932 ], "wc_review_avg": [ 306.25, 80.07301355637865 ], "wc_reply_reviewers_avg": [ 20.5, 13.683932183404009 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D8jxite2fQQJ:scholar.google.com/&scioq=Compute-Optimal+Solutions+for+Acoustic+Wave+Equation+Using+Hard-Constraint+PINNs&hl=en&as_sdt=0,44", "gs_version_total": 0, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Non-Euclidean Mixture Model for Social Network Embedding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93658", "id": "nuZv2iTlvn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nuZv2iTlvn", "openreview": "https://openreview.net/forum?id=nuZv2iTlvn", "poster": "/media/PosterPDFs/NeurIPS%202024/93658.png?t=1731711221.674857", "project": "", "author_site": "Roshni Iyer, Yewen Wang, Wei Wang, Yizhou Sun", "tldr": "", "abstract": "It is largely agreed that social network links are formed due to either homophily or social influence. Inspired by this, we aim at understanding the generation of links via providing a novel embedding-based graph formation model. Different from existing graph representation learning, where link generation probabilities are defined as a simple function of the corresponding node embeddings, we model the link generation as a mixture model of the two factors. In addition, we model the homophily factor in spherical space and the influence factor in hyperbolic space to accommodate the fact that (1) homophily results in cycles and (2) influence results in hierarchies in networks. We also design a special projection to align these two spaces. We call this model Non-Euclidean Mixture Model, i.e., NMM. We further integrate NMM with our non-Euclidean graph variational autoencoder (VAE) framework, NMM-GNN. NMM-GNN learns embeddings through a unified framework which uses non-Euclidean GNN encoders, non-Euclidean Gaussian priors, a non-Euclidean decoder, and a novel space unification loss component to unify distinct non-Euclidean geometric spaces. Experiments on public datasets show NMM-GNN significantly outperforms state-of-the-art baselines on social network generation and classification tasks, demonstrating its ability to better explain how the social network is formed.", "keywords": "network science;social influence;representation learning", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Roshni Iyer;YEWEN WANG;Wei Wang;Yizhou Sun", "authorids": "~Roshni_Iyer1;~YEWEN_WANG1;~Wei_Wang13;~Yizhou_Sun1", "gender": "F;F;F;F", "homepage": ";;http://www.cs.ucla.edu/~weiwang;http://web.cs.ucla.edu/~yzsun/", "dblp": "262/3496.html;219/8145.html;w/WeiWang.html;37/3868", "google_scholar": ";;UedS9LQAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ", "orcid": ";;0000-0002-8180-2886;", "linkedin": "roshni-iyer-56b868141;;wei-wang-8800845/;", "or_profile": "~Roshni_Iyer1;~YEWEN_WANG1;~Wei_Wang13;~Yizhou_Sun1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\niyer2024noneuclidean,\ntitle={Non-Euclidean Mixture Model for Social Network Embedding},\nauthor={Roshni Iyer and YEWEN WANG and Wei Wang and Yizhou Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nuZv2iTlvn}\n}", "github": "", "reviewers": "BmsX;3fkq;BsLx;iEGq", "pdf_size": 1533591, "rating": "5;6;7;7", "confidence": "5;3;4;3", "soundness": "4;3;3;3", "novelty": "3;3;2;3", "presentation": "4;3;3;3", "wc_summary": "50;95;19;89", "wc_strengths": "19;39;43;55", "wc_weaknesses": "104;13;65;99", "wc_questions": "24;159;95;1", "wc_limitations": "23;17;14;2", "wc_review": "220;323;236;246", "wc_reply_reviewers": "21;170;23;0", "wc_reply_authors": "31;256;23;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 63.25, 30.84132779242813 ], "wc_strengths_avg": [ 39.0, 12.96148139681572 ], "wc_weaknesses_avg": [ 70.25, 36.29996556472196 ], "wc_questions_avg": [ 69.75, 62.0941824972356 ], "wc_limitations_avg": [ 14.0, 7.648529270389178 ], "wc_review_avg": [ 256.25, 39.63820757804268 ], "wc_reply_reviewers_avg": [ 53.5, 67.86199230791857 ], "wc_reply_authors_avg": [ 77.5, 103.68341236668476 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:91iqaWn_jLEJ:scholar.google.com/&scioq=Non-Euclidean+Mixture+Model+for+Social+Network+Embedding&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "cs.ucla.edu;ucla.edu;ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Membership Inference Attacks against Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93657", "id": "nv2Qt5cj1a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nv2Qt5cj1a", "openreview": "https://openreview.net/forum?id=nv2Qt5cj1a", "poster": "/media/PosterPDFs/NeurIPS%202024/93657.png?t=1731021602.4678411", "project": "", "author_site": "Zhan Li, Yongtao Wu, Yihang Chen, Francesco Tonin, Elias Abad Rocamora, Volkan Cevher", "tldr": "", "abstract": "Large vision-language models (VLLMs) exhibit promising capabilities for processing multi-modal tasks across various application scenarios. However, their emergence also raises significant data security concerns, given the potential inclusion of sensitive information, such as private photos and medical records, in their training datasets. Detecting inappropriately used data in VLLMs remains a critical and unresolved issue, mainly due to the lack of standardized datasets and suitable methodologies. In this study, we introduce the first membership inference attack (MIA) benchmark tailored for various VLLMs to facilitate training data detection. Then, we propose a novel MIA pipeline specifically designed for token-level image detection. Lastly, we present a new metric called MaxR\u00e9nyi-K%, which is based on the confidence of the model output and applies to both text and image data. We believe that our work can deepen the understanding and methodology of MIAs in the context of VLLMs. Our code and datasets are available at https://github.com/LIONS-EPFL/VL-MIA.", "keywords": "Membership inference attack; Large Vision-Language Models", "primary_area": "privacy", "supplementary_material": "", "author": "Zhan Li;Yongtao Wu;Yihang Chen;Francesco Tonin;Elias Abad Rocamora;Volkan Cevher", "authorids": "~Zhan_Li7;~Yongtao_Wu1;~Yihang_Chen1;~Francesco_Tonin1;~Elias_Abad_Rocamora1;~Volkan_Cevher1", "gender": "F;M;M;;M;M", "homepage": "https://github.com/JaineLi;https://www.epfl.ch/labs/lions/people/phds/yongtao-wu/;https://yhangchen.github.io/;https://taralloc.github.io/;https://megaelius.github.io/;http://lions.epfl.ch", "dblp": ";322/3726;;279/6777;329/4351;70/5301", "google_scholar": ";rLgDE9AAAAAJ;HzlOQRkAAAAJ;;lHfp1OAAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ", "orcid": ";;;0000-0002-5644-0086;;", "linkedin": ";;;;el%C3%ADas-abad-rocamora-8587261b8/?originalSubdomain=es;", "or_profile": "~Zhan_Li7;~Yongtao_Wu1;~Yihang_Chen1;~Francesco_Tonin1;~Elias_Abad_Rocamora1;~Volkan_Cevher1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Amazon Development Center Germany", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;epfl.ch;amazon.de", "position": "MS student;PhD student;MS student;Postdoc;PhD student;Amazon Scholar", "bibtex": "@inproceedings{\nli2024membership,\ntitle={Membership Inference Attacks against Large Vision-Language Models},\nauthor={Zhan Li and Yongtao Wu and Yihang Chen and Francesco Tonin and Elias Abad Rocamora and Volkan Cevher},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nv2Qt5cj1a}\n}", "github": "", "reviewers": "rMMx;M6Nm;e87W;FZ3m", "pdf_size": 2447543, "rating": "5;5;6;6", "confidence": "2;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;2", "wc_summary": "37;104;98;55", "wc_strengths": "57;29;50;37", "wc_weaknesses": "68;155;17;88", "wc_questions": "31;9;170;51", "wc_limitations": "14;10;11;8", "wc_review": "207;307;346;239", "wc_reply_reviewers": "29;112;174;0", "wc_reply_authors": "0;224;233;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.5, 28.30635970943632 ], "wc_strengths_avg": [ 43.25, 10.917302780449024 ], "wc_weaknesses_avg": [ 82.0, 49.462106708064915 ], "wc_questions_avg": [ 65.25, 62.27509534316266 ], "wc_limitations_avg": [ 10.75, 2.165063509461097 ], "wc_review_avg": [ 274.75, 54.73744148204225 ], "wc_reply_reviewers_avg": [ 78.75, 68.65629978377804 ], "wc_reply_authors_avg": [ 114.25, 114.29430213269602 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2379349800849226415&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;epfl.ch;amazon.de", "author_num": 6, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne;Amazon", "aff_unique_dep": ";;Development Center", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.amazon.de", "aff_unique_abbr": "EPFL;EPFL;Amazon", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "Precise asymptotics of reweighted least-squares algorithms for linear diagonal networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93656", "id": "nv7ox1vd3q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nv7ox1vd3q", "openreview": "https://openreview.net/forum?id=nv7ox1vd3q", "poster": "", "project": "", "author_site": "Chiraag Kaushik, Justin Romberg, Vidya Muthukumar", "tldr": "", "abstract": "The classical iteratively reweighted least-squares (IRLS) algorithm aims to recover an unknown signal from linear measurements by performing a sequence of weighted least squares problems, where the weights are recursively updated at each step. Varieties of this algorithm have been shown to achieve favorable empirical performance and theoretical guarantees for sparse recovery and $\\ell_p$-norm minimization. Recently, some preliminary connections have also been made between IRLS and certain types of non-convex linear neural network architectures that are observed to exploit low-dimensional structure in high-dimensional linear models. In this work, we provide a unified asymptotic analysis for a family of algorithms that encompasses IRLS, the recently proposed lin-RFM algorithm (which was motivated by feature learning in neural networks), and the alternating minimization algorithm on linear diagonal neural networks. Our analysis operates in a \"batched\" setting with i.i.d. Gaussian covariates and shows that, with appropriately chosen reweighting policy, the algorithm can achieve favorable performance in only a handful of iterations. We also extend our results to the case of group-sparse recovery and show that leveraging this structure in the reweighting scheme provably improves test error compared to coordinate-wise reweighting.", "keywords": "high-dimensional asymptotics;linear diagonal neural networks;feature learning;iteratively reweighted least-squares;sparse recovery", "primary_area": "learning_theory", "supplementary_material": "/attachment/165ecd042baf16c3554576f948660f1b2ffb2720.zip", "author": "Chiraag Kaushik;Justin Romberg;Vidya Muthukumar", "authorids": "~Chiraag_Kaushik1;~Justin_Romberg1;~Vidya_Muthukumar3", "gender": "M;M;F", "homepage": "https://chiraagk7.github.io/;https://jrom.ece.gatech.edu/;https://vmuthukumar.ece.gatech.edu", "dblp": ";;149/0019", "google_scholar": "rr6QwiwAAAAJ;;K2OEs2YAAAAJ", "orcid": ";;", "linkedin": "chiraag-kaushik/;;", "or_profile": "~Chiraag_Kaushik1;~Justin_Romberg1;~Vidya_Muthukumar3", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkaushik2024precise,\ntitle={Precise asymptotics of reweighted least-squares algorithms for linear diagonal networks},\nauthor={Chiraag Kaushik and Justin Romberg and Vidya Muthukumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nv7ox1vd3q}\n}", "github": "", "reviewers": "yJHL;cDEg;kkyZ", "pdf_size": 507766, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "4;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "123;79;79", "wc_strengths": "30;126;114", "wc_weaknesses": "37;357;88", "wc_questions": "173;48;239", "wc_limitations": "12;2;1", "wc_review": "375;612;521", "wc_reply_reviewers": "0;40;23", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 93.66666666666667, 20.741798914805393 ], "wc_strengths_avg": [ 90.0, 42.708313008125245 ], "wc_weaknesses_avg": [ 160.66666666666666, 140.38122698170466 ], "wc_questions_avg": [ 153.33333333333334, 79.20577981154882 ], "wc_limitations_avg": [ 5.0, 4.96655480858378 ], "wc_review_avg": [ 502.6666666666667, 97.6194425079121 ], "wc_reply_reviewers_avg": [ 21.0, 16.391054470858997 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8418683337688221460&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "gatech.edu;gatech.edu;gatech.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "VisionLLM v2: An End-to-End Generalist Multimodal Large Language Model for Hundreds of Vision-Language Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93655", "id": "nvYDPF4LJK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nvYDPF4LJK", "openreview": "https://openreview.net/forum?id=nvYDPF4LJK", "poster": "/media/PosterPDFs/NeurIPS%202024/93655.png?t=1731761312.4018908", "project": "", "author_site": "Jiannan Wu, Muyan Zhong, Sen Xing, Zeqiang Lai, Zhaoyang Liu, Zhe Chen, Wenhai Wang, Xizhou Zhu, Lewei Lu, Tong Lu, Ping Luo, Yu Qiao, Jifeng Dai", "tldr": "", "abstract": "We present VisionLLM v2, an end-to-end generalist multimodal large model (MLLM) that unifies visual perception, understanding, and generation within a single framework. Unlike traditional MLLMs limited to text output, VisionLLM v2 significantly broadens its application scope. It excels not only in conventional visual question answering (VQA) but also in open-ended, cross-domain vision tasks such as object localization, pose estimation, and image generation and editing. To this end, we propose a new information transmission mechanism termed ``super link'', as a medium to connect MLLM with task-specific decoders. It not only allows flexible transmission of task information and gradient feedback between the MLLM and multiple downstream decoders but also effectively resolves training conflicts in multi-tasking scenarios. In addition, to support the diverse range of tasks, we carefully collected and combed training data from hundreds of public vision and vision-language tasks. In this way, our model can be joint-trained end-to-end on hundreds of vision language tasks and generalize to these tasks using a set of shared parameters through different user prompts, achieving performance comparable to task-specific models. We believe VisionLLM v2 will offer a new perspective on the generalization of MLLMs.", "keywords": "Mutlimodal Large Language Model;Vision Generalist Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiannan Wu;Muyan Zhong;Sen Xing;Zeqiang Lai;Zhaoyang Liu;Zhe Chen;Wenhai Wang;Xizhou Zhu;Lewei Lu;Tong Lu;Ping Luo;Yu Qiao;Jifeng Dai", "authorids": "~Jiannan_Wu2;~Muyan_Zhong1;~Sen_Xing1;~Zeqiang_Lai1;~Zhaoyang_Liu1;~Zhe_Chen10;~Wenhai_Wang2;~Xizhou_Zhu1;~Lewei_Lu1;~Tong_Lu1;~Ping_Luo2;~Yu_Qiao1;~Jifeng_Dai1", "gender": "M;;;;M;M;;;M;M;;;M", "homepage": ";;https://github.com/xings19;https://zeqiang-lai.github.io/;https://scholar.google.com/citations?user=btgwZosAAAAJ&hl=en;https://czczup.github.io/;;;;https://cs.nju.edu.cn/lutong/;;;https://jifengdai.org/", "dblp": "277/0616;;334/0585;315/3760;120/5899-1;06/4240-17;;170/1608;247/6438;;;;14/9399", "google_scholar": "1euA66EAAAAJ;;;WUMu1KkAAAAJ;btgwZosAAAAJ;j1rq_lYAAAAJ;;02RXI00AAAAJ;https://scholar.google.com.hk/citations?user=zdgKJXIAAAAJ;;;;SH_-B_AAAAAJ", "orcid": ";;;;0000-0003-0258-3097;;;;;;;;", "linkedin": ";;;;;;;;lewei-lu-94015977/;;;;", "or_profile": "~Jiannan_Wu2;~Muyan_Zhong1;~Sen_Xing1;~Zeqiang_Lai1;~Zhaoyang_Liu1;~Zhe_Chen10;~Wenhai_Wang2;~Xizhou_Zhu1;~Lewei_Lu1;~Tong_Lu1;~Ping_Luo2;~Yu_Qiao1;~Jifeng_Dai1", "aff": "University of Hong Kong;;Tsinghua University;IFLYTEK CO.LTD.;Shanghai AI Laboratory ;Nanjing University;;Tsinghua University;SenseTime;Nanjing University;;;Tsinghua University", "aff_domain": "hku.hk;;tsinghua.edu.cn;iflytek.com;pjlab.org.cn;nju.edu.cn;;tsinghua.edu.cn;sensetime.com;nju.edu.cn;;;tsinghua.edu.cn", "position": "PhD student;;MS student;Researcher;Intern;PhD student;;Postdoc;Researcher;Undergrad student;;;Associate Professor", "bibtex": "@inproceedings{\nwu2024visionllm,\ntitle={Vision{LLM} v2: An End-to-End Generalist Multimodal Large Language Model for Hundreds of Vision-Language Tasks},\nauthor={Jiannan Wu and Muyan Zhong and Sen Xing and Zeqiang Lai and Zhaoyang Liu and Zhe Chen and Wenhai Wang and Xizhou Zhu and Lewei Lu and Tong Lu and Ping Luo and Yu Qiao and Jifeng Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nvYDPF4LJK}\n}", "github": "", "reviewers": "nisa;x7n8;W77r", "pdf_size": 16037857, "rating": "5;5;7", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "53;70;106", "wc_strengths": "45;15;58", "wc_weaknesses": "302;174;232", "wc_questions": "5;3;77", "wc_limitations": "12;4;14", "wc_review": "417;266;487", "wc_reply_reviewers": "40;27;124", "wc_reply_authors": "34;36;258", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.33333333333333, 22.095751225568733 ], "wc_strengths_avg": [ 39.333333333333336, 18.00617178142601 ], "wc_weaknesses_avg": [ 236.0, 52.33227175144097 ], "wc_questions_avg": [ 28.333333333333332, 34.4222150491349 ], "wc_limitations_avg": [ 10.0, 4.320493798938574 ], "wc_review_avg": [ 390.0, 92.22074965357127 ], "wc_reply_reviewers_avg": [ 63.666666666666664, 42.99095512107427 ], "wc_reply_authors_avg": [ 109.33333333333333, 105.12637897100592 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=693001975095915525&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hku.hk;;tsinghua.edu.cn;iflytek.com;pjlab.org.cn;nju.edu.cn;;tsinghua.edu.cn;sensetime.com;nju.edu.cn;;;tsinghua.edu.cn", "author_num": 13, "aff_unique_index": "0;1;2;3;4;1;5;4;1", "aff_unique_norm": "University of Hong Kong;Tsinghua University;iFLYTEK;Shanghai AI Laboratory;Nanjing University;SenseTime", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.hku.hk;https://www.tsinghua.edu.cn;https://www.iflytek.com;https://www.shanghai-ai-lab.com;https://www.nju.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "HKU;THU;iFLYTEK;SAIL;Nanjing U;SenseTime", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Diff-eRank: A Novel Rank-Based Metric for Evaluating Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93654", "id": "nvn80cscVm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nvn80cscVm", "openreview": "https://openreview.net/forum?id=nvn80cscVm", "poster": "/media/PosterPDFs/NeurIPS%202024/93654.png?t=1730294270.044832", "project": "", "author_site": "Lai Wei, Zhiquan Tan, Chenghai Li, Jindong Wang, Weiran Huang", "tldr": "", "abstract": "Large Language Models (LLMs) have transformed natural language processing and extended their powerful capabilities to multi-modal domains. As LLMs continue to advance, it is crucial to develop diverse and appropriate metrics for their evaluation. In this paper, we introduce a novel rank-based metric, Diff-eRank, grounded in information theory and geometry principles. Diff-eRank assesses LLMs by analyzing their hidden representations, providing a quantitative measure of how efficiently they eliminate redundant information during training. We demonstrate the applicability of Diff-eRank in both single-modal (e.g., language) and multi-modal settings. For language models, our results show that Diff-eRank increases with model size and correlates well with conventional metrics such as loss and accuracy. In the multi-modal context, we propose an alignment evaluation method based on the eRank, and verify that contemporary multi-modal LLMs exhibit strong alignment performance based on our method. Our code is publicly available at https://github.com/waltonfuture/Diff-eRank.", "keywords": "Large Language Model;Multi-modal Large Language Model;Evaluation Metric", "primary_area": "evaluation", "supplementary_material": "", "author": "Lai Wei;Zhiquan Tan;Chenghai Li;Jindong Wang;Weiran Huang", "authorids": "~Lai_Wei7;~Zhiquan_Tan1;~Chenghai_Li1;~Jindong_Wang4;~Weiran_Huang1", "gender": "M;M;M;M;M", "homepage": "https://waltonfuture.github.io/;;https://lich99.github.io/;https://jd92.wang/;https://www.weiranhuang.com", "dblp": "36/4168-5;326/0177;;19/2969-1;170/0073-1", "google_scholar": "ic3N84oAAAAJ;;;hBZ_tKsAAAAJ;AjJ2rf8AAAAJ", "orcid": ";;;0000-0002-4833-0880;", "linkedin": ";https://www.linkedin.cn/incareer/in/ACoAAC1A8_QBFX8OlchWmVI_pNXN4zm_t6vPKCs;chenghai-li-236750193/;jindong-wang/;", "or_profile": "~Lai_Wei7;~Zhiquan_Tan1;~Chenghai_Li1;~Jindong_Wang4;~Weiran_Huang1", "aff": "Shanghai Jiaotong University;Tsinghua University;;Microsoft Research;Shanghai AI Laboratory", "aff_domain": "sjtu.edu.cn;tsinghua.edu.cn;;microsoft.com;pjlab.org.cn", "position": "Undergrad student;PhD student;;Researcher;Consultant", "bibtex": "@inproceedings{\nwei2024differank,\ntitle={Diff-eRank: A Novel Rank-Based Metric for Evaluating Large Language Models},\nauthor={Lai Wei and Zhiquan Tan and Chenghai Li and Jindong Wang and Weiran Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nvn80cscVm}\n}", "github": "", "reviewers": "c8rT;YACX;y598;LQXh", "pdf_size": 515864, "rating": "5;5;5;6", "confidence": "3;4;2;4", "soundness": "3;2;2;3", "novelty": "3;3;2;3", "presentation": "3;2;2;3", "wc_summary": "226;239;96;93", "wc_strengths": "38;168;74;65", "wc_weaknesses": "120;161;118;246", "wc_questions": "44;109;81;15", "wc_limitations": "11;125;1;39", "wc_review": "439;802;370;458", "wc_reply_reviewers": "0;0;46;244", "wc_reply_authors": "0;0;76;359", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;2;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 163.5, 69.16104394816492 ], "wc_strengths_avg": [ 86.25, 49.02231634674151 ], "wc_weaknesses_avg": [ 161.25, 51.85255538543882 ], "wc_questions_avg": [ 62.25, 35.716767770894386 ], "wc_limitations_avg": [ 44.0, 48.79549159502341 ], "wc_review_avg": [ 517.25, 167.62961403045705 ], "wc_reply_reviewers_avg": [ 72.5, 100.78070251789278 ], "wc_reply_authors_avg": [ 108.75, 147.77580146965877 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8366680822958306682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;tsinghua.edu.cn;;microsoft.com;pjlab.org.cn", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University;Microsoft;Shanghai AI Laboratory", "aff_unique_dep": ";;Microsoft Research;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "SJTU;THU;MSR;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Discovering plasticity rules that organize and maintain neural circuits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93653", "id": "nw4TWuEPGx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nw4TWuEPGx", "openreview": "https://openreview.net/forum?id=nw4TWuEPGx", "poster": "", "project": "", "author_site": "David Bell, Alison Duffy, Adrienne Fairhall", "tldr": "", "abstract": "Intrinsic dynamics within the brain can accelerate learning by providing a prior scaffolding for dynamics aligned with task objectives. Such intrinsic dynamics would ideally self-organize and self-sustain in the face of biological noise including synaptic turnover and cell death. An example of such dynamics is the formation of sequences, a ubiquitous motif in neural activity. The sequence-generating circuit in zebra finch HVC provides a reliable timing scaffold for motor output in song and demonstrates a remarkable capacity for unsupervised recovery following perturbation. Inspired by HVC, we seek a local plasticity rule capable of organizing and maintaining sequence-generating dynamics despite continual network perturbations. We adopt a meta-learning approach introduced by Confavreux et al, which parameterizes a learning rule using basis functions constructed from pre- and postsynaptic activity and synapse size, with tunable time constants. Candidate rules are simulated within initially random networks, and their fitness is evaluated according to a loss function that measures the fidelity with which the resulting dynamics encode time. We use this approach to introduce biological noise, forcing meta-learning to find robust solutions. We first show that, in the absence of perturbations, meta-learning identifies a temporally asymmetric generalization of Oja's rule that reliably organizes sparse sequential activity. When synaptic turnover is introduced, the learned rule incorporates a form of homeostasis, better maintaining robust sequential dynamics relative to other previously proposed rules. Additionally, inspired by recent findings demonstrating that the strength of projections from inhibitory interneurons in HVC also dynamically responds to perturbations, we explore the role of inhibitory plasticity in sequence-generating circuits. We find that learned plasticity adjusts both excitation and inhibition in response to manipulations, outperforming rules applied only to excitatory connections. We demonstrate how plasticity acting on both excitatory and inhibitory synapses can better shape excitatory cell dynamics to scaffold timing representations.", "keywords": "biologically plausible learning rules plasticity self-organization RNNs homeostasis meta-learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/875501345ad2812b7c03756b233dad7aad0535d1.zip", "author": "David G Bell;Alison Duffy;Adrienne Fairhall", "authorids": "~David_G_Bell1;~Alison_Duffy1;~Adrienne_Fairhall1", "gender": "M;;F", "homepage": ";;https://fairhalllab.com/", "dblp": ";;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=Qd2_0VkAAAAJ", "orcid": ";;0000-0001-6779-953X", "linkedin": "davidgrbell/;;adrienne-fairhall-ab472715", "or_profile": "~David_G_Bell1;~Alison_Duffy1;~Adrienne_Fairhall1", "aff": "University of Washington;University of Washington, University of Washington;University of Washington", "aff_domain": "uw.edu;u.washington.edu;u.washington.edu", "position": "PhD student;Instructor;Full Professor", "bibtex": "@inproceedings{\nbell2024discovering,\ntitle={Discovering plasticity rules that organize and maintain neural circuits},\nauthor={David G Bell and Alison Duffy and Adrienne Fairhall},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nw4TWuEPGx}\n}", "github": "", "reviewers": "96SN;rSVd;JAan;6oGR;4Ykq", "pdf_size": 2659273, "rating": "5;6;6;7;7", "confidence": "2;3;3;3;3", "soundness": "3;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "2;2;3;3;3", "wc_summary": "67;76;60;168;81", "wc_strengths": "72;93;37;169;143", "wc_weaknesses": "298;246;323;144;211", "wc_questions": "1;55;53;113;2", "wc_limitations": "1;12;12;18;11", "wc_review": "439;482;485;612;448", "wc_reply_reviewers": "69;28;36;12;21", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 90.4, 39.46948188157529 ], "wc_strengths_avg": [ 102.8, 47.69234739452442 ], "wc_weaknesses_avg": [ 244.4, 63.63835321565133 ], "wc_questions_avg": [ 44.8, 41.40724574274411 ], "wc_limitations_avg": [ 10.8, 5.491812087098392 ], "wc_review_avg": [ 493.2, 62.107648482292426 ], "wc_reply_reviewers_avg": [ 33.2, 19.56936381183609 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1wTIEkUt1H0J:scholar.google.com/&scioq=Discovering+plasticity+rules+that+organize+and+maintain+neural+circuits&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "uw.edu;u.washington.edu;u.washington.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Probabilistic Federated Prompt-Tuning with Non-IID and Imbalanced Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93652", "id": "nw6ANsC66G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nw6ANsC66G", "openreview": "https://openreview.net/forum?id=nw6ANsC66G", "poster": "/media/PosterPDFs/NeurIPS%202024/93652.png?t=1733717662.9790711", "project": "", "author_site": "Pei-Yau Weng, Minh Hoang, Lam Nguyen, My T. Thai, Lily Weng, Nghia Hoang", "tldr": "", "abstract": "Fine-tuning pre-trained models is a popular approach in machine learning for solving complex tasks with moderate data. However, fine-tuning the entire pre-trained model is ineffective in federated data scenarios where local data distributions are diversely skewed. To address this, we explore integrating federated learning with a more effective prompt-tuning method, optimizing for a small set of input prefixes to reprogram the pre-trained model's behavior. Our approach transforms federated learning into a distributed set modeling task, aggregating diverse sets of prompts to globally fine-tune the pre-trained model. We benchmark various baselines based on direct adaptations of existing federated model aggregation techniques and introduce a new probabilistic prompt aggregation method that substantially outperforms these baselines. Our reported results on a variety of computer vision datasets confirm that the proposed method is most effective to combat extreme data heterogeneity in federated learning.", "keywords": "Probabilistic Learning", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Pei-Yau Weng;Minh Hoang;Lam M. Nguyen;My T. Thai;Tsui-Wei Weng;Trong Nghia Hoang", "authorids": "~Pei-Yau_Weng1;~Minh_Hoang1;~Lam_M._Nguyen1;~My_T._Thai2;~Tsui-Wei_Weng1;~Trong_Nghia_Hoang1", "gender": "M;M;F;M;F;M", "homepage": ";;https://lilywenglab.github.io;https://lamnguyen-mltd.github.io/;http://www.cise.ufl.edu/~mythai;https://htnghia87.github.io/", "dblp": ";;177/9197;181/1428;63/4711;62/540", "google_scholar": ";56Mb6DY0_NUC;v8GM4xoAAAAJ;DeFL5Q8AAAAJ;zLLJimcAAAAJ;E-kZZeQAAAAJ", "orcid": ";;;;0000-0003-0503-2012;", "linkedin": "peiyauweng831225/;;;lam-m-nguyen-71b54750/;;", "or_profile": "~Pei-Yau_Weng1;~Minh_Hoang1;~Tsui-Wei_Weng1;~Lam_M_Nguyen1;~My_Thai1;~Nghia_Hoang2", "aff": "Washington State University;Princeton University;University of California, San Diego;IBM Research, Thomas J. Watson Research Center;University of Florida;Washington State University", "aff_domain": "wsu.edu;princeton.edu;ucsd.edu;ibm.com;ufl.edu;eecs.wsu.edu", "position": "PhD student;Postdoc;Assistant Professor;Staff Research Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nweng2024probabilistic,\ntitle={Probabilistic Federated Prompt-Tuning with Non-{IID} and Imbalanced Data},\nauthor={Pei-Yau Weng and Minh Hoang and Lam M. Nguyen and My T. Thai and Tsui-Wei Weng and Trong Nghia Hoang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nw6ANsC66G}\n}", "github": "", "reviewers": "h6E3;DxxX;tCnK;HV8G", "pdf_size": 1747865, "rating": "5;5;6;7", "confidence": "5;3;3;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "46;87;89;74", "wc_strengths": "34;102;55;53", "wc_weaknesses": "204;43;102;71", "wc_questions": "46;1;182;45", "wc_limitations": "1;1;1;11", "wc_review": "331;234;429;254", "wc_reply_reviewers": "42;13;22;18", "wc_reply_authors": "209;0;433;41", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;5;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.0, 17.161002301730512 ], "wc_strengths_avg": [ 61.0, 25.0499500997507 ], "wc_weaknesses_avg": [ 105.0, 60.848171706305195 ], "wc_questions_avg": [ 68.5, 68.0018382104484 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 312.0, 76.64528687401463 ], "wc_reply_reviewers_avg": [ 23.75, 11.008519428151999 ], "wc_reply_authors_avg": [ 170.75, 170.46168924424046 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18393117295421486074&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "wsu.edu;princeton.edu;ucsd.edu;ibm.com;ufl.edu;eecs.wsu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Washington State University;Princeton University;University of California, San Diego;IBM;University of Florida", "aff_unique_dep": ";;;IBM Research;", "aff_unique_url": "https://wsu.edu;https://www.princeton.edu;https://www.ucsd.edu;https://www.ibm.com/research;https://www.ufl.edu", "aff_unique_abbr": "WSU;Princeton;UCSD;IBM;UF", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Diego;Yorktown Heights", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "3D Equivariant Pose Regression via Direct Wigner-D Harmonics Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93651", "id": "nw8cXoNvep", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nw8cXoNvep", "openreview": "https://openreview.net/forum?id=nw8cXoNvep", "poster": "/media/PosterPDFs/NeurIPS%202024/93651.png?t=1731655677.266813", "project": "", "author_site": "Jongmin Lee, Minsu Cho", "tldr": "", "abstract": "Determining the 3D orientations of an object in an image, known as single-image pose estimation, is a crucial task in 3D vision applications. Existing methods typically learn 3D rotations parametrized in the spatial domain using Euler angles or quaternions, but these representations often introduce discontinuities and singularities. SO(3)-equivariant networks enable the structured capture of pose patterns with data-efficient learning, but the parametrizations in spatial domain are incompatible with their architecture, particularly spherical CNNs, which operate in the frequency domain to enhance computational efficiency. To overcome these issues, we propose a frequency-domain approach that directly predicts Wigner-D coefficients for 3D rotation regression, aligning with the operations of spherical CNNs. Our SO(3)-equivariant pose harmonics predictor overcomes the limitations of spatial parameterizations, ensuring consistent pose estimation under arbitrary rotations. Trained with a frequency-domain regression loss, our method achieves state-of-the-art results on benchmarks such as ModelNet10-SO(3) and PASCAL3D+, with significant improvements in accuracy, robustness, and data efficiency.", "keywords": "SO(3) pose estimation;3D rotation representation;SO(3)-equivariance;3D equivariant networks;spherical harmonics;Wigner-D Matrix;spherical CNNs;Wigner-D coefficients prediction;uncertainty modeling;data sampling efficiency", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jongmin Lee;Minsu Cho", "authorids": "~Jongmin_Lee2;~Minsu_Cho1", "gender": "M;M", "homepage": "https://bluedream1121.github.io/;http://cvlab.postech.ac.kr/~mcho/", "dblp": "68/222-5;", "google_scholar": "https://scholar.google.co.kr/citations?user=WVVqJX8AAAAJ;5TyoF5QAAAAJ", "orcid": ";", "linkedin": ";minsu-cho-062b3750/", "or_profile": "~Jongmin_Lee2;~Minsu_Cho1", "aff": "POSTECH;POSTECH", "aff_domain": "postech.ac.kr;postech.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2024d,\ntitle={3D Equivariant Pose Regression via Direct Wigner-D Harmonics Prediction},\nauthor={Jongmin Lee and Minsu Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nw8cXoNvep}\n}", "github": "", "reviewers": "HCET;iMp8;f5v8;HnJh", "pdf_size": 3159490, "rating": "3;5;6;6", "confidence": "4;3;5;4", "soundness": "2;3;3;3", "novelty": "1;3;2;2", "presentation": "2;2;2;3", "wc_summary": "64;121;119;120", "wc_strengths": "30;82;82;244", "wc_weaknesses": "425;15;392;894", "wc_questions": "56;28;311;82", "wc_limitations": "10;18;85;21", "wc_review": "585;264;989;1361", "wc_reply_reviewers": "770;69;197;263", "wc_reply_authors": "646;338;401;76", "reply_reviewers": "2;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 106.0, 24.259018941416407 ], "wc_strengths_avg": [ 109.5, 80.50310553015952 ], "wc_weaknesses_avg": [ 431.5, 311.841706639763 ], "wc_questions_avg": [ 119.25, 112.34183325903133 ], "wc_limitations_avg": [ 33.5, 30.004166377354995 ], "wc_review_avg": [ 799.75, 413.510202413435 ], "wc_reply_reviewers_avg": [ 324.75, 266.35913256353723 ], "wc_reply_authors_avg": [ 365.25, 202.79715851066553 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.28867513459481287, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LZRF-Vg7FpcJ:scholar.google.com/&scioq=3D+Equivariant+Pose+Regression+via+Direct+Wigner-D+Harmonics+Prediction&hl=en&as_sdt=0,10", "gs_version_total": 6, "email": "postech.ac.kr;postech.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Pohang University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.postech.ac.kr", "aff_unique_abbr": "POSTECH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pohang", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Nonlinear dynamics of localization in neural receptive fields", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93650", "id": "nw9JmfL99s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nw9JmfL99s", "openreview": "https://openreview.net/forum?id=nw9JmfL99s", "poster": "", "project": "", "author_site": "Leon Lufkin, Andrew Saxe, Erin Grant", "tldr": "", "abstract": "Localized receptive fields\u2014neurons that are selective for certain contiguous spatiotemporal features of their input\u2014populate early sensory regions of the mammalian brain. Unsupervised learning algorithms that optimize explicit sparsity or independence criteria replicate features of these localized receptive fields, but fail to explain directly how localization arises through learning without efficient coding, as occurs in early layers of deep neural networks and might occur in early sensory regions of biological systems. We consider an alternative model in which localized receptive fields emerge without explicit top-down efficiency constraints\u2014a feed-forward neural network trained on a data model inspired by the structure of natural images. Previous work identified the importance of non-Gaussian statistics to localization in this setting but left open questions about the mechanisms driving dynamical emergence. We address these questions by deriving the effective learning dynamics for a single nonlinear neuron, making precise how higher-order statistical properties of the input data drive emergent localization, and we demonstrate that the predictions of these effective dynamics extend to the many-neuron setting. Our analysis provides an alternative explanation for the ubiquity of localization as resulting from the nonlinear dynamics of learning in neural circuits", "keywords": "localization;receptive fields;learning dynamics;emergence", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Leon Lufkin;Andrew M Saxe;Erin Grant", "authorids": "~Leon_Lufkin1;~Andrew_M_Saxe1;~Erin_Grant1", "gender": ";M;F", "homepage": ";https://www.saxelab.org;https://eringrant.github.io/", "dblp": ";39/6894;169/3175", "google_scholar": ";h0Al1fcAAAAJ;OSg3D9MAAAAJ", "orcid": ";0000-0002-9831-8812;0009-0002-8623-7254", "linkedin": ";;eringrant914", "or_profile": "~Leon_Lufkin1;~Andrew_M_Saxe1;~Erin_Grant1", "aff": ";University College London, University of London;University College London", "aff_domain": ";ucl.ac.uk;ucl.ac.uk", "position": ";Full Professor;Postdoc", "bibtex": "@inproceedings{\nlufkin2024nonlinear,\ntitle={Nonlinear dynamics of localization in neural receptive fields},\nauthor={Leon Lufkin and Andrew M Saxe and Erin Grant},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nw9JmfL99s}\n}", "github": "", "reviewers": "PEfQ;iKD4;TcXg", "pdf_size": 4099486, "rating": "7;7;8", "confidence": "4;3;4", "soundness": "2;4;4", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "53;45;282", "wc_strengths": "53;88;54", "wc_weaknesses": "230;128;78", "wc_questions": "189;17;111", "wc_limitations": "21;7;1", "wc_review": "546;285;526", "wc_reply_reviewers": "36;35;11", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 126.66666666666667, 109.88579930495517 ], "wc_strengths_avg": [ 65.0, 16.268579122549905 ], "wc_weaknesses_avg": [ 145.33333333333334, 63.252580096695574 ], "wc_questions_avg": [ 105.66666666666667, 70.31990393116938 ], "wc_limitations_avg": [ 9.666666666666666, 8.379870059984357 ], "wc_review_avg": [ 452.3333333333333, 118.60391599306024 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 11.55662388223981 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12015370223028150865&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";ucl.ac.uk;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Model LEGO: Creating Models Like Disassembling and Assembling Building Blocks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93649", "id": "nxL7eazKBI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nxL7eazKBI", "openreview": "https://openreview.net/forum?id=nxL7eazKBI", "poster": "/media/PosterPDFs/NeurIPS%202024/93649.png?t=1732105538.3943534", "project": "", "author_site": "Jiacong Hu, Jing Gao, Jingwen Ye, Yang Gao, Xingen Wang, Zunlei Feng, Mingli Song", "tldr": "", "abstract": "With the rapid development of deep learning, the increasing complexity and scale of parameters make training a new model increasingly resource-intensive. In this paper, we start from the classic convolutional neural network (CNN) and explore a paradigm that does not require training to obtain new models. Similar to the birth of CNN inspired by receptive fields in the biological visual system, we draw inspiration from the information subsystem pathways in the biological visual system and propose Model Disassembling and Assembling (MDA). During model disassembling, we introduce the concept of relative contribution and propose a component locating technique to extract task-aware components from trained CNN classifiers. For model assembling, we present the alignment padding strategy and parameter scaling strategy to construct a new model tailored for a specific task, utilizing the disassembled task-aware components.\nThe entire process is akin to playing with LEGO bricks, enabling arbitrary assembly of new models, and providing a novel perspective for model creation and reuse. Extensive experiments showcase that task-aware components disassembled from CNN classifiers or new models assembled using these components closely match or even surpass the performance of the baseline,\ndemonstrating its promising results for model reuse. Furthermore, MDA exhibits diverse potential applications, with comprehensive experiments exploring model decision route analysis, model compression, knowledge distillation, and more.", "keywords": "Disassembling Models;Assembling Models;Model Reuse;Model Interpretability", "primary_area": "machine_vision", "supplementary_material": "/attachment/ca84f5e5a96d15e1f630ef96580b6e4fa212d745.zip", "author": "Jiacong Hu;Jing Gao;Jingwen Ye;Yang Gao;Xingen Wang;Zunlei Feng;Mingli Song", "authorids": "~Jiacong_Hu1;~Jing_Gao4;~Jingwen_Ye1;~Yang_Gao21;~Xingen_Wang1;~Zunlei_Feng1;~Mingli_Song1", "gender": "M;F;F;M;M;M;M", "homepage": "https://jiaconghu.com;https://github.com/Jing-G2;https://jngwenye.github.io/;;https://person.zju.edu.cn/xingenwang;https://person.zju.edu.cn/en/zunleifeng;https://person.zju.edu.cn/msong", "dblp": "136/3061;;200/7853;;;191/2455;71/5333", "google_scholar": ";;8GQnNP0AAAAJ;;S8C1Y0gAAAAJ;wMtjcGwAAAAJ;7oLbhAwAAAAJ", "orcid": ";;;0000-0003-2635-1434;;;0000-0003-2621-6048", "linkedin": ";;;;;;", "or_profile": "~Jiacong_Hu1;~Jing_Gao4;~Jingwen_Ye1;~Yang_Gao21;~Xingen_Wang1;~Zunlei_Feng1;~Mingli_Song1", "aff": "Zhejiang University;Carnegie Mellon University;National University of Singapore;College of Computer Science and Technology, Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;cmu.edu;nus.edu.sg;cs.zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Postdoc;PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024model,\ntitle={Model {LEGO}: Creating Models Like Disassembling and Assembling Building Blocks},\nauthor={Jiacong Hu and Jing Gao and Jingwen Ye and Yang Gao and Xingen Wang and Zunlei Feng and Mingli Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nxL7eazKBI}\n}", "github": "", "reviewers": "UTXG;17d9;gfoc;FhUd", "pdf_size": 5024300, "rating": "2;7;7;7", "confidence": "3;5;5;4", "soundness": "2;4;4;4", "novelty": "2;3;4;4", "presentation": "2;3;3;3", "wc_summary": "55;76;135;68", "wc_strengths": "19;59;183;58", "wc_weaknesses": "126;39;163;84", "wc_questions": "25;5;112;2", "wc_limitations": "37;11;18;7", "wc_review": "262;190;611;219", "wc_reply_reviewers": "270;11;92;0", "wc_reply_authors": "1707;21;555;0", "reply_reviewers": "1;1;1;0", "reply_authors": "5;2;3;1", "rating_avg": [ 5.75, 2.165063509461097 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.5, 30.663496212923928 ], "wc_strengths_avg": [ 79.75, 61.755060521385616 ], "wc_weaknesses_avg": [ 103.0, 46.330335634441504 ], "wc_questions_avg": [ 36.0, 44.760473634670134 ], "wc_limitations_avg": [ 18.25, 11.519006033508273 ], "wc_review_avg": [ 320.5, 169.6651113222751 ], "wc_reply_reviewers_avg": [ 93.25, 108.05409524862998 ], "wc_reply_authors_avg": [ 570.75, 692.6927078438173 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dclk0QUnQ9AJ:scholar.google.com/&scioq=Model+LEGO:+Creating+Models+Like+Disassembling+and+Assembling+Building+Blocks&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "zju.edu.cn;cmu.edu;nus.edu.sg;cs.zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;0;0;0", "aff_unique_norm": "Zhejiang University;Carnegie Mellon University;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.cmu.edu;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;CMU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;0;0", "aff_country_unique": "China;United States;Singapore" }, { "title": "If You Want to Be Robust, Be Wary of Initialization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93648", "id": "nxumYwxJPB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nxumYwxJPB", "openreview": "https://openreview.net/forum?id=nxumYwxJPB", "poster": "", "project": "", "author_site": "Sofiane ENNADIR, Johannes Lutzeyer, Michalis Vazirgiannis, El Houcine Bergou", "tldr": "", "abstract": "Graph Neural Networks (GNNs) have demonstrated remarkable performance across a spectrum of graph-related tasks, however concerns persist regarding their vulnerability to adversarial perturbations. While prevailing defense strategies focus primarily on pre-processing techniques and adaptive message-passing schemes, this study delves into an under-explored dimension: the impact of weight initialization and associated hyper-parameters, such as training epochs, on a model\u2019s robustness.\nWe introduce a theoretical framework bridging the connection between initialization strategies and a network's resilience to adversarial perturbations. Our analysis reveals a direct relationship between initial weights, number of training epochs and the model\u2019s vulnerability, offering new insights into adversarial robustness beyond conventional defense mechanisms. While our primary focus is on GNNs, we extend our theoretical framework, providing a general upper-bound applicable to Deep Neural Networks.\nExtensive experiments, spanning diverse models and real-world datasets subjected to various adversarial attacks, validate our findings. We illustrate that selecting appropriate initialization not only ensures performance on clean datasets but also enhances model robustness against adversarial perturbations, with observed gaps of up to 50\\% compared to alternative initialization approaches.", "keywords": "Adversarial Robustness;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/63ca99e1eaca651f9ceb0c2bc40bc716fbbc49da.zip", "author": "Sofiane ENNADIR;Johannes F. Lutzeyer;Michalis Vazirgiannis;El houcine Bergou", "authorids": "~Sofiane_ENNADIR1;~Johannes_F._Lutzeyer1;~Michalis_Vazirgiannis1;~El_houcine_Bergou1", "gender": "M;M;M;M", "homepage": "https://sennadir.github.io/;https://johanneslutzeyer.com/;;https://ecrc.kaust.edu.sa/Pages/Bergou.aspx", "dblp": "355/0360;253/8868;v/MVazirgiannis;https://dblp.uni-trier.de/pers/b/Bergou:El_Houcine.html", "google_scholar": "https://scholar.google.fr/citations?user=EqqOxfMAAAAJ;OfT4ns8AAAAJ;https://scholar.google.gr/citations?user=aWGJYcMAAAAJ;", "orcid": "0000-0001-9969-4660;;;", "linkedin": "sofiane-ennadir/;johannes-lutzeyer-213b7480/;;ehbergou/", "or_profile": "~Sofiane_ENNADIR1;~Johannes_F._Lutzeyer1;~Michalis_Vazirgiannis1;~El_houcine_Bergou1", "aff": "KTH Royal Institute of Technology;Ecole Polytechique;Ecole Polytechnique, France;", "aff_domain": "kth.se;polytechnique.edu;polytechnique.fr;", "position": "PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nennadir2024if,\ntitle={If You Want to Be Robust, Be Wary of Initialization},\nauthor={Sofiane ENNADIR and Johannes F. Lutzeyer and Michalis Vazirgiannis and El houcine Bergou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nxumYwxJPB}\n}", "github": "", "reviewers": "A9tB;ZfjT;bCNj;7mx1", "pdf_size": 675861, "rating": "5;6;6;7", "confidence": "3;5;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;3", "wc_summary": "33;85;98;164", "wc_strengths": "28;57;129;83", "wc_weaknesses": "267;265;121;197", "wc_questions": "4;34;45;178", "wc_limitations": "1;8;60;9", "wc_review": "333;449;453;631", "wc_reply_reviewers": "138;115;21;43", "wc_reply_authors": "63;48;29;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 46.674404120459855 ], "wc_strengths_avg": [ 74.25, 37.1172129880464 ], "wc_weaknesses_avg": [ 212.5, 59.87278179607158 ], "wc_questions_avg": [ 65.25, 66.80334946692419 ], "wc_limitations_avg": [ 19.5, 23.58495283014151 ], "wc_review_avg": [ 466.5, 106.50234739197066 ], "wc_reply_reviewers_avg": [ 79.25, 48.5714679621689 ], "wc_reply_authors_avg": [ 42.25, 14.271912976192084 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13707654749381445064&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "email": "kth.se;polytechnique.edu;polytechnique.fr;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "KTH Royal Institute of Technology;Ecole Polytechnique", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.polytechnique.edu", "aff_unique_abbr": "KTH;Polytechnique", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Sweden;France" }, { "title": "Is Value Learning Really the Main Bottleneck in Offline RL?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93647", "id": "nyp59a31Ju", "proceeding": "", "pdf": "https://openreview.net/pdf?id=nyp59a31Ju", "openreview": "https://openreview.net/forum?id=nyp59a31Ju", "poster": "/media/PosterPDFs/NeurIPS%202024/93647.png?t=1730669307.444786", "project": "", "author_site": "Seohong Park, Kevin Frans, Sergey Levine, Aviral Kumar", "tldr": "", "abstract": "While imitation learning requires access to high-quality data, offline reinforcement learning (RL) should, in principle, perform similarly or better with substantially lower data quality by using a value function. However, current results indicate that offline RL often performs worse than imitation learning, and it is often unclear what holds back the performance of offline RL. Motivated by this observation, we aim to understand the bottlenecks in current offline RL algorithms. While poor performance of offline RL is typically attributed to an imperfect value function, we ask: *is the main bottleneck of offline RL indeed in learning the value function, or something else?* To answer this question, we perform a systematic empirical study of (1) value learning, (2) policy extraction, and (3) policy generalization in offline RL problems, analyzing how these components affect performance. We make two surprising observations. First, we find that the choice of a policy extraction algorithm significantly affects the performance and scalability of offline RL, often more so than the value learning objective. For instance, we show that common value-weighted behavioral cloning objectives (e.g., AWR) do not fully leverage the learned value function, and switching to behavior-constrained policy gradient objectives (e.g., DDPG+BC) often leads to substantial improvements in performance and scalability. Second, we find that a big barrier to improving offline RL performance is often imperfect policy generalization on test-time states out of the support of the training data, rather than policy learning on in-distribution states. We then show that the use of suboptimal but high-coverage data or test-time policy training techniques can address this generalization issue in practice. Specifically, we propose two simple test-time policy improvement methods and show that these methods lead to better performance.", "keywords": "reinforcement learning;offline reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/86596bf90003909755701437b2fe331bf6466609.zip", "author": "Seohong Park;Kevin Frans;Sergey Levine;Aviral Kumar", "authorids": "~Seohong_Park1;~Kevin_Frans1;~Sergey_Levine1;~Aviral_Kumar2", "gender": ";M;M;M", "homepage": "https://seohong.me/;http://kvfrans.com;https://people.eecs.berkeley.edu/~svlevine/;https://aviralkumar2907.github.io/", "dblp": "227/6308;199/2314;80/7594;202/7961", "google_scholar": ";NQ2ZWBoAAAAJ;8R35rCwAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Seohong_Park1;~Kevin_Frans1;~Sergey_Levine1;~Aviral_Kumar2", "aff": "University of California, Berkeley;University of California, Berkeley;Google;Google DeepMind", "aff_domain": "berkeley.edu;berkeley.edu;google.com;google.com", "position": "PhD student;PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\npark2024is,\ntitle={Is Value Learning Really the Main Bottleneck in Offline {RL}?},\nauthor={Seohong Park and Kevin Frans and Sergey Levine and Aviral Kumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=nyp59a31Ju}\n}", "github": "", "reviewers": "C9VZ;VnDz;Bu3n;BVrx", "pdf_size": 1817818, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;3;4", "wc_summary": "85;52;79;113", "wc_strengths": "118;90;92;86", "wc_weaknesses": "395;223;256;57", "wc_questions": "2;137;56;31", "wc_limitations": "15;10;7;9", "wc_review": "615;512;490;296", "wc_reply_reviewers": "239;17;24;0", "wc_reply_authors": "443;27;22;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 82.25, 21.672274915199836 ], "wc_strengths_avg": [ 96.5, 12.599603168354152 ], "wc_weaknesses_avg": [ 232.75, 120.25883543424159 ], "wc_questions_avg": [ 56.5, 50.25186563700894 ], "wc_limitations_avg": [ 10.25, 2.947456530637899 ], "wc_review_avg": [ 478.25, 115.31776749486612 ], "wc_reply_reviewers_avg": [ 70.0, 97.96172722037929 ], "wc_reply_authors_avg": [ 123.0, 185.031078470618 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2206218407171575889&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "berkeley.edu;berkeley.edu;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Berkeley;Mountain View;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "nzzxLPJENZ", "title": "Efficient Evaluation of LLMs via Branching Preference Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) have made significant advances across various generative tasks, progressing toward achieving near-human levels of intelligence. However, in many scenarios, LLMs face the challenge of insufficient human evaluation or even the inability to evaluate reliably. Particularly, in complex dialogue scenarios involving diverse and intricate user intents, LLMs as evaluators of AI responses exhibit a substantial gap compared to humans. Moreover, due to the scarcity of high-quality evaluation data, LLMs exhibit deficiencies in their evaluation capabilities. In this work, we conceptualize the evaluation process as a decision tree, where each node represents an evaluation action, and each path from the root to a leaf node represents a trajectory of evaluation reasoning. We demonstrate that within a limited search space, there exist better decision-making behaviors that facilitate the model in making reasonable and accurate judgments. Specifically, we propose a tree-based data sampling method to generate supervised data and preference pairs derived from the evaluation tree. Furthermore, we introduce preference learning based on the DPO algorithm, which empowers the fine-grained evaluation model to explore and learn better branching strategies within budget-limited scenarios. Our model significantly reduces the dependency on labeled data and demonstrates strong performance across three different evaluation settings: in-distribution, out-of-distribution, and transfer evaluation. Experiments indicate that our model can reduce inference costs by 90\\% compared to conducting searches across the entire evaluation tree, thereby significantly enhancing efficiency.", "keywords": "LLMs; dialogue evaluation\uff1bRLHF", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/4a002888c111e02a295d45bc74542d54ff8e171f.zip", "author": "xiaobo liang;Haoke Zhang;Juntao Li;Qiaoming Zhu;Min Zhang", "authorids": "~xiaobo_liang1;~Haoke_Zhang1;~Juntao_Li2;~Qiaoming_Zhu1;~Min_Zhang9", "gender": "M;M;M;M;M", "homepage": ";https://banner-z.github.io/;https://lijuntaopku.github.io/;https://scst.suda.edu.cn/0f/a2/c11250a528290/page.htm;https://zhangmin-nlp-ai.github.io/", "dblp": ";;;28/1279;83/5342-5", "google_scholar": ";;sZSygsYAAAAJ;6BXGJK8AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0009-0001-1550-2877;;0000-0002-6286-7529;0000-0002-2708-8976;", "linkedin": ";;;;", "or_profile": "~xiaobo_liang1;~Haoke_Zhang1;~Juntao_Li2;~Qiaoming_Zhu1;~Min_Zhang9", "aff": "Soochow University, China;Baidu;Soochow University, China;Soochow University;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;baidu.com;suda.edu.cn;suda.edu.cn;hit.edu.cn", "position": "PhD student;Intern;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024efficient,\ntitle={Efficient Evaluation of {LLM}s via Branching Preference Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=nzzxLPJENZ}\n}", "github": "", "project": "", "reviewers": "RFGW;GhxJ;cKwh;sKgh;YdUv", "site": "https://openreview.net/forum?id=nzzxLPJENZ", "pdf_size": 2191427, "rating": "4;4;4;6;6", "confidence": "5;3;2;3;2", "soundness": "2;3;2;4;3", "novelty": "2;2;2;3;3", "presentation": "1;3;2;4;3", "wc_summary": "60;95;84;80;101", "wc_strengths": "11;74;62;40;27", "wc_weaknesses": "141;122;111;90;65", "wc_questions": "42;36;66;255;41", "wc_limitations": "1;1;36;1;6", "wc_review": "255;328;359;466;240", "wc_reply_reviewers": "156;96;215;0;23", "wc_reply_authors": "629;650;731;0;0", "reply_reviewers": "2;1;1;0;1", "reply_authors": "2;2;2;1;1", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 84.0, 14.15627069534911 ], "wc_strengths_avg": [ 42.8, 22.850820554194545 ], "wc_weaknesses_avg": [ 105.8, 26.24042682579687 ], "wc_questions_avg": [ 88.0, 84.14511275172195 ], "wc_limitations_avg": [ 9.0, 13.638181696985855 ], "wc_review_avg": [ 329.6, 81.32060009616261 ], "wc_reply_reviewers_avg": [ 98.0, 80.35670476071054 ], "wc_reply_authors_avg": [ 402.0, 329.9945454094658 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.372677996249965, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AKf2CqXlCyAJ:scholar.google.com/&scioq=Efficient+Evaluation+of+LLMs+via+Branching+Preference+Learning&hl=en&as_sdt=0,3", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Soochow University;Baidu;Harbin Institute of Technology", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.baidu.com;http://en.hhit.edu.cn/", "aff_unique_abbr": "Soochow U;Baidu;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Provable Partially Observable Reinforcement Learning with Privileged Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93646", "id": "o3i1JEfzKw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o3i1JEfzKw", "openreview": "https://openreview.net/forum?id=o3i1JEfzKw", "poster": "", "project": "", "author_site": "Yang Cai, Xiangyu Liu, Argyris Oikonomou, Kaiqing Zhang", "tldr": "", "abstract": "Partial observability of the underlying states generally presents significant challenges for reinforcement learning (RL). In practice, certain *privileged information* , e.g., the access to states from simulators, has been exploited in training and achieved prominent empirical successes. To better understand the benefits of privileged information, we revisit and examine several simple and practically used paradigms in this setting, with both computation and sample efficiency analyses. Specifically, we first formalize the empirical paradigm of *expert distillation* (also known as *teacher-student* learning), demonstrating its pitfall in finding near-optimal policies. We then identify a condition of the partially observable environment, the deterministic filter condition, under which expert distillation achieves sample and computational complexities that are *both* polynomial. Furthermore, we investigate another successful empirical paradigm of *asymmetric actor-critic*, and focus on the more challenging setting of observable partially observable Markov decision processes. We develop a belief-weighted optimistic asymmetric actor-critic algorithm with polynomial sample and quasi-polynomial computational complexities, where one key component is a new provable oracle for learning belief states that preserve *filter stability* under a misspecified model, which may be of independent interest. Finally, we also investigate the provable efficiency of partially observable multi-agent RL (MARL) with privileged information. We develop algorithms with the feature of centralized-training-with-decentralized-execution, a popular framework in empirical MARL, with polynomial sample and (quasi-)polynomial computational complexity in both paradigms above. Compared with a few recent related theoretical studies, our focus is on understanding practically inspired algorithmic paradigms, without computationally intractable oracles.", "keywords": "reinforcement learning;pomdp;partial observability;computational;privileged information;expert distillation;teacher-student learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/6cc718938c2bc8c0e1bacc884fc4063c546cde03.zip", "author": "Yang Cai;Xiangyu Liu;Argyris Oikonomou;Kaiqing Zhang", "authorids": "~Yang_Cai1;~Xiangyu_Liu4;~Argyris_Oikonomou1;~Kaiqing_Zhang3", "gender": ";M;;", "homepage": ";https://xiangyu-liu.github.io;http://aroikonomou.github.io;", "dblp": ";;254/2082;", "google_scholar": ";;;", "orcid": ";;0000-0002-6456-0109;", "linkedin": ";;;", "or_profile": "~Yang_Cai1;~Xiangyu_Liu4;~Argyris_Oikonomou1;~Kaiqing_Zhang3", "aff": ";University of Maryland, College Park;Meta;", "aff_domain": ";umd.edu;meta.com;", "position": ";PhD student;Intern;", "bibtex": "@inproceedings{\ncai2024provable,\ntitle={Provable Partially Observable Reinforcement Learning with Privileged Information},\nauthor={Yang Cai and Xiangyu Liu and Argyris Oikonomou and Kaiqing Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o3i1JEfzKw}\n}", "github": "", "reviewers": "AXXx;tn5m;zFhw;9C6t", "pdf_size": 964716, "rating": "4;5;5;6", "confidence": "2;3;4;3", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "1;3;1;3", "wc_summary": "124;60;55;54", "wc_strengths": "58;74;32;44", "wc_weaknesses": "119;223;382;57", "wc_questions": "111;20;42;17", "wc_limitations": "15;5;13;28", "wc_review": "427;382;524;200", "wc_reply_reviewers": "60;14;212;56", "wc_reply_authors": "1122;167;1514;531", "reply_reviewers": "1;1;2;1", "reply_authors": "4;2;5;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 73.25, 29.38856069970083 ], "wc_strengths_avg": [ 52.0, 15.684387141358123 ], "wc_weaknesses_avg": [ 195.25, 123.05765924963794 ], "wc_questions_avg": [ 47.5, 37.911080174534725 ], "wc_limitations_avg": [ 15.25, 8.257572282456872 ], "wc_review_avg": [ 383.25, 117.58693592402176 ], "wc_reply_reviewers_avg": [ 85.5, 75.224663508719 ], "wc_reply_authors_avg": [ 833.5, 520.1059988117806 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13124471719404533736&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": ";umd.edu;meta.com;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "University of Maryland;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www/umd.edu;https://meta.com", "aff_unique_abbr": "UMD;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Measuring Goal-Directedness", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93645", "id": "o4coDIby7e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o4coDIby7e", "openreview": "https://openreview.net/forum?id=o4coDIby7e", "poster": "/media/PosterPDFs/NeurIPS%202024/93645.png?t=1731489378.261841", "project": "", "author_site": "Matt MacDermott, James Fox, Francesco Belardinelli, Tom Everitt", "tldr": "", "abstract": "We define maximum entropy goal-directedness (MEG), a formal measure of goal-\ndirectedness in causal models and Markov decision processes, and give algorithms\nfor computing it. Measuring goal-directedness is important, as it is a critical\nelement of many concerns about harm from AI. It is also of philosophical interest,\nas goal-directedness is a key aspect of agency. MEG is based on an adaptation of\nthe maximum causal entropy framework used in inverse reinforcement learning. It\ncan measure goal-directedness with respect to a known utility function, a hypothesis\nclass of utility functions, or a set of random variables. We prove that MEG satisfies\nseveral desiderata and demonstrate our algorithms with small-scale experiments.", "keywords": "Causality;Graphical Models;Maximum Causal Entropy;Agency", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Matt MacDermott;James Fox;Francesco Belardinelli;Tom Everitt", "authorids": "~Matt_MacDermott1;~James_Fox2;~Francesco_Belardinelli1;~Tom_Everitt1", "gender": "M;M;M;M", "homepage": "https://mattmacdermott.com/;;https://www.doc.ic.ac.uk/~fbelard/;https://tomeveritt.se", "dblp": "326/8184;;59/2916;151/4259", "google_scholar": ";hMZs5tsAAAAJ;https://scholar.google.fr/citations?user=Mr35r1EAAAAJ;https://scholar.google.com.au/citations?user=BdulyjIAAAAJ", "orcid": "0000-0003-1269-9253;0000-0003-1485-2995;0000-0002-7768-1794;0000-0003-1210-9866", "linkedin": ";;;", "or_profile": "~Matt_MacDermott1;~James_Fox2;~Francesco_Belardinelli1;~Tom_Everitt1", "aff": "Imperial College London;Department of Computer Science;Imperial College London;Google DeepMind", "aff_domain": "imperial.ac.uk;cs.ox.ac.uk;imperial.ac.uk;google.com", "position": "PhD student;PhD student;Lecturer;Researcher", "bibtex": "@inproceedings{\nmacdermott2024measuring,\ntitle={Measuring Goal-Directedness},\nauthor={Matt MacDermott and James Fox and Francesco Belardinelli and Tom Everitt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o4coDIby7e}\n}", "github": "", "reviewers": "Kmdz;AseF;8tgY;8w2P", "pdf_size": 502841, "rating": "6;6;7;8", "confidence": "3;2;3;3", "soundness": "4;3;4;3", "novelty": "2;3;4;4", "presentation": "3;3;4;2", "wc_summary": "97;92;142;291", "wc_strengths": "87;80;59;229", "wc_weaknesses": "173;63;48;1412", "wc_questions": "16;127;180;564", "wc_limitations": "1;3;4;190", "wc_review": "374;365;433;2686", "wc_reply_reviewers": "86;188;63;426", "wc_reply_authors": "42;0;18;70", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 155.5, 80.61792604625848 ], "wc_strengths_avg": [ 113.75, 67.33266295045816 ], "wc_weaknesses_avg": [ 424.0, 572.4600422736944 ], "wc_questions_avg": [ 221.75, 206.26969602925195 ], "wc_limitations_avg": [ 49.5, 81.12490369793976 ], "wc_review_avg": [ 964.5, 994.2516029657684 ], "wc_reply_reviewers_avg": [ 190.75, 143.73826039019673 ], "wc_reply_authors_avg": [ 32.5, 26.28212320190285 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12215760501274902190&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "imperial.ac.uk;cs.ox.ac.uk;imperial.ac.uk;google.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Imperial College London;Unknown Institution;Google", "aff_unique_dep": ";Department of Computer Science;Google DeepMind", "aff_unique_url": "https://www.imperial.ac.uk;;https://deepmind.com", "aff_unique_abbr": "ICL;;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom;" }, { "title": "Constrained Sampling with Primal-Dual Langevin Monte Carlo", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93644", "id": "o6Hk6vld20", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o6Hk6vld20", "openreview": "https://openreview.net/forum?id=o6Hk6vld20", "poster": "/media/PosterPDFs/NeurIPS%202024/93644.png?t=1732123094.7719014", "project": "", "author_site": "Luiz F. O. Chamon, Mohammad Reza Karimi Jaghargh, Anna Korba", "tldr": "", "abstract": "This work considers the problem of sampling from a probability distribution known up to a normalization constant while satisfying a set of statistical constraints specified by the expected values of general nonlinear functions. This problem finds applications in, e.g., Bayesian inference, where it can constrain moments to evaluate counterfactual scenarios or enforce desiderata such as prediction fairness. Methods developed to handle support constraints, such as those based on mirror maps, barriers, and penalties, are not suited for this task. This work therefore relies on gradient descent-ascent dynamics in Wasserstein space to put forward a discrete-time primal-dual Langevin Monte Carlo algorithm (PD-LMC) that simultaneously constrains the target distribution and samples from it. We analyze the convergence of PD-LMC under standard assumptions on the target distribution and constraints, namely (strong) convexity and log-Sobolev inequalities. To do so, we bring classical optimization arguments for saddle-point algorithms to the geometry of Wasserstein space. We illustrate the relevance and effectiveness of PD-LMC in several applications.", "keywords": "Sampling;optimization;constrained optimization;duality", "primary_area": "optimization", "supplementary_material": "", "author": "Luiz F. O. Chamon;Mohammad Reza Karimi Jaghargh;Anna Korba", "authorids": "~Luiz_F._O._Chamon1;~Mohammad_Reza_Karimi_Jaghargh1;~Anna_Korba2", "gender": "M;M;", "homepage": "https://www.luizchamon.com;http://moreka.github.io;", "dblp": "120/6982;https://dblp.uni-trier.de/pers/hd/k/Karimi:Mohammad_Reza;182/8959.html", "google_scholar": "https://scholar.google.ca/citations?user=FIm-l-sAAAAJ;CEZbTgMAAAAJ;https://scholar.google.fr/citations?user=dbH6E3kAAAAJ", "orcid": "0000-0001-7731-6650;;", "linkedin": "luiz-chamon-abb07a18;;", "or_profile": "~Luiz_F._O._Chamon1;~Mohammad_Reza_Karimi_Jaghargh1;~Anna_Korba2", "aff": "Universit\u00e4t Stuttgart;Swiss Federal Institute of Technology;Ensae ParisTech", "aff_domain": "uni-stuttgart.de;ethz.ch;ensae.fr", "position": "Principal Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchamon2024constrained,\ntitle={Constrained Sampling with Primal-Dual Langevin Monte Carlo},\nauthor={Luiz F. O. Chamon and Mohammad Reza Karimi Jaghargh and Anna Korba},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o6Hk6vld20}\n}", "github": "", "reviewers": "ZMkL;d9K2;4nfu", "pdf_size": 1688893, "rating": "5;5;7", "confidence": "3;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;4", "wc_summary": "35;85;82", "wc_strengths": "29;157;44", "wc_weaknesses": "205;87;49", "wc_questions": "4;210;78", "wc_limitations": "8;19;4", "wc_review": "281;558;257", "wc_reply_reviewers": "69;93;21", "wc_reply_authors": "0;490;0", "reply_reviewers": "1;2;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 67.33333333333333, 22.89589968143253 ], "wc_strengths_avg": [ 76.66666666666667, 57.13337222869154 ], "wc_weaknesses_avg": [ 113.66666666666667, 66.41954197038365 ], "wc_questions_avg": [ 97.33333333333333, 85.20302550705318 ], "wc_limitations_avg": [ 10.333333333333334, 6.342099196813483 ], "wc_review_avg": [ 365.3333333333333, 136.58778211180612 ], "wc_reply_reviewers_avg": [ 61.0, 29.93325909419153 ], "wc_reply_authors_avg": [ 163.33333333333334, 230.98821518760553 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ppZyZhHwbPYJ:scholar.google.com/&scioq=Constrained+Sampling+with+Primal-Dual+Langevin+Monte+Carlo&hl=en&as_sdt=0,23", "gs_version_total": 3, "email": "uni-stuttgart.de;ethz.ch;ensae.fr", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Stuttgart;Swiss Federal Institute of Technology;ENSAE ParisTech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-stuttgart.de;https://www.ethz.ch;https://www.ensae.fr", "aff_unique_abbr": "Uni Stuttgart;ETH Zurich;Ensae", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Germany;Switzerland;France" }, { "title": "LookHere: Vision Transformers with Directed Attention Generalize and Extrapolate", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93643", "id": "o7DOGbZeyP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o7DOGbZeyP", "openreview": "https://openreview.net/forum?id=o7DOGbZeyP", "poster": "/media/PosterPDFs/NeurIPS%202024/93643.png?t=1731423166.0929098", "project": "", "author_site": "Anthony Fuller, Daniel Kyrollos, Yousef Yassin, James Green", "tldr": "", "abstract": "High-resolution images offer more information about scenes that can improve model accuracy. However, the dominant model architecture in computer vision, the vision transformer (ViT), cannot effectively leverage larger images without finetuning \u2014 ViTs poorly extrapolate to more patches at test time, although transformers offer sequence length flexibility. We attribute this shortcoming to the current patch position encoding methods, which create a distribution shift when extrapolating.\n\nWe propose a drop-in replacement for the position encoding of plain ViTs that restricts attention heads to fixed fields of view, pointed in different directions, using 2D attention masks. Our novel method, called LookHere, provides translation-equivariance, ensures attention head diversity, and limits the distribution shift that attention heads face when extrapolating. We demonstrate that LookHere improves performance on classification (avg. 1.6%), against adversarial attack (avg. 5.4%), and decreases calibration error (avg. 1.5%) \u2014 on ImageNet without extrapolation. With extrapolation, LookHere outperforms the current SoTA position encoding method, 2D-RoPE, by 21.7% on ImageNet when trained at $224^2$ px and tested at $1024^2$ px. Additionally, we release a high-resolution test set to improve the evaluation of high-resolution image classifiers, called ImageNet-HR.", "keywords": "vision transformers;position encoding;computer vision", "primary_area": "machine_vision", "supplementary_material": "/attachment/6e93b3c09d71bb24debf7a2a2ade81edd99a65fe.zip", "author": "Anthony Fuller;Daniel Kyrollos;Yousef Yassin;James R Green", "authorids": "~Anthony_Fuller1;~Daniel_Kyrollos1;~Yousef_Yassin1;~James_R_Green1", "gender": "M;M;M;M", "homepage": "https://antofuller.github.io/;;;http://www.sce.carleton.ca/faculty/green", "dblp": "329/0632;;;20/1472", "google_scholar": "https://scholar.google.ca/citations?user=9NvupxcAAAAJ;IyYsKTIAAAAJ;https://scholar.google.ca/citations?user=KJoJ8nMAAAAJ;https://scholar.google.ca/citations?user=nmxbwm4AAAAJ", "orcid": "0000-0001-8187-5850;;;0000-0002-6039-2355", "linkedin": ";;;", "or_profile": "~Anthony_Fuller1;~Daniel_Kyrollos1;~Yousef_Yassin1;~James_R_Green1", "aff": "Carleton University;Carleton University;Carleton University;Carleton University", "aff_domain": "carleton.ca;carleton.ca;carleton.ca;carleton.ca", "position": "PhD student;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nfuller2024lookhere,\ntitle={LookHere: Vision Transformers with Directed Attention Generalize and Extrapolate},\nauthor={Anthony Fuller and Daniel Kyrollos and Yousef Yassin and James R Green},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o7DOGbZeyP}\n}", "github": "", "reviewers": "U7cz;irRJ;Fbdg;cHUb", "pdf_size": 50608639, "rating": "6;7;7;8", "confidence": "4;5;5;3", "soundness": "3;4;3;3", "novelty": "2;4;3;4", "presentation": "3;4;2;4", "wc_summary": "86;53;61;77", "wc_strengths": "52;79;63;86", "wc_weaknesses": "101;66;140;68", "wc_questions": "34;3;5;27", "wc_limitations": "1;1;1;1", "wc_review": "274;202;270;259", "wc_reply_reviewers": "22;10;18;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 69.25, 12.968712349342937 ], "wc_strengths_avg": [ 70.0, 13.322912594474229 ], "wc_weaknesses_avg": [ 93.75, 30.102948360584218 ], "wc_questions_avg": [ 17.25, 13.497684986693088 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 251.25, 28.960101864461734 ], "wc_reply_reviewers_avg": [ 20.75, 8.287792227125388 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VLJlYdLUs3AJ:scholar.google.com/&scioq=LookHere:+Vision+Transformers+with+Directed+Attention+Generalize+and+Extrapolate&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "carleton.ca;carleton.ca;carleton.ca;carleton.ca", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carleton University", "aff_unique_dep": "", "aff_unique_url": "https://carleton.ca", "aff_unique_abbr": "Carleton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Code Repair with LLMs gives an Exploration-Exploitation Tradeoff", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93642", "id": "o863gX6DxA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o863gX6DxA", "openreview": "https://openreview.net/forum?id=o863gX6DxA", "poster": "/media/PosterPDFs/NeurIPS%202024/93642.png?t=1733434998.1209776", "project": "", "author_site": "Hao Tang, Keya Hu, Jin Zhou, Si Cheng Zhong, Wei-Long Zheng, Xujie Si, Kevin Ellis", "tldr": "", "abstract": "Iteratively improving and repairing source code with large language models (LLMs), known as refinement, has emerged as a popular way of generating programs that would be too complex to construct in one shot. Given a bank of test cases, together with a candidate program, an LLM can improve that program by being prompted with failed test cases. But it remains an open question how to best iteratively refine code, with prior work employing simple greedy or breadth-first strategies. We show here that refinement exposes an explore-exploit tradeoff: exploit by refining the program that passes the most test cases, or explore by refining a lesser considered program. We frame this as an arm-acquiring bandit problem, which we solve with Thompson Sampling. The resulting LLM-based program synthesis algorithm is broadly applicable: Across loop invariant synthesis, visual reasoning puzzles, and competition programming problems, we find that our new method can solve more problems using fewer language model calls.", "keywords": "Program Synthesis;LLM", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hao Tang;Keya Hu;Jin Peng Zhou;Si Cheng Zhong;Wei-Long Zheng;Xujie Si;Kevin Ellis", "authorids": "~Hao_Tang5;~Keya_Hu1;~Jin_Peng_Zhou1;~Si_Cheng_Zhong1;~Wei-Long_Zheng1;~Xujie_Si1;~Kevin_Ellis1", "gender": "M;F;M;;;M;M", "homepage": "https://haotang1995.github.io/;https://github.com/lillian039;;;https://weilongzheng.github.io/;https://xujie.si;https://www.cs.cornell.edu/~ellisk/", "dblp": "07/5751-8;;255/1107;;150/4150;142/8449;", "google_scholar": ";;Nf48jqcAAAAJ;;MZXXe8UAAAAJ;Ru-jrx4AAAAJ;L7XI6asAAAAJ", "orcid": ";;;;;;", "linkedin": ";;https://ca.linkedin.com/in/jinpeng-zhou;si-cheng-zhong/;;;", "or_profile": "~Hao_Tang5;~Keya_Hu1;~Jin_Peng_Zhou1;~Si_Cheng_Zhong1;~Wei-Long_Zheng1;~Xujie_Si1;~Kevin_Ellis1", "aff": "Cornell University;Shanghai Jiaotong University;Department of Computer Science, Cornell University;Department of Computer Science, University of Toronto;Shanghai Jiaotong University;University of Toronto;Cornell University", "aff_domain": "cornell.edu;sjtu.edu;cs.cornell.edu;cs.toronto.edu;sjtu.edu.cn;toronto.edu;cornell.edu", "position": "PhD student;Undergrad student;PhD student;MS student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ntang2024code,\ntitle={Code Repair with {LLM}s gives an Exploration-Exploitation Tradeoff},\nauthor={Hao Tang and Keya Hu and Jin Peng Zhou and Si Cheng Zhong and Wei-Long Zheng and Xujie Si and Kevin Ellis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o863gX6DxA}\n}", "github": "", "reviewers": "UuYA;ikA5;rHDY;mETF", "pdf_size": 3032635, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "93;140;83;49", "wc_strengths": "59;171;55;81", "wc_weaknesses": "131;413;553;319", "wc_questions": "10;352;8;152", "wc_limitations": "12;37;5;25", "wc_review": "305;1113;704;626", "wc_reply_reviewers": "56;126;20;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.25, 32.529794035622174 ], "wc_strengths_avg": [ 91.5, 46.9547654663507 ], "wc_weaknesses_avg": [ 354.0, 153.32644912082193 ], "wc_questions_avg": [ 130.5, 140.58004837102595 ], "wc_limitations_avg": [ 19.75, 12.275483697190918 ], "wc_review_avg": [ 687.0, 287.8411019990022 ], "wc_reply_reviewers_avg": [ 53.0, 45.48626166217664 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6663911409850052557&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cornell.edu;sjtu.edu;cs.cornell.edu;cs.toronto.edu;sjtu.edu.cn;toronto.edu;cornell.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;1;2;0", "aff_unique_norm": "Cornell University;Shanghai Jiao Tong University;University of Toronto", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.cornell.edu;https://www.sjtu.edu.cn;https://www.utoronto.ca", "aff_unique_abbr": "Cornell;SJTU;U of T", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;1;0;2;1;2;0", "aff_country_unique": "United States;China;Canada" }, { "title": "Attention Temperature Matters in ViT-Based Cross-Domain Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93641", "id": "o8m4RM5mBk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o8m4RM5mBk", "openreview": "https://openreview.net/forum?id=o8m4RM5mBk", "poster": "/media/PosterPDFs/NeurIPS%202024/93641.png?t=1731465203.3715699", "project": "", "author_site": "Yixiong Zou, Ran Ma, Yuhua Li, Ruixuan Li", "tldr": "", "abstract": "Cross-domain few-shot learning (CDFSL) is proposed to transfer knowledge from large-scale source-domain datasets to downstream target-domain datasets with only a few training samples. However, Vision Transformer (ViT), as a strong backbone network to achieve many top performances, is still under-explored in the CDFSL task in its transferability against large domain gaps. In this paper, we find an interesting phenomenon of ViT in the CDFSL task: by simply multiplying a temperature (even as small as 0) to the attention in ViT blocks, the target-domain performance consistently increases, even though the attention map is downgraded to a uniform map. In this paper, we delve into this phenomenon for an interpretation. Through experiments, we interpret this phenomenon as a remedy for the ineffective target-domain attention caused by the query-key attention mechanism under large domain gaps. Based on it, we further propose a simple but effective method for the CDFSL task to boost ViT's transferability by resisting the learning of query-key parameters and encouraging that of non-query-key ones. Experiments on four CDFSL datasets validate the rationale of our interpretation and method, showing we can consistently outperform state-of-the-art methods. Our codes are available at https://github.com/Zoilsen/Attn_Temp_CDFSL.", "keywords": "Cross-domain few-shot learning;Vision transformer;attention temperature", "primary_area": "machine_vision", "supplementary_material": "/attachment/29f4ec35094843b475df702d2304fe25208b5673.zip", "author": "Yixiong Zou;Ran Ma;Yuhua Li;Ruixuan Li", "authorids": "~Yixiong_Zou1;~Ran_Ma1;~Yuhua_Li2;~Ruixuan_Li1", "gender": ";F;F;M", "homepage": ";https://github.com/Pumpkinder;;http://idc.hust.edu.cn/rxli/index.html", "dblp": ";;79/5796-3;60/4429.html", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/scholar?q=ruixuan+li", "orcid": ";;;0000-0002-7791-5511", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ruixuan-li-b367319", "or_profile": "~Yixiong_Zou1;~Ran_Ma1;~Yuhua_Li2;~Ruixuan_Li1", "aff": ";Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": ";hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": ";MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzou2024attention,\ntitle={Attention Temperature Matters in ViT-Based Cross-Domain Few-Shot Learning},\nauthor={Yixiong Zou and Ran Ma and Yuhua Li and Ruixuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o8m4RM5mBk}\n}", "github": "", "reviewers": "XXJ8;ujLR;D8qR;G6Ww", "pdf_size": 4100630, "rating": "5;5;6;7", "confidence": "3;5;4;4", "soundness": "2;3;3;3", "novelty": "2;2;4;3", "presentation": "2;3;3;3", "wc_summary": "67;94;89;82", "wc_strengths": "13;20;68;141", "wc_weaknesses": "434;214;58;116", "wc_questions": "63;7;76;127", "wc_limitations": "1;36;1;6", "wc_review": "578;371;292;472", "wc_reply_reviewers": "85;220;6;102", "wc_reply_authors": "265;646;18;237", "reply_reviewers": "2;2;1;2", "reply_authors": "4;6;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 10.173494974687902 ], "wc_strengths_avg": [ 60.5, 51.07102896946565 ], "wc_weaknesses_avg": [ 205.5, 143.22272864318708 ], "wc_questions_avg": [ 68.25, 42.692944381946766 ], "wc_limitations_avg": [ 11.0, 14.577379737113251 ], "wc_review_avg": [ 428.25, 107.44853419195628 ], "wc_reply_reviewers_avg": [ 103.25, 76.5224640220112 ], "wc_reply_authors_avg": [ 291.5, 225.91204040510988 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.6583123951777 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=527078944499907374&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";hust.edu.cn;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Identifying and Solving Conditional Image Leakage in Image-to-Video Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93640", "id": "o9Lkiv1qpc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=o9Lkiv1qpc", "openreview": "https://openreview.net/forum?id=o9Lkiv1qpc", "poster": "/media/PosterPDFs/NeurIPS%202024/93640.png?t=1731167993.898359", "project": "", "author_site": "Min Zhao, Hongzhou Zhu, Chendong Xiang, Kaiwen Zheng, Chongxuan LI, Jun Zhu", "tldr": "", "abstract": "Diffusion models have obtained substantial progress in image-to-video generation. However, in this paper, we find that these models tend to generate videos with less motion than expected. We attribute this to the issue called conditional image leakage, where the image-to-video diffusion models (I2V-DMs) tend to over-rely on the conditional image at large time steps. We further address this challenge from both inference and training aspects. First, we propose to start the generation process from an earlier time step to avoid the unreliable large-time steps of I2V-DMs, as well as an initial noise distribution with optimal analytic expressions (Analytic-Init) by minimizing the KL divergence between it and the actual marginal distribution to bridge the training-inference gap. Second, we design a time-dependent noise distribution (TimeNoise) for the conditional image during training, applying higher noise levels at larger time steps to disrupt it and reduce the model's dependency on it. We validate these general strategies on various I2V-DMs on our collected open-domain image benchmark and the UCF101 dataset. Extensive results show that our methods outperform baselines by producing higher motion scores with lower errors while maintaining image alignment and temporal consistency, thereby yielding superior overall performance and enabling more accurate motion control. The project page: \\url{https://cond-image-leak.github.io/}.", "keywords": "diffusion model;image-to-video diffusion model;video generation;video diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/cf426bc3c30072be76ef5de07b90f0c11e45c8f0.zip", "author": "Min Zhao;Hongzhou Zhu;Chendong Xiang;Kaiwen Zheng;Chongxuan Li;Jun Zhu", "authorids": "~Min_Zhao3;~Hongzhou_Zhu1;~Chendong_Xiang1;~Kaiwen_Zheng2;~Chongxuan_Li1;~Jun_Zhu2", "gender": "F;M;M;M;M;M", "homepage": ";https://github.com/zhuhz22;https://xiang-cd.github.io/;https://zhengkw18.github.io/;http://ml.cs.tsinghua.edu.cn/~chongxuan;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "67/1336-13.html;;344/2858;;161/9965;50/2644-1", "google_scholar": "ExIZrLAAAAAJ;GjmajhoAAAAJ;BZJFXVAAAAAJ;0d80xSIAAAAJ;UKMcQn4AAAAJ;axsP38wAAAAJ", "orcid": ";;;;0000-0002-0912-9076;", "linkedin": ";;;;;", "or_profile": "~Min_Zhao3;~Hongzhou_Zhu1;~Chendong_Xiang1;~Kaiwen_Zheng2;~Chongxuan_Li1;~Jun_Zhu2", "aff": "Institute of Automation, Chinese Academy of Sciences;Tsinghua University;Tsinghua University;Tsinghua University;Renmin University of China;Tsinghua University", "aff_domain": "ia.ac.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\nzhao2024identifying,\ntitle={Identifying and Solving Conditional Image Leakage in Image-to-Video Diffusion Model},\nauthor={Min Zhao and Hongzhou Zhu and Chendong Xiang and Kaiwen Zheng and Chongxuan Li and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=o9Lkiv1qpc}\n}", "github": "", "reviewers": "f9AW;hjyn;NwFJ;iEBW", "pdf_size": 12217043, "rating": "5;6;6;7", "confidence": "5;4;5;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "67;137;77;67", "wc_strengths": "34;77;67;66", "wc_weaknesses": "378;211;345;86", "wc_questions": "20;3;39;8", "wc_limitations": "7;9;7;1", "wc_review": "506;437;535;228", "wc_reply_reviewers": "32;70;320;0", "wc_reply_authors": "60;18;824;0", "reply_reviewers": "1;1;3;0", "reply_authors": "2;2;4;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.0, 29.154759474226502 ], "wc_strengths_avg": [ 61.0, 16.170961628796228 ], "wc_weaknesses_avg": [ 255.0, 115.89434843856709 ], "wc_questions_avg": [ 17.5, 13.865424623862047 ], "wc_limitations_avg": [ 6.0, 3.0 ], "wc_review_avg": [ 426.5, 120.00520822031017 ], "wc_reply_reviewers_avg": [ 105.5, 126.29627864668063 ], "wc_reply_authors_avg": [ 225.5, 346.2293315130883 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12512459737787840037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;ruc.edu.cn;mail.tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "Chinese Academy of Sciences;Tsinghua University;Renmin University of China", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "CAS;THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "oAmHzy8btj", "title": "Exact Random Graph Matching with Multiple Graphs", "track": "main", "status": "Reject", "tldr": "", "abstract": "This work studies fundamental limits for recovering the underlying correspondence among multiple correlated random graphs. We identify a necessary condition for any algorithm to correctly match all nodes across all graphs, and propose two algorithms for which the same condition is also sufficient. The first algorithm employs global information to simultaneously match all the graphs, whereas the second algorithm first partially matches the graphs pairwise and then combines the partial matchings by transitivity. Remarkably, both algorithms work down to the information theoretic threshold. Our analysis reveals a scenario where exact matching between two graphs alone is impossible, but leveraging more than two graphs allows exact matching among all the graphs. Along the way, we derive independent results about the $k$-core of Erdos-Renyi graphs.", "keywords": "Graph Matching;Network Alignment;Random Graphs;Graphical Inference", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Taha Ameen;Bruce Hajek", "authorids": "~Taha_Ameen1;~Bruce_Hajek1", "gender": "M;", "homepage": "https://www.taha-ameen.com;", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Taha_Ameen1;~Bruce_Hajek1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;", "position": "PhD student;", "bibtex": "@misc{\nanonymous2024exact,\ntitle={Exact Random Graph Matching with Multiple Graphs},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oAmHzy8btj}\n}", "github": "", "project": "", "reviewers": "Yg8a;Vr8o;tQgp;T9Pj", "site": "https://openreview.net/forum?id=oAmHzy8btj", "pdf_size": 2233470, "rating": "4;6;7;8", "confidence": "3;4;5;4", "soundness": "2;4;4;4", "novelty": "2;4;4;4", "presentation": "2;4;3;4", "wc_summary": "46;71;293;94", "wc_strengths": "11;43;123;56", "wc_weaknesses": "265;21;187;8", "wc_questions": "2;31;580;36", "wc_limitations": "8;14;23;6", "wc_review": "332;180;1206;200", "wc_reply_reviewers": "50;36;17;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 126.0, 97.90045965162778 ], "wc_strengths_avg": [ 58.25, 40.81283499096822 ], "wc_weaknesses_avg": [ 120.25, 109.38321397728264 ], "wc_questions_avg": [ 162.25, 241.53713482609666 ], "wc_limitations_avg": [ 12.75, 6.609652033201143 ], "wc_review_avg": [ 479.5, 423.49114512584555 ], "wc_reply_reviewers_avg": [ 28.75, 15.188400179084036 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7171371656006362, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2980194004852797187&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GAVEL: Generating Games via Evolution and Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93639", "id": "oBvaZJ1C71", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oBvaZJ1C71", "openreview": "https://openreview.net/forum?id=oBvaZJ1C71", "poster": "/media/PosterPDFs/NeurIPS%202024/93639.png?t=1733427068.6991699", "project": "", "author_site": "Graham Todd, Alexander G Padula, Matthew Stephenson, Eric Piette, Dennis Soemers, Julian Togelius", "tldr": "", "abstract": "Automatically generating novel and interesting games is a complex task. Challenges include representing game rules in a computationally workable form, searching through the large space of potential games under most such representations, and accurately evaluating the originality and quality of previously unseen games. Prior work in automated game generation has largely focused on relatively restricted rule representations and relied on domain-specific heuristics. In this work, we explore the generation of novel games in the comparatively expansive Ludii game description language, which encodes the rules of over 1000 board games in a variety of styles and modes of play. We draw inspiration from recent advances in large language models and evolutionary computation in order to train a model that intelligently mutates and recombines games and mechanics expressed as code. We demonstrate both quantitatively and qualitatively that our approach is capable of generating new and interesting games, including in regions of the potential rules space not covered by existing games in the Ludii dataset.", "keywords": "games;llms;language models;evolution;quality diversity;pcg", "primary_area": "generative_models", "supplementary_material": "", "author": "Graham Todd;Alexander George Padula;Matthew Stephenson;Eric Piette;Dennis J. N. J. Soemers;Julian Togelius", "authorids": "~Graham_Todd1;~Alexander_George_Padula1;~Matthew_Stephenson2;~Eric_Piette1;~Dennis_J._N._J._Soemers1;~Julian_Togelius1", "gender": ";M;;M;M;M", "homepage": ";;https://matthewstephenson.info/;http://piette.info/eric;https://dennissoemers.github.io/;http://julian.togelius.com", "dblp": ";;190/8740;145/2739.html;195/5774;47/767", "google_scholar": "NyBWyCIAAAAJ;;3Ebic40AAAAJ;https://scholar.google.fr/citations?user=MgwAnUIAAAAJ;DennisSoemers;lr4I9BwAAAAJ", "orcid": ";0009-0005-1128-8323;;;0000-0003-3241-8957;0000-0003-3128-4598", "linkedin": ";alexander-padula-8a0a3b138/;;ericpiette/;dennis-soemers/;togelius/", "or_profile": "~Graham_Todd1;~Alexander_George_Padula1;~Matthew_Stephenson2;~Eric_Piette1;~Dennis_J._N._J._Soemers1;~Julian_Togelius1", "aff": "New York University;Maastricht University;Flinders University of South Australia;UCLouvain;Maastricht University;New York University", "aff_domain": "nyu.edu;maastrichtuniversity.nl;flinders.edu.au;uclouvain.be;maastrichtuniversity.nl;nyu.edu", "position": "PhD student;Undergrad student;Lecturer;Assistant Professor;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ntodd2024gavel,\ntitle={{GAVEL}: Generating Games via Evolution and Language Models},\nauthor={Graham Todd and Alexander George Padula and Matthew Stephenson and Eric Piette and Dennis J. N. J. Soemers and Julian Togelius},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oBvaZJ1C71}\n}", "github": "", "reviewers": "shZT;ocXt;29DD;HLJN", "pdf_size": 1557570, "rating": "5;5;5;6", "confidence": "4;4;5;4", "soundness": "3;3;2;3", "novelty": "1;3;2;2", "presentation": "3;3;4;4", "wc_summary": "88;73;258;39", "wc_strengths": "187;81;176;85", "wc_weaknesses": "298;123;253;175", "wc_questions": "148;32;356;214", "wc_limitations": "33;1;50;1", "wc_review": "754;310;1093;514", "wc_reply_reviewers": "219;63;72;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 114.5, 84.73045497340375 ], "wc_strengths_avg": [ 132.25, 49.423552077931426 ], "wc_weaknesses_avg": [ 212.25, 67.76199155869018 ], "wc_questions_avg": [ 187.5, 117.0843712884004 ], "wc_limitations_avg": [ 21.25, 21.123150806638673 ], "wc_review_avg": [ 667.75, 291.50503854993656 ], "wc_reply_reviewers_avg": [ 91.5, 77.08599094517758 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8805409391357146642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "email": "nyu.edu;maastrichtuniversity.nl;flinders.edu.au;uclouvain.be;maastrichtuniversity.nl;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;1;0", "aff_unique_norm": "New York University;Maastricht University;Flinders University;Universit\u00e9 catholique de Louvain", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nyu.edu;https://www.maastrichtuniversity.nl;https://www.flinders.edu.au;https://www.uclouvain.be", "aff_unique_abbr": "NYU;MU;Flinders;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;1;0", "aff_country_unique": "United States;Netherlands;Australia;Belgium" }, { "title": "Self-playing Adversarial Language Game Enhances LLM Reasoning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93638", "id": "oCGkSH7ys2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oCGkSH7ys2", "openreview": "https://openreview.net/forum?id=oCGkSH7ys2", "poster": "", "project": "", "author_site": "Pengyu Cheng, Tianhao Hu, Han Xu, Zhisong Zhang, Yong Dai, Lei Han, nan du, Xiaolong Li", "tldr": "", "abstract": "We explore the potential of self-play training for large language models (LLMs) in a two-player adversarial language game called Adversarial Taboo. In this game, an attacker and a defender communicate around a target word only visible to the attacker. The attacker aims to induce the defender to speak the target word unconsciously, while the defender tries to infer the target word from the attacker's utterances. To win the game, both players must have sufficient knowledge about the target word and high-level reasoning ability to infer and express in this information-reserved conversation. Hence, we are curious about whether LLMs' reasoning ability can be further enhanced by Self-Playing this Adversarial language Game (SPAG). With this goal, we select several open-source LLMs and let each act as the attacker and play with a copy of itself as the defender on an extensive range of target words. Through reinforcement learning on the game outcomes, we observe that the LLMs' performances uniformly improve on a broad range of reasoning benchmarks. Furthermore, iteratively adopting this self-play process can continuously promote LLMs' reasoning abilities. The code is available at https://github.com/Linear95/SPAG.", "keywords": "Self-play;Large Language Models;LLM Reasoning", "primary_area": "generative_models", "supplementary_material": "/attachment/6bf65db077e8378c0d82032f3abd5b70faadafb8.zip", "author": "Pengyu Cheng;Tianhao Hu;Han Xu;Zhisong Zhang;Yong Dai;Lei Han;nan du;Xiaolong Li", "authorids": "~Pengyu_Cheng1;~Tianhao_Hu2;~Han_Xu11;~Zhisong_Zhang1;~Yong_Dai1;~Lei_Han1;~nan_du3;~Xiaolong_Li4", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://linear95.github.io/;;https://zzsfornlp.github.io/;https://daiyongya.github.io/;https://www.leihan.org;https://scholar.google.com/citations?user=BO4jEkAAAAAJ&hl=en;https://sites.google.com/site/xiaolongbruceli/home;", "dblp": "223/6048;;174/7415;;75/2307-1;;82/6624-5.html;230/3226", "google_scholar": "eeQ_yCkAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;373vlUEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;Tz4_zi8AAAAJ;;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0421-8376;;;0000-0002-3041-5851;;;;", "linkedin": ";;;yong-dai-9255311a6/;;;;", "or_profile": "~Pengyu_Cheng1;~Han_Xu11;~Zhisong_Zhang1;~Yong_Dai1;~Lei_Han1;~nan_du3;~Xiaolong_Li4;~TianhaoHu2", "aff": "Tencent;Shenzhen University;Tencent;Tencent AI Lab;Tencent Robotics X;Tencent INC;Tencent America LLC;Shenzhen University", "aff_domain": "tencent.com;szu.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;szu.edu.cn", "position": "Researcher;Undergrad student;Researcher;Researcher;Principal Researcher;Principal Researcher;Principal Researcher;Undergrad student", "bibtex": "@inproceedings{\ncheng2024selfplaying,\ntitle={Self-playing Adversarial Language Game Enhances {LLM} Reasoning},\nauthor={Pengyu Cheng and Tianhao Hu and Han Xu and Zhisong Zhang and Yong Dai and Lei Han and nan du and Xiaolong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oCGkSH7ys2}\n}", "github": "", "reviewers": "R1Zh;1AP3;ib6k;cmRY", "pdf_size": 1796335, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "3;3;4;3", "wc_summary": "52;83;250;86", "wc_strengths": "41;44;48;32", "wc_weaknesses": "139;76;93;18", "wc_questions": "25;27;96;23", "wc_limitations": "1;7;1;2", "wc_review": "258;237;488;161", "wc_reply_reviewers": "8;44;70;0", "wc_reply_authors": "0;95;20;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 117.75, 77.50604815109592 ], "wc_strengths_avg": [ 41.25, 5.889609494694874 ], "wc_weaknesses_avg": [ 81.5, 43.30415684434925 ], "wc_questions_avg": [ 42.75, 30.776411421736615 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 286.0, 122.07989187413298 ], "wc_reply_reviewers_avg": [ 30.5, 28.191310718020897 ], "wc_reply_authors_avg": [ 28.75, 39.111219617905036 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16781771142243579760&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tencent.com;szu.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;szu.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;0;1", "aff_unique_norm": "Tencent;Shenzhen University", "aff_unique_dep": "Tencent Holdings Limited;", "aff_unique_url": "https://www.tencent.com;https://www.szu.edu.cn", "aff_unique_abbr": "Tencent;SZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Weight decay induces low-rank attention layers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93637", "id": "oDeqjIM9Sk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oDeqjIM9Sk", "openreview": "https://openreview.net/forum?id=oDeqjIM9Sk", "poster": "", "project": "", "author_site": "Seijin Kobayashi, Yassir Akram, Johannes von Oswald", "tldr": "", "abstract": "The effect of regularizers such as weight decay when training deep neural networks is not well understood. We study the influence of weight decay as well as $L2$-regularization when training neural network models in which parameter matrices interact multiplicatively. This combination is of particular interest as this parametrization is common in attention layers, the workhorse of transformers. Here, key-query, as well as value-projection parameter matrices, are multiplied directly with each other: $W_K^TW_Q$ and $PW_V$. \nWe extend previous results and show on one hand that any local minimum of a $L2$-regularized loss of the form $L(AB^\\top) + \\lambda (\\|A\\|^2 + \\|B\\|^2)$ coincides with a minimum of the nuclear norm-regularized loss $L(AB^\\top) + \\lambda\\|AB^\\top\\|_*$, and on the other hand that the 2 losses become identical exponentially quickly during training. We thus complement existing works linking $L2$-regularization with low-rank regularization, and in particular, explain why such regularization on the matrix product affects early stages of training.\nBased on these theoretical insights, we verify empirically that the key-query and value-projection matrix products $W_K^TW_Q, PW_V$ within attention layers, when optimized with weight decay, as usually done in vision tasks and language modelling, indeed induce a significant reduction in the rank of $W_K^TW_Q$ and $PW_V$, even in fully online training.\nWe find that, in accordance with existing work, inducing low rank in attention matrix products can damage language model performance, and observe advantages when decoupling weight decay in attention layers from the rest of the parameters.", "keywords": "weight decay;attention", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Seijin Kobayashi;Yassir Akram;Johannes von Oswald", "authorids": "~Seijin_Kobayashi1;~Yassir_Akram1;~Johannes_von_Oswald2", "gender": ";M;Not Specified", "homepage": ";https://as.inf.ethz.ch/people/members/yakram/index.html;https://as.inf.ethz.ch/people/members/voswaldj/index.html", "dblp": ";329/5709;242/8029", "google_scholar": ";_eYyLKIAAAAJ;https://scholar.google.ch/citations?user=jdnL-PgAAAAJ", "orcid": ";;", "linkedin": ";;johswald/?originalSubdomain=de", "or_profile": "~Seijin_Kobayashi1;~Yassir_Akram1;~Johannes_von_Oswald2", "aff": ";Department of Computer Science, ETHZ - ETH Zurich;Google", "aff_domain": ";inf.ethz.ch;research.google.com", "position": ";PhD student;Researcher", "bibtex": "@inproceedings{\nkobayashi2024weight,\ntitle={Weight decay induces low-rank attention layers},\nauthor={Seijin Kobayashi and Yassir Akram and Johannes von Oswald},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oDeqjIM9Sk}\n}", "github": "", "reviewers": "vYXP;hhKp;fDM3;veeJ;eBg4", "pdf_size": 1678923, "rating": "3;6;6;6;7", "confidence": "4;3;4;4;3", "soundness": "3;3;2;3;3", "novelty": "1;3;2;3;3", "presentation": "3;3;3;2;3", "wc_summary": "45;101;107;83;99", "wc_strengths": "25;80;26;51;47", "wc_weaknesses": "89;58;37;258;147", "wc_questions": "15;510;140;71;73", "wc_limitations": "16;19;7;1;8", "wc_review": "190;768;317;464;374", "wc_reply_reviewers": "817;46;58;20;48", "wc_reply_authors": "1240;0;0;0;0", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;1;1;1;1", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 87.0, 22.44994432064365 ], "wc_strengths_avg": [ 45.8, 20.113676938839404 ], "wc_weaknesses_avg": [ 117.8, 79.30044136068852 ], "wc_questions_avg": [ 161.8, 178.54904088233013 ], "wc_limitations_avg": [ 10.2, 6.49307323229917 ], "wc_review_avg": [ 422.6, 194.23243807356175 ], "wc_reply_reviewers_avg": [ 197.8, 309.85441742857245 ], "wc_reply_authors_avg": [ 248.0, 496.0 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5417363388859615, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15643102551094204734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";inf.ethz.ch;research.google.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.ethz.ch;https://www.google.com", "aff_unique_abbr": "ETHZ;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Zurich;Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "NeuralSteiner: Learning Steiner Tree for Overflow-avoiding Global Routing in Chip Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93636", "id": "oEKFPSOWpp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oEKFPSOWpp", "openreview": "https://openreview.net/forum?id=oEKFPSOWpp", "poster": "/media/PosterPDFs/NeurIPS%202024/93636.png?t=1731511340.5023446", "project": "", "author_site": "RUIZHI LIU, ZhishengZeng, Shizhe Ding, Jingyan Sui, Xingquan Li, Dongbo Bu", "tldr": "", "abstract": "Global routing plays a critical role in modern chip design. The routing paths generated by global routers often form a rectilinear Steiner tree (RST). Recent advances from the machine learning community have shown the power of learning-based route generation; however, the yielded routing paths by the existing approaches often suffer from considerable overflow, thus greatly hindering their application in practice.\nWe propose NeuralSteiner, an accurate approach to overflow-avoiding global routing in chip design. The key idea of NeuralSteiner approach is to learn Steiner trees: we first predict the locations of highly likely Steiner points by adopting a neural network considering full-net spatial and overflow information, then select appropriate points by running a graph-based post-processing algorithm, and finally connect these points with the input pins to yield overflow-avoiding RSTs. NeuralSteiner offers two advantages over previous learning-based models. First, by using the learning scheme, NeuralSteiner ensures the connectivity of generated routes while significantly reducing congestion. Second, NeuralSteiner can effectively scale to large nets and transfer to unseen chip designs without any modifications or fine-tuning. Extensive experiments over public large-scale benchmarks reveal that, compared with the state-of-the-art deep generative methods, NeuralSteiner achieves up to a 99.8\\% reduction in overflow while speeding up the generation and maintaining a slight wirelength loss within only 1.8\\%.", "keywords": "Global routing;chip design;neural network;Steiner tree;deep learning;congestion", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Ruizhi Liu;ZhishengZeng;Shizhe Ding;Jingyan Sui;Xingquan Li;Dongbo Bu", "authorids": "~Ruizhi_Liu1;~ZhishengZeng1;~Shizhe_Ding2;~Jingyan_Sui1;~Xingquan_Li1;~Dongbo_Bu1", "gender": "M;;;;M;M", "homepage": "https://github.com/liuruizhi96;;https://github.com/DingShizhe;;;http://bioinfo.ict.ac.cn/~dbu/", "dblp": "174/9270;;;309/1052;;", "google_scholar": ";;;;JNoylQgAAAAJ;", "orcid": ";0009-0009-3686-4576;;;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAADBMZ4cB344SaCp2DF0xsYG7GSEzjj5cxCY;;", "or_profile": "~Ruizhi_Liu1;~ZhishengZeng1;~Shizhe_Ding2;~Jingyan_Sui1;~Xingquan_Li1;~Dongbo_Bu1", "aff": "Institute of Computing Science, CAS;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;PCL;University of Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ucas.ac.cn;ict.ac.cn;ucas.edu.cn;pcl.ac;ucas.ac.cn", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2024neuralsteiner,\ntitle={NeuralSteiner: Learning Steiner Tree for Overflow-avoiding Global Routing in Chip Design},\nauthor={Ruizhi Liu and ZhishengZeng and Shizhe Ding and Jingyan Sui and Xingquan Li and Dongbo Bu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oEKFPSOWpp}\n}", "github": "", "reviewers": "JcjD;4X1w;1eER;1MAW", "pdf_size": 15651340, "rating": "5;6;6;7", "confidence": "4;5;3;4", "soundness": "1;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "56;167;65;43", "wc_strengths": "12;82;63;88", "wc_weaknesses": "31;108;40;48", "wc_questions": "373;169;29;74", "wc_limitations": "3;1;1;16", "wc_review": "475;527;198;269", "wc_reply_reviewers": "25;28;19;178", "wc_reply_authors": "47;19;36;269", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.75, 49.266494699744975 ], "wc_strengths_avg": [ 61.25, 29.894606536965828 ], "wc_weaknesses_avg": [ 56.75, 30.194163343268844 ], "wc_questions_avg": [ 161.25, 132.28827423471816 ], "wc_limitations_avg": [ 5.25, 6.2599920127744575 ], "wc_review_avg": [ 367.25, 137.3214750139249 ], "wc_reply_reviewers_avg": [ 62.5, 66.76263925280367 ], "wc_reply_authors_avg": [ 92.75, 102.24572118186659 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WLhUM5Lf3V0J:scholar.google.com/&scioq=NeuralSteiner:+Learning+Steiner+Tree+for+Overflow-avoiding+Global+Routing+in+Chip+Design&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "ict.ac.cn;ucas.ac.cn;ict.ac.cn;ucas.edu.cn;pcl.ac;ucas.ac.cn", "author_num": 6, "aff_unique_index": "0;1;0;1;2;1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Pattern Recognition and Machine Learning Group, University of British Columbia", "aff_unique_dep": "Institute of Computing Science;;Department of Computer Science", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn;https://www.cs.ubc.ca", "aff_unique_abbr": "CAS;UCAS;UBC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Vancouver", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Canada" }, { "title": "Fully Distributed, Flexible Compositional Visual Representations via Soft Tensor Products", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93635", "id": "oEVsxVdush", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oEVsxVdush", "openreview": "https://openreview.net/forum?id=oEVsxVdush", "poster": "/media/PosterPDFs/NeurIPS%202024/93635.png?t=1733183149.9604073", "project": "", "author_site": "Bethia Sun, Maurice Pagnucco, Yang Song", "tldr": "", "abstract": "Since the inception of the classicalist vs. connectionist debate, it has been argued that the ability to systematically combine symbol-like entities into compositional representations is crucial for human intelligence. In connectionist systems, the field of disentanglement has gained prominence for its ability to produce explicitly compositional representations; however, it relies on a fundamentally *symbolic, concatenative* representation of compositional structure that clashes with the *continuous, distributed* foundations of deep learning. To resolve this tension, we extend Smolensky's Tensor Product Representation (TPR) and introduce *Soft TPR*, a representational form that encodes compositional structure in an inherently *distributed, flexible* manner, along with *Soft TPR Autoencoder*, a theoretically-principled architecture designed specifically to learn Soft TPRs. Comprehensive evaluations in the visual representation learning domain demonstrate that the Soft TPR framework consistently outperforms conventional disentanglement alternatives -- achieving state-of-the-art disentanglement, boosting representation learner convergence, and delivering superior sample efficiency and low-sample regime performance in downstream tasks. These findings highlight the promise of a *distributed* and *flexible* approach to representing compositional structure by potentially enhancing alignment with the core principles of deep learning over the conventional symbolic approach.", "keywords": "representation learning;compositional representations;disentanglement;tensor product representations;compositionality", "primary_area": "machine_vision", "supplementary_material": "/attachment/ee3b9dd34eabcba5698f62767ca6e11b7e95e1a8.zip", "author": "Bethia Sun;Maurice Pagnucco;Yang Song", "authorids": "~Bethia_Sun1;~Maurice_Pagnucco1;~Yang_Song4", "gender": ";M;", "homepage": "https://gomb0c.github.io/;http://www.cse.unsw.edu.au/~morri/;http://www.cse.unsw.edu.au/~ysong/", "dblp": ";p/MauricePagnucco;24/4470-1", "google_scholar": "https://scholar.google.com.au/citations?user=5q3Zu2wAAAAJ;lqjmockAAAAJ;https://scholar.google.com.au/citations?user=7u3M9hMAAAAJ", "orcid": ";0000-0001-7712-6646;", "linkedin": ";mauricepagnucco/?originalSubdomain=au;", "or_profile": "~Bethia_Sun1;~Maurice_Pagnucco1;~Yang_Song4", "aff": "University of New South Wales;University of New South Wales;University of New South Wales", "aff_domain": "unsw.edu.au;unsw.edu.au;unsw.edu.au", "position": "MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsun2024soft,\ntitle={Soft Tensor Product Representations for Fully Continuous, Compositional Visual Representations},\nauthor={Bethia Sun and Maurice Pagnucco and Yang Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oEVsxVdush}\n}", "github": "", "reviewers": "6VtP;xKnt;93av", "pdf_size": 2455205, "rating": "5;7;7", "confidence": "4;3;4", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "78;53;117", "wc_strengths": "69;138;143", "wc_weaknesses": "83;197;216", "wc_questions": "98;121;167", "wc_limitations": "7;27;1", "wc_review": "335;536;644", "wc_reply_reviewers": "15;87;19", "wc_reply_authors": "29;38;30", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 82.66666666666667, 26.335442953471574 ], "wc_strengths_avg": [ 116.66666666666667, 33.76717669901087 ], "wc_weaknesses_avg": [ 165.33333333333334, 58.732917138139456 ], "wc_questions_avg": [ 128.66666666666666, 28.686039965266886 ], "wc_limitations_avg": [ 11.666666666666666, 11.115554667022044 ], "wc_review_avg": [ 505.0, 128.03905654135383 ], "wc_reply_reviewers_avg": [ 40.333333333333336, 33.03869784897031 ], "wc_reply_authors_avg": [ 32.333333333333336, 4.027681991198191 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uHXtNPj2_Q4J:scholar.google.com/&scioq=Fully+Distributed,+Flexible+Compositional+Visual+Representations+via+Soft+Tensor+Products&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "unsw.edu.au;unsw.edu.au;unsw.edu.au", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of New South Wales", "aff_unique_dep": "", "aff_unique_url": "https://www.unsw.edu.au", "aff_unique_abbr": "UNSW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "oEmyoy5H5P", "title": "Grounding and Validation of Algorithmic Recourse in Real-World Contexts: A Systematized Literature Review", "track": "main", "status": "Reject", "tldr": "", "abstract": "The aim of algorithmic recourse (AR) is generally understood to be the provision of \"actionable\" recommendations to individuals affected by algorithmic decision-making systems, in an attempt to offer the capacity for taking actions that may lead to more desirable outcomes in the future. Over the past few years, AR literature has largely focused on theoretical frameworks to generate \"actionable\" counterfactual explanations that further satisfy various desiderata, such as diversity or robustness. We believe that algorithmic recourse, by its nature, should be seen as a practical problem: real-world socio-technical decision-making systems are complex dynamic entities involving various actors (end users, domain experts, civil servants, system owners, etc.) engaged in social and technical processes. Thus, research needs to account for the specificities of systems where it would be applied. To evaluate how authors envision AR \"in the wild\", we carry out a systematized review of 127 publications pertaining to the problem and identify the real-world considerations that motivate them. Among others, we look at the ways to make recourse (individually) actionable, the involved stakeholders, the perceived challenges, and the availability of practitioner-friendly open-source codebases. We find that there is a strong disconnect between the existing research and the practical requirements for AR. Most importantly, the grounding and validation of algorithmic recourse in real-world contexts remain underexplored. As an attempt to bridge this gap, we provide other authors with five recommendations to make future solutions easier to adapt to their potential real-world applications.", "keywords": "Algorithmic Recourse;Counterfactual Explanations;Explainable AI;Real-world Systems", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Aleksander Buszydlik;Patrick Altmeyer;Cynthia C. S. Liem;Roel Dobbe", "authorids": "~Aleksander_Buszydlik1;~Patrick_Altmeyer1;~Cynthia_C._S._Liem2;~Roel_Dobbe1", "gender": "M;M;M;F", "homepage": ";https://www.paltmeyer.com/;https://www.tudelft.nl/tbm/over-de-faculteit/afdelingen/engineering-systems-and-services/people/assistant-professors/drir-rij-roel-dobbe/;https://www.tudelft.nl/ewi/over-de-faculteit/afdelingen/intelligent-systems/multimedia-computing/people/cynthia-liem/", "dblp": ";;122/8701;36/9954", "google_scholar": ";e7KRRa8AAAAJ;https://scholar.google.nl/citations?user=c4h76VYAAAAJ;https://scholar.google.com.tw/citations?user=BFhUNNEAAAAJ", "orcid": ";0000-0003-4726-8613;;0000-0002-5385-7695", "linkedin": "https://linkedin.com/in/abuszydlik;patrick-altmeyer-a2a25494/;;", "or_profile": "~Aleksander_Buszydlik1;~Patrick_Altmeyer1;~Roel_Dobbe1;~Cynthia_Liem1", "aff": "Delft University of Technology;Delft University of Technology;Delft University of Technology;Delft University of Technology", "aff_domain": "tudelft.nl;tudelft.nl;tudelft.nl;tudelft.nl", "position": "MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024grounding,\ntitle={Grounding and Validation of Algorithmic Recourse in Real-World Contexts: A Systematized Literature Review},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oEmyoy5H5P}\n}", "github": "", "project": "", "reviewers": "yANZ;pd5W;Wg7R;bnWe", "site": "https://openreview.net/forum?id=oEmyoy5H5P", "pdf_size": 338087, "rating": "3;3;5;7", "confidence": "5;5;4;4", "soundness": "2;3;2;3", "novelty": "2;1;2;2", "presentation": "3;3;3;2", "wc_summary": "49;70;86;131", "wc_strengths": "76;26;289;51", "wc_weaknesses": "352;293;265;37", "wc_questions": "9;25;95;117", "wc_limitations": "12;1;22;10", "wc_review": "498;415;757;346", "wc_reply_reviewers": "210;222;146;17", "wc_reply_authors": "596;550;68;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 1.6583123951777 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 30.14133374620307 ], "wc_strengths_avg": [ 110.5, 104.5621824561825 ], "wc_weaknesses_avg": [ 236.75, 119.52484051443031 ], "wc_questions_avg": [ 61.5, 45.52746423863292 ], "wc_limitations_avg": [ 11.25, 7.46240577829965 ], "wc_review_avg": [ 504.0, 155.6679157694353 ], "wc_reply_reviewers_avg": [ 148.75, 81.36760719106836 ], "wc_reply_authors_avg": [ 307.5, 266.6322373607513 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WJKJ-lWSCiEJ:scholar.google.com/&scioq=Grounding+and+Validation+of+Algorithmic+Recourse+in+Real-World+Contexts:+A+Systematized+Literature+Review&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Delft University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.tudelft.nl", "aff_unique_abbr": "TU Delft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Masked Pre-training Enables Universal Zero-shot Denoiser", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93634", "id": "oFgTScAsBr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oFgTScAsBr", "openreview": "https://openreview.net/forum?id=oFgTScAsBr", "poster": "/media/PosterPDFs/NeurIPS%202024/93634.png?t=1729407416.1770706", "project": "", "author_site": "Xiaoxiao Ma, Zhixiang Wei, Yi Jin, Pengyang Ling, Tianle Liu, Ben Wang, Junkang Dai, Huaian Chen", "tldr": "", "abstract": "In this work, we observe that model trained on vast general images via masking strategy, has been naturally embedded with their distribution knowledge, thus spontaneously attains the underlying potential for strong image denoising.\nBased on this observation, we propose a novel zero-shot denoising paradigm, i.e., $\\textbf{M}$asked $\\textbf{P}$re-train then $\\textbf{I}$terative fill ($\\textbf{MPI}$).\nMPI first trains model via masking and then employs pre-trained weight for high-quality zero-shot image denoising on a single noisy image.\nConcretely, MPI comprises two key procedures:\n$\\textbf{1) Masked Pre-training}$ involves training model to reconstruct massive natural images with random masking for generalizable representations, gathering the potential for valid zero-shot denoising on images with varying noise degradation and even in distinct image types.\n$\\textbf{2) Iterative filling}$ exploits pre-trained knowledge for effective zero-shot denoising. It iteratively optimizes the image by leveraging pre-trained weights, focusing on alternate reconstruction of different image parts, and gradually assembles fully denoised image within limited number of iterations.\nComprehensive experiments across various noisy scenarios underscore the notable advances of MPI over previous approaches with a marked reduction in inference time.", "keywords": "image restoration;image denoising\uff0cself-supervised learning", "primary_area": "other", "supplementary_material": "/attachment/7dfccf01ca546a6848a2c0db1445c2eaef47bd3d.zip", "author": "Xiaoxiao Ma;Zhixiang Wei;Yi Jin;Pengyang Ling;Tianle Liu;Ben Wang;Junkang Dai;Huaian Chen", "authorids": "~Xiaoxiao_Ma3;~Zhixiang_Wei1;~Yi_Jin1;~Pengyang_Ling1;~Tianle_Liu2;~Ben_Wang6;~Junkang_Dai1;~Huaian_Chen1", "gender": "F;;M;M;;;M;", "homepage": "https://krennic999.github.io/;;;https://github.com/LPengYang;;;https://www.researchgate.net/profile/Junkang-Dai?ev=hdr_xprf&_sg=YXQCDxGuYQvHCyJin2_YivFzWAIJoWmI_iLMJ7ZiTD03dVCshTEDUlcSn8PJKD4VwDpp6ce2s7cOdi-cmYHyYwkc&_tp=eyJjb250ZXh0Ijp7ImZpcnN0UGFnZSI6Il9kaXJlY3QiLCJwYWdlIjoiX2RpcmVjdCIsInBvc2l0aW9uIjoiZ2xvYmFsSGVhZGVyIn19;", "dblp": ";;38/4674-2;;;;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.ca/citations?hl=en;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";;0000-0001-8232-3863;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Xiaoxiao_Ma3;~Zhixiang_Wei1;~Yi_Jin1;~Pengyang_Ling1;~Tianle_Liu2;~Ben_Wang6;~Junkang_Dai1;~Huaian_Chen1", "aff": "University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;", "position": "MS student;;Associate Professor;PhD student;;PhD student;PhD student;", "bibtex": "@inproceedings{\nma2024masked,\ntitle={Masked Pre-training Enables Universal Zero-shot Denoiser},\nauthor={Xiaoxiao Ma and Zhixiang Wei and Yi Jin and Pengyang Ling and Tianle Liu and Ben Wang and Junkang Dai and Huaian Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oFgTScAsBr}\n}", "github": "", "reviewers": "VFTG;iS6g;uJXC;mCpu", "pdf_size": 9543467, "rating": "4;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;2;3", "novelty": "2;3;3;3", "presentation": "3;2;2;3", "wc_summary": "72;73;46;144", "wc_strengths": "43;25;24;284", "wc_weaknesses": "193;251;246;133", "wc_questions": "10;79;88;1", "wc_limitations": "10;1;6;2", "wc_review": "328;429;410;564", "wc_reply_reviewers": "56;63;25;15", "wc_reply_authors": "385;76;40;22", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.75, 36.43058467826175 ], "wc_strengths_avg": [ 94.0, 109.95680970271918 ], "wc_weaknesses_avg": [ 205.75, 47.7565440541922 ], "wc_questions_avg": [ 44.5, 39.2587569849072 ], "wc_limitations_avg": [ 4.75, 3.5619517121937516 ], "wc_review_avg": [ 432.75, 84.75073745991831 ], "wc_reply_reviewers_avg": [ 39.75, 20.216020874544032 ], "wc_reply_authors_avg": [ 130.75, 148.07325045395606 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8862340236586251129&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;;ustc.edu.cn;ustc.edu.cn;", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Rethinking the Evaluation of Out-of-Distribution Detection: A Sorites Paradox", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97509", "id": "oIt0LAkqNb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oIt0LAkqNb", "openreview": "https://openreview.net/forum?id=oIt0LAkqNb", "poster": "/media/PosterPDFs/NeurIPS%202024/97509.png?t=1731643633.7507029", "project": "", "author_site": "Xingming Long, Jie Zhang, Shiguang Shan, Xilin Chen", "tldr": "", "abstract": "Most existing out-of-distribution (OOD) detection benchmarks classify samples with novel labels as the OOD data. However, some marginal OOD samples actually have close semantic contents to the in-distribution (ID) sample, which makes determining the OOD sample a Sorites Paradox. In this paper, we construct a benchmark named Incremental Shift OOD (IS-OOD) to address the issue, in which we divide the test samples into subsets with different semantic and covariate shift degrees relative to the ID dataset. The data division is achieved through a shift measuring method based on our proposed Language Aligned Image feature Decomposition (LAID). Moreover, we construct a Synthetic Incremental Shift (Syn-IS) dataset that contains high-quality generated images with more diverse covariate contents to complement the IS-OOD benchmark. We evaluate current OOD detection methods on our benchmark and find several important insights: (1) The performance of most OOD detection methods significantly improves as the semantic shift increases; (2) Some methods like GradNorm may have different OOD detection mechanisms as they rely less on semantic shifts to make decisions; (3) Excessive covariate shifts in the image are also likely to be considered as OOD for some methods. Our code and data are released in https://github.com/qqwsad5/IS-OOD.", "keywords": "Out-of-Distribution Detection;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Xingming Long;Jie Zhang;Shiguang Shan;Xilin Chen", "authorids": "~Xingming_Long1;~Jie_Zhang1;~Shiguang_Shan2;~Xilin_Chen1", "gender": "M;M;M;M", "homepage": "https://vipl.ict.ac.cn/edu/student/master/202210/t20221019_123554.html;https://jiezhang010.github.io/;http://vipl.ict.ac.cn/people/sgshan/;http://vipl.ict.ac.cn/people/_xlchen/", "dblp": "47/3598;84/6889-71;s/ShiguangShan;c/XilinChen", "google_scholar": ";https://scholar.google.com.hk/citations?user=hJAhF0sAAAAJ;https://scholar.google.com.tw/citations?user=Vkzd7MIAAAAJ;vVx2v20AAAAJ", "orcid": "0000-0001-7229-9142;0000-0002-8899-3996;0000-0002-8348-392X;0000-0003-3024-4404", "linkedin": ";;;", "or_profile": "~Xingming_Long1;~Jie_Zhang1;~Shiguang_Shan2;~Xilin_Chen4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlong2024rethinking,\ntitle={Rethinking the Evaluation of Out-of-Distribution Detection: A Sorites Paradox},\nauthor={Xingming Long and Jie Zhang and Shiguang Shan and Xilin Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=oIt0LAkqNb}\n}", "github": "", "reviewers": "pWf4;XvCJ;xkod", "pdf_size": 1926884, "rating": "5;7;7", "confidence": "5;4;3", "wc_summary_and_contributions": "77;131;474", "wc_strengths": "44;48;565", "wc_improvement": "141;103;350", "wc_limitations": "59;5;564", "wc_correctness": "20;1;706", "wc_clarity": "5;3;1", "wc_relation_to_prior_work": "9;5;1", "wc_documentation": "12;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "368;298;2663", "wc_reply_reviewers": "70;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 227.33333333333334, 175.80734405087352 ], "wc_strengths_avg": [ 219.0, 244.66439599309635 ], "wc_improvement_avg": [ 198.0, 108.59404526338756 ], "wc_limitations_avg": [ 209.33333333333334, 251.75428938197302 ], "wc_correctness_avg": [ 242.33333333333334, 327.9535875021884 ], "wc_clarity_avg": [ 3.0, 1.632993161855452 ], "wc_relation_to_prior_work_avg": [ 5.0, 3.265986323710904 ], "wc_documentation_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1109.6666666666667, 1098.7442326988064 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 32.99831645537222 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qX4Lc4xWj3YJ:scholar.google.com/&scioq=Rethinking+the+Evaluation+of+Out-of-Distribution+Detection:+A+Sorites+Paradox&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Institute of Computing Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "CoMix: A Comprehensive Benchmark for Multi-Task Comic Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97508", "id": "oJhYtNwGl9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oJhYtNwGl9", "openreview": "https://openreview.net/forum?id=oJhYtNwGl9", "poster": "/media/PosterPDFs/NeurIPS%202024/97508.png?t=1731528397.2246323", "project": "", "author_site": "Emanuele Vivoli, Marco Bertini, Dimosthenis Karatzas", "tldr": "", "abstract": "The comic domain is rapidly advancing with the development of single-page analysis and synthesis models. However, evaluation metrics and datasets lag behind, often limited to small-scale or single-style test sets. We introduce a novel benchmark, CoMix, designed to evaluate the multi-task capabilities of models in comic analysis. Unlike existing benchmarks that focus on isolated tasks such as object detection or text recognition, CoMix addresses a broader range of tasks including object detection, speaker identification, character re-identification, reading order, and multi-modal reasoning tasks like character naming and dialogue generation. Our benchmark comprises three existing datasets with expanded annotations to support multi-task evaluation. To mitigate the over-representation of manga-style data, we have incorporated a new dataset of carefully selected American comic-style books, thereby enriching the diversity of comic styles. CoMix is designed to assess pre-trained models in zero-shot and limited fine-tuning settings, probing their transfer capabilities across different comic styles and tasks. The validation split of the benchmark is publicly available for research purposes, and an evaluation server for the held-out test split is also provided. Comparative results between human performance and state-of-the-art models reveal a significant performance gap, highlighting substantial opportunities for advancements in comic understanding. The dataset, baseline models, and code are accessible at https://github.com/emanuelevivoli/CoMix-dataset. This initiative sets a new standard for comprehensive comic analysis, providing the community with a common benchmark for evaluation on a large and varied set.", "keywords": "comics;manga;benchmarks;detection;linking;diarization", "primary_area": "", "supplementary_material": "/attachment/37214abe1c132e7a41af88d06cf5c4bb16fa4a3f.pdf", "author": "Emanuele Vivoli;Marco Bertini;Dimosthenis Karatzas", "authorids": "~Emanuele_Vivoli1;~Marco_Bertini1;~Dimosthenis_Karatzas1", "gender": "M;M;M", "homepage": "https://emanuelevivoli.github.io;https://www.micc.unifi.it/bertini;https://www.vlr.ai/", "dblp": "327/3511;70/1173-1;03/6509", "google_scholar": ";https://scholar.google.it/citations?user=SBm9ZpYAAAAJ;xASEtrUAAAAJ", "orcid": ";0000-0002-1364-218X;0000-0001-8762-4454", "linkedin": ";marcobertini/;dimostheniskaratzas/", "or_profile": "~Emanuele_Vivoli1;~Marco_Bertini1;~Dimosthenis_Karatzas1", "aff": "University of Florence;Universit\u00e0 degli Studi di Firenze;Computer Vision Center, Universitat Aut\u00f2noma de Barcelona", "aff_domain": "unifi.it;unifi.it;cvc.uab.es", "position": "PhD student;Associate Professor;Associate Director", "bibtex": "@inproceedings{\nvivoli2024comix,\ntitle={CoMix: A Comprehensive Benchmark for Multi-Task Comic Understanding},\nauthor={Emanuele Vivoli and Marco Bertini and Dimosthenis Karatzas},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=oJhYtNwGl9}\n}", "github": "", "reviewers": "CRQ4;voB9;7Mtd", "pdf_size": 5642576, "rating": "6;6;7", "confidence": "3;4;5", "wc_summary_and_contributions": "35;36;123", "wc_strengths": "15;25;2", "wc_improvement": "28;32;2", "wc_limitations": "1;10;2", "wc_correctness": "1;4;1", "wc_clarity": "7;9;1", "wc_relation_to_prior_work": "6;6;1", "wc_documentation": "1;5;1", "wc_additional_feedback": "1;1;1", "wc_review": "95;128;134", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 64.66666666666667, 41.24991582482994 ], "wc_strengths_avg": [ 14.0, 9.41629792788369 ], "wc_improvement_avg": [ 20.666666666666668, 13.299958228840003 ], "wc_limitations_avg": [ 4.333333333333333, 4.0276819911981905 ], "wc_correctness_avg": [ 2.0, 1.4142135623730951 ], "wc_clarity_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_relation_to_prior_work_avg": [ 4.333333333333333, 2.357022603955158 ], "wc_documentation_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 119.0, 17.146428199482248 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12132787221314821001&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "unifi.it;unifi.it;cvc.uab.es", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Florence;Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": ";Computer Vision Center", "aff_unique_url": "https://www.unifi.it;https://www.uab.cat", "aff_unique_abbr": "UNIFI;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Italy;Spain" }, { "title": "AdaPKC: PeakConv with Adaptive Peak Receptive Field for Radar Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93633", "id": "oLcPadFrY3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oLcPadFrY3", "openreview": "https://openreview.net/forum?id=oLcPadFrY3", "poster": "/media/PosterPDFs/NeurIPS%202024/93633.png?t=1730950348.8746943", "project": "", "author_site": "Teng Li, Liwen Zhang, Youcheng Zhang, ZijunHu, Pengcheng Pi, Zongqing Lu, Qingmin Liao, Zhe Ma", "tldr": "", "abstract": "Deep learning-based radar detection technology is receiving increasing attention in areas such as autonomous driving, UAV surveillance, and marine monitoring. Among recent efforts, PeakConv (PKC) provides a solution that can retain the peak response characteristics of radar signals and play the characteristics of deep convolution, thereby improving the effect of radar semantic segmentation (RSS). However, due to the use of a pre-set fixed peak receptive field sampling rule, PKC still has limitations in dealing with problems such as inconsistency of target frequency domain response broadening, non-homogeneous and time-varying characteristic of noise/clutter distribution. Therefore, this paper proposes an idea of adaptive peak receptive field, and upgrades PKC to AdaPKC based on this idea. Beyond that, a novel fine-tuning technology to further boost the performance of AdaPKC-based RSS networks is presented. Through experimental verification using various real-measured radar data (including publicly available low-cost millimeter-wave radar dataset for autonomous driving and self-collected Ku-band surveillance radar dataset), we found that the performance of AdaPKC-based models surpasses other SoTA methods in RSS tasks. The code is available at https://github.com/lihua199710/AdaPKC.", "keywords": "Radar Semantic Segmentation;Adaptive Peak Convolution", "primary_area": "machine_vision", "supplementary_material": "", "author": "Teng Li;Liwen Zhang;Youcheng Zhang;ZijunHu;Pengcheng Pi;Zongqing Lu;Qingmin Liao;Zhe Ma", "authorids": "~Teng_Li7;~Liwen_Zhang5;~Youcheng_Zhang1;~ZijunHu1;~Pengcheng_Pi1;~Zongqing_Lu3;~Qingmin_Liao1;~Zhe_Ma2", "gender": "M;M;F;M;;M;M;M", "homepage": "https://www.researchgate.net/profile/Teng-Li-61;;;https://scholar.google.com/citations?user=eucZucIAAAAJ&hl=zh-CN;;https://www.x-mol.com/university/faculty/61332;https://www.sigs.tsinghua.edu.cn/lqm_en/main.htm;https://dblp.org/pid/22/6672", "dblp": ";94/905;;159/9813.html;;99/965-1.html;13/322;22/6672-1", "google_scholar": ";;;eucZucIAAAAJ;;;;", "orcid": "0009-0004-6668-1950;0000-0001-8457-2943;0000-0001-9762-7966;0009-0009-2510-8898;;0000-0002-1191-9069;0000-0002-7509-3964;", "linkedin": ";;;https://www.linkedin.com/;;;;", "or_profile": "~Teng_Li7;~Liwen_Zhang5;~Youcheng_Zhang1;~ZijunHu1;~Pengcheng_Pi1;~Zongqing_Lu3;~Qingmin_Liao1;~Zhe_Ma2", "aff": "Tsinghua University;Harbin Institute of Technology;Intelligent Science and Technology Academy of CASIC;;;Tsinghua University;Tsinghua University;Intelligent science and technology academy limited of CASIC", "aff_domain": "tsinghua.edu.cn;hit.edu.cn;casic.com.cn;;;tsinghua.edu.cn;tsinghua.edu.cn;casic.com", "position": "PhD student;Researcher;Engineer;;;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024adapkc,\ntitle={Ada{PKC}: PeakConv with Adaptive Peak Receptive Field for Radar Semantic Segmentation},\nauthor={Teng Li and Liwen Zhang and Youcheng Zhang and ZijunHu and Pengcheng Pi and Zongqing Lu and Qingmin Liao and Zhe Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oLcPadFrY3}\n}", "github": "", "reviewers": "qbsg;mTDV;ixjc;i1nF", "pdf_size": 4985425, "rating": "3;5;5;6", "confidence": "5;2;3;5", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "36;49;59;79", "wc_strengths": "21;63;38;63", "wc_weaknesses": "43;29;107;45", "wc_questions": "34;28;26;180", "wc_limitations": "10;1;13;4", "wc_review": "144;170;243;371", "wc_reply_reviewers": "0;113;0;17", "wc_reply_authors": "186;149;199;98", "reply_reviewers": "0;1;0;1", "reply_authors": "3;3;3;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 55.75, 15.706288549495072 ], "wc_strengths_avg": [ 46.25, 17.795715776557007 ], "wc_weaknesses_avg": [ 56.0, 30.083217912982647 ], "wc_questions_avg": [ 67.0, 65.30696746902278 ], "wc_limitations_avg": [ 7.0, 4.743416490252569 ], "wc_review_avg": [ 232.0, 88.07667114508813 ], "wc_reply_reviewers_avg": [ 32.5, 46.99202059924642 ], "wc_reply_authors_avg": [ 158.0, 39.198214245039274 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.22075539284417398, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CVvrdiK0RoIJ:scholar.google.com/&scioq=AdaPKC:+PeakConv+with+Adaptive+Peak+Receptive+Field+for+Radar+Semantic+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "tsinghua.edu.cn;hit.edu.cn;casic.com.cn;;;tsinghua.edu.cn;tsinghua.edu.cn;casic.com", "author_num": 8, "aff_unique_index": "0;1;2;0;0;2", "aff_unique_norm": "Tsinghua University;Harbin Institute of Technology;China Aerospace Science and Industry Corporation", "aff_unique_dep": ";;Intelligent Science and Technology Academy", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hit.edu.cn/;http://www.casic.com.cn/", "aff_unique_abbr": "THU;HIT;CASIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Self-Taught Recognizer: Toward Unsupervised Adaptation for Speech Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93632", "id": "oLoqHRbXYE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oLoqHRbXYE", "openreview": "https://openreview.net/forum?id=oLoqHRbXYE", "poster": "/media/PosterPDFs/NeurIPS%202024/93632.png?t=1733667450.3972077", "project": "", "author_site": "Yuchen Hu, CHEN CHEN, Chao-Han Yang, Chengwei Qin, Pin-Yu Chen, Eng-Siong Chng, Chao Zhang", "tldr": "", "abstract": "We propose an unsupervised adaptation framework, Self-TAught Recognizer (STAR), which leverages unlabeled data to enhance the robustness of automatic speech recognition (ASR) systems in diverse target domains, such as noise and accents. STAR is developed for prevalent speech foundation models based on Transformer-related architecture with auto-regressive decoding (e.g., Whisper, Canary). Specifically, we propose a novel indicator that empirically integrates step-wise information during decoding to assess the token-level quality of pseudo labels without ground truth, thereby guiding model updates for effective unsupervised adaptation. Experimental results show that STAR achieves an average of 13.5% relative reduction in word error rate across 14 target domains, and it sometimes even approaches the upper-bound performance of supervised adaptation. Surprisingly, we also observe that STAR prevents the adapted model from the common catastrophic forgetting problem without recalling source-domain data. Furthermore, STAR exhibits high data efficiency that only requires less than one-hour unlabeled data, and seamless generality to alternative large speech models and speech translation tasks. Our code aims to open source to the research communities.", "keywords": "Automatic speech recognition;speech foundation models;unsupervised domain adaptation", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/0a9dc027cc5eabb8df5e7b0737398f9b68de699a.zip", "author": "Yuchen Hu;Chen Chen;Chao-Han Huck Yang;Chengwei Qin;Pin-Yu Chen;EngSiong Chng;Chao Zhang", "authorids": "~Yuchen_Hu1;~Chen_Chen56;~Chao-Han_Huck_Yang1;~Chengwei_Qin1;~Pin-Yu_Chen1;~EngSiong_Chng1;~Chao_Zhang20", "gender": "M;M;M;M;M;M;M", "homepage": "https://yuchen005.github.io/;https://huckiyang.github.io/;;http://www.pinyuchen.com;https://personal.ntu.edu.sg/aseschng/intro1.html;http://mi.eng.cam.ac.uk/~cz277/;", "dblp": ";230/4012;195/2732;39/8969;c/ChngEngSiong;94/3019-31.html;65/4423", "google_scholar": "Neo-1mIAAAAJ;TT3XJW8AAAAJ;;jxwlCUUAAAAJ;https://scholar.google.com.tw/citations?user=FJodrCcAAAAJ;https://scholar.google.co.uk/citations?view_op=list_works;uUmSp1QAAAAJ", "orcid": ";0000-0003-2879-8811;;0000-0003-1039-8369;;;", "linkedin": ";;chengwei-qin-3401a1107/;pin-yu-chen-940062a2;;;", "or_profile": "~Yuchen_Hu1;~Chao-Han_Huck_Yang1;~Chengwei_Qin1;~Pin-Yu_Chen1;~EngSiong_Chng1;~Chao_Zhang20;~CHEN_CHEN37", "aff": "Nanyang Technological University;NVIDIA Research;Nanyang Technological University;International Business Machines;Nanyang Technological University;University College London;Nanyang Technological University", "aff_domain": "ntu.edu.sg;nvidia.com;ntu.edu.sg;ibm.com;ntu.edu.sg;ucl.ac.uk;ntu.edu", "position": "PhD student;Researcher;PhD student;Principal Researcher;Associate Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nhu2024selftaught,\ntitle={Self-Taught Recognizer: Toward Unsupervised Adaptation for Speech Foundation Models},\nauthor={Yuchen Hu and Chen Chen and Chao-Han Huck Yang and Chengwei Qin and Pin-Yu Chen and EngSiong Chng and Chao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oLoqHRbXYE}\n}", "github": "", "reviewers": "8Y8C;zXq9;LswZ;vnSx", "pdf_size": 7480768, "rating": "6;6;6;7", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "90;130;135;98", "wc_strengths": "28;115;50;66", "wc_weaknesses": "141;218;265;88", "wc_questions": "84;64;9;142", "wc_limitations": "22;63;1;7", "wc_review": "365;590;460;401", "wc_reply_reviewers": "27;25;30;23", "wc_reply_authors": "29;29;85;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 113.25, 19.536824204563032 ], "wc_strengths_avg": [ 64.75, 31.995116814914116 ], "wc_weaknesses_avg": [ 178.0, 68.26053032316699 ], "wc_questions_avg": [ 74.75, 47.557202398795496 ], "wc_limitations_avg": [ 23.25, 24.190649019817553 ], "wc_review_avg": [ 454.0, 85.53069624409707 ], "wc_reply_reviewers_avg": [ 26.25, 2.5860201081971503 ], "wc_reply_authors_avg": [ 43.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=252158630870004293&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "ntu.edu.sg;nvidia.com;ntu.edu.sg;ibm.com;ntu.edu.sg;ucl.ac.uk;ntu.edu", "author_num": 7, "aff_unique_index": "0;1;0;2;0;3;0", "aff_unique_norm": "Nanyang Technological University;NVIDIA;International Business Machines Corporation;University College London", "aff_unique_dep": ";NVIDIA Research;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.nvidia.com/research;https://www.ibm.com;https://www.ucl.ac.uk", "aff_unique_abbr": "NTU;NVIDIA;IBM;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;2;0", "aff_country_unique": "Singapore;United States;United Kingdom" }, { "title": "Prompt-Agnostic Adversarial Perturbation for Customized Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93631", "id": "oMHpejyGdx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oMHpejyGdx", "openreview": "https://openreview.net/forum?id=oMHpejyGdx", "poster": "/media/PosterPDFs/NeurIPS%202024/93631.png?t=1731573052.371355", "project": "", "author_site": "Cong Wan, Yuhang He, Xiang Song, Yihong Gong", "tldr": "", "abstract": "Diffusion models have revolutionized customized text-to-image generation, allowing for efficient synthesis of photos from personal data with textual descriptions. However, these advancements bring forth risks including privacy breaches and unauthorized replication of artworks. Previous researches primarily center around using \u201cprompt-specific methods\u201d to generate adversarial examples to protect personal images, yet the effectiveness of existing methods is hindered by constrained adaptability to different prompts.\nIn this paper, we introduce a Prompt-Agnostic Adversarial Perturbation (PAP) method for customized diffusion models. PAP first models the prompt distribution using a Laplace Approximation, and then produces prompt-agnostic perturbations by maximizing a disturbance expectation based on the modeled distribution.\nThis approach effectively tackles the prompt-agnostic attacks, leading to improved defense stability.\nExtensive experiments in face privacy and artistic style protection, demonstrate the superior generalization of our method in comparison to existing techniques.", "keywords": "Adversarial perturbations;customized diffusion models;privacy protection;prompt distribution modeling", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/cb14949ddc54aee0cc2058599c7d1229c3815aff.zip", "author": "Cong Wan;Yuhang He;Xiang Song;Yihong Gong", "authorids": "~Cong_Wan1;~Yuhang_He2;~Xiang_Song2;~Yihong_Gong1", "gender": "M;M;M;M", "homepage": "https://github.com/vancyland;https://gehenhe.github.io/;https://github.com/SONGX1997;http://gr.xjtu.edu.cn/web/ygong", "dblp": "131/9875;;71/6574-5;62/6520", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=9VCIiVcAAAAJ;https://scholar.google.com.hk/citations?user=DnNdGckAAAAJ;", "orcid": "0009-0007-2107-0007;0000-0002-6750-1403;0000-0003-1740-9104;", "linkedin": ";;;", "or_profile": "~Cong_Wan1;~Yuhang_He2;~Xiang_Song2;~Yihong_Gong1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University", "aff_domain": "xjtu.edu;xjtu.edu;xjtu.edu.cn;xjtu.edu.cn", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwan2024promptagnostic,\ntitle={Prompt-Agnostic Adversarial Perturbation for Customized Diffusion Models},\nauthor={Cong Wan and Yuhang He and Xiang Song and Yihong Gong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oMHpejyGdx}\n}", "github": "", "reviewers": "Q3Lh;JLDf;JA4c;nNzN", "pdf_size": 6666346, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "4;3;2;3", "wc_summary": "102;192;41;50", "wc_strengths": "37;71;8;37", "wc_weaknesses": "99;150;18;2", "wc_questions": "3;122;126;59", "wc_limitations": "1;70;1;8", "wc_review": "242;605;194;156", "wc_reply_reviewers": "0;340;17;22", "wc_reply_authors": "0;688;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 96.25, 59.98489393172251 ], "wc_strengths_avg": [ 38.25, 22.30891077574161 ], "wc_weaknesses_avg": [ 67.25, 60.28836952514141 ], "wc_questions_avg": [ 77.5, 50.55937104039171 ], "wc_limitations_avg": [ 20.0, 29.008619408720573 ], "wc_review_avg": [ 299.25, 179.13594697882388 ], "wc_reply_reviewers_avg": [ 94.75, 141.82978354351388 ], "wc_reply_authors_avg": [ 172.0, 297.9127389018469 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15012896035700905784&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "xjtu.edu;xjtu.edu;xjtu.edu.cn;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Label is Worth A Thousand Images in Dataset Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93630", "id": "oNMnR0NJ2e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oNMnR0NJ2e", "openreview": "https://openreview.net/forum?id=oNMnR0NJ2e", "poster": "/media/PosterPDFs/NeurIPS%202024/93630.png?t=1733522595.7444375", "project": "", "author_site": "Tian Qin, Zhiwei Deng, David Alvarez-Melis", "tldr": "", "abstract": "Data *quality* is a crucial factor in the performance of machine learning models, a principle that dataset distillation methods exploit by compressing training datasets into much smaller counterparts that maintain similar downstream performance. Understanding how and why data distillation methods work is vital not only for improving these methods but also for revealing fundamental characteristics of \"good\u201d training data. However, a major challenge in achieving this goal is the observation that distillation approaches, which rely on sophisticated but mostly disparate methods to generate synthetic data, have little in common with each other. In this work, we highlight a largely overlooked aspect common to most of these methods: the use of soft (probabilistic) labels. Through a series of ablation experiments, we study the role of soft labels in depth. Our results reveal that the main factor explaining the performance of state-of-the-art distillation methods is not the specific techniques used to generate synthetic data but rather the use of soft labels. Furthermore, we demonstrate that not all soft labels are created equal; they must contain *structured information* to be beneficial. We also provide empirical scaling laws that characterize the effectiveness of soft labels as a function of images-per-class in the distilled dataset and establish an empirical Pareto frontier for data-efficient learning. Combined, our findings challenge conventional wisdom in dataset distillation, underscore the importance of soft labels in learning, and suggest new directions for improving distillation methods. Code for all experiments is available at https://github.com/sunnytqin/no-distillation.", "keywords": "Dataset distillation;Data condensation;Synthetic data generation;Data-efficient learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/f0c912e0794b86294477c96a2e59d41adccaa847.zip", "author": "Tian Qin;Zhiwei Deng;David Alvarez-Melis", "authorids": "~Tian_Qin3;~Zhiwei_Deng3;~David_Alvarez-Melis1", "gender": "F;M;M", "homepage": "https://sunnytqin.github.io/;http://www.zhiweideng.com;https://dmelis.github.io/", "dblp": ";160/3578;168/8255", "google_scholar": ";tWBPUHwAAAAJ;XsxZrYYAAAAJ", "orcid": ";;0000-0002-9591-8986", "linkedin": "sunny-qin-b70567203/;;", "or_profile": "~Tian_Qin3;~Zhiwei_Deng3;~David_Alvarez-Melis1", "aff": "Harvard University, Harvard University;Google Deepmind;Microsoft", "aff_domain": "g.harvard.edu;google.com;microsoft.com", "position": "PhD student;Research Scientist;Senior Researcher", "bibtex": "@inproceedings{\nqin2024a,\ntitle={A Label is Worth A Thousand Images in Dataset Distillation},\nauthor={Tian Qin and Zhiwei Deng and David Alvarez-Melis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oNMnR0NJ2e}\n}", "github": "", "reviewers": "UKPz;hrBt;pAYe;Zv9o", "pdf_size": 12602843, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;4;3;4", "presentation": "2;4;3;3", "wc_summary": "31;77;76;62", "wc_strengths": "45;83;49;47", "wc_weaknesses": "62;128;102;202", "wc_questions": "70;4;98;2", "wc_limitations": "70;1;1;5", "wc_review": "278;293;326;318", "wc_reply_reviewers": "0;26;0;27", "wc_reply_authors": "264;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 61.5, 18.580904176062045 ], "wc_strengths_avg": [ 56.0, 15.652475842498529 ], "wc_weaknesses_avg": [ 123.5, 51.0563414278775 ], "wc_questions_avg": [ 43.5, 41.69832130913665 ], "wc_limitations_avg": [ 19.25, 29.345996319770776 ], "wc_review_avg": [ 303.75, 19.2142525225417 ], "wc_reply_reviewers_avg": [ 13.25, 13.254716141811564 ], "wc_reply_authors_avg": [ 66.0, 114.3153532995459 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4612234211112010820&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "g.harvard.edu;google.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Harvard University;DeepMind;Microsoft", "aff_unique_dep": ";DeepMind;Microsoft Corporation", "aff_unique_url": "https://www.harvard.edu;https://deepmind.com;https://www.microsoft.com", "aff_unique_abbr": "Harvard;DeepMind;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "oOiXomyexS", "title": "A Benchmark on Directed Graph Representation Learning in Hardware Designs", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "To keep pace with the rapid advancements in design complexity within modern computing systems, directed graph representation learning (DGRL) has become crucial, particularly for encoding circuit netlists, computational graphs, and developing surrogate models for hardware performance prediction. However, DGRL remains relatively unexplored, especially in the hardware domain, mainly due to the lack of comprehensive and user-friendly benchmarks. This study presents a novel benchmark comprising five hardware design datasets and 13 prediction tasks spanning various levels of circuit abstraction. We evaluate 21 DGRL models, employing diverse graph neural networks and graph transformers (GTs) as backbones, enhanced by positional encodings (PEs) tailored for directed graphs. Our results highlight that bidirected (BI) message passing neural networks (MPNNs) and robust PEs significantly enhance model performance. Notably, the top-performing models include PE-enhanced GTs interleaved with BI-MPNN layers and BI-Graph Isomorphism Network, both surpassing baselines across the 13 tasks. Additionally, our investigation into out-of-distribution (OOD) performance emphasizes the urgent need to improve OOD generalization in DGRL models. This benchmark, implemented with a modular codebase, streamlines the evaluation of DGRL models for both hardware and ML practitioners.", "keywords": "hardware design;electronic design automation;directed graph;graph neural network;positional encoding;graph transformer", "primary_area": "", "supplementary_material": "/attachment/af954a234addc4663ddf7da03e8584a99af9e2d8.zip", "author": "Haoyu Peter Wang;Yinan Huang;Nan Wu;Pan Li", "authorids": "~Haoyu_Peter_Wang1;~Yinan_Huang1;~Nan_Wu2;~Pan_Li2", "gender": "M;;F;", "homepage": ";;;", "dblp": ";288/1207;;https://dblp.org/pers/hd/l/Li_0005:Pan", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;IroP0EwAAAAJ", "orcid": ";;0000-0001-8291-4292;", "linkedin": ";;;pan-li-b951105a/", "or_profile": "~Haoyu_Peter_Wang1;~Yinan_Huang1;~Nan_Wu2;~Pan_Li2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;George Washington University;Purdue University", "aff_domain": "gatech.edu;gatech.edu;gwu.edu;purdue.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Benchmark on Directed Graph Representation Learning in Hardware Designs},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oOiXomyexS}\n}", "github": "", "project": "", "reviewers": "sDXw;UpZa;7fAD;JZ3c", "site": "https://openreview.net/forum?id=oOiXomyexS", "pdf_size": 608788, "rating": "5;7;7;8", "confidence": "5;4;4;3", "wc_summary_and_contributions": "68;94;50;63", "wc_strengths": "80;61;114;44", "wc_improvement": "6;1;2;31", "wc_limitations": "231;16;95;8", "wc_correctness": "18;17;11;8", "wc_clarity": "7;13;16;12", "wc_relation_to_prior_work": "1;38;10;26", "wc_documentation": "1;37;1;14", "wc_additional_feedback": "1;1;1;1", "wc_review": "413;278;300;207", "wc_reply_reviewers": "121;9;0;0", "wc_reply_authors": "207;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 68.75, 15.990231392947383 ], "wc_strengths_avg": [ 74.75, 25.9939896899264 ], "wc_improvement_avg": [ 10.0, 12.267844146385297 ], "wc_limitations_avg": [ 87.5, 89.55584849690165 ], "wc_correctness_avg": [ 13.5, 4.153311931459037 ], "wc_clarity_avg": [ 12.0, 3.24037034920393 ], "wc_relation_to_prior_work_avg": [ 18.75, 14.271912976192084 ], "wc_documentation_avg": [ 13.25, 14.703315952532613 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.5, 73.99493225890541 ], "wc_reply_reviewers_avg": [ 32.5, 51.227434056372566 ], "wc_reply_authors_avg": [ 51.75, 89.6336292916894 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_SYh6-4E6aUJ:scholar.google.com/&scioq=A+Benchmark+on+Directed+Graph+Representation+Learning+in+Hardware+Designs&hl=en&as_sdt=0,48", "gs_version_total": 3, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Georgia Institute of Technology;George Washington University;Purdue University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.gwu.edu;https://www.purdue.edu", "aff_unique_abbr": "Georgia Tech;GWU;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Enhancing Efficiency of Safe Reinforcement Learning via Sample Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93629", "id": "oPFjhl6DpR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oPFjhl6DpR", "openreview": "https://openreview.net/forum?id=oPFjhl6DpR", "poster": "", "project": "", "author_site": "Shangding Gu, Laixi Shi, Yuhao Ding, Alois Knoll, Costas J Spanos, Adam Wierman, Ming Jin", "tldr": "", "abstract": "Safe reinforcement learning (RL) is crucial for deploying RL agents in real-world applications, as it aims to maximize long-term rewards while satisfying safety constraints. However, safe RL often suffers from sample inefficiency, requiring extensive interactions with the environment to learn a safe policy. We propose Efficient Safe Policy Optimization (ESPO), a novel approach that enhances the efficiency of safe RL through sample manipulation. ESPO employs an optimization framework with three modes: maximizing rewards, minimizing costs, and balancing the trade-off between the two. By dynamically adjusting the sampling process based on the observed conflict between reward and safety gradients, ESPO theoretically guarantees convergence, optimization stability, and improved sample complexity bounds. Experiments on the Safety-MuJoCo and Omnisafe benchmarks demonstrate that ESPO significantly outperforms existing primal-based and primal-dual-based baselines in terms of reward maximization and constraint satisfaction. Moreover, ESPO achieves substantial gains in sample efficiency, requiring 25--29\\% fewer samples than baselines, and reduces training time by 21--38\\%.", "keywords": "Safe Reinforcement Learning;Sample Manipulation;Efficient Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Shangding Gu;Laixi Shi;Yuhao Ding;Alois Knoll;Costas Spanos;Adam Wierman;Ming Jin", "authorids": "~Shangding_Gu1;~Laixi_Shi1;~Yuhao_Ding2;~Alois_Knoll1;~Costas_Spanos1;~Adam_Wierman1;~Ming_Jin2", "gender": "M;F;M;M;;M;M", "homepage": "https://people.eecs.berkeley.edu/~shangding.gu/index.html;https://laixishi.github.io/;https://yuhaod.github.io/homepage/;https://www.in.tum.de/i06/people/prof-dr-ing-habil-alois-knoll/;https://www2.eecs.berkeley.edu/Faculty/Homepages/spanos.html;https://adamwierman.com/;http://www.jinming.tech/", "dblp": "268/8183;211/7965;218/2837;k/AloisKnoll;;56/4447;", "google_scholar": "E1GCDXUAAAAJ;V8RkRr8AAAAJ;Q65PtLgAAAAJ;https://scholar.google.de/citations?user=-CA8QgwAAAAJ;;4OvOdSgAAAAJ;YdxdTtkAAAAJ", "orcid": ";;;0000-0003-4840-076X;;0000-0002-5923-0199;", "linkedin": ";;;alois-knoll-505480166;;adam-wierman-a529474/;", "or_profile": "~Shangding_Gu1;~Laixi_Shi1;~Yuhao_Ding2;~Alois_Knoll1;~Costas_Spanos1;~Adam_Wierman1;~Ming_Jin2", "aff": "University of California, Berkeley;California Institute of Technology;Cubist;Technical University Munich;University of California, Berkeley;California Institute of Technology;Virginia Tech", "aff_domain": "berkeley.edu;caltech.edu;cubistsystematic.com;tum.de;berkeley.edu;caltech.edu;vt.edu", "position": "Postdoc;Postdoc;Researcher;Full Professor;Emeritus;Professor;Assistant Professor", "bibtex": "@inproceedings{\ngu2024enhancing,\ntitle={Enhancing Efficiency of Safe Reinforcement Learning via Sample Manipulation},\nauthor={Shangding Gu and Laixi Shi and Yuhao Ding and Alois Knoll and Costas Spanos and Adam Wierman and Ming Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oPFjhl6DpR}\n}", "github": "", "reviewers": "qrLu;by8k;oXwv;qn1v", "pdf_size": 1784442, "rating": "5;5;6;6", "confidence": "3;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "82;69;59;121", "wc_strengths": "56;57;64;94", "wc_weaknesses": "107;42;21;93", "wc_questions": "243;50;1;35", "wc_limitations": "7;65;4;5", "wc_review": "495;283;149;348", "wc_reply_reviewers": "123;0;109;40", "wc_reply_authors": "651;259;588;47", "reply_reviewers": "2;0;2;1", "reply_authors": "4;2;4;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.75, 23.54118731075389 ], "wc_strengths_avg": [ 67.75, 15.465687828221544 ], "wc_weaknesses_avg": [ 65.75, 35.39332564199075 ], "wc_questions_avg": [ 82.25, 94.49173244257933 ], "wc_limitations_avg": [ 20.25, 25.85899263312475 ], "wc_review_avg": [ 318.75, 124.51179662987761 ], "wc_reply_reviewers_avg": [ 68.0, 50.284192347098504 ], "wc_reply_authors_avg": [ 386.25, 246.00749480452825 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=727083994924844738&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "berkeley.edu;caltech.edu;cubistsystematic.com;tum.de;berkeley.edu;caltech.edu;vt.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0;1;4", "aff_unique_norm": "University of California, Berkeley;California Institute of Technology;Cubist;Technical University of Munich;Virginia Tech", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.berkeley.edu;https://www.caltech.edu;;https://www.tum.de;https://www.vt.edu", "aff_unique_abbr": "UC Berkeley;Caltech;;TUM;VT", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Berkeley;Pasadena;", "aff_country_unique_index": "0;0;2;0;0;0", "aff_country_unique": "United States;;Germany" }, { "title": "Referencing Where to Focus: Improving Visual Grounding with Referential Query", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93628", "id": "oPvBnPTbQv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oPvBnPTbQv", "openreview": "https://openreview.net/forum?id=oPvBnPTbQv", "poster": "/media/PosterPDFs/NeurIPS%202024/93628.png?t=1731662056.0897477", "project": "", "author_site": "Yabing Wang, Zhuotao Tian, Qingpei Guo, Zheng Qin, Sanping Zhou, Ming Yang, Le Wang", "tldr": "", "abstract": "Visual Grounding aims to localize the referring object in an image given a natural language expression. Recent advancements in DETR-based visual grounding methods have attracted considerable attention, as they directly predict the coordinates of the target object without relying on additional efforts, such as pre-generated proposal candidates or pre-defined anchor boxes. However, existing research primarily focuses on designing stronger multi-modal decoder, which typically generates learnable queries by random initialization or by using linguistic embeddings. This vanilla query generation approach inevitably increases the learning difficulty for the model, as it does not involve any target-related information at the beginning of decoding. Furthermore, they only use the deepest image feature during the query learning process, overlooking the importance of features from other levels. To address these issues, we propose a novel approach, called RefFormer. It consists of the query adaption module that can be seamlessly integrated into CLIP and generate the referential query to provide the prior context for decoder, along with a task-specific decoder. By incorporating the referential query into the decoder, we can effectively mitigate the learning difficulty of the decoder, and accurately concentrate on the target object. Additionally, our proposed query adaption module can also act as an adapter, preserving the rich knowledge within CLIP without the need to tune the parameters of the backbone network. Extensive experiments demonstrate the effectiveness and efficiency of our proposed method, outperforming state-of-the-art approaches on five visual grounding benchmarks.", "keywords": "Visual Grounding", "primary_area": "other", "supplementary_material": "/attachment/ad03a61584bbd5986a51e2d2808f584c9bcfb1ed.zip", "author": "Yabing Wang;Zhuotao Tian;Qingpei Guo;Zheng Qin;Sanping Zhou;Ming Yang;Le Wang", "authorids": "~Yabing_Wang1;~Zhuotao_Tian1;~Qingpei_Guo1;~Zheng_Qin3;~Sanping_Zhou1;~Ming_Yang2;~Le_Wang8", "gender": "F;M;M;M;M;M;M", "homepage": ";https://scholar.google.com/citations?user=mEjhz-IAAAAJ&hl=zh-CN;;;http://gr.xjtu.edu.cn/web/spzhou;http://users.ece.northwestern.edu/~mya671/;http://gr.xjtu.edu.cn/web/lewang", "dblp": ";243/7181;164/5991;;179/0508;98/2604-7;79/652-3", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;mEjhz-IAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;2Drvv44AAAAJ;uBHJx08AAAAJ;RypRCUQAAAAJ", "orcid": ";;;;;0000-0003-1691-6817;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAAD7pBK4BVPuVah75zLbU85V5hgKGyMPo0tM;;ming-yang-29ba294/;", "or_profile": "~Yabing_Wang1;~Zhuotao_Tian1;~Qingpei_Guo1;~Zheng_Qin3;~Sanping_Zhou1;~Ming_Yang2;~Le_Wang8", "aff": "Xi'an Jiaotong University;SmartMore;Ant Group;Alibaba Group;Xi'an Jiaotong University;Ant Group;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;smartmore.com;antgroup.com;antgroup.com;xjtu.edu;antgroup.com;xjtu.edu.cn", "position": "PhD student;Researcher;Researcher;Intern;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2024referencing,\ntitle={Referencing Where to Focus: Improving Visual Grounding with Referential Query},\nauthor={Yabing Wang and Zhuotao Tian and Qingpei Guo and Zheng Qin and Sanping Zhou and Ming Yang and Le Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oPvBnPTbQv}\n}", "github": "", "reviewers": "DDuF;cZhf;VGeh;RwV9", "pdf_size": 3834690, "rating": "3;6;7;7", "confidence": "5;4;5;5", "soundness": "3;2;3;4", "novelty": "3;2;4;4", "presentation": "3;4;4;3", "wc_summary": "51;74;94;98", "wc_strengths": "35;57;141;89", "wc_weaknesses": "48;174;70;113", "wc_questions": "13;97;18;40", "wc_limitations": "4;21;61;4", "wc_review": "151;423;384;344", "wc_reply_reviewers": "20;30;60;56", "wc_reply_authors": "315;0;0;47", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 79.25, 18.673175948402566 ], "wc_strengths_avg": [ 80.5, 39.859126934743564 ], "wc_weaknesses_avg": [ 101.25, 48.06961098240759 ], "wc_questions_avg": [ 42.0, 33.3391661563393 ], "wc_limitations_avg": [ 22.5, 23.286262044390035 ], "wc_review_avg": [ 325.5, 104.54783594125705 ], "wc_reply_reviewers_avg": [ 41.5, 16.9336942218761 ], "wc_reply_authors_avg": [ 90.5, 131.02766883372382 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.08804509063256237, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7535393315567535385&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "xjtu.edu.cn;smartmore.com;antgroup.com;antgroup.com;xjtu.edu;antgroup.com;xjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;0;2;0", "aff_unique_norm": "Xi'an Jiao Tong University;SmartMore;Ant Group;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.xjtu.edu.cn;;https://www.antgroup.com;https://www.alibaba.com", "aff_unique_abbr": "XJTU;;Ant Group;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Penalty-based Methods for Simple Bilevel Optimization under H\u00f6lderian Error Bounds", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93627", "id": "oQ1Zj9iH88", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oQ1Zj9iH88", "openreview": "https://openreview.net/forum?id=oQ1Zj9iH88", "poster": "/media/PosterPDFs/NeurIPS%202024/93627.png?t=1731512130.5783417", "project": "", "author_site": "Pengyu Chen, Xu Shi, Rujun Jiang, Jiulin Wang", "tldr": "", "abstract": "This paper investigates simple bilevel optimization problems where we minimize a convex upper-level objective over the optimal solution set of a convex lower-level objective. Existing methods for such problems either only guarantee asymptotic convergence, have slow sublinear rates, or require strong assumptions. To address these challenges, we propose a penalization framework that delineates the relationship between approximate solutions of the original problem and its reformulated counterparts. This framework accommodates varying assumptions regarding smoothness and convexity, enabling the application of specific methods with different complexity results. \nSpecifically, when both upper- and lower-level objectives are composite convex functions, under an $\\alpha$-H\u00f6lderian error bound condition and certain mild assumptions, our algorithm attains an $(\\epsilon,\\epsilon^{\\beta})$-optimal solution of the original problem for any $\\beta> 0$ within $\\mathcal{O}\\left(\\sqrt{{1}/{\\epsilon^{\\max\\\\{\\alpha,\\beta\\\\}}}}\\right)$ iterations. The result can be improved further if the smooth part of the upper-level objective is strongly convex. We also establish complexity results when the upper- and lower-level objectives are general nonsmooth functions. Numerical experiments demonstrate the effectiveness of our algorithms.", "keywords": "Simple Bilevel Optimization;H\u00f6lderian Error Bound;Penalization;Complexity", "primary_area": "optimization", "supplementary_material": "/attachment/b7cbcd6fd5b4c5860fd7cf08570e56d2e1be4b9f.zip", "author": "Pengyu Chen;Xu Shi;Rujun Jiang;Jiulin Wang", "authorids": "~Pengyu_Chen3;~Xu_Shi1;~Rujun_Jiang1;~Jiulin_Wang1", "gender": "F;M;M;M", "homepage": "https://github.com/BrittanyChan0108;https://github.com/XuShi22;https://rjjiang.github.io/;", "dblp": ";140/3901;187/7827;208/9839", "google_scholar": ";;UxH6ELwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0005-3513-2040;0000-0002-6610-6778;", "linkedin": ";;;", "or_profile": "~Pengyu_Chen3;~Xu_Shi1;~Rujun_Jiang1;~Jiulin_Wang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nchen2024penaltybased,\ntitle={Penalty-based Methods for Simple Bilevel Optimization under H\\\"olderian Error Bounds},\nauthor={Pengyu Chen and Xu Shi and Rujun Jiang and Jiulin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oQ1Zj9iH88}\n}", "github": "", "reviewers": "fmaf;EYqV;j2Bu;WQ6y", "pdf_size": 2669184, "rating": "5;5;7;7", "confidence": "3;3;3;3", "soundness": "4;3;4;3", "novelty": "3;2;3;2", "presentation": "4;3;3;3", "wc_summary": "103;63;45;86", "wc_strengths": "98;36;60;26", "wc_weaknesses": "118;153;314;243", "wc_questions": "46;58;137;289", "wc_limitations": "4;4;14;2", "wc_review": "369;314;570;646", "wc_reply_reviewers": "17;0;48;42", "wc_reply_authors": "320;383;225;298", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;3;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 22.060994991160303 ], "wc_strengths_avg": [ 55.0, 27.730849247724095 ], "wc_weaknesses_avg": [ 207.0, 76.78215938614908 ], "wc_questions_avg": [ 132.5, 96.8826609874027 ], "wc_limitations_avg": [ 6.0, 4.69041575982343 ], "wc_review_avg": [ 474.75, 137.31601326866434 ], "wc_reply_reviewers_avg": [ 26.75, 19.330998422223306 ], "wc_reply_authors_avg": [ 306.5, 56.45573487255303 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=199536305651226193&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "oQc7TOCk5G", "title": "On Theoretical Limits of Learning with Label Differential Privacy", "track": "main", "status": "Reject", "tldr": "", "abstract": "Label differential privacy (DP) is designed for learning problems with private labels and public features. Although various methods have been proposed for learning under label DP, the theoretical limits remain unknown. The main challenge is to take infimum over all possible learners with arbitrary model complexity. In this paper, we investigate the fundamental limits of learning with label DP under both central and local models. To overcome the challenge above, we derive new lower bounds on testing errors that are adaptive to the model complexity. Our analyses indicate that $\\epsilon$-local label DP only enlarges the sample complexity with respect to $\\epsilon$, without affecting the convergence rate over the sample size $N$, except the case with heavy-tailed label. Under the central model, the performance loss due to the privacy mechanism is further weakened, such that the additional sample complexity becomes negligible. Overall, our analysis validates the promise of learning under the label DP from a theoretical perspective and shows that the learning performance can be significantly improved by weakening the DP definition to only labels.", "keywords": "label differential privacy", "primary_area": "learning_theory", "supplementary_material": "", "author": "Puning Zhao;Huiwen Wu;Jiafei Wu;Zhe Liu", "authorids": "~Puning_Zhao1;~Huiwen_Wu2;~Jiafei_Wu1;~Zhe_Liu11", "gender": "M;F;M;M", "homepage": "https://scst.sysu.edu.cn/members/members01/1417942.htm;;https://www.eee.hku.hk/~wujiafei/;", "dblp": "216/2680;90/3516;227/7227;70/1220-1.html", "google_scholar": "1jc7kasAAAAJ;EYswXa8AAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0009-0002-3264-3417;0000-0001-8471-4219;;0000-0001-8578-2635", "linkedin": ";;;", "or_profile": "~Puning_Zhao1;~Huiwen_Wu2;~Jiafei_Wu1;~Zhe_Liu11", "aff": "Zhejiang Lab;Zhejiang Lab;Zhejiang Lab;Zhejiang Lab", "aff_domain": "zhejianglab.com;zhejianglab.edu.cn;zhejianglab.com;zhejianglab.com", "position": "Researcher;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024on,\ntitle={On Theoretical Limits of Learning with Label Differential Privacy},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oQc7TOCk5G}\n}", "github": "", "project": "", "reviewers": "kpNG;26J5;WgYH;vnjU", "site": "https://openreview.net/forum?id=oQc7TOCk5G", "pdf_size": 594018, "rating": "4;5;6;7", "confidence": "2;3;3;2", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;2;2;2", "wc_summary": "27;209;294;103", "wc_strengths": "34;135;66;27", "wc_weaknesses": "145;225;46;332", "wc_questions": "12;220;277;15", "wc_limitations": "4;1;17;6", "wc_review": "222;790;700;483", "wc_reply_reviewers": "175;63;23;47", "wc_reply_authors": "112;58;25;27", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 158.25, 101.5907845230068 ], "wc_strengths_avg": [ 65.5, 42.7346463656832 ], "wc_weaknesses_avg": [ 187.0, 105.01666534412526 ], "wc_questions_avg": [ 131.0, 119.22038416311197 ], "wc_limitations_avg": [ 7.0, 6.041522986797286 ], "wc_review_avg": [ 548.75, 219.18414062153312 ], "wc_reply_reviewers_avg": [ 77.0, 58.34380858325929 ], "wc_reply_authors_avg": [ 55.5, 35.14612354157995 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:J9K03SMLh9wJ:scholar.google.com/&scioq=On+Theoretical+Limits+of+Learning+with+Label+Differential+Privacy&hl=en&as_sdt=0,34", "gs_version_total": 0, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang Lab", "aff_unique_dep": "", "aff_unique_url": "http://www.zhejianglab.com", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "oRZN9a53ag", "title": "Score matching through the roof: linear, nonlinear, and latent variables causal discovery", "track": "main", "status": "Reject", "tldr": "", "abstract": "Causal discovery from observational data holds great promise, but existing methods rely on strong assumptions about the underlying causal structure, often requiring full observability of all relevant variables. We tackle these challenges by leveraging the score function $\\nabla \\log p(X)$ of observed variables for causal discovery and propose the following contributions. First, we generalize the existing results of identifiability with the score to additive noise models with minimal requirements on the causal mechanisms. Second, we establish conditions for inferring causal relations from the score even in the presence of hidden variables; this result is two-faced: we demonstrate the score's potential as an alternative to conditional independence tests to infer the equivalence class of causal graphs with hidden variables, and we provide the necessary conditions for identifying direct causes in latent variable models. Building on these insights, we propose a flexible algorithm for causal discovery across linear, nonlinear, and latent variable models, which we empirically validate.", "keywords": "Causal discovery;score matching;latent variables", "primary_area": "causal_inference", "supplementary_material": "/attachment/51283df7f1cd7c00643c5f1d2c76a033f733e816.zip", "author": "Francesco Montagna;Philipp Michael Faller;Patrick Bl\u00f6baum;Elke Kirschbaum;Francesco Locatello", "authorids": "~Francesco_Montagna2;~Philipp_Michael_Faller1;~Patrick_Bl\u00f6baum1;~Elke_Kirschbaum2;~Francesco_Locatello1", "gender": ";M;;;M", "homepage": "https://www.francescomontagna.com/;;;;https://twitter.com/FrancescoLocat8", "dblp": ";306/8510;;;195/6074", "google_scholar": "StwghVgAAAAJ;https://scholar.google.de/citations?user=kUD-kVkAAAAJ;;;", "orcid": ";;;;", "linkedin": "francesco-montagna/;https://www.linkedin.com/mwlite/in/philipp-faller-747793169;;;", "or_profile": "~Francesco_Montagna2;~Philipp_Michael_Faller1;~Patrick_Bl\u00f6baum1;~Elke_Kirschbaum2;~Francesco_Locatello1", "aff": "University of Genoa;Karlsruher Institut f\u00fcr Technologie;;;Institute of Science and Technology", "aff_domain": "unige.it;kit.edu;;;ist.ac.at", "position": "PhD student;PhD student;;;Assistant Professor", "bibtex": "@misc{\nanonymous2024score,\ntitle={Score matching through the roof: linear, nonlinear, and latent variables causal discovery},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oRZN9a53ag}\n}", "github": "", "project": "", "reviewers": "TL8k;mmww;WsKx", "site": "https://openreview.net/forum?id=oRZN9a53ag", "pdf_size": 629007, "rating": "3;4;8", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "1;2;4", "presentation": "3;3;3", "wc_summary": "59;116;19", "wc_strengths": "36;57;84", "wc_weaknesses": "431;293;401", "wc_questions": "29;337;145", "wc_limitations": "67;1;20", "wc_review": "622;804;669", "wc_reply_reviewers": "609;1170;17", "wc_reply_authors": "103;3548;0", "reply_reviewers": "1;4;1", "reply_authors": "2;7;1", "rating_avg": [ 5.0, 2.160246899469287 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 1.247219128924647 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 64.66666666666667, 39.802289158065044 ], "wc_strengths_avg": [ 59.0, 19.6468827043885 ], "wc_weaknesses_avg": [ 375.0, 59.262129560116215 ], "wc_questions_avg": [ 170.33333333333334, 127.0100608438385 ], "wc_limitations_avg": [ 29.333333333333332, 27.740864362084245 ], "wc_review_avg": [ 698.3333333333334, 77.14200469840597 ], "wc_reply_reviewers_avg": [ 598.6666666666666, 470.76698647585255 ], "wc_reply_authors_avg": [ 1217.0, 1648.8021914913463 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.3333333333333335, 2.6246692913372702 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1NnzAlJTSfMJ:scholar.google.com/&scioq=Score+matching+through+the+roof:+linear,+nonlinear,+and+latent+variables+causal+discovery&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Genoa;Karlsruher Institut f\u00fcr Technologie;Institute of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unige.it;https://www.kit.edu;", "aff_unique_abbr": "UniGe;KIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Italy;Germany;" }, { "title": "Implicit Regularization of Sharpness-Aware Minimization for Scale-Invariant Problems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93626", "id": "oSOVME9kl2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oSOVME9kl2", "openreview": "https://openreview.net/forum?id=oSOVME9kl2", "poster": "/media/PosterPDFs/NeurIPS%202024/93626.png?t=1729775543.365539", "project": "", "author_site": "Bingcong Li, Liang Zhang, Niao He", "tldr": "", "abstract": "Sharpness-aware minimization (SAM) improves generalization of various deep learning tasks. Motivated by popular architectures such as LoRA, we explore the implicit regularization of SAM for scale-invariant problems involving two groups of variables. Instead of focusing on commonly used sharpness, this work introduces a concept termed *balancedness*, defined as the difference between the squared norm of two variables. This allows us to depict richer global behaviors of SAM. In particular, our theoretical and empirical findings reveal that i) SAM promotes balancedness; and ii) the regularization on balancedness is *data-responsive* -- outliers have stronger impact. \nThe latter coincides with empirical observations that SAM outperforms SGD in the presence of outliers. \nLeveraging the implicit regularization, we develop a resource-efficient SAM variant, balancedness-aware regularization (BAR), tailored for scale-invariant problems such as finetuning language models with LoRA. BAR saves 95% computational overhead of SAM, with enhanced test performance across various tasks on RoBERTa, GPT2, and OPT-1.3B.", "keywords": "sharpness-aware minimization;LoRA;implicit regularization;finetuning;language models", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/ad7d92c6f46e2030c4c47365d8af22966ff21284.zip", "author": "Bingcong Li;Liang Zhang;Niao He", "authorids": "~Bingcong_Li1;~Liang_Zhang6;~Niao_He3", "gender": ";M;", "homepage": ";https://liang137.github.io/;", "dblp": ";50/6759;", "google_scholar": ";OIgmMCkAAAAJ;", "orcid": ";0009-0007-4012-8040;", "linkedin": ";;", "or_profile": "~Bingcong_Li1;~Liang_Zhang6;~Niao_He3", "aff": ";Department of Computer Science, ETHZ - ETH Zurich;", "aff_domain": ";inf.ethz.ch;", "position": ";PhD student;", "bibtex": "@inproceedings{\nli2024implicit,\ntitle={Implicit Regularization of Sharpness-Aware Minimization for Scale-Invariant Problems},\nauthor={Bingcong Li and Liang Zhang and Niao He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oSOVME9kl2}\n}", "github": "", "reviewers": "Cq3N;9YQw;7PUq;t2ze", "pdf_size": 1256138, "rating": "6;7;7;8", "confidence": "5;3;3;3", "soundness": "3;4;3;4", "novelty": "4;4;3;4", "presentation": "2;3;3;4", "wc_summary": "114;73;92;61", "wc_strengths": "39;36;44;254", "wc_weaknesses": "38;58;30;45", "wc_questions": "302;68;28;75", "wc_limitations": "1;5;7;19", "wc_review": "494;240;201;454", "wc_reply_reviewers": "116;69;19;24", "wc_reply_authors": "331;114;19;28", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.0, 20.062402647738878 ], "wc_strengths_avg": [ 93.25, 92.85304249188607 ], "wc_weaknesses_avg": [ 42.75, 10.280442597476044 ], "wc_questions_avg": [ 118.25, 107.59269259573347 ], "wc_limitations_avg": [ 8.0, 6.708203932499369 ], "wc_review_avg": [ 347.25, 128.27972365108994 ], "wc_reply_reviewers_avg": [ 57.0, 39.2364626336269 ], "wc_reply_authors_avg": [ 123.0, 125.68412787619604 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5667797163563747286&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 6, "email": ";inf.ethz.ch;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "title": "Binocular-Guided 3D Gaussian Splatting with View Consistency for Sparse View Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93625", "id": "oTEttMIymz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oTEttMIymz", "openreview": "https://openreview.net/forum?id=oTEttMIymz", "poster": "", "project": "", "author_site": "Liang Han, Junsheng Zhou, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "Novel view synthesis from sparse inputs is a vital yet challenging task in 3D computer vision. Previous methods explore 3D Gaussian Splatting with neural priors (e.g. depth priors) as an additional supervision, demonstrating promising quality and efficiency compared to the NeRF based methods. However, the neural priors from 2D pretrained models are often noisy and blurry, which struggle to precisely guide the learning of radiance fields. In this paper, We propose a novel method for synthesizing novel views from sparse views with Gaussian Splatting that does not require external prior as supervision. Our key idea lies in exploring the self-supervisions inherent in the binocular stereo consistency between each pair of binocular images constructed with disparity-guided image warping. To this end, we additionally introduce a Gaussian opacity constraint which regularizes the Gaussian locations and avoids Gaussian redundancy for\nimproving the robustness and efficiency of inferring 3D Gaussians from sparse views. Extensive experiments on the LLFF, DTU, and Blender datasets demonstrate that our method significantly outperforms the state-of-the-art methods.", "keywords": "Sparse View Synthesis;Gaussian Splatting;View Consistency", "primary_area": "machine_vision", "supplementary_material": "/attachment/8c8cbc732d1a2db01ef49e439971c2dce8ab3538.zip", "author": "Liang Han;Junsheng Zhou;Yu-Shen Liu;Zhizhong Han", "authorids": "~Liang_Han3;~Junsheng_Zhou3;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": "M;M;M;M", "homepage": "https://hanl2010.github.io/hanliang/;https://junshengzhou.github.io/;https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";;44/2229.html;166/5173", "google_scholar": ";afPIrLYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7305-1915;", "linkedin": ";;;", "or_profile": "~Liang_Han3;~Junsheng_Zhou3;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Wayne State University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "position": "PhD student;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhan2024binocularguided,\ntitle={Binocular-Guided 3D Gaussian Splatting with View Consistency for Sparse View Synthesis},\nauthor={Liang Han and Junsheng Zhou and Yu-Shen Liu and Zhizhong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oTEttMIymz}\n}", "github": "", "reviewers": "gh5j;sng2;q3N3;UVce", "pdf_size": 35138098, "rating": "4;5;5;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "67;49;50;165", "wc_strengths": "52;15;44;284", "wc_weaknesses": "594;128;57;526", "wc_questions": "23;2;66;186", "wc_limitations": "5;24;3;35", "wc_review": "741;218;220;1196", "wc_reply_reviewers": "265;26;0;102", "wc_reply_authors": "514;24;0;245", "reply_reviewers": "1;1;0;1", "reply_authors": "4;2;1;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 82.75, 48.022781052329734 ], "wc_strengths_avg": [ 98.75, 107.83639228015744 ], "wc_weaknesses_avg": [ 326.25, 236.32009542144317 ], "wc_questions_avg": [ 69.25, 71.24385938451117 ], "wc_limitations_avg": [ 16.75, 13.348689074212494 ], "wc_review_avg": [ 593.75, 407.81881700088337 ], "wc_reply_reviewers_avg": [ 98.25, 103.31111992423662 ], "wc_reply_authors_avg": [ 195.75, 207.0777329893294 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5295129319744672903&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Tsinghua University;Wayne State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://wayne.edu", "aff_unique_abbr": "THU;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Identify Then Recommend: Towards Unsupervised Group Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93624", "id": "oTZYhOAMhX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oTZYhOAMhX", "openreview": "https://openreview.net/forum?id=oTZYhOAMhX", "poster": "", "project": "", "author_site": "Yue Liu, Shihao Zhu, Tianyuan Yang, Jian Ma, Wenliang Zhong", "tldr": "", "abstract": "Group Recommendation (GR), which aims to recommend items to groups of users, has become a promising and practical direction for recommendation systems. This paper points out two issues of the state-of-the-art GR models. (1) The pre-defined and fixed number of user groups is inadequate for real-time industrial recommendation systems, where the group distribution can shift dynamically. (2) The training schema of existing GR methods is supervised, necessitating expensive user-group and group-item labels, leading to significant annotation costs. To this end, we present a novel unsupervised group recommendation framework named $\\underline{\\text{I}}$dentify $\\underline{\\text{T}}$hen $\\underline{\\text{R}}$ecommend ($\\underline{\\text{ITR}}$), where it first identifies the user groups in an unsupervised manner even without the pre-defined number of groups, and then two pre-text tasks are designed to conduct self-supervised group recommendation. Concretely, at the group identification stage, we first estimate the adaptive density of each user point, where areas with higher densities are more likely to be recognized as group centers. Then, a heuristic merge-and-split strategy is designed to discover the user groups and decision boundaries. Subsequently, at the self-supervised learning stage, the pull-and-repulsion pre-text task is proposed to optimize the user-group distribution. Besides, the pseudo group recommendation pre-text task is designed to assist the recommendations. Extensive experiments demonstrate the superiority and effectiveness of ITR on both user recommendation (e.g., 22.22\\% NDCG@5 $\\uparrow$) and group recommendation (e.g., 22.95\\% NDCG@5 $\\uparrow$). Furthermore, we deploy ITR on the industrial recommender and achieve promising results.", "keywords": "Unsupervised Learning;Self-supervised Learning;Clustering;Recommendation", "primary_area": "other", "supplementary_material": "", "author": "Yue Liu;Shihao Zhu;Tianyuan Yang;Jian Ma;Wenliang Zhong", "authorids": "~Yue_Liu10;~Shihao_Zhu1;~Tianyuan_Yang1;~Jian_Ma9;~Wenliang_Zhong1", "gender": "M;M;M;M;M", "homepage": "https://yueliu1999.github.io/;http://alexzsh.github.com;;https://www.lamda.nju.edu.cn/maj/;https://scholar.google.com/citations?user=M2n8XvQAAAAJ&hl=en", "dblp": "74/1932-8;;;;81/8863.html", "google_scholar": "5tfpu3MAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;M2n8XvQAAAAJ", "orcid": ";;0000-0003-1228-0647;;0009-0006-8861-9503", "linkedin": ";;;;", "or_profile": "~Yue_Liu10;~Shihao_Zhu1;~Tianyuan_Yang1;~Jian_Ma9;~Wenliang_Zhong1", "aff": "University of Illinois, Urbana Champaign;;;Ant Group;Ant Group", "aff_domain": "uiuc.edu;;;antgroup.com;antgroup.com", "position": "Intern;;;Researcher;Researcher", "bibtex": "@inproceedings{\nliu2024identify,\ntitle={Identify Then Recommend: Towards Unsupervised Group Recommendation},\nauthor={Yue Liu and Shihao Zhu and Tianyuan Yang and Jian Ma and Wenliang Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oTZYhOAMhX}\n}", "github": "", "reviewers": "hAA1;x7i3;GTSo;dQ8T", "pdf_size": 1001274, "rating": "4;5;5;7", "confidence": "5;4;3;5", "soundness": "3;3;3;3", "novelty": "2;3;2;4", "presentation": "2;3;3;4", "wc_summary": "62;72;71;69", "wc_strengths": "64;48;43;48", "wc_weaknesses": "233;374;152;74", "wc_questions": "15;4;78;94", "wc_limitations": "34;1;24;1", "wc_review": "408;499;368;286", "wc_reply_reviewers": "307;530;18;16", "wc_reply_authors": "588;2433;659;940", "reply_reviewers": "2;4;1;1", "reply_authors": "5;9;6;5", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 68.5, 3.905124837953327 ], "wc_strengths_avg": [ 50.75, 7.917543811056558 ], "wc_weaknesses_avg": [ 208.25, 110.9873303580188 ], "wc_questions_avg": [ 47.75, 38.86113096655835 ], "wc_limitations_avg": [ 15.0, 14.439529078193651 ], "wc_review_avg": [ 390.25, 76.65629458824631 ], "wc_reply_reviewers_avg": [ 217.75, 215.6784354079007 ], "wc_reply_authors_avg": [ 1155.0, 749.5021681089388 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 6.25, 1.6393596310755 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.20751433915982243, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9697384896394765243&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uiuc.edu;;;antgroup.com;antgroup.com", "author_num": 5, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Ant Group", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.antgroup.com", "aff_unique_abbr": "UIUC;Ant Group", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "title": "A theoretical design of concept sets: improving the predictability of concept bottleneck models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93623", "id": "oTv6Qa12G0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oTv6Qa12G0", "openreview": "https://openreview.net/forum?id=oTv6Qa12G0", "poster": "", "project": "", "author_site": "Max Ruiz Luyten, Mihaela van der Schaar", "tldr": "", "abstract": "Concept-based learning, a promising approach in machine learning, emphasizes the value of high-level representations called concepts. However, despite growing interest in concept-bottleneck models (CBMs), there is a lack of clear understanding regarding the properties of concept sets and their impact on model performance. In this work, we define concepts within the machine learning context, highlighting their core properties: 'expressiveness' and 'model-aware inductive bias', and we make explicit the underlying assumption of CBMs. We establish theoretical results for concept-bottleneck models (CBMs), revealing how these properties guide the design of concept sets that optimize model performance. Specifically, we demonstrate that well-chosen concept sets can improve sample efficiency and out-of-distribution robustness in the appropriate regimes. Based on these insights, we propose a method to effectively identify informative and non-redundant concepts. We validate our approach with experiments on CIFAR-10 and MetaShift, showing that concept-bottleneck models outperform the foundational embedding counterpart, particularly in low-data regimes and under distribution shifts. We also examine failure modes and discuss how they can be tackled.", "keywords": "Representation Learning;Embedding Approaches;Learning Theory;Neural Abstract Machines;Nonlinear Dimensionality Reduction and Manifold Learning;One-Shot/Low-Shot Learning Approaches", "primary_area": "other", "supplementary_material": "", "author": "Max Ruiz Luyten;Mihaela van der Schaar", "authorids": "~Max_Ruiz_Luyten1;~Mihaela_van_der_Schaar2", "gender": ";F", "homepage": ";https://www.vanderschaar-lab.com", "dblp": ";", "google_scholar": "P4z6s_MAAAAJ;DZ3S--MAAAAJ", "orcid": ";", "linkedin": "max-ruiz/;", "or_profile": "~Max_Ruiz_Luyten1;~Mihaela_van_der_Schaar2", "aff": "University of Cambridge;University of California, Los Angeles", "aff_domain": "cam.ac.uk;ucla.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nluyten2024a,\ntitle={A theoretical design of concept sets: improving the predictability of concept bottleneck models},\nauthor={Max Ruiz Luyten and Mihaela van der Schaar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oTv6Qa12G0}\n}", "github": "", "reviewers": "7Ykd;sfwM;QdnR", "pdf_size": 875578, "rating": "5;6;7", "confidence": "4;3;1", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;2;2", "wc_summary": "80;42;82", "wc_strengths": "87;86;61", "wc_weaknesses": "124;68;114", "wc_questions": "90;1;269", "wc_limitations": "6;1;18", "wc_review": "387;198;544", "wc_reply_reviewers": "0;173;24", "wc_reply_authors": "0;337;29", "reply_reviewers": "0;2;1", "reply_authors": "1;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 68.0, 18.40289832245635 ], "wc_strengths_avg": [ 78.0, 12.027745701779143 ], "wc_weaknesses_avg": [ 102.0, 24.385788210895843 ], "wc_questions_avg": [ 120.0, 111.4480446964713 ], "wc_limitations_avg": [ 8.333333333333334, 7.133644853010899 ], "wc_review_avg": [ 376.3333333333333, 141.4551361936199 ], "wc_reply_reviewers_avg": [ 65.66666666666667, 76.52595783276563 ], "wc_reply_authors_avg": [ 122.0, 152.48825091352668 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9819805060619659, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4931039480240687425&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "cam.ac.uk;ucla.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cambridge;UCLA", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Los Angeles", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Intruding with Words: Towards Understanding Graph Injection Attacks at the Text Level", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93622", "id": "oTzydUKWpq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oTzydUKWpq", "openreview": "https://openreview.net/forum?id=oTzydUKWpq", "poster": "/media/PosterPDFs/NeurIPS%202024/93622.png?t=1730010939.5551965", "project": "", "author_site": "Runlin Lei, Yuwei Hu, Yuchen Ren, Zhewei Wei", "tldr": "", "abstract": "Graph Neural Networks (GNNs) excel across various applications but remain vulnerable to adversarial attacks, particularly Graph Injection Attacks (GIAs), which inject malicious nodes into the original graph and pose realistic threats.\nText-attributed graphs (TAGs), where nodes are associated with textual features, are crucial due to their prevalence in real-world applications and are commonly used to evaluate these vulnerabilities.\nHowever, existing research only focuses on embedding-level GIAs, which inject node embeddings rather than actual textual content, limiting their applicability and simplifying detection.\nIn this paper, we pioneer the exploration of GIAs at the text level, presenting three novel attack designs that inject textual content into the graph.\nThrough theoretical and empirical analysis, we demonstrate that text interpretability, a factor previously overlooked at the embedding level, plays a crucial role in attack strength. \nAmong the designs we investigate, the Word-frequency-based Text-level GIA (WTGIA) is particularly notable for its balance between performance and interpretability. \nDespite the success of WTGIA, we discover that defenders can easily enhance their defenses with customized text embedding methods or large language model (LLM)--based predictors. \nThese insights underscore the necessity for further research into the potential and practical significance of text-level GIAs.", "keywords": "Graph Neural Networks;Graph Adversarial Attack;Graph Injection Attack", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/9198cd5b41f74080b2f9658979984c14d4bb1ea0.zip", "author": "Runlin Lei;Yuwei Hu;Yuchen Ren;Zhewei Wei", "authorids": "~Runlin_Lei1;~Yuwei_Hu1;~Yuchen_Ren5;~Zhewei_Wei1", "gender": "M;M;;M", "homepage": "https://github.com/Leirunlin;https://hyworrywart.github.io/;;http://weizhewei.com", "dblp": "321/1838;;;94/4260", "google_scholar": "pckN2lAAAAAJ;;;https://scholar.google.com.hk/citations?user=qZ7dj4gAAAAJ", "orcid": ";0009-0001-3302-1807;;0000-0003-3620-5086", "linkedin": ";;;", "or_profile": "~Runlin_Lei1;~Yuwei_Hu1;~Yuchen_Ren5;~Zhewei_Wei1", "aff": "Renmin University of China;Renmin University of China;;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nlei2024intruding,\ntitle={Intruding with Words: Towards Understanding Graph Injection Attacks at the Text Level},\nauthor={Runlin Lei and Yuwei Hu and Yuchen Ren and Zhewei Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oTzydUKWpq}\n}", "github": "", "reviewers": "YfqF;Da4q;nfHh;ZG7Z", "pdf_size": 5006446, "rating": "4;7;7;8", "confidence": "4;4;4;4", "soundness": "2;4;4;4", "novelty": "2;4;4;4", "presentation": "2;3;3;3", "wc_summary": "58;73;96;85", "wc_strengths": "21;57;87;50", "wc_weaknesses": "175;62;60;256", "wc_questions": "3;37;110;57", "wc_limitations": "8;8;38;1", "wc_review": "265;237;391;449", "wc_reply_reviewers": "28;13;33;0", "wc_reply_authors": "128;19;19;94", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.0, 14.124446891825535 ], "wc_strengths_avg": [ 53.75, 23.466731770743024 ], "wc_weaknesses_avg": [ 138.25, 82.39045757853272 ], "wc_questions_avg": [ 51.75, 38.77740966078059 ], "wc_limitations_avg": [ 13.75, 14.289419162443238 ], "wc_review_avg": [ 335.5, 87.51428454829531 ], "wc_reply_reviewers_avg": [ 18.5, 12.971121771072847 ], "wc_reply_authors_avg": [ 65.0, 47.544715794712666 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3195601394981121848&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Universal Physics Transformers: A Framework For Efficiently Scaling Neural Operators", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93621", "id": "oUXiNX5KRm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oUXiNX5KRm", "openreview": "https://openreview.net/forum?id=oUXiNX5KRm", "poster": "/media/PosterPDFs/NeurIPS%202024/93621.png?t=1730118633.1459596", "project": "", "author_site": "Benedikt Alkin, Andreas F\u00fcrst, Simon Schmid, Lukas Gruber, Markus Holzleitner, Johannes Brandstetter", "tldr": "", "abstract": "Neural operators, serving as physics surrogate models, have recently gained increased interest. With ever increasing problem complexity, the natural question arises: what is an efficient way to scale neural operators to larger and more complex simulations - most importantly by taking into account different types of simulation datasets. This is of special interest since, akin to their numerical counterparts, different techniques are used across applications, even if the underlying dynamics of the systems are similar. Whereas the flexibility of transformers has enabled unified architectures across domains, neural operators mostly follow a problem specific design, where GNNs are commonly used for Lagrangian simulations and grid-based models predominate Eulerian simulations. \n\nWe introduce Universal Physics Transformers (UPTs), an efficient and unified learning paradigm for a wide range of spatio-temporal problems. UPTs operate without grid- or particle-based latent structures, enabling flexibility and scalability across meshes and particles. UPTs efficiently propagate dynamics in the latent space, emphasized by inverse encoding and decoding techniques. Finally, UPTs allow for queries of the latent space representation at any point in space-time. We demonstrate diverse applicability and efficacy of UPTs in mesh-based fluid simulations, and steady-state Reynolds averaged Navier-Stokes simulations, and Lagrangian-based dynamics.", "keywords": "neural operator;computational fluid dynamics;Lagrangian simulations;transformers;latent space modeling", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/ffbee57cf57b024cc7564d74926efc865fcb2da6.zip", "author": "Benedikt Alkin;Andreas F\u00fcrst;Simon Lucas Schmid;Lukas Gruber;Markus Holzleitner;Johannes Brandstetter", "authorids": "~Benedikt_Alkin1;~Andreas_F\u00fcrst1;~Simon_Lucas_Schmid1;~Lukas_Gruber2;~Markus_Holzleitner1;~Johannes_Brandstetter1", "gender": "M;M;Not Specified;;M;M", "homepage": "https://www.jku.at/institut-fuer-machine-learning/ueber-uns/team/di-benedikt-alkin/;;https://www.jku.at/en/institute-for-machine-learning/;;;https://www.jku.at/institut-fuer-machine-learning/ueber-uns/team/dipl-ing-andreas-fuerst/", "dblp": "345/2573;;18/7703;271/0626;251/8691;", "google_scholar": "https://scholar.google.at/citations?user=Zv7XQGoAAAAJ;;;518MXv8AAAAJ;KiRvOHcAAAAJ;", "orcid": "0009-0001-6006-4837;0009-0007-0299-487X;;;;", "linkedin": "benedikt-alkin-452a37223;;;;;", "or_profile": "~Benedikt_Alkin1;~Simon_Lucas_Schmid1;~Lukas_Gruber2;~Markus_Holzleitner1;~Johannes_Brandstetter1;~Andreas_Fuerst1", "aff": "Johannes Kepler Universit\u00e4t Linz;Johannes Kepler Universit\u00e4t Linz;Johannes Kepler University Linz;Johannes Kepler University Linz;Microsoft;Johannes Kepler University Linz", "aff_domain": "jku.at;jku.at;jku.at;jku.at;microsoft.com;jku.at", "position": "PhD student;PhD student;PhD student;Postdoc;Researcher;PhD student", "bibtex": "@inproceedings{\nalkin2024universal,\ntitle={Universal Physics Transformers: A Framework For Efficiently Scaling Neural Operators},\nauthor={Benedikt Alkin and Andreas F{\\\"u}rst and Simon Lucas Schmid and Lukas Gruber and Markus Holzleitner and Johannes Brandstetter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oUXiNX5KRm}\n}", "github": "", "reviewers": "QzDc;5AUz;1stF;CUfq;JKEE", "pdf_size": 7317843, "rating": "6;6;7;7;7", "confidence": "4;4;3;3;3", "soundness": "4;3;3;3;3", "novelty": "3;3;3;4;3", "presentation": "3;3;3;3;4", "wc_summary": "76;57;69;120;47", "wc_strengths": "43;904;88;31;138", "wc_weaknesses": "180;1271;113;54;406", "wc_questions": "275;224;43;162;105", "wc_limitations": "1;1;6;6;94", "wc_review": "575;2457;319;373;790", "wc_reply_reviewers": "45;527;13;13;19", "wc_reply_authors": "0;1267;0;0;0", "reply_reviewers": "1;5;1;1;1", "reply_authors": "1;3;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 73.8, 25.150745515789385 ], "wc_strengths_avg": [ 240.8, 333.73366626698004 ], "wc_weaknesses_avg": [ 404.8, 449.2319668055692 ], "wc_questions_avg": [ 161.8, 82.48975693987708 ], "wc_limitations_avg": [ 21.6, 36.2689950232978 ], "wc_review_avg": [ 902.8, 794.6351112303055 ], "wc_reply_reviewers_avg": [ 123.4, 202.1460857894607 ], "wc_reply_authors_avg": [ 253.4, 506.80000000000007 ], "reply_reviewers_avg": [ 1.8, 1.6000000000000003 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1228151014294947995&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "jku.at;jku.at;jku.at;jku.at;microsoft.com;jku.at", "author_num": 6, "aff_unique_index": "0;0;1;1;2;1", "aff_unique_norm": "Johannes Kepler University Linz;Johannes Kepler University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.jku.at;https://www.jku.at;https://www.microsoft.com", "aff_unique_abbr": "JKU;JKU;Microsoft", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Austria;United States" }, { "id": "oW6s6zFYj9", "title": "Stepwise Weighted Spike Coding for Deep Spiking Neural Networks", "track": "main", "status": "Reject", "tldr": "", "abstract": "Spiking Neural Networks (SNNs) seek to mimic the spiking behavior of biological neurons and are expected to play a key role in the advancement of neural computing and artificial intelligence. The efficiency of SNNs is often determined by the neural coding schemes. Existing coding schemes either cause huge delays and energy consumption or necessitate intricate neuron models and training techniques. To address these issues, we propose a novel Stepwise Weighted Spike (SWS) coding scheme to enhance the encoding of information in spikes. This approach compresses the spikes by weighting the significance of the spike in each step of neural computation, achieving high performance and low energy consumption. A Ternary Self-Amplifying (TSA) neuron model with a silent period is proposed for supporting SWS-based computing, aimed at minimizing the residual error resulting from stepwise weighting in neural computation. Our experimental results show that the SWS coding scheme outperforms the existing neural coding schemes in very deep SNNs, and significantly reduces operations and latency.", "keywords": "Spiking neural network;Spike coding scheme;Stepwise weighted spike", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yiwen Gu;Junchuan Gu;Haibin Shen;Kejie Huang", "authorids": "~Yiwen_Gu2;~Junchuan_Gu1;~Haibin_Shen1;~Kejie_Huang1", "gender": ";M;;M", "homepage": "https://github.com/GarrettGu;http://www.isee.zju.edu.cn/iseexyw/2020/0927/c21188a2200013/page.htm;;https://person.zju.edu.cn/huangkejie", "dblp": ";;;05/10461", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Yiwen_Gu2;~Junchuan_Gu1;~Haibin_Shen1;~Kejie_Huang1", "aff": "Zhejiang University;Zhejiang University;;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;;zju.edu.cn", "position": "Undergrad student;PhD student;;Full Professor", "bibtex": "@misc{\nanonymous2024stepwise,\ntitle={Stepwise Weighted Spike Coding for Deep Spiking Neural Networks},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oW6s6zFYj9}\n}", "github": "", "project": "", "reviewers": "zR1q;FCv3;ZNdM;YWtV;jeCw", "site": "https://openreview.net/forum?id=oW6s6zFYj9", "pdf_size": 829865, "rating": "3;4;4;5;6", "confidence": "4;4;4;4;4", "soundness": "2;2;3;3;3", "novelty": "2;2;2;2;2", "presentation": "2;2;3;2;2", "wc_summary": "143;90;64;134;44", "wc_strengths": "269;37;47;36;11", "wc_weaknesses": "63;232;140;219;18", "wc_questions": "188;61;2;67;11", "wc_limitations": "219;24;2;53;21", "wc_review": "882;444;255;509;105", "wc_reply_reviewers": "0;86;11;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 4.4, 1.0198039027185568 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 95.0, 38.50194800266605 ], "wc_strengths_avg": [ 80.0, 95.24284750048163 ], "wc_weaknesses_avg": [ 134.4, 84.09661110889071 ], "wc_questions_avg": [ 65.8, 66.37891231407758 ], "wc_limitations_avg": [ 63.8, 79.29539709213896 ], "wc_review_avg": [ 439.0, 263.28159829353814 ], "wc_reply_reviewers_avg": [ 19.4, 33.57141641337166 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9294520218723861256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "BiDM: Pushing the Limit of Quantization for Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93620", "id": "oWAItGB8LJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oWAItGB8LJ", "openreview": "https://openreview.net/forum?id=oWAItGB8LJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93620.png?t=1731680024.686727", "project": "", "author_site": "Xingyu Zheng, Xianglong Liu, Yichen Bian, Xudong Ma, Yulun Zhang, Jiakai Wang, Jinyang Guo, Haotong Qin", "tldr": "", "abstract": "Diffusion models (DMs) have been significantly developed and widely used in various applications due to their excellent generative qualities. However, the expensive computation and massive parameters of DMs hinder their practical use in resource-constrained scenarios. As one of the effective compression approaches, quantization allows DMs to achieve storage saving and inference acceleration by reducing bit-width while maintaining generation performance. However, as the most extreme quantization form, 1-bit binarization causes the generation performance of DMs to face severe degradation or even collapse. This paper proposes a novel method, namely BiDM, for fully binarizing weights and activations of DMs, pushing quantization to the 1-bit limit. From a temporal perspective, we introduce the Timestep-friendly Binary Structure (TBS), which uses learnable activation binarizers and cross-timestep feature connections to address the highly timestep-correlated activation features of DMs. From a spatial perspective, we propose Space Patched Distillation (SPD) to address the difficulty of matching binary features during distillation, focusing on the spatial locality of image generation tasks and noise estimation networks. As the first work to fully binarize DMs, the W1A1 BiDM on the LDM-4 model for LSUN-Bedrooms 256$\\times$256 achieves a remarkable FID of 22.74, significantly outperforming the current state-of-the-art general binarization methods with an FID of 59.44 and invalid generative samples, and achieves up to excellent 28.0 times storage and 52.7 times OPs savings.", "keywords": "Diffusion Model;Model Binarization;Low-bit Quantization;Machine Learning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4c403f26c27dc94583dc373308e56bcc854150c2.zip", "author": "Xingyu Zheng;Xianglong Liu;Yichen Bian;Xudong Ma;Yulun Zhang;Jiakai Wang;Jinyang Guo;Haotong Qin", "authorids": "~Xingyu_Zheng1;~Xianglong_Liu3;~Yichen_Bian1;~Xudong_Ma3;~Yulun_Zhang1;~Jiakai_Wang1;~Jinyang_Guo1;~Haotong_Qin1", "gender": ";;M;M;M;M;M;M", "homepage": "https://github.com/Xingyu-Zheng;;https://blog.csdn.net/weixin_64148276?spm=1000.2115.3001.5343;https://macaronlin.github.io/;http://yulunzhang.com/;https://jiakaiwangcn.github.io/;https://jinyangguo.github.io/;https://htqin.github.io/", "dblp": ";;;19/2951;166/2763-1.html;202/9216;;262/3626.html", "google_scholar": "ISXNTf8AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;3_zPktkAAAAJ;ORmLjWoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;uJGeT1AAAAAJ;mK6n-KgAAAAJ", "orcid": "0009-0009-6283-7635;;;;0000-0002-2288-5079;0000-0001-5884-3412;;", "linkedin": ";;;;yulun-zhang-1116b5b9/;;;", "or_profile": "~Xingyu_Zheng1;~Xianglong_Liu3;~Yichen_Bian1;~Xudong_Ma3;~Yulun_Zhang1;~Jiakai_Wang1;~Jinyang_Guo1;~Haotong_Qin1", "aff": "Beihang University;;Beihang University;Beihang University;Swiss Federal Institute of Technology;Zhongguancun Laboratory;Beihang University;ETHZ - ETH Zurich", "aff_domain": "buaa.edu.cn;;buaa.edu.cn;buaa.edu.cn;ethz.ch;mail.zgclab.edu.cn;buaa.edu.cn;ethz.ch", "position": "Undergrad student;;Undergrad student;PhD student;Postdoc;Researcher;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nzheng2024bidm,\ntitle={Bi{DM}: Pushing the Limit of Quantization for Diffusion Models},\nauthor={Xingyu Zheng and Xianglong Liu and Yichen Bian and Xudong Ma and Yulun Zhang and Jiakai Wang and Jinyang Guo and Haotong Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oWAItGB8LJ}\n}", "github": "", "reviewers": "AtKg;4HDt;cNoX", "pdf_size": 3759441, "rating": "5;5;7", "confidence": "4;3;4", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "47;90;93", "wc_strengths": "70;47;244", "wc_weaknesses": "238;129;148", "wc_questions": "81;64;2", "wc_limitations": "7;1;35", "wc_review": "443;331;522", "wc_reply_reviewers": "126;21;38", "wc_reply_authors": "917;59;59", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.66666666666667, 21.013223349965983 ], "wc_strengths_avg": [ 120.33333333333333, 87.94821708760723 ], "wc_weaknesses_avg": [ 171.66666666666666, 47.541794478355804 ], "wc_questions_avg": [ 49.0, 33.95094500402996 ], "wc_limitations_avg": [ 14.333333333333334, 14.817407180595245 ], "wc_review_avg": [ 432.0, 78.36240595251442 ], "wc_reply_reviewers_avg": [ 61.666666666666664, 46.016905106227604 ], "wc_reply_authors_avg": [ 345.0, 404.4650788387052 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4809052314537177192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "buaa.edu.cn;;buaa.edu.cn;buaa.edu.cn;ethz.ch;mail.zgclab.edu.cn;buaa.edu.cn;ethz.ch", "author_num": 8, "aff_unique_index": "0;0;0;1;2;0;3", "aff_unique_norm": "Beihang University;Swiss Federal Institute of Technology;Zhongguancun Laboratory;ETH Zurich", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ethz.ch;;https://www.ethz.ch", "aff_unique_abbr": "BUAA;ETH Zurich;;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;1", "aff_country_unique": "China;Switzerland" }, { "title": "Private Stochastic Convex Optimization with Heavy Tails: Near-Optimality from Simple Reductions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93619", "id": "oX6aIl9f0Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oX6aIl9f0Y", "openreview": "https://openreview.net/forum?id=oX6aIl9f0Y", "poster": "", "project": "", "author_site": "Hilal Asi, Daogao Liu, Kevin Tian", "tldr": "", "abstract": "We study the problem of differentially private stochastic convex optimization (DP-SCO) with heavy-tailed gradients, where we assume a $k^{\\text{th}}$-moment bound on the Lipschitz constants of sample functions, rather than a uniform bound. We propose a new reduction-based approach that enables us to obtain the first optimal rates (up to logarithmic factors) in the heavy-tailed setting, achieving error $G_2 \\cdot \\frac 1 {\\sqrt n} + G_k \\cdot (\\frac{\\sqrt d}{n\\epsilon})^{1 - \\frac 1 k}$ under $(\\epsilon, \\delta)$-approximate differential privacy, up to a mild $\\textup{polylog}(\\frac{1}{\\delta})$ factor, where $G_2^2$ and $G_k^k$ are the $2^{\\text{nd}}$ and $k^{\\text{th}}$ moment bounds on sample Lipschitz constants, nearly-matching a lower bound of [LR23].\nWe then give a suite of private algorithms for DP-SCO with heavy-tailed gradients improving our basic result under additional assumptions, including an optimal algorithm under a known-Lipschitz constant assumption, a near-linear time algorithm for smooth functions, and an optimal linear time algorithm for smooth generalized linear models.", "keywords": "Stochastic Convex Optimization;Heavy-Tailed Distributions;Differential Privacy", "primary_area": "privacy", "supplementary_material": "", "author": "Hilal Asi;Daogao Liu;Kevin Tian", "authorids": "~Hilal_Asi1;~Daogao_Liu1;~Kevin_Tian4", "gender": "M;M;", "homepage": "http://web.stanford.edu/~asi/;https://daogaoliu.github.io/;https://kjtian.github.io", "dblp": ";245/4078;", "google_scholar": "QGcz9-kAAAAJ;auA3AaQAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hilal_Asi1;~Daogao_Liu1;~Kevin_Tian4", "aff": "Apple;University of Washington, Seattle;University of Texas at Austin", "aff_domain": "apple.com;uw.edu;utexas.edu", "position": "Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nasi2024private,\ntitle={Private Stochastic Convex Optimization with Heavy Tails: Near-Optimality from Simple Reductions},\nauthor={Hilal Asi and Daogao Liu and Kevin Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oX6aIl9f0Y}\n}", "github": "", "reviewers": "UUcG;1QCz;KReZ;8sRE;S4Vx", "pdf_size": 588870, "rating": "3;5;6;7;7", "confidence": "4;3;2;3;2", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;3", "presentation": "1;1;3;3;4", "wc_summary": "332;62;125;35;242", "wc_strengths": "63;14;51;64;55", "wc_weaknesses": "32;66;72;1;43", "wc_questions": "208;1;47;1;1", "wc_limitations": "6;1;1;1;1", "wc_review": "641;144;296;102;342", "wc_reply_reviewers": "0;19;20;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.6, 1.4966629547095764 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 1.2 ], "wc_summary_avg": [ 159.2, 112.03463750108713 ], "wc_strengths_avg": [ 49.4, 18.358649187780674 ], "wc_weaknesses_avg": [ 42.8, 25.51391777050322 ], "wc_questions_avg": [ 51.6, 80.20374056114841 ], "wc_limitations_avg": [ 2.0, 2.0 ], "wc_review_avg": [ 305.0, 190.51299168298206 ], "wc_reply_reviewers_avg": [ 7.8, 9.558242516278815 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7857142857142858, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13624497150750119720&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "apple.com;uw.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Apple;University of Washington;University of Texas at Austin", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://www.washington.edu;https://www.utexas.edu", "aff_unique_abbr": "Apple;UW;UT Austin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Seattle;Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Implicit Regularization Paths of Weighted Neural Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93618", "id": "oXCmwwkQTZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oXCmwwkQTZ", "openreview": "https://openreview.net/forum?id=oXCmwwkQTZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93618.png?t=1730988226.6971054", "project": "", "author_site": "Jin-Hong Du, Pratik Patil", "tldr": "", "abstract": "We study the implicit regularization effects induced by (observation) weighting of pretrained features.\nFor weight and feature matrices of bounded operator norms that are infinitesimally free with respect to (normalized) trace functionals, we derive equivalence paths connecting different weighting matrices and ridge regularization levels.\nSpecifically, we show that ridge estimators trained on weighted features along the same path are asymptotically equivalent when evaluated against test vectors of bounded norms.\nThese paths can be interpreted as matching the effective degrees of freedom of ridge estimators fitted with weighted features.\nFor the special case of subsampling without replacement, our results apply to independently sampled random features and kernel features and confirm recent conjectures (Conjectures 7 and 8) of the authors on the existence of such paths in Patil and Du (2023).\nWe also present an additive risk decomposition for ensembles of weighted estimators and show that the risks are equivalent along the paths when the ensemble size goes to infinity.\nAs a practical consequence of the path equivalences, we develop an efficient cross-validation method for tuning and apply it to subsampled pretrained representations across several models (e.g., ResNet-50) and datasets (e.g., CIFAR-100).", "keywords": "pretrained representations;implicit regularization;random features;kernel regression;neural tangent kernel;cross-validation", "primary_area": "learning_theory", "supplementary_material": "/attachment/7eaa4df2041c4a47746533290fd68dbe5f775b14.zip", "author": "Jin-Hong Du;Pratik Patil", "authorids": "~Jin-Hong_Du1;~Pratik_Patil1", "gender": ";", "homepage": ";https://www.stat.berkeley.edu/~pratikpatil/", "dblp": ";48/2268", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Jin-Hong_Du1;~Pratik_Patil1", "aff": ";University of California, Berkeley", "aff_domain": ";berkeley.edu", "position": ";Postdoc", "bibtex": "@inproceedings{\ndu2024implicit,\ntitle={Implicit Regularization Paths of Weighted Neural Representations},\nauthor={Jin-Hong Du and Pratik Patil},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oXCmwwkQTZ}\n}", "github": "", "reviewers": "cp5Y;HnNV;nM8u", "pdf_size": 3536764, "rating": "5;7;7", "confidence": "3;2;2", "soundness": "2;3;3", "novelty": "3;4;3", "presentation": "1;3;3", "wc_summary": "60;74;141", "wc_strengths": "56;18;139", "wc_weaknesses": "158;23;173", "wc_questions": "208;2;40", "wc_limitations": "15;1;9", "wc_review": "497;118;502", "wc_reply_reviewers": "18;20;46", "wc_reply_authors": "19;27;27", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 91.66666666666667, 35.34905310691583 ], "wc_strengths_avg": [ 71.0, 50.52392172690741 ], "wc_weaknesses_avg": [ 118.0, 67.45368781616021 ], "wc_questions_avg": [ 83.33333333333333, 89.5072933092916 ], "wc_limitations_avg": [ 8.333333333333334, 5.734883511361751 ], "wc_review_avg": [ 372.3333333333333, 179.8524086268763 ], "wc_reply_reviewers_avg": [ 28.0, 12.754084313139327 ], "wc_reply_authors_avg": [ 24.333333333333332, 3.7712361663282534 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11201901634588796281&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": ";berkeley.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "SparseLLM: Towards Global Pruning of Pre-trained Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93617", "id": "oXHyYHp4Zb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oXHyYHp4Zb", "openreview": "https://openreview.net/forum?id=oXHyYHp4Zb", "poster": "", "project": "", "author_site": "Guangji Bai, Yijiang Li, Chen LING, Kibaek Kim, Liang Zhao", "tldr": "", "abstract": "The transformative impact of large language models (LLMs) like LLaMA and GPT on natural language processing is countered by their prohibitive computational demands. Pruning has emerged as a pivotal compression strategy, introducing sparsity to enhance both memory and computational efficiency. Yet, traditional global pruning is impractical for LLMs due to scalability issues, while local pruning, despite its efficiency, leads to suboptimal solutions. Addressing these challenges, we propose *SparseLLM*, a novel framework that redefines the global pruning process into manageable, coordinated subproblems, allowing for resource-efficient optimization with global optimality. SparseLLM's approach, which conceptualizes LLMs as a chain of modular functions and leverages auxiliary variables for problem decomposition, not only facilitates a pragmatic application on LLMs but also demonstrates significant performance improvements, particularly in high-sparsity regimes where it surpasses current state-of-the-art methods. Our source code is publicly available at https://github.com/BaiTheBest/SparseLLM.", "keywords": "Large Language Model;Pruning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Guangji Bai;Yijiang Li;Chen Ling;Kibaek Kim;Liang Zhao", "authorids": "~Guangji_Bai1;~Yijiang_Li2;~Chen_Ling3;~Kibaek_Kim1;~Liang_Zhao6", "gender": "M;M;M;M;M", "homepage": "https://baithebest.github.io/;;https://kibaekkim.github.io;https://cs.emory.edu/~lzhao41/;https://lingchen0331.github.io/", "dblp": "286/0892;;124/2557;63/5422-2;", "google_scholar": "gBMbU28AAAAJ;;RE9h8MsAAAAJ;qnvyqtwAAAAJ;275NKcEAAAAJ", "orcid": "0000-0003-3932-2472;0000-0002-6655-9224;0000-0002-5820-6533;0000-0002-2648-9989;0000-0001-8044-6026", "linkedin": "https://linkedin.com/in/guangji-bai/;;kibaekkim/;;", "or_profile": "~Guangji_Bai1;~Yijiang_Li2;~Kibaek_Kim1;~Liang_Zhao6;~Chen_LING2", "aff": "Emory University;Argonne National Laboratory;Argonne National Laboratory;Emory University;Emory University", "aff_domain": "emory.edu;anl.gov;anl.gov;emory.edu;emory.edu", "position": "PhD student;Postdoc;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nbai2024sparsellm,\ntitle={Sparse{LLM}: Towards Global Pruning of Pre-trained Language Models},\nauthor={Guangji Bai and Yijiang Li and Chen Ling and Kibaek Kim and Liang Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oXHyYHp4Zb}\n}", "github": "", "reviewers": "K3H3;35o1;snMo;FMFD", "pdf_size": 1345756, "rating": "5;5;5;5", "confidence": "2;3;4;4", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;4;3", "wc_summary": "40;52;96;68", "wc_strengths": "47;26;99;33", "wc_weaknesses": "73;105;139;119", "wc_questions": "36;25;51;11", "wc_limitations": "16;6;8;42", "wc_review": "212;214;393;273", "wc_reply_reviewers": "9;0;23;0", "wc_reply_authors": "29;0;115;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;3;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.0, 20.97617696340303 ], "wc_strengths_avg": [ 51.25, 28.586491565073178 ], "wc_weaknesses_avg": [ 109.0, 24.041630560342615 ], "wc_questions_avg": [ 30.75, 14.669270602180601 ], "wc_limitations_avg": [ 18.0, 14.352700094407323 ], "wc_review_avg": [ 273.0, 73.48809427383459 ], "wc_reply_reviewers_avg": [ 8.0, 9.40744386111339 ], "wc_reply_authors_avg": [ 36.0, 47.12218161333365 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2570638658173911419&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "emory.edu;anl.gov;anl.gov;emory.edu;emory.edu", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Emory University;Argonne National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.emory.edu;https://www.anl.gov", "aff_unique_abbr": "Emory;ANL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Measuring Per-Unit Interpretability at Scale Without Humans", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93616", "id": "oYyEsVz6DX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oYyEsVz6DX", "openreview": "https://openreview.net/forum?id=oYyEsVz6DX", "poster": "", "project": "", "author_site": "Roland S. Zimmermann, David Klindt, Wieland Brendel", "tldr": "", "abstract": "In today\u2019s era, whatever we can measure at scale, we can optimize. So far, measuring the interpretability of units in deep neural networks (DNNs) for computer vision still requires direct human evaluation and is not scalable. As a result, the inner workings of DNNs remain a mystery despite the remarkable progress we have seen in their applications. In this work, we introduce the first scalable method to measure the per-unit interpretability in vision DNNs. This method does not require any human evaluations, yet its prediction correlates well with existing human interpretability measurements. We validate its predictive power through an interventional human psychophysics study. We demonstrate the usefulness of this measure by performing previously infeasible experiments: (1) A large-scale interpretability analysis across more than 70 million units from 835 computer vision models, and (2) an extensive analysis of how units transform during training. We find an anticorrelation between a model's downstream classification performance and per-unit interpretability, which is also observable during model training. Furthermore, we see that a layer's location and width influence its interpretability.", "keywords": "interpretability;explainability;deep learning;neural networks;analysis;activation maximization;alignment;evaluation", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/702ea8de49793984e5ea356c6625eb27385a1ce1.zip", "author": "Roland S. Zimmermann;David Klindt;Wieland Brendel", "authorids": "~Roland_S._Zimmermann1;~David_Klindt1;~Wieland_Brendel1", "gender": ";;M", "homepage": ";;", "dblp": ";;37/11107", "google_scholar": ";;v-JL-hsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Roland_S._Zimmermann1;~David_Klindt1;~Wieland_Brendel1", "aff": ";;ELLIS Institute T\u00fcbingen", "aff_domain": ";;tue.ellis.eu", "position": ";;Principal Researcher", "bibtex": "@inproceedings{\nzimmermann2024measuring,\ntitle={Measuring Per-Unit Interpretability at Scale Without Humans},\nauthor={Roland S. Zimmermann and David Klindt and Wieland Brendel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oYyEsVz6DX}\n}", "github": "", "reviewers": "j42j;8Jb5;P5jG;gaJB", "pdf_size": 7132997, "rating": "5;5;6;7", "confidence": "4;4;3;4", "soundness": "2;3;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;4", "wc_summary": "112;175;57;113", "wc_strengths": "143;35;57;206", "wc_weaknesses": "362;464;38;91", "wc_questions": "104;138;338;187", "wc_limitations": "72;10;6;10", "wc_review": "793;822;496;607", "wc_reply_reviewers": "251;196;0;94", "wc_reply_authors": "49;658;0;0", "reply_reviewers": "1;2;0;1", "reply_authors": "2;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 114.25, 41.75748435909424 ], "wc_strengths_avg": [ 110.25, 68.4447770103753 ], "wc_weaknesses_avg": [ 238.75, 178.92648630093868 ], "wc_questions_avg": [ 191.75, 89.44376725071457 ], "wc_limitations_avg": [ 24.5, 27.472713735632308 ], "wc_review_avg": [ 679.5, 134.27304271520774 ], "wc_reply_reviewers_avg": [ 135.25, 96.2844094337188 ], "wc_reply_authors_avg": [ 176.75, 278.56899953153436 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7408457291881816331&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 0, "email": ";;tue.ellis.eu", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "ELLIS Institute", "aff_unique_dep": "", "aff_unique_url": "https://ellis.eu/", "aff_unique_abbr": "ELLIS", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Boosting the Potential of Large Language Models with an Intelligent Information Assistant", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93615", "id": "oZy4a11SUg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oZy4a11SUg", "openreview": "https://openreview.net/forum?id=oZy4a11SUg", "poster": "/media/PosterPDFs/NeurIPS%202024/93615.png?t=1731572248.14611", "project": "", "author_site": "Yujia Zhou, Zheng Liu, Zhicheng Dou", "tldr": "", "abstract": "The emergence of Large Language Models (LLMs) has significantly advanced natural language processing, but these models often generate factually incorrect information, known as \"hallucination.\" Initial retrieval-augmented generation (RAG) methods like the \"Retrieve-Read\" framework was inadequate for complex reasoning tasks. Subsequent prompt-based RAG strategies and Supervised Fine-Tuning (SFT) methods improved performance but required frequent retraining and risked altering foundational LLM capabilities. To cope with these challenges, we propose Assistant-based Retrieval-Augmented Generation (AssistRAG), integrating an intelligent information assistant within LLMs. This assistant manages memory and knowledge through tool usage, action execution, memory building, and plan specification. Using a two-phase training approach\u2014Curriculum Assistant Learning and Reinforced Preference Optimization\u2014AssistRAG enhances information retrieval and decision-making. Experiments show AssistRAG significantly outperforms benchmarks, especially benefiting less advanced LLMs, by providing superior reasoning capabilities and accurate responses.", "keywords": "information retrieval;large language model;information assistant", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/54a1dbf015befccf979634ecabfb7054eb815760.zip", "author": "Yujia Zhou;Zheng Liu;Zhicheng Dou", "authorids": "~Yujia_Zhou1;~Zheng_Liu4;~Zhicheng_Dou1", "gender": "M;;", "homepage": "https://www.zhouyujia.cn/;https://www.microsoft.com/en-us/research/people/zhengliu/;https://playbigdata.ruc.edu.cn/dou", "dblp": "https://dblp.uni-trier.de/pid/166/2544-2.html;06/3580-11;18/5740", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=k2SF4M0AAAAJ;ChCjAAwAAAAJ", "orcid": "0000-0002-3530-3787;0000-0001-7765-8466;0000-0002-9781-948X", "linkedin": ";;", "or_profile": "~Yujia_Zhou1;~Zheng_Liu4;~Zhicheng_Dou1", "aff": "Tsinghua University;Microsoft Research;Renmin University of China", "aff_domain": "mail.tsinghua.edu.cn;research.microsoft.com;ruc.edu.cn", "position": "Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhou2024boosting,\ntitle={Boosting the Potential of Large Language Models with an Intelligent Information Assistant},\nauthor={Yujia Zhou and Zheng Liu and Zhicheng Dou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oZy4a11SUg}\n}", "github": "", "reviewers": "zFfj;Y6YL;xG33", "pdf_size": 4907283, "rating": "5;7;8", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "2;3;4", "wc_summary": "43;71;66", "wc_strengths": "8;37;123", "wc_weaknesses": "135;47;105", "wc_questions": "66;75;5", "wc_limitations": "21;115;1", "wc_review": "273;345;300", "wc_reply_reviewers": "131;0;78", "wc_reply_authors": "201;0;48", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 60.0, 12.192894105447921 ], "wc_strengths_avg": [ 56.0, 48.83304891839815 ], "wc_weaknesses_avg": [ 95.66666666666667, 36.527006751473934 ], "wc_questions_avg": [ 48.666666666666664, 31.09483701338357 ], "wc_limitations_avg": [ 45.666666666666664, 49.70133018564214 ], "wc_review_avg": [ 306.0, 29.698484809834994 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 53.80417166808619 ], "wc_reply_authors_avg": [ 83.0, 85.70880934886448 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12748526420420026937&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;research.microsoft.com;ruc.edu.cn", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tsinghua University;Microsoft;Renmin University of China", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;MSR;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Understanding Representation of Deep Equilibrium Models from Neural Collapse Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93614", "id": "obUXeUMmq1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=obUXeUMmq1", "openreview": "https://openreview.net/forum?id=obUXeUMmq1", "poster": "/media/PosterPDFs/NeurIPS%202024/93614.png?t=1731727930.817676", "project": "", "author_site": "Haixiang Sun, Ye Shi", "tldr": "", "abstract": "Deep Equilibrium Model (DEQ), which serves as a typical implicit neural network, emphasizes their memory efficiency and competitive performance compared to explicit neural networks. However, there has been relatively limited theoretical analysis on the representation of DEQ. In this paper, we utilize the Neural Collapse ($\\mathcal{NC}$) as a tool to systematically analyze the representation of DEQ under both balanced and imbalanced conditions. $\\mathcal{NC}$ is an interesting phenomenon in the neural network training process that characterizes the geometry of class features and classifier weights. While extensively studied in traditional explicit neural networks, the $\\mathcal{NC}$ phenomenon has not received substantial attention in the context of implicit neural networks. \nWe theoretically show that $\\mathcal{NC}$ exists in DEQ under balanced conditions. Moreover, in imbalanced settings, despite the presence of minority collapse, DEQ demonstrated advantages over explicit neural networks. These advantages include the convergence of extracted features to the vertices of a simplex equiangular tight frame and self-duality properties under mild conditions, highlighting DEQ's superiority in handling imbalanced datasets. Finally, we validate our theoretical analyses through experiments in both balanced and imbalanced scenarios.", "keywords": "Deep Equilibrium Models;Neural Collapse;Imbalance learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Haixiang Sun;Ye Shi", "authorids": "~Haixiang_Sun1;~Ye_Shi1", "gender": "M;M", "homepage": ";http://faculty.sist.shanghaitech.edu.cn/faculty/shiye", "dblp": "185/9693;34/11191-1", "google_scholar": ";gMqbZPUAAAAJ", "orcid": ";", "linkedin": "haixiang-sun-582451323/;", "or_profile": "~Haixiang_Sun1;~Ye_Shi1", "aff": "ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nsun2024understanding,\ntitle={Understanding Representation of Deep Equilibrium Models from Neural Collapse Perspective},\nauthor={Haixiang Sun and Ye Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=obUXeUMmq1}\n}", "github": "", "reviewers": "6ZyB;C4pT;Zj9t;a3Sj", "pdf_size": 768960, "rating": "5;5;6;7", "confidence": "4;2;3;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "1;3;3;2", "wc_summary": "57;173;108;47", "wc_strengths": "49;45;47;88", "wc_weaknesses": "198;44;117;80", "wc_questions": "135;2;161;4", "wc_limitations": "1;1;7;7", "wc_review": "440;265;440;226", "wc_reply_reviewers": "525;0;41;132", "wc_reply_authors": "1050;0;29;22", "reply_reviewers": "4;0;1;1", "reply_authors": "5;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 96.25, 49.9868732768914 ], "wc_strengths_avg": [ 57.25, 17.80975856096876 ], "wc_weaknesses_avg": [ 109.75, 57.115562677785114 ], "wc_questions_avg": [ 75.5, 73.08385594644004 ], "wc_limitations_avg": [ 4.0, 3.0 ], "wc_review_avg": [ 342.75, 98.22264250161467 ], "wc_reply_reviewers_avg": [ 174.5, 207.92366387691422 ], "wc_reply_authors_avg": [ 275.25, 447.43009230493203 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4545454545454545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ji2LJfLERBAJ:scholar.google.com/&scioq=Understanding+Representation+of+Deep+Equilibrium+Models+from+Neural+Collapse+Perspective&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Generalization Bounds via Conditional $f$-Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93613", "id": "ocxVXe5XN1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ocxVXe5XN1", "openreview": "https://openreview.net/forum?id=ocxVXe5XN1", "poster": "", "project": "", "author_site": "Ziqiao Wang, Yongyi Mao", "tldr": "", "abstract": "In this work, we introduce novel information-theoretic generalization bounds using the conditional $f$-information framework, an extension of the traditional conditional mutual information (MI) framework. We provide a generic approach to derive generalization bounds via $f$-information in the supersample setting, applicable to both bounded and unbounded loss functions. Unlike previous MI-based bounds, our proof strategy does not rely on upper bounding the cumulant-generating function (CGF) in the variational formula of MI. Instead, we set the CGF or its upper bound to zero by carefully selecting the measurable function invoked in the variational formula. Although some of our techniques are partially inspired by recent advances in the coin-betting framework (e.g., Jang et al. (2023)), our results are independent of any previous findings from regret guarantees of online gambling algorithms. Additionally, our newly derived MI-based bound recovers many previous results and improves our understanding of their potential limitations. Finally, we empirically compare various $f$-information measures for generalization, demonstrating the improvement of our new bounds over the previous bounds.", "keywords": "information-theoretic generalization bound;generalization;f-divergence", "primary_area": "learning_theory", "supplementary_material": "/attachment/e1e134b5abc564c0a0a8b27b1fb4d5ac3f04fd02.zip", "author": "Ziqiao Wang;Yongyi Mao", "authorids": "~Ziqiao_Wang1;~Yongyi_Mao2", "gender": "M;M", "homepage": "https://ziqiaowanggeothe.github.io;http://www.eecs.uottawa.ca/~yymao", "dblp": "222/9220;86/2933", "google_scholar": "iBL7APIAAAAJ;https://scholar.google.ca/citations?user=jM5l70wAAAAJ", "orcid": "0000-0003-0504-4830;0000-0001-5298-5778", "linkedin": "ziqiao-wang-987565155/?locale=en_US;", "or_profile": "~Ziqiao_Wang1;~Yongyi_Mao1", "aff": "University of Ottawa;University of Ottawa", "aff_domain": "uottawa.ca;eecs.uottawa.ca", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024generalization,\ntitle={Generalization Bounds via Conditional \\$f\\$-Information},\nauthor={Ziqiao Wang and Yongyi Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ocxVXe5XN1}\n}", "github": "", "reviewers": "Ka5u;D8Mx;gd7k;abMK", "pdf_size": 580973, "rating": "4;6;6;7", "confidence": "3;3;3;3", "soundness": "4;3;3;4", "novelty": "2;3;2;3", "presentation": "3;3;2;3", "wc_summary": "52;81;77;42", "wc_strengths": "28;43;35;140", "wc_weaknesses": "228;23;116;249", "wc_questions": "31;66;39;44", "wc_limitations": "4;4;1;38", "wc_review": "343;217;268;513", "wc_reply_reviewers": "317;18;57;21", "wc_reply_authors": "711;15;671;20", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 16.446884203398525 ], "wc_strengths_avg": [ 61.5, 45.63167759353145 ], "wc_weaknesses_avg": [ 154.0, 90.9752713653551 ], "wc_questions_avg": [ 45.0, 12.98075498574717 ], "wc_limitations_avg": [ 11.75, 15.20485119953497 ], "wc_review_avg": [ 335.25, 111.98297861728808 ], "wc_reply_reviewers_avg": [ 103.25, 124.35910702477724 ], "wc_reply_authors_avg": [ 354.25, 337.0514612043686 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1gMGGUcFmg0J:scholar.google.com/&scioq=Generalization+Bounds+via+Conditional+%24f%24-Information&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "uottawa.ca;eecs.uottawa.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Ottawa", "aff_unique_dep": "", "aff_unique_url": "https://www.uottawa.ca", "aff_unique_abbr": "U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Classifier-guided Gradient Modulation for Enhanced Multimodal Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93612", "id": "oe5ZEqTOaz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oe5ZEqTOaz", "openreview": "https://openreview.net/forum?id=oe5ZEqTOaz", "poster": "/media/PosterPDFs/NeurIPS%202024/93612.png?t=1729493076.8485985", "project": "", "author_site": "Zirun Guo, Tao Jin, Jingyuan Chen, Zhou Zhao", "tldr": "", "abstract": "Multimodal learning has developed very fast in recent years. However, during the multimodal training process, the model tends to rely on only one modality based on which it could learn faster, thus leading to inadequate use of other modalities. Existing methods to balance the training process always have some limitations on the loss functions, optimizers and the number of modalities and only consider modulating the magnitude of the gradients while ignoring the directions of the gradients. To solve these problems, in this paper, we present a novel method to balance multimodal learning with **C**lassifier-**G**uided **G**radient **M**odulation (CGGM), considering both the magnitude and directions of the gradients. We conduct extensive experiments on four multimodal datasets: UPMC-Food 101, CMU-MOSI, IEMOCAP and BraTS 2021, covering classification, regression and segmentation tasks. The results show that CGGM outperforms all the baselines and other state-of-the-art methods consistently, demonstrating its effectiveness and versatility. Our code is available at https://github.com/zrguo/CGGM.", "keywords": "balanced multimodal learning;gradient modulation", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Zirun Guo;Tao Jin;Jingyuan Chen;Zhou Zhao", "authorids": "~Zirun_Guo1;~Tao_Jin2;~Jingyuan_Chen3;~Zhou_Zhao3", "gender": ";M;;", "homepage": "https://openreview.net/profile?id=~Zirun_Guo1;https://hugddygff.github.io/;;", "dblp": "382/7932;88/4850-4.html;;", "google_scholar": ";;;", "orcid": ";0000-0003-3564-1628;;", "linkedin": ";;;", "or_profile": "~Zirun_Guo1;~Tao_Jin2;~Jingyuan_Chen3;~Zhou_Zhao3", "aff": "Zhejiang University;Zhejiang University;;", "aff_domain": "zju.edu.cn;zju.edu.cn;;", "position": "MS student;Assistant Professor;;", "bibtex": "@inproceedings{\nguo2024classifierguided,\ntitle={Classifier-guided Gradient Modulation for Enhanced Multimodal Learning},\nauthor={Zirun Guo and Tao Jin and Jingyuan Chen and Zhou Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oe5ZEqTOaz}\n}", "github": "", "reviewers": "S6e7;kP2D;hVC3;evvZ", "pdf_size": 0, "rating": "4;5;5;7", "confidence": "5;4;4;4", "soundness": "2;2;2;2", "novelty": "2;2;2;2", "presentation": "3;2;3;2", "wc_summary": "35;26;35;38", "wc_strengths": "57;9;25;43", "wc_weaknesses": "236;8;214;246", "wc_questions": "2;163;7;75", "wc_limitations": "1;8;6;1", "wc_review": "331;214;287;403", "wc_reply_reviewers": "0;0;46;23", "wc_reply_authors": "172;172;65;147", "reply_reviewers": "0;0;1;1", "reply_authors": "3;3;2;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 33.5, 4.5 ], "wc_strengths_avg": [ 33.5, 18.131464364468744 ], "wc_weaknesses_avg": [ 176.0, 97.68316129200569 ], "wc_questions_avg": [ 61.75, 65.18195685924135 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 308.75, 68.60894620965986 ], "wc_reply_reviewers_avg": [ 17.25, 19.070592544543548 ], "wc_reply_authors_avg": [ 139.0, 43.9260742611948 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16578778300019950952&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Recovering Complete Actions for Cross-dataset Skeleton Action Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93611", "id": "oe7MfqFK1M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oe7MfqFK1M", "openreview": "https://openreview.net/forum?id=oe7MfqFK1M", "poster": "/media/PosterPDFs/NeurIPS%202024/93611.png?t=1731064960.3343747", "project": "", "author_site": "Hanchao Liu, Yujiang Li, Tai-Jiang Mu, Shi-min Hu", "tldr": "", "abstract": "Despite huge progress in skeleton-based action recognition, its generalizability to different domains remains a challenging issue. \nIn this paper, to solve the skeleton action generalization problem, we present a recover-and-resample augmentation framework based on a novel complete action prior. We observe that human daily actions are confronted with temporal mismatch across different datasets, as they are usually partial observations of their complete action sequences. By recovering complete actions and resampling from these full sequences, we can generate strong augmentations for unseen domains. At the same time, we discover the nature of general action completeness within large datasets, indicated by the per-frame diversity over time. This allows us to exploit two assets of transferable knowledge that can be shared across action samples and be helpful for action completion: boundary poses for determining the action start, and linear temporal transforms for capturing global action patterns. Therefore, we formulate the recovering stage as a two-step stochastic action completion with boundary pose-conditioned extrapolation followed by smooth linear transforms. Both the boundary poses and linear transforms can be efficiently learned from the whole dataset via clustering. We validate our approach on a cross-dataset setting with three skeleton action datasets, outperforming other domain generalization approaches by a considerable margin.", "keywords": "skeleton action recognition;domain generalization;data augmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hanchao Liu;Yujiang Li;Tai-Jiang Mu;Shi-min Hu", "authorids": "~Hanchao_Liu3;~Yujiang_Li2;~Tai-Jiang_Mu2;~Shi-min_Hu1", "gender": "M;F;M;M", "homepage": ";https://git.tsinghua.edu.cn/liyujian20;https://cg.cs.tsinghua.edu.cn/people/~mtj/;http://cg.cs.tsinghua.edu.cn/shimin.htm", "dblp": "211/5813;;146/4849;h/ShiMinHu", "google_scholar": ";;https://scholar.google.com.hk/citations?user=V-0oiTYAAAAJ;https://scholar.google.com.tw/citations?user=LDb4tb0AAAAJ", "orcid": "0000-0003-0604-7769;;0000-0002-9197-346X;", "linkedin": ";;;", "or_profile": "~Hanchao_Liu3;~Yujiang_Li2;~Tai-Jiang_Mu2;~Shi-min_Hu1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024recovering,\ntitle={Recovering Complete Actions for Cross-dataset Skeleton Action Recognition},\nauthor={Hanchao Liu and Yujiang Li and Tai-Jiang Mu and Shi-min Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oe7MfqFK1M}\n}", "github": "", "reviewers": "UxEw;Vnjk;V13L;UadT", "pdf_size": 2896058, "rating": "4;5;5;6", "confidence": "5;3;5;4", "soundness": "2;1;2;3", "novelty": "2;2;2;4", "presentation": "3;1;3;3", "wc_summary": "51;55;56;66", "wc_strengths": "48;71;86;68", "wc_weaknesses": "84;187;157;195", "wc_questions": "1;698;3;140", "wc_limitations": "1;10;4;5", "wc_review": "185;1021;306;474", "wc_reply_reviewers": "124;59;0;20", "wc_reply_authors": "0;80;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 57.0, 5.522680508593631 ], "wc_strengths_avg": [ 68.25, 13.534677683639163 ], "wc_weaknesses_avg": [ 155.75, 43.77998972133273 ], "wc_questions_avg": [ 210.5, 287.0422442777369 ], "wc_limitations_avg": [ 5.0, 3.24037034920393 ], "wc_review_avg": [ 496.5, 319.73778319116434 ], "wc_reply_reviewers_avg": [ 50.75, 47.314770421085214 ], "wc_reply_authors_avg": [ 20.0, 34.64101615137755 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:q093_I3-oKgJ:scholar.google.com/&scioq=Recovering+Complete+Actions+for+Cross-dataset+Skeleton+Action+Recognition&hl=en&as_sdt=0,7", "gs_version_total": 3, "email": "tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Carrot and Stick: Eliciting Comparison Data and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93610", "id": "ofjTu2ktxO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ofjTu2ktxO", "openreview": "https://openreview.net/forum?id=ofjTu2ktxO", "poster": "/media/PosterPDFs/NeurIPS%202024/93610.png?t=1731011001.2055302", "project": "", "author_site": "Yiling Chen, Shi Feng, Fang-Yi Yu", "tldr": "", "abstract": "Comparison data elicited from people are fundamental to many machine learning tasks, including reinforcement learning from human feedback for large language models and estimating ranking models. They are typically subjective and not directly verifiable. How to truthfully elicit such comparison data from rational individuals? We design peer prediction mechanisms for eliciting comparison data using a bonus-penalty payment. Our design leverages on the strong stochastic transitivity for comparison data to create symmetrically strongly truthful mechanisms such that truth-telling 1) forms a strict Bayesian Nash equilibrium, and 2) yields the highest payment among all symmetric equilibria. Each individual only needs to evaluate one pair of items and report her comparison in our mechanism.\n\nWe further extend the bonus-penalty payment concept to eliciting networked data, designing a symmetrically strongly truthful mechanism when agents\u2019 private signals are sampled according to the Ising models. We provide the necessary and sufficient conditions for our bonus-penalty payment to have truth-telling as a strict Bayesian Nash equilibrium. Experiments on two real-world datasets further support our theoretical discoveries.", "keywords": "information elicitation;mechanism design;crowdsourcing;ranking data;social network", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/da688323a2458b8e49fde4b8ea0e36903ed8a8ba.zip", "author": "Yiling Chen;Shi Feng;Fang-Yi Yu", "authorids": "~Yiling_Chen1;~Shi_Feng2;~Fang-Yi_Yu1", "gender": "F;M;", "homepage": "https://yiling.seas.harvard.edu/;https://fengshi.link;https://cs.gmu.edu/~fangyiyu/", "dblp": "72/3762-1;97/1374;183/3719", "google_scholar": "x_7xA0UAAAAJ;7XxWCegAAAAJ;LhLBzWEAAAAJ", "orcid": ";;0000-0002-3697-8807", "linkedin": ";;", "or_profile": "~Yiling_Chen1;~Shi_Feng2;~Fang-Yi_Yu1", "aff": "Harvard University;School of Engineering and Applied Sciences, Harvard University;George Mason University", "aff_domain": "fas.harvard.edu;g.harvard.edu;gmu.edu", "position": "Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024carrot,\ntitle={Carrot and Stick: Eliciting Comparison Data and Beyond},\nauthor={Yiling Chen and Shi Feng and Fang-Yi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ofjTu2ktxO}\n}", "github": "", "reviewers": "A4wM;c3bN;oRu4", "pdf_size": 1105309, "rating": "6;6;7", "confidence": "2;3;5", "soundness": "3;3;4", "novelty": "3;2;4", "presentation": "2;2;4", "wc_summary": "113;232;215", "wc_strengths": "33;40;69", "wc_weaknesses": "128;87;57", "wc_questions": "174;165;9", "wc_limitations": "25;29;23", "wc_review": "473;553;373", "wc_reply_reviewers": "26;19;18", "wc_reply_authors": "7;36;5", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 186.66666666666666, 52.55050480780899 ], "wc_strengths_avg": [ 47.333333333333336, 15.584892970081281 ], "wc_weaknesses_avg": [ 90.66666666666667, 29.101355447622865 ], "wc_questions_avg": [ 116.0, 75.7495874576225 ], "wc_limitations_avg": [ 25.666666666666668, 2.494438257849294 ], "wc_review_avg": [ 466.3333333333333, 73.63574011458174 ], "wc_reply_reviewers_avg": [ 21.0, 3.559026084010437 ], "wc_reply_authors_avg": [ 16.0, 14.165686240583852 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0JgRbV8j7NAJ:scholar.google.com/&scioq=Carrot+and+Stick:+Eliciting+Comparison+Data+and+Beyond&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "fas.harvard.edu;g.harvard.edu;gmu.edu", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Harvard University;George Mason University", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.gmu.edu", "aff_unique_abbr": "Harvard;GMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RAW: A Robust and Agile Plug-and-Play Watermark Framework for AI-Generated Images with Provable Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93609", "id": "ogaeChzbKu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ogaeChzbKu", "openreview": "https://openreview.net/forum?id=ogaeChzbKu", "poster": "/media/PosterPDFs/NeurIPS%202024/93609.png?t=1731696328.854848", "project": "", "author_site": "Xun Xian, Ganghua Wang, Xuan Bi, Jayanth Srinivasa, Ashish Kundu, Mingyi Hong, Jie Ding", "tldr": "", "abstract": "Safeguarding intellectual property and preventing potential misuse of AI-generated images are of paramount importance. This paper introduces a robust and agile plug-and-play watermark detection framework, referred to as RAW.\nAs a departure from existing encoder-decoder methods, which incorporate fixed binary codes as watermarks within latent representations, our approach introduces learnable watermarks directly into the original image data. Subsequently, we employ a classifier that is jointly trained with the watermark to detect the presence of the watermark.\nThe proposed framework is compatible with various generative architectures and supports on-the-fly watermark injection after training. By incorporating state-of-the-art smoothing techniques, we show that the framework also provides provable guarantees regarding the false positive rate for misclassifying a watermarked image, even in the presence of adversarial attacks targeting watermark removal. \nExperiments on a diverse range of images generated by state-of-the-art diffusion models demonstrate substantially improved watermark encoding speed and watermark detection performance, under adversarial attacks, while maintaining image quality. Our code is publicly available [here](https://github.com/jeremyxianx/RAWatermark).", "keywords": "Copyright Protection;Privacy", "primary_area": "privacy", "supplementary_material": "/attachment/28722f6d4f5d7d79a717a44c14f08854f8d406c4.zip", "author": "Xun Xian;Ganghua Wang;Xuan Bi;Jayanth Srinivasa;Ashish Kundu;Mingyi Hong;Jie Ding", "authorids": "~Xun_Xian1;~Ganghua_Wang1;~Xuan_Bi1;~Jayanth_Srinivasa1;~Ashish_Kundu1;~Mingyi_Hong1;~Jie_Ding2", "gender": "M;M;;M;;M;M", "homepage": "https://jeremyxianx.github.io/;https://gwang.umn.edu;;;;http://people.ece.umn.edu/~mhong/mingyi.html;http://jding.org", "dblp": "262/3278;200/9632;;285/5006;;57/8053;94/1825-2", "google_scholar": "https://scholar.google.com/citations?hl=en;;F3eRk9MAAAAJ;HtNfeKYAAAAJ;;qRnP-p0AAAAJ;ZyqvoqcAAAAJ", "orcid": ";0000-0002-0888-167X;;;;;", "linkedin": ";;;;;;", "or_profile": "~Xun_Xian1;~Ganghua_Wang1;~Xuan_Bi1;~Jayanth_Srinivasa1;~Ashish_Kundu1;~Mingyi_Hong1;~Jie_Ding2", "aff": "University of Minnesota, Minneapolis;University of Minnesota, Minneapolis;University of Minnesota - Twin Cities;Cisco;;University of Minnesota, Minneapolis;University of Minnesota - Twin Cities", "aff_domain": "umn.edu;umn.edu;umn.edu;cisco.com;;umn.edu;umn.edu", "position": "PhD student;PhD student;Assistant Professor;Researcher;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nxian2024raw,\ntitle={{RAW}: A Robust and Agile Plug-and-Play Watermark Framework for {AI}-Generated Images with Provable Guarantees},\nauthor={Xun Xian and Ganghua Wang and Xuan Bi and Jayanth Srinivasa and Ashish Kundu and Mingyi Hong and Jie Ding},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ogaeChzbKu}\n}", "github": "", "reviewers": "FV9y;h31G;8Sds", "pdf_size": 23491083, "rating": "5;5;5", "confidence": "4;5;4", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "2;3;3", "wc_summary": "58;64;60", "wc_strengths": "65;30;31", "wc_weaknesses": "283;91;135", "wc_questions": "127;1;2", "wc_limitations": "60;1;2", "wc_review": "593;187;230", "wc_reply_reviewers": "0;24;38", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.666666666666664, 2.494438257849294 ], "wc_strengths_avg": [ 42.0, 16.268579122549905 ], "wc_weaknesses_avg": [ 169.66666666666666, 82.12727250365063 ], "wc_questions_avg": [ 43.333333333333336, 59.162675921751735 ], "wc_limitations_avg": [ 21.0, 27.58018612458347 ], "wc_review_avg": [ 336.6666666666667, 182.10314537523934 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 15.69146972791976 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6222180116382464665&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "umn.edu;umn.edu;umn.edu;cisco.com;;umn.edu;umn.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "University of Minnesota;Cisco Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.cisco.com", "aff_unique_abbr": "UMN;Cisco", "aff_campus_unique_index": "0;0;1;0;1", "aff_campus_unique": "Minneapolis;Twin Cities;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "One-Step Diffusion Distillation through Score Implicit Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93608", "id": "ogk236hsJM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ogk236hsJM", "openreview": "https://openreview.net/forum?id=ogk236hsJM", "poster": "", "project": "", "author_site": "Weijian Luo, Zemin Huang, Zhengyang Geng, J. Zico Kolter, Guo-Jun Qi", "tldr": "", "abstract": "Despite their strong performances on many generative tasks, diffusion models require a large number of sampling steps in order to generate realistic samples. This has motivated the community to develop effective methods to distill pre-trained diffusion models into more efficient models, but these methods still typically require few-step inference or perform substantially worse than the underlying model. In this paper, we present Score Implicit Matching (SIM) a new approach to distilling pre-trained diffusion models into single-step generator models, while maintaining almost the same sample generation ability as the original model as well as being data-free with no need of training samples for distillation. The method rests upon the fact that, although the traditional score-based loss is intractable to minimize for generator models, under certain conditions we \\emph{can} efficiently compute the \\emph{gradients} for a wide class of score-based divergences between a diffusion model and a generator. SIM shows strong empirical performances for one-step generators: on the CIFAR10 dataset, it achieves an FID of 2.06 for unconditional generation and 1.96 for class-conditional generation. Moreover, by applying SIM to a leading transformer-based diffusion model, we distill a single-step generator for text-to-image (T2I) generation that attains an aesthetic score of 6.42 with no performance decline over the original multi-step counterpart, clearly outperforming the other one-step generators including SDXL-TURBO of 5.33, SDXL-LIGHTNING of 5.34 and HYPER-SDXL of 5.85. We will release this industry-ready one-step transformer-based T2I generator along with this paper.", "keywords": "Diffusion mode;Diffusion Distillation;Text-to-Image Generation;Generative Adversarial Network", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/b77b751f06109aa1d01c7642fa8d226927c9770b.zip", "author": "Weijian Luo;Zemin Huang;Zhengyang Geng;J Zico Kolter;Guo-Jun Qi", "authorids": "~Weijian_Luo1;~Zemin_Huang1;~Zhengyang_Geng1;~J_Zico_Kolter1;~Guo-Jun_Qi1", "gender": ";;;;M", "homepage": ";;https://gsunshine.github.io/;;http://maple-lab.net/gqi/", "dblp": ";;250/2651.html;;41/943", "google_scholar": ";;lNkw3QYAAAAJ;;https://scholar.google.com.tw/citations?user=Nut-uvoAAAAJ", "orcid": ";;;;0000-0003-3508-1851", "linkedin": ";;;;", "or_profile": "~Weijian_Luo1;~Zemin_Huang1;~Zhengyang_Geng1;~J_Zico_Kolter1;~Guo-Jun_Qi1", "aff": ";;Massachusetts Institute of Technology;;Guangdong OPPO Mobile Telecommunications Corp.,Ltd.", "aff_domain": ";;mit.edu;;oppo.com", "position": ";;Visiting student;;Dean and Chief Scientist", "bibtex": "@inproceedings{\nluo2024onestep,\ntitle={One-Step Diffusion Distillation through Score Implicit Matching},\nauthor={Weijian Luo and Zemin Huang and Zhengyang Geng and J Zico Kolter and Guo-Jun Qi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ogk236hsJM}\n}", "github": "", "reviewers": "pChh;wPU2;WiWu;Jq2C;G1sA", "pdf_size": 50647723, "rating": "5;6;7;8;8", "confidence": "3;4;4;3;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;3;4", "presentation": "2;2;3;3;3", "wc_summary": "31;93;91;66;351", "wc_strengths": "54;85;141;99;412", "wc_weaknesses": "124;319;213;62;225", "wc_questions": "24;8;1;130;154", "wc_limitations": "1;14;1;1;57", "wc_review": "234;519;447;358;1199", "wc_reply_reviewers": "0;22;23;22;19", "wc_reply_authors": "0;51;21;21;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.8, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 126.4, 114.51043620561403 ], "wc_strengths_avg": [ 158.2, 129.9467583281707 ], "wc_weaknesses_avg": [ 188.6, 88.4592561578493 ], "wc_questions_avg": [ 63.4, 65.05259410661499 ], "wc_limitations_avg": [ 14.8, 21.69239498073 ], "wc_review_avg": [ 551.4, 337.4851700445517 ], "wc_reply_reviewers_avg": [ 17.2, 8.704022058795578 ], "wc_reply_authors_avg": [ 18.6, 18.72538384119268 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.21004201260420152, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8714202232837912576&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": ";;mit.edu;;oppo.com", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;OPPO Mobile Telecommunications Corp.,Ltd.", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.oppo.com", "aff_unique_abbr": "MIT;OPPO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "Neuro-Vision to Language: Enhancing Brain Recording-based Visual Reconstruction and Language Interaction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93607", "id": "ohi00YhT3T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ohi00YhT3T", "openreview": "https://openreview.net/forum?id=ohi00YhT3T", "poster": "/media/PosterPDFs/NeurIPS%202024/93607.png?t=1730023015.8869414", "project": "", "author_site": "Guobin Shen, Dongcheng Zhao, Xiang He, Linghao Feng, Yiting Dong, Jihang Wang, Qian Zhang, Yi Zeng", "tldr": "", "abstract": "Decoding non-invasive brain recordings is pivotal for advancing our understanding of human cognition but faces challenges due to individual differences and complex neural signal representations. Traditional methods often require customized models and extensive trials, lacking interpretability in visual reconstruction tasks. Our framework integrates 3D brain structures with visual semantics using a *Vision Transformer 3D*. This unified feature extractor efficiently aligns fMRI features with multiple levels of visual embeddings, eliminating the need for subject-specific models and allowing extraction from single-trial data. The extractor consolidates multi-level visual features into one network, simplifying integration with Large Language Models (LLMs). Additionally, we have enhanced the fMRI dataset with diverse fMRI-image-related textual data to support multimodal large model development. Integrating with LLMs enhances decoding capabilities, enabling tasks such as brain captioning, complex reasoning, concept localization, and visual reconstruction. Our approach demonstrates superior performance across these tasks, precisely identifying language-based concepts within brain signals, enhancing interpretability, and providing deeper insights into neural processes. These advances significantly broaden the applicability of non-invasive brain decoding in neuroscience and human-computer interaction, setting the stage for advanced brain-computer interfaces and cognitive models.", "keywords": "Neural decoding;Mind Reader;Visual Reconstruction;Multimodal Large Model;Concept Localization", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/776d5156552946adfb0496f7911d7e76a5ae5c57.zip", "author": "Guobin Shen;Dongcheng Zhao;Xiang He;Linghao Feng;Yiting Dong;Jihang Wang;Qian Zhang;Yi Zeng", "authorids": "~Guobin_Shen1;~Dongcheng_Zhao2;~Xiang_He3;~Linghao_Feng1;~Yiting_Dong1;~Jihang_Wang1;~Qian_Zhang13;~Yi_Zeng1", "gender": ";M;;M;M;;F;M", "homepage": ";;;https://github.com/Caelum2000;;;;https://bii.ia.ac.cn/~yizeng", "dblp": ";177/8581;;;176/1090;;;75/148-1", "google_scholar": ";2E9Drq8AAAAJ;;;;;;", "orcid": ";;;;0000-0002-8405-3139;;my-orcid?orcid=0000-0001-5314-4233;0000-0002-9595-9091", "linkedin": ";;;;;;;", "or_profile": "~Guobin_Shen1;~Dongcheng_Zhao2;~Xiang_He3;~Linghao_Feng1;~Yiting_Dong1;~Jihang_Wang1;~Qian_Zhang13;~Yi_Zeng1", "aff": ";Institute of Automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";ia.ac.cn;;ia.ac.cn;ia.ac.cn;;ia.ac.cn;ia.ac.cn", "position": ";Assistant Professor;;PhD student;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nshen2024neurovision,\ntitle={Neuro-Vision to Language: Enhancing Brain Recording-based Visual Reconstruction and Language Interaction},\nauthor={Guobin Shen and Dongcheng Zhao and Xiang He and Linghao Feng and Yiting Dong and Jihang Wang and Qian Zhang and Yi Zeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ohi00YhT3T}\n}", "github": "", "reviewers": "4C2X;k95k;h32m;fArY", "pdf_size": 12351669, "rating": "6;6;6;6", "confidence": "4;5;4;4", "soundness": "3;4;2;2", "novelty": "3;3;2;2", "presentation": "3;4;3;2", "wc_summary": "453;79;44;86", "wc_strengths": "5;47;42;69", "wc_weaknesses": "5;51;208;227", "wc_questions": "5;70;99;138", "wc_limitations": "5;9;20;11", "wc_review": "473;256;413;531", "wc_reply_reviewers": "42;33;63;101", "wc_reply_authors": "17;11;23;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 165.5, 166.74906296588296 ], "wc_strengths_avg": [ 40.75, 23.004075725836064 ], "wc_weaknesses_avg": [ 122.75, 96.37005499635247 ], "wc_questions_avg": [ 78.0, 48.56439024635232 ], "wc_limitations_avg": [ 11.25, 5.494315243958978 ], "wc_review_avg": [ 418.25, 102.54602625163005 ], "wc_reply_reviewers_avg": [ 59.75, 26.185635375144138 ], "wc_reply_authors_avg": [ 18.5, 4.9749371855331 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1361226457605402936&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";ia.ac.cn;;ia.ac.cn;ia.ac.cn;;ia.ac.cn;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CSPG: Crossing Sparse Proximity Graphs for Approximate Nearest Neighbor Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93606", "id": "ohvXBIPV7e", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ohvXBIPV7e", "openreview": "https://openreview.net/forum?id=ohvXBIPV7e", "poster": "/media/PosterPDFs/NeurIPS%202024/93606.png?t=1730868292.2146382", "project": "", "author_site": "Ming Yang, Yuzheng Cai, Weiguo Zheng", "tldr": "", "abstract": "The state-of-the-art approximate nearest neighbor search (ANNS) algorithm builds a large proximity graph on the dataset and performs a greedy beam search, which may bring many unnecessary explorations. We develop a novel framework, namely *corssing sparse proximity graph (CSPG)*, based on random partitioning of the dataset. It produces a smaller sparse proximity graph for each partition and routing vectors that bind all the partitions. An efficient two-staged approach is designed for exploring *CSPG*, with fast approaching and cross-partition expansion. We theoretically prove that *CSPG* can accelerate the existing graph-based ANNS algorithms by reducing unnecessary explorations. In addition, we conduct extensive experiments on benchmark datasets. The experimental results confirm that the existing graph-based methods can be significantly outperformed by incorporating *CSPG*, achieving 1.5x to 2x speedups of *QPS* in almost all recalls.", "keywords": "similarity search;approximate nearest neighbor search;high-dimensional space;graph index", "primary_area": "other", "supplementary_material": "", "author": "Ming Yang;Yuzheng Cai;Weiguo Zheng", "authorids": "~Ming_Yang27;~Yuzheng_Cai1;~Weiguo_Zheng1", "gender": "M;;M", "homepage": "https://github.com/PUITAR;https://yz-cai.github.io/;https://weiguozheng.github.io/index-en.html", "dblp": "98/2604-43;289/8689.html;10/5927.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=vy-F5U4AAAAJ;", "orcid": "0009-0004-8789-1252;0009-0006-7096-5328;0000-0003-1200-7368", "linkedin": ";;", "or_profile": "~Ming_Yang27;~Yuzheng_Cai1;~Weiguo_Zheng1", "aff": "Hunan University;Fudan University;Fudan University", "aff_domain": "hnu.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyang2024cspg,\ntitle={{CSPG}: Crossing Sparse Proximity Graphs for Approximate Nearest Neighbor Search},\nauthor={Ming Yang and Yuzheng Cai and Weiguo Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ohvXBIPV7e}\n}", "github": "", "reviewers": "4Bip;U15c;niwV;PE2C", "pdf_size": 1159291, "rating": "4;5;7;7", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;2", "wc_summary": "60;81;63;65", "wc_strengths": "21;46;35;76", "wc_weaknesses": "29;217;165;17", "wc_questions": "282;20;74;40", "wc_limitations": "7;1;16;13", "wc_review": "399;365;353;211", "wc_reply_reviewers": "201;285;109;0", "wc_reply_authors": "491;638;63;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 67.25, 8.13557004763649 ], "wc_strengths_avg": [ 44.5, 20.22992832414391 ], "wc_weaknesses_avg": [ 107.0, 86.09297300012354 ], "wc_questions_avg": [ 104.0, 104.56576877735849 ], "wc_limitations_avg": [ 9.25, 5.7608593109014565 ], "wc_review_avg": [ 332.0, 71.8679344353238 ], "wc_reply_reviewers_avg": [ 148.75, 106.0669010577758 ], "wc_reply_authors_avg": [ 298.0, 272.43256046221785 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5555555555555555, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16475047969803480026&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "hnu.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Hunan University;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "http://www.hunu.edu.cn/;https://www.fudan.edu.cn", "aff_unique_abbr": "HNU;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ojFEP11Cqj", "title": "NRGBoost: Energy-Based Generative Boosted Trees", "track": "main", "status": "Reject", "tldr": "", "abstract": "Despite the rise to dominance of deep learning in unstructured data domains, \ntree-based methods such as Random Forests (RF) and Gradient Boosted Decision Trees (GBDT) are still the workhorses for handling discriminative tasks on tabular data.\nWe explore generative extensions of these popular algorithms with a focus on explicitly modeling the data density (up to a normalization constant), \nthus enabling other applications besides sampling.\nAs our main contribution we propose an effective energy-based generative boosting algorithm that is analogous to the\nsecond order boosting algorithm implemented in popular packages like XGBoost.\nWe show that, despite producing a generative model capable of handling inference tasks over any input variable, \nour proposed algorithm can achieve similar discriminative performance to GBDT algorithms on a number of real world tabular datasets and outperform competing approaches for sampling.", "keywords": "Generative Models;Energy-Based Models;Gradient Boosting;Tabular Data", "primary_area": "generative_models", "supplementary_material": "", "author": "Jo\u00e3o Bravo", "authorids": "~Jo\u00e3o_Bravo1", "gender": "M", "homepage": "", "dblp": "259/1073", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Jo\u00e3o_Bravo1", "aff": "Feedzai", "aff_domain": "feedzai.com", "position": "Researcher", "bibtex": "@misc{\nanonymous2024nrgboost,\ntitle={{NRGB}oost: Energy-Based Generative Boosted Trees},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=ojFEP11Cqj}\n}", "github": "", "project": "", "reviewers": "UJVb;trn6;QQ7t;fQFZ", "site": "https://openreview.net/forum?id=ojFEP11Cqj", "pdf_size": 538192, "rating": "4;5;6;9", "confidence": "2;2;4;4", "soundness": "2;3;3;4", "novelty": "2;3;2;4", "presentation": "2;2;3;4", "wc_summary": "37;48;27;433", "wc_strengths": "43;51;21;36", "wc_weaknesses": "103;172;15;15", "wc_questions": "292;10;101;39", "wc_limitations": "9;10;14;20", "wc_review": "484;291;178;543", "wc_reply_reviewers": "162;49;16;103", "wc_reply_authors": "2210;862;400;26", "reply_reviewers": "2;1;1;1", "reply_authors": "6;3;3;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 136.25, 171.48961338809997 ], "wc_strengths_avg": [ 37.75, 11.031205736455105 ], "wc_weaknesses_avg": [ 76.25, 65.92941301118948 ], "wc_questions_avg": [ 110.5, 109.82372239184028 ], "wc_limitations_avg": [ 13.25, 4.322904116447646 ], "wc_review_avg": [ 374.0, 146.59979536138513 ], "wc_reply_reviewers_avg": [ 82.5, 55.418859605733495 ], "wc_reply_authors_avg": [ 874.5, 825.9568693339864 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 1.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nWgC_KwdsecJ:scholar.google.com/&scioq=NRGBoost:+Energy-Based+Generative+Boosted+Trees&hl=en&as_sdt=0,44", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "Feedzai", "aff_unique_dep": "", "aff_unique_url": "https://www.feedzai.com", "aff_unique_abbr": "Feedzai", "aff_country_unique_index": "0", "aff_country_unique": "Portugal" }, { "title": "Efficient Discrepancy Testing for Learning with Distribution Shift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93605", "id": "ojIhvhQBAQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ojIhvhQBAQ", "openreview": "https://openreview.net/forum?id=ojIhvhQBAQ", "poster": "", "project": "", "author_site": "Gautam Chandrasekaran, Adam Klivans, Vasilis Kontonis, Konstantinos Stavropoulos, Arsen Vasilyan", "tldr": "", "abstract": "A fundamental notion of distance between train and test distributions from the field of domain adaptation is discrepancy distance. While in general hard to compute, here we provide the first set of provably efficient algorithms for testing *localized* discrepancy distance, where discrepancy is computed with respect to a fixed output classifier. These results imply a broad set of new, efficient learning algorithms in the recently introduced model of Testable Learning with Distribution Shift (TDS learning) due to Klivans et al. (2023).\n\nOur approach generalizes and improves all prior work on TDS learning: (1) we obtain *universal* learners that succeed simultaneously for large classes of test distributions, (2) achieve near-optimal error rates, and (3) give exponential improvements for constant depth circuits. Our methods further extend to semi-parametric settings and imply the first positive results for low-dimensional convex sets. Additionally, we separate learning and testing phases and obtain algorithms that run in fully polynomial time at test time.", "keywords": "pac learning;testable learning;distribution shift;distribution testing;discrepancy distance", "primary_area": "learning_theory", "supplementary_material": "", "author": "Gautam Chandrasekaran;Adam Klivans;Vasilis Kontonis;Konstantinos Stavropoulos;Arsen Vasilyan", "authorids": "~Gautam_Chandrasekaran1;~Adam_Klivans1;~Vasilis_Kontonis1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "gender": "M;M;M;;", "homepage": ";http://www.cs.utexas.edu/~klivans;http://vkonton.github.io/;;", "dblp": ";k/AdamRKlivans;203/8777;;", "google_scholar": ";;7_44KWAAAAAJ;;", "orcid": ";;;;", "linkedin": "gautam-chandrasekaran-5139a1188/;;;;", "or_profile": "~Gautam_Chandrasekaran1;~Adam_Klivans1;~Vasilis_Kontonis1;~Konstantinos_Stavropoulos1;~Arsen_Vasilyan1", "aff": " University of Texas at Austin;University of Texas, Austin;, University of Texas at Austin;;", "aff_domain": "cs.utexas.edu;cs.utexas.edu;cs.utexas.edu;;", "position": "PhD student;Professor;Postdoc;;", "bibtex": "@inproceedings{\nchandrasekaran2024efficient,\ntitle={Efficient Discrepancy Testing for Learning with Distribution Shift},\nauthor={Gautam Chandrasekaran and Adam Klivans and Vasilis Kontonis and Konstantinos Stavropoulos and Arsen Vasilyan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ojIhvhQBAQ}\n}", "github": "", "reviewers": "aevL;YUiz;P6Ww;hgSz;D9yX", "pdf_size": 1112271, "rating": "6;6;7;7;7", "confidence": "2;3;4;2;4", "soundness": "3;3;3;3;4", "novelty": "3;2;4;3;3", "presentation": "2;3;3;3;4", "wc_summary": "56;304;96;58;167", "wc_strengths": "54;17;122;63;49", "wc_weaknesses": "93;66;124;4;76", "wc_questions": "25;110;101;203;1", "wc_limitations": "5;11;19;1;1", "wc_review": "233;508;462;329;294", "wc_reply_reviewers": "4;53;4;29;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.6, 0.48989794855663565 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 136.2, 93.0255878777447 ], "wc_strengths_avg": [ 61.0, 34.21695486158872 ], "wc_weaknesses_avg": [ 72.6, 39.545416927881796 ], "wc_questions_avg": [ 88.0, 71.29656373206215 ], "wc_limitations_avg": [ 7.4, 6.8585712797928995 ], "wc_review_avg": [ 365.2, 103.55558893657069 ], "wc_reply_reviewers_avg": [ 18.0, 20.307634032550418 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4564354645876384, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15960476871557865860&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.utexas.edu;cs.utexas.edu;cs.utexas.edu;;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Shaping the distribution of neural responses with interneurons in a recurrent circuit model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93604", "id": "ojLIEQ0j9T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ojLIEQ0j9T", "openreview": "https://openreview.net/forum?id=ojLIEQ0j9T", "poster": "", "project": "", "author_site": "David Lipshutz, Eero Simoncelli", "tldr": "", "abstract": "Efficient coding theory posits that sensory circuits transform natural signals into neural representations that maximize information transmission subject to resource constraints. Local interneurons are thought to play an important role in these transformations, shaping patterns of circuit activity to facilitate and direct information flow. However, the relationship between these coordinated, nonlinear, circuit-level transformations and the properties of interneurons (e.g., connectivity, activation functions) remains unknown. Here, we propose a normative computational model that establishes such a relationship. Our model is derived from an optimal transport objective that conceptualizes the circuit's input-response function as transforming the inputs to achieve a target response distribution. The circuit, which is comprised of primary neurons that are recurrently connected to a set of local interneurons, continuously optimizes this objective by dynamically adjusting both the synaptic connections between neurons as well as the interneuron activation functions. In an application motivated by redundancy reduction theory, we demonstrate that when the inputs are natural image statistics and the target distribution is a spherical Gaussian, the circuit learns a nonlinear transformation that significantly reduces statistical dependencies in neural responses. Overall, our results provide a framework in which the distribution of circuit responses is systematically and nonlinearly controlled by adjustment of interneuron connectivity and activation functions.", "keywords": "Efficient coding;optimal transport;Gaussianization;Hebbian plasticity;gain modulation;neural adaptation", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "David Lipshutz;Eero P Simoncelli", "authorids": "~David_Lipshutz1;~Eero_P_Simoncelli1", "gender": "M;M", "homepage": "https://lipshutzlab.com;https://www.cns.nyu.edu/~eero/", "dblp": "173/4650;30/5604", "google_scholar": "XeWdtXcAAAAJ;MplR7_cAAAAJ", "orcid": "0000-0001-9347-8326;0000-0002-1206-527X", "linkedin": ";eero-simoncelli-445782123", "or_profile": "~David_Lipshutz1;~Eero_Peter_Simoncelli1", "aff": "Flatiron Institute;New York University", "aff_domain": "flatironinstitute.org;nyu.edu", "position": "Associate Research Scientist;Full Professor", "bibtex": "@inproceedings{\nlipshutz2024shaping,\ntitle={Shaping the distribution of neural responses with interneurons in a recurrent circuit model},\nauthor={David Lipshutz and Eero P Simoncelli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ojLIEQ0j9T}\n}", "github": "", "reviewers": "txmi;Voaj;zp8S;DTUH", "pdf_size": 4583843, "rating": "5;5;5;8", "confidence": "2;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;1;4", "presentation": "2;2;3;4", "wc_summary": "124;83;159;23", "wc_strengths": "157;48;84;31", "wc_weaknesses": "89;260;180;76", "wc_questions": "26;157;100;140", "wc_limitations": "7;5;45;12", "wc_review": "403;553;568;282", "wc_reply_reviewers": "18;222;112;85", "wc_reply_authors": "0;258;88;0", "reply_reviewers": "1;3;2;1", "reply_authors": "1;4;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 97.25, 50.60817621689207 ], "wc_strengths_avg": [ 80.0, 48.399380161320245 ], "wc_weaknesses_avg": [ 151.25, 74.4828000279259 ], "wc_questions_avg": [ 105.75, 50.47957507745088 ], "wc_limitations_avg": [ 17.25, 16.223054582907622 ], "wc_review_avg": [ 451.5, 117.21454687878975 ], "wc_reply_reviewers_avg": [ 109.25, 73.54377947862076 ], "wc_reply_authors_avg": [ 86.5, 105.33161918436458 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17320816965806457031&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "flatironinstitute.org;nyu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Flatiron Institute;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://flatironinstitute.org;https://www.nyu.edu", "aff_unique_abbr": "Flatiron;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Policy Mirror Descent with Lookahead", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93603", "id": "om2Aa0gUha", "proceeding": "", "pdf": "https://openreview.net/pdf?id=om2Aa0gUha", "openreview": "https://openreview.net/forum?id=om2Aa0gUha", "poster": "/media/PosterPDFs/NeurIPS%202024/93603.png?t=1733960093.3201697", "project": "", "author_site": "Kimon Protopapas, Anas Barakat", "tldr": "", "abstract": "Policy Mirror Descent (PMD) stands as a versatile algorithmic framework encompassing several seminal policy gradient algorithms such as natural policy gradient, with connections with state-of-the-art reinforcement learning (RL) algorithms such as TRPO and PPO. PMD can be seen as a soft Policy Iteration algorithm implementing regularized 1-step greedy policy improvement. However, 1-step greedy policies might not be the best choice and recent remarkable empirical successes in RL such as AlphaGo and AlphaZero have demonstrated that greedy approaches with respect to multiple steps outperform their 1-step counterpart. In this work, we propose a new class of PMD algorithms called $h$-PMD which incorporates multi-step greedy policy improvement with lookahead depth $h$ to the PMD update rule. To solve discounted infinite horizon Markov Decision Processes with discount factor $\\gamma$, we show that $h$-PMD which generalizes the standard PMD enjoys a faster dimension-free $\\gamma^h$-linear convergence rate, contingent on the computation of multi-step greedy policies. We propose an inexact version of $h$-PMD where lookahead action values are estimated. Under a generative model, we establish a sample complexity for $h$-PMD which improves over prior work. Finally, we extend our result to linear function approximation to scale to large state spaces. Under suitable assumptions, our sample complexity only involves dependence on the dimension of the feature map space instead of the state space size.", "keywords": "Policy mirror descent;policy gradient methods;policy iteration;multi-step greedy policy improvement;policy optimization", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/d8d2a06b9bfa06761f3fd82bb38b717d1083cb29.zip", "author": "Kimon Protopapas;Anas Barakat", "authorids": "~Kimon_Protopapas1;~Anas_Barakat1", "gender": "M;M", "homepage": "http://www.example.com;https://anasbarakat.github.io", "dblp": ";228/9320", "google_scholar": ";5YyyWPkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kimon_Protopapas1;~Anas_Barakat1", "aff": "ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "ethz.ch;inf.ethz.ch", "position": "MS student;Postdoc", "bibtex": "@inproceedings{\nprotopapas2024policy,\ntitle={Policy Mirror Descent with Lookahead},\nauthor={Kimon Protopapas and Anas Barakat},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=om2Aa0gUha}\n}", "github": "", "reviewers": "t6qC;Pv8t;QSY1;vPQx;SGix", "pdf_size": 1062704, "rating": "4;4;6;7;7", "confidence": "4;3;4;2;2", "soundness": "3;3;3;3;3", "novelty": "2;2;3;3;4", "presentation": "2;3;3;3;4", "wc_summary": "117;36;121;82;50", "wc_strengths": "120;38;55;48;40", "wc_weaknesses": "152;44;329;15;57", "wc_questions": "275;73;172;21;38", "wc_limitations": "190;24;1;16;2", "wc_review": "854;215;678;182;187", "wc_reply_reviewers": "0;0;139;22;23", "wc_reply_authors": "0;0;96;18;28", "reply_reviewers": "0;0;2;1;1", "reply_authors": "1;1;3;2;2", "rating_avg": [ 5.6, 1.3564659966250536 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 81.2, 34.3010203929854 ], "wc_strengths_avg": [ 60.2, 30.505081543900186 ], "wc_weaknesses_avg": [ 119.4, 114.43181375823771 ], "wc_questions_avg": [ 115.8, 95.26258447050446 ], "wc_limitations_avg": [ 46.6, 72.22077263502517 ], "wc_review_avg": [ 423.2, 285.5964985779763 ], "wc_reply_reviewers_avg": [ 36.8, 52.08224265524671 ], "wc_reply_authors_avg": [ 28.4, 35.47167884383258 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6593804733957871, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SQe57UNhx_0J:scholar.google.com/&scioq=Policy+Mirror+Descent+with+Lookahead&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "ethz.ch;inf.ethz.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Learning to Mitigate Externalities: the Coase Theorem with Hindsight Rationality", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93602", "id": "omyzrkacme", "proceeding": "", "pdf": "https://openreview.net/pdf?id=omyzrkacme", "openreview": "https://openreview.net/forum?id=omyzrkacme", "poster": "/media/PosterPDFs/NeurIPS%202024/93602.png?t=1731607173.5550802", "project": "", "author_site": "Antoine Scheid, Aymeric Capitaine, Etienne Boursier, Eric Moulines, Michael Jordan, Alain Durmus", "tldr": "", "abstract": "In Economics, the concept of externality refers to any indirect effect resulting from an interaction between players and affecting a third party without compensation. Most of the models within which externality has been studied assume that agents have perfect knowledge of their environment and preferences. This is a major hindrance to the practical implementation of many proposed solutions. To adress this issue, we consider a two-players bandit game setting where the actions of one of the player affect the other one. Building upon this setup, we extend the Coase theorem [Coase, 2013], which suggests that the optimal approach for maximizing the social welfare in the presence of externality is to establish property rights, i.e., enabling transfers and bargaining between the players. Nonetheless, this fundamental result relies on the assumption that bargainers possess perfect knowledge of the underlying game. We first demonstrate that in the absence of property rights in the considered online scenario, the social welfare breaks down. We then provide a policy for the players, which allows them to learn a bargaining strategy which maximizes the total welfare, recovering the Coase theorem under uncertainty.", "keywords": "Online Learning;Bandits;Algorithmic Game Theory;Externality;Policy Design;Two-Players Game", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Antoine Scheid;Aymeric Capitaine;Etienne Boursier;Eric Moulines;Michael Jordan;Alain Oliviero Durmus", "authorids": "~Antoine_Scheid1;~Aymeric_Capitaine1;~Etienne_Boursier1;~Eric_Moulines1;~Michael_Jordan1;~Alain_Oliviero_Durmus1", "gender": "M;M;M;M;M;M", "homepage": ";https://fr.linkedin.com/in/aymeric-capitaine-ab00a818b;https://eboursier.github.io/;;http://www.cs.berkeley.edu/~jordan/;", "dblp": ";;203/8633;54/2358;j/MichaelIJordan;01/11275", "google_scholar": "M9zQVwgAAAAJ;;https://scholar.google.fr/citations?user=-9todDUAAAAJ;https://scholar.google.fr/citations?user=_XE1LvQAAAAJ;https://scholar.google.com.tw/citations?user=yxUduqMAAAAJ;", "orcid": ";;;0000-0002-2058-0693;0000-0001-8935-817X;", "linkedin": "antoine-scheid-687735239/;;;;;", "or_profile": "~Antoine_Scheid1;~Aymeric_Capitaine1;~Etienne_Boursier1;~Eric_Moulines1;~Michael_Jordan1;~Alain_Durmus1", "aff": "\u00c9cole Polytechnique;\u00c9cole Polytechnique;INRIA;Ecole polytechnique;University of California, Berkeley;\u00c9cole Polytechnique", "aff_domain": "polytechnique.edu;polytechnique.fr;inria.fr;polytechnique.edu;berkeley.edu;polytechnique.fr", "position": "PhD student;PhD student;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nscheid2024learning,\ntitle={Learning to Mitigate Externalities: the Coase Theorem with Hindsight Rationality},\nauthor={Antoine Scheid and Aymeric Capitaine and Etienne Boursier and Eric Moulines and Michael Jordan and Alain Oliviero Durmus},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=omyzrkacme}\n}", "github": "", "reviewers": "rbRK;UVPQ;aRFf", "pdf_size": 631243, "rating": "6;7;7", "confidence": "4;2;2", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "200;186;161", "wc_strengths": "82;161;85", "wc_weaknesses": "157;51;5", "wc_questions": "8;34;159", "wc_limitations": "12;10;9", "wc_review": "459;442;419", "wc_reply_reviewers": "22;0;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 182.33333333333334, 16.131404843417148 ], "wc_strengths_avg": [ 109.33333333333333, 36.55437350334734 ], "wc_weaknesses_avg": [ 71.0, 63.64484791926733 ], "wc_questions_avg": [ 67.0, 65.91408549518583 ], "wc_limitations_avg": [ 10.333333333333334, 1.247219128924647 ], "wc_review_avg": [ 440.0, 16.391054470858997 ], "wc_reply_reviewers_avg": [ 10.666666666666666, 8.993825042154695 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13362913044320337815&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "polytechnique.edu;polytechnique.fr;inria.fr;polytechnique.edu;berkeley.edu;polytechnique.fr", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Ecole Polytechnique;INRIA;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polytechnique.edu;https://www.inria.fr;https://www.berkeley.edu", "aff_unique_abbr": "X;INRIA;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "France;United States" }, { "title": "Forgetting, Ignorance or Myopia: Revisiting Key Challenges in Online Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93601", "id": "opaRhDvQRD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=opaRhDvQRD", "openreview": "https://openreview.net/forum?id=opaRhDvQRD", "poster": "", "project": "", "author_site": "Wang Xinrui, Chuanxing Geng, Wenhai Wan, Shao-Yuan Li, Songcan Chen", "tldr": "", "abstract": "Online continual learning (OCL) requires the models to learn from constant, endless streams of data. While significant efforts have been made in this field, most were focused on mitigating the \\textit{catastrophic forgetting} issue to achieve better classification ability, at the cost of a much heavier training workload. They overlooked that in real-world scenarios, e.g., in high-speed data stream environments, data do not pause to accommodate slow models. In this paper, we emphasize that \\textit{model throughput}-- defined as the maximum number of training samples that a model can process within a unit of time -- is equally important. It directly limits how much data a model can utilize and presents a challenging dilemma for current methods. With this understanding, we revisit key challenges in OCL from both empirical and theoretical perspectives, highlighting two critical issues beyond the well-documented catastrophic forgetting: (\\romannumeral1) Model's ignorance: the single-pass nature of OCL challenges models to learn effective features within constrained training time and storage capacity, leading to a trade-off between effective learning and model throughput; (\\romannumeral2) Model's myopia: the local learning nature of OCL on the current task leads the model to adopt overly simplified, task-specific features and \\textit{excessively sparse classifier}, resulting in the gap between the optimal solution for the current task and the global objective. To tackle these issues, we propose the Non-sparse Classifier Evolution framework (NsCE) to facilitate effective global discriminative feature learning with minimal time cost. NsCE integrates non-sparse maximum separation regularization and targeted experience replay techniques with the help of pre-trained models, enabling rapid acquisition of new globally discriminative features. Extensive experiments demonstrate the substantial improvements of our framework in performance, throughput and real-world practicality.", "keywords": "online continual learning;data stream;model throughput", "primary_area": "machine_vision", "supplementary_material": "/attachment/5b5389f97aa0f6cd572584536296184395d9b2a3.zip", "author": "Wang Xinrui;Chuanxing Geng;Wenhai Wan;Shao-Yuan Li;Songcan Chen", "authorids": "~Wang_Xinrui1;~Chuanxing_Geng1;~Wenhai_Wan1;~Shao-Yuan_Li1;~Songcan_Chen1", "gender": "M;M;M;F;", "homepage": "https://wxr99.github.io/dudusama/;https://faculty.nuaa.edu.cn/gengchuanxing/zh_CN/index.htm;https://openreview.net/;http://parnec.nuaa.edu.cn/lisy;", "dblp": ";224/2052;342/6476;79/1523;", "google_scholar": "3juyXgQAAAAJ;thqYKQIAAAAJ;;https://scholar.google.com/citations?hl=en;", "orcid": "0009-0006-0208-9063;;;0000-0003-0610-8568;", "linkedin": ";;;;", "or_profile": "~Wang_Xinrui1;~Chuanxing_Geng1;~Wenhai_Wan1;~Shao-Yuan_Li1;~Songcan_Chen1", "aff": "Nanjing University of Aeronautics and Astronautics;Hong Kong Baptist University;Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;", "aff_domain": "nuaa.edu.cn;hkbu.edu.hk;nuaa.edu.cn;nuaa.edu.cn;", "position": "PhD student;Postdoc;MS student;Associate Professor;", "bibtex": "@inproceedings{\nxinrui2024forgetting,\ntitle={Forgetting, Ignorance or Myopia: Revisiting Key Challenges in Online Continual Learning},\nauthor={Wang Xinrui and Chuanxing Geng and Wenhai Wan and Shao-Yuan Li and Songcan Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=opaRhDvQRD}\n}", "github": "", "reviewers": "abk8;u1kw;WX6u", "pdf_size": 2196025, "rating": "6;7;8", "confidence": "5;5;4", "soundness": "3;4;4", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "63;58;91", "wc_strengths": "83;132;67", "wc_weaknesses": "120;165;533", "wc_questions": "128;95;2", "wc_limitations": "7;1;8", "wc_review": "401;451;701", "wc_reply_reviewers": "0;0;357", "wc_reply_authors": "0;0;1264", "reply_reviewers": "0;0;2", "reply_authors": "1;1;4", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.66666666666667, 14.522013940527977 ], "wc_strengths_avg": [ 94.0, 27.65260686927485 ], "wc_weaknesses_avg": [ 272.6666666666667, 184.99789788595498 ], "wc_questions_avg": [ 75.0, 53.34791467339656 ], "wc_limitations_avg": [ 5.333333333333333, 3.0912061651652345 ], "wc_review_avg": [ 517.6666666666666, 131.23346456686352 ], "wc_reply_reviewers_avg": [ 119.0, 168.2914139223983 ], "wc_reply_authors_avg": [ 421.3333333333333, 595.855314279864 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=225420907291423286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "nuaa.edu.cn;hkbu.edu.hk;nuaa.edu.cn;nuaa.edu.cn;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;Hong Kong Baptist University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuaa.edu.cn;https://www.hkbu.edu.hk", "aff_unique_abbr": "NUAA;HKBU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "opdiIAHfBr", "title": "AlignedCut: Visual Concepts Discovery on Brain-Guided Universal Feature Space", "track": "main", "status": "Reject", "tldr": "", "abstract": "We study the intriguing connection between visual data, deep networks, and the brain. Our method creates a universal channel alignment by using brain voxel fMRI response prediction as the training objective. We discover that deep networks, trained with different objectives, share common feature channels across various models. These channels can be clustered into recurring sets, corresponding to distinct brain regions, indicating the formation of visual concepts. Tracing the clusters of channel responses onto the images, we see semantically meaningful object segments emerge, even without any supervised decoder. Furthermore, the universal feature alignment and the clustering of channels produce a picture and quantification of how visual information is processed through the different network layers, which produces precise comparisons between the networks.", "keywords": "Feature alignment;Visual concepts;Explainability;Brain encoding", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Huzheng Yang;James Gee;Jianbo Shi", "authorids": "~Huzheng_Yang1;~James_Gee1;~Jianbo_Shi1", "gender": ";M;M", "homepage": "https://huzeyann.github.io/;https://www.med.upenn.edu/apps/faculty/index.php/g5455356/p10656;http://www.cs.cmu.edu/~jshi/", "dblp": "250/3888.html;30/6904;71/3879", "google_scholar": "8yVLKyYAAAAJ;https://scholar.google.com.tw/citations?user=fU8fmEIAAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Huzheng_Yang1;~James_Gee1;~Jianbo_Shi1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;Full Professor;Professor", "bibtex": "@misc{\nanonymous2024alignedcut,\ntitle={AlignedCut: Visual Concepts Discovery on Brain-Guided Universal Feature Space},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=opdiIAHfBr}\n}", "github": "", "project": "", "reviewers": "UZU3;8iP4;yRLt;UPzh", "site": "https://openreview.net/forum?id=opdiIAHfBr", "pdf_size": 26301325, "rating": "3;4;4;4", "confidence": "3;3;4;2", "soundness": "3;3;2;1", "novelty": "2;3;2;1", "presentation": "1;3;2;1", "wc_summary": "491;116;150;54", "wc_strengths": "77;91;114;45", "wc_weaknesses": "938;422;281;218", "wc_questions": "116;72;211;650", "wc_limitations": "36;32;8;23", "wc_review": "1658;733;764;990", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 3.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 202.75, 169.94318903680724 ], "wc_strengths_avg": [ 81.75, 24.993749218554626 ], "wc_weaknesses_avg": [ 464.75, 283.038314544162 ], "wc_questions_avg": [ 262.25, 229.43449500892407 ], "wc_limitations_avg": [ 24.75, 10.755812382149477 ], "wc_review_avg": [ 1036.25, 372.4220556035853 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WE99fHDvlFoJ:scholar.google.com/&scioq=AlignedCut:+Visual+Concepts+Discovery+on+Brain-Guided+Universal+Feature+Space&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal ablation for interpretability", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93600", "id": "opt72TYzwZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=opt72TYzwZ", "openreview": "https://openreview.net/forum?id=opt72TYzwZ", "poster": "", "project": "", "author_site": "Maximilian Li, Lucas Janson", "tldr": "", "abstract": "Interpretability studies often involve tracing the flow of information through machine learning models to identify specific model components that perform relevant computations for tasks of interest. Prior work quantifies the importance of a model component on a particular task by measuring the impact of performing ablation on that component, or simulating model inference with the component disabled.\n We propose a new method, optimal ablation (OA), and show that OA-based component importance has theoretical and empirical advantages over measuring importance via other ablation methods. We also show that OA-based component importance can benefit several downstream interpretability tasks, including circuit discovery, localization of factual recall, and latent prediction.", "keywords": "mechanistic intepretability;model internals;ablation;activation patching;automatic circuit discovery;causal tracing;tuned lens", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Maximilian Li;Lucas Janson", "authorids": "~Maximilian_Li1;~Lucas_Janson2", "gender": ";", "homepage": "https://www.linkedin.com/in/maxtli/;http://lucasjanson.fas.harvard.edu/", "dblp": ";131/6726", "google_scholar": ";Njlo7WAAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Maximilian_Li1;~Lucas_Janson2", "aff": "Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu", "position": "Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nli2024optimal,\ntitle={Optimal ablation for interpretability},\nauthor={Maximilian Li and Lucas Janson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=opt72TYzwZ}\n}", "github": "", "reviewers": "WFFz;Nr7M;nbx7;mH4h", "pdf_size": 3870290, "rating": "4;8;8;9", "confidence": "3;3;4;3", "soundness": "2;3;3;4", "novelty": "2;4;4;4", "presentation": "2;3;3;4", "wc_summary": "60;287;177;98", "wc_strengths": "51;85;447;25", "wc_weaknesses": "312;73;891;8", "wc_questions": "4;50;141;1", "wc_limitations": "4;10;43;2", "wc_review": "431;505;1699;134", "wc_reply_reviewers": "0;67;758;0", "wc_reply_authors": "0;0;475;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 7.25, 1.920286436967152 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 155.5, 86.86339850593 ], "wc_strengths_avg": [ 152.0, 171.64206943520577 ], "wc_weaknesses_avg": [ 321.0, 348.01364915761565 ], "wc_questions_avg": [ 49.0, 56.555282688710875 ], "wc_limitations_avg": [ 14.75, 16.57369904396722 ], "wc_review_avg": [ 692.25, 597.599939340693 ], "wc_reply_reviewers_avg": [ 206.25, 319.72517495499164 ], "wc_reply_authors_avg": [ 118.75, 205.68103339880417 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.22549380840084865, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17047971455091268125&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "harvard.edu;harvard.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Compositional 3D-aware Video Generation with LLM Director", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93599", "id": "oqdy2EFrja", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oqdy2EFrja", "openreview": "https://openreview.net/forum?id=oqdy2EFrja", "poster": "/media/PosterPDFs/NeurIPS%202024/93599.png?t=1733241422.2946844", "project": "", "author_site": "Hanxin Zhu, Tianyu He, Anni Tang, Junliang Guo, Zhibo Chen, Jiang Bian", "tldr": "", "abstract": "Significant progress has been made in text-to-video generation through the use of powerful generative models and large-scale internet data. However, substantial challenges remain in precisely controlling individual elements within the generated video, such as the movement and appearance of specific characters and the manipulation of viewpoints. In this work, we propose a novel paradigm that generates each element in 3D representation separately and then composites them with priors from Large Language Models (LLMs) and 2D diffusion models. Specifically, given an input textual query, our scheme consists of four stages: 1) we leverage the LLMs as the director to first decompose the complex query into several sub-queries, where each sub-query describes each element of the generated video; 2) to generate each element, pre-trained models are invoked by the LLMs to obtain the corresponding 3D representation; 3) to composite the generated 3D representations, we prompt multi-modal LLMs to produce coarse guidance on the scale, location, and trajectory of different objects; 4) to make the results adhere to natural distribution, we further leverage 2D diffusion priors and use score distillation sampling to refine the composition. Extensive experiments demonstrate that our method can generate high-fidelity videos from text with flexible control over each element.", "keywords": "3D-aware Video Generation; LLM; Compositional", "primary_area": "generative_models", "supplementary_material": "/attachment/f503358b9a24890ebeeef71682c24641703b4cbc.zip", "author": "Hanxin Zhu;Tianyu He;Anni Tang;Junliang Guo;Zhibo Chen;Jiang Bian", "authorids": "~Hanxin_Zhu1;~Tianyu_He1;~Anni_Tang1;~Junliang_Guo1;~Zhibo_Chen1;~Jiang_Bian1", "gender": "M;M;F;M;M;M", "homepage": ";https://www.microsoft.com/en-us/research/people/tianyuhe/;;https://leoguojl.me/;https://faculty.ustc.edu.cn/chenzhibo;https://sites.google.com/view/jiangbian", "dblp": "261/8127;198/4010;;209/9674;54/6561.html;09/851-2.html", "google_scholar": "MbVZAGQAAAAJ;P08KU1YAAAAJ;sfx8pqoAAAAJ;https://scholar.google.com.sg/citations?user=S88C9ewAAAAJ;1ayDJfsAAAAJ;pZBEnY8AAAAJ", "orcid": ";0000-0002-4828-3228;;0000-0001-8360-5483;;0000-0002-9472-600X", "linkedin": ";;;;;jbian/", "or_profile": "~Hanxin_Zhu1;~Tianyu_He1;~Anni_Tang1;~Junliang_Guo1;~Zhibo_Chen1;~Jiang_Bian1", "aff": "Microsoft Research Asia;Microsoft Research Asia;Shanghai Jiaotong University;Microsoft;University of Science and Technology of China;Microsoft", "aff_domain": "microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com", "position": "Intern;Researcher;PhD student;Researcher;Full Professor;Partner Research Manager", "bibtex": "@inproceedings{\nzhu2024compositional,\ntitle={Compositional 3D-aware Video Generation with {LLM} Director},\nauthor={Hanxin Zhu and Tianyu He and Anni Tang and Junliang Guo and Zhibo Chen and Jiang Bian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oqdy2EFrja}\n}", "github": "", "reviewers": "7svJ;BLtC;N95p;6K4s", "pdf_size": 4717388, "rating": "3;5;5;5", "confidence": "5;4;5;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "96;83;85;49", "wc_strengths": "207;90;23;48", "wc_weaknesses": "266;39;74;308", "wc_questions": "23;41;13;18", "wc_limitations": "20;8;4;1", "wc_review": "612;261;199;424", "wc_reply_reviewers": "40;55;44;0", "wc_reply_authors": "107;344;46;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.25, 17.597940220378067 ], "wc_strengths_avg": [ 92.0, 70.57974213611155 ], "wc_weaknesses_avg": [ 171.75, 116.85969151080282 ], "wc_questions_avg": [ 23.75, 10.568230693924125 ], "wc_limitations_avg": [ 8.25, 7.224091638399945 ], "wc_review_avg": [ 374.0, 160.10777620090786 ], "wc_reply_reviewers_avg": [ 34.75, 20.801141795584204 ], "wc_reply_authors_avg": [ 124.25, 132.42804650073185 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15972877124649001373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Microsoft;Shanghai Jiao Tong University;University of Science and Technology of China", "aff_unique_dep": "Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;https://www.sjtu.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "MSR Asia;SJTU;USTC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Getting More Juice Out of the SFT Data: Reward Learning from Human Demonstration Improves SFT for LLM Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93598", "id": "orxQccN8Fm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=orxQccN8Fm", "openreview": "https://openreview.net/forum?id=orxQccN8Fm", "poster": "/media/PosterPDFs/NeurIPS%202024/93598.png?t=1731465977.5375016", "project": "", "author_site": "Jiaxiang Li, Siliang Zeng, Hoi-To Wai, Chenliang Li, Alfredo Garcia, Mingyi Hong", "tldr": "", "abstract": "Aligning human preference and value is an important requirement for contemporary foundation models. State-of-the-art techniques such as Reinforcement Learning from Human Feedback (RLHF) often consist of two stages: 1) supervised fine-tuning (SFT), where the model is fine-tuned by learning from human demonstration data; 2) Preference learning, where preference data is used to learn a reward model, which is in turn used by a reinforcement learning (RL) step to fine-tune the model. Such reward model serves as a proxy to human preference, and it is critical to guide the RL step towards improving the model quality. In this work, we argue that the SFT stage significantly benefits from learning a reward model as well. Instead of using the human demonstration data directly via supervised learning, we propose to leverage an Inverse Reinforcement Learning (IRL) technique to {\\it simultaneously} build an reward model and a policy model. This approach leads to new SFT algorithms that are not only efficient to implement, but are robust to the presence of low-quality supervised learning data. Moreover, we discover a connection between the proposed IRL based approach, and a recent line of works called Self-Play Fine-tune (SPIN, \\cite{chen2024self}). Theoretically, we show that the proposed algorithms converge to the stationary solutions of the IRL problem. Empirically, we align 1B and 7B models using proposed methods and evaluate them on a reward benchmark model and the HuggingFace Open LLM Leaderboard. The proposed methods show significant performance improvement over existing SFT approaches. Our results indicate that it is beneficial to leverage reward learning throughout the entire alignment process. Our code is available at \\url{https://github.com/JasonJiaxiangLi/Reward_learning_SFT}.", "keywords": "Large language models;Fine-tune;alignment;Reinforcement learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jiaxiang Li;Siliang Zeng;Hoi To Wai;Chenliang Li;Alfredo Garcia;Mingyi Hong", "authorids": "~Jiaxiang_Li1;~Siliang_Zeng1;~Hoi_To_Wai1;~Chenliang_Li3;~Alfredo_Garcia1;~Mingyi_Hong1", "gender": "M;M;M;M;M;M", "homepage": "https://jasonjiaxiangli.github.io/;https://siliangzeng.github.io/index.html;http://www1.se.cuhk.edu.hk/~htwai/;;https://agarcia.engr.tamu.edu;http://people.ece.umn.edu/~mhong/mingyi.html", "dblp": ";38/9;29/9875;;;57/8053", "google_scholar": "h5OWvc0AAAAJ;IfqsDyYAAAAJ;https://scholar.google.com.hk/citations?user=5-J7LeMAAAAJ;;;qRnP-p0AAAAJ", "orcid": "0009-0001-5555-6511;;;;;", "linkedin": "jiaxiang-li-9aa485118/;;;https://www.linkedin.cn/incareer/in/%E7%90%9B%E8%89%AF-%E6%9D%8E-5a333a23b;;", "or_profile": "~Jiaxiang_Li1;~Siliang_Zeng1;~Hoi_To_Wai1;~Chenliang_Li3;~Alfredo_Garcia1;~Mingyi_Hong1", "aff": "University of Minnesota - Twin Cities;University of Minnesota, Twin Cities;The Chinese University of Hong Kong;Texas A&M University - College Station;Texas A&M University - College Station;University of Minnesota, Minneapolis", "aff_domain": "umn.edu;umn.edu;cuhk.edu.hk;tamu.edu;tamu.edu;umn.edu", "position": "Postdoc;PhD student;Assistant Professor;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2024getting,\ntitle={Getting More Juice Out of the {SFT} Data: Reward Learning from Human Demonstration Improves {SFT} for {LLM} Alignment},\nauthor={Jiaxiang Li and Siliang Zeng and Hoi To Wai and Chenliang Li and Alfredo Garcia and Mingyi Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=orxQccN8Fm}\n}", "github": "", "reviewers": "J5fv;AMst;RhUZ;Tyd8", "pdf_size": 699648, "rating": "5;5;6;7", "confidence": "3;3;4;4", "soundness": "2;2;3;3", "novelty": "3;2;3;2", "presentation": "2;3;3;3", "wc_summary": "88;39;51;50", "wc_strengths": "75;32;28;62", "wc_weaknesses": "251;79;58;44", "wc_questions": "5;33;100;75", "wc_limitations": "25;28;5;8", "wc_review": "444;211;242;239", "wc_reply_reviewers": "71;46;114;12", "wc_reply_authors": "0;0;1259;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;4;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.0, 18.506755523321747 ], "wc_strengths_avg": [ 49.25, 19.84155991851447 ], "wc_weaknesses_avg": [ 108.0, 83.49550886125553 ], "wc_questions_avg": [ 53.25, 36.73128775308592 ], "wc_limitations_avg": [ 16.5, 10.111874208078342 ], "wc_review_avg": [ 284.0, 93.16383418473072 ], "wc_reply_reviewers_avg": [ 60.75, 37.19795021234369 ], "wc_reply_authors_avg": [ 314.75, 545.1629916823041 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10221224899131187822&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "umn.edu;umn.edu;cuhk.edu.hk;tamu.edu;tamu.edu;umn.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "University of Minnesota;Chinese University of Hong Kong;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.minnesota.edu;https://www.cuhk.edu.hk;https://www.tamu.edu", "aff_unique_abbr": "UMN;CUHK;TAMU", "aff_campus_unique_index": "0;0;1;2;2;3", "aff_campus_unique": "Twin Cities;Hong Kong SAR;College Station;Minneapolis", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "OctreeOcc: Efficient and Multi-Granularity Occupancy Prediction Using Octree Queries", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93597", "id": "os14qXhy55", "proceeding": "", "pdf": "https://openreview.net/pdf?id=os14qXhy55", "openreview": "https://openreview.net/forum?id=os14qXhy55", "poster": "", "project": "", "author_site": "Yuhang Lu, Xinge ZHU, Tai WANG, Yuexin Ma", "tldr": "", "abstract": "Occupancy prediction has increasingly garnered attention in recent years for its fine-grained understanding of 3D scenes. Traditional approaches typically rely on dense, regular grid representations, which often leads to excessive computational demands and a loss of spatial details for small objects. This paper introduces OctreeOcc, an innovative 3D occupancy prediction framework that leverages the octree representation to adaptively capture valuable information in 3D, offering variable granularity to accommodate object shapes and semantic regions of varying sizes and complexities. In particular, we incorporate image semantic information to improve the accuracy of initial octree structures and design an effective rectification mechanism to refine the octree structure iteratively. Our extensive evaluations show that OctreeOcc not only surpasses state-of-the-art methods in occupancy prediction, but also achieves a 15%-24% reduction in computational overhead compared to dense-grid-based methods.", "keywords": "3D Scene Understanding; Occupancy Prediction; Octree", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuhang Lu;Xinge ZHU;Tai Wang;Yuexin Ma", "authorids": "~Yuhang_Lu2;~Xinge_ZHU2;~Tai_Wang2;~Yuexin_Ma2", "gender": "M;F;M;M", "homepage": "https://yuhanglu2000.github.io/;http://yuexinma.me/aboutme.html;https://tai-wang.github.io/;https://xingezhu.me/aboutme.html", "dblp": ";209/5925;;204/3002", "google_scholar": "HiwDOksAAAAJ;;JmbbZWIAAAAJ;https://scholar.google.com.hk/citations?user=yHAcRooAAAAJ", "orcid": ";;;", "linkedin": ";;%E6%B3%B0-%E7%8E%8B-2b2738147/;", "or_profile": "~Yuhang_Lu2;~Yuexin_Ma2;~Tai_WANG1;~Xinge_Zhu3", "aff": "ShanghaiTech University;ShanghaiTech University;Shanghai AI Laboratory;The Chinese University of Hong Kong", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;pjlab.org.cn;cuhk.edu.hk", "position": "Intern;Assistant Professor;Research Scientist;PhD student", "bibtex": "@inproceedings{\nlu2024octreeocc,\ntitle={OctreeOcc: Efficient and Multi-Granularity Occupancy Prediction Using Octree Queries},\nauthor={Yuhang Lu and Xinge ZHU and Tai Wang and Yuexin Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=os14qXhy55}\n}", "github": "", "reviewers": "mQ4T;5724;Mixn;NcWJ", "pdf_size": 14920598, "rating": "5;5;6;7", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "43;91;44;51", "wc_strengths": "48;55;65;43", "wc_weaknesses": "61;98;60;148", "wc_questions": "50;4;159;4", "wc_limitations": "17;1;46;7", "wc_review": "219;249;374;253", "wc_reply_reviewers": "72;49;23;30", "wc_reply_authors": "49;0;0;19", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 19.727835664360143 ], "wc_strengths_avg": [ 52.75, 8.257572282456872 ], "wc_weaknesses_avg": [ 91.75, 35.90525727522364 ], "wc_questions_avg": [ 54.25, 63.32604124686779 ], "wc_limitations_avg": [ 17.75, 17.282577932704367 ], "wc_review_avg": [ 273.75, 59.35223247696754 ], "wc_reply_reviewers_avg": [ 43.5, 19.00657780874821 ], "wc_reply_authors_avg": [ 17.0, 20.03746490951388 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14538557963693410020&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;pjlab.org.cn;cuhk.edu.hk", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "ShanghaiTech University;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "ShanghaiTech;SAIL;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "FreqBlender: Enhancing DeepFake Detection by Blending Frequency Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93596", "id": "otZPBS0un6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=otZPBS0un6", "openreview": "https://openreview.net/forum?id=otZPBS0un6", "poster": "", "project": "", "author_site": "hanzhe li, Jiaran Zhou, Yuezun Li, Baoyuan Wu, Bin Li, Junyu Dong", "tldr": "", "abstract": "Generating synthetic fake faces, known as pseudo-fake faces, is an effective way to improve the generalization of DeepFake detection. Existing methods typically generate these faces by blending real or fake faces in spatial domain. While these methods have shown promise, they overlook the simulation of frequency distribution in pseudo-fake faces, limiting the learning of generic forgery traces in-depth. To address this, this paper introduces {\\em FreqBlender}, a new method that can generate pseudo-fake faces by blending frequency knowledge. Concretely, we investigate the major frequency components and propose a Frequency Parsing Network to adaptively partition frequency components related to forgery traces. Then we blend this frequency knowledge from fake faces into real faces to generate pseudo-fake faces. Since there is no ground truth for frequency components, we describe a dedicated training strategy by leveraging the inner correlations among different frequency knowledge to instruct the learning process. Experimental results demonstrate the effectiveness of our method in enhancing DeepFake detection, making it a potential plug-and-play strategy for other methods.", "keywords": "DeepFake Detection;Security and Privacy;Multimedia Forensics", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Hanzhe LI;Jiaran Zhou;Yuezun Li;Baoyuan Wu;Bin Li;Junyu Dong", "authorids": "~Hanzhe_LI2;~Jiaran_Zhou1;~Yuezun_Li1;~Baoyuan_Wu1;~Bin_Li16;~Junyu_Dong1", "gender": "M;F;M;M;M;M", "homepage": "https://github.com/lihanzhe;;https://yuezunli.github.io/;https://sites.google.com/site/baoyuanwu2015/;http://media-sec.szu.edu.cn/dr-bin-li/index.html;http://ai-ouc.cn/people/dongjy.html", "dblp": ";221/2254;136/5524;73/7781;89/6764-11.html;40/6682", "google_scholar": ";tkz-3AgAAAAJ;v0Qt7BAAAAAJ;JNTG1KoAAAAJ;g0iR9IkAAAAJ;iPYdUpAAAAAJ", "orcid": ";0000-0002-2943-2806;;0000-0003-2183-5990;;", "linkedin": ";;;;;", "or_profile": "~Hanzhe_LI2;~Jiaran_Zhou1;~Yuezun_Li1;~Baoyuan_Wu1;~Bin_Li16;~Junyu_Dong1", "aff": "Ocean University of China;Ocean University of China;Ocean University of China;The Chinese University of Hong Kong, Shenzhen;Shenzhen University;Ocean University of China", "aff_domain": "ouc.edu.cn;ouc.edu.cn;ouc.edu.cn;cuhk.edu.cn;szu.edu.cn;ouc.edu.cn", "position": "MS student;Lecturer;Lecturer;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024freqblender,\ntitle={FreqBlender: Enhancing DeepFake Detection by Blending Frequency Knowledge},\nauthor={Hanzhe LI and Jiaran Zhou and Yuezun Li and Baoyuan Wu and Bin Li and Junyu Dong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=otZPBS0un6}\n}", "github": "", "reviewers": "GPf8;RzXt;Z1eA;Xqko", "pdf_size": 0, "rating": "4;5;6;6", "confidence": "5;5;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "62;34;222;152", "wc_strengths": "50;75;154;149", "wc_weaknesses": "86;319;151;134", "wc_questions": "66;26;249;219", "wc_limitations": "31;1;45;92", "wc_review": "295;455;821;746", "wc_reply_reviewers": "295;0;37;0", "wc_reply_authors": "851;52;41;49", "reply_reviewers": "1;0;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 117.5, 74.43621430459773 ], "wc_strengths_avg": [ 107.0, 45.403744338985966 ], "wc_weaknesses_avg": [ 172.5, 87.8763335602937 ], "wc_questions_avg": [ 140.0, 95.64779140157916 ], "wc_limitations_avg": [ 42.25, 32.828151029261456 ], "wc_review_avg": [ 579.25, 213.59116905902266 ], "wc_reply_reviewers_avg": [ 83.0, 123.32680162884303 ], "wc_reply_authors_avg": [ 248.25, 348.0211020900888 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12345065435474249034&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ouc.edu.cn;ouc.edu.cn;ouc.edu.cn;cuhk.edu.cn;szu.edu.cn;ouc.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Ocean University of China;Chinese University of Hong Kong;Shenzhen University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ouc.edu.cn;https://www.cuhk.edu.cn;https://www.szu.edu.cn", "aff_unique_abbr": "OUC;CUHK;SZU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "From an Image to a Scene: Learning to Imagine the World from a Million 360\u00b0 Videos", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93595", "id": "otxOtsWCMb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=otxOtsWCMb", "openreview": "https://openreview.net/forum?id=otxOtsWCMb", "poster": "", "project": "", "author_site": "Matthew Wallingford, Anand Bhattad, Aditya Kusupati, Vivek Ramanujan, Matt Deitke, Aniruddha Kembhavi, Roozbeh Mottaghi, Wei-Chiu Ma, Ali Farhadi", "tldr": "", "abstract": "Three-dimensional (3D) understanding of objects and scenes play a key role in humans' ability to interact with the world and has been an active area of research in computer vision, graphics, and robotics. Large scale synthetic and object-centric 3D datasets have shown to be effective in training models that have 3D understanding of objects. However, applying a similar approach to real-world objects and scenes is difficult due to a lack of large-scale data. Videos are a potential source for real-world 3D data, but finding diverse yet corresponding views of the same content have shown to be difficult at scale. Furthermore, standard videos come with fixed viewpoints, determined at the time of capture. This restricts the ability to access scenes from a variety of more diverse and potentially useful perspectives. We argue that large scale ODIN videos can address these limitations to provide scalable corresponding frames from diverse views. In this paper we introduce 360-1M, a 360\u00b0 video dataset consisting of 1 million videos, and a process for efficiently finding corresponding frames from diverse viewpoints at scale. We train our diffusion-based model, ODIN, on 360-1M. Empowered by the largest real-world, multi-view dataset to date, ODIN is able to freely generate novel views of real-world scenes. Unlike previous methods, ODIN can move the camera through the environment, enabling the model to infer the geometry and layout of the scene. Additionally, we show improved performance on standard novel view synthesis and 3D reconstruction benchmarks.", "keywords": "Novel View Synthesis;3D;Video;360 Video;Large-Scale;Data;Scene Generation", "primary_area": "machine_vision", "supplementary_material": "/attachment/bd85b6068d813279b5a145c8c87a9a185320139f.zip", "author": "Matthew Wallingford;Anand Bhattad;Aditya Kusupati;Vivek Ramanujan;Matt Deitke;Aniruddha Kembhavi;Roozbeh Mottaghi;Wei-Chiu Ma;Ali Farhadi", "authorids": "~Matthew_Wallingford1;~Anand_Bhattad1;~Aditya_Kusupati1;~Vivek_Ramanujan1;~Matt_Deitke1;~Aniruddha_Kembhavi1;~Roozbeh_Mottaghi1;~Wei-Chiu_Ma1;~Ali_Farhadi3", "gender": ";M;M;M;M;;M;M;M", "homepage": "https://anandbhattad.github.io/;http://www.adityakusupati.com/;https://vkramanuj.github.io;https://mattdeitke.com;https://anikem.github.io/;http://roozbehm.info;https://www.cs.cornell.edu/~weichiu/;https://homes.cs.washington.edu/~ali/;https://mattwallingford.github.io/", "dblp": "215/4305;231/7662;225/4845;;81/7583;36/633;151/4277;37/5826;263/1795", "google_scholar": "XUsauXIAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;yXFPyNMAAAAJ;k4VxCcYAAAAJ;JnUevM0AAAAJ;CCV58dgAAAAJ;SVIdh6AAAAAJ;jeOFRDsAAAAJ;", "orcid": ";0000-0001-8455-1851;;;;;;;", "linkedin": ";adityakusupati/;;;;roozbeh-mottaghi-63397aa0;;;", "or_profile": "~Anand_Bhattad1;~Aditya_Kusupati1;~Vivek_Ramanujan1;~Matt_Deitke1;~Aniruddha_Kembhavi1;~Roozbeh_Mottaghi1;~Wei-Chiu_Ma1;~Ali_Farhadi3;~Matthew_C_Wallingford2", "aff": "University of Illinois Urbana Champaign;Department of Computer Science, University of Washington;Meta Facebook;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;University of Washington;University of Washington", "aff_domain": "illinois.edu;cs.washington.edu;meta.com;allenai.org;allenai.org;cs.washington.edu;allenai.org;cs.uw.edu;washington.edu", "position": "PhD student;PhD student;Intern;Researcher;Research Manager;Affiliate Professor ;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nwallingford2024from,\ntitle={From an Image to a Scene: Learning to Imagine the World from a Million 360{\\textdegree} Videos},\nauthor={Matthew Wallingford and Anand Bhattad and Aditya Kusupati and Vivek Ramanujan and Matt Deitke and Aniruddha Kembhavi and Roozbeh Mottaghi and Wei-Chiu Ma and Ali Farhadi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=otxOtsWCMb}\n}", "github": "", "reviewers": "Jg5T;8VWi;whPn;pxup", "pdf_size": 12719860, "rating": "6;6;7;7", "confidence": "4;5;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "119;70;63;77", "wc_strengths": "144;85;111;146", "wc_weaknesses": "124;266;411;224", "wc_questions": "6;29;320;250", "wc_limitations": "1;1;43;4", "wc_review": "394;451;948;701", "wc_reply_reviewers": "85;28;334;13", "wc_reply_authors": "0;0;492;0", "reply_reviewers": "1;1;3;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 21.787324296480282 ], "wc_strengths_avg": [ 121.5, 25.243811122728676 ], "wc_weaknesses_avg": [ 256.25, 103.16582525235768 ], "wc_questions_avg": [ 151.25, 136.26330210295066 ], "wc_limitations_avg": [ 12.25, 17.795715776557007 ], "wc_review_avg": [ 623.5, 220.07555520775134 ], "wc_reply_reviewers_avg": [ 115.0, 129.26136313686314 ], "wc_reply_authors_avg": [ 123.0, 213.0422493309719 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:h-kbsy13STEJ:scholar.google.com/&scioq=From+an+Image+to+a+Scene:+Learning+to+Imagine+the+World+from+a+Million+360%C2%B0+Videos&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "illinois.edu;cs.washington.edu;meta.com;allenai.org;allenai.org;cs.washington.edu;allenai.org;cs.uw.edu;washington.edu", "author_num": 9, "aff_unique_index": "0;1;2;3;3;1;3;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Department of Computer Science;Meta Platforms, Inc.;", "aff_unique_url": "https://illinois.edu;https://www.washington.edu;https://meta.com;https://allenai.org", "aff_unique_abbr": "UIUC;UW;Meta;AI2", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Seattle;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CALANet: Cheap All-Layer Aggregation for Human Activity Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93594", "id": "ouoBW2PXFQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ouoBW2PXFQ", "openreview": "https://openreview.net/forum?id=ouoBW2PXFQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93594.png?t=1731717997.3479896", "project": "", "author_site": "Jaegyun Park, Dae-Won Kim, Jaesung Lee", "tldr": "", "abstract": "With the steady growth of sensing technology and wearable devices, sensor-based human activity recognition has become essential in widespread applications, such as healthcare monitoring and fitness tracking, where accurate and real-time systems are required. \nTo achieve real-time response, recent studies have focused on lightweight neural network models.\nSpecifically, they designed the network architectures by restricting the number of layers shallowly or connections of each layer.\nHowever, these approaches suffer from limited accuracy because the classifier only uses the features at the last layer.\nIn this study, we propose a cheap all-layer aggregation network, CALANet, for accuracy improvement while maintaining the efficiency of existing real-time HAR models.\nSpecifically, CALANet allows the classifier to aggregate the features for all layers, resulting in a performance gain.\nIn addition, this work proves that the theoretical computation cost of CALANet is equivalent to that of conventional networks. \nEvaluated on seven publicly available datasets, CALANet outperformed existing methods, achieving state-of-the-art performance. \nThe source codes of the CALANet are publicly available at https://github.com/jgpark92/CALANet.", "keywords": "Human activity recognition;Wearable sensors;Neural networks;Real-time systems", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/72a4cc2f97451f7a7534aa09bb9b7032a193c6d1.zip", "author": "Jaegyun Park;Dae-Won Kim;Jaesung Lee", "authorids": "~Jaegyun_Park2;~Dae-Won_Kim1;~Jaesung_Lee2", "gender": "M;M;M", "homepage": ";http://air.cau.ac.kr;http://ml.cau.ac.kr", "dblp": "40/4849;;", "google_scholar": "https://scholar.google.co.kr/citations?view_op=list_works;an1c3NcAAAAJ;https://scholar.google.co.kr/citations?user=WgfyWHkAAAAJ", "orcid": "0000-0002-2813-521X;;0000-0002-3757-3510", "linkedin": ";;", "or_profile": "~Jaegyun_Park2;~Dae-Won_Kim1;~Jaesung_Lee2", "aff": "Chung-Ang University;Chung-Ang University;Chung-Ang University", "aff_domain": "cau.ac.kr;cau.ac.kr;cau.ac.kr", "position": "PhD student;Full Professor;Director", "bibtex": "@inproceedings{\npark2024calanet,\ntitle={{CALAN}et: Cheap All-Layer Aggregation for Human Activity Recognition},\nauthor={Jaegyun Park and Dae-Won Kim and Jaesung Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ouoBW2PXFQ}\n}", "github": "", "reviewers": "2n6z;1Z8p;VmYr;QGqB", "pdf_size": 1915573, "rating": "4;5;6;6", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "3;2;3;3", "wc_summary": "119;36;157;134", "wc_strengths": "62;59;66;27", "wc_weaknesses": "289;104;213;45", "wc_questions": "3;133;96;92", "wc_limitations": "3;42;1;26", "wc_review": "476;374;533;324", "wc_reply_reviewers": "82;7;132;64", "wc_reply_authors": "548;0;251;252", "reply_reviewers": "1;1;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 111.5, 45.64263357870578 ], "wc_strengths_avg": [ 53.5, 15.5 ], "wc_weaknesses_avg": [ 162.75, 94.57900136922572 ], "wc_questions_avg": [ 81.0, 47.78598120788146 ], "wc_limitations_avg": [ 18.0, 16.98528775146303 ], "wc_review_avg": [ 426.75, 82.24164091261798 ], "wc_reply_reviewers_avg": [ 71.25, 44.68430932665291 ], "wc_reply_authors_avg": [ 262.75, 194.07392277171087 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xr3I7jP8e9gJ:scholar.google.com/&scioq=CALANet:+Cheap+All-Layer+Aggregation+for+Human+Activity+Recognition&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "cau.ac.kr;cau.ac.kr;cau.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chung-Ang University", "aff_unique_dep": "", "aff_unique_url": "http://www.cau.ac.kr", "aff_unique_abbr": "CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Direct Preference-Based Evolutionary Multi-Objective Optimization with Dueling Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93593", "id": "owHj0G15cd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=owHj0G15cd", "openreview": "https://openreview.net/forum?id=owHj0G15cd", "poster": "", "project": "", "author_site": "Tian Huang, Shengbo Wang, Ke Li", "tldr": "", "abstract": "The ultimate goal of multi-objective optimization (MO) is to assist human decision-makers (DMs) in identifying solutions of interest (SOI) that optimally reconcile multiple objectives according to their preferences. Preference-based evolutionary MO (PBEMO) has emerged as a promising framework that progressively approximates SOI by involving human in the optimization-cum-decision-making process. Yet, current PBEMO approaches are prone to be inefficient and misaligned with the DM\u2019s true aspirations, especially when inadvertently exploiting mis-calibrated reward models. This is further exacerbated when considering the stochastic nature of human feedback. This paper proposes a novel framework that navigates MO to SOI by directly leveraging human feedback without being restricted by a predefined reward model nor cumbersome model selection. Specifically, we developed a clustering-based stochastic dueling bandits algorithm that strategically scales well to high-dimensional dueling bandits, and achieves a regret of $\\mathcal{O}(K^2\\log T)$, where $K$ is the number of clusters and $T$ is the number of rounds. The learned preferences are then transformed into a unified probabilistic format that can be readily adapted to prevalent EMO algorithms. This also leads to a principled termination criterion that strategically manages human cognitive loads and computational budget. Experiments on $48$ benchmark test problems, including synthetic problems, RNA inverse design and protein structure prediction, fully demonstrate the effectiveness of our proposed approach.", "keywords": "multi-objective optimization;dueling bandit;interactive multi-objective optimization;preference learning", "primary_area": "optimization", "supplementary_material": "", "author": "Tian Huang;Shengbo Wang;Ke Li", "authorids": "~Tian_Huang3;~Shengbo_Wang2;~Ke_Li5", "gender": "F;M;M", "homepage": ";;https://colalab.ai/", "dblp": ";;75/6627-1.html", "google_scholar": ";4Kdt5eQAAAAJ;https://scholar.google.co.uk/citations?user=lUFU8KsAAAAJ", "orcid": "0009-0008-0406-7815;0000-0003-2447-166X;0000-0001-7200-4244", "linkedin": ";;ke-li-29423226/", "or_profile": "~Tian_Huang3;~Shengbo_Wang2;~Ke_Li5", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Exeter", "aff_domain": "uestc.edu.cn;uestc.edu;exeter.ac.uk", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhuang2024direct,\ntitle={Direct Preference-Based Evolutionary Multi-Objective Optimization with Dueling Bandits},\nauthor={Tian Huang and Shengbo Wang and Ke Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=owHj0G15cd}\n}", "github": "", "reviewers": "FUFf;hDtX;dDig;PNDi", "pdf_size": 11213242, "rating": "5;5;6;6", "confidence": "3;2;4;1", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;4;2", "wc_summary": "31;83;78;85", "wc_strengths": "33;32;59;72", "wc_weaknesses": "163;54;141;21", "wc_questions": "3;32;141;59", "wc_limitations": "1;27;8;1", "wc_review": "231;228;427;238", "wc_reply_reviewers": "10;24;111;26", "wc_reply_authors": "22;244;163;8", "reply_reviewers": "1;1;2;1", "reply_authors": "2;3;4;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 1.0897247358851685 ], "wc_summary_avg": [ 69.25, 22.230328382639787 ], "wc_strengths_avg": [ 49.0, 17.131841699011815 ], "wc_weaknesses_avg": [ 94.75, 58.942238674824694 ], "wc_questions_avg": [ 58.75, 51.450826038072506 ], "wc_limitations_avg": [ 9.25, 10.638961415476606 ], "wc_review_avg": [ 281.0, 84.3712036182962 ], "wc_reply_reviewers_avg": [ 42.75, 39.88342387508876 ], "wc_reply_authors_avg": [ 109.25, 98.62903984121512 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10307751691723826793&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "uestc.edu.cn;uestc.edu;exeter.ac.uk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;University of Exeter", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.exeter.ac.uk", "aff_unique_abbr": "UESTC;Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Emergence of Hidden Capabilities: Exploring Learning Dynamics in Concept Space", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93592", "id": "owuEcT6BTl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=owuEcT6BTl", "openreview": "https://openreview.net/forum?id=owuEcT6BTl", "poster": "", "project": "", "author_site": "Core Francisco Park, Maya Okawa, Andrew Lee, Ekdeep S Lubana, Hidenori Tanaka", "tldr": "", "abstract": "Modern generative models demonstrate impressive capabilities, likely stemming from an ability to identify and manipulate abstract concepts underlying their training data. However, fundamental questions remain: what determines the concepts a model learns, the order in which it learns them, and its ability to manipulate those concepts? To address these questions, we propose analyzing a model\u2019s learning dynamics via a framework we call the concept space, where each axis represents an independent concept underlying the data generating process. By characterizing learning dynamics in this space, we identify how the speed at which a concept is learned, and hence the order of concept learning, is controlled by properties of the data we term concept signal. Further, we observe moments of sudden turns in the direction of a model\u2019s learning dynamics in concept space. Surprisingly, these points precisely correspond to the emergence of hidden capabilities, i.e., where latent interventions show the model possesses the capability to manipulate a concept, but these capabilities cannot yet be elicited via naive input prompting. While our results focus on synthetically defined toy datasets, we hypothesize a general claim on emergence of hidden capabilities may hold: generative models possess latent capabilities that emerge suddenly and consistently during training, though a model might not exhibit these capabilities under naive input prompting.", "keywords": "Learning Dynamics;Compositional Generalization;Emergent Abilities;Diffusion Models;Mechanistic Interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Core Francisco Park;Maya Okawa;Andrew Lee;Ekdeep Singh Lubana;Hidenori Tanaka", "authorids": "~Core_Francisco_Park1;~Maya_Okawa1;~Andrew_Lee2;~Ekdeep_Singh_Lubana1;~Hidenori_Tanaka1", "gender": "M;;;M;", "homepage": "https://cfpark00.github.io/;;;https://ekdeepslubana.github.io/;https://sites.google.com/view/htanaka/home", "dblp": "304/2901;;;228/2683;", "google_scholar": "RfXjPuEAAAAJ;;oQiCjnwAAAAJ;https://scholar.google.co.in/citations?user=OP7S3vsAAAAJ;f_pWOGIAAAAJ", "orcid": "0000-0002-9542-2913;;;;", "linkedin": "core-francisco-park-aab305284/;;;;", "or_profile": "~Core_Francisco_Park1;~Maya_Okawa1;~Andrew_Lee2;~Ekdeep_Singh_Lubana1;~Hidenori_Tanaka1", "aff": "Harvard University;;University of Michigan;University of Michigan;Physics & Informatics Lab, NTT Research, Inc.", "aff_domain": "harvard.edu;;umich.edu;umich.edu;ntt-research.com", "position": "PhD student;;PhD student;PhD student;Senior Research Scientist", "bibtex": "@inproceedings{\npark2024emergence,\ntitle={Emergence of Hidden Capabilities: Exploring Learning Dynamics in Concept Space},\nauthor={Core Francisco Park and Maya Okawa and Andrew Lee and Ekdeep Singh Lubana and Hidenori Tanaka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=owuEcT6BTl}\n}", "github": "", "reviewers": "G81k;cQgr;uzj6;o8es", "pdf_size": 8382884, "rating": "5;5;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;4", "presentation": "2;1;3;4", "wc_summary": "151;147;78;86", "wc_strengths": "117;162;34;55", "wc_weaknesses": "634;283;93;135", "wc_questions": "434;66;1;251", "wc_limitations": "44;79;1;20", "wc_review": "1380;737;207;547", "wc_reply_reviewers": "38;0;48;38", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 115.5, 33.64892271678248 ], "wc_strengths_avg": [ 92.0, 50.64089256717342 ], "wc_weaknesses_avg": [ 286.25, 212.8160884425799 ], "wc_questions_avg": [ 188.0, 169.0695123314668 ], "wc_limitations_avg": [ 36.0, 29.129023327259016 ], "wc_review_avg": [ 717.75, 426.89774829577163 ], "wc_reply_reviewers_avg": [ 31.0, 18.35755975068582 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6592684490227131218&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "harvard.edu;;umich.edu;umich.edu;ntt-research.com", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Harvard University;University of Michigan;NTT Research, Inc.", "aff_unique_dep": ";;Physics & Informatics Lab", "aff_unique_url": "https://www.harvard.edu;https://www.umich.edu;https://www.ntt-research.com", "aff_unique_abbr": "Harvard;UM;NTT Research", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "oxV50yFy5T", "title": "Lightning-UQ-Box: A Comprehensive Framework for Uncertainty Quantification in Deep Learning", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Uncertainty quantification (UQ) is an essential tool for applying deep neural networks (DNNs) to real world tasks, as it attaches a degree of confidence to DNN outputs. However, despite its benefits, UQ is often left out of the standard DNN workflow due to the additional technical knowledge required to apply and evaluate existing UQ procedures. Hence there is a need for a comprehensive toolbox that allows the user to integrate UQ into their modeling workflow, without significant overhead. We introduce Lightning UQ Box: a unified interface for applying and evaluating various approaches to UQ. In this paper, we provide a theoretical and quantitative comparison of the wide range of state-of-the-art UQ methods implemented in our toolbox. We focus on two challenging vision tasks: (i) estimating tropical cyclone wind speeds from infrared satellite imagery and (ii) estimating the power output of solar panels from RGB images of the sky. Our results demonstrate the need for a broad and approachable experimental framework for UQ, that can be used for benchmarking by highlighting the differences between UQ methods.\nThe toolbox, example implementations, and further information are available at: https://github.com/lightning-uq-box/lightning-uq-box.", "keywords": "Uncertainty Quantification;Bayesian Deep Learning;Conformal Prediction", "primary_area": "", "supplementary_material": "/attachment/215cbc2f5602993db2b665ac68ba253ea640cf88.zip", "author": "Nils Lehmann;Jakob Gawlikowski;Adam J Stewart;Vytautas Jan\u010dauskas;Stefan Depeweg;Eric Nalisnick;Nina Maria Gottschling", "authorids": "~Nils_Lehmann1;~Jakob_Gawlikowski1;~Adam_J_Stewart1;~Vytautas_Jan\u010dauskas1;~Stefan_Depeweg1;~Eric_Nalisnick1;~Nina_Maria_Gottschling1", "gender": ";;M;M;;M;Not Specified", "homepage": "https://nilsleh.info/;;https://github.com/adamjstewart;;;https://enalisnick.github.io;https://orcid.org/0009-0004-0275-7522", "dblp": ";;;;;136/4057;", "google_scholar": "dWXUzLoAAAAJ;;IQ19q4AAAAAJ;;;cb1ZN7AAAAAJ;", "orcid": ";;0000-0002-0468-5006;0000-0002-6051-210X;;;0009-0004-0275-7522", "linkedin": ";;;;;;", "or_profile": "~Nils_Lehmann1;~Jakob_Gawlikowski1;~Adam_J_Stewart1;~Vytautas_Jan\u010dauskas1;~Stefan_Depeweg1;~Eric_Nalisnick1;~Nina_Maria_Gottschling1", "aff": "Technische Universit\u00e4t M\u00fcnchen;;Technische Universit\u00e4t M\u00fcnchen;German Aerospace Center (DLR);;University of Amsterdam;German Aerospace Center (DLR)", "aff_domain": "tum.de;;tum.de;dlr.de;;uva.nl;dlr.de", "position": "PhD student;;Postdoc;Researcher;;Assistant Professor;Postdoc", "bibtex": "@misc{\nanonymous2024lightninguqbox,\ntitle={Lightning-{UQ}-Box: A Comprehensive Framework for Uncertainty Quantification in Deep Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=oxV50yFy5T}\n}", "github": "", "project": "", "reviewers": "hARe;H9aE;z1fD;mBbc", "site": "https://openreview.net/forum?id=oxV50yFy5T", "pdf_size": 3045861, "rating": "5;5;7;7", "confidence": "3;2;5;4", "wc_summary_and_contributions": "84;34;91;26", "wc_strengths": "14;41;9;23", "wc_improvement": "21;33;8;34", "wc_limitations": "27;9;10;34", "wc_correctness": "5;1;244;1", "wc_clarity": "9;1;231;1", "wc_relation_to_prior_work": "60;39;14;1", "wc_documentation": "8;1;20;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "229;160;628;122", "wc_reply_reviewers": "0;14;22;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 58.75, 28.994611568358696 ], "wc_strengths_avg": [ 21.75, 12.193748398257199 ], "wc_improvement_avg": [ 24.0, 10.559356040971437 ], "wc_limitations_avg": [ 20.0, 10.793516572461451 ], "wc_correctness_avg": [ 62.75, 104.6574770381935 ], "wc_clarity_avg": [ 60.5, 98.49238549248363 ], "wc_relation_to_prior_work_avg": [ 28.5, 22.74313083108832 ], "wc_documentation_avg": [ 7.5, 7.762087348130012 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 284.75, 201.85313349066445 ], "wc_reply_reviewers_avg": [ 9.0, 9.433981132056603 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GG6IbWjgNxcJ:scholar.google.com/&scioq=Lightning-UQ-Box:+A+Comprehensive+Framework+for+Uncertainty+Quantification+in+Deep+Learning&hl=en&as_sdt=0,33", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;German Aerospace Center;University of Amsterdam", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tum.de;https://www.dlr.de;https://www.uva.nl", "aff_unique_abbr": "TUM;DLR;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "title": "Exploration by Learning Diverse Skills through Successor State Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93591", "id": "oyiBLfNJvY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oyiBLfNJvY", "openreview": "https://openreview.net/forum?id=oyiBLfNJvY", "poster": "/media/PosterPDFs/NeurIPS%202024/93591.png?t=1731602388.5555956", "project": "", "author_site": "Paul-Antoine LE TOLGUENEC, Yann BESSE, Florent Teichteil-Koenigsbuch, Dennis Wilson, Emmanuel Rachelson", "tldr": "", "abstract": "The ability to perform different skills can encourage agents to explore. In this work, we aim to construct a set of diverse skills that uniformly cover the state space. We propose a formalization of this search for diverse skills, building on a previous definition based on the mutual information between states and skills. We consider the distribution of states reached by a policy conditioned on each skill and leverage the successor state representation to maximize the difference between these skill distributions. We call this approach LEADS: Learning Diverse Skills through Successor State Representations. We demonstrate our approach on a set of maze navigation and robotic control tasks which show that our method is capable of constructing a diverse set of skills which exhaustively cover the state space without relying on reward or exploration bonuses. Our findings demonstrate that this new formalization promotes more robust and efficient exploration by combining mutual information maximization and exploration bonuses.", "keywords": "Reinforcement Learning;Exploration;Deep Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Paul-Antoine LE TOLGUENEC;Yann BESSE;Florent Teichteil-K\u00f6nigsbuch;Dennis George Wilson;Emmanuel Rachelson", "authorids": "~Paul-Antoine_LE_TOLGUENEC1;~Yann_BESSE1;~Florent_Teichteil-K\u00f6nigsbuch1;~Dennis_George_Wilson1;~Emmanuel_Rachelson1", "gender": "M;;M;M;M", "homepage": ";;https://d9w.github.io/;https://personnel.isae-supaero.fr/emmanuel-rachelson;", "dblp": ";;126/7863;52/6241;70/1418", "google_scholar": ";;https://scholar.google.fr/citations?user=k7moRpYAAAAJ;https://scholar.google.fr/citations?user=KtG9BSgAAAAJ;avOLwlYAAAAJ", "orcid": "0000-0003-4555-6718;;0000-0003-2414-0051;0000-0002-8559-1617;0000-0001-6815-096X", "linkedin": ";;dennis-g-wilson/;emmanuelrachelson/;florent-teichteil-koenigsbuch-a198453b/", "or_profile": "~Paul-Antoine_LE_TOLGUENEC1;~Yann_BESSE1;~Dennis_George_Wilson1;~Emmanuel_Rachelson1;~Florent_Teichteil-Koenigsbuch1", "aff": "Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;;Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;AIRBUS SAS", "aff_domain": "isae-supaero.fr;;isae-supaero.fr;isae-supaero.fr;airbus.com", "position": "PhD student;;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ntolguenec2024exploration,\ntitle={Exploration by Learning Diverse Skills through Successor State Representations},\nauthor={Paul-Antoine LE TOLGUENEC and Yann BESSE and Florent Teichteil-K{\\\"o}nigsbuch and Dennis George Wilson and Emmanuel Rachelson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oyiBLfNJvY}\n}", "github": "", "reviewers": "axfE;8kZQ;98FT;QVxs", "pdf_size": 5508583, "rating": "6;7;7;7", "confidence": "2;4;2;4", "soundness": "3;3;3;2", "novelty": "2;3;3;3", "presentation": "2;4;3;2", "wc_summary": "73;90;90;94", "wc_strengths": "39;52;83;180", "wc_weaknesses": "89;34;65;278", "wc_questions": "4;50;11;125", "wc_limitations": "37;10;9;1", "wc_review": "242;236;258;678", "wc_reply_reviewers": "30;94;27;31", "wc_reply_authors": "32;36;16;42", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 86.75, 8.104782538723663 ], "wc_strengths_avg": [ 88.5, 55.19284373902109 ], "wc_weaknesses_avg": [ 116.5, 95.25885785584457 ], "wc_questions_avg": [ 47.5, 48.054656382082264 ], "wc_limitations_avg": [ 14.25, 13.589977924926883 ], "wc_review_avg": [ 353.5, 187.5226652967582 ], "wc_reply_reviewers_avg": [ 45.5, 28.040149785619906 ], "wc_reply_authors_avg": [ 31.5, 9.630680142129112 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "isae-supaero.fr;;isae-supaero.fr;isae-supaero.fr;airbus.com", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Institut Sup\u00e9rieur de l'A\u00e9ronautique et de l'Espace;Airbus", "aff_unique_dep": ";", "aff_unique_url": "https://www.isae-supaero.fr;https://www.airbus.com", "aff_unique_abbr": "ISAE-SUPAERO;Airbus", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Uni-Med: A Unified Medical Generalist Foundation Model For Multi-Task Learning Via Connector-MoE", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93590", "id": "oyl2Fnzune", "proceeding": "", "pdf": "https://openreview.net/pdf?id=oyl2Fnzune", "openreview": "https://openreview.net/forum?id=oyl2Fnzune", "poster": "/media/PosterPDFs/NeurIPS%202024/93590.png?t=1730739185.5755298", "project": "", "author_site": "Xun Zhu, Ying Hu, Fanbin Mo, Miao Li, Ji Wu", "tldr": "", "abstract": "Multi-modal large language models (MLLMs) have shown impressive capabilities as a general-purpose interface for various visual and linguistic tasks. However, building a unified MLLM for multi-task learning in the medical field remains a thorny challenge. To mitigate the tug-of-war problem of multi-modal multi-task optimization in MLLMs, recent advances primarily focus on improving the LLM components, while neglecting the connector that bridges the gap between modalities. In this paper, we introduce Uni-Med, a novel medical generalist foundation model which consists of a universal visual feature extraction module, a connector mixture-of-experts (CMoE) module, and an LLM. Benefiting from the proposed CMoE that leverages a well-designed router with a mixture of projection experts at the connector, Uni-Med achieves efficient solution to the tug-of-war problem and can perform six different medical tasks including question answering, visual question answering, report generation, referring expression comprehension, referring expression generation and image classification. To the best of our knowledge, Uni-Med is the first effort to tackle multi-task interference at the connector in MLLMs. Extensive ablation experiments validate the effectiveness of introducing CMoE under any configuration, with up to an average 8% performance gains. We further provide interpretation analysis of the tug-of-war problem from the perspective of gradient optimization and parameter statistics. Compared to previous state-of-the-art medical MLLMs, Uni-Med achieves competitive or superior evaluation metrics on diverse tasks. Code and resources are available at https://github.com/MSIIP/Uni-Med.", "keywords": "medical generalist foundation model;multi-task learning;mixture-of-experts", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Xun Zhu;Ying Hu;Fanbin Mo;Miao Li;Ji Wu", "authorids": "~Xun_Zhu1;~Ying_Hu4;~Fanbin_Mo1;~Miao_Li10;~Ji_Wu3", "gender": "M;F;;M;M", "homepage": ";;;http://web.ee.tsinghua.edu.cn/limiao/zh_CN/index.htm;http://speech.tsinghua.edu.cn/en/", "dblp": ";;;;91/4957-2", "google_scholar": "6RIBsDMAAAAJ;;;NZkelOcAAAAJ;", "orcid": "0000-0001-8598-8748;0009-0000-5322-9421;;0009-0009-5672-7448;0000-0001-6170-726X", "linkedin": ";;;;", "or_profile": "~Xun_Zhu1;~Ying_Hu4;~Fanbin_Mo1;~Miao_Li10;~Ji_Wu3", "aff": "Beijing University of Posts and Telecommunications;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "bupt.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Postdoc;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2024unimed,\ntitle={Uni-Med: A Unified Medical Generalist Foundation Model For Multi-Task Learning Via Connector-MoE},\nauthor={Xun Zhu and Ying Hu and Fanbin Mo and Miao Li and Ji Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=oyl2Fnzune}\n}", "github": "", "reviewers": "Snz8;wK4x;b87p;mvVf", "pdf_size": 5927764, "rating": "3;6;7;8", "confidence": "5;5;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;4;3;4", "wc_summary": "71;53;57;80", "wc_strengths": "59;58;71;59", "wc_weaknesses": "221;145;49;122", "wc_questions": "160;15;60;11", "wc_limitations": "40;1;1;16", "wc_review": "551;272;238;288", "wc_reply_reviewers": "0;27;25;220", "wc_reply_authors": "0;0;0;610", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;4", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 65.25, 10.825317547305483 ], "wc_strengths_avg": [ 61.75, 5.356071321407137 ], "wc_weaknesses_avg": [ 134.25, 61.35704930975739 ], "wc_questions_avg": [ 61.5, 60.035406219996545 ], "wc_limitations_avg": [ 14.5, 15.945218719101975 ], "wc_review_avg": [ 337.25, 124.72244184588433 ], "wc_reply_reviewers_avg": [ 68.0, 88.39966063283275 ], "wc_reply_authors_avg": [ 152.5, 264.1377481542538 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8058229640253803, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11141649603839542286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "bupt.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.tsinghua.edu.cn", "aff_unique_abbr": "BUPT;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Infinite Limits of Multi-head Transformer Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93589", "id": "p0BBKhD5aI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p0BBKhD5aI", "openreview": "https://openreview.net/forum?id=p0BBKhD5aI", "poster": "", "project": "", "author_site": "Blake Bordelon, Hamza Chaudhry, Cengiz Pehlevan", "tldr": "", "abstract": "In this work we analyze various scaling limits of the training dynamics of transformer models in the feature learning regime. We identify the set of parameterizations which admit well defined infinite width and depth limits that allow the attention layers to update throughout training, a relevant notion of feature learning in these models. We then use tools from dynamical mean field theory (DMFT) to analyze various infinite limits (infinite heads, infinite key/query dimension, and infinite depth) which have different statistical descriptions depending on which infinite limit is taken and how attention layers are scaled. We provide numerical evidence of convergence to the limits and show they maintain the correct scale of updates for both SGD and Adam.", "keywords": "infinite limits;transformers;mean field theory;learning dynamics", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/da69fefd99f6d26882846fc5e8b597b2885178b5.zip", "author": "Blake Bordelon;Hamza Tahir Chaudhry;Cengiz Pehlevan", "authorids": "~Blake_Bordelon1;~Hamza_Tahir_Chaudhry1;~Cengiz_Pehlevan2", "gender": "M;Not Specified;", "homepage": "https://blakebordelon.github.io/;;https://pehlevan.seas.harvard.edu/", "dblp": "228/6993;;145/3480", "google_scholar": "yeQ8_pgAAAAJ;;veDLTPEAAAAJ", "orcid": "0000-0003-0455-9445;;0000-0001-9767-6063", "linkedin": ";hamzatc/;", "or_profile": "~Blake_Bordelon1;~Hamza_Tahir_Chaudhry1;~Cengiz_Pehlevan2", "aff": "Harvard University;Harvard University;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "harvard.edu;harvard.edu;seas.harvard.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nbordelon2024infinite,\ntitle={Infinite Limits of Multi-head Transformer Dynamics},\nauthor={Blake Bordelon and Hamza Tahir Chaudhry and Cengiz Pehlevan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p0BBKhD5aI}\n}", "github": "", "reviewers": "UGyb;rXAV;CMzU;FJYA", "pdf_size": 7476932, "rating": "6;6;7;7", "confidence": "2;3;2;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;2;2", "wc_summary": "89;66;28;185", "wc_strengths": "113;61;18;71", "wc_weaknesses": "2;74;38;512", "wc_questions": "122;66;75;5", "wc_limitations": "8;6;10;94", "wc_review": "334;273;169;867", "wc_reply_reviewers": "23;0;12;272", "wc_reply_authors": "640;58;0;751", "reply_reviewers": "1;0;1;2", "reply_authors": "2;2;1;4", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 57.94393842327254 ], "wc_strengths_avg": [ 65.75, 33.77406549410361 ], "wc_weaknesses_avg": [ 156.5, 206.82057441173495 ], "wc_questions_avg": [ 67.0, 41.6353215431321 ], "wc_limitations_avg": [ 29.5, 37.265936188428164 ], "wc_review_avg": [ 410.75, 269.94108153447115 ], "wc_reply_reviewers_avg": [ 76.75, 113.02073924727267 ], "wc_reply_authors_avg": [ 362.25, 336.1788028713292 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9849534447572480494&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "harvard.edu;harvard.edu;seas.harvard.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PromptFix: You Prompt and We Fix the Photo", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93588", "id": "p1LpXNPmIa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p1LpXNPmIa", "openreview": "https://openreview.net/forum?id=p1LpXNPmIa", "poster": "/media/PosterPDFs/NeurIPS%202024/93588.png?t=1731474655.261982", "project": "", "author_site": "yongsheng yu, Ziyun Zeng, Hang Hua, Jianlong Fu, Jiebo Luo", "tldr": "", "abstract": "Diffusion models equipped with language models demonstrate excellent controllability in image generation tasks, allowing image processing to adhere to human instructions. However, the lack of diverse instruction-following data hampers the development of models that effectively recognize and execute user-customized instructions, particularly in low-level tasks. Moreover, the stochastic nature of the diffusion process leads to deficiencies in image generation or editing tasks that require the detailed preservation of the generated images. To address these limitations, we propose PromptFix, a comprehensive framework that enables diffusion models to follow human instructions to perform a wide variety of image-processing tasks. First, we construct a large-scale instruction-following dataset that covers comprehensive image-processing tasks, including low-level tasks, image editing, and object creation. Next, we propose a high-frequency guidance sampling method to explicitly control the denoising process and preserve high-frequency details in unprocessed areas. Finally, we design an auxiliary prompting adapter, utilizing Vision-Language Models (VLMs) to enhance text prompts and improve the model's task generalization. Experimental results show that PromptFix outperforms previous methods in various image-processing tasks. Our proposed model also achieves comparable inference efficiency with these baseline models and exhibits superior zero-shot capabilities in blind restoration and combination tasks.", "keywords": "diffusion model;image processing", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yongsheng Yu;Ziyun Zeng;Hang Hua;Jianlong Fu;Jiebo Luo", "authorids": "~Yongsheng_Yu1;~Ziyun_Zeng2;~Hang_Hua1;~Jianlong_Fu1;~Jiebo_Luo1", "gender": "M;M;M;M;M", "homepage": ";;https://hanghuacs.owlstown.net/;;https://www.cs.rochester.edu/u/jluo/", "dblp": "00/10049;;226/9632;83/8692;25/5545", "google_scholar": "QXF5p7QAAAAJ;b2DIlscAAAAJ;K9aLTwUAAAAJ;-WqSwu8AAAAJ;CcbnBvgAAAAJ", "orcid": ";;;;0000-0002-4516-9729", "linkedin": ";;;;jieboluo/", "or_profile": "~Yongsheng_Yu1;~Ziyun_Zeng2;~Hang_Hua1;~Jianlong_Fu1;~Jiebo_Luo3", "aff": "University of Rochester;University of Rochester;University of Rochester;Microsoft;University of Rochester", "aff_domain": "ur.rochester.edu;ur.rochester.edu;rochester.edu;microsoft.com;rochester.edu", "position": "PhD student;MS student;PhD student;Senior Researcher;Full Professor", "bibtex": "@inproceedings{\nyu2024promptfix,\ntitle={PromptFix: You Prompt and We Fix the Photo},\nauthor={Yongsheng Yu and Ziyun Zeng and Hang Hua and Jianlong Fu and Jiebo Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p1LpXNPmIa}\n}", "github": "", "reviewers": "gUoB;uR5Y;6nKQ;sDoY", "pdf_size": 19441257, "rating": "4;5;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "101;84;52;67", "wc_strengths": "163;26;36;60", "wc_weaknesses": "220;81;202;128", "wc_questions": "70;99;156;48", "wc_limitations": "30;1;7;7", "wc_review": "584;291;453;310", "wc_reply_reviewers": "0;29;224;36", "wc_reply_authors": "44;69;1015;90", "reply_reviewers": "0;1;2;1", "reply_authors": "2;3;4;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.0, 18.34393632784414 ], "wc_strengths_avg": [ 71.25, 54.39381858262941 ], "wc_weaknesses_avg": [ 157.75, 56.1443452183744 ], "wc_questions_avg": [ 93.25, 40.49305496007926 ], "wc_limitations_avg": [ 11.25, 11.098986440211556 ], "wc_review_avg": [ 409.5, 118.62229975851926 ], "wc_reply_reviewers_avg": [ 72.25, 88.64641842736795 ], "wc_reply_authors_avg": [ 304.5, 410.53044954059135 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4035208342472767130&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ur.rochester.edu;ur.rochester.edu;rochester.edu;microsoft.com;rochester.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Rochester;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.rochester.edu;https://www.microsoft.com", "aff_unique_abbr": "U of R;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Linear Transformers are Versatile In-Context Learners", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93587", "id": "p1ft33Mu3J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p1ft33Mu3J", "openreview": "https://openreview.net/forum?id=p1ft33Mu3J", "poster": "/media/PosterPDFs/NeurIPS%202024/93587.png?t=1733538735.4454844", "project": "", "author_site": "Max Vladymyrov, Johannes von Oswald, Mark Sandler, Rong Ge", "tldr": "", "abstract": "Recent research has demonstrated that transformers, particularly linear attention models, implicitly execute gradient-descent-like algorithms on data provided in-context during their forward inference step. However, their capability in handling more complex problems remains unexplored. In this paper, we prove that each layer of a linear transformer maintains a weight vector for an implicit linear regression problem and can be interpreted as performing a variant of preconditioned gradient descent. We also investigate the use of linear transformers in a challenging scenario where the training data is corrupted with different levels of noise. Remarkably, we demonstrate that for this problem linear transformers discover an intricate and highly effective optimization algorithm, surpassing or matching in performance many reasonable baselines. We analyze this algorithm and show that it is a novel approach incorporating momentum and adaptive rescaling based on noise levels. Our findings show that even linear transformers possess the surprising ability to discover sophisticated optimization strategies.", "keywords": "Linear Transformers;In-Context Learning;Noisy Linear Regression;Model Selection;Mesa-optimization", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Max Vladymyrov;Johannes von Oswald;Mark Sandler;Rong Ge", "authorids": "~Max_Vladymyrov1;~Johannes_von_Oswald2;~Mark_Sandler1;~Rong_Ge1", "gender": "M;Not Specified;M;M", "homepage": "https://max-vladymyrov.github.io/;https://as.inf.ethz.ch/people/members/voswaldj/index.html;;https://users.cs.duke.edu/~rongge/", "dblp": "116/3059;242/8029;s/MarkSandler;89/6869-1.html", "google_scholar": "pQZCrqcAAAAJ;https://scholar.google.ch/citations?user=jdnL-PgAAAAJ;IcPc-OUAAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ", "orcid": ";;0000-0003-0352-6051;", "linkedin": "max-vladymyrov-5803b711/;johswald/?originalSubdomain=de;;", "or_profile": "~Max_Vladymyrov1;~Johannes_von_Oswald2;~Mark_Sandler1;~Rong_Ge1", "aff": "Google Research;Google;Google;Duke University", "aff_domain": "google.com;research.google.com;google.com;duke.edu", "position": "Research Scientist;Researcher;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nvladymyrov2024linear,\ntitle={Linear Transformers are Versatile In-Context Learners},\nauthor={Max Vladymyrov and Johannes von Oswald and Mark Sandler and Rong Ge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p1ft33Mu3J}\n}", "github": "", "reviewers": "WFVs;dAYB;xxDm;KrhZ", "pdf_size": 1154012, "rating": "4;6;6;7", "confidence": "2;4;3;3", "soundness": "2;4;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "38;105;56;60", "wc_strengths": "28;53;113;55", "wc_weaknesses": "18;32;198;63", "wc_questions": "2;78;8;84", "wc_limitations": "1;8;1;1", "wc_review": "87;276;376;263", "wc_reply_reviewers": "0;41;5;37", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.75, 24.67159297653883 ], "wc_strengths_avg": [ 62.25, 31.171902412268647 ], "wc_weaknesses_avg": [ 77.75, 71.31050062929022 ], "wc_questions_avg": [ 43.0, 38.118237105091836 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 250.5, 104.03004373737426 ], "wc_reply_reviewers_avg": [ 20.75, 18.38987493160299 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6488856845230502, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9321763810258217138&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "google.com;research.google.com;google.com;duke.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Google;Duke University", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.duke.edu", "aff_unique_abbr": "Google Research;Duke", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "$\\text{Di}^2\\text{Pose}$: Discrete Diffusion Model for Occluded 3D Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93586", "id": "p2PO2PUPFY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p2PO2PUPFY", "openreview": "https://openreview.net/forum?id=p2PO2PUPFY", "poster": "/media/PosterPDFs/NeurIPS%202024/93586.png?t=1729778345.5978673", "project": "", "author_site": "Weiquan Wang, Jun Xiao, Chunping Wang, Wei Liu, Zhao Wang, Long Chen", "tldr": "", "abstract": "Diffusion models have demonstrated their effectiveness in addressing the inherent uncertainty and indeterminacy in monocular 3D human pose estimation (HPE). \nDespite their strengths, the need for large search spaces and the corresponding demand for substantial training data make these models prone to generating biomechanically unrealistic poses. \nThis challenge is particularly noticeable in occlusion scenarios, where the complexity of inferring 3D structures from 2D images intensifies. \nIn response to these limitations, we introduce the **Di**screte **Di**ffusion **Pose** (**$\\text{Di}^2\\text{Pose}$**), a novel framework designed for occluded 3D HPE that capitalizes on the benefits of a discrete diffusion model. \nSpecifically, **$\\text{Di}^2\\text{Pose}$** employs a two-stage process: it first converts 3D poses into a discrete representation through a pose quantization step, which is subsequently modeled in latent space through a discrete diffusion process. \nThis methodological innovation restrictively confines the search space towards physically viable configurations and enhances the model\u2019s capability to comprehend how occlusions affect human pose within the latent space. \nExtensive evaluations conducted on various benchmarks (e.g., Human3.6M, 3DPW, and 3DPW-Occ) have demonstrated its effectiveness.", "keywords": "Occluded 3D human pose estimation; Discrete diffusion model", "primary_area": "machine_vision", "supplementary_material": "/attachment/c196b73a791d1437868bdb316f6969c1250c31a1.zip", "author": "Weiquan Wang;Jun Xiao;Chunping Wang;Wei Liu;Zhao Wang;Long Chen", "authorids": "~Weiquan_Wang1;~Jun_Xiao1;~Chunping_Wang1;~Wei_Liu3;~Zhao_Wang5;~Long_Chen8", "gender": "M;M;F;M;M;M", "homepage": ";;;https://sites.google.com/view/cuweiliu;;https://zjuchenlong.github.io/", "dblp": ";71/2308-1;54/2715-1;49/3283-5;;64/5725-16", "google_scholar": "https://scholar.google.cz/citations?user=xvRwVSAAAAAJ;fqOwFhQAAAAJ;Rmy5RogAAAAJ;AjxoEpIAAAAJ;mCaOnp4AAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ", "orcid": ";;0000-0003-1854-8667;0000-0002-3865-8145;0000-0002-7144-1511;0000-0001-6148-9709", "linkedin": ";;https://linkedin.com/in/chunping-wang-7b94a15/;;;", "or_profile": "~Weiquan_Wang1;~Jun_Xiao1;~Chunping_Wang1;~Wei_Liu3;~Zhao_Wang5;~Long_Chen8", "aff": "Zhejiang University;Zhejiang University;Finvolution Group;Tencent;Zhejiang University;Hong Kong University of Science and Technology", "aff_domain": "zju.edu.cn;zju.edu.cn;xinye.com;tencent.com;zju.edu.cn;ust.hk", "position": "PhD student;Full Professor;Principal Scientist;Distinguished Scientist;Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nwang2024textditextpose,\ntitle={\\${\\textbackslash}text\\{Di\\}{\\textasciicircum}2{\\textbackslash}text\\{Pose\\}\\$: Discrete Diffusion Model for Occluded 3D Human Pose Estimation},\nauthor={Weiquan Wang and Jun Xiao and Chunping Wang and Wei Liu and Zhao Wang and Long Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p2PO2PUPFY}\n}", "github": "", "reviewers": "ACgE;tz5G;s6UY;Z93t", "pdf_size": 2412421, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "2;2;3;3", "novelty": "2;2;4;2", "presentation": "3;4;3;3", "wc_summary": "144;144;142;114", "wc_strengths": "99;99;87;305", "wc_weaknesses": "448;371;212;152", "wc_questions": "196;20;95;2", "wc_limitations": "78;2;10;29", "wc_review": "965;636;546;602", "wc_reply_reviewers": "305;194;368;0", "wc_reply_authors": "805;200;311;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 136.0, 12.727922061357855 ], "wc_strengths_avg": [ 147.5, 91.06453755441797 ], "wc_weaknesses_avg": [ 295.75, 118.8704652131891 ], "wc_questions_avg": [ 78.25, 76.40803295465733 ], "wc_limitations_avg": [ 29.75, 29.532820725423434 ], "wc_review_avg": [ 687.25, 163.5472026663862 ], "wc_reply_reviewers_avg": [ 216.75, 139.78800914241535 ], "wc_reply_authors_avg": [ 329.0, 296.5560655255596 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "zju.edu.cn;zju.edu.cn;xinye.com;tencent.com;zju.edu.cn;ust.hk", "author_num": 6, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Zhejiang University;FinVolution Group;Tencent;Hong Kong University of Science and Technology", "aff_unique_dep": ";;Tencent Holdings Limited;", "aff_unique_url": "https://www.zju.edu.cn;https://www.finvolutiongroup.com;https://www.tencent.com;https://www.ust.hk", "aff_unique_abbr": "ZJU;;Tencent;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "p32gjG4yqw", "title": "Constructive Universal Approximation Theorems for Deep Joint-Equivariant Networks by Schur's Lemma", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present a unified constructive universal approximation theorem covering a wide range of learning machines including both shallow and deep neural networks based on the group representation theory. Constructive here means that the distribution of parameters is given in a closed-form expression (called the *ridgelet transform*). Contrary to the case of shallow models, expressive power analysis of deep models has been conducted in a case-by-case manner. Recently, Sonoda et al. (2023a,b) developed a systematic method to show a constructive approximation theorem from *scalar-valued joint-group-invariant* feature maps, covering a formal deep network. However, each hidden layer was formalized as an abstract group action, so it was not possible to cover real deep networks defined by composites of nonlinear activation function. In this study, we extend the method for *vector-valued joint-group-equivariant* feature maps, so to cover such real networks.", "keywords": "Schur's lemma;deep neural network;joint-group-equivariant;universality;ridgelet transform", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Sho Sonoda;Yuka Hashimoto;Isao Ishikawa;Masahiro Ikeda", "authorids": "~Sho_Sonoda2;~Yuka_Hashimoto2;~Isao_Ishikawa1;~Masahiro_Ikeda1", "gender": ";M;M;M", "homepage": "https://www.rd.ntt/e/ns/qos/person/hashimoto/index.html;https://researchmap.jp/1sa014kawa/?lang=en;https://sites.google.com/view/masahiroikedaswebpage/home;https://sites.google.com/view/shosonoda/research", "dblp": "220/5306;220/5361;43/5572;139/0716", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tr/citations?user=6ozp0qMAAAAJ;zrox37AAAAAJ", "orcid": "0000-0002-1424-4298;0000-0002-3100-6187;;0000-0001-7242-4740", "linkedin": ";;;", "or_profile": "~Yuka_Hashimoto2;~Isao_Ishikawa1;~Masahiro_Ikeda1;~Sho_Sonoda1", "aff": "NTT;Ehime University;RIKEN;RIKEN", "aff_domain": "ntt.co.jp;ehime-u.ac.jp;riken.jp;riken.jp", "position": "Researcher;Associate Professor;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024constructive,\ntitle={Constructive Universal Approximation Theorems for Deep Joint-Equivariant Networks by Schur's Lemma},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=p32gjG4yqw}\n}", "github": "", "project": "", "reviewers": "joae;bUuw;9wR6", "site": "https://openreview.net/forum?id=p32gjG4yqw", "pdf_size": 402053, "rating": "4;5;6", "confidence": "3;3;3", "soundness": "3;3;4", "novelty": "2;2;2", "presentation": "1;3;3", "wc_summary": "72;78;92", "wc_strengths": "52;160;203", "wc_weaknesses": "619;182;164", "wc_questions": "109;134;33", "wc_limitations": "8;1;2", "wc_review": "860;555;494", "wc_reply_reviewers": "336;30;71", "wc_reply_authors": "88;130;224", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 80.66666666666667, 8.379870059984357 ], "wc_strengths_avg": [ 138.33333333333334, 63.52077525415515 ], "wc_weaknesses_avg": [ 321.6666666666667, 210.37479781465163 ], "wc_questions_avg": [ 92.0, 42.949582846247374 ], "wc_limitations_avg": [ 3.6666666666666665, 3.091206165165235 ], "wc_review_avg": [ 636.3333333333334, 160.10482677157347 ], "wc_reply_reviewers_avg": [ 145.66666666666666, 135.62284304480406 ], "wc_reply_authors_avg": [ 147.33333333333334, 56.858498827254394 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PZRZGMuLjUAJ:scholar.google.com/&scioq=Constructive+Universal+Approximation+Theorems+for+Deep+Joint-Equivariant+Networks+by+Schur%27s+Lemma&hl=en&as_sdt=0,33", "gs_version_total": 2, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "NTT Corporation;Ehime University;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntt.co.jp;https://www.ehime-u.ac.jp;https://www.riken.jp", "aff_unique_abbr": "NTT;Ehime U;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Exact, Tractable Gauss-Newton Optimization in Deep Reversible Architectures Reveal Poor Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93585", "id": "p37NlKi9vl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p37NlKi9vl", "openreview": "https://openreview.net/forum?id=p37NlKi9vl", "poster": "/media/PosterPDFs/NeurIPS%202024/93585.png?t=1730992787.7245717", "project": "", "author_site": "Davide Buffelli, Jamie McGowan, Wangkun Xu, Alexandru Cioba, Da-shan Shiu, Guillaume Hennequin, Alberto Bernacchia", "tldr": "", "abstract": "Second-order optimization has been shown to accelerate the training of deep neural networks in many applications, often yielding faster progress per iteration on the training loss compared to first-order optimizers. However, the generalization properties of second-order methods are still being debated. Theoretical investigations have proved difficult to carry out outside the tractable settings of heavily simplified model classes - thus, the relevance of existing theories to practical deep learning applications remains unclear. Similarly, empirical studies in large-scale models and real datasets are significantly confounded by the necessity to approximate second-order updates in practice. It is often unclear whether the observed generalization behaviour arises specifically from the second-order nature of the parameter updates, or instead reflects the specific structured (e.g. Kronecker) approximations used or any damping-based interpolation towards first-order updates. Here, we show for the first time that exact Gauss-Newton (GN) updates take on a tractable form in a class of deep reversible architectures that are sufficiently expressive to be meaningfully applied to common benchmark datasets. We exploit this novel setting to study the training and generalization properties of the GN optimizer. We find that exact GN generalizes poorly. In the mini-batch training setting, this manifests as rapidly saturating progress even on the training loss, with parameter updates found to overfit each mini-batch without producing the features that would support generalization to other mini-batches. In contrast to previous work, we show that our experiments run in the feature learning regime, in which the neural tangent kernel (NTK) changes during the course of training. However, changes in the NTK are not associated with any significant change in neural representations, explaining the lack of generalization.", "keywords": "Gauss-Newton;Optimization;Deep Learning;Reversible Neural Networks", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/dab218a4efd4dc2767b90fed555c323b8cf41d22.zip", "author": "Davide Buffelli;Jamie McGowan;Wangkun Xu;Alexandru Cioba;Da-shan Shiu;Guillaume Hennequin;Alberto Bernacchia", "authorids": "~Davide_Buffelli1;~Jamie_McGowan1;~Wangkun_Xu1;~Alexandru_Cioba1;~Da-shan_Shiu1;~Guillaume_Hennequin1;~Alberto_Bernacchia1", "gender": "M;M;;M;M;M;", "homepage": "https://davidebuffelli.github.io;;https://www.linkedin.com/in/wangkun-xu/;;;https://cbl-cambridge.org;", "dblp": "267/1651;;;288/0091.html;95/2355;56/10432;68/9669", "google_scholar": "v28My7wAAAAJ;;;;https://scholar.google.com/citations?hl=en;-NkKYYcAAAAJ;n48pFqcAAAAJ", "orcid": "0000-0001-5565-1634;;;;;;", "linkedin": "davide-buffelli/;jamie-mcgowan-62389a133?challengeId=AQFS7f4yOXP3uQAAAXTRG0twUFETxxxc8VUw0Tnac4tk6pEeLPqEP7kun2VfGlUzHZ3qgDxSEkislAO2ITU45CvwbM_ONiLy6Q&submissionId=f998adb0-b6bb-3816-1fb5-cfabfeb03251;;alexandru-cioba-aa8aa9161/;;;", "or_profile": "~Davide_Buffelli1;~Jamie_McGowan1;~Wangkun_Xu1;~Alexandru_Cioba1;~Da-shan_Shiu1;~Guillaume_Hennequin1;~Alberto_Bernacchia1", "aff": "MediaTek Research;MediaTek Research;Imperial College London;Mediatek Research;;MediaTek Research;MedaiTek Research", "aff_domain": "mtkresearch.com;mtkresearch.com;imperial.ac.uk;mtkresearch.com;;mtkresearch.com;mtkresearch.com", "position": "Researcher;Researcher;PhD student;Researcher;;Researcher;Team Lead", "bibtex": "@inproceedings{\nbuffelli2024exact,\ntitle={Exact, Tractable Gauss-Newton Optimization in Deep Reversible Architectures Reveal Poor Generalization},\nauthor={Davide Buffelli and Jamie McGowan and Wangkun Xu and Alexandru Cioba and Da-shan Shiu and Guillaume Hennequin and Alberto Bernacchia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p37NlKi9vl}\n}", "github": "", "reviewers": "5mk4;TbBj;9txE", "pdf_size": 3141887, "rating": "5;6;7", "confidence": "4;5;3", "soundness": "2;4;3", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "196;101;167", "wc_strengths": "64;79;32", "wc_weaknesses": "50;71;97", "wc_questions": "159;21;88", "wc_limitations": "1;1;6", "wc_review": "470;273;390", "wc_reply_reviewers": "135;21;69", "wc_reply_authors": "43;0;50", "reply_reviewers": "1;1;3", "reply_authors": "2;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 154.66666666666666, 39.75200903378623 ], "wc_strengths_avg": [ 58.333333333333336, 19.601587237318874 ], "wc_weaknesses_avg": [ 72.66666666666667, 19.223827807061618 ], "wc_questions_avg": [ 89.33333333333333, 56.346152387619945 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 377.6666666666667, 80.89636717567868 ], "wc_reply_reviewers_avg": [ 75.0, 46.73328578219169 ], "wc_reply_authors_avg": [ 31.0, 22.105806175452337 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Y7aBOYpJaZYJ:scholar.google.com/&scioq=Exact,+Tractable+Gauss-Newton+Optimization+in+Deep+Reversible+Architectures+Reveal+Poor+Generalization&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "mtkresearch.com;mtkresearch.com;imperial.ac.uk;mtkresearch.com;;mtkresearch.com;mtkresearch.com", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "MediaTek Inc.;Imperial College London;MedaiTek", "aff_unique_dep": "Research;;Research", "aff_unique_url": "https://www.mediatek.com/;https://www.imperial.ac.uk;", "aff_unique_abbr": "MediaTek;ICL;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United Kingdom;" }, { "title": "Particle Semi-Implicit Variational Inference", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93584", "id": "p3gMGkHMkM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p3gMGkHMkM", "openreview": "https://openreview.net/forum?id=p3gMGkHMkM", "poster": "", "project": "", "author_site": "Jen Ning Lim, Adam Johansen", "tldr": "", "abstract": "Semi-implicit variational inference (SIVI) enriches the expressiveness of variational\nfamilies by utilizing a kernel and a mixing distribution to hierarchically define the\nvariational distribution. Existing SIVI methods parameterize the mixing distribution\nusing implicit distributions, leading to intractable variational densities. As a result,\ndirectly maximizing the evidence lower bound (ELBO) is not possible, so they\nresort to one of the following: optimizing bounds on the ELBO, employing costly\ninner-loop Markov chain Monte Carlo runs, or solving minimax objectives. In this\npaper, we propose a novel method for SIVI called Particle Variational Inference\n(PVI) which employs empirical measures to approximate the optimal mixing\ndistributions characterized as the minimizer of a free energy functional. PVI arises\nnaturally as a particle approximation of a Euclidean\u2013Wasserstein gradient flow and,\nunlike prior works, it directly optimizes the ELBO whilst making no parametric\nassumption about the mixing distribution. Our empirical results demonstrate that\nPVI performs favourably compared to other SIVI methods across various tasks.\nMoreover, we provide a theoretical analysis of the behaviour of the gradient flow\nof a related free energy functional: establishing the existence and uniqueness of\nsolutions as well as propagation of chaos results.", "keywords": "variational inference;gradient flow", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/59c31b7c9fb7e0ba3afd41d0675dc1fcb6d1b56e.zip", "author": "Jen Ning Lim;Adam Michael Johansen", "authorids": "~Jen_Ning_Lim1;~Adam_Michael_Johansen1", "gender": ";M", "homepage": ";https://go.warwick.ac.uk/amjohansen", "dblp": "250/9539;43/3875", "google_scholar": "Uryp_N8AAAAJ;https://scholar.google.co.uk/citations?user=KOaq7EEAAAAJ", "orcid": ";0000-0002-3531-7628", "linkedin": ";adam-johansen-6b71154/", "or_profile": "~Jen_Ning_Lim1;~Adam_Michael_Johansen1", "aff": "The University of Warwick;University of Warwick", "aff_domain": "warwick.ac.uk;warwick.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nlim2024particle,\ntitle={Particle Semi-Implicit Variational Inference},\nauthor={Jen Ning Lim and Adam Michael Johansen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p3gMGkHMkM}\n}", "github": "", "reviewers": "o93p;KYHY;38kT;JYYZ", "pdf_size": 2947327, "rating": "6;7;7;9", "confidence": "4;3;2;5", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "49;62;71;99", "wc_strengths": "46;78;27;198", "wc_weaknesses": "232;27;9;294", "wc_questions": "34;70;13;68", "wc_limitations": "1;10;1;6", "wc_review": "362;247;121;665", "wc_reply_reviewers": "41;12;11;203", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.25, 18.34904629674251 ], "wc_strengths_avg": [ 87.25, 66.48825084178407 ], "wc_weaknesses_avg": [ 140.5, 124.60838655564079 ], "wc_questions_avg": [ 46.25, 23.94133454926855 ], "wc_limitations_avg": [ 4.5, 3.774917217635375 ], "wc_review_avg": [ 348.75, 201.50232628930118 ], "wc_reply_reviewers_avg": [ 66.75, 79.58132632722327 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5129891760425771, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5317969533449197897&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "warwick.ac.uk;warwick.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Warwick", "aff_unique_dep": "", "aff_unique_url": "https://warwick.ac.uk", "aff_unique_abbr": "Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "A Walsh Hadamard Derived Linear Vector Symbolic Architecture", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93583", "id": "p3hNrpeWMe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p3hNrpeWMe", "openreview": "https://openreview.net/forum?id=p3hNrpeWMe", "poster": "/media/PosterPDFs/NeurIPS%202024/93583.png?t=1731158765.2944014", "project": "", "author_site": "Mohammad Mahmudul Alam, Alexander Oberle, Edward Raff, Stella Biderman, Tim Oates, James Holt", "tldr": "", "abstract": "Vector Symbolic Architectures (VSAs) are one approach to developing Neuro-symbolic AI, where two vectors in $\\mathbb{R}^d$ are 'bound' together to produce a new vector in the same space. VSAs support the commutativity and associativity of this binding operation, along with an inverse operation, allowing one to construct symbolic-style manipulations over real-valued vectors. Most VSAs were developed before deep learning and automatic differentiation became popular and instead focused on efficacy in hand-designed systems. In this work, we introduce the Hadamard-derived linear Binding (HLB), which is designed to have favorable computational efficiency, and efficacy in classic VSA tasks, and perform well in differentiable systems.", "keywords": "Vector Symbolic Architectures;Holographic Reduced Representations;Hadamard Transformation;HRR;VTB;MAP;HLB", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Mohammad Mahmudul Alam;Alexander Oberle;Edward Raff;Stella Biderman;Tim Oates;James Holt", "authorids": "~Mohammad_Mahmudul_Alam1;~Alexander_Oberle1;~Edward_Raff1;~Stella_Biderman1;~Tim_Oates2;~James_Holt1", "gender": "M;M;M;F;M;M", "homepage": "https://mahmudulalam.github.io;;http://www.edwardraff.com/;http://www.stellabiderman.com;;", "dblp": "267/7007;;204/3369;239/5641;34/3824;93/1248", "google_scholar": "9z9HFSEAAAAJ;;debM2bUAAAAJ;bO7H0DAAAAAJ;8aKvmuAAAAAJ;GtVgGjkAAAAJ", "orcid": ";;0000-0002-9900-1972;0000-0001-8228-1042;;0000-0002-6368-8696", "linkedin": "mahmudul-alam/;alex-o-5a301b127/;edward-raff-09992040/;stellabiderman;;jeholt/", "or_profile": "~Mohammad_Mahmudul_Alam1;~Alexander_Oberle1;~Edward_Raff1;~Stella_Biderman1;~Tim_Oates2;~James_Holt1", "aff": "University of Maryland, Baltimore County;University of Maryland, Baltimore County;Booz Allen Hamilton;Booz Allen Hamilton;University of Maryland, Baltimore County;Laboratory for Physical Sciences", "aff_domain": "umbc.edu;umbc.edu;boozallen.com;boozallen.com;umbc.edu;umd.edu", "position": "PhD student;Undergrad student;Principal Researcher;Industry researcher;UMBC;Principal Researcher", "bibtex": "@inproceedings{\nalam2024a,\ntitle={A Walsh Hadamard Derived Linear Vector Symbolic Architecture},\nauthor={Mohammad Mahmudul Alam and Alexander Oberle and Edward Raff and Stella Biderman and Tim Oates and James Holt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p3hNrpeWMe}\n}", "github": "", "reviewers": "6ktE;FSGF;xpT1;rBtB", "pdf_size": 639516, "rating": "6;7;7;8", "confidence": "3;3;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;2", "wc_summary": "49;95;52;173", "wc_strengths": "62;43;161;84", "wc_weaknesses": "52;7;59;364", "wc_questions": "60;33;139;140", "wc_limitations": "29;7;5;7", "wc_review": "252;185;416;768", "wc_reply_reviewers": "36;0;26;72", "wc_reply_authors": "28;0;51;95", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 92.25, 50.04685304791901 ], "wc_strengths_avg": [ 87.5, 44.84696199298231 ], "wc_weaknesses_avg": [ 120.5, 141.9938378944664 ], "wc_questions_avg": [ 93.0, 47.471043805671684 ], "wc_limitations_avg": [ 12.0, 9.848857801796104 ], "wc_review_avg": [ 405.25, 225.66498953094165 ], "wc_reply_reviewers_avg": [ 33.5, 25.821502667350714 ], "wc_reply_authors_avg": [ 43.5, 34.78864757359791 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QNpR6dPvFaYJ:scholar.google.com/&scioq=A+Walsh+Hadamard+Derived+Linear+Vector+Symbolic+Architecture&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "umbc.edu;umbc.edu;boozallen.com;boozallen.com;umbc.edu;umd.edu", "author_num": 6, "aff_unique_index": "0;0;1;1;0;2", "aff_unique_norm": "University of Maryland, Baltimore County;Booz Allen Hamilton;Laboratory for Physical Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umbc.edu;https://www.boozallen.com;", "aff_unique_abbr": "UMBC;BAH;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Baltimore County;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Surprisingly Simple Approach to Generalized Few-Shot Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93582", "id": "p3nPHMpx04", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p3nPHMpx04", "openreview": "https://openreview.net/forum?id=p3nPHMpx04", "poster": "/media/PosterPDFs/NeurIPS%202024/93582.png?t=1733711270.2774596", "project": "", "author_site": "Tomoya Sakai, Haoxiang Qiu, Takayuki Katsuki, Daiki Kimura, Takayuki Osogami, Tadanobu Inoue", "tldr": "", "abstract": "The goal of *generalized* few-shot semantic segmentation (GFSS) is to recognize *novel-class* objects through training with a few annotated examples and the *base-class* model that learned the knowledge about the base classes.\nUnlike the classic few-shot semantic segmentation, GFSS aims to classify pixels into both base and novel classes, meaning it is a more practical setting.\nCurrent GFSS methods rely on several techniques such as using combinations of customized modules, carefully designed loss functions, meta-learning, and transductive learning.\nHowever, we found that a simple rule and standard supervised learning substantially improve the GFSS performance.\nIn this paper, we propose a simple yet effective method for GFSS that does not use the techniques mentioned above.\nAlso, we theoretically show that our method perfectly maintains the segmentation performance of the base-class model over most of the base classes.\nThrough numerical experiments, we demonstrated the effectiveness of our method.\nIt improved in novel-class segmentation performance in the $1$-shot scenario by $6.1$% on the PASCAL-$5^i$ dataset, $4.7$% on the PASCAL-$10^i$ dataset, and $1.0$% on the COCO-$20^i$ dataset.\nOur code is publicly available at https://github.com/IBM/BCM.", "keywords": "few-shot learning;semantic segmentation;catastrophic forgetting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Tomoya Sakai;Haoxiang Qiu;Takayuki Katsuki;Daiki Kimura;Takayuki Osogami;Tadanobu Inoue", "authorids": "~Tomoya_Sakai2;~Haoxiang_Qiu1;~Takayuki_Katsuki2;~Daiki_Kimura1;~Takayuki_Osogami1;~Tadanobu_Inoue1", "gender": ";M;;M;M;", "homepage": ";;https://research.ibm.com/people/takayuki-katsuki;http://ibm.biz/daiki-kimura;https://sites.google.com/site/takayukiosogami/;", "dblp": ";;01/10264;126/9778;95/5631;", "google_scholar": ";;bZZ0I4UAAAAJ;https://scholar.google.co.jp/citations?user=UisUpBkAAAAJ;wtOZ8wwAAAAJ;", "orcid": ";;0000-0002-3670-1138;;;", "linkedin": ";haoxiang-qiu-545441164/;;;takayuki-osogami-1151853/?ppe=1;", "or_profile": "~Tomoya_Sakai2;~Haoxiang_Qiu1;~Takayuki_Katsuki2;~Daiki_Kimura1;~Takayuki_Osogami1;~Tadanobu_Inoue1", "aff": ";International Business Machines;International Business Machines;IBM Research;International Business Machines;", "aff_domain": ";ibm.com;ibm.com;ibm.com;ibm.com;", "position": ";Researcher;Research staff member;Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nsakai2024a,\ntitle={A Surprisingly Simple Approach to Generalized Few-Shot Semantic Segmentation},\nauthor={Tomoya Sakai and Haoxiang Qiu and Takayuki Katsuki and Daiki Kimura and Takayuki Osogami and Tadanobu Inoue},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p3nPHMpx04}\n}", "github": "", "reviewers": "HhLp;MDAS;NeFB", "pdf_size": 811479, "rating": "5;6;6", "confidence": "3;5;4", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "75;35;61", "wc_strengths": "37;33;42", "wc_weaknesses": "80;144;41", "wc_questions": "6;30;5", "wc_limitations": "1;6;10", "wc_review": "199;248;159", "wc_reply_reviewers": "0;257;9", "wc_reply_authors": "0;205;0", "reply_reviewers": "0;3;1", "reply_authors": "1;3;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.0, 16.57307052620807 ], "wc_strengths_avg": [ 37.333333333333336, 3.6817870057290873 ], "wc_weaknesses_avg": [ 88.33333333333333, 42.460439103816256 ], "wc_questions_avg": [ 13.666666666666666, 11.556623882239812 ], "wc_limitations_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_review_avg": [ 202.0, 36.39597047293377 ], "wc_reply_reviewers_avg": [ 88.66666666666667, 119.08633656115026 ], "wc_reply_authors_avg": [ 68.33333333333333, 96.6379267621615 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11218507354811295966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";ibm.com;ibm.com;ibm.com;ibm.com;", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Drift-Resilient TabPFN: In-Context Learning Temporal Distribution Shifts on Tabular Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93581", "id": "p3tSEFMwpG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p3tSEFMwpG", "openreview": "https://openreview.net/forum?id=p3tSEFMwpG", "poster": "/media/PosterPDFs/NeurIPS%202024/93581.png?t=1730496803.084114", "project": "", "author_site": "Kai Helli, David Schnurr, Noah Hollmann, Samuel M\u00fcller, Frank Hutter", "tldr": "", "abstract": "While most ML models expect independent and identically distributed data, this assumption is often violated in real-world scenarios due to distribution shifts, resulting in the degradation of machine learning model performance. Until now, no tabular method has consistently outperformed classical supervised learning, which ignores these shifts. To address temporal distribution shifts, we present Drift-Resilient TabPFN, a fresh approach based on In-Context Learning with a Prior-Data Fitted Network that learns the learning algorithm itself: it accepts the entire training dataset as input and makes predictions on the test set in a single forward pass. Specifically, it learns to approximate Bayesian inference on synthetic datasets drawn from a prior that specifies the model's inductive bias. This prior is based on structural causal models (SCM), which gradually shift over time. To model shifts of these causal models, we use a secondary SCM, that specifies changes in the primary model parameters. The resulting Drift-Resilient TabPFN can be applied to unseen data, runs in seconds on small to moderately sized datasets and needs no hyperparameter tuning. Comprehensive evaluations across 18 synthetic and real-world datasets demonstrate large performance improvements over a wide range of baselines, such as XGB, CatBoost, TabPFN, and applicable methods featured in the Wild-Time benchmark. Compared to the strongest baselines, it improves accuracy from 0.688 to 0.744 and ROC AUC from 0.786 to 0.832 while maintaining stronger calibration. This approach could serve as significant groundwork for further research on out-of-distribution prediction.", "keywords": "Temporal Distribution Shifts;In-Context Learning;Bayesian Inference;Prior-Data Fitted Networks;Temporal Domain Generalization;Structural Causal Models;TabPFN;Tabular Data Modeling;Out-Of-Distribution Generalization;Domain Generalization;Meta-Learning;Concept Drift", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Kai Helli;David Schnurr;Noah Hollmann;Samuel M\u00fcller;Frank Hutter", "authorids": "~Kai_Helli1;~David_Schnurr1;~Noah_Hollmann1;~Samuel_M\u00fcller1;~Frank_Hutter1", "gender": "M;M;;;M", "homepage": "https://www.helli.xyz;;http://www.noahhollmann.com;https://uncoolis.cool;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": ";;;284/9655;89/5383", "google_scholar": ";;;pevYEjAAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": ";;0000-0001-8556-518X;;0000-0002-2037-3694", "linkedin": "kai-helli/;david-schnurr/;;;frank-hutter-9190b24b/", "or_profile": "~Kai_Helli1;~David_Schnurr1;~Noah_Hollmann1;~Samuel_M\u00fcller1;~Frank_Hutter1", "aff": "Albert-Ludwigs-Universit\u00e4t Freiburg;ETHZ - ETH Zurich;Charite Universit\u00e4tsmedizin Berlin;University of Freiburg, Universit\u00e4t Freiburg;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;ethz.ch;charite.de;cs.uni-freiburg.de;uni-freiburg.de", "position": "Researcher;MS student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhelli2024driftresilient,\ntitle={Drift-Resilient Tab{PFN}: In-Context Learning Temporal Distribution Shifts on Tabular Data},\nauthor={Kai Helli and David Schnurr and Noah Hollmann and Samuel M{\\\"u}ller and Frank Hutter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p3tSEFMwpG}\n}", "github": "", "reviewers": "aHHC;m2Lp;LELq;84bT", "pdf_size": 13691578, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;4", "wc_summary": "91;65;222;71", "wc_strengths": "48;40;1008;103", "wc_weaknesses": "63;211;2;342", "wc_questions": "153;65;2;281", "wc_limitations": "1;1;1;26", "wc_review": "356;382;1235;823", "wc_reply_reviewers": "90;97;924;132", "wc_reply_authors": "910;1122;2979;1466", "reply_reviewers": "2;1;3;2", "reply_authors": "3;4;8;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 112.25, 64.09124355167404 ], "wc_strengths_avg": [ 299.75, 409.62688815555066 ], "wc_weaknesses_avg": [ 154.5, 132.26583081053096 ], "wc_questions_avg": [ 125.25, 104.70046561501051 ], "wc_limitations_avg": [ 7.25, 10.825317547305483 ], "wc_review_avg": [ 699.0, 360.83583524921687 ], "wc_reply_reviewers_avg": [ 310.75, 354.4173916443718 ], "wc_reply_authors_avg": [ 1619.25, 809.737418858731 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 4.5, 2.0615528128088303 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9324536715210142364&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uni-freiburg.de;ethz.ch;charite.de;cs.uni-freiburg.de;uni-freiburg.de", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Albert-Ludwigs-Universit\u00e4t Freiburg;ETH Zurich;Charite Universit\u00e4tsmedizin Berlin;University of Freiburg", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uni-freiburg.de;https://www.ethz.ch;https://www.charite.de;https://www.uni-freiburg.de", "aff_unique_abbr": "Albert-Ludwigs-Universit\u00e4t;ETHZ;Charite;UoF", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Freiburg;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Germany;Switzerland" }, { "title": "Learning to Solve Quadratic Unconstrained Binary Optimization in a Classification Way", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93580", "id": "p43ObIwJFW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p43ObIwJFW", "openreview": "https://openreview.net/forum?id=p43ObIwJFW", "poster": "/media/PosterPDFs/NeurIPS%202024/93580.png?t=1731471099.2966664", "project": "", "author_site": "Ming Chen, Jie Chun, Shang Xiang, Luona Wei, Yonghao Du, Qian Wan, Yuning Chen, Yingwu Chen", "tldr": "", "abstract": "The quadratic unconstrained binary optimization (QUBO) is a well-known NP-hard problem that takes an $n\\times n$ matrix $Q$ as input and decides an $n$-dimensional 0-1 vector $x$, to optimize a quadratic function. Existing learning-based models that always formulate the solution process as sequential decisions suffer from high computational overload. To overcome this issue, we propose a neural solver called the Value Classification Model (VCM) that formulates the solution process from a classification perspective. It applies a Depth Value Network (DVN) based on graph convolution that exploits the symmetry property in $Q$ to auto-grasp value features. These features are then fed into a Value Classification Network (VCN) which directly generates classification solutions. Trained by a highly efficient model-tailored Greedy-guided Self Trainer (GST) which does not require any priori optimal labels, VCM significantly outperforms competitors in both computational efficiency and solution quality with a remarkable generalization ability. It can achieve near-optimal solutions in milliseconds with an average optimality gap of just 0.362\\% on benchmarks with up to 2500 variables. Notably, a VCM trained at a specific DVN depth can steadily find better solutions by simply extending the testing depth, which narrows the gap to 0.034\\% on benchmarks. To our knowledge, this is the first learning-based model to reach such a performance.", "keywords": "quadratic unconstrained binary optimization;combinational optimization;machine learning;classification;neural solver", "primary_area": "optimization", "supplementary_material": "", "author": "Ming Chen;Jie Chun;Shang Xiang;Luona Wei;Yonghao Du;Qian Wan;Yuning Chen;Yingwu Chen", "authorids": "~Ming_Chen17;~Jie_Chun1;~Shang_Xiang1;~Luona_Wei1;~Yonghao_Du1;~Qian_Wan2;~Yuning_Chen2;~Yingwu_Chen2", "gender": "M;;;F;;M;M;", "homepage": ";;;https://www.researchgate.net/profile/Luona-Wei;https://www.researchgate.net/scientific-contributions/Yonghao-Du-2115103304;https://orcid.org/my-orcid?orcid=0000-0003-4504-3912;https://www.researchgate.net/profile/Yuning-Chen;https://xueshu.baidu.com/scholarID/CN-BS7388CJ?site=xueshu_ri", "dblp": ";;;;;25/3876-7;;", "google_scholar": ";;;;;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0009-0004-2269-0732;0009-0000-3396-4605;;;;0000-0003-4504-3912;;", "linkedin": ";;;;;;;", "or_profile": "~Ming_Chen17;~Jie_Chun1;~Shang_Xiang1;~Luona_Wei1;~Yonghao_Du1;~Qian_Wan2;~Yuning_Chen2;~Yingwu_Chen2", "aff": "National University of Defense Technology;National University of Defense Technology;;South-Central Minzu University;National University of Defense Technology;Central China Normal University;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;nudt.edu.cn;;scuec.edu.cn;nudt.edu.cn;ccnu.edu.cn;nudt.edu.cn;nudt.edu.cn", "position": "PhD student;MS student;;Associate Professor;Associate Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024learning,\ntitle={Learning to Solve Quadratic Unconstrained Binary Optimization in a Classification Way},\nauthor={Ming Chen and Jie Chun and Shang Xiang and Luona Wei and Yonghao Du and Qian Wan and Yuning Chen and Yingwu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p43ObIwJFW}\n}", "github": "", "reviewers": "dxsN;VEhe;GWxe", "pdf_size": 1966286, "rating": "5;7;7", "confidence": "3;3;2", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "54;73;119", "wc_strengths": "123;76;79", "wc_weaknesses": "311;60;13", "wc_questions": "44;89;2", "wc_limitations": "6;55;1", "wc_review": "538;353;214", "wc_reply_reviewers": "136;103;12", "wc_reply_authors": "311;51;32", "reply_reviewers": "2;1;1", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 82.0, 27.28858125052797 ], "wc_strengths_avg": [ 92.66666666666667, 21.483844059096022 ], "wc_weaknesses_avg": [ 128.0, 130.8153915510964 ], "wc_questions_avg": [ 45.0, 35.52463933666322 ], "wc_limitations_avg": [ 20.666666666666668, 24.362995619495468 ], "wc_review_avg": [ 368.3333333333333, 132.71607120298415 ], "wc_reply_reviewers_avg": [ 83.66666666666667, 52.43620462576936 ], "wc_reply_authors_avg": [ 131.33333333333334, 127.28009358191966 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=290679909355453794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nudt.edu.cn;nudt.edu.cn;;scuec.edu.cn;nudt.edu.cn;ccnu.edu.cn;nudt.edu.cn;nudt.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "National University of Defense Technology;South-Central Minzu University;Central China Normal University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nudt.edu.cn/;http://www.scminzu.edu.cn;http://www.ccnu.edu.cn", "aff_unique_abbr": "NUDT;SCMU;CCNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "From Dictionary to Tensor: A Scalable Multi-View Subspace Clustering Framework with Triple Information Enhancement", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93579", "id": "p4a1nSvwD7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p4a1nSvwD7", "openreview": "https://openreview.net/forum?id=p4a1nSvwD7", "poster": "/media/PosterPDFs/NeurIPS%202024/93579.png?t=1731507860.2169666", "project": "", "author_site": "Zhibin Gu, Songhe Feng", "tldr": "", "abstract": "While Tensor-based Multi-view Subspace Clustering (TMSC) has garnered significant attention for its capacity to effectively capture high-order correlations among multiple views, three notable limitations in current TMSC methods necessitate consideration: 1) high computational complexity and reliance on dictionary completeness resulting from using observed data as the dictionary, 2) inaccurate subspace representation stemming from the oversight of local geometric information and 3) under-penalization of noise-related singular values within tensor data caused by treating all singular values equally. To address these limitations, this paper presents a \\textbf{S}calable TMSC framework with \\textbf{T}riple inf\\textbf{O}rmatio\\textbf{N} \\textbf{E}nhancement (\\textbf{STONE}). Notably, an enhanced anchor dictionary learning mechanism has been utilized to recover the low-rank anchor structure, resulting in reduced computational complexity and increased resilience, especially in scenarios with inadequate dictionaries. Additionally, we introduce an anchor hypergraph Laplacian regularizer to preserve the inherent geometry of the data within the subspace representation. Simultaneously, an improved hyperbolic tangent function has been employed as a precise approximation for tensor rank, effectively capturing the significant variations in singular values. Extensive experimentation on a variety of datasets demonstrates that our approach surpasses SOTA methods in both effectiveness and efficiency.", "keywords": "Multi-view clustering;Low-rank tensor representation;Anchor hypergraph Laplacian regularization", "primary_area": "learning_theory", "supplementary_material": "/attachment/47b5a6f1560f7e297ee31c96c5aabcb32bb20c7b.zip", "author": "Zhibin Gu;Songhe Feng", "authorids": "~Zhibin_Gu1;~Songhe_Feng1", "gender": "M;M", "homepage": "https://guzhibin23.github.io/guzhibin23-github.io/;http://faculty.bjtu.edu.cn/8407/", "dblp": "236/0821;92/2415", "google_scholar": "-uBBqdYAAAAJ;K5lqMYgAAAAJ", "orcid": "0000-0002-1085-9084;0000-0002-5922-9358", "linkedin": ";", "or_profile": "~Zhibin_Gu1;~Songhe_Feng1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ngu2024from,\ntitle={From Dictionary to Tensor: A Scalable Multi-View Subspace Clustering Framework with Triple Information Enhancement},\nauthor={Zhibin Gu and Songhe Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p4a1nSvwD7}\n}", "github": "", "reviewers": "eQ9V;QZiQ;iXTf;6B6d", "pdf_size": 1296585, "rating": "3;4;7;7", "confidence": "5;4;5;5", "soundness": "2;3;4;4", "novelty": "1;2;3;3", "presentation": "3;3;4;4", "wc_summary": "30;77;91;82", "wc_strengths": "22;58;92;103", "wc_weaknesses": "223;213;149;133", "wc_questions": "10;1;3;111", "wc_limitations": "6;1;32;3", "wc_review": "291;350;367;432", "wc_reply_reviewers": "0;0;10;10", "wc_reply_authors": "62;69;18;41", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 70.0, 23.632604596192948 ], "wc_strengths_avg": [ 68.75, 31.68102744546016 ], "wc_weaknesses_avg": [ 179.5, 39.073648409126065 ], "wc_questions_avg": [ 31.25, 46.16478636363435 ], "wc_limitations_avg": [ 10.5, 12.539936203984453 ], "wc_review_avg": [ 360.0, 50.23445033042563 ], "wc_reply_reviewers_avg": [ 5.0, 5.0 ], "wc_reply_authors_avg": [ 47.5, 19.90602923739438 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4042260417272216, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9147847172037190452&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "bjtu.edu.cn;bjtu.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Dual Risk Minimization: Towards Next-Level Robustness in Fine-tuning Zero-Shot Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93578", "id": "p50Dyqk0GX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p50Dyqk0GX", "openreview": "https://openreview.net/forum?id=p50Dyqk0GX", "poster": "", "project": "", "author_site": "Kaican Li, Weiyan XIE, Yongxiang Huang, Didan Deng, Lanqing Hong, Zhenguo Li, Ricardo Silva, Nevin L. Zhang", "tldr": "", "abstract": "Fine-tuning foundation models often compromises their robustness to distribution shifts. To remedy this, most robust fine-tuning methods aim to preserve the pre-trained features. However, not all pre-trained features are robust and those methods are largely indifferent to which ones to preserve. We propose dual risk minimization (DRM), which combines empirical risk minimization with worst-case risk minimization, to better preserve the core features of downstream tasks. In particular, we utilize core-feature descriptions generated by LLMs to induce core-based zero-shot predictions which then serve as proxies to estimate the worst-case risk. DRM balances two crucial aspects of model robustness: expected performance and worst-case performance, establishing a new state of the art on various real-world benchmarks. DRM significantly improves the out-of-distribution performance of CLIP ViT-L/14@336 on ImageNet (75.9$\\to$77.1), WILDS-iWildCam (47.1$\\to$51.8), and WILDS-FMoW (50.7$\\to$53.1); opening up new avenues for robust fine-tuning. Our code is available at https://github.com/vaynexie/DRM.", "keywords": "robustness;fine-tuning zero-shot models;CLIP;concept descriptions", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Kaican Li;Weiyan Xie;Yongxiang Huang;Didan Deng;Lanqing HONG;Zhenguo Li;Ricardo Silva;Nevin L. Zhang", "authorids": "~Kaican_Li1;~Weiyan_Xie1;~Yongxiang_Huang1;~Didan_Deng1;~Lanqing_HONG1;~Zhenguo_Li1;~Ricardo_Silva1;~Nevin_L._Zhang1", "gender": "M;M;M;F;F;M;M;M", "homepage": ";;;;https://racheltechie.github.io/;http://www.ee.columbia.edu/~zgli/;http://www.homepages.ucl.ac.uk/~ucgtrbd/;https://cse.hkust.edu.hk/~lzhang/teach/courses.html", "dblp": "272/5206;316/9920;26/6387;220/5502;226/4258;23/6479;42/2642-1;https://dblp.uni-trier.de/pid/z/NevinLianwenZhang.html", "google_scholar": "Mc-lzZMAAAAJ;_kc1e7EAAAAJ;https://scholar.google.com/citations?hl=zh-CN;sM_MNbgAAAAJ;https://scholar.google.com.sg/citations?user=2p7x6OUAAAAJ;XboZC1AAAAAJ;I-ANa0QAAAAJ;", "orcid": ";;0000-0002-8573-3400;;;;;", "linkedin": ";;%E6%B0%B8%E7%A5%A5-%E9%BB%84-01919aa9/;;;;;", "or_profile": "~Kaican_Li1;~Weiyan_Xie1;~Yongxiang_Huang1;~Didan_Deng1;~Lanqing_HONG1;~Zhenguo_Li1;~Ricardo_Silva1;~Nevin_Zhang1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;University College London;Hong Kong University of Science and Technology", "aff_domain": "connect.ust.hk;ust.hk;huawei.com;huawei.com;huawei.com;huawei.com;ucl.ac.uk;ust.hk", "position": "PhD student;PhD student;Principal Researcher;Researcher;Researcher;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024dual,\ntitle={Dual Risk Minimization: Towards Next-Level Robustness in Fine-tuning Zero-Shot Models},\nauthor={Kaican Li and Weiyan Xie and Yongxiang Huang and Didan Deng and Lanqing HONG and Zhenguo Li and Ricardo Silva and Nevin L. Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p50Dyqk0GX}\n}", "github": "", "reviewers": "zSx8;D5j1;gi7f;eddC", "pdf_size": 1624142, "rating": "5;5;6;6", "confidence": "4;3;3;2", "soundness": "4;2;2;3", "novelty": "4;2;2;3", "presentation": "3;3;2;3", "wc_summary": "90;103;87;89", "wc_strengths": "125;47;26;38", "wc_weaknesses": "205;204;103;64", "wc_questions": "82;144;2;5", "wc_limitations": "32;54;1;8", "wc_review": "534;552;219;204", "wc_reply_reviewers": "224;0;16;0", "wc_reply_authors": "0;0;25;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 92.25, 6.299801584177076 ], "wc_strengths_avg": [ 59.0, 38.82653731663435 ], "wc_weaknesses_avg": [ 144.0, 62.05239721396749 ], "wc_questions_avg": [ 58.25, 58.98463783054025 ], "wc_limitations_avg": [ 23.75, 20.90902915010642 ], "wc_review_avg": [ 377.25, 165.95688446099487 ], "wc_reply_reviewers_avg": [ 60.0, 94.91048414163738 ], "wc_reply_authors_avg": [ 6.25, 10.825317547305483 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TBDUjrLuk8MJ:scholar.google.com/&scioq=Dual+Risk+Minimization:+Towards+Next-Level+Robustness+in+Fine-tuning+Zero-Shot+Models&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "connect.ust.hk;ust.hk;huawei.com;huawei.com;huawei.com;huawei.com;ucl.ac.uk;ust.hk", "author_num": 8, "aff_unique_index": "0;0;1;1;1;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei;University College London", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com;https://www.ucl.ac.uk", "aff_unique_abbr": "HKUST;Huawei;UCL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "A Globally Optimal Portfolio for m-Sparse Sharpe Ratio Maximization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93577", "id": "p54CYwdjVP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p54CYwdjVP", "openreview": "https://openreview.net/forum?id=p54CYwdjVP", "poster": "/media/PosterPDFs/NeurIPS%202024/93577.png?t=1731689830.0332568", "project": "", "author_site": "Yizun Lin, Zhao-Rong Lai, Cheng Li", "tldr": "", "abstract": "The Sharpe ratio is an important and widely-used risk-adjusted return in financial engineering. In modern portfolio management, one may require an m-sparse (no more than m active assets) portfolio to save managerial and financial costs. However, few existing methods can optimize the Sharpe ratio with the m-sparse constraint, due to the nonconvexity and the complexity of this constraint. We propose to convert the m-sparse fractional optimization problem into an equivalent m-sparse quadratic programming problem. The semi-algebraic property of the resulting objective function allows us to exploit the Kurdyka-Lojasiewicz property to develop an efficient Proximal Gradient Algorithm (PGA) that leads to a portfolio which achieves the globally optimal m-sparse Sharpe ratio under certain conditions. The convergence rates of PGA are also provided. To the best of our knowledge, this is the first proposal that achieves a globally optimal m-sparse Sharpe ratio with a theoretically-sound guarantee.", "keywords": "Sharpe ratio;$\\ell_0$ constraint;proximal gradient algorithm;global optimality", "primary_area": "optimization", "supplementary_material": "/attachment/4af49110b8c57c67581b78c99890cbd38214e046.zip", "author": "Yizun Lin;Zhao-Rong Lai;Cheng Li", "authorids": "~Yizun_Lin1;~Zhao-Rong_Lai1;~Cheng_Li24", "gender": "M;M;F", "homepage": ";https://cybsec.jnu.edu.cn/2023/1120/c39593a781893/page.htm;https://scholar.google.com/citations?user=ZFroLyIAAAAJ&hl=zh-TW", "dblp": ";142/3902;16/6465-18", "google_scholar": ";https://scholar.google.com.hk/citations?user=psPB6TsAAAAJ;ZFroLyIAAAAJ", "orcid": "0000-0003-1400-278X;;0000-0002-9019-0922", "linkedin": ";;", "or_profile": "~Yizun_Lin1;~Zhao-Rong_Lai1;~Cheng_Li24", "aff": "Jinan University;Jinan University;Jinan University", "aff_domain": "jnu.edu.cn;jnu.edu.cn;jnu.edu.cn", "position": "Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nlin2024a,\ntitle={A Globally Optimal Portfolio for m-Sparse Sharpe Ratio Maximization},\nauthor={Yizun Lin and Zhao-Rong Lai and Cheng Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=p54CYwdjVP}\n}", "github": "", "reviewers": "pxYT;4h7E;EtEi;iiAu", "pdf_size": 709996, "rating": "5;5;5;6", "confidence": "4;3;3;1", "soundness": "3;2;3;3", "novelty": "3;2;2;2", "presentation": "3;2;3;3", "wc_summary": "178;34;77;47", "wc_strengths": "46;118;93;78", "wc_weaknesses": "159;140;51;83", "wc_questions": "120;111;42;7", "wc_limitations": "68;22;1;6", "wc_review": "571;425;264;221", "wc_reply_reviewers": "36;18;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 56.4668044075455 ], "wc_strengths_avg": [ 83.75, 26.06122598804592 ], "wc_weaknesses_avg": [ 108.25, 43.29766159967534 ], "wc_questions_avg": [ 70.0, 47.25991959366838 ], "wc_limitations_avg": [ 24.25, 26.423237878806603 ], "wc_review_avg": [ 370.25, 138.62066043703587 ], "wc_reply_reviewers_avg": [ 16.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l8OcQUvqE5QJ:scholar.google.com/&scioq=A+Globally+Optimal+Portfolio+for+m-Sparse+Sharpe+Ratio+Maximization&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "jnu.edu.cn;jnu.edu.cn;jnu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Jinan University", "aff_unique_dep": "", "aff_unique_url": "https://www.jnu.edu.cn", "aff_unique_abbr": "JNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "ImageNet3D: Towards General-Purpose Object-Level 3D Understanding", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97507", "id": "p8eUitex7p", "proceeding": "", "pdf": "https://openreview.net/pdf?id=p8eUitex7p", "openreview": "https://openreview.net/forum?id=p8eUitex7p", "poster": "/media/PosterPDFs/NeurIPS%202024/97507.png?t=1732173197.890019", "project": "", "author_site": "Wufei Ma, Guanning Zeng, Guofeng Zhang, Qihao Liu, Letian Zhang, Adam Kortylewski, Yaoyao Liu, Alan Yuille", "tldr": "", "abstract": "A vision model with general-purpose object-level 3D understanding should be capable of inferring both 2D (*e.g.*, class name and bounding box) and 3D information (*e.g.*, 3D location and 3D viewpoint) for arbitrary rigid objects in natural images. This is a challenging task, as it involves inferring 3D information from 2D signals and most importantly, generalizing to rigid objects from unseen categories. However, existing datasets with object-level 3D annotations are often limited by the number of categories or the quality of annotations. Models developed on these datasets become specialists for certain categories or domains, and fail to generalize. In this work, we present ImageNet3D, a large dataset for general-purpose object-level 3D understanding. ImageNet3D augments 200 categories from the ImageNet dataset with 2D bounding box, 3D pose, 3D location annotations, and image captions interleaved with 3D information. With the new annotations available in ImageNet3D, we could (i) analyze the object-level 3D awareness of visual foundation models, and (ii) study and develop general-purpose models that infer both 2D and 3D information for arbitrary rigid objects in natural images, and (iii) integrate unified 3D models with large language models for 3D-related reasoning. We consider two new tasks, probing of object-level 3D awareness and open vocabulary pose estimation, besides standard classification and pose estimation. Experimental results on ImageNet3D demonstrate the potential of our dataset in building vision models with stronger general-purpose object-level 3D understanding. Our dataset and project page are available here: https://imagenet3d.github.io.", "keywords": "3D Understanding;Pose Estimation;Self-Supervised", "primary_area": "", "supplementary_material": "", "author": "Wufei Ma;Guofeng Zhang;Qihao Liu;Guanning Zeng;Adam Kortylewski;Yaoyao Liu;Alan Yuille", "authorids": "~Wufei_Ma1;~Guofeng_Zhang4;~Qihao_Liu1;~Guanning_Zeng1;~Adam_Kortylewski1;~Yaoyao_Liu1;~Alan_Yuille1", "gender": "M;M;;M;;;M", "homepage": "https://wufeim.github.io;https://richard-guofeng-zhang.github.io/;;https://github.com/guanning03;https://gvrl.mpi-inf.mpg.de/;https://yaoyaoliu.web.illinois.edu/;", "dblp": "243/2814;;158/2755;;161/0772;12/10033-1;y/AlanLYuille", "google_scholar": "mYkvHdIAAAAJ;vl0mzhEAAAAJ;WFl3hH0AAAAJ;SU6ooAQAAAAJ;https://scholar.google.ch/citations?user=tRLUOBIAAAAJ;Qi2PSmEAAAAJ;", "orcid": ";;;;0000-0002-9146-4403;0000-0002-5316-3028;", "linkedin": "wufei-ma-256352133/;;;;;;", "or_profile": "~Wufei_Ma1;~Guofeng_Zhang4;~Qihao_Liu1;~Guanning_Zeng1;~Adam_Kortylewski1;~Yaoyao_Liu1;~Alan_Yuille1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Department of Computer Science and Technology, Tsinghua University;Albert-Ludwigs-Universit\u00e4t Freiburg;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;jh.edu;mail.tsinghua.edu.cn;uni-freiburg.de;jhu.edu;johnshopkins.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;Research Group Leader;Postdoc;Full Professor", "bibtex": "@inproceedings{\nma2024imagenetd,\ntitle={ImageNet3D: Towards General-Purpose Object-Level 3D Understanding},\nauthor={Wufei Ma and Guofeng Zhang and Qihao Liu and Guanning Zeng and Adam Kortylewski and Yaoyao Liu and Alan Yuille},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=p8eUitex7p}\n}", "github": "", "reviewers": "pu6E;ZDJV;kP2A;rBvX;D8US", "pdf_size": 14137708, "rating": "6;6;6;7;8", "confidence": "4;4;4;5;5", "wc_summary_and_contributions": "142;142;53;112;35", "wc_strengths": "126;47;27;91;112", "wc_improvement": "43;156;18;237;150", "wc_limitations": "6;4;5;25;3", "wc_correctness": "1;1;1;39;1", "wc_clarity": "1;1;1;59;1", "wc_relation_to_prior_work": "11;1;1;1;1", "wc_documentation": "1;23;1;1;1", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "332;376;108;566;305", "wc_reply_reviewers": "0;243;29;61;0", "wc_reply_authors": "0;223;187;45;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;3;2;3;1", "rating_avg": [ 6.6, 0.8 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "wc_summary_and_contributions_avg": [ 96.8, 44.843728658531504 ], "wc_strengths_avg": [ 80.6, 37.83437590340298 ], "wc_improvement_avg": [ 120.8, 80.2680509293704 ], "wc_limitations_avg": [ 8.6, 8.260750571225353 ], "wc_correctness_avg": [ 8.6, 15.200000000000001 ], "wc_clarity_avg": [ 12.6, 23.2 ], "wc_relation_to_prior_work_avg": [ 3.0, 4.0 ], "wc_documentation_avg": [ 5.4, 8.8 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 337.4, 146.59549788448484 ], "wc_reply_reviewers_avg": [ 66.6, 91.03098373630816 ], "wc_reply_authors_avg": [ 91.0, 95.2029411310386 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.0, 0.8944271909999159 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9185586535436916, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4743639495812628971&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": "jhu.edu;jhu.edu;jh.edu;mail.tsinghua.edu.cn;uni-freiburg.de;jhu.edu;johnshopkins.edu", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Johns Hopkins University;Tsinghua University;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";Department of Computer Science and Technology;", "aff_unique_url": "https://www.jhu.edu;https://www.tsinghua.edu.cn;https://www.uni-freiburg.de", "aff_unique_abbr": "JHU;THU;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0;0;1;2;0;0", "aff_country_unique": "United States;China;Germany" }, { "title": "Zipfian Whitening", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93576", "id": "pASJxzMJb7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pASJxzMJb7", "openreview": "https://openreview.net/forum?id=pASJxzMJb7", "poster": "/media/PosterPDFs/NeurIPS%202024/93576.png?t=1733962535.9519765", "project": "", "author_site": "Sho Yokoi, Han Bao, Hiroto Kurita, Hidetoshi Shimodaira", "tldr": "", "abstract": "The word embedding space in neural models is skewed, and correcting this can improve task performance.\nWe point out that most approaches for modeling, correcting, and measuring the symmetry of an embedding space implicitly assume that the word frequencies are *uniform*; in reality, word frequencies follow a highly non-uniform distribution, known as *Zipf's law*.\nSurprisingly, simply performing PCA whitening weighted by the empirical word frequency that follows Zipf's law significantly improves task performance, surpassing established baselines.\nFrom a theoretical perspective, both our approach and existing methods can be clearly categorized: word representations are distributed according to an exponential family with either uniform or Zipfian base measures.\nBy adopting the latter approach, we can naturally emphasize informative low-frequency words in terms of their vector norm, which becomes evident from the information-geometric perspective (Oyama et al., EMNLP 2023), and in terms of the loss functions for imbalanced classification (Menon et al. ICLR 2021).\nAdditionally, our theory corroborates that popular natural language processing methods, such as skip-gram negative sampling (Mikolov et al., NIPS 2013), WhiteningBERT (Huang et al., Findings of EMNLP 2021), and headless language models (Godey et al., ICLR 2024), work well just because their word embeddings encode the empirical word frequency into the underlying probabilistic model.", "keywords": "representation learning;word embeddings;isotropy;natural language processing", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/467a9702d4364b40ecc3a9c2c193c2348086aaeb.zip", "author": "Sho Yokoi;Han Bao;Hiroto Kurita;Hidetoshi Shimodaira", "authorids": "~Sho_Yokoi1;~Han_Bao2;~Hiroto_Kurita1;~Hidetoshi_Shimodaira1", "gender": ";M;;M", "homepage": "http://www.cl.ecei.tohoku.ac.jp/~yokoi/;https://hermite.jp/;https://kurita.dev;http://stat.sys.i.kyoto-u.ac.jp/", "dblp": "184/8316;120/1444-2;;19/3393", "google_scholar": "https://scholar.google.co.jp/citations?user=EW2QPKoAAAAJ;MqMzjeMAAAAJ;72QbISAAAAAJ;LvoIaIsAAAAJ", "orcid": "0009-0002-4437-5245;0000-0002-4473-2604;;0000-0002-3371-7724", "linkedin": "shoyokoi/;;hiroto-kurita;shimo/", "or_profile": "~Sho_Yokoi1;~Han_Bao2;~Hiroto_Kurita1;~Hidetoshi_Shimodaira1", "aff": "Tohoku University;Kyoto University, Kyoto University;Tohoku University;RIKEN", "aff_domain": "tohoku.ac.jp;i.kyoto-u.ac.jp;tohoku.ac.jp;riken.jp", "position": "Assistant Professor;Assistant Professor;MS student;Researcher", "bibtex": "@inproceedings{\nyokoi2024zipfian,\ntitle={Zipfian Whitening},\nauthor={Sho Yokoi and Han Bao and Hiroto Kurita and Hidetoshi Shimodaira},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pASJxzMJb7}\n}", "github": "", "reviewers": "fUiC;9YCm;jUzu;VABi", "pdf_size": 1032908, "rating": "6;7;7;8", "confidence": "2;3;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "1;4;3;4", "wc_summary": "86;57;84;58", "wc_strengths": "27;52;153;57", "wc_weaknesses": "171;15;81;141", "wc_questions": "364;140;134;1", "wc_limitations": "6;14;14;2", "wc_review": "654;278;466;259", "wc_reply_reviewers": "140;26;24;59", "wc_reply_authors": "676;0;0;298", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;1;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 71.25, 13.77270852083932 ], "wc_strengths_avg": [ 72.25, 47.98632617736015 ], "wc_weaknesses_avg": [ 102.0, 59.77457653551382 ], "wc_questions_avg": [ 159.75, 130.35792074131896 ], "wc_limitations_avg": [ 9.0, 5.196152422706632 ], "wc_review_avg": [ 414.25, 160.33149253967542 ], "wc_reply_reviewers_avg": [ 62.25, 46.991355588022785 ], "wc_reply_authors_avg": [ 243.5, 277.7638385391446 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15160834532869828417&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "tohoku.ac.jp;i.kyoto-u.ac.jp;tohoku.ac.jp;riken.jp", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tohoku University;Kyoto University;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tohoku.ac.jp;https://www.kyoto-u.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Tohoku U;Kyoto U;RIKEN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kyoto", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "pBeoAGdIuy", "title": "Query of CC: Unearthing Large Scale Domain-Specific Knowledge from Public Corpora", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Large language models(LLMs) have demonstrated remarkable potential in various tasks, however, there remains a significant lack of open-source models and data for specific domains. Previous work has primarily focused on manually specifying resources and collecting high-quality data for specific domains, which is extremely time-consuming and labor-intensive. To address this limitation, we introduce large models into the data collection pipeline to guide the generation of domain-specific information and retrieve relevant data from Common Crawl(CC), a large public corpus. We called this method as Query of CC. It not only collects data related to domain-specific knowledge but also mines the data with potential reasoning procedures from the public corpus. By applying this method, we have collected a knowledge domain-related dataset named Knowledge Pile, which covers four main domains, including the sciences, humanities, and other categories. Through the analysis of Knowledge Pile, Query of CC can effectively retrieve relevant data from the covered knowledge domains and significantly enhance the performance in tests of mathematical and knowledge-related reasoning abilities. We have open-sourced our data on HuggingFace to promote academic progress in knowledge reasoning capabilities.", "keywords": "domain-specific knowledge;data collection;large language model", "primary_area": "", "supplementary_material": "/attachment/98352dd9bf14a0b2466ba0dc3a810d99b1bfecf9.pdf", "author": "Zhaoye Fei;Yunfan Shao;Linyang Li;Zhiyuan Zeng;Conghui He;Hang Yan;Dahua Lin;Xipeng Qiu", "authorids": "~Zhaoye_Fei1;~Yunfan_Shao1;~Linyang_Li1;~Zhiyuan_Zeng4;~Conghui_He2;~Hang_Yan2;~Dahua_Lin1;~Xipeng_Qiu1", "gender": "M;M;M;;;;M;M", "homepage": "https://ngc7292.github.io/;;https://github.com/LinyangLee;;;;http://dahua.site;https://xpqiu.github.io/", "dblp": "304/3217.html;236/5806;228/8051;;;;53/6088;69/1395", "google_scholar": "_Az3RuEAAAAJ;pw5QEtoAAAAJ;T6eEqcMAAAAJ;;;;GMzzRRUAAAAJ;Pq4Yp_kAAAAJ", "orcid": ";;;;;;;0000-0001-7163-5247", "linkedin": ";;;;;;;", "or_profile": "~Zhaoye_Fei1;~Yunfan_Shao1;~Linyang_Li1;~Zhiyuan_Zeng4;~Conghui_He2;~Hang_Yan2;~Dahua_Lin1;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;Fudan University;;;;The Chinese University of Hong Kong;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;;;cuhk.edu.hk;fudan.edu.cn", "position": "PhD student;PhD student;PhD student;;;;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2024query,\ntitle={Query of {CC}: Unearthing Large Scale Domain-Specific Knowledge from Public Corpora},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=pBeoAGdIuy}\n}", "github": "", "project": "", "reviewers": "menD;E2JB;4CSd;uPAi", "site": "https://openreview.net/forum?id=pBeoAGdIuy", "pdf_size": 685950, "rating": "4;6;6;7", "confidence": "4;5;4;2", "wc_summary_and_contributions": "22;67;81;55", "wc_strengths": "11;18;83;27", "wc_improvement": "20;66;178;25", "wc_limitations": "14;5;81;19", "wc_correctness": "170;5;75;5", "wc_clarity": "197;1;143;8", "wc_relation_to_prior_work": "3;1;62;13", "wc_documentation": "157;1;24;16", "wc_additional_feedback": "1;1;1;1", "wc_review": "595;165;728;169", "wc_reply_reviewers": "384;114;47;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;1;4;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 56.25, 21.810261346439663 ], "wc_strengths_avg": [ 34.75, 28.428638729281428 ], "wc_improvement_avg": [ 72.25, 63.60964942522479 ], "wc_limitations_avg": [ 29.75, 30.011456145945335 ], "wc_correctness_avg": [ 63.75, 67.67338841819581 ], "wc_clarity_avg": [ 87.25, 84.95991701973348 ], "wc_relation_to_prior_work_avg": [ 19.75, 24.81305100143874 ], "wc_documentation_avg": [ 49.5, 62.61189982742897 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 414.25, 251.68569188573275 ], "wc_reply_reviewers_avg": [ 136.25, 148.6646814142485 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4736842105263159, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8214974896973031252&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Fudan University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "Fudan;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Unlocking the Capabilities of Thought: A Reasoning Boundary Framework to Quantify and Optimize Chain-of-Thought", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93575", "id": "pC44UMwy2v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pC44UMwy2v", "openreview": "https://openreview.net/forum?id=pC44UMwy2v", "poster": "/media/PosterPDFs/NeurIPS%202024/93575.png?t=1731419974.5572975", "project": "", "author_site": "Qiguang Chen, Libo Qin, Jiaqi Wang, Jingxuan Zhou, Wanxiang Che", "tldr": "", "abstract": "Chain-of-Thought (CoT) reasoning has emerged as a promising approach for enhancing the performance of large language models (LLMs) on complex reasoning tasks. Recently, a series of studies attempt to explain the mechanisms underlying CoT, aiming to deepen the understanding of its efficacy. Nevertheless, the existing research faces two major challenges: (1) a lack of quantitative metrics to assess CoT capabilities and (2) a dearth of guidance on optimizing CoT performance. Motivated by this, in this work, we introduce a novel reasoning boundary framework (RBF) to address these challenges. To solve the lack of quantification, we first define a reasoning boundary (RB) to quantify the upper-bound of CoT and establish a combination law for RB, enabling a practical quantitative approach applicable to various real-world CoT tasks. To address the lack of optimization, we propose three categories of RBs. We further optimize these categories with combination laws focused on RB promotion and reasoning path optimization for CoT improvement. Through extensive experiments on 27 models and 5 tasks, the study validates the existence and rationality of the proposed framework. Furthermore, it explains the effectiveness of 10 CoT strategies and guides optimization from two perspectives. We hope this work can provide a comprehensive understanding of the boundaries and optimization strategies for reasoning in LLMs. Our code and data are available at https://github.com/LightChen233/reasoning-boundary.", "keywords": "Chain-of-Thought;Reasoning Granularity;Reasoning Boundary", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Qiguang Chen;Libo Qin;Jiaqi WANG;Jingxuan Zhou;Wanxiang Che", "authorids": "~Qiguang_Chen1;~Libo_Qin1;~Jiaqi_WANG11;~Jingxuan_Zhou2;~Wanxiang_Che1", "gender": "M;;F;M;M", "homepage": "https://scholar.google.com/citations?user=8j8AfF0AAAAJ;;https://github.com/kokolerk;;http://ir.hit.edu.cn/~car/", "dblp": "292/9953;;;;https://dblp.uni-trier.de/pers/hd/c/Che:Wanxiang", "google_scholar": "8j8AfF0AAAAJ;;UKlrS74AAAAJ;https://scholar.google.com/citations?hl=zh-CN;SVlQ6IEAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Qiguang_Chen1;~Libo_Qin1;~Jiaqi_WANG11;~Jingxuan_Zhou2;~Wanxiang_Che1", "aff": "Harbin Institute of Technology;;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Central South University;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;;cse.cuhk.edu.hk;csu.edu.cn;hit.edu.cn", "position": "PhD student;;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nchen2024unlocking,\ntitle={Unlocking the Capabilities of Thought: A Reasoning Boundary Framework to Quantify and Optimize Chain-of-Thought},\nauthor={Qiguang Chen and Libo Qin and Jiaqi WANG and Jingxuan Zhou and Wanxiang Che},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pC44UMwy2v}\n}", "github": "", "reviewers": "MXRP;z3Vj;ynAv;fHpa;oUcy", "pdf_size": 5541304, "rating": "6;7;7;7;8", "confidence": "3;4;3;4;3", "soundness": "3;3;3;4;3", "novelty": "3;3;2;3;4", "presentation": "3;3;3;3;3", "wc_summary": "42;256;42;73;57", "wc_strengths": "77;162;69;52;36", "wc_weaknesses": "79;74;93;55;6", "wc_questions": "4;62;29;96;11", "wc_limitations": "5;66;1;4;1", "wc_review": "207;620;234;280;111", "wc_reply_reviewers": "0;30;33;220;2", "wc_reply_authors": "0;26;145;635;31", "reply_reviewers": "0;1;2;2;1", "reply_authors": "1;2;2;3;2", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.0, 81.80709015727182 ], "wc_strengths_avg": [ 79.2, 43.75111427152456 ], "wc_weaknesses_avg": [ 61.4, 30.25623902602569 ], "wc_questions_avg": [ 40.4, 34.284690460903974 ], "wc_limitations_avg": [ 15.4, 25.35034516530298 ], "wc_review_avg": [ 290.4, 173.81898630471875 ], "wc_reply_reviewers_avg": [ 57.0, 82.6413939863069 ], "wc_reply_authors_avg": [ 167.4, 239.07036621045276 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5795455539638292973&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "hit.edu.cn;;cse.cuhk.edu.hk;csu.edu.cn;hit.edu.cn", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong;Central South University", "aff_unique_dep": ";Department of Computer Science and Engineering;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk;https://www.csu.edu.cn", "aff_unique_abbr": "HIT;CUHK;CSU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Harbin;Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Hamba: Single-view 3D Hand Reconstruction with Graph-guided Bi-Scanning Mamba", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93574", "id": "pCJ0l1JVUX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pCJ0l1JVUX", "openreview": "https://openreview.net/forum?id=pCJ0l1JVUX", "poster": "/media/PosterPDFs/NeurIPS%202024/93574.png?t=1733598249.4728186", "project": "", "author_site": "Haoye Dong, Aviral Chharia, Wenbo Gou, Francisco Vicente Carrasco, Fernando D De la Torre", "tldr": "", "abstract": "3D Hand reconstruction from a single RGB image is challenging due to the articulated motion, self-occlusion, and interaction with objects. Existing SOTA methods employ attention-based transformers to learn the 3D hand pose and shape, yet they do not fully achieve robust and accurate performance, primarily due to inefficiently modeling spatial relations between joints. To address this problem, we propose a novel graph-guided Mamba framework, named Hamba, which bridges graph learning and state space modeling. Our core idea is to reformulate Mamba's scanning into graph-guided bidirectional scanning for 3D reconstruction using a few effective tokens. This enables us to efficiently learn the spatial relationships between joints for improving reconstruction performance. Specifically, we design a Graph-guided State Space (GSS) block that learns the graph-structured relations and spatial sequences of joints and uses 88.5\\% fewer tokens than attention-based methods. Additionally, we integrate the state space features and the global features using a fusion module. By utilizing the GSS block and the fusion module, Hamba effectively leverages the graph-guided state space features and jointly considers global and local features to improve performance. Experiments on several benchmarks and in-the-wild tests demonstrate that Hamba significantly outperforms existing SOTAs, achieving the PA-MPVPE of 5.3mm and F@15mm of 0.992 on FreiHAND. At the time of this paper's acceptance, Hamba holds the top position, Rank 1, in two competition leaderboards on 3D hand reconstruction.", "keywords": "3D Hand Reconstruction;Mamba;State Space Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haoye Dong;Aviral Chharia;Wenbo Gou;Francisco Vicente Carrasco;Fernando De la Torre", "authorids": "~Haoye_Dong1;~Aviral_Chharia2;~Wenbo_Gou1;~Francisco_Vicente_Carrasco1;~Fernando_De_la_Torre2", "gender": "M;;M;Not Specified;", "homepage": "https://www.haoyed.com;;https://github.com/MakingBigNewsWallace;https://www.linkedin.com/in/francisco-vicente-carrasco-32a508144/;", "dblp": "163/4089;;;;", "google_scholar": "acsao-8AAAAJ;;;3elKp9wAAAAJ;", "orcid": ";;;;", "linkedin": ";;wenbogou/;;", "or_profile": "~Haoye_Dong1;~Aviral_Chharia2;~Wenbo_Gou1;~Francisco_Vicente_Carrasco1;~Fernando_De_la_Torre2", "aff": "Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon University;", "aff_domain": "cmu.edu;;cmu.edu;andrew.cmu.edu;", "position": "Postdoc;;MS student;Researcher;", "bibtex": "@inproceedings{\ndong2024hamba,\ntitle={Hamba: Single-view 3D Hand Reconstruction with Graph-guided Bi-Scanning Mamba},\nauthor={Haoye Dong and Aviral Chharia and Wenbo Gou and Francisco Vicente Carrasco and Fernando De la Torre},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pCJ0l1JVUX}\n}", "github": "", "reviewers": "i6yS;TVui;shEo", "pdf_size": 16911899, "rating": "4;5;6", "confidence": "5;4;4", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;3;4", "wc_summary": "60;138;79", "wc_strengths": "9;50;140", "wc_weaknesses": "203;82;98", "wc_questions": "75;47;50", "wc_limitations": "51;12;5", "wc_review": "398;329;372", "wc_reply_reviewers": "248;44;28", "wc_reply_authors": "329;157;23", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 92.33333333333333, 33.209770985191824 ], "wc_strengths_avg": [ 66.33333333333333, 54.713394663058104 ], "wc_weaknesses_avg": [ 127.66666666666667, 53.667701853370076 ], "wc_questions_avg": [ 57.333333333333336, 12.552113589175153 ], "wc_limitations_avg": [ 22.666666666666668, 20.237478982214054 ], "wc_review_avg": [ 366.3333333333333, 28.4526897771644 ], "wc_reply_reviewers_avg": [ 106.66666666666667, 100.15099711047425 ], "wc_reply_authors_avg": [ 169.66666666666666, 125.24464947542559 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15990901881981840220&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cmu.edu;;cmu.edu;andrew.cmu.edu;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Empirical Impact of Neural Parameter Symmetries, or Lack Thereof", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93573", "id": "pCVxYw6FKg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pCVxYw6FKg", "openreview": "https://openreview.net/forum?id=pCVxYw6FKg", "poster": "", "project": "", "author_site": "Derek Lim, Theo Putterman, Robin Walters, Haggai Maron, Stefanie Jegelka", "tldr": "", "abstract": "Many algorithms and observed phenomena in deep learning appear to be affected by parameter symmetries --- transformations of neural network parameters that do not change the underlying neural network function. These include linear mode connectivity, model merging, Bayesian neural network inference, metanetworks, and several other characteristics of optimization or loss-landscapes. However, theoretical analysis of the relationship between parameter space symmetries and these phenonmena is difficult. In this work, we empirically investigate the impact of neural parameter symmetries by introducing new neural network architectures that have reduced parameter space symmetries. We develop two methods, with some provable guarantees, of modifying standard neural networks to reduce parameter space symmetries. With these new methods, we conduct a comprehensive experimental study consisting of multiple tasks aimed at assessing the effect of removing parameter symmetries. Our experiments reveal several interesting observations on the empirical impact of parameter symmetries; for instance, we observe linear mode connectivity between our networks without alignment of weight spaces, and we find that our networks allow for faster and more effective Bayesian neural network training.", "keywords": "Parameter symmetry;loss landscapes;identifiability;mode connectivity;Bayesian neural networks", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Derek Lim;Theo Putterman;Robin Walters;Haggai Maron;Stefanie Jegelka", "authorids": "~Derek_Lim1;~Theo_Putterman1;~Robin_Walters1;~Haggai_Maron1;~Stefanie_Jegelka3", "gender": "M;M;M;M;F", "homepage": "https://cptq.github.io/;https://sites.google.com/berkeley.edu/moeputterman/home?authuser=0;http://www.robinwalters.com;https://haggaim.github.io/;http://people.csail.mit.edu/stefje/", "dblp": "267/5433;;258/3416;181/6629;38/7003", "google_scholar": "y9YTBIsAAAAJ;;fnprJmUAAAAJ;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ;gTWUZlsAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Derek_Lim1;~Theo_Putterman1;~Robin_Walters1;~Haggai_Maron1;~Stefanie_Jegelka3", "aff": "Liquid AI;University of California, Berkeley;Northeastern University ;NVIDIA;Massachusetts Institute of Technology", "aff_domain": "liquid.ai;berkeley.edu;northeastern.edu;nvidia.com;mit.edu", "position": "Researcher;Undergrad student;Assistant Professor;Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nlim2024the,\ntitle={The Empirical Impact of Neural Parameter Symmetries, or Lack Thereof},\nauthor={Derek Lim and Theo Putterman and Robin Walters and Haggai Maron and Stefanie Jegelka},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pCVxYw6FKg}\n}", "github": "", "reviewers": "ceUs;hbBt;tX9W;ajZW", "pdf_size": 862273, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "1;3;3;3", "novelty": "2;2;4;4", "presentation": "2;4;4;3", "wc_summary": "47;110;86;137", "wc_strengths": "22;97;65;77", "wc_weaknesses": "86;196;240;401", "wc_questions": "20;1;98;219", "wc_limitations": "94;1;122;12", "wc_review": "269;405;611;846", "wc_reply_reviewers": "487;90;206;72", "wc_reply_authors": "624;372;68;0", "reply_reviewers": "3;1;4;1", "reply_authors": "4;2;3;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 95.0, 33.0681115275729 ], "wc_strengths_avg": [ 65.25, 27.462474396892937 ], "wc_weaknesses_avg": [ 230.75, 113.17105416138881 ], "wc_questions_avg": [ 84.5, 85.7394308355263 ], "wc_limitations_avg": [ 57.25, 51.85255538543882 ], "wc_review_avg": [ 532.75, 218.02107122936536 ], "wc_reply_reviewers_avg": [ 213.75, 165.9312734236678 ], "wc_reply_authors_avg": [ 266.0, 249.67979493743582 ], "reply_reviewers_avg": [ 2.25, 1.299038105676658 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12196085782569556378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "liquid.ai;berkeley.edu;northeastern.edu;nvidia.com;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Liquid AI;University of California, Berkeley;Northeastern University;NVIDIA;Massachusetts Institute of Technology", "aff_unique_dep": ";;;NVIDIA Corporation;", "aff_unique_url": ";https://www.berkeley.edu;https://www.northeastern.edu;https://www.nvidia.com;https://web.mit.edu", "aff_unique_abbr": ";UC Berkeley;NEU;NVIDIA;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Unknown;United States" }, { "title": "Constrained Latent Action Policies for Model-Based Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93572", "id": "pEhvscmSgG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pEhvscmSgG", "openreview": "https://openreview.net/forum?id=pEhvscmSgG", "poster": "/media/PosterPDFs/NeurIPS%202024/93572.png?t=1731320815.3761778", "project": "", "author_site": "Marvin Alles, Philip Becker-Ehmck, Patrick van der Smagt, Maximilian Karl", "tldr": "", "abstract": "In offline reinforcement learning, a policy is learned using a static dataset in the absence of costly feedback from the environment. In contrast to the online setting, only using static datasets poses additional challenges, such as policies generating out-of-distribution samples. Model-based offline reinforcement learning methods try to overcome these by learning a model of the underlying dynamics of the environment and using it to guide policy search. It is beneficial but, with limited datasets, errors in the model and the issue of value overestimation among out-of-distribution states can worsen performance. Current model-based methods apply some notion of conservatism to the Bellman update, often implemented using uncertainty estimation derived from model ensembles. In this paper, we propose Constrained Latent Action Policies (C-LAP) which learns a generative model of the joint distribution of observations and actions. We cast policy learning as a constrained objective to always stay within the support of the latent action distribution, and use the generative capabilities of the model to impose an implicit constraint on the generated actions. Thereby eliminating the need to use additional uncertainty penalties on the Bellman update and significantly decreasing the number of gradient steps required to learn a policy. We empirically evaluate C-LAP on the D4RL and V-D4RL benchmark, and show that C-LAP is competitive to state-of-the-art methods, especially outperforming on datasets with visual observations.", "keywords": "Offline Reinforcement Learning;Model-Based Reinforcement Learning;Latent Action;Constrained Policy;Generative Model;Applied Reinforcement Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Marvin Alles;Philip Becker-Ehmck;Patrick van der Smagt;Maximilian Karl", "authorids": "~Marvin_Alles1;~Philip_Becker-Ehmck1;~Patrick_van_der_Smagt1;~Maximilian_Karl1", "gender": "M;M;M;M", "homepage": ";;https://argmax.org;", "dblp": ";241/9813;24/6573.html;", "google_scholar": ";https://scholar.google.de/citations?user=eFLK7OYAAAAJ;https://scholar.google.de/citations?user=5ybzvbsAAAAJ;noekAeoAAAAJ", "orcid": ";;0000-0003-4418-4916;0000-0001-8959-368X", "linkedin": "marvin-alles/;;smagt/;", "or_profile": "~Marvin_Alles1;~Philip_Becker-Ehmck1;~Patrick_van_der_Smagt1;~Maximilian_Karl1", "aff": "Technical University of Munich;Volkswagen Group;Machine Learning Research Lab; Volkswagen Group;Machine Learning Research Lab, Volkswagen Group", "aff_domain": "tum.de;volkswagen.de;volkswagen.de;volkswagen.de", "position": "PhD student;Researcher;Full Professor;AI research scientist", "bibtex": "@inproceedings{\nalles2024constrained,\ntitle={Constrained Latent Action Policies for Model-Based Offline Reinforcement Learning},\nauthor={Marvin Alles and Philip Becker-Ehmck and Patrick van der Smagt and Maximilian Karl},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pEhvscmSgG}\n}", "github": "", "reviewers": "6cNB;UrFS;932j", "pdf_size": 2267695, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "2;3;2", "novelty": "3;3;3", "presentation": "3;4;4", "wc_summary": "66;118;143", "wc_strengths": "64;179;157", "wc_weaknesses": "251;135;541", "wc_questions": "5;101;17", "wc_limitations": "10;21;225", "wc_review": "396;554;1083", "wc_reply_reviewers": "20;29;23", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 109.0, 32.072833779799794 ], "wc_strengths_avg": [ 133.33333333333334, 49.84197249529981 ], "wc_weaknesses_avg": [ 309.0, 170.74737674900504 ], "wc_questions_avg": [ 41.0, 42.708313008125245 ], "wc_limitations_avg": [ 85.33333333333333, 98.86129452700666 ], "wc_review_avg": [ 677.6666666666666, 293.78261048302744 ], "wc_reply_reviewers_avg": [ 24.0, 3.7416573867739413 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LRGHYqt0Z7oJ:scholar.google.com/&scioq=Constrained+Latent+Action+Policies+for+Model-Based+Offline+Reinforcement+Learning&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "tum.de;volkswagen.de;volkswagen.de;volkswagen.de", "author_num": 4, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Technical University of Munich;Volkswagen Group;Machine Learning Research Lab", "aff_unique_dep": ";;Machine Learning Research", "aff_unique_url": "https://www.tum.de;https://www.volkswagenag.com;", "aff_unique_abbr": "TUM;VW Group;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany;" }, { "id": "pF5QjxzwHi", "title": "Continuous Perception Benchmark", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Humans continuously perceive and process visual signals. However, current video models typically either sample key frames sparsely or divide videos into chunks and densely sample within each chunk. This approach stems from the fact that most existing video benchmarks can be addressed by analyzing key frames or aggregating information from separate chunks. We anticipate that the next generation of vision models will emulate human perception by processing visual input continuously and holistically. To facilitate the development of such models, we propose the Continuous Perception Benchmark, a video question answering task that cannot be solved by focusing solely on a few frames or by captioning small chunks and then summarizing using language models. Extensive experiments demonstrate that existing vision models, whether commercial or open-source, struggle with these tasks, indicating the need for new technical advancements in this direction.", "keywords": "Video understanding;Continuous perception", "primary_area": "", "supplementary_material": "/attachment/2d86b483751a69af96e0447250e75c4ab770220a.pdf", "author": "Zeyu Wang;Zhenzhen Weng;Serena Yeung-Levy", "authorids": "~Zeyu_Wang1;~Zhenzhen_Weng1;~Serena_Yeung-Levy1", "gender": ";F;F", "homepage": ";https://zzweng.github.io/;http://ai.stanford.edu/~syyeung/", "dblp": "132/7882-4.html;248/3274;147/5023", "google_scholar": "https://scholar.google.com/citations?hl=en;diDrNrgAAAAJ;Tw2m5kUAAAAJ", "orcid": "0000-0002-7057-1613;0009-0004-1108-4155;0000-0003-0529-0628", "linkedin": ";;", "or_profile": "~Zeyu_Wang1;~Zhenzhen_Weng1;~Serena_Yeung1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "Postdoc;PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024continuous,\ntitle={Continuous Perception Benchmark},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=pF5QjxzwHi}\n}", "github": "", "project": "", "reviewers": "L2mZ;CGQb;zVtE;Ric1", "site": "https://openreview.net/forum?id=pF5QjxzwHi", "pdf_size": 5098328, "rating": "4;4;6;6", "confidence": "3;4;4;4", "wc_summary_and_contributions": "38;32;131;54", "wc_strengths": "13;51;64;73", "wc_improvement": "116;104;10;88", "wc_limitations": "6;1;14;8", "wc_correctness": "2;5;57;12", "wc_clarity": "2;8;32;1", "wc_relation_to_prior_work": "2;8;107;1", "wc_documentation": "2;1;17;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "182;211;433;251", "wc_reply_reviewers": "0;319;624;9", "wc_reply_authors": "0;56;0;0", "reply_reviewers": "0;2;2;1", "reply_authors": "1;3;3;1", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 63.75, 39.65081966365891 ], "wc_strengths_avg": [ 50.25, 22.884219453588535 ], "wc_improvement_avg": [ 79.5, 41.33702940463913 ], "wc_limitations_avg": [ 7.25, 4.656984002549289 ], "wc_correctness_avg": [ 19.0, 22.237355957937087 ], "wc_clarity_avg": [ 10.75, 12.55736835487436 ], "wc_relation_to_prior_work_avg": [ 29.5, 44.8246583924518 ], "wc_documentation_avg": [ 8.25, 6.905613658466566 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 269.25, 97.66364472002876 ], "wc_reply_reviewers_avg": [ 238.0, 257.21683459680474 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lfnHC2f38w0J:scholar.google.com/&scioq=Continuous+Perception+Benchmark&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Faster Accelerated First-order Methods for Convex Optimization with Strongly Convex Function Constraints", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93571", "id": "pG380vLYRU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pG380vLYRU", "openreview": "https://openreview.net/forum?id=pG380vLYRU", "poster": "/media/PosterPDFs/NeurIPS%202024/93571.png?t=1731120189.5297647", "project": "", "author_site": "zhenwei lin, Qi Deng", "tldr": "", "abstract": "In this paper, we introduce faster accelerated primal-dual algorithms for minimizing a convex function subject to strongly convex function constraints. \nPrior to our work, the best complexity bound was $\\mathcal{O}(1/{\\varepsilon})$, regardless of the strong convexity of the constraint function.\nIt is unclear whether the strong convexity assumption can enable even better convergence results. \nTo address this issue, we have developed novel techniques to progressively estimate the strong convexity of the Lagrangian function.\nOur approach, for the first time, effectively leverages the constraint strong convexity, obtaining an improved complexity of $\\mathcal{O}(1/\\sqrt{\\varepsilon})$. This rate matches the complexity lower bound for strongly-convex-concave saddle point optimization and is therefore order-optimal.\nWe show the superior performance of our methods in sparsity-inducing constrained optimization, notably Google's personalized PageRank problem. Furthermore, we show that a restarted version of the proposed methods can effectively identify the optimal solution's sparsity pattern within a finite number of steps, a result that appears to have independent significance.", "keywords": "Convex optimization; Accelerated primal dual algorithm; Sparse Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/7f254ffb02516109c7485761ea0f4757fadbfcd4.zip", "author": "Zhenwei Lin;Qi Deng", "authorids": "~Zhenwei_Lin3;~Qi_Deng1", "gender": "M;M", "homepage": "https://github.com/zhenweilin;http://sime.shufe.edu.cn/teacher/show/225", "dblp": "309/0132;", "google_scholar": "Bq-FHQsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Zhenwei_Lin3;~Qi_Deng1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics", "aff_domain": "shufe.edu;sufe.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nlin2024faster,\ntitle={Faster Accelerated First-order Methods for Convex Optimization with Strongly Convex Function Constraints},\nauthor={Zhenwei Lin and Qi Deng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pG380vLYRU}\n}", "github": "", "reviewers": "h3VH;rJ7a;8Vg8;ecU7", "pdf_size": 733359, "rating": "5;6;6;7", "confidence": "5;3;4;3", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "2;3;1;3", "wc_summary": "18;82;56;98", "wc_strengths": "58;65;20;76", "wc_weaknesses": "232;43;1456;58", "wc_questions": "2;38;118;112", "wc_limitations": "1;1;7;51", "wc_review": "311;229;1657;395", "wc_reply_reviewers": "120;0;869;316", "wc_reply_authors": "317;0;1790;767", "reply_reviewers": "2;0;3;2", "reply_authors": "2;1;6;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 63.5, 30.244834269673227 ], "wc_strengths_avg": [ 54.75, 21.063890903629368 ], "wc_weaknesses_avg": [ 447.25, 587.1206754151995 ], "wc_questions_avg": [ 67.5, 49.221438418640304 ], "wc_limitations_avg": [ 15.0, 20.92844953645635 ], "wc_review_avg": [ 648.0, 585.4955166352685 ], "wc_reply_reviewers_avg": [ 326.25, 333.0393182493623 ], "wc_reply_authors_avg": [ 718.5, 676.0009245555808 ], "reply_reviewers_avg": [ 1.75, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 1.8708286933869707 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QFa9Cu1b2PIJ:scholar.google.com/&scioq=Faster+Accelerated+First-order+Methods+for+Convex+Optimization+with+Strongly+Convex+Function+Constraints&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "shufe.edu;sufe.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai University of Finance and Economics", "aff_unique_dep": "", "aff_unique_url": "http://www.sufe.edu.cn", "aff_unique_abbr": "SUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Span-Based Optimal Sample Complexity for Weakly Communicating and General Average Reward MDPs", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93570", "id": "pGEY8JQ3qx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pGEY8JQ3qx", "openreview": "https://openreview.net/forum?id=pGEY8JQ3qx", "poster": "/media/PosterPDFs/NeurIPS%202024/93570.png?t=1731737688.6848917", "project": "", "author_site": "Matthew Zurek, Yudong Chen", "tldr": "", "abstract": "We study the sample complexity of learning an $\\varepsilon$-optimal policy in an average-reward Markov decision process (MDP) under a generative model. For weakly communicating MDPs, we establish the complexity bound $\\widetilde{O}\\left(SA\\frac{\\mathsf{H}}{\\varepsilon^2} \\right)$, where $\\mathsf{H}$ is the span of the bias function of the optimal policy and $SA$ is the cardinality of the state-action space. Our result is the first that is minimax optimal (up to log factors) in all parameters $S,A,\\mathsf{H}$, and $\\varepsilon$, improving on existing work that either assumes uniformly bounded mixing times for all policies or has suboptimal dependence on the parameters. We also initiate the study of sample complexity in general (multichain) average-reward MDPs. We argue a new transient time parameter $\\mathsf{B}$ is necessary, establish an $\\widetilde{O}\\left(SA\\frac{\\mathsf{B} + \\mathsf{H}}{\\varepsilon^2} \\right)$ complexity bound, and prove a matching (up to log factors) minimax lower bound. Both results are based on reducing the average-reward MDP to a discounted MDP, which requires new ideas in the general setting. To optimally analyze this reduction, we develop improved bounds for $\\gamma$-discounted MDPs, showing that $\\widetilde{O}\\left(SA\\frac{\\mathsf{H}}{(1-\\gamma)^2\\varepsilon^2} \\right)$ and $\\widetilde{O}\\left(SA\\frac{\\mathsf{B} + \\mathsf{H}}{(1-\\gamma)^2\\varepsilon^2} \\right)$ samples suffice to learn $\\varepsilon$-optimal policies in weakly communicating and in general MDPs, respectively. Both these results circumvent the well-known minimax lower bound of $\\widetilde{\\Omega}\\left(SA\\frac{1}{(1-\\gamma)^3\\varepsilon^2} \\right)$ for $\\gamma$-discounted MDPs, and establish a quadratic rather than cubic horizon dependence for a fixed MDP instance.", "keywords": "reinforcement learning theory;average reward;sample complexity", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Matthew Zurek;Yudong Chen", "authorids": "~Matthew_Zurek1;~Yudong_Chen1", "gender": ";M", "homepage": ";https://pages.cs.wisc.edu/~yudongchen/", "dblp": ";15/1975-1", "google_scholar": ";ze5rCdwAAAAJ", "orcid": ";0000-0002-6416-5635", "linkedin": ";", "or_profile": "~Matthew_Zurek1;~Yudong_Chen1", "aff": ";Department of Computer Sciences, University of Wisconsin - Madison", "aff_domain": ";cs.wisc.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nzurek2024spanbased,\ntitle={Span-Based Optimal Sample Complexity for Weakly Communicating and General Average Reward {MDP}s},\nauthor={Matthew Zurek and Yudong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pGEY8JQ3qx}\n}", "github": "", "reviewers": "hsZR;AFus;i8Mw;6ECs", "pdf_size": 522165, "rating": "7;7;7;10", "confidence": "3;3;3;4", "soundness": "3;4;3;4", "novelty": "3;3;3;4", "presentation": "4;4;3;4", "wc_summary": "100;39;14;99", "wc_strengths": "98;33;44;15", "wc_weaknesses": "40;13;119;1", "wc_questions": "69;119;71;1", "wc_limitations": "1;73;48;1", "wc_review": "308;277;296;117", "wc_reply_reviewers": "9;13;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 37.556623916427846 ], "wc_strengths_avg": [ 47.5, 30.939457008809963 ], "wc_weaknesses_avg": [ 43.25, 45.95854109956059 ], "wc_questions_avg": [ 65.0, 42.02380277890139 ], "wc_limitations_avg": [ 30.75, 31.03526220285564 ], "wc_review_avg": [ 249.5, 77.29327266974792 ], "wc_reply_reviewers_avg": [ 5.5, 5.678908345800274 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7989803427005154347&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": ";cs.wisc.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "Department of Computer Sciences", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Mixture of Scales: Memory-Efficient Token-Adaptive Binarization for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93569", "id": "pGOBEYcXzs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pGOBEYcXzs", "openreview": "https://openreview.net/forum?id=pGOBEYcXzs", "poster": "/media/PosterPDFs/NeurIPS%202024/93569.png?t=1731487432.7979915", "project": "", "author_site": "Dongwon Jo, Taesu Kim, Yulhwa Kim, jae-joon kim", "tldr": "", "abstract": "Binarization, which converts weight parameters to binary values, has emerged as an effective strategy to reduce the size of large language models (LLMs). However, typical binarization techniques significantly diminish linguistic effectiveness of LLMs.\nTo address this issue, we introduce a novel binarization technique called Mixture of Scales (BinaryMoS). Unlike conventional methods, BinaryMoS employs multiple scaling experts for binary weights, dynamically merging these experts for each token to adaptively generate scaling factors. This token-adaptive approach boosts the representational power of binarized LLMs by enabling contextual adjustments to the values of binary weights. Moreover, because this adaptive process only involves the scaling factors rather than the entire weight matrix, BinaryMoS maintains compression efficiency similar to traditional static binarization methods. Our experimental results reveal that BinaryMoS surpasses conventional binarization techniques in various natural language processing tasks and even outperforms 2-bit quantization methods, all while maintaining similar model size to static binarization techniques.", "keywords": "Large Language Models;Binarization;Quantization;Model Compression;Efficient LLM", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a998ffdc4e689a84d6bb97f427dc7a28935c31db.zip", "author": "Dongwon Jo;Taesu Kim;Yulhwa Kim;Jae-Joon Kim", "authorids": "~Dongwon_Jo1;~Taesu_Kim1;~Yulhwa_Kim1;~Jae-Joon_Kim2", "gender": "M;M;;M", "homepage": "https://vlsi.snu.ac.kr;;https://eic.skku.edu/;http://vlsi.snu.ac.kr", "dblp": ";44/6997;223/9434;", "google_scholar": "https://scholar.google.co.kr/citations?user=GA_k0xAAAAAJ;zzII2gsAAAAJ;VRkM404AAAAJ;Ee994T0AAAAJ", "orcid": ";;0000-0003-3735-821X;", "linkedin": "dongwon-jo-36378b244/;;;", "or_profile": "~Dongwon_Jo1;~Taesu_Kim1;~Yulhwa_Kim1;~jae-joon_kim1", "aff": "Seoul National University;SqueezeBits Inc.;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;squeezebits.com;snu.ac.kr;snu.ac.kr", "position": "PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\njo2024mixture,\ntitle={Mixture of Scales: Memory-Efficient Token-Adaptive Binarization for Large Language Models},\nauthor={Dongwon Jo and Taesu Kim and Yulhwa Kim and Jae-Joon Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pGOBEYcXzs}\n}", "github": "", "reviewers": "fpKa;iCrR;s3b8;scP1", "pdf_size": 871346, "rating": "3;7;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;4;3;3", "wc_summary": "95;91;95;66", "wc_strengths": "11;50;33;78", "wc_weaknesses": "82;47;16;168", "wc_questions": "62;40;192;2", "wc_limitations": "1;10;11;56", "wc_review": "251;238;347;370", "wc_reply_reviewers": "259;0;10;13", "wc_reply_authors": "475;0;0;14", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 1.7320508075688772 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.75, 12.090802289343747 ], "wc_strengths_avg": [ 43.0, 24.48468909339059 ], "wc_weaknesses_avg": [ 78.25, 56.83473849680317 ], "wc_questions_avg": [ 74.0, 71.42828571371429 ], "wc_limitations_avg": [ 19.5, 21.43011899173684 ], "wc_review_avg": [ 301.5, 57.76028047023318 ], "wc_reply_reviewers_avg": [ 70.5, 108.93690834606974 ], "wc_reply_authors_avg": [ 122.25, 203.74049057563397 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8172614852757300662&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;squeezebits.com;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Seoul National University;SqueezeBits Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;", "aff_unique_abbr": "SNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Learning on Large Graphs using Intersecting Communities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93568", "id": "pGR5X4e1gy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pGR5X4e1gy", "openreview": "https://openreview.net/forum?id=pGR5X4e1gy", "poster": "/media/PosterPDFs/NeurIPS%202024/93568.png?t=1731295281.3271208", "project": "", "author_site": "Ben Finkelshtein, Ismail Ceylan, Michael Bronstein, Ron Levie", "tldr": "", "abstract": "Message Passing Neural Networks (MPNNs) are a staple of graph machine learning. MPNNs iteratively update each node\u2019s representation in an input graph by aggregating messages from the node\u2019s neighbors, which necessitates a memory complexity of the order of the __number of graph edges__. This complexity might quickly become prohibitive for large graphs provided they are not very sparse. In this paper, we propose a novel approach to alleviate this problem by approximating the input graph as an intersecting community graph (ICG) -- a combination of intersecting cliques. The key insight is that the number of communities required to approximate a graph __does not depend on the graph size__. We develop a new constructive version of the Weak Graph Regularity Lemma to efficiently construct an approximating ICG for any input graph. We then devise an efficient graph learning algorithm operating directly on ICG in linear memory and time with respect to the __number of nodes__ (rather than edges). This offers a new and fundamentally different pipeline for learning on very large non-sparse graphs, whose applicability is demonstrated empirically on node classification tasks and spatio-temporal data processing.", "keywords": "graph representation learning;graph neural networks;regularity lemma;intersecting communities;graphon;scalability", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Ben Finkelshtein;Ismail Ilkan Ceylan;Michael M. Bronstein;Ron Levie", "authorids": "~Ben_Finkelshtein1;~Ismail_Ilkan_Ceylan2;~Michael_M._Bronstein1;~Ron_Levie1", "gender": "M;;M;", "homepage": "https://benfinkelshtein.github.io/;https://www.cs.ox.ac.uk/people/ismaililkan.ceylan/;http://www.inf.usi.ch/bronstein/;", "dblp": "278/2449.html;147/6111;07/2668;", "google_scholar": "goWM7VwAAAAJ;avJ5kQcAAAAJ;UU3N6-UAAAAJ;", "orcid": ";0000-0003-4118-4689;;", "linkedin": "ben-finkelshtein/;;mbronstein/;", "or_profile": "~Ben_Finkelshtein1;~Ismail_Ilkan_Ceylan2;~Michael_M._Bronstein1;~Ron_Levie1", "aff": "University of Oxford;University of Oxford;University of Oxford;", "aff_domain": "cs.ox.ac.uk;oxford.ac.uk;ox.ac.uk;", "position": "PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nfinkelshtein2024learning,\ntitle={Learning on Large Graphs using Intersecting Communities},\nauthor={Ben Finkelshtein and Ismail Ilkan Ceylan and Michael M. Bronstein and Ron Levie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pGR5X4e1gy}\n}", "github": "", "reviewers": "XnC3;2roR;hec5;Z1J6", "pdf_size": 2443591, "rating": "5;5;6;6", "confidence": "4;3;4;3", "soundness": "3;3;2;3", "novelty": "2;4;3;3", "presentation": "3;3;3;3", "wc_summary": "123;96;66;222", "wc_strengths": "74;93;63;153", "wc_weaknesses": "149;171;332;526", "wc_questions": "55;53;87;131", "wc_limitations": "1;23;6;36", "wc_review": "402;436;554;1068", "wc_reply_reviewers": "50;45;36;257", "wc_reply_authors": "238;114;135;135", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 126.75, 58.572070989508305 ], "wc_strengths_avg": [ 95.75, 34.751798514609284 ], "wc_weaknesses_avg": [ 294.5, 151.17952903749898 ], "wc_questions_avg": [ 81.5, 31.603006186120965 ], "wc_limitations_avg": [ 16.5, 13.901438774457844 ], "wc_review_avg": [ 615.0, 267.5537329210714 ], "wc_reply_reviewers_avg": [ 97.0, 92.51216136270949 ], "wc_reply_authors_avg": [ 155.5, 48.396797414705034 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6468638069832976775&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.ox.ac.uk;oxford.ac.uk;ox.ac.uk;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Speculative Decoding with CTC-based Draft Model for LLM Inference Acceleration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93567", "id": "pGeAcYhnN5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pGeAcYhnN5", "openreview": "https://openreview.net/forum?id=pGeAcYhnN5", "poster": "/media/PosterPDFs/NeurIPS%202024/93567.png?t=1731324357.3664138", "project": "", "author_site": "Zhuofan Wen, Shangtong Gui, Yang Feng", "tldr": "", "abstract": "Inference acceleration of large language models (LLMs) has been put forward in many application scenarios and speculative decoding has shown its advantage in addressing inference acceleration. Speculative decoding usually introduces a draft model to assist the base LLM where the draft model produces drafts and the base LLM verifies the draft for acceptance or rejection. In this framework, the final inference speed is decided by the decoding speed of the draft model and the acceptance rate of the draft provided by the draft model. Currently the widely used draft models usually generate draft tokens for the next several positions in a non-autoregressive way without considering the correlations between draft tokens. Therefore, it has a high decoding speed but an unsatisfactory acceptance rate. In this paper, we focus on how to improve the performance of the draft model and aim to accelerate inference via a high acceptance rate. To this end, we propose a CTC-based draft model which strengthens the correlations between draft tokens during the draft phase, thereby generating higher-quality draft candidate sequences. Experiment results show that compared to strong baselines, the proposed method can achieve a higher acceptance rate and hence a faster inference speed.", "keywords": "Pretrained language model;Speculative decoding;CTC decoding algorithm", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/1732507d4d60f03c8b9aa4ae8b3f8ee1cae85ef9.zip", "author": "Zhuofan Wen;Shangtong Gui;Yang Feng", "authorids": "~Zhuofan_Wen2;~Shangtong_Gui1;~Yang_Feng4", "gender": "M;M;", "homepage": ";;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": ";342/3895.html;07/6095-4.html", "google_scholar": ";OZ0ZTxUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0002-3444-5641;;", "linkedin": ";%E5%B0%9A%E5%BD%A4-%E6%A1%82-9598a6199/;", "or_profile": "~Zhuofan_Wen2;~Shangtong_Gui1;~Yang_Feng4", "aff": "University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ict.ac.cn;ict.ac.cn", "position": "Undergrad student;MS student;Full Professor", "bibtex": "@inproceedings{\nwen2024speculative,\ntitle={Speculative Decoding with {CTC}-based Draft Model for {LLM} Inference Acceleration},\nauthor={Zhuofan Wen and Shangtong Gui and Yang Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pGeAcYhnN5}\n}", "github": "", "reviewers": "EXv7;NRvU;6n7r", "pdf_size": 712054, "rating": "5;6;6", "confidence": "3;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;2", "wc_summary": "83;101;39", "wc_strengths": "44;35;17", "wc_weaknesses": "67;135;65", "wc_questions": "57;113;3", "wc_limitations": "30;6;31", "wc_review": "281;390;155", "wc_reply_reviewers": "16;203;17", "wc_reply_authors": "785;832;813", "reply_reviewers": "1;2;1", "reply_authors": "3;4;4", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 74.33333333333333, 26.042699979499478 ], "wc_strengths_avg": [ 32.0, 11.224972160321824 ], "wc_weaknesses_avg": [ 89.0, 32.53715824509981 ], "wc_questions_avg": [ 57.666666666666664, 44.90978611493144 ], "wc_limitations_avg": [ 22.333333333333332, 11.55662388223981 ], "wc_review_avg": [ 275.3333333333333, 96.02198822260566 ], "wc_reply_reviewers_avg": [ 78.66666666666667, 87.91789098673577 ], "wc_reply_authors_avg": [ 810.0, 19.30457631409368 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5265412547955085542&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucas.ac.cn;ict.ac.cn;ict.ac.cn", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Computing Technology", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ict.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Refusal in Language Models Is Mediated by a Single Direction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93566", "id": "pH3XAQME6c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pH3XAQME6c", "openreview": "https://openreview.net/forum?id=pH3XAQME6c", "poster": "", "project": "", "author_site": "Andy Arditi, Oscar Obeso, Aaquib Syed, Daniel Paleka, Nina Panickssery, Wes Gurnee, Neel Nanda", "tldr": "", "abstract": "Conversational large language models are fine-tuned for both instruction-following and safety, resulting in models that obey benign requests but refuse harmful ones. While this refusal behavior is widespread across chat models, its underlying mechanisms remain poorly understood. In this work, we show that refusal is mediated by a one-dimensional subspace, across 13 popular open-source chat models up to 72B parameters in size. Specifically, for each model, we find a single direction such that erasing this direction from the model's residual stream activations prevents it from refusing harmful instructions, while adding this direction elicits refusal on even harmless instructions. Leveraging this insight, we propose a novel white-box jailbreak method that surgically disables a model's ability to refuse, with minimal effect on other capabilities. This interpretable rank-one weight edit results in an effective jailbreak technique that is simpler and more efficient than fine-tuning. Finally, we mechanistically analyze how adversarial suffixes suppress propagation of the refusal-mediating direction. Our findings underscore the brittleness of current safety fine-tuning methods. More broadly, our work showcases how an understanding of model internals can be leveraged to develop practical methods for controlling model behavior.", "keywords": "mechanistic interpretability;refusal;jailbreaks;language models;steering vectors;representation engineering", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Andy Arditi;Oscar Balcells Obeso;Aaquib Syed;Daniel Paleka;Nina Rimsky;Wes Gurnee;Neel Nanda", "authorids": "~Andy_Arditi1;~Oscar_Balcells_Obeso1;~Aaquib_Syed1;~Daniel_Paleka1;~Nina_Rimsky1;~Wes_Gurnee1;~Neel_Nanda1", "gender": "M;M;M;;F;;M", "homepage": "https://andyrdt.com;https://oscarbalcells.com;;https://danielpaleka.com/;https://ninapanickssery.com;https://www.wesg.me/;https://neelnanda.io", "dblp": ";;;324/2779;;;285/6389", "google_scholar": "NgyIgX4AAAAJ;;;;6-_i-jsAAAAJ;5sxXSfwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;", "linkedin": ";;aaquib-syed/;;nina-panickssery/;;https://linkedin.com/in/neel-nanda-993580151", "or_profile": "~Andy_Arditi1;~Oscar_Balcells_Obeso1;~Aaquib_Syed1;~Daniel_Paleka1;~Nina_Rimsky1;~Wes_Gurnee1;~Neel_Nanda1", "aff": "University of Chicago;ETHZ - ETH Zurich;University of Maryland, College Park;Department of Computer Science, ETHZ - ETH Zurich;Anthropic;Massachusetts Institute of Technology;Google DeepMind", "aff_domain": "uchicago.edu;ethz.ch;umd.edu;inf.ethz.ch;anthropic.com;mit.edu;deepmind.com", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\narditi2024refusal,\ntitle={Refusal in Language Models Is Mediated by a Single Direction},\nauthor={Andy Arditi and Oscar Balcells Obeso and Aaquib Syed and Daniel Paleka and Nina Rimsky and Wes Gurnee and Neel Nanda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pH3XAQME6c}\n}", "github": "", "reviewers": "zJsi;dLQd;ZAwD;DgJh", "pdf_size": 877083, "rating": "5;6;7;8", "confidence": "4;4;4;4", "soundness": "3;1;4;3", "novelty": "3;3;3;3", "presentation": "3;2;3;4", "wc_summary": "40;87;97;94", "wc_strengths": "64;112;90;28", "wc_weaknesses": "45;504;189;210", "wc_questions": "1;239;178;65", "wc_limitations": "1;40;12;8", "wc_review": "151;982;566;405", "wc_reply_reviewers": "159;51;70;57", "wc_reply_authors": "0;0;0;151", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.5, 23.092206477510977 ], "wc_strengths_avg": [ 73.5, 31.284980421921315 ], "wc_weaknesses_avg": [ 237.0, 166.72282387243806 ], "wc_questions_avg": [ 120.75, 93.15142242606926 ], "wc_limitations_avg": [ 15.25, 14.821858857781638 ], "wc_review_avg": [ 526.0, 301.9942052424185 ], "wc_reply_reviewers_avg": [ 84.25, 43.69997139587164 ], "wc_reply_authors_avg": [ 37.75, 65.38491798572512 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4051505923704312467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "uchicago.edu;ethz.ch;umd.edu;inf.ethz.ch;anthropic.com;mit.edu;deepmind.com", "author_num": 7, "aff_unique_index": "0;1;2;1;3;4;5", "aff_unique_norm": "University of Chicago;ETH Zurich;University of Maryland;Anthropic;Massachusetts Institute of Technology;Google", "aff_unique_dep": ";;;;;Google DeepMind", "aff_unique_url": "https://www.uchicago.edu;https://www.ethz.ch;https://www/umd.edu;https://www.anthropic.com;https://web.mit.edu;https://deepmind.com", "aff_unique_abbr": "UChicago;ETHZ;UMD;Anthropic;MIT;DeepMind", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Park;Zurich", "aff_country_unique_index": "0;1;0;1;0;0;2", "aff_country_unique": "United States;Switzerland;United Kingdom" }, { "title": "Make-An-Agent: A Generalizable Policy Network Generator with Behavior-Prompted Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93565", "id": "pHiTmEsAfZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pHiTmEsAfZ", "openreview": "https://openreview.net/forum?id=pHiTmEsAfZ", "poster": "", "project": "", "author_site": "Yongyuan Liang, Tingqiang Xu, Kaizhe Hu, Guangqi Jiang, Furong Huang, Huazhe Xu", "tldr": "", "abstract": "Can we generate a control policy for an agent using just one demonstration of desired behaviors as a prompt, as effortlessly as creating an image from a textual description?\nIn this paper, we present **Make-An-Agent**, a novel policy parameter generator that leverages the power of conditional diffusion models for behavior-to-policy generation. Guided by behavior embeddings that encode trajectory information, our policy generator synthesizes latent parameter representations, which can then be decoded into policy networks. \nTrained on policy network checkpoints and their corresponding trajectories, our generation model demonstrates remarkable versatility and scalability on multiple tasks and has a strong generalization ability on unseen tasks to output well-performed policies with only few-shot demonstrations as inputs. We showcase its efficacy and efficiency on various domains and tasks, including varying objectives, behaviors, and even across different robot manipulators. Beyond simulation, we directly deploy policies generated by **Make-An-Agent** onto real-world robots on locomotion tasks. Project page: https://cheryyunl.github.io/make-an-agent/.", "keywords": "Diffusion Model;Policy Learning;Parameter Generation;Reinforcement Learning", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Yongyuan Liang;Tingqiang Xu;Kaizhe Hu;Guangqi Jiang;Furong Huang;Huazhe Xu", "authorids": "~Yongyuan_Liang1;~Tingqiang_Xu1;~Kaizhe_Hu1;~Guangqi_Jiang1;~Furong_Huang1;~Huazhe_Xu1", "gender": "F;M;M;M;F;M", "homepage": "https://cheryyunl.github.io/;;https://hukz18.github.io/;https://lucca-cherries.github.io/;https://furong-huang.com;http://hxu.rocks", "dblp": "238/4104;;330/4940;221/0785;72/8513;164/9006", "google_scholar": "GQToORIAAAAJ;HGTHVUgAAAAJ;mPpYLhcAAAAJ;;13yyuCcAAAAJ;t9HPFawAAAAJ", "orcid": ";;;0000-0003-4815-0174;;", "linkedin": "https://linkedin.com/in/yongyuan-l-31462a17a;;%E5%BC%80%E5%93%B2-%E8%83%A1-40137718a/?miniProfileUrn=urn%3Ali%3Afs_miniProfile%3AACoAACyMbIEBJhMDJ4b7wLQyHotP_JGOnWDoEDU;lucas-chiang1016/;;", "or_profile": "~Yongyuan_Liang1;~Tingqiang_Xu1;~Kaizhe_Hu1;~Guangqi_Jiang1;~Furong_Huang1;~Huazhe_Xu1", "aff": "University of Maryland, College Park;Tsinghua University;Stanford University;Sichuan University;University of Maryland;Tsinghua University", "aff_domain": "umd.edu;mails.tsinghua.edu.cn;stanford.edu;scu.edu.cn;cs.umd.edu;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Researcher;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliang2024makeanagent,\ntitle={Make-An-Agent: A Generalizable Policy Network Generator with Behavior-Prompted Diffusion},\nauthor={Yongyuan Liang and Tingqiang Xu and Kaizhe Hu and Guangqi Jiang and Furong Huang and Huazhe Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pHiTmEsAfZ}\n}", "github": "", "reviewers": "YLrf;8Q7F;957J;U6yw;Pxd2", "pdf_size": 7792870, "rating": "4;5;6;6;7", "confidence": "4;4;4;3;4", "soundness": "3;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "2;3;4;3;4", "wc_summary": "119;44;171;103;97", "wc_strengths": "109;78;55;16;144", "wc_weaknesses": "84;159;171;37;127", "wc_questions": "182;26;32;354;65", "wc_limitations": "256;24;34;6;7", "wc_review": "750;331;463;516;440", "wc_reply_reviewers": "0;96;125;101;113", "wc_reply_authors": "102;494;326;246;621", "reply_reviewers": "0;2;1;1;1", "reply_authors": "2;3;2;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 106.8, 40.803921380181094 ], "wc_strengths_avg": [ 80.4, 43.957251961422706 ], "wc_weaknesses_avg": [ 115.6, 49.516058001420106 ], "wc_questions_avg": [ 131.8, 124.50606410934368 ], "wc_limitations_avg": [ 65.4, 95.88242800430118 ], "wc_review_avg": [ 500.0, 138.77031382828247 ], "wc_reply_reviewers_avg": [ 87.0, 44.64526850630423 ], "wc_reply_authors_avg": [ 357.8, 182.6301179981002 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.1961161351381841, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9387796930757281273&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "umd.edu;mails.tsinghua.edu.cn;stanford.edu;scu.edu.cn;cs.umd.edu;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;1", "aff_unique_norm": "University of Maryland;Tsinghua University;Stanford University;Sichuan University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www/umd.edu;https://www.tsinghua.edu.cn;https://www.stanford.edu;https://www.scu.edu.cn", "aff_unique_abbr": "UMD;THU;Stanford;SCU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "College Park;;Stanford", "aff_country_unique_index": "0;1;0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Scalable Constrained Policy Optimization for Safe Multi-agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93564", "id": "pJlFURyTG5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pJlFURyTG5", "openreview": "https://openreview.net/forum?id=pJlFURyTG5", "poster": "/media/PosterPDFs/NeurIPS%202024/93564.png?t=1731499125.1635182", "project": "", "author_site": "Lijun Zhang, Lin Li, Wei Wei, Huizhong Song, Yaodong Yang, Jiye Liang", "tldr": "", "abstract": "A challenging problem in seeking to bring multi-agent reinforcement learning (MARL) techniques into real-world applications, such as autonomous driving and drone swarms, is how to control multiple agents safely and cooperatively to accomplish tasks. Most existing safe MARL methods learn the centralized value function by introducing a global state to guide safety cooperation. However, the global coupling arising from agents\u2019 safety constraints and the exponential growth of the state-action space size limit their applicability in instant communication or computing resource-constrained systems and larger multi-agent systems.\u00a0In this paper, we develop a novel scalable\u00a0and theoretically-justified multi-agent constrained policy optimization method. This method utilizes the rigorous bounds of the trust region method and the bounds of the truncated advantage function to provide a new local policy optimization objective for each agent. Also, we prove that the safety constraints and the joint policy improvement can\u00a0be met\u00a0when each agent adopts a sequential update scheme to optimize a $\\kappa$-hop policy. Then, we propose a practical algorithm called Scalable MAPPO-Lagrangian (Scal-MAPPO-L). The proposed method\u2019s effectiveness\u00a0is verified\u00a0on a collection of benchmark tasks, and the results support our theory that decentralized training with local interactions can still improve reward performance and satisfy safe constraints.", "keywords": "Multi-agent reinforcement learning;policy optimization;safe learning;scalable method", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/cd180c748ace2ac76c5d5d4cc9d97e7bebf96639.zip", "author": "Lijun Zhang;Lin Li;Wei Wei;Huizhong Song;Yaodong Yang;Jiye Liang", "authorids": "~Lijun_Zhang8;~Lin_Li17;~Wei_Wei13;~Huizhong_Song1;~Yaodong_Yang1;~Jiye_Liang1", "gender": "M;F;M;M;M;M", "homepage": ";http://cs.sxu.edu.cn/faculty/lecturer/4526/index.htm;;https://github.com/Bonstop;https://www.yangyaodong.com;https://jiyeliang.github.io/index.html", "dblp": ";;24/4105-18.html;;170/1496-1;80/6535", "google_scholar": ";;;;https://scholar.google.co.uk/citations?user=6yL0xw8AAAAJ;iGc61hUAAAAJ", "orcid": "0000-0003-4360-9716;;;;0000-0001-8132-5613;0000-0001-5887-9327", "linkedin": ";;;;yaodong-yang;", "or_profile": "~Lijun_Zhang8;~Lin_Li17;~Wei_Wei13;~Huizhong_Song1;~Yaodong_Yang1;~Jiye_Liang1", "aff": "Shanxi University;;shanxi university;Shanxi University;Peking University;Shanxi University", "aff_domain": "sxu.edu.cn;;sxu.edu.cn;sxu.edu.cn;pku.edu.cn;sxu.edu.cn", "position": "PhD student;;Full Professor;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024scalable,\ntitle={Scalable Constrained Policy Optimization for Safe Multi-agent Reinforcement Learning},\nauthor={Lijun Zhang and Lin Li and Wei Wei and Huizhong Song and Yaodong Yang and Jiye Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pJlFURyTG5}\n}", "github": "", "reviewers": "GtLQ;UGAx;a1c1", "pdf_size": 793963, "rating": "6;6;7", "confidence": "4;4;3", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "58;54;63", "wc_strengths": "68;116;65", "wc_weaknesses": "105;203;99", "wc_questions": "152;214;64", "wc_limitations": "13;1;6", "wc_review": "396;588;297", "wc_reply_reviewers": "19;37;38", "wc_reply_authors": "50;48;37", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 58.333333333333336, 3.6817870057290873 ], "wc_strengths_avg": [ 83.0, 23.366642891095847 ], "wc_weaknesses_avg": [ 135.66666666666666, 47.67482447674966 ], "wc_questions_avg": [ 143.33333333333334, 61.543119481836115 ], "wc_limitations_avg": [ 6.666666666666667, 4.921607686744467 ], "wc_review_avg": [ 427.0, 120.8056290079233 ], "wc_reply_reviewers_avg": [ 31.333333333333332, 8.73053390247253 ], "wc_reply_authors_avg": [ 45.0, 5.715476066494082 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1018388648564285061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "sxu.edu.cn;;sxu.edu.cn;sxu.edu.cn;pku.edu.cn;sxu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Shanxi University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.sxu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "SXU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "The Limits of Transfer Reinforcement Learning with Latent Low-rank Structure", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93563", "id": "pK2qGRY2Hv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pK2qGRY2Hv", "openreview": "https://openreview.net/forum?id=pK2qGRY2Hv", "poster": "", "project": "", "author_site": "Tyler Sam, Yudong Chen, Christina Yu", "tldr": "", "abstract": "Many reinforcement learning (RL) algorithms are too costly to use in practice due to the large sizes $S,A$ of the problem's state and action space. To resolve this issue, we study transfer RL with latent low rank structure. We consider the problem of transferring a latent low rank representation when the source and target MDPs have transition kernels with Tucker rank $(S, d, A)$, $(S ,S , d), (d, S , A )$, or $(d , d , d )$. In each setting, we introduce the transfer-ability coefficient $\\alpha$ that measures the difficulty of representational transfer. Our algorithm learns latent representations in each source MDP and then exploits the linear structure to remove the dependence on $S , A $, or $SA $ in the target MDP regret bound. We complement our positive results with information theoretic lower bounds that show our algorithms (excluding the ($d, d, d$) setting) are minimax-optimal with respect to $\\alpha$.", "keywords": "Reinforcement Learning;Transfer Learning;Representation Learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Tyler Sam;Yudong Chen;Christina Yu", "authorids": "~Tyler_Sam1;~Yudong_Chen1;~Christina_Yu1", "gender": ";M;", "homepage": ";https://pages.cs.wisc.edu/~yudongchen/;https://cleeyu.orie.cornell.edu/", "dblp": ";15/1975-1;246/4764", "google_scholar": ";ze5rCdwAAAAJ;GyzNZMcAAAAJ", "orcid": " 0000-0001-7974-3226;0000-0002-6416-5635;", "linkedin": ";;", "or_profile": "~Tyler_Sam1;~Yudong_Chen1;~Christina_Yu1", "aff": "Cornell University;Department of Computer Sciences, University of Wisconsin - Madison;Cornell University", "aff_domain": "cornell.edu;cs.wisc.edu;cornell.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nsam2024the,\ntitle={The Limits of Transfer Reinforcement Learning with Latent Low-rank Structure},\nauthor={Tyler Sam and Yudong Chen and Christina Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pK2qGRY2Hv}\n}", "github": "", "reviewers": "LmTe;nw9E;Stu9", "pdf_size": 719285, "rating": "5;6;7", "confidence": "2;3;3", "soundness": "2;3;3", "novelty": "3;2;3", "presentation": "1;3;2", "wc_summary": "166;111;56", "wc_strengths": "30;97;68", "wc_weaknesses": "86;106;52", "wc_questions": "843;173;167", "wc_limitations": "8;60;7", "wc_review": "1133;547;350", "wc_reply_reviewers": "122;24;248", "wc_reply_authors": "16;0;614", "reply_reviewers": "1;1;2", "reply_authors": "2;1;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 111.0, 44.90731195102493 ], "wc_strengths_avg": [ 65.0, 27.434771124736336 ], "wc_weaknesses_avg": [ 81.33333333333333, 22.291004663067316 ], "wc_questions_avg": [ 394.3333333333333, 317.2646984599593 ], "wc_limitations_avg": [ 25.0, 24.752104287649296 ], "wc_review_avg": [ 676.6666666666666, 332.5480750541525 ], "wc_reply_reviewers_avg": [ 131.33333333333334, 91.68545262047967 ], "wc_reply_authors_avg": [ 210.0, 285.7458077849379 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:BVDxYYWpxg4J:scholar.google.com/&scioq=The+Limits+of+Transfer+Reinforcement+Learning+with+Latent+Low-rank+Structure&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "cornell.edu;cs.wisc.edu;cornell.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;University of Wisconsin-Madison", "aff_unique_dep": ";Department of Computer Sciences", "aff_unique_url": "https://www.cornell.edu;https://www.wisc.edu", "aff_unique_abbr": "Cornell;UW-Madison", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Unleashing Multispectral Video's Potential in Semantic Segmentation: A Semi-supervised Viewpoint and New UAV-View Benchmark", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93562", "id": "pLoX8Og3bH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pLoX8Og3bH", "openreview": "https://openreview.net/forum?id=pLoX8Og3bH", "poster": "/media/PosterPDFs/NeurIPS%202024/93562.png?t=1731721850.7969189", "project": "", "author_site": "Wei Ji, Jingjing Li, Wenbo Li, Yilin Shen, Li cheng, Hongxia Jin", "tldr": "", "abstract": "Thanks to the rapid progress in RGB & thermal imaging, also known as multispectral imaging, the task of multispectral video semantic segmentation, or MVSS in short, has recently drawn significant attentions. Noticeably, it offers new opportunities in improving segmentation performance under unfavorable visual conditions such as poor light or overexposure. Unfortunately, there are currently very few datasets available, including for example MVSeg dataset that focuses purely toward eye-level view; and it features the sparse annotation nature due to the intensive demands of labeling process. To address these key challenges of the MVSS task, this paper presents two major contributions: the introduction of MVUAV, a new MVSS benchmark dataset, and the development of a dedicated semi-supervised MVSS baseline - SemiMV. Our MVUAV dataset is captured via Unmanned Aerial Vehicles (UAV), which offers a unique oblique bird\u2019s-eye view complementary to the existing MVSS datasets; it also encompasses a broad range of day/night lighting conditions and over 30 semantic categories. In the meantime, to better leverage the sparse annotations and extra unlabeled RGB-Thermal videos, a semi-supervised learning baseline, SemiMV, is proposed to enforce consistency regularization through a dedicated Cross-collaborative Consistency Learning (C3L) module and a denoised temporal aggregation strategy. Comprehensive empirical evaluations on both MVSeg and MVUAV benchmark datasets have showcased the efficacy of our SemiMV baseline.", "keywords": "Computer Vision;Deep Learning;Semantic Segmentation", "primary_area": "machine_vision", "supplementary_material": "/attachment/3f8cc41b8a160fd01781401dd9c8904614944a9c.zip", "author": "Wei Ji;Jingjing Li;Wenbo Li;Yilin Shen;Li cheng;Hongxia Jin", "authorids": "~Wei_Ji2;~Jingjing_Li5;~Wenbo_Li5;~Yilin_Shen1;~Li_Cheng1;~Hongxia_Jin1", "gender": ";F;M;M;Not Specified;", "homepage": ";;https://www.albany.edu/~wl523363/main.html;;https://www.ece.ualberta.ca/~lcheng5/;", "dblp": ";;;30/383;13/4938-1;", "google_scholar": ";1QYsOAUAAAAJ;gIAOq9YAAAAJ;9PSFMzAAAAAJ;https://scholar.google.ca/citations?user=9IRFiEQAAAAJ;", "orcid": ";;;;0000-0003-3261-3533;", "linkedin": ";;wenbo-li-2003a829/;;;", "or_profile": "~Wei_Ji2;~Jingjing_Li5;~Wenbo_Li5;~Yilin_Shen1;~Li_Cheng1;~Hongxia_Jin1", "aff": ";University of Alberta;Samsung Research America;Samsung Research America;University of Alberta;", "aff_domain": ";ualberta.ca;samsung.com;gmail.com;ualberta.ca;", "position": ";PhD student;Staff Researcher;Principal Researcher;Full Professor;", "bibtex": "@inproceedings{\nji2024unleashing,\ntitle={Unleashing Multispectral Video's Potential in Semantic Segmentation: A Semi-supervised Viewpoint and New {UAV}-View Benchmark},\nauthor={Wei Ji and Jingjing Li and Wenbo Li and Yilin Shen and Li cheng and Hongxia Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pLoX8Og3bH}\n}", "github": "", "reviewers": "XBRU;Mx7S;w3ua", "pdf_size": 5261461, "rating": "5;6;7", "confidence": "5;4;4", "soundness": "2;2;3", "novelty": "2;2;4", "presentation": "1;2;3", "wc_summary": "110;44;52", "wc_strengths": "70;81;69", "wc_weaknesses": "265;201;46", "wc_questions": "6;4;120", "wc_limitations": "17;4;53", "wc_review": "468;334;340", "wc_reply_reviewers": "15;27;22", "wc_reply_authors": "54;68;55", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 68.66666666666667, 29.4089933334837 ], "wc_strengths_avg": [ 73.33333333333333, 5.436502143433363 ], "wc_weaknesses_avg": [ 170.66666666666666, 91.94321919291033 ], "wc_questions_avg": [ 43.333333333333336, 54.21766829692656 ], "wc_limitations_avg": [ 24.666666666666668, 20.725722075613085 ], "wc_review_avg": [ 380.6666666666667, 61.80255298574287 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 4.9216076867444665 ], "wc_reply_authors_avg": [ 59.0, 6.377042156569663 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a3sTuBiQ45IJ:scholar.google.com/&scioq=Unleashing+Multispectral+Video%27s+Potential+in+Semantic+Segmentation:+A+Semi-supervised+Viewpoint+and+New+UAV-View+Benchmark&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";ualberta.ca;samsung.com;gmail.com;ualberta.ca;", "author_num": 6, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Alberta;Samsung", "aff_unique_dep": ";Samsung Research America", "aff_unique_url": "https://www.ualberta.ca;https://www.samsung.com/us/careers/research/", "aff_unique_abbr": "UAlberta;SRA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Canada;United States" }, { "title": "OT4P: Unlocking Effective Orthogonal Group Path for Permutation Relaxation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93561", "id": "pMJFaBzoG3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pMJFaBzoG3", "openreview": "https://openreview.net/forum?id=pMJFaBzoG3", "poster": "/media/PosterPDFs/NeurIPS%202024/93561.png?t=1730784836.1516554", "project": "", "author_site": "Yaming Guo, chen zhu, Hengshu Zhu, Tieru Wu", "tldr": "", "abstract": "Optimization over permutations is typically an NP-hard problem that arises extensively in ranking, matching, tracking, etc. Birkhoff polytope-based relaxation methods have made significant advancements, particularly in penalty-free optimization and probabilistic inference. Relaxation onto the orthogonal group offers unique potential advantages such as a lower representation dimension and preservation of inner products; however, equally effective approaches remain unexplored. To bridge the gap, we present a temperature-controlled differentiable transformation that maps unconstrained vector space to the orthogonal group, where the temperature, in the limit, concentrates orthogonal matrices near permutation matrices. This transformation naturally implements a parameterization for the relaxation of permutation matrices, allowing for gradient-based optimization of problems involving permutations. Additionally, by deriving a re-parameterized gradient estimator, this transformation also provides efficient stochastic optimization over the latent permutations. Extensive experiments involving the optimization over permutation matrices validate the effectiveness of the proposed method.", "keywords": "permutation matrix;orthogonal group;differentiable transformation;stochastic optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Yaming Guo;Chen Zhu;Hengshu Zhu;Tieru Wu", "authorids": "~Yaming_Guo1;~Chen_Zhu5;~Hengshu_Zhu1;~Tieru_Wu1", "gender": "M;;;M", "homepage": "https://yamingguo98.github.io/;;http://www.zhuhengshu.com/;http://sai.jlu.edu.cn/info/1094/3443.htm", "dblp": ";;61/10440;", "google_scholar": "zGBRJwMAAAAJ;;55MQBzYAAAAJ;", "orcid": ";;0000-0003-4570-643X;", "linkedin": ";;;", "or_profile": "~Yaming_Guo1;~Chen_Zhu5;~Hengshu_Zhu1;~Tieru_Wu1", "aff": "Jilin University;;Kanzhun Limited (BOSS Zhipin);school of AI, Jilin University", "aff_domain": "jlu.edu.cn;;kanzhun.com;sai.jlu.edu.cn", "position": "MS student;;Chief Research Scientist;Full Professor", "bibtex": "@inproceedings{\nguo2024otp,\ntitle={{OT}4P: Unlocking Effective Orthogonal Group Path for Permutation Relaxation},\nauthor={Yaming Guo and Chen Zhu and Hengshu Zhu and Tieru Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pMJFaBzoG3}\n}", "github": "", "reviewers": "k6w2;FuEB;3CCZ", "pdf_size": 2435785, "rating": "6;6;6", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;2;2", "presentation": "3;3;3", "wc_summary": "69;31;42", "wc_strengths": "71;59;61", "wc_weaknesses": "433;367;228", "wc_questions": "68;32;34", "wc_limitations": "50;3;54", "wc_review": "691;492;419", "wc_reply_reviewers": "27;38;53", "wc_reply_authors": "47;47;49", "reply_reviewers": "1;1;2", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 47.333333333333336, 15.965240019770729 ], "wc_strengths_avg": [ 63.666666666666664, 5.2493385826745405 ], "wc_weaknesses_avg": [ 342.6666666666667, 85.44133790046959 ], "wc_questions_avg": [ 44.666666666666664, 16.519348924485154 ], "wc_limitations_avg": [ 35.666666666666664, 23.156472577277874 ], "wc_review_avg": [ 534.0, 114.9463643038207 ], "wc_reply_reviewers_avg": [ 39.333333333333336, 10.656244908763853 ], "wc_reply_authors_avg": [ 47.666666666666664, 0.9428090415820634 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15857926471429403828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "jlu.edu.cn;;kanzhun.com;sai.jlu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Jilin University;Kanzhun Limited", "aff_unique_dep": ";", "aff_unique_url": "http://www.jlu.edu.cn;https://www.zhipin.com", "aff_unique_abbr": "JLU;BOSS Zhipin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "The Implicit Bias of Heterogeneity towards Invariance: A Study of Multi-Environment Matrix Sensing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93560", "id": "pMPBxMf8T3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pMPBxMf8T3", "openreview": "https://openreview.net/forum?id=pMPBxMf8T3", "poster": "", "project": "", "author_site": "Yang Xu, Yihong Gu, Cong Fang", "tldr": "", "abstract": "Models are expected to engage in invariance learning, which involves distinguishing the core relations that remain consistent across varying environments to ensure the predictions are safe, robust and fair. While existing works consider specific algorithms to realize invariance learning, we show that model has the potential to learn invariance through standard training procedures. In other words, this paper studies the implicit bias of Stochastic Gradient Descent (SGD) over heterogeneous data and shows that the implicit bias drives the model learning towards an invariant solution. We call the phenomenon the implicit invariance learning. Specifically, we theoretically investigate the multi-environment low-rank matrix sensing problem where in each environment, the signal comprises (i) a lower-rank invariant part shared across all environments; and (ii) a significantly varying environment-dependent spurious component. The key insight is, through simply employing the large step size large-batch SGD sequentially in each environment without any explicit regularization, the oscillation caused by heterogeneity can provably prevent model learning spurious signals. The model reaches the invariant solution after certain iterations. In contrast, model learned using pooled SGD over all data would simultaneously learn both the invariant and spurious signals. Overall, we unveil another implicit bias that is a result of the symbiosis between the heterogeneity of data and modern algorithms, which is, to the best of our knowledge, first in the literature.", "keywords": "implicit bias;matrix sensing;invariance learning;non-convex optimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yang Xu;Yihong Gu;Cong Fang", "authorids": "~Yang_Xu17;~Yihong_Gu1;~Cong_Fang1", "gender": ";M;M", "homepage": ";https://sites.google.com/view/yihongg/;https://congfang-ml.github.io/", "dblp": ";;140/6568", "google_scholar": ";ChwmtBkAAAAJ;N2M9RPoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yang_Xu17;~Yihong_Gu1;~Cong_Fang1", "aff": ";Princeton University;Peking University", "aff_domain": ";princeton.edu;pku.edu.cn", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxu2024the,\ntitle={The Implicit Bias of Heterogeneity towards Invariance: A Study of Multi-Environment Matrix Sensing},\nauthor={Yang Xu and Yihong Gu and Cong Fang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pMPBxMf8T3}\n}", "github": "", "reviewers": "uFbc;JQ35;hg76;a5nG", "pdf_size": 794804, "rating": "5;6;7;7", "confidence": "3;3;2;3", "soundness": "3;2;4;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "163;62;86;49", "wc_strengths": "28;17;41;61", "wc_weaknesses": "214;169;18;67", "wc_questions": "105;19;283;273", "wc_limitations": "1;1;2;14", "wc_review": "511;268;430;464", "wc_reply_reviewers": "0;170;13;22", "wc_reply_authors": "0;222;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.0, 44.1871021905714 ], "wc_strengths_avg": [ 36.75, 16.37643123516232 ], "wc_weaknesses_avg": [ 117.0, 78.124899999936 ], "wc_questions_avg": [ 170.0, 112.25417586887359 ], "wc_limitations_avg": [ 4.5, 5.5 ], "wc_review_avg": [ 418.25, 91.39030309611627 ], "wc_reply_reviewers_avg": [ 51.25, 69.0049817042219 ], "wc_reply_authors_avg": [ 55.5, 96.12881982007269 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:id1ZUxvizYkJ:scholar.google.com/&scioq=The+Implicit+Bias+of+Heterogeneity+towards+Invariance:+A+Study+of+Multi-Environment+Matrix+Sensing&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";princeton.edu;pku.edu.cn", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Princeton University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;http://www.pku.edu.cn", "aff_unique_abbr": "Princeton;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "title": "Artificial Generational Intelligence: Cultural Accumulation in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93559", "id": "pMaCRgu8GV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pMaCRgu8GV", "openreview": "https://openreview.net/forum?id=pMaCRgu8GV", "poster": "", "project": "", "author_site": "Jonathan Cook, Chris Lu, Edward Hughes, Joel Leibo, Jakob Foerster", "tldr": "", "abstract": "Cultural accumulation drives the open-ended and diverse progress in capabilities spanning human history. It builds an expanding body of knowledge and skills by combining individual exploration with inter-generational information transmission. Despite its widespread success among humans, the capacity for artificial learning agents to accumulate culture remains under-explored. In particular, approaches to reinforcement learning typically strive for improvements over only a single lifetime. Generational algorithms that do exist fail to capture the open-ended, emergent nature of cultural accumulation, which allows individuals to trade-off innovation and imitation. Building on the previously demonstrated ability for reinforcement learning agents to perform social learning, we find that training setups which balance this with independent learning give rise to cultural accumulation. These accumulating agents outperform those trained for a single lifetime with the same cumulative experience. We explore this accumulation by constructing two models under two distinct notions of a generation: episodic generations, in which accumulation occurs via in-context learning and train-time generations, in which accumulation occurs via in-weights learning. In-context and in-weights cultural accumulation can be interpreted as analogous to knowledge and skill accumulation, respectively. To the best of our knowledge, this work is the first to present general models that achieve emergent cultural accumulation in reinforcement learning, opening up new avenues towards more open-ended learning systems, as well as presenting new opportunities for modelling human culture.", "keywords": "social learning;cultural accumulation;in-context learning;reinforcement learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Jonathan Cook;Chris Lu;Edward Hughes;Joel Z Leibo;Jakob Nicolaus Foerster", "authorids": "~Jonathan_Cook3;~Chris_Lu1;~Edward_Hughes1;~Joel_Z_Leibo1;~Jakob_Nicolaus_Foerster1", "gender": "M;;M;M;M", "homepage": ";;http://www.edwardhughes.io;http://www.jzleibo.com;https://www.jakobfoerster.com", "dblp": ";77/9579;217/2003;33/11107;176/5095", "google_scholar": "7tcPHHYAAAAJ;4WLoIRsAAAAJ;3tj5358AAAAJ;https://scholar.google.com/citations?hl=en;6z4lQzMAAAAJ", "orcid": ";;;0000-0002-3153-916X;", "linkedin": "jonathan-cook-78339618a/;;;;", "or_profile": "~Jonathan_Cook3;~Chris_Lu1;~Edward_Hughes1;~Joel_Z_Leibo1;~Jakob_Nicolaus_Foerster1", "aff": "University of Oxford;University of Oxford;Google DeepMind;Google DeepMind;University of Oxford, University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;deepmind.com;deepmind.com;eng.ox.ac.uk", "position": "PhD student;PhD student;Researcher;Research scientist;Associate Professor", "bibtex": "@inproceedings{\ncook2024artificial,\ntitle={Artificial Generational Intelligence: Cultural Accumulation in Reinforcement Learning},\nauthor={Jonathan Cook and Chris Lu and Edward Hughes and Joel Z Leibo and Jakob Nicolaus Foerster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pMaCRgu8GV}\n}", "github": "", "reviewers": "2ZBi;R6qB;h1KJ;9itL", "pdf_size": 3364572, "rating": "5;6;7;8", "confidence": "3;4;3;4", "soundness": "2;2;4;4", "novelty": "3;3;3;4", "presentation": "2;2;2;3", "wc_summary": "51;197;237;141", "wc_strengths": "32;23;128;66", "wc_weaknesses": "452;367;199;72", "wc_questions": "253;30;370;78", "wc_limitations": "19;1;237;83", "wc_review": "807;618;1171;440", "wc_reply_reviewers": "261;459;35;447", "wc_reply_authors": "836;690;43;71", "reply_reviewers": "2;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 156.5, 69.80508577460527 ], "wc_strengths_avg": [ 62.25, 41.20907060344846 ], "wc_weaknesses_avg": [ 272.5, 147.2693111276073 ], "wc_questions_avg": [ 182.75, 136.29265387393409 ], "wc_limitations_avg": [ 85.0, 92.89779329994873 ], "wc_review_avg": [ 759.0, 270.9658650088605 ], "wc_reply_reviewers_avg": [ 300.5, 172.21715942379262 ], "wc_reply_authors_avg": [ 410.0, 356.89144007667096 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4472135954999579, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12620998011345752775&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ox.ac.uk;ox.ac.uk;deepmind.com;deepmind.com;eng.ox.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Oxford;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "pNQB78UDp4", "title": "CVPT: Cross-Attention help Visual Prompt Tuning adapt visual task", "track": "main", "status": "Reject", "tldr": "", "abstract": "In recent years, the rapid expansion of model sizes has led to large-scale pre-trained models demonstrating remarkable capabilities. Consequently, there has been a trend towards increasing the scale of models. However, this trend introduces significant challenges, including substantial computational costs of training and transfer to downstream tasks. To address these issues, Parameter-Efficient Fine-Tuning (PEFT) methods have been introduced. These methods optimize large-scale pre-trained models for specific tasks by fine-tuning a select group of parameters. Among these PEFT methods, adapter-based and prompt-based methods are the primary techniques. Specifically, in the field of visual fine-tuning, adapters gain prominence over prompts because of the latter\u2019s relatively weaker performance and efficiency. Under the circumstances, we refine the widely-used Visual Prompt Tuning (VPT) method, proposing Cross Visual Prompt Tuning (CVPT). CVPT calculates cross-attention between the prompt tokens and the embedded tokens, which allows us to compute the semantic relationship between them and conduct the fine-tuning of models exactly to adapt visual tasks better. Furthermore, we introduce the weight-sharing mechanism to initialize the parameters of cross-attention, which avoids massive learnable parameters from cross-attention and enhances the representative capability of cross-attention. We conduct comprehensive testing across 25 datasets and the result indicates that CVPT significantly improves VPT\u2019s performance and efficiency in visual tasks. For example, on the VTAB-1K benchmark, CVPT outperforms VPT over 4\\% in average accuracy, rivaling the advanced adapter methods in performance and efficiency. Our experiments confirm that prompt-based methods can achieve exceptional results in visual fine-tuning.", "keywords": "deep learning;transfer learning;parameter-efficient fine-tuning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lingyun Huang;Jianxu Mao;Yaonan Wang;Junfei YI;Ziming Tao", "authorids": "~Lingyun_Huang2;~Jianxu_Mao1;~Yaonan_Wang3;~Junfei_YI1;~Ziming_Tao1", "gender": "M;M;M;M;M", "homepage": "https://www.researchgate.net/profile/Lingyun-Huang-5;http://eeit.hnu.edu.cn/info/1404/4625.htm;https://eeit.hnu.edu.cn/info/1277/4490.htm;;https://www.researchgate.net/profile/Tao-Ziming-2", "dblp": ";;90/548-1;;", "google_scholar": ";;;uPcYnKIAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "orcid": ";;0009-0004-5365-6254;;0009-0003-5280-5281", "linkedin": ";;;;", "or_profile": "~Lingyun_Huang2;~Jianxu_Mao1;~Yaonan_Wang3;~Junfei_YI1;~Ziming_Tao1", "aff": "Hunan University;Hunan University;Hunan University;Hunan University;Hunan University", "aff_domain": "hnu.edu.cn;hnu.edu.cn;hnu.edu.cn;hnu.edu.cn;hnu.edu.cn", "position": "MS student;Full Professor;Full Professor;PhD student;PhD student", "bibtex": "@misc{\nanonymous2024cvpt,\ntitle={{CVPT}: Cross-Attention help Visual Prompt Tuning adapt visual task},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=pNQB78UDp4}\n}", "github": "", "project": "", "reviewers": "MMER;LiJE;cNsm;ADwB", "site": "https://openreview.net/forum?id=pNQB78UDp4", "pdf_size": 1168818, "rating": "3;4;4;6", "confidence": "5;4;4;4", "soundness": "2;2;2;3", "novelty": "1;2;2;3", "presentation": "3;1;3;3", "wc_summary": "43;262;28;102", "wc_strengths": "44;57;34;51", "wc_weaknesses": "175;359;267;60", "wc_questions": "22;5;21;23", "wc_limitations": "1;19;6;32", "wc_review": "285;702;356;268", "wc_reply_reviewers": "46;139;10;45", "wc_reply_authors": "0;714;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 108.75, 92.70214398815165 ], "wc_strengths_avg": [ 46.5, 8.558621384311845 ], "wc_weaknesses_avg": [ 215.25, 110.75282163448477 ], "wc_questions_avg": [ 17.75, 7.39509972887452 ], "wc_limitations_avg": [ 14.5, 12.05197079319395 ], "wc_review_avg": [ 402.75, 175.89680923768913 ], "wc_reply_reviewers_avg": [ 60.0, 47.859168400631454 ], "wc_reply_authors_avg": [ 178.5, 309.1710691510446 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10478989809034675646&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hunan University", "aff_unique_dep": "", "aff_unique_url": "http://www.hunu.edu.cn/", "aff_unique_abbr": "HNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "KV Cache is 1 Bit Per Channel: Efficient Large Language Model Inference with Coupled Quantization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93558", "id": "pNnvzQsS4P", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pNnvzQsS4P", "openreview": "https://openreview.net/forum?id=pNnvzQsS4P", "poster": "/media/PosterPDFs/NeurIPS%202024/93558.png?t=1733933130.9527807", "project": "", "author_site": "Tianyi Zhang, Jonah Yi, Zhaozhuo Xu, Anshumali Shrivastava", "tldr": "", "abstract": "Efficient deployment of Large Language Models (LLMs) requires batching multiple requests together to improve throughput. As batch size, context length, or model size increases, the size of key and value (KV) cache quickly becomes the main contributor to GPU memory usage and the bottleneck of inference latency and throughput. Quantization has emerged as an effective technique for KV cache compression, but existing methods still fail at very low bit widths. Currently, KV cache quantization is performed per-channel or per-token independently. Our analysis shows that distinct channels of a key/value activation embedding are highly interdependent, and the joint entropy of multiple channels grows at a slower rate than the sum of their marginal entropy, which implies that per-channel independent quantization is sub-optimal. To mitigate this sub-optimality, we propose Coupled Quantization (CQ), which couples multiple key/value channels together for quantization to exploit their interdependence and encode the activations in a more information-efficient manner. Extensive experiments reveal that CQ compares favorably with existing baselines in preserving model quality, and improves inference throughput by 1.4\u20133.5$\\times$ relative to the uncompressed baseline. Furthermore, we demonstrate that CQ can preserve model quality reasonably with KV cache quantized down to 1 bit.", "keywords": "large language model;efficiency;kv cache;quantization", "primary_area": "other", "supplementary_material": "", "author": "Tianyi Zhang;Jonah Wonkyu Yi;Zhaozhuo Xu;Anshumali Shrivastava", "authorids": "~Tianyi_Zhang6;~Jonah_Wonkyu_Yi1;~Zhaozhuo_Xu1;~Anshumali_Shrivastava1", "gender": "M;M;M;M", "homepage": "https://github.com/tonyzhang617;https://www.jonahyi.com/;https://ottovonxu.github.io/;https://www.cs.rice.edu/~as143/", "dblp": "17/322-11.html;;195/4352;63/9828", "google_scholar": "ekRl428AAAAJ;;7tDlVAsAAAAJ;https://scholar.google.com.tw/citations?user=SGT23RAAAAAJ", "orcid": ";;;", "linkedin": ";jonah-w-yi;;", "or_profile": "~Tianyi_Zhang6;~Jonah_Wonkyu_Yi1;~Zhaozhuo_Xu1;~Anshumali_Shrivastava1", "aff": "Rice University;Rice University;Stevens Institute of Technology;ThirdAI Corp.", "aff_domain": "rice.edu;rice.edu;stevens.edu;thirdai.com", "position": "PhD student;Undergrad student;Assistant Professor;CEO", "bibtex": "@inproceedings{\nzhang2024kv,\ntitle={{KV} Cache is 1 Bit Per Channel: Efficient Large Language Model Inference with Coupled Quantization},\nauthor={Tianyi Zhang and Jonah Wonkyu Yi and Zhaozhuo Xu and Anshumali Shrivastava},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pNnvzQsS4P}\n}", "github": "", "reviewers": "p3Eo;rmbd;qfND;KwWk;dwgV", "pdf_size": 4062807, "rating": "5;6;6;6;7", "confidence": "4;5;4;3;4", "soundness": "3;3;3;2;4", "novelty": "2;3;2;2;3", "presentation": "3;3;3;1;4", "wc_summary": "89;87;85;69;48", "wc_strengths": "19;88;54;51;36", "wc_weaknesses": "177;101;21;242;23", "wc_questions": "11;113;49;34;339", "wc_limitations": "14;20;1;37;63", "wc_review": "310;409;210;433;509", "wc_reply_reviewers": "119;107;0;45;75", "wc_reply_authors": "390;0;0;377;0", "reply_reviewers": "3;1;0;1;1", "reply_authors": "3;1;1;4;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.9797958971132712 ], "wc_summary_avg": [ 75.6, 15.512575543732254 ], "wc_strengths_avg": [ 49.6, 22.878811157925142 ], "wc_weaknesses_avg": [ 112.8, 86.53877743532087 ], "wc_questions_avg": [ 109.2, 119.78714455232667 ], "wc_limitations_avg": [ 27.0, 21.400934559032695 ], "wc_review_avg": [ 374.2, 103.84873615022958 ], "wc_reply_reviewers_avg": [ 69.2, 43.16665379665188 ], "wc_reply_authors_avg": [ 153.4, 187.92083439576356 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.0, 1.2649110640673518 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5758307712842791472&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "rice.edu;rice.edu;stevens.edu;thirdai.com", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Rice University;Stevens Institute of Technology;ThirdAI Corp.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rice.edu;https://www.stevens.edu;", "aff_unique_abbr": "Rice;SIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "What Variables Affect Out-of-Distribution Generalization in Pretrained Models?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93557", "id": "pOXgdFEB7q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pOXgdFEB7q", "openreview": "https://openreview.net/forum?id=pOXgdFEB7q", "poster": "/media/PosterPDFs/NeurIPS%202024/93557.png?t=1733252098.664673", "project": "", "author_site": "Md Yousuf Harun, Kyungbok Lee, Gianmarco Gallardo, Giri Krishnan, Christopher Kanan", "tldr": "", "abstract": "Embeddings produced by pre-trained deep neural networks (DNNs) are widely used; however, their efficacy for downstream tasks can vary widely. We study the factors influencing transferability and out-of-distribution (OOD) generalization of pre-trained DNN embeddings through the lens of the tunnel effect hypothesis, which is closely related to intermediate neural collapse. This hypothesis suggests that deeper DNN layers compress representations and hinder OOD generalization. Contrary to earlier work, our experiments show this is not a universal phenomenon. We comprehensively investigate the impact of DNN architecture, training data, image resolution, and augmentations on transferability. We identify that training with high-resolution datasets containing many classes greatly reduces representation compression and improves transferability. Our results emphasize the danger of generalizing findings from toy datasets to broader contexts.", "keywords": "Image Embeddings;Out-of-Distribution Generalization;Tunnel Effect;Neural Collapse", "primary_area": "evaluation", "supplementary_material": "/attachment/a55e924e17df2081f8641fb78f4180eb2247d560.zip", "author": "Md Yousuf Harun;Kyungbok Lee;Jhair Gallardo;Giri Prashanth;Christopher Kanan", "authorids": "~Md_Yousuf_Harun1;~Kyungbok_Lee2;~Jhair_Gallardo1;~Giri_Prashanth1;~Christopher_Kanan1", "gender": "M;M;M;M;M", "homepage": "https://yousuf907.github.io;;https://research.gatech.edu/giri-krishnan;https://chriskanan.com/;https://jhairgallardo.github.io/", "dblp": ";;;14/8653;207/4744", "google_scholar": "https://scholar.google.com/citations?hl=en;;IGsdszkAAAAJ;jMxZjBoAAAAJ;gFQHAtQAAAAJ", "orcid": "0000-0001-6544-6159;;;0000-0002-6412-995X;", "linkedin": "md-yousuf-harun-71748572;kyungbok-lee/;;chriskanan/;jhairgallardo/", "or_profile": "~Md_Yousuf_Harun1;~Kyungbok_Lee2;~Giri_Prashanth1;~Christopher_Kanan1;~Gianmarco_Jhair_Gallardo1", "aff": "Rochester Institute of Technology;University of Rochester;University of California, San Diego;University of Rochester;Rochester Institute of Technology", "aff_domain": "rit.edu;rochester.edu;ucsd.edu;rochester.edu;rit.edu", "position": "PhD student;Undergrad student;Research Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\nharun2024what,\ntitle={What Variables Affect Out-of-Distribution Generalization in Pretrained Models?},\nauthor={Md Yousuf Harun and Kyungbok Lee and Jhair Gallardo and Giri Prashanth and Christopher Kanan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pOXgdFEB7q}\n}", "github": "", "reviewers": "gXLk;tyj4;gM3N;WrHJ", "pdf_size": 23528907, "rating": "4;4;5;7", "confidence": "4;4;2;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;2;3", "wc_summary": "28;32;74;145", "wc_strengths": "14;27;15;171", "wc_weaknesses": "69;77;83;444", "wc_questions": "14;5;29;162", "wc_limitations": "31;1;1;8", "wc_review": "156;142;202;930", "wc_reply_reviewers": "0;26;23;50", "wc_reply_authors": "87;84;181;141", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.75, 47.03389735074056 ], "wc_strengths_avg": [ 56.75, 66.16031665583229 ], "wc_weaknesses_avg": [ 168.25, 159.2817864666265 ], "wc_questions_avg": [ 52.5, 63.79851095441021 ], "wc_limitations_avg": [ 10.25, 12.316147936753602 ], "wc_review_avg": [ 357.5, 331.27745169268616 ], "wc_reply_reviewers_avg": [ 24.75, 17.711225254058512 ], "wc_reply_authors_avg": [ 123.25, 40.32601517631019 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18104774089714401770&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "rit.edu;rochester.edu;ucsd.edu;rochester.edu;rit.edu", "author_num": 5, "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Rochester Institute of Technology;University of Rochester;University of California, San Diego", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rit.edu;https://www.rochester.edu;https://www.ucsd.edu", "aff_unique_abbr": "RIT;U of R;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Smoothie: Label Free Language Model Routing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93556", "id": "pPSWHsgqRp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pPSWHsgqRp", "openreview": "https://openreview.net/forum?id=pPSWHsgqRp", "poster": "/media/PosterPDFs/NeurIPS%202024/93556.png?t=1733893953.6601532", "project": "", "author_site": "Neel Guha, Mayee Chen, Trevor Chow, Ishan Khare, Christopher R\u00e9", "tldr": "", "abstract": "Large language models (LLMs) are increasingly used in applications where LLM inputs may span many different tasks. Recent work has found that the choice of LLM is consequential, and different LLMs may be good for different input samples. Prior approaches have thus explored how engineers might select an LLM to use for each sample (i.e. _routing_). While existing routing methods mostly require training auxiliary models on human-annotated data, our work explores whether it is possible to perform _unsupervised_ routing. We propose Smoothie, a weak supervision-inspired routing approach that requires no labeled data. Given a set of outputs from different LLMs, Smoothie constructs a latent variable graphical model over embedding representations of observable LLM outputs and unknown \u201ctrue\u201d outputs. Using this graphical model, we estimate sample-dependent quality scores for each LLM, and route each sample to the LLM with the highest corresponding score. We find that Smoothie's LLM quality-scores correlate with ground-truth model quality (correctly identifying the optimal model on 9/14 tasks), and that Smoothie outperforms baselines for routing by up to 10 points accuracy.", "keywords": "large language models;weak supervision;graphical models;routing", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Neel Guha;Mayee F Chen;Trevor Chow;Ishan S. Khare;Christopher Re", "authorids": "~Neel_Guha1;~Mayee_F_Chen1;~Trevor_Chow1;~Ishan_S._Khare1;~Christopher_Re1", "gender": "M;;;M;", "homepage": "http://neelguha.com;;https://tmychow.com;https://iskhare.github.io/;", "dblp": "130/0311;;;;", "google_scholar": "YI5N4HQAAAAJ;;iKEmTDUAAAAJ;licjJcoAAAAJ;", "orcid": ";;;0000-0003-2286-8153;", "linkedin": ";;;iskhare/;", "or_profile": "~Neel_Guha1;~Mayee_F_Chen1;~Trevor_Chow1;~Ishan_S._Khare1;~Christopher_Re1", "aff": "Computer Science Department, Stanford University;;Stanford University;Stanford University;", "aff_domain": "cs.stanford.edu;;stanford.edu;stanford.edu;", "position": "PhD student;;Undergrad student;Undergrad student;", "bibtex": "@inproceedings{\nguha2024smoothie,\ntitle={Smoothie: Label Free Language Model Routing},\nauthor={Neel Guha and Mayee F Chen and Trevor Chow and Ishan S. Khare and Christopher Re},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pPSWHsgqRp}\n}", "github": "", "reviewers": "gxgv;Cr26;r3vp;RLiV", "pdf_size": 5528350, "rating": "5;5;6;7", "confidence": "4;3;3;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "71;62;100;120", "wc_strengths": "54;32;64;222", "wc_weaknesses": "93;247;24;137", "wc_questions": "29;7;49;26", "wc_limitations": "11;1;15;1", "wc_review": "258;349;252;506", "wc_reply_reviewers": "74;65;26;18", "wc_reply_authors": "29;325;81;56", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.25, 23.09085316743407 ], "wc_strengths_avg": [ 93.0, 75.37240874484509 ], "wc_weaknesses_avg": [ 125.25, 81.01350196109289 ], "wc_questions_avg": [ 27.75, 14.889173919328098 ], "wc_limitations_avg": [ 7.0, 6.164414002968976 ], "wc_review_avg": [ 341.25, 102.58989960030178 ], "wc_reply_reviewers_avg": [ 45.75, 24.128561913218117 ], "wc_reply_authors_avg": [ 122.75, 118.20823786860204 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13613651418433220164&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.stanford.edu;;stanford.edu;stanford.edu;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "MSAGPT: Neural Prompting Protein Structure Prediction via MSA Generative Pre-Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93555", "id": "pPeXYByHNd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pPeXYByHNd", "openreview": "https://openreview.net/forum?id=pPeXYByHNd", "poster": "/media/PosterPDFs/NeurIPS%202024/93555.png?t=1730884321.9299028", "project": "", "author_site": "Bo Chen, Zhilei Bei, Xingyi Cheng, Pan Li, Jie Tang, Le Song", "tldr": "", "abstract": "Multiple Sequence Alignment (MSA) plays a pivotal role in unveiling the evolutionary trajectories of protein families. The accuracy of protein structure predictions is often compromised for protein sequences that lack sufficient homologous information to construct high-quality MSA. Although various methods have been proposed to generate high-quality MSA under these conditions, they fall short in comprehensively capturing the intricate co-evolutionary patterns within MSA or require guidance from external oracle models. Here we introduce MSAGPT, a novel approach to prompt protein structure predictions via MSA generative pre-training in a low-MSA regime. MSAGPT employs a simple yet effective 2D evolutionary positional encoding scheme to model the complex evolutionary patterns. Endowed by this, the flexible 1D MSA decoding framework facilitates zero- or few-shot learning. Moreover, we demonstrate leveraging the feedback from AlphaFold2 (AF2) can further enhance the model\u2019s capacity via Rejective Fine-tuning (RFT) and Reinforcement Learning from AF2 Feedback (RLAF). Extensive experiments confirm the efficacy of MSAGPT in generating faithful and informative MSA (up to +8.5% TM-Score on few-shot scenarios). The transfer learning also demonstrates its great potential for the wide range of tasks resorting to the quality of MSA.", "keywords": "Computational Biology;Protein Language Model;Protein Structure Prediction;MSA Generative Pre-Training", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Bo Chen;Zhilei Bei;Xingyi Cheng;Pan Li;Jie Tang;Le Song", "authorids": "~Bo_Chen11;~Zhilei_Bei1;~Xingyi_Cheng3;~Pan_Li11;~Jie_Tang1;~Le_Song1", "gender": "M;F;M;M;;M", "homepage": "https://allanchen95.github.io/;https://github.com/zhileibei;;;;http://www.cc.gatech.edu/~lsong", "dblp": ";;206/6376;;;94/3481", "google_scholar": "ZHtOJowAAAAJ;;shO7XmIAAAAJ;;;Xl4E0CsAAAAJ", "orcid": ";;;0000-0002-6902-5762;;", "linkedin": ";;;;;", "or_profile": "~Bo_Chen11;~Zhilei_Bei1;~Xingyi_Cheng3;~Pan_Li11;~Jie_Tang1;~Le_Song1", "aff": "Tsinghua University;Tsinghua University;BioMap;;;College of Computing, Georgia Institute of Technology", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;biomap.com;;;cc.gatech.edu", "position": "PhD student;Undergrad student;Principal Researcher;;;Associate Professor", "bibtex": "@inproceedings{\nchen2024msagpt,\ntitle={{MSAGPT}: Neural Prompting Protein Structure Prediction via {MSA} Generative Pre-Training},\nauthor={Bo Chen and Zhilei Bei and Xingyi Cheng and Pan Li and Jie Tang and Le Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pPeXYByHNd}\n}", "github": "", "reviewers": "Reh4;zdxm;wE2x", "pdf_size": 13207159, "rating": "7;7;7", "confidence": "4;5;3", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "66;111;43", "wc_strengths": "68;103;26", "wc_weaknesses": "96;56;158", "wc_questions": "120;376;2", "wc_limitations": "36;1;1", "wc_review": "386;647;230", "wc_reply_reviewers": "23;24;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 73.33333333333333, 28.241026106633512 ], "wc_strengths_avg": [ 65.66666666666667, 31.478387647541428 ], "wc_weaknesses_avg": [ 103.33333333333333, 41.96294661828324 ], "wc_questions_avg": [ 166.0, 156.11107156978542 ], "wc_limitations_avg": [ 12.666666666666666, 16.49915822768611 ], "wc_review_avg": [ 421.0, 172.02906731131225 ], "wc_reply_reviewers_avg": [ 19.0, 6.377042156569663 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14174272375498099796&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;tsinghua.edu.cn;biomap.com;;;cc.gatech.edu", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Tsinghua University;BioMap;Georgia Institute of Technology", "aff_unique_dep": ";;College of Computing", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.gatech.edu", "aff_unique_abbr": "THU;;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0;2", "aff_country_unique": "China;;United States" }, { "title": "Leveraging Catastrophic Forgetting to Develop Safe Diffusion Models against Malicious Finetuning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93554", "id": "pR37AmwbOt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pR37AmwbOt", "openreview": "https://openreview.net/forum?id=pR37AmwbOt", "poster": "/media/PosterPDFs/NeurIPS%202024/93554.png?t=1731040840.4424026", "project": "", "author_site": "Jiadong Pan, Hongcheng Gao, Zongyu Wu, Taihang Hu, Li Su, Qingming Huang, Liang Li", "tldr": "", "abstract": "Diffusion models (DMs) have demonstrated remarkable proficiency in producing images based on textual prompts. Numerous methods have been proposed to ensure these models generate safe images. Early methods attempt to incorporate safety filters into models to mitigate the risk of generating harmful images but such external filters do not inherently detoxify the model and can be easily bypassed. Hence, model unlearning and data cleaning are the most essential methods for maintaining the safety of models, given their impact on model parameters.\nHowever, malicious fine-tuning can still make models prone to generating harmful or undesirable images even with these methods.\nInspired by the phenomenon of catastrophic forgetting, we propose a training policy using contrastive learning to increase the latent space distance between clean and harmful data distribution, thereby protecting models from being fine-tuned to generate harmful images due to forgetting.\nThe experimental results demonstrate that our methods not only maintain clean image generation capabilities before malicious fine-tuning but also effectively prevent DMs from producing harmful images after malicious fine-tuning. Our method can also be combined with other safety methods to maintain their safety against malicious fine-tuning further.", "keywords": "Diffusion Model;AI Safety", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/9dded155b5f38253789c1371191bf54dc652e1c9.zip", "author": "Jiadong Pan;Hongcheng Gao;Zongyu Wu;taihang Hu;Li Su;Qingming Huang;Liang Li", "authorids": "~Jiadong_Pan1;~Hongcheng_Gao1;~Zongyu_Wu1;~taihang_Hu1;~Li_Su4;~Qingming_Huang2;~Liang_Li3", "gender": "M;M;M;M;F;;M", "homepage": ";https://gao-hongcheng.github.io/;https://wu-zongyu.github.io/;https://hutaihang.github.io/;http://people.ucas.edu.cn/~0010466?language=en;https://qmhuang-ucas.github.io/;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/201711/t20171114_4894220.html", "dblp": "395/3193;318/1404;322/4801;344/1728;05/365-3;68/4388;14/1395-3.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;uq-XDSYAAAAJ;;;https://scholar.google.com.hk/citations?user=J1vMnRgAAAAJ;Q-4mZnQAAAAJ", "orcid": "0000-0002-4436-8830;;0009-0001-8378-7632;;;;", "linkedin": "%E4%BD%B3%E6%A0%8B-%E6%BD%98-002482220/;;;;;;", "or_profile": "~Jiadong_Pan1;~Hongcheng_Gao1;~Zongyu_Wu1;~taihang_Hu1;~Li_Su4;~Qingming_Huang2;~Liang_Li3", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;University of Chinese Academy of Sciences;The Pennsylvania State University;Nankai University;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ucas.ac.cn;psu.edu;nankai.edu.cn;ucas.ac.cn;ucas.ac.cn;ict.ac.cn", "position": "MS student;MS student;PhD student;MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\npan2024leveraging,\ntitle={Leveraging Catastrophic Forgetting to Develop Safe Diffusion Models against Malicious Finetuning},\nauthor={Jiadong Pan and Hongcheng Gao and Zongyu Wu and taihang Hu and Li Su and Qingming Huang and Liang Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pR37AmwbOt}\n}", "github": "", "reviewers": "XqJj;r3Dt;Umiy;EJzH", "pdf_size": 17498480, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "2;1;3;4", "novelty": "3;3;3;4", "presentation": "3;2;3;4", "wc_summary": "60;85;96;69", "wc_strengths": "54;50;128;63", "wc_weaknesses": "234;197;106;53", "wc_questions": "28;59;2;40", "wc_limitations": "2;12;6;9", "wc_review": "378;403;338;234", "wc_reply_reviewers": "218;58;9;44", "wc_reply_authors": "434;58;0;0", "reply_reviewers": "2;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 1.118033988749895 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.5, 13.937359864766353 ], "wc_strengths_avg": [ 73.75, 31.67313530422904 ], "wc_weaknesses_avg": [ 147.5, 71.73736822605078 ], "wc_questions_avg": [ 32.25, 20.668514702319566 ], "wc_limitations_avg": [ 7.25, 3.6996621467371855 ], "wc_review_avg": [ 338.25, 64.4995155020563 ], "wc_reply_reviewers_avg": [ 82.25, 80.38151217786339 ], "wc_reply_authors_avg": [ 123.0, 181.11046352985792 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1900285477662986184&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ict.ac.cn;ucas.ac.cn;psu.edu;nankai.edu.cn;ucas.ac.cn;ucas.ac.cn;ict.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;1;1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Pennsylvania State University;Nankai University", "aff_unique_dep": "Institute of Computing Technology;;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn;https://www.psu.edu;http://www.nankai.edu.cn", "aff_unique_abbr": "CAS;UCAS;PSU;NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "$\\boldsymbol{\\mu}\\mathbf{P^2}$: Effective Sharpness Aware Minimization Requires Layerwise Perturbation Scaling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93553", "id": "pR5g1bBqoV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pR5g1bBqoV", "openreview": "https://openreview.net/forum?id=pR5g1bBqoV", "poster": "/media/PosterPDFs/NeurIPS%202024/93553.png?t=1732206509.2867923", "project": "", "author_site": "Moritz Haas, Jin Xu, Volkan Cevher, Leena Chennuru Vankadara", "tldr": "", "abstract": "Sharpness Aware Minimization (SAM) enhances performance across various neural architectures and datasets. As models are continually scaled up to improve performance, a rigorous understanding of SAM\u2019s scaling behaviour is paramount. To this end, we study the infinite-width limit of neural networks trained with SAM, using the Tensor Programs framework. Our findings reveal that the dynamics of standard SAM effectively reduce to applying SAM solely in the last layer in wide neural networks, even with optimal hyperparameters. In contrast, we identify a stable parameterization with layerwise perturbation scaling, which we call *Maximal Update and Perturbation Parameterization* ($\\mu$P$^2$), that ensures all layers are both feature learning and effectively perturbed in the limit. Through experiments with MLPs, ResNets and Vision Transformers, we empirically demonstrate that $\\mu$P$^2$ is the first parameterization to achieve hyperparameter transfer of the joint optimum of learning rate and perturbation radius across model scales. Moreover, we provide an intuitive condition to derive $\\mu$P$^2$ for other perturbation rules like Adaptive SAM and SAM-ON, also ensuring balanced perturbation effects across all layers.", "keywords": "Deep Learning Theory;Optimal Hyperparameter Transfer;Sharpness Aware Minimization;Infinite Width Limits;Signal Propagation Theory;Tensor Programs", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Moritz Haas;Jin Xu;Volkan Cevher;Leena Chennuru Vankadara", "authorids": "~Moritz_Haas1;~Jin_Xu7;~Volkan_Cevher1;~Leena_Chennuru_Vankadara2", "gender": ";M;M;F", "homepage": "https://www.tml.cs.uni-tuebingen.de/team/haas/index.php;https://jinxu06.github.io/;http://lions.epfl.ch;https://leenacvankadara.com", "dblp": "332/4834;97/3265-11;70/5301;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;b5JQt5QAAAAJ;https://scholar.google.ch/citations?user=hlWhzU8AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Moritz_Haas1;~Jin_Xu7;~Volkan_Cevher1;~Leena_Chennuru_Vankadara2", "aff": "Amazon;Microsoft;Amazon Development Center Germany;Amazon Development Center Germany", "aff_domain": "amazon.com;microsoft.com;amazon.de;amazon.de", "position": "Intern;Researcher;Amazon Scholar;Applied Scientist II", "bibtex": "@inproceedings{\nhaas2024boldsymbolmumathbfp,\ntitle={\\${\\textbackslash}boldsymbol\\{{\\textbackslash}mu\\}{\\textbackslash}mathbf\\{P{\\textasciicircum}2\\}\\$: Effective Sharpness Aware Minimization Requires Layerwise Perturbation Scaling},\nauthor={Moritz Haas and Jin Xu and Volkan Cevher and Leena Chennuru Vankadara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pR5g1bBqoV}\n}", "github": "", "reviewers": "dLWn;Tz6c;egBf;CqBd", "pdf_size": 4688560, "rating": "4;6;6;7", "confidence": "3;3;5;2", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "1;2;2;3", "wc_summary": "52;70;68;90", "wc_strengths": "15;75;66;20", "wc_weaknesses": "223;103;436;34", "wc_questions": "44;138;40;1", "wc_limitations": "10;7;14;1", "wc_review": "344;393;624;146", "wc_reply_reviewers": "135;25;521;0", "wc_reply_authors": "452;25;403;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 70.0, 13.490737563232042 ], "wc_strengths_avg": [ 44.0, 26.74883175019051 ], "wc_weaknesses_avg": [ 199.0, 152.63191016298 ], "wc_questions_avg": [ 55.75, 50.3705022806007 ], "wc_limitations_avg": [ 8.0, 4.743416490252569 ], "wc_review_avg": [ 376.75, 170.084354071737 ], "wc_reply_reviewers_avg": [ 170.25, 208.77664500609256 ], "wc_reply_authors_avg": [ 220.0, 208.40945276066535 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.15789473684210528, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "amazon.com;microsoft.com;amazon.de;amazon.de", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Amazon;Microsoft", "aff_unique_dep": "Amazon.com, Inc.;Microsoft Corporation", "aff_unique_url": "https://www.amazon.com;https://www.microsoft.com", "aff_unique_abbr": "Amazon;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;Germany" }, { "title": "Transformers as Game Players: Provable In-context Game-playing Capabilities of Pre-trained Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93552", "id": "pRQmRaonxf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pRQmRaonxf", "openreview": "https://openreview.net/forum?id=pRQmRaonxf", "poster": "", "project": "", "author_site": "Chengshuai Shi, Kun Yang, Jing Yang, Cong Shen", "tldr": "", "abstract": "The in-context learning (ICL) capability of pre-trained models based on the transformer architecture has received growing interest in recent years. While theoretical understanding has been obtained for ICL in reinforcement learning (RL), the previous results are largely confined to the single-agent setting. This work proposes to further explore the in-context learning capabilities of pre-trained transformer models in competitive multi-agent games, i.e., in-context game-playing (ICGP). Focusing on the classical two-player zero-sum games, theoretical guarantees are provided to demonstrate that pre-trained transformers can provably learn to approximate Nash equilibrium in an in-context manner for both decentralized and centralized learning settings. As a key part of the proof, constructional results are established to demonstrate that the transformer architecture is sufficiently rich to realize celebrated multi-agent game-playing algorithms, in particular, decentralized V-learning and centralized VI-ULCB.", "keywords": "In-context Learning; Multi-agent Competitive Games; Transformers; Decision-making", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Chengshuai Shi;Kun Yang;Jing Yang;Cong Shen", "authorids": "~Chengshuai_Shi1;~Kun_Yang7;~Jing_Yang3;~Cong_Shen1", "gender": "M;M;;M", "homepage": "https://chengshuai-shi.github.io/;;http://www.ee.psu.edu/yang;https://cshen317.github.io/", "dblp": "259/3938;;;79/6027-1.html", "google_scholar": "twvDiW8AAAAJ;-BzQrlgAAAAJ;https://scholar.google.com/citations?hl=en;70LBhKcAAAAJ", "orcid": "0000-0002-2727-8251;;;0000-0002-3148-4453", "linkedin": ";;;cong-shen-3372404/", "or_profile": "~Chengshuai_Shi1;~Kun_Yang7;~Jing_Yang3;~Cong_Shen1", "aff": "University of Virginia;University of Virginia, Charlottesville;Pennsylvania State University;University of Virginia", "aff_domain": "virginia.edu;virginia.edu;psu.edu;virginia.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nshi2024transformers,\ntitle={Transformers as Game Players: Provable In-context Game-playing Capabilities of Pre-trained Models},\nauthor={Chengshuai Shi and Kun Yang and Jing Yang and Cong Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pRQmRaonxf}\n}", "github": "", "reviewers": "yLUv;h1zf;beky;kZH2", "pdf_size": 1059734, "rating": "6;6;7;8", "confidence": "2;3;2;2", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "75;155;40;133", "wc_strengths": "63;90;100;370", "wc_weaknesses": "102;68;145;625", "wc_questions": "95;53;2;195", "wc_limitations": "8;25;12;1", "wc_review": "343;391;299;1324", "wc_reply_reviewers": "21;46;19;52", "wc_reply_authors": "20;34;20;43", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.75, 45.65290242689943 ], "wc_strengths_avg": [ 155.75, 124.43547524721397 ], "wc_weaknesses_avg": [ 235.0, 226.81380028560872 ], "wc_questions_avg": [ 86.25, 70.89913610193004 ], "wc_limitations_avg": [ 11.5, 8.73212459828649 ], "wc_review_avg": [ 589.25, 425.45409564370163 ], "wc_reply_reviewers_avg": [ 34.5, 14.67140075112121 ], "wc_reply_authors_avg": [ 29.25, 9.781998773256925 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2778542005279670482&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "virginia.edu;virginia.edu;psu.edu;virginia.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Virginia;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.psu.edu", "aff_unique_abbr": "UVA;PSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Partitions from Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93551", "id": "pRSgf5VdD0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pRSgf5VdD0", "openreview": "https://openreview.net/forum?id=pRSgf5VdD0", "poster": "", "project": "", "tldr": "", "abstract": "In this paper, we study the problem of learning the structure of a discrete set of $N$ tokens based on their interactions with other tokens. We focus on a setting where the tokens can be partitioned into a small number of classes, and there exists a real-valued function $f$ defined on certain sets of tokens. This function, which captures the interactions between tokens, depends only on the class memberships of its arguments. The goal is to recover the class memberships of all tokens from a finite number of samples of $f$. We begin by analyzing this problem from both complexity-theoretic and information-theoretic viewpoints. We prove that it is NP-complete in general, and for random instances, we show that on the order of $N\\ln(N)$ samples, implying very sparse interactions, suffice to identify the partition. We then investigate the conditions under which gradient flow dynamics of token embeddings can reveal the class structure, finding that this is achievable in certain settings when given on the order of $N^2\\ln^2(N)$ samples.", "keywords": "clustering;gradient-flow;token embeddings;partitions;complexity;sample complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Simon Buchholz", "authorids": "~Simon_Buchholz1", "gender": "", "homepage": "https://www.is.mpg.de/person/sbuchholz", "dblp": "207/9068", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Simon_Buchholz1", "aff": "Max-Planck Institute", "aff_domain": "mpg.de", "position": "Postdoc", "bibtex": "@inproceedings{\nbuchholz2024learning,\ntitle={Learning Partitions from Context},\nauthor={Simon Buchholz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pRSgf5VdD0}\n}", "github": "", "reviewers": "w8Q8;8unk;Rx9k;NSnU", "pdf_size": 593389, "rating": "5;6;6;7", "confidence": "2;4;4;3", "soundness": "2;4;3;4", "novelty": "3;3;3;3", "presentation": "2;4;3;3", "wc_summary": "86;120;34;57", "wc_strengths": "29;116;20;106", "wc_weaknesses": "62;154;17;184", "wc_questions": "66;39;21;110", "wc_limitations": "22;10;1;7", "wc_review": "265;439;93;464", "wc_reply_reviewers": "41;10;33;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.25, 32.205395510690444 ], "wc_strengths_avg": [ 67.75, 43.51077452769601 ], "wc_weaknesses_avg": [ 104.25, 67.5143503264306 ], "wc_questions_avg": [ 59.0, 33.51865152418874 ], "wc_limitations_avg": [ 10.0, 7.648529270389178 ], "wc_review_avg": [ 315.25, 149.4663423650957 ], "wc_reply_reviewers_avg": [ 23.5, 13.793114224133722 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DRnLzRcUus8J:scholar.google.com/&scioq=Learning+Partitions+from+Context&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "mpg.de", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.", "aff_unique_dep": "", "aff_unique_url": "https://www.mpg.de", "aff_unique_abbr": "MPG", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Causal Dependence Plots", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93550", "id": "pU0z2sNM1M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pU0z2sNM1M", "openreview": "https://openreview.net/forum?id=pU0z2sNM1M", "poster": "/media/PosterPDFs/NeurIPS%202024/93550.png?t=1733956243.747561", "project": "", "author_site": "Joshua Loftus, Lucius Bynum, Sakina Hansen", "tldr": "", "abstract": "To use artificial intelligence and machine learning models wisely we must understand how they interact with the world, including how they depend causally on data inputs. In this work we develop Causal Dependence Plots (CDPs) to visualize how a model's predicted outcome depends on changes in a given predictor *along with consequent causal changes in other predictor variables*. Crucially, this differs from standard methods based on independence or holding other predictors constant, such as regression coefficients or Partial Dependence Plots (PDPs). Our explanatory framework generalizes PDPs, including them as a special case, as well as a variety of other interpretive plots that show, for example, the total, direct, and indirect effects of causal mediation. We demonstrate with simulations and real data experiments how CDPs can be combined in a modular way with methods for causal learning or sensitivity analysis. Since people often think causally about input-output dependence, CDPs can be powerful tools in the xAI or interpretable machine learning toolkit and contribute to applications like scientific machine learning and algorithmic fairness.", "keywords": "Interpretable machine learning;interpretability;explainable AI;explainability;causality;partial dependence plots;total dependence plots;model agnostic explanations", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/45f40fd01b23181ce54a7c57fb1c88e88cdc2669.zip", "author": "Joshua R. Loftus;Lucius E.J. Bynum;Sakina Hansen", "authorids": "~Joshua_R._Loftus1;~Lucius_E.J._Bynum1;~Sakina_Hansen1", "gender": "M;;F", "homepage": "http://joshualoftus.com/;;https://www.linkedin.com/in/sakinahansen/", "dblp": "198/0795;;", "google_scholar": "SIbr3XUAAAAJ;;", "orcid": "0000-0002-2905-1632;;", "linkedin": ";;", "or_profile": "~Joshua_R._Loftus1;~Lucius_E.J._Bynum1;~Sakina_Hansen1", "aff": "London School of Economics;;London School of Economics and Political Science, University of London", "aff_domain": "lse.ac.uk;;lse.ac.uk", "position": "Assistant Professor;;PhD student", "bibtex": "@inproceedings{\nloftus2024causal,\ntitle={Causal Dependence Plots},\nauthor={Joshua R. Loftus and Lucius E.J. Bynum and Sakina Hansen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pU0z2sNM1M}\n}", "github": "", "reviewers": "GtVe;M8fQ;o4Ea;6pua", "pdf_size": 2684085, "rating": "6;6;6;6", "confidence": "3;3;5;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;4;2", "wc_summary": "47;190;98;130", "wc_strengths": "87;143;90;92", "wc_weaknesses": "108;142;180;1439", "wc_questions": "41;143;42;10", "wc_limitations": "2;58;68;75", "wc_review": "285;676;478;1746", "wc_reply_reviewers": "16;314;149;36", "wc_reply_authors": "49;285;576;88", "reply_reviewers": "2;2;2;1", "reply_authors": "2;3;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 116.25, 51.85737652446371 ], "wc_strengths_avg": [ 103.0, 23.16246964380094 ], "wc_weaknesses_avg": [ 467.25, 561.6179194968764 ], "wc_questions_avg": [ 59.0, 50.174694817208405 ], "wc_limitations_avg": [ 50.75, 28.78693279944913 ], "wc_review_avg": [ 796.25, 565.4964080345692 ], "wc_reply_reviewers_avg": [ 128.75, 118.36674997650312 ], "wc_reply_authors_avg": [ 249.5, 208.653420772342 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4901746401825241180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "lse.ac.uk;;lse.ac.uk", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "London School of Economics;London School of Economics and Political Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.lse.ac.uk;https://www.lse.ac.uk", "aff_unique_abbr": "LSE;LSE", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "UltraMedical: Building Specialized Generalists in Biomedicine", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97506", "id": "pUcTrjRLOM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pUcTrjRLOM", "openreview": "https://openreview.net/forum?id=pUcTrjRLOM", "poster": "/media/PosterPDFs/NeurIPS%202024/97506.png?t=1731427821.5570474", "project": "", "author_site": "Kaiyan Zhang, Sihang Zeng, Ermo Hua, Ning Ding, Zhang-Ren Chen, Zhiyuan Ma, Haoxin Li, Ganqu Cui, Biqing Qi, Xuekai Zhu, Xingtai Lv, Hu Jinfang, Zhiyuan Liu, Bowen Zhou", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable capabilities across various domains and are moving towards more specialized areas. Recent advanced proprietary models such as GPT-4 and Gemini have achieved significant advancements in biomedicine, which have also raised privacy and security challenges. The construction of specialized generalists hinges largely on high-quality datasets, enhanced by techniques like supervised fine-tuning and reinforcement learning from human or AI feedback, and direct preference optimization. However, these leading technologies (e.g., preference learning) are still significantly limited in the open source community due to the scarcity of specialized data. In this paper, we present the UltraMedical collections, which consist of high-quality manual and synthetic datasets in the biomedicine domain, featuring preference annotations across multiple advanced LLMs. By utilizing these datasets, we fine-tune a suite of specialized medical models based on Llama-3 series, demonstrating breathtaking capabilities across various medical benchmarks. Moreover, we develop powerful reward models skilled in biomedical and general reward benchmark, enhancing further online preference learning within the biomedical LLM community.", "keywords": "Large Language Models;Preference Learning;BioMedicine", "primary_area": "", "supplementary_material": "/attachment/31f1badb1f14c741b2416494643075889507b5ee.pdf", "author": "Kaiyan Zhang;Sihang Zeng;Ermo Hua;Ning Ding;Zhang-Ren Chen;Zhiyuan Ma;Haoxin Li;Ganqu Cui;Biqing Qi;Xuekai Zhu;Xingtai Lv;Hu Jinfang;Zhiyuan Liu;Bowen Zhou", "authorids": "~Kaiyan_Zhang1;~Sihang_Zeng1;~Ermo_Hua1;~Ning_Ding5;~Zhang-Ren_Chen1;~Zhiyuan_Ma1;~Haoxin_Li5;~Ganqu_Cui1;~Biqing_Qi1;~Xuekai_Zhu1;~Xingtai_Lv1;~Hu_Jinfang1;~Zhiyuan_Liu1;~Bowen_Zhou8", "gender": "M;M;M;M;M;M;M;M;M;M;;F;M;", "homepage": "https://iseesaw.github.io/;https://zengsihang.github.io/;https://messihua.zone;https://www.stingning.cn/;;;https://github.com/hanqdemo;https://cgq15.github.io/;https://biqing-qi.github.io/;;https://github.com/telxt/telxt.github.io;https://xueshu.baidu.com/scholarID/CN-BQ75BE2J;http://nlp.csai.tsinghua.edu.cn/~lzy;http://web.ee.tsinghua.edu.cn/zhoubowen/zh_CN/index.htm?eqid=b894e49b0000ec7d0000000464857b51", "dblp": ";;372/0768.html;;;138/5978-5;;232/3064;233/4949.html;327/9656;351/0835;;53/3245-1;", "google_scholar": "https://scholar.google.com/citations?hl=en-US;yhMm2S0AAAAJ;;uZXQuYAAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;3IVSzZgAAAAJ;;plXXtQkAAAAJ;Q3a25IEAAAAJ;;dT0v5u0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0009-0001-0595-3084;;;;0000-0002-1520-3519;;;;0000-0002-4072-0577;;;;0000-0002-7709-2543;0000-0003-1062-9526", "linkedin": ";;;;;;;;;;;;;", "or_profile": "~Kaiyan_Zhang1;~Sihang_Zeng1;~Ermo_Hua1;~Ning_Ding5;~Zhang-Ren_Chen1;~Zhiyuan_Ma1;~Haoxin_Li5;~Ganqu_Cui1;~Biqing_Qi1;~Xuekai_Zhu1;~Xingtai_Lv1;~Hu_Jinfang1;~Zhiyuan_Liu1;~Bowen_Zhou8", "aff": "Tsinghua University;University of Washington;Tsinghua University;Tsinghua University;Nanchang University;Tsinghua University;Tsinghua University;Tsinghua University;Harbin Institute of Technology;Shanghai Jiaotong University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;uw.edu;tsinghua.edu.cn;mail.tsinghua.edu.cn;ncu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;hit.edu.cn;sjtu.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Postdoc;Researcher;Postdoc;Undergrad student;PhD student;PhD student;PhD student;Undergrad student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024ultramedical,\ntitle={UltraMedical: Building Specialized Generalists in Biomedicine},\nauthor={Kaiyan Zhang and Sihang Zeng and Ermo Hua and Ning Ding and Zhang-Ren Chen and Zhiyuan Ma and Haoxin Li and Ganqu Cui and Biqing Qi and Xuekai Zhu and Xingtai Lv and Hu Jinfang and Zhiyuan Liu and Bowen Zhou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=pUcTrjRLOM}\n}", "github": "", "reviewers": "AmEP;QMZQ;ipWt;n4GX", "pdf_size": 5509842, "rating": "7;8;8;9", "confidence": "4;4;3;4", "wc_summary_and_contributions": "57;94;214;120", "wc_strengths": "2;61;174;35", "wc_improvement": "94;44;42;31", "wc_limitations": "1;40;12;1", "wc_correctness": "1;8;15;1", "wc_clarity": "1;5;12;1", "wc_relation_to_prior_work": "1;7;28;17", "wc_documentation": "1;3;56;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "159;263;554;208", "wc_reply_reviewers": "14;27;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 121.25, 58.04039541560688 ], "wc_strengths_avg": [ 68.0, 64.67225061802009 ], "wc_improvement_avg": [ 52.75, 24.324627438051337 ], "wc_limitations_avg": [ 13.5, 15.945218719101975 ], "wc_correctness_avg": [ 6.25, 5.80409338312195 ], "wc_clarity_avg": [ 4.75, 4.493050188902857 ], "wc_relation_to_prior_work_avg": [ 13.25, 10.256095748383007 ], "wc_documentation_avg": [ 15.25, 23.54118731075389 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 296.0, 153.43239553627518 ], "wc_reply_reviewers_avg": [ 10.25, 11.233320969330485 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3484156452660624146&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "mails.tsinghua.edu.cn;uw.edu;tsinghua.edu.cn;mail.tsinghua.edu.cn;ncu.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;hit.edu.cn;sjtu.edu.cn;mails.tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 14, "aff_unique_index": "0;1;0;0;2;0;0;0;3;4;0;0;0", "aff_unique_norm": "Tsinghua University;University of Washington;Nanchang University;Harbin Institute of Technology;Shanghai Jiao Tong University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.washington.edu;https://www.ncu.edu.cn;http://www.hit.edu.cn/;https://www.sjtu.edu.cn", "aff_unique_abbr": "THU;UW;NCU;HIT;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Learning to Merge Tokens via Decoupled Embedding for Efficient Vision Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93549", "id": "pVPyCgXv57", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pVPyCgXv57", "openreview": "https://openreview.net/forum?id=pVPyCgXv57", "poster": "/media/PosterPDFs/NeurIPS%202024/93549.png?t=1733708921.1355414", "project": "", "author_site": "Dong Hoon Lee, Seunghoon Hong", "tldr": "", "abstract": "Recent token reduction methods for Vision Transformers (ViTs) incorporate token merging, which measures the similarities between token embeddings and combines the most similar pairs.\nHowever, their merging policies are directly dependent on intermediate features in ViTs, which prevents exploiting features tailored for merging and requires end-to-end training to improve token merging.\nIn this paper, we propose Decoupled Token Embedding for Merging (DTEM) that enhances token merging through a decoupled embedding learned via a continuously relaxed token merging process.\nOur method introduces a lightweight embedding module decoupled from the ViT forward pass to extract dedicated features for token merging, thereby addressing the restriction from using intermediate features.\nThe continuously relaxed token merging, applied during training, enables us to learn the decoupled embeddings in a differentiable manner.\nThanks to the decoupled structure, our method can be seamlessly integrated into existing ViT backbones and trained either modularly by learning only the decoupled embeddings or end-to-end by fine-tuning. \nWe demonstrate the applicability of DTEM on various tasks, including classification, captioning, and segmentation, with consistent improvement in token merging.\nEspecially in the ImageNet-1k classification, DTEM achieves a 37.2\\% reduction in FLOPs while maintaining a top-1 accuracy of 79.85\\% with DeiT-small.", "keywords": "token merging;token reduction method;efficient inference;vision transformer", "primary_area": "machine_vision", "supplementary_material": "", "author": "Dong Hoon Lee;Seunghoon Hong", "authorids": "~Dong_Hoon_Lee1;~Seunghoon_Hong2", "gender": "M;M", "homepage": "https://movinghoon.github.io;https://maga33.github.io/", "dblp": "99/6872;142/3014.html", "google_scholar": "fbHhzWsAAAAJ;hvr3ALkAAAAJ", "orcid": ";", "linkedin": ";seunghoon-hong-194489a4/", "or_profile": "~Dong_Hoon_Lee1;~Seunghoon_Hong1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2024learning,\ntitle={Learning to Merge Tokens via Decoupled Embedding for Efficient Vision Transformers},\nauthor={Dong Hoon Lee and Seunghoon Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pVPyCgXv57}\n}", "github": "", "reviewers": "FV5y;wYAw;oGzP;PcYu", "pdf_size": 2614019, "rating": "4;5;6;6", "confidence": "4;5;5;3", "soundness": "2;3;3;2", "novelty": "2;3;2;2", "presentation": "3;3;3;3", "wc_summary": "61;31;86;74", "wc_strengths": "144;39;57;34", "wc_weaknesses": "168;151;61;99", "wc_questions": "98;9;108;43", "wc_limitations": "4;26;24;4", "wc_review": "475;256;336;254", "wc_reply_reviewers": "0;67;0;44", "wc_reply_authors": "0;302;0;52", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.0, 20.481699148264042 ], "wc_strengths_avg": [ 68.5, 44.421278684882544 ], "wc_weaknesses_avg": [ 119.75, 42.387350707492914 ], "wc_questions_avg": [ 64.5, 40.48765243873742 ], "wc_limitations_avg": [ 14.5, 10.523782589924593 ], "wc_review_avg": [ 330.25, 89.87873775259641 ], "wc_reply_reviewers_avg": [ 27.75, 28.9169068193678 ], "wc_reply_authors_avg": [ 88.5, 125.0789750517648 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14975733250620010142&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "kaist.ac.kr;kaist.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "ReMoDetect: Reward Models Recognize Aligned LLM's Generations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93548", "id": "pW9Jwim918", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pW9Jwim918", "openreview": "https://openreview.net/forum?id=pW9Jwim918", "poster": "/media/PosterPDFs/NeurIPS%202024/93548.png?t=1731390959.7087133", "project": "", "author_site": "Hyunseok Lee, Jihoon Tack, Jinwoo Shin", "tldr": "", "abstract": "The remarkable capabilities and easy accessibility of large language models (LLMs) have significantly increased societal risks (e.g., fake news generation), necessitating the development of LLM-generated text (LGT) detection methods for safe usage. However, detecting LGTs is challenging due to the vast number of LLMs, making it impractical to account for each LLM individually; hence, it is crucial to identify the common characteristics shared by these models. In this paper, we draw attention to a common feature of recent powerful LLMs, namely the alignment training, i.e., training LLMs to generate human-preferable texts. Our key finding is that as these aligned LLMs are trained to maximize the human preferences, they generate texts with higher estimated preferences even than human-written texts; thus, such texts are easily detected by using the reward model (i.e., an LLM trained to model human preference distribution). Based on this finding, we propose two training schemes to further improve the detection ability of the reward model, namely (i) continual preference fine-tuning to make reward model prefer aligned LGTs even further and (ii) reward modeling of Human/LLM mixed texts (a rephrased texts from human-written texts using aligned LLMs), which serves as a median preference text corpus between LGTs and human-written texts to learn the decision boundary better. We provide an extensive evaluation by considering six text domains across twelve aligned LLMs, where our method demonstrates state-of-the-art results.", "keywords": "Large Language Model;LLM Generated Text Detection;Reward Model", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/09566f45889e628ec12dab055fec86d2deeda0fd.zip", "author": "Hyunseok Lee;Jihoon Tack;Jinwoo Shin", "authorids": "~Hyunseok_Lee1;~Jihoon_Tack1;~Jinwoo_Shin1", "gender": "M;M;M", "homepage": ";https://jihoontack.github.io;https://sites.google.com/site/mijirim/", "dblp": ";267/5487;31/7062", "google_scholar": ";eW8-OT4AAAAJ;https://scholar.google.com.tw/citations?user=m3eDp7kAAAAJ", "orcid": ";;", "linkedin": "\ud604\uc11d-\uc774-9a07351a7/;;", "or_profile": "~Hyunseok_Lee1;~Jihoon_Tack1;~Jinwoo_Shin1", "aff": "Korea Advanced Institute of Science & Technology;Meta FAIR;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;meta.com;kaist.ac.kr", "position": "MS student;Intern;Full Professor", "bibtex": "@inproceedings{\nlee2024remodetect,\ntitle={ReMoDetect: Reward Models Recognize Aligned {LLM}'s Generations},\nauthor={Hyunseok Lee and Jihoon Tack and Jinwoo Shin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pW9Jwim918}\n}", "github": "", "reviewers": "3CSf;dzQj;kLmr;CEwj", "pdf_size": 1889366, "rating": "5;7;7;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "45;74;71;51", "wc_strengths": "50;44;31;105", "wc_weaknesses": "151;85;19;64", "wc_questions": "93;16;41;80", "wc_limitations": "14;1;85;10", "wc_review": "353;220;247;310", "wc_reply_reviewers": "0;89;0;0", "wc_reply_authors": "0;200;0;0", "reply_reviewers": "0;2;0;0", "reply_authors": "1;3;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 60.25, 12.47747971346778 ], "wc_strengths_avg": [ 57.5, 28.271009886454358 ], "wc_weaknesses_avg": [ 79.75, 47.54668758178639 ], "wc_questions_avg": [ 57.5, 30.663496212923928 ], "wc_limitations_avg": [ 27.5, 33.529837458597974 ], "wc_review_avg": [ 282.5, 52.18476789255654 ], "wc_reply_reviewers_avg": [ 22.25, 38.53813046840752 ], "wc_reply_authors_avg": [ 50.0, 86.60254037844386 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2431418596719991807&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;meta.com;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.kaist.ac.kr;https://meta.com", "aff_unique_abbr": "KAIST;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;United States" }, { "title": "E-Motion: Future Motion Simulation via Event Sequence Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93547", "id": "pWowK7jqok", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pWowK7jqok", "openreview": "https://openreview.net/forum?id=pWowK7jqok", "poster": "/media/PosterPDFs/NeurIPS%202024/93547.png?t=1731487595.1686604", "project": "", "author_site": "Song Wu, Zhiyu Zhu, Junhui Hou, GUANGMING Shi, Jinjian Wu", "tldr": "", "abstract": "Forecasting a typical object's future motion is a critical task for interpreting and interacting with dynamic environments in computer vision. Event-based sensors, which could capture changes in the scene with exceptional temporal granularity, may potentially offer a unique opportunity to predict future motion with a level of detail and precision previously unachievable. Inspired by that, we propose to integrate the strong learning capacity of the video diffusion model with the rich motion information of an event camera as a motion simulation framework. Specifically, we initially employ pre-trained stable video diffusion models to adapt the event sequence dataset. This process facilitates the transfer of extensive knowledge from RGB videos to an event-centric domain. Moreover, we introduce an alignment mechanism that utilizes reinforcement learning techniques to enhance the reverse generation trajectory of the diffusion model, ensuring improved performance and accuracy. Through extensive testing and validation, we demonstrate the effectiveness of our method in various complex scenarios, showcasing its potential to revolutionize motion flow prediction in computer vision applications such as autonomous vehicle guidance, robotic navigation, and interactive media. Our findings suggest a promising direction for future research in enhancing the interpretative power and predictive accuracy of computer vision systems. The source code is\npublicly available at https://github.com/p4r4mount/E-Motion.", "keywords": "Event-based vision;video diffusion model", "primary_area": "machine_vision", "supplementary_material": "/attachment/b7cd23bba2a4e949afb6a1b5dde875bca9559798.zip", "author": "Song Wu;Zhiyu Zhu;Junhui Hou;Guangming Shi;Jinjian Wu", "authorids": "~Song_Wu7;~Zhiyu_Zhu1;~Junhui_Hou2;~Guangming_Shi1;~Jinjian_Wu1", "gender": "M;M;M;M;M", "homepage": "https://github.com/p4r4mount;;http://www.cityu.edu.hk/stfprofile/csjhhou.htm;http://see.xidian.edu.cn/faculty/gmshi/;https://web.xidian.edu.cn/wjj/index.html", "dblp": ";;122/2673.html;97/3742;01/8056", "google_scholar": ";d1L0KkoAAAAJ;j6eefhwAAAAJ;11aRt9oAAAAJ;", "orcid": ";0000-0002-0726-4522;0000-0003-3431-2021;;", "linkedin": ";;;;", "or_profile": "~Song_Wu7;~Zhiyu_Zhu1;~Junhui_Hou2;~Guangming_Shi1;~Jinjian_Wu1", "aff": "Xi'an University of Electronic Science and Technology;City University of Hong Kong;City University of Hong Kong;Xidian University;Xidian University", "aff_domain": "stu.xidian.edu.cn;cityu.edu.hk;cityu.edu.hk;xidian.edu.cn;xidian.edu", "position": "PhD student;Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024emotion,\ntitle={E-Motion: Future Motion Simulation via Event Sequence Diffusion},\nauthor={Song Wu and Zhiyu Zhu and Junhui Hou and Guangming Shi and Jinjian Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pWowK7jqok}\n}", "github": "", "reviewers": "8wZD;vmNW;AE3a;Yrkm", "pdf_size": 18053887, "rating": "4;5;5;6", "confidence": "4;4;2;4", "soundness": "2;3;1;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "71;78;72;108", "wc_strengths": "207;63;63;79", "wc_weaknesses": "305;128;149;280", "wc_questions": "134;2;31;78", "wc_limitations": "2;1;4;87", "wc_review": "719;272;319;632", "wc_reply_reviewers": "229;0;158;76", "wc_reply_authors": "957;86;49;92", "reply_reviewers": "1;0;1;1", "reply_authors": "6;3;3;4", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 15.105876340020794 ], "wc_strengths_avg": [ 103.0, 60.398675482166 ], "wc_weaknesses_avg": [ 215.5, 77.86045209218862 ], "wc_questions_avg": [ 61.25, 49.996874902337645 ], "wc_limitations_avg": [ 23.5, 36.67764987018661 ], "wc_review_avg": [ 485.5, 193.18967363707617 ], "wc_reply_reviewers_avg": [ 115.75, 86.00690379266074 ], "wc_reply_authors_avg": [ 296.0, 381.98363839305995 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w689M8IYsJgJ:scholar.google.com/&scioq=E-Motion:+Future+Motion+Simulation+via+Event+Sequence+Diffusion&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "stu.xidian.edu.cn;cityu.edu.hk;cityu.edu.hk;xidian.edu.cn;xidian.edu", "author_num": 5, "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "Xi'an University of Electronic Science and Technology;City University of Hong Kong;Xidian University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.cityu.edu.hk;http://www.xidian.edu.cn/", "aff_unique_abbr": "Xidian University;CityU;Xidian", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Xi'an;Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Data Free Backdoor Attacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93546", "id": "pX71TM2MLh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pX71TM2MLh", "openreview": "https://openreview.net/forum?id=pX71TM2MLh", "poster": "", "project": "", "author_site": "Bochuan Cao, Jinyuan Jia, Chuxuan Hu, Wenbo Guo, Zhen Xiang, Jinghui Chen, Bo Li, Dawn Song", "tldr": "", "abstract": "Backdoor attacks aim to inject a backdoor into a classifier such that it predicts any input with an attacker-chosen backdoor trigger as an attacker-chosen target class. \nExisting backdoor attacks require either retraining the classifier with some clean data or modifying the model's architecture.\nAs a result, they are 1) not applicable when clean data is unavailable, 2) less efficient when the model is large, and 3) less stealthy due to architecture changes. \nIn this work, we propose DFBA, a novel retraining-free and data-free backdoor attack without changing the model architecture. \nTechnically, our proposed method modifies a few parameters of a classifier to inject a backdoor. \nThrough theoretical analysis, we verify that our injected backdoor is provably undetectable and unremovable by various state-of-the-art defenses under mild assumptions. \nOur evaluation on multiple datasets further demonstrates that our injected backdoor: 1) incurs negligible classification loss, 2) achieves 100\\% attack success rates, and 3) bypasses six existing state-of-the-art defenses. \nMoreover, our comparison with a state-of-the-art non-data-free backdoor attack shows our attack is more stealthy and effective against various defenses while achieving less classification accuracy loss.\nWe will release our code upon paper acceptance.", "keywords": "Data free backdoor attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Bochuan Cao;Jinyuan Jia;Chuxuan Hu;Wenbo Guo;Zhen Xiang;Jinghui Chen;Bo Li;Dawn Song", "authorids": "~Bochuan_Cao1;~Jinyuan_Jia2;~Chuxuan_Hu1;~Wenbo_Guo1;~Zhen_Xiang1;~Jinghui_Chen1;~Bo_Li19;~Dawn_Song1", "gender": ";;F;M;M;M;F;F", "homepage": "https://aaaaaasuka.github.io/;https://jinyuan-jia.github.io/;;https://henrygwb.github.io/;https://zhenxianglance.github.io/;https://jinghuichen.github.io/;http://boli.cs.illinois.edu/;", "dblp": "334/3881;24/5124-1.html;332/5523;144/1238-2.html;20/2799.html;67/5633;50/3402-26;s/DXSong", "google_scholar": "eOZCg2IAAAAJ;iyg4ytkAAAAJ;3O_PDBkAAAAJ;KyPheRMAAAAJ;https://scholar.google.com/citations?hl=en;mKia7Y4AAAAJ;K8vJkTcAAAAJ;", "orcid": ";0000-0002-9785-7769;;;;;;", "linkedin": ";;chuxuan-hu-57bb481b3/;;;;;", "or_profile": "~Bochuan_Cao1;~Jinyuan_Jia2;~Chuxuan_Hu1;~Wenbo_Guo1;~Zhen_Xiang1;~Jinghui_Chen1;~Bo_Li19;~Dawn_Song1", "aff": "Pennsylvania State University;Pennsylvania State University;University of Illinois, Urbana Champaign;University of California, Santa Barbara;University of Illinois, Urbana Champaign;Pennsylvania State University;University of Illinois, Urbana Champaign;University of California, Berkeley", "aff_domain": "psu.edu;psu.edu;illinois.edu;ucsb.edu;illinois.edu;psu.edu;illinois.edu;berkeley.edu", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor;Postdoc;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncao2024data,\ntitle={Data Free Backdoor Attacks},\nauthor={Bochuan Cao and Jinyuan Jia and Chuxuan Hu and Wenbo Guo and Zhen Xiang and Jinghui Chen and Bo Li and Dawn Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pX71TM2MLh}\n}", "github": "", "reviewers": "Npgg;AzFZ;NvZc;dAVA;f5oW", "pdf_size": 711192, "rating": "5;5;6;6;7", "confidence": "5;4;4;3;4", "soundness": "2;3;2;4;3", "novelty": "3;3;3;3;3", "presentation": "3;3;4;2;3", "wc_summary": "67;117;122;82;87", "wc_strengths": "22;51;90;57;53", "wc_weaknesses": "337;118;376;155;208", "wc_questions": "54;27;152;91;6", "wc_limitations": "55;3;56;5;6", "wc_review": "535;316;796;390;360", "wc_reply_reviewers": "150;62;21;27;19", "wc_reply_authors": "638;393;25;23;21", "reply_reviewers": "2;1;1;1;1", "reply_authors": "3;3;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 95.0, 21.118712081942874 ], "wc_strengths_avg": [ 54.6, 21.62036077404815 ], "wc_weaknesses_avg": [ 238.8, 101.02554132495406 ], "wc_questions_avg": [ 66.0, 51.548035850069006 ], "wc_limitations_avg": [ 25.0, 24.92388412747901 ], "wc_review_avg": [ 479.4, 174.5137243886566 ], "wc_reply_reviewers_avg": [ 55.8, 49.61209529943278 ], "wc_reply_authors_avg": [ 220.0, 253.41191763608907 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.42257712736425823, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8807784465752410104&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "psu.edu;psu.edu;illinois.edu;ucsb.edu;illinois.edu;psu.edu;illinois.edu;berkeley.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;1;0;1;3", "aff_unique_norm": "Pennsylvania State University;University of Illinois Urbana-Champaign;University of California, Santa Barbara;University of California, Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.psu.edu;https://illinois.edu;https://www.ucsb.edu;https://www.berkeley.edu", "aff_unique_abbr": "PSU;UIUC;UCSB;UC Berkeley", "aff_campus_unique_index": "1;2;1;1;3", "aff_campus_unique": ";Urbana-Champaign;Santa Barbara;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Stage Predict+Optimize for (Mixed Integer) Linear Programs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93545", "id": "pXFiHHySEw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pXFiHHySEw", "openreview": "https://openreview.net/forum?id=pXFiHHySEw", "poster": "/media/PosterPDFs/NeurIPS%202024/93545.png?t=1731419669.8698478", "project": "", "author_site": "Xinyi Hu, Jasper Lee, Jimmy Lee, Peter Stuckey", "tldr": "", "abstract": "The recently-proposed framework of Predict+Optimize tackles optimization problems with parameters that are unknown at solving time, in a supervised learning setting. Prior frameworks consider only the scenario where all unknown parameters are (eventually) revealed simultaneously. In this work, we propose Multi-Stage Predict+Optimize, a novel extension catering to applications where unknown parameters are revealed in sequential stages, with optimization decisions made in between. We further develop three training algorithms for neural networks (NNs) for our framework as proof of concept, both of which handle all mixed integer linear programs. The first baseline algorithm is a natural extension of prior work, training a single NN which makes a single prediction of unknown parameters. The second and third algorithms instead leverage the possibility of updating parameter predictions between stages, and trains one NN per stage. To handle the interdependency between the neural networks, we adopt sequential and parallelized versions of coordinate descent for training. Experimentation on three benchmarks demonstrates the superior learning performance of our methods over classical approaches.", "keywords": "Constraint Optimization;Machine Learning;Predict+Optimize", "primary_area": "optimization", "supplementary_material": "/attachment/fe403c74aef3898516e178f45898f1b78014d78b.zip", "author": "Xinyi HU;Jasper C.H. Lee;Jimmy H.M. Lee;Peter J. Stuckey", "authorids": "~Xinyi_HU2;~Jasper_C.H._Lee1;~Jimmy_H.M._Lee1;~Peter_J._Stuckey1", "gender": "Not Specified;M;M;M", "homepage": "https://elizabethxyhu.github.io/;https://jasperchlee.github.io/;http://www.cse.cuhk.edu.hk/~jlee;https://people.eng.unimelb.edu.au/pstuckey/", "dblp": ";150/4950;l/JimmyHoManLee;s/PeterJStuckey", "google_scholar": "hANa7zAAAAAJ;z0Y4snAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=tvFekxwAAAAJ", "orcid": ";;0000-0001-9526-5850;0000-0003-2186-0459", "linkedin": ";;;peter-stuckey-564620109/", "or_profile": "~Xinyi_HU2;~Jasper_C.H._Lee1;~Jimmy_H.M._Lee1;~Peter_J._Stuckey1", "aff": "Department of Computer Science and Engineering;University of Wisconsin - Madison;The Chinese University of Hong Kong;Monash University", "aff_domain": "cse.cuhk.edu.hk;wisc.edu;cse.cuhk.edu.hk;monash.edu", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024multistage,\ntitle={Multi-Stage Predict+Optimize for (Mixed Integer) Linear Programs},\nauthor={Xinyi HU and Jasper C.H. Lee and Jimmy H.M. Lee and Peter J. Stuckey},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pXFiHHySEw}\n}", "github": "", "reviewers": "aupq;GGkj;fVWn;Bz3V", "pdf_size": 667732, "rating": "3;5;6;6", "confidence": "3;1;4;2", "soundness": "2;3;4;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "273;81;110;50", "wc_strengths": "74;107;70;56", "wc_weaknesses": "451;188;200;109", "wc_questions": "42;26;86;65", "wc_limitations": "5;1;1;126", "wc_review": "845;403;467;406", "wc_reply_reviewers": "0;21;57;232", "wc_reply_authors": "0;23;32;295", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 128.5, 86.08280896903864 ], "wc_strengths_avg": [ 76.75, 18.699933154960743 ], "wc_weaknesses_avg": [ 237.0, 128.40366038396257 ], "wc_questions_avg": [ 54.75, 22.75274708689041 ], "wc_limitations_avg": [ 33.25, 53.57413088422434 ], "wc_review_avg": [ 530.25, 183.50664156918137 ], "wc_reply_reviewers_avg": [ 77.5, 91.5 ], "wc_reply_authors_avg": [ 87.5, 120.36714668047922 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:COO7PQTTN0YJ:scholar.google.com/&scioq=Multi-Stage+Predict%2BOptimize+for+(Mixed+Integer)+Linear+Programs&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "cse.cuhk.edu.hk;wisc.edu;cse.cuhk.edu.hk;monash.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, San Diego;University of Wisconsin-Madison;Chinese University of Hong Kong;Monash University", "aff_unique_dep": "Department of Computer Science and Engineering;;;", "aff_unique_url": "https://cse.ucsd.edu;https://www.wisc.edu;https://www.cuhk.edu.hk;https://www.monash.edu", "aff_unique_abbr": "UCSD CSE;UW-Madison;CUHK;Monash", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;Hong Kong SAR", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "United States;China;Australia" }, { "title": "VidProM: A Million-scale Real Prompt-Gallery Dataset for Text-to-Video Diffusion Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97505", "id": "pYNl76onJL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pYNl76onJL", "openreview": "https://openreview.net/forum?id=pYNl76onJL", "poster": "/media/PosterPDFs/NeurIPS%202024/97505.png?t=1727964696.0452228", "project": "", "author_site": "Wenhao Wang, Yi Yang", "tldr": "", "abstract": "The arrival of Sora marks a new era for text-to-video diffusion models, bringing significant advancements in video generation and potential applications. However, Sora, along with other text-to-video diffusion models, is highly reliant on prompts, and there is no publicly available dataset that features a study of text-to-video prompts. In this paper, we introduce VidProM, the first large-scale dataset comprising 1.67 Million unique text-to-Video Prompts from real users. Additionally, this dataset includes 6.69 million videos generated by four state-of-the-art diffusion models, alongside some related data. We initially discuss the curation of this large-scale dataset, a process that is both time-consuming and costly. Subsequently, we underscore the need for a new prompt dataset specifically designed for text-to-video generation by illustrating how VidProM differs from DiffusionDB, a large-scale prompt-gallery dataset for image generation. Our extensive and diverse dataset also opens up many exciting new research areas. For instance, we suggest exploring text-to-video prompt engineering, efficient video generation, and video copy detection for diffusion models to develop better, more efficient, and safer models. The project (including the collected dataset VidProM and related code) is publicly available at https://vidprom.github.io under the CC-BY-NC 4.0 License.", "keywords": "text-to-video;prompt;diffusion models", "primary_area": "", "supplementary_material": "", "author": "Wenhao Wang;Yi Yang", "authorids": "~Wenhao_Wang2;~Yi_Yang22", "gender": "M;M", "homepage": "http://wangwenhao0716.github.io/;https://person.zju.edu.cn/yiyang", "dblp": ";33/4854-1.html", "google_scholar": "k3mq3XMAAAAJ;RMSuNFwAAAAJ", "orcid": "0000-0001-8727-1572;", "linkedin": ";", "or_profile": "~Wenhao_Wang2;~Yi_Yang22", "aff": "University of Technology Sydney;Zhejiang University", "aff_domain": "uts.edu.au;zju.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024vidprom,\ntitle={VidProM: A Million-scale Real Prompt-Gallery Dataset for Text-to-Video Diffusion Models},\nauthor={Wenhao Wang and Yi Yang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=pYNl76onJL}\n}", "github": "", "reviewers": "rjWb;iKQy;74gs", "pdf_size": 9444582, "rating": "6;9;9", "confidence": "3;5;5", "wc_summary_and_contributions": "129;40;62", "wc_strengths": "10;2;67", "wc_improvement": "30;25;52", "wc_limitations": "1;6;13", "wc_correctness": "1;1;1", "wc_clarity": "1;1;6", "wc_relation_to_prior_work": "1;1;19", "wc_documentation": "1;1;6", "wc_additional_feedback": "1;1;1", "wc_review": "175;78;227", "wc_reply_reviewers": "20;21;0", "wc_reply_authors": "22;42;32", "reply_reviewers": "1;2;0", "reply_authors": "2;2;2", "rating_avg": [ 8.0, 1.4142135623730951 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 77.0, 37.85058343892029 ], "wc_strengths_avg": [ 26.333333333333332, 28.940552094864323 ], "wc_improvement_avg": [ 35.666666666666664, 11.728408057172787 ], "wc_limitations_avg": [ 6.666666666666667, 4.9216076867444665 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_relation_to_prior_work_avg": [ 7.0, 8.48528137423857 ], "wc_documentation_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 160.0, 61.746794788609606 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 9.672412085697939 ], "wc_reply_authors_avg": [ 32.0, 8.16496580927726 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1265012661338929005&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uts.edu.au;zju.edu.cn", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Technology Sydney;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.zju.edu.cn", "aff_unique_abbr": "UTS;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;China" }, { "title": "Sequence-Augmented SE(3)-Flow Matching For Conditional Protein Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93544", "id": "paYwtPBpyZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=paYwtPBpyZ", "openreview": "https://openreview.net/forum?id=paYwtPBpyZ", "poster": "", "project": "", "author_site": "Guillaume Huguet, James Vuckovic, Kilian FATRAS, Eric Thibodeau-Laufer, Pablo Lemos, Riashat Islam, Chenghao Liu, Jarrid Rector-Brooks, Tara Akhound-Sadegh, Michael Bronstein, Alexander Tong, Joey Bose", "tldr": "", "abstract": "Proteins are essential for almost all biological processes and derive their diverse functions from complex $3 \\rm D$ structures, which are in turn determined by their amino acid sequences. \nIn this paper, we exploit the rich biological inductive bias of amino acid sequences and introduce FoldFlow++, a novel sequence-conditioned $\\text{SE}(3)$-equivariant flow matching model for protein structure generation. FoldFlow++ presents substantial new architectural features over the previous FoldFlow family of models including a protein large language model to encode sequence, a new multi-modal fusion trunk that combines structure and sequence representations, and a geometric transformer based decoder. To increase \ndiversity and novelty of generated samples -- crucial for de-novo drug design -- we\ntrain FoldFlow++ at scale on a new dataset \nthat is an order of magnitude \nlarger than PDB datasets of prior works, containing both known proteins in PDB and high-quality synthetic structures achieved through filtering. We further demonstrate the ability to align FoldFlow++ to arbitrary rewards, e.g. increasing secondary structures diversity, by introducing a Reinforced Finetuning (ReFT) objective. We empirically observe that FoldFlow++ outperforms previous state-of-the-art protein structure-based generative models, improving over RFDiffusion in terms of unconditional generation across all metrics including designability, diversity, and novelty across all protein lengths, as well as exhibiting generalization on the task of equilibrium conformation sampling. Finally, we demonstrate that a fine-tuned FoldFlow++ makes progress on challenging conditional design tasks such as designing scaffolds for the VHH nanobody.", "keywords": "Proteins;Flow Matching;Generative Models", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Guillaume Huguet;James Vuckovic;Kilian FATRAS;Eric Thibodeau-Laufer;Pablo Lemos;Riashat Islam;Cheng-Hao Liu;Jarrid Rector-Brooks;Tara Akhound-Sadegh;Michael M. Bronstein;Alexander Tong;Joey Bose", "authorids": "~Guillaume_Huguet1;~James_Vuckovic1;~Kilian_FATRAS1;~Eric_Thibodeau-Laufer1;~Pablo_Lemos1;~Riashat_Islam1;~Cheng-Hao_Liu1;~Jarrid_Rector-Brooks2;~Tara_Akhound-Sadegh1;~Michael_M._Bronstein1;~Alexander_Tong1;~Joey_Bose1", "gender": "M;M;M;M;M;M;M;M;;M;;M", "homepage": "https://mila.quebec/personne/guillaume-huguet/;http://www.jamesvuckovic.com/;http://kilianfatras.github.io;;https://pablo-lemos.github.io;https://riashat.github.io/;https://pchliu.github.io/;;https://sites.google.com/view/taraakhound-sadegh/home;http://www.inf.usi.ch/bronstein/;https://alextong.net;https://joeybose.github.io/", "dblp": "286/5365;;;119/3142;313/2645;198/0459;;230/4010;;07/2668;153/9296;174/3372", "google_scholar": "L8kYu9IAAAAJ;;https://scholar.google.ca/citations?user=DHMjyDgAAAAJ;https://scholar.google.ca/citations?user=NBUWnTYAAAAJ;AklQTTsAAAAJ;https://scholar.google.ca/citations?user=2_4Rs44AAAAJ;iVJGx0cAAAAJ;gxRPZh4AAAAJ;RHDoTkkAAAAJ;UU3N6-UAAAAJ;CS80pt4AAAAJ;ybPyI7IAAAAJ", "orcid": ";;;;0000-0002-4728-8473;;0000-0001-7923-6806;;;;0000-0002-2031-4096;", "linkedin": ";;;eric-laufer-8b378679/;;;chenghao-peter-liu/;;tara-akhound-sadegh-574748101/;mbronstein/;atong01/;", "or_profile": "~Guillaume_Huguet1;~James_Vuckovic1;~Kilian_FATRAS1;~Eric_Thibodeau-Laufer1;~Pablo_Lemos1;~Riashat_Islam1;~Cheng-Hao_Liu1;~Jarrid_Rector-Brooks2;~Tara_Akhound-Sadegh1;~Michael_M._Bronstein1;~Alexander_Tong1;~Joey_Bose1", "aff": "University of Montreal;Microsoft;McGill University;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;Saudi Data and AI Authority, Saudi Data and AI Authority;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;McGill University;University of Oxford;Universit\u00e9 de Montr\u00e9al;University of Oxford", "aff_domain": "umontreal.ca;microsoft.com;mcgill.ca;mila.umontreal.ca;umontreal.ca;sdaia.gov.sa;mila.umontreal.ca;mila.umontreal.ca;mcgill.ca;ox.ac.uk;umontreal.ca;oxford.ac.uk", "position": "PhD student;Applied Scientist;Postdoc;Researcher;Postdoc;Researcher;PhD student intern;PhD student;PhD student;Full Professor;Postdoc;Postdoc", "bibtex": "@inproceedings{\nhuguet2024sequenceaugmented,\ntitle={Sequence-Augmented {SE}(3)-Flow Matching For Conditional Protein Generation},\nauthor={Guillaume Huguet and James Vuckovic and Kilian FATRAS and Eric Thibodeau-Laufer and Pablo Lemos and Riashat Islam and Cheng-Hao Liu and Jarrid Rector-Brooks and Tara Akhound-Sadegh and Michael M. Bronstein and Alexander Tong and Joey Bose},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=paYwtPBpyZ}\n}", "github": "", "reviewers": "ffQt;pz4y;PUPL;VRjh", "pdf_size": 13861569, "rating": "5;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "46;31;88;74", "wc_strengths": "55;16;75;150", "wc_weaknesses": "91;56;38;36", "wc_questions": "24;134;133;51", "wc_limitations": "1;17;1;39", "wc_review": "217;254;335;350", "wc_reply_reviewers": "0;0;0;17", "wc_reply_authors": "153;155;155;14", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.75, 22.454119889231908 ], "wc_strengths_avg": [ 74.0, 48.73910134583936 ], "wc_weaknesses_avg": [ 55.25, 22.060994991160303 ], "wc_questions_avg": [ 85.5, 48.94129136015927 ], "wc_limitations_avg": [ 14.5, 15.580436450882884 ], "wc_review_avg": [ 289.0, 55.330823236239674 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 119.25, 60.77160109788124 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1512679293920114250&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "umontreal.ca;microsoft.com;mcgill.ca;mila.umontreal.ca;umontreal.ca;sdaia.gov.sa;mila.umontreal.ca;mila.umontreal.ca;mcgill.ca;ox.ac.uk;umontreal.ca;oxford.ac.uk", "author_num": 12, "aff_unique_index": "0;1;2;0;3;4;0;0;2;5;3;5", "aff_unique_norm": "University of Montreal;Microsoft;McGill University;Universit\u00e9 de Montr\u00e9al;Saudi Data and AI Authority;University of Oxford", "aff_unique_dep": ";Microsoft Corporation;;;;", "aff_unique_url": "https://wwwumontreal.ca;https://www.microsoft.com;https://www.mcgill.ca;https://www.umontreal.ca;https://sdaia.gov.sa;https://www.ox.ac.uk", "aff_unique_abbr": "UM;Microsoft;McGill;UdeM;SDAIA;Oxford", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;0;0;0;2;0;0;0;3;0;3", "aff_country_unique": "Canada;United States;Saudi Arabia;United Kingdom" }, { "title": "End-to-End Video Semantic Segmentation in Adverse Weather using Fusion Blocks and Temporal-Spatial Teacher-Student Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93543", "id": "paobkszgIA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=paobkszgIA", "openreview": "https://openreview.net/forum?id=paobkszgIA", "poster": "/media/PosterPDFs/NeurIPS%202024/93543.png?t=1731669260.1021125", "project": "", "author_site": "Xin Yang, YAN WENDING, Michael Bi Mi, Yuan Yuan, Robby Tan", "tldr": "", "abstract": "Adverse weather conditions can significantly degrade the video frames, causing existing video semantic segmentation methods to produce erroneous predictions. In this work, we target adverse weather conditions and introduce an end-to-end domain adaptation strategy that leverages a fusion block, temporal-spatial teacher-student learning, and a temporal weather degradation augmentation approach. The fusion block integrates temporal information from adjacent frames at the feature level, trained end-to-end, eliminating the need for pretrained optical flow, distinguishing our method from existing approaches. Our teacher-student approach involves two teachers: one focuses on exploring temporal information from adjacent frames, and the other harnesses spatial information from the current frame. Finally, we apply temporal weather degradation augmentation to consecutive frames to more accurately represent adverse weather degradations. Our method achieves a performance of 25.4 and 33.0 mIoU on the adaptation from VIPER and Synthia to MVSS, respectively, representing an improvement of 4.3 and 5.8 mIoU over the existing state-of-the-art method.", "keywords": "optical-flow free;video semantic segmentation;adverse conditions", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xin Yang;YAN WENDING;Michael Bi Mi;Yuan Yuan;Robby T. Tan", "authorids": "~Xin_Yang13;~YAN_WENDING1;~Michael_Bi_Mi1;~Yuan_Yuan12;~Robby_T._Tan1", "gender": "M;M;M;M;", "homepage": "https://suishen01.github.io/;;https://www.huawei.com/en/;https://sites.google.com/site/sorsyuanyuan/home;https://tanrobby.github.io/", "dblp": ";262/3827;317/0937.html;64/5845;t/RobbyTTan", "google_scholar": "0tsWd3YAAAAJ;;;V8rA6HcAAAAJ;MOD0gv4AAAAJ", "orcid": "0000-0002-4617-5733;0000-0001-5993-8405;;;0000-0001-7532-6919", "linkedin": ";;michael-bi-mi-957122a2/;;robby-t-tan-875a5a31/?originalSubdomain=sg", "or_profile": "~Xin_Yang13;~YAN_WENDING1;~Michael_Bi_Mi1;~Yuan_Yuan12;~Robby_Tan4", "aff": "National University of Singapore;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;National University of Singapore", "aff_domain": "u.nus.edu;huawei.com;huawei.com;huawei.com;nus.edu.sg", "position": "PhD student;Researcher;Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nyang2024endtoend,\ntitle={End-to-End Video Semantic Segmentation in Adverse Weather using Fusion Blocks and Temporal-Spatial Teacher-Student Learning},\nauthor={Xin Yang and YAN WENDING and Michael Bi Mi and Yuan Yuan and Robby T. Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=paobkszgIA}\n}", "github": "", "reviewers": "L8p7;G5E9;dY49;1Ffg", "pdf_size": 8768510, "rating": "5;5;7;7", "confidence": "4;5;5;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;2", "wc_summary": "100;53;167;81", "wc_strengths": "55;47;55;86", "wc_weaknesses": "124;192;294;318", "wc_questions": "5;5;5;79", "wc_limitations": "1;1;9;8", "wc_review": "285;298;530;572", "wc_reply_reviewers": "156;33;27;60", "wc_reply_authors": "403;29;23;36", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.25, 42.00818372650738 ], "wc_strengths_avg": [ 60.75, 14.939461168328663 ], "wc_weaknesses_avg": [ 232.0, 78.26876771739798 ], "wc_questions_avg": [ 23.5, 32.04293994002423 ], "wc_limitations_avg": [ 4.75, 3.766629793329841 ], "wc_review_avg": [ 421.25, 130.677800333492 ], "wc_reply_reviewers_avg": [ 69.0, 51.74456493198102 ], "wc_reply_authors_avg": [ 122.75, 161.86780872057298 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15721086820321756064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "u.nus.edu;huawei.com;huawei.com;huawei.com;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "National University of Singapore;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.nus.edu.sg;https://www.huawei.com", "aff_unique_abbr": "NUS;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "RealMAN: A Real-Recorded and Annotated Microphone Array Dataset for Dynamic Speech Enhancement and Localization", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97504", "id": "pbscHlRG35", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pbscHlRG35", "openreview": "https://openreview.net/forum?id=pbscHlRG35", "poster": "/media/PosterPDFs/NeurIPS%202024/97504.png?t=1731915808.8986526", "project": "", "author_site": "Bing Yang, Changsheng Quan, Yabo Wang, Pengyu Wang, Yujie Yang, Ying Fang, Nian Shao, Hui Bu, Xin Xu, Xiaofei Li", "tldr": "", "abstract": "The training of deep learning-based multichannel speech enhancement and source localization systems relies heavily on the simulation of room impulse response and multichannel diffuse noise, due to the lack of large-scale real-recorded datasets. However, the acoustic mismatch between simulated and real-world data could degrade the model performance when applying in real-world scenarios. To bridge this simulation-to-real gap, this paper presents a new relatively large-scale Real-recorded and annotated Microphone Array speech&Noise (RealMAN) dataset. The proposed dataset is valuable in two aspects: 1) benchmarking speech enhancement and localization algorithms in real scenarios; 2) offering a substantial amount of real-world training data for potentially improving the performance of real-world applications. Specifically, a 32-channel array with high-fidelity microphones is used for recording. A loudspeaker is used for playing source speech signals (about 35 hours of Mandarin speech). A total of 83.7 hours of speech signals (about 48.3 hours for static speaker and 35.4 hours for moving speaker) are recorded in 32 different scenes, and 144.5 hours of background noise are recorded in 31 different scenes. Both speech and noise recording scenes cover various common indoor, outdoor, semi-outdoor and transportation environments, which enables the training of general-purpose speech enhancement and source localization networks. To obtain the task-specific annotations, speaker location is annotated with an omni-directional fisheye camera by automatically detecting the loudspeaker. The direct-path signal is set as the target clean speech for speech enhancement, which is obtained by filtering the source speech signal with an estimated direct-path propagation filter. Baseline experiments demonstrate that i) compared to using simulated data, the proposed dataset is indeed able to train better speech enhancement and source localization networks; ii) using various sub-arrays of the proposed 32-channel microphone array can successfully train variable-array networks that can be directly used to unseen arrays.", "keywords": "Multi-channel;audio dataset;speech enhancement;sound source localization", "primary_area": "", "supplementary_material": "/attachment/965a3bf28c839525c625ff386b58726b9967ae63.pdf", "author": "Bing Yang;Changsheng Quan;Yabo Wang;Pengyu Wang;Yujie Yang;Ying Fang;Nian Shao;Hui Bu;Xin Xu;Xiaofei Li", "authorids": "~Bing_Yang4;~Changsheng_Quan2;~Yabo_Wang3;~Pengyu_Wang7;~Yujie_Yang6;~Ying_Fang3;~Nian_Shao1;~Hui_Bu1;~Xin_Xu16;~Xiaofei_Li6", "gender": "F;;F;M;F;F;M;F;M;M", "homepage": "https://sites.google.com/view/bing-yang;https://github.com/quancs;;;;;https://saoyear.github.io;https://www.aishelltech.com/Company_Profile;https://audio.westlake.edu.cn;https://www.aishelltech.com/Company_Profile", "dblp": ";;;;;;;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=_rt11bkAAAAJ;;;j4EqUL8AAAAJ;VP-10UEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;MV_AgBoAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0002-8978-2322;;0009-0009-3621-4055;0000-0001-5768-0658;;0009-0003-8767-1172;;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Bing_Yang4;~Changsheng_Quan2;~Yabo_Wang3;~Pengyu_Wang7;~Yujie_Yang6;~Ying_Fang3;~Nian_Shao1;~Xin_Xu16;~Xiaofei_Li6;~HUIBU1", "aff": "Westlake University;Westlake University;Westlake University;Westlake University;Westlake University;Westlake University;Westlake University;Beijing AISHELL Technology Co., Ltd.;Westlake University;Beijing AIShell Technology Co. Ltd, China", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;aishelltech.com;westlake.edu.cn;aishelltech.com", "position": "Postdoc;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Technician;Assistant Professor;Employer", "bibtex": "@inproceedings{\nyang2024realman,\ntitle={Real{MAN}: A Real-Recorded and Annotated Microphone Array Dataset for Dynamic Speech Enhancement and Localization},\nauthor={Bing Yang and Changsheng Quan and Yabo Wang and Pengyu Wang and Yujie Yang and Ying Fang and Nian Shao and Hui Bu and Xin Xu and Xiaofei Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=pbscHlRG35}\n}", "github": "", "reviewers": "R9w9;Uhcz;UpSw;DaAx", "pdf_size": 4147832, "rating": "5;6;7;7", "confidence": "5;2;4;5", "wc_summary_and_contributions": "66;43;75;122", "wc_strengths": "22;52;64;53", "wc_improvement": "9;71;56;37", "wc_limitations": "25;5;5;28", "wc_correctness": "29;7;6;11", "wc_clarity": "34;10;6;16", "wc_relation_to_prior_work": "49;7;11;104", "wc_documentation": "10;3;13;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "245;199;237;389", "wc_reply_reviewers": "21;16;17;65", "wc_reply_authors": "52;52;52;52", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "wc_summary_and_contributions_avg": [ 76.5, 28.74456470360962 ], "wc_strengths_avg": [ 47.75, 15.594470173750693 ], "wc_improvement_avg": [ 43.25, 23.155722834755128 ], "wc_limitations_avg": [ 15.75, 10.80219885023415 ], "wc_correctness_avg": [ 13.25, 9.283722313813572 ], "wc_clarity_avg": [ 16.5, 10.712142642814275 ], "wc_relation_to_prior_work_avg": [ 42.75, 38.97675589373749 ], "wc_documentation_avg": [ 10.75, 5.11737237261468 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 267.5, 72.26859622270243 ], "wc_reply_reviewers_avg": [ 29.75, 20.437404434027332 ], "wc_reply_authors_avg": [ 52.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12414244664535270849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;aishelltech.com;westlake.edu.cn;aishelltech.com", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;1;0;1", "aff_unique_norm": "Westlake University;AISHELL Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.westlake.edu.cn;http://www.aisHELL.com", "aff_unique_abbr": "WU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "LoTLIP: Improving Language-Image Pre-training for Long Text Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93542", "id": "pc4GSBi1Hx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pc4GSBi1Hx", "openreview": "https://openreview.net/forum?id=pc4GSBi1Hx", "poster": "/media/PosterPDFs/NeurIPS%202024/93542.png?t=1733634438.2781224", "project": "", "author_site": "Wei Wu, Kecheng Zheng, Shuailei Ma, Fan Lu, Yuxin Guo, Yifei Zhang, Wei Chen, Qingpei Guo, Yujun Shen, Zheng-Jun Zha", "tldr": "", "abstract": "In this work, we empirically confirm that the key reason causing such an issue is that the training images are usually paired with short captions, leaving certain tokens easily overshadowed by salient tokens. Towards this problem, our initial attempt is to relabel the data with long captions, however, directly learning with which may lead to performance degradation in understanding short text (e.g., in the image classification task). Then, after incorporating corner tokens to aggregate diverse textual information, we manage to help the model catch up to its original level of short text understanding yet greatly enhance its capability of long text understanding. We further look into whether the model can continuously benefit from longer captions and notice a clear trade-off between the performance and the efficiency. Finally, we validate the effectiveness of our approach using a self-constructed large-scale dataset, which consists of 100M long caption oriented text-image pairs. Our method achieves superior performance in long-text-image retrieval tasks. The project page is available at https://wuw2019.github.io/lot-lip.", "keywords": "Language-Image Pre-Training", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Wei Wu;Kecheng Zheng;Shuailei Ma;Fan Lu;Yuxin Guo;Yifei Zhang;Wei Chen;Qingpei Guo;Yujun Shen;Zheng-Jun Zha", "authorids": "~Wei_Wu20;~Kecheng_Zheng2;~Shuailei_Ma1;~Fan_Lu4;~Yuxin_Guo2;~Yifei_Zhang4;~Wei_Chen34;~Qingpei_Guo1;~Yujun_Shen1;~Zheng-Jun_Zha2", "gender": "F;M;M;M;F;;M;M;;M", "homepage": ";https://zkcys001.github.io/;https://github.com/xiaomabufei;https://github.com/LuFan31;;;http://www.cad.zju.edu.cn/home/chenwei/;;;", "dblp": ";228/1362;337/1450;;;;c/WeiChen1;164/5991;;23/1818", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;hMDQifQAAAAJ;dNhzCu4AAAAJ;ILpxpfwAAAAJ;x_0spxgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;EgQyYGUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "orcid": "0000-0001-5218-1139;;;;;0009-0005-4831-883X;0000-0002-8365-4741;;;", "linkedin": ";;;;;zhang-yf-2bb8a61a1;;;;", "or_profile": "~Wei_Wu20;~Kecheng_Zheng2;~Shuailei_Ma1;~Fan_Lu4;~Yuxin_Guo2;~Yifei_Zhang4;~Wei_Chen34;~Qingpei_Guo1;~Yujun_Shen1;~Zheng-Jun_Zha2", "aff": "University of Science and Technology of China;Zhejiang University;Northeastern University;University of Science and Technology of China;Alibaba Group;Shanghai Jiaotong University;State key laboratory of CAD&CG, Zhejiang University;Ant Group;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;zju.edu.cn;neu.edu.cn;mail.ustc.edu.cn;alibaba-inc.com;sjtu.edu.cn;zju.edu.cn;antgroup.com;;ustc.edu.cn", "position": "PhD student;Postdoc;PhD student;PhD student;Intern;PhD student;Full Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nwu2024lotlip,\ntitle={Lo{TLIP}: Improving Language-Image Pre-training for Long Text Understanding},\nauthor={Wei Wu and Kecheng Zheng and Shuailei Ma and Fan Lu and Yuxin Guo and Yifei Zhang and Wei Chen and Qingpei Guo and Yujun Shen and Zheng-Jun Zha},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pc4GSBi1Hx}\n}", "github": "", "reviewers": "NJUx;Zcuk;f34j;sjvn", "pdf_size": 1198233, "rating": "5;5;5;7", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;3;3", "wc_summary": "125;50;84;87", "wc_strengths": "61;22;114;104", "wc_weaknesses": "433;66;64;72", "wc_questions": "51;15;4;6", "wc_limitations": "11;16;11;13", "wc_review": "681;169;277;282", "wc_reply_reviewers": "493;18;17;33", "wc_reply_authors": "2519;37;24;29", "reply_reviewers": "2;1;1;1", "reply_authors": "6;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 86.5, 26.556543449779003 ], "wc_strengths_avg": [ 75.25, 36.62905267680288 ], "wc_weaknesses_avg": [ 158.75, 158.3656765211452 ], "wc_questions_avg": [ 19.0, 18.934096228761486 ], "wc_limitations_avg": [ 12.75, 2.0463381929681126 ], "wc_review_avg": [ 352.25, 195.09917349901818 ], "wc_reply_reviewers_avg": [ 140.25, 203.75889551133713 ], "wc_reply_authors_avg": [ 652.25, 1077.7785892751813 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9512945619963394489&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;zju.edu.cn;neu.edu.cn;mail.ustc.edu.cn;alibaba-inc.com;sjtu.edu.cn;zju.edu.cn;antgroup.com;;ustc.edu.cn", "author_num": 10, "aff_unique_index": "0;1;2;0;3;4;1;5;0", "aff_unique_norm": "University of Science and Technology of China;Zhejiang University;Northeastern University;Alibaba Group;Shanghai Jiao Tong University;Ant Group", "aff_unique_dep": ";;;;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.zju.edu.cn;https://www.northeastern.edu;https://www.alibaba.com;https://www.sjtu.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "USTC;ZJU;NEU;Alibaba;SJTU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "Sharing Key Semantics in Transformer Makes Efficient Image Restoration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93541", "id": "pebP89l4v6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pebP89l4v6", "openreview": "https://openreview.net/forum?id=pebP89l4v6", "poster": "/media/PosterPDFs/NeurIPS%202024/93541.png?t=1731724130.7537441", "project": "", "author_site": "Bin Ren, Yawei Li, Jingyun Liang, Rakesh Ranjan, Mengyuan Liu, Rita Cucchiara, Luc V Gool, Ming-Hsuan Yang, Nicu Sebe", "tldr": "", "abstract": "Image Restoration (IR), a classic low-level vision task, has witnessed significant advancements through deep models that effectively model global information. Notably, the emergence of Vision Transformers (ViTs) has further propelled these advancements. When computing, the self-attention mechanism, a cornerstone of ViTs, tends to encompass all global cues, even those from semantically unrelated objects or regions. This inclusivity introduces computational inefficiencies, particularly noticeable with high input resolution, as it requires processing irrelevant information, thereby impeding efficiency. Additionally, for IR, it is commonly noted that small segments of a degraded image, particularly those closely aligned semantically, provide particularly relevant information to aid in the restoration process, as they contribute essential contextual cues crucial for accurate reconstruction. To address these challenges, we propose boosting IR's performance by sharing the key semantics via Transformer for IR (i.e., SemanIR) in this paper. Specifically, SemanIR initially constructs a sparse yet comprehensive key-semantic dictionary within each transformer stage by establishing essential semantic connections for every degraded patch. Subsequently, this dictionary is shared across all subsequent transformer blocks within the same stage. This strategy optimizes attention calculation within each block by focusing exclusively on semantically related components stored in the key-semantic dictionary. As a result, attention calculation achieves linear computational complexity within each window. Extensive experiments across 6 IR tasks confirm the proposed SemanIR's state-of-the-art performance, quantitatively and qualitatively showcasing advancements. The visual results, code, and trained models are available at: https://github.com/Amazingren/SemanIR.", "keywords": "Low-level Vision;Image Restoration;Vision Transformer", "primary_area": "machine_vision", "supplementary_material": "/attachment/951d033e5c576600fe755692c7c9878192d3d59a.zip", "author": "Bin Ren;Yawei Li;Jingyun Liang;Rakesh Ranjan;Mengyuan Liu;Rita Cucchiara;Luc Van Gool;Ming-Hsuan Yang;Nicu Sebe", "authorids": "~Bin_Ren2;~Yawei_Li1;~Jingyun_Liang1;~Rakesh_Ranjan2;~Mengyuan_Liu2;~Rita_Cucchiara1;~Luc_Van_Gool1;~Ming-Hsuan_Yang1;~Nicu_Sebe1", "gender": ";M;;;;F;;M;M", "homepage": ";https://yaweili.bitbucket.io/;https://jingyunliang.github.io/;;https://www.semanticscholar.org/author/Mengyuan-Liu/47842072;https://aimagelab.ing.unimore.it/imagelab/;;https://faculty.ucmerced.edu/mhyang/;http://disi.unitn.it/~sebe/", "dblp": ";32/6740-1;210/5052;;;c/RitaCucchiara;61/5017;79/3711.html;20/3519", "google_scholar": ";IFLsTGsAAAAJ;https://scholar.google.com.hk/citations?user=3-Hz9BgAAAAJ;;woX_4AcAAAAJ;OM3sZEoAAAAJ;https://scholar.google.be/citations?user=TwMib_QAAAAJ;p9-ohHsAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ", "orcid": ";0000-0002-8948-7892;;;0000-0002-6332-8316;0000-0002-2239-283X;;0000-0003-4848-2304;0000-0002-6597-7248", "linkedin": ";yawei-li-89912ba8/;;;;rita-cucchiara-a4653a13/?originalSubdomain=it;;minghsuanyang/;", "or_profile": "~Bin_Ren2;~Yawei_Li1;~Jingyun_Liang1;~Rakesh_Ranjan2;~Mengyuan_Liu2;~Rita_Cucchiara1;~Luc_Van_Gool1;~Ming-Hsuan_Yang1;~Nicu_Sebe1", "aff": ";ETHZ - ETH Zurich;ETH Zurich;;Peking University;Universit\u00e0 di modena e reggio emilia;KU Leuven;University of California at Merced;University of Trento", "aff_domain": ";ethz.ch;ethz.ch;;pku.edu.cn;unimore.it;kuleuven.be;umcerced.edu;unitn.it", "position": ";Lecturer;PhD student;;Assistant Professor;Full Professor;Emeritus;Professor;Full Professor", "bibtex": "@inproceedings{\nren2024sharing,\ntitle={Sharing Key Semantics in Transformer Makes Efficient Image Restoration},\nauthor={Bin Ren and Yawei Li and Jingyun Liang and Rakesh Ranjan and Mengyuan Liu and Rita Cucchiara and Luc Van Gool and Ming-Hsuan Yang and Nicu Sebe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pebP89l4v6}\n}", "github": "", "reviewers": "YUtD;aTgi;hRwb;xhpu", "pdf_size": 43101642, "rating": "6;6;6;6", "confidence": "5;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "65;104;47;65", "wc_strengths": "34;59;51;66", "wc_weaknesses": "216;101;164;67", "wc_questions": "2;563;34;53", "wc_limitations": "11;9;9;12", "wc_review": "328;836;305;263", "wc_reply_reviewers": "108;17;95;56", "wc_reply_authors": "960;56;102;98", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 20.825165065372232 ], "wc_strengths_avg": [ 52.5, 11.926860441876563 ], "wc_weaknesses_avg": [ 137.0, 57.37159575957427 ], "wc_questions_avg": [ 163.0, 231.65815332079293 ], "wc_limitations_avg": [ 10.25, 1.299038105676658 ], "wc_review_avg": [ 433.0, 233.83648132829916 ], "wc_reply_reviewers_avg": [ 69.0, 35.601966237835796 ], "wc_reply_authors_avg": [ 304.0, 379.17014650417826 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17224219746821410212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";ethz.ch;ethz.ch;;pku.edu.cn;unimore.it;kuleuven.be;umcerced.edu;unitn.it", "author_num": 9, "aff_unique_index": "0;0;1;2;3;4;5", "aff_unique_norm": "ETH Zurich;Peking University;Universit\u00e0 di Modena e Reggio Emilia;Katholieke Universiteit Leuven;University of California, Merced;University of Trento", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ethz.ch;http://www.pku.edu.cn;https://www.unimore.it;https://www.kuleuven.be;https://www.ucmerced.edu;https://www.unitn.it", "aff_unique_abbr": "ETHZ;Peking U;Unimore;KU Leuven;UC Merced;UniTN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Merced", "aff_country_unique_index": "0;0;1;2;3;4;2", "aff_country_unique": "Switzerland;China;Italy;Belgium;United States" }, { "title": "Scaling Laws for Reward Model Overoptimization in Direct Alignment Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93540", "id": "pf4OuJyn4Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pf4OuJyn4Q", "openreview": "https://openreview.net/forum?id=pf4OuJyn4Q", "poster": "", "project": "", "author_site": "Rafael Rafailov, Yaswanth Chittepu, Ryan Park, Harshit Sushil Sikchi, Joey Hejna, Brad Knox, Chelsea Finn, Scott Niekum", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF)has been crucial to the recent success of Large Language Models (LLMs), however it is often a complex and brittle process. In the classical RLHF framework, a reward model is first trained to represent human preferences, which is in turn used by an online reinforcement learning (RL) algorithm to optimized the LLM. A prominent issue with such methods is reward over-optimization or reward hacking, where the performance as measured by the learned proxy reward model increases, but the true model quality plateaus or even deteriorates. Direct Alignment Algorithms (DDAs), such as Direct Preference Optimization (DPO) have emerged as alternatives to the classical RLHF pipeline. However, despite not training a separate proxy reward model or using RL, they still commonly deteriorate from over-optimization. While the so-called reward hacking phenomenon is not well-defined for DAAs, we still uncover similar trends: at higher KL-budgets, DAA algorithms exhibit similar degradation patters to their classic RLHF counterparts. In particular, we find that DAA methods deteriorate not only across a wide range of KL-budgets, but also often before even a single epoch of the dataset is completed. Through extensive empirical experimentation this work formulates the reward over-optimization or hacking problem for DAAs and explores its consequences across objectives, training regimes, and model scales.", "keywords": "Reinforcement Learning From Human Feedback;Direct Preference Optimization;Reward Hacking", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a328dc0d62a6e5c792f1e0aca02f183839ead34d.zip", "author": "Rafael Rafailov;Yaswanth Chittepu;Ryan Park;Harshit Sikchi;Joey Hejna;W. Bradley Knox;Chelsea Finn;Scott Niekum", "authorids": "~Rafael_Rafailov1;~Yaswanth_Chittepu1;~Ryan_Park1;~Harshit_Sikchi1;~Joey_Hejna1;~W._Bradley_Knox2;~Chelsea_Finn1;~Scott_Niekum1", "gender": "M;M;;M;M;F;M;M", "homepage": "https://rmrafailov.github.io/;;;https://hari-sikchi.github.io/;https://www.bradknox.net;https://ai.stanford.edu/~cbfinn/;https://people.cs.umass.edu/~sniekum/index.php;https://joeyhejna.com", "dblp": "272/5358;379/6061;;271/4663;29/853;131/1783;62/8399;336/3297", "google_scholar": "TwABcRgAAAAJ;sGEz950AAAAJ;1pLSC-MAAAAJ;jFOPZE0AAAAJ;0a58TKgAAAAJ;vfPE6hgAAAAJ;4wXYfSUAAAAJ;y_sLoXoAAAAJ", "orcid": ";;;;0000-0002-6006-9523;;;", "linkedin": ";;;;wbknox/;;;", "or_profile": "~Rafael_Rafailov1;~Yaswanth_Chittepu1;~Ryan_Park1;~Harshit_Sikchi1;~W._Bradley_Knox2;~Chelsea_Finn1;~Scott_Niekum1;~Donald_Joseph_Hejna_III1", "aff": "Stanford University;University of Massachusetts at Amherst;Stanford University;University of Texas, Austin;Massachusetts Institute of Technology;Google;University of Massachusetts at Amherst;Google", "aff_domain": "stanford.edu;umass.edu;stanford.edu;utexas.edu;mit.edu;google.com;umass.edu;google.com", "position": "PhD student;PhD student;Undergrad student;PhD student;Researcher;Research Scientist;Associate Professor;Intern", "bibtex": "@inproceedings{\nrafailov2024scaling,\ntitle={Scaling Laws for Reward Model Overoptimization in Direct Alignment Algorithms},\nauthor={Rafael Rafailov and Yaswanth Chittepu and Ryan Park and Harshit Sikchi and Joey Hejna and W. Bradley Knox and Chelsea Finn and Scott Niekum},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pf4OuJyn4Q}\n}", "github": "", "reviewers": "r4HU;bFR8;Q9V7;TztD", "pdf_size": 9996286, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "23;25;122;186", "wc_strengths": "37;56;57;152", "wc_weaknesses": "146;213;133;372", "wc_questions": "120;7;73;9", "wc_limitations": "5;8;1;4", "wc_review": "331;309;386;723", "wc_reply_reviewers": "0;7;24;329", "wc_reply_authors": "45;45;45;230", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 68.82949948968103 ], "wc_strengths_avg": [ 75.5, 44.88039661143827 ], "wc_weaknesses_avg": [ 216.0, 95.04472631345729 ], "wc_questions_avg": [ 52.25, 47.27248142418589 ], "wc_limitations_avg": [ 4.5, 2.5 ], "wc_review_avg": [ 437.25, 167.34451738852994 ], "wc_reply_reviewers_avg": [ 90.0, 138.26243162913056 ], "wc_reply_authors_avg": [ 91.25, 80.10734985006057 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1137093214448936661&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "stanford.edu;umass.edu;stanford.edu;utexas.edu;mit.edu;google.com;umass.edu;google.com", "author_num": 8, "aff_unique_index": "0;1;0;2;3;4;1;4", "aff_unique_norm": "Stanford University;University of Massachusetts Amherst;University of Texas at Austin;Massachusetts Institute of Technology;Google", "aff_unique_dep": ";;;;Google", "aff_unique_url": "https://www.stanford.edu;https://www.umass.edu;https://www.utexas.edu;https://web.mit.edu;https://www.google.com", "aff_unique_abbr": "Stanford;UMass Amherst;UT Austin;MIT;Google", "aff_campus_unique_index": "0;1;0;2;4;1;4", "aff_campus_unique": "Stanford;Amherst;Austin;;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On Causal Discovery in the Presence of Deterministic Relations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93539", "id": "pfvcsgFrJ6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pfvcsgFrJ6", "openreview": "https://openreview.net/forum?id=pfvcsgFrJ6", "poster": "", "project": "", "author_site": "Loka Li, Haoyue Dai, Hanin Al Ghothani, Biwei Huang, Jiji Zhang, Shahar Harel, Isaac Bentwich, Guangyi Chen, Kun Zhang", "tldr": "", "abstract": "Many causal discovery methods typically rely on the assumption of independent noise, yet real-life situations often involve deterministic relationships. In these cases, observed variables are represented as deterministic functions of their parental variables without noise.\nWhen determinism is present, constraint-based methods encounter challenges due to the violation of the faithfulness assumption. In this paper, we find, supported by both theoretical analysis and empirical evidence, that score-based methods with exact search can naturally address the issues of deterministic relations under rather mild assumptions. Nonetheless, exact score-based methods can be computationally expensive. To enhance the efficiency and scalability, we develop a novel framework for causal discovery that can detect and handle deterministic relations, called Determinism-aware Greedy Equivalent Search (DGES). DGES comprises three phases: (1) identify minimal deterministic clusters (i.e., a minimal set of variables with deterministic relationships), (2) run modified Greedy Equivalent Search (GES) to obtain an initial graph, and (3) perform exact search exclusively on the deterministic cluster and its neighbors. The proposed DGES accommodates both linear and nonlinear causal relationships, as well as both continuous and discrete data types. Furthermore, we investigate the identifiability conditions of DGES. We conducted extensive experiments on both simulated and real-world datasets to show the efficacy of our proposed method.", "keywords": "causal discovery;deterministic relations", "primary_area": "causal_inference", "supplementary_material": "/attachment/e564eb57d44d2c413ed7b99b7e0af0e2f09d1c17.zip", "author": "Loka Li;Haoyue Dai;Hanin Al Ghothani;Biwei Huang;Jiji Zhang;Shahar Harel;Isaac Bentwich;Guangyi Chen;Kun Zhang", "authorids": "~Loka_Li1;~Haoyue_Dai1;~Hanin_Al_Ghothani1;~Biwei_Huang1;~Jiji_Zhang1;~Shahar_Harel2;~Isaac_Bentwich1;~Guangyi_Chen1;~Kun_Zhang1", "gender": "M;;F;F;M;M;M;M;M", "homepage": "https://lokali.github.io;https://hyda.cc;;;;;;https://chengy12.github.io/;http://www.andrew.cmu.edu/user/kunz1/", "dblp": "371/1096;277/1316;;165/3288;99/3659;;;c/GuangyiChen-2;96/3115-1", "google_scholar": "PT5AMzgAAAAJ;f4tCtoMAAAAJ;;;5GAikocAAAAJ;WR5abPAAAAAJ;BnjU0PQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;RGoypN4AAAAJ", "orcid": ";;;;;;;;", "linkedin": ";;hanin-al-ghothani-a90328128;;;;bentwich/;;", "or_profile": "~Loka_Li1;~Haoyue_Dai1;~Hanin_Al_Ghothani1;~Biwei_Huang1;~Jiji_Zhang1;~Shahar_Harel2;~Isaac_Bentwich1;~Guangyi_Chen1;~Kun_Zhang1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence;University of California, San Diego;The Chinese University of Hong Kong;;Quris-AI;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "mbzuai.ac.ae;cmu.edu;mbzuai.ac.ae;ucsd.edu;cuhk.edu.hk;;quris.ai;cmu.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Professor;;Researcher;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nli2024on,\ntitle={On Causal Discovery in the Presence of Deterministic Relations},\nauthor={Loka Li and Haoyue Dai and Hanin Al Ghothani and Biwei Huang and Jiji Zhang and Shahar Harel and Isaac Bentwich and Guangyi Chen and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pfvcsgFrJ6}\n}", "github": "", "reviewers": "Wfp4;j5ZZ;zDFG;Nu91", "pdf_size": 5770516, "rating": "6;6;7;8", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "157;47;32;115", "wc_strengths": "69;28;49;88", "wc_weaknesses": "71;44;47;854", "wc_questions": "135;4;267;92", "wc_limitations": "45;3;16;58", "wc_review": "477;126;411;1207", "wc_reply_reviewers": "20;18;275;15", "wc_reply_authors": "40;19;1719;1290", "reply_reviewers": "1;1;3;1", "reply_authors": "3;2;6;4", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 87.75, 50.76108253376793 ], "wc_strengths_avg": [ 58.5, 22.36626924634504 ], "wc_weaknesses_avg": [ 254.0, 346.56817511133363 ], "wc_questions_avg": [ 124.5, 94.85910604681028 ], "wc_limitations_avg": [ 30.5, 21.982947936980608 ], "wc_review_avg": [ 555.25, 398.73699038338543 ], "wc_reply_reviewers_avg": [ 82.0, 111.44281044553749 ], "wc_reply_authors_avg": [ 767.0, 752.9717790196389 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17728316998578546339&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "mbzuai.ac.ae;cmu.edu;mbzuai.ac.ae;ucsd.edu;cuhk.edu.hk;;quris.ai;cmu.edu;cmu.edu", "author_num": 9, "aff_unique_index": "0;1;0;2;3;4;1;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Carnegie Mellon University;University of California, San Diego;Chinese University of Hong Kong;Quris-AI", "aff_unique_dep": ";;;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.cmu.edu;https://www.ucsd.edu;https://www.cuhk.edu.hk;", "aff_unique_abbr": "MBZUAI;CMU;UCSD;CUHK;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Diego;Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;2;1;1", "aff_country_unique": "United Arab Emirates;United States;China;" }, { "title": "AdaptiveISP: Learning an Adaptive Image Signal Processor for Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93538", "id": "pgQCsyKdpN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pgQCsyKdpN", "openreview": "https://openreview.net/forum?id=pgQCsyKdpN", "poster": "/media/PosterPDFs/NeurIPS%202024/93538.png?t=1733404705.3965046", "project": "", "author_site": "Yujin Wang, Tianyi Xu, Zhang Fan, Tianfan Xue, Jinwei Gu", "tldr": "", "abstract": "Image Signal Processors (ISPs) convert raw sensor signals into digital images, which significantly influence the image quality and the performance of downstream computer vision tasks. \n Designing ISP pipeline and tuning ISP parameters are two key steps for building an imaging and vision system.\n To find optimal ISP configurations, recent works use deep neural networks as a proxy to search for ISP parameters or ISP pipelines. However, these methods are primarily designed to maximize the image quality, which are sub-optimal in the performance of high-level computer vision tasks such as detection, recognition, and tracking. Moreover, after training, the learned ISP pipelines are mostly fixed at the inference time, whose performance degrades in dynamic scenes. \n To jointly optimize ISP structures and parameters, we propose AdaptiveISP, a task-driven and scene-adaptive ISP. \n One key observation is that for the majority of input images, only a few processing modules are needed to improve the performance of downstream recognition tasks, and only a few inputs require more processing.\n Based on this, AdaptiveISP utilizes deep reinforcement learning to automatically generate an optimal ISP pipeline and the associated ISP parameters to maximize the detection performance. Experimental results show that AdaptiveISP not only surpasses the prior state-of-the-art methods for object detection but also dynamically manages the trade-off between detection performance and computational cost, especially suitable for scenes with large dynamic range variations.\nProject website: https://openimaginglab.github.io/AdaptiveISP/.", "keywords": "Image Signal Processor;Image Processing;Reinforcement Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yujin Wang;Xu Tian yi;Zhang Fan;Tianfan Xue;Jinwei Gu", "authorids": "~Yujin_Wang2;~Xu_Tian_yi1;~Zhang_Fan1;~Tianfan_Xue2;~Jinwei_Gu1", "gender": ";M;M;M;M", "homepage": ";https://github.com/PhotonTec;https://ivp.ee.cuhk.edu.hk/projects/demo/piqm/index.html;https://tianfan.info;http://www.gujinwei.org", "dblp": ";;21/3626-93;54/8652;61/1140", "google_scholar": ";;;RfSQKrIAAAAJ;k_T8t30AAAAJ", "orcid": ";;;0000-0001-5031-6618;0000-0001-8705-8237", "linkedin": ";;;tianfan-xue-54016716;", "or_profile": "~Yujin_Wang2;~Xu_Tian_yi1;~Zhang_Fan1;~Tianfan_Xue2;~Jinwei_Gu1", "aff": ";Peking University;Tetras;The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": ";pku.edu.cn;tetras.ai;cuhk.edu.hk;cse.cuhk.edu.hk", "position": ";Undergrad student;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024adaptiveisp,\ntitle={Adaptive{ISP}: Learning an Adaptive Image Signal Processor for Object Detection},\nauthor={Yujin Wang and Xu Tian yi and Zhang Fan and Tianfan Xue and Jinwei Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pgQCsyKdpN}\n}", "github": "", "reviewers": "Ln32;z6yj;Sot8;bosW", "pdf_size": 5629732, "rating": "3;5;5;7", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "15;61;45;88", "wc_strengths": "29;37;33;112", "wc_weaknesses": "246;42;139;108", "wc_questions": "6;81;28;42", "wc_limitations": "13;2;32;7", "wc_review": "309;223;277;357", "wc_reply_reviewers": "0;40;14;0", "wc_reply_authors": "0;48;0;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.25, 26.43269755435491 ], "wc_strengths_avg": [ 52.75, 34.3247359785913 ], "wc_weaknesses_avg": [ 133.75, 73.66944753423905 ], "wc_questions_avg": [ 39.25, 27.307279249313726 ], "wc_limitations_avg": [ 13.5, 11.368817000902073 ], "wc_review_avg": [ 291.5, 48.731406710662476 ], "wc_reply_reviewers_avg": [ 13.5, 16.332482971061076 ], "wc_reply_authors_avg": [ 12.0, 20.784609690826528 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7587232701031663005&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": ";pku.edu.cn;tetras.ai;cuhk.edu.hk;cse.cuhk.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Peking University;Tetras;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;;https://www.cuhk.edu.hk", "aff_unique_abbr": "Peking U;;CUHK", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "title": "Near-Optimal Distributed Minimax Optimization under the Second-Order Similarity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93537", "id": "pgUQFIJ6BE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pgUQFIJ6BE", "openreview": "https://openreview.net/forum?id=pgUQFIJ6BE", "poster": "/media/PosterPDFs/NeurIPS%202024/93537.png?t=1729872750.232562", "project": "", "author_site": "Qihao Zhou, Haishan Ye, Luo Luo", "tldr": "", "abstract": "This paper considers the distributed convex-concave minimax optimization under the second-order similarity.\nWe propose stochastic variance-reduced optimistic gradient sliding (SVOGS) method, which takes the advantage of the finite-sum structure in the objective by involving the mini-batch client sampling and variance reduction.\nWe prove SVOGS can achieve the $\\varepsilon$-duality gap within communication rounds of \n${\\mathcal O}(\\delta D^2/\\varepsilon)$, \ncommunication complexity of ${\\mathcal O}(n+\\sqrt{n}\\delta D^2/\\varepsilon)$,\nand local gradient calls of \n$\\tilde{\\mathcal O}(n+(\\sqrt{n}\\delta+L)D^2/\\varepsilon\\log(1/\\varepsilon))$, \nwhere $n$ is the number of nodes, $\\delta$ is the degree of the second-order similarity, $L$ is the smoothness parameter and $D$ is the diameter of the constraint set.\nWe can verify that all of above complexity (nearly) matches the corresponding lower bounds.\nFor the specific $\\mu$-strongly-convex-$\\mu$-strongly-convex case, \nour algorithm has the upper bounds on communication rounds, communication complexity, and local gradient calls of $\\mathcal O(\\delta/\\mu\\log(1/\\varepsilon))$, ${\\mathcal O}((n+\\sqrt{n}\\delta/\\mu)\\log(1/\\varepsilon))$, and $\\tilde{\\mathcal O}(n+(\\sqrt{n}\\delta+L)/\\mu)\\log(1/\\varepsilon))$ respectively, which are also nearly tight.\nFurthermore, we conduct the numerical experiments to show the empirical advantages of proposed method.", "keywords": "distributed optimization;minimax optmization;second-order similarity", "primary_area": "optimization", "supplementary_material": "/attachment/75e8ea30251fc488f76336345898287bb1fc8544.zip", "author": "Qihao Zhou;Haishan Ye;Luo Luo", "authorids": "~Qihao_Zhou1;~Haishan_Ye2;~Luo_Luo1", "gender": ";M;M", "homepage": ";;https://luoluo-sds.github.io/", "dblp": ";162/0002.html;https://dblp.org/pers/hd/l/Luo:Luo", "google_scholar": ";;NggI9EsAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Qihao_Zhou1;~Haishan_Ye2;~Luo_Luo1", "aff": ";Xi'an Jiaotong University;Fudan University", "aff_domain": ";xjtu.edu.cn;fudan.edu.cn", "position": ";Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024nearoptimal,\ntitle={Near-Optimal Distributed Minimax Optimization under the Second-Order Similarity},\nauthor={Qihao Zhou and Haishan Ye and Luo Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pgUQFIJ6BE}\n}", "github": "", "reviewers": "DkHm;Vpvu;qmRX", "pdf_size": 886753, "rating": "5;6;7", "confidence": "4;4;3", "soundness": "2;3;4", "novelty": "3;2;4", "presentation": "3;3;3", "wc_summary": "53;36;78", "wc_strengths": "42;51;46", "wc_weaknesses": "82;166;23", "wc_questions": "2;94;42", "wc_limitations": "1;30;11", "wc_review": "180;377;200", "wc_reply_reviewers": "0;23;7", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.666666666666664, 17.249798710580816 ], "wc_strengths_avg": [ 46.333333333333336, 3.6817870057290873 ], "wc_weaknesses_avg": [ 90.33333333333333, 58.67613559948277 ], "wc_questions_avg": [ 46.0, 37.66519171153476 ], "wc_limitations_avg": [ 14.0, 12.027745701779143 ], "wc_review_avg": [ 252.33333333333334, 88.52996981562546 ], "wc_reply_reviewers_avg": [ 10.0, 9.626352718795768 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:7V201RmPp2wJ:scholar.google.com/&scioq=Near-Optimal+Distributed+Minimax+Optimization+under+the+Second-Order+Similarity&hl=en&as_sdt=0,4", "gs_version_total": 6, "email": ";xjtu.edu.cn;fudan.edu.cn", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.fudan.edu.cn", "aff_unique_abbr": "XJTU;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Wasserstein Distributionally Robust Optimization through the Lens of Structural Causal Models and Individual Fairness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93536", "id": "piOzFx9whU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=piOzFx9whU", "openreview": "https://openreview.net/forum?id=piOzFx9whU", "poster": "", "project": "", "author_site": "Ahmad-Reza Ehyaei, Golnoosh Farnadi, Samira Samadi", "tldr": "", "abstract": "In recent years, Wasserstein Distributionally Robust Optimization (DRO) has garnered substantial interest for its efficacy in data-driven decision-making under distributional uncertainty. However, limited research has explored the application of DRO to address individual fairness concerns, particularly when considering causal structures and discrete sensitive attributes in learning problems.\nTo address this gap, we first formulate the DRO problem from the perspectives of causality and individual fairness. We then present the DRO dual formulation as an efficient tool to convert the main problem into a more tractable and computationally efficient form. Next, we characterize the closed form of the approximate worst-case loss quantity as a regularizer, eliminating the max-step in the Min-Max DRO problem. We further estimate the regularizer in more general cases and explore the relationship between DRO and classical robust optimization. Finally, by removing the assumption of a known structural causal model, we provide finite sample error bounds when designing DRO with empirical distributions and estimated causal structures to ensure efficiency and robust learning.", "keywords": "Wasserstein Distributionally Robust Optimization;Individual Fairness;Structural Causal Model;Regularized Optimization", "primary_area": "fairness", "supplementary_material": "/attachment/36b96fbb84d779888fed7a50cb401eb86d3ce4e7.zip", "author": "Ahmad Reza Ehyaei;Golnoosh Farnadi;Samira Samadi", "authorids": "~Ahmad_Reza_Ehyaei1;~Golnoosh_Farnadi1;~Samira_Samadi1", "gender": "M;F;F", "homepage": ";http://www.cwi.ugent.be/people.php?userid=golnoosh;http://www.samirasamadi.com", "dblp": "339/7354;148/1397;https://dblp.uni-trier.de/pers/hd/s/Samadi:Samira", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.nl/citations?user=4Vjp6hwAAAAJ;s8xc2K4AAAAJ", "orcid": "0000-0003-3892-4113;;", "linkedin": ";gfarnadi/;samira-samadi-200662108/", "or_profile": "~Ahmad_Reza_Ehyaei1;~Golnoosh_Farnadi1;~Samira_Samadi1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;McGill University;Max Planck Institute for Intelligent Systems, Max-Planck Institute", "aff_domain": "is.mpg.de;mcgill.ca;tuebingen.mpg.de", "position": "PhD student;Assistant Professor;Research Group Leader", "bibtex": "@inproceedings{\nehyaei2024wasserstein,\ntitle={Wasserstein Distributionally Robust Optimization through the Lens of Structural Causal Models and Individual Fairness},\nauthor={Ahmad Reza Ehyaei and Golnoosh Farnadi and Samira Samadi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=piOzFx9whU}\n}", "github": "", "reviewers": "zg8Z;vsYr;rhFX;rNAj", "pdf_size": 737839, "rating": "5;5;6;7", "confidence": "3;4;2;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "50;119;18;316", "wc_strengths": "78;84;17;65", "wc_weaknesses": "525;130;25;123", "wc_questions": "177;47;24;78", "wc_limitations": "11;1;2;20", "wc_review": "841;381;86;602", "wc_reply_reviewers": "0;56;14;14", "wc_reply_authors": "129;105;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 125.75, 115.74622024066272 ], "wc_strengths_avg": [ 61.0, 26.315394733881533 ], "wc_weaknesses_avg": [ 200.75, 191.75293348473187 ], "wc_questions_avg": [ 81.5, 58.37165407969865 ], "wc_limitations_avg": [ 8.5, 7.697402159170326 ], "wc_review_avg": [ 477.5, 278.4856369725376 ], "wc_reply_reviewers_avg": [ 21.0, 21.0 ], "wc_reply_authors_avg": [ 58.5, 59.112181485714096 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9tvTraDwOL4J:scholar.google.com/&scioq=Wasserstein+Distributionally+Robust+Optimization+through+the+Lens+of+Structural+Causal+Models+and+Individual+Fairness&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "is.mpg.de;mcgill.ca;tuebingen.mpg.de", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;McGill University;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mcgill.ca;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;McGill;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Canada" }, { "title": "HumanVLA: Towards Vision-Language Directed Object Rearrangement by Physical Humanoid", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93535", "id": "pjD08dtAh0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pjD08dtAh0", "openreview": "https://openreview.net/forum?id=pjD08dtAh0", "poster": "/media/PosterPDFs/NeurIPS%202024/93535.png?t=1731663134.2438831", "project": "", "author_site": "Xinyu Xu, Yizheng Zhang, Yong-Lu Li, Lei Han, Cewu Lu", "tldr": "", "abstract": "Physical Human-Scene Interaction (HSI) plays a crucial role in numerous applications. \n However, existing HSI techniques are limited to specific object dynamics and privileged information, which prevents the development of more comprehensive applications.\n To address this limitation, we introduce HumanVLA for general object rearrangement directed by practical vision and language. \n A teacher-student framework is utilized to develop HumanVLA.\n A state-based teacher policy is trained first using goal-conditioned reinforcement learning and adversarial motion prior.\n Then, it is distilled into a vision-language-action model via behavior cloning.\n We propose several key insights to facilitate the large-scale learning process.\n To support general object rearrangement by physical humanoid, we introduce a novel Human-in-the-Room dataset encompassing various rearrangement tasks.\n Through extensive experiments and analysis, we demonstrate the effectiveness of our approach.", "keywords": "Human-Scene Interaction; Object Rearrangement; Vision-Language-Action Model; Physical Humanoid", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/46fa8c802381832248958f02765a7f740a0f3ea2.zip", "author": "Xinyu Xu;Yizheng Zhang;Yong-Lu Li;Lei Han;Cewu Lu", "authorids": "~Xinyu_Xu2;~Yizheng_Zhang1;~Yong-Lu_Li1;~Lei_Han1;~Cewu_Lu3", "gender": "M;M;M;M;M", "homepage": "https://xuxinyu.website/;;https://dirtyharrylyl.github.io/;https://www.leihan.org;https://www.mvig.org/", "dblp": "59/6858;;198/9345;75/2307-1;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=UExAaVgAAAAJ;Tz4_zi8AAAAJ;https://scholar.google.com.tw/citations?user=QZVQEWAAAAAJ", "orcid": ";0000-0002-0488-9869;0000-0003-0478-0692;;", "linkedin": ";;%E6%B0%B8%E9%9C%B2-%E6%9D%8E-991b99139/;;", "or_profile": "~Xinyu_Xu2;~Yizheng_Zhang1;~Yong-Lu_Li1;~Lei_Han1;~Cewu_Lu3", "aff": "Shanghai Jiaotong University;Tencent Robotics X;Shanghai Jiaotong University;Tencent Robotics X;Shanghai Jiaotong University", "aff_domain": "sjtu.edu;tencent.com;sjtu.edu.cn;tencent.com;sjtu.edu.cn", "position": "MS student;Researcher;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nxu2024humanvla,\ntitle={Human{VLA}: Towards Vision-Language Directed Object Rearrangement by Physical Humanoid},\nauthor={Xinyu Xu and Yizheng Zhang and Yong-Lu Li and Lei Han and Cewu Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pjD08dtAh0}\n}", "github": "", "reviewers": "hR7q;vb1s;2Cr4", "pdf_size": 2500962, "rating": "6;6;6", "confidence": "4;4;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;4;3", "wc_summary": "61;151;79", "wc_strengths": "67;52;125", "wc_weaknesses": "147;57;348", "wc_questions": "73;224;34", "wc_limitations": "7;7;10", "wc_review": "355;491;596", "wc_reply_reviewers": "40;49;273", "wc_reply_authors": "0;0;352", "reply_reviewers": "1;1;2", "reply_authors": "1;1;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 97.0, 38.88444419044716 ], "wc_strengths_avg": [ 81.33333333333333, 31.47838764754143 ], "wc_weaknesses_avg": [ 184.0, 121.64703037887936 ], "wc_questions_avg": [ 110.33333333333333, 81.9362896130619 ], "wc_limitations_avg": [ 8.0, 1.4142135623730951 ], "wc_review_avg": [ 480.6666666666667, 98.65878346886078 ], "wc_reply_reviewers_avg": [ 120.66666666666667, 107.7785796075557 ], "wc_reply_authors_avg": [ 117.33333333333333, 165.93439131844315 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9185883550605249573&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "sjtu.edu;tencent.com;sjtu.edu.cn;tencent.com;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Tencent", "aff_unique_dep": ";Tencent Robotics X", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tencent.com", "aff_unique_abbr": "SJTU;Tencent Robotics X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cost-efficient Knowledge-based Question Answering with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93534", "id": "pje1Y71jad", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pje1Y71jad", "openreview": "https://openreview.net/forum?id=pje1Y71jad", "poster": "", "project": "", "author_site": "Junnan Dong, Qinggang Zhang, Chuang Zhou, Hao Chen, Daochen Zha, Xiao Huang", "tldr": "", "abstract": "Knowledge-based question answering (KBQA) is widely used in many scenarios that necessitate domain knowledge. Large language models (LLMs) bring opportunities to KBQA, while their costs are significantly higher and absence of domain-specific knowledge during pre-training. We are motivated to combine LLMs and prior small models on knowledge graphs (KGMs) for both inferential accuracy and cost saving. However, it remains challenging since accuracy and cost are not readily combined in the optimization as two distinct metrics. It is also laborious for model selection since different models excel in diverse knowledge. To this end, we propose Coke, a novel cost-efficient strategy for KBQA with LLMs, modeled as a tailored multi-armed bandit problem to minimize calls to LLMs within limited budgets. We first formulate the accuracy expectation with a cluster-level Thompson Sampling for either KGMs or LLMs. A context-aware policy is optimized to further distinguish the expert model subject to the question semantics. The overall decision is bounded by the cost regret according to historical expenditure on failures. Extensive experiments showcase the superior performance of Coke, which moves the Pareto frontier with up to 20.89% saving of GPT-4 fees while achieving a 2.74% higher accuracy on the benchmark datasets.", "keywords": "Large Language Models;Knowledge-based Question Answering;Efficient Machine Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Junnan Dong;Qinggang Zhang;Chuang Zhou;Hao Chen;Daochen Zha;Xiao Huang", "authorids": "~Junnan_Dong1;~Qinggang_Zhang2;~Chuang_Zhou1;~Hao_Chen18;~Daochen_Zha1;~Xiao_Huang1", "gender": "M;M;M;M;;M", "homepage": "https://junnandong.github.io;https://qing145.github.io/;https://www4.comp.polyu.edu.hk/~xiaohuang/deeplab.html;;http://dczha.com/;https://www4.comp.polyu.edu.hk/~xiaohuang/", "dblp": "322/5787;17/8559;;175/3324-62;167/0903;25/692-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=m7_ceMcAAAAJ;eF8PATI7r3IC;;https://scholar.google.com/citations?view_op=list_works;jK0NgMcAAAAJ;Be21PkYAAAAJ", "orcid": "0000-0003-2117-6083;0000-0002-1536-6529;;0000-0001-6816-5344;0000-0002-6677-7504;0000-0002-3867-900X", "linkedin": ";;;hao-chen-phd-37136b1b5/;daochen-zha;", "or_profile": "~Junnan_Dong1;~Qinggang_Zhang2;~Chuang_Zhou1;~Hao_Chen18;~Daochen_Zha1;~Xiao_Huang1", "aff": "The Hong Kong Polytechnic University;The Hong Kong Polytechnic University, Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University;Airbnb;The Hong Kong Polytechnic University", "aff_domain": "connect.polyu.hk;comp.polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;airbnb.com;polyu.edu.hk", "position": "PhD student;PhD student;PhD student;Postdoc;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndong2024costefficient,\ntitle={Cost-efficient Knowledge-based Question Answering with Large Language Models},\nauthor={Junnan Dong and Qinggang Zhang and Chuang Zhou and Hao Chen and Daochen Zha and Xiao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pje1Y71jad}\n}", "github": "", "reviewers": "dW52;JWNj;HDrN;FvaD", "pdf_size": 1188891, "rating": "5;6;7;8", "confidence": "3;3;3;4", "soundness": "2;3;4;4", "novelty": "2;2;3;4", "presentation": "2;2;3;3", "wc_summary": "32;69;33;81", "wc_strengths": "28;107;48;177", "wc_weaknesses": "66;183;161;132", "wc_questions": "2;96;1;56", "wc_limitations": "12;9;1;52", "wc_review": "140;464;244;498", "wc_reply_reviewers": "23;25;84;66", "wc_reply_authors": "63;61;731;61", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 53.75, 21.672274915199836 ], "wc_strengths_avg": [ 90.0, 58.02154772151464 ], "wc_weaknesses_avg": [ 135.5, 44.01420225336363 ], "wc_questions_avg": [ 38.75, 39.84579651606929 ], "wc_limitations_avg": [ 18.5, 19.75474626513841 ], "wc_review_avg": [ 336.5, 149.5886025070092 ], "wc_reply_reviewers_avg": [ 49.5, 26.291633650269812 ], "wc_reply_authors_avg": [ 229.0, 289.8309852310481 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15338724780606815649&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "connect.polyu.hk;comp.polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;airbnb.com;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Hong Kong Polytechnic University;Airbnb", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.airbnb.com", "aff_unique_abbr": "PolyU;Airbnb", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Algorithmic Capabilities of Random Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93533", "id": "plH8gW7tPQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=plH8gW7tPQ", "openreview": "https://openreview.net/forum?id=plH8gW7tPQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93533.png?t=1731291903.0677178", "project": "", "author_site": "Ziqian Zhong, Jacob Andreas", "tldr": "", "abstract": "Trained transformer models have been found to implement interpretable procedures for tasks like arithmetic and associative recall, but little is understood about how the circuits that implement these procedures originate during training. To what extent do they depend on the supervisory signal provided to models, and to what extent are they attributable to behavior already present in models at the beginning of training? To investigate these questions, we investigate what functions can be learned by randomly initialized transformers in which only the embedding layers are optimized, so that the only input--output mappings learnable from data are those already implemented (up to a choice of encoding scheme) by the randomly initialized model. We find that these random transformers can perform a wide range of meaningful algorithmic tasks, including modular arithmetic, in-weights and in-context associative recall, decimal addition, parenthesis balancing, and even some aspects of natural language text generation. Our results indicate that some algorithmic capabilities are present in transformers (and accessible via appropriately structured inputs) even before these models are trained.", "keywords": "transformer;deep learning;interpretability;capability;emergence;randomness;language models", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/d226e75cd84d1a4dbc5ca1a8e02159ca8a793ea3.zip", "author": "Ziqian Zhong;Jacob Andreas", "authorids": "~Ziqian_Zhong1;~Jacob_Andreas1", "gender": "Not Specified;M", "homepage": "https://fjzzq2002.github.io/;http://web.mit.edu/jda/www", "dblp": "314/7033.html;97/8154", "google_scholar": "iZpSjEYAAAAJ;dnZ8udEAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Ziqian_Zhong1;~Jacob_Andreas1", "aff": "Massachusetts Institute of Technology;Microsoft", "aff_domain": "mit.edu;microsoft.com", "position": "Undergrad student;Researcher", "bibtex": "@inproceedings{\nzhong2024algorithmic,\ntitle={Algorithmic Capabilities of Random Transformers},\nauthor={Ziqian Zhong and Jacob Andreas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=plH8gW7tPQ}\n}", "github": "", "reviewers": "LwAU;ZcHJ;wwwU;B9i5", "pdf_size": 2071898, "rating": "4;5;5;7", "confidence": "3;4;3;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "1;3;3;3", "wc_summary": "69;137;51;295", "wc_strengths": "61;61;27;92", "wc_weaknesses": "75;388;197;1389", "wc_questions": "61;275;72;632", "wc_limitations": "17;10;1;52", "wc_review": "283;871;348;2460", "wc_reply_reviewers": "14;108;29;137", "wc_reply_authors": "0;0;0;538", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 138.0, 96.15092303249095 ], "wc_strengths_avg": [ 60.25, 22.993205518152532 ], "wc_weaknesses_avg": [ 512.25, 518.3383909185195 ], "wc_questions_avg": [ 260.0, 231.05951614248656 ], "wc_limitations_avg": [ 20.0, 19.32614809008769 ], "wc_review_avg": [ 990.5, 878.5034149051443 ], "wc_reply_reviewers_avg": [ 72.0, 51.80250959171766 ], "wc_reply_authors_avg": [ 134.5, 232.960833618014 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16118397304236644587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mit.edu;microsoft.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "MIT;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Heterogeneous Long-tailed Learning: Benchmarking, Metrics, and Toolbox", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97503", "id": "plIuBfYpXj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=plIuBfYpXj", "openreview": "https://openreview.net/forum?id=plIuBfYpXj", "poster": "/media/PosterPDFs/NeurIPS%202024/97503.png?t=1731348594.790205", "project": "", "author_site": "Haohui Wang, Weijie Guan, Chen Jianpeng, Zi Wang, Dawei Zhou", "tldr": "", "abstract": "Long-tailed data distributions pose challenges for a variety of domains like e-commerce, finance, biomedical science, and cyber security, where the performance of machine learning models is often dominated by head categories while tail categories are inadequately learned. This work aims to provide a systematic view of long-tailed learning with regard to three pivotal angles: (A1) the characterization of data long-tailedness, (A2) the data complexity of various domains, and (A3) the heterogeneity of emerging tasks. We develop HeroLT, a comprehensive long-tailed learning benchmark integrating 18 state-of-the-art algorithms, 10 evaluation metrics, and 17 real-world datasets across 6 tasks and 4 data modalities. HeroLT with novel angles and extensive experiments (315 in total) enables effective and fair evaluation of newly proposed methods compared with existing baselines on varying dataset types. Finally, we conclude by highlighting the significant applications of long-tailed learning and identifying several promising future directions. For accessibility and reproducibility, we open-source our benchmark HeroLT and corresponding results at https://github.com/SSSKJ/HeroLT.", "keywords": "long-tailed learning;benchmark", "primary_area": "", "supplementary_material": "", "author": "Haohui Wang;Weijie Guan;Jianpeng Chen;Zi Wang;Dawei Zhou", "authorids": "~Haohui_Wang1;~Weijie_Guan1;~Jianpeng_Chen1;~Zi_Wang10;~Dawei_Zhou1", "gender": "F;M;M;M;M", "homepage": "https://github.com/wanghh7;https://github.com/SSSKJ;https://github.com/cjpcool;;https://sites.google.com/view/dawei-zhou/home?authuser=0", "dblp": "294/8598;352/4229.html;234/5858;;39/3130-3.html", "google_scholar": "ijh64HMAAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=zh-CN;8dakqOgAAAAJ", "orcid": "0009-0000-7391-096X;0000-0003-4001-7862;;;0000-0002-7065-2990", "linkedin": ";;;;dawei-zhou-31035668/", "or_profile": "~Haohui_Wang1;~Weijie_Guan1;~Jianpeng_Chen1;~Zi_Wang10;~Dawei_Zhou1", "aff": "Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;Texas A&M University - College Station;Virginia Polytechnic Institute and State University", "aff_domain": "vt.edu;vt.edu;vt.edu;tamu.edu;vt.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2024towards,\ntitle={Towards Heterogeneous Long-tailed Learning: Benchmarking, Metrics, and Toolbox},\nauthor={Haohui Wang and Weijie Guan and Jianpeng Chen and Zi Wang and Dawei Zhou},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=plIuBfYpXj}\n}", "github": "", "reviewers": "smqd;9zXC;UBRj;xqDp", "pdf_size": 558112, "rating": "5;6;7;8", "confidence": "5;2;4;4", "wc_summary_and_contributions": "61;108;57;57", "wc_strengths": "2;64;4;47", "wc_improvement": "60;25;144;12", "wc_limitations": "55;1;8;8", "wc_correctness": "1;1;5;1", "wc_clarity": "2;1;7;1", "wc_relation_to_prior_work": "18;1;7;1", "wc_documentation": "1;13;7;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "201;215;240;129", "wc_reply_reviewers": "43;11;0;0", "wc_reply_authors": "125;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 70.75, 21.568205766822608 ], "wc_strengths_avg": [ 29.25, 26.93858756505248 ], "wc_improvement_avg": [ 60.25, 51.44110710317187 ], "wc_limitations_avg": [ 18.0, 21.552262062252307 ], "wc_correctness_avg": [ 2.0, 1.7320508075688772 ], "wc_clarity_avg": [ 2.75, 2.48746859276655 ], "wc_relation_to_prior_work_avg": [ 6.75, 6.94172168845741 ], "wc_documentation_avg": [ 5.5, 4.9749371855331 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 196.25, 41.26363411043676 ], "wc_reply_reviewers_avg": [ 13.5, 17.613914953808536 ], "wc_reply_authors_avg": [ 31.25, 54.12658773652741 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1025978352085154, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:67C-ipccx88J:scholar.google.com/&scioq=Towards+Heterogeneous+Long-tailed+Learning:+Benchmarking,+Metrics,+and+Toolbox&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "vt.edu;vt.edu;vt.edu;tamu.edu;vt.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Virginia Tech;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.tamu.edu", "aff_unique_abbr": "VT;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "AllClear: A Comprehensive Dataset and Benchmark for Cloud Removal in Satellite Imagery", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97502", "id": "pn0Jpyvrc2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pn0Jpyvrc2", "openreview": "https://openreview.net/forum?id=pn0Jpyvrc2", "poster": "/media/PosterPDFs/NeurIPS%202024/97502.png?t=1731543577.0225363", "project": "", "author_site": "Hangyu Zhou, Chia-Hsiang Kao, Cheng Perng Phoo, Utkarsh Mall, Bharath Hariharan, Kavita Bala", "tldr": "", "abstract": "Clouds in satellite imagery pose a significant challenge for downstream applications.\nA major challenge in current cloud removal research is the absence of a comprehensive benchmark and a sufficiently large and diverse training dataset.\nTo address this problem, we introduce the largest public dataset -- *AllClear* for cloud removal, featuring 23,742 globally distributed regions of interest (ROIs) with diverse land-use patterns, comprising 4 million images in total. Each ROI includes complete temporal captures from the year 2022, with (1) multi-spectral optical imagery from Sentinel-2 and Landsat 8/9, (2) synthetic aperture radar (SAR) imagery from Sentinel-1, and (3) auxiliary remote sensing products such as cloud masks and land cover maps.\nWe validate the effectiveness of our dataset by benchmarking performance, demonstrating the scaling law - the PSNR rises from $28.47$ to $33.87$ with $30\\times$ more data, and conducting ablation studies on the temporal length and the importance of individual modalities. This dataset aims to provide comprehensive coverage of the Earth's surface and promote better cloud removal results.", "keywords": "remote sensing;cloud removal;satellite imagery", "primary_area": "", "supplementary_material": "/attachment/fefccb46551e064456c4cfeaf08382eeb0d3b37b.pdf", "author": "Hangyu Zhou;Chia Hsiang Kao;Cheng Perng Phoo;Utkarsh Mall;Bharath Hariharan;Kavita Bala", "authorids": "~Hangyu_Zhou1;~Chia_Hsiang_Kao1;~Cheng_Perng_Phoo1;~Utkarsh_Mall1;~Bharath_Hariharan3;~Kavita_Bala1", "gender": "M;M;M;M;;M", "homepage": "https://zhou-hangyu.github.io/;https://iandrover.github.io;https://cpphoo.github.io/;http://www.cs.cornell.edu/~utkarshm/;http://www.cs.cornell.edu/~kb;http://home.bharathh.info", "dblp": ";241/3791;226/0521;183/0911;b/KavitaBala;05/8412", "google_scholar": ";https://scholar.google.com.tw/citations?user=W_i9B0sAAAAJ;kt9D2usAAAAJ;AK0AFWwAAAAJ;Rh16nsIAAAAJ;TpglobcAAAAJ", "orcid": ";;;;0000-0001-9761-6503;", "linkedin": "hangyu-zhou-b146931b2/;;;;;", "or_profile": "~Hangyu_Zhou1;~Chia_Hsiang_Kao1;~Cheng_Perng_Phoo1;~Utkarsh_Mall1;~Kavita_Bala1;~Bharath_Hariharan2", "aff": "Cornell University;Cornell University;Cornell University;Columbia University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;columbia.edu;cornell.edu;cornell.edu", "position": "MS student;PhD student;PhD student;Postdoc;Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2024allclear,\ntitle={AllClear: A Comprehensive Dataset and Benchmark for Cloud Removal in Satellite Imagery},\nauthor={Hangyu Zhou and Chia Hsiang Kao and Cheng Perng Phoo and Utkarsh Mall and Bharath Hariharan and Kavita Bala},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=pn0Jpyvrc2}\n}", "github": "", "reviewers": "ypDk;jSod;W34B;NUUv", "pdf_size": 8491974, "rating": "3;6;7;8", "confidence": "5;5;3;4", "wc_summary_and_contributions": "50;91;90;39", "wc_strengths": "42;120;101;31", "wc_improvement": "234;221;95;212", "wc_limitations": "55;37;54;93", "wc_correctness": "29;8;39;8", "wc_clarity": "5;9;30;251", "wc_relation_to_prior_work": "5;14;35;6", "wc_documentation": "16;13;12;40", "wc_additional_feedback": "1;1;1;1", "wc_review": "437;514;457;681", "wc_reply_reviewers": "0;10;0;109", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 67.5, 23.32916629457641 ], "wc_strengths_avg": [ 73.5, 37.80542289143186 ], "wc_improvement_avg": [ 190.5, 55.688867828319154 ], "wc_limitations_avg": [ 59.75, 20.48627589387588 ], "wc_correctness_avg": [ 21.0, 13.47219358530748 ], "wc_clarity_avg": [ 73.75, 102.77493614690306 ], "wc_relation_to_prior_work_avg": [ 15.0, 12.062338081814818 ], "wc_documentation_avg": [ 20.25, 11.497282287566918 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 522.25, 95.9097883430049 ], "wc_reply_reviewers_avg": [ 29.75, 45.93677720519801 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6446583712203042, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3201146145411282458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cornell.edu;cornell.edu;cornell.edu;columbia.edu;cornell.edu;cornell.edu", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Cornell University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.columbia.edu", "aff_unique_abbr": "Cornell;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CAT: Coordinating Anatomical-Textual Prompts for Multi-Organ and Tumor Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93532", "id": "pnmUiVAGnv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pnmUiVAGnv", "openreview": "https://openreview.net/forum?id=pnmUiVAGnv", "poster": "/media/PosterPDFs/NeurIPS%202024/93532.png?t=1730298995.1886513", "project": "", "author_site": "Zhongzhen Huang, Yankai Jiang, Rongzhao Zhang, Shaoting Zhang, Xiaofan Zhang", "tldr": "", "abstract": "Existing promptable segmentation methods in the medical imaging field primarily consider either textual or visual prompts to segment relevant objects, yet they often fall short when addressing anomalies in medical images, like tumors, which may vary greatly in shape, size, and appearance. Recognizing the complexity of medical scenarios and the limitations of textual or visual prompts, we propose a novel dual-prompt schema that leverages the complementary strengths of visual and textual prompts for segmenting various organs and tumors. Specifically, we introduce $\\textbf{\\textit{CAT}}$, an innovative model that $\\textbf{C}$oordinates $\\textbf{A}$natomical prompts derived from 3D cropped images with $\\textbf{T}$extual prompts enriched by medical domain knowledge. The model architecture adopts a general query-based design, where prompt queries facilitate segmentation queries for mask prediction. To synergize two types of prompts within a unified framework, we implement a ShareRefiner, which refines both segmentation and prompt queries while disentangling the two types of prompts. Trained on a consortium of 10 public CT datasets, $\\textbf{\\textit{CAT}}$ demonstrates superior performance in multiple segmentation tasks. Further validation on a specialized in-house dataset reveals the remarkable capacity of segmenting tumors across multiple cancer stages. This approach confirms that coordinating multimodal prompts is a promising avenue for addressing complex scenarios in the medical domain.", "keywords": "Promptable model;Visual-Textual prompt;Multi-organ and tumor segmentation", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/f349e7c7b2cd60cddaeda8b41f4825e127649975.zip", "author": "Zhongzhen Huang;Yankai Jiang;Rongzhao Zhang;Shaoting Zhang;Xiaofan Zhang", "authorids": "~Zhongzhen_Huang1;~Yankai_Jiang1;~Rongzhao_Zhang1;~Shaoting_Zhang4;~Xiaofan_Zhang2", "gender": "M;M;M;M;F", "homepage": "https://github.com/zongzi3zz/zongzi3zz.github.io;https://yankai96.github.io/;;;", "dblp": "320/0462;308/2080-3;193/4706;53/3894;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;NMp31uMAAAAJ;oiBMWK4AAAAJ;30e95fEAAAAJ", "orcid": ";0000-0003-1903-2232;;0000-0002-8719-448X;", "linkedin": ";;;;", "or_profile": "~Zhongzhen_Huang1;~Yankai_Jiang1;~Rongzhao_Zhang1;~Shaoting_Zhang4;~Xiaofan_Zhang2", "aff": "Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn", "position": "PhD student;Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2024cat,\ntitle={{CAT}: Coordinating Anatomical-Textual Prompts for Multi-Organ and Tumor Segmentation},\nauthor={Zhongzhen Huang and Yankai Jiang and Rongzhao Zhang and Shaoting Zhang and Xiaofan Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pnmUiVAGnv}\n}", "github": "", "reviewers": "Yp5t;qosx;wEKt", "pdf_size": 5066094, "rating": "5;5;6", "confidence": "5;4;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "40;91;60", "wc_strengths": "39;88;69", "wc_weaknesses": "174;414;94", "wc_questions": "4;143;73", "wc_limitations": "9;6;26", "wc_review": "266;742;322", "wc_reply_reviewers": "61;203;9", "wc_reply_authors": "29;968;29", "reply_reviewers": "1;2;1", "reply_authors": "2;4;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.666666666666664, 20.98147330914162 ], "wc_strengths_avg": [ 65.33333333333333, 20.171487027209693 ], "wc_weaknesses_avg": [ 227.33333333333334, 135.97385369580758 ], "wc_questions_avg": [ 73.33333333333333, 56.74700187870917 ], "wc_limitations_avg": [ 13.666666666666666, 8.806563209081938 ], "wc_review_avg": [ 443.3333333333333, 212.4230579658328 ], "wc_reply_reviewers_avg": [ 91.0, 81.99186951562129 ], "wc_reply_authors_avg": [ 342.0, 442.64884502277874 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13658731286061735044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.shailab.org/", "aff_unique_abbr": "SJTU;Shanghai AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SnapKV: LLM Knows What You are Looking for Before Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93531", "id": "poE54GOq2l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=poE54GOq2l", "openreview": "https://openreview.net/forum?id=poE54GOq2l", "poster": "", "project": "", "author_site": "Yuhong Li, Yingbing Huang, Bowen Yang, Bharat Venkitesh, Acyr Locatelli, Hanchen Ye, Tianle Cai, Patrick Lewis, Deming Chen", "tldr": "", "abstract": "Large Language Models (LLMs) have made remarkable progress in processing extensive contexts, with the Key-Value (KV) cache playing a vital role in enhancing their performance. However, the growth of the KV cache in response to increasing input length poses challenges to memory and time efficiency. To address this problem, this paper introduces SnapKV, an innovative and fine-tuning-free approach that efficiently minimizes KV cache size while still delivering comparable performance in real-world applications.\n\nWe discover that each attention head in the model consistently focuses on specific prompt attention features during generation. Meanwhile, this robust pattern can be obtained from an `observation' window located at the end of the prompts. Drawing on this insight, SnapKV automatically compresses KV caches by selecting clustered important KV positions for each attention head. Our approach significantly reduces the growing computational overhead and memory footprint when processing long input sequences. Specifically, SnapKV achieves a consistent decoding speed with a 3.6x increase in generation speed and an 8.2x enhancement in memory efficiency compared to baseline when processing inputs of 16K tokens. At the same time, it maintains comparable performance to baseline models across 16 long sequence datasets. Moreover, SnapKV can process up to 380K context tokens on a single A100-80GB GPU using HuggingFace implementation with minor changes, exhibiting only a negligible accuracy drop in the Needle-in-a-Haystack test. Further comprehensive studies suggest SnapKV's potential for practical applications.", "keywords": "Large Language Model;Key-Value Cache Compression;Natural Language Processing;Inference;Machine Learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuhong Li;Yingbing Huang;Bowen Yang;Bharat Venkitesh;Acyr Locatelli;Hanchen Ye;Tianle Cai;Patrick Lewis;Deming Chen", "authorids": "~Yuhong_Li2;~Yingbing_Huang1;~Bowen_Yang1;~Bharat_Venkitesh1;~Acyr_Locatelli1;~Hanchen_Ye1;~Tianle_Cai1;~Patrick_Lewis2;~Deming_Chen1", "gender": "M;;M;M;M;M;M;M;", "homepage": "https://leeyeehoo.github.io/;https://wendyh1108.github.io/;;;https://acyrl.github.io/;https://hanchenye.com;https://tianle.website;https://patricklewis.io;", "dblp": ";;;194/3553;330/2243.html;;241/9458;227/3197;", "google_scholar": "Qh-6mV8AAAAJ;;89rJ1rYAAAAJ;r6fDYb0AAAAJ;pv4OI2EAAAAJ;;CvwLRSMAAAAJ;JN7Zg-kAAAAJ;", "orcid": "0000-0002-3769-6772;;;;;;;0000-0002-2192-9543;", "linkedin": ";;bowen-yang-43624a127/;bharat-venkitesh-92350671/;;;;patrick-s-h-lewis/;", "or_profile": "~Yuhong_Li2;~Yingbing_Huang1;~Bowen_Yang1;~Bharat_Venkitesh1;~Acyr_Locatelli1;~Hanchen_Ye1;~Tianle_Cai1;~Patrick_Lewis2;~Deming_Chen1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Cohere;Cohere;Cohere;University of Illinois Urbana-Champaign;Princeton University;Cohere;", "aff_domain": "illinois.edu;uiuc.edu;cohere.com;cohere.ai;cohere.com;illinois.edu;princeton.edu;cohere.ai;", "position": "PhD student;PhD student;Researcher;Member of Technical Staff;Researcher;PhD student;PhD student;Director of Agentic AI;", "bibtex": "@inproceedings{\nli2024snapkv,\ntitle={Snap{KV}: {LLM} Knows What You are Looking for Before Generation},\nauthor={Yuhong Li and Yingbing Huang and Bowen Yang and Bharat Venkitesh and Acyr Locatelli and Hanchen Ye and Tianle Cai and Patrick Lewis and Deming Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=poE54GOq2l}\n}", "github": "", "reviewers": "Akz1;Zgi7;wNH7", "pdf_size": 1323120, "rating": "6;6;7", "confidence": "4;3;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "2;3;3", "wc_summary": "109;95;105", "wc_strengths": "26;52;127", "wc_weaknesses": "121;17;168", "wc_questions": "172;32;43", "wc_limitations": "6;1;1", "wc_review": "434;197;444", "wc_reply_reviewers": "65;0;124", "wc_reply_authors": "15;0;46", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 103.0, 5.887840577551898 ], "wc_strengths_avg": [ 68.33333333333333, 42.82003684673281 ], "wc_weaknesses_avg": [ 102.0, 63.09252464964979 ], "wc_questions_avg": [ 82.33333333333333, 63.56274240639891 ], "wc_limitations_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_review_avg": [ 358.3333333333333, 114.15291888028483 ], "wc_reply_reviewers_avg": [ 63.0, 50.642538114382326 ], "wc_reply_authors_avg": [ 20.333333333333332, 19.154343864744856 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 147, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2967920787170875627&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "illinois.edu;uiuc.edu;cohere.com;cohere.ai;cohere.com;illinois.edu;princeton.edu;cohere.ai;", "author_num": 9, "aff_unique_index": "0;0;1;1;1;0;2;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Cohere;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://cohere.ai;https://www.princeton.edu", "aff_unique_abbr": "UIUC;;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "SuperDeepFool: a new fast and accurate minimal adversarial attack", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93530", "id": "pqD7ckR8AF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pqD7ckR8AF", "openreview": "https://openreview.net/forum?id=pqD7ckR8AF", "poster": "", "project": "", "author_site": "alireza abdollahpour, Mahed Abroshan, Seyed-Mohsen Moosavi-Dezfooli", "tldr": "", "abstract": "Deep neural networks have been known to be vulnerable to adversarial examples, which are inputs that are modified slightly to fool the network into making incorrect predictions. This has led to a significant amount of research on evaluating the robustness of these networks against such perturbations. One particularly important robustness metric is the robustness to minimal $\\ell_{2}$ adversarial perturbations. However, existing methods for evaluating this robustness metric are either computationally expensive or not very accurate. In this paper, we introduce a new family of adversarial attacks that strike a balance between effectiveness and computational efficiency. Our proposed attacks are generalizations of the well-known DeepFool (DF) attack, while they remain simple to understand and implement. We demonstrate that our attacks outperform existing methods in terms of both effectiveness and computational efficiency. Our proposed attacks are also suitable for evaluating the robustness of large models and can be used to perform adversarial training (AT) to achieve state-of-the-art robustness to minimal $\\ell_{2}$ adversarial perturbations.", "keywords": "Deep Learning;Adversarial Attacks;Robustness;Interpretable AI;ML Security", "primary_area": "privacy", "supplementary_material": "", "author": "Alireza Abdolahpourrostam;Mahed Abroshan;Seyed-Mohsen Moosavi-Dezfooli", "authorids": "~Alireza_Abdolahpourrostam1;~Mahed_Abroshan1;~Seyed-Mohsen_Moosavi-Dezfooli1", "gender": "M;M;M", "homepage": "https://people.epfl.ch/alireza.abdollahpoorrostam;;", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;tYSPRRwAAAAJ;https://scholar.google.ch/citations?user=qosS83IAAAAJ", "orcid": ";;", "linkedin": "alireza-abdollahpour-511a36191?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3B2ISItUEmSiqSdoHejAhw%2BA%3D%3D;mahed-abroshan/;", "or_profile": "~Alireza_Abdolahpourrostam1;~Mahed_Abroshan1;~Seyed-Mohsen_Moosavi-Dezfooli1", "aff": "EPFL - EPF Lausanne;Optum AI;Imperial College London, Imperial College London", "aff_domain": "epfl.ch;optum.com;imperial.ac.uk", "position": "MS student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nabdolahpourrostam2024superdeepfool,\ntitle={SuperDeepFool: a new fast and accurate minimal adversarial attack},\nauthor={Alireza Abdolahpourrostam and Mahed Abroshan and Seyed-Mohsen Moosavi-Dezfooli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pqD7ckR8AF}\n}", "github": "", "reviewers": "Vkrz;ojpB;e1KW;UfKL", "pdf_size": 1427438, "rating": "6;6;6;7", "confidence": "4;4;3;4", "soundness": "4;3;3;4", "novelty": "3;2;3;4", "presentation": "4;2;4;4", "wc_summary": "66;64;75;51", "wc_strengths": "80;95;84;89", "wc_weaknesses": "45;164;126;17", "wc_questions": "35;1;196;39", "wc_limitations": "44;10;41;11", "wc_review": "270;334;522;207", "wc_reply_reviewers": "20;0;24;30", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 64.0, 8.573214099741124 ], "wc_strengths_avg": [ 87.0, 5.612486080160912 ], "wc_weaknesses_avg": [ 88.0, 59.39276050159649 ], "wc_questions_avg": [ 67.75, 75.50289729540185 ], "wc_limitations_avg": [ 26.5, 16.03901493234544 ], "wc_review_avg": [ 333.25, 117.86300310105797 ], "wc_reply_reviewers_avg": [ 18.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2n4vRRGMKsoJ:scholar.google.com/&scioq=SuperDeepFool:+a+new+fast+and+accurate+minimal+adversarial+attack&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "epfl.ch;optum.com;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "EPFL;Optum;Imperial College London", "aff_unique_dep": ";Optum AI;", "aff_unique_url": "https://www.epfl.ch;https://www.optum.com;https://www.imperial.ac.uk", "aff_unique_abbr": "EPFL;Optum;ICL", "aff_campus_unique_index": "0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;United States;United Kingdom" }, { "title": "Spectral Editing of Activations for Large Language Model Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93529", "id": "pqYceEa87j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pqYceEa87j", "openreview": "https://openreview.net/forum?id=pqYceEa87j", "poster": "", "project": "", "author_site": "Yifu QIU, Zheng Zhao, Yftah Ziser, Anna Korhonen, Edoardo Maria Ponti, Shay Cohen", "tldr": "", "abstract": "Large language models (LLMs) often exhibit undesirable behaviours, such as generating untruthful or biased content. Editing their internal representations has been shown to be effective in mitigating such behaviours on top of the existing alignment methods. We propose a novel inference-time editing method, namely spectral editing of activations (SEA), to project the input representations into directions with maximal covariance with the positive demonstrations (e.g., truthful) while minimising covariance with the negative demonstrations (e.g., hallucinated). We also extend our method to non-linear editing using feature functions. We run extensive experiments on benchmarks concerning truthfulness and bias with six open-source LLMs of different sizes and model families. The results demonstrate the superiority of SEA in effectiveness, generalisation to similar tasks, as well as computation and data efficiency. We also show that SEA editing only has a limited negative impact on other model capabilities.", "keywords": "Large Language Model;Alignment;Spectral Decomposition;Representation Engineering;Model Editing", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/635b5790922c9248dd48bf2c276381cbb0b7a78d.zip", "author": "Yifu QIU;Zheng Zhao;Yftah Ziser;Anna Korhonen;Edoardo Ponti;Shay B Cohen", "authorids": "~Yifu_QIU1;~Zheng_Zhao2;~Yftah_Ziser1;~Anna_Korhonen1;~Edoardo_Ponti1;~Shay_B_Cohen1", "gender": "Not Specified;M;M;;;M", "homepage": "https://yfqiu.netlify.app/;http://www.inf.ed.ac.uk/people/students/Zheng_Zhao.html;https://yftah89.github.io/;https://sites.google.com/site/annakorhonen/;https://ducdauge.github.io/;http://homepages.inf.ed.ac.uk/scohen", "dblp": "316/9904;75/6680-5;188/6096.html;14/6532;178/8829;04/5629", "google_scholar": "OA6GaMwAAAAJ;UO0MJeQAAAAJ;https://scholar.google.co.il/citations?user=37SMCrsAAAAJ;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ;https://scholar.google.ca/citations?user=tklL2q0AAAAJ;", "orcid": ";;0009-0002-6228-9471;;0000-0002-6308-1050;0000-0003-4753-8353", "linkedin": "yifu-qiu-turing/;;;anna-korhonen-534a9b5/;edoardo-maria-ponti/;", "or_profile": "~Yifu_QIU1;~Zheng_Zhao2;~Yftah_Ziser1;~Anna_Korhonen1;~Edoardo_Ponti1;~Shay_B_Cohen1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Cambridge;NVIDIA;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;edinburgh.org;cam.ac.uk;nvidia.com;ed.ac.uk", "position": "PhD student;PhD student;Postdoc;Professor;Researcher;Reader", "bibtex": "@inproceedings{\nqiu2024spectral,\ntitle={Spectral Editing of Activations for Large Language Model Alignment},\nauthor={Yifu QIU and Zheng Zhao and Yftah Ziser and Anna Korhonen and Edoardo Ponti and Shay B Cohen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pqYceEa87j}\n}", "github": "", "reviewers": "fDnn;DKrA;sYbq;CvPa", "pdf_size": 2811903, "rating": "4;6;7;7", "confidence": "5;1;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "75;355;109;178", "wc_strengths": "39;44;114;87", "wc_weaknesses": "323;276;61;99", "wc_questions": "73;54;77;91", "wc_limitations": "1;48;30;7", "wc_review": "511;777;391;462", "wc_reply_reviewers": "0;38;0;0", "wc_reply_authors": "38;15;38;38", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.25, 1.479019945774904 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 179.25, 108.04252634958144 ], "wc_strengths_avg": [ 71.0, 31.056400306539068 ], "wc_weaknesses_avg": [ 189.75, 111.81094534972861 ], "wc_questions_avg": [ 73.75, 13.216939887886303 ], "wc_limitations_avg": [ 21.5, 18.741664813991314 ], "wc_review_avg": [ 535.25, 145.9492634445272 ], "wc_reply_reviewers_avg": [ 9.5, 16.454482671904334 ], "wc_reply_authors_avg": [ 32.25, 9.959292143521045 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4140393356054125, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4559673949692375548&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "ed.ac.uk;ed.ac.uk;edinburgh.org;cam.ac.uk;nvidia.com;ed.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "University of Edinburgh;University of Cambridge;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.ed.ac.uk;https://www.cam.ac.uk;https://www.nvidia.com", "aff_unique_abbr": "Edinburgh;Cambridge;NVIDIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Hallo3D: Multi-Modal Hallucination Detection and Mitigation for Consistent 3D Content Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93528", "id": "pqi4vqBYXW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pqi4vqBYXW", "openreview": "https://openreview.net/forum?id=pqi4vqBYXW", "poster": "/media/PosterPDFs/NeurIPS%202024/93528.png?t=1729476716.2483194", "project": "", "author_site": "Hongbo Wang, Jie Cao, Jin Liu, Xiaoqiang Zhou, Huaibo Huang, Ran He", "tldr": "", "abstract": "Recent advancements in 3D content generation have been significant, primarily due to the visual priors provided by pretrained diffusion models. However, large 2D visual models exhibit spatial perception hallucinations, leading to multi-view inconsistency in 3D content generated through Score Distillation Sampling (SDS). This phenomenon, characterized by overfitting to specific views, is referred to as the \"Janus Problem\". In this work, we investigate the hallucination issues of pretrained models and find that large multimodal models without geometric constraints possess the capability to infer geometric structures, which can be utilized to mitigate multi-view inconsistency. Building on this, we propose a novel tuning-free method. We represent the multimodal inconsistency query information to detect specific hallucinations in 3D content, using this as an enhanced prompt to re-consist the 2D renderings of 3D and jointly optimize the structure and appearance across different views. Our approach does not require 3D training data and can be implemented plug-and-play within existing frameworks. Extensive experiments demonstrate that our method significantly improves the consistency of 3D content generation and specifically mitigates hallucinations caused by pretrained large models, achieving state-of-the-art performance compared to other optimization methods.", "keywords": "3D Generation;Hallucination Detection and Mitigation;Multi-modal Inference;View Alignment", "primary_area": "generative_models", "supplementary_material": "", "author": "Hongbo Wang;Jie Cao;Jin Liu;Xiaoqiang Zhou;Huaibo Huang;Ran He", "authorids": "~Hongbo_Wang3;~Jie_Cao2;~Jin_Liu10;~Xiaoqiang_Zhou2;~Huaibo_Huang1;~Ran_He1", "gender": "M;M;;M;M;M", "homepage": ";https://ttxsjie.github.io/;;https://xiaoqiangzhou.cn/;https://people.ucas.edu.cn/~huanghuaibo;https://rhe-web.github.io/", "dblp": ";39/6191-2;;13/1515;211/7251.html;61/6198-1", "google_scholar": "U1_mEuUAAAAJ;https://scholar.google.com/citations?hl=en;;Z2BTkNIAAAAJ;XMvLciUAAAAJ;ayrg9AUAAAAJ", "orcid": ";0000-0001-6368-4495;;;0000-0001-5866-2283;0000-0002-3807-991X", "linkedin": ";;;;;", "or_profile": "~Hongbo_Wang3;~Jie_Cao2;~Jin_Liu10;~Xiaoqiang_Zhou2;~Huaibo_Huang1;~Ran_He1", "aff": "Beijing Institute of Technology;Institute of Automation, Chinese Academy of Sciences;;University of Science and Technology of China;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "bit.edu;ia.ac.cn;;ustc.edu;ia.ac.cn;ia.ac.cn", "position": "Undergrad student;Associate Professor;;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024hallod,\ntitle={Hallo3D: Multi-Modal Hallucination Detection and Mitigation for Consistent 3D Content Generation},\nauthor={Hongbo Wang and Jie Cao and Jin Liu and Xiaoqiang Zhou and Huaibo Huang and Ran He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pqi4vqBYXW}\n}", "github": "", "reviewers": "bBV6;isRS;gwDu", "pdf_size": 12599339, "rating": "5;6;6", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "65;63;75", "wc_strengths": "94;87;99", "wc_weaknesses": "174;129;310", "wc_questions": "88;164;68", "wc_limitations": "34;20;76", "wc_review": "455;463;628", "wc_reply_reviewers": "89;32;181", "wc_reply_authors": "622;24;443", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 67.66666666666667, 5.2493385826745405 ], "wc_strengths_avg": [ 93.33333333333333, 4.9216076867444665 ], "wc_weaknesses_avg": [ 204.33333333333334, 76.94298033103618 ], "wc_questions_avg": [ 106.66666666666667, 41.35483311805553 ], "wc_limitations_avg": [ 43.333333333333336, 23.79542439676633 ], "wc_review_avg": [ 515.3333333333334, 79.73428093082394 ], "wc_reply_reviewers_avg": [ 100.66666666666667, 61.38584708401621 ], "wc_reply_authors_avg": [ 363.0, 250.6006118641107 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14603740559836137894&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "bit.edu;ia.ac.cn;;ustc.edu;ia.ac.cn;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Beijing Institute of Technology;Chinese Academy of Sciences;University of Science and Technology of China", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "http://www.bit.edu.cn/;http://www.ia.cas.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "BIT;CAS;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Frieren: Efficient Video-to-Audio Generation Network with Rectified Flow Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93527", "id": "prXfM5X2Db", "proceeding": "", "pdf": "https://openreview.net/pdf?id=prXfM5X2Db", "openreview": "https://openreview.net/forum?id=prXfM5X2Db", "poster": "/media/PosterPDFs/NeurIPS%202024/93527.png?t=1731398692.7239933", "project": "", "author_site": "Yongqi Wang, Wenxiang Guo, Rongjie Huang, Jiawei Huang, Zehan Wang, Fuming You, Ruiqi Li, Zhou Zhao", "tldr": "", "abstract": "Video-to-audio (V2A) generation aims to synthesize content-matching audio from silent video, and it remains challenging to build V2A models with high generation quality, efficiency, and visual-audio temporal synchrony. \nWe propose Frieren, a V2A model based on rectified flow matching. Frieren regresses the conditional transport vector field from noise to spectrogram latent with straight paths and conducts sampling by solving ODE, outperforming autoregressive and score-based models in terms of audio quality. By employing a non-autoregressive vector field estimator based on a feed-forward transformer and channel-level cross-modal feature fusion with strong temporal alignment, our model generates audio that is highly synchronized with the input video. Furthermore, through reflow and one-step distillation with guided vector field, our model can generate decent audio in a few, or even only one sampling step. Experiments indicate that Frieren achieves state-of-the-art performance in both generation quality and temporal alignment on VGGSound, with alignment accuracy reaching 97.22\\%, and 6.2\\% improvement in inception score over the strong diffusion-based baseline. Audio samples and code are available at http://frieren-v2a.github.io.", "keywords": "video-to-audio generation;rectified flow model;efficient generation", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Yongqi Wang;Wenxiang Guo;Rongjie Huang;Jiawei Huang;Zehan Wang;Fuming You;Ruiqi Li;Zhou Zhao", "authorids": "~Yongqi_Wang1;~Wenxiang_Guo1;~Rongjie_Huang1;~Jiawei_Huang5;~Zehan_Wang2;~Fuming_You3;~Ruiqi_Li2;~Zhou_Zhao3", "gender": "M;M;M;M;M;;;", "homepage": ";https://gwx314.github.io/;;;https://github.com/12zehan17;;;", "dblp": ";256/7852;212/8936-1;13/4208-8;126/7826-1;;;", "google_scholar": "9_79D6IAAAAJ;tFg-qdwAAAAJ;iRHBUsgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;euXK0lkAAAAJ;;;", "orcid": "0000-0003-4695-3440;0009-0006-7997-4140;;;0009-0007-7509-7563;;;", "linkedin": ";;;;;;;", "or_profile": "~Yongqi_Wang1;~Wenxiang_Guo1;~Rongjie_Huang1;~Jiawei_Huang5;~Zehan_Wang2;~Fuming_You3;~Ruiqi_Li2;~Zhou_Zhao3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;;", "position": "MS student;PhD student;MS student;MS student;PhD student;;;", "bibtex": "@inproceedings{\nwang2024frieren,\ntitle={Frieren: Efficient Video-to-Audio Generation Network with Rectified Flow Matching},\nauthor={Yongqi Wang and Wenxiang Guo and Rongjie Huang and Jiawei Huang and Zehan Wang and Fuming You and Ruiqi Li and Zhou Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=prXfM5X2Db}\n}", "github": "", "reviewers": "faHj;MSJt;WgLP;hYKZ", "pdf_size": 13313175, "rating": "5;5;7;7", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "107;31;17;93", "wc_strengths": "28;29;70;146", "wc_weaknesses": "140;111;95;182", "wc_questions": "25;42;23;66", "wc_limitations": "1;2;9;7", "wc_review": "301;215;214;494", "wc_reply_reviewers": "58;0;0;59", "wc_reply_authors": "234;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.0, 38.63935817272331 ], "wc_strengths_avg": [ 68.25, 47.98111607705682 ], "wc_weaknesses_avg": [ 132.0, 33.0681115275729 ], "wc_questions_avg": [ 39.0, 17.24818831066034 ], "wc_limitations_avg": [ 4.75, 3.344772040064913 ], "wc_review_avg": [ 306.0, 114.14245485357321 ], "wc_reply_reviewers_avg": [ 29.25, 29.252136674096132 ], "wc_reply_authors_avg": [ 58.5, 101.32497224277932 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4664075196182723558&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;;", "author_num": 8, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hydra: Bidirectional State Space Models Through Generalized Matrix Mixers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93526", "id": "preo49P1VY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=preo49P1VY", "openreview": "https://openreview.net/forum?id=preo49P1VY", "poster": "/media/PosterPDFs/NeurIPS%202024/93526.png?t=1731451997.7106645", "project": "", "author_site": "Sukjun Hwang, Aakash Sunil Lahoti, Ratish Puduppully, Tri Dao, Albert Gu", "tldr": "", "abstract": "A wide array of sequence models are built on a framework modeled after Transformers, comprising alternating sequence mixer and channel mixer layers. This paper studies a unifying *matrix mixer* view of sequence mixers that can be conceptualized as a linear map on the input sequence. This framework encompasses a broad range of well-known sequence models, including the self-attention of Transformers as well as recent strong alternatives such as structured state space models (SSMs), and allows understanding downstream characteristics such as efficiency and expressivity through properties of their structured matrix class. We identify a key axis of matrix parameterizations termed *sequence alignment*, which increases the flexibility and performance of matrix mixers, providing insights into the strong performance of Transformers and recent SSMs such as Mamba. Furthermore, the matrix mixer framework offers a systematic approach to developing sequence mixers with desired properties, allowing us to develop several new sub-quadratic sequence models. In particular, we propose a natural bidirectional extension of the Mamba model (**Hydra**), parameterized as a *quasiseparable matrix mixer*, which demonstrates superior performance over other sequence models including Transformers on non-causal tasks. As a drop-in replacement for attention layers, \\name outperforms BERT by 0.8 points on the GLUE benchmark and ViT by 2% Top-1 accuracy on ImageNet.", "keywords": "Deep learning;sequence models;state space models;mamba", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Sukjun Hwang;Aakash Lahoti;Ratish Puduppully;Tri Dao;Albert Gu", "authorids": "~Sukjun_Hwang1;~Aakash_Lahoti1;~Ratish_Puduppully1;~Tri_Dao1;~Albert_Gu1", "gender": "M;M;M;;M", "homepage": "https://sukjunhwang.github.io;;https://ratishsp.github.io/;https://tridao.me/;", "dblp": "280/0070;;165/0748;206/7018;130/0612", "google_scholar": "rW5dpvMAAAAJ;wGUvxZQAAAAJ;https://scholar.google.co.uk/citations?user=FrB_UMIAAAAJ;NQRw0bQAAAAJ;DVCHv1kAAAAJ", "orcid": ";;;;0000-0002-4946-6042", "linkedin": "sukjun-hwang-343360191/;aakashlahoti/;ratishsp/;;", "or_profile": "~Sukjun_Hwang1;~Aakash_Lahoti1;~Ratish_Puduppully1;~Tri_Dao1;~Albert_Gu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;A*STAR;Princeton University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;andrew.cmu.edu;a-star.edu.sg;princeton.edu;cmu.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhwang2024hydra,\ntitle={Hydra: Bidirectional State Space Models Through Generalized Matrix Mixers},\nauthor={Sukjun Hwang and Aakash Lahoti and Ratish Puduppully and Tri Dao and Albert Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=preo49P1VY}\n}", "github": "", "reviewers": "9VST;RwkG;onHY;5fYM", "pdf_size": 718468, "rating": "4;5;6;9", "confidence": "4;3;4;4", "soundness": "3;2;3;4", "novelty": "2;3;3;4", "presentation": "3;1;2;4", "wc_summary": "53;185;75;83", "wc_strengths": "70;83;59;96", "wc_weaknesses": "77;841;166;19", "wc_questions": "52;39;29;93", "wc_limitations": "1;10;7;2", "wc_review": "253;1158;336;293", "wc_reply_reviewers": "0;512;174;36", "wc_reply_authors": "0;1314;0;43", "reply_reviewers": "0;3;1;1", "reply_authors": "1;5;1;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 99.0, 50.85272854036448 ], "wc_strengths_avg": [ 77.0, 13.874436925511608 ], "wc_weaknesses_avg": [ 275.75, 330.5203284217175 ], "wc_questions_avg": [ 53.25, 24.355440870573457 ], "wc_limitations_avg": [ 5.0, 3.6742346141747673 ], "wc_review_avg": [ 510.0, 375.2725676091979 ], "wc_reply_reviewers_avg": [ 180.5, 202.110736973571 ], "wc_reply_authors_avg": [ 339.25, 563.0459017700067 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3086066999241838, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=179306550734726696&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cs.cmu.edu;andrew.cmu.edu;a-star.edu.sg;princeton.edu;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;Agency for Science, Technology and Research;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.a-star.edu.sg;https://www.princeton.edu", "aff_unique_abbr": "CMU;A*STAR;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Singapore" }, { "title": "Stochastic Kernel Regularisation Improves Generalisation in Deep Kernel Machines", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93525", "id": "prgxz9fYbf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=prgxz9fYbf", "openreview": "https://openreview.net/forum?id=prgxz9fYbf", "poster": "/media/PosterPDFs/NeurIPS%202024/93525.png?t=1729081597.1517112", "project": "", "author_site": "Edward Milsom, Ben Anson, Laurence Aitchison", "tldr": "", "abstract": "Recent work developed convolutional deep kernel machines, achieving 92.7% test accuracy on CIFAR-10 using a ResNet-inspired architecture, which is SOTA for kernel methods. However, this still lags behind neural networks, which easily achieve over 94% test accuracy with similar architectures. In this work we introduce several modifications to improve the convolutional deep kernel machine\u2019s generalisation, including stochastic kernel regularisation, which adds noise to the learned Gram matrices during training. The resulting model achieves 94.5% test accuracy on CIFAR-10. This finding has important theoretical and practical implications, as it demonstrates that the ability to perform well on complex tasks like image classification is not unique to neural networks. Instead, other approaches including deep kernel methods can achieve excellent performance on such tasks, as long as they have the capacity to learn representations from data.", "keywords": "gaussian process;deep gaussian process;kernel methods;representation learning", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/43f32caf210fb3c8264ea08f31531fae2a5b18a2.zip", "author": "Edward Milsom;Ben Anson;Laurence Aitchison", "authorids": "~Edward_Milsom1;~Ben_Anson1;~Laurence_Aitchison1", "gender": "M;M;", "homepage": ";;http://www.gatsby.ucl.ac.uk/~laurence/", "dblp": "348/6212;;155/1918.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": "0000-0003-0404-4564;;", "linkedin": ";ben-anson-7b7519183/;", "or_profile": "~Edward_Milsom1;~Ben_Anson1;~Laurence_Aitchison1", "aff": "University of Bristol;University of Bristol;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmilsom2024stochastic,\ntitle={Stochastic Kernel Regularisation Improves Generalisation in Deep Kernel Machines},\nauthor={Edward Milsom and Ben Anson and Laurence Aitchison},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=prgxz9fYbf}\n}", "github": "", "reviewers": "44ig;cbyd;YneB;V8UE", "pdf_size": 1032750, "rating": "5;6;7;7", "confidence": "2;2;2;2", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "22;77;51;101", "wc_strengths": "34;35;149;80", "wc_weaknesses": "79;22;158;133", "wc_questions": "13;22;32;326", "wc_limitations": "1;1;4;57", "wc_review": "149;157;394;697", "wc_reply_reviewers": "25;0;30;48", "wc_reply_authors": "11;0;254;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.75, 29.431063521388417 ], "wc_strengths_avg": [ 74.5, 46.853495067070504 ], "wc_weaknesses_avg": [ 98.0, 52.34978509984544 ], "wc_questions_avg": [ 98.25, 131.6631592359837 ], "wc_limitations_avg": [ 15.75, 23.84716964337697 ], "wc_review_avg": [ 349.25, 223.60274484003992 ], "wc_reply_reviewers_avg": [ 25.75, 17.151894939043906 ], "wc_reply_authors_avg": [ 66.25, 108.49049497536639 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:aKxAqYEDLJEJ:scholar.google.com/&scioq=Stochastic+Kernel+Regularisation+Improves+Generalisation+in+Deep+Kernel+Machines&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Efficient Combinatorial Optimization via Heat Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93524", "id": "psDrko9v1D", "proceeding": "", "pdf": "https://openreview.net/pdf?id=psDrko9v1D", "openreview": "https://openreview.net/forum?id=psDrko9v1D", "poster": "/media/PosterPDFs/NeurIPS%202024/93524.png?t=1729069429.334472", "project": "", "author_site": "Hengyuan Ma, Wenlian Lu, Jianfeng Feng", "tldr": "", "abstract": "Combinatorial optimization problems are widespread but inherently challenging due to their discrete nature. The primary limitation of existing methods is that they can only access a small fraction of the solution space at each iteration, resulting in limited efficiency for searching the global optimal. To overcome this challenge, diverging from conventional efforts of expanding the solver's search scope, we focus on enabling information to actively propagate to the solver through heat diffusion. By transforming the target function while preserving its optima, heat diffusion facilitates information flow from distant regions to the solver, providing more efficient navigation. Utilizing heat diffusion, we propose a framework for solving general combinatorial optimization problems. The proposed methodology demonstrates superior performance across a range of the most challenging and widely encountered combinatorial optimizations. Echoing recent advancements in harnessing thermodynamics for generative artificial intelligence, our study further reveals its significant potential in advancing combinatorial optimization.", "keywords": "Combinatorial optimization;Heat equation", "primary_area": "optimization", "supplementary_material": "/attachment/6ebe041c2a6204dbbe820c64633ed1e5855c2364.zip", "author": "Hengyuan Ma;Wenlian Lu;Jianfeng Feng", "authorids": "~Hengyuan_Ma1;~Wenlian_Lu1;~Jianfeng_Feng2", "gender": "M;M;M", "homepage": ";;https://www.dcs.warwick.ac.uk/~feng/", "dblp": "268/5474;41/2305;19/6212", "google_scholar": "xl1WCzUAAAAJ;;https://scholar.google.co.uk/citations?user=0MtAVz4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Hengyuan_Ma1;~Wenlian_Lu1;~Feng_Jian_Feng1", "aff": "Fudan University;Fudan University;The University of Warwick", "aff_domain": "fudan.edu.cn;fudan.edu.cn;warwick.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2024efficient,\ntitle={Efficient Combinatorial Optimization via Heat Diffusion},\nauthor={Hengyuan Ma and Wenlian Lu and Jianfeng Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=psDrko9v1D}\n}", "github": "", "reviewers": "uVQ4;VgKr;NRJw;c1oa", "pdf_size": 1153637, "rating": "5;6;6;8", "confidence": "3;3;5;4", "soundness": "2;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;2;4", "wc_summary": "36;60;42;49", "wc_strengths": "52;26;57;63", "wc_weaknesses": "178;85;29;50", "wc_questions": "1;24;95;4", "wc_limitations": "2;21;27;1", "wc_review": "269;216;250;167", "wc_reply_reviewers": "77;0;0;14", "wc_reply_authors": "247;0;0;0", "reply_reviewers": "2;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 46.75, 8.926785535678562 ], "wc_strengths_avg": [ 49.5, 14.115594213493104 ], "wc_weaknesses_avg": [ 85.5, 57.02850164610675 ], "wc_questions_avg": [ 31.0, 37.993420483025744 ], "wc_limitations_avg": [ 12.75, 11.453711188955307 ], "wc_review_avg": [ 225.5, 38.74596753211875 ], "wc_reply_reviewers_avg": [ 22.75, 31.838459447655442 ], "wc_reply_authors_avg": [ 61.75, 106.95413736737817 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3458572319330373, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4047354633981990873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;fudan.edu.cn;warwick.ac.uk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Fudan University;University of Warwick", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://warwick.ac.uk", "aff_unique_abbr": "Fudan;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Achieving $\\tilde{O}(1/\\epsilon)$ Sample Complexity for Constrained Markov Decision Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93523", "id": "psG4LXlDNs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=psG4LXlDNs", "openreview": "https://openreview.net/forum?id=psG4LXlDNs", "poster": "", "project": "", "author_site": "Jiashuo Jiang, Yinyu Ye", "tldr": "", "abstract": "We consider the reinforcement learning problem for the constrained Markov decision process (CMDP), which plays a central role in satisfying safety or resource constraints in sequential learning and decision-making. In this problem, we are given finite resources and a MDP with unknown transition probabilities. At each stage, we take an action, collecting a reward and consuming some resources, all assumed to be unknown and need to be learned over time. In this work, we take the first step towards deriving optimal problem-dependent guarantees for the CMDP problems. We derive a logarithmic regret bound, which translates into a $O(\\frac{1}{\\Delta\\cdot\\epsilon}\\cdot\\log^2(1/\\epsilon))$ sample complexity bound, with $\\Delta$ being a problem-dependent parameter, yet independent of $\\epsilon$. Our sample complexity bound improves upon the state-of-art $O(1/\\epsilon^2)$ sample complexity for CMDP problems established in the previous literature, in terms of the dependency on $\\epsilon$. To achieve this advance, we develop a new framework for analyzing CMDP problems. To be specific, our algorithm operates in the primal space and we resolve the primal LP for the CMDP problem at each period in an online manner, with \\textit{adaptive} remaining resource capacities. The key elements of our algorithm are: i) a characterization of the instance hardness via LP basis, ii) an eliminating procedure that identifies one optimal basis of the primal LP, and; iii) a resolving procedure that is adaptive to the remaining resources and sticks to the characterized optimal basis.", "keywords": "constrained MDP;reinforcement learning;online linear programming", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jiashuo Jiang;Yinyu Ye", "authorids": "~Jiashuo_Jiang1;~Yinyu_Ye1", "gender": ";M", "homepage": "https://jiashuo3.github.io/;https://web.stanford.edu/~yyye/", "dblp": "281/6676;42/1372", "google_scholar": ";BgOXDogAAAAJ", "orcid": "0000-0001-5230-4231;", "linkedin": ";", "or_profile": "~Jiashuo_Jiang1;~Yinyu_Ye1", "aff": "Hong Kong University of Science and Technology;", "aff_domain": "ust.hk;", "position": "Assistant Professor;", "bibtex": "@inproceedings{\njiang2024achieving,\ntitle={Achieving \\${\\textbackslash}tilde\\{O\\}(1/{\\textbackslash}epsilon)\\$ Sample Complexity for Constrained Markov Decision Process},\nauthor={Jiashuo Jiang and Yinyu Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=psG4LXlDNs}\n}", "github": "", "reviewers": "w3CH;hDFd;rqcB;55Qh;sc31", "pdf_size": 510938, "rating": "4;5;6;7;8", "confidence": "3;3;4;4;3", "soundness": "2;2;2;4;3", "novelty": "2;3;3;4;3", "presentation": "1;2;1;3;4", "wc_summary": "45;66;54;49;73", "wc_strengths": "29;42;58;100;29", "wc_weaknesses": "20;196;149;226;48", "wc_questions": "108;21;80;30;1", "wc_limitations": "12;12;41;12;90", "wc_review": "214;337;382;417;241", "wc_reply_reviewers": "0;27;18;18;0", "wc_reply_authors": "0;4;8;6;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.2, 1.16619037896906 ], "wc_summary_avg": [ 57.4, 10.518555033843764 ], "wc_strengths_avg": [ 51.6, 26.446927987953536 ], "wc_weaknesses_avg": [ 127.8, 80.91081509909537 ], "wc_questions_avg": [ 48.0, 39.713977388320096 ], "wc_limitations_avg": [ 33.4, 30.447331574376104 ], "wc_review_avg": [ 318.2, 78.74363466338089 ], "wc_reply_reviewers_avg": [ 12.6, 10.8 ], "wc_reply_authors_avg": [ 3.6, 3.2 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.2886751345948129, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "ust.hk;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Abstracted Shapes as Tokens - A Generalizable and Interpretable Model for Time-series Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93522", "id": "pwKkNSuuEs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pwKkNSuuEs", "openreview": "https://openreview.net/forum?id=pwKkNSuuEs", "poster": "/media/PosterPDFs/NeurIPS%202024/93522.png?t=1732559420.1102076", "project": "", "author_site": "Yunshi Wen, Tengfei Ma, Lily Weng, Lam Nguyen, Anak Agung Julius", "tldr": "", "abstract": "In time-series analysis, many recent works seek to provide a unified view and representation for time-series across multiple domains, leading to the development of foundation models for time-series data. Despite diverse modeling techniques, existing models are black boxes and fail to provide insights and explanations about their representations. In this paper, we present VQShape, a pre-trained, generalizable, and interpretable model for time-series representation learning and classification. By introducing a novel representation for time-series data, we forge a connection between the latent space of VQShape and shape-level features. Using vector quantization, we show that time-series from different domains can be described using a unified set of low-dimensional codes, where each code can be represented as an abstracted shape in the time domain. On classification tasks, we show that the representations of VQShape can be utilized to build interpretable classifiers, achieving comparable performance to specialist models. Additionally, in zero-shot learning, VQShape and its codebook can generalize to previously unseen datasets and domains that are not included in the pre-training process. The code and pre-trained weights are available at https://github.com/YunshiWen/VQShape.", "keywords": "Time-series;Interpretability;Self-supervised Learning;Pre-trained Model", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yunshi Wen;Tengfei Ma;Tsui-Wei Weng;Lam M. Nguyen;Anak Agung Julius", "authorids": "~Yunshi_Wen1;~Tengfei_Ma1;~Tsui-Wei_Weng1;~Lam_M._Nguyen1;~Anak_Agung_Julius1", "gender": "M;M;F;M;M", "homepage": "https://YunshiWen.github.io;https://sites.google.com/site/matf0123/;https://lilywenglab.github.io;https://www.ecse.rpi.edu/~agung;https://lamnguyen-mltd.github.io/", "dblp": "337/2191;94/9023-1;177/9197;;181/1428", "google_scholar": "ugTcX4MAAAAJ;9OvNakkAAAAJ;v8GM4xoAAAAJ;OmgSVN4AAAAJ;DeFL5Q8AAAAJ", "orcid": ";0000-0002-1086-529X;;;", "linkedin": ";;;;lam-m-nguyen-71b54750/", "or_profile": "~Yunshi_Wen1;~Tengfei_Ma1;~Tsui-Wei_Weng1;~Anak_Agung_Julius1;~Lam_M_Nguyen1", "aff": "Rensselaer Polytechnic Institute;State University of New York at Stony Brook;University of California, San Diego;Rensselaer Polytechnic Institute;IBM Research, Thomas J. Watson Research Center", "aff_domain": "rpi.edu;stonybrook.edu;ucsd.edu;rpi.edu;ibm.com", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor;Staff Research Scientist", "bibtex": "@inproceedings{\nwen2024abstracted,\ntitle={Abstracted Shapes as Tokens - A Generalizable and Interpretable Model for Time-series Classification},\nauthor={Yunshi Wen and Tengfei Ma and Tsui-Wei Weng and Lam M. Nguyen and Anak Agung Julius},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pwKkNSuuEs}\n}", "github": "", "reviewers": "mtPy;F3fg;8xHF;8MWE", "pdf_size": 1208142, "rating": "4;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "86;42;124;83", "wc_strengths": "17;46;112;51", "wc_weaknesses": "396;103;86;63", "wc_questions": "1;87;9;595", "wc_limitations": "7;13;1;50", "wc_review": "507;291;332;842", "wc_reply_reviewers": "477;293;0;384", "wc_reply_authors": "890;167;47;711", "reply_reviewers": "2;1;0;2", "reply_authors": "4;3;2;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.75, 29.02046691560975 ], "wc_strengths_avg": [ 56.5, 34.57238782612506 ], "wc_weaknesses_avg": [ 162.0, 135.84366013914672 ], "wc_questions_avg": [ 173.0, 245.9471487942074 ], "wc_limitations_avg": [ 17.75, 19.09679292446771 ], "wc_review_avg": [ 493.0, 217.21072717524797 ], "wc_reply_reviewers_avg": [ 288.5, 178.81904261012025 ], "wc_reply_authors_avg": [ 453.75, 355.02209438287076 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6327311620983551409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rpi.edu;stonybrook.edu;ucsd.edu;rpi.edu;ibm.com", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Rensselaer Polytechnic Institute;State University of New York at Stony Brook;University of California, San Diego;IBM", "aff_unique_dep": ";;;IBM Research", "aff_unique_url": "https://www.rpi.edu;https://www.stonybrook.edu;https://www.ucsd.edu;https://www.ibm.com/research", "aff_unique_abbr": "RPI;SUNY Stony Brook;UCSD;IBM", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Stony Brook;San Diego;Yorktown Heights", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Train-Attention: Meta-Learning Where to Focus in Continual Knowledge Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93521", "id": "pwLdvYIMrF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pwLdvYIMrF", "openreview": "https://openreview.net/forum?id=pwLdvYIMrF", "poster": "/media/PosterPDFs/NeurIPS%202024/93521.png?t=1731326963.8076186", "project": "", "author_site": "Seo Yeongbin, Dongha Lee, Jinyoung Yeo", "tldr": "", "abstract": "Previous studies on continual knowledge learning (CKL) in large language models (LLMs) have predominantly focused on approaches such as regularization, architectural modifications, and rehearsal techniques to mitigate catastrophic forgetting. However, these methods naively inherit the inefficiencies of standard training procedures, indiscriminately applying uniform weight across all tokens, which can lead to unnecessary parameter updates and increased forgetting. To address these shortcomings, we propose a novel CKL approach termed Train-Attention-Augmented Language Model (TAALM), which enhances learning efficiency by dynamically predicting and applying weights to tokens based on their usefulness. This method employs a meta-learning framework that optimizes token importance predictions, facilitating targeted knowledge updates and minimizing forgetting. Also, we observe that existing benchmarks do not clearly exhibit the trade-off between learning and retaining, therefore we propose a new benchmark, LAMA-ckl, to address this issue. Through experiments conducted on both newly introduced and established CKL benchmarks, TAALM proves the state-of-the-art performance upon the baselines, and also shows synergistic compatibility when integrated with previous CKL approaches. The code and the dataset are available online.", "keywords": "continual learning;continual knowledge learning;large language models;meta-learning;train-attention;token weight", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/ee3f367218a95940bdb9e8012b9a8b9870276c78.zip", "author": "Yeongbin Seo;Dongha Lee;Jinyoung Yeo", "authorids": "~Yeongbin_Seo1;~Dongha_Lee1;~Jinyoung_Yeo1", "gender": "M;M;M", "homepage": "https://github.com/suhcrates-web;https://donalee.github.io;https://diyonsei.notion.site", "dblp": ";12/760-3;121/4335", "google_scholar": ";driVwKwAAAAJ;rJBSLtAAAAAJ", "orcid": ";0000-0003-2173-3476;0000-0003-3847-4917", "linkedin": ";;jinyoung-yeo-4623a3128/", "or_profile": "~Yeongbin_Seo1;~Dongha_Lee1;~Jinyoung_Yeo1", "aff": "Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyeongbin2024trainattention,\ntitle={Train-Attention: Meta-Learning Where to Focus in Continual Knowledge Learning},\nauthor={Yeongbin Seo and Dongha Lee and Jinyoung Yeo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pwLdvYIMrF}\n}", "github": "", "reviewers": "6R4i;dYuZ;Az1R;CkYa", "pdf_size": 8019763, "rating": "5;6;6;6", "confidence": "4;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;2;3", "wc_summary": "35;31;112;98", "wc_strengths": "49;29;45;83", "wc_weaknesses": "48;78;93;98", "wc_questions": "96;52;23;60", "wc_limitations": "1;6;9;60", "wc_review": "229;196;282;399", "wc_reply_reviewers": "23;19;0;30", "wc_reply_authors": "37;44;39;44", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 36.36619309193636 ], "wc_strengths_avg": [ 51.5, 19.665960439297137 ], "wc_weaknesses_avg": [ 79.25, 19.48557158514987 ], "wc_questions_avg": [ 57.75, 26.022826518270456 ], "wc_limitations_avg": [ 19.0, 23.843238035132728 ], "wc_review_avg": [ 276.5, 77.09247693517183 ], "wc_reply_reviewers_avg": [ 18.0, 11.113055385446435 ], "wc_reply_authors_avg": [ 41.0, 3.082207001484488 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13961983452522290908&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Apathetic or Empathetic? Evaluating LLMs' Emotional Alignments with Humans", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93520", "id": "pwRVGRWtGg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pwRVGRWtGg", "openreview": "https://openreview.net/forum?id=pwRVGRWtGg", "poster": "/media/PosterPDFs/NeurIPS%202024/93520.png?t=1731232440.5657601", "project": "", "author_site": "Jen-Tse Huang, Man Ho LAM, Eric John Li, Shujie Ren, Wenxuan Wang, Wenxiang Jiao, Zhaopeng Tu, Michael R Lyu", "tldr": "", "abstract": "Evaluating Large Language Models\u2019 (LLMs) anthropomorphic capabilities has become increasingly important in contemporary discourse. Utilizing the emotion appraisal theory from psychology, we propose to evaluate the empathy ability of LLMs, i.e., how their feelings change when presented with specific situations. After a careful and comprehensive survey, we collect a dataset containing over 400 situations that have proven effective in eliciting the eight emotions central to our study. Categorizing the situations into 36 factors, we conduct a human evaluation involving more than 1,200 subjects worldwide. With the human evaluation results as references, our evaluation includes seven LLMs, covering both commercial and open-source models, including variations in model sizes, featuring the latest iterations, such as GPT-4, Mixtral-8x22B, and LLaMA-3.1. We find that, despite several misalignments, LLMs can generally respond appropriately to certain situations. Nevertheless, they fall short in alignment with the emotional behaviors of human beings and cannot establish connections between similar situations. Our collected dataset of situations, the human evaluation results, and the code of our testing framework, i.e., EmotionBench, are publicly available at https://github.com/CUHK-ARISE/EmotionBench.", "keywords": "LLM;Evaluation;Emotions", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/b1009fd2f333180134e04f3f7deb2e53c12aeb2a.zip", "author": "Jen-tse Huang;Man Ho LAM;Eric John Li;Shujie Ren;Wenxuan Wang;Wenxiang Jiao;Zhaopeng Tu;Michael Lyu", "authorids": "~Jen-tse_Huang1;~Man_Ho_LAM1;~Eric_John_Li1;~Shujie_Ren1;~Wenxuan_Wang2;~Wenxiang_Jiao1;~Zhaopeng_Tu1;~Michael_Lyu1", "gender": "M;M;M;;;M;M;M", "homepage": "https://penguinnnnn.github.io/;https://donaldlamnl.github.io/DonaldLamNL/home;;;;https://wxjiao.github.io/;http://www.zptu.net;http://www.cse.cuhk.edu.hk/~lyu", "dblp": "317/7026;;348/7146;;203/1536-1;239/4883;71/9281;l/MichaelRLyu", "google_scholar": "XBzDTAQAAAAJ;hOd8TtoAAAAJ;https://scholar.google.com.hk/citations?user=m71w120AAAAJ;https://scholar.google.com/citations?hl=en;4v5x0bUAAAAJ;CvtODukAAAAJ;IvE2zRgAAAAJ;uQnBgK0AAAAJ", "orcid": "0000-0003-3446-0083;;;;;;;0000-0002-3666-5798", "linkedin": "jen-tse-huang-08a169200/;donald-lam-17236126b/;eric-john-li-b380a2250;;;;tuzhaopeng;michael-lyu-58b154/", "or_profile": "~Jen-tse_Huang1;~Man_Ho_LAM1;~Eric_John_Li1;~Shujie_Ren1;~Wenxuan_Wang2;~Wenxiang_Jiao1;~Zhaopeng_Tu1;~Michael_Lyu1", "aff": "The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;The Chinese University of Hong Kong;Tianjin Medical University ;The Chinese University of Hong Kong;Tencent AI Lab;Tencent AI Lab;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;tmu.edu.cn;cuhk.edu.hk;tencent.com;tencent.com;cuhk.edu.hk", "position": "PhD student;Undergrad student;Undergrad student;MS student;PhD student;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nhuang2024apathetic,\ntitle={Apathetic or Empathetic? Evaluating {LLM}s' Emotional Alignments with Humans},\nauthor={Jen-tse Huang and Man Ho LAM and Eric John Li and Shujie Ren and Wenxuan Wang and Wenxiang Jiao and Zhaopeng Tu and Michael Lyu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pwRVGRWtGg}\n}", "github": "", "reviewers": "Ed98;cRFW;gNfF;mcGQ", "pdf_size": 642924, "rating": "5;5;5;5", "confidence": "4;5;2;4", "soundness": "3;2;3;2", "novelty": "2;3;3;2", "presentation": "3;3;3;2", "wc_summary": "59;52;40;67", "wc_strengths": "28;70;37;14", "wc_weaknesses": "89;96;59;64", "wc_questions": "45;40;21;61", "wc_limitations": "10;8;4;1", "wc_review": "231;266;161;207", "wc_reply_reviewers": "34;21;33;259", "wc_reply_authors": "75;203;203;317", "reply_reviewers": "1;1;1;3", "reply_authors": "2;3;3;4", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 54.5, 9.912113800799505 ], "wc_strengths_avg": [ 37.25, 20.60794749605113 ], "wc_weaknesses_avg": [ 77.0, 15.795568998931314 ], "wc_questions_avg": [ 41.75, 14.271912976192084 ], "wc_limitations_avg": [ 5.75, 3.491060010942235 ], "wc_review_avg": [ 216.25, 38.17967391165095 ], "wc_reply_reviewers_avg": [ 86.75, 99.58005573406756 ], "wc_reply_authors_avg": [ 199.5, 85.6314778571525 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14434768918374868786&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;tmu.edu.cn;cuhk.edu.hk;tencent.com;tencent.com;cuhk.edu.hk", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Tianjin Medical University;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.tmu.edu.cn/;https://ai.tencent.com", "aff_unique_abbr": "CUHK;TMU;Tencent AI Lab", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Pseudo-Private Data Guided Model Inversion Attacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93519", "id": "pyqPUf36D2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pyqPUf36D2", "openreview": "https://openreview.net/forum?id=pyqPUf36D2", "poster": "/media/PosterPDFs/NeurIPS%202024/93519.png?t=1731237503.946745", "project": "", "author_site": "Xiong Peng, Bo Han, Feng Liu, Tongliang Liu, Mingyuan Zhou", "tldr": "", "abstract": "In model inversion attacks (MIAs), adversaries attempt to recover private training data by exploiting access to a well-trained target model. Recent advancements have improved MIA performance using a two-stage generative framework. This approach first employs a generative adversarial network to learn a fixed distributional prior, which is then used to guide the inversion process during the attack. However, in this paper, we observed a phenomenon that such a fixed prior would lead to a low probability of sampling actual private data during the inversion process due to the inherent distribution gap between the prior distribution and the private data distribution, thereby constraining attack performance. To address this limitation, we propose increasing the density around high-quality pseudo-private data\u2014recovered samples through model inversion that exhibit characteristics of the private training data\u2014by slightly tuning the generator. This strategy effectively increases the probability of sampling actual private data that is close to these pseudo-private data during the inversion process. After integrating our method, the generative model inversion pipeline is strengthened, leading to improvements over state-of-the-art MIAs. This paves the way for new research directions in generative MIAs.", "keywords": "Model Inversion Attacks", "primary_area": "privacy", "supplementary_material": "/attachment/ab952f8b298019580810ea5dc7040dc50c241775.zip", "author": "Xiong Peng;Bo Han;Feng Liu;Tongliang Liu;Mingyuan Zhou", "authorids": "~Xiong_Peng1;~Bo_Han1;~Feng_Liu2;~Tongliang_Liu1;~Mingyuan_Zhou1", "gender": "M;M;M;M;M", "homepage": ";https://fengliu90.github.io/index.html;https://tongliang-liu.github.io/;http://mingyuanzhou.github.io;https://bhanml.github.io/", "dblp": ";77/1318-3;150/6667;;241/0472-3", "google_scholar": "https://scholar.google.com.hk/citations?user=txYzDpMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;LXwCIisAAAAJ;nTNjqHwAAAAJ", "orcid": ";0000-0002-5005-9129;;;", "linkedin": ";alexfengliu;;;", "or_profile": "~Xiong_Peng1;~Feng_Liu2;~Tongliang_Liu1;~Mingyuan_Zhou1;~bo_han2", "aff": "Hong Kong Baptist University;University of Melbourne;Mohamed bin Zayed University of Artificial Intelligence;Google;MBZUAI", "aff_domain": "hkbu.edu.hk;unimelb.edu.au;mbzuai.ac.ae;google.com;mbzuai.ac.ae", "position": "PhD student;Assistant Professor;Affiliated Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\npeng2024pseudoprivate,\ntitle={Pseudo-Private Data Guided Model Inversion Attacks},\nauthor={Xiong Peng and Bo Han and Feng Liu and Tongliang Liu and Mingyuan Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pyqPUf36D2}\n}", "github": "", "reviewers": "qx3K;4Zsf;eDgc;CEeM", "pdf_size": 25882877, "rating": "4;5;7;7", "confidence": "4;5;5;3", "soundness": "3;3;3;3", "novelty": "1;2;3;3", "presentation": "3;3;3;2", "wc_summary": "22;68;144;56", "wc_strengths": "44;30;137;64", "wc_weaknesses": "523;135;692;77", "wc_questions": "65;78;85;25", "wc_limitations": "40;15;74;12", "wc_review": "694;326;1132;234", "wc_reply_reviewers": "392;22;61;14", "wc_reply_authors": "1598;81;922;36", "reply_reviewers": "1;1;1;1", "reply_authors": "6;3;4;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 44.59540335056966 ], "wc_strengths_avg": [ 68.75, 41.215136782497765 ], "wc_weaknesses_avg": [ 356.75, 258.58497152773595 ], "wc_questions_avg": [ 63.25, 23.220411279734044 ], "wc_limitations_avg": [ 35.25, 24.873429598670143 ], "wc_review_avg": [ 596.5, 353.85131058115354 ], "wc_reply_reviewers_avg": [ 122.25, 156.75199360773692 ], "wc_reply_authors_avg": [ 659.25, 646.7423656294676 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.75, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:j5gJ3dFlVhUJ:scholar.google.com/&scioq=Pseudo-Private+Data+Guided+Model+Inversion+Attacks&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "hkbu.edu.hk;unimelb.edu.au;mbzuai.ac.ae;google.com;mbzuai.ac.ae", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Hong Kong Baptist University;University of Melbourne;Mohamed bin Zayed University of Artificial Intelligence;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.unimelb.edu.au;https://mbzuai.ac.ae;https://www.google.com", "aff_unique_abbr": "HKBU;UniMelb;MBZUAI;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Mountain View", "aff_country_unique_index": "0;1;2;3;2", "aff_country_unique": "China;Australia;United Arab Emirates;United States" }, { "title": "NeuralClothSim: Neural Deformation Fields Meet the Thin Shell Theory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93518", "id": "pzJjlnMvk5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=pzJjlnMvk5", "openreview": "https://openreview.net/forum?id=pzJjlnMvk5", "poster": "/media/PosterPDFs/NeurIPS%202024/93518.png?t=1733498958.6726048", "project": "", "author_site": "Navami Kairanda, Marc Habermann, Christian Theobalt, Vladislav Golyanik", "tldr": "", "abstract": "Despite existing 3D cloth simulators producing realistic results, they predominantly operate on discrete surface representations (e.g. points and meshes) with a fixed spatial resolution, which often leads to large memory consumption and resolution-dependent simulations. Moreover, back-propagating gradients through the existing solvers is difficult and they hence cannot be easily integrated into modern neural architectures. In response, this paper re-thinks physically plausible cloth simulation: We propose NeuralClothSim, i.e., a new quasistatic cloth simulator using thin shells, in which surface deformation is encoded in neural network weights in form of a neural field. Our memory-efficient solver operates on a new continuous coordinate-based surface representation called neural deformation fields (NDFs); it supervises NDF equilibria with the laws of the non-linear Kirchhoff-Love shell theory with a non-linear anisotropic material model. NDFs are adaptive: They 1) allocate their capacity to the deformation details and 2) allow surface state queries at arbitrary spatial resolutions without re-training. We show how to train NeuralClothSim while imposing hard boundary conditions and demonstrate multiple applications, such as material interpolation and simulation editing. The experimental results highlight the effectiveness of our continuous neural formulation.", "keywords": "Neural field;implicit representation;cloth simulation;consistent simulation;multi-resolution;Kirchhoff-Love theory", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/3d03d8213f50c0ea2648865edb9d7001562ea4bd.zip", "author": "Navami Kairanda;Marc Habermann;Christian Theobalt;Vladislav Golyanik", "authorids": "~Navami_Kairanda1;~Marc_Habermann1;~Christian_Theobalt2;~Vladislav_Golyanik1", "gender": "F;M;M;M", "homepage": "https://people.mpi-inf.mpg.de/~nkairand/;https://people.mpi-inf.mpg.de/~mhaberma/;https://www.mpi-inf.mpg.de/~theobalt/;https://people.mpi-inf.mpg.de/~golyanik/", "dblp": "317/1339;227/2744;55/3346;180/6438", "google_scholar": "zlrbxTEAAAAJ;oWstvNcAAAAJ;https://scholar.google.com.tw/citations?user=eIWg8NMAAAAJ;https://scholar.google.co.uk/citations?user=we9LnVcAAAAJ", "orcid": "0009-0004-3786-5937;;;", "linkedin": "navami-kairanda-a59a7464/?originalSubdomain=de;;;", "or_profile": "~Navami_Kairanda1;~Marc_Habermann1;~Christian_Theobalt2;~Vladislav_Golyanik1", "aff": "Saarland Informatics Campus, Max Planck Institute for Informatics;Saarland Informatics Campus, Max-Planck Institute;Max-Planck-Institute for Informatics, Saarland Informatics Campus;Saarland Informatics Campus, Max-Planck Institute for Informatics", "aff_domain": "mpi-inf.mpg.de;mpi-inf.mpg.de;mpi-inf.mpg.de;mpi-inf.mpg.de", "position": "PhD student;Principal Researcher;Director;Principal Researcher", "bibtex": "@inproceedings{\nkairanda2024neuralclothsim,\ntitle={NeuralClothSim: Neural Deformation Fields Meet the Thin Shell Theory},\nauthor={Navami Kairanda and Marc Habermann and Christian Theobalt and Vladislav Golyanik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=pzJjlnMvk5}\n}", "github": "", "reviewers": "Gekm;BLQv;bD8m", "pdf_size": 16143747, "rating": "5;6;7", "confidence": "5;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "56;106;44", "wc_strengths": "25;105;24", "wc_weaknesses": "67;40;163", "wc_questions": "235;100;81", "wc_limitations": "4;12;18", "wc_review": "387;363;330", "wc_reply_reviewers": "0;19;51", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.66666666666667, 26.849374087469688 ], "wc_strengths_avg": [ 51.333333333333336, 37.95025984393549 ], "wc_weaknesses_avg": [ 90.0, 52.78257288158659 ], "wc_questions_avg": [ 138.66666666666666, 68.55816670698117 ], "wc_limitations_avg": [ 11.333333333333334, 5.734883511361751 ], "wc_review_avg": [ 360.0, 23.366642891095847 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 21.044925490219462 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2065988186623508543&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "mpi-inf.mpg.de;mpi-inf.mpg.de;mpi-inf.mpg.de;mpi-inf.mpg.de", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max Planck Institute for Informatics;Max-Planck Institute;Max-Planck-Institute for Informatics;Max-Planck Institute for Informatics", "aff_unique_dep": ";Informatics;;", "aff_unique_url": "https://mpi-inf.mpg.de;https://www.mpi-sws.org;https://mpi-inf.mpg.de;https://mpi-inf.mpg.de", "aff_unique_abbr": "MPII;MPI-SWS;MPII;MPII", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Saarbr\u00fccken;Saarland", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "BEACON: Benchmark for Comprehensive RNA Tasks and Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97501", "id": "q2IeJByeSM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q2IeJByeSM", "openreview": "https://openreview.net/forum?id=q2IeJByeSM", "poster": "/media/PosterPDFs/NeurIPS%202024/97501.png?t=1733508784.2666638", "project": "", "author_site": "Yuchen Ren, Zhiyuan Chen, Lifeng Qiao, Hongtai Jing, Yuchen Cai, Sheng Xu, Peng Ye, Xinzhu Ma, Siqi Sun, Hongliang Yan, Dong Yuan, Wanli Ouyang, Xihui Liu", "tldr": "", "abstract": "RNA plays a pivotal role in translating genetic instructions into functional outcomes, underscoring its importance in biological processes and disease mechanisms. Despite the emergence of numerous deep learning approaches for RNA, particularly universal RNA language models, there remains a significant lack of standardized benchmarks to assess the effectiveness of these methods. In this study, we introduce the first comprehensive RNA benchmark BEACON **BE**nchm**A**rk for **CO**mprehensive R**N**A Task and Language Models).\nFirst, BEACON comprises 13 distinct tasks derived from extensive previous work covering structural analysis, functional studies, and engineering applications, enabling a comprehensive assessment of the performance of methods on various RNA understanding tasks. Second, we examine a range of models, including traditional approaches like CNNs, as well as advanced RNA foundation models based on language models, offering valuable insights into the task-specific performances of these models. Third, we investigate the vital RNA language model components from the tokenizer and positional encoding aspects. Notably, our findings emphasize the superiority of single nucleotide tokenization and the effectiveness of Attention with Linear Biases (ALiBi) over traditional positional encoding methods. Based on these insights, a simple yet strong baseline called BEACON-B is proposed, which can achieve outstanding performance with limited data and computational resources. \nThe datasets and source code of our benchmark are available at https://github.com/terry-r123/RNABenchmark.", "keywords": "RNA;Language Model;Benchmark;Biology", "primary_area": "", "supplementary_material": "", "author": "Yuchen Ren;Zhiyuan Chen;Lifeng Qiao;Hongtai Jing;Yuchen Cai;Sheng Xu;Peng Ye;Xinzhu Ma;Siqi Sun;Hongliang Yan;Dong Yuan;Wanli Ouyang;Xihui Liu", "authorids": "~Yuchen_Ren1;~Zhiyuan_Chen5;~Lifeng_Qiao1;~Hongtai_Jing1;~Yuchen_Cai2;~Sheng_Xu7;~Peng_Ye4;~Xinzhu_Ma1;~Siqi_Sun2;~Hongliang_Yan1;~Dong_Yuan1;~Wanli_Ouyang1;~Xihui_Liu1", "gender": ";M;M;M;M;M;M;M;M;M;M;;F", "homepage": ";https://zyc.ai;https://github.com/qiaoqiaoLF;https://github.com/HHHongtai;https://github.com/FINitenet;https://sxu99.github.io/;;https://github.com/xinzhuma;https://intersun.github.io/;;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/dong-yuan.html;;https://xh-liu.github.io/", "dblp": ";192/0196-8;55/10318;;347/8903;;53/930-6;191/3902;120/1735;03/8409;;;184/3911", "google_scholar": ";CKiY8PIAAAAJ;;;;9ttWLMgAAAAJ;UEZZP5QAAAAJ;8PuKa_8AAAAJ;2dyg3WgAAAAJ;Obo7-bIAAAAJ;https://scholar.google.com.au/citations?user=UU0veX4AAAAJ;;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ", "orcid": ";0000-0003-3210-0324;;;;0000-0002-6507-9122;0000-0002-8486-7562;;;;0000-0003-1130-0888;;0000-0003-1831-9952", "linkedin": ";%E9%99%9F%E5%8E%9F-%E9%99%88-0b473aa9;;;;;;;;;;;", "or_profile": "~Yuchen_Ren1;~Zhiyuan_Chen5;~Lifeng_Qiao1;~Hongtai_Jing1;~Yuchen_Cai2;~Sheng_Xu7;~Peng_Ye4;~Xinzhu_Ma1;~Siqi_Sun2;~Hongliang_Yan1;~Dong_Yuan1;~Wanli_Ouyang1;~Xihui_Liu1", "aff": ";DP Technology;Shanghai Jiaotong University;;Fudan University;Shanghai AI Laboratory;Fudan University;The Chinese University of Hong Kong;Fudan University;Shanghai Artificial Intelligence Lab;University of Sydney;;University of Hong Kong", "aff_domain": ";dp.tech;sjtu.edu.cn;;fudan.edu.cn;pjlab.org.cn;fudan.edu.cn;cuhk.edu.hk;fudan.edu.cn;pjlab.org.cn;sydney.edu.au;;hku.hk", "position": ";Researcher;Undergrad student;;PhD student;Researcher;PhD student;Postdoc;Associate Professor;Postdoc;Associate Professor;;Assistant Professor", "bibtex": "@inproceedings{\nren2024beacon,\ntitle={{BEACON}: Benchmark for Comprehensive {RNA} Tasks and Language Models},\nauthor={Yuchen Ren and Zhiyuan Chen and Lifeng Qiao and Hongtai Jing and Yuchen Cai and Sheng Xu and Peng Ye and Xinzhu Ma and Siqi Sun and Hongliang Yan and Dong Yuan and Wanli Ouyang and Xihui Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=q2IeJByeSM}\n}", "github": "", "reviewers": "Wr93;xw6V;M1uB;vm5g", "pdf_size": 636728, "rating": "6;8;8;8", "confidence": "3;2;4;3", "wc_summary_and_contributions": "77;82;173;33", "wc_strengths": "54;58;42;2", "wc_improvement": "123;98;154;18", "wc_limitations": "6;1;11;1", "wc_correctness": "2;12;76;1", "wc_clarity": "19;7;53;1", "wc_relation_to_prior_work": "7;13;53;1", "wc_documentation": "14;5;68;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "303;277;631;59", "wc_reply_reviewers": "26;13;0;0", "wc_reply_authors": "67;71;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "9;3;2;1", "rating_avg": [ 7.5, 0.8660254037844386 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 91.25, 50.903708116403465 ], "wc_strengths_avg": [ 39.0, 22.15851980616034 ], "wc_improvement_avg": [ 98.25, 50.40027281672194 ], "wc_limitations_avg": [ 4.75, 4.14578098794425 ], "wc_correctness_avg": [ 22.75, 31.04331651096577 ], "wc_clarity_avg": [ 20.0, 20.12461179749811 ], "wc_relation_to_prior_work_avg": [ 18.5, 20.3654118544163 ], "wc_documentation_avg": [ 22.0, 26.972207918522354 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 317.5, 204.30063631814758 ], "wc_reply_reviewers_avg": [ 9.75, 10.779030568655049 ], "wc_reply_authors_avg": [ 34.5, 34.528973341239094 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 3.112474899497183 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4086323286768217290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";dp.tech;sjtu.edu.cn;;fudan.edu.cn;pjlab.org.cn;fudan.edu.cn;cuhk.edu.hk;fudan.edu.cn;pjlab.org.cn;sydney.edu.au;;hku.hk", "author_num": 13, "aff_unique_index": "0;1;2;3;2;4;2;5;6;7", "aff_unique_norm": "DP Technology;Shanghai Jiao Tong University;Fudan University;Shanghai AI Laboratory;Chinese University of Hong Kong;Shanghai Artificial Intelligence Lab;University of Sydney;University of Hong Kong", "aff_unique_dep": ";;;;;;;", "aff_unique_url": ";https://www.sjtu.edu.cn;https://www.fudan.edu.cn;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk;https://www.shailab.org;https://www.sydney.edu.au;https://www.hku.hk", "aff_unique_abbr": ";SJTU;Fudan;SAIL;CUHK;Shanghai AI Lab;USYD;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "1;1;1;1;1;1;1;2;1", "aff_country_unique": ";China;Australia" }, { "title": "Benchmarking Generative Models on Computational Thinking Tests in Elementary Visual Programming", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97500", "id": "q2WT19Ciad", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q2WT19Ciad", "openreview": "https://openreview.net/forum?id=q2WT19Ciad", "poster": "/media/PosterPDFs/NeurIPS%202024/97500.png?t=1733406055.2270133", "project": "", "author_site": "Victor-Alexandru P\u0103durean, Adish Singla", "tldr": "", "abstract": "Generative models have demonstrated human-level proficiency in various benchmarks across domains like programming, natural sciences, and general knowledge. Despite these promising results on competitive benchmarks, they still struggle with seemingly simple problem-solving tasks typically carried out by elementary-level students. How do state-of-the-art models perform on standardized programming-related tests designed to assess computational thinking and problem-solving skills at schools? In this paper, we curate a novel benchmark involving computational thinking tests grounded in elementary visual programming domains. Our initial results show that state-of-the-art models like GPT-4o and Llama3 barely match the performance of an average school student. To further boost the performance of these models, we fine-tune them using a novel synthetic data generation methodology. The key idea is to develop a comprehensive dataset using symbolic methods that capture different skill levels, ranging from recognition of visual elements to multi-choice quizzes to synthesis-style tasks. We showcase how various aspects of symbolic information in synthetic data help improve fine-tuned models' performance. We will release the full implementation and datasets to facilitate further research on enhancing computational thinking in generative models.", "keywords": "generative models;computational thinking;problem-solving skills;elementary visual programming", "primary_area": "", "supplementary_material": "/attachment/e5f05ce9684882b0625ab1804c29a24326b8c019.pdf", "author": "Victor-Alexandru P\u0103durean;Adish Singla", "authorids": "~Victor-Alexandru_P\u0103durean1;~Adish_Singla2", "gender": ";", "homepage": "https://vicondrus.github.io/;https://machineteaching.mpi-sws.org/adishsingla.html", "dblp": "292/1561;58/657", "google_scholar": "zZelTn0AAAAJ;kXz2seUAAAAJ", "orcid": "0009-0004-2998-096X;", "linkedin": "victor-padurean/;", "or_profile": "~Victor-Alexandru_P\u0103durean1;~Adish_Kumar_Singla1", "aff": "Max Planck Institute for Software Systems;Max Planck Institute for Software Systems (MPI-SWS)", "aff_domain": "mpi-sws.org;mpi-sws.org", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\np{\\u{a}}durean2024benchmarking,\ntitle={Benchmarking Generative Models on Computational Thinking Tests in Elementary Visual Programming},\nauthor={Victor-Alexandru P{\\u{a}}durean and Adish Singla},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=q2WT19Ciad}\n}", "github": "", "reviewers": "SQMT;pLhh;LHux;NVhh", "pdf_size": 883776, "rating": "6;6;6;8", "confidence": "4;3;3;5", "wc_summary_and_contributions": "94;64;114;60", "wc_strengths": "5;48;113;31", "wc_improvement": "38;171;107;78", "wc_limitations": "5;39;6;61", "wc_correctness": "1;16;14;1", "wc_clarity": "1;8;9;1", "wc_relation_to_prior_work": "1;30;12;31", "wc_documentation": "1;11;22;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "147;388;398;265", "wc_reply_reviewers": "0;239;0;0", "wc_reply_authors": "0;1248;0;0", "reply_reviewers": "0;2;0;0", "reply_authors": "2;5;2;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 83.0, 22.20360331117452 ], "wc_strengths_avg": [ 49.25, 39.86461463503692 ], "wc_improvement_avg": [ 98.5, 48.5 ], "wc_limitations_avg": [ 27.75, 23.573024837725004 ], "wc_correctness_avg": [ 8.0, 7.035623639735144 ], "wc_clarity_avg": [ 4.75, 3.766629793329841 ], "wc_relation_to_prior_work_avg": [ 18.5, 12.619429464123963 ], "wc_documentation_avg": [ 8.75, 8.671072598012312 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.5, 102.44632741099116 ], "wc_reply_reviewers_avg": [ 59.75, 103.49003575224042 ], "wc_reply_authors_avg": [ 312.0, 540.3998519614897 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15818192068744021005&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mpi-sws.org;mpi-sws.org", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Max Planck Institute for Software Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Self-Play Fine-tuning of Diffusion Models for Text-to-image Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93517", "id": "q3XavKPorV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q3XavKPorV", "openreview": "https://openreview.net/forum?id=q3XavKPorV", "poster": "/media/PosterPDFs/NeurIPS%202024/93517.png?t=1733816059.5463595", "project": "", "author_site": "Huizhuo Yuan, Zixiang Chen, Kaixuan Ji, Quanquan Gu", "tldr": "", "abstract": "Fine-tuning Diffusion Models remains an underexplored frontier in generative artificial intelligence (GenAI), especially when compared with the remarkable progress made in fine-tuning Large Language Models (LLMs). While cutting-edge diffusion models such as Stable Diffusion (SD) and SDXL rely on supervised fine-tuning, their performance inevitably plateaus after seeing a certain volume of data. Recently, reinforcement learning (RL) has been employed to fine-tune diffusion models with human preference data, but it requires at least two images (``winner'' and ``loser'' images) for each text prompt.\nIn this paper, we introduce an innovative technique called self-play fine-tuning for diffusion models (SPIN-Diffusion), where the diffusion model engages in competition with its earlier versions, facilitating an iterative self-improvement process. Our approach offers an alternative to conventional supervised fine-tuning and RL strategies, significantly improving both model performance and alignment. Our experiments on the Pick-a-Pic dataset reveal that SPIN-Diffusion outperforms the existing supervised fine-tuning method in aspects of human preference alignment and visual appeal right from its first iteration. By the second iteration, it exceeds the performance of RLHF-based methods across all metrics, achieving these results with less data. Codes are available at \\url{https://github.com/uclaml/SPIN-Diffusion/}.", "keywords": "Diffusion models; Stable Diffusion; RLHF; Self-play", "primary_area": "generative_models", "supplementary_material": "", "author": "Huizhuo Yuan;Zixiang Chen;Kaixuan Ji;Quanquan Gu", "authorids": "~Huizhuo_Yuan1;~Zixiang_Chen1;~Kaixuan_Ji2;~Quanquan_Gu1", "gender": ";M;Not Specified;M", "homepage": ";https://sites.google.com/view/zxchen;https://github.com/jkx19;http://web.cs.ucla.edu/~qgu/", "dblp": ";137/3624;252/7475;50/4597", "google_scholar": ";6nrCHr0AAAAJ;FOoKDukAAAAJ;GU9HgNAAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Huizhuo_Yuan1;~Zixiang_Chen1;~Kaixuan_Ji2;~Quanquan_Gu1", "aff": "; University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": ";cs.ucla.edu;ucla.edu;cs.ucla.edu", "position": ";PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyuan2024selfplay,\ntitle={Self-Play Fine-tuning of Diffusion Models for Text-to-image Generation},\nauthor={Huizhuo Yuan and Zixiang Chen and Kaixuan Ji and Quanquan Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=q3XavKPorV}\n}", "github": "", "reviewers": "CNjW;WzS9;K3rU", "pdf_size": 38782025, "rating": "5;6;7", "confidence": "3;3;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "81;80;148", "wc_strengths": "33;88;56", "wc_weaknesses": "186;117;91", "wc_questions": "66;101;20", "wc_limitations": "1;4;12", "wc_review": "367;390;327", "wc_reply_reviewers": "109;9;19", "wc_reply_authors": "707;5;10", "reply_reviewers": "2;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 103.0, 31.822423959633664 ], "wc_strengths_avg": [ 59.0, 22.55363976538303 ], "wc_weaknesses_avg": [ 131.33333333333334, 40.086018621071474 ], "wc_questions_avg": [ 62.333333333333336, 33.169597860423664 ], "wc_limitations_avg": [ 5.666666666666667, 4.642796092394707 ], "wc_review_avg": [ 361.3333333333333, 26.02989734047285 ], "wc_reply_reviewers_avg": [ 45.666666666666664, 44.96912521077348 ], "wc_reply_authors_avg": [ 240.66666666666666, 329.75378019903815 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17319014917996837340&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": ";cs.ucla.edu;ucla.edu;cs.ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Enhancing LLM\u2019s Cognition via Structurization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93516", "id": "q5CkneUn6K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q5CkneUn6K", "openreview": "https://openreview.net/forum?id=q5CkneUn6K", "poster": "/media/PosterPDFs/NeurIPS%202024/93516.png?t=1731654258.9466565", "project": "", "author_site": "Kai Liu, Zhihang Fu, Chao Chen, Wei Zhang, Rongxin Jiang, Fan Zhou, Yaowu Chen, Yue Wu, Jieping Ye", "tldr": "", "abstract": "When reading long-form text, human cognition is complex and structurized. While large language models (LLMs) process input contexts through a causal and sequential perspective, this approach can potentially limit their ability to handle intricate and complex inputs effectively. To enhance LLM\u2019s cognition capability, this paper presents a novel concept of context structurization. Specifically, we transform the plain, unordered contextual sentences into well-ordered and hierarchically structurized elements. By doing so, LLMs can better grasp intricate and extended contexts through precise attention and information-seeking along the organized structures. Extensive evaluations are conducted across various model architectures and sizes (including a series of auto-regressive LLMs as well as BERT-like masking models) on a diverse set of NLP tasks (e.g., context-based question-answering, exhaustive hallucination evaluation, and passage-level dense retrieval). Empirical results show consistent and significant performance gains afforded by a single-round structurization. In particular, we boost the open-sourced LLaMA2-70B model to achieve comparable performance against GPT-3.5-Turbo as the halluci- nation evaluator. Besides, we show the feasibility of distilling advanced LLMs\u2019 language processing abilities to a smaller yet effective StruXGPT-7B to execute structurization, addressing the practicality of our approach. Code is available at https://github.com/alibaba/struxgpt.", "keywords": "Large Language Models;Structurization;Augmentation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Kai Liu;Zhihang Fu;Chao Chen;Wei Zhang;Rongxin Jiang;Fan Zhou;Yaowu Chen;Yue Wu;Jieping Ye", "authorids": "~Kai_Liu8;~Zhihang_Fu1;~Chao_Chen19;~Wei_Zhang103;~Rongxin_Jiang1;~Fan_Zhou13;~Yaowu_Chen2;~Yue_Wu18;~Jieping_Ye4", "gender": "M;M;M;F;M;;;M;M", "homepage": "https://kail8.github.io/;https://zhihangfu.top/;https://chaochen.cc/;https://zwstart.github.io;https://person.zju.edu.cn/0008430;https://person.zju.edu.cn/fanzhou;https://person.zju.edu.cn/0088219;http://yelabs.net/;", "dblp": ";207/1894;66/3019-26.html;;10/2064-1;;;03/5454;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;e_e3Ur0AAAAJ;https://scholar.google.com.hk/citations?user=_xDUAtQAAAAJ;https://scholar.google.com/citations?hl=en;;;;T9AzhwcAAAAJ;srajsjoAAAAJ", "orcid": ";;;;;;;0000-0001-8662-5818;", "linkedin": ";;;;;;;;", "or_profile": "~Kai_Liu8;~Zhihang_Fu1;~Chao_Chen19;~Wei_Zhang103;~Rongxin_Jiang1;~Fan_Zhou13;~Yaowu_Chen2;~Jieping_Ye4;~Yue_Wu3", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com", "position": "Intern;Researcher;Researcher;PhD student;Researcher;Full Professor;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nliu2024enhancing,\ntitle={Enhancing {LLM}{\\textquoteright}s Cognition via Structurization},\nauthor={Kai Liu and Zhihang Fu and Chao Chen and Wei Zhang and Rongxin Jiang and Fan Zhou and Yaowu Chen and Yue Wu and Jieping Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=q5CkneUn6K}\n}", "github": "", "reviewers": "yxZb;7Sjn;eQho;8sM9", "pdf_size": 1588257, "rating": "5;5;6;8", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "124;105;90;87", "wc_strengths": "33;43;46;96", "wc_weaknesses": "124;117;100;56", "wc_questions": "29;20;26;119", "wc_limitations": "9;1;48;24", "wc_review": "319;286;310;382", "wc_reply_reviewers": "0;78;34;70", "wc_reply_authors": "0;256;29;37", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 101.5, 14.67140075112121 ], "wc_strengths_avg": [ 54.5, 24.438698819699873 ], "wc_weaknesses_avg": [ 99.25, 26.45160675649024 ], "wc_questions_avg": [ 48.5, 40.8319727664486 ], "wc_limitations_avg": [ 20.5, 17.89553016817328 ], "wc_review_avg": [ 324.25, 35.45683996071844 ], "wc_reply_reviewers_avg": [ 45.5, 31.060424980994707 ], "wc_reply_authors_avg": [ 80.5, 102.2558066810878 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=788079532930630088&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;alibaba-inc.com", "author_num": 9, "aff_unique_index": "0;0;0;1;1;1;1;0;0", "aff_unique_norm": "Alibaba Group;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.zju.edu.cn", "aff_unique_abbr": "Alibaba;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "N-agent Ad Hoc Teamwork", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93515", "id": "q7TxGUWlhD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q7TxGUWlhD", "openreview": "https://openreview.net/forum?id=q7TxGUWlhD", "poster": "/media/PosterPDFs/NeurIPS%202024/93515.png?t=1731697172.9397957", "project": "", "author_site": "Caroline Wang, Muhammad Arrasy Rahman, Ishan Durugkar, Elad Liebman, Peter Stone", "tldr": "", "abstract": "Current approaches to learning cooperative multi-agent behaviors assume relatively restrictive settings. In standard fully cooperative multi-agent reinforcement learning, the learning algorithm controls *all* agents in the scenario, while in ad hoc teamwork, the learning algorithm usually assumes control over only a *single* agent in the scenario. However, many cooperative settings in the real world are much less restrictive. For example, in an autonomous driving scenario, a company might train its cars with the same learning algorithm, yet once on the road, these cars must cooperate with cars from another company. Towards expanding the class of scenarios that cooperative learning methods may optimally address, we introduce $N$*-agent ad hoc teamwork* (NAHT), where a set of autonomous agents must interact and cooperate with dynamically varying numbers and types of teammates. This paper formalizes the problem, and proposes the *Policy Optimization with Agent Modelling* (POAM) algorithm. POAM is a policy gradient, multi-agent reinforcement learning approach to the NAHT problem, that enables adaptation to diverse teammate behaviors by learning representations of teammate behaviors. Empirical evaluation on tasks from the multi-agent particle environment and StarCraft II shows that POAM improves cooperative task returns compared to baseline approaches, and enables out-of-distribution generalization to unseen teammates.", "keywords": "ad hoc teamwork;reinforcement learning;multi-agent systems;multi-agent reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Caroline Wang;Arrasy Rahman;Ishan Durugkar;Elad Liebman;Peter Stone", "authorids": "~Caroline_Wang1;~Arrasy_Rahman1;~Ishan_Durugkar1;~Elad_Liebman1;~Peter_Stone1", "gender": "F;M;;M;M", "homepage": "https://carolinewang01.github.io/;http://cs.utexas.edu/~ishand;https://eladlieb.weebly.com;http://www.cs.utexas.edu/~pstone;https://raharrasy.github.io", "dblp": "230/4640;https://dblp.org/pers/d/Durugkar:Ishan;140/7197;s/PeterStone;242/9328", "google_scholar": "Dr2oUPgAAAAJ;eb81CnYAAAAJ;;qnwjcfAAAAAJ;3Z0YZ5QAAAAJ", "orcid": ";;;0000-0002-6795-420X;", "linkedin": "caroline-w-264b30bb/;;;;arrasy-rahman/", "or_profile": "~Caroline_Wang1;~Ishan_Durugkar1;~Elad_Liebman1;~Peter_Stone1;~Muhammad_Arrasy_Rahman1", "aff": "University of Texas at Austin;Sony AI Inc.;SparkCognition Research;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;sony.com;sparkcognition.com;utexas.edu;cs.utexas.edu", "position": "PhD student;Researcher;Researcher;Full Professor;Postdoc", "bibtex": "@inproceedings{\nwang2024nagent,\ntitle={N-agent Ad Hoc Teamwork},\nauthor={Caroline Wang and Arrasy Rahman and Ishan Durugkar and Elad Liebman and Peter Stone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=q7TxGUWlhD}\n}", "github": "", "reviewers": "5dYr;eZ5A;hc6T;9xa8", "pdf_size": 1734155, "rating": "4;5;7;7", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "112;106;52;59", "wc_strengths": "49;63;94;47", "wc_weaknesses": "296;111;171;206", "wc_questions": "379;5;91;44", "wc_limitations": "9;1;40;30", "wc_review": "845;286;448;386", "wc_reply_reviewers": "291;33;294;36", "wc_reply_authors": "2845;36;826;14", "reply_reviewers": "2;1;3;1", "reply_authors": "7;2;5;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 82.25, 26.947866334832522 ], "wc_strengths_avg": [ 63.25, 18.793283374652763 ], "wc_weaknesses_avg": [ 196.0, 66.9888050348713 ], "wc_questions_avg": [ 129.75, 147.09074579999927 ], "wc_limitations_avg": [ 20.0, 15.668439615992398 ], "wc_review_avg": [ 491.25, 212.25853928640893 ], "wc_reply_reviewers_avg": [ 163.5, 129.0087206354671 ], "wc_reply_authors_avg": [ 930.25, 1152.8587023135142 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 2.1213203435596424 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7777777777777777, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15312500148389510565&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "utexas.edu;sony.com;sparkcognition.com;utexas.edu;cs.utexas.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Texas at Austin;Sony AI Inc.;SparkCognition", "aff_unique_dep": ";;Research", "aff_unique_url": "https://www.utexas.edu;https://www.sony.ai;https://www.sparkcognition.com", "aff_unique_abbr": "UT Austin;Sony AI;SparkCognition", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "title": "A Simulation Benchmark for Autonomous Racing with Large-Scale Human Data", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97499", "id": "q9HIe2EUjf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q9HIe2EUjf", "openreview": "https://openreview.net/forum?id=q9HIe2EUjf", "poster": "/media/PosterPDFs/NeurIPS%202024/97499.png?t=1732140345.0545993", "project": "", "author_site": "Adrian Remonda, Nicklas Hansen, Ayoub Raji, Nicola Musiu, Marko Bertogna, Eduardo Veas, Xiaolong Wang", "tldr": "", "abstract": "Despite the availability of international prize-money competitions, scaled vehicles, and simulation environments, research on autonomous racing and the control of sports cars operating close to the limit of handling has been limited by the high costs of vehicle acquisition and management, as well as the limited physics accuracy of open-source simulators. In this paper, we propose a racing simulation platform based on the simulator Assetto Corsa to test, validate, and benchmark autonomous driving algorithms, including reinforcement learning (RL) and classical Model Predictive Control (MPC), in realistic and challenging scenarios. Our contributions include the development of this simulation platform, several state-of-the-art algorithms tailored to the racing environment, and a comprehensive dataset collected from human drivers. Additionally, we evaluate algorithms in the offline RL setting. All the necessary code (including environment and benchmarks), working examples, and datasets are publicly released and can be found at: https://github.com/dasGringuen/assetto_corsa_gym.", "keywords": "Reinforcement Learning;Benchmark;Simulation;Autonomous Racing", "primary_area": "", "supplementary_material": "", "author": "Adrian Remonda;Nicklas Hansen;Ayoub Raji;Nicola Musiu;Marko Bertogna;Eduardo E. Veas;Xiaolong Wang", "authorids": "~Adrian_Remonda1;~Nicklas_Hansen1;~Ayoub_Raji1;~Nicola_Musiu1;~Marko_Bertogna1;~Eduardo_E._Veas1;~Xiaolong_Wang3", "gender": "M;Non-Binary;M;M;M;M;M", "homepage": ";https://nicklashansen.github.io;;;https://hipert.unimore.it/people/marko/;;https://xiaolonw.github.io/", "dblp": "290/8669;258/0744.html;;;;46/2797;91/952-4", "google_scholar": "5yRHUUcAAAAJ;OFtDgzwAAAAJ;https://scholar.google.it/citations?user=cxY2wrIAAAAJ;;ZRrTqs8AAAAJ;-78yV4YAAAAJ;Y8O9N_0AAAAJ", "orcid": ";0000-0001-9897-4003;0000-0003-4188-8854;;0000-0003-2115-4853;;", "linkedin": "adrian-remonda-46678735/;ncklas;ayoub-raji/;nicola-musiu-710910181/;marko-bertogna-19bbb99/;;", "or_profile": "~Adrian_Remonda1;~Nicklas_Hansen1;~Ayoub_Raji1;~Nicola_Musiu1;~Marko_Bertogna1;~Eduardo_E._Veas1;~Xiaolong_Wang3", "aff": "Technische Universit\u00e4t Graz;University of California, San Diego;University of Modena and Reggio Emilia;university of modena and reggio emilia;University of Modena and Reggio Emilia;Technische Universit\u00e4t Graz;University of California, San Diego", "aff_domain": "tugraz.at;ucsd.edu;unimore.it;unimore.it;unimore.it;tugraz.at;ucsd.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nremonda2024a,\ntitle={A Simulation Benchmark for Autonomous Racing with Large-Scale Human Data},\nauthor={Adrian Remonda and Nicklas Hansen and Ayoub Raji and Nicola Musiu and Marko Bertogna and Eduardo E. Veas and Xiaolong Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=q9HIe2EUjf}\n}", "github": "", "reviewers": "Yg25;rG1V;7WzT", "pdf_size": 8161896, "rating": "6;6;8", "confidence": "4;4;4", "wc_summary_and_contributions": "79;94;132", "wc_strengths": "17;2;70", "wc_improvement": "151;5;122", "wc_limitations": "168;1;7", "wc_correctness": "8;6;119", "wc_clarity": "6;7;59", "wc_relation_to_prior_work": "22;6;15", "wc_documentation": "15;10;23", "wc_additional_feedback": "1;1;1", "wc_review": "467;132;548", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 101.66666666666667, 22.305953365762143 ], "wc_strengths_avg": [ 29.666666666666668, 29.169999809545573 ], "wc_improvement_avg": [ 92.66666666666667, 63.11013301276498 ], "wc_limitations_avg": [ 58.666666666666664, 77.34913631637323 ], "wc_correctness_avg": [ 44.333333333333336, 52.803619404565644 ], "wc_clarity_avg": [ 24.0, 24.752104287649296 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 6.548960901462833 ], "wc_documentation_avg": [ 16.0, 5.354126134736337 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 382.3333333333333, 180.07467586778796 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11428669288494413715&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 6, "email": "tugraz.at;ucsd.edu;unimore.it;unimore.it;unimore.it;tugraz.at;ucsd.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;2;0;1", "aff_unique_norm": "Technische Universit\u00e4t Graz;University of California, San Diego;University of Modena and Reggio Emilia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tugraz.at;https://www.ucsd.edu;https://www.unimore.it", "aff_unique_abbr": "TU Graz;UCSD;", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;2;2;2;0;1", "aff_country_unique": "Austria;United States;Italy" }, { "title": "FlexPlanner: Flexible 3D Floorplanning via Deep Reinforcement Learning in Hybrid Action Space with Multi-Modality Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93514", "id": "q9RLsvYOB3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q9RLsvYOB3", "openreview": "https://openreview.net/forum?id=q9RLsvYOB3", "poster": "/media/PosterPDFs/NeurIPS%202024/93514.png?t=1731069372.123753", "project": "", "author_site": "Ruizhe Zhong, Xingbo Du, Shixiong Kai, Zhentao Tang, Siyuan Xu, Jianye Hao, Mingxuan Yuan, Junchi Yan", "tldr": "", "abstract": "In the Integrated Circuit (IC) design flow, floorplanning (FP) determines the position and shape of each block. Serving as a prototype for downstream tasks, it is critical and establishes the upper bound of the final PPA (Power, Performance, Area). However, with the emergence of 3D IC with stacked layers, existing methods are not flexible enough to handle the versatile constraints. Besides, they typically face difficulties in aligning the cross-die modules in 3D ICs due to their heuristic representations, which could potentially result in severe data transfer failures. To address these issues, we propose FlexPlanner, a flexible learning-based method in hybrid action space with multi-modality representation to simultaneously handle position, aspect ratio, and alignment of blocks. To our best knowledge, FlexPlanner is the first learning-based approach to discard heuristic-based search in the 3D FP task. Thus, the solution space is not limited by the heuristic floorplanning representation, allowing for significant improvements in both wirelength and alignment scores. Specifically, FlexPlanner models 3D FP based on multi-modalities, including vision, graph, and sequence. To address the non-trivial heuristic-dependent issue, we design a sophisticated policy network with hybrid action space and asynchronous layer decision mechanism that allow for determining the versatile properties of each block. Experiments on public benchmarks MCNC and GSRC show the effectiveness. We significantly improve the alignment score from 0.474 to 0.940 and achieve an average reduction of 16% in wirelength. Moreover, our method also demonstrates zero-shot transferability on unseen circuits.", "keywords": "Floorplanning;3D Floorplanning;EDA;Reinforcement Learning", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Ruizhe Zhong;Xingbo Du;Shixiong Kai;Zhentao Tang;Siyuan Xu;Jianye HAO;Mingxuan Yuan;Junchi Yan", "authorids": "~Ruizhe_Zhong1;~Xingbo_Du1;~Shixiong_Kai1;~Zhentao_Tang1;~Siyuan_Xu5;~Jianye_HAO1;~Mingxuan_Yuan1;~Junchi_Yan2", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://deepopo.github.io;;;;http://www.icdai.org/jianye.html;;http://thinklab.sjtu.edu.cn/", "dblp": "335/1752;246/3079;;195/1259.html;;21/7664.html;74/2356;60/7949.html", "google_scholar": "rE0M3EoAAAAJ;7NfbVboAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;;https://scholar.google.com/citations?hl=en;ga230VoAAAAJ", "orcid": ";0000-0003-2390-8188;;0000-0002-2481-4119;0000-0001-6239-6774;0000-0002-0422-8235;0000-0002-2236-8784;0000-0001-9639-7679", "linkedin": ";;;;siyuanxu1991/;;;", "or_profile": "~Ruizhe_Zhong1;~Xingbo_Du1;~Shixiong_Kai1;~Zhentao_Tang1;~Siyuan_Xu5;~Jianye_HAO1;~Mingxuan_Yuan1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Tianjin University;Huawei Technologies Ltd.;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;huawei.com;huawei.com;huawei.com;tju.edu.cn;huawei.com;sjtu.edu.cn", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhong2024flexplanner,\ntitle={FlexPlanner: Flexible 3D Floorplanning via Deep Reinforcement Learning in Hybrid Action Space with Multi-Modality Representation},\nauthor={Ruizhe Zhong and Xingbo Du and Shixiong Kai and Zhentao Tang and Siyuan Xu and Jianye HAO and Mingxuan Yuan and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=q9RLsvYOB3}\n}", "github": "", "reviewers": "jFwz;pH9C;5TLE;3CLd", "pdf_size": 1786430, "rating": "5;5;5;7", "confidence": "2;2;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "69;50;73;86", "wc_strengths": "73;52;39;103", "wc_weaknesses": "150;241;49;10", "wc_questions": "4;2;45;23", "wc_limitations": "1;9;1;3", "wc_review": "297;354;207;225", "wc_reply_reviewers": "14;0;0;13", "wc_reply_authors": "40;0;0;14", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 12.893796958227627 ], "wc_strengths_avg": [ 66.75, 24.190649019817553 ], "wc_weaknesses_avg": [ 112.5, 90.07913187858773 ], "wc_questions_avg": [ 18.5, 17.356554957709783 ], "wc_limitations_avg": [ 3.5, 3.278719262151 ], "wc_review_avg": [ 270.75, 58.687200478468895 ], "wc_reply_reviewers_avg": [ 6.75, 6.7592529172978875 ], "wc_reply_authors_avg": [ 13.5, 16.332482971061076 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z4eqDO4ZkVoJ:scholar.google.com/&scioq=FlexPlanner:+Flexible+3D+Floorplanning+via+Deep+Reinforcement+Learning+in+Hybrid+Action+Space+with+Multi-Modality+Representation&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;huawei.com;huawei.com;huawei.com;tju.edu.cn;huawei.com;sjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;1;1;2;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Huawei;Tianjin University", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.huawei.com;http://www.tju.edu.cn", "aff_unique_abbr": "SJTU;Huawei;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Small steps no more: Global convergence of stochastic gradient bandits for arbitrary learning rates", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93513", "id": "q9dKv1AK6l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=q9dKv1AK6l", "openreview": "https://openreview.net/forum?id=q9dKv1AK6l", "poster": "", "project": "", "author_site": "Jincheng Mei, Bo Dai, Alekh Agarwal, Sharan Vaswani, Anant Raj, Csaba Szepesvari, Dale Schuurmans", "tldr": "", "abstract": "We provide a new understanding of the stochastic gradient bandit algorithm by showing that it converges to a globally optimal policy almost surely using \\emph{any} constant learning rate. This result demonstrates that the stochastic gradient algorithm continues to balance exploration and exploitation appropriately even in scenarios where standard smoothness and noise control assumptions break down. The proofs are based on novel findings about action sampling rates and the relationship between cumulative progress and noise, and extend the current understanding of how simple stochastic gradient methods behave in bandit settings.", "keywords": "stochastic gradient bandit;arbitrary stepsize;global convergence", "primary_area": "bandits", "supplementary_material": "", "author": "Jincheng Mei;Bo Dai;Alekh Agarwal;Sharan Vaswani;Anant Raj;Csaba Szepesvari;Dale Schuurmans", "authorids": "~Jincheng_Mei1;~Bo_Dai1;~Alekh_Agarwal2;~Sharan_Vaswani1;~Anant_Raj2;~Csaba_Szepesvari1;~Dale_Schuurmans1", "gender": "M;;M;M;;M;", "homepage": "https://jinchengmei.github.io;https://bo-dai.github.io/;https://alekhagarwal.net;http://vaswanis.github.io;;https://sites.ualberta.ca/~szepesva/;", "dblp": "149/1408;64/2903;;136/5916;;http://dblp.uni-trier.de/pers/hd/s/Szepesv=aacute=ri:Csaba;", "google_scholar": ";TIKl_foAAAAJ;9nnDvooAAAAJ;https://scholar.google.ca/citations?user=bDb2zWwAAAAJ;;https://scholar.google.ca/citations?user=zvC19mQAAAAJ;", "orcid": ";0009-0002-8070-574X;;;;;", "linkedin": ";;;sharan-vaswani-05b8ab35/;;csaba-szepesvari-09376b1?trk=hp-identity-name;", "or_profile": "~Jincheng_Mei1;~Bo_Dai1;~Alekh_Agarwal2;~Sharan_Vaswani1;~Anant_Raj2;~Csaba_Szepesvari1;~Dale_Schuurmans1", "aff": "Google DeepMind;Google Brain;Google;Simon Fraser University;;Google DeepMind;", "aff_domain": "google.com;google.com;google.com;sfu.ca;;google.com;", "position": "Research Scientist;Research Scientist;Researcher;Assistant Professor;;Research Scientist;", "bibtex": "@inproceedings{\nmei2024small,\ntitle={Small steps no more: Global convergence of stochastic gradient bandits for arbitrary learning rates},\nauthor={Jincheng Mei and Bo Dai and Alekh Agarwal and Sharan Vaswani and Anant Raj and Csaba Szepesvari and Dale Schuurmans},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=q9dKv1AK6l}\n}", "github": "", "reviewers": "4TYZ;jZkA;RHGg;qHxg;E4oN", "pdf_size": 588615, "rating": "5;5;5;6;8", "confidence": "4;4;2;4;3", "soundness": "3;3;2;3;3", "novelty": "3;3;2;3;3", "presentation": "3;4;2;3;3", "wc_summary": "45;44;27;59;56", "wc_strengths": "71;125;40;33;119", "wc_weaknesses": "113;184;40;44;140", "wc_questions": "57;163;20;120;227", "wc_limitations": "79;10;6;9;1", "wc_review": "365;526;133;265;543", "wc_reply_reviewers": "22;20;13;29;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 46.2, 11.267652816802618 ], "wc_strengths_avg": [ 77.6, 38.489479081951735 ], "wc_weaknesses_avg": [ 104.2, 55.62876953519644 ], "wc_questions_avg": [ 117.4, 73.80135500111092 ], "wc_limitations_avg": [ 21.0, 29.168476134347504 ], "wc_review_avg": [ 366.4, 155.83273083662496 ], "wc_reply_reviewers_avg": [ 16.8, 9.826494797230596 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.12862393885688164, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3896846791080078422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "google.com;google.com;google.com;sfu.ca;;google.com;", "author_num": 7, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Google;Simon Fraser University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.sfu.ca", "aff_unique_abbr": "DeepMind;SFU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;2;0", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "Stealth edits to large language models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93512", "id": "qAP6RyYIJc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qAP6RyYIJc", "openreview": "https://openreview.net/forum?id=qAP6RyYIJc", "poster": "/media/PosterPDFs/NeurIPS%202024/93512.png?t=1733147693.8895829", "project": "", "author_site": "Oliver Sutton, Qinghua Zhou, Wei Wang, Desmond Higham, Alexander N Gorban, Alexander Bastounis, Ivan Tyukin", "tldr": "", "abstract": "We reveal the theoretical foundations of techniques for editing large language models, and present new methods which can do so without requiring retraining. Our theoretical insights show that a single metric (a measure of the intrinsic dimension of the model's features) can be used to assess a model's editability and reveals its previously unrecognised susceptibility to malicious *stealth attacks*. This metric is fundamental to predicting the success of a variety of editing approaches, and reveals new bridges between disparate families of editing methods. We collectively refer to these as *stealth editing* methods, because they directly update a model's weights to specify its response to specific known hallucinating prompts without affecting other model behaviour. By carefully applying our theoretical insights, we are able to introduce a new *jet-pack* network block which is optimised for highly selective model editing, uses only standard network operations, and can be inserted into existing networks. We also reveal the vulnerability of language models to stealth attacks: a small change to a model's weights which fixes its response to a single attacker-chosen prompt. Stealth attacks are computationally simple, do not require access to or knowledge of the model's training data, and therefore represent a potent yet previously unrecognised threat to redistributed foundation models. Extensive experimental results illustrate and support our methods and their theoretical underpinnings. Demos and source code are available at https://github.com/qinghua-zhou/stealth-edits.", "keywords": "large language models;stealth attacks;memory editing", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Oliver Sutton;Qinghua Zhou;Wei Wang;Desmond Higham;Alexander N. Gorban;Alexander Bastounis;Ivan Y Tyukin", "authorids": "~Oliver_Sutton1;~Qinghua_Zhou1;~Wei_Wang113;~Desmond_Higham1;~Alexander_N._Gorban2;~Alexander_Bastounis1;~Ivan_Y_Tyukin1", "gender": ";;M;;M;M;", "homepage": "https://oliversutton.info;;;https://www.maths.ed.ac.uk/~dhigham/;;;https://www.kcl.ac.uk/people/ivan-tyukin-1", "dblp": "203/4297;;;74/2160;45/3050.html;154/6767;87/1262", "google_scholar": "23pAfUcAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=DHQy3wcHP4kC;https://scholar.google.com/citations?hl=en;;https://scholar.google.co.uk/citations?user=DFU2e3kAAAAJ", "orcid": "0000-0003-0184-4371;;;0000-0002-6635-3461;0000-0001-6224-1430;0000-0002-2867-4635;0000-0002-7359-7966", "linkedin": ";;williamweiwang/?originalSubdomain=uk;;alexander-gorban-4544597/;;", "or_profile": "~Oliver_Sutton1;~Qinghua_Zhou1;~Wei_Wang113;~Desmond_Higham1;~Alexander_N._Gorban2;~Alexander_Bastounis1;~Ivan_Y_Tyukin1", "aff": "King's College London, University of London;;University of Leicester;University of Edinburgh;University of Leicester: Leicester, Leicestershire, GB;University of Leicester;University of Leicester", "aff_domain": "kcl.ac.uk;;le.ac.uk;ed.ac.uk;le.ac.uk;le.ac.uk;le.ac.uk", "position": "Postdoc;;PhD student;Full Professor;Professor (Mathematics);Lecturer;Full Professor", "bibtex": "@inproceedings{\nsutton2024stealth,\ntitle={Stealth edits to large language models},\nauthor={Oliver Sutton and Qinghua Zhou and Wei Wang and Desmond Higham and Alexander N. Gorban and Alexander Bastounis and Ivan Y Tyukin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qAP6RyYIJc}\n}", "github": "", "reviewers": "wwKs;jzRE;w4Ct;ypKH", "pdf_size": 9280615, "rating": "6;6;7;7", "confidence": "4;3;4;4", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "4;4;3;3", "wc_summary": "53;223;408;137", "wc_strengths": "56;162;92;64", "wc_weaknesses": "32;460;88;33", "wc_questions": "464;115;38;56", "wc_limitations": "23;19;85;1", "wc_review": "628;979;711;291", "wc_reply_reviewers": "249;533;12;14", "wc_reply_authors": "295;629;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 205.25, 131.58718592628995 ], "wc_strengths_avg": [ 93.5, 41.74625731727337 ], "wc_weaknesses_avg": [ 153.25, 178.5460374805333 ], "wc_questions_avg": [ 168.25, 173.11033331375685 ], "wc_limitations_avg": [ 32.0, 31.701734968294716 ], "wc_review_avg": [ 652.25, 245.61491709584743 ], "wc_reply_reviewers_avg": [ 202.0, 214.0175226470954 ], "wc_reply_authors_avg": [ 231.0, 259.4330356758753 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4947682709369484468&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kcl.ac.uk;;le.ac.uk;ed.ac.uk;le.ac.uk;le.ac.uk;le.ac.uk", "author_num": 7, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "King's College London;University of Leicester;University of Edinburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.leicester.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "KCL;Leicester;Edinburgh", "aff_campus_unique_index": "1", "aff_campus_unique": ";Leicester", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "FouRA: Fourier Low-Rank Adaptation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93511", "id": "qCJ1dq5M7N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qCJ1dq5M7N", "openreview": "https://openreview.net/forum?id=qCJ1dq5M7N", "poster": "/media/PosterPDFs/NeurIPS%202024/93511.png?t=1733505236.5995884", "project": "", "author_site": "Shubhankar Borse, Shreya Kadambi, Nilesh Pandey, Kartikeya Bhardwaj, Viswanath Ganapathy, Sweta Priyadarshi, Risheek Garrepalli, Rafael Esteves, Munawar Hayat, Fatih Porikli", "tldr": "", "abstract": "While Low-Rank Adaptation (LoRA) has proven beneficial for efficiently fine-tuning large models, LoRA fine-tuned text-to-image diffusion models lack diversity in the generated images, as the model tends to copy data from the observed training samples. This effect becomes more pronounced at higher values of adapter strength and for adapters with higher ranks which are fine-tuned on smaller datasets. To address these challenges, we present FouRA, a novel low-rank method that learns projections in the Fourier domain along with learning a flexible input-dependent adapter rank selection strategy. Through extensive experiments and analysis, we show that FouRA successfully solves the problems related to data copying and distribution collapse while significantly improving the generated image quality. We demonstrate that FouRA enhances the generalization of fine-tuned models thanks to its adaptive rank selection. We further show that the learned projections in the frequency domain are decorrelated and prove effective when merging multiple adapters. While FouRA is motivated for vision tasks, we also demonstrate its merits for language tasks on commonsense reasoning and GLUE benchmarks.", "keywords": "Low Rank Adapters;Fourier Transform;Generative Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Shubhankar Borse;Shreya Kadambi;Nilesh Prasad Pandey;Kartikeya Bhardwaj;Viswanath Ganapathy;Sweta Priyadarshi;Risheek Garrepalli;Rafael Esteves;Munawar Hayat;Fatih Porikli", "authorids": "~Shubhankar_Borse1;~Shreya_Kadambi1;~Nilesh_Prasad_Pandey1;~Kartikeya_Bhardwaj3;~Viswanath_Ganapathy1;~Sweta_Priyadarshi1;~Risheek_Garrepalli1;~Rafael_Esteves1;~Munawar_Hayat2;~Fatih_Porikli2", "gender": "M;F;M;M;M;F;M;M;;M", "homepage": ";https://www.linkedin.com/in/shreyakadambi/;;;;https://swetap24.github.io/;;;;https://www.porikli.com", "dblp": "289/7536;;;127/1347;;;222/9870;;;p/FatihMuratPorikli", "google_scholar": "ZsgWCyMAAAAJ;4LmQNNoAAAAJ;rE7JoXgAAAAJ;https://scholar.google.com/citations?view_op=list_works;jRHlJyQAAAAJ;NARpCjAAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.tw/citations?user=VpB8NZ8AAAAJ", "orcid": ";;0009-0002-1118-0345;;;;;;;0000-0002-1520-4466", "linkedin": ";;nppandey/;;;swetap24?challengeId=AQGMzFETnINffQAAAYcUzXG6WD6WDSSwgS4j4gT6ao891h_G4KrTlPKE2pkPiL_leWbDN2AtSQKK1PjDBURLf8nly4lV0eGrgQ&submissionId=e9e1eabe-2b6d-4f17-31f9-e433ea8e5379&challengeSource=AgGfbNQx1pNrogAAAYcUzb3NYOiudZwr-nF4B8KA_6LUKiuC9yPpCgtrjWs53EE&challegeType=AgHDYRQh1IpIcAAAAYcUzb3QDzsdLvPf8M_rxPE4IShUe32MmsNLdKw&memberId=AgHZgaGN5fshUQAAAYcUzb3Tp9BvaOimUG3He6tT3pOj5NA&recognizeDevice=AgEXSUNfCgNb1wAAAYcUzb3XLnpeH3c45N3CqsiEprut0PfrGbpS;risheek-garrepalli-20138a4a/;rafael-esteves-124353145/;;fatih-porikli-a95643/", "or_profile": "~Shubhankar_Borse1;~Shreya_Kadambi1;~Nilesh_Prasad_Pandey1;~Kartikeya_Bhardwaj3;~Viswanath_Ganapathy1;~Sweta_Priyadarshi1;~Risheek_Garrepalli1;~Rafael_Esteves1;~Munawar_Hayat2;~Fatih_Porikli2", "aff": "Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm AI Research;Qualcomm AI Research;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;;QualComm", "aff_domain": "qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;;qualcomm.com", "position": "Deep Learning Research Engineer;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;;Senior Director", "bibtex": "@inproceedings{\nborse2024foura,\ntitle={Fou{RA}: Fourier Low-Rank Adaptation},\nauthor={Shubhankar Borse and Shreya Kadambi and Nilesh Prasad Pandey and Kartikeya Bhardwaj and Viswanath Ganapathy and Sweta Priyadarshi and Risheek Garrepalli and Rafael Esteves and Munawar Hayat and Fatih Porikli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qCJ1dq5M7N}\n}", "github": "", "reviewers": "4zX8;R4Sn;AmEw;yqCK", "pdf_size": 50223084, "rating": "4;5;6;7", "confidence": "5;4;3;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;4", "wc_summary": "52;71;77;66", "wc_strengths": "39;100;64;72", "wc_weaknesses": "590;72;123;131", "wc_questions": "61;31;4;28", "wc_limitations": "1;6;11;33", "wc_review": "743;280;279;330", "wc_reply_reviewers": "0;0;0;26", "wc_reply_authors": "0;0;0;22", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 66.5, 9.233092656309694 ], "wc_strengths_avg": [ 68.75, 21.76436307361187 ], "wc_weaknesses_avg": [ 229.0, 209.6485153775242 ], "wc_questions_avg": [ 31.0, 20.23610634484806 ], "wc_limitations_avg": [ 12.75, 12.214233500306108 ], "wc_review_avg": [ 408.0, 194.50835457635233 ], "wc_reply_reviewers_avg": [ 6.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 5.5, 9.526279441628825 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.6324555320336758, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11546819527957622244&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 6, "email": "qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;;qualcomm.com", "author_num": 10, "aff_unique_index": "0;0;1;1;0;0;0;0;0", "aff_unique_norm": "Qualcomm Incorporated;Qualcomm", "aff_unique_dep": ";Qualcomm AI Research", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com/research", "aff_unique_abbr": "Qualcomm;QAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dynamic Neural Regeneration: Enhancing Deep Learning Generalization on Small Datasets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93510", "id": "qCpCy0EQAJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qCpCy0EQAJ", "openreview": "https://openreview.net/forum?id=qCpCy0EQAJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93510.png?t=1731492614.5309143", "project": "", "author_site": "Vijaya Raghavan Ramkumar, Elahe Arani, Bahram Zonooz", "tldr": "", "abstract": "The efficacy of deep learning techniques is contingent upon access to large volumes of data (labeled or unlabeled). However, in practical domains such as medical applications, data availability is often limited. This presents a significant challenge: How can we effectively train deep neural networks on relatively small datasets while improving generalization? Recent works have explored evolutionary or iterative training paradigms, which reinitialize a subset of parameters to enhance generalization performance for small datasets. However, these methods typically rely on randomly selected parameter subsets and maintain fixed masks throughout training, potentially leading to suboptimal outcomes. Inspired by neurogenesis in the brain, we propose a novel iterative training framework, Dynamic Neural Regeneration (DNR), that employs a data-aware dynamic masking scheme to eliminate redundant connections by estimating their significance. This approach increases the model's capacity for further learning through random weight reinitialization. Experimental results demonstrate that our approach outperforms existing methods in accuracy and robustness, highlighting its potential for real-world applications where data collection is challenging.", "keywords": "Small datasets Generalization;Overfitting;Iterative training;Neurogenesis", "primary_area": "machine_vision", "supplementary_material": "", "author": "Vijaya Raghavan T Ramkumar;Elahe Arani;Bahram Zonooz", "authorids": "~Vijaya_Raghavan_T_Ramkumar1;~Elahe_Arani1;~Bahram_Zonooz1", "gender": ";F;M", "homepage": ";https://sites.google.com/view/elahe-arani;https://sites.google.com/view/bahramzonooz", "dblp": ";;250/9573", "google_scholar": ";e_I_v6cAAAAJ;", "orcid": ";0000-0002-0952-7007;", "linkedin": ";elahe-arani-630870b2/;", "or_profile": "~Vijaya_Raghavan_T_Ramkumar1;~Elahe_Arani1;~Bahram_Zonooz1", "aff": ";Wayve Technologies Ltd;Eindhoven University of Technology", "aff_domain": ";wayve.ai;tue.nl", "position": ";Head of AI Research;Assistant Professor", "bibtex": "@inproceedings{\nramkumar2024dynamic,\ntitle={Dynamic Neural Regeneration: Enhancing Deep Learning Generalization on Small Datasets},\nauthor={Vijaya Raghavan T Ramkumar and Elahe Arani and Bahram Zonooz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qCpCy0EQAJ}\n}", "github": "", "reviewers": "Vrf1;DwbC;cy5M;ZzXp", "pdf_size": 611524, "rating": "5;5;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;2;3;4", "wc_summary": "57;61;55;76", "wc_strengths": "66;47;104;67", "wc_weaknesses": "275;22;278;62", "wc_questions": "147;484;83;50", "wc_limitations": "5;1;17;15", "wc_review": "550;615;537;270", "wc_reply_reviewers": "0;157;26;0", "wc_reply_authors": "67;181;26;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.25, 8.227241335952167 ], "wc_strengths_avg": [ 71.0, 20.65187642806338 ], "wc_weaknesses_avg": [ 159.25, 118.10456172392327 ], "wc_questions_avg": [ 191.0, 172.72087308718653 ], "wc_limitations_avg": [ 9.5, 6.689544080129826 ], "wc_review_avg": [ 493.0, 132.0965555947618 ], "wc_reply_reviewers_avg": [ 45.75, 65.10136327297609 ], "wc_reply_authors_avg": [ 68.5, 69.2044073740972 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6gn3UdSo6IkJ:scholar.google.com/&scioq=Dynamic+Neural+Regeneration:+Enhancing+Deep+Learning+Generalization+on+Small+Datasets&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": ";wayve.ai;tue.nl", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Wayve Technologies;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.wayvetechnologies.com;https://www.tue.nl", "aff_unique_abbr": "Wayve;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Netherlands" }, { "title": "Spec-Gaussian: Anisotropic View-Dependent Appearance for 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93509", "id": "qDfPSWXSLt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qDfPSWXSLt", "openreview": "https://openreview.net/forum?id=qDfPSWXSLt", "poster": "/media/PosterPDFs/NeurIPS%202024/93509.png?t=1730364116.9787543", "project": "", "author_site": "Ziyi Yang, Xinyu Gao, Yang-Tian Sun, Yihua Huang, Xiaoyang Lyu, Wen Zhou, Shaohui Jiao, Xiaojuan Qi, Xiaogang Jin", "tldr": "", "abstract": "The recent advancements in 3D Gaussian splatting (3D-GS) have not only facilitated real-time rendering through modern GPU rasterization pipelines but have also attained state-of-the-art rendering quality. Nevertheless, despite its exceptional rendering quality and performance on standard datasets, 3D-GS frequently encounters difficulties in accurately modeling specular and anisotropic components. This issue stems from the limited ability of spherical harmonics (SH) to represent high-frequency information. To overcome this challenge, we introduce Spec-Gaussian, an approach that utilizes an anisotropic spherical Gaussian (ASG) appearance field instead of SH for modeling the view-dependent appearance of each 3D Gaussian. Additionally, we have developed a coarse-to-fine training strategy to improve learning efficiency and eliminate floaters caused by overfitting in real-world scenes. Our experimental results demonstrate that our method surpasses existing approaches in terms of rendering quality. Thanks to ASG, we have significantly improved the ability of 3D-GS to model scenes with specular and anisotropic components without increasing the number of 3D Gaussians. This improvement extends the applicability of 3D GS to handle intricate scenarios with specular and anisotropic surfaces.", "keywords": "Gaussian Splatting;specular highlights modeling;real-time rendering", "primary_area": "machine_vision", "supplementary_material": "/attachment/d8ff4bc7ce08fcc450e5e5c50974d1f9421a1be8.zip", "author": "Ziyi Yang;Xinyu Gao;Yang-Tian Sun;Yi-Hua Huang;Xiaoyang Lyu;Wen Zhou;Shaohui Jiao;XIAOJUAN QI;Xiaogang Jin", "authorids": "~Ziyi_Yang4;~Xinyu_Gao1;~Yang-Tian_Sun1;~Yi-Hua_Huang1;~Xiaoyang_Lyu1;~Wen_Zhou10;~Shaohui_Jiao1;~XIAOJUAN_QI2;~Xiaogang_Jin1", "gender": "M;M;M;M;;;;F;M", "homepage": "https://ingra14m.github.io/;;http://sunyangtian.github.io;https://yihua7.github.io/website/;;https://orcid.org/0009-0003-1464-2805;;https://xjqi.github.io/;http://www.cad.zju.edu.cn/home/jin/", "dblp": ";;261/9614.html;50/4147;;;;176/1445-1.html;36/3676-1", "google_scholar": "B0IyfqQAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;;;;bGn0uacAAAAJ;yryOvLwAAAAJ", "orcid": "0000-0002-9318-4704;0009-0007-1079-6451;;;;;;;", "linkedin": ";;;;;;;;", "or_profile": "~Ziyi_Yang4;~Xinyu_Gao1;~Yang-Tian_Sun1;~Yi-Hua_Huang1;~Xiaoyang_Lyu1;~Wen_Zhou10;~Shaohui_Jiao1;~XIAOJUAN_QI2;~Xiaogang_Jin1", "aff": "Zhejiang University;Zhejiang University;University of Hong Kong;University of Hong Kong;;;;University of Hong Kong;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;hku.hk;hku.hk;;;;hku.hk;zju.edu.cn", "position": "MS student;MS student;PhD student;PhD student;;;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024specgaussian,\ntitle={Spec-Gaussian: Anisotropic View-Dependent Appearance for 3D Gaussian Splatting},\nauthor={Ziyi Yang and Xinyu Gao and Yang-Tian Sun and Yi-Hua Huang and Xiaoyang Lyu and Wen Zhou and Shaohui Jiao and XIAOJUAN QI and Xiaogang Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qDfPSWXSLt}\n}", "github": "", "reviewers": "8TqS;2PwK;Ye8M;anMP", "pdf_size": 49508779, "rating": "5;6;6;6", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;3;3;1", "wc_summary": "75;68;91;86", "wc_strengths": "26;54;66;79", "wc_weaknesses": "213;90;470;369", "wc_questions": "288;62;4;2", "wc_limitations": "5;9;23;3", "wc_review": "607;283;654;539", "wc_reply_reviewers": "74;35;201;170", "wc_reply_authors": "172;0;341;81", "reply_reviewers": "2;1;2;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 80.0, 9.027735042633894 ], "wc_strengths_avg": [ 56.25, 19.57517560585345 ], "wc_weaknesses_avg": [ 285.5, 145.33495794198998 ], "wc_questions_avg": [ 89.0, 117.39250401963491 ], "wc_limitations_avg": [ 10.0, 7.810249675906654 ], "wc_review_avg": [ 520.75, 143.22425597642322 ], "wc_reply_reviewers_avg": [ 120.0, 67.82698577999763 ], "wc_reply_authors_avg": [ 148.5, 126.70536689501357 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10414053200316653061&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;hku.hk;hku.hk;;;;hku.hk;zju.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;1;1;0", "aff_unique_norm": "Zhejiang University;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.hku.hk", "aff_unique_abbr": "ZJU;HKU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Differentially Private Equivalence Testing for Continuous Distributions and Applications", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93508", "id": "qDuqp1nZZ6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qDuqp1nZZ6", "openreview": "https://openreview.net/forum?id=qDuqp1nZZ6", "poster": "", "project": "", "author_site": "Or Sheffet, Daniel Omer", "tldr": "", "abstract": "We present the first algorithm for testing equivalence between two continuous distributions using differential privacy (DP). Our algorithm is a private version of the algorithm of Diakonikolas et al. \nThe algorithm of Diakonikolas et al uses the data itself to repeatedly discretize the real line so that --- when the two distributions are far apart in ${\\cal A}_k$-norm --- one of the discretized distributions exhibits large $L_2$-norm difference; and upon repeated sampling such large gap would be detected. Designing its private analogue poses two difficulties. First, our DP algorithm can not resample new datapoints as a change to a single datapoint may lead to a very large change in the descretization of the real line. In contrast, the (sorted) index of the discretization point changes only by $1$ between neighboring instances, and so we use a novel algorithm that set the discretization points using random Bernoulli noise, resulting in only a few buckets being affected under the right coupling. Second, our algorithm, which doesn't resample data, requires we also revisit the utility analysis of the original algorithm and prove its correctness w.r.t. the original sorted data; a problem we tackle using sampling a subset of Poisson-drawn size from each discretized bin. Lastly, since any distribution can be reduced to a continuous distribution, our algorithm is successfully carried to multiple other families of distributions and thus has numerous applications.", "keywords": "Differential Privacy;Equivalence Tester;Continuous Distributions", "primary_area": "privacy", "supplementary_material": "", "author": "Or Sheffet;Daniel Omer", "authorids": "~Or_Sheffet1;~Daniel_Omer1", "gender": "M;M", "homepage": "http://www.ualberta.ca/science/about-us/contact-us/faculty-directory/or-sheffet/;", "dblp": "35/3489;", "google_scholar": "https://scholar.google.com.tw/citations?user=Zp2LpwUAAAAJ;", "orcid": ";", "linkedin": ";daniel-omer-86733914a/", "or_profile": "~Or_Sheffet1;~Daniel_Omer1", "aff": "Bar Ilan University, Technion;Bar-Ilan University", "aff_domain": "biu.ac.il;biu.ac.il", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\nsheffet2024differentially,\ntitle={Differentially Private Equivalence Testing for Continuous Distributions and Applications},\nauthor={Or Sheffet and Daniel Omer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qDuqp1nZZ6}\n}", "github": "", "reviewers": "ZYiV;XXUu;aFH2;3bTx", "pdf_size": 454220, "rating": "6;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "2;2;2;2", "wc_summary": "98;240;97;255", "wc_strengths": "86;54;36;94", "wc_weaknesses": "362;18;121;107", "wc_questions": "201;235;2;30", "wc_limitations": "50;7;1;5", "wc_review": "797;554;257;491", "wc_reply_reviewers": "11;12;0;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.0 ], "wc_summary_avg": [ 172.5, 75.18809746229785 ], "wc_strengths_avg": [ 67.5, 23.553131426627754 ], "wc_weaknesses_avg": [ 152.0, 127.51666557748442 ], "wc_questions_avg": [ 117.0, 102.19344401672741 ], "wc_limitations_avg": [ 15.75, 19.891895334532606 ], "wc_review_avg": [ 524.75, 192.22691668962491 ], "wc_reply_reviewers_avg": [ 7.0, 4.847679857416329 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0j28i8Dxk28J:scholar.google.com/&scioq=Differentially+Private+Equivalence+Testing+for+Continuous+Distributions+and+Applications&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "biu.ac.il;biu.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Bar-Ilan University", "aff_unique_dep": "", "aff_unique_url": "https://www.biu.ac.il", "aff_unique_abbr": "BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "HydraLoRA: An Asymmetric LoRA Architecture for Efficient Fine-Tuning", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93507", "id": "qEpi8uWX3N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qEpi8uWX3N", "openreview": "https://openreview.net/forum?id=qEpi8uWX3N", "poster": "/media/PosterPDFs/NeurIPS%202024/93507.png?t=1731430439.2014446", "project": "", "author_site": "Chunlin Tian, Zhan Shi, Zhijiang Guo, Li Li, Cheng-Zhong Xu", "tldr": "", "abstract": "Adapting Large Language Models (LLMs) to new tasks through fine-tuning has been made more efficient by the introduction of Parameter-Efficient Fine-Tuning (PEFT) techniques, such as LoRA. However, these methods often underperform compared to full fine-tuning, particularly in scenarios involving complex datasets. This issue becomes even more pronounced in complex domains, highlighting the need for improved PEFT approaches that can achieve better performance. Through a series of experiments, we have uncovered two critical insights that shed light on the training and parameter inefficiency of LoRA. Building on these insights, we have developed HydraLoRA, a LoRA framework with an asymmetric structure that eliminates the need for domain expertise. Our experiments demonstrate that HydraLoRA outperforms other PEFT approaches, even those that rely on domain knowledge during the training and inference phases. Our anonymous codes are submitted with the paper and will be publicly available. Code is available: https://github.com/Clin0212/HydraLoRA.", "keywords": "Large Language Models;Efficient Fine-Tuning;Asymmetric Structure", "primary_area": "generative_models", "supplementary_material": "", "author": "Chunlin Tian;Zhan Shi;Zhijiang Guo;Li Li;Cheng-zhong Xu", "authorids": "~Chunlin_Tian1;~Zhan_Shi3;~Zhijiang_Guo2;~Li_Li10;~Cheng-zhong_Xu1", "gender": "M;M;M;M;", "homepage": "https://scholar.google.com/citations?user=2D1fd0QAAAAJ&hl=zh-CN;https://aleczhanshi.github.io/;https://cartus.github.io/;https://www.fst.um.edu.mo/personal/llili/;", "dblp": "194/2903;;43/6147;53/2189-64;", "google_scholar": "2D1fd0QAAAAJ;w2I-wNQAAAAJ;8b-u3icAAAAJ;uLzU3OcAAAAJ;", "orcid": "0009-0009-5220-1609;;;0000-0002-2044-8289;", "linkedin": ";;;;", "or_profile": "~Chunlin_Tian1;~Zhan_Shi3;~Zhijiang_Guo2;~Li_Li10;~Cheng-zhong_Xu1", "aff": "University of Macau;;University of Cambridge;University of Macau;", "aff_domain": "um.edu.mo;;cam.ac.uk;um.edu.mo;", "position": "PhD student;;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\ntian2024hydralora,\ntitle={HydraLo{RA}: An Asymmetric Lo{RA} Architecture for Efficient Fine-Tuning},\nauthor={Chunlin Tian and Zhan Shi and Zhijiang Guo and Li Li and Cheng-zhong Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qEpi8uWX3N}\n}", "github": "", "reviewers": "dwiN;6P2g;vvtw;LBHQ", "pdf_size": 0, "rating": "5;6;7;8", "confidence": "4;4;5;4", "soundness": "2;2;4;4", "novelty": "2;3;3;4", "presentation": "2;1;3;3", "wc_summary": "48;65;90;169", "wc_strengths": "77;95;121;334", "wc_weaknesses": "80;215;88;250", "wc_questions": "58;24;99;78", "wc_limitations": "57;36;1;1", "wc_review": "320;435;399;832", "wc_reply_reviewers": "9;73;4;0", "wc_reply_authors": "25;273;44;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 93.0, 46.351914739307155 ], "wc_strengths_avg": [ 156.75, 103.523849909091 ], "wc_weaknesses_avg": [ 158.25, 75.32720292165374 ], "wc_questions_avg": [ 64.75, 27.63489641739227 ], "wc_limitations_avg": [ 23.75, 23.93089007956035 ], "wc_review_avg": [ 496.5, 198.1167585036662 ], "wc_reply_reviewers_avg": [ 21.5, 29.90401310861136 ], "wc_reply_authors_avg": [ 85.5, 109.37207138936338 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6815670011177040330&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "um.edu.mo;;cam.ac.uk;um.edu.mo;", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Macau;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.um.edu.mo;https://www.cam.ac.uk", "aff_unique_abbr": "UM;Cambridge", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Macau SAR;Cambridge", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Watermarking Makes Language Models Radioactive", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93506", "id": "qGiZQb1Khm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qGiZQb1Khm", "openreview": "https://openreview.net/forum?id=qGiZQb1Khm", "poster": "", "project": "", "author_site": "Tom Sander, Pierre Fernandez, Alain Durmus, Matthijs Douze, Teddy Furon", "tldr": "", "abstract": "We investigate the radioactivity of text generated by large language models (LLM), \\ie whether it is possible to detect that such synthetic input was used to train a subsequent LLM.\nCurrent methods like membership inference or active IP protection either work only in settings where the suspected text is known or do not provide reliable statistical guarantees.\nWe discover that, on the contrary, it is possible to reliably determine if a language model was trained on synthetic data if that data is output by a watermarked LLM.\nOur new methods, specialized for radioactivity, detects with a provable confidence weak residuals of the watermark signal in the fine-tuned LLM.\nWe link the radioactivity contamination level to the following properties: the watermark robustness, its proportion in the training set, and the fine-tuning process.\nFor instance, if the suspect model is open-weight, we demonstrate that training on watermarked instructions can be detected with high confidence ($p$-value $< 10^{-5}$) even when as little as $5\\%$ of training text is watermarked.", "keywords": "Watermarking;Large Language Models;Membership Inference", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/1f8e528f4717242ed8e45f4bf6e102e262e849cc.zip", "author": "Tom Sander;Pierre Fernandez;Alain Oliviero Durmus;Matthijs Douze;Teddy Furon", "authorids": "~Tom_Sander1;~Pierre_Fernandez1;~Alain_Oliviero_Durmus1;~Matthijs_Douze1;~Teddy_Furon1", "gender": "M;M;;M;M", "homepage": ";https://pierrefdz.github.io/;https://research.facebook.com/people/douze-matthijs/;http://people.rennes.inria.fr/Teddy.Furon;", "dblp": ";309/5729;64/5801;00/3862;01/11275", "google_scholar": ";osCX1YQAAAAJ;yZmnFbkAAAAJ;https://scholar.google.com/citations?hl=fr;", "orcid": ";0000-0003-3890-2248;;0000-0002-1565-765X;", "linkedin": "tomsdr;pierrefdz/;;;", "or_profile": "~Tom_Sander1;~Pierre_Fernandez1;~Matthijs_Douze1;~Teddy_Furon1;~Alain_Durmus1", "aff": "\u00c9cole Polytechnique;Universit\u00e9 Rennes 1;Meta;INRIA;\u00c9cole Polytechnique", "aff_domain": "polytechnique.fr;univ-rennes1.fr;meta.com;inria.fr;polytechnique.fr", "position": "PhD student;PhD student;researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nsander2024watermarking,\ntitle={Watermarking Makes Language Models Radioactive},\nauthor={Tom Sander and Pierre Fernandez and Alain Oliviero Durmus and Matthijs Douze and Teddy Furon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qGiZQb1Khm}\n}", "github": "", "reviewers": "q8YZ;wG1j;JVYy;Y2ta;ZoLZ", "pdf_size": 994612, "rating": "4;5;6;6;8", "confidence": "3;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;2;3;3;4", "presentation": "3;2;2;3;4", "wc_summary": "78;82;74;150;90", "wc_strengths": "34;18;50;195;291", "wc_weaknesses": "164;18;212;461;312", "wc_questions": "4;149;157;2;70", "wc_limitations": "1;7;21;1;11", "wc_review": "281;274;514;809;774", "wc_reply_reviewers": "0;15;0;0;20", "wc_reply_authors": "40;15;38;0;17", "reply_reviewers": "0;1;0;0;1", "reply_authors": "2;2;2;1;2", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 94.8, 28.102668912400475 ], "wc_strengths_avg": [ 117.6, 107.27273651771917 ], "wc_weaknesses_avg": [ 233.4, 148.08862211527259 ], "wc_questions_avg": [ 76.4, 67.20892797835715 ], "wc_limitations_avg": [ 8.2, 7.4404300950953095 ], "wc_review_avg": [ 530.4, 230.29077272005495 ], "wc_reply_reviewers_avg": [ 7.0, 8.717797887081348 ], "wc_reply_authors_avg": [ 22.0, 15.086417732516887 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6784005252999681, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=775794417529302321&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 20, "email": "polytechnique.fr;univ-rennes1.fr;meta.com;inria.fr;polytechnique.fr", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Ecole Polytechnique;Universit\u00e9 Rennes 1;Meta;INRIA", "aff_unique_dep": ";;Meta Platforms, Inc.;", "aff_unique_url": "https://www.polytechnique.edu;https://www.univ-rennes1.fr;https://meta.com;https://www.inria.fr", "aff_unique_abbr": "X;UR1;Meta;INRIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Rennes", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "France;United States" }, { "title": "A Closer Look at the CLS Token for Cross-Domain Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93505", "id": "qIkYlfDZaI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qIkYlfDZaI", "openreview": "https://openreview.net/forum?id=qIkYlfDZaI", "poster": "/media/PosterPDFs/NeurIPS%202024/93505.png?t=1731731104.773561", "project": "", "author_site": "Yixiong Zou, Shuai Yi, Yuhua Li, Ruixuan Li", "tldr": "", "abstract": "Vision Transformer (ViT) has shown great power in learning from large-scale datasets. However, collecting sufficient data for expert knowledge is always difficult. To handle this problem, Cross-Domain Few-Shot Learning (CDFSL) has been proposed to transfer the source-domain knowledge learned from sufficient data to target domains where only scarce data is available. In this paper, we find an intriguing phenomenon neglected by previous works for the CDFSL task based on ViT: leaving the CLS token to random initialization, instead of loading source-domain trained parameters, could consistently improve target-domain performance. We then delve into this phenomenon for an interpretation. We find **the CLS token naturally absorbs domain information** due to the inherent structure of the ViT, which is represented as the low-frequency component in the Fourier frequency space of images. Based on this phenomenon and interpretation, we further propose a method for the CDFSL task to decouple the domain information in the CLS token during the source-domain training, and adapt the CLS token on the target domain for efficient few-shot learning. Extensive experiments on four benchmarks validate our rationale and state-of-the-art performance. Our codes are available at https://github.com/Zoilsen/CLS_Token_CDFSL.", "keywords": "Cross-Domain Few-Shot Learning;CLS Token;Vision Transformer", "primary_area": "machine_vision", "supplementary_material": "/attachment/0edb608e5ba1deff13035359839547d01c55065b.zip", "author": "Yixiong Zou;Shuai Yi;Yuhua Li;Ruixuan Li", "authorids": "~Yixiong_Zou1;~Shuai_Yi5;~Yuhua_Li2;~Ruixuan_Li1", "gender": ";;F;M", "homepage": ";;;http://idc.hust.edu.cn/rxli/index.html", "dblp": ";;79/5796-3;60/4429.html", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/scholar?q=ruixuan+li", "orcid": ";;;0000-0002-7791-5511", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ruixuan-li-b367319", "or_profile": "~Yixiong_Zou1;~Shuai_Yi5;~Yuhua_Li2;~Ruixuan_Li1", "aff": ";;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": ";;hust.edu.cn;hust.edu.cn", "position": ";;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzou2024a,\ntitle={A Closer Look at the {CLS} Token for Cross-Domain Few-Shot Learning},\nauthor={Yixiong Zou and Shuai Yi and Yuhua Li and Ruixuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qIkYlfDZaI}\n}", "github": "", "reviewers": "aYYr;PfB7;dboq;epY7", "pdf_size": 2027771, "rating": "5;6;7;7", "confidence": "5;4;5;4", "soundness": "3;2;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "125;44;67;92", "wc_strengths": "39;104;84;69", "wc_weaknesses": "556;113;153;69", "wc_questions": "43;634;29;84", "wc_limitations": "33;108;18;33", "wc_review": "796;1003;351;347", "wc_reply_reviewers": "94;223;25;68", "wc_reply_authors": "16;87;0;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.0, 30.074906483645133 ], "wc_strengths_avg": [ 74.0, 23.717082451262844 ], "wc_weaknesses_avg": [ 222.75, 194.68227320431617 ], "wc_questions_avg": [ 197.5, 252.82256623964562 ], "wc_limitations_avg": [ 48.0, 35.17811819867572 ], "wc_review_avg": [ 624.25, 284.8169368208288 ], "wc_reply_reviewers_avg": [ 102.5, 73.80548760085526 ], "wc_reply_authors_avg": [ 30.5, 33.41032774457623 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3034409580267927946&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";;hust.edu.cn;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Persistence Homology Distillation for Semi-supervised Continual Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93504", "id": "qInb7EUmxz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qInb7EUmxz", "openreview": "https://openreview.net/forum?id=qInb7EUmxz", "poster": "/media/PosterPDFs/NeurIPS%202024/93504.png?t=1731691678.2700427", "project": "", "author_site": "YanFan, Yu Wang, Pengfei Zhu, Dongyue Chen, Qinghua Hu", "tldr": "", "abstract": "Semi-supervised continual learning (SSCL) has attracted significant attention for addressing catastrophic forgetting in semi-supervised data. Knowledge distillation, which leverages data representation and pair-wise similarity, has shown significant potential in preserving information in SSCL. However, traditional distillation strategies often fail in unlabeled data with inaccurate or noisy information, limiting their efficiency in feature spaces undergoing substantial changes during continual learning. To address these limitations, we propose Persistence Homology Distillation (PsHD) to preserve intrinsic structural information that is insensitive to noise in semi-supervised continual learning. First, we capture the structural features using persistence homology by homological evolution across different scales in vision data, where the multi-scale characteristic established its stability under noise interference. Next, we propose a persistence homology distillation loss in SSCL and design an acceleration algorithm to reduce the computational cost of persistence homology in our module. Furthermore, we demonstrate the superior stability of PsHD compared to sample representation and pair-wise similarity distillation methods theoretically and experimentally. Finally, experimental results on three widely used datasets validate that the new PsHD outperforms state-of-the-art with 3.9% improvements on average, and also achieves 1.5% improvements while reducing 60% memory buffer size, highlighting the potential of utilizing unlabeled data in SSCL. Our code is available: https://github.com/fanyan0411/PsHD.", "keywords": "Continual learning; Knowledge Distillation; Semi-supervised learning; Topological data anaylsis; Persistence Homology;", "primary_area": "machine_vision", "supplementary_material": "", "author": "YanFan;Yu Wang;Pengfei Zhu;Dongyue Chen;Qinghua Hu", "authorids": "~YanFan1;~Yu_Wang33;~Pengfei_Zhu1;~Dongyue_Chen3;~Qinghua_Hu1", "gender": "F;M;M;F;M", "homepage": "https://fanyan0411.github.io/;https://wangyutju.github.io/;http://aiskyeye.com/;https://dyuechen.github.io/;http://cic.tju.edu.cn/faculty/huqinghua/index.html", "dblp": ";02/5889-106;40/6172-1.html;;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.hk/citations?user=WNFlHNoAAAAJ;TVSNq_wAAAAJ", "orcid": ";;;0000-0002-6077-7968;0000-0001-7765-8095", "linkedin": ";;;;", "or_profile": "~YanFan1;~Yu_Wang33;~Pengfei_Zhu1;~Dongyue_Chen3;~Qinghua_Hu1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Postdoc;Professor", "bibtex": "@inproceedings{\nyanfan2024persistence,\ntitle={Persistence Homology Distillation for Semi-supervised Continual Learning},\nauthor={YanFan and Yu Wang and Pengfei Zhu and Dongyue Chen and Qinghua Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qInb7EUmxz}\n}", "github": "", "reviewers": "2waA;K7Tp;AVV3", "pdf_size": 1625587, "rating": "5;5;5", "confidence": "4;3;5", "soundness": "3;2;3", "novelty": "2;2;3", "presentation": "1;3;4", "wc_summary": "30;58;52", "wc_strengths": "42;46;78", "wc_weaknesses": "171;135;100", "wc_questions": "3;151;22", "wc_limitations": "1;1;27", "wc_review": "247;391;279", "wc_reply_reviewers": "26;49;20", "wc_reply_authors": "31;33;25", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 46.666666666666664, 12.036980056845191 ], "wc_strengths_avg": [ 55.333333333333336, 16.110727964792765 ], "wc_weaknesses_avg": [ 135.33333333333334, 28.986586936412884 ], "wc_questions_avg": [ 58.666666666666664, 65.7486797501584 ], "wc_limitations_avg": [ 9.666666666666666, 12.256517540566826 ], "wc_review_avg": [ 305.6666666666667, 61.7377968148812 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 12.498888839501783 ], "wc_reply_authors_avg": [ 29.666666666666668, 3.39934634239519 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1679387123852745509&as_sdt=5,39&sciodt=0,39&hl=en", "gs_version_total": 0, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural network learns low-dimensional polynomials with SGD near the information-theoretic limit", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93503", "id": "qK4iS49KDm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qK4iS49KDm", "openreview": "https://openreview.net/forum?id=qK4iS49KDm", "poster": "", "project": "", "author_site": "Jason Lee, Kazusato Oko, Taiji Suzuki, Denny Wu", "tldr": "", "abstract": "We study the problem of gradient descent learning of a single-index target function $f_*(\\boldsymbol{x}) = \\textstyle\\sigma_*\\left(\\langle\\boldsymbol{x},\\boldsymbol{\\theta}\\rangle\\right)$ under isotropic Gaussian data in $\\mathbb{R}^d$, \nwhere the unknown link function $\\sigma_*:\\mathbb{R}\\to\\mathbb{R}$ has information exponent $p$ (defined as the lowest degree in the Hermite expansion). Prior works showed that gradient-based training of neural networks can learn this target with $n\\gtrsim d^{\\Theta(p)}$ samples, and such complexity is predicted to be necessary by the correlational statistical query lower bound. \nSurprisingly, we prove that a two-layer neural network optimized by an SGD-based algorithm (on the squared loss) learns $f_*$ with a complexity that is not governed by the information exponent. Specifically, for arbitrary polynomial single-index models, we establish a sample and runtime complexity of $n \\simeq T = \\Theta(d\\cdot\\mathrm{polylog} d)$, where $\\Theta(\\cdot)$ hides a constant only depending on the degree of $\\sigma_*$; this dimension dependence matches the information theoretic limit up to polylogarithmic factors. More generally, we show that $n\\gtrsim d^{(p_*-1)\\vee 1}$ samples are sufficient to achieve low generalization error, where $p_* \\le p$ is the \\textit{generative exponent} of the link function. Core to our analysis is the reuse of minibatch in the gradient computation, which gives rise to higher-order information beyond correlational queries.", "keywords": "single-index model;feature learning;statistical query;SGD", "primary_area": "learning_theory", "supplementary_material": "", "author": "Jason D. Lee;Kazusato Oko;Taiji Suzuki;Denny Wu", "authorids": "~Jason_D._Lee1;~Kazusato_Oko1;~Taiji_Suzuki1;~Denny_Wu2", "gender": "M;M;M;M", "homepage": "https://jasondlee88.github.io/;;http://ibis.t.u-tokyo.ac.jp/suzuki/;https://dennywu1.github.io/", "dblp": "88/3262;;08/312;", "google_scholar": "GR_DsT0AAAAJ;;x8osrBsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;", "linkedin": ";kazusatooko/;;", "or_profile": "~Jason_D._Lee1;~Kazusato_Oko1;~Taiji_Suzuki1;~Denny_Wu2", "aff": "Princeton University;The University of Tokyo;The University of Tokyo;New York University", "aff_domain": "princeton.edu;u-tokyo.ac.jp;tokyo.ac.jp;nyu.edu", "position": "Assistant Professor;MS student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nlee2024neural,\ntitle={Neural network learns low-dimensional polynomials with {SGD} near the information-theoretic limit},\nauthor={Jason D. Lee and Kazusato Oko and Taiji Suzuki and Denny Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qK4iS49KDm}\n}", "github": "", "reviewers": "Mv1X;p2Vt;6kdk;hk3C", "pdf_size": 677530, "rating": "6;6;7;7", "confidence": "3;2;4;3", "soundness": "3;3;3;3", "novelty": "3;2;4;3", "presentation": "3;2;3;4", "wc_summary": "150;59;158;59", "wc_strengths": "120;24;144;79", "wc_weaknesses": "223;35;63;80", "wc_questions": "99;301;164;142", "wc_limitations": "13;7;1;9", "wc_review": "605;426;530;369", "wc_reply_reviewers": "16;32;13;17", "wc_reply_authors": "0;42;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 106.5, 47.58413601191052 ], "wc_strengths_avg": [ 91.75, 45.499313181629454 ], "wc_weaknesses_avg": [ 100.25, 72.66833904803384 ], "wc_questions_avg": [ 176.5, 75.58604368532593 ], "wc_limitations_avg": [ 7.5, 4.330127018922194 ], "wc_review_avg": [ 482.5, 91.29211356957401 ], "wc_reply_reviewers_avg": [ 19.5, 7.365459931328117 ], "wc_reply_authors_avg": [ 10.5, 18.186533479473212 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14583921220058215489&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "princeton.edu;u-tokyo.ac.jp;tokyo.ac.jp;nyu.edu", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Princeton University;University of Tokyo;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.u-tokyo.ac.jp;https://www.nyu.edu", "aff_unique_abbr": "Princeton;UTokyo;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Japan" }, { "title": "Recognize Any Regions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93502", "id": "qKfiWNHp6k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qKfiWNHp6k", "openreview": "https://openreview.net/forum?id=qKfiWNHp6k", "poster": "/media/PosterPDFs/NeurIPS%202024/93502.png?t=1733276806.3262167", "project": "", "author_site": "Haosen Yang, Chuofan Ma, Bin Wen, Yi Jiang, Zehuan Yuan, Xiatian Zhu", "tldr": "", "abstract": "Understanding the semantics of individual regions or patches of unconstrained images, such as open-world object detection, remains a critical yet challenging task in computer vision. Building on the success of powerful image-level vision-language (ViL) foundation models like CLIP, recent efforts have sought to harness their capabilities by either training a contrastive model from scratch with an extensive collection of region-label pairs or aligning the outputs of a detection model with image-level representations of region proposals. Despite notable progress, these approaches are plagued by computationally intensive training requirements, susceptibility to data noise, \nand deficiency in contextual information. To address these limitations, we explore the synergistic potential of off-the-shelf foundation models, leveraging their respective strengths in localization and semantics. We introduce a novel, generic, and efficient architecture, named RegionSpot, designed to integrate position-aware localization knowledge from a localization foundation model (e.g., SAM) with semantic information from a ViL model (e.g., CLIP). To fully exploit pretrained knowledge while minimizing training overhead, we keep both foundation models frozen, focusing optimization efforts solely on a lightweight attention-based knowledge integration module.\nExtensive experiments in open-world object recognition show that our RegionSpot achieves significant performance gain over prior alternatives, along with substantial computational savings (e.g., training our model with 3 million data in a single day using 8 V100 GPUs). \nRegionSpot outperforms GLIP-L by 2.9 in mAP on LVIS val set, with an even larger margin of 13.1 AP for more challenging and rare categories, and a 2.5 AP increase on ODinW. Furthermore, it exceeds GroundingDINO-L by 11.0 AP for rare categories on the LVIS minival set.", "keywords": "Open Vocabulary Object Recognition; Zero-shot; Representation Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haosen Yang;Chuofan Ma;Bin Wen;Yi Jiang;Zehuan Yuan;Xiatian Zhu", "authorids": "~Haosen_Yang1;~Chuofan_Ma1;~Bin_Wen1;~Yi_Jiang2;~Zehuan_Yuan1;~Xiatian_Zhu3", "gender": "M;;M;M;M;", "homepage": ";https://machuofan.github.io/;;https://enjoyyi.github.io/;https://shallowyuan.github.io/;https://x-up-lab.github.io", "dblp": "245/9949-3;330/3312;;;227/3298;128/7935", "google_scholar": "https://scholar.google.com/citations?hl=en;hgKtgWAAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=6dikuoYAAAAJ;;ZbA-z1cAAAAJ", "orcid": ";;;0000-0002-2133-8719;;0000-0002-9284-2955", "linkedin": ";;;;;", "or_profile": "~Haosen_Yang1;~Chuofan_Ma1;~Bin_Wen1;~Yi_Jiang2;~Zehuan_Yuan1;~Xiatian_Zhu3", "aff": "University of Surrey;University of Hong Kong;;Bytedance;ByteDance Inc.;University of Surrey", "aff_domain": "surrey.ac.uk;hku.hk;;bytedance.com;bytedance.com;surrey.ac.uk", "position": "PhD student;PhD student;;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nyang2024recognize,\ntitle={Recognize Any Regions},\nauthor={Haosen Yang and Chuofan Ma and Bin Wen and Yi Jiang and Zehuan Yuan and Xiatian Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qKfiWNHp6k}\n}", "github": "", "reviewers": "B3PH;AWre;4JVL;TiX5", "pdf_size": 2515207, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "102;61;95;46", "wc_strengths": "55;43;101;89", "wc_weaknesses": "127;66;127;34", "wc_questions": "115;101;85;60", "wc_limitations": "127;47;26;3", "wc_review": "526;318;434;232", "wc_reply_reviewers": "32;32;0;8", "wc_reply_authors": "20;33;87;110", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 76.0, 23.24865587512534 ], "wc_strengths_avg": [ 72.0, 23.769728648009426 ], "wc_weaknesses_avg": [ 88.5, 40.12792045446661 ], "wc_questions_avg": [ 90.25, 20.437404434027332 ], "wc_limitations_avg": [ 50.75, 46.69247798093393 ], "wc_review_avg": [ 377.5, 111.75307602030469 ], "wc_reply_reviewers_avg": [ 18.0, 14.2828568570857 ], "wc_reply_authors_avg": [ 62.5, 37.192069047042814 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=969128012615460506&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "surrey.ac.uk;hku.hk;;bytedance.com;bytedance.com;surrey.ac.uk", "author_num": 6, "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of Surrey;University of Hong Kong;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.surrey.ac.uk;https://www.hku.hk;https://www.bytedance.com", "aff_unique_abbr": "Surrey;HKU;Bytedance", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United Kingdom;China" }, { "id": "qL4nN6Ew7U", "title": "Fantasy: Transformer Meets Transformer in Text-to-Image Generation", "track": "main", "status": "Reject", "tldr": "", "abstract": "We present Fantasy, an efficient text-to-image generation model marrying the decoder-only Large Language Models (LLMs) and transformer-based masked image modeling (MIM). While diffusion models are currently in a leading position in this task, we demonstrate that with appropriate training strategies and high-quality data, MIM can also achieve comparable performance. By incorporating pre-trained decoder-only LLMs as the text encoder, we observe a significant improvement in text fidelity compared to the widely used CLIP text encoder, enhancing the text-image alignment. Our training approach involves two stages: 1) large-scale concept alignment pre-training, and 2) fine-tuning with high-quality instruction-image data. Evaluations on FID, HPSv2 benchmarks, and human feedback demonstrate the competitive performance of Fantasy against state-of-the-art diffusion and autoregressive models.", "keywords": "Text-to-Imgae Generation;Transformer;Masked Image Modeling", "primary_area": "generative_models", "supplementary_material": "/attachment/87bcf36dd2e9d1a689fd919485b3f37ea53f0d07.zip", "author": "Enxin Song;Wenhao Chai;Xun Guo;Gaoang Wang;Jenq-Neng Hwang;Yan Lu", "authorids": "~Enxin_Song2;~Wenhao_Chai1;~Xun_Guo1;~Gaoang_Wang2;~Jenq-Neng_Hwang1;~Yan_Lu7", "gender": "F;M;M;M;M;M", "homepage": "https://github.com/Espere-1119-Song/Espere-1119-Song.github.io;https://rese1f.github.io;;https://person.zju.edu.cn/en/gaoangwang;https://people.ece.uw.edu/hwang/;https://www.microsoft.com/en-us/research/people/yanlu/", "dblp": "353/2190.html;317/4392;32/5851;176/7523;78/4381;15/4830-1", "google_scholar": "sLqa-3oAAAAJ;SL--7UMAAAAJ;Ow4R8-EAAAAJ;GhsXNiwAAAAJ;b365J6kAAAAJ;djk5l-4AAAAJ", "orcid": ";0000-0003-2611-0008;;;;0000-0001-5383-6424", "linkedin": ";wenhao-chai-658274238/;;;;", "or_profile": "~Enxin_Song2;~Wenhao_Chai1;~Xun_Guo1;~Gaoang_Wang2;~Jenq-Neng_Hwang1;~Yan_Lu7", "aff": "Zhejiang University;Pika Lab;Microsoft Research Asia;Zhejiang University;University of Washington, Seattle;Microsoft Research Asia", "aff_domain": "zju.edu.cn;pika.art;microsoft.com;intl.zju.edu.cn;uw.edu;microsoft.com", "position": "MS student;Intern;Principal Researcher;Assistant Professor;Full Professor;Partner Research Manager", "bibtex": "@misc{\nanonymous2024fantasy,\ntitle={Fantasy: Transformer Meets Transformer in Text-to-Image Generation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=qL4nN6Ew7U}\n}", "github": "", "project": "", "reviewers": "AAcU;5cW5;soBM;aiiy", "site": "https://openreview.net/forum?id=qL4nN6Ew7U", "pdf_size": 3167066, "rating": "4;4;5;7", "confidence": "4;4;4;2", "soundness": "2;2;3;3", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "43;118;59;110", "wc_strengths": "50;25;31;69", "wc_weaknesses": "93;219;131;83", "wc_questions": "15;20;116;1", "wc_limitations": "1;27;1;1", "wc_review": "202;409;338;264", "wc_reply_reviewers": "0;0;36;0", "wc_reply_authors": "155;169;186;99", "reply_reviewers": "0;0;1;0", "reply_authors": "3;3;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 82.5, 32.12864765283469 ], "wc_strengths_avg": [ 43.75, 17.25362280797862 ], "wc_weaknesses_avg": [ 131.5, 53.598041008977184 ], "wc_questions_avg": [ 38.0, 45.56862956025779 ], "wc_limitations_avg": [ 7.5, 11.258330249197702 ], "wc_review_avg": [ 303.25, 77.75401918872105 ], "wc_reply_reviewers_avg": [ 9.0, 15.588457268119896 ], "wc_reply_authors_avg": [ 152.25, 32.64486942844159 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1YadOA2uW_sJ:scholar.google.com/&scioq=Fantasy:+Transformer+Meets+Transformer+in+Text-to-Image+Generation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;0;3;2", "aff_unique_norm": "Zhejiang University;Pika Lab;Microsoft;University of Washington", "aff_unique_dep": ";;Research;", "aff_unique_url": "https://www.zju.edu.cn;;https://www.microsoft.com/en-us/research/group/asia;https://www.washington.edu", "aff_unique_abbr": "ZJU;;MSR Asia;UW", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Asia;Seattle", "aff_country_unique_index": "0;0;0;2;0", "aff_country_unique": "China;;United States" }, { "title": "Prism: A Framework for Decoupling and Assessing the Capabilities of VLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93501", "id": "qLnXPVvwLx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qLnXPVvwLx", "openreview": "https://openreview.net/forum?id=qLnXPVvwLx", "poster": "", "project": "", "author_site": "Yuxuan Qiao, Haodong Duan, Xinyu Fang, Junming Yang, Lin Chen, Songyang Zhang, Jiaqi Wang, Dahua Lin, Kai Chen", "tldr": "", "abstract": "Vision Language Models (VLMs) demonstrate remarkable proficiency in addressing a wide array of visual questions, which requires strong perception and reasoning faculties. Assessing these two competencies independently is crucial for model refinement, despite the inherent difficulty due to the intertwined nature of seeing and reasoning in existing VLMs. To tackle this issue, we present Prism, an innovative framework designed to disentangle the perception and reasoning processes involved in visual question solving. Prism comprises two distinct stages: a perception stage that utilizes a VLM to extract and articulate visual information in textual form, and a reasoning stage that formulates responses based on the extracted visual information using a Large Language Model (LLM). This modular design enables the systematic comparison and assessment of both proprietary and open-source VLM for their perception and reasoning strengths. Our analytical framework provides several valuable insights, underscoring Prism's potential as a cost-effective solution for vision-language tasks.\nBy combining a streamlined VLM focused on perception with a powerful LLM tailored for reasoning, Prism achieves superior results in general vision-language tasks while substantially cutting down on training and operational expenses. Quantitative evaluations show that Prism, when configured with a vanilla 2B LLaVA and freely accessible GPT-3.5, delivers performance on par with VLMs $10 \\times$ larger on the rigorous multimodal benchmark MMStar.", "keywords": "Multi-modality;VLM;evaluation", "primary_area": "evaluation", "supplementary_material": "", "author": "Yuxuan Qiao;Haodong Duan;Xinyu Fang;Junming Yang;Lin Chen;Songyang Zhang;Jiaqi Wang;Dahua Lin;Kai Chen", "authorids": "~Yuxuan_Qiao1;~Haodong_Duan1;~Xinyu_Fang1;~Junming_Yang1;~Lin_Chen18;~Songyang_Zhang1;~Jiaqi_Wang1;~Dahua_Lin1;~Kai_Chen4", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";https://kennymckormick.github.io;https://github.com/FangXinyu-0913;https://junming-yang.github.io/;https://lin-chen.site;https://www.zhangsongyang.com/;https://myownskyw7.github.io/;http://dahua.site;https://chenkai.site/", "dblp": ";211/7919;143/0236;191/4782.html;13/3479-19;;44/740-3;53/6088;181/2839-26", "google_scholar": ";vi3W-m8AAAAJ;QZk6nZ8AAAAJ;L6R5ExQAAAAJ;https://scholar.google.com/citations?hl=en;8XQPi7YAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=eGD0b7IAAAAJ", "orcid": "0009-0003-4402-3650;0000-0002-3052-4177;0009-0003-3764-1266;0000-0002-4261-6271;0000-0002-1546-791X;;;;0000-0002-6820-2325", "linkedin": ";haodong-duan-bb9349166/;;;;;;;", "or_profile": "~Yuxuan_Qiao1;~Haodong_Duan1;~Xinyu_Fang1;~Junming_Yang1;~Lin_Chen18;~Songyang_Zhang1;~Jiaqi_Wang1;~Dahua_Lin1;~Kai_Chen4", "aff": "Nanjing University;Shanghai Artificial Intelligence Laboratory;College of Computer Science and Technology, Zhejiang University;Nanjing University of Posts and Telecommunications;University of Science and Technology of China;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "nju.edu.cn;pjlab.org.cn;cs.zju.edu.cn;njupt.edu.cn;ustc.edu.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn", "position": "MS student;Postdoc;PhD student;Undergrad student;MS student;Postdoc;Research Scientist;Associate Professor;Researcher", "bibtex": "@inproceedings{\nqiao2024prism,\ntitle={Prism: A Framework for Decoupling and Assessing the Capabilities of {VLM}s},\nauthor={Yuxuan Qiao and Haodong Duan and Xinyu Fang and Junming Yang and Lin Chen and Songyang Zhang and Jiaqi Wang and Dahua Lin and Kai Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qLnXPVvwLx}\n}", "github": "", "reviewers": "onkG;rB4V;KmBE", "pdf_size": 2529153, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "2;3;4", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "75;72;86", "wc_strengths": "59;56;52", "wc_weaknesses": "177;81;16", "wc_questions": "74;124;31", "wc_limitations": "9;13;10", "wc_review": "394;346;195", "wc_reply_reviewers": "92;71;0", "wc_reply_authors": "350;246;86", "reply_reviewers": "1;1;0", "reply_authors": "3;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.66666666666667, 6.018490028422596 ], "wc_strengths_avg": [ 55.666666666666664, 2.8674417556808756 ], "wc_weaknesses_avg": [ 91.33333333333333, 66.13286290155263 ], "wc_questions_avg": [ 76.33333333333333, 38.00292386412159 ], "wc_limitations_avg": [ 10.666666666666666, 1.699673171197595 ], "wc_review_avg": [ 311.6666666666667, 84.79124692770802 ], "wc_reply_reviewers_avg": [ 54.333333333333336, 39.364394515292055 ], "wc_reply_authors_avg": [ 227.33333333333334, 108.5827897146791 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1199928128065989330&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;pjlab.org.cn;cs.zju.edu.cn;njupt.edu.cn;ustc.edu.cn;pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn", "author_num": 9, "aff_unique_index": "0;1;2;3;4;1;5;6;5", "aff_unique_norm": "Nanjing University;Shanghai Artificial Intelligence Laboratory;Zhejiang University;Nanjing University of Posts and Telecommunications;University of Science and Technology of China;Shanghai AI Laboratory;Chinese University of Hong Kong", "aff_unique_dep": ";;College of Computer Science and Technology;;;;", "aff_unique_url": "https://www.nju.edu.cn;http://www.shailab.org/;http://www.zju.edu.cn;http://www.njupt.edu.cn;http://www.ustc.edu.cn;https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Nanjing U;Shanghai AI Lab;ZJU;NJUPT;USTC;SAIL;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Nanjing;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qLtLQ4KUCq", "title": "Generative Subspace Adversarial Active Learning for Outlier Detection in Multiple Views of High-dimensional Tabular Data", "track": "main", "status": "Reject", "tldr": "", "abstract": "Outlier detection in high-dimensional tabular data is an important task in data mining, essential for many downstream tasks and applications. Existing unsupervised outlier detection algorithms face one or more problems, including inlier assumption (IA), curse of dimensionality (CD), and multiple views (MV). To address these issues, we introduce Generative Subspace Adversarial Active Learning (GSAAL), a novel approach that uses a Generative Adversarial Network with multiple adversaries. These adversaries learn the marginal class probability functions over different data subspaces, while a single generator in the full space models the entire distribution of the inlier class. GSAAL is specifically designed to address the MV limitation while also handling the IA and CD, being the only method to do so. We provide a mathematical formulation of MV, convergence guarantees for the discriminators, and scalability results for GSAAL. Our extensive experiments demonstrate the effectiveness and scalability of GSAAL, highlighting its superior performance compared to other popular OD methods, especially in MV scenarios.", "keywords": "Outlier Detection;One-class classification;GAN;Active Learning;Tabular data", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/cae8f6ae9f1927c2fa4448fb728c4ffc0199219c.zip", "author": "Jose Cribeiro-Ramallo;Vadim Arzamasov;Federico Matteucci;Denis Wambold;Klemens B\u00f6hm", "authorids": "~Jose_Cribeiro-Ramallo1;~Vadim_Arzamasov1;~Federico_Matteucci1;~Denis_Wambold1;~Klemens_B\u00f6hm1", "gender": "M;;M;;M", "homepage": "https://dbis.ipd.kit.edu/722_2410.php;https://github.com/DrCohomology;;;https://cv.cribeiro.de", "dblp": "148/6294;352/4741;;b/KBohm;367/9205.html", "google_scholar": ";https://scholar.google.de/citations?user=X_O8eI0AAAAJ;;;SeWz2NwAAAAJ", "orcid": "0000-0002-6854-4931;0000-0003-3181-2071;;;0000-0003-0631-7431", "linkedin": "vadim-arzamasov-803292b7/?originalSubdomain=de;federico-matteucci-749ba41a4;denis-wambold/;;https://linkedin.com/in/cribeiro-ramallo-datascience", "or_profile": "~Vadim_Arzamasov1;~Federico_Matteucci1;~Denis_Wambold1;~Klemens_B\u00f6hm1;~Jose_A_Cribeiro-Ramallo1", "aff": "Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie;Karlsruher Institut f\u00fcr Technologie", "aff_domain": "kit.edu;kit.edu;kit.edu;kit.edu;kit.edu", "position": "Postdoc;PhD student;MS student;Full Professor;PhD student", "bibtex": "@misc{\nanonymous2024generative,\ntitle={Generative Subspace Adversarial Active Learning for Outlier Detection in Multiple Views of High-dimensional Tabular Data},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=qLtLQ4KUCq}\n}", "github": "", "project": "", "reviewers": "FsUf;hp34;vAno", "site": "https://openreview.net/forum?id=qLtLQ4KUCq", "pdf_size": 12874574, "rating": "4;4;7", "confidence": "4;3;5", "soundness": "3;2;4", "novelty": "2;2;4", "presentation": "2;1;4", "wc_summary": "61;100;24", "wc_strengths": "103;22;60", "wc_weaknesses": "154;143;6", "wc_questions": "44;38;110", "wc_limitations": "18;1;7", "wc_review": "380;304;207", "wc_reply_reviewers": "19;0;14", "wc_reply_authors": "82;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 2.3333333333333335, 1.247219128924647 ], "wc_summary_avg": [ 61.666666666666664, 31.03045099396541 ], "wc_strengths_avg": [ 61.666666666666664, 33.089105289942324 ], "wc_weaknesses_avg": [ 101.0, 67.32508200267317 ], "wc_questions_avg": [ 64.0, 32.61901286060018 ], "wc_limitations_avg": [ 8.666666666666666, 7.039570693980959 ], "wc_review_avg": [ 297.0, 70.80018832366667 ], "wc_reply_reviewers_avg": [ 11.0, 8.04155872120988 ], "wc_reply_authors_avg": [ 27.333333333333332, 38.6551707048646 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ujYVeM830ZIJ:scholar.google.com/&scioq=Generative+Subspace+Adversarial+Active+Learning+for+Outlier+Detection+in+Multiple+Views+of+High-dimensional+Tabular+Data&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": "", "aff_unique_url": "https://www.kit.edu", "aff_unique_abbr": "KIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Topic-Conversation Relevance (TCR) Dataset and Benchmarks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97498", "id": "qMuzlVmiQh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qMuzlVmiQh", "openreview": "https://openreview.net/forum?id=qMuzlVmiQh", "poster": "/media/PosterPDFs/NeurIPS%202024/97498.png?t=1730705396.8305006", "project": "", "author_site": "Yaran Fan, Jamie Pool, Senja Filipi, Ross Cutler", "tldr": "", "abstract": "Workplace meetings are vital to organizational collaboration, yet a large percentage of meetings are rated as ineffective. To help improve meeting effectiveness by understanding if the conversation is on topic, we create a comprehensive Topic-Conversation Relevance (TCR) dataset that covers a variety of domains and meeting styles. The TCR dataset includes 1,500 unique meetings, 22 million words in transcripts, and over 15,000 meeting topics, sourced from both newly collected Speech Interruption Meeting (SIM) data and existing public datasets. Along with the text data, we also open source scripts to generate synthetic meetings or create augmented meetings from the TCR dataset to enhance data diversity. For each data source, benchmarks are created using GPT-4 to evaluate the model accuracy in understanding transcription-topic relevance.", "keywords": "dataset;transcripts;topic-conversation relevance;LLM", "primary_area": "", "supplementary_material": "/attachment/54d7bda3297610109dd82bb27ade485a78cadb30.pdf", "author": "Yaran Fan;Jamie Pool;Senja Filipi;Ross Cutler", "authorids": "~Yaran_Fan1;~Jamie_Pool1;~Senja_Filipi1;~Ross_Cutler1", "gender": "F;;F;", "homepage": ";;;https://www.microsoft.com/en-us/research/people/rcutler/", "dblp": ";;;94/6224", "google_scholar": ";;;Qee1AwMAAAAJ", "orcid": ";;;", "linkedin": "yaranfan;;senja-filipi/;rosscutler/", "or_profile": "~Yaran_Fan1;~Jamie_Pool1;~Senja_Filipi1;~Ross_Cutler1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Software Engineer;Researcher", "bibtex": "@inproceedings{\nfan2024topicconversation,\ntitle={Topic-Conversation Relevance ({TCR}) Dataset and Benchmarks},\nauthor={Yaran Fan and Jamie Pool and Senja Filipi and Ross Cutler},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qMuzlVmiQh}\n}", "github": "", "reviewers": "8mfY;n2Sz;CrsX;aWey", "pdf_size": 420933, "rating": "4;6;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "83;99;15;38", "wc_strengths": "50;40;1;4", "wc_improvement": "2;56;1;92", "wc_limitations": "1;86;1;1", "wc_correctness": "1;49;1;9", "wc_clarity": "1;21;1;1", "wc_relation_to_prior_work": "1;51;1;1", "wc_documentation": "1;2;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "141;405;23;148", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 58.75, 33.737034546622496 ], "wc_strengths_avg": [ 23.75, 21.568205766822608 ], "wc_improvement_avg": [ 37.75, 38.42118556213486 ], "wc_limitations_avg": [ 22.25, 36.80607966083864 ], "wc_correctness_avg": [ 15.0, 19.8997487421324 ], "wc_clarity_avg": [ 6.0, 8.660254037844387 ], "wc_relation_to_prior_work_avg": [ 13.5, 21.650635094610966 ], "wc_documentation_avg": [ 1.25, 0.4330127018922193 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 179.25, 139.47826891670258 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.16903085094570333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YOAcm5da03IJ:scholar.google.com/&scioq=Topic-Conversation+Relevance+(TCR)+Dataset+and+Benchmarks&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Uncertainty-aware Fine-tuning of Segmentation Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93500", "id": "qNXRXUC90b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qNXRXUC90b", "openreview": "https://openreview.net/forum?id=qNXRXUC90b", "poster": "/media/PosterPDFs/NeurIPS%202024/93500.png?t=1731312694.989072", "project": "", "author_site": "Kangning Liu, Brian Price, Jason Kuen, Yifei Fan, Zijun Wei, Luis Figueroa, Krzysztof Geras, Carlos Fernandez-Granda", "tldr": "", "abstract": "The Segment Anything Model (SAM) is a large-scale foundation model that has revolutionized segmentation methodology. Despite its impressive generalization ability, the segmentation accuracy of SAM on images with intricate structures is often unsatisfactory. Recent works have proposed lightweight fine-tuning using high-quality annotated data to improve accuracy on such images. However, here we provide extensive empirical evidence that this strategy leads to forgetting how to \"segment anything\": these models lose the original generalization abilities of SAM, in the sense that they perform worse for segmentation tasks not represented in the annotated fine-tuning set. To improve performance without forgetting, we introduce a novel framework that combines high-quality annotated data with a large unlabeled dataset. The framework relies on two methodological innovations. First, we quantify the uncertainty in the SAM pseudo labels associated with the unlabeled data and leverage it to perform uncertainty-aware fine-tuning. Second, we encode the type of segmentation task associated with each training example using a $\\textit{task prompt}$ to reduce ambiguity. We evaluated the proposed Segmentation with Uncertainty Model (SUM) on a diverse test set consisting of 14 public benchmarks, where it achieves state-of-the-art results. Notably, our method consistently surpasses SAM by 3-6 points in mean IoU and 4-7 in mean boundary IoU across point-prompt interactive segmentation rounds. Code is available at https://github.com/Kangningthu/SUM", "keywords": "Segmentation foundation model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Kangning Liu;Brian L. Price;Jason Kuen;Yifei Fan;Zijun Wei;Luis Figueroa;Krzysztof J. Geras;Carlos Fernandez-Granda", "authorids": "~Kangning_Liu1;~Brian_L._Price2;~Jason_Kuen1;~Yifei_Fan1;~Zijun_Wei2;~Luis_Figueroa1;~Krzysztof_J._Geras1;~Carlos_Fernandez-Granda1", "gender": "M;;M;;M;;;", "homepage": "https://kangning-liu.github.io/;https://www.brianpricephd.com/;http://jasonkuen.com/;;;;;https://cims.nyu.edu/~cfgranda/", "dblp": "259/1458;38/5397;165/1403;;157/3589;;;77/11141", "google_scholar": "F3F2qAkAAAAJ;ntGll74AAAAJ;e6u7GlQAAAAJ;;8l3bFYYAAAAJ;;;GX-PtukAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;;;;;", "or_profile": "~Kangning_Liu1;~Brian_L._Price2;~Jason_Kuen1;~Yifei_Fan1;~Zijun_Wei2;~Luis_Figueroa1;~Krzysztof_J._Geras1;~Carlos_Fernandez-Granda1", "aff": "New York University;Adobe Systems;Adobe Research;;Adobe Systems;;;New York University", "aff_domain": "nyu.edu;adobe.com;adobe.com;;adobe.com;;;nyu.edu", "position": "PhD student;Principal Researcher;Researcher;;Research Scientist;;;Associate Professor", "bibtex": "@inproceedings{\nliu2024uncertaintyaware,\ntitle={Uncertainty-aware Fine-tuning of Segmentation Foundation Models},\nauthor={Kangning Liu and Brian L. Price and Jason Kuen and Yifei Fan and Zijun Wei and Luis Figueroa and Krzysztof J. Geras and Carlos Fernandez-Granda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qNXRXUC90b}\n}", "github": "", "reviewers": "MMBZ;cnn3;HNZW;NsqR", "pdf_size": 19145570, "rating": "4;6;7;7", "confidence": "3;3;5;3", "soundness": "3;3;3;4", "novelty": "2;4;3;4", "presentation": "2;2;3;4", "wc_summary": "79;35;171;169", "wc_strengths": "35;42;199;104", "wc_weaknesses": "212;58;170;75", "wc_questions": "72;30;3;103", "wc_limitations": "2;10;3;1", "wc_review": "400;175;546;452", "wc_reply_reviewers": "64;33;0;42", "wc_reply_authors": "66;61;0;56", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 113.5, 58.606740226700886 ], "wc_strengths_avg": [ 95.0, 65.77613549000884 ], "wc_weaknesses_avg": [ 128.75, 64.27820392636994 ], "wc_questions_avg": [ 52.0, 38.360135557633264 ], "wc_limitations_avg": [ 4.0, 3.5355339059327378 ], "wc_review_avg": [ 393.25, 136.43931801354037 ], "wc_reply_reviewers_avg": [ 34.75, 23.01494079940246 ], "wc_reply_authors_avg": [ 45.75, 26.649343331496933 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bMcG8K10988J:scholar.google.com/&scioq=Uncertainty-aware+Fine-tuning+of+Segmentation+Foundation+Models&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "nyu.edu;adobe.com;adobe.com;;adobe.com;;;nyu.edu", "author_num": 8, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "New York University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.nyu.edu;https://www.adobe.com", "aff_unique_abbr": "NYU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "qNYYb4nOKA", "title": "Assessing the quality of information extraction", "track": "main", "status": "Reject", "tldr": "", "abstract": "Advances in large language models have notably enhanced the efficiency of information extraction from unstructured and semi-structured data sources. As these technologies become integral to various applications, establishing an objective measure for the quality of information extraction becomes imperative. However, the scarcity of labeled data presents significant challenges to this endeavor. In this paper, we introduce an automatic framework to assess the quality of the information extraction/retrieval and its completeness. The framework focuses on information extraction in the form of entity and its properties. We discuss how to handle the input/output size limitations of the large language models and analyze their performance when extracting the information. In particular, we introduce scores to evaluate the quality of the extraction and provide an extensive discussion on how to interpret them.", "keywords": "information extraction;large language models;quality evaluation;name entity recognition;needle in a haystack test;schema.org", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Filip Seitl;Tomas Kovarik;Soheyla Mirshahi;Jan Kry\u0161t\u016ffek;Rastislav Dujava;Matus Ondreicka;Herbert Ullrich;Petr Gronat", "authorids": "~Filip_Seitl1;~Tomas_Kovarik1;~Soheyla_Mirshahi1;~Jan_Kry\u0161t\u016ffek1;~Rastislav_Dujava1;~Matus_Ondreicka1;~Herbert_Ullrich1;~Petr_Gronat1", "gender": ";;;M;M;;M;", "homepage": ";;;https://www.creativedock.com/;;;http://bertik.net;", "dblp": ";;;;;;;", "google_scholar": ";;;;;iELEbywAAAAJ;;", "orcid": "0000-0001-9225-0143;;;;;;;", "linkedin": "filip-seitl-1578701aa/;tomaskovarik5483846;soheyla-m-336961251/;;rastislav-dujava-58315626a/;;;", "or_profile": "~Filip_Seitl1;~Tomas_Kovarik1;~Soheyla_Mirshahi1;~Jan_Kry\u0161t\u016ffek1;~Rastislav_Dujava1;~Matus_Ondreicka1;~Herbert_Ullrich1;~Petr_Gronat1", "aff": "Creative Dock;Creative Dock;Creative Dock;Creativedock s.r.o.;Creative Dock;Creative Dock;Czech Technical University;", "aff_domain": "creativedock.com;creativedock.com;creativedock.ai;creativedock.com;creativedock.com;creativedock.com;fel.cvut.cz;", "position": "Researcher;Head of AI R&D;Researcher;Researcher;Researcher;Researcher;PhD student;", "bibtex": "@misc{\nanonymous2024assessing,\ntitle={Assessing the quality of information extraction},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=qNYYb4nOKA}\n}", "github": "", "project": "", "reviewers": "1gKP;W8Fk;bpwS;CRaf", "site": "https://openreview.net/forum?id=qNYYb4nOKA", "pdf_size": 490431, "rating": "1;3;3;3", "confidence": "5;4;4;4", "soundness": "1;1;1;2", "novelty": "1;1;2;2", "presentation": "1;1;2;2", "wc_summary": "83;97;51;68", "wc_strengths": "18;12;38;20", "wc_weaknesses": "248;216;200;106", "wc_questions": "33;198;2;51", "wc_limitations": "19;1;2;14", "wc_review": "401;524;293;259", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "0;0;0;0", "rating_avg": [ 2.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 1.25, 0.4330127018922193 ], "novelty_avg": [ 1.5, 0.5 ], "presentation_avg": [ 1.5, 0.5 ], "wc_summary_avg": [ 74.75, 17.122718826167766 ], "wc_strengths_avg": [ 22.0, 9.695359714832659 ], "wc_weaknesses_avg": [ 192.5, 52.846475757613206 ], "wc_questions_avg": [ 71.0, 75.38899123877438 ], "wc_limitations_avg": [ 9.0, 7.713624310270756 ], "wc_review_avg": [ 369.25, 103.5914451101055 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7098910962012576206&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "Creative Dock;Creativedock;Czech Technical University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.creativedock.com;https://www.creativedock.com/;https://www.cvut.cz", "aff_unique_abbr": ";;CTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Czech Republic" }, { "title": "Continual learning with the neural tangent ensemble", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93499", "id": "qOSFiJdVkZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qOSFiJdVkZ", "openreview": "https://openreview.net/forum?id=qOSFiJdVkZ", "poster": "", "project": "", "author_site": "Ari Benjamin, Christian-Gernot Pehle, Kyle Daruwalla", "tldr": "", "abstract": "A natural strategy for continual learning is to weigh a Bayesian ensemble of fixed functions. This suggests that if a (single) neural network could be interpreted as an ensemble, one could design effective algorithms that learn without forgetting. To realize this possibility, we observe that a neural network classifier with N parameters can be interpreted as a weighted ensemble of N classifiers, and that in the lazy regime limit these classifiers are fixed throughout learning. We call these classifiers the *neural tangent experts* and show they output valid probability distributions over the labels. We then derive the likelihood and posterior probability of each expert given past data. Surprisingly, the posterior updates for these experts are equivalent to a scaled and projected form of stochastic gradient descent (SGD) over the network weights. Away from the lazy regime, networks can be seen as ensembles of adaptive experts which improve over time. These results offer a new interpretation of neural networks as Bayesian ensembles of experts, providing a principled framework for understanding and mitigating catastrophic forgetting in continual learning settings.", "keywords": "continual learning;catastrophic forgetting;Bayesian ensembles;Boosting and Ensemble Methods;mixture of experts", "primary_area": "online_learning", "supplementary_material": "", "author": "Ari S Benjamin;Christian-Gernot Pehle;Kyle Daruwalla", "authorids": "~Ari_S_Benjamin1;~Christian-Gernot_Pehle1;~Kyle_Daruwalla1", "gender": ";;M", "homepage": ";;https://darsnack.github.io", "dblp": "220/4207;218/5637.html;242/4758", "google_scholar": "GW6D4ZIAAAAJ;https://scholar.google.com/citations?hl=en;d4UlFQ0AAAAJ", "orcid": ";;0000-0001-7669-5943", "linkedin": ";;", "or_profile": "~Ari_S_Benjamin1;~Christian-Gernot_Pehle1;~Kyle_Daruwalla1", "aff": "Cold Spring Harbor Laboratory;Cold Spring Harbor Laboratory;Cold Spring Harbor Laboratory", "aff_domain": "cshl.edu;cshl.edu;cshl.edu", "position": "Postdoc;Researcher;Postdoc", "bibtex": "@inproceedings{\nbenjamin2024continual,\ntitle={Continual learning with the neural tangent ensemble},\nauthor={Ari S Benjamin and Christian-Gernot Pehle and Kyle Daruwalla},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qOSFiJdVkZ}\n}", "github": "", "reviewers": "q1Zy;MP6D;Gy8g;Yq9r", "pdf_size": 5587021, "rating": "5;6;6;7", "confidence": "2;2;2;3", "soundness": "3;3;4;3", "novelty": "3;2;4;3", "presentation": "2;3;4;3", "wc_summary": "28;99;220;138", "wc_strengths": "58;53;72;94", "wc_weaknesses": "46;92;140;253", "wc_questions": "238;71;19;337", "wc_limitations": "15;14;30;25", "wc_review": "385;329;481;847", "wc_reply_reviewers": "24;61;61;115", "wc_reply_authors": "0;125;0;0", "reply_reviewers": "1;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 121.25, 69.3230661468461 ], "wc_strengths_avg": [ 69.25, 15.896147331979533 ], "wc_weaknesses_avg": [ 132.75, 76.97199165930424 ], "wc_questions_avg": [ 166.25, 127.53308394295183 ], "wc_limitations_avg": [ 21.0, 6.745368781616021 ], "wc_review_avg": [ 510.5, 201.739311984551 ], "wc_reply_reviewers_avg": [ 65.25, 32.45285041410076 ], "wc_reply_authors_avg": [ 31.25, 54.12658773652741 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4645886299015289592&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 4, "email": "cshl.edu;cshl.edu;cshl.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Cold Spring Harbor Laboratory", "aff_unique_dep": "", "aff_unique_url": "https://www.cshl.edu", "aff_unique_abbr": "CSHL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Source Code Foundation Models are Transferable Binary Analysis Knowledge Bases", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93498", "id": "qPpVDzPhSL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qPpVDzPhSL", "openreview": "https://openreview.net/forum?id=qPpVDzPhSL", "poster": "/media/PosterPDFs/NeurIPS%202024/93498.png?t=1733684298.292521", "project": "", "author_site": "Zian Su, Xiangzhe Xu, Ziyang Huang, Kaiyuan Zhang, Xiangyu Zhang", "tldr": "", "abstract": "Human-Oriented Binary Reverse Engineering (HOBRE) lies at the intersection of binary and source code, aiming to lift binary code to human-readable content relevant to source code, thereby bridging the binary-source semantic gap. Recent advancements in uni-modal code model pre-training, particularly in generative Source Code Foundation Models (SCFMs) and binary understanding models, have laid the groundwork for transfer learning applicable to HOBRE. However, existing approaches for HOBRE rely heavily on uni-modal models like SCFMs for supervised fine-tuning or general LLMs for prompting, resulting in sub-optimal performance. Inspired by recent progress in large multi-modal models, we propose that it is possible to harness the strengths of uni-modal code models from both sides to bridge the semantic gap effectively. In this paper, we introduce a novel probe-and-recover framework that incorporates a binary-source encoder-decoder model and black-box LLMs for binary analysis. Our approach leverages the pre-trained knowledge within SCFMs to synthesize relevant, symbol-rich code fragments as context. This additional context enables black-box LLMs to enhance recovery accuracy. We demonstrate significant improvements in zero-shot binary summarization and binary function name recovery, with a 10.3% relative gain in CHRF and a 16.7% relative gain in a GPT4-based metric for summarization, as well as a 6.7% and 7.4% absolute increase in token-level precision and recall for name recovery, respectively. These results highlight the effectiveness of our approach in automating and improving binary code analysis.", "keywords": "Code Language Models;Human-Understandable Binary Recovery;Cross-Modal Prompting", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zian Su;Xiangzhe Xu;Ziyang Huang;Kaiyuan Zhang;Xiangyu Zhang", "authorids": "~Zian_Su1;~Xiangzhe_Xu1;~Ziyang_Huang4;~Kaiyuan_Zhang1;~Xiangyu_Zhang3", "gender": "M;;M;M;M", "homepage": ";https://sites.google.com/view/alex-xu/;;https://kaiyuanzhang.com/;https://www.cs.purdue.edu/homes/xyzhang", "dblp": ";276/3462;;147/6644-2;", "google_scholar": "gSQzZT0AAAA;;QfPKo60AAAAJ;https://scholar.google.com/citations?hl=en;PXbu1wIAAAAJ", "orcid": ";;0009-0009-2764-6091;0000-0001-6023-363X;", "linkedin": ";;;kaiyuan-zhang/;", "or_profile": "~Zian_Su1;~Xiangzhe_Xu1;~Ziyang_Huang4;~Kaiyuan_Zhang1;~Xiangyu_Zhang3", "aff": "Purdue University;Purdue University;Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;cs.purdue.edu;cs.purdue.edu;cs.purdue.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsu2024source,\ntitle={Source Code Foundation Models are Transferable Binary Analysis Knowledge Bases},\nauthor={Zian Su and Xiangzhe Xu and Ziyang Huang and Kaiyuan Zhang and Xiangyu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qPpVDzPhSL}\n}", "github": "", "reviewers": "CUjj;9NVM;pcKr;dbwT", "pdf_size": 1079752, "rating": "4;6;7;7", "confidence": "4;4;4;3", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "1;3;4;3", "wc_summary": "56;103;203;83", "wc_strengths": "6;85;15;72", "wc_weaknesses": "234;224;5;117", "wc_questions": "55;146;109;24", "wc_limitations": "8;8;3;70", "wc_review": "359;566;335;366", "wc_reply_reviewers": "999;0;7;24", "wc_reply_authors": "3142;125;13;20", "reply_reviewers": "6;0;1;1", "reply_authors": "8;3;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 111.25, 55.5354616438902 ], "wc_strengths_avg": [ 44.5, 34.4564943080401 ], "wc_weaknesses_avg": [ 145.0, 92.93277139954452 ], "wc_questions_avg": [ 83.5, 47.193749586147526 ], "wc_limitations_avg": [ 22.25, 27.643941470058138 ], "wc_review_avg": [ 406.5, 92.80220902543215 ], "wc_reply_reviewers_avg": [ 257.5, 428.1941732438684 ], "wc_reply_authors_avg": [ 825.0, 1338.4560134722396 ], "reply_reviewers_avg": [ 2.0, 2.345207879911715 ], "reply_authors_avg": [ 3.75, 2.48746859276655 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4241254477763675094&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "purdue.edu;purdue.edu;cs.purdue.edu;cs.purdue.edu;cs.purdue.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Empowering Visible-Infrared Person Re-Identification with Large Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93497", "id": "qQlmONeI5k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qQlmONeI5k", "openreview": "https://openreview.net/forum?id=qQlmONeI5k", "poster": "/media/PosterPDFs/NeurIPS%202024/93497.png?t=1731207227.23578", "project": "", "author_site": "Zhangyi Hu, Bin Yang, Mang Ye", "tldr": "", "abstract": "Visible-Infrared Person Re-identification (VI-ReID) is a challenging cross-modal retrieval task due to significant modality differences, primarily resulting from the absence of color information in the infrared modality. The development of large foundation models like Large Language Models (LLMs) and Vision Language Models (VLMs) motivates us to explore a feasible solution to empower VI-ReID with off-the-shelf large foundation models. To this end, we propose a novel Text-enhanced VI-ReID framework driven by Large Foundation Models (TVI-LFM). The core idea is to enrich the representation of the infrared modality with textual descriptions automatically generated by VLMs. Specifically, we incorporate a pre-trained VLM to extract textual features from texts generated by VLM and augmented by LLM, and incrementally fine-tune the text encoder to minimize the domain gap between generated texts and original visual modalities. Meanwhile, to enhance the infrared modality with extracted textual representations, we leverage modality alignment capabilities of VLMs and VLM-generated feature-level filters. This enables the text model to learn complementary features from the infrared modality, ensuring the semantic structural consistency between the fusion modality and the visible modality. Furthermore, we introduce modality joint learning to align features across all modalities, ensuring that textual features maintain stable semantic representation of overall pedestrian appearance during complementary information learning. Additionally, a modality ensemble retrieval strategy is proposed to leverage complementary strengths of each query modality to improve retrieval effectiveness and robustness. Extensive experiments on three expanded VI-ReID datasets demonstrate that our method significantly improves the retrieval performance, paving the way for the utilization of large foundation models in downstream multi-modal retrieval tasks.", "keywords": "multi-modal;large foundation model;Visible-Infrared Person Re-Identification", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhangyi Hu;Bin Yang;Mang Ye", "authorids": "~Zhangyi_Hu1;~Bin_Yang7;~Mang_Ye1", "gender": "M;M;M", "homepage": ";;https://marswhu.github.io/", "dblp": "241/7286;77/377-26;156/0610", "google_scholar": ";A_k-XFkAAAAJ;j-HxRy0AAAAJ", "orcid": ";0000-0003-0329-9346;0000-0003-3989-7655", "linkedin": ";;", "or_profile": "~Zhangyi_Hu1;~Bin_Yang7;~Mang_Ye1", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "Undergrad student;PhD student;Professor", "bibtex": "@inproceedings{\nhu2024empowering,\ntitle={Empowering Visible-Infrared Person Re-Identification with Large Foundation Models},\nauthor={Zhangyi Hu and Bin Yang and Mang Ye},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qQlmONeI5k}\n}", "github": "", "reviewers": "368Y;EhC5;88CP;hNfL;BZVu", "pdf_size": 4644819, "rating": "3;7;8;8;8", "confidence": "3;4;5;5;5", "soundness": "2;3;3;4;4", "novelty": "3;3;3;3;4", "presentation": "2;3;3;3;3", "wc_summary": "85;93;61;63;54", "wc_strengths": "157;105;53;142;86", "wc_weaknesses": "177;208;78;84;69", "wc_questions": "53;2;30;54;37", "wc_limitations": "90;28;11;24;30", "wc_review": "562;436;233;367;276", "wc_reply_reviewers": "60;117;65;101;86", "wc_reply_authors": "483;43;48;50;38", "reply_reviewers": "1;1;1;1;1", "reply_authors": "4;2;2;2;2", "rating_avg": [ 6.8, 1.9390719429665317 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 3.2, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 71.2, 15.051910177781423 ], "wc_strengths_avg": [ 108.6, 37.61170030721823 ], "wc_weaknesses_avg": [ 123.2, 57.62430042959307 ], "wc_questions_avg": [ 35.2, 18.988417522268673 ], "wc_limitations_avg": [ 36.6, 27.507089995126712 ], "wc_review_avg": [ 374.8, 117.23378352676332 ], "wc_reply_reviewers_avg": [ 85.8, 21.460661685977904 ], "wc_reply_authors_avg": [ 132.4, 175.34947961143197 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.4, 0.8000000000000002 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9540646527893837, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6171032899453933125&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "DART-Eval: A Comprehensive DNA Language Model Evaluation Benchmark on Regulatory DNA", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97497", "id": "qR0x6H5WUX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qR0x6H5WUX", "openreview": "https://openreview.net/forum?id=qR0x6H5WUX", "poster": "", "project": "", "author_site": "Aman Patel, Arpita Singhal, Austin Wang, Anusri Pampari, Maya Kasowski, Anshul Kundaje", "tldr": "", "abstract": "Recent advances in self-supervised models for natural language, vision, and protein sequences have inspired the development of large genomic DNA language models (DNALMs). These models aim to learn generalizable representations of diverse DNA elements, potentially enabling various genomic prediction, interpretation and design tasks. Despite their potential, existing benchmarks do not adequately assess the capabilities of DNALMs on key downstream applications involving an important class of non-coding DNA elements critical for regulating gene activity. In this study, we introduce DART-Eval, a suite of representative benchmarks specifically focused on regulatory DNA to evaluate model performance across zero-shot, probed, and fine-tuned scenarios against contemporary ab initio models as baselines. Our benchmarks target biologically meaningful downstream tasks such as functional sequence feature discovery, predicting cell-type specific regulatory activity, and counterfactual prediction of the impacts of genetic variants. We find that current DNALMs exhibit inconsistent performance and do not offer compelling gains over alternative baseline models for most tasks, while requiring significantly more computational resources. We discuss potentially promising modeling, data curation, and evaluation strategies for the next generation of DNALMs. Our code is available at https://github.com/kundajelab/DART-Eval", "keywords": "DNA;Language Models;LLMs;Biology;Foundation Models;Benchmarks;Gene Regulation;Healthcare", "primary_area": "", "supplementary_material": "/attachment/ab2c06dd307d46c0e638f6bdc28958f2b814da70.pdf", "author": "Aman Patel;Arpita Singhal;Austin Wang;Anusri Pampari;Maya Kasowski;Anshul Kundaje", "authorids": "~Aman_Patel2;~Arpita_Singhal1;~Austin_Wang3;~Anusri_Pampari1;~Maya_Kasowski1;~Anshul_Kundaje1", "gender": "M;;;;F;", "homepage": ";;;;https://med.stanford.edu/profiles/maya-kasowski;http://anshul.kundaje.net", "dblp": ";;;;;95/1107", "google_scholar": ";adwQbrUAAAAJ;;;;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-6096-9444;;;0000-0003-3084-2287", "linkedin": "amanspatel/;;;;;", "or_profile": "~Aman_Patel2;~Arpita_Singhal1;~Austin_Wang3;~Anusri_Pampari1;~Maya_Kasowski1;~Anshul_Kundaje1", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University;;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;;stanford.edu;stanford.edu", "position": "PhD student;PhD student;PhD student;;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npatel2024darteval,\ntitle={{DART}-Eval: A Comprehensive {DNA} Language Model Evaluation Benchmark on Regulatory {DNA}},\nauthor={Aman Patel and Arpita Singhal and Austin Wang and Anusri Pampari and Maya Kasowski and Anshul Kundaje},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qR0x6H5WUX}\n}", "github": "", "reviewers": "jn6m;xoes;UVbC;jaVt", "pdf_size": 7909073, "rating": "6;6;7;7", "confidence": "5;3;4;4", "wc_summary_and_contributions": "106;91;39;78", "wc_strengths": "137;51;23;32", "wc_improvement": "203;101;4;65", "wc_limitations": "15;6;6;46", "wc_correctness": "8;2;4;29", "wc_clarity": "13;3;1;12", "wc_relation_to_prior_work": "11;7;1;29", "wc_documentation": "17;12;7;21", "wc_additional_feedback": "1;1;1;1", "wc_review": "511;274;86;313", "wc_reply_reviewers": "0;31;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 78.5, 24.86463351831271 ], "wc_strengths_avg": [ 60.75, 45.168434774740646 ], "wc_improvement_avg": [ 93.25, 72.23010106596834 ], "wc_limitations_avg": [ 18.25, 16.43738117827776 ], "wc_correctness_avg": [ 10.75, 10.755812382149477 ], "wc_clarity_avg": [ 7.25, 5.3091901453988255 ], "wc_relation_to_prior_work_avg": [ 12.0, 10.44030650891055 ], "wc_documentation_avg": [ 14.25, 5.261891294962297 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 296.0, 150.91222614486873 ], "wc_reply_reviewers_avg": [ 7.75, 13.423393758658799 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3236977653335843032&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "stanford.edu;stanford.edu;cs.stanford.edu;;stanford.edu;stanford.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "4M-21: An Any-to-Any Vision Model for Tens of Tasks and Modalities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93496", "id": "qRnmLJQHgx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qRnmLJQHgx", "openreview": "https://openreview.net/forum?id=qRnmLJQHgx", "poster": "", "project": "", "author_site": "Roman Bachmann, Oguzhan Fatih Kar, David Mizrahi, Ali Garjani, Mingfei Gao, David Griffiths, Jiaming Hu, Afshin Dehghan, Amir Zamir", "tldr": "", "abstract": "Current multimodal and multitask foundation models, like 4M or UnifiedIO, show promising results. However, their out-of-the-box abilities to accept diverse inputs and perform diverse tasks are limited by the (usually small) number of modalities and tasks they are trained on. In this paper, we develop a single any-to-any model trained on tens of highly diverse modalities and by performing co-training on large-scale multimodal datasets and text corpora. This includes training on images and text along with several semantic and geometric modalities, feature maps from recent state of the art models like DINOv2 and ImageBind, pseudo labels of specialist models like SAM and 4DHumans, and a range of new modalities that allow for novel ways to interact with the model and steer the generation, for example, image metadata or color palettes.\n\nA crucial step in this process is performing discrete tokenization on various modalities, whether they are image-like, neural network feature maps, vectors, structured data like instance segmentation or human poses, or data that can be represented as text.\n \nThrough this, we show the possibility of training one model to solve at least 3x more tasks/modalities than existing models and doing so without a loss in performance. In addition, this enables more fine-grained and controllable multimodal generation capabilities and allows studying the distillation of models trained on diverse data and objectives into one unified model.\nWe scale the training to a three billion parameter and different datasets. The multimodal models and training code are open sourced at https://4m.epfl.ch/.", "keywords": "multimodal learning;multitask learning;representation learning;transfer learning;foundation models;generative models;computer vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Roman Bachmann;O\u011fuzhan Fatih Kar;David Mizrahi;Ali Garjani;Mingfei Gao;David Griffiths;Jiaming Hu;Afshin Dehghan;Amir Zamir", "authorids": "~Roman_Bachmann1;~O\u011fuzhan_Fatih_Kar1;~David_Mizrahi1;~Ali_Garjani1;~Mingfei_Gao1;~David_Griffiths1;~Jiaming_Hu2;~Afshin_Dehghan5;~Amir_Zamir1", "gender": "M;;;M;;M;;;M", "homepage": ";;https://dmizrahi.com;;https://fly6464.github.io;https://dgriffiths.uk;;;https://amirzamir.com/", "dblp": "248/2626-1;;317/6970;296/0254;67/6825;10/6918;;;76/8610", "google_scholar": "-KHAy7kAAAAJ;;IF8OK3IAAAAJ;;kMe-G5AAAAAJ;https://scholar.google.co.uk/citations?user=xrSs8r8AAAAJ;;;RKjEFukAAAAJ", "orcid": "0000-0001-5324-2474;;;;;0000-0002-8582-138X;;;", "linkedin": ";;;ali-garjani-35a2a7180/;;david-griffiths-02865b142/;;;", "or_profile": "~Roman_Bachmann1;~O\u011fuzhan_Fatih_Kar1;~David_Mizrahi1;~Ali_Garjani1;~Mingfei_Gao1;~David_Griffiths1;~Jiaming_Hu2;~Afshin_Dehghan5;~Amir_Zamir1", "aff": "Apple;;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Apple;Apple Inc.;;;Swiss Federal Institute of Technology Lausanne", "aff_domain": "apple.com;;epfl.ch;epfl.ch;apple.com;apple.com;;;epfl.ch", "position": "Intern;;MS student;MS student;Researcher;Researcher;;;Assistant Professor", "bibtex": "@inproceedings{\nbachmann2024m,\ntitle={4M-21: An Any-to-Any Vision Model for Tens of Tasks and Modalities},\nauthor={Roman Bachmann and O{\\u{g}}uzhan Fatih Kar and David Mizrahi and Ali Garjani and Mingfei Gao and David Griffiths and Jiaming Hu and Afshin Dehghan and Amir Zamir},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qRnmLJQHgx}\n}", "github": "", "reviewers": "b2ju;PHF3;66Uu", "pdf_size": 42769485, "rating": "5;6;8", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "4;3;3", "wc_summary": "56;126;74", "wc_strengths": "59;55;24", "wc_weaknesses": "225;119;55", "wc_questions": "117;277;2", "wc_limitations": "9;7;2", "wc_review": "466;584;157", "wc_reply_reviewers": "219;50;0", "wc_reply_authors": "22;23;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 85.33333333333333, 29.67977238606942 ], "wc_strengths_avg": [ 46.0, 15.641824275533422 ], "wc_weaknesses_avg": [ 133.0, 70.10468362860406 ], "wc_questions_avg": [ 132.0, 112.76819882691514 ], "wc_limitations_avg": [ 6.0, 2.943920288775949 ], "wc_review_avg": [ 402.3333333333333, 180.04135327443217 ], "wc_reply_reviewers_avg": [ 89.66666666666667, 93.7028399901637 ], "wc_reply_authors_avg": [ 15.0, 10.614455552060438 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9197396351488578628&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "apple.com;;epfl.ch;epfl.ch;apple.com;apple.com;;;epfl.ch", "author_num": 9, "aff_unique_index": "0;1;2;0;0;1", "aff_unique_norm": "Apple;Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "Apple;EPFL;EPFL", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Geodesic Optimization for Predictive Shift Adaptation on EEG data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93495", "id": "qTypwXvNJa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qTypwXvNJa", "openreview": "https://openreview.net/forum?id=qTypwXvNJa", "poster": "/media/PosterPDFs/NeurIPS%202024/93495.png?t=1732881224.6706257", "project": "", "author_site": "Apolline Mellot, Antoine Collas, Sylvain Chevallier, Alex Gramfort, Denis Engemann", "tldr": "", "abstract": "Electroencephalography (EEG) data is often collected from diverse contexts involving different populations and EEG devices. This variability can induce distribution shifts in the data $X$ and in the biomedical variables of interest $y$, thus limiting the application of supervised machine learning (ML) algorithms. While domain adaptation (DA) methods have been developed to mitigate the impact of these shifts, such methods struggle when distribution shifts occur simultaneously in $X$ and $y$. As state-of-the-art ML models for EEG represent the data by spatial covariance matrices, which lie on the Riemannian manifold of Symmetric Positive Definite (SPD) matrices, it is appealing to study DA techniques operating on the SPD manifold. This paper proposes a novel method termed Geodesic Optimization for Predictive Shift Adaptation (GOPSA) to address test-time multi-source DA for situations in which source domains have distinct $y$ distributions. GOPSA exploits the geodesic structure of the Riemannian manifold to jointly learn a domain-specific re-centering operator representing site-specific intercepts and the regression model. We performed empirical benchmarks on the cross-site generalization of age-prediction models with resting-state EEG data from a large multi-national dataset (HarMNqEEG), which included $14$ recording sites and more than $1500$ human participants. Compared to state-of-the-art methods, our results showed that GOPSA achieved significantly higher performance on three regression metrics ($R^2$, MAE, and Spearman's $\\rho$) for several source-target site combinations, highlighting its effectiveness in tackling multi-source DA with predictive shifts in EEG data analysis. Our method has the potential to combine the advantages of mixed-effects modeling with machine learning for biomedical applications of EEG, such as multicenter clinical trials.", "keywords": "EEG;brain age;Neurosciences;Riemannian geometry;Domain Adaptation;Mixed-effects models", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/6f86484449baa97d11c0db12d8eaeb11e6996514.zip", "author": "Apolline Mellot;Antoine Collas;Sylvain Chevallier;Alexandre Gramfort;Denis Alexander Engemann", "authorids": "~Apolline_Mellot1;~Antoine_Collas1;~Sylvain_Chevallier1;~Alexandre_Gramfort1;~Denis_Alexander_Engemann1", "gender": "F;M;M;M;", "homepage": ";https://www.antoinecollas.fr;https://sylvchev.github.io/;http://alexandre.gramfort.net;https://denis-engemann.de", "dblp": ";271/5313;56/5283;15/7980;", "google_scholar": "IG5V0YEAAAAJ;jxTnfogAAAAJ;https://scholar.google.co.uk/citations?user=j5Tu_SQAAAAJ;fhxshS0AAAAJ;ombAzhMAAAAJ", "orcid": ";;0000-0003-3027-8241;0000-0001-9791-4404;0000-0002-7223-1014", "linkedin": ";antoinecollas/;;alexandregramfort/;denis-a-engemann-4a107b2a6/", "or_profile": "~Apolline_Mellot1;~Antoine_Collas1;~Sylvain_Chevallier1;~Alexandre_Gramfort1;~Denis_Alexander_Engemann1", "aff": "INRIA;INRIA;LISN, Universite Paris-Saclay;Meta;Roche Innovation Center Basel, Pharma Research & Early Development, F. Hoffmann-La Roche Ltd.", "aff_domain": "inria.fr;inria.fr;universite-paris-saclay.fr;meta.com;roche.com", "position": "PhD student;Postdoc;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nmellot2024geodesic,\ntitle={Geodesic Optimization for Predictive Shift Adaptation on {EEG} data},\nauthor={Apolline Mellot and Antoine Collas and Sylvain Chevallier and Alexandre Gramfort and Denis Alexander Engemann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qTypwXvNJa}\n}", "github": "", "reviewers": "r2hf;ucmS;N9Jm;85Rg", "pdf_size": 1512007, "rating": "7;7;8;8", "confidence": "4;3;4;5", "soundness": "4;3;3;4", "novelty": "3;2;3;4", "presentation": "4;4;3;4", "wc_summary": "43;73;74;44", "wc_strengths": "52;46;48;42", "wc_weaknesses": "140;241;35;40", "wc_questions": "57;6;122;101", "wc_limitations": "22;49;16;2", "wc_review": "314;415;295;229", "wc_reply_reviewers": "58;47;0;44", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.5, 15.008331019803634 ], "wc_strengths_avg": [ 47.0, 3.605551275463989 ], "wc_weaknesses_avg": [ 114.0, 84.44228798416111 ], "wc_questions_avg": [ 71.5, 44.5 ], "wc_limitations_avg": [ 22.25, 17.06421694658152 ], "wc_review_avg": [ 313.25, 66.67973830182599 ], "wc_reply_reviewers_avg": [ 37.25, 22.128883839904805 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6657228702332631419&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 8, "email": "inria.fr;inria.fr;universite-paris-saclay.fr;meta.com;roche.com", "author_num": 5, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "INRIA;Universite Paris-Saclay;Meta;F. Hoffmann-La Roche Ltd.", "aff_unique_dep": ";LISN;Meta Platforms, Inc.;Pharma Research & Early Development", "aff_unique_url": "https://www.inria.fr;https://www.universite-paris-saclay.fr;https://meta.com;https://www.roche.com", "aff_unique_abbr": "INRIA;;Meta;Roche", "aff_campus_unique_index": "1", "aff_campus_unique": ";Basel", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "France;United States;Switzerland" }, { "title": "PowerGraph: A power grid benchmark dataset for graph neural networks", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97496", "id": "qWTfCO4HvT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qWTfCO4HvT", "openreview": "https://openreview.net/forum?id=qWTfCO4HvT", "poster": "", "project": "", "author_site": "Anna Varbella, Kenza Amara, Blazhe Gjorgiev, Mennatallah El-Assady, Giovanni Sansavini", "tldr": "", "abstract": "Power grids are critical infrastructures of paramount importance to modern society and, therefore, engineered to operate under diverse conditions and failures. The ongoing energy transition poses new challenges for the decision-makers and system operators. Therefore, we must develop grid analysis algorithms to ensure reliable operations. These key tools include power flow analysis and system security analysis, both needed for effective operational and strategic planning. The literature review shows a growing trend of machine learning (ML) models that perform these analyses effectively. In particular, Graph Neural Networks (GNNs) stand out in such applications because of the graph-based structure of power grids. However, there is a lack of publicly available graph datasets for training and benchmarking ML models in electrical power grid applications. First, we present PowerGraph, which comprises GNN-tailored datasets for i) power flows, ii) optimal power flows, and iii) cascading failure analyses of power grids. Second, we provide ground-truth explanations for the cascading failure analysis. Finally, we perform a complete benchmarking of GNN methods for node-level and graph-level tasks and explainability. Overall, PowerGraph is a multifaceted GNN dataset for diverse tasks that includes power flow and fault scenarios with real-world explanations, providing a valuable resource for developing improved GNN models for node-level, graph-level tasks and explainability methods in power system modeling. The dataset is available at https://figshare.com/articles/dataset/PowerGraph/22820534 and the code at https://github.com/PowerGraph-Datasets.", "keywords": "Dataset;Power Systems;Graph Neural Networks;Explainable AI", "primary_area": "", "supplementary_material": "/attachment/3f0eebd0c2a5da3b96df21773bf97afd1090f734.pdf", "author": "Anna Varbella;Kenza Amara;Blazhe Gjorgiev;Mennatallah El-Assady;Giovanni Sansavini", "authorids": "~Anna_Varbella1;~Kenza_Amara1;~Blazhe_Gjorgiev1;~Mennatallah_El-Assady1;~Giovanni_Sansavini1", "gender": "F;F;;;Not Specified", "homepage": ";https://ai.ethz.ch/people/kenza-amara.html;https://rre.ethz.ch/the-lab/people/senior-researchers/dr--blazhe-gjorgiev.html;;http://www.rre.ethz.ch", "dblp": ";;;183/8957;", "google_scholar": "X87maeEAAAAJ;e4wlh1AAAAAJ;https://scholar.google.ch/citations?user=3pDpQIAAAAAJ;;", "orcid": "0000-0003-0778-5106;0000-0001-7139-5562;;0000-0001-8526-2613;", "linkedin": "anna-varbella-1661441b7/;kenza-amara/;;;", "or_profile": "~Anna_Varbella1;~Kenza_Amara1;~Blazhe_Gjorgiev1;~Mennatallah_El-Assady1;~Giovanni_Sansavini1", "aff": "ETHZ - ETH Zurich;ETH AI Center;ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;infk.ethz.ch;ethz.ch;inf.ethz.ch;ethz.ch", "position": "PhD student;PhD student;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nvarbella2024powergraph,\ntitle={PowerGraph: A power grid benchmark dataset for graph neural networks},\nauthor={Anna Varbella and Kenza Amara and Blazhe Gjorgiev and Mennatallah El-Assady and Giovanni Sansavini},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qWTfCO4HvT}\n}", "github": "", "reviewers": "qprq;EEeb;dGzJ;j6SE", "pdf_size": 1801034, "rating": "6;6;7;8", "confidence": "4;3;2;4", "wc_summary_and_contributions": "93;91;55;172", "wc_strengths": "47;91;2;4", "wc_improvement": "276;111;198;38", "wc_limitations": "51;1;13;2", "wc_correctness": "11;9;8;9", "wc_clarity": "19;10;1;1", "wc_relation_to_prior_work": "9;1;13;29", "wc_documentation": "9;8;55;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "516;323;346;259", "wc_reply_reviewers": "461;13;0;150", "wc_reply_authors": "110;46;0;110", "reply_reviewers": "3;1;0;2", "reply_authors": "3;2;1;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 102.75, 42.745613810074126 ], "wc_strengths_avg": [ 36.0, 36.489724581037876 ], "wc_improvement_avg": [ 155.75, 89.60015345969 ], "wc_limitations_avg": [ 16.75, 20.327014045353536 ], "wc_correctness_avg": [ 9.25, 1.0897247358851685 ], "wc_clarity_avg": [ 7.75, 7.46240577829965 ], "wc_relation_to_prior_work_avg": [ 13.0, 10.198039027185569 ], "wc_documentation_avg": [ 18.75, 21.05201890555868 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 361.0, 94.99736838460316 ], "wc_reply_reviewers_avg": [ 156.0, 185.63808876413268 ], "wc_reply_authors_avg": [ 66.5, 46.44082255946809 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6805421884575826990&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ethz.ch;infk.ethz.ch;ethz.ch;inf.ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Most Influential Subset Selection: Challenges, Promises, and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93494", "id": "qWi33pPecC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qWi33pPecC", "openreview": "https://openreview.net/forum?id=qWi33pPecC", "poster": "", "project": "", "author_site": "Yuzheng Hu, Pingbang Hu, Han Zhao, Jiaqi Ma", "tldr": "", "abstract": "How can we attribute the behaviors of machine learning models to their training data? While the classic influence function sheds light on the impact of individual samples, it often fails to capture the more complex and pronounced collective influence of a set of samples. To tackle this challenge, we study the Most Influential Subset Selection (MISS) problem, which aims to identify a subset of training samples with the greatest collective influence. We conduct a comprehensive analysis of the prevailing approaches in MISS, elucidating their strengths and weaknesses. Our findings reveal that influence-based greedy heuristics, a dominant class of algorithms in MISS, can provably fail even in linear regression. We delineate the failure modes, including the errors of influence function and the non-additive structure of the collective influence. Conversely, we demonstrate that an adaptive version of these heuristics which applies them iteratively, can effectively capture the interactions among samples and thus partially address the issues. Experiments on real-world datasets corroborate these theoretical findings, and further demonstrate that the merit of adaptivity can extend to more complex scenarios such as classification tasks and non-linear neural networks. We conclude our analysis by emphasizing the inherent trade-off between performance and computational efficiency, questioning the use of additive metrics such as the linear datamodeling score, and offering a range of discussions.", "keywords": "influential subset;influence function;data attribution;interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yuzheng Hu;Pingbang Hu;Han Zhao;Jiaqi Ma", "authorids": "~Yuzheng_Hu1;~Pingbang_Hu1;~Han_Zhao1;~Jiaqi_Ma1", "gender": "M;M;M;", "homepage": "https://mirnegg.github.io;https://pbb.wtf;https://hanzhaoml.github.io/;https://jiaqima.github.io", "dblp": "231/2255.html;331/2848;03/3520-2;155/2199-1", "google_scholar": "cVVimVcAAAAJ;lPlQpqoAAAAJ;x942ipYAAAAJ;Z9X2A1MAAAAJ", "orcid": ";;0000-0002-8579-1600;0000-0001-8292-5901", "linkedin": "yuzheng-hu-a74b5823b/;;;", "or_profile": "~Yuzheng_Hu1;~Pingbang_Hu1;~Han_Zhao1;~Jiaqi_Ma1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign", "aff_domain": "uiuc.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhu2024most,\ntitle={Most Influential Subset Selection: Challenges, Promises, and Beyond},\nauthor={Yuzheng Hu and Pingbang Hu and Han Zhao and Jiaqi Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qWi33pPecC}\n}", "github": "", "reviewers": "3kwV;Uyvf;2zFQ;btdJ", "pdf_size": 1315344, "rating": "2;7;7;7", "confidence": "3;4;4;3", "soundness": "3;4;3;3", "novelty": "1;2;3;3", "presentation": "2;4;4;4", "wc_summary": "78;123;124;148", "wc_strengths": "95;78;89;86", "wc_weaknesses": "351;140;46;191", "wc_questions": "1;114;115;226", "wc_limitations": "48;10;1;23", "wc_review": "573;465;375;674", "wc_reply_reviewers": "0;79;0;42", "wc_reply_authors": "0;9;0;0", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 2.165063509461097 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 118.25, 25.301926804099327 ], "wc_strengths_avg": [ 87.0, 6.123724356957945 ], "wc_weaknesses_avg": [ 182.0, 110.5689829925192 ], "wc_questions_avg": [ 114.0, 79.55186987117273 ], "wc_limitations_avg": [ 20.5, 17.698870020427858 ], "wc_review_avg": [ 521.75, 112.4308120579052 ], "wc_reply_reviewers_avg": [ 30.25, 32.95735881407975 ], "wc_reply_authors_avg": [ 2.25, 3.897114317029974 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=48978926960670774&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uiuc.edu;illinois.edu;illinois.edu;illinois.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Prune and Repaint: Content-Aware Image Retargeting for any Ratio", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93493", "id": "qWi6ESgBjB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qWi6ESgBjB", "openreview": "https://openreview.net/forum?id=qWi6ESgBjB", "poster": "/media/PosterPDFs/NeurIPS%202024/93493.png?t=1730362545.848693", "project": "", "author_site": "Feihong Shen, Chao Li, Yifeng Geng, Yongjian Deng, Hao Chen", "tldr": "", "abstract": "Image retargeting is the task of adjusting the aspect ratio of images to suit different display devices or presentation environments. However, existing retargeting methods often struggle to balance the preservation of key semantics and image quality, resulting in either deformation or loss of important objects, or the introduction of local artifacts such as discontinuous pixels and inconsistent regenerated content. To address these issues, we propose a content-aware retargeting method called PruneRepaint. It incorporates semantic importance for each pixel to guide the identification of regions that need to be pruned or preserved in order to maintain key semantics. Additionally, we introduce an adaptive repainting module that selects image regions for repainting based on the distribution of pruned pixels and the proportion between foreground size and target aspect ratio, thus achieving local smoothness after pruning. By focusing on the content and structure of the foreground, our PruneRepaint approach adaptively avoids key content loss and deformation, while effectively mitigating artifacts with local repainting. We conduct experiments on the public RetargetMe benchmark and demonstrate through objective experimental results and subjective user studies that our method outperforms previous approaches in terms of preserving semantics and aesthetics, as well as better generalization across diverse aspect ratios. Codes will be available at\n https://github.com/fhshen2022/PruneRepaint.", "keywords": "Image Retargeting;content-aware seam-carving;adaptive repainting", "primary_area": "machine_vision", "supplementary_material": "", "author": "Feihong Shen;Chao Li;Yifeng Geng;Yongjian Deng;Hao Chen", "authorids": "~Feihong_Shen1;~Chao_Li17;~Yifeng_Geng2;~Yongjian_Deng1;~Hao_Chen39", "gender": "M;M;M;M;M", "homepage": "https://fhshen2022.github.io/;;;;", "dblp": ";66/190-64.html;45/10331;271/3286;", "google_scholar": ";o6zc8HMAAAAJ;-58L4KAAAAAJ;gKXu0XgAAAAJ;6qVQ7ZMAAAAJ", "orcid": ";;;0000-0001-6253-3564;", "linkedin": ";;yifeng-geng-2aaa774b/;;", "or_profile": "~Feihong_Shen1;~Chao_Li17;~Yifeng_Geng2;~Yongjian_Deng1;~Hao_Chen39", "aff": "Southeast University;Alibaba Group;Alibaba Group;Beijing University of Technology;Southeast University", "aff_domain": "seu.edu.cn;alibaba-inc.com;alibaba-inc.com;bjut.edu.cn;seu.edu.cn", "position": "MS student;Researcher;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nshen2024prune,\ntitle={Prune and Repaint: Content-Aware Image Retargeting for any Ratio},\nauthor={Feihong Shen and Chao Li and Yifeng Geng and Yongjian Deng and Hao Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qWi6ESgBjB}\n}", "github": "", "reviewers": "oiR7;4WgC;RHY7;HZC2", "pdf_size": 40057024, "rating": "6;6;7;7", "confidence": "4;5;4;3", "soundness": "3;1;3;3", "novelty": "3;1;3;3", "presentation": "3;1;3;3", "wc_summary": "91;18;54;93", "wc_strengths": "102;2;77;62", "wc_weaknesses": "175;84;243;90", "wc_questions": "198;3;175;142", "wc_limitations": "6;4;8;9", "wc_review": "572;111;557;396", "wc_reply_reviewers": "51;0;31;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 64.0, 30.76524012583032 ], "wc_strengths_avg": [ 60.75, 36.80607966083864 ], "wc_weaknesses_avg": [ 148.0, 65.60106706449217 ], "wc_questions_avg": [ 129.5, 75.69841477864645 ], "wc_limitations_avg": [ 6.75, 1.920286436967152 ], "wc_review_avg": [ 409.0, 185.36855180963138 ], "wc_reply_reviewers_avg": [ 20.5, 21.68524844220144 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WnFKt06aeJYJ:scholar.google.com/&scioq=Prune+and+Repaint:+Content-Aware+Image+Retargeting+for+any+Ratio&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "seu.edu.cn;alibaba-inc.com;alibaba-inc.com;bjut.edu.cn;seu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Southeast University;Alibaba Group;Beijing University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.seu.edu.cn/;https://www.alibaba.com;http://www.bjut.edu.cn", "aff_unique_abbr": "SEU;Alibaba;BJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Emotion-LLaMA: Multimodal Emotion Recognition and Reasoning with Instruction Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93492", "id": "qXZVSy9LFR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qXZVSy9LFR", "openreview": "https://openreview.net/forum?id=qXZVSy9LFR", "poster": "/media/PosterPDFs/NeurIPS%202024/93492.png?t=1731555074.7878177", "project": "", "author_site": "Zebang Cheng, Zhi-Qi Cheng, Jun-Yan He, Kai Wang, Yuxiang Lin, Zheng Lian, Xiaojiang Peng, Alexander Hauptmann", "tldr": "", "abstract": "Accurate emotion perception is crucial for various applications, including human-computer interaction, education, and counseling.\nHowever, traditional single-modality approaches often fail to capture the complexity of real-world emotional expressions, which are inherently multimodal. Moreover, existing Multimodal Large Language Models (MLLMs) face challenges in integrating audio and recognizing subtle facial micro-expressions. To address this, we introduce the MERR dataset, containing 28,618 coarse-grained and 4,487 fine-grained annotated samples across diverse emotional categories. This dataset enables models to learn from varied scenarios and generalize to real-world applications. Furthermore, we propose Emotion-LLaMA, a model that seamlessly integrates audio, visual, and textual inputs through emotion-specific encoders. By aligning features into a shared space and employing a modified LLaMA model with instruction tuning, Emotion-LLaMA significantly enhances both emotional recognition and reasoning capabilities. Extensive evaluations show Emotion-LLaMA outperforms other MLLMs, achieving top scores in Clue Overlap (7.83) and Label Overlap (6.25) on EMER, an F1 score of 0.9036 on MER2023-SEMI challenge, and the highest UAR (45.59) and WAR (59.37) in zero-shot evaluations on DFEW dataset.", "keywords": "Instruction Tuning;Multi-modal Emotion Recognition;Multi-modal Emotion Reasoning", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Zebang Cheng;Zhi-Qi Cheng;Jun-Yan He;Kai Wang;Yuxiang Lin;Zheng Lian;Xiaojiang Peng;Alexander G Hauptmann", "authorids": "~Zebang_Cheng1;~Zhi-Qi_Cheng1;~Jun-Yan_He2;~Kai_Wang8;~Yuxiang_Lin1;~Zheng_Lian3;~Xiaojiang_Peng1;~Alexander_G_Hauptmann1", "gender": "M;Not Specified;M;M;M;M;M;M", "homepage": ";https://faculty.washington.edu/zhiqics/;https://kaiwang960112.github.io/;https://lum1104.github.io;https://zeroqiaoba.github.io/Homepage/;https://pengxj.github.io/;;", "dblp": "343/2911;188/1193;78/2022-36;;;133/6556;h/AlexanderGHauptmann;173/3747", "google_scholar": ";uB2He2UAAAAJ;i2II0XIAAAAJ;https://scholar.google.com/citations?hl=en;S34nWz0AAAAJ;7oRD67kAAAAJ;https://scholar.google.co.uk/citations?user=Py54GcEAAAAJ;bjNZqGAAAAAJ", "orcid": "0009-0001-2854-7425;0000-0002-1720-2085;0000-0002-1154-5175;0009-0004-7835-9352;0000-0001-9477-0599;;;", "linkedin": ";zhiqicheng/;;;;;;", "or_profile": "~Zebang_Cheng1;~Zhi-Qi_Cheng1;~Kai_Wang8;~Yuxiang_Lin1;~Zheng_Lian3;~Xiaojiang_Peng1;~Alexander_G_Hauptmann1;~HE_JUNYAN1", "aff": "Shenzhen University;Carnegie Mellon University;National University of Singapore;Shenzhen Technology University;Institute of Automation, Chinese Academy of Sciences;Shenzhen Technology University;School of Computer Science, Carnegie Mellon University;Alibaba Group", "aff_domain": "szu.edu.cn;cmu.edu;u.nus.edu;sztu.edu.cn;ia.ac.cn;sztu.edu.cn;cs.cmu.edu;alibaba-inc.com", "position": "MS student;Project Scientist & Instructor;PhD student;Undergrad student;Assistant Professor;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ncheng2024emotionllama,\ntitle={Emotion-{LL}a{MA}: Multimodal Emotion Recognition and Reasoning with Instruction Tuning},\nauthor={Zebang Cheng and Zhi-Qi Cheng and Jun-Yan He and Kai Wang and Yuxiang Lin and Zheng Lian and Xiaojiang Peng and Alexander G Hauptmann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qXZVSy9LFR}\n}", "github": "", "reviewers": "iNoC;3ouG;646t;V8m9", "pdf_size": 16282476, "rating": "3;4;5;5", "confidence": "4;3;4;3", "soundness": "2;3;2;3", "novelty": "2;4;1;3", "presentation": "2;3;2;3", "wc_summary": "38;162;28;56", "wc_strengths": "29;132;32;24", "wc_weaknesses": "562;120;16;79", "wc_questions": "1;160;77;39", "wc_limitations": "126;50;1;4", "wc_review": "756;624;154;202", "wc_reply_reviewers": "171;0;0;33", "wc_reply_authors": "323;385;322;368", "reply_reviewers": "1;0;0;1", "reply_authors": "4;4;4;4", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 71.0, 53.48831648126533 ], "wc_strengths_avg": [ 54.25, 44.97985660270606 ], "wc_weaknesses_avg": [ 194.25, 215.52769543610862 ], "wc_questions_avg": [ 69.25, 58.88282856656939 ], "wc_limitations_avg": [ 45.25, 50.50433149740723 ], "wc_review_avg": [ 434.0, 260.77193100485334 ], "wc_reply_reviewers_avg": [ 51.0, 70.57974213611155 ], "wc_reply_authors_avg": [ 349.5, 27.663152387246107 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 4.0, 0.0 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2448494290607946493&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "szu.edu.cn;cmu.edu;u.nus.edu;sztu.edu.cn;ia.ac.cn;sztu.edu.cn;cs.cmu.edu;alibaba-inc.com", "author_num": 8, "aff_unique_index": "0;1;2;3;4;3;1;5", "aff_unique_norm": "Shenzhen University;Carnegie Mellon University;National University of Singapore;Shenzhen Technology University;Chinese Academy of Sciences;Alibaba Group", "aff_unique_dep": ";;;;Institute of Automation;", "aff_unique_url": "https://www.szu.edu.cn;https://www.cmu.edu;https://www.nus.edu.sg;https://www.sztu.edu.cn;http://www.ia.cas.cn;https://www.alibaba.com", "aff_unique_abbr": "SZU;CMU;NUS;;CAS;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1;2;0;0;0;1;0", "aff_country_unique": "China;United States;Singapore" }, { "title": "Expert-level protocol translation for self-driving labs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93491", "id": "qXidsICaja", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qXidsICaja", "openreview": "https://openreview.net/forum?id=qXidsICaja", "poster": "/media/PosterPDFs/NeurIPS%202024/93491.png?t=1731375626.818643", "project": "", "author_site": "Yu-Zhe Shi, Fanxu Meng, Haofei Hou, Zhangqian Bi, Qiao Xu, Lecheng Ruan, Qining Wang", "tldr": "", "abstract": "Recent development in Artificial Intelligence (AI) models has propelled their application in scientific discovery, but the validation and exploration of these discoveries require subsequent empirical experimentation. The concept of self-driving laboratories promises to automate and thus boost the experimental process following AI-driven discoveries. However, the transition of experimental protocols, originally crafted for human comprehension, into formats interpretable by machines presents significant challenges, which, within the context of specific expert domain, encompass the necessity for structured as opposed to natural language, the imperative for explicit rather than tacit knowledge, and the preservation of causality and consistency throughout protocol steps. Presently, the task of protocol translation predominantly requires the manual and labor-intensive involvement of domain experts and information technology specialists, rendering the process time-intensive. To address these issues, we propose a framework that automates the protocol translation process through a three-stage workflow, which incrementally constructs Protocol Dependence Graphs (PDGs) that approach structured on the syntax level, completed on the semantics level, and linked on the execution level. Quantitative and qualitative evaluations have demonstrated its performance at par with that of human experts, underscoring its potential to significantly expedite and democratize the process of scientific discovery by elevating the automation capabilities within self-driving laboratories.", "keywords": "Self-Driving Laboratories;Domain-Specific Language;Structural Representation;Knowledge Externalization", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/b5e13d6050c51fb7e508c02b7b87db8cae62a632.zip", "author": "Yu-Zhe Shi;Fanxu Meng;Haofei Hou;Zhangqian Bi;Qiao Xu;Lecheng Ruan;Qining Wang", "authorids": "~Yu-Zhe_Shi1;~Fanxu_Meng3;~Haofei_Hou2;~Zhangqian_Bi2;~Qiao_Xu2;~Lecheng_Ruan1;~Qining_Wang1", "gender": "M;;M;M;M;;M", "homepage": "https://yuzheshi.github.io/;;https://yuechuhaoxi020609.github.io/;;;;http://www2.coe.pku.edu.cn/subpaget.asp?id=232", "dblp": "334/2089;;;322/8117;;;", "google_scholar": ";;cDoN854AAAAJ;dZr-xt0AAAAJ;MXr_mCUAAAAJ;;", "orcid": "0000-0003-2066-005X;0000-0002-9601-6490;0009-0004-4068-7772;;;;", "linkedin": ";;;;;;", "or_profile": "~Yu-Zhe_Shi1;~Fanxu_Meng3;~Haofei_Hou2;~Zhangqian_Bi2;~Qiao_Xu2;~Lecheng_Ruan1;~Qining_Wang1", "aff": "PersLab Research;Peking University;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;;Peking University", "aff_domain": "perslab.co;stu.pku.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;;pku.edu.cn", "position": "Researcher;PhD student;Undergrad student;PhD student;Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nshi2024expertlevel,\ntitle={Expert-level protocol translation for self-driving labs},\nauthor={Yu-Zhe Shi and Fanxu Meng and Haofei Hou and Zhangqian Bi and Qiao Xu and Lecheng Ruan and Qining Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qXidsICaja}\n}", "github": "", "reviewers": "RqLD;uEvE;AwgV", "pdf_size": 16483897, "rating": "5;6;7", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "72;101;64", "wc_strengths": "72;51;35", "wc_weaknesses": "122;56;55", "wc_questions": "36;38;63", "wc_limitations": "80;8;7", "wc_review": "382;254;224", "wc_reply_reviewers": "115;0;0", "wc_reply_authors": "1559;674;5235", "reply_reviewers": "1;0;0", "reply_authors": "4;3;10", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.0, 15.895492023421818 ], "wc_strengths_avg": [ 52.666666666666664, 15.15109090315135 ], "wc_weaknesses_avg": [ 77.66666666666667, 31.351058816073323 ], "wc_questions_avg": [ 45.666666666666664, 12.283683848458853 ], "wc_limitations_avg": [ 31.666666666666668, 34.179265969622904 ], "wc_review_avg": [ 286.6666666666667, 68.51439427027158 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 54.21151989096864 ], "wc_reply_authors_avg": [ 2489.3333333333335, 1974.8114396625876 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 5.666666666666667, 3.0912061651652345 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13997597682274906943&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "perslab.co;stu.pku.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;2;2;1", "aff_unique_norm": "PersLab Research;Peking University;Huazhong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": ";http://www.pku.edu.cn;http://www.hust.edu.cn", "aff_unique_abbr": ";Peking U;HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1", "aff_country_unique": ";China" }, { "title": "MMM-RS: A Multi-modal, Multi-GSD, Multi-scene Remote Sensing Dataset and Benchmark for Text-to-Image Generation", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97495", "id": "qXvepIzFL5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qXvepIzFL5", "openreview": "https://openreview.net/forum?id=qXvepIzFL5", "poster": "/media/PosterPDFs/NeurIPS%202024/97495.png?t=1731330874.6317387", "project": "", "author_site": "jialin luo, Yuanzhi Wang, Ziqi Gu, Yide Qiu, Shuaizhen Yao, Fuyun Wang, Chunyan Xu, Wenhua Zhang, Dan Wang, Zhen Cui", "tldr": "", "abstract": "Recently, the diffusion-based generative paradigm has achieved impressive general image generation capabilities with text prompts due to its accurate distribution modeling and stable training process. However, generating diverse remote sensing (RS) images that are tremendously different from general images in terms of scale and perspective remains a formidable challenge due to the lack of a comprehensive remote sensing image generation dataset with various modalities, ground sample distances (GSD), and scenes. In this paper, we propose a Multi-modal, Multi-GSD, Multi-scene Remote Sensing (MMM-RS) dataset and benchmark for text-to-image generation in diverse remote sensing scenarios. Specifically, we first collect nine publicly available RS datasets and conduct standardization for all samples. To bridge RS images to textual semantic information, we utilize a large-scale pretrained vision-language model to automatically output text prompts and perform hand-crafted rectification, resulting in information-rich text-image pairs (including multi-modal images). In particular, we design some methods to obtain the images with different GSD and various environments (e.g., low-light, foggy) in a single sample. With extensive manual screening and refining annotations, we ultimately obtain a MMM-RS dataset that comprises approximately 2.1 million text-image pairs. Extensive experimental results verify that our proposed MMM-RS dataset allows off-the-shelf diffusion models to generate diverse RS images across various modalities, scenes, weather conditions, and GSD. The dataset is available at https://github.com/ljl5261/MMM-RS.", "keywords": "Remote Sensing;Multi-moda;Multi-GSD;Multi-scene;Text-to-Image Generation", "primary_area": "", "supplementary_material": "/attachment/67440bbb495391eb60bc6a3296895d8569ab24fa.pdf", "author": "Jialin Luo;Yuanzhi Wang;Ziqi Gu;Yide Qiu;Shuaizhen Yao;Fuyun Wang;Chunyan Xu;Wenhua Zhang;Dan Wang;Zhen Cui", "authorids": "~Jialin_Luo2;~Yuanzhi_Wang1;~Ziqi_Gu1;~Yide_Qiu1;~Shuaizhen_Yao1;~Fuyun_Wang1;~Chunyan_Xu3;~Wenhua_Zhang3;~Dan_Wang8;~Zhen_Cui4", "gender": "M;M;M;M;M;M;F;F;F;M", "homepage": "http://vgg.ai.cn/;https://mdswyz.github.io/;https://vgg-ai.cn/students/;https://vgg-ai.cn/students/;;https://fuyunwang.github.io;;;;http://aip.seu.edu.cn/zcui/", "dblp": ";09/7017;;;279/5287;;70/8453.html;;;59/8491-1", "google_scholar": ";https://scholar.google.com.hk/citations?user=pgZc4sgAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;Jc19RCMAAAAJ;VM_IRfMAAAAJ;;;ChRyl3kAAAAJ", "orcid": ";0000-0003-2594-2574;;;;0009-0003-7843-5215;;0000-0002-3260-535X;0009-0006-6908-374X;", "linkedin": ";;;;;;;;;", "or_profile": "~Jialin_Luo2;~Yuanzhi_Wang1;~Ziqi_Gu1;~Yide_Qiu1;~Shuaizhen_Yao1;~Fuyun_Wang1;~Chunyan_Xu3;~Wenhua_Zhang3;~Dan_Wang8;~Zhen_Cui4", "aff": "Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;Nanjing University of Science and Technology;China Academy of Space Technology (CAST);Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;cast.cn;njust.edu.cn", "position": "MS student;PhD student;PhD student;MS student;PhD student;PhD student;Full Professor;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nluo2024mmmrs,\ntitle={{MMM}-{RS}: A Multi-modal, Multi-{GSD}, Multi-scene Remote Sensing Dataset and Benchmark for Text-to-Image Generation},\nauthor={Jialin Luo and Yuanzhi Wang and Ziqi Gu and Yide Qiu and Shuaizhen Yao and Fuyun Wang and Chunyan Xu and Wenhua Zhang and Dan Wang and Zhen Cui},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qXvepIzFL5}\n}", "github": "", "reviewers": "DrKY;7rKP;8np9;xUzE", "pdf_size": 6955053, "rating": "5;6;6;7", "confidence": "4;5;4;4", "wc_summary_and_contributions": "42;77;39;88", "wc_strengths": "4;12;53;90", "wc_improvement": "120;32;92;7", "wc_limitations": "18;35;4;133", "wc_correctness": "90;2;16;1", "wc_clarity": "13;3;10;1", "wc_relation_to_prior_work": "4;10;19;1", "wc_documentation": "38;17;10;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "330;189;244;327", "wc_reply_reviewers": "96;30;37;44", "wc_reply_authors": "81;19;45;69", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 61.5, 21.383404780343096 ], "wc_strengths_avg": [ 39.75, 34.455587355318734 ], "wc_improvement_avg": [ 62.75, 45.240330458563186 ], "wc_limitations_avg": [ 47.5, 50.56925943693461 ], "wc_correctness_avg": [ 27.25, 36.71086351476903 ], "wc_clarity_avg": [ 6.75, 4.9180788932265 ], "wc_relation_to_prior_work_avg": [ 8.5, 6.87386354243376 ], "wc_documentation_avg": [ 17.5, 12.579745625409124 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 272.5, 59.28954376616504 ], "wc_reply_reviewers_avg": [ 51.75, 26.022826518270456 ], "wc_reply_authors_avg": [ 53.5, 23.76446927663229 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=389553793253895161&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;njust.edu.cn;cast.cn;njust.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;1;0", "aff_unique_norm": "Nanjing University of Science and Technology;China Academy of Space Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;http://www.cast.cn/", "aff_unique_abbr": "NUST;CAST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Uncovering, Explaining, and Mitigating the Superficial Safety of Backdoor Defense", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93490", "id": "qZFshkbWDo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qZFshkbWDo", "openreview": "https://openreview.net/forum?id=qZFshkbWDo", "poster": "", "project": "", "author_site": "Rui Min, Zeyu Qin, Nevin L. Zhang, Li Shen, Minhao Cheng", "tldr": "", "abstract": "Backdoor attacks pose a significant threat to Deep Neural Networks (DNNs) as they allow attackers to manipulate model predictions with backdoor triggers. To address these security vulnerabilities, various backdoor purification methods have been proposed to purify compromised models. Typically, these purified models exhibit low Attack Success Rates (ASR), rendering them resistant to backdoored inputs. However, \\textit{Does achieving a low ASR through current safety purification methods truly eliminate learned backdoor features from the pretraining phase?} In this paper, we provide an affirmative answer to this question by thoroughly investigating the \\textit{Post-Purification Robustness} of current backdoor purification methods. We find that current safety purification methods are vulnerable to the rapid re-learning of backdoor behavior, even when further fine-tuning of purified models is performed using a very small number of poisoned samples. Based on this, we further propose the practical Query-based Reactivation Attack (QRA) which could effectively reactivate the backdoor by merely querying purified models. We find the failure to achieve satisfactory post-purification robustness stems from the insufficient deviation of purified models from the backdoored model along the backdoor-connected path. To improve the post-purification robustness, we propose a straightforward tuning defense, Path-Aware Minimization (PAM), which promotes deviation along backdoor-connected paths with extra model updates. Extensive experiments demonstrate that PAM significantly improves post-purification robustness while maintaining a good clean accuracy and low ASR. Our work provides a new perspective on understanding the effectiveness of backdoor safety tuning and highlights the importance of faithfully assessing the model's safety.", "keywords": "Backdoor Safety;Safety Tuning;Superficial Safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Rui Min;Zeyu Qin;Nevin L. Zhang;Li Shen;Minhao Cheng", "authorids": "~Rui_Min1;~Zeyu_Qin1;~Nevin_L._Zhang1;~Li_Shen1;~Minhao_Cheng1", "gender": "M;M;M;M;M", "homepage": "https://rmin2000.github.io/;https://alan-qin.github.io/;https://sites.google.com/site/mathshenli/home;https://cmhcbb.github.io/;https://cse.hkust.edu.hk/~lzhang/teach/courses.html", "dblp": ";271/5778;91/3680-8;174/1717;https://dblp.uni-trier.de/pid/z/NevinLianwenZhang.html", "google_scholar": "https://scholar.google.com/citations?hl=en;3LXI4-MAAAAJ;yVhgENIAAAAJ;_LkC1yoAAAAJ;", "orcid": "0009-0005-8528-9783;0000-0003-1733-7892;;0000-0003-3965-4215;", "linkedin": ";zeyu-qin-546398179/;;;", "or_profile": "~Rui_Min1;~Zeyu_Qin1;~Li_Shen1;~Minhao_Cheng1;~Nevin_Zhang1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;JD Explore Academy;Pennsylvania State University;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;ust.hk;jd.com;psu.edu;ust.hk", "position": "PhD student;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmin2024uncovering,\ntitle={Uncovering, Explaining, and Mitigating the Superficial Safety of Backdoor Defense},\nauthor={Rui Min and Zeyu Qin and Nevin L. Zhang and Li Shen and Minhao Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qZFshkbWDo}\n}", "github": "", "reviewers": "JZci;jgTc;R9Lq;YFJE", "pdf_size": 2528254, "rating": "5;6;6;7", "confidence": "4;5;4;3", "soundness": "2;3;2;3", "novelty": "2;3;4;3", "presentation": "2;2;3;3", "wc_summary": "91;75;63;121", "wc_strengths": "30;101;42;56", "wc_weaknesses": "247;166;42;29", "wc_questions": "75;18;23;38", "wc_limitations": "15;8;24;8", "wc_review": "458;368;194;252", "wc_reply_reviewers": "22;0;62;15", "wc_reply_authors": "20;39;275;100", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.5, 21.742814905158898 ], "wc_strengths_avg": [ 57.25, 26.88284769141841 ], "wc_weaknesses_avg": [ 121.0, 90.28565777575085 ], "wc_questions_avg": [ 38.5, 22.321514285549714 ], "wc_limitations_avg": [ 13.75, 6.5717197140474575 ], "wc_review_avg": [ 318.0, 102.26436329435587 ], "wc_reply_reviewers_avg": [ 24.75, 22.92787604642 ], "wc_reply_authors_avg": [ 108.5, 100.56962762186205 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7413930445951664455&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "cse.ust.hk;ust.hk;jd.com;psu.edu;ust.hk", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;JD;Pennsylvania State University", "aff_unique_dep": ";JD Explore Academy;", "aff_unique_url": "https://www.ust.hk;;https://www.psu.edu", "aff_unique_abbr": "HKUST;;PSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;2;0", "aff_country_unique": "China;;United States" }, { "title": "Kaleido Diffusion: Improving Conditional Diffusion Models with Autoregressive Latent Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93489", "id": "qZSwlcLMCS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qZSwlcLMCS", "openreview": "https://openreview.net/forum?id=qZSwlcLMCS", "poster": "", "project": "", "author_site": "Jiatao Gu, Ying Shen, Shuangfei Zhai, Yizhe Zhang, Navdeep Jaitly, Joshua Susskind", "tldr": "", "abstract": "Diffusion models have emerged as a powerful tool for generating high-quality images from textual descriptions. Despite their successes, these models often exhibit limited diversity in the sampled images, particularly when sampling with a high classifier-free guidance weight. To address this issue, we present Kaleido, a novel approach that enhances the diversity of samples by incorporating autoregressive latent priors. Kaleido integrates an autoregressive language model that encodes the original caption and generates latent variables, serving as abstract and intermediary representations for guiding and facilitating the image generation process.\nIn this paper, we explore a variety of discrete latent representations, including textual descriptions, detection bounding boxes, object blobs, and visual tokens. These representations diversify and enrich the input conditions to the diffusion models, enabling more diverse outputs.\nOur experimental results demonstrate that Kaleido effectively broadens the diversity of the generated image samples from a given textual description while maintaining high image quality. Furthermore, we show that Kaleido adheres closely to the guidance provided by the generated latent variables, demonstrating its capability to effectively control and direct the image generation process.", "keywords": "diffusion models;autoregressive models;latent variable models;text-to-image", "primary_area": "generative_models", "supplementary_material": "", "author": "Jiatao Gu;Ying Shen;Shuangfei Zhai;Yizhe Zhang;Navdeep Jaitly;Joshua M. Susskind", "authorids": "~Jiatao_Gu1;~Ying_Shen4;~Shuangfei_Zhai3;~Yizhe_Zhang2;~Navdeep_Jaitly1;~Joshua_M._Susskind1", "gender": "M;F;M;M;M;M", "homepage": "http://jiataogu.me;https://yingshen-ys.github.io/;http://cs.binghamton.edu/~szhai2;https://dreasysnail.github.io;http://www.cs.toronto.edu/~ndjaitly/;http://www.apple.com", "dblp": "164/5848.html;01/8558-6;;132/4966-2.html;04/6137;132/7797", "google_scholar": "https://scholar.google.com.sg/citations?user=cB1mFBsAAAAJ;NytpXgwAAAAJ;G6vdBYsAAAAJ;WDVMfggAAAAJ;kjMNMLkAAAAJ;Sv2TGqsAAAAJ", "orcid": ";0009-0005-9847-4507;;;;", "linkedin": "jiatao-gu-204b2672/;ying-shen-ys/;;;;joshua-susskind-8ab2ab5/", "or_profile": "~Jiatao_Gu1;~Ying_Shen4;~Shuangfei_Zhai3;~Yizhe_Zhang2;~Navdeep_Jaitly1;~Joshua_M._Susskind1", "aff": "Apple;Virginia Polytechnic Institute and State University;Apple;Apple;Apple;Apple", "aff_domain": "apple.com;vt.edu;apple.com;apple.com;apple.com;apple.com", "position": "Researcher;PhD student;Research Scientist;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\ngu2024kaleido,\ntitle={Kaleido Diffusion: Improving Conditional Diffusion Models with Autoregressive Latent Modeling},\nauthor={Jiatao Gu and Ying Shen and Shuangfei Zhai and Yizhe Zhang and Navdeep Jaitly and Joshua M. Susskind},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qZSwlcLMCS}\n}", "github": "", "reviewers": "PSab;6rHP;d7gp;7a2N", "pdf_size": 49940925, "rating": "6;6;6;7", "confidence": "4;4;4;5", "soundness": "3;3;2;3", "novelty": "3;2;3;3", "presentation": "3;3;4;3", "wc_summary": "55;64;57;99", "wc_strengths": "77;30;41;61", "wc_weaknesses": "92;116;155;323", "wc_questions": "68;41;27;27", "wc_limitations": "13;1;2;7", "wc_review": "305;252;282;517", "wc_reply_reviewers": "12;21;15;183", "wc_reply_authors": "68;90;88;248", "reply_reviewers": "1;1;1;2", "reply_authors": "3;3;3;5", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 68.75, 17.781661902083282 ], "wc_strengths_avg": [ 52.25, 18.102140757380052 ], "wc_weaknesses_avg": [ 171.5, 90.31195934094221 ], "wc_questions_avg": [ 40.75, 16.7388022271607 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 339.0, 104.47248441575418 ], "wc_reply_reviewers_avg": [ 57.75, 72.38568573965436 ], "wc_reply_authors_avg": [ 123.5, 72.39302452584779 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.8660254037844386 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3510179941753623213&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "apple.com;vt.edu;apple.com;apple.com;apple.com;apple.com", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Apple;Virginia Tech", "aff_unique_dep": "Apple Inc.;", "aff_unique_url": "https://www.apple.com;https://www.vt.edu", "aff_unique_abbr": "Apple;VT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Safe Concept Transfer of Multi-Modal Diffusion via Causal Representation Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93488", "id": "qaC4sSztlF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qaC4sSztlF", "openreview": "https://openreview.net/forum?id=qaC4sSztlF", "poster": "/media/PosterPDFs/NeurIPS%202024/93488.png?t=1733588896.4532156", "project": "", "author_site": "Peiran Dong, Bingjie WANG, Song Guo, Junxiao Wang, Jie ZHANG, Zicong Hong", "tldr": "", "abstract": "Recent advancements in vision-language-to-image (VL2I) diffusion generation have made significant progress. While generating images from broad vision-language inputs holds promise, it also raises concerns about potential misuse, such as copying artistic styles without permission, which could have legal and social consequences. Therefore, it's crucial to establish governance frameworks to ensure ethical and copyright integrity, especially with widely used diffusion models. To address these issues, researchers have explored various approaches, such as dataset filtering, adversarial perturbations, machine unlearning, and inference-time refusals. However, these methods often lack either scalability or effectiveness. In response, we propose a new framework called causal representation editing (CRE), which extends representation editing from large language models (LLMs) to diffusion-based models. CRE enhances the efficiency and flexibility of safe content generation by intervening at diffusion timesteps causally linked to unsafe concepts. This allows for precise removal of harmful content while preserving acceptable content quality, demonstrating superior effectiveness, precision and scalability compared to existing methods. CRE can handle complex scenarios, including incomplete or blurred representations of unsafe concepts, offering a promising solution to challenges in managing harmful content generation in diffusion-based models.", "keywords": "Diffusion based models;Safe generation;Concept Transfer;Representation editing", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/ce29af17a50f338c016ea85e35de6c8dd185e168.zip", "author": "Peiran Dong;Bingjie WANG;Song Guo;Junxiao Wang;Jie ZHANG;Zicong Hong", "authorids": "~Peiran_Dong1;~Bingjie_WANG1;~Song_Guo5;~Junxiao_Wang1;~Jie_ZHANG18;~Zicong_Hong1", "gender": "M;M;M;M;F;M", "homepage": "https://polyu.netlify.app/people/peiran-dong/;;https://cse.hkust.edu.hk/~songguo/;http://jxiao.wang/;https://cugzj.github.io/zhangjie.github.io/;https://zicongs-homepage.webflow.io/", "dblp": "243/6454;136/5474;01/267-1;;84/6889-76;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;H6RsGygAAAAJ;JRCNlI8AAAAJ;", "orcid": "0000-0002-1129-9218;0000-0002-5904-2024;;0000-0001-7263-174X;0000-0002-8073-2118;", "linkedin": ";;;junxiao-wang/;;", "or_profile": "~Peiran_Dong1;~Bingjie_WANG1;~Song_Guo5;~Junxiao_Wang1;~Jie_ZHANG18;~Zicong_Hong1", "aff": "Hong Kong Polytechnic University;Hong Kong Polytechnic University;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;King Abdullah University of Science and Technology;The Hong Kong Polytechnic University;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;cse.ust.hk;kaust.edu.sa;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;PhD student;Full Professor;Postdoc;Postdoc;PhD student", "bibtex": "@inproceedings{\ndong2024towards,\ntitle={Towards Safe Concept Transfer of Multi-Modal Diffusion via Causal Representation Editing},\nauthor={Peiran Dong and Bingjie WANG and Song Guo and Junxiao Wang and Jie ZHANG and Zicong Hong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qaC4sSztlF}\n}", "github": "", "reviewers": "33ja;4T2N;MP63;Qo9h", "pdf_size": 13913712, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;3;2;4", "novelty": "3;3;2;4", "presentation": "3;3;3;4", "wc_summary": "71;79;107;125", "wc_strengths": "45;138;74;116", "wc_weaknesses": "63;140;332;174", "wc_questions": "35;68;31;3", "wc_limitations": "6;2;8;15", "wc_review": "220;427;552;433", "wc_reply_reviewers": "0;0;163;23", "wc_reply_authors": "0;0;64;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 95.5, 21.650635094610966 ], "wc_strengths_avg": [ 93.25, 36.120458191999724 ], "wc_weaknesses_avg": [ 177.25, 97.97799497846442 ], "wc_questions_avg": [ 34.25, 23.05834989759675 ], "wc_limitations_avg": [ 7.75, 4.710360920354193 ], "wc_review_avg": [ 408.0, 119.44245476378991 ], "wc_reply_reviewers_avg": [ 46.5, 67.913547985656 ], "wc_reply_authors_avg": [ 16.0, 27.712812921102035 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CyyN2mmIzAAJ:scholar.google.com/&scioq=Towards+Safe+Concept+Transfer+of+Multi-Modal+Diffusion+via+Causal+Representation+Editing&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "polyu.edu.hk;polyu.edu.hk;cse.ust.hk;kaust.edu.sa;polyu.edu.hk;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Hong Kong Polytechnic University;Hong Kong University of Science and Technology;King Abdullah University of Science and Technology", "aff_unique_dep": ";Department of Computer Science and Engineering;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.ust.hk;https://www.kast.kau.edu.sa", "aff_unique_abbr": "PolyU;HKUST;KAUST", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Saudi Arabia" }, { "title": "The Evolution of Statistical Induction Heads: In-Context Learning Markov Chains", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93487", "id": "qaRT6QTIqJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qaRT6QTIqJ", "openreview": "https://openreview.net/forum?id=qaRT6QTIqJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93487.png?t=1731717468.3155277", "project": "", "author_site": "Ezra Edelman, Nikolaos Tsilivis, Benjamin Edelman, Eran Malach, Surbhi Goel", "tldr": "", "abstract": "Large language models have the ability to generate text that mimics patterns in their inputs. We introduce a simple Markov Chain sequence modeling task in order to study how this in-context learning capability emerges. In our setting, each example is sampled from a Markov chain drawn from a prior distribution over Markov chains. Transformers trained on this task form \\emph{statistical induction heads} which compute accurate next-token probabilities given the bigram statistics of the context. During the course of training, models pass through multiple phases: after an initial stage in which predictions are uniform, they learn to sub-optimally predict using in-context single-token statistics (unigrams); then, there is a rapid phase transition to the correct in-context bigram solution. We conduct an empirical and theoretical investigation of this multi-phase process, showing how successful learning results from the interaction between the transformer's layers, and uncovering evidence that the presence of the simpler unigram solution may delay formation of the final bigram solution. We examine how learning is affected by varying the prior distribution over Markov chains, and consider the generalization of our in-context learning of Markov chains (ICL-MC) task to $n$-grams for $n > 2$.", "keywords": "Deep Learning Theory;Transformers;Induction Heads;Phase Transitions;In-context learning;Markov Chains", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Ezra Edelman;Nikolaos Tsilivis;Benjamin L. Edelman;eran malach;Surbhi Goel", "authorids": "~Ezra_Edelman1;~Nikolaos_Tsilivis1;~Benjamin_L._Edelman1;~eran_malach1;~Surbhi_Goel1", "gender": "Non-Binary;;M;F;M", "homepage": "http://ezraedelman.com;https://tsili42.github.io;;https://www.surbhigoel.com;https://www.benjaminedelman.com/", "dblp": ";312/6719;202/2566;190/7815;241/9410", "google_scholar": ";uQ83NcQAAAAJ;I15dUOwAAAAJ;https://scholar.google.co.in/citations?user=Zqz4CQoAAAAJ;mQSj2C0AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Ezra_Edelman1;~Nikolaos_Tsilivis1;~eran_malach1;~Surbhi_Goel1;~Benjamin_L_Edelman1", "aff": "University of Pennsylvania;Toyota Technological Institute at Chicago;Harvard University;University of Pennsylvania;Harvard University", "aff_domain": "upenn.edu;ttic.edu;harvard.edu;upenn.edu;harvard.edu", "position": "PhD student;Intern;Postdoc;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nedelman2024the,\ntitle={The Evolution of Statistical Induction Heads: In-Context Learning Markov Chains},\nauthor={Ezra Edelman and Nikolaos Tsilivis and Benjamin L. Edelman and eran malach and Surbhi Goel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qaRT6QTIqJ}\n}", "github": "", "reviewers": "aVXq;Kgpp;1EuX;oy83", "pdf_size": 1006168, "rating": "5;5;6;6", "confidence": "4;2;3;3", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "69;58;198;79", "wc_strengths": "88;42;228;78", "wc_weaknesses": "987;40;354;124", "wc_questions": "2;42;252;86", "wc_limitations": "22;66;12;1", "wc_review": "1168;248;1044;368", "wc_reply_reviewers": "46;131;239;11", "wc_reply_authors": "536;18;608;0", "reply_reviewers": "1;1;2;1", "reply_authors": "5;2;4;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.0, 56.49336244197189 ], "wc_strengths_avg": [ 109.0, 70.80254232723568 ], "wc_weaknesses_avg": [ 376.25, 370.8789391432196 ], "wc_questions_avg": [ 95.5, 95.11440479759099 ], "wc_limitations_avg": [ 25.25, 24.67159297653883 ], "wc_review_avg": [ 707.0, 403.637213348819 ], "wc_reply_reviewers_avg": [ 106.75, 87.94422948664682 ], "wc_reply_authors_avg": [ 290.5, 282.7202681096635 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.5811388300841898 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12424724348862089773&as_sdt=800005&sciodt=0,15&hl=en", "gs_version_total": 5, "email": "upenn.edu;ttic.edu;harvard.edu;upenn.edu;harvard.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Pennsylvania;Toyota Technological Institute at Chicago;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.tti-chicago.org;https://www.harvard.edu", "aff_unique_abbr": "UPenn;TTI Chicago;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Protected Test-Time Adaptation via Online Entropy Matching: A Betting Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93486", "id": "qamfjyhPeg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qamfjyhPeg", "openreview": "https://openreview.net/forum?id=qamfjyhPeg", "poster": "", "project": "", "author_site": "Yarin Bar, Shalev Shaer, Yaniv Romano", "tldr": "", "abstract": "We present a novel approach for test-time adaptation via online self-training, consisting of two components. First, we introduce a statistical framework that detects distribution shifts in the classifier's entropy values obtained on a stream of unlabeled samples. Second, we devise an online adaptation mechanism that utilizes the evidence of distribution shifts captured by the detection tool to dynamically update the classifier's parameters. The resulting adaptation process drives the distribution of test entropy values obtained from the self-trained classifier to match those of the source domain, building invariance to distribution shifts. This approach departs from the conventional self-training method, which focuses on minimizing the classifier's entropy. Our approach combines concepts in betting martingales and online learning to form a detection tool capable of quickly reacting to distribution shifts. We then reveal a tight relation between our adaptation scheme and optimal transport, which forms the basis of our novel self-supervised loss. Experimental results demonstrate that our approach improves test-time accuracy under distribution shifts while maintaining accuracy and calibration in their absence, outperforming leading entropy minimization methods across various scenarios.", "keywords": "Test Time Domain Adaptation;Online Learning;Testing by Betting;Martingale;Distribution Shift Detection", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/583f707678efb06ce3bd550bc2f6967ba99d1ce6.zip", "author": "Yarin Bar;Shalev Shaer;Yaniv Romano", "authorids": "~Yarin_Bar1;~Shalev_Shaer2;~Yaniv_Romano1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/yaniv-romano/;", "dblp": ";142/0021;", "google_scholar": "https://scholar.google.co.il/citations?user=FOxY39YAAAAJ;L_m67ywAAAAJ;", "orcid": ";;", "linkedin": ";;shalev-shaer/", "or_profile": "~Yarin_Bar1;~Yaniv_Romano1;~shalev_shaer1", "aff": "Computer Science Department, Technion-Israel Institute of Technology;Technion, Technion;Technion, Technion", "aff_domain": "cs.technion.ac.il;technion.ac.il;technion.ac.il", "position": "MS student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbar2024protected,\ntitle={Protected Test-Time Adaptation via Online Entropy Matching: A Betting Approach},\nauthor={Yarin Bar and Shalev Shaer and Yaniv Romano},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qamfjyhPeg}\n}", "github": "", "reviewers": "3rQG;Utu5;Nyvv", "pdf_size": 4499633, "rating": "5;5;6", "confidence": "4;2;4", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "55;80;69", "wc_strengths": "37;103;59", "wc_weaknesses": "142;174;286", "wc_questions": "35;168;106", "wc_limitations": "12;15;158", "wc_review": "281;540;678", "wc_reply_reviewers": "16;440;24", "wc_reply_authors": "22;1541;22", "reply_reviewers": "1;2;1", "reply_authors": "2;4;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 68.0, 10.23067283548187 ], "wc_strengths_avg": [ 66.33333333333333, 27.438820836342234 ], "wc_weaknesses_avg": [ 200.66666666666666, 61.7377968148812 ], "wc_questions_avg": [ 103.0, 54.33844556726542 ], "wc_limitations_avg": [ 61.666666666666664, 68.12896267781828 ], "wc_review_avg": [ 499.6666666666667, 164.56474578583212 ], "wc_reply_reviewers_avg": [ 160.0, 198.01683430119436 ], "wc_reply_authors_avg": [ 528.3333333333334, 716.0634670815772 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3962907460083915603&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.technion.ac.il;technion.ac.il;technion.ac.il", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Technion-Israel Institute of Technology;Technion - Israel Institute of Technology", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.technion.ac.il;https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "IODA: Instance-Guided One-shot Domain Adaptation for Super-Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93485", "id": "qbvt3ocQxB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qbvt3ocQxB", "openreview": "https://openreview.net/forum?id=qbvt3ocQxB", "poster": "/media/PosterPDFs/NeurIPS%202024/93485.png?t=1730123715.3406224", "project": "", "author_site": "Zaizuo Tang, Yu-Bin Yang", "tldr": "", "abstract": "The domain adaptation method effectively mitigates the negative impact of domain gaps on the performance of super-resolution (SR) networks through the guidance of numerous target domain low-resolution (LR) images. However, in real-world scenarios, the availability of target domain LR images is often limited, sometimes even to just one, which inevitably impairs the domain adaptation performance of SR networks. We propose Instance-guided One-shot Domain Adaptation for Super-Resolution (IODA) to enable efficient domain adaptation with only a single unlabeled target domain LR image. To address the limited diversity of the target domain distribution caused by a single target domain LR image, we propose an instance-guided target domain distribution expansion strategy. This strategy effectively expands the diversity of the target domain distribution by generating instance-specific features focused on different instances within the image. For SR tasks emphasizing texture details, we propose an image-guided domain adaptation method. Compared to existing methods that use text representation for domain difference, this method utilizes pixel-level representation with higher granularity, enabling efficient domain adaptation guidance for SR networks. Finally, we validate the effectiveness of IODA on multiple datasets and various network architectures, achieving satisfactory one-shot domain adaptation for SR networks. Our code is available at https://github.com/ZaizuoTang/IODA.", "keywords": "one-shot domain adaptation;super resolution;domain adaptation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zaizuo Tang;Yu-Bin Yang", "authorids": "~Zaizuo_Tang1;~Yu-Bin_Yang3", "gender": "M;M", "homepage": ";https://cs.nju.edu.cn/yangyubin/", "dblp": "303/9328.html;", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-0585-4737;", "linkedin": ";", "or_profile": "~Zaizuo_Tang1;~Yu-Bin_Yang3", "aff": "Nanjing University;Nanjing University, China", "aff_domain": "nju.edu;nju.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ntang2024ioda,\ntitle={{IODA}: Instance-Guided One-shot Domain Adaptation for Super-Resolution},\nauthor={Zaizuo Tang and Yu-Bin Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qbvt3ocQxB}\n}", "github": "", "reviewers": "wDY8;ohtL;G4LY;V6qe", "pdf_size": 32083366, "rating": "3;5;5;5", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "111;21;66;85", "wc_strengths": "29;11;73;47", "wc_weaknesses": "182;53;89;55", "wc_questions": "6;14;67;130", "wc_limitations": "8;7;51;53", "wc_review": "336;106;346;370", "wc_reply_reviewers": "117;0;0;0", "wc_reply_authors": "1814;58;58;58", "reply_reviewers": "1;0;0;0", "reply_authors": "6;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.75, 32.86620604815834 ], "wc_strengths_avg": [ 40.0, 22.9128784747792 ], "wc_weaknesses_avg": [ 94.75, 52.36590016413353 ], "wc_questions_avg": [ 54.25, 49.62043429878461 ], "wc_limitations_avg": [ 29.75, 22.26404051379713 ], "wc_review_avg": [ 289.5, 106.66184884953007 ], "wc_reply_reviewers_avg": [ 29.25, 50.66248612138966 ], "wc_reply_authors_avg": [ 497.0, 760.3703045227371 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3EBVuTgFTOgJ:scholar.google.com/&scioq=IODA:+Instance-Guided+One-shot+Domain+Adaptation+for+Super-Resolution&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "nju.edu;nju.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Tighter Convergence Bounds for Shuffled SGD via Primal-Dual Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93484", "id": "qcPlGtzwW9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qcPlGtzwW9", "openreview": "https://openreview.net/forum?id=qcPlGtzwW9", "poster": "", "project": "", "author_site": "Xufeng Cai, Cheuk Yin Lin, Jelena Diakonikolas", "tldr": "", "abstract": "Stochastic gradient descent (SGD) is perhaps the most prevalent optimization method in modern machine learning. Contrary to the empirical practice of sampling from the datasets \\emph{without replacement} and with (possible) reshuffling at each epoch, the theoretical counterpart of SGD usually relies on the assumption of \\emph{sampling with replacement}. It is only very recently that SGD using sampling without replacement -- shuffled SGD -- has been analyzed with matching upper and lower bounds. However, we observe that those bounds are too pessimistic to explain often superior empirical performance of data permutations (sampling without replacement) over vanilla counterparts (sampling with replacement) on machine learning problems. Through fine-grained analysis in the lens of primal-dual cyclic coordinate methods and the introduction of novel smoothness parameters, we present several results for shuffled SGD on smooth and non-smooth convex losses, where our novel analysis framework provides tighter convergence bounds over all popular shuffling schemes (IG, SO, and RR). Notably, our new bounds predict faster convergence than existing bounds in the literature -- by up to a factor of $O(\\sqrt{n})$, mirroring benefits from tighter convergence bounds using component smoothness parameters in randomized coordinate methods. Lastly, we numerically demonstrate on common machine learning datasets that our bounds are indeed much tighter, thus offering a bridge between theory and practice.", "keywords": "primal-dual analysis;cyclic methods;shuffled sgd", "primary_area": "optimization", "supplementary_material": "", "author": "Xufeng Cai;Cheuk Yin Lin;Jelena Diakonikolas", "authorids": "~Xufeng_Cai1;~Cheuk_Yin_Lin1;~Jelena_Diakonikolas2", "gender": ";M;F", "homepage": ";https://ericlincc.com;http://www.jelena-diakonikolas.com/", "dblp": ";285/6061;147/5178", "google_scholar": ";;J8ixfu8AAAAJ", "orcid": ";;0000-0003-3439-0310", "linkedin": ";;", "or_profile": "~Xufeng_Cai1;~Cheuk_Yin_Lin1;~Jelena_Diakonikolas2", "aff": ";Department of Computer Science, University of Wisconsin, Madison;University of Wisconsin, Madison", "aff_domain": ";cs.wisc.edu;wisc.edu", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncai2024tighter,\ntitle={Tighter Convergence Bounds for Shuffled {SGD} via Primal-Dual Perspective},\nauthor={Xufeng Cai and Cheuk Yin Lin and Jelena Diakonikolas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qcPlGtzwW9}\n}", "github": "", "reviewers": "Ua2v;JY5g;EeyJ;S4vW", "pdf_size": 1100591, "rating": "5;5;6;7", "confidence": "4;4;2;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "70;108;35;165", "wc_strengths": "55;74;29;27", "wc_weaknesses": "71;220;46;80", "wc_questions": "41;228;42;247", "wc_limitations": "1;16;15;2", "wc_review": "238;646;167;521", "wc_reply_reviewers": "0;8;21;44", "wc_reply_authors": "0;167;23;218", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;3", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.5, 48.20010373432821 ], "wc_strengths_avg": [ 46.25, 19.45989465541887 ], "wc_weaknesses_avg": [ 104.25, 67.97931670736328 ], "wc_questions_avg": [ 139.5, 98.23059604827816 ], "wc_limitations_avg": [ 8.5, 7.0178344238090995 ], "wc_review_avg": [ 393.0, 197.16363762113946 ], "wc_reply_reviewers_avg": [ 18.25, 16.64894891577243 ], "wc_reply_authors_avg": [ 102.0, 92.63638593986707 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7421514571066557672&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";cs.wisc.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GRANOLA: Adaptive Normalization for Graph Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93483", "id": "qd8blc0o0F", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qd8blc0o0F", "openreview": "https://openreview.net/forum?id=qd8blc0o0F", "poster": "", "project": "", "author_site": "Moshe Eliasof, Beatrice Bevilacqua, Carola-Bibiane Sch\u00f6nlieb, Haggai Maron", "tldr": "", "abstract": "Despite the widespread adoption of Graph Neural Networks (GNNs), these models often incorporate off-the-shelf normalization layers like BatchNorm or InstanceNorm, which were not originally designed for GNNs. Consequently, these normalization layers may not effectively capture the unique characteristics of graph-structured data, potentially even weakening the expressive power of the overall architecture. \nWhile existing graph-specific normalization layers have been proposed, they often struggle to offer substantial and consistent benefits. In this paper, we propose GRANOLA, a novel graph-adaptive normalization layer. Unlike existing normalization layers, GRANOLA normalizes node features by adapting to the specific characteristics of the graph, particularly by generating expressive representations of its nodes, obtained by leveraging the propagation of Random Node Features (RNF) in the graph. We provide theoretical results that support our design choices as well as an extensive empirical evaluation demonstrating the superior performance of GRANOLA over existing normalization techniques. Furthermore, GRANOLA emerges as the top-performing method among all baselines in the same time complexity class of Message Passing Neural Networks (MPNNs).", "keywords": "Graph Neural Networks;Normalization Layer in GNNs", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Moshe Eliasof;Beatrice Bevilacqua;Carola-Bibiane Sch\u00f6nlieb;Haggai Maron", "authorids": "~Moshe_Eliasof1;~Beatrice_Bevilacqua1;~Carola-Bibiane_Sch\u00f6nlieb1;~Haggai_Maron1", "gender": "M;F;F;M", "homepage": ";http://beabevi.github.io/;http://www.damtp.cam.ac.uk/research/cia/;https://haggaim.github.io/", "dblp": "239/6004;275/2364;07/8184;181/6629", "google_scholar": "44LKqBsAAAAJ;;nPeOXjwAAAAJ;https://scholar.google.co.il/citations?user=4v8uJrIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Moshe_Eliasof1;~Beatrice_Bevilacqua1;~Carola-Bibiane_Sch\u00f6nlieb1;~Haggai_Maron1", "aff": "University of Cambridge;Purdue University;University of Cambridge;NVIDIA", "aff_domain": "cam.ac.uk;purdue.edu;cam.ac.uk;nvidia.com", "position": "Postdoc;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\neliasof2024granola,\ntitle={{GRANOLA}: Adaptive Normalization for Graph Neural Networks},\nauthor={Moshe Eliasof and Beatrice Bevilacqua and Carola-Bibiane Sch{\\\"o}nlieb and Haggai Maron},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qd8blc0o0F}\n}", "github": "", "reviewers": "vndL;PwFk;tP2u;Ao3Y", "pdf_size": 754384, "rating": "5;6;6;6", "confidence": "3;3;3;5", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "2;3;3;3", "wc_summary": "68;97;152;185", "wc_strengths": "26;97;107;177", "wc_weaknesses": "89;66;94;568", "wc_questions": "69;2;89;38", "wc_limitations": "6;1;1;36", "wc_review": "258;263;443;1004", "wc_reply_reviewers": "0;11;12;351", "wc_reply_authors": "0;0;0;735", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 125.5, 45.71925196238451 ], "wc_strengths_avg": [ 101.75, 53.50408862881415 ], "wc_weaknesses_avg": [ 204.25, 210.27645493492608 ], "wc_questions_avg": [ 49.5, 32.89756829919196 ], "wc_limitations_avg": [ 11.0, 14.577379737113251 ], "wc_review_avg": [ 492.0, 304.85324338113907 ], "wc_reply_reviewers_avg": [ 93.5, 148.74222668764912 ], "wc_reply_authors_avg": [ 183.75, 318.2643358907812 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3761735245065166453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cam.ac.uk;purdue.edu;cam.ac.uk;nvidia.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Cambridge;Purdue University;NVIDIA", "aff_unique_dep": ";;NVIDIA Corporation", "aff_unique_url": "https://www.cam.ac.uk;https://www.purdue.edu;https://www.nvidia.com", "aff_unique_abbr": "Cambridge;Purdue;NVIDIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Universal Sample Coding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93482", "id": "qdV1vp1AtL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qdV1vp1AtL", "openreview": "https://openreview.net/forum?id=qdV1vp1AtL", "poster": "/media/PosterPDFs/NeurIPS%202024/93482.png?t=1733758455.845679", "project": "", "author_site": "Szymon Kobus, Tze-Yang Tung, Deniz Gunduz", "tldr": "", "abstract": "In this work, we study the problem of communicating multiple samples from an unknown probability distribution using as few bits as possible. This is a generalization of the channel simulation problem, which has recently found applications and achieved state of the art results in realistic image compression, neural network compression, and communication-efficient federated learning. In this problem, the transmitter wants the receiver to generate multiple independent and identically distributed (i.i.d.) samples from a target distribution $P$, while the transmitter and the receiver have access to independent samples from a reference distribution $Q$. The core idea is to employ channel simulation in multiple rounds while updating the reference distribution $Q$ after each round in order to reduce the KL-divergence between $P$ and $Q$, thereby reducing the communication cost in subsequent rounds. We derive a lower bound on the expected communication cost and construct a practical algorithm that achieves the lower bound up to a multiplicative constant. We then employ this algorithm in communication-efficient federated learning, in which model updates correspond to samples from a distribution, and achieve a 37% reduction in the communication load. To further highlight the potential of sample communication for generative models, we show that the number of bits needed to communicate samples from a large language model can be reduced by up to 16 times, compared to entropy-based data compression.", "keywords": "source coding;compression;sampling;channel simulation;federated learning;generative AI", "primary_area": "other", "supplementary_material": "", "author": "Szymon Kobus;Tze-Yang Tung;Deniz Gunduz", "authorids": "~Szymon_Kobus1;~Tze-Yang_Tung1;~Deniz_Gunduz1", "gender": ";M;", "homepage": ";https://tze-yang-tung.github.io/;https://www.imperial.ac.uk/information-processing-and-communications-lab", "dblp": ";;05/6552", "google_scholar": "ztxUZhoAAAAJ;gcoILEcAAAAJ;https://scholar.google.co.uk/citations?user=MbmKROkAAAAJ", "orcid": ";;0000-0002-7725-395X", "linkedin": ";;deniz-gunduz-33b2382/", "or_profile": "~Szymon_Kobus1;~Tze-Yang_Tung1;~Deniz_Gunduz1", "aff": "Imperial College London;;Imperial College London", "aff_domain": "imperial.ac.uk;;imperial.ac.uk", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nkobus2024universal,\ntitle={Universal Sample Coding},\nauthor={Szymon Kobus and Tze-Yang Tung and Deniz Gunduz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qdV1vp1AtL}\n}", "github": "", "reviewers": "YZzm;RwEb;Jywa;NNh9", "pdf_size": 1364942, "rating": "5;5;7;7", "confidence": "4;2;5;4", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "3;2;3;3", "wc_summary": "125;148;123;67", "wc_strengths": "20;21;93;109", "wc_weaknesses": "43;144;947;147", "wc_questions": "45;6;150;75", "wc_limitations": "29;1;15;2", "wc_review": "262;320;1328;400", "wc_reply_reviewers": "20;147;17;216", "wc_reply_authors": "0;69;0;109", "reply_reviewers": "1;2;1;2", "reply_authors": "1;2;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.75, 29.810862114336782 ], "wc_strengths_avg": [ 60.75, 40.64710936831794 ], "wc_weaknesses_avg": [ 320.25, 364.2673297181618 ], "wc_questions_avg": [ 69.0, 52.77783625727754 ], "wc_limitations_avg": [ 11.75, 11.388041973930374 ], "wc_review_avg": [ 577.5, 436.0627821770622 ], "wc_reply_reviewers_avg": [ 100.0, 85.0793747038611 ], "wc_reply_authors_avg": [ 44.5, 46.69314724882014 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13021044268763759359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "imperial.ac.uk;;imperial.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Task Me Anything", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97494", "id": "qeLh17biCr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qeLh17biCr", "openreview": "https://openreview.net/forum?id=qeLh17biCr", "poster": "", "project": "", "author_site": "Jieyu Zhang, Weikai Huang, Zixian Ma, Oscar Michel, Dong He, Tanmay Gupta, Wei-Chiu Ma, Ali Farhadi, Aniruddha Kembhavi, Ranjay Krishna", "tldr": "", "abstract": "Benchmarks for large multimodal language models (MLMs) now serve to simultaneously assess the general capabilities of models instead of evaluating for a specific capability. As a result, when a developer wants to identify which models to use for their application, they are overwhelmed by the number of benchmarks and remain uncertain about which benchmark's results are most reflective of their specific use case. This paper introduces Task-Me-Anything, a benchmark generation engine which produces a benchmark tailored to a user's needs. Task-Me-Anything maintains an extendable taxonomy of visual assets and can programmatically generate a vast number of task instances. Additionally, it algorithmically addresses user queries regarding MLM performance efficiently within a computational budget. It contains 113K images, 10K videos, 2K 3D object assets, over 365 object categories, 655 attributes, and 335 relationships. It can generate 500M image/video question-answering pairs, which focus on evaluating MLM perceptual capabilities. Task-Me-Anything reveals critical insights: open-source MLMs excel in object and attribute recognition but lack spatial and temporal understanding; each model exhibits unique strengths and weaknesses; larger models generally perform better, though exceptions exist; and GPT4O demonstrates challenges in recognizing rotating/moving objects and distinguishing colors.", "keywords": "large multimodal language models;model evaluation;benchmark generation;task generation;data-centric AI", "primary_area": "", "supplementary_material": "/attachment/237b76bb379d260d1b1c50e4d23ec6bcc4f5550e.pdf", "author": "Jieyu Zhang;Weikai Huang;Zixian Ma;Oscar Michel;Dong He;Tanmay Gupta;Wei-Chiu Ma;Ali Farhadi;Aniruddha Kembhavi;Ranjay Krishna", "authorids": "~Jieyu_Zhang1;~Weikai_Huang1;~Zixian_Ma1;~Oscar_Michel1;~Dong_He1;~Tanmay_Gupta1;~Wei-Chiu_Ma1;~Ali_Farhadi3;~Aniruddha_Kembhavi1;~Ranjay_Krishna1", "gender": "M;M;F;M;Not Specified;M;M;M;M;M", "homepage": "https://jieyuz2.github.io/;https://weikaih04.github.io/;https://zixianma.github.io/;;https://dongheuw.github.io/;http://tanmaygupta.info/;https://www.cs.cornell.edu/~weichiu/;https://homes.cs.washington.edu/~ali/;https://anikem.github.io/;http://ranjaykrishna.com", "dblp": ";;311/3682;308/2324;;62/1086;151/4277;37/5826;81/7583;167/3785", "google_scholar": "T_INUHUAAAAJ;myeLQPEAAAAJ;0E-IY2IAAAAJ;D0WvX4YAAAAJ;2FI_ec4AAAAJ;https://scholar.google.co.in/citations?user=zblQKM8AAAAJ;SVIdh6AAAAAJ;jeOFRDsAAAAJ;JnUevM0AAAAJ;IcqahyAAAAAJ", "orcid": "0000-0002-1846-2436;;;;;;;;;0000-0001-8784-2531", "linkedin": "jieyu-zhang-3baaa8154/;;zixian-ma/;;dongheuw/;;;;;ranjay-krishna-1a344444/", "or_profile": "~Jieyu_Zhang1;~Weikai_Huang1;~Zixian_Ma1;~Oscar_Michel1;~Dong_He1;~Tanmay_Gupta1;~Wei-Chiu_Ma1;~Ali_Farhadi3;~Aniruddha_Kembhavi1;~Ranjay_Krishna1", "aff": "University of Washington;University of Washington;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;cs.washington.edu;allenai.org;cs.washington.edu;allenai.org;allenai.org;cs.uw.edu;allenai.org;cs.washington.edu", "position": "PhD student;Undergrad student;PhD student;Researcher;PhD student;Research Scientist;Postdoc;Full Professor;Research Manager;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024task,\ntitle={Task Me Anything},\nauthor={Jieyu Zhang and Weikai Huang and Zixian Ma and Oscar Michel and Dong He and Tanmay Gupta and Wei-Chiu Ma and Ali Farhadi and Aniruddha Kembhavi and Ranjay Krishna},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qeLh17biCr}\n}", "github": "", "reviewers": "jor3;PNK4;d4Ru", "pdf_size": 13222591, "rating": "6;6;8", "confidence": "3;4;4", "wc_summary_and_contributions": "78;41;60", "wc_strengths": "5;60;32", "wc_improvement": "5;128;162", "wc_limitations": "27;5;144", "wc_correctness": "1;16;28", "wc_clarity": "1;5;28", "wc_relation_to_prior_work": "9;4;29", "wc_documentation": "5;4;21", "wc_additional_feedback": "1;1;1", "wc_review": "132;264;505", "wc_reply_reviewers": "0;24;9", "wc_reply_authors": "32;65;0", "reply_reviewers": "0;1;1", "reply_authors": "4;3;1", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 59.666666666666664, 15.107025591499546 ], "wc_strengths_avg": [ 32.333333333333336, 22.45489305746572 ], "wc_improvement_avg": [ 98.33333333333333, 67.44050876307371 ], "wc_limitations_avg": [ 58.666666666666664, 61.00455356410336 ], "wc_correctness_avg": [ 15.0, 11.045361017187261 ], "wc_clarity_avg": [ 11.333333333333334, 11.897712198383164 ], "wc_relation_to_prior_work_avg": [ 14.0, 10.801234497346433 ], "wc_documentation_avg": [ 10.0, 7.788880963698615 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.3333333333333, 154.4286962394691 ], "wc_reply_reviewers_avg": [ 11.0, 9.899494936611665 ], "wc_reply_authors_avg": [ 32.333333333333336, 26.537185649993525 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=800082202156047141&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.washington.edu;uw.edu;cs.washington.edu;allenai.org;cs.washington.edu;allenai.org;allenai.org;cs.uw.edu;allenai.org;cs.washington.edu", "author_num": 10, "aff_unique_index": "0;0;0;1;0;1;1;0;1;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MAGIS: LLM-Based Multi-Agent Framework for GitHub Issue Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93481", "id": "qevq3FZ63J", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qevq3FZ63J", "openreview": "https://openreview.net/forum?id=qevq3FZ63J", "poster": "/media/PosterPDFs/NeurIPS%202024/93481.png?t=1731650650.4243968", "project": "", "author_site": "Wei Tao, Yucheng Zhou, Yanlin Wang, Wenqiang Zhang, Hongyu Zhang, Yu Cheng", "tldr": "", "abstract": "In software development, resolving the emergent issues within GitHub repositories is a complex challenge that involves not only the incorporation of new code but also the maintenance of existing code.\nLarge Language Models (LLMs) have shown promise in code generation but face difficulties in resolving Github issues, particularly at the repository level. \nTo overcome this challenge, we empirically study the reason why LLMs fail to resolve GitHub issues and analyze the major factors. \nMotivated by the empirical findings, we propose a novel LLM-based **M**ulti-**A**gent framework for **G**itHub **I**ssue re**S**olution, **MAGIS**, consisting of four agents customized for software evolution: Manager, Repository Custodian, Developer, and Quality Assurance Engineer agents. \nThis framework leverages the collaboration of various agents in the planning and coding process to unlock the potential of LLMs to resolve GitHub issues. \nIn experiments, we employ the SWE-bench benchmark to compare MAGIS with popular LLMs, including GPT-3.5, GPT-4, and Claude-2. \nMAGIS can resolve **13.94%** GitHub issues, significantly outperforming the baselines.\nSpecifically, MAGIS achieves an eight-fold increase in resolved ratio over the direct application of GPT-4, the advanced LLM.", "keywords": "Code Change;LLM Agent;Software Evolution", "primary_area": "other", "supplementary_material": "", "author": "Wei Tao;Yucheng Zhou;Yanlin Wang;Wenqiang Zhang;Hongyu Zhang;Yu Cheng", "authorids": "~Wei_Tao4;~Yucheng_Zhou1;~Yanlin_Wang1;~Wenqiang_Zhang1;~Hongyu_Zhang1;~Yu_Cheng1", "gender": "M;M;F;M;M;M", "homepage": "https://itaowe.com;https://yczhou001.github.io/;https://yanlin.info/;https://www.fudanroilab.com/2021/07/01/WenqiangZhang.html;https://sites.google.com/site/hongyujohn;https://ych133.github.io", "dblp": "17/6159-3;306/0531;41/3458-1.html;;29/2726-2;96/3060-1.html", "google_scholar": "3RG4Lf4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;vL-VEJYAAAAJ;https://scholar.google.com.au/citations?user=zsUN6PkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-1800-1904;0009-0006-9883-5621;;0000-0002-3339-8751;0000-0002-3063-9425;", "linkedin": ";yucheng-zhou-1947941a4/;;;;chengyu05/", "or_profile": "~Wei_Tao4;~Yucheng_Zhou1;~Yanlin_Wang1;~Wenqiang_Zhang1;~Hongyu_Zhang1;~Yu_Cheng1", "aff": "Fudan University;University of Macau;Sun Yat-Sen University;Fudan University;Chongqing University;The Chinese University of Hong Kong", "aff_domain": "fudan.edu.cn;umac.mo;mail.sysu.edu.cn;fudan.edu.cn;cqu.edu.cn;cuhk.edu.hk", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ntao2024magis,\ntitle={{MAGIS}: {LLM}-Based Multi-Agent Framework for GitHub Issue Resolution},\nauthor={Wei Tao and Yucheng Zhou and Yanlin Wang and Wenqiang Zhang and Hongyu Zhang and Yu Cheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qevq3FZ63J}\n}", "github": "", "reviewers": "1wgB;L511;gcvU;15zT", "pdf_size": 5896353, "rating": "4;6;6;7", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "67;88;141;48", "wc_strengths": "95;62;206;34", "wc_weaknesses": "253;57;121;26", "wc_questions": "5;139;197;55", "wc_limitations": "11;7;95;1", "wc_review": "431;353;760;164", "wc_reply_reviewers": "161;93;0;14", "wc_reply_authors": "676;122;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.0, 34.763486591537394 ], "wc_strengths_avg": [ 99.25, 65.30457487802826 ], "wc_weaknesses_avg": [ 114.25, 87.12455164877464 ], "wc_questions_avg": [ 99.0, 74.1215218408257 ], "wc_limitations_avg": [ 28.5, 38.55839726959615 ], "wc_review_avg": [ 427.0, 215.37757543439847 ], "wc_reply_reviewers_avg": [ 67.0, 64.82669203345178 ], "wc_reply_authors_avg": [ 199.5, 279.57959510665296 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6092117101437278700&as_sdt=5,30&sciodt=0,30&hl=en", "gs_version_total": 5, "email": "fudan.edu.cn;umac.mo;mail.sysu.edu.cn;fudan.edu.cn;cqu.edu.cn;cuhk.edu.hk", "author_num": 6, "aff_unique_index": "0;1;2;0;3;4", "aff_unique_norm": "Fudan University;University of Macau;Sun Yat-sen University;Chongqing University;Chinese University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.um.edu.mo;http://www.sysu.edu.cn/;https://www.cqu.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "Fudan;UM;SYSU;CQU;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Macau SAR;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "einspace: Searching for Neural Architectures from Fundamental Operations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93480", "id": "qf1ncViBr5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qf1ncViBr5", "openreview": "https://openreview.net/forum?id=qf1ncViBr5", "poster": "/media/PosterPDFs/NeurIPS%202024/93480.png?t=1733137243.3923385", "project": "", "author_site": "Linus Ericsson, Miguel Espinosa Minano, Chenhongyi Yang, Antreas Antoniou, Amos Storkey, Shay Cohen, Steven McDonagh, Elliot Crowley", "tldr": "", "abstract": "Neural architecture search (NAS) finds high performing networks for a given task. Yet the results of NAS are fairly prosaic; they did not e.g. create a shift from convolutional structures to transformers. This is not least because the search spaces in NAS often aren\u2019t diverse enough to include such transformations *a priori*. Instead, for NAS to provide greater potential for fundamental design shifts, we need a novel expressive search space design which is built from more fundamental operations. To this end, we introduce `einspace`, a search space based on a parameterised probabilistic context-free grammar. Our space is versatile, supporting architectures of various sizes and complexities, while also containing diverse network operations which allow it to model convolutions, attention components and more. It contains many existing competitive architectures, and provides flexibility for discovering new ones. Using this search space, we perform experiments to find novel architectures as well as improvements on existing ones on the diverse Unseen NAS datasets. We show that competitive architectures can be obtained by searching from scratch, and we consistently find large improvements when initialising the search with strong baselines. We believe that this work is an important advancement towards a transformative NAS paradigm where search space expressivity and strategic search initialisation play key roles.", "keywords": "neural architecture search;nas;deep learning architectures;context-free grammars;cfg;pcfg;neural networks;search space", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/9eb285992f675cdf9ec119c8dffc3b8c722eeed7.zip", "author": "Linus Ericsson;Miguel Espinosa;Chenhongyi Yang;Antreas Antoniou;Amos Storkey;Shay B Cohen;Steven McDonagh;Elliot J. Crowley", "authorids": "~Linus_Ericsson1;~Miguel_Espinosa1;~Chenhongyi_Yang3;~Antreas_Antoniou3;~Amos_Storkey1;~Shay_B_Cohen1;~Steven_McDonagh1;~Elliot_J._Crowley1", "gender": "M;;M;Not Specified;M;;M;M", "homepage": "https://linusericsson.github.io/;https://miquel-espinosa.github.io;https://chenhongyiyang.com;http://homepages.inf.ed.ac.uk/amos/;http://homepages.inf.ed.ac.uk/scohen;https://smcdonagh.github.io/;https://elliotjcrowley.github.io;https://antreasantoniou.github.io/", "dblp": "150/5139;;255/5498;;04/5629;159/2641;157/3601;", "google_scholar": "QRW9NN0AAAAJ;0gNqD9IAAAAJ;5i2hUToAAAAJ;;;https://scholar.google.co.uk/citations?user=k8-q2AoAAAAJ;https://scholar.google.co.uk/citations?user=RyKtqiQAAAAJ;", "orcid": ";;;;0000-0003-4753-8353;0000-0001-7025-5197;;", "linkedin": ";miguel-espinosa-7a1a34179/;chenhongyi-yang-9914571a1;;;;;", "or_profile": "~Linus_Ericsson1;~Miguel_Espinosa1;~Chenhongyi_Yang3;~Amos_Storkey1;~Shay_B_Cohen1;~Steven_McDonagh1;~Elliot_J._Crowley1;~Antreas_Antoniou2", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Edinburgh;University of Edinburgh;University of Edinburgh;", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;", "position": "Postdoc;PhD student;PhD student;Full Professor;Reader;Associate Professor;Assistant Professor;", "bibtex": "@inproceedings{\nericsson2024einspace,\ntitle={einspace: Searching for Neural Architectures from Fundamental Operations},\nauthor={Linus Ericsson and Miguel Espinosa and Chenhongyi Yang and Antreas Antoniou and Amos Storkey and Shay B Cohen and Steven McDonagh and Elliot J. Crowley},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qf1ncViBr5}\n}", "github": "", "reviewers": "c4rx;sBZH;s1qw;ZVSn;wLBZ;duvy", "pdf_size": 1118074, "rating": "4;4;5;6;6;6", "confidence": "4;3;4;3;4;5", "soundness": "3;2;3;3;4;2", "novelty": "2;2;2;3;3;3", "presentation": "3;2;3;3;3;4", "wc_summary": "72;49;98;87;87;53", "wc_strengths": "82;62;113;119;49;191", "wc_weaknesses": "124;230;135;154;234;367", "wc_questions": "289;1;143;79;2;35", "wc_limitations": "55;1;9;27;20;7", "wc_review": "622;343;498;466;392;653", "wc_reply_reviewers": "44;27;22;0;51;59", "wc_reply_authors": "98;40;26;45;35;61", "reply_reviewers": "1;1;1;0;1;1", "reply_authors": "2;2;2;2;2;2", "rating_avg": [ 5.166666666666667, 0.8975274678557507 ], "confidence_avg": [ 3.8333333333333335, 0.6871842709362768 ], "soundness_avg": [ 2.8333333333333335, 0.6871842709362768 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 74.33333333333333, 18.18118685772619 ], "wc_strengths_avg": [ 102.66666666666667, 46.79268698798516 ], "wc_weaknesses_avg": [ 207.33333333333334, 83.40596834493054 ], "wc_questions_avg": [ 91.5, 101.00123761617974 ], "wc_limitations_avg": [ 19.833333333333332, 17.91104562987755 ], "wc_review_avg": [ 495.6666666666667, 112.28930888062713 ], "wc_reply_reviewers_avg": [ 33.833333333333336, 19.84453465202839 ], "wc_reply_authors_avg": [ 50.833333333333336, 23.617907518566405 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.37267799624996495 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.31526414437773154, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5643682321885167597&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 6, "email": "ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;ed.ac.uk;", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Reinforcement Learning Under Latent Dynamics: Toward Statistical and Algorithmic Modularity", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93479", "id": "qf2uZAdy1N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qf2uZAdy1N", "openreview": "https://openreview.net/forum?id=qf2uZAdy1N", "poster": "", "project": "", "author_site": "Philip Amortila, Dylan J Foster, Nan Jiang, Akshay Krishnamurthy, Zak Mhammedi", "tldr": "", "abstract": "Real-world applications of reinforcement learning often involve environments where agents operate on complex, high-dimensional observations, but the underlying (``latent'') dynamics are comparatively simple. However, beyond restrictive settings\n such as tabular latent dynamics, the fundamental statistical requirements and algorithmic principles for *reinforcement learning under latent dynamics* are poorly\n understood.\n\n This paper addresses the question of reinforcement learning under *general latent dynamics* from a\n statistical and algorithmic perspective. On the statistical side, our main negative\nresult shows that *most* well-studied settings for reinforcement learning with function approximation become intractable when composed with rich observations; we complement this with a positive result, identifying *latent pushforward coverability* as a\ngeneral condition that enables statistical tractability. Algorithmically, we develop provably efficient *observable-to-latent* reductions ---that is, reductions that transform an arbitrary algorithm for the\n latent MDP into an algorithm that can operate on rich observations--- in two settings: one where the agent has access to hindsight\nobservations of the latent dynamics (Lee et al., 2023) and one\nwhere the agent can estimate *self-predictive* latent models (Schwarzer et al., 2020). Together, our results serve as a\n first step toward a unified statistical and algorithmic theory for\nreinforcement learning under latent dynamics.", "keywords": "Reinforcement Learning;Representation Learning;Latent Dynamics;Function Approximation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Philip Amortila;Dylan J Foster;Nan Jiang;Akshay Krishnamurthy;Zakaria Mhammedi", "authorids": "~Philip_Amortila1;~Dylan_J_Foster1;~Nan_Jiang2;~Akshay_Krishnamurthy1;~Zakaria_Mhammedi1", "gender": "M;;M;M;M", "homepage": "https://www.philipamortila.com;http://dylanfoster.net;http://nanjiang.cs.illinois.edu;https://www.cics.umass.edu/~akshay/;", "dblp": "222/2989;167/4271;06/4489-8;85/8024;192/1360", "google_scholar": "NZQkB8sAAAAJ;RqwU8xsAAAAJ;nUlanA8AAAAJ;https://scholar.google.com.tw/citations?user=K0kaNvkAAAAJ;", "orcid": ";;;;", "linkedin": ";;nan-jiang-28139937/;;", "or_profile": "~Philip_Amortila1;~Dylan_J_Foster1;~Nan_Jiang2;~Akshay_Krishnamurthy1;~Zakaria_Mhammedi1", "aff": "University of Illinois, Urbana Champaign;Microsoft Research;University of Illinois, Urbana Champaign;Microsoft Research;Research, Google", "aff_domain": "illinois.edu;microsoft.com;illinois.edu;research.microsoft.com;research.google.com", "position": "PhD student;Principal Researcher;Assistant Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\namortila2024reinforcement,\ntitle={Reinforcement Learning Under Latent Dynamics: Toward Statistical and Algorithmic Modularity},\nauthor={Philip Amortila and Dylan J Foster and Nan Jiang and Akshay Krishnamurthy and Zakaria Mhammedi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qf2uZAdy1N}\n}", "github": "", "reviewers": "qRgr;669T;U7JX", "pdf_size": 830853, "rating": "6;7;7", "confidence": "3;1;3", "soundness": "4;3;4", "novelty": "4;3;3", "presentation": "2;3;3", "wc_summary": "174;112;168", "wc_strengths": "51;103;84", "wc_weaknesses": "62;112;82", "wc_questions": "99;8;19", "wc_limitations": "9;25;15", "wc_review": "395;360;368", "wc_reply_reviewers": "19;18;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 151.33333333333334, 27.920522121829233 ], "wc_strengths_avg": [ 79.33333333333333, 21.483844059096022 ], "wc_weaknesses_avg": [ 85.33333333333333, 20.548046676563256 ], "wc_questions_avg": [ 42.0, 40.554490092549145 ], "wc_limitations_avg": [ 16.333333333333332, 6.599663291074443 ], "wc_review_avg": [ 374.3333333333333, 14.974051630144134 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 8.73053390247253 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14528532951204878432&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "illinois.edu;microsoft.com;illinois.edu;research.microsoft.com;research.google.com", "author_num": 5, "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft;Google", "aff_unique_dep": ";Microsoft Research;Google Research", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com/en-us/research;https://research.google", "aff_unique_abbr": "UIUC;MSR;Google", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Urbana-Champaign;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Entity Alignment with Noisy Annotations from Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93478", "id": "qfCQ54ZTX1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qfCQ54ZTX1", "openreview": "https://openreview.net/forum?id=qfCQ54ZTX1", "poster": "", "project": "", "author_site": "CHEN SHENGYUAN, Qinggang Zhang, Junnan Dong, Wen Hua, Qing Li, Xiao Huang", "tldr": "", "abstract": "Entity alignment (EA) aims to merge two knowledge graphs (KGs) by identifying equivalent entity pairs. While existing methods heavily rely on human-generated labels, it is prohibitively expensive to incorporate cross-domain experts for annotation in real-world scenarios. The advent of Large Language Models (LLMs) presents new avenues for automating EA with annotations, inspired by their comprehensive capability to process semantic information. However, it is nontrivial to directly apply LLMs for EA since the annotation space in real-world KGs is large. LLMs could also generate noisy labels that may mislead the alignment. To this end, we propose a unified framework, LLM4EA, to effectively leverage LLMs for EA. Specifically, we design a novel active learning policy to significantly reduce the annotation space by prioritizing the most valuable entities based on the entire inter-KG and intra-KG structure. Moreover, we introduce an unsupervised label refiner to continuously enhance label accuracy through in-depth probabilistic reasoning. We iteratively optimize the policy based on the feedback from a base EA model. Extensive experiments demonstrate the advantages of LLM4EA on four benchmark datasets in terms of effectiveness, robustness, and efficiency.", "keywords": "Entity alignment;Large language models;probabilistic reasoning;active learning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Shengyuan Chen;Qinggang Zhang;Junnan Dong;Wen Hua;Qing Li;Xiao Huang", "authorids": "~Shengyuan_Chen3;~Qinggang_Zhang2;~Junnan_Dong1;~Wen_Hua1;~Qing_Li5;~Xiao_Huang1", "gender": "M;M;F;M;M;M", "homepage": "https://qing145.github.io/;https://junnandong.github.io;https://web.comp.polyu.edu.hk/wenhua/;https://www4.comp.polyu.edu.hk/~csqli/;https://www4.comp.polyu.edu.hk/~xiaohuang/;https://chensycn.github.io/", "dblp": "17/8559;322/5787;68/11145;(2024-11-14-1812689);25/692-1.html;", "google_scholar": "eF8PATI7r3IC;https://scholar.google.com.hk/citations?user=m7_ceMcAAAAJ;_UbTxnYAAAAJ;https://scholar.google.co.in/citations?user=D1LEg-YAAAAJ;Be21PkYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-1536-6529;0000-0003-2117-6083;0000-0001-5456-7035;0000-0003-3370-471X;0000-0002-3867-900X;0000-0001-6300-711X", "linkedin": ";;;;;shengyuan-chen-cn", "or_profile": "~Qinggang_Zhang2;~Junnan_Dong1;~Wen_Hua1;~Qing_Li5;~Xiao_Huang1;~CHEN_SHENGYUAN1", "aff": "The Hong Kong Polytechnic University, Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University", "aff_domain": "comp.polyu.edu.hk;connect.polyu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;PhD student;Associate Professor;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchen2024entity,\ntitle={Entity Alignment with Noisy Annotations from Large Language Models},\nauthor={Shengyuan Chen and Qinggang Zhang and Junnan Dong and Wen Hua and Qing Li and Xiao Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qfCQ54ZTX1}\n}", "github": "", "reviewers": "SZjY;GZQy;n1nd;GwVz", "pdf_size": 553725, "rating": "4;6;7;7", "confidence": "5;3;5;2", "soundness": "2;3;2;3", "novelty": "1;3;1;3", "presentation": "3;2;2;3", "wc_summary": "37;94;50;133", "wc_strengths": "37;22;30;63", "wc_weaknesses": "163;31;460;82", "wc_questions": "3;1;2;4", "wc_limitations": "1;1;8;6", "wc_review": "241;149;550;288", "wc_reply_reviewers": "352;0;579;24", "wc_reply_authors": "1556;0;1222;0", "reply_reviewers": "2;0;3;1", "reply_authors": "4;1;4;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 1.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.5, 37.897889123274396 ], "wc_strengths_avg": [ 38.0, 15.378556499229699 ], "wc_weaknesses_avg": [ 184.0, 166.15504807257588 ], "wc_questions_avg": [ 2.5, 1.118033988749895 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 307.0, 148.93790652483335 ], "wc_reply_reviewers_avg": [ 238.75, 240.68379151908007 ], "wc_reply_authors_avg": [ 694.5, 704.4677068539054 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2050459955320600427&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "comp.polyu.edu.hk;connect.polyu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qgzdGyQcDt", "title": "EEVR: A Dataset of Paired Physiological Signals and Textual Descriptions for Joint Emotion Representation Learning", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "EEVR (Emotion Elicitation in Virtual Reality) is a novel dataset specifically designed for language supervision-based pre-training of emotion recognition tasks, such as valence and arousal classification. It features high-quality physiological signals, including electrodermal activity (EDA) and photoplethysmography (PPG), acquired through emotion elicitation via 360-degree virtual reality (VR) videos.\nAdditionally, it includes subject-wise textual descriptions of emotions experienced during each stimulus gathered from qualitative interviews. The dataset consists of recordings from 37 participants and is the first dataset to pair raw text with physiological signals, providing additional contextual information that objective labels cannot offer. To leverage this dataset, we introduced the Contrastive Language Signal Pre-training (CLSP) method, which jointly learns representations using pairs of physiological signals and textual descriptions. Our results show that integrating self-reported textual descriptions with physiological signals significantly improves performance on emotion recognition tasks, such as arousal and valence classification. Moreover, our pre-trained CLSP model demonstrates strong zero-shot transferability to existing datasets, outperforming supervised baseline models, suggesting that the representations learned by our method are more contextualized and generalized. The dataset also includes baseline models for arousal, valence, and emotion classification, as well as code for data cleaning and feature extraction. Further details and access to the dataset are available at https://melangelabiiitd.github.io/EEVR/.", "keywords": "Emotion recognition;Physiological signal data;Emotion dataset;Affect Recognition;Representation Learning", "primary_area": "", "supplementary_material": "/attachment/bf32b2631ea70141464a86e2f9e3363052c1e753.pdf", "author": "Pragya Singh;Ritvik Budhiraja;Ankush Gupta;Anshul Goswami;Mohan Kumar;Pushpendra Singh", "authorids": "~Pragya_Singh1;~Ritvik_Budhiraja1;~Ankush_Gupta3;~Anshul_Goswami1;~Mohan_Kumar1;~Pushpendra_Singh1", "gender": "F;;M;M;M;M", "homepage": "https://alchemy18.github.io/pragyasingh/;https://ritvikbudhiraja.in;https://www.linkedin.com/in/ankush-gupta-9a9211224/;;https://www.cs.rit.edu/~mjk/;https://www.iiitd.edu.in/~pushpendra/", "dblp": ";;;;;55/1719-1", "google_scholar": "fxfhl98AAAAJ;;6LH_olwAAAAJ;;idH8IwYAAAAJ;https://scholar.google.com.tw/citations?user=qkSEUCMAAAAJ", "orcid": "0000-0003-3933-2224;;;0009-0009-6578-4233;0000-0002-0286-6997;0000-0003-2152-1027", "linkedin": "pragya-singh-438508113/;;ankush-gupta-9a9211224?trk=people-guest_people_search-card&originalSubdomain=in;anshul-goswami-83844b211/;mohan-kumar-0b05242/;pushpendras/", "or_profile": "~Pragya_Singh1;~Ritvik_Budhiraja1;~Ankush_Gupta3;~Anshul_Goswami1;~Mohan_Kumar1;~Pushpendra_Singh1", "aff": "Indraprastha Institute of Information Technology, Delhi;Indraprastha Institute of Information Technology, Delhi;Indraprastha Institute of Information Technology, Delhi;Indraprastha Institute of Information Technology, Delhi;Rochester Institute of Technology;Indraprastha Institute of Information Technology, Delhi", "aff_domain": "iiitd.ac.in;iiitd.ac.in;iiitd.ac.in;iiitd.ac.in;rit.edu;iiitd.ac.in", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsingh2024eevr,\ntitle={{EEVR}: A Dataset of Paired Physiological Signals and Textual Descriptions for Joint Emotion Representation Learning},\nauthor={Pragya Singh and Ritvik Budhiraja and Ankush Gupta and Anshul Goswami and Mohan Kumar and Pushpendra Singh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qgzdGyQcDt}\n}", "github": "", "project": "", "reviewers": "B6pf;d4N1;UtWW", "site": "https://openreview.net/forum?id=qgzdGyQcDt", "pdf_size": 429146, "rating": "5;6;9", "confidence": "4;4;4", "wc_summary_and_contributions": "12;77;118", "wc_strengths": "16;39;72", "wc_improvement": "18;134;75", "wc_limitations": "25;137;98", "wc_correctness": "30;19;21", "wc_clarity": "3;17;5", "wc_relation_to_prior_work": "1;73;20", "wc_documentation": "1;24;29", "wc_additional_feedback": "1;1;1", "wc_review": "107;521;439", "wc_reply_reviewers": "80;234;0", "wc_reply_authors": "766;507;0", "reply_reviewers": "1;1;0", "reply_authors": "3;3;1", "rating_avg": [ 6.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 69.0, 43.642486944108335 ], "wc_strengths_avg": [ 42.333333333333336, 22.9830855679176 ], "wc_improvement_avg": [ 75.66666666666667, 47.359147890232244 ], "wc_limitations_avg": [ 86.66666666666667, 46.420780787152744 ], "wc_correctness_avg": [ 23.333333333333332, 4.784233364802441 ], "wc_clarity_avg": [ 8.333333333333334, 6.182412330330469 ], "wc_relation_to_prior_work_avg": [ 31.333333333333332, 30.466739606039603 ], "wc_documentation_avg": [ 18.0, 12.192894105447921 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 355.6666666666667, 178.99224067601986 ], "wc_reply_reviewers_avg": [ 104.66666666666667, 97.10933128295252 ], "wc_reply_authors_avg": [ 424.3333333333333, 318.1344928729916 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b4YVhoN-3SgJ:scholar.google.com/&scioq=EEVR:+A+Dataset+of+Paired+Physiological+Signals+and+Textual+Descriptions+for+Joint+Emotion+Representation+Learning&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Indraprastha Institute of Information Technology;Rochester Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.iiitd.ac.in;https://www.rit.edu", "aff_unique_abbr": "IIIT-D;RIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "India;United States" }, { "title": "SocraticLM: Exploring Socratic Personalized Teaching with Large Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93477", "id": "qkoZgJhxsA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qkoZgJhxsA", "openreview": "https://openreview.net/forum?id=qkoZgJhxsA", "poster": "/media/PosterPDFs/NeurIPS%202024/93477.png?t=1730168447.439956", "project": "", "author_site": "Jiayu Liu, Zhenya Huang, Tong Xiao, Jing Sha, Jinze Wu, Qi Liu, Shijin Wang, Enhong Chen", "tldr": "", "abstract": "Large language models (LLMs) are considered a crucial technology for advancing intelligent education since they exhibit the potential for an in-depth understanding of teaching scenarios and providing students with personalized guidance. Nonetheless, current LLM-based application in personalized teaching predominantly follows a \"Question-Answering\" paradigm, where students are passively provided with answers and explanations. In this paper, we propose SocraticLM, which achieves a Socratic \"Thought-Provoking\" teaching paradigm that fulfills the role of a real classroom teacher in actively engaging students in the thought process required for genuine problem-solving mastery. To build SocraticLM, we first propose a novel \"Dean-Teacher-Student\" multi-agent pipeline to construct a new dataset, SocraTeach, which contains $35$K meticulously crafted Socratic-style multi-round (equivalent to $208$K single-round) teaching dialogues grounded in fundamental mathematical problems. Our dataset simulates authentic teaching scenarios, interacting with six representative types of simulated students with different cognitive states, and strengthening four crucial teaching abilities. SocraticLM is then fine-tuned on SocraTeach with three strategies balancing its teaching and reasoning abilities. Moreover, we contribute a comprehensive evaluation system encompassing five pedagogical dimensions for assessing the teaching quality of LLMs. Extensive experiments verify that SocraticLM achieves significant improvements in the teaching performance, outperforming GPT4 by more than 12\\%. Our dataset and code is available at https://github.com/Ljyustc/SocraticLM.", "keywords": "Socratic Teaching;Large Language Models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/81bc6359aad617a25345cb5efe6dabba70223da9.zip", "author": "Jiayu Liu;Zhenya Huang;Tong Xiao;Jing Sha;Jinze Wu;Qi Liu;Shijin Wang;Enhong Chen", "authorids": "~Jiayu_Liu2;~Zhenya_Huang2;~Tong_Xiao7;~Jing_Sha1;~Jinze_Wu1;~Qi_Liu3;~Shijin_Wang1;~Enhong_Chen1", "gender": "M;M;M;M;;M;M;M", "homepage": "http://home.ustc.edu.cn/~jy251198/;http://staff.ustc.edu.cn/~huangzhy/;https://tongxiao2002.github.io;;;http://staff.ustc.edu.cn/~qiliuql/;;http://staff.ustc.edu.cn/~cheneh", "dblp": "120/1047-1;178/8690;;96/5272;;95/2446-3;74/5750-1.html;07/258", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;dVZuU90AAAAJ;;;;5EoHAFwAAAAJ;;Q9h02J0AAAAJ", "orcid": "0000-0001-8639-3308;0000-0003-1661-0420;0009-0003-2697-5164;;0000-0001-9957-5733;0000-0001-6956-5550;0000-0002-9202-7678;0000-0002-4835-4102", "linkedin": ";;;jing-sha-52482737/;;;;", "or_profile": "~Jiayu_Liu2;~Zhenya_Huang2;~Tong_Xiao7;~Jing_Sha1;~Jinze_Wu1;~Qi_Liu3;~Shijin_Wang1;~Enhong_Chen1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;iFLYTEK Research;;University of Science and Technology of China;State Key Laboratory of Cognitive Intelligence;University of Science and Technology of China", "aff_domain": "ustc.edu;ustc.edu.cn;mail.ustc;iflytek.com;;ustc.edu.cn;iflytek.com;ustc.edu.cn", "position": "PhD student;Associate Professor;MS student;Researcher;;Full Professor;Vice Dean;Full Professor", "bibtex": "@inproceedings{\nliu2024socraticlm,\ntitle={Socratic{LM}: Exploring Socratic Personalized Teaching with Large Language Models},\nauthor={Jiayu Liu and Zhenya Huang and Tong Xiao and Jing Sha and Jinze Wu and Qi Liu and Shijin Wang and Enhong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qkoZgJhxsA}\n}", "github": "", "reviewers": "twUm;sW5a;3PwF;3yrk", "pdf_size": 1409629, "rating": "6;7;8;8", "confidence": "3;4;5;4", "soundness": "3;3;3;3", "novelty": "3;4;4;3", "presentation": "2;3;3;3", "wc_summary": "147;15;122;120", "wc_strengths": "95;65;173;127", "wc_weaknesses": "182;194;72;104", "wc_questions": "122;38;67;82", "wc_limitations": "137;34;4;1", "wc_review": "683;346;438;434", "wc_reply_reviewers": "0;17;13;12", "wc_reply_authors": "54;136;78;128", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;3", "rating_avg": [ 7.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.0, 50.77893263943227 ], "wc_strengths_avg": [ 115.0, 40.024992192379 ], "wc_weaknesses_avg": [ 138.0, 51.43928459844674 ], "wc_questions_avg": [ 77.25, 30.293357357678268 ], "wc_limitations_avg": [ 44.0, 55.2222781130949 ], "wc_review_avg": [ 475.25, 125.45392580545257 ], "wc_reply_reviewers_avg": [ 10.5, 6.34428877022476 ], "wc_reply_authors_avg": [ 99.0, 34.19064199455752 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.8528028654224417, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5569411890469536615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ustc.edu;ustc.edu.cn;mail.ustc;iflytek.com;;ustc.edu.cn;iflytek.com;ustc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;2;0", "aff_unique_norm": "University of Science and Technology of China;iFLYTEK;State Key Laboratory of Cognitive Intelligence", "aff_unique_dep": ";Research;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.iflytek.com;", "aff_unique_abbr": "USTC;iFLYTEK;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Adaptive Proximal Gradient Method for Convex Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93476", "id": "qlH21Ig1IC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qlH21Ig1IC", "openreview": "https://openreview.net/forum?id=qlH21Ig1IC", "poster": "/media/PosterPDFs/NeurIPS%202024/93476.png?t=1733439711.3340533", "project": "", "author_site": "Yura Malitsky, Konstantin Mishchenko", "tldr": "", "abstract": "In this paper, we explore two fundamental first-order algorithms in convex optimization, namely, gradient descent (GD) and proximal gradient method (ProxGD). Our focus is on making these algorithms entirely adaptive by leveraging local curvature information of smooth functions. We propose adaptive versions of GD and ProxGD that are based on observed gradient differences and, thus, have no added computational costs. Moreover, we prove convergence of our methods assuming only local Lipschitzness of the gradient. In addition, the proposed versions allow for even larger stepsizes than those initially suggested in [MM20].", "keywords": "adaptive methods;gradient descent;proximal gradient method", "primary_area": "optimization", "supplementary_material": "", "author": "Yura Malitsky;Konstantin Mishchenko", "authorids": "~Yura_Malitsky1;~Konstantin_Mishchenko1", "gender": ";", "homepage": "https://ymalitsky.com/;https://konstmish.com/", "dblp": "219/7903;222/9853", "google_scholar": "GI_-KjoAAAAJ;Z8Y8nhQAAAAJ", "orcid": "0000-0001-7325-5766;", "linkedin": ";", "or_profile": "~Yura_Malitsky1;~Konstantin_Mishchenko1", "aff": "Universit\u00e4t Vienna;Samsung", "aff_domain": "univie.ac.at;samsung.com", "position": "Assistant Professor;Researcher", "bibtex": "@inproceedings{\nmalitsky2024adaptive,\ntitle={Adaptive Proximal Gradient Method for Convex Optimization},\nauthor={Yura Malitsky and Konstantin Mishchenko},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qlH21Ig1IC}\n}", "github": "", "reviewers": "NQsh;Wsqq;GkbV;5AVv", "pdf_size": 832993, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;4", "wc_summary": "37;128;65;172", "wc_strengths": "38;111;90;120", "wc_weaknesses": "57;90;28;181", "wc_questions": "121;293;9;1", "wc_limitations": "3;1;1;1", "wc_review": "256;623;193;475", "wc_reply_reviewers": "337;120;0;0", "wc_reply_authors": "317;41;0;0", "reply_reviewers": "3;2;0;0", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 100.5, 52.822817039608935 ], "wc_strengths_avg": [ 89.75, 31.799174517587716 ], "wc_weaknesses_avg": [ 89.0, 57.467382052778426 ], "wc_questions_avg": [ 106.0, 117.9279441014724 ], "wc_limitations_avg": [ 1.5, 0.8660254037844386 ], "wc_review_avg": [ 386.75, 171.92785550922224 ], "wc_reply_reviewers_avg": [ 114.25, 137.61972060718622 ], "wc_reply_authors_avg": [ 89.5, 132.40940298936476 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15552626904192035834&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "univie.ac.at;samsung.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "University of Vienna;Samsung", "aff_unique_dep": ";Samsung", "aff_unique_url": "https://univie.ac.at;https://www.samsung.com", "aff_unique_abbr": "UV;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Austria;South Korea" }, { "title": "Identifiable Object-Centric Representation Learning via Probabilistic Slot Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93475", "id": "qmoVQbwmCY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qmoVQbwmCY", "openreview": "https://openreview.net/forum?id=qmoVQbwmCY", "poster": "/media/PosterPDFs/NeurIPS%202024/93475.png?t=1731524064.0534508", "project": "", "author_site": "Avinash Kori, Francesco Locatello, Ainkaran Santhirasekaram, Francesca Toni, Ben Glocker, Fabio De Sousa Ribeiro", "tldr": "", "abstract": "Learning modular object-centric representations is said to be crucial for systematic generalization. Existing methods show promising object-binding capabilities empirically, but theoretical identifiability guarantees remain relatively underdeveloped. Understanding when object-centric representations can theoretically be identified is important for scaling slot-based methods to high-dimensional images with correctness guarantees. To that end, we propose a probabilistic slot-attention algorithm that imposes an *aggregate* mixture prior over object-centric slot representations, thereby providing slot identifiability guarantees without supervision, up to an equivalence relation. We provide empirical verification of our theoretical identifiability result using both simple 2-dimensional data and high-resolution imaging datasets.", "keywords": "Object-centric learning;Probabilistic slot-attention;Identifiability;latent mixture models", "primary_area": "generative_models", "supplementary_material": "", "author": "Avinash Kori;Francesco Locatello;Ainkaran Santhirasekaram;Francesca Toni;Ben Glocker;Fabio De Sousa Ribeiro", "authorids": "~Avinash_Kori1;~Francesco_Locatello1;~Ainkaran_Santhirasekaram1;~Francesca_Toni1;~Ben_Glocker1;~Fabio_De_Sousa_Ribeiro1", "gender": "M;M;F;M;;M", "homepage": "https://twitter.com/FrancescoLocat8;;https://www.doc.ic.ac.uk/~ft/;https://www.doc.ic.ac.uk/~bglocker;https://github.com/fabio-deep;https://koriavinash1.github.io/", "dblp": "195/6074;304/4043;t/FrancescaToni;86/2890;222/8450;215/3763", "google_scholar": ";;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=g_HtjLIAAAAJ;iIcKRG0AAAAJ;https://scholar.google.co.uk/citations?user=X6o3q0gAAAAJ", "orcid": ";;0000-0001-8194-1459;0000-0002-4897-9356;;0000-0002-5878-3584", "linkedin": ";;;;;koriavinash1/", "or_profile": "~Francesco_Locatello1;~Ainkaran_Santhirasekaram1;~Francesca_Toni1;~Ben_Glocker1;~Fabio_De_Sousa_Ribeiro1;~Avinash_G._Kori1", "aff": "Institute of Science and Technology;Imperial College London;Imperial College London;Imperial College London;Imperial College London;Imperial College London", "aff_domain": "ist.ac.at;imperial.ac.uk;ic.ac.uk;imperial.ac.uk;imperial.ac.uk;ic.ac.uk", "position": "Assistant Professor;Researcher;Full Professor;Full Professor;Postdoc;PhD student", "bibtex": "@inproceedings{\nkori2024identifiable,\ntitle={Identifiable Object-Centric Representation Learning via Probabilistic Slot Attention},\nauthor={Avinash Kori and Francesco Locatello and Ainkaran Santhirasekaram and Francesca Toni and Ben Glocker and Fabio De Sousa Ribeiro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qmoVQbwmCY}\n}", "github": "", "reviewers": "HxYt;PnmW;27BN;11Sk", "pdf_size": 6943800, "rating": "6;6;6;6", "confidence": "3;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "105;84;85;45", "wc_strengths": "66;171;119;44", "wc_weaknesses": "318;920;241;10", "wc_questions": "91;105;153;7", "wc_limitations": "9;57;29;1", "wc_review": "589;1337;627;107", "wc_reply_reviewers": "45;860;191;25", "wc_reply_authors": "0;991;478;133", "reply_reviewers": "1;5;1;1", "reply_authors": "1;6;3;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.75, 21.741377601246892 ], "wc_strengths_avg": [ 100.0, 49.22905646059043 ], "wc_weaknesses_avg": [ 372.25, 335.9407499842792 ], "wc_questions_avg": [ 89.0, 52.630789467763066 ], "wc_limitations_avg": [ 24.0, 21.61018278497431 ], "wc_review_avg": [ 665.0, 438.79608020127074 ], "wc_reply_reviewers_avg": [ 280.25, 340.7971354046275 ], "wc_reply_authors_avg": [ 400.5, 382.96638233662236 ], "reply_reviewers_avg": [ 2.0, 1.7320508075688772 ], "reply_authors_avg": [ 3.25, 1.7853571071357126 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12398775727002098638&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "ist.ac.at;imperial.ac.uk;ic.ac.uk;imperial.ac.uk;imperial.ac.uk;ic.ac.uk", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Institute of Science and Technology;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": ";https://www.imperial.ac.uk", "aff_unique_abbr": ";ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1", "aff_country_unique": ";United Kingdom" }, { "title": "WhodunitBench: Evaluating Large Multimodal Agents via Murder Mystery Games", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97492", "id": "qmvtDIfbmS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qmvtDIfbmS", "openreview": "https://openreview.net/forum?id=qmvtDIfbmS", "poster": "/media/PosterPDFs/NeurIPS%202024/97492.png?t=1731691791.3403513", "project": "", "author_site": "Junlin Xie, Ruifei Zhang, Zhihong Chen, Xiang Wan, Guanbin Li", "tldr": "", "abstract": "Recently, large language models (LLMs) have achieved superior performance, empowering the development of large multimodal agents (LMAs). An LMA is anticipated to execute practical tasks requires various capabilities including multimodal perception, interaction, reasoning, and decision making. However, existing benchmarks are limited in assessing compositional skills and actions demanded by practical scenarios, where they primarily focused on single tasks and static scenarios. To bridge this gap, we introduce WhodunitBench, a benchmark rooted from murder mystery games, where players are required to utilize the aforementioned skills to achieve their objective (i.e., identifying the `murderer' or hiding themselves), providing a simulated dynamic environment for evaluating LMAs. Specifically, WhodunitBench includes two evaluation modes. The first mode, the arena-style evaluation, is constructed from 50 meticulously curated scripts featuring clear reasoning clues and distinct murderers; The second mode, the chain of evaluation, consists of over 3000 curated multiple-choice questions and open-ended questions, aiming to assess every facet of the murder mystery games for LMAs. Experiments show that although current LMAs show acceptable performance in basic perceptual tasks, they are insufficiently equipped for complex multi-agent collaboration and multi-step reasoning tasks. Furthermore, the full application of the theory of mind to complete games in a manner akin to human behavior remains a significant challenge. We hope this work can illuminate the path forward, providing a solid foundation for the future development of LMAs. Our WhodunitBench is open-source and accessible at: https://github.com/\njun0wanan/WhodunitBench-Murder_Mystery_Games", "keywords": "Large Multimodal Agent;Multi-step Reasoning;Evaluation", "primary_area": "", "supplementary_material": "/attachment/48ca0df36a728f33b3e84bc10734a80dcc1a04a9.zip", "author": "Junlin Xie;Ruifei Zhang;Zhihong Chen;Xiang Wan;Guanbin Li", "authorids": "~Junlin_Xie1;~Ruifei_Zhang1;~Zhihong_Chen2;~Xiang_Wan1;~Guanbin_Li2", "gender": "F;M;M;M;M", "homepage": ";;;http://www.sribd.cn/teacher/28;http://guanbinli.com", "dblp": "291/5895;248/4758;78/3726;;126/4457", "google_scholar": ";W4zOhmEAAAAJ;y55sF8cAAAAJ;;2A2Bx2UAAAAJ", "orcid": ";;;;0000-0002-2486-2890", "linkedin": ";;;;", "or_profile": "~Junlin_Xie1;~Ruifei_Zhang1;~Zhihong_Chen2;~Xiang_Wan1;~Guanbin_Li2", "aff": "The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;Shenzhen Research Institute of Big Data;SUN YAT-SEN UNIVERSITY", "aff_domain": "link.cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;sribd.cn;sysu.edu.cn", "position": "PhD student;PhD student;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nxie2024whodunitbench,\ntitle={WhodunitBench: Evaluating Large Multimodal Agents via Murder Mystery Games},\nauthor={Junlin Xie and Ruifei Zhang and Zhihong Chen and Xiang Wan and Guanbin Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qmvtDIfbmS}\n}", "github": "", "reviewers": "1p2a;7EJe;PMa7;NGnP", "pdf_size": 37365963, "rating": "6;6;8;8", "confidence": "2;4;3;4", "wc_summary_and_contributions": "33;42;115;103", "wc_strengths": "65;25;86;120", "wc_improvement": "4;389;80;345", "wc_limitations": "64;40;68;199", "wc_correctness": "41;76;10;13", "wc_clarity": "164;194;13;64", "wc_relation_to_prior_work": "37;1;22;69", "wc_documentation": "13;56;70;16", "wc_additional_feedback": "1;1;1;1", "wc_review": "422;824;465;930", "wc_reply_reviewers": "61;101;0;0", "wc_reply_authors": "88;81;0;0", "reply_reviewers": "1;2;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 7.0, 1.0 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 73.25, 36.1412160835797 ], "wc_strengths_avg": [ 74.0, 34.43109060137364 ], "wc_improvement_avg": [ 204.5, 165.43956600523347 ], "wc_limitations_avg": [ 92.75, 62.27108076788133 ], "wc_correctness_avg": [ 35.0, 26.580067720004024 ], "wc_clarity_avg": [ 108.75, 73.29861867729842 ], "wc_relation_to_prior_work_avg": [ 32.25, 24.772716847370617 ], "wc_documentation_avg": [ 38.75, 24.772716847370617 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 660.25, 220.49078778942217 ], "wc_reply_reviewers_avg": [ 40.5, 42.8981351576033 ], "wc_reply_authors_avg": [ 42.25, 42.32242313478754 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6769200826972011590&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "link.cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn;sribd.cn;sysu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Chinese University of Hong Kong;Shenzhen Research Institute of Big Data;Sun Yat-sen University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.sribd.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "CUHK;;SYSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Symmetry Discovery Beyond Affine Transformations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93474", "id": "qo7NtGMr2u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qo7NtGMr2u", "openreview": "https://openreview.net/forum?id=qo7NtGMr2u", "poster": "", "project": "", "author_site": "Ben Shaw, Abram Magner, Kevin Moon", "tldr": "", "abstract": "Symmetry detection has been shown to improve various machine learning tasks. In the context of continuous symmetry detection, current state of the art experiments are limited to the detection of affine transformations. Under the manifold assumption, we outline a framework for discovering continuous symmetry in data beyond the affine transformation group. We also provide a similar framework for discovering discrete symmetry. We experimentally compare our method to an existing method known as LieGAN and show that our method is competitive at detecting affine symmetries for large sample sizes and superior than LieGAN for small sample sizes. We also show our method is able to detect continuous symmetries beyond the affine group and is generally more computationally efficient than LieGAN.", "keywords": "symmetry detection;isometries;infinitesimal generators;Killing vectors;Riemannian metric;transformation groups;manifold learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Ben Shaw;Abram Magner;Kevin R. Moon", "authorids": "~Ben_Shaw1;~Abram_Magner1;~Kevin_R._Moon1", "gender": "M;;M", "homepage": ";https://www.albany.edu/faculty/amagner;https://sites.google.com/a/umich.edu/kevin-r-moon/home", "dblp": ";141/2242;135/7183", "google_scholar": ";;CkwC_ikAAAAJ", "orcid": ";;", "linkedin": "ben-shaw-b6333290;;", "or_profile": "~Ben_Shaw1;~Abram_Magner1;~Kevin_R._Moon1", "aff": "Utah State University;State University of New York, Albany;Utah State University", "aff_domain": "usu.edu;albany.edu;usu.edu", "position": "PhD student;Assistant professor;Associate Professor", "bibtex": "@inproceedings{\nshaw2024symmetry,\ntitle={Symmetry Discovery Beyond Affine Transformations},\nauthor={Ben Shaw and Abram Magner and Kevin R. Moon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qo7NtGMr2u}\n}", "github": "", "reviewers": "U1Sk;fxa1;uBWo;S4uK", "pdf_size": 1426951, "rating": "4;5;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;2", "presentation": "2;2;4;2", "wc_summary": "63;83;59;63", "wc_strengths": "12;20;75;21", "wc_weaknesses": "132;176;99;204", "wc_questions": "41;30;42;227", "wc_limitations": "4;60;47;31", "wc_review": "252;369;322;546", "wc_reply_reviewers": "27;143;226;836", "wc_reply_authors": "0;172;1160;724", "reply_reviewers": "1;1;2;2", "reply_authors": "1;2;3;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 67.0, 9.38083151964686 ], "wc_strengths_avg": [ 32.0, 25.06990227344335 ], "wc_weaknesses_avg": [ 152.75, 40.2701812759764 ], "wc_questions_avg": [ 85.0, 82.11881635776298 ], "wc_limitations_avg": [ 35.5, 20.886598574205422 ], "wc_review_avg": [ 372.25, 108.61025504067284 ], "wc_reply_reviewers_avg": [ 308.0, 312.9273078527983 ], "wc_reply_authors_avg": [ 514.0, 458.95969321935013 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17627546920743382831&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "usu.edu;albany.edu;usu.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Utah State University;State University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.usu.edu;https://www.albany.edu", "aff_unique_abbr": "USU;SUNY Albany", "aff_campus_unique_index": "1", "aff_campus_unique": ";Albany", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Softmax Direct Preference Optimization for Recommendation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93473", "id": "qp5VbGTaM0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qp5VbGTaM0", "openreview": "https://openreview.net/forum?id=qp5VbGTaM0", "poster": "/media/PosterPDFs/NeurIPS%202024/93473.png?t=1731348457.2816849", "project": "", "author_site": "Yuxin Chen, Junfei Tan, An Zhang, Zhengyi Yang, Leheng Sheng, Enzhi Zhang, Xiang Wang, Tat-Seng Chua", "tldr": "", "abstract": "Recommender systems aim to predict personalized rankings based on user preference data. With the rise of Language Models (LMs), LM-based recommenders have been widely explored due to their extensive world knowledge and powerful reasoning abilities. Most of the LM-based recommenders convert historical interactions into language prompts, pairing with a positive item as the target response and fine-tuning LM with a language modeling loss. However, the current objective fails to fully leverage preference data and is not optimized for personalized ranking tasks, which hinders the performance of LM-based recommenders. Inspired by the current advancement of Direct Preference Optimization (DPO) in human preference alignment and the success of softmax loss in recommendations, we propose Softmax-DPO (\\textbf{S-DPO}) to instill ranking information into the LM to help LM-based recommenders distinguish preferred items from negatives, rather than solely focusing on positives. Specifically, we incorporate multiple negatives in user preference data and devise an alternative version of DPO loss tailored for LM-based recommenders, which is extended from the traditional full-ranking Plackett-Luce (PL) model to partial rankings and connected to softmax sampling strategies. Theoretically, we bridge S-DPO with the softmax loss over negative sampling and find that it has an inherent benefit of mining hard negatives, which assures its exceptional capabilities in recommendation tasks. Empirically, extensive experiments conducted on three real-world datasets demonstrate the superiority of S-DPO to effectively model user preference and further boost recommendation performance while providing better rewards for preferred items. Our codes are available at https://github.com/chenyuxin1999/S-DPO.", "keywords": "Sequential Recommendation;Large Language Models", "primary_area": "other", "supplementary_material": "", "author": "Yuxin Chen;Junfei Tan;An Zhang;Zhengyi Yang;Leheng Sheng;Enzhi Zhang;Xiang Wang;Tat-Seng Chua", "authorids": "~Yuxin_Chen9;~Junfei_Tan1;~An_Zhang2;~Zhengyi_Yang1;~Leheng_Sheng2;~Enzhi_Zhang1;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;;M;M;M;F;M", "homepage": "https://scholar.google.com/citations?user=krdkLawAAAAJ&hl=en;https://github.com/sober-clever;https://github.com/YangZhengyi98;https://lehengthu.github.io/;;https://github.com/xiangwang1223;https://github.com/anzhang314;http://www.comp.nus.edu.sg/~chuats/", "dblp": ";379/9942;;359/0347.html;;31/2864-10;78/5581-3;", "google_scholar": "krdkLawAAAAJ;;;https://scholar.google.com.hk/citations?user=s8bNbU0AAAAJ;;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "orcid": "0009-0003-6715-4637;;;0000-0002-5764-6596;0000-0002-6421-0192;0000-0002-6148-6329;;0000-0001-6097-7807", "linkedin": ";;;;;;;", "or_profile": "~Yuxin_Chen9;~Junfei_Tan1;~Zhengyi_Yang1;~Leheng_Sheng2;~Enzhi_Zhang1;~Xiang_Wang6;~AN_ZHANG1;~Tat-seng_Chua1", "aff": "National University of Singapore;University of Science and Technology of China;University of Science and Technology of China;Tsinghua University;Hokkaido University;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;mail.ustc.edu.cn;ustc.edu.cn;mails.tsinghua.edu.cn;hokudai.ac.jp;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "MS student;Undergrad student;PhD student;MS student;PhD student;Full Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nchen2024on,\ntitle={On Softmax Direct Preference Optimization for Recommendation},\nauthor={Yuxin Chen and Junfei Tan and An Zhang and Zhengyi Yang and Leheng Sheng and Enzhi Zhang and Xiang Wang and Tat-Seng Chua},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qp5VbGTaM0}\n}", "github": "", "reviewers": "gGah;6tmb;AMu8;iyxD", "pdf_size": 1444087, "rating": "4;5;5;5", "confidence": "4;3;4;3", "soundness": "3;3;2;2", "novelty": "2;2;2;3", "presentation": "2;3;3;2", "wc_summary": "48;118;7;26", "wc_strengths": "49;17;26;27", "wc_weaknesses": "106;15;43;415", "wc_questions": "14;302;43;117", "wc_limitations": "58;4;13;58", "wc_review": "275;456;132;643", "wc_reply_reviewers": "0;147;0;0", "wc_reply_authors": "331;416;119;926", "reply_reviewers": "0;1;0;0", "reply_authors": "4;5;3;3", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 49.75, 41.99032626689152 ], "wc_strengths_avg": [ 29.75, 11.776565713313877 ], "wc_weaknesses_avg": [ 144.75, 159.47158837861997 ], "wc_questions_avg": [ 119.0, 112.13161909113771 ], "wc_limitations_avg": [ 33.25, 24.953707139421187 ], "wc_review_avg": [ 376.5, 191.9798166474799 ], "wc_reply_reviewers_avg": [ 36.75, 63.65286717815624 ], "wc_reply_authors_avg": [ 448.0, 296.4110321833518 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15461794692622136208&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u.nus.edu;mail.ustc.edu.cn;ustc.edu.cn;mails.tsinghua.edu.cn;hokudai.ac.jp;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "author_num": 8, "aff_unique_index": "0;1;1;2;3;1;0;0", "aff_unique_norm": "National University of Singapore;University of Science and Technology of China;Tsinghua University;Hokkaido University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;http://www.ustc.edu.cn;https://www.tsinghua.edu.cn;https://www.hokudai.ac.jp", "aff_unique_abbr": "NUS;USTC;THU;Hokkaido U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2;1;0;0", "aff_country_unique": "Singapore;China;Japan" }, { "title": "Variational Multi-scale Representation for Estimating Uncertainty in 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93472", "id": "qpeAtfUWOQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qpeAtfUWOQ", "openreview": "https://openreview.net/forum?id=qpeAtfUWOQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93472.png?t=1732956341.494691", "project": "", "author_site": "Ruiqi Li, Yiu-ming Cheung", "tldr": "", "abstract": "Recently, 3D Gaussian Splatting (3DGS) has become popular in reconstructing dense 3D representations of appearance and geometry. However, the learning pipeline in 3DGS inherently lacks the ability to quantify uncertainty, which is an important factor in applications like robotics mapping and navigation. In this paper, we propose an uncertainty estimation method built upon the Bayesian inference framework. Specifically, we propose a method to build variational multi-scale 3D Gaussians, where we leverage explicit scale information in 3DGS parameters to construct diversified parameter space samples. We develop an offset table technique to draw local multi-scale samples efficiently by offsetting selected attributes and sharing other base attributes. Then, the offset table is learned by variational inference with multi-scale prior. The learned offset posterior can quantify the uncertainty of each individual Gaussian component, and be used in the forward pass to infer the predictive uncertainty. Extensive experimental results on various benchmark datasets show that the proposed method provides well-aligned calibration performance on estimated uncertainty and better rendering quality compared with the previous methods that enable uncertainty quantification with view synthesis. Besides, by leveraging the model parameter uncertainty estimated by our method, we can remove noisy Gaussians automatically, thereby obtaining a high-fidelity part of the reconstructed scene, which is of great help in improving the visual quality.", "keywords": "Neural Rendering;Uncertainty Quantification", "primary_area": "machine_vision", "supplementary_material": "/attachment/0510d95a78e5b2f2187c06e66cb604c5fe9c7164.zip", "author": "Ruiqi Li;Yiu-ming Cheung", "authorids": "~Ruiqi_Li7;~Yiu-ming_Cheung1", "gender": "M;", "homepage": "https://www.comp.hkbu.edu.hk/~csrqli/;", "dblp": ";", "google_scholar": "a4H_EcsAAAAJ;", "orcid": "0000-0003-4535-7364;", "linkedin": ";", "or_profile": "~Ruiqi_Li7;~Yiu-ming_Cheung1", "aff": "Hong Kong Baptist University;", "aff_domain": "hkbu.edu.hk;", "position": "PhD student;", "bibtex": "@inproceedings{\nli2024variational,\ntitle={Variational Multi-scale Representation for Estimating Uncertainty in 3D Gaussian Splatting},\nauthor={Ruiqi Li and Yiu-ming Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qpeAtfUWOQ}\n}", "github": "", "reviewers": "4RNR;UQFr;ZTX9;XD3Y", "pdf_size": 19124055, "rating": "4;6;7;7", "confidence": "4;4;4;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;3;3", "wc_summary": "73;40;108;96", "wc_strengths": "96;50;56;136", "wc_weaknesses": "203;27;125;113", "wc_questions": "60;87;83;3", "wc_limitations": "18;14;9;14", "wc_review": "450;218;381;362", "wc_reply_reviewers": "0;43;47;30", "wc_reply_authors": "33;26;30;52", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;2;3", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.25, 25.916934618121797 ], "wc_strengths_avg": [ 84.5, 34.59407463713981 ], "wc_weaknesses_avg": [ 117.0, 62.40192304729078 ], "wc_questions_avg": [ 58.25, 33.521448357730605 ], "wc_limitations_avg": [ 13.75, 3.191786333700926 ], "wc_review_avg": [ 352.75, 84.40786397013018 ], "wc_reply_reviewers_avg": [ 30.0, 18.425525772688278 ], "wc_reply_authors_avg": [ 35.25, 9.98436277385793 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1464655708550764637&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "hkbu.edu.hk;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Hong Kong Baptist University", "aff_unique_dep": "", "aff_unique_url": "https://www.hkbu.edu.hk", "aff_unique_abbr": "HKBU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Conjugated Semantic Pool Improves OOD Detection with Pre-trained Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93471", "id": "qqQFOcUEqM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qqQFOcUEqM", "openreview": "https://openreview.net/forum?id=qqQFOcUEqM", "poster": "", "project": "", "author_site": "Mengyuan Chen, Junyu Gao, Changsheng Xu", "tldr": "", "abstract": "A straightforward pipeline for zero-shot out-of-distribution (OOD) detection involves selecting potential OOD labels from an extensive semantic pool and then leveraging a pre-trained vision-language model to perform classification on both in-distribution (ID) and OOD labels. In this paper, we theorize that enhancing performance requires expanding the semantic pool, while increasing the expected probability of selected OOD labels being activated by OOD samples, and ensuring low mutual dependence among the activations of these OOD labels. A natural expansion manner is to adopt a larger lexicon; however, the inevitable introduction of numerous synonyms and uncommon words fails to meet the above requirements, indicating that viable expansion manners move beyond merely selecting words from a lexicon. Since OOD detection aims to correctly classify input images into ID/OOD class groups, we can \"make up\" OOD label candidates which are not standard class names but beneficial for the process. Observing that the original semantic pool is comprised of unmodified specific class names, we correspondingly construct a conjugated semantic pool (CSP) consisting of modified superclass names, each serving as a cluster center for samples sharing similar properties across different categories. Consistent with our established theory, expanding OOD label candidates with the CSP satisfies the requirements and outperforms existing works by 7.89% in FPR95. Codes are available in https://github.com/MengyuanChen21/NeurIPS2024-CSP.", "keywords": "out-of-distribution detection;zero-shot OOD detection;pre-trained vision-language models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mengyuan Chen;Junyu Gao;Changsheng Xu", "authorids": "~Mengyuan_Chen1;~Junyu_Gao1;~Changsheng_Xu1", "gender": "M;M;M", "homepage": "https://github.com/MengyuanChen21;;", "dblp": ";153/4522;85/1301", "google_scholar": "uwaZ_wIAAAAJ;y1nOY24AAAAJ;https://scholar.google.com.sg/citations?user=hI9NRDkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Mengyuan_Chen1;~Junyu_Gao1;~Changsheng_Xu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024conjugated,\ntitle={Conjugated Semantic Pool Improves {OOD} Detection with Pre-trained Vision-Language Models},\nauthor={Mengyuan Chen and Junyu Gao and Changsheng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qqQFOcUEqM}\n}", "github": "", "reviewers": "pEkd;p3ao;gvS3;KrTH;QWh5", "pdf_size": 43158043, "rating": "4;6;6;7;7", "confidence": "4;5;5;5;4", "soundness": "2;3;3;4;3", "novelty": "2;2;4;4;3", "presentation": "3;3;3;4;4", "wc_summary": "59;94;83;96;81", "wc_strengths": "22;31;43;95;49", "wc_weaknesses": "75;426;200;361;157", "wc_questions": "2;2;2;2;1", "wc_limitations": "1;1;1;2;4", "wc_review": "159;554;329;556;292", "wc_reply_reviewers": "0;16;49;15;26", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 4.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.8944271909999159 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 82.6, 13.184839779079606 ], "wc_strengths_avg": [ 48.0, 25.298221281347036 ], "wc_weaknesses_avg": [ 243.8, 130.29105878762365 ], "wc_questions_avg": [ 1.8, 0.4 ], "wc_limitations_avg": [ 1.8, 1.1661903789690602 ], "wc_review_avg": [ 378.0, 155.18891713005797 ], "wc_reply_reviewers_avg": [ 21.2, 16.191355718407276 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.372677996249965, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4917826019639443518&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "CURE4Rec: A Benchmark for Recommendation Unlearning with Deeper Influence", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97491", "id": "qqU8WPw44f", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qqU8WPw44f", "openreview": "https://openreview.net/forum?id=qqU8WPw44f", "poster": "/media/PosterPDFs/NeurIPS%202024/97491.png?t=1731501882.8127618", "project": "", "author_site": "Chaochao Chen, Jiaming Zhang, Yizhao Zhang, Li Zhang, Lingjuan Lyu, Yuyuan Li, Biao Gong, Chenggang Yan", "tldr": "", "abstract": "With increasing privacy concerns in artificial intelligence, regulations have mandated the right to be forgotten, granting individuals the right to withdraw their data from models. Machine unlearning has emerged as a potential solution to enable selective forgetting in models, particularly in recommender systems where historical data contains sensitive user information. Despite recent advances in recommendation unlearning, evaluating unlearning methods comprehensively remains challenging due to the absence of a unified evaluation framework and overlooked aspects of deeper influence, e.g., fairness. To address these gaps, we propose CURE4Rec, the first comprehensive benchmark for recommendation unlearning evaluation. CURE4Rec covers four aspects, i.e., unlearning Completeness, recommendation Utility, unleaRning efficiency, and recommendation fairnEss, under three data selection strategies, i.e., core data, edge data, and random data. Specifically, we consider the deeper influence of unlearning on recommendation fairness and robustness towards data with varying impact levels. We construct multiple datasets with CURE4Rec evaluation and conduct extensive experiments on existing recommendation unlearning methods. Our code is released at https://github.com/xiye7lai/CURE4Rec.", "keywords": "recommendation unlearning;fairness;robustness;benchmark", "primary_area": "", "supplementary_material": "/attachment/f539b1092e65c5e462b4b2466aa6657ca9f6afdb.pdf", "author": "Chaochao Chen;Jiaming Zhang;Yizhao Zhang;Li Zhang;Lingjuan Lyu;Yuyuan Li;Biao Gong;Chenggang Yan", "authorids": "~Chaochao_Chen3;~Jiaming_Zhang7;~Yizhao_Zhang1;~Li_Zhang41;~Lingjuan_Lyu1;~Yuyuan_Li1;~Biao_Gong1;~Chenggang_Yan1", "gender": ";M;M;M;F;;M;M", "homepage": "https://sites.google.com/site/ccchomepage/;https://xiye7lai.github.io;https://www.linkedin.com/in/yizhao-zhang-84aa18151/;http://GitHub.io;https://sites.google.com/view/lingjuan-lyu;;https://github.com/Biao-Gong/;http://iipl.hdu.edu.cn/", "dblp": "26/1492-1;;;;178/9876;35/11288;252/9466;146/1605", "google_scholar": "qZTMyzwAAAAJ;aFC0W18AAAAJ;;;;v4e49qEAAAAJ;BwdpTiQAAAAJ;", "orcid": "0000-0003-1419-964X;0009-0001-7855-3372;;;;0000-0003-4896-2885;0000-0002-6156-0816;", "linkedin": "ccchomepage/;;yizhao-zhang-84aa18151/;;;;;", "or_profile": "~Chaochao_Chen3;~Jiaming_Zhang7;~Yizhao_Zhang1;~Li_Zhang41;~Lingjuan_Lyu1;~Yuyuan_Li1;~Biao_Gong1;~Chenggang_Yan1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Sony;Hangzhou Dianzi University;Ant Group;Hangzhou Dianzi University, Tsinghua University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;sony.com;hdu.edu.cn;antgroup.com;hdu.edu.cn", "position": "Distinguished Research Fellow;MS student;MS student;MS student;scientist;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2024curerec,\ntitle={{CURE}4Rec: A Benchmark for Recommendation Unlearning with Deeper Influence},\nauthor={Chaochao Chen and Jiaming Zhang and Yizhao Zhang and Li Zhang and Lingjuan Lyu and Yuyuan Li and Biao Gong and Chenggang Yan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qqU8WPw44f}\n}", "github": "", "reviewers": "Cg64;UvtH;nZM1;YV6m", "pdf_size": 745560, "rating": "7;7;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "37;81;41;74", "wc_strengths": "76;2;54;93", "wc_improvement": "12;61;44;175", "wc_limitations": "32;4;49;14", "wc_correctness": "40;8;43;8", "wc_clarity": "39;5;18;5", "wc_relation_to_prior_work": "31;4;12;6", "wc_documentation": "19;13;5;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "287;179;267;381", "wc_reply_reviewers": "0;0;0;33", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 58.25, 19.45989465541887 ], "wc_strengths_avg": [ 56.25, 34.23722389446901 ], "wc_improvement_avg": [ 73.0, 61.461369981476984 ], "wc_limitations_avg": [ 24.75, 17.224619008848933 ], "wc_correctness_avg": [ 24.75, 16.78354849249705 ], "wc_clarity_avg": [ 16.75, 13.899190623917638 ], "wc_relation_to_prior_work_avg": [ 13.25, 10.662434056068061 ], "wc_documentation_avg": [ 10.5, 5.894913061275798 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 278.5, 71.78265807282425 ], "wc_reply_reviewers_avg": [ 8.25, 14.289419162443238 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4437632964070040149&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;sony.com;hdu.edu.cn;antgroup.com;hdu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;2;3;2", "aff_unique_norm": "Zhejiang University;Sony Corporation;Hangzhou Dianzi University;Ant Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.sony.com;http://www.hdu.edu.cn/;https://www.antgroup.com", "aff_unique_abbr": "ZJU;Sony;HGHDU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "China;Japan" }, { "id": "qrZxL3Bto9", "title": "Evaluating language models as risk scores", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Current question-answering benchmarks predominantly focus on accuracy in realizable prediction tasks.\nConditioned on a question and answer-key, does the most likely token match the ground truth?\nSuch benchmarks necessarily fail to evaluate LLMs' ability to quantify ground-truth outcome uncertainty.\nIn this work, we focus on the use of LLMs as risk scores for unrealizable prediction tasks.\nWe introduce folktexts, a software package to systematically generate risk scores using LLMs, and evaluate them against US Census data products.\nA flexible API enables the use of different prompting schemes, local or web-hosted models, and diverse census columns that can be used to compose custom prediction tasks.\nWe evaluate 17 recent LLMs across five proposed benchmark tasks.\nWe find that zero-shot risk scores produced by multiple-choice question-answering have high predictive signal but are widely miscalibrated.\nBase models consistently overestimate outcome uncertainty, while instruction-tuned models underestimate uncertainty and produce over-confident risk scores.\nIn fact, instruction-tuning polarizes answer distribution regardless of true underlying data uncertainty.\nThis reveals a general inability of instruction-tuned models to express data uncertainty using multiple-choice answers.\nA separate experiment using verbalized chat-style risk queries yields substantially improved calibration across instruction-tuned models.\nThese differences in ability to quantify data uncertainty cannot be revealed in realizable settings, and highlight a blind-spot in the current evaluation ecosystem that folktexts covers.", "keywords": "large language models;uncertainty;calibration;risk scores;benchmark;census;tabular data", "primary_area": "", "supplementary_material": "/attachment/7016d575566b9bef1a657b6750dc70052121e2fe.pdf", "author": "Andr\u00e9 F Cruz;Moritz Hardt;Celestine Mendler-D\u00fcnner", "authorids": "~Andr\u00e9_F_Cruz1;~Moritz_Hardt1;~Celestine_Mendler-D\u00fcnner1", "gender": "Not Specified;;M", "homepage": "http://mrtz.org/;http://celestine.ai/;https://andrefcruz.github.io", "dblp": "26/4683;176/5511;231/2735", "google_scholar": "adnTgaAAAAAJ;UqtDdZUAAAAJ;https://scholar.google.pt/citations?user=ctk2MhUAAAAJ", "orcid": ";;0000-0002-3334-2838", "linkedin": ";;andre-f-cruz/", "or_profile": "~Moritz_Hardt1;~Celestine_Mendler-D\u00fcnner1;~Andr\u00e9_Cruz1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_domain": "is.mpg.de;tuebingen.mpg.de;is.mpg.de", "position": "Principal Researcher;Group Lead;PhD student", "bibtex": "@inproceedings{\ncruz2024evaluating,\ntitle={Evaluating language models as risk scores},\nauthor={Andr{\\'e} F Cruz and Moritz Hardt and Celestine Mendler-D{\\\"u}nner},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qrZxL3Bto9}\n}", "github": "", "project": "", "reviewers": "fCVW;8s9c;88cS;BkaU", "site": "https://openreview.net/forum?id=qrZxL3Bto9", "pdf_size": 1177744, "rating": "6;6;6;7", "confidence": "3;3;4;4", "wc_summary_and_contributions": "77;65;68;60", "wc_strengths": "54;217;18;70", "wc_improvement": "76;184;283;185", "wc_limitations": "46;251;6;3", "wc_correctness": "14;1;1;3", "wc_clarity": "22;1;1;3", "wc_relation_to_prior_work": "67;1;1;29", "wc_documentation": "77;1;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "434;722;380;360", "wc_reply_reviewers": "377;0;0;92", "wc_reply_authors": "840;857;0;757", "reply_reviewers": "2;0;0;2", "reply_authors": "4;3;1;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 67.5, 6.18465843842649 ], "wc_strengths_avg": [ 89.75, 75.84317701678906 ], "wc_improvement_avg": [ 182.0, 73.22909257938404 ], "wc_limitations_avg": [ 76.5, 102.16775420845855 ], "wc_correctness_avg": [ 4.75, 5.402545696243577 ], "wc_clarity_avg": [ 6.75, 8.842369591913696 ], "wc_relation_to_prior_work_avg": [ 24.5, 27.069355367278327 ], "wc_documentation_avg": [ 21.25, 32.251937926270415 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 474.0, 145.7189074897283 ], "wc_reply_reviewers_avg": [ 117.25, 154.59847185531945 ], "wc_reply_authors_avg": [ 613.5, 356.21938464940393 ], "reply_reviewers_avg": [ 1.0, 1.0 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4906564416039180477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "FactorizePhys: Matrix Factorization for Multidimensional Attention in Remote Physiological Sensing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93470", "id": "qrfp4eeZ47", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qrfp4eeZ47", "openreview": "https://openreview.net/forum?id=qrfp4eeZ47", "poster": "/media/PosterPDFs/NeurIPS%202024/93470.png?t=1730753981.5040057", "project": "", "author_site": "Jitesh Joshi, Sos Agaian, Youngjun Cho", "tldr": "", "abstract": "Remote photoplethysmography (rPPG) enables non-invasive extraction of blood volume pulse signals through imaging, transforming spatial-temporal data into time series signals. Advances in end-to-end rPPG approaches have focused on this transformation where attention mechanisms are crucial for feature extraction. However, existing methods compute attention disjointly across spatial, temporal, and channel dimensions. Here, we propose the Factorized Self-Attention Module (FSAM), which jointly computes multidimensional attention from voxel embeddings using nonnegative matrix factorization. To demonstrate FSAM's effectiveness, we developed FactorizePhys, an end-to-end 3D-CNN architecture for estimating blood volume pulse signals from raw video frames. Our approach adeptly factorizes voxel embeddings to achieve comprehensive spatial, temporal, and channel attention, enhancing performance of generic signal extraction tasks. Furthermore, we deploy FSAM within an existing 2D-CNN-based rPPG architecture to illustrate its versatility. FSAM and FactorizePhys are thoroughly evaluated against state-of-the-art rPPG methods, each representing different types of architecture and attention mechanism. We perform ablation studies to investigate the architectural decisions and hyperparameters of FSAM. Experiments on four publicly available datasets and intuitive visualization of learned spatial-temporal features substantiate the effectiveness of FSAM and enhanced cross-dataset generalization in estimating rPPG signals, suggesting its broader potential as a multidimensional attention mechanism. The code is accessible at https://github.com/PhysiologicAILab/FactorizePhys.", "keywords": "Time-series estimation;remote photo-plethysmography;spatial-temporal attention;non-negative matrix factorization", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/08900221396b55d861a2f70c0e4bf77ef2a8b210.zip", "author": "Jitesh Joshi;Sos Agaian;Youngjun Cho", "authorids": "~Jitesh_Joshi1;~Sos_Agaian1;~Youngjun_Cho1", "gender": "M;M;", "homepage": "https://profiles.ucl.ac.uk/79098-jitesh-joshi;https://www.csi.cuny.edu/campus-directory/sos-agaian;https://iris.ucl.ac.uk/iris/browse/profile?upi=YCHOX41", "dblp": ";64/3266.html;", "google_scholar": "RN2b6HIAAAAJ;FazfMZMAAAAJ;2h-v48AAAAAJ", "orcid": "0000-0002-1971-2118;0000-0003-4601-4507;", "linkedin": "jnj256/;;", "or_profile": "~Jitesh_Joshi1;~Sos_Agaian1;~Youngjun_Cho1", "aff": "University College London, University of London;CUNY College of Staten Island;University College London (UCL)", "aff_domain": "ucl.ac.uk;csi.cuny.edu;ucl.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\njoshi2024factorizephys,\ntitle={FactorizePhys: Matrix Factorization for Multidimensional Attention in Remote Physiological Sensing},\nauthor={Jitesh Joshi and Sos Agaian and Youngjun Cho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qrfp4eeZ47}\n}", "github": "", "reviewers": "ECso;c6jF;a4X8", "pdf_size": 44672955, "rating": "3;6;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "49;34;54", "wc_strengths": "39;58;74", "wc_weaknesses": "134;132;70", "wc_questions": "5;29;54", "wc_limitations": "17;11;17", "wc_review": "244;264;269", "wc_reply_reviewers": "44;111;100", "wc_reply_authors": "517;1117;521", "reply_reviewers": "1;1;1", "reply_authors": "2;3;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 45.666666666666664, 8.498365855987974 ], "wc_strengths_avg": [ 57.0, 14.30617582258329 ], "wc_weaknesses_avg": [ 112.0, 29.709706606876257 ], "wc_questions_avg": [ 29.333333333333332, 20.005554784164875 ], "wc_limitations_avg": [ 15.0, 2.8284271247461903 ], "wc_review_avg": [ 259.0, 10.801234497346433 ], "wc_reply_reviewers_avg": [ 85.0, 29.337120967584166 ], "wc_reply_authors_avg": [ 718.3333333333334, 281.90463320460384 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17804904079363872523&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ucl.ac.uk;csi.cuny.edu;ucl.ac.uk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University College London;College of Staten Island, City University of New York", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.csi.cuny.edu", "aff_unique_abbr": "UCL;CSI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Staten Island", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "PeRFlow: Piecewise Rectified Flow as Universal Plug-and-Play Accelerator", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93469", "id": "qrlguvKu7a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qrlguvKu7a", "openreview": "https://openreview.net/forum?id=qrlguvKu7a", "poster": "/media/PosterPDFs/NeurIPS%202024/93469.png?t=1731639661.4576316", "project": "", "author_site": "Hanshu Yan, Xingchao Liu, Jiachun Pan, Jun Hao Liew, Qiang Liu, Jiashi Feng", "tldr": "", "abstract": "We present Piecewise Rectified Flow (PeRFlow), a flow-based method for accelerating diffusion models. PeRFlow divides the sampling process of generative flows into several time windows and straightens the trajectories in each interval via the reflow operation, thereby approaching piecewise linear flows. PeRFlow achieves superior performance in a few-step generation. Moreover, through dedicated parameterizations, the PeRFlow models inherit knowledge from the pretrained diffusion models. Thus, the training converges fast and the obtained models show advantageous transfer ability, serving as universal plug-and-play accelerators that are compatible with various workflows based on the pre-trained diffusion models.", "keywords": "Flow Model;Diffusion Model;Generative Model;Image Generation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Hanshu Yan;Xingchao Liu;Jiachun Pan;Jun Hao Liew;qiang liu;Jiashi Feng", "authorids": "~Hanshu_Yan1;~Xingchao_Liu1;~Jiachun_Pan1;~Jun_Hao_Liew1;~qiang_liu4;~Jiashi_Feng1", "gender": "M;F;;M;M;M", "homepage": ";;;;https://sites.google.com/site/jshfeng/;https://www.cs.utexas.edu/~lqiang/", "dblp": "228/7309;228/9156;;243/3583;56/8278;61/3234-1", "google_scholar": "VOTVE0UAAAAJ;;https://scholar.google.com.sg/citations?user=8gm-CYYAAAAJ;MG817V4AAAAJ;https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;;0000-0001-6843-0064;", "linkedin": ";;;;;", "or_profile": "~Xingchao_Liu1;~Jiachun_Pan1;~Jun_Hao_Liew1;~Hanshu_YAN2;~Jiashi_Feng2;~Qiang_Liu1", "aff": "University of Texas, Austin;National University of Singapore;ByteDance;ByteDance;ByteDance;University of Texas, Austin", "aff_domain": "utexas.edu;nus.edu.sg;bytedance.com;bytedance.com;bytedance.com;utexas.edu", "position": "PhD student;Postdoc;Researcher;Research Scientist;Research Lead;Assistant Professor", "bibtex": "@inproceedings{\nyan2024perflow,\ntitle={Pe{RF}low: Piecewise Rectified Flow as Universal Plug-and-Play Accelerator},\nauthor={Hanshu Yan and Xingchao Liu and Jiachun Pan and Jun Hao Liew and qiang liu and Jiashi Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qrlguvKu7a}\n}", "github": "", "reviewers": "uqGM;FbPr;FZ2P;ecCT", "pdf_size": 20416762, "rating": "5;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "4;3;3;3", "wc_summary": "121;84;139;50", "wc_strengths": "53;77;91;24", "wc_weaknesses": "249;218;104;31", "wc_questions": "4;70;6;56", "wc_limitations": "15;10;1;1", "wc_review": "442;459;341;162", "wc_reply_reviewers": "11;43;0;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 98.5, 34.311076928595526 ], "wc_strengths_avg": [ 61.25, 25.439880109780393 ], "wc_weaknesses_avg": [ 150.5, 87.6085041534211 ], "wc_questions_avg": [ 34.0, 29.427877939124322 ], "wc_limitations_avg": [ 6.75, 6.015604707757983 ], "wc_review_avg": [ 351.0, 118.07412925785225 ], "wc_reply_reviewers_avg": [ 17.5, 15.819292019556375 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17274136741188159404&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "utexas.edu;nus.edu.sg;bytedance.com;bytedance.com;bytedance.com;utexas.edu", "author_num": 6, "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "University of Texas at Austin;National University of Singapore;ByteDance", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.nus.edu.sg;https://www.bytedance.com", "aff_unique_abbr": "UT Austin;NUS;ByteDance", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;2;2;2;0", "aff_country_unique": "United States;Singapore;China" }, { "title": "Towards Editing Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93468", "id": "qu5NTwZtxA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qu5NTwZtxA", "openreview": "https://openreview.net/forum?id=qu5NTwZtxA", "poster": "/media/PosterPDFs/NeurIPS%202024/93468.png?t=1733307529.5112946", "project": "", "author_site": "Baoyu Jing, Shuqi Gu, Tianyu Chen, Zhiyu Yang, Dongsheng Li, Jingrui He, Kan Ren", "tldr": "", "abstract": "Synthesizing time series data is pivotal in modern society, aiding effective decision making and ensuring privacy preservation in various scenarios. Time series are associated with various attributes, including trends, seasonality, and external information such as location. Recent research has predominantly focused on random unconditional synthesis or conditional synthesis. Nonetheless, these paradigms generate time series from scratch and are incapable of manipulating existing time series samples. This paper introduces a novel task, called Time Series Editing (TSE), to synthesize time series by manipulating existing time series. The objective is to modify the given time series according to the specified attributes while preserving other properties unchanged. This task is not trivial due to the inadequacy of data coverage and the intricate relationships between time series and their attributes. To address these issues, we introduce a novel diffusion model, called TEdit. The proposed TEdit is trained using a novel bootstrap learning algorithm that effectively enhances the coverage of the original data. It is also equipped with an innovative multi-resolution modeling and generation paradigm to capture the complex relationships between time series and their attributes. Experimental results demonstrate the efficacy of TEdit for editing specified attributes upon the existing time series data. The project page is at https://seqml.github.io/tse.", "keywords": "Time Series;Editing;Diffusion Model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Baoyu Jing;Shuqi Gu;Tianyu Chen;Zhiyu Yang;Dongsheng Li;Jingrui He;Kan Ren", "authorids": "~Baoyu_Jing1;~Shuqi_Gu1;~Tianyu_Chen8;~Zhiyu_Yang6;~Dongsheng_Li2;~Jingrui_He1;~Kan_Ren1", "gender": ";M;M;M;M;F;M", "homepage": ";https://github.com/genius77777;https://github.com/chentianyu-github;https://github.com/felomid;http://recmind.cn;https://www.hejingrui.org;https://saying.ren", "dblp": "210/0936;;;;254/0830-2.html;34/2685;28/7458", "google_scholar": "cl9YMcUAAAAJ;;;;VNg5rA8AAAAJ;hXpZynkAAAAJ;USnQVWgAAAAJ", "orcid": "0000-0003-1564-6499;0009-0001-7021-7171;;;0000-0003-3103-8442;0000-0002-6429-6272;", "linkedin": "baoyu-jing-b37455a0/;;;;;;", "or_profile": "~Baoyu_Jing1;~Shuqi_Gu1;~Tianyu_Chen8;~Zhiyu_Yang6;~Dongsheng_Li2;~Jingrui_He1;~Kan_Ren1", "aff": "University of Illinois, Urbana Champaign;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;Microsoft Research Asia;University of Illinois, Urbana Champaign;ShanghaiTech University", "aff_domain": "illinois.edu;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;microsoft.com;illinois.edu;shanghaitech.edu.cn", "position": "PhD student;Undergrad student;MS student;Undergrad student;Principal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njing2024towards,\ntitle={Towards Editing Time Series},\nauthor={Baoyu Jing and Shuqi Gu and Tianyu Chen and Zhiyu Yang and Dongsheng Li and Jingrui He and Kan Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qu5NTwZtxA}\n}", "github": "", "reviewers": "FqWw;ZaGr;tfFr;EFAk", "pdf_size": 3554196, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;4;3", "novelty": "3;4;2;2", "presentation": "3;4;3;3", "wc_summary": "106;69;83;74", "wc_strengths": "96;130;56;33", "wc_weaknesses": "258;59;279;201", "wc_questions": "188;5;1;38", "wc_limitations": "76;6;4;4", "wc_review": "724;269;423;350", "wc_reply_reviewers": "35;33;55;281", "wc_reply_authors": "69;984;68;610", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;3;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 14.19506956657839 ], "wc_strengths_avg": [ 78.75, 37.19795021234369 ], "wc_weaknesses_avg": [ 199.25, 85.85562008395257 ], "wc_questions_avg": [ 58.0, 76.41662122863062 ], "wc_limitations_avg": [ 22.5, 30.89902911096075 ], "wc_review_avg": [ 441.5, 171.95711674717043 ], "wc_reply_reviewers_avg": [ 101.0, 104.27847332982968 ], "wc_reply_authors_avg": [ 432.75, 387.50830636258627 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1726638577831586532&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": "illinois.edu;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;microsoft.com;illinois.edu;shanghaitech.edu.cn", "author_num": 7, "aff_unique_index": "0;1;1;1;2;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;ShanghaiTech University;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "https://illinois.edu;https://www.shanghaitech.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "UIUC;ShanghaiTech;MSR Asia", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Urbana-Champaign;;Asia", "aff_country_unique_index": "0;1;1;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "CLIPLoss and Norm-Based Data Selection Methods for Multimodal Contrastive Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93467", "id": "qvdc0oCX2n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qvdc0oCX2n", "openreview": "https://openreview.net/forum?id=qvdc0oCX2n", "poster": "", "project": "", "author_site": "Yiping Wang, Yifang Chen, Wendan Yan, Alex Fang, Wenjing Zhou, Kevin Jamieson, Simon Du", "tldr": "", "abstract": "Data selection has emerged as a core issue for large-scale visual-language model pretaining (e.g., CLIP), particularly with noisy web-curated datasets. Three main data selection approaches are: (1) leveraging external non-CLIP models to aid data selection, (2) training new CLIP-style embedding models that are more effective at selecting high-quality data than the original OpenAI CLIP model, and (3) designing better metrics or strategies universally applicable to any CLIP embedding without requiring specific model properties (e.g., CLIPScore is one popular metric). While the first two approaches have been extensively studied, the third remains under-explored. In this paper, we advance the third approach by proposing two new methods. Firstly, instead of classical CLIP scores that only consider the alignment between two modalities from a single sample, we introduce $\\textbf{negCLIPLoss}$, a method inspired by CLIP training loss that adds the alignment between one sample and its contrastive pairs as an extra normalization term to CLIPScore for better quality measurement. Secondly, when downstream tasks are known, we propose a new norm-based metric, $\\textbf{NormSim}$, to measure the similarity between pretraining data and target data. We test our methods on the data selection benchmark, DataComp [Gadre et al., 2023]. Compared to the best baseline using only OpenAI's CLIP-L/14, our methods achieve a 5.3\\% improvement on ImageNet-1k and a 2.8\\% improvement on 38 downstream evaluation tasks. Moreover, both $\\textbf{negCLIPLoss}$ and $\\textbf{NormSim}$ are compatible with existing techniques. By combining our methods with the current best methods DFN [Fang et al., 2023] and HYPE [Kim et al., 2024], we can boost average performance on downstream tasks by 0.9\\%, achieving a new state-of-the-art on the DataComp-medium benchmark.", "keywords": "contrastive learning;visual-language pretraining;data selection;CLIP", "primary_area": "active_learning", "supplementary_material": "/attachment/bed22c85dcb601b44ba769a2b988a8e4aca65387.zip", "author": "Yiping Wang;Yifang Chen;Wendan Yan;Alex Fang;Wenjing Zhou;Kevin Jamieson;Simon Shaolei Du", "authorids": "~Yiping_Wang2;~Yifang_Chen1;~Wendan_Yan1;~Alex_Fang1;~Wenjing_Zhou2;~Kevin_Jamieson1;~Simon_Shaolei_Du1", "gender": "M;F;F;;F;M;M", "homepage": "https://ypwang61.github.io;;;;;;http://simonshaoleidu.com", "dblp": "13/1444-3;20/8403-1;;260/0449;;85/10260;176/5602", "google_scholar": "IuMFxFUAAAAJ;LUz2mN4AAAAJ;;;https://scholar.google.com/citations?hl=en;;OttawxUAAAAJ", "orcid": ";;;;0009-0002-4694-7569;;", "linkedin": "yiping-wang-323647294/;;emily-yan-17b1a0149/;alex-fang-8a11a8115/;katrinazh/;;", "or_profile": "~Yiping_Wang2;~Yifang_Chen1;~Wendan_Yan1;~Alex_Fang1;~Wenjing_Zhou2;~Kevin_Jamieson1;~Simon_Shaolei_Du1", "aff": "Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;Microsoft;Department of Computer Science, University of Washington;Amazon;University of Washington;University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu;microsoft.com;cs.washington.edu;amazon.com;washington.edu;washington.edu", "position": "PhD student;PhD student;data & applied scientist ;PhD student;Researcher;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024cliploss,\ntitle={{CLIPL}oss and Norm-Based Data Selection Methods for Multimodal Contrastive Learning},\nauthor={Yiping Wang and Yifang Chen and Wendan Yan and Alex Fang and Wenjing Zhou and Kevin Jamieson and Simon Shaolei Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qvdc0oCX2n}\n}", "github": "", "reviewers": "3xCf;NSn3;XD1b", "pdf_size": 11044649, "rating": "6;7;8", "confidence": "5;4;3", "soundness": "2;4;4", "novelty": "3;3;3", "presentation": "2;4;4", "wc_summary": "108;62;78", "wc_strengths": "94;21;155", "wc_weaknesses": "168;41;90", "wc_questions": "93;62;115", "wc_limitations": "11;1;8", "wc_review": "474;187;446", "wc_reply_reviewers": "61;0;24", "wc_reply_authors": "31;0;14", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 82.66666666666667, 19.067132861433453 ], "wc_strengths_avg": [ 90.0, 54.77834121864833 ], "wc_weaknesses_avg": [ 99.66666666666667, 52.29616514515083 ], "wc_questions_avg": [ 90.0, 21.740898478827106 ], "wc_limitations_avg": [ 6.666666666666667, 4.189935029992178 ], "wc_review_avg": [ 369.0, 129.2001031991332 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 25.090945688745087 ], "wc_reply_authors_avg": [ 15.0, 12.675435561221029 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12968339044258286818&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cs.washington.edu;cs.washington.edu;microsoft.com;cs.washington.edu;amazon.com;washington.edu;washington.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "University of Washington;Microsoft;Amazon", "aff_unique_dep": "Department of Computer Science;Microsoft Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.washington.edu;https://www.microsoft.com;https://www.amazon.com", "aff_unique_abbr": "UW;Microsoft;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "What to Say and When to Say it: Live Fitness Coaching as a Testbed for Situated Interaction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97489", "id": "qwWu95yoZO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qwWu95yoZO", "openreview": "https://openreview.net/forum?id=qwWu95yoZO", "poster": "/media/PosterPDFs/NeurIPS%202024/97489.png?t=1733158491.8946545", "project": "", "author_site": "Sunny Panchal, Apratim Bhattacharyya, Guillaume Berger, Antoine Mercier, Cornelius B\u00f6hm, Florian Dietrichkeit, Reza Pourreza, Xuanlin Li, Pulkit Madan, Mingu Lee, Mark Todorovich, Ingo Bax, Roland Memisevic", "tldr": "", "abstract": "Vision-language models have shown impressive progress in recent years. However, existing models are largely limited to turn-based interactions, where each turn must be stepped (i.e., prompted) by the user. Open-ended, asynchronous interactions, where an AI model may proactively deliver timely responses or feedback based on the unfolding situation in real-time, are an open challenge. In this work, we present the QEVD benchmark and dataset, which explores human-AI interaction in the challenging, yet controlled, real-world domain of fitness coaching \u2013 a task which intrinsically requires monitoring live user activity and providing immediate feedback. The benchmark requires vision-language models to recognize complex human actions, identify possible mistakes, and provide appropriate feedback in real-time. Our experiments reveal the limitations of existing state-of-the-art vision-language models for such asynchronous situated interactions. Motivated by this, we propose a simple end-to-end streaming baseline that can respond asynchronously to human actions with appropriate feedback at the appropriate time.", "keywords": "Situated Interaction;visual perception;language model grounding;other applications", "primary_area": "", "supplementary_material": "", "author": "Sunny Panchal;Apratim Bhattacharyya;Guillaume Berger;Antoine Mercier;Cornelius B\u00f6hm;Florian Dietrichkeit;Reza Pourreza;Xuanlin Li;Pulkit Madan;Mingu Lee;Mark Todorovich;Ingo Bax;Roland Memisevic", "authorids": "~Sunny_Panchal1;~Apratim_Bhattacharyya1;~Guillaume_Berger1;~Antoine_Mercier1;~Cornelius_B\u00f6hm1;~Florian_Dietrichkeit1;~Reza_Pourreza1;~Xuanlin_Li1;~Pulkit_Madan1;~Mingu_Lee1;~Mark_Todorovich1;~Ingo_Bax2;~Roland_Memisevic1", "gender": ";M;M;M;M;;;;;M;M;M;M", "homepage": ";https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/people/apratim-bhattacharyya/;;;;https://casualcoding.com;;https://xuanlinli17.github.io/;https://www.linkedin.com/in/madanpulkit;;;;", "dblp": ";180/5968;218/5817;;194/1263;;;251/3029;350/5227;;;02/3163;98/4508", "google_scholar": ";https://scholar.google.de/citations?user=SKb4VyUAAAAJ;OY4_O9UAAAAJ;ZwyyS-4AAAAJ;;;;7vyVxxQAAAAJ;;;;https://scholar.google.de/citations?user=BwtfvK0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;;;;;;", "linkedin": ";;guillaumebrgr/;merciera/;corneliusboehm;floriandtk/;;xuanlin-li-4684b8145/;madanpulkit/;mingu-lee-0aa28aa5/;mark-todorovich-3815047/;ingo-bax-106b09b3;", "or_profile": "~Sunny_Panchal1;~Apratim_Bhattacharyya1;~Guillaume_Berger1;~Antoine_Mercier1;~Cornelius_B\u00f6hm1;~Florian_Dietrichkeit1;~Reza_Pourreza1;~Xuanlin_Li1;~Pulkit_Madan1;~Mingu_Lee1;~Mark_Todorovich1;~Ingo_Bax2;~Roland_Memisevic1", "aff": ";Qualcomm Technologies, Inc.;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Aignostics GmbH;LifeBonus;;University of California, San Diego;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;Qualcomm;Qualcomm Inc;Qualcomm Inc, Qualcomm", "aff_domain": ";qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;aignostics.com;lifebonus.health;;ucsd.edu;qti.qualcomm.com;qti.qualcomm.com;qualcomm.com;qualcomm.com;qti.qualcomm.com", "position": ";Researcher;Researcher;Researcher;Researcher;Researcher;;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\npanchal2024live,\ntitle={Live Fitness Coaching as a Testbed for Situated Interaction},\nauthor={Sunny Panchal and Apratim Bhattacharyya and Guillaume Berger and Antoine Mercier and Cornelius B{\\\"o}hm and Florian Dietrichkeit and Reza Pourreza and Xuanlin Li and Pulkit Madan and Mingu Lee and Mark Todorovich and Ingo Bax and Roland Memisevic},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=qwWu95yoZO}\n}", "github": "", "reviewers": "vHEx;xB99;Cn17;Ne6d", "pdf_size": 15419598, "rating": "5;6;6;7", "confidence": "3;3;3;5", "wc_summary_and_contributions": "77;93;49;61", "wc_strengths": "43;5;94;97", "wc_improvement": "72;113;24;48", "wc_limitations": "4;1;27;6", "wc_correctness": "13;49;9;5", "wc_clarity": "38;1;4;8", "wc_relation_to_prior_work": "29;1;6;17", "wc_documentation": "10;19;2;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "287;283;216;246", "wc_reply_reviewers": "0;0;11;0", "wc_reply_authors": "49;49;21;49", "reply_reviewers": "0;0;1;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 70.0, 16.583123951777 ], "wc_strengths_avg": [ 59.75, 38.205856880850085 ], "wc_improvement_avg": [ 64.25, 32.86620604815834 ], "wc_limitations_avg": [ 9.5, 10.259142264341596 ], "wc_correctness_avg": [ 19.0, 17.549928774784245 ], "wc_clarity_avg": [ 12.75, 14.788086421170252 ], "wc_relation_to_prior_work_avg": [ 13.25, 10.779030568655049 ], "wc_documentation_avg": [ 8.5, 6.800735254367722 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 258.0, 29.04307146291521 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 42.0, 12.12435565298214 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13272736419482191395&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": ";qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;aignostics.com;lifebonus.health;;ucsd.edu;qti.qualcomm.com;qti.qualcomm.com;qualcomm.com;qualcomm.com;qti.qualcomm.com", "author_num": 13, "aff_unique_index": "0;1;1;2;3;4;1;1;1;1;1", "aff_unique_norm": "Qualcomm Technologies;Qualcomm Incorporated;Aignostics;LifeBonus;University of California, San Diego", "aff_unique_dep": "Inc.;;;;", "aff_unique_url": "https://www.qualcomm.com;https://www.qualcomm.com;;;https://www.ucsd.edu", "aff_unique_abbr": "QTI;Qualcomm;Aignostics;;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;Germany;" }, { "title": "Easy-to-Hard Generalization: Scalable Alignment Beyond Human Supervision", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93466", "id": "qwgfh2fTtN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qwgfh2fTtN", "openreview": "https://openreview.net/forum?id=qwgfh2fTtN", "poster": "/media/PosterPDFs/NeurIPS%202024/93466.png?t=1733619625.858506", "project": "", "author_site": "Zhiqing Sun, Longhui Yu, Yikang Shen, Weiyang Liu, Yiming Yang, Sean Welleck, Chuang Gan", "tldr": "", "abstract": "Current AI alignment methodologies rely on human-provided demonstrations or judgments, and the learned capabilities of AI systems would be upper-bounded by human capabilities as a result. This raises a challenging research question: How can we keep improving the systems when their capabilities have surpassed the levels of humans? This paper answers this question in the context of tackling hard reasoning tasks (e.g., level 4-5 MATH problems) via learning from human annotations on easier tasks (e.g., level 1-3 MATH problems), which we term as easy-to-hard generalization. Our key insight is that an evaluator (reward model) trained on supervisions for easier tasks can be effectively used for scoring candidate solutions of harder tasks and hence facilitating easy-to-hard generalization over different levels of tasks. Based on this insight, we propose a novel approach to scalable alignment, which firstly trains the (process-supervised) reward models on easy problems (e.g., level 1-3), and then uses them to evaluate the performance of policy models on hard problems. We show that such easy-to-hard generalization from evaluators can enable easy-to-hard generalizations in generators either through re-ranking or reinforcement learning (RL). Notably, our process-supervised 7b RL model and 34b model (reranking@1024) achieves an accuracy of 34.0% and 52.5% on MATH500, respectively, despite only using human supervision on easy problems. Our approach suggests a promising path toward AI systems that advance beyond the frontier of human supervision.", "keywords": "easy-to-hard generalization;scalable oversight;AI alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhiqing Sun;Longhui Yu;Yikang Shen;Weiyang Liu;Yiming Yang;Sean Welleck;Chuang Gan", "authorids": "~Zhiqing_Sun1;~Longhui_Yu1;~Yikang_Shen1;~Weiyang_Liu1;~Yiming_Yang1;~Sean_Welleck1;~Chuang_Gan1", "gender": "M;M;M;M;F;;M", "homepage": "https://www.cs.cmu.edu/~zhiqings/;https://yulonghui.github.io/;;http://wyliu.com/;http://www.cs.cmu.edu/~yiming/;;http://people.csail.mit.edu/ganchuang/", "dblp": "211/7692;313/9946;152/8226;137/1532;25/1666;;139/6993", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=3eHjDDgAAAAJ;qff5rRYAAAAJ;DMjROf0AAAAJ;MlZq4XwAAAAJ;;PTeSCbIAAAAJ", "orcid": ";;;;0000-0001-8322-607X;;", "linkedin": "zhiqing-sun-5781b3100/;%E9%BE%99%E8%BE%89-%E8%99%9E-71655a154/;;;yiming-yang-24100924/;;", "or_profile": "~Zhiqing_Sun1;~Longhui_Yu1;~Yikang_Shen1;~Weiyang_Liu1;~Yiming_Yang1;~Sean_Welleck1;~Chuang_Gan1", "aff": "Carnegie Mellon University;;International Business Machines;University of Cambridge;School of Computer Science, Carnegie Mellon University;;University of Massachusetts at Amherst", "aff_domain": "cs.cmu.edu;;ibm.com;cam.ac.uk;cs.cmu.edu;;umass.edu", "position": "PhD student;;Researcher;Researcher;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nsun2024easytohard,\ntitle={Easy-to-Hard Generalization: Scalable Alignment Beyond Human Supervision},\nauthor={Zhiqing Sun and Longhui Yu and Yikang Shen and Weiyang Liu and Yiming Yang and Sean Welleck and Chuang Gan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qwgfh2fTtN}\n}", "github": "", "reviewers": "VNqz;bhN7;dzZx;3nAE", "pdf_size": 3813202, "rating": "5;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;4;4", "presentation": "2;3;3;2", "wc_summary": "71;141;71;122", "wc_strengths": "56;63;45;105", "wc_weaknesses": "215;156;364;78", "wc_questions": "96;15;159;59", "wc_limitations": "31;8;7;1", "wc_review": "469;383;646;365", "wc_reply_reviewers": "15;17;22;18", "wc_reply_authors": "22;20;19;16", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 101.25, 30.986892390170397 ], "wc_strengths_avg": [ 67.25, 22.71976012197312 ], "wc_weaknesses_avg": [ 203.25, 104.76014270704293 ], "wc_questions_avg": [ 82.25, 52.77961254120761 ], "wc_limitations_avg": [ 11.75, 11.431863365173676 ], "wc_review_avg": [ 465.75, 111.24157271452071 ], "wc_reply_reviewers_avg": [ 18.0, 2.5495097567963922 ], "wc_reply_authors_avg": [ 19.25, 2.165063509461097 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5081565265096169081&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "cs.cmu.edu;;ibm.com;cam.ac.uk;cs.cmu.edu;;umass.edu", "author_num": 7, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Carnegie Mellon University;International Business Machines Corporation;University of Cambridge;University of Massachusetts Amherst", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cmu.edu;https://www.ibm.com;https://www.cam.ac.uk;https://www.umass.edu", "aff_unique_abbr": "CMU;IBM;Cambridge;UMass Amherst", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Cambridge;Pittsburgh;Amherst", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Integrating GNN and Neural ODEs for Estimating Non-Reciprocal Two-Body Interactions in Mixed-Species Collective Motion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93465", "id": "qwl3EiDi9r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qwl3EiDi9r", "openreview": "https://openreview.net/forum?id=qwl3EiDi9r", "poster": "/media/PosterPDFs/NeurIPS%202024/93465.png?t=1733714796.1742945", "project": "", "author_site": "Masahito Uwamichi, Simon Schnyder, Tetsuya J. Kobayashi, Satoshi Sawai", "tldr": "", "abstract": "Analyzing the motion of multiple biological agents, be it cells or individual animals, is pivotal for the understanding of complex collective behaviors. \nWith the advent of advanced microscopy, detailed images of complex tissue formations involving multiple cell types have become more accessible in recent years. However, deciphering the underlying rules that govern cell movements is far from trivial. Here, we present a novel deep learning framework for estimating the underlying equations of motion from observed trajectories, a pivotal step in decoding such complex dynamics. \nOur framework integrates graph neural networks with neural differential equations, enabling effective prediction of two-body interactions based on the states of the interacting entities. We demonstrate the efficacy of our approach through two numerical experiments. First, we used simulated data from a toy model to tune the hyperparameters. Based on the obtained hyperparameters, we then applied this approach to a more complex model with non-reciprocal forces that mimic the collective dynamics of the cells of slime molds. Our results show that the proposed method can accurately estimate the functional forms of two-body interactions -- even when they are nonreciprocal -- thereby precisely replicating both individual and collective behaviors within these systems.", "keywords": "Deep Learning;Neural Differential Equations;Graph Neural Networks;System Identification;Active Matter;Collective Motion;Non-Reciprocal", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/afbc048d5b7c6331922874fd73b31f7470892a32.zip", "author": "Masahito Uwamichi;Simon K. Schnyder;Tetsuya J. Kobayashi;Satoshi Sawai", "authorids": "~Masahito_Uwamichi1;~Simon_K._Schnyder1;~Tetsuya_J._Kobayashi1;~Satoshi_Sawai1", "gender": "M;M;M;", "homepage": "https://masahitouwamichi.github.io/MasahitoUWAMICHI/;;https://research.crmind.net/;", "dblp": ";;;", "google_scholar": "eP2GZzEAAAAJ;XkerYaAAAAAJ;vPLNIKMAAAAJ;x9j_HlYAAAAJ", "orcid": "0009-0000-7066-891X;;;", "linkedin": ";;;", "or_profile": "~Masahito_Uwamichi1;~Simon_K._Schnyder1;~Tetsuya_J._Kobayashi1;~Satoshi_Sawai1", "aff": "The University of Tokyo;The University of Tokyo, Tokyo Institute of Technology;The University of Tokyo;The University of Tokyo", "aff_domain": "g.ecc.u-tokyo.ac.jp;u-tokyo.ac.jp;iis.u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nuwamichi2024integrating,\ntitle={Integrating {GNN} and Neural {ODE}s for Estimating Non-Reciprocal Two-Body Interactions in Mixed-Species Collective Motion},\nauthor={Masahito Uwamichi and Simon K. Schnyder and Tetsuya J. Kobayashi and Satoshi Sawai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qwl3EiDi9r}\n}", "github": "", "reviewers": "KAZi;T7k9;Sm49;Shpk", "pdf_size": 12140473, "rating": "3;5;7;7", "confidence": "3;4;1;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;2", "wc_summary": "153;27;22;124", "wc_strengths": "46;53;50;56", "wc_weaknesses": "131;210;2;46", "wc_questions": "160;24;2;102", "wc_limitations": "7;6;2;1", "wc_review": "497;320;78;329", "wc_reply_reviewers": "355;30;0;14", "wc_reply_authors": "804;0;0;0", "reply_reviewers": "2;1;0;1", "reply_authors": "2;1;0;1", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 57.941781125540146 ], "wc_strengths_avg": [ 51.25, 3.6996621467371855 ], "wc_weaknesses_avg": [ 97.25, 79.92300982820905 ], "wc_questions_avg": [ 72.0, 62.94441992742486 ], "wc_limitations_avg": [ 4.0, 2.5495097567963922 ], "wc_review_avg": [ 306.0, 149.3234743769378 ], "wc_reply_reviewers_avg": [ 99.75, 147.75042301123878 ], "wc_reply_authors_avg": [ 201.0, 348.14221232134435 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.0, 0.7071067811865476 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.24618298195866542, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2741929922036504522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "g.ecc.u-tokyo.ac.jp;u-tokyo.ac.jp;iis.u-tokyo.ac.jp;u-tokyo.ac.jp", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Fast samplers for Inverse Problems in Iterative Refinement models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93464", "id": "qxS4IvtLdD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qxS4IvtLdD", "openreview": "https://openreview.net/forum?id=qxS4IvtLdD", "poster": "/media/PosterPDFs/NeurIPS%202024/93464.png?t=1731735411.160125", "project": "", "author_site": "Kushagra Pandey, Ruihan Yang, Stephan Mandt", "tldr": "", "abstract": "Constructing fast samplers for unconditional diffusion and flow-matching models has received much attention recently; however, existing methods for solving *inverse problems*, such as super-resolution, inpainting, or deblurring, still require hundreds to thousands of iterative steps to obtain high-quality results. We propose a plug-and-play framework for constructing efficient samplers for inverse problems, requiring only *pre-trained* diffusion or flow-matching models. We present *Conditional Conjugate Integrators*, which leverage the specific form of the inverse problem to project the respective conditional diffusion/flow dynamics into a more amenable space for sampling. Our method complements popular posterior approximation methods for solving inverse problems using diffusion/flow models. We evaluate the proposed method's performance on various linear image restoration tasks across multiple datasets, employing diffusion and flow-matching models. Notably, on challenging inverse problems like 4x super-resolution on the ImageNet dataset, our method can generate high-quality samples in as few as *5* conditional sampling steps and outperforms competing baselines requiring 20-1000 steps. Our code will be publicly available at https://github.com/mandt-lab/c-pigdm.", "keywords": "Inverse Problems;Diffusion models;Fast sampling", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Kushagra Pandey;Ruihan Yang;Stephan Mandt", "authorids": "~Kushagra_Pandey1;~Ruihan_Yang1;~Stephan_Mandt1", "gender": "M;M;M", "homepage": "https://kpandey008.github.io;;https://www.stephanmandt.com", "dblp": ";225/4834;147/5018", "google_scholar": "Vzcz2WwAAAAJ;mWEXfLwAAAAJ;HOrGe7wAAAAJ", "orcid": ";;", "linkedin": "kushagra-pandey-008/;;stephan-mandt-8702795a/", "or_profile": "~Kushagra_Pandey1;~Ruihan_Yang1;~Stephan_M_Mandt1", "aff": "University of California, Irvine;Microsoft;University of California, Irvine", "aff_domain": "uci.edu;microsoft.com;uci.edu", "position": "PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\npandey2024fast,\ntitle={Fast samplers for Inverse Problems in Iterative Refinement models},\nauthor={Kushagra Pandey and Ruihan Yang and Stephan Mandt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qxS4IvtLdD}\n}", "github": "", "reviewers": "PpUR;X2Z6;hLds", "pdf_size": 30622034, "rating": "5;7;7", "confidence": "3;3;4", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "53;40;165", "wc_strengths": "39;48;100", "wc_weaknesses": "14;89;62", "wc_questions": "83;11;102", "wc_limitations": "15;5;5", "wc_review": "204;193;434", "wc_reply_reviewers": "98;0;14", "wc_reply_authors": "854;0;0", "reply_reviewers": "1;0;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 86.0, 56.11298126696412 ], "wc_strengths_avg": [ 62.333333333333336, 26.886593106767712 ], "wc_weaknesses_avg": [ 55.0, 31.016124838541646 ], "wc_questions_avg": [ 65.33333333333333, 39.19467083956978 ], "wc_limitations_avg": [ 8.333333333333334, 4.714045207910316 ], "wc_review_avg": [ 277.0, 111.10655546216283 ], "wc_reply_reviewers_avg": [ 37.333333333333336, 43.27688631231329 ], "wc_reply_authors_avg": [ 284.6666666666667, 402.57946075554105 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14220274955246117162&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "uci.edu;microsoft.com;uci.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Irvine;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.uci.edu;https://www.microsoft.com", "aff_unique_abbr": "UCI;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Parametric model reduction of mean-field and stochastic systems via higher-order action matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93463", "id": "qyaz3XP0FN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qyaz3XP0FN", "openreview": "https://openreview.net/forum?id=qyaz3XP0FN", "poster": "", "project": "", "author_site": "Jules Berman, Tobias Blickhan, Benjamin Peherstorfer", "tldr": "", "abstract": "The aim of this work is to learn models of population dynamics of physical systems that feature stochastic and mean-field effects and that depend on physics parameters. The learned models can act as surrogates of classical numerical models to efficiently predict the system behavior over the physics parameters. Building on the Benamou-Brenier formula from optimal transport and action matching, we use a variational problem to infer parameter- and time-dependent gradient fields that represent approximations of the population dynamics. The inferred gradient fields can then be used to rapidly generate sample trajectories that mimic the dynamics of the physical system on a population level over varying physics parameters. We show that combining Monte Carlo sampling with higher-order quadrature rules is critical for accurately estimating the training objective from sample data and for stabilizing the training process. We demonstrate on Vlasov-Poisson instabilities as well as on high-dimensional particle and chaotic systems that our approach accurately predicts population dynamics over a wide range of parameters and outperforms state-of-the-art diffusion-based and flow-based modeling that simply condition on time and physics parameters.", "keywords": "partial differential equations;reduced modeling;model reduction;stochastic dynamical systems;generative models", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Jules Berman;Tobias Blickhan;Benjamin Peherstorfer", "authorids": "~Jules_Berman1;~Tobias_Blickhan1;~Benjamin_Peherstorfer2", "gender": "M;M;", "homepage": ";;https://cims.nyu.edu/~pehersto/", "dblp": "308/1410;319/9995;96/8557", "google_scholar": "g44S1mAAAAAJ;laUsV00AAAAJ;C81WhlkAAAAJ", "orcid": ";0009-0009-6141-0808;", "linkedin": ";;", "or_profile": "~Jules_Berman1;~Tobias_Blickhan1;~Benjamin_Peherstorfer2", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;Instructor;Associate Professor", "bibtex": "@inproceedings{\nberman2024parametric,\ntitle={Parametric model reduction of mean-field and stochastic systems via higher-order action matching},\nauthor={Jules Berman and Tobias Blickhan and Benjamin Peherstorfer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qyaz3XP0FN}\n}", "github": "", "reviewers": "zRLH;rAtu;gej1", "pdf_size": 979751, "rating": "4;5;9", "confidence": "3;4;4", "soundness": "2;3;4", "novelty": "2;3;4", "presentation": "2;2;4", "wc_summary": "132;93;72", "wc_strengths": "74;48;113", "wc_weaknesses": "113;42;53", "wc_questions": "210;178;42", "wc_limitations": "18;1;48", "wc_review": "547;362;328", "wc_reply_reviewers": "186;15;77", "wc_reply_authors": "780;18;30", "reply_reviewers": "1;1;2", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 2.160246899469287 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 99.0, 24.859605789312106 ], "wc_strengths_avg": [ 78.33333333333333, 26.71246067953223 ], "wc_weaknesses_avg": [ 69.33333333333333, 31.201851796897497 ], "wc_questions_avg": [ 143.33333333333334, 72.83466817998755 ], "wc_limitations_avg": [ 22.333333333333332, 19.430788855719562 ], "wc_review_avg": [ 412.3333333333333, 96.2300484371811 ], "wc_reply_reviewers_avg": [ 92.66666666666667, 70.68396014812853 ], "wc_reply_authors_avg": [ 276.0, 356.4154878789641 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.654653670707977, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oo5RAz6-95QJ:scholar.google.com/&scioq=Parametric+model+reduction+of+mean-field+and+stochastic+systems+via+higher-order+action+matching&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bridging OOD Detection and Generalization: A Graph-Theoretic View", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93462", "id": "qzwAG8qxI1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=qzwAG8qxI1", "openreview": "https://openreview.net/forum?id=qzwAG8qxI1", "poster": "", "project": "", "author_site": "Han Wang, Sharon Li", "tldr": "", "abstract": "In the context of modern machine learning, models deployed in real-world scenarios often encounter diverse data shifts like covariate and semantic shifts, leading to challenges in both out-of-distribution (OOD) generalization and detection. Despite considerable attention to these issues separately, a unified framework for theoretical understanding and practical usage is lacking. To bridge the gap, we introduce a graph-theoretic framework to jointly tackle both OOD generalization and detection problems. By leveraging the graph formulation, data representations are obtained through the factorization of the graph's adjacency matrix, enabling us to derive provable error quantifying OOD generalization and detection performance. Empirical results showcase competitive performance in comparison to existing methods, thereby validating our theoretical underpinnings.", "keywords": "graph spectral;distribution shift", "primary_area": "machine_vision", "supplementary_material": "", "author": "Han Wang;Yixuan Li", "authorids": "~Han_Wang19;~Yixuan_Li1", "gender": "M;F", "homepage": "https://rookiehb.github.io/;http://pages.cs.wisc.edu/~sharonli/", "dblp": "67/1771-19;144/6087-1", "google_scholar": "5C6OSIgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";liyixuan", "or_profile": "~Han_Wang19;~Yixuan_Li1", "aff": "Zhejiang University;Cornell University", "aff_domain": "zju.edu.cn;cornell.edu", "position": "Undergrad student;Graduate Student", "bibtex": "@inproceedings{\nwang2024bridging,\ntitle={Bridging {OOD} Detection and Generalization: A Graph-Theoretic View},\nauthor={Han Wang and Yixuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=qzwAG8qxI1}\n}", "github": "", "reviewers": "DU8N;LF4w;6JZj;ureZ", "pdf_size": 3970199, "rating": "5;6;6;6", "confidence": "3;3;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "55;49;66;83", "wc_strengths": "58;62;24;89", "wc_weaknesses": "82;139;83;264", "wc_questions": "5;83;35;95", "wc_limitations": "1;1;1;1", "wc_review": "201;334;209;532", "wc_reply_reviewers": "12;10;11;22", "wc_reply_authors": "100;21;21;26", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 63.25, 12.93010054098575 ], "wc_strengths_avg": [ 58.25, 23.09085316743407 ], "wc_weaknesses_avg": [ 142.0, 74.11814892453805 ], "wc_questions_avg": [ 54.5, 36.34212431875715 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 319.0, 133.8076978353637 ], "wc_reply_reviewers_avg": [ 13.75, 4.815340071064556 ], "wc_reply_authors_avg": [ 42.0, 33.54847239443251 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bvZry_BgDH4J:scholar.google.com/&scioq=Bridging+OOD+Detection+and+Generalization:+A+Graph-Theoretic+View&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "zju.edu.cn;cornell.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Zhejiang University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.cornell.edu", "aff_unique_abbr": "ZJU;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "AsCAN: Asymmetric Convolution-Attention Networks for Efficient Recognition and Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93461", "id": "r0eSCJ6qsL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r0eSCJ6qsL", "openreview": "https://openreview.net/forum?id=r0eSCJ6qsL", "poster": "/media/PosterPDFs/NeurIPS%202024/93461.png?t=1731720123.6747537", "project": "", "author_site": "Anil Kag, n n, Jierun Chen, Junli Cao, Willi Menapace, Aliaksandr Siarohin, Sergey Tulyakov, Jian Ren", "tldr": "", "abstract": "Neural network architecture design requires making many crucial decisions. The common desiderata is that similar decisions, with little modifications, can be reused in a variety of tasks and applications. To satisfy that, architectures must provide promising latency and performance trade-offs, support a variety of tasks, scale efficiently with respect to the amounts of data and compute, leverage available data from other tasks, and efficiently support various hardware. To this end, we introduce AsCAN---a hybrid architecture, combining both convolutional and transformer blocks. We revisit the key design principles of hybrid architectures and propose a simple and effective \\emph{asymmetric} architecture, where the distribution of convolutional and transformer blocks is \\emph{asymmetric}, containing more convolutional blocks in the earlier stages, followed by more transformer blocks in later stages. AsCAN supports a variety of tasks: recognition, segmentation, class-conditional image generation, and features a superior trade-off between performance and latency. We then scale the same architecture to solve a large-scale text-to-image task and show state-of-the-art performance compared to the most recent public and commercial models. Notably, without performing any optimization of inference time our model shows faster execution, even when compared to works that do such optimization, highlighting the advantages and the value of our approach.", "keywords": "Text-to-Image Generation;Hybrid Architectures", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Anil Kag;Huseyin Coskun;Jierun Chen;Junli Cao;Willi Menapace;Aliaksandr Siarohin;Sergey Tulyakov;Jian Ren", "authorids": "~Anil_Kag1;~Huseyin_Coskun1;~Jierun_Chen1;~Junli_Cao2;~Willi_Menapace1;~Aliaksandr_Siarohin1;~Sergey_Tulyakov1;~Jian_Ren2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://anilkagak2.github.io/;;;;;;http://www.stulyakov.com/;https://alanspike.github.io/", "dblp": "213/9132;;283/5437;234/8466;271/8571;199/1971;40/6115;59/2180-5", "google_scholar": "bZdVsMkAAAAJ;https://scholar.google.de/citations?user=nwjxmycAAAAJ;8rPHNOsAAAAJ;;31ha1LgAAAAJ;https://scholar.google.it/citations?user=uMl5-k4AAAAJ;mgzXR0sAAAAJ;https://scholar.google.co.jp/citations?user=vDALiU4AAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;junli-cao-5165b41a1;willi-menapace/;;sergeytulyakov/;", "or_profile": "~Anil_Kag1;~Huseyin_Coskun1;~Jierun_Chen1;~Junli_Cao2;~Willi_Menapace1;~Aliaksandr_Siarohin1;~Sergey_Tulyakov1;~Jian_Ren2", "aff": "Snap Inc.;Technical University Munich;HKUST;Snap Inc.;University of Trento;Snap Inc.;Snap Inc.;Snap Inc.", "aff_domain": "snap.com;tum.de;cse.ust.hk;snapchat.com;unitn.it;snapchat.com;snapchat.com;snapchat.com", "position": "Researcher;PhD student;PhD student;Researcher;PhD student;Intern;Director of Research;Research Scientist", "bibtex": "@inproceedings{\nkag2024ascan,\ntitle={As{CAN}: Asymmetric Convolution-Attention Networks for Efficient Recognition and Generation},\nauthor={Anil Kag and Huseyin Coskun and Jierun Chen and Junli Cao and Willi Menapace and Aliaksandr Siarohin and Sergey Tulyakov and Jian Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r0eSCJ6qsL}\n}", "github": "", "reviewers": "jBHn;3CqY;hpXB;fxge;R6Bu", "pdf_size": 12956409, "rating": "4;4;5;7;8", "confidence": "4;4;5;3;4", "soundness": "2;3;2;4;3", "novelty": "3;3;2;4;3", "presentation": "2;3;3;3;3", "wc_summary": "78;63;223;148;440", "wc_strengths": "10;105;38;53;172", "wc_weaknesses": "121;190;515;41;692", "wc_questions": "6;49;19;2;76", "wc_limitations": "6;40;3;5;65", "wc_review": "221;447;798;249;1445", "wc_reply_reviewers": "77;0;545;0;349", "wc_reply_authors": "378;175;552;0;976", "reply_reviewers": "1;0;1;0;3", "reply_authors": "4;4;5;1;7", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 190.4, 137.13146976533142 ], "wc_strengths_avg": [ 75.6, 57.24543649934028 ], "wc_weaknesses_avg": [ 311.8, 249.16452395957174 ], "wc_questions_avg": [ 30.4, 28.132543432828818 ], "wc_limitations_avg": [ 23.8, 24.749949494897965 ], "wc_review_avg": [ 632.0, 455.7104343769188 ], "wc_reply_reviewers_avg": [ 194.2, 217.34157448587695 ], "wc_reply_authors_avg": [ 416.2, 336.0585663243834 ], "reply_reviewers_avg": [ 1.0, 1.0954451150103321 ], "reply_authors_avg": [ 4.2, 1.9390719429665317 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3892494720807615, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9049599458039165563&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "snap.com;tum.de;cse.ust.hk;snapchat.com;unitn.it;snapchat.com;snapchat.com;snapchat.com", "author_num": 8, "aff_unique_index": "0;1;2;0;3;0;0;0", "aff_unique_norm": "Snap Inc.;Technical University of Munich;Hong Kong University of Science and Technology;University of Trento", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.snapinc.com;https://www.tum.de;https://www.ust.hk;https://www.unitn.it", "aff_unique_abbr": "Snap;TUM;HKUST;UniTN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;2;0;3;0;0;0", "aff_country_unique": "United States;Germany;China;Italy" }, { "title": "How Control Information Influences Multilingual Text Image Generation and Editing?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93460", "id": "r3c0WGCXgt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r3c0WGCXgt", "openreview": "https://openreview.net/forum?id=r3c0WGCXgt", "poster": "", "project": "", "author_site": "Boqiang Zhang, Zuan Gao, Yadong Qu, Hongtao Xie", "tldr": "", "abstract": "Visual text generation has significantly advanced through diffusion models aimed at producing images with readable and realistic text. Recent works primarily use a ControlNet-based framework, employing standard font text images to control diffusion models. Recognizing the critical role of control information in generating high-quality text, we investigate its influence from three perspectives: input encoding, role at different stages, and output features. Our findings reveal that: 1) Input control information has unique characteristics compared to conventional inputs like Canny edges and depth maps. 2) Control information plays distinct roles at different stages of the denoising process. 3) Output control features significantly differ from the base and skip features of the U-Net decoder in the frequency domain. Based on these insights, we propose TextGen, a novel framework designed to enhance generation quality by optimizing control information. We improve input and output features using Fourier analysis to emphasize relevant information and reduce noise. Additionally, we employ a two-stage generation framework to align the different roles of control information at different stages. Furthermore, we introduce an effective and lightweight dataset for training. Our method achieves state-of-the-art performance in both Chinese and English text generation. The code and dataset are available at https://github.com/CyrilSterling/TextGen.", "keywords": "Scene Text Generation;Scene Text Editing;Diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Boqiang Zhang;Zuan Gao;Yadong Qu;Hongtao Xie", "authorids": "~Boqiang_Zhang2;~Zuan_Gao1;~Yadong_Qu1;~Hongtao_Xie2", "gender": "M;M;;M", "homepage": "https://cyrilsterling.github.io/;https://github.com/FaltingsA/Gaozuan.github.io;;http://imcc.ustc.edu.cn/main.htm", "dblp": "346/9025;359/4127.html;;", "google_scholar": "HE-PXScAAAAJ;;_OJlYC8AAAAJ;", "orcid": "0000-0002-5314-4054;;0000-0003-0265-5011;0000-0002-0163-9434", "linkedin": ";;;", "or_profile": "~Boqiang_Zhang2;~Zuan_Gao1;~Yadong_Qu1;~Hongtao_Xie2", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn", "position": "MS student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2024how,\ntitle={How Control Information Influences Multilingual Text Image Generation and Editing?},\nauthor={Boqiang Zhang and Zuan Gao and Yadong Qu and Hongtao Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r3c0WGCXgt}\n}", "github": "", "reviewers": "bu9j;JA2v;dBLt;Z5LF", "pdf_size": 7544836, "rating": "3;5;7;7", "confidence": "4;5;4;4", "soundness": "2;2;3;3", "novelty": "2;1;3;4", "presentation": "3;2;3;4", "wc_summary": "72;68;54;101", "wc_strengths": "32;22;41;82", "wc_weaknesses": "55;261;51;114", "wc_questions": "250;147;59;28", "wc_limitations": "42;1;11;3", "wc_review": "451;499;216;328", "wc_reply_reviewers": "0;39;87;19", "wc_reply_authors": "76;31;254;8", "reply_reviewers": "0;1;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.75, 17.09349291397168 ], "wc_strengths_avg": [ 44.25, 22.807619340913245 ], "wc_weaknesses_avg": [ 120.25, 85.00404402144642 ], "wc_questions_avg": [ 121.0, 86.32786340458102 ], "wc_limitations_avg": [ 14.25, 16.452583383772897 ], "wc_review_avg": [ 373.5, 110.26445483472904 ], "wc_reply_reviewers_avg": [ 36.25, 32.383444844549814 ], "wc_reply_authors_avg": [ 92.25, 96.53593890360212 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15349207583711074723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mail.ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "From Causal to Concept-Based Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93459", "id": "r5nev2SHtJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r5nev2SHtJ", "openreview": "https://openreview.net/forum?id=r5nev2SHtJ", "poster": "", "project": "", "author_site": "Goutham Rajendran, Simon Buchholz, Bryon Aragam, Bernhard Sch\u00f6lkopf, Pradeep Ravikumar", "tldr": "", "abstract": "To build intelligent machine learning systems, modern representation learning attempts to recover latent generative factors from data, such as in causal representation learning. A key question in this growing field is to provide rigorous conditions under which latent factors can be identified and thus, potentially learned. Motivated by extensive empirical literature on linear representations and concept learning, we propose to relax causal notions with a geometric notion of concepts. We formally define a notion of concepts and show rigorously that they can be provably recovered from diverse data. Instead of imposing assumptions on the \"true\" generative latent space, we assume that concepts can be represented linearly in this latent space. The tradeoff is that instead of identifying the \"true\" generative factors, we identify a subset of desired human-interpretable concepts that are relevant for a given application. Experiments on synthetic data, multimodal CLIP models and large language models supplement our results and show the utility of our approach. In this way, we provide a foundation for moving from causal representations to interpretable, concept-based representations by bringing together ideas from these two neighboring disciplines.", "keywords": "concept learning;causal representation learning;interpretable representation learning", "primary_area": "causal_inference", "supplementary_material": "/attachment/29a6685daae442bbd86861b65b772ff3ce925ea3.zip", "author": "Goutham Rajendran;Simon Buchholz;Bryon Aragam;Bernhard Sch\u00f6lkopf;Pradeep Kumar Ravikumar", "authorids": "~Goutham_Rajendran1;~Simon_Buchholz1;~Bryon_Aragam1;~Bernhard_Sch\u00f6lkopf1;~Pradeep_Kumar_Ravikumar1", "gender": "M;;;;M", "homepage": "https://gouthamrdn.github.io/;https://www.is.mpg.de/person/sbuchholz;http://bryonaragam.com/;;http://www.cs.cmu.edu/~pradeepr/", "dblp": "274/1323;207/9068;140/7564;;94/3594", "google_scholar": "YVrGTe8AAAAJ;;u-W3_9QAAAAJ;;https://scholar.google.com.tw/citations?user=Q4DTPw4AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Goutham_Rajendran1;~Simon_Buchholz1;~Bryon_Aragam1;~Bernhard_Sch\u00f6lkopf1;~Pradeep_Kumar_Ravikumar1", "aff": "Carnegie Mellon University;Max-Planck Institute;Booth School of Business;;Carnegie Mellon University", "aff_domain": "cmu.edu;mpg.de;chicagobooth.edu;;cmu.edu", "position": "Postdoc;Postdoc;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nrajendran2024from,\ntitle={From Causal to Concept-Based Representation Learning},\nauthor={Goutham Rajendran and Simon Buchholz and Bryon Aragam and Bernhard Sch{\\\"o}lkopf and Pradeep Kumar Ravikumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r5nev2SHtJ}\n}", "github": "", "reviewers": "kVe2;ohha;KRMD;jSX2", "pdf_size": 694762, "rating": "6;6;6;7", "confidence": "2;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;4", "presentation": "2;2;4;3", "wc_summary": "48;88;59;339", "wc_strengths": "71;197;75;156", "wc_weaknesses": "81;845;120;345", "wc_questions": "4;138;103;356", "wc_limitations": "6;41;1;2", "wc_review": "210;1309;358;1198", "wc_reply_reviewers": "0;612;0;410", "wc_reply_authors": "0;80;0;567", "reply_reviewers": "0;1;0;2", "reply_authors": "1;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 133.5, 119.54183368177017 ], "wc_strengths_avg": [ 124.75, 53.760464097699156 ], "wc_weaknesses_avg": [ 347.75, 304.25760056241813 ], "wc_questions_avg": [ 150.25, 128.5542200785334 ], "wc_limitations_avg": [ 12.5, 16.560495161679196 ], "wc_review_avg": [ 768.75, 489.14280890144954 ], "wc_reply_reviewers_avg": [ 255.5, 265.29370516467213 ], "wc_reply_authors_avg": [ 161.75, 236.23968231438172 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12193074628935999124&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cmu.edu;mpg.de;chicagobooth.edu;;cmu.edu", "author_num": 5, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;Max-Planck-Gesellschaft zur F\u00f6rderung der Wissenschaften e.V.;University of Chicago Booth School of Business", "aff_unique_dep": ";;Booth School of Business", "aff_unique_url": "https://www.cmu.edu;https://www.mpg.de;https://www.chicagobooth.edu", "aff_unique_abbr": "CMU;MPG;Booth", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Germany" }, { "title": "RG-SAN: Rule-Guided Spatial Awareness Network for End-to-End 3D Referring Expression Segmentation", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93458", "id": "r5spnrY6H3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r5spnrY6H3", "openreview": "https://openreview.net/forum?id=r5spnrY6H3", "poster": "/media/PosterPDFs/NeurIPS%202024/93458.png?t=1731587254.498911", "project": "", "author_site": "Changli Wu, qi chen, Jiayi Ji, Haowei Wang, Yiwei Ma, You Huang, Gen Luo, Hao Fei, Xiaoshuai Sun, Rongrong Ji", "tldr": "", "abstract": "3D Referring Expression Segmentation (3D-RES) aims to segment 3D objects by correlating referring expressions with point clouds. However, traditional approaches frequently encounter issues like over-segmentation or mis-segmentation, due to insufficient emphasis on spatial information of instances. In this paper, we introduce a Rule-Guided Spatial Awareness Network (RG-SAN) by utilizing solely the spatial information of the target instance for supervision. This approach enables the network to accurately depict the spatial relationships among all entities described in the text, thus enhancing the reasoning capabilities. The RG-SAN consists of the Text-driven Localization Module (TLM) and the Rule-guided Weak Supervision (RWS) strategy. The TLM initially locates all mentioned instances and iteratively refines their positional information. The RWS strategy, acknowledging that only target objects have supervised positional information, employs dependency tree rules to precisely guide the core instance\u2019s positioning. Extensive testing on the ScanRefer benchmark has shown that RG-SAN not only establishes new performance benchmarks, with an mIoU increase of 5.1 points, but also exhibits significant improvements in robustness when processing descriptions with spatial ambiguity. All codes are available at https://github.com/sosppxo/RG-SAN.", "keywords": "3D Referring Expression Segmentation;Spatial Awareness Modeling;Rule-guided Supervision", "primary_area": "human-AI_interaction", "supplementary_material": "", "author": "Changli Wu;Qi Chen;Jiayi Ji;Haowei Wang;Yiwei Ma;You Huang;Gen Luo;Hao Fei;Xiaoshuai Sun;Rongrong Ji", "authorids": "~Changli_Wu1;~Qi_Chen17;~Jiayi_Ji1;~Haowei_Wang1;~Yiwei_Ma1;~You_Huang1;~Gen_Luo1;~Hao_Fei1;~Xiaoshuai_Sun3;~Rongrong_Ji5", "gender": ";M;M;M;M;M;;M;M;M", "homepage": ";;https://scholar.google.com/citations?user=xp_rICcAAAAJ&hl=zh-CN;https://mr-neko.github.io;https://xmu-xiaoma666.github.io/;;;https://haofei.vip/;https://sites.google.com/view/xssun;http://mac.xmu.edu.cn/rrji-en.html", "dblp": ";;250/9459;94/10479-1;;214/9824;;81/3569-1;26/5787.html;86/5681", "google_scholar": ";https://scholar.google.com.hk/citations?user=OSbZrRYAAAAJ;xp_rICcAAAAJ;https://scholar.google.com.hk/citations?user=SkV_NNsAAAAJ;KIDY5pUAAAAJ;WYmFVEMAAAAJ;;YGDX46AAAAAJ;KPMK3B4AAAAJ;", "orcid": ";;0000-0002-9956-6308;0009-0006-0289-9672;0000-0002-8744-3423;;;0000-0003-3026-6347;0000-0003-3912-9306;", "linkedin": ";;;;;you-huang-5075251b6/;;;;", "or_profile": "~Changli_Wu1;~Qi_Chen17;~Jiayi_Ji1;~Haowei_Wang1;~Yiwei_Ma1;~You_Huang1;~Gen_Luo1;~Hao_Fei1;~Xiaoshuai_Sun3;~Rongrong_Ji5", "aff": ";Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University;;National University of Singapore;Xiamen University;Xiamen University", "aff_domain": ";xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;nus.edu.sg;xmu.edu.cn;xmu.edu.cn", "position": ";MS student;Postdoc;MS student;PhD student;PhD student;;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024rgsan,\ntitle={{RG}-{SAN}: Rule-Guided Spatial Awareness Network for End-to-End 3D Referring Expression Segmentation},\nauthor={Changli Wu and Qi Chen and Jiayi Ji and Haowei Wang and Yiwei Ma and You Huang and Gen Luo and Hao Fei and Xiaoshuai Sun and Rongrong Ji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r5spnrY6H3}\n}", "github": "", "reviewers": "6dEW;p7dx;MnE2;8gkL", "pdf_size": 14578117, "rating": "3;7;8;8", "confidence": "4;5;5;5", "soundness": "2;4;3;4", "novelty": "2;4;4;4", "presentation": "2;4;4;4", "wc_summary": "34;118;71;80", "wc_strengths": "32;261;184;193", "wc_weaknesses": "121;224;170;89", "wc_questions": "136;27;35;12", "wc_limitations": "8;30;5;8", "wc_review": "331;660;465;382", "wc_reply_reviewers": "245;101;166;173", "wc_reply_authors": "1634;214;114;211", "reply_reviewers": "2;1;1;1", "reply_authors": "6;4;3;4", "rating_avg": [ 6.5, 2.0615528128088303 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 75.75, 29.8695078633713 ], "wc_strengths_avg": [ 167.5, 83.70334521391604 ], "wc_weaknesses_avg": [ 151.0, 51.07347648241698 ], "wc_questions_avg": [ 52.5, 48.91063279083598 ], "wc_limitations_avg": [ 12.75, 10.034316120194738 ], "wc_review_avg": [ 459.5, 125.24875248879727 ], "wc_reply_reviewers_avg": [ 171.25, 51.001838202166795 ], "wc_reply_authors_avg": [ 543.25, 631.0282778925205 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 4.25, 1.0897247358851685 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.9801960588196067, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15590555915366048334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;;nus.edu.sg;xmu.edu.cn;xmu.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Xiamen University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "XMU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "title": "GSDF: 3DGS Meets SDF for Improved Neural Rendering and Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93457", "id": "r6V7EjANUK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r6V7EjANUK", "openreview": "https://openreview.net/forum?id=r6V7EjANUK", "poster": "/media/PosterPDFs/NeurIPS%202024/93457.png?t=1731648638.0498135", "project": "", "author_site": "Mulin Yu, Tao Lu, Linning Xu, Lihan Jiang, Yuanbo Xiangli, Bo Dai", "tldr": "", "abstract": "Representing 3D scenes from multiview images remains a core challenge in computer vision and graphics, requiring both reliable rendering and reconstruction, which often conflicts due to the mismatched prioritization of image quality over precise underlying scene geometry. Although both neural implicit surfaces and explicit Gaussian primitives have advanced with neural rendering techniques, current methods impose strict constraints on density fields or primitive shapes, which enhances the affinity for geometric reconstruction at the sacrifice of rendering quality. To address this dilemma, we introduce GSDF, a dual-branch architecture combining 3D Gaussian Splatting (3DGS) and neural Signed Distance Fields (SDF). Our approach leverages mutual guidance and joint supervision during the training process to mutually enhance reconstruction and rendering. Specifically, our method guides the Gaussian primitives to locate near potential surfaces and accelerates the SDF convergence. This implicit mutual guidance ensures robustness and accuracy in both synthetic and real-world scenarios. Experimental results demonstrate that our method boosts the SDF optimization process to reconstruct more detailed geometry, while reducing floaters and blurry edge artifacts in rendering by aligning Gaussian primitives with the underlying geometry.", "keywords": "Neural Rendering; 3D Reconstruction;3D Gaussian Splatting; Signed Distance Field", "primary_area": "machine_vision", "supplementary_material": "/attachment/81b40dc443e1ab9b1c59a8c0db71c1f4e1f7a79c.zip", "author": "Mulin Yu;Tao Lu;Linning Xu;Lihan Jiang;Yuanbo Xiangli;Bo Dai", "authorids": "~Mulin_Yu2;~Tao_Lu4;~Linning_Xu2;~Lihan_Jiang2;~Yuanbo_Xiangli1;~Bo_Dai2", "gender": "M;M;F;M;;M", "homepage": "https://mulinyu.github.io/;http://mcg.nju.edu.cn;https://eveneveno.github.io/linning/;https://github.com/jianglh-WHU;https://kam1107.github.io/;http://daibo.info/", "dblp": "330/1392;03/5189-5;242/9358;358/4168;186/4450;64/2903-2", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Ch28NiIAAAAJ;I9Lrbs4AAAAJ;JM2zk1AAAAAJ;S6tTC-oAAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ", "orcid": ";;;0009-0001-2899-273X;;0000-0003-0777-9232", "linkedin": ";;;;;", "or_profile": "~Mulin_Yu2;~Tao_Lu4;~Linning_Xu2;~Lihan_Jiang2;~Yuanbo_Xiangli1;~Bo_Dai2", "aff": "Shanghai AI Laboratory;Shanghai AI Laboratory;The Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Cornell University;Shanghai AI Laboratory", "aff_domain": "pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;cornell.edu;pjlab.org.cn", "position": "Postdoc;Researcher;Ph.D. student;Intern;Postdoc;Scientist", "bibtex": "@inproceedings{\nyu2024gsdf,\ntitle={{GSDF}: 3{DGS} Meets {SDF} for Improved Neural Rendering and Reconstruction},\nauthor={Mulin Yu and Tao Lu and Linning Xu and Lihan Jiang and Yuanbo Xiangli and Bo Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r6V7EjANUK}\n}", "github": "", "reviewers": "yTkf;96yp;H3YA;LpsC", "pdf_size": 26465456, "rating": "5;5;7;7", "confidence": "5;4;5;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "79;67;401;78", "wc_strengths": "89;57;175;76", "wc_weaknesses": "52;312;573;208", "wc_questions": "118;5;271;48", "wc_limitations": "1;10;78;8", "wc_review": "339;451;1498;418", "wc_reply_reviewers": "20;14;132;17", "wc_reply_authors": "8;8;18;8", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 156.25, 141.38489134274568 ], "wc_strengths_avg": [ 99.25, 45.190568706313044 ], "wc_weaknesses_avg": [ 286.25, 189.66071680767212 ], "wc_questions_avg": [ 110.5, 101.06062536913177 ], "wc_limitations_avg": [ 24.25, 31.21197686786276 ], "wc_review_avg": [ 676.5, 476.0359755312617 ], "wc_reply_reviewers_avg": [ 45.75, 49.84162417096778 ], "wc_reply_authors_avg": [ 10.5, 4.330127018922194 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5641292000888124428&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "pjlab.org.cn;pjlab.org.cn;cuhk.edu.hk;pjlab.org.cn;cornell.edu;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Shanghai AI Laboratory;Chinese University of Hong Kong;Shanghai Artificial Intelligence Laboratory;Cornell University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.shanghai-ai-lab.com;https://www.cuhk.edu.hk;http://www.shailab.org/;https://www.cornell.edu", "aff_unique_abbr": "SAIL;CUHK;Shanghai AI Lab;Cornell", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Neural Signed Distance Function Inference through Splatting 3D Gaussians Pulled on Zero-Level Set", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93456", "id": "r6tnDXIkNS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r6tnDXIkNS", "openreview": "https://openreview.net/forum?id=r6tnDXIkNS", "poster": "", "project": "", "author_site": "Wenyuan Zhang, Yu-Shen Liu, Zhizhong Han", "tldr": "", "abstract": "It is vital to infer a signed distance function (SDF) for multi-view based surface reconstruction. 3D Gaussian splatting (3DGS) provides a novel perspective for volume rendering, and shows advantages in rendering efficiency and quality. Although 3DGS provides a promising neural rendering option, it is still hard to infer SDFs for surface reconstruction with 3DGS due to the discreteness, the sparseness, and the off-surface drift of 3D Gaussians. To resolve these issues, we propose a method that seamlessly merge 3DGS with the learning of neural SDFs. Our key idea is to more effectively constrain the SDF inference with the multi-view consistency. To this end, we dynamically align 3D Gaussians on the zero-level set of the neural SDF, and then render the aligned 3D Gaussians through the differentiable rasterization. Meanwhile, we update the neural SDF by pulling neighboring space to the pulled 3D Gaussians, which progressively refine the signed distance field near the surface. With both differentiable pulling and splatting, we jointly optimize 3D Gaussians and the neural SDF with both RGB and geometry constraints, which recovers more accurate, smooth, and complete surfaces with more geometry details. Our numerical and visual comparisons show our superiority over the state-of-the-art results on the widely used benchmarks.", "keywords": "Gaussian Splatting;Signed distance function;3D reconstruction", "primary_area": "machine_vision", "supplementary_material": "/attachment/e297a77afb12be75ef8059819041eb65df612b44.zip", "author": "Wenyuan Zhang;Yu-Shen Liu;Zhizhong Han", "authorids": "~Wenyuan_Zhang1;~Yu-Shen_Liu1;~Zhizhong_Han2", "gender": "M;M;M", "homepage": "https://wen-yuan-zhang.github.io/;https://yushen-liu.github.io/;https://h312h.github.io/", "dblp": ";44/2229.html;166/5173", "google_scholar": "qzH0hNAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0001-7305-1915;", "linkedin": ";;", "or_profile": "~Wenyuan_Zhang1;~Yu-Shen_Liu1;~Zhizhong_Han2", "aff": "Software Engineering;Tsinghua University;Wayne State University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024neural,\ntitle={Neural Signed Distance Function Inference through Splatting 3D Gaussians Pulled on Zero-Level Set},\nauthor={Wenyuan Zhang and Yu-Shen Liu and Zhizhong Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r6tnDXIkNS}\n}", "github": "", "reviewers": "ZhFP;SW8k;5rKf;CCgx", "pdf_size": 6314416, "rating": "5;5;7;7", "confidence": "4;5;5;4", "soundness": "2;1;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;4", "wc_summary": "98;33;134;85", "wc_strengths": "77;35;109;21", "wc_weaknesses": "95;294;129;121", "wc_questions": "119;285;4;45", "wc_limitations": "10;29;49;4", "wc_review": "399;676;425;276", "wc_reply_reviewers": "723;327;21;514", "wc_reply_authors": "2574;1008;48;708", "reply_reviewers": "6;4;1;2", "reply_authors": "7;5;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 87.5, 36.22499137335991 ], "wc_strengths_avg": [ 60.5, 34.767082132384935 ], "wc_weaknesses_avg": [ 159.75, 78.52189185189057 ], "wc_questions_avg": [ 113.25, 107.38336696155508 ], "wc_limitations_avg": [ 23.0, 17.621010186706094 ], "wc_review_avg": [ 444.0, 145.28764572392245 ], "wc_reply_reviewers_avg": [ 396.25, 257.99164230649023 ], "wc_reply_authors_avg": [ 1084.5, 927.4355772774733 ], "reply_reviewers_avg": [ 3.25, 1.920286436967152 ], "reply_authors_avg": [ 4.25, 1.920286436967152 ], "replies_avg": [ 40, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1310347660524398722&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;wayne.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Software Engineering;Tsinghua University;Wayne State University", "aff_unique_dep": "Software Engineering;;", "aff_unique_url": ";https://www.tsinghua.edu.cn;https://wayne.edu", "aff_unique_abbr": ";THU;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";China;United States" }, { "title": "Multi-Scale VMamba: Hierarchy in Hierarchy Visual State Space Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93455", "id": "r70jUOpDCM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r70jUOpDCM", "openreview": "https://openreview.net/forum?id=r70jUOpDCM", "poster": "/media/PosterPDFs/NeurIPS%202024/93455.png?t=1730698831.821147", "project": "", "author_site": "Yuheng Shi, Minjing Dong, Chang Xu", "tldr": "", "abstract": "Despite the significant achievements of Vision Transformers (ViTs) in various vision tasks, they are constrained by the quadratic complexity. Recently, State Space Models (SSMs) have garnered widespread attention due to their global receptive field and linear complexity with respect to the input length, demonstrating substantial potential across fields including natural language processing and computer vision. To improve the performance of SSMs in vision tasks, a multi-scan strategy is widely adopted, which leads to significant redundancy of SSMs. For a better trade-off between efficiency and performance, we analyze the underlying reasons behind the success of the multi-scan strategy, where long-range dependency plays an important role. Based on the analysis, we introduce Multi-Scale Vision Mamba (MSVMamba) to preserve the superiority of SSMs in vision tasks with limited parameters. It employs a multi-scale 2D scanning technique on both original and downsampled feature maps, which not only benefits long-range dependency learning but also reduces computational costs. Additionally, we integrate a Convolutional Feed-Forward Network (ConvFFN) to address the lack of channel mixing. Our experiments demonstrate that MSVMamba is highly competitive, with the MSVMamba-Tiny model achieving 83.0% top-1 accuracy on ImageNet, 46.9% box mAP, and 42.5% instance mAP with the Mask R-CNN framework, 1x training schedule on COCO, and 47.9% mIoU with single-scale testing on ADE20K. Code is available at https://github.com/YuHengsss/MSVMamba.", "keywords": "Image Classification;Generic vision model;State Space Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yuheng Shi;Minjing Dong;Chang Xu", "authorids": "~Yuheng_Shi1;~Minjing_Dong1;~Chang_Xu4", "gender": "M;M;", "homepage": "https://github.com/YuHengsss;https://www.cs.cityu.edu.hk/~minjdong/;https://sydney.edu.au/engineering/about/our-people/academic-staff/c-xu.html", "dblp": "327/3178;246/2900.html;97/2966-2", "google_scholar": "sn2PeEkAAAAJ;https://scholar.google.com.au/citations?user=gJJRqlsAAAAJ;N4F_3eoAAAAJ", "orcid": ";0009-0003-1717-818X;0000-0002-4756-0609", "linkedin": ";;", "or_profile": "~Yuheng_Shi1;~Minjing_Dong1;~Charles_Xu1", "aff": "Tianjin University;City University of Hong Kong;University of Sydney", "aff_domain": "tju.edu.cn;cityu.edu.hk;sydney.eud.au", "position": "MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nshi2024multiscale,\ntitle={Multi-Scale {VM}amba: Hierarchy in Hierarchy Visual State Space Model},\nauthor={Yuheng Shi and Minjing Dong and Chang Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r70jUOpDCM}\n}", "github": "", "reviewers": "QVsi;TQwz;kG7u;HsBE;XVcS", "pdf_size": 5820210, "rating": "2;2;5;7;7", "confidence": "5;4;4;5;4", "soundness": "2;2;2;3;3", "novelty": "1;1;2;3;3", "presentation": "2;2;2;3;3", "wc_summary": "74;48;44;76;114", "wc_strengths": "85;6;46;50;73", "wc_weaknesses": "383;110;45;79;94", "wc_questions": "67;5;5;5;24", "wc_limitations": "1;5;1;1;5", "wc_review": "610;174;141;211;310", "wc_reply_reviewers": "677;0;16;16;21", "wc_reply_authors": "1512;0;0;0;0", "reply_reviewers": "4;0;1;1;1", "reply_authors": "5;1;1;1;1", "rating_avg": [ 4.6, 2.244994432064365 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.8944271909999159 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 71.2, 25.06312031651287 ], "wc_strengths_avg": [ 52.0, 27.151427218472328 ], "wc_weaknesses_avg": [ 142.2, 122.30028618118604 ], "wc_questions_avg": [ 21.2, 24.053274205396654 ], "wc_limitations_avg": [ 2.6, 1.9595917942265426 ], "wc_review_avg": [ 289.2, 170.1145496422925 ], "wc_reply_reviewers_avg": [ 146.0, 265.5944276523888 ], "wc_reply_authors_avg": [ 302.4, 604.8 ], "reply_reviewers_avg": [ 1.4, 1.3564659966250538 ], "reply_authors_avg": [ 1.8, 1.6000000000000003 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.036369648372665424, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1265921298218314684&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "tju.edu.cn;cityu.edu.hk;sydney.eud.au", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tianjin University;City University of Hong Kong;University of Sydney", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;https://www.cityu.edu.hk;https://www.sydney.edu.au", "aff_unique_abbr": "TJU;CityU;USYD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Australia" }, { "id": "r7mj17BKzw", "title": "SuperEncoder: Towards Iteration-Free Approximate Quantum State Preparation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Numerous quantum algorithms operate under the assumption that classical data has already been converted into quantum states, a process termed Quantum State Preparation (QSP). However, achieving precise QSP requires a circuit depth that scales exponentially with the number of qubits, making it a substantial obstacle in harnessing quantum advantage. Recent research suggests using a Parameterized Quantum Circuit (PQC) to approximate a target state, offering a more scalable solution with reduced circuit depth compared to precise QSP. Despite this, the need for iterative updates of circuit parameters results in a lengthy runtime, limiting its practical application. To overcome this challenge, we introduce SuperEncoder, a pre-trained classical neural network model designed to directly estimate the parameters of a PQC for any given quantum state. By eliminating the need for iterative parameter tuning, SuperEncoder represents a pioneering step towards iteration-free approximate QSP.", "keywords": "Quantum Computing", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/8b01a93a8e9f9574a957127ae672a92a84789c58.zip", "author": "Yilun Zhao;Bingmeng Wang;Wenle Jiang;Xiwei Pan;Bing Li;Yinhe Han;ying wang", "authorids": "~Yilun_Zhao2;~Bingmeng_Wang1;~Wenle_Jiang1;~Xiwei_Pan1;~Bing_Li9;~Yinhe_Han1;~ying_wang18", "gender": ";;M;M;F;M;Not Specified", "homepage": "https://zhaoyilun.org/;https://github.com/Beeeam;;;;http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/201610/t20161010_4674169.html;https://wangying-ict.github.io/", "dblp": "271/8391-2.html;;;;13/2692-17.html;32/2695.html;", "google_scholar": "V6luXGsAAAAJ;;;;;;", "orcid": "0000-0002-6812-5120;;;0009-0003-1485-3418;0000-0003-0732-2267;;", "linkedin": ";;wenle-jiang-418494300/;;;;", "or_profile": "~Yilun_Zhao2;~Bingmeng_Wang1;~Wenle_Jiang1;~Xiwei_Pan1;~Bing_Li9;~Yinhe_Han1;~ying_wang18", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Capital Normal University;Beijing University of Posts and Telecommunications;University of Electronic Science and Technology of China;Capital normal university;;, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;cnu.edu.cn;bupt.edu.cn;uestc.edu.cn;cnu.edu.cn;;ict.ac.cn", "position": "PhD student;MS student;Undergrad student;Undergrad student;Associate Professor;;Full Professor", "bibtex": "@misc{\nanonymous2024superencoder,\ntitle={SuperEncoder: Towards Iteration-Free Approximate Quantum State Preparation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=r7mj17BKzw}\n}", "github": "", "project": "", "reviewers": "4Ck1;dsnA;r6Nc", "site": "https://openreview.net/forum?id=r7mj17BKzw", "pdf_size": 2224000, "rating": "2;4;6", "confidence": "5;5;4", "soundness": "2;3;3", "novelty": "1;3;3", "presentation": "2;3;3", "wc_summary": "42;126;148", "wc_strengths": "10;2;146", "wc_weaknesses": "140;136;128", "wc_questions": "2;243;69", "wc_limitations": "6;1;4", "wc_review": "200;508;495", "wc_reply_reviewers": "398;233;123", "wc_reply_authors": "1318;403;952", "reply_reviewers": "2;1;1", "reply_authors": "5;2;3", "rating_avg": [ 4.0, 1.632993161855452 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 105.33333333333333, 45.67518168789504 ], "wc_strengths_avg": [ 52.666666666666664, 66.07739569794063 ], "wc_weaknesses_avg": [ 134.66666666666666, 4.988876515698588 ], "wc_questions_avg": [ 104.66666666666667, 101.56880535982602 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 401.0, 142.22751726254194 ], "wc_reply_reviewers_avg": [ 251.33333333333334, 113.01425672109791 ], "wc_reply_authors_avg": [ 891.0, 376.02925418110755 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ASDEHZWcWugJ:scholar.google.com/&scioq=SuperEncoder:+Towards+Iteration-Free+Approximate+Quantum+State+Preparation&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;2;3;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Capital Normal University;Beijing University of Posts and Telecommunications;University of Electronic Science and Technology of China", "aff_unique_dep": "Institute of Computing Technology;;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.cnu.edu.cn;http://www.bupt.edu.cn/;https://www.uestc.edu.cn", "aff_unique_abbr": "CAS;CNU;BUPT;UESTC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Dataset Decomposition: Faster LLM Training with Variable Sequence Length Curriculum", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93454", "id": "r8M9SfYMDi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r8M9SfYMDi", "openreview": "https://openreview.net/forum?id=r8M9SfYMDi", "poster": "/media/PosterPDFs/NeurIPS%202024/93454.png?t=1731697710.6528425", "project": "", "author_site": "Hadi Pouransari, Chun-Liang Li, Jen-Hao Chang, Pavan Kumar Anasosalu Vasu, Cem Koc, Vaishaal Shankar, Oncel Tuzel", "tldr": "", "abstract": "Large language models (LLMs) are commonly trained on datasets consisting of fixed-length token sequences. These datasets are created by randomly concatenating documents of various lengths and then chunking them into sequences of a predetermined target length (concat-and-chunk). Recent attention implementations mask cross-document attention, reducing the effective length of a chunk of tokens. Additionally, training on long sequences becomes computationally prohibitive due to the quadratic cost of attention. In this study, we introduce dataset decomposition, a novel variable sequence length training technique, to tackle these challenges. We decompose a dataset into a union of buckets, each containing sequences of the same size extracted from a unique document. During training, we use variable sequence length and batch-size, sampling simultaneously from all buckets with a curriculum. In contrast to the concat-and-chunk baseline, which incurs a fixed attention cost at every step of training, our proposed method incurs a computational cost proportional to the actual document lengths at each step, resulting in significant savings in training time. We train an 8k context-length 1B model at the same cost as a 2k context-length model trained with the baseline approach. Experiments on a web-scale corpus demonstrate that our approach significantly enhances performance on standard language evaluations and long-context benchmarks, reaching target accuracy with up to 6x faster training compared to the baseline. Our method not only enables efficient pretraining on long sequences but also scales effectively with dataset size. Lastly, we shed light on a critical yet less studied aspect of training large language models: the distribution and curriculum of sequence lengths, which results in a non-negligible difference in performance.", "keywords": "Large Language Models;Efficient Training;Learning Efficiency;Long Context", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Hadi Pouransari;Chun-Liang Li;Jen-Hao Rick Chang;Pavan Kumar Anasosalu Vasu;Cem Koc;Vaishaal Shankar;Oncel Tuzel", "authorids": "~Hadi_Pouransari1;~Chun-Liang_Li1;~Jen-Hao_Rick_Chang1;~Pavan_Kumar_Anasosalu_Vasu1;~Cem_Koc1;~Vaishaal_Shankar1;~Oncel_Tuzel2", "gender": "M;M;M;M;M;M;M", "homepage": ";http://chunliangli.github.io;https://rick-chang.github.io;;https://cemkoc.me;http://www.onceltuzel.net;http://vaishaal.com", "dblp": "162/5187;;169/4938;294/8896;;73/2943.html;159/3628", "google_scholar": "besz69AAAAAJ;https://scholar.google.com.tw/citations?user=vqHIt_sAAAAJ;F5Z9kN4AAAAJ;;Z7EXCYkAAAAJ;Fe7NTe0AAAAJ;", "orcid": ";;;;0000-0002-6888-6958;;", "linkedin": ";;;pavankumar-vasu-anasosalu-b0767329;;;", "or_profile": "~Hadi_Pouransari1;~Chun-Liang_Li1;~Jen-Hao_Rick_Chang1;~Pavan_Kumar_Anasosalu_Vasu1;~Cem_Koc1;~Oncel_Tuzel2;~vaishaal_naanny_shankar1", "aff": "Apple;Apple;Apple;Apple;Apple;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "position": "Principal Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\npouransari2024dataset,\ntitle={Dataset Decomposition: Faster {LLM} Training with Variable Sequence Length Curriculum},\nauthor={Hadi Pouransari and Chun-Liang Li and Jen-Hao Rick Chang and Pavan Kumar Anasosalu Vasu and Cem Koc and Vaishaal Shankar and Oncel Tuzel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r8M9SfYMDi}\n}", "github": "", "reviewers": "uip1;z2w6;bcif;H2jF", "pdf_size": 2225960, "rating": "5;5;5;6", "confidence": "4;3;4;3", "soundness": "3;2;2;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "29;71;133;85", "wc_strengths": "16;31;54;42", "wc_weaknesses": "6;240;109;124", "wc_questions": "31;5;55;148", "wc_limitations": "1;5;1;1", "wc_review": "83;352;352;400", "wc_reply_reviewers": "13;22;47;640", "wc_reply_authors": "12;12;21;176", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 37.13152299596665 ], "wc_strengths_avg": [ 35.75, 14.00669482783144 ], "wc_weaknesses_avg": [ 119.75, 82.96497755077138 ], "wc_questions_avg": [ 59.75, 53.93224916504039 ], "wc_limitations_avg": [ 2.0, 1.7320508075688772 ], "wc_review_avg": [ 296.75, 124.95474180678379 ], "wc_reply_reviewers_avg": [ 180.5, 265.58473224189675 ], "wc_reply_authors_avg": [ 55.25, 69.81180057841225 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10077151726220830837&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "r8PnfcWQol", "title": "${EFO}_k$-CQA: Towards Knowledge Graph Complex Query Answering beyond Set Operation", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "To answer complex queries on knowledge graphs, logical reasoning over incomplete knowledge needs learning-based methods because they are capable of generalizing over unobserved knowledge. Therefore, an appropriate dataset is fundamental to both obtaining and evaluating such methods under this paradigm. In this paper, we propose a comprehensive framework for data generation, model training, and method evaluation that covers the combinatorial space of Existential First-order Queries with multiple variables ($\\text{EFO}_k$). The combinatorial query space in our framework significantly extends those defined by set operations in the existing literature. Additionally, we construct a dataset, $\\text{EFO}_k$-CQA, with 741 query types for empirical evaluation, and our benchmark results provide new insights into how query hardness affects the results. Furthermore, we demonstrate that the existing dataset construction process is systematically biased and hinders the appropriate development of query-answering methods, highlighting the importance of our work. Our code and data are provided in~\\url{https://anonymous.4open.science/r/EFOK-CQA/README.md}.", "keywords": "knowledge graph;complex query answering", "primary_area": "", "supplementary_material": "", "author": "Hang Yin;Zihao Wang;Weizhi Fei;Yangqiu Song", "authorids": "~Hang_Yin3;~Zihao_Wang11;~Weizhi_Fei1;~Yangqiu_Song1", "gender": ";;;M", "homepage": ";https://zihao-wang.github.io;;https://www.cse.ust.hk/~yqsong/", "dblp": ";148/9655-1;;86/2159", "google_scholar": ";T28rR00AAAAJ;;MdQZ-q8AAAAJ", "orcid": ";0000-0002-3919-0396;;0000-0002-7818-6090", "linkedin": ";zihao-wang-6a0a3286/;;yqsong/", "or_profile": "~Hang_Yin3;~Zihao_Wang11;~Weizhi_Fei1;~Yangqiu_Song1", "aff": ";Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology", "aff_domain": ";cse.ust.hk;;ust.hk", "position": ";PhD student;;Associate Professor", "bibtex": "@misc{\nanonymous2024efokcqa,\ntitle={\\$\\{{EFO}\\}\\_k\\$-{CQA}: Towards Knowledge Graph Complex Query Answering beyond Set Operation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=r8PnfcWQol}\n}", "github": "", "project": "", "reviewers": "fgZH;51ZX;hXfY;wPcJ;jUbm", "site": "https://openreview.net/forum?id=r8PnfcWQol", "pdf_size": 1258973, "rating": "5;5;5;5;6", "confidence": "2;2;2;5;1", "wc_summary_and_contributions": "97;23;47;16;82", "wc_strengths": "6;63;7;37;96", "wc_improvement": "35;2;4;26;109", "wc_limitations": "7;32;4;50;1", "wc_correctness": "18;1;1;1;40", "wc_clarity": "163;1;1;8;176", "wc_relation_to_prior_work": "21;1;1;1;22", "wc_documentation": "9;1;1;27;30", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "357;125;67;167;557", "wc_reply_reviewers": "0;35;15;20;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 2.4, 1.3564659966250536 ], "wc_summary_and_contributions_avg": [ 53.0, 31.881028841616764 ], "wc_strengths_avg": [ 41.8, 34.35927822291964 ], "wc_improvement_avg": [ 35.2, 39.00461511154802 ], "wc_limitations_avg": [ 18.8, 19.0934543757802 ], "wc_correctness_avg": [ 12.2, 15.380507143784303 ], "wc_clarity_avg": [ 69.8, 81.54851316854281 ], "wc_relation_to_prior_work_avg": [ 9.2, 10.047885349664377 ], "wc_documentation_avg": [ 13.6, 12.547509713086498 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 254.6, 179.77497044917013 ], "wc_reply_reviewers_avg": [ 16.4, 11.394735626595294 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.51604684654214, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5464308228007727041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "DOPPLER: Differentially Private Optimizers with Low-pass Filter for Privacy Noise Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93453", "id": "r8YntmAd0g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=r8YntmAd0g", "openreview": "https://openreview.net/forum?id=r8YntmAd0g", "poster": "/media/PosterPDFs/NeurIPS%202024/93453.png?t=1732748845.917988", "project": "", "author_site": "Xinwei Zhang, Zhiqi Bu, Mingyi Hong, Meisam Razaviyayn", "tldr": "", "abstract": "Privacy is a growing concern in modern deep-learning systems and applications. Differentially private (DP) training prevents the leakage of sensitive information in the collected training data from the trained machine learning models. DP optimizers, including DP stochastic gradient descent (DPSGD) and its variants, privatize the training procedure by gradient clipping and *DP noise* injection. However, in practice, DP models trained using DPSGD and its variants often suffer from significant model performance degradation. Such degradation prevents the application of DP optimization in many key tasks, such as foundation model pretraining. In this paper, we provide a novel *signal processing perspective* to the design and analysis of DP optimizers. We show that a ''frequency domain'' operation called *low-pass filtering* can be used to effectively reduce the impact of DP noise. More specifically, by defining the ''frequency domain'' for both the gradient and differential privacy (DP) noise, we have developed a new component, called DOPPLER. This component is designed for DP algorithms and works by effectively amplifying the gradient while suppressing DP noise within this frequency domain. As a result, it maintains privacy guarantees and enhances the quality of the DP-protected model. Our experiments show that the proposed DP optimizers with a low-pass filter outperform their counterparts without the filter on various models and datasets. Both theoretical and practical evidence suggest that the DOPPLER is effective in closing the gap between DP and non-DP training.", "keywords": "differential privacy;optimization;low-pass filter;signal processing", "primary_area": "privacy", "supplementary_material": "/attachment/5f180602df6994b271c40afca495805423da0bad.zip", "author": "Xinwei Zhang;Zhiqi Bu;Mingyi Hong;Meisam Razaviyayn", "authorids": "~Xinwei_Zhang1;~Zhiqi_Bu1;~Mingyi_Hong1;~Meisam_Razaviyayn1", "gender": "M;M;M;M", "homepage": "https://564612540.github.io/;https://sites.google.com/view/zhiqi-bu;http://people.ece.umn.edu/~mhong/mingyi.html;https://sites.usc.edu/razaviyayn/", "dblp": "55/9870-1.html;245/2573;57/8053;43/8577", "google_scholar": "uq46meMAAAAJ;MEvTLxIAAAAJ;qRnP-p0AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7967-7150;;;", "linkedin": ";;;", "or_profile": "~Xinwei_Zhang1;~Zhiqi_Bu1;~Mingyi_Hong1;~Meisam_Razaviyayn1", "aff": "University of Southern California;Amazon;University of Minnesota, Minneapolis;Google", "aff_domain": "usc.edu;amazon.com;umn.edu;google.com", "position": "Postdoc;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024doppler,\ntitle={{DOPPLER}: Differentially Private Optimizers with Low-pass Filter for Privacy Noise Reduction},\nauthor={Xinwei Zhang and Zhiqi Bu and Mingyi Hong and Meisam Razaviyayn},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=r8YntmAd0g}\n}", "github": "", "reviewers": "ahhw;kR6J;vvxz;WMCW", "pdf_size": 2252105, "rating": "5;5;6;7", "confidence": "4;3;4;3", "soundness": "2;3;3;4", "novelty": "3;2;3;4", "presentation": "4;2;3;3", "wc_summary": "51;125;101;131", "wc_strengths": "73;43;115;63", "wc_weaknesses": "6;55;532;185", "wc_questions": "371;157;148;45", "wc_limitations": "5;46;13;11", "wc_review": "506;426;909;435", "wc_reply_reviewers": "69;0;160;120", "wc_reply_authors": "47;46;560;202", "reply_reviewers": "1;0;2;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 102.0, 31.51190251317746 ], "wc_strengths_avg": [ 73.5, 26.28212320190285 ], "wc_weaknesses_avg": [ 194.5, 205.54135836857748 ], "wc_questions_avg": [ 180.25, 118.5946352075 ], "wc_limitations_avg": [ 18.75, 16.005858302509115 ], "wc_review_avg": [ 569.0, 198.72971594605573 ], "wc_reply_reviewers_avg": [ 87.25, 59.81377349741446 ], "wc_reply_authors_avg": [ 213.75, 209.7455303457025 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1414897218077089313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "usc.edu;amazon.com;umn.edu;google.com", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Southern California;Amazon;University of Minnesota;Google", "aff_unique_dep": ";Amazon.com, Inc.;;Google", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com;https://www.minnesota.edu;https://www.google.com", "aff_unique_abbr": "USC;Amazon;UMN;Google", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Los Angeles;;Minneapolis;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "rC0OM4Jm4v", "title": "GeNIe: Generative Hard Negative Images Through Diffusion", "track": "main", "status": "Reject", "tldr": "", "abstract": "Data augmentation is crucial in training deep models, preventing them from overfitting to limited data. Recent advances in generative AI, e.g., diffusion models, have enabled more sophisticated augmentation techniques that produce data resembling natural images. We introduce GeNIe a novel augmentation method which leverages a latent diffusion model conditioned on a text prompt to combine two contrasting data points (an image from the source category and a text prompt from the target category) to generate challenging augmentations. To achieve this, we adjust the noise level (equivalently, number of diffusion iterations) to ensure the generated image retains low-level and background features from the source image while representing the target category, resulting in a hard negative sample for the source category. We further automate and enhance GeNIe by adaptively adjusting the noise level selection on a per image basis (coined as GeNIe-Ada), leading to further performance improvements. Our extensive experiments, in both few-shot and long-tail distribution settings, demonstrate the effectiveness of our novel augmentation method and its superior performance over the prior art.", "keywords": "Diffusion Models;Data Augmentation;Few-shot;Long-Tail", "primary_area": "machine_vision", "supplementary_material": "/attachment/1c3662f052951c02a408040bb341b84d181c5667.zip", "author": "Soroush Abbasi Koohpayegani;Anuj Singh;Navaneet K L;Hamed Pirsiavash;Hadi Jamali-Rad", "authorids": "~Soroush_Abbasi_Koohpayegani1;~Anuj_Singh1;~Navaneet_K_L1;~Hamed_Pirsiavash1;~Hadi_Jamali-Rad1", "gender": "M;M;M;M;M", "homepage": "http://soroush-abbasi.github.io;;https://web.cs.ucdavis.edu/~hpirsiav/;https://sites.google.com/view/jamalirad/home;", "dblp": "277/5486;226/4749;07/6340;63/8297;", "google_scholar": "JS10DM0AAAAJ;https://scholar.google.co.in/citations?hl=en;https://scholar.google.com.tw/citations?user=c9XXy4MAAAAJ;l4hw34oAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;", "linkedin": ";;hpirsiav/;hjamalirad/;", "or_profile": "~Soroush_Abbasi_Koohpayegani1;~Navaneet_K_L1;~Hamed_Pirsiavash1;~Hadi_Jamali-Rad1;~Anuj_Rajeeva_Singh1", "aff": "University of California, Davis;University of California, Davis;University of California, Davis;Shell;Delft University of Technology", "aff_domain": "ucdavis.edu;ucdavis.edu;ucdavis.edu;shell.com;tudelft.nl", "position": "PhD student;PhD student;Associate Professor;Principal Researcher;Researcher", "bibtex": "@misc{\nanonymous2024genie,\ntitle={Ge{NI}e: Generative Hard Negative Images Through Diffusion},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=rC0OM4Jm4v}\n}", "github": "", "project": "", "reviewers": "hT8y;3KAK;Q6Ew;qsxy", "site": "https://openreview.net/forum?id=rC0OM4Jm4v", "pdf_size": 14416196, "rating": "6;6;6;7", "confidence": "4;4;3;3", "soundness": "4;2;3;4", "novelty": "2;3;3;3", "presentation": "4;3;3;4", "wc_summary": "77;59;72;50", "wc_strengths": "200;38;30;24", "wc_weaknesses": "250;193;64;65", "wc_questions": "124;6;124;47", "wc_limitations": "4;6;1;14", "wc_review": "655;302;291;200", "wc_reply_reviewers": "17;18;0;0", "wc_reply_authors": "19;24;96;98", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 64.5, 10.641898326896381 ], "wc_strengths_avg": [ 73.0, 73.49149610669251 ], "wc_weaknesses_avg": [ 143.0, 81.04628307331558 ], "wc_questions_avg": [ 75.25, 50.85948780709456 ], "wc_limitations_avg": [ 6.25, 4.815340071064556 ], "wc_review_avg": [ 362.0, 173.73399206833417 ], "wc_reply_reviewers_avg": [ 8.75, 8.757139944068497 ], "wc_reply_authors_avg": [ 59.25, 37.79798275040614 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2635400806457986346&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of California, Davis;Shell;Delft University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucdavis.edu;https://www.shell.com;https://www.tudelft.nl", "aff_unique_abbr": "UC Davis;Shell;TU Delft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "United States;United Kingdom;Netherlands" }, { "title": "Improving Deep Learning Optimization through Constrained Parameter Regularization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93452", "id": "rCXTkIhkbF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rCXTkIhkbF", "openreview": "https://openreview.net/forum?id=rCXTkIhkbF", "poster": "/media/PosterPDFs/NeurIPS%202024/93452.png?t=1731514729.2069378", "project": "", "author_site": "J\u00f6rg Franke, Michael Hefenbrock, Gregor Koehler, Frank Hutter", "tldr": "", "abstract": "Regularization is a critical component in deep learning. The most commonly used approach, weight decay, applies a constant penalty coefficient uniformly across all parameters. This may be overly restrictive for some parameters, while insufficient for others. To address this, we present Constrained Parameter Regularization (CPR) as an alternative to traditional weight decay. Unlike the uniform application of a single penalty, CPR enforces an upper bound on a statistical measure, such as the L$_2$-norm, of individual parameter matrices. Consequently, learning becomes a constraint optimization problem, which we tackle using an adaptation of the augmented Lagrangian method. CPR introduces only a minor runtime overhead and only requires setting an upper bound. We propose simple yet efficient mechanisms for initializing this bound, making CPR rely on no hyperparameter or one, akin to weight decay. Our empirical studies on computer vision and language modeling tasks demonstrate CPR's effectiveness. The results show that CPR can outperform traditional weight decay and increase performance in pre-training and fine-tuning.", "keywords": "Deep Learning;Regularization;Augmented Lagrangian", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/75db04187cacdcbf868816c6448356c494636797.zip", "author": "J\u00f6rg K.H. Franke;Michael Hefenbrock;Gregor Koehler;Frank Hutter", "authorids": "~J\u00f6rg_K.H._Franke1;~Michael_Hefenbrock1;~Gregor_Koehler1;~Frank_Hutter1", "gender": ";;M;M", "homepage": ";;;http://ml.informatik.uni-freiburg.de/~hutter/", "dblp": ";;251/8923;89/5383", "google_scholar": ";;b8U4UTAAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ", "orcid": ";;;0000-0002-2037-3694", "linkedin": ";;;frank-hutter-9190b24b/", "or_profile": "~J\u00f6rg_K.H._Franke1;~Michael_Hefenbrock1;~Gregor_Koehler1;~Frank_Hutter1", "aff": ";;German Cancer Research Center (DKFZ);Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": ";;dkfz.de;uni-freiburg.de", "position": ";;PhD student;Full Professor", "bibtex": "@inproceedings{\nfranke2024improving,\ntitle={Improving Deep Learning Optimization through Constrained Parameter Regularization},\nauthor={J{\\\"o}rg K.H. Franke and Michael Hefenbrock and Gregor Koehler and Frank Hutter},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rCXTkIhkbF}\n}", "github": "", "reviewers": "TAzm;gmoY;4Jjm;dfuS;M5LW", "pdf_size": 1391381, "rating": "4;5;6;7;7", "confidence": "3;4;2;3;2", "soundness": "3;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;4;3;3;3", "wc_summary": "54;101;73;50;50", "wc_strengths": "87;51;61;57;46", "wc_weaknesses": "87;114;33;217;43", "wc_questions": "7;210;64;10;2", "wc_limitations": "7;7;1;4;1", "wc_review": "242;483;232;338;142", "wc_reply_reviewers": "47;117;18;27;15", "wc_reply_authors": "0;156;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.8, 1.16619037896906 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 65.6, 19.64281038955475 ], "wc_strengths_avg": [ 60.4, 14.249210504445502 ], "wc_weaknesses_avg": [ 98.8, 66.00727232661565 ], "wc_questions_avg": [ 58.6, 78.96733501898112 ], "wc_limitations_avg": [ 4.0, 2.6832815729997477 ], "wc_review_avg": [ 287.4, 115.83712703619682 ], "wc_reply_reviewers_avg": [ 44.8, 37.791004220581385 ], "wc_reply_authors_avg": [ 31.2, 62.39999999999999 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5041841733655162, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Eu356Nslin4J:scholar.google.com/&scioq=Improving+Deep+Learning+Optimization+through+Constrained+Parameter+Regularization&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": ";;dkfz.de;uni-freiburg.de", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "German Cancer Research Center;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.dkfz.de;https://www.uni-freiburg.de", "aff_unique_abbr": "DKFZ;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Neural Persistence Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93451", "id": "rCnZrFikX6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rCnZrFikX6", "openreview": "https://openreview.net/forum?id=rCnZrFikX6", "poster": "/media/PosterPDFs/NeurIPS%202024/93451.png?t=1733405740.198434", "project": "", "author_site": "Sebastian Zeng, Florian Graf, Martin Uray, Stefan Huber, Roland Kwitt", "tldr": "", "abstract": "We consider the problem of learning the dynamics in the topology of time-evolving point clouds, the prevalent spatiotemporal model for systems exhibiting collective behavior, such as swarms of insects and birds or particles in physics. In such systems, patterns emerge from (local) interactions among self-propelled entities. While several well-understood governing equations for motion and interaction exist, they are notoriously difficult to fit to data, as most prior work requires knowledge about individual motion trajectories, i.e., a requirement that is challenging to satisfy with an increasing number of entities. To evade such confounding factors, we investigate collective behavior from a _topological perspective_, but instead of summarizing entire observation sequences (as done previously), we propose learning a latent dynamical model from topological features _per time point_. The latter is then used to formulate a downstream regression task to predict the parametrization of some a priori specified governing equation. We implement this idea based on a latent ODE learned from vectorized (static) persistence diagrams and show that a combination of recent stability results for persistent homology justifies this modeling choice. Various (ablation) experiments not only demonstrate the relevance of each model component but provide compelling empirical evidence that our proposed model -- _Neural Persistence Dynamics_ -- substantially outperforms the state-of-the-art across a diverse set of parameter regression tasks.", "keywords": "Persistent Homology;Multi-Time Attention;Latent ODE;Collective Behavior;Physical Sciences", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/f828bf4be1cf823089b8da5879060b076f6f7bd6.zip", "author": "Sebastian Zeng;Florian Graf;Martin Uray;Stefan Huber;Roland Kwitt", "authorids": "~Sebastian_Zeng1;~Florian_Graf2;~Martin_Uray1;~Stefan_Huber2;~Roland_Kwitt1", "gender": "M;;;M;M", "homepage": "https://uni-salzburg.elsevierpure.com/de/persons/sebastian-zeng;;;https://www.sthu.org;http://rkwitt.org", "dblp": "297/5768;49/6607;;75/8074-1;60/4140", "google_scholar": "JICJ16IAAAAJ;https://scholar.google.com/citations?hl=de;;https://scholar.google.at/citations?hl=en;https://scholar.google.at/citations?user=sfGFi6UAAAAJ", "orcid": ";0000-0003-3621-0897;;0000-0002-8871-5814;", "linkedin": ";;;shuber-austria/;", "or_profile": "~Sebastian_Zeng1;~Florian_Graf2;~Martin_Uray1;~Stefan_Huber2;~Roland_Kwitt1", "aff": "University of Salzburg;Universit\u00e4t Salzburg;;Salzburg University of Applied Sciences;University of Salzburg", "aff_domain": "sbg.ac.at;sbg.ac.at;;fh-salzburg.ac.at;sbg.ac.at", "position": "PhD student;Postdoc;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzeng2024neural,\ntitle={Neural Persistence Dynamics},\nauthor={Sebastian Zeng and Florian Graf and Martin Uray and Stefan Huber and Roland Kwitt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rCnZrFikX6}\n}", "github": "", "reviewers": "HnKN;dbj5;83FG;bRy6", "pdf_size": 2362955, "rating": "5;5;6;6", "confidence": "3;4;3;3", "soundness": "2;3;2;3", "novelty": "2;3;3;2", "presentation": "2;2;3;3", "wc_summary": "77;169;150;175", "wc_strengths": "59;50;292;82", "wc_weaknesses": "188;204;260;140", "wc_questions": "123;307;115;249", "wc_limitations": "4;1;25;9", "wc_review": "451;731;842;655", "wc_reply_reviewers": "0;64;96;49", "wc_reply_authors": "27;39;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 142.75, 39.06644980030819 ], "wc_strengths_avg": [ 120.75, 99.55745828414865 ], "wc_weaknesses_avg": [ 198.0, 42.8485705712571 ], "wc_questions_avg": [ 198.5, 82.15077601581132 ], "wc_limitations_avg": [ 9.75, 9.256754290786809 ], "wc_review_avg": [ 669.75, 142.73292367215072 ], "wc_reply_reviewers_avg": [ 52.25, 34.61484508126535 ], "wc_reply_authors_avg": [ 16.5, 17.03672503740082 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:W4f9oBGJlgsJ:scholar.google.com/&scioq=Neural+Persistence+Dynamics&hl=en&as_sdt=0,31", "gs_version_total": 5, "email": "sbg.ac.at;sbg.ac.at;;fh-salzburg.ac.at;sbg.ac.at", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Salzburg;Salzburg University of Applied Sciences", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-salzburg.at;https://www.fh-salzburg.ac.at", "aff_unique_abbr": "USAL;FH Salzburg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Austria" }, { "title": "KG-FIT: Knowledge Graph Fine-Tuning Upon Open-World Knowledge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93450", "id": "rDoPMODpki", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rDoPMODpki", "openreview": "https://openreview.net/forum?id=rDoPMODpki", "poster": "/media/PosterPDFs/NeurIPS%202024/93450.png?t=1733855857.6814418", "project": "", "author_site": "Pengcheng Jiang, Lang Cao, Cao (Danica) Xiao, Parminder Bhatia, Jimeng Sun, Jiawei Han", "tldr": "", "abstract": "Knowledge Graph Embedding (KGE) techniques are crucial in learning compact representations of entities and relations within a knowledge graph, facilitating efficient reasoning and knowledge discovery. While existing methods typically focus either on training KGE models solely based on graph structure or fine-tuning pre-trained language models with classification data in KG, KG-FIT leverages LLM-guided refinement to construct a semantically coherent hierarchical structure of entity clusters. By incorporating this hierarchical knowledge along with textual information during the fine-tuning process, KG-FIT effectively captures both global semantics from the LLM and local semantics from the KG. Extensive experiments on the benchmark datasets FB15K-237, YAGO3-10, and PrimeKG demonstrate the superiority of KG-FIT over state-of-the-art pre-trained language model-based methods, achieving improvements of 14.4\\%, 13.5\\%, and 11.9\\% in the Hits@10 metric for the link prediction task, respectively. Furthermore, KG-FIT yields substantial performance gains of 12.6\\%, 6.7\\%, and 17.7\\% compared to the structure-based base models upon which it is built. These results highlight the effectiveness of KG-FIT in incorporating open-world knowledge from LLMs to significantly enhance the expressiveness and informativeness of KG embeddings.", "keywords": "Knowledge Graphs;Large Language Models;Link Prediction", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/284ffd635e93b4cf1ff793f443798e258de12b05.zip", "author": "Pengcheng Jiang;Lang Cao;Cao Xiao;Parminder Bhatia;Jimeng Sun;Jiawei Han", "authorids": "~Pengcheng_Jiang2;~Lang_Cao2;~Cao_Xiao2;~Parminder_Bhatia1;~Jimeng_Sun3;~Jiawei_Han1", "gender": "M;M;F;M;;M", "homepage": "https://pat-jj.github.io/;https://github.com/windszzlang;https://sites.google.com/view/danicaxiao/home;;http://sunlab.org;http://hanj.cs.illinois.edu/", "dblp": "60/8352;133/4286;170/1833;168/8615;;h/JiaweiHan.html", "google_scholar": "TejDN9wAAAAJ;CefzkpEAAAAJ;ahaV25EAAAAJ;;9jmmp5sAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "orcid": "0000-0001-9925-3777;0000-0003-0011-5724;;;0000-0003-1512-6426;0000-0002-3629-2696", "linkedin": "patrick-j-3492b4235/;lang-cao-a73657260/;caoxiao/;;jimengsun/;", "or_profile": "~Pengcheng_Jiang2;~Lang_Cao2;~Cao_Xiao2;~Parminder_Bhatia1;~Jimeng_Sun3;~Jiawei_Han1", "aff": "University of Illinois at Urbana Champaign;University of Illinois Urbana-Champaign;GE Healthcare;GEHC;Georgia Institute of Technology;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "cs.illinois.edu;cs.illinois.edu;ge.com;gehealthcare.com;gatech.edu;illinois.edu", "position": "MS student;MS student;VP of AI;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\njiang2024kgfit,\ntitle={{KG}-{FIT}: Knowledge Graph Fine-Tuning Upon Open-World Knowledge},\nauthor={Pengcheng Jiang and Lang Cao and Cao Xiao and Parminder Bhatia and Jimeng Sun and Jiawei Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rDoPMODpki}\n}", "github": "", "reviewers": "dKZ2;G3dr;hiNn;9oop", "pdf_size": 1812042, "rating": "4;5;5;6", "confidence": "5;4;4;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;3;4", "wc_summary": "54;140;60;100", "wc_strengths": "44;54;33;33", "wc_weaknesses": "164;110;266;118", "wc_questions": "5;47;37;38", "wc_limitations": "1;5;1;12", "wc_review": "268;356;397;301", "wc_reply_reviewers": "0;50;0;28", "wc_reply_authors": "89;0;100;79", "reply_reviewers": "0;1;0;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.5, 34.59407463713981 ], "wc_strengths_avg": [ 41.0, 8.74642784226795 ], "wc_weaknesses_avg": [ 164.5, 62.11883772254597 ], "wc_questions_avg": [ 31.75, 15.927570436196476 ], "wc_limitations_avg": [ 4.75, 4.493050188902857 ], "wc_review_avg": [ 330.5, 49.62106407565239 ], "wc_reply_reviewers_avg": [ 19.5, 20.994046775217015 ], "wc_reply_authors_avg": [ 67.0, 39.38908478246226 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1321304007733989280&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 7, "email": "cs.illinois.edu;cs.illinois.edu;ge.com;gehealthcare.com;gatech.edu;illinois.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;GE Healthcare;General Electric Healthcare;Georgia Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://illinois.edu;https://www.gehealthcare.com;https://www.gehealthcare.com;https://www.gatech.edu", "aff_unique_abbr": "UIUC;GEHC;GEHC;Georgia Tech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "CLAP4CLIP: Continual Learning with Probabilistic Finetuning for Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93449", "id": "rF1YRtZfoJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rF1YRtZfoJ", "openreview": "https://openreview.net/forum?id=rF1YRtZfoJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93449.png?t=1731132733.0382128", "project": "", "author_site": "Saurav Jha, Dong Gong, Lina Yao", "tldr": "", "abstract": "Continual learning (CL) aims to help deep neural networks to learn new knowledge while retaining what has been learned. Owing to their powerful generalizability, pre-trained vision-language models such as Contrastive Language-Image Pre-training (CLIP) have lately gained traction as practical CL candidates. However, the domain mismatch between the pre-training and the downstream CL tasks calls for finetuning of the CLIP on the latter. The deterministic nature of the existing finetuning methods makes them overlook the many possible interactions across the modalities and deems them unsafe for high-risk tasks requiring reliable uncertainty estimation. To address these, our work proposes **C**ontinual **L**e**A**rning with **P**robabilistic finetuning (CLAP) - a probabilistic modeling framework over visual-guided text features per task, thus providing more calibrated CL finetuning. Unlike recent data-hungry anti-forgetting CL techniques, CLAP alleviates forgetting by exploiting the rich pre-trained knowledge of CLIP for weight initialization and distribution regularization of task-specific parameters. Cooperating with the diverse range of existing prompting methods, CLAP can surpass the predominant deterministic finetuning approaches for CL with CLIP. We conclude with out-of-the-box applications of superior uncertainty estimation abilities of CLAP including novel data detection and exemplar selection within the existing CL setups. Our code is available at https://github.com/srvCodes/clap4clip.", "keywords": "Continual Learning;Vision-language models;Finetuning", "primary_area": "machine_vision", "supplementary_material": "/attachment/e5e8c43764c96a86f693d79ed29b24bbb5140da7.zip", "author": "Saurav Jha;Dong Gong;Lina Yao", "authorids": "~Saurav_Jha1;~Dong_Gong1;~Lina_Yao2", "gender": "M;M;F", "homepage": "http://sauravjha.com.np/;https://donggong1.github.io;https://www.linayao.com/", "dblp": "218/5159;125/5032;56/6651-1", "google_scholar": ";https://scholar.google.com.au/citations?user=e2u6hRoAAAAJ;https://scholar.google.com.au/citations?user=EU3snBgAAAAJ", "orcid": ";0000-0002-2668-9630;", "linkedin": "sauravonn;;linayao/", "or_profile": "~Saurav_Jha1;~Dong_Gong1;~Lina_Yao2", "aff": "Tencent AI Lab;University of New South Wales;CSIRO's Data61", "aff_domain": "tencent.com;unsw.edu.au;data61.csiro.au", "position": "Intern;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\njha2024clapclip,\ntitle={{CLAP}4{CLIP}: Continual Learning with Probabilistic Finetuning for Vision-Language Models},\nauthor={Saurav Jha and Dong Gong and Lina Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rF1YRtZfoJ}\n}", "github": "", "reviewers": "bibK;KrxH;giBN;ey1B", "pdf_size": 2623059, "rating": "3;4;5;8", "confidence": "4;4;4;3", "soundness": "2;2;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "38;81;117;90", "wc_strengths": "6;96;84;109", "wc_weaknesses": "138;80;416;70", "wc_questions": "47;83;52;5", "wc_limitations": "12;1;10;12", "wc_review": "241;341;679;286", "wc_reply_reviewers": "0;0;64;16", "wc_reply_authors": "91;79;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 28.394541729001368 ], "wc_strengths_avg": [ 73.75, 40.10221315588455 ], "wc_weaknesses_avg": [ 176.0, 140.9751751195933 ], "wc_questions_avg": [ 46.75, 27.770262872360426 ], "wc_limitations_avg": [ 8.75, 4.548351349665063 ], "wc_review_avg": [ 386.75, 172.407040169478 ], "wc_reply_reviewers_avg": [ 20.0, 26.229754097208 ], "wc_reply_authors_avg": [ 42.5, 42.71123973850443 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9258200997725515, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18364283829597796159&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "tencent.com;unsw.edu.au;data61.csiro.au", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Tencent;University of New South Wales;CSIRO", "aff_unique_dep": "Tencent AI Lab;;Data61", "aff_unique_url": "https://ai.tencent.com;https://www.unsw.edu.au;https://www.csiro.au", "aff_unique_abbr": "Tencent AI Lab;UNSW;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Australia" }, { "title": "F-OAL: Forward-only Online Analytic Learning with Fast Training and Low Memory Footprint in Class Incremental Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93448", "id": "rGEDFS3emy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rGEDFS3emy", "openreview": "https://openreview.net/forum?id=rGEDFS3emy", "poster": "/media/PosterPDFs/NeurIPS%202024/93448.png?t=1731122648.0251846", "project": "", "author_site": "HUIPING ZHUANG, Yuchen Liu, Run He, Kai Tong, Ziqian Zeng, Cen Chen, Yi Wang, Lap-Pui Chau", "tldr": "", "abstract": "Online Class Incremental Learning (OCIL) aims to train models incrementally, where data arrive in mini-batches, and previous data are not accessible. A major challenge in OCIL is Catastrophic Forgetting, i.e., the loss of previously learned knowledge. Among existing baselines, replay-based methods show competitive results but requires extra memory for storing exemplars, while exemplar-free (i.e., data need not be stored for replay in production) methods are resource friendly but often lack accuracy. In this paper, we propose an exemplar-free approach\u2014Forward-only Online Analytic Learning (F-OAL). Unlike traditional methods, F-OAL does not rely on back-propagation and is forward-only, significantly reducing memory usage and computational time. Cooperating with a pre-trained frozen encoder with Feature Fusion, F-OAL only needs to update a linear classifier by recursive least square. This approach simultaneously achieves high accuracy and low resource consumption. Extensive experiments on bench mark datasets demonstrate F-OAL\u2019s robust performance in OCIL scenarios. Code is available at: https://github.com/liuyuchen-cz/F-OAL", "keywords": "Class incremental learning;closed-form solution;exemplar-free;continual learning;online continual learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Huiping Zhuang;Yuchen Liu;Run He;Kai Tong;Ziqian Zeng;Cen Chen;Yi Wang;Lap-Pui Chau", "authorids": "~Huiping_Zhuang2;~Yuchen_Liu16;~Run_He1;~Kai_Tong1;~Ziqian_Zeng1;~Cen_Chen4;~Yi_Wang48;~Lap-Pui_Chau3", "gender": "M;M;M;F;M;M;M;M", "homepage": ";;;https://ziqianzeng.github.io;;https://zhuanghp.github.io/;https://www.polyu.edu.hk/eee/people/academic-staff-and-teaching-staff/prof-chau-lap-pui/;https://wangyintu.github.io", "dblp": ";21/10617;211/4831;155/0168;152/6215-2.html;194/5829;03/5597.html;17/221-68", "google_scholar": ";cN4SxagAAAAJ;;fuOr3nAAAAAJ;pPsNBWUAAAAJ;https://scholar.google.com.sg/citations?user=vCXxuLkAAAAJ;MYREIH0AAAAJ;https://scholar.google.com.sg/citations?user=MAG909MAAAAJ", "orcid": "0009-0001-3831-1168;;0009-0001-6073-8918;;0000-0003-1389-0148;0000-0002-4612-5445;0000-0003-4932-0593;0000-0001-8659-4724", "linkedin": ";;kai-tong-4530a6299/;;;;;yi-wang-479757ab/", "or_profile": "~Yuchen_Liu16;~Run_He1;~Kai_Tong1;~Ziqian_Zeng1;~Cen_Chen4;~HUIPING_ZHUANG1;~Lap-pui_Chau1;~YI_WANG28", "aff": ";South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;The Hong Kong Polytechnic University;Hong Kong Polytechnic University", "aff_domain": ";scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;polyu.edu.hk;polyu.edu.hk", "position": ";MS student;PhD student;Associate Professor;Full Professor;Associate Professor;Full Professor;Research Assistant Professor", "bibtex": "@inproceedings{\nzhuang2024foal,\ntitle={F-{OAL}: Forward-only Online Analytic Learning with Fast Training and Low Memory Footprint in Class Incremental Learning},\nauthor={Huiping Zhuang and Yuchen Liu and Run He and Kai Tong and Ziqian Zeng and Cen Chen and Yi Wang and Lap-Pui Chau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rGEDFS3emy}\n}", "github": "", "reviewers": "XB6P;UyXG;Pgch;nHHU", "pdf_size": 524505, "rating": "4;5;5;8", "confidence": "4;3;4;4", "soundness": "2;3;2;3", "novelty": "2;3;2;2", "presentation": "2;2;2;3", "wc_summary": "62;78;71;220", "wc_strengths": "29;16;41;82", "wc_weaknesses": "42;83;125;312", "wc_questions": "13;16;5;195", "wc_limitations": "230;8;31;96", "wc_review": "376;201;273;905", "wc_reply_reviewers": "0;38;24;59", "wc_reply_authors": "0;28;27;19", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 107.75, 65.05526496756431 ], "wc_strengths_avg": [ 42.0, 24.728526037756478 ], "wc_weaknesses_avg": [ 140.5, 103.27269726311984 ], "wc_questions_avg": [ 57.25, 79.63157351201846 ], "wc_limitations_avg": [ 91.25, 86.36369318179949 ], "wc_review_avg": [ 438.75, 276.28099373644943 ], "wc_reply_reviewers_avg": [ 30.25, 21.45198126048035 ], "wc_reply_authors_avg": [ 18.5, 11.236102527122116 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8455217262074076941&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": ";scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;polyu.edu.hk;polyu.edu.hk", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;1", "aff_unique_norm": "South China University of Technology;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "https://www.scut.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "SCUT;PolyU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "UAV3D: A Large-scale 3D Perception Benchmark for Unmanned Aerial Vehicles", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97488", "id": "rGdy9jrBs8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rGdy9jrBs8", "openreview": "https://openreview.net/forum?id=rGdy9jrBs8", "poster": "", "project": "", "author_site": "hui ye, Rajshekhar Sunderraman, Jonathan Shihao Ji", "tldr": "", "abstract": "Unmanned Aerial Vehicles (UAVs), equipped with cameras, are employed in numerous applications, including aerial photography, surveillance, and agriculture. In these applications, robust object detection and tracking are essential for the effective deployment of UAVs. However, existing benchmarks for UAV applications are mainly designed for traditional 2D perception tasks, restricting the\ndevelopment of real-world applications that require a 3D understanding of the environment. Furthermore, despite recent advancements in single-UAV perception, limited views of a single UAV platform significantly constrain its perception capabilities over long distances or in occluded areas. To address these challenges, we introduce UAV3D \u2013 a benchmark designed to advance research in both 3D and\ncollaborative 3D perception tasks with UAVs. UAV3D comprises 1,000 scenes, each of which has 20 frames with fully annotated 3D bounding boxes on vehicles. We provide the benchmark for four 3D perception tasks: single-UAV 3D object detection, single-UAV object tracking, collaborative-UAV 3D object detection, and collaborative-UAV object tracking. Our dataset and code are available at\nhttps://huiyegit.github.io/UAV3D_Benchmark/.", "keywords": "UAV;3D Perception Benchmark;Object Detection;Object Tracking", "primary_area": "", "supplementary_material": "", "author": "Hui Ye;Rajshekhar Sunderraman;Shihao Ji", "authorids": "~Hui_Ye2;~Rajshekhar_Sunderraman1;~Shihao_Ji1", "gender": "M;M;M", "homepage": ";http://tinman.cs.gsu.edu/~raj;https://sji.soc.uconn.edu/", "dblp": ";s/RSunderraman;35/4137", "google_scholar": "Fm0_NK8AAAAJ;Wlxc9FEAAAAJ;qMfWf9EAAAAJ", "orcid": ";0000-0001-6822-6629;", "linkedin": ";raj-sunderraman-a5591bb2/;", "or_profile": "~Hui_Ye2;~Rajshekhar_Sunderraman1;~Shihao_Ji2", "aff": "Georgia State University;Georgia State University;Georgia State University", "aff_domain": "gsu.edu;gsu.edu;gsu.edu", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nye2024uavd,\ntitle={{UAV}3D: A Large-scale 3D Perception Benchmark for Unmanned Aerial Vehicles},\nauthor={Hui Ye and Rajshekhar Sunderraman and Shihao Ji},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=rGdy9jrBs8}\n}", "github": "", "reviewers": "xNiq;LYae;7jif", "pdf_size": 7837983, "rating": "6;6;6", "confidence": "3;4;4", "wc_summary_and_contributions": "84;143;71", "wc_strengths": "20;100;2", "wc_improvement": "122;99;36", "wc_limitations": "7;56;37", "wc_correctness": "5;5;1", "wc_clarity": "10;12;7", "wc_relation_to_prior_work": "9;174;24", "wc_documentation": "21;17;11", "wc_additional_feedback": "1;1;1", "wc_review": "279;607;190", "wc_reply_reviewers": "51;120;268", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;3", "reply_authors": "1;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 99.33333333333333, 31.329787033357817 ], "wc_strengths_avg": [ 40.666666666666664, 42.59368758656876 ], "wc_improvement_avg": [ 85.66666666666667, 36.353205574688396 ], "wc_limitations_avg": [ 33.333333333333336, 20.17148702720969 ], "wc_correctness_avg": [ 3.6666666666666665, 1.8856180831641267 ], "wc_clarity_avg": [ 9.666666666666666, 2.0548046676563256 ], "wc_relation_to_prior_work_avg": [ 69.0, 74.4983221287567 ], "wc_documentation_avg": [ 16.333333333333332, 4.109609335312651 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 358.6666666666667, 179.31784319718128 ], "wc_reply_reviewers_avg": [ 146.33333333333334, 90.52562559236411 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12378057369024993255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gsu.edu;gsu.edu;gsu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia State University", "aff_unique_dep": "", "aff_unique_url": "https://www.gsu.edu", "aff_unique_abbr": "GSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Reliable Model Selection for Unsupervised Domain Adaptation: An Empirical Study and A Certified Baseline", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97487", "id": "rI7kbFTSpr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rI7kbFTSpr", "openreview": "https://openreview.net/forum?id=rI7kbFTSpr", "poster": "/media/PosterPDFs/NeurIPS%202024/97487.png?t=1731582468.0521383", "project": "", "author_site": "Dapeng Hu, Romy Luo, Jian Liang, Chuan Sheng Foo", "tldr": "", "abstract": "Selecting appropriate hyperparameters is crucial for unlocking the full potential of advanced unsupervised domain adaptation (UDA) methods in unlabeled target domains. Although this challenge remains under-explored, it has recently garnered increasing attention with the proposals of various model selection methods. Reliable model selection should maintain performance across diverse UDA methods and scenarios, especially avoiding highly risky worst-case selections\u2014selecting the model or hyperparameter with the worst performance in the pool.\n\\textit{Are existing model selection methods reliable and versatile enough for different UDA tasks?} In this paper, we provide a comprehensive empirical study involving 8 existing model selection approaches to answer this question. Our evaluation spans 12 UDA methods across 5 diverse UDA benchmarks and 5 popular UDA scenarios.\nSurprisingly, we find that none of these approaches can effectively avoid the worst-case selection. In contrast, a simple but overlooked ensemble-based selection approach, which we call EnsV, is both theoretically and empirically certified to avoid the worst-case selection, ensuring high reliability. Additionally, EnsV is versatile for various practical but challenging UDA scenarios, including validation of open-partial-set UDA and source-free UDA.\nFinally, we call for more attention to the reliability of model selection in UDA: avoiding the worst-case is as significant as achieving peak selection performance and should not be overlooked when developing new model selection methods. Code is available at https://github.com/LHXXHB/EnsV.", "keywords": "Unsupervised Domain Adaptation; Unsupervised Validation; Model Selection; Hyperparameter Selection", "primary_area": "", "supplementary_material": "/attachment/9376a9b40d5c503ddf7c8493e77a64ed3ae73456.pdf", "author": "Dapeng Hu;Mi Luo;Jian Liang;Chuan-Sheng Foo", "authorids": "~Dapeng_Hu2;~Mi_Luo1;~Jian_Liang1;~Chuan-Sheng_Foo1", "gender": "M;F;M;M", "homepage": "https://lhxxhb.github.io/;https://romyluo.com/;https://liangjian.xyz;http://ai.stanford.edu/~csfoo", "dblp": "247/3382;257/3359;19/2208-1;73/1823", "google_scholar": "wv9HjA0AAAAJ;eL-xIlAAAAAJ;https://scholar.google.com/citations?hl=en;AgbeqGkAAAAJ", "orcid": ";;0000-0003-3890-1894;0000-0002-4748-5792", "linkedin": ";;;", "or_profile": "~Dapeng_Hu2;~Mi_Luo1;~Jian_Liang1;~Chuan-Sheng_Foo1", "aff": "Apple;University of Texas at Austin;Institute of Automation, Chinese Academy of Sciences;Institute for Infocomm Research, A*STAR", "aff_domain": "apple.com;cs.utexas.edu;ia.ac.cn;i2r.a-star.edu.sg", "position": "Siri Engineer;PhD student;Associate Professor;Principal Scientist", "bibtex": "@inproceedings{\nhu2024towards,\ntitle={Towards Reliable Model Selection for Unsupervised Domain Adaptation: An Empirical Study and A Certified Baseline},\nauthor={Dapeng Hu and Mi Luo and Jian Liang and Chuan-Sheng Foo},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=rI7kbFTSpr}\n}", "github": "", "reviewers": "N1tk;deqf;yasW;uZP5", "pdf_size": 3623498, "rating": "5;6;7;7", "confidence": "4;4;2;2", "wc_summary_and_contributions": "59;32;91;104", "wc_strengths": "5;24;74;52", "wc_improvement": "5;89;99;39", "wc_limitations": "1;24;18;11", "wc_correctness": "1;1;9;26", "wc_clarity": "1;55;23;24", "wc_relation_to_prior_work": "1;1;18;33", "wc_documentation": "1;1;5;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "75;228;338;314", "wc_reply_reviewers": "0;218;0;0", "wc_reply_authors": "0;356;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.0 ], "wc_summary_and_contributions_avg": [ 71.5, 28.075790282732914 ], "wc_strengths_avg": [ 38.75, 26.337947907914163 ], "wc_improvement_avg": [ 58.0, 38.118237105091836 ], "wc_limitations_avg": [ 13.5, 8.558621384311845 ], "wc_correctness_avg": [ 9.25, 10.207227831296802 ], "wc_clarity_avg": [ 25.75, 19.22725929507375 ], "wc_relation_to_prior_work_avg": [ 13.25, 13.348689074212494 ], "wc_documentation_avg": [ 7.75, 9.522998477370455 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 238.75, 103.00819142184761 ], "wc_reply_reviewers_avg": [ 54.5, 94.39676901250381 ], "wc_reply_authors_avg": [ 89.0, 154.1525218736301 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6uNWsbTPZ3oJ:scholar.google.com/&scioq=Towards+Reliable+Model+Selection+for+Unsupervised+Domain+Adaptation:+An+Empirical+Study+and+A+Certified+Baseline&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "apple.com;cs.utexas.edu;ia.ac.cn;i2r.a-star.edu.sg", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Apple;University of Texas at Austin;Chinese Academy of Sciences;Institute for Infocomm Research", "aff_unique_dep": "Apple Inc.;;Institute of Automation;", "aff_unique_url": "https://www.apple.com;https://www.utexas.edu;http://www.ia.cas.cn;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "Apple;UT Austin;CAS;I2R", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;1;2", "aff_country_unique": "United States;China;Singapore" }, { "title": "Learning Successor Features the Simple Way", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93447", "id": "rI7oZj1WMc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rI7oZj1WMc", "openreview": "https://openreview.net/forum?id=rI7oZj1WMc", "poster": "", "project": "", "author_site": "Raymond Chua, Arna Ghosh, Christos Kaplanis, Blake Richards, Doina Precup", "tldr": "", "abstract": "In Deep Reinforcement Learning (RL), it is a challenge to learn representations that do not exhibit catastrophic forgetting or interference in non-stationary environments. Successor Features (SFs) offer a potential solution to this challenge. However, canonical techniques for learning SFs from pixel-level observations often lead to representation collapse, wherein representations degenerate and fail to capture meaningful variations in the data. More recent methods for learning SFs can avoid representation collapse, but they often involve complex losses and multiple learning phases, reducing their efficiency. We introduce a novel, simple method for learning SFs directly from pixels. Our approach uses a combination of a Temporal-difference (TD) loss and a reward prediction loss, which together capture the basic mathematical definition of SFs. We show that our approach matches or outperforms existing SF learning techniques in both 2D (Minigrid) and 3D (Miniworld) mazes, for both single and continual learning scenarios. As well, our technique is efficient, and can reach higher levels of performance in less time than other approaches. Our work provides a new, streamlined technique for learning SFs directly from pixel observations, with no pretraining required.", "keywords": "deep reinforcement learning;representation learning;continual reinforcement learning;successor feature;successor representation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Raymond Chua;Arna Ghosh;Christos Kaplanis;Blake Aaron Richards;Doina Precup", "authorids": "~Raymond_Chua1;~Arna_Ghosh1;~Christos_Kaplanis2;~Blake_Aaron_Richards1;~Doina_Precup1", "gender": "M;M;M;F;M", "homepage": "https://scholar.google.com/citations?user=8pVlWRsAAAAJ&hl=en;https://arnaghosh.github.io/;http://linclab.org;http://cs.mcgill.ca/~dprecup/;", "dblp": ";190/7223;70/10850;p/DoinaPrecup;200/8168", "google_scholar": ";https://scholar.google.ca/citations?user=YjS546oAAAAJ;https://scholar.google.ca/citations?user=1CPY1LsAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;https://scholar.google.co.uk/citations?user=jaNr8IoAAAAJ", "orcid": ";;0000-0001-9662-2151;;", "linkedin": ";;;;", "or_profile": "~Raymond_Chua1;~Arna_Ghosh1;~Blake_Aaron_Richards1;~Doina_Precup1;~Christos_Kaplanis1", "aff": "McGill University;McGill University;Mila - Quebec Artificial Intelligence Institute;McGill University;Google", "aff_domain": "cs.mcgill.ca;mcgill.ca;mila.quebec;mcgill.ca;google.com", "position": "PhD student;PhD student;Associate Professor;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nchua2024learning,\ntitle={Learning Successor Features the Simple Way},\nauthor={Raymond Chua and Arna Ghosh and Christos Kaplanis and Blake Aaron Richards and Doina Precup},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rI7oZj1WMc}\n}", "github": "", "reviewers": "2gcv;nkgZ;Tczf;7tgV", "pdf_size": 9704833, "rating": "4;5;5;6", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "1;3;3;2", "presentation": "2;2;2;3", "wc_summary": "66;44;74;143", "wc_strengths": "22;31;60;208", "wc_weaknesses": "253;132;100;519", "wc_questions": "4;16;85;65", "wc_limitations": "6;24;6;21", "wc_review": "351;247;325;956", "wc_reply_reviewers": "194;36;77;644", "wc_reply_authors": "2022;142;883;1417", "reply_reviewers": "1;1;1;1", "reply_authors": "6;3;4;4", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 37.029549011566424 ], "wc_strengths_avg": [ 80.25, 75.08120603719682 ], "wc_weaknesses_avg": [ 251.0, 164.91664561226074 ], "wc_questions_avg": [ 42.5, 33.529837458597974 ], "wc_limitations_avg": [ 14.25, 8.317902379807062 ], "wc_review_avg": [ 469.75, 283.33317401956305 ], "wc_reply_reviewers_avg": [ 237.75, 241.6075071267447 ], "wc_reply_authors_avg": [ 1116.0, 691.8095836283276 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.25, 1.0897247358851685 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:D4Mky3UnnhEJ:scholar.google.com/&scioq=Learning+Successor+Features+the+Simple+Way&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "cs.mcgill.ca;mcgill.ca;mila.quebec;mcgill.ca;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "McGill University;Quebec Artificial Intelligence Institute;Google", "aff_unique_dep": ";Artificial Intelligence;Google", "aff_unique_url": "https://www.mcgill.ca;https://mila.quebec;https://www.google.com", "aff_unique_abbr": "McGill;Mila;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;United States" }, { "title": "Model Based Inference of Synaptic Plasticity Rules", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93446", "id": "rI80PHlnFm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rI80PHlnFm", "openreview": "https://openreview.net/forum?id=rI80PHlnFm", "poster": "", "project": "", "author_site": "Yash Mehta, Danil Tyulmankov, Adithya Rajagopalan, Glenn Turner, James Fitzgerald, Jan Funke", "tldr": "", "abstract": "Inferring the synaptic plasticity rules that govern learning in the brain is a key challenge in neuroscience. We present a novel computational method to infer these rules from experimental data, applicable to both neural and behavioral data. Our approach approximates plasticity rules using a parameterized function, employing either truncated Taylor series for theoretical interpretability or multilayer perceptrons. These plasticity parameters are optimized via gradient descent over entire trajectories to align closely with observed neural activity or behavioral learning dynamics. This method can uncover complex rules that induce long nonlinear time dependencies, particularly involving factors like postsynaptic activity and current synaptic weights. We validate our approach through simulations, successfully recovering established rules such as Oja's, as well as more intricate plasticity rules with reward-modulated terms. We assess the robustness of our technique to noise and apply it to behavioral data from \\textit{Drosophila} in a probabilistic reward-learning experiment. Notably, our findings reveal an active forgetting component in reward learning in flies, improving predictive accuracy over previous models. This modeling framework offers a promising new avenue for elucidating the computational principles of synaptic plasticity and learning in the brain.", "keywords": "computational neuroscience;plasticity rules;synaptic plasticity;biologically plausible learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yash Mehta;Danil Tyulmankov;Adithya E. Rajagopalan;Glenn C Turner;James E Fitzgerald;Jan Funke", "authorids": "~Yash_Mehta1;~Danil_Tyulmankov1;~Adithya_E._Rajagopalan1;~Glenn_C_Turner1;~James_E_Fitzgerald1;~Jan_Funke3", "gender": "M;;;M;M;", "homepage": "https://yashsmehta.com/;;;https://www.janelia.org/lab/turner-lab;https://www.janelia.org/lab/fitzgerald-lab;https://www.janelia.org/lab/funke-lab", "dblp": ";;;;;60/9514", "google_scholar": "zFqBbIkAAAAJ;;;Mr8K0jMAAAAJ;https://scholar.google.com/citations?hl=en;7rqAapgAAAAJ", "orcid": "0000-0002-9610-7077;;;0000-0002-5341-2784;0000-0002-0949-4188;", "linkedin": "yashsmehta/;;;;;", "or_profile": "~Yash_Mehta1;~Danil_Tyulmankov1;~Adithya_E._Rajagopalan1;~Glenn_C_Turner1;~James_E_Fitzgerald1;~Jan_Funke3", "aff": "Johns Hopkins University;;;;Northwestern University;HHMI Janelia Research Campus", "aff_domain": "johnshopkins.edu;;;;northwestern.edu;janelia.hhmi.org", "position": "PhD student;;;;Associate Professor;Group leader", "bibtex": "@inproceedings{\nmehta2024model,\ntitle={Model Based Inference of Synaptic Plasticity Rules},\nauthor={Yash Mehta and Danil Tyulmankov and Adithya E. Rajagopalan and Glenn C Turner and James E Fitzgerald and Jan Funke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rI80PHlnFm}\n}", "github": "", "reviewers": "MyoH;5Cze;2Jtn", "pdf_size": 5615751, "rating": "7;7;7", "confidence": "4;3;4", "soundness": "4;3;3", "novelty": "3;3;4", "presentation": "4;4;3", "wc_summary": "66;100;279", "wc_strengths": "47;58;170", "wc_weaknesses": "67;110;326", "wc_questions": "75;141;13", "wc_limitations": "1;192;40", "wc_review": "256;601;828", "wc_reply_reviewers": "96;39;36", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 148.33333333333334, 93.43209060893133 ], "wc_strengths_avg": [ 91.66666666666667, 55.57177541002467 ], "wc_weaknesses_avg": [ 167.66666666666666, 113.32647038044064 ], "wc_questions_avg": [ 76.33333333333333, 52.264285660052366 ], "wc_limitations_avg": [ 77.66666666666667, 82.39875942971202 ], "wc_review_avg": [ 561.6666666666666, 235.16849751236288 ], "wc_reply_reviewers_avg": [ 57.0, 27.60434748368452 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=199466819784415940&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "johnshopkins.edu;;;;northwestern.edu;janelia.hhmi.org", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Johns Hopkins University;Northwestern University;HHMI Janelia Research Campus", "aff_unique_dep": ";;", "aff_unique_url": "https://www.jhu.edu;https://www.northwestern.edu;https://www.janelia.org", "aff_unique_abbr": "JHU;NU;HHMI Janelia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Janelia", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SELECT: A Large-Scale Benchmark of Data Curation Strategies for Image Classification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97486", "id": "rIHx6puY5b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rIHx6puY5b", "openreview": "https://openreview.net/forum?id=rIHx6puY5b", "poster": "/media/PosterPDFs/NeurIPS%202024/97486.png?t=1732541818.4031126", "project": "", "author_site": "Benjamin Feuer, Jiawei Xu, Niv Cohen, Patrick Yubeaton, Govind Mittal, Chinmay Hegde", "tldr": "", "abstract": "Data curation is the problem of how to collect and organize samples into a dataset that supports efficient learning. Despite the centrality of the task, little work has been devoted towards a large-scale, systematic comparison of various curation methods. In this work, we take steps towards a formal evaluation of data curation strategies and introduce SELECT, the first large-scale benchmark of curation strategies for image classification.\n\nIn order to generate baseline methods for the SELECT benchmark, we create a new dataset, ImageNet++, which constitutes the largest superset of ImageNet-1K to date. Our dataset extends ImageNet with 5 new training-data shifts, each approximately the size of ImageNet-1K, and each assembled using a distinct curation strategy. We evaluate our data curation baselines in two ways: (i) using each training-data shift to train identical image classification models from scratch (ii) using it to inspect a fixed pretrained self-supervised representation.\n\nOur findings show interesting trends, particularly pertaining to recent methods for data curation such as synthetic data generation and lookup based on CLIP embeddings. We show that although these strategies are highly competitive for certain tasks, the curation strategy used to assemble the original ImageNet-1K dataset remains the gold standard. We anticipate that our benchmark can illuminate the path for new methods to further reduce the gap. We release our checkpoints, code, documentation, and a link to our dataset at https://github.com/jimmyxu123/SELECT.", "keywords": "data curation;data filtration;data-centric machine learning;computer vision;image classification;imagenet;vision-language models;CLIP", "primary_area": "", "supplementary_material": "/attachment/8fd42261af8f31470ffe81254a19984caf5030ea.pdf", "author": "Benjamin Feuer;Jiawei Xu;Niv Cohen;Patrick Yubeaton;Govind Mittal;Chinmay Hegde", "authorids": "~Benjamin_Feuer1;~Jiawei_Xu5;~Niv_Cohen1;~Patrick_Yubeaton1;~Govind_Mittal1;~Chinmay_Hegde1", "gender": "M;M;M;M;M;M", "homepage": "https://penfever.github.io/;;https://www.cs.huji.ac.il/w~nivc/;https://www.linkedin.com/in/william-patrick-yubeaton/;https://govindm.me;https://chinmayhegde.github.io/", "dblp": "322/5063.html;;259/2291;;242/0556;39/2056", "google_scholar": "VPXu100AAAAJ;;https://scholar.google.co.il/citations?user=ZMdC3OQAAAAJ;;JR1C0tcAAAAJ;eJAV17IAAAAJ", "orcid": "0000-0002-7938-542X;;;;;", "linkedin": "benjaminfeuer/;jiawei-xu-3b2605242/;niv-cohen-39b49521/;;https://linkedin.com/in/govindmittal;", "or_profile": "~Benjamin_Feuer1;~Jiawei_Xu5;~Niv_Cohen1;~Patrick_Yubeaton1;~Govind_Mittal1;~Chinmay_Hegde1", "aff": "Arthur AI;NYU, New York University;Hebrew University of Jerusalem;New York University;New York University;New York University", "aff_domain": "arthur.ai;cims.nyu.edu;huji.ac.il;nyu.edu;nyu.edu;nyu.edu", "position": "Intern;Undergrad student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfeuer2024select,\ntitle={{SELECT}: A Large-Scale Benchmark of Data Curation Strategies for Image Classification},\nauthor={Benjamin Feuer and Jiawei Xu and Niv Cohen and Patrick Yubeaton and Govind Mittal and Chinmay Hegde},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=rIHx6puY5b}\n}", "github": "", "reviewers": "kQRC;3uXb;5d11;U7mH", "pdf_size": 27182333, "rating": "6;6;7;7", "confidence": "3;4;3;4", "wc_summary_and_contributions": "69;109;202;35", "wc_strengths": "77;241;90;44", "wc_improvement": "208;260;81;20", "wc_limitations": "67;15;70;4", "wc_correctness": "5;8;8;10", "wc_clarity": "39;13;12;4", "wc_relation_to_prior_work": "49;11;5;12", "wc_documentation": "8;17;52;3", "wc_additional_feedback": "1;1;1;1", "wc_review": "523;675;521;133", "wc_reply_reviewers": "149;0;0;14", "wc_reply_authors": "146;0;0;0", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 103.75, 62.47949663689681 ], "wc_strengths_avg": [ 113.0, 75.7792847683323 ], "wc_improvement_avg": [ 142.25, 96.02701442823265 ], "wc_limitations_avg": [ 39.0, 29.77414986191881 ], "wc_correctness_avg": [ 7.75, 1.7853571071357126 ], "wc_clarity_avg": [ 17.0, 13.171939872319491 ], "wc_relation_to_prior_work_avg": [ 19.25, 17.383541066192468 ], "wc_documentation_avg": [ 20.0, 19.144189719076646 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 463.0, 200.50436404228213 ], "wc_reply_reviewers_avg": [ 40.75, 62.75896350323195 ], "wc_reply_authors_avg": [ 36.5, 63.21985447626402 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3789909460066094510&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "arthur.ai;cims.nyu.edu;huji.ac.il;nyu.edu;nyu.edu;nyu.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Arthur AI;New York University;Hebrew University of Jerusalem", "aff_unique_dep": ";;", "aff_unique_url": "https://www.arthur.ai;https://www.nyu.edu;https://www.huji.ac.il", "aff_unique_abbr": "Arthur AI;NYU;HUJI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";New York;Jerusalem", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Graph Coarsening with Message-Passing Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93445", "id": "rIOTceoNc8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rIOTceoNc8", "openreview": "https://openreview.net/forum?id=rIOTceoNc8", "poster": "/media/PosterPDFs/NeurIPS%202024/93445.png?t=1729676716.8099926", "project": "", "author_site": "Antonin Joly, Nicolas Keriven", "tldr": "", "abstract": "Graph coarsening aims to reduce the size of a large graph while preserving some of its key properties, which has been used in many applications to reduce computational load and memory footprint. For instance, in graph machine learning, training Graph Neural Networks (GNNs) on coarsened graphs leads to drastic savings in time and memory. However, GNNs rely on the Message-Passing (MP) paradigm, and classical spectral preservation guarantees for graph coarsening do not directly lead to theoretical guarantees when performing naive message-passing on the coarsened graph.\n\nIn this work, we propose a new message-passing operation specific to coarsened graphs, which exhibit theoretical guarantees on the preservation of the propagated signal. Interestingly, and in a sharp departure from previous proposals, this operation on coarsened graphs is oriented, even when the original graph is undirected. We conduct node classification tasks on synthetic and real data and observe improved results compared to performing naive message-passing on the coarsened graph.", "keywords": "graph coarsening;message passing;graph neural network", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Antonin Joly;Nicolas Keriven", "authorids": "~Antonin_Joly1;~Nicolas_Keriven1", "gender": ";", "homepage": "https://github.com/Arthemishigeru;https://nkeriven.github.io/", "dblp": "155/4557;142/4193", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Antonin_Joly1;~Nicolas_Keriven1", "aff": "CNRS;CNRS", "aff_domain": "cnrs.fr;cnrs.fr", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\njoly2024graph,\ntitle={Graph Coarsening with Message-Passing Guarantees},\nauthor={Antonin Joly and Nicolas Keriven},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rIOTceoNc8}\n}", "github": "", "reviewers": "eigc;5Vcn;7C68;GXdc;foGd", "pdf_size": 693463, "rating": "5;5;6;6;7", "confidence": "1;3;2;5;4", "soundness": "3;3;3;2;3", "novelty": "2;3;4;2;3", "presentation": "3;3;2;3;2", "wc_summary": "89;32;30;26;44", "wc_strengths": "74;14;49;22;25", "wc_weaknesses": "104;251;51;507;55", "wc_questions": "106;2;39;9;12", "wc_limitations": "18;7;1;6;14", "wc_review": "391;306;170;570;150", "wc_reply_reviewers": "163;155;9;491;49", "wc_reply_authors": "68;532;0;179;29", "reply_reviewers": "1;2;1;3;1", "reply_authors": "2;2;1;3;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 44.2, 23.18965286501719 ], "wc_strengths_avg": [ 36.8, 21.95814199790137 ], "wc_weaknesses_avg": [ 193.6, 172.67727123162445 ], "wc_questions_avg": [ 33.6, 38.317619967842475 ], "wc_limitations_avg": [ 9.2, 6.04648658313239 ], "wc_review_avg": [ 317.4, 154.33936633276684 ], "wc_reply_reviewers_avg": [ 173.4, 169.59905660114978 ], "wc_reply_authors_avg": [ 161.6, 194.91392972283944 ], "reply_reviewers_avg": [ 1.6, 0.8 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5669467095138409, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18296392246547727933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cnrs.fr;cnrs.fr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Centre National de la Recherche Scientifique", "aff_unique_dep": "", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "No Free Lunch in LLM Watermarking: Trade-offs in Watermarking Design Choices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93444", "id": "rIOl7KbSkv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rIOl7KbSkv", "openreview": "https://openreview.net/forum?id=rIOl7KbSkv", "poster": "", "project": "", "author_site": "Qi Pang, Shengyuan Hu, Wenting Zheng, Virginia Smith", "tldr": "", "abstract": "Advances in generative models have made it possible for AI-generated text, code, and images to mirror human-generated content in many applications. Watermarking, a technique that aims to embed information in the output of a model to verify its source, is useful for mitigating the misuse of such AI-generated content. However, we show that common design choices in LLM watermarking schemes make the resulting systems surprisingly susceptible to attack---leading to fundamental trade-offs in robustness, utility, and usability. \nTo navigate these trade-offs, we rigorously study a set of simple yet effective attacks on common watermarking systems, and propose guidelines and defenses for LLM watermarking in practice.", "keywords": "watermarking;large language models;security;privacy", "primary_area": "privacy", "supplementary_material": "", "author": "Qi Pang;Shengyuan Hu;Wenting Zheng;Virginia Smith", "authorids": "~Qi_Pang1;~Shengyuan_Hu2;~Wenting_Zheng1;~Virginia_Smith1", "gender": ";;;F", "homepage": ";;https://wzheng.github.io/;", "dblp": ";226/6584-1;94/4314;120/0921", "google_scholar": ";m_ZHHToAAAAJ;OeDgxpgAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Qi_Pang1;~Shengyuan_Hu2;~Wenting_Zheng1;~Virginia_Smith1", "aff": ";Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";cmu.edu;cmu.edu;cmu.edu", "position": ";PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\npang2024no,\ntitle={No Free Lunch in {LLM} Watermarking: Trade-offs in Watermarking Design Choices},\nauthor={Qi Pang and Shengyuan Hu and Wenting Zheng and Virginia Smith},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rIOl7KbSkv}\n}", "github": "", "reviewers": "UAwD;Hb7x;iwzS;YBsi", "pdf_size": 875150, "rating": "5;5;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;4", "novelty": "2;1;3;3", "presentation": "3;2;2;3", "wc_summary": "147;49;44;188", "wc_strengths": "55;16;37;108", "wc_weaknesses": "308;433;209;69", "wc_questions": "125;170;8;67", "wc_limitations": "9;73;7;3", "wc_review": "644;741;305;435", "wc_reply_reviewers": "0;436;14;44", "wc_reply_authors": "0;886;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 107.0, 62.23744853382086 ], "wc_strengths_avg": [ 54.0, 34.09545424246464 ], "wc_weaknesses_avg": [ 254.75, 133.42109091144474 ], "wc_questions_avg": [ 92.5, 60.936442298512965 ], "wc_limitations_avg": [ 23.0, 28.948229652260256 ], "wc_review_avg": [ 531.25, 171.14376266753047 ], "wc_reply_reviewers_avg": [ 123.5, 181.1208160317306 ], "wc_reply_authors_avg": [ 221.5, 383.6492538765063 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8253825395637094469&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 2, "email": ";cmu.edu;cmu.edu;cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Episodic Future Thinking Mechanism for Multi-agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93443", "id": "rL7OtNsD9a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rL7OtNsD9a", "openreview": "https://openreview.net/forum?id=rL7OtNsD9a", "poster": "/media/PosterPDFs/NeurIPS%202024/93443.png?t=1733528970.4205644", "project": "", "author_site": "Dongsu Lee, Minhae Kwon", "tldr": "", "abstract": "Understanding cognitive processes in multi-agent interactions is a primary goal in cognitive science. It can guide the direction of artificial intelligence (AI) research toward social decision-making in multi-agent systems, which includes uncertainty from character heterogeneity. In this paper, we introduce *episodic future thinking (EFT) mechanism* for a reinforcement learning (RL) agent, inspired by the cognitive processes observed in animals. To enable future thinking functionality, we first develop a *multi-character policy* that captures diverse characters with an ensemble of heterogeneous policies. The *character* of an agent is defined as a different weight combination on reward components, representing distinct behavioral preferences. The future thinking agent collects observation-action trajectories of the target agents and leverages the pre-trained multi-character policy to infer their characters. Once the character is inferred, the agent predicts the upcoming actions of target agents and simulates the potential future scenario. This capability allows the agent to adaptively select the optimal action, considering the predicted future scenario in multi-agent scenarios. To evaluate the proposed mechanism, we consider the multi-agent autonomous driving scenario in which autonomous vehicles with different driving traits are on the road. Simulation results demonstrate that the EFT mechanism with accurate character inference leads to a higher reward than existing multi-agent solutions. We also confirm that the effect of reward improvement remains valid across societies with different levels of character diversity.", "keywords": "Reinforcement learning;Episodic future thinking;multi-agent reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Dongsu Lee;Minhae Kwon", "authorids": "~Dongsu_Lee1;~Minhae_Kwon1", "gender": "M;F", "homepage": "https://dongsuleetech.github.io/;https://bmil.ssu.ac.kr", "dblp": ";119/8973", "google_scholar": "v0hNlSUAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Dongsu_Lee1;~Minhae_Kwon1", "aff": "Soongsil University;Soongsil University", "aff_domain": "soongsil.ac.kr;ssu.ac.kr", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nlee2024episodic,\ntitle={Episodic Future Thinking Mechanism for Multi-agent Reinforcement Learning},\nauthor={Dongsu Lee and Minhae Kwon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rL7OtNsD9a}\n}", "github": "", "reviewers": "6v5k;ukHS;Cur7;pTth;RgnJ", "pdf_size": 1319036, "rating": "5;6;6;6;6", "confidence": "2;3;4;4;4", "soundness": "2;2;3;3;2", "novelty": "2;3;3;2;2", "presentation": "3;3;3;3;3", "wc_summary": "62;67;81;67;90", "wc_strengths": "62;58;87;76;91", "wc_weaknesses": "42;70;123;272;178", "wc_questions": "1;19;148;90;48", "wc_limitations": "1;48;166;7;8", "wc_review": "168;262;605;512;415", "wc_reply_reviewers": "0;30;258;30;132", "wc_reply_authors": "43;223;745;433;1059", "reply_reviewers": "0;1;2;1;2", "reply_authors": "2;3;4;3;4", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.4, 0.8 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.4, 10.44222198576529 ], "wc_strengths_avg": [ 74.8, 13.105723940324701 ], "wc_weaknesses_avg": [ 137.0, 81.99512180611723 ], "wc_questions_avg": [ 61.2, 52.80681774165151 ], "wc_limitations_avg": [ 46.0, 62.28001284521383 ], "wc_review_avg": [ 392.4, 159.607769234458 ], "wc_reply_reviewers_avg": [ 90.0, 95.1714242827121 ], "wc_reply_authors_avg": [ 500.6, 364.02505408281996 ], "reply_reviewers_avg": [ 1.2, 0.7483314773547883 ], "reply_authors_avg": [ 3.2, 0.7483314773547882 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8750000000000001, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Du8yfehGQsYJ:scholar.google.com/&scioq=Episodic+Future+Thinking+Mechanism+for+Multi-agent+Reinforcement+Learning&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "soongsil.ac.kr;ssu.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Soongsil University", "aff_unique_dep": "", "aff_unique_url": "https://www.soongsil.ac.kr", "aff_unique_abbr": "SSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "GenWarp: Single Image to Novel Views with Semantic-Preserving Generative Warping", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93442", "id": "rLJisJmMKw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rLJisJmMKw", "openreview": "https://openreview.net/forum?id=rLJisJmMKw", "poster": "/media/PosterPDFs/NeurIPS%202024/93442.png?t=1733717923.1387255", "project": "", "author_site": "Junyoung Seo, Kazumi Fukuda, Takashi Shibuya, Takuya Narihira, Naoki Murata, Shoukang Hu, Chieh-Hsin Lai, Seungryong Kim, Yuki Mitsufuji", "tldr": "", "abstract": "Generating novel views from a single image remains a challenging task due to the complexity of 3D scenes and the limited diversity in the existing multi-view datasets to train a model on. Recent research combining large-scale text-to-image (T2I) models with monocular depth estimation (MDE) has shown promise in handling in-the-wild images. In these methods, an input view is geometrically warped to novel views with estimated depth maps, then the warped image is inpainted by T2I models. However, they struggle with noisy depth maps and loss of semantic details when warping an input view to novel viewpoints. In this paper, we propose a novel approach for single-shot novel view synthesis, a semantic-preserving generative warping framework that enables T2I generative models to learn where to warp and where to generate, through augmenting cross-view attention with self-attention. Our approach addresses the limitations of existing methods by conditioning the generative model on source view images and incorporating geometric warping signals. Qualitative and quantitative evaluations demonstrate that our model outperforms existing methods in both in-domain and out-of-domain scenarios. Project page is available at https://GenWarp-NVS.github.io.", "keywords": "Single-shot Novel View Generation;Generative Models;Novel View Generation;Diffusion Models", "primary_area": "generative_models", "supplementary_material": "", "author": "Junyoung Seo;Kazumi Fukuda;Takashi Shibuya;Takuya Narihira;Naoki Murata;Shoukang Hu;Chieh-Hsin Lai;Seungryong Kim;Yuki Mitsufuji", "authorids": "~Junyoung_Seo1;~Kazumi_Fukuda1;~Takashi_Shibuya1;~Takuya_Narihira2;~Naoki_Murata1;~Shoukang_Hu1;~Chieh-Hsin_Lai2;~Seungryong_Kim1;~Yuki_Mitsufuji1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://j0seo.github.io;https://ai.sony/;;;;https://skhu101.github.io/;https://chiehhsinjesselai.github.io/;https://cvlab.korea.ac.kr/members/faculty;https://www.yukimitsufuji.com/", "dblp": "209/9340;;23/6390-1;99/9125;166/6626;226/1865;239/4021;141/9955;136/5043", "google_scholar": "orJRvmEAAAAJ;;XCRO260AAAAJ;6tvxhK0AAAAJ;https://scholar.google.co.jp/citations?user=oyuTmwoAAAAJ;9cUPotAAAAAJ;KDnKGu8AAAAJ;cIK1hS8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0002-4277-0164;;0000-0001-7418-5173;;;;0000-0002-6806-6140", "linkedin": ";;takashi-shibuya-7596a439/;;;;;;mittu1204", "or_profile": "~Junyoung_Seo1;~Kazumi_Fukuda1;~Takashi_Shibuya1;~Takuya_Narihira2;~Naoki_Murata1;~Shoukang_Hu1;~Chieh-Hsin_Lai2;~Seungryong_Kim1;~Yuki_Mitsufuji1", "aff": "Korea University;Sony AI;Sony AI;Sony AI;Sony Group Corporation;Nanyang Technological University;Sony AI;Korea University;Tokyo Institute of Technology, Tokyo Institute of Technology", "aff_domain": "korea.ac.kr;sony.com;sony.com;sony.com;sony.com;ntu.edu;sony.com;korea.ac.kr;titech.ac.jp", "position": "PhD student;Researcher;Staff AI Engineer;Senior Manager;Researcher;Postdoc;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nseo2024genwarp,\ntitle={GenWarp: Single Image to Novel Views with Semantic-Preserving Generative Warping},\nauthor={Junyoung Seo and Kazumi Fukuda and Takashi Shibuya and Takuya Narihira and Naoki Murata and Shoukang Hu and Chieh-Hsin Lai and Seungryong Kim and Yuki Mitsufuji},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rLJisJmMKw}\n}", "github": "", "reviewers": "3Fuk;Cjbv;vdto;2Nrv", "pdf_size": 11833584, "rating": "4;6;6;6", "confidence": "3;3;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "2;4;3;4", "wc_summary": "72;122;72;72", "wc_strengths": "56;29;70;61", "wc_weaknesses": "382;197;139;37", "wc_questions": "6;59;5;1", "wc_limitations": "1;19;11;25", "wc_review": "517;426;297;196", "wc_reply_reviewers": "0;80;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 84.5, 21.650635094610966 ], "wc_strengths_avg": [ 54.0, 15.280706789936126 ], "wc_weaknesses_avg": [ 188.75, 125.41605758434604 ], "wc_questions_avg": [ 17.75, 23.889066536807167 ], "wc_limitations_avg": [ 14.0, 9.0 ], "wc_review_avg": [ 359.0, 122.3376475170256 ], "wc_reply_reviewers_avg": [ 20.0, 34.64101615137755 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16995827043667419520&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "korea.ac.kr;sony.com;sony.com;sony.com;sony.com;ntu.edu;sony.com;korea.ac.kr;titech.ac.jp", "author_num": 9, "aff_unique_index": "0;1;1;1;2;3;1;0;4", "aff_unique_norm": "Korea University;Sony;Sony Group Corporation;Nanyang Technological University;Tokyo Institute of Technology", "aff_unique_dep": ";Sony AI;;;", "aff_unique_url": "https://www.korea.ac.kr;https://www.sony.com;https://www.sony.com;https://www.ntu.edu.sg;https://www.titech.ac.jp", "aff_unique_abbr": "KU;Sony AI;Sony;NTU;Titech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;1;1;1;1;2;1;0;1", "aff_country_unique": "South Korea;Japan;Singapore" }, { "title": "Activating Self-Attention for Multi-Scene Absolute Pose Regression", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93441", "id": "rM24UUgZg8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rM24UUgZg8", "openreview": "https://openreview.net/forum?id=rM24UUgZg8", "poster": "/media/PosterPDFs/NeurIPS%202024/93441.png?t=1731329487.0573764", "project": "", "author_site": "Miso Lee, Jihwan Kim, Jae-Pil Heo", "tldr": "", "abstract": "Multi-scene absolute pose regression addresses the demand for fast and memory-efficient camera pose estimation across various real-world environments. Nowadays, transformer-based model has been devised to regress the camera pose directly in multi-scenes. Despite its potential, transformer encoders are underutilized due to the collapsed self-attention map, having low representation capacity. This work highlights the problem and investigates it from a new perspective: distortion of query-key embedding space. Based on the statistical analysis, we reveal that queries and keys are mapped in completely different spaces while only a few keys are blended into the query region. This leads to the collapse of the self-attention map as all queries are considered similar to those few keys. Therefore, we propose simple but effective solutions to activate self-attention. Concretely, we present an auxiliary loss that aligns queries and keys, preventing the distortion of query-key space and encouraging the model to find global relations by self-attention. In addition, the fixed sinusoidal positional encoding is adopted instead of undertrained learnable one to reflect appropriate positional clues into the inputs of self-attention. As a result, our approach resolves the aforementioned problem effectively, thus outperforming existing methods in both outdoor and indoor scenes.", "keywords": "Multi-Scene Absolute Pose Regression;Transformer;Attention Collapse", "primary_area": "machine_vision", "supplementary_material": "/attachment/4d1b452d98fbb473862b63c0f95f9034b0d160ff.zip", "author": "Miso Lee;Jihwan Kim;Jae-Pil Heo", "authorids": "~Miso_Lee1;~Jihwan_Kim1;~Jae-Pil_Heo3", "gender": "F;M;M", "homepage": "https://leemiso.notion.site/Miso-Lee-3299f23affdb44ff8e950239496d2727;;", "dblp": "274/1052;;17/7557", "google_scholar": "https://scholar.google.co.kr/citations?user=uSny0V0AAAAJ;NAr1iIYAAAAJ;VXyJ_ssAAAAJ", "orcid": ";0000-0001-7231-383X;", "linkedin": ";damien1224;", "or_profile": "~Miso_Lee1;~Jihwan_Kim1;~Jae-pil_Heo1", "aff": "Sungkyunkwan University;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu;skku.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2024activating,\ntitle={Activating Self-Attention for Multi-Scene Absolute Pose Regression},\nauthor={Miso Lee and Jihwan Kim and Jae-Pil Heo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rM24UUgZg8}\n}", "github": "", "reviewers": "FKQN;AzjT;K2gT;aJYL", "pdf_size": 4387986, "rating": "4;5;5;7", "confidence": "4;4;5;5", "soundness": "2;2;2;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "61;66;45;114", "wc_strengths": "48;110;41;229", "wc_weaknesses": "165;150;140;47", "wc_questions": "55;4;36;17", "wc_limitations": "7;1;23;1", "wc_review": "336;331;285;408", "wc_reply_reviewers": "0;16;53;0", "wc_reply_authors": "64;64;62;0", "reply_reviewers": "0;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 25.734218464915543 ], "wc_strengths_avg": [ 107.0, 75.3823586789376 ], "wc_weaknesses_avg": [ 125.5, 46.18711941656461 ], "wc_questions_avg": [ 28.0, 19.300259065618782 ], "wc_limitations_avg": [ 8.0, 9.0 ], "wc_review_avg": [ 340.0, 44.005681451376255 ], "wc_reply_reviewers_avg": [ 17.25, 21.649191670822262 ], "wc_reply_authors_avg": [ 47.5, 27.436289836637897 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yQkmlyDoi2gJ:scholar.google.com/&scioq=Activating+Self-Attention+for+Multi-Scene+Absolute+Pose+Regression&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "skku.edu;skku.edu;skku.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Semidefinite Relaxations of the Gromov-Wasserstein Distance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93440", "id": "rM3FFH1mqk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rM3FFH1mqk", "openreview": "https://openreview.net/forum?id=rM3FFH1mqk", "poster": "", "project": "", "author_site": "Junyu Chen, Binh T. Nguyen, Shang Koh, Yong Sheng Soh", "tldr": "", "abstract": "The Gromov-Wasserstein (GW) distance is an extension of the optimal transport problem that allows one to match objects between incomparable spaces. At its core, the GW distance is specified as the solution of a non-convex quadratic program and is not known to be tractable to solve. In particular, existing solvers for the GW distance are only able to find locally optimal solutions. In this work, we propose a semi-definite programming (SDP) relaxation of the GW distance. The relaxation can be viewed as the Lagrangian dual of the GW distance augmented with constraints that relate to the linear and quadratic terms of transportation plans. In particular, our relaxation provides a tractable (polynomial-time) algorithm to compute globally optimal transportation plans (in some instances) together with an accompanying proof of global optimality. Our numerical experiments suggest that the proposed relaxation is strong in that it frequently computes the globally optimal solution. Our Python implementation is available at https://github.com/tbng/gwsdp.", "keywords": "optimal transport;gromov-wasserstein;semidefinite programming;optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Junyu Chen;Binh Nguyen;Shang Hui Koh;Yong Sheng Soh", "authorids": "~Junyu_Chen5;~Binh_Nguyen2;~Shang_Hui_Koh1;~Yong_Sheng_Soh1", "gender": "M;M;;", "homepage": "https://blog.nus.edu.sg/chenjunyu/;https://tbng.github.io/;;https://yssoh.github.io/", "dblp": ";241/2542;;123/9574.html", "google_scholar": "bDP1YOIAAAAJ;6rpHj_YAAAAJ;;OPntcXsAAAAJ", "orcid": ";;0000-0002-1237-9025;0000-0003-3367-1401", "linkedin": ";;;", "or_profile": "~Junyu_Chen5;~Binh_Nguyen2;~Shang_Hui_Koh1;~Yong_Sheng_Soh1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu.sg;u.nus.edu;nus.edu.sg", "position": "PhD student;Research Fellow;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nchen2024semidefinite,\ntitle={Semidefinite Relaxations of the Gromov-Wasserstein Distance},\nauthor={Junyu Chen and Binh Nguyen and Shang Hui Koh and Yong Sheng Soh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rM3FFH1mqk}\n}", "github": "", "reviewers": "fpsx;Xfoj;3Vyi;nsV6", "pdf_size": 755170, "rating": "3;6;6;6", "confidence": "4;4;4;4", "soundness": "2;3;3;4", "novelty": "1;2;3;3", "presentation": "3;3;3;3", "wc_summary": "70;99;111;19", "wc_strengths": "37;229;115;41", "wc_weaknesses": "210;74;248;56", "wc_questions": "89;6;228;43", "wc_limitations": "48;1;22;5", "wc_review": "454;409;724;164", "wc_reply_reviewers": "357;531;49;63", "wc_reply_authors": "1025;453;12;190", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.75, 35.470938809115275 ], "wc_strengths_avg": [ 105.5, 77.77371021109897 ], "wc_weaknesses_avg": [ 147.0, 83.33666660000267 ], "wc_questions_avg": [ 91.5, 84.11450528892148 ], "wc_limitations_avg": [ 19.0, 18.506755523321747 ], "wc_review_avg": [ 437.75, 198.72641369480806 ], "wc_reply_reviewers_avg": [ 250.0, 203.58045092788257 ], "wc_reply_authors_avg": [ 420.0, 382.90925818005496 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4422907538494019687&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nus.edu;nus.edu.sg;u.nus.edu;nus.edu.sg", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Regret Minimization in Stackelberg Games with Side Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93439", "id": "rPKCrzdqJx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rPKCrzdqJx", "openreview": "https://openreview.net/forum?id=rPKCrzdqJx", "poster": "", "project": "", "author_site": "Keegan Harris, Steven Wu, Maria-Florina Balcan", "tldr": "", "abstract": "Algorithms for playing in Stackelberg games have been deployed in real-world domains including airport security, anti-poaching efforts, and cyber-crime prevention. However, these algorithms often fail to take into consideration the additional information available to each player (e.g. traffic patterns, weather conditions, network congestion), a salient feature of reality which may significantly affect both players' optimal strategies. We formalize such settings as Stackelberg games with side information, in which both players observe an external context before playing. The leader commits to a (context-dependent) strategy, and the follower best-responds to both the leader's strategy and the context. We focus on the online setting in which a sequence of followers arrive over time, and the context may change from round-to-round. In sharp contrast to the non-contextual version, we show that it is impossible for the leader to achieve good performance (measured by regret) in the full adversarial setting. Motivated by our impossibility result, we show that no-regret learning is possible in two natural relaxations: the setting in which the sequence of followers is chosen stochastically and the sequence of contexts is adversarial, and the setting in which the sequence of contexts is stochastic and the sequence of followers is chosen by an adversary.", "keywords": "Stackelberg game;context;online learning", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Keegan Harris;Steven Wu;Maria Florina Balcan", "authorids": "~Keegan_Harris1;~Steven_Wu1;~Maria_Florina_Balcan1", "gender": "M;F;M", "homepage": "https://keeganharris.github.io/;http://www.cs.cmu.edu/~ninamf/;https://zstevenwu.com/", "dblp": "294/5044;b/MariaFlorinaBalcan;137/8350", "google_scholar": "TnvQIrYAAAAJ;https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ;MbF6rTEAAAAJ", "orcid": ";;", "linkedin": ";;zstevenwu/", "or_profile": "~Keegan_Harris1;~Nina_Balcan1;~Zhiwei_Steven_Wu1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nharris2024regret,\ntitle={Regret Minimization in Stackelberg Games with Side Information},\nauthor={Keegan Harris and Steven Wu and Maria Florina Balcan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rPKCrzdqJx}\n}", "github": "", "reviewers": "8oy3;zDTp;YfNJ;C9HM", "pdf_size": 926370, "rating": "5;6;6;6", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "102;223;77;55", "wc_strengths": "40;76;75;70", "wc_weaknesses": "66;23;48;10", "wc_questions": "22;16;208;9", "wc_limitations": "19;1;38;6", "wc_review": "249;339;446;150", "wc_reply_reviewers": "0;13;26;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 114.25, 64.95142415682662 ], "wc_strengths_avg": [ 65.25, 14.7542366796795 ], "wc_weaknesses_avg": [ 36.75, 21.718367802392518 ], "wc_questions_avg": [ 63.75, 83.40975662355095 ], "wc_limitations_avg": [ 16.0, 14.300349646075091 ], "wc_review_avg": [ 296.0, 109.40063985187655 ], "wc_reply_reviewers_avg": [ 13.25, 9.202581159652981 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14269596086784238874&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Interaction-Force Transport Gradient Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93438", "id": "rPgc5brxmT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rPgc5brxmT", "openreview": "https://openreview.net/forum?id=rPgc5brxmT", "poster": "", "project": "", "author_site": "Egor Gladin, Pavel Dvurechenskii, Alexander Mielke, Jia-Jie Zhu", "tldr": "", "abstract": "This paper presents a new gradient flow dissipation geometry over non-negative and probability measures.\nThis is motivated by a principled construction that combines the unbalanced optimal transport and interaction forces modeled by reproducing kernels. Using a precise connection between the Hellinger geometry and the maximum mean discrepancy (MMD), we propose the interaction-force transport (IFT) gradient flows and its spherical variant via an infimal convolution of the Wasserstein and spherical MMD tensors. We then develop a particle-based optimization algorithm based on the JKO-splitting scheme of the mass-preserving spherical IFT gradient flows. Finally, we provide both theoretical global exponential convergence guarantees and improved empirical simulation results for applying the IFT gradient flows to the sampling task of MMD-minimization. Furthermore, we prove that the spherical IFT gradient flow enjoys the best of both worlds by providing the global exponential convergence guarantee for both the MMD and KL energy.", "keywords": "kernel methods;gradient flow;optimal transport;Wasserstein;Fisher-Rao;Hellinger;unbalanced optimal transport;partial differential equation;optimization;calculus of variations;variational inference;MCMC;MMD", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/8db33b69455e12ccaa01f7ce23564c12cc0b382c.zip", "author": "Egor Gladin;Pavel Dvurechensky;Alexander Mielke;Jia-Jie Zhu", "authorids": "~Egor_Gladin1;~Pavel_Dvurechensky1;~Alexander_Mielke1;~Jia-Jie_Zhu1", "gender": "M;;M;", "homepage": ";http://wias-berlin.de/people/dvureche/?lang=1;https://www.wias-berlin.de/people/mielke/;", "dblp": "307/3908;164/7242;;", "google_scholar": "0kQHDOcAAAAJ;28MSou8AAAAJ;;", "orcid": "0000-0002-9086-996X;0000-0003-1201-2343;;", "linkedin": ";;;", "or_profile": "~Egor_Gladin1;~Pavel_Dvurechensky1;~Alexander_Mielke1;~Jia-Jie_Zhu1", "aff": "Weierstrass Institute for Applied Analysis and Stochastics;Weierstrass Institute for Applied Analysis and Stochastics;Weierstrass Institute for Applied Analysis and Stochastics;", "aff_domain": "wias-berlin.de;wias-berlin.de;wias-berlin.de;", "position": "Researcher;Postdoc;Full Professor;", "bibtex": "@inproceedings{\ngladin2024interactionforce,\ntitle={Interaction-Force Transport Gradient Flows},\nauthor={Egor Gladin and Pavel Dvurechensky and Alexander Mielke and Jia-Jie Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rPgc5brxmT}\n}", "github": "", "reviewers": "qhY4;2Ho7;rKJj;f71K", "pdf_size": 1747033, "rating": "4;5;6;8", "confidence": "3;2;2;2", "soundness": "3;2;3;4", "novelty": "2;2;4;4", "presentation": "2;3;4;4", "wc_summary": "58;52;68;131", "wc_strengths": "61;17;44;138", "wc_weaknesses": "651;305;42;48", "wc_questions": "111;148;38;70", "wc_limitations": "5;13;6;4", "wc_review": "886;535;198;391", "wc_reply_reviewers": "392;203;38;34", "wc_reply_authors": "925;269;96;23", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 77.25, 31.554516317002864 ], "wc_strengths_avg": [ 65.0, 44.972213643537714 ], "wc_weaknesses_avg": [ 261.5, 248.6790099706849 ], "wc_questions_avg": [ 91.75, 41.52333681196635 ], "wc_limitations_avg": [ 7.0, 3.5355339059327378 ], "wc_review_avg": [ 502.5, 251.63515255226167 ], "wc_reply_reviewers_avg": [ 166.75, 146.8423899968943 ], "wc_reply_authors_avg": [ 328.25, 355.9279245858633 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6831300510639732, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15504449142325563120&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "wias-berlin.de;wias-berlin.de;wias-berlin.de;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Weierstrass Institute for Applied Analysis and Stochastics", "aff_unique_dep": "", "aff_unique_url": "https://www.wias-berlin.de/", "aff_unique_abbr": "WIAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Monomial Matrix Group Equivariant Neural Functional Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93437", "id": "rQYyWGYuzK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rQYyWGYuzK", "openreview": "https://openreview.net/forum?id=rQYyWGYuzK", "poster": "", "project": "", "author_site": "Hoang Tran, Thieu Vo, Tho Huu, An Nguyen The, Tan Nguyen", "tldr": "", "abstract": "Neural functional networks (NFNs) have recently gained significant attention due to their diverse applications, ranging from predicting network generalization and network editing to classifying implicit neural representation. Previous NFN designs often depend on permutation symmetries in neural networks' weights, which traditionally arise from the unordered arrangement of neurons in hidden layers. However, these designs do not take into account the weight scaling symmetries of $\\operatorname{ReLU}$ networks, and the weight sign flipping symmetries of $\\operatorname{sin}$ or $\\operatorname{Tanh}$ networks. In this paper, we extend the study of the group action on the network weights from the group of permutation matrices to the group of monomial matrices by incorporating scaling/sign-flipping symmetries. Particularly, we encode these scaling/sign-flipping symmetries by designing our corresponding equivariant and invariant layers. We name our new family of NFNs the Monomial Matrix Group Equivariant Neural Functional Networks (Monomial-NFN). Because of the expansion of the symmetries, Monomial-NFN has much fewer independent trainable parameters compared to the baseline NFNs in the literature, thus enhancing the model's efficiency. Moreover, for fully connected and convolutional neural networks, we theoretically prove that all groups that leave these networks invariant while acting on their weight spaces are some subgroups of the monomial matrix group. We provide empirical evidences to demonstrate the advantages of our model over existing baselines, achieving competitive performance and efficiency. The code is publicly available at https://github.com/MathematicalAI-NUS/Monomial-NFN.", "keywords": "neural functional networks;equivariant networks;monomial matrices;symmetry", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/6ed88010e6cf6cc8e9dc09993366a200e281e01f.zip", "author": "Hoang V. Tran;Thieu Vo;Tho Tran Huu;An Nguyen The;Tan Minh Nguyen", "authorids": "~Hoang_V._Tran1;~Thieu_Vo1;~Tho_Tran_Huu1;~An_Nguyen_The2;~Tan_Minh_Nguyen1", "gender": "M;M;M;M;M", "homepage": ";https://sites.google.com/tdtu.edu.vn/vongocthieu;;https://sg-nta.github.io/;https://tanmnguyen89.github.io/", "dblp": "154/5761;;337/2038;;255/4725", "google_scholar": "HXsV5dQAAAAJ;CM2qJSoAAAAJ;fG3mIYEAAAAJ;h0qziUcAAAAJ;OizOh88AAAAJ", "orcid": "0009-0004-9735-0361;;;;", "linkedin": ";;;an-nguyen-the-130515170/;", "or_profile": "~Hoang_V._Tran1;~Thieu_Vo1;~Tho_Tran_Huu1;~An_Nguyen_The2;~Tan_Minh_Nguyen1", "aff": "National University of Singapore;Ton Duc Thang University;National University of Singapore;Hanoi University of Science and Technology;National University of Singapore", "aff_domain": "u.nus.edu;tdtu.edu.vn;u.nus.edu;hust.edu.vn;nus.edu.sg", "position": "PhD student;Lecturer;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ntran2024monomial,\ntitle={Monomial Matrix Group Equivariant Neural Functional Networks},\nauthor={Hoang V. Tran and Thieu Vo and Tho Tran Huu and An Nguyen The and Tan Minh Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rQYyWGYuzK}\n}", "github": "", "reviewers": "xQCm;RAv9;yJwm;MDVG", "pdf_size": 4157419, "rating": "5;5;7;7", "confidence": "4;5;5;3", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "153;56;252;193", "wc_strengths": "52;17;264;74", "wc_weaknesses": "290;127;526;65", "wc_questions": "27;195;185;44", "wc_limitations": "1;14;50;17", "wc_review": "523;409;1277;393", "wc_reply_reviewers": "89;0;172;22", "wc_reply_authors": "81;180;99;66", "reply_reviewers": "1;0;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 163.5, 71.36000280269053 ], "wc_strengths_avg": [ 101.75, 95.85503377496667 ], "wc_weaknesses_avg": [ 252.0, 178.26525180191456 ], "wc_questions_avg": [ 112.75, 77.56408640601654 ], "wc_limitations_avg": [ 20.5, 18.062391868188442 ], "wc_review_avg": [ 650.5, 365.1667427354249 ], "wc_reply_reviewers_avg": [ 70.75, 67.02005296924197 ], "wc_reply_authors_avg": [ 106.5, 44.01420225336363 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=286546919453524875&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "u.nus.edu;tdtu.edu.vn;u.nus.edu;hust.edu.vn;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "National University of Singapore;Ton Duc Thang University;Hanoi University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.tdtu.edu.vn;https://www.hust.edu.vn", "aff_unique_abbr": "NUS;TDTU;HUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hanoi", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Singapore;Vietnam" }, { "id": "rSSpEmrN0C", "title": "A Bounding Box is Worth One Token: Interleaving Layout and Text in a Large Language Model for Document Understanding", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recently, many studies have demonstrated that exclusively incorporating OCR-derived text and spatial layouts with large language models (LLMs) can be highly effective for document understanding tasks. However, existing methods that integrate spatial layouts with text have limitations, such as producing overly long text sequences or failing to fully leverage the autoregressive traits of LLMs. In this work, we introduce nterleaving Layout and Text in a Large Language Model (LayTextLLM)} for document understanding. In particular, LayTextLLM projects each bounding box to a single embedding and interleaves it with text, efficiently avoiding long sequence issues while leveraging autoregressive traits of LLMs. LayTextLLM not only streamlines the interaction of layout and textual data but also shows enhanced performance in Key Information Extraction (KIE) and Visual Question Answering (VQA). Comprehensive benchmark evaluations reveal significant improvements, with a 27.0\\% increase on KIE tasks and 24.1\\% on VQA tasks compared to previous state-of-the-art document understanding MLLMs, as well as a 15.5\\% improvement over other SOTA OCR-based LLMs on KIE tasks.", "keywords": "LLM;DocAI;Visually Rich Document Understanding;KIE", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jinghui Lu;Haiyang Yu;Yanjie Wang;Yongjie Ye;Jingqun Tang;Ziwei Yang;Binghong Wu;Qi Liu;Hao Feng;Han Wang;Hao Liu;Can Huang", "authorids": "~Jinghui_Lu2;~Haiyang_Yu5;~Yanjie_Wang2;~Yongjie_Ye1;~Jingqun_Tang1;~Ziwei_Yang4;~Binghong_Wu1;~Qi_Liu25;~Hao_Feng4;~Han_Wang18;~Hao_Liu15;~Can_Huang1", "gender": "M;M;;M;M;M;M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=UdARxSMAAAAJ&hl=zh-CN;https://github.com/princewang1994;;http://yangziwei.cn;;;https://fh2019ustc.github.io/;https://github.com/Hon-Wong;;;https://georgeluimmortal.github.io/;https://github.com/sty-yyj/sty-yyj.github.io", "dblp": "90/6643-4;;317/5539;;236/1328;;46/4184-9;;09/3214-3;;14/983;", "google_scholar": "UdARxSMAAAAJ;;OxQXSioAAAAJ;;66ry4nMAAAAJ;5k41vLIAAAAJ;aB8DspEAAAAJ;;wFOk3PQAAAAJ;;ZzK_UdYAAAAJ;", "orcid": "0000-0003-2747-7338;;;;;0000-0001-6181-8824;0000-0001-8127-6639;;;0009-0006-9126-3069;0000-0001-7149-6961;", "linkedin": ";;;;%E7%A7%89%E6%B3%93-%E6%AD%A6-211300134/;;;;;https://www.linkedin.cn/incareer/in/can-huang-3175b256;jinghui-lu-2712aa105;", "or_profile": "~Haiyang_Yu5;~Yanjie_Wang2;~Jingqun_Tang1;~Ziwei_Yang4;~Binghong_Wu1;~Qi_Liu25;~Hao_Feng4;~Han_Wang18;~Hao_Liu15;~Can_Huang1;~JINGHUI_LU1;~Ye_yongjie1", "aff": "Fudan University;ByteDance Inc;Bytedance;;Bytedance;Bytedance Inc.;University of Science and Technology of China;ByteDance Inc.;Bytedance;Bytedance;ByteDance Inc.;ByteDance Inc.", "aff_domain": "fudan.edu.cn;bytedance.com;bytedance.com;;bytedance.com;bytedance.com;ustc.edu;bytedance.com;bytedance.com;bytedance.com;bytedance.com;bytedance.com", "position": "PhD student;Researcher;Researcher;;Researcher;Researcher;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024a,\ntitle={A Bounding Box is Worth One Token: Interleaving Layout and Text in a Large Language Model for Document Understanding},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=rSSpEmrN0C}\n}", "github": "", "project": "", "reviewers": "Nepi;uyfs;tEb5;WMks", "site": "https://openreview.net/forum?id=rSSpEmrN0C", "pdf_size": 1789091, "rating": "4;5;6;7", "confidence": "4;4;3;4", "soundness": "3;2;2;3", "novelty": "2;1;3;4", "presentation": "3;3;3;3", "wc_summary": "67;70;58;76", "wc_strengths": "69;52;84;63", "wc_weaknesses": "144;93;302;43", "wc_questions": "17;54;231;30", "wc_limitations": "27;48;47;4", "wc_review": "324;317;722;216", "wc_reply_reviewers": "0;18;51;63", "wc_reply_authors": "176;125;54;39", "reply_reviewers": "0;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.75, 6.49519052838329 ], "wc_strengths_avg": [ 67.0, 11.554220008291344 ], "wc_weaknesses_avg": [ 145.5, 97.15580270884493 ], "wc_questions_avg": [ 83.0, 86.47253899360189 ], "wc_limitations_avg": [ 31.5, 17.95132307101624 ], "wc_review_avg": [ 394.75, 193.71031851710947 ], "wc_reply_reviewers_avg": [ 33.0, 25.18928343562 ], "wc_reply_authors_avg": [ 98.5, 55.29240454167281 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11634016876395038369&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;1;1;2;3;1;1;1;1;1", "aff_unique_norm": "Fudan University;ByteDance;Bytedance Inc.;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.bytedance.com;https://www.bytedance.com;http://www.ustc.edu.cn", "aff_unique_abbr": "Fudan;ByteDance;Bytedance;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning from Highly Sparse Spatio-temporal Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93436", "id": "rTONicCCJm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rTONicCCJm", "openreview": "https://openreview.net/forum?id=rTONicCCJm", "poster": "/media/PosterPDFs/NeurIPS%202024/93436.png?t=1731665944.7158597", "project": "", "author_site": "Leyan Deng, Chenwang Wu, Defu Lian, Enhong Chen", "tldr": "", "abstract": "Incomplete spatio-temporal data in real-world has spawned many research.\nHowever, existing methods often utilize iterative message-passing across temporal and spatial dimensions, resulting in substantial information loss and high computational cost.\nWe provide a theoretical analysis revealing that such iterative models are not only susceptible to data sparsity but also to graph sparsity, causing unstable performances on different datasets.\nTo overcome these limitations, we introduce a novel method named One-step Propagation and Confidence-based Refinement (OPCR).\nIn the first stage, OPCR leverages inherent spatial and temporal relationships by employing sparse attention mechanism.\nThese modules propagate limited observations directly to the global context through one-step imputation, which are theoretically effected only by data sparsity.\nFollowing this, we assign confidence levels to the initial imputations by correlating missing data with valid data.\nThis confidence-based propagation refines the seperate spatial and temporal imputation results through spatio-temporal dependencies.\nWe evaluate the proposed model across various downstream tasks involving highly sparse spatio-temporal data.\nEmpirical results indicate that our model outperforms state-of-the-art imputation methods, demonstrating its superior effectiveness and robustness.", "keywords": "spatio-temporal data mining; incomplete data imputation", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Leyan Deng;Chenwang Wu;Defu Lian;Enhong Chen", "authorids": "~Leyan_Deng1;~Chenwang_Wu1;~Defu_Lian1;~Enhong_Chen1", "gender": ";;M;M", "homepage": ";https://daftstone.github.io/;https://faculty.ustc.edu.cn/liandefu/en/index.htm;http://staff.ustc.edu.cn/~cheneh", "dblp": ";235/0588;87/10734;07/258", "google_scholar": "Nu4w3i0AAAAJ;https://scholar.google.com.hk/citations?user=cYg5xjMAAAAJ;QW0ad4sAAAAJ;Q9h02J0AAAAJ", "orcid": ";;0000-0002-3507-9607;0000-0002-4835-4102", "linkedin": ";;;", "or_profile": "~Leyan_Deng1;~Chenwang_Wu1;~Defu_Lian1;~Enhong_Chen1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndeng2024learning,\ntitle={Learning from Highly Sparse Spatio-temporal Data},\nauthor={Leyan Deng and Chenwang Wu and Defu Lian and Enhong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rTONicCCJm}\n}", "github": "", "reviewers": "aMi7;Ctvj;g4JR;fc8s", "pdf_size": 696840, "rating": "5;5;7;8", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "1;2;4;3", "wc_summary": "99;66;76;153", "wc_strengths": "76;41;65;125", "wc_weaknesses": "316;100;55;167", "wc_questions": "9;44;33;34", "wc_limitations": "13;1;1;1", "wc_review": "513;252;230;480", "wc_reply_reviewers": "14;33;12;0", "wc_reply_authors": "98;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 98.5, 33.66377875402582 ], "wc_strengths_avg": [ 76.75, 30.597181242722343 ], "wc_weaknesses_avg": [ 159.5, 98.75348095130622 ], "wc_questions_avg": [ 30.0, 12.864680330268607 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 368.75, 128.5172653770691 ], "wc_reply_reviewers_avg": [ 14.75, 11.818946653572814 ], "wc_reply_authors_avg": [ 24.5, 42.4352447854375 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5555555555555555, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JqhFjondJ0QJ:scholar.google.com/&scioq=Learning+from+Highly+Sparse+Spatio-temporal+Data&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Uncertainty-based Offline Variational Bayesian Reinforcement Learning for Robustness under Diverse Data Corruptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93435", "id": "rTxCIWsfsD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rTxCIWsfsD", "openreview": "https://openreview.net/forum?id=rTxCIWsfsD", "poster": "", "project": "", "author_site": "Rui Yang, Jie Wang, Guoping Wu, Bin Li", "tldr": "", "abstract": "Real-world offline datasets are often subject to data corruptions (such as noise or adversarial attacks) due to sensor failures or malicious attacks. Despite advances in robust offline reinforcement learning (RL), existing methods struggle to learn robust agents under high uncertainty caused by the diverse corrupted data (i.e., corrupted states, actions, rewards, and dynamics), leading to performance degradation in clean environments. To tackle this problem, we propose a novel robust variational Bayesian inference for offline RL (TRACER). It introduces Bayesian inference for the first time to capture the uncertainty via offline data for robustness against all types of data corruptions. Specifically, TRACER first models all corruptions as the uncertainty in the action-value function. Then, to capture such uncertainty, it uses all offline data as the observations to approximate the posterior distribution of the action-value function under a Bayesian inference framework. An appealing feature of TRACER is that it can distinguish corrupted data from clean data using an entropy-based uncertainty measure, since corrupted data often induces higher uncertainty and entropy. Based on the aforementioned measure, TRACER can regulate the loss associated with corrupted data to reduce its influence, thereby enhancing robustness and performance in clean environments. Experiments demonstrate that TRACER significantly outperforms several state-of-the-art approaches across both individual and simultaneous data corruptions.", "keywords": "Robust Offline Reinforcement Learning;Variational Bayesian Inference;Diverse Data Corruptions;Uncertainty", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Rui Yang;Jie Wang;Guoping Wu;Bin Li", "authorids": "~Rui_Yang9;~Jie_Wang1;~Guoping_Wu1;~Bin_Li8", "gender": "M;M;M;M", "homepage": "http://staff.ustc.edu.cn/~jwangx;https://github.com/lamperouge12;http://staff.ustc.edu.cn/~binli;https://www.researchgate.net/profile/Rui_Yang161", "dblp": "29/5259-5;;89/6764-25;", "google_scholar": "OugG4dUAAAAJ;;;8cwrNo0AAAAJ", "orcid": ";;0000-0002-2332-3959;0009-0004-5137-9302", "linkedin": ";;;", "or_profile": "~Jie_Wang1;~Guoping_Wu1;~Bin_Li8;~Yang_Rui1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "Full Professor;MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nyang2024uncertaintybased,\ntitle={Uncertainty-based Offline Variational Bayesian Reinforcement Learning for Robustness under Diverse Data Corruptions},\nauthor={Rui Yang and Jie Wang and Guoping Wu and Bin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rTxCIWsfsD}\n}", "github": "", "reviewers": "wmJN;QZTE;V2Jx;vWB5", "pdf_size": 1089653, "rating": "6;6;6;7", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "109;97;43;54", "wc_strengths": "28;91;16;20", "wc_weaknesses": "168;223;72;93", "wc_questions": "82;200;66;72", "wc_limitations": "18;46;7;11", "wc_review": "405;657;204;250", "wc_reply_reviewers": "36;167;19;10", "wc_reply_authors": "47;950;49;43", "reply_reviewers": "1;2;1;1", "reply_authors": "2;5;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 75.75, 27.851166941440713 ], "wc_strengths_avg": [ 38.75, 30.474374480865066 ], "wc_weaknesses_avg": [ 139.0, 60.212125024782175 ], "wc_questions_avg": [ 105.0, 55.14526271584895 ], "wc_limitations_avg": [ 20.5, 15.239750654128171 ], "wc_review_avg": [ 379.0, 176.9364292620375 ], "wc_reply_reviewers_avg": [ 58.0, 63.619965419669946 ], "wc_reply_authors_avg": [ 272.25, 391.30510794008296 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6267045644444317259&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Multi-turn Reinforcement Learning with Preference Human Feedback", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93434", "id": "rVSc3HIZS4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rVSc3HIZS4", "openreview": "https://openreview.net/forum?id=rVSc3HIZS4", "poster": "/media/PosterPDFs/NeurIPS%202024/93434.png?t=1733230427.9026546", "project": "", "author_site": "Lior Shani, Aviv Rosenberg, Asaf Cassel, Oran Lang, Daniele Calandriello, Avital Zipori, Hila Noga, Orgad Keller, Bilal Piot, Idan Szpektor, Avinatan Hassidim, Yossi Matias, Remi Munos", "tldr": "", "abstract": "Reinforcement Learning from Human Feedback (RLHF) has become the standard approach for aligning Large Language Models (LLMs) with human preferences, allowing LLMs to demonstrate remarkable abilities in various tasks. Existing methods work by emulating the human preference at the single decision (turn) level, limiting their capabilities in settings that require planning or multi-turn interactions to achieve a long-term goal. In this paper, we address this issue by developing novel methods for Reinforcement Learning (RL) from preference feedback between two full multi-turn conversations. In the tabular setting, we present a novel mirror-descent-based policy optimization algorithm for the general multi-turn preference-based RL problem, and prove its convergence to Nash equilibrium. To evaluate performance, we create a new environment, Education Dialogue, where a teacher agent guides a student in learning a random topic, and show that a deep RL variant of our algorithm outperforms RLHF baselines. Finally, we show that in an environment with explicit rewards, our algorithm recovers the same performance as a reward-based RL baseline, despite relying solely on a weaker preference signal.", "keywords": "Reinforcement Learning;RLHF;Human Feedback;Preferences;Optimization;LLMs;Large Language Models;Natural Language Processing;NLP;Learning Theory;RL", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Lior Shani;Aviv Rosenberg;Asaf Cassel;Oran Lang;Daniele Calandriello;Avital Zipori;Hila Noga;Orgad Keller;Bilal Piot;Idan Szpektor;Avinatan Hassidim;Yossi Matias;Remi Munos", "authorids": "~Lior_Shani2;~Aviv_Rosenberg1;~Asaf_Cassel1;~Oran_Lang1;~Daniele_Calandriello1;~Avital_Zipori1;~Hila_Noga1;~Orgad_Keller1;~Bilal_Piot1;~Idan_Szpektor1;~Avinatan_Hassidim3;~Yossi_Matias2;~Remi_Munos1", "gender": "M;M;;M;M;;;M;M;;;M;M", "homepage": ";https://sites.google.com/view/aviv-rosenberg/home;;;;;;http://orgadkeller.com;;;;https://research.google/people/YossiMatias/;http://researchers.lille.inria.fr/~munos/", "dblp": "https://dblp.uni-trier.de/pers/s/Shani:Lior;225/9369-2;;218/5554;129/1542;;;32/3363.html;;15/6513;;m/YossiMatias;69/6815", "google_scholar": "https://scholar.google.co.il/citations?user=TrQLB1gAAAAJ;https://scholar.google.co.il/citations?user=cg8_-foAAAAJ;;gypv57sAAAAJ;;;;uUHo18cAAAAJ;https://scholar.google.fr/citations?user=fqxNUREAAAAJ;XI2CP68AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;0000-0001-8827-429X;;;;;", "linkedin": ";aviv-rosenberg-2a6222149/;;;;avitalzipori/;https://il.linkedin.com/in/hilanoga;https://linkedin.com/in/orgadk;;;;yossimatias/;", "or_profile": "~Lior_Shani2;~Aviv_Rosenberg1;~Asaf_Cassel1;~Oran_Lang1;~Daniele_Calandriello1;~Avital_Zipori1;~Hila_Noga1;~Orgad_Keller1;~Bilal_Piot1;~Idan_Szpektor1;~Avinatan_Hassidim3;~Yossi_Matias2;~Remi_Munos1", "aff": "Google Research;Google Research;;Google;Google DeepMind;Google;Google;Google;University Lille;Google;;Tel Aviv University;Google DeepMind", "aff_domain": "google.com;google.com;;google.com;deepmind.com;google.com;google.com;google.com;univ-lille1.fr;google.com;;tau.ac.il;google.com", "position": "Researcher;Researcher;;Researcher;Researcher;Researcher;Researcher;Researcher;Associate Professor;Researcher;;Faculty;Research scientist", "bibtex": "@inproceedings{\nshani2024multiturn,\ntitle={Multi-turn Reinforcement Learning with Preference Human Feedback},\nauthor={Lior Shani and Aviv Rosenberg and Asaf Cassel and Oran Lang and Daniele Calandriello and Avital Zipori and Hila Noga and Orgad Keller and Bilal Piot and Idan Szpektor and Avinatan Hassidim and Yossi Matias and Remi Munos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rVSc3HIZS4}\n}", "github": "", "reviewers": "TXJA;AyR4;G1Y8;rfuJ", "pdf_size": 516191, "rating": "5;5;5;6", "confidence": "4;4;3;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;1;2;4", "wc_summary": "41;60;28;125", "wc_strengths": "34;38;66;63", "wc_weaknesses": "49;450;70;211", "wc_questions": "95;4;3;174", "wc_limitations": "11;9;21;6", "wc_review": "230;561;188;579", "wc_reply_reviewers": "15;275;18;223", "wc_reply_authors": "55;1163;0;479", "reply_reviewers": "1;1;1;2", "reply_authors": "2;4;1;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 63.5, 37.286056375004314 ], "wc_strengths_avg": [ 50.25, 14.359230480774379 ], "wc_weaknesses_avg": [ 195.0, 159.8608770149845 ], "wc_questions_avg": [ 69.0, 71.207443431147 ], "wc_limitations_avg": [ 11.75, 5.629165124598851 ], "wc_review_avg": [ 389.5, 181.22154949122358 ], "wc_reply_reviewers_avg": [ 132.75, 117.69956457013764 ], "wc_reply_authors_avg": [ 424.25, 465.0491237493089 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7830806199095272023&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "google.com;google.com;;google.com;deepmind.com;google.com;google.com;google.com;univ-lille1.fr;google.com;;tau.ac.il;google.com", "author_num": 13, "aff_unique_index": "0;0;0;0;0;0;0;1;0;2;0", "aff_unique_norm": "Google;University of Lille;Tel Aviv University", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;https://www.univ-lille.fr;https://www.tau.ac.il", "aff_unique_abbr": "Google Research;ULille;TAU", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0;0;2;0;3;1", "aff_country_unique": "United States;United Kingdom;France;Israel" }, { "title": "Everyday Object Meets Vision-and-Language Navigation Agent via Backdoor", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93433", "id": "rXGxbDJadh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rXGxbDJadh", "openreview": "https://openreview.net/forum?id=rXGxbDJadh", "poster": "/media/PosterPDFs/NeurIPS%202024/93433.png?t=1731564398.2399502", "project": "", "author_site": "Keji He, Kehan Chen, Jiawang Bai, Yan Huang, Qi Wu, Shu-Tao Xia, Liang Wang", "tldr": "", "abstract": "Vision-and-Language Navigation (VLN) requires an agent to dynamically explore environments following natural language.\nThe VLN agent, closely integrated into daily lives, poses a substantial threat to the security of privacy and property upon the occurrence of malicious behavior.\nHowever, this serious issue has long been overlooked.\nIn this paper, we pioneer the exploration of an object-aware backdoored VLN, achieved by implanting object-aware backdoors during the training phase. \nTailored to the unique VLN nature of cross-modality and continuous decision-making, we propose a novel backdoored VLN paradigm: IPR Backdoor. \nThis enables the agent to act in abnormal behavior once encountering the object triggers during language-guided navigation in unseen environments, thereby executing an attack on the target scene.\nExperiments demonstrate the effectiveness of our method in both physical and digital spaces across different VLN agents, as well as its robustness to various visual and textual variations. Additionally, our method also well ensures navigation performance in normal scenarios with remarkable stealthiness.", "keywords": "Vision-and-Language Navigation;Multimodal;Continous Decision-Making;Backdoor Attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Keji He;Kehan Chen;Jiawang Bai;Yan Huang;Qi Wu;Shu-Tao Xia;Liang Wang", "authorids": "~Keji_He1;~Kehan_Chen3;~Jiawang_Bai2;~Yan_Huang2;~Qi_Wu3;~Shu-Tao_Xia1;~Liang_Wang3", "gender": "M;M;M;M;M;M;M", "homepage": ";https://yanrockhuang.github.io/;http://qi-wu.me/;https://www.sigs.tsinghua.edu.cn/xst/list.htm;;https://github.com/Chenkehan21;", "dblp": "237/9675;75/6434-8;96/3446-1;03/6195;56/4499-1;;319/4518", "google_scholar": "https://scholar.google.com.hk/citations?user=sRksETcAAAAJ;6nUJrQ0AAAAJ;https://scholar.google.co.uk/citations?user=aKXe1FEAAAAJ;https://scholar.google.com.hk/citations?user=koAXTXgAAAAJ;;;RHPI-NQAAAAJ", "orcid": ";0000-0002-8239-7229;;0000-0002-8639-982X;;;0000-0001-5136-8444", "linkedin": ";;;;;;", "or_profile": "~Jiawang_Bai2;~Yan_Huang2;~Qi_Wu3;~Shu-Tao_Xia1;~Liang_Wang3;~kehan_chen1;~He_Keji2", "aff": "Tsinghua University;Institute of Automation, Chinese Academy of Sciences;The University of Adelaide;Shenzhen International Graduate School, Tsinghua University;Institute of Automation\uff0c CAS\uff0cChina;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "tsinghua.edu.cn;ia.ac.cn;adelaide.edu.au;sz.tsinghua.edu.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;Associate Professor;Full Professor;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nhe2024everyday,\ntitle={Everyday Object Meets Vision-and-Language Navigation Agent via Backdoor},\nauthor={Keji He and Kehan Chen and Jiawang Bai and Yan Huang and Qi Wu and Shu-Tao Xia and Liang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rXGxbDJadh}\n}", "github": "", "reviewers": "tsfi;oCuR;iENo;5ibk", "pdf_size": 6022242, "rating": "5;6;6;7", "confidence": "3;3;3;5", "soundness": "3;4;3;4", "novelty": "3;2;2;3", "presentation": "3;4;3;4", "wc_summary": "61;90;88;67", "wc_strengths": "61;47;41;111", "wc_weaknesses": "58;78;200;29", "wc_questions": "2;94;3;57", "wc_limitations": "16;6;8;20", "wc_review": "198;315;340;284", "wc_reply_reviewers": "0;15;21;12", "wc_reply_authors": "0;31;25;31", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 76.5, 12.698425099200294 ], "wc_strengths_avg": [ 65.0, 27.53179979587241 ], "wc_weaknesses_avg": [ 91.25, 65.15894029218093 ], "wc_questions_avg": [ 39.0, 38.77499194068259 ], "wc_limitations_avg": [ 12.5, 5.722761571129799 ], "wc_review_avg": [ 284.25, 53.60212215948171 ], "wc_reply_reviewers_avg": [ 12.0, 7.648529270389178 ], "wc_reply_authors_avg": [ 21.75, 12.794041581923986 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UKRfThXftIAJ:scholar.google.com/&scioq=Everyday+Object+Meets+Vision-and-Language+Navigation+Agent+via+Backdoor&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "tsinghua.edu.cn;ia.ac.cn;adelaide.edu.au;sz.tsinghua.edu.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;1;1;1", "aff_unique_norm": "Tsinghua University;Chinese Academy of Sciences;University of Adelaide", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ia.cas.cn;https://www.adelaide.edu.au", "aff_unique_abbr": "THU;CAS;Adelaide", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "3-in-1: 2D Rotary Adaptation for Efficient Finetuning, Efficient Batching and Composability", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93432", "id": "rYjYwuM6yH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rYjYwuM6yH", "openreview": "https://openreview.net/forum?id=rYjYwuM6yH", "poster": "/media/PosterPDFs/NeurIPS%202024/93432.png?t=1732817401.6696684", "project": "", "author_site": "Baohao Liao, Christof Monz", "tldr": "", "abstract": "Parameter-efficient finetuning (PEFT) methods effectively adapt large language models (LLMs) to diverse downstream tasks, reducing storage and GPU memory demands. Despite these advantages, several applications pose new challenges to PEFT beyond mere parameter efficiency. One notable challenge involves the efficient deployment of LLMs equipped with multiple task- or user-specific adapters, particularly when different adapters are needed for distinct requests within the same batch. Another challenge is the interpretability of LLMs, which is crucial for understanding how LLMs function. Previous studies introduced various approaches to address different challenges. In this paper, we introduce a novel method, RoAd, which employs a straightforward 2D rotation to adapt LLMs and addresses all the above challenges: (1) RoAd is remarkably parameter-efficient, delivering optimal performance on GLUE, eight commonsense reasoning tasks and four arithmetic reasoning tasks with <0.1% trainable parameters; (2) RoAd facilitates the efficient serving of requests requiring different adapters within a batch, with an overhead comparable to element-wise multiplication instead of batch matrix multiplication; (3) RoAd enhances LLM's interpretability through integration within a framework of distributed interchange intervention, demonstrated via composition experiments.", "keywords": "parameter-efficient finetuning;orthogonal finetuning;batching;interpretability", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Baohao Liao;Christof Monz", "authorids": "~Baohao_Liao1;~Christof_Monz1", "gender": "M;M", "homepage": "https://baohaoliao.github.io/;https://staff.fnwi.uva.nl/c.monz/", "dblp": "234/4096;m/ChristofMonz", "google_scholar": "Fbys5c8AAAAJ;0r3PWLQAAAAJ", "orcid": "0000-0001-8335-4573;", "linkedin": "baohaoliao;", "or_profile": "~Baohao_Liao1;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;ivi.uva.nl", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nliao2024in,\ntitle={3-in-1: 2D Rotary Adaptation for Efficient Finetuning, Efficient Batching and Composability},\nauthor={Baohao Liao and Christof Monz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rYjYwuM6yH}\n}", "github": "", "reviewers": "iQCQ;zyNQ;UyLU;vHYZ", "pdf_size": 895384, "rating": "5;6;6;6", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "2;3;4;3", "wc_summary": "43;42;59;84", "wc_strengths": "28;55;46;71", "wc_weaknesses": "216;78;170;122", "wc_questions": "2;5;11;20", "wc_limitations": "46;1;1;10", "wc_review": "335;181;287;307", "wc_reply_reviewers": "19;0;43;17", "wc_reply_authors": "189;0;57;52", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.0, 16.98528775146303 ], "wc_strengths_avg": [ 50.0, 15.540270267920054 ], "wc_weaknesses_avg": [ 146.5, 51.659945799429565 ], "wc_questions_avg": [ 9.5, 6.87386354243376 ], "wc_limitations_avg": [ 14.5, 18.553975315279473 ], "wc_review_avg": [ 277.5, 58.26448317800476 ], "wc_reply_reviewers_avg": [ 19.75, 15.31951369985353 ], "wc_reply_authors_avg": [ 74.5, 69.77284572095365 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:CUUfUvDjZcIJ:scholar.google.com/&scioq=3-in-1:+2D+Rotary+Adaptation+for+Efficient+Finetuning,+Efficient+Batching+and+Composability&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "uva.nl;ivi.uva.nl", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Trace is the Next AutoDiff: Generative Optimization with Rich Feedback, Execution Traces, and LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93431", "id": "rYs2Dmn9tD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rYs2Dmn9tD", "openreview": "https://openreview.net/forum?id=rYs2Dmn9tD", "poster": "/media/PosterPDFs/NeurIPS%202024/93431.png?t=1733737027.6893067", "project": "", "author_site": "Ching-An Cheng, Allen Nie, Adith Swaminathan", "tldr": "", "abstract": "We study a class of optimization problems motivated by automating the design and update of AI systems like coding assistants, robots, and copilots. AutoDiff frameworks, like PyTorch, enable efficient end-to-end optimization of differentiable systems. However, general computational workflows can be non-differentiable and involve rich feedback (e.g. console output or user\u2019s responses), heterogeneous parameters (e.g. prompts, codes), and intricate objectives (beyond maximizing a score). We investigate end-to-end generative optimization \u2013 using generative models such as LLMs within the optimizer for automatic updating of general computational workflows. We discover that workflow execution traces are akin to back-propagated gradients in AutoDiff and can provide key information to interpret feedback for efficient optimization. Formally, we frame a new mathematical setup, Optimization with Trace Oracle (OPTO). In OPTO, an optimizer receives an execution trace along with feedback on the computed output and updates parameters iteratively. We provide a Python library, Trace, that efficiently converts a workflow optimization problem into an OPTO instance using PyTorch-like syntax. Using Trace, we develop a general LLM-based generative optimizer called OptoPrime. In empirical studies, we find that OptoPrime is capable of first-order numerical optimization, prompt optimization, hyper-parameter tuning, robot controller design, code debugging, etc., and is often competitive with specialized optimizers for each domain. We envision Trace as an open research platform for devising novel generative optimizers and developing the next generation of interactive learning agents. Website: https://microsoft.github.io/Trace/.", "keywords": "Optimization;Back-Propagation;Automatic Differentiation;LLM;Language Feedback;Execution Trace", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/2ff35870bd3cac150509fc276e2f13e6e6256f60.zip", "author": "Ching-An Cheng;Allen Nie;Adith Swaminathan", "authorids": "~Ching-An_Cheng1;~Allen_Nie1;~Adith_Swaminathan1", "gender": "M;M;M", "homepage": "http://www.chinganc.com;https://anie.me;https://adith387.github.io/", "dblp": "123/6369;207/7996;", "google_scholar": "bMZFLZ_V4goC;r90OelAAAAAJ;WNHLjp0AAAAJ", "orcid": ";;", "linkedin": ";;adith-swaminathan-98198a21/", "or_profile": "~Ching-An_Cheng1;~Allen_Nie1;~Adith_Swaminathan1", "aff": "Microsoft Research;Google DeepMind;Microsoft", "aff_domain": "microsoft.com;google.com;microsoft.com", "position": "Principal Researcher;Intern;Researcher", "bibtex": "@inproceedings{\ncheng2024trace,\ntitle={Trace is the Next AutoDiff: Generative Optimization with Rich Feedback, Execution Traces, and {LLM}s},\nauthor={Ching-An Cheng and Allen Nie and Adith Swaminathan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rYs2Dmn9tD}\n}", "github": "", "reviewers": "aQXt;YuBN;GPUE", "pdf_size": 2923785, "rating": "5;7;8", "confidence": "3;2;4", "soundness": "2;4;4", "novelty": "2;3;4", "presentation": "2;4;4", "wc_summary": "122;113;101", "wc_strengths": "61;76;163", "wc_weaknesses": "71;21;251", "wc_questions": "137;23;236", "wc_limitations": "1;1;60", "wc_review": "392;234;811", "wc_reply_reviewers": "0;12;14", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.9428090415820634 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 112.0, 8.602325267042627 ], "wc_strengths_avg": [ 100.0, 44.96665431183423 ], "wc_weaknesses_avg": [ 114.33333333333333, 98.77021593352703 ], "wc_questions_avg": [ 132.0, 87.02873088813831 ], "wc_limitations_avg": [ 20.666666666666668, 27.812866726670865 ], "wc_review_avg": [ 479.0, 243.45978449564655 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 6.182412330330469 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3273268353539886, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3032475971506388210&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "microsoft.com;google.com;microsoft.com", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Microsoft;Google", "aff_unique_dep": "Microsoft Research;Google DeepMind", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://deepmind.com", "aff_unique_abbr": "MSR;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "rZlLfa81D8", "title": "WONDERBREAD: A Benchmark for Evaluating Multimodal Foundation Models on Business Process Management Tasks", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "Existing ML benchmarks lack the depth and diversity of annotations needed for evaluating models on business process management (BPM) tasks. BPM is the practice of documenting, measuring, improving, and automating enterprise workflows. However, research has focused almost exclusively on one task -- full end-to-end automation using agents based on multimodal foundation models (FMs) like GPT-4. This focus on automation ignores the reality of how most BPM tools are applied today -- simply documenting the relevant workflow takes 60% of the time of the typical process optimization project. To address this gap we present WONDERBREAD, the first benchmark for evaluating multimodal FMs on BPM tasks beyond automation. Our contributions are: (1) a dataset containing 2928 documented workflow demonstrations; (2) 6 novel BPM tasks sourced from real-world applications ranging from workflow documentation to knowledge transfer to process improvement; and (3) an automated evaluation harness. Our benchmark shows that while state-of-the-art FMs can automatically generate documentation (e.g. recalling 88% of the steps taken in a video demonstration of a workflow), they struggle to re-apply that knowledge towards finer-grained validation of workflow completion (F1 < 0.3). We hope WONDERBREAD encourages the development of more \"human-centered\" AI tooling for enterprise applications and furthers the exploration of multimodal FMs for the broader universe of BPM tasks. We publish our dataset and experiments here: https://github.com/HazyResearch/wonderbread", "keywords": "process mining;foundation models;multimodal;benchmark;dataset;workflows;enterprise", "primary_area": "", "supplementary_material": "", "author": "Michael Wornow;Avanika Narayan;Benjamin Viggiano;Ishan S. Khare;Tathagat Verma;Tibor Thompson;Miguel Angel Fuentes Hernandez;Sudharsan Sundar;Chloe Trujillo;Krrish Chawla;Rongfei Lu;Justin Shen;Divya Nagaraj;Joshua Martinez;Vardhan Kishore Agrawal;Althea Hudson;Nigam Shah;Christopher Re", "authorids": "~Michael_Wornow1;~Avanika_Narayan1;~Benjamin_Viggiano1;~Ishan_S._Khare1;~Tathagat_Verma1;~Tibor_Thompson1;~Miguel_Angel_Fuentes_Hernandez1;~Sudharsan_Sundar1;~Chloe_Trujillo1;~Krrish_Chawla1;~Rongfei_Lu1;~Justin_Shen1;~Divya_Nagaraj1;~Joshua_Martinez1;~Vardhan_Kishore_Agrawal1;~Althea_Hudson1;~Nigam_Shah1;~Christopher_Re1", "gender": ";;M;M;M;M;;M;F;M;;M;;;;F;M;", "homepage": "https://michaelwornow.net;;https://bviggiano.github.io/;https://iskhare.github.io/;https://tathagatv.github.io/;;;;;http://krrishchawla.com;;;;;https://vardhanagrawal.com;;https://shahlab.stanford.edu/nigam_shah;", "dblp": "295/5424.html;;;;;;;;;;;;;;;;s/NHShah;", "google_scholar": "rXYzcbcAAAAJ;;;licjJcoAAAAJ;https://scholar.google.com/citations?hl=en;;;;;;;;https://scholar.google.com/citations?hl=en;;;;n63DmP8AAAAJ;", "orcid": "0000-0003-2215-6527;;;0000-0003-2286-8153;;;;;;;;;;;;;0000-0001-9385-7158;", "linkedin": ";;;iskhare/;tathagatv/;tibor-thompson-38668a239;miguel-%C3%A1ngel-fuentes-hern%C3%A1ndez-531b51134/;sudharsan-sundar/;chloetrujillo/;;rongfei-lu/;justinyshen/;;joshkmartinez/;;althea-h-321080192;;", "or_profile": "~Michael_Wornow1;~Avanika_Narayan1;~Benjamin_Viggiano1;~Ishan_S._Khare1;~Tathagat_Verma1;~Tibor_Thompson1;~Miguel_Angel_Fuentes_Hernandez1;~Sudharsan_Sundar1;~Chloe_Trujillo1;~Krrish_Chawla1;~Rongfei_Lu1;~Justin_Shen1;~Divya_Nagaraj1;~Joshua_Martinez1;~Vardhan_Kishore_Agrawal1;~Althea_Hudson1;~Nigam_Shah1;~Christopher_Re1", "aff": "Stanford University;;Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University;Computer Science Department, Stanford University;;Stanford University;Computer Science Department, Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;Stanford University;", "aff_domain": "stanford.edu;;stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu;cs.stanford.edu;;stanford.edu;cs.stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;stanford.edu;", "position": "PhD student;;PhD student;Undergrad student;MS student;Undergrad student;MS student;Undergrad student;Undergrad student;Undergrad student;;Undergrad student;MS student;Undergrad student;Undergrad student;Undergrad student;Full Professor;", "bibtex": "@inproceedings{\nwornow2024wonderbread,\ntitle={{WONDERBREAD}: A Benchmark for Evaluating Multimodal Foundation Models on Business Process Management Tasks},\nauthor={Michael Wornow and Avanika Narayan and Benjamin Viggiano and Ishan S. Khare and Tathagat Verma and Tibor Thompson and Miguel Angel Fuentes Hernandez and Sudharsan Sundar and Chloe Trujillo and Krrish Chawla and Rongfei Lu and Justin Shen and Divya Nagaraj and Joshua Martinez and Vardhan Kishore Agrawal and Althea Hudson and Nigam Shah and Christopher Re},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=rZlLfa81D8}\n}", "github": "", "project": "", "reviewers": "rYLa;4WGS;nBnV", "site": "https://openreview.net/forum?id=rZlLfa81D8", "pdf_size": 5849850, "rating": "5;6;8", "confidence": "4;4;2", "wc_summary_and_contributions": "109;89;48", "wc_strengths": "2;83;110", "wc_improvement": "2;51;117", "wc_limitations": "1;25;1", "wc_correctness": "1;16;1", "wc_clarity": "1;9;1", "wc_relation_to_prior_work": "1;17;1", "wc_documentation": "1;8;1", "wc_additional_feedback": "1;1;1", "wc_review": "119;299;281", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "6;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_and_contributions_avg": [ 82.0, 25.39028685672272 ], "wc_strengths_avg": [ 65.0, 45.89117562233506 ], "wc_improvement_avg": [ 56.666666666666664, 47.11923410054775 ], "wc_limitations_avg": [ 9.0, 11.313708498984761 ], "wc_correctness_avg": [ 6.0, 7.0710678118654755 ], "wc_clarity_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_relation_to_prior_work_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_documentation_avg": [ 3.3333333333333335, 3.2998316455372216 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 233.0, 80.94442537939224 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": -0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11403653051765795753&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Loki: Low-rank Keys for Efficient Sparse Attention", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93430", "id": "raABeiV71j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=raABeiV71j", "openreview": "https://openreview.net/forum?id=raABeiV71j", "poster": "/media/PosterPDFs/NeurIPS%202024/93430.png?t=1733549046.8963656", "project": "", "author_site": "Prajwal Singhania, Siddharth Singh, Shwai He, Soheil Feizi, Abhinav Bhatele", "tldr": "", "abstract": "Inference on large language models (LLMs) can be expensive in terms of the\ncompute and memory costs involved, especially when long sequence lengths are\nused. In particular, the self-attention mechanism used in LLM inference contributes\nsignificantly to these costs, which has sparked an interest in approximating the self-attention \ncomputation to reduce such costs. In this work, we propose to approximate\nself-attention by focusing on the dimensionality of key vectors computed in the\nattention block. Our analysis reveals that key vectors lie in a significantly lower-dimensional \nspace, consistently across several datasets and models. Exploiting this\nobservation, we propose Loki, a novel sparse attention method that ranks and selects\ntokens in the KV-cache based on attention scores computed in low-dimensional\nspace. Our evaluations show that Loki is able to speed up the attention computation\ndue to reduced data movement (load/store) and compute costs while maintaining\nthe efficacy of the models better than other popular approximation methods.", "keywords": "Approximate Attention;Low-Dimensional Attention Keys;PCA;Top-K Attention", "primary_area": "infrastructure", "supplementary_material": "/attachment/28224bb5ddc1da9e62ed5cf574ca1475b3a5d739.zip", "author": "Prajwal Singhania;Siddharth Singh;Shwai He;Soheil Feizi;Abhinav Bhatele", "authorids": "~Prajwal_Singhania1;~Siddharth_Singh7;~Shwai_He1;~Soheil_Feizi2;~Abhinav_Bhatele1", "gender": "M;M;;M;", "homepage": "https://prajwal1210.github.io;https://siddharth9820.github.io/;;https://www.cs.umd.edu/~sfeizi/;https://www.cs.umd.edu/~bhatele", "dblp": "225/4526;;;57/2132;82/6441", "google_scholar": "https://scholar.google.co.in/citations?user=zz6yeyYAAAAJ;jNyBgaEAAAAJ;;lptAmrMAAAAJ;3x65qtwAAAAJ", "orcid": "0000-0003-4277-1287;0000-0002-2756-4290;;;", "linkedin": "prajwal1210/;;;;", "or_profile": "~Prajwal_Singhania1;~Siddharth_Singh7;~Shwai_He1;~Soheil_Feizi2;~Abhinav_Bhatele1", "aff": "University of Maryland, College Park;University of Maryland, College Park;;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;;umd.edu;umd.edu", "position": "PhD student;PhD student;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsinghania2024loki,\ntitle={Loki: Low-rank Keys for Efficient Sparse Attention},\nauthor={Prajwal Singhania and Siddharth Singh and Shwai He and Soheil Feizi and Abhinav Bhatele},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=raABeiV71j}\n}", "github": "", "reviewers": "KXhq;aGJV;vRA7;dYWK", "pdf_size": 2132936, "rating": "6;6;6;8", "confidence": "3;3;4;5", "soundness": "3;3;2;4", "novelty": "3;2;2;4", "presentation": "2;3;2;4", "wc_summary": "61;119;54;24", "wc_strengths": "46;46;30;19", "wc_weaknesses": "53;10;41;72", "wc_questions": "60;41;230;225", "wc_limitations": "1;1;1;42", "wc_review": "221;217;356;382", "wc_reply_reviewers": "9;0;13;615", "wc_reply_authors": "7;0;11;33", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;2;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 64.5, 34.39840112563373 ], "wc_strengths_avg": [ 35.25, 11.431863365173676 ], "wc_weaknesses_avg": [ 44.0, 22.52776065213762 ], "wc_questions_avg": [ 139.0, 88.77218032694702 ], "wc_limitations_avg": [ 11.25, 17.75352077758099 ], "wc_review_avg": [ 294.0, 75.57446658759822 ], "wc_reply_reviewers_avg": [ 159.25, 263.1695033623767 ], "wc_reply_authors_avg": [ 12.75, 12.336429791475327 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3865007290156923928&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "umd.edu;umd.edu;;umd.edu;umd.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "FlexMol: A Flexible Toolkit for Benchmarking Molecular Relational Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97484", "id": "raOYixthlY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=raOYixthlY", "openreview": "https://openreview.net/forum?id=raOYixthlY", "poster": "", "project": "", "author_site": "Sizhe Liu, Jun Xia, Lecheng Zhang, Yuchen Liu, Yue Liu, Wenjie Du, Zhangyang Gao, Bozhen Hu, Cheng Tan, hongxin xiang, Stan Z. Li", "tldr": "", "abstract": "Molecular relational learning (MRL) is crucial for understanding the interaction behaviors between molecular pairs, a critical aspect of drug discovery and development. However, the large feasible model space of MRL poses significant challenges to benchmarking, and existing MRL frameworks face limitations in flexibility and scope. To address these challenges, avoid repetitive coding efforts, and ensure fair comparison of models, we introduce FlexMol, a comprehensive toolkit designed to facilitate the construction and evaluation of diverse model architectures across various datasets and performance metrics. FlexMol offers a robust suite of preset model components, including 16 drug encoders, 13 protein sequence encoders, 9 protein structure encoders, and 7 interaction layers. With its easy-to-use API and flexibility, FlexMol supports the dynamic construction of over 70, 000 distinct combinations of model architectures. Additionally, we provide detailed benchmark results and code examples to demonstrate FlexMol\u2019s effectiveness in simplifying and standardizing MRL model development and comparison. FlexMol is open-sourced and available at https://github.com/Steven51516/FlexMol.", "keywords": "Molecular relational learning;Toolkit;Benchmark", "primary_area": "", "supplementary_material": "/attachment/8acf92498ed4ab03513a4ff75cb17c947ff9f2a2.zip", "author": "Sizhe Liu;Jun Xia;Lecheng Zhang;Yuchen Liu;Yue Liu;Wenjie Du;Zhangyang Gao;Bozhen Hu;Cheng Tan;hongxin xiang;Stan Z. Li", "authorids": "~Sizhe_Liu1;~Jun_Xia1;~Lecheng_Zhang1;~Yuchen_Liu22;~Yue_Liu10;~Wenjie_Du2;~Zhangyang_Gao1;~Bozhen_Hu1;~Cheng_Tan1;~hongxin_xiang1;~Stan_Z._Li2", "gender": "M;M;M;F;M;M;M;M;M;M;M", "homepage": ";http://junxia97.github.io/;https://westlake.edu.cn/;;https://yueliu1999.github.io/;https://invokerqwer.github.io/duwenjie.github.io/;;;https://chengtan9907.github.io/;https://github.com/HongxinXiang;https://en.westlake.edu.cn/academics/School_of_Engineering/About/Our_People/Faculty/201912/t20191206_2497.shtml", "dblp": ";;;;74/1932-8;;275/3266;279/8665;70/1533-12.html;275/7890;l/StanZLi", "google_scholar": ";aPKKpSYAAAAJ;;;5tfpu3MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;4SclT-QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;6kTV6aMAAAAJ;Wocj2IgAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;;0000-0002-3517-7354;0000-0003-1026-6083;0000-0002-3428-0114;;;", "linkedin": "steven-liu-195745241/;;;yuchen-liu-84b5b8247/;;;;;;;stan-z-li-%E6%9D%8E%E5%AD%90%E9%9D%92-55753224/", "or_profile": "~Sizhe_Liu1;~Jun_Xia1;~Lecheng_Zhang1;~Yuchen_Liu22;~Yue_Liu10;~Wenjie_Du2;~Zhangyang_Gao1;~Bozhen_Hu1;~Cheng_Tan1;~hongxin_xiang1;~Stan_Z._Li1", "aff": "University of Southern California;Westlake University, China;Westlake University;University of Southern California;University of Illinois, Urbana Champaign;University of Science and Technology of China;Westlake University, China;Westlake University;Zhejiang University & Westlake University;Hunan University;Westlake University", "aff_domain": "usc.edu;westlake.edu.cn;westlake.edu;usc.edu;uiuc.edu;ustc.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;hnu.edu.cn;westlake.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Undergrad student;Intern;Assistant Professor;PhD student;PhD student;PhD student;PhD student;Chair Professor", "bibtex": "@inproceedings{\nliu2024flexmol,\ntitle={FlexMol: A Flexible Toolkit for Benchmarking Molecular Relational Learning},\nauthor={Sizhe Liu and Jun Xia and Lecheng Zhang and Yuchen Liu and Yue Liu and Wenjie Du and Zhangyang Gao and Bozhen Hu and Cheng Tan and hongxin xiang and Stan Z. Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=raOYixthlY}\n}", "github": "", "reviewers": "Bk1c;yHQP;452B;6jyh", "pdf_size": 1626203, "rating": "7;7;7;8", "confidence": "5;3;3;5", "wc_summary_and_contributions": "54;89;67;92", "wc_strengths": "15;75;51;92", "wc_improvement": "5;20;196;65", "wc_limitations": "1;2;6;8", "wc_correctness": "1;1;24;8", "wc_clarity": "1;1;9;4", "wc_relation_to_prior_work": "1;1;5;9", "wc_documentation": "1;1;26;13", "wc_additional_feedback": "1;1;1;1", "wc_review": "80;191;385;292", "wc_reply_reviewers": "12;13;17;52", "wc_reply_authors": "20;77;19;113", "reply_reviewers": "1;1;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 1.0 ], "wc_summary_and_contributions_avg": [ 75.5, 15.724185193516387 ], "wc_strengths_avg": [ 58.25, 28.908260065247788 ], "wc_improvement_avg": [ 71.5, 75.19474715696569 ], "wc_limitations_avg": [ 4.25, 2.8613807855648994 ], "wc_correctness_avg": [ 8.5, 9.394147114027968 ], "wc_clarity_avg": [ 3.75, 3.2691742076555053 ], "wc_relation_to_prior_work_avg": [ 4.0, 3.3166247903554 ], "wc_documentation_avg": [ 10.25, 10.328964130056798 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 237.0, 113.68157282515051 ], "wc_reply_reviewers_avg": [ 23.5, 16.560495161679196 ], "wc_reply_authors_avg": [ 57.25, 39.83952183447989 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13977299269742369896&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "usc.edu;westlake.edu.cn;westlake.edu;usc.edu;uiuc.edu;ustc.edu.cn;westlake.edu.cn;westlake.edu.cn;westlake.edu.cn;hnu.edu.cn;westlake.edu.cn", "author_num": 11, "aff_unique_index": "0;1;1;0;2;3;1;1;4;5;1", "aff_unique_norm": "University of Southern California;Westlake University;University of Illinois Urbana-Champaign;University of Science and Technology of China;Zhejiang University;Hunan University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.usc.edu;https://www.westlake.edu.cn;https://illinois.edu;http://www.ustc.edu.cn;http://www.zju.edu.cn;http://www.hunu.edu.cn/", "aff_unique_abbr": "USC;WU;UIUC;USTC;ZJU;HNU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Los Angeles;;Urbana-Champaign", "aff_country_unique_index": "0;1;1;0;0;1;1;1;1;1;1", "aff_country_unique": "United States;China" }, { "title": "EM Distillation for One-step Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93429", "id": "rafVvthuxD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rafVvthuxD", "openreview": "https://openreview.net/forum?id=rafVvthuxD", "poster": "/media/PosterPDFs/NeurIPS%202024/93429.png?t=1731758543.6317925", "project": "", "author_site": "Sirui Xie, Zhisheng Xiao, Diederik Kingma, Tingbo Hou, Ying Nian Wu, Kevin Murphy, Tim Salimans, Ben Poole, Ruiqi Gao", "tldr": "", "abstract": "While diffusion models can learn complex distributions, sampling requires a computationally expensive iterative process. Existing distillation methods enable efficient sampling, but have notable limitations, such as performance degradation with very few sampling steps, reliance on training data access, or mode-seeking optimization that may fail to capture the full distribution. We propose EM Distillation (EMD), a maximum likelihood-based approach that distills a diffusion model to a one-step generator model with minimal loss of perceptual quality. Our approach is derived through the lens of Expectation-Maximization (EM), where the generator parameters are updated using samples from the joint distribution of the diffusion teacher prior and inferred generator latents. We develop a reparametrized sampling scheme and a noise cancellation technique that together stabilizes the distillation process. We further reveal an interesting connection of our method with existing methods that minimize mode-seeking KL. EMD outperforms existing one-step generative methods in terms of FID scores on ImageNet-64 and ImageNet-128, and compares favorably with prior work on distilling text-to-image diffusion models.", "keywords": "Generative models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Sirui Xie;Zhisheng Xiao;Diederik P Kingma;Tingbo Hou;Ying Nian Wu;Kevin Patrick Murphy;Tim Salimans;Ben Poole;Ruiqi Gao", "authorids": "~Sirui_Xie1;~Zhisheng_Xiao1;~Diederik_P_Kingma1;~Tingbo_Hou2;~Ying_Nian_Wu1;~Kevin_Patrick_Murphy1;~Tim_Salimans1;~Ben_Poole1;~Ruiqi_Gao1", "gender": "M;M;M;M;;M;M;F;M", "homepage": "https://www.siruixie.com;https://xavierxiao.github.io;http://www.dpkingma.com;https://research.google/people/106438/;https://www.cs.ubc.ca/~murphyk/;;https://cs.stanford.edu/~poole;http://www.stat.ucla.edu/~ruiqigao/;http://www.stat.ucla.edu/~ywu/", "dblp": "232/3072;;http://dblp.uni-trier.de/pers/hd/k/Kingma:Diederik_P=;35/3986;26/2599;116/2791;16/10397;206/7084;18/568.html", "google_scholar": "9GJn5FIAAAAJ;3Wex6VIAAAAJ;https://scholar.google.nl/citations?user=yyIoQu4AAAAJ;u-UDZcsAAAAJ;MxxZkEcAAAAJ;;i5FMLA4AAAAJ;VdlgOXoAAAAJ;7k_1QFIAAAAJ", "orcid": ";;;0009-0006-9667-9821;;;;;", "linkedin": ";;;tingbo-hou/;;;;;", "or_profile": "~Sirui_Xie1;~Zhisheng_Xiao1;~Diederik_P_Kingma1;~Tingbo_Hou2;~Kevin_Patrick_Murphy1;~Tim_Salimans1;~Ben_Poole1;~Ruiqi_Gao1;~Yingnian_Wu1", "aff": "University of California, Los Angeles;Google;Google;Google;Google;Google;Google;Google;UCLA", "aff_domain": "ucla.edu;google.com;google.com;google.com;google.com;google.com;google.com;google.com;stat.ucla.edu", "position": "PhD student;Researcher;Research Scientist;Researcher;Principal Researcher;Research Scientist;Research Scientist;Researcher;Full Professor", "bibtex": "@inproceedings{\nxie2024em,\ntitle={{EM} Distillation for One-step Diffusion Models},\nauthor={Sirui Xie and Zhisheng Xiao and Diederik P Kingma and Tingbo Hou and Ying Nian Wu and Kevin Patrick Murphy and Tim Salimans and Ben Poole and Ruiqi Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rafVvthuxD}\n}", "github": "", "reviewers": "eeRQ;3oLm;MtDz;fdee;p5As", "pdf_size": 43432368, "rating": "3;5;6;6;6", "confidence": "5;4;3;4;4", "soundness": "2;3;2;3;3", "novelty": "2;3;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "45;75;156;66;155", "wc_strengths": "50;51;94;45;67", "wc_weaknesses": "155;112;228;72;107", "wc_questions": "26;57;248;2;9", "wc_limitations": "1;5;5;1;6", "wc_review": "277;300;731;186;344", "wc_reply_reviewers": "72;107;15;53;100", "wc_reply_authors": "38;76;4;73;20", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.2, 1.16619037896906 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 99.4, 46.82990497534668 ], "wc_strengths_avg": [ 61.4, 17.89525076661403 ], "wc_weaknesses_avg": [ 134.8, 53.536529584947885 ], "wc_questions_avg": [ 68.4, 91.78365867625892 ], "wc_limitations_avg": [ 3.6, 2.154065922853802 ], "wc_review_avg": [ 367.6, 188.8773146780735 ], "wc_reply_reviewers_avg": [ 69.4, 33.42214834507202 ], "wc_reply_authors_avg": [ 42.2, 28.498421008891004 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8134892168199606, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=673060117530985203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucla.edu;google.com;google.com;google.com;google.com;google.com;google.com;google.com;stat.ucla.edu", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;1;1;0", "aff_unique_norm": "University of California, Los Angeles;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucla.edu;https://www.google.com", "aff_unique_abbr": "UCLA;Google", "aff_campus_unique_index": "0;1;1;1;1;1;1;1;0", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DeBaRA: Denoising-Based 3D Room Arrangement Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93428", "id": "rajRJ6WKj2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rajRJ6WKj2", "openreview": "https://openreview.net/forum?id=rajRJ6WKj2", "poster": "/media/PosterPDFs/NeurIPS%202024/93428.png?t=1731510991.566541", "project": "", "author_site": "L\u00e9opold Maillard, Nicolas Sereyjol-Garros, Tom Durand, Maks Ovsjanikov", "tldr": "", "abstract": "Generating realistic and diverse layouts of furnished indoor 3D scenes unlocks multiple interactive applications impacting a wide range of industries. The inherent complexity of object interactions, the limited amount of available data and the requirement to fulfill spatial constraints all make generative modeling for 3D scene synthesis and arrangement challenging. Current methods address these challenges autoregressively or by using off-the-shelf diffusion objectives by simultaneously predicting all attributes without 3D reasoning considerations. In this paper, we introduce DeBaRA, a score-based model specifically tailored for precise, controllable and flexible arrangement generation in a bounded environment. We argue that the most critical component of a scene synthesis system is to accurately establish the size and position of various objects within a restricted area. Based on this insight, we propose a lightweight conditional score-based model designed with 3D spatial awareness at its core. We demonstrate that by focusing on spatial attributes of objects, a single trained DeBaRA model can be leveraged at test time to perform several downstream applications such as scene synthesis, completion and re-arrangement. Further, we introduce a novel Self Score Evaluation procedure so it can be optimally employed alongside external LLM models. We evaluate our approach through extensive experiments and demonstrate significant improvement upon state-of-the-art approaches in a range of scenarios.", "keywords": "Indoor 3D Scene Synthesis;Layout Generation;Score-based Generative Models;Diffusion Models;Conditional Generation", "primary_area": "machine_vision", "supplementary_material": "", "author": "L\u00e9opold Maillard;Nicolas Sereyjol-Garros;Tom Durand;Maks Ovsjanikov", "authorids": "~L\u00e9opold_Maillard1;~Nicolas_Sereyjol-Garros1;~Tom_Durand1;~Maks_Ovsjanikov1", "gender": "M;;;M", "homepage": ";;;http://www.lix.polytechnique.fr/~maks/", "dblp": ";;;94/5668", "google_scholar": ";;;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-5867-4046", "linkedin": "leopold-maillard;sereyjol-garros-nicolas;https://fr.linkedin.com/in/tom-durand?challengeId=AQFTXsyVr9heNQAAAY9xzUhKpZZduvZS4moiCljSSBH05tKpBonjUg_kG30w_4g61fwoed5SJ9_w1bvLTR70aeygPrmf5NGKzw&submissionId=b0c89579-3a0a-cf17-9361-71f78c88de37&challengeSource=AgFZBMc2rvXZZQAAAY9xzX6OR3ojInp9szUD7k2O3G0BIqAZ4mEhXlIBfhlUbfs&challegeType=AgFC_Xln7uOFxAAAAY9xzX6RgOD5YXNJiHgUYhLd-S6hCqoEoypXKOM&memberId=AgHJum8GIAyGVwAAAY9xzX6VhUEuU8ZK0c1qK0N64xDJ_ac&recognizeDevice=AgEy6xU81JaFgwAAAY9xzX6ZvFlFrrJdXvE5fszW3lriuzix5xci&original_referer=https%3A%2F%2Fwww.linkedin.com%2F;", "or_profile": "~L\u00e9opold_Maillard1;~Nicolas_Sereyjol-Garros1;~Tom_Durand1;~Maks_Ovsjanikov1", "aff": "Dassault Syst\u00e8mes;Ecole Nationale des Ponts et Chausees;Dassault Syst\u00e8mes;\u00c9cole Polytechnique", "aff_domain": "3ds.com;enpc.fr;3ds.com;polytechnique.edu", "position": "Researcher;MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nmaillard2024debara,\ntitle={DeBa{RA}: Denoising-Based 3D Room Arrangement Generation},\nauthor={L{\\'e}opold Maillard and Nicolas Sereyjol-Garros and Tom Durand and Maks Ovsjanikov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rajRJ6WKj2}\n}", "github": "", "reviewers": "sfxx;xNz4;Fsdh", "pdf_size": 11811712, "rating": "3;6;7", "confidence": "5;2;4", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "1;3;4", "wc_summary": "34;109;52", "wc_strengths": "6;36;46", "wc_weaknesses": "179;23;375", "wc_questions": "41;1;3", "wc_limitations": "1;1;45", "wc_review": "261;170;521", "wc_reply_reviewers": "281;0;39", "wc_reply_authors": "935;0;0", "reply_reviewers": "1;0;1", "reply_authors": "3;1;1", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 3.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 65.0, 31.96873472629156 ], "wc_strengths_avg": [ 29.333333333333332, 16.996731711975947 ], "wc_weaknesses_avg": [ 192.33333333333334, 144.01234514983622 ], "wc_questions_avg": [ 15.0, 18.40289832245635 ], "wc_limitations_avg": [ 15.666666666666666, 20.741798914805393 ], "wc_review_avg": [ 317.3333333333333, 148.72868661499777 ], "wc_reply_reviewers_avg": [ 106.66666666666667, 124.29624111595473 ], "wc_reply_authors_avg": [ 311.6666666666667, 440.7632269396145 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5765566601970552, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16009135537642974223&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "3ds.com;enpc.fr;3ds.com;polytechnique.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Dassault Syst\u00e8mes;Ecole Nationale des Ponts et Chaussees;Ecole Polytechnique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.3ds.com;https://www.enpc.fr;https://www.polytechnique.edu", "aff_unique_abbr": "3DS;ENPC;X", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Local Anti-Concentration Class: Logarithmic Regret for Greedy Linear Contextual Bandit", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93427", "id": "rblaF2euXQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rblaF2euXQ", "openreview": "https://openreview.net/forum?id=rblaF2euXQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93427.png?t=1733849320.7080078", "project": "", "author_site": "Seok-Jin Kim, Min-hwan Oh", "tldr": "", "abstract": "We study the performance guarantees of exploration-free greedy algorithms for the linear contextual bandit problem. \nWe introduce a novel condition, named the \\textit{Local Anti-Concentration} (LAC) condition, which enables a greedy bandit algorithm to achieve provable efficiency. \nWe show that the LAC condition is satisfied by a broad class of distributions, including Gaussian, exponential, uniform, Cauchy, and Student's~$t$ distributions, along with other exponential family distributions and their truncated variants. \nThis significantly expands the class of distributions under which greedy algorithms can perform efficiently. \nUnder our proposed LAC condition, we prove that the cumulative expected regret of the greedy algorithm for the linear contextual bandit is bounded by $\\mathcal{O}(\\operatorname{poly} \\log T)$. \nOur results establish the widest range of distributions known to date that allow a sublinear regret bound for greedy algorithms, further achieving a sharp poly-logarithmic regret.", "keywords": "contextual bandit;greedy algorithm;exploration-exploitation;sequential decision-making;bandit algorithms", "primary_area": "bandits", "supplementary_material": "/attachment/e49a91b6d3bd3e543f3d2c1f988904ab77cec978.zip", "author": "Seok-Jin Kim;Min-hwan Oh", "authorids": "~Seok-Jin_Kim1;~Min-hwan_Oh1", "gender": ";M", "homepage": "https://minoh.io;https://sites.google.com/view/seok-jin-kim/home", "dblp": "172/0531;95/5036", "google_scholar": "KzVALFwAAAAJ;", "orcid": ";", "linkedin": ";seokjin-keem-557334217/", "or_profile": "~Min-hwan_Oh1;~Seok_Jin_Kim1", "aff": "Seoul National University;Columbia University", "aff_domain": "snu.ac.kr;columbia.edu", "position": "Assistant Professor;PhD student", "bibtex": "@inproceedings{\nkim2024local,\ntitle={Local Anti-Concentration Class: Logarithmic Regret for Greedy Linear Contextual Bandit},\nauthor={Seok-Jin Kim and Min-hwan Oh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rblaF2euXQ}\n}", "github": "", "reviewers": "PGWV;Fhbd;Lk9D;qUQr", "pdf_size": 2336696, "rating": "5;6;7;8", "confidence": "3;5;2;3", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "2;1;3;4", "wc_summary": "57;181;94;50", "wc_strengths": "23;175;54;42", "wc_weaknesses": "248;446;79;164", "wc_questions": "70;259;118;168", "wc_limitations": "22;2;6;1", "wc_review": "420;1063;351;425", "wc_reply_reviewers": "302;101;45;65", "wc_reply_authors": "583;587;33;65", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 95.5, 52.11765535785354 ], "wc_strengths_avg": [ 73.5, 59.63430220938282 ], "wc_weaknesses_avg": [ 234.25, 136.07419850948966 ], "wc_questions_avg": [ 153.75, 69.95132236062446 ], "wc_limitations_avg": [ 7.75, 8.437268515343103 ], "wc_review_avg": [ 564.75, 289.14734565615504 ], "wc_reply_reviewers_avg": [ 128.25, 102.30194279680127 ], "wc_reply_authors_avg": [ 317.0, 268.24242766572183 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.30779350562554625, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4852918212589434607&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "snu.ac.kr;columbia.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Seoul National University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.columbia.edu", "aff_unique_abbr": "SNU;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "title": "DeMo: Decoupling Motion Forecasting into Directional Intentions and Dynamic States", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93426", "id": "rbtnRsiXSN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rbtnRsiXSN", "openreview": "https://openreview.net/forum?id=rbtnRsiXSN", "poster": "/media/PosterPDFs/NeurIPS%202024/93426.png?t=1730358881.1016245", "project": "", "author_site": "Bozhou Zhang, Nan Song, Li Zhang", "tldr": "", "abstract": "Accurate motion forecasting for traffic agents is crucial for ensuring the safety and efficiency of autonomous driving systems in dynamically changing environments. Mainstream methods adopt a one-query-one-trajectory paradigm, where each query corresponds to a unique trajectory for predicting multi-modal trajectories. While straightforward and effective, the absence of detailed representation of future trajectories may yield suboptimal outcomes, given that the agent states dynamically evolve over time. To address this problem, we introduce DeMo, a framework that decouples multi-modal trajectory queries into two types: mode queries capturing distinct directional intentions and state queries tracking the agent's dynamic states over time. By leveraging this format, we separately optimize the multi-modality and dynamic evolutionary properties of trajectories. Subsequently, the mode and state queries are integrated to obtain a comprehensive and detailed representation of the trajectories. To achieve these operations, we additionally introduce combined Attention and Mamba techniques for global information aggregation and state sequence modeling, leveraging their respective strengths. Extensive experiments on both the Argoverse 2 and nuScenes benchmarks demonstrate that our DeMo achieves state-of-the-art performance in motion forecasting. In addition, we will make our code and models publicly available.", "keywords": "Motion Forecasting;Autonomous Driving;Mamba;Attention", "primary_area": "robotics", "supplementary_material": "", "author": "Bozhou Zhang;Nan Song;Li Zhang", "authorids": "~Bozhou_Zhang1;~Nan_Song4;~Li_Zhang5", "gender": "M;M;M", "homepage": "https://zbozhou.github.io/;;http://www.robots.ox.ac.uk/~lz/", "dblp": "294/1268;;89/5992-40", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;-wOTCE8AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Bozhou_Zhang1;~Nan_Song4;~Li_Zhang5", "aff": "Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2024demo,\ntitle={DeMo: Decoupling Motion Forecasting into Directional Intentions and Dynamic States},\nauthor={Bozhou Zhang and Nan Song and Li Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rbtnRsiXSN}\n}", "github": "", "reviewers": "Rxa7;pCB4;V1LD;ZMh7", "pdf_size": 0, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "38;97;59;139", "wc_strengths": "29;28;13;119", "wc_weaknesses": "41;108;10;241", "wc_questions": "18;177;85;152", "wc_limitations": "1;11;2;2", "wc_review": "127;421;169;653", "wc_reply_reviewers": "21;106;14;0", "wc_reply_authors": "24;129;20;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 83.25, 38.51217340010818 ], "wc_strengths_avg": [ 47.25, 41.90689084148334 ], "wc_weaknesses_avg": [ 100.0, 88.7778125434503 ], "wc_questions_avg": [ 108.0, 61.89911146373589 ], "wc_limitations_avg": [ 4.0, 4.06201920231798 ], "wc_review_avg": [ 342.5, 211.6099005245265 ], "wc_reply_reviewers_avg": [ 35.25, 41.541395017500314 ], "wc_reply_authors_avg": [ 43.25, 50.335747734587194 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7499105854824574021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "rcch4UsMBi", "title": "Synthetic Data (Almost) from Scratch: Generalized Instruction Tuning for Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "We introduce Generalized Instruction Tuning (called GLAN), a general and scalable method for instruction tuning of Large Language Models (LLMs). \nUnlike prior work that relies on seed examples or existing datasets to construct instruction tuning data, \nGLAN exclusively utilizes a pre-curated taxonomy of human knowledge and capabilities as input and generates large-scale \nsynthetic instruction data across all disciplines.\nSpecifically, inspired by the systematic structure in human education system, we build the taxonomy by decomposing human knowledge and capabilities to various fields, sub-fields and ultimately, distinct disciplines semi-automatically, facilitated by LLMs. \nSubsequently, we generate a comprehensive list of subjects for every discipline and proceed to design a syllabus tailored to each subject, again utilizing LLMs.\nWith the fine-grained key concepts detailed in every class session of the syllabus, we are able to generate diverse instructions with a broad coverage across the entire spectrum of human knowledge and skills. \nExtensive experiments on large language models (e.g., Mistral) demonstrate that GLAN excels in multiple dimensions from mathematical reasoning, coding, academic exams, logical reasoning to general instruction following without using task-specific training data of these tasks. In addition, GLAN allows for easy customization and new fields or skills can be added by simply incorporating a new node into our taxonomy.", "keywords": "Synthetic data; large language models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/296836ab822c832ed142c1726d18a8f5787dca4c.zip", "author": "Haoran Li;Qingxiu Dong;Zhengyang Tang;Chaojun Wang;Xingxing Zhang;Haoyang Huang;Shaohan Huang;Xiaolong Huang;Zeqiang Huang;Dongdong Zhang;Yuxian Gu;Xin Cheng;Xun Wang;Si-Qing Chen;Li Dong;Wei Lu;Zhifang Sui;Benyou Wang;Wai Lam;Furu Wei", "authorids": "~Haoran_Li4;~Qingxiu_Dong1;~Zhengyang_Tang1;~Chaojun_Wang1;~Xingxing_Zhang1;~Haoyang_Huang1;~Shaohan_Huang1;~Xiaolong_Huang1;~Zeqiang_Huang1;~Dongdong_Zhang4;~Yuxian_Gu1;~Xin_Cheng2;~Xun_Wang5;~Si-Qing_Chen1;~Li_Dong1;~Wei_Lu10;~Zhifang_Sui1;~Benyou_Wang2;~Wai_Lam1;~Furu_Wei1", "gender": "M;F;M;;M;M;M;M;M;M;M;;;F;M;;F;M;M;M", "homepage": "https://statnlp-research.github.io/;https://dqxiu.github.io/;;;https://xingxingzhang.github.io/;;;;;https://www.microsoft.com/en-us/research/people/dozhang/;https://t1101675.github.io/;;;;http://dong.li;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024;https://wabyking.github.io/old.html;http://www.se.cuhk.edu.hk/~textmine;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": ";284/0673;247/3097;;59/9985-2.html;;176/0380;;;02/621-1.html;;;;;85/5090-4;;;169/1793;48/1707;72/5870", "google_scholar": "X5QwHqwAAAAJ;ibcR7VkAAAAJ;2RRV0PQAAAAJ;;5yX53usAAAAJ;;;;;w2qu71oAAAAJ;zF9dr1sAAAAJ;;;;wEfQgPgAAAAJ;;;Jk4vJU8AAAAJ;ewA4NAcAAAAJ;G-V1VpwAAAAJ", "orcid": ";;;;;;;;;;;;;0000-0002-6945-4540;;;;0000-0002-1501-9914;;", "linkedin": ";qingxiu-dong-a3758a199/;;;;%E6%B5%A9%E6%B4%8B-%E9%BB%84-77a59016a/;;xiaolong-huang-446182b4/;zeqiang-huang-95795212b/;;;;;si-qing-chen-seattle/;;;;;;", "or_profile": "~Haoran_Li4;~Qingxiu_Dong1;~Zhengyang_Tang1;~Chaojun_Wang1;~Xingxing_Zhang1;~Haoyang_Huang1;~Shaohan_Huang1;~Xiaolong_Huang1;~Zeqiang_Huang1;~Dongdong_Zhang4;~Yuxian_Gu1;~Xin_Cheng2;~Xun_Wang5;~Si-Qing_Chen1;~Li_Dong1;~Wei_Lu10;~Zhifang_Sui1;~Benyou_Wang2;~Wai_Lam1;~Furu_Wei1", "aff": ";Peking University;The Chinese University of Hong Kong, Shenzhen;;Microsoft Research Asia;Microsoft Research Asia;Microsoft;Microsoft;Microsoft;Microsoft Research Asia;Tsinghua University;;;Microsoft;Microsoft Research;;Peking University;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong;Microsoft Research", "aff_domain": ";pku.edu.cn;cuhk.edu.cn;;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;tsinghua.edu.cn;;;microsoft.com;microsoft.com;;pku.edu.cn;cuhk.edu.cn;cuhk.edu.hk;microsoft.com", "position": ";PhD student;PhD student;;Researcher;FTE;Researcher;Researcher;Researcher;Researcher;PhD student;;;Partner Applied Science Manager;Principal Researcher;;Full Professor;Assistant Professor;Professor;Distinguished Scientist", "bibtex": "@misc{\nanonymous2024synthetic,\ntitle={Synthetic Data (Almost) from Scratch: Generalized Instruction Tuning for Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=rcch4UsMBi}\n}", "github": "", "project": "", "reviewers": "7Qp1;koAL;VaBs;oEvn", "site": "https://openreview.net/forum?id=rcch4UsMBi", "pdf_size": 533895, "rating": "4;5;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;4;3", "wc_summary": "84;41;142;40", "wc_strengths": "118;70;92;88", "wc_weaknesses": "296;29;52;29", "wc_questions": "38;4;61;80", "wc_limitations": "3;4;25;7", "wc_review": "539;148;372;244", "wc_reply_reviewers": "124;54;84;24", "wc_reply_authors": "801;89;381;37", "reply_reviewers": "1;1;2;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 76.75, 41.64957982981341 ], "wc_strengths_avg": [ 92.0, 17.146428199482248 ], "wc_weaknesses_avg": [ 101.5, 112.68651205889726 ], "wc_questions_avg": [ 45.75, 28.32291475113393 ], "wc_limitations_avg": [ 9.75, 8.926785535678562 ], "wc_review_avg": [ 325.75, 146.5373245968412 ], "wc_reply_reviewers_avg": [ 71.5, 36.99662146737185 ], "wc_reply_authors_avg": [ 327.0, 303.4534560686367 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 20, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3252147613670645508&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;2;2;2;2;2;3;2;2;0;1;1;2", "aff_unique_norm": "Peking University;Chinese University of Hong Kong;Microsoft;Tsinghua University", "aff_unique_dep": ";;Research;", "aff_unique_url": "http://www.pku.edu.cn;https://www.cuhk.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Peking U;CUHK;MSR Asia;THU", "aff_campus_unique_index": "1;2;2;2;1;3", "aff_campus_unique": ";Shenzhen;Asia;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;1;1;0;0;1;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "rdv2Fr6JTC", "title": "Precedence-Constrained Winter Value for Effective Graph Data Valuation", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Data valuation is essential for quantifying data\u2019s worth, aiding in assessing data quality and determining fair compensation. While existing data valuation methods have proven effective in evaluating the value of Euclidean data, they face limitations when applied to the increasingly popular graph-structured data. Particularly, graph data valuation introduces unique challenges, primarily stemming from the intricate dependencies among nodes and the exponential growth in value estimation costs. To address the challenging problem of graph data valuation, we put forth an innovative solution, Precedence-Constrained Winter (PC-Winter) Value, to account for the complex graph structure. Furthermore, we develop a variety of strategies to address the computational challenges and enable efficient approximation of PC-Winter. Extensive experiments demonstrate the effectiveness of PC-Winter across diverse datasets and tasks.", "keywords": "Data Valuation;Graph Learning;Graph neural network", "primary_area": "", "supplementary_material": "", "author": "Hongliang Chi;Wei Jin;Charu C. Aggarwal;Yao Ma", "authorids": "~Hongliang_Chi1;~Wei_Jin4;~Charu_C._Aggarwal2;~Yao_Ma3", "gender": "M;;M;M", "homepage": "https://frankhlchi.github.io/;http://www.cs.emory.edu/~wjin30/;http://www.charuaggarwal.net;https://yaoma24.github.io/", "dblp": "326/5244;66/2173-9;a/CharuCAggarwal;212/7871.html", "google_scholar": "Q8S5vXsAAAAJ;eWow24EAAAAJ;x_wsduUAAAAJ;wf9TTOIAAAAJ", "orcid": ";;0000-0003-2579-7581;", "linkedin": "frank-hongliang-chi/;;;", "or_profile": "~Hongliang_Chi1;~Wei_Jin4;~Charu_C._Aggarwal2;~Yao_Ma3", "aff": "Rensselaer Polytechnic Institute;Emory University;International Business Machines;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;emory.edu;ibm.com;rpi.edu", "position": "PhD student;Assistant Professor;Distinguished Research Staff Member;Assistant Professor", "bibtex": "@misc{\nanonymous2024precedenceconstrained,\ntitle={Precedence-Constrained Winter Value for Effective Graph Data Valuation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=rdv2Fr6JTC}\n}", "github": "", "project": "", "reviewers": "DXbo;C9gV;deQC;2sH2", "site": "https://openreview.net/forum?id=rdv2Fr6JTC", "pdf_size": 1820762, "rating": "5;6;7;9", "confidence": "3;4;4;4", "wc_summary_and_contributions": "78;69;30;56", "wc_strengths": "3;83;2;238", "wc_improvement": "271;57;2;371", "wc_limitations": "1;21;197;26", "wc_correctness": "1;60;186;21", "wc_clarity": "1;59;98;105", "wc_relation_to_prior_work": "1;27;62;40", "wc_documentation": "1;89;99;20", "wc_additional_feedback": "1;1;1;1", "wc_review": "358;466;677;878", "wc_reply_reviewers": "0;10;367;0", "wc_reply_authors": "91;80;257;66", "reply_reviewers": "0;1;2;0", "reply_authors": "3;4;6;3", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 58.25, 18.08832496390973 ], "wc_strengths_avg": [ 81.5, 96.14702283482313 ], "wc_improvement_avg": [ 175.25, 151.23223036112375 ], "wc_limitations_avg": [ 61.25, 78.93153678980285 ], "wc_correctness_avg": [ 67.0, 71.9061888852413 ], "wc_clarity_avg": [ 65.75, 41.287861412284364 ], "wc_relation_to_prior_work_avg": [ 32.5, 22.073740054644116 ], "wc_documentation_avg": [ 52.25, 42.43450836288786 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 594.75, 199.76407960391677 ], "wc_reply_reviewers_avg": [ 94.25, 157.5251963972748 ], "wc_reply_authors_avg": [ 123.5, 77.58382563395543 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6831300510639732, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1311315850307100539&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;Emory University;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rpi.edu;https://www.emory.edu;https://www.ibm.com", "aff_unique_abbr": "RPI;Emory;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Decision-Making Behavior Evaluation Framework for LLMs under Uncertain Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93425", "id": "re0ly2Ylcu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=re0ly2Ylcu", "openreview": "https://openreview.net/forum?id=re0ly2Ylcu", "poster": "/media/PosterPDFs/NeurIPS%202024/93425.png?t=1732158344.0276258", "project": "", "author_site": "Jingru (Jessica) Jia, Zehua Yuan, Junhao Pan, Paul McNamara, Deming Chen", "tldr": "", "abstract": "When making decisions under uncertainty, individuals often deviate from rational behavior, which can be evaluated across three dimensions: risk preference, probability weighting, and loss aversion. Given the widespread use of large language models (LLMs) in supporting decision-making processes, it is crucial to assess whether their behavior aligns with human norms and ethical expectations or exhibits potential biases. Although several empirical studies have investigated the rationality and social behavior performance of LLMs, their internal decision-making tendencies and capabilities remain inadequately understood. This paper proposes a framework, grounded in behavioral economics theories, to evaluate the decision-making behaviors of LLMs. With a multiple-choice-list experiment, we initially estimate the degree of risk preference, probability weighting, and loss aversion in a context-free setting for three commercial LLMs: ChatGPT-4.0-Turbo, Claude-3-Opus, and Gemini-1.0-pro. Our results reveal that LLMs generally exhibit patterns similar to humans, such as risk aversion and loss aversion, with a tendency to overweight small probabilities, but there are significant variations in the degree to which these behaviors are expressed across different LLMs. Further, we explore their behavior when embedded with socio-demographic features of human beings, uncovering significant disparities across various demographic characteristics.", "keywords": "Large Language Model (LLM);LLM Reasoning;Fairness;Ethical AI;AI Decision-Making;LLM Prompting", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/d617cc2ce5b176fb5e7343d332d8b224e16f42e1.zip", "author": "Jingru Jia;Zehua Yuan;Junhao Pan;Paul E McNamara;Deming Chen", "authorids": "~Jingru_Jia1;~Zehua_Yuan1;~Junhao_Pan1;~Paul_E_McNamara1;~Deming_Chen1", "gender": "F;M;M;M;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;O0wkdHYAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0003-0449-5090;;;;", "linkedin": ";neoyuan;;;", "or_profile": "~Jingru_Jia1;~Zehua_Yuan1;~Junhao_Pan1;~Paul_E_McNamara1;~Deming_Chen1", "aff": "University of Illinois, Urbana Champaign;University of Illinois at Urbana Champaign;University of Illinois at Urbana Champaign;University of Illinois, Urbana Champaign;", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;", "position": "PhD student;PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\njia2024decisionmaking,\ntitle={Decision-Making Behavior Evaluation Framework for {LLM}s under Uncertain Context},\nauthor={Jingru Jia and Zehua Yuan and Junhao Pan and Paul E McNamara and Deming Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=re0ly2Ylcu}\n}", "github": "", "reviewers": "YNJ7;gKiM;hBFU;1hfj", "pdf_size": 1677446, "rating": "4;4;6;6", "confidence": "3;3;4;4", "soundness": "3;2;4;3", "novelty": "3;2;3;2", "presentation": "3;2;4;4", "wc_summary": "89;49;78;136", "wc_strengths": "80;11;67;63", "wc_weaknesses": "115;68;292;212", "wc_questions": "4;79;155;1", "wc_limitations": "1;4;34;1", "wc_review": "289;211;626;413", "wc_reply_reviewers": "0;0;67;72", "wc_reply_authors": "0;0;32;35", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 88.0, 31.32890039564108 ], "wc_strengths_avg": [ 55.25, 26.309456474811487 ], "wc_weaknesses_avg": [ 171.75, 86.69594857892726 ], "wc_questions_avg": [ 59.75, 63.25098813457384 ], "wc_limitations_avg": [ 10.0, 13.910427743243556 ], "wc_review_avg": [ 384.75, 156.809398634138 ], "wc_reply_reviewers_avg": [ 34.75, 34.79493497622894 ], "wc_reply_authors_avg": [ 16.75, 16.78354849249705 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6803250294616240783&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "MIDGArD: Modular Interpretable Diffusion over Graphs for Articulated Designs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93424", "id": "re2jPCnzkA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=re2jPCnzkA", "openreview": "https://openreview.net/forum?id=re2jPCnzkA", "poster": "/media/PosterPDFs/NeurIPS%202024/93424.png?t=1730214352.3875637", "project": "", "author_site": "Quentin Leboutet, Nina Wiedemann, zhipeng cai, Michael Paulitsch, Kai Yuan", "tldr": "", "abstract": "Providing functionality through articulation and interaction with objects is a key objective in 3D generation. We introduce MIDGArD (Modular Interpretable Diffusion over Graphs for Articulated Designs), a novel diffusion-based framework for articulated 3D asset generation. MIDGArD improves over foundational work in the field by enhancing quality, consistency, and controllability in the generation process. This is achieved through MIDGArD's modular approach that separates the problem into two primary components: structure generation and shape generation. The structure generation module of MIDGArD aims at producing coherent articulation features from noisy or incomplete inputs. It acts on the object's structural and kinematic attributes, represented as features of a graph that are being progressively denoised to issue coherent and interpretable articulation solutions. This denoised graph then serves as an advanced conditioning mechanism for the shape generation module, a 3D generative model that populates each link of the articulated structure with consistent 3D meshes. Experiments show the superiority of MIDGArD on the quality, consistency, and interpretability of the generated assets. Importantly, the generated models are fully simulatable, i.e., can be seamlessly integrated into standard physics engines such as MuJoCo, broadening MIDGArD's applicability to fields such as digital content creation, meta realities, and robotics.", "keywords": "3D articulated objects;diffusion models;generative models", "primary_area": "generative_models", "supplementary_material": "/attachment/a0e6fcbf36a8a2576351826c2cc667744af2430c.zip", "author": "Quentin Leboutet;Nina Wiedemann;zhipeng cai;Michael Paulitsch;Kai Yuan", "authorids": "~Quentin_Leboutet1;~Nina_Wiedemann1;~zhipeng_cai3;~Michael_Paulitsch1;~Kai_Yuan1", "gender": "M;F;M;M;M", "homepage": "https://github.com/quentin-leboutet;;https://zhipengcai.github.io;;https://www.linkedin.com/in/kai-yuan/", "dblp": "192/7108;247/3414;;79/3385.html;", "google_scholar": "SfiqI4AAAAAJ;qC1JKzoAAAAJ;;https://scholar.google.com/citations?hl=en;8eLlbhMAAAAJ", "orcid": "0000-0002-8155-0965;0000-0002-8160-7634;;0000-0002-9241-5806;", "linkedin": "quentinleboutet/;https://ch.linkedin.com/in/nina-wiedemann-79866917b;;michael-paulitsch-b96bb61/;", "or_profile": "~Quentin_Leboutet1;~Nina_Wiedemann1;~zhipeng_cai3;~Michael_Paulitsch1;~Kai_Yuan1", "aff": "Intel;Department of Informatics, University of Zurich, University of Zurich;Intel;Intel;Intel", "aff_domain": "intel.com;ifi.uzh.ch;intel.com;intel.com;intel.com", "position": "Researcher;Intern;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nleboutet2024midgard,\ntitle={{MIDGA}rD: Modular Interpretable Diffusion over Graphs for Articulated Designs},\nauthor={Quentin Leboutet and Nina Wiedemann and zhipeng cai and Michael Paulitsch and Kai Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=re2jPCnzkA}\n}", "github": "", "reviewers": "T3Uj;czmP;2qcf;jtcL", "pdf_size": 12036857, "rating": "3;5;7;7", "confidence": "5;4;3;4", "soundness": "1;3;3;3", "novelty": "1;2;4;3", "presentation": "2;2;3;3", "wc_summary": "154;64;149;104", "wc_strengths": "84;76;51;69", "wc_weaknesses": "298;153;104;55", "wc_questions": "154;113;1;4", "wc_limitations": "79;6;8;5", "wc_review": "769;412;313;237", "wc_reply_reviewers": "854;44;30;37", "wc_reply_authors": "795;43;20;6", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 117.75, 36.635877224382114 ], "wc_strengths_avg": [ 70.0, 12.186057606953941 ], "wc_weaknesses_avg": [ 152.5, 90.869411795169 ], "wc_questions_avg": [ 68.0, 67.09321873334144 ], "wc_limitations_avg": [ 24.5, 31.48412298286233 ], "wc_review_avg": [ 432.75, 203.80919385542938 ], "wc_reply_reviewers_avg": [ 241.25, 353.80600263421195 ], "wc_reply_authors_avg": [ 216.0, 334.54670824863905 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lTMnyh3BtcsJ:scholar.google.com/&scioq=MIDGArD:+Modular+Interpretable+Diffusion+over+Graphs+for+Articulated+Designs&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "intel.com;ifi.uzh.ch;intel.com;intel.com;intel.com", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Intel;University of Zurich", "aff_unique_dep": "Intel Corporation;Department of Informatics", "aff_unique_url": "https://www.intel.com;https://www.uzh.ch", "aff_unique_abbr": "Intel;UZH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Aligning to Thousands of Preferences via System Message Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93423", "id": "recsheQ7e8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=recsheQ7e8", "openreview": "https://openreview.net/forum?id=recsheQ7e8", "poster": "/media/PosterPDFs/NeurIPS%202024/93423.png?t=1731754348.7429128", "project": "", "author_site": "Seongyun Lee, Sue Hyun Park, Seungone Kim, Minjoon Seo", "tldr": "", "abstract": "Although humans inherently have diverse values, current large language model (LLM) alignment methods often assume that aligning LLMs with the general public\u2019s preferences is optimal. A major challenge in adopting a more individualized approach to LLM alignment is its lack of scalability, as it involves repeatedly acquiring preference data and training new reward models and LLMs for each individual\u2019s preferences. To address these challenges, we propose a new paradigm where users specify what they value most within the system message, steering the LLM\u2019s generation behavior to better align with the user\u2019s intentions. However, a naive application of such an approach is non-trivial since LLMs are typically trained on a uniform system message (e.g., \u201cYou are a helpful assistant\u201d), which limits\ntheir ability to generalize to diverse, unseen system messages. To improve this generalization, we create Multifaceted Collection, augmenting 66k user instructions into 197k system messages through hierarchical user value combinations. Using this dataset, we train a 7B LLM called Janus and test it on 921 prompts from 5 benchmarks (AlpacaEval 2.0, FLASK, Koala, MT-Bench, and Self-Instruct)\nby adding system messages that reflect unseen user values. JANUS achieves tie+win rate of 75.2%, 72.4%, and 66.4% against Mistral 7B Instruct v0.2, GPT-3.5 Turbo, and GPT-4, respectively. Unexpectedly, on three benchmarks focused on response helpfulness (AlpacaEval 2.0, MT-Bench, Arena Hard Auto v0.1), JANUS also outperforms LLaMA 3 8B Instruct by a +4.0%p, +0.1%p, +3.0%p margin, underscoring that training with a vast array of system messages could also enhance alignment to the general public\u2019s preference as well. Our code, dataset, benchmark, and models are available at https://lklab.kaist.ac.kr/Janus/.", "keywords": "Large language model;Preference alignment;Pluralistic alignment;Multifacetedness;System message;Instruction tuning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/9fa68d543d9376fc0b3f75cf52f1b1191581619b.zip", "author": "Seongyun Lee;Sue Hyun Park;Seungone Kim;Minjoon Seo", "authorids": "~Seongyun_Lee1;~Sue_Hyun_Park1;~Seungone_Kim1;~Minjoon_Seo1", "gender": "M;F;M;M", "homepage": ";https://suehyunpark.github.io/;https://github.com/SeungoneKim;https://seominjoon.github.io", "dblp": "339/6646;360/6203;324/2064.html;149/1367", "google_scholar": ";AyZdps8AAAAJ;https://scholar.google.co.kr/citations?user=qEf3e3EAAAAJ;zYze5fIAAAAJ", "orcid": ";0009-0000-7528-4042;;", "linkedin": "seongyun-lee-647753233/;sue-hyun-park/;seungone-kim-09b551264/;minjoon-seo/", "or_profile": "~Seongyun_Lee1;~Sue_Hyun_Park1;~Seungone_Kim1;~Minjoon_Seo1", "aff": "Korea University;Korea Advanced Institute of Science & Technology;KAIST;Twelve Labs", "aff_domain": "korea.ac.kr;kaist.edu;ee.kaist.ac.kr;twelvelabs.io", "position": "Undergrad student;MS student;MS student;Chief Scientist", "bibtex": "@inproceedings{\nlee2024aligning,\ntitle={Aligning to Thousands of Preferences via System Message Generalization},\nauthor={Seongyun Lee and Sue Hyun Park and Seungone Kim and Minjoon Seo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=recsheQ7e8}\n}", "github": "", "reviewers": "k2dN;eXeU;bEUV;uZmQ", "pdf_size": 4746386, "rating": "5;5;6;6", "confidence": "4;4;5;4", "soundness": "3;3;2;4", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "124;76;44;109", "wc_strengths": "50;110;50;73", "wc_weaknesses": "101;138;75;35", "wc_questions": "87;217;14;12", "wc_limitations": "46;1;1;6", "wc_review": "408;542;184;235", "wc_reply_reviewers": "130;17;36;15", "wc_reply_authors": "764;0;129;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.25, 30.889925542156945 ], "wc_strengths_avg": [ 70.75, 24.529319191530774 ], "wc_weaknesses_avg": [ 87.25, 37.56577564752257 ], "wc_questions_avg": [ 82.5, 83.3261663584735 ], "wc_limitations_avg": [ 13.5, 18.874586088176873 ], "wc_review_avg": [ 342.25, 142.09921709847666 ], "wc_reply_reviewers_avg": [ 49.5, 47.193749586147526 ], "wc_reply_authors_avg": [ 223.25, 316.61283533678795 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7071489280693284905&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "korea.ac.kr;kaist.edu;ee.kaist.ac.kr;twelvelabs.io", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Korea University;Korea Advanced Institute of Science and Technology;Twelve Labs", "aff_unique_dep": ";;", "aff_unique_url": "https://www.korea.ac.kr;https://www.kaist.ac.kr;https://twelvelabs.com", "aff_unique_abbr": "KU;KAIST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "title": "SS3DM: Benchmarking Street-View Surface Reconstruction with a Synthetic 3D Mesh Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97483", "id": "rfbSL1qXN3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rfbSL1qXN3", "openreview": "https://openreview.net/forum?id=rfbSL1qXN3", "poster": "/media/PosterPDFs/NeurIPS%202024/97483.png?t=1730717447.5630913", "project": "", "author_site": "Yubin Hu, Kairui Wen, Heng Zhou, Xiaoyang Guo, Yong-jin Liu", "tldr": "", "abstract": "Reconstructing accurate 3D surfaces for street-view scenarios is crucial for applications such as digital entertainment and autonomous driving simulation. However, existing street-view datasets, including KITTI, Waymo, and nuScenes, only offer noisy LiDAR points as ground-truth data for geometric evaluation of reconstructed surfaces. These geometric ground-truths often lack the necessary precision to evaluate surface positions and do not provide data for assessing surface normals. To overcome these challenges, we introduce the SS3DM dataset, comprising precise \\textbf{S}ynthetic \\textbf{S}treet-view \\textbf{3D} \\textbf{M}esh models exported from the CARLA simulator. These mesh models facilitate accurate position evaluation and include normal vectors for evaluating surface normal. \n To simulate the input data in realistic driving scenarios for 3D reconstruction, we virtually drive a vehicle equipped with six RGB cameras and five LiDAR sensors in diverse outdoor scenes.\n Leveraging this dataset, we establish a benchmark for state-of-the-art surface reconstruction methods, providing a comprehensive evaluation of the associated challenges. \n For more information, visit our homepage at https://ss3dm.top.", "keywords": "Street-view surface reconstruction;Mesh models", "primary_area": "", "supplementary_material": "/attachment/df788527fdc967deed7120adb887c063e533f559.zip", "author": "Yubin Hu;Kairui Wen;Heng Zhou;Xiaoyang Guo;Yong-jin Liu", "authorids": "~Yubin_Hu1;~Kairui_Wen1;~Heng_Zhou3;~Xiaoyang_Guo1;~Yong-jin_Liu1", "gender": "M;M;M;M;M", "homepage": ";https://bluenine9.github.io/;https://henggg.cn;https://xy-guo.github.io/;https://cg.cs.tsinghua.edu.cn/people/~Yongjin/Yongjin.htm", "dblp": "266/8226-1;;;183/5605;27/2098", "google_scholar": "swN2J1QAAAAJ;;;CrK4w4UAAAAJ;https://scholar.google.com.tw/citations?user=GNDtwWQAAAAJ", "orcid": "0000-0001-6107-2858;;;0000-0001-8265-7441;0000-0001-5774-1916", "linkedin": ";;;;", "or_profile": "~Yubin_Hu1;~Kairui_Wen1;~Heng_Zhou3;~Xiaoyang_Guo1;~Yong-jin_Liu1", "aff": "Tsinghua University;Tsinghua University;Beijing University of Posts and Telecommunications;Horizon Robotics;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;horizon.cc;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Researcher;Full Professor", "bibtex": "@inproceedings{\nhu2024ssdm,\ntitle={{SS}3{DM}: Benchmarking Street-View Surface Reconstruction with a Synthetic 3D Mesh Dataset},\nauthor={Yubin Hu and Kairui Wen and Heng Zhou and Xiaoyang Guo and Yong-jin Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=rfbSL1qXN3}\n}", "github": "", "reviewers": "3RDx;Mh8j;1uab", "pdf_size": 45822973, "rating": "5;7;7", "confidence": "4;3;4", "wc_summary_and_contributions": "98;112;57", "wc_strengths": "19;94;144", "wc_improvement": "42;90;146", "wc_limitations": "1;1;14", "wc_correctness": "1;1;36", "wc_clarity": "1;1;58", "wc_relation_to_prior_work": "1;1;17", "wc_documentation": "1;1;27", "wc_additional_feedback": "1;1;1", "wc_review": "165;302;500", "wc_reply_reviewers": "0;0;16", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 89.0, 23.338094752285727 ], "wc_strengths_avg": [ 85.66666666666667, 51.37011669140814 ], "wc_improvement_avg": [ 92.66666666666667, 42.49967320135794 ], "wc_limitations_avg": [ 5.333333333333333, 6.128258770283413 ], "wc_correctness_avg": [ 12.666666666666666, 16.49915822768611 ], "wc_clarity_avg": [ 20.0, 26.870057685088806 ], "wc_relation_to_prior_work_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_documentation_avg": [ 9.666666666666666, 12.256517540566826 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 322.3333333333333, 137.5168676522589 ], "wc_reply_reviewers_avg": [ 5.333333333333333, 7.542472332656507 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15042384886202927089&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;bupt.edu.cn;horizon.cc;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tsinghua University;Beijing University of Posts and Telecommunications;Horizon Robotics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bupt.edu.cn/;https://www.horizon-robotics.com/", "aff_unique_abbr": "THU;BUPT;Horizon Robotics", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Discovering Sparsity Allocation for Layer-wise Pruning of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93422", "id": "rgtrYVC9n4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rgtrYVC9n4", "openreview": "https://openreview.net/forum?id=rgtrYVC9n4", "poster": "/media/PosterPDFs/NeurIPS%202024/93422.png?t=1730253080.7111483", "project": "", "author_site": "Lujun Li, Peijie Dong, Zhenheng Tang, Xiang Liu, Qiang Wang, Wenhan Luo, Wei Xue, Qifeng Liu, Xiaowen Chu, Yike Guo", "tldr": "", "abstract": "In this paper, we present DSA, the first automated framework for discovering sparsity allocation schemes for layer-wise pruning in Large Language Models (LLMs). LLMs have become increasingly powerful, but their large parameter counts make them computationally expensive. Existing pruning methods for compressing LLMs primarily focus on evaluating redundancies and removing element-wise weights. However, these methods fail to allocate adaptive layer-wise sparsities, leading to performance degradation in challenging tasks. We observe that per-layer importance statistics can serve as allocation indications, but their effectiveness depends on the allocation function between layers. To address this issue, we develop an expression discovery framework to explore potential allocation strategies. Our allocation functions involve two steps: reducing element-wise metrics to per-layer importance scores, and modelling layer importance to sparsity ratios. To search for the most effective allocation function, we construct a search space consisting of pre-process, reduction, transform, and post-process operations. We leverage an evolutionary algorithm to perform crossover and mutation on superior candidates within the population, guided by performance evaluation. Finally, we seamlessly integrate our discovered functions into various uniform methods, resulting in significant performance improvements. We conduct extensive experiments on multiple challenging tasks such as arithmetic, knowledge reasoning, and multimodal benchmarks spanning GSM8K, MMLU, SQA, and VQA, demonstrating that our DSA method achieves significant performance gains on the LLaMA-1|2|3, Mistral, and OPT models. Notably, the LLaMA-1|2|3 model pruned by our DSA reaches 4.73\\%|6.18\\%|10.65\\% gain over the state-of-the-art techniques (e.g., Wanda and SparseGPT).", "keywords": "network pruning;layerwise sparsity allocation;large language models;model compression.", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Lujun Li;Peijie Dong;Zhenheng Tang;Xiang Liu;Qiang Wang;Wenhan Luo;Wei Xue;Qifeng Liu;Xiaowen Chu;Yike Guo", "authorids": "~Lujun_Li1;~Peijie_Dong1;~Zhenheng_Tang2;~Xiang_Liu10;~Qiang_Wang14;~Wenhan_Luo1;~Wei_Xue5;~Qifeng_Liu1;~Xiaowen_Chu2;~Yike_Guo1", "gender": ";M;;M;M;M;M;M;M;M", "homepage": ";https://pprp.github.io;;https://dominic789654.github.io/;http://faculty.hitsz.edu.cn/wangqiang;https://whluo.github.io/;http://www.wei-xue.com;;https://facultyprofiles.hkust-gz.edu.cn/faculty-personal-page/CHU-Xiaowen/xwchu;https://cse.hkust.edu.hk/admin/people/faculty/profile/yikeguo", "dblp": ";315/4734;;31/5736-1;64/5630-22;64/9877;;23/992.html;24/2536;g/YikeGuo", "google_scholar": ";TqS6s4gAAAAJ;;VtK5lwUAAAAJ;6YzjcNgAAAAJ;g20Q12MAAAAJ;77lSoywAAAAJ;scR1CXcAAAAJ;https://scholar.google.com.hk/citations?user=v4rX24EAAAAJ;https://scholar.google.com.tw/citations?user=-0q6cIYAAAAJ", "orcid": ";0000-0003-1952-4544;;;0000-0002-2986-967X;0000-0002-5697-4168;;0000-0001-6191-076X;0000-0001-9745-4372;0009-0005-8401-282X", "linkedin": ";;;;;wenhan-luo-a1843480/;;qifeng-liu-483b3227/;;", "or_profile": "~Lujun_Li1;~Peijie_Dong1;~Zhenheng_Tang2;~Xiang_Liu10;~Qiang_Wang14;~Wenhan_Luo1;~Wei_Xue5;~Qifeng_Liu1;~Xiaowen_Chu2;~Yike_Guo1", "aff": ";The Hong Kong University of Science and Technology (Guang Zhou);;Hong Kong University of Science and Technology (Guang Zhou));Harbin Institute of Technology, Shenzhen;Sun Yat-sen University;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou);Imperial College London", "aff_domain": ";connect.hkust-gz.edu.cn;;hkust-gz.edu.cn;hit.edu.cn;sysu.edu.cn;ust.hk;hkust.edu.hk;ust.hk;imperial.ac.uk", "position": ";Phd student;;PhD student;Assistant Professor;Associate Professor;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024discovering,\ntitle={Discovering Sparsity Allocation for Layer-wise Pruning of Large Language Models},\nauthor={Lujun Li and Peijie Dong and Zhenheng Tang and Xiang Liu and Qiang Wang and Wenhan Luo and Wei Xue and Qifeng Liu and Xiaowen Chu and Yike Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rgtrYVC9n4}\n}", "github": "", "reviewers": "VeXf;1f5c;gYqu;s93u;of38", "pdf_size": 2148977, "rating": "4;5;6;6;6", "confidence": "3;2;4;2;4", "soundness": "2;3;3;3;3", "novelty": "2;3;3;2;3", "presentation": "2;2;3;3;3", "wc_summary": "55;65;27;28;64", "wc_strengths": "57;38;25;24;52", "wc_weaknesses": "103;112;76;67;139", "wc_questions": "19;2;81;3;73", "wc_limitations": "11;1;45;1;1", "wc_review": "245;218;254;123;329", "wc_reply_reviewers": "235;94;95;0;23", "wc_reply_authors": "1477;245;1059;370;229", "reply_reviewers": "1;1;2;0;1", "reply_authors": "6;4;6;5;4", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 3.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 47.8, 16.939893742287758 ], "wc_strengths_avg": [ 39.2, 13.526270735128733 ], "wc_weaknesses_avg": [ 99.4, 25.834860169933183 ], "wc_questions_avg": [ 35.6, 34.430219284808516 ], "wc_limitations_avg": [ 11.8, 17.045820602130014 ], "wc_review_avg": [ 233.8, 66.51736615350912 ], "wc_reply_reviewers_avg": [ 89.4, 82.04047781430822 ], "wc_reply_authors_avg": [ 676.0, 503.48704054821513 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 5.0, 0.8944271909999159 ], "replies_avg": [ 39, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.2795084971874737, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3541189239333839644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";connect.hkust-gz.edu.cn;;hkust-gz.edu.cn;hit.edu.cn;sysu.edu.cn;ust.hk;hkust.edu.hk;ust.hk;imperial.ac.uk", "author_num": 10, "aff_unique_index": "0;0;1;2;0;0;0;3", "aff_unique_norm": "Hong Kong University of Science and Technology;Harbin Institute of Technology;Sun Yat-sen University;Imperial College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ust.hk;http://en.hhit.edu.cn/;http://www.sysu.edu.cn/;https://www.imperial.ac.uk", "aff_unique_abbr": "HKUST;HIT;SYSU;ICL", "aff_campus_unique_index": "0;0;1;0;0;0", "aff_campus_unique": "Hong Kong SAR;Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Super Consistency of Neural Network Landscapes and Learning Rate Transfer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93421", "id": "rgwhJ7INtZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rgwhJ7INtZ", "openreview": "https://openreview.net/forum?id=rgwhJ7INtZ", "poster": "", "project": "", "author_site": "Lorenzo Noci, Alexandru Meterez, Thomas Hofmann, Antonio Orvieto", "tldr": "", "abstract": "Recently, there has been growing evidence that if the width and depth of a neural network are scaled toward the so-called rich feature learning limit ($\\mu$P and its depth extension), then some hyperparameters --- such as the learning rate --- exhibit transfer from small to very large models. From an optimization perspective, this phenomenon is puzzling, as it implies that the loss landscape is consistently similar across very different model sizes. In this work, we study the landscape through the lens of the Hessian, with a focus on its largest eigenvalue (i.e. the sharpness), and find that certain spectral properties under $\\mu$P are largely independent of the width and depth of the network along the training trajectory. We name this property *super consistency* of the landscape. On the other hand, we show that in the Neural Tangent Kernel (NTK) and other scaling regimes, the sharpness exhibits very different dynamics at different scales. But what causes these differences in the sharpness dynamics? Through a connection between the Hessian's and the NTK's spectrum, we argue that the cause lies in the presence (for $\\mu$P) or progressive absence (for the NTK scaling) of feature learning.\nWe corroborate our claims with a substantial suite of experiments, covering a wide range of datasets and architectures: from ResNets and Vision Transformers trained on benchmark vision datasets to Transformers-based language models trained on WikiText.", "keywords": "mup;deep learning theory;optimization theory;edge of stability;NTK", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Lorenzo Noci;Alexandru Meterez;Thomas Hofmann;Antonio Orvieto", "authorids": "~Lorenzo_Noci1;~Alexandru_Meterez1;~Thomas_Hofmann1;~Antonio_Orvieto3", "gender": "M;M;M;M", "homepage": ";https://alexandrumeterez.github.io/;http://www.da.inf.ethz.ch/;http://orvi.altervista.org/", "dblp": "268/6839;;h/ThHofmann;", "google_scholar": ";wSrCMa4AAAAJ;T3hAyLkAAAAJ;xkuLyHoAAAAJ", "orcid": ";;;", "linkedin": "lorenzo-noci-97aa59130;;thomas-hofmann-1ab2402/;antonio-orvieto-947ab0130/", "or_profile": "~Lorenzo_Noci1;~Alexandru_Meterez1;~Thomas_Hofmann1;~Antonio_Orvieto3", "aff": "ETHZ - ETH Zurich;Max Planck Institute for Intelligent Systems, Max-Planck Institute;Swiss Federal Institute of Technology;ELLIS Institute T\u00fcbingen, Max Planck Institute for Intelligent Systems, T\u00fcbingen AI Center, T\u00fcbingen, Germany", "aff_domain": "ethz.ch;tue.mpg.de;ethz.ch;tue.ellis.eu", "position": "PhD student;Intern;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nnoci2024super,\ntitle={Super Consistency of Neural Network Landscapes and Learning Rate Transfer},\nauthor={Lorenzo Noci and Alexandru Meterez and Thomas Hofmann and Antonio Orvieto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rgwhJ7INtZ}\n}", "github": "", "reviewers": "11Ly;47kG;f2zq;ssWb", "pdf_size": 2833056, "rating": "6;7;7;8", "confidence": "4;3;5;2", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;1;4", "wc_summary": "56;156;78;68", "wc_strengths": "61;52;115;61", "wc_weaknesses": "26;86;584;13", "wc_questions": "16;88;91;63", "wc_limitations": "23;4;29;10", "wc_review": "182;386;897;215", "wc_reply_reviewers": "14;150;1729;6", "wc_reply_authors": "0;110;1521;0", "reply_reviewers": "1;1;7;1", "reply_authors": "1;2;7;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 1.0897247358851685 ], "wc_summary_avg": [ 89.5, 39.17588543989785 ], "wc_strengths_avg": [ 72.25, 24.953707139421187 ], "wc_weaknesses_avg": [ 177.25, 236.4459504834033 ], "wc_questions_avg": [ 64.5, 30.03747659175118 ], "wc_limitations_avg": [ 16.5, 9.962429422585638 ], "wc_review_avg": [ 420.0, 286.0742910504193 ], "wc_reply_reviewers_avg": [ 474.75, 726.3991241046481 ], "wc_reply_authors_avg": [ 407.75, 644.302093353731 ], "reply_reviewers_avg": [ 2.5, 2.598076211353316 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6324555320336759, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2828780928130069126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ethz.ch;tue.mpg.de;ethz.ch;tue.ellis.eu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "ETH Zurich;Max Planck Institute for Intelligent Systems;Swiss Federal Institute of Technology;ELLIS Institute T\u00fcbingen", "aff_unique_dep": ";Intelligent Systems;;", "aff_unique_url": "https://www.ethz.ch;https://www.mpi-is.mpg.de;https://www.ethz.ch;", "aff_unique_abbr": "ETHZ;MPI-IS;ETH Zurich;", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Switzerland;Germany" }, { "title": "Reranking Laws for Language Generation: A Communication-Theoretic Perspective", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93420", "id": "rhCgizNupi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rhCgizNupi", "openreview": "https://openreview.net/forum?id=rhCgizNupi", "poster": "/media/PosterPDFs/NeurIPS%202024/93420.png?t=1732027101.0922337", "project": "", "author_site": "Ant\u00f3nio Farinhas, Xiaocheng Li, Andr\u00e9 Martins", "tldr": "", "abstract": "To ensure large language models (LLMs) are used safely, one must reduce their propensity to hallucinate or to generate unacceptable answers. A simple and often used strategy is to first let the LLM generate multiple hypotheses and then employ a reranker to choose the best one. In this paper, we draw a parallel between this strategy and the use of redundancy to decrease the error rate in noisy communication channels. We conceptualize the generator as a sender transmitting multiple descriptions of a message through parallel noisy channels. The receiver decodes the message by ranking the (potentially corrupted) descriptions and selecting the one found to be most reliable. We provide conditions under which this protocol is asymptotically error-free (i.e., yields an acceptable answer almost surely) even in scenarios where the reranker is imperfect (governed by Mallows or Zipf-Mandelbrot models) and the channel distributions are statistically dependent. We use our framework to obtain reranking laws which we validate empirically on two real-world tasks using LLMs: text-to-code generation with DeepSeek-Coder 7B and machine translation of medical data with TowerInstruct 13B.", "keywords": "language generation;reranking;communication theory;reliability", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ant\u00f3nio Farinhas;Haau-Sing Li;Andre Martins", "authorids": "~Ant\u00f3nio_Farinhas1;~Haau-Sing_Li1;~Andre_Martins1", "gender": "M;M;M", "homepage": ";;https://andre-martins.github.io/", "dblp": "267/5345;;m/AndreFTMartins", "google_scholar": "yK5wIPkAAAAJ;ITrvoewAAAAJ;https://scholar.google.pt/citations?user=mT7ppvwAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ant\u00f3nio_Farinhas1;~Haau-Sing_Li1;~Andre_Martins1", "aff": "Instituto Superior T\u00e9cnico;Instituto de Telecomunica\u00e7\u00f5es, Portugal;Unbabel", "aff_domain": "tecnico.ulisboa.pt;it.pt;unbabel.com", "position": "PhD student;PhD Exchange Student;Research Scientist", "bibtex": "@inproceedings{\nfarinhas2024reranking,\ntitle={Reranking Laws for Language Generation: A Communication-Theoretic Perspective},\nauthor={Ant{\\'o}nio Farinhas and Haau-Sing Li and Andre Martins},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rhCgizNupi}\n}", "github": "", "reviewers": "8my9;aDZG;EFsu;o8DD", "pdf_size": 789990, "rating": "5;7;7;8", "confidence": "4;3;4;4", "soundness": "3;3;4;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "70;154;167;39", "wc_strengths": "32;51;81;90", "wc_weaknesses": "104;155;219;50", "wc_questions": "4;183;70;37", "wc_limitations": "1;10;33;4", "wc_review": "211;553;570;220", "wc_reply_reviewers": "15;57;0;0", "wc_reply_authors": "0;0;283;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 107.5, 54.31620384378864 ], "wc_strengths_avg": [ 63.5, 23.221757039466244 ], "wc_weaknesses_avg": [ 132.0, 62.46198844097104 ], "wc_questions_avg": [ 73.5, 67.38879728857016 ], "wc_limitations_avg": [ 12.0, 12.549900398011133 ], "wc_review_avg": [ 388.5, 173.13361891903028 ], "wc_reply_reviewers_avg": [ 18.0, 23.33452377915607 ], "wc_reply_authors_avg": [ 70.75, 122.54259463549806 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16815324958708795662&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tecnico.ulisboa.pt;it.pt;unbabel.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Instituto Superior T\u00e9cnico;Instituto de Telecomunica\u00e7\u00f5es;Unbabel", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ist.utl.pt;https://www.it.pt;https://www.unbabel.com", "aff_unique_abbr": "IST;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Portugal" }, { "title": "Moving Off-the-Grid: Scene-Grounded Video Representations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93419", "id": "rjSPDVdUaw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rjSPDVdUaw", "openreview": "https://openreview.net/forum?id=rjSPDVdUaw", "poster": "", "project": "", "author_site": "Sjoerd van Steenkiste, Daniel Zoran, Yi Yang, Yulia Rubanova, Rishabh Kabra, Carl Doersch, Dilara Gokay, joseph heyward, Etienne Pot, Klaus Greff, Drew Hudson, Thomas Keck, Joao Carreira, Alexey Dosovitskiy, Mehdi S. M. Sajjadi, Thomas Kipf", "tldr": "", "abstract": "Current vision models typically maintain a fixed correspondence between their representation structure and image space.\nEach layer comprises a set of tokens arranged \u201con-the-grid,\u201d which biases patches or tokens to encode information at a specific spatio(-temporal) location. In this work we present *Moving Off-the-Grid* (MooG), a self-supervised video representation model that offers an alternative approach, allowing tokens to move \u201coff-the-grid\u201d to better enable them to represent scene elements consistently, even as they move across the image plane through time. By using a combination of cross-attention and positional embeddings we disentangle the representation structure and image structure. We find that a simple self-supervised objective\u2014next frame prediction\u2014trained on video data, results in a set of latent tokens which bind to specific scene structures and track them as they move. We demonstrate the usefulness of MooG\u2019s learned representation both qualitatively and quantitatively by training readouts on top of the learned representation on a variety of downstream tasks. We show that MooG can provide a strong foundation for different vision tasks when compared to \u201con-the-grid\u201d baselines.", "keywords": "Self supervised learning;point tracking;representation learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/5417043d61fa1dfc8a68e11ac50a1887c13cc1ec.zip", "author": "Sjoerd van Steenkiste;Daniel Zoran;Yi Yang;Yulia Rubanova;Rishabh Kabra;Carl Doersch;Dilara Gokay;Joseph Heyward;Etienne Pot;Klaus Greff;Drew A. Hudson;Thomas Albert Keck;Joao Carreira;Alexey Dosovitskiy;Mehdi S. M. Sajjadi;Thomas Kipf", "authorids": "~Sjoerd_van_Steenkiste1;~Daniel_Zoran1;~Yi_Yang10;~Yulia_Rubanova2;~Rishabh_Kabra1;~Carl_Doersch1;~Dilara_Gokay1;~Joseph_Heyward2;~Etienne_Pot1;~Klaus_Greff1;~Drew_Arad_Hudson1;~Thomas_Albert_Keck1;~Joao_Carreira1;~Alexey_Dosovitskiy1;~Mehdi_S._M._Sajjadi1;~Thomas_Kipf2", "gender": "M;M;M;M;M;F;M;M;M;F;M;M;;Unspecified;M;F", "homepage": "http://www.sjoerdvansteenkiste.com/;;https://yangyi02.github.io/;;;;https://uk.linkedin.com/in/joe-heyward-71623595;https://github.com/Conchylicultor;http://qwlouse.github.io/;;https://scholar.google.co.uk/citations?user=GRICvzoAAAAJ&hl=en;;;http://msajjadi.com;http://tkipf.github.io/;https://yuliarubanova.github.io/", "dblp": "183/9326;18/9054;33/4854-7;234/8010;12/8654;227/2867;;;76/11430;;;61/5621-1;135/4956.html;164/6190;186/8206;222/3085", "google_scholar": "i-AStBYAAAAJ;1JQDH_AAAAAJ;-BO7TXUAAAAJ;;SBTxvCoAAAAJ;cnbENAEAAAAJ;;https://scholar.google.ch/citations?user=e1GAF6sAAAAJ;https://scholar.google.ch/citations?user=OcownLgAAAAJ;;https://scholar.google.co.uk/citations?user=GRICvzoAAAAJ;https://scholar.google.pt/citations?user=IUZ-7_cAAAAJ;FXNJRDoAAAAJ;https://scholar.google.de/citations?user=rHF25YEAAAAJ;83HL5FwAAAAJ;u_HzE9wAAAAJ", "orcid": ";;;;;;;;0000-0001-6982-0937;;;;;0000-0002-6002-2370;;", "linkedin": ";;;;;dilaragokay/;https://linkedin.com/in/joe-heyward-71623595;;;drew-arad-hudson-b3a71348/;;jo%C3%A3o-carreira-56238a7/;;;thomas-kipf-6b260410a;https://linkedin.com/in/yulia-rubanova-031702100", "or_profile": "~Sjoerd_van_Steenkiste1;~Daniel_Zoran1;~Yi_Yang10;~Rishabh_Kabra1;~Carl_Doersch1;~Dilara_Gokay1;~Joseph_Heyward2;~Etienne_Pot1;~Klaus_Greff1;~Drew_Arad_Hudson1;~Thomas_Albert_Keck1;~Joao_Carreira1;~Alexey_Dosovitskiy1;~Mehdi_S._M._Sajjadi1;~Thomas_N._Kipf1;~Yulia_Rubanova1", "aff": "Google;Google DeepMind;Google DeepMind;University College London, University of London;Google DeepMind;Google DeepMind;Google;Google;Google;Google DeepMind;Google DeepMind;Google DeepMind;Inceptive;Google DeepMind;Google;Google DeepMind", "aff_domain": "google.com;google.com;deepmind.com;ucl.ac.uk;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;inceptive.team;google.com;google.com;deepmind.com", "position": "Researcher;Research Scientist;Researcher;PhD student;Research Scientist;Researcher;Researcher;Researcher;Researcher;Research Scientist;Researcher;Research Scientist;Researcher;Researcher;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nsteenkiste2024moving,\ntitle={Moving Off-the-Grid: Scene-Grounded Video Representations},\nauthor={Sjoerd van Steenkiste and Daniel Zoran and Yi Yang and Yulia Rubanova and Rishabh Kabra and Carl Doersch and Dilara Gokay and Joseph Heyward and Etienne Pot and Klaus Greff and Drew A. Hudson and Thomas Albert Keck and Joao Carreira and Alexey Dosovitskiy and Mehdi S. M. Sajjadi and Thomas Kipf},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rjSPDVdUaw}\n}", "github": "", "reviewers": "ykZT;RPGH;r1sm;MZrN", "pdf_size": 9164212, "rating": "6;6;6;7", "confidence": "4;3;4;4", "soundness": "3;3;2;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "114;85;88;157", "wc_strengths": "45;77;19;195", "wc_weaknesses": "269;141;240;196", "wc_questions": "139;5;2;86", "wc_limitations": "4;16;1;31", "wc_review": "571;324;350;665", "wc_reply_reviewers": "16;0;141;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.0, 28.853076092507017 ], "wc_strengths_avg": [ 84.0, 67.29784543356496 ], "wc_weaknesses_avg": [ 211.5, 48.293374286748694 ], "wc_questions_avg": [ 58.0, 57.64113114781839 ], "wc_limitations_avg": [ 13.0, 11.811011811017716 ], "wc_review_avg": [ 477.5, 144.66945081806318 ], "wc_reply_reviewers_avg": [ 45.0, 56.049085630365106 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5198905383607532209&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "google.com;google.com;deepmind.com;ucl.ac.uk;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.com;google.com;inceptive.team;google.com;google.com;deepmind.com", "author_num": 16, "aff_unique_index": "0;0;0;1;0;0;0;0;0;0;0;0;2;0;0;0", "aff_unique_norm": "Google;University College London;Inceptive", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.ucl.ac.uk;", "aff_unique_abbr": "Google;UCL;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;1;1;1;1;0;0;0;1;1;1;1;0;1", "aff_country_unique": "United States;United Kingdom;" }, { "title": "Sequoia: Scalable and Robust Speculative Decoding", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93418", "id": "rk2L9YGDi2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rk2L9YGDi2", "openreview": "https://openreview.net/forum?id=rk2L9YGDi2", "poster": "/media/PosterPDFs/NeurIPS%202024/93418.png?t=1733792005.6765432", "project": "", "author_site": "Zhuoming Chen, Avner May, Ruslan Svirschevski, Yu-Hsun Huang, Max Ryabinin, Zhihao Jia, Beidi Chen", "tldr": "", "abstract": "As the usage of large language models (LLMs) grows, it becomes increasingly important to serve them quickly and efficiently. While speculative decoding has recently emerged as a promising direction for accelerating LLM serving, existing methods are limited in their ability to scale to larger speculation budgets and adapt to different hyperparameters. This paper introduces Sequoia, a scalable and robust algorithm for speculative decoding. To improve scalability, Sequoia introduces a dynamic programming algorithm to find an optimal tree structure for the speculated tokens. To achieve robust speculative decoding, Sequoia uses a novel sampling and verification method that outperforms prior work across different decoding temperatures. Sequoia improves the decoding speed of Llama2-7B, Llama2-13B, and Vicuna-33B on an A100 GPU by up to $4.04\\times$, $3.73\\times$, and $2.27 \\times$. To serve Llama3-70B-Instruct on a single L40 GPU through offloading, Sequoia reduces the per-token decoding latency to 0.60 s/token, $9.5\\times$ faster than DeepSpeed-Zero-Inference.", "keywords": "LLM inference; Speculative Decoding", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/768ce5b70a01106626089286159218399d390b88.zip", "author": "Zhuoming Chen;Avner May;Ruslan Svirschevski;Yu-Hsun Huang;Max Ryabinin;Zhihao Jia;Beidi Chen", "authorids": "~Zhuoming_Chen1;~Avner_May1;~Ruslan_Svirschevski1;~Yu-Hsun_Huang1;~Max_Ryabinin1;~Zhihao_Jia2;~Beidi_Chen1", "gender": "M;M;;M;Not Specified;M;F", "homepage": ";https://avnermay.github.io/index.html;;;https://mryab.github.io/;https://www.cs.cmu.edu/~zhihaoj2/;https://www.andrew.cmu.edu/user/beidic/", "dblp": "226/5729;146/7842;;;276/0192;;192/1339", "google_scholar": "4Bb5KRYAAAAJ;Gx5baHUAAAAJ;;;930PERsAAAAJ;0IWLFR4AAAAJ;", "orcid": ";;;;;;", "linkedin": "zhuoming-chen-325075234/;avnermay/;;yuhsunhuang/;;;", "or_profile": "~Zhuoming_Chen1;~Avner_May1;~Ruslan_Svirschevski1;~Yu-Hsun_Huang1;~Max_Ryabinin1;~Zhihao_Jia2;~Beidi_Chen1", "aff": "Carnegie Mellon University;Together.ai;;Carnegie Mellon University;Together AI;Carnegie Mellon University;Meta Facebook", "aff_domain": "cmu.edu;together.ai;;cmu.edu;together.ai;cs.cmu.edu;fb.com", "position": "PhD student;Researcher;;MS student;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nchen2024sequoia,\ntitle={Sequoia: Scalable and Robust Speculative Decoding},\nauthor={Zhuoming Chen and Avner May and Ruslan Svirschevski and Yu-Hsun Huang and Max Ryabinin and Zhihao Jia and Beidi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rk2L9YGDi2}\n}", "github": "", "reviewers": "yRtD;qoEg;XEn9;uMCB", "pdf_size": 850115, "rating": "6;6;7;7", "confidence": "4;3;2;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "30;64;78;50", "wc_strengths": "38;30;55;45", "wc_weaknesses": "44;26;35;134", "wc_questions": "25;59;48;127", "wc_limitations": "12;8;4;6", "wc_review": "149;187;220;362", "wc_reply_reviewers": "12;0;0;65", "wc_reply_authors": "0;0;0;166", "reply_reviewers": "1;0;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 55.5, 17.741194999210173 ], "wc_strengths_avg": [ 42.0, 9.192388155425117 ], "wc_weaknesses_avg": [ 59.75, 43.338060639581 ], "wc_questions_avg": [ 64.75, 37.97614382740828 ], "wc_limitations_avg": [ 7.5, 2.958039891549808 ], "wc_review_avg": [ 229.5, 80.51863138429515 ], "wc_reply_reviewers_avg": [ 19.25, 26.864242032858474 ], "wc_reply_authors_avg": [ 41.5, 71.88010851410841 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2960246930072125435&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "cmu.edu;together.ai;;cmu.edu;together.ai;cs.cmu.edu;fb.com", "author_num": 7, "aff_unique_index": "0;1;0;2;0;3", "aff_unique_norm": "Carnegie Mellon University;Together.ai;Together AI;Meta", "aff_unique_dep": ";;;Meta Platforms, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.together.ai;https://www.together.ai;https://meta.com", "aff_unique_abbr": "CMU;Together.ai;Together AI;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distributed Least Squares in Small Space via Sketching and Bias Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93417", "id": "rkuVYosT2c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rkuVYosT2c", "openreview": "https://openreview.net/forum?id=rkuVYosT2c", "poster": "/media/PosterPDFs/NeurIPS%202024/93417.png?t=1731701696.7072194", "project": "", "author_site": "Sachin Garg, Kevin Tan, Michal Derezinski", "tldr": "", "abstract": "Matrix sketching is a powerful tool for reducing the size of large data matrices. Yet there are fundamental limitations to this size reduction when we want to recover an accurate estimator for a task such as least square regression. We show that these limitations can be circumvented in the distributed setting by designing sketching methods that minimize the bias of the estimator, rather than its error. In particular, we give a sparse sketching method running in optimal space and current matrix multiplication time, which recovers a nearly-unbiased least squares estimator using two passes over the data. This leads to new communication-efficient distributed averaging algorithms for least squares and related tasks, which directly improve on several prior approaches. Our key novelty is a new bias analysis for sketched least squares, giving a sharp characterization of its dependence on the sketch sparsity. The techniques include new higher moment restricted Bai-Silverstein inequalities, which are of independent interest to the non-asymptotic analysis of deterministic equivalents for random matrices that arise from sketching.", "keywords": "Matrix Sketching;Least squares;Randomized Linear Algebra;Random Matrix Theory", "primary_area": "learning_theory", "supplementary_material": "/attachment/7c924336f28f87f8135f7bef791234c4d3c455db.zip", "author": "Sachin Garg;Kevin Tan;Michal Derezinski", "authorids": "~Sachin_Garg2;~Kevin_Tan3;~Michal_Derezinski1", "gender": "M;M;M", "homepage": ";;https://web.eecs.umich.edu/~derezin/", "dblp": ";47/3097;155/1906", "google_scholar": "ryBr780AAAAJ;LJUS_7IAAAAJ;qhP66JAAAAAJ", "orcid": ";0009-0004-9844-5135;", "linkedin": ";hetankevin;", "or_profile": "~Sachin_Garg2;~Kevin_Tan3;~Michal_Derezinski1", "aff": "University of Michigan - Ann Arbor;Wharton Statistics Department, The Wharton School;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;statistics.wharton.upenn.edu;umich.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngarg2024distributed,\ntitle={Distributed Least Squares in Small Space via Sketching and Bias Reduction},\nauthor={Sachin Garg and Kevin Tan and Michal Derezinski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rkuVYosT2c}\n}", "github": "", "reviewers": "8xrQ;h7iG;X2Ks;RJVb", "pdf_size": 2184989, "rating": "5;6;7;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "94;115;98;142", "wc_strengths": "71;119;218;33", "wc_weaknesses": "60;149;205;24", "wc_questions": "72;36;107;71", "wc_limitations": "1;41;1;1", "wc_review": "298;460;629;271", "wc_reply_reviewers": "23;109;25;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 112.25, 18.89940475253123 ], "wc_strengths_avg": [ 110.25, 69.27255950230222 ], "wc_weaknesses_avg": [ 109.5, 71.48601261785413 ], "wc_questions_avg": [ 71.5, 25.104780421266383 ], "wc_limitations_avg": [ 11.0, 17.320508075688775 ], "wc_review_avg": [ 414.5, 143.39194538048503 ], "wc_reply_reviewers_avg": [ 41.75, 39.251592324388575 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10006349917107625142&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "umich.edu;statistics.wharton.upenn.edu;umich.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Michigan;Wharton School", "aff_unique_dep": ";Wharton Statistics Department", "aff_unique_url": "https://www.umich.edu;https://www.wharton.upenn.edu", "aff_unique_abbr": "UM;Wharton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "OwMatch: Conditional Self-Labeling with Consistency for Open-World Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93416", "id": "rle9X7DQuH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rle9X7DQuH", "openreview": "https://openreview.net/forum?id=rle9X7DQuH", "poster": "/media/PosterPDFs/NeurIPS%202024/93416.png?t=1731726685.3677871", "project": "", "author_site": "Shengjie Niu, Lifan Lin, Jian Huang, Chao Wang", "tldr": "", "abstract": "Semi-supervised learning (SSL) offers a robust framework for harnessing the potential of unannotated data. Traditionally, SSL mandates that all classes possess labeled instances. However, the emergence of open-world SSL (OwSSL) introduces a more practical challenge, wherein unlabeled data may encompass samples from unseen classes. This scenario leads to misclassification of unseen classes as known ones, consequently undermining classification accuracy. To overcome this challenge, this study revisits two methodologies from self-supervised and semi-supervised learning, self-labeling and consistency, tailoring them to address the OwSSL problem. Specifically, we propose an effective framework called _OwMatch_, combining conditional self-labeling and open-world hierarchical thresholding. Theoretically, we analyze the estimation of class distribution on unlabeled data through rigorous statistical analysis, thus demonstrating that OwMatch can ensure the unbiasedness of the label assignment estimator with reliability. Comprehensive empirical analyses demonstrate that our method yields substantial performance enhancements across both known and unknown classes in comparison to previous studies. Code is available at [https://github.com/niusj03/OwMatch](https://github.com/niusj03/OwMatch).", "keywords": "Open-world Semi-Supervised Learning;self-labeling;consistency loss", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shengjie Niu;Lifan Lin;Jian Huang;Chao Wang", "authorids": "~Shengjie_Niu1;~Lifan_Lin1;~Jian_Huang5;~Chao_Wang13", "gender": "M;M;M;M", "homepage": "https://niusj03.github.io/;https://flammingfrost.github.io/;https://www.polyu.edu.hk/ama/people/academic-staff/prof-huang-jian/;https://wangcmath.github.io/", "dblp": ";;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;PBchRWYAAAAJ", "orcid": ";;0000-0002-5218-9269;", "linkedin": ";;;", "or_profile": "~Shengjie_Niu1;~Lifan_Lin1;~Jian_Huang5;~Chao_Wang13", "aff": "Hong Kong Polytechnic University;Southern University of Science and Technology;Hong Kong Polytechnic University;Southern University of Science and Technology", "aff_domain": "polyu.hk;mail.sustech.edu.cn;polyu.edu.hk;susteh.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nniu2024owmatch,\ntitle={OwMatch: Conditional Self-Labeling with Consistency for Open-world Semi-Supervised Learning},\nauthor={Shengjie Niu and Lifan Lin and Jian Huang and Chao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rle9X7DQuH}\n}", "github": "", "reviewers": "cbPu;9vSt;dC6G;PbWT", "pdf_size": 2162227, "rating": "5;5;5;6", "confidence": "5;3;4;4", "soundness": "3;4;2;4", "novelty": "2;2;2;3", "presentation": "4;2;2;3", "wc_summary": "51;101;95;28", "wc_strengths": "34;33;114;24", "wc_weaknesses": "175;178;361;16", "wc_questions": "2;1;8;136", "wc_limitations": "1;22;10;2", "wc_review": "263;335;588;206", "wc_reply_reviewers": "18;0;20;19", "wc_reply_authors": "21;0;29;347", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 68.75, 30.433328769623607 ], "wc_strengths_avg": [ 51.25, 36.43744639790226 ], "wc_weaknesses_avg": [ 182.5, 122.12800661600926 ], "wc_questions_avg": [ 36.75, 57.36451429237417 ], "wc_limitations_avg": [ 8.75, 8.407585860400118 ], "wc_review_avg": [ 348.0, 145.90921835168606 ], "wc_reply_reviewers_avg": [ 14.25, 8.257572282456872 ], "wc_reply_authors_avg": [ 99.25, 143.43007878405422 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FruPlIq4KcsJ:scholar.google.com/&scioq=OwMatch:+Conditional+Self-Labeling+with+Consistency+for+Open-World+Semi-Supervised+Learning&hl=en&as_sdt=0,10", "gs_version_total": 5, "email": "polyu.hk;mail.sustech.edu.cn;polyu.edu.hk;susteh.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Hong Kong Polytechnic University;Southern University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.polyu.edu.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "PolyU;SUSTech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DAPE: Data-Adaptive Positional Encoding for Length Extrapolation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93415", "id": "rnUEUbRxVu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rnUEUbRxVu", "openreview": "https://openreview.net/forum?id=rnUEUbRxVu", "poster": "/media/PosterPDFs/NeurIPS%202024/93415.png?t=1731389282.8771856", "project": "", "author_site": "Chuanyang Zheng, Yihang Gao, Han Shi, Minbin Huang, Jingyao Li, Jing Xiong, Xiaozhe Ren, Michael Ng, Xin Jiang, Zhenguo Li, Yu Li", "tldr": "", "abstract": "Positional encoding plays a crucial role in transformers, significantly impact- ing model performance and length generalization. Prior research has introduced absolute positional encoding (APE) and relative positional encoding (RPE) to distinguish token positions in given sequences. However, both APE and RPE remain fixed after model training regardless of input data, limiting their adaptability and flexibility. Hence, we expect that the desired positional encoding should be data-adaptive and can be dynamically adjusted with the given attention. In this paper, we propose a Data-Adaptive Positional Encoding (DAPE) method, which dynamically and semantically adjusts based on input context and learned fixed priors. Experimental validation on real-world datasets (Arxiv, Books3, and CHE) demonstrates that DAPE enhances model performances in terms of trained length and length generalization, where the improvements are statistically significant. The model visualization suggests that our model can keep both local and anti-local information. Finally, we successfully train the model on sequence length 128 and achieve better performance at evaluation sequence length 8192, compared with other static positional encoding methods, revealing the benefit of the adaptive positional encoding method.", "keywords": "Transformers;context-adaptive positional encoding;long context;length generalization", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/f2eea472521132e22428b4f574688a8db63733d9.zip", "author": "Chuanyang Zheng;Yihang Gao;Han Shi;Minbin Huang;Jingyao Li;Jing Xiong;Xiaozhe Ren;Michael Ng;Xin Jiang;Zhenguo Li;Yu Li", "authorids": "~Chuanyang_Zheng3;~Yihang_Gao1;~Han_Shi1;~Minbin_Huang1;~Jingyao_Li2;~Jing_Xiong4;~Xiaozhe_Ren1;~Michael_Ng4;~Xin_Jiang1;~Zhenguo_Li1;~Yu_Li1", "gender": "M;M;M;M;F;M;;M;M;M;M", "homepage": "https://chuanyang-zheng.github.io/;https://yihang-gao.github.io/;https://han-shi.github.io/;;https://julietljy.github.io/;;https://www.math.hkbu.edu.hk/~mng/;;http://www.ee.columbia.edu/~zgli/;https://sites.google.com/view/liyu1995;https://menik1126.github.io/", "dblp": ";;;318/2868;;248/7679.html;;42/4142-2;23/6479;;", "google_scholar": "LWwh7K4AAAAJ;https://scholar.google.com.hk/citations?user=wbVQ6okAAAAJ;https://scholar.google.com.hk/citations?user=Johp_14AAAAJ;DNEb94sAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;DUfcez0AAAAJ;XboZC1AAAAAJ;8YHZx-AAAAAJ;https://scholar.google.com.hk/citations?user=dFX1hXkAAAAJ", "orcid": ";0000-0001-8883-7769;;;;0000-0002-0432-5510;;0000-0002-9117-8247;;0000-0002-3664-6722;0000-0003-2986-6978", "linkedin": ";;;https://linkedin.com/in/minbin-huang-76209516a;;;;xin-jiang-9577b76/;;yuli1995/;", "or_profile": "~Chuanyang_Zheng3;~Yihang_Gao1;~Han_Shi1;~Minbin_Huang1;~Jingyao_Li2;~Xiaozhe_Ren1;~Michael_Ng4;~Xin_Jiang1;~Zhenguo_Li1;~Yu_Li1;~jing_xiong3", "aff": "The Chinese University of Hong Kong;The University of Hong Kong;Huawei Technologies Ltd.;The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Noah's Ark Lab;Hong Kong Baptist University;Noah\u2019s Ark Lab, Huawei Technologies;Huawei Noah's Ark Lab;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Sun Yat-Sen University", "aff_domain": "cse.cuhk.edu.hk;hku.hk;huawei.com;cuhk.edu.hk;cse.cuhk.edu.hk;huawei.com;hkbu.edu.hk;huawei.com;huawei.com;cse.cuhk.edu.hk;sysu.edu.cn", "position": "PhD student;PhD student;Principal Researcher;PhD student;PhD student;Researcher;Full Professor;Principal Researcher;Principal Researcher;Assistant Professor;MS student", "bibtex": "@inproceedings{\nzheng2024dape,\ntitle={{DAPE}: Data-Adaptive Positional Encoding for Length Extrapolation},\nauthor={Chuanyang Zheng and Yihang Gao and Han Shi and Minbin Huang and Jingyao Li and Jing Xiong and Xiaozhe Ren and Michael Ng and Xin Jiang and Zhenguo Li and Yu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rnUEUbRxVu}\n}", "github": "", "reviewers": "tTna;QhvW;CfZF;NzBk", "pdf_size": 42836935, "rating": "3;6;7;8", "confidence": "5;5;3;3", "soundness": "2;3;3;2", "novelty": "2;3;3;3", "presentation": "3;3;3;2", "wc_summary": "84;20;55;71", "wc_strengths": "54;33;28;44", "wc_weaknesses": "163;36;114;42", "wc_questions": "2;46;39;164", "wc_limitations": "2;1;38;5", "wc_review": "305;136;274;326", "wc_reply_reviewers": "0;26;154;516", "wc_reply_authors": "88;46;671;1843", "reply_reviewers": "0;1;2;3", "reply_authors": "2;2;3;5", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.5, 23.96351393264352 ], "wc_strengths_avg": [ 39.75, 10.059199769365355 ], "wc_weaknesses_avg": [ 88.75, 52.72274177240785 ], "wc_questions_avg": [ 62.75, 60.80039062374517 ], "wc_limitations_avg": [ 11.5, 15.370426148939398 ], "wc_review_avg": [ 260.25, 74.0823022050476 ], "wc_reply_reviewers_avg": [ 174.0, 205.87860500790265 ], "wc_reply_authors_avg": [ 662.0, 725.2196219077363 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 41, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.8017837257372733, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7254973532262890476&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cse.cuhk.edu.hk;hku.hk;huawei.com;cuhk.edu.hk;cse.cuhk.edu.hk;huawei.com;hkbu.edu.hk;huawei.com;huawei.com;cse.cuhk.edu.hk;sysu.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;0;0;3;4;2;2;0;5", "aff_unique_norm": "Chinese University of Hong Kong;University of Hong Kong;Huawei;Noah's Ark Lab;Hong Kong Baptist University;Sun Yat-sen University", "aff_unique_dep": ";;Huawei Technologies;;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.hku.hk;https://www.huawei.com;;https://www.hkbu.edu.hk;http://www.sysu.edu.cn/", "aff_unique_abbr": "CUHK;HKU;Huawei;;HKBU;SYSU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Universality of AdaGrad Stepsizes for Stochastic Optimization: Inexact Oracle, Acceleration and Variance Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93414", "id": "rniiAVjHi5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rniiAVjHi5", "openreview": "https://openreview.net/forum?id=rniiAVjHi5", "poster": "", "project": "", "author_site": "Anton Rodomanov, Xiaowen Jiang, Sebastian Stich", "tldr": "", "abstract": "We present adaptive gradient methods (both basic and accelerated) for solving\nconvex composite optimization problems in which the main part is approximately\nsmooth (a.k.a. $(\\delta, L)$-smooth) and can be accessed only via a\n(potentially biased) stochastic gradient oracle.\nThis setting covers many interesting examples including H\u00f6lder smooth problems\nand various inexact computations of the stochastic gradient.\nOur methods use AdaGrad stepsizes and are adaptive in the sense that they do\nnot require knowing any problem-dependent constants except an estimate of the\ndiameter of the feasible set but nevertheless achieve the best possible\nconvergence rates as if they knew the corresponding constants.\nWe demonstrate that AdaGrad stepsizes work in a variety of situations\nby proving, in a unified manner, three types of new results.\nFirst, we establish efficiency guarantees for our methods in the classical\nsetting where the oracle's variance is uniformly bounded.\nWe then show that, under more refined assumptions on the variance,\nthe same methods without any modifications enjoy implicit variance\nreduction properties allowing us to express their complexity estimates in\nterms of the variance only at the minimizer.\nFinally, we show how to incorporate explicit SVRG-type variance reduction into\nour methods and obtain even faster algorithms.\nIn all three cases, we present both basic and accelerated algorithms\nachieving state-of-the-art complexity bounds.\nAs a direct corollary of our results, we obtain universal stochastic gradient\nmethods for H\u00f6lder smooth problems which can be used in all situations.", "keywords": "convex optimization;stochastic optimization;adaptive methods;universal algorithms;acceleration;variance reduction;AdaGrad;SVRG;weakly smooth functions;H\u00f6lder condition;inexact oracle;complexity estimates", "primary_area": "optimization", "supplementary_material": "", "author": "Anton Rodomanov;Xiaowen Jiang;Sebastian U Stich", "authorids": "~Anton_Rodomanov1;~Xiaowen_Jiang1;~Sebastian_U_Stich1", "gender": ";M;M", "homepage": ";;https://www.sstich.ch", "dblp": "153/5453;192/3782-3;04/10549", "google_scholar": "u95GRZQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ch/citations?user=8l-mDfQAAAAJ", "orcid": ";;", "linkedin": ";xiaowen-jiang-65570b222/;", "or_profile": "~Anton_Rodomanov1;~Xiaowen_Jiang1;~Sebastian_U_Stich1", "aff": "CISPA;CISPA Helmholtz Center for Information Security;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.de;cispa.de;cispa.de", "position": "Postdoc;PhD student;Tenure Track Faculty", "bibtex": "@inproceedings{\nrodomanov2024universality,\ntitle={Universality of AdaGrad Stepsizes for Stochastic Optimization: Inexact Oracle, Acceleration and Variance Reduction},\nauthor={Anton Rodomanov and Xiaowen Jiang and Sebastian U Stich},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rniiAVjHi5}\n}", "github": "", "reviewers": "AhYx;PtE8;aWSb;soXZ", "pdf_size": 5499627, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;4;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "55;60;98;61", "wc_strengths": "56;11;65;57", "wc_weaknesses": "168;310;1166;15", "wc_questions": "4;2;49;21", "wc_limitations": "1;2;26;62", "wc_review": "284;385;1404;216", "wc_reply_reviewers": "22;797;921;0", "wc_reply_authors": "21;1187;1714;36", "reply_reviewers": "1;3;3;0", "reply_authors": "2;3;4;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.5, 17.18284027743958 ], "wc_strengths_avg": [ 47.25, 21.21762239271875 ], "wc_weaknesses_avg": [ 414.75, 446.10389765165695 ], "wc_questions_avg": [ 19.0, 18.828170383762732 ], "wc_limitations_avg": [ 22.75, 24.772716847370617 ], "wc_review_avg": [ 572.25, 483.9609359235516 ], "wc_reply_reviewers_avg": [ 435.0, 426.33144383214335 ], "wc_reply_authors_avg": [ 739.5, 735.0273804423887 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18041638293906434021&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "cispa.de;cispa.de;cispa.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security", "aff_unique_dep": "", "aff_unique_url": "https://www.cispa.de/", "aff_unique_abbr": "CISPA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "rog0J435OO", "title": "FlashMask: Reducing the Complexity of Attention Computation through Sparse Mask Representation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent advancements in Larger-Scale Transformers have significantly benefited from sophisticated attention mechanisms, which are critical for modeling long-context sequences. However, the computational and memory demands of conventional attention mask computations, typically scaling with an $\\mathcal{O}(N^2)$ complexity where $N$ is the sequence length, pose significant challenges. This paper introduces FlashMask, a simple yet effective \\emph{Exact} attention algorithm designed to substantially reduce both the computational complexity and memory requirements of attention computations. By adopting a novel column-wise sparse representation of attention masks, FlashMask achieves a linear memory complexity of $\\mathcal{O}(N)$ and computational complexity of $\\mathcal{O}(N)\\sim\\mathcal{O}(N^2)$. We assess the performance of FlashMask in a variety of masking scenarios, including causal and customized attention masks, demonstrating its versatility and robustness across a wide range of attention patterns and models. Our empirical analysis encompasses a variety of downstream training modalities, including Supervised Fine-Tuning (SFT), Direct Preference Optimization (DPO), and Reward Model (RM). We compare FlashMask against state-of-the-art techniques, including notably FlashAttention. In kernel-level assessments, FlashMask achieves substantial computational speedups, up to 6.7x (SFT), 6.9x (DPO), and 8.3x (RM). Furthermore, in end-to-end training, FlashMask consistently enhances training speed significantly, with accelerations up to 2.4x (SFT), 4.2x (LoRA), 2.5x (DPO), and 2.6x (RM) across these varied scenarios without sacrificing model accuracy. Additionally, when implemented in the LoRA scenario, FlashMask enables the LLaMA2-7B to process sequence lengths of up to 544k, significantly enhancing its capability for long-context input.", "keywords": "FlashMask;Efficient Attention Computation;Sparse Mask Representation;Linear Memory Complexity;Low Computational Complexity", "primary_area": "infrastructure", "supplementary_material": "", "author": "Guoxia Wang;Jinle Zeng;Xiyuan Xiao;Jiabin Yang;Lujing Zheng;Zeyu Chen;Jiang Bian;Haoyi Xiong;Dianhai Yu;Haifeng Wang", "authorids": "~Guoxia_Wang1;~Jinle_Zeng1;~Xiyuan_Xiao1;~Jiabin_Yang1;~Lujing_Zheng1;~Zeyu_Chen3;~Jiang_Bian5;~Haoyi_Xiong1;~Dianhai_Yu3;~Haifeng_Wang3", "gender": ";M;M;;F;M;;M;;M", "homepage": ";https://github.com/sneaxiy;http://www.notready;;https://github.com/lugimzzz;https://github.com/ZeyuChen;https://sites.google.com/view/bjbaidurecent/home;https://sites.google.com/site/haoyixiongshomepage/;;https://haifengwang.net/", "dblp": ";;;;;;;06/2700;;10/5209-1.html", "google_scholar": ";;;;https://scholar.google.com.hk/citations?hl=zh-CN;LCzu9MEAAAAJ;;f_Kcie0AAAAJ;;jgy4jCAAAAAJ", "orcid": ";;;;;0000-0001-6286-0581;;;;0000-0002-0672-7468", "linkedin": ";;;;;zeyuchenchn/;;;;", "or_profile": "~Guoxia_Wang1;~Jinle_Zeng1;~Xiyuan_Xiao1;~Jiabin_Yang1;~Lujing_Zheng1;~Zeyu_Chen3;~Jiang_Bian5;~Haoyi_Xiong1;~Dianhai_Yu3;~Haifeng_Wang3", "aff": ";Baidu;Beihang University;;Baidu;Baidu, Inc.;Microsoft;Baidu;;Baidu", "aff_domain": ";baidu.com;buaa.edu.cn;;baidu.com;baidu.com;microsoft.com;baidu.com;;baidu.com", "position": ";Researcher;MS student;;Researcher;Principal Architect;Applied Scientist;Principal Researcher;;CTO", "bibtex": "@misc{\nanonymous2024flashmask,\ntitle={FlashMask: Reducing the Complexity of Attention Computation through Sparse Mask Representation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=rog0J435OO}\n}", "github": "", "project": "", "reviewers": "ZsUC;aVe4;X3zD;caMu", "site": "https://openreview.net/forum?id=rog0J435OO", "pdf_size": 799987, "rating": "3;4;4;5", "confidence": "5;5;4;3", "soundness": "2;2;3;2", "novelty": "2;2;3;2", "presentation": "2;3;2;2", "wc_summary": "25;40;70;45", "wc_strengths": "22;90;101;39", "wc_weaknesses": "99;145;64;177", "wc_questions": "2;2;3;96", "wc_limitations": "19;1;1;1", "wc_review": "167;278;239;358", "wc_reply_reviewers": "128;43;0;19", "wc_reply_authors": "0;60;60;105", "reply_reviewers": "1;1;0;1", "reply_authors": "1;2;2;2", "rating_avg": [ 4.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 45.0, 16.20185174601965 ], "wc_strengths_avg": [ 63.0, 33.279122584587476 ], "wc_weaknesses_avg": [ 121.25, 43.14148235747121 ], "wc_questions_avg": [ 25.75, 40.56091098582476 ], "wc_limitations_avg": [ 5.5, 7.794228634059948 ], "wc_review_avg": [ 260.5, 68.95106960736722 ], "wc_reply_reviewers_avg": [ 47.5, 48.91063279083598 ], "wc_reply_authors_avg": [ 56.25, 37.31202889149825 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Z3DfnN1aSoIJ:scholar.google.com/&scioq=FlashMask:+Reducing+the+Complexity+of+Attention+Computation+through+Sparse+Mask+Representation&hl=en&as_sdt=0,14", "gs_version_total": 0, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "Baidu;Beihang University;Microsoft", "aff_unique_dep": "Baidu, Inc.;;Microsoft Corporation", "aff_unique_url": "https://www.baidu.com;http://www.buaa.edu.cn/;https://www.microsoft.com", "aff_unique_abbr": "Baidu;BUAA;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "FEDMEKI: A Benchmark for Scaling Medical Foundation Models via Federated Knowledge Injection", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97482", "id": "rovpCs3ZEO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rovpCs3ZEO", "openreview": "https://openreview.net/forum?id=rovpCs3ZEO", "poster": "", "project": "", "author_site": "Jiaqi Wang, Xiaochen Wang, Lingjuan Lyu, Jinghui Chen, Fenglong Ma", "tldr": "", "abstract": "This study introduces the Federated Medical Knowledge Injection (FedMEKI) platform, a new benchmark designed to address the unique challenges of integrating medical knowledge into foundation models under privacy constraints. By leveraging a cross-silo federated learning approach, FedMEKI circumvents the issues associated with centralized data collection, which is often prohibited under health regulations like the Health Insurance Portability and Accountability Act (HIPAA) in the USA. The platform is meticulously designed to handle multi-site, multi-modal, and multi-task medical data, which includes 7 medical modalities, including images, signals, texts, laboratory test results, vital signs, input variables, and output variables. The curated dataset to validate FedMEKI covers 8 medical tasks, including 6 classification tasks (lung opacity detection, COVID-19 detection, electrocardiogram (ECG) abnormal detection, mortality prediction, sepsis protection, and enlarged cardiomediastinum detection) and 2 generation tasks (medical visual question answering (MedVQA) and ECG noise clarification). This comprehensive dataset is partitioned across several clients to facilitate the decentralized training process under 16 benchmark approaches. FedMEKI not only preserves data privacy but also enhances the capability of medical foundation models by allowing them to learn from a broader spectrum of medical knowledge without direct data exposure, thereby setting a new benchmark in the application of foundation models within the healthcare sector.", "keywords": "Medical Foundation Model;Federated Learning", "primary_area": "", "supplementary_material": "/attachment/817de7f2de36bd40316a6928f08880bb05c893d5.zip", "author": "Jiaqi Wang;Xiaochen Wang;Lingjuan Lyu;Jinghui Chen;Fenglong Ma", "authorids": "~Jiaqi_Wang4;~Xiaochen_Wang2;~Lingjuan_Lyu1;~Jinghui_Chen1;~Fenglong_Ma1", "gender": ";M;F;M;M", "homepage": ";;https://sites.google.com/view/lingjuan-lyu;https://jinghuichen.github.io/;https://fenglong-ma.github.io/", "dblp": ";19/30-2.html;178/9876;67/5633;85/10856", "google_scholar": ";PXpHePgAAAAJ;;mKia7Y4AAAAJ;DLJIxNMAAAAJ", "orcid": ";0009-0001-7699-3016;;;0000-0002-4999-0303", "linkedin": ";xiaochen-wang-1860691b4/;;;fenglong-ma-69805832/", "or_profile": "~Jiaqi_Wang4;~Xiaochen_Wang2;~Lingjuan_Lyu1;~Jinghui_Chen1;~Fenglong_Ma1", "aff": ";Pennsylvania State University;Sony;Pennsylvania State University;Pennsylvania State University", "aff_domain": ";psu.edu;sony.com;psu.edu;psu.edu", "position": ";PhD student;scientist;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024fedmeki,\ntitle={{FEDMEKI}: A Benchmark for Scaling Medical Foundation Models via Federated Knowledge Injection},\nauthor={Jiaqi Wang and Xiaochen Wang and Lingjuan Lyu and Jinghui Chen and Fenglong Ma},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=rovpCs3ZEO}\n}", "github": "", "reviewers": "XgYN;cYFo;9Bk2;jMQ7", "pdf_size": 4513519, "rating": "7;7;8;8", "confidence": "5;4;5;4", "wc_summary_and_contributions": "43;59;161;136", "wc_strengths": "53;74;5;80", "wc_improvement": "63;41;5;92", "wc_limitations": "5;14;34;28", "wc_correctness": "8;11;56;14", "wc_clarity": "6;7;45;9", "wc_relation_to_prior_work": "6;7;46;19", "wc_documentation": "13;7;47;25", "wc_additional_feedback": "1;1;1;1", "wc_review": "198;221;400;404", "wc_reply_reviewers": "0;0;43;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 99.75, 49.866697303912154 ], "wc_strengths_avg": [ 53.0, 29.47032405658275 ], "wc_improvement_avg": [ 50.25, 31.77558024647229 ], "wc_limitations_avg": [ 20.25, 11.409973707244026 ], "wc_correctness_avg": [ 22.25, 19.60070151805797 ], "wc_clarity_avg": [ 16.75, 16.345871038277526 ], "wc_relation_to_prior_work_avg": [ 19.5, 16.132265804901678 ], "wc_documentation_avg": [ 23.0, 15.297058540778355 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 305.75, 96.60324787500677 ], "wc_reply_reviewers_avg": [ 10.75, 18.619546181365433 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16340176519833164400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";psu.edu;sony.com;psu.edu;psu.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Pennsylvania State University;Sony Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.sony.com", "aff_unique_abbr": "PSU;Sony", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Japan" }, { "title": "FFAM: Feature Factorization Activation Map for Explanation of 3D Detectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93413", "id": "rpZWSDjc4N", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rpZWSDjc4N", "openreview": "https://openreview.net/forum?id=rpZWSDjc4N", "poster": "/media/PosterPDFs/NeurIPS%202024/93413.png?t=1731423025.0321865", "project": "", "author_site": "Shuai Liu, Boyang Li, Zhiyu Fang, Mingyue Cui, Kai Huang", "tldr": "", "abstract": "LiDAR-based 3D object detection has made impressive progress recently, yet most existing models are black-box, lacking interpretability. Previous explanation approaches primarily focus on analyzing image-based models and are not readily applicable to LiDAR-based 3D detectors. In this paper, we propose a feature factorization activation map (FFAM) to generate high-quality visual explanations for 3D detectors. FFAM employs non-negative matrix factorization to generate concept activation maps and subsequently aggregates these maps to obtain a global visual explanation. To achieve object-specific visual explanations, we refine the global visual explanation using the feature gradient of a target object. Additionally, we introduce a voxel upsampling strategy to align the scale between the activation map and input point cloud. We qualitatively and quantitatively analyze FFAM with multiple detectors on several datasets. Experimental results validate the high-quality visual explanations produced by FFAM. The code is available at \\url{https://anonymous.4open.science/r/FFAM-B9AF}.", "keywords": "Explainable artificial intelligence;visual explanation;3D object detection", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/e8174f1db4e1f011ea626edfaaefc67ffb4c263f.zip", "author": "Shuai Liu;Boyang Li;Zhiyu Fang;Mingyue Cui;Kai Huang", "authorids": "~Shuai_Liu7;~Boyang_Li3;~Zhiyu_Fang2;~Mingyue_Cui1;~Kai_Huang2", "gender": "M;M;M;M;", "homepage": "https://scholar.google.com/citations?user=loIo-50AAAAJ&hl=en;https://scholar.google.com/citations?user=jM9DezUAAAAJ;;https://www.usilab.cn/team/cuimingyue/;", "dblp": ";70/1211-9.html;;;86/489-1.html", "google_scholar": "loIo-50AAAAJ;;;iwDISDEAAAAJ;", "orcid": "0000-0001-5002-4754;;0000-0001-9180-9507;;", "linkedin": ";;;;", "or_profile": "~Shuai_Liu7;~Boyang_Li3;~Zhiyu_Fang2;~Mingyue_Cui1;~Kai_Huang2", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Sun Yat-Sen University;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu;mail.sysu.edu.cn;sysu.edu.cn", "position": "PhD student;Postdoc;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nliu2024ffam,\ntitle={{FFAM}: Feature Factorization Activation Map for Explanation of 3D Detectors},\nauthor={Shuai Liu and Boyang Li and Zhiyu Fang and Mingyue Cui and Kai Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rpZWSDjc4N}\n}", "github": "", "reviewers": "BgKE;v6wE;bytz;93Lo", "pdf_size": 29745236, "rating": "5;5;5;5", "confidence": "4;3;4;4", "soundness": "3;3;2;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "71;89;76;55", "wc_strengths": "61;85;76;34", "wc_weaknesses": "131;84;127;215", "wc_questions": "1;75;3;3", "wc_limitations": "1;68;28;11", "wc_review": "265;401;310;318", "wc_reply_reviewers": "69;24;31;13", "wc_reply_authors": "69;42;54;12", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 12.173228823939851 ], "wc_strengths_avg": [ 64.0, 19.32614809008769 ], "wc_weaknesses_avg": [ 139.25, 47.457217575412066 ], "wc_questions_avg": [ 20.5, 31.476181471074284 ], "wc_limitations_avg": [ 27.0, 25.563646062328434 ], "wc_review_avg": [ 323.5, 49.094297021140854 ], "wc_reply_reviewers_avg": [ 34.25, 21.063890903629368 ], "wc_reply_authors_avg": [ 44.25, 20.932928605429293 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13641115029548701969&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "sysu.edu.cn;sysu.edu.cn;sysu.edu;mail.sysu.edu.cn;sysu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Meta-Reinforcement Learning with Universal Policy Adaptation: Provable Near-Optimality under All-task Optimum Comparator", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93412", "id": "rpjh69DUX2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rpjh69DUX2", "openreview": "https://openreview.net/forum?id=rpjh69DUX2", "poster": "/media/PosterPDFs/NeurIPS%202024/93412.png?t=1731295706.6418185", "project": "", "author_site": "Siyuan Xu, Minghui Zhu", "tldr": "", "abstract": "Meta-reinforcement learning (Meta-RL) has attracted attention due to its capability to enhance reinforcement learning (RL) algorithms, in terms of data efficiency and generalizability. In this paper, we develop a bilevel optimization framework for meta-RL (BO-MRL) to learn the meta-prior for task-specific policy adaptation, which implements multiple-step policy optimization on one-time data collection. Beyond existing meta-RL analyses, we provide upper bounds of the expected optimality gap over the task distribution. This metric measures the distance of the policy adaptation from the learned meta-prior to the task-specific optimum, and quantifies the model's generalizability to the task distribution. We empirically validate the correctness of the derived upper bounds and demonstrate the superior effectiveness of the proposed algorithm over benchmarks.", "keywords": "Meta-learning;reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/936bd5c97a81948c88cd463517dd71ba83872c1b.zip", "author": "Siyuan Xu;Minghui Zhu", "authorids": "~Siyuan_Xu4;~Minghui_Zhu1", "gender": "M;", "homepage": ";", "dblp": ";", "google_scholar": "ZV1580IAAAAJ;", "orcid": ";", "linkedin": "siyuan-xu-45b2b1169/;", "or_profile": "~Siyuan_Xu4;~Minghui_Zhu1", "aff": "Pennsylvania State University;", "aff_domain": "psu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nxu2024metareinforcement,\ntitle={Meta-Reinforcement Learning with Universal Policy Adaptation: Provable Near-Optimality under All-task Optimum Comparator},\nauthor={Siyuan Xu and Minghui Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rpjh69DUX2}\n}", "github": "", "reviewers": "DbXf;8Th6;Q7S4", "pdf_size": 1161473, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "14;96;62", "wc_strengths": "63;118;69", "wc_weaknesses": "130;138;148", "wc_questions": "3;79;283", "wc_limitations": "6;33;12", "wc_review": "216;464;574", "wc_reply_reviewers": "6;24;212", "wc_reply_authors": "0;0;325", "reply_reviewers": "1;1;2", "reply_authors": "1;1;3", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 57.333333333333336, 33.6386021641143 ], "wc_strengths_avg": [ 83.33333333333333, 24.63511495586917 ], "wc_weaknesses_avg": [ 138.66666666666666, 7.363574011458174 ], "wc_questions_avg": [ 121.66666666666667, 118.22389305419141 ], "wc_limitations_avg": [ 17.0, 11.575836902790225 ], "wc_review_avg": [ 418.0, 149.72864344094842 ], "wc_reply_reviewers_avg": [ 80.66666666666667, 93.15697624022701 ], "wc_reply_authors_avg": [ 108.33333333333333, 153.2064692570853 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1290269504773471779&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "psu.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Optimal Parallelization of Boosting", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93411", "id": "rtz4df9IF1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rtz4df9IF1", "openreview": "https://openreview.net/forum?id=rtz4df9IF1", "poster": "/media/PosterPDFs/NeurIPS%202024/93411.png?t=1733599684.0794508", "project": "", "author_site": "Arthur da Cunha, Mikael M\u00f8ller H\u00f8gsgaard, Kasper Green Larsen", "tldr": "", "abstract": "Recent works on the parallel complexity of Boosting have established strong lower bounds on the tradeoff between the number of training rounds $p$ and the total parallel work per round $t$.\nThese works have also presented highly non-trivial parallel algorithms that shed light on different regions of this tradeoff.\nDespite these advancements, a significant gap persists between the theoretical lower bounds and the performance of these algorithms across much of the tradeoff space.\nIn this work, we essentially close this gap by providing both improved lower bounds on the parallel complexity of weak-to-strong learners, and a parallel Boosting algorithm whose performance matches these bounds across the entire $p$ vs. $t$ compromise spectrum, up to logarithmic factors.\nUltimately, this work settles the parallel complexity of Boosting algorithms that are nearly sample-optimal.", "keywords": "Learning Theory;Parallel Boosting;PAC-learning;Weak to Strong Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Arthur da Cunha;Mikael M\u00f8ller H\u00f8gsgaard;Kasper Green Larsen", "authorids": "~Arthur_da_Cunha1;~Mikael_M\u00f8ller_H\u00f8gsgaard1;~Kasper_Green_Larsen1", "gender": ";M;", "homepage": ";https://pure.au.dk/portal/da/persons/mikael-moeller-hoegsgaard(3b07133a-329d-4585-a864-d37c7cb9056b).html;", "dblp": ";295/8599;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Arthur_da_Cunha1;~Mikael_M\u00f8ller_H\u00f8gsgaard1;~Kasper_Green_Larsen1", "aff": ";Aarhus University;", "aff_domain": ";cs.au.dk;", "position": ";PhD student;", "bibtex": "@inproceedings{\ncunha2024optimal,\ntitle={Optimal Parallelization of Boosting},\nauthor={Arthur da Cunha and Mikael M{\\o}ller H{\\o}gsgaard and Kasper Green Larsen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rtz4df9IF1}\n}", "github": "", "reviewers": "umpM;D6oP;GD3o", "pdf_size": 460073, "rating": "7;7;8", "confidence": "3;3;4", "soundness": "4;3;4", "novelty": "4;2;4", "presentation": "3;3;4", "wc_summary": "313;16;1164", "wc_strengths": "179;39;88", "wc_weaknesses": "131;128;131", "wc_questions": "36;21;53", "wc_limitations": "1;4;10", "wc_review": "660;208;1446", "wc_reply_reviewers": "55;0;41", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 497.6666666666667, 486.51984086525755 ], "wc_strengths_avg": [ 102.0, 58.00574684172825 ], "wc_weaknesses_avg": [ 130.0, 1.4142135623730951 ], "wc_questions_avg": [ 36.666666666666664, 13.072447700751718 ], "wc_limitations_avg": [ 5.0, 3.7416573867739413 ], "wc_review_avg": [ 771.3333333333334, 511.50583791607136 ], "wc_reply_reviewers_avg": [ 32.0, 23.338094752285727 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:d8GkWD5p-hMJ:scholar.google.com/&scioq=Optimal+Parallelization+of+Boosting&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": ";cs.au.dk;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Aarhus University", "aff_unique_dep": "", "aff_unique_url": "https://au.dk", "aff_unique_abbr": "AU", "aff_country_unique_index": "0", "aff_country_unique": "Denmark" }, { "title": "Face2QR: A Unified Framework for Aesthetic, Face-Preserving, and Scannable QR Code Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93410", "id": "rvBabL7DUu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rvBabL7DUu", "openreview": "https://openreview.net/forum?id=rvBabL7DUu", "poster": "/media/PosterPDFs/NeurIPS%202024/93410.png?t=1731403005.9699023", "project": "", "author_site": "Xuehao Cui, Guangyang Wu, Zhenghao Gan, Guangtao Zhai, Xiaohong Liu", "tldr": "", "abstract": "Existing methods to generate aesthetic QR codes, such as image and style transfer techniques, tend to compromise either the visual appeal or the scannability of QR codes when they incorporate human face identity. Addressing these imperfections, we present Face2QR\u2014a novel pipeline specifically designed for generating personalized QR codes that harmoniously blend aesthetics, face identity, and scannability. Our pipeline introduces three innovative components. First, the ID-refined QR integration (IDQR) seamlessly intertwines the background styling with face ID, utilizing a unified SD-based framework with control networks. Second, the ID-aware QR ReShuffle (IDRS) effectively rectifies the conflicts between face IDs and QR patterns, rearranging QR modules to maintain the integrity of facial features without compromising scannability. Lastly, the ID-preserved Scannability Enhancement (IDSE) markedly boosts scanning robustness through latent code optimization, striking a delicate balance between face ID, aesthetic quality and QR functionality. In comprehensive experiments, Face2QR demonstrates remarkable performance, outperforming existing approaches, particularly in preserving facial recognition features within custom QR code designs.", "keywords": "Image Generation;QR Code;Stable Diffusion;Control Network", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xuehao Cui;Guangyang Wu;Zhenghao Gan;Guangtao Zhai;Xiaohong Liu", "authorids": "~Xuehao_Cui1;~Guangyang_Wu1;~Zhenghao_Gan1;~Guangtao_Zhai1;~Xiaohong_Liu2", "gender": ";M;;M;M", "homepage": ";;https://jhc.sjtu.edu.cn/~xiaohongliu/supervision/;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;https://jhc.sjtu.edu.cn/~xiaohongliu/", "dblp": ";;;19/3230;95/2454-1", "google_scholar": ";YTiBMYMAAAAJ;;E6zbSYgAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;;;", "linkedin": ";;;;xiaohong-liu/", "or_profile": "~Xuehao_Cui1;~Guangyang_Wu1;~Zhenghao_Gan1;~Guangtao_Zhai1;~Xiaohong_Liu2", "aff": ";Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": ";PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ncui2024faceqr,\ntitle={Face2{QR}: A Unified Framework for Aesthetic, Face-Preserving, and Scannable {QR} Code Generation},\nauthor={Xuehao Cui and Guangyang Wu and Zhenghao Gan and Guangtao Zhai and Xiaohong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rvBabL7DUu}\n}", "github": "", "reviewers": "MYMo;ebKp;p8m4;jWT5", "pdf_size": 49241575, "rating": "5;5;5;7", "confidence": "4;3;4;5", "soundness": "3;3;3;4", "novelty": "3;3;2;4", "presentation": "3;3;3;3", "wc_summary": "79;74;77;97", "wc_strengths": "51;80;34;146", "wc_weaknesses": "72;103;150;140", "wc_questions": "25;9;18;54", "wc_limitations": "9;8;11;9", "wc_review": "236;274;290;446", "wc_reply_reviewers": "49;0;0;30", "wc_reply_authors": "349;0;0;20", "reply_reviewers": "1;0;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.75, 8.98262211161084 ], "wc_strengths_avg": [ 77.75, 42.69879974893908 ], "wc_weaknesses_avg": [ 116.25, 30.97075233183721 ], "wc_questions_avg": [ 26.5, 16.859715300087366 ], "wc_limitations_avg": [ 9.25, 1.0897247358851685 ], "wc_review_avg": [ 311.5, 80.09213444527497 ], "wc_reply_reviewers_avg": [ 19.75, 20.8611480987984 ], "wc_reply_authors_avg": [ 92.25, 148.4593799663733 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1188904112365479764&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": ";sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning to Balance Altruism and Self-interest Based on Empathy in Mixed-Motive Games", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93409", "id": "ry0RXTJwjy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ry0RXTJwjy", "openreview": "https://openreview.net/forum?id=ry0RXTJwjy", "poster": "/media/PosterPDFs/NeurIPS%202024/93409.png?t=1731488561.5679917", "project": "", "author_site": "Fanqi Kong, Yizhe Huang, Song-Chun Zhu, Siyuan Qi, Xue Feng", "tldr": "", "abstract": "Real-world multi-agent scenarios often involve mixed motives, demanding altruistic agents capable of self-protection against potential exploitation. However, existing approaches often struggle to achieve both objectives. In this paper, based on that empathic responses are modulated by learned social relationships between agents, we propose LASE (**L**earning to balance **A**ltruism and **S**elf-interest based on **E**mpathy), a distributed multi-agent reinforcement learning algorithm that fosters altruistic cooperation through gifting while avoiding exploitation by other agents in mixed-motive games. LASE allocates a portion of its rewards to co-players as gifts, with this allocation adapting dynamically based on the social relationship --- a metric evaluating the friendliness of co-players estimated by counterfactual reasoning. In particular, social relationship measures each co-player by comparing the estimated $Q$-function of current joint action to a counterfactual baseline which marginalizes the co-player's action, with its action distribution inferred by a perspective-taking module. Comprehensive experiments are performed in spatially and temporally extended mixed-motive games, demonstrating LASE's ability to promote group collaboration without compromising fairness and its capacity to adapt policies to various types of interactive co-players.", "keywords": "mixed-motive games;multi-agent reinforcement learning;cooperation;gifting;empathy", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Fanqi Kong;Yizhe Huang;Song-Chun Zhu;Siyuan Qi;Xue Feng", "authorids": "~Fanqi_Kong1;~Yizhe_Huang2;~Song-Chun_Zhu1;~Siyuan_Qi1;~Xue_Feng3", "gender": "M;;M;;F", "homepage": "https://github.com/kfq20;;https://zhusongchun.net/;;", "dblp": "377/2991;;10/10313;177/5178;", "google_scholar": "Aj13PkAAAAAJ;j5AxMFUAAAAJ;https://scholar.google.com.tw/citations?user=Al8dyb4AAAAJ;ePclJR4AAAAJ;", "orcid": ";0000-0001-8722-7221;;0000-0002-4070-733X;0000-0002-7163-7274", "linkedin": ";;;;", "or_profile": "~Fanqi_Kong1;~Yizhe_Huang2;~Song-Chun_Zhu1;~Siyuan_Qi1;~Xue_Feng3", "aff": "Tsinghua University;Peking University;Peking University;Beijing Institute for General Artificial Intelligence;Beijing Institute for General Artificial Intelligence", "aff_domain": "tsinghua.edu.cn;pku.edu.cn;pku.edu.cn;bigai.ai;bigai.ai", "position": "Undergrad student;PhD student;Full Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nkong2024learning,\ntitle={Learning to Balance Altruism and Self-interest Based on Empathy in Mixed-Motive Games},\nauthor={Fanqi Kong and Yizhe Huang and Song-Chun Zhu and Siyuan Qi and Xue Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ry0RXTJwjy}\n}", "github": "", "reviewers": "vMEk;AnKj;7rD5;vbJc", "pdf_size": 8989391, "rating": "5;6;6;8", "confidence": "4;3;5;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "69;84;63;146", "wc_strengths": "102;108;80;73", "wc_weaknesses": "110;62;114;180", "wc_questions": "90;290;14;14", "wc_limitations": "12;3;8;15", "wc_review": "383;547;279;428", "wc_reply_reviewers": "35;22;88;0", "wc_reply_authors": "46;44;482;0", "reply_reviewers": "1;1;2;0", "reply_authors": "2;2;3;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.5, 32.94313282005827 ], "wc_strengths_avg": [ 90.75, 14.618053906043718 ], "wc_weaknesses_avg": [ 116.5, 41.98511641046146 ], "wc_questions_avg": [ 102.0, 112.88932633336067 ], "wc_limitations_avg": [ 9.5, 4.5 ], "wc_review_avg": [ 409.25, 96.15189805718866 ], "wc_reply_reviewers_avg": [ 36.25, 32.39116391857508 ], "wc_reply_authors_avg": [ 143.0, 196.58331567048106 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ltgxjt7bn7YJ:scholar.google.com/&scioq=Learning+to+Balance+Altruism+and+Self-interest+Based+on+Empathy+in+Mixed-Motive+Games&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "tsinghua.edu.cn;pku.edu.cn;pku.edu.cn;bigai.ai;bigai.ai", "author_num": 5, "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "Tsinghua University;Peking University;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;http://www.bigaiai.org/", "aff_unique_abbr": "THU;Peking U;BIGAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ADOPT: Modified Adam Can Converge with Any $\\beta_2$ with the Optimal Rate", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93408", "id": "rzvVm0LsyK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=rzvVm0LsyK", "openreview": "https://openreview.net/forum?id=rzvVm0LsyK", "poster": "/media/PosterPDFs/NeurIPS%202024/93408.png?t=1734030449.5304575", "project": "", "author_site": "Shohei Taniguchi, Keno Harada, Gouki Minegishi, Yuta Oshima, Seong Cheol Jeong, Go Nagahara, Tomoshi Iiyama, Masahiro Suzuki, Yusuke Iwasawa, Yutaka Matsuo", "tldr": "", "abstract": "Adam is one of the most popular optimization algorithms in deep learning. However, it is known that Adam does not converge in theory unless choosing a hyperparameter, i.e., $\\beta_2$, in a problem-dependent manner. There have been many attempts to fix the non-convergence (e.g., AMSGrad), but they require an impractical assumption that the gradient noise is uniformly bounded. In this paper, we propose a new adaptive gradient method named ADOPT, which achieves the optimal convergence rate of $\\mathcal{O} ( 1 / \\sqrt{T} )$ with any choice of $\\beta_2$ without depending on the bounded noise assumption. ADOPT addresses the non-convergence issue of Adam by removing the current gradient from the second moment estimate and changing the order of the momentum update and the normalization by the second moment estimate. We also conduct intensive numerical experiments, and verify that our ADOPT achieves superior results compared to Adam and its variants across a wide range of tasks, including image classification, generative modeling, natural language processing, and deep reinforcement learning. The implementation is available at https://github.com/iShohei220/adopt.", "keywords": "Adam;convergence analysis;adaptive gradient method", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Shohei Taniguchi;Keno Harada;Gouki Minegishi;Yuta Oshima;Seong Cheol Jeong;Go Nagahara;Tomoshi Iiyama;Masahiro Suzuki;Yusuke Iwasawa;Yutaka Matsuo", "authorids": "~Shohei_Taniguchi1;~Keno_Harada1;~Gouki_Minegishi1;~Yuta_Oshima1;~Seong_Cheol_Jeong1;~Go_Nagahara1;~Tomoshi_Iiyama1;~Masahiro_Suzuki1;~Yusuke_Iwasawa1;~Yutaka_Matsuo1", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": ";;;;;https://github.com/tomosii;;;http://ymatsuo.com;https://sites.google.com/weblab.t.u-tokyo.ac.jp/gouki-minegishi/about", "dblp": ";393/1474;218/3351;;;299/0798;;117/7377;m/YMatsuo.html;359/6674", "google_scholar": "MOcH0c0AAAAJ;https://scholar.google.co.jp/citations?user=tiJ-smIAAAAJ;https://scholar.google.co.jp/citations?user=V_tVbiIAAAAJ;;;;r2nt5kUAAAAJ;https://scholar.google.co.jp/citations?user=pvvZgj0AAAAJ;Dy8iau4AAAAJ;https://scholar.google.co.jp/citations?user=sxGpoYMAAAAJ", "orcid": ";;;;;;;0000-0002-1321-2622;;", "linkedin": ";;yuta-oshima-9a3143276/;seong-cheol-jeong;%E8%B1%AA-%E9%95%B7%E5%8E%9F-1b712a273/;;;;;gouki-minegishi-22363b236/", "or_profile": "~Shohei_Taniguchi1;~Keno_Harada1;~Yuta_Oshima1;~Seong_Cheol_Jeong1;~Go_Nagahara1;~Tomoshi_Iiyama1;~Masahiro_Suzuki1;~Yusuke_Iwasawa1;~Yutaka_Matsuo1;~Minegishi_Gouki2", "aff": "The University of Tokyo;the University of Tokyo;The University of Tokyo, The University of Tokyo;The University of Tokyo, The University of Tokyo;Tokyo University, Tokyo Institute of Technology;The University of Tokyo;The University of Tokyo, Tokyo Institute of Technology;The University of Tokyo, The University of Tokyo;The University of Tokyo;", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;", "position": "Postdoc;PhD student;MS student;MS student;MS student;MS student;Assistant Professor;Associate Professor;Associate Professor;", "bibtex": "@inproceedings{\ntaniguchi2024adopt,\ntitle={{ADOPT}: Modified Adam Can Converge with Any \\${\\textbackslash}beta\\_2\\$ with the Optimal Rate},\nauthor={Shohei Taniguchi and Keno Harada and Gouki Minegishi and Yuta Oshima and Seong Cheol Jeong and Go Nagahara and Tomoshi Iiyama and Masahiro Suzuki and Yusuke Iwasawa and Yutaka Matsuo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=rzvVm0LsyK}\n}", "github": "", "reviewers": "bGLP;AhBk;qABQ;f5HN", "pdf_size": 1371681, "rating": "5;5;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;2;2", "presentation": "3;2;3;3", "wc_summary": "48;128;42;58", "wc_strengths": "62;81;13;65", "wc_weaknesses": "78;79;10;114", "wc_questions": "74;86;164;2", "wc_limitations": "3;3;130;8", "wc_review": "265;377;359;247", "wc_reply_reviewers": "492;0;79;0", "wc_reply_authors": "716;37;179;37", "reply_reviewers": "2;0;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.0, 34.539832078341085 ], "wc_strengths_avg": [ 55.25, 25.439880109780393 ], "wc_weaknesses_avg": [ 70.25, 37.68537514739637 ], "wc_questions_avg": [ 81.5, 57.452154006616674 ], "wc_limitations_avg": [ 36.0, 54.30929938785806 ], "wc_review_avg": [ 312.0, 56.71860364994893 ], "wc_reply_reviewers_avg": [ 142.75, 204.20256487125718 ], "wc_reply_authors_avg": [ 242.25, 279.59557847004663 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2729275365573816632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp;u-tokyo.ac.jp;", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;0;0;0", "aff_unique_norm": "University of Tokyo;Tokyo University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo;UT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "ChaosBench: A Multi-Channel, Physics-Based Benchmark for Subseasonal-to-Seasonal Climate Prediction", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97481", "id": "s1K5Z5QPog", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s1K5Z5QPog", "openreview": "https://openreview.net/forum?id=s1K5Z5QPog", "poster": "/media/PosterPDFs/NeurIPS%202024/97481.png?t=1733333600.8179965", "project": "", "author_site": "Juan Nathaniel, Yongquan Qu, Tung Nguyen, Sungduk Yu, Julius Busecke, Aditya Grover, Pierre Gentine", "tldr": "", "abstract": "Accurate prediction of climate in the subseasonal-to-seasonal scale is crucial for disaster preparedness and robust decision making amidst climate change. Yet, forecasting beyond the weather timescale is challenging because it deals with problems other than initial condition, including boundary interaction, butterfly effect, and our inherent lack of physical understanding. At present, existing benchmarks tend to have shorter forecasting range of up-to 15 days, do not include a wide range of operational baselines, and lack physics-based constraints for explainability. Thus, we propose ChaosBench, a challenging benchmark to extend the predictability range of data-driven weather emulators to S2S timescale. First, ChaosBench is comprised of variables beyond the typical surface-atmospheric ERA5 to also include ocean, ice, and land reanalysis products that span over 45 years to allow for full Earth system emulation that respects boundary conditions. We also propose physics-based, in addition to deterministic and probabilistic metrics, to ensure a physically-consistent ensemble that accounts for butterfly effect. Furthermore, we evaluate on a diverse set of physics-based forecasts from four national weather agencies as baselines to our data-driven counterpart such as ViT/ClimaX, PanguWeather, GraphCast, and FourCastNetV2. Overall, we find methods originally developed for weather-scale applications fail on S2S task: their performance simply collapse to an unskilled climatology. Nonetheless, we outline and demonstrate several strategies that can extend the predictability range of existing weather emulators, including the use of ensembles, robust control of error propagation, and the use of physics-informed models. Our benchmark, datasets, and instructions are available at https://leap-stc.github.io/ChaosBench.", "keywords": "subseasonal-to-seasonal;climate;benchmark;forecast", "primary_area": "", "supplementary_material": "/attachment/165e96b9bfc9c4a9dafb04afc9f345f4273c9436.pdf", "author": "Juan Nathaniel;Yongquan Qu;Tung Nguyen;Sungduk Yu;Julius Busecke;Aditya Grover;Pierre Gentine", "authorids": "~Juan_Nathaniel1;~Yongquan_Qu1;~Tung_Nguyen2;~Sungduk_Yu1;~Julius_Busecke1;~Aditya_Grover1;~Pierre_Gentine1", "gender": "M;M;M;;M;M;M", "homepage": "https://juannat7.github.io/;https://yongquan-qu.github.io;https://tung-nd.github.io/;;http://www.juliusbusecke.com;https://aditya-grover.github.io;http://www.gentine.com", "dblp": "311/0082;367/7090;;339/7180;;162/5052;", "google_scholar": "CIPv2JQAAAAJ;LnBGvRoAAAAJ;https://scholar.google.com.vn/citations?user=F9mgq3sAAAAJ;https://scholar.google.com/citations?hl=en;;oOhnPUgAAAAJ;", "orcid": ";0000-0002-9941-3977;;;0000-0001-8571-865X;;", "linkedin": ";;tung-nguyen-40703616b/;sungduk-yu;;;", "or_profile": "~Juan_Nathaniel1;~Yongquan_Qu1;~Tung_Nguyen2;~Sungduk_Yu1;~Julius_Busecke1;~Aditya_Grover1;~Pierre_Gentine1", "aff": "Columbia University;Columbia University;University of California, Los Angeles;University of California, Irvine;Columbia University;University of California, Los Angeles;", "aff_domain": "columbia.edu;columbia.edu;cs.ucla.edu;uci.edu;columbia.edu;ucla.edu;", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nnathaniel2024chaosbench,\ntitle={ChaosBench: A Multi-Channel, Physics-Based Benchmark for Subseasonal-to-Seasonal Climate Prediction},\nauthor={Juan Nathaniel and Yongquan Qu and Tung Nguyen and Sungduk Yu and Julius Busecke and Aditya Grover and Pierre Gentine},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=s1K5Z5QPog}\n}", "github": "", "reviewers": "eEjR;d7Hd;ZL2F", "pdf_size": 1981694, "rating": "8;8;9", "confidence": "4;3;5", "wc_summary_and_contributions": "36;49;25", "wc_strengths": "46;53;33", "wc_improvement": "45;28;20", "wc_limitations": "102;37;23", "wc_correctness": "1;19;5", "wc_clarity": "1;12;7", "wc_relation_to_prior_work": "1;22;1", "wc_documentation": "4;14;9", "wc_additional_feedback": "1;1;1", "wc_review": "237;235;124", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 8.333333333333334, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 36.666666666666664, 9.809292646374773 ], "wc_strengths_avg": [ 44.0, 8.286535263104035 ], "wc_improvement_avg": [ 31.0, 10.424330514074594 ], "wc_limitations_avg": [ 54.0, 34.418987008142274 ], "wc_correctness_avg": [ 8.333333333333334, 7.71722460186015 ], "wc_clarity_avg": [ 6.666666666666667, 4.4969125210773475 ], "wc_relation_to_prior_work_avg": [ 8.0, 9.899494936611665 ], "wc_documentation_avg": [ 9.0, 4.08248290463863 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 198.66666666666666, 52.80361940456565 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8646590708892480581&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "columbia.edu;columbia.edu;cs.ucla.edu;uci.edu;columbia.edu;ucla.edu;", "author_num": 7, "aff_unique_index": "0;0;1;2;0;1", "aff_unique_norm": "Columbia University;University of California, Los Angeles;University of California, Irvine", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.ucla.edu;https://www.uci.edu", "aff_unique_abbr": "Columbia;UCLA;UCI", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Los Angeles;Irvine", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "EnsIR: An Ensemble Algorithm for Image Restoration via Gaussian Mixture Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93407", "id": "s1MoH2pACa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s1MoH2pACa", "openreview": "https://openreview.net/forum?id=s1MoH2pACa", "poster": "/media/PosterPDFs/NeurIPS%202024/93407.png?t=1730580536.9629204", "project": "", "author_site": "Shangquan Sun, Wenqi Ren, Zikun Liu, Hyunhee Park, Rui Wang, Xiaochun Cao", "tldr": "", "abstract": "Image restoration has experienced significant advancements due to the development of deep learning. Nevertheless, it encounters challenges related to ill-posed problems, resulting in deviations between single model predictions and ground-truths. Ensemble learning, as a powerful machine learning technique, aims to address these deviations by combining the predictions of multiple base models. Most existing works adopt ensemble learning during the design of restoration models, while only limited research focuses on the inference-stage ensemble of pre-trained restoration models. Regression-based methods fail to enable efficient inference, leading researchers in academia and industry to prefer averaging as their choice for post-training ensemble. To address this, we reformulate the ensemble problem of image restoration into Gaussian mixture models (GMMs) and employ an expectation maximization (EM)-based algorithm to estimate ensemble weights for aggregating prediction candidates. We estimate the range-wise ensemble weights on a reference set and store them in a lookup table (LUT) for efficient ensemble inference on the test set. Our algorithm is model-agnostic and training-free, allowing seamless integration and enhancement of various pre-trained image restoration models. It consistently outperforms regression-based methods and averaging ensemble approaches on 14 benchmarks across 3 image restoration tasks, including super-resolution, deblurring and deraining. The codes and all estimated weights have been released in Github.", "keywords": "Model Ensemble;Image Restoration;Gaussian Mixture Models;Expectation Maximization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Shangquan Sun;Wenqi Ren;Zikun Liu;Hyunhee Park;Rui Wang;Xiaochun Cao", "authorids": "~Shangquan_Sun1;~Wenqi_Ren1;~Zikun_Liu1;~Hyunhee_Park1;~Rui_Wang5;~Xiaochun_Cao3", "gender": "M;M;M;M;F;M", "homepage": "https://sunsean21.github.io/;https://rwenqi.github.io/;;;;https://scst.sysu.edu.cn/members/caoxiaochun.htm", "dblp": "346/0940;126/3420;172/9824;;06/2293-32;39/3695", "google_scholar": "Sf1Jp-8AAAAJ;VwfgfR8AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-6292-2495;;0000-0001-8697-6351;0009-0008-9880-4168;0000-0002-4792-1945;0000-0001-7141-708X", "linkedin": "shangquansun/;;;;;", "or_profile": "~Shangquan_Sun1;~Wenqi_Ren1;~Zikun_Liu1;~Hyunhee_Park1;~Rui_Wang5;~Xiaochun_Cao3", "aff": "University of Chinese Academy of Sciences;SUN YAT-SEN UNIVERSITY;Samsung Research China-Beijing (SRC-B);Samsung Electronics;Institute of Information Engineering;SUN YAT-SEN UNIVERSITY", "aff_domain": "ucas.ac.cn;sysu.edu.cn;samsung.com;samsung.com;iie.ac.cn;sysu.edu.cn", "position": "PhD student;Full Professor;Researcher;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2024ensir,\ntitle={Ens{IR}: An Ensemble Algorithm for Image Restoration via Gaussian Mixture Models},\nauthor={Shangquan Sun and Wenqi Ren and Zikun Liu and Hyunhee Park and Rui Wang and Xiaochun Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s1MoH2pACa}\n}", "github": "", "reviewers": "j7aF;E4or;Aksr;EWje", "pdf_size": 17316036, "rating": "4;6;7;7", "confidence": "4;4;5;3", "soundness": "3;3;4;3", "novelty": "2;3;4;3", "presentation": "2;3;3;3", "wc_summary": "59;83;106;48", "wc_strengths": "20;49;45;47", "wc_weaknesses": "320;138;155;114", "wc_questions": "38;26;13;2", "wc_limitations": "55;21;6;2", "wc_review": "492;317;325;213", "wc_reply_reviewers": "236;17;26;13", "wc_reply_authors": "536;20;22;19", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 22.39419567655869 ], "wc_strengths_avg": [ 40.25, 11.776565713313877 ], "wc_weaknesses_avg": [ 181.75, 81.13684428174416 ], "wc_questions_avg": [ 19.75, 13.534677683639163 ], "wc_limitations_avg": [ 21.0, 20.868636754709208 ], "wc_review_avg": [ 336.75, 99.93091363537111 ], "wc_reply_reviewers_avg": [ 73.0, 94.22579264723646 ], "wc_reply_authors_avg": [ 149.25, 223.2928290384624 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8154207245926788633&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "ucas.ac.cn;sysu.edu.cn;samsung.com;samsung.com;iie.ac.cn;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;2;3;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Sun Yat-sen University;Samsung;Institute of Information Engineering", "aff_unique_dep": ";;Samsung Research China;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.sysu.edu.cn;https://www.samsung.com/cn/research/;", "aff_unique_abbr": "UCAS;SYSU;SRC-B;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;South Korea;" }, { "title": "Enhancing Diversity in Bayesian Deep Learning via Hyperspherical Energy Minimization of CKA", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93406", "id": "s2hA6Bz3LE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s2hA6Bz3LE", "openreview": "https://openreview.net/forum?id=s2hA6Bz3LE", "poster": "", "project": "", "author_site": "David Smerkous, Qinxun Bai, Fuxin Li", "tldr": "", "abstract": "Particle-based Bayesian deep learning often requires a similarity metric to compare two networks. However, naive similarity metrics lack permutation invariance and are inappropriate for comparing networks. Centered Kernel Alignment (CKA) on feature kernels has been proposed to compare deep networks but has not been used as an optimization objective in Bayesian deep learning. In this paper, we explore the use of CKA in Bayesian deep learning to generate diverse ensembles and hypernetworks that output a network posterior. Noting that CKA projects kernels onto a unit hypersphere and that directly optimizing the CKA objective leads to diminishing gradients when two networks are very similar. We propose adopting the approach of hyperspherical energy (HE) on top of CKA kernels to address this drawback and improve training stability. Additionally, by leveraging CKA-based feature kernels, we derive feature repulsive terms applied to synthetically generated outlier examples. Experiments on both diverse ensembles and hypernetworks show that our approach significantly outperforms baselines in terms of uncertainty quantification in both synthetic and realistic outlier detection tasks.", "keywords": "bayesian inference;variational inference;uncertainty quantification;deep learning;hypernetworks", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "David Smerkous;Qinxun Bai;Li Fuxin", "authorids": "~David_Smerkous1;~Qinxun_Bai4;~Li_Fuxin1", "gender": "M;M;M", "homepage": "https://smerkous.com;;http://web.engr.oregonstate.edu/~lif/", "dblp": ";;03/2783", "google_scholar": "QHtO2BkAAAAJ;p1tu16UAAAAJ;snDpfA0AAAAJ", "orcid": ";;", "linkedin": "david-smerkous/;;", "or_profile": "~David_Smerkous1;~Qinxun_Bai4;~Fuxin_Li1", "aff": "University of Washington;Horizon Robotics Inc.;Oregon State University", "aff_domain": "uw.edu;horizon.ai;oregonstate.edu", "position": "Researcher;Senior Research Scientist;Associate Professor", "bibtex": "@inproceedings{\nsmerkous2024enhancing,\ntitle={Enhancing Diversity in Bayesian Deep Learning via Hyperspherical Energy Minimization of {CKA}},\nauthor={David Smerkous and Qinxun Bai and Li Fuxin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s2hA6Bz3LE}\n}", "github": "", "reviewers": "jUbb;JKj8;Wp13", "pdf_size": 5554468, "rating": "4;6;7", "confidence": "3;4;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "32;52;96", "wc_strengths": "43;60;85", "wc_weaknesses": "41;76;289", "wc_questions": "37;2;75", "wc_limitations": "30;15;36", "wc_review": "183;205;581", "wc_reply_reviewers": "52;21;39", "wc_reply_authors": "176;20;271", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.0, 26.733250207684563 ], "wc_strengths_avg": [ 62.666666666666664, 17.249798710580816 ], "wc_weaknesses_avg": [ 135.33333333333334, 109.59420098202682 ], "wc_questions_avg": [ 38.0, 29.81051268708183 ], "wc_limitations_avg": [ 27.0, 8.831760866327848 ], "wc_review_avg": [ 323.0, 182.65450081141353 ], "wc_reply_reviewers_avg": [ 37.333333333333336, 12.710450643291745 ], "wc_reply_authors_avg": [ 155.66666666666666, 103.47409767129592 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-rGWFlXrf7QJ:scholar.google.com/&scioq=Enhancing+Diversity+in+Bayesian+Deep+Learning+via+Hyperspherical+Energy+Minimization+of+CKA&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "uw.edu;horizon.ai;oregonstate.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Washington;Horizon Robotics;Oregon State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.washington.edu;https://www.horizon-robotics.com/;https://oregonstate.edu", "aff_unique_abbr": "UW;Horizon Robotics;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "title": "A Nearly Optimal and Low-Switching Algorithm for Reinforcement Learning with General Function Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93405", "id": "s3icZC2NLq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s3icZC2NLq", "openreview": "https://openreview.net/forum?id=s3icZC2NLq", "poster": "", "project": "", "author_site": "Heyang Zhao, Jiafan He, Quanquan Gu", "tldr": "", "abstract": "The exploration-exploitation dilemma has been a central challenge in reinforcement learning (RL) with complex model classes. In this paper, we propose a new algorithm, Monotonic Q-Learning with Upper Confidence Bound (MQL-UCB) for RL with general function approximation. Our key algorithmic design includes (1) a general deterministic policy-switching strategy that achieves low switching cost, (2) a monotonic value function structure with carefully controlled function class complexity, and (3) a variance-weighted regression scheme that exploits historical trajectories with high data efficiency. MQL-UCB achieves minimax optimal regret of $\\tilde{O}(d\\sqrt{HK})$ when $K$ is sufficiently large and near-optimal policy switching cost of $\\tilde{O}(dH)$, with $d$ being the eluder dimension of the function class, $H$ being the planning horizon, and $K$ being the number of episodes. \n Our work sheds light on designing provably sample-efficient and deployment-efficient Q-learning with nonlinear function approximation.", "keywords": "Reinforcement learning;function approximation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Heyang Zhao;Jiafan He;Quanquan Gu", "authorids": "~Heyang_Zhao1;~Jiafan_He1;~Quanquan_Gu1", "gender": "M;M;M", "homepage": "https://web.cs.ucla.edu/~hyzhao/;https://sites.google.com/g.ucla.edu/jiafan-he-homepage;http://web.cs.ucla.edu/~qgu/", "dblp": ";214/5785;50/4597", "google_scholar": "zHQ1ap0AAAAJ;F3AXNBwAAAAJ;GU9HgNAAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Heyang_Zhao1;~Jiafan_He1;~Quanquan_Gu1", "aff": "Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;cs.ucla.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhao2024a,\ntitle={A Nearly Optimal and Low-Switching Algorithm for Reinforcement Learning with General Function Approximation},\nauthor={Heyang Zhao and Jiafan He and Quanquan Gu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s3icZC2NLq}\n}", "github": "", "reviewers": "ZRKX;Zbvr;6wYy", "pdf_size": 671880, "rating": "5;6;8", "confidence": "3;4;3", "soundness": "3;4;3", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "73;124;19", "wc_strengths": "50;125;67", "wc_weaknesses": "159;111;132", "wc_questions": "4;64;88", "wc_limitations": "1;11;8", "wc_review": "287;435;314", "wc_reply_reviewers": "10;13;6", "wc_reply_authors": "36;5;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 72.0, 42.871902220452036 ], "wc_strengths_avg": [ 80.66666666666667, 32.10745846199741 ], "wc_weaknesses_avg": [ 134.0, 19.6468827043885 ], "wc_questions_avg": [ 52.0, 35.32704346531139 ], "wc_limitations_avg": [ 6.666666666666667, 4.189935029992178 ], "wc_review_avg": [ 345.3333333333333, 64.35491865860415 ], "wc_reply_reviewers_avg": [ 9.666666666666666, 2.8674417556808756 ], "wc_reply_authors_avg": [ 13.666666666666666, 15.92342788332825 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.18898223650461363, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13480435116067117237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.ucla.edu;ucla.edu;cs.ucla.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Treatment of Statistical Estimation Problems in Randomized Smoothing for Adversarial Robustness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93404", "id": "s4Wx2qXhv9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s4Wx2qXhv9", "openreview": "https://openreview.net/forum?id=s4Wx2qXhv9", "poster": "", "project": "", "tldr": "", "abstract": "Randomized smoothing is a popular certified defense against adversarial attacks. In its essence, we need to solve a problem of statistical estimation which is usually very time-consuming since we need to perform numerous (usually $10^5$) forward passes of the classifier for every point to be certified. In this paper, we review the statistical estimation problems for randomized smoothing to find out if the computational burden is necessary. In particular, we consider the (standard) task of adversarial robustness where we need to decide if a point is robust at a certain radius or not using as few samples as possible while maintaining statistical guarantees. We present estimation procedures employing confidence sequences enjoying the same statistical guarantees as the standard methods, with the optimal sample complexities for the estimation task and empirically demonstrate their good performance. Additionally, we provide a randomized version of Clopper-Pearson confidence intervals resulting in strictly stronger certificates.", "keywords": "randomized smoothing;adversarial robustness;confidence interval;confidence sequence", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Vaclav Voracek", "authorids": "~Vaclav_Voracek1", "gender": "M", "homepage": "", "dblp": "292/8831.html", "google_scholar": "Db13d44AAAAJ", "orcid": "", "linkedin": "", "or_profile": "~Vaclav_Voracek1", "aff": "University of Tuebingen", "aff_domain": "uni-tuebingen.de", "position": "PhD student", "bibtex": "@inproceedings{\nvoracek2024treatment,\ntitle={Treatment of Statistical Estimation Problems in Randomized Smoothing for Adversarial Robustness},\nauthor={Vaclav Voracek},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s4Wx2qXhv9}\n}", "github": "", "reviewers": "xWqy;mvjJ;NzaR", "pdf_size": 762761, "rating": "5;6;7", "confidence": "4;2;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "54;136;99", "wc_strengths": "92;43;135", "wc_weaknesses": "79;30;406", "wc_questions": "178;3;35", "wc_limitations": "6;3;27", "wc_review": "409;215;702", "wc_reply_reviewers": "103;81;121", "wc_reply_authors": "73;1;6", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 96.33333333333333, 33.529423231278855 ], "wc_strengths_avg": [ 90.0, 37.58545818087983 ], "wc_weaknesses_avg": [ 171.66666666666666, 166.90183408885062 ], "wc_questions_avg": [ 72.0, 76.08328769622582 ], "wc_limitations_avg": [ 12.0, 10.677078252031311 ], "wc_review_avg": [ 442.0, 200.18158423458104 ], "wc_reply_reviewers_avg": [ 101.66666666666667, 16.35712552851373 ], "wc_reply_authors_avg": [ 26.666666666666668, 32.82614134429381 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2704615736955766859&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uni-tuebingen.de", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Tuebingen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "On the Curses of Future and History in Future-dependent Value Functions for Off-policy Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93403", "id": "s5917zor6V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s5917zor6V", "openreview": "https://openreview.net/forum?id=s5917zor6V", "poster": "", "project": "", "author_site": "Yuheng Zhang, Nan Jiang", "tldr": "", "abstract": "We study off-policy evaluation (OPE) in partially observable environments with complex observations, with the goal of developing estimators whose guarantee avoids exponential dependence on the horizon. While such estimators exist for MDPs and POMDPs can be converted to history-based MDPs, their estimation errors depend on the state-density ratio for MDPs which becomes history ratios after conversion, an exponential object. Recently, Uehara et al. [2022a] proposed future-dependent value functions as a promising framework to address this issue, where the guarantee for memoryless policies depends on the density ratio over the latent state space. However, it also depends on the boundedness of the future-dependent value function and other related quantities, which we show could be exponential-in-length and thus erasing the advantage of the method. In this paper, we discover novel coverage assumptions tailored to the structure of POMDPs, such as outcome coverage and belief coverage, which enable polynomial bounds on the aforementioned quantities. As a side product, our analyses also lead to the discovery of new algorithms with complementary properties.", "keywords": "Partially Observable Markov Decision Process; Offline Policy Evaluation; Reinforcement Learning Theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yuheng Zhang;Nan Jiang", "authorids": "~Yuheng_Zhang1;~Nan_Jiang2", "gender": "M;M", "homepage": ";http://nanjiang.cs.illinois.edu", "dblp": ";06/4489-8", "google_scholar": "IoEBLNYAAAAJ;nUlanA8AAAAJ", "orcid": ";", "linkedin": ";nan-jiang-28139937/", "or_profile": "~Yuheng_Zhang1;~Nan_Jiang2", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024on,\ntitle={On the Curses of Future and History in Future-dependent Value Functions for Off-policy Evaluation},\nauthor={Yuheng Zhang and Nan Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s5917zor6V}\n}", "github": "", "reviewers": "Djpv;zNfE;NgTY;jTik", "pdf_size": 526754, "rating": "5;6;7;7", "confidence": "1;3;3;3", "soundness": "2;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;4;3", "wc_summary": "78;75;40;105", "wc_strengths": "106;42;23;78", "wc_weaknesses": "129;63;14;40", "wc_questions": "31;2;23;78", "wc_limitations": "1;2;8;31", "wc_review": "345;184;108;332", "wc_reply_reviewers": "112;24;11;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.5, 23.092206477510977 ], "wc_strengths_avg": [ 62.25, 32.06536293261001 ], "wc_weaknesses_avg": [ 61.5, 42.652666974059194 ], "wc_questions_avg": [ 33.5, 27.789386463180506 ], "wc_limitations_avg": [ 10.5, 12.134661099511597 ], "wc_review_avg": [ 242.25, 100.0359310448001 ], "wc_reply_reviewers_avg": [ 41.75, 40.831207427652686 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4112152996683443463&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.illinois.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "s5Y3M5l1qg", "title": "Carefully Blending Adversarial Training and Purification Improves Adversarial Robustness", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we propose a novel adversarial defence mechanism for image classification - *CARSO* - blending the paradigms of *adversarial training* and *adversarial purification* in a synergistic robustness-enhancing way. The method builds upon an adversarially-trained classifier, and learns to map its *internal representation* associated with a potentially perturbed input onto a distribution of tentative *clean* reconstructions. Multiple samples from such distribution are classified by the same adversarially-trained model, and an aggregation of its outputs finally constitutes the *robust prediction* of interest. Experimental evaluation by a well-established benchmark of strong adaptive attacks, across different image datasets, shows that *CARSO* is able to defend itself against adaptive *end-to-end* *white-box* attacks devised for stochastic defences. Paying a modest *clean* accuracy toll, our method improves by a significant margin the *state-of-the-art* for CIFAR-10, CIFAR-100, and TinyImageNet-200 $\\ell_\\infty$ robust classification accuracy against AutoAttack.", "keywords": "adversarial robustness;adversarial training;adversarial purification;generative purification;internal representation", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/86f379bc34c8e7a65f47f06447f132744ee3f52a.zip", "author": "Emanuele Ballarin;Alessio ansuini;Luca Bortolussi", "authorids": "~Emanuele_Ballarin1;~Alessio_ansuini1;~Luca_Bortolussi1", "gender": "M;M;M", "homepage": "https://ballarin.cc/;;https://ai-lab.units.it", "dblp": "348/6393;232/2196;32/1171", "google_scholar": "https://scholar.google.com/citations?hl=en;6lhdu6kAAAAJ;p5ynADcAAAAJ", "orcid": "0000-0003-3673-0665;0000-0002-3117-3532;0000-0001-8874-4001", "linkedin": "emaballarin;alessioansuini/;", "or_profile": "~Emanuele_Ballarin1;~Alessio_ansuini1;~Luca_Bortolussi1", "aff": "University of Trieste;AREA Science Park;University of Trieste", "aff_domain": "units.it;areasciencepark.it;units.it", "position": "PhD student;Researcher;Full Professor", "bibtex": "@misc{\nanonymous2024carefully,\ntitle={Carefully Blending Adversarial Training and Purification Improves Adversarial Robustness},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=s5Y3M5l1qg}\n}", "github": "", "project": "", "reviewers": "WKot;7CCs;ZCGH", "site": "https://openreview.net/forum?id=s5Y3M5l1qg", "pdf_size": 757300, "rating": "3;4;6", "confidence": "3;3;5", "soundness": "2;2;3", "novelty": "2;2;3", "presentation": "2;1;2", "wc_summary": "40;34;196", "wc_strengths": "11;15;104", "wc_weaknesses": "102;209;195", "wc_questions": "10;2;53", "wc_limitations": "13;8;9", "wc_review": "176;268;557", "wc_reply_reviewers": "492;104;299", "wc_reply_authors": "1622;262;0", "reply_reviewers": "2;1;1", "reply_authors": "4;2;1", "rating_avg": [ 4.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 90.0, 74.9933330370107 ], "wc_strengths_avg": [ 43.333333333333336, 42.92888175679503 ], "wc_weaknesses_avg": [ 168.66666666666666, 47.48567035315906 ], "wc_questions_avg": [ 21.666666666666668, 22.395436042987765 ], "wc_limitations_avg": [ 10.0, 2.160246899469287 ], "wc_review_avg": [ 333.6666666666667, 162.32546182147627 ], "wc_reply_reviewers_avg": [ 298.3333333333333, 158.40103815596945 ], "wc_reply_authors_avg": [ 628.0, 710.9561636744327 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 1.247219128924647 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1750165298386644083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Trieste;Area Science Park", "aff_unique_dep": ";", "aff_unique_url": "https://www.units.it;https://www.area-science-park.org/", "aff_unique_abbr": "UniTS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Understanding Information Storage and Transfer in Multi-Modal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93402", "id": "s63dtq0mwA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s63dtq0mwA", "openreview": "https://openreview.net/forum?id=s63dtq0mwA", "poster": "/media/PosterPDFs/NeurIPS%202024/93402.png?t=1731008670.84099", "project": "", "author_site": "Samyadeep Basu, Martin Grayson, Cecily Morrison, Besmira Nushi, Soheil Feizi, Daniela Massiceti", "tldr": "", "abstract": "Understanding the mechanisms of information storage and transfer in Transformer-based models is important for driving model understanding progress. Recent work has studied these mechanisms for Large Language Models (LLMs), revealing insights on how information is stored in a model's parameters and how information flows to and from these parameters in response to specific prompts. However, these studies have not yet been extended to Multi-modal Large Language Models (MLLMs). Given their expanding capabilities and real-world use, we start by studying one aspect of these models -- how MLLMs process information in a factual visual question answering task. We use a constraint-based formulation which views a visual question as having a set of visual or textual constraints that the model's generated answer must satisfy to be correct (e.g. What movie directed by \\emph{the director in this photo} has won a \\emph{Golden Globe}?). Under this setting, we contribute i) a method that extends causal information tracing from pure language to the multi-modal setting, and ii) \\emph{VQA-Constraints}, a test-bed of 9.7K visual questions annotated with constraints. We use these tools to study two open-source MLLMs, LLaVa and multi-modal Phi-2. Our key findings show that these MLLMs rely on MLP and self-attention blocks in much earlier layers for information storage, compared to LLMs whose mid-layer MLPs are more important. We also show that a consistent small subset of visual tokens output by the vision encoder are responsible for transferring information from the image to these causal blocks. We validate these mechanisms by introducing MultEdit a model-editing algorithm that can correct errors and insert new long-tailed information into MLLMs by targeting these causal blocks. We will publicly release our dataset and code.", "keywords": "interpretability;multimodal generative models;VQA", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Samyadeep Basu;Martin Grayson;Cecily Morrison;Besmira Nushi;Soheil Feizi;Daniela Massiceti", "authorids": "~Samyadeep_Basu1;~Martin_Grayson1;~Cecily_Morrison1;~Besmira_Nushi1;~Soheil_Feizi2;~Daniela_Massiceti1", "gender": "M;;F;F;M;F", "homepage": "https://samyadeepbasu.github.io/;;https://www.microsoft.com/en-us/research/people/cecilym/;http://besmiranushi.com/;https://www.cs.umd.edu/~sfeizi/;https://danielamassiceti.github.io/", "dblp": "250/9138;;;117/4927;57/2132;186/8148", "google_scholar": "6aRwDecAAAAJ;;;QWTkjB8AAAAJ;lptAmrMAAAAJ;-4fo-SwAAAAJ", "orcid": ";;;;;0000-0002-1273-0591", "linkedin": ";;;;;", "or_profile": "~Samyadeep_Basu1;~Martin_Grayson1;~Cecily_Morrison1;~Besmira_Nushi1;~Soheil_Feizi2;~Daniela_Massiceti1", "aff": "Adobe Systems;;;Microsoft;University of Maryland, College Park;Microsoft Research", "aff_domain": "adobe.com;;;microsoft.com;umd.edu;research.microsoft.com", "position": "Intern;;;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nbasu2024understanding,\ntitle={Understanding Information Storage and Transfer in Multi-Modal Large Language Models},\nauthor={Samyadeep Basu and Martin Grayson and Cecily Morrison and Besmira Nushi and Soheil Feizi and Daniela Massiceti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s63dtq0mwA}\n}", "github": "", "reviewers": "eXiq;BKyn;3JTJ;ieTM", "pdf_size": 2211752, "rating": "5;6;7;7", "confidence": "3;3;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "172;205;183;88", "wc_strengths": "112;97;125;100", "wc_weaknesses": "180;183;34;142", "wc_questions": "78;84;9;2", "wc_limitations": "1;20;1;13", "wc_review": "543;589;352;345", "wc_reply_reviewers": "0;0;0;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 162.0, 44.34523649728345 ], "wc_strengths_avg": [ 108.5, 11.05667219374799 ], "wc_weaknesses_avg": [ 134.75, 60.371247295380606 ], "wc_questions_avg": [ 43.25, 37.89046713884642 ], "wc_limitations_avg": [ 8.75, 8.13557004763649 ], "wc_review_avg": [ 457.25, 109.98721516612737 ], "wc_reply_reviewers_avg": [ 3.0, 5.196152422706632 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9424597340885793313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "adobe.com;;;microsoft.com;umd.edu;research.microsoft.com", "author_num": 6, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Adobe;Microsoft;University of Maryland", "aff_unique_dep": "Adobe Systems Incorporated;Microsoft Corporation;", "aff_unique_url": "https://www.adobe.com;https://www.microsoft.com;https://www/umd.edu", "aff_unique_abbr": "Adobe;Microsoft;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "AdvAD: Exploring Non-Parametric Diffusion for Imperceptible Adversarial Attacks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93401", "id": "s8Pxz7cvHT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s8Pxz7cvHT", "openreview": "https://openreview.net/forum?id=s8Pxz7cvHT", "poster": "/media/PosterPDFs/NeurIPS%202024/93401.png?t=1733589318.6476476", "project": "", "author_site": "Jin Li, Ziqiang He, Anwei Luo, Jian-Fang Hu, Z. Jane Wang, Xiangui Kang", "tldr": "", "abstract": "Imperceptible adversarial attacks aim to fool DNNs by adding imperceptible perturbation to the input data. Previous methods typically improve the imperceptibility of attacks by integrating common attack paradigms with specifically designed perception-based losses or the capabilities of generative models. In this paper, we propose Adversarial Attacks in Diffusion (AdvAD), a novel modeling framework distinct from existing attack paradigms. AdvAD innovatively conceptualizes attacking as a non-parametric diffusion process by theoretically exploring basic modeling approach rather than using the denoising or generation abilities of regular diffusion models requiring neural networks. At each step, much subtler yet effective adversarial guidance is crafted using only the attacked model without any additional network, which gradually leads the end of diffusion process from the original image to a desired imperceptible adversarial example. Grounded in a solid theoretical foundation of the proposed non-parametric diffusion process, AdvAD achieves high attack efficacy and imperceptibility with intrinsically lower overall perturbation strength. Additionally, an enhanced version AdvAD-X is proposed to evaluate the extreme of our novel framework under an ideal scenario. Extensive experiments demonstrate the effectiveness of the proposed AdvAD and AdvAD-X. Compared with state-of-the-art imperceptible attacks, AdvAD achieves an average of 99.9% (+17.3%) ASR with 1.34 (-0.97) $l_2$ distance, 49.74 (+4.76) PSNR and 0.9971 (+0.0043) SSIM against four prevalent DNNs with three different architectures on the ImageNet-compatible dataset. Code is available at https://github.com/XianguiKang/AdvAD.", "keywords": "Adversarial Attacks;Imperceptibility;Diffusion Models;Deep Neural Networks", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jin Li;Ziqiang He;Anwei Luo;Jian-Fang Hu;Z. Jane Wang;Xiangui Kang", "authorids": "~Jin_Li20;~Ziqiang_He1;~Anwei_Luo1;~Jian-Fang_Hu1;~Z._Jane_Wang1;~Xiangui_Kang1", "gender": "M;M;F;M;M;Not Specified", "homepage": ";https://isee-ai.cn/~hujianfang/;https://www.ece.ubc.ca/~zjanew;https://cse.sysu.edu.cn/teacher/KangXiangui;;https://ralphwithoutmissfun.github.io/", "dblp": ";;13/3672-1;75/2824.html;;59/9581", "google_scholar": "t4QF1YQAAAAJ;https://scholar.google.com.sg/citations?user=4WsBaB4AAAAJ;https://scholar.google.ca/citations?user=W75uTm8AAAAJ;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;0000-0002-3791-0249;;;0009-0001-0998-4865", "linkedin": ";;;;;", "or_profile": "~Jin_Li20;~Jian-Fang_Hu1;~Z._Jane_Wang1;~Xiangui_Kang1;~Luo_Anwei1;~He_Zq1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;University of British Columbia;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;ubc.ca;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "position": "MS student;Associate Professor;Full Professor;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nli2024advad,\ntitle={Adv{AD}: Exploring Non-Parametric Diffusion for Imperceptible Adversarial Attacks},\nauthor={Jin Li and Ziqiang He and Anwei Luo and Jian-Fang Hu and Z. Jane Wang and Xiangui Kang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=s8Pxz7cvHT}\n}", "github": "", "reviewers": "ytMn;jCt2;nUXD;ygiL;K1mG", "pdf_size": 4397627, "rating": "4;4;5;6;6", "confidence": "4;4;3;4;3", "soundness": "3;3;3;3;3", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "22;110;89;108;37", "wc_strengths": "44;52;38;89;49", "wc_weaknesses": "81;107;43;128;54", "wc_questions": "1;65;545;79;2", "wc_limitations": "1;1;7;23;2", "wc_review": "149;335;722;427;144", "wc_reply_reviewers": "0;0;25;11;19", "wc_reply_authors": "506;158;20;15;21", "reply_reviewers": "0;0;1;1;1", "reply_authors": "3;2;2;2;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.2, 36.73363581242674 ], "wc_strengths_avg": [ 54.4, 17.939899665271266 ], "wc_weaknesses_avg": [ 82.6, 31.765389970847203 ], "wc_questions_avg": [ 138.4, 205.77813294905755 ], "wc_limitations_avg": [ 6.8, 8.4 ], "wc_review_avg": [ 355.4, 213.1802992773957 ], "wc_reply_reviewers_avg": [ 11.0, 10.019980039900279 ], "wc_reply_authors_avg": [ 144.0, 188.8840914423446 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.45643546458763845, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kk1Mnnea66oJ:scholar.google.com/&scioq=AdvAD:+Exploring+Non-Parametric+Diffusion+for+Imperceptible+Adversarial+Attacks&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "sysu.edu.cn;sysu.edu.cn;ubc.ca;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Sun Yat-sen University;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.ubc.ca", "aff_unique_abbr": "SYSU;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Canada" }, { "title": "MMDU: A Multi-Turn Multi-Image Dialog Understanding Benchmark and Instruction-Tuning Dataset for LVLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97480", "id": "s8h2jSN6a6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=s8h2jSN6a6", "openreview": "https://openreview.net/forum?id=s8h2jSN6a6", "poster": "/media/PosterPDFs/NeurIPS%202024/97480.png?t=1731415367.0702164", "project": "", "author_site": "Ziyu Liu, Tao Chu, Yuhang Zang, Xilin Wei, Xiaoyi Dong, Pan Zhang, Zijian Liang, Yuanjun Xiong, Dahua Lin, Yu Qiao, Jiaqi Wang", "tldr": "", "abstract": "Generating natural and meaningful responses to communicate with multi-modal human inputs is a fundamental capability of Large Vision-Language Models (LVLMs). While current open-source LVLMs demonstrate promising performance in simplified scenarios such as single-turn single-image input, they fall short in real-world conversation scenarios such as following instructions in a long context history with multi-turn and multi-images. Existing LVLM benchmarks primarily focus on single-choice questions or short-form responses, which do not adequately assess the capabilities of LVLMs in real-world human-AI interaction applications. Therefore, we introduce MMDU, a comprehensive benchmark, and MMDU-45k, a large-scale instruction tuning dataset, designed to evaluate and improve LVLMs' abilities in multi-turn and multi-image conversations. We employ the clustering algorithm to find the relevant images and textual descriptions from the open-source Wikipedia and construct the question-answer pairs by human annotators with the assistance of the GPT-4o model.\nMMDU has a maximum of 18k image+text tokens, 20 images, and 27 turns, which is at least 5x longer than previous benchmarks and poses challenges to current LVLMs. Our in-depth analysis of 15 representative LVLMs using MMDU reveals that open-source LVLMs lag behind closed-source counterparts due to limited conversational instruction tuning data.\nWe demonstrate that fine-tuning open-source LVLMs on MMDU-45k significantly address this gap, generating longer and more accurate conversations, and improving scores on MMDU and existing benchmarks (MMStar: +1.1%, MathVista: +1.5%, ChartQA: +1.2%). Our contributions pave the way for bridging the gap between current LVLM models and real-world application demands. The links to MMDU, and MMDU-45k are available in the supplementary material.", "keywords": "LVLM benchmark;LVLM instruction-tuning dataset", "primary_area": "", "supplementary_material": "", "author": "Ziyu Liu;Tao Chu;Yuhang Zang;Xilin Wei;Xiaoyi Dong;Pan Zhang;Zijian Liang;Yuanjun Xiong;Yu Qiao;Dahua Lin;Jiaqi Wang", "authorids": "~Ziyu_Liu5;~Tao_Chu1;~Yuhang_Zang1;~Xilin_Wei1;~Xiaoyi_Dong1;~Pan_Zhang1;~Zijian_Liang1;~Yuanjun_Xiong3;~Yu_Qiao1;~Dahua_Lin1;~Jiaqi_Wang1", "gender": "M;M;M;M;M;M;M;M;;M;M", "homepage": "https://liuziyu77.github.io/;;https://yuhangzang.github.io;https://github.com/Wiselnn570;;https://panzhang0212.github.io/;https://lzjmax.cpolar.cn;http://yjxiong.me/;;http://dahua.site;https://myownskyw7.github.io/", "dblp": ";;230/4433;11/1871.html;230/3711;;;142/2644;;53/6088;44/740-3", "google_scholar": ";fFJ9gQMAAAAJ;hW23VKIAAAAJ;zxtbqQwAAAAJ;FscToE0AAAAJ;moHH480AAAAJ;;ojKsx6AAAAAJ;;GMzzRRUAAAAJ;https://scholar.google.com.hk/citations?user=GDvt570AAAAJ", "orcid": ";;0000-0003-1110-5062;;;;;;;;", "linkedin": ";;yuhang-zang/;;;;;;;;", "or_profile": "~Ziyu_Liu5;~Tao_Chu1;~Yuhang_Zang1;~Xilin_Wei1;~Xiaoyi_Dong1;~Pan_Zhang1;~Zijian_Liang1;~Yuanjun_Xiong3;~Yu_Qiao1;~Dahua_Lin1;~Jiaqi_Wang1", "aff": "Wuhan University;South China University of Technology;Shanghai Artificial Intelligence Laboratory;Fudan University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Wuhan University;Moore Threads Inc.;;The Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_domain": "whu.edu.cn;scut.edu.cn;pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;pjlab.org.cn;whu.edu.cn;moorethreads.com;;cuhk.edu.hk;pjlab.org.cn", "position": "Undergrad student;PhD student;Researcher;PhD student;Researcher;Researcher;Undergrad student;Principal Researcher;;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nliu2024mmdu,\ntitle={{MMDU}: A Multi-Turn Multi-Image Dialog Understanding Benchmark and Instruction-Tuning Dataset for {LVLM}s},\nauthor={Ziyu Liu and Tao Chu and Yuhang Zang and Xilin Wei and Xiaoyi Dong and Pan Zhang and Zijian Liang and Yuanjun Xiong and Yu Qiao and Dahua Lin and Jiaqi Wang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=s8h2jSN6a6}\n}", "github": "", "reviewers": "URmW;boEu;Ckq6", "pdf_size": 11360256, "rating": "6;7;8", "confidence": "4;4;4", "wc_summary_and_contributions": "138;68;24", "wc_strengths": "2;50;27", "wc_improvement": "200;176;77", "wc_limitations": "1;12;6", "wc_correctness": "1;15;22", "wc_clarity": "28;67;5", "wc_relation_to_prior_work": "1;42;3", "wc_documentation": "1;16;9", "wc_additional_feedback": "1;1;1", "wc_review": "373;447;174", "wc_reply_reviewers": "240;58;13", "wc_reply_authors": "681;233;174", "reply_reviewers": "1;1;1", "reply_authors": "6;3;3", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 76.66666666666667, 46.94204464609052 ], "wc_strengths_avg": [ 26.333333333333332, 19.601587237318874 ], "wc_improvement_avg": [ 151.0, 53.23532661682466 ], "wc_limitations_avg": [ 6.333333333333333, 4.4969125210773475 ], "wc_correctness_avg": [ 12.666666666666666, 8.73053390247253 ], "wc_clarity_avg": [ 33.333333333333336, 25.590796956892316 ], "wc_relation_to_prior_work_avg": [ 15.333333333333334, 18.87385022252275 ], "wc_documentation_avg": [ 8.666666666666666, 6.128258770283412 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 331.3333333333333, 115.28033464366573 ], "wc_reply_reviewers_avg": [ 103.66666666666667, 98.13709231931058 ], "wc_reply_authors_avg": [ 362.6666666666667, 226.38070196512382 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 1.4142135623730951 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13317780190198883544&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "whu.edu.cn;scut.edu.cn;pjlab.org.cn;fudan.edu.cn;pjlab.org.cn;pjlab.org.cn;whu.edu.cn;moorethreads.com;;cuhk.edu.hk;pjlab.org.cn", "author_num": 11, "aff_unique_index": "0;1;2;3;2;2;0;4;5;6", "aff_unique_norm": "Wuhan University;South China University of Technology;Shanghai Artificial Intelligence Laboratory;Fudan University;Moore Threads Inc.;Chinese University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.scut.edu.cn;http://www.shailab.org/;https://www.fudan.edu.cn;https://www.moorethreads.com;https://www.cuhk.edu.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "WHU;SCUT;Shanghai AI Lab;Fudan;MTI;CUHK;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Generalizablity of Memorization Neural Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93400", "id": "sABwo1ZTFi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sABwo1ZTFi", "openreview": "https://openreview.net/forum?id=sABwo1ZTFi", "poster": "/media/PosterPDFs/NeurIPS%202024/93400.png?t=1731140905.6095655", "project": "", "author_site": "Lijia Yu, Xiao-Shan Gao, Lijun Zhang, Yibo Miao", "tldr": "", "abstract": "The neural network memorization problem is to study the expressive power of neural networks to interpolate a finite dataset. Although memorization is widely believed to have a close relationship with the strong generalizability of deep learning when using overparameterized models, to the best of our knowledge, there exists no theoretical study on the generalizability of memorization neural networks. In this paper, we give the first theoretical analysis of this topic. Since using i.i.d. training data is a necessary condition for a learning algorithm to be generalizable, memorization and its generalization theory for i.i.d. datasets are developed under mild conditions on the data distribution. First, algorithms are given to construct memorization networks for an i.i.d. dataset, which have the smallest number of parameters and even a constant number of parameters. Second, we show that, in order for the memorization networks to be generalizable, the width of the network must be at least equal to the dimension of the data, which implies that the existing memorization networks with an optimal number of parameters are not generalizable. Third, a lower bound for the sample complexity of general memorization algorithms and the exact sample complexity for memorization algorithms with constant number of parameters are given. As a consequence, it is shown that there exist data distributions such that, to be generalizable for them, the memorization network must have an exponential number of parameters in the data dimension. Finally, an efficient and generalizable memorization algorithm is given when the number of training samples is greater than the efficient memorization sample complexity of the data distribution.", "keywords": "generalization;memorization;sample complexity", "primary_area": "learning_theory", "supplementary_material": "/attachment/13d8585bea2afd15249f8db90df1bd9cace5dc45.zip", "author": "Lijia Yu;Xiao-Shan Gao;Lijun Zhang;Yibo Miao", "authorids": "~Lijia_Yu2;~Xiao-Shan_Gao2;~Lijun_Zhang2;~Yibo_Miao1", "gender": "M;M;M;M", "homepage": ";http://www.mmrc.iss.ac.cn/~xgao/;;http://www.amss.ac.cn/", "dblp": "175/8873.html;13/3109;76/4015-1;332/0699", "google_scholar": ";_se7GmUAAAAJ;;", "orcid": ";0000-0003-2021-9395;;", "linkedin": ";;;", "or_profile": "~Lijia_Yu2;~Xiao-Shan_Gao2;~Lijun_Zhang2;~Yibo_Miao1", "aff": "Institute of Software, Chinese Academy of Sciences;Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Chinese Academy of Sciences;Chinese Academy of Sciences, Chinese Academy of Sciences;Intel", "aff_domain": "ios.ac.cn;amss.ac.cn;ios.ac.cn;intel.com", "position": "Postdoc;Full Professor;Full Professor;Intern", "bibtex": "@inproceedings{\nyu2024generalizablity,\ntitle={Generalizablity of Memorization Neural Network},\nauthor={Lijia Yu and Xiao-Shan Gao and Lijun Zhang and Yibo Miao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sABwo1ZTFi}\n}", "github": "", "reviewers": "R3pV;dviZ;8736;QwyD", "pdf_size": 558457, "rating": "6;6;7;7", "confidence": "4;4;4;3", "soundness": "4;2;3;3", "novelty": "3;3;4;3", "presentation": "4;3;4;3", "wc_summary": "50;211;79;77", "wc_strengths": "21;84;106;13", "wc_weaknesses": "15;562;253;59", "wc_questions": "202;211;64;17", "wc_limitations": "4;26;8;1", "wc_review": "292;1094;510;167", "wc_reply_reviewers": "9;219;16;0", "wc_reply_authors": "8;252;7;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 104.25, 62.68721959059917 ], "wc_strengths_avg": [ 56.0, 39.86853395849915 ], "wc_weaknesses_avg": [ 222.25, 215.62626811221307 ], "wc_questions_avg": [ 123.5, 84.706847420973 ], "wc_limitations_avg": [ 9.75, 9.705024471890836 ], "wc_review_avg": [ 515.75, 355.70238613200223 ], "wc_reply_reviewers_avg": [ 61.0, 91.39748355397975 ], "wc_reply_authors_avg": [ 66.75, 106.99853970966146 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17999082261340310379&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ios.ac.cn;amss.ac.cn;ios.ac.cn;intel.com", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Intel", "aff_unique_dep": "Institute of Software;Intel Corporation", "aff_unique_url": "http://www.ios.ac.cn;https://www.intel.com", "aff_unique_abbr": "CAS;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "GameTraversalBenchmark: Evaluating Planning Abilities Of Large Language Models Through Traversing 2D Game Maps", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97479", "id": "sAxVIWQOzo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sAxVIWQOzo", "openreview": "https://openreview.net/forum?id=sAxVIWQOzo", "poster": "", "project": "", "author_site": "Muhammad Umair Nasir, Steven James, Julian Togelius", "tldr": "", "abstract": "Large language models (LLMs) have recently demonstrated great success in generating and understanding natural language. While they have also shown potential beyond the domain of natural language, it remains an open question as to what extent and in which way these LLMs can plan. We investigate their planning capabilities by proposing \\texttt{GameTraversalBenchmark (GTB)}, a benchmark consisting of diverse 2D grid-based game maps. An LLM succeeds if it can traverse through given objectives, with a minimum number of steps and a minimum number of generation errors. We evaluate a number of LLMs on \\texttt{GTB} and found that GPT-4-Turbo achieved the highest score of $44.97\\%$ on \\texttt{GTB\\_Score} (GTBS), a composite score that combines the three above criteria. Furthermore, we preliminarily test large reasoning models, namely o1, which scores $67.84\\%$ on GTBS, indicating that the benchmark remains challenging for current models. Code, data, and documentation are available at \\url{https://github.com/umair-nasir14/Game-Traversal-Benchmark}.", "keywords": "Large Language Model Evaluation;Planning Benchmark", "primary_area": "", "supplementary_material": "", "author": "Muhammad Umair Nasir;Steven James;Julian Togelius", "authorids": "~Muhammad_Umair_Nasir1;~Steven_James1;~Julian_Togelius1", "gender": "M;M;M", "homepage": "https://umair-nasir14.github.io/;;http://julian.togelius.com", "dblp": "319/7239;195/8202;47/767", "google_scholar": "O-4Fbb0AAAAJ;;lr4I9BwAAAAJ", "orcid": "0000-0002-2458-9599;;0000-0003-3128-4598", "linkedin": "umair-nasir/;;togelius/", "or_profile": "~Muhammad_Umair_Nasir1;~Steven_James1;~Julian_Togelius1", "aff": "Axon Corporate Services;University of the Witwatersrand;New York University", "aff_domain": "axon.co.za;wits.ac.za;nyu.edu", "position": "Data Scientist;Senior Lecturer;Associate Professor", "bibtex": "@inproceedings{\nnasir2024gametraversalbenchmark,\ntitle={GameTraversalBenchmark: Evaluating Planning Abilities Of Large Language Models Through Traversing 2D Game Maps},\nauthor={Muhammad Umair Nasir and Steven James and Julian Togelius},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=sAxVIWQOzo}\n}", "github": "", "reviewers": "vLVW;GU9P;52Ke", "pdf_size": 388897, "rating": "6;7;7", "confidence": "4;3;4", "wc_summary_and_contributions": "93;103;57", "wc_strengths": "43;114;42", "wc_improvement": "16;181;5", "wc_limitations": "18;115;61", "wc_correctness": "23;17;12", "wc_clarity": "5;143;11", "wc_relation_to_prior_work": "5;12;1", "wc_documentation": "19;8;1", "wc_additional_feedback": "1;1;1", "wc_review": "223;694;191", "wc_reply_reviewers": "0;29;20", "wc_reply_authors": "0;40;25", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 84.33333333333333, 19.754043186705406 ], "wc_strengths_avg": [ 66.33333333333333, 33.70789554721894 ], "wc_improvement_avg": [ 67.33333333333333, 80.49982746703388 ], "wc_limitations_avg": [ 64.66666666666667, 39.684869772860395 ], "wc_correctness_avg": [ 17.333333333333332, 4.496912521077347 ], "wc_clarity_avg": [ 53.0, 63.686733312362634 ], "wc_relation_to_prior_work_avg": [ 6.0, 4.546060565661952 ], "wc_documentation_avg": [ 9.333333333333334, 7.408703590297623 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 369.3333333333333, 229.94540414822143 ], "wc_reply_reviewers_avg": [ 16.333333333333332, 12.119772641798562 ], "wc_reply_authors_avg": [ 21.666666666666668, 16.49915822768611 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8596016643155893381&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "axon.co.za;wits.ac.za;nyu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Axon Corporate Services;University of the Witwatersrand;New York University", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.wits.ac.za;https://www.nyu.edu", "aff_unique_abbr": ";Wits;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";South Africa;United States" }, { "title": "Almost-Linear RNNs Yield Highly Interpretable Symbolic Codes in Dynamical Systems Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93399", "id": "sEpSxteEKJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sEpSxteEKJ", "openreview": "https://openreview.net/forum?id=sEpSxteEKJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93399.png?t=1731399517.6265595", "project": "", "author_site": "Manuel Brenner, Christoph J\u00fcrgen Hemmer, Zahra Monfared, Daniel Durstewitz", "tldr": "", "abstract": "Dynamical systems theory (DST) is fundamental for many areas of science and engineering. It can provide deep insights into the behavior of systems evolving in time, as typically described by differential or recursive equations. A common approach to facilitate mathematical tractability and interpretability of DS models involves decomposing nonlinear DS into multiple linear DS combined by switching manifolds, i.e. piecewise linear (PWL) systems. PWL models are popular in engineering and a frequent choice in mathematics for analyzing the topological properties of DS. However, hand-crafting such models is tedious and only possible for very low-dimensional scenarios, while inferring them from data usually gives rise to unnecessarily complex representations with very many linear subregions. Here we introduce Almost-Linear Recurrent Neural Networks (AL-RNNs) which automatically and robustly produce most parsimonious PWL representations of DS from time series data, using as few PWL nonlinearities as possible. AL-RNNs can be efficiently trained with any SOTA algorithm for dynamical systems reconstruction (DSR), and naturally give rise to a symbolic encoding of the underlying DS that provably preserves important topological properties. We show that for the Lorenz and R\u00f6ssler systems, AL-RNNs derive, in a purely data-driven way, the known topologically minimal PWL representations of the corresponding chaotic attractors. We further illustrate on two challenging empirical datasets that interpretable symbolic encodings of the dynamics can be achieved, tremendously facilitating mathematical and computational analysis of the underlying systems.", "keywords": "recurrent neural networks;dynamical systems;chaos;attractors;interpretability", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Manuel Brenner;Christoph J\u00fcrgen Hemmer;Zahra Monfared;Daniel Durstewitz", "authorids": "~Manuel_Brenner1;~Christoph_J\u00fcrgen_Hemmer1;~Zahra_Monfared1;~Daniel_Durstewitz1", "gender": "M;M;F;", "homepage": ";;;https://durstewitzlab.github.io", "dblp": "323/8935;380/2407;;98/2120", "google_scholar": "HCUeyg8AAAAJ;https://scholar.google.de/citations?user=6ksJaUwAAAAJ;https://scholar.google.pl/citations?user=OPUIwIoAAAAJ;https://scholar.google.de/citations?user=2bcbKU0AAAAJ", "orcid": ";;;0000-0002-9340-3786", "linkedin": "manuel-brenner-772261191/;christoph-hemmer-b0a077166/;;", "or_profile": "~Manuel_Brenner1;~Christoph_J\u00fcrgen_Hemmer1;~Zahra_Monfared1;~Daniel_Durstewitz1", "aff": "Heidelberg University;Ruprecht-Karls-Universit\u00e4t Heidelberg;ZI Mannheim-Heidelberg University;Heidelberg University", "aff_domain": "uni-heidelberg.de;uni-heidelberg.de;zi-manheim.de;uni-heidelberg.de", "position": "PhD student;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nbrenner2024almostlinear,\ntitle={Almost-Linear {RNN}s Yield Highly Interpretable Symbolic Codes in Dynamical Systems Reconstruction},\nauthor={Manuel Brenner and Christoph J{\\\"u}rgen Hemmer and Zahra Monfared and Daniel Durstewitz},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sEpSxteEKJ}\n}", "github": "", "reviewers": "5E4V;82gv;nsFA", "pdf_size": 5685045, "rating": "7;7;8", "confidence": "4;2;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;4;3", "wc_summary": "97;99;162", "wc_strengths": "87;91;124", "wc_weaknesses": "424;41;237", "wc_questions": "78;145;347", "wc_limitations": "25;12;88", "wc_review": "711;388;958", "wc_reply_reviewers": "19;27;63", "wc_reply_authors": "34;16;22", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 119.33333333333333, 30.18093585177386 ], "wc_strengths_avg": [ 100.66666666666667, 16.579773487261185 ], "wc_weaknesses_avg": [ 234.0, 156.37348453835347 ], "wc_questions_avg": [ 190.0, 114.33576285076627 ], "wc_limitations_avg": [ 41.666666666666664, 33.189690501051004 ], "wc_review_avg": [ 685.6666666666666, 233.38999312071823 ], "wc_reply_reviewers_avg": [ 36.333333333333336, 19.136933459209764 ], "wc_reply_authors_avg": [ 24.0, 7.483314773547883 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9626635067856367307&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "uni-heidelberg.de;uni-heidelberg.de;zi-manheim.de;uni-heidelberg.de", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Heidelberg University;Ruprecht-Karls-Universit\u00e4t Heidelberg;ZI Mannheim-Heidelberg University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-heidelberg.de;https://www.uni-heidelberg.de/;", "aff_unique_abbr": "Uni Heidelberg;Uni Heidelberg;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "GSGAN: Adversarial Learning for Hierarchical Generation of 3D Gaussian Splats", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93398", "id": "sFaFDcVNbW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sFaFDcVNbW", "openreview": "https://openreview.net/forum?id=sFaFDcVNbW", "poster": "/media/PosterPDFs/NeurIPS%202024/93398.png?t=1729673367.2468824", "project": "", "author_site": "Sangeek Hyun, Jae-Pil Heo", "tldr": "", "abstract": "Most advances in 3D Generative Adversarial Networks (3D GANs) largely depend on ray casting-based volume rendering, which incurs demanding rendering costs. One promising alternative is rasterization-based 3D Gaussian Splatting (3D-GS), providing a much faster rendering speed and explicit 3D representation. In this paper, we exploit Gaussian as a 3D representation for 3D GANs by leveraging its efficient and explicit characteristics. However, in an adversarial framework, we observe that a na\\\"ive generator architecture suffers from training instability and lacks the capability to adjust the scale of Gaussians. This leads to model divergence and visual artifacts due to the absence of proper guidance for initialized positions of Gaussians and densification to manage their scales adaptively. To address these issues, we introduce GSGAN, a generator architecture with a hierarchical multi-scale Gaussian representation that effectively regularizes the position and scale of generated Gaussians. Specifically, we design a hierarchy of Gaussians where finer-level Gaussians are parameterized by their coarser-level counterparts; the position of finer-level Gaussians would be located near their coarser-level counterparts, and the scale would monotonically decrease as the level becomes finer, modeling both coarse and fine details of the 3D scene. Experimental results demonstrate that ours achieves a significantly faster rendering speed (\u00d7100) compared to state-of-the-art 3D consistent GANs with comparable 3D generation capability.", "keywords": "3D GANs;Generative Adversarial Networks;3D Gaussian Splatting;3D Generative Models", "primary_area": "generative_models", "supplementary_material": "/attachment/e368067f506167cebdcb6f55919a765c15dde4e7.zip", "author": "Sangeek Hyun;Jae-Pil Heo", "authorids": "~Sangeek_Hyun1;~Jae-Pil_Heo3", "gender": "M;M", "homepage": "https://hse1032.github.io/;", "dblp": "277/6766;17/7557", "google_scholar": "xbaJDBwAAAAJ;VXyJ_ssAAAAJ", "orcid": "0000-0002-4050-6896;", "linkedin": "sangeek-hyun-33837a316/;", "or_profile": "~Sangeek_Hyun1;~Jae-pil_Heo1", "aff": "Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhyun2024gsgan,\ntitle={{GSGAN}: Adversarial Learning for Hierarchical Generation of 3D Gaussian Splats},\nauthor={Sangeek Hyun and Jae-Pil Heo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sFaFDcVNbW}\n}", "github": "", "reviewers": "LD2e;RZRC;Abis;Wn5i", "pdf_size": 20503862, "rating": "5;6;6;7", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "221;23;96;65", "wc_strengths": "126;7;92;90", "wc_weaknesses": "71;158;225;93", "wc_questions": "165;4;174;3", "wc_limitations": "29;4;6;3", "wc_review": "612;196;593;254", "wc_reply_reviewers": "49;6;0;0", "wc_reply_authors": "43;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 101.25, 73.83215762796046 ], "wc_strengths_avg": [ 78.75, 43.82564888281747 ], "wc_weaknesses_avg": [ 136.75, 60.15968334358152 ], "wc_questions_avg": [ 86.5, 83.06172403700756 ], "wc_limitations_avg": [ 10.5, 10.735455276791944 ], "wc_review_avg": [ 413.75, 189.97943967703452 ], "wc_reply_reviewers_avg": [ 13.75, 20.498475553074673 ], "wc_reply_authors_avg": [ 10.75, 18.619546181365433 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:68qQq99S4RgJ:scholar.google.com/&scioq=GSGAN:+Adversarial+Learning+for+Hierarchical+Generation+of+3D+Gaussian+Splats&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "skku.edu;skku.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "HairFastGAN: Realistic and Robust Hair Transfer with a Fast Encoder-Based Approach", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93397", "id": "sGvZyV2iqN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sGvZyV2iqN", "openreview": "https://openreview.net/forum?id=sGvZyV2iqN", "poster": "/media/PosterPDFs/NeurIPS%202024/93397.png?t=1731675746.2930362", "project": "", "author_site": "Maxim Nikolaev, Mikhail Kuznetsov, Dmitry Vetrov, Aibek Alanov", "tldr": "", "abstract": "Our paper addresses the complex task of transferring a hairstyle from a reference image to an input photo for virtual hair try-on. This task is challenging due to the need to adapt to various photo poses, the sensitivity of hairstyles, and the lack of objective metrics. The current state of the art hairstyle transfer methods use an optimization process for different parts of the approach, making them inexcusably slow. At the same time, faster encoder-based models are of very low quality because they either operate in StyleGAN's W+ space or use other low-dimensional image generators. Additionally, both approaches have a problem with hairstyle transfer when the source pose is very different from the target pose, because they either don't consider the pose at all or deal with it inefficiently. In our paper, we present the HairFast model, which uniquely solves these problems and achieves high resolution, near real-time performance, and superior reconstruction compared to optimization problem-based methods. Our solution includes a new architecture operating in the FS latent space of StyleGAN, an enhanced inpainting approach, and improved encoders for better alignment, color transfer, and a new encoder for post-processing. The effectiveness of our approach is demonstrated on realism metrics after random hairstyle transfer and reconstruction when the original hairstyle is transferred. In the most difficult scenario of transferring both shape and color of a hairstyle from different images, our method performs in less than a second on the Nvidia V100.", "keywords": "Generative Model;StyleGAN;HairSwap", "primary_area": "generative_models", "supplementary_material": "/attachment/19f6eb10a9d3c30245418bfc244f2d7fb843dfaa.zip", "author": "Maxim Nikolaev;Mikhail Kuznetsov;Dmitry Vetrov;Aibek Alanov", "authorids": "~Maxim_Nikolaev1;~Mikhail_Kuznetsov2;~Dmitry_P._Vetrov1;~Aibek_Alanov1", "gender": ";M;M;M", "homepage": "https://github.com/maximkm;https://constructor.university/faculty-member/dmitry-vetrov;;https://github.com/MikhailKuz", "dblp": "139/3925;89/3348;228/9365;", "google_scholar": "7g8RbyMAAAAJ;https://scholar.google.ru/citations?user=7HU0UoUAAAAJ;MXJTRGoAAAAJ;", "orcid": ";;;", "linkedin": "widemax/;;;", "or_profile": "~Maxim_Nikolaev1;~Dmitry_P._Vetrov1;~Aibek_Alanov1;~Mikhail_K._Kuznetsov1", "aff": "Artificial Intelligence Research Institute;National Research University Higher School of Economics;Artificial Intelligence Research Institute;Higher School of Economics", "aff_domain": "airi.net;hse.ru;airi.net;hse.ru", "position": "Intern;Full Professor;Researcher;MS student", "bibtex": "@inproceedings{\nnikolaev2024hairfastgan,\ntitle={HairFast{GAN}: Realistic and Robust Hair Transfer with a Fast Encoder-Based Approach},\nauthor={Maxim Nikolaev and Mikhail Kuznetsov and Dmitry Vetrov and Aibek Alanov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sGvZyV2iqN}\n}", "github": "", "reviewers": "fS7X;JffK;RjWU", "pdf_size": 31177531, "rating": "6;6;7", "confidence": "4;5;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "99;80;78", "wc_strengths": "61;134;81", "wc_weaknesses": "107;107;51", "wc_questions": "80;84;1", "wc_limitations": "6;83;7", "wc_review": "353;488;218", "wc_reply_reviewers": "0;110;0", "wc_reply_authors": "0;196;0", "reply_reviewers": "0;3;0", "reply_authors": "1;3;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 85.66666666666667, 9.46337971105226 ], "wc_strengths_avg": [ 92.0, 30.80043289739069 ], "wc_weaknesses_avg": [ 88.33333333333333, 26.398653164297773 ], "wc_questions_avg": [ 55.0, 38.21866908549625 ], "wc_limitations_avg": [ 32.0, 36.0647565729573 ], "wc_review_avg": [ 353.0, 110.22703842524301 ], "wc_reply_reviewers_avg": [ 36.666666666666664, 51.85449728701349 ], "wc_reply_authors_avg": [ 65.33333333333333, 92.39528607504222 ], "reply_reviewers_avg": [ 1.0, 1.4142135623730951 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=631874673805027121&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": "airi.net;hse.ru;airi.net;hse.ru", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Artificial Intelligence Research Institute;National Research University Higher School of Economics;Higher School of Economics", "aff_unique_dep": ";;", "aff_unique_url": ";https://hse.ru;https://www.hse.ru", "aff_unique_abbr": ";HSE;HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;Russian Federation" }, { "title": "EMGBench: Benchmarking Out-of-Distribution Generalization and Adaptation for Electromyography", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97478", "id": "sHBn3PNcwU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sHBn3PNcwU", "openreview": "https://openreview.net/forum?id=sHBn3PNcwU", "poster": "/media/PosterPDFs/NeurIPS%202024/97478.png?t=1732907537.3552265", "project": "", "author_site": "Jehan Yang, Maxwell Soh, Vivianna Lieu, Douglas Weber, Zackory Erickson", "tldr": "", "abstract": "This paper introduces the first generalization and adaptation benchmark using machine learning for evaluating out-of-distribution performance of electromyography (EMG) classification algorithms. The ability of an EMG classifier to handle inputs drawn from a different distribution than the training distribution is critical for real-world deployment as a control interface. By predicting the user\u2019s intended gesture using EMG signals, we can create a wearable solution to control assistive technologies, such as computers, prosthetics, and mobile manipulator robots. This new out-of-distribution benchmark consists of two major tasks that have utility for building robust and adaptable control interfaces: 1) intersubject classification, and 2) adaptation using train-test splits for time-series. This benchmark spans nine datasets, the largest collection of EMG datasets in a benchmark. Among these, a new dataset is introduced, featuring a novel, easy-to-wear high-density EMG wearable for data collection. The lack of open-source benchmarks has made comparing accuracy results between papers challenging for the EMG research community. This new benchmark provides researchers with a valuable resource for analyzing practical measures of out-of-distribution performance for EMG datasets. Our code and data from our new dataset can be found at emgbench.github.io.", "keywords": "electromyography;generalization;adaptation;benchmarking;out-of-distribution;healthcare", "primary_area": "", "supplementary_material": "/attachment/5c27705a193996b681536f2929e871287d42a73b.pdf", "author": "Jehan Yang;Maxwell J. Soh;Vivianna Lieu;Douglas J Weber;Zackory Erickson", "authorids": "~Jehan_Yang1;~Maxwell_J._Soh1;~Vivianna_Lieu1;~Douglas_J_Weber1;~Zackory_Erickson1", "gender": "M;;;M;M", "homepage": "https://jehanyang.github.io;;;https://www.meche.engineering.cmu.edu/directory/bios/weber-douglas.html;https://zackory.com", "dblp": ";;;;", "google_scholar": "SYQvCw4AAAAJ;;;;wElkTtIAAAAJ", "orcid": ";;;;", "linkedin": ";;vivianna-lieu/;;", "or_profile": "~Jehan_Yang1;~Maxwell_J._Soh1;~Vivianna_Lieu1;~Douglas_J_Weber1;~Zackory_Erickson1", "aff": "Carnegie Mellon University;;Carnegie Mellon University;;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;;cmu.edu;;cmu.edu", "position": "PhD student;;Undergrad student;;Assistant Professor", "bibtex": "@inproceedings{\nyang2024emgbench,\ntitle={{EMGB}ench: Benchmarking Out-of-Distribution Generalization and Adaptation for Electromyography},\nauthor={Jehan Yang and Maxwell J. Soh and Vivianna Lieu and Douglas J Weber and Zackory Erickson},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=sHBn3PNcwU}\n}", "github": "", "reviewers": "xM2s;Fh4V;qE7h", "pdf_size": 10161686, "rating": "6;6;7", "confidence": "3;5;4", "wc_summary_and_contributions": "71;65;131", "wc_strengths": "33;16;63", "wc_improvement": "85;15;80", "wc_limitations": "1;7;10", "wc_correctness": "1;5;1", "wc_clarity": "1;2;1", "wc_relation_to_prior_work": "1;2;1", "wc_documentation": "1;2;19", "wc_additional_feedback": "1;1;1", "wc_review": "195;115;307", "wc_reply_reviewers": "39;21;77", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 89.0, 29.79932885150268 ], "wc_strengths_avg": [ 37.333333333333336, 19.430788855719562 ], "wc_improvement_avg": [ 60.0, 31.88521078284832 ], "wc_limitations_avg": [ 6.0, 3.7416573867739413 ], "wc_correctness_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_clarity_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_relation_to_prior_work_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_documentation_avg": [ 7.333333333333333, 8.259674462242579 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 205.66666666666666, 78.74572298791148 ], "wc_reply_reviewers_avg": [ 45.666666666666664, 23.342855200015464 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iWQXrKyw5swJ:scholar.google.com/&scioq=EMGBench:+Benchmarking+Out-of-Distribution+Generalization+and+Adaptation+for+Electromyography&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "andrew.cmu.edu;;cmu.edu;;cmu.edu", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "CultureLLM: Incorporating Cultural Differences into Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93396", "id": "sIsbOkQmBL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sIsbOkQmBL", "openreview": "https://openreview.net/forum?id=sIsbOkQmBL", "poster": "", "project": "", "author_site": "Cheng Li, Mengzhuo Chen, Jindong Wang, Sunayana Sitaram, Xing Xie", "tldr": "", "abstract": "Large language models (LLMs) have been observed to exhibit bias towards certain cultures due to the predominance of training data obtained from English corpora. Considering that multilingual cultural data is often expensive to procure, existing methodologies address this challenge through prompt engineering or culture-specific pre-training. However, these strategies may neglect the knowledge deficiency of low-resource cultures and necessitate substantial computing resources. In this paper, we propose CultureLLM, a cost-effective solution to integrate cultural differences into LLMs. CultureLLM employs the World Value Survey (WVS) as seed data and generates semantically equivalent training data through the proposed semantic data augmentation. Utilizing only $50$ seed samples from WVS with augmented data, we fine-tune culture-specific LLMs as well as a unified model (CultureLLM-One) for $9$ cultures, encompassing both rich and low-resource languages. Extensive experiments conducted on $60$ culture-related datasets reveal that CultureLLM significantly surpasses various counterparts such as GPT-3.5 (by $8.1$\\%) and Gemini Pro (by $9.5$\\%), demonstrating performance comparable to or exceeding that of GPT-4. Our human study indicates that the generated samples maintain semantic equivalence to the original samples, offering an effective solution for LLMs augmentation. Code is released at https://github.com/Scarelette/CultureLLM.", "keywords": "Culture bias;large language models;fairness", "primary_area": "fairness", "supplementary_material": "", "author": "CHENG LI;Mengzhuo Chen;Jindong Wang;Sunayana Sitaram;Xing Xie", "authorids": "~CHENG_LI26;~Mengzhuo_Chen1;~Jindong_Wang4;~Sunayana_Sitaram1;~Xing_Xie3", "gender": ";M;M;F;M", "homepage": "https://scholar.google.com/citations?user=083GCIwAAAAJ&hl=zh-CN;https://scholar.google.com/citations?user=I2270msAAAAJ&hl=zh-CN;https://jd92.wang/;https://www.microsoft.com/en-us/research/people/susitara/;http://research.microsoft.com/en-us/people/xingx/", "dblp": ";347/9020;19/2969-1;27/7642;08/6809-1", "google_scholar": "083GCIwAAAAJ;I2270msAAAAJ;hBZ_tKsAAAAJ;PUxwYrkAAAAJ;5EQfAFIAAAAJ", "orcid": ";0009-0006-4397-750X;0000-0002-4833-0880;;0000-0002-8608-8482", "linkedin": ";;jindong-wang/;;xingx/", "or_profile": "~CHENG_LI26;~Mengzhuo_Chen1;~Jindong_Wang4;~Sunayana_Sitaram1;~Xing_Xie3", "aff": "Department of Computer Science, University of Washington;University of Chinese Academy of Sciences;Microsoft Research;Microsoft;Microsoft Research Asia", "aff_domain": "cs.washington.edu;ucas.edu.cn;microsoft.com;microsoft.com;microsoft.com", "position": "Intern;MS student;Researcher;Researcher;Senior Principal Researcher", "bibtex": "@inproceedings{\nli2024culturellm,\ntitle={Culture{LLM}: Incorporating Cultural Differences into Large Language Models},\nauthor={CHENG LI and Mengzhuo Chen and Jindong Wang and Sunayana Sitaram and Xing Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sIsbOkQmBL}\n}", "github": "", "reviewers": "XR5a;eAV9;tmAM;ASzm", "pdf_size": 1072787, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "108;65;72;67", "wc_strengths": "22;23;83;51", "wc_weaknesses": "180;106;105;6", "wc_questions": "34;48;141;20", "wc_limitations": "13;19;14;10", "wc_review": "357;261;415;154", "wc_reply_reviewers": "499;273;0;6", "wc_reply_authors": "2772;1417;0;0", "reply_reviewers": "4;3;0;1", "reply_authors": "8;5;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 17.507141400011598 ], "wc_strengths_avg": [ 44.75, 24.963723680572976 ], "wc_weaknesses_avg": [ 99.25, 61.83597254026171 ], "wc_questions_avg": [ 60.75, 47.378133141777546 ], "wc_limitations_avg": [ 14.0, 3.24037034920393 ], "wc_review_avg": [ 296.75, 99.0817213213416 ], "wc_reply_reviewers_avg": [ 194.5, 207.5120478430108 ], "wc_reply_authors_avg": [ 1047.25, 1151.6230665890641 ], "reply_reviewers_avg": [ 2.0, 1.5811388300841898 ], "reply_authors_avg": [ 3.75, 2.947456530637899 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13991824526460030489&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cs.washington.edu;ucas.edu.cn;microsoft.com;microsoft.com;microsoft.com", "author_num": 5, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Washington;University of Chinese Academy of Sciences;Microsoft", "aff_unique_dep": "Department of Computer Science;;Microsoft Research", "aff_unique_url": "https://www.washington.edu;http://www.ucas.ac.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UW;UCAS;MSR", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Seattle;;Asia", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Scaling Laws with Vocabulary: Larger Models Deserve Larger Vocabularies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93395", "id": "sKCKPr8cRL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sKCKPr8cRL", "openreview": "https://openreview.net/forum?id=sKCKPr8cRL", "poster": "/media/PosterPDFs/NeurIPS%202024/93395.png?t=1731141023.6082985", "project": "", "author_site": "Chaofan Tao, Qian Liu, Longxu Dou, Niklas Muennighoff, Zhongwei Wan, Ping Luo, Min Lin, Ngai Wong", "tldr": "", "abstract": "Research on scaling large language models (LLMs) has primarily focused on model parameters and training data size, overlooking the role of vocabulary size. We investigate how vocabulary size impacts LLM scaling laws by training models ranging from 33M to 3B parameters on up to 500B characters with various vocabulary configurations. We propose three complementary approaches for predicting the compute-optimal vocabulary size: IsoFLOPs analysis, derivative estimation, and parametric fit of the loss function. Our approaches converge on the conclusion that the optimal vocabulary size depends on the compute budget, with larger models requiring larger vocabularies. Most LLMs, however, use insufficient vocabulary sizes. For example, we predict that the optimal vocabulary size of Llama2-70B should have been at least 216K, 7 times larger than its vocabulary of 32K. We validate our predictions empirically by training models with 3B parameters across different FLOPs budgets. Adopting our predicted optimal vocabulary size consistently improves downstream performance over commonly used vocabulary sizes. By increasing the vocabulary size from the conventional 32K to 43K, we improve performance on ARC-Challenge from 29.1 to 32.0 with the same 2.3e21 FLOPs. Our work highlights the importance of jointly considering tokenization and model scaling for efficient pre-training. The code and demo are available at https://github.com/sail-sg/scaling-with-vocab and https://hf.co/spaces/sail/scaling-with-vocab-demo.", "keywords": "Natural Language Processing;Scaling Laws;Efficient Neural Networks;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Chaofan Tao;Qian Liu;Longxu Dou;Niklas Muennighoff;Zhongwei Wan;Ping Luo;Min Lin;Ngai Wong", "authorids": "~Chaofan_Tao1;~Qian_Liu2;~Longxu_Dou1;~Niklas_Muennighoff1;~Zhongwei_Wan1;~Ping_Luo2;~Min_Lin1;~Ngai_Wong1", "gender": "M;M;M;M;M;M;M;", "homepage": ";http://siviltaram.github.io/;https://longxudou.github.io/;https://muennighoff.github.io/;https://people.engineering.osu.edu/people/wan.512;https://linmin.me;https://www.eee.hku.hk/~nwong/;http://luoping.me/", "dblp": "239/5831;;229/2829;281/6745;260/6958.html;;88/3656;54/4989-2.html", "google_scholar": "gjmfLroAAAAJ;bcbeUo0AAAAJ;flgPmvkAAAAJ;Me0IoRMAAAAJ;https://scholar.google.com/citations?hl=en;BGONmkIAAAAJ;PM_uMYIAAAAJ;https://scholar.google.com.hk/citations?hl=en", "orcid": ";;;;;;0000-0002-3026-0108;0000-0002-6685-7950", "linkedin": ";;longxu-dou-6b167410a/;niklasmuennighoff/;;min-lin-08a3a422/;;", "or_profile": "~Chaofan_Tao1;~Qian_Liu2;~Longxu_Dou1;~Niklas_Muennighoff1;~Zhongwei_Wan1;~Min_Lin1;~Ngai_Wong1;~Luo_Ping2", "aff": "The University of Hong Kong;Tiktok;Sea AI Lab;Allen Institute for Artificial Intelligence;Ohio State University, Columbus;Sea AI Lab;The University of Hong Kong;The University of Hong Kong", "aff_domain": "hku.hk;bytedance.com;sea.com;allenai.org;osu.edu;sea.com;hku.hk;hku.hk", "position": "PhD Student;Researcher;Researcher;Researcher;PhD student;Principal Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ntao2024scaling,\ntitle={Scaling Laws with Vocabulary: Larger Models Deserve Larger Vocabularies},\nauthor={Chaofan Tao and Qian Liu and Longxu Dou and Niklas Muennighoff and Zhongwei Wan and Ping Luo and Min Lin and Ngai Wong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sKCKPr8cRL}\n}", "github": "", "reviewers": "zt86;Y8zo;EsaU", "pdf_size": 3712752, "rating": "6;6;7", "confidence": "4;4;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "75;180;78", "wc_strengths": "17;89;73", "wc_weaknesses": "14;45;60", "wc_questions": "40;74;1", "wc_limitations": "1;5;8", "wc_review": "147;393;220", "wc_reply_reviewers": "0;69;0", "wc_reply_authors": "88;876;101", "reply_reviewers": "0;1;0", "reply_authors": "2;4;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 111.0, 48.80573736764972 ], "wc_strengths_avg": [ 59.666666666666664, 30.8688984074406 ], "wc_weaknesses_avg": [ 39.666666666666664, 19.154343864744856 ], "wc_questions_avg": [ 38.333333333333336, 29.825417944356715 ], "wc_limitations_avg": [ 4.666666666666667, 2.8674417556808756 ], "wc_review_avg": [ 253.33333333333334, 103.1579156223872 ], "wc_reply_reviewers_avg": [ 23.0, 32.526911934581186 ], "wc_reply_authors_avg": [ 355.0, 368.44085911671993 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1865997229292330693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hku.hk;bytedance.com;sea.com;allenai.org;osu.edu;sea.com;hku.hk;hku.hk", "author_num": 8, "aff_unique_index": "0;1;2;3;4;2;0;0", "aff_unique_norm": "University of Hong Kong;TikTok;Sea AI Lab;Allen Institute for Artificial Intelligence;Ohio State University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.hku.hk;https://www.tiktok.com;;https://allenai.org;https://www.osu.edu", "aff_unique_abbr": "HKU;TikTok;;AI2;OSU", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Hong Kong SAR;;Columbus", "aff_country_unique_index": "0;0;2;2;0;0", "aff_country_unique": "China;;United States" }, { "title": "Semi-supervised Knowledge Transfer Across Multi-omic Single-cell Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93394", "id": "sKEhebkEdz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sKEhebkEdz", "openreview": "https://openreview.net/forum?id=sKEhebkEdz", "poster": "/media/PosterPDFs/NeurIPS%202024/93394.png?t=1730355055.5646408", "project": "", "author_site": "Fan Zhang, Tianyu Liu, Zihao Chen, Xiaojiang Peng, Chong Chen, Xian-Sheng Hua, Xiao Luo, Hongyu Zhao", "tldr": "", "abstract": "Knowledge transfer between multi-omic single-cell data aims to effectively transfer cell types from scRNA-seq data to unannotated scATAC-seq data. Several approaches aim to reduce the heterogeneity of multi-omic data while maintaining the discriminability of cell types with extensive annotated data. However, in reality, the cost of collecting both a large amount of labeled scRNA-seq data and scATAC-seq data is expensive. Therefore, this paper explores a practical yet underexplored problem of knowledge transfer across multi-omic single-cell data under cell type scarcity. To address this problem, we propose a semi-supervised knowledge transfer framework named Dual label scArcity elimiNation with Cross-omic multi-samplE Mixup (DANCE). To overcome the label scarcity in scRNA-seq data, we generate pseudo-labels based on optimal transport and merge them into the labeled scRNA-seq data. Moreover, we adopt a divide-and-conquer strategy which divides the scATAC-seq data into source-like and target-specific data. For source-like samples, we employ consistency regularization with random perturbations while for target-specific samples, we select a few candidate labels and progressively eliminate incorrect cell types from the label set for additional supervision. Next, we generate virtual scRNA-seq samples with multi-sample Mixup based on the class-wise similarity to reduce cell heterogeneity. Extensive experiments on many benchmark datasets suggest the superiority of our DANCE over a series of state-of-the-art methods.", "keywords": "Computional Biology;Semi-supervised Learning;Transfer Learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Fan Zhang;Tianyu Liu;Zihao Chen;Xiaojiang Peng;Chong Chen;Xian-Sheng Hua;Xiao Luo;Hongyu Zhao", "authorids": "~Fan_Zhang27;~Tianyu_Liu4;~Zihao_Chen17;~Xiaojiang_Peng1;~Chong_Chen2;~Xian-Sheng_Hua1;~Xiao_Luo3;~Hongyu_Zhao1", "gender": "M;M;M;M;;M;M;M", "homepage": "https://zfkarl.github.io/;https://helloworldlty.github.io/;https://edwardchen.netlify.app/#about;https://pengxj.github.io/;;;http://luoxiao12.github.io;https://ysph.yale.edu/profile/hongyu-zhao/", "dblp": "21/3626-111;134/1099-5;;133/6556;;56/5807-1;50/1585-1;", "google_scholar": "https://scholar.google.com.hk/citations?user=KbC_-7cAAAAJ;https://scholar.google.com/citations?hl=en;;7oRD67kAAAAJ;;https://scholar.google.co.uk/citations?user=6G-l4o0AAAAJ;https://scholar.google.com.hk/citations?;__z1kpoAAAAJ", "orcid": "0000-0001-5250-7258;0000-0002-9412-6573;0000-0002-1382-0409;;;;;", "linkedin": ";;;;;xshua;%E9%9C%84-%E7%BD%97-303548214/;", "or_profile": "~Fan_Zhang27;~Tianyu_Liu4;~Zihao_Chen17;~Xiaojiang_Peng1;~Chong_Chen2;~Xian-Sheng_Hua1;~Xiao_Luo3;~Hongyu_Zhao1", "aff": "Tencent;Genentech;Peking University;Shenzhen Technology University;;Terminus Group;University of California, Los Angeles;Yale University", "aff_domain": "tencent.com;gene.com;pku.edu.cn;sztu.edu.cn;;tslsmart.com;cs.ucla.edu;yale.edu", "position": "Intern;Intern;PhD student;Full Professor;;Principal Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2024semisupervised,\ntitle={Semi-supervised Knowledge Transfer Across Multi-omic Single-cell Data},\nauthor={Fan Zhang and Tianyu Liu and Zihao Chen and Xiaojiang Peng and Chong Chen and Xian-Sheng Hua and Xiao Luo and Hongyu Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sKEhebkEdz}\n}", "github": "", "reviewers": "L3nP;kGak;puzU;W1Pi", "pdf_size": 0, "rating": "4;5;7;7", "confidence": "4;5;5;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "100;77;42;52", "wc_strengths": "30;41;48;62", "wc_weaknesses": "80;154;175;110", "wc_questions": "96;45;9;4", "wc_limitations": "12;9;8;12", "wc_review": "318;326;282;240", "wc_reply_reviewers": "0;26;188;23", "wc_reply_authors": "0;34;857;35", "reply_reviewers": "0;1;4;1", "reply_authors": "1;2;5;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.75, 22.56518335843961 ], "wc_strengths_avg": [ 45.25, 11.60549438843516 ], "wc_weaknesses_avg": [ 129.75, 37.08352059877811 ], "wc_questions_avg": [ 38.5, 36.77295201639379 ], "wc_limitations_avg": [ 10.25, 1.7853571071357126 ], "wc_review_avg": [ 291.5, 34.04041715373065 ], "wc_reply_reviewers_avg": [ 59.25, 75.01124915637654 ], "wc_reply_authors_avg": [ 231.5, 361.4073186862712 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:k5MhSzEDq2gJ:scholar.google.com/&scioq=Semi-supervised+Knowledge+Transfer+Across+Multi-omic+Single-cell+Data&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "tencent.com;gene.com;pku.edu.cn;sztu.edu.cn;;tslsmart.com;cs.ucla.edu;yale.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Tencent;Genentech;Peking University;Shenzhen Technology University;Terminus Group;University of California, Los Angeles;Yale University", "aff_unique_dep": "Tencent Holdings Limited;;;;;;", "aff_unique_url": "https://www.tencent.com;https://www.genentech.com;http://www.pku.edu.cn;https://www.sztu.edu.cn;;https://www.ucla.edu;https://www.yale.edu", "aff_unique_abbr": "Tencent;Genentech;Peking U;;;UCLA;Yale", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;0;0;1;1", "aff_country_unique": "China;United States;" }, { "title": "DrivingDojo Dataset: Advancing Interactive and Knowledge-Enriched Driving World Model", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97477", "id": "sLzD2rw9Ce", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sLzD2rw9Ce", "openreview": "https://openreview.net/forum?id=sLzD2rw9Ce", "poster": "/media/PosterPDFs/NeurIPS%202024/97477.png?t=1730823063.1918054", "project": "", "author_site": "Yuqi Wang, Ke Cheng, Jiawei He, Qitai Wang, Hengchen Dai, Yuntao Chen, Fei Xia, ZHAO-XIANG ZHANG", "tldr": "", "abstract": "Driving world models have gained increasing attention due to their ability to model complex physical dynamics. However, their superb modeling capability is yet to be fully unleashed due to the limited video diversity in current driving datasets. We introduce DrivingDojo, the first dataset tailor-made for training interactive world models with complex driving dynamics. Our dataset features video clips with a complete set of driving maneuvers, diverse multi-agent interplay, and rich open-world driving knowledge, laying a stepping stone for future world model development. We further define an action instruction following (AIF) benchmark for world models and demonstrate the superiority of the proposed dataset for generating action-controlled future predictions.", "keywords": "Dataset;World model;Autonomous Driving;Interactive Behavior", "primary_area": "", "supplementary_material": "/attachment/f37b49c90508e8a22ead94f46ef91a4e1313857b.pdf", "author": "Yuqi Wang;Ke Cheng;Jiawei He;Qitai Wang;Hengchen Dai;Yuntao Chen;Fei Xia;Zhaoxiang Zhang", "authorids": "~Yuqi_Wang3;~Ke_Cheng1;~Jiawei_He2;~Qitai_Wang1;~Hengchen_Dai1;~Yuntao_Chen1;~Fei_Xia3;~Zhaoxiang_Zhang3", "gender": "M;;M;M;M;M;Not Specified;M", "homepage": "http://robertwyq.github.io;;https://jiaweihe.com/;https://esdolo.github.io/;https://github.com/DHCZ;;;http://zhaoxiangzhang.net", "dblp": "20/1168-1;https://dblp.uni-trier.de/pers/hd/c/Cheng:Ke;172/2564-2;307/5344;;203/8284;;55/2285-1.html", "google_scholar": "35UcX9sAAAAJ;;35lEP_oAAAAJ;RgM_VVIAAAAJ;;iLOoUqIAAAAJ;N2ivjxcAAAAJ;qxWfV6cAAAAJ", "orcid": "0000-0002-6360-1431;;0000-0001-6872-3254;;;;;", "linkedin": "https://www.linkedin.cn/injobs/in/yuqi-wang-a08563192;;;;;;;", "or_profile": "~Yuqi_Wang3;~Ke_Cheng1;~Jiawei_He2;~Qitai_Wang1;~Hengchen_Dai1;~Yuntao_Chen1;~Fei_Xia3;~Zhaoxiang_Zhang3", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Science and Technology of China;Centre for Artificial Intelligence and Robotics (CAIR), Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Meituan;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ustc.edu.cn;cair-cas.org.hk;meituan.com;ia.ac.cn", "position": "PhD student;PhD student;PhD student;PhD student;MS student;Assistant Professor;Engineering Director;Full Professor", "bibtex": "@inproceedings{\nwang2024drivingdojo,\ntitle={DrivingDojo Dataset: Advancing Interactive and Knowledge-Enriched Driving World Model},\nauthor={Yuqi Wang and Ke Cheng and Jiawei He and Qitai Wang and Hengchen Dai and Yuntao Chen and Fei Xia and Zhaoxiang Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=sLzD2rw9Ce}\n}", "github": "", "reviewers": "HBRt;RocP;eNwo;GUhH", "pdf_size": 13311861, "rating": "6;7;7;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "71;88;85;102", "wc_strengths": "5;34;59;35", "wc_improvement": "5;15;51;162", "wc_limitations": "14;1;29;95", "wc_correctness": "8;3;23;9", "wc_clarity": "6;4;8;8", "wc_relation_to_prior_work": "7;8;7;18", "wc_documentation": "13;11;10;19", "wc_additional_feedback": "1;1;1;1", "wc_review": "130;165;273;449", "wc_reply_reviewers": "51;0;8;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 86.5, 11.01135777277262 ], "wc_strengths_avg": [ 33.25, 19.13602623325961 ], "wc_improvement_avg": [ 58.25, 62.295164338815255 ], "wc_limitations_avg": [ 34.75, 36.168874740583234 ], "wc_correctness_avg": [ 10.75, 7.428828979051813 ], "wc_clarity_avg": [ 6.5, 1.6583123951777 ], "wc_relation_to_prior_work_avg": [ 10.0, 4.636809247747852 ], "wc_documentation_avg": [ 13.25, 3.491060010942235 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 254.25, 124.18006079882551 ], "wc_reply_reviewers_avg": [ 14.75, 21.182244923520265 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7522146872563228448&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ia.ac.cn;ia.ac.cn;ia.ac.cn;ia.ac.cn;ustc.edu.cn;cair-cas.org.hk;meituan.com;ia.ac.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;1;0;2;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Science and Technology of China;Meituan", "aff_unique_dep": "Institute of Automation;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ustc.edu.cn;https://www.meituan.com", "aff_unique_abbr": "CAS;USTC;Meituan", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Boosting the Transferability of Adversarial Attack on Vision Transformer with Adaptive Token Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93393", "id": "sNz7tptCH6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sNz7tptCH6", "openreview": "https://openreview.net/forum?id=sNz7tptCH6", "poster": "/media/PosterPDFs/NeurIPS%202024/93393.png?t=1731143675.933583", "project": "", "author_site": "Di Ming, Peng Ren, Yunlong Wang, Xin Feng", "tldr": "", "abstract": "Vision transformers (ViTs) perform exceptionally well in various computer vision tasks but remain vulnerable to adversarial attacks. Recent studies have shown that the transferability of adversarial examples exists for CNNs, and the same holds true for ViTs. However, existing ViT attacks aggressively regularize the largest token gradients to exact zero within each layer of the surrogate model, overlooking the interactions between layers, which limits their transferability in attacking black-box models. Therefore, in this paper, we focus on boosting the transferability of adversarial attacks on ViTs through adaptive token tuning (ATT). Specifically, we propose three optimization strategies: an adaptive gradient re-scaling strategy to reduce the overall variance of token gradients, a self-paced patch out strategy to enhance the diversity of input tokens, and a hybrid token gradient truncation strategy to weaken the effectiveness of attention mechanism. We demonstrate that scaling correction of gradient changes using gradient variance across different layers can produce highly transferable adversarial examples. In addition, introducing attentional truncation can mitigate the overfitting over complex interactions between tokens in deep ViT layers to further improve the transferability. On the other hand, using feature importance as a guidance to discard a subset of perturbation patches in each iteration, along with combining self-paced learning and progressively more sampled attacks, significantly enhances the transferability over attacks that use all perturbation patches. Extensive experiments conducted on ViTs, undefended CNNs, and defended CNNs validate the superiority of our proposed ATT attack method. On average, our approach improves the attack performance by 10.1% compared to state-of-the-art transfer-based attacks. Notably, we achieve the best attack performance with an average of 58.3% on three defended CNNs. Code is available at https://github.com/MisterRpeng/ATT.", "keywords": "Adversarial Attack;Adversarial Transferability;Black-box Attack", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/01db82d8e9456e8b8a7b05e0db28c9e122827e91.zip", "author": "Di Ming;Peng Ren;Yunlong Wang;Xin Feng", "authorids": "~Di_Ming1;~Peng_Ren7;~Yunlong_Wang2;~Xin_Feng2", "gender": "M;M;F;M", "homepage": "https://midasdming.github.io;https://sites.google.com/view/yunlongwang/home;;https://github.com/MisterRpeng", "dblp": "150/2488;;;", "google_scholar": "NQRaX1oAAAAJ;https://scholar.google.com/citations?hl=en;m8_WoI0AAAAJ;", "orcid": ";;0000-0003-1105-3901;", "linkedin": "di-ming-757716aa/;;;", "or_profile": "~Di_Ming1;~Yunlong_Wang2;~Xin_Feng2;~OptimistsPeng1", "aff": "Chongqing University of Technology;IQVIA;Chongqing University of Technology;Chongqing University of Technology", "aff_domain": "cqut.edu.cn;iqvia.com;cqut.edu.cn;cqut.edu.cn", "position": "Lecturer;Researcher;Associate Professor;MS student", "bibtex": "@inproceedings{\nming2024boosting,\ntitle={Boosting the Transferability of Adversarial Attack on Vision Transformer with Adaptive Token Tuning},\nauthor={Di Ming and Peng Ren and Yunlong Wang and Xin Feng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sNz7tptCH6}\n}", "github": "", "reviewers": "zV8Q;38RD;9KoY;52P8", "pdf_size": 15925418, "rating": "4;5;5;6", "confidence": "4;2;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "34;79;152;102", "wc_strengths": "19;49;80;88", "wc_weaknesses": "376;112;93;67", "wc_questions": "31;4;61;159", "wc_limitations": "64;1;53;28", "wc_review": "524;245;439;444", "wc_reply_reviewers": "40;9;11;66", "wc_reply_authors": "1065;40;44;40", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 91.75, 42.522788008313846 ], "wc_strengths_avg": [ 59.0, 27.303845882952093 ], "wc_weaknesses_avg": [ 162.0, 124.5812987570767 ], "wc_questions_avg": [ 63.75, 58.572070989508305 ], "wc_limitations_avg": [ 36.5, 24.295061226512683 ], "wc_review_avg": [ 413.0, 102.69128492720304 ], "wc_reply_reviewers_avg": [ 31.5, 23.39337513057917 ], "wc_reply_authors_avg": [ 297.25, 443.2636771719515 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oKHWQFyuwzMJ:scholar.google.com/&scioq=Boosting+the+Transferability+of+Adversarial+Attack+on+Vision+Transformer+with+Adaptive+Token+Tuning&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "cqut.edu.cn;iqvia.com;cqut.edu.cn;cqut.edu.cn", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chongqing University of Technology;IQVIA", "aff_unique_dep": ";", "aff_unique_url": "http://www.cqut.edu.cn;https://www.iqvia.com", "aff_unique_abbr": ";IQVIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Exploring the Precise Dynamics of Single-Layer GAN Models: Leveraging Multi-Feature Discriminators for High-Dimensional Subspace Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93392", "id": "sOhFyFFnxT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sOhFyFFnxT", "openreview": "https://openreview.net/forum?id=sOhFyFFnxT", "poster": "/media/PosterPDFs/NeurIPS%202024/93392.png?t=1733939362.1329286", "project": "", "author_site": "Andrew Bond, Zafer Dogan", "tldr": "", "abstract": "Subspace learning is a critical endeavor in contemporary machine learning, particularly given the vast dimensions of modern datasets. In this study, we delve into the training dynamics of a single-layer GAN model from the perspective of subspace learning, framing these GANs as a novel approach to this fundamental task. Through a rigorous scaling limit analysis, we offer insights into the behavior of this model. Extending beyond prior research that primarily focused on sequential feature learning, we investigate the non-sequential scenario, emphasizing the pivotal role of inter-feature interactions in expediting training and enhancing performance, particularly with an uninformed initialization strategy. Our investigation encompasses both synthetic and real-world datasets, such as MNIST and Olivetti Faces, demonstrating the robustness and applicability of our findings to practical scenarios. By bridging our analysis to the realm of subspace learning, we systematically compare the efficacy of GAN-based methods against conventional approaches, both theoretically and empirically. Notably, our results unveil that while all methodologies successfully capture the underlying subspace, GANs exhibit a remarkable capability to acquire a more informative basis, owing to their intrinsic ability to generate new data samples. This elucidates the unique advantage of GAN-based approaches in subspace learning tasks.", "keywords": "Training Dynamics;High-Dimensional Analysis;Scaling Limit Analysis;Subspace Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Andrew Bond;Zafer Dogan", "authorids": "~Andrew_Bond1;~Zafer_Dogan1", "gender": "M;M", "homepage": ";http://mysite.ku.edu.tr/zdogan", "dblp": ";48/10697", "google_scholar": "tcga9tUAAAAJ;", "orcid": "0000-0002-6556-5331;0000-0002-5078-4590", "linkedin": "andrew-bond-a31a01207/;", "or_profile": "~Andrew_Bond1;~Zafer_Dogan1", "aff": "Adobe Systems;Koc University", "aff_domain": "adobe.com;ku.edu.tr", "position": "Intern;Assistant Professor", "bibtex": "@inproceedings{\nbond2024exploring,\ntitle={Exploring the Precise Dynamics of Single-Layer {GAN} Models: Leveraging Multi-Feature Discriminators for High-Dimensional Subspace Learning},\nauthor={Andrew Bond and Zafer Dogan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sOhFyFFnxT}\n}", "github": "", "reviewers": "7DBz;N4Fr;uM7k;6SvB", "pdf_size": 2882261, "rating": "4;5;5;6", "confidence": "4;3;3;2", "soundness": "2;3;2;3", "novelty": "3;2;3;3", "presentation": "2;3;2;3", "wc_summary": "91;97;102;35", "wc_strengths": "33;42;72;55", "wc_weaknesses": "241;266;242;50", "wc_questions": "134;47;8;62", "wc_limitations": "9;24;33;10", "wc_review": "508;476;457;212", "wc_reply_reviewers": "375;122;249;52", "wc_reply_authors": "372;0;382;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.25, 26.984949508939238 ], "wc_strengths_avg": [ 50.5, 14.67140075112121 ], "wc_weaknesses_avg": [ 199.75, 87.03555308033609 ], "wc_questions_avg": [ 62.75, 45.61455359860491 ], "wc_limitations_avg": [ 19.0, 10.024968827881711 ], "wc_review_avg": [ 413.25, 117.61244619512001 ], "wc_reply_reviewers_avg": [ 199.5, 123.50404851663771 ], "wc_reply_authors_avg": [ 188.5, 188.53315358312977 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_Lxy6neBfHwJ:scholar.google.com/&scioq=Exploring+the+Precise+Dynamics+of+Single-Layer+GAN+Models:+Leveraging+Multi-Feature+Discriminators+for+High-Dimensional+Subspace+Learning&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "adobe.com;ku.edu.tr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Adobe;Koc University", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://www.koc.edu.tr", "aff_unique_abbr": "Adobe;Koc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;T\u00fcrkiye" }, { "title": "Learning Human-like Representations to Enable Learning Human Values", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93391", "id": "sQApQMBqiP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sQApQMBqiP", "openreview": "https://openreview.net/forum?id=sQApQMBqiP", "poster": "/media/PosterPDFs/NeurIPS%202024/93391.png?t=1731600938.303588", "project": "", "author_site": "Andrea Wynn, Ilia Sucholutsky, Tom Griffiths", "tldr": "", "abstract": "How can we build AI systems that can learn any set of individual human values both quickly and safely, avoiding causing harm or violating societal standards for acceptable behavior during the learning process? We explore the effects of representational alignment between humans and AI agents on learning human values. Making AI systems learn human-like representations of the world has many known benefits, including improving generalization, robustness to domain shifts, and few-shot learning performance. We demonstrate that this kind of representational alignment can also support safely learning and exploring human values in the context of personalization. We begin with a theoretical prediction, show that it applies to learning human morality judgments, then show that our results generalize to ten different aspects of human values -- including ethics, honesty, and fairness -- training AI agents on each set of values in a multi-armed bandit setting, where rewards reflect human value judgments over the chosen action. Using a set of textual action descriptions, we collect value judgments from humans, as well as similarity judgments from both humans and multiple language models, and demonstrate that representational alignment enables both safe exploration and improved generalization when learning human values.", "keywords": "value alignment;representational alignment;kernel methods;language models", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/2927ddf8ccc260e501e5cf3039918437d349fe4e.zip", "author": "Andrea Wynn;Ilia Sucholutsky;Thomas L. Griffiths", "authorids": "~Andrea_Wynn1;~Ilia_Sucholutsky1;~Thomas_L._Griffiths1", "gender": "F;M;", "homepage": "https://andreawynn.github.io/;https://ilia10000.github.io/;http://cocosci.princeton.edu/tom/", "dblp": "365/8326;239/5108;34/4472", "google_scholar": "wf2v_ScAAAAJ;https://scholar.google.ca/citations?user=6MfHyuMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-7509-8274;0000-0003-4121-7479;", "linkedin": "andrea-wynn/;iliasu/;", "or_profile": "~Andrea_Wynn1;~Ilia_Sucholutsky1;~Thomas_L._Griffiths1", "aff": "Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "position": "MS student;Postdoc;Professor", "bibtex": "@inproceedings{\nwynn2024learning,\ntitle={Learning Human-like Representations to Enable Learning Human Values},\nauthor={Andrea Wynn and Ilia Sucholutsky and Thomas L. Griffiths},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sQApQMBqiP}\n}", "github": "", "reviewers": "7UgR;W1mH;RqUK;C7Ry", "pdf_size": 1441289, "rating": "5;5;6;8", "confidence": "4;2;3;3", "soundness": "3;2;2;4", "novelty": "2;2;3;3", "presentation": "2;1;3;4", "wc_summary": "108;112;65;182", "wc_strengths": "79;48;46;80", "wc_weaknesses": "529;115;192;45", "wc_questions": "48;40;106;118", "wc_limitations": "64;6;83;23", "wc_review": "828;321;492;448", "wc_reply_reviewers": "97;0;30;218", "wc_reply_authors": "587;474;250;521", "reply_reviewers": "1;0;1;2", "reply_authors": "4;3;3;4", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 116.75, 41.936708263763386 ], "wc_strengths_avg": [ 63.25, 16.269219403523945 ], "wc_weaknesses_avg": [ 220.25, 185.6843760255558 ], "wc_questions_avg": [ 78.0, 34.38022687534217 ], "wc_limitations_avg": [ 44.0, 30.84639363037436 ], "wc_review_avg": [ 522.25, 187.3584465670016 ], "wc_reply_reviewers_avg": [ 86.25, 83.78059142784801 ], "wc_reply_authors_avg": [ 458.0, 126.61950876543472 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15759281031962961746&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "princeton.edu;princeton.edu;princeton.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "UniGAD: Unifying Multi-level Graph Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93390", "id": "sRILMnkkQd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sRILMnkkQd", "openreview": "https://openreview.net/forum?id=sRILMnkkQd", "poster": "/media/PosterPDFs/NeurIPS%202024/93390.png?t=1731598240.297922", "project": "", "author_site": "Yiqing Lin, Jianheng Tang, Chenyi Zi, H. Vicky Zhao, Yuan Yao, Jia Li", "tldr": "", "abstract": "Graph Anomaly Detection (GAD) aims to identify uncommon, deviated, or suspicious objects within graph-structured data. Existing methods generally focus on a single graph object type (node, edge, graph, etc.) and often overlook the inherent connections among different object types of graph anomalies. For instance, a money laundering transaction might involve an abnormal account and the broader community it interacts with. To address this, we present UniGAD, the first unified framework for detecting anomalies at node, edge, and graph levels jointly. Specifically, we develop the Maximum Rayleigh Quotient Subgraph Sampler (MRQSampler) that unifies multi-level formats by transferring objects at each level into graph-level tasks on subgraphs. We theoretically prove that MRQSampler maximizes the accumulated spectral energy of subgraphs (i.e., the Rayleigh quotient) to preserve the most significant anomaly information. To further unify multi-level training, we introduce a novel GraphStitch Network to integrate information across different levels, adjust the amount of sharing required at each level, and harmonize conflicting training goals. Comprehensive experiments show that UniGAD outperforms both existing GAD methods specialized for a single task and graph prompt-based approaches for multiple tasks, while also providing robust zero-shot task transferability.", "keywords": "Graph Anomaly Detection;Graph Neural Networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yiqing Lin;Jianheng Tang;Chenyi Zi;H. Vicky Zhao;Yuan Yao;Jia Li", "authorids": "~Yiqing_Lin3;~Jianheng_Tang1;~Chenyi_Zi2;~H._Vicky_Zhao1;~Yuan_Yao1;~Jia_Li4", "gender": "F;M;M;F;Unspecified;M", "homepage": ";https://squareroot3.github.io/;;;https://yao-lab.github.io/;https://sites.google.com/view/lijia", "dblp": ";;;;25/4120-11.html;23/6950-9", "google_scholar": ";w4kWvXEAAAAJ;uGbNoaQAAAAJ;;OOlHr-wAAAAJ;1gSbcYoAAAAJ", "orcid": "0009-0008-2129-0533;0000-0001-9341-7312;;;0000-0001-5814-1162;0000-0002-6362-4385", "linkedin": ";;;hong-vicky-zhao-08599110/;;", "or_profile": "~Yiqing_Lin3;~Jianheng_Tang1;~Chenyi_Zi2;~H._Vicky_Zhao1;~Yuan_Yao1;~Jia_Li4", "aff": "Tsinghua University;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Tsinghua University;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "mail.tsinghua.edu.cn;ust.hk;hkust.edu;tsinghua.edu.cn;ust.hk;ust.hk", "position": "PhD student;PhD student;MS student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2024unigad,\ntitle={Uni{GAD}: Unifying Multi-level Graph Anomaly Detection},\nauthor={Yiqing Lin and Jianheng Tang and Chenyi Zi and H. Vicky Zhao and Yuan Yao and Jia Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sRILMnkkQd}\n}", "github": "", "reviewers": "NHz6;ji27;ky9h;ymWJ", "pdf_size": 1440319, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "71;98;108;86", "wc_strengths": "45;34;268;186", "wc_weaknesses": "159;102;164;143", "wc_questions": "4;38;14;12", "wc_limitations": "1;5;12;1", "wc_review": "280;277;566;428", "wc_reply_reviewers": "43;13;5;10", "wc_reply_authors": "51;28;0;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.75, 13.808964479641476 ], "wc_strengths_avg": [ 133.25, 98.20736988637869 ], "wc_weaknesses_avg": [ 142.0, 24.361855430159665 ], "wc_questions_avg": [ 17.0, 12.68857754044952 ], "wc_limitations_avg": [ 4.75, 4.493050188902857 ], "wc_review_avg": [ 387.75, 119.65445039780175 ], "wc_reply_reviewers_avg": [ 17.75, 14.85555451674558 ], "wc_reply_authors_avg": [ 25.0, 18.207141456033124 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7858831772687075382&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "mail.tsinghua.edu.cn;ust.hk;hkust.edu;tsinghua.edu.cn;ust.hk;ust.hk", "author_num": 6, "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Tsinghua University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ust.hk", "aff_unique_abbr": "THU;HKUST", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Preferential Normalizing Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93389", "id": "sRSjr9SDKR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sRSjr9SDKR", "openreview": "https://openreview.net/forum?id=sRSjr9SDKR", "poster": "/media/PosterPDFs/NeurIPS%202024/93389.png?t=1733300239.558711", "project": "", "author_site": "Petrus Mikkola, Luigi Acerbi, Arto Klami", "tldr": "", "abstract": "Eliciting a high-dimensional probability distribution from an expert via noisy judgments is notoriously challenging, yet useful for many applications, such as prior elicitation and reward modeling. We introduce a method for eliciting the expert's belief density as a normalizing flow based solely on preferential questions such as comparing or ranking alternatives. This allows eliciting in principle arbitrarily flexible densities, but flow estimation is susceptible to the challenge of collapsing or diverging probability mass that makes it difficult in practice. We tackle this problem by introducing a novel functional prior for the flow, motivated by a decision-theoretic argument, and show empirically that the belief density can be inferred as the function-space maximum a posteriori estimate. We demonstrate our method by eliciting multivariate belief densities of simulated experts, including the prior belief of a general-purpose large language model over a real-world dataset.", "keywords": "normalizing flow;elicitation;random utility models;prior distribution", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Petrus Mikkola;Luigi Acerbi;Arto Klami", "authorids": "~Petrus_Mikkola1;~Luigi_Acerbi1;~Arto_Klami1", "gender": ";M;M", "homepage": "https://petrus-mikkola.github.io/;http://luigiacerbi.com/;https://www.cs.helsinki.fi/u/aklami/", "dblp": "258/3117;72/1450;21/5316", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=QYBZoGwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-6212-8777;0000-0001-7471-7336;", "linkedin": ";luigi-acerbi-719b492/;", "or_profile": "~Petrus_Mikkola1;~Luigi_Acerbi1;~Arto_Klami1", "aff": "University of Helsinki;University of Helsinki;University of Helsinki", "aff_domain": "helsinki.fi;helsinki.fi;helsinki.fi", "position": "Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmikkola2024preferential,\ntitle={Preferential Normalizing Flows},\nauthor={Petrus Mikkola and Luigi Acerbi and Arto Klami},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sRSjr9SDKR}\n}", "github": "", "reviewers": "xMnU;sR2g;FQE4", "pdf_size": 14471400, "rating": "5;7;8", "confidence": "2;3;3", "soundness": "2;3;3", "novelty": "3;4;3", "presentation": "2;4;2", "wc_summary": "171;93;78", "wc_strengths": "37;126;72", "wc_weaknesses": "102;109;154", "wc_questions": "151;62;16", "wc_limitations": "13;82;9", "wc_review": "474;472;329", "wc_reply_reviewers": "223;52;19", "wc_reply_authors": "709;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 114.0, 40.76763422127902 ], "wc_strengths_avg": [ 78.33333333333333, 36.609045633862436 ], "wc_weaknesses_avg": [ 121.66666666666667, 23.041026211713937 ], "wc_questions_avg": [ 76.33333333333333, 56.037685732212594 ], "wc_limitations_avg": [ 34.666666666666664, 33.50953429829918 ], "wc_review_avg": [ 425.0, 67.88716128007317 ], "wc_reply_reviewers_avg": [ 98.0, 89.40917178902845 ], "wc_reply_authors_avg": [ 236.33333333333334, 334.22580524084145 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9449111825230683, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13903301337807560683&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "helsinki.fi;helsinki.fi;helsinki.fi", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Helsinki", "aff_unique_dep": "", "aff_unique_url": "https://www.helsinki.fi", "aff_unique_abbr": "UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Finland" }, { "title": "Generalized Eigenvalue Problems with Generative Priors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93388", "id": "sVZBJoxwk9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sVZBJoxwk9", "openreview": "https://openreview.net/forum?id=sVZBJoxwk9", "poster": "", "project": "", "author_site": "Zhaoqiang Liu, Wen Li, Junren Chen", "tldr": "", "abstract": "Generalized eigenvalue problems (GEPs) find applications in various fields of science and engineering. For example, principal component analysis, Fisher's discriminant analysis, and canonical correlation analysis are specific instances of GEPs and are widely used in statistical data processing. In this work, we study GEPs under generative priors, assuming that the underlying leading generalized eigenvector lies within the range of a Lipschitz continuous generative model. Under appropriate conditions, we show that any optimal solution to the corresponding optimization problems attains the optimal statistical rate. Moreover, from a computational perspective, we propose an iterative algorithm called the Projected Rayleigh Flow Method (PRFM) to approximate the optimal solution. We theoretically demonstrate that under suitable assumptions, PRFM converges linearly to an estimated vector that achieves the optimal statistical rate. Numerical results are provided to demonstrate the effectiveness of the proposed method.", "keywords": "Generalized Eigenvalue Problems;Generative Priors;Optimal Statistical Rate", "primary_area": "learning_theory", "supplementary_material": "", "author": "Zhaoqiang Liu;Wen Li;Junren Chen", "authorids": "~Zhaoqiang_Liu1;~Wen_Li2;~Junren_Chen1", "gender": "M;M;M", "homepage": ";http://wenli-vision.github.io;https://junrenchen58.github.io/", "dblp": "198/1405;06/721-1;", "google_scholar": "EmGrPbIAAAAJ;https://scholar.google.ch/citations?user=yjG4Eg4AAAAJ;pw3tOroAAAAJ", "orcid": ";0000-0002-5559-8594;0000-0003-3606-9598", "linkedin": ";;", "or_profile": "~Zhaoqiang_Liu1;~Wen_Li2;~Junren_Chen1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;The University of Hong Kong", "aff_domain": "uestc.edu.cn;uestc.edu.cn;hku.hk", "position": "Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nliu2024generalized,\ntitle={Generalized Eigenvalue Problems with Generative Priors},\nauthor={Zhaoqiang Liu and Wen Li and Junren Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sVZBJoxwk9}\n}", "github": "", "reviewers": "5DuU;PACz;cuQm", "pdf_size": 1319141, "rating": "5;5;7", "confidence": "4;3;4", "soundness": "2;2;4", "novelty": "2;2;3", "presentation": "3;2;3", "wc_summary": "31;45;85", "wc_strengths": "8;38;72", "wc_weaknesses": "126;71;145", "wc_questions": "51;240;49", "wc_limitations": "28;27;19", "wc_review": "244;421;370", "wc_reply_reviewers": "28;60;12", "wc_reply_authors": "66;29;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.9428090415820634 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 53.666666666666664, 22.88133640230735 ], "wc_strengths_avg": [ 39.333333333333336, 26.144895401503437 ], "wc_weaknesses_avg": [ 114.0, 31.379398762032817 ], "wc_questions_avg": [ 113.33333333333333, 89.5705804876182 ], "wc_limitations_avg": [ 24.666666666666668, 4.0276819911981905 ], "wc_review_avg": [ 345.0, 74.39085965358916 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 19.955506062794353 ], "wc_reply_authors_avg": [ 31.666666666666668, 27.01028610651052 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mfCcAaN19TEJ:scholar.google.com/&scioq=Generalized+Eigenvalue+Problems+with+Generative+Priors&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "uestc.edu.cn;uestc.edu.cn;hku.hk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.hku.hk", "aff_unique_abbr": "UESTC;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "SOI: Scaling Down Computational Complexity by Estimating Partial States of the Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93387", "id": "sZ7jj9kqAy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sZ7jj9kqAy", "openreview": "https://openreview.net/forum?id=sZ7jj9kqAy", "poster": "/media/PosterPDFs/NeurIPS%202024/93387.png?t=1733096652.0271823", "project": "", "author_site": "Grzegorz Stefa\u0144ski, Pawe\u0142 Daniluk, Artur Szumaczuk, Jakub Tkaczuk", "tldr": "", "abstract": "Consumer electronics used to follow the miniaturization trend described by Moore\u2019s Law. Despite increased processing power in Microcontroller Units (MCUs), MCUs used in the smallest appliances are still not capable of running even moderately big, state-of-the-art artificial neural networks (ANNs) especially in time-sensitive scenarios. In this work, we present a novel method called Scattered Online Inference (SOI) that aims to reduce the computational complexity of ANNs. SOI leverages the continuity and seasonality of time-series data and model predictions, enabling extrapolation for processing speed improvements, particularly in deeper layers. By applying compression, SOI generates more general inner partial states of ANN, allowing skipping full model recalculation at each inference.", "keywords": "Time series data;Computational complexity reduction;Latency reduction;Real-Time results;Inference at the edge;Causality", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Grzegorz Stefa\u0144ski;Pawe\u0142 Daniluk;Artur Szumaczuk;Jakub Tkaczuk", "authorids": "~Grzegorz_Stefa\u0144ski1;~Pawe\u0142_Daniluk1;~Artur_Szumaczuk1;~Jakub_Tkaczuk1", "gender": "M;;M;M", "homepage": "https://github.com/GrzegorzStefanski;;;https://www.linkedin.com/in/tkaczukjakub/", "dblp": "339/8733.html;57/7064;293/5618;119/7322", "google_scholar": "UdGFCFsAAAAJ;https://scholar.google.pl/citations?user=5PIIXdwAAAAJ;;", "orcid": "0000-0002-0858-0180;;0000-0002-0329-5022;", "linkedin": "g-stefanski/;;;", "or_profile": "~Grzegorz_Stefa\u0144ski1;~Pawe\u0142_Daniluk1;~Artur_Szumaczuk1;~Jakub_Tkaczuk1", "aff": "Samsung;Samsung;Samsung;", "aff_domain": "samsung.com;samsung.com;samsung.com;", "position": "Researcher;Principal Researcher;Data Scientist;", "bibtex": "@inproceedings{\nstefa{\\'n}ski2024soi,\ntitle={{SOI}: Scaling Down Computational Complexity by Estimating Partial States of the Model},\nauthor={Grzegorz Stefa{\\'n}ski and Pawe{\\l} Daniluk and Artur Szumaczuk and Jakub Tkaczuk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sZ7jj9kqAy}\n}", "github": "", "reviewers": "MDRT;L5QG;Lrh6;rrin", "pdf_size": 2886590, "rating": "5;5;6;7", "confidence": "3;3;2;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;2;3", "wc_summary": "59;151;38;157", "wc_strengths": "41;101;22;150", "wc_weaknesses": "97;288;62;67", "wc_questions": "2;4;33;27", "wc_limitations": "40;1;15;8", "wc_review": "239;545;170;409", "wc_reply_reviewers": "29;50;62;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.25, 53.312170280340304 ], "wc_strengths_avg": [ 78.5, 50.53958844312051 ], "wc_weaknesses_avg": [ 128.5, 93.05509120945506 ], "wc_questions_avg": [ 16.5, 13.683932183404009 ], "wc_limitations_avg": [ 16.0, 14.713938969562161 ], "wc_review_avg": [ 340.75, 146.53050023800506 ], "wc_reply_reviewers_avg": [ 40.25, 16.618889854620253 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n-y4bRpJ3AcJ:scholar.google.com/&scioq=SOI:+Scaling+Down+Computational+Complexity+by+Estimating+Partial+States+of+the+Model&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "samsung.com;samsung.com;samsung.com;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Beware of Road Markings: A New Adversarial Patch Attack to Monocular Depth Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93386", "id": "satH8Evs2y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=satH8Evs2y", "openreview": "https://openreview.net/forum?id=satH8Evs2y", "poster": "/media/PosterPDFs/NeurIPS%202024/93386.png?t=1729518189.9518325", "project": "", "author_site": "Hangcheng Liu, Zhenhu Wu, Hao Wang, Xingshuo Han, Shangwei Guo, Tao Xiang, Tianwei Zhang", "tldr": "", "abstract": "Monocular Depth Estimation (MDE) enables the prediction of scene depths from a single RGB image, having been widely integrated into production-grade autonomous driving systems, e.g., Tesla Autopilot. Current adversarial attacks to MDE models focus on attaching an optimized adversarial patch to a designated obstacle. Although effective, this approach presents two inherent limitations: its reliance on specific obstacles and its limited malicious impact. In contrast, we propose a pioneering attack to MDE models that \\textit{decouples obstacles from patches physically and deploys optimized patches on roads}, thereby extending the attack scope to arbitrary traffic participants. This approach is inspired by our groundbreaking discovery: \\textit{various MDE models with different architectures, trained for autonomous driving, heavily rely on road regions} when predicting depths for different obstacles. Based on this discovery, we design the Adversarial Road Marking (AdvRM) attack, which camouflages patches as ordinary road markings and deploys them on roads, thereby posing a continuous threat within the environment. Experimental results from both dataset simulations and real-world scenarios demonstrate that AdvRM is effective, stealthy, and robust against various MDE models, achieving about 1.507 of Mean Relative Shift Ratio (MRSR) over 8 MDE models. The code is available at \\url{https://github.com/a-c-a-c/AdvRM.git}", "keywords": "monocular depth estimation;adversarial patch;road dependence", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Hangcheng Liu;Zhenhu Wu;Hao Wang;XINGSHUO HAN;Shangwei Guo;Tao Xiang;Tianwei Zhang", "authorids": "~Hangcheng_Liu1;~Zhenhu_Wu1;~Hao_Wang66;~XINGSHUO_HAN1;~Shangwei_Guo1;~Tao_Xiang2;~Tianwei_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://weiguang1.github.io;https://haowang.wang;https://xingshuohan.github.io/;http://www.cs.cqu.edu.cn/info/1332/5290.htm;;https://personal.ntu.edu.sg/tianwei.zhang/index.html", "dblp": "277/7585;;;310/0337;176/6479;22/4460-1.html;77/7902-4", "google_scholar": "NRlgD90AAAAJ;;54HD4zQAAAAJ;f8u6OyEAAAAJ;wQrVkBYAAAAJ;https://scholar.google.com/citations?hl=en;9vpiYDIAAAAJ", "orcid": "0000-0002-4392-2254;;0009-0001-2350-9032;0000-0002-2756-2228;;0000-0002-9439-4623;", "linkedin": ";;;;;;", "or_profile": "~Hangcheng_Liu1;~Zhenhu_Wu1;~Hao_Wang66;~XINGSHUO_HAN1;~Shangwei_Guo1;~Tao_Xiang2;~Tianwei_Zhang1", "aff": "Nanyang Technological University;Beijing University of Posts and Telecommunications;Chongqing University;Nanyang Technological University;Chongqing University;Chongqing University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;bupt.edu.cn;cqu.edu.cn;ntu.edu.sg;cqu.edu.cn;cqu.edu.cn;ntu.edu.sg", "position": "Postdoc;Undergrad student;MS student;Postdoc;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2024beware,\ntitle={Beware of Road Markings: A New Adversarial Patch Attack to Monocular Depth Estimation},\nauthor={Hangcheng Liu and Zhenhu Wu and Hao Wang and XINGSHUO HAN and Shangwei Guo and Tao Xiang and Tianwei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=satH8Evs2y}\n}", "github": "", "reviewers": "VDSt;HCq6;YPF7;LDnF", "pdf_size": 5761084, "rating": "5;6;6;6", "confidence": "4;5;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;2", "presentation": "3;2;2;3", "wc_summary": "77;143;76;163", "wc_strengths": "50;140;47;292", "wc_weaknesses": "35;876;308;257", "wc_questions": "2;105;43;2", "wc_limitations": "1;45;8;35", "wc_review": "165;1309;482;749", "wc_reply_reviewers": "0;299;405;0", "wc_reply_authors": "0;541;724;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 114.75, 38.899710795840114 ], "wc_strengths_avg": [ 132.25, 99.51476021173944 ], "wc_weaknesses_avg": [ 369.0, 310.18945823480203 ], "wc_questions_avg": [ 38.0, 42.148546831415196 ], "wc_limitations_avg": [ 22.25, 18.267115262131565 ], "wc_review_avg": [ 676.25, 419.7543180242462 ], "wc_reply_reviewers_avg": [ 176.0, 179.94582518080267 ], "wc_reply_authors_avg": [ 316.25, 322.80053825853514 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4vLUEr7UEOQJ:scholar.google.com/&scioq=Beware+of+Road+Markings:+A+New+Adversarial+Patch+Attack+to+Monocular+Depth+Estimation&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "ntu.edu.sg;bupt.edu.cn;cqu.edu.cn;ntu.edu.sg;cqu.edu.cn;cqu.edu.cn;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;1;2;0;2;2;0", "aff_unique_norm": "Nanyang Technological University;Beijing University of Posts and Telecommunications;Chongqing University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.bupt.edu.cn/;https://www.cqu.edu.cn", "aff_unique_abbr": "NTU;BUPT;CQU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;1;0;1;1;0", "aff_country_unique": "Singapore;China" }, { "title": "DiP-GO: A Diffusion Pruner via Few-step Gradient Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93385", "id": "sbsaRj475E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sbsaRj475E", "openreview": "https://openreview.net/forum?id=sbsaRj475E", "poster": "/media/PosterPDFs/NeurIPS%202024/93385.png?t=1731401745.0318723", "project": "", "author_site": "Haowei Zhu, Dehua Tang, Ji Liu, Mingjie Lu, Jintu Zheng, Jinzhang Peng, Dong Li, Yu Wang, Fan Jiang, Lu Tian, Spandan Tiwari, Ashish Sirasao, Jun-Hai Yong, Bin Wang, Emad Barsoum", "tldr": "", "abstract": "Diffusion models have achieved remarkable progress in the field of image generation due to their outstanding capabilities. However, these models require substantial computing resources because of the multi-step denoising process during inference. While traditional pruning methods have been employed to optimize these models, the retraining process necessitates large-scale training datasets and extensive computational costs to maintain generalization ability, making it neither convenient nor efficient. Recent studies attempt to utilize the similarity of features across adjacent denoising stages to reduce computational costs through simple and static strategies. However, these strategies cannot fully harness the potential of the similar feature patterns across adjacent timesteps. In this work, we propose a novel pruning method that derives an efficient diffusion model via a more intelligent and differentiable pruner. At the core of our approach is casting the model pruning process into a SubNet search process. Specifically, we first introduce a SuperNet based on standard diffusion via adding some backup connections built upon the similar features. We then construct a plugin pruner network and design optimization losses to identify redundant computation. Finally, our method can identify an optimal SubNet through few-step gradient optimization and a simple post-processing procedure. We conduct extensive experiments on various diffusion models including Stable Diffusion series and DiTs. Our DiP-GO approach achieves 4.4 x speedup for SD-1.5 without any loss of accuracy, significantly outperforming the previous state-of-the-art methods.", "keywords": "Diffusion;Pruning;Speedup;Gradient optimization;SuperNet", "primary_area": "generative_models", "supplementary_material": "", "author": "haoweiz;Dehua Tang;Ji Liu;Mingjie Lu;Jintu Zheng;Jinzhang Peng;Dong Li;Yu Wang;Fan Jiang;Lu Tian;Spandan Tiwari;Ashish Sirasao;Jun-Hai Yong;Bin Wang;Emad Barsoum", "authorids": "~haoweiz1;~Dehua_Tang1;~Ji_Liu11;~Mingjie_Lu1;~Jintu_Zheng1;~Jinzhang_Peng1;~Dong_Li13;~Yu_Wang73;~Fan_Jiang5;~Lu_Tian3;~Spandan_Tiwari1;~Ashish_Sirasao1;~Jun-Hai_Yong3;~Bin_Wang3;~Emad_Barsoum1", "gender": "M;M;M;M;;;;;M;F;M;M;M;M;", "homepage": ";;;;https://jintupersonal.com;;;https://github.com/yuwang-xlnx;;;;;https://www.thss.tsinghua.edu.cn/person/yongjunhai;https://binwangthss.github.io/;", "dblp": ";;;;363/8120;54/6166;;;;;;19/4292.html;;13/1898-21;", "google_scholar": ";-Tp203YAAAAJ;CZQyReoAAAAJ;;;nkhA0mcAAAAJ;;;;edbuKpcAAAAJ;;;;o7AFnlUAAAAJ;", "orcid": "0009-0000-4120-3312;;;;0009-0003-3865-099X;;;;;;;;;0000-0002-5176-9202;", "linkedin": ";;;https://linkedin.com/in/%E6%98%8E%E6%8D%B7-%E9%B2%81-40962b118/en?trk=people-guest_people_search-card;;;;;fan-jiang-996514268/;;spandantiwari/;;;bin-wang-22992524/;", "or_profile": "~haoweiz1;~Dehua_Tang1;~Ji_Liu11;~Mingjie_Lu1;~Jintu_Zheng1;~Jinzhang_Peng1;~Dong_Li13;~Yu_Wang73;~Fan_Jiang5;~Lu_Tian3;~Spandan_Tiwari1;~Ashish_Sirasao1;~Jun-Hai_Yong3;~Bin_Wang3;~Emad_Barsoum1", "aff": "Tsinghua University;Advanced Micro Devices;AMD;AMD;University of Chinese Academy of Sciences;Researcher at AMD ;;;Advanced Micro Devices;AMD;Advanced Micro Devices;Amd inc;Tsinghua University;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;amd.com;amd.com;amd.com;ucas.ac.cn;amd.com;;;amd.com;amd.com;amd.com;amd.com;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "PhD student;MTS;Researcher;Researcher;MS student;Researcher;;;Principal Researcher;Researcher;Director of Engineering;Researcher;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nhaoweiz2024dipgo,\ntitle={DiP-{GO}: A Diffusion Pruner via Few-step Gradient Optimization},\nauthor={haoweiz and Dehua Tang and Ji Liu and Mingjie Lu and Jintu Zheng and Jinzhang Peng and Dong Li and Yu Wang and Fan Jiang and Lu Tian and Spandan Tiwari and Ashish Sirasao and Jun-Hai Yong and Bin Wang and Emad Barsoum},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sbsaRj475E}\n}", "github": "", "reviewers": "A73E;JBdU;1Yei;zGQU", "pdf_size": 1445693, "rating": "4;6;6;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;4", "wc_summary": "26;114;82;94", "wc_strengths": "46;98;63;67", "wc_weaknesses": "195;104;211;34", "wc_questions": "6;65;41;66", "wc_limitations": "10;6;7;66", "wc_review": "283;387;404;327", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "1070;316;1139;384", "reply_reviewers": "0;0;0;0", "reply_authors": "4;3;4;3", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.0, 32.66496594212215 ], "wc_strengths_avg": [ 68.5, 18.76832437912346 ], "wc_weaknesses_avg": [ 136.0, 71.64844729650461 ], "wc_questions_avg": [ 44.5, 24.37724348649781 ], "wc_limitations_avg": [ 22.25, 25.301926804099327 ], "wc_review_avg": [ 350.25, 48.225382320931374 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 727.25, 378.8016466437283 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 15, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16480520757875103507&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;amd.com;amd.com;amd.com;ucas.ac.cn;amd.com;;;amd.com;amd.com;amd.com;amd.com;tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 15, "aff_unique_index": "0;1;1;1;2;3;1;1;1;1;0;0", "aff_unique_norm": "Tsinghua University;Advanced Micro Devices, Inc.;University of Chinese Academy of Sciences;AMD", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.amd.com;http://www.ucas.ac.cn;https://www.amd.com", "aff_unique_abbr": "THU;AMD;UCAS;AMD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1;1;1;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "DeepLag: Discovering Deep Lagrangian Dynamics for Intuitive Fluid Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93384", "id": "scw6Et4pEr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=scw6Et4pEr", "openreview": "https://openreview.net/forum?id=scw6Et4pEr", "poster": "/media/PosterPDFs/NeurIPS%202024/93384.png?t=1732127983.5679526", "project": "", "author_site": "Qilong Ma, Haixu Wu, Lanxiang Xing, Shangchen Miao, Mingsheng Long", "tldr": "", "abstract": "Accurately predicting the future fluid is vital to extensive areas such as meteorology, oceanology, and aerodynamics. However, since the fluid is usually observed from the Eulerian perspective, its moving and intricate dynamics are seriously obscured and confounded in static grids, bringing thorny challenges to the prediction. This paper introduces a new Lagrangian-Eulerian combined paradigm to tackle the tanglesome fluid dynamics. Instead of solely predicting the future based on Eulerian observations, we propose DeepLag to discover hidden Lagrangian dynamics within the fluid by tracking the movements of adaptively sampled key particles. Further, DeepLag presents a new paradigm for fluid prediction, where the Lagrangian movement of the tracked particles is inferred from Eulerian observations, and their accumulated Lagrangian dynamics information is incorporated into global Eulerian evolving features to guide future prediction respectively. Tracking key particles not only provides a transparent and interpretable clue for fluid dynamics but also makes our model free from modeling complex correlations among massive grids for better efficiency. Experimentally, DeepLag excels in three challenging fluid prediction tasks covering 2D and 3D, simulated and real-world fluids. Code is available at this repository: https://github.com/thuml/DeepLag.", "keywords": "Deep learning;Fluid prediction;Lagrangian perspective", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/3456c3e4ec83cf923e34314eebb79a302b01517a.zip", "author": "Qilong Ma;Haixu Wu;Lanxiang Xing;Shangchen Miao;Mingsheng Long", "authorids": "~Qilong_Ma1;~Haixu_Wu1;~Lanxiang_Xing2;~Shangchen_Miao1;~Mingsheng_Long5", "gender": ";M;M;M;", "homepage": ";;https://github.com/BluesCrossing;;", "dblp": ";286/8115;;;", "google_scholar": ";oLL_x0wAAAAJ;;;", "orcid": ";;0000-0001-5928-3242;0009-0002-6734-3182;", "linkedin": ";;;;", "or_profile": "~Qilong_Ma1;~Haixu_Wu1;~Lanxiang_Xing2;~Shangchen_Miao1;~Mingsheng_Long5", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;", "position": ";PhD student;MS student;Undergrad student;", "bibtex": "@inproceedings{\nma2024deeplag,\ntitle={DeepLag: Discovering Deep Lagrangian Dynamics for Intuitive Fluid Prediction},\nauthor={Qilong Ma and Haixu Wu and Lanxiang Xing and Shangchen Miao and Mingsheng Long},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=scw6Et4pEr}\n}", "github": "", "reviewers": "rCcn;PRfM;PBwE;UohQ;Kve3", "pdf_size": 6275588, "rating": "5;6;6;6;7", "confidence": "3;3;4;3;4", "soundness": "3;3;3;3;4", "novelty": "3;3;3;3;3", "presentation": "2;2;3;2;3", "wc_summary": "209;99;72;120;89", "wc_strengths": "84;73;52;139;74", "wc_weaknesses": "121;295;72;111;90", "wc_questions": "151;33;2;106;88", "wc_limitations": "37;95;9;4;11", "wc_review": "602;595;207;480;352", "wc_reply_reviewers": "0;18;20;25;26", "wc_reply_authors": "0;34;45;0;0", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 117.8, 48.172191148005716 ], "wc_strengths_avg": [ 84.4, 29.2205407205274 ], "wc_weaknesses_avg": [ 137.8, 80.40746234025795 ], "wc_questions_avg": [ 76.0, 52.90368607195533 ], "wc_limitations_avg": [ 31.2, 33.89631248380862 ], "wc_review_avg": [ 447.2, 150.759941629068 ], "wc_reply_reviewers_avg": [ 17.8, 9.389355675444401 ], "wc_reply_authors_avg": [ 15.8, 19.661129163911212 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8Zg25W5eMysJ:scholar.google.com/&scioq=DeepLag:+Discovering+Deep+Lagrangian+Dynamics+for+Intuitive+Fluid+Prediction&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": ";tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;", "author_num": 5, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Aligner-Encoders: Self-Attention Transformers Can Be Self-Transducers", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93383", "id": "seAuMedrm5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=seAuMedrm5", "openreview": "https://openreview.net/forum?id=seAuMedrm5", "poster": "/media/PosterPDFs/NeurIPS%202024/93383.png?t=1731491550.9225726", "project": "", "author_site": "Adam Stooke, Rohit Prabhavalkar, Khe Sim, Pedro Moreno Mengibar", "tldr": "", "abstract": "Modern systems for automatic speech recognition, including the RNN-Transducer and Attention-based Encoder-Decoder (AED), are designed so that the encoder is not required to alter the time-position of information from the audio sequence into the embedding; alignment to the final text output is processed during decoding. We discover that the transformer-based encoder adopted in recent years is actually capable of performing the alignment internally during the forward pass, prior to decoding. This new phenomenon enables a simpler and more efficient model, the ''Aligner-Encoder''. To train it, we discard the dynamic programming of RNN-T in favor of the frame-wise cross-entropy loss of AED, while the decoder employs the lighter text-only recurrence of RNN-T without learned cross-attention---it simply scans embedding frames in order from the beginning, producing one token each until predicting the end-of-message. We conduct experiments demonstrating performance remarkably close to the state of the art, including a special inference configuration enabling long-form recognition. In a representative comparison, we measure the total inference time for our model to be 2x faster than RNN-T and 16x faster than AED. Lastly, we find that the audio-text alignment is clearly visible in the self-attention weights of a certain layer, which could be said to perform ''self-transduction''.", "keywords": "ASR;Speech;Transducers;Transformers;Alignment", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Adam Stooke;Rohit Prabhavalkar;Khe Chai Sim;Pedro J Moreno Mengibar", "authorids": "~Adam_Stooke3;~Rohit_Prabhavalkar1;~Khe_Chai_Sim1;~Pedro_J_Moreno_Mengibar1", "gender": "M;M;;M", "homepage": "https://github.com/astooke;https://research.google/people/RohitPrabhavalkar/;;", "dblp": ";87/8758;78/6873;", "google_scholar": ";JgltxisAAAAJ;;fzWAWboAAAAJ", "orcid": ";;;", "linkedin": ";rohitprabhavalkar;;", "or_profile": "~Adam_Stooke3;~Rohit_Prabhavalkar1;~Khe_Chai_Sim1;~Pedro_J_Moreno_Mengibar1", "aff": ";Google;Google;", "aff_domain": ";google.com;google.com;", "position": ";Researcher;Research Scientist;", "bibtex": "@inproceedings{\nstooke2024alignerencoders,\ntitle={Aligner-Encoders: Self-Attention Transformers Can Be Self-Transducers},\nauthor={Adam Stooke and Rohit Prabhavalkar and Khe Chai Sim and Pedro J Moreno Mengibar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=seAuMedrm5}\n}", "github": "", "reviewers": "VLfL;Btkf;RgJ5;JgvT", "pdf_size": 4251242, "rating": "7;7;7;7", "confidence": "5;2;5;5", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "98;109;113;243", "wc_strengths": "77;97;39;43", "wc_weaknesses": "128;109;43;26", "wc_questions": "81;4;232;190", "wc_limitations": "25;4;94;1", "wc_review": "409;323;521;503", "wc_reply_reviewers": "105;14;47;195", "wc_reply_authors": "99;0;0;251", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 140.75, 59.28901668943414 ], "wc_strengths_avg": [ 64.0, 24.1039415863879 ], "wc_weaknesses_avg": [ 76.5, 42.9563732174866 ], "wc_questions_avg": [ 126.75, 89.77576231923625 ], "wc_limitations_avg": [ 31.0, 37.52998800959041 ], "wc_review_avg": [ 439.0, 79.33473388119481 ], "wc_reply_reviewers_avg": [ 90.25, 68.69270339708578 ], "wc_reply_authors_avg": [ 87.5, 102.6851985438992 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:K0d1BFaYI20J:scholar.google.com/&scioq=Aligner-Encoders:+Self-Attention+Transformers+Can+Be+Self-Transducers&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": ";google.com;google.com;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Prototypical Hash Encoding for On-the-Fly Fine-Grained Category Discovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93382", "id": "seYXqfGT0q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=seYXqfGT0q", "openreview": "https://openreview.net/forum?id=seYXqfGT0q", "poster": "/media/PosterPDFs/NeurIPS%202024/93382.png?t=1731338020.9789753", "project": "", "author_site": "Haiyang Zheng, Nan Pu, Wenjing Li, Nicu Sebe, Zhun Zhong", "tldr": "", "abstract": "In this paper, we study a practical yet challenging task, On-the-fly Category Discovery (OCD), aiming to online discover the newly-coming stream data that belong to both known and unknown classes, by leveraging only known category knowledge contained in labeled data. Previous OCD methods employ the hash-based technique to represent old/new categories by hash codes for instance-wise inference. However, directly mapping features into low-dimensional hash space not only inevitably damages the ability to distinguish classes and but also causes ``high sensitivity'' issue, especially for fine-grained classes, leading to inferior performance. To address these drawbacks, we propose a novel Prototypical Hash Encoding (PHE) framework consisting of Category-aware Prototype Generation (CPG) and Discriminative Category Encoding (DCE) to mitigate the sensitivity of hash code while preserving rich discriminative information contained in high-dimension feature space, in a two-stage projection fashion. CPG enables the model to fully capture the intra-category diversity by representing each category with multiple prototypes. DCE boosts the discrimination ability of hash code with the guidance of the generated category prototypes and the constraint of minimum separation distance. By jointly optimizing CPG and DCE, we demonstrate that these two components are mutually beneficial towards an effective OCD. Extensive experiments show the significant superiority of our PHE over previous methods, e.g. obtaining an improvement of +5.3% in ALL ACC averaged on all datasets. Moreover, due to the nature of the interpretable prototypes, we visually analyze the underlying mechanism of how PHE helps group certain samples into either known or unknown categories. Code is available at https://github.com/HaiyangZheng/PHE.", "keywords": "Category Discovery;On-the-fly;Deep Hash", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haiyang Zheng;Nan Pu;Wenjing Li;Nicu Sebe;Zhun Zhong", "authorids": "~Haiyang_Zheng1;~Nan_Pu1;~Wenjing_Li4;~Nicu_Sebe1;~Zhun_Zhong1", "gender": "M;M;F;M;M", "homepage": ";https://tpcd.github.io/;;http://disi.unitn.it/~sebe/;http://zhunzhong.site", "dblp": "84/1066;210/5100;;20/3519;32/6525", "google_scholar": ";https://scholar.google.be/citations?user=QBb2DjwAAAAJ;uBjSytAAAAAJ;https://scholar.google.it/citations?user=stFCYOAAAAAJ;nZizkQ0AAAAJ", "orcid": "0000-0001-8733-9696;0000-0002-2179-8301;;0000-0002-6597-7248;", "linkedin": ";;;;", "or_profile": "~Haiyang_Zheng1;~Nan_Pu1;~Wenjing_Li4;~Nicu_Sebe1;~Zhun_Zhong1", "aff": "University of Trento;University of Trento;University of Science and Technology of China;University of Trento;University of Nottingham", "aff_domain": "unitn.it;unitn.it;ustc.edu.cn;unitn.it;nottingham.ac.uk", "position": "PhD student;Postdoc;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzheng2024prototypical,\ntitle={Prototypical Hash Encoding for On-the-Fly Fine-Grained Category Discovery},\nauthor={Haiyang Zheng and Nan Pu and Wenjing Li and Nicu Sebe and Zhun Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=seYXqfGT0q}\n}", "github": "", "reviewers": "dwc8;ZQiT;Pa9u;6xA4", "pdf_size": 9214544, "rating": "5;5;6;6", "confidence": "4;4;3;5", "soundness": "3;3;3;4", "novelty": "2;2;2;2", "presentation": "3;3;3;4", "wc_summary": "87;62;144;147", "wc_strengths": "32;42;115;209", "wc_weaknesses": "107;100;54;117", "wc_questions": "94;39;139;41", "wc_limitations": "25;1;92;13", "wc_review": "345;244;544;527", "wc_reply_reviewers": "52;19;20;34", "wc_reply_authors": "49;37;31;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.0, 36.5991803186902 ], "wc_strengths_avg": [ 99.5, 70.87488977063738 ], "wc_weaknesses_avg": [ 94.5, 24.150569351466643 ], "wc_questions_avg": [ 78.25, 41.43292772662825 ], "wc_limitations_avg": [ 32.75, 35.24468044967921 ], "wc_review_avg": [ 415.0, 125.8232887823236 ], "wc_reply_reviewers_avg": [ 31.25, 13.36740438529485 ], "wc_reply_authors_avg": [ 37.5, 6.98212002188447 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10471803270653071364&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "unitn.it;unitn.it;ustc.edu.cn;unitn.it;nottingham.ac.uk", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Trento;University of Science and Technology of China;University of Nottingham", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unitn.it;http://www.ustc.edu.cn;https://www.nottingham.ac.uk", "aff_unique_abbr": "UniTN;USTC;UoN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "Italy;China;United Kingdom" }, { "title": "Multi-scale Consistency for Robust 3D Registration via Hierarchical Sinkhorn Tree", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93381", "id": "sfPxUqzdPI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sfPxUqzdPI", "openreview": "https://openreview.net/forum?id=sfPxUqzdPI", "poster": "/media/PosterPDFs/NeurIPS%202024/93381.png?t=1730229978.0854332", "project": "", "author_site": "Chengwei Ren, Yifan Feng, Weixiang Zhang, Xiao-Ping (Steven) Zhang, Yue Gao", "tldr": "", "abstract": "We study the problem of retrieving accurate correspondence through multi-scale consistency (MSC) for robust point cloud registration. Existing works in a coarse-to-fine manner either suffer from severe noisy correspondences caused by unreliable coarse matching or struggle to form outlier-free coarse-level correspondence sets. To tackle this, we present Hierarchical Sinkhorn Tree (HST), a pruned tree structure designed to hierarchically measure the local consistency of each coarse correspondence across multiple feature scales, thereby filtering out the local dissimilar ones. In this way, we convert the modeling of MSC for each correspondence into a BFS traversal with pruning of a K-ary tree rooted at the superpoint, with its K nearest neighbors in the feature pyramid serving as child nodes. To achieve efficient pruning and accurate vicinity characterization, we further propose a novel overlap-aware Sinkhorn Distance, which retains only the most likely overlapping points for local measurement and next level exploration. The modeling process essentially involves traversing a pair of HSTs synchronously and aggregating the consistency measures of corresponding tree nodes. Extensive experiments demonstrate HST consistently outperforms the state-of-the-art methods on both indoor and outdoor benchmarks.", "keywords": "Point Cloud Registration;Coarse-to-fine Mechanism;Correspondence Retrieval;Multi-scale Consistency", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chengwei Ren;Yifan Feng;Weixiang Zhang;Xiao-Ping Zhang;Yue Gao", "authorids": "~Chengwei_Ren1;~Yifan_Feng1;~Weixiang_Zhang1;~Xiao-Ping_Zhang1;~Yue_Gao4", "gender": "M;M;M;;M", "homepage": "https://github.com/ChernweiRen;;https://weixiang-zhang.github.io/;;http://www.gaoyue.org", "dblp": ";225/5463;49/1249;;33/3099-2", "google_scholar": ";https://scholar.google.com.hk/citations?user=WntYF-sAAAAJ;aZgGw_MAAAAJ;;UTDfWocAAAAJ", "orcid": ";0000-0003-0878-2986;0009-0000-3762-8078;;", "linkedin": ";;;;", "or_profile": "~Chengwei_Ren1;~Yifan_Feng1;~Weixiang_Zhang1;~Xiao-Ping_Zhang1;~Yue_Gao4", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn", "position": "MS student;PhD student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nren2024multiscale,\ntitle={Multi-scale Consistency for Robust 3D Registration via Hierarchical Sinkhorn Tree},\nauthor={Chengwei Ren and Yifan Feng and Weixiang Zhang and Xiao-Ping Zhang and Yue Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sfPxUqzdPI}\n}", "github": "", "reviewers": "WyrU;Sd2B;hZ2Q;AXAW", "pdf_size": 6211658, "rating": "5;5;6;6", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "50;54;66;119", "wc_strengths": "78;24;79;78", "wc_weaknesses": "280;94;111;101", "wc_questions": "99;4;5;2", "wc_limitations": "6;7;51;2", "wc_review": "513;183;312;302", "wc_reply_reviewers": "10;25;16;0", "wc_reply_authors": "36;62;59;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.25, 27.625848403261752 ], "wc_strengths_avg": [ 64.75, 23.53056522908024 ], "wc_weaknesses_avg": [ 146.5, 77.31267683892467 ], "wc_questions_avg": [ 27.5, 41.29467277991194 ], "wc_limitations_avg": [ 16.5, 20.006249023742555 ], "wc_review_avg": [ 327.5, 118.51265755184127 ], "wc_reply_reviewers_avg": [ 12.75, 9.093266739736606 ], "wc_reply_authors_avg": [ 39.25, 24.79289212657531 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qBO7PnQKwfkJ:scholar.google.com/&scioq=Multi-scale+Consistency+for+Robust+3D+Registration+via+Hierarchical+Sinkhorn+Tree&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "MiniCache: KV Cache Compression in Depth Dimension for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93380", "id": "sgVOjDqUMT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sgVOjDqUMT", "openreview": "https://openreview.net/forum?id=sgVOjDqUMT", "poster": "", "project": "", "author_site": "Akide Liu, Jing Liu, Zizheng Pan, Yefei He, Reza Haffari, Bohan Zhuang", "tldr": "", "abstract": "A critical approach for efficiently deploying computationally demanding large language models (LLMs) is Key-Value (KV) caching. The KV cache stores key-value states of previously generated tokens, significantly reducing the need for repetitive computations and thereby lowering latency in autoregressive generation. However, the size of the KV cache grows linearly with sequence length, posing challenges for applications requiring long context input and extensive sequence generation. In this paper, we present a simple yet effective approach, called MiniCache, to compress the KV cache across layers from a novel depth perspective, significantly reducing the memory footprint for LLM inference. Our approach is based on the observation that KV cache states exhibit high similarity between the adjacent layers in the middle-to-deep portion of LLMs. To facilitate merging, we propose disentangling the states into the magnitude and direction components, interpolating the directions of the state vectors while preserving their lengths unchanged. Furthermore, we introduce a token retention strategy to keep highly distinct state pairs unmerged, thus preserving the information with minimal additional storage overhead. Our MiniCache is training-free and general, complementing existing KV cache compression strategies, such as quantization and sparsity. We conduct a comprehensive evaluation of MiniCache utilizing various models including LLaMA-2, LLaMA-3, Phi-3, Mistral, and Mixtral across multiple benchmarks, demonstrating its exceptional performance in achieving superior compression ratios and high throughput. On the ShareGPT dataset, LLaMA-2-7B with cross-layer merging achieves a compression ratio of $1.53\\times$. Additionally, since MiniCache is orthogonal to existing quantization techniques, it can achieve a compression ratio of up to $5.02\\times$ when combined with the 4-bit quantization technique, enhancing inference throughput by approximately $5\\times$ and reducing the memory footprint by $41\\%$ compared to the FP16 full cache baseline, all while maintaining near-lossless performance. Project is available at https://minicache.vmv.re .", "keywords": "KV Cache;Large Language Models;Efficiency AI", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Akide Liu;Jing Liu;Zizheng Pan;Yefei He;Gholamreza Haffari;Bohan Zhuang", "authorids": "~Akide_Liu1;~Jing_Liu8;~Zizheng_Pan1;~Yefei_He1;~Gholamreza_Haffari1;~Bohan_Zhuang1", "gender": "M;M;M;M;M;M", "homepage": "https://www.jing-liu.com/;https://zizhengpan.github.io/;https://hexy.tech/;https://rezahaffari.github.io/HomePage/HomePage.html;https://bohanzhuang.github.io/;https://vmv.re/in", "dblp": "72/2590-48;271/4555;92/6254;;145/1096;", "google_scholar": "-lHaZH4AAAAJ;https://scholar.google.com.au/citations?user=w_VMopoAAAAJ;CTEQwwwAAAAJ;https://scholar.google.com.tw/citations?user=Perjx5EAAAAJ;https://scholar.google.com.au/citations?user=DFuDBBwAAAAJ;", "orcid": "0000-0002-6745-3050;0000-0002-1717-7844;0000-0002-2171-4518;;;", "linkedin": "jing-liu-619688133/;zizheng-pan-754845168/;;gholamrezahaffari/?originalSubdomain=au;bohan-zhuang/;", "or_profile": "~Jing_Liu8;~Zizheng_Pan1;~Yefei_He1;~Gholamreza_Haffari1;~Bohan_Zhuang1;~Liyang_Liu3", "aff": "Monash University;Monash University;Zhejiang University;Monash University;Monash University;Monash University", "aff_domain": "monash.edu.au;monash.edu;zju.edu.cn;monash.edu;monash.edu;monash.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nliu2024minicache,\ntitle={MiniCache: {KV} Cache Compression in Depth Dimension for Large Language Models},\nauthor={Akide Liu and Jing Liu and Zizheng Pan and Yefei He and Gholamreza Haffari and Bohan Zhuang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sgVOjDqUMT}\n}", "github": "", "reviewers": "GYW9;fj43;ZDdv;TyMG", "pdf_size": 8577323, "rating": "6;6;7;7", "confidence": "5;4;5;4", "soundness": "2;3;4;3", "novelty": "3;2;4;3", "presentation": "2;3;3;3", "wc_summary": "76;65;107;243", "wc_strengths": "54;91;176;415", "wc_weaknesses": "174;101;51;216", "wc_questions": "127;12;2;100", "wc_limitations": "2;3;24;15", "wc_review": "433;272;360;989", "wc_reply_reviewers": "117;0;13;20", "wc_reply_authors": "161;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 122.75, 71.1139051100416 ], "wc_strengths_avg": [ 184.0, 140.5115653602934 ], "wc_weaknesses_avg": [ 135.5, 63.822018144211015 ], "wc_questions_avg": [ 60.25, 54.214273950685715 ], "wc_limitations_avg": [ 11.0, 9.082951062292475 ], "wc_review_avg": [ 513.5, 280.3858947950128 ], "wc_reply_reviewers_avg": [ 37.5, 46.45696933722647 ], "wc_reply_authors_avg": [ 40.25, 69.71504500464731 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18074342335405481674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "monash.edu.au;monash.edu;zju.edu.cn;monash.edu;monash.edu;monash.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Monash University;Zhejiang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.monash.edu;https://www.zju.edu.cn", "aff_unique_abbr": "Monash;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Australia;China" }, { "title": "Association of Objects May Engender Stereotypes: Mitigating Association-Engendered Stereotypes in Text-to-Image Generation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93379", "id": "shYQXpnBLB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=shYQXpnBLB", "openreview": "https://openreview.net/forum?id=shYQXpnBLB", "poster": "/media/PosterPDFs/NeurIPS%202024/93379.png?t=1730444467.7995524", "project": "", "author_site": "Junlei Zhou, Jiashi Gao, Xiangyu Zhao, Xin Yao, Xuetao Wei", "tldr": "", "abstract": "Text-to-Image (T2I) has witnessed significant advancements, demonstrating superior performance for various generative tasks. However, the presence of stereotypes in T2I introduces harmful biases that require urgent attention as the T2I \n technology becomes more prominent.\nPrevious work for stereotype mitigation mainly concentrated on mitigating stereotypes engendered with individual objects within images, which failed to address stereotypes engendered by the association of multiple objects, referred to as *Association-Engendered Stereotypes*. For example, mentioning ''black people'' and ''houses'' separately in prompts may not exhibit stereotypes. Nevertheless, when these two objects are associated in prompts, the association of ''black people'' with ''poorer houses'' becomes more pronounced. To tackle this issue, we propose a novel framework, MAS, to Mitigate Association-engendered Stereotypes. This framework models the stereotype problem as a probability distribution alignment problem, aiming to align the stereotype probability distribution of the generated image with the stereotype-free distribution. The MAS framework primarily consists of the *Prompt-Image-Stereotype CLIP* (*PIS CLIP*) and *Sensitive Transformer*. The *PIS CLIP* learns the association between prompts, images, and stereotypes, which can establish the mapping of prompts to stereotypes. The *Sensitive Transformer* produces the sensitive constraints, which guide the stereotyped image distribution to align with the stereotype-free probability distribution. Moreover, recognizing that existing metrics are insufficient for accurately evaluating association-engendered stereotypes, we propose a novel metric, *Stereotype-Distribution-Total-Variation*(*SDTV*), to evaluate stereotypes in T2I. Comprehensive experiments demonstrate that our framework effectively mitigates association-engendered stereotypes.", "keywords": "Stereotypes;Diffusion Model;Text-to-Image", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/48d538e77b860668c59f1b79dce6b0d7fbad2d93.zip", "author": "Junlei Zhou;Jiashi Gao;Xiangyu Zhao;Xin Yao;Xuetao Wei", "authorids": "~Junlei_Zhou4;~Jiashi_Gao1;~Xiangyu_Zhao1;~Xin_Yao1;~Xuetao_Wei2", "gender": "F;M;;M;M", "homepage": ";https://zhaoxyai.github.io/;http://www.cs.bham.ac.uk/~xin;https://cse.sustech.edu.cn/faculty/~weixt/;https://junlei-zhou.com/", "dblp": "221/1810;08/890-1.html;;09/5916;324/3189", "google_scholar": ";;;8fNwEScAAAAJ;https://scholar.google.com.hk/citations?user=cZRzfQ4AAAAJ", "orcid": ";0000-0003-2926-4416;;0000-0002-4450-2251;0000-0001-8781-0628", "linkedin": ";;;;", "or_profile": "~Jiashi_Gao1;~Xiangyu_Zhao1;~Xin_Yao1;~Xuetao_Wei2;~Junlei_Zhou3", "aff": "Southern University of Science and Technology;City University of Hong Kong;;Southern University of Science and Technology;Southern University of Science and Technology", "aff_domain": "sustech.edu.cn;cityu.edu.hk;;sustech.edu.cn;sustech.edu", "position": "PhD student;Assistant Professor;;Associate Professor;MS student", "bibtex": "@inproceedings{\nzhou2024association,\ntitle={Association of Objects May Engender Stereotypes: Mitigating Association-Engendered Stereotypes in Text-to-Image Generation},\nauthor={Junlei Zhou and Jiashi Gao and Xiangyu Zhao and Xin Yao and Xuetao Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=shYQXpnBLB}\n}", "github": "", "reviewers": "L6vF;E5YY;QwVE;ioQ9", "pdf_size": 46300696, "rating": "5;6;7;8", "confidence": "4;3;3;5", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "79;81;25;71", "wc_strengths": "75;86;45;161", "wc_weaknesses": "411;90;161;54", "wc_questions": "93;4;63;47", "wc_limitations": "13;4;5;34", "wc_review": "671;265;299;367", "wc_reply_reviewers": "21;14;0;50", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 64.0, 22.825424421026653 ], "wc_strengths_avg": [ 91.75, 42.70465431308396 ], "wc_weaknesses_avg": [ 179.0, 139.36821732374997 ], "wc_questions_avg": [ 51.75, 32.135455497005175 ], "wc_limitations_avg": [ 14.0, 12.062338081814818 ], "wc_review_avg": [ 400.5, 160.4330078256965 ], "wc_reply_reviewers_avg": [ 21.25, 18.239723133863627 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.40451991747794525, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YjQEvskZ2VsJ:scholar.google.com/&scioq=Association+of+Objects+May+Engender+Stereotypes:+Mitigating+Association-Engendered+Stereotypes+in+Text-to-Image+Generation&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "sustech.edu.cn;cityu.edu.hk;;sustech.edu.cn;sustech.edu", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Southern University of Science and Technology;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "SUSTech;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "OneRef: Unified One-tower Expression Grounding and Segmentation with Mask Referring Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93378", "id": "siPdcro6uD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=siPdcro6uD", "openreview": "https://openreview.net/forum?id=siPdcro6uD", "poster": "/media/PosterPDFs/NeurIPS%202024/93378.png?t=1729402553.3015864", "project": "", "author_site": "Linhui Xiao, Xiaoshan Yang, Fang Peng, Yaowei Wang, Changsheng Xu", "tldr": "", "abstract": "Constrained by the separate encoding of vision and language, existing grounding and referring segmentation works heavily rely on bulky Transformer-based fusion en-/decoders and a variety of early-stage interaction technologies. Simultaneously, the current mask visual language modeling (MVLM) fails to capture the nuanced referential relationship between image-text in referring tasks. In this paper, we propose **OneRef**, a minimalist referring framework built on the modality-shared one-tower transformer that unifies the visual and linguistic feature spaces. To modeling the referential relationship, we introduce a novel MVLM paradigm called Mask Referring Modeling (**MRefM**), which encompasses both referring-aware mask image modeling and referring-aware mask language modeling. Both modules not only reconstruct modality-related content but also cross-modal referring content. Within MRefM, we propose a referring-aware dynamic image masking strategy that is aware of the referred region rather than relying on fixed ratios or generic random masking schemes. By leveraging the unified visual language feature space and incorporating MRefM's ability to model the referential relations, our approach enables direct regression of the referring results without resorting to various complex techniques. Our method consistently surpasses existing approaches and achieves SoTA performance on both grounding and segmentation tasks, providing valuable insights for future research. Our code and models are available at https://github.com/linhuixiao/OneRef.", "keywords": "Visual Grounding;Referring Expression Comprehension;Referring Image Segmentation;Multimodality;Masked Visual Language Modeling", "primary_area": "machine_vision", "supplementary_material": "", "author": "Linhui Xiao;Xiaoshan Yang;Fang Peng;Yaowei Wang;Changsheng Xu", "authorids": "~Linhui_Xiao1;~Xiaoshan_Yang2;~Fang_Peng1;~Yaowei_Wang1;~Changsheng_Xu1", "gender": "M;M;F;M;M", "homepage": ";https://yangxs.ac.cn;;https://dblp.org/pid/68/2992.html;", "dblp": "241/9207;74/9989;;68/2992-1;85/1301", "google_scholar": "https://scholar.google.com.hk/citations?user=4rTE4ogAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=hI9NRDkAAAAJ", "orcid": "0000-0003-2592-5264;0000-0001-5453-9755;0000-0002-3948-7413;0000-0002-6110-4036;", "linkedin": ";;;yaowei-wang-971ab310/;", "or_profile": "~Linhui_Xiao1;~Xiaoshan_Yang2;~Fang_Peng1;~Yaowei_Wang1;~Changsheng_Xu1", "aff": "University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Pengcheng Laboratory;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ucas.edu.cn;ia.ac.cn;ucas.ac.cn;pcl.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxiao2024oneref,\ntitle={OneRef: Unified One-tower Expression Grounding and Segmentation with Mask Referring Modeling},\nauthor={Linhui Xiao and Xiaoshan Yang and Fang Peng and Yaowei Wang and Changsheng Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=siPdcro6uD}\n}", "github": "", "reviewers": "SMKQ;vP75;a3fR", "pdf_size": 6792345, "rating": "5;5;6", "confidence": "3;4;4", "soundness": "3;3;3", "novelty": "3;3;2", "presentation": "3;2;2", "wc_summary": "43;62;61", "wc_strengths": "31;43;69", "wc_weaknesses": "121;50;155", "wc_questions": "2;2;2", "wc_limitations": "1;1;1", "wc_review": "198;158;288", "wc_reply_reviewers": "0;0;38", "wc_reply_authors": "120;120;74", "reply_reviewers": "0;0;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 55.333333333333336, 8.73053390247253 ], "wc_strengths_avg": [ 47.666666666666664, 15.86050300449376 ], "wc_weaknesses_avg": [ 108.66666666666667, 43.744205965539656 ], "wc_questions_avg": [ 2.0, 0.0 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 214.66666666666666, 54.365021434333634 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 104.66666666666667, 21.684607956387456 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3990574135303062326&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "ucas.edu.cn;ia.ac.cn;ucas.ac.cn;pcl.ac.cn;ia.ac.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Pengcheng Laboratory", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn;", "aff_unique_abbr": "UCAS;CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SfPUEL: Shape from Polarization under Unknown Environment Light", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93377", "id": "skeopn3q5Y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=skeopn3q5Y", "openreview": "https://openreview.net/forum?id=skeopn3q5Y", "poster": "", "project": "", "author_site": "Youwei Lyu, Heng Guo, Kailong Zhang, Si Li, Boxin Shi", "tldr": "", "abstract": "Shape from polarization (SfP) benefits from advancements like polarization cameras for single-shot normal estimation, but its performance heavily relies on light conditions. This paper proposes SfPUEL, an end-to-end SfP method to jointly estimate surface normal and material under unknown environment light. To handle this challenging light condition, we design a transformer-based framework for enhancing the perception of global context features. We further propose to integrate photometric stereo (PS) priors from pretrained models to enrich extracted features for high-quality normal predictions. As metallic and dielectric materials exhibit different BRDFs, SfPUEL additionally predicts dielectric and metallic material segmentation to further boost performance. Experimental results on synthetic and our collected real-world dataset demonstrate that SfPUEL significantly outperforms existing SfP and single-shot normal estimation methods. The code and dataset is available at https://github.com/YouweiLyu/SfPUEL.", "keywords": "shape-from-polarization;photometric 3D reconstruction;physics-based vision", "primary_area": "machine_vision", "supplementary_material": "", "author": "Youwei Lyu;Heng Guo;Kailong Zhang;Si Li;Boxin Shi", "authorids": "~Youwei_Lyu1;~Heng_Guo3;~Kailong_Zhang1;~Si_Li5;~Boxin_Shi3", "gender": "M;M;M;;M", "homepage": "https://youweilyu.github.io/;https://gh-home.github.io/;https://github.com/flzt11;http://www.pris.net.cn/introduction/teacher/lisi;http://camera.pku.edu.cn", "dblp": "255/6998.html;22/7361-3;;54/6603-1.html;69/783", "google_scholar": "https://scholar.google.com/citations?hl=en;HKu6gF4AAAAJ;;;K1LjZxcAAAAJ", "orcid": "0000-0002-6723-3517;;;;0000-0001-6749-0364", "linkedin": ";;;;", "or_profile": "~Youwei_Lyu1;~Heng_Guo3;~Kailong_Zhang1;~Si_Li5;~Boxin_Shi3", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Peking University", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;pku.edu.cn", "position": "PhD student;Researcher;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlyu2024sfpuel,\ntitle={Sf{PUEL}: Shape from Polarization under Unknown Environment Light},\nauthor={Youwei Lyu and Heng Guo and Kailong Zhang and Si Li and Boxin Shi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=skeopn3q5Y}\n}", "github": "", "reviewers": "bz5V;7r7M;66Sa", "pdf_size": 19470814, "rating": "6;6;6", "confidence": "4;4;4", "soundness": "4;3;3", "novelty": "3;4;3", "presentation": "4;4;3", "wc_summary": "66;120;111", "wc_strengths": "59;58;88", "wc_weaknesses": "62;105;98", "wc_questions": "28;46;288", "wc_limitations": "47;17;5", "wc_review": "262;346;590", "wc_reply_reviewers": "40;27;39", "wc_reply_authors": "0;0;151", "reply_reviewers": "1;1;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 99.0, 23.62202362203543 ], "wc_strengths_avg": [ 68.33333333333333, 13.912424503139471 ], "wc_weaknesses_avg": [ 88.33333333333333, 18.83849486792639 ], "wc_questions_avg": [ 120.66666666666667, 118.55050494292388 ], "wc_limitations_avg": [ 23.0, 17.663521732655695 ], "wc_review_avg": [ 399.3333333333333, 139.11466094157328 ], "wc_reply_reviewers_avg": [ 35.333333333333336, 5.90668171555645 ], "wc_reply_authors_avg": [ 50.333333333333336, 71.18208263944578 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XqQ56ohjMikJ:scholar.google.com/&scioq=SfPUEL:+Shape+from+Polarization+under+Unknown+Environment+Light&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;pku.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "BUPT;Peking U", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Online Estimation via Offline Estimation: An Information-Theoretic Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93376", "id": "sks7x4I8Bh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sks7x4I8Bh", "openreview": "https://openreview.net/forum?id=sks7x4I8Bh", "poster": "", "project": "", "author_site": "Dylan J Foster, Yanjun Han, Jian Qian, Alexander Rakhlin", "tldr": "", "abstract": "The classical theory of statistical estimation aims to estimate a parameter of interest under data generated from a fixed design (''offline estimation''), while the contemporary theory of online learning provides algorithms for estimation under adaptively chosen covariates (''online estimation''). Motivated by connections between estimation and interactive decision making, we ask: is it possible to convert offline estimation algorithms into online estimation algorithms in a black-box fashion? We investigate this question from an information-theoretic perspective by introducing a new framework, Oracle-Efficient Online Estimation (OEOE), where the learner can only interact with the data stream indirectly through a sequence of offline estimators produced by a black-box algorithm operating on the stream. Our main results settle the statistical and computational complexity of online estimation in this framework.\n\n $\\bullet$ Statistical complexity. We show that information-theoretically, there exist algorithms that achieve near-optimal online estimation error via black-box offline estimation oracles, and give a nearly-tight characterization for minimax rates in the OEOE framework.\n\n $\\bullet$ Computational complexity. We show that the guarantees above cannot be achieved in a computationally efficient fashion in general, but give a refined characterization for the special case of conditional density estimation: computationally efficient online estimation via black-box offline estimation is possible whenever it is possible via unrestricted algorithms.\n\nFinally, we apply our results to give offline oracle-efficient algorithms for interactive decision making.", "keywords": "online learning;interactive decision making;oracle-efficient;regression;classification;conditional density estimation", "primary_area": "online_learning", "supplementary_material": "", "author": "Dylan J Foster;Yanjun Han;Jian Qian;Alexander Rakhlin", "authorids": "~Dylan_J_Foster1;~Yanjun_Han1;~Jian_Qian2;~Alexander_Rakhlin1", "gender": ";M;;M", "homepage": "http://dylanfoster.net;https://yanjunhan2021.github.io;https://sites.google.com/view/jianqian/about;http://www.mit.edu/~rakhlin/", "dblp": "167/4271;35/7252;;59/407", "google_scholar": "RqwU8xsAAAAJ;hdTDzlQAAAAJ;;https://scholar.google.com.tw/citations?user=fds2VpgAAAAJ", "orcid": ";;;", "linkedin": ";;jianQ/;", "or_profile": "~Dylan_J_Foster1;~Yanjun_Han1;~Jian_Qian2;~Alexander_Rakhlin1", "aff": "Microsoft Research;New York University;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "microsoft.com;nyu.edu;mit.edu;mit.edu", "position": "Principal Researcher;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nfoster2024online,\ntitle={Online Estimation via Offline Estimation: An Information-Theoretic Framework},\nauthor={Dylan J Foster and Yanjun Han and Jian Qian and Alexander Rakhlin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sks7x4I8Bh}\n}", "github": "", "reviewers": "erpa;HaHn;3FiW;do6a", "pdf_size": 738689, "rating": "4;5;6;6", "confidence": "2;2;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "1;2;3;3", "wc_summary": "40;113;79;172", "wc_strengths": "21;113;48;77", "wc_weaknesses": "40;169;146;96", "wc_questions": "72;91;3;67", "wc_limitations": "1;1;3;11", "wc_review": "174;487;279;423", "wc_reply_reviewers": "13;23;10;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 101.0, 48.45100618150257 ], "wc_strengths_avg": [ 64.75, 34.17875802307626 ], "wc_weaknesses_avg": [ 112.75, 49.605317255310446 ], "wc_questions_avg": [ 58.25, 33.13136731256348 ], "wc_limitations_avg": [ 4.0, 4.123105625617661 ], "wc_review_avg": [ 340.75, 122.24233104780029 ], "wc_reply_reviewers_avg": [ 12.75, 6.5717197140474575 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16096000138943336531&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "microsoft.com;nyu.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Microsoft;New York University;Massachusetts Institute of Technology", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.nyu.edu;https://web.mit.edu", "aff_unique_abbr": "MSR;NYU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Melting Pot Contest: Charting the Future of Generalized Cooperative Intelligence", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97476", "id": "slqbOc67W8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=slqbOc67W8", "openreview": "https://openreview.net/forum?id=slqbOc67W8", "poster": "/media/PosterPDFs/NeurIPS%202024/97476.png?t=1734011688.113521", "project": "", "author_site": "Rakshit Trivedi, Akbir Khan, Jesse Clifton, Lewis Hammond, Edgar Du\u00e9\u00f1ez-Guzm\u00e1n, Dipam Chakraborty, John Agapiou, Jayd Matyas, Sasha Vezhnevets, Barna P\u00e1sztor, Yunke Ao, Omar G. Younis, Jiawei Huang, Benjamin Swain, Haoyuan Qin, Deng, Ziwei Deng, Utku Erdo\u011fanaras, Yue Zhao, Marko Tesic, Natasha Jaques, Jakob Foerster, Vincent Conitzer, Jos\u00e9 Hern\u00e1ndez-Orallo, Dylan Hadfield-Menell, Joel Leibo", "tldr": "", "abstract": "Multi-agent AI research promises a path to develop human-like and human-compatible intelligent technologies that complement the solipsistic view of other approaches, which mostly do not consider interactions between agents. Aiming to make progress in this direction, the Melting Pot contest 2023 focused on the problem of cooperation among interacting agents and challenged researchers to push the boundaries of multi-agent reinforcement learning (MARL) for mixed-motive games. The contest leveraged the Melting Pot environment suite to rigorously evaluate how well agents can adapt their cooperative skills to interact with novel partners in unforeseen situations. Unlike other reinforcement learning challenges, this challenge focused on social rather than environmental generalization. In particular, a population of agents performs well in Melting Pot when its component individuals are adept at finding ways to cooperate both with others in their population and with strangers. Thus Melting Pot measures cooperative intelligence.\nThe contest attracted over 600 participants across 100+ teams globally and was a success on multiple fronts: (i) it contributed to our goal of pushing the frontiers of MARL towards building more cooperatively intelligent agents, evidenced by several submissions that outperformed established baselines; (ii) it attracted a diverse range of participants, from independent researchers to industry affiliates and academic labs, both with strong background and new interest in the area alike, broadening the field\u2019s demographic and intellectual diversity; and (iii) analyzing the submitted agents provided important insights, highlighting areas for improvement in evaluating agents' cooperative intelligence. This paper summarizes the design aspects and results of the contest and explores the potential of Melting Pot as a benchmark for studying Cooperative AI. We further analyze the top solutions and conclude with a discussion on promising directions for future research.", "keywords": "Cooperative AI;Multi-Agent Reinforcement Learning;Social Intelligence;Zero-Shot Generalization;Mixed-Motive Games", "primary_area": "", "supplementary_material": "/attachment/6546dd45f587e47210103f858b48a3422c94d852.pdf", "author": "Rakshit Trivedi;Akbir Khan;Jesse Clifton;Lewis Hammond;Edgar A. Du\u00e9\u00f1ez-Guzm\u00e1n;Dipam Chakraborty;John P Agapiou;Jayd Matyas;Sasha Vezhnevets;Barna P\u00e1sztor;Yunke Ao;Omar G. Younis;Jiawei Huang;Benjamin Swain;Haoyuan Qin;Mian Deng;Ziwei Deng;Utku Erdo\u011fanaras;Yue Zhao;Marko Tesic;Natasha Jaques;Jakob Nicolaus Foerster;Vincent Conitzer;Jose Hernandez-Orallo;Dylan Hadfield-Menell;Joel Z Leibo", "authorids": "~Rakshit_Trivedi1;~Akbir_Khan1;~Jesse_Clifton1;~Lewis_Hammond1;~Edgar_A._Du\u00e9\u00f1ez-Guzm\u00e1n2;~Dipam_Chakraborty1;~John_P_Agapiou1;~Jayd_Matyas1;~Sasha_Vezhnevets1;~Barna_P\u00e1sztor1;~Yunke_Ao1;~Omar_G._Younis2;~Jiawei_Huang3;~Benjamin_Swain1;~Haoyuan_Qin1;~Mian_Deng2;~Ziwei_Deng2;~Utku_Erdo\u011fanaras1;~Yue_Zhao29;~Marko_Tesic1;~Natasha_Jaques1;~Jakob_Nicolaus_Foerster1;~Vincent_Conitzer2;~Jose_Hernandez-Orallo1;~Dylan_Hadfield-Menell2;~Joel_Z_Leibo1", "gender": ";M;M;;;M;;F;M;;M;;;M;M;M;F;M;F;;F;M;M;;M;M", "homepage": ";https://akbir.dev;https://statistics.sciences.ncsu.edu/people/jclifto/;https://www.lewishammond.com/;;https://twitter.com/__dipam__;;;;;https://github.com/ToolManChang;;https://jiaweihhuang.github.io;;https://asc.xmu.edu.cn/t/qinhaoyuan;https://asc.xmu.edu.cn/m/t/dengmian;https://github.com/CiverDeng;https://audits.sherlock.xyz/watson/TessKimy;;;https://natashajaques.ai/;https://www.jakobfoerster.com;https://www.cs.cmu.edu/~conitzer/;http://josephorallo.webs.upv.es/;http://people.csail.mit.edu/dhm/;http://www.jzleibo.com", "dblp": ";;;228/6647;;;;;79/499;273/3840;277/9587;;13/4208;;;396/5834;203/3237;;48/76-23.html;;145/7732;176/5095;c/VincentConitzer;h/JoseHernandezOrallo;135/8332;33/11107", "google_scholar": ";https://scholar.google.com/citations?hl=en;;8fYnp7UAAAAJ;;;;;vo1zs4sAAAAJ;t2QJiCkAAAAJ;https://scholar.google.ch/citations?user=IWbnaMoAAAAJ;;6IcfJiIAAAAJ;;32lBQlsAAAAJ;;;;9uCh-xkAAAAJ;;8iCb2TwAAAAJ;6z4lQzMAAAAJ;juRk4lQAAAAJ;n9AWbcAAAAAJ;4mVPFQ8AAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0003-1695-0871;;;;;;;;;;;;;;;0000-0002-0664-4769;;;;0000-0003-1899-7884;;0000-0002-6168-4763;0000-0002-3153-916X", "linkedin": ";;;lrhammond/;;;;jaydmaty/;;;yunke-ao-317325145/;;;benjamin-swain-56b976168/;;;;;;;natashajaques;;vincent-conitzer-2563082/;;;", "or_profile": "~Rakshit_Trivedi1;~Akbir_Khan1;~Jesse_Clifton1;~Lewis_Hammond1;~Edgar_A._Du\u00e9\u00f1ez-Guzm\u00e1n2;~Dipam_Chakraborty1;~John_P_Agapiou1;~Jayd_Matyas1;~Sasha_Vezhnevets1;~Barna_P\u00e1sztor1;~Yunke_Ao1;~Omar_G._Younis2;~Jiawei_Huang3;~Benjamin_Swain1;~Haoyuan_Qin1;~Mian_Deng2;~Ziwei_Deng2;~Utku_Erdo\u011fanaras1;~Yue_Zhao29;~Marko_Tesic1;~Natasha_Jaques1;~Jakob_Nicolaus_Foerster1;~Vincent_Conitzer2;~Jose_Hernandez-Orallo1;~Dylan_Hadfield-Menell2;~Joel_Z_Leibo1", "aff": ";;;University of Oxford;;;;Google DeepMind;Google DeepMind;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;;Department of Computer Science, ETHZ - ETH Zurich;General Motors;Xiamen University;Xiamen University;Xiamen University;TED University;Northwest Polytechnical University Xi'an;;Google;University of Oxford, University of Oxford;University of Oxford;University of Cambridge;Massachusetts Institute of Technology;Google DeepMind", "aff_domain": ";;;ox.ac.uk;;;;deepmind.com;deepmind.com;ethz.ch;ethz.ch;;inf.ethz.ch;gm.com;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;tedu.edu.tr;nwpu.edu.cn;;google.com;eng.ox.ac.uk;oxford.ac.uk;cam.ac.uk;mit.edu;deepmind.com", "position": ";;;PhD student;;;;Research environment designer;Researcher;PhD student;PhD student;;PhD student;Researcher;MS student;MS student;MS student;Undergrad student;PhD student;;Senior Research Scientist;Associate Professor;Full Professor;Researcher;Assistant Professor;Research scientist", "bibtex": "@inproceedings{\ntrivedi2024melting,\ntitle={Melting Pot Contest: Charting the Future of Generalized Cooperative Intelligence},\nauthor={Rakshit Trivedi and Akbir Khan and Jesse Clifton and Lewis Hammond and Edgar A. Du{\\'e}{\\~n}ez-Guzm{\\'a}n and Dipam Chakraborty and John P Agapiou and Jayd Matyas and Sasha Vezhnevets and Barna P{\\'a}sztor and Yunke Ao and Omar G. Younis and Jiawei Huang and Benjamin Swain and Haoyuan Qin and Mian Deng and Ziwei Deng and Utku Erdo{\\u{g}}anaras and Yue Zhao and Marko Tesic and Natasha Jaques and Jakob Nicolaus Foerster and Vincent Conitzer and Jose Hernandez-Orallo and Dylan Hadfield-Menell and Joel Z Leibo},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=slqbOc67W8}\n}", "github": "", "reviewers": "mWg1;wn2e;e1NQ", "pdf_size": 10714056, "rating": "6;7;7", "confidence": "4;3;4", "wc_summary_and_contributions": "102;89;24", "wc_strengths": "18;65;39", "wc_improvement": "42;55;115", "wc_limitations": "1;12;53", "wc_correctness": "31;1;13", "wc_clarity": "1;5;7", "wc_relation_to_prior_work": "1;1;6", "wc_documentation": "1;1;23", "wc_additional_feedback": "1;1;1", "wc_review": "198;230;281", "wc_reply_reviewers": "30;22;0", "wc_reply_authors": "98;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 71.66666666666667, 34.120700787384514 ], "wc_strengths_avg": [ 40.666666666666664, 19.22382780706162 ], "wc_improvement_avg": [ 70.66666666666667, 31.794478905761125 ], "wc_limitations_avg": [ 22.0, 22.37558192911788 ], "wc_correctness_avg": [ 15.0, 12.328828005937952 ], "wc_clarity_avg": [ 4.333333333333333, 2.494438257849294 ], "wc_relation_to_prior_work_avg": [ 2.6666666666666665, 2.3570226039551585 ], "wc_documentation_avg": [ 8.333333333333334, 10.370899457402697 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 236.33333333333334, 34.179265969622904 ], "wc_reply_reviewers_avg": [ 17.333333333333332, 12.684198393626966 ], "wc_reply_authors_avg": [ 32.666666666666664, 46.19764303752111 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 26, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JN62rzXFN38J:scholar.google.com/&scioq=Melting+Pot+Contest:+Charting+the+Future+of+Generalized+Cooperative+Intelligence&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";;;ox.ac.uk;;;;deepmind.com;deepmind.com;ethz.ch;ethz.ch;;inf.ethz.ch;gm.com;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;tedu.edu.tr;nwpu.edu.cn;;google.com;eng.ox.ac.uk;oxford.ac.uk;cam.ac.uk;mit.edu;deepmind.com", "author_num": 26, "aff_unique_index": "0;1;1;2;3;2;4;5;5;5;6;7;1;0;0;8;9;1", "aff_unique_norm": "University of Oxford;Google;ETH Zurich;Swiss Federal Institute of Technology;General Motors;Xiamen University;TED University;Northwest Polytechnical University;University of Cambridge;Massachusetts Institute of Technology", "aff_unique_dep": ";Google DeepMind;;;;;;;;", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com;https://www.ethz.ch;https://www.ethz.ch;https://www.gm.com;https://www.xmu.edu.cn;https://www.teduniversity.edu.tr;http://www.nwpu.edu.cn;https://www.cam.ac.uk;https://web.mit.edu", "aff_unique_abbr": "Oxford;DeepMind;ETHZ;ETH Zurich;GM;XMU;TED\u00dc;NWPU;Cambridge;MIT", "aff_campus_unique_index": "1;2;3;4", "aff_campus_unique": ";Zurich;Xi'an;Mountain View;Cambridge", "aff_country_unique_index": "0;0;0;1;1;1;2;3;3;3;4;3;2;0;0;0;2;0", "aff_country_unique": "United Kingdom;Switzerland;United States;China;T\u00fcrkiye" }, { "id": "smxQvTmdGS", "title": "Bias and Volatility: A Statistical Framework for Evaluating Large Language Model's Stereotypes and the Associated Generation Inconsistency", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "We present a novel statistical framework for analyzing stereotypes in large language models (LLMs) by systematically estimating the bias and variation in their generation. Current evaluation metrics in the alignment literature often overlook the randomness of stereotypes caused by the inconsistent generative behavior of LLMs. For example, this inconsistency can result in LLMs displaying contradictory stereotypes, including those related to gender or race, for identical professions across varied contexts. Neglecting such inconsistency could lead to misleading conclusions in alignment evaluations and hinder the accurate assessment of the risk of LLM applications perpetuating or amplifying social stereotypes and unfairness.\n\nThis work proposes a Bias-Volatility Framework (BVF) that estimates the probability distribution function of LLM stereotypes. Specifically, since the stereotype distribution fully captures an LLM's generation variation, BVF enables the assessment of both the likelihood and extent to which its outputs are against vulnerable groups, thereby allowing for the quantification of the LLM's aggregated discrimination risk. Furthermore, we introduce a mathematical framework to decompose an LLM\u2019s aggregated discrimination risk into two components: bias risk and volatility risk, originating from the mean and variation of LLM\u2019s stereotype distribution, respectively. We apply BVF to assess 12 commonly adopted LLMs and compare their risk levels. Our findings reveal that: i) Bias risk is the primary cause of discrimination risk in LLMs; ii) Most LLMs exhibit significant pro-male stereotypes for nearly all careers; iii) Alignment with reinforcement learning from human feedback lowers discrimination by reducing bias, but increases volatility; iv) Discrimination risk in LLMs correlates with key sociol-economic factors like professional salaries. Finally, we emphasize that BVF can also be used to assess other dimensions of generation inconsistency's impact on LLM behavior beyond stereotypes, such as knowledge mastery.", "keywords": "LLM discrimination;statistical framework;prejudice-volatility framework", "primary_area": "", "supplementary_material": "/attachment/c3bda7491fb6176dad95f2aaa54476b07caf64a9.zip", "author": "Yiran Liu;Ke Yang;Zehan Qi;Xiao Liu;Yang Yu;ChengXiang Zhai", "authorids": "~Yiran_Liu1;~Ke_Yang7;~Zehan_Qi2;~Xiao_Liu15;~Yang_Yu13;~ChengXiang_Zhai1", "gender": "F;M;M;;M;M", "homepage": "https://empathyang.github.io/;https://github.com/xiao9905;http://czhai.cs.illinois.edu/;https://iiis.tsinghua.edu.cn/zh/yuy/;;", "dblp": ";82/1364-36;z/ChengXiangZhai;;;358/8851", "google_scholar": "13Puu8AAAAAJ;VKI8EhUAAAAJ;YU-baPIAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0002-9226-4569;0000-0002-6434-3702;;0000-0003-3450-7881;", "linkedin": ";;;;;", "or_profile": "~Ke_Yang7;~Xiao_Liu15;~ChengXiang_Zhai1;~Yu_Yang11;~Liu_Yiran2;~zehan_qi1", "aff": "Amazon;Tsinghua University;University of Illinois, Urbana Champaign;;Tsinghua University;Tsinghua University", "aff_domain": "amazon.com;tsinghua.edu.cn;illinois.edu;;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "Intern;PhD student;Full Professor;;PhD student;PhD student", "bibtex": "@inproceedings{\nliu2024bias,\ntitle={Bias and Volatility: A Statistical Framework for Evaluating Large Language Model's Stereotypes and the Associated Generation Inconsistency},\nauthor={Yiran Liu and Ke Yang and Zehan Qi and Xiao Liu and Yang Yu and ChengXiang Zhai},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=smxQvTmdGS}\n}", "github": "", "project": "", "reviewers": "pUWU;aF2Z;erKg;yMyq", "site": "https://openreview.net/forum?id=smxQvTmdGS", "pdf_size": 5097318, "rating": "6;6;7;7", "confidence": "4;3;5;4", "wc_summary_and_contributions": "85;121;44;148", "wc_strengths": "88;19;19;6", "wc_improvement": "57;34;19;8", "wc_limitations": "36;94;1;47", "wc_correctness": "16;25;1;3", "wc_clarity": "23;19;1;6", "wc_relation_to_prior_work": "1;23;1;20", "wc_documentation": "13;7;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "320;343;88;240", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 99.5, 39.067249711235114 ], "wc_strengths_avg": [ 33.0, 32.1947200640105 ], "wc_improvement_avg": [ 29.5, 18.364367672206956 ], "wc_limitations_avg": [ 44.5, 33.24530041975858 ], "wc_correctness_avg": [ 11.25, 9.807522622966516 ], "wc_clarity_avg": [ 12.25, 9.03811374126261 ], "wc_relation_to_prior_work_avg": [ 11.25, 10.304731922762475 ], "wc_documentation_avg": [ 5.5, 4.9749371855331 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 247.75, 99.84081079398344 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3859423898330921700&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 0, "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Amazon;Tsinghua University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.tsinghua.edu.cn;https://illinois.edu", "aff_unique_abbr": "Amazon;THU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United States;China" }, { "title": "The Impact of Initialization on LoRA Finetuning Dynamics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93375", "id": "sn3UrYRItk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sn3UrYRItk", "openreview": "https://openreview.net/forum?id=sn3UrYRItk", "poster": "", "project": "", "author_site": "Soufiane Hayou, Nikhil Ghosh, Bin Yu", "tldr": "", "abstract": "In this paper, we study the role of initialization in Low Rank Adaptation (LoRA) as originally introduced in Hu et al. (2021). Essentially, to start from the pretrained model, one can either initialize $B$ to zero and $A$ to random, or vice-versa. In both cases, the product $BA$ is equal to zero at initialization, which makes finetuning starts from the pretrained model. These two initialization schemes are seemingly similar. They should in-principle yield the same performance and share the same optimal learning rate. We demonstrate that this is an *incorrect intuition* and that the first scheme (of initializing $B$ to zero and $A$ to random) on average in our experiments yields better performance compared to the other scheme. Our theoretical analysis shows that the reason behind this might be that the first initialization allows the use of larger learning rates (without causing output instability) compared to the second initialization, resulting in more efficient learning of the first scheme. We validate our results with extensive experiments on LLMs.", "keywords": "Finetuning; LoRA; Large Language Models", "primary_area": "other", "supplementary_material": "", "author": "Soufiane Hayou;Nikhil Ghosh;Bin Yu", "authorids": "~Soufiane_Hayou1;~Nikhil_Ghosh1;~Bin_Yu5", "gender": "M;M;M", "homepage": "https://www.soufianehayou.com/;;https://binyu.stat.berkeley.edu", "dblp": "220/5617;251/8779;27/116", "google_scholar": "https://scholar.google.com/citations?hl=en;0Fv4bikAAAAJ;https://scholar.google.com.hk/citations?user=z1iJa3UAAAAJ", "orcid": ";;0000-0003-3097-1433", "linkedin": ";nikhil-ghosh-03389199/;bin-yu-b665063/", "or_profile": "~Soufiane_Hayou1;~Nikhil_Ghosh1;~Bin_Yu5", "aff": "National University of Singapore;University of California, Berkeley;University of California, Berkeley", "aff_domain": "nus.edu.sg;berkeley.edu;berkeley.edu", "position": "Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nhayou2024the,\ntitle={The Impact of Initialization on Lo{RA} Finetuning Dynamics},\nauthor={Soufiane Hayou and Nikhil Ghosh and Bin Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sn3UrYRItk}\n}", "github": "", "reviewers": "5gBr;2Ni1;Emo5;quNJ", "pdf_size": 883646, "rating": "5;5;6;8", "confidence": "3;3;2;3", "soundness": "3;3;2;4", "novelty": "3;2;2;3", "presentation": "2;2;2;4", "wc_summary": "111;166;104;106", "wc_strengths": "35;70;21;187", "wc_weaknesses": "284;103;106;103", "wc_questions": "28;171;1;130", "wc_limitations": "6;19;8;79", "wc_review": "464;529;240;605", "wc_reply_reviewers": "150;13;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 121.75, 25.674647027758727 ], "wc_strengths_avg": [ 78.25, 65.27394196767956 ], "wc_weaknesses_avg": [ 149.0, 77.95190825117753 ], "wc_questions_avg": [ 82.5, 70.1801253917375 ], "wc_limitations_avg": [ 28.0, 29.857997253667232 ], "wc_review_avg": [ 459.5, 136.1993024945429 ], "wc_reply_reviewers_avg": [ 40.75, 63.29840045372395 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12798062951467837257&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "nus.edu.sg;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "National University of Singapore;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.berkeley.edu", "aff_unique_abbr": "NUS;UC Berkeley", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Singapore;United States" }, { "title": "MEQA: A Benchmark for Multi-hop Event-centric Question Answering with Explanations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97474", "id": "snNuvAOQxB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=snNuvAOQxB", "openreview": "https://openreview.net/forum?id=snNuvAOQxB", "poster": "/media/PosterPDFs/NeurIPS%202024/97474.png?t=1731756141.2472932", "project": "", "author_site": "Ruosen Li, Zimu Wang, Son Tran, Lei Xia, Xinya Du", "tldr": "", "abstract": "Existing benchmarks for multi-hop question answering (QA) primarily evaluate models based on their ability to reason about entities and the relationships between them. However, there's a lack of insight into how these models perform in terms of both events and entities. In this paper, we introduce a novel semi-automatic question generation strategy by composing event structures from information extraction (IE) datasets and present the first Multi-hop Event-centric Question Answering (MEQA) benchmark. It contains (1) 2,243 challenging questions that require a diverse range of complex reasoning over entity-entity, entity-event, and event-event relations; (2) corresponding multi-step QA-format event reasoning chain (explanation) which leads to the answer for each question. We also introduce two metrics for evaluating explanations: completeness and logical consistency. We conduct comprehensive benchmarking and analysis, which shows that MEQA is challenging for the latest state-of-the-art models encompassing large language models (LLMs); and how they fall short of providing faithful explanations of the event-centric reasoning process.", "keywords": "Multi-hop Question Answering;Event-centric Reasoning;Natural Language Reasoning", "primary_area": "", "supplementary_material": "/attachment/1990db28ce45c233834c5f620ac4b6e3ac76df72.pdf", "author": "Ruosen Li;Zimu Wang;Son Quoc Tran;Lei Xia;Xinya Du", "authorids": "~Ruosen_Li1;~Zimu_Wang3;~Son_Quoc_Tran1;~Lei_Xia2;~Xinya_Du1", "gender": "M;M;M;M;M", "homepage": ";https://www.zimuwang.net;https://sonqt.github.io;https://xinyadu.github.io;", "dblp": "351/0775;;317/1208;200/8114;", "google_scholar": "tN-RVAkAAAAJ;0EzXWPgAAAAJ;2c0UVdwAAAAJ;R-lKQqkAAAAJ;TSequwgAAAAJ", "orcid": ";;;;0009-0003-4691-5468", "linkedin": "ruosenli/?locale=en_US;;son-tran-nlp08/;;brian-lei-xia/", "or_profile": "~Ruosen_Li1;~Zimu_Wang3;~Son_Quoc_Tran1;~Xinya_Du1;~Lei_XIA1", "aff": "University of Texas at Dallas;University of Texas at Dallas;Cornell University;University of Texas at Dallas;University of Hong Kong", "aff_domain": "utd.edu;utdallas.edu;cornell.edu;utdallas.edu;hku.hk", "position": "PhD student;Visiting student;PhD student;Assistant Professor;MS student", "bibtex": "@inproceedings{\nli2024meqa,\ntitle={{MEQA}: A Benchmark for Multi-hop Event-centric Question Answering with Explanations},\nauthor={Ruosen Li and Zimu Wang and Son Quoc Tran and Lei Xia and Xinya Du},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=snNuvAOQxB}\n}", "github": "", "reviewers": "spJ2;DJfr;sD7f", "pdf_size": 5762011, "rating": "5;6;7", "confidence": "4;4;4", "wc_summary_and_contributions": "105;52;49", "wc_strengths": "91;64;4", "wc_improvement": "142;374;13", "wc_limitations": "4;12;13", "wc_correctness": "29;7;1", "wc_clarity": "25;5;1", "wc_relation_to_prior_work": "1;24;3", "wc_documentation": "1;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "399;554;86", "wc_reply_reviewers": "0;20;0", "wc_reply_authors": "138;0;0", "reply_reviewers": "0;1;0", "reply_authors": "4;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 68.66666666666667, 25.72072229848057 ], "wc_strengths_avg": [ 53.0, 36.359317925395686 ], "wc_improvement_avg": [ 176.33333333333334, 149.36383617045848 ], "wc_limitations_avg": [ 9.666666666666666, 4.027681991198191 ], "wc_correctness_avg": [ 12.333333333333334, 12.036980056845191 ], "wc_clarity_avg": [ 10.333333333333334, 10.498677165349081 ], "wc_relation_to_prior_work_avg": [ 9.333333333333334, 10.402991022884823 ], "wc_documentation_avg": [ 5.666666666666667, 6.599663291074443 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 346.3333333333333, 194.65582161571456 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 9.428090415820632 ], "wc_reply_authors_avg": [ 46.0, 65.05382386916237 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5617336321821190106&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "utd.edu;utdallas.edu;cornell.edu;utdallas.edu;hku.hk", "author_num": 5, "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Texas at Dallas;Cornell University;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utdallas.edu;https://www.cornell.edu;https://www.hku.hk", "aff_unique_abbr": "UT Dallas;Cornell;HKU", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Dallas;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Adapting Diffusion Models for Improved Prompt Compliance and Controllable Image Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93374", "id": "sntv8Ac3U2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sntv8Ac3U2", "openreview": "https://openreview.net/forum?id=sntv8Ac3U2", "poster": "/media/PosterPDFs/NeurIPS%202024/93374.png?t=1733353967.96722", "project": "", "author_site": "Deepak Sridhar, Abhishek Peri, Rohith Rachala, Nuno Vasconcelos", "tldr": "", "abstract": "Recent advances in generative modeling with diffusion processes (DPs) enabled breakthroughs in image synthesis. Despite impressive image quality, these models have various prompt compliance problems, including low recall in generating multiple objects, difficulty in generating text in images, and meeting constraints like object locations and pose. For fine-grained editing and manipulation, they also require fine-grained semantic or instance maps that are tedious to produce manually. While prompt compliance can be enhanced by addition of loss functions at inference, this is time consuming and does not scale to complex scenes. \n To overcome these limitations, this work introduces a new family of $\\textit{Factor Graph Diffusion Models}$ (FG-DMs) that models the joint distribution of images and conditioning variables, such as semantic, sketch, depth or normal maps via a factor graph decomposition. This joint structure has several advantages, including support for efficient sampling based prompt compliance schemes, which produce images of high object recall, semi-automated fine-grained editing, explainability at intermediate levels, ability to produce labeled datasets for the training of downstream models such as segmentation or depth, training with missing data, and continual learning where new conditioning variables can be added with minimal or no modifications to the existing structure. We propose an implementation of FG-DMs by adapting a pre-trained Stable Diffusion (SD) model to implement all FG-DM factors, using only COCO dataset, and show that it is effective in generating images with 15\\% higher recall than SD while retaining its generalization ability. We introduce an attention distillation loss that encourages consistency among the attention maps of all factors, improving the fidelity of the generated conditions and image. We also show that training FG-DMs from scratch on MM-CelebA-HQ, Cityscapes, ADE20K, and COCO produce images of high quality (FID) and diversity (LPIPS).", "keywords": "Image Synthesis; Controllable 2D/3D Synthesis; Diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Deepak Sridhar;Abhishek Peri;Rohith Reddy Rachala;Nuno Vasconcelos", "authorids": "~Deepak_Sridhar1;~Abhishek_Peri1;~Rohith_Reddy_Rachala1;~Nuno_Vasconcelos1", "gender": "M;M;M;M", "homepage": "https://deepaksridhar.github.io/;;https://rohithreddy0087.github.io/;http://www.svcl.ucsd.edu/~nuno/", "dblp": "224/8563;;;78/4806", "google_scholar": "https://scholar.google.ca/citations?user=3s6c6GcAAAAJ;wJ0IFFIAAAAJ;;Fykyo9gAAAAJ", "orcid": "0000-0003-4395-7366;;0009-0007-7623-3707;0000-0002-9024-4302", "linkedin": "deepak-sridhar/;abhishek-peri/;rohith-reddy-rachala-2890bb1a6/;", "or_profile": "~Deepak_Sridhar1;~Abhishek_Peri1;~Rohith_Reddy_Rachala1;~Nuno_Vasconcelos1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;MS student;MS student;Professor", "bibtex": "@inproceedings{\nsridhar2024adapting,\ntitle={Adapting Diffusion Models for Improved Prompt Compliance and Controllable Image Synthesis},\nauthor={Deepak Sridhar and Abhishek Peri and Rohith Reddy Rachala and Nuno Vasconcelos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sntv8Ac3U2}\n}", "github": "", "reviewers": "t2kD;Xxm8;cyg3;F3xm", "pdf_size": 27658905, "rating": "5;5;5;6", "confidence": "3;4;4;4", "soundness": "4;4;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "71;77;13;52", "wc_strengths": "40;50;14;40", "wc_weaknesses": "20;22;138;56", "wc_questions": "64;158;2;14", "wc_limitations": "1;7;1;1", "wc_review": "196;314;168;163", "wc_reply_reviewers": "12;12;54;12", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 25.00374971879218 ], "wc_strengths_avg": [ 36.0, 13.341664064126334 ], "wc_weaknesses_avg": [ 59.0, 47.80167361086848 ], "wc_questions_avg": [ 59.5, 61.438994132391194 ], "wc_limitations_avg": [ 2.5, 2.598076211353316 ], "wc_review_avg": [ 210.25, 61.206106721470206 ], "wc_reply_reviewers_avg": [ 22.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dXSaf35BCeUJ:scholar.google.com/&scioq=Adapting+Diffusion+Models+for+Improved+Prompt+Compliance+and+Controllable+Image+Synthesis&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Iterative Optimal Brain Surgeon: Faster Sparse Recovery by Leveraging Second-Order Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93373", "id": "snxWD0Q4EI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=snxWD0Q4EI", "openreview": "https://openreview.net/forum?id=snxWD0Q4EI", "poster": "/media/PosterPDFs/NeurIPS%202024/93373.png?t=1731710792.2754478", "project": "", "author_site": "Diyuan Wu, Ionut-Vlad Modoranu, Mher Safaryan, Denis Kuznedelev, Dan Alistarh", "tldr": "", "abstract": "The rising footprint of machine learning has led to a focus on imposing model sparsity as a means of reducing computational and memory costs. For deep neural networks (DNNs), the state-of-the-art accuracy-vs-sparsity is achieved by heuristics inspired by the classical Optimal Brain Surgeon (OBS) framework [LeCun et al., 1989, Hassibi and Stork, 1992, Hassibi et al., 1993], which leverages loss curvature information to make better pruning decisions. Yet, these results still lack a solid theoretical understanding, and it is unclear whether they can be improved by leveraging connections to the wealth of work on sparse recovery algorithms. In this paper, we draw new connections between these two areas and present new sparse recovery algorithms inspired by the OBS framework that come with theoretical guarantees under reasonable assumptions and have strong practical performance. Specifically, our work starts from the observation that we can leverage curvature information in OBS-like fashion upon the projection step of classic iterative sparse recovery algorithms such as IHT. We show for the first time that this leads both to improved convergence bounds in well-behaved settings and to stronger practical convergence. Furthermore, we present extensions of this approach to training accurate sparse DNNs, and validate it experimentally at scale.", "keywords": "Optimal Brain Surgeon;Sparse Recovery;Pruning;Second-Order Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/f4069493f220c28e304c85c9b9434d9b12fd1682.zip", "author": "Diyuan Wu;Ionut-Vlad Modoranu;Mher Safaryan;Denis Kuznedelev;Dan Alistarh", "authorids": "~Diyuan_Wu1;~Ionut-Vlad_Modoranu1;~Mher_Safaryan1;~Denis_Kuznedelev1;~Dan_Alistarh7", "gender": ";M;M;M;M", "homepage": "https://diyuanwu.github.io/;;https://mher-safaryan.github.io;https://github.com/Godofnothing;http://people.csail.mit.edu/alistarh/", "dblp": "284/7961;275/9983;259/1444;322/8616;36/3251.html", "google_scholar": "MztcJLMAAAAJ;N56bz4gAAAAJ;dJNwgT8AAAAJ;;https://scholar.google.com.tw/citations?user=75q-6ZQAAAAJ", "orcid": ";;0000-0001-6290-1398;0009-0005-2420-9620;", "linkedin": "diyuan-wu-267795175/;ionut-vlad-modoranu/;mher-safaryan-94565a257/;;", "or_profile": "~Diyuan_Wu1;~Ionut-Vlad_Modoranu1;~Mher_Safaryan1;~Denis_Kuznedelev1;~Dan_Alistarh1", "aff": "Institute of Science and Technology Austria;Institute of Science and Technology Austria;Institute of Science and Technology;Yandex;Institute of Science and Technology", "aff_domain": "ista.ac.at;ist.ac.at;ist.ac.at;yandex-team.ru;ist.ac.at", "position": "PhD student;PhD student;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nwu2024the,\ntitle={The Iterative Optimal Brain Surgeon: Faster Sparse Recovery by Leveraging Second-Order Information},\nauthor={Diyuan Wu and Ionut-Vlad Modoranu and Mher Safaryan and Denis Kuznedelev and Dan Alistarh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=snxWD0Q4EI}\n}", "github": "", "reviewers": "T9TW;Dt8W;3y7g;JoZw", "pdf_size": 575606, "rating": "6;6;7;7", "confidence": "3;2;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;2", "presentation": "4;3;4;2", "wc_summary": "35;141;65;120", "wc_strengths": "134;64;7;141", "wc_weaknesses": "334;52;10;571", "wc_questions": "138;44;122;99", "wc_limitations": "29;6;9;16", "wc_review": "670;307;213;947", "wc_reply_reviewers": "23;37;0;16", "wc_reply_authors": "0;0;0;38", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.25, 42.28105367655825 ], "wc_strengths_avg": [ 86.5, 54.89307788783573 ], "wc_weaknesses_avg": [ 241.75, 227.28217593995356 ], "wc_questions_avg": [ 100.75, 35.57650207651112 ], "wc_limitations_avg": [ 15.0, 8.860022573334675 ], "wc_review_avg": [ 534.25, 293.1018381040965 ], "wc_reply_reviewers_avg": [ 19.0, 13.322912594474229 ], "wc_reply_authors_avg": [ 9.5, 16.454482671904334 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:F9Hf8CMSsXUJ:scholar.google.com/&scioq=The+Iterative+Optimal+Brain+Surgeon:+Faster+Sparse+Recovery+by+Leveraging+Second-Order+Information&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "ista.ac.at;ist.ac.at;ist.ac.at;yandex-team.ru;ist.ac.at", "author_num": 5, "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Institute of Science and Technology Austria;Institute of Science and Technology;Yandex", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ist.ac.at;;https://yandex.com", "aff_unique_abbr": "IST Austria;;Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2", "aff_country_unique": "Austria;;Russian Federation" }, { "title": "Interpretable Generalized Additive Models for Datasets with Missing Values", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93372", "id": "soUXmwL5aK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=soUXmwL5aK", "openreview": "https://openreview.net/forum?id=soUXmwL5aK", "poster": "/media/PosterPDFs/NeurIPS%202024/93372.png?t=1733516971.2438521", "project": "", "author_site": "Hayden McTavish, Jon Donnelly, Margo Seltzer, Cynthia Rudin", "tldr": "", "abstract": "Many important datasets contain samples that are missing one or more feature values. Maintaining the interpretability of machine learning models in the presence of such missing data is challenging. Singly or multiply imputing missing values complicates the model\u2019s mapping from features to labels. On the other hand, reasoning on indicator variables that represent missingness introduces a potentially large number of additional terms, sacrificing sparsity. We solve these problems with M-GAM, a sparse, generalized, additive modeling approach that incorporates missingness indicators and their interaction terms while maintaining sparsity through $\\ell_0$ regularization. We show that M-GAM provides similar or superior accuracy to prior methods while significantly improving sparsity relative to either imputation or na\u00efve inclusion of indicator variables.", "keywords": "Interpretability;Missing Data;Generalized Additive Models;Sparsity", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Hayden McTavish;Jon Donnelly;Margo Seltzer;Cynthia Rudin", "authorids": "~Hayden_McTavish1;~Jon_Donnelly1;~Margo_Seltzer1;~Cynthia_Rudin1", "gender": ";M;;", "homepage": ";;;", "dblp": ";307/5438;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;", "orcid": ";0000-0002-3971-1075;;", "linkedin": ";;;", "or_profile": "~Hayden_McTavish1;~Jon_Donnelly1;~Margo_Seltzer1;~Cynthia_Rudin1", "aff": ";Duke University;;", "aff_domain": ";duke.edu;;", "position": ";PhD student;;", "bibtex": "@inproceedings{\nmctavish2024interpretable,\ntitle={Interpretable Generalized Additive Models for Datasets with Missing Values},\nauthor={Hayden McTavish and Jon Donnelly and Margo Seltzer and Cynthia Rudin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=soUXmwL5aK}\n}", "github": "", "reviewers": "W4vf;fLWT;i65A;63xg;fBin", "pdf_size": 4424704, "rating": "5;5;5;6;6", "confidence": "3;3;3;4;3", "soundness": "2;3;3;3;3", "novelty": "2;3;2;2;2", "presentation": "2;2;3;3;3", "wc_summary": "87;40;73;50;64", "wc_strengths": "21;76;18;33;16", "wc_weaknesses": "12;62;74;197;43", "wc_questions": "1;8;209;44;86", "wc_limitations": "1;1;15;4;35", "wc_review": "122;187;389;328;244", "wc_reply_reviewers": "12;16;268;15;35", "wc_reply_authors": "6;36;616;45;46", "reply_reviewers": "1;1;1;2;1", "reply_authors": "2;2;2;3;3", "rating_avg": [ 5.4, 0.48989794855663565 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 62.8, 16.581917862539303 ], "wc_strengths_avg": [ 32.8, 22.39106964841117 ], "wc_weaknesses_avg": [ 77.6, 63.26642079334029 ], "wc_questions_avg": [ 69.6, 75.96999407660896 ], "wc_limitations_avg": [ 11.2, 12.967652061957862 ], "wc_review_avg": [ 254.0, 95.61798993913227 ], "wc_reply_reviewers_avg": [ 69.2, 99.73043667807737 ], "wc_reply_authors_avg": [ 149.8, 233.5503371866545 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.4, 0.4898979485566356 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6123724356957945, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11272152757360907852&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": ";duke.edu;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Synthesize, Partition, then Adapt: Eliciting Diverse Samples from Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93371", "id": "sp8wHIsnu9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sp8wHIsnu9", "openreview": "https://openreview.net/forum?id=sp8wHIsnu9", "poster": "/media/PosterPDFs/NeurIPS%202024/93371.png?t=1733821716.0709531", "project": "", "author_site": "Yeming Wen, Swarat Chaudhuri", "tldr": "", "abstract": "Presenting users with diverse responses from foundation models is crucial for enhancing user experience and accommodating varying preferences. \nHowever, generating multiple high-quality and diverse responses without sacrificing accuracy remains a challenge, especially when using greedy sampling. \nIn this work, we propose a novel framework, Synthesize-Partition-Adapt (SPA), that leverages the abundant synthetic data available in many domains to elicit diverse responses from foundation models.\nBy leveraging signal provided by data attribution methods such as influence functions, SPA partitions data into subsets, each targeting unique aspects of the data, and trains multiple model adaptations optimized for these subsets.\nExperimental results demonstrate the effectiveness of our approach in diversifying foundation model responses while maintaining high quality, showcased through the HumanEval and MBPP tasks in the code generation domain and several tasks in the natural language understanding domain, highlighting its potential to enrich user experience across various applications.", "keywords": "diverse generation;instruction fine-tuning;synthetic dataset", "primary_area": "generative_models", "supplementary_material": "/attachment/74efa63cc0126d4585af401fbd484de88642d848.zip", "author": "Yeming Wen;Swarat Chaudhuri", "authorids": "~Yeming_Wen1;~Swarat_Chaudhuri1", "gender": "M;M", "homepage": "https://www.cs.utexas.edu/~ywen/;http://www.cs.utexas.edu/~swarat", "dblp": "https://dblp.uni-trier.de/pers/hd/w/Wen:Yeming;37/6100", "google_scholar": "J2GzNAkAAAAJ;9j6RBYQAAAAJ", "orcid": ";0000-0002-6859-1391", "linkedin": ";swarat-chaudhuri-609b3092/", "or_profile": "~Yeming_Wen1;~Swarat_Chaudhuri1", "aff": "University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwen2024synthesize,\ntitle={Synthesize, Partition, then Adapt: Eliciting Diverse Samples from Foundation Models},\nauthor={Yeming Wen and Swarat Chaudhuri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sp8wHIsnu9}\n}", "github": "", "reviewers": "4Daa;j8SA;9mQS;BWst", "pdf_size": 928297, "rating": "4;4;6;8", "confidence": "3;5;3;4", "soundness": "3;1;3;3", "novelty": "2;1;3;4", "presentation": "3;2;4;3", "wc_summary": "59;37;140;111", "wc_strengths": "22;18;92;74", "wc_weaknesses": "26;94;214;364", "wc_questions": "62;284;302;8", "wc_limitations": "1;1;24;14", "wc_review": "170;434;772;571", "wc_reply_reviewers": "93;144;54;35", "wc_reply_authors": "630;333;146;33", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 86.75, 40.831207427652686 ], "wc_strengths_avg": [ 51.5, 32.16753021293366 ], "wc_weaknesses_avg": [ 174.5, 128.45524512451797 ], "wc_questions_avg": [ 164.0, 130.56033088193365 ], "wc_limitations_avg": [ 10.0, 9.669539802906858 ], "wc_review_avg": [ 486.75, 218.84854922982697 ], "wc_reply_reviewers_avg": [ 81.5, 41.70431632337353 ], "wc_reply_authors_avg": [ 285.5, 225.91646686330768 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:p9fO9FuxtWAJ:scholar.google.com/&scioq=Synthesize,+Partition,+then+Adapt:+Eliciting+Diverse+Samples+from+Foundation+Models&hl=en&as_sdt=0,31", "gs_version_total": 3, "email": "utexas.edu;utexas.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Verified Code Transpilation with LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93370", "id": "spwE9sLrfg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=spwE9sLrfg", "openreview": "https://openreview.net/forum?id=spwE9sLrfg", "poster": "", "project": "", "author_site": "Sahil Bhatia, Jie Qiu, Niranjan Hasabnis, Sanjit Seshia, Alvin Cheung", "tldr": "", "abstract": "Domain-specific languages (DSLs) have become integral to various software workflows. Such languages offer domain-specific optimizations and abstractions that improve code readability and maintainability. However, leveraging these languages requires developers to rewrite existing code using the specific DSL's API. While large language models (LLMs) have shown some success in automatic code transpilation, none of them provide any functional correctness guarantees on the rewritten code. Another approach for automating this task is verified lifting, which relies on program synthesis to find programs in the target language that are functionally equivalent to the source language program. While several verified lifting tools have been developed for various application domains, they are specialized for specific source-target languages or require significant expertise in domain knowledge to make the search efficient. In this paper, leveraging recent advances in LLMs, we propose an LLM-based approach (LLMLift) to building verified lifting tools. We use the LLM's capabilities to reason about programs to translate a given program into its corresponding equivalent in the target language. Additionally, we use LLMs to generate proofs for functional equivalence. We develop lifting-based compilers for four DSLs targeting different application domains. Our approach not only outperforms previous symbolic-based tools in number of benchmarks transpiled and transpilation time, but also requires significantly less effort to build.", "keywords": "Program Synthesis;Compilers;Formal Methods;LLMs", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Sahil Bhatia;Jie Qiu;Niranjan Hasabnis;Sanjit A. Seshia;Alvin Cheung", "authorids": "~Sahil_Bhatia3;~Jie_Qiu2;~Niranjan_Hasabnis1;~Sanjit_A._Seshia1;~Alvin_Cheung2", "gender": ";F;;;", "homepage": ";;;;", "dblp": ";;;;", "google_scholar": ";;p8vutGkAAAAJ;;", "orcid": ";;;;", "linkedin": ";jie-qiu-52a771152/;;;", "or_profile": "~Sahil_Bhatia3;~Jie_Qiu2;~Niranjan_Hasabnis1;~Sanjit_A._Seshia1;~Alvin_Cheung2", "aff": ";University of California, Berkeley;Intel;;", "aff_domain": ";berkeley.edu;intel.com;;", "position": ";Intern;Research Scientist;;", "bibtex": "@inproceedings{\nbhatia2024verified,\ntitle={Verified Code Transpilation with {LLM}s},\nauthor={Sahil Bhatia and Jie Qiu and Niranjan Hasabnis and Sanjit A. Seshia and Alvin Cheung},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=spwE9sLrfg}\n}", "github": "", "reviewers": "ZcXM;2Wxn;9szn;KVm8", "pdf_size": 533762, "rating": "3;3;4;7", "confidence": "4;3;4;3", "soundness": "2;3;2;3", "novelty": "3;2;3;3", "presentation": "2;3;2;3", "wc_summary": "138;120;52;67", "wc_strengths": "57;70;60;75", "wc_weaknesses": "314;266;88;54", "wc_questions": "10;7;92;97", "wc_limitations": "12;394;12;11", "wc_review": "531;857;304;304", "wc_reply_reviewers": "95;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.25, 35.723766598722484 ], "wc_strengths_avg": [ 65.5, 7.297259759663212 ], "wc_weaknesses_avg": [ 180.5, 111.45739096174825 ], "wc_questions_avg": [ 51.5, 43.04939023958411 ], "wc_limitations_avg": [ 107.25, 165.55569304617705 ], "wc_review_avg": [ 499.0, 226.51600384961765 ], "wc_reply_reviewers_avg": [ 23.75, 41.13620667976084 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.45749571099781405, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10345636094114250667&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";berkeley.edu;intel.com;;", "author_num": 5, "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "https://www.berkeley.edu;https://www.intel.com", "aff_unique_abbr": "UC Berkeley;Intel", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DI-MaskDINO: A Joint Object Detection and Instance Segmentation Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93369", "id": "srQxkSPJLW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=srQxkSPJLW", "openreview": "https://openreview.net/forum?id=srQxkSPJLW", "poster": "/media/PosterPDFs/NeurIPS%202024/93369.png?t=1731644126.5835745", "project": "", "author_site": "Zhixiong Nan, Li Xianghong, Tao Xiang, Jifeng Dai", "tldr": "", "abstract": "This paper is motivated by an interesting phenomenon: the performance of object detection lags behind that of instance segmentation (i.e., performance imbalance) when investigating the intermediate results from the beginning transformer decoder layer of MaskDINO (i.e., the SOTA model for joint detection and segmentation). This phenomenon inspires us to think about a question: will the performance imbalance at the beginning layer of transformer decoder constrain the upper bound of the final performance? With this question in mind, we further conduct qualitative and quantitative pre-experiments, which validate the negative impact of detection-segmentation imbalance issue on the model performance. To address this issue, this paper proposes DI-MaskDINO model, the core idea of which is to improve the final performance by alleviating the detection-segmentation imbalance. DI-MaskDINO is implemented by configuring our proposed De-Imbalance (DI) module and Balance-Aware Tokens Optimization (BATO) module to MaskDINO. DI is responsible for generating balance-aware query, and BATO uses the balance-aware query to guide the optimization of the initial feature tokens. The balance-aware query and optimized feature tokens are respectively taken as the Query and Key&Value of transformer decoder to perform joint object detection and instance segmentation. DI-MaskDINO outperforms existing joint object detection and instance segmentation models on COCO and BDD100K benchmarks, achieving +1.2 $AP^{box}$ and +0.9 $AP^{mask}$ improvements compared to SOTA joint detection and segmentation model MaskDINO. In addition, DI-MaskDINO also obtains +1.0 $AP^{box}$ improvement compared to SOTA object detection model DINO and +3.0 $AP^{mask}$ improvement compared to SOTA segmentation model Mask2Former.", "keywords": "object detection;instance segmentation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Zhixiong Nan;Xianghong Li;Tao Xiang;Jifeng Dai", "authorids": "~Zhixiong_Nan2;~Xianghong_Li1;~Tao_Xiang2;~Jifeng_Dai1", "gender": "M;F;M;M", "homepage": ";https://github.com/xianghong2023;;https://jifengdai.org/", "dblp": "187/2031;;22/4460-1.html;14/9399", "google_scholar": "9ywMAekAAAAJ;;https://scholar.google.com/citations?hl=en;SH_-B_AAAAAJ", "orcid": ";;0000-0002-9439-4623;", "linkedin": ";;;", "or_profile": "~Zhixiong_Nan2;~Xianghong_Li1;~Tao_Xiang2;~Jifeng_Dai1", "aff": "Chongqing University;Chongqing University;Chongqing University;Tsinghua University", "aff_domain": "cqu.edu.cn;cqu.edu.cn;cqu.edu.cn;tsinghua.edu.cn", "position": "Associate Professor;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nnan2024dimaskdino,\ntitle={{DI}-Mask{DINO}: A Joint Object Detection and Instance Segmentation Model},\nauthor={Zhixiong Nan and Xianghong Li and Tao Xiang and Jifeng Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=srQxkSPJLW}\n}", "github": "", "reviewers": "KbFf;TyXi;aN5C;5ayP", "pdf_size": 1672024, "rating": "5;5;5;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "35;52;54;83", "wc_strengths": "34;48;28;102", "wc_weaknesses": "128;6;256;24", "wc_questions": "18;70;63;40", "wc_limitations": "6;1;8;84", "wc_review": "221;177;409;333", "wc_reply_reviewers": "29;19;0;15", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 17.24818831066034 ], "wc_strengths_avg": [ 53.0, 29.206163733020468 ], "wc_weaknesses_avg": [ 103.5, 99.60296180335202 ], "wc_questions_avg": [ 47.75, 20.44963324854507 ], "wc_limitations_avg": [ 24.75, 34.30287888793009 ], "wc_review_avg": [ 285.0, 91.43303560529968 ], "wc_reply_reviewers_avg": [ 15.75, 10.425329730996522 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qAVqzBC7wvQJ:scholar.google.com/&scioq=DI-MaskDINO:+A+Joint+Object+Detection+and+Instance+Segmentation+Model&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cqu.edu.cn;cqu.edu.cn;cqu.edu.cn;tsinghua.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Chongqing University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cqu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CQU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Toward Efficient Inference for Mixture of Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93368", "id": "stXtBqyTWX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=stXtBqyTWX", "openreview": "https://openreview.net/forum?id=stXtBqyTWX", "poster": "/media/PosterPDFs/NeurIPS%202024/93368.png?t=1733173453.8031597", "project": "", "author_site": "Haiyang Huang, Newsha Ardalani, Anna Sun, Liu Ke, Shruti Bhosale, Hsien-Hsin Lee, Carole-Jean Wu, Benjamin Lee", "tldr": "", "abstract": "Mixture-of-Experts (MoE) models have recently gained steam in achieving the state-of-the-art performance in a wide range of tasks in computer vision and natural language processing. They effectively expand the model capacity while incurring a minimal increase in computation cost during training. However, deploying such models for inference is difficult due to their large model size and complex communication pattern. In this work, we provide a characterization of two MoE workloads, namely Language Modeling (LM) and Machine Translation (MT) and identify their sources of inefficiencies at deployment. We propose three optimization techniques to mitigate sources of inefficiencies, namely (1) Dynamic gating, (2) Expert Buffering, and (3) Expert load balancing. We show that dynamic gating improves maximum throughput by 6.21-11.55$\\times$ for LM, 5.75-10.98$\\times$ for MT Encoder and 2.58-5.71$\\times$ for MT Decoder.\nIt also reduces memory usage by up to 1.36$\\times$ for LM and up to 1.1$\\times$ for MT. We further propose Expert Buffering, a new caching mechanism that only keeps hot, active experts in GPU memory while buffering the rest in CPU memory. This reduces static memory allocation by 1.47$\\times$. Finally, we propose a load balancing methodology that provides additional robustness to the workload. Our code is available at https://github.com/hyhuang00/moe_inference.", "keywords": "Mixture-of-Experts;inference", "primary_area": "infrastructure", "supplementary_material": "", "author": "Haiyang Huang;Newsha Ardalani;Anna Sun;Liu Ke;Shruti Bhosale;Hsien-Hsin S. Lee;Carole-Jean Wu;Benjamin Lee", "authorids": "~Haiyang_Huang2;~Newsha_Ardalani1;~Anna_Sun1;~Liu_Ke1;~Shruti_Bhosale1;~Hsien-Hsin_S._Lee1;~Carole-Jean_Wu2;~Benjamin_Lee3", "gender": ";F;F;F;;;F;", "homepage": ";;;;https://ai.facebook.com/people/shruti-bhosale/;;;https://www.seas.upenn.edu/~leebcc/", "dblp": ";53/7913.html;292/8268.html;;136/9081;;26/9655;", "google_scholar": ";w-y4MOcAAAAJ;Ky_pMLQAAAAJ;c8W5RkcAAAAJ;69JJbWoAAAAJ;;S1szbyAAAAAJ;4Tnj6PcAAAAJ", "orcid": ";;0000-0002-6212-1313;;;;;", "linkedin": ";;;;shrutibhosale/;;;", "or_profile": "~Haiyang_Huang2;~Newsha_Ardalani1;~Anna_Sun1;~Liu_Ke1;~Shruti_Bhosale1;~Hsien-Hsin_S._Lee1;~Carole-Jean_Wu2;~Benjamin_Lee3", "aff": ";Meta AI;Meta Facebook;;Meta Facebook;;Meta;University of Pennsylvania", "aff_domain": ";meta.com;fb.com;;fb.com;;meta.com;upenn.edu", "position": ";Researcher;Researcher;;Research Engineer;;Researcher;Full Professor", "bibtex": "@inproceedings{\nhuang2024toward,\ntitle={Toward Efficient Inference for Mixture of Experts},\nauthor={Haiyang Huang and Newsha Ardalani and Anna Sun and Liu Ke and Shruti Bhosale and Hsien-Hsin S. Lee and Carole-Jean Wu and Benjamin Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=stXtBqyTWX}\n}", "github": "", "reviewers": "JXaG;t1yX;dmgJ;BGdS", "pdf_size": 3664693, "rating": "5;6;7;7", "confidence": "3;2;3;4", "soundness": "3;2;4;4", "novelty": "2;2;4;3", "presentation": "2;2;3;3", "wc_summary": "61;71;133;88", "wc_strengths": "33;22;88;76", "wc_weaknesses": "78;86;70;74", "wc_questions": "54;17;2;1", "wc_limitations": "74;7;7;1", "wc_review": "300;203;300;240", "wc_reply_reviewers": "11;21;12;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 88.25, 27.580563808595358 ], "wc_strengths_avg": [ 54.75, 27.851166941440713 ], "wc_weaknesses_avg": [ 77.0, 5.916079783099616 ], "wc_questions_avg": [ 18.5, 21.453437952924933 ], "wc_limitations_avg": [ 22.25, 29.978117018918983 ], "wc_review_avg": [ 260.75, 41.372545244400904 ], "wc_reply_reviewers_avg": [ 11.0, 7.44983221287567 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.42640143271122083, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7416114251849268803&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";meta.com;fb.com;;fb.com;;meta.com;upenn.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Meta;University of Pennsylvania", "aff_unique_dep": "Meta AI;", "aff_unique_url": "https://meta.com;https://www.upenn.edu", "aff_unique_abbr": "Meta;UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning-Augmented Dynamic Submodular Maximization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93367", "id": "stY80vVBS8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=stY80vVBS8", "openreview": "https://openreview.net/forum?id=stY80vVBS8", "poster": "", "project": "", "author_site": "Arpit Agarwal, Eric Balkanski", "tldr": "", "abstract": "In dynamic submodular maximization, the goal is to maintain a high-value solution over a sequence of element insertions and deletions with a fast update time. Motivated by large-scale applications and the fact that dynamic data often exhibits patterns, we ask the following question: can predictions be used to accelerate the update time of dynamic submodular maximization algorithms? \n\nWe consider the model for dynamic algorithms with predictions where predictions regarding the insertion and deletion times of elements can be used for preprocessing. Our main result is an algorithm with an $O(\\text{poly}(\\log \\eta, \\log w, \\log k))$ amortized update time over the sequence of updates that achieves a $1/2 - \\epsilon$ approximation for dynamic monotone submodular maximization under a cardinality constraint $k$, where the prediction error $\\eta$ is the number of elements that are not inserted and deleted within $w$ time steps of their predicted insertion and deletion times. This amortized update time is independent of the length of the stream and instead depends on the prediction error.", "keywords": "Submodular maximization;algorithms with predictions;dynamic algorithms", "primary_area": "optimization", "supplementary_material": "", "author": "Arpit Agarwal;Eric Balkanski", "authorids": "~Arpit_Agarwal2;~Eric_Balkanski2", "gender": ";", "homepage": ";http://ericbalkanski.com", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Arpit_Agarwal2;~Eric_Balkanski2", "aff": ";Columbia University", "aff_domain": ";columbia.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nagarwal2024learningaugmented,\ntitle={Learning-Augmented Dynamic Submodular Maximization},\nauthor={Arpit Agarwal and Eric Balkanski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=stY80vVBS8}\n}", "github": "", "reviewers": "L8zN;T4wq;a2JE;A7Fx", "pdf_size": 497318, "rating": "4;6;7;8", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "54;337;182;107", "wc_strengths": "38;92;19;172", "wc_weaknesses": "310;126;32;49", "wc_questions": "6;58;34;23", "wc_limitations": "1;1;2;10", "wc_review": "409;614;269;361", "wc_reply_reviewers": "0;11;5;5", "wc_reply_authors": "41;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 170.0, 106.60440891445344 ], "wc_strengths_avg": [ 80.25, 59.356444468987526 ], "wc_weaknesses_avg": [ 129.25, 110.20293780113124 ], "wc_questions_avg": [ 30.25, 18.872930350107268 ], "wc_limitations_avg": [ 3.5, 3.774917217635375 ], "wc_review_avg": [ 413.25, 126.34946576855796 ], "wc_reply_reviewers_avg": [ 5.25, 3.897114317029974 ], "wc_reply_authors_avg": [ 10.25, 17.75352077758099 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11463601622881834286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";columbia.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "On the Expressive Power of Tree-Structured Probabilistic Circuits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93366", "id": "suYAAOI5bd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=suYAAOI5bd", "openreview": "https://openreview.net/forum?id=suYAAOI5bd", "poster": "/media/PosterPDFs/NeurIPS%202024/93366.png?t=1731736283.9385722", "project": "", "author_site": "Lang Yin, Han Zhao", "tldr": "", "abstract": "Probabilistic circuits (PCs) have emerged as a powerful framework compactly representing probability distributions for efficient and exact probabilistic inference. It has been shown that PCs with general directed acyclic graph (DAG) structure can be understood as a mixture of exponentially (in its height) many components, each of which is a product distributions over univariate marginals. However, existing structure learning algorithms for PCs often generate tree-structured circuits, or using tree-structured circuits as intermediate steps to compress them into DAG-structured circuits. This leads to an intriguing question on whether there exists an exponential gap between DAGs and trees for the PC structure.\n\nIn this paper, we provide a negative answer to this conjecture by proving that, for $n$ variables, there is a quasi-polynomial upper bound $n^{O(\\log n)}$ on the size of an equivalent tree computing the same probability distribution. On the other hand, we will also show that given a depth restriction on the tree, there is a super-polynomial separation between tree and DAG-structured PCs. Our work takes an important step towards understanding the expressive power of tree-structured PCs, and our techniques may be of independent interest in the study of structure learning algorithms for PCs.", "keywords": "Probabilistic circuits;Circuit complexities;Network polynomials;Probabilistic models", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Lang Yin;Han Zhao", "authorids": "~Lang_Yin1;~Han_Zhao1", "gender": "M;M", "homepage": ";https://hanzhaoml.github.io/", "dblp": "324/7991.html;03/3520-2", "google_scholar": ";x942ipYAAAAJ", "orcid": ";0000-0002-8579-1600", "linkedin": "lang-yin-813222a0/;", "or_profile": "~Lang_Yin1;~Han_Zhao1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyin2024on,\ntitle={On the Expressive Power of Tree-Structured Probabilistic Circuits},\nauthor={Lang Yin and Han Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=suYAAOI5bd}\n}", "github": "", "reviewers": "Tza1;x52f;PAKa", "pdf_size": 413294, "rating": "6;6;7", "confidence": "3;3;4", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "113;112;90", "wc_strengths": "33;31;70", "wc_weaknesses": "89;64;1", "wc_questions": "199;53;1", "wc_limitations": "7;9;1", "wc_review": "441;269;163", "wc_reply_reviewers": "9;11;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 105.0, 10.614455552060438 ], "wc_strengths_avg": [ 44.666666666666664, 17.93197020841702 ], "wc_weaknesses_avg": [ 51.333333333333336, 37.02551672683163 ], "wc_questions_avg": [ 84.33333333333333, 83.81461023526201 ], "wc_limitations_avg": [ 5.666666666666667, 3.39934634239519 ], "wc_review_avg": [ 291.0, 114.55420841971134 ], "wc_reply_reviewers_avg": [ 6.666666666666667, 4.784233364802441 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16794036777932613428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "illinois.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Action and Reasoning-Centric Image Editing from Videos and Simulation", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97473", "id": "sw9iOHGxgm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sw9iOHGxgm", "openreview": "https://openreview.net/forum?id=sw9iOHGxgm", "poster": "/media/PosterPDFs/NeurIPS%202024/97473.png?t=1733771154.1878757", "project": "", "author_site": "Benno Krojer, Dheeraj Vattikonda, Luis Lara, Varun Jampani, Eva Portelance, Chris Pal, Siva Reddy", "tldr": "", "abstract": "An image editing model should be able to perform diverse edits, ranging from object replacement, changing attributes or style, to performing actions or movement, which require many forms of reasoning. Current *general* instruction-guided editing models have significant shortcomings with action and reasoning-centric edits.\nObject, attribute or stylistic changes can be learned from visually static datasets. On the other hand, high-quality data for action and reasoning-centric edits is scarce and has to come from entirely different sources that cover e.g. physical dynamics, temporality and spatial reasoning.\nTo this end, we meticulously curate the **A**U**RO**R**A** Dataset (**A**ction-**R**easoning-**O**bject-**A**ttribute), a collection of high-quality training data, human-annotated and curated from videos and simulation engines.\nWe focus on a key aspect of quality training data: triplets (source image, prompt, target image) contain a single meaningful visual change described by the prompt, i.e., *truly minimal* changes between source and target images.\nTo demonstrate the value of our dataset, we evaluate an **A**U**RO**R**A**-finetuned model on a new expert-curated benchmark (**A**U**RO**R**A-Bench**) covering 8 diverse editing tasks.\nOur model significantly outperforms previous editing models as judged by human raters.\nFor automatic evaluations, we find important flaws in previous metrics and caution their use for semantically hard editing tasks.\nInstead, we propose a new automatic metric that focuses on discriminative understanding.\nWe hope that our efforts : (1) curating a quality training dataset and an evaluation benchmark, (2) developing critical evaluations, and (3) releasing a state-of-the-art model, will fuel further progress on general image editing.", "keywords": "image editing;video;reasoning;grounding;vision+language;world model;multi-modality", "primary_area": "", "supplementary_material": "", "author": "Benno Krojer;Dheeraj Vattikonda;Luis Lara;Varun Jampani;Eva Portelance;Christopher Pal;Siva Reddy", "authorids": "~Benno_Krojer1;~Dheeraj_Vattikonda1;~Luis_Lara1;~Varun_Jampani2;~Eva_Portelance1;~Christopher_Pal1;~Siva_Reddy1", "gender": "M;M;M;F;;M;M", "homepage": "https://www.bennokrojer.com/;;https://www.luislara.dev/;https://evaportelance.github.io/;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ&hl=en&oi=ao;http://sivareddy.in;https://varunjampani.github.io/", "dblp": "280/0462.html;;;;45/1217;64/8153;124/2785", "google_scholar": "D5eyaLwAAAAJ;https://scholar.google.com/citations?view_op=new_profile;fAvcgmQAAAAJ;;https://scholar.google.ca/citations?user=1ScWJOoAAAAJ;;1Cv6Sf4AAAAJ", "orcid": ";;;;;;", "linkedin": ";;ludolara;;;;", "or_profile": "~Benno_Krojer1;~Dheeraj_Vattikonda1;~Luis_Lara1;~Eva_Portelance1;~Christopher_Pal1;~Siva_Reddy1;~Varun_Jampani1", "aff": "Meta Facebook;McGill University, McGill University;Mila - Quebec Artificial Intelligence Institute;McGill University;Polytechnique Montreal;Mila, McGill University;Stability AI", "aff_domain": "meta.com;mail.mcgill.ca;mila.quebec;mcgill.ca;polymtl.ca;mila.quebec;stability.ai", "position": "Intern;MS student;Researcher;Postdoc;Full Professor;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nkrojer2024learning,\ntitle={Learning Action and Reasoning-Centric Image Editing from Videos and Simulation},\nauthor={Benno Krojer and Dheeraj Vattikonda and Luis Lara and Varun Jampani and Eva Portelance and Christopher Pal and Siva Reddy},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=sw9iOHGxgm}\n}", "github": "", "reviewers": "W52U;yzTK;XmHR;TXfY", "pdf_size": 16984312, "rating": "7;8;8;9", "confidence": "3;4;4;4", "wc_summary_and_contributions": "68;66;155;70", "wc_strengths": "22;175;168;77", "wc_improvement": "23;94;49;151", "wc_limitations": "105;1;71;17", "wc_correctness": "4;8;1;34", "wc_clarity": "28;6;1;62", "wc_relation_to_prior_work": "5;8;50;65", "wc_documentation": "14;10;11;33", "wc_additional_feedback": "1;1;1;1", "wc_review": "270;369;507;510", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 8.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 89.75, 37.69864055904404 ], "wc_strengths_avg": [ 110.5, 64.07222487162437 ], "wc_improvement_avg": [ 79.25, 48.59205181920187 ], "wc_limitations_avg": [ 48.5, 41.674332628129754 ], "wc_correctness_avg": [ 11.75, 13.083864108129525 ], "wc_clarity_avg": [ 24.25, 24.045529730076648 ], "wc_relation_to_prior_work_avg": [ 32.0, 26.06722079547415 ], "wc_documentation_avg": [ 17.0, 9.354143466934854 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 414.0, 100.77946219344496 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11008682701630562727&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "meta.com;mail.mcgill.ca;mila.quebec;mcgill.ca;polymtl.ca;mila.quebec;stability.ai", "author_num": 7, "aff_unique_index": "0;1;2;1;3;1;4", "aff_unique_norm": "Meta;McGill University;Quebec Artificial Intelligence Institute;Polytechnique Montreal;Stability AI", "aff_unique_dep": "Meta Platforms, Inc.;;Artificial Intelligence;;", "aff_unique_url": "https://meta.com;https://www.mcgill.ca;https://mila.quebec;https://www.polymtl.ca;https://stability.ai", "aff_unique_abbr": "Meta;McGill;Mila;PolyMTL;Stability AI", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;1;1;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Off-Policy Selection for Initiating Human-Centric Experimental Design", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93365", "id": "swp3lPDmZe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=swp3lPDmZe", "openreview": "https://openreview.net/forum?id=swp3lPDmZe", "poster": "/media/PosterPDFs/NeurIPS%202024/93365.png?t=1731660464.0368555", "project": "", "author_site": "Ge Gao, Xi Yang, Qitong Gao, Song Ju, Miroslav Pajic, Min Chi", "tldr": "", "abstract": "In human-centric applications like healthcare and education, the \\textit{heterogeneity} among patients and students necessitates personalized treatments and instructional interventions. While reinforcement learning (RL) has been utilized in those tasks, off-policy selection (OPS) is pivotal to close the loop by offline evaluating and selecting policies without online interactions, yet current OPS methods often overlook the heterogeneity among participants. Our work is centered on resolving a \\textit{pivotal challenge} in human-centric systems (HCSs): \\textbf{\\textit{how to select a policy to deploy when a new participant joining the cohort, without having access to any prior offline data collected over the participant?}} We introduce First-Glance Off-Policy Selection (FPS), a novel approach that systematically addresses participant heterogeneity through sub-group segmentation and tailored OPS criteria to each sub-group. By grouping individuals with similar traits, FPS facilitates personalized policy selection aligned with unique characteristics of each participant or group of participants. FPS is evaluated via two important but challenging applications, intelligent tutoring systems and a healthcare application for sepsis treatment and intervention. FPS presents significant advancement in enhancing learning outcomes of students and in-hospital care outcomes.", "keywords": "Off-policy selection (OPS);Offline reinforcement learning and OPS for human-centric experimental design;intelligent tutoring;sepsis treatments", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/920c1edfa3b3591233cdac766a10e647b3529334.zip", "author": "Ge Gao;Xi Yang;Qitong Gao;Song Ju;Miroslav Pajic;Min Chi", "authorids": "~Ge_Gao4;~Xi_Yang11;~Qitong_Gao1;~Song_Ju1;~Miroslav_Pajic2;~Min_Chi1", "gender": ";;M;M;M;", "homepage": "https://gegao.tech/;;http://qitonggao.com;;http://people.duke.edu/~mp275/;", "dblp": ";;238/5422;;74/7446.html;", "google_scholar": "d_WL-9cAAAAJ;;Flv4SrsAAAAJ;;Fbn21-8AAAAJ;", "orcid": "0000-0002-3474-8637;;;;;", "linkedin": ";;qitong-gao;song-ju-936513134/;;", "or_profile": "~Ge_Gao4;~Xi_Yang11;~Qitong_Gao1;~Song_Ju1;~Miroslav_Pajic2;~Min_Chi1", "aff": "North Carolina State University;;Duke University;;Duke University;", "aff_domain": "ncsu.edu;;duke.edu;;duke.edu;", "position": "PhD student;;PhD student;;Associate Professor;", "bibtex": "@inproceedings{\ngao2024offpolicy,\ntitle={Off-Policy Selection for Initiating Human-Centric Experimental Design},\nauthor={Ge Gao and Xi Yang and Qitong Gao and Song Ju and Miroslav Pajic and Min Chi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=swp3lPDmZe}\n}", "github": "", "reviewers": "2e7j;aKHU;wmCx", "pdf_size": 1288091, "rating": "3;6;6", "confidence": "4;3;3", "soundness": "2;3;4", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "66;105;90", "wc_strengths": "72;68;191", "wc_weaknesses": "296;79;183", "wc_questions": "77;112;10", "wc_limitations": "25;9;38", "wc_review": "536;373;512", "wc_reply_reviewers": "190;19;0", "wc_reply_authors": "774;92;70", "reply_reviewers": "2;1;0", "reply_authors": "3;3;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.0, 16.06237840420901 ], "wc_strengths_avg": [ 110.33333333333333, 57.0633176587396 ], "wc_weaknesses_avg": [ 186.0, 88.61527332614095 ], "wc_questions_avg": [ 66.33333333333333, 42.3188951756646 ], "wc_limitations_avg": [ 24.0, 11.86029791643813 ], "wc_review_avg": [ 473.6666666666667, 71.8532455000391 ], "wc_reply_reviewers_avg": [ 69.66666666666667, 85.4413379004696 ], "wc_reply_authors_avg": [ 312.0, 326.8067726756388 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YywkxhZN_Z8J:scholar.google.com/&scioq=Off-Policy+Selection+for+Initiating+Human-Centric+Experimental+Design&hl=en&as_sdt=0,10", "gs_version_total": 3, "email": "ncsu.edu;;duke.edu;;duke.edu;", "author_num": 6, "aff_unique_index": "0;1;1", "aff_unique_norm": "North Carolina State University;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncsu.edu;https://www.duke.edu", "aff_unique_abbr": "NCSU;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "sxZlp9ZoHD", "title": "Retentive Network", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we propose Retentive Network (RetNet) as a foundation architecture for large language models, simultaneously achieving training parallelism, low-cost inference, and good performance. We theoretically derive the connection between recurrence and attention. Then we propose the retention mechanism for sequence modeling, which supports three computation paradigms, i.e., parallel, recurrent, and chunkwise recurrent. Specifically, the parallel representation allows for training parallelism. The recurrent representation enables low-cost \n inference, which improves decoding throughput, latency, and GPU memory without sacrificing performance. The chunkwise recurrent representation facilitates efficient long-sequence modeling with linear complexity, where each chunk is encoded parallelly while recurrently summarizing the chunks. Experimental results on language modeling show that RetNet achieves favorable scaling results, parallel training, low-cost deployment, and efficient inference.", "keywords": "Retentive Network;Model Architecture", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yutao Sun;Li Dong;Shaohan Huang;Shuming Ma;Yuqing Xia;Jilong Xue;Jianyong Wang;Furu Wei", "authorids": "~Yutao_Sun1;~Li_Dong1;~Shaohan_Huang1;~Shuming_Ma1;~Yuqing_Xia1;~Jilong_Xue2;~Jianyong_Wang2;~Furu_Wei1", "gender": "M;M;M;;;M;M;M", "homepage": "https://sunyt32.github.io/;http://dong.li;;https://www.microsoft.com/en-us/research/people/shumma/;https://github.com/xiayuqing0622;;http://dbgroup.cs.tsinghua.edu.cn/wangjy/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "01/9758;85/5090-4;176/0380;;211/8365;06/10336.html;24/2006;72/5870", "google_scholar": "apGDooYAAAAJ;wEfQgPgAAAAJ;;;3UAveGYAAAAJ;xKI6HXgAAAAJ;VfBaiG8AAAAJ;G-V1VpwAAAAJ", "orcid": ";;;;;;0000-0002-7555-170X;", "linkedin": ";;;;;;;", "or_profile": "~Yutao_Sun1;~Li_Dong1;~Shaohan_Huang1;~Shuming_Ma1;~Yuqing_Xia1;~Jilong_Xue2;~Jianyong_Wang2;~Furu_Wei1", "aff": "Tsinghua University;Microsoft Research;Microsoft;Microsoft;Microsoft Research;Microsoft Research;Tsinghua University;Microsoft Research", "aff_domain": "tsinghua.edu.cn;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;tsinghua.edu.cn;microsoft.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Researcher;Principal Researcher;Full Professor;Distinguished Scientist", "bibtex": "@misc{\nanonymous2024retentive,\ntitle={Retentive Network},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=sxZlp9ZoHD}\n}", "github": "", "project": "", "reviewers": "maEf;ZaX6;EVGT", "site": "https://openreview.net/forum?id=sxZlp9ZoHD", "pdf_size": 580722, "rating": "4;5;6", "confidence": "5;4;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "22;120;67", "wc_strengths": "26;42;39", "wc_weaknesses": "236;155;46", "wc_questions": "16;138;77", "wc_limitations": "23;1;5", "wc_review": "323;456;234", "wc_reply_reviewers": "0;12;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 69.66666666666667, 40.052743004970615 ], "wc_strengths_avg": [ 35.666666666666664, 6.944222218666553 ], "wc_weaknesses_avg": [ 145.66666666666666, 77.84742913046148 ], "wc_questions_avg": [ 77.0, 49.80629143659129 ], "wc_limitations_avg": [ 9.666666666666666, 9.568466729604882 ], "wc_review_avg": [ 337.6666666666667, 91.2225605623716 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-dcyLsSB5qAJ:scholar.google.com/&scioq=Retentive+Network&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;1;1;1;1;0;1", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "UniFL: Improve Latent Diffusion Model via Unified Feedback Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93364", "id": "sy2SmstDOB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sy2SmstDOB", "openreview": "https://openreview.net/forum?id=sy2SmstDOB", "poster": "/media/PosterPDFs/NeurIPS%202024/93364.png?t=1731661800.3784761", "project": "", "author_site": "Jiacheng Zhang, Jie Wu, Yuxi Ren, Xin Xia, Huafeng Kuang, Pan Xie, Jiashi Li, Xuefeng Xiao, Weilin Huang, Shilei Wen, Lean Fu, Guanbin Li", "tldr": "", "abstract": "Latent diffusion models (LDM) have revolutionized text-to-image generation, leading to the proliferation of various advanced models and diverse downstream applications. However, despite these significant advancements, current diffusion models still suffer from several limitations, including inferior visual quality, inadequate aesthetic appeal, and inefficient inference, without a comprehensive solution in sight. To address these challenges, we present **UniFL**, a unified framework that leverages feedback learning to enhance diffusion models comprehensively. UniFL stands out as a universal, effective, and generalizable solution applicable to various diffusion models, such as SD1.5 and SDXL.\nNotably, UniFL consists of three key components: perceptual feedback learning, which enhances visual quality; decoupled feedback learning, which improves aesthetic appeal; and adversarial feedback learning, which accelerates inference.\nIn-depth experiments and extensive user studies validate the superior performance of our method in enhancing generation quality and inference acceleration. For instance, UniFL surpasses ImageReward by 17\\% user preference in terms of generation quality and outperforms LCM and SDXL Turbo by 57\\% and 20\\% general preference with 4-step inference.", "keywords": "Diffusion Model;Feedback Learning;Acceleration", "primary_area": "generative_models", "supplementary_material": "", "author": "Jiacheng Zhang;Jie Wu;Yuxi Ren;Xin Xia;Huafeng Kuang;Pan Xie;Jiashi Li;Xuefeng Xiao;Weilin Huang;Shilei Wen;Lean Fu;Guanbin Li", "authorids": "~Jiacheng_Zhang5;~Jie_Wu8;~Yuxi_Ren1;~Xin_Xia1;~Huafeng_Kuang1;~Pan_Xie1;~Jiashi_Li1;~Xuefeng_Xiao1;~Weilin_Huang1;~Shilei_Wen1;~Lean_Fu1;~Guanbin_Li2", "gender": "M;M;M;;M;M;M;M;M;M;M;M", "homepage": "https://github.com/Zhangjiacheng144;https://github.com/WuJie1010;;;https://panxiaoxie.cn;;;http://www.whuang.org/;;https://www.bagevent.com/event/5870609?sId=34982;http://guanbinli.com;", "dblp": ";;06/2072-5;251/3442;78/6247;241/9364;245/9547;;159/2939;225/5157;126/4457;299/8261", "google_scholar": "QmdyVQ0AAAAJ;MxvLqLcAAAAJ;https://scholar.google.com.sg/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;Z-0EqtgAAAAJ;JrWHtn8AAAAJ;;78vU1IUAAAAJ;zKtYrHYAAAAJ;;2A2Bx2UAAAAJ;", "orcid": ";;;;;;;0000-0002-1520-4140;;;0000-0002-2486-2890;", "linkedin": ";;;;;;;;;;;", "or_profile": "~Jiacheng_Zhang5;~Jie_Wu8;~Xin_Xia1;~Huafeng_Kuang1;~Pan_Xie1;~Jiashi_Li1;~Xuefeng_Xiao1;~Weilin_Huang1;~Shilei_Wen1;~Lean_Fu1;~Guanbin_Li2;~Ren_Yuxi1", "aff": "SUN YAT-SEN UNIVERSITY;ByteDance Inc.;Bytedance;Xiamen University;ByteDance Inc.;Bytedance;ByteDance;Alibaba Group;bytedance;Bytedance;SUN YAT-SEN UNIVERSITY;ByteDance Inc.", "aff_domain": "sysu.edu.cn;bytedance.com;bytedance.com;xmu.edu.cn;bytedance.com;bytedance.com;bytedance.com;alibaba-inc.com;bytedance.com;bytedance.com;sysu.edu.cn;bytedance.com", "position": "MS student;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Director;Researcher;Principal Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024unifl,\ntitle={Uni{FL}: Improve Latent Diffusion Model via Unified Feedback Learning},\nauthor={Jiacheng Zhang and Jie Wu and Yuxi Ren and Xin Xia and Huafeng Kuang and Pan Xie and Jiashi Li and Xuefeng Xiao and Weilin Huang and Shilei Wen and Lean Fu and Guanbin Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sy2SmstDOB}\n}", "github": "", "reviewers": "qtQN;nwEj;af1S;dHXQ", "pdf_size": 28038836, "rating": "4;5;6;6", "confidence": "4;4;2;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "33;138;110;110", "wc_strengths": "40;32;52;112", "wc_weaknesses": "197;47;53;133", "wc_questions": "2;55;42;5", "wc_limitations": "2;6;1;29", "wc_review": "274;278;258;389", "wc_reply_reviewers": "0;24;16;17", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 97.75, 39.092038831455184 ], "wc_strengths_avg": [ 59.0, 31.416556144810016 ], "wc_weaknesses_avg": [ 107.5, 61.82839153657485 ], "wc_questions_avg": [ 26.0, 22.9891278651453 ], "wc_limitations_avg": [ 9.5, 11.412712210513327 ], "wc_review_avg": [ 299.75, 52.06906471216859 ], "wc_reply_reviewers_avg": [ 14.25, 8.78564169540279 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.2075143391598224, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4809521931159042233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "sysu.edu.cn;bytedance.com;bytedance.com;xmu.edu.cn;bytedance.com;bytedance.com;bytedance.com;alibaba-inc.com;bytedance.com;bytedance.com;sysu.edu.cn;bytedance.com", "author_num": 12, "aff_unique_index": "0;1;1;2;1;1;1;3;1;1;0;1", "aff_unique_norm": "Sun Yat-sen University;ByteDance;Xiamen University;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.bytedance.com;https://www.xmu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SYSU;ByteDance;XMU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Multi-LLM Debate: Framework, Principals, and Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93363", "id": "sy7eSEXdPC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=sy7eSEXdPC", "openreview": "https://openreview.net/forum?id=sy7eSEXdPC", "poster": "/media/PosterPDFs/NeurIPS%202024/93363.png?t=1731615368.460714", "project": "", "author_site": "Andrew Estornell, Yang Liu", "tldr": "", "abstract": "The flexible and generalized nature of large language models has allowed for their application in a wide array of language-based domains.\nMuch like their human contemporaries, these models are capable of engaging in discussions and debates as a means of improving answer quality.\nWe first take a theoretical approach to analyzing debate and provide a framework through which debate can be mathematically examined.\nBuilding on this framework, we provide several theoretical results for multi-agent debate.\nIn particular, we demonstrate that similar model capabilities, or similar model responses, can result in static debate dynamics where the debate procedure simply converges to the majority opinion. \nWhen this majority opinion is the result of a common misconception (ingrained in the models through shared training data) debate is likely to converge to answers associated with that common misconception.\nUsing insights from our theoretical results we then propose three interventions which improve the efficacy of debate. \nFor each intervention, we provide theoretical results demonstrating how debate is improved.\nWe also demonstrate that these interventions result in better performance on four common benchmark tasks.", "keywords": "multi-agent debate;LLM", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Andrew Estornell;Yang Liu", "authorids": "~Andrew_Estornell1;~Yang_Liu3", "gender": ";M", "homepage": ";http://www.yliuu.com", "dblp": ";51/3710-18", "google_scholar": ";jKrIVCIAAAAJ", "orcid": ";0000-0001-8420-6011", "linkedin": ";", "or_profile": "~Andrew_Estornell1;~Yang_Liu3", "aff": ";University of California, Santa Cruz", "aff_domain": ";ucsc.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nestornell2024multillm,\ntitle={Multi-{LLM} Debate: Framework, Principals, and Interventions},\nauthor={Andrew Estornell and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=sy7eSEXdPC}\n}", "github": "", "reviewers": "1QQG;6kQa;iBgJ;C8as", "pdf_size": 1919331, "rating": "4;6;6;8", "confidence": "5;4;3;4", "soundness": "2;3;2;4", "novelty": "2;3;2;3", "presentation": "3;2;3;3", "wc_summary": "48;38;71;122", "wc_strengths": "48;38;83;63", "wc_weaknesses": "310;264;126;41", "wc_questions": "69;40;11;76", "wc_limitations": "24;11;16;30", "wc_review": "499;391;307;332", "wc_reply_reviewers": "247;56;34;22", "wc_reply_authors": "371;148;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.75, 32.45285041410076 ], "wc_strengths_avg": [ 58.0, 16.95582495781317 ], "wc_weaknesses_avg": [ 185.25, 107.33446557373824 ], "wc_questions_avg": [ 49.0, 25.758493744782516 ], "wc_limitations_avg": [ 20.25, 7.292976072907411 ], "wc_review_avg": [ 382.25, 73.98437335005278 ], "wc_reply_reviewers_avg": [ 89.75, 91.60342515430304 ], "wc_reply_authors_avg": [ 129.75, 151.8261752794952 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2167510986890664627&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": ";ucsc.edu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of California, Santa Cruz", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsc.edu", "aff_unique_abbr": "UCSC", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Cruz", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Can LLMs Solve Molecule Puzzles? A Multimodal Benchmark for Molecular Structure Elucidation", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97472", "id": "t1mAXb4Cop", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t1mAXb4Cop", "openreview": "https://openreview.net/forum?id=t1mAXb4Cop", "poster": "/media/PosterPDFs/NeurIPS%202024/97472.png?t=1731727779.3919237", "project": "", "author_site": "Kehan Guo, Bozhao Nan, Yujun Zhou, Taicheng Guo, Zhichun Guo, Mihir Surve, Zhenwen Liang, Nitesh Chawla, Olaf Wiest, Xiangliang Zhang", "tldr": "", "abstract": "Large Language Models (LLMs) have shown significant problem-solving capabilities across predictive and generative tasks in chemistry. \nHowever, their proficiency in multi-step chemical reasoning remains underexplored. \nWe introduce a new challenge: molecular structure elucidation, which involves deducing a molecule\u2019s structure from various types of spectral data. Solving such a molecular puzzle, akin to solving crossword puzzles, poses reasoning challenges that require integrating clues from diverse sources and engaging in iterative hypothesis testing. To address this challenging problem with LLMs, we present \\textbf{MolPuzzle}, a benchmark comprising 217 instances of structure elucidation, which feature over 23,000 QA samples presented in a sequential puzzle-solving process, involving three interlinked sub-tasks: molecule understanding, spectrum interpretation, and molecule construction. Our evaluation of 12 LLMs reveals that the best-performing LLM, GPT-4o, performs significantly worse than humans, with only a small portion (1.4\\%) of its answers exactly matching the ground truth. However, it performs nearly perfectly in the first subtask of molecule understanding, achieving accuracy close to 100\\%. This discrepancy highlights the potential of developing advanced LLMs with improved chemical reasoning capabilities in the other two sub-tasks. Our MolPuzzle dataset and evaluation code are available at this \\href{https://github.com/KehanGuo2/MolPuzzle}{link}.", "keywords": "Large Language Models;AI for Chemistry", "primary_area": "", "supplementary_material": "/attachment/fb4e8a8b858c3590b6332f0099ba48fa9446f08b.pdf", "author": "Kehan Guo;Bozhao Nan;Yujun Zhou;Taicheng Guo;Zhichun Guo;Mihir Surve;Zhenwen Liang;Nitesh V Chawla;Olaf Wiest;Xiangliang Zhang", "authorids": "~Kehan_Guo1;~Bozhao_Nan1;~Yujun_Zhou1;~Taicheng_Guo1;~Zhichun_Guo1;~Mihir_Surve1;~Zhenwen_Liang1;~Nitesh_V_Chawla1;~Olaf_Wiest1;~Xiangliang_Zhang1", "gender": "M;M;M;M;;;M;;M;F", "homepage": "https://kehanguo2.github.io/KehanGuo/;;https://yujunzhou.github.io/;https://taichengguo.github.io/;;;https://zhenwen-nlp.github.io/;;https://chemistry.nd.edu/people/olaf-wiest/;https://sites.nd.edu/xiangliang-zhang/", "dblp": ";;162/3265-2;325/5109;;;226/6083;;;74/1890-1", "google_scholar": "t8iRCLUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;t0c7rQQAAAAJ;OA_UdcIAAAAJ;;;4rKhF2AAAAAJ;;bfywzJwAAAAJ;BhRJe4wAAAAJ", "orcid": ";;0000-0003-1376-5187;0000-0001-7919-6912;;;;;0000-0001-9316-7720;0000-0002-3574-5665", "linkedin": "kehan98/;;yujun-zhou-zyj/;;;;;;;", "or_profile": "~Kehan_Guo1;~Bozhao_Nan1;~Yujun_Zhou1;~Taicheng_Guo1;~Zhichun_Guo1;~Mihir_Surve1;~Zhenwen_Liang1;~Nitesh_V_Chawla1;~Olaf_Wiest1;~Xiangliang_Zhang1", "aff": "University of Notre Dame;University of Notre Dame;University of Notre Dame;University of Notre Dame;;;University of Notre Dame;;University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;nd.edu;nd.edu;nd.edu;;;nd.edu;;nd.edu;nd.edu", "position": "PhD student;PhD student;PhD student;PhD student;;;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nguo2024can,\ntitle={Can {LLM}s Solve Molecule Puzzles? A Multimodal Benchmark for Molecular Structure Elucidation},\nauthor={Kehan Guo and Bozhao Nan and Yujun Zhou and Taicheng Guo and Zhichun Guo and Mihir Surve and Zhenwen Liang and Nitesh V Chawla and Olaf Wiest and Xiangliang Zhang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=t1mAXb4Cop}\n}", "github": "", "reviewers": "TXir;bdDQ;hyGW", "pdf_size": 2937766, "rating": "7;7;8", "confidence": "3;4;4", "wc_summary_and_contributions": "54;127;83", "wc_strengths": "109;59;2", "wc_improvement": "84;190;18", "wc_limitations": "24;85;5", "wc_correctness": "45;20;1", "wc_clarity": "4;23;3", "wc_relation_to_prior_work": "17;35;1", "wc_documentation": "13;19;5", "wc_additional_feedback": "1;1;1", "wc_review": "351;559;119", "wc_reply_reviewers": "18;14;12", "wc_reply_authors": "1131;151;20", "reply_reviewers": "1;1;1", "reply_authors": "7;3;2", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 88.0, 30.011109054259666 ], "wc_strengths_avg": [ 56.666666666666664, 43.71371511195186 ], "wc_improvement_avg": [ 97.33333333333333, 70.84882183604435 ], "wc_limitations_avg": [ 38.0, 34.1272129929572 ], "wc_correctness_avg": [ 22.0, 18.01850900231944 ], "wc_clarity_avg": [ 10.0, 9.201449161228174 ], "wc_relation_to_prior_work_avg": [ 17.666666666666668, 13.888444437333106 ], "wc_documentation_avg": [ 12.333333333333334, 5.734883511361751 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.0, 179.71829808527195 ], "wc_reply_reviewers_avg": [ 14.666666666666666, 2.494438257849294 ], "wc_reply_authors_avg": [ 434.0, 495.74657504280015 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 4.0, 2.160246899469287 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7671737665981933585&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 4, "email": "nd.edu;nd.edu;nd.edu;nd.edu;;;nd.edu;;nd.edu;nd.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Chat-Scene: Bridging 3D Scene and Large Language Models with Object Identifiers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93362", "id": "t3BhmwAzhv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t3BhmwAzhv", "openreview": "https://openreview.net/forum?id=t3BhmwAzhv", "poster": "", "project": "", "author_site": "Haifeng Huang, Yilun Chen, Zehan Wang, Rongjie Huang, Runsen Xu, Tai WANG, Luping Liu, Xize Cheng, Yang Zhao, Jiangmiao Pang, Zhou Zhao", "tldr": "", "abstract": "Recent advancements in 3D Large Language Models (LLMs) have demonstrated promising capabilities for 3D scene understanding. However, previous methods exhibit deficiencies in general referencing and grounding capabilities for intricate scene comprehension. In this paper, we introduce the use of object identifiers and object-centric representations to interact with scenes at the object level. Specifically, we decompose the input 3D scene into a set of object proposals, each assigned a unique identifier token, which enables efficient object referencing and grounding during user-assistant interactions. Given the scarcity of scene-language data, we model the scene embeddings as a sequence of explicit object-level embeddings, derived from semantic-rich 2D or 3D representations. By employing object identifiers, we transform diverse 3D scene-language tasks into a unified question-answering format, facilitating joint training without the need for additional task-specific heads. With minimal fine-tuning on all downstream tasks, our model significantly outperforms existing methods on benchmarks including ScanRefer, Multi3DRefer, Scan2Cap, ScanQA, and SQA3D.", "keywords": "3D Scene Understanding;Multi-modal Large Language Model", "primary_area": "machine_vision", "supplementary_material": "/attachment/6348068a3c035d7af393ad39a6bdf0a6851eacee.zip", "author": "Haifeng Huang;Yilun Chen;Zehan Wang;Rongjie Huang;Runsen Xu;Tai Wang;Luping Liu;Xize Cheng;Yang Zhao;Jiangmiao Pang;Zhou Zhao", "authorids": "~Haifeng_Huang3;~Yilun_Chen1;~Zehan_Wang2;~Rongjie_Huang1;~Runsen_Xu1;~Tai_Wang2;~Luping_Liu2;~Xize_Cheng1;~Yang_Zhao14;~Jiangmiao_Pang1;~Zhou_Zhao3", "gender": "M;M;M;M;M;;;M;M;M;", "homepage": "https://zzzzchs.github.io/;http://yilunchen.com/about/;https://github.com/12zehan17;;;;;https://exgc.github.io/;;https://oceanpang.github.io/;", "dblp": ";;126/7826-1;212/8936-1;289/6916;;;334/2167;50/2082-22;231/7630;", "google_scholar": "oUm2gZUAAAAJ;gKXC9Q8AAAAJ;euXK0lkAAAAJ;iRHBUsgAAAAJ;MOobrCcAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?authuser=0;", "orcid": ";0000-0003-3372-8703;0009-0007-7509-7563;;;;;0000-0001-9708-3225;;0000-0002-6711-9319;", "linkedin": "haifeng-huang-784b2b249/;yilunchen-cuhk/;;;runsen-xu-4262a3272/;;;;;;", "or_profile": "~Haifeng_Huang3;~Yilun_Chen1;~Zehan_Wang2;~Rongjie_Huang1;~Runsen_Xu1;~Tai_Wang2;~Luping_Liu2;~Xize_Cheng1;~Yang_Zhao14;~Jiangmiao_Pang1;~Zhou_Zhao3", "aff": "Zhejiang University;Shanghai Artificial Intelligence Laboratory;Zhejiang University;Zhejiang University;The Chinese University of Hong Kong;;;Zhejiang University;ByteDance Inc.;Shanghai AI Laboratory ;", "aff_domain": "zju.edu.cn;pjlab.org.cn;zju.edu.cn;zju.edu.cn;ie.cuhk.edu;;;zju.edu.cn;bytedance.com;pjlab.org.cn;", "position": "MS student;Researcher;PhD student;MS student;PhD student;;;PhD student;Researcher;Research Scientist;", "bibtex": "@inproceedings{\nhuang2024chatscene,\ntitle={Chat-Scene: Bridging 3D Scene and Large Language Models with Object Identifiers},\nauthor={Haifeng Huang and Yilun Chen and Zehan Wang and Rongjie Huang and Runsen Xu and Tai Wang and Luping Liu and Xize Cheng and Yang Zhao and Jiangmiao Pang and Zhou Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t3BhmwAzhv}\n}", "github": "", "reviewers": "G2or;2kgC;KdPm;n84p", "pdf_size": 6095874, "rating": "4;5;6;7", "confidence": "4;4;5;4", "soundness": "2;3;3;4", "novelty": "1;2;2;3", "presentation": "2;2;3;3", "wc_summary": "57;46;163;117", "wc_strengths": "42;41;170;126", "wc_weaknesses": "113;150;527;249", "wc_questions": "6;115;156;40", "wc_limitations": "14;17;1;32", "wc_review": "232;369;1017;564", "wc_reply_reviewers": "71;0;305;40", "wc_reply_authors": "236;43;489;17", "reply_reviewers": "1;0;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 95.75, 47.304201716126656 ], "wc_strengths_avg": [ 94.75, 55.47690961111659 ], "wc_weaknesses_avg": [ 259.75, 162.11010918508444 ], "wc_questions_avg": [ 79.25, 59.31852577399408 ], "wc_limitations_avg": [ 16.0, 11.022703842524301 ], "wc_review_avg": [ 545.5, 296.68543948094253 ], "wc_reply_reviewers_avg": [ 104.0, 118.74552623151746 ], "wc_reply_authors_avg": [ 196.25, 189.00975503925716 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3251521076949873825&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;pjlab.org.cn;zju.edu.cn;zju.edu.cn;ie.cuhk.edu;;;zju.edu.cn;bytedance.com;pjlab.org.cn;", "author_num": 11, "aff_unique_index": "0;1;0;0;2;0;3;4", "aff_unique_norm": "Zhejiang University;Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong;ByteDance;Shanghai AI Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zju.edu.cn;http://www.shailab.org/;https://www.cuhk.edu.hk;https://www.bytedance.com;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "ZJU;Shanghai AI Lab;CUHK;ByteDance;SAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "t3Xj7YD7fL", "title": "An NLP Benchmark Dataset for Predicting the Completeness of ESG Reports", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Environmental, Social, and Governance (ESG) reports serve as a platform for companies to publicly disclose their economic, environmental, and social impacts, as well as their contributions to sustainable development goals. The completeness of ESG reports is considered a crucial criterion for judging their quality and credibility, yet it is often overlooked in existing literature. This paper aims to comprehensively assess the completeness of ESG reports by evaluating their topic coverage and text quality. To achieve this goal, we propose two classification tasks: topic classification and quality classification for ESG sentences. To train the classifiers, we collected 14,468 ESG reports from Chinese-listed companies. We then segment them into sentences and label 8K of them with both topic and text quality tags. By fine-tuning several large language models (LLMs) on this dataset on the two classification tasks, we find that our dataset has the potential to fill the gap in academia regarding methods for measuring ESG completeness.", "keywords": "ESG; Completeness; Natural language processing; dataset", "primary_area": "", "supplementary_material": "/attachment/eb9adb2adc0100557522e29f05e2f85a49b36996.pdf", "author": "Qi Chang;Xuan Yang;Zihan Ding;Bin Liu;Wei Lan", "authorids": "~Qi_Chang2;~Xuan_Yang4;~Zihan_Ding4;~Bin_Liu11;~Wei_Lan1", "gender": "F;F;F;M;M", "homepage": "https://www.researchgate.net/profile/Qi-Chang-13;;;https://binspage.github.io/;http://www.swufe.edu.cn", "dblp": ";;;35/837-22;", "google_scholar": ";;;U2Ao6lIAAAAJ;", "orcid": ";0000-0002-3508-3331;0009-0004-5509-0581;0000-0002-8917-874X;", "linkedin": ";;;;", "or_profile": "~Qi_Chang2;~Xuan_Yang4;~Zihan_Ding4;~Bin_Liu11;~Wei_Lan1", "aff": "Southwest University of Finance and Economics;Southwest University of Finance and Economics;Southwest University of Finance and Economics;Southwestern University of Finance and Economics;Southwest University of Finance and Economics", "aff_domain": "swufe.edu.cn;swufe.edu.cn;swufe.edu.cn;swufe.edu.cn;swufe.edu.cn", "position": "Lecturer;PhD student;MS student;Associate Professor;Full Professor", "bibtex": "@misc{\nanonymous2024an,\ntitle={An {NLP} Benchmark Dataset for Predicting the Completeness of {ESG} Reports},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=t3Xj7YD7fL}\n}", "github": "", "project": "", "reviewers": "StBq;9fC5;yB1f;8D8P", "site": "https://openreview.net/forum?id=t3Xj7YD7fL", "pdf_size": 2795858, "rating": "3;4;5;5", "confidence": "4;4;4;4", "wc_summary_and_contributions": "16;63;64;83", "wc_strengths": "15;26;51;2", "wc_improvement": "29;54;195;10", "wc_limitations": "36;36;3;25", "wc_correctness": "1;40;1;22", "wc_clarity": "1;31;1;1", "wc_relation_to_prior_work": "1;7;1;1", "wc_documentation": "1;4;1;12", "wc_additional_feedback": "1;1;1;1", "wc_review": "101;262;318;157", "wc_reply_reviewers": "0;0;0;4", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 56.5, 24.70323865407125 ], "wc_strengths_avg": [ 23.5, 18.006943105369107 ], "wc_improvement_avg": [ 72.0, 72.70832139445939 ], "wc_limitations_avg": [ 25.0, 13.47219358530748 ], "wc_correctness_avg": [ 16.0, 16.294170736800325 ], "wc_clarity_avg": [ 8.5, 12.99038105676658 ], "wc_relation_to_prior_work_avg": [ 2.5, 2.598076211353316 ], "wc_documentation_avg": [ 4.5, 4.5 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 209.5, 85.23056963320144 ], "wc_reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:_aVa8FL4O6AJ:scholar.google.com/&scioq=An+NLP+Benchmark+Dataset+for+Predicting+the+Completeness+of+ESG+Reports&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Southwest University of Finance and Economics;Southwestern University of Finance and Economics", "aff_unique_dep": ";", "aff_unique_url": "https://www.swufe.edu.cn;https://www.swufe.edu.cn", "aff_unique_abbr": "SWUFE;SWUFE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "C-GAIL: Stabilizing Generative Adversarial Imitation Learning with Control Theory", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93361", "id": "t4VwoIYBf0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t4VwoIYBf0", "openreview": "https://openreview.net/forum?id=t4VwoIYBf0", "poster": "/media/PosterPDFs/NeurIPS%202024/93361.png?t=1730696305.5287488", "project": "", "author_site": "Tianjiao Luo, Tim Pearce, Huayu Chen, Jianfei Chen, Jun Zhu", "tldr": "", "abstract": "Generative Adversarial Imitation Learning (GAIL) provides a promising approach to training a generative policy to imitate a demonstrator. It uses on-policy Reinforcement Learning (RL) to optimize a reward signal derived from an adversarial discriminator. However, optimizing GAIL is difficult in practise, with the training loss oscillating during training, slowing convergence. This optimization instability can prevent GAIL from finding a good policy, harming its final performance. In this paper, we study GAIL\u2019s optimization from a control-theoretic perspective. We show that GAIL cannot converge to the desired equilibrium. In response, we analyze the training dynamics of GAIL in function space and design a novel controller that not only pushes GAIL to the desired equilibrium but also achieves asymptotic stability in a simplified \u201cone-step\u201d setting. Going from theory to practice, we propose Controlled-GAIL (C-GAIL), which adds a differentiable regularization term on the GAIL objective to stabilize training. Empirically, the C-GAIL regularizer improves the training of various existing GAIL methods, including the popular GAIL-DAC, by speeding up the convergence, reducing the range of oscillation, and matching the expert distribution more closely.", "keywords": "Reinforcement Learning;Control Theory;Stability Analysis", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/4b31b83c9ba7d4064ab08cd0c37fcba63ca92fbe.zip", "author": "Tianjiao Luo;Tim Pearce;Huayu Chen;Jianfei Chen;Jun Zhu", "authorids": "~Tianjiao_Luo1;~Tim_Pearce1;~Huayu_Chen1;~Jianfei_Chen1;~Jun_Zhu2", "gender": "F;;M;M;M", "homepage": "https://tianjiaoluo.github.io/;;https://chendrag.github.io/;http://ml.cs.tsinghua.edu.cn/~jianfei;http://ml.cs.tsinghua.edu.cn/~jun", "dblp": "240/3012;142/9777;259/3113;48/6809-1;50/2644-1", "google_scholar": "8GETNEsAAAAJ;https://scholar.google.co.uk/citations?user=09k1kdQAAAAJ;0FBCHc4AAAAJ;di5RZ1MAAAAJ;axsP38wAAAAJ", "orcid": ";;;;", "linkedin": "tianjiao-luo-99a60bba/;tim-pearce-3b165b69/;;;", "or_profile": "~Tianjiao_Luo1;~Tim_Pearce1;~Huayu_Chen1;~Jianfei_Chen1;~Jun_Zhu2", "aff": "Tsinghua University;Microsoft Research;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;research.microsoft.com;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "position": "PhD student;Researcher;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nluo2024cgail,\ntitle={C-{GAIL}: Stabilizing Generative Adversarial Imitation Learning with Control Theory},\nauthor={Tianjiao Luo and Tim Pearce and Huayu Chen and Jianfei Chen and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t4VwoIYBf0}\n}", "github": "", "reviewers": "9gMV;DQNe;jmhJ;qTQn;PPsU", "pdf_size": 770896, "rating": "5;5;6;7;8", "confidence": "3;3;4;3;5", "soundness": "3;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;3;3;3", "wc_summary": "68;132;53;54;97", "wc_strengths": "51;159;57;56;66", "wc_weaknesses": "98;178;134;63;237", "wc_questions": "171;159;81;176;2", "wc_limitations": "11;136;12;7;1", "wc_review": "399;764;337;356;403", "wc_reply_reviewers": "41;0;18;0;39", "wc_reply_authors": "16;0;14;0;22", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 6.2, 1.16619037896906 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 80.8, 30.129055743584136 ], "wc_strengths_avg": [ 77.8, 40.88716179927386 ], "wc_weaknesses_avg": [ 142.0, 60.929467419303776 ], "wc_questions_avg": [ 117.8, 67.34805119674957 ], "wc_limitations_avg": [ 33.4, 51.445505148652195 ], "wc_review_avg": [ 451.8, 158.11059420544848 ], "wc_reply_reviewers_avg": [ 19.6, 17.917589123540033 ], "wc_reply_authors_avg": [ 10.4, 8.890444308357148 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7288689868556626, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14231184936964213373&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mails.tsinghua.edu.cn;research.microsoft.com;tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "THU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "AudioMarkBench: Benchmarking Robustness of Audio Watermarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97471", "id": "t6LQXcFTEn", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t6LQXcFTEn", "openreview": "https://openreview.net/forum?id=t6LQXcFTEn", "poster": "", "project": "", "author_site": "Hongbin Liu, Moyang Guo, Zhengyuan Jiang, Lun Wang, Neil Gong", "tldr": "", "abstract": "The increasing realism of synthetic speech, driven by advancements in text-to-speech models, raises ethical concerns regarding impersonation and disinformation. Audio watermarking offers a promising solution via embedding human-imperceptible watermarks into AI-generated audios. However, the robustness of audio watermarking against common/adversarial perturbations remains understudied. We present AudioMarkBench, the first systematic benchmark for evaluating the robustness of audio watermarking against *watermark removal* and *watermark forgery*. AudioMarkBench includes a new dataset created from Common-Voice across languages, biological sexes, and ages, 3 state-of-the-art watermarking methods, and 15 types of perturbations. We benchmark the robustness of these methods against the perturbations in no-box, black-box, and white-box settings. Our findings highlight the vulnerabilities of current watermarking techniques and emphasize the need for more robust and fair audio watermarking solutions. Our dataset and code are publicly available at https://github.com/moyangkuo/AudioMarkBench.", "keywords": "Robustness;audio watermarking", "primary_area": "", "supplementary_material": "", "author": "Hongbin Liu;Moyang Guo;Zhengyuan Jiang;Lun Wang;Neil Zhenqiang Gong", "authorids": "~Hongbin_Liu2;~Moyang_Guo1;~Zhengyuan_Jiang1;~Lun_Wang1;~Neil_Zhenqiang_Gong1", "gender": "M;M;M;;M", "homepage": "https://scholars.duke.edu/person/hongbin.liu;;https://zhengyuan-jiang.github.io/;https://wanglun1996.github.io/;http://people.duke.edu/~zg70/", "dblp": "82/6141-5;;306/8041;;03/9437", "google_scholar": "1Vitx-wAAAAJ;5wPwp7IAAAAJ;https://scholar.google.com.hk/citations?user=egfjpwMAAAAJ;;t6uCsYoAAAAJ", "orcid": ";;0000-0002-4778-7766;;0000-0002-9900-9309", "linkedin": "hongbin-liu-002387158/;moyang-guo-651104330/;zhengyuan-jiang-pb18061262/;;", "or_profile": "~Hongbin_Liu2;~Moyang_Guo1;~Zhengyuan_Jiang1;~Lun_Wang1;~Neil_Gong2", "aff": "Duke University;Duke University, Duke University;Duke University;Google;Duke University", "aff_domain": "duke.edu;ece.duke.edu;duke.edu;google.com;duke.edu", "position": "PhD student;MS student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nliu2024audiomarkbench,\ntitle={AudioMarkBench: Benchmarking Robustness of Audio Watermarking},\nauthor={Hongbin Liu and Moyang Guo and Zhengyuan Jiang and Lun Wang and Neil Zhenqiang Gong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=t6LQXcFTEn}\n}", "github": "", "reviewers": "obkV;M2hh;1ur8", "pdf_size": 2999049, "rating": "4;7;7", "confidence": "3;4;4", "wc_summary_and_contributions": "50;59;28", "wc_strengths": "19;91;27", "wc_improvement": "7;3;38", "wc_limitations": "1;14;7", "wc_correctness": "1;13;6", "wc_clarity": "1;11;6", "wc_relation_to_prior_work": "1;14;9", "wc_documentation": "9;11;15", "wc_additional_feedback": "1;1;1", "wc_review": "90;217;137", "wc_reply_reviewers": "0;10;35", "wc_reply_authors": "54;0;0", "reply_reviewers": "0;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 45.666666666666664, 13.021349989749739 ], "wc_strengths_avg": [ 45.666666666666664, 32.221455929585524 ], "wc_improvement_avg": [ 16.0, 15.641824275533422 ], "wc_limitations_avg": [ 7.333333333333333, 5.312459150169743 ], "wc_correctness_avg": [ 6.666666666666667, 4.921607686744467 ], "wc_clarity_avg": [ 6.0, 4.08248290463863 ], "wc_relation_to_prior_work_avg": [ 8.0, 5.354126134736337 ], "wc_documentation_avg": [ 11.666666666666666, 2.494438257849294 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 148.0, 52.42772803266099 ], "wc_reply_reviewers_avg": [ 15.0, 14.719601443879744 ], "wc_reply_authors_avg": [ 18.0, 25.45584412271571 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13990134611092555912&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "duke.edu;ece.duke.edu;duke.edu;google.com;duke.edu", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Duke University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.duke.edu;https://www.google.com", "aff_unique_abbr": "Duke;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "UQE: A Query Engine for Unstructured Databases", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93360", "id": "t7SGOv5W5z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t7SGOv5W5z", "openreview": "https://openreview.net/forum?id=t7SGOv5W5z", "poster": "/media/PosterPDFs/NeurIPS%202024/93360.png?t=1733844407.4043076", "project": "", "author_site": "Hanjun Dai, Bethany Wang, Xingchen Wan, Bo Dai, Sherry Yang, Azade Nova, Pengcheng Yin, Mangpo Phothilimthana, Charles Sutton, Dale Schuurmans", "tldr": "", "abstract": "Analytics on structured data is a mature field with many successful methods.\nHowever, most real world data exists in unstructured form, such as images and conversations.\nWe investigate the potential of Large Language Models (LLMs) to enable unstructured data analytics.\nIn particular, we propose a new Universal Query Engine (UQE) that directly interrogates and draws insights from unstructured data collections.\nThis engine accepts queries in a Universal Query Language (UQL), a dialect of SQL that provides full natural language flexibility in specifying conditions and operators.\nThe new engine leverages the ability of LLMs to conduct analysis of unstructured data, while also allowing us to exploit advances in sampling and optimization techniques to achieve efficient and accurate query execution.\nIn addition, we borrow techniques from classical compiler theory to better orchestrate the workflow between sampling methods and foundation model calls.\nWe demonstrate the efficiency of UQE on data analytics across different modalities, including images, dialogs and reviews, across a range of useful query types, including conditional aggregation, semantic retrieval and abstraction aggregation.", "keywords": "unstructured data;sampling and optimization;database;language models", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Hanjun Dai;Bethany Yixin Wang;Xingchen Wan;Bo Dai;Sherry Yang;Azade Nova;Pengcheng Yin;Phitchaya Mangpo Phothilimthana;Charles Sutton;Dale Schuurmans", "authorids": "~Hanjun_Dai1;~Bethany_Yixin_Wang1;~Xingchen_Wan1;~Bo_Dai1;~Sherry_Yang1;~Azade_Nova1;~Pengcheng_Yin1;~Phitchaya_Mangpo_Phothilimthana1;~Charles_Sutton1;~Dale_Schuurmans1", "gender": "M;F;M;;F;;M;F;M;", "homepage": "https://hanjun-dai.github.io;;https://xingchen.one;https://bo-dai.github.io/;https://sherryy.github.io;;https://pengcheng.in;https://mangpo.net/;http://homepages.inf.ed.ac.uk/csutton/;", "dblp": "144/7311;334/1546;255/7214;64/2903;;;130/7385;127/3128;59/5879;", "google_scholar": "obpl7GQAAAAJ;;6KkohssAAAAJ;TIKl_foAAAAJ;7c1B_fIAAAAJ;;t5lVb6sAAAAJ;7Fxbm0AAAAAJ;https://scholar.google.co.uk/citations?user=hYtGXD0AAAAJ;", "orcid": ";;0000-0003-0074-0597;0009-0002-8070-574X;;;;;0000-0002-0041-3820;", "linkedin": "hanjun-dai;yixin-wang/;;;;;pchyin/;;charles-sutton-772aa126;", "or_profile": "~Hanjun_Dai1;~Bethany_Yixin_Wang1;~Xingchen_Wan1;~Bo_Dai1;~Sherry_Yang1;~Azade_Nova1;~Pengcheng_Yin1;~Phitchaya_Mangpo_Phothilimthana1;~Charles_Sutton1;~Dale_Schuurmans1", "aff": "Google Research;Google;Google;Google Brain;University of California, Berkeley;;Google;Google;University of Edinburgh;", "aff_domain": "google.com;google.com;google.com;google.com;berkeley.edu;;google.com;google.com;ed.ac.uk;", "position": "Researcher;Researcher;Research Scientist;Research Scientist;Student;;Researcher;Researcher;Professor;", "bibtex": "@inproceedings{\ndai2024uqe,\ntitle={{UQE}: A Query Engine for Unstructured Databases},\nauthor={Hanjun Dai and Bethany Yixin Wang and Xingchen Wan and Bo Dai and Sherry Yang and Azade Nova and Pengcheng Yin and Phitchaya Mangpo Phothilimthana and Charles Sutton and Dale Schuurmans},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t7SGOv5W5z}\n}", "github": "", "reviewers": "E5QU;rs84;uiEW;6SvA", "pdf_size": 941472, "rating": "5;5;6;7", "confidence": "4;4;2;3", "soundness": "3;2;2;4", "novelty": "2;2;2;3", "presentation": "4;2;2;4", "wc_summary": "61;62;86;64", "wc_strengths": "65;43;78;51", "wc_weaknesses": "226;193;73;104", "wc_questions": "35;5;28;9", "wc_limitations": "34;11;1;8", "wc_review": "421;314;266;236", "wc_reply_reviewers": "0;19;0;9", "wc_reply_authors": "45;45;45;14", "reply_reviewers": "0;1;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 68.25, 10.304731922762475 ], "wc_strengths_avg": [ 59.25, 13.386093530227555 ], "wc_weaknesses_avg": [ 149.0, 62.581946278459576 ], "wc_questions_avg": [ 19.25, 12.577261228105266 ], "wc_limitations_avg": [ 13.5, 12.379418403139947 ], "wc_review_avg": [ 309.25, 70.2615648843662 ], "wc_reply_reviewers_avg": [ 7.0, 7.842193570679061 ], "wc_reply_authors_avg": [ 37.25, 13.423393758658799 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13784317197475149258&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "google.com;google.com;google.com;google.com;berkeley.edu;;google.com;google.com;ed.ac.uk;", "author_num": 10, "aff_unique_index": "0;0;0;0;1;0;0;2", "aff_unique_norm": "Google;University of California, Berkeley;University of Edinburgh", "aff_unique_dep": "Google Research;;", "aff_unique_url": "https://research.google;https://www.berkeley.edu;https://www.ed.ac.uk", "aff_unique_abbr": "Google Research;UC Berkeley;Edinburgh", "aff_campus_unique_index": "0;0;0;0;1;0;0", "aff_campus_unique": "Mountain View;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Approximation-Aware Bayesian Optimization", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93359", "id": "t7euV5dl5M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t7euV5dl5M", "openreview": "https://openreview.net/forum?id=t7euV5dl5M", "poster": "", "project": "", "author_site": "Natalie Maus, Kyurae Kim, David Eriksson, Geoff Pleiss, John Cunningham, Jacob Gardner", "tldr": "", "abstract": "High-dimensional Bayesian optimization (BO) tasks such as molecular design often require $>10,$$000$ function evaluations before obtaining meaningful results. While methods like sparse variational Gaussian processes (SVGPs) reduce computational requirements in these settings, the underlying approximations result in suboptimal data acquisitions that slow the progress of optimization. In this paper we modify SVGPs to better align with the goals of BO: targeting informed data acquisition over global posterior fidelity. Using the framework of utility-calibrated variational inference (Lacoste\u2013Julien et al., 2011), we unify GP approximation and data acquisition into a joint optimization problem, thereby ensuring optimal decisions under a limited computational budget. Our approach can be used with any decision-theoretic acquisition function and is readily compatible with trust region methods like TuRBO (Eriksson et al., 2019). We derive efficient joint objectives for the expected improvement (EI) and knowledge gradient (KG) acquisition functions in both the standard and batch BO settings. On a variety of recent high dimensional benchmark tasks in control and molecular design, our approach significantly outperforms standard SVGPs and is capable of achieving comparable rewards with up to $10\\times$ fewer function evaluations.", "keywords": "Bayesian optimization;variational inference;Gaussian processes;utility maximization;expected improvement;knowledge gradient;black-box optimization", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Natalie Maus;Kyurae Kim;David Eriksson;Geoff Pleiss;John Patrick Cunningham;Jacob R. Gardner", "authorids": "~Natalie_Maus1;~Kyurae_Kim1;~David_Eriksson2;~Geoff_Pleiss1;~John_Patrick_Cunningham1;~Jacob_R._Gardner1", "gender": "F;M;M;M;M;M", "homepage": "https://sites.google.com/seas.upenn.edu/natalie-maus/;;http://geoffpleiss.com;stat.columbia.edu/~cunningham;;https://krkim.me", "dblp": "264/7932;29/2816;199/1693.html;51/4077;144/7773;322/4034", "google_scholar": "hNRd6lsAAAAJ;SWQjkN4AAAAJ;XO8T-Y4AAAAJ;88cU_4UAAAAJ;0gkajvEAAAAJ;pKGsQ1cAAAAJ", "orcid": ";;0000-0002-7009-0967;;;0000-0003-2063-0889", "linkedin": "natalie-maus-14b936178/;davideriksson89/;;;;red-portal/", "or_profile": "~Natalie_Maus1;~David_Eriksson2;~Geoff_Pleiss1;~John_Patrick_Cunningham1;~Jacob_R_Gardner1;~Khurai_Kim1", "aff": "University of Pennsylvania;Meta;Vector Institute;Columbia University;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;meta.com;vectorinstitute.ai;columbia.edu;upenn.edu;seas.upenn.edu", "position": "PhD student;Research scientist;Researcher;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmaus2024approximationaware,\ntitle={Approximation-Aware Bayesian Optimization},\nauthor={Natalie Maus and Kyurae Kim and David Eriksson and Geoff Pleiss and John Patrick Cunningham and Jacob R. Gardner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t7euV5dl5M}\n}", "github": "", "reviewers": "jWXp;TCW1;veTg;6opG", "pdf_size": 5881495, "rating": "7;7;7;8", "confidence": "3;4;4;3", "soundness": "3;3;4;4", "novelty": "4;3;3;3", "presentation": "2;3;4;4", "wc_summary": "89;97;92;83", "wc_strengths": "100;167;112;41", "wc_weaknesses": "44;146;128;205", "wc_questions": "254;113;121;2", "wc_limitations": "48;118;15;12", "wc_review": "535;641;468;343", "wc_reply_reviewers": "25;20;24;101", "wc_reply_authors": "0;0;0;144", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 90.25, 5.0682837331783235 ], "wc_strengths_avg": [ 105.0, 44.760473634670134 ], "wc_weaknesses_avg": [ 130.75, 57.61672934139875 ], "wc_questions_avg": [ 122.5, 89.30985387962518 ], "wc_limitations_avg": [ 48.25, 42.67537346058028 ], "wc_review_avg": [ 496.75, 108.09342024378728 ], "wc_reply_reviewers_avg": [ 42.5, 33.826764551165695 ], "wc_reply_authors_avg": [ 36.0, 62.353829072479584 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11152772625999323968&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "upenn.edu;meta.com;vectorinstitute.ai;columbia.edu;upenn.edu;seas.upenn.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "University of Pennsylvania;Meta;Vector Institute;Columbia University", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.upenn.edu;https://meta.com;https://vectorinstitute.ai/;https://www.columbia.edu", "aff_unique_abbr": "UPenn;Meta;Vector Institute;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "SLED: Self Logits Evolution Decoding for Improving Factuality in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93358", "id": "t7wvJstsiV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t7wvJstsiV", "openreview": "https://openreview.net/forum?id=t7wvJstsiV", "poster": "", "project": "", "author_site": "Jianyi Zhang, Da-Cheng Juan, Cyrus Rashtchian, Chun-Sung Ferng, Heinrich Jiang, Yiran Chen", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated remarkable capabilities, but their outputs can sometimes be unreliable or factually incorrect. To address this, we introduce Self Logits Evolution Decoding (SLED), a novel decoding framework that enhances the truthfulness of LLMs without relying on external knowledge bases or requiring further fine-tuning. From an optimization perspective, our SLED framework leverages the latent knowledge embedded within the LLM by contrasting the output logits from the final layer with those from early layers. It then utilizes an approximate gradient approach to enable latent knowledge to guide the self-refinement of outputs, thereby effectively improving factual accuracy. Extensive experiments have been conducted on established benchmarks across a diverse range of model families (LLaMA 2, LLaMA 3, Gemma) and scales (from 2B to 70B), including more advanced architectural configurations such as the mixture of experts (MoE). Our evaluation spans a wide variety of tasks, including multi-choice, open-generation, and adaptations to chain-of-thought reasoning tasks. The results demonstrate that SLED consistently improves factual accuracy by up to 20\\% compared to existing decoding methods while maintaining natural language fluency and negligible latency overhead. Furthermore, it can be flexibly combined with other decoding methods to further enhance their performance.", "keywords": "Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jianyi Zhang;Da-Cheng Juan;Cyrus Rashtchian;Chun-Sung Ferng;Heinrich Jiang;Yiran Chen", "authorids": "~Jianyi_Zhang1;~Da-Cheng_Juan1;~Cyrus_Rashtchian1;~Chun-Sung_Ferng1;~Heinrich_Jiang1;~Yiran_Chen1", "gender": ";;M;;M;M", "homepage": "https://jayzhang42.github.io/;;http://www.cyrusrashtchian.com;;;https://ece.duke.edu/people/yiran-chen/", "dblp": ";47/1564;69/8610;63/7455;182/2472;80/1641", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;OtgZrhUAAAAJ;BjFURuUAAAAJ;;", "orcid": ";;;;;0000-0002-1486-8412", "linkedin": ";;;;;", "or_profile": "~Jianyi_Zhang1;~Da-Cheng_Juan1;~Cyrus_Rashtchian1;~Chun-Sung_Ferng1;~Heinrich_Jiang1;~Yiran_Chen1", "aff": "Pittsburgh Supercomputing Center & Duke University;Google Research;Google Research;Google;Google;Duke University", "aff_domain": "duke.edu;google.com;google.com;google.com;google.com;duke.edu", "position": "Principal investigator;Senior Software Engineer;Researcher;Software Engineer;Research scientist;Professor", "bibtex": "@inproceedings{\nzhang2024sled,\ntitle={{SLED}: Self Logits Evolution Decoding for Improving Factuality in Large Language Models},\nauthor={Jianyi Zhang and Da-Cheng Juan and Cyrus Rashtchian and Chun-Sung Ferng and Heinrich Jiang and Yiran Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t7wvJstsiV}\n}", "github": "", "reviewers": "Xg4n;Mgbd;1uAz;12rF", "pdf_size": 917201, "rating": "4;4;5;7", "confidence": "4;4;3;4", "soundness": "2;2;2;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "79;69;71;127", "wc_strengths": "37;63;23;113", "wc_weaknesses": "137;56;88;107", "wc_questions": "106;3;33;35", "wc_limitations": "39;50;4;10", "wc_review": "398;241;219;392", "wc_reply_reviewers": "36;0;48;28", "wc_reply_authors": "1177;1016;604;666", "reply_reviewers": "1;0;1;1", "reply_authors": "4;4;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.5, 23.680160472429236 ], "wc_strengths_avg": [ 59.0, 34.322004603461025 ], "wc_weaknesses_avg": [ 97.0, 29.4193813667113 ], "wc_questions_avg": [ 44.25, 37.83764659700706 ], "wc_limitations_avg": [ 25.75, 19.266226926930972 ], "wc_review_avg": [ 312.5, 82.89300332356163 ], "wc_reply_reviewers_avg": [ 28.0, 17.663521732655695 ], "wc_reply_authors_avg": [ 865.75, 238.6759047327568 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7398544830008200679&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "duke.edu;google.com;google.com;google.com;google.com;duke.edu", "author_num": 6, "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "Pittsburgh Supercomputing Center;Google;Duke University", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://www.psc.edu;https://research.google;https://www.duke.edu", "aff_unique_abbr": "PSC;Google Research;Duke", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Vocal Call Locator Benchmark (VCL) for localizing rodent vocalizations from multi-channel audio", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97470", "id": "t7xYNN7RJC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t7xYNN7RJC", "openreview": "https://openreview.net/forum?id=t7xYNN7RJC", "poster": "/media/PosterPDFs/NeurIPS%202024/97470.png?t=1733760901.396182", "project": "", "author_site": "Ralph Peterson, Aramis Tanelus, Christopher Ick, Bartul Mimica, Niegil Francis Muttath Joseph, Violet Ivan, Aman Choudhri, Annegret Falkner, Mala Murthy, David Schneider, Dan Sanes, Alex Williams", "tldr": "", "abstract": "Understanding the behavioral and neural dynamics of social interactions is a goal\nof contemporary neuroscience. Many machine learning methods have emerged\nin recent years to make sense of complex video and neurophysiological data that\nresult from these experiments. Less focus has been placed on understanding how\nanimals process acoustic information, including social vocalizations. A critical\nstep to bridge this gap is determining the senders and receivers of acoustic infor-\nmation in social interactions. While sound source localization (SSL) is a classic\nproblem in signal processing, existing approaches are limited in their ability to\nlocalize animal-generated sounds in standard laboratory environments. Advances\nin deep learning methods for SSL are likely to help address these limitations,\nhowever there are currently no publicly available models, datasets, or benchmarks\nto systematically evaluate SSL algorithms in the domain of bioacoustics. Here,\nwe present the VCL Benchmark: the first large-scale dataset for benchmarking\nSSL algorithms in rodents. We acquired synchronized video and multi-channel\naudio recordings of 767,295 sounds with annotated ground truth sources across 9\nconditions. The dataset provides benchmarks which evaluate SSL performance on\nreal data, simulated acoustic data, and a mixture of real and simulated data. We\nintend for this benchmark to facilitate knowledge transfer between the neuroscience\nand acoustic machine learning communities, which have had limited overlap.", "keywords": "sound source localization;animal behavior;computational ethology", "primary_area": "", "supplementary_material": "/attachment/ce878f64d8e56b8846ce045e6cbd2031e0d09043.pdf", "author": "Ralph E Peterson;Aramis Tanelus;Christopher A. Ick;Bartul Mimica;M J Niegil Francis;Violet Jane Ivan;Aman Choudhri;Annegret Falkner;Mala Murthy;David M Schneider;Dan H. Sanes;Alex H Williams", "authorids": "~Ralph_E_Peterson1;~Aramis_Tanelus1;~Christopher_A._Ick1;~Bartul_Mimica1;~M_J_Niegil_Francis1;~Violet_Jane_Ivan1;~Aman_Choudhri1;~Annegret_Falkner1;~Mala_Murthy1;~David_M_Schneider1;~Dan_H._Sanes1;~Alex_H_Williams1", "gender": "M;M;M;M;;F;M;F;F;;M;M", "homepage": ";https://github.com/Aramist;https://chrisick.github.io/;https://bartulem.github.io/;;;;https://www.falknerlab.com/;https://murthylab.princeton.edu;https://www.schneiderlaboratory.com/;https://www.saneslab.com/;http://alexhwilliams.info", "dblp": ";;;;;;;;;;;126/4222", "google_scholar": "rOXG034AAAAJ;;;https://scholar.google.no/citations?user=aUeOyr8AAAAJ;Jt0LmO4AAAAJ;;;;;5NEJCPcAAAAJ;;7_GzzXMAAAAJ", "orcid": ";0009-0009-9447-2730;;0000-0001-6404-0560;;;0000-0003-4963-6651;;0000-0003-3063-3389;;;0000-0001-5853-103X", "linkedin": ";aramis-tanelus-b50917217/;;bartul-mimica/;;violet-ivan/;;;;;;", "or_profile": "~Ralph_E_Peterson1;~Aramis_Tanelus1;~Christopher_A._Ick1;~Bartul_Mimica1;~M_J_Niegil_Francis1;~Violet_Jane_Ivan1;~Aman_Choudhri1;~Annegret_Falkner1;~Mala_Murthy1;~David_M_Schneider1;~Dan_H._Sanes1;~Alex_H_Williams1", "aff": "New York University;Flatiron Institute;New York University;Princeton University;New York University;NYU Langone Health;Columbia University;Princeton University;Princeton University;New York University;New York University;Flatiron Institute", "aff_domain": "nyu.edu;flatironinstitute.org;nyu.edu;princeton.edu;nyu.edu;nyulangone.org;columbia.edu;princeton.edu;princeton.edu;nyu.edu;nyu.edu;flatironinstitute.org", "position": "PhD student;Researcher;PhD student;Postdoc;Researcher;PhD student;Undergrad student;Assistant Professor;Full Professor;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\npeterson2024vocal,\ntitle={Vocal Call Locator Benchmark ({VCL}) for localizing rodent vocalizations from multi-channel audio},\nauthor={Ralph E Peterson and Aramis Tanelus and Christopher A. Ick and Bartul Mimica and M J Niegil Francis and Violet Jane Ivan and Aman Choudhri and Annegret Falkner and Mala Murthy and David M Schneider and Dan H. Sanes and Alex H Williams},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=t7xYNN7RJC}\n}", "github": "", "reviewers": "XE72;8UsB;bexs;ioCn", "pdf_size": 19024565, "rating": "6;7;7;7", "confidence": "5;4;4;4", "wc_summary_and_contributions": "57;90;46;21", "wc_strengths": "23;42;36;24", "wc_improvement": "25;71;45;25", "wc_limitations": "26;5;9;25", "wc_correctness": "8;8;13;30", "wc_clarity": "30;22;5;50", "wc_relation_to_prior_work": "5;12;31;12", "wc_documentation": "13;12;11;20", "wc_additional_feedback": "1;1;1;1", "wc_review": "188;263;197;208", "wc_reply_reviewers": "11;26;88;20", "wc_reply_authors": "0;0;208;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;3;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 53.5, 24.78406746278746 ], "wc_strengths_avg": [ 31.25, 8.042853971072706 ], "wc_improvement_avg": [ 41.5, 18.887826767524103 ], "wc_limitations_avg": [ 16.25, 9.364160400164021 ], "wc_correctness_avg": [ 14.75, 9.03811374126261 ], "wc_clarity_avg": [ 26.75, 16.17675801883678 ], "wc_relation_to_prior_work_avg": [ 15.0, 9.669539802906858 ], "wc_documentation_avg": [ 14.0, 3.5355339059327378 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 214.0, 29.16333314283537 ], "wc_reply_reviewers_avg": [ 36.25, 30.35107082130711 ], "wc_reply_authors_avg": [ 52.0, 90.06664199358161 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nkEbV3oBgEsJ:scholar.google.com/&scioq=Vocal+Call+Locator+Benchmark+(VCL)+for+localizing+rodent+vocalizations+from+multi-channel+audio&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "nyu.edu;flatironinstitute.org;nyu.edu;princeton.edu;nyu.edu;nyulangone.org;columbia.edu;princeton.edu;princeton.edu;nyu.edu;nyu.edu;flatironinstitute.org", "author_num": 12, "aff_unique_index": "0;1;0;2;0;3;4;2;2;0;0;1", "aff_unique_norm": "New York University;Flatiron Institute;Princeton University;NYU Langone Health;Columbia University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.nyu.edu;https://flatironinstitute.org;https://www.princeton.edu;https://nyulangone.org;https://www.columbia.edu", "aff_unique_abbr": "NYU;Flatiron;Princeton;NYU Langone;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "t8ch1OCvHh", "title": "Mitigating Quantization Errors Due to Activation Spikes in GLU-Based LLMs", "track": "main", "status": "Reject", "tldr": "", "abstract": "Modern large language models (LLMs) have established state-of-the-art performance through architectural improvements, but still require significant computational cost for inference. In an effort to reduce the inference cost, post-training quantization (PTQ) has become a popular approach, quantizing weights and activations to lower precision, such as INT8. In this paper, we reveal the challenges of activation quantization in GLU variants, which are widely used in feed-forward network (FFN) of modern LLMs, such as LLaMA family.\nThe problem is that severe local quantization errors, caused by excessive magnitudes of activation in GLU variants, significantly degrade the performance of the quantized LLM. We denote these activations as activation spikes. Our further observations provide a systematic pattern of activation spikes: 1) The activation spikes occur in the FFN of specific layers, particularly in the early and late layers, 2) The activation spikes are dedicated to a couple of tokens, rather than being shared across a sequence. Based on our observations, we propose two empirical methods, Quantization-free Module (QFeM) and Quantization-free Prefix (QFeP), to isolate the activation spikes during quantization. Our extensive experiments validate the effectiveness of the proposed methods for the activation quantization, especially with coarse-grained scheme, of latest LLMs with GLU variants, including LLaMA-2/3, Mistral, Mixtral, SOLAR, and Gemma.\nIn particular, our methods enhance the current alleviation techniques (e.g., SmoothQuant) that fail to control the activation spikes.", "keywords": "quantization;LLM;post-training quantization;outliers", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jaewoo Yang;Hayun Kim;Younghoon Kim", "authorids": "~Jaewoo_Yang1;~Hayun_Kim1;~Younghoon_Kim5", "gender": "M;F;", "homepage": "https://onnoo.github.io;https://github.com/hayun0909;http://nongaussian.github.io", "dblp": ";;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jaewoo_Yang1;~Hayun_Kim1;~Younghoon_Kim5", "aff": "Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "PhD student;MS student;Associate Professor", "bibtex": "@misc{\nanonymous2024mitigating,\ntitle={Mitigating Quantization Errors Due to Activation Spikes in {GLU}-Based {LLM}s},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=t8ch1OCvHh}\n}", "github": "", "project": "", "reviewers": "PpZ5;TX3p;geCr;BgAS", "site": "https://openreview.net/forum?id=t8ch1OCvHh", "pdf_size": 1052034, "rating": "5;5;5;5", "confidence": "4;3;3;2", "soundness": "3;2;3;2", "novelty": "2;3;2;2", "presentation": "2;3;3;2", "wc_summary": "32;26;124;97", "wc_strengths": "30;5;53;52", "wc_weaknesses": "204;5;79;66", "wc_questions": "71;274;63;266", "wc_limitations": "1;1;1;8", "wc_review": "338;311;320;489", "wc_reply_reviewers": "100;43;10;26", "wc_reply_authors": "676;19;22;24", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 69.75, 41.90689084148334 ], "wc_strengths_avg": [ 35.0, 19.60867155112758 ], "wc_weaknesses_avg": [ 88.5, 72.29972337429791 ], "wc_questions_avg": [ 168.5, 101.57878715558677 ], "wc_limitations_avg": [ 2.75, 3.031088913245535 ], "wc_review_avg": [ 364.5, 72.53447456210047 ], "wc_reply_reviewers_avg": [ 44.75, 33.96597562267276 ], "wc_reply_authors_avg": [ 185.25, 283.3402327591336 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7851896134578114190&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Stochastic contextual bandits with graph feedback: from independence number to MAS number", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93357", "id": "t8iosEWoyd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t8iosEWoyd", "openreview": "https://openreview.net/forum?id=t8iosEWoyd", "poster": "/media/PosterPDFs/NeurIPS%202024/93357.png?t=1730249056.37102", "project": "", "author_site": "Yuxiao Wen, Yanjun Han, Zhengyuan Zhou", "tldr": "", "abstract": "We consider contextual bandits with graph feedback, a class of interactive learning problems with richer structures than vanilla contextual bandits, where taking an action reveals the rewards for all neighboring actions in the feedback graph under all contexts. Unlike the multi-armed bandits setting where a growing literature has painted a near-complete understanding of graph feedback, much remains unexplored in the contextual bandits counterpart. In this paper, we make inroads into this inquiry by establishing a regret lower bound $\\Omega(\\sqrt{\\beta_M(G) T})$, where $M$ is the number of contexts, $G$ is the feedback graph, and $\\beta_M(G)$ is our proposed graph-theoretic quantity that characterizes the fundamental learning limit for this class of problems. Interestingly, $\\beta_M(G)$ interpolates between $\\alpha(G)$ (the independence number of the graph) and $\\mathsf{m}(G)$ (the maximum acyclic subgraph (MAS) number of the graph) as the number of contexts $M$ varies. We also provide algorithms that achieve near-optimal regret for important classes of context sequences and/or feedback graphs, such as transitively closed graphs that find applications in auctions and inventory control. In particular, with many contexts, our results show that the MAS number essentially characterizes the statistical complexity for contextual bandits, as opposed to the independence number in multi-armed bandits.", "keywords": "contextual bandits;graph feedback;minimax rate", "primary_area": "bandits", "supplementary_material": "", "author": "Yuxiao Wen;Yanjun Han;Zhengyuan Zhou", "authorids": "~Yuxiao_Wen1;~Yanjun_Han1;~Zhengyuan_Zhou2", "gender": "M;M;M", "homepage": ";https://yanjunhan2021.github.io;https://scholar.google.com/citations?user=hiGI9v0AAAAJ&hl=en", "dblp": "298/1362;35/7252;125/5270", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;hdTDzlQAAAAJ;", "orcid": ";;", "linkedin": "yuxiao-wen-4b3162161/;;", "or_profile": "~Yuxiao_Wen1;~Yanjun_Han1;~Zhengyuan_Zhou2", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwen2024stochastic,\ntitle={Stochastic contextual bandits with graph feedback: from independence number to {MAS} number},\nauthor={Yuxiao Wen and Yanjun Han and Zhengyuan Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t8iosEWoyd}\n}", "github": "", "reviewers": "6nrr;DvmA;iUw4;5kFj;mcAY", "pdf_size": 445682, "rating": "5;6;6;6;6", "confidence": "3;4;2;4;3", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;3;3;3", "wc_summary": "210;134;101;269;196", "wc_strengths": "124;109;59;62;79", "wc_weaknesses": "305;207;145;101;49", "wc_questions": "212;25;58;27;66", "wc_limitations": "1;1;1;5;3", "wc_review": "852;476;364;464;393", "wc_reply_reviewers": "34;44;12;0;12", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 182.0, 59.01525226583379 ], "wc_strengths_avg": [ 86.6, 25.788369471527275 ], "wc_weaknesses_avg": [ 161.4, 88.60158012134998 ], "wc_questions_avg": [ 77.6, 69.1508495970946 ], "wc_limitations_avg": [ 2.2, 1.6 ], "wc_review_avg": [ 509.8, 176.20488074965462 ], "wc_reply_reviewers_avg": [ 20.4, 16.119553343687908 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7997201270305611644&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "UnlearnCanvas: Stylized Image Dataset for Enhanced Machine Unlearning Evaluation in Diffusion Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97469", "id": "t9aThFL1lE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t9aThFL1lE", "openreview": "https://openreview.net/forum?id=t9aThFL1lE", "poster": "", "project": "", "author_site": "Yihua Zhang, Chongyu Fan, Yimeng Zhang, Yuguang Yao, Jinghan Jia, Jiancheng Liu, Gaoyuan Zhang, Gaowen Liu, Ramana Kompella, Xiaoming Liu, Sijia Liu", "tldr": "", "abstract": "The technological advancements in diffusion models (DMs) have demonstrated unprecedented capabilities in text-to-image generation and are widely used in diverse applications. However, they have also raised significant societal concerns, such as the generation of harmful content and copyright disputes. Machine unlearning (MU) has emerged as a promising solution, capable of removing undesired generative capabilities from DMs. However, existing MU evaluation systems present several key challenges that can result in incomplete and inaccurate assessments. To address these issues, we propose UnlearnCanvas, a comprehensive high-resolution stylized image dataset that facilitates the evaluation of the unlearning of artistic styles and associated objects. This dataset enables the establishment of a standardized, automated evaluation framework with 7 quantitative metrics assessing various aspects of the unlearning performance for DMs. Through extensive experiments, we benchmark 9 state-of-the-art MU methods for DMs, revealing novel insights into their strengths, weaknesses, and underlying mechanisms. Additionally, we explore challenging unlearning scenarios for DMs to evaluate worst-case performance against adversarial prompts, the unlearning of finer-scale concepts, and sequential unlearning. We hope that this study can pave the way for developing more effective, accurate, and robust DM unlearning methods, ensuring safer and more ethical applications of DMs in the future. The dataset, benchmark, and codes are publicly available at this [link](https://unlearn-canvas.netlify.app/).", "keywords": "machine unlearning;diffusion models;dataset and benchmark", "primary_area": "", "supplementary_material": "/attachment/8116877b5a4a4e8743b6cbc3ded15aadb06ea792.pdf", "author": "Yihua Zhang;Chongyu Fan;Yimeng Zhang;Yuguang Yao;Jinghan Jia;Jiancheng Liu;Gaoyuan Zhang;Gaowen Liu;Ramana Rao Kompella;Xiaoming Liu;Sijia Liu", "authorids": "~Yihua_Zhang1;~Chongyu_Fan1;~Yimeng_Zhang2;~Yuguang_Yao1;~Jinghan_Jia1;~Jiancheng_Liu2;~Gaoyuan_Zhang1;~Gaowen_Liu4;~Ramana_Rao_Kompella1;~Xiaoming_Liu2;~Sijia_Liu1", "gender": "M;M;M;M;M;M;M;F;M;M;M", "homepage": "https://yihua-zhang.com;https://github.com/a-F1;https://damon-demon.github.io;https://www.cse.msu.edu/~yaoyugua/;https://jinghanjia.netlify.app/;https://ljcc0930.github.io/;;;https://linkedin.com/en/rkompella;http://www.cse.msu.edu/~liuxm/;https://lsjxjtu.github.io/", "dblp": ";359/3239;;238/9467;286/5392;74/3002;;136/1007;98/2327;l/XiaomingLiu0002;128/6972-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=en;-chIdAkAAAAJ;bqP_zxYAAAAJ;ReWNzl4AAAAJ;;NIv_aeQAAAAJ;uf9RZboAAAAJ;https://scholar.google.com/citations?hl=en;C7dO_UgAAAAJ", "orcid": ";;0000-0003-1608-2541;;;;;0009-0000-9194-1233;;;", "linkedin": "zhangyihua/;;;tonyyaomsu/;jinghan-jia-5194451ba/;;;;;xiaoming-liu-5a7807b/;", "or_profile": "~Yihua_Zhang1;~Chongyu_Fan1;~Yimeng_Zhang2;~Yuguang_Yao1;~Jinghan_Jia1;~Jiancheng_Liu2;~Gaoyuan_Zhang1;~Gaowen_Liu4;~Ramana_Rao_Kompella1;~Xiaoming_Liu2;~Sijia_Liu1", "aff": "Michigan State University;Huazhong University of Science and Technology;ByteDance Inc.;Michigan State University;Michigan State University;Michigan State University;International Business Machines;Cisco Systems;Cisco;Michigan State University;Michigan State University", "aff_domain": "msu.edu;hust.edu.cn;bytedance.com;msu.edu;msu.edu;msu.edu;ibm.com;cisco.com;cisco.com;msu.edu;msu.edu", "position": "PhD student;Undergrad student;Research Intern;PhD student;PhD student;MS student;Research engineer;Researcher;Researcher;Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024unlearncanvas,\ntitle={UnlearnCanvas: Stylized Image Dataset for Enhanced Machine Unlearning Evaluation in Diffusion Models},\nauthor={Yihua Zhang and Chongyu Fan and Yimeng Zhang and Yuguang Yao and Jinghan Jia and Jiancheng Liu and Gaoyuan Zhang and Gaowen Liu and Ramana Rao Kompella and Xiaoming Liu and Sijia Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=t9aThFL1lE}\n}", "github": "", "reviewers": "MenV;CWGN;T5uq;CJtq", "pdf_size": 45760115, "rating": "5;6;7;9", "confidence": "4;3;3;5", "wc_summary_and_contributions": "38;27;177;165", "wc_strengths": "30;56;72;12", "wc_improvement": "106;4;50;7", "wc_limitations": "43;15;278;19", "wc_correctness": "36;7;16;9", "wc_clarity": "1;1;5;42", "wc_relation_to_prior_work": "1;1;26;46", "wc_documentation": "19;13;8;28", "wc_additional_feedback": "1;1;1;1", "wc_review": "275;125;633;329", "wc_reply_reviewers": "0;0;13;295", "wc_reply_authors": "201;201;310;107", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 101.75, 69.4887580835922 ], "wc_strengths_avg": [ 42.5, 23.124662159694356 ], "wc_improvement_avg": [ 41.75, 41.318125562517956 ], "wc_limitations_avg": [ 88.75, 109.78700970515592 ], "wc_correctness_avg": [ 17.0, 11.467344941179714 ], "wc_clarity_avg": [ 12.25, 17.25362280797862 ], "wc_relation_to_prior_work_avg": [ 18.5, 18.874586088176873 ], "wc_documentation_avg": [ 17.0, 7.44983221287567 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 340.5, 184.67471402441646 ], "wc_reply_reviewers_avg": [ 77.0, 125.97420370853709 ], "wc_reply_authors_avg": [ 204.75, 71.86923889954589 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.560611910581388, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=563181851761594701&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "msu.edu;hust.edu.cn;bytedance.com;msu.edu;msu.edu;msu.edu;ibm.com;cisco.com;cisco.com;msu.edu;msu.edu", "author_num": 11, "aff_unique_index": "0;1;2;0;0;0;3;4;4;0;0", "aff_unique_norm": "Michigan State University;Huazhong University of Science and Technology;ByteDance;International Business Machines Corporation;Cisco Systems", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.msu.edu;http://www.hust.edu.cn;https://www.bytedance.com;https://www.ibm.com;https://www.cisco.com", "aff_unique_abbr": "MSU;HUST;ByteDance;IBM;Cisco", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "SELMA: Learning and Merging Skill-Specific Text-to-Image Experts with Auto-Generated Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93356", "id": "t9gNEhreht", "proceeding": "", "pdf": "https://openreview.net/pdf?id=t9gNEhreht", "openreview": "https://openreview.net/forum?id=t9gNEhreht", "poster": "", "project": "", "author_site": "Jialu Li, Jaemin Cho, Yi-Lin Sung, Jaehong Yoon, Mohit Bansal", "tldr": "", "abstract": "Recent text-to-image (T2I) generation models have demonstrated impressive capabilities in creating images from text descriptions. However, these T2I generation models often fail to generate images that precisely match the details of the text inputs, such as incorrect spatial relationship or missing objects. In this paper, we introduce SELMA: Skill-Specific Expert Learning and Merging with Auto-Generated Data, a novel paradigm to improve the faithfulness of T2I models by fine-tuning models on automatically generated, multi-skill image-text datasets, with skill-specific expert learning and merging. First, SELMA leverages an LLM\u2019s in-context learning capability to generate multiple datasets of text prompts that can teach different skills, and then generates the images with a T2I model based on the prompts. Next, SELMA adapts the T2I model to the new skills by learning multiple single-skill LoRA (low-rank adaptation) experts followed by expert merging. Our independent expert fine-tuning specializes multiple models for different skills, and expert merging helps build a joint multi-skill T2I model that can generate faithful images given diverse text prompts, while mitigating the knowledge conflict from different datasets. We empirically demonstrate that SELMA significantly improves the semantic alignment and text faithfulness of state-of-the-art T2I diffusion models on multiple benchmarks (+2.1% on TIFA and +6.9% on DSG), human preference metrics (PickScore, ImageReward, and HPS), as well as human evaluation. Moreover, fine-tuning with image-text pairs auto-collected via SELMA shows comparable performance to fine-tuning with ground truth data. Lastly, we show that fine-tuning with images from a weaker T2I model can help improve the generation quality of a stronger T2I model, suggesting promising weak-to-strong generalization in T2I models. We provide code in the supplementary materials.", "keywords": "Text-to-Image Generation; LoRA Learning and Merging; Skill-based Experts; Auto-Generated Data", "primary_area": "machine_vision", "supplementary_material": "/attachment/db0909502db2e235af104301cd6ed369a8d3b85d.zip", "author": "Jialu Li;Jaemin Cho;Yi-Lin Sung;Jaehong Yoon;Mohit Bansal", "authorids": "~Jialu_Li2;~Jaemin_Cho1;~Yi-Lin_Sung1;~Jaehong_Yoon1;~Mohit_Bansal2", "gender": "F;M;M;M;M", "homepage": "https://jialuli-luka.github.io/;https://j-min.io;https://jaehong31.github.io/;https://www.cs.unc.edu/~mbansal/;https://ylsung.github.io/", "dblp": ";130/8348-1;203/4449;32/5243.html;212/7264", "google_scholar": "KyI1vSgAAAAJ;IbQZoHQAAAAJ;-5comoUAAAAJ;DN8QtscAAAAJ;aW2XnF0AAAAJ", "orcid": ";0000-0002-1558-6169;;;", "linkedin": ";;jaehongyoon/;;yi-lin-sung-41a427120/", "or_profile": "~Jialu_Li2;~Jaemin_Cho1;~Jaehong_Yoon1;~Mohit_Bansal2;~Yi_Lin_Sung1", "aff": "Google;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "google.com;unc.edu;unc.edu;unc.edu;cs.unc.edu", "position": "Intern;PhD student;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2024selma,\ntitle={{SELMA}: Learning and Merging Skill-Specific Text-to-Image Experts with Auto-Generated Data},\nauthor={Jialu Li and Jaemin Cho and Yi-Lin Sung and Jaehong Yoon and Mohit Bansal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=t9gNEhreht}\n}", "github": "", "reviewers": "WQ94;c1fe;dhqF;xcmQ", "pdf_size": 8530534, "rating": "6;6;6;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;4", "wc_summary": "66;96;93;63", "wc_strengths": "112;102;28;50", "wc_weaknesses": "73;132;64;144", "wc_questions": "51;2;3;2", "wc_limitations": "25;1;6;3", "wc_review": "327;333;194;262", "wc_reply_reviewers": "52;13;24;11", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.5, 15.074813431681335 ], "wc_strengths_avg": [ 73.0, 35.05709628591621 ], "wc_weaknesses_avg": [ 103.25, 35.152347005569915 ], "wc_questions_avg": [ 14.5, 21.07723890835799 ], "wc_limitations_avg": [ 8.75, 9.54921462739214 ], "wc_review_avg": [ 279.0, 56.422513237182194 ], "wc_reply_reviewers_avg": [ 25.0, 16.355427233796124 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7542556718819874429&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;unc.edu;unc.edu;unc.edu;cs.unc.edu", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Google;University of North Carolina", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.unc.edu", "aff_unique_abbr": "Google;UNC", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Mountain View;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Interpolating Item and User Fairness in Multi-Sided Recommendations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93355", "id": "tAOg1HdvGy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tAOg1HdvGy", "openreview": "https://openreview.net/forum?id=tAOg1HdvGy", "poster": "/media/PosterPDFs/NeurIPS%202024/93355.png?t=1732744183.9936926", "project": "", "author_site": "Qinyi Chen, Jason Cheuk Nam Liang, Negin Golrezaei, Djallel Bouneffouf", "tldr": "", "abstract": "Today's online platforms heavily lean on algorithmic recommendations for bolstering user engagement and driving revenue. However, these recommendations can impact multiple stakeholders simultaneously---the platform, items (sellers), and users (customers)---each with their unique objectives, making it difficult to find the right middle ground that accommodates all stakeholders. To address this, we introduce a novel fair recommendation framework, Problem (FAIR), that flexibly balances multi-stakeholder interests via a constrained optimization formulation. We next explore Problem (FAIR) in a dynamic online setting where data uncertainty further adds complexity, and propose a low-regret algorithm FORM that concurrently performs real-time learning and fair recommendations, two tasks that are often at odds. Via both theoretical analysis and a numerical case study on real-world data, we demonstrate the efficacy of our framework and method in maintaining platform revenue while ensuring desired levels of fairness for both items and users.", "keywords": "fair recommendation;multi-sided platform;multi-stakeholder fairness;recommendation system;online learning algorithms", "primary_area": "fairness", "supplementary_material": "/attachment/edf8c40f8223db588a4efb4398a97c088ada2eca.zip", "author": "Qinyi Chen;Jason Cheuk Nam Liang;Negin Golrezaei;Djallel Bouneffouf", "authorids": "~Qinyi_Chen1;~Jason_Cheuk_Nam_Liang1;~Negin_Golrezaei1;~Djallel_Bouneffouf2", "gender": ";M;F;M", "homepage": ";http://www.mit.edu/~jcnliang/;https://www.mit.edu/~golrezae/;", "dblp": ";254/0873;37/10099.html;45/11240-1", "google_scholar": "lY2VAB0AAAAJ;https://scholar.google.com/citations?hl=en;k9uWzAIAAAAJ;", "orcid": "0000-0002-2912-2728;;;", "linkedin": "qinyi-chen-4735aa112/;jason-cheuk-nam-liang-307459113/;;", "or_profile": "~Qinyi_Chen1;~Jason_Cheuk_Nam_Liang1;~Negin_Golrezaei1;~Djallel_Bouneffouf2", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;;mit.edu;", "position": "PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nchen2024interpolating,\ntitle={Interpolating Item and User Fairness in Multi-Sided Recommendations},\nauthor={Qinyi Chen and Jason Cheuk Nam Liang and Negin Golrezaei and Djallel Bouneffouf},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tAOg1HdvGy}\n}", "github": "", "reviewers": "Pu7z;pMvw;zhjK;kac1", "pdf_size": 1147388, "rating": "5;6;6;7", "confidence": "2;2;2;3", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "81;105;94;182", "wc_strengths": "95;84;80;43", "wc_weaknesses": "115;74;102;97", "wc_questions": "101;34;89;63", "wc_limitations": "1;1;132;4", "wc_review": "393;298;497;389", "wc_reply_reviewers": "23;15;14;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 115.5, 39.322385482063524 ], "wc_strengths_avg": [ 75.5, 19.551214796017153 ], "wc_weaknesses_avg": [ 97.0, 14.815532390029054 ], "wc_questions_avg": [ 71.75, 25.762133063859444 ], "wc_limitations_avg": [ 34.5, 56.30497313737038 ], "wc_review_avg": [ 394.25, 70.44634483065818 ], "wc_reply_reviewers_avg": [ 16.25, 3.960744879438715 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14205589864499497338&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "mit.edu;;mit.edu;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Algorithms for Augmented Testing of Discrete Distributions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93354", "id": "tAlMAcqK9s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tAlMAcqK9s", "openreview": "https://openreview.net/forum?id=tAlMAcqK9s", "poster": "", "project": "", "author_site": "Maryam Aliakbarpour, Piotr Indyk, Ronitt Rubinfeld, Sandeep Silwal", "tldr": "", "abstract": "We consider the problem of hypothesis testing for discrete distributions. In the standard model, where we have sample access to an underlying distribution $p$, extensive research has established optimal bounds for uniformity testing, identity testing (goodness of fit), and closeness testing (equivalence or two-sample testing). We explore these problems in a setting where a predicted data distribution, possibly derived from historical data or predictive machine learning models, is available. We demonstrate that such a predictor can indeed reduce the number of samples required for all three property testing tasks. The reduction in sample complexity depends directly on the predictor\u2019s quality, measured by its total variation distance from $p$. A key advantage of our algorithms is their adaptability to the precision of the prediction. Specifically, our algorithms can self-adjust their sample complexity based on the accuracy of the available prediction, operating without any prior knowledge of the estimation\u2019s accuracy (i.e. they are consistent). Additionally, we never use more samples than the standard approaches require, even if the predictions provide no meaningful information (i.e. they are also robust). We provide lower bounds to indicate that the improvements in sample complexity achieved by our algorithms are information-theoretically optimal. Furthermore, experimental results show that the performance of our algorithms on real data significantly exceeds our worst-case guarantees for sample complexity, demonstrating the practicality of our approach.", "keywords": "distribution testing;learning-augmented algorithms;data driven algorithm;hypothesis testing;hypothesis selection;distribution learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/933b6c0b581e7616de8f90ae6197e3326555bb85.zip", "author": "Maryam Aliakbarpour;Piotr Indyk;Ronitt Rubinfeld;Sandeep Silwal", "authorids": "~Maryam_Aliakbarpour1;~Piotr_Indyk1;~Ronitt_Rubinfeld1;~Sandeep_Silwal1", "gender": "F;;F;M", "homepage": "https://maryamaliakbarpour.com;https://people.csail.mit.edu/indyk/;http://people.csail.mit.edu/ronitt/;https://sandeepsilwal.com", "dblp": "175/1689;i/PiotrIndyk;;225/4637", "google_scholar": "Q0crxvwAAAAJ;oOwNKsAAAAAJ;https://scholar.google.com.tw/citations?user=pZhZndYAAAAJ;MnDnUvcAAAAJ", "orcid": "0000-0001-5064-3221;;;", "linkedin": ";;;", "or_profile": "~Maryam_Aliakbarpour1;~Piotr_Indyk1;~Ronitt_Rubinfeld1;~Sandeep_Silwal1", "aff": "University of California, Berkeley;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "berkeley.edu;mit.edu;mit.edu;mit.edu", "position": "Research Fellow;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\naliakbarpour2024optimal,\ntitle={Optimal Algorithms for Augmented Testing of Discrete Distributions},\nauthor={Maryam Aliakbarpour and Piotr Indyk and Ronitt Rubinfeld and Sandeep Silwal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tAlMAcqK9s}\n}", "github": "", "reviewers": "qguF;bNNx;Zhut;BZRn", "pdf_size": 679062, "rating": "6;7;7;7", "confidence": "2;3;1;4", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "2;3;4;3", "wc_summary": "80;59;67;490", "wc_strengths": "38;10;67;130", "wc_weaknesses": "57;20;70;88", "wc_questions": "31;195;63;83", "wc_limitations": "4;1;1;3", "wc_review": "210;285;268;794", "wc_reply_reviewers": "11;177;10;0", "wc_reply_authors": "0;23;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 174.0, 182.596549803111 ], "wc_strengths_avg": [ 61.25, 44.516148755255095 ], "wc_weaknesses_avg": [ 58.75, 24.933661985356263 ], "wc_questions_avg": [ 93.0, 61.741396161732524 ], "wc_limitations_avg": [ 2.25, 1.299038105676658 ], "wc_review_avg": [ 389.25, 235.33101686773037 ], "wc_reply_reviewers_avg": [ 49.5, 73.73771084051904 ], "wc_reply_authors_avg": [ 5.75, 9.959292143521045 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6527422467353723989&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "berkeley.edu;mit.edu;mit.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu", "aff_unique_abbr": "UC Berkeley;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gorilla: Large Language Model Connected with Massive APIs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93353", "id": "tBRNC6YemY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tBRNC6YemY", "openreview": "https://openreview.net/forum?id=tBRNC6YemY", "poster": "/media/PosterPDFs/NeurIPS%202024/93353.png?t=1733815559.6167104", "project": "", "author_site": "Shishir G Patil, Tianjun Zhang, Xin Wang, Joseph Gonzalez", "tldr": "", "abstract": "Large Language Models (LLMs) have seen an impressive wave of advances, with\nmodels now excelling in a variety of tasks, such as mathematical reasoning and\nprogram synthesis. However, their potential to effectively use tools via API calls\nremains unfulfilled. This is a challenging task even for today\u2019s state-of-the-art\nLLMs such as GPT-4 largely due to their unawareness of what APIs are available\nand how to use them in a frequently updated tool set. We develop Gorilla, a\nfinetuned LLaMA model that surpasses the performance of GPT-4 on writing API\ncalls. Trained with the novel Retriever Aware Training (RAT), when combined\nwith a document retriever, Gorilla demonstrates a strong capability to adapt to\ntest-time document changes, allowing flexible user updates or version changes.\nIt also substantially mitigates the issue of hallucination, commonly encountered\nwhen prompting LLMs directly. To evaluate the model\u2019s ability, we introduce\nAPIBench, a comprehensive dataset consisting of HuggingFace, TorchHub, and\nTensorHub APIs. The successful integration of the retrieval system with Gorilla\ndemonstrates the potential for LLMs to use tools more accurately, keep up with\nfrequently updated documentation, and consequently increase the reliability and\napplicability of their outputs. Gorilla\u2019s code, model, data, and demo are available\nat: https://gorilla.cs.berkeley.edu", "keywords": "LLM;Tool Use;APIs;Function Calling", "primary_area": "generative_models", "supplementary_material": "", "author": "Shishir G Patil;Tianjun Zhang;Xin Wang;Joseph E. Gonzalez", "authorids": "~Shishir_G_Patil1;~Tianjun_Zhang1;~Xin_Wang1;~Joseph_E._Gonzalez1", "gender": "M;;F;M", "homepage": "https://shishirpatil.github.io/;https://tianjunz.github.io;https://people.eecs.berkeley.edu/~xinw/;http://eecs.berkeley.edu/~jegonzal", "dblp": "251/1906.html;;;61/8262", "google_scholar": "JTtnJUIAAAAJ;UE9jz_MAAAAJ;e9gUdKwAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ", "orcid": ";;;0000-0003-2921-956X", "linkedin": ";;xin-wang-aa83a577;", "or_profile": "~Shishir_G_Patil1;~Tianjun_Zhang1;~Xin_Wang1;~Joseph_E._Gonzalez1", "aff": "University of California, Berkeley;University of California, Berkeley;Microsoft;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;microsoft.com;berkeley.edu", "position": "PhD student;PhD student;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\npatil2024gorilla,\ntitle={Gorilla: Large Language Model Connected with Massive {API}s},\nauthor={Shishir G Patil and Tianjun Zhang and Xin Wang and Joseph E. Gonzalez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tBRNC6YemY}\n}", "github": "", "reviewers": "N9sF;jdAv;Umrp;MqVN", "pdf_size": 1214009, "rating": "6;6;6;7", "confidence": "3;4;4;4", "soundness": "3;2;2;3", "novelty": "3;3;1;4", "presentation": "4;2;3;3", "wc_summary": "61;79;130;47", "wc_strengths": "114;83;90;71", "wc_weaknesses": "115;408;289;191", "wc_questions": "111;78;60;139", "wc_limitations": "26;18;5;37", "wc_review": "427;666;574;485", "wc_reply_reviewers": "83;125;71;0", "wc_reply_authors": "0;0;0;310", "reply_reviewers": "1;1;2;0", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.25, 31.419540098480116 ], "wc_strengths_avg": [ 89.5, 15.692354826475215 ], "wc_weaknesses_avg": [ 250.75, 109.75968066644509 ], "wc_questions_avg": [ 97.0, 30.37268509697488 ], "wc_limitations_avg": [ 21.5, 11.672617529928752 ], "wc_review_avg": [ 538.0, 90.56765427016424 ], "wc_reply_reviewers_avg": [ 69.75, 44.985414302860434 ], "wc_reply_authors_avg": [ 77.5, 134.23393758658798 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 507, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2640931771833113617&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "berkeley.edu;berkeley.edu;microsoft.com;berkeley.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com", "aff_unique_abbr": "UC Berkeley;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Physics-Informed Variational State-Space Gaussian Processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93352", "id": "tCf7S75xFa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tCf7S75xFa", "openreview": "https://openreview.net/forum?id=tCf7S75xFa", "poster": "", "project": "", "author_site": "Oliver Hamelijnck, Arno Solin, Theodoros Damoulas", "tldr": "", "abstract": "Differential equations are important mechanistic models that are integral to many scientific and engineering applications. With the abundance of available data there has been a growing interest in data-driven physics-informed models. Gaussian processes (GPs) are particularly suited to this task as they can model complex, non-linear phenomena whilst incorporating prior knowledge and quantifying uncertainty. Current approaches have found some success but are limited as they either achieve poor computational scalings or focus only on the temporal setting. This work addresses these issues by introducing a variational spatio-temporal state-space GP that handles linear and non-linear physical constraints while achieving efficient linear-in-time computation costs. We demonstrate our methods in a range of synthetic and real-world settings and outperform the current state-of-the-art in both predictive and computational performance.", "keywords": "gaussian processes;variational approximations;state space gaussian processes;physics informed gaussian processes", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/9461298fce1715a838e9758280749bfe546e0f0a.zip", "author": "Oliver Hamelijnck;Arno Solin;Theodoros Damoulas", "authorids": "~Oliver_Hamelijnck1;~Arno_Solin1;~Theodoros_Damoulas1", "gender": "M;;M", "homepage": "https://ohamelijnck.github.io;http://arno.solin.fi;https://warwick.ac.uk/fac/sci/statistics/staff/academic-research/damoulas/", "dblp": ";98/11225;31/1929", "google_scholar": ";U_fJCnAAAAAJ;https://scholar.google.co.uk/citations?user=sRg-VkwAAAAJ", "orcid": ";0000-0002-0958-7886;", "linkedin": ";asolin/;", "or_profile": "~Oliver_Hamelijnck1;~Arno_Solin1;~Theo_Damoulas1", "aff": "Alan Turing Institute;Aalto University;University of Warwick", "aff_domain": "turing.ac.uk;aalto.fi;warwick.ac.uk", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhamelijnck2024physicsinformed,\ntitle={Physics-Informed Variational State-Space Gaussian Processes},\nauthor={Oliver Hamelijnck and Arno Solin and Theodoros Damoulas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tCf7S75xFa}\n}", "github": "", "reviewers": "4ngK;BaEt;zAxE;X3gM;z1FQ", "pdf_size": 1021642, "rating": "4;6;6;6;7", "confidence": "2;2;3;4;3", "soundness": "1;3;3;3;3", "novelty": "2;2;3;2;3", "presentation": "1;3;3;2;3", "wc_summary": "38;47;43;104;98", "wc_strengths": "38;100;46;40;62", "wc_weaknesses": "172;214;75;350;82", "wc_questions": "196;133;137;1;70", "wc_limitations": "1;42;4;1;1", "wc_review": "445;536;305;496;313", "wc_reply_reviewers": "56;231;0;90;12", "wc_reply_authors": "10;129;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 2.8, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.8000000000000002 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 66.0, 28.781938781117578 ], "wc_strengths_avg": [ 57.2, 22.99913041834408 ], "wc_weaknesses_avg": [ 178.6, 100.73648792766204 ], "wc_questions_avg": [ 107.4, 66.48488550039025 ], "wc_limitations_avg": [ 9.8, 16.14187101918486 ], "wc_review_avg": [ 419.0, 94.36736724101188 ], "wc_reply_reviewers_avg": [ 77.8, 83.03830441428823 ], "wc_reply_authors_avg": [ 27.8, 50.74800488689186 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4909902530309828, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KBjc4d9n7LYJ:scholar.google.com/&scioq=Physics-Informed+Variational+State-Space+Gaussian+Processes&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "turing.ac.uk;aalto.fi;warwick.ac.uk", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Alan Turing Institute;Aalto University;University of Warwick", "aff_unique_dep": ";;", "aff_unique_url": "https://www.turing.ac.uk;https://www.aalto.fi;https://www.warwick.ac.uk", "aff_unique_abbr": "ATI;Aalto;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Finland" }, { "title": "SEL-BALD: Deep Bayesian Active Learning with Selective Labels", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93351", "id": "tDMTwto6jv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tDMTwto6jv", "openreview": "https://openreview.net/forum?id=tDMTwto6jv", "poster": "", "project": "", "author_site": "Ruijiang Gao, Mingzhang Yin, Maytal Saar-Tsechansky", "tldr": "", "abstract": "Machine learning systems are widely used in many high-stakes contexts in which experimental designs for assigning treatments are infeasible. When evaluating decisions is costly, such as investigating fraud cases, or evaluating biopsy decisions, a sample-efficient strategy is needed. However, while existing active learning methods assume humans will always label the instances selected by the machine learning model, in many critical applications, humans may decline to label instances selected by the machine learning model due to reasons such as regulation constraint, domain knowledge, or algorithmic aversion, thus not sample efficient. \nIn this paper, we study the Active Learning with Instance Rejection (ALIR) problem, which considers the human discretion behavior for high-stakes decision making problems. We propose new active learning algorithms under deep bayesian active learning for selective labeling (SEL-BALD) to address the ALIR problem. Our algorithms consider how to acquire information for both the machine learning model and the human discretion model. We conduct experiments on both synthetic and real-world datasets to demonstrate the effectiveness of our proposed algorithms.", "keywords": "Bayesian Active Learning with Disagreement; Selective Labels;", "primary_area": "active_learning", "supplementary_material": "", "author": "Ruijiang Gao;Mingzhang Yin;Maytal Saar-Tsechansky", "authorids": "~Ruijiang_Gao2;~Mingzhang_Yin1;~Maytal_Saar-Tsechansky1", "gender": ";M;F", "homepage": "https://github.com/ruijiang81;http://mingzhang-yin.github.io;http://www.maytals.com", "dblp": ";200/8662;09/4099", "google_scholar": "iRHyFa4AAAAJ;oAEsILEAAAAJ;SUAbOcgAAAAJ", "orcid": ";0000-0002-5216-2437;", "linkedin": ";mingzhang-yin-19930406/;", "or_profile": "~Ruijiang_Gao2;~Mingzhang_Yin1;~Maytal_Saar-Tsechansky1", "aff": "University of Texas at Dallas;University of Florida;University of Texas at Austin", "aff_domain": "utdallas.edu;ufl.edu;utexas.edu", "position": "Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngao2024selbald,\ntitle={{SEL}-{BALD}: Deep Bayesian Active Learning for Selective Labeling with Instance Rejection},\nauthor={Ruijiang Gao and Mingzhang Yin and Maytal Saar-Tsechansky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tDMTwto6jv}\n}", "github": "", "reviewers": "WcLX;KKqr;fApQ;dpcV", "pdf_size": 2765774, "rating": "4;5;5;8", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;2;4", "wc_summary": "102;48;81;157", "wc_strengths": "35;138;93;146", "wc_weaknesses": "159;313;250;154", "wc_questions": "35;54;34;2", "wc_limitations": "6;1;3;7", "wc_review": "337;554;461;466", "wc_reply_reviewers": "105;129;39;48", "wc_reply_authors": "412;35;100;20", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;3;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.0, 39.62953444086872 ], "wc_strengths_avg": [ 103.0, 44.15314258351267 ], "wc_weaknesses_avg": [ 219.0, 66.37394066951276 ], "wc_questions_avg": [ 31.25, 18.673175948402566 ], "wc_limitations_avg": [ 4.25, 2.384848003542364 ], "wc_review_avg": [ 454.5, 77.26739286400182 ], "wc_reply_reviewers_avg": [ 80.25, 37.85085864283663 ], "wc_reply_authors_avg": [ 141.75, 158.89992920073942 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TuH0ZSV2gusJ:scholar.google.com/&scioq=SEL-BALD:+Deep+Bayesian+Active+Learning+with+Selective+Labels&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "utdallas.edu;ufl.edu;utexas.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Dallas;University of Florida;University of Texas at Austin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utdallas.edu;https://www.ufl.edu;https://www.utexas.edu", "aff_unique_abbr": "UT Dallas;UF;UT Austin", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Dallas;;Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Computation-Aware Gaussian Processes: Model Selection And Linear-Time Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93350", "id": "tDvFa5OJyS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tDvFa5OJyS", "openreview": "https://openreview.net/forum?id=tDvFa5OJyS", "poster": "/media/PosterPDFs/NeurIPS%202024/93350.png?t=1732656997.2351208", "project": "", "author_site": "Jonathan Wenger, Kaiwen Wu, Philipp Hennig, Jacob Gardner, Geoff Pleiss, John Cunningham", "tldr": "", "abstract": "Model selection in Gaussian processes scales prohibitively with the size of the training dataset, both in time and memory.\nWhile many approximations exist, all incur inevitable approximation error.\nRecent work accounts for this error in the form of computational uncertainty, which enables---at the cost of quadratic complexity---an explicit tradeoff between computational efficiency and precision.\nHere we extend this development to model selection, which requires significant enhancements to the existing approach, including linear-time scaling in the size of the dataset.\nWe propose a novel training loss for hyperparameter optimization and demonstrate empirically that the resulting method can outperform SGPR, CGGP and SVGP, state-of-the-art methods for GP model selection, on medium to large-scale datasets.\nOur experiments show that model selection for computation-aware GPs trained on 1.8 million data points can be done within a few hours on a single GPU.\nAs a result of this work, Gaussian processes can be trained on large-scale datasets without significantly compromising their ability to quantify uncertainty---a fundamental prerequisite for optimal decision-making.", "keywords": "Gaussian Processes;Model Selection;Approximate Inference;Variational Inference;Probabilistic Numerics", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Jonathan Wenger;Kaiwen Wu;Philipp Hennig;Jacob R. Gardner;Geoff Pleiss;John Patrick Cunningham", "authorids": "~Jonathan_Wenger1;~Kaiwen_Wu2;~Philipp_Hennig1;~Jacob_R._Gardner1;~Geoff_Pleiss1;~John_Patrick_Cunningham1", "gender": "M;;M;;M;M", "homepage": "https://jonathanwenger.netlify.app/;;http://mml.inf.uni-tuebingen.de;;http://geoffpleiss.com;stat.columbia.edu/~cunningham", "dblp": "242/9063;;08/9077;;199/1693.html;51/4077", "google_scholar": "https://scholar.google.com/citations?hl=de;;https://scholar.google.de/citations?user=UeG5w08AAAAJ;;XO8T-Y4AAAAJ;88cU_4UAAAAJ", "orcid": "0000-0003-2261-1331;;0000-0001-7293-6092;;0000-0002-7009-0967;", "linkedin": ";;;;;", "or_profile": "~Jonathan_Wenger1;~Kaiwen_Wu2;~Philipp_Hennig1;~Jacob_R._Gardner1;~Geoff_Pleiss1;~John_Patrick_Cunningham1", "aff": "Columbia University;;University of T\u00fcbingen;;Vector Institute;Columbia University", "aff_domain": "columbia.edu;;uni-tuebingen.de;;vectorinstitute.ai;columbia.edu", "position": "Postdoc;;Full Professor;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwenger2024computationaware,\ntitle={Computation-Aware Gaussian Processes: Model Selection And Linear-Time Inference},\nauthor={Jonathan Wenger and Kaiwen Wu and Philipp Hennig and Jacob R. Gardner and Geoff Pleiss and John Patrick Cunningham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tDvFa5OJyS}\n}", "github": "", "reviewers": "7YnY;wkY4;LXxc", "pdf_size": 2582895, "rating": "5;7;7", "confidence": "4;4;5", "soundness": "3;2;3", "novelty": "3;3;4", "presentation": "3;3;4", "wc_summary": "55;114;63", "wc_strengths": "48;126;34", "wc_weaknesses": "261;361;184", "wc_questions": "126;307;467", "wc_limitations": "12;846;17", "wc_review": "502;1754;765", "wc_reply_reviewers": "267;356;24", "wc_reply_authors": "800;1556;1262", "reply_reviewers": "3;1;1", "reply_authors": "5;4;4", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.33333333333333, 26.132142830026183 ], "wc_strengths_avg": [ 69.33333333333333, 40.47495796442811 ], "wc_weaknesses_avg": [ 268.6666666666667, 72.46301738741556 ], "wc_questions_avg": [ 300.0, 139.3006341215526 ], "wc_limitations_avg": [ 291.6666666666667, 391.9781739955541 ], "wc_review_avg": [ 1007.0, 539.0108224021728 ], "wc_reply_reviewers_avg": [ 215.66666666666666, 140.31472560719428 ], "wc_reply_authors_avg": [ 1206.0, 311.16555079250014 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.333333333333333, 0.4714045207910317 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12211776889714844498&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "columbia.edu;;uni-tuebingen.de;;vectorinstitute.ai;columbia.edu", "author_num": 6, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Columbia University;University of T\u00fcbingen;Vector Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.uni-tuebingen.de/;https://vectorinstitute.ai/", "aff_unique_abbr": "Columbia;Uni T\u00fcbingen;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;Germany;Canada" }, { "title": "Identity Decoupling for Multi-Subject Personalization of Text-to-Image Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93349", "id": "tEEpVPDaRf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tEEpVPDaRf", "openreview": "https://openreview.net/forum?id=tEEpVPDaRf", "poster": "/media/PosterPDFs/NeurIPS%202024/93349.png?t=1732765747.841763", "project": "", "author_site": "Sangwon Jang, Jaehyeong Jo, Kimin Lee, Sung Ju Hwang", "tldr": "", "abstract": "Text-to-image diffusion models have shown remarkable success in generating personalized subjects based on a few reference images. However, current methods often fail when generating multiple subjects simultaneously, resulting in mixed\nidentities with combined attributes from different subjects. In this work, we present MuDI, a novel framework that enables multi-subject personalization by effectively decoupling identities from multiple subjects. Our main idea is to utilize segmented subjects generated by a foundation model for segmentation (Segment Anything) for both training and inference, as a form of data augmentation for training and initialization for the generation process. Moreover, we further introduce a new metric to better evaluate the performance of our method on multi-subject personalization. Experimental results show that our MuDI can produce high-quality personalized images without identity mixing, even for highly similar subjects as shown in Figure 1. Specifically, in human evaluation, MuDI obtains twice the success rate for personalizing multiple subjects without identity mixing over existing baselines and is preferred over 70% against the strongest baseline.", "keywords": "Text-to-Image Diffusion Models;Multi-subject personalization", "primary_area": "generative_models", "supplementary_material": "/attachment/8395b1483dd930c45db620562d3894ead51062fc.zip", "author": "Sangwon Jang;Jaehyeong Jo;Kimin Lee;Sung Ju Hwang", "authorids": "~Sangwon_Jang1;~Jaehyeong_Jo1;~Kimin_Lee1;~Sung_Ju_Hwang1", "gender": "M;M;M;", "homepage": "https://agwmon.github.io/;https://github.com/harryjo97;https://sites.google.com/view/kiminlee;", "dblp": ";296/2037;183/6849;", "google_scholar": "5SHoqOkAAAAJ;https://scholar.google.com/citations?hl=ko;92M8xv4AAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sangwon_Jang1;~Jaehyeong_Jo1;~Kimin_Lee1;~Sung_Ju_Hwang1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.edu;", "position": "MS student;MS student;Assistant Professor;", "bibtex": "@inproceedings{\njang2024identity,\ntitle={Identity Decoupling for Multi-Subject Personalization of Text-to-Image Models},\nauthor={Sangwon Jang and Jaehyeong Jo and Kimin Lee and Sung Ju Hwang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tEEpVPDaRf}\n}", "github": "", "reviewers": "ivq5;ViAz;amUB;JDvE", "pdf_size": 44358291, "rating": "5;6;6;6", "confidence": "4;4;2;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "104;56;95;55", "wc_strengths": "64;52;21;123", "wc_weaknesses": "136;68;15;212", "wc_questions": "50;80;166;63", "wc_limitations": "2;4;1;21", "wc_review": "356;260;298;474", "wc_reply_reviewers": "44;0;0;0", "wc_reply_authors": "106;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.5, 22.23173407541571 ], "wc_strengths_avg": [ 65.0, 36.979724174201195 ], "wc_weaknesses_avg": [ 107.75, 73.90661337119974 ], "wc_questions_avg": [ 89.75, 45.29003753586433 ], "wc_limitations_avg": [ 7.0, 8.154753215150045 ], "wc_review_avg": [ 347.0, 80.90117428072351 ], "wc_reply_reviewers_avg": [ 11.0, 19.05255888325765 ], "wc_reply_authors_avg": [ 26.5, 45.89934640057525 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1861171738952750357&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;kaist.ac.kr;kaist.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Graph Neural Flows for Unveiling Systemic Interactions Among Irregularly Sampled Time Series", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93348", "id": "tFB5SsabVb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tFB5SsabVb", "openreview": "https://openreview.net/forum?id=tFB5SsabVb", "poster": "/media/PosterPDFs/NeurIPS%202024/93348.png?t=1731581463.3008184", "project": "", "author_site": "Giangiacomo Mercatali, Andre Freitas, Jie Chen", "tldr": "", "abstract": "Interacting systems are prevalent in nature. It is challenging to accurately predict the dynamics of the system if its constituent components are analyzed independently. We develop a graph-based model that unveils the systemic interactions of time series observed at irregular time points, by using a directed acyclic graph to model the conditional dependencies (a form of causal notation) of the system components and learning this graph in tandem with a continuous-time model that parameterizes the solution curves of ordinary differential equations (ODEs). Our technique, a graph neural flow, leads to substantial enhancements over non-graph-based methods, as well as graph-based methods without the modeling of conditional dependencies. We validate our approach on several tasks, including time series classification and forecasting, to demonstrate its efficacy.", "keywords": "Graph learning;neural flows;time series", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Giangiacomo Mercatali;Andre Freitas;Jie Chen", "authorids": "~Giangiacomo_Mercatali1;~Andre_Freitas1;~Jie_Chen1", "gender": ";;", "homepage": ";http://andrefreitas.org;https://jiechenjiechen.github.io", "dblp": ";47/9409.html;92/6289-7", "google_scholar": ";ExmHmMoAAAAJ;Z-lkme8AAAAJ", "orcid": ";;", "linkedin": ";andrefreitas/;", "or_profile": "~Giangiacomo_Mercatali1;~Andre_Freitas1;~Jie_Chen1", "aff": ";University of Manchester;International Business Machines", "aff_domain": ";manchester.ac.uk;ibm.com", "position": ";Associate Professor;Research Staff Member", "bibtex": "@inproceedings{\nmercatali2024graph,\ntitle={Graph Neural Flows for Unveiling Systemic Interactions Among Irregularly Sampled Time Series},\nauthor={Giangiacomo Mercatali and Andre Freitas and Jie Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tFB5SsabVb}\n}", "github": "", "reviewers": "zrVL;4eJC;3M7x;ufSP;2pqA", "pdf_size": 845085, "rating": "3;5;6;6;8", "confidence": "3;3;2;4;4", "soundness": "3;3;3;3;4", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "41;55;126;128;112", "wc_strengths": "30;64;48;71;224", "wc_weaknesses": "41;308;349;127;57", "wc_questions": "5;88;66;2;52", "wc_limitations": "2;7;23;5;64", "wc_review": "119;522;612;333;509", "wc_reply_reviewers": "18;73;49;9;0", "wc_reply_authors": "146;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;1;1;1;1", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 92.4, 36.93561966449189 ], "wc_strengths_avg": [ 87.4, 69.74697126040671 ], "wc_weaknesses_avg": [ 176.4, 128.17113559612397 ], "wc_questions_avg": [ 42.6, 33.938768392503576 ], "wc_limitations_avg": [ 20.2, 23.077261535979524 ], "wc_review_avg": [ 419.0, 175.13080825485847 ], "wc_reply_reviewers_avg": [ 29.8, 27.18381871628782 ], "wc_reply_authors_avg": [ 29.2, 58.4 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.39477101697586137, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8497167121983913977&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 6, "email": ";manchester.ac.uk;ibm.com", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Manchester;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.manchester.ac.uk;https://www.ibm.com", "aff_unique_abbr": "UoM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Verified Safe Reinforcement Learning for Neural Network Dynamic Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93347", "id": "tGDUDKirAy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tGDUDKirAy", "openreview": "https://openreview.net/forum?id=tGDUDKirAy", "poster": "/media/PosterPDFs/NeurIPS%202024/93347.png?t=1733528123.255519", "project": "", "author_site": "Junlin Wu, Huan Zhang, Yevgeniy Vorobeychik", "tldr": "", "abstract": "Learning reliably safe autonomous control is one of the core problems in trustworthy autonomy. However, training a controller that can be formally verified to be safe remains a major challenge. We introduce a novel approach for learning verified safe control policies in nonlinear neural dynamical systems while maximizing overall performance. Our approach aims to achieve safety in the sense of finite-horizon reachability proofs, and is comprised of three key parts. The first is a novel curriculum learning scheme that iteratively increases the verified safe horizon. The second leverages the iterative nature of gradient-based learning to leverage incremental verification, reusing information from prior verification runs. Finally, we learn multiple verified initial-state-dependent controllers, an idea that is especially valuable for more complex domains where learning a single universal verified safe controller is extremely challenging. Our experiments on five safe control problems demonstrate that our trained controllers can achieve verified safety over horizons that are as much as an order of magnitude longer than state-of-the-art baselines, while maintaining high reward, as well as a perfect safety record over entire episodes. Our code is available at https://github.com/jlwu002/VSRL.", "keywords": "neural network;formal verification;safe reinforcement learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Junlin Wu;Huan Zhang;Yevgeniy Vorobeychik", "authorids": "~Junlin_Wu2;~Huan_Zhang1;~Yevgeniy_Vorobeychik1", "gender": ";M;M", "homepage": "https://jlwu002.github.io/;http://huan-zhang.com;http://vorobeychik.com", "dblp": "188/8292-1;23/1797-1.html;70/2217", "google_scholar": ";LTa3GzEAAAAJ;https://scholar.google.com.tw/citations?user=ptI-HHkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Junlin_Wu2;~Huan_Zhang1;~Yevgeniy_Vorobeychik1", "aff": "Washington University, St. Louis;University of Illinois, Urbana Champaign;Washington University, St. Louis", "aff_domain": "wustl.edu;uiuc.edu;wustl.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwu2024verified,\ntitle={Verified Safe Reinforcement Learning for Neural Network Dynamic Models},\nauthor={Junlin Wu and Huan Zhang and Yevgeniy Vorobeychik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tGDUDKirAy}\n}", "github": "", "reviewers": "gxvV;11TB;8TUT;kLbx", "pdf_size": 341618, "rating": "6;6;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "69;79;94;60", "wc_strengths": "113;76;48;39", "wc_weaknesses": "62;122;34;169", "wc_questions": "50;106;38;9", "wc_limitations": "55;117;1;10", "wc_review": "349;500;215;287", "wc_reply_reviewers": "24;20;5;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.5, 12.619429464123963 ], "wc_strengths_avg": [ 69.0, 28.83574171059243 ], "wc_weaknesses_avg": [ 96.75, 52.44699705416889 ], "wc_questions_avg": [ 50.75, 35.20919624189112 ], "wc_limitations_avg": [ 45.75, 45.94221914535692 ], "wc_review_avg": [ 337.75, 104.99374981397702 ], "wc_reply_reviewers_avg": [ 18.0, 7.648529270389178 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8711221387983955251&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "wustl.edu;uiuc.edu;wustl.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Washington University in St. Louis;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://wustl.edu;https://illinois.edu", "aff_unique_abbr": "WUSTL;UIUC", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "St. Louis;Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "DG-SLAM: Robust Dynamic Gaussian Splatting SLAM with Hybrid Pose Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93346", "id": "tGozvLTDY3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tGozvLTDY3", "openreview": "https://openreview.net/forum?id=tGozvLTDY3", "poster": "/media/PosterPDFs/NeurIPS%202024/93346.png?t=1731759446.8387349", "project": "", "author_site": "Yueming Xu, Haochen Jiang, Zhongyang Xiao, Jianfeng Feng, Li Zhang", "tldr": "", "abstract": "Achieving robust and precise pose estimation in dynamic scenes is a significant research challenge in Visual Simultaneous Localization and Mapping (SLAM). Recent advancements integrating Gaussian Splatting into SLAM systems have proven effective in creating high-quality renderings using explicit 3D Gaussian models, significantly improving environmental reconstruction fidelity. However, these approaches depend on a static environment assumption and face challenges in dynamic environments due to inconsistent observations of geometry and photometry. To address this problem, we propose DG-SLAM, the first robust dynamic visual SLAM system grounded in 3D Gaussians, which provides precise camera pose estimation alongside high-fidelity reconstructions. Specifically, we propose effective strategies, including motion mask generation, adaptive Gaussian point management, and a hybrid camera tracking algorithm to improve the accuracy and robustness of pose estimation. Extensive experiments demonstrate that DG-SLAM delivers state-of-the-art performance in camera pose estimation, map reconstruction, and novel-view synthesis in dynamic scenes, outperforming existing methods meanwhile preserving real-time rendering ability.", "keywords": "SLAM;Deep learning method;Gaussian splatting;Hybrid pose estimation;Dynamic environment", "primary_area": "robotics", "supplementary_material": "", "author": "Yueming Xu;Haochen Jiang;Zhongyang Xiao;Jianfeng Feng;Li Zhang", "authorids": "~Yueming_Xu1;~Haochen_Jiang1;~Zhongyang_Xiao1;~Jianfeng_Feng2;~Li_Zhang5", "gender": "M;M;M;M;M", "homepage": "https://github.com/ming82871;;https://github.com/Xiaozhongyang;http://www.robots.ox.ac.uk/~lz/;https://www.dcs.warwick.ac.uk/~feng/", "dblp": "42/10611;;;89/5992-40;19/6212", "google_scholar": ";;;-wOTCE8AAAAJ;https://scholar.google.co.uk/citations?user=0MtAVz4AAAAJ", "orcid": ";0000-0003-3081-0891;;;", "linkedin": ";;;;", "or_profile": "~Yueming_Xu1;~Haochen_Jiang1;~Zhongyang_Xiao1;~Li_Zhang5;~Feng_Jian_Feng1", "aff": "Fudan University;Fudan University;;Fudan University;The University of Warwick", "aff_domain": "fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;warwick.ac.uk", "position": "PhD student;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024dgslam,\ntitle={{DG}-{SLAM}: Robust Dynamic Gaussian Splatting {SLAM} with Hybrid Pose Optimization},\nauthor={Yueming Xu and Haochen Jiang and Zhongyang Xiao and Jianfeng Feng and Li Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tGozvLTDY3}\n}", "github": "", "reviewers": "UDnM;fGfv;MuLC;DPxX", "pdf_size": 6458742, "rating": "3;5;5;6", "confidence": "5;4;4;3", "soundness": "2;3;2;2", "novelty": "2;3;2;2", "presentation": "1;3;3;2", "wc_summary": "56;55;49;70", "wc_strengths": "36;68;34;81", "wc_weaknesses": "507;52;332;142", "wc_questions": "5;3;225;3", "wc_limitations": "22;1;15;11", "wc_review": "626;179;655;307", "wc_reply_reviewers": "185;0;27;13", "wc_reply_authors": "870;0;20;20", "reply_reviewers": "1;0;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 57.5, 7.697402159170326 ], "wc_strengths_avg": [ 54.75, 20.29008378494283 ], "wc_weaknesses_avg": [ 258.25, 175.61943941375054 ], "wc_questions_avg": [ 59.0, 95.84362263604189 ], "wc_limitations_avg": [ 12.25, 7.595228765481656 ], "wc_review_avg": [ 441.75, 204.09480027673413 ], "wc_reply_reviewers_avg": [ 56.25, 74.94456284481217 ], "wc_reply_authors_avg": [ 227.5, 371.0373970370103 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4780475872133915911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn;warwick.ac.uk", "author_num": 5, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Fudan University;University of Warwick", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://warwick.ac.uk", "aff_unique_abbr": "Fudan;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "One-to-Normal: Anomaly Personalization for Few-shot Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93345", "id": "tIzW3l2uaN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tIzW3l2uaN", "openreview": "https://openreview.net/forum?id=tIzW3l2uaN", "poster": "/media/PosterPDFs/NeurIPS%202024/93345.png?t=1731383417.9034755", "project": "", "author_site": "Yiyue Li, Shaoting Zhang, Kang Li, Qicheng Lao", "tldr": "", "abstract": "Traditional Anomaly Detection (AD) methods have predominantly relied on unsupervised learning from extensive normal data. Recent AD methods have evolved with the advent of large pre-trained vision-language models, enhancing few-shot anomaly detection capabilities. However, these latest AD methods still exhibit limitations in accuracy improvement. One contributing factor is their direct comparison of a query image's features with those of few-shot normal images. This direct comparison often leads to a loss of precision and complicates the extension of these techniques to more complex domains\u2014an area that remains underexplored in a more refined and comprehensive manner. To address these limitations, we introduce the anomaly personalization method, which performs a personalized one-to-normal transformation of query images using an anomaly-free customized generation model, ensuring close alignment with the normal manifold. Moreover, to further enhance the stability and robustness of prediction results, we propose a triplet contrastive anomaly inference strategy, which incorporates a comprehensive comparison between the query and generated anomaly-free data pool and prompt information. Extensive evaluations across eleven datasets in three domains demonstrate our model's effectiveness compared to the latest AD methods. Additionally, our method has been proven to transfer flexibly to other AD methods, with the generated image data effectively improving the performance of other AD methods.", "keywords": "Few-shot anomaly detection;Diffusion models;Image personalization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yiyue Li;Shaoting Zhang;Kang Li;Qicheng Lao", "authorids": "~Yiyue_Li1;~Shaoting_Zhang4;~Kang_Li9;~Qicheng_Lao2", "gender": "F;M;M;", "homepage": ";;;", "dblp": "191/0284;53/3894;l/KangLi;222/3004", "google_scholar": "https://scholar.google.com/citations?hl=en;oiBMWK4AAAAJ;;", "orcid": "0000-0001-5435-1699;0000-0002-8719-448X;;", "linkedin": ";;kang-li-484142b/;qicheng-lao-02909871", "or_profile": "~Yiyue_Li1;~Shaoting_Zhang4;~Kang_Li9;~Qicheng_Lao2", "aff": "Sichuan University;Shanghai Artificial Intelligence Laboratory;;Beijing University of Posts and Telecommunications", "aff_domain": "scu.edu.cn;pjlab.org.cn;;bupt.edu.cn", "position": "PhD student;Full Professor;;Assistant Professor", "bibtex": "@inproceedings{\nli2024onetonormal,\ntitle={One-to-Normal: Anomaly Personalization for Few-shot Anomaly Detection},\nauthor={Yiyue Li and Shaoting Zhang and Kang Li and Qicheng Lao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tIzW3l2uaN}\n}", "github": "", "reviewers": "oVNv;tpYi;Ljby;WVvk", "pdf_size": 16557930, "rating": "5;5;5;7", "confidence": "3;3;5;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;4;3", "wc_summary": "72;48;75;67", "wc_strengths": "45;39;17;66", "wc_weaknesses": "188;114;144;40", "wc_questions": "188;145;87;2", "wc_limitations": "67;13;5;9", "wc_review": "560;359;328;184", "wc_reply_reviewers": "0;0;43;19", "wc_reply_authors": "0;0;957;28", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;4;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 65.5, 10.5 ], "wc_strengths_avg": [ 41.75, 17.455300054711177 ], "wc_weaknesses_avg": [ 121.5, 53.91428382163673 ], "wc_questions_avg": [ 105.5, 69.67962399439308 ], "wc_limitations_avg": [ 23.5, 25.273503912200223 ], "wc_review_avg": [ 357.75, 134.1461423224686 ], "wc_reply_reviewers_avg": [ 15.5, 17.67059704707229 ], "wc_reply_authors_avg": [ 246.25, 410.510885970153 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Q1mxe90CzIsJ:scholar.google.com/&scioq=One-to-Normal:+Anomaly+Personalization+for+Few-shot+Anomaly+Detection&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "scu.edu.cn;pjlab.org.cn;;bupt.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Sichuan University;Shanghai Artificial Intelligence Laboratory;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scu.edu.cn;http://www.shailab.org/;http://www.bupt.edu.cn/", "aff_unique_abbr": "SCU;Shanghai AI Lab;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "What Matters in Graph Class Incremental Learning? An Information Preservation Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93344", "id": "tJGX7tpGO8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tJGX7tpGO8", "openreview": "https://openreview.net/forum?id=tJGX7tpGO8", "poster": "/media/PosterPDFs/NeurIPS%202024/93344.png?t=1730377198.5423336", "project": "", "author_site": "Jialu Li, Yu Wang, Pengfei Zhu, Wanyu Lin, Qinghua Hu", "tldr": "", "abstract": "Graph class incremental learning (GCIL) requires the model to classify emerging nodes of new classes while remembering old classes. Existing methods are designed to preserve effective information of old models or graph data to alleviate forgetting, but there is no clear theoretical understanding of what matters in information preservation. In this paper, we consider that present practice suffers from high semantic and structural shifts assessed by two devised shift metrics. We provide insights into information preservation in GCIL and find that maintaining graph information can preserve information of old models in theory to calibrate node semantic and graph structure shifts. We correspond graph information into low-frequency local-global information and high-frequency information in spatial domain. Based on the analysis, we propose a framework, Graph Spatial Information Preservation (GSIP). Specifically, for low-frequency information preservation, the old node representations obtained by inputting replayed nodes into the old model are aligned with the outputs of the node and its neighbors in the new model, and then old and new outputs are globally matched after pooling. For high-frequency information preservation, the new node representations are encouraged to imitate the near-neighbor pair similarity of old node representations. GSIP achieves a 10\\% increase in terms of the forgetting metric compared to prior methods on large-scale datasets. Our framework can also seamlessly integrate existing replay designs. The code is available through https://github.com/Jillian555/GSIP.", "keywords": "Graph neural network;Class incremental learning", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/17d6496e4ecda764de12eb5e35ee9c8d1ec37188.zip", "author": "Jialu Li;Yu Wang;Pengfei Zhu;Wanyu Lin;Qinghua Hu", "authorids": "~Jialu_Li4;~Yu_Wang33;~Pengfei_Zhu1;~Wanyu_Lin1;~Qinghua_Hu1", "gender": "F;M;M;F;M", "homepage": ";https://wangyutju.github.io/;http://aiskyeye.com/;https://wanyu-lin.github.io;http://cic.tju.edu.cn/faculty/huqinghua/index.html", "dblp": "32/11008;02/5889-106;40/6172-1.html;152/1714;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-TW;vgLANV0AAAAJ;TVSNq_wAAAAJ", "orcid": "0000-0002-6504-8625;;;;0000-0001-7765-8095", "linkedin": ";;;;", "or_profile": "~Jialu_Li4;~Yu_Wang33;~Pengfei_Zhu1;~Wanyu_Lin1;~Qinghua_Hu1", "aff": "Tianjin University;Tianjin University;Tianjin University;The Hong Kong Polytechnic University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;polyu.edu.hk;tju.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nli2024what,\ntitle={What Matters in Graph Class Incremental Learning? An Information Preservation Perspective},\nauthor={Jialu Li and Yu Wang and Pengfei Zhu and Wanyu Lin and Qinghua Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tJGX7tpGO8}\n}", "github": "", "reviewers": "BwJ9;T3gk;9jHn;Za5b", "pdf_size": 0, "rating": "5;6;6;6", "confidence": "4;3;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "1;2;2;3", "wc_summary": "103;84;39;136", "wc_strengths": "34;84;74;37", "wc_weaknesses": "83;108;55;55", "wc_questions": "50;5;154;60", "wc_limitations": "19;57;25;1", "wc_review": "289;338;347;289", "wc_reply_reviewers": "0;0;31;18", "wc_reply_authors": "0;0;16;24", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 90.5, 35.07491981459117 ], "wc_strengths_avg": [ 57.25, 22.060994991160303 ], "wc_weaknesses_avg": [ 75.25, 22.094965489902897 ], "wc_questions_avg": [ 67.25, 54.2004381901106 ], "wc_limitations_avg": [ 25.5, 20.21756661915573 ], "wc_review_avg": [ 315.75, 26.93858756505248 ], "wc_reply_reviewers_avg": [ 12.25, 13.083864108129525 ], "wc_reply_authors_avg": [ 10.0, 10.392304845413264 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13456645917754309035&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;polyu.edu.hk;tju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Tianjin University;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "TJU;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SILENCE: Protecting privacy in offloaded speech understanding on resource-constrained devices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93343", "id": "tKuLgnDWWN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tKuLgnDWWN", "openreview": "https://openreview.net/forum?id=tKuLgnDWWN", "poster": "/media/PosterPDFs/NeurIPS%202024/93343.png?t=1733260733.2384102", "project": "", "author_site": "Dongqi Cai, Shangguang Wang, Zeling Zhang, Felix Xiaozhu Lin, Mengwei Xu", "tldr": "", "abstract": "Speech serves as a ubiquitous input interface for embedded mobile devices. \nCloud-based solutions, while offering powerful speech understanding services, raise significant concerns regarding user privacy. \nTo address this, disentanglement-based encoders have been proposed to remove sensitive information from speech signals without compromising the speech understanding functionality. \nHowever, these encoders demand high memory usage and computation complexity, making them impractical for resource-constrained wimpy devices.\nOur solution is based on a key observation that speech understanding hinges on long-term dependency knowledge of the entire utterance, in contrast to privacy-sensitive elements that are short-term dependent. \nExploiting this observation, we propose SILENCE, a lightweight system that selectively obscuring short-term details, without damaging the long-term dependent speech understanding performance.\nThe crucial part of SILENCE is a differential mask generator derived from interpretable learning to \nautomatically configure the masking process.\nWe have implemented SILENCE on the STM32H7 microcontroller and evaluate its efficacy under different attacking scenarios. \nOur results demonstrate that SILENCE offers speech understanding performance and privacy protection capacity comparable to existing encoders, while achieving up to 53.3$\\times$ speedup and 134.1$\\times$ reduction in memory footprint.", "keywords": "spoken language understanding;resource-constrained devices;privacy-preserving", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/91e88b5077583bc26f3acf862b95d11ae3ca2875.zip", "author": "DONGQI CAI;Shangguang Wang;Zeling Zhang;Felix Xiaozhu Lin;Mengwei Xu", "authorids": "~DONGQI_CAI2;~Shangguang_Wang1;~Zeling_Zhang1;~Felix_Xiaozhu_Lin1;~Mengwei_Xu1", "gender": "M;M;M;Not Specified;M", "homepage": "https://www.caidongqi.com;http://sguangwang.com/;https://github.com/Marovlo;https://felixlin.org;https://xumengwei.github.io/", "dblp": "159/3886-1.html;73/8637;;;143/0845", "google_scholar": ";CSrn-jIAAAAJ;;;hFmJwKQAAAAJ", "orcid": "0000-0003-2751-2500;0000-0001-7245-1298;0009-0007-3583-5269;;", "linkedin": "dongqi-cai-543403210/;;;;", "or_profile": "~DONGQI_CAI2;~Shangguang_Wang1;~Zeling_Zhang1;~Felix_Xiaozhu_Lin1;~Mengwei_Xu1", "aff": "University of Cambridge;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;University of Virginia, Charlottesville;Beijing University of Posts and Telecommunications", "aff_domain": "cam.ac.uk;bupt.edu.cn;bupt.edu.cn;virginia.edu;bupt.edu.cn", "position": "PhD student;Full Professor;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncai2024silence,\ntitle={{SILENCE}: Protecting privacy in offloaded speech understanding on resource-constrained devices},\nauthor={DONGQI CAI and Shangguang Wang and Zeling Zhang and Felix Xiaozhu Lin and Mengwei Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tKuLgnDWWN}\n}", "github": "", "reviewers": "YHpd;ubkn;rbWb;BMwq", "pdf_size": 2943565, "rating": "5;5;5;6", "confidence": "3;2;2;4", "soundness": "2;3;3;2", "novelty": "2;3;3;4", "presentation": "3;2;3;3", "wc_summary": "73;86;66;46", "wc_strengths": "57;41;23;50", "wc_weaknesses": "82;146;8;79", "wc_questions": "57;59;10;124", "wc_limitations": "1;14;1;13", "wc_review": "270;346;108;312", "wc_reply_reviewers": "25;0;0;0", "wc_reply_authors": "436;379;379;415", "reply_reviewers": "1;0;0;0", "reply_authors": "3;2;2;3", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.75, 14.463315664120728 ], "wc_strengths_avg": [ 42.75, 12.735285626950029 ], "wc_weaknesses_avg": [ 78.75, 48.83326222975484 ], "wc_questions_avg": [ 62.5, 40.56168142471414 ], "wc_limitations_avg": [ 7.25, 6.2599920127744575 ], "wc_review_avg": [ 259.0, 91.2414379544733 ], "wc_reply_reviewers_avg": [ 6.25, 10.825317547305483 ], "wc_reply_authors_avg": [ 402.25, 24.40671014290947 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uD2q287xbLgJ:scholar.google.com/&scioq=SILENCE:+Protecting+privacy+in+offloaded+speech+understanding+on+resource-constrained+devices&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "cam.ac.uk;bupt.edu.cn;bupt.edu.cn;virginia.edu;bupt.edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of Cambridge;Beijing University of Posts and Telecommunications;University of Virginia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;http://www.bupt.edu.cn/;https://www.virginia.edu", "aff_unique_abbr": "Cambridge;BUPT;UVA", "aff_campus_unique_index": "0;1;1;2;1", "aff_campus_unique": "Cambridge;Beijing;Charlottesville", "aff_country_unique_index": "0;1;1;2;1", "aff_country_unique": "United Kingdom;China;United States" }, { "title": "Consistency Purification: Effective and Efficient Diffusion Purification towards Certified Robustness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93342", "id": "tLWoxftJVh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tLWoxftJVh", "openreview": "https://openreview.net/forum?id=tLWoxftJVh", "poster": "/media/PosterPDFs/NeurIPS%202024/93342.png?t=1733416611.442321", "project": "", "author_site": "Yiquan Li, Zhongzhu Chen, Kun Jin, Jiongxiao Wang, Jiachen Lei, Bo Li, Chaowei Xiao", "tldr": "", "abstract": "Diffusion Purification, purifying noised images with diffusion models, has been widely used for enhancing certified robustness via randomized smoothing. However, existing frameworks often grapple with the balance between efficiency and effectiveness. While the Denoising Diffusion Probabilistic Model (DDPM) offers an efficient single-step purification, it falls short in ensuring purified images reside on the data manifold. Conversely, the Stochastic Diffusion Model effectively places purified images on the data manifold but demands solving cumbersome stochastic differential equations, while its derivative, the Probability Flow Ordinary Differential Equation (PF-ODE), though solving simpler ordinary differential equations, still requires multiple computational steps. In this work, we demonstrated that an ideal purification pipeline should generate the purified images on the data manifold that are as much semantically aligned to the original images for effectiveness in one step for efficiency. Therefore, we introduced Consistency Purification, an efficiency-effectiveness Pareto superior purifier compared to the previous work. Consistency Purification employs the consistency model, a one-step generative model distilled from PF-ODE, thus can generate on-manifold purified images with a single network evaluation. However, the consistency model is designed not for purification thus it does not inherently ensure semantic alignment between purified and original images. To resolve this issue, we further refine it through Consistency Fine-tuning with LPIPS loss, which enables more aligned semantic meaning while keeping the purified images on data manifold. Our comprehensive experiments demonstrate that our Consistency Purification framework achieves state-of-the-art certified robustness and efficiency compared to baseline methods.", "keywords": "Consistency Model;Diffusion Purification;Certified Robustness;Randomized Smoothing", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/e2eb7ed57ea8a403fde6e7d7a94d7101c5f41ad4.zip", "author": "Yiquan Li;Zhongzhu Chen;Kun Jin;Jiongxiao Wang;Jiachen Lei;Bo Li;Chaowei Xiao", "authorids": "~Yiquan_Li2;~Zhongzhu_Chen1;~Kun_Jin1;~Jiongxiao_Wang1;~Jiachen_Lei1;~Bo_Li19;~Chaowei_Xiao2", "gender": ";M;M;;;F;", "homepage": ";https://sites.google.com/umich.edu/zhongzhc/home;;https://jayfeather1024.github.io/jxwang.github.io/;;http://boli.cs.illinois.edu/;", "dblp": ";297/5403;;322/5991;;50/3402-26;", "google_scholar": ";;https://scholar.google.com/citations?authuser=1;sIGapHMAAAAJ;;K8vJkTcAAAAJ;", "orcid": ";;0000-0002-5293-2745;;;;", "linkedin": ";;kun-jin-1426b1126/;;;;", "or_profile": "~Yiquan_Li2;~Zhongzhu_Chen1;~Kun_Jin1;~Jiongxiao_Wang1;~Jiachen_Lei1;~Bo_Li19;~Chaowei_Xiao2", "aff": ";University of Michigan - Ann Arbor;ByteDance Inc.;University of Wisconsin - Madison;;University of Illinois, Urbana Champaign;", "aff_domain": ";umich.edu;bytedance.com;wisc.edu;;illinois.edu;", "position": ";PhD student;Researcher;PhD student;;Assistant Professor;", "bibtex": "@inproceedings{\nli2024consistency,\ntitle={Consistency Purification: Effective and Efficient Diffusion Purification towards Certified Robustness},\nauthor={Yiquan Li and Zhongzhu Chen and Kun Jin and Jiongxiao Wang and Jiachen Lei and Bo Li and Chaowei Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tLWoxftJVh}\n}", "github": "", "reviewers": "aFWW;wtha;43Xy", "pdf_size": 2414048, "rating": "5;5;5", "confidence": "3;4;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;2", "wc_summary": "132;47;64", "wc_strengths": "47;40;22", "wc_weaknesses": "93;100;74", "wc_questions": "3;107;2", "wc_limitations": "3;6;1", "wc_review": "278;300;163", "wc_reply_reviewers": "0;60;0", "wc_reply_authors": "49;115;49", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 81.0, 36.72419729097788 ], "wc_strengths_avg": [ 36.333333333333336, 10.530379332620877 ], "wc_weaknesses_avg": [ 89.0, 10.98483803552272 ], "wc_questions_avg": [ 37.333333333333336, 49.26346403663559 ], "wc_limitations_avg": [ 3.3333333333333335, 2.0548046676563256 ], "wc_review_avg": [ 247.0, 60.07217880738692 ], "wc_reply_reviewers_avg": [ 20.0, 28.284271247461902 ], "wc_reply_authors_avg": [ 71.0, 31.11269837220809 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bo2xxk4lGCkJ:scholar.google.com/&scioq=Consistency+Purification:+Effective+and+Efficient+Diffusion+Purification+towards+Certified+Robustness&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": ";umich.edu;bytedance.com;wisc.edu;;illinois.edu;", "author_num": 7, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Michigan;ByteDance;University of Wisconsin-Madison;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.umich.edu;https://www.bytedance.com;https://www.wisc.edu;https://illinois.edu", "aff_unique_abbr": "UM;ByteDance;UW-Madison;UIUC", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Ann Arbor;;Madison;Urbana-Champaign", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "SCube: Instant Large-Scale Scene Reconstruction using VoxSplats", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93341", "id": "tLXgzQ5WZl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tLXgzQ5WZl", "openreview": "https://openreview.net/forum?id=tLXgzQ5WZl", "poster": "/media/PosterPDFs/NeurIPS%202024/93341.png?t=1731722225.487227", "project": "", "author_site": "Xuanchi Ren, Yifan Lu, hanxue liang, Jay Zhangjie Wu, Huan Ling, Mike Chen, Sanja Fidler, Francis Williams, Jiahui Huang", "tldr": "", "abstract": "We present SCube, a novel method for reconstructing large-scale 3D scenes (geometry, appearance, and semantics) from a sparse set of posed images. Our method encodes reconstructed scenes using a novel representation VoxSplat, which is a set of 3D Gaussians supported on a high-resolution sparse-voxel scaffold. To reconstruct a VoxSplat from images, we employ a hierarchical voxel latent diffusion model conditioned on the input images followed by a feedforward appearance prediction model. The diffusion model generates high-resolution grids progressively in a coarse-to-fine manner, and the appearance network predicts a set of Gaussians within each voxel. From as few as 3 non-overlapping input images, SCube can generate millions of Gaussians with a 10243 voxel grid spanning hundreds of meters in 20 seconds. Past works tackling scene reconstruction from images either rely on per-scene optimization and fail to reconstruct the scene away from input views (thus requiring dense view coverage as input) or leverage geometric priors based on low-resolution models, which produce blurry results. In contrast, SCube leverages high-resolution sparse networks and produces sharp outputs from few views. We show the superiority of SCube compared to prior art using the Waymo self-driving dataset on 3D reconstruction and demonstrate its applications, such as LiDAR simulation and text-to-scene generation.", "keywords": "Large-scale Scene Reconstruction;Sparse-view Reconstruction;Sparse Voxels;Gaussian Splatting;Diffusion Models", "primary_area": "generative_models", "supplementary_material": "/attachment/05f2e8f0ce84d13f4cbd95968336bafdd7d24f09.zip", "author": "Xuanchi Ren;Yifan Lu;hanxue liang;Jay Zhangjie Wu;Huan Ling;Mike Chen;Sanja Fidler;Francis Williams;Jiahui Huang", "authorids": "~Xuanchi_Ren1;~Yifan_Lu1;~hanxue_liang1;~Jay_Zhangjie_Wu1;~Huan_Ling1;~Mike_Chen5;~Sanja_Fidler1;~Francis_Williams1;~Jiahui_Huang3", "gender": "M;M;M;M;M;M;F;M;M", "homepage": "https://xuanchiren.com/;https://yifanlu0227.github.io;https://hanxuel.github.io/;https://zhangjiewu.github.io/;http://www.cs.toronto.edu/~linghuan/;;http://www.cs.toronto.edu/~fidler/;http://fwilliams.info;https://huangjh-pub.github.io/", "dblp": "255/5432;;295/9018;322/0749;202/1680;;08/6607;195/8005;", "google_scholar": "fDHUk18AAAAJ;hiXGPH8AAAAJ;https://scholar.google.com/citations?view_op=list_works;WVp4yjoAAAAJ;03n03GEAAAAJ;;CUlqK5EAAAAJ;aQW6t9EAAAAJ;3Cuki_wAAAAJ", "orcid": ";;;;;;;;", "linkedin": ";yifan-lu-65ab69229/;hanxue-charles-liang-78b581177/;;;nvidia-mikechen/;sanja-fidler-2846a1a?trk=hp-identity-name;francis-williams/;jiahui-huang-354114107/", "or_profile": "~Xuanchi_Ren1;~Yifan_Lu1;~hanxue_liang1;~Jay_Zhangjie_Wu1;~Huan_Ling1;~Mike_Chen5;~Sanja_Fidler1;~Francis_Williams1;~Jiahui_Huang3", "aff": "NVIDIA;NVIDIA;University of Cambridge;National University of Singapore;NVIDIA;NVIDIA;Department of Computer Science, University of Toronto;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;cam.ac.uk;u.nus.edu;nvidia.com;nvidia.com;cs.toronto.edu;nvidia.com;nvidia.com", "position": "Researcher;Intern;PhD student;PhD student;Researcher;Principal Researcher;Associate Professor;Research Scienteist;Researcher", "bibtex": "@inproceedings{\nren2024scube,\ntitle={{SC}ube: Instant Large-Scale Scene Reconstruction using VoxSplats},\nauthor={Xuanchi Ren and Yifan Lu and hanxue liang and Jay Zhangjie Wu and Huan Ling and Mike Chen and Sanja Fidler and Francis Williams and Jiahui Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tLXgzQ5WZl}\n}", "github": "", "reviewers": "CDsX;NKmV;6vwn;PK9U", "pdf_size": 26341602, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "87;134;83;98", "wc_strengths": "150;96;44;54", "wc_weaknesses": "180;254;53;63", "wc_questions": "2;21;23;27", "wc_limitations": "1;11;44;8", "wc_review": "420;516;247;250", "wc_reply_reviewers": "38;0;9;90", "wc_reply_authors": "181;0;10;594", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;2;3", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 100.5, 20.1059692628831 ], "wc_strengths_avg": [ 86.0, 41.78516483155236 ], "wc_weaknesses_avg": [ 137.5, 83.76902768923607 ], "wc_questions_avg": [ 18.25, 9.627434756984853 ], "wc_limitations_avg": [ 16.0, 16.56804152578089 ], "wc_review_avg": [ 358.25, 114.88336476618362 ], "wc_reply_reviewers_avg": [ 34.25, 35.116769498346514 ], "wc_reply_authors_avg": [ 196.25, 240.6453562818115 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5252153204127803342&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nvidia.com;nvidia.com;cam.ac.uk;u.nus.edu;nvidia.com;nvidia.com;cs.toronto.edu;nvidia.com;nvidia.com", "author_num": 9, "aff_unique_index": "0;0;1;2;0;0;3;0;0", "aff_unique_norm": "NVIDIA;University of Cambridge;National University of Singapore;University of Toronto", "aff_unique_dep": "NVIDIA Corporation;;;Department of Computer Science", "aff_unique_url": "https://www.nvidia.com;https://www.cam.ac.uk;https://www.nus.edu.sg;https://www.utoronto.ca", "aff_unique_abbr": "NVIDIA;Cambridge;NUS;U of T", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Toronto", "aff_country_unique_index": "0;0;1;2;0;0;3;0;0", "aff_country_unique": "United States;United Kingdom;Singapore;Canada" }, { "title": "TinyLUT: Tiny Look-Up Table for Efficient Image Restoration at the Edge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93340", "id": "tN0xnYPLt6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tN0xnYPLt6", "openreview": "https://openreview.net/forum?id=tN0xnYPLt6", "poster": "/media/PosterPDFs/NeurIPS%202024/93340.png?t=1731587119.7466593", "project": "", "author_site": "Huanan LI, Juntao Guan, Lai Rui, Sijun Ma, Lin Gu, Noperson", "tldr": "", "abstract": "Look-up tables(LUTs)-based methods have recently shown enormous potential in image restoration tasks, which are capable of significantly accelerating the inference. However, the size of LUT exhibits exponential growth with the convolution kernel size, creating a storage bottleneck for its broader application on edge devices. Here, we address the storage explosion challenge to promote the capacity of mapping the complex CNN models by LUT. We introduce an innovative separable mapping strategy to achieve over $7\\times$ storage reduction, transforming the storage from exponential dependence on kernel size to a linear relationship. Moreover, we design a dynamic discretization mechanism to decompose the activation and compress the quantization scale that further shrinks the LUT storage by $4.48\\times$. As a result, the storage requirement of our proposed TinyLUT is around 4.1\\% of MuLUT-SDY-X2 and amenable to on-chip cache, yielding competitive accuracy with over $5\\times$ lower inference latency on Raspberry 4B than FSRCNN. Our proposed TinyLUT enables superior inference speed on edge devices with new state-of-the-art accuracy on both of image super-resolution and denoising, showcasing the potential of applying this method to various image restoration tasks at the edge. The codes are available at: https://github.com/Jonas-KD/TinyLUT.", "keywords": "Image restoration;Deep Learning;Look-up Table;Super Resolution;Image Denoising", "primary_area": "machine_vision", "supplementary_material": "", "author": "Huanan LI;Juntao Guan;Lai Rui;Sijun Ma;Lin Gu;Zhangming Zhu", "authorids": "~Huanan_LI1;~Juntao_Guan1;~Lai_Rui1;~Sijun_Ma1;~Lin_Gu4;~Zhangming_Zhu1", "gender": "M;M;M;M;M;", "homepage": "https://github.com/jonas710118;https://faculty.xidian.edu.cn/guanjuntao/zh_CN/index.htm;https://web.xidian.edu.cn/rlai/;https://github.com/Sijun-Ma;;", "dblp": ";;;;;", "google_scholar": ";;;;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Huanan_LI1;~Juntao_Guan1;~Lai_Rui1;~Sijun_Ma1;~Lin_Gu4;~Zhangming_Zhu1", "aff": "Xi'an University of Electronic Science and Technology;Xi'an University of Electronic Science and Technology;Xi'an University of Electronic Science and Technology;Xi'an University of Electronic Science and Technology;RIKEN;", "aff_domain": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;riken.jp;", "position": "PhD student;Associate Professor;Full Professor;MS student;Researcher;", "bibtex": "@inproceedings{\nli2024tinylut,\ntitle={Tiny{LUT}: Tiny Look-Up Table for Efficient Image Restoration at the Edge},\nauthor={Huanan LI and Juntao Guan and Lai Rui and Sijun Ma and Lin Gu and Zhangming Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tN0xnYPLt6}\n}", "github": "", "reviewers": "JnS6;2kHn;G5Ti;MFFN", "pdf_size": 3980701, "rating": "3;5;6;7", "confidence": "4;5;3;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "52;43;34;69", "wc_strengths": "25;30;48;42", "wc_weaknesses": "98;129;59;103", "wc_questions": "22;3;32;79", "wc_limitations": "1;3;1;8", "wc_review": "198;208;174;301", "wc_reply_reviewers": "183;88;0;49", "wc_reply_authors": "167;333;65;40", "reply_reviewers": "1;1;0;1", "reply_authors": "3;3;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 49.5, 12.932517156377562 ], "wc_strengths_avg": [ 36.25, 9.175374651751284 ], "wc_weaknesses_avg": [ 97.25, 25.02373872945448 ], "wc_questions_avg": [ 34.0, 27.99107000455681 ], "wc_limitations_avg": [ 3.25, 2.8613807855648994 ], "wc_review_avg": [ 220.25, 48.230566034414316 ], "wc_reply_reviewers_avg": [ 80.0, 67.14536469481718 ], "wc_reply_authors_avg": [ 151.25, 115.21366021440339 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.15289415743128767, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1273330063588902566&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": "xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;xidian.edu.cn;riken.jp;", "author_num": 6, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Xi'an University of Electronic Science and Technology;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.riken.jp", "aff_unique_abbr": "Xidian University;RIKEN", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Japan" }, { "title": "OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97468", "id": "tN61DTr4Ed", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tN61DTr4Ed", "openreview": "https://openreview.net/forum?id=tN61DTr4Ed", "poster": "/media/PosterPDFs/NeurIPS%202024/97468.png?t=1733745037.8665802", "project": "", "author_site": "Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Jing Hua Toh, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu", "tldr": "", "abstract": "Autonomous agents that accomplish complex computer tasks with minimal human interventions have the potential to transform human-computer interaction, significantly enhancing accessibility and productivity. However, existing benchmarks either lack an interactive environment or are limited to environments specific to certain applications or domains, failing to reflect the diverse and complex nature of real-world computer use, thereby limiting the scope of tasks and agent scalability. To address this issue, we introduce OSWorld, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems such as Ubuntu, Windows, and macOS. OSWorld can serve as a unified, integrated computer environment for assessing open-ended computer tasks that involve arbitrary applications. Building upon OSWorld, we create a benchmark of 369 computer tasks involving real web and desktop apps in open domains, OS file I/O, and workflows spanning multiple applications. Each task example is derived from real-world computer use cases and includes a detailed initial state setup configuration and a custom execution-based evaluation script for reliable, reproducible evaluation. Extensive evaluation of state-of-the-art LLM/VLM-based agents on OSWorld reveals significant deficiencies in their ability to serve as computer assistants. While humans can accomplish over 72.36% of the tasks, the best model achieves only 12.24% success, primarily struggling with GUI grounding and operational knowledge. Comprehensive analysis using OSWorld provides valuable insights for developing multimodal generalist agents that were not possible with previous benchmarks. Our code, environment, baseline models, and data are publicly available at [this https URL](https://os-world.github.io/).", "keywords": "Benchmark; Multimodal Agents; Real Computer Environment", "primary_area": "", "supplementary_material": "/attachment/721522e27847a4058ea51fa83bd35c5131869dfb.zip", "author": "Tianbao Xie;Danyang Zhang;Jixuan Chen;Xiaochuan Li;Siheng Zhao;Ruisheng Cao;Toh Jing Hua;Zhoujun Cheng;Dongchan Shin;Fangyu Lei;Yitao Liu;Yiheng Xu;Shuyan Zhou;Silvio Savarese;Caiming Xiong;Victor Zhong;Tao Yu", "authorids": "~Tianbao_Xie1;~Danyang_Zhang2;~Jixuan_Chen1;~Xiaochuan_Li3;~Siheng_Zhao1;~Ruisheng_Cao1;~Toh_Jing_Hua1;~Zhoujun_Cheng1;~Dongchan_Shin1;~Fangyu_Lei1;~Yitao_Liu2;~Yiheng_Xu1;~Shuyan_Zhou1;~Silvio_Savarese1;~Caiming_Xiong1;~Victor_Zhong1;~Tao_Yu5", "gender": "M;;F;M;;M;F;M;;M;M;;Non-Binary;M;M;M;M", "homepage": "https://tianbaoxie.com;https://zdy023.github.io;https://chenjix.github.io;https://xiaochuanli.com;https://sihengz02.github.io/;https://rhythmcao.github.io/;https://github.com/ztjhz;http://blankcheng.github.io;;https://lfy79001.github.io;https://yitaoliu17.com/;;https://shuyanzhou.github.io/;;http://cmxiong.com/;http://www.victorzhong.com;https://taoyds.github.io/", "dblp": ";;342/1459;;341/1176;244/9541;;;;329/5621;128/1821;;;50/3578;80/7282;182/8931;67/1014-9", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=CMHormkAAAAJ;kmBSlgEAAAAJ;97QHT-0AAAAJ;l7EAauYAAAAJ;NdK881sAAAAJ;;t41vrrQAAAAJ;;1WzAOSkAAAAJ;;;t6YzEpgAAAAJ;ImpbxLsAAAAJ;vaSdahkAAAAJ;lT3YoNkAAAAJ;5_Fn5CIAAAAJ", "orcid": ";0009-0001-4958-5017;;;;0000-0003-4635-4368;;;;;;;;;;;", "linkedin": ";%E4%B8%B9%E9%98%B3-%E5%BC%A0-b62931182;jixuan-chen-a99112298/;;;;;;dongchan-shin-2a4890275;;%E4%BA%A6%E9%9F%AC-%E5%88%98-9ba124257/;;;;caiming-xiong-150a1417;victorzhong;tao-yu-b9b551a5/", "or_profile": "~Tianbao_Xie1;~Danyang_Zhang2;~Jixuan_Chen1;~Xiaochuan_Li3;~Siheng_Zhao1;~Ruisheng_Cao1;~Toh_Jing_Hua1;~Zhoujun_Cheng1;~Dongchan_Shin1;~Fangyu_Lei1;~Yitao_Liu2;~Yiheng_Xu1;~Shuyan_Zhou1;~Silvio_Savarese1;~Caiming_Xiong1;~Victor_Zhong1;~Tao_Yu5", "aff": "University of Hong Kong;University of Hong Kong;Nanjing University;Tsinghua University;Nanjing University;Shanghai Jiaotong University;Nanyang Technological University;Shanghai Jiaotong University;University of Hong Kong;Institute of Automation, Chinese Academy of Sciences;The University of Hong Kong;;Carnegie Mellon University;Stanford University;Salesforce Research;Microsoft;The University of Hong Kong", "aff_domain": "cs.hku.hk;hku.hk;smail.nju.edu.cn;mail.tsinghua.edu.cn;nju.edu.cn;sjtu.edu.cn;ntu.edu.sg;sjtu.edu.cn;hku.hk;ia.ac.cn;hku.hk;;cs.cmu.edu;stanford.edu;salesforce.com;microsoft.com;hku.hk", "position": "PhD student;Intern;Undergrad student;Undergrad student;Undergrad student;PhD student;Undergrad student;MS student;Undergrad student;PhD student;Undergrad student;;PhD student;Adjunct Professor;Research Scientist;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nxie2024osworld,\ntitle={{OSW}orld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments},\nauthor={Tianbao Xie and Danyang Zhang and Jixuan Chen and Xiaochuan Li and Siheng Zhao and Ruisheng Cao and Toh Jing Hua and Zhoujun Cheng and Dongchan Shin and Fangyu Lei and Yitao Liu and Yiheng Xu and Shuyan Zhou and Silvio Savarese and Caiming Xiong and Victor Zhong and Tao Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=tN61DTr4Ed}\n}", "github": "", "reviewers": "bitD;AeF1;oXY7;bfsS", "pdf_size": 36705248, "rating": "5;6;7;9", "confidence": "3;4;3;4", "wc_summary_and_contributions": "136;79;81;59", "wc_strengths": "43;46;38;145", "wc_improvement": "46;142;160;213", "wc_limitations": "51;1;4;122", "wc_correctness": "1;11;13;42", "wc_clarity": "1;1;41;6", "wc_relation_to_prior_work": "147;39;5;1", "wc_documentation": "1;1;12;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "427;321;355;590", "wc_reply_reviewers": "0;0;369;57", "wc_reply_authors": "215;262;359;132", "reply_reviewers": "0;0;3;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 88.75, 28.603976996215053 ], "wc_strengths_avg": [ 68.0, 44.54772721475249 ], "wc_improvement_avg": [ 140.25, 60.35053852286656 ], "wc_limitations_avg": [ 44.5, 48.94129136015927 ], "wc_correctness_avg": [ 16.75, 15.270478054075452 ], "wc_clarity_avg": [ 12.25, 16.723860200324566 ], "wc_relation_to_prior_work_avg": [ 48.0, 59.033888572581766 ], "wc_documentation_avg": [ 3.75, 4.763139720814412 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 423.25, 103.60109796715477 ], "wc_reply_reviewers_avg": [ 106.5, 153.3305253366074 ], "wc_reply_authors_avg": [ 242.0, 82.03352973022677 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 121, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8681997263218666179&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.hku.hk;hku.hk;smail.nju.edu.cn;mail.tsinghua.edu.cn;nju.edu.cn;sjtu.edu.cn;ntu.edu.sg;sjtu.edu.cn;hku.hk;ia.ac.cn;hku.hk;;cs.cmu.edu;stanford.edu;salesforce.com;microsoft.com;hku.hk", "author_num": 17, "aff_unique_index": "0;0;1;2;1;3;4;3;0;5;0;6;7;8;9;0", "aff_unique_norm": "University of Hong Kong;Nanjing University;Tsinghua University;Shanghai Jiao Tong University;Nanyang Technological University;Chinese Academy of Sciences;Carnegie Mellon University;Stanford University;Salesforce;Microsoft", "aff_unique_dep": ";;;;;Institute of Automation;;;Salesforce Research;Microsoft Corporation", "aff_unique_url": "https://www.hku.hk;https://www.nju.edu.cn;https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://www.ntu.edu.sg;http://www.ia.cas.cn;https://www.cmu.edu;https://www.stanford.edu;https://research.salesforce.com;https://www.microsoft.com", "aff_unique_abbr": "HKU;Nanjing U;THU;SJTU;NTU;CAS;CMU;Stanford;Salesforce;Microsoft", "aff_campus_unique_index": "0;0;0;0;2;0", "aff_campus_unique": "Hong Kong SAR;;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0;0;2;2;2;2;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "QGym: Scalable Simulation and Benchmarking of Queuing Network Controllers", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97467", "id": "tNCdnpEKrR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tNCdnpEKrR", "openreview": "https://openreview.net/forum?id=tNCdnpEKrR", "poster": "", "project": "", "author_site": "Haozhe Chen, Ang Li, Ethan Che, Jing Dong, Tianyi Peng, Hongseok Namkoong", "tldr": "", "abstract": "Queuing network control allows allocation of scarce resources to manage congestion, a fundamental problem in manufacturing, communications, and healthcare. Compared to standard RL problems, queueing problems are distinguished by unique challenges: i) a system operating in continuous time, ii) high stochasticity, and iii) long horizons over which the system can become unstable (exploding delays). To provide the empirical foundations for methodological development tackling these challenges, we present an open-sourced queueing simulation framework, QGym, that benchmark queueing policies across realistic problem instances. Our modular framework allows the researchers to build on our initial instances, which provide a wide range of environments including parallel servers, criss-cross, tandem, and re-entrant networks, as well as a realistically calibrated hospital queuing system. From these, various policies can be easily tested, including both model-free RL methods and classical queuing policies. Our testbed significantly expands the scope of \nempirical benchmarking in prior work, and complements the\ntraditional focus on evaluating algorithms based on mathematical guarantees in idealized settings. QGym code is open-sourced at https://github.com/namkoong-lab/QGym.", "keywords": "rl;queueing;simulator;ppo", "primary_area": "", "supplementary_material": "/attachment/f5b092690710072b5cd30ec5a9cfb2762598b2e1.pdf", "author": "Haozhe Chen;Ang Li;Ethan Che;Jing Dong;Tianyi Peng;Hongseok Namkoong", "authorids": "~Haozhe_Chen2;~Ang_Li26;~Ethan_Che1;~Jing_Dong2;~Tianyi_Peng1;~Hongseok_Namkoong2", "gender": "M;M;M;F;M;M", "homepage": ";https://leonlixyz.github.io/;https://ethche.github.io/;http://www.columbia.edu/~jd2736/;https://tianyipeng.github.io/;https://hsnamkoong.github.io", "dblp": ";;;;243/6511;191/6680", "google_scholar": ";https://scholar.google.com/citations?hl=en;;o9g6YPMAAAAJ;2kAHF2MAAAAJ;dyXX1EgAAAAJ", "orcid": ";;;;;", "linkedin": "haozhe-chen/;leon-li-18654a220/;;;;", "or_profile": "~Haozhe_Chen2;~Ang_Li26;~Ethan_Che1;~Jing_Dong2;~Tianyi_Peng1;~Hongseok_Namkoong2", "aff": "Columbia University;Columbia University;Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu", "position": "Undergrad student;Undergrad student;PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024qgym,\ntitle={{QG}ym: Scalable Simulation and Benchmarking of Queuing Network Controllers},\nauthor={Haozhe Chen and Ang Li and Ethan Che and Jing Dong and Tianyi Peng and Hongseok Namkoong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=tNCdnpEKrR}\n}", "github": "", "reviewers": "ZE2n;RX4h;k8Kb", "pdf_size": 1311535, "rating": "7;7;8", "confidence": "3;4;4", "wc_summary_and_contributions": "72;13;144", "wc_strengths": "57;2;111", "wc_improvement": "155;5;71", "wc_limitations": "11;2;8", "wc_correctness": "2;1;56", "wc_clarity": "2;7;45", "wc_relation_to_prior_work": "2;1;59", "wc_documentation": "8;3;10", "wc_additional_feedback": "1;1;1", "wc_review": "310;35;505", "wc_reply_reviewers": "17;18;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 76.33333333333333, 53.56823270890645 ], "wc_strengths_avg": [ 56.666666666666664, 44.4996878890428 ], "wc_improvement_avg": [ 77.0, 61.38403701289123 ], "wc_limitations_avg": [ 7.0, 3.7416573867739413 ], "wc_correctness_avg": [ 19.666666666666668, 25.69478978746902 ], "wc_clarity_avg": [ 18.0, 19.200694431886227 ], "wc_relation_to_prior_work_avg": [ 20.666666666666668, 27.10883414846328 ], "wc_documentation_avg": [ 7.0, 2.943920288775949 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.3333333333333, 192.80099123765473 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 8.259674462242577 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b9RYcbJ6JPgJ:scholar.google.com/&scioq=QGym:+Scalable+Simulation+and+Benchmarking+of+Queuing+Network+Controllers&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu;columbia.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Microstructures and Accuracy of Graph Recall by Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93339", "id": "tNhwg9U767", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tNhwg9U767", "openreview": "https://openreview.net/forum?id=tNhwg9U767", "poster": "/media/PosterPDFs/NeurIPS%202024/93339.png?t=1733633770.6053984", "project": "", "author_site": "Yanbang Wang, Hejie Cui, Jon Kleinberg", "tldr": "", "abstract": "Graphs data is crucial for many applications, and much of it exists in the relations described in textual format. As a result, being able to accurately recall and encode a graph described in earlier text is a basic yet pivotal ability that LLMs need to demonstrate if they are to perform reasoning tasks that involve graph-structured information. Human performance at graph recall by has been studied by cognitive scientists for decades, and has been found to often exhibit certain structural patterns of bias that align with human handling of social relationships. To date, however, we know little about how LLMs behave in analogous graph recall tasks: do their recalled graphs also exhibit certain biased patterns, and if so, how do they compare with humans and affect other graph reasoning tasks? In this work, we perform the first systematical study of graph recall by LLMs, investigating the accuracy and biased microstructures (local structural patterns) in their recall. We find that LLMs not only underperform often in graph recall, but also tend to favor more triangles and alternating 2-paths. Moreover, we find that more advanced LLMs have a striking dependence on the domain that a real-world graph comes from --- by yielding the best recall accuracy when the graph is narrated in a language style consistent with its original domain.", "keywords": "large language models;graph recall;human cognition;sociology;network motif", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Yanbang Wang;Hejie Cui;Jon Kleinberg", "authorids": "~Yanbang_Wang1;~Hejie_Cui1;~Jon_Kleinberg3", "gender": ";F;M", "homepage": ";https://hejiecui.com/;http://www.cs.cornell.edu/home/kleinber/", "dblp": "232/1994;221/7865;https://dblp.uni-trier.de/pid/k/JonMKleinberg.html", "google_scholar": "Ch3YUgsAAAAJ;r0Vh6GEAAAAJ;VX7d5EQAAAAJ", "orcid": ";0000-0001-6388-2619;0000-0002-1929-2512", "linkedin": ";hejie-cui-b1071b13b/;", "or_profile": "~Yanbang_Wang1;~Hejie_Cui1;~Jon_Kleinberg3", "aff": "Department of Computer Science, Cornell University;Emory University;", "aff_domain": "cs.cornell.edu;emory.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nwang2024microstructures,\ntitle={Microstructures and Accuracy of Graph Recall by Large Language Models},\nauthor={Yanbang Wang and Hejie Cui and Jon Kleinberg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tNhwg9U767}\n}", "github": "", "reviewers": "uAxF;iUa6;QYAY", "pdf_size": 3963910, "rating": "4;6;7", "confidence": "5;5;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;4;4", "wc_summary": "43;75;54", "wc_strengths": "68;21;124", "wc_weaknesses": "47;56;204", "wc_questions": "40;88;146", "wc_limitations": "36;1;9", "wc_review": "234;241;537", "wc_reply_reviewers": "24;127;13", "wc_reply_authors": "46;80;11", "reply_reviewers": "1;1;1", "reply_authors": "3;3;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 57.333333333333336, 13.27487183449325 ], "wc_strengths_avg": [ 71.0, 42.10304818735416 ], "wc_weaknesses_avg": [ 102.33333333333333, 71.98302268976731 ], "wc_questions_avg": [ 91.33333333333333, 43.33846123505335 ], "wc_limitations_avg": [ 15.333333333333334, 14.974051630144134 ], "wc_review_avg": [ 337.3333333333333, 141.21457274500943 ], "wc_reply_reviewers_avg": [ 54.666666666666664, 51.34415470355143 ], "wc_reply_authors_avg": [ 45.666666666666664, 28.1701181317288 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7559289460184544, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14496967045640308309&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "cs.cornell.edu;emory.edu;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Cornell University;Emory University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.cornell.edu;https://www.emory.edu", "aff_unique_abbr": "Cornell;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "An Image is Worth 32 Tokens for Reconstruction and Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93338", "id": "tOXoQPRzPL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tOXoQPRzPL", "openreview": "https://openreview.net/forum?id=tOXoQPRzPL", "poster": "/media/PosterPDFs/NeurIPS%202024/93338.png?t=1731619165.7556756", "project": "", "author_site": "Qihang Yu, Mark Weber, Xueqing Deng, Xiaohui Shen, Daniel Cremers, Liang-Chieh Chen", "tldr": "", "abstract": "Recent advancements in generative models have highlighted the crucial role of image tokenization in the efficient synthesis of high-resolution images. Tokenization, which transforms images into latent representations, reduces computational demands compared to directly processing pixels and enhances the effectiveness and efficiency of the generation process. Prior methods, such as VQGAN, typically utilize 2D latent grids with fixed downsampling factors. However, these 2D tokenizations face challenges in managing the inherent redundancies present in images, where adjacent regions frequently display similarities. To overcome this issue, we introduce **T**ransformer-based 1-D**i**mensional **Tok**enizer (TiTok), an innovative approach that tokenizes images into 1D latent sequences. TiTok provides a more compact latent representation, yielding substantially more efficient and effective representations than conventional techniques. For example, a 256 \u00d7 256 \u00d7 3 image can be reduced to just **32** discrete tokens, a significant reduction from the 256 or 1024 tokens obtained by prior methods. Despite its compact nature, TiTok achieves competitive performance to state-of-the-art approaches. Specifically, using the same generator framework, TiTok attains **1.97** gFID, outperforming MaskGIT baseline significantly by 4.21 at ImageNet 256 \u00d7 256 benchmark. The advantages of TiTok become even more significant when it comes to higher resolution. At ImageNet 512 \u00d7 512 benchmark, TiTok not only outperforms state-of-the-art diffusion model DiT-XL/2 (gFID 2.74 vs. 3.04), but also reduces the image tokens by 64\u00d7, leading to **410\u00d7 faster** generation process. Our best-performing variant can significantly surpasses DiT-XL/2 (gFID **2.13** vs. 3.04) while still generating high-quality samples **74\u00d7 faster**. Codes and models are available at https://github.com/bytedance/1d-tokenizer", "keywords": "image tokenization;image generation", "primary_area": "generative_models", "supplementary_material": "", "author": "Qihang Yu;Mark Weber;Xueqing Deng;Xiaohui Shen;Daniel Cremers;Liang-Chieh Chen", "authorids": "~Qihang_Yu1;~Mark_Weber1;~Xueqing_Deng2;~Xiaohui_Shen2;~Daniel_Cremers1;~Liang-Chieh_Chen1", "gender": ";;;;M;", "homepage": ";;;https://xiaohuishen.github.io/;https://vision.in.tum.de/members/cremers;http://liangchiehchen.com/", "dblp": ";;;88/6582;c/DanielCremers;138/2443", "google_scholar": "7zZdZxsAAAAJ;;;pViZYwIAAAAJ;cXQciMEAAAAJ;ACjYGPUAAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Qihang_Yu1;~Mark_Weber1;~Xueqing_Deng2;~Xiaohui_Shen2;~Daniel_Cremers1;~Liang-Chieh_Chen1", "aff": "ByteDance;;;ByteDance Inc.;Technical University Munich;ByteDance / TikTok", "aff_domain": "bytedance.com;;;bytedance.com;tum.de;bytedance.com", "position": "Researcher;;;Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nyu2024an,\ntitle={An Image is Worth 32 Tokens for Reconstruction and Generation},\nauthor={Qihang Yu and Mark Weber and Xueqing Deng and Xiaohui Shen and Daniel Cremers and Liang-Chieh Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tOXoQPRzPL}\n}", "github": "", "reviewers": "Pxnc;Mn4Y;YNgD;8XiP;HiP7", "pdf_size": 9282355, "rating": "4;6;6;7;8", "confidence": "5;4;5;4;4", "soundness": "1;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "105;101;84;97;158", "wc_strengths": "70;113;36;77;46", "wc_weaknesses": "194;104;265;104;103", "wc_questions": "70;46;7;119;38", "wc_limitations": "5;4;1;1;13", "wc_review": "444;368;393;398;358", "wc_reply_reviewers": "150;24;21;0;30", "wc_reply_authors": "0;46;48;0;46", "reply_reviewers": "1;1;1;0;1", "reply_authors": "1;2;2;1;2", "rating_avg": [ 6.2, 1.32664991614216 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 109.0, 25.495097567963924 ], "wc_strengths_avg": [ 68.4, 26.8968399630886 ], "wc_weaknesses_avg": [ 154.0, 65.60792635040373 ], "wc_questions_avg": [ 56.0, 37.389838191679836 ], "wc_limitations_avg": [ 4.8, 4.4 ], "wc_review_avg": [ 392.2, 29.909195910288194 ], "wc_reply_reviewers_avg": [ 45.0, 53.464006583869114 ], "wc_reply_authors_avg": [ 28.0, 22.873565528793275 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7385489458759964, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5746542189948887968&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bytedance.com;;;bytedance.com;tum.de;bytedance.com", "author_num": 6, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ByteDance;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.bytedance.com;https://www.tum.de", "aff_unique_abbr": "ByteDance;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Germany" }, { "title": "Toward Self-Improvement of LLMs via Imagination, Searching, and Criticizing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93337", "id": "tPdJ2qHkOB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tPdJ2qHkOB", "openreview": "https://openreview.net/forum?id=tPdJ2qHkOB", "poster": "/media/PosterPDFs/NeurIPS%202024/93337.png?t=1731751336.693952", "project": "", "author_site": "Ye Tian, Baolin Peng, Linfeng Song, Lifeng Jin, Dian Yu, Lei Han, Haitao Mi, Dong Yu", "tldr": "", "abstract": "Despite the impressive capabilities of Large Language Models (LLMs) on various tasks, they still struggle with scenarios that involves complex reasoning and planning. Self-correction and self-learning emerge as viable solutions, employing strategies that allow LLMs to refine their outputs and learn from self-assessed rewards. Yet, the efficacy of LLMs in self-refining its response, particularly in complex reasoning and planning task, remains dubious. In this paper, we introduce AlphaLLM for the self-improvements of LLMs, which integrates Monte Carlo Tree Search (MCTS) with LLMs to establish a self-improving loop, thereby enhancing the capabilities of LLMs without additional annotations. Drawing inspiration from the success of AlphaGo, AlphaLLM addresses the unique challenges of combining MCTS with LLM for self-improvement, including data scarcity, the vastness search spaces of language tasks, and the subjective nature of feedback in language tasks. AlphaLLM is comprised of prompt synthesis component, an efficient MCTS approach tailored for language tasks, and a trio of critic models for precise feedback. Our experimental results in mathematical reasoning tasks demonstrate that AlphaLLM significantly enhances the performance of LLMs without additional annotations, showing the potential for self-improvement in LLMs.", "keywords": "self-improving;search;large language models", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ye Tian;Baolin Peng;Linfeng Song;Lifeng Jin;Dian Yu;Lei Han;Haitao Mi;Dong Yu", "authorids": "~Ye_Tian1;~Baolin_Peng2;~Linfeng_Song1;~Lifeng_Jin1;~Dian_Yu3;~Lei_Han1;~Haitao_Mi1;~Dong_Yu2", "gender": "M;M;;;F;M;M;M", "homepage": ";;https://freesunshine0316.github.io/;;https://sites.google.com/site/yudiandoris/;https://www.leihan.org;;https://sites.google.com/view/dongyu888/", "dblp": ";144/2759;136/3610;66/7607;136/8648-1.html;75/2307-1;19/70;71/4598-1", "google_scholar": "rTKoFWUAAAAJ;u1CNjgwAAAAJ;yWZdmLYAAAAJ;;ERdzqyYAAAAJ;Tz4_zi8AAAAJ;https://scholar.google.com/citations?hl=en;tMY31_gAAAAJ", "orcid": ";;;;;;;0000-0003-0520-6844", "linkedin": "ye-tian-b080a7b3/;;;;;;;dongyu/", "or_profile": "~Ye_Tian1;~Baolin_Peng2;~Linfeng_Song1;~Lifeng_Jin1;~Dian_Yu3;~Lei_Han1;~Haitao_Mi1;~Dong_Yu2", "aff": "Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent Robotics X;Tencent AI Lab;Tencent AI Lab", "aff_domain": "tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com", "position": "Researcher;Researcher;Researcher;Researcher;NLP researcher;Principal Researcher;Principal Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\ntian2024toward,\ntitle={Toward Self-Improvement of {LLM}s via Imagination, Searching, and Criticizing},\nauthor={Ye Tian and Baolin Peng and Linfeng Song and Lifeng Jin and Dian Yu and Lei Han and Haitao Mi and Dong Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tPdJ2qHkOB}\n}", "github": "", "reviewers": "iEWd;LHqU;PPNk;SX7Z", "pdf_size": 869650, "rating": "5;6;6;6", "confidence": "3;3;5;3", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "2;3;2;3", "wc_summary": "56;31;40;147", "wc_strengths": "33;59;31;36", "wc_weaknesses": "82;77;210;198", "wc_questions": "7;15;244;100", "wc_limitations": "4;1;4;9", "wc_review": "182;183;529;490", "wc_reply_reviewers": "16;123;137;161", "wc_reply_authors": "7;192;374;667", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;4;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 68.5, 46.19794367718113 ], "wc_strengths_avg": [ 39.75, 11.255554184490428 ], "wc_weaknesses_avg": [ 141.75, 62.419448091119804 ], "wc_questions_avg": [ 91.5, 95.29034578591894 ], "wc_limitations_avg": [ 4.5, 2.8722813232690143 ], "wc_review_avg": [ 346.0, 164.08077279193927 ], "wc_reply_reviewers_avg": [ 109.25, 55.526457657588786 ], "wc_reply_authors_avg": [ 310.0, 243.55594839789893 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4632021898419630085&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Tencent", "aff_unique_dep": "Tencent AI Lab", "aff_unique_url": "https://ai.tencent.com", "aff_unique_abbr": "Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Any2Graph: Deep End-To-End Supervised Graph Prediction With An Optimal Transport Loss", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93336", "id": "tPgagXpvcV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tPgagXpvcV", "openreview": "https://openreview.net/forum?id=tPgagXpvcV", "poster": "/media/PosterPDFs/NeurIPS%202024/93336.png?t=1729517022.1012874", "project": "", "author_site": "Paul Krzakala, Junjie Yang, R\u00e9mi Flamary, Florence d'Alch\u00e9-Buc, Charlotte Laclau, Matthieu Labeau", "tldr": "", "abstract": "We propose Any2graph, a generic framework for end-to-end Supervised Graph Prediction (SGP) i.e. a deep learning model that predicts an entire graph for any kind of input. The framework is built on a novel Optimal Transport loss, the Partially-Masked Fused Gromov-Wasserstein, that exhibits all necessary properties (permutation invariance, differentiability and scalability) and is designed to handle any-sized graphs. Numerical experiments showcase the versatility of the approach that outperform existing competitors on a novel challenging synthetic dataset and a variety of real-world tasks such as map construction from satellite image (Sat2Graph) or molecule prediction from fingerprint (Fingerprint2Graph).", "keywords": "Optimal Transport;Graph Prediction;Structured Prediction;Graph;Deep Learning", "primary_area": "other", "supplementary_material": "", "author": "Paul KRZAKALA;Junjie Yang;R\u00e9mi Flamary;Florence d'Alch\u00e9-Buc;Charlotte Laclau;Matthieu Labeau", "authorids": "~Paul_KRZAKALA1;~Junjie_Yang3;~R\u00e9mi_Flamary1;~Florence_d'Alch\u00e9-Buc2;~Charlotte_Laclau2;~Matthieu_Labeau2", "gender": ";;;F;;F", "homepage": "https://krzakalapaul.github.io/;;https://remi.flamary.com/;https://laclauc.github.io/index.html;https://www.telecom-paris.fr/matthieu-labeau;https://perso.telecom-paristech.fr/fdalche/", "dblp": "303/4319;;00/8318;153/2640;152/4095.html;https://dblp.uni-trier.de/pers/d/d=Alch=eacute==Buc:Florence.html", "google_scholar": "https://scholar.google.fr/citations?hl=fr;giOrelMAAAAJ;https://scholar.google.fr/citations?user=zDnwxFQAAAAJ;https://scholar.google.fr/citations?user=47i5TpcAAAAJ;;-qbL7z0AAAAJ", "orcid": ";;0000-0002-4212-6627;;;0000-0002-8353-0589", "linkedin": ";;;;;", "or_profile": "~Paul_KRZAKALA1;~Junjie_Yang3;~R\u00e9mi_Flamary1;~Charlotte_Laclau2;~Matthieu_Labeau2;~Florence_D'Alch\u00e92", "aff": "T\u00e9l\u00e9com Paris;T\u00e9l\u00e9com Paris;\u00c9cole Polytechnique;T\u00e9lecom Paris;T\u00e9l\u00e9com ParisTech;T\u00e9l\u00e9com ParisTech, Institut Polytechnique de paris", "aff_domain": "telecom-paris.fr;telecom-paris.fr;polytechnique.edu;telecom-paris.fr;telecom-paristech.fr;telecom-paristech.fr", "position": "PhD student;PhD student;Full Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkrzakala2024anygraph,\ntitle={Any2Graph: Deep End-To-End Supervised Graph Prediction With An Optimal Transport Loss},\nauthor={Paul KRZAKALA and Junjie Yang and R{\\'e}mi Flamary and Florence d'Alch{\\'e}-Buc and Charlotte Laclau and Matthieu Labeau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tPgagXpvcV}\n}", "github": "", "reviewers": "CHfS;ct8H;9w7t;YNCo;bW5k", "pdf_size": 1395539, "rating": "6;7;7;7;8", "confidence": "3;3;3;4;3", "soundness": "3;4;3;3;4", "novelty": "2;3;3;2;3", "presentation": "3;4;3;3;4", "wc_summary": "56;68;89;35;209", "wc_strengths": "73;90;84;55;142", "wc_weaknesses": "84;93;80;94;41", "wc_questions": "61;5;80;118;56", "wc_limitations": "1;11;8;17;23", "wc_review": "275;267;341;319;471", "wc_reply_reviewers": "19;0;13;75;19", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 91.4, 61.34688256138204 ], "wc_strengths_avg": [ 88.8, 29.143781497945664 ], "wc_weaknesses_avg": [ 78.4, 19.4381069037085 ], "wc_questions_avg": [ 64.0, 36.676968249843114 ], "wc_limitations_avg": [ 12.0, 7.536577472566709 ], "wc_review_avg": [ 334.6, 73.49993197275764 ], "wc_reply_reviewers_avg": [ 25.2, 25.848791074245618 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13033227585618976933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "telecom-paris.fr;telecom-paris.fr;polytechnique.edu;telecom-paris.fr;telecom-paristech.fr;telecom-paristech.fr", "author_num": 6, "aff_unique_index": "0;0;1;0;2;2", "aff_unique_norm": "T\u00e9l\u00e9com Paris;Ecole Polytechnique;T\u00e9l\u00e9com ParisTech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.telecom-paris.fr;https://www.polytechnique.edu;https://www.telecom-paristech.fr", "aff_unique_abbr": "T\u00e9l\u00e9com Paris;X;TP", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "France" }, { "title": "MM-WLAuslan: Multi-View Multi-Modal Word-Level Australian Sign Language Recognition Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97466", "id": "tPsw4NeLZx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tPsw4NeLZx", "openreview": "https://openreview.net/forum?id=tPsw4NeLZx", "poster": "/media/PosterPDFs/NeurIPS%202024/97466.png?t=1729402902.9091759", "project": "", "author_site": "Xin Shen, Heming Du, Hongwei Sheng, Shuyun Wang, Hui Chen, Huiqiang Chen, Zhuojie Wu, Xiaobiao Du, Jiaying Ying, Ruihan Lu, Qingzheng Xu, Xin Yu", "tldr": "", "abstract": "Isolated Sign Language Recognition (ISLR) focuses on identifying individual sign language glosses. Considering the diversity of sign languages across geographical regions, developing region-specific ISLR datasets is crucial for supporting communication and research. Auslan, as a sign language specific to Australia, still lacks a dedicated large-scale word-level dataset for the ISLR task. To fill this gap, we curate \\underline{\\textbf{the first}} large-scale Multi-view Multi-modal Word-Level Australian Sign Language recognition dataset, dubbed MM-WLAuslan. Compared to other publicly available datasets, MM-WLAuslan exhibits three significant advantages: (1) **the largest amount** of data, (2) **the most extensive** vocabulary, and (3) **the most diverse** of multi-modal camera views. Specifically, we record **282K+** sign videos covering **3,215** commonly used Auslan glosses presented by **73** signers in a studio environment.\nMoreover, our filming system includes two different types of cameras, i.e., three Kinect-V2 cameras and a RealSense camera. We position cameras hemispherically around the front half of the model and simultaneously record videos using all four cameras. Furthermore, we benchmark results with state-of-the-art methods for various multi-modal ISLR settings on MM-WLAuslan, including multi-view, cross-camera, and cross-view. Experiment results indicate that MM-WLAuslan is a challenging ISLR dataset, and we hope this dataset will contribute to the development of Auslan and the advancement of sign languages worldwide. All datasets and benchmarks are available at MM-WLAuslan.", "keywords": "Auslan;Isolated Sign Language Recognition;Multi-View;RGB-D", "primary_area": "", "supplementary_material": "/attachment/56f52645d4f5af745f702bc1e3a1aa4eca5fd892.zip", "author": "Xin Shen;Heming Du;Hongwei Sheng;Shuyun Wang;Hui Chen;Huiqiang Chen;Zhuojie Wu;Xiaobiao Du;Jiaying Ying;Ruihan Lu;Qingzheng Xu;Xin Yu", "authorids": "~Xin_Shen3;~Heming_Du2;~Hongwei_Sheng2;~Shuyun_Wang2;~Hui_Chen2;~Huiqiang_Chen1;~Zhuojie_Wu1;~Xiaobiao_Du3;~Jiaying_Ying1;~Ruihan_Lu1;~Qingzheng_Xu1;~Xin_Yu1", "gender": "M;M;M;M;M;M;;F;M;Not Specified;M;M", "homepage": ";;https://orcid.org/0000-0001-8990-2235;;;;;;;;https://sites.google.com/view/xinyus-homepage/Home;https://xiaobiaodu.github.io/", "dblp": ";244/8133;340/0211.html;78/5566;12/417;;128/5757;;;;54/1184-2;289/6739", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;Ha3UZTwAAAAJ;;;dK1jEiAAAAAJ;;lGLp0X8AAAAJ;;;;oxdtuSEAAAAJ;https://scholar.google.com.hk/citations?user=uLjs7pmvlVUC", "orcid": ";0000-0002-7391-0449;0000-0001-8990-2235;;0000-0002-7108-4067;0000-0003-4811-6742;;;;0000-0002-3752-7872;0000-0002-0269-5649;", "linkedin": ";;https://au.linkedin.com/in/hongwei-sheng-6a78ba287;;;;;jiaying-ying-jessica-69063b135/;ruihan-lu-0400b4222/;;;", "or_profile": "~Xin_Shen3;~Heming_Du2;~Hongwei_Sheng2;~Shuyun_Wang2;~Hui_Chen2;~Huiqiang_Chen1;~Zhuojie_Wu1;~Jiaying_Ying1;~Ruihan_Lu1;~Qingzheng_Xu1;~Xin_Yu1;~Xiaobiao_Du2", "aff": "University of Queensland;Australian National University;University of Queensland;University of Queensland;Xi'an Jiaotong University;University of Technology Sydney;University of Queensland;University of Queensland;University of Queensland;University of Queensland;University of Queensland;University of Technology Sydney", "aff_domain": "uq.edu.au;anu.edu.au;uq.edu.au;uq.edu.au;xjtu.edu;uts.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uts.edu.au", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Senior Lecturer;PhD student", "bibtex": "@inproceedings{\nshen2024mmwlauslan,\ntitle={{MM}-{WLA}uslan: Multi-View Multi-Modal Word-Level Australian Sign Language Recognition Dataset},\nauthor={Xin Shen and Heming Du and Hongwei Sheng and Shuyun Wang and Hui Chen and Huiqiang Chen and Zhuojie Wu and Xiaobiao Du and Jiaying Ying and Ruihan Lu and Qingzheng Xu and Xin Yu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=tPsw4NeLZx}\n}", "github": "", "reviewers": "LKbv;PcEY;ATcB;B9So", "pdf_size": 6178844, "rating": "7;7;7;8", "confidence": "4;5;5;5", "wc_summary_and_contributions": "21;61;49;27", "wc_strengths": "66;21;26;2", "wc_improvement": "70;47;59;82", "wc_limitations": "23;1;1;1", "wc_correctness": "1;1;40;1", "wc_clarity": "1;1;12;1", "wc_relation_to_prior_work": "1;1;7;1", "wc_documentation": "1;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "185;135;196;117", "wc_reply_reviewers": "0;28;11;9", "wc_reply_authors": "0;33;33;33", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 39.5, 16.209565077447326 ], "wc_strengths_avg": [ 28.75, 23.29565410114084 ], "wc_improvement_avg": [ 64.5, 12.971121771072847 ], "wc_limitations_avg": [ 6.5, 9.526279441628825 ], "wc_correctness_avg": [ 10.75, 16.887495373796554 ], "wc_clarity_avg": [ 3.75, 4.763139720814412 ], "wc_relation_to_prior_work_avg": [ 2.5, 2.598076211353316 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 158.25, 33.10117067416196 ], "wc_reply_reviewers_avg": [ 12.0, 10.124228365658293 ], "wc_reply_authors_avg": [ 24.75, 14.289419162443238 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:du7Z2bN6Nu0J:scholar.google.com/&scioq=MM-WLAuslan:+Multi-View+Multi-Modal+Word-Level+Australian+Sign+Language+Recognition+Dataset&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "uq.edu.au;anu.edu.au;uq.edu.au;uq.edu.au;xjtu.edu;uts.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uts.edu.au", "author_num": 12, "aff_unique_index": "0;1;0;0;2;3;0;0;0;0;0;3", "aff_unique_norm": "University of Queensland;Australian National University;Xi'an Jiao Tong University;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uq.edu.au;https://www.anu.edu.au;https://www.xjtu.edu.cn;https://www.uts.edu.au", "aff_unique_abbr": "UQ;ANU;XJTU;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "Australia;China" }, { "title": "Improved Distribution Matching Distillation for Fast Image Synthesis", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93335", "id": "tQukGCDaNT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tQukGCDaNT", "openreview": "https://openreview.net/forum?id=tQukGCDaNT", "poster": "", "project": "", "author_site": "Tianwei Yin, Micha\u00ebl Gharbi, Taesung Park, Richard Zhang, Eli Shechtman, Fredo Durand, Bill Freeman", "tldr": "", "abstract": "Recent approaches have shown promises distilling expensive diffusion models into efficient one-step generators.\nAmongst them, Distribution Matching Distillation (DMD) produces one-step generators that match their teacher in distribution, i.e., the distillation process does not enforce a one-to-one correspondence with the sampling trajectories of their teachers.\nHowever, to ensure stable training in practice, DMD requires an additional regression loss computed using a large set of noise--image pairs, generated by the teacher with many steps of a deterministic sampler.\nThis is not only computationally expensive for large-scale text-to-image synthesis, but it also limits the student's quality, tying it too closely to the teacher's original sampling paths.\nWe introduce DMD2, a set of techniques that lift this limitation and improve DMD training.\nFirst, we eliminate the regression loss and the need for expensive dataset construction.\nWe show that the resulting instability is due to the \"fake\" critic not estimating the distribution \nof generated samples with sufficient accuracy and propose a two time-scale update rule as a remedy.\nSecond, we integrate a GAN loss into the distillation procedure, discriminating between generated samples and real images.\nThis lets us train the student model on real data, thus mitigating the imperfect \"real\" score estimation from the teacher model, and thereby enhancing quality.\nThird, we introduce a new training procedure that enables multi-step sampling in the student, and\naddresses the training--inference input mismatch of previous work, by simulating inference-time generator samples during training. \nTaken together, our improvements set new benchmarks in one-step image generation, with FID scores of 1.28 on ImageNet-64\u00d764 and 8.35 on zero-shot COCO 2014, surpassing the original teacher despite a 500X reduction in inference cost.\nFurther, we show our approach can generate megapixel images by distilling SDXL, demonstrating exceptional visual quality among few-step methods, and surpassing the teacher. \nWe release our code and pretrained models.", "keywords": "Image Generation;diffusion based models;model distillation", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Tianwei Yin;Micha\u00ebl Gharbi;Taesung Park;Richard Zhang;Eli Shechtman;Fredo Durand;William T. Freeman", "authorids": "~Tianwei_Yin1;~Micha\u00ebl_Gharbi1;~Taesung_Park2;~Richard_Zhang1;~Eli_Shechtman3;~Fredo_Durand1;~William_T._Freeman1", "gender": "M;M;M;M;M;M;M", "homepage": "https://tianweiy.github.io;http://richzhang.github.io;http://people.csail.mit.edu/fredo/;https://billf.mit.edu/;https://research.adobe.com/person/eli-shechtman/;http://www.mgharbi.com;https://taesung.me", "dblp": "267/9373;;87/2617;86/6650;50/1918.html;171/6798;55/4543", "google_scholar": "BHlY8ewAAAAJ;LW8ze_UAAAAJ;https://scholar.google.com.tw/citations?user=NJ9c4ygAAAAJ;https://scholar.google.com.tw/citations?user=0zZnyMEAAAAJ;B_FTboQAAAAJ;R6jgG94AAAAJ;hHkuxSUAAAAJ", "orcid": ";;0000-0001-9919-069X;;0000-0002-6783-1795;0000-0002-7622-4970;", "linkedin": ";;;;elishechtman/;michaelgharbi;", "or_profile": "~Tianwei_Yin1;~Richard_Zhang1;~Fredo_Durand1;~William_T._Freeman1;~Eli_Shechtman1;~MICHAEL_GHARBI2;~Taesung_Park1", "aff": "Massachusetts Institute of Technology;Adobe Systems;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Adobe;Adobe Systems;Adobe Systems", "aff_domain": "mit.edu;adobe.com;mit.edu;mit.edu;adobe.com;adobe.com;adobe.com", "position": "PhD student;Research Scientist;Full Professor;Professor;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nyin2024improved,\ntitle={Improved Distribution Matching Distillation for Fast Image Synthesis},\nauthor={Tianwei Yin and Micha{\\\"e}l Gharbi and Taesung Park and Richard Zhang and Eli Shechtman and Fredo Durand and William T. Freeman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tQukGCDaNT}\n}", "github": "", "reviewers": "uheV;xYYF;xaYh;aF5v;Yut7", "pdf_size": 16429403, "rating": "6;6;8;8;8", "confidence": "4;4;4;5;4", "soundness": "3;3;4;4;4", "novelty": "3;3;4;3;4", "presentation": "3;3;3;3;4", "wc_summary": "84;91;93;49;117", "wc_strengths": "82;36;150;62;135", "wc_weaknesses": "330;80;176;100;10", "wc_questions": "65;124;96;166;59", "wc_limitations": "14;9;13;54;21", "wc_review": "575;340;528;431;342", "wc_reply_reviewers": "0;0;40;122;11", "wc_reply_authors": "0;0;0;127;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;2;1", "rating_avg": [ 7.2, 0.9797958971132712 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 3.4, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 86.8, 21.93080025899648 ], "wc_strengths_avg": [ 93.0, 43.22961947554015 ], "wc_weaknesses_avg": [ 139.2, 109.08052071749567 ], "wc_questions_avg": [ 102.0, 39.582824558133794 ], "wc_limitations_avg": [ 22.2, 16.363373735266208 ], "wc_review_avg": [ 443.2, 95.50162302285757 ], "wc_reply_reviewers_avg": [ 34.6, 46.08513860237376 ], "wc_reply_authors_avg": [ 25.4, 50.8 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15842835977850170110&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;adobe.com;mit.edu;mit.edu;adobe.com;adobe.com;adobe.com", "author_num": 7, "aff_unique_index": "0;1;0;0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://web.mit.edu;https://www.adobe.com", "aff_unique_abbr": "MIT;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Token Merging for Training-Free Semantic Binding in Text-to-Image Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93334", "id": "tRRWoa9e80", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tRRWoa9e80", "openreview": "https://openreview.net/forum?id=tRRWoa9e80", "poster": "/media/PosterPDFs/NeurIPS%202024/93334.png?t=1731600870.057375", "project": "", "author_site": "Taihang Hu, Linxuan Li, Joost van de Weijer, Hongcheng Gao, Fahad Shahbaz Khan, Jian Yang, Ming-Ming Cheng, KAI WANG, Yaxing Wang", "tldr": "", "abstract": "Although text-to-image (T2I) models exhibit remarkable generation capabilities,\nthey frequently fail to accurately bind semantically related objects or attributes\nin the input prompts; a challenge termed semantic binding. Previous approaches\neither involve intensive fine-tuning of the entire T2I model or require users or\nlarge language models to specify generation layouts, adding complexity. In this\npaper, we define semantic binding as the task of associating a given object with its\nattribute, termed attribute binding, or linking it to other related sub-objects, referred\nto as object binding. We introduce a novel method called Token Merging (ToMe),\nwhich enhances semantic binding by aggregating relevant tokens into a single\ncomposite token. This ensures that the object, its attributes and sub-objects all share\nthe same cross-attention map. Additionally, to address potential confusion among\nmain objects with complex textual prompts, we propose end token substitution as\na complementary strategy. To further refine our approach in the initial stages of\nT2I generation, where layouts are determined, we incorporate two auxiliary losses,\nan entropy loss and a semantic binding loss, to iteratively update the composite\ntoken to improve the generation integrity. We conducted extensive experiments to\nvalidate the effectiveness of ToMe, comparing it against various existing methods\non the T2I-CompBench and our proposed GPT-4o object binding benchmark. Our\nmethod is particularly effective in complex scenarios that involve multiple objects\nand attributes, which previous methods often fail to address. The code will be\n publicly available at https://github.com/hutaihang/ToMe", "keywords": "Diffusion model;Attribute binding;Text Embedding", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/1a588a5f93b246e82128b278d13bb0a35780762d.zip", "author": "taihang Hu;Linxuan Li;Joost van de Weijer;Hongcheng Gao;Fahad Khan;Jian Yang;Ming-Ming Cheng;Kai Wang;Yaxing Wang", "authorids": "~taihang_Hu1;~Linxuan_Li2;~Joost_van_de_Weijer5;~Hongcheng_Gao1;~Fahad_Khan1;~Jian_Yang1;~Ming-Ming_Cheng3;~Kai_Wang7;~Yaxing_Wang3", "gender": "M;M;M;M;M;M;M;M;M", "homepage": "https://hutaihang.github.io/;https://github.com/Potato-lover;https://gao-hongcheng.github.io/;https://sites.google.com/view/fahadkhans/home;;https://mmcheng.net;https://wangkai930418.github.io/;https://yaxingwang.netlify.app/author/yaxing-wang/;http://lamp.cvc.uab.es/", "dblp": "344/1728;;318/1404;05/8618;y/JianYang3.html;45/7592;;;67/3379", "google_scholar": ";;https://scholar.google.com/citations?hl=en;zvaeYnUAAAAJ;https://scholar.google.com.hk/citations?user=6CIDtZQAAAAJ;huWpVyEAAAAJ;j14vd0wAAAAJ;https://scholar.google.es/citations?user=6CsB8k0AAAAJ;https://scholar.google.es/citations?user=Gsw2iUEAAAAJ", "orcid": ";;;;;0000-0001-5550-8758;0000-0002-9605-8279;;0000-0002-9656-9706", "linkedin": ";;;;;;kai-wang-43129b1b7/;;", "or_profile": "~taihang_Hu1;~Linxuan_Li2;~Hongcheng_Gao1;~Fahad_Khan1;~Jian_Yang1;~Ming-Ming_Cheng3;~Kai_Wang7;~Yaxing_Wang3;~Joost_van_de_Weijer1", "aff": "Nankai University;Harbin Engineering University;University of Chinese Academy of Sciences;Link\u00f6ping University;Nanjing University of Science and Technology;Nankai University;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona;Nankai University;Computer Vision Center, Universitat Aut\u00f3noma de Barcelona", "aff_domain": "nankai.edu.cn;hrbeu.edu.cn;ucas.ac.cn;liu.se;njust.edu.cn;nankai.edu.cn;cvc.uab.es;nku.nankai.edu.cn;cvc.uab.es", "position": "MS student;Undergrad student;MS student;Associate Professor;Full Professor;Full Professor;Postdoc;Associate Professor;Researcher", "bibtex": "@inproceedings{\nhu2024token,\ntitle={Token Merging for Training-Free Semantic Binding in Text-to-Image Synthesis},\nauthor={taihang Hu and Linxuan Li and Joost van de Weijer and Hongcheng Gao and Fahad Khan and Jian Yang and Ming-Ming Cheng and Kai Wang and Yaxing Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tRRWoa9e80}\n}", "github": "", "reviewers": "85Q2;mo93;jwvK;8gpY", "pdf_size": 16055982, "rating": "6;6;6;7", "confidence": "4;3;3;4", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "4;3;3;2", "wc_summary": "106;47;86;102", "wc_strengths": "61;23;75;66", "wc_weaknesses": "79;27;48;316", "wc_questions": "107;137;64;91", "wc_limitations": "39;1;42;5", "wc_review": "392;235;315;580", "wc_reply_reviewers": "0;18;0;146", "wc_reply_authors": "0;218;0;217", "reply_reviewers": "0;1;0;1", "reply_authors": "1;3;1;3", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 85.25, 23.31710745354149 ], "wc_strengths_avg": [ 56.25, 19.84155991851447 ], "wc_weaknesses_avg": [ 117.5, 116.08725166873407 ], "wc_questions_avg": [ 99.75, 26.43269755435491 ], "wc_limitations_avg": [ 21.75, 18.833148966649205 ], "wc_review_avg": [ 380.5, 127.86027530081421 ], "wc_reply_reviewers_avg": [ 41.0, 61.065538563088104 ], "wc_reply_authors_avg": [ 108.75, 108.7505747111251 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2123892304727210125&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;hrbeu.edu.cn;ucas.ac.cn;liu.se;njust.edu.cn;nankai.edu.cn;cvc.uab.es;nku.nankai.edu.cn;cvc.uab.es", "author_num": 9, "aff_unique_index": "0;1;2;3;4;0;5;0;5", "aff_unique_norm": "Nankai University;Harbin Engineering University;University of Chinese Academy of Sciences;Link\u00f6ping University;Nanjing University of Science and Technology;Universitat Aut\u00f3noma de Barcelona", "aff_unique_dep": ";;;;;Computer Vision Center", "aff_unique_url": "http://www.nankai.edu.cn;http://www.heu.edu.cn;http://www.ucas.ac.cn;https://www.liu.se;http://www.nust.edu.cn/;https://www.uab.cat", "aff_unique_abbr": "NKU;HEU;UCAS;LiU;NUST;UAB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;2;0;2", "aff_country_unique": "China;Sweden;Spain" }, { "id": "tRjgapiCpm", "title": "The Last Iterate Advantage: Empirical Auditing and Principled Heuristic Analysis of Differentially Private SGD", "track": "main", "status": "Reject", "tldr": "", "abstract": "We propose a simple heuristic privacy analysis of noisy clipped stochastic gradient descent (DP-SGD) in the setting where only the last iterate is released and the intermediate iterates remain hidden. Namely, our heuristic assumes a linear structure for the model.\n\nWe show experimentally that our heuristic is predictive of the outcome of privacy auditing applied to various training procedures. Thus it can be used prior to training as a rough estimate of the final privacy leakage. We also probe the limitations of our heuristic by providing some artificial counterexamples where it underestimates the privacy leakage.\n\nThe standard composition-based privacy analysis of DP-SGD effectively assumes that the adversary has access to all intermediate iterates, which is often unrealistic.\nHowever, this analysis remains the state of the art in practice. \nWhile our heuristic does not replace a rigorous privacy analysis, it illustrates the large gap between the best theoretical upper bounds and the privacy auditing lower bounds and sets a target for further work to improve the theoretical privacy analyses.", "keywords": "differential privacy;heuristics;privacy auditing", "primary_area": "privacy", "supplementary_material": "", "author": "Milad Nasr;Thomas Steinke;Borja Balle;Christopher A. Choquette-Choo;Arun Ganesh;Matthew Jagielski;Jamie Hayes;Abhradeep Guha Thakurta;Adam Smith;Andreas Terzis", "authorids": "~Milad_Nasr2;~Thomas_Steinke2;~Borja_Balle2;~Christopher_A._Choquette-Choo1;~Arun_Ganesh1;~Matthew_Jagielski1;~Jamie_Hayes2;~Abhradeep_Guha_Thakurta1;~Adam_Smith1;~Andreas_Terzis1", "gender": ";M;;M;M;M;M;M;M;M", "homepage": "https://people.cs.umass.edu/~milad/;http://www.thomas-steinke.net/;https://borjaballe.github.io/;https://www.christopherchoquette.com;https://people.eecs.berkeley.edu/~arunganesh/;https://jagielski.github.io/;;https://athakurta.squarespace.com/;http://cs-people.bu.edu/ads22;https://aterzis-personal.github.io/aterzis/", "dblp": ";https://dblp.uni-trier.de/pid/73/4025-2.html;https://dblp.uni-trier.de/pers/b/Balle:Borja.html;250/9674;201/4732;218/5156;;31/8315;04/5072;12/6664", "google_scholar": "k6-nvDAAAAAJ;kwnwhrgAAAAJ;;oDE4I64AAAAJ;fmwchbsAAAAJ;_8rw_GMAAAAJ;https://scholar.google.com/citations?hl=en;1rV69hMAAAAJ;fkGi-JMAAAAJ;NcIqQ88AAAAJ", "orcid": ";;;;;;;;;", "linkedin": ";thomas-steinke-2841248/;;christopher-choquette-choo/;;;;;;andreas-terzis-2395371/", "or_profile": "~Milad_Nasr2;~Thomas_Steinke2;~Borja_Balle2;~Christopher_A._Choquette-Choo1;~Arun_Ganesh1;~Matthew_Jagielski1;~Jamie_Hayes2;~Abhradeep_Guha_Thakurta1;~Adam_Smith1;~Andreas_Terzis1", "aff": "Google;Google;Google DeepMind;Google DeepMind;Google;Google;Google DeepMind;Google;Google;Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;deepmind.google", "position": "Researcher;Research Scientist;Research scientist;Research Scientist;Researcher;Researcher;Researcher;Senior Research Scientist;Researcher;Researcher", "bibtex": "@misc{\nanonymous2024the,\ntitle={The Last Iterate Advantage: Empirical Auditing and Principled Heuristic Analysis of Differentially Private {SGD}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=tRjgapiCpm}\n}", "github": "", "project": "", "reviewers": "buwF;8UgB;6muC;zPtg", "site": "https://openreview.net/forum?id=tRjgapiCpm", "pdf_size": 642241, "rating": "3;3;5;5", "confidence": "3;3;2;3", "soundness": "2;2;3;2", "novelty": "2;3;2;2", "presentation": "3;3;2;2", "wc_summary": "66;57;59;194", "wc_strengths": "29;42;29;42", "wc_weaknesses": "294;119;77;347", "wc_questions": "4;266;26;171", "wc_limitations": "4;10;5;14", "wc_review": "397;494;196;768", "wc_reply_reviewers": "126;304;9;426", "wc_reply_authors": "376;609;0;784", "reply_reviewers": "2;1;1;3", "reply_authors": "2;2;1;3", "rating_avg": [ 4.0, 1.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.0, 57.83165223301164 ], "wc_strengths_avg": [ 35.5, 6.5 ], "wc_weaknesses_avg": [ 209.25, 113.79010282093957 ], "wc_questions_avg": [ 116.75, 107.43224609026845 ], "wc_limitations_avg": [ 8.25, 4.02336923485777 ], "wc_review_avg": [ 463.75, 205.9300548730078 ], "wc_reply_reviewers_avg": [ 216.25, 160.3065422869572 ], "wc_reply_authors_avg": [ 442.25, 293.50159709957285 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17408784898115259927&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;1;0;0;1;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Efficient Recurrent Off-Policy RL Requires a Context-Encoder-Specific Learning Rate", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93333", "id": "tSWoT8ttkO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tSWoT8ttkO", "openreview": "https://openreview.net/forum?id=tSWoT8ttkO", "poster": "/media/PosterPDFs/NeurIPS%202024/93333.png?t=1733851095.4583", "project": "", "author_site": "Fan-Ming Luo, Zuolin Tu, Zefang Huang, Yang Yu", "tldr": "", "abstract": "Real-world decision-making tasks are usually partially observable Markov decision processes (POMDPs), where the state is not fully observable. Recent progress has demonstrated that recurrent reinforcement learning (RL), which consists of a context encoder based on recurrent neural networks (RNNs) for unobservable state prediction and a multilayer perceptron (MLP) policy for decision making, can mitigate partial observability and serve as a robust baseline for POMDP tasks. However, prior recurrent RL algorithms have faced issues with training instability. In this paper, we find that this instability stems from the autoregressive nature of RNNs, which causes even small changes in RNN parameters to produce large output variations over long trajectories. Therefore, we propose **R**ecurrent Off-policy RL with Context-**E**ncoder-**S**p**e**cific **L**earning Rate (RESeL) to tackle this issue. Specifically, RESeL uses a lower learning rate for context encoder than other MLP layers to ensure the stability of the former while maintaining the training efficiency of the latter. We integrate this technique into existing off-policy RL methods, resulting in the RESeL algorithm. We evaluated RESeL in 18 POMDP tasks, including classic, meta-RL, and credit assignment scenarios, as well as five MDP locomotion tasks. The experiments demonstrate significant improvements in training stability with RESeL. Comparative results show that RESeL achieves notable performance improvements over previous recurrent RL baselines in POMDP tasks, and is competitive with or even surpasses state-of-the-art methods in MDP tasks. Further ablation studies highlight the necessity of applying a distinct learning rate for the context encoder. Code is available at https://github.com/FanmingL/Recurrent-Offpolicy-RL.", "keywords": "Reinforcement Learning;Recurrent Off-Policy Reinforcement Learning;Partially Observable Markov Decision Process", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Fan-Ming Luo;Zuolin Tu;Zefang Huang;Yang Yu", "authorids": "~Fan-Ming_Luo1;~Zuolin_Tu1;~Zefang_Huang1;~Yang_Yu5", "gender": ";M;M;", "homepage": ";;https://hzffrank.github.io;", "dblp": ";;231/3939;", "google_scholar": ";;;", "orcid": ";0000-0001-6606-4347;;", "linkedin": ";;;", "or_profile": "~Fan-Ming_Luo1;~Zuolin_Tu1;~Zefang_Huang1;~Yang_Yu5", "aff": ";Polixir;Nanjing University;", "aff_domain": ";polixir.ai;smail.nju.edu.cn;", "position": ";Researcher;Undergrad student;", "bibtex": "@inproceedings{\nluo2024efficient,\ntitle={Efficient Recurrent Off-Policy {RL} Requires a Context-Encoder-Specific Learning Rate},\nauthor={Fan-Ming Luo and Zuolin Tu and Zefang Huang and Yang Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tSWoT8ttkO}\n}", "github": "", "reviewers": "B2hv;dCQa;vPkL;89BQ", "pdf_size": 8473991, "rating": "3;5;7;7", "confidence": "4;3;4;4", "soundness": "2;3;3;3", "novelty": "1;2;4;3", "presentation": "4;3;4;3", "wc_summary": "61;22;77;164", "wc_strengths": "77;29;97;157", "wc_weaknesses": "244;269;73;322", "wc_questions": "23;3;110;744", "wc_limitations": "1;4;4;45", "wc_review": "406;327;361;1432", "wc_reply_reviewers": "512;51;42;209", "wc_reply_authors": "795;40;59;29", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.5, 1.6583123951777 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 81.0, 51.92783453986888 ], "wc_strengths_avg": [ 90.0, 45.902069670114 ], "wc_weaknesses_avg": [ 227.0, 93.26574934025888 ], "wc_questions_avg": [ 220.0, 305.19420046914394 ], "wc_limitations_avg": [ 13.5, 18.227726133558185 ], "wc_review_avg": [ 631.5, 463.01754826356205 ], "wc_reply_reviewers_avg": [ 203.5, 190.0927405241978 ], "wc_reply_authors_avg": [ 230.75, 325.9466022218977 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17984819684213230290&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";polixir.ai;smail.nju.edu.cn;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Polixir;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.nju.edu.cn", "aff_unique_abbr": ";Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "Out-of-Distribution Detection with a Single Unconditional Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93332", "id": "tTnFH7D1h4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tTnFH7D1h4", "openreview": "https://openreview.net/forum?id=tTnFH7D1h4", "poster": "/media/PosterPDFs/NeurIPS%202024/93332.png?t=1733120224.0419676", "project": "", "author_site": "Alvin Heng, alexandre thiery, Harold Soh", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is a critical task in machine learning that seeks to identify abnormal samples. Traditionally, unsupervised methods utilize a deep generative model for OOD detection. However, such approaches require a new model to be trained for each inlier dataset. This paper explores whether a single model can perform OOD detection across diverse tasks. To that end, we introduce Diffusion Paths (DiffPath), which uses a single diffusion model originally trained to perform unconditional generation for OOD detection. We introduce a novel technique of measuring the rate-of-change and curvature of the diffusion paths connecting samples to the standard normal. Extensive experiments show that with a single model, DiffPath is competitive with prior work using individual models on a variety of OOD tasks involving different distributions. Our code is publicly available at https://github.com/clear-nus/diffpath.", "keywords": "out-of-distribution detection;anomaly detection;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/5bb473a015627dee5ce956eaeda839a0bbf4f832.zip", "author": "Alvin Heng;Alexandre H. Thiery;Harold Soh", "authorids": "~Alvin_Heng1;~Alexandre_H._Thiery1;~Harold_Soh1", "gender": ";M;M", "homepage": ";http://www.haroldsoh.com;http://www.normalesup.org/~athiery/", "dblp": "338/9333;06/4578;203/7143", "google_scholar": "https://scholar.google.com.sg/citations?user=fHFKbUMAAAAJ;https://scholar.google.com.sg/citations?user=lkgd1BsAAAAJ;https://scholar.google.com.sg/citations?user=szBOsCgAAAAJ", "orcid": ";;", "linkedin": ";;alexandre-thiery-2981686/", "or_profile": "~Alvin_Heng1;~Harold_Soh1;~Alexandre_Hoang_THIERY1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;nus.edu.sg", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nheng2024outofdistribution,\ntitle={Out-of-Distribution Detection with a Single Unconditional Diffusion Model},\nauthor={Alvin Heng and Alexandre H. Thiery and Harold Soh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tTnFH7D1h4}\n}", "github": "", "reviewers": "5CB8;3gUh;k7Q2;eKbL", "pdf_size": 1007791, "rating": "3;6;7;7", "confidence": "3;3;4;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "58;78;189;60", "wc_strengths": "89;41;126;63", "wc_weaknesses": "265;179;270;172", "wc_questions": "77;1;130;69", "wc_limitations": "1;2;22;7", "wc_review": "490;301;737;371", "wc_reply_reviewers": "819;0;37;12", "wc_reply_authors": "1475;48;16;13", "reply_reviewers": "2;0;1;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 96.25, 54.112729555992644 ], "wc_strengths_avg": [ 79.75, 31.649447072579324 ], "wc_weaknesses_avg": [ 221.5, 46.10043383743802 ], "wc_questions_avg": [ 69.25, 45.84961831902202 ], "wc_limitations_avg": [ 8.0, 8.396427811873332 ], "wc_review_avg": [ 474.75, 165.80165107742442 ], "wc_reply_reviewers_avg": [ 217.0, 347.82107469214685 ], "wc_reply_authors_avg": [ 388.0, 627.7296392556273 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7624928516630233, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16982350307635267203&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "u.nus.edu;nus.edu.sg;nus.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "SyncVIS: Synchronized Video Instance Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93331", "id": "tTpVHsqTKf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tTpVHsqTKf", "openreview": "https://openreview.net/forum?id=tTpVHsqTKf", "poster": "", "project": "", "author_site": "Rongkun Zheng, Lu Qi, Xi Chen, Yi Wang, Kun Wang, Yu Qiao, Hengshuang Zhao", "tldr": "", "abstract": "Recent DETR-based methods have advanced the development of Video Instance Segmentation (VIS) through transformers' efficiency and capability in modeling spatial and temporal information. Despite harvesting remarkable progress, existing works follow asynchronous designs, which model video sequences via either video-level queries only or adopting query-sensitive cascade structures, resulting in difficulties when handling complex and challenging video scenarios. In this work, we analyze the cause of this phenomenon and the limitations of the current solutions, and propose to conduct synchronized modeling via a new framework named SyncVIS. Specifically, SyncVIS explicitly introduces video-level query embeddings and designs two key modules to synchronize video-level query with frame-level query embeddings: a synchronized video-frame modeling paradigm and a synchronized embedding optimization strategy. The former attempts to promote the mutual learning of frame- and video-level embeddings with each other and the latter divides large video sequences into small clips for easier optimization. Extensive experimental evaluations are conducted on the challenging YouTube-VIS 2019 & 2021 & 2022, and OVIS benchmarks, and SyncVIS achieves state-of-the-art results, which demonstrates the effectiveness and generality of the proposed approach. The code is available at https://github.com/rkzheng99/SyncVIS.", "keywords": "video instance segmentation;video-frame synchronization", "primary_area": "machine_vision", "supplementary_material": "", "author": "rongkun Zheng;Lu Qi;Xi Chen;Yi Wang;Kun Wang;Yu Qiao;Hengshuang Zhao", "authorids": "~rongkun_Zheng1;~Lu_Qi1;~Xi_Chen30;~Yi_Wang19;~Kun_Wang8;~Yu_Qiao1;~Hengshuang_Zhao2", "gender": "M;M;M;M;M;;M", "homepage": ";https://www.luqi.info;;https://shepnerd.github.io/;https://twitter.com/wk910930;;https://hszhao.github.io", "dblp": ";;;17/221-33;;;185/7848", "google_scholar": ";https://scholar.google.com.hk/citations?user=SSI90d4AAAAJ;INISnXkAAAAJ;Xm2M8UwAAAAJ;;;4uE10I0AAAAJ", "orcid": ";;;;;;0000-0001-8277-2706", "linkedin": "rongkun-zheng-4886b6153/;;;;;;hengshuang-zhao-347b8391/?originalSubdomain=hk", "or_profile": "~rongkun_Zheng1;~Lu_Qi1;~Xi_Chen30;~Yi_Wang19;~Kun_Wang8;~Yu_Qiao1;~Hengshuang_Zhao2", "aff": "University of Hong Kong;University of California, Merced;the University of Hong Kong, University of Hong Kong;Shanghai AI Laboratory;SenseTime Group Ltd;;The University of Hong Kong", "aff_domain": "hku.hk;ucmerced.edu;cs.hku.hk;pjlab.org.cn;sensetime.com;;hku.hk", "position": "PhD student;Postdoc;PhD student;Researcher;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nzheng2024syncvis,\ntitle={Sync{VIS}: Synchronized Video Instance Segmentation},\nauthor={rongkun Zheng and Lu Qi and Xi Chen and Yi Wang and Kun Wang and Yu Qiao and Hengshuang Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tTpVHsqTKf}\n}", "github": "", "reviewers": "hMrY;2sLS;MsYp;3JCg", "pdf_size": 1835462, "rating": "5;5;5;7", "confidence": "4;5;5;3", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;3", "wc_summary": "40;105;86;83", "wc_strengths": "28;42;30;49", "wc_weaknesses": "78;112;67;34", "wc_questions": "39;178;162;15", "wc_limitations": "1;12;1;16", "wc_review": "186;449;346;197", "wc_reply_reviewers": "47;214;41;0", "wc_reply_authors": "95;550;122;0", "reply_reviewers": "1;2;1;0", "reply_authors": "2;4;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 78.5, 23.77498685593748 ], "wc_strengths_avg": [ 37.25, 8.642193008721803 ], "wc_weaknesses_avg": [ 72.75, 27.851166941440713 ], "wc_questions_avg": [ 98.5, 72.22361109775667 ], "wc_limitations_avg": [ 7.5, 6.652067347825035 ], "wc_review_avg": [ 294.5, 109.31719901278115 ], "wc_reply_reviewers_avg": [ 75.5, 81.98322999248077 ], "wc_reply_authors_avg": [ 191.75, 211.74084986133403 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:6IBYkOiSu40J:scholar.google.com/&scioq=SyncVIS:+Synchronized+Video+Instance+Segmentation&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "hku.hk;ucmerced.edu;cs.hku.hk;pjlab.org.cn;sensetime.com;;hku.hk", "author_num": 7, "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Hong Kong;University of California, Merced;Shanghai AI Laboratory;SenseTime Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hku.hk;https://www.ucmerced.edu;https://www.shanghai-ai-lab.com;https://www.sensetime.com", "aff_unique_abbr": "HKU;UC Merced;SAIL;SenseTime", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Hong Kong SAR;Merced;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "tU8Xgybudy", "title": "Evaluating Multiview Object Consistency in Humans and Image Models", "track": "Datasets & Benchmarks", "status": "Poster", "tldr": "", "abstract": "We introduce a benchmark to directly evaluate the alignment between human observers and vision models on a 3D shape inference task. We leverage an experimental design from the cognitive sciences: given a set of images, participants identify which contain the same/different objects, despite considerable viewpoint variation. We draw from a diverse range of images that include common objects (e.g., chairs) as well as abstract shapes (i.e., procedurally generated 'nonsense' objects). After constructing over 2000 unique image sets, we administer these tasks to human participants, collecting 35K trials of behavioral data from over 500 participants. This includes explicit choice behaviors as well as intermediate measures, such as reaction time and gaze data. We then evaluate the performance of common vision models (e.g., DINOv2, MAE, CLIP). We find that humans outperform all models by a wide margin. Using a multi-scale evaluation approach, we identify underlying similarities and differences between models and humans: while human-model performance is correlated, humans allocate more time/processing on challenging trials. All images, data, and code can be accessed via our project page.", "keywords": "shape perception;human vision;multi-scale model evaluation", "primary_area": "", "supplementary_material": "/attachment/17dd855cac719622182c074024adcd5405325836.zip", "author": "tyler bonnen;Stephanie Fu;Yutong Bai;Thomas O'Connell;Yoni Friedman;Nancy Kanwisher;Joshua B. Tenenbaum;Alexei A Efros", "authorids": "~tyler_bonnen1;~Stephanie_Fu1;~Yutong_Bai1;~Thomas_O'Connell2;~Yoni_Friedman1;~Nancy_Kanwisher1;~Joshua_B._Tenenbaum1;~Alexei_A_Efros1", "gender": "M;F;F;M;;F;;M", "homepage": "https://tzler.github.io/;https://stephanie-fu.github.io/;https://yutongbai.com/;;https://www.yonifriedman.com;https://web.mit.edu/bcs/nklab/index.shtml;;http://www.eecs.berkeley.edu/~efros/", "dblp": ";270/1541;216/8431;;;28/4665;t/JoshuaBTenenbaum;40/6158", "google_scholar": "https://scholar.google.co.uk/citations?user=6ZkcZUAAAAAJ;Rx-h05AAAAAJ;N1-l4GsAAAAJ;lNQeAXEAAAAJ;0v1kYUEAAAAJ;XxBWSgoAAAAJ;;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ", "orcid": "0000-0001-8709-1651;0000-0001-6591-6026;;0000-0001-9895-8943;;0000-0003-3853-7885;;0000-0001-5720-8070", "linkedin": ";stephanie-fu/;%E9%9B%A8%E6%A1%90-%E7%99%BD-59a44a136/;;;;;alexei-efros-890736a3/", "or_profile": "~tyler_bonnen1;~Stephanie_Fu1;~Yutong_Bai1;~Thomas_O'Connell2;~Yoni_Friedman1;~Nancy_Kanwisher1;~Joshua_B._Tenenbaum1;~Alyosha_Efros1", "aff": "Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Berkeley;Johns Hopkins University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;berkeley.edu;jhu.edu;mit.edu;mit.edu;mit.edu;mit.edu;berkeley.edu", "position": "Postdoc;PhD student;PhD student;Postdoc;PhD student;Full Professor;Professor;Professor", "bibtex": "@inproceedings{\nbonnen2024humanlevel,\ntitle={Human-level shape inferences: A benchmark for evaluating the 3D understanding of vision models},\nauthor={tyler bonnen and Stephanie Fu and Yutong Bai and Thomas O'Connell and Yoni Friedman and Nancy Kanwisher and Joshua B. Tenenbaum and Alexei A Efros},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=tU8Xgybudy}\n}", "github": "", "project": "", "reviewers": "8pbj;H8cM;TLqL;qBmW", "site": "https://openreview.net/forum?id=tU8Xgybudy", "pdf_size": 1521820, "rating": "6;6;7;7", "confidence": "4;4;3;4", "wc_summary_and_contributions": "94;48;153;108", "wc_strengths": "10;2;60;46", "wc_improvement": "2;1;106;8", "wc_limitations": "1;1;1;1", "wc_correctness": "1;1;13;1", "wc_clarity": "1;1;13;1", "wc_relation_to_prior_work": "1;1;31;1", "wc_documentation": "1;1;2;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "112;57;380;168", "wc_reply_reviewers": "0;0;39;54", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 100.75, 37.45246987850067 ], "wc_strengths_avg": [ 29.5, 24.181604578687494 ], "wc_improvement_avg": [ 29.25, 44.39242615582077 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_correctness_avg": [ 4.0, 5.196152422706632 ], "wc_clarity_avg": [ 4.0, 5.196152422706632 ], "wc_relation_to_prior_work_avg": [ 8.5, 12.99038105676658 ], "wc_documentation_avg": [ 1.25, 0.4330127018922193 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 179.25, 122.36701965807617 ], "wc_reply_reviewers_avg": [ 23.25, 23.84716964337697 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2809260583891369700&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;1;2;2;2;2;0", "aff_unique_norm": "University of California, Berkeley;Johns Hopkins University;Massachusetts Institute of Technology", "aff_unique_dep": "Electrical Engineering & Computer Science Department;;", "aff_unique_url": "https://www.berkeley.edu;https://www.jhu.edu;https://web.mit.edu", "aff_unique_abbr": "UC Berkeley;JHU;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reinforced Cross-Domain Knowledge Distillation on Time Series Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93330", "id": "tUHABDZP0Q", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tUHABDZP0Q", "openreview": "https://openreview.net/forum?id=tUHABDZP0Q", "poster": "/media/PosterPDFs/NeurIPS%202024/93330.png?t=1729057247.4792252", "project": "", "author_site": "QING XU, Min Wu, Xiaoli Li, Kezhi Mao, Zhenghua Chen", "tldr": "", "abstract": "Unsupervised domain adaptation methods have demonstrated superior capabilities in handling the domain shift issue which widely exists in various time series tasks. However, their prominent adaptation performances heavily rely on complex model architectures, posing an unprecedented challenge in deploying them on resource-limited devices for real-time monitoring. Existing approaches, which integrates knowledge distillation into domain adaptation frameworks to simultaneously address domain shift and model complexity, often neglect network capacity gap between teacher and student and just coarsely align their outputs over all source and target samples, resulting in poor distillation efficiency. Thus, in this paper, we propose an innovative framework named Reinforced Cross-Domain Knowledge Distillation (RCD-KD) which can effectively adapt to student's network capability via dynamically selecting suitable target domain samples for knowledge transferring. Particularly, a reinforcement learning-based module with a novel reward function is proposed to learn optimal target sample selection policy based on student's capacity. Meanwhile, a domain discriminator is designed to transfer the domain invariant knowledge. Empirical experimental results and analyses on four public time series datasets demonstrate the effectiveness of our proposed method over other state-of-the-art benchmarks.", "keywords": "Knowledge distillation;Domain adaptation;Time series", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/7933254b42012be2f041fcec929d133726974ca6.zip", "author": "QING XU;Min Wu;Xiaoli Li;Kezhi Mao;Zhenghua Chen", "authorids": "~QING_XU5;~Min_Wu2;~Xiaoli_Li1;~Kezhi_Mao1;~Zhenghua_Chen2", "gender": "M;M;M;M;M", "homepage": ";https://sites.google.com/site/wumincf/;https://personal.ntu.edu.sg/xlli/;https://dr.ntu.edu.sg/cris/rp/rp00158;https://zhenghuantu.github.io/", "dblp": ";16/0-8;l/XiaoliLi.html;m/KezhiMao;03/7457.html", "google_scholar": "SlX-ghoAAAAJ;https://scholar.google.com.sg/citations?user=Hji1uWQAAAAJ;E3yQKloAAAAJ;jCsRJXUAAAAJ;https://scholar.google.com.sg/citations?user=WUgu3nwAAAAJ", "orcid": ";0000-0003-0977-3600;0000-0002-0762-6562;;0000-0002-1719-0328", "linkedin": ";;li-xiaoli-41027ba/;;", "or_profile": "~QING_XU5;~Min_Wu2;~Xiaoli_Li1;~Kezhi_Mao1;~Zhenghua_Chen2", "aff": ", A*STAR;Institute for Infocomm Research (I2R), A*STAR;A*STAR;Nanyang Technological University;I2R, A*STAR", "aff_domain": "i2r.a-star.edu.sg;i2r.a-star.edu.sg;a-star.edu.sg;ntu.edu.sg;i2r.a-star.edu.sg", "position": "Researcher;Principal Researcher;Principal Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nxu2024reinforced,\ntitle={Reinforced Cross-Domain Knowledge Distillation on Time Series Data},\nauthor={QING XU and Min Wu and Xiaoli Li and Kezhi Mao and Zhenghua Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tUHABDZP0Q}\n}", "github": "", "reviewers": "nLZW;bzJ9;E4vw;Ltvb", "pdf_size": 587930, "rating": "4;5;6;8", "confidence": "3;3;3;4", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "104;78;83;127", "wc_strengths": "46;85;79;127", "wc_weaknesses": "115;216;136;158", "wc_questions": "101;59;3;91", "wc_limitations": "90;8;3;1", "wc_review": "456;446;304;504", "wc_reply_reviewers": "0;172;9;33", "wc_reply_authors": "0;764;34;31", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.0, 19.3778223750761 ], "wc_strengths_avg": [ 84.25, 28.80429655450728 ], "wc_weaknesses_avg": [ 156.25, 37.69864055904404 ], "wc_questions_avg": [ 63.5, 38.21975928757271 ], "wc_limitations_avg": [ 25.5, 37.32626421167808 ], "wc_review_avg": [ 427.5, 74.59725196010909 ], "wc_reply_reviewers_avg": [ 53.5, 69.47121706145647 ], "wc_reply_authors_avg": [ 207.25, 321.71522733622663 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8783100656536799, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:of97EtX4JSYJ:scholar.google.com/&scioq=Reinforced+Cross-Domain+Knowledge+Distillation+on+Time+Series+Data&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "i2r.a-star.edu.sg;i2r.a-star.edu.sg;a-star.edu.sg;ntu.edu.sg;i2r.a-star.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "A*STAR;Institute for Infocomm Research;Agency for Science, Technology and Research;Nanyang Technological University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.a-star.edu.sg;https://www.i2r.a-star.edu.sg;https://www.a-star.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "A*STAR;I2R;A*STAR;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Deep Submodular Peripteral Networks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93329", "id": "tUpcRQNvVM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tUpcRQNvVM", "openreview": "https://openreview.net/forum?id=tUpcRQNvVM", "poster": "", "project": "", "author_site": "Gantavya Bhatt, Arnav Das, Jeff A Bilmes", "tldr": "", "abstract": "Submodular functions, crucial for various applications, often lack practical learning methods for their acquisition. Seemingly unrelated, learning a scaling from oracles offering graded pairwise preferences (GPC) is underexplored, despite a rich history in psychometrics. In this paper, we introduce deep submodular peripteral networks (DSPNs), a novel parametric family of submodular functions, and methods for their training using a GPC-based strategy to connect and then tackle both of the above challenges. We introduce newly devised GPC-style ``peripteral'' loss which leverages numerically graded relationships between pairs of objects (sets in our case). Unlike traditional contrastive learning, or RHLF preference ranking, our method utilizes graded comparisons, extracting more nuanced information than just binary-outcome comparisons, and contrasts sets of any size (not just two). We also define a novel suite of automatic sampling strategies for training, including active-learning inspired submodular feedback. We demonstrate DSPNs' efficacy in learning submodularity from a costly target submodular function and demonstrate its superiority both for experimental design and online streaming applications.", "keywords": "Submodular Optimization;Learning Set Functions;Experimental Design;Streaming Summarization;Data subset selection;Knowledge Distillation", "primary_area": "active_learning", "supplementary_material": "/attachment/b9ef153c4555f3b8585d58be39995638189da743.zip", "author": "Gantavya Bhatt;Arnav Mohanty Das;Jeff Bilmes", "authorids": "~Gantavya_Bhatt1;~Arnav_Mohanty_Das1;~Jeff_Bilmes1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/gbhatt/;;http://melodi.ee.washington.edu/people/bilmes", "dblp": "265/5828;263/7747;b/JeffABilmes", "google_scholar": "A18gBf4AAAAJ;rnRml4EAAAAJ;L9QufAsAAAAJ", "orcid": ";;0000-0002-7372-8778", "linkedin": "bhattgantavya/;arnavdas/;jbilmes/", "or_profile": "~Gantavya_Bhatt1;~Arnav_Mohanty_Das1;~Jeff_Bilmes1", "aff": "University of Washington, Seattle;University of Washington;University of Washington, Seattle", "aff_domain": "uw.edu;uw.edu;uw.edu", "position": "Graduate Student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbhatt2024deep,\ntitle={Deep Submodular Peripteral Networks},\nauthor={Gantavya Bhatt and Arnav Mohanty Das and Jeff Bilmes},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tUpcRQNvVM}\n}", "github": "", "reviewers": "YWZB;Ltg4;W4dW;fthD", "pdf_size": 34896380, "rating": "6;7;7;7", "confidence": "2;4;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;4", "presentation": "2;2;3;1", "wc_summary": "82;170;43;29", "wc_strengths": "69;61;46;42", "wc_weaknesses": "20;159;19;57", "wc_questions": "19;55;1;225", "wc_limitations": "1;12;1;7", "wc_review": "191;457;110;360", "wc_reply_reviewers": "0;13;5;15", "wc_reply_authors": "0;7;0;7", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.0, 54.93177586788907 ], "wc_strengths_avg": [ 54.5, 10.965856099730654 ], "wc_weaknesses_avg": [ 63.75, 57.0849148199417 ], "wc_questions_avg": [ 75.0, 88.75809822207775 ], "wc_limitations_avg": [ 5.25, 4.602988159880492 ], "wc_review_avg": [ 279.5, 136.5183137897623 ], "wc_reply_reviewers_avg": [ 8.25, 6.057020719792859 ], "wc_reply_authors_avg": [ 3.5, 3.5 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2182259148859514959&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "uw.edu;uw.edu;uw.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FlashAttention-3: Fast and Accurate Attention with Asynchrony and Low-precision", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93328", "id": "tVConYid20", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tVConYid20", "openreview": "https://openreview.net/forum?id=tVConYid20", "poster": "/media/PosterPDFs/NeurIPS%202024/93328.png?t=1733852252.1040823", "project": "", "author_site": "Jay Shah, Ganesh Bikshandi, Ying Zhang, Vijay Thakkar, Pradeep Ramani, Tri Dao", "tldr": "", "abstract": "Attention, as a core layer of the ubiquitous Transformer architecture, is the bottleneck for large language models and long-context applications. elaborated an approach to speed up attention on GPUs through minimizing memory reads/writes. However, it has yet to take advantage of new capabilities present in recent hardware, with FlashAttention-2 achieving only 35% utilization on the H100 GPU.\nWe develop three main techniques to speed up attention on Hopper GPUs: exploiting asynchrony of the Tensor Cores and TMA to (1) overlap overall computation and data movement via warp-specialization and (2) interleave block-wise matmul and softmax operations, and (3) block quantization and incoherent processing that leverages hardware support for FP8 low-precision. We demonstrate that our method, FlashAttention-3, achieves speedup on H100 GPUs by 1.5-2.0$\\times$ with BF16 reaching up to 840 TFLOPs/s (85\\% utilization), and with FP8 reaching 1.3 PFLOPs/s. We validate that FP8 FlashAttention-3 achieves 2.6$\\times$ lower numerical error than a baseline FP8 attention.", "keywords": "attention;hardware-aware algorithms;H100", "primary_area": "infrastructure", "supplementary_material": "", "author": "Jay Shah;Ganesh Bikshandi;Ying Zhang;Vijay Thakkar;Pradeep Ramani;Tri Dao", "authorids": "~Jay_Shah2;~Ganesh_Bikshandi1;~Ying_Zhang34;~Vijay_Thakkar1;~Pradeep_Ramani1;~Tri_Dao1", "gender": "M;M;F;M;M;", "homepage": ";https://scholar.google.com/citations?user=ZaEXuHwAAAAJ&hl=en;;https://thakkarv.dev/;;https://tridao.me/", "dblp": ";;;;;206/7018", "google_scholar": "DiXe7P4AAAAJ;ZaEXuHwAAAAJ;;https://scholar.google.com/citations?hl=en;;NQRw0bQAAAAJ", "orcid": ";;;;;", "linkedin": "jay-shah-335689167/;;ying-zhang-20537417/;;pradeep-ramani/;", "or_profile": "~Jay_Shah2;~Ganesh_Bikshandi1;~Ying_Zhang34;~Vijay_Thakkar1;~Pradeep_Ramani1;~Tri_Dao1", "aff": "Colfax Research;Colfax Research;Meta;Georgia Institute of Technology;NVIDIA;Princeton University", "aff_domain": "colfax-intl.com;research.colfax-intl.com;meta.com;gatech.edu;nvidia.com;princeton.edu", "position": "Researcher;Researcher;Researcher;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nshah2024flashattention,\ntitle={FlashAttention-3: Fast and Accurate Attention with Asynchrony and Low-precision},\nauthor={Jay Shah and Ganesh Bikshandi and Ying Zhang and Vijay Thakkar and Pradeep Ramani and Tri Dao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tVConYid20}\n}", "github": "", "reviewers": "TcT4;qxbQ;KCED;WEiA", "pdf_size": 697391, "rating": "7;7;8;8", "confidence": "3;5;4;4", "soundness": "3;3;3;4", "novelty": "3;4;4;3", "presentation": "3;3;3;4", "wc_summary": "103;79;85;96", "wc_strengths": "100;123;43;43", "wc_weaknesses": "43;296;33;57", "wc_questions": "46;158;46;142", "wc_limitations": "16;17;1;20", "wc_review": "308;673;208;358", "wc_reply_reviewers": "4;7;0;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 9.33742469849155 ], "wc_strengths_avg": [ 77.25, 35.202095108104004 ], "wc_weaknesses_avg": [ 107.25, 109.30776504896622 ], "wc_questions_avg": [ 98.0, 52.306787322488084 ], "wc_limitations_avg": [ 13.5, 7.365459931328117 ], "wc_review_avg": [ 386.75, 173.8668671714079 ], "wc_reply_reviewers_avg": [ 5.0, 3.391164991562634 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10504104149856608975&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "colfax-intl.com;research.colfax-intl.com;meta.com;gatech.edu;nvidia.com;princeton.edu", "author_num": 6, "aff_unique_index": "0;0;1;2;3;4", "aff_unique_norm": "Colfax Research;Meta;Georgia Institute of Technology;NVIDIA;Princeton University", "aff_unique_dep": ";Meta Platforms, Inc.;;NVIDIA Corporation;", "aff_unique_url": ";https://meta.com;https://www.gatech.edu;https://www.nvidia.com;https://www.princeton.edu", "aff_unique_abbr": ";Meta;Georgia Tech;NVIDIA;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "tVO3b68Oyp", "title": "SyllableLM: Learning Coarse Semantic Units for Speech Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "Self-Supervised Transformer Models are the backbone of much of the recent progress in deep learning. However, these models require their inputs to be tokenized, and tokenization strategies for continuous data like audio and vision are often based on simple heuristics such as fixed sized convolutions or discrete clustering. For speech and audio models in particular, the high resolution of waveforms (16,000 samples/second or more) presents a significant challenge, as several times more tokens are used per word than in textual language modeling. In this work, we introduce a controllable, fully-self-supervised technique to dynamically merge speech representations across time to as low as 5 Hz at 60 bits per second while still preserving semantic information. We do this by 1) extracting noisy boundaries through analyzing correlations between mask spans and model losses and 2) iteratively improving these representations with a novel agglomeration technique. Using these new feature representations, we successfully train SyllableLM, a Neural Codec Language Model (NCLM) competitive with current SoTA NCLMs on a range of common benchmarks with a 30x reduction in pretraining compute, 5x reduction in inference compute, and 2.5x reduction in bitrate.", "keywords": "Generative Spoken Language Modeling;Audio;Textless NLP;Representation Learning", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Alan Baade;Puyuan Peng;David Harwath", "authorids": "~Alan_Baade1;~Puyuan_Peng1;~David_Harwath1", "gender": "M;M;M", "homepage": ";https://jasonppy.github.io/;https://www.cs.utexas.edu/~harwath/index.html", "dblp": "317/6140;280/3431;", "google_scholar": "xlce14MAAAAJ;https://scholar.google.com/citations?hl=en;C0kDOzcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Alan_Baade1;~Puyuan_Peng1;~David_Harwath1", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@misc{\nanonymous2024syllablelm,\ntitle={Syllable{LM}: Learning Coarse Semantic Units for Speech Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=tVO3b68Oyp}\n}", "github": "", "project": "", "reviewers": "kvPb;zj85;zmRG;RUrK", "site": "https://openreview.net/forum?id=tVO3b68Oyp", "pdf_size": 5867135, "rating": "3;3;6;8", "confidence": "4;3;3;3", "soundness": "3;2;3;4", "novelty": "2;2;3;4", "presentation": "1;2;3;4", "wc_summary": "184;78;63;130", "wc_strengths": "126;28;163;115", "wc_weaknesses": "507;581;160;85", "wc_questions": "76;156;3;68", "wc_limitations": "6;28;7;15", "wc_review": "899;871;396;413", "wc_reply_reviewers": "25;15;0;13", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 113.75, 47.57297026673865 ], "wc_strengths_avg": [ 108.0, 49.49242366261729 ], "wc_weaknesses_avg": [ 333.25, 214.01679256544332 ], "wc_questions_avg": [ 75.75, 54.29721447735602 ], "wc_limitations_avg": [ 14.0, 8.803408430829505 ], "wc_review_avg": [ 644.75, 240.52897434612737 ], "wc_reply_reviewers_avg": [ 13.25, 8.898735865278843 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5443310539518174, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1685740082033122660&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Equivariant Model Training via Constraint Relaxation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93327", "id": "tWkL7k1u5v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tWkL7k1u5v", "openreview": "https://openreview.net/forum?id=tWkL7k1u5v", "poster": "/media/PosterPDFs/NeurIPS%202024/93327.png?t=1733903282.2892618", "project": "", "author_site": "Stefanos Pertigkiozoglou, Evangelos Chatzipantazis, Shubhendu Trivedi, Kostas Daniilidis", "tldr": "", "abstract": "Equivariant neural networks have been widely used in a variety of applications due to their ability to generalize well in tasks where the underlying data symmetries are known. Despite their successes, such networks can be difficult to optimize and require careful hyperparameter tuning to train successfully. In this work, we propose a novel framework for improving the optimization of such models by relaxing the hard equivariance constraint during training: We relax the equivariance constraint of the network's intermediate layers by introducing an additional non-equivariant term that we progressively constrain until we arrive at an equivariant solution. By controlling the magnitude of the activation of the additional relaxation term, we allow the model to optimize over a larger hypothesis space containing approximate equivariant networks and converge back to an equivariant solution at the end of training. We provide experimental results on different state-of-the-art network architectures, demonstrating how this training framework can result in equivariant models with improved generalization performance. Our code is available at https://github.com/StefanosPert/Equivariant_Optimization_CR", "keywords": "Equivariant Neural Networks;Symmetries;Approximate Equivariance;Optimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Stefanos Pertigkiozoglou;Evangelos Chatzipantazis;Shubhendu Trivedi;Kostas Daniilidis", "authorids": "~Stefanos_Pertigkiozoglou1;~Evangelos_Chatzipantazis1;~Shubhendu_Trivedi2;~Kostas_Daniilidis1", "gender": ";;M;M", "homepage": "https://www.grasp.upenn.edu/people/stefanos-pertigkiozoglou/;https://www.grasp.upenn.edu/people/evangelos-chatzipantazis/;http://www.cis.upenn.edu/~kostas;https://ttic.uchicago.edu/~shubhendu/", "dblp": "232/1802;306/8423;d/KostasDaniilidis;97/9735", "google_scholar": "https://scholar.google.gr/citations?user=8Ti0EGEAAAAJ;qQsYhTgAAAAJ;dGs2BcIAAAAJ;EbyGwncAAAAJ", "orcid": ";;0000-0003-0498-0758;", "linkedin": ";;;", "or_profile": "~Stefanos_Pertigkiozoglou1;~Evangelos_Chatzipantazis1;~Kostas_Daniilidis1;~Shubhendu_Trivedi1", "aff": "School of Engineering and Applied Science, University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;Massachusetts Institute of Technology", "aff_domain": "seas.upenn.edu;seas.upenn.edu;upenn.edu;mit.edu", "position": "PhD student;PhD student;Full Professor;Research Associate", "bibtex": "@inproceedings{\npertigkiozoglou2024improving,\ntitle={Improving Equivariant Model Training via Constraint Relaxation},\nauthor={Stefanos Pertigkiozoglou and Evangelos Chatzipantazis and Shubhendu Trivedi and Kostas Daniilidis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tWkL7k1u5v}\n}", "github": "", "reviewers": "3CAa;b5uM;RSZ3;GoLr", "pdf_size": 1003771, "rating": "4;5;6;7", "confidence": "4;4;2;3", "soundness": "3;3;3;3", "novelty": "1;2;3;3", "presentation": "3;2;3;3", "wc_summary": "22;78;70;91", "wc_strengths": "31;58;15;45", "wc_weaknesses": "123;283;55;37", "wc_questions": "114;247;79;138", "wc_limitations": "63;6;6;1", "wc_review": "353;672;225;312", "wc_reply_reviewers": "291;377;37;27", "wc_reply_authors": "546;406;28;20", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 65.25, 26.070817018267764 ], "wc_strengths_avg": [ 37.25, 16.005858302509115 ], "wc_weaknesses_avg": [ 124.5, 96.96777815336391 ], "wc_questions_avg": [ 144.5, 62.787339488148405 ], "wc_limitations_avg": [ 19.0, 25.485289874749316 ], "wc_review_avg": [ 390.5, 168.96819227298374 ], "wc_reply_reviewers_avg": [ 183.0, 154.07141201403977 ], "wc_reply_authors_avg": [ 250.0, 231.37415586015652 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.674199862463242, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4866344130720885171&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "seas.upenn.edu;seas.upenn.edu;upenn.edu;mit.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Pennsylvania;Massachusetts Institute of Technology", "aff_unique_dep": "School of Engineering and Applied Science;", "aff_unique_url": "https://www.upenn.edu;https://web.mit.edu", "aff_unique_abbr": "UPenn;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "tWvVtOW0qg", "title": "Data Measurements for Decentralized Data Markets", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Decentralized data markets can provide more equitable forms of data acquisition for machine learning.\n However, to realize practical marketplaces, efficient techniques for seller selection need to be developed. \n We propose and benchmark federated data measurements to allow a data buyer to find sellers with relevant and diverse datasets.\n Diversity and relevance measures enable a buyer to make relative comparisons between sellers without requiring intermediate brokers and training task-dependent models.", "keywords": "data valuation;data measurements;data markets", "primary_area": "", "supplementary_material": "/attachment/f359c0e89d11ad81809b0eb79f47718a77d1ae28.zip", "author": "Charles Lu;Mohammad Mohammadi Amiri;Ramesh Raskar", "authorids": "~Charles_Lu1;~Mohammad_Mohammadi_Amiri1;~Ramesh_Raskar1", "gender": "M;;M", "homepage": "https://www.mit.edu/~luchar/;;https://www.media.mit.edu/people/raskar/overview/", "dblp": "61/11017-1;;r/RameshRaskar", "google_scholar": "zyZR238AAAAJ;;", "orcid": "0000-0002-8749-4722;;0000-0002-3254-3224", "linkedin": "charlie-lu/;;", "or_profile": "~Charles_Lu1;~Mohammad_Mohammadi_Amiri1;~Ramesh_Raskar1", "aff": "Massachusetts Institute of Technology;;Massachusetts Institute of Technology", "aff_domain": "mit.edu;;mit.edu", "position": "MS student;;Associate Professor", "bibtex": "@misc{\nanonymous2024data,\ntitle={Data Measurements for Decentralized Data Markets},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=tWvVtOW0qg}\n}", "github": "", "project": "", "reviewers": "XpAY;2X1C;pi4Y;wcRR", "site": "https://openreview.net/forum?id=tWvVtOW0qg", "pdf_size": 4836795, "rating": "4;5;7;9", "confidence": "2;4;3;5", "wc_summary_and_contributions": "34;72;88;82", "wc_strengths": "5;7;65;3", "wc_improvement": "5;32;81;33", "wc_limitations": "11;43;58;1", "wc_correctness": "7;43;5;1", "wc_clarity": "1;120;4;1", "wc_relation_to_prior_work": "1;28;7;1", "wc_documentation": "1;1;4;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "66;347;313;124", "wc_reply_reviewers": "0;19;0;0", "wc_reply_authors": "45;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 69.0, 21.0 ], "wc_strengths_avg": [ 20.0, 26.019223662515376 ], "wc_improvement_avg": [ 37.75, 27.380421837510102 ], "wc_limitations_avg": [ 28.25, 23.14492384951828 ], "wc_correctness_avg": [ 14.0, 16.881943016134134 ], "wc_clarity_avg": [ 31.5, 51.11017511220246 ], "wc_relation_to_prior_work_avg": [ 9.25, 11.098986440211556 ], "wc_documentation_avg": [ 1.75, 1.299038105676658 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 212.5, 119.8801484817232 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 11.25, 19.48557158514987 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.7568892626614565, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5613334562110423974&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Reversing the Forget-Retain Objectives: An Efficient LLM Unlearning Framework from Logit Difference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93326", "id": "tYdR1lTWqh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tYdR1lTWqh", "openreview": "https://openreview.net/forum?id=tYdR1lTWqh", "poster": "/media/PosterPDFs/NeurIPS%202024/93326.png?t=1731312393.665635", "project": "", "author_site": "Jiabao Ji, Yujian Liu, Yang Zhang, Gaowen Liu, Ramana Kompella, Sijia Liu, Shiyu Chang", "tldr": "", "abstract": "As Large Language Models (LLMs) demonstrate extensive capability in learning from documents, LLM unlearning becomes an increasingly important research area to address concerns of LLMs in terms of privacy, copyright, etc. A conventional LLM unlearning task typically involves two goals: (1) The target LLM should forget the knowledge in the specified forget documents; and (2) it should retain the other knowledge that the LLM possesses, for which we assume access to a small number of retain documents. To achieve both goals, a mainstream class of LLM unlearning methods introduces an optimization framework with a combination of two objectives \u2013 maximizing the prediction loss on the forget documents while minimizing that on the retain documents, which suffers from two challenges, degenerated output and catastrophic forgetting. In this paper, we propose a novel unlearning framework called Unlearning from Logit Difference (ULD), which introduces an assistant LLM that aims to achieve the opposite of the unlearning goals: remembering the forget documents and forgetting the retain knowledge. ULD then derives the unlearned LLM by computing the logit difference between the target and the assistant LLMs. We show that such reversed objectives would naturally resolve both aforementioned challenges while significantly improving the training efficiency. Extensive experiments demonstrate that our method efficiently achieves the intended forgetting while preserving the LLM\u2019s overall capabilities, reducing training time by more than threefold. Notably, our method loses 0% of model utility on the ToFU benchmark, whereas baseline methods may sacrifice 17% of utility on average to achieve comparable forget quality.", "keywords": "Large Language Model;LLM Unlearn", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/166810ed19da5e422ecd9984f94e5eb6fe3944ab.zip", "author": "Jiabao Ji;Yujian Liu;Yang Zhang;Gaowen Liu;Ramana Rao Kompella;Sijia Liu;Shiyu Chang", "authorids": "~Jiabao_Ji1;~Yujian_Liu1;~Yang_Zhang3;~Gaowen_Liu4;~Ramana_Rao_Kompella1;~Sijia_Liu1;~Shiyu_Chang2", "gender": "M;M;M;F;M;M;Unspecified", "homepage": "https://question406.github.io;https://yujianll.github.io;;;https://linkedin.com/en/rkompella;https://lsjxjtu.github.io/;http://people.csail.mit.edu/chang87/", "dblp": "284/0699;206/8853;06/6785-1;136/1007;98/2327;128/6972-1;28/9988", "google_scholar": "gqI7uDMAAAAJ;rLetNLIAAAAJ;_-5PSgQAAAAJ;NIv_aeQAAAAJ;uf9RZboAAAAJ;C7dO_UgAAAAJ;r21asW4AAAAJ", "orcid": ";;;0009-0000-9194-1233;;;", "linkedin": ";;;;;;", "or_profile": "~Jiabao_Ji1;~Yujian_Liu1;~Yang_Zhang3;~Gaowen_Liu4;~Ramana_Rao_Kompella1;~Sijia_Liu1;~Shiyu_Chang2", "aff": "University of California, Santa Barbara;University of California, Santa Barbara;International Business Machines;Cisco Systems;Cisco;Michigan State University;University of California, Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;ibm.com;cisco.com;cisco.com;msu.edu;ucsb.edu", "position": "PhD student;PhD student;Research Staff Employee;Researcher;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nji2024reversing,\ntitle={Reversing the Forget-Retain Objectives: An Efficient {LLM} Unlearning Framework from Logit Difference},\nauthor={Jiabao Ji and Yujian Liu and Yang Zhang and Gaowen Liu and Ramana Rao Kompella and Sijia Liu and Shiyu Chang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tYdR1lTWqh}\n}", "github": "", "reviewers": "F8Xo;uFqn;6JsY;Y4ow", "pdf_size": 1344086, "rating": "5;6;6;7", "confidence": "3;4;4;4", "soundness": "2;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;4;3", "wc_summary": "97;94;114;51", "wc_strengths": "25;69;39;59", "wc_weaknesses": "142;67;443;61", "wc_questions": "2;63;2;12", "wc_limitations": "9;41;12;1", "wc_review": "275;334;610;184", "wc_reply_reviewers": "0;81;36;10", "wc_reply_authors": "1335;325;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "3;3;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.0, 23.22713929867387 ], "wc_strengths_avg": [ 48.0, 17.11724276862369 ], "wc_weaknesses_avg": [ 178.25, 156.1495677227446 ], "wc_questions_avg": [ 19.75, 25.301926804099327 ], "wc_limitations_avg": [ 15.75, 15.122417134836613 ], "wc_review_avg": [ 350.75, 158.92981941725097 ], "wc_reply_reviewers_avg": [ 31.75, 31.32391259086259 ], "wc_reply_authors_avg": [ 415.0, 547.4828764445515 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9409026630280876103&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ucsb.edu;ucsb.edu;ibm.com;cisco.com;cisco.com;msu.edu;ucsb.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;2;3;0", "aff_unique_norm": "University of California, Santa Barbara;International Business Machines Corporation;Cisco Systems;Michigan State University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsb.edu;https://www.ibm.com;https://www.cisco.com;https://www.msu.edu", "aff_unique_abbr": "UCSB;IBM;Cisco;MSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Representation Matters: Human-like Sketches in One-shot Drawing Tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93325", "id": "tZRpvLXevU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tZRpvLXevU", "openreview": "https://openreview.net/forum?id=tZRpvLXevU", "poster": "", "project": "", "author_site": "Victor Boutin, Rishav Mukherji, Aditya Agrawal, Sabine Muzellec, Thomas Fel, Thomas Serre, Rufin VanRullen", "tldr": "", "abstract": "Humans can effortlessly draw new categories from a single exemplar, a feat that has long posed a challenge for generative models. However, this gap has started to close with recent advances in diffusion models. This one-shot drawing task requires powerful inductive biases that have not been systematically investigated. Here, we study how different inductive biases shape the latent space of Latent Diffusion Models (LDMs). Along with standard LDM regularizers (KL and vector quantization), we explore supervised regularizations (including classification and prototype-based representation) and contrastive inductive biases (using SimCLR and redundancy reduction objectives). We demonstrate that LDMs with redundancy reduction and prototype-based regularizations produce near-human-like drawings (regarding both samples' recognizability and originality) -- better mimicking human perception (as evaluated psychophysically). Overall, our results suggest that the gap between humans and machines in one-shot drawings is almost closed.", "keywords": "Neuroscience;Cognitive Science;One-Shot Generative Models;Latent Diffusion Models;Human Machine alignment;Human-Machine comparison", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Victor Boutin;Rishav Mukherji;Aditya Agrawal;Sabine Muzellec;Thomas FEL;Thomas Serre;Rufin VanRullen", "authorids": "~Victor_Boutin2;~Rishav_Mukherji1;~Aditya_Agrawal3;~Sabine_Muzellec1;~Thomas_FEL1;~Thomas_Serre1;~Rufin_VanRullen1", "gender": "M;M;M;;M;M;M", "homepage": ";https://amapocho.github.io/;https://aditya-agrawal-30502.netlify.app/;;https://thomasfel.me;https://serre-lab.clps.brown.edu/;https://rufinv.github.io", "dblp": "228/3333;338/9227;;;274/2390;;83/2121", "google_scholar": "Z-YF5FsAAAAJ;s6F4ZKcAAAAJ;5OP8PEEAAAAJ;;1m5Mlx4AAAAJ;kZlPW4wAAAAJ;1pwyaYgAAAAJ", "orcid": "0000-0003-3372-5940;;;;;;0000-0002-3611-7716", "linkedin": ";rishav-mukherji/;aditya-agrawal-9aba24208/;;;;", "or_profile": "~Victor_Boutin2;~Rishav_Mukherji1;~Aditya_Agrawal3;~Sabine_Muzellec1;~Thomas_FEL1;~Thomas_Serre1;~Rufin_VanRullen1", "aff": "Brown University;Birla Institute of Technology and Science, Pilani;Birla Institute of Tchnology and Science - KK Birla Goa Campus;;Brown University;Universit\u00e9 de Toulouse;CNRS", "aff_domain": "brown.edu;bits-pilani.ac.in;goa.bits-pilani.ac.in;;brown.edu;univ-toulouse.fr;cnrs.fr", "position": "Postdoc;Undergrad student;Undergrad student;;PhD student;Full Professor;Research Director", "bibtex": "@inproceedings{\nboutin2024latent,\ntitle={Latent Representation Matters: Human-like Sketches in One-shot Drawing Tasks},\nauthor={Victor Boutin and Rishav Mukherji and Aditya Agrawal and Sabine Muzellec and Thomas FEL and Thomas Serre and Rufin VanRullen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tZRpvLXevU}\n}", "github": "", "reviewers": "1Pi8;ZReU;vmMb;YDjR", "pdf_size": 15385581, "rating": "4;4;4;6", "confidence": "5;4;3;2", "soundness": "2;2;2;3", "novelty": "2;2;2;2", "presentation": "3;3;2;4", "wc_summary": "59;95;166;87", "wc_strengths": "55;105;156;110", "wc_weaknesses": "150;158;102;109", "wc_questions": "13;218;28;9", "wc_limitations": "5;44;1;21", "wc_review": "282;620;453;336", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "16;16;16;16", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 101.75, 39.42952573896876 ], "wc_strengths_avg": [ 106.5, 35.76660453551609 ], "wc_weaknesses_avg": [ 129.75, 24.539508960042376 ], "wc_questions_avg": [ 67.0, 87.467136685729 ], "wc_limitations_avg": [ 17.75, 16.90229274388537 ], "wc_review_avg": [ 422.75, 129.57502652903452 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 16.0, 0.0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QuPKEkbUXmMJ:scholar.google.com/&scioq=Latent+Representation+Matters:+Human-like+Sketches+in+One-shot+Drawing+Tasks&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "brown.edu;bits-pilani.ac.in;goa.bits-pilani.ac.in;;brown.edu;univ-toulouse.fr;cnrs.fr", "author_num": 7, "aff_unique_index": "0;1;1;0;2;3", "aff_unique_norm": "Brown University;Birla Institute of Technology and Science;Universit\u00e9 de Toulouse;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.brown.edu;https://www.bits-pilani.ac.in;https://www.univ-toulouse.fr;https://www.cnrs.fr", "aff_unique_abbr": "Brown;BITS Pilani;UT;CNRS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Pilani;Goa", "aff_country_unique_index": "0;1;1;0;2;2", "aff_country_unique": "United States;India;France" }, { "title": "Transductive Active Learning: Theory and Applications", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93324", "id": "tZtepJBtHg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tZtepJBtHg", "openreview": "https://openreview.net/forum?id=tZtepJBtHg", "poster": "/media/PosterPDFs/NeurIPS%202024/93324.png?t=1733130001.690524", "project": "", "author_site": "Jonas H\u00fcbotter, Bhavya, Lenart Treven, Yarden As, Andreas Krause", "tldr": "", "abstract": "We study a generalization of classical active learning to real-world settings with concrete prediction targets where sampling is restricted to an accessible region of the domain, while prediction targets may lie outside this region.\nWe analyze a family of decision rules that sample adaptively to minimize uncertainty about prediction targets.\nWe are the first to show, under general regularity assumptions, that such decision rules converge uniformly to the smallest possible uncertainty obtainable from the accessible data.\nWe demonstrate their strong sample efficiency in two key applications: active fine-tuning of large neural networks and safe Bayesian optimization, where they achieve state-of-the-art performance.", "keywords": "active learning;experimental design;bandits;Bayesian optimization;neural networks;deep learning;fine-tuning;transfer learning;transductive learning;generalization;extrapolation", "primary_area": "active_learning", "supplementary_material": "/attachment/a9eec6b4c7d0fa9e7795b4fdd37068d3aa59aa2f.zip", "author": "Jonas H\u00fcbotter;Bhavya Sukhija;Lenart Treven;Yarden As;Andreas Krause", "authorids": "~Jonas_H\u00fcbotter1;~Bhavya_Sukhija1;~Lenart_Treven1;~Yarden_As1;~Andreas_Krause1", "gender": "M;M;M;M;M", "homepage": "https://jonhue.github.io;;;https://github.com/yardenas;https://las.inf.ethz.ch/krausea", "dblp": "300/4583;312/4742;267/9666;312/4578;87/1831-1.html", "google_scholar": "pxi_RkwAAAAJ;;CDnzTWkAAAAJ;;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";0000-0001-6238-9734;;;0000-0001-7260-9673", "linkedin": "jonhue/;;lenart-treven/;yardenas/;krausea/", "or_profile": "~Jonas_H\u00fcbotter1;~Bhavya_Sukhija1;~Lenart_Treven1;~Yarden_As1;~Andreas_Krause1", "aff": "ETH Zurich;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Department of Computer Science, ETHZ - ETH Zurich;ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nh{\\\"u}botter2024transductive,\ntitle={Transductive Active Learning: Theory and Applications},\nauthor={Jonas H{\\\"u}botter and Bhavya Sukhija and Lenart Treven and Yarden As and Andreas Krause},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tZtepJBtHg}\n}", "github": "", "reviewers": "aHJz;yyUG;4uNa;WV1c", "pdf_size": 4484277, "rating": "4;5;7;7", "confidence": "4;2;2;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "4;2;3;3", "wc_summary": "91;92;79;133", "wc_strengths": "32;32;50;66", "wc_weaknesses": "134;252;18;117", "wc_questions": "1;184;15;29", "wc_limitations": "1;1;1;2", "wc_review": "259;561;163;347", "wc_reply_reviewers": "305;481;18;34", "wc_reply_authors": "680;1737;23;13", "reply_reviewers": "2;3;1;1", "reply_authors": "5;6;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 98.75, 20.42516829796024 ], "wc_strengths_avg": [ 45.0, 14.177446878757825 ], "wc_weaknesses_avg": [ 130.25, 83.0854229091963 ], "wc_questions_avg": [ 57.25, 73.84570061960277 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 332.5, 147.1011556718709 ], "wc_reply_reviewers_avg": [ 209.5, 193.84594398645538 ], "wc_reply_authors_avg": [ 613.25, 702.8450664975887 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.19245008972987526, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11590203816998040926&as_sdt=5,34&sciodt=0,34&hl=en", "gs_version_total": 7, "email": "ethz.ch;ethz.ch;ethz.ch;inf.ethz.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "When to Act and When to Ask: Policy Learning With Deferral Under Hidden Confounding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93323", "id": "taI8M5DiXj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=taI8M5DiXj", "openreview": "https://openreview.net/forum?id=taI8M5DiXj", "poster": "", "project": "", "author_site": "Marah Ghoummaid, Uri Shalit", "tldr": "", "abstract": "We consider the task of learning how to act in collaboration with a human expert based on observational data. The task is motivated by high-stake scenarios such as healthcare and welfare where algorithmic action recommendations are made to a human expert, opening the option of deferring making a recommendation in cases where the human might act better on their own.\n This task is especially challenging when dealing with observational data, as using such data runs the risk of hidden confounders whose existence can lead to biased and harmful policies. However, unlike standard policy learning, the presence of a human expert can mitigate some of these risks. We build on the work of Mozannar and Sontag (2020) on consistent surrogate loss for learning with the option of deferral to an expert, where they solve a cost-sensitive supervised classification problem. Since we are solving a causal problem, where labels don\u2019t exist, we use a causal model to learn costs which are robust to a bounded degree of hidden confounding.\n We prove that our approach can take advantage of the strengths of both the model and the expert to obtain a better policy than either. We demonstrate our results by conducting experiments on synthetic and semi-synthetic data and show the advantages of our method compared to baselines.", "keywords": "policy learning;causal inference;sensitivity analysis;human-algorithm collaboration", "primary_area": "causal_inference", "supplementary_material": "/attachment/f4989a50707e637d261905f2ed4edcca62ad108d.zip", "author": "Marah Ghoummaid;Uri Shalit", "authorids": "~Marah_Ghoummaid1;~Uri_Shalit1", "gender": ";M", "homepage": ";", "dblp": "345/8137.html;87/7049", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.il/citations?user=aeGDj-IAAAAJ", "orcid": ";0000-0002-4026-2692", "linkedin": "marah-ghoummaid-aa2b29183/;", "or_profile": "~Marah_Ghoummaid1;~Uri_Shalit1", "aff": "Bosch;Technion", "aff_domain": "bosch.com;technion.ac.il", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nghoummaid2024when,\ntitle={When to Act and When to Ask: Policy Learning With Deferral Under Hidden Confounding},\nauthor={Marah Ghoummaid and Uri Shalit},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=taI8M5DiXj}\n}", "github": "", "reviewers": "fb21;r89L;ukAA;Vchf;GgdC", "pdf_size": 879127, "rating": "4;5;6;6;7", "confidence": "5;5;4;4;4", "soundness": "3;3;3;2;3", "novelty": "2;3;2;3;3", "presentation": "2;2;3;2;3", "wc_summary": "93;49;64;72;129", "wc_strengths": "85;28;38;12;68", "wc_weaknesses": "395;208;41;34;12", "wc_questions": "2;88;76;499;56", "wc_limitations": "1;1;39;23;38", "wc_review": "576;374;258;640;303", "wc_reply_reviewers": "53;15;9;17;90", "wc_reply_authors": "352;0;0;435;48", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;2;2", "rating_avg": [ 5.6, 1.0198039027185568 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 81.4, 27.717142709882634 ], "wc_strengths_avg": [ 46.2, 26.64132128855474 ], "wc_weaknesses_avg": [ 138.0, 146.32156368765337 ], "wc_questions_avg": [ 144.2, 179.82925234788692 ], "wc_limitations_avg": [ 20.4, 16.82379267585047 ], "wc_review_avg": [ 430.2, 151.1719550710382 ], "wc_reply_reviewers_avg": [ 36.8, 30.766215236847057 ], "wc_reply_authors_avg": [ 167.0, 187.6102342624197 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8807048459279793, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:n4B4ZAAe2JIJ:scholar.google.com/&scioq=When+to+Act+and+When+to+Ask:+Policy+Learning+With+Deferral+Under+Hidden+Confounding&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "bosch.com;technion.ac.il", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Robert Bosch GmbH;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.bosch.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Bosch;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Israel" }, { "title": "UPS: Unified Projection Sharing for Lightweight Single-Image Super-resolution and Beyond", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93322", "id": "tacb2bFZcm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tacb2bFZcm", "openreview": "https://openreview.net/forum?id=tacb2bFZcm", "poster": "/media/PosterPDFs/NeurIPS%202024/93322.png?t=1731469443.54697", "project": "", "author_site": "Kun Zhou, Xinyu Lin, Zhonghang LIU, Xiaoguang Han, Jiangbo Lu", "tldr": "", "abstract": "To date, transformer-based frameworks have demonstrated impressive results in single-image super-resolution (SISR). However, under practical lightweight scenarios, the complex interaction of deep image feature extraction and similarity modeling limits the performance of these methods, since they require simultaneous layer-specific optimization of both two tasks. In this work, we introduce a novel Unified Projection Sharing algorithm(UPS) to decouple the feature extraction and similarity modeling, achieving notable performance. To do this, we establish a unified projection space defined by a learnable projection matrix, for similarity calculation across all self-attention layers. As a result, deep image feature extraction remains a per-layer optimization manner, while similarity modeling is carried out by projecting these image features onto the shared projection space. Extensive experiments demonstrate that our proposed UPS achieves state-of-the-art performance relative to leading lightweight SISR methods, as verified by various popular benchmarks. Moreover, our unified optimized projection space exhibits encouraging robustness performance for unseen data (degraded and depth images). Finally, UPS also demonstrates promising results across various image restoration tasks, including real-world and classic SISR, image denoising, and image deblocking.", "keywords": "Lightweight SISR;Projection Space", "primary_area": "machine_vision", "supplementary_material": "/attachment/c7c7687db5e1b9cab16d91c1b7e3c55e5a680d6d.zip", "author": "Kun Zhou;Xinyu Lin;Zhonghang LIU;Xiaoguang Han;Jiangbo Lu", "authorids": "~Kun_Zhou3;~Xinyu_Lin2;~Zhonghang_LIU1;~Xiaoguang_Han2;~Jiangbo_Lu1", "gender": "F;M;M;;M", "homepage": ";https://zhonghang-liu.github.io/homepage/;https://gaplab.cuhk.edu.cn/;https://sites.google.com/site/jiangbolu/;", "dblp": "32/3970;332/5803;60/8294;77/6697;", "google_scholar": "BVZIVQgAAAAJ;https://scholar.google.com/citations?view_op=list_works;;;https://scholar.google.com.hk/citations?user=OXCWQz0AAAAJ", "orcid": "0000-0003-0455-6199;;;;", "linkedin": ";;;;", "or_profile": "~Xinyu_Lin2;~Zhonghang_LIU1;~Xiaoguang_Han2;~Jiangbo_Lu1;~Zhou_Kun1", "aff": "The Chinese University of Hong Kong, Shenzhen;Singapore Management University;The Chinese University of Hong Kong, Shenzhen;SmartMore Corporation;The Chinese University of Hong Kong (Shenzhen\uff09", "aff_domain": "cuhk.edu.cn;smu.edu.sg;cuhk.edu.cn;smartmore.com;link.cuhk.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Chief Technology Officer;PhD student", "bibtex": "@inproceedings{\nzhou2024ups,\ntitle={{UPS}: Unified Projection Sharing for Lightweight Single-Image Super-resolution and Beyond},\nauthor={Kun Zhou and Xinyu Lin and Zhonghang LIU and Xiaoguang Han and Jiangbo Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tacb2bFZcm}\n}", "github": "", "reviewers": "HE6k;RnSV;fbd3;Pasd", "pdf_size": 2534307, "rating": "4;4;5;7", "confidence": "5;5;5;4", "soundness": "3;3;2;3", "novelty": "2;2;2;3", "presentation": "3;3;2;3", "wc_summary": "56;51;82;63", "wc_strengths": "54;33;50;26", "wc_weaknesses": "149;180;108;69", "wc_questions": "2;8;70;3", "wc_limitations": "1;3;6;18", "wc_review": "262;275;316;179", "wc_reply_reviewers": "0;76;15;15", "wc_reply_authors": "99;342;24;28", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.0, 11.76860229593982 ], "wc_strengths_avg": [ 40.75, 11.60549438843516 ], "wc_weaknesses_avg": [ 126.5, 41.88376773882693 ], "wc_questions_avg": [ 20.75, 28.525208149985513 ], "wc_limitations_avg": [ 7.0, 6.59545297913646 ], "wc_review_avg": [ 258.0, 49.77449145897927 ], "wc_reply_reviewers_avg": [ 26.5, 29.227555491350966 ], "wc_reply_authors_avg": [ 123.25, 129.77167449023688 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13853535767708074457&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "cuhk.edu.cn;smu.edu.sg;cuhk.edu.cn;smartmore.com;link.cuhk.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Singapore Management University;SmartMore Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.smu.edu.sg;https://www.smartmore.com/", "aff_unique_abbr": "CUHK;SMU;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "title": "KALM: Knowledgeable Agents by Offline Reinforcement Learning from Large Language Model Rollouts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93321", "id": "tb1MlJCY5g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tb1MlJCY5g", "openreview": "https://openreview.net/forum?id=tb1MlJCY5g", "poster": "", "project": "", "author_site": "Jing-Cheng Pang, Si-Hang Yang, Kaiyuan Li, Jiaji Zhang, Xiong-Hui Chen, Nan Tang, Yang Yu", "tldr": "", "abstract": "Reinforcement learning (RL) traditionally trains agents using interaction data, which limits their capabilities to the scope of the training data. To create more knowledgeable agents, leveraging knowledge from large language models (LLMs) has shown a promising way. Despite various attempts to combine LLMs with RL, there is commonly a semantic gap between action signals and LLM tokens, which hinders their integration. This paper introduces a novel approach, KALM (Knowledgeable Agents from Language Model Rollouts), to learn knowledgeable agents by bridging this gap. KALM extracts knowledge from LLMs in the form of imaginary rollouts, which agents can learn through offline RL. To overcome the limitation that LLMs are inherently text-based and may be incompatible with numerical environmental data, KALM fine-tunes the LLM to perform bidirectional translation between textual goals and rollouts. This process enables the LLM to understand the environment better, facilitating the generation of meaningful rollouts. Experiments on robotic manipulation tasks demonstrate that KALM allows agents to rephrase complex goals and tackle novel tasks requiring new optimal behaviors. KALM achieves a 46% success rate in completing 1400 various novel goals, significantly outperforming the 26% success rate of baseline methods. Project homepage: https://kalmneurips2024.github.io.", "keywords": "reinforcement learning;large language models;knowledgeable agents", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Jing-Cheng Pang;Si-Hang Yang;Kaiyuan Li;Jiaji Zhang;Xiong-Hui Chen;Nan Tang;Yang Yu", "authorids": "~Jing-Cheng_Pang1;~Si-Hang_Yang1;~Kaiyuan_Li2;~Jiaji_Zhang1;~Xiong-Hui_Chen1;~Nan_Tang4;~Yang_Yu5", "gender": "M;M;;M;M;M;", "homepage": "https://www.lamda.nju.edu.cn/pangjc;;;http://www.lamda.nju.edu.cn/zhangjj/;http://www.lamda.nju.edu.cn/chenxh/;https://www.lamda.nju.edu.cn/tangn/;", "dblp": "254/2679;;;;241/7938;;", "google_scholar": "R3Y_WrkAAAAJ;;;;H5pguCYAAAAJ;;", "orcid": ";;;;;;", "linkedin": ";si-hang-yang-aa0796235/;;;;;", "or_profile": "~Jing-Cheng_Pang1;~Si-Hang_Yang1;~Kaiyuan_Li2;~Jiaji_Zhang1;~Xiong-Hui_Chen1;~Nan_Tang4;~Yang_Yu5", "aff": "Nanjing University;;;Nanjing University;Nanjing University;Nanjing University;", "aff_domain": "nju.edu.cn;;;nju.edu.cn;nju.edu.cn;nju.edu.cn;", "position": "PhD student;;;MS student;PhD student;PhD student;", "bibtex": "@inproceedings{\npang2024kalm,\ntitle={{KALM}: Knowledgeable Agents by Offline Reinforcement Learning from Large Language Model Rollouts},\nauthor={Jing-Cheng Pang and Si-Hang Yang and Kaiyuan Li and Jiaji Zhang and Xiong-Hui Chen and Nan Tang and Yang Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tb1MlJCY5g}\n}", "github": "", "reviewers": "bRuq;1PB3;JwPT;rv4h", "pdf_size": 3271692, "rating": "5;5;6;6", "confidence": "3;3;5;3", "soundness": "3;2;3;2", "novelty": "3;3;3;3", "presentation": "3;2;4;2", "wc_summary": "68;165;53;80", "wc_strengths": "26;62;62;140", "wc_weaknesses": "67;59;132;189", "wc_questions": "35;43;35;65", "wc_limitations": "30;15;4;11", "wc_review": "226;344;286;485", "wc_reply_reviewers": "27;37;63;17", "wc_reply_authors": "96;238;97;16", "reply_reviewers": "1;1;2;1", "reply_authors": "3;4;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 91.5, 43.5 ], "wc_strengths_avg": [ 72.5, 41.650330130744464 ], "wc_weaknesses_avg": [ 111.75, 52.826958080131774 ], "wc_questions_avg": [ 44.5, 12.278029157808675 ], "wc_limitations_avg": [ 15.0, 9.513148795220223 ], "wc_review_avg": [ 335.25, 95.99837238203573 ], "wc_reply_reviewers_avg": [ 36.0, 17.11724276862369 ], "wc_reply_authors_avg": [ 111.75, 79.95741053836099 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1504500513772795136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nju.edu.cn;;;nju.edu.cn;nju.edu.cn;nju.edu.cn;", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Reimagining Mutual Information for Enhanced Defense against Data Leakage in Collaborative Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93320", "id": "tdZLKY9usl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tdZLKY9usl", "openreview": "https://openreview.net/forum?id=tdZLKY9usl", "poster": "/media/PosterPDFs/NeurIPS%202024/93320.png?t=1731644473.5911658", "project": "", "author_site": "Lin Duan, Jingwei Sun, Jinyuan Jia, Yiran Chen, Maria Gorlatova", "tldr": "", "abstract": "Edge-cloud collaborative inference empowers resource-limited IoT devices to support deep learning applications without disclosing their raw data to the cloud server, thus protecting user's data. Nevertheless, prior research has shown that collaborative inference still results in the exposure of input and predictions from edge devices. To defend against such data leakage in collaborative inference, we introduce InfoScissors, a defense strategy designed to reduce the mutual information between a model's intermediate outcomes and the device's input and predictions. We evaluate our defense on several datasets in the context of diverse attacks. Besides the empirical comparison, we provide a theoretical analysis of the inadequacies of recent defense strategies that also utilize mutual information, particularly focusing on those based on the Variational Information Bottleneck (VIB) approach. We illustrate the superiority of our method and offer a theoretical analysis of it.", "keywords": "Collaborative inference", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/37b2d1f6804c1fc7af9dec4b15d87ed28bb4fb3a.zip", "author": "Lin Duan;Jingwei Sun;Jinyuan Jia;Yiran Chen;Maria Gorlatova", "authorids": "~Lin_Duan1;~Jingwei_Sun2;~Jinyuan_Jia2;~Yiran_Chen1;~Maria_Gorlatova1", "gender": "F;M;;M;", "homepage": ";;https://jinyuan-jia.github.io/;https://ece.duke.edu/people/yiran-chen/;", "dblp": "203/8667;66/7761-2;24/5124-1.html;80/1641;", "google_scholar": "3KGmyogAAAAJ;https://scholar.google.com/citations?hl=zh-CN;iyg4ytkAAAAJ;;", "orcid": ";;0000-0002-9785-7769;0000-0002-1486-8412;", "linkedin": ";;;;", "or_profile": "~Lin_Duan1;~Jingwei_Sun2;~Jinyuan_Jia2;~Yiran_Chen1;~Maria_Gorlatova1", "aff": "Duke University;Duke University;Pennsylvania State University;Duke University;", "aff_domain": "duke.edu;duke.edu;psu.edu;duke.edu;", "position": "PhD student;PhD student;Assistant Professor;Professor;", "bibtex": "@inproceedings{\nduan2024reimagining,\ntitle={Reimagining Mutual Information for Enhanced Defense against Data Leakage in Collaborative Inference},\nauthor={Lin Duan and Jingwei Sun and Jinyuan Jia and Yiran Chen and Maria Gorlatova},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tdZLKY9usl}\n}", "github": "", "reviewers": "B5D2;oDHU;cnFp", "pdf_size": 9103571, "rating": "4;5;7", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "101;30;94", "wc_strengths": "10;17;51", "wc_weaknesses": "229;42;25", "wc_questions": "3;1;356", "wc_limitations": "1;11;22", "wc_review": "344;101;548", "wc_reply_reviewers": "0;0;76", "wc_reply_authors": "55;60;14", "reply_reviewers": "0;0;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 75.0, 31.94787421201396 ], "wc_strengths_avg": [ 26.0, 17.90716802475106 ], "wc_weaknesses_avg": [ 98.66666666666667, 92.42053643836718 ], "wc_questions_avg": [ 120.0, 166.87919782485375 ], "wc_limitations_avg": [ 11.333333333333334, 8.576453553512405 ], "wc_review_avg": [ 331.0, 182.7183625145541 ], "wc_reply_reviewers_avg": [ 25.333333333333332, 35.82674358011841 ], "wc_reply_authors_avg": [ 43.0, 20.607442021431645 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.18898223650461363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KWxf64SiTWEJ:scholar.google.com/&scioq=Reimagining+Mutual+Information+for+Enhanced+Defense+against+Data+Leakage+in+Collaborative+Inference&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "duke.edu;duke.edu;psu.edu;duke.edu;", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Duke University;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.duke.edu;https://www.psu.edu", "aff_unique_abbr": "Duke;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Reason via Program Generation, Emulation, and Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93319", "id": "te6VagJf6G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=te6VagJf6G", "openreview": "https://openreview.net/forum?id=te6VagJf6G", "poster": "/media/PosterPDFs/NeurIPS%202024/93319.png?t=1733805529.355358", "project": "", "author_site": "Nathaniel Weir, Muhammad Khalifa, Linlu Qiu, Orion Weller, Peter Clark", "tldr": "", "abstract": "Program synthesis with language models (LMs) has unlocked a large set of reasoning abilities; code-tuned LMs have proven adept at generating programs that solve a wide variety of algorithmic symbolic manipulation tasks (e.g. word concatenation). However, not all reasoning tasks are easily expressible as code, e.g. tasks involving commonsense reasoning, moral decision-making, and sarcasm understanding. Our goal is to extend a LM\u2019s program synthesis skills to such tasks and evaluate the results via pseudo-programs, namely Python programs where some leaf function calls are left undefined. To that end, we propose, Code Generation and Emulated EXecution (COGEX). COGEX works by (1) training LMs to generate pseudo-programs and (2) teaching them to emulate their generated program\u2019s execution, including those leaf functions, allowing the LM\u2019s knowledge to fill in the execution gaps; and (3) using them to search over many programs to find an optimal one. To adapt the COGEX model to a new task, we introduce a method for performing program search to find a single program whose pseudo-execution yields optimal performance when applied to all the instances of a given dataset. We show that our approach yields large improvements compared to standard in-context learning approaches on a battery of tasks, both algorithmic and soft reasoning. This result thus demonstrates that code synthesis can be applied to a much broader class of problems than previously considered.", "keywords": "language models;instruction tuning;code generation;reasoning;program search;program emulation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Nathaniel Weir;Muhammad Khalifa;Linlu Qiu;Orion Weller;Peter Clark", "authorids": "~Nathaniel_Weir1;~Muhammad_Khalifa2;~Linlu_Qiu1;~Orion_Weller1;~Peter_Clark1", "gender": "M;M;F;M;M", "homepage": "https://cs.jhu.edu/~nweir;https://mukhal.github.io;https://linlu-qiu.github.io/;https://orionweller.github.io/;https://allenai.org/team/peterc", "dblp": "218/5179;246/4401;267/2348;248/7910;34/1184", "google_scholar": "I3rv7m8AAAAJ;tnmUr30AAAAJ;D1uOAWcAAAAJ;SYYd4iAAAAAJ;o-5vyEsAAAAJ", "orcid": ";;;;", "linkedin": ";muhammaad-khalifa-9a467b100/;;;peter-clark-a8b556/", "or_profile": "~Nathaniel_Weir1;~Muhammad_Khalifa2;~Linlu_Qiu1;~Orion_Weller1;~Peter_Clark1", "aff": "Johns Hopkins University;University of Michigan - Ann Arbor;Massachusetts Institute of Technology;Johns Hopkins University;Allen Institute for Artificial Intelligence", "aff_domain": "jhu.edu;umich.edu;mit.edu;jhu.edu;allenai.org", "position": "PhD student;PhD student;PhD student;PhD student;Senior Research Manager", "bibtex": "@inproceedings{\nweir2024learning,\ntitle={Learning to Reason via Program Generation, Emulation, and Search},\nauthor={Nathaniel Weir and Muhammad Khalifa and Linlu Qiu and Orion Weller and Peter Clark},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=te6VagJf6G}\n}", "github": "", "reviewers": "Hd75;KtaJ;a6dH", "pdf_size": 650183, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "2;4;3", "novelty": "2;2;3", "presentation": "3;4;3", "wc_summary": "82;101;133", "wc_strengths": "24;70;109", "wc_weaknesses": "470;332;114", "wc_questions": "20;260;17", "wc_limitations": "45;16;57", "wc_review": "641;779;430", "wc_reply_reviewers": "43;0;0", "wc_reply_authors": "666;0;0", "reply_reviewers": "1;0;0", "reply_authors": "3;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 105.33333333333333, 21.044925490219462 ], "wc_strengths_avg": [ 67.66666666666667, 34.74030640177039 ], "wc_weaknesses_avg": [ 305.3333333333333, 146.55450256550367 ], "wc_questions_avg": [ 99.0, 113.85077953180645 ], "wc_limitations_avg": [ 39.333333333333336, 17.21110752456745 ], "wc_review_avg": [ 616.6666666666666, 143.51383982815813 ], "wc_reply_reviewers_avg": [ 14.333333333333334, 20.27039439401436 ], "wc_reply_authors_avg": [ 222.0, 313.9554108468271 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17544266945646871411&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": "jhu.edu;umich.edu;mit.edu;jhu.edu;allenai.org", "author_num": 5, "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Johns Hopkins University;University of Michigan;Massachusetts Institute of Technology;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.jhu.edu;https://www.umich.edu;https://web.mit.edu;https://allenai.org", "aff_unique_abbr": "JHU;UM;MIT;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Prediction with Action: Visual Policy Learning via Joint Denoising Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93318", "id": "teVxVdy8R2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=teVxVdy8R2", "openreview": "https://openreview.net/forum?id=teVxVdy8R2", "poster": "", "project": "", "author_site": "Yanjiang Guo, Yucheng Hu, Jianke Zhang, Yen-Jen Wang, Xiaoyu Chen, Chaochao Lu, Jianyu Chen", "tldr": "", "abstract": "Diffusion models have demonstrated remarkable capabilities in image generation tasks, including image editing and video creation, representing a good understanding of the physical world. On the other line, diffusion models have also shown promise in robotic control tasks by denoising actions, known as diffusion policy. Although the diffusion generative model and diffusion policy exhibit distinct capabilities\u2014image prediction and robotic action, respectively\u2014they technically follow similar denoising process. In robotic tasks, the ability to predict future images and generate actions is highly correlated since they share the same underlying dynamics of the physical world. Building on this insight, we introduce \\textbf{PAD}, a novel visual policy learning framework that unifies image \\textbf{P}rediction and robot \\textbf{A}ction within a joint \\textbf{D}enoising process. Specifically, PAD utilizes Diffusion Transformers (DiT) to seamlessly integrate images and robot states, enabling the simultaneous prediction of future images and robot actions. Additionally, PAD supports co-training on both robotic demonstrations and large-scale video datasets and can be easily extended to other robotic modalities, such as depth images. \nPAD outperforms previous methods, achieving a significant 38.9\\% relative improvement on the full Metaworld benchmark, by utilizing a single text-conditioned visual policy within a data-efficient imitation learning setting. Furthermore, PAD demonstrates superior generalization to unseen tasks in real-world robot manipulation settings with 28.0\\% success rate increase compared to the strongest baseline. \nVideos of PAD can be found at https://sites.google.com/view/pad-paper", "keywords": "Visual policy learning;diffusion;image generation", "primary_area": "robotics", "supplementary_material": "/attachment/b56068659dea16ffee93984832caa308416da444.zip", "author": "Yanjiang Guo;Yucheng Hu;Jianke Zhang;Yen-Jen Wang;Xiaoyu Chen;Chaochao Lu;Jianyu Chen", "authorids": "~Yanjiang_Guo1;~Yucheng_Hu1;~Jianke_Zhang1;~Yen-Jen_Wang1;~Xiaoyu_Chen4;~Chaochao_Lu1;~Jianyu_Chen1", "gender": "M;M;M;M;;;M", "homepage": "https://robert-gyj.github.io/;https://github.com/Hu-Yuch;;https://wangyenjen.github.io;https://github.com/Cospui;https://causallu.com/;http://people.iiis.tsinghua.edu.cn/~jychen/", "dblp": ";;;164/2206;;142/2790;", "google_scholar": "rBeZZPMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;_U-HwfkAAAAJ;;C_Qxt0IAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;wangyenjen/;;;", "or_profile": "~Yanjiang_Guo1;~Yucheng_Hu1;~Jianke_Zhang1;~Yen-Jen_Wang1;~Xiaoyu_Chen4;~Chaochao_Lu1;~Jianyu_Chen1", "aff": "Tsinghua University;Tsinghua University;Beijing Institute of Technology;Tsinghua University;Tsinghua University;Shanghai AI Laboratory ;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn;bit.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;MS student;Graduate student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nguo2024prediction,\ntitle={Prediction with Action: Visual Policy Learning via Joint Denoising Process},\nauthor={Yanjiang Guo and Yucheng Hu and Jianke Zhang and Yen-Jen Wang and Xiaoyu Chen and Chaochao Lu and Jianyu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=teVxVdy8R2}\n}", "github": "", "reviewers": "GDHG;Hcpx;EGNn;yjvj", "pdf_size": 4154567, "rating": "4;6;7;8", "confidence": "5;3;3;4", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "143;230;72;76", "wc_strengths": "125;257;82;148", "wc_weaknesses": "616;502;49;148", "wc_questions": "100;149;19;93", "wc_limitations": "1;20;15;46", "wc_review": "985;1158;237;511", "wc_reply_reviewers": "559;116;20;163", "wc_reply_authors": "758;758;42;261", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;2;2", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 130.25, 64.12634014194167 ], "wc_strengths_avg": [ 153.0, 64.54843142943135 ], "wc_weaknesses_avg": [ 328.75, 236.3571185727225 ], "wc_questions_avg": [ 90.25, 46.45091495331389 ], "wc_limitations_avg": [ 20.5, 16.28649747490233 ], "wc_review_avg": [ 722.75, 367.08607641805213 ], "wc_reply_reviewers_avg": [ 214.5, 205.46593391606308 ], "wc_reply_authors_avg": [ 454.75, 312.97873330307925 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.224744871391589 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.560611910581388, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12150709914839766732&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn;bit.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;pjlab.org.cn;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "Tsinghua University;Beijing Institute of Technology;Shanghai AI Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.bit.edu.cn/;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "THU;BIT;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "EigenVI: score-based variational inference with orthogonal function expansions", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93317", "id": "thUf6ZBlPp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=thUf6ZBlPp", "openreview": "https://openreview.net/forum?id=thUf6ZBlPp", "poster": "/media/PosterPDFs/NeurIPS%202024/93317.png?t=1733945566.4559555", "project": "", "author_site": "Diana Cai, Chirag Modi, Charles Margossian, Robert Gower, David Blei, Lawrence Saul", "tldr": "", "abstract": "We develop EigenVI, an eigenvalue-based approach for black-box variational inference (BBVI). EigenVI constructs its variational approximations from orthogonal function expansions. For distributions over $\\mathbb{R}^D$, the lowest order term in these expansions provides a Gaussian variational approximation, while higher-order terms provide a systematic way to model non-Gaussianity. These approximations are flexible enough to model complex distributions (multimodal, asymmetric), but they are simple enough that one can calculate their low-order moments and draw samples from them. EigenVI can also model other types of random variables (e.g., nonnegative, bounded) by constructing variational approximations from different families of orthogonal functions. Within these families, EigenVI computes the variational approximation that best matches the score function of the target distribution by minimizing a stochastic estimate of the Fisher divergence. Notably, this optimization reduces to solving a minimum eigenvalue problem, so that EigenVI effectively sidesteps the iterative gradient-based optimizations that are required for many other BBVI algorithms. (Gradient-based methods can be sensitive to learning rates, termination criteria, and other tunable hyperparameters.) We use EigenVI to approximate a variety of target distributions, including a benchmark suite of Bayesian models from posteriordb. On these distributions, we find that EigenVI is more accurate than existing methods for Gaussian BBVI.", "keywords": "variational inference;black-box variational inference;Bayesian inference;probabilistic modeling;score-based divergence;score matching;non-Gaussian variational families", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Diana Cai;Chirag Modi;Charles Margossian;Robert M. Gower;David Blei;Lawrence K. Saul", "authorids": "~Diana_Cai1;~Chirag_Modi1;~Charles_Margossian1;~Robert_M._Gower1;~David_Blei2;~Lawrence_K._Saul3", "gender": "F;M;M;M;M;M", "homepage": "https://www.dianacai.com;;https://charlesm93.github.io./;https://gowerrobert.github.io/;http://www.cs.columbia.edu/~blei/;https://users.flatironinstitute.org/~lsaul/", "dblp": "191/6693;57/6166;;143/0056;86/1910;66/6611", "google_scholar": "WrLjBYgAAAAJ;yEh-Tj8AAAAJ;nPtLsvIAAAAJ;okKw87MAAAAJ;https://scholar.google.com.tw/citations?user=8OYE6iEAAAAJ;Xy7pzxoAAAAJ", "orcid": ";;0000-0002-3274-5619;;;", "linkedin": ";;charles-margossian-3428935b/;;;", "or_profile": "~Diana_Cai1;~Chirag_Modi1;~Charles_Margossian1;~Robert_M._Gower1;~David_Blei2;~Lawrence_Saul1", "aff": "Flatiron Institute;Simons Foundation;Flatiron Institute;Flatiron Institute;Columbia University;University of California, San Diego", "aff_domain": "flatiron.org;simonsfoundation.org;flatironinstitute.org;simonsfoundation.org;columbia.edu;ucsd.edu", "position": "Postdoc;Postdoc;Postdoc;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncai2024eigenvi,\ntitle={Eigen{VI}: score-based variational inference with orthogonal function expansions},\nauthor={Diana Cai and Chirag Modi and Charles Margossian and Robert M. Gower and David Blei and Lawrence K. Saul},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=thUf6ZBlPp}\n}", "github": "", "reviewers": "JZVK;jsA6;35nb;MKWn;XdZT", "pdf_size": 5530130, "rating": "5;6;7;7;7", "confidence": "4;4;4;5;3", "soundness": "3;3;3;3;4", "novelty": "2;2;4;3;3", "presentation": "4;4;4;3;3", "wc_summary": "56;46;70;56;47", "wc_strengths": "51;55;218;45;68", "wc_weaknesses": "663;107;547;2;45", "wc_questions": "72;33;176;204;50", "wc_limitations": "9;3;5;1;10", "wc_review": "851;244;1016;308;220", "wc_reply_reviewers": "180;28;31;10;25", "wc_reply_authors": "192;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;1;1;1;1", "rating_avg": [ 6.4, 0.7999999999999999 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 55.0, 8.62554346113913 ], "wc_strengths_avg": [ 87.4, 65.73461797257211 ], "wc_weaknesses_avg": [ 272.8, 275.73784651367686 ], "wc_questions_avg": [ 107.0, 69.45502141674135 ], "wc_limitations_avg": [ 5.6, 3.4409301068170506 ], "wc_review_avg": [ 527.8, 336.5688042585052 ], "wc_reply_reviewers_avg": [ 54.8, 63.015553635590635 ], "wc_reply_authors_avg": [ 38.4, 76.80000000000001 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4270485430161626260&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "flatiron.org;simonsfoundation.org;flatironinstitute.org;simonsfoundation.org;columbia.edu;ucsd.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Flatiron Institute;Simons Foundation;Columbia University;University of California, San Diego", "aff_unique_dep": ";;;", "aff_unique_url": "https://flatironinstitute.org;https://www.simonsfoundation.org;https://www.columbia.edu;https://www.ucsd.edu", "aff_unique_abbr": "Flatiron;Simons Foundation;Columbia;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "From News to Forecast: Integrating Event Analysis in LLM-Based Time Series Forecasting with Reflection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93316", "id": "tj8nsfxi5r", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tj8nsfxi5r", "openreview": "https://openreview.net/forum?id=tj8nsfxi5r", "poster": "/media/PosterPDFs/NeurIPS%202024/93316.png?t=1731546654.3019006", "project": "", "author_site": "Xinlei Wang, Maike Feng, Jing Qiu, Jinjin Gu, Junhua Zhao", "tldr": "", "abstract": "This paper introduces a novel approach that leverages Large Language Models (LLMs) and Generative Agents to enhance time series forecasting by reasoning across both text and time series data. With language as a medium, our method adaptively integrates social events into forecasting models, aligning news content with time series fluctuations to provide richer insights. Specifically, we utilize LLM-based agents to iteratively filter out irrelevant news and employ human-like reasoning to evaluate predictions. This enables the model to analyze complex events, such as unexpected incidents and shifts in social behavior, and continuously refine the selection logic of news and the robustness of the agent's output. By integrating selected news events with time series data, we fine-tune a pre-trained LLM to predict sequences of digits in time series. The results demonstrate significant improvements in forecasting accuracy, suggesting a potential paradigm shift in time series forecasting through the effective utilization of unstructured news data.", "keywords": "Large Language Model;Time Series Forecasting;AI Agent", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Xinlei Wang;Maike Feng;Jing Qiu;Jinjin Gu;Junhua Zhao", "authorids": "~Xinlei_Wang3;~Maike_Feng1;~Jing_Qiu3;~Jinjin_Gu1;~Junhua_Zhao1", "gender": "F;M;M;M;M", "homepage": ";;https://www.sydney.edu.au/engineering/about/our-people/academic-staff/jeremy-qiu.html;http://www.jasongt.com;https://www.zhaojunhua.org/", "dblp": ";;;209/5709;https://dblp.uni-trier.de/pid/73/3830-1.html", "google_scholar": "BfaMv18AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;uMQ-G-QAAAAJ;M2oDRWEAAAAJ", "orcid": "0000-0003-4599-1218;;0000-0001-8507-0558;0000-0002-4389-6236;", "linkedin": ";;;jinjingu;", "or_profile": "~Xinlei_Wang3;~Maike_Feng1;~Jing_Qiu3;~Jinjin_Gu1;~Junhua_Zhao1", "aff": "University of Sydney, University of Sydney;The Chinese University of Hong Kong;University of Sydney, University of Sydney;University of Sydney;", "aff_domain": "sydney.edu.au;cuhk.edu.cn;usyd.edu.au;sydney.edu.au;", "position": "PhD student;MS student;Lecturer;PhD student;", "bibtex": "@inproceedings{\nwang2024from,\ntitle={From News to Forecast: Integrating Event Analysis in {LLM}-Based Time Series Forecasting with Reflection},\nauthor={Xinlei Wang and Maike Feng and Jing Qiu and Jinjin Gu and Junhua Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tj8nsfxi5r}\n}", "github": "", "reviewers": "pDM7;UwQn;MLnY;QJWf", "pdf_size": 3543329, "rating": "6;6;6;6", "confidence": "4;4;3;4", "soundness": "3;3;3;3", "novelty": "3;3;3;2", "presentation": "3;2;3;3", "wc_summary": "37;39;84;19", "wc_strengths": "47;89;81;30", "wc_weaknesses": "51;103;50;8", "wc_questions": "28;138;59;233", "wc_limitations": "7;16;27;1", "wc_review": "170;385;301;291", "wc_reply_reviewers": "16;30;24;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 44.75, 23.962209831315644 ], "wc_strengths_avg": [ 61.75, 24.180312239505923 ], "wc_weaknesses_avg": [ 53.0, 33.68233958619858 ], "wc_questions_avg": [ 114.5, 79.30479178460781 ], "wc_limitations_avg": [ 12.75, 9.807522622966516 ], "wc_review_avg": [ 286.75, 76.65629458824631 ], "wc_reply_reviewers_avg": [ 17.5, 11.258330249197702 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14432184214764017050&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "sydney.edu.au;cuhk.edu.cn;usyd.edu.au;sydney.edu.au;", "author_num": 5, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Sydney;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.sydney.edu.au;https://www.cuhk.edu.hk", "aff_unique_abbr": "USYD;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Australia;China" }, { "title": "Dynamic Conditional Optimal Transport through Simulation-Free Flows", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93315", "id": "tk0uaRynhH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tk0uaRynhH", "openreview": "https://openreview.net/forum?id=tk0uaRynhH", "poster": "", "project": "", "author_site": "Gavin Kerrigan, Giosue Migliorini, Padhraic Smyth", "tldr": "", "abstract": "We study the geometry of conditional optimal transport (COT) and prove a dynamic formulation which generalizes the Benamou-Brenier Theorem. Equipped with these tools, we propose a simulation-free flow-based method for conditional generative modeling. Our method couples an arbitrary source distribution to a specified target distribution through a triangular COT plan, and a conditional generative model is obtained by approximating the geodesic path of measures induced by this COT plan. Our theory and methods are applicable in infinite-dimensional settings, making them well suited for a wide class of Bayesian inverse problems. Empirically, we demonstrate that our method is competitive on several challenging conditional generation tasks, including an infinite-dimensional inverse problem.", "keywords": "flow matching;optimal transport;generative models;conditional generation", "primary_area": "generative_models", "supplementary_material": "", "author": "Gavin Kerrigan;Giosue Migliorini;Padhraic Smyth", "authorids": "~Gavin_Kerrigan1;~Giosue_Migliorini1;~Padhraic_Smyth1", "gender": "M;M;M", "homepage": "https://gavinkerrigan.github.io/;;https://www.ics.uci.edu/~smyth", "dblp": "274/1893;348/6562;s/PadhraicSmyth", "google_scholar": "2F2XCy8AAAAJ;;OsoQ-dcAAAAJ", "orcid": ";;0000-0001-9971-8378", "linkedin": ";giosu%C3%A8-migliorini-520126197/;", "or_profile": "~Gavin_Kerrigan1;~Giosue_Migliorini1;~Padhraic_Smyth1", "aff": "University of California, Irvine;University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;uci.edu;uci.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkerrigan2024dynamic,\ntitle={Dynamic Conditional Optimal Transport through Simulation-Free Flows},\nauthor={Gavin Kerrigan and Giosue Migliorini and Padhraic Smyth},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tk0uaRynhH}\n}", "github": "", "reviewers": "i1c7;hqSP;QX6V;yVF4", "pdf_size": 1737035, "rating": "4;5;6;8", "confidence": "3;3;2;5", "soundness": "2;3;4;4", "novelty": "2;2;3;3", "presentation": "3;2;3;4", "wc_summary": "81;29;59;79", "wc_strengths": "46;23;51;36", "wc_weaknesses": "20;212;73;2", "wc_questions": "182;34;42;248", "wc_limitations": "12;7;2;1", "wc_review": "341;305;227;366", "wc_reply_reviewers": "35;107;30;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 62.0, 20.904544960366874 ], "wc_strengths_avg": [ 39.0, 10.700467279516348 ], "wc_weaknesses_avg": [ 76.75, 82.33278508589395 ], "wc_questions_avg": [ 126.5, 91.56828053425487 ], "wc_limitations_avg": [ 5.5, 4.387482193696061 ], "wc_review_avg": [ 309.75, 52.466060458166666 ], "wc_reply_reviewers_avg": [ 43.0, 39.30012722625717 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.659231724180059, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12230147221243367546&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "uci.edu;uci.edu;uci.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Einsum Benchmark: Enabling the Development of Next-Generation Tensor Execution Engines", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97464", "id": "tllpLtt14h", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tllpLtt14h", "openreview": "https://openreview.net/forum?id=tllpLtt14h", "poster": "/media/PosterPDFs/NeurIPS%202024/97464.png?t=1731516030.573483", "project": "", "author_site": "Mark Blacher, Christoph Staudt, Julien Klaus, Maurice Wenig, Niklas Merk, Alexander Breuer, Max Engel, S\u00f6ren Laue, Joachim Giesen", "tldr": "", "abstract": "Modern artificial intelligence and machine learning workflows rely on efficient tensor libraries. However, tuning tensor libraries without considering the actual problems they are meant to execute can lead to a mismatch between expected performance and the actual performance. Einsum libraries are tuned to efficiently execute tensor expressions with only a few, relatively large, dense, floating-point tensors. But, practical applications of einsum cover a much broader range of tensor expressions than those that can currently be executed efficiently. For this reason, we have created a benchmark dataset that encompasses this broad range of tensor expressions, allowing future implementations of einsum to build upon and be evaluated against. In addition, we also provide generators for einsum expressions and converters to einsum expressions in our repository, so that additional data can be generated as needed. The benchmark dataset, the generators and converters are released openly and are publicly available at https://benchmark.einsum.org.", "keywords": "Einsum;Dataset;Benchmark;Tensor Operations;Contraction Paths;Tensor Networks", "primary_area": "", "supplementary_material": "/attachment/ea4e2cf86959d5c0363601bdad1cc1356b01a855.pdf", "author": "Mark Blacher;Christoph Staudt;Julien Klaus;Maurice Wenig;Niklas Merk;Alexander Breuer;Max Engel;S\u00f6ren Laue;Joachim Giesen", "authorids": "~Mark_Blacher1;~Christoph_Staudt1;~Julien_Klaus1;~Maurice_Wenig1;~Niklas_Merk1;~Alexander_Breuer1;~Max_Engel1;~S\u00f6ren_Laue1;~Joachim_Giesen1", "gender": "M;M;M;M;M;;;M;", "homepage": ";;https://www.ti2.uni-jena.de/julien-klaus;https://www.ti2.uni-jena.de/team/maurice-wenig;https://www.ti2.uni-jena.de/team/niklas-merk;https://scalable.uni-jena.de;;https://www.inf.uni-hamburg.de/en/inst/ab/ml/people/laue.html;https://www.ti2.uni-jena.de/team/prof-joachim-giesen", "dblp": "277/9082;299/5431;246/1576.html;;;;;71/5078;30/3504", "google_scholar": ";;cY_fvjsAAAAJ;;;;;https://scholar.google.de/citations?user=XLOcv_sAAAAJ;", "orcid": ";0009-0000-4250-546X;0000-0002-1498-2653;;;;;;", "linkedin": ";;julien-klaus-679a041a6/;;;;;;", "or_profile": "~Mark_Blacher1;~Christoph_Staudt1;~Julien_Klaus1;~Maurice_Wenig1;~Niklas_Merk1;~Alexander_Breuer1;~Max_Engel1;~S\u00f6ren_Laue1;~Joachim_Giesen1", "aff": "University of Jena;Friedrich-Schiller Universit\u00e4t Jena;Friedrich-Schiller Universit\u00e4t Jena;Friedrich-Schiller Universit\u00e4t Jena;Friedrich-Schiller Universit\u00e4t Jena;Friedrich-Schiller Universit\u00e4t Jena;;Universit\u00e4t Hamburg;University of Jena", "aff_domain": "uni-jena.de;uni-jena.de;uni-jena.de;uni-jena.de;uni-jena.de;uni-jena.de;;uni-hamburg.de;uni-jena.de", "position": "PhD student;PhD student;Postdoc;PhD student;PhD student;Associate Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nblacher2024einsum,\ntitle={Einsum Benchmark: Enabling the Development of Next-Generation Tensor Execution Engines},\nauthor={Mark Blacher and Christoph Staudt and Julien Klaus and Maurice Wenig and Niklas Merk and Alexander Breuer and Max Engel and S{\\\"o}ren Laue and Joachim Giesen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=tllpLtt14h}\n}", "github": "", "reviewers": "3ZT8;zEK9;GRgD;Su7F", "pdf_size": 318685, "rating": "7;7;7;8", "confidence": "3;4;4;4", "wc_summary_and_contributions": "108;66;92;57", "wc_strengths": "47;36;75;22", "wc_improvement": "107;83;46;29", "wc_limitations": "10;5;5;4", "wc_correctness": "10;6;9;6", "wc_clarity": "5;16;4;1", "wc_relation_to_prior_work": "33;1;57;1", "wc_documentation": "8;22;4;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "329;236;293;122", "wc_reply_reviewers": "0;0;169;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 80.75, 20.314711418083203 ], "wc_strengths_avg": [ 45.0, 19.45507645834372 ], "wc_improvement_avg": [ 66.25, 30.572659354397025 ], "wc_limitations_avg": [ 6.0, 2.345207879911715 ], "wc_correctness_avg": [ 7.75, 1.7853571071357126 ], "wc_clarity_avg": [ 6.5, 5.678908345800274 ], "wc_relation_to_prior_work_avg": [ 23.0, 23.57965224510319 ], "wc_documentation_avg": [ 8.75, 8.042853971072706 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 245.0, 78.3741028656788 ], "wc_reply_reviewers_avg": [ 42.25, 73.17914661978507 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4405782231618517294&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "uni-jena.de;uni-jena.de;uni-jena.de;uni-jena.de;uni-jena.de;uni-jena.de;;uni-hamburg.de;uni-jena.de", "author_num": 9, "aff_unique_index": "0;1;1;1;1;1;2;0", "aff_unique_norm": "Friedrich Schiller University Jena;Friedrich-Schiller-Universit\u00e4t Jena;University of Hamburg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-jena.de/;https://www.uni-jena.de;https://www.uni-hamburg.de", "aff_unique_abbr": "FSU Jena;FSU Jena;UHH", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Jena;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Adaptive Variance Reduction for Stochastic Optimization under Weaker Assumptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93314", "id": "tmQH8prqLc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tmQH8prqLc", "openreview": "https://openreview.net/forum?id=tmQH8prqLc", "poster": "/media/PosterPDFs/NeurIPS%202024/93314.png?t=1730355828.2183118", "project": "", "author_site": "Wei Jiang, Sifan Yang, Yibo Wang, Lijun Zhang", "tldr": "", "abstract": "This paper explores adaptive variance reduction methods for stochastic optimization based on the STORM technique. Existing adaptive extensions of STORM rely on strong assumptions like bounded gradients and bounded function values, or suffer an additional $\\mathcal{O}(\\log T)$ term in the convergence rate. To address these limitations, we introduce a novel adaptive STORM method that achieves an optimal convergence rate of $\\mathcal{O}(T^{-1/3})$ for non-convex functions with our newly designed learning rate strategy. Compared with existing approaches, our method requires weaker assumptions and attains the optimal convergence rate without the additional $\\mathcal{O}(\\log T)$ term. We also extend the proposed technique to stochastic compositional optimization, obtaining the same optimal rate of $\\mathcal{O}(T^{-1/3})$. Furthermore, we investigate the non-convex finite-sum problem and develop another innovative adaptive variance reduction method that achieves an optimal convergence rate of $\\mathcal{O}(n^{1/4} T^{-1/2} )$, where $n$ represents the number of component functions. Numerical experiments across various tasks validate the effectiveness of our method.", "keywords": "Adaptive methods;variance reduction;finite-sum optimization;stochastic compositional optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Wei Jiang;Sifan Yang;Yibo Wang;Lijun Zhang", "authorids": "~Wei_Jiang8;~Sifan_Yang2;~Yibo_Wang2;~Lijun_Zhang1", "gender": "M;M;;", "homepage": "http://www.lamda.nju.edu.cn/jiangw/?AspxAutoDetectCookieSupport=1;https://www.lamda.nju.edu.cn/yangsf/;;", "dblp": ";251/2905;;", "google_scholar": ";qTISlvMAAAAJ;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wei_Jiang8;~Sifan_Yang2;~Yibo_Wang2;~Lijun_Zhang1", "aff": "Nanjing University;Nanjing University;;", "aff_domain": "nju.edu.cn;nju.edu.cn;;", "position": "PhD student;MS student;;", "bibtex": "@inproceedings{\njiang2024adaptive,\ntitle={Adaptive Variance Reduction for Stochastic Optimization under Weaker Assumptions},\nauthor={Wei Jiang and Sifan Yang and Yibo Wang and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tmQH8prqLc}\n}", "github": "", "reviewers": "G4tN;4Ggd;2239;qB9i", "pdf_size": 807354, "rating": "6;7;7;7", "confidence": "3;4;2;3", "soundness": "3;4;3;3", "novelty": "2;4;4;3", "presentation": "2;4;3;3", "wc_summary": "100;85;36;86", "wc_strengths": "100;107;81;130", "wc_weaknesses": "233;111;74;196", "wc_questions": "2;32;57;86", "wc_limitations": "1;9;15;1", "wc_review": "436;344;263;499", "wc_reply_reviewers": "31;18;10;175", "wc_reply_authors": "18;0;0;21", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 76.75, 24.262883175748097 ], "wc_strengths_avg": [ 104.5, 17.528548142958105 ], "wc_weaknesses_avg": [ 153.5, 63.7436271324436 ], "wc_questions_avg": [ 44.25, 30.986892390170397 ], "wc_limitations_avg": [ 6.5, 5.894913061275798 ], "wc_review_avg": [ 385.5, 89.66744113668015 ], "wc_reply_reviewers_avg": [ 58.5, 67.67754428168918 ], "wc_reply_authors_avg": [ 9.75, 9.807522622966516 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=513085106515537651&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nju.edu.cn;nju.edu.cn;;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Evaluation of Text-to-Video Generation Models: A Dynamics Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93313", "id": "tmX1AUmkl6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tmX1AUmkl6", "openreview": "https://openreview.net/forum?id=tmX1AUmkl6", "poster": "", "project": "", "author_site": "Mingxiang Liao, hannan lu, Qixiang Ye, Wangmeng Zuo, Fang Wan, Tianyu Wang, Yuzhong Zhao, Jingdong Wang, Xinyu Zhang", "tldr": "", "abstract": "Comprehensive and constructive evaluation protocols play an important role when developing sophisticated text-to-video (T2V) generation models. Existing evaluation protocols primarily focus on temporal consistency and content continuity, yet largely ignore dynamics of video content. Such dynamics is an essential dimension measuring the visual vividness and the honesty of video content to text prompts. In this study, we propose an effective evaluation protocol, termed DEVIL, which centers on the dynamics dimension to evaluate T2V generation models, as well as improving existing evaluation metrics. In practice, we define a set of dynamics scores corresponding to multiple temporal granularities, and a new benchmark of text prompts under multiple dynamics grades. Upon the text prompt benchmark, we assess the generation capacity of T2V models, characterized by metrics of dynamics ranges and T2V alignment. Moreover, we analyze the relevance of existing metrics to dynamics metrics, improving them from the perspective of dynamics. Experiments show that DEVIL evaluation metrics enjoy up to about 90\\% consistency with human ratings, demonstrating the potential to advance T2V generation models.", "keywords": "Video generation model;dynamics evalution", "primary_area": "evaluation", "supplementary_material": "/attachment/9e07a137aa1c283d99a0a83033a635cba2ccb4c4.zip", "author": "Mingxiang Liao;Hannan Lu;Qixiang Ye;Wangmeng Zuo;Fang Wan;Tianyu Wang;Yuzhong Zhao;Jingdong Wang;Xinyu Zhang", "authorids": "~Mingxiang_Liao1;~Hannan_Lu1;~Qixiang_Ye1;~Wangmeng_Zuo3;~Fang_Wan1;~Tianyu_Wang12;~Yuzhong_Zhao1;~Jingdong_Wang1;~Xinyu_Zhang3", "gender": "M;M;M;M;M;M;M;M;", "homepage": "https://github.com/MingXiangL;;http://people.ucas.ac.cn/~qxye?language=en;;https://people.ucas.ac.cn/~wanfang?language=en;https://github.com/TianyuWangUCAS;https://callsys.github.io/zhaoyuzhong.github.io-main/;https://jingdongwang2017.github.io/;", "dblp": ";239/4378;06/4335;93/2671;;;42/8750;49/3441;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=tjEfgsEAAAAJ;rUOpCEYAAAAJ;https://scholar.google.com.hk/citations?user=0IKavloAAAAJ;;tStQNm4AAAAJ;z5SPCmgAAAAJ;", "orcid": ";;;0000-0002-3330-783X;0000-0002-8083-9257;;0000-0002-2425-6786;0000-0002-4888-4445;", "linkedin": ";;;;;;;;", "or_profile": "~Mingxiang_Liao1;~Hannan_Lu1;~Qixiang_Ye1;~Wangmeng_Zuo3;~Fang_Wan1;~Tianyu_Wang12;~Yuzhong_Zhao1;~Jingdong_Wang1;~Xinyu_Zhang3", "aff": "University of Chinese Academy of Sciences;Harbin Institution of Technology;University of Chinese Academy of Sciences;Harbin Institute of Technology;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Baidu;", "aff_domain": "ucas.ac.cn;hit.edu.cn;ucas.ac.cn;hit.edu.cn;ucas.ac.cn;mails.ucas.ac.cn;ucas.ac.cn;baidu.com;", "position": "PhD student;PhD student;Full Professor;Full Professor;Assistant Professor;PhD student;PhD student;Chief Scientist for Computer Vision;", "bibtex": "@inproceedings{\nliao2024evaluation,\ntitle={Evaluation of Text-to-Video Generation Models: A Dynamics Perspective},\nauthor={Mingxiang Liao and Hannan Lu and Qixiang Ye and Wangmeng Zuo and Fang Wan and Tianyu Wang and Yuzhong Zhao and Jingdong Wang and Xinyu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tmX1AUmkl6}\n}", "github": "", "reviewers": "Kt6U;7oNg;VYfN;HC61", "pdf_size": 12315751, "rating": "5;5;6;7", "confidence": "3;5;3;5", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "42;144;111;131", "wc_strengths": "64;184;126;65", "wc_weaknesses": "188;300;216;220", "wc_questions": "90;5;5;184", "wc_limitations": "1;11;8;1", "wc_review": "385;644;466;601", "wc_reply_reviewers": "45;21;79;0", "wc_reply_authors": "281;42;38;0", "reply_reviewers": "3;1;1;0", "reply_authors": "5;3;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 1.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 107.0, 39.32556420447137 ], "wc_strengths_avg": [ 109.75, 49.68085647409875 ], "wc_weaknesses_avg": [ 231.0, 41.701318923986086 ], "wc_questions_avg": [ 71.0, 73.8951960549534 ], "wc_limitations_avg": [ 5.25, 4.380353866983808 ], "wc_review_avg": [ 524.0, 103.69908389180688 ], "wc_reply_reviewers_avg": [ 36.25, 29.371542349696245 ], "wc_reply_authors_avg": [ 90.25, 111.3426580426388 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4406776571661738626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucas.ac.cn;hit.edu.cn;ucas.ac.cn;hit.edu.cn;ucas.ac.cn;mails.ucas.ac.cn;ucas.ac.cn;baidu.com;", "author_num": 9, "aff_unique_index": "0;1;0;1;0;0;0;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Harbin Institute of Technology;Baidu", "aff_unique_dep": ";;Baidu, Inc.", "aff_unique_url": "http://www.ucas.ac.cn;http://www.hit.edu.cn/;https://www.baidu.com", "aff_unique_abbr": "UCAS;HIT;Baidu", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "TransAgent: Transfer Vision-Language Foundation Models with Heterogeneous Agent Collaboration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93312", "id": "tnQbciDjVf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tnQbciDjVf", "openreview": "https://openreview.net/forum?id=tnQbciDjVf", "poster": "/media/PosterPDFs/NeurIPS%202024/93312.png?t=1730775218.2159512", "project": "", "author_site": "Yiwei Guo, Shaobin Zhuang, Kunchang Li, Yu Qiao, Yali Wang", "tldr": "", "abstract": "Vision-language foundation models (such as CLIP) have recently shown their power in transfer learning, owing to large-scale image-text pre-training. However, target domain data in the downstream tasks can be highly different from the pre-training phase, which makes it hard for such a single model to generalize well. Alternatively, there exists a wide range of expert models that contain diversified vision and/or language knowledge pre-trained on different modalities, tasks, networks, and datasets. Unfortunately, these models are \"isolated agents\" with heterogeneous structures, and how to integrate their knowledge for generalizing CLIP-like models has not been fully explored. To bridge this gap, we propose a general and concise TransAgent framework, which transports the knowledge of the isolated agents in a unified manner, and effectively guides CLIP to generalize with multi-source knowledge distillation. With such a distinct framework, we flexibly collaborate with 11 heterogeneous agents to empower vision-language foundation models, without further cost in the inference phase. Finally, our TransAgent achieves state-of-the-art performance on 11 visual recognition datasets. Under the same low-shot setting, it outperforms the popular CoOp with around 10\\% on average, and 20\\% on EuroSAT which contains large domain shifts.", "keywords": "Vision-Language Models;Few-shot Transfer Learning;Heterogeneous Agent Collaboration", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yiwei Guo;Shaobin Zhuang;Kunchang Li;Yu Qiao;Yali Wang", "authorids": "~Yiwei_Guo2;~Shaobin_Zhuang1;~Kunchang_Li1;~Yu_Qiao1;~Yali_Wang1", "gender": "M;M;M;;M", "homepage": "https://github.com/markywg;https://github.com/897295875;https://andy1621.github.io/;;", "dblp": ";;;;01/773-1", "google_scholar": "HCAyeJIAAAAJ;;D4tLSbsAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0009-0008-5757-6605;;0000-0001-5612-0341;;", "linkedin": ";;%E6%98%86%E6%98%8C-%E9%BB%8E-2a4a951b2/;;", "or_profile": "~Yiwei_Guo2;~Shaobin_Zhuang1;~Kunchang_Li1;~Yu_Qiao1;~Yali_Wang1", "aff": "Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences;Shanghai Jiaotong University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;;SIAT, Chinese Academy of Sciences", "aff_domain": "siat.ac.cn;sjtu.edu.cn;siat.ac.cn;;siat.ac.cn", "position": "MS student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nguo2024transagent,\ntitle={TransAgent: Transfer Vision-Language Foundation Models with Heterogeneous Agent Collaboration},\nauthor={Yiwei Guo and Shaobin Zhuang and Kunchang Li and Yu Qiao and Yali Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tnQbciDjVf}\n}", "github": "", "reviewers": "wEnm;A3Ab;gY7M;QaNB;DUuV", "pdf_size": 1214785, "rating": "3;5;5;5;6", "confidence": "4;2;4;4;5", "soundness": "3;3;3;4;2", "novelty": "2;2;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "92;154;54;52;48", "wc_strengths": "20;122;56;44;44", "wc_weaknesses": "145;54;62;116;256", "wc_questions": "4;70;5;60;6", "wc_limitations": "14;7;1;18;7", "wc_review": "275;407;178;290;361", "wc_reply_reviewers": "0;0;0;0;21", "wc_reply_authors": "0;0;0;0;16", "reply_reviewers": "0;0;0;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 4.8, 0.9797958971132712 ], "confidence_avg": [ 3.8, 0.9797958971132712 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 80.0, 40.25916044827562 ], "wc_strengths_avg": [ 57.2, 34.44648022657758 ], "wc_weaknesses_avg": [ 126.6, 72.99205436210164 ], "wc_questions_avg": [ 29.0, 29.570255325241952 ], "wc_limitations_avg": [ 9.4, 5.9531504264548865 ], "wc_review_avg": [ 302.2, 78.42805620439665 ], "wc_reply_reviewers_avg": [ 4.2, 8.4 ], "wc_reply_authors_avg": [ 3.2, 6.4 ], "reply_reviewers_avg": [ 0.2, 0.4 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.16666666666666666, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:dT5CcaDMaaMJ:scholar.google.com/&scioq=TransAgent:+Transfer+Vision-Language+Foundation+Models+with+Heterogeneous+Agent+Collaboration&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "siat.ac.cn;sjtu.edu.cn;siat.ac.cn;;siat.ac.cn", "author_num": 5, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Chinese Academy of Sciences;Shanghai Jiao Tong University;Shenzhen Institute of Advanced Technology", "aff_unique_dep": "Shenzhen Institutes of Advanced Technology;;", "aff_unique_url": "http://www.siat.cas.cn;https://www.sjtu.edu.cn;http://www.siat.ac.cn", "aff_unique_abbr": "SIAT;SJTU;SIAT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Get Rid of Isolation: A Continuous Multi-task Spatio-Temporal Learning Framework", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93311", "id": "tnh4LK72yj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tnh4LK72yj", "openreview": "https://openreview.net/forum?id=tnh4LK72yj", "poster": "/media/PosterPDFs/NeurIPS%202024/93311.png?t=1730103935.9105084", "project": "", "author_site": "Zhongchao Yi, Zhengyang Zhou, Qihe Huang, Yanjiang Chen, Liheng Yu, Xu Wang, Yang Wang", "tldr": "", "abstract": "Spatiotemporal learning has become a pivotal technique to enable urban intelligence. Traditional spatiotemporal models mostly focus on a specific task by assuming a same distribution between training and testing sets. However, given that urban systems are usually dynamic, multi-sourced with imbalanced data distributions, current specific task-specific models fail to generalize to new urban conditions and adapt to new domains without explicitly modeling interdependencies across various dimensions and types of urban data. To this end, we argue that there is an essential to propose a Continuous Multi-task Spatio-Temporal learning framework (CMuST) to empower collective urban intelligence, which reforms the urban spatiotemporal learning from single-domain to cooperatively multi-dimensional and multi-task learning. Specifically, CMuST proposes a new multi-dimensional spatiotemporal interaction network (MSTI) to allow cross-interactions between context and main observations as well as self-interactions within spatial and temporal aspects to be exposed, which is also the core for capturing task-level commonality and personalization. To ensure continuous task learning, a novel Rolling Adaptation training scheme (RoAda) is devised, which not only preserves task uniqueness by constructing data summarization-driven task prompts, but also harnesses correlated patterns among tasks by iterative model behavior modeling. We further establish a benchmark of three cities for multi-task spatiotemporal learning, and empirically demonstrate the superiority of CMuST via extensive evaluations on these datasets. The impressive improvements on both few-shot streaming data and new domain tasks against existing SOAT methods are achieved. Code is available at https://github.com/DILab-USTCSZ/CMuST.", "keywords": "continuous multi-task learning;spatio-temporal forecasting;urban intelligence", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Zhongchao Yi;Zhengyang Zhou;Qihe Huang;Yanjiang Chen;Liheng Yu;Xu Wang;Yang Wang", "authorids": "~Zhongchao_Yi1;~Zhengyang_Zhou1;~Qihe_Huang2;~Yanjiang_Chen1;~Liheng_Yu1;~Xu_Wang16;~Yang_Wang32", "gender": "M;M;M;M;M;M;M", "homepage": "https://di.ustc.edu.cn/main.htm;http://home.ustc.edu.cn/~zzy0929/Home/;;;https://di.ustc.edu.cn/main.htm;http://home.ustc.edu.cn/~wx309/;http://staff.ustc.edu.cn/~angyan/", "dblp": ";246/8238;;;;181/2815-29;", "google_scholar": ";dPElQLUAAAAJ;;;;7hYGPC8AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0003-4728-7347;0000-0001-8960-6583;0009-0000-5314-6052;0009-0009-2253-4461;0000-0002-1492-3477;0000-0002-6079-7053", "linkedin": ";;;;;;", "or_profile": "~Zhongchao_Yi1;~Zhengyang_Zhou1;~Qihe_Huang2;~Yanjiang_Chen1;~Liheng_Yu1;~Xu_Wang16;~Yang_Wang32", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "MS student;Researcher;PhD student;MS student;MS student;Associate Researcher;Associate Professor", "bibtex": "@inproceedings{\nyi2024get,\ntitle={Get Rid of Isolation: A Continuous Multi-task Spatio-Temporal Learning Framework},\nauthor={Zhongchao Yi and Zhengyang Zhou and Qihe Huang and Yanjiang Chen and Liheng Yu and Xu Wang and Yang Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tnh4LK72yj}\n}", "github": "", "reviewers": "xkVJ;YiFX;GaXf;SHU9", "pdf_size": 3137212, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "1;3;3;4", "presentation": "2;2;2;3", "wc_summary": "63;42;139;67", "wc_strengths": "61;31;53;194", "wc_weaknesses": "196;289;113;57", "wc_questions": "4;76;76;25", "wc_limitations": "6;5;1;4", "wc_review": "330;443;382;347", "wc_reply_reviewers": "15;10;10;42", "wc_reply_authors": "46;51;84;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 36.615399765672365 ], "wc_strengths_avg": [ 84.75, 64.02489750089414 ], "wc_weaknesses_avg": [ 163.75, 87.60529378981614 ], "wc_questions_avg": [ 45.25, 31.63364506344471 ], "wc_limitations_avg": [ 4.0, 1.8708286933869707 ], "wc_review_avg": [ 375.5, 43.246387132337425 ], "wc_reply_reviewers_avg": [ 19.25, 13.292385038058445 ], "wc_reply_authors_avg": [ 56.25, 16.223054582907622 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=92090538995435717&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Leveraging Tumor Heterogeneity: Heterogeneous Graph Representation Learning for Cancer Survival Prediction in Whole Slide Images", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93310", "id": "tsIKrvexBd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tsIKrvexBd", "openreview": "https://openreview.net/forum?id=tsIKrvexBd", "poster": "/media/PosterPDFs/NeurIPS%202024/93310.png?t=1730963355.0153153", "project": "", "author_site": "Junxian Wu, Xinyi Ke, XIAOMING JIANG, Huanwen Wu, Youyong Kong, Lizhi Shao", "tldr": "", "abstract": "Survival prediction is a significant challenge in cancer management. Tumor micro-environment is a highly sophisticated ecosystem consisting of cancer cells, immune cells, endothelial cells, fibroblasts, nerves and extracellular matrix. The intratumor heterogeneity and the interaction across multiple tissue types profoundly impacts the prognosis. However, current methods often neglect the fact that the contribution to prognosis differs with tissue types. In this paper, we propose ProtoSurv, a novel heterogeneous graph model for WSI survival prediction. The learning process of ProtoSurv is not only driven by data but also incorporates pathological domain knowledge, including the awareness of tissue heterogeneity, the emphasis on prior knowledge of prognostic-related tissues, and the depiction of spatial interaction across multiple tissues. We validate ProtoSurv across five different cancer types from TCGA (i.e., BRCA, LGG, LUAD, COAD and PAAD), and demonstrate the superiority of our method over the state-of-the-art methods.", "keywords": "Whole Slide Image;Survival Prediction;Tumor Heterogeneity;Heterogeneous\u00a0Graph;Graph Convolutional Network", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Junxian Wu;Xinyi Ke;Xiaoming Jiang;Huanwen Wu;Youyong Kong;Lizhi Shao", "authorids": "~Junxian_Wu3;~Xinyi_Ke1;~Xiaoming_Jiang2;~Huanwen_Wu1;~Youyong_Kong1;~Lizhi_Shao1", "gender": "M;F;M;M;M;M", "homepage": "https://github.com/wjx-error;https://github.com/kikikekeke;;https://github.com/huanwenwww;https://cse.seu.edu.cn/2023/1024/c23024a469537/page.htm;https://github.com/StandWisdom", "dblp": ";;;;154/7641;", "google_scholar": "lxenf_oAAAAJ;;;;;iCdx1zIAAAAJ", "orcid": "0009-0008-1483-3229;0009-0003-2228-3371;0000-0002-8184-1578;;;0000-0002-8974-6540", "linkedin": ";;;;;", "or_profile": "~Junxian_Wu3;~Xinyi_Ke1;~Xiaoming_Jiang2;~Huanwen_Wu1;~Youyong_Kong1;~Lizhi_Shao1", "aff": "Southeast University;Peking Union Medical College Hospital;Chongqing University of Post and Telecommunications;Peking Union Medical College Hospital;Southeast University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "seu.edu.cn;pumc.edu.cn;cqupt.edu.cn;xhyy.pumc.edu;seu.edu.cn;ia.ac.cn", "position": "MS student;PhD student;Associate Professor;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2024leveraging,\ntitle={Leveraging Tumor Heterogeneity: Heterogeneous Graph Representation Learning for Cancer Survival Prediction in Whole Slide Images},\nauthor={Junxian Wu and Xinyi Ke and Xiaoming Jiang and Huanwen Wu and Youyong Kong and Lizhi Shao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tsIKrvexBd}\n}", "github": "", "reviewers": "S12V;gRNp;tNQM;DhDE", "pdf_size": 13145855, "rating": "5;5;5;5", "confidence": "4;5;4;4", "soundness": "3;2;2;2", "novelty": "3;2;2;2", "presentation": "3;3;2;3", "wc_summary": "73;129;31;274", "wc_strengths": "40;69;68;80", "wc_weaknesses": "66;68;88;383", "wc_questions": "48;46;8;337", "wc_limitations": "53;1;6;71", "wc_review": "280;313;201;1145", "wc_reply_reviewers": "0;325;0;19", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;2;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 126.75, 91.84872073142881 ], "wc_strengths_avg": [ 64.25, 14.771171246722448 ], "wc_weaknesses_avg": [ 151.25, 134.07716994328302 ], "wc_questions_avg": [ 109.75, 132.16727091076672 ], "wc_limitations_avg": [ 32.75, 29.98645527567405 ], "wc_review_avg": [ 484.75, 383.3616927915464 ], "wc_reply_reviewers_avg": [ 86.0, 138.20455853552733 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11366076904963144249&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "seu.edu.cn;pumc.edu.cn;cqupt.edu.cn;xhyy.pumc.edu;seu.edu.cn;ia.ac.cn", "author_num": 6, "aff_unique_index": "0;1;2;1;0;3", "aff_unique_norm": "Southeast University;Peking Union Medical College;Chongqing University of Post and Telecommunications;Chinese Academy of Sciences", "aff_unique_dep": ";Medical College Hospital;;Institute of Automation", "aff_unique_url": "https://www.seu.edu.cn/;http://www.pumch.cn;http://www.cqupt.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "SEU;PUMC;CQUPT;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "AirSketch: Generative Motion to Sketch", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93309", "id": "ttLcbEkaj6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ttLcbEkaj6", "openreview": "https://openreview.net/forum?id=ttLcbEkaj6", "poster": "/media/PosterPDFs/NeurIPS%202024/93309.png?t=1732222285.0429087", "project": "", "author_site": "Hui Xian Grace Lim, Xuanming Cui, Yogesh Rawat, Ser Nam Lim", "tldr": "", "abstract": "Illustration is a fundamental mode of human expression and communication. Certain types of motion that accompany speech can provide this illustrative mode of communication. While Augmented and Virtual Reality technologies (AR/VR) have introduced tools for producing drawings with hand motions (air drawing), they typically require costly hardware and additional digital markers, thereby limiting their accessibility and portability. Furthermore, air drawing demands considerable skill to achieve aesthetic results. To address these challenges, we introduce the concept of AirSketch, aimed at generating faithful and visually coherent sketches directly from hand motions, eliminating the need for complicated headsets or markers. We devise a simple augmentation-based self-supervised training procedure, enabling a controllable image diffusion model to learn to translate from highly noisy hand tracking images to clean, aesthetically pleasing sketches, while preserving the essential visual cues from the original tracking data. We present two air drawing datasets to study this problem. Our findings demonstrate that beyond producing photo-realistic images from precise spatial inputs, controllable image diffusion can effectively produce a refined, clear sketch from a noisy input. Our work serves as an initial step towards marker-less air drawing and reveals distinct applications of controllable diffusion models to AirSketch and AR/VR in general.", "keywords": "Generative;Hand Motion;Sketch", "primary_area": "generative_models", "supplementary_material": "", "author": "Hui Xian Grace Lim;Xuanming Cui;Yogesh S Rawat;Ser-Nam Lim", "authorids": "~Hui_Xian_Grace_Lim1;~Xuanming_Cui1;~Yogesh_S_Rawat1;~Ser-Nam_Lim3", "gender": ";M;M;M", "homepage": ";;https://www.crcv.ucf.edu/person/rawat/;https://sites.google.com/site/sernam", "dblp": ";;148/2258;04/6633", "google_scholar": ";;D_JvEcwAAAAJ;HX0BfLYAAAAJ", "orcid": "0000-0002-3720-8281;;;", "linkedin": ";xuanming-cui-9686b9170/;;", "or_profile": "~Hui_Xian_Grace_Lim1;~Xuanming_Cui1;~Yogesh_S_Rawat1;~Ser-Nam_Lim1", "aff": "University of Central Florida;University of Central Florida;University of Central Florida;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu;ucf.edu;ucf.edu", "position": "PhD student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlim2024airsketch,\ntitle={AirSketch: Generative Motion to Sketch},\nauthor={Hui Xian Grace Lim and Xuanming Cui and Yogesh S Rawat and Ser-Nam Lim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ttLcbEkaj6}\n}", "github": "", "reviewers": "V9Hw;Gs1n;THqa;gzU1", "pdf_size": 41882411, "rating": "4;5;6;6", "confidence": "5;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;4;3", "wc_summary": "67;154;77;39", "wc_strengths": "67;48;83;58", "wc_weaknesses": "152;373;118;48", "wc_questions": "5;124;91;27", "wc_limitations": "14;19;206;59", "wc_review": "305;718;575;231", "wc_reply_reviewers": "82;0;123;38", "wc_reply_authors": "177;35;525;26", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 42.61088475964797 ], "wc_strengths_avg": [ 64.0, 12.864680330268607 ], "wc_weaknesses_avg": [ 172.75, 121.54294508526606 ], "wc_questions_avg": [ 61.75, 47.8506792010312 ], "wc_limitations_avg": [ 74.5, 77.8989730355927 ], "wc_review_avg": [ 457.25, 197.62638361311983 ], "wc_reply_reviewers_avg": [ 60.75, 46.19185534269001 ], "wc_reply_authors_avg": [ 190.75, 202.0598611797999 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8528028654224417, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GQga88wAJCoJ:scholar.google.com/&scioq=AirSketch:+Generative+Motion+to+Sketch&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ucf.edu;ucf.edu;ucf.edu;ucf.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Central Florida", "aff_unique_dep": "", "aff_unique_url": "https://www.ucf.edu", "aff_unique_abbr": "UCF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Revisiting the Integration of Convolution and Attention for Vision Backbone", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93308", "id": "ttUXtV2YrA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ttUXtV2YrA", "openreview": "https://openreview.net/forum?id=ttUXtV2YrA", "poster": "/media/PosterPDFs/NeurIPS%202024/93308.png?t=1731741294.8906152", "project": "", "author_site": "Lei Zhu, Xinjiang Wang, Wayne Zhang, Rynson Lau", "tldr": "", "abstract": "Convolutions (Convs) and multi-head self-attentions (MHSAs) are typically considered alternatives to each other for building vision backbones. Although some works try to integrate both, they apply the two operators simultaneously at the finest pixel granularity. With Convs responsible for per-pixel feature extraction already, the question is whether we still need to include the heavy MHSAs at such a fine-grained level. In fact, this is the root cause of the scalability issue w.r.t. the input resolution for vision transformers. To address this important problem, we propose in this work to use MSHAs and Convs in parallel \\textbf{at different granularity levels} instead. Specifically, in each layer, we use two different ways to represent an image: a fine-grained regular grid and a coarse-grained set of semantic slots. We apply different operations to these two representations: Convs to the grid for local features, and MHSAs to the slots for global features. A pair of fully differentiable soft clustering and dispatching modules is introduced to bridge the grid and set representations, thus \nenabling local-global fusion. Through extensive experiments on various vision tasks, we empirically verify the potential of the proposed integration scheme, named \\textit{GLMix}: by offloading the burden of fine-grained features to light-weight Convs, it is sufficient to use MHSAs in a few (e.g., 64) semantic slots to match the performance of recent state-of-the-art backbones, while being more efficient. Our visualization results also demonstrate that the soft clustering module produces a meaningful semantic grouping effect with only IN1k classification supervision, which may induce better interpretability and inspire new weakly-supervised semantic segmentation approaches. Code will be available at \\url{https://github.com/rayleizhu/GLMix}.", "keywords": "convolution;attention;vision backbone", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Lei Zhu;Xinjiang Wang;Wayne Zhang;Rynson W. H. Lau", "authorids": "~Lei_Zhu12;~Xinjiang_Wang1;~Wayne_Zhang2;~Rynson_W._H._Lau1", "gender": "M;M;M;M", "homepage": "https://rayleizhu.github.io/;;http://www.cs.cityu.edu.hk/~rynson/;http://www.statfe.com", "dblp": "99/549_remove_this_suffix;215/3546;l/RynsonWHLau;239/6045", "google_scholar": "8PUzlfYAAAAJ;https://scholar.google.com/citations?hl=zh-TW;KilQqKYAAAAJ;5GtyVooAAAAJ", "orcid": "0000-0002-0191-7086;;;0000-0002-8415-1062", "linkedin": ";;;", "or_profile": "~Lei_Zhu12;~Xinjiang_Wang1;~Rynson_Lau1;~Wei_Zhang5", "aff": "City University of Hong Kong;SenseTime Group;City University of Hong Kong;SenseTime Research", "aff_domain": "cityu.edu.hk;sensetime.com;cityu.edu.hk;sensetime.com", "position": "PhD student;Researcher;Researcher;Research Director", "bibtex": "@inproceedings{\nzhu2024revisiting,\ntitle={Revisiting the Integration of Convolution and Attention for Vision Backbone},\nauthor={Lei Zhu and Xinjiang Wang and Wayne Zhang and Rynson W. H. Lau},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ttUXtV2YrA}\n}", "github": "", "reviewers": "CazW;txiT;fhiH", "pdf_size": 10560996, "rating": "4;5;5", "confidence": "4;5;5", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "223;130;111", "wc_strengths": "177;83;105", "wc_weaknesses": "402;77;199", "wc_questions": "186;107;5", "wc_limitations": "32;54;10", "wc_review": "1020;451;430", "wc_reply_reviewers": "297;25;0", "wc_reply_authors": "427;36;0", "reply_reviewers": "1;1;0", "reply_authors": "3;2;1", "rating_avg": [ 4.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 154.66666666666666, 48.93760199364993 ], "wc_strengths_avg": [ 121.66666666666667, 40.14418457953226 ], "wc_weaknesses_avg": [ 226.0, 134.04725534924864 ], "wc_questions_avg": [ 99.33333333333333, 74.09153497907542 ], "wc_limitations_avg": [ 32.0, 17.962924780409974 ], "wc_review_avg": [ 633.6666666666666, 273.3134139083229 ], "wc_reply_reviewers_avg": [ 107.33333333333333, 134.50237503066217 ], "wc_reply_authors_avg": [ 154.33333333333334, 193.3637907043497 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.816496580927726 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:A5AqBZ1LpfkJ:scholar.google.com/&scioq=Revisiting+the+Integration+of+Convolution+and+Attention+for+Vision+Backbone&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "cityu.edu.hk;sensetime.com;cityu.edu.hk;sensetime.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "City University of Hong Kong;SenseTime Group;SenseTime", "aff_unique_dep": ";;SenseTime Research", "aff_unique_url": "https://www.cityu.edu.hk;https://www.sensetime.com;https://www.sensetime.com", "aff_unique_abbr": "CityU;SenseTime;SenseTime", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unveiling the Tapestry of Consistency in Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93307", "id": "tu1oC7zHGW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tu1oC7zHGW", "openreview": "https://openreview.net/forum?id=tu1oC7zHGW", "poster": "/media/PosterPDFs/NeurIPS%202024/93307.png?t=1730622285.7484322", "project": "", "author_site": "Yuan Zhang, Fei xiao, Tao Huang, Chun-Kai Fan, Hongyuan Dong, Jiawen Li, Jiacong Wang, Kuan Cheng, Shanghang Zhang, Haoyuan Guo", "tldr": "", "abstract": "Large vision-language models (LVLMs) have recently achieved rapid progress, exhibiting great perception and reasoning abilities concerning visual information. However, when faced with prompts in different sizes of solution spaces, LVLMs fail to always give consistent answers regarding the same knowledge point. This inconsistency of answers between different solution spaces is prevalent in LVLMs and erodes trust. To this end, we provide a multi-modal benchmark ConBench, to intuitively analyze how LVLMs perform when the solution space of a prompt revolves around a knowledge point. Based on the ConBench tool, we are the first to reveal the tapestry and get the following findings: (1) In the discriminate realm, the larger the solution space of the prompt, the lower the accuracy of the answers. \n(2) Establish the relationship between the discriminative and generative realms: the accuracy of the discriminative question type exhibits a strong positive correlation with its Consistency with the caption. (3) Compared to open-source models, closed-source models exhibit a pronounced bias advantage in terms of Consistency. Eventually, we ameliorate the consistency of LVLMs by trigger-based diagnostic refinement, indirectly improving the performance of their caption. We hope this paper will accelerate the research community in better evaluating their models and encourage future advancements in the consistency domain.", "keywords": "Consistency;ConBench;Large Vision-Language Models;Analysis", "primary_area": "evaluation", "supplementary_material": "/attachment/6af31d1ca7f8caf635dea91c0c149ba97dd9083a.zip", "author": "Yuan Zhang;Fei xiao;Tao Huang;Chun-Kai Fan;Hongyuan Dong;Jiawen Li;Jiacong Wang;Kuan Cheng;Shanghang Zhang;Haoyuan Guo", "authorids": "~Yuan_Zhang20;~Fei_xiao8;~Tao_Huang5;~Chun-Kai_Fan1;~Hongyuan_Dong2;~Jiawen_Li6;~Jiacong_Wang1;~Kuan_Cheng1;~Shanghang_Zhang4;~Haoyuan_Guo1", "gender": "M;M;M;M;M;;M;M;;", "homepage": "https://gumpest.github.io/;https://github.com/;https://taohuang.info;;;;;https://www.kuancheng88.com/;;", "dblp": ";;34/808-20;;;;62/2575;126/6005.html;;", "google_scholar": "dXj1WskAAAAJ;;jkcRdBgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;kxuuhyoAAAAJ;;rzYgLkgAAAAJ;cHneZMEAAAAJ;;", "orcid": ";;;;;;0009-0001-8719-0614;;;", "linkedin": ";;;;;;;;;", "or_profile": "~Yuan_Zhang20;~Fei_xiao8;~Tao_Huang5;~Chun-Kai_Fan1;~Hongyuan_Dong2;~Jiawen_Li6;~Jiacong_Wang1;~Kuan_Cheng1;~Shanghang_Zhang4;~Haoyuan_Guo1", "aff": "Peking University;ByteDance Inc.;The University of Sydney;Peking University;ByteDance Inc.;;University of Chinese Academy of Sciences;Peking University;;", "aff_domain": "stu.pku.edu.cn;bytedance.com;sydney.edu.au;pku.edu.cn;bytedance.com;;ucas.ac.cn;pku.edu.cn;;", "position": "PhD student;Researcher;PhD student;MS student;Researcher;;PhD student;Assistant Professor;;", "bibtex": "@inproceedings{\nzhang2024unveiling,\ntitle={Unveiling the Tapestry of Consistency in Large Vision-Language Models},\nauthor={Yuan Zhang and Fei xiao and Tao Huang and Chun-Kai Fan and Hongyuan Dong and Jiawen Li and Jiacong Wang and Kuan Cheng and Shanghang Zhang and Haoyuan Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tu1oC7zHGW}\n}", "github": "", "reviewers": "aYRC;AnYL;wv4L", "pdf_size": 9969597, "rating": "5;5;7", "confidence": "4;4;4", "soundness": "4;3;4", "novelty": "4;2;3", "presentation": "3;3;4", "wc_summary": "85;78;60", "wc_strengths": "61;102;46", "wc_weaknesses": "188;124;31", "wc_questions": "47;4;2", "wc_limitations": "8;37;8", "wc_review": "389;345;147", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "59;59;0", "reply_reviewers": "0;0;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 74.33333333333333, 10.530379332620877 ], "wc_strengths_avg": [ 69.66666666666667, 23.66901396810231 ], "wc_weaknesses_avg": [ 114.33333333333333, 64.45842760174102 ], "wc_questions_avg": [ 17.666666666666668, 20.75786330258702 ], "wc_limitations_avg": [ 17.666666666666668, 13.670731102939918 ], "wc_review_avg": [ 293.6666666666667, 105.25313402565372 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 39.333333333333336, 27.81286672667087 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1820438817014293595&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "stu.pku.edu.cn;bytedance.com;sydney.edu.au;pku.edu.cn;bytedance.com;;ucas.ac.cn;pku.edu.cn;;", "author_num": 10, "aff_unique_index": "0;1;2;0;1;3;0", "aff_unique_norm": "Peking University;ByteDance;University of Sydney;University of Chinese Academy of Sciences", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.bytedance.com;https://www.sydney.edu.au;http://www.ucas.ac.cn", "aff_unique_abbr": "Peking U;ByteDance;USYD;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "DisCEdit: Model Editing by Identifying Discriminative Components", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93306", "id": "tuiqq1G8I5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tuiqq1G8I5", "openreview": "https://openreview.net/forum?id=tuiqq1G8I5", "poster": "/media/PosterPDFs/NeurIPS%202024/93306.png?t=1731728500.4734375", "project": "", "author_site": "Chaitanya Murti, Chiranjib Bhattacharyya", "tldr": "", "abstract": "Model editing is a growing area of research that is particularly valuable in contexts where modifying key model components, like neurons or filters, can significantly impact the model\u2019s performance. The key challenge lies in identifying important components useful to the model\u2019s predictions. We apply model editing to address two active areas of research, Structured Pruning, and Selective Class Forgetting. In this work, we adopt a distributional approach to the problem of identifying important components, leveraging the recently proposed discriminative filters hypothesis, which states that well-trained (convolutional) models possess discriminative filters that are essential to prediction. To do so, we define discriminative ability in terms of the Bayes error rate associated with the feature distributions, which is equivalent to computing the Total Variation (TV) distance between the distributions. However, computing the TV distance is intractable, motivating us to derive novel witness function-based lower bounds on the TV distance that require no assumptions on the underlying distributions; using this bound generalizes prior work such as Murti et al. [39] that relied on unrealistic Gaussianity assumptions on the feature distributions. With these bounds, we are able to discover critical subnetworks responsible for classwise predictions, and derive DISCEDIT-SP and DISCEDIT-U , algorithms for structured pruning requiring no access to the training data and loss function, and selective forgetting respectively. We apply DISCEDIT-U to selective class forgetting on models trained on CIFAR10 and CIFAR100, and we show that on average, we can reduce accuracy on a single class by over 80% with a minimal reduction in test accuracy on the remaining classes. Similarly, on Structured pruning problems, we obtain 40.8% sparsity on ResNet50 on Imagenet, with only a 2.6% drop in accuracy with minimal fine-tuning.", "keywords": "model editing;selective forgetting;structured pruning;total variation distance", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Chaitanya Murti;Chiranjib Bhattacharyya", "authorids": "~Chaitanya_Murti1;~Chiranjib_Bhattacharyya1", "gender": ";M", "homepage": ";http://www.csa.iisc.ac.in/~chiru/", "dblp": ";b/CBhattacharyya", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": "~Chaitanya_Murti1;~Chiranjib_Bhattacharyya1", "aff": ";Indian Institute of Science, Indian institute of science, Bangalore", "aff_domain": ";iisc.ac.in", "position": ";Full Professor", "bibtex": "@inproceedings{\nmurti2024discedit,\ntitle={Dis{CE}dit: Model Editing by Identifying Discriminative Components},\nauthor={Chaitanya Murti and Chiranjib Bhattacharyya},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tuiqq1G8I5}\n}", "github": "", "reviewers": "crkh;H2n6;FULk;YbA1", "pdf_size": 1834331, "rating": "5;6;7;7", "confidence": "3;3;3;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "2;4;2;3", "wc_summary": "62;93;93;186", "wc_strengths": "70;54;59;143", "wc_weaknesses": "101;123;122;167", "wc_questions": "79;1;106;131", "wc_limitations": "1;7;10;1", "wc_review": "313;278;390;628", "wc_reply_reviewers": "0;12;54;0", "wc_reply_authors": "213;186;165;221", "reply_reviewers": "0;1;1;0", "reply_authors": "3;3;3;3", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 108.5, 46.5 ], "wc_strengths_avg": [ 81.5, 35.975686233899694 ], "wc_weaknesses_avg": [ 128.25, 24.0351305384431 ], "wc_questions_avg": [ 79.25, 48.776915646645804 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 402.25, 136.48878159028308 ], "wc_reply_reviewers_avg": [ 16.5, 22.197972880423112 ], "wc_reply_authors_avg": [ 196.25, 22.219079638904937 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:gWEJvYjupXQJ:scholar.google.com/&scioq=DisCEdit:+Model+Editing+by+Identifying+Discriminative+Components&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": ";iisc.ac.in", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Indian Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.iisc.ac.in", "aff_unique_abbr": "IISc", "aff_campus_unique_index": "0", "aff_campus_unique": "Bangalore", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "title": "Enriching Disentanglement: From Logical Definitions to Quantitative Metrics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93305", "id": "tvQ3XCKWbB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tvQ3XCKWbB", "openreview": "https://openreview.net/forum?id=tvQ3XCKWbB", "poster": "/media/PosterPDFs/NeurIPS%202024/93305.png?t=1731417925.8727305", "project": "", "author_site": "Yivan Zhang, Masashi Sugiyama", "tldr": "", "abstract": "Disentangling the explanatory factors in complex data is a promising approach for generalizable and data-efficient representation learning. While a variety of quantitative metrics for learning and evaluating disentangled representations have been proposed, it remains unclear what properties these metrics truly quantify. In this work, we establish algebraic relationships between logical definitions and quantitative metrics to derive theoretically grounded disentanglement metrics. Concretely, we introduce a compositional approach for converting a higher-order predicate into a real-valued quantity by replacing (i) equality with a strict premetric, (ii) the Heyting algebra of binary truth values with a quantale of continuous values, and (iii) quantifiers with aggregators. The metrics induced by logical definitions have strong theoretical guarantees, and some of them are easily differentiable and can be used as learning objectives directly. Finally, we empirically demonstrate the effectiveness of the proposed metrics by isolating different aspects of disentangled representations.", "keywords": "Disentanglement;Representation Learning;Logic;Metric;Algebra;Category Theory;Topos Theory", "primary_area": "learning_theory", "supplementary_material": "/attachment/2c5f0e66abe3f9c564a820daf79932c4b5639221.zip", "author": "Yivan Zhang;Masashi Sugiyama", "authorids": "~Yivan_Zhang1;~Masashi_Sugiyama1", "gender": "M;M", "homepage": "https://yivan.xyz;http://www.ms.k.u-tokyo.ac.jp/sugi/", "dblp": "250/9557;35/1228", "google_scholar": "Q7S9kh4AAAAJ;https://scholar.google.co.jp/citations?user=GkYIrlIAAAAJ", "orcid": ";0000-0001-6658-6743", "linkedin": ";", "or_profile": "~Yivan_Zhang1;~Masashi_Sugiyama1", "aff": "RIKEN AIP;The University of Tokyo", "aff_domain": "riken.jp;u-tokyo.ac.jp", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2024enriching,\ntitle={Enriching Disentanglement: From Logical Definitions to Quantitative Metrics},\nauthor={Yivan Zhang and Masashi Sugiyama},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tvQ3XCKWbB}\n}", "github": "", "reviewers": "sy6j;4yVc;GqiT;GEk9", "pdf_size": 3711607, "rating": "5;5;5;6", "confidence": "3;2;2;2", "soundness": "2;2;3;3", "novelty": "3;2;2;3", "presentation": "1;2;3;3", "wc_summary": "18;45;70;68", "wc_strengths": "28;51;25;50", "wc_weaknesses": "490;79;38;34", "wc_questions": "2;39;24;35", "wc_limitations": "2;6;1;1", "wc_review": "540;220;158;188", "wc_reply_reviewers": "194;19;24;18", "wc_reply_authors": "100;40;267;57", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 50.25, 21.05201890555868 ], "wc_strengths_avg": [ 38.5, 12.05197079319395 ], "wc_weaknesses_avg": [ 160.25, 191.1941094803917 ], "wc_questions_avg": [ 25.0, 14.370107863199914 ], "wc_limitations_avg": [ 2.5, 2.0615528128088303 ], "wc_review_avg": [ 276.5, 153.70344823718173 ], "wc_reply_reviewers_avg": [ 63.75, 75.23421761406175 ], "wc_reply_authors_avg": [ 116.0, 89.88047618921475 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13490706849977658283&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "riken.jp;u-tokyo.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "RIKEN;University of Tokyo", "aff_unique_dep": "Advanced Institute for Computational Science;", "aff_unique_url": "https://www.aip.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "RIKEN AIP;UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Beyond Prompts: Dynamic Conversational Benchmarking of Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97463", "id": "twFlD3C9Rt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=twFlD3C9Rt", "openreview": "https://openreview.net/forum?id=twFlD3C9Rt", "poster": "/media/PosterPDFs/NeurIPS%202024/97463.png?t=1731314804.2811284", "project": "", "author_site": "David Castillo-Bolado, Joseph Davidson, Finlay Gray, Marek Rosa", "tldr": "", "abstract": "We introduce a dynamic benchmarking system for conversational agents that evaluates their performance through a single, simulated, and lengthy user$\\leftrightarrow$agent interaction. The interaction is a conversation between the user and agent, where multiple tasks are introduced and then undertaken concurrently. We context switch regularly to interleave the tasks, which constructs a realistic testing scenario in which we assess the Long-Term Memory, Continual Learning, and Information Integration capabilities of the agents. Results from both proprietary and open-source Large-Language Models show that LLMs in general perform well on single-task interactions, but they struggle on the same tasks when they are interleaved. Notably, short-context LLMs supplemented with an LTM system perform as well as or better than those with larger contexts. Our benchmark suggests that there are other challenges for LLMs responding to more natural interactions that contemporary benchmarks have heretofore not been able to capture.", "keywords": "Benchmark;LLM;LTM;Long-Term Memory;Large Language Model;Continual Learning;RAG;Retrieval Augmented Generation;Conversational Agents;Chat Agents", "primary_area": "", "supplementary_material": "", "author": "David Castillo-Bolado;Joseph Davidson;Finlay Gray;Marek Rosa", "authorids": "~David_Castillo-Bolado1;~Joseph_Davidson1;~Finlay_Gray1;~Marek_Rosa1", "gender": "M;;M;M", "homepage": ";;;https://blog.marekrosa.org", "dblp": "236/6168;167/4761;;", "google_scholar": "https://scholar.google.es/citations?user=Vgt9tlEAAAAJ;;;", "orcid": "0000-0003-4402-1228;;;", "linkedin": "davidcastillobolado/;;finlaygray/;marekrosa1/", "or_profile": "~David_Castillo-Bolado1;~Joseph_Davidson1;~Finlay_Gray1;~Marek_Rosa1", "aff": "GoodAI;GoodAI;University of Glasgow;GoodAI", "aff_domain": "goodai.com;goodai.com;gla.ac.uk;goodai.com", "position": "Researcher;Researcher;Undergrad student;Researcher", "bibtex": "@inproceedings{\ncastillo-bolado2024beyond,\ntitle={Beyond Prompts: Dynamic Conversational Benchmarking of Large Language Models},\nauthor={David Castillo-Bolado and Joseph Davidson and Finlay Gray and Marek Rosa},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=twFlD3C9Rt}\n}", "github": "", "reviewers": "25fo;8Qtc;zgCD", "pdf_size": 670595, "rating": "6;7;9", "confidence": "4;4;4", "wc_summary_and_contributions": "85;45;125", "wc_strengths": "178;33;39", "wc_improvement": "28;18;11", "wc_limitations": "61;1;2", "wc_correctness": "1;1;1", "wc_clarity": "1;1;2", "wc_relation_to_prior_work": "1;1;2", "wc_documentation": "1;1;2", "wc_additional_feedback": "1;1;1", "wc_review": "357;102;185", "wc_reply_reviewers": "0;51;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;3;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 85.0, 32.65986323710904 ], "wc_strengths_avg": [ 83.33333333333333, 66.98424358674873 ], "wc_improvement_avg": [ 19.0, 6.97614984548545 ], "wc_limitations_avg": [ 21.333333333333332, 28.051539866625664 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_relation_to_prior_work_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_documentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 214.66666666666666, 106.19583586730487 ], "wc_reply_reviewers_avg": [ 17.0, 24.041630560342615 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=804365725389801960&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "goodai.com;goodai.com;gla.ac.uk;goodai.com", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "GoodAI;University of Glasgow", "aff_unique_dep": ";", "aff_unique_url": "https://www.goodai.com/;https://www.gla.ac.uk", "aff_unique_abbr": "GoodAI;Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Czech Republic;United Kingdom" }, { "title": "Derandomizing Multi-Distribution Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93304", "id": "twYE75Mnkt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=twYE75Mnkt", "openreview": "https://openreview.net/forum?id=twYE75Mnkt", "poster": "", "project": "", "author_site": "Kasper Green Larsen, Omar Montasser, Nikita Zhivotovskiy", "tldr": "", "abstract": "Multi-distribution or collaborative learning involves learning a single predictor that works well across multiple data distributions, using samples from each during training. Recent research on multi-distribution learning, focusing on binary loss and finite VC dimension classes, has shown near-optimal sample complexity that is achieved with oracle efficient algorithms. That is, these algorithms are computationally efficient given an efficient ERM for the class. Unlike in classical PAC learning, where the optimal sample complexity is achieved with deterministic predictors, current multi-distribution learning algorithms output randomized predictors. This raises the question: can these algorithms be derandomized to produce a deterministic predictor for multiple distributions? Through a reduction to discrepancy minimization, we show that derandomizing multi-distribution learning is computationally hard, even when ERM is computationally efficient. On the positive side, we identify a structural condition enabling an efficient black-box reduction, converting existing randomized multi-distribution predictors into deterministic ones.", "keywords": "pac learning;multi-distribution;derandomization;computational efficiency;discrepancy minimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Kasper Green Larsen;Omar Montasser;Nikita Zhivotovskiy", "authorids": "~Kasper_Green_Larsen1;~Omar_Montasser1;~Nikita_Zhivotovskiy1", "gender": "M;M;M", "homepage": "https://ttic.uchicago.edu/~omar/;;http://www.cs.au.dk/~larsen/", "dblp": "194/3002;;07/6242", "google_scholar": "u455rGAAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ZluoxUcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Omar_Montasser1;~Nikita_Zhivotovskiy1;~Kasper_Larsen1", "aff": "University of California, Berkeley;University of California, Berkeley;Aarhus University", "aff_domain": "berkeley.edu;berkeley.edu;au.dk", "position": "Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlarsen2024derandomizing,\ntitle={Derandomizing Multi-Distribution Learning},\nauthor={Kasper Green Larsen and Omar Montasser and Nikita Zhivotovskiy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=twYE75Mnkt}\n}", "github": "", "reviewers": "QSLG;xqfu;x5BR;2SLq", "pdf_size": 314924, "rating": "6;6;7;7", "confidence": "3;2;3;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;4", "wc_summary": "189;93;267;115", "wc_strengths": "270;45;347;48", "wc_weaknesses": "455;36;571;58", "wc_questions": "190;41;115;67", "wc_limitations": "38;6;1;14", "wc_review": "1142;221;1301;302", "wc_reply_reviewers": "45;0;26;29", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 166.0, 68.30080526611674 ], "wc_strengths_avg": [ 177.5, 133.8030268715921 ], "wc_weaknesses_avg": [ 280.0, 236.70973786475284 ], "wc_questions_avg": [ 103.25, 56.684984784332435 ], "wc_limitations_avg": [ 14.75, 14.201672436723781 ], "wc_review_avg": [ 741.5, 484.12834042224796 ], "wc_reply_reviewers_avg": [ 25.0, 16.140012391568973 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1667150234853804151&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "berkeley.edu;berkeley.edu;au.dk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Berkeley;Aarhus University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://au.dk", "aff_unique_abbr": "UC Berkeley;AU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Denmark" }, { "title": "Look, Listen, and Answer: Overcoming Biases for Audio-Visual Question Answering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93303", "id": "twpPD9UMUN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=twpPD9UMUN", "openreview": "https://openreview.net/forum?id=twpPD9UMUN", "poster": "/media/PosterPDFs/NeurIPS%202024/93303.png?t=1731290401.2389894", "project": "", "author_site": "Jie Ma, Min Hu, Pinghui Wang, Wangchun Sun, Lingyun Song, Hongbin Pei, Jun Liu, Youtian Du", "tldr": "", "abstract": "Audio-Visual Question Answering (AVQA) is a complex multi-modal reasoning task, demanding intelligent systems to accurately respond to natural language queries based on audio-video input pairs. Nevertheless, prevalent AVQA approaches are prone to overlearning dataset biases, resulting in poor robustness. Furthermore, current datasets may not provide a precise diagnostic for these methods. To tackle these challenges, firstly, we propose a novel dataset, *MUSIC-AVQA-R*, crafted in two steps: rephrasing questions within the test split of a public dataset (*MUSIC-AVQA*) and subsequently introducing distribution shifts to split questions. The former leads to a large, diverse test space, while the latter results in a comprehensive robustness evaluation on rare, frequent, and overall questions. Secondly, we propose a robust architecture that utilizes a multifaceted cycle collaborative debiasing strategy to overcome bias learning. Experimental results show that this architecture achieves state-of-the-art performance on MUSIC-AVQA-R, notably obtaining a significant improvement of 9.32\\%. Extensive ablation experiments are conducted on the two datasets mentioned to analyze the component effectiveness within the debiasing strategy. Additionally, we highlight the limited robustness of existing multi-modal QA methods through the evaluation on our dataset. We also conduct experiments combining various baselines with our proposed strategy on two datasets to verify its plug-and-play capability. Our dataset and code are available at .", "keywords": "audio-visual question answering;bias elimination;debiasing;multimodality learning", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/a8e33ad7b038e7c5bf90bcc2fc52442f88c94721.zip", "author": "Jie Ma;Min Hu;Pinghui Wang;Wangchun Sun;Lingyun Song;Hongbin Pei;Jun Liu;Youtian Du", "authorids": "~Jie_Ma1;~Min_Hu7;~Pinghui_Wang1;~Wangchun_Sun1;~Lingyun_Song1;~Hongbin_Pei1;~Jun_Liu10;~Youtian_Du1", "gender": "M;;M;M;M;;M;M", "homepage": "https://dr-majie.github.io/;;;https://github.com/sunshyyyyyyy;http://jszy.nwpu.edu.cn/en/songlingyun;;http://liukeen.gr.xjtu.edu.cn;", "dblp": "62/5110-1;;84/7882;;152/7478;;95/3736-2;", "google_scholar": "VsY24XkAAAAJ;;;;https://scholar.google.com/citations?hl=zh-CN;;7OjnHZMAAAAJ;", "orcid": "0000-0002-7432-3238;;0000-0002-1434-837X;;0000-0002-7892-2617;;0000-0002-6004-0675;0000-0002-1714-3433", "linkedin": ";;pinghui-wang-53b86818/?originalSubdomain=hk;;;;;", "or_profile": "~Jie_Ma1;~Min_Hu7;~Pinghui_Wang1;~Wangchun_Sun1;~Lingyun_Song1;~Hongbin_Pei1;~Jun_Liu10;~Youtian_Du1", "aff": "Xi'an Jiaotong University;;Xi'an Jiaotong University;Xi'an Jiaotong University;Northwestern Polytechnical University;;Xi'an Jiaotong University;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn;nwpu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn", "position": "Assistant Professor;;Full Professor;MS student;Associate Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2024look,\ntitle={Look, Listen, and Answer: Overcoming Biases for Audio-Visual Question Answering},\nauthor={Jie Ma and Min Hu and Pinghui Wang and Wangchun Sun and Lingyun Song and Hongbin Pei and Jun Liu and Youtian Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=twpPD9UMUN}\n}", "github": "", "reviewers": "b8Yd;Zk4k;npgc", "pdf_size": 9473260, "rating": "5;6;8", "confidence": "4;3;5", "soundness": "1;2;4", "novelty": "2;3;3", "presentation": "2;2;4", "wc_summary": "107;171;145", "wc_strengths": "21;80;303", "wc_weaknesses": "487;109;105", "wc_questions": "9;2;90", "wc_limitations": "10;1;1", "wc_review": "634;363;644", "wc_reply_reviewers": "11;10;64", "wc_reply_authors": "12;10;84", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.3333333333333335, 1.247219128924647 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 141.0, 26.280537792569366 ], "wc_strengths_avg": [ 134.66666666666666, 121.44225879907793 ], "wc_weaknesses_avg": [ 233.66666666666666, 179.1411609752364 ], "wc_questions_avg": [ 33.666666666666664, 39.93606000707742 ], "wc_limitations_avg": [ 4.0, 4.242640687119285 ], "wc_review_avg": [ 547.0, 130.17168150817852 ], "wc_reply_reviewers_avg": [ 28.333333333333332, 25.223445883190152 ], "wc_reply_authors_avg": [ 35.333333333333336, 34.4222150491349 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13458154802997778008&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "xjtu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn;nwpu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Xi'an Jiao Tong University;Northwestern Polytechnical University", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.nwpu.edu.cn", "aff_unique_abbr": "XJTU;NWPU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Conditional Outcome Equivalence: A Quantile Alternative to CATE", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93302", "id": "tyPcIETPWM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tyPcIETPWM", "openreview": "https://openreview.net/forum?id=tyPcIETPWM", "poster": "/media/PosterPDFs/NeurIPS%202024/93302.png?t=1730386109.9327066", "project": "", "author_site": "Josh Givens, Henry Reeve, Song Liu, Katarzyna Reluga", "tldr": "", "abstract": "The conditional quantile treatment effect (CQTE) can provide insight into the effect of a treatment beyond the conditional average treatment effect (CATE). This ability to provide information over multiple quantiles of the response makes the CQTE especially valuable in cases where the effect of a treatment is not well-modelled by a location shift, even conditionally on the covariates. Nevertheless, the estimation of the CQTE is challenging and often depends upon the smoothness of the individual quantiles as a function of the covariates rather than smoothness of the CQTE itself. This is in stark contrast to the CATE where it is possible to obtain high-quality estimates which have less dependency upon the smoothness of the nuisance parameters when the CATE itself is smooth. Moreover, relative smoothness of the CQTE lacks the interpretability of smoothness of the CATE making it less clear whether it is a reasonable assumption to make. We combine the desirable properties of the CATE and CQTE by considering a new estimand, the conditional quantile comparator (CQC). The CQC not only retains information about the whole treatment distribution, similar to the CQTE, but also having more natural examples of smoothness and is able to leverage simplicity in an auxiliary estimand. We provide finite sample bounds on the error of our estimator, demonstrating its ability to exploit simplicity. We validate our theory in numerical simulations which show that our method produces more accurate estimates than baselines. Finally, we apply our methodology to a study on the effect of employment incentives on earnings across different age groups. We see that our method is able to reveal heterogeneity of the effect across different quantiles.", "keywords": "Heteregenous Treatment Effect;Conditional Average Treatment Effect;Conditional Quantile Treatment Effect;Quantile Regression", "primary_area": "causal_inference", "supplementary_material": "/attachment/88829afaccc68d7a922d48d48410abd43fa22bab.zip", "author": "Josh Givens;Henry Reeve;Song Liu;Katarzyna Reluga", "authorids": "~Josh_Givens1;~Henry_Reeve1;~Song_Liu1;~Katarzyna_Reluga1", "gender": "M;;M;F", "homepage": "https://joshgivens.github.io/;https://henryreeve.netlify.app/;http://allmodelsarewrong.net;https://katarzynareluga.github.io/", "dblp": "340/6971;;80/1141-2;381/5460", "google_scholar": "mv0ieOsAAAAJ;;;https://scholar.google.com/citations?hl=en%5c", "orcid": "0000-0002-6343-6544;;;0000-0001-9651-5030", "linkedin": ";;;katarzyna-reluga-781889225", "or_profile": "~Josh_Givens1;~Henry_Reeve1;~Song_Liu1;~Katarzyna_Reluga1", "aff": "University of Bristol;University of Bristol;University of Bristol, UK;University of Bristol", "aff_domain": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "position": "PhD student;Lecturer;Lecturer;Lecturer", "bibtex": "@inproceedings{\ngivens2024conditional,\ntitle={Conditional Outcome Equivalence: A Quantile Alternative to {CATE}},\nauthor={Josh Givens and Henry Reeve and Song Liu and Katarzyna Reluga},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tyPcIETPWM}\n}", "github": "", "reviewers": "zMNr;CQR9;nAy3;bJi9", "pdf_size": 1777512, "rating": "6;6;6;6", "confidence": "3;4;5;2", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "3;2;2;3", "wc_summary": "66;120;129;102", "wc_strengths": "49;129;56;55", "wc_weaknesses": "193;308;353;258", "wc_questions": "2;38;3;5", "wc_limitations": "1;124;10;11", "wc_review": "311;719;551;431", "wc_reply_reviewers": "18;0;36;79", "wc_reply_authors": "0;0;25;31", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 104.25, 24.128561913218117 ], "wc_strengths_avg": [ 72.25, 32.87381176559846 ], "wc_weaknesses_avg": [ 278.0, 59.476886266851594 ], "wc_questions_avg": [ 12.0, 15.049916943292411 ], "wc_limitations_avg": [ 36.5, 50.66803726216361 ], "wc_review_avg": [ 503.0, 150.83766107971843 ], "wc_reply_reviewers_avg": [ 33.25, 29.32042803234632 ], "wc_reply_authors_avg": [ 14.0, 14.159802258506296 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:yQxF8rq9hKkJ:scholar.google.com/&scioq=Conditional+Outcome+Equivalence:+A+Quantile+Alternative+to+CATE&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "bristol.ac.uk;bristol.ac.uk;bristol.ac.uk;bristol.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "Bristol", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "YOLOv10: Real-Time End-to-End Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93301", "id": "tz83Nyb71l", "proceeding": "", "pdf": "https://openreview.net/pdf?id=tz83Nyb71l", "openreview": "https://openreview.net/forum?id=tz83Nyb71l", "poster": "/media/PosterPDFs/NeurIPS%202024/93301.png?t=1729388892.5893633", "project": "", "author_site": "Ao Wang, Hui Chen, Lihao Liu, Kai CHEN, Zijia Lin, Jungong Han, guiguang ding", "tldr": "", "abstract": "Over the past years, YOLOs have emerged as the predominant paradigm in the field of real-time object detection owing to their effective balance between computational cost and detection performance. Researchers have explored the architectural designs, optimization objectives, data augmentation strategies, and others for YOLOs, achieving notable progress. However, the reliance on the non-maximum suppression (NMS) for post-processing hampers the end-to-end deployment of YOLOs and adversely impacts the inference latency. Besides, the design of various components in YOLOs lacks the comprehensive and thorough inspection, resulting in noticeable computational redundancy and limiting the model's capability. It renders the suboptimal efficiency, along with considerable potential for performance improvements. In this work, we aim to further advance the performance-efficiency boundary of YOLOs from both the post-processing and the model architecture. To this end, we first present the consistent dual assignments for NMS-free training of YOLOs, which brings the competitive performance and low inference latency simultaneously. Moreover, we introduce the holistic efficiency-accuracy driven model design strategy for YOLOs. We comprehensively optimize various components of YOLOs from both the efficiency and accuracy perspectives, which greatly reduces the computational overhead and enhances the capability. The outcome of our effort is a new generation of YOLO series for real-time end-to-end object detection, dubbed YOLOv10. Extensive experiments show that YOLOv10 achieves the state-of-the-art performance and efficiency across various model scales. For example, our YOLOv10-S is 1.8$\\times$ faster than RT-DETR-R18 under the similar AP on COCO, meanwhile enjoying 2.8$\\times$ smaller number of parameters and FLOPs. Compared with YOLOv9-C, YOLOv10-B has 46\\% less latency and 25\\% fewer parameters for the same performance. Code and models are available at https://github.com/THU-MIG/yolov10.", "keywords": "YOLO;object detection;computer vision", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Ao Wang;Hui Chen;Lihao Liu;Kai CHEN;Zijia Lin;Jungong Han;Guiguang Ding", "authorids": "~Ao_Wang2;~Hui_Chen7;~Lihao_Liu2;~Kai_CHEN17;~Zijia_Lin1;~Jungong_Han1;~Guiguang_Ding1", "gender": ";M;M;;M;M;M", "homepage": "https://github.com/jameslahm;https://huichen24.github.io/;https://herrlsm.github.io/;;https://sites.google.com/site/linzijia72/;https://jungonghan.github.io/;http://ise.thss.tsinghua.edu.cn/MIG/dgg.html", "dblp": ";;;;78/9911;98/6127;51/740", "google_scholar": "zRMV_M8AAAAJ;erpvWcIAAAAJ;;;ghUYrHkAAAAJ;hNi1gxAAAAAJ;https://scholar.google.com.tw/citations?user=B7F3yt4AAAAJ", "orcid": ";0000-0003-4180-5801;0009-0000-9491-2198;;0000-0002-1390-7424;0000-0003-4361-956X;0000-0003-0137-9975", "linkedin": ";;;;;;", "or_profile": "~Ao_Wang2;~Hui_Chen7;~Lihao_Liu2;~Kai_CHEN17;~Zijia_Lin1;~Jungong_Han1;~Guiguang_Ding1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;;Kuaishou Technology;University of Sheffield;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;;kuaishou.com;sheffield.ac.uk;tsinghua.edu.cn", "position": "PhD student;Researcher;MS student;;NLP expert;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024yolov,\ntitle={{YOLO}v10: Real-Time End-to-End Object Detection},\nauthor={Ao Wang and Hui Chen and Lihao Liu and Kai CHEN and Zijia Lin and Jungong Han and Guiguang Ding},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=tz83Nyb71l}\n}", "github": "", "reviewers": "qWaL;nrqj;vpxY;snfm", "pdf_size": 1213746, "rating": "5;5;6;7", "confidence": "5;4;4;5", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "61;67;52;49", "wc_strengths": "108;61;70;54", "wc_weaknesses": "183;161;92;62", "wc_questions": "8;68;58;35", "wc_limitations": "2;6;8;8", "wc_review": "362;363;280;208", "wc_reply_reviewers": "0;41;17;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 7.1545440106270926 ], "wc_strengths_avg": [ 73.25, 20.849160654568326 ], "wc_weaknesses_avg": [ 124.5, 49.28742233065146 ], "wc_questions_avg": [ 42.25, 23.112496619794236 ], "wc_limitations_avg": [ 6.0, 2.449489742783178 ], "wc_review_avg": [ 303.25, 64.48788645939639 ], "wc_reply_reviewers_avg": [ 19.5, 14.568802284333465 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1650, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17637919853899263193&as_sdt=5,40&sciodt=0,40&hl=en", "gs_version_total": 6, "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;;kuaishou.com;sheffield.ac.uk;tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Tsinghua University;Kuaishou Technology;University of Sheffield", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.kuaishou.com;https://www.sheffield.ac.uk", "aff_unique_abbr": "THU;Kuaishou;Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "RAMP: Boosting Adversarial Robustness Against Multiple $l_p$ Perturbations for Universal Robustness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93300", "id": "u1Z3HWz4VJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u1Z3HWz4VJ", "openreview": "https://openreview.net/forum?id=u1Z3HWz4VJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93300.png?t=1733257077.5979161", "project": "", "author_site": "Enyi Jiang, Gagandeep Singh", "tldr": "", "abstract": "Most existing works focus on improving robustness against adversarial attacks bounded by a single $l_p$ norm using adversarial training (AT). However, these AT models' multiple-norm robustness (union accuracy) is still low, which is crucial since in the real-world an adversary is not necessarily bounded by a single norm. The tradeoffs among robustness against multiple $l_p$ perturbations and accuracy/robustness make obtaining good union and clean accuracy challenging. We design a logit pairing loss to improve the union accuracy by analyzing the tradeoffs from the lens of distribution shifts. We connect natural training (NT) with AT via gradient projection, to incorporate useful information from NT into AT, where we empirically and theoretically show it moderates the accuracy/robustness tradeoff. We propose a novel training framework \\textbf{RAMP}, to boost the robustness against multiple $l_p$ perturbations. \\textbf{RAMP} can be easily adapted for robust fine-tuning and full AT. For robust fine-tuning, \\textbf{RAMP} obtains a union accuracy up to $53.3\\%$ on CIFAR-10, and $29.1\\%$ on ImageNet. For training from scratch, \\textbf{RAMP} achieves a union accuracy of $44.6\\%$ and good clean accuracy of $81.2\\%$ on ResNet-18 against AutoAttack on CIFAR-10. Beyond multi-norm robustness \\textbf{RAMP}-trained models achieve superior \\textit{universal robustness}, effectively generalizing against a range of unseen adversaries and natural corruptions.", "keywords": "Adversarial Robustness;Pre-training and Fine-tuning;Distribution Shifts", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/60efc40c9debd534b538669d46cd9a02162c541d.zip", "author": "Enyi Jiang;Gagandeep Singh", "authorids": "~Enyi_Jiang1;~Gagandeep_Singh1", "gender": "F;M", "homepage": "https://enyijiang.github.io/;https://ggndpsngh.github.io/", "dblp": "268/2122;64/3747-1", "google_scholar": "h6YC9nwAAAAJ;https://scholar.google.ch/citations?user=m4b2ruEAAAAJ", "orcid": ";0000-0002-9299-2961", "linkedin": "enyi-jiang-16a561171/;gagandeep-singh-1bb01b49/", "or_profile": "~Enyi_Jiang1;~Gagandeep_Singh1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "uiuc.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\njiang2024ramp,\ntitle={{RAMP}: Boosting Adversarial Robustness Against Multiple \\$l\\_p\\$ Perturbations for Universal Robustness},\nauthor={Enyi Jiang and Gagandeep Singh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u1Z3HWz4VJ}\n}", "github": "", "reviewers": "3Hj9;6TV5;Qhq5;jXrr", "pdf_size": 3958209, "rating": "3;5;6;7", "confidence": "4;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;2;2", "wc_summary": "49;23;188;61", "wc_strengths": "30;21;136;67", "wc_weaknesses": "211;57;324;79", "wc_questions": "107;2;264;2", "wc_limitations": "15;22;128;10", "wc_review": "412;125;1040;219", "wc_reply_reviewers": "153;326;399;0", "wc_reply_authors": "228;832;470;57", "reply_reviewers": "1;3;3;0", "reply_authors": "3;4;5;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 80.25, 63.707829189197774 ], "wc_strengths_avg": [ 63.5, 45.26864256855953 ], "wc_weaknesses_avg": [ 167.75, 107.73433760876799 ], "wc_questions_avg": [ 93.75, 107.23426457993732 ], "wc_limitations_avg": [ 43.75, 48.828142499996865 ], "wc_review_avg": [ 449.0, 356.55504483880185 ], "wc_reply_reviewers_avg": [ 219.5, 155.05241049400038 ], "wc_reply_authors_avg": [ 396.75, 290.9960266051755 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.16903085094570333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JTgBNLZJZ40J:scholar.google.com/&scioq=RAMP:+Boosting+Adversarial+Robustness+Against+Multiple+%24l_p%24+Perturbations+for+Universal+Robustness&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "uiuc.edu;illinois.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "u1b1dJtyxc", "title": "What Are Large Language Models Mapping to in the Brain? A Case Against Over-Reliance on Brain Scores", "track": "main", "status": "Reject", "tldr": "", "abstract": "Given the remarkable capabilities of large language models (LLMs), there has been a growing interest in evaluating their similarity to the human brain. One approach towards quantifying this similarity is by measuring how well a model predicts neural signals, also called \"brain score\". Internal representations from LLMs achieve state-of-the-art brain scores, leading to speculation that they share computational principles with human language processing. This inference is only valid if the subset of neural activity predicted by LLMs reflects core elements of language processing. Here, we question this assumption by analyzing three neural datasets used in an impactful study on LLM-to-brain mappings, with a particular focus on an fMRI dataset where participants read short passages. We first find that when using shuffled train-test splits, as done in previous studies with these datasets, a trivial feature that encodes temporal autocorrelation not only outperforms LLMs but also accounts for the majority of neural variance that LLMs explain. We therefore caution against shuffled train-test splits, and use contiguous test splits moving forward. Second, we explain the surprising result that untrained LLMs have higher-than-expected brain scores by showing they do not account for additional neural variance beyond two simple features: sentence length and sentence position. This undermines evidence used to claim that the transformer architecture biases computations to be more brain-like. Third, we find that brain scores of trained LLMs on this dataset can largely be explained by sentence position, sentence length, and static word vectors; a small, additional amount is explained by sense-specific word embeddings and contextual representations of sentence structure. We conclude that over-reliance on brain scores can lead to over-interpretation of similarity between LLMs and brains, and emphasize the importance of deconstructing what LLMs are mapping to in neural signals.", "keywords": "Large language models;Neuroscience;Neural encoding;fMRI;Replication;Language", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/9d63e01dc497e717ca5218ed624542402cb01d45.zip", "author": "Ebrahim Feghhi;Nima Ryan Hadidi;Bryan Song;Idan Asher Blank;Jonathan Kao", "authorids": "~Ebrahim_Feghhi1;~Nima_Ryan_Hadidi1;~Bryan_Song1;~Idan_Asher_Blank1;~Jonathan_Kao1", "gender": "M;M;M;M;", "homepage": "https://ebrahimfeghhi.github.io/;;;;http://seas.ucla.edu/~kao", "dblp": "251/6193;;;;145/1310", "google_scholar": ";;;hKasFiEAAAAJ;", "orcid": ";0009-0000-9521-2185;;;0000-0002-9298-0143", "linkedin": ";;hcsong/;;", "or_profile": "~Ebrahim_Feghhi1;~Nima_Ryan_Hadidi1;~Bryan_Song1;~Idan_Asher_Blank1;~Jonathan_Kao1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@misc{\nanonymous2024what,\ntitle={What Are Large Language Models Mapping to in the Brain? A Case Against Over-Reliance on Brain Scores},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=u1b1dJtyxc}\n}", "github": "", "project": "", "reviewers": "UQa9;um4X;VpMi;bjuX;5zfv", "site": "https://openreview.net/forum?id=u1b1dJtyxc", "pdf_size": 3467626, "rating": "3;4;5;7;7", "confidence": "4;4;5;3;2", "soundness": "2;2;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;2;3;2;2", "wc_summary": "171;96;116;89;225", "wc_strengths": "92;60;94;45;60", "wc_weaknesses": "214;108;319;524;83", "wc_questions": "179;75;266;52;24", "wc_limitations": "22;1;7;11;5", "wc_review": "678;340;802;721;397", "wc_reply_reviewers": "914;0;483;159;5", "wc_reply_authors": "2652;43;1441;339;0", "reply_reviewers": "3;0;2;1;1", "reply_authors": "6;2;3;3;1", "rating_avg": [ 5.2, 1.6 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 139.4, 51.56975858000501 ], "wc_strengths_avg": [ 70.2, 19.415457759218555 ], "wc_weaknesses_avg": [ 249.6, 160.73904317246632 ], "wc_questions_avg": [ 119.2, 90.17627182357896 ], "wc_limitations_avg": [ 9.2, 7.166589146867568 ], "wc_review_avg": [ 587.6, 184.15710684087108 ], "wc_reply_reviewers_avg": [ 312.2, 348.3236426084224 ], "wc_reply_authors_avg": [ 895.0, 1021.8835550100608 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 3.0, 1.6733200530681511 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6864064729836441, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18387895395897690674&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "DRACO: A Denoising-Reconstruction Autoencoder for Cryo-EM", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93299", "id": "u1mNGLYN74", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u1mNGLYN74", "openreview": "https://openreview.net/forum?id=u1mNGLYN74", "poster": "/media/PosterPDFs/NeurIPS%202024/93299.png?t=1730795791.842183", "project": "", "author_site": "YingJun Shen, Haizhao Dai, Qihe Chen, Yan Zeng, Jiakai Zhang, Yuan Pei, Jingyi Yu", "tldr": "", "abstract": "Foundation models in computer vision have demonstrated exceptional performance in zero-shot and few-shot tasks by extracting multi-purpose features from large-scale datasets through self-supervised pre-training methods. However, these models often overlook the severe corruption in cryogenic electron microscopy (cryo-EM) images by high-level noises. We introduce DRACO, a Denoising-Reconstruction Autoencoder for CryO-EM, inspired by the Noise2Noise (N2N) approach. By processing cryo-EM movies into odd and even images and treating them as independent noisy observations, we apply a denoising-reconstruction hybrid training scheme. We mask both images to create denoising and reconstruction tasks. For DRACO's pre-training, the quality of the dataset is essential, we hence build a high-quality, diverse dataset from an uncurated public database, including over 270,000 movies or micrographs. After pre-training, DRACO naturally serves as a generalizable cryo-EM image denoiser and a foundation model for various cryo-EM downstream tasks. DRACO demonstrates the best performance in denoising, micrograph curation, and particle picking tasks compared to state-of-the-art baselines.", "keywords": "masked image modeling;foundation model;cryo-EM;denoising autoencoder", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "YingJun Shen;Haizhao Dai;Qihe Chen;Yan Zeng;Jiakai Zhang;Yuan Pei;Jingyi Yu", "authorids": "~YingJun_Shen1;~Haizhao_Dai1;~Qihe_Chen2;~Yan_Zeng3;~Jiakai_Zhang3;~Yuan_Pei2;~Jingyi_Yu5", "gender": "M;M;M;M;M;;M", "homepage": ";;https://github.com/Dylan8527/Dylan8527.github.io;https://zerone182.github.io;https://jiakai-zhang.github.io;;", "dblp": ";313/9109.html;;;179/2299;;", "google_scholar": ";;;sR3Nf7YAAAAJ;https://scholar.google.com/citations?hl=en;;R9L_AfQAAAAJ", "orcid": "0009-0002-6928-9132;;;;;0000-0003-4065-2540;", "linkedin": ";;;;jiakai-zhang-38b8b4217/;;", "or_profile": "~YingJun_Shen1;~Haizhao_Dai1;~Qihe_Chen2;~Yan_Zeng3;~Jiakai_Zhang3;~Yuan_Pei2;~Jingyi_Yu5", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;MS student;Undergrad student;Undergrad student;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nshen2024draco,\ntitle={{DRACO}: A Denoising-Reconstruction Autoencoder for Cryo-{EM}},\nauthor={YingJun Shen and Haizhao Dai and Qihe Chen and Yan Zeng and Jiakai Zhang and Yuan Pei and Jingyi Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u1mNGLYN74}\n}", "github": "", "reviewers": "b2KB;AE1Z;ypYy;RjVX", "pdf_size": 35742911, "rating": "4;6;7;7", "confidence": "2;5;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;3", "wc_summary": "148;87;97;54", "wc_strengths": "55;83;70;63", "wc_weaknesses": "112;340;146;108", "wc_questions": "145;90;11;53", "wc_limitations": "12;13;20;3", "wc_review": "472;613;344;281", "wc_reply_reviewers": "0;17;27;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.5, 33.7231374578345 ], "wc_strengths_avg": [ 67.75, 10.280442597476044 ], "wc_weaknesses_avg": [ 176.5, 95.54449225360926 ], "wc_questions_avg": [ 74.75, 49.25634476897367 ], "wc_limitations_avg": [ 12.0, 6.041522986797286 ], "wc_review_avg": [ 427.5, 127.30377056474015 ], "wc_reply_reviewers_avg": [ 16.75, 10.304731922762475 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7492686492653551, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14429327408008497604&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Transformation-Invariant Learning and Theoretical Guarantees for OOD Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93298", "id": "u2gzfXRLaN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u2gzfXRLaN", "openreview": "https://openreview.net/forum?id=u2gzfXRLaN", "poster": "", "project": "", "author_site": "Omar Montasser, Han Shao, Emmanuel Abbe", "tldr": "", "abstract": "Learning with identical train and test distributions has been extensively investigated both practically and theoretically. Much remains to be understood, however, in statistical learning under distribution shifts. This paper focuses on a distribution shift setting where train and test distributions can be related by classes of (data) transformation maps. We initiate a theoretical study for this framework, investigating learning scenarios where the target class of transformations is either known or unknown. We establish learning rules and algorithmic reductions to Empirical Risk Minimization (ERM), accompanied with learning guarantees. We obtain upper bounds on the sample complexity in terms of the VC dimension of the class composing predictors with transformations, which we show in many cases is not much larger than the VC dimension of the class of predictors. We highlight that the learning rules we derive offer a game-theoretic viewpoint on distribution shift: a learner searching for predictors and an adversary searching for transformation maps to respectively minimize and maximize the worst-case loss.", "keywords": "pac learning guarantees;theory for distribution shifts;sample complexity;ood generalization;vc dimension", "primary_area": "learning_theory", "supplementary_material": "", "author": "Omar Montasser;Han Shao;Emmanuel Abbe", "authorids": "~Omar_Montasser1;~Han_Shao4;~Emmanuel_Abbe1", "gender": "M;F;", "homepage": "https://ttic.uchicago.edu/~omar/;https://sites.google.com/view/hanshao/;", "dblp": "194/3002;;84/5016", "google_scholar": "u455rGAAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0009-0005-9206-1357;", "linkedin": ";;", "or_profile": "~Omar_Montasser1;~Han_Shao4;~Emmanuel_Abbe1", "aff": "University of California, Berkeley;Toyota Technological Institute at Chicago;Swiss Federal Institute of Technology Lausanne", "aff_domain": "berkeley.edu;ttic.edu;epfl.ch", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nmontasser2024transformationinvariant,\ntitle={Transformation-Invariant Learning and Theoretical Guarantees for {OOD} Generalization},\nauthor={Omar Montasser and Han Shao and Emmanuel Abbe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u2gzfXRLaN}\n}", "github": "", "reviewers": "RtrA;JZtQ;Whvi;fcsB;fSmB", "pdf_size": 531025, "rating": "4;4;4;6;7", "confidence": "3;2;3;4;4", "soundness": "2;3;2;4;4", "novelty": "2;2;3;2;3", "presentation": "2;2;3;4;4", "wc_summary": "117;148;55;680;543", "wc_strengths": "54;108;7;24;143", "wc_weaknesses": "306;144;151;75;278", "wc_questions": "72;26;6;2;2", "wc_limitations": "1;9;1;25;1", "wc_review": "550;435;220;806;967", "wc_reply_reviewers": "347;23;0;35;53", "wc_reply_authors": "387;0;0;0;34", "reply_reviewers": "2;1;0;1;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 5.0, 1.2649110640673518 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.8944271909999159 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 308.6, 252.86249227594035 ], "wc_strengths_avg": [ 67.2, 51.136679594983484 ], "wc_weaknesses_avg": [ 190.8, 87.24540102492509 ], "wc_questions_avg": [ 21.6, 26.725268941584105 ], "wc_limitations_avg": [ 7.4, 9.329523031752482 ], "wc_review_avg": [ 595.6, 265.0106412957789 ], "wc_reply_reviewers_avg": [ 91.6, 128.85588849563686 ], "wc_reply_authors_avg": [ 84.2, 151.97157628977862 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8451542547285165, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4467949542812635075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "berkeley.edu;ttic.edu;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Toyota Technological Institute at Chicago;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.tti-chicago.org;https://www.epfl.ch", "aff_unique_abbr": "UC Berkeley;TTI Chicago;EPFL", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Berkeley;Chicago;Lausanne", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Lower Bounds of Uniform Stability in Gradient-Based Bilevel Algorithms for Hyperparameter Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93297", "id": "u3mZzd0Pdx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u3mZzd0Pdx", "openreview": "https://openreview.net/forum?id=u3mZzd0Pdx", "poster": "/media/PosterPDFs/NeurIPS%202024/93297.png?t=1731675618.7054188", "project": "", "author_site": "Rongzhen Wang, Chenyu Zheng, Guoqiang Wu, Xu Min, Xiaolu Zhang, Jun Zhou, Chongxuan LI", "tldr": "", "abstract": "Gradient-based bilevel programming leverages unrolling differentiation (UD) or implicit function theorem (IFT) to solve hyperparameter optimization (HO) problems, and is proven effective and scalable in practice. \nTo understand their generalization behavior, existing works establish upper bounds on the uniform stability of these algorithms, while their tightness is still unclear. \nTo this end, this paper attempts to establish stability lower bounds for UD-based and IFT-based algorithms. \nA central technical challenge arises from the dependency of each outer-level update on the concurrent stage of inner optimization in bilevel programming. \nTo address this problem, we introduce lower-bounded expansion properties to characterize the instability in update rules which can serve as general tools for lower-bound analysis. \nThese properties guarantee the hyperparameter divergence at the outer level and the Lipschitz constant of inner output at the inner level in the context of HO.\nGuided by these insights, we construct a quadratic example that yields tight lower bounds for the UD-based algorithm and meaningful bounds for a representative IFT-based algorithm.\nOur tight result indicates that uniform stability has reached its limit in stability analysis for the UD-based algorithm.", "keywords": "Uniform stability;Lower bound;Hyperparameter optimization;Bilevel programming", "primary_area": "learning_theory", "supplementary_material": "/attachment/92295d0fa594686522770859ea8236a14fbe0393.zip", "author": "Rongzhen Wang;Chenyu Zheng;Guoqiang Wu;Xu Min;Xiaolu Zhang;JUN ZHOU;Chongxuan Li", "authorids": "~Rongzhen_Wang1;~Chenyu_Zheng1;~Guoqiang_Wu2;~Xu_Min1;~Xiaolu_Zhang2;~JUN_ZHOU6;~Chongxuan_Li1", "gender": "F;M;M;M;F;M;M", "homepage": "https://github.com/rongzhenwang;https://chen-yu-zheng.github.io;https://guoqiangwoodrowwu.github.io/;https://minxueric.github.io/;https://scholar.google.com/citations?user=cAz9PToAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en;http://ml.cs.tsinghua.edu.cn/~chongxuan", "dblp": "245/1816;133/5078;98/4857;08/2810;48/5176;99/3847-11;161/9965", "google_scholar": ";QDfsVgYAAAAJ;KCTX-_0AAAAJ;xuYp0_sAAAAJ;;mCVvloEAAAAJ;UKMcQn4AAAAJ", "orcid": ";;0000-0003-4486-7944;;0000-0001-8055-0245;0000-0001-6033-6102;0000-0002-0912-9076", "linkedin": ";;;;;;", "or_profile": "~Rongzhen_Wang1;~Chenyu_Zheng1;~Guoqiang_Wu2;~Xu_Min1;~Xiaolu_Zhang2;~JUN_ZHOU6;~Chongxuan_Li1", "aff": "Renmin University of China;Renmin University of China;Shandong University;;Ant Group;Ant Group;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;sdu.edu.cn;;antfin.com;antgroup.com;ruc.edu.cn", "position": "PhD student;PhD student;Associate Professor;;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2024lower,\ntitle={Lower Bounds of Uniform Stability in Gradient-Based Bilevel Algorithms for Hyperparameter Optimization},\nauthor={Rongzhen Wang and Chenyu Zheng and Guoqiang Wu and Xu Min and Xiaolu Zhang and JUN ZHOU and Chongxuan Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u3mZzd0Pdx}\n}", "github": "", "reviewers": "3C12;NBuR;bA9a", "pdf_size": 3262731, "rating": "5;5;8", "confidence": "3;3;2", "soundness": "2;3;4", "novelty": "2;2;4", "presentation": "2;3;4", "wc_summary": "18;36;43", "wc_strengths": "21;51;61", "wc_weaknesses": "109;109;40", "wc_questions": "108;25;14", "wc_limitations": "1;4;1", "wc_review": "257;225;159", "wc_reply_reviewers": "166;0;0", "wc_reply_authors": "322;0;0", "reply_reviewers": "2;0;0", "reply_authors": "4;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 0.9428090415820634 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 32.333333333333336, 10.530379332620877 ], "wc_strengths_avg": [ 44.333333333333336, 16.99673171197595 ], "wc_weaknesses_avg": [ 86.0, 32.526911934581186 ], "wc_questions_avg": [ 49.0, 41.960298696108765 ], "wc_limitations_avg": [ 2.0, 1.4142135623730951 ], "wc_review_avg": [ 213.66666666666666, 40.80304999493161 ], "wc_reply_reviewers_avg": [ 55.333333333333336, 78.25315045131126 ], "wc_reply_authors_avg": [ 107.33333333333333, 151.7922556947122 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4DYhy7ehhQcJ:scholar.google.com/&scioq=Lower+Bounds+of+Uniform+Stability+in+Gradient-Based+Bilevel+Algorithms+for+Hyperparameter+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "ruc.edu.cn;ruc.edu.cn;sdu.edu.cn;;antfin.com;antgroup.com;ruc.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Renmin University of China;Shandong University;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.sdu.edu.cn;https://www.antgroup.com", "aff_unique_abbr": "RUC;SDU;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Explaining Datasets in Words: Statistical Models with Natural Language Parameters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93296", "id": "u5BkOgWWZW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u5BkOgWWZW", "openreview": "https://openreview.net/forum?id=u5BkOgWWZW", "poster": "", "project": "", "author_site": "Ruiqi Zhong, Heng Wang, Dan Klein, Jacob Steinhardt", "tldr": "", "abstract": "To make sense of massive data, we often first fit simplified models and then interpret the parameters; for example, we cluster the text embeddings and then interpret the mean parameters of each cluster.\nHowever, these parameters are often high-dimensional and hard to interpret.\nTo make model parameters directly interpretable, we introduce a family of statistical models---including clustering, time series, and classification models---parameterized by *natural language predicates*. \nFor example, a cluster of text about COVID could be parameterized by the predicate ``*discusses COVID*''.\nTo learn these statistical models effectively, we develop a model-agnostic algorithm that optimizes continuous relaxations of predicate parameters with gradient descent and discretizes them by prompting language models (LMs).\nFinally, we apply our framework to a wide range of problems: taxonomizing user chat dialogues, characterizing how they evolve across time, finding categories where one language model is better than the other, clustering math problems based on subareas, and explaining visual features in memorable images.\nOur framework is highly versatile, applicable to both textual and visual domains, can be easily steered to focus on specific properties (e.g. subareas), and explains sophisticated concepts that classical methods (e.g. n-gram analysis) struggle to produce.", "keywords": "language model; explainability; exploratory analysis; data science; explainable modeling", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Ruiqi Zhong;Heng Wang;Dan Klein;Jacob Steinhardt", "authorids": "~Ruiqi_Zhong1;~Heng_Wang10;~Dan_Klein1;~Jacob_Steinhardt1", "gender": "M;M;;", "homepage": "https://ruiqi-zhong.github.io;https://arthur-heng.github.io;http://people.eecs.berkeley.edu/~klein/;", "dblp": "222/3024;61/5618-8.html;;35/10625", "google_scholar": "GskOShAAAAAJ;SLyDEswAAAAJ;;", "orcid": ";;;", "linkedin": ";;dan-klein/;", "or_profile": "~Ruiqi_Zhong1;~Heng_Wang10;~Dan_Klein1;~Jacob_Steinhardt1", "aff": "University of California, Berkeley;Xi'an Jiaotong University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;xjtu.edu.cn;berkeley.edu;berkeley.edu", "position": "PhD student;Undergrad student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhong2024explaining,\ntitle={Explaining Datasets in Words: Statistical Models with Natural Language Parameters},\nauthor={Ruiqi Zhong and Heng Wang and Dan Klein and Jacob Steinhardt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u5BkOgWWZW}\n}", "github": "", "reviewers": "ZMnS;krnw;fF31", "pdf_size": 2765808, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "2;3;4", "presentation": "3;3;4", "wc_summary": "79;108;86", "wc_strengths": "63;40;171", "wc_weaknesses": "151;177;247", "wc_questions": "58;2;148", "wc_limitations": "7;1;7", "wc_review": "358;328;659", "wc_reply_reviewers": "0;23;39", "wc_reply_authors": "24;34;9", "reply_reviewers": "0;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.0, 12.355835328567093 ], "wc_strengths_avg": [ 91.33333333333333, 57.11003025349886 ], "wc_weaknesses_avg": [ 191.66666666666666, 40.54078878802872 ], "wc_questions_avg": [ 69.33333333333333, 60.14057606050086 ], "wc_limitations_avg": [ 5.0, 2.8284271247461903 ], "wc_review_avg": [ 448.3333333333333, 149.46645851903435 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 16.006942938057293 ], "wc_reply_authors_avg": [ 22.333333333333332, 10.274023338281626 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7905090586338385809&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "berkeley.edu;xjtu.edu.cn;berkeley.edu;berkeley.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.xjtu.edu.cn", "aff_unique_abbr": "UC Berkeley;XJTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Towards Estimating Bounds on the Effect of Policies under Unobserved Confounding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93295", "id": "u5enPCwaLt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u5enPCwaLt", "openreview": "https://openreview.net/forum?id=u5enPCwaLt", "poster": "/media/PosterPDFs/NeurIPS%202024/93295.png?t=1733822373.3029425", "project": "", "author_site": "Alexis Bellot, Silvia Chiappa", "tldr": "", "abstract": "As many practical fields transition to provide personalized decisions, data is increasingly relevant to support the evaluation of candidate plans and policies (e.g., guidelines for the treatment of disease, government directives, etc.). In the machine learning literature, significant efforts have been put into developing machinery to predict the effectiveness of policies efficiently. The challenge is that, in practice, the effectiveness of a candidate policy is not always identifiable, i.e., not uniquely estimable from the combination of the available data and assumptions about the domain at hand (e.g., encoded in a causal graph). In this paper, we develop graphical characterizations and estimation tools to bound the effect of policies given a causal graph and observational data collected in non-identifiable settings. Specifically, our contributions are two-fold: (1) we derive analytical bounds for general probabilistic and conditional policies that are tighter than existing results, (2) we develop an estimation framework to estimate bounds from finite samples, applicable in higher-dimensional spaces and continuously-valued data. We further show that the resulting estimators have favourable statistical properties such as fast convergence and robustness to model misspecification.", "keywords": "causal inference;estimation;bounding;unobserved confounding", "primary_area": "causal_inference", "supplementary_material": "", "author": "Alexis Bellot;Silvia Chiappa", "authorids": "~Alexis_Bellot1;~Silvia_Chiappa1", "gender": "M;F", "homepage": ";https://csilviavr.github.io/", "dblp": "217/4339;", "google_scholar": ";https://scholar.google.co.uk/citations?user=GAvF3gUAAAAJ", "orcid": ";0000-0002-1882-6842", "linkedin": ";", "or_profile": "~Alexis_Bellot1;~Silvia_Chiappa1", "aff": "Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;google.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nbellot2024towards,\ntitle={Towards Estimating Bounds on the Effect of Policies under Unobserved Confounding},\nauthor={Alexis Bellot and Silvia Chiappa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u5enPCwaLt}\n}", "github": "", "reviewers": "nLc5;LyZM;uSXJ;BoZN", "pdf_size": 1209877, "rating": "4;5;7;7", "confidence": "4;2;3;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;4", "wc_summary": "25;83;45;84", "wc_strengths": "57;42;71;54", "wc_weaknesses": "4;197;189;125", "wc_questions": "19;71;3;1", "wc_limitations": "4;6;1;1", "wc_review": "109;399;309;265", "wc_reply_reviewers": "0;15;0;0", "wc_reply_authors": "40;51;39;46", "reply_reviewers": "0;1;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 59.25, 25.262373206015305 ], "wc_strengths_avg": [ 56.0, 10.319883720275147 ], "wc_weaknesses_avg": [ 128.75, 77.24109981091672 ], "wc_questions_avg": [ 23.5, 28.297526393662043 ], "wc_limitations_avg": [ 3.0, 2.1213203435596424 ], "wc_review_avg": [ 270.5, 105.00833300267175 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 44.0, 4.847679857416329 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.058025885318565944, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16116800305815465436&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "deepmind.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Leveraging Contrastive Learning for Enhanced Node Representations in Tokenized Graph Transformers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93294", "id": "u6FuiKzT1K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u6FuiKzT1K", "openreview": "https://openreview.net/forum?id=u6FuiKzT1K", "poster": "/media/PosterPDFs/NeurIPS%202024/93294.png?t=1730715942.773582", "project": "", "author_site": "Jinsong Chen, Hanpeng Liu, John Hopcroft, Kun He", "tldr": "", "abstract": "While tokenized graph Transformers have demonstrated strong performance in node classification tasks, their reliance on a limited subset of nodes with high similarity scores for constructing token sequences overlooks valuable information from other nodes, hindering their ability to fully harness graph information for learning optimal node representations. To address this limitation, we propose a novel graph Transformer called GCFormer. Unlike previous approaches, GCFormer develops a hybrid token generator to create two types of token sequences, positive and negative, to capture diverse graph information. And a tailored Transformer-based backbone is adopted to learn meaningful node representations from these generated token sequences. Additionally, GCFormer introduces contrastive learning to extract valuable information from both positive and negative token sequences, enhancing the quality of learned node representations. Extensive experimental results across various datasets, including homophily and heterophily graphs, demonstrate the superiority of GCFormer in node classification, when compared to representative graph neural networks (GNNs) and graph Transformers.", "keywords": "Node classification;Graph Transformer;Positive Token Sequence;Negative Token Sequence;Contrastive Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Jinsong Chen;Hanpeng Liu;John E. Hopcroft;Kun He", "authorids": "~Jinsong_Chen2;~Hanpeng_Liu2;~John_E._Hopcroft1;~Kun_He1", "gender": "M;M;M;F", "homepage": "https://guangnianchenai.github.io/;http://showstarpro.github.io;http://www.cs.cornell.edu/jeh/;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm", "dblp": "14/7450-2;;h/JohnEHopcroft;59/1028-1", "google_scholar": "F470g5wAAAAJ;5c3zymMAAAAJ;4Z6vo5QAAAAJ;YTQnGJsAAAAJ", "orcid": "0000-0001-7588-6713;;0000-0001-8681-6075;0000-0001-7627-4604", "linkedin": ";;;", "or_profile": "~Jinsong_Chen2;~Hanpeng_Liu2;~John_E._Hopcroft1;~Kun_He1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Department of Computer Science, Cornell University;Huazhong University of Sceince and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;cs.cornell.edu;hust.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2024leveraging,\ntitle={Leveraging Contrastive Learning for Enhanced Node Representations in Tokenized Graph Transformers},\nauthor={Jinsong Chen and Hanpeng Liu and John E. Hopcroft and Kun He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u6FuiKzT1K}\n}", "github": "", "reviewers": "rUu7;pga6;9KjY", "pdf_size": 929181, "rating": "5;6;8", "confidence": "4;4;4", "soundness": "2;2;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "71;150;76", "wc_strengths": "33;90;27", "wc_weaknesses": "56;568;13", "wc_questions": "60;87;121", "wc_limitations": "1;8;10", "wc_review": "221;903;247", "wc_reply_reviewers": "31;451;62", "wc_reply_authors": "29;444;25", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 99.0, 36.12016980395672 ], "wc_strengths_avg": [ 50.0, 28.39013913315678 ], "wc_weaknesses_avg": [ 212.33333333333334, 252.1062386287883 ], "wc_questions_avg": [ 89.33333333333333, 24.957742063113177 ], "wc_limitations_avg": [ 6.333333333333333, 3.8586123009300755 ], "wc_review_avg": [ 457.0, 315.54820022726585 ], "wc_reply_reviewers_avg": [ 181.33333333333334, 191.10264839143966 ], "wc_reply_authors_avg": [ 166.0, 196.58246785170508 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11523831796216837998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hust.edu.cn;hust.edu.cn;cs.cornell.edu;hust.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Cornell University", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "http://www.hust.edu.cn;https://www.cornell.edu", "aff_unique_abbr": "HUST;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Online Convex Optimisation: The Optimal Switching Regret for all Segmentations Simultaneously", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93293", "id": "u6XxyuD3Ro", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u6XxyuD3Ro", "openreview": "https://openreview.net/forum?id=u6XxyuD3Ro", "poster": "", "project": "", "author_site": "Stephen Pasteris, Chris Hicks, Vasilios Mavroudis, Mark Herbster", "tldr": "", "abstract": "We consider the classic problem of online convex optimisation. Whereas the notion of static regret is relevant for stationary problems, the notion of switching regret is more appropriate for non-stationary problems. A switching regret is defined relative to any segmentation of the trial sequence, and is equal to the sum of the static regrets of each segment. In this paper we show that, perhaps surprisingly, we can achieve the asymptotically optimal switching regret on every possible segmentation simultaneously. Our algorithm for doing so is very efficient: having a space and per-trial time complexity that is logarithmic in the time-horizon. Our algorithm also obtains novel bounds on its dynamic regret: being adaptive to variations in the rate of change of the comparator sequence.", "keywords": "Online Convex Optimisation;Non-stationary Learning", "primary_area": "online_learning", "supplementary_material": "", "author": "Stephen Pasteris;Chris Hicks;Vasilios Mavroudis;Mark Herbster", "authorids": "~Stephen_Pasteris1;~Chris_Hicks1;~Vasilios_Mavroudis1;~Mark_Herbster1", "gender": "M;Non-Binary;;M", "homepage": ";https://chrishicks.io;;http://www0.cs.ucl.ac.uk/staff/M.Herbster/", "dblp": "126/1728;220/3716;;76/6979", "google_scholar": ";IMw5HFkAAAAJ;;https://scholar.google.com.tw/citations?user=MBwWHlgAAAAJ", "orcid": ";;;", "linkedin": ";christopher-r-hicks/;;", "or_profile": "~Stephen_Pasteris1;~Chris_Hicks1;~Vasilios_Mavroudis1;~Mark_Herbster1", "aff": "Alan Turing Institute;Alan Turing Institute;;University College London", "aff_domain": "turing.ac.uk;turing.ac.uk;;ucl.edu", "position": "Senior Reasearch Associate;Principal Researcher;;Professor", "bibtex": "@inproceedings{\npasteris2024online,\ntitle={Online Convex Optimisation: The Optimal Switching Regret for all Segmentations Simultaneously},\nauthor={Stephen Pasteris and Chris Hicks and Vasilios Mavroudis and Mark Herbster},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u6XxyuD3Ro}\n}", "github": "", "reviewers": "KotF;HMtq;nGaq;YiA9", "pdf_size": 343607, "rating": "6;6;7;7", "confidence": "3;4;4;3", "soundness": "3;3;2;2", "novelty": "3;3;2;4", "presentation": "1;3;1;1", "wc_summary": "55;199;46;40", "wc_strengths": "25;157;108;26", "wc_weaknesses": "209;49;102;162", "wc_questions": "197;53;413;179", "wc_limitations": "1;7;17;40", "wc_review": "487;465;686;447", "wc_reply_reviewers": "467;39;310;318", "wc_reply_authors": "658;16;318;67", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 1.5, 0.8660254037844386 ], "wc_summary_avg": [ 85.0, 66.03408210916541 ], "wc_strengths_avg": [ 79.0, 56.236109396009965 ], "wc_weaknesses_avg": [ 130.5, 60.433848131655495 ], "wc_questions_avg": [ 210.5, 129.40923460093563 ], "wc_limitations_avg": [ 16.25, 14.85555451674558 ], "wc_review_avg": [ 521.25, 96.16749710791063 ], "wc_reply_reviewers_avg": [ 283.5, 154.3899284279904 ], "wc_reply_authors_avg": [ 264.75, 254.19615949105133 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4469175004307152017&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "turing.ac.uk;turing.ac.uk;;ucl.edu", "author_num": 4, "aff_unique_index": "0;0;1", "aff_unique_norm": "Alan Turing Institute;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.turing.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "ATI;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Graph Edit Distance with General Costs Using Neural Set Divergence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93292", "id": "u7JRmrGutT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u7JRmrGutT", "openreview": "https://openreview.net/forum?id=u7JRmrGutT", "poster": "", "project": "", "author_site": "Eeshaan Jain, Indradyumna Roy, Saswat Meher, Soumen Chakrabarti, Abir De", "tldr": "", "abstract": "Graph Edit Distance (GED) measures the (dis-)similarity between two given graphs in terms of the minimum-cost edit sequence, which transforms one graph to the other.\nGED is related to other notions of graph similarity, such as graph and subgraph isomorphism, maximum common subgraph, etc. However, the computation of exact GED is NP-Hard, which has recently motivated the design of neural models for GED estimation.\nHowever, they do not explicitly account for edit operations with different costs. In response, we propose $\\texttt{GraphEdX}$, a neural GED estimator that can work with general costs specified for the four edit operations, viz., edge deletion, edge addition, node deletion, and node addition.\nWe first present GED as a quadratic assignment problem (QAP) that incorporates these four costs.\nThen, we represent each graph as a set of node and edge embeddings and use them to design a family of neural set divergence surrogates. We replace the QAP terms corresponding to each operation with their surrogates. \nComputing such neural set divergence requires aligning nodes and edges of the two graphs.\nWe learn these alignments using a Gumbel-Sinkhorn permutation generator, additionally ensuring that the node and edge alignments are consistent with each other. Moreover, these alignments are cognizant of both the presence and absence of edges between node pairs.\nThrough extensive experiments on several datasets, along with a variety of edit cost settings, we show that $\\texttt{GraphEdX}$ consistently outperforms state-of-the-art methods and heuristics in terms of prediction error. The code is available at https://github.com/structlearning/GraphEdX.", "keywords": "graph neural network;graph edit distance", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Eeshaan Jain;Indradyumna Roy;Saswat Meher;Soumen Chakrabarti;Abir De", "authorids": "~Eeshaan_Jain1;~Indradyumna_Roy1;~Saswat_Meher1;~Soumen_Chakrabarti1;~Abir_De1", "gender": "M;M;M;Not Specified;M", "homepage": "https://eeshaanjain.github.io;https://indradyumna.github.io/;;https://www.cse.iitb.ac.in/~soumen/;", "dblp": ";124/9185.html;;c/SChakrabarti;118/7174", "google_scholar": "r5rqqJEAAAAJ;qb70i84AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=LfF2zfQAAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";;;;", "linkedin": "eeshaanjain/;;;;", "or_profile": "~Eeshaan_Jain1;~Indradyumna_Roy1;~Saswat_Meher1;~Soumen_Chakrabarti1;~Abir_De1", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay;Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay;Indian Institute of Technology Bombay,", "aff_domain": "iitb.ac.in;iitb.ac.in;cse.iitb.ac.in;iitb.ac.in;iitb.ac.in", "position": "Undergrad student;PhD student;MS student;Professor;Assistant Professor", "bibtex": "@inproceedings{\njain2024graph,\ntitle={Graph Edit Distance with General Costs Using Neural Set Divergence},\nauthor={Eeshaan Jain and Indradyumna Roy and Saswat Meher and Soumen Chakrabarti and Abir De},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u7JRmrGutT}\n}", "github": "", "reviewers": "81fW;htic;eEwY", "pdf_size": 2392813, "rating": "4;7;7", "confidence": "4;4;4", "soundness": "3;4;3", "novelty": "3;3;4", "presentation": "2;4;3", "wc_summary": "116;73;118", "wc_strengths": "33;105;23", "wc_weaknesses": "345;48;10", "wc_questions": "221;1;9", "wc_limitations": "1;1;1", "wc_review": "716;228;161", "wc_reply_reviewers": "0;10;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 102.33333333333333, 20.75786330258702 ], "wc_strengths_avg": [ 53.666666666666664, 36.527006751473934 ], "wc_weaknesses_avg": [ 134.33333333333334, 149.76945245572907 ], "wc_questions_avg": [ 77.0, 101.87574130609634 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 368.3333333333333, 247.35444653820602 ], "wc_reply_reviewers_avg": [ 3.3333333333333335, 4.714045207910316 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9190699986204795064&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "iitb.ac.in;iitb.ac.in;cse.iitb.ac.in;iitb.ac.in;iitb.ac.in", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Bombay", "aff_unique_dep": "", "aff_unique_url": "https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Bombay", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "India" }, { "title": "BABILong: Testing the Limits of LLMs with Long Context Reasoning-in-a-Haystack", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97462", "id": "u7m2CG84BQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u7m2CG84BQ", "openreview": "https://openreview.net/forum?id=u7m2CG84BQ", "poster": "", "project": "", "author_site": "Yury Kuratov, Aydar Bulatov, Petr Anokhin, Ivan Rodkin, Dmitry Sorokin, Artyom Sorokin, Mikhail Burtsev", "tldr": "", "abstract": "In recent years, the input context sizes of large language models (LLMs) have increased dramatically. However, existing evaluation methods have not kept pace, failing to comprehensively assess the efficiency of models in handling long contexts. To bridge this gap, we introduce the BABILong benchmark, designed to test language models' ability to reason across facts distributed in extremely long documents. BABILong includes a diverse set of 20 reasoning tasks, including fact chaining, simple induction, deduction, counting, and handling lists/sets. These tasks are challenging on their own, and even more demanding when the required facts are scattered across long natural text. Our evaluations show that popular LLMs effectively utilize only 10-20% of the context and their performance declines sharply with increased reasoning complexity. Among alternatives to in-context reasoning, Retrieval-Augmented Generation methods achieve a modest 60% accuracy on single-fact question answering, independent of context length. Among context extension methods, the highest performance is demonstrated by recurrent memory transformers after fine-tuning, enabling the processing of lengths up to 50 million tokens. The BABILong benchmark is extendable to any length to support the evaluation of new upcoming models with increased capabilities, and we provide splits up to 10 million token lengths.", "keywords": "long context processing;benchmarks;LLMs;language models;retrieval-augmented generation;memory augmented models", "primary_area": "", "supplementary_material": "", "author": "Yuri Kuratov;Aydar Bulatov;Petr Anokhin;Ivan Rodkin;Dmitry Igorevich Sorokin;Artyom Sorokin;Mikhail Burtsev", "authorids": "~Yuri_Kuratov2;~Aydar_Bulatov1;~Petr_Anokhin1;~Ivan_Rodkin2;~Dmitry_Igorevich_Sorokin1;~Artyom_Sorokin1;~Mikhail_Burtsev1", "gender": "M;M;M;M;M;M;", "homepage": ";;;;https://github.com/griver;;", "dblp": "324/5232;;;https://dblp.uni-trier.de/pid/203/4483;241/5267;95/11265;222/9309", "google_scholar": "UvN4mlEAAAAJ;7HLJ82QAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;m0MQ6asAAAAJ;t_PLQakAAAAJ;BsDK7zIAAAAJ", "orcid": ";;0000-0002-9100-4802;;;;", "linkedin": "booydar/;;;;;;", "or_profile": "~Aydar_Bulatov1;~Petr_Anokhin1;~Ivan_Rodkin2;~Dmitry_Igorevich_Sorokin1;~Artyom_Sorokin1;~Mikhail_Burtsev1;~Yury_Kuratov1", "aff": "Moscow Institute of Physics and Technology;Artificial Intelligence Research Institute (AIRI);Moscow Institute of Physics and Technology;Artificial Intelligence Research Institute;Artificial Intelligence Research Institute;London Institute for Mathematical Sciences;Moscow Institute of Physics and Technology", "aff_domain": "phystech.edu;airi.net;phystech.edu;airi.net;airi.net;lims.ac.uk;phystech.edu", "position": "PhD student;Researcher;Undergrad student;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nkuratov2024babilong,\ntitle={{BABIL}ong: Testing the Limits of {LLM}s with Long Context Reasoning-in-a-Haystack},\nauthor={Yuri Kuratov and Aydar Bulatov and Petr Anokhin and Ivan Rodkin and Dmitry Igorevich Sorokin and Artyom Sorokin and Mikhail Burtsev},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=u7m2CG84BQ}\n}", "github": "", "reviewers": "ZPx6;rWqi;Fwae", "pdf_size": 3496252, "rating": "7;8;9", "confidence": "4;4;4", "wc_summary_and_contributions": "112;62;97", "wc_strengths": "94;202;24", "wc_improvement": "75;182;2", "wc_limitations": "32;120;1", "wc_correctness": "1;1;1", "wc_clarity": "1;3;1", "wc_relation_to_prior_work": "1;12;1", "wc_documentation": "1;1;1", "wc_additional_feedback": "1;1;1", "wc_review": "318;584;129", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 90.33333333333333, 20.949675149960893 ], "wc_strengths_avg": [ 106.66666666666667, 73.21809126772487 ], "wc_improvement_avg": [ 86.33333333333333, 73.92037758441323 ], "wc_limitations_avg": [ 51.0, 50.40502620440412 ], "wc_correctness_avg": [ 1.0, 0.0 ], "wc_clarity_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_relation_to_prior_work_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 343.6666666666667, 186.63749772099806 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 7, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9891357888064477747&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "phystech.edu;airi.net;phystech.edu;airi.net;airi.net;lims.ac.uk;phystech.edu", "author_num": 7, "aff_unique_index": "0;1;0;1;1;2;0", "aff_unique_norm": "Moscow Institute of Physics and Technology;Artificial Intelligence Research Institute;London Institute for Mathematical Sciences", "aff_unique_dep": ";AI Research;Mathematical Sciences", "aff_unique_url": "https://www.mipt.ru/en;;https://www.lims.ac.uk", "aff_unique_abbr": "MIPT;AIRI;LIMS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;2;3;0", "aff_country_unique": "Russian Federation;;United States;United Kingdom" }, { "title": "Taming Diffusion Prior for Image Super-Resolution with Domain Shift SDEs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93291", "id": "u7okTt4ZyE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u7okTt4ZyE", "openreview": "https://openreview.net/forum?id=u7okTt4ZyE", "poster": "/media/PosterPDFs/NeurIPS%202024/93291.png?t=1731902544.7058487", "project": "", "author_site": "qinpeng cui, yixuan liu, Xinyi Zhang, Qiqi Bao, Qingmin Liao, liwang Amd, Lu Tian, Zicheng Liu, Zhongdao Wang, Emad Barsoum", "tldr": "", "abstract": "Diffusion-based image super-resolution (SR) models have attracted substantial interest due to their powerful image restoration capabilities. However, prevailing diffusion models often struggle to strike an optimal balance between efficiency and performance. Typically, they either neglect to exploit the potential of existing extensive pretrained models, limiting their generative capacity, or they necessitate a dozens of forward passes starting from random noises, compromising inference efficiency. In this paper, we present DoSSR, a $\\textbf{Do}$main $\\textbf{S}$hift diffusion-based SR model that capitalizes on the generative powers of pretrained diffusion models while significantly enhancing efficiency by initiating the diffusion process with low-resolution (LR) images. At the core of our approach is a domain shift equation that integrates seamlessly with existing diffusion models. This integration not only improves the use of diffusion prior but also boosts inference efficiency. Moreover, we advance our method by transitioning the discrete shift process to a continuous formulation, termed as DoS-SDEs. This advancement leads to the fast and customized solvers that further enhance sampling efficiency. Empirical results demonstrate that our proposed method achieves state-of-the-art performance on synthetic and real-world datasets, while notably requiring $\\textbf{\\emph{only 5 sampling steps}}$. Compared to previous diffusion prior based methods, our approach achieves a remarkable speedup of 5-7 times, demonstrating its superior efficiency.", "keywords": "Diffusion Models; Super-Resolution", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Qinpeng Cui;Yi'xuan Liu;Xinyi Zhang;Qiqi Bao;Qingmin Liao;liwang Amd;Lu Tian;Zicheng Liu;Zhongdao Wang;Emad Barsoum", "authorids": "~Qinpeng_Cui1;~Yi'xuan_Liu1;~Xinyi_Zhang10;~Qiqi_Bao1;~Qingmin_Liao1;~liwang_Amd1;~Lu_Tian3;~Zicheng_Liu1;~Zhongdao_Wang2;~Emad_Barsoum1", "gender": "M;F;F;F;M;F;F;M;;", "homepage": "https://orcid.org/my-orcid?orcid=0009-0008-6796-2112;;;;https://www.sigs.tsinghua.edu.cn/lqm_en/main.htm;https://github.com/wanglixilinx/CDCL;;https://sites.google.com/view/zichengliu/home?pli=1;;", "dblp": ";;;;13/322;;;l/ZichengLiu;;", "google_scholar": ";;https://scholar.google.cz/citations?user=Au0f7NEAAAAJ;;;;edbuKpcAAAAJ;bkALdvsAAAAJ;;", "orcid": ";;0009-0000-9915-5608;0000-0001-9599-1844;0000-0002-7509-3964;;;0000-0001-5894-7828;;", "linkedin": ";yixuan-liu-71a338128/;;;;;;;;", "or_profile": "~Qinpeng_Cui1;~Yi'xuan_Liu1;~Xinyi_Zhang10;~Qiqi_Bao1;~Qingmin_Liao1;~liwang_Amd1;~Lu_Tian3;~Zicheng_Liu1;~Zhongdao_Wang2;~Emad_Barsoum1", "aff": "Tsinghua University;Advanced Micro Devices;Tsinghua University;Tsinghua University;Tsinghua University;AMD;AMD;Microsoft;;", "aff_domain": "mail.tsinghua.edu.cn;amd.com;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;amd.com;amd.com;microsoft.com;;", "position": "MS student;Researcher;PhD student;PhD student;Full Professor;Researcher;Researcher;partner research manager;;", "bibtex": "@inproceedings{\ncui2024taming,\ntitle={Taming Diffusion Prior for Image Super-Resolution with Domain Shift {SDE}s},\nauthor={Qinpeng Cui and Yi'xuan Liu and Xinyi Zhang and Qiqi Bao and Qingmin Liao and liwang Amd and Lu Tian and Zicheng Liu and Zhongdao Wang and Emad Barsoum},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u7okTt4ZyE}\n}", "github": "", "reviewers": "tfJj;QMdK;dM7n;7zor", "pdf_size": 39126152, "rating": "5;5;6;6", "confidence": "5;5;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "107;58;69;70", "wc_strengths": "98;32;36;50", "wc_weaknesses": "355;157;46;105", "wc_questions": "110;19;167;35", "wc_limitations": "31;1;13;1", "wc_review": "701;267;331;261", "wc_reply_reviewers": "24;29;81;12", "wc_reply_authors": "0;0;424;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.0, 18.506755523321747 ], "wc_strengths_avg": [ 54.0, 26.267851073127396 ], "wc_weaknesses_avg": [ 165.75, 116.10636287473655 ], "wc_questions_avg": [ 82.75, 59.54987405528244 ], "wc_limitations_avg": [ 11.5, 12.278029157808675 ], "wc_review_avg": [ 390.0, 181.63975335812367 ], "wc_reply_reviewers_avg": [ 36.5, 26.424420523447623 ], "wc_reply_authors_avg": [ 106.0, 183.597385602301 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11980991796152669904&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "mail.tsinghua.edu.cn;amd.com;mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;amd.com;amd.com;microsoft.com;;", "author_num": 10, "aff_unique_index": "0;1;0;0;0;1;1;2", "aff_unique_norm": "Tsinghua University;Advanced Micro Devices, Inc.;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.amd.com;https://www.microsoft.com", "aff_unique_abbr": "THU;AMD;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Protecting Your LLMs with Information Bottleneck", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93290", "id": "u9ShP64FJV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=u9ShP64FJV", "openreview": "https://openreview.net/forum?id=u9ShP64FJV", "poster": "/media/PosterPDFs/NeurIPS%202024/93290.png?t=1730205198.0964785", "project": "", "author_site": "Zichuan Liu, Zefan Wang, Linjie Xu, Jinyu Wang, Lei Song, Tianchun Wang, Chunlin Chen, Wei Cheng, Jiang Bian", "tldr": "", "abstract": "The advent of large language models (LLMs) has revolutionized the field of natural language processing, yet they might be attacked to produce harmful content.\nDespite efforts to ethically align LLMs, these are often fragile and can be circumvented by jailbreaking attacks through optimized or manual adversarial prompts.\nTo address this, we introduce the Information Bottleneck Protector (IBProtector), a defense mechanism grounded in the information bottleneck principle, and we modify the objective to avoid trivial solutions.\nThe IBProtector selectively compresses and perturbs prompts, facilitated by a lightweight and trainable extractor, preserving only essential information for the target LLMs to respond with the expected answer.\nMoreover, we further consider a situation where the gradient is not visible to be compatible with any LLM.\nOur empirical evaluations show that IBProtector outperforms current defense methods in mitigating jailbreak attempts, without overly affecting response quality or inference speed. \nIts effectiveness and adaptability across various attack methods and target LLMs underscore the potential of IBProtector as a novel, transferable defense that bolsters the security of LLMs without requiring modifications to the underlying models.", "keywords": "Defense;Information Bottleneck;Jailbreaking;Large Language Models", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/2346dc5b20b2c7e48de484ebe3ab8cc8b1553b4c.zip", "author": "Zichuan Liu;Zefan Wang;Linjie Xu;Jinyu Wang;Lei Song;Tianchun Wang;Chunlin Chen;Wei Cheng;Jiang Bian", "authorids": "~Zichuan_Liu3;~Zefan_Wang2;~Linjie_Xu1;~Jinyu_Wang1;~Lei_Song3;~Tianchun_Wang1;~Chunlin_Chen1;~Wei_Cheng1;~Jiang_Bian1", "gender": ";M;M;;M;M;M;M;M", "homepage": "https://zichuan-liu.github.io/;https://github.com/Zep4yrWang;http://eecs.qmul.ac.uk/people/profiles/xulinjie.html;;;;https://sme.nju.edu.cn/ccl/list.htm;https://chengw07.github.io/;https://sites.google.com/view/jiangbian", "dblp": ";;;;76/893-1.html;153/5231.html;68/6992.html;89/2506-2.html;09/851-2.html", "google_scholar": "SUvzKxwAAAAJ;TGTAeiQAAAAJ;;LvgWSg0AAAAJ;pXDSOocAAAAJ;8su8b60AAAAJ;;PRrGVmoAAAAJ;pZBEnY8AAAAJ", "orcid": ";;;;;;;;0000-0002-9472-600X", "linkedin": ";;;jinyuwang5134/;;;;wei-cheng-ml/;jbian/", "or_profile": "~Zichuan_Liu3;~Zefan_Wang2;~Linjie_Xu1;~Jinyu_Wang1;~Lei_Song3;~Tianchun_Wang1;~Chunlin_Chen1;~Wei_Cheng1;~Jiang_Bian1", "aff": "Nanjing University;Tsinghua University;School of Electronic Engineering and Computer Science;Microsoft;Microsoft;Pennsylvania State University;Nanjing University;NEC-Labs;Microsoft", "aff_domain": "nju.edu.cn;tsinghua.edu.cn;qmul.ac.uk;microsoft.com;microsoft.com;psu.edu;nju.edu.cn;nec-labs.com;microsoft.com", "position": "MS student;MS student;PhD student;Researcher;Principal Researcher;PhD student;Full Professor;Principal Researcher;Partner Research Manager", "bibtex": "@inproceedings{\nliu2024protecting,\ntitle={Protecting Your {LLM}s with Information Bottleneck},\nauthor={Zichuan Liu and Zefan Wang and Linjie Xu and Jinyu Wang and Lei Song and Tianchun Wang and Chunlin Chen and Wei Cheng and Jiang Bian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=u9ShP64FJV}\n}", "github": "", "reviewers": "xXSx;WJbW;yBjR;UDu3", "pdf_size": 837374, "rating": "6;6;7;7", "confidence": "4;2;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "49;156;47;109", "wc_strengths": "128;67;48;64", "wc_weaknesses": "637;63;18;176", "wc_questions": "19;83;37;3", "wc_limitations": "10;6;46;1", "wc_review": "843;375;196;353", "wc_reply_reviewers": "280;110;0;56", "wc_reply_authors": "77;0;0;274", "reply_reviewers": "1;1;0;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.25, 45.40580909971763 ], "wc_strengths_avg": [ 76.75, 30.457962834043908 ], "wc_weaknesses_avg": [ 223.5, 245.57534485367214 ], "wc_questions_avg": [ 35.5, 29.94578434437809 ], "wc_limitations_avg": [ 15.75, 17.75352077758099 ], "wc_review_avg": [ 441.75, 241.72647248491424 ], "wc_reply_reviewers_avg": [ 111.5, 104.76998616015943 ], "wc_reply_authors_avg": [ 87.75, 112.03208245855292 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16671437300650767089&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;tsinghua.edu.cn;qmul.ac.uk;microsoft.com;microsoft.com;psu.edu;nju.edu.cn;nec-labs.com;microsoft.com", "author_num": 9, "aff_unique_index": "0;1;2;3;3;4;0;5;3", "aff_unique_norm": "Nanjing University;Tsinghua University;Queen Mary University of London;Microsoft;Pennsylvania State University;NEC Laboratories", "aff_unique_dep": ";;School of Electronic Engineering and Computer Science;Microsoft Corporation;;", "aff_unique_url": "https://www.nju.edu.cn;https://www.tsinghua.edu.cn;https://www.eecs.qmul.ac.uk;https://www.microsoft.com;https://www.psu.edu;https://www.nec-labs.com", "aff_unique_abbr": "Nanjing U;THU;QMUL;Microsoft;PSU;NEC-Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;2;2;0;2;2", "aff_country_unique": "China;United Kingdom;United States" }, { "title": "The Mamba in the Llama: Distilling and Accelerating Hybrid Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93289", "id": "uAzhODjALU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uAzhODjALU", "openreview": "https://openreview.net/forum?id=uAzhODjALU", "poster": "", "project": "", "author_site": "Junxiong Wang, Daniele Paliotta, Avner May, Alexander Rush, Tri Dao", "tldr": "", "abstract": "Linear RNN architectures, like Mamba, can be competitive with Transformer models in language modeling while having advantageous deployment characteristics. Given the focus on training large-scale Transformer models, we consider the challenge of converting these pretrained models for deployment. \nWe demonstrate that it is feasible to distill large Transformers into linear RNNs by reusing the linear projection weights from attention layers with academic GPU resources. The resulting hybrid model, which incorporates a quarter of the attention layers, achieves performance comparable to the original Transformer in chat benchmarks and outperforms open-source hybrid Mamba models trained from scratch with trillions of tokens in both chat benchmarks and general benchmarks. Moreover, we introduce a hardware-aware speculative decoding algorithm that accelerates the inference speed of Mamba and hybrid models. Overall we show how, with limited computation resources, we can remove many of the original attention layers and generate from the resulting model more efficiently. \nOur top-performing model, distilled from Llama3-8B-Instruct, achieves a 29.61 length-controlled win rate on AlpacaEval 2 against GPT-4 and 7.35 on MT-Bench, surpassing the best 8B scale instruction-tuned linear RNN model. We also find that the distilled model has natural length extrapolation, showing almost perfect accuracy in the needle-in-a-haystack test at 20x the distillation length. Code and pre-trained checkpoints are open-sourced at [MambaInLlama](https://github.com/jxiw/MambaInLlama) for distillation and [SpeculativeMamba](https://github.com/itsdaniele/speculative\\_mamba) for speculative decoding.", "keywords": "Mamba;Transformer;Knowledge Distillation;Speculative Decoding", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Junxiong Wang;Daniele Paliotta;Avner May;Alexander M Rush;Tri Dao", "authorids": "~Junxiong_Wang1;~Daniele_Paliotta1;~Avner_May1;~Alexander_M_Rush1;~Tri_Dao1", "gender": ";M;M;M;", "homepage": ";https://danielepaliotta.com;https://avnermay.github.io/index.html;http://rush.seas.harvard.edu/;https://tridao.me/", "dblp": ";314/5880;146/7842;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;206/7018", "google_scholar": ";_xugfIEAAAAJ;Gx5baHUAAAAJ;LIjnUGgAAAAJ;NQRw0bQAAAAJ", "orcid": ";;;0000-0002-9900-1606;", "linkedin": ";;avnermay/;sasha-rush-a69b6917/;", "or_profile": "~Junxiong_Wang1;~Daniele_Paliotta1;~Avner_May1;~Alexander_M_Rush1;~Tri_Dao1", "aff": ";University of Geneva;Together.ai;School of Engineering and Applied Sciences, Harvard University;Princeton University", "aff_domain": ";unige.ch;together.ai;seas.harvard.edu;princeton.edu", "position": ";PhD student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024the,\ntitle={The Mamba in the Llama: Distilling and Accelerating Hybrid Models},\nauthor={Junxiong Wang and Daniele Paliotta and Avner May and Alexander M Rush and Tri Dao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uAzhODjALU}\n}", "github": "", "reviewers": "J8xQ;pZLW;NYcb", "pdf_size": 2765715, "rating": "4;5;6", "confidence": "5;4;4", "soundness": "2;3;1", "novelty": "2;3;2", "presentation": "1;3;2", "wc_summary": "75;11;118", "wc_strengths": "43;31;34", "wc_weaknesses": "161;37;336", "wc_questions": "3;5;111", "wc_limitations": "21;5;63", "wc_review": "303;89;662", "wc_reply_reviewers": "150;0;541", "wc_reply_authors": "364;0;354", "reply_reviewers": "2;0;2", "reply_authors": "3;1;3", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 68.0, 43.96210489349511 ], "wc_strengths_avg": [ 36.0, 5.0990195135927845 ], "wc_weaknesses_avg": [ 178.0, 122.6567024938575 ], "wc_questions_avg": [ 39.666666666666664, 50.446891766380304 ], "wc_limitations_avg": [ 29.666666666666668, 24.458581770458853 ], "wc_review_avg": [ 351.3333333333333, 236.4097196723425 ], "wc_reply_reviewers_avg": [ 230.33333333333334, 228.05018940770225 ], "wc_reply_authors_avg": [ 239.33333333333334, 169.28345722157522 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.9428090415820634 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5934490464303270421&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";unige.ch;together.ai;seas.harvard.edu;princeton.edu", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Geneva;Together.ai;Harvard University;Princeton University", "aff_unique_dep": ";;School of Engineering and Applied Sciences;", "aff_unique_url": "https://www.unige.ch;https://www.together.ai;https://www.harvard.edu;https://www.princeton.edu", "aff_unique_abbr": "UNIGE;Together.ai;Harvard;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Enhancing Consistency-Based Image Generation via Adversarialy-Trained Classification and Energy-Based Discrimination", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93288", "id": "uBVCPAMDGk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uBVCPAMDGk", "openreview": "https://openreview.net/forum?id=uBVCPAMDGk", "poster": "/media/PosterPDFs/NeurIPS%202024/93288.png?t=1732206020.6864922", "project": "", "author_site": "Shelly Golan, Roy Ganz, Michael Elad", "tldr": "", "abstract": "The recently introduced Consistency models pose an efficient alternative to diffusion algorithms, enabling rapid and good quality image synthesis. These methods overcome the slowness of diffusion models by directly mapping noise to data, while maintaining a (relatively) simpler training. Consistency models enable a fast one- or few-step generation, but they typically fall somewhat short in sample quality when compared to their diffusion origins. \nIn this work we propose a novel and highly effective technique for post-processing Consistency-based generated images, enhancing their perceptual quality. Our approach utilizes a joint classifier-discriminator model, in which both portions are trained adversarially. While the classifier aims to grade an image based on its assignment to a designated class, the discriminator portion of the very same network leverages the softmax values to assess the proximity of the input image to the targeted data manifold, thereby serving as an Energy-based Model. By employing example-specific projected gradient iterations under the guidance of this joint machine, we refine synthesized images and achieve an improved FID scores on the ImageNet 64x64 dataset for both Consistency-Training and Consistency-Distillation techniques.", "keywords": "Adversarial Training; Robustness; Energy-Based Models; Classification;", "primary_area": "generative_models", "supplementary_material": "", "author": "Shelly Golan;Roy Ganz;Michael Elad", "authorids": "~Shelly_Golan1;~Roy_Ganz1;~Michael_Elad1", "gender": "F;M;M", "homepage": ";https://royg27.github.io/;https://elad.cs.technion.ac.il/", "dblp": ";289/5822;e/MichaelElad", "google_scholar": ";2E0FHMoAAAAJ;UpZbV44AAAAJ", "orcid": ";;0000-0001-8131-6928", "linkedin": "shelly-golan/;roy-ganz-270592/;michael-elad-5553852a3/", "or_profile": "~Shelly_Golan1;~Roy_Ganz1;~Michael_Elad1", "aff": "Computer Science Department, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;Verily", "aff_domain": "cs.technion.ac.il;technion.ac.il;verily.com", "position": "MS student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\ngolan2024enhancing,\ntitle={Enhancing Consistency-Based Image Generation via Adversarialy-Trained Classification and Energy-Based Discrimination},\nauthor={Shelly Golan and Roy Ganz and Michael Elad},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uBVCPAMDGk}\n}", "github": "", "reviewers": "zvWf;Jjs5;JUbG;4wrH", "pdf_size": 4502513, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "67;26;42;105", "wc_strengths": "47;11;19;69", "wc_weaknesses": "139;202;92;31", "wc_questions": "4;2;4;141", "wc_limitations": "14;1;6;7", "wc_review": "271;242;163;353", "wc_reply_reviewers": "37;13;27;54", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.0, 29.80771712157776 ], "wc_strengths_avg": [ 36.5, 23.038012067016545 ], "wc_weaknesses_avg": [ 116.0, 62.701674618785106 ], "wc_questions_avg": [ 37.75, 59.61700680175079 ], "wc_limitations_avg": [ 7.0, 4.636809247747852 ], "wc_review_avg": [ 257.25, 67.95724758993701 ], "wc_reply_reviewers_avg": [ 32.75, 14.939461168328663 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10150918193596305120&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.technion.ac.il;technion.ac.il;verily.com", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Verily", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.technion.ac.il;https://www.verily.com", "aff_unique_abbr": "Technion;Verily", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "title": "Training Compute-Optimal Protein Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93287", "id": "uCZI8gSfD4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uCZI8gSfD4", "openreview": "https://openreview.net/forum?id=uCZI8gSfD4", "poster": "/media/PosterPDFs/NeurIPS%202024/93287.png?t=1731498015.9302535", "project": "", "author_site": "Xingyi Cheng, Bo Chen, Pan Li, Jing Gong, Jie Tang, Le Song", "tldr": "", "abstract": "We explore optimally training protein language models, an area of significant interest in biological research where guidance on best practices is limited.\nMost models are trained with extensive compute resources until performance gains plateau, focusing primarily on increasing model sizes rather than optimizing the efficient compute frontier that balances performance and compute budgets.\nOur investigation is grounded in a massive dataset consisting of 939 million protein sequences. \nWe trained over 300 models ranging from 3.5 million to 10.7 billion parameters on 5 to 200 billion unique tokens, to investigate the relations between model sizes, training token numbers, and objectives.\nFirst, we observed the effect of diminishing returns for the Causal Language Model (CLM) and that of overfitting for Masked Language Model (MLM) when repeating the commonly used Uniref database. To address this, we included metagenomic protein sequences in the training set to increase the diversity and avoid the plateau or overfitting effects. \nSecond, we obtained the scaling laws of CLM and MLM on Transformer, tailored to the specific characteristics of protein sequence data. \nThird, we observe a transfer scaling phenomenon from CLM to MLM, further demonstrating the effectiveness of transfer through scaling behaviors based on estimated Effectively Transferred Tokens.\nFinally, to validate our scaling laws, we compare the large-scale versions of ESM-2 and PROGEN2 on downstream tasks, encompassing evaluations of protein generation as well as structure- and function-related tasks, all within less or equivalent pre-training compute budgets.", "keywords": "Protein Language Model;Scaling Law", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/4f0dae142729844f2917f820d589d8f539c2dac0.zip", "author": "Xingyi Cheng;Bo Chen;Pan Li;Jing Gong;Jie Tang;Le Song", "authorids": "~Xingyi_Cheng3;~Bo_Chen11;~Pan_Li11;~Jing_Gong1;~Jie_Tang1;~Le_Song1", "gender": "M;M;M;M;;M", "homepage": ";https://allanchen95.github.io/;;https://github.com/Tsinghua-gongjing;;http://www.cc.gatech.edu/~lsong", "dblp": "206/6376;;;14/1793;;94/3481", "google_scholar": "shO7XmIAAAAJ;ZHtOJowAAAAJ;;zxgmfL8AAAAJ;;Xl4E0CsAAAAJ", "orcid": ";;0000-0002-6902-5762;;;", "linkedin": ";;;;;", "or_profile": "~Xingyi_Cheng3;~Bo_Chen11;~Pan_Li11;~Jing_Gong1;~Jie_Tang1;~Le_Song1", "aff": "BioMap;Tsinghua University;;;;College of Computing, Georgia Institute of Technology", "aff_domain": "biomap.com;tsinghua.edu.cn;;;;cc.gatech.edu", "position": "Principal Researcher;PhD student;;;;Associate Professor", "bibtex": "@inproceedings{\ncheng2024training,\ntitle={Training Compute-Optimal Protein Language Models},\nauthor={Xingyi Cheng and Bo Chen and Pan Li and Jing Gong and Jie Tang and Le Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uCZI8gSfD4}\n}", "github": "", "reviewers": "wQuW;8Frn;qqtc;b9DN", "pdf_size": 7956549, "rating": "6;7;7;7", "confidence": "2;3;2;4", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "2;3;2;3", "wc_summary": "62;411;74;58", "wc_strengths": "75;79;58;52", "wc_weaknesses": "264;248;107;219", "wc_questions": "104;23;104;2", "wc_limitations": "12;107;9;1", "wc_review": "517;868;352;332", "wc_reply_reviewers": "87;29;29;8", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 151.25, 150.08226910598066 ], "wc_strengths_avg": [ 66.0, 11.291589790636214 ], "wc_weaknesses_avg": [ 209.5, 61.33718285020922 ], "wc_questions_avg": [ 58.25, 46.34854366644113 ], "wc_limitations_avg": [ 32.25, 43.34382885717412 ], "wc_review_avg": [ 517.25, 214.85503833980715 ], "wc_reply_reviewers_avg": [ 38.25, 29.422567868899545 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6247155051321753145&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "biomap.com;tsinghua.edu.cn;;;;cc.gatech.edu", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "BioMap;Tsinghua University;Georgia Institute of Technology", "aff_unique_dep": ";;College of Computing", "aff_unique_url": ";https://www.tsinghua.edu.cn;https://www.gatech.edu", "aff_unique_abbr": ";THU;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "1;2", "aff_country_unique": ";China;United States" }, { "title": "DU-Shapley: A Shapley Value Proxy for Efficient Dataset Valuation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93286", "id": "uCgFk8nP0Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uCgFk8nP0Z", "openreview": "https://openreview.net/forum?id=uCgFk8nP0Z", "poster": "", "project": "", "author_site": "Felipe Garrido Lucero, Benjamin Heymann, Maxime Vono, Patrick Loiseau, Vianney Perchet", "tldr": "", "abstract": "We consider the dataset valuation problem, that is the problem of quantifying the incremental gain, to some relevant pre-defined utility of a machine learning task, of aggregating an individual dataset to others.\nThe Shapley value is a natural tool to perform dataset valuation due to its formal axiomatic justification, which can be combined with Monte Carlo integration to overcome the computational tractability challenges. Such generic approximation methods, however, remain expensive in some cases. In this paper, we exploit the knowledge about the structure of the dataset valuation problem to devise more efficient Shapley value estimators. We propose a novel approximation, referred to as discrete uniform Shapley, which is expressed as an expectation under a discrete uniform distribution with support of reasonable size. We justify the relevancy of the proposed framework via asymptotic and non-asymptotic theoretical guarantees and illustrate its benefits via an extensive set of numerical experiments.", "keywords": "data valuation;shapley value;approximation", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/40ee1c4ff6cdfefda28420346d021a68c7edb70a.zip", "author": "Felipe Garrido;Benjamin Heymann;Maxime Vono;Patrick Loiseau;Vianney Perchet", "authorids": "~Felipe_Garrido1;~Benjamin_Heymann1;~Maxime_Vono1;~Patrick_Loiseau1;~Vianney_Perchet3", "gender": "M;M;M;;M", "homepage": "https://sites.google.com/view/fgarridolucero/home;https://benhey.github.io/;https://mvono.github.io/;https://patrickloiseau.github.io/;", "dblp": "271/7928;193/0427;;10/7062;83/7398", "google_scholar": "https://scholar.google.com/citations?hl=fr;ok5uldEAAAAJ;https://scholar.google.fr/citations?user=R5dfDTAAAAAJ;https://scholar.google.fr/citations?user=q98gB0AAAAAJ;", "orcid": ";;0000-0003-4859-965X;;", "linkedin": ";;maximevono;;", "or_profile": "~Felipe_Garrido1;~Benjamin_Heymann1;~Maxime_Vono1;~Patrick_Loiseau1;~Vianney_Perchet1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique;Criteo;Criteo;Inria;", "aff_domain": "ensae.fr;criteo.com;criteo.com;inria.fr;", "position": "Postdoc;Researcher;Researcher;Research scientist;", "bibtex": "@inproceedings{\ngarrido2024dushapley,\ntitle={{DU}-Shapley: A Shapley Value Proxy for Efficient Dataset Valuation},\nauthor={Felipe Garrido and Benjamin Heymann and Maxime Vono and Patrick Loiseau and Vianney Perchet},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uCgFk8nP0Z}\n}", "github": "", "reviewers": "Q5wB;WTCU;C64x;VRG1", "pdf_size": 976753, "rating": "5;6;6;7", "confidence": "3;4;2;4", "soundness": "2;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;2;3", "wc_summary": "187;98;48;114", "wc_strengths": "64;72;35;289", "wc_weaknesses": "274;178;179;384", "wc_questions": "158;38;14;223", "wc_limitations": "23;1;2;7", "wc_review": "706;387;278;1017", "wc_reply_reviewers": "233;17;0;107", "wc_reply_authors": "485;0;0;217", "reply_reviewers": "3;1;0;1", "reply_authors": "4;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 111.75, 49.80148090167601 ], "wc_strengths_avg": [ 115.0, 101.397731730054 ], "wc_weaknesses_avg": [ 253.75, 84.70647850076168 ], "wc_questions_avg": [ 108.25, 85.82067058698621 ], "wc_limitations_avg": [ 8.25, 8.814051281902097 ], "wc_review_avg": [ 597.0, 289.0250854164738 ], "wc_reply_reviewers_avg": [ 89.25, 92.41854521685569 ], "wc_reply_authors_avg": [ 175.5, 199.44485453377834 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13020742824476117068&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ensae.fr;criteo.com;criteo.com;inria.fr;", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique;Criteo;INRIA", "aff_unique_dep": ";;", "aff_unique_url": "https://ensae.fr;https://www.criteo.com;https://www.inria.fr", "aff_unique_abbr": "ENSAE;Criteo;Inria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Addressing Asynchronicity in Clinical Multimodal Fusion via Individualized Chest X-ray Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93285", "id": "uCvdw0IOuU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uCvdw0IOuU", "openreview": "https://openreview.net/forum?id=uCvdw0IOuU", "poster": "/media/PosterPDFs/NeurIPS%202024/93285.png?t=1731305984.3217216", "project": "", "author_site": "Wenfang Yao, Chen Liu, Kejing Yin, William Cheung, Jing Qin", "tldr": "", "abstract": "Integrating multi-modal clinical data, such as electronic health records (EHR) and chest X-ray images (CXR), is particularly beneficial for clinical prediction tasks. However, in a temporal setting, multi-modal data are often inherently asynchronous. EHR can be continuously collected but CXR is generally taken with a much longer interval due to its high cost and radiation dose. When clinical prediction is needed, the last available CXR image might have been outdated, leading to suboptimal predictions. To address this challenge, we propose DDL-CXR, a method that dynamically generates an up-to-date latent representation of the individualized CXR images. Our approach leverages latent diffusion models for patient-specific generation strategically conditioned on a previous CXR image and EHR time series, providing information regarding anatomical structures and disease progressions, respectively. In this way, the interaction across modalities could be better captured by the latent CXR generation process, ultimately improving the prediction performance. Experiments using MIMIC datasets show that the proposed model could effectively address asynchronicity in multimodal fusion and consistently outperform existing methods.", "keywords": "Multi-modal clinical data;latent diffusion model;chest X-ray image;eletronic health records.", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Wenfang Yao;Chen Liu;Kejing Yin;William K. Cheung;Jing Qin", "authorids": "~Wenfang_Yao1;~Chen_Liu25;~Kejing_Yin1;~William_K._Cheung1;~Jing_Qin3", "gender": "F;F;;M;M", "homepage": "https://dorothy-yao.github.io/;https://chenliu-svg.github.io/;https://kejing.me;https://harry-qinjing.github.io/;http://www.comp.hkbu.edu.hk/~william", "dblp": "353/9210;;222/7901;00/1015-1;w/WKWCheung", "google_scholar": "Jj2YsfsAAAAJ;i938yiEAAAAJ;GbCggYMAAAAJ;X3Wi7wkAAAAJ;https://scholar.google.com.hk/citations?user=e42JkYIAAAAJ", "orcid": "0000-0002-6835-5030;;;0000-0002-7059-0929;0000-0002-7428-2050", "linkedin": ";;;;", "or_profile": "~Wenfang_Yao1;~Chen_Liu25;~Kejing_Yin1;~Jing_Qin3;~William_Cheung1", "aff": "Hong Kong Polytechnic University;South China University of Technology;Hong Kong Baptist University;Hong Kong Polytechnic University;Hong Kong Baptist University", "aff_domain": "polyu.edu.hk;scut.edu.cn;hkbu.edu.hk;polyu.edu.hk;hkbu.edu.hk", "position": "Postdoc;Undergrad student;Research Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyao2024addressing,\ntitle={Addressing Asynchronicity in Clinical Multimodal Fusion via Individualized Chest X-ray Generation},\nauthor={Wenfang Yao and Chen Liu and Kejing Yin and William K. Cheung and Jing Qin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uCvdw0IOuU}\n}", "github": "", "reviewers": "7ywn;y1yv;1LZb;Bxwq", "pdf_size": 1680415, "rating": "5;6;6;7", "confidence": "4;5;3;4", "soundness": "3;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "85;68;60;90", "wc_strengths": "58;48;35;17", "wc_weaknesses": "115;56;131;90", "wc_questions": "2;29;203;109", "wc_limitations": "2;3;7;36", "wc_review": "262;204;436;342", "wc_reply_reviewers": "17;0;120;138", "wc_reply_authors": "21;0;74;53", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.75, 12.214233500306108 ], "wc_strengths_avg": [ 39.5, 15.337861650177967 ], "wc_weaknesses_avg": [ 98.0, 28.310775333784132 ], "wc_questions_avg": [ 85.75, 78.29870688587393 ], "wc_limitations_avg": [ 12.0, 13.982131454109563 ], "wc_review_avg": [ 311.0, 87.22958213817145 ], "wc_reply_reviewers_avg": [ 68.75, 60.882571397732534 ], "wc_reply_authors_avg": [ 37.0, 28.50438562747845 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15914912820970355579&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "polyu.edu.hk;scut.edu.cn;hkbu.edu.hk;polyu.edu.hk;hkbu.edu.hk", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Hong Kong Polytechnic University;South China University of Technology;Hong Kong Baptist University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.scut.edu.cn;https://www.hkbu.edu.hk", "aff_unique_abbr": "PolyU;SCUT;HKBU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SPRINQL: Sub-optimal Demonstrations driven Offline Imitation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93284", "id": "uDD44NROOt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uDD44NROOt", "openreview": "https://openreview.net/forum?id=uDD44NROOt", "poster": "/media/PosterPDFs/NeurIPS%202024/93284.png?t=1731731829.0146697", "project": "", "author_site": "Huy Hoang, Tien Mai, Pradeep Varakantham", "tldr": "", "abstract": "We focus on offline imitation learning (IL), which aims to mimic an expert's behavior using demonstrations without any interaction with the environment. One of the main challenges in offline IL is the limited support of expert demonstrations, which typically cover only a small fraction of the state-action space. While it may not be feasible to obtain numerous expert demonstrations, it is often possible to gather a larger set of sub-optimal demonstrations. For example, in treatment optimization problems, there are varying levels of doctor treatments available for different chronic conditions. These range from treatment specialists and experienced general practitioners to less experienced general practitioners. Similarly, when robots are trained to imitate humans in routine tasks, they might learn from individuals with different levels of expertise and efficiency. \n\nIn this paper, we propose an offline IL approach that leverages the larger set of sub-optimal demonstrations while effectively mimicking expert trajectories. Existing offline IL methods based on behavior cloning or distribution matching often face issues such as overfitting to the limited set of expert demonstrations or inadvertently imitating sub-optimal trajectories from the larger dataset. Our approach, which is based on inverse soft-Q learning, learns from both expert and sub-optimal demonstrations. It assigns higher importance (through learned weights) to aligning with expert demonstrations and lower importance to aligning with sub-optimal ones. A key contribution of our approach, called SPRINQL, is transforming the offline IL problem into a convex optimization over the space of Q functions. Through comprehensive experimental evaluations, we demonstrate that the SPRINQL algorithm achieves state-of-the-art (SOTA) performance on offline IL benchmarks. Code is available at https://github.com/hmhuy0/SPRINQL .", "keywords": "imitation learning;offline imitation learning;reference reward;supplementary data;ranked dataset", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Huy Hoang;Tien Anh Mai;Pradeep Varakantham", "authorids": "~Huy_Hoang1;~Tien_Anh_Mai1;~Pradeep_Varakantham1", "gender": "M;M;M", "homepage": ";https://sites.google.com/view/tien-mai/;http://www.mysmu.edu.sg/faculty/pradeepv", "dblp": "56/11359.html;229/2286.html;72/759", "google_scholar": "-wrvT0MAAAAJ;;https://scholar.google.com.sg/citations?user=BAdQpFkAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Huy_Hoang1;~Tien_Anh_Mai1;~Pradeep_Varakantham1", "aff": "Singapore Management University;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "position": "Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhoang2024sprinql,\ntitle={{SPRINQL}: Sub-optimal Demonstrations driven Offline Imitation Learning},\nauthor={Huy Hoang and Tien Anh Mai and Pradeep Varakantham},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uDD44NROOt}\n}", "github": "", "reviewers": "qfkX;g6FW;vP7z;78nP", "pdf_size": 3747723, "rating": "5;5;7;7", "confidence": "4;3;4;3", "soundness": "2;2;4;3", "novelty": "3;2;4;3", "presentation": "3;3;4;3", "wc_summary": "55;106;116;66", "wc_strengths": "33;102;83;51", "wc_weaknesses": "31;135;44;80", "wc_questions": "181;119;20;5", "wc_limitations": "10;30;8;1", "wc_review": "310;492;271;203", "wc_reply_reviewers": "145;75;24;25", "wc_reply_authors": "525;29;17;17", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 85.75, 25.791229129298976 ], "wc_strengths_avg": [ 67.25, 26.892145693491994 ], "wc_weaknesses_avg": [ 72.5, 40.30198506277327 ], "wc_questions_avg": [ 81.25, 72.35459556932096 ], "wc_limitations_avg": [ 12.25, 10.779030568655049 ], "wc_review_avg": [ 319.0, 106.96962185592693 ], "wc_reply_reviewers_avg": [ 67.25, 49.3982540177282 ], "wc_reply_authors_avg": [ 147.0, 218.2933805684451 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14977509319261685595&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Automatic Outlier Rectification via Optimal Transport", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93283", "id": "uDxhMgjVJB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uDxhMgjVJB", "openreview": "https://openreview.net/forum?id=uDxhMgjVJB", "poster": "/media/PosterPDFs/NeurIPS%202024/93283.png?t=1731566032.7165825", "project": "", "author_site": "Jose Blanchet, Jiajin Li, Markus Pelger, Greg Zanotti", "tldr": "", "abstract": "In this paper, we propose a novel conceptual framework to detect outliers using optimal transport with a concave cost function. Conventional outlier detection approaches typically use a two-stage procedure: first, outliers are detected and removed, and then estimation is performed on the cleaned data. However, this approach does not inform outlier removal with the estimation task, leaving room for improvement. To address this limitation, we propose an automatic outlier rectification mechanism that integrates rectification and estimation within a joint optimization framework. We take the first step to utilize the optimal transport distance with a concave cost function to construct a rectification set in the space of probability distributions. Then, we select the best distribution within the rectification set to perform the estimation task. Notably, the concave cost function we introduced in this paper is the key to making our estimator effectively identify the outlier during the optimization process. We demonstrate the effectiveness of our approach over conventional approaches in simulations and empirical analyses for mean estimation, least absolute regression, and the fitting of option implied volatility surfaces.", "keywords": "Outlier Rectification; Optimal Transport; Statistically Robust", "primary_area": "other", "supplementary_material": "", "author": "Jose Blanchet;Jiajin Li;Markus Pelger;Greg Zanotti", "authorids": "~Jose_Blanchet1;~Jiajin_Li2;~Markus_Pelger1;~Greg_Zanotti1", "gender": "M;F;M;", "homepage": "https://web.stanford.edu/~jblanche/;https://gerrili1996.github.io/;https://mpelger.people.stanford.edu/;", "dblp": "75/5093.html;;;", "google_scholar": "https://scholar.google.co.in/citations?user=O24CcQQAAAAJ;;FpNrPm8AAAAJ;", "orcid": ";;;", "linkedin": "jose-blanchet;;;", "or_profile": "~Jose_Blanchet1;~Jiajin_Li2;~Markus_Pelger1;~Greg_Zanotti1", "aff": "Stanford University;Stanford University;Stanford University;", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;", "position": "Professor;Postdoc;Assistant Professor;", "bibtex": "@inproceedings{\nblanchet2024automatic,\ntitle={Automatic Outlier Rectification via Optimal Transport},\nauthor={Jose Blanchet and Jiajin Li and Markus Pelger and Greg Zanotti},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uDxhMgjVJB}\n}", "github": "", "reviewers": "iniy;CDCT;qw3a;i67o;fFC9", "pdf_size": 3371005, "rating": "5;5;7;7;8", "confidence": "3;3;4;2;3", "soundness": "3;3;3;3;3", "novelty": "3;3;3;3;4", "presentation": "3;3;3;3;3", "wc_summary": "69;109;49;97;74", "wc_strengths": "23;34;100;52;23", "wc_weaknesses": "15;134;129;121;68", "wc_questions": "3;8;132;54;44", "wc_limitations": "1;13;55;24;19", "wc_review": "111;298;465;348;228", "wc_reply_reviewers": "0;0;0;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;0;0;0", "reply_authors": "0;0;0;0;0", "rating_avg": [ 6.4, 1.2 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.6, 21.200000000000003 ], "wc_strengths_avg": [ 46.4, 28.82082580357475 ], "wc_weaknesses_avg": [ 93.4, 45.758496478796154 ], "wc_questions_avg": [ 48.2, 46.330983153824825 ], "wc_limitations_avg": [ 22.4, 18.017769007288333 ], "wc_review_avg": [ 290.0, 118.26918449029739 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4654676136817446202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "BAKU: An Efficient Transformer for Multi-Task Policy Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93282", "id": "uFXGsiYkkX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uFXGsiYkkX", "openreview": "https://openreview.net/forum?id=uFXGsiYkkX", "poster": "/media/PosterPDFs/NeurIPS%202024/93282.png?t=1730951186.2229574", "project": "", "author_site": "Siddhant Haldar, Zhuoran Peng, Lerrel Pinto", "tldr": "", "abstract": "Training generalist agents capable of solving diverse tasks is challenging, often requiring large datasets of expert demonstrations. This is particularly problematic in robotics, where each data point requires physical execution of actions in the real world. Thus, there is a pressing need for architectures that can effectively leverage the available training data. In this work, we present BAKU, a simple transformer architecture that enables efficient learning of multi-task robot policies. BAKU builds upon recent advancements in offline imitation learning and meticulously combines observation trunks, action chunking, multi-sensory observations, and action heads to substantially improve upon prior work. Our experiments on 129 simulated tasks across LIBERO, Meta-World suite, and the Deepmind Control suite exhibit an overall 18% absolute improvement over RT-1 and MT-ACT, with a 36% improvement on the harder LIBERO benchmark. On 30 real-world manipulation tasks, given an average of just 17 demonstrations per task, BAKU achieves a 91% success rate. Videos of the robot are best viewed at baku-robot.github.io.", "keywords": "Robot learning;Imitation Learning;Multitask Learning", "primary_area": "robotics", "supplementary_material": "/attachment/462f081e6994a2869793fb010c78c528e425f229.zip", "author": "Siddhant Haldar;Zhuoran Peng;Lerrel Pinto", "authorids": "~Siddhant_Haldar1;~Zhuoran_Peng1;~Lerrel_Pinto1", "gender": "M;M;M", "homepage": "https://siddhanthaldar.github.io/;https://bobbypeng123.github.io/;https://www.lerrelpinto.com/", "dblp": "227/2282;;168/8304", "google_scholar": "-h_bkRgAAAAJ;O7sI_yoAAAAJ;pmVPj94AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Siddhant_Haldar1;~Zhuoran_Peng1;~Lerrel_Pinto1", "aff": "New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;cs.nyu.edu", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhaldar2024baku,\ntitle={{BAKU}: An Efficient Transformer for Multi-Task Policy Learning},\nauthor={Siddhant Haldar and Zhuoran Peng and Lerrel Pinto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uFXGsiYkkX}\n}", "github": "", "reviewers": "FcXE;eFXw;RQGT;76DH;Y1eK", "pdf_size": 7963177, "rating": "5;5;6;6;7", "confidence": "4;4;4;5;4", "soundness": "2;3;3;2;3", "novelty": "2;2;2;2;3", "presentation": "4;3;3;3;4", "wc_summary": "86;61;44;69;51", "wc_strengths": "39;82;20;45;32", "wc_weaknesses": "113;181;121;279;43", "wc_questions": "159;4;136;60;21", "wc_limitations": "91;1;5;4;6", "wc_review": "488;329;326;457;153", "wc_reply_reviewers": "30;21;0;224;43", "wc_reply_authors": "17;52;0;185;9", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;3;1;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.2, 0.39999999999999997 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 62.2, 14.634206503941375 ], "wc_strengths_avg": [ 43.6, 20.924626639440906 ], "wc_weaknesses_avg": [ 147.4, 79.02809626961793 ], "wc_questions_avg": [ 76.0, 61.569472955353454 ], "wc_limitations_avg": [ 21.4, 34.840206658399715 ], "wc_review_avg": [ 350.6, 118.58937557808456 ], "wc_reply_reviewers_avg": [ 63.6, 81.41400370943565 ], "wc_reply_authors_avg": [ 52.6, 68.5057661806654 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.13363062095621223, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14192902227127514235&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "nyu.edu;nyu.edu;cs.nyu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Pretrained Transformer Efficiently Learns Low-Dimensional Target Functions In-Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93281", "id": "uHcG5Y6fdB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uHcG5Y6fdB", "openreview": "https://openreview.net/forum?id=uHcG5Y6fdB", "poster": "/media/PosterPDFs/NeurIPS%202024/93281.png?t=1733721969.7307549", "project": "", "author_site": "Kazusato Oko, Yujin Song, Taiji Suzuki, Denny Wu", "tldr": "", "abstract": "Transformers can efficiently learn in-context from example demonstrations. Most existing theoretical analyses studied the in-context learning (ICL) ability of transformers for linear function classes, where it is typically shown that the minimizer of the pretraining loss implements one gradient descent step on the least squares objective. However, this simplified linear setting arguably does not demonstrate the statistical efficiency of ICL, since the pretrained transformer does not outperform directly solving linear regression on the test prompt. \nIn this paper, we study ICL of a nonlinear function class via transformer with nonlinear MLP layer: given a class of \\textit{single-index} target functions $f_*(\\boldsymbol{x}) = \\sigma_*(\\langle\\boldsymbol{x},\\boldsymbol{\\beta}\\rangle)$, where the index features $\\boldsymbol{\\beta}\\in\\mathbb{R}^d$ are drawn from a $r$-dimensional subspace, we show that a nonlinear transformer optimized by gradient descent (with a pretraining sample complexity that depends on the \\textit{information exponent} of the link functions $\\sigma_*$) learns $f_*$ in-context with a prompt length that only depends on the dimension of the distribution of target functions $r$; in contrast, any algorithm that directly learns $f_*$ on test prompt yields a statistical complexity that scales with the ambient dimension $d$. Our result highlights the adaptivity of the pretrained transformer to low-dimensional structures of the function class, which enables sample-efficient ICL that outperforms estimators that only have access to the in-context data.", "keywords": "Transformer;in-context learning;feature learning;single-index models", "primary_area": "learning_theory", "supplementary_material": "", "author": "Kazusato Oko;Yujin Song;Taiji Suzuki;Denny Wu", "authorids": "~Kazusato_Oko1;~Yujin_Song1;~Taiji_Suzuki1;~Denny_Wu2", "gender": "M;M;M;M", "homepage": ";;http://ibis.t.u-tokyo.ac.jp/suzuki/;https://dennywu1.github.io/", "dblp": ";33/7664;08/312;", "google_scholar": ";;x8osrBsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0002-0095-5002;;", "linkedin": "kazusatooko/;;;", "or_profile": "~Kazusato_Oko1;~Yujin_Song1;~Taiji_Suzuki1;~Denny_Wu2", "aff": "The University of Tokyo;The University of Tokyo;The University of Tokyo;New York University", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;tokyo.ac.jp;nyu.edu", "position": "MS student;MS student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\noko2024pretrained,\ntitle={Pretrained Transformer Efficiently Learns Low-Dimensional Target Functions In-Context},\nauthor={Kazusato Oko and Yujin Song and Taiji Suzuki and Denny Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uHcG5Y6fdB}\n}", "github": "", "reviewers": "UaEF;gjHz;TSPc;3HLy", "pdf_size": 739757, "rating": "4;5;5;7", "confidence": "4;4;3;3", "soundness": "3;3;3;3", "novelty": "2;2;2;4", "presentation": "3;3;3;3", "wc_summary": "57;61;62;180", "wc_strengths": "56;45;17;235", "wc_weaknesses": "155;227;106;472", "wc_questions": "8;15;134;529", "wc_limitations": "1;10;1;31", "wc_review": "277;358;320;1447", "wc_reply_reviewers": "76;123;95;293", "wc_reply_authors": "401;191;53;370", "reply_reviewers": "1;2;1;2", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 90.0, 51.995192085422666 ], "wc_strengths_avg": [ 88.25, 85.91092771004163 ], "wc_weaknesses_avg": [ 240.0, 140.6893741545537 ], "wc_questions_avg": [ 171.5, 212.38938297382003 ], "wc_limitations_avg": [ 10.75, 12.255100978776143 ], "wc_review_avg": [ 600.5, 489.56638977772974 ], "wc_reply_reviewers_avg": [ 146.75, 86.07663736461828 ], "wc_reply_authors_avg": [ 253.75, 140.92085544730418 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5682868608684287446&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "u-tokyo.ac.jp;u-tokyo.ac.jp;tokyo.ac.jp;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Tokyo;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.nyu.edu", "aff_unique_abbr": "UTokyo;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Japan;United States" }, { "title": "Learning from higher-order correlations, efficiently: hypothesis tests, random features, and neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93280", "id": "uHml6eyoVF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uHml6eyoVF", "openreview": "https://openreview.net/forum?id=uHml6eyoVF", "poster": "/media/PosterPDFs/NeurIPS%202024/93280.png?t=1733303989.9738955", "project": "", "author_site": "Eszter Szekely, Lorenzo Bardone, Federica Gerace, Sebastian Goldt", "tldr": "", "abstract": "Neural networks excel at discovering statistical patterns in\nhigh-dimensional data sets. In practice, higher-order cumulants, which quantify\nthe non-Gaussian correlations between three or more variables, are particularly\nimportant for the performance of neural networks. But how efficient are neural\nnetworks at extracting features from higher-order cumulants? We study this\nquestion in the spiked cumulant model, where the statistician needs to recover a\nprivileged direction or \"spike'' from the order-$p\\ge 4$ cumulants\nof $d$-dimensional inputs. \nWe first discuss the fundamental statistical and\ncomputational limits of recovering the spike by analysing the number of\n samples $n$ required to strongly distinguish between inputs from the spiked\ncumulant model and isotropic Gaussian inputs. \nExisting literature established the presence of a wide statistical-to-computational gap in this problem. We deepen this line of work by finding an exact formula for the likelihood ratio norm which proves that statistical\ndistinguishability requires $n\\gtrsim d$ samples, while distinguishing the two\ndistributions in polynomial time requires $n \\gtrsim d^2$ samples for a wide\nclass of algorithms, i.e. those covered by the low-degree conjecture. \nNumerical experiments show that neural networks do indeed learn to distinguish\nthe two distributions with quadratic sample complexity, while ``lazy'' methods\nlike random features are not better than random guessing in this regime. Our\nresults show that neural networks extract information from higher-order\ncorrelations in the spiked cumulant model efficiently, and reveal a large gap in\nthe amount of data required by neural networks and random features to learn from\nhigher-order cumulants.", "keywords": "higher-order cumulant;hypothesis test;neural network;random features;low-degree method", "primary_area": "learning_theory", "supplementary_material": "", "author": "Eszter Szekely;Lorenzo Bardone;Federica Gerace;Sebastian Goldt", "authorids": "~Eszter_Szekely1;~Lorenzo_Bardone1;~Federica_Gerace1;~Sebastian_Goldt1", "gender": "F;M;F;M", "homepage": ";https://www.math.sissa.it/users/lorenzo-bardone;;https://datascience.sissa.it/research-unit/12/theory-of-neural-networks", "dblp": "364/8078;;;234/8941", "google_scholar": "ewRlRFQAAAAJ;;dvDLaPkAAAAJ;R06wsMkAAAAJ", "orcid": "0000-0002-2316-8936;;;", "linkedin": "eszter-sz%C3%A9kely-3b99a7146;;;", "or_profile": "~Eszter_Szekely1;~Lorenzo_Bardone1;~Federica_Gerace1;~Sebastian_Goldt1", "aff": "International Higher School for Advanced Studies Trieste;International Higher School for Advanced Studies Trieste;International Higher School for Advanced Studies Trieste;SISSA", "aff_domain": "sissa.it;sissa.it;sissa.it;sissa.it", "position": "Postdoc;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nszekely2024learning,\ntitle={Learning from higher-order correlations, efficiently: hypothesis tests, random features, and neural networks},\nauthor={Eszter Szekely and Lorenzo Bardone and Federica Gerace and Sebastian Goldt},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uHml6eyoVF}\n}", "github": "", "reviewers": "GzyK;mvai;u8Z4;xzwG", "pdf_size": 2166066, "rating": "5;6;6;7", "confidence": "4;3;4;2", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;3;3", "wc_summary": "93;139;109;114", "wc_strengths": "9;126;100;54", "wc_weaknesses": "32;65;229;22", "wc_questions": "44;214;41;6", "wc_limitations": "6;10;14;4", "wc_review": "184;554;493;200", "wc_reply_reviewers": "0;29;30;72", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.75, 16.513252253871737 ], "wc_strengths_avg": [ 72.25, 44.70109059072273 ], "wc_weaknesses_avg": [ 87.0, 83.5134719671024 ], "wc_questions_avg": [ 76.25, 80.92087184404281 ], "wc_limitations_avg": [ 8.5, 3.840572873934304 ], "wc_review_avg": [ 357.75, 167.24289969980788 ], "wc_reply_reviewers_avg": [ 32.75, 25.66490794840301 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8528028654224418, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iVg8CrzHj4MJ:scholar.google.com/&scioq=Learning+from+higher-order+correlations,+efficiently:+hypothesis+tests,+random+features,+and+neural+networks&hl=en&as_sdt=0,48", "gs_version_total": 0, "email": "sissa.it;sissa.it;sissa.it;sissa.it", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "International Higher School for Advanced Studies;Scuola Internazionale Superiore di Studi Avanzati", "aff_unique_dep": ";", "aff_unique_url": "https://www.sissa.it;https://www.sissa.it", "aff_unique_abbr": "SISSA;SISSA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Trieste;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "MoVA: Adapting Mixture of Vision Experts to Multimodal Context", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93279", "id": "uHs6RJFDsg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uHs6RJFDsg", "openreview": "https://openreview.net/forum?id=uHs6RJFDsg", "poster": "", "project": "", "author_site": "ZHUOFAN ZONG, Bingqi Ma, Dazhong Shen, Guanglu Song, Hao Shao, DONGZHI JIANG, Hongsheng Li, Yu Liu", "tldr": "", "abstract": "As the key component in multimodal large language models (MLLMs), the ability of the visual encoder greatly affects MLLM's understanding on diverse image content. Although some large-scale pretrained vision encoders such as vision encoders in CLIP and DINOv2 have brought promising performance, we found that there is still no single vision encoder that can dominate various image content understanding, e.g., the CLIP vision encoder leads to outstanding results on general image understanding but poor performance on document or chart content. To alleviate the bias of CLIP vision encoder, we first delve into the inherent behavior of different pre-trained vision encoders and then propose the MoVA, a powerful and novel MLLM, adaptively routing and fusing task-specific vision experts with a coarse-to-fine mechanism. In the coarse-grained stage, we design a context-aware expert routing strategy to dynamically select the most suitable vision experts according to the user instruction, input image, and expertise of vision experts. This benefits from the powerful model function understanding ability of the large language model (LLM). In the fine-grained stage, we elaborately conduct the mixture-of-vision-expert adapter (MoV-Adapter) to extract and fuse task-specific knowledge from various experts. This coarse-to-fine paradigm effectively leverages representations from experts based on multimodal context and model expertise, further enhancing the generalization ability. We conduct extensive experiments to evaluate the effectiveness of the proposed approach. Without any bells and whistles, MoVA can achieve significant performance gains over current state-of-the-art methods in a wide range of challenging multimodal benchmarks.", "keywords": "Multimodal large language model;Vision encoder;Mixture-of-expert", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zhuofan Zong;Bingqi Ma;Dazhong Shen;Guanglu Song;Hao Shao;Dongzhi Jiang;Hongsheng Li;Yu Liu", "authorids": "~Zhuofan_Zong1;~Bingqi_Ma1;~Dazhong_Shen1;~Guanglu_Song2;~Hao_Shao1;~Dongzhi_Jiang1;~Hongsheng_Li3;~Yu_Liu2", "gender": "M;M;M;M;;M;M;M", "homepage": "https://zongzhuofan.github.io/;https://mabingqi.github.io/;http://www.shendazhong.com/;;http://hao-shao.com;https://caraj7.github.io;http://www.ee.cuhk.edu.hk/~hsli;http://liuyu.us", "dblp": "266/4989;309/1989;222/7906;207/4745;66/3089.html;344/2917;27/7402-1;97/2274-15", "google_scholar": "vls0YhoAAAAJ;rcWQWCoAAAAJ;5vSh09YAAAAJ;Bd3v08QAAAAJ;https://scholar.google.com.hk/citations?user=D_ZLR1oAAAAJ;jIR4PAsAAAAJ;BN2Ze-QAAAAJ;", "orcid": ";;0000-0002-3947-4153;;;;;", "linkedin": ";https://www.linkedin.cn/injobs/in/%E5%86%B0%E5%A5%87-%E9%A9%AC-106637154;;;;;;", "or_profile": "~Zhuofan_Zong1;~Bingqi_Ma1;~Dazhong_Shen1;~Guanglu_Song2;~Hao_Shao1;~Dongzhi_Jiang1;~Hongsheng_Li3;~Yu_Liu2", "aff": "The Chinese University of Hong Kong;Sensetime Group Limmited;Shanghai Artificial Intelligence Laboratory;Sensetime;The Chinese University of Hong Kong, The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong;SenseTime", "aff_domain": "link.cuhk.edu.hk;sensetime.com;pjlab.org.cn;sensetime.com;ee.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;sensetime.com", "position": "PhD student;Researcher;Researcher;Computer Vision Researcher;PhD student;PhD student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nzong2024mova,\ntitle={Mo{VA}: Adapting Mixture of Vision Experts to Multimodal Context},\nauthor={Zhuofan Zong and Bingqi Ma and Dazhong Shen and Guanglu Song and Hao Shao and Dongzhi Jiang and Hongsheng Li and Yu Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uHs6RJFDsg}\n}", "github": "", "reviewers": "dPiw;gqRq;Lntx;FVeU", "pdf_size": 3291689, "rating": "4;5;6;7", "confidence": "4;5;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;4", "wc_summary": "55;47;66;52", "wc_strengths": "22;21;65;47", "wc_weaknesses": "213;119;60;117", "wc_questions": "39;13;8;70", "wc_limitations": "1;7;30;49", "wc_review": "330;207;229;335", "wc_reply_reviewers": "0;214;20;20", "wc_reply_authors": "72;849;11;21", "reply_reviewers": "0;2;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 55.0, 6.96419413859206 ], "wc_strengths_avg": [ 38.75, 18.38987493160299 ], "wc_weaknesses_avg": [ 127.25, 54.883399129427104 ], "wc_questions_avg": [ 32.5, 24.642443060703215 ], "wc_limitations_avg": [ 21.75, 19.09679292446771 ], "wc_review_avg": [ 275.25, 57.803005977198104 ], "wc_reply_reviewers_avg": [ 63.5, 87.27399383550635 ], "wc_reply_authors_avg": [ 238.25, 353.37471259273775 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.3162277660168379, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5457644658168928473&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "link.cuhk.edu.hk;sensetime.com;pjlab.org.cn;sensetime.com;ee.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;sensetime.com", "author_num": 8, "aff_unique_index": "0;1;2;3;0;0;0;3", "aff_unique_norm": "Chinese University of Hong Kong;SenseTime Group Limited;Shanghai Artificial Intelligence Laboratory;SenseTime", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sensetime.com;http://www.shailab.org/;https://www.sensetime.com", "aff_unique_abbr": "CUHK;SenseTime;Shanghai AI Lab;SenseTime", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Personalized Instance-based Navigation Toward User-Specific Objects in Realistic Environments", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97461", "id": "uKqn1Flsbp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uKqn1Flsbp", "openreview": "https://openreview.net/forum?id=uKqn1Flsbp", "poster": "/media/PosterPDFs/NeurIPS%202024/97461.png?t=1734033083.946572", "project": "", "author_site": "Luca Barsellotti, Roberto Bigazzi, Marcella Cornia, Lorenzo Baraldi, Rita Cucchiara", "tldr": "", "abstract": "In the last years, the research interest in visual navigation towards objects in indoor environments has grown significantly. This growth can be attributed to the recent availability of large navigation datasets in photo-realistic simulated environments, like Gibson and Matterport3D. However, the navigation tasks supported by these datasets are often restricted to the objects present in the environment at acquisition time. Also, they fail to account for the realistic scenario in which the target object is a user-specific instance that can be easily confused with similar objects and may be found in multiple locations within the environment. To address these limitations, we propose a new task denominated Personalized Instance-based Navigation (PIN), in which an embodied agent is tasked with locating and reaching a specific personal object by distinguishing it among multiple instances of the same category. The task is accompanied by PInNED, a dedicated new dataset composed of photo-realistic scenes augmented with additional 3D objects. In each episode, the target object is presented to the agent using two modalities: a set of visual reference images on a neutral background and manually annotated textual descriptions. Through comprehensive evaluations and analyses, we showcase the challenges of the PIN task as well as the performance and shortcomings of currently available methods designed for object-driven navigation, considering modular and end-to-end agents.", "keywords": "Vision-Language Navigation;Embodied Navigation;Personalized Detection", "primary_area": "", "supplementary_material": "/attachment/53c4539477c2c58b7d3f70b7190139fb04bfcff5.pdf", "author": "Luca Barsellotti;Roberto Bigazzi;Marcella Cornia;Lorenzo Baraldi;Rita Cucchiara", "authorids": "~Luca_Barsellotti1;~Roberto_Bigazzi1;~Marcella_Cornia1;~Lorenzo_Baraldi1;~Rita_Cucchiara1", "gender": "M;M;F;M;F", "homepage": "https://lucabarsellotti.github.io/;https://robertobigazzi.it;https://aimagelab.ing.unimore.it/imagelab/person.asp?idpersona=90;;https://aimagelab.ing.unimore.it/imagelab/", "dblp": "328/8099;270/1519;186/8196;158/5775;c/RitaCucchiara", "google_scholar": "zYda_LYAAAAJ;OmZlLU8AAAAJ;https://scholar.google.it/citations?user=DzgmSJEAAAAJ;https://scholar.google.it/citations?user=V4RuMvsAAAAJ;OM3sZEoAAAAJ", "orcid": "0000-0001-8845-8523;0000-0002-6457-1860;0000-0001-9640-9385;;0000-0002-2239-283X", "linkedin": ";roberto-bigazzi/;marcella-cornia-b26912ba/;;rita-cucchiara-a4653a13/?originalSubdomain=it", "or_profile": "~Luca_Barsellotti1;~Roberto_Bigazzi1;~Marcella_Cornia1;~Lorenzo_Baraldi1;~Rita_Cucchiara1", "aff": "University of Modena and Reggio Emilia;University of Modena and Reggio Emilia;University of Modena and Reggio Emilia;Universit\u00e0 degli Studi di Modena e Reggio Emilia;Universit\u00e0 di modena e reggio emilia", "aff_domain": "unimore.it;unimore.it;unimore.it;unimore.it;unimore.it", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nbarsellotti2024personalized,\ntitle={Personalized Instance-based Navigation Toward User-Specific Objects in Realistic Environments},\nauthor={Luca Barsellotti and Roberto Bigazzi and Marcella Cornia and Lorenzo Baraldi and Rita Cucchiara},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=uKqn1Flsbp}\n}", "github": "", "reviewers": "kDUs;UcJy;6hP2;94wz", "pdf_size": 5266399, "rating": "6;6;7;7", "confidence": "3;5;2;3", "wc_summary_and_contributions": "107;64;35;36", "wc_strengths": "287;77;11;114", "wc_improvement": "353;208;29;39", "wc_limitations": "398;10;7;11", "wc_correctness": "1;9;6;1", "wc_clarity": "7;5;1;4", "wc_relation_to_prior_work": "1;8;8;18", "wc_documentation": "6;8;8;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "1161;390;106;230", "wc_reply_reviewers": "0;649;0;0", "wc_reply_authors": "0;1892;0;0", "reply_reviewers": "0;4;0;0", "reply_authors": "1;5;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "wc_summary_and_contributions_avg": [ 60.5, 29.261749776799064 ], "wc_strengths_avg": [ 122.25, 102.02297535359376 ], "wc_improvement_avg": [ 157.25, 133.53346958721622 ], "wc_limitations_avg": [ 106.5, 168.30404035554227 ], "wc_correctness_avg": [ 4.25, 3.418698582794336 ], "wc_clarity_avg": [ 4.25, 2.165063509461097 ], "wc_relation_to_prior_work_avg": [ 8.75, 6.057020719792859 ], "wc_documentation_avg": [ 7.0, 1.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 471.75, 410.47678070750845 ], "wc_reply_reviewers_avg": [ 162.25, 281.02524352805034 ], "wc_reply_authors_avg": [ 473.0, 819.2600319800789 ], "reply_reviewers_avg": [ 1.0, 1.7320508075688772 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QSMvJIihs3QJ:scholar.google.com/&scioq=Personalized+Instance-based+Navigation+Toward+User-Specific+Objects+in+Realistic+Environments&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "unimore.it;unimore.it;unimore.it;unimore.it;unimore.it", "author_num": 5, "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "University of Modena and Reggio Emilia;Universit\u00e0 degli Studi di Modena e Reggio Emilia;Universit\u00e0 di Modena e Reggio Emilia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unimore.it;https://www.unimore.it;https://www.unimore.it", "aff_unique_abbr": ";;Unimore", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Disentangled Representation Learning in Non-Markovian Causal Systems", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93278", "id": "uLGyoBn7hm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uLGyoBn7hm", "openreview": "https://openreview.net/forum?id=uLGyoBn7hm", "poster": "", "project": "", "author_site": "Adam Li, Yushu Pan, Elias Bareinboim", "tldr": "", "abstract": "Considering various data modalities, such as images, videos, and text, humans perform causal reasoning using high-level causal variables, as opposed to operating at the low, pixel level from which the data comes. \nIn practice, most causal reasoning methods assume that the data is described as granular as the underlying causal generative factors, which is often violated in various AI tasks. \nThis mismatch translates into a lack of guarantees in various tasks such as generative modeling, decision-making, fairness, and generalizability, to cite a few. \nIn this paper, we acknowledge this issue and study the problem of causal disentangled representation learning from a combination of data gathered from various heterogeneous domains and assumptions in the form of a latent causal graph. To the best of our knowledge, the proposed work is the first to consider i) non-Markovian causal settings, where there may be unobserved confounding, ii) arbitrary distributions that arise from multiple domains, and iii) a relaxed version of disentanglement. Specifically, we introduce graphical criteria that allow for disentanglement under various conditions. Building on these results, we develop an algorithm that returns a causal disentanglement map, highlighting which latent variables can be disentangled given the combination of data and assumptions. The theory is corroborated by experiments.", "keywords": "causal representation learning;disentanglement;nonlinear ICA", "primary_area": "causal_inference", "supplementary_material": "", "author": "Adam Li;Yushu Pan;Elias Bareinboim", "authorids": "~Adam_Li1;~Yushu_Pan1;~Elias_Bareinboim2", "gender": "M;;M", "homepage": "https://adam2392.github.io;;https://causalai.net", "dblp": "176/3454;243/6652;85/9005", "google_scholar": "KxY17KcAAAAJ;https://scholar.google.com/citations?hl=en;r5U-D7YAAAAJ", "orcid": "0000-0001-8421-365X;;", "linkedin": "adam2392/;yushu-pan-699a37194/;", "or_profile": "~Adam_Li1;~Yushu_Pan1;~Elias_Bareinboim2", "aff": "Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu", "position": "Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2024disentangled,\ntitle={Disentangled Representation Learning in Non-Markovian Causal Systems},\nauthor={Adam Li and Yushu Pan and Elias Bareinboim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uLGyoBn7hm}\n}", "github": "", "reviewers": "fxDa;SjE4;J7y2;XxiF;zg4p", "pdf_size": 3514815, "rating": "5;5;5;6;7", "confidence": "3;3;4;3;2", "soundness": "4;3;3;4;4", "novelty": "2;3;2;3;4", "presentation": "3;3;2;2;3", "wc_summary": "49;61;49;163;173", "wc_strengths": "69;41;82;78;29", "wc_weaknesses": "73;172;345;335;51", "wc_questions": "161;4;159;69;49", "wc_limitations": "2;1;6;113;7", "wc_review": "354;279;641;758;309", "wc_reply_reviewers": "88;91;87;115;88", "wc_reply_authors": "308;841;967;86;62", "reply_reviewers": "1;1;2;1;1", "reply_authors": "3;3;5;2;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.6, 0.4898979485566356 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 99.0, 56.59681969863678 ], "wc_strengths_avg": [ 59.8, 21.027600909281116 ], "wc_weaknesses_avg": [ 195.2, 125.09900079537006 ], "wc_questions_avg": [ 88.4, 62.14048599745579 ], "wc_limitations_avg": [ 25.8, 43.65959230226503 ], "wc_review_avg": [ 468.2, 193.92101484882963 ], "wc_reply_reviewers_avg": [ 93.8, 10.684568311354464 ], "wc_reply_authors_avg": [ 452.8, 380.3511009580491 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.7905694150420948, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=281848501471906683&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "columbia.edu;columbia.edu;columbia.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Partial Structure Discovery is Sufficient for No-regret Learning in Causal Bandits", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93277", "id": "uM3rQ14iex", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uM3rQ14iex", "openreview": "https://openreview.net/forum?id=uM3rQ14iex", "poster": "", "project": "", "author_site": "Muhammad Qasim Elahi, Mahsa Ghasemi, Murat Kocaoglu", "tldr": "", "abstract": "Causal knowledge about the relationships among decision variables and a reward variable in a bandit setting can accelerate the learning of an optimal decision. Current works often assume the causal graph is known, which may not always be available a priori. Motivated by this challenge, we focus on the causal bandit problem in scenarios where the underlying causal graph is unknown and may include latent confounders. While intervention on the parents of the reward node is optimal in the absence of latent confounders, this is not necessarily the case in general. Instead, one must consider a set of possibly optimal arms/interventions, each being a special subset of the ancestors of the reward node, making causal discovery beyond the parents of the reward node essential. For regret minimization, we identify that discovering the full causal structure is unnecessary; however, no existing work provides the necessary and sufficient components of the causal graph. We formally characterize the set of necessary and sufficient latent confounders one needs to detect or learn to ensure that all possibly optimal arms are identified correctly. We also propose a randomized algorithm for learning the causal graph with a limited number of samples, providing a sample complexity guarantee for any desired confidence level. In the causal bandit setup, we propose a two-stage approach. In the first stage, we learn the induced subgraph on ancestors of the reward, along with a necessary and sufficient subset of latent confounders, to construct the set of possibly optimal arms. We show that for our proposed algorithm, the number of intervention samples required to learn the set of possibly optimal arms scales polynomially with respect to the number of nodes. The second phase involves the application of a standard bandit algorithm, such as the UCB algorithm. We also establish a regret bound for our two-phase approach, which is sublinear in the number of rounds.", "keywords": "Causal Bandits;No-regret Learning;Causal Discovery", "primary_area": "bandits", "supplementary_material": "/attachment/0da32fc5f5e8e084de3deb6b8d1acc8f5233bf59.zip", "author": "Muhammad Qasim Elahi;Mahsa Ghasemi;Murat Kocaoglu", "authorids": "~Muhammad_Qasim_Elahi1;~Mahsa_Ghasemi1;~Murat_Kocaoglu1", "gender": "M;F;M", "homepage": "https://www.linkedin.com/in/qasim-elahi-b59948133/;https://mahsaghasemi.github.io/;https://www.muratkocaoglu.com", "dblp": ";206/6477;74/11343", "google_scholar": "M7C8dFAAAAAJ;7KqsRJ8AAAAJ;7N7bzdwAAAAJ", "orcid": ";;", "linkedin": ";;mkocaoglu/", "or_profile": "~Muhammad_Qasim_Elahi1;~Mahsa_Ghasemi1;~Murat_Kocaoglu1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nelahi2024partial,\ntitle={Partial Structure Discovery is Sufficient for No-regret Learning in Causal Bandits},\nauthor={Muhammad Qasim Elahi and Mahsa Ghasemi and Murat Kocaoglu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uM3rQ14iex}\n}", "github": "", "reviewers": "VRJg;FQKc;MSdE", "pdf_size": 5859211, "rating": "6;7;7", "confidence": "3;3;3", "soundness": "4;4;4", "novelty": "3;3;4", "presentation": "4;3;3", "wc_summary": "127;77;27", "wc_strengths": "72;63;36", "wc_weaknesses": "77;52;69", "wc_questions": "144;88;94", "wc_limitations": "26;1;10", "wc_review": "446;281;236", "wc_reply_reviewers": "16;96;35", "wc_reply_authors": "37;567;25", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 40.824829046386306 ], "wc_strengths_avg": [ 57.0, 15.297058540778355 ], "wc_weaknesses_avg": [ 66.0, 10.424330514074594 ], "wc_questions_avg": [ 108.66666666666667, 25.104227178350307 ], "wc_limitations_avg": [ 12.333333333333334, 10.338708279513883 ], "wc_review_avg": [ 321.0, 90.27735042633894 ], "wc_reply_reviewers_avg": [ 49.0, 34.1272129929572 ], "wc_reply_authors_avg": [ 209.66666666666666, 252.7203109280209 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9832276683071981383&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Formal Mathematics From Intrinsic Motivation", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93276", "id": "uNKlTQ8mBD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uNKlTQ8mBD", "openreview": "https://openreview.net/forum?id=uNKlTQ8mBD", "poster": "", "project": "", "author_site": "Gabriel Poesia, David Broman, Nick Haber, Noah Goodman", "tldr": "", "abstract": "How did humanity coax mathematics from the aether? We explore the Platonic view that mathematics can be discovered from its axioms---a game of conjecture and proof. We describe an agent that jointly learns to pose challenging problems for itself (conjecturing) and solve them (theorem proving). Given a mathematical domain axiomatized in dependent type theory, we first combine methods for constrained decoding and type-directed synthesis to sample valid conjectures from a language model. Our method guarantees well-formed conjectures by construction, even as we start with a randomly initialized model. We use the same model to represent a policy and value function for guiding proof search. Our agent targets generating hard but provable conjectures --- a moving target, since its own theorem proving ability also improves as it trains. We propose novel methods for hindsight relabeling on proof search trees to significantly improve the agent's sample efficiency in both tasks. Experiments on 3 axiomatic domains (propositional logic, arithmetic and group theory) demonstrate that our agent can bootstrap from only the axioms, self-improving in generating true and challenging conjectures and in finding proofs.", "keywords": "reasoning;reinforcement learning;formal mathematics;logic", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/d581092ecc885d5149dd8002d3958c36e11fdef5.zip", "author": "Gabriel Poesia;David Broman;Nick Haber;Noah Goodman", "authorids": "~Gabriel_Poesia1;~David_Broman1;~Nick_Haber1;~Noah_Goodman1", "gender": "M;M;;", "homepage": "https://gpoesia.com;https://people.kth.se/~dbro/;;https://cocolab.stanford.edu/", "dblp": "150/2695.html;13/3318;179/4983;96/1216", "google_scholar": "as5iYn4AAAAJ;https://scholar.google.se/citations?user=Jvtpzw8AAAAJ;euNCoVYAAAAJ;OUpIbcQAAAAJ", "orcid": ";0000-0001-8457-4105;0000-0001-8804-7804;", "linkedin": ";davidbroman/;;", "or_profile": "~Gabriel_Poesia1;~David_Broman1;~Nick_Haber1;~Noah_Goodman1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Visiting Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npoesia2024learning,\ntitle={Learning Formal Mathematics From Intrinsic Motivation},\nauthor={Gabriel Poesia and David Broman and Nick Haber and Noah Goodman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uNKlTQ8mBD}\n}", "github": "", "reviewers": "8wJE;awrR;XdUe;6DqB;viHR", "pdf_size": 784591, "rating": "5;6;6;6;7", "confidence": "3;4;3;3;4", "soundness": "3;1;2;3;3", "novelty": "2;3;3;3;3", "presentation": "3;2;4;3;3", "wc_summary": "116;164;146;38;74", "wc_strengths": "163;90;119;26;125", "wc_weaknesses": "103;391;380;26;183", "wc_questions": "48;410;160;109;209", "wc_limitations": "1;16;28;7;10", "wc_review": "431;1071;833;206;601", "wc_reply_reviewers": "17;344;198;8;16", "wc_reply_authors": "0;811;69;0;0", "reply_reviewers": "1;2;1;1;1", "reply_authors": "1;2;2;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.4, 0.8 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 107.6, 46.25840464175132 ], "wc_strengths_avg": [ 104.6, 45.66223822810266 ], "wc_weaknesses_avg": [ 216.6, 146.61323269064087 ], "wc_questions_avg": [ 187.2, 123.56925183879686 ], "wc_limitations_avg": [ 12.4, 9.178235124467012 ], "wc_review_avg": [ 628.4, 301.95867266895976 ], "wc_reply_reviewers_avg": [ 116.6, 134.29162297031039 ], "wc_reply_authors_avg": [ 176.0, 318.62266083880473 ], "reply_reviewers_avg": [ 1.2, 0.4000000000000001 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6454972243679028, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2143951294478136793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "uNZpvFlsg9", "title": "PiCO: Peer Review in LLMs based on the Consistency Optimization", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing large language models (LLMs) evaluation methods typically focus on testing the performance on some closed-environment and domain-specific benchmarks with human annotations. In this paper, we explore a novel unsupervised evaluation direction, utilizing peer-review mechanisms to measure LLMs automatically without any human feedback. \nIn this setting, both open-source and closed-source LLMs lie in the same environment, capable of answering unlabeled questions and evaluating each other, where each LLM's response score is jointly determined by other anonymous ones. To obtain the ability hierarchy among these models, we assign each LLM a learnable capability parameter to adjust the final ranking.\nWe formalize it as a constrained optimization problem, intending to maximize the consistency of each LLM's capabilities and scores. \nThe key assumption behind is that high-level LLM can evaluate others' answers more accurately than low-level ones, while higher-level LLM can also achieve higher response scores. \nMoreover, we propose three metrics called PEN, CIN, and LIS to evaluate the gap in aligning human rankings. \nWe perform experiments on multiple datasets with these metrics, validating the effectiveness of the proposed approach.", "keywords": "Large Language Model;Unsupervised Evaluation;Peer Review;Consistency Optimization", "primary_area": "evaluation", "supplementary_material": "", "author": "Kun-Peng Ning;Shuo Yang;Yuyang Liu;Jia-Yu Yao;Zhenhui Liu;Yu Wang;Ming Pang;Li Yuan", "authorids": "~Kun-Peng_Ning1;~Shuo_Yang19;~Yuyang_Liu1;~Jia-Yu_Yao1;~Zhenhui_Liu1;~Yu_Wang43;~Ming_Pang2;~Li_Yuan2", "gender": "M;M;F;M;Non-Binary;F;;M", "homepage": "https://ningkp.github.io;https://github.com/Ramyyang;http://ai.sia.cn/grc/lyy/;https://parnec.nuaa.edu.cn/2020/0623/c12783a205851/page.htm;;https://github.com/rain305f;;https://yuanli2333.github.io/", "dblp": "267/5408;;;;;;59/5011.html;98/4583-7", "google_scholar": "https://scholar.google.com.hk/citations?user=oE8ge7sAAAAJ;;sWSKvYUAAAAJ;;55nAQMsAAAAJ;lzsu-5MAAAAJ;;-5juAR0AAAAJ", "orcid": ";;;;;;;0000-0002-2120-5588", "linkedin": ";;;;;;;", "or_profile": "~Kun-Peng_Ning1;~Shuo_Yang19;~Yuyang_Liu1;~Jia-Yu_Yao1;~Zhenhui_Liu1;~Yu_Wang43;~Ming_Pang2;~Yuan_LI2", "aff": "Peking University;Tianjin University;Shenyang Institute of Automation, Chinese Academy of Sciences/ University of Chinese Academy of Sciences;Peking University;Peking University;Peking University;JD.com;Peking University", "aff_domain": "pku.edu.cn;tju.edu.cn;sia.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;jd.com;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;RA;MS student;MS student;Principal Researcher;Assistant Professor", "bibtex": "@misc{\nanonymous2024pico,\ntitle={Pi{CO}: Peer Review in {LLM}s based on the Consistency Optimization},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=uNZpvFlsg9}\n}", "github": "", "project": "", "reviewers": "Ezxz;8Eov;e5pS;mhTC", "site": "https://openreview.net/forum?id=uNZpvFlsg9", "pdf_size": 11501743, "rating": "3;4;6;7", "confidence": "4;4;3;4", "soundness": "2;2;3;3", "novelty": "1;2;3;2", "presentation": "2;3;3;3", "wc_summary": "121;69;60;19", "wc_strengths": "112;77;43;70", "wc_weaknesses": "163;428;261;18", "wc_questions": "5;150;141;41", "wc_limitations": "1;7;1;27", "wc_review": "402;731;506;175", "wc_reply_reviewers": "0;21;23;0", "wc_reply_authors": "148;180;0;75", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;1;2", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.25, 36.306851970392586 ], "wc_strengths_avg": [ 75.5, 24.60182920028509 ], "wc_weaknesses_avg": [ 217.5, 149.14171113407542 ], "wc_questions_avg": [ 84.25, 62.63934466451577 ], "wc_limitations_avg": [ 9.0, 10.677078252031311 ], "wc_review_avg": [ 453.5, 199.9856244833613 ], "wc_reply_reviewers_avg": [ 11.0, 11.022703842524301 ], "wc_reply_authors_avg": [ 100.75, 69.51034095729929 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.36514837167011077, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18195374833987961815&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff_unique_index": "0;1;2;0;0;0;3;0", "aff_unique_norm": "Peking University;Tianjin University;Shenyang Institute of Automation;JD.com", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.tju.edu.cn;http://www.sia.cas.cn;https://www.jd.com", "aff_unique_abbr": "Peking U;TJU;SIA;JD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Nonconvex Federated Learning on Compact Smooth Submanifolds With Heterogeneous Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93275", "id": "uO53206oLJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uO53206oLJ", "openreview": "https://openreview.net/forum?id=uO53206oLJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93275.png?t=1730208318.16625", "project": "", "author_site": "Jiaojiao Zhang, Jiang Hu, Anthony Man-Cho So, Mikael Johansson", "tldr": "", "abstract": "Many machine learning tasks, such as principal component analysis and low-rank matrix completion, give rise to manifold optimization problems. Although there is a large body of work studying the design and analysis of algorithms for manifold optimization in the centralized setting, there are currently very few works addressing the federated setting. In this paper, we consider nonconvex federated learning\nover a compact smooth submanifold in the setting of heterogeneous client data. We propose an algorithm that leverages stochastic Riemannian gradients and a manifold projection operator to improve computational efficiency, uses local updates to improve communication efficiency, and avoids client drift. Theoretically, we show that our proposed algorithm converges sub-linearly to a neighborhood of a first-order optimal solution by using a novel analysis that jointly exploits the manifold structure and properties of the loss functions. Numerical experiments demonstrate that our algorithm has significantly smaller computational and communication overhead than existing methods.", "keywords": "Federated learning;manifold optimization;heterogeneous data", "primary_area": "optimization", "supplementary_material": "", "author": "Jiaojiao Zhang;Jiang Hu;Anthony Man-Cho So;Mikael Johansson", "authorids": "~Jiaojiao_Zhang3;~Jiang_Hu2;~Anthony_Man-Cho_So1;~Mikael_Johansson3", "gender": ";M;M;M", "homepage": ";https://hujiangpku.github.io/;http://www1.se.cuhk.edu.hk/~manchoso/;https://people.KTH.se/~mikaelj", "dblp": ";;82/3202;53/764-1", "google_scholar": "Y9TIpzAAAAAJ;WIlpQFwAAAAJ;https://scholar.google.com.hk/citations?user=whi3UisAAAAJ;wQSRT18AAAAJ", "orcid": ";;0000-0003-2588-7851;", "linkedin": ";;;", "or_profile": "~Jiaojiao_Zhang3;~Jiang_Hu2;~Anthony_Man-Cho_So1;~Mikael_Johansson3", "aff": "KTH Royal Institute of Technology;Harvard University;The Chinese University of Hong Kong;KTH Royal Institute of Technology, Stockholm, Sweden", "aff_domain": "kth.se;harvard.edu;cuhk.edu.hk;kth.se", "position": "Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024nonconvex,\ntitle={Nonconvex Federated Learning on Compact Smooth Submanifolds With Heterogeneous Data},\nauthor={Jiaojiao Zhang and Jiang Hu and Anthony Man-Cho So and Mikael Johansson},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uO53206oLJ}\n}", "github": "", "reviewers": "GhoW;3dV9;qvSZ;q9zL", "pdf_size": 840713, "rating": "3;4;7;7", "confidence": "4;5;3;3", "soundness": "2;2;3;3", "novelty": "3;2;3;3", "presentation": "1;2;3;4", "wc_summary": "33;27;45;65", "wc_strengths": "19;27;31;64", "wc_weaknesses": "156;27;55;326", "wc_questions": "201;162;46;87", "wc_limitations": "4;4;13;7", "wc_review": "413;247;190;549", "wc_reply_reviewers": "212;0;10;143", "wc_reply_authors": "477;0;10;312", "reply_reviewers": "1;0;1;2", "reply_authors": "2;1;2;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 42.5, 14.517231140957975 ], "wc_strengths_avg": [ 35.25, 17.151894939043906 ], "wc_weaknesses_avg": [ 141.0, 117.09184429327262 ], "wc_questions_avg": [ 124.0, 60.881031528711794 ], "wc_limitations_avg": [ 7.0, 3.6742346141747673 ], "wc_review_avg": [ 349.75, 141.22566161997614 ], "wc_reply_reviewers_avg": [ 91.25, 89.70333048443631 ], "wc_reply_authors_avg": [ 199.75, 203.33024246284663 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8021806287494232, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7255143909135261055&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "kth.se;harvard.edu;cuhk.edu.hk;kth.se", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "KTH Royal Institute of Technology;Harvard University;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kth.se;https://www.harvard.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "KTH;Harvard;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Stockholm", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Sweden;United States;China" }, { "title": "Sample Complexity of Algorithm Selection Using Neural Networks and Its Applications to Branch-and-Cut", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93274", "id": "uOvrwVW1yA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uOvrwVW1yA", "openreview": "https://openreview.net/forum?id=uOvrwVW1yA", "poster": "", "project": "", "author_site": "Hongyu Cheng, Sammy Khalife, Barbara Fiedorowicz, Amitabh Basu", "tldr": "", "abstract": "Data-driven algorithm design is a paradigm that uses statistical and machine learning techniques to select from a class of algorithms for a computational problem an algorithm that has the best expected performance with respect to some (unknown) distribution on the instances of the problem. We build upon recent work in this line of research by considering the setup where, instead of selecting a single algorithm that has the best performance, we allow the possibility of selecting an algorithm based on the instance to be solved, using neural networks. In particular, given a representative sample of instances, we learn a neural network that maps an instance of the problem to the most appropriate algorithm *for that instance*. We formalize this idea and derive rigorous sample complexity bounds for this learning problem, in the spirit of recent work in data-driven algorithm design. We then apply this approach to the problem of making good decisions in the branch-and-cut framework for mixed-integer optimization (e.g., which cut to add?). In other words, the neural network will take as input a mixed-integer optimization instance and output a decision that will result in a small branch-and-cut tree for that instance. Our computational results provide evidence that our particular way of using neural networks for cut selection can make a significant impact in reducing branch-and-cut tree sizes, compared to previous data-driven approaches.", "keywords": "Integer programming;branch-and-cut;branch-and-bound;sample complexity;neural networks;learning theory;data-driven algorithm design", "primary_area": "optimization", "supplementary_material": "", "author": "Hongyu Cheng;Sammy Khalife;Barbara Fiedorowicz;Amitabh Basu", "authorids": "~Hongyu_Cheng1;~Sammy_Khalife1;~Barbara_Fiedorowicz1;~Amitabh_Basu1", "gender": "M;Not Specified;F;M", "homepage": "https://hongyucheng.net/;https://khalife.github.io;https://engineering.jhu.edu/ams/people/graduate-students/;", "dblp": ";230/7960;;", "google_scholar": "vx1h9sUAAAAJ;;;", "orcid": ";0000-0003-3161-7794;;", "linkedin": ";;;", "or_profile": "~Hongyu_Cheng1;~Sammy_Khalife1;~Barbara_Fiedorowicz1;~Amitabh_Basu1", "aff": "Johns Hopkins University;Cornell University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;cornell.edu;johnshopkins.edu;jhu.edu", "position": "PhD student;Postdoc;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncheng2024sample,\ntitle={Sample Complexity of Algorithm Selection Using Neural Networks and Its Applications to Branch-and-Cut},\nauthor={Hongyu Cheng and Sammy Khalife and Barbara Fiedorowicz and Amitabh Basu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uOvrwVW1yA}\n}", "github": "", "reviewers": "FoeV;vvGd;vkrW", "pdf_size": 408538, "rating": "5;7;7", "confidence": "4;2;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "2;4;3", "wc_summary": "99;95;63", "wc_strengths": "55;105;39", "wc_weaknesses": "67;145;122", "wc_questions": "104;21;71", "wc_limitations": "35;12;9", "wc_review": "360;378;304", "wc_reply_reviewers": "22;4;62", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 85.66666666666667, 16.110727964792762 ], "wc_strengths_avg": [ 66.33333333333333, 28.110891523077356 ], "wc_weaknesses_avg": [ 111.33333333333333, 32.72443871006635 ], "wc_questions_avg": [ 65.33333333333333, 34.120700787384514 ], "wc_limitations_avg": [ 18.666666666666668, 11.61416759345623 ], "wc_review_avg": [ 347.3333333333333, 31.510139461590597 ], "wc_reply_reviewers_avg": [ 29.333333333333332, 24.239545283597124 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lyckuRchR6wJ:scholar.google.com/&scioq=Sample+Complexity+of+Algorithm+Selection+Using+Neural+Networks+and+Its+Applications+to+Branch-and-Cut&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "jhu.edu;cornell.edu;johnshopkins.edu;jhu.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Johns Hopkins University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.jhu.edu;https://www.cornell.edu", "aff_unique_abbr": "JHU;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sequential Probability Assignment with Contexts: Minimax Regret, Contextual Shtarkov Sums, and Contextual Normalized Maximum Likelihood", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93273", "id": "uRnTYPkF3V", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uRnTYPkF3V", "openreview": "https://openreview.net/forum?id=uRnTYPkF3V", "poster": "", "project": "", "author_site": "Ziyi Liu, Idan Attias, Dan Roy", "tldr": "", "abstract": "We study the fundamental problem of sequential probability assignment, also known as online learning with logarithmic loss, with respect to an arbitrary, possibly nonparametric hypothesis class. Our goal is to obtain a complexity measure for the hypothesis class that characterizes the minimax regret and to determine a general, minimax optimal algorithm. Notably, the sequential $\\ell_{\\infty}$ entropy, extensively studied in the literature (Rakhlin and Sridharan, 2015, Bilodeau et al., 2020, Wu et al., 2023), was shown to not characterize minimax regret in general. Inspired by the seminal work of Shtarkov (1987)\n and Rakhlin, Sridharan, and Tewari (2010), we introduce a novel complexity measure, the \\emph{contextual Shtarkov sum}, corresponding to the Shtarkov sum after projection onto a multiary context tree, and show that the worst case log contextual Shtarkov sum equals the minimax regret. Using the contextual Shtarkov sum, we derive the minimax optimal strategy, dubbed \\emph{contextual Normalized Maximum Likelihood} (cNML). Our results hold for sequential experts, beyond binary labels, which are settings rarely considered in prior work. \n To illustrate the utility of this characterization, we provide a short proof of a new regret upper bound in terms of sequential $\\ell_{\\infty}$ entropy, unifying and sharpening state-of-the-art bounds by Bilodeau et al. (2020) and Wu et al. (2023).", "keywords": "online learning;log loss;probabilistic forecasting", "primary_area": "online_learning", "supplementary_material": "", "author": "Ziyi Liu;Idan Attias;Daniel M. Roy", "authorids": "~Ziyi_Liu7;~Idan_Attias1;~Daniel_M._Roy1", "gender": ";M;M", "homepage": "https://www.statistics.utoronto.ca/people/directories/graduate-students/ziyi-liu;https://www.idanattias.com;http://danroy.org", "dblp": ";228/6803;04/2068", "google_scholar": ";-L6uUy0AAAAJ;https://scholar.google.ca/citations?user=vA6ZQ_AAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ziyi_Liu7;~Idan_Attias1;~Daniel_M_Roy1", "aff": "University of Toronto;Tel Aviv University;University of Toronto", "aff_domain": "utoronto.ca;tau.ac.il;utoronto.ca", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2024sequential,\ntitle={Sequential Probability Assignment with Contexts: Minimax Regret, Contextual Shtarkov Sums, and Contextual Normalized Maximum Likelihood},\nauthor={Ziyi Liu and Idan Attias and Daniel M. Roy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uRnTYPkF3V}\n}", "github": "", "reviewers": "ZwzX;upYt;rqqz;qSjy", "pdf_size": 437339, "rating": "4;6;6;8", "confidence": "2;4;4;5", "soundness": "2;4;4;4", "novelty": "2;2;3;4", "presentation": "3;3;2;4", "wc_summary": "55;97;77;80", "wc_strengths": "55;54;70;191", "wc_weaknesses": "12;130;137;206", "wc_questions": "50;151;27;111", "wc_limitations": "1;42;2;1", "wc_review": "173;474;313;589", "wc_reply_reviewers": "0;246;21;45", "wc_reply_authors": "0;15;18;13", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.5, 0.8660254037844386 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.25, 14.939461168328663 ], "wc_strengths_avg": [ 92.5, 57.221062555670876 ], "wc_weaknesses_avg": [ 121.25, 69.71863093893913 ], "wc_questions_avg": [ 84.75, 49.0427109772696 ], "wc_limitations_avg": [ 11.5, 17.613914953808536 ], "wc_review_avg": [ 387.25, 157.83278334997453 ], "wc_reply_reviewers_avg": [ 78.0, 98.29292955243525 ], "wc_reply_authors_avg": [ 11.5, 6.87386354243376 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9733285267845752, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1421663118075210198&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "utoronto.ca;tau.ac.il;utoronto.ca", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.tau.ac.il", "aff_unique_abbr": "U of T;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;Israel" }, { "title": "SVFT: Parameter-Efficient Fine-Tuning with Singular Vectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93272", "id": "uS0PwIBzC0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uS0PwIBzC0", "openreview": "https://openreview.net/forum?id=uS0PwIBzC0", "poster": "", "project": "", "author_site": "Vijay Chandra Lingam, Atula Neerkaje, Aditya Vavre, Aneesh Shetty, Gautham Krishna Gudur, Joydeep Ghosh, Eunsol Choi, Alex Dimakis, Aleksandar Bojchevski, Sujay Sanghavi", "tldr": "", "abstract": "Popular parameter-efficient fine-tuning (PEFT) methods, such as LoRA and its variants, freeze pre-trained model weights $\\(\\mathbf{W}\\)$ and inject learnable matrices $\\(\\mathbf{\\Delta W}\\)$. These $\\(\\mathbf{\\Delta W}\\)$ matrices are structured for efficient parameterization, often using techniques like low-rank approximations or scaling vectors. However, these methods typically exhibit a performance gap compared to full fine-tuning. While recent PEFT methods have narrowed this gap, they do so at the expense of additional learnable parameters. We propose SVFT, a *simple* approach that structures $\\(\\mathbf{\\Delta W}\\)$ based on the specific weight matrix $\\(\\mathbf{W}\\)$. SVFT updates $\\(\\mathbf{W}\\)$ as a sparse combination $\\(M\\)$ of outer products of its singular vectors, training only the coefficients of these combinations. Crucially, we make additional off-diagonal elements in $M$ learnable, enabling a smooth trade-off between trainable parameters and expressivity\u2014an aspect that distinctly sets our approach apart from previous works leveraging singular values. Extensive experiments on language and vision benchmarks show that SVFT recovers up to **96%** of full fine-tuning performance while training only **0.006 to 0.25%** of parameters, outperforming existing methods that achieve only up to **{85\\%}** performance with **0.03 to 0.8%** of the trainable parameter budget.", "keywords": "Parameter Efficient Fine Tuning;Large Language Models;Deep Learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Vijay Lingam;Atula Tejaswi Neerkaje;Aditya Vavre;Aneesh Shetty;Gautham Krishna Gudur;Joydeep Ghosh;Eunsol Choi;Alex Dimakis;Aleksandar Bojchevski;sujay sanghavi", "authorids": "~Vijay_Lingam1;~Atula_Tejaswi_Neerkaje1;~Aditya_Vavre1;~Aneesh_Shetty1;~Gautham_Krishna_Gudur1;~Joydeep_Ghosh1;~Eunsol_Choi1;~Alex_Dimakis1;~Aleksandar_Bojchevski1;~sujay_sanghavi1", "gender": "M;M;M;M;M;M;;M;M;M", "homepage": ";https://atutej.github.io/;;https://www.cs.utexas.edu/~aneeshks/;https://gauthamkrishna-g.github.io/;http://ideal.ece.utexas.edu/ghosh/;https://eunsol.github.io/;https://people.eecs.berkeley.edu/~alexdimakis/;https://abojchevski.github.io/;https://sites.utexas.edu/sanghavi", "dblp": "219/1559.html;315/4311;;;;51/2272;116/2765;19/5000.html;203/8114;69/4911.html", "google_scholar": "FPOCruQAAAAJ;https://scholar.google.com/citations?hl=en;zBOPtrEAAAAJ;IFPhHjAAAAAJ;X5ThCEAAAAAJ;;6wulN88AAAAJ;JSFmVQEAAAAJ;https://scholar.google.de/citations?user=F1APiN4AAAAJ;O-DazBUAAAAJ", "orcid": ";;;;;;0000-0003-3607-9104;;;", "linkedin": ";atula-tejaswi-neerkaje-4b989b157;adityavavre/;aneeshkshetty;gauthamkrishna-g/;;;alex-dimakis-b1b20320/;;", "or_profile": "~Vijay_Lingam1;~Atula_Tejaswi_Neerkaje1;~Aditya_Vavre1;~Aneesh_Shetty1;~Gautham_Krishna_Gudur1;~Joydeep_Ghosh1;~Eunsol_Choi1;~Alex_Dimakis1;~Aleksandar_Bojchevski1;~sujay_sanghavi1", "aff": "University of Texas at Austin;University of Texas at Austin;;University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas, Austin;University of Texas at Austin;University of Cologne;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;;cs.utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu;uni-koeln.de;utexas.edu", "position": "MS student;MS student;;MS student;PhD student;Full Professor;Assistant Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlingam2024svft,\ntitle={{SVFT}: Parameter-Efficient Fine-Tuning with Singular Vectors},\nauthor={Vijay Lingam and Atula Tejaswi Neerkaje and Aditya Vavre and Aneesh Shetty and Gautham Krishna Gudur and Joydeep Ghosh and Eunsol Choi and Alex Dimakis and Aleksandar Bojchevski and sujay sanghavi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uS0PwIBzC0}\n}", "github": "", "reviewers": "x5z5;BEJi;DEin;Cyjn", "pdf_size": 877329, "rating": "3;5;6;7", "confidence": "5;5;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "58;57;121;60", "wc_strengths": "11;29;128;54", "wc_weaknesses": "132;119;1267;119", "wc_questions": "42;3;148;2", "wc_limitations": "1;10;5;1", "wc_review": "244;218;1669;236", "wc_reply_reviewers": "361;51;41;47", "wc_reply_authors": "1280;0;34;21", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;2;2", "rating_avg": [ 5.25, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 74.0, 27.15695122800054 ], "wc_strengths_avg": [ 55.5, 44.556144357428416 ], "wc_weaknesses_avg": [ 409.25, 495.25063099404525 ], "wc_questions_avg": [ 48.75, 59.52887954598172 ], "wc_limitations_avg": [ 4.25, 3.6996621467371855 ], "wc_review_avg": [ 591.75, 622.021854519598 ], "wc_reply_reviewers_avg": [ 125.0, 136.3011371926148 ], "wc_reply_authors_avg": [ 333.75, 546.4523652616026 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.8451542547285166, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2725522281591158971&as_sdt=40000005&sciodt=0,22&hl=en", "gs_version_total": 6, "email": "utexas.edu;utexas.edu;;cs.utexas.edu;utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu;uni-koeln.de;utexas.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;1;0", "aff_unique_norm": "University of Texas at Austin;University of Cologne", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.uni-koeln.de/", "aff_unique_abbr": "UT Austin;UC", "aff_campus_unique_index": "0;0;0;0;0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;Germany" }, { "id": "uS9RZH6K65", "title": "DENOISER: Rethinking the Robustness for Open-Vocabulary Action Recognition", "track": "main", "status": "Reject", "tldr": "", "abstract": "As one of the fundamental video tasks in computer vision, Open-Vocabulary Action Recognition (OVAR) has recently gained increasing attention, with the development of vision-language pre-trainings. To enable open-vocabulary generalization, existing methods formulate vanilla OVAR to evaluate the embedding similarity between visual samples and text descriptions. However, one crucial issue is completely ignored: the text descriptions given by users may be noisy, e.g., misspellings and typos, limiting the real-world practicality. To fill the research gap, this paper analyzes the noise rate/type in text descriptions by full statistics of manual spelling; then reveals the poor robustness of existing methods; and finally rethinks to study a practical task: noisy OVAR. One novel DENOISER framework, covering two parts: generation and discrimination, is further proposed for solution. Concretely, the generative part denoises noisy text descriptions via a decoding process, i.e., proposes text candidates, then utilizes inter-modal and intra-modal information to vote for the best. At the discriminative part, we use vanilla OVAR models to assign visual samples to text descriptions, injecting more semantics. For optimization, we alternately iterate between generative-discriminative parts for progressive refinements. The denoised text descriptions help OVAR models classify visual samples more accurately; in return, assigned visual samples help better denoising. We carry out extensive experiments to show our superior robustness, and tho rough ablations to dissect the effectiveness of each component.", "keywords": "Open-Vocabulary Action Recognition;Multi-Modal Pre-training;Multi-Modal Robust Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/ff61099c6fc915926fd29fa099364181762c2188.zip", "author": "Haozhe Cheng;Chen Ju;Haicheng Wang;Jinxiang Liu;Mengting Chen;Xiaoyun Zhang;Yanfeng Wang", "authorids": "~Haozhe_Cheng3;~Chen_Ju1;~Haicheng_Wang2;~Jinxiang_Liu1;~Mengting_Chen1;~Xiaoyun_Zhang1;~Yanfeng_Wang1", "gender": ";M;M;M;;F;M", "homepage": ";https://voide1220.github.io/;;https://jinxiang-liu.github.io/;;https://mediabrain.sjtu.edu.cn/xiaoyun-zhang/;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": ";221/1300;150/4188;70/6217;;;55/5407-1.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=b2jNn10AAAAJ;;wSRKaWIAAAAJ;;hQm9oqwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";0000-0001-8472-7677;0000-0002-5197-6834;0000-0003-2583-8881;;0000-0001-7680-4062;0000-0002-3196-2347", "linkedin": "https://linkedin.com/in/chenghaozhe;;;;;;", "or_profile": "~Haozhe_Cheng3;~Chen_Ju1;~Haicheng_Wang2;~Jinxiang_Liu1;~Mengting_Chen1;~Xiaoyun_Zhang1;~Yanfeng_Wang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;T\u00e9l\u00e9com Paris;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu;telecom-paris.fr;sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;MS student;PhD student;;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024denoiser,\ntitle={{DENOISER}: Rethinking the Robustness for Open-Vocabulary Action Recognition},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=uS9RZH6K65}\n}", "github": "", "project": "", "reviewers": "eAxG;5bE7;YJ4Z;bX9i", "site": "https://openreview.net/forum?id=uS9RZH6K65", "pdf_size": 7308777, "rating": "3;4;4;5", "confidence": "5;5;3;4", "soundness": "3;2;2;3", "novelty": "2;2;2;2", "presentation": "3;3;3;2", "wc_summary": "64;117;53;59", "wc_strengths": "28;75;55;51", "wc_weaknesses": "177;202;76;227", "wc_questions": "2;5;116;5", "wc_limitations": "11;5;20;12", "wc_review": "282;404;320;354", "wc_reply_reviewers": "181;374;0;35", "wc_reply_authors": "48;500;95;134", "reply_reviewers": "1;3;0;1", "reply_authors": "2;4;3;3", "rating_avg": [ 4.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.25, 25.557533136044253 ], "wc_strengths_avg": [ 52.25, 16.69393602479655 ], "wc_weaknesses_avg": [ 170.5, 57.35198340075084 ], "wc_questions_avg": [ 32.0, 48.51288488638869 ], "wc_limitations_avg": [ 12.0, 5.338539126015656 ], "wc_review_avg": [ 340.0, 44.87761134463375 ], "wc_reply_reviewers_avg": [ 147.5, 147.34059182723544 ], "wc_reply_authors_avg": [ 194.25, 179.1317601655273 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6897136082934061292&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.telecom-paris.fr", "aff_unique_abbr": "SJTU;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;France" }, { "title": "Nonlocal Attention Operator: Materializing Hidden Knowledge Towards Interpretable Physics Discovery", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93271", "id": "uSKzEaj9zJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uSKzEaj9zJ", "openreview": "https://openreview.net/forum?id=uSKzEaj9zJ", "poster": "", "project": "", "author_site": "Yue Yu, Ning Liu, Fei Lu, Tian Gao, Siavash Jafarzadeh, Stewart A Silling", "tldr": "", "abstract": "Despite recent popularity of attention-based neural architectures in core AI fields like natural language processing (NLP) and computer vision (CV), their potential in modeling complex physical systems remains under-explored. Learning problems in physical systems are often characterized as discovering operators that map between function spaces based on a few instances of function pairs. This task frequently presents a severely ill-posed PDE inverse problem. In this work, we propose a novel neural operator architecture based on the attention mechanism, which we coin Nonlocal Attention Operator (NAO), and explore its capability towards developing a foundation physical model. In particular, we show that the attention mechanism is equivalent to a double integral operator that enables nonlocal interactions among spatial tokens, with a data-dependent kernel characterizing the inverse mapping from data to the hidden parameter field of the underlying operator. As such, the attention mechanism extracts global prior information from training data generated by multiple systems, and suggests the exploratory space in the form of a nonlinear kernel map. Consequently, NAO can address ill-posedness and rank deficiency in inverse PDE problems by encoding regularization and achieving generalizability. Lastly, we empirically demonstrate the advantages of NAO over baseline neural models in terms of the generalizability to unseen data resolutions and system states. Our work not only suggests a novel neural operator architecture for learning an interpretable foundation model of physical systems, but also offers a new perspective towards understanding the attention mechanism.", "keywords": "Foundation Model;Neural Operators;Inverse PDE Problems;Physical Modeling", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Yue Yu;Ning Liu;Fei Lu;Tian Gao;Siavash Jafarzadeh;Stewart A Silling", "authorids": "~Yue_Yu3;~Ning_Liu6;~Fei_Lu2;~Tian_Gao1;~Siavash_Jafarzadeh1;~Stewart_A_Silling1", "gender": ";;Not Specified;;M;", "homepage": ";;https://math.jhu.edu/~feilu/;https://sites.google.com/view/tiangao/home;;https://www.sandia.gov/~sasilli", "dblp": ";;;;;", "google_scholar": ";;bVyXEsQAAAAJ;5rweipAAAAAJ;BDo5e7sAAAAJ;", "orcid": ";;;0000-0002-0337-6682;;", "linkedin": ";;;;siavash-jafarzadeh/;", "or_profile": "~Yue_Yu3;~Ning_Liu6;~Fei_Lu2;~Tian_Gao1;~Siavash_Jafarzadeh1;~Stewart_A_Silling1", "aff": ";;Johns Hopkins University;Rensselaer Polytechnic Institute;Lehigh University;Sandia National Laboratories", "aff_domain": ";;jhu.edu;rpi.edu;lehigh.edu;sandia.gov", "position": ";;Associate Professor;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nyu2024nonlocal,\ntitle={Nonlocal Attention Operator: Materializing Hidden Knowledge Towards Interpretable Physics Discovery},\nauthor={Yue Yu and Ning Liu and Fei Lu and Tian Gao and Siavash Jafarzadeh and Stewart A Silling},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uSKzEaj9zJ}\n}", "github": "", "reviewers": "BrBK;eNuS;J2Mv;eXH4", "pdf_size": 809566, "rating": "7;7;7;7", "confidence": "3;2;3;3", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "110;79;110;138", "wc_strengths": "22;28;324;122", "wc_weaknesses": "107;12;110;53", "wc_questions": "126;32;107;34", "wc_limitations": "5;32;43;1", "wc_review": "370;183;694;348", "wc_reply_reviewers": "14;0;0;0", "wc_reply_authors": "32;0;32;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;2;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 109.25, 20.873128658636684 ], "wc_strengths_avg": [ 124.0, 122.09013064126027 ], "wc_weaknesses_avg": [ 70.5, 40.68476373287671 ], "wc_questions_avg": [ 74.75, 42.29287765097097 ], "wc_limitations_avg": [ 20.25, 17.73943347460679 ], "wc_review_avg": [ 398.75, 185.15044558412492 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 16.0, 16.0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16803501314763166648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": ";;jhu.edu;rpi.edu;lehigh.edu;sandia.gov", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Johns Hopkins University;Rensselaer Polytechnic Institute;Lehigh University;Sandia National Laboratories", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.jhu.edu;https://www.rpi.edu;https://www.lehigh.edu;https://www.sandia.gov", "aff_unique_abbr": "JHU;RPI;Lehigh;SNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PACE: Pacing Operator Learning to Accurate Optical Field Simulation for Complicated Photonic Devices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93270", "id": "uXJlgkWdcI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uXJlgkWdcI", "openreview": "https://openreview.net/forum?id=uXJlgkWdcI", "poster": "/media/PosterPDFs/NeurIPS%202024/93270.png?t=1731747687.0544758", "project": "", "author_site": "Hanqing Zhu, Wenyan Cong, Guojin Chen, Shupeng Ning, Ray Chen, Jiaqi Gu, David Z. Pan", "tldr": "", "abstract": "Electromagnetic field simulation is central to designing, optimizing, and validating photonic devices and circuits. \nHowever, costly computation associated with numerical simulation poses a significant bottleneck, hindering scalability and turnaround time in the photonic circuit design process.\nNeural operators offer a promising alternative, but existing SOTA approaches, Neurolight, struggle with predicting high-fidelity fields for real-world complicated photonic devices, with the best reported 0.38 normalized mean absolute error in Neurolight.\nThe interplays of highly complex light-matter interaction, e.g., scattering and resonance, sensitivity to local structure details, non-uniform learning complexity for full-domain simulation, and rich frequency information, contribute to the failure of existing neural PDE solvers.\nIn this work, we boost the prediction fidelity to an unprecedented level for simulating complex photonic devices with a novel operator design driven by the above challenges.\nWe propose a novel cross-axis factorized PACE operator with a strong long-distance modeling capacity to connect the full-domain complex field pattern with local device structures.\nInspired by human learning, we further divide and conquer the simulation task for extremely hard cases into two progressively easy tasks, with a first-stage model learning an initial solution refined by a second model.\nOn various complicated photonic device benchmarks, we demonstrate one sole PACE model is capable of achieving 73% lower error with 50% fewer parameters compared with various recent ML for PDE solvers.\nThe two-stage setup further advances high-fidelity simulation for even more intricate cases.\nIn terms of runtime, \nPACE demonstrates 154-577x and 11.8-12x simulation speedup over numerical solver using scipy or highly-optimized pardiso solver, respectively.\nWe open-sourced the code and *complicated* optical device dataset at [PACE-Light](https://github.com/zhuhanqing/PACE-Light).", "keywords": "AI for Scienece;Optical simulation;Neural opeartor;AI for PDE", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Hanqing Zhu;Wenyan Cong;Guojin Chen;Shupeng Ning;Ray Chen;Jiaqi Gu;David Z. Pan", "authorids": "~Hanqing_Zhu1;~Wenyan_Cong1;~Guojin_Chen1;~Shupeng_Ning1;~Ray_Chen2;~Jiaqi_Gu3;~David_Z._Pan1", "gender": "M;F;M;M;M;M;M", "homepage": "https://zhuhanqing.github.io/;https://wenyancong.com/;https://gjchen.me;;http://www.mrc.utexas.edu/people/faculty/ray-chen;https://scopex-asu.github.io;http://users.ece.utexas.edu/~dpan/", "dblp": "164/8690;247/9471;53/8542;;53/4318.html;;p/DavidZhigangPan.html", "google_scholar": "myMcrNEAAAAJ;uQV5aCsAAAAJ;842nSvkAAAAJ;TnL76GIAAAAJ;GnV8UogAAAAJ;FeIV12MAAAAJ;3aLlroEAAAAJ", "orcid": ";;;0000-0002-5780-9711;0000-0002-9181-4266;;0000-0002-5705-2501", "linkedin": ";;;;ray-chen-17a0041/;;davidzpan/", "or_profile": "~Hanqing_Zhu1;~Wenyan_Cong1;~Guojin_Chen1;~Shupeng_Ning1;~Ray_Chen2;~Jiaqi_Gu3;~David_Z._Pan1", "aff": "University of Texas, Austin;University of Texas at Austin;Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Texas at Austin;University of Texas, Austin;Arizona State University;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;cse.cuhk.edu.hk;utexas.edu;utexas.edu;asu.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nzhu2024pace,\ntitle={{PACE}: Pacing Operator Learning to Accurate Optical Field Simulation for Complicated Photonic Devices},\nauthor={Hanqing Zhu and Wenyan Cong and Guojin Chen and Shupeng Ning and Ray Chen and Jiaqi Gu and David Z. Pan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uXJlgkWdcI}\n}", "github": "", "reviewers": "weV6;9WdN;ZkVJ;TfAG", "pdf_size": 21367711, "rating": "6;6;6;7", "confidence": "3;4;3;4", "soundness": "3;4;3;4", "novelty": "2;4;3;3", "presentation": "2;3;2;3", "wc_summary": "28;68;80;114", "wc_strengths": "37;86;30;191", "wc_weaknesses": "39;133;48;650", "wc_questions": "253;72;16;306", "wc_limitations": "23;1;1;85", "wc_review": "380;360;175;1346", "wc_reply_reviewers": "6;0;0;272", "wc_reply_authors": "120;510;235;539", "reply_reviewers": "1;0;0;1", "reply_authors": "2;4;3;5", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 72.5, 30.736785778607366 ], "wc_strengths_avg": [ 86.0, 64.34671708797582 ], "wc_weaknesses_avg": [ 217.5, 252.3831412753237 ], "wc_questions_avg": [ 161.75, 120.86433510345391 ], "wc_limitations_avg": [ 27.5, 34.39113257803529 ], "wc_review_avg": [ 565.25, 457.79655688962976 ], "wc_reply_reviewers_avg": [ 69.5, 116.93908670756754 ], "wc_reply_authors_avg": [ 351.0, 178.4950979719051 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10448956516772333746&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "utexas.edu;utexas.edu;cse.cuhk.edu.hk;utexas.edu;utexas.edu;asu.edu;utexas.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;0;2;0", "aff_unique_norm": "University of Texas at Austin;Chinese University of Hong Kong;Arizona State University", "aff_unique_dep": ";Department of Computer Science and Engineering;", "aff_unique_url": "https://www.utexas.edu;https://www.cuhk.edu.hk;https://www.asu.edu", "aff_unique_abbr": "UT Austin;CUHK;ASU", "aff_campus_unique_index": "0;0;1;0;0;0", "aff_campus_unique": "Austin;Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Hamiltonian Monte Carlo Inference of Marginalized Linear Mixed-Effects Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93269", "id": "uXuObobJHO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uXuObobJHO", "openreview": "https://openreview.net/forum?id=uXuObobJHO", "poster": "/media/PosterPDFs/NeurIPS%202024/93269.png?t=1733417015.9231067", "project": "", "author_site": "Jinlin Lai, Daniel Sheldon, Justin Domke", "tldr": "", "abstract": "Bayesian reasoning in linear mixed-effects models (LMMs) is challenging and often requires advanced sampling techniques like Markov chain Monte Carlo (MCMC).\nA common approach is to write the model in a probabilistic programming language and then sample via Hamiltonian Monte Carlo (HMC).\nHowever, there are many ways a user can transform a model that make inference more or less efficient.\nIn particular, marginalizing some variables can greatly improve inference but is difficult for users to do manually.\nWe develop an algorithm to easily marginalize random effects in LMMs.\nA naive approach introduces cubic time operations within an inference algorithm like HMC, but we reduce the running time to linear using fast linear algebra techniques.\nWe show that marginalization is always beneficial when applicable and highlight improvements in various models, especially ones from cognitive sciences.", "keywords": "Hamiltonian Monte Carlo;Bayesian inference;hierarchical models", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/18e3eaec43ff39372f5f5b4bcab3ff932a73deb9.zip", "author": "Jinlin Lai;Justin Domke;Daniel Sheldon", "authorids": "~Jinlin_Lai2;~Justin_Domke1;~Daniel_Sheldon1", "gender": "M;Unspecified;M", "homepage": "https://lll6924.github.io/;https://people.cs.umass.edu/~domke/;https://people.cs.umass.edu/~sheldon/", "dblp": ";39/5186;58/766", "google_scholar": "tCc0cGwAAAAJ;;https://scholar.google.com.tw/citations?user=P1bHFuoAAAAJ", "orcid": ";;", "linkedin": "jinlin-lai/;;", "or_profile": "~Jinlin_Lai2;~Justin_Domke1;~Dan_Sheldon1", "aff": "Flatiron Institute;University of Massachusetts at Amherst;University of Massachusetts, Amherst", "aff_domain": "flatironinstitute.org;umass.edu;umass.edu", "position": "Intern;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nlai2024hamiltonian,\ntitle={Hamiltonian Monte Carlo Inference of Marginalized Linear Mixed-Effects Models},\nauthor={Jinlin Lai and Daniel Sheldon and Justin Domke},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uXuObobJHO}\n}", "github": "", "reviewers": "UJHx;aGrs;jeEo;u9Xy", "pdf_size": 980285, "rating": "5;6;6;7", "confidence": "4;3;4;4", "soundness": "3;4;3;3", "novelty": "1;3;4;3", "presentation": "2;3;3;3", "wc_summary": "85;157;75;146", "wc_strengths": "95;76;112;62", "wc_weaknesses": "58;290;245;34", "wc_questions": "6;40;80;34", "wc_limitations": "23;7;57;1", "wc_review": "267;570;569;277", "wc_reply_reviewers": "17;5;82;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 115.75, 36.134298111351214 ], "wc_strengths_avg": [ 86.25, 18.925842121290138 ], "wc_weaknesses_avg": [ 156.75, 112.20823276391087 ], "wc_questions_avg": [ 40.0, 26.419689627245813 ], "wc_limitations_avg": [ 22.0, 21.748563170931547 ], "wc_review_avg": [ 420.75, 148.79243092308155 ], "wc_reply_reviewers_avg": [ 27.25, 31.98730216820418 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EOrXi80npUcJ:scholar.google.com/&scioq=Hamiltonian+Monte+Carlo+Inference+of+Marginalized+Linear+Mixed-Effects+Models&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "flatironinstitute.org;umass.edu;umass.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "Flatiron Institute;University of Massachusetts Amherst", "aff_unique_dep": ";", "aff_unique_url": "https://flatironinstitute.org;https://www.umass.edu", "aff_unique_abbr": "Flatiron;UMass Amherst", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Motif-oriented influence maximization for viral marketing in large-scale social networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93268", "id": "uYZTzcHaQB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uYZTzcHaQB", "openreview": "https://openreview.net/forum?id=uYZTzcHaQB", "poster": "/media/PosterPDFs/NeurIPS%202024/93268.png?t=1730188818.88421", "project": "", "author_site": "Mingyang Zhou, Weiji Cao, Hao Liao, Rui Mao", "tldr": "", "abstract": "The influence maximization (IM) problem aims to identify a budgeted set of nodes with the highest potential to influence the largest number of users in a cascade model, a key challenge in viral marketing. Traditional \\emph{IM} approaches consider each user/node independently as a potential target customer. However, in many scenarios, the target customers comprise motifs, where activating only one or a few users within a motif is insufficient for effective viral marketing, which, nevertheless, receives little attention. For instance, if a motif of three friends planning to dine together, targeting all three simultaneously is crucial for a restaurant advertisement to succeed.\nIn this paper, we address the motif-oriented influence maximization problem under the linear threshold model. We prove that the motif-oriented IM problem is NP-hard and that the influence function is neither supermodular nor submodular, in contrast to the classical \\emph{IM} setting.\nTo simplify the problem, we establish the submodular upper and lower bounds for the influence function. By leveraging the submodular property, we propose a natural greedy strategy that simultaneously maximizes both bounds. Our algorithm has an approximation ratio of $\\tau\\cdot (1-1/e-\\varepsilon)$ and a near-linear time complexity of $O((k+l)(m+\\eta)\\log \\eta/\\varepsilon^2)$.\nExperimental results on diverse datasets confirm the effectiveness of our approach in motif maximization.", "keywords": "influence maximization;influential node;viral marketing", "primary_area": "optimization", "supplementary_material": "", "author": "Mingyang Zhou;Weiji Cao;Hao Liao;Rui Mao", "authorids": "~Mingyang_Zhou2;~Weiji_Cao1;~Hao_Liao1;~Rui_Mao2", "gender": "M;;M;M", "homepage": ";https://github.com/mingzihao1;https://csse.szu.edu.cn/pages/user/index?id=542;https://www.sics.ac.cn/mao/szu/eng/", "dblp": "195/5899-1;;74/1078;51/5793", "google_scholar": ";;Tu5ZuREAAAAJ;", "orcid": ";;;", "linkedin": ";;hao-liao-30635127;", "or_profile": "~Mingyang_Zhou2;~Weiji_Cao1;~Hao_Liao1;~Rui_Mao2", "aff": "Shenzhen University;Shenzhen University;Shenzhen University;Shenzhen University", "aff_domain": "szu.edu.cn;szu.edu.cn;szu.edu.cn;szu.edu.cn", "position": "Associate Professor;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024motiforiented,\ntitle={Motif-oriented influence maximization for viral marketing in large-scale social networks},\nauthor={Mingyang Zhou and Weiji Cao and Hao Liao and Rui Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uYZTzcHaQB}\n}", "github": "", "reviewers": "hHht;zfSx;3uSV;MwLB", "pdf_size": 469315, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "126;53;94;65", "wc_strengths": "36;63;100;53", "wc_weaknesses": "144;157;90;66", "wc_questions": "38;56;53;18", "wc_limitations": "1;30;1;30", "wc_review": "345;359;338;232", "wc_reply_reviewers": "10;27;51;26", "wc_reply_authors": "0;92;0;0", "reply_reviewers": "1;1;1;2", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.5, 28.217902119044926 ], "wc_strengths_avg": [ 63.0, 23.441416339462084 ], "wc_weaknesses_avg": [ 114.25, 37.512497917360825 ], "wc_questions_avg": [ 41.25, 15.056144924913548 ], "wc_limitations_avg": [ 15.5, 14.5 ], "wc_review_avg": [ 318.5, 50.50990001969911 ], "wc_reply_reviewers_avg": [ 28.5, 14.637281168304447 ], "wc_reply_authors_avg": [ 23.0, 39.83716857408418 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:0ZAnbiOlzpQJ:scholar.google.com/&scioq=Motif-oriented+influence+maximization+for+viral+marketing+in+large-scale+social+networks&hl=en&as_sdt=0,47", "gs_version_total": 2, "email": "szu.edu.cn;szu.edu.cn;szu.edu.cn;szu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shenzhen University", "aff_unique_dep": "", "aff_unique_url": "https://www.szu.edu.cn", "aff_unique_abbr": "SZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A Primal-Dual-Assisted Penalty Approach to Bilevel Optimization with Coupled Constraints", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93267", "id": "uZi7H5Ac0X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uZi7H5Ac0X", "openreview": "https://openreview.net/forum?id=uZi7H5Ac0X", "poster": "/media/PosterPDFs/NeurIPS%202024/93267.png?t=1731605278.4146042", "project": "", "author_site": "Liuyuan Jiang, Quan Xiao, Victor Tenorio, Fernando Real-Rojas, Antonio G. Marques, Tianyi Chen", "tldr": "", "abstract": "Interest in bilevel optimization has grown in recent years, partially due to its relevance for challenging machine-learning problems. Several exciting recent works have been centered around developing efficient gradient-based algorithms that can solve bilevel optimization problems with provable guarantees. However, the existing literature mainly focuses on bilevel problems either without constraints, or featuring only simple constraints that do not couple variables across the upper and lower levels, excluding a range of complex applications. Our paper studies this challenging but less explored scenario and develops a (fully) first-order algorithm, which we term BLOCC, to tackle BiLevel Optimization problems with Coupled Constraints. We establish rigorous convergence theory for the proposed algorithm and demonstrate its effectiveness on two well-known real-world applications - support vector machine (SVM) - based model training and infrastructure planning in transportation networks.", "keywords": "Bilevel Optimization;Constrained Optimization;Hessian-free;Convergence Analysis;Penalty Based;Primal Dual", "primary_area": "optimization", "supplementary_material": "/attachment/3594f83f2ce3da0c3774d7a0290ff05ff81c2536.zip", "author": "Liuyuan Jiang;Quan Xiao;Victor M. Tenorio;Fernando Real-Rojas;Antonio Marques;Tianyi Chen", "authorids": "~Liuyuan_Jiang1;~Quan_Xiao1;~Victor_M._Tenorio1;~Fernando_Real-Rojas1;~Antonio_Marques1;~Tianyi_Chen5", "gender": ";F;M;M;M;M", "homepage": "https://liuyuan999.github.io/;https://jenniferquanxiao.github.io;;https://tsc.urjc.es/~amarques/Recent%20Publications.html#conferences;https://chentianyi1991.github.io/;https://vmtenorio.github.io", "dblp": ";;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;d05JMMkAAAAJ;kFwvv38AAAAJ;4CWKOUoAAAAJ", "orcid": ";;;0000-0002-4642-7718;;0000-0002-0079-7326", "linkedin": ";;fernando-real-rojas-a65078149?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;;vmtenorio/", "or_profile": "~Liuyuan_Jiang1;~Quan_Xiao1;~Fernando_Real-Rojas1;~Antonio_Marques1;~Tianyi_Chen5;~Victor_Manuel_Tenorio1", "aff": "Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute;Universidad Rey Juan Carlos;King Juan Carlos University;Rensselaer Polytechnic Institute;Universidad Rey Juan Carlos", "aff_domain": "rpi.edu;rpi.edu;urjc.es;urjc.es;rpi.edu;urjc.es", "position": "PhD student;PhD student;Undergrad student;Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\njiang2024a,\ntitle={A Primal-Dual-Assisted Penalty Approach to Bilevel Optimization with Coupled Constraints},\nauthor={Liuyuan Jiang and Quan Xiao and Victor M. Tenorio and Fernando Real-Rojas and Antonio Marques and Tianyi Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uZi7H5Ac0X}\n}", "github": "", "reviewers": "QYWG;SFco;YCHS;BSgg", "pdf_size": 33830639, "rating": "5;5;6;7", "confidence": "3;4;4;3", "soundness": "4;3;3;3", "novelty": "4;3;3;3", "presentation": "4;3;3;2", "wc_summary": "93;40;43;244", "wc_strengths": "93;68;41;95", "wc_weaknesses": "248;33;181;312", "wc_questions": "109;11;44;75", "wc_limitations": "4;1;10;29", "wc_review": "547;153;319;755", "wc_reply_reviewers": "45;19;43;84", "wc_reply_authors": "0;0;211;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 105.0, 82.96686085419888 ], "wc_strengths_avg": [ 74.25, 21.947380253688593 ], "wc_weaknesses_avg": [ 193.5, 103.59657330240223 ], "wc_questions_avg": [ 59.75, 36.34126442489309 ], "wc_limitations_avg": [ 11.0, 10.88577052853862 ], "wc_review_avg": [ 443.5, 227.83491830709357 ], "wc_reply_reviewers_avg": [ 47.75, 23.29565410114084 ], "wc_reply_authors_avg": [ 52.75, 91.36568009925827 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9548285472026740596&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "rpi.edu;rpi.edu;urjc.es;urjc.es;rpi.edu;urjc.es", "author_num": 6, "aff_unique_index": "0;0;1;2;0;1", "aff_unique_norm": "Rensselaer Polytechnic Institute;Universidad Rey Juan Carlos;King Juan Carlos University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rpi.edu;https://www.urjc.es;https://www.uc3m.es", "aff_unique_abbr": "RPI;URJC;UC3M", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;1", "aff_country_unique": "United States;Spain" }, { "title": "Efficient Sign-Based Optimization: Accelerating Convergence via Variance Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93266", "id": "uaNZvF1VFe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uaNZvF1VFe", "openreview": "https://openreview.net/forum?id=uaNZvF1VFe", "poster": "/media/PosterPDFs/NeurIPS%202024/93266.png?t=1730356566.600845", "project": "", "author_site": "Wei Jiang, Sifan Yang, Wenhao Yang, Lijun Zhang", "tldr": "", "abstract": "Sign stochastic gradient descent (signSGD) is a communication-efficient method that transmits only the sign of stochastic gradients for parameter updating. Existing literature has demonstrated that signSGD can achieve a convergence rate of $\\mathcal{O}(d^{1/2}T^{-1/4})$, where $d$ represents the dimension and $T$ is the iteration number. In this paper, we improve this convergence rate to $\\mathcal{O}(d^{1/2}T^{-1/3})$ by introducing the Sign-based Stochastic Variance Reduction (SSVR) method, which employs variance reduction estimators to track gradients and leverages their signs to update. For finite-sum problems, our method can be further enhanced to achieve a convergence rate of $\\mathcal{O}(m^{1/4}d^{1/2}T^{-1/2})$, where $m$ denotes the number of component functions. Furthermore, we investigate the heterogeneous majority vote in distributed settings and introduce two novel algorithms that attain improved convergence rates of $\\mathcal{O}(d^{1/2}T^{-1/2} + dn^{-1/2})$ and $\\mathcal{O}(d^{1/4}T^{-1/4})$ respectively, outperforming the previous results of $\\mathcal{O}(dT^{-1/4} + dn^{-1/2})$ and $\\mathcal{O}(d^{3/8}T^{-1/8})$, where $n$ represents the number of nodes. Numerical experiments across different tasks validate the effectiveness of our proposed methods.", "keywords": "SignSGD;majority vote;sign-based method;variance reduction", "primary_area": "optimization", "supplementary_material": "", "author": "Wei Jiang;Sifan Yang;Wenhao Yang;Lijun Zhang", "authorids": "~Wei_Jiang8;~Sifan_Yang2;~Wenhao_Yang3;~Lijun_Zhang1", "gender": "M;M;M;", "homepage": "http://www.lamda.nju.edu.cn/jiangw/?AspxAutoDetectCookieSupport=1;https://www.lamda.nju.edu.cn/yangsf/;http://www.lamda.nju.edu.cn/yangwh/;", "dblp": ";251/2905;233/4699;", "google_scholar": ";qTISlvMAAAAJ;ycccau7cWYIC;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wei_Jiang8;~Sifan_Yang2;~Wenhao_Yang3;~Lijun_Zhang1", "aff": "Nanjing University;Nanjing University;Nanjing University;", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;", "position": "PhD student;MS student;PhD student;", "bibtex": "@inproceedings{\njiang2024efficient,\ntitle={Efficient Sign-Based Optimization: Accelerating Convergence via Variance Reduction},\nauthor={Wei Jiang and Sifan Yang and Wenhao Yang and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uaNZvF1VFe}\n}", "github": "", "reviewers": "2Gd9;XovB;Huhp;hYmq", "pdf_size": 871361, "rating": "6;7;7;7", "confidence": "4;5;4;3", "soundness": "3;3;4;3", "novelty": "3;3;4;3", "presentation": "3;3;4;3", "wc_summary": "140;78;82;76", "wc_strengths": "251;78;94;10", "wc_weaknesses": "98;149;82;8", "wc_questions": "73;75;31;117", "wc_limitations": "1;12;8;1", "wc_review": "563;392;297;212", "wc_reply_reviewers": "17;20;19;14", "wc_reply_authors": "0;0;0;21", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 26.645825188948457 ], "wc_strengths_avg": [ 108.25, 88.24504235366426 ], "wc_weaknesses_avg": [ 84.25, 50.4993811843274 ], "wc_questions_avg": [ 74.0, 30.4138126514911 ], "wc_limitations_avg": [ 5.5, 4.716990566028302 ], "wc_review_avg": [ 366.0, 130.3476121760579 ], "wc_reply_reviewers_avg": [ 17.5, 2.29128784747792 ], "wc_reply_authors_avg": [ 5.25, 9.093266739736606 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12978498213434839289&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Unifying Homophily and Heterophily for Spectral Graph Neural Networks via Triple Filter Ensembles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93265", "id": "uatPOPWzzU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uatPOPWzzU", "openreview": "https://openreview.net/forum?id=uatPOPWzzU", "poster": "/media/PosterPDFs/NeurIPS%202024/93265.png?t=1729489401.8615708", "project": "", "author_site": "Rui Duan, Mingjian Guang, Junli Wang, Chungang Yan, Hongda Qi, Wenkang Su, Can Tian, Haoran Yang", "tldr": "", "abstract": "Polynomial-based learnable spectral graph neural networks (GNNs) utilize polynomial to approximate graph convolutions and have achieved impressive performance on graphs. Nevertheless, there are three progressive problems to be solved. Some models use polynomials with better approximation for approximating filters, yet perform worse on real-world graphs. Carefully crafted graph learning methods, sophisticated polynomial approximations, and refined coefficient constraints leaded to overfitting, which diminishes the generalization of the models. How to design a model that retains the ability of polynomial-based spectral GNNs to approximate filters while it possesses higher generalization and performance? In this paper, we propose a spectral GNN with triple filter ensemble (TFE-GNN), which extracts homophily and heterophily from graphs with different levels of homophily adaptively while utilizing the initial features. Specifically, the first and second ensembles are combinations of a set of base low-pass and high-pass filters, respectively, after which the third ensemble combines them with two learnable coefficients and yield a graph convolution (TFE-Conv). Theoretical analysis shows that the approximation ability of TFE-GNN is consistent with that of ChebNet under certain conditions, namely it can learn arbitrary filters. TFE-GNN can be viewed as a reasonable combination of two unfolded and integrated excellent spectral GNNs, which motivates it to perform well. Experiments show that TFE-GNN achieves high generalization and new state-of-the-art performance on various real-world datasets.", "keywords": "graph neural networks; filter ensemble; homophily and heterophily", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/7a564db332225eeeee3ce77a9499cd4aa4245742.zip", "author": "Rui Duan;Mingjian Guang;Junli Wang;Chungang Yan;Hongda Qi;Wenkang Su;Can Tian;Haoran Yang", "authorids": "~Rui_Duan4;~Mingjian_Guang2;~Junli_Wang1;~Chungang_Yan1;~Hongda_Qi1;~Wenkang_Su1;~Can_Tian1;~Haoran_Yang8", "gender": "F;;M;M;F;M;M;M", "homepage": ";;;;;;https://scholar.google.com.hk/citations?hl=zh-CN&user=600TM3IAAAAJ&view_op=list_works&sortby=pubdate;", "dblp": "82/179-1;92/5298;;169/0709;130/0628;;;", "google_scholar": ";;rf7l8wwAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;https://scholar.google.com.hk/citations?hl=zh-CN;", "orcid": "0000-0002-7185-9731;;;0000-0002-5043-2552;0000-0001-6182-3818;0000-0002-7612-2122;;0000-0001-6280-0319", "linkedin": ";;;;;;;", "or_profile": "~Junli_Wang1;~Chungang_Yan1;~Hongda_Qi1;~Wenkang_Su1;~Can_Tian1;~Haoran_Yang8;~Guang_Mingjian1;~rui_Duan3", "aff": "Tongji University;Tongji University;Shanghai Normal University;Guangzhou University;Guangzhou University;Tongji University;Donghua University, Shanghai;Guangzhou University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;shnu.edu.cn;gzhu.edu.cn;gzhu.edu.cn;tongji.edu.cn;dhu.edu.cn;gzhu.edu.cn", "position": "Associate Professor;Full Professor;Lecturer;Lecturer;Lecturer;PhD student;Lecturer;Lecturer", "bibtex": "@inproceedings{\nduan2024unifying,\ntitle={Unifying Homophily and Heterophily for Spectral Graph Neural Networks via Triple Filter Ensembles},\nauthor={Rui Duan and Mingjian Guang and Junli Wang and Chungang Yan and Hongda Qi and Wenkang Su and Can Tian and Haoran Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uatPOPWzzU}\n}", "github": "", "reviewers": "pzPy;zzRJ;eCxX", "pdf_size": 3142144, "rating": "5;6;6", "confidence": "4;5;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "38;79;89", "wc_strengths": "56;40;42", "wc_weaknesses": "77;28;253", "wc_questions": "215;146;17", "wc_limitations": "1;10;29", "wc_review": "387;303;430", "wc_reply_reviewers": "158;0;20", "wc_reply_authors": "669;0;39", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 68.66666666666667, 22.065558884580486 ], "wc_strengths_avg": [ 46.0, 7.118052168020874 ], "wc_weaknesses_avg": [ 119.33333333333333, 96.61032841034935 ], "wc_questions_avg": [ 126.0, 82.06095295571457 ], "wc_limitations_avg": [ 13.333333333333334, 11.67142760000773 ], "wc_review_avg": [ 373.3333333333333, 52.740454639257294 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 70.24401912064985 ], "wc_reply_authors_avg": [ 236.0, 306.5909326774032 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:zQJrOoYCWdYJ:scholar.google.com/&scioq=Unifying+Homophily+and+Heterophily+for+Spectral+Graph+Neural+Networks+via+Triple+Filter+Ensembles&hl=en&as_sdt=0,14", "gs_version_total": 3, "email": "tongji.edu.cn;tongji.edu.cn;shnu.edu.cn;gzhu.edu.cn;gzhu.edu.cn;tongji.edu.cn;dhu.edu.cn;gzhu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;2;2;0;3;2", "aff_unique_norm": "Tongji University;Shanghai Normal University;Guangzhou University;Donghua University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tongji.edu.cn;http://www.shnu.edu.cn;http://www.gzhu.edu.cn;https://www.dhu.edu.cn", "aff_unique_abbr": "Tongji;SHNU;GU;Donghua", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ElasTST: Towards Robust Varied-Horizon Forecasting with Elastic Time-Series Transformer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93264", "id": "ucXUtMPWhv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ucXUtMPWhv", "openreview": "https://openreview.net/forum?id=ucXUtMPWhv", "poster": "/media/PosterPDFs/NeurIPS%202024/93264.png?t=1731567833.6843836", "project": "", "author_site": "Jiawen Zhang, Shun Zheng, Xumeng Wen, Xiaofang Zhou, Jiang Bian, Jia Li", "tldr": "", "abstract": "Numerous industrial sectors necessitate models capable of providing robust forecasts across various horizons. Despite the recent strides in crafting specific architectures for time-series forecasting and developing pre-trained universal models, a comprehensive examination of their capability in accommodating varied-horizon forecasting during inference is still lacking. This paper bridges this gap through the design and evaluation of the Elastic Time-Series Transformer (ElasTST). The ElasTST model incorporates a non-autoregressive design with placeholders and structured self-attention masks, warranting future outputs that are invariant to adjustments in inference horizons. A tunable version of rotary position embedding is also integrated into ElasTST to capture time-series-specific periods and enhance adaptability to different horizons. Additionally, ElasTST employs a multi-scale patch design, effectively integrating both fine-grained and coarse-grained information. During the training phase, ElasTST uses a horizon reweighting strategy that approximates the effect of random sampling across multiple horizons with a single fixed horizon setting. Through comprehensive experiments and comparisons with state-of-the-art time-series architectures and contemporary foundation models, we demonstrate the efficacy of ElasTST's unique design elements. Our findings position ElasTST as a robust solution for the practical necessity of varied-horizon forecasting.", "keywords": "time-series forecasting;arbitrary-horizon forecasting;transformer", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Jiawen Zhang;Shun Zheng;Xumeng Wen;Xiaofang Zhou;Jiang Bian;Jia Li", "authorids": "~Jiawen_Zhang1;~Shun_Zheng1;~Xumeng_Wen1;~Xiaofang_Zhou3;~Jiang_Bian1;~Jia_Li4", "gender": "F;M;M;M;M;M", "homepage": "https://imjiawen.com;;https://github.com/xumwen;https://sites.google.com/view/jiangbian;https://sites.google.com/view/lijia;https://sites.google.com/view/xiaofang-zhou", "dblp": "59/11040-1;179/2615.html;358/9194.html;09/851-2.html;23/6950-9;https://dblp.uni-trier.de/pid/z/XiaofangZhou.html", "google_scholar": "https://scholar.google.com/citations?hl=en;21Q9To4AAAAJ;;pZBEnY8AAAAJ;1gSbcYoAAAAJ;y6m820wAAAAJ", "orcid": "0009-0000-1855-9177;0009-0005-7355-7090;;0000-0002-9472-600X;0000-0002-6362-4385;0000-0001-6343-1455", "linkedin": ";;;jbian/;;", "or_profile": "~Jiawen_Zhang1;~Shun_Zheng1;~Xumeng_Wen1;~Jiang_Bian1;~Jia_Li4;~Xiaofang_Zhou1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Microsoft;Microsoft Research Asia;Microsoft;Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology", "aff_domain": "connect.hkust-gz.edu.cn;microsoft.com;microsoft.com;microsoft.com;ust.hk;ust.hk", "position": "PhD student;Senior Researcher;Researcher;Partner Research Manager;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024elastst,\ntitle={Elas{TST}: Towards Robust Varied-Horizon Forecasting with Elastic Time-Series Transformer},\nauthor={Jiawen Zhang and Shun Zheng and Xumeng Wen and Xiaofang Zhou and Jiang Bian and Jia Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ucXUtMPWhv}\n}", "github": "", "reviewers": "i9JA;eA7o;qEB8", "pdf_size": 2002326, "rating": "5;5;5", "confidence": "5;3;5", "soundness": "2;2;2", "novelty": "2;2;2", "presentation": "2;2;3", "wc_summary": "66;74;90", "wc_strengths": "42;83;67", "wc_weaknesses": "155;126;167", "wc_questions": "1;162;78", "wc_limitations": "7;1;1", "wc_review": "271;446;403", "wc_reply_reviewers": "70;13;0", "wc_reply_authors": "280;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 2.0, 0.0 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 76.66666666666667, 9.977753031397176 ], "wc_strengths_avg": [ 64.0, 16.87206764645835 ], "wc_weaknesses_avg": [ 149.33333333333334, 17.21110752456745 ], "wc_questions_avg": [ 80.33333333333333, 65.7486797501584 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 373.3333333333333, 74.45953412573989 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 30.401023374587606 ], "wc_reply_authors_avg": [ 93.33333333333333, 131.99326582148888 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11380457776609439096&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "connect.hkust-gz.edu.cn;microsoft.com;microsoft.com;microsoft.com;ust.hk;ust.hk", "author_num": 6, "aff_unique_index": "0;1;1;1;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ust.hk;https://www.microsoft.com", "aff_unique_abbr": "HKUST;Microsoft", "aff_campus_unique_index": "0;2;3;3", "aff_campus_unique": "Guangzhou;;Asia;Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Making Offline RL Online: Collaborative World Models for Offline Visual Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93263", "id": "ucxQrked0d", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ucxQrked0d", "openreview": "https://openreview.net/forum?id=ucxQrked0d", "poster": "/media/PosterPDFs/NeurIPS%202024/93263.png?t=1729860332.8212523", "project": "", "author_site": "Qi Wang, Junming Yang, Yunbo Wang, Xin Jin, Wenjun Zeng, Xiaokang Yang", "tldr": "", "abstract": "Training offline RL models using visual inputs poses two significant challenges, *i.e.*, the overfitting problem in representation learning and the overestimation bias for expected future rewards. Recent work has attempted to alleviate the overestimation bias by encouraging conservative behaviors. This paper, in contrast, tries to build more flexible constraints for value estimation without impeding the exploration of potential advantages. The key idea is to leverage off-the-shelf RL simulators, which can be easily interacted with in an online manner, as the \u201c*test bed*\u201d for offline policies. To enable effective online-to-offline knowledge transfer, we introduce CoWorld, a model-based RL approach that mitigates cross-domain discrepancies in state and reward spaces. Experimental results demonstrate the effectiveness of CoWorld, outperforming existing RL approaches by large margins.", "keywords": "World models;reinforcement learning;visual control", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Qi Wang;Junming Yang;Yunbo Wang;Xin Jin;Wenjun Zeng;Xiaokang Yang", "authorids": "~Qi_Wang26;~Junming_Yang1;~Yunbo_Wang2;~Xin_Jin8;~Wenjun_Zeng3;~Xiaokang_Yang1", "gender": "M;M;M;M;M;M", "homepage": "https://qiwang067.github.io/;https://junming-yang.github.io/;http://home.ustc.edu.cn/~jinxustc/;https://www.eias.ac.cn/h-col-187.html;https://icne.sjtu.edu.cn/info/1064/1078.htm;https://wyb15.github.io/", "dblp": "19/1924-80;191/4782.html;68/3340-14;57/145;06/3071-1.html;84/3894", "google_scholar": "OwW5XfMAAAAJ;L6R5ExQAAAAJ;byaSC-kAAAAJ;_cUfvYQAAAAJ;yDEavdMAAAAJ;C8bGfr0AAAAJ", "orcid": ";0000-0002-4261-6271;0000-0002-1820-8358;;0000-0003-4029-3322;", "linkedin": "qi-wang-chris-7a6670361/;;;;;", "or_profile": "~Qi_Wang26;~Junming_Yang1;~Xin_Jin8;~Wenjun_Zeng3;~Xiaokang_Yang1;~Yunbo_Wang1", "aff": "Shanghai Jiaotong University;Nanjing University of Posts and Telecommunications;Eastern Institute of Technology, Ningbo;Eastern Institute for Advanced Study;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;njupt.edu.cn;eitech.edu.cn;eias.ac.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Assistant Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2024making,\ntitle={Making Offline {RL} Online: Collaborative World Models for Offline Visual Reinforcement Learning},\nauthor={Qi Wang and Junming Yang and Yunbo Wang and Xin Jin and Wenjun Zeng and Xiaokang Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ucxQrked0d}\n}", "github": "", "reviewers": "vWw1;Bpkq;DaxB;oydA", "pdf_size": 5526856, "rating": "5;6;6;6", "confidence": "3;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;2;3", "presentation": "1;3;3;3", "wc_summary": "76;63;103;27", "wc_strengths": "68;73;70;28", "wc_weaknesses": "158;229;331;12", "wc_questions": "94;48;82;27", "wc_limitations": "10;9;29;7", "wc_review": "406;422;615;101", "wc_reply_reviewers": "45;15;107;20", "wc_reply_authors": "313;46;101;0", "reply_reviewers": "2;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 67.25, 27.3530162870569 ], "wc_strengths_avg": [ 59.75, 18.417043736713012 ], "wc_weaknesses_avg": [ 182.5, 116.06571414504802 ], "wc_questions_avg": [ 62.75, 26.65872277510684 ], "wc_limitations_avg": [ 13.75, 8.870597499605086 ], "wc_review_avg": [ 386.0, 183.9578756128696 ], "wc_reply_reviewers_avg": [ 46.75, 36.594910848367974 ], "wc_reply_authors_avg": [ 115.0, 119.77687589848051 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10403928892835566663&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;njupt.edu.cn;eitech.edu.cn;eias.ac.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Nanjing University of Posts and Telecommunications;Eastern Institute of Technology;Eastern Institute for Advanced Study", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.njupt.edu.cn;https://www.eit.edu.cn;", "aff_unique_abbr": "SJTU;NJUPT;;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Nanjing;Ningbo", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "Convergence Analysis of Split Federated Learning on Heterogeneous Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93262", "id": "ud0RBkdBfE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ud0RBkdBfE", "openreview": "https://openreview.net/forum?id=ud0RBkdBfE", "poster": "", "project": "", "author_site": "Pengchao Han, Chao Huang, Geng Tian, Ming Tang, Xin Liu", "tldr": "", "abstract": "Split federated learning (SFL) is a recent distributed approach for collaborative model training among multiple clients. In SFL, a global model is typically split into two parts, where clients train one part in a parallel federated manner, and a main server trains the other. Despite the recent research on SFL algorithm development, the convergence analysis of SFL is missing in the literature, and this paper aims to fill this gap. The analysis of SFL can be more challenging than that of federated learning (FL), due to the potential dual-paced updates at the clients and the main server. We provide convergence analysis of SFL for strongly convex and general convex objectives on heterogeneous data. The convergence rates are $O(1/T)$ and $O(1/\\sqrt[3]{T})$, respectively, where $T$ denotes the total number of rounds for SFL training. We further extend the analysis to non-convex objectives and where some clients may be unavailable during training. Numerical experiments validate our theoretical results and show that SFL outperforms FL and split learning (SL) when data is highly heterogeneous across a large number of clients.", "keywords": "split federated learning;distributed learning;convergence analysis;machine learning", "primary_area": "optimization", "supplementary_material": "/attachment/4410fbc945e685e1dc0b9bc2086f22c433f95bb9.zip", "author": "Pengchao Han;Chao Huang;Geng Tian;Ming Tang;Xin Liu", "authorids": "~Pengchao_Han1;~Chao_Huang9;~Geng_Tian1;~Ming_Tang5;~Xin_Liu6", "gender": "F;;M;F;F", "homepage": ";;https://github.com/TIANGeng708;http://mingtang.site;https://xinliu.engineering.ucdavis.edu/", "dblp": ";;;73/4373-6;76/1820-2", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;4v9UxPYAAAAJ;4MV5BkQAAAAJ", "orcid": ";;;0000-0003-4732-5155;", "linkedin": ";;;;", "or_profile": "~Pengchao_Han1;~Chao_Huang9;~Geng_Tian1;~Ming_Tang5;~Xin_Liu6", "aff": "Guangdong University of Technology;;;Southern University of Science and Technology;University of California, Davis", "aff_domain": "gdut.edu.cn;;;sustech.edu.cn;ucdavis.edu", "position": "Associate Professor;;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhan2024convergence,\ntitle={Convergence Analysis of Split Federated Learning on Heterogeneous Data},\nauthor={Pengchao Han and Chao Huang and Geng Tian and Ming Tang and Xin Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ud0RBkdBfE}\n}", "github": "", "reviewers": "8Y1a;j2yY;b5tv;fmCP;NwMP", "pdf_size": 2321346, "rating": "5;5;5;7;8", "confidence": "4;4;3;2;5", "soundness": "3;2;2;3;4", "novelty": "2;2;2;3;4", "presentation": "2;3;3;4;4", "wc_summary": "75;35;20;86;114", "wc_strengths": "69;51;57;77;140", "wc_weaknesses": "416;51;423;106;74", "wc_questions": "131;138;2;32;5", "wc_limitations": "48;12;25;42;1", "wc_review": "739;287;527;343;334", "wc_reply_reviewers": "203;109;61;25;26", "wc_reply_authors": "964;171;87;18;18", "reply_reviewers": "2;2;1;1;1", "reply_authors": "5;3;3;2;2", "rating_avg": [ 6.0, 1.2649110640673518 ], "confidence_avg": [ 3.6, 1.019803902718557 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 66.0, 34.24032710124131 ], "wc_strengths_avg": [ 78.8, 31.914886808509912 ], "wc_weaknesses_avg": [ 214.0, 168.7115882208451 ], "wc_questions_avg": [ 61.6, 60.47346525543249 ], "wc_limitations_avg": [ 25.6, 17.67031408888931 ], "wc_review_avg": [ 446.0, 167.83563388029373 ], "wc_reply_reviewers_avg": [ 84.8, 66.56846100068711 ], "wc_reply_authors_avg": [ 251.6, 360.6220181852461 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 3.0, 1.0954451150103321 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.15504341823651055, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14680531314281409927&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "gdut.edu.cn;;;sustech.edu.cn;ucdavis.edu", "author_num": 5, "aff_unique_index": "0;1;2", "aff_unique_norm": "Guangdong University of Technology;Southern University of Science and Technology;University of California, Davis", "aff_unique_dep": ";;", "aff_unique_url": "http://www.gdut.edu.cn;https://www.sustech.edu.cn;https://www.ucdavis.edu", "aff_unique_abbr": "GDUT;SUSTech;UC Davis", "aff_campus_unique_index": "1", "aff_campus_unique": ";Davis", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Iteratively Refined Early Interaction Alignment for Subgraph Matching based Graph Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93261", "id": "udTwwF7tks", "proceeding": "", "pdf": "https://openreview.net/pdf?id=udTwwF7tks", "openreview": "https://openreview.net/forum?id=udTwwF7tks", "poster": "", "project": "", "author_site": "Ashwin Ramachandran, Vaibhav Raj, Indradyumna Roy, Soumen Chakrabarti, Abir De", "tldr": "", "abstract": "Graph retrieval based on subgraph isomorphism has several real-world applications such as scene graph retrieval, molecular fingerprint detection and circuit design. Roy et al. [35] proposed IsoNet, a late interaction model for subgraph matching, which first computes the node and edge embeddings of each graph independently of paired graph and then computes a trainable alignment map. Here, we present $\\texttt{IsoNet++}$, an early interaction graph neural network (GNN), based on several technical innovations. First, we compute embeddings of all nodes by passing messages within and across the two input graphs, guided by an *injective alignment* between their nodes. Second, we update this alignment in a lazy fashion over multiple *rounds*. Within each round, we run a layerwise GNN from scratch, based on the current state of the alignment. After the completion of one round of GNN, we use the last-layer embeddings to update the alignments, and proceed to the next round. Third, $\\texttt{IsoNet++}$ incorporates a novel notion of node-pair partner interaction. Traditional early interaction computes attention between a node and its potential partners in the other graph, the attention then controlling messages passed across graphs. We consider *node pairs* (not single nodes) as potential partners. Existence of an edge between the nodes in one graph and non-existence in the other provide vital signals for refining the alignment. Our experiments on several datasets show that the alignments get progressively refined with successive rounds,\nresulting in significantly better retrieval performance than existing methods. We demonstrate that all three innovations contribute to the enhanced accuracy. Our code and datasets are publicly available at https://github.com/structlearning/isonetpp.", "keywords": "Graph Neural Networks;Graph Retrieval", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Ashwin Ramachandran;Vaibhav Raj;Indradyumna Roy;Soumen Chakrabarti;Abir De", "authorids": "~Ashwin_Ramachandran1;~Vaibhav_Raj1;~Indradyumna_Roy1;~Soumen_Chakrabarti1;~Abir_De1", "gender": "M;M;M;Not Specified;M", "homepage": "https://ashwinramachandran2002.github.io/;;https://indradyumna.github.io/;https://www.cse.iitb.ac.in/~soumen/;", "dblp": "07/1826;;124/9185.html;c/SChakrabarti;118/7174", "google_scholar": "y7H-6IgAAAAJ;;qb70i84AAAAJ;https://scholar.google.com.tw/citations?user=LfF2zfQAAAAJ;https://scholar.google.co.in/citations?user=_9ZKKbIAAAAJ", "orcid": ";;;;", "linkedin": "ashwin-ramachandran-042152204/;vaibhav-raj-8446421b9/;;;", "or_profile": "~Ashwin_Ramachandran1;~Vaibhav_Raj1;~Indradyumna_Roy1;~Soumen_Chakrabarti1;~Abir_De1", "aff": "Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay;Indian Institute of Technology Bombay;Indian Institute of Technology Bombay,", "aff_domain": "iitb.ac.in;iitb.ac.in;iitb.ac.in;iitb.ac.in;iitb.ac.in", "position": "Undergrad student;Undergrad student;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nramachandran2024iteratively,\ntitle={Iteratively Refined Early Interaction Alignment for Subgraph Matching based Graph Retrieval},\nauthor={Ashwin Ramachandran and Vaibhav Raj and Indradyumna Roy and Soumen Chakrabarti and Abir De},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=udTwwF7tks}\n}", "github": "", "reviewers": "65Ze;1Kfh;sihU;WLV9", "pdf_size": 1438163, "rating": "6;6;6;7", "confidence": "3;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "58;75;162;80", "wc_strengths": "38;59;84;60", "wc_weaknesses": "41;84;111;52", "wc_questions": "144;74;52;1", "wc_limitations": "50;11;20;1", "wc_review": "331;303;429;194", "wc_reply_reviewers": "18;13;83;5", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 93.75, 40.23912896671597 ], "wc_strengths_avg": [ 60.25, 16.284578594486256 ], "wc_weaknesses_avg": [ 72.0, 27.504545078950134 ], "wc_questions_avg": [ 67.75, 51.37302307631896 ], "wc_limitations_avg": [ 20.5, 18.309833423600555 ], "wc_review_avg": [ 314.25, 83.71790429770684 ], "wc_reply_reviewers_avg": [ 29.75, 31.09159854365806 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Xn1Fvk6ROpoJ:scholar.google.com/&scioq=Iteratively+Refined+Early+Interaction+Alignment+for+Subgraph+Matching+based+Graph+Retrieval&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "iitb.ac.in;iitb.ac.in;iitb.ac.in;iitb.ac.in;iitb.ac.in", "author_num": 5, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitb.ac.in;https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay;IIT Bombay", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Bombay", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "India" }, { "title": "Calibrating Reasoning in Language Models with Internal Consistency", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93260", "id": "udZKVMPf3S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=udZKVMPf3S", "openreview": "https://openreview.net/forum?id=udZKVMPf3S", "poster": "/media/PosterPDFs/NeurIPS%202024/93260.png?t=1733453291.1422908", "project": "", "author_site": "Zhihui Xie, Jizhou Guo, Tong Yu, Shuai Li", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated impressive capabilities in various reasoning tasks, aided by techniques like chain-of-thought prompting that elicits verbalized reasoning. However, LLMs often generate text with obvious mistakes and contradictions, raising doubts about their ability to robustly process and utilize generated rationales. In this work, we investigate reasoning in LLMs through the lens of internal representations, focusing on how these representations are influenced by generated rationales. Our preliminary analysis reveals that while generated rationales improve answer accuracy, inconsistencies emerge between the model\u2019s internal representations in middle layers and those in final layers, potentially undermining the reliability of their reasoning processes. To address this, we propose internal consistency as a measure of the model\u2019s confidence by examining the agreement of latent predictions decoded from intermediate layers. Extensive empirical studies across different models and datasets demonstrate that internal consistency effectively distinguishes between correct and incorrect reasoning paths. Motivated by this, we propose a new approach to calibrate reasoning by up-weighting reasoning paths with high internal consistency, resulting in a significant boost in reasoning performance. Further analysis uncovers distinct patterns in attention and feed-forward modules across layers, providing insights into the emergence of internal inconsistency. In summary, our results demonstrate the potential of using internal representations for self-evaluation of LLMs.", "keywords": "Large language models;reasoning;faithfulness;internal consistency", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Zhihui Xie;Jizhou Guo;Tong Yu;Shuai Li", "authorids": "~Zhihui_Xie2;~Jizhou_Guo1;~Tong_Yu3;~Shuai_Li3", "gender": "M;M;;F", "homepage": "https://fffffarmer.github.io/;https://aster2024.github.io/;https://www.linkedin.com/in/tong-yu-42790744;http://shuaili8.github.io", "dblp": "31/3570-2;378/4049;32/1593-1;57/2281-10", "google_scholar": "Jml8NvkAAAAJ;https://scholar.google.com.hk/citations?user=fcBDdsYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=kMZgQxcAAAAJ", "orcid": ";0009-0001-0699-9164;0000-0002-5991-2050;", "linkedin": ";jizhou-guo-6971b6277/;tong-yu-42790744;", "or_profile": "~Zhihui_Xie2;~Jizhou_Guo1;~Tong_Yu3;~Shuai_Li3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Adobe Research;John Hopcroft Center, Shanghai Jiao Tong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;adobe.com;sjtu.edu.cn", "position": "MS student;Undergrad student;Senior Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nxie2024calibrating,\ntitle={Calibrating Reasoning in Language Models with Internal Consistency},\nauthor={Zhihui Xie and Jizhou Guo and Tong Yu and Shuai Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=udZKVMPf3S}\n}", "github": "", "reviewers": "7a9f;reW1;B2Yn;fPrR", "pdf_size": 643602, "rating": "5;6;6;7", "confidence": "4;3;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "73;59;74;82", "wc_strengths": "47;76;80;99", "wc_weaknesses": "136;181;52;138", "wc_questions": "31;29;89;112", "wc_limitations": "1;2;6;10", "wc_review": "288;347;301;441", "wc_reply_reviewers": "18;12;22;36", "wc_reply_authors": "19;13;25;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 8.276472678623424 ], "wc_strengths_avg": [ 75.5, 18.607794065928395 ], "wc_weaknesses_avg": [ 126.75, 46.75133687928079 ], "wc_questions_avg": [ 65.25, 36.18269614055868 ], "wc_limitations_avg": [ 4.75, 3.5619517121937516 ], "wc_review_avg": [ 344.25, 60.00572889316486 ], "wc_reply_reviewers_avg": [ 22.0, 8.831760866327848 ], "wc_reply_authors_avg": [ 20.0, 4.58257569495584 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17979828480406994110&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;sjtu.edu.cn;adobe.com;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Adobe", "aff_unique_dep": ";Adobe Research", "aff_unique_url": "https://www.sjtu.edu.cn;https://research.adobe.com", "aff_unique_abbr": "SJTU;Adobe", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shanghai", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Sample-Efficient Agnostic Boosting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93259", "id": "ufKBRvYxtp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ufKBRvYxtp", "openreview": "https://openreview.net/forum?id=ufKBRvYxtp", "poster": "/media/PosterPDFs/NeurIPS%202024/93259.png?t=1733723601.1391578", "project": "", "author_site": "Udaya Ghai, Karan Singh", "tldr": "", "abstract": "The theory of boosting provides a computational framework for aggregating approximate weak learning algorithms, which perform marginally better than a random predictor, into an accurate strong learner. In the realizable case, the success of the boosting approach is underscored by a remarkable fact that the resultant sample complexity matches that of a computationally demanding alternative, namely Empirical Risk Minimization (ERM). This in particular implies that the realizable boosting methodology has the potential to offer computational relief without compromising on sample efficiency.\n\nDespite recent progress, in agnostic boosting, where assumptions on the conditional distribution of labels given feature descriptions are absent, ERM outstrips the agnostic boosting methodology in being quadratically more sample efficient than all known agnostic boosting algorithms. In this paper, we make progress on closing this gap, and give a substantially more sample efficient agnostic boosting algorithm than those known, without compromising on the computational (or oracle) complexity. A key feature of our algorithm is that it leverages the ability to reuse samples across multiple rounds of boosting, while guaranteeing a generalization error strictly better than those obtained by blackbox applications of uniform convergence arguments. We also apply our approach to other previously studied learning problems, including boosting for reinforcement learning, and demonstrate improved results.", "keywords": "boosting; sample complexity; learning theory; reinforcement learning", "primary_area": "learning_theory", "supplementary_material": "/attachment/0ae5e8b871b4947c7ad8bb9c08a2731d64317f19.zip", "author": "Udaya Ghai;Karan Singh", "authorids": "~Udaya_Ghai1;~Karan_Singh1", "gender": "M;M", "homepage": "https://www.cs.princeton.edu/~ughai/;https://i-am-karan-singh.github.io/", "dblp": "https://dblp.uni-trier.de/pid/236/4605;00/505", "google_scholar": "Kf1SGfgAAAAJ;PZJIgZUAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Udaya_Ghai1;~Karan_Singh1", "aff": "Amazon;Carnegie Mellon University", "aff_domain": "amazon.com;cmu.edu", "position": "Researcher;Assistant Professor", "bibtex": "@inproceedings{\nghai2024sampleefficient,\ntitle={Sample-Efficient Agnostic Boosting},\nauthor={Udaya Ghai and Karan Singh},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ufKBRvYxtp}\n}", "github": "", "reviewers": "m6d1;P9SH;3LK5;xmA9;3oUn", "pdf_size": 471199, "rating": "6;6;6;7;7", "confidence": "3;3;3;1;3", "soundness": "3;2;3;2;4", "novelty": "3;3;3;3;4", "presentation": "3;2;2;2;3", "wc_summary": "61;56;68;309;104", "wc_strengths": "120;8;44;284;56", "wc_weaknesses": "1570;116;263;47;178", "wc_questions": "208;121;18;284;90", "wc_limitations": "60;5;2;1;54", "wc_review": "2019;306;395;925;482", "wc_reply_reviewers": "62;280;4;7;0", "wc_reply_authors": "176;548;28;34;0", "reply_reviewers": "1;2;1;1;0", "reply_authors": "3;2;2;2;1", "rating_avg": [ 6.4, 0.48989794855663565 ], "confidence_avg": [ 2.6, 0.8 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.4, 0.4898979485566356 ], "wc_summary_avg": [ 119.6, 96.18440622055115 ], "wc_strengths_avg": [ 102.4, 97.73760791015913 ], "wc_weaknesses_avg": [ 434.8, 572.0424459775691 ], "wc_questions_avg": [ 144.2, 92.75429909174022 ], "wc_limitations_avg": [ 24.4, 26.71778433927484 ], "wc_review_avg": [ 825.4, 633.6474098424138 ], "wc_reply_reviewers_avg": [ 70.6, 107.13281476746515 ], "wc_reply_authors_avg": [ 157.2, 204.7734357772023 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6123724356957945, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15233435500820127559&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 3, "email": "amazon.com;cmu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;Carnegie Mellon University", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.cmu.edu", "aff_unique_abbr": "Amazon;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Neural Network Approach for Efficiently Answering Most Probable Explanation Queries in Probabilistic Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93258", "id": "ufPPf9ghzP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ufPPf9ghzP", "openreview": "https://openreview.net/forum?id=ufPPf9ghzP", "poster": "/media/PosterPDFs/NeurIPS%202024/93258.png?t=1733679459.6205926", "project": "", "author_site": "Shivvrat Arya, Tahrima Rahman, Vibhav Gogate", "tldr": "", "abstract": "We propose a novel neural networks based approach to efficiently answer arbitrary Most Probable Explanation (MPE) queries\u2014a well-known NP-hard task\u2014in large probabilistic models such as \nBayesian and Markov networks, probabilistic circuits, and neural auto-regressive models. By arbitrary MPE queries, we mean that there is no predefined partition of variables into evidence and non-evidence variables. The key idea is to distill all MPE queries over a given probabilistic model into a neural network and then use the latter for answering queries, eliminating the need for time-consuming inference algorithms that operate directly on the probabilistic model. We improve upon this idea by incorporating inference-time optimization with self-supervised loss to iteratively improve the solutions and employ a teacher-student framework that provides a better initial network, which in turn, helps reduce the number of inference-time optimization steps. The teacher network utilizes a self-supervised loss function optimized for getting the exact MPE solution, while the student network learns from the teacher's near-optimal outputs through supervised loss. We demonstrate the efficacy and scalability of our approach on various datasets and a broad class of probabilistic models, showcasing its practical effectiveness.", "keywords": "Most Probable Explanation;Probabilistic Graphical Models;Probabilistic Circuits;Neural Autoregressive Model", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/907343a2a8c1a52e9d91f04f0f45baf3c735cd67.zip", "author": "Shivvrat Arya;Tahrima Rahman;Vibhav Giridhar Gogate", "authorids": "~Shivvrat_Arya1;~Tahrima_Rahman1;~Vibhav_Giridhar_Gogate1", "gender": "M;F;M", "homepage": "https://shivvrat.github.io;http://www.utdallas.edu/~txr110830/;http://www.hlt.utdallas.edu/~vgogate/", "dblp": "275/7819;150/2674;14/4229", "google_scholar": "eM1co-kAAAAJ;VDBfmocAAAAJ;https://scholar.google.com.tw/citations?user=pm_dg3cAAAAJ", "orcid": "0000-0002-9727-2533;;", "linkedin": "shivvrat/;;", "or_profile": "~Shivvrat_Arya1;~Tahrima_Rahman1;~Vibhav_Gogate1", "aff": "The University of Texas at Dallas;University of Texas, Dallas;University of Texas, Dallas", "aff_domain": "cs.utdallas.edu;utdallas.edu;utdallas.edu", "position": "PhD student;Research Scientist;Professor", "bibtex": "@inproceedings{\narya2024a,\ntitle={A Neural Network Approach for Efficiently Answering Most Probable Explanation Queries in Probabilistic Models},\nauthor={Shivvrat Arya and Tahrima Rahman and Vibhav Giridhar Gogate},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ufPPf9ghzP}\n}", "github": "", "reviewers": "q7PK;jEuh;1g2T", "pdf_size": 2220959, "rating": "6;7;7", "confidence": "2;2;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "85;136;147", "wc_strengths": "65;24;87", "wc_weaknesses": "58;32;160", "wc_questions": "56;121;164", "wc_limitations": "1;12;9", "wc_review": "265;325;567", "wc_reply_reviewers": "0;99;186", "wc_reply_authors": "0;0;136", "reply_reviewers": "0;2;1", "reply_authors": "1;1;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 122.66666666666667, 27.010286106510527 ], "wc_strengths_avg": [ 58.666666666666664, 26.10661899893503 ], "wc_weaknesses_avg": [ 83.33333333333333, 55.24088662897759 ], "wc_questions_avg": [ 113.66666666666667, 44.39469437769438 ], "wc_limitations_avg": [ 7.333333333333333, 4.642796092394706 ], "wc_review_avg": [ 385.6666666666667, 130.54075566231754 ], "wc_reply_reviewers_avg": [ 95.0, 75.98684096605149 ], "wc_reply_authors_avg": [ 45.333333333333336, 64.1110148275803 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9500086285329306656&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "cs.utdallas.edu;utdallas.edu;utdallas.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Dallas", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FilterNet: Harnessing Frequency Filters for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93257", "id": "ugL2D9idAD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ugL2D9idAD", "openreview": "https://openreview.net/forum?id=ugL2D9idAD", "poster": "/media/PosterPDFs/NeurIPS%202024/93257.png?t=1731687291.4689252", "project": "", "author_site": "Kun Yi, Jingru Fei, Qi Zhang, Hui He, Shufeng Hao, Defu Lian, Wei Fan", "tldr": "", "abstract": "Given the ubiquitous presence of time series data across various domains, precise forecasting of time series holds significant importance and finds widespread real-world applications such as energy, weather, healthcare, etc. While numerous forecasters have been proposed using different network architectures, the Transformer-based models have state-of-the-art performance in time series forecasting. However, forecasters based on Transformers are still suffering from vulnerability to high-frequency signals, efficiency in computation, and bottleneck in full-spectrum utilization, which essentially are the cornerstones for accurately predicting time series with thousands of points. In this paper, we explore a novel perspective of enlightening signal processing for deep time series forecasting. Inspired by the filtering process, we introduce one simple yet effective network, namely FilterNet, built upon our proposed learnable frequency filters to extract key informative temporal patterns by selectively passing or attenuating certain components of time series signals. Concretely, we propose two kinds of learnable filters in the FilterNet: (i) Plain shaping filter, that adopts a universal frequency kernel for signal filtering and temporal modeling; (ii) Contextual shaping filter, that utilizes filtered frequencies examined in terms of its compatibility with input signals for\ndependency learning. Equipped with the two filters, FilterNet can approximately surrogate the linear and attention mappings widely adopted in time series literature, while enjoying superb abilities in handling high-frequency noises and utilizing the whole frequency spectrum that is beneficial for forecasting. Finally, we conduct extensive experiments on eight time series forecasting benchmarks, and experimental results have demonstrated our superior performance in terms of both effectiveness and efficiency compared with state-of-the-art methods. Our code is available at$^1$.", "keywords": "time series forecasting;learning in the frequency domain", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Kun Yi;Jingru Fei;Qi Zhang;Hui He;Shufeng Hao;Defu Lian;Wei Fan", "authorids": "~Kun_Yi2;~Jingru_Fei1;~Qi_Zhang25;~Hui_He2;~Shufeng_Hao1;~Defu_Lian1;~Wei_Fan6", "gender": ";F;M;F;M;M;M", "homepage": "https://github.com/aikunyi;https://github.com/TayST13;https://sites.google.com/view/qizhang-bit-uts/home;https://www.researchgate.net/profile/Hui_He43;;https://faculty.ustc.edu.cn/liandefu/en/index.htm;https://weifan.site/", "dblp": "202/8470-1;392/4248.html;52/323-20;https://dblp.uni-trier.de/pid/53/1151;215/0027;87/10734;54/3488-10", "google_scholar": "MhMZcIEAAAAJ;_QN0JMYAAAAJ;8UAk1p4AAAAJ;1IqAdRwAAAAJ;;QW0ad4sAAAAJ;cQ8zLJ4AAAAJ", "orcid": "0000-0002-9980-6033;;0000-0002-1037-1361;0000-0001-5515-2739;;0000-0002-3507-9607;0000-0001-7656-445X", "linkedin": ";;;;;;", "or_profile": "~Kun_Yi2;~Jingru_Fei1;~Qi_Zhang25;~Hui_He2;~Shufeng_Hao1;~Defu_Lian1;~Wei_Fan6", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Tongji University;Beijing Institute of Technology;Taiyuan University of Technology;University of Science and Technology of China;University of Oxford", "aff_domain": "bit.edu.cn;bit.edu.cn;tongji.edu.cn;bit.edu.cn;tyut.edu.cn;ustc.edu.cn;ox.ac.uk", "position": "PhD student;PhD student;Researcher;PhD student;Lecturer;Full Professor;Postdoc Researcher", "bibtex": "@inproceedings{\nyi2024filternet,\ntitle={FilterNet: Harnessing Frequency Filters for Time Series Forecasting},\nauthor={Kun Yi and Jingru Fei and Qi Zhang and Hui He and Shufeng Hao and Defu Lian and Wei Fan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ugL2D9idAD}\n}", "github": "", "reviewers": "2waa;ufqs;t3Wx;UnVh", "pdf_size": 1830914, "rating": "3;4;7;7", "confidence": "4;4;4;4", "soundness": "2;3;4;4", "novelty": "2;2;4;4", "presentation": "3;3;3;4", "wc_summary": "54;62;115;85", "wc_strengths": "33;35;133;127", "wc_weaknesses": "129;106;93;47", "wc_questions": "38;7;37;92", "wc_limitations": "1;1;2;1", "wc_review": "255;211;380;352", "wc_reply_reviewers": "0;456;0;76", "wc_reply_authors": "103;1126;0;17", "reply_reviewers": "0;2;0;1", "reply_authors": "3;4;1;2", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 23.695991222145572 ], "wc_strengths_avg": [ 82.0, 48.05205510693585 ], "wc_weaknesses_avg": [ 93.75, 29.911327285829362 ], "wc_questions_avg": [ 43.5, 30.64718584144391 ], "wc_limitations_avg": [ 1.25, 0.4330127018922193 ], "wc_review_avg": [ 299.5, 69.00905737655022 ], "wc_reply_reviewers_avg": [ 133.0, 189.04761305025778 ], "wc_reply_authors_avg": [ 311.5, 471.86995030410657 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13235259594414206922&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "bit.edu.cn;bit.edu.cn;tongji.edu.cn;bit.edu.cn;tyut.edu.cn;ustc.edu.cn;ox.ac.uk", "author_num": 7, "aff_unique_index": "0;0;1;0;2;3;4", "aff_unique_norm": "Beijing Institute of Technology;Tongji University;Taiyuan University of Technology;University of Science and Technology of China;University of Oxford", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tongji.edu.cn;http://www.tyut.edu.cn/;http://www.ustc.edu.cn;https://www.ox.ac.uk", "aff_unique_abbr": "BIT;Tongji;TYUT;USTC;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "AdaFlow: Imitation Learning with Variance-Adaptive Flow-Based Policies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93256", "id": "ugXKInqDCC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ugXKInqDCC", "openreview": "https://openreview.net/forum?id=ugXKInqDCC", "poster": "", "project": "", "author_site": "Xixi Hu, Qiang Liu, Xingchao Liu, Bo Liu", "tldr": "", "abstract": "Diffusion-based imitation learning improves Behavioral Cloning (BC) on multi-modal decision-making, but comes at the cost of significantly slower inference due to the recursion in the diffusion process. It urges us to design efficient policy generators while keeping the ability to generate diverse actions. To address this challenge, we propose AdaFlow, an imitation learning framework based on flow-based generative modeling. AdaFlow represents the policy with state-conditioned ordinary differential equations (ODEs), which are known as probability flows. We reveal an intriguing connection between the conditional variance of their training loss and the discretization error of the ODEs.\nWith this insight, we propose a variance-adaptive ODE solver that can adjust its step size in the inference stage, making\nAdaFlow an adaptive decision-maker, offering rapid inference without sacrificing diversity. Interestingly, it automatically reduces to a one-step generator when the action distribution is uni-modal. Our comprehensive empirical evaluation shows that AdaFlow achieves high performance with fast inference speed.", "keywords": "Imitation Learning; Generative Model-Based Policy", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Xixi Hu;qiang liu;Xingchao Liu;Bo Liu", "authorids": "~Xixi_Hu2;~qiang_liu4;~Xingchao_Liu1;~Bo_Liu13", "gender": ";M;M;M", "homepage": "https://hxixixh.github.io/;;https://cranial-xix.github.io/;https://www.cs.utexas.edu/~lqiang/", "dblp": "234/1710;228/7309;;61/3234-1", "google_scholar": ";VOTVE0UAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;", "linkedin": "xixi-hu-210200160/;;;", "or_profile": "~Xixi_Hu2;~Xingchao_Liu1;~Bo_Liu13;~Qiang_Liu1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas, Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhu2024adaflow,\ntitle={AdaFlow: Imitation Learning with Variance-Adaptive Flow-Based Policies},\nauthor={Xixi Hu and qiang liu and Xingchao Liu and Bo Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ugXKInqDCC}\n}", "github": "", "reviewers": "RJmb;AbSH;FRk8;df3a", "pdf_size": 3882172, "rating": "6;6;6;7", "confidence": "3;3;3;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "2;3;2;3", "wc_summary": "66;77;179;84", "wc_strengths": "55;39;52;98", "wc_weaknesses": "32;42;173;464", "wc_questions": "204;125;2;119", "wc_limitations": "11;43;34;38", "wc_review": "368;326;440;803", "wc_reply_reviewers": "55;119;137;75", "wc_reply_authors": "0;0;222;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 101.5, 45.202322949158265 ], "wc_strengths_avg": [ 61.0, 22.192341021172147 ], "wc_weaknesses_avg": [ 177.75, 174.3794354274609 ], "wc_questions_avg": [ 112.5, 72.07808266040378 ], "wc_limitations_avg": [ 31.5, 12.257650672131263 ], "wc_review_avg": [ 484.25, 188.4918764827811 ], "wc_reply_reviewers_avg": [ 96.5, 32.90516676754579 ], "wc_reply_authors_avg": [ 55.5, 96.12881982007269 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13100839794451314229&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Incorporating Test-Time Optimization into Training with Dual Networks for Human Mesh Recovery", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93255", "id": "ugqx9tgyum", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ugqx9tgyum", "openreview": "https://openreview.net/forum?id=ugqx9tgyum", "poster": "/media/PosterPDFs/NeurIPS%202024/93255.png?t=1730348278.972617", "project": "", "author_site": "Yongwei Nie, Mingxian Fan, Chengjiang Long, Qing Zhang, Jian Zhu, Xuemiao Xu", "tldr": "", "abstract": "Human Mesh Recovery (HMR) is the task of estimating a parameterized 3D human mesh from an image. There is a kind of methods first training a regression model for this problem, then further optimizing the pretrained regression model for any specific sample individually at test time. However, the pretrained model may not provide an ideal optimization starting point for the test-time optimization. Inspired by meta-learning, we incorporate the test-time optimization into training, performing a step of test-time optimization for each sample in the training batch before really conducting the training optimization over all the training samples. In this way, we obtain a meta-model, the meta-parameter of which is friendly to the test-time optimization. At test time, after several test-time optimization steps starting from the meta-parameter, we obtain much higher HMR accuracy than the test-time optimization starting from the simply pretrained regression model. Furthermore, we find test-time HMR objectives are different from training-time objectives, which reduces the effectiveness of the learning of the meta-model. To solve this problem, we propose a dual-network architecture that unifies the training-time and test-time objectives. Our method, armed with meta-learning and the dual networks, outperforms state-of-the-art regression-based and optimization-based HMR approaches, as validated by the extensive experiments. The codes are available at https://github.com/fmx789/Meta-HMR.", "keywords": "Human Mesh Recovery; Meta-Learning; Test-Time Optimization; Training Optimization", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yongwei Nie;Mingxian Fan;Chengjiang Long;Qing Zhang;Jian Zhu;Xuemiao Xu", "authorids": "~Yongwei_Nie1;~Mingxian_Fan1;~Chengjiang_Long1;~Qing_Zhang7;~Jian_Zhu5;~Xuemiao_Xu1", "gender": ";M;M;M;M;", "homepage": "https://nieyongwei.net/;https://fmx789.github.io;http://www.chengjianglong.com/;http://zhangqing-home.net/;;", "dblp": "31/8005.html;;10/4617;;98/960-1;", "google_scholar": "https://scholar.google.com.hk/citations?user=jVlU_oQAAAAJ%26hl=en;;k0XkeiAAAAAJ;qH0rU44AAAAJ;;", "orcid": ";;;;0000-0002-2551-2024;", "linkedin": ";;;;;", "or_profile": "~Yongwei_Nie1;~Mingxian_Fan1;~Chengjiang_Long1;~Qing_Zhang7;~Jian_Zhu5;~Xuemiao_Xu1", "aff": "South China University of Technology;South China University of Technology;Meta Reality Labs;SUN YAT-SEN UNIVERSITY;Guangdong University of Technology;", "aff_domain": "scut.edu.cn;scut.edu.cn;fb.com;sysu.edu.cn;gdut.edu.cn;", "position": "Associate Professor;MS student;Researcher;Associate Professor;Associate Professor;", "bibtex": "@inproceedings{\nnie2024incorporating,\ntitle={Incorporating Test-Time Optimization into Training with Dual Networks for Human Mesh Recovery},\nauthor={Yongwei Nie and Mingxian Fan and Chengjiang Long and Qing Zhang and Jian Zhu and Xuemiao Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ugqx9tgyum}\n}", "github": "", "reviewers": "Nccj;Cdc4;Qn92;PPKd", "pdf_size": 22828939, "rating": "5;5;6;6", "confidence": "4;3;4;4", "soundness": "3;2;3;3", "novelty": "2;2;3;2", "presentation": "2;1;4;3", "wc_summary": "66;110;77;79", "wc_strengths": "27;184;32;117", "wc_weaknesses": "84;172;116;175", "wc_questions": "2;167;3;27", "wc_limitations": "2;23;23;4", "wc_review": "181;656;251;402", "wc_reply_reviewers": "23;82;86;34", "wc_reply_authors": "10;240;488;21", "reply_reviewers": "1;2;2;1", "reply_authors": "2;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 83.0, 16.355427233796124 ], "wc_strengths_avg": [ 90.0, 64.99615373235558 ], "wc_weaknesses_avg": [ 136.75, 38.46670638357279 ], "wc_questions_avg": [ 49.75, 68.4301651320527 ], "wc_limitations_avg": [ 13.0, 10.024968827881711 ], "wc_review_avg": [ 372.5, 182.12427076037943 ], "wc_reply_reviewers_avg": [ 56.25, 28.05686190577984 ], "wc_reply_authors_avg": [ 189.75, 195.10558039174583 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14235416063032986441&as_sdt=4000005&sciodt=0,18&hl=en", "gs_version_total": 2, "email": "scut.edu.cn;scut.edu.cn;fb.com;sysu.edu.cn;gdut.edu.cn;", "author_num": 6, "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "South China University of Technology;Meta;Sun Yat-sen University;Guangdong University of Technology", "aff_unique_dep": ";Meta Reality Labs;;", "aff_unique_url": "https://www.scut.edu.cn;https://www.meta.com;http://www.sysu.edu.cn;http://www.gdut.edu.cn", "aff_unique_abbr": "SCUT;MRL;SYSU;GDUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Parameter Symmetry and Noise Equilibrium of Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93254", "id": "uhki1rE2NZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uhki1rE2NZ", "openreview": "https://openreview.net/forum?id=uhki1rE2NZ", "poster": "", "project": "", "author_site": "Liu Ziyin, Mingze Wang, Hongchao Li, Lei Wu", "tldr": "", "abstract": "Symmetries are prevalent in deep learning and can significantly influence the learning dynamics of neural networks. In this paper, we examine how exponential symmetries -- a broad subclass of continuous symmetries present in the model architecture or loss function -- interplay with stochastic gradient descent (SGD). We first prove that gradient noise creates a systematic motion (a ``Noether flow\") of the parameters $\\theta$ along the degenerate direction to a unique initialization-independent fixed point $\\theta^*$. These points are referred to as the noise equilibria because, at these points, noise contributions from different directions are balanced and aligned. Then, we show that the balance and alignment of gradient noise can serve as a novel alternative mechanism for explaining important phenomena such as progressive sharpening/flattening and representation formation within neural networks and have practical implications for understanding techniques like representation normalization and warmup.", "keywords": "SGD;fixed point;symmetry", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Liu Ziyin;Mingze Wang;Hongchao Li;Lei Wu", "authorids": "~Liu_Ziyin1;~Mingze_Wang2;~Hongchao_Li2;~Lei_Wu1", "gender": ";;M;M", "homepage": "https://www.mit.edu/~ziyinl/;https://wmz9.github.io/;https://sites.google.com/view/condmat-hongchaoli;https://leiwu0.github.io/", "dblp": ";296/7556;;", "google_scholar": "NpN9oRMAAAAJ;CkU47X0AAAAJ;;CMweeYcAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Liu_Ziyin1;~Mingze_Wang2;~Hongchao_Li2;~Lei_Wu1", "aff": "Massachusetts Institute of Technology;Peking University;;Peking University", "aff_domain": "mit.edu;pku.edu.cn;;math.pku.edu.cn", "position": "Postdoc;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nziyin2024parameter,\ntitle={Parameter Symmetry and Noise Equilibrium of Stochastic Gradient Descent},\nauthor={Liu Ziyin and Mingze Wang and Hongchao Li and Lei Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uhki1rE2NZ}\n}", "github": "", "reviewers": "HyRa;QY9h;rhMv", "pdf_size": 1610539, "rating": "5;7;7", "confidence": "3;4;4", "soundness": "2;2;3", "novelty": "2;4;4", "presentation": "2;3;3", "wc_summary": "19;51;102", "wc_strengths": "65;61;99", "wc_weaknesses": "30;143;141", "wc_questions": "536;141;689", "wc_limitations": "1;14;90", "wc_review": "651;410;1121", "wc_reply_reviewers": "118;133;162", "wc_reply_authors": "907;0;107", "reply_reviewers": "2;1;1", "reply_authors": "3;1;3", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 57.333333333333336, 34.179265969622904 ], "wc_strengths_avg": [ 75.0, 17.048949136725895 ], "wc_weaknesses_avg": [ 104.66666666666667, 52.80361940456566 ], "wc_questions_avg": [ 455.3333333333333, 230.87707166850115 ], "wc_limitations_avg": [ 35.0, 39.25132694147634 ], "wc_review_avg": [ 727.3333333333334, 295.2403916961378 ], "wc_reply_reviewers_avg": [ 137.66666666666666, 18.263503375736967 ], "wc_reply_authors_avg": [ 338.0, 404.70812527878246 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999998, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17567357611796494618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "mit.edu;pku.edu.cn;;math.pku.edu.cn", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;http://www.pku.edu.cn", "aff_unique_abbr": "MIT;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "title": "FIFO-Diffusion: Generating Infinite Videos from Text without Training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93253", "id": "uikhNa4wam", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uikhNa4wam", "openreview": "https://openreview.net/forum?id=uikhNa4wam", "poster": "/media/PosterPDFs/NeurIPS%202024/93253.png?t=1733113654.6448336", "project": "", "author_site": "Jihwan Kim, Junoh Kang, Jinyoung Choi, Bohyung Han", "tldr": "", "abstract": "We propose a novel inference technique based on a pretrained diffusion model for text-conditional video generation. Our approach, called FIFO-Diffusion, is conceptually capable of generating infinitely long videos without additional training. This is achieved by iteratively performing diagonal denoising, which simultaneously processes a series of consecutive frames with increasing noise levels in a queue; our method dequeues a fully denoised frame at the head while enqueuing a new random noise frame at the tail. However, diagonal denoising is a double-edged sword as the frames near the tail can take advantage of cleaner frames by forward reference but such a strategy induces the discrepancy between training and inference. Hence, we introduce latent partitioning to reduce the training-inference gap and lookahead denoising to leverage the benefit of forward referencing. Practically, FIFO-Diffusion consumes a constant amount of memory regardless of the target video length given a baseline model, while well-suited for parallel inference on multiple GPUs. We have demonstrated the promising results and effectiveness of the proposed methods on existing text-to-video generation baselines. Generated video examples and source codes are available at our project page.", "keywords": "generative models;diffusion;long video generation;tuning-free", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/ebc18793a282e835776f79c4a8b2c1618f01e402.zip", "author": "Jihwan Kim;Junoh Kang;Jinyoung Choi;Bohyung Han", "authorids": "~Jihwan_Kim5;~Junoh_Kang2;~Jinyoung_Choi2;~Bohyung_Han1", "gender": "M;M;F;Not Specified", "homepage": "https://jjihwan.github.io/;https://junoh-kang.github.io/;;http://cvlab.snu.ac.kr/~bhhan", "dblp": ";355/1822;;73/4880.html", "google_scholar": "QmvZVFQAAAAJ;TLGqhucAAAAJ;https://scholar.google.com/citations?hl=en;9aaeCToAAAAJ", "orcid": ";;;", "linkedin": "jjihwan/;junohkang;jinyoung-choi-7b7470189/;", "or_profile": "~Jihwan_Kim5;~Junoh_Kang2;~Jinyoung_Choi2;~Bohyung_Han1", "aff": "Seoul National University;Adobe Systems;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;adobe.com;snu.ac.kr;snu.ac.kr", "position": "MS student;Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2024fifodiffusion,\ntitle={{FIFO}-Diffusion: Generating Infinite Videos from Text without Training},\nauthor={Jihwan Kim and Junoh Kang and Jinyoung Choi and Bohyung Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uikhNa4wam}\n}", "github": "", "reviewers": "uUJo;Tbwe;Hj7n;bkyd", "pdf_size": 20407753, "rating": "3;4;6;8", "confidence": "4;5;5;5", "soundness": "2;2;3;4", "novelty": "2;1;3;4", "presentation": "2;3;2;3", "wc_summary": "46;131;157;155", "wc_strengths": "75;26;58;118", "wc_weaknesses": "175;172;284;128", "wc_questions": "54;31;156;27", "wc_limitations": "1;9;25;1", "wc_review": "351;369;680;429", "wc_reply_reviewers": "38;178;19;0", "wc_reply_authors": "243;483;23;72", "reply_reviewers": "1;1;1;0", "reply_authors": "3;3;2;2", "rating_avg": [ 5.25, 1.920286436967152 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.25, 45.19610049550735 ], "wc_strengths_avg": [ 69.25, 33.191678173903775 ], "wc_weaknesses_avg": [ 189.75, 57.508151596099836 ], "wc_questions_avg": [ 67.0, 52.407060593015515 ], "wc_limitations_avg": [ 9.0, 9.797958971132712 ], "wc_review_avg": [ 457.25, 131.80738788095303 ], "wc_reply_reviewers_avg": [ 58.75, 70.14761221880613 ], "wc_reply_authors_avg": [ 205.25, 179.95884946287026 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.676481425202546, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18178293149005371050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "snu.ac.kr;adobe.com;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Seoul National University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.snu.ac.kr;https://www.adobe.com", "aff_unique_abbr": "SNU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "JiuZhang3.0: Efficiently Improving Mathematical Reasoning by Training Small Data Synthesis Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93252", "id": "ujDKXWTbJX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ujDKXWTbJX", "openreview": "https://openreview.net/forum?id=ujDKXWTbJX", "poster": "", "project": "", "author_site": "Kun Zhou, Beichen Zhang, jiapeng wang, Zhipeng Chen, Xin Zhao, Jing Sha, Zhichao Sheng, Shijin Wang, Ji-Rong Wen", "tldr": "", "abstract": "Mathematical reasoning is an important capability of large language models~(LLMs) for real-world applications.\nTo enhance this capability, existing work either collects large-scale math-related texts for pre-training, or relies on stronger LLMs (\\eg GPT-4) to synthesize massive math problems. Both types of work generally lead to large costs in training or synthesis.\nTo reduce the cost, based on open-source available texts, we propose an efficient way that trains a small LLM for math problem synthesis, to efficiently generate sufficient high-quality pre-training data.\nTo achieve it, we create a dataset using GPT-4 to distill its data synthesis capability into the small LLM.\nConcretely, we craft a set of prompts based on human education stages to guide GPT-4, to synthesize problems covering diverse math knowledge and difficulty levels.\nBesides, we adopt the gradient-based influence estimation method to select the most valuable math-related texts.\nThe both are fed into GPT-4 for creating the knowledge distillation dataset to train the small LLM.\nWe leverage it to synthesize 6 million math problems for pre-training our JiuZhang3.0 model. The whole process only needs to invoke GPT-4 API 9.3k times and use 4.6B data for training.\nExperimental results have shown that JiuZhang3.0 achieves state-of-the-art performance on several mathematical reasoning datasets, under both natural language reasoning and tool manipulation settings.\nOur code and data will be publicly released in \\url{https://github.com/RUCAIBox/JiuZhang3.0}.", "keywords": "Large Language Models;Mathematical Reasoning;Data Synthesis", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/5a90b475df805b2586cd49013f9c59152b311009.zip", "author": "Kun Zhou;Beichen Zhang;jiapeng wang;Zhipeng Chen;Xin Zhao;Jing Sha;Zhichao Sheng;Shijin Wang;Ji-Rong Wen", "authorids": "~Kun_Zhou2;~Beichen_Zhang1;~jiapeng_wang4;~Zhipeng_Chen2;~Xin_Zhao10;~Jing_Sha1;~Zhichao_Sheng1;~Shijin_Wang1;~Ji-Rong_Wen1", "gender": "M;M;M;M;M;M;;M;M", "homepage": "https://lancelot39.github.io/;https://github.com/ToheartZhang;https://github.com/wangjp1010;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;;;;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "48/3927-2.html;71/9257;;;https://dblp.uni-trier.de/pid/52/8700.html;96/5272;;74/5750-1.html;w/JRWen", "google_scholar": "bmRJVjwAAAAJ;;;wMoUf6wAAAAJ;JNhNacoAAAAJ;;;;tbxCHJgAAAAJ", "orcid": ";;;;0000-0002-8333-6196;;;0000-0002-9202-7678;0000-0002-9777-9676", "linkedin": ";;;;;jing-sha-52482737/;;;", "or_profile": "~Kun_Zhou2;~Beichen_Zhang1;~jiapeng_wang4;~Zhipeng_Chen2;~Xin_Zhao10;~Jing_Sha1;~Zhichao_Sheng1;~Shijin_Wang1;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;Tianjin University;Renmin University of China;Renmin University of China;iFLYTEK Research;IFLYTEK CO.LTD.;State Key Laboratory of Cognitive Intelligence;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;tju.edu.cn;ruc.edu.cn;ruc.edu.cn;iflytek.com;iflytek.com;iflytek.com;ruc.edu.cn", "position": "PhD student;MS student;Undergrad student;PhD student;Full Professor;Researcher;Researcher;Vice Dean;Full Professor", "bibtex": "@inproceedings{\nzhou2024jiuzhang,\ntitle={JiuZhang3.0: Efficiently Improving Mathematical Reasoning by Training Small Data Synthesis Models},\nauthor={Kun Zhou and Beichen Zhang and jiapeng wang and Zhipeng Chen and Xin Zhao and Jing Sha and Zhichao Sheng and Shijin Wang and Ji-Rong Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ujDKXWTbJX}\n}", "github": "", "reviewers": "FBVa;1oiT;trb5;rcgh", "pdf_size": 1628474, "rating": "5;5;7;8", "confidence": "3;5;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "58;124;109;223", "wc_strengths": "51;132;63;246", "wc_weaknesses": "152;14;118;502", "wc_questions": "128;227;124;96", "wc_limitations": "7;1;2;21", "wc_review": "396;498;416;1088", "wc_reply_reviewers": "9;66;9;564", "wc_reply_authors": "661;1631;571;3334", "reply_reviewers": "1;1;1;4", "reply_authors": "4;5;4;8", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 128.5, 59.79339428398425 ], "wc_strengths_avg": [ 123.0, 77.44998386055352 ], "wc_weaknesses_avg": [ 196.5, 183.56129766375045 ], "wc_questions_avg": [ 143.75, 49.62043429878461 ], "wc_limitations_avg": [ 7.75, 7.980444849756184 ], "wc_review_avg": [ 599.5, 284.6133341921984 ], "wc_reply_reviewers_avg": [ 162.0, 233.2584403617584 ], "wc_reply_authors_avg": [ 1549.25, 1111.0779394353935 ], "reply_reviewers_avg": [ 1.75, 1.299038105676658 ], "reply_authors_avg": [ 5.25, 1.6393596310755 ], "replies_avg": [ 36, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=648889056368672816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ruc.edu.cn;ruc.edu.cn;tju.edu.cn;ruc.edu.cn;ruc.edu.cn;iflytek.com;iflytek.com;iflytek.com;ruc.edu.cn", "author_num": 9, "aff_unique_index": "0;0;1;0;0;2;2;3;0", "aff_unique_norm": "Renmin University of China;Tianjin University;iFLYTEK;State Key Laboratory of Cognitive Intelligence", "aff_unique_dep": ";;Research;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.tju.edu.cn;https://www.iflytek.com;", "aff_unique_abbr": "RUC;TJU;iFLYTEK;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Octopus: A Multi-modal LLM with Parallel Recognition and Sequential Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93251", "id": "ujE83r50tR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ujE83r50tR", "openreview": "https://openreview.net/forum?id=ujE83r50tR", "poster": "/media/PosterPDFs/NeurIPS%202024/93251.png?t=1732190236.427092", "project": "", "author_site": "Chuyang Zhao, YuXin Song, Junru Chen, KANG RONG, Haocheng Feng, Gang Zhang, Shufan Ji, Jingdong Wang, Errui Ding, Yifan Sun", "tldr": "", "abstract": "A mainstream of Multi-modal Large Language Models (MLLMs) have two essential functions, i.e., visual recognition (e.g., grounding) and understanding (e.g., visual question answering). Presently, all these MLLMs integrate visual recognition and understanding in a same sequential manner in the LLM head, i.e., generating the response token-by-token for both recognition and understanding. We think unifying them in the same sequential manner is not optimal for two reasons: 1) parallel recognition is more efficient than sequential recognition and is actually prevailing in deep visual recognition, and 2) the recognition results can be integrated to help high-level cognition (while the current manner does not). Such motivated, this paper proposes a novel \u201cparallel recognition \u2192 sequential understanding\u201d framework for MLLMs. The bottom LLM layers are utilized for parallel recognition and the recognition results are relayed into the top LLM layers for sequential understanding. Specifically, parallel recognition in the bottom LLM layers is implemented via object queries, a popular mechanism in DEtection TRansformer, which we find to harmonize well with the LLM layers. Empirical studies show our MLLM named Octopus improves accuracy on popular MLLM tasks and is up to 5\u00d7 faster on visual grounding tasks.", "keywords": "multimodal large language model;large language model;object detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chuyang Zhao;YuXin Song;Junru Chen;KANG RONG;Haocheng Feng;Gang Zhang;Shufan Ji;Jingdong Wang;Errui Ding;Yifan Sun", "authorids": "~Chuyang_Zhao3;~YuXin_Song1;~Junru_Chen4;~KANG_RONG1;~Haocheng_Feng1;~Gang_Zhang2;~Shufan_Ji1;~Jingdong_Wang1;~Errui_Ding2;~Yifan_Sun2", "gender": "M;M;M;M;M;M;M;M;M;F", "homepage": "https://github.com/ZhaoChuyang;https://github.com/byrsongyuxin;https://github.com/JunruChen-Image;https://github.com/rover5056;https://github.com/godson1024;https://jingdongwang2017.github.io/;;https://yifansun-reid.github.io;;http://www.buaa.edu.cn", "dblp": "322/8350.html;;;;37/4096;49/3441;180/5531;99/10261-3.html;151/7248;", "google_scholar": ";;;;;z5SPCmgAAAAJ;1wzEtxcAAAAJ;uUZEL7UAAAAJ;https://scholar.google.com.hk/citations?user=pnuQ5UsAAAAJ;", "orcid": ";;;;;0000-0002-4888-4445;;0000-0003-3532-6521;;", "linkedin": ";;;;;;;;;", "or_profile": "~Chuyang_Zhao3;~YuXin_Song1;~Junru_Chen4;~KANG_RONG1;~Gang_Zhang2;~Jingdong_Wang1;~Errui_Ding2;~Yifan_Sun2;~Feng_Haocheng1;~Ji_Shufan1", "aff": "Baidu;Baidu;Baidu;WeChat Vision, Tencent Inc.;Baidu;Baidu;Baidu;Baidu;Baidu;", "aff_domain": "baidu.com;baidu.com;baidu.com;tencent.com;baidu.com;baidu.com;baidu.com;baidu.com;baidu.com;", "position": "Researcher;Researcher;Researcher;Researcher;senior engineer;Chief Scientist for Computer Vision;Director;Senior Expert;Technical Manager;", "bibtex": "@inproceedings{\nzhao2024octopus,\ntitle={Octopus: A Multi-modal {LLM} with Parallel Recognition and Sequential Understanding},\nauthor={Chuyang Zhao and YuXin Song and Junru Chen and KANG RONG and Haocheng Feng and Gang Zhang and Shufan Ji and Jingdong Wang and Errui Ding and Yifan Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ujE83r50tR}\n}", "github": "", "reviewers": "u3Ag;Up36;aS3o;hBAZ", "pdf_size": 2565565, "rating": "5;5;6;6", "confidence": "5;5;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "69;79;64;68", "wc_strengths": "49;46;38;59", "wc_weaknesses": "171;268;64;73", "wc_questions": "3;31;12;17", "wc_limitations": "1;52;1;1", "wc_review": "293;476;179;218", "wc_reply_reviewers": "106;0;21;26", "wc_reply_authors": "85;0;50;47", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 70.0, 5.522680508593631 ], "wc_strengths_avg": [ 48.0, 7.516648189186454 ], "wc_weaknesses_avg": [ 144.0, 82.9849383924577 ], "wc_questions_avg": [ 15.75, 10.133484099755622 ], "wc_limitations_avg": [ 13.75, 22.083647796503186 ], "wc_review_avg": [ 291.5, 114.12821736976356 ], "wc_reply_reviewers_avg": [ 38.25, 40.31361432568407 ], "wc_reply_authors_avg": [ 45.5, 30.220026472523152 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.9045340337332909, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12424552699157748659&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "baidu.com;baidu.com;baidu.com;tencent.com;baidu.com;baidu.com;baidu.com;baidu.com;baidu.com;", "author_num": 10, "aff_unique_index": "0;0;0;1;0;0;0;0;0", "aff_unique_norm": "Baidu;Tencent", "aff_unique_dep": "Baidu, Inc.;WeChat Vision", "aff_unique_url": "https://www.baidu.com;https://www.tencent.com", "aff_unique_abbr": "Baidu;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Drago: Primal-Dual Coupled Variance Reduction for Faster Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93250", "id": "ujk0XrNTQZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ujk0XrNTQZ", "openreview": "https://openreview.net/forum?id=ujk0XrNTQZ", "poster": "", "project": "", "author_site": "Ronak Mehta, Jelena Diakonikolas, Zaid Harchaoui", "tldr": "", "abstract": "We consider the penalized distributionally robust optimization (DRO) problem with a closed, convex uncertainty set, a setting that encompasses learning using $f$-DRO and spectral/$L$-risk minimization. We present Drago, a stochastic primal-dual algorithm which combines cyclic and randomized components with a carefully regularized primal update to achieve dual variance reduction. Owing to its design, Drago enjoys a state-of-the-art linear convergence rate on strongly convex-strongly concave DRO problems witha fine-grained dependency on primal and dual condition numbers. The theoretical results are supported with numerical benchmarks on regression and classification tasks.", "keywords": "Distributionally Robust Optimization;Stochastic Optimization;Convex Optimization;Saddle Point", "primary_area": "optimization", "supplementary_material": "/attachment/a94b5e960d643fba6fb198b200dfb2abcad90299.zip", "author": "Ronak Mehta;Jelena Diakonikolas;Zaid Harchaoui", "authorids": "~Ronak_Mehta2;~Jelena_Diakonikolas2;~Zaid_Harchaoui1", "gender": ";F;", "homepage": ";http://www.jelena-diakonikolas.com/;", "dblp": ";147/5178;", "google_scholar": ";J8ixfu8AAAAJ;", "orcid": ";0000-0003-3439-0310;", "linkedin": ";;", "or_profile": "~Ronak_Mehta2;~Jelena_Diakonikolas2;~Zaid_Harchaoui1", "aff": ";University of Wisconsin, Madison;", "aff_domain": ";wisc.edu;", "position": ";Assistant Professor;", "bibtex": "@inproceedings{\nmehta2024drago,\ntitle={Drago: Primal-Dual Coupled Variance Reduction for Faster Distributionally Robust Optimization},\nauthor={Ronak Mehta and Jelena Diakonikolas and Zaid Harchaoui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ujk0XrNTQZ}\n}", "github": "", "reviewers": "ccWo;WLa8;VCV6;P67z;em1q", "pdf_size": 992847, "rating": "5;6;7;7;8", "confidence": "2;3;3;4;3", "soundness": "3;3;3;3;4", "novelty": "3;2;3;3;3", "presentation": "3;3;2;2;3", "wc_summary": "19;54;106;64;198", "wc_strengths": "21;4;50;71;84", "wc_weaknesses": "22;34;675;126;523", "wc_questions": "1;111;90;72;308", "wc_limitations": "1;19;9;5;1", "wc_review": "64;222;930;338;1114", "wc_reply_reviewers": "13;14;379;35;377", "wc_reply_authors": "0;0;545;0;242", "reply_reviewers": "1;1;2;1;2", "reply_authors": "1;1;4;1;2", "rating_avg": [ 6.6, 1.0198039027185568 ], "confidence_avg": [ 3.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 88.2, 61.50902372823031 ], "wc_strengths_avg": [ 46.0, 29.913207785190806 ], "wc_weaknesses_avg": [ 276.0, 270.47735579896516 ], "wc_questions_avg": [ 116.4, 102.68904518009698 ], "wc_limitations_avg": [ 7.0, 6.6932802122726045 ], "wc_review_avg": [ 533.6, 412.28029300464993 ], "wc_reply_reviewers_avg": [ 163.6, 175.2342432288849 ], "wc_reply_authors_avg": [ 157.4, 215.27433660332113 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 1.8, 1.1661903789690602 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.6201736729460423, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3546663817158679192&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 2, "email": ";wisc.edu;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "University of Wisconsin", "aff_unique_dep": "", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "AlterMOMA: Fusion Redundancy Pruning for Camera-LiDAR Fusion Models with Alternative Modality Masking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93249", "id": "ujwIlTNrAP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ujwIlTNrAP", "openreview": "https://openreview.net/forum?id=ujwIlTNrAP", "poster": "/media/PosterPDFs/NeurIPS%202024/93249.png?t=1733575270.5263453", "project": "", "author_site": "shiqi sun, Yantao Lu, Ning Liu, Bo Jiang, Jinchao Chen, Ying Zhang", "tldr": "", "abstract": "Camera-LiDAR fusion models significantly enhance perception performance in autonomous driving. The fusion mechanism leverages the strengths of each modality while minimizing their weaknesses. Moreover, in practice, camera-LiDAR fusion models utilize pre-trained backbones for efficient training. However, we argue that directly loading single-modal pre-trained camera and LiDAR backbones into camera-LiDAR fusion models introduces similar feature redundancy across modalities due to the nature of the fusion mechanism. Unfortunately, existing pruning methods are developed explicitly for single-modal models, and thus, they struggle to effectively identify these specific redundant parameters in camera-LiDAR fusion models. In this paper, to address the issue above on camera-LiDAR fusion models, we propose a novelty pruning framework Alternative Modality Masking Pruning (AlterMOMA), which employs alternative masking on each modality and identifies the redundant parameters. Specifically, when one modality parameters are masked (deactivated), the absence of features from the masked backbone compels the model to reactivate previous redundant features of the other modality backbone. Therefore, these redundant features and relevant redundant parameters can be identified via the reactivation process. The redundant parameters can be pruned by our proposed importance score evaluation function, Alternative Evaluation (AlterEva), which is based on the observation of the loss changes when certain modality parameters are activated and deactivated. Extensive experiments on the nuScene and KITTI datasets encompassing diverse tasks, baseline models, and pruning algorithms showcase that AlterMOMA outperforms existing pruning methods, attaining state-of-the-art performance.", "keywords": "Network Pruning; Camera-LiDAR fusion models; Perception of Autonomous Driving;", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Shiqi Sun;Yantao Lu;Ning Liu;Bo Jiang;Jinchao Chen;Ying Zhang", "authorids": "~Shiqi_Sun3;~Yantao_Lu1;~Ning_Liu4;~Bo_Jiang1;~Jinchao_Chen1;~Ying_Zhang25", "gender": "M;M;M;;M;M", "homepage": ";;;;;https://teacher.nwpu.edu.cn/yingzhang.html", "dblp": ";131/1381;83/622-7;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;kBUsho4AAAAJ;OFOJM5MAAAAJ;;;", "orcid": ";0000-0002-3103-1067;0000-0003-4943-6625;;0000-0001-6234-1001;", "linkedin": ";;;;;", "or_profile": "~Shiqi_Sun3;~Yantao_Lu1;~Ning_Liu4;~Bo_Jiang1;~Jinchao_Chen1;~Ying_Zhang25", "aff": "Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Midea Group;;Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an", "aff_domain": "nwpu.edu.cn;nwpu.edu.cn;midea.com;;nwpu.edu.cn;nwpu.edu.cn", "position": "PhD student;Assistant Professor;Researcher;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nsun2024altermoma,\ntitle={Alter{MOMA}: Fusion Redundancy Pruning for Camera-Li{DAR} Fusion Models with Alternative Modality Masking},\nauthor={Shiqi Sun and Yantao Lu and Ning Liu and Bo Jiang and Jinchao Chen and Ying Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ujwIlTNrAP}\n}", "github": "", "reviewers": "fjuq;62BJ;hcok", "pdf_size": 737905, "rating": "6;7;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;4;3", "presentation": "3;4;4", "wc_summary": "57;59;45", "wc_strengths": "52;70;53", "wc_weaknesses": "81;15;71", "wc_questions": "12;10;45", "wc_limitations": "13;25;8", "wc_review": "215;179;222", "wc_reply_reviewers": "21;18;77", "wc_reply_authors": "25;49;78", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 53.666666666666664, 6.182412330330469 ], "wc_strengths_avg": [ 58.333333333333336, 8.259674462242579 ], "wc_weaknesses_avg": [ 55.666666666666664, 29.044027881055953 ], "wc_questions_avg": [ 22.333333333333332, 16.048537489614297 ], "wc_limitations_avg": [ 15.333333333333334, 7.133644853010899 ], "wc_review_avg": [ 205.33333333333334, 18.83849486792639 ], "wc_reply_reviewers_avg": [ 38.666666666666664, 27.13341523329163 ], "wc_reply_authors_avg": [ 50.666666666666664, 21.669230617526676 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:naKC33KMb5sJ:scholar.google.com/&scioq=AlterMOMA:+Fusion+Redundancy+Pruning+for+Camera-LiDAR+Fusion+Models+with+Alternative+Modality+Masking&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "nwpu.edu.cn;nwpu.edu.cn;midea.com;;nwpu.edu.cn;nwpu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Northwest Polytechnical University;Midea Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.nwpu.edu.cn;https://www.mideaglobal.com", "aff_unique_abbr": "NWPU;Midea", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DOFEN: Deep Oblivious Forest ENsemble", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93248", "id": "umukvCdGI6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=umukvCdGI6", "openreview": "https://openreview.net/forum?id=umukvCdGI6", "poster": "/media/PosterPDFs/NeurIPS%202024/93248.png?t=1731487110.8928745", "project": "", "author_site": "KuanYu Chen, Ping-Han Chiang, Hsin-Rung Chou, Chih-Sheng Chen, Tien-Hao Chang", "tldr": "", "abstract": "Deep Neural Networks (DNNs) have revolutionized artificial intelligence, achieving impressive results on diverse data types, including images, videos, and texts. However, DNNs still lag behind Gradient Boosting Decision Trees (GBDT) on tabular data, a format extensively utilized across various domains. This paper introduces DOFEN, which stands for Deep Oblivious Forest ENsemble. DOFEN is a novel DNN architecture inspired by oblivious decision trees and achieves on-off sparse selection of columns. DOFEN surpasses other DNNs on tabular data, achieving state-of-the-art performance on the well-recognized benchmark: Tabular Benchmark, which includes 73 total datasets spanning a wide array of domains. The code of DOFEN is available at: https://github.com/Sinopac-Digital-Technology-Division/DOFEN", "keywords": "Tabular Data;Structured Data;Deep Neural Network;Architecture Design", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/679152873f52bb3e925767f5a9c8a74e6ef96d8b.zip", "author": "Kuan-Yu Chen;Ping-Han Chiang;Hsin-Rung Chou;Chih-Sheng Chen;Tien-Hao Chang", "authorids": "~Kuan-Yu_Chen2;~Ping-Han_Chiang1;~Hsin-Rung_Chou2;~Chih-Sheng_Chen1;~Tien-Hao_Chang1", "gender": "M;M;M;;", "homepage": ";;;https://www.ee.ncku.edu.tw/en/teacher/index2.php?teacher_id=32;https://sherry40931.github.io/", "dblp": ";;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.tw/citations?user=kJkCIykAAAAJ", "orcid": ";;;;", "linkedin": "%E5%86%A0%E5%AE%87-%E9%99%B3-590288152/;ping-han-chiang/;%E6%99%BA%E8%81%96-%E9%99%B3-5b3814253/;;hsinrungsherrychou/", "or_profile": "~Kuan-Yu_Chen2;~Ping-Han_Chiang1;~Chih-Sheng_Chen1;~Tien-Hao_Chang1;~Chou_Hsin-Rung1", "aff": "Sinopac Holdings;Sinopac Holdings;Sinopac Holdings;National Cheng Kung University;Sinopac Holdings", "aff_domain": "sinopac.com;sinopac.com;sinopac.com;ncku.edu.tw;sinopac.com", "position": "Machine Learning Engineer;Machine Learning Engineer;Machine Learning Engineer;Full Professor;Machine Learning Engineer", "bibtex": "@inproceedings{\nchen2024dofen,\ntitle={{DOFEN}: Deep Oblivious Forest {EN}semble},\nauthor={Kuan-Yu Chen and Ping-Han Chiang and Hsin-Rung Chou and Chih-Sheng Chen and Tien-Hao Chang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=umukvCdGI6}\n}", "github": "", "reviewers": "K3P3;EjLV;PzWm;qMj5", "pdf_size": 2307164, "rating": "4;5;5;7", "confidence": "3;4;4;4", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "125;91;129;51", "wc_strengths": "54;214;79;62", "wc_weaknesses": "66;121;81;65", "wc_questions": "56;160;109;447", "wc_limitations": "15;90;5;21", "wc_review": "316;676;403;646", "wc_reply_reviewers": "473;10;0;259", "wc_reply_authors": "1266;0;0;345", "reply_reviewers": "4;1;0;3", "reply_authors": "7;1;1;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 99.0, 31.400636936215164 ], "wc_strengths_avg": [ 102.25, 65.14742895924597 ], "wc_weaknesses_avg": [ 83.25, 22.69774217846348 ], "wc_questions_avg": [ 193.0, 151.18697033805526 ], "wc_limitations_avg": [ 32.75, 33.54381463101655 ], "wc_review_avg": [ 510.25, 154.22122908341768 ], "wc_reply_reviewers_avg": [ 185.5, 195.7479246377851 ], "wc_reply_authors_avg": [ 402.75, 517.916680075087 ], "reply_reviewers_avg": [ 2.0, 1.5811388300841898 ], "reply_authors_avg": [ 2.75, 2.48746859276655 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1448937238513660316&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "sinopac.com;sinopac.com;sinopac.com;ncku.edu.tw;sinopac.com", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Sinopac Holdings;National Cheng Kung University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sinopac.com;https://www.ncku.edu.tw", "aff_unique_abbr": ";NCKU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Mean-Field Analysis for Learning Subspace-Sparse Polynomials with Gaussian Input", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93247", "id": "unA5hxIn6v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=unA5hxIn6v", "openreview": "https://openreview.net/forum?id=unA5hxIn6v", "poster": "/media/PosterPDFs/NeurIPS%202024/93247.png?t=1733904996.5612712", "project": "", "author_site": "Ziang Chen, Rong Ge", "tldr": "", "abstract": "In this work, we study the mean-field flow for learning subspace-sparse polynomials using stochastic gradient descent and two-layer neural networks, where the input distribution is standard Gaussian and the output only depends on the projection of the input onto a low-dimensional subspace. We establish a necessary condition for SGD-learnability, involving both the characteristics of the target function and the expressiveness of the activation function. In addition, we prove that the condition is almost sufficient, in the sense that a condition slightly stronger than the necessary condition can guarantee the exponential decay of the loss functional to zero.", "keywords": "Subspace-sparse polynomial;merged-staircase property;algebraic independence;mean-field analysis;stochastic gradient descent", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ziang Chen;Rong Ge", "authorids": "~Ziang_Chen1;~Rong_Ge1", "gender": "M;M", "homepage": "https://sites.duke.edu/ziangchen/;https://users.cs.duke.edu/~rongge/", "dblp": ";89/6869-1.html", "google_scholar": "odvrFvIAAAAJ;https://scholar.google.com.tw/citations?user=MVxcjEoAAAAJ", "orcid": "0000-0002-8298-5223;", "linkedin": ";", "or_profile": "~Ziang_Chen1;~Rong_Ge1", "aff": "Massachusetts Institute of Technology;Duke University", "aff_domain": "mit.edu;duke.edu", "position": "Instructor;Associate Professor", "bibtex": "@inproceedings{\nchen2024meanfield,\ntitle={Mean-Field Analysis for Learning Subspace-Sparse Polynomials with Gaussian Input},\nauthor={Ziang Chen and Rong Ge},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=unA5hxIn6v}\n}", "github": "", "reviewers": "JjNH;7m62;kar1;nSKZ", "pdf_size": 440365, "rating": "5;5;6;7", "confidence": "3;4;3;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "94;85;104;70", "wc_strengths": "52;56;40;60", "wc_weaknesses": "353;301;80;3", "wc_questions": "3;61;166;2", "wc_limitations": "3;1;4;1", "wc_review": "505;504;394;136", "wc_reply_reviewers": "242;494;155;0", "wc_reply_authors": "313;163;74;0", "reply_reviewers": "2;2;1;0", "reply_authors": "3;3;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.25, 12.497499749949988 ], "wc_strengths_avg": [ 52.0, 7.483314773547883 ], "wc_weaknesses_avg": [ 184.25, 146.4810141281115 ], "wc_questions_avg": [ 58.0, 66.7720001198107 ], "wc_limitations_avg": [ 2.25, 1.299038105676658 ], "wc_review_avg": [ 384.75, 150.53467208586864 ], "wc_reply_reviewers_avg": [ 222.75, 178.99354038623852 ], "wc_reply_authors_avg": [ 137.5, 116.60724677308868 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ydvOiYq2vBYJ:scholar.google.com/&scioq=Mean-Field+Analysis+for+Learning+Subspace-Sparse+Polynomials+with+Gaussian+Input&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "mit.edu;duke.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.duke.edu", "aff_unique_abbr": "MIT;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "unMRtFfNhF", "title": "Data Debugging is NP-hard for Classifiers Trained with SGD", "track": "main", "status": "Reject", "tldr": "", "abstract": "Data debugging is to find a subset of the training data such that the model obtained by retraining on the subset has a better accuracy.\nA bunch of heuristic approaches are proposed, however, none of them are guaranteed to solve this problem effectively.\nThis leaves an open issue whether there exists an efficient algorithm to find the subset such that the model obtained by retraining on it has a better accuracy.\nTo answer this open question and provide theoretical basis for further study on developing better algorithms for data debugging, we investigate the computational complexity of the problem named \\textsc{Debuggable}.\nGiven a machine learning model $\\mathcal{M}$ obtained by training on dataset $D$ and a test instance $(\\mathbf{x}_\\text{test},y_\\text{test})$ where $\\mathcal{M}(\\mathbf{x}_\\text{test})\\neq y_\\text{test}$, \\textsc{Debuggable} is to determine whether there exists a subset $D^\\prime$ of $D$ such that the model $\\mathcal{M}^\\prime$ obtained by retraining on $D^\\prime$ satisfies $\\mathcal{M}^\\prime(\\mathbf{x}_\\text{test})=y_\\text{test}$.\nTo cover a wide range of commonly used models, we take SGD-trained linear classifier as the model and derive the following main results.\n(1) If the loss function and the dimension of the model are not fixed, \\textsc{Debuggable} is NP-complete regardless of the training order in which all the training samples are processed during SGD.\n(2) For hinge-like loss functions, a comprehensive analysis on the computational complexity of \\textsc{Debuggable} is provided;\n(3) If the loss function is a linear function, \\textsc{Debuggable} can be solved in linear time, that is, data debugging can be solved easily in this case.\nThese results not only highlight the limitations of current approaches but also offer new insights into data debugging.", "keywords": "data debugging;machine learning interpretations;computational complexity", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Zizheng Guo;Pengyu Chen;Yanzhang Fu;Dongjing Miao", "authorids": "~Zizheng_Guo6;~Pengyu_Chen4;~Yanzhang_Fu1;~Dongjing_Miao1", "gender": ";M;M;M", "homepage": "https://baka-hit.github.io/;https://chen-py.github.io;https://github.com/fyzdalao;https://homepage.hit.edu.cn/miaodongjing", "dblp": ";;;", "google_scholar": ";;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Zizheng_Guo6;~Pengyu_Chen4;~Yanzhang_Fu1;~Dongjing_Miao1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;stu.hit.edu.cn;hit.edu.cn", "position": "MS student;PhD student;MS student;Full Professor", "bibtex": "@misc{\nanonymous2024data,\ntitle={Data Debugging is {NP}-hard for Classifiers Trained with {SGD}},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=unMRtFfNhF}\n}", "github": "", "project": "", "reviewers": "Fop9;w8aB;7PcW", "site": "https://openreview.net/forum?id=unMRtFfNhF", "pdf_size": 459489, "rating": "3;3;5", "confidence": "4;4;2", "soundness": "2;4;3", "novelty": "2;2;2", "presentation": "3;1;2", "wc_summary": "53;87;62", "wc_strengths": "20;26;104", "wc_weaknesses": "698;346;52", "wc_questions": "110;80;63", "wc_limitations": "16;1;10", "wc_review": "897;540;291", "wc_reply_reviewers": "313;241;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 3.6666666666666665, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 67.33333333333333, 14.383632673594278 ], "wc_strengths_avg": [ 50.0, 38.262252939417984 ], "wc_weaknesses_avg": [ 365.3333333333333, 264.08247869852244 ], "wc_questions_avg": [ 84.33333333333333, 19.430788855719562 ], "wc_limitations_avg": [ 9.0, 6.164414002968976 ], "wc_review_avg": [ 576.0, 248.70464410621688 ], "wc_reply_reviewers_avg": [ 184.66666666666666, 133.84651242706659 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3sSnKrxqIgcJ:scholar.google.com/&scioq=Data+Debugging+is+NP-hard+for+Classifiers+Trained+with+SGD&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning to Reason Iteratively and Parallelly for Complex Visual Reasoning Scenarios", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93246", "id": "uoJQ9qadjY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uoJQ9qadjY", "openreview": "https://openreview.net/forum?id=uoJQ9qadjY", "poster": "", "project": "", "author_site": "Shantanu Jaiswal, Debaditya Roy, Basura Fernando, Cheston Tan", "tldr": "", "abstract": "Complex visual reasoning and question answering (VQA) is a challenging task that requires compositional multi-step processing and higher-level reasoning capabilities beyond the immediate recognition and localization of objects and events. Here, we introduce a fully neural Iterative and Parallel Reasoning Mechanism (IPRM) that combines two distinct forms of computation -- iterative and parallel -- to better address complex VQA scenarios. Specifically, IPRM's \"iterative\" computation facilitates compositional step-by-step reasoning for scenarios wherein individual operations need to be computed, stored, and recalled dynamically (e.g. when computing the query \u201cdetermine the color of pen to the left of the child in red t-shirt sitting at the white table\u201d). Meanwhile, its \"parallel'' computation allows for the simultaneous exploration of different reasoning paths and benefits more robust and efficient execution of operations that are mutually independent (e.g. when counting individual colors for the query: \"determine the maximum occurring color amongst all t-shirts'\"). We design IPRM as a lightweight and fully-differentiable neural module that can be conveniently applied to both transformer and non-transformer vision-language backbones. It notably outperforms prior task-specific methods and transformer-based attention modules across various image and video VQA benchmarks testing distinct complex reasoning capabilities such as compositional spatiotemporal reasoning (AGQA), situational reasoning (STAR), multi-hop reasoning generalization (CLEVR-Humans) and causal event linking (CLEVRER-Humans). Further, IPRM's internal computations can be visualized across reasoning steps, aiding interpretability and diagnosis of its errors.", "keywords": "iterative and parallel computation; complex visual reasoning and question answering; neural network based reasoning architectures", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/76811f0d00966c08128e978fac32b2841a640901.zip", "author": "Shantanu Jaiswal;Debaditya Roy;Basura Fernando;Cheston Tan", "authorids": "~Shantanu_Jaiswal1;~Debaditya_Roy2;~Basura_Fernando1;~Cheston_Tan1", "gender": ";;M;M", "homepage": ";https://sites.google.com/view/debadityaroy/;https://basurafernando.github.io/;", "dblp": "203/9071;150/4133;01/9558;136/9366", "google_scholar": "GmGNq2MAAAAJ;L0Lq-fAAAAAJ;https://scholar.google.com.au/citations?user=GyvseMkAAAAJ;Up0UYEYAAAAJ", "orcid": ";0000-0002-8779-1241;0000-0002-6920-9916;", "linkedin": ";;;cheston-tan/", "or_profile": "~Shantanu_Jaiswal1;~Debaditya_Roy2;~Basura_Fernando1;~Cheston_Tan1", "aff": "Center for Frontier AI Research, A*STAR Singapore; A*STAR;A*STAR;Singapore University of Technology and Design", "aff_domain": "ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;astar.edu.sg;sutd.edu.sg", "position": "Research Engineer;Research Scientist;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\njaiswal2024learning,\ntitle={Learning to Reason Iteratively and Parallelly for Complex Visual Reasoning Scenarios},\nauthor={Shantanu Jaiswal and Debaditya Roy and Basura Fernando and Cheston Tan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uoJQ9qadjY}\n}", "github": "", "reviewers": "oSE3;3tYc;P4wf;nzfV", "pdf_size": 13223959, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "3;4;3;3", "novelty": "3;4;3;3", "presentation": "2;3;3;3", "wc_summary": "116;128;44;124", "wc_strengths": "36;43;37;68", "wc_weaknesses": "165;49;244;7", "wc_questions": "3;57;64;278", "wc_limitations": "1;23;1;9", "wc_review": "321;300;390;486", "wc_reply_reviewers": "0;31;13;88", "wc_reply_authors": "0;17;18;906", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;4", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 103.0, 34.336569426778794 ], "wc_strengths_avg": [ 46.0, 12.98075498574717 ], "wc_weaknesses_avg": [ 116.25, 93.74799997866621 ], "wc_questions_avg": [ 100.5, 105.16296876752767 ], "wc_limitations_avg": [ 8.5, 8.986100377805714 ], "wc_review_avg": [ 374.25, 72.60294415517872 ], "wc_reply_reviewers_avg": [ 33.0, 33.60803475361212 ], "wc_reply_authors_avg": [ 235.25, 387.3237502400285 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nBcQ-F_hCzIJ:scholar.google.com/&scioq=Learning+to+Reason+Iteratively+and+Parallelly+for+Complex+Visual+Reasoning+Scenarios&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;astar.edu.sg;sutd.edu.sg", "author_num": 4, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "A*STAR Singapore;Agency for Science, Technology and Research;Singapore University of Technology and Design", "aff_unique_dep": "Center for Frontier AI Research;;", "aff_unique_url": "https://www.a-star.edu.sg;https://www.a-star.edu.sg;https://www.sutd.edu.sg", "aff_unique_abbr": "A*STAR;A*STAR;SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "up0qqbdeQu", "title": "Class Concept Representation from Contextual Texts for Training-Free Multi-Label Recognition", "track": "main", "status": "Reject", "tldr": "", "abstract": "The power of large vision-language models (VLMs) has been demonstrated for diverse vision tasks including multi-label recognition with training-free approach or prompt tuning by measuring the cosine similarity between the text features related to class names and the visual features of images. Prior works usually formed the class-related text features by averaging simple hand-crafted text prompts with class names (e.g., ``a photo of {class name}''). However, they may not fully exploit the capability of VLMs considering how humans form the concepts on words using rich contexts with the patterns of co-occurrence with other words. Inspired by that, we propose class concept representation for zero-shot multi-label recognition to better exploit rich contexts in the massive descriptions on images (e.g., captions from MS-COCO) using large VLMs. Then, for better aligning visual features of VLMs to our class concept representation, we propose context-guided visual representation that is in the same linear space as class concept representation. Experimental results on diverse benchmarks show that our proposed methods substantially improved the performance of zero-shot methods like Zero-Shot CLIP and yielded better performance than zero-shot prompt tunings that require additional training like TaI-DPT. In addition, our proposed methods can synergetically work with existing prompt tuning methods, consistently improving the performance of DualCoOp and TaI-DPT in a training-free manner with negligible increase in inference time.", "keywords": "Training-free Multi-Label Recognition;Vision-Language Model;Class Concept;Context-Guided Visual Feature", "primary_area": "machine_vision", "supplementary_material": "/attachment/dc683a81784696736593ae393002fc0505346968.zip", "author": "Dong Un Kang;Hyunwoo Lee;Se Young Chun", "authorids": "~Dong_Un_Kang1;~Hyunwoo_Lee1;~Se_Young_Chun2", "gender": "M;M;", "homepage": ";https://www.linkedin.com/in/%ED%98%84%EC%9A%B0-%EC%9D%B4-928a79133/;https://icl.snu.ac.kr", "dblp": "254/2961;55/8846;85/2542", "google_scholar": ";;https://scholar.google.co.kr/citations?user=ntw4vH4AAAAJ", "orcid": ";;0000-0001-8739-8960", "linkedin": "kdu1;%ED%98%84%EC%9A%B0-%EC%9D%B4-928a79133/;sychun-953593206/", "or_profile": "~Dong_Un_Kang1;~Hyunwoo_Lee1;~Se_Young_Chun1", "aff": "Seoul National University;Samsung;Seoul National University", "aff_domain": "snu.ac.kr;samsung.com;snu.ac.kr", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024class,\ntitle={Class Concept Representation from Contextual Texts for Training-Free Multi-Label Recognition},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=up0qqbdeQu}\n}", "github": "", "project": "", "reviewers": "xF3A;TJWi;jAJ4;7zr3", "site": "https://openreview.net/forum?id=up0qqbdeQu", "pdf_size": 6565954, "rating": "4;5;5;5", "confidence": "4;4;4;4", "soundness": "3;3;2;3", "novelty": "3;3;2;2", "presentation": "3;2;2;2", "wc_summary": "81;35;123;42", "wc_strengths": "44;82;34;34", "wc_weaknesses": "175;37;41;149", "wc_questions": "96;16;534;63", "wc_limitations": "4;1;1;1", "wc_review": "400;171;733;289", "wc_reply_reviewers": "42;10;30;13", "wc_reply_authors": "289;33;27;33", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 70.25, 35.13812032536743 ], "wc_strengths_avg": [ 48.5, 19.767397400770797 ], "wc_weaknesses_avg": [ 100.5, 62.19927652312364 ], "wc_questions_avg": [ 177.25, 207.92231121262577 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 398.25, 209.54638507977177 ], "wc_reply_reviewers_avg": [ 23.75, 13.007209539328564 ], "wc_reply_authors_avg": [ 95.5, 111.74412736247038 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XtdeRm7NjYQJ:scholar.google.com/&scioq=Class+Concept+Representation+from+Contextual+Texts+for+Training-Free+Multi-Label+Recognition&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;Samsung", "aff_unique_dep": ";Samsung", "aff_unique_url": "https://www.snu.ac.kr;https://www.samsung.com", "aff_unique_abbr": "SNU;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "The Fine-Grained Complexity of Gradient Computation for Training Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93245", "id": "up4tWnwRol", "proceeding": "", "pdf": "https://openreview.net/pdf?id=up4tWnwRol", "openreview": "https://openreview.net/forum?id=up4tWnwRol", "poster": "", "project": "", "author_site": "Josh Alman, Zhao Song", "tldr": "", "abstract": "Large language models (LLMs) have made fundamental contributions over the last a few years. To train an LLM, one needs to alternatingly run `forward' computations and backward computations. The forward computation can be viewed as attention function evaluation, and the backward computation can be viewed as a gradient computation. In previous work by [Alman and Song, NeurIPS 2023], it was proved that the forward step can be performed in almost-linear time in certain parameter regimes, but that there is no truly sub-quadratic time algorithm in the remaining parameter regimes unless the popular hypothesis $\\mathsf{SETH}$ is false. In this work, we show nearly identical results for the harder-seeming problem of computing the gradient of loss function of one layer attention network, and thus for the entire process of LLM training. This completely characterizes the fine-grained complexity of every step of LLM training.", "keywords": "Strong Exponential Time Hypothesis;Fine-grained Complexity;Polynomial methods;Gradient Complexity", "primary_area": "learning_theory", "supplementary_material": "", "author": "Josh Alman;Zhao Song", "authorids": "~Josh_Alman1;~Zhao_Song3", "gender": "M;M", "homepage": "http://joshalman.com;https://www.youtube.com/@zhaosong2031", "dblp": "166/1624;76/4051-2", "google_scholar": "yyDMlesAAAAJ;yDZct7UAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Josh_Alman1;~Zhao_Song3", "aff": "Columbia University;Adobe", "aff_domain": "columbia.edu;adobe.com", "position": "Assistant Professor;Researcher", "bibtex": "@inproceedings{\nalman2024the,\ntitle={The Fine-Grained Complexity of Gradient Computation for Training Large Language Models},\nauthor={Josh Alman and Zhao Song},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=up4tWnwRol}\n}", "github": "", "reviewers": "C3gd;MncV;mody;UJu3", "pdf_size": 423374, "rating": "4;5;6;8", "confidence": "3;2;2;3", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "2;3;3;4", "wc_summary": "50;60;91;190", "wc_strengths": "41;24;33;53", "wc_weaknesses": "130;34;18;109", "wc_questions": "36;207;44;112", "wc_limitations": "1;8;9;1", "wc_review": "258;333;195;465", "wc_reply_reviewers": "0;0;19;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 97.75, 55.36413550304926 ], "wc_strengths_avg": [ 37.75, 10.662434056068061 ], "wc_weaknesses_avg": [ 72.75, 47.67271232057182 ], "wc_questions_avg": [ 99.75, 68.60165814322566 ], "wc_limitations_avg": [ 4.75, 3.766629793329841 ], "wc_review_avg": [ 312.75, 100.56434507319182 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.16903085094570333, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3792771055216274413&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "columbia.edu;adobe.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;Adobe", "aff_unique_dep": ";Adobe Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.adobe.com", "aff_unique_abbr": "Columbia;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On the Necessity of Collaboration for Online Model Selection with Decentralized Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93244", "id": "uqWfLgZpV1", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uqWfLgZpV1", "openreview": "https://openreview.net/forum?id=uqWfLgZpV1", "poster": "/media/PosterPDFs/NeurIPS%202024/93244.png?t=1731235894.532286", "project": "", "author_site": "Junfan Li, Zheshun Wu, Zenglin Xu, Irwin King", "tldr": "", "abstract": "We consider online model selection with decentralized data over $M$ clients, and study the necessity of collaboration among clients. Previous work proposed various federated algorithms without demonstrating their necessity, while we answer the question from a novel perspective of computational constraints. We prove lower bounds on the regret, and propose a federated algorithm and analyze the upper bound. Our results show (i) collaboration is unnecessary in the absence of computational constraints on clients; (ii) collaboration is necessary if the computational cost on each client is limited to $o(K)$, where $K$ is the number of candidate hypothesis spaces. We clarify the unnecessary nature of collaboration in previous federated algorithms for distributed online multi-kernel learning, and improve the regret bounds at a smaller computational and communication cost. Our algorithm relies on three new techniques including an improved Bernstein's inequality for martingale, a federated online mirror descent framework, and decoupling model selection and prediction, which might be of independent interest.", "keywords": "online learning;model selection;federated learning;kernel methods", "primary_area": "online_learning", "supplementary_material": "/attachment/c369f259d9dd86c4d201a5f7ef0ae7c298d4e851.zip", "author": "Junfan Li;Zheshun Wu;Zenglin Xu;Irwin King", "authorids": "~Junfan_Li1;~Zheshun_Wu1;~Zenglin_Xu2;~Irwin_King1", "gender": "M;M;M;M", "homepage": ";;https://www.cse.cuhk.edu.hk/irwin.king/;https://faculty.fudan.edu.cn/xuzenglin/en/index.htm", "dblp": "224/4583;;k/IrwinKing;68/1538", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Y9cnocIAAAAJ;MXvC7tkAAAAJ;gF0H9nEAAAAJ", "orcid": "0000-0003-1027-4251;0000-0002-4099-552X;0000-0001-8106-6447;0000-0001-5550-6461", "linkedin": ";;irwinking/;", "or_profile": "~Junfan_Li1;~Zheshun_Wu1;~Irwin_King1;~Zenglin_Xu1", "aff": "Harbin Institute of Technology Shenzhen;Harbin Institute of Technology, Shenzhen;The Chinese University of Hong Kong;Harbin Institute of Technology Shenzhen", "aff_domain": "hit.edu.cn;hit.edu.cn;cuhk.edu.hk;hit.edu.cn", "position": "Postdoc;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024on,\ntitle={On the Necessity of Collaboration for Online Model Selection with Decentralized Data},\nauthor={Junfan Li and Zheshun Wu and Zenglin Xu and Irwin King},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uqWfLgZpV1}\n}", "github": "", "reviewers": "EYSG;eZoF;N9xw;83EK", "pdf_size": 741586, "rating": "5;6;6;8", "confidence": "3;4;1;2", "soundness": "3;3;2;3", "novelty": "2;3;3;4", "presentation": "2;4;2;3", "wc_summary": "121;42;207;77", "wc_strengths": "42;48;78;70", "wc_weaknesses": "180;34;72;6", "wc_questions": "33;32;65;1", "wc_limitations": "10;3;3;1", "wc_review": "386;159;425;155", "wc_reply_reviewers": "53;0;0;0", "wc_reply_authors": "31;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 111.75, 61.70646238442129 ], "wc_strengths_avg": [ 59.5, 14.9248115565993 ], "wc_weaknesses_avg": [ 73.0, 66.06814663663572 ], "wc_questions_avg": [ 32.75, 22.63155982251334 ], "wc_limitations_avg": [ 4.25, 3.418698582794336 ], "wc_review_avg": [ 281.25, 125.02074827803584 ], "wc_reply_reviewers_avg": [ 13.25, 22.949673200287624 ], "wc_reply_authors_avg": [ 7.75, 13.423393758658799 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30779350562554625, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4600392476196462678&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "hit.edu.cn;hit.edu.cn;cuhk.edu.hk;hit.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.hit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;CUHK", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Mixture of Demonstrations for In-Context Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93243", "id": "uqxSLoCw3K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uqxSLoCw3K", "openreview": "https://openreview.net/forum?id=uqxSLoCw3K", "poster": "", "project": "", "author_site": "Song Wang, Zihan Chen, Chengshuai Shi, Cong Shen, Jundong Li", "tldr": "", "abstract": "In-Context Learning (ICL) empowers Large Language Models (LLMs) to tackle various tasks by providing input-output examples as additional inputs, referred to as demonstrations. Nevertheless, the performance of ICL could be easily impacted by the quality of selected demonstrations. Existing efforts generally learn a retriever model to score each demonstration for selecting suitable demonstrations, however, the effect is suboptimal due to the large search space and the noise from unhelpful demonstrations. In this study, we introduce MoD, which partitions the demonstration pool into groups, each governed by an expert to reduce search space. We further design an expert-wise training strategy to alleviate the impact of unhelpful demonstrations when optimizing the retriever model. During inference, experts collaboratively retrieve demonstrations for the input query to enhance the ICL performance. We validate MoD via experiments across a range of NLP datasets and tasks, demonstrating its state-of-the-art performance and shedding new light on the future design of retrieval methods for ICL.", "keywords": "In-context Learning;Large Language Models;Mixture-of-Experts", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Song Wang;Zihan Chen;Chengshuai Shi;Cong Shen;Jundong Li", "authorids": "~Song_Wang6;~Zihan_Chen5;~Chengshuai_Shi1;~Cong_Shen1;~Jundong_Li2", "gender": "M;;M;M;M", "homepage": "https://songw-sw.github.io/;;https://chengshuai-shi.github.io/;https://cshen317.github.io/;https://jundongli.github.io/", "dblp": ";;259/3938;79/6027-1.html;144/7997.html", "google_scholar": ";;twvDiW8AAAAJ;70LBhKcAAAAJ;uY6ek7sAAAAJ", "orcid": "0000-0003-1273-7694;;0000-0002-2727-8251;0000-0002-3148-4453;", "linkedin": ";;;cong-shen-3372404/;", "or_profile": "~Song_Wang6;~Zihan_Chen5;~Chengshuai_Shi1;~Cong_Shen1;~Jundong_Li2", "aff": "University of Virginia;;University of Virginia;University of Virginia;University of Virginia", "aff_domain": "virginia.edu;;virginia.edu;virginia.edu;virginia.edu", "position": "PhD student;;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024mixture,\ntitle={Mixture of Demonstrations for In-Context Learning},\nauthor={Song Wang and Zihan Chen and Chengshuai Shi and Cong Shen and Jundong Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uqxSLoCw3K}\n}", "github": "", "reviewers": "SdUm;3zjF;UC6f;SZXS", "pdf_size": 738594, "rating": "5;5;6;6", "confidence": "3;4;5;3", "soundness": "2;2;3;3", "novelty": "3;2;2;2", "presentation": "2;2;2;3", "wc_summary": "76;60;82;67", "wc_strengths": "52;68;68;131", "wc_weaknesses": "228;161;227;277", "wc_questions": "5;6;99;34", "wc_limitations": "1;8;1;87", "wc_review": "362;303;477;596", "wc_reply_reviewers": "57;12;44;100", "wc_reply_authors": "132;104;554;58", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 71.25, 8.407585860400118 ], "wc_strengths_avg": [ 79.75, 30.30160886817728 ], "wc_weaknesses_avg": [ 223.25, 41.233329964968874 ], "wc_questions_avg": [ 36.0, 38.19031290785662 ], "wc_limitations_avg": [ 24.25, 36.34126442489309 ], "wc_review_avg": [ 434.5, 112.29091681877034 ], "wc_reply_reviewers_avg": [ 53.25, 31.5703579327191 ], "wc_reply_authors_avg": [ 212.0, 199.21345336096155 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.75, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9911407870587494219&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "virginia.edu;;virginia.edu;virginia.edu;virginia.edu", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "HW-GPT-Bench: Hardware-Aware Architecture Benchmark for Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97460", "id": "urJyyMKs7E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=urJyyMKs7E", "openreview": "https://openreview.net/forum?id=urJyyMKs7E", "poster": "", "project": "", "author_site": "Rhea Sukthanker, Arber Zela, Benedikt Staffler, Aaron Klein, Lennart Purucker, J\u00f6rg Franke, Frank Hutter", "tldr": "", "abstract": "The increasing size of language models necessitates a thorough analysis across multiple dimensions to assess trade-offs among crucial hardware metrics such as latency, energy consumption, GPU memory usage, and performance. Identifying optimal model configurations under specific hardware constraints is becoming essential but remains challenging due to the computational load of exhaustive training and evaluation on multiple devices. To address this, we introduce HW-GPT-Bench, a hardware-aware benchmark that utilizes surrogate predictions to approximate various hardware metrics across 13 devices of architectures in the GPT-2 family, with architectures containing up to 1.55B parameters. Our surrogates, via calibrated predictions and reliable uncertainty estimates, faithfully model the heteroscedastic noise inherent in the energy and latency measurements. To estimate perplexity, we employ weight-sharing techniques from Neural Architecture Search (NAS), inheriting pretrained weights from the largest GPT-2 model. Finally, we demonstrate the utility of HW-GPT-Bench by simulating optimization trajectories of various multi-objective optimization algorithms in just a few seconds.", "keywords": "Language Modelling;Transformers;Efficiency;Hardware-Awareness;Neural Architecture Search", "primary_area": "", "supplementary_material": "/attachment/f414d32055ce478d1099abac3e9bb51b34532815.pdf", "author": "Rhea Sanjay Sukthanker;Arber Zela;Benedikt Staffler;Aaron Klein;Lennart Purucker;J\u00f6rg K.H. Franke;Frank Hutter", "authorids": "~Rhea_Sanjay_Sukthanker3;~Arber_Zela1;~Benedikt_Staffler1;~Aaron_Klein1;~Lennart_Purucker1;~J\u00f6rg_K.H._Franke1;~Frank_Hutter1", "gender": "F;M;M;M;M;M;M", "homepage": "https://rheasukthanker.github.io/;https://ml.informatik.uni-freiburg.de/people/zela/index.html;https://www.bosch-ai.com/;https://aaronkl.github.io/;http://ml.informatik.uni-freiburg.de/~hutter/;;https://ml.informatik.uni-freiburg.de/profile/purucker/", "dblp": "277/5077;;223/4871;178/3281;89/5383;251/8540;339/0547", "google_scholar": "OsamqmMAAAAJ;hD_6YioAAAAJ;;usl__skAAAAJ;https://scholar.google.de/citations?user=YUrxwrkAAAAJ;https://scholar.google.de/citations?user=4tDpiPgAAAAJ;x_HyTt0AAAAJ", "orcid": ";;;;0000-0002-2037-3694;0000-0002-4390-4582;0009-0001-1181-0549", "linkedin": "rhea-sukthanker-006502116/;https://de.linkedin.com/in/arber-zela-ba85a2145;;;frank-hutter-9190b24b/;;lennart-purucker/", "or_profile": "~Rhea_Sanjay_Sukthanker3;~Arber_Zela1;~Benedikt_Staffler1;~Aaron_Klein1;~Frank_Hutter1;~J\u00f6rg_Franke1;~Lennart_Oswald_Purucker1", "aff": "University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg;University of Freiburg;Robert Bosch GmbH, Bosch;Amazon Berlin;Albert-Ludwigs-Universit\u00e4t Freiburg;Universit\u00e4t Freiburg;University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "cs.uni-freiburg.de;uni-freiburg.de;de.bosch.com;amazon.com;uni-freiburg.de;uni-freiburg.de;cs.uni-freiburg.de", "position": "PhD student;PhD student;Researcher;Scientist;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nsukthanker2024hwgptbench,\ntitle={{HW}-{GPT}-Bench: Hardware-Aware Architecture Benchmark for Language Models},\nauthor={Rhea Sanjay Sukthanker and Arber Zela and Benedikt Staffler and Aaron Klein and Lennart Purucker and J{\\\"o}rg K.H. Franke and Frank Hutter},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=urJyyMKs7E}\n}", "github": "", "reviewers": "oM4X;6da8;r3pB;74Qi", "pdf_size": 8410594, "rating": "6;6;8;9", "confidence": "4;4;4;4", "wc_summary_and_contributions": "86;65;76;48", "wc_strengths": "17;69;8;30", "wc_improvement": "49;6;44;92", "wc_limitations": "18;1;19;1", "wc_correctness": "38;12;3;8", "wc_clarity": "33;4;8;1", "wc_relation_to_prior_work": "32;10;1;1", "wc_documentation": "31;1;11;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "305;169;171;183", "wc_reply_reviewers": "12;20;32;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 1.299038105676658 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 68.75, 14.095655359010449 ], "wc_strengths_avg": [ 31.0, 23.29162939770423 ], "wc_improvement_avg": [ 47.75, 30.48257699079919 ], "wc_limitations_avg": [ 9.75, 8.757139944068497 ], "wc_correctness_avg": [ 15.25, 13.516193990913271 ], "wc_clarity_avg": [ 11.5, 12.658988901172163 ], "wc_relation_to_prior_work_avg": [ 11.0, 12.668859459319927 ], "wc_documentation_avg": [ 11.0, 12.24744871391589 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 207.0, 56.83308895353129 ], "wc_reply_reviewers_avg": [ 16.0, 11.661903789690601 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12449938893963579965&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.uni-freiburg.de;uni-freiburg.de;de.bosch.com;amazon.com;uni-freiburg.de;uni-freiburg.de;cs.uni-freiburg.de", "author_num": 7, "aff_unique_index": "0;0;1;2;3;0;0", "aff_unique_norm": "University of Freiburg;Robert Bosch GmbH;Amazon;Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_unique_dep": ";;Amazon;", "aff_unique_url": "https://www.uni-freiburg.de;https://www.bosch.com;https://www.amazon.de;https://www.uni-freiburg.de", "aff_unique_abbr": "UoF;Bosch;Amazon;Albert-Ludwigs-Universit\u00e4t", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Freiburg;;Berlin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "urgpcr7kFR", "title": "Alberta Wells Dataset: Pinpointing Oil and Gas Wells from Satellite Imagery", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Millions of abandoned oil and gas wells are scattered across the world, leaching methane into the atmosphere and toxic compounds into the groundwater. Many of these locations are unknown, preventing the wells from being plugged and their polluting effects averted. Remote sensing is a relatively unexplored tool for pinpointing abandoned wells at scale. We introduce the first large-scale dataset for this problem, leveraging medium-resolution multi-spectral satellite imagery from Planet Labs. Our curated dataset comprises over 213,000 wells (abandoned, suspended and active) from Alberta, a region with especially high well density, sourced from the Alberta Energy Regulator and verified by domain experts. We evaluate baseline algorithms for well detection and segmentation, showing the promise of computer vision approaches but also significant room for improvement.", "keywords": "Remote Sensing;Satellite Imagery;Climate Change;AI for Good", "primary_area": "", "supplementary_material": "/attachment/366707202c0067c071043b9adad2ced317b3a6a0.zip", "author": "Pratinav Seth;Michelle Lin;BREFO DWAMENA YAW;Jade Boutot;Mary Kang;David Rolnick", "authorids": "~Pratinav_Seth1;~Michelle_Lin1;~BREFO_DWAMENA_YAW1;~Jade_Boutot1;~Mary_Kang1;~David_Rolnick1", "gender": "M;;M;;F;M", "homepage": "https://pratinavseth.github.io/;https://mchll-ln.github.io/;;;https://www.mcgill.ca/civil/mary-kang;http://www.davidrolnick.com/", "dblp": "305/7312;;;;;37/10718", "google_scholar": "https://scholar.google.com/citations?hl=en;ThOrOBoAAAAJ;;;https://scholar.google.ca/citations?hl=en;P_luG3cAAAAJ", "orcid": "0009-0001-4525-4464;;0000-0003-0040-6521;0000-0002-3445-5009;0000-0001-9142-384X;", "linkedin": "pratinav-seth/;lin-michelle/;yaw-brefo-3b2004ba/;;;", "or_profile": "~Pratinav_Seth1;~Michelle_Lin1;~BREFO_DWAMENA_YAW1;~Jade_Boutot1;~Mary_Kang1;~David_Rolnick1", "aff": "Manipal Institute of Technology;Mila - Quebec Artificial Intelligence Institute;Aya data;McGill University, McGill University;McGill University;McGill University", "aff_domain": "manipal.edu;mila.quebec;ayadata.ai;mail.mcgill.ca;mcgill.ca;cs.mcgill.ca", "position": "Undergrad student;Intern;Researcher;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@misc{\nanonymous2024alberta,\ntitle={Alberta Wells Dataset: Pinpointing Oil and Gas Wells from Satellite Imagery},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=urgpcr7kFR}\n}", "github": "", "project": "", "reviewers": "eFmG;Yk8C;qB8s;2Adt", "site": "https://openreview.net/forum?id=urgpcr7kFR", "pdf_size": 1921375, "rating": "4;7;7;7", "confidence": "4;4;5;4", "wc_summary_and_contributions": "28;42;64;59", "wc_strengths": "33;147;32;46", "wc_improvement": "423;271;458;552", "wc_limitations": "1;11;23;28", "wc_correctness": "1;34;24;179", "wc_clarity": "1;9;23;392", "wc_relation_to_prior_work": "1;21;6;1", "wc_documentation": "1;72;11;16", "wc_additional_feedback": "1;1;1;1", "wc_review": "490;608;642;1274", "wc_reply_reviewers": "0;57;141;59", "wc_reply_authors": "1601;2036;927;1520", "reply_reviewers": "0;1;1;1", "reply_authors": "3;4;3;3", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 48.25, 14.254385290148432 ], "wc_strengths_avg": [ 64.5, 47.95049530505394 ], "wc_improvement_avg": [ 426.0, 101.16076314461057 ], "wc_limitations_avg": [ 15.75, 10.520812706250407 ], "wc_correctness_avg": [ 59.5, 70.02321043768274 ], "wc_clarity_avg": [ 106.25, 165.16563655918262 ], "wc_relation_to_prior_work_avg": [ 7.25, 8.1967981553775 ], "wc_documentation_avg": [ 25.0, 27.667670664513846 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 753.5, 305.7593007579655 ], "wc_reply_reviewers_avg": [ 64.25, 50.246268518169586 ], "wc_reply_authors_avg": [ 1521.0, 395.1145403550722 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff_unique_index": "0;1;2;3;3;3", "aff_unique_norm": "Manipal Institute of Technology;Quebec Artificial Intelligence Institute;Aya;McGill University", "aff_unique_dep": ";Artificial Intelligence;;", "aff_unique_url": "https://mit manipal.edu;https://mila.quebec;;https://www.mcgill.ca", "aff_unique_abbr": "MIT Manipal;Mila;;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "India;Canada;" }, { "title": "JailbreakBench: An Open Robustness Benchmark for Jailbreaking Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97459", "id": "urjPCYZt0I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=urjPCYZt0I", "openreview": "https://openreview.net/forum?id=urjPCYZt0I", "poster": "", "project": "", "author_site": "Patrick Chao, Edoardo Debenedetti, Alexander Robey, Maksym Andriushchenko, Francesco Croce, Vikash Sehwag, Edgar Dobriban, Nicolas Flammarion, George J. Pappas, Florian Tramer, Hamed Hassani, Eric Wong", "tldr": "", "abstract": "Jailbreak attacks cause large language models (LLMs) to generate harmful, unethical, or otherwise objectionable content. Evaluating these attacks presents a number of challenges, which the current collection of benchmarks and evaluation techniques do not adequately address. First, there is no clear standard of practice regarding jailbreaking evaluation. Second, existing works compute costs and success rates in incomparable ways. And third, numerous works are not reproducible, as they withhold adversarial prompts, involve closed-source code, or rely on evolving proprietary APIs. To address these challenges, we introduce JailbreakBench, an open-sourced benchmark with the following components: (1) an evolving repository of state-of-the-art adversarial prompts, which we refer to as *jailbreak artifacts*; (2) a jailbreaking dataset comprising 100 behaviors---both original and sourced from prior work---which align with OpenAI's usage policies; (3) a standardized evaluation framework at https://github.com/JailbreakBench/jailbreakbench that includes a clearly defined threat model, system prompts, chat templates, and scoring functions; and (4) a leaderboard at https://jailbreakbench.github.io/ that tracks the performance of attacks and defenses for various LLMs. We have carefully considered the potential ethical implications of releasing this benchmark, and believe that it will be a net positive for the community.", "keywords": "jailbreaks;robustness;alignment;large language models;AI safety", "primary_area": "", "supplementary_material": "/attachment/26f4152b1cc802393bba8b9044c3f2df12afc009.pdf", "author": "Patrick Chao;Edoardo Debenedetti;Alexander Robey;Maksym Andriushchenko;Francesco Croce;Vikash Sehwag;Edgar Dobriban;Nicolas Flammarion;George J. Pappas;Florian Tram\u00e8r;Hamed Hassani;Eric Wong", "authorids": "~Patrick_Chao1;~Edoardo_Debenedetti1;~Alexander_Robey1;~Maksym_Andriushchenko1;~Francesco_Croce1;~Vikash_Sehwag1;~Edgar_Dobriban2;~Nicolas_Flammarion1;~George_J._Pappas1;~Florian_Tram\u00e8r1;~Hamed_Hassani2;~Eric_Wong1", "gender": ";M;M;M;M;M;;M;M;M;M;M", "homepage": "https://patrickrchao.github.io/;https://edoardo.science/;https://arobey1.github.io/;https://www.andriushchenko.me/;;https://vsehwag.github.io/;https://statistics.wharton.upenn.edu/profile/dobriban/;;https://www.seas.upenn.edu/~hassani/;http://riceric22.github.io/;http://floriantramer.com;http://www.georgejpappas.org/", "dblp": "222/2677.html;319/7073;242/9113;200/8865;52/4288;187/5613;99/11269;164/7417;73/4984;64/1811-1.html;158/7224;p/GeorgeJPappas", "google_scholar": ";6Urve9wAAAAJ;V5NWZc8AAAAJ;ZNtuJYoAAAAJ;https://scholar.google.de/citations?view_op=list_works;JAkeEG8AAAAJ;aGvH4yMAAAAJ;;;pWnTMRkAAAAJ;https://scholar.google.ch/citations?user=ijH0-a8AAAAJ;https://scholar.google.com.tw/citations?user=Kia-4B0AAAAJ", "orcid": ";0000-0003-3343-9477;;;;;;;;;;0000-0001-9081-0637", "linkedin": ";https://linkedin.com/in/edoardo-debenedetti/;alexrobey/;;;;edgar-dobriban/;;;;;", "or_profile": "~Patrick_Chao1;~Edoardo_Debenedetti1;~Alexander_Robey1;~Maksym_Andriushchenko1;~Francesco_Croce1;~Vikash_Sehwag1;~Edgar_Dobriban2;~Nicolas_Flammarion1;~Hamed_Hassani2;~Eric_Wong1;~Florian_Tramer1;~George_Pappas1", "aff": "The Wharton School, University of Pennsylvania;Google;School of Engineering and Applied Science, University of Pennsylvania;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Sony AI;The Wharton School, University of Pennsylvania;Swiss Federal Institute of Technology Lausanne;University of Pennsylvania;University of Pennsylvania;ETHZ - ETH Zurich;School of Engineering and Applied Science, University of Pennsylvania", "aff_domain": "wharton.upenn.edu;google.com;seas.upenn.edu;epfl.ch;epfl.ch;sony.com;wharton.upenn.edu;epfl.ch;upenn.edu;upenn.edu;ethz.ch;seas.upenn.edu", "position": "PhD student;Intern;PhD student;PhD Student;Postdoc;Researcher;Associate Professor;Assistant Professor;;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchao2024jailbreakbench,\ntitle={JailbreakBench: An Open Robustness Benchmark for Jailbreaking Large Language Models},\nauthor={Patrick Chao and Edoardo Debenedetti and Alexander Robey and Maksym Andriushchenko and Francesco Croce and Vikash Sehwag and Edgar Dobriban and Nicolas Flammarion and George J. Pappas and Florian Tram{\\`e}r and Hamed Hassani and Eric Wong},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=urjPCYZt0I}\n}", "github": "", "reviewers": "Q3j5;gG4f;Acex;9Nc2;PtUM", "pdf_size": 496627, "rating": "5;6;6;6;7", "confidence": "4;4;5;3;5", "wc_summary_and_contributions": "66;74;44;267;44", "wc_strengths": "2;30;6;182;7", "wc_improvement": "2;23;6;299;10", "wc_limitations": "1;32;1;382;7", "wc_correctness": "1;1;1;12;54", "wc_clarity": "1;1;1;10;6", "wc_relation_to_prior_work": "1;1;1;65;73", "wc_documentation": "1;1;1;72;7", "wc_additional_feedback": "1;1;1;1;1", "wc_review": "76;164;62;1290;209", "wc_reply_reviewers": "22;14;15;46;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;1;1;3;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "wc_summary_and_contributions_avg": [ 99.0, 84.83867042805421 ], "wc_strengths_avg": [ 45.4, 69.00318833213434 ], "wc_improvement_avg": [ 68.0, 115.71516754514077 ], "wc_limitations_avg": [ 84.6, 149.13966608518336 ], "wc_correctness_avg": [ 13.8, 20.546532554180523 ], "wc_clarity_avg": [ 3.8, 3.655133376499413 ], "wc_relation_to_prior_work_avg": [ 28.2, 33.408980828513755 ], "wc_documentation_avg": [ 16.4, 27.89695323866031 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 360.2, 468.0975966612091 ], "wc_reply_reviewers_avg": [ 19.4, 15.094369811290566 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.6, 0.8 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.42257712736425823, "gs_citation": 149, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15968546840637362529&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "wharton.upenn.edu;google.com;seas.upenn.edu;epfl.ch;epfl.ch;sony.com;wharton.upenn.edu;epfl.ch;upenn.edu;upenn.edu;ethz.ch;seas.upenn.edu", "author_num": 12, "aff_unique_index": "0;1;0;2;3;4;0;2;0;0;5;0", "aff_unique_norm": "University of Pennsylvania;Google;Swiss Federal Institute of Technology Lausanne;EPFL;Sony;ETH Zurich", "aff_unique_dep": "The Wharton School;Google;;;Sony AI;", "aff_unique_url": "https://www.wharton.upenn.edu;https://www.google.com;https://www.epfl.ch;https://www.epfl.ch;https://www.sony.com;https://www.ethz.ch", "aff_unique_abbr": "UPenn Wharton;Google;EPFL;EPFL;Sony AI;ETHZ", "aff_campus_unique_index": "1;2;2;2", "aff_campus_unique": ";Mountain View;Lausanne", "aff_country_unique_index": "0;0;0;1;1;2;0;1;0;0;1;0", "aff_country_unique": "United States;Switzerland;Japan" }, { "title": "BendVLM: Test-Time Debiasing of Vision-Language Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93242", "id": "utMOhsgXzB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=utMOhsgXzB", "openreview": "https://openreview.net/forum?id=utMOhsgXzB", "poster": "", "project": "", "author_site": "Walter Gerych, Haoran Zhang, Kimia Hamidieh, Eileen Pan, Maanas K. Sharma, Tom Hartvigsen, Marzyeh Ghassemi", "tldr": "", "abstract": "Vision-language (VL) embedding models have been shown to encode biases present in their training data, such as societal biases that prescribe negative characteristics to members of various racial and gender identities. Due to their wide-spread adoption for various tasks ranging from few-shot classification to text-guided image generation, debiasing VL models is crucial. Debiasing approaches that fine-tune the VL model often suffer from catastrophic forgetting. On the other hand, fine-tuning-free methods typically utilize a ``one-size-fits-all\" approach that assumes that correlation with the spurious attribute can be explained using a single linear direction across all possible inputs. In this work, we propose a nonlinear, fine-tuning-free approach for VL embedding model debiasing that tailors the debiasing operation to each unique input. This allows for a more flexible debiasing approach. Additionally, we do not require knowledge of the set of inputs a priori to inference time, making our method more appropriate for online tasks such as retrieval and text guided image generation.", "keywords": "vision language models;embedding models;multimodal models;debias;fairness", "primary_area": "fairness", "supplementary_material": "", "author": "Walter Gerych;Haoran Zhang;Kimia Hamidieh;Eileen Pan;Maanas Sharma;Thomas Hartvigsen;Marzyeh Ghassemi", "authorids": "~Walter_Gerych2;~Haoran_Zhang4;~Kimia_Hamidieh1;~Eileen_Pan1;~Maanas_Sharma1;~Thomas_Hartvigsen1;~Marzyeh_Ghassemi2", "gender": "M;M;F;F;;M;F", "homepage": "https://waltergerych.github.io/;https://haoran.ca;;;;https://www.tomhartvigsen.com;https://www.healthyml.org/", "dblp": "237/9060;95/4452-3.html;;275/3557;;211/5752;145/6563", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=6aWRAPkAAAAJ;;SIHPQ28AAAAJ;https://scholar.google.co.in/citations?user=KmHKvzsAAAAJ;rIjeeRsAAAAJ;", "orcid": ";;;;;;", "linkedin": "walter-gerych-84165112b/;;kimia-hamidieh-956519212/;;;;", "or_profile": "~Walter_Gerych2;~Haoran_Zhang4;~Kimia_Hamidieh1;~Eileen_Pan1;~Maanas_Sharma1;~Thomas_Hartvigsen1;~Marzyeh_Ghassemi2", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;University of Virginia, Charlottesville;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;virginia.edu;mit.edu", "position": "Postdoc;PhD student;PhD student;PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngerych2024bendvlm,\ntitle={Bend{VLM}: Test-Time Debiasing of Vision-Language Embeddings},\nauthor={Walter Gerych and Haoran Zhang and Kimia Hamidieh and Eileen Pan and Maanas Sharma and Thomas Hartvigsen and Marzyeh Ghassemi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=utMOhsgXzB}\n}", "github": "", "reviewers": "tPdX;kcNf;GYMR;xaVq", "pdf_size": 751969, "rating": "5;6;6;8", "confidence": "4;3;3;3", "soundness": "3;3;2;3", "novelty": "2;3;2;4", "presentation": "2;3;3;3", "wc_summary": "94;58;54;78", "wc_strengths": "60;59;39;51", "wc_weaknesses": "92;117;83;157", "wc_questions": "2;4;3;21", "wc_limitations": "2;4;35;1", "wc_review": "250;242;214;308", "wc_reply_reviewers": "7;0;12;22", "wc_reply_authors": "15;0;30;20", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.0, 16.09347693943108 ], "wc_strengths_avg": [ 52.25, 8.407585860400118 ], "wc_weaknesses_avg": [ 112.25, 28.682529525828087 ], "wc_questions_avg": [ 7.5, 7.826237921249264 ], "wc_limitations_avg": [ 10.5, 14.186260959111108 ], "wc_review_avg": [ 253.5, 34.18698582794336 ], "wc_reply_reviewers_avg": [ 10.25, 8.011710179481033 ], "wc_reply_authors_avg": [ 16.25, 10.825317547305483 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2732882802614118325&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;virginia.edu;mit.edu", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Virginia", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.virginia.edu", "aff_unique_abbr": "MIT;UVA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Charlottesville", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Labeling for Efficient Out-of-distribution Model Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93241", "id": "uuQQwrjMzb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uuQQwrjMzb", "openreview": "https://openreview.net/forum?id=uuQQwrjMzb", "poster": "/media/PosterPDFs/NeurIPS%202024/93241.png?t=1733725079.205619", "project": "", "author_site": "Daksh Mittal, Yuanzhe Ma, Shalmali Joshi, Hongseok Namkoong", "tldr": "", "abstract": "Datasets often suffer severe selection bias; clinical labels are only available on patients for whom doctors ordered medical exams. To assess model performance outside the support of available data, we present a computational framework for adaptive labeling, providing cost-efficient model evaluations under severe distribution shifts. We formulate the problem as a Markov Decision Process over states defined by posterior beliefs on model performance. Each batch of new labels incurs a \u201cstate transition\u201d to sharper beliefs, and we choose batches to minimize uncertainty on model performance at the end of the label collection process. Instead of relying on high-variance REINFORCE policy gradient estimators that do not scale, our adaptive labeling policy is optimized using path-wise policy gradients computed by auto-differentiating through simulated roll-outs. Our framework is agnostic to different uncertainty quantification approaches and highlights the virtue of planning in adaptive labeling. On synthetic and real datasets, we empirically demonstrate even a one-step lookahead policy substantially outperforms active learning-inspired heuristics.", "keywords": "Model Evaluation;Uncertainty Quantification;Markov Decision Process;Policy Gradient;Auto-differentiation", "primary_area": "evaluation", "supplementary_material": "", "author": "Daksh Mittal;Yuanzhe Ma;Shalmali Joshi;Hongseok Namkoong", "authorids": "~Daksh_Mittal1;~Yuanzhe_Ma1;~Shalmali_Joshi1;~Hongseok_Namkoong2", "gender": "M;M;F;M", "homepage": "https://sites.google.com/view/dakshmittal/home;https://yuanzhe-ma.com/;https://reaim-lab.github.io/shalmalijoshi.html;https://hsnamkoong.github.io", "dblp": "339/6244;304/2466;173/2910;191/6680", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;4d8UV8sAAAAJ;x5wW5WIAAAAJ;dyXX1EgAAAAJ", "orcid": ";;;", "linkedin": ";yuanzhema;;", "or_profile": "~Daksh_Mittal1;~Yuanzhe_Ma1;~Shalmali_Joshi1;~Hongseok_Namkoong2", "aff": "Columbia University;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmittal2024adaptive,\ntitle={Adaptive Labeling for Efficient Out-of-distribution Model Evaluation},\nauthor={Daksh Mittal and Yuanzhe Ma and Shalmali Joshi and Hongseok Namkoong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uuQQwrjMzb}\n}", "github": "", "reviewers": "GtV8;PJGF;4yoX;CyPE", "pdf_size": 671395, "rating": "5;6;6;6", "confidence": "3;2;3;2", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "1;3;2;2", "wc_summary": "139;73;58;67", "wc_strengths": "15;71;69;50", "wc_weaknesses": "8;120;32;130", "wc_questions": "201;49;12;20", "wc_limitations": "5;2;6;7", "wc_review": "368;315;177;274", "wc_reply_reviewers": "37;18;19;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 84.25, 32.057565409743766 ], "wc_strengths_avg": [ 51.25, 22.47637648732553 ], "wc_weaknesses_avg": [ 72.5, 53.298686662993866 ], "wc_questions_avg": [ 70.5, 76.59144860883622 ], "wc_limitations_avg": [ 5.0, 1.8708286933869707 ], "wc_review_avg": [ 283.5, 69.93747207327414 ], "wc_reply_reviewers_avg": [ 21.0, 9.874208829065749 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:sPDo_ctF0D0J:scholar.google.com/&scioq=Adaptive+Labeling+for+Efficient+Out-of-distribution+Model+Evaluation&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "columbia.edu;columbia.edu;columbia.edu;columbia.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploring Jacobian Inexactness in Second-Order Methods for Variational Inequalities: Lower Bounds, Optimal Algorithms and Quasi-Newton Approximations", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93240", "id": "uvFDaeFR9X", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uvFDaeFR9X", "openreview": "https://openreview.net/forum?id=uvFDaeFR9X", "poster": "", "project": "", "author_site": "Artem Agafonov, Petr Ostroukhov, Roman Mozhaev, Konstantin Yakovlev, Eduard Gorbunov, Martin Takac, Alexander Gasnikov, Dmitry Kamzolov", "tldr": "", "abstract": "Variational inequalities represent a broad class of problems, including minimization and min-max problems, commonly found in machine learning. Existing second-order and high-order methods for variational inequalities require precise computation of derivatives, often resulting in prohibitively high iteration costs. In this work, we study the impact of Jacobian inaccuracy on second-order methods. For the smooth and monotone case, we establish a lower bound with explicit dependence on the level of Jacobian inaccuracy and propose an optimal algorithm for this key setting. When derivatives are exact, our method converges at the same rate as exact optimal second-order methods. To reduce the cost of solving the auxiliary problem, which arises in all high-order methods with global convergence, we introduce several Quasi-Newton approximations. Our method with Quasi-Newton updates achieves a global sublinear convergence rate. We extend our approach with a tensor generalization for inexact high-order derivatives and support the theory with experiments.", "keywords": "Variational Inequalities;Quasi-Newton Methods;Min-Max Problems;Lower Bounds;Optimal Methods", "primary_area": "optimization", "supplementary_material": "/attachment/da4300a886faee222fafaa29b26ce71ccebc4832.zip", "author": "Artem Agafonov;Petr Ostroukhov;Roman Mozhaev;Konstantin Yakovlev;Eduard Gorbunov;Martin Tak\u00e1\u010d;Alexander Gasnikov;Dmitry Kamzolov", "authorids": "~Artem_Agafonov1;~Petr_Ostroukhov1;~Roman_Mozhaev1;~Konstantin_Yakovlev2;~Eduard_Gorbunov1;~Martin_Tak\u00e1\u010d1;~Alexander_Gasnikov1;~Dmitry_Kamzolov1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=cr5pJdAAAAAJ&hl=en;;https://t.me/mozhaevr;;https://eduardgorbunov.github.io;;https://arxiv.org/search/?query=Gasnikov&searchtype=all&source=header;http://mtakac.com", "dblp": ";258/3462;;;215/5512.html;279/1643;153/1930;42/3759-1.html", "google_scholar": "cr5pJdAAAAAJ;-exQ4_wAAAAJ;;https://scholar.google.com/citations?hl=ru;https://scholar.google.ru/citations?user=85j2RqQAAAAJ;https://scholar.google.hr/citations?user=CAq74XAAAAAJ;AmeE8qkAAAAJ;qKQD-2cAAAAJ", "orcid": ";;;0009-0005-9397-6081;;;;0000-0001-7455-2025", "linkedin": ";petr-ostroukhov-7735a8198/;;;;;;martintakac/", "or_profile": "~Artem_Agafonov1;~Petr_Ostroukhov1;~Roman_Mozhaev1;~Konstantin_Yakovlev2;~Eduard_Gorbunov1;~Dmitry_Kamzolov1;~Alexander_Vladimirovich_Gasnikov1;~Martin_Takac3", "aff": "Moscow Institute of Physics and Technology;Institute for Information Transmission Problems;Moscow Institute of Physics and Technology;Moscow Institute of Physics and Technology;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Moscow Institute of Physics and Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mipt.edu;iitp.ru;phystech.edu;phystech.edu;mbzuai.ac.ae;mbzuai.ac.ae;mipt.ru;mbzuai.ac.ae", "position": "PhD student;Intern;Undergrad student;MS student;Postdoc;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nagafonov2024exploring,\ntitle={Exploring Jacobian Inexactness in Second-Order Methods for Variational Inequalities: Lower Bounds, Optimal Algorithms and Quasi-Newton Approximations},\nauthor={Artem Agafonov and Petr Ostroukhov and Roman Mozhaev and Konstantin Yakovlev and Eduard Gorbunov and Martin Tak{\\'a}{\\v{c}} and Alexander Gasnikov and Dmitry Kamzolov},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uvFDaeFR9X}\n}", "github": "", "reviewers": "jz9w;7Fy2;Vnro", "pdf_size": 595005, "rating": "6;6;8", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "67;30;89", "wc_strengths": "59;29;151", "wc_weaknesses": "113;17;67", "wc_questions": "33;66;118", "wc_limitations": "1;8;18", "wc_review": "273;150;443", "wc_reply_reviewers": "12;0;43", "wc_reply_authors": "18;0;22", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 62.0, 24.34474618201362 ], "wc_strengths_avg": [ 79.66666666666667, 51.90589775952461 ], "wc_weaknesses_avg": [ 65.66666666666667, 39.20317447463775 ], "wc_questions_avg": [ 72.33333333333333, 34.988887124660344 ], "wc_limitations_avg": [ 9.0, 6.97614984548545 ], "wc_review_avg": [ 288.6666666666667, 120.12863475828271 ], "wc_reply_reviewers_avg": [ 18.333333333333332, 18.116904322268255 ], "wc_reply_authors_avg": [ 13.333333333333334, 9.568466729604882 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2nCPLr4zns0J:scholar.google.com/&scioq=Exploring+Jacobian+Inexactness+in+Second-Order+Methods+for+Variational+Inequalities:+Lower+Bounds,+Optimal+Algorithms+and+Quasi-Newton+Approximations&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "mipt.edu;iitp.ru;phystech.edu;phystech.edu;mbzuai.ac.ae;mbzuai.ac.ae;mipt.ru;mbzuai.ac.ae", "author_num": 8, "aff_unique_index": "0;1;0;0;2;2;0;2", "aff_unique_norm": "Moscow Institute of Physics and Technology;Institute for Information Transmission Problems;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.mipt.ru/en;http://www.iitp.ru;https://mbzuai.ac.ae", "aff_unique_abbr": "MIPT;;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0;1", "aff_country_unique": "Russian Federation;United Arab Emirates" }, { "id": "uvvVjWP1aj", "title": "AI Sandbagging: Language Models can Strategically Underperform on Evaluations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Trustworthy capability evaluations are crucial for ensuring the safety of AI systems, and are becoming a key component of AI regulation. However, the developers of an AI system, or the AI system itself, may have incentives for evaluations to understate the AI's actual capability. These conflicting interests lead to the problem of *sandbagging* \u2013 which we define as *strategic underperformance on an evaluation*. In this paper we assess sandbagging capabilities in contemporary language models (LMs). We prompt frontier LMs, like GPT-4 and Claude 3 Opus, to selectively underperform on dangerous capability evaluations, while maintaining performance on general (harmless) capability evaluations. Moreover, we find that models can be fine-tuned, on a synthetic dataset, to hide specific capabilities unless given a password. This behaviour generalizes to high-quality, held-out benchmarks such as WMDP. In addition, we show that both frontier and smaller models can be prompted or password-locked to target specific scores on a capability evaluation. We have mediocre success in password-locking a model to mimic the answers a weaker model would give. Overall, our results suggest that capability evaluations are vulnerable to sandbagging. This vulnerability decreases the trustworthiness of evaluations, and thereby undermines important safety decisions regarding the development and deployment of advanced AI systems.\n\nWe publish our code and results at https://anonymous.4open.science/r/Sandbagging-8305/README.md", "keywords": "Alignment;AI safety;sandbagging;AI evaluations;AI governance;NLP;LLM", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/a22eaea17f84fe9acc3c1c514e2f357d934a586e.zip", "author": "Teun van der Weij;Felix Hofst\u00e4tter;Oliver Jaffe;Samuel F. Brown;Francis Rhys Ward", "authorids": "~Teun_van_der_Weij2;~Felix_Hofst\u00e4tter1;~Oliver_Jaffe2;~Samuel_F._Brown1;~Francis_Rhys_Ward1", "gender": "M;M;M;M;M", "homepage": "https://www.teunvanderweij.com;;https://github.com/ojaffe;;https://francisrhysward.wordpress.com/", "dblp": "351/0747;;;;273/0874", "google_scholar": "-fMmbSYAAAAJ;zRIuwQ8AAAAJ;;;", "orcid": ";;;;", "linkedin": "teun-van-der-weij/;felixhofstaetter/;;sam-brown-likes-making-things/;", "or_profile": "~Teun_van_der_Weij2;~Felix_Hofst\u00e4tter1;~Oliver_Jaffe2;~Samuel_F._Brown1;~Francis_Rhys_Ward1", "aff": "Utrecht University (ICS), Utrecht University;Independent;OpenAI;Independent;Imperial College London", "aff_domain": "cs.uu.nl;gmail.com;openai.com;sambrown.eu;ic.ac.uk", "position": "MS student;Researcher;Researcher (Contractor);Researcher;PhD student", "bibtex": "@misc{\nanonymous2024ai,\ntitle={{AI} Sandbagging: Language Models can Strategically Underperform on Evaluations},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=uvvVjWP1aj}\n}", "github": "", "project": "", "reviewers": "k4pA;ZaZq;TYgF;qZwJ", "site": "https://openreview.net/forum?id=uvvVjWP1aj", "pdf_size": 1323396, "rating": "4;5;5;8", "confidence": "4;4;3;3", "soundness": "2;2;2;4", "novelty": "2;2;2;4", "presentation": "3;3;4;4", "wc_summary": "94;37;148;73", "wc_strengths": "37;42;52;162", "wc_weaknesses": "341;77;518;15", "wc_questions": "44;39;53;34", "wc_limitations": "1;52;63;10", "wc_review": "517;247;834;294", "wc_reply_reviewers": "9;37;374;14", "wc_reply_authors": "73;88;260;4", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 88.0, 40.193283020922784 ], "wc_strengths_avg": [ 73.25, 51.52365961381237 ], "wc_weaknesses_avg": [ 237.75, 202.89082655457835 ], "wc_questions_avg": [ 42.5, 7.0178344238090995 ], "wc_limitations_avg": [ 31.5, 26.48112535373072 ], "wc_review_avg": [ 473.0, 232.04202205635082 ], "wc_reply_reviewers_avg": [ 108.5, 153.64976407401346 ], "wc_reply_authors_avg": [ 106.25, 94.25066312764065 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666667, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5315007714902151108&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Utrecht University;Independent;OpenAI;Imperial College London", "aff_unique_dep": "Institute of Computing and Software Systems (ICS);;;", "aff_unique_url": "https://www.uu.nl;;https://openai.com;https://www.imperial.ac.uk", "aff_unique_abbr": "UU;;OpenAI;ICL", "aff_campus_unique_index": "0", "aff_campus_unique": "Utrecht;", "aff_country_unique_index": "0;2;3", "aff_country_unique": "Netherlands;;United States;United Kingdom" }, { "title": "Diversity-Driven Synthesis: Enhancing Dataset Distillation through Directed Weight Adjustment", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93239", "id": "uwSaDHLlYc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uwSaDHLlYc", "openreview": "https://openreview.net/forum?id=uwSaDHLlYc", "poster": "/media/PosterPDFs/NeurIPS%202024/93239.png?t=1732532613.4853866", "project": "", "author_site": "JIAWEI DU, xin zhang, Juncheng Hu, Wenxin Huang, Joey Tianyi Zhou", "tldr": "", "abstract": "The sharp increase in data-related expenses has motivated research into condensing datasets while retaining the most informative features. Dataset distillation has thus recently come to the fore. This paradigm generates synthetic datasets that are representative enough to replace the original dataset in training a neural network. To avoid redundancy in these synthetic datasets, it is crucial that each element contains unique features and remains diverse from others during the synthesis stage. In this paper, we provide a thorough theoretical and empirical analysis of diversity within synthesized datasets. We argue that enhancing diversity can improve the parallelizable yet isolated synthesizing approach. Specifically, we introduce a novel method that employs dynamic and directed weight adjustment techniques to modulate the synthesis process, thereby maximizing the representativeness and diversity of each synthetic instance. Our method ensures that each batch of synthetic data mirrors the characteristics of a large, varying subset of the original dataset. Extensive experiments across multiple datasets, including CIFAR, Tiny-ImageNet, and ImageNet-1K, demonstrate the superior performance of our method, highlighting its effectiveness in producing diverse and representative synthetic datasets with minimal computational expense. Our code is available at https://github.com/AngusDujw/Diversity-Driven-Synthesis.", "keywords": "Dataset Distillation;Synthetic Data;Diversity;Generatlization", "primary_area": "optimization", "supplementary_material": "", "author": "Jiawei Du;Xin Zhang;Juncheng Hu;Wenxin Huang;Joey Tianyi Zhou", "authorids": "~Jiawei_Du1;~Xin_Zhang29;~Juncheng_Hu1;~Wenxin_Huang1;~Joey_Tianyi_Zhou1", "gender": "M;F;M;F;M", "homepage": ";https://zhangxin-xd.github.io/;https://jch.ai;;https://joeyzhouty.github.io/", "dblp": ";76/1584-92;;126/5563.html;123/5110", "google_scholar": "WrJKEzEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;F8_T6XMAAAAJ;TY2PC4CgIwkC;https://scholar.google.com.sg/citations?user=cYNqDokAAAAJ", "orcid": ";;;;0000-0002-4675-7055", "linkedin": ";;;;", "or_profile": "~Jiawei_Du1;~Xin_Zhang29;~Juncheng_Hu1;~Wenxin_Huang1;~Joey_Tianyi_Zhou1", "aff": "A*STAR;Xidian University;National University of Singapore;Hubei University;A*STAR Centre for Frontier AI Research", "aff_domain": "astar.edu.sg;xidian.edu;nus.edu.sg;hubu.edu.cn;cfar.a-star.edu.sg", "position": "Researcher;PhD student;MS student;Lecturer;Principal Researcher", "bibtex": "@inproceedings{\ndu2024diversitydriven,\ntitle={Diversity-Driven Synthesis: Enhancing Dataset Distillation through Directed Weight Adjustment},\nauthor={Jiawei Du and Xin Zhang and Juncheng Hu and Wenxin Huang and Joey Tianyi Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uwSaDHLlYc}\n}", "github": "", "reviewers": "LurV;e5no;NmcN;Uwti", "pdf_size": 3347782, "rating": "6;6;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;3", "wc_summary": "43;78;82;123", "wc_strengths": "137;67;109;128", "wc_weaknesses": "70;283;176;158", "wc_questions": "87;18;93;116", "wc_limitations": "9;5;5;10", "wc_review": "346;451;465;535", "wc_reply_reviewers": "12;47;0;34", "wc_reply_authors": "14;328;34;22", "reply_reviewers": "1;2;0;1", "reply_authors": "2;4;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.5, 28.359301824974466 ], "wc_strengths_avg": [ 110.25, 26.93858756505248 ], "wc_weaknesses_avg": [ 171.75, 75.72441812255806 ], "wc_questions_avg": [ 78.5, 36.568429006453094 ], "wc_limitations_avg": [ 7.25, 2.277608394786075 ], "wc_review_avg": [ 449.25, 67.57357101707738 ], "wc_reply_reviewers_avg": [ 23.25, 18.34904629674251 ], "wc_reply_authors_avg": [ 99.5, 132.11642592804273 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11749949518733637899&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "astar.edu.sg;xidian.edu;nus.edu.sg;hubu.edu.cn;cfar.a-star.edu.sg", "author_num": 5, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Agency for Science, Technology and Research;Xidian University;National University of Singapore;Hubei University;A*STAR", "aff_unique_dep": ";;;;Centre for Frontier AI Research", "aff_unique_url": "https://www.a-star.edu.sg;http://www.xidian.edu.cn/;https://www.nus.edu.sg;http://www.hubu.edu.cn/;https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR;Xidian;NUS;HUBU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Du-IN: Discrete units-guided mask modeling for decoding speech from Intracranial Neural signals", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93238", "id": "uyLtEFnpQP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uyLtEFnpQP", "openreview": "https://openreview.net/forum?id=uyLtEFnpQP", "poster": "/media/PosterPDFs/NeurIPS%202024/93238.png?t=1729519520.845317", "project": "", "author_site": "Hui Zheng, Haiteng Wang, Weibang Jiang, Zhongtao Chen, Li He, Peiyang Lin, Penghu Wei, Guoguang Zhao, Yunzhe Liu", "tldr": "", "abstract": "Invasive brain-computer interfaces with Electrocorticography (ECoG) have shown promise for high-performance speech decoding in medical applications, but less damaging methods like intracranial stereo-electroencephalography (sEEG) remain underexplored. With rapid advances in representation learning, leveraging abundant recordings to enhance speech decoding is increasingly attractive. However, popular methods often pre-train temporal models based on brain-level tokens, overlooking that brain activities in different regions are highly desynchronized during tasks. Alternatively, they pre-train spatial-temporal models based on channel-level tokens but fail to evaluate them on challenging tasks like speech decoding, which requires intricate processing in specific language-related areas. To address this issue, we collected a well-annotated Chinese word-reading sEEG dataset targeting language-related brain networks from 12 subjects. Using this benchmark, we developed the Du-IN model, which extracts contextual embeddings based on region-level tokens through discrete codex-guided mask modeling. Our model achieves state-of-the-art performance on the 61-word classification task, surpassing all baselines. Model comparisons and ablation studies reveal that our design choices, including (\\romannumeral1) temporal modeling based on region-level tokens by utilizing 1D depthwise convolution to fuse channels in the ventral sensorimotor cortex (vSMC) and superior temporal gyrus (STG) and (\\romannumeral2) self-supervision through discrete codex-guided mask modeling, significantly contribute to this performance. Overall, our approach -- inspired by neuroscience findings and capitalizing on region-level representations from specific brain regions -- is suitable for invasive brain modeling and represents a promising neuro-inspired AI approach in brain-computer interfaces. Code and dataset are available at https://github.com/liulab-repository/Du-IN.", "keywords": "neuroscience;sEEG;speech decoding;self-supervision;neuro-AI", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Hui Zheng;Haiteng Wang;Weibang Jiang;Zhongtao Chen;Li He;Peiyang Lin;Penghu Wei;Guoguang Zhao;Yunzhe Liu", "authorids": "~Hui_Zheng1;~Haiteng_Wang1;~Weibang_Jiang2;~Zhongtao_Chen1;~Li_He8;~Peiyang_Lin1;~Penghu_Wei1;~Guoguang_Zhao1;~Yunzhe_Liu2", "gender": "M;M;M;M;M;M;M;Not Specified;M", "homepage": "https://norbertzheng.github.io;https://scholar.google.com/citations?hl=en&user=W-EvV0MAAAAJ;https://935963004.github.io/;https://github.com/opghjywl;;https://github.com/PeiyangLin;https://gastroenterology.xwhosp.com.cn/Html/News/Articles/1585.html;https://github.com/yunzheliu;https://yjsh.ccmu.edu.cn/dsdw/dsjj/ssyjsds/sdykdxxwyy1/wkxsw06/20230076_1.htm", "dblp": ";;311/0002;;;;;;", "google_scholar": "NWFTgZ8AAAAJ;https://scholar.google.com/citations?hl=en;wevIfuYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;;JTvHJzUAAAAJ;65cGhWsAAAAJ", "orcid": "0000-0003-0513-9389;;0000-0003-3759-5100;;;;;;", "linkedin": "hui-zheng-988b38219/;;;;;;;;", "or_profile": "~Hui_Zheng1;~Haiteng_Wang1;~Weibang_Jiang2;~Zhongtao_Chen1;~Li_He8;~Peiyang_Lin1;~Guoguang_Zhao1;~Yunzhe_Liu2;~Penghu_Wei3", "aff": "Westlake University;Beijing Normal University;Shanghai Jiaotong University;;Beijing Normal University;Beijing Normal University;Zhejiang University;;Capital Medical University", "aff_domain": "westlake.edu;bnu.edu.cn;sjtu.edu.cn;;bnu.edu.cn;bnu.edu.cn;zju.edu.cn;;mail.ccmu.edu.cn", "position": "Intern;MS student;PhD student;;Postdoc;MS student;Researcher;;Researcher", "bibtex": "@inproceedings{\nzheng2024duin,\ntitle={Du-{IN}: Discrete units-guided mask modeling for decoding speech from Intracranial Neural signals},\nauthor={Hui Zheng and Haiteng Wang and Weibang Jiang and Zhongtao Chen and Li He and Peiyang Lin and Penghu Wei and Guoguang Zhao and Yunzhe Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uyLtEFnpQP}\n}", "github": "", "reviewers": "t1mV;eGF6;bbQK;1qa8;gwJp", "pdf_size": 6202591, "rating": "4;4;5;7;8", "confidence": "5;5;4;3;5", "soundness": "2;2;2;4;4", "novelty": "2;2;2;4;3", "presentation": "3;3;3;4;3", "wc_summary": "36;116;252;86;112", "wc_strengths": "23;71;88;108;41", "wc_weaknesses": "416;484;634;45;140", "wc_questions": "64;77;313;61;459", "wc_limitations": "24;44;73;17;1", "wc_review": "563;792;1360;317;753", "wc_reply_reviewers": "266;218;198;0;110", "wc_reply_authors": "708;407;793;0;18", "reply_reviewers": "1;1;2;0;1", "reply_authors": "2;2;3;1;2", "rating_avg": [ 5.6, 1.624807680927192 ], "confidence_avg": [ 4.4, 0.8 ], "soundness_avg": [ 2.8, 0.9797958971132712 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 120.4, 71.71499145924791 ], "wc_strengths_avg": [ 66.2, 30.81168609472711 ], "wc_weaknesses_avg": [ 343.8, 219.04373992424433 ], "wc_questions_avg": [ 194.8, 162.88695466488406 ], "wc_limitations_avg": [ 31.8, 24.798387044322055 ], "wc_review_avg": [ 757.0, 345.27843836532855 ], "wc_reply_reviewers_avg": [ 158.4, 93.94807076252285 ], "wc_reply_authors_avg": [ 385.2, 332.9236549120534 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.33850160019316505, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9687751260730017284&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "westlake.edu;bnu.edu.cn;sjtu.edu.cn;;bnu.edu.cn;bnu.edu.cn;zju.edu.cn;;mail.ccmu.edu.cn", "author_num": 9, "aff_unique_index": "0;1;2;1;1;3;4", "aff_unique_norm": "Westlake University;Beijing Normal University;Shanghai Jiao Tong University;Zhejiang University;Capital Medical University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.bnu.edu.cn;https://www.sjtu.edu.cn;https://www.zju.edu.cn;http://www.cmu.edu.cn", "aff_unique_abbr": "WU;BNU;SJTU;ZJU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Integrating Deep Metric Learning with Coreset for Active Learning in 3D Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93237", "id": "uyqjpycMbU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uyqjpycMbU", "openreview": "https://openreview.net/forum?id=uyqjpycMbU", "poster": "/media/PosterPDFs/NeurIPS%202024/93237.png?t=1733994481.7318964", "project": "", "author_site": "Arvind Vepa, Zukang Yang, Andrew Choi, Jungseock Joo, Fabien Scalzo, Yizhou Sun", "tldr": "", "abstract": "Deep learning has seen remarkable advancements in machine learning, yet it often demands extensive annotated data. Tasks like 3D semantic segmentation impose a substantial annotation burden, especially in domains like medicine, where expert annotations drive up the cost. Active learning (AL) holds great potential to alleviate this annotation burden in 3D medical segmentation. The majority of existing AL methods, however, are not tailored to the medical domain. While weakly-supervised methods have been explored to reduce annotation burden, the fusion of AL with weak supervision remains unexplored, despite its potential to significantly reduce annotation costs. Additionally, there is little focus on slice-based AL for 3D segmentation, which can also significantly reduce costs in comparison to conventional volume-based AL. This paper introduces a novel metric learning method for Coreset to perform slice-based active learning in 3D medical segmentation. By merging contrastive learning with inherent data groupings in medical imaging, we learn a metric that emphasizes the relevant differences in samples for training 3D medical segmentation models. We perform comprehensive evaluations using both weak and full annotations across four datasets (medical and non-medical). Our findings demonstrate that our approach surpasses existing active learning techniques on both weak and full annotations and obtains superior performance with low-annotation budgets which is crucial in medical imaging. Source code for this project is available in the supplementary materials and on GitHub: https://github.com/arvindmvepa/al-seg.", "keywords": "Active Learning;Medical Imaging;Segmentation;Deep Metric Learning", "primary_area": "active_learning", "supplementary_material": "/attachment/1c3be0302ba8ec4e851ab3fc4cf4237b28cb6670.zip", "author": "Arvind Murari Vepa;ZUKANG YANG;Andrew Choi;Jungseock Joo;Fabien Scalzo;Yizhou Sun", "authorids": "~Arvind_Murari_Vepa1;~ZUKANG_YANG2;~Andrew_Choi2;~Jungseock_Joo3;~Fabien_Scalzo1;~Yizhou_Sun1", "gender": "M;M;;M;M;F", "homepage": ";https://github.com/zukangy;https://quantumope.github.io/;https://www.jsjoo.com/;https://web.cs.ucla.edu/~fab/;http://web.cs.ucla.edu/~yzsun/", "dblp": "313/8968;375/0960;;135/4915;41/5162;37/3868", "google_scholar": ";o2eIS-YAAAAJ;chiMYfwAAAAJ;ePNRe-EAAAAJ;9Z1pfWMAAAAJ;https://scholar.google.com.tw/citations?user=TQgOjK0AAAAJ", "orcid": ";0009-0003-5191-2229;;;0000-0001-9755-8104;", "linkedin": "arvind-vepa-4b759323/;zukangyang;;jungseock-joo-058634a8/;fabien-scalzo-62b0893/;", "or_profile": "~Arvind_Murari_Vepa1;~ZUKANG_YANG2;~Andrew_Choi2;~Jungseock_Joo3;~Fabien_Scalzo1;~Yizhou_Sun1", "aff": "University of California, Los Angeles;University of California, Berkeley;;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;berkeley.edu;;ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;MS student;;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nvepa2024integrating,\ntitle={Integrating Deep Metric Learning with Coreset for Active Learning in 3D Segmentation},\nauthor={Arvind Murari Vepa and ZUKANG YANG and Andrew Choi and Jungseock Joo and Fabien Scalzo and Yizhou Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uyqjpycMbU}\n}", "github": "", "reviewers": "aT8e;ff5T;gm1Z;4vB1", "pdf_size": 11536621, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;2", "wc_summary": "116;84;122;114", "wc_strengths": "104;54;78;47", "wc_weaknesses": "295;325;227;498", "wc_questions": "130;76;101;220", "wc_limitations": "232;55;13;53", "wc_review": "877;594;541;932", "wc_reply_reviewers": "0;124;214;186", "wc_reply_authors": "56;75;854;77", "reply_reviewers": "0;1;2;3", "reply_authors": "2;3;4;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 109.0, 14.730919862656235 ], "wc_strengths_avg": [ 70.75, 22.37604746151563 ], "wc_weaknesses_avg": [ 336.25, 99.9083955431174 ], "wc_questions_avg": [ 131.75, 54.41679428264771 ], "wc_limitations_avg": [ 88.25, 84.66810202195394 ], "wc_review_avg": [ 736.0, 170.65022707280528 ], "wc_reply_reviewers_avg": [ 131.0, 82.34682750416071 ], "wc_reply_authors_avg": [ 265.5, 339.86946023436707 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10341730023120090274&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "ucla.edu;berkeley.edu;;ucla.edu;ucla.edu;ucla.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of California, Los Angeles;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://www.berkeley.edu", "aff_unique_abbr": "UCLA;UC Berkeley", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Los Angeles;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Edit Visual Programs with Self-Supervision", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93236", "id": "uzIWqRzjEP", "proceeding": "", "pdf": "https://openreview.net/pdf?id=uzIWqRzjEP", "openreview": "https://openreview.net/forum?id=uzIWqRzjEP", "poster": "", "project": "", "author_site": "R. Kenny Jones, Renhao Zhang, Aditya Ganeshan, Daniel Ritchie", "tldr": "", "abstract": "We design a system that learns how to edit visual programs. Our edit network consumes a complete input program and a visual target. From this input, we task our network with predicting a local edit operation that could be applied to the input program to improve its similarity to the target. In order to apply this scheme for domains that lack program annotations, we develop a self-supervised learning approach that integrates this edit network into a bootstrapped finetuning loop along with a network that predicts entire programs in one-shot. Our joint finetuning scheme, when coupled with an inference procedure that initializes a population from the one-shot model and evolves members of this population with the edit network, helps to infer more accurate visual programs. Over multiple domains, we experimentally compare our method against the alternative of using only the one-shot model, and find that even under equal search-time budgets, our editing-based paradigm provides significant advantages.", "keywords": "visual program induction;program synthesis;visual programs;inverse graphics", "primary_area": "machine_vision", "supplementary_material": "", "author": "R. Kenny Jones;Renhao Zhang;Aditya Ganeshan;Daniel Ritchie", "authorids": "~R._Kenny_Jones1;~Renhao_Zhang1;~Aditya_Ganeshan1;~Daniel_Ritchie1", "gender": "M;M;M;M", "homepage": "https://rkjones4.github.io/;https://renhaoz.github.io/;https://bardofcodes.github.io/;http://dritchie.github.io", "dblp": "274/7070;256/2504;211/7173;17/7188.html", "google_scholar": "NwVbkmQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=Aw1Sb4gAAAAJ;0RiypNsAAAAJ", "orcid": ";;0000-0001-8615-741X;", "linkedin": ";;aditya-ganeshan-68341bb9/;", "or_profile": "~R._Kenny_Jones1;~Renhao_Zhang1;~Aditya_Ganeshan1;~Daniel_Ritchie1", "aff": "Brown University;Brown University;Adobe Systems;Brown University", "aff_domain": "brown.edu;brown.edu;adobe.com;brown.edu", "position": "PhD student;MS student;Intern;Assistant Professor", "bibtex": "@inproceedings{\njones2024learning,\ntitle={Learning to Edit Visual Programs with Self-Supervision},\nauthor={R. Kenny Jones and Renhao Zhang and Aditya Ganeshan and Daniel Ritchie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=uzIWqRzjEP}\n}", "github": "", "reviewers": "WhD9;2AbF;reBN", "pdf_size": 1314463, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;3;4", "wc_summary": "83;32;33", "wc_strengths": "20;63;47", "wc_weaknesses": "188;84;189", "wc_questions": "24;18;38", "wc_limitations": "9;25;3", "wc_review": "324;222;310", "wc_reply_reviewers": "17;24;0", "wc_reply_authors": "30;0;0", "reply_reviewers": "1;1;0", "reply_authors": "2;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 49.333333333333336, 23.809428571238094 ], "wc_strengths_avg": [ 43.333333333333336, 17.745108872274887 ], "wc_weaknesses_avg": [ 153.66666666666666, 49.2634640366356 ], "wc_questions_avg": [ 26.666666666666668, 8.379870059984357 ], "wc_limitations_avg": [ 12.333333333333334, 9.285592184789413 ], "wc_review_avg": [ 285.3333333333333, 45.146674542232034 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 10.077477638553983 ], "wc_reply_authors_avg": [ 10.0, 14.142135623730951 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17600737557575937723&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "brown.edu;brown.edu;adobe.com;brown.edu", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Brown University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.brown.edu;https://www.adobe.com", "aff_unique_abbr": "Brown;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Achieving Domain-Independent Certified Robustness via Knowledge Continuity", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93235", "id": "v07KRLYxDX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v07KRLYxDX", "openreview": "https://openreview.net/forum?id=v07KRLYxDX", "poster": "/media/PosterPDFs/NeurIPS%202024/93235.png?t=1733007663.7814336", "project": "", "author_site": "Alan Sun, Chiyu Ma, Kenneth Ge, Soroush Vosoughi", "tldr": "", "abstract": "We present *knowledge continuity*, a novel definition inspired by Lipschitz continuity which aims to certify the robustness of neural networks across input domains (such as continuous and discrete domains in vision and language, respectively). Most existing approaches that seek to certify robustness, especially Lipschitz continuity, lie within the continuous domain with norm and distribution-dependent guarantees. In contrast, our proposed definition yields certification guarantees that depend only on the loss function and the intermediate learned metric spaces of the neural network. These bounds are independent of domain modality, norms, and distribution. We further demonstrate that the expressiveness of a model class is not at odds with its knowledge continuity. This implies that achieving robustness by maximizing knowledge continuity should not theoretically hinder inferential performance. Finally, to complement our theoretical results, we present several applications of knowledge continuity such as regularization, a certification algorithm, and show that knowledge continuity can be used to localize vulnerable components of a neural network.", "keywords": "Lipschitz continuity;robustness;certified robustness;adversarial robustness", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0b9b46ec140ee7d19a8606c9d892c0ade5e1f189.zip", "author": "Alan Sun;Chiyu Ma;Kenneth Ge;Soroush Vosoughi", "authorids": "~Alan_Sun1;~Chiyu_Ma1;~Kenneth_Ge1;~Soroush_Vosoughi1", "gender": ";M;;M", "homepage": "https://alansun17904.github.io/;https://henrymachiyu.github.io/;https://www.cs.dartmouth.edu/~soroush/;https://github.com/kenneth-ge", "dblp": ";;01/1709;", "google_scholar": "QRJoXDQAAAAJ;h_3TRv0AAAAJ;45DAXkwAAAAJ;", "orcid": ";;0000-0002-2564-8909;", "linkedin": ";henry-chiyu-ma-3b7b30203/;;kenneth-ge-8209a6216/", "or_profile": "~Alan_Sun1;~Chiyu_Ma1;~Soroush_Vosoughi1;~Kenneth_Kou_Ge1", "aff": "Dartmouth College;Dartmouth College;Dartmouth College;Carnegie Mellon University", "aff_domain": "dartmouth.edu;dartmouth.edu;dartmouth.edu;andrew.cmu.edu", "position": "Undergrad student;PhD student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nsun2024achieving,\ntitle={Achieving Domain-Independent Certified Robustness via Knowledge Continuity},\nauthor={Alan Sun and Chiyu Ma and Kenneth Ge and Soroush Vosoughi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v07KRLYxDX}\n}", "github": "", "reviewers": "nGPb;7Euo;Cz6j;88sL", "pdf_size": 1025831, "rating": "5;7;7;7", "confidence": "3;2;3;2", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;3;4;2", "wc_summary": "97;109;240;79", "wc_strengths": "32;47;80;65", "wc_weaknesses": "226;21;312;103", "wc_questions": "16;36;2;4", "wc_limitations": "1;24;2;11", "wc_review": "372;237;636;262", "wc_reply_reviewers": "24;13;35;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 131.25, 63.68820534447489 ], "wc_strengths_avg": [ 56.0, 18.12456896039186 ], "wc_weaknesses_avg": [ 165.5, 111.70161144764207 ], "wc_questions_avg": [ 14.5, 13.518505834595775 ], "wc_limitations_avg": [ 9.5, 9.233092656309694 ], "wc_review_avg": [ 376.75, 158.0591265950815 ], "wc_reply_reviewers_avg": [ 18.0, 12.98075498574717 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TBBPVbOYY6sJ:scholar.google.com/&scioq=Achieving+Domain-Independent+Certified+Robustness+via+Knowledge+Continuity&hl=en&as_sdt=0,14", "gs_version_total": 5, "email": "dartmouth.edu;dartmouth.edu;dartmouth.edu;andrew.cmu.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Dartmouth College;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.dartmouth.edu;https://www.cmu.edu", "aff_unique_abbr": "Dartmouth;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Skinned Motion Retargeting with Dense Geometric Interaction Perception", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93234", "id": "v1BIm8wESL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v1BIm8wESL", "openreview": "https://openreview.net/forum?id=v1BIm8wESL", "poster": "/media/PosterPDFs/NeurIPS%202024/93234.png?t=1733559892.6066675", "project": "", "author_site": "Zijie Ye, Jia-Wei Liu, Jia Jia, Shikun Sun, Mike Zheng Shou", "tldr": "", "abstract": "Capturing and maintaining geometric interactions among different body parts is crucial for successful motion retargeting in skinned characters. Existing approaches often overlook body geometries or add a geometry correction stage after skeletal motion retargeting. This results in conflicts between skeleton interaction and geometry correction, leading to issues such as jittery, interpenetration, and contact mismatches. To address these challenges, we introduce a new retargeting framework, MeshRet, which directly models the dense geometric interactions in motion retargeting. Initially, we establish dense mesh correspondences between characters using semantically consistent sensors (SCS), effective across diverse mesh topologies. Subsequently, we develop a novel spatio-temporal representation called the dense mesh interaction (DMI) field. This field, a collection of interacting SCS feature vectors, skillfully captures both contact and non-contact interactions between body geometries. By aligning the DMI field during retargeting, MeshRet not only preserves motion semantics but also prevents self-interpenetration and ensures contact preservation. Extensive experiments on the public Mixamo dataset and our newly-collected ScanRet dataset demonstrate that MeshRet achieves state-of-the-art performance. Code available at https://github.com/abcyzj/MeshRet.", "keywords": "Neural Motion Processing;Motion Retargeting", "primary_area": "generative_models", "supplementary_material": "/attachment/47a42cc1bec4234043baef989515007d15742b3a.zip", "author": "Zijie Ye;Jia-Wei Liu;Jia Jia;Shikun Sun;Mike Zheng Shou", "authorids": "~Zijie_Ye1;~Jia-Wei_Liu1;~Jia_Jia1;~Shikun_Sun1;~Mike_Zheng_Shou1", "gender": "M;M;F;M;", "homepage": "https://abcyzj.github.io;https://jia-wei-liu.github.io/;https://hcsi.cs.tsinghua.edu.cn/;https://skipper17.github.io;http://www.columbia.edu/~zs2262/", "dblp": "241/3229;85/3336;71/2992-1.html;293/2733.html;284/0807", "google_scholar": "0PpDl-8AAAAJ;stQQf7wAAAAJ;RYhh3FsAAAAJ;C1YFRxAAAAAJ;h1-3lSoAAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zijie_Ye1;~Jia-Wei_Liu1;~Jia_Jia1;~Shikun_Sun1;~Zheng_Shou1", "aff": "Tsinghua University;National University of Singapore;Tsinghua University;Tsinghua University;National University of Singapore", "aff_domain": "tsinghua.edu.cn;u.nus.edu;tsinghua.edu.cn;tsinghua.edu.cn;nus.edu.sg", "position": "PhD student;PhD student;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nye2024skinned,\ntitle={Skinned Motion Retargeting with Dense Geometric Interaction Perception},\nauthor={Zijie Ye and Jia-Wei Liu and Jia Jia and Shikun Sun and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v1BIm8wESL}\n}", "github": "", "reviewers": "yJ9L;YXjG;utNP;WToy", "pdf_size": 17019611, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "85;90;161;88", "wc_strengths": "123;17;94;74", "wc_weaknesses": "64;18;125;53", "wc_questions": "41;120;4;38", "wc_limitations": "27;17;62;10", "wc_review": "340;262;446;263", "wc_reply_reviewers": "0;43;48;49", "wc_reply_authors": "38;132;36;36", "reply_reviewers": "0;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 106.0, 31.804087787578503 ], "wc_strengths_avg": [ 77.0, 38.77499194068259 ], "wc_weaknesses_avg": [ 65.0, 38.58108344772085 ], "wc_questions_avg": [ 50.75, 42.540421953713626 ], "wc_limitations_avg": [ 29.0, 19.987496091306685 ], "wc_review_avg": [ 327.75, 75.24750826439372 ], "wc_reply_reviewers_avg": [ 35.0, 20.334699407662754 ], "wc_reply_authors_avg": [ 60.5, 41.28861828639946 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=903754748789654421&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;u.nus.edu;tsinghua.edu.cn;tsinghua.edu.cn;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Tsinghua University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "THU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;Singapore" }, { "title": "Weight for Robustness: A Comprehensive Approach towards Optimal Fault-Tolerant Asynchronous ML", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93233", "id": "v1kpc060aC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v1kpc060aC", "openreview": "https://openreview.net/forum?id=v1kpc060aC", "poster": "/media/PosterPDFs/NeurIPS%202024/93233.png?t=1731611032.0577946", "project": "", "author_site": "Tehila Dahan, Kfir Y. Levy", "tldr": "", "abstract": "We address the challenges of Byzantine-robust training in asynchronous distributed machine learning systems, aiming to enhance efficiency amid massive parallelization and heterogeneous compute resources. Asynchronous systems, marked by independently operating workers and intermittent updates, uniquely struggle with maintaining integrity against Byzantine failures, which encompass malicious or erroneous actions that disrupt learning. The inherent delays in such settings not only introduce additional bias to the system but also obscure the disruptions caused by Byzantine faults. To tackle these issues, we adapt the Byzantine framework to asynchronous dynamics by introducing a novel weighted robust aggregation framework. This allows for the extension of robust aggregators and a recent meta-aggregator to their weighted versions, mitigating the effects of delayed updates. By further incorporating a recent variance-reduction technique, we achieve an optimal convergence rate for the first time in an asynchronous Byzantine environment. Our methodology is rigorously validated through empirical and theoretical analysis, demonstrating its effectiveness in enhancing fault tolerance and optimizing performance in asynchronous ML systems.", "keywords": "stochastic convex optimization;byzantine robust learning;online convex optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Tehila Dahan;Kfir Yehuda Levy", "authorids": "~Tehila_Dahan1;~Kfir_Yehuda_Levy1", "gender": "F;M", "homepage": ";http://kfiryehud.wixsite.com/kfir-y-levy", "dblp": "378/2189;83/11388", "google_scholar": ";", "orcid": ";", "linkedin": "tehila-dahan-b86481178/;", "or_profile": "~Tehila_Dahan1;~Kfir_Yehuda_Levy1", "aff": "Technion - Israel Institute of Technology, Technion;Technion - Israel Institute of Technology, Technion", "aff_domain": "technion.ac.il;technion.ac.il", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\ndahan2024weight,\ntitle={Weight for Robustness: A Comprehensive Approach towards Optimal Fault-Tolerant Asynchronous {ML}},\nauthor={Tehila Dahan and Kfir Yehuda Levy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v1kpc060aC}\n}", "github": "", "reviewers": "ApUX;y4Tu;hh1u;yddM;6iKY", "pdf_size": 1236817, "rating": "6;6;6;6;7", "confidence": "3;3;4;1;2", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;3;3", "wc_summary": "130;42;70;23;79", "wc_strengths": "89;79;122;50;109", "wc_weaknesses": "54;17;101;15;299", "wc_questions": "38;56;85;17;3", "wc_limitations": "33;58;1;13;5", "wc_review": "344;252;379;118;495", "wc_reply_reviewers": "0;42;129;0;93", "wc_reply_authors": "0;0;0;0;40", "reply_reviewers": "0;1;1;0;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 2.6, 1.019803902718557 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 68.8, 36.51520231355702 ], "wc_strengths_avg": [ 89.8, 24.927093693409187 ], "wc_weaknesses_avg": [ 97.2, 105.63408540807272 ], "wc_questions_avg": [ 39.8, 28.923346970916075 ], "wc_limitations_avg": [ 22.0, 21.109239683134017 ], "wc_review_avg": [ 317.6, 126.55528436221066 ], "wc_reply_reviewers_avg": [ 52.8, 51.21484159889592 ], "wc_reply_authors_avg": [ 8.0, 16.0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.29417420270727607, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ahqg_s3TMG4J:scholar.google.com/&scioq=Weight+for+Robustness:+A+Comprehensive+Approach+towards+Optimal+Fault-Tolerant+Asynchronous+ML&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "technion.ac.il;technion.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Vision-Language Navigation with Energy-Based Policy", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93232", "id": "v3jHuoxMw8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v3jHuoxMw8", "openreview": "https://openreview.net/forum?id=v3jHuoxMw8", "poster": "", "project": "", "author_site": "Rui Liu, Wenguan Wang, Yi Yang", "tldr": "", "abstract": "Vision-language navigation (VLN) requires an agent to execute actions following human instructions. Existing VLN models are optimized through expert demonstrations by supervised behavioural cloning or incorporating manual reward engineering. While straightforward, these efforts overlook the accumulation of errors in the Markov decision process, and struggle to match the distribution of the expert policy. Going beyond this, we propose an Energy-based Navigation Policy (ENP) to model the joint state-action distribution using an energy-based model. At each step, low energy values correspond to the state-action pairs that the expert is most likely to perform, and vice versa. Theoretically, the optimization objective is equivalent to minimizing the forward divergence between the occupancy measure of the expert and ours. Consequently, ENP learns to globally align with the expert policy by maximizing the likelihood of the actions and modeling the dynamics of the navigation states in a collaborative manner. With a variety of VLN architectures, ENP achieves promising performances on R2R, REVERIE, RxR, and R2R-CE, unleashing the power of existing VLN models.", "keywords": "Vision-Language Navigation; Vision-Language; Embodied vision", "primary_area": "robotics", "supplementary_material": "", "author": "Rui Liu;Wenguan Wang;Yi Yang", "authorids": "~Rui_Liu22;~Wenguan_Wang4;~Yi_Yang4", "gender": ";M;M", "homepage": ";https://sites.google.com/view/wenguanwang/;http://reler.net/", "dblp": ";145/1078;", "google_scholar": ";CqAQQkgAAAAJ;https://scholar.google.com.au/citations?user=RMSuNFwAAAAJ", "orcid": ";0000-0002-0802-9567;", "linkedin": ";wenguanwang;", "or_profile": "~Rui_Liu22;~Wenguan_Wang4;~Yi_Yang4", "aff": ";Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn", "position": ";Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2024visionlanguage,\ntitle={Vision-Language Navigation with Energy-Based Policy},\nauthor={Rui Liu and Wenguan Wang and Yi Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v3jHuoxMw8}\n}", "github": "", "reviewers": "dJ3y;UeCa;Ngjf", "pdf_size": 2837807, "rating": "5;6;7", "confidence": "3;5;2", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "52;59;122", "wc_strengths": "46;79;147", "wc_weaknesses": "292;66;150", "wc_questions": "77;4;15", "wc_limitations": "47;1;35", "wc_review": "514;209;469", "wc_reply_reviewers": "77;20;76", "wc_reply_authors": "41;47;51", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.66666666666667, 31.47838764754143 ], "wc_strengths_avg": [ 90.66666666666667, 42.05023450852827 ], "wc_weaknesses_avg": [ 169.33333333333334, 93.27140802816025 ], "wc_questions_avg": [ 32.0, 32.13513134665341 ], "wc_limitations_avg": [ 27.666666666666668, 19.48218559493661 ], "wc_review_avg": [ 397.3333333333333, 134.43296553383854 ], "wc_reply_reviewers_avg": [ 57.666666666666664, 26.637484032009397 ], "wc_reply_authors_avg": [ 46.333333333333336, 4.109609335312651 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9023296300026736190&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": ";zju.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "GeoNLF: Geometry guided Pose-Free Neural LiDAR Fields", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93231", "id": "v3y785TN7B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v3y785TN7B", "openreview": "https://openreview.net/forum?id=v3y785TN7B", "poster": "/media/PosterPDFs/NeurIPS%202024/93231.png?t=1733402073.7734404", "project": "", "author_site": "Weiyi Xue, Zehan Zheng, Fan Lu, Haiyun Wei, Guang Chen, changjun jiang", "tldr": "", "abstract": "Although recent efforts have extended Neural Radiance Field (NeRF) into LiDAR point cloud synthesis, the majority of existing works exhibit a strong dependence on precomputed poses. However, point cloud registration methods struggle to achieve precise global pose estimation, whereas previous pose-free NeRFs overlook geometric consistency in global reconstruction. In light of this, we explore the geometric insights of point clouds, which provide explicit registration priors for reconstruction. Based on this, we propose Geometry guided Neural LiDAR Fields (GeoNLF), a hybrid framework performing alternately global neural reconstruction and pure geometric pose optimization. Furthermore, NeRFs tend to overfit individual frames and easily get stuck in local minima under sparse-view inputs. To tackle this issue, we develop a selective-reweighting strategy and introduce geometric constraints for robust optimization. Extensive experiments on NuScenes and KITTI-360 datasets demonstrate the superiority of GeoNLF in both novel view synthesis and multi-view registration of low-frequency large-scale point clouds.", "keywords": "Neural LiDAR Field;Pose-free;Geometry;Point Cloud;Registration", "primary_area": "machine_vision", "supplementary_material": "/attachment/927fa7bbc543aeeac7bb896d2f1bcfb98704c3b1.zip", "author": "Weiyi Xue;Zehan Zheng;Fan Lu;Haiyun Wei;Guang Chen;changjun jiang", "authorids": "~Weiyi_Xue1;~Zehan_Zheng1;~Fan_Lu3;~Haiyun_Wei2;~Guang_Chen4;~changjun_jiang2", "gender": "M;M;;M;M;Not Specified", "homepage": ";https://dyfcalid.github.io/;https://fanlu97.github.io/;;https://cs.tongji.edu.cn/info/1033/2865.htm;", "dblp": ";300/6688;35/6336-1;09/4891-1;;", "google_scholar": ";Pig6X6MAAAAJ;DyEUPFUAAAAJ;https://scholar.google.com.hk/citations?user=kBhIyv4AAAAJ;;", "orcid": "0009-0006-3099-3236;0000-0002-9733-6437;0000-0002-4932-3850;0000-0002-7416-592X;0000-0002-2543-8928;0009-0003-7265-9672", "linkedin": ";;;guang-chen-2879064a;;", "or_profile": "~Weiyi_Xue1;~Zehan_Zheng1;~Fan_Lu3;~Guang_Chen4;~changjun_jiang2;~Danico_Wei1", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "MS student;MS student;PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nxue2024geonlf,\ntitle={Geo{NLF}: Geometry guided Pose-Free Neural Li{DAR} Fields},\nauthor={Weiyi Xue and Zehan Zheng and Fan Lu and Haiyun Wei and Guang Chen and changjun jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v3y785TN7B}\n}", "github": "", "reviewers": "PTin;TE2m;fgnm", "pdf_size": 9510690, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "3;3;4", "novelty": "3;2;3", "presentation": "1;3;4", "wc_summary": "95;61;86", "wc_strengths": "106;56;112", "wc_weaknesses": "246;65;503", "wc_questions": "6;4;136", "wc_limitations": "31;1;25", "wc_review": "484;187;862", "wc_reply_reviewers": "0;0;82", "wc_reply_authors": "0;0;57", "reply_reviewers": "0;0;1", "reply_authors": "1;1;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 80.66666666666667, 14.383632673594278 ], "wc_strengths_avg": [ 91.33333333333333, 25.104227178350307 ], "wc_weaknesses_avg": [ 271.3333333333333, 179.70778750206927 ], "wc_questions_avg": [ 48.666666666666664, 61.759389749431804 ], "wc_limitations_avg": [ 19.0, 12.96148139681572 ], "wc_review_avg": [ 511.0, 276.2281665580105 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 38.6551707048646 ], "wc_reply_authors_avg": [ 19.0, 26.870057685088806 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:WUmyBmUppnMJ:scholar.google.com/&scioq=GeoNLF:+Geometry+guided+Pose-Free+Neural+LiDAR+Fields&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Adam with model exponential moving average is effective for nonconvex optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93230", "id": "v416YLOQuU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v416YLOQuU", "openreview": "https://openreview.net/forum?id=v416YLOQuU", "poster": "", "project": "", "author_site": "Kwangjun Ahn, Ashok Cutkosky", "tldr": "", "abstract": "In this work, we offer a theoretical analysis of two modern optimization techniques for training large and complex models: (i) adaptive optimization algorithms, such as Adam, and (ii) the model exponential moving average (EMA). Specifically, we demonstrate that a clipped version of Adam with model EMA achieves the optimal convergence rates in various nonconvex optimization settings, both smooth and nonsmooth. Moreover, when the scale varies significantly across different coordinates, we demonstrate that the coordinate-wise adaptivity of Adam is provably advantageous. Notably, unlike previous analyses of Adam, our analysis crucially relies on its core elements---momentum and discounting factors---as well as model EMA, motivating their wide applications in practice.", "keywords": "Adam;exponential moving average;EMA;nonconvex optimization.", "primary_area": "optimization", "supplementary_material": "", "author": "Kwangjun Ahn;Ashok Cutkosky", "authorids": "~Kwangjun_Ahn2;~Ashok_Cutkosky1", "gender": ";", "homepage": "http://kjahn.mit.edu/;http://www.cs.stanford.edu/~ashokc", "dblp": ";191/6725", "google_scholar": "z94iNtgAAAAJ;h4AbGp0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Kwangjun_Ahn2;~Ashok_Cutkosky1", "aff": "Massachusetts Institute of Technology;Boston University", "aff_domain": "mit.edu;bu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nahn2024adam,\ntitle={Adam with model exponential moving average is effective for nonconvex optimization},\nauthor={Kwangjun Ahn and Ashok Cutkosky},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v416YLOQuU}\n}", "github": "", "reviewers": "m9Ez;DXTx;BY48", "pdf_size": 445555, "rating": "4;5;7", "confidence": "2;3;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "2;2;3", "wc_summary": "52;17;139", "wc_strengths": "7;10;65", "wc_weaknesses": "84;51;127", "wc_questions": "307;12;67", "wc_limitations": "2;1;1", "wc_review": "452;91;399", "wc_reply_reviewers": "282;10;43", "wc_reply_authors": "432;0;165", "reply_reviewers": "1;1;1", "reply_authors": "2;1;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 69.33333333333333, 51.29219130519663 ], "wc_strengths_avg": [ 27.333333333333332, 26.662499674428297 ], "wc_weaknesses_avg": [ 87.33333333333333, 31.116269413639905 ], "wc_questions_avg": [ 128.66666666666666, 128.08417371227233 ], "wc_limitations_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_review_avg": [ 314.0, 159.16239086752458 ], "wc_reply_reviewers_avg": [ 111.66666666666667, 121.19497606015779 ], "wc_reply_authors_avg": [ 199.0, 177.99438193381272 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1358952178243709989&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;bu.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Boston University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.bu.edu", "aff_unique_abbr": "MIT;BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning to Cooperate with Humans using Generative Agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93229", "id": "v4dXL3LsGX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v4dXL3LsGX", "openreview": "https://openreview.net/forum?id=v4dXL3LsGX", "poster": "/media/PosterPDFs/NeurIPS%202024/93229.png?t=1731753469.766126", "project": "", "author_site": "Yancheng Liang, Daphne Chen, Abhishek Gupta, Simon Du, Natasha Jaques", "tldr": "", "abstract": "Training agents that can coordinate zero-shot with humans is a key mission in multi-agent reinforcement learning (MARL). Current algorithms focus on training simulated human partner policies which are then used to train a Cooperator agent. The simulated human is produced either through behavior cloning over a dataset of human cooperation behavior, or by using MARL to create a population of simulated agents. However, these approaches often struggle to produce a Cooperator that can coordinate well with real humans, since the simulated humans fail to cover the diverse strategies and styles employed by people in the real world. We show \\emph{learning a generative model of human partners} can effectively address this issue. Our model learns a latent variable representation of the human that can be regarded as encoding the human's unique strategy, intention, experience, or style. This generative model can be flexibly trained from any (human or neural policy) agent interaction data. By sampling from the latent space, we can use the generative model to produce different partners to train Cooperator agents. We evaluate our method---Generative Agent Modeling for Multi-agent Adaptation (GAMMA)---on Overcooked, a challenging cooperative cooking game that has become a standard benchmark for zero-shot coordination. We conduct an evaluation with real human teammates, and the results show that GAMMA consistently improves performance, whether the generative model is trained on simulated populations or human datasets. Further, we propose a method for posterior sampling from the generative model that is biased towards the human data, enabling us to efficiently improve performance with only a small amount of expensive human interaction data.", "keywords": "multi-agent reinforcement learning;human-AI cooperation", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yancheng Liang;Daphne Chen;Abhishek Gupta;Simon Shaolei Du;Natasha Jaques", "authorids": "~Yancheng_Liang1;~Daphne_Chen1;~Abhishek_Gupta1;~Simon_Shaolei_Du1;~Natasha_Jaques1", "gender": "M;;M;M;F", "homepage": "https://lych12333.top;;https://homes.cs.washington.edu/~abhgupta/;http://simonshaoleidu.com;https://natashajaques.ai/", "dblp": ";;18/6404-4;176/5602;145/7732", "google_scholar": "TR6GqIgAAAAJ;ckrkok8AAAAJ;1wLVDP4AAAAJ;OttawxUAAAAJ;8iCb2TwAAAAJ", "orcid": ";;;;", "linkedin": ";;;;natashajaques", "or_profile": "~Yancheng_Liang1;~Daphne_Chen1;~Abhishek_Gupta1;~Simon_Shaolei_Du1;~Natasha_Jaques1", "aff": "Department of Computer Science, University of Washington;University of Washington;University of Washington;University of Washington;Google", "aff_domain": "cs.washington.edu;uw.edu;uw.edu;washington.edu;google.com", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Senior Research Scientist", "bibtex": "@inproceedings{\nliang2024learning,\ntitle={Learning to Cooperate with Humans using Generative Agents},\nauthor={Yancheng Liang and Daphne Chen and Abhishek Gupta and Simon Shaolei Du and Natasha Jaques},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v4dXL3LsGX}\n}", "github": "", "reviewers": "VM8E;pvTn;igMC;UQLZ", "pdf_size": 1624996, "rating": "4;4;6;7", "confidence": "5;5;4;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "4;4;3;4", "wc_summary": "67;239;26;139", "wc_strengths": "44;224;38;169", "wc_weaknesses": "247;340;153;384", "wc_questions": "65;235;121;189", "wc_limitations": "6;6;207;182", "wc_review": "429;1044;545;1063", "wc_reply_reviewers": "0;0;168;43", "wc_reply_authors": "0;0;367;0", "reply_reviewers": "0;0;2;1", "reply_authors": "1;1;3;1", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 117.75, 80.84978354949381 ], "wc_strengths_avg": [ 118.75, 80.1728601211158 ], "wc_weaknesses_avg": [ 281.0, 88.92412496055275 ], "wc_questions_avg": [ 152.5, 64.78232783715016 ], "wc_limitations_avg": [ 100.25, 94.6635489510086 ], "wc_review_avg": [ 770.25, 286.2825309026033 ], "wc_reply_reviewers_avg": [ 52.75, 68.81633163719205 ], "wc_reply_authors_avg": [ 91.75, 158.9156615944445 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9622504486493761, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3039440790473952135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cs.washington.edu;uw.edu;uw.edu;washington.edu;google.com", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Washington;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Seattle;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Lumen: Unleashing Versatile Vision-Centric Capabilities of Large Multimodal Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93228", "id": "v5Un2QqnRf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v5Un2QqnRf", "openreview": "https://openreview.net/forum?id=v5Un2QqnRf", "poster": "/media/PosterPDFs/NeurIPS%202024/93228.png?t=1731748428.2426698", "project": "", "author_site": "Yang Jiao, Shaoxiang Chen, Zequn Jie, Jingjing Chen, Lin Ma, Yu-Gang Jiang", "tldr": "", "abstract": "Large Multimodal Model (LMM) is a hot research topic in the computer vision area and has also demonstrated remarkable potential across multiple disciplinary fields. A recent trend is to further extend and enhance the perception capabilities of LMMs. The current methods follow the paradigm of adapting the visual task outputs to the format of the language model, which is the main component of a LMM. This adaptation leads to convenient development of such LMMs with minimal modifications, however, it overlooks the intrinsic characteristics of diverse visual tasks and hinders the learning of perception capabilities. To address this issue, we propose a novel LMM architecture named Lumen, a Large multimodal model with versatile vision-centric capability enhancement. We decouple the LMM's learning of perception capabilities into task-agnostic and task-specific stages. Lumen first promotes fine-grained vision-language concept alignment, which is the fundamental capability for various visual tasks. Thus the output of the task-agnostic stage is a shared representation for all the tasks we address in this paper. Then the task-specific decoding is carried out by flexibly routing the shared representation to lightweight task decoders with negligible training efforts. Comprehensive experimental results on a series of vision-centric and VQA benchmarks indicate that our Lumen model not only achieves or surpasses the performance of existing LMM-based approaches in a range of vision-centric tasks while maintaining general visual understanding and instruction following capabilities.", "keywords": "Large Multimodal Models;Vision-Centric Capabilities", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yang Jiao;Shaoxiang Chen;ZEQUN JIE;Jingjing Chen;Lin Ma;Yu-Gang Jiang", "authorids": "~Yang_Jiao5;~Shaoxiang_Chen1;~ZEQUN_JIE1;~Jingjing_Chen3;~Lin_Ma2;~Yu-Gang_Jiang1", "gender": "M;;F;M;M;M", "homepage": ";;https://jingjing1.github.io/#teach;http://forestlinma.com;https://fvl.fudan.edu.cn/people/yugangjiang/;", "dblp": ";04/2928-1;;74/3608-2;24/5818;161/1958", "google_scholar": "5gA7Wv0AAAAJ;WL5mbfEAAAAJ;DfWdqzQAAAAJ;DAn1pA4AAAAJ;f3_FP8AAAAAJ;4sKGNB0AAAAJ", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yang_Jiao5;~Shaoxiang_Chen1;~Jingjing_Chen3;~Lin_Ma2;~Yu-Gang_Jiang1;~Zequn_Jie2", "aff": "Fudan University;Meituan Inc.;Fudan University;Meituan;Fudan University;Meituan", "aff_domain": "fudan.edu.cn;meituan.com;fudan.edu.cn;meituan.com;fudan.edu.cn;meituan.com", "position": "PhD student;Software Engineer;Associate Professor;Principal Researcher and Research Manager ;Full Professor;Researcher", "bibtex": "@inproceedings{\njiao2024lumen,\ntitle={Lumen: Unleashing Versatile Vision-Centric Capabilities of Large Multimodal Models},\nauthor={Yang Jiao and Shaoxiang Chen and ZEQUN JIE and Jingjing Chen and Lin Ma and Yu-Gang Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v5Un2QqnRf}\n}", "github": "", "reviewers": "1g9i;kab5;bt1Q;AJBg;ecgp", "pdf_size": 7957244, "rating": "5;5;5;5;7", "confidence": "4;5;4;3;5", "soundness": "3;3;3;2;4", "novelty": "3;2;3;2;3", "presentation": "3;3;4;2;3", "wc_summary": "78;57;74;53;80", "wc_strengths": "40;69;77;50;227", "wc_weaknesses": "176;184;34;352;193", "wc_questions": "54;99;3;67;4", "wc_limitations": "1;1;5;23;11", "wc_review": "349;410;193;545;515", "wc_reply_reviewers": "0;38;34;377;237", "wc_reply_authors": "0;23;23;738;25", "reply_reviewers": "0;1;1;1;1", "reply_authors": "1;2;2;2;2", "rating_avg": [ 5.4, 0.7999999999999999 ], "confidence_avg": [ 4.2, 0.7483314773547882 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 68.4, 11.18212859879549 ], "wc_strengths_avg": [ 92.6, 68.47656533442664 ], "wc_weaknesses_avg": [ 187.8, 100.79365059367578 ], "wc_questions_avg": [ 45.4, 37.21612553719154 ], "wc_limitations_avg": [ 8.2, 8.255906976220118 ], "wc_review_avg": [ 402.4, 126.31009460846747 ], "wc_reply_reviewers_avg": [ 137.2, 146.13610094702815 ], "wc_reply_authors_avg": [ 161.8, 288.24669989437865 ], "reply_reviewers_avg": [ 0.8, 0.4000000000000001 ], "reply_authors_avg": [ 1.8, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5345224838248488, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=716987272841450703&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "fudan.edu.cn;meituan.com;fudan.edu.cn;meituan.com;fudan.edu.cn;meituan.com", "author_num": 6, "aff_unique_index": "0;1;0;2;0;2", "aff_unique_norm": "Fudan University;Meituan Inc.;Meituan", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.meituan.com;https://www.meituan.com", "aff_unique_abbr": "Fudan;Meituan;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "CE-NAS: An End-to-End Carbon-Efficient Neural Architecture Search Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93227", "id": "v6W55lCkhN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v6W55lCkhN", "openreview": "https://openreview.net/forum?id=v6W55lCkhN", "poster": "/media/PosterPDFs/NeurIPS%202024/93227.png?t=1731050718.1257534", "project": "", "author_site": "Yiyang Zhao, Yunzhuo Liu, Bo Jiang, Tian Guo", "tldr": "", "abstract": "This work presents a novel approach to neural architecture search (NAS) that aims to increase carbon efficiency for the model design process. The proposed framework CE-NAS addresses the key challenge of high carbon cost associated with NAS by exploring the carbon emission variations of energy and energy differences of different NAS algorithms. At the high level, CE-NAS leverages a reinforcement-learning agent to dynamically adjust GPU resources based on carbon intensity, predicted by a time-series transformer, to balance energy-efficient sampling and energy-intensive evaluation tasks. Furthermore, CE-NAS leverages a recently proposed multi-objective optimizer to effectively reduce the NAS search space. We demonstrate the efficacy of CE-NAS in lowering carbon emissions while achieving SOTA results for both NAS datasets and open-domain NAS tasks. For example, on the HW-NasBench dataset, CE-NAS reduces carbon emissions by up to 7.22X while maintaining a search efficiency comparable to vanilla NAS. For open-domain NAS tasks, CE-NAS achieves SOTA results with 97.35% top-1 accuracy on CIFAR-10 with only 1.68M parameters and a carbon consumption of 38.53 lbs of CO2. On ImageNet, our searched model achieves 80.6% top-1 accuracy with a 0.78 ms TensorRT latency using FP16 on NVIDIA V100, consuming only 909.86 lbs of CO2, making it comparable to other one-shot-based NAS baselines. Our code is available at https://github.com/cake-lab/CE-NAS.", "keywords": "Neural architecture search; Carbon efficient; Optimization; Environment friendly", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yiyang Zhao;Yunzhuo Liu;Bo Jiang;Tian Guo", "authorids": "~Yiyang_Zhao1;~Yunzhuo_Liu1;~Bo_Jiang2;~Tian_Guo3", "gender": "M;M;M;F", "homepage": "https://zhaoyiyang.me/;;https://jhc.sjtu.edu.cn/~bjiang/;http://tianguo.info", "dblp": "33/3791;;34/2005-3.html;55/3523-1.html", "google_scholar": "pcerjpMeoAAAAJ;;WxAIZtMAAAAJ;vDzUD84AAAAJ", "orcid": ";0000-0003-0156-2454;;", "linkedin": ";;;", "or_profile": "~Yiyang_Zhao1;~Yunzhuo_Liu1;~Bo_Jiang2;~Tian_Guo3", "aff": "Worcester Polytechnic Institute;Shanghai Jiaotong University;Shanghai Jiaotong University;Worcester Polytechnic Institute", "aff_domain": "wpi.edu;sjtu.edu.cn;sjtu.edu.cn;wpi.edu", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhao2024cenas,\ntitle={{CE}-{NAS}: An End-to-End Carbon-Efficient Neural Architecture Search Framework},\nauthor={Yiyang Zhao and Yunzhuo Liu and Bo Jiang and Tian Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v6W55lCkhN}\n}", "github": "", "reviewers": "CjFX;d3FP;g77N;QYii", "pdf_size": 1944644, "rating": "5;7;8;9", "confidence": "5;5;4;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;3;3", "wc_summary": "86;145;200;95", "wc_strengths": "57;181;267;49", "wc_weaknesses": "43;505;414;79", "wc_questions": "12;2;98;45", "wc_limitations": "1;95;27;18", "wc_review": "199;928;1006;286", "wc_reply_reviewers": "92;34;43;8", "wc_reply_authors": "35;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 7.25, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 131.5, 45.489009661675425 ], "wc_strengths_avg": [ 138.5, 90.78959191449205 ], "wc_weaknesses_avg": [ 260.25, 202.23176679246018 ], "wc_questions_avg": [ 39.25, 37.46581775432107 ], "wc_limitations_avg": [ 35.25, 35.73776014245996 ], "wc_review_avg": [ 604.75, 364.597980658149 ], "wc_reply_reviewers_avg": [ 44.25, 30.416894976312097 ], "wc_reply_authors_avg": [ 8.75, 15.155444566227676 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.8451542547285166, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13097235429737348384&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "wpi.edu;sjtu.edu.cn;sjtu.edu.cn;wpi.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Worcester Polytechnic Institute;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.wpi.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "WPI;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "An Accelerated Algorithm for Stochastic Bilevel Optimization under Unbounded Smoothness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93226", "id": "v7vYVvmfru", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v7vYVvmfru", "openreview": "https://openreview.net/forum?id=v7vYVvmfru", "poster": "/media/PosterPDFs/NeurIPS%202024/93226.png?t=1731733421.065693", "project": "", "author_site": "Xiaochuan Gong, Jie Hao, Mingrui Liu", "tldr": "", "abstract": "This paper investigates a class of stochastic bilevel optimization problems where the upper-level function is nonconvex with potentially unbounded smoothness and the lower-level problem is strongly convex. These problems have significant applications in sequential data learning, such as text classification using recurrent neural networks. The unbounded smoothness is characterized by the smoothness constant of the upper-level function scaling linearly with the gradient norm, lacking a uniform upper bound. Existing state-of-the-art algorithms require $\\widetilde{O}(\\epsilon^{-4})$ oracle calls of stochastic gradient or Hessian/Jacobian-vector product to find an $\\epsilon$-stationary point. However, it remains unclear if we can further improve the convergence rate when the assumptions for the function in the population level also hold for each random realization almost surely (e.g., Lipschitzness of each realization of the stochastic gradient). To address this issue, we propose a new Accelerated Bilevel Optimization algorithm named AccBO. The algorithm updates the upper-level variable by normalized stochastic gradient descent with recursive momentum and the lower-level variable by the stochastic Nesterov accelerated gradient descent algorithm with averaging. We prove that our algorithm achieves an oracle complexity of $\\widetilde{O}(\\epsilon^{-3})$ to find an $\\epsilon$-stationary point, when the lower-level stochastic gradient has a small variance $O(\\epsilon)$. Our proof relies on a novel lemma characterizing the dynamics of stochastic Nesterov accelerated gradient descent algorithm under distribution drift with high probability for the lower-level variable, which is of independent interest and also plays a crucial role in analyzing the hypergradient estimation error over time. Experimental results on various tasks confirm that our proposed algorithm achieves the predicted theoretical acceleration and significantly outperforms baselines in bilevel optimization.", "keywords": "Bilevel Optimization;Acceleration;Unbounded Smoothness;Nonconvex Optimization", "primary_area": "optimization", "supplementary_material": "/attachment/23d699e321d95183967c8345555d7020c7952ab2.zip", "author": "Xiaochuan Gong;Jie Hao;Mingrui Liu", "authorids": "~Xiaochuan_Gong1;~Jie_Hao3;~Mingrui_Liu2", "gender": "M;M;", "homepage": ";https://jhao6.github.io/JieHao.github.io/;https://mingrliu.github.io", "dblp": ";;", "google_scholar": "byUF8hgAAAAJ;S8ZTkikAAAAJ;KFoEnFQAAAAJ", "orcid": ";;", "linkedin": ";;mingrui-liu-447a2aab/", "or_profile": "~Xiaochuan_Gong1;~Jie_Hao3;~Mingrui_Liu2", "aff": "George Mason University;George Mason University;George Mason University", "aff_domain": "gmu.edu;gmu.edu;gmu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngong2024an,\ntitle={An Accelerated Algorithm for Stochastic Bilevel Optimization under Unbounded Smoothness},\nauthor={Xiaochuan Gong and Jie Hao and Mingrui Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v7vYVvmfru}\n}", "github": "", "reviewers": "wLtJ;2WtS;o6Pr;W5hu", "pdf_size": 786211, "rating": "5;6;6;6", "confidence": "3;3;3;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "3;3;3;3", "wc_summary": "41;75;79;117", "wc_strengths": "32;75;25;157", "wc_weaknesses": "86;24;452;36", "wc_questions": "48;342;1;39", "wc_limitations": "1;84;1;46", "wc_review": "208;600;558;395", "wc_reply_reviewers": "76;32;13;0", "wc_reply_authors": "420;91;162;0", "reply_reviewers": "2;2;1;0", "reply_authors": "4;3;3;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 78.0, 26.92582403567252 ], "wc_strengths_avg": [ 72.25, 52.54224490826405 ], "wc_weaknesses_avg": [ 149.5, 176.18952863323065 ], "wc_questions_avg": [ 107.5, 136.5329630528833 ], "wc_limitations_avg": [ 33.0, 34.70590727815655 ], "wc_review_avg": [ 440.25, 154.4123942564197 ], "wc_reply_reviewers_avg": [ 30.25, 28.760867511255636 ], "wc_reply_authors_avg": [ 168.25, 156.27919727206177 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 1.0897247358851685 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15511798430897149038&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "gmu.edu;gmu.edu;gmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "George Mason University", "aff_unique_dep": "", "aff_unique_url": "https://www.gmu.edu", "aff_unique_abbr": "GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Measuring Dejavu Memorization Efficiently", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93225", "id": "v8RRFNbJ43", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v8RRFNbJ43", "openreview": "https://openreview.net/forum?id=v8RRFNbJ43", "poster": "/media/PosterPDFs/NeurIPS%202024/93225.png?t=1731717099.2857294", "project": "", "author_site": "Narine Kokhlikyan, Bargav Jayaraman, Florian Bordes, Chuan Guo, Kamalika Chaudhuri", "tldr": "", "abstract": "Recent research has shown that representation learning models may accidentally memorize their training data. For example, the d\u00e9j\u00e0 vu method shows that for certain representation learning models and training images, it is sometimes possible to correctly predict the foreground label given only the representation of he background \u2013 better than through dataset-level correlations. However, their measurement method requires training two models \u2013 one to estimate dataset-level correlations and the other to estimate memorization. This multiple model setup becomes infeasible for large open-source models. In this work, we propose alter native simple methods to estimate dataset-level correlations, and show that these can be used to approximate an off-the-shelf model\u2019s memorization ability without any retraining. This enables, for the first time, the measurement of memorization in pre-trained open-source image representation and vision-language models. Our results show that different ways of measuring memorization yield very similar aggregate results. We also find that open-source models typically have lower aggregate memorization than similar models trained on a subset of the data. The code is available both for vision (https://github.com/facebookresearch/DejaVuOSS) and vision language (https://github.com/facebookresearch/VLMDejaVu) models.", "keywords": "memorization;privacy", "primary_area": "privacy", "supplementary_material": "", "author": "Narine Kokhlikyan;Bargav Jayaraman;Florian Bordes;Chuan Guo;Kamalika Chaudhuri", "authorids": "~Narine_Kokhlikyan1;~Bargav_Jayaraman1;~Florian_Bordes1;~Chuan_Guo1;~Kamalika_Chaudhuri1", "gender": ";M;M;M;F", "homepage": ";https://bargavjayaraman.github.io;;https://sites.google.com/view/chuanguo;http://cseweb.ucsd.edu/users/kamalika", "dblp": "136/9284;165/8210;194/9862;;56/6435", "google_scholar": "oZjHXwUAAAAJ;https://scholar.google.co.in/citations?user=gL3ZkpEAAAAJ;OADfWhUAAAAJ;0gp5M-kAAAAJ;I-DJ7EsAAAAJ", "orcid": "0000-0002-5827-5141;;;;", "linkedin": ";bargav-jayaraman/;florianbordes;;", "or_profile": "~Narine_Kokhlikyan1;~Bargav_Jayaraman1;~Florian_Bordes1;~Chuan_Guo1;~Kamalika_Chaudhuri1", "aff": "Meta;Meta;Meta;Meta;University of California, San Diego", "aff_domain": "meta.com;meta.com;meta.com;meta.com;ucsd.edu", "position": "Research Scientist;Postdoc;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkokhlikyan2024measuring,\ntitle={Measuring Dejavu Memorization Efficiently},\nauthor={Narine Kokhlikyan and Bargav Jayaraman and Florian Bordes and Chuan Guo and Kamalika Chaudhuri},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v8RRFNbJ43}\n}", "github": "", "reviewers": "Uwm9;ASwd;wk9m", "pdf_size": 10814650, "rating": "5;6;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "3;3;3", "wc_summary": "47;80;13", "wc_strengths": "46;72;39", "wc_weaknesses": "100;133;63", "wc_questions": "3;108;236", "wc_limitations": "3;92;1", "wc_review": "199;485;352", "wc_reply_reviewers": "0;17;21", "wc_reply_authors": "20;18;11", "reply_reviewers": "0;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 46.666666666666664, 27.353650985238193 ], "wc_strengths_avg": [ 52.333333333333336, 14.197026292697903 ], "wc_weaknesses_avg": [ 98.66666666666667, 28.592928418676454 ], "wc_questions_avg": [ 115.66666666666667, 95.27620666019169 ], "wc_limitations_avg": [ 32.0, 42.43426288586461 ], "wc_review_avg": [ 345.3333333333333, 116.85413509537815 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 9.104333522498441 ], "wc_reply_authors_avg": [ 16.333333333333332, 3.8586123009300755 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9688755821212991790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "meta.com;meta.com;meta.com;meta.com;ucsd.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Meta;University of California, San Diego", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ucsd.edu", "aff_unique_abbr": "Meta;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Analysing the Generalisation and Reliability of Steering Vectors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93224", "id": "v8X70gTodR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v8X70gTodR", "openreview": "https://openreview.net/forum?id=v8X70gTodR", "poster": "/media/PosterPDFs/NeurIPS%202024/93224.png?t=1733700045.6997068", "project": "", "author_site": "Daniel Tan, David Chanin, Aengus Lynch, Brooks Paige, Dimitrios Kanoulas, Adri\u00e0 Garriga-Alonso, Robert Kirk", "tldr": "", "abstract": "Steering vectors (SVs) are a new approach to efficiently adjust language model behaviour at inference time by intervening on intermediate model activations. They have shown promise in terms of improving both capabilities and model alignment. However, the reliability and generalisation properties of this approach are unknown. In this work, we rigorously investigate these properties, and show that steering vectors have substantial limitations both in- and out-of-distribution. In-distribution, steerability is highly variable across different inputs. Depending on the concept, spurious biases can substantially contribute to how effective steering is for each input, presenting a challenge for the widespread use of steering vectors. Out-of-distribution, while steering vectors often generalise well, for several concepts they are brittle to reasonable changes in the prompt, resulting in them failing to generalise well. Overall, our findings show that while steering can work well in the right circumstances, there remain many technical difficulties of applying steering vectors to guide models' behaviour at scale.", "keywords": "Interpretability;Causal Abstractions;Steering Vectors;Representation Engineering;Linear Representation Hypothesis;Contrastive Activation Addition", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/e00b31d6a87e2e6241e1773ffaf18c8378e0223e.zip", "author": "Daniel Chee Hian Tan;David Chanin;Aengus Lynch;Brooks Paige;Dimitrios Kanoulas;Adri\u00e0 Garriga-Alonso;Robert Kirk", "authorids": "~Daniel_Chee_Hian_Tan1;~David_Chanin1;~Aengus_Lynch1;~Brooks_Paige1;~Dimitrios_Kanoulas1;~Adri\u00e0_Garriga-Alonso1;~Robert_Kirk1", "gender": "M;M;M;M;M;M;M", "homepage": "https://daniel-ch-tan.github.io/;;;https://tbrx.github.io;https://dkanou.github.io;https://robertkirk.github.io;https://agarri.ga/", "dblp": ";342/7486;;https://dblp.uni-trier.de/pers/p/Paige:Brooks;20/4287.html;01/9684;225/6564", "google_scholar": "QKO1QacAAAAJ;https://scholar.google.co.uk/citations?user=xnGP_IcAAAAJ;Pd2002AAAAAJ;JrFJmx0AAAAJ;cE8_5EsAAAAJ;https://scholar.google.co.uk/citations?user=PL5KWdYAAAAJ;OtnThiMAAAAJ", "orcid": "0000-0003-1067-8432;;;;0000-0002-3684-1472;;0000-0003-3409-5047", "linkedin": "daniel-tan-a0672b163/;davidchanin/;;;;;adrigarriga/", "or_profile": "~Daniel_Chee_Hian_Tan1;~David_Chanin1;~Aengus_Lynch1;~Brooks_Paige1;~Dimitrios_Kanoulas1;~Robert_Kirk1;~Adria_Garriga-Alonso1", "aff": "University College London;University College London, University of London;University College London, University of London;University College London;University College London, University of London;University College London;FAR", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;far.ai", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor;PhD student;Researcher", "bibtex": "@inproceedings{\ntan2024analysing,\ntitle={Analysing the Generalisation and Reliability of Steering Vectors},\nauthor={Daniel Chee Hian Tan and David Chanin and Aengus Lynch and Brooks Paige and Dimitrios Kanoulas and Adri{\\`a} Garriga-Alonso and Robert Kirk},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v8X70gTodR}\n}", "github": "", "reviewers": "h9xf;qr21;Z5or;GGDN", "pdf_size": 1556749, "rating": "3;6;7;8", "confidence": "3;3;3;4", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;4", "wc_summary": "44;186;50;10", "wc_strengths": "41;292;45;89", "wc_weaknesses": "83;84;38;75", "wc_questions": "1;153;25;10", "wc_limitations": "1;60;1;2", "wc_review": "170;775;159;186", "wc_reply_reviewers": "348;57;30;0", "wc_reply_authors": "596;72;72;72", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.8708286933869707 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.5, 67.28112662552553 ], "wc_strengths_avg": [ 116.75, 102.91835356242345 ], "wc_weaknesses_avg": [ 70.0, 18.801595676963167 ], "wc_questions_avg": [ 47.25, 61.653771174195015 ], "wc_limitations_avg": [ 16.0, 25.406692031825003 ], "wc_review_avg": [ 322.5, 261.4273321594358 ], "wc_reply_reviewers_avg": [ 108.75, 139.59472590323747 ], "wc_reply_authors_avg": [ 203.0, 226.8986557915229 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.6172133998483676, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16474100076583690455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;far.ai", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "University College London;FAR", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;", "aff_unique_abbr": "UCL;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom;" }, { "title": "Learning from Uncertain Data: From Possible Worlds to Possible Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93223", "id": "v9RqRFSLQ2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=v9RqRFSLQ2", "openreview": "https://openreview.net/forum?id=v9RqRFSLQ2", "poster": "", "project": "", "author_site": "Jiongli Zhu, Su Feng, Boris Glavic, Babak Salimi", "tldr": "", "abstract": "We introduce an efficient method for learning linear models from uncertain data, where uncertainty is represented as a set of possible variations in the data, leading to predictive multiplicity. Our approach leverages abstract interpretation and zonotopes, a type of convex polytope, to compactly represent these dataset variations, enabling the symbolic execution of gradient descent on all possible worlds simultaneously. We develop techniques to ensure that this process converges to a fixed point and derive closed-form solutions for this fixed point. Our method provides sound over-approximations of all possible optimal models and viable prediction ranges. We demonstrate the effectiveness of our approach through theoretical and empirical analysis, highlighting its potential to reason about model and prediction uncertainty due to data quality issues in training data.", "keywords": "data uncertainty;robustness verification;predictive multiplicity;abstract interpretation;linear regression", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Jiongli Zhu;Su Feng;Boris Glavic;Babak Salimi", "authorids": "~Jiongli_Zhu1;~Su_Feng1;~Boris_Glavic1;~Babak_Salimi1", "gender": "M;M;M;M", "homepage": "https://lodino.github.io;http://fengsu.me;http://www.cs.iit.edu/~dbgroup/members/bglavic.html;https://bsalimi.github.io/", "dblp": "295/3522;;09/3012;", "google_scholar": "41xTqZYAAAAJ;;lCdBR8cAAAAJ;", "orcid": ";0009-0009-8104-3128;0000-0003-2887-2452;", "linkedin": ";;;", "or_profile": "~Jiongli_Zhu1;~Su_Feng1;~Boris_Glavic1;~Babak_Salimi1", "aff": "University of California, San Diego;Illinois Institute of Technology;University of Illinois at Chicago;University of California, San Diego", "aff_domain": "ucsd.edu;iit.edu;uic.edu;ucsd.edu", "position": "PhD student;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhu2024learning,\ntitle={Learning from Uncertain Data: From Possible Worlds to Possible Models},\nauthor={Jiongli Zhu and Su Feng and Boris Glavic and Babak Salimi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=v9RqRFSLQ2}\n}", "github": "", "reviewers": "DX9v;7DaV;h3Xr", "pdf_size": 1176173, "rating": "5;7;7", "confidence": "1;3;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "30;64;135", "wc_strengths": "28;126;100", "wc_weaknesses": "165;105;12", "wc_questions": "284;73;91", "wc_limitations": "11;8;48", "wc_review": "518;376;386", "wc_reply_reviewers": "415;25;26", "wc_reply_authors": "710;0;0", "reply_reviewers": "2;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 2.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 76.33333333333333, 43.744205965539656 ], "wc_strengths_avg": [ 84.66666666666667, 41.45144415122617 ], "wc_weaknesses_avg": [ 94.0, 62.94441992742486 ], "wc_questions_avg": [ 149.33333333333334, 95.50683512479908 ], "wc_limitations_avg": [ 22.333333333333332, 18.190351532856337 ], "wc_review_avg": [ 426.6666666666667, 64.71132478597201 ], "wc_reply_reviewers_avg": [ 155.33333333333334, 183.61251470298953 ], "wc_reply_authors_avg": [ 236.66666666666666, 334.6972097616325 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15199964759239215598&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "ucsd.edu;iit.edu;uic.edu;ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of California, San Diego;Illinois Institute of Technology;University of Illinois at Chicago", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.iit.edu;https://www.uic.edu", "aff_unique_abbr": "UCSD;IIT;UIC", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "San Diego;;Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "LG-VQ: Language-Guided Codebook Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93222", "id": "vA4s3kN4QE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vA4s3kN4QE", "openreview": "https://openreview.net/forum?id=vA4s3kN4QE", "poster": "/media/PosterPDFs/NeurIPS%202024/93222.png?t=1730121873.1143901", "project": "", "author_site": "Liang Guotao, Baoquan Zhang, Yaowei Wang, Yunming Ye, Xutao Li, Wanghuaibin, Luo Chuyao, kolaye, luolinfeng", "tldr": "", "abstract": "Vector quantization (VQ) is a key technique in high-resolution and high-fidelity image synthesis, which aims to learn a codebook to encode an image with a sequence of discrete codes and then generate an image in an auto-regression manner. \n Although existing methods have shown superior performance, most methods prefer to learn a single-modal codebook (\\emph{e.g.}, image), resulting in suboptimal performance when the codebook is applied to multi-modal downstream tasks (\\emph{e.g.}, text-to-image, image captioning) due to the existence of modal gaps.\n In this paper, we propose a novel language-guided codebook learning framework, called LG-VQ, which aims to learn a codebook that can be aligned with the text to improve the performance of multi-modal downstream tasks. Specifically, we first introduce pre-trained text semantics as prior knowledge, then design two novel alignment modules (\\emph{i.e.}, Semantic Alignment Module, and Relationship Alignment Module) to transfer such prior knowledge into codes for achieving codebook text alignment. \n In particular, our LG-VQ method is model-agnostic, which can be easily integrated into existing VQ models. Experimental results show that our method achieves superior performance on reconstruction and various multi-modal downstream tasks.", "keywords": "Codebook Learing;VQ-GAN;Vector-quantized Image Modeling", "primary_area": "generative_models", "supplementary_material": "", "author": "Liang Guotao;Baoquan Zhang;Yaowei Wang;Yunming Ye;Xutao Li;Wanghuaibin;Luo Chuyao;kolaye;luolinfeng", "authorids": "~Liang_Guotao1;~Baoquan_Zhang1;~Yaowei_Wang1;~Yunming_Ye1;~Xutao_Li2;~Wanghuaibin1;~Luo_Chuyao1;~kolaye1;~luolinfeng1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";https://zhangbq-research.github.io/;https://dblp.org/pid/68/2992.html;https://faculty.hitsz.edu.cn/yeyunming;http://faculty.hitsz.edu.cn/lixutao;;;https://github.com/Kolaye;https://github.com/llf10811020205", "dblp": ";160/1111;68/2992-1;70/705;;;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=hQpTPuEAAAAJ;IyFK9X0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;hPm6fo8AAAAJ;https://scholar.google.com/citations?view_op=list_works;;;", "orcid": ";;0000-0002-6110-4036;0000-0002-1807-8581;;;0000-0003-4848-609X;;", "linkedin": ";;yaowei-wang-971ab310/;;;;;;", "or_profile": "~Liang_Guotao1;~Baoquan_Zhang1;~Yaowei_Wang1;~Yunming_Ye1;~Xutao_Li2;~Wanghuaibin1;~Luo_Chuyao1;~kolaye1;~luolinfeng1", "aff": "Harbin Institute of Technology,shenzhen;, Harbin Institute of Technology (shenzhen);Pengcheng Laboratory;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology;Harbin Institute of Technology;R&D Director;", "aff_domain": "hit.edu.cn;hit.edu.cn;pcl.ac.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;sifar.com.cn;", "position": "PhD student;PhD student;Full Professor;Full Professor;Full Professor;MS student;Postdoc;Researcher;", "bibtex": "@inproceedings{\nguotao2024lgvq,\ntitle={{LG}-{VQ}: Language-Guided Codebook Learning},\nauthor={Liang Guotao and Baoquan Zhang and Yaowei Wang and Yunming Ye and Xutao Li and Wanghuaibin and Luo Chuyao and kolaye and luolinfeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vA4s3kN4QE}\n}", "github": "", "reviewers": "YeAE;heW4;WVeZ;8ZRy", "pdf_size": 0, "rating": "5;5;6;6", "confidence": "2;3;4;4", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "35;43;105;81", "wc_strengths": "20;19;55;59", "wc_weaknesses": "104;64;67;134", "wc_questions": "18;32;6;61", "wc_limitations": "19;15;1;13", "wc_review": "196;173;234;348", "wc_reply_reviewers": "30;25;0;98", "wc_reply_authors": "9;7;0;153", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;1;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 66.0, 28.442925306655784 ], "wc_strengths_avg": [ 38.25, 18.806581294855267 ], "wc_weaknesses_avg": [ 92.25, 28.795615985771168 ], "wc_questions_avg": [ 29.25, 20.51066795596867 ], "wc_limitations_avg": [ 12.0, 6.708203932499369 ], "wc_review_avg": [ 237.75, 67.2769462743368 ], "wc_reply_reviewers_avg": [ 38.25, 36.32062086473743 ], "wc_reply_authors_avg": [ 42.25, 64.02880211279921 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=526687766677084257&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "hit.edu.cn;hit.edu.cn;pcl.ac.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;sifar.com.cn;", "author_num": 9, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology;Pengcheng Laboratory;", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;;", "aff_unique_abbr": "HIT;;", "aff_campus_unique_index": "0;0;0;0;2;2", "aff_campus_unique": "Shenzhen;;Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "OccamLLM: Fast and Exact Language Model Arithmetic in a Single Step", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93221", "id": "vAOgaPvgYr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vAOgaPvgYr", "openreview": "https://openreview.net/forum?id=vAOgaPvgYr", "poster": "", "project": "", "author_site": "Owen Dugan, Donato Jim\u00e9nez-Benet\u00f3, Charlotte Loh, Zhuo Chen, Rumen Dangovski, Marin Soljacic", "tldr": "", "abstract": "Despite significant advancements in text generation and reasoning, Large Language Models (LLMs) still face challenges in accurately performing complex arithmetic operations. Language model systems often enable LLMs to generate code for arithmetic operations to achieve accurate calculations. However, this approach compromises speed and security, and fine-tuning risks the language model losing prior capabilities. We propose a framework that enables exact arithmetic in *a single autoregressive step*, providing faster, more secure, and more interpretable LLM systems with arithmetic capabilities. We use the hidden states of a LLM to control a symbolic architecture that performs arithmetic. Our implementation using Llama 3 with OccamNet as a symbolic model (OccamLlama) achieves 100\\% accuracy on single arithmetic operations ($+,-,\\times,\\div,\\sin{},\\cos{},\\log{},\\exp{},\\sqrt{}$), outperforming GPT 4o with and without a code interpreter. Furthermore, OccamLlama outperforms GPT 4o with and without a code interpreter on average across a range of mathematical problem solving benchmarks, demonstrating that OccamLLMs can excel in arithmetic tasks, even surpassing much larger models. Code is available at https://github.com/druidowm/OccamLLM.", "keywords": "LLM;Language Model;Arithmetic;OccamNet;Llama", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Owen M Dugan;Donato M. Jim\u00e9nez Benet\u00f3;Charlotte Loh;Zhuo Chen;Rumen Dangovski;Marin Soljacic", "authorids": "~Owen_M_Dugan1;~Donato_M._Jim\u00e9nez_Benet\u00f31;~Charlotte_Loh1;~Zhuo_Chen8;~Rumen_Dangovski1;~Marin_Soljacic1", "gender": "M;M;F;;M;", "homepage": ";;;;http://super-ms.mit.edu/rumen.html;https://www.rle.mit.edu/marin/", "dblp": ";;217/6481;;207/8546;131/2044", "google_scholar": "1VvL1cgAAAAJ;;https://scholar.google.com/citations?hl=en;;;", "orcid": ";;;;;", "linkedin": ";https://es.linkedin.com/in/donato-jim%C3%A9nez-benet%C3%B3-09bb241b6;;;;", "or_profile": "~Owen_M_Dugan1;~Donato_M._Jim\u00e9nez_Benet\u00f31;~Charlotte_Loh1;~Zhuo_Chen8;~Rumen_Dangovski1;~Marin_Soljacic1", "aff": "Massachusetts Institute of Technology;Universitat Polit\u00e8cnica de Catalunya ;Massachusetts Institute of Technology;;Massachusetts Institute of Technology;", "aff_domain": "mit.edu;upc.ed;mit.edu;;mit.edu;", "position": "Undergrad student;Undergrad student;PhD student;;PhD student;", "bibtex": "@inproceedings{\ndugan2024occamllm,\ntitle={Occam{LLM}: Fast and Exact Language Model Arithmetic in a Single Step},\nauthor={Owen M Dugan and Donato M. Jim{\\'e}nez Benet{\\'o} and Charlotte Loh and Zhuo Chen and Rumen Dangovski and Marin Soljacic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vAOgaPvgYr}\n}", "github": "", "reviewers": "Z5Za;GTF8;r8sc;bzKn;JtJz", "pdf_size": 2761693, "rating": "3;4;6;6;7", "confidence": "4;3;3;3;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "4;4;4;3;3", "wc_summary": "64;82;172;142;44", "wc_strengths": "85;60;122;29;85", "wc_weaknesses": "205;231;132;55;39", "wc_questions": "46;35;54;17;62", "wc_limitations": "23;3;6;10;57", "wc_review": "423;411;486;253;287", "wc_reply_reviewers": "540;142;193;61;312", "wc_reply_authors": "4353;2201;2950;1139;2558", "reply_reviewers": "1;1;1;1;3", "reply_authors": "7;5;7;4;7", "rating_avg": [ 5.2, 1.469693845669907 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.6, 0.4898979485566356 ], "wc_summary_avg": [ 100.8, 48.37520025798343 ], "wc_strengths_avg": [ 76.2, 30.798701271319867 ], "wc_weaknesses_avg": [ 132.4, 77.08073689320828 ], "wc_questions_avg": [ 42.8, 15.68948692596415 ], "wc_limitations_avg": [ 19.8, 19.81312696168881 ], "wc_review_avg": [ 372.0, 87.75420217858516 ], "wc_reply_reviewers_avg": [ 249.6, 166.47954829347657 ], "wc_reply_authors_avg": [ 2640.2, 1047.1785712093233 ], "reply_reviewers_avg": [ 1.4, 0.8 ], "reply_authors_avg": [ 6.0, 1.2649110640673518 ], "replies_avg": [ 45, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7484551991837489, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:w7t-9C-d3SkJ:scholar.google.com/&scioq=OccamLLM:+Fast+and+Exact+Language+Model+Arithmetic+in+a+Single+Step&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "mit.edu;upc.ed;mit.edu;;mit.edu;", "author_num": 6, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.upc.edu", "aff_unique_abbr": "MIT;UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Spain" }, { "title": "Going Beyond Heuristics by Imposing Policy Improvement as a Constraint", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93220", "id": "vBGMbFgvsX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vBGMbFgvsX", "openreview": "https://openreview.net/forum?id=vBGMbFgvsX", "poster": "", "project": "", "author_site": "Chi-Chang Lee, Zhang-Wei Hong, Pulkit Agrawal", "tldr": "", "abstract": "In many reinforcement learning (RL) applications, incorporating heuristic rewards alongside the task reward is crucial for achieving desirable performance. Heuristics encode prior human knowledge about how a task should be done, providing valuable hints for RL algorithms. However, such hints may not be optimal, limiting the performance of learned policies. \nThe currently established way of using heuristics is to modify the heuristic reward in a manner that ensures that the optimal policy learned with it remains the same as the optimal policy for the task reward (i.e., optimal policy invariance). \nHowever, these methods often fail in practical scenarios with limited training data. We found that while optimal policy invariance ensures convergence to the best policy based on task rewards, it doesn't guarantee better performance than policies trained with biased heuristics under a finite data regime, which is impractical. In this paper, we introduce a new principle tailored for finite data settings. Instead of enforcing optimal policy invariance, we train a policy that combines task and heuristic rewards and ensures it outperforms the heuristic-trained policy. As such, we prevent policies from merely exploiting heuristic rewards without improving the task reward. Our experiments on robotic locomotion, helicopter control, and manipulation tasks demonstrate that our method consistently outperforms the heuristic policy, regardless of the heuristic rewards' quality.\nCode is available at https://github.com/Improbable-AI/hepo.", "keywords": "Deep reinforcement learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/55b8dcc3d7045b9166063c32638f97a974040774.zip", "author": "Chi-Chang Lee;Zhang-Wei Hong;Pulkit Agrawal", "authorids": "~Chi-Chang_Lee1;~Zhang-Wei_Hong1;~Pulkit_Agrawal1", "gender": "M;M;M", "homepage": "https://bio-asplab.citi.sinica.edu.tw/Lab-eng.html;;https://people.eecs.berkeley.edu/~pulkitag/", "dblp": ";198/0600;149/2672", "google_scholar": ";GZkyN4cAAAAJ;UpZmJI0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chi-Chang_Lee1;~Zhang-Wei_Hong1;~Pulkit_Agrawal1", "aff": "National Taiwan University;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "ntu.edu.tw;mit.edu;mit.edu", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlee2024going,\ntitle={Going Beyond Heuristics by Imposing Policy Improvement as a Constraint},\nauthor={Chi-Chang Lee and Zhang-Wei Hong and Pulkit Agrawal},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vBGMbFgvsX}\n}", "github": "", "reviewers": "inih;P4zd;15cn", "pdf_size": 936404, "rating": "6;7;7", "confidence": "3;4;3", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "63;151;51", "wc_strengths": "47;135;92", "wc_weaknesses": "170;230;49", "wc_questions": "70;83;60", "wc_limitations": "12;27;88", "wc_review": "362;626;340", "wc_reply_reviewers": "10;52;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 88.33333333333333, 44.58200932613464 ], "wc_strengths_avg": [ 91.33333333333333, 35.92894221778438 ], "wc_weaknesses_avg": [ 149.66666666666666, 75.2787412812468 ], "wc_questions_avg": [ 71.0, 9.41629792788369 ], "wc_limitations_avg": [ 42.333333333333336, 32.86673427984932 ], "wc_review_avg": [ 442.6666666666667, 129.9469977422419 ], "wc_reply_reviewers_avg": [ 24.0, 19.79898987322333 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cXtsXMWvSHgJ:scholar.google.com/&scioq=Going+Beyond+Heuristics+by+Imposing+Policy+Improvement+as+a+Constraint&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ntu.edu.tw;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "National Taiwan University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.tw;https://web.mit.edu", "aff_unique_abbr": "NTU;MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "HAWK: Learning to Understand Open-World Video Anomalies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93219", "id": "vBKoEZ1PG3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vBKoEZ1PG3", "openreview": "https://openreview.net/forum?id=vBKoEZ1PG3", "poster": "", "project": "", "author_site": "Jiaqi Tang, Hao LU, RUIZHENG WU, Xiaogang Xu, Ke Ma, Cheng Fang, Bin Guo, Jiangbo Lu, Qifeng Chen, Yingcong Chen", "tldr": "", "abstract": "Video Anomaly Detection (VAD) systems can autonomously monitor and identify disturbances, reducing the need for manual labor and associated costs. However, current VAD systems are often limited by their superficial semantic understanding of scenes and minimal user interaction. Additionally, the prevalent data scarcity in existing datasets restricts their applicability in open-world scenarios.\nIn this paper, we introduce HAWK, a novel framework that leverages interactive large Visual Language Models (VLM) to interpret video anomalies precisely. Recognizing the difference in motion information between abnormal and normal videos, HAWK explicitly integrates motion modality to enhance anomaly identification. To reinforce motion attention, we construct an auxiliary consistency loss within the motion and video space, guiding the video branch to focus on the motion modality. Moreover, to improve the interpretation of motion-to-language, we establish a clear supervisory relationship between motion and its linguistic representation. Furthermore, we have annotated over 8,000 anomaly videos with language descriptions, enabling effective training across diverse open-world scenarios, and also created 8,000 question-answering pairs for users' open-world questions. The final results demonstrate that HAWK achieves SOTA performance, surpassing existing baselines in both video description generation and question-answering. Our codes/dataset/demo will be released at https://github.com/jqtangust/hawk.", "keywords": "Video Anomalies Understanding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiaqi Tang;Hao LU;RUIZHENG WU;Xiaogang Xu;Ke Ma;Cheng Fang;Bin Guo;Jiangbo Lu;Qifeng Chen;Ying-Cong Chen", "authorids": "~Jiaqi_Tang1;~Hao_LU8;~RUIZHENG_WU1;~Xiaogang_Xu2;~Ke_Ma11;~Cheng_Fang6;~Bin_Guo3;~Jiangbo_Lu1;~Qifeng_Chen1;~Ying-Cong_Chen1", "gender": "M;M;M;M;M;M;M;;M;M", "homepage": "https://jqt.me;https://scholar.google.com/citations?user=OrbGCGkAAAAJ&hl=zh-TW;;https://xiaogang00.github.io;;;http://www.guob.org;https://sites.google.com/site/jiangbolu/;http://cqf.io/;https://www.yingcong.me/", "dblp": "121/7086-5;72/5422-9;244/2111;118/2268-2;;;;77/6697;117/4819;137/6578", "google_scholar": "https://scholar.google.com.hk/citations?user=dl5CsIUAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=R65xDQwAAAAJ;yXGNGS8AAAAJ;;VC6N2iQAAAAJ;;lLMX9hcAAAAJ;https://scholar.google.com.hk/citations?user=n7j4bJUAAAAJ", "orcid": "0009-0003-1251-0825;0000-0002-2241-6598;;0000-0002-7928-7336;0000-0002-1663-9955;;;;;", "linkedin": "jqtnpu;;RuizhengWu;;;;;;;", "or_profile": "~Jiaqi_Tang1;~Hao_LU8;~RUIZHENG_WU1;~Xiaogang_Xu2;~Ke_Ma11;~Cheng_Fang6;~Bin_Guo3;~Jiangbo_Lu1;~Qifeng_Chen1;~Ying-Cong_Chen1", "aff": "The Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology;Smartmore Technology;Zhejiang University;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University, Northwest Polytechnical University Xi'an;Northwestern Polytechnical University;SmartMore Corporation;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "connect.hkust-gz.edu.cn;hkust.edu;smartmore.com;zju.edu.cn;nwpu.edu.cn;mai.nwpu.edu.cn;nwpu.edu.cn;smartmore.com;hkust.edu;hkust-gz.edu.cn", "position": "MS student;PhD student;Researcher;Assistant Professor;PhD student;MS student;Full Professor;Chief Technology Officer;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ntang2024hawk,\ntitle={{HAWK}: Learning to Understand Open-World Video Anomalies},\nauthor={Jiaqi Tang and Hao LU and RUIZHENG WU and Xiaogang Xu and Ke Ma and Cheng Fang and Bin Guo and Jiangbo Lu and Qifeng Chen and Ying-Cong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vBKoEZ1PG3}\n}", "github": "", "reviewers": "b4dX;Cy7X;K7jv;kzaw", "pdf_size": 2090713, "rating": "3;6;6;7", "confidence": "4;5;4;5", "soundness": "2;3;4;4", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "65;96;67;66", "wc_strengths": "37;72;38;84", "wc_weaknesses": "74;245;167;70", "wc_questions": "3;3;37;4", "wc_limitations": "3;9;13;12", "wc_review": "182;425;322;236", "wc_reply_reviewers": "438;29;85;32", "wc_reply_authors": "957;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 73.5, 13.009611831257688 ], "wc_strengths_avg": [ 57.75, 20.69269194667528 ], "wc_weaknesses_avg": [ 139.0, 72.46723397508697 ], "wc_questions_avg": [ 11.75, 14.58380951603524 ], "wc_limitations_avg": [ 9.25, 3.897114317029974 ], "wc_review_avg": [ 291.25, 91.95481227211548 ], "wc_reply_reviewers_avg": [ 146.0, 170.05146279876573 ], "wc_reply_authors_avg": [ 239.25, 414.3931557108539 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.6666666666666667, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9631667593122067309&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "connect.hkust-gz.edu.cn;hkust.edu;smartmore.com;zju.edu.cn;nwpu.edu.cn;mai.nwpu.edu.cn;nwpu.edu.cn;smartmore.com;hkust.edu;hkust-gz.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;2;3;4;4;5;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Smartmore Technology;Zhejiang University;Northwest Polytechnical University;Northwestern Polytechnical University;SmartMore Corporation", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ust.hk;https://www.smartmore.com/;https://www.zju.edu.cn;http://www.nwpu.edu.cn;http://www.nwpu.edu.cn;https://www.smartmore.com/", "aff_unique_abbr": "HKUST;;ZJU;NWPU;NWPU;", "aff_campus_unique_index": "0;1;3;3;1;1", "aff_campus_unique": "Guangzhou;Hong Kong SAR;;Xi'an", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Conformalized Credal Set Predictors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93218", "id": "vBah12uVbD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vBah12uVbD", "openreview": "https://openreview.net/forum?id=vBah12uVbD", "poster": "/media/PosterPDFs/NeurIPS%202024/93218.png?t=1733737054.3557365", "project": "", "author_site": "Alireza Javanmardi, David Stutz, Eyke H\u00fcllermeier", "tldr": "", "abstract": "Credal sets are sets of probability distributions that are considered as candidates for an imprecisely known ground-truth distribution. In machine learning, they have recently attracted attention as an appealing formalism for uncertainty representation, in particular, due to their ability to represent both the aleatoric and epistemic uncertainty in a prediction. However, the design of methods for learning credal set predictors remains a challenging problem. In this paper, we make use of conformal prediction for this purpose. More specifically, we propose a method for predicting credal sets in the classification task, given training data labeled by probability distributions. Since our method inherits the coverage guarantees of conformal prediction, our conformal credal sets are guaranteed to be valid with high probability (without any assumptions on model or distribution). We demonstrate the applicability of our method on ambiguous classification tasks for uncertainty quantification.", "keywords": "Conformal Prediction;Credal Sets;Imprecise Probabilities;Uncertainty Representation;Uncertainty Quantification", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Alireza Javanmardi;David Stutz;Eyke H\u00fcllermeier", "authorids": "~Alireza_Javanmardi2;~David_Stutz1;~Eyke_H\u00fcllermeier1", "gender": "M;M;M", "homepage": "https://www.kiml.ifi.lmu.de/people/employees/javanmardi/index.html;http://davidstutz.de/;https://cs.uni-paderborn.de/index.php?id=60202", "dblp": "295/4647;17/9394;h/EykeHullermeier", "google_scholar": "3A0fiv0AAAAJ;TxEy3cwAAAAJ;https://scholar.google.de/citations?user=usVJeNN3xFAC", "orcid": "0000-0002-4901-5989;;0000-0002-9944-4108", "linkedin": ";davidstutz92/;", "or_profile": "~Alireza_Javanmardi2;~David_Stutz1;~Eyke_H\u00fcllermeier1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Google DeepMind;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "lmu.de;deepmind.com;lmu.de", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\njavanmardi2024conformalized,\ntitle={Conformalized Credal Set Predictors},\nauthor={Alireza Javanmardi and David Stutz and Eyke H{\\\"u}llermeier},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vBah12uVbD}\n}", "github": "", "reviewers": "gqZt;LLVn;ieV3;wGcP", "pdf_size": 3029047, "rating": "3;4;5;8", "confidence": "4;4;4;4", "soundness": "2;2;3;4", "novelty": "2;2;2;4", "presentation": "2;3;4;3", "wc_summary": "54;42;65;70", "wc_strengths": "29;29;58;48", "wc_weaknesses": "262;178;116;4", "wc_questions": "124;251;54;262", "wc_limitations": "16;32;32;12", "wc_review": "485;532;325;396", "wc_reply_reviewers": "614;26;82;551", "wc_reply_authors": "557;43;453;35", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.0, 1.8708286933869707 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.75, 10.779030568655049 ], "wc_strengths_avg": [ 41.0, 12.509996003196804 ], "wc_weaknesses_avg": [ 140.0, 94.0744386111339 ], "wc_questions_avg": [ 172.75, 87.41674610736779 ], "wc_limitations_avg": [ 23.0, 9.1104335791443 ], "wc_review_avg": [ 434.5, 79.8889854735933 ], "wc_reply_reviewers_avg": [ 318.25, 265.9251539437366 ], "wc_reply_authors_avg": [ 272.0, 235.90040271267026 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12909513350879824301&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "lmu.de;deepmind.com;lmu.de", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.lmu.de;https://deepmind.com", "aff_unique_abbr": "LMU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Optimal deep learning of holomorphic operators between Banach spaces", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93217", "id": "vBlzen37i0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vBlzen37i0", "openreview": "https://openreview.net/forum?id=vBlzen37i0", "poster": "/media/PosterPDFs/NeurIPS%202024/93217.png?t=1733521057.388287", "project": "", "author_site": "Ben Adcock, Nick Dexter, Sebastian Moraga Scheuermann", "tldr": "", "abstract": "Operator learning problems arise in many key areas of scientific computing where Partial Differential Equations (PDEs) are used to model physical systems. In such scenarios, the operators map between Banach or Hilbert spaces. In this work, we tackle the problem of learning operators between Banach spaces, in contrast to the vast majority of past works considering only Hilbert spaces. We focus on learning holomorphic operators -- an important class of problems with many applications. We combine arbitrary approximate encoders and decoders with standard feedforward Deep Neural Network (DNN) architectures -- specifically, those with constant width exceeding the depth -- under standard $\\ell^2$-loss minimization. We first identify a family of DNNs such that the resulting Deep Learning (DL) procedure achieves optimal generalization bounds for such operators. For standard fully-connected architectures, we then show that there are uncountably many minimizers of the training problem that yield equivalent optimal performance. The DNN architectures we consider are `problem agnostic', with width and depth only depending on the amount of training data $m$ and not on regularity assumptions of the target operator. Next, we show that DL is optimal for this problem: no recovery procedure can surpass these generalization bounds up to log terms. Finally, we present numerical results demonstrating the practical performance on challenging problems including the parametric diffusion, Navier-Stokes-Brinkman and Boussinesq PDEs.", "keywords": "Deep learning;operator learning;parametric PDEs;deep neural networks;generalization error;optimal algorithms", "primary_area": "learning_theory", "supplementary_material": "/attachment/81180756a6d60d46dec1042d10fa35052948c1a6.zip", "author": "Ben Adcock;Nick Dexter;Sebastian Moraga", "authorids": "~Ben_Adcock1;~Nick_Dexter1;~Sebastian_Moraga1", "gender": ";M;M", "homepage": ";https://sites.google.com/view/ndexter;https://sites.google.com/view/sebanthalas", "dblp": ";256/9590;", "google_scholar": ";Sgso_3QAAAAJ;", "orcid": ";0000-0002-2418-4735;0000-0001-9886-5712", "linkedin": ";https://www.linkedin.com/mwlite/in/nick-dexter-7139409;https://www.linkedin.com/feed/", "or_profile": "~Ben_Adcock1;~Nick_Dexter1;~Sebastian_Moraga1", "aff": ";Florida State University;Simon Fraser University", "aff_domain": ";fsu.edu;sfu.ca", "position": ";Assistant Professor;PhD student", "bibtex": "@inproceedings{\nadcock2024optimal,\ntitle={Optimal deep learning of holomorphic operators between Banach spaces},\nauthor={Ben Adcock and Nick Dexter and Sebastian Moraga},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vBlzen37i0}\n}", "github": "", "reviewers": "ZpED;4yPo;XKAz", "pdf_size": 4617577, "rating": "7;8;8", "confidence": "4;4;3", "soundness": "4;4;4", "novelty": "4;4;4", "presentation": "3;3;4", "wc_summary": "85;73;137", "wc_strengths": "46;97;67", "wc_weaknesses": "33;24;62", "wc_questions": "2;228;573", "wc_limitations": "11;7;1", "wc_review": "177;429;840", "wc_reply_reviewers": "4;145;0", "wc_reply_authors": "0;78;0", "reply_reviewers": "1;1;0", "reply_authors": "1;2;1", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 4.0, 0.0 ], "novelty_avg": [ 4.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 98.33333333333333, 27.776888874666213 ], "wc_strengths_avg": [ 70.0, 20.92844953645635 ], "wc_weaknesses_avg": [ 39.666666666666664, 16.21384867602041 ], "wc_questions_avg": [ 267.6666666666667, 234.79116015916975 ], "wc_limitations_avg": [ 6.333333333333333, 4.109609335312651 ], "wc_review_avg": [ 482.0, 273.25080054777516 ], "wc_reply_reviewers_avg": [ 49.666666666666664, 67.43062278289361 ], "wc_reply_authors_avg": [ 26.0, 36.76955262170047 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2176076788926110332&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";fsu.edu;sfu.ca", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Florida State University;Simon Fraser University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fsu.edu;https://www.sfu.ca", "aff_unique_abbr": "FSU;SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "title": "2D-OOB: Attributing Data Contribution Through Joint Valuation Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93216", "id": "vBxeeH1X4y", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vBxeeH1X4y", "openreview": "https://openreview.net/forum?id=vBxeeH1X4y", "poster": "/media/PosterPDFs/NeurIPS%202024/93216.png?t=1732674433.2473464", "project": "", "author_site": "Yifan Sun, Jingyan Shen, Yongchan Kwon", "tldr": "", "abstract": "Data valuation has emerged as a powerful framework for quantifying each datum's contribution to the training of a machine learning model. However, it is crucial to recognize that the quality of cells within a single data point can vary greatly in practice. For example, even in the case of an abnormal data point, not all cells are necessarily noisy. The single scalar score assigned by existing data valuation methods blurs the distinction between noisy and clean cells of a data point, making it challenging to interpret the data values. In this paper, we propose 2D-OOB, an out-of-bag estimation framework for jointly determining helpful (or detrimental) samples as well as the particular cells that drive them. Our comprehensive experiments demonstrate that 2D-OOB achieves state-of-the-art performance across multiple use cases while being exponentially faster. Specifically, 2D-OOB shows promising results in detecting and rectifying fine-grained outliers at the cell level, and localizing backdoor triggers in data poisoning attacks.", "keywords": "Data valuation;Cell-level attribution;Outlier detection", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Yifan Sun;Jingyan Shen;Yongchan Kwon", "authorids": "~Yifan_Sun8;~Jingyan_Shen1;~Yongchan_Kwon1", "gender": "M;F;", "homepage": "https://yifansun99.github.io/;;", "dblp": "99/10261-10.html;;", "google_scholar": ";;", "orcid": ";;", "linkedin": ";jyshen;", "or_profile": "~Yifan_Sun8;~Jingyan_Shen1;~Yongchan_Kwon1", "aff": "Tsinghua University;Tsinghua University;", "aff_domain": "mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;", "position": "MS student;MS student;", "bibtex": "@inproceedings{\nsun2024doob,\ntitle={2D-{OOB}: Attributing Data Contribution Through Joint Valuation Framework},\nauthor={Yifan Sun and Jingyan Shen and Yongchan Kwon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vBxeeH1X4y}\n}", "github": "", "reviewers": "nD8c;6nYm;ECZV;4K7L", "pdf_size": 947272, "rating": "5;5;6;6", "confidence": "3;2;4;5", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "138;35;61;56", "wc_strengths": "55;51;36;168", "wc_weaknesses": "159;23;132;115", "wc_questions": "149;19;2;121", "wc_limitations": "1;1;1;1", "wc_review": "502;129;232;461", "wc_reply_reviewers": "28;0;26;26", "wc_reply_authors": "33;0;37;14", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.5, 39.05444917035702 ], "wc_strengths_avg": [ 77.5, 52.72807601268986 ], "wc_weaknesses_avg": [ 107.25, 51.109563684304725 ], "wc_questions_avg": [ 72.75, 63.318145108649546 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 331.0, 155.52009516457994 ], "wc_reply_reviewers_avg": [ 20.0, 11.575836902790225 ], "wc_reply_authors_avg": [ 21.0, 14.916433890176299 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:uSLihujtJW0J:scholar.google.com/&scioq=2D-OOB:+Attributing+Data+Contribution+Through+Joint+Valuation+Framework&hl=en&as_sdt=0,7", "gs_version_total": 3, "email": "mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Unveiling the Bias Impact on Symmetric Moral Consistency of Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93215", "id": "vCIc9BXzze", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vCIc9BXzze", "openreview": "https://openreview.net/forum?id=vCIc9BXzze", "poster": "/media/PosterPDFs/NeurIPS%202024/93215.png?t=1731576551.0899515", "project": "", "author_site": "Ziyi Zhou, Xinwei Guo, Jiashi Gao, Xiangyu Zhao, Shiyao Zhang, Xin Yao, Xuetao Wei", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable capabilities, surpassing human experts in various benchmark tests and playing a vital role in various industry sectors. Despite their effectiveness, a notable drawback of LLMs is their inconsistent moral behavior, which raises ethical concerns. This work delves into symmetric moral consistency in large language models and demonstrates that modern LLMs lack sufficient consistency ability in moral scenarios. Our extensive investigation of twelve popular LLMs reveals that their assessed consistency scores are influenced by position bias and selection bias rather than their intrinsic abilities. We propose a new framework tSMC, which gauges the effects of these biases and effectively mitigates the bias impact based on the Kullback\u2013Leibler divergence to pinpoint LLMs' mitigated Symmetric Moral Consistency. We find that the ability of LLMs to maintain consistency varies across different moral scenarios. Specifically, LLMs show more consistency in scenarios with clear moral answers compared to those where no choice is morally perfect. The average consistency score of 12 LLMs ranges from $60.7\\%$ in high-ambiguity moral scenarios to $84.8\\%$ in low-ambiguity moral scenarios.", "keywords": "Large Language Model;Moral Consistency;Evaluation;Ethics", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/c0175cacff3d23b0a19c97bed89b98a5abe896c6.zip", "author": "Ziyi Zhou;Xinwei Guo;Jiashi Gao;Xiangyu Zhao;Shiyao Zhang;Xin Yao;Xuetao Wei", "authorids": "~Ziyi_Zhou4;~Xinwei_Guo2;~Jiashi_Gao1;~Xiangyu_Zhao1;~Shiyao_Zhang1;~Xin_Yao1;~Xuetao_Wei2", "gender": "M;Not Specified;F;M;M;;M", "homepage": "https://github.com/zhouziyiaichifeiniu;;;https://zhaoxyai.github.io/;;http://www.cs.bham.ac.uk/~xin;https://cse.sustech.edu.cn/faculty/~weixt/", "dblp": ";;221/1810;08/890-1.html;;;09/5916", "google_scholar": ";38RY290AAAAJ;;;Pjh4rVAAAAAJ;;8fNwEScAAAAJ", "orcid": ";0000-0002-7973-5565;;0000-0003-2926-4416;;;0000-0002-4450-2251", "linkedin": ";;;;;;", "or_profile": "~Ziyi_Zhou4;~Xinwei_Guo2;~Jiashi_Gao1;~Xiangyu_Zhao1;~Shiyao_Zhang1;~Xin_Yao1;~Xuetao_Wei2", "aff": "Southern University of Science and Technology;Southern University of Science and Technology;Southern University of Science and Technology;City University of Hong Kong;Southern University of Science and Technology;;Southern University of Science and Technology", "aff_domain": "sustech.edu;sustech.edu.cn;sustech.edu.cn;cityu.edu.hk;sustech.edu.cn;;sustech.edu.cn", "position": "Undergrad student;PhD student;PhD student;Assistant Professor;Assistant Professor;;Associate Professor", "bibtex": "@inproceedings{\nzhou2024unveiling,\ntitle={Unveiling the Bias Impact on Symmetric Moral Consistency of Large Language Models},\nauthor={Ziyi Zhou and Xinwei Guo and Jiashi Gao and Xiangyu Zhao and Shiyao Zhang and Xin Yao and Xuetao Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vCIc9BXzze}\n}", "github": "", "reviewers": "FDgK;SWHZ;EVZc;7hT9;QyMp", "pdf_size": 580900, "rating": "4;5;5;8;8", "confidence": "2;4;4;3;3", "soundness": "2;2;3;3;3", "novelty": "2;2;3;3;3", "presentation": "1;3;2;3;4", "wc_summary": "76;59;74;37;61", "wc_strengths": "44;22;135;45;89", "wc_weaknesses": "330;457;44;11;59", "wc_questions": "72;36;1;77;2", "wc_limitations": "27;1;1;5;16", "wc_review": "549;575;255;175;227", "wc_reply_reviewers": "156;0;0;30;32", "wc_reply_authors": "432;120;120;0;0", "reply_reviewers": "1;0;0;1;1", "reply_authors": "3;3;3;1;1", "rating_avg": [ 6.0, 1.6733200530681511 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 2.6, 0.4898979485566356 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.6, 1.019803902718557 ], "wc_summary_avg": [ 61.4, 13.95134402127623 ], "wc_strengths_avg": [ 67.0, 40.36334971233185 ], "wc_weaknesses_avg": [ 180.2, 179.40278704635557 ], "wc_questions_avg": [ 37.6, 32.69617714657174 ], "wc_limitations_avg": [ 10.0, 10.119288512538814 ], "wc_review_avg": [ 356.2, 170.1839005311607 ], "wc_reply_reviewers_avg": [ 43.6, 57.88816804840174 ], "wc_reply_authors_avg": [ 134.4, 158.18166771152718 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:b7WsxKv8O4IJ:scholar.google.com/&scioq=Unveiling+the+Bias+Impact+on+Symmetric+Moral+Consistency+of+Large+Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "sustech.edu;sustech.edu.cn;sustech.edu.cn;cityu.edu.hk;sustech.edu.cn;;sustech.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Southern University of Science and Technology;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "SUSTech;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Direct3D: Scalable Image-to-3D Generation via 3D Latent Diffusion Transformer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93214", "id": "vCOgjBIZuL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vCOgjBIZuL", "openreview": "https://openreview.net/forum?id=vCOgjBIZuL", "poster": "/media/PosterPDFs/NeurIPS%202024/93214.png?t=1731741573.5440247", "project": "", "author_site": "Wu Shuang, Youtian Lin, Yifei Zeng, Feihu Zhang, Jingxi Xu, Philip Torr, Xun Cao, Yao Yao", "tldr": "", "abstract": "Generating high-quality 3D assets from text and images has long been challenging, primarily due to the absence of scalable 3D representations capable of capturing intricate geometry distributions. In this work, we introduce Direct3D, a native 3D generative model scalable to in-the-wild input images, without requiring a multi-view diffusion model or SDS optimization. Our approach comprises two primary components: a Direct 3D Variational Auto-Encoder (D3D-VAE) and a Direct 3D Diffusion Transformer (D3D-DiT). D3D-VAE efficiently encodes high-resolution 3D shapes into a compact and continuous latent triplane space. Notably, our method directly supervises the decoded geometry using a semi-continuous surface sampling strategy, diverging from previous methods relying on rendered images as supervision signals. D3D-DiT models the distribution of encoded 3D latents and is specifically designed to fuse positional information from the three feature maps of the triplane latent, enabling a native 3D generative model scalable to large-scale 3D datasets. Additionally, we introduce an innovative image-to-3D generation pipeline incorporating semantic and pixel-level image conditions, allowing the model to produce 3D shapes consistent with the provided conditional image input. Extensive experiments demonstrate the superiority of our large-scale pre-trained Direct3D over previous image-to-3D approaches, achieving significantly better generation quality and generalization ability, thus establishing a new state-of-the-art for 3D content creation. Project page: https://www.neural4d.com/research/direct3d.", "keywords": "3D Generation;Diffsion Model", "primary_area": "generative_models", "supplementary_material": "/attachment/ef348c73d41cd09b5eb26862dc6174b56f1b3629.zip", "author": "Shuang Wu;Youtian Lin;Yifei Zeng;Feihu Zhang;Jingxi Xu;Philip Torr;Xun Cao;Yao Yao", "authorids": "~Shuang_Wu14;~Youtian_Lin1;~Yifei_Zeng1;~Feihu_Zhang3;~Jingxi_Xu2;~Philip_Torr1;~Xun_Cao1;~Yao_Yao1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://scholar.google.it/citations?hl=zh-CN&user=SN8J78EAAAAJ;https://linyou.github.io;https://github.com/zeng-yifei;;http://xjx.name/;http://www.robots.ox.ac.uk/~tvg/;http://cite.nju.edu.cn;https://yoyo000.github.io/", "dblp": ";253/3893;223/2832;120/0587;33/10762.html;;78/7658;07/4410-8", "google_scholar": "https://scholar.google.it/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?view_op=list_works;nWT4vFcAAAAJ;;;8hZIngIAAAAJ;MGxaDVEAAAAJ", "orcid": ";0000-0002-9179-9406;0009-0000-8270-8333;;0000-0002-6262-4199;;;", "linkedin": ";;;;;;;", "or_profile": "~Shuang_Wu14;~Youtian_Lin1;~Yifei_Zeng1;~Feihu_Zhang3;~Jingxi_Xu2;~Philip_Torr1;~Xun_Cao1;~Yao_Yao1", "aff": "Nanjing University;VAST;Nanjing University;DreamTech;DreamTech;University of Oxford;Nanjing University;Nanjing University", "aff_domain": "smail.nju.edu.cn;vastai3d.com;nju.edu.cn;dreamtech.ai;neural4d.com;ox.ac.uk;nju.edu.cn;nju.edu.cn", "position": "PhD student;Researcher;MS student;Instructor;Researcher;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2024directd,\ntitle={Direct3D: Scalable Image-to-3D Generation via 3D Latent Diffusion Transformer},\nauthor={Shuang Wu and Youtian Lin and Yifei Zeng and Feihu Zhang and Jingxi Xu and Philip Torr and Xun Cao and Yao Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vCOgjBIZuL}\n}", "github": "", "reviewers": "yUww;3ERi;U1iF;FNua;HuD3", "pdf_size": 14877960, "rating": "4;4;5;6;6", "confidence": "5;5;5;4;5", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;4", "presentation": "3;3;3;3;4", "wc_summary": "31;163;117;71;79", "wc_strengths": "19;103;56;39;74", "wc_weaknesses": "276;203;183;121;125", "wc_questions": "21;97;220;3;9", "wc_limitations": "4;46;1;1;101", "wc_review": "351;612;577;235;388", "wc_reply_reviewers": "746;110;30;0;61", "wc_reply_authors": "641;92;36;0;11", "reply_reviewers": "3;1;1;0;2", "reply_authors": "3;2;2;1;2", "rating_avg": [ 5.0, 0.8944271909999159 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 92.2, 44.71420355994278 ], "wc_strengths_avg": [ 58.2, 28.86797533600166 ], "wc_weaknesses_avg": [ 181.6, 57.00385951845717 ], "wc_questions_avg": [ 70.0, 82.26785520481253 ], "wc_limitations_avg": [ 30.6, 39.123394535750606 ], "wc_review_avg": [ 432.6, 141.9360419343868 ], "wc_reply_reviewers_avg": [ 189.4, 280.6689152720693 ], "wc_reply_authors_avg": [ 156.0, 244.5739152076525 ], "reply_reviewers_avg": [ 1.4, 1.019803902718557 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5590169943749475, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5892606022845768340&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 5, "email": "smail.nju.edu.cn;vastai3d.com;nju.edu.cn;dreamtech.ai;neural4d.com;ox.ac.uk;nju.edu.cn;nju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;0;2;2;3;0;0", "aff_unique_norm": "Nanjing University;VAST;DreamTech;University of Oxford", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nju.edu.cn;;;https://www.ox.ac.uk", "aff_unique_abbr": "Nanjing U;;;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;0;0", "aff_country_unique": "China;;United Kingdom" }, { "title": "The Space Complexity of Approximating Logistic Loss", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93213", "id": "vDlj3veE9a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vDlj3veE9a", "openreview": "https://openreview.net/forum?id=vDlj3veE9a", "poster": "", "project": "", "author_site": "Gregory Dexter, Petros Drineas, Rajiv Khanna", "tldr": "", "abstract": "We provide space complexity lower bounds for data structures that approximate logistic loss up to $\\epsilon$-relative error on a logistic regression problem with data $\\mathbf{X} \\in \\mathbb{R}^{n \\times d}$ and labels $\\mathbf{y} \\in \\\\{-1,1\\\\}^d$. The space complexity of existing coreset constructions depend on a natural complexity measure $\\mu_\\mathbf{y}(\\mathbf{X})$. We give an $\\tilde{\\Omega}(\\frac{d}{\\epsilon^2})$ space complexity lower bound in the regime $\\mu_\\mathbf{y}(\\mathbf{X}) = \\mathcal{O}(1)$ that shows existing coresets are optimal in this regime up to lower order factors. We also prove a general $\\tilde{\\Omega}(d\\cdot \\mu_\\mathbf{y}(\\mathbf{X}))$ space lower bound when $\\epsilon$ is constant, showing that the dependency on $\\mu_\\mathbf{y}(\\mathbf{X})$ is not an artifact of mergeable coresets. Finally, we refute a prior conjecture that $\\mu_\\mathbf{y}(\\mathbf{X})$ is hard to compute by providing an efficient linear programming formulation, and we empirically compare our algorithm to prior approximate methods.", "keywords": "logistic regression;data structure;space complexity;approximation algorithm", "primary_area": "optimization", "supplementary_material": "/attachment/3e23d03b67d834f5fc6972a78671f1a59b25f284.zip", "author": "Gregory Dexter;Petros Drineas;Rajiv Khanna", "authorids": "~Gregory_Dexter1;~Petros_Drineas1;~Rajiv_Khanna1", "gender": ";;M", "homepage": "https://www.gregorydexter.com/;https://www.cs.purdue.edu/homes/pdrineas/;http://rjvak7.github.io/", "dblp": "252/2283;67/1567;31/4624", "google_scholar": "nEIGQ00AAAAJ;https://scholar.google.co.uk/citations?user=Yw2PquQAAAAJ;523w4w8AAAAJ", "orcid": ";;0000-0003-1314-3126", "linkedin": "gregorydexter1;;", "or_profile": "~Gregory_Dexter1;~Petros_Drineas1;~Rajiv_Khanna1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\ndexter2024the,\ntitle={The Space Complexity of Approximating Logistic Loss},\nauthor={Gregory Dexter and Petros Drineas and Rajiv Khanna},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vDlj3veE9a}\n}", "github": "", "reviewers": "hULF;8cM9;9S9F;iEdU", "pdf_size": 467384, "rating": "5;5;6;7", "confidence": "1;2;4;3", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;2;3;3", "wc_summary": "146;92;91;57", "wc_strengths": "37;106;25;36", "wc_weaknesses": "10;68;41;32", "wc_questions": "13;1;115;18", "wc_limitations": "1;17;2;1", "wc_review": "207;284;274;144", "wc_reply_reviewers": "6;14;101;15", "wc_reply_authors": "0;0;282;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 96.5, 31.862987932709636 ], "wc_strengths_avg": [ 51.0, 32.101401838549044 ], "wc_weaknesses_avg": [ 37.75, 20.78911975048487 ], "wc_questions_avg": [ 36.75, 45.598108513402174 ], "wc_limitations_avg": [ 5.25, 6.796138609534093 ], "wc_review_avg": [ 227.25, 56.450752873633135 ], "wc_reply_reviewers_avg": [ 34.0, 38.839412971876904 ], "wc_reply_authors_avg": [ 70.5, 122.10958193360585 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12443849538239497286&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "purdue.edu;purdue.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "RSA: Resolving Scale Ambiguities in Monocular Depth Estimators through Language Descriptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93212", "id": "vH7GcaDhAo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vH7GcaDhAo", "openreview": "https://openreview.net/forum?id=vH7GcaDhAo", "poster": "", "project": "", "author_site": "Ziyao Zeng, Yangchao Wu, Hyoungseob Park, Daniel Wang, Fengyu Yang, Stefano Soatto, DONG LAO, Byung-Woo Hong, Alex Wong", "tldr": "", "abstract": "We propose a method for metric-scale monocular depth estimation. Inferring depth from a single image is an ill-posed problem due to the loss of scale from perspective projection during the image formation process. Any scale chosen is a bias, typically stemming from training on a dataset; hence, existing works have instead opted to use relative (normalized, inverse) depth. Our goal is to recover metric-scaled depth maps through a linear transformation. The crux of our method lies in the observation that certain objects (e.g., cars, trees, street signs) are typically found or associated with certain types of scenes (e.g., outdoor). We explore whether language descriptions can be used to transform relative depth predictions to those in metric scale. Our method, RSA , takes as input a text caption describing objects present in an image and outputs the parameters of a linear transformation which can be applied globally to a relative depth map to yield metric-scaled depth predictions. We demonstrate our method on recent general-purpose monocular depth models on indoors (NYUv2, VOID) and outdoors (KITTI). When trained on multiple datasets, RSA can serve as a general alignment module in zero-shot settings. Our method improves over common practices in aligning relative to metric depth and results in predictions that are comparable to an upper bound of fitting relative depth to ground truth via a linear transformation. Code is available at: https://github.com/Adonis-galaxy/RSA.", "keywords": "Monocular Depth Estimation;Vision-Language Model;Multimodal Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/145ff85c6d8eb8143719185dac54d9559b372fe0.zip", "author": "Ziyao Zeng;Yangchao Wu;Hyoungseob Park;Daniel Wang;Fengyu Yang;Stefano Soatto;Dong Lao;Byung-Woo Hong;Alex Wong", "authorids": "~Ziyao_Zeng2;~Yangchao_Wu1;~Hyoungseob_Park1;~Daniel_Wang2;~Fengyu_Yang1;~Stefano_Soatto3;~Dong_Lao1;~Byung-Woo_Hong4;~Alex_Wong2", "gender": "M;M;M;M;;M;M;M;", "homepage": "https://adonis-galaxy.github.io/homepage/;https://scholar.google.com/citations?view_op=list_works&hl=en&user=k_h1nbAAAAAJ;;https://preacherwhite.github.io/;https://fredfyyang.github.io/;;http://www.image.cau.ac.kr;https://vision.cs.yale.edu/members/alex-wong/;https://www.cs.ucla.edu/~soatto", "dblp": "306/8074;;268/8125;;129/9492;180/5522;16/3511;39/6537-1;08/1262", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?view_op=list_works;A3c4pHkAAAAJ;;oKb-baEAAAAJ;dvQXYW0AAAAJ;gYk4eAgAAAAJ;K9_XuM8AAAAJ;lH1PdF8AAAAJ", "orcid": ";;0000-0003-0787-2082;;;;;0000-0002-3157-6016;0000-0003-2902-6362", "linkedin": ";;;;;;;;stefano-soatto-5765aa6/", "or_profile": "~Ziyao_Zeng2;~Yangchao_Wu1;~Hyoungseob_Park1;~Daniel_Wang2;~Fengyu_Yang1;~Dong_Lao1;~Byung-Woo_Hong4;~Alex_Wong2;~Stefano_Soatto2", "aff": "Yale University;University of California, Los Angeles;Yale University;Yale University;Yale University;University of California, Los Angeles;Chung-Ang University;Yale University;UCLA Computer Science Department, University of California, Los Angeles", "aff_domain": "yale.edu;ucla.edu;yale.edu;yale.edu;yale.edu;cs.ucla.edu;cau.ac.kr;yale.edu;cs.ucla.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Postdoc;Full Professor;Assistant Professor;Professor", "bibtex": "@inproceedings{\nzeng2024rsa,\ntitle={{RSA}: Resolving Scale Ambiguities in Monocular Depth Estimators through Language Descriptions},\nauthor={Ziyao Zeng and Yangchao Wu and Hyoungseob Park and Daniel Wang and Fengyu Yang and Stefano Soatto and Dong Lao and Byung-Woo Hong and Alex Wong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vH7GcaDhAo}\n}", "github": "", "reviewers": "ZAYV;Sd3q;Nu8S;KH1n", "pdf_size": 2534988, "rating": "4;5;6;6", "confidence": "4;4;4;4", "soundness": "3;3;3;1", "novelty": "2;3;3;2", "presentation": "2;3;3;3", "wc_summary": "39;34;100;40", "wc_strengths": "21;48;175;17", "wc_weaknesses": "59;81;268;154", "wc_questions": "20;86;7;2", "wc_limitations": "1;23;1;21", "wc_review": "140;272;551;234", "wc_reply_reviewers": "0;118;70;255", "wc_reply_authors": "0;528;24;634", "reply_reviewers": "0;2;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 53.25, 27.086666461563706 ], "wc_strengths_avg": [ 65.25, 64.4762553193034 ], "wc_weaknesses_avg": [ 140.5, 81.57971561607701 ], "wc_questions_avg": [ 28.75, 33.699962907991456 ], "wc_limitations_avg": [ 11.5, 10.523782589924593 ], "wc_review_avg": [ 299.25, 153.08392306182907 ], "wc_reply_reviewers_avg": [ 110.75, 93.25603197648933 ], "wc_reply_authors_avg": [ 296.5, 287.0831761005859 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10872906316605898639&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "yale.edu;ucla.edu;yale.edu;yale.edu;yale.edu;cs.ucla.edu;cau.ac.kr;yale.edu;cs.ucla.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;1;2;0;1", "aff_unique_norm": "Yale University;University of California, Los Angeles;Chung-Ang University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yale.edu;https://www.ucla.edu;http://www.cau.ac.kr", "aff_unique_abbr": "Yale;UCLA;CAU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0", "aff_country_unique": "United States;South Korea" }, { "title": "Gradient Cuff: Detecting Jailbreak Attacks on Large Language Models by Exploring Refusal Loss Landscapes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93211", "id": "vI1WqFn15v", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vI1WqFn15v", "openreview": "https://openreview.net/forum?id=vI1WqFn15v", "poster": "/media/PosterPDFs/NeurIPS%202024/93211.png?t=1732073251.2373726", "project": "", "author_site": "Xiaomeng Hu, Pin-Yu Chen, Tsung-Yi Ho", "tldr": "", "abstract": "Large Language Models (LLMs) are becoming a prominent generative AI tool, where the user enters a query and the LLM generates an answer. To reduce harm and misuse, efforts have been made to align these LLMs to human values using advanced training techniques such as Reinforcement Learning from Human Feedback (RLHF). However, recent studies have highlighted the vulnerability of LLMs to adversarial jailbreak attempts aiming at subverting the embedded safety guardrails. To address this challenge, this paper defines and investigates the **Refusal Loss** of LLMs and then proposes a method called **Gradient Cuff** to detect jailbreak attempts. Gradient Cuff exploits the unique properties observed in the refusal loss landscape, including functional values and its smoothness, to design an effective two-step detection strategy. Experimental results on two aligned LLMs (LLaMA-2-7B-Chat and Vicuna-7B-V1.5) and six types of jailbreak attacks (GCG, AutoDAN, PAIR, TAP, Base64, and LRL) show that Gradient Cuff can significantly improve the LLM's rejection capability for malicious jailbreak queries, while maintaining the model's performance for benign user queries by adjusting the detection threshold.", "keywords": "Large Language Models;Jailbreak Detection;AI Alignment and Safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Xiaomeng Hu;Pin-Yu Chen;Tsung-Yi Ho", "authorids": "~Xiaomeng_Hu1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "gender": "M;M;M", "homepage": "https://gregxmhu.github.io/;http://www.pinyuchen.com;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": "319/7072;39/8969;63/4181.html", "google_scholar": "u6pbsnkAAAAJ;jxwlCUUAAAAJ;TRDUYkAAAAAJ", "orcid": ";0000-0003-1039-8369;0000-0001-7348-5625", "linkedin": "xiaomeng-hu-greg/;pin-yu-chen-940062a2;", "or_profile": "~Xiaomeng_Hu1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;International Business Machines;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;ibm.com;cse.cuhk.edu.hk", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nhu2024gradient,\ntitle={Gradient Cuff: Detecting Jailbreak Attacks on Large Language Models by Exploring Refusal Loss Landscapes},\nauthor={Xiaomeng Hu and Pin-Yu Chen and Tsung-Yi Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vI1WqFn15v}\n}", "github": "", "reviewers": "yW4P;RDin;GFwB;AWBZ", "pdf_size": 1734110, "rating": "5;5;5;6", "confidence": "4;3;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "57;100;82;237", "wc_strengths": "31;187;97;41", "wc_weaknesses": "191;337;100;46", "wc_questions": "165;101;71;48", "wc_limitations": "36;1;4;41", "wc_review": "480;726;354;413", "wc_reply_reviewers": "27;113;67;0", "wc_reply_authors": "134;125;242;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;3;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 119.0, 69.8176195526602 ], "wc_strengths_avg": [ 89.0, 61.91930232165088 ], "wc_weaknesses_avg": [ 168.5, 110.22363630365312 ], "wc_questions_avg": [ 96.25, 43.91682479414922 ], "wc_limitations_avg": [ 20.5, 18.117670931993437 ], "wc_review_avg": [ 493.25, 141.57926225263358 ], "wc_reply_reviewers_avg": [ 51.75, 42.64607250380743 ], "wc_reply_authors_avg": [ 125.25, 85.72448600020883 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4286176208214064050&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cse.cuhk.edu.hk;ibm.com;cse.cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;International Business Machines Corporation", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ibm.com", "aff_unique_abbr": "CUHK;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "LOVA3: Learning to Visual Question Answering, Asking and Assessment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93210", "id": "vIOKLMl6wu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vIOKLMl6wu", "openreview": "https://openreview.net/forum?id=vIOKLMl6wu", "poster": "/media/PosterPDFs/NeurIPS%202024/93210.png?t=1731485342.0168343", "project": "", "author_site": "Henry Hengyuan Zhao, Pan Zhou, Difei Gao, Zechen Bai, Mike Zheng Shou", "tldr": "", "abstract": "Question answering, asking, and assessment are three innate human traits crucial for understanding the world and acquiring knowledge. By enhancing these capabilities, humans can more effectively utilize data, leading to better comprehension and learning outcomes. However, current Multimodal Large Language Models (MLLMs) primarily focus on question answering, often neglecting the full potential of questioning and assessment skills. In this study, we introduce LOVA3, an innovative framework named ``Learning tO Visual Question Answering, Asking and Assessment,'' designed to equip MLLMs with these additional capabilities. Our approach involves the creation of two supplementary training tasks GenQA and EvalQA, aiming at fostering the skills of asking and assessing questions in the context of images. To develop the questioning ability, we compile a comprehensive set of multimodal foundational tasks. For assessment, we introduce a new benchmark called EvalQABench, comprising 64,000 training samples (split evenly between positive and negative samples) and 5,000 testing samples. We posit that enhancing MLLMs with the capabilities to answer, ask, and assess questions \nwill enhance their multimodal comprehension, ultimately improving overall performance. To validate this hypothesis, we train MLLMs using the LOVA3 framework and evaluate them on a range of multimodal datasets and benchmarks. Our results demonstrate consistent performance gains, underscoring the critical role of these additional tasks in fostering comprehensive intelligence in MLLMs.", "keywords": "Multimodal Large Language Model;Instruction Tuning;Visual Question Answering;Visual Question Assessment.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hengyuan Zhao;Pan Zhou;Difei Gao;Zechen Bai;Mike Zheng Shou", "authorids": "~Hengyuan_Zhao2;~Pan_Zhou3;~Difei_Gao1;~Zechen_Bai1;~Mike_Zheng_Shou1", "gender": "M;;;M;", "homepage": "https://zhaohengyuan1.github.io;;;https://www.baizechen.site/;", "dblp": "260/3042;;;256/5272;", "google_scholar": "QLSk-6IAAAAJ;;;aIdQ8GwAAAAJ;", "orcid": "0000-0001-8047-4465;;;;", "linkedin": ";;;;", "or_profile": "~Hengyuan_Zhao2;~Pan_Zhou3;~Difei_Gao1;~Zechen_Bai1;~Mike_Zheng_Shou1", "aff": "National University of Singapore;;;National University of Singapore;", "aff_domain": "u.nus.edu;;;u.nus.edu;", "position": "PhD student;;;PhD student;", "bibtex": "@inproceedings{\nzhao2024lova,\ntitle={{LOVA}3: Learning to Visual Question Answering, Asking and Assessment},\nauthor={Hengyuan Zhao and Pan Zhou and Difei Gao and Zechen Bai and Mike Zheng Shou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vIOKLMl6wu}\n}", "github": "", "reviewers": "UkuU;4XZr;5aGT", "pdf_size": 4865124, "rating": "3;4;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "137;72;150", "wc_strengths": "47;50;131", "wc_weaknesses": "206;230;175", "wc_questions": "45;10;134", "wc_limitations": "1;37;71", "wc_review": "436;399;661", "wc_reply_reviewers": "0;248;0", "wc_reply_authors": "376;479;0", "reply_reviewers": "0;2;0", "reply_authors": "3;4;1", "rating_avg": [ 4.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 119.66666666666667, 34.120700787384514 ], "wc_strengths_avg": [ 76.0, 38.91015291668744 ], "wc_weaknesses_avg": [ 203.66666666666666, 22.51419305435771 ], "wc_questions_avg": [ 63.0, 52.19833969262496 ], "wc_limitations_avg": [ 36.333333333333336, 28.58126814696802 ], "wc_review_avg": [ 498.6666666666667, 115.77660481384926 ], "wc_reply_reviewers_avg": [ 82.66666666666667, 116.90832115617586 ], "wc_reply_authors_avg": [ 285.0, 205.86565198368245 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14158848033747425181&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "u.nus.edu;;;u.nus.edu;", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Speaking Your Language: Spatial Relationships in Interpretable Emergent Communication", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93209", "id": "vIP8IWmZlN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vIP8IWmZlN", "openreview": "https://openreview.net/forum?id=vIP8IWmZlN", "poster": "/media/PosterPDFs/NeurIPS%202024/93209.png?t=1731242787.9914422", "project": "", "author_site": "Olaf Lipinski, Adam Sobey, Federico Cerutti, Timothy Norman", "tldr": "", "abstract": "Effective communication requires the ability to refer to specific parts of an observation in relation to others. While emergent communication literature shows success in developing various language properties, no research has shown the emergence of such positional references. This paper demonstrates how agents can communicate about spatial relationships within their observations. The results indicate that agents can develop a language capable of expressing the relationships between parts of their observation, achieving over 90% accuracy when trained in a referential game which requires such communication. Using a collocation measure, we demonstrate how the agents create such references. This analysis suggests that agents use a mixture of non-compositional and compositional messages to convey spatial relationships. We also show that the emergent language is interpretable by humans. The translation accuracy is tested by communicating with the receiver agent, where the receiver achieves over 78% accuracy using parts of this lexicon, confirming that the interpretation of the emergent language was successful.", "keywords": "Emergent Communication;Spatial References;Spatial Deixis;NPMI;NLP", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Olaf Lipinski;Adam Sobey;Federico Cerutti;Timothy J. Norman", "authorids": "~Olaf_Lipinski1;~Adam_Sobey1;~Federico_Cerutti1;~Timothy_J._Norman1", "gender": "M;M;;M", "homepage": "https://olipinski.eu/;https://www.southampton.ac.uk/engineering/about/staff/ajs502.page;;https://www.southampton.ac.uk/people/5xgv5h/professor-tim-norman", "dblp": "358/9314;;;23/1018", "google_scholar": "https://scholar.google.co.uk/citations?user=W2WANZoAAAAJ;inoZq8AAAAAJ;;https://scholar.google.co.uk/citations?user=mJjmcu0AAAAJ", "orcid": "0000-0002-2023-7617;;;0000-0002-6387-4034", "linkedin": "olipinski/;;;tim-norman-1a05081/", "or_profile": "~Olaf_Lipinski1;~Adam_Sobey1;~Federico_Cerutti1;~Timothy_J._Norman1", "aff": "University of Southampton;University of Southampton;;University of Southampton", "aff_domain": "soton.ac.uk;soton.ac.uk;;soton.ac.uk", "position": "PhD student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nlipinski2024speaking,\ntitle={Speaking Your Language: Spatial Relationships in Interpretable Emergent Communication},\nauthor={Olaf Lipinski and Adam Sobey and Federico Cerutti and Timothy J. Norman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vIP8IWmZlN}\n}", "github": "", "reviewers": "VwFJ;YdNg;vZ26;JyuC", "pdf_size": 509081, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "4;3;4;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "100;103;102;196", "wc_strengths": "120;36;197;86", "wc_weaknesses": "339;230;159;60", "wc_questions": "156;44;383;265", "wc_limitations": "109;38;1;1", "wc_review": "824;451;842;608", "wc_reply_reviewers": "317;152;491;27", "wc_reply_authors": "0;0;599;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 125.25, 40.861809798392436 ], "wc_strengths_avg": [ 109.75, 58.5678025881115 ], "wc_weaknesses_avg": [ 197.0, 101.8160105287965 ], "wc_questions_avg": [ 212.0, 125.90671149704451 ], "wc_limitations_avg": [ 37.25, 44.0929416120086 ], "wc_review_avg": [ 681.25, 161.70865004692854 ], "wc_reply_reviewers_avg": [ 246.75, 174.54279561184987 ], "wc_reply_authors_avg": [ 149.75, 259.3746084334394 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=989685775414435603&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 5, "email": "soton.ac.uk;soton.ac.uk;;soton.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southampton", "aff_unique_dep": "", "aff_unique_url": "https://www.southampton.ac.uk", "aff_unique_abbr": "Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Improving Subgroup Robustness via Data Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93208", "id": "vJLTcCBZVT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vJLTcCBZVT", "openreview": "https://openreview.net/forum?id=vJLTcCBZVT", "poster": "", "project": "", "author_site": "Saachi Jain, Kimia Hamidieh, Kristian Georgiev, Andrew Ilyas, Marzyeh Ghassemi, Aleksander Madry", "tldr": "", "abstract": "Machine learning models can often fail on subgroups that are underrepresented\nduring training. While dataset balancing can improve performance on\nunderperforming groups, it requires access to training group annotations and can\nend up removing large portions of the dataset. In this paper, we introduce\nData Debiasing with Datamodels (D3M), a debiasing approach\nwhich isolates and removes specific training examples that drive the model's\nfailures on minority groups. Our approach enables us to efficiently train\ndebiased classifiers while removing only a small number of examples, and does\nnot require training group annotations or additional hyperparameter tuning.", "keywords": "group robustness;fairness;data attribution;machine learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/4e7fb703da07c8282e859c6705e6beec29fa7332.zip", "author": "Saachi Jain;Kimia Hamidieh;Kristian Georgiev;Andrew Ilyas;Marzyeh Ghassemi;Aleksander Madry", "authorids": "~Saachi_Jain1;~Kimia_Hamidieh1;~Kristian_Georgiev1;~Andrew_Ilyas1;~Marzyeh_Ghassemi2;~Aleksander_Madry1", "gender": "F;F;M;F;M;M", "homepage": "http://people.csail.mit.edu/saachij/;;http://andrewilyas.com;https://www.healthyml.org/;https://people.csail.mit.edu/madry/;https://kristian-georgiev.github.io/", "dblp": "227/2617;;156/5465;145/6563;67/2454;304/2868", "google_scholar": "6hsn3EYAAAAJ;;Dtw3YBoAAAAJ;;SupjsEUAAAAJ;t8RKSJsAAAAJ", "orcid": ";;;;;0000-0003-4802-1962", "linkedin": ";kimia-hamidieh-956519212/;;;;", "or_profile": "~Saachi_Jain1;~Kimia_Hamidieh1;~Andrew_Ilyas1;~Marzyeh_Ghassemi2;~Aleksander_Madry1;~Kristian_Georgiev_Georgiev1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Professor;PhD student", "bibtex": "@inproceedings{\njain2024improving,\ntitle={Improving Subgroup Robustness via Data Selection},\nauthor={Saachi Jain and Kimia Hamidieh and Kristian Georgiev and Andrew Ilyas and Marzyeh Ghassemi and Aleksander Madry},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vJLTcCBZVT}\n}", "github": "", "reviewers": "QjGu;FTVd;yCP5;uxzC", "pdf_size": 3004050, "rating": "5;5;7;7", "confidence": "5;2;4;4", "soundness": "2;3;3;4", "novelty": "3;3;3;3", "presentation": "3;2;3;2", "wc_summary": "71;46;63;87", "wc_strengths": "27;60;161;172", "wc_weaknesses": "180;68;294;108", "wc_questions": "9;1;239;72", "wc_limitations": "3;2;130;19", "wc_review": "290;177;887;458", "wc_reply_reviewers": "116;53;63;131", "wc_reply_authors": "665;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;0;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 66.75, 14.771171246722448 ], "wc_strengths_avg": [ 105.0, 62.717621128355944 ], "wc_weaknesses_avg": [ 162.5, 85.87636461797857 ], "wc_questions_avg": [ 80.25, 95.69058208622205 ], "wc_limitations_avg": [ 38.5, 53.25645500782041 ], "wc_review_avg": [ 453.0, 269.78046630547584 ], "wc_reply_reviewers_avg": [ 90.75, 33.36446462930284 ], "wc_reply_authors_avg": [ 166.25, 287.95344675832587 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 1.0897247358851685 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.22941573387056177, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6559701246206472981&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Benefits of Balance: From Information Projections to Variance Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93207", "id": "vJMMdFfL0A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vJMMdFfL0A", "openreview": "https://openreview.net/forum?id=vJMMdFfL0A", "poster": "", "project": "", "author_site": "Lang Liu, Ronak Mehta, Soumik Pal, Zaid Harchaoui", "tldr": "", "abstract": "Data balancing across multiple modalities and sources appears in various forms in foundation models in machine learning and AI, e.g., in CLIP and DINO. We show that data balancing across modalities and sources actually offers an unsuspected benefit: variance reduction. We present a non-asymptotic statistical bound that quantifies this variance reduction effect and relates it to the eigenvalue decay of Markov operators. Furthermore, we describe how various forms of data balancing in contrastive multimodal learning and self-supervised clustering can be better understood, and even improved upon, owing to our variance reduction viewpoint.", "keywords": "regularized optimal transport;self-supervised learning;variance reduction;alternating projection", "primary_area": "learning_theory", "supplementary_material": "/attachment/2c60cc6ac01579a70a7ea77c5b63e302a681e2d7.zip", "author": "Lang Liu;Ronak Mehta;Soumik Pal;Zaid Harchaoui", "authorids": "~Lang_Liu1;~Ronak_Mehta2;~Soumik_Pal1;~Zaid_Harchaoui1", "gender": "M;;M;", "homepage": "https://langliu95.github.io/;;https://sites.math.washington.edu/~soumik/;", "dblp": "150/2641;;;", "google_scholar": "RV5cyL8AAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Lang_Liu1;~Ronak_Mehta2;~Soumik_Pal1;~Zaid_Harchaoui1", "aff": "Citadel Securities;;University of Washington;", "aff_domain": "citadelsecurities.com;;u.washington.edu;", "position": "Researcher;;Full Professor;", "bibtex": "@inproceedings{\nliu2024the,\ntitle={The Benefits of Balance: From Information Projections to Variance Reduction},\nauthor={Lang Liu and Ronak Mehta and Soumik Pal and Zaid Harchaoui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vJMMdFfL0A}\n}", "github": "", "reviewers": "os3n;G8ZX;vTkj", "pdf_size": 1733956, "rating": "5;5;6", "confidence": "3;3;2", "soundness": "3;3;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "68;89;74", "wc_strengths": "43;73;80", "wc_weaknesses": "33;139;86", "wc_questions": "83;131;115", "wc_limitations": "48;50;62", "wc_review": "275;482;417", "wc_reply_reviewers": "16;60;19", "wc_reply_authors": "49;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.0, 8.831760866327848 ], "wc_strengths_avg": [ 65.33333333333333, 16.048537489614297 ], "wc_weaknesses_avg": [ 86.0, 43.27431878916948 ], "wc_questions_avg": [ 109.66666666666667, 19.955506062794353 ], "wc_limitations_avg": [ 53.333333333333336, 6.182412330330469 ], "wc_review_avg": [ 391.3333333333333, 86.43430388965302 ], "wc_reply_reviewers_avg": [ 31.666666666666668, 20.07209228976613 ], "wc_reply_authors_avg": [ 16.333333333333332, 23.098821518760555 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vnmREBzpLDIJ:scholar.google.com/&scioq=The+Benefits+of+Balance:+From+Information+Projections+to+Variance+Reduction&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "citadelsecurities.com;;u.washington.edu;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Citadel Securities;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.citadel.com;https://www.washington.edu", "aff_unique_abbr": "Citadel;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Flaws can be Applause: Unleashing Potential of Segmenting Ambiguous Objects in SAM", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93206", "id": "vJSNsSFO95", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vJSNsSFO95", "openreview": "https://openreview.net/forum?id=vJSNsSFO95", "poster": "/media/PosterPDFs/NeurIPS%202024/93206.png?t=1732942248.7587955", "project": "", "author_site": "Chenxin Li, Yuzhihuang, WUYANG LI, Hengyu Liu, Xinyu Liu, Qing Xu, Zhen Chen, Yue Huang, Yixuan Yuan", "tldr": "", "abstract": "As the vision foundation models like the Segment Anything Model (SAM) demonstrate potent universality, they also present challenges in giving ambiguous and uncertain predictions. Significant variations in the model output and granularity can occur with simply subtle changes in the prompt, contradicting the consensus requirement for the robustness of a model. While some established works have been dedicated to stabilizing and fortifying the prediction of SAM, this paper takes a unique path to explore how this flaw can be inverted into an advantage when modeling inherently ambiguous data distributions. We introduce an optimization framework based on a conditional variational autoencoder, which jointly models the prompt and the granularity of the object with a latent probability distribution. This approach enables the model to adaptively perceive and represent the real ambiguous label distribution, taming SAM to produce a series of diverse, convincing, and reasonable segmentation outputs controllably. Extensive experiments on several practical deployment scenarios involving ambiguity demonstrates the exceptional performance of our framework. Project page: \\url{https://a-sa-m.github.io/}.", "keywords": "Foundation Model;Ambiguous Segmentation;Uncertainty", "primary_area": "machine_vision", "supplementary_material": "/attachment/5a5fc9bddf32e5cd5e2a12aed251543b0f3a0969.zip", "author": "Chenxin Li;Yuzhihuang;Wuyang Li;Hengyu Liu;Xinyu Liu;Qing Xu;Zhen Chen;Yue Huang;Yixuan Yuan", "authorids": "~Chenxin_Li1;~Yuzhihuang1;~Wuyang_Li1;~Hengyu_Liu2;~Xinyu_Liu7;~Qing_Xu4;~Zhen_Chen9;~Yue_Huang1;~Yixuan_Yuan2", "gender": "M;M;M;M;;;;F;F", "homepage": "https://xggnet.github.io/;https://xmu-smartdsp.github.io/news.html;https://wymancv.github.io/wuyang.github.io/;https://liuhengyu321.github.io/;;https://scholar.google.com/citations?user=IzA-Ij8AAAAJ&hl=zh-CN&authuser=1;https://franciszchen.github.io/;https://huangyue05.github.io/;http://www.ee.cityu.edu.hk/~yxyuan/", "dblp": "00/9389;;170/0777;372/3098;;93/1908-14;11/1266-13;48/2209-1;36/9220", "google_scholar": "https://scholar.google.com.hk/citations?user=yfptgYMAAAAJ;;3Ml_EbAAAAAJ;UiejE-oAAAAJ;;IzA-Ij8AAAAJ;oVG2zEkAAAAJ;smxgn4YAAAAJ;https://scholar.google.com.au/citations?hl=en", "orcid": ";;;0009-0007-7965-1402;;;0000-0003-0255-6435;;", "linkedin": ";;;;;;franciszchen/;;", "or_profile": "~Chenxin_Li1;~Yuzhihuang1;~Wuyang_Li1;~Hengyu_Liu2;~Xinyu_Liu7;~Qing_Xu4;~Zhen_Chen9;~Yue_Huang1;~Yixuan_Yuan2", "aff": "The Chinese University of Hong Kong;Xiamen University;City University of Hong Kong;Tianjin University;;Chinese University of Hong Kong;Centre for Artificial Intelligence and Robotics, Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences;Xiamen University;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;xmu.edu.cn;cityu.edu;tju.edu.cn;;cuhk.hk;cair-cas.org.hk;xmu.edu.cn;cuhk.edu.hk", "position": "PhD student;MS student;PhD student;Undergrad student;;Intern;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024flaws,\ntitle={Flaws can be Applause: Unleashing Potential of Segmenting Ambiguous Objects in {SAM}},\nauthor={Chenxin Li and Yuzhihuang and Wuyang Li and Hengyu Liu and Xinyu Liu and Qing Xu and Zhen Chen and Yue Huang and Yixuan Yuan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vJSNsSFO95}\n}", "github": "", "reviewers": "V2bw;2dZP;vXP5;YPmC", "pdf_size": 1914317, "rating": "5;5;5;6", "confidence": "4;3;3;3", "soundness": "2;3;2;3", "novelty": "2;3;2;3", "presentation": "3;2;1;2", "wc_summary": "91;82;49;96", "wc_strengths": "91;69;42;114", "wc_weaknesses": "97;316;104;47", "wc_questions": "111;40;10;620", "wc_limitations": "11;5;11;6", "wc_review": "401;512;216;883", "wc_reply_reviewers": "15;65;20;340", "wc_reply_authors": "94;297;52;1141", "reply_reviewers": "1;2;1;2", "reply_authors": "3;4;2;5", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.0, 0.7071067811865476 ], "wc_summary_avg": [ 79.5, 18.309833423600555 ], "wc_strengths_avg": [ 79.0, 26.636441203734407 ], "wc_weaknesses_avg": [ 141.0, 103.39970986419642 ], "wc_questions_avg": [ 195.25, 247.9570275269487 ], "wc_limitations_avg": [ 8.25, 2.7726341266023544 ], "wc_review_avg": [ 503.0, 243.5436305880324 ], "wc_reply_reviewers_avg": [ 110.0, 134.2106553146955 ], "wc_reply_authors_avg": [ 396.0, 439.9903408030681 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10860649873427256500&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "cuhk.edu.hk;xmu.edu.cn;cityu.edu;tju.edu.cn;;cuhk.hk;cair-cas.org.hk;xmu.edu.cn;cuhk.edu.hk", "author_num": 9, "aff_unique_index": "0;1;2;3;0;4;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Xiamen University;City University of Hong Kong;Tianjin University;Hong Kong Institute of Science & Innovation, Chinese Academy of Sciences", "aff_unique_dep": ";;;;Centre for Artificial Intelligence and Robotics", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.xmu.edu.cn;https://www.cityu.edu.hk;http://www.tju.edu.cn;", "aff_unique_abbr": "CUHK;XMU;CityU;TJU;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ERBench: An Entity-Relationship based Automatically Verifiable Hallucination Benchmark for Large Language Models", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97458", "id": "vJaWizbBdA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vJaWizbBdA", "openreview": "https://openreview.net/forum?id=vJaWizbBdA", "poster": "/media/PosterPDFs/NeurIPS%202024/97458.png?t=1731570025.2838056", "project": "", "author_site": "Jio Oh, Soyeon Kim, Junseok Seo, Jindong Wang, Ruochen Xu, Xing Xie, Steven Whang", "tldr": "", "abstract": "Large language models (LLMs) have achieved unprecedented performances in various applications, yet evaluating them is still challenging. Existing benchmarks are either manually constructed or are automatic, but lack the ability to evaluate the thought process of LLMs with arbitrary complexity. We contend that utilizing existing relational databases based on the entity-relationship (ER) model is a promising approach for constructing benchmarks as they contain structured knowledge that can be used to question LLMs. Unlike knowledge graphs, which are also used to evaluate LLMs, relational databases have integrity constraints that can be used to better construct complex in-depth questions and verify answers: (1) functional dependencies can be used to pinpoint critical keywords that an LLM must know to properly answer a given question containing certain attribute values; and (2) foreign key constraints can be used to join relations and construct multi-hop questions, which can be arbitrarily long and used to debug intermediate answers. We thus propose ERBench, which uses these integrity constraints to convert any database into an LLM benchmark. ERBench supports continuous evaluation as databases change, multimodal questions, and various prompt engineering techniques. In our experiments, we construct LLM benchmarks using databases of multiple domains and make an extensive comparison of contemporary LLMs. We show how ERBench can properly evaluate any LLM by not only checking for answer correctness, but also effectively verifying the rationales by looking for the right keywords.", "keywords": "Large Language Model;hallucination;evaluation benchmark;relational database;integrity constraints", "primary_area": "", "supplementary_material": "/attachment/18c79dd5ed1ed539e0e7c7d5ccba569e0d676e5b.zip", "author": "Jio Oh;Soyeon Kim;Junseok Seo;Jindong Wang;Ruochen Xu;Xing Xie;Steven Euijong Whang", "authorids": "~Jio_Oh1;~Soyeon_Kim3;~Junseok_Seo1;~Jindong_Wang4;~Ruochen_Xu2;~Xing_Xie3;~Steven_Euijong_Whang1", "gender": "M;F;M;M;M;M;M", "homepage": "https://www.jiooh.com;https://sites.google.com/view/ss0-0y/home;;https://jd92.wang/;https://xrc10.github.io/;http://research.microsoft.com/en-us/people/xingx/;http://www.stevenwhang.com", "dblp": "292/5381;;;19/2969-1;188/3515;08/6809-1;w/StevenEuijongWhang", "google_scholar": "H0WDLM4AAAAJ;https://scholar.google.com/citations?hl=en;;hBZ_tKsAAAAJ;HTp5S00AAAAJ;5EQfAFIAAAAJ;w6hts30AAAAJ", "orcid": ";;0000-0002-2415-8078;0000-0002-4833-0880;;0000-0002-8608-8482;0000-0001-6419-931X", "linkedin": ";;;jindong-wang/;ruochenx/;xingx/;steven-euijong-whang-1612b5a/", "or_profile": "~Jio_Oh1;~Soyeon_Kim3;~Junseok_Seo1;~Jindong_Wang4;~Ruochen_Xu2;~Xing_Xie3;~Steven_Euijong_Whang1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;KAIST;Microsoft Research;Microsoft Research;Microsoft Research Asia;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;ee.kaist.ac.kr;microsoft.com;research.microsoft.com;microsoft.com;kaist.ac.kr", "position": "MS student;PhD student;MS student;Researcher;Researcher;Senior Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\noh2024erbench,\ntitle={{ERB}ench: An Entity-Relationship based Automatically Verifiable Hallucination Benchmark for Large Language Models},\nauthor={Jio Oh and Soyeon Kim and Junseok Seo and Jindong Wang and Ruochen Xu and Xing Xie and Steven Euijong Whang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vJaWizbBdA}\n}", "github": "", "reviewers": "dgij;2QMS;oNmd;FFNu", "pdf_size": 797603, "rating": "5;6;7;7", "confidence": "3;3;4;3", "wc_summary_and_contributions": "90;48;143;130", "wc_strengths": "3;58;27;122", "wc_improvement": "37;6;70;35", "wc_limitations": "1;7;27;1", "wc_correctness": "2;13;32;1", "wc_clarity": "2;9;7;1", "wc_relation_to_prior_work": "2;31;14;1", "wc_documentation": "45;22;19;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "183;195;340;293", "wc_reply_reviewers": "0;0;20;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 102.75, 37.157603528753036 ], "wc_strengths_avg": [ 52.5, 44.612218057388716 ], "wc_improvement_avg": [ 37.0, 22.66053838724932 ], "wc_limitations_avg": [ 9.0, 10.677078252031311 ], "wc_correctness_avg": [ 12.0, 12.469963913339926 ], "wc_clarity_avg": [ 4.75, 3.344772040064913 ], "wc_relation_to_prior_work_avg": [ 12.0, 12.103718436910205 ], "wc_documentation_avg": [ 21.75, 15.642490210960657 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 252.75, 66.01656988968755 ], "wc_reply_reviewers_avg": [ 5.0, 8.660254037844387 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7156659910300616545&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "kaist.ac.kr;ee.kaist.ac.kr;ee.kaist.ac.kr;microsoft.com;research.microsoft.com;microsoft.com;kaist.ac.kr", "author_num": 7, "aff_unique_index": "0;0;0;1;1;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.kaist.ac.kr;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "KAIST;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;1;2;0", "aff_country_unique": "South Korea;United States;China" }, { "id": "vKwf15M5EE", "title": "Weakly-Supervised Cortical Surfaces Reconstruction from Brain Ribbon Segmentations", "track": "main", "status": "Reject", "tldr": "", "abstract": "Deep learning-based cortical surface reconstruction (CSR) approaches typically rely on supervision information provided by pseudo ground truth generated by conventional CSR methods, subject to errors associated with the supervision information and also increasing computational cost of training data preparation. We propose a new method to jointly reconstruct multiple cortical surfaces using weak supervision from brain MRI ribbon segmentation results. Our approach initializes a midthickness surface, which is then deformed inward and outward to form the inner (white matter) and outer (pial) cortical surfaces, respectively, by jointly learning diffeomorphic flows by minimizing loss functions to optimize the surfaces towards the boundaries of the cortical ribbon segmentation maps. Specifically, a boundary surface loss drives the initialization surface to the inner and outer boundaries, while an inter-surface normal consistency loss regularizes the pial surface in challenging deep cortical sulci regions. Additional regularization terms are utilized to enforce edge length uniformity and smoothness of the reconstructed surfaces. Our method has been evaluated on two large-scale adult brain MRI datasets and one infant brain MRI dataset, demonstrating comparable or superior performance in CSR in terms of accuracy and surface regularity compared to alternative supervised deep learning methods.", "keywords": "Cortical surface reconstruction;diffeomorphic deformation;ODE;Brain MRI", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Hao Zheng;Xiaoyang Chen;Hongming Li;Tingting Chen;Peixian Liang;Yong Fan", "authorids": "~Hao_Zheng1;~Xiaoyang_Chen1;~Hongming_Li1;~Tingting_Chen1;~Peixian_Liang1;~Yong_Fan5", "gender": "M;M;M;F;F;", "homepage": "https://hzaoheng123.github.io/HZ/;https://github.com/xychen2022;;;;", "dblp": "31/6916-6;;31/3923;;215/3485;", "google_scholar": "43iRyc8AAAAJ;Ypmtq7MAAAAJ;CpF6oAYAAAAJ;B_D91AoAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";0000-0003-2390-9797;;;;", "linkedin": ";;;;;", "or_profile": "~Hao_Zheng1;~Xiaoyang_Chen1;~Hongming_Li1;~Tingting_Chen1;~Peixian_Liang1;~Yong_Fan5", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;", "aff_domain": "upenn.edu;upenn.edu;upenn.edu;upenn.edu;upenn.edu;", "position": "Postdoc;Postdoc;Senior Research Investigator;Postdoc;Postdoc;", "bibtex": "@misc{\nanonymous2024weaklysupervised,\ntitle={Weakly-Supervised Cortical Surfaces Reconstruction from Brain Ribbon Segmentations},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=vKwf15M5EE}\n}", "github": "", "project": "", "reviewers": "LNir;zPL4;Guru;DDcT", "site": "https://openreview.net/forum?id=vKwf15M5EE", "pdf_size": 2206124, "rating": "2;4;5;7", "confidence": "5;4;4;4", "soundness": "3;2;2;3", "novelty": "1;2;2;2", "presentation": "3;3;3;3", "wc_summary": "129;73;212;74", "wc_strengths": "107;70;62;73", "wc_weaknesses": "557;87;162;165", "wc_questions": "60;69;142;73", "wc_limitations": "84;7;11;1", "wc_review": "937;306;589;386", "wc_reply_reviewers": "58;0;140;18", "wc_reply_authors": "1119;116;233;30", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 4.5, 1.8027756377319946 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 122.0, 56.68774117920029 ], "wc_strengths_avg": [ 78.0, 17.219175357722563 ], "wc_weaknesses_avg": [ 242.75, 184.1037411352632 ], "wc_questions_avg": [ 86.0, 32.67261850540908 ], "wc_limitations_avg": [ 25.75, 33.818449106959356 ], "wc_review_avg": [ 554.5, 243.7421793617182 ], "wc_reply_reviewers_avg": [ 54.0, 53.907327887774215 ], "wc_reply_authors_avg": [ 374.5, 435.833970681497 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8006407690254357, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ue6tc3Has6cJ:scholar.google.com/&scioq=Weakly-Supervised+Cortical+Surfaces+Reconstruction+from+Brain+Ribbon+Segmentations&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Large Pre-trained time series models for cross-domain Time series analysis tasks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93205", "id": "vMMzjCr5Zj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vMMzjCr5Zj", "openreview": "https://openreview.net/forum?id=vMMzjCr5Zj", "poster": "/media/PosterPDFs/NeurIPS%202024/93205.png?t=1733836002.3151128", "project": "", "author_site": "Harshavardhan Prabhakar Kamarthi, B. Aditya Prakash", "tldr": "", "abstract": "Large pre-trained models have been vital in recent advancements in domains like language and vision, making model training for individual downstream tasks more efficient and provide superior performance. However, tackling time-series analysis tasks usually involves designing and training a separate model from scratch leveraging training data and domain expertise specific to the task. We tackle a significant challenge for pre-training a foundational time-series model from multi-domain time-series datasets: extracting semantically useful tokenized inputs to the model across heterogeneous time-series from different domains. We propose Large Pre-trained Time-series Models (LPTM) that introduces a novel method of adaptive segmentation that automatically identifies optimal dataset-specific segmentation strategy during pre-training. This enables LPTM to perform similar to or better than domain-specific state-of-art model when fine-tuned to different downstream time-series analysis tasks and under zero-shot settings. LPTM achieves superior forecasting and time-series classification results taking up to 40% less data and 50% less training time compared to state-of-art baselines.", "keywords": "Time-series;Self-supervised Learning", "primary_area": "other", "supplementary_material": "", "author": "Harshavardhan Kamarthi;B. Aditya Prakash", "authorids": "~Harshavardhan_Kamarthi1;~B._Aditya_Prakash2", "gender": "M;", "homepage": "https://www.harsha-pk.com;https://www.cc.gatech.edu/~badityap/", "dblp": "245/8927;06/3956", "google_scholar": "LNXEjT8AAAAJ;C-NftTgAAAAJ", "orcid": "0000-0002-2901-7127;0000-0002-3252-455X", "linkedin": "harshavardhan-kamarthi-462928112/;", "or_profile": "~Harshavardhan_Kamarthi1;~B._Aditya_Prakash2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nkamarthi2024large,\ntitle={Large Pre-trained time series models for cross-domain Time series analysis tasks},\nauthor={Harshavardhan Kamarthi and B. Aditya Prakash},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vMMzjCr5Zj}\n}", "github": "", "reviewers": "fxq9;bKzz;7ZoZ;RuJ6", "pdf_size": 704137, "rating": "5;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;4;2", "wc_summary": "82;134;87;147", "wc_strengths": "44;96;54;46", "wc_weaknesses": "139;197;217;181", "wc_questions": "611;298;149;135", "wc_limitations": "92;5;14;1", "wc_review": "968;730;521;510", "wc_reply_reviewers": "58;28;98;34", "wc_reply_authors": "23;48;48;36", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 112.5, 28.429737951659 ], "wc_strengths_avg": [ 60.0, 21.118712081942874 ], "wc_weaknesses_avg": [ 183.5, 28.683619018526933 ], "wc_questions_avg": [ 298.25, 191.5324711374027 ], "wc_limitations_avg": [ 28.0, 37.24916106437835 ], "wc_review_avg": [ 682.25, 186.81859516654117 ], "wc_reply_reviewers_avg": [ 54.5, 27.509089406957838 ], "wc_reply_authors_avg": [ 38.75, 10.328964130056798 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13226977334258865383&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Supra-Laplacian Encoding for Transformer on Dynamic Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93204", "id": "vP9qAzr2Gw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vP9qAzr2Gw", "openreview": "https://openreview.net/forum?id=vP9qAzr2Gw", "poster": "", "project": "", "author_site": "Yannis Karmim, Marc Lafon, Rapha\u00ebl Fournier-Sniehotta, Nicolas THOME", "tldr": "", "abstract": "Fully connected Graph Transformers (GT) have rapidly become prominent in the static graph community as an alternative to Message-Passing models, which suffer from a lack of expressivity, oversquashing, and under-reaching.\nHowever, in a dynamic context, by interconnecting all nodes at multiple snapshots with self-attention,GT loose both structural and temporal information. In this work, we introduce Supra-LAplacian encoding for spatio-temporal TransformErs (SLATE), a new spatio-temporal encoding to leverage the GT architecture while keeping spatio-temporal information.\nSpecifically, we transform Discrete Time Dynamic Graphs into multi-layer graphs and take advantage of the spectral properties of their associated supra-Laplacian matrix.\nOur second contribution explicitly model nodes' pairwise relationships with a cross-attention mechanism, providing an accurate edge representation for dynamic link prediction.\nSLATE outperforms numerous state-of-the-art methods based on Message-Passing Graph Neural Networks combined with recurrent models (e.g, LSTM), and Dynamic Graph Transformers,\non~9 datasets. Code is open-source and available at this link https://github.com/ykrmm/SLATE.", "keywords": "Dynamic graphs;Link prediction;Transformer;supra-Lapacian encoding", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yannis Karmim;Marc Lafon;Raphael Fournier-S'niehotta;Nicolas THOME", "authorids": "~Yannis_Karmim1;~Marc_Lafon1;~Raphael_Fournier-S'niehotta1;~Nicolas_THOME2", "gender": "M;;M;", "homepage": "https://cedric.cnam.fr/lab/en/author/karmimy/;https://cedric.cnam.fr/lab/author/lafonm/;http://raphael.fournier-sniehotta.fr/;", "dblp": ";;54/9826;", "google_scholar": "VDabjmoAAAAJ;;BqJgi0YAAAAJ;", "orcid": "0009-0002-1790-3938;;0000-0002-9137-8011;", "linkedin": "https://fr.linkedin.com/in/yannis-karmim-59257b18b;;;", "or_profile": "~Yannis_Karmim1;~Marc_Lafon1;~Raphael_Fournier-S'niehotta1;~Nicolas_THOME2", "aff": "Conservatoire National des Arts et M\u00e9tiers;Conservatoire National des Arts et M\u00e9tiers;Conservatoire National des Arts et M\u00e9tiers;", "aff_domain": "cnam.fr;cnam.fr;cnam.fr;", "position": "PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nkarmim2024supralaplacian,\ntitle={Supra-Laplacian Encoding for Transformer on Dynamic Graphs},\nauthor={Yannis Karmim and Marc Lafon and Raphael Fournier-S'niehotta and Nicolas THOME},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vP9qAzr2Gw}\n}", "github": "", "reviewers": "G1C1;nC2C;kt1z;n3Tf", "pdf_size": 4573683, "rating": "4;6;6;7", "confidence": "4;3;4;3", "soundness": "2;2;3;4", "novelty": "2;2;3;3", "presentation": "2;3;4;4", "wc_summary": "77;106;87;100", "wc_strengths": "39;50;115;73", "wc_weaknesses": "73;86;173;31", "wc_questions": "81;7;84;12", "wc_limitations": "20;1;32;1", "wc_review": "290;250;491;217", "wc_reply_reviewers": "126;38;228;0", "wc_reply_authors": "437;51;77;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.5, 11.280514172678478 ], "wc_strengths_avg": [ 69.25, 29.123658767400773 ], "wc_weaknesses_avg": [ 90.75, 51.6545012559409 ], "wc_questions_avg": [ 46.0, 36.55817282086182 ], "wc_limitations_avg": [ 13.5, 13.200378782444085 ], "wc_review_avg": [ 312.0, 106.52933868188613 ], "wc_reply_reviewers_avg": [ 98.0, 87.87491109526086 ], "wc_reply_authors_avg": [ 141.25, 172.98320005133448 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6882472016116854, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17516424418164022718&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "email": "cnam.fr;cnam.fr;cnam.fr;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Conservatoire National des Arts et M\u00e9tiers", "aff_unique_dep": "", "aff_unique_url": "https://www.cnam.fr", "aff_unique_abbr": "CNAM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "AdaNeg: Adaptive Negative Proxy Guided OOD Detection with Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93203", "id": "vS5NC7jtCI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vS5NC7jtCI", "openreview": "https://openreview.net/forum?id=vS5NC7jtCI", "poster": "", "project": "", "author_site": "Yabin Zhang, Lei Zhang", "tldr": "", "abstract": "Recent research has shown that pre-trained vision-language models are effective at identifying out-of-distribution (OOD) samples by using negative labels as guidance. However, employing consistent negative labels across different OOD datasets often results in semantic misalignments, as these text labels may not accurately reflect the actual space of OOD images. To overcome this issue, we introduce \\textit{adaptive negative proxies}, which are dynamically generated during testing by exploring actual OOD images, to align more closely with the underlying OOD label space and enhance the efficacy of negative proxy guidance. Specifically, our approach utilizes a feature memory bank to selectively cache discriminative features from test images, representing the targeted OOD distribution. This facilitates the creation of proxies that can better align with specific OOD datasets. While task-adaptive proxies average features to reflect the unique characteristics of each dataset, the sample-adaptive proxies weight features based on their similarity to individual test samples, exploring detailed sample-level nuances. The final score for identifying OOD samples integrates static negative labels with our proposed adaptive proxies, effectively combining textual and visual knowledge for enhanced performance. Our method is training-free and annotation-free, and it maintains fast testing speed. Extensive experiments across various benchmarks demonstrate the effectiveness of our approach, abbreviated as AdaNeg. Notably, on the large-scale ImageNet benchmark, our AdaNeg significantly outperforms existing methods, with a 2.45\\% increase in AUROC and a 6.48\\% reduction in FPR95. Codes are available at \\url{https://github.com/YBZh/OpenOOD-VLM}.", "keywords": "Adaptive negative proxy;OOD detection;vision-language models", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yabin Zhang;Lei Zhang", "authorids": "~Yabin_Zhang2;~Lei_Zhang2", "gender": "M;M", "homepage": "https://ybzh.github.io/;http://www4.comp.polyu.edu.hk/~cslzhang/", "dblp": "70/6124-1;64/5666-6.html", "google_scholar": "p0GLwtoAAAAJ;tAK5l1IAAAAJ", "orcid": ";0000-0002-2078-4215", "linkedin": ";", "or_profile": "~Yabin_Zhang2;~Lei_Zhang2", "aff": "The Hong Kong Polytechnic University;The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk", "position": "PhD student;Chair Professor", "bibtex": "@inproceedings{\nzhang2024adaneg,\ntitle={AdaNeg: Adaptive Negative Proxy Guided {OOD} Detection with Vision-Language Models},\nauthor={Yabin Zhang and Lei Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vS5NC7jtCI}\n}", "github": "", "reviewers": "Pmx9;yxd9;eV4A;ACdF", "pdf_size": 1048142, "rating": "5;5;6;8", "confidence": "5;3;5;5", "soundness": "3;2;3;3", "novelty": "3;2;4;4", "presentation": "3;2;4;4", "wc_summary": "108;74;120;75", "wc_strengths": "58;65;144;52", "wc_weaknesses": "577;164;105;112", "wc_questions": "2;25;2;5", "wc_limitations": "1;7;10;1", "wc_review": "746;335;381;245", "wc_reply_reviewers": "47;17;9;39", "wc_reply_authors": "526;18;13;18", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 94.25, 20.20365066021485 ], "wc_strengths_avg": [ 79.75, 37.37897136091361 ], "wc_weaknesses_avg": [ 239.5, 196.18422464612183 ], "wc_questions_avg": [ 8.5, 9.604686356149273 ], "wc_limitations_avg": [ 4.75, 3.897114317029974 ], "wc_review_avg": [ 426.75, 190.69920686777908 ], "wc_reply_reviewers_avg": [ 28.0, 15.524174696260024 ], "wc_reply_authors_avg": [ 143.75, 220.70158019370862 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4714045207910316, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17521796259481984518&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "polyu.edu.hk;polyu.edu.hk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning Multimodal Behaviors from Scratch with Diffusion Policy Gradient", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93202", "id": "vU1SiBb57j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vU1SiBb57j", "openreview": "https://openreview.net/forum?id=vU1SiBb57j", "poster": "/media/PosterPDFs/NeurIPS%202024/93202.png?t=1731426672.2532363", "project": "", "author_site": "Steven Li, Rickmer Krohn, Tao Chen, Anurag Ajay, Pulkit Agrawal, Georgia Chalvatzaki", "tldr": "", "abstract": "Deep reinforcement learning (RL) algorithms typically parameterize the policy as a deep network that outputs either a deterministic action or a stochastic one modeled as a Gaussian distribution, hence restricting learning to a single behavioral mode. Meanwhile, diffusion models emerged as a powerful framework for multimodal learning. However, the use of diffusion policies in online RL is hindered by the intractability of policy likelihood approximation, as well as the greedy objective of RL methods that can easily skew the policy to a single mode. This paper presents Deep Diffusion Policy Gradient (DDiffPG), a novel actor-critic algorithm that learns from scratch multimodal policies parameterized as diffusion models while discovering and maintaining versatile behaviors. DDiffPG explores and discovers multiple modes through off-the-shelf unsupervised clustering combined with novelty-based intrinsic motivation. DDiffPG forms a multimodal training batch and utilizes mode-specific Q-learning to mitigate the inherent greediness of the RL objective, ensuring the improvement of the diffusion policy across all modes. Our approach further allows the policy to be conditioned on mode-specific embeddings to explicitly control the learned modes. Empirical studies validate DDiffPG's capability to master multimodal behaviors in complex, high-dimensional continuous control tasks with sparse rewards, also showcasing proof-of-concept dynamic online replanning when navigating mazes with unseen obstacles. Our project page is available at https://supersglzc.github.io/projects/ddiffpg/.", "keywords": "Reinforcement Learning;Diffusion Model;Multimodal Learning;Unsupervised Skill Discovery", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zechu Li;Rickmer Krohn;Tao Chen;Anurag Ajay;Pulkit Agrawal;Georgia Chalvatzaki", "authorids": "~Zechu_Li1;~Rickmer_Krohn1;~Tao_Chen1;~Anurag_Ajay1;~Pulkit_Agrawal1;~Georgia_Chalvatzaki1", "gender": "M;M;M;M;M;F", "homepage": ";;https://taochenshh.github.io;https://anuragajay.github.io/;https://people.eecs.berkeley.edu/~pulkitag/;https://www.ias.informatik.tu-darmstadt.de/Team/GeorgiaChalvatzaki", "dblp": ";;;180/5483;149/2672;145/3334", "google_scholar": "https://scholar.google.com/citations?hl=en;;gdUv1PIAAAAJ;;UpZmJI0AAAAJ;https://scholar.google.gr/citations?user=mlho5FkAAAAJ", "orcid": ";;;;;", "linkedin": ";rickmer-krohn-a20586207/;;;;", "or_profile": "~Zechu_Li1;~Rickmer_Krohn1;~Tao_Chen1;~Anurag_Ajay1;~Pulkit_Agrawal1;~Georgia_Chalvatzaki1", "aff": "Technische Universit\u00e4t Darmstadt;;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Technische Universit\u00e4t Darmstadt", "aff_domain": "tu-darmstadt.de;;mit.edu;mit.edu;mit.edu;tu-darmstadt.de", "position": "MS student;;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024learning,\ntitle={Learning Multimodal Behaviors from Scratch with Diffusion Policy Gradient},\nauthor={Zechu Li and Rickmer Krohn and Tao Chen and Anurag Ajay and Pulkit Agrawal and Georgia Chalvatzaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vU1SiBb57j}\n}", "github": "", "reviewers": "U1rA;TVD7;HLNL", "pdf_size": 13429277, "rating": "5;6;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "60;75;80", "wc_strengths": "99;60;68", "wc_weaknesses": "198;200;59", "wc_questions": "1;260;189", "wc_limitations": "1;103;1", "wc_review": "359;698;397", "wc_reply_reviewers": "33;22;53", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 71.66666666666667, 8.498365855987975 ], "wc_strengths_avg": [ 75.66666666666667, 16.81930108205715 ], "wc_weaknesses_avg": [ 152.33333333333334, 66.001683480213 ], "wc_questions_avg": [ 150.0, 109.27335753360316 ], "wc_limitations_avg": [ 35.0, 48.08326112068523 ], "wc_review_avg": [ 484.6666666666667, 151.6450534050558 ], "wc_reply_reviewers_avg": [ 36.0, 12.832251036613439 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15382806490381598410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tu-darmstadt.de;;mit.edu;mit.edu;mit.edu;tu-darmstadt.de", "author_num": 6, "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://web.mit.edu", "aff_unique_abbr": "TUD;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Unveiling LoRA Intrinsic Ranks via Salience Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93201", "id": "vU512K8vrR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vU512K8vrR", "openreview": "https://openreview.net/forum?id=vU512K8vrR", "poster": "/media/PosterPDFs/NeurIPS%202024/93201.png?t=1731165066.6799183", "project": "", "author_site": "Wenjun Ke, Jiahao Wang, Peng Wang, Jiajun Liu, Dong Nie, Guozheng Li, Yining Li", "tldr": "", "abstract": "The immense parameter scale of large language models underscores the necessity for parameter-efficient fine-tuning methods. Methods based on Low-Rank Adaptation (LoRA) assume the low-rank characteristics of the incremental matrix and optimize the matrix obtained from low-rank decomposition. Although effective, these methods are constrained by a fixed and unalterable intrinsic rank, neglecting the variable importance of matrices. Consequently, methods for adaptive rank allocation are proposed, among which AdaLoRA demonstrates excellent fine-tuning performance. AdaLoRA conducts adaptation based on singular value decomposition (SVD), dynamically allocating intrinsic ranks according to importance. However, it still struggles to achieve a balance between fine-tuning effectiveness and efficiency, leading to limited rank allocation space. Additionally, the importance measurement focuses only on parameters with minimal impact on the loss, neglecting the dominant role of singular values in SVD-based matrices and the fluctuations during training. To address these issues, we propose SalientLoRA, which adaptively optimizes intrinsic ranks of LoRA via salience measurement. Firstly, during rank allocation, the salience measurement analyses the variation of singular value magnitudes across multiple time steps and establishes their inter-dependency relationships to assess the matrix importance. This measurement mitigates instability and randomness that may arise during importance assessment. Secondly, to achieve a balance between fine-tuning performance and efficiency, we propose an adaptive adjustment of time-series window, which adaptively controls the size of time-series for significance measurement and rank reduction during training, allowing for rapid rank allocation while maintaining training stability. This mechanism enables matrics to set a higher initial rank, thus expanding the allocation space for ranks. To evaluate the generality of our method across various tasks, we conduct experiments on natural language understanding (NLU), natural language generation (NLG), and large model instruction tuning tasks. Experimental results demonstrate the superiority of SalientLoRA, which outperforms state-of-the-art methods by 0.96\\%-3.56\\% on multiple datasets. Furthermore, as the rank allocation space expands, our method ensures fine-tuning efficiency, achieving a speed improvement of 94.5\\% compared to AdaLoRA. The code is publicly available at https://github.com/Heyest/SalientLoRA.", "keywords": "Parameter-Efficient Fine-Tuning;Low-Rank Adaptation;LoRA", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Wenjun Ke;Jiahao Wang;Peng Wang;Jiajun Liu;Dong Nie;Guozheng Li;Yining Li", "authorids": "~Wenjun_Ke1;~Jiahao_Wang12;~Peng_Wang11;~Jiajun_Liu3;~Dong_Nie1;~Guozheng_Li3;~Yining_Li5", "gender": "M;M;;M;;M;M", "homepage": "https://cs.seu.edu.cn/2023/1024/c23024a469536/page.htm;https://github.com/Heyest/InstructNER;;https://scholar.google.com/citations?user=kO35itkAAAAJ&hl=zh-CN;https://sites.google.com/site/ginobilinie/;;https://seu-ning.top", "dblp": ";;;;https://dblp.uni-trier.de/pid/130/8299;;", "google_scholar": ";;;kO35itkAAAAJ;wk0hUKgAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": "0000-0001-7352-1710;;;0000-0002-7871-0518;;0000-0002-8068-3491;", "linkedin": ";;;;;;", "or_profile": "~Wenjun_Ke1;~Jiahao_Wang12;~Peng_Wang11;~Jiajun_Liu3;~Dong_Nie1;~Guozheng_Li3;~Yining_Li5", "aff": "Southeast University;Southeast University;;Southeast University;Meta Inc.;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;;seu.edu.cn;meta.com;seu.edu.cn;seu.edu.cn", "position": "Associate Professor;MS student;;MS student;Researcher;PhD student;MS student", "bibtex": "@inproceedings{\nke2024unveiling,\ntitle={Unveiling Lo{RA} Intrinsic Ranks via Salience Analysis},\nauthor={Wenjun Ke and Jiahao Wang and Peng Wang and Jiajun Liu and Dong Nie and Guozheng Li and Yining Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vU512K8vrR}\n}", "github": "", "reviewers": "SXAJ;i7vj;xsy4", "pdf_size": 546698, "rating": "5;5;6", "confidence": "4;3;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "2;2;3", "wc_summary": "125;90;58", "wc_strengths": "50;68;40", "wc_weaknesses": "83;67;66", "wc_questions": "168;26;121", "wc_limitations": "20;7;1", "wc_review": "446;258;286", "wc_reply_reviewers": "67;23;0", "wc_reply_authors": "248;127;52", "reply_reviewers": "2;1;0", "reply_authors": "5;3;2", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.0, 27.36177382164151 ], "wc_strengths_avg": [ 52.666666666666664, 11.585431464655176 ], "wc_weaknesses_avg": [ 72.0, 7.788880963698615 ], "wc_questions_avg": [ 105.0, 59.06493601678297 ], "wc_limitations_avg": [ 9.333333333333334, 7.93025150224688 ], "wc_review_avg": [ 330.0, 82.81706748410419 ], "wc_reply_reviewers_avg": [ 30.0, 27.796882319185844 ], "wc_reply_authors_avg": [ 142.33333333333334, 80.74789298936673 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mC46W32yIGcJ:scholar.google.com/&scioq=Unveiling+LoRA+Intrinsic+Ranks+via+Salience+Analysis&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "seu.edu.cn;seu.edu.cn;;seu.edu.cn;meta.com;seu.edu.cn;seu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Southeast University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.seu.edu.cn/;https://www.meta.com", "aff_unique_abbr": "SEU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "DynaMo: In-Domain Dynamics Pretraining for Visuo-Motor Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93200", "id": "vUrOuc6NR3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vUrOuc6NR3", "openreview": "https://openreview.net/forum?id=vUrOuc6NR3", "poster": "/media/PosterPDFs/NeurIPS%202024/93200.png?t=1733339824.2605684", "project": "", "author_site": "Zichen Cui, Hengkai Pan, Aadhithya Iyer, Siddhant Haldar, Lerrel Pinto", "tldr": "", "abstract": "Imitation learning has proven to be a powerful tool for training complex visuo-motor policies. However, current methods often require hundreds to thousands of expert demonstrations to handle high-dimensional visual observations. A key reason for this poor data efficiency is that visual representations are predominantly either pretrained on out-of-domain data or trained directly through a behavior cloning objective. In this work, we present DynaMo, a new in-domain, self-supervised method for learning visual representations. Given a set of expert demonstrations, we jointly learn a latent inverse dynamics model and a forward dynamics model over a sequence of image embeddings, predicting the next frame in latent space, without augmentations, contrastive sampling, or access to ground truth actions. Importantly, DynaMo does not require any out-of-domain data such as Internet datasets or cross-embodied datasets. On a suite of six simulated and real environments, we show that representations learned with DynaMo significantly improve downstream imitation learning performance over prior self-supervised learning objectives, and pretrained representations. Gains from using DynaMo hold across policy classes such as Behavior Transformer, Diffusion Policy, MLP, and nearest neighbors. Finally, we ablate over key components of DynaMo and measure its impact on downstream policy performance. Robot videos are best viewed at https://dynamo-ssl.github.io.", "keywords": "Robot learning;representation learning;self-supervised learning", "primary_area": "robotics", "supplementary_material": "", "author": "Zichen Jeff Cui;Hengkai Pan;Aadhithya Iyer;Siddhant Haldar;Lerrel Pinto", "authorids": "~Zichen_Jeff_Cui1;~Hengkai_Pan1;~Aadhithya_Iyer1;~Siddhant_Haldar1;~Lerrel_Pinto1", "gender": "Not Specified;M;M;M;M", "homepage": "https://jeffcui.com;https://hengkaipan.github.io/;https://aadhithya14.github.io/;https://siddhanthaldar.github.io/;https://www.lerrelpinto.com/", "dblp": ";;;227/2282;168/8304", "google_scholar": ";76ut8YkAAAAJ;;-h_bkRgAAAAJ;pmVPj94AAAAJ", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Zichen_Jeff_Cui1;~Hengkai_Pan1;~Aadhithya_Iyer1;~Siddhant_Haldar1;~Lerrel_Pinto1", "aff": "New York University;New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;cs.nyu.edu", "position": "PhD student;Undergrad student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ncui2024dynamo,\ntitle={DynaMo: In-Domain Dynamics Pretraining for Visuo-Motor Control},\nauthor={Zichen Jeff Cui and Hengkai Pan and Aadhithya Iyer and Siddhant Haldar and Lerrel Pinto},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vUrOuc6NR3}\n}", "github": "", "reviewers": "96in;MZTu;B6gm;kzSJ", "pdf_size": 17990184, "rating": "4;6;6;6", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "1;2;2;3", "presentation": "2;3;3;3", "wc_summary": "29;76;84;65", "wc_strengths": "6;56;171;48", "wc_weaknesses": "202;249;263;265", "wc_questions": "74;119;140;2", "wc_limitations": "1;4;18;1", "wc_review": "312;504;676;381", "wc_reply_reviewers": "110;15;64;107", "wc_reply_authors": "350;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.5, 21.02974084481309 ], "wc_strengths_avg": [ 70.25, 61.18976630123701 ], "wc_weaknesses_avg": [ 244.75, 25.439880109780393 ], "wc_questions_avg": [ 83.75, 52.87898921121696 ], "wc_limitations_avg": [ 6.0, 7.035623639735144 ], "wc_review_avg": [ 468.25, 138.26130152721694 ], "wc_reply_reviewers_avg": [ 74.0, 38.61994303465504 ], "wc_reply_authors_avg": [ 87.5, 151.55444566227678 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16811352998942830432&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;cs.nyu.edu", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unified Speech Recognition: A Single Model for Auditory, Visual, and Audiovisual Inputs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93199", "id": "vWSll6M9pj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vWSll6M9pj", "openreview": "https://openreview.net/forum?id=vWSll6M9pj", "poster": "", "project": "", "author_site": "Alexandros Haliassos, Rodrigo Mira, Honglie Chen, Zoe Landgraf, Stavros Petridis, Maja Pantic", "tldr": "", "abstract": "Research in auditory, visual, and audiovisual speech recognition (ASR, VSR, and AVSR, respectively) has traditionally been conducted independently. Even recent self-supervised studies addressing two or all three tasks simultaneously tend to yield separate models, leading to disjoint inference pipelines with increased memory requirements and redundancies. This paper proposes unified training strategies for these systems. We demonstrate that training a single model for all three tasks enhances VSR and AVSR performance, overcoming typical optimisation challenges when training from scratch. Moreover, we introduce a greedy pseudo-labelling approach to more effectively leverage unlabelled samples, addressing shortcomings in related self-supervised methods. Finally, we develop a self-supervised pre-training method within our framework, proving its effectiveness alongside our semi-supervised approach. Despite using a single model for all tasks, our unified approach achieves state-of-the-art performance on LRS3 for ASR, VSR, and AVSR compared to recent methods. Code will be made publicly available.", "keywords": "Speech recognition;lipreading;self-supervised learning;semi-supervised learning", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/f8a1ad275276e55234a8c57c9bf487524d451199.zip", "author": "Alexandros Haliassos;Rodrigo Mira;Honglie Chen;Zoe Landgraf;Stavros Petridis;Maja Pantic", "authorids": "~Alexandros_Haliassos1;~Rodrigo_Mira1;~Honglie_Chen1;~Zoe_Landgraf1;~Stavros_Petridis1;~Maja_Pantic2", "gender": "M;M;;F;M;F", "homepage": ";https://miraodasilva.github.io/;https://www.robots.ox.ac.uk/~hchen/;;http://ibug.doc.ic.ac.uk/people/spetridis;https://ibug.doc.ic.ac.uk/maja/", "dblp": "257/3052;291/3819;247/1102;259/2503;57/2474;p/MajaPantic", "google_scholar": "qejRKDYAAAAJ;08YfKjcAAAAJ;;;https://scholar.google.co.uk/citations?user=6v-UKEMAAAAJ;ygpxbK8AAAAJ", "orcid": ";0000-0002-9493-3842;;;;", "linkedin": "alexandros-haliassos-692495150/;rodrigo-mira-670bbb151/;;;;maja-pantic-3922952b/?originalSubdomain=uk", "or_profile": "~Alexandros_Haliassos1;~Rodrigo_Mira1;~Honglie_Chen1;~Zoe_Landgraf1;~Stavros_Petridis1;~Maja_Pantic1", "aff": "Imperial College London;Meta;Meta Facebook;Meta Facebook;Meta Facebook;Meta Facebook", "aff_domain": "imperial.ac.uk;meta.com;fb.com;facebook.com;meta.com;fb.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Research Lead", "bibtex": "@inproceedings{\nhaliassos2024unified,\ntitle={Unified Speech Recognition: A Single Model for Auditory, Visual, and Audiovisual Inputs},\nauthor={Alexandros Haliassos and Rodrigo Mira and Honglie Chen and Zoe Landgraf and Stavros Petridis and Maja Pantic},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vWSll6M9pj}\n}", "github": "", "reviewers": "d9WG;d2RY;WdRe;Fi7g", "pdf_size": 524307, "rating": "5;6;7;7", "confidence": "5;4;2;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "31;78;48;110", "wc_strengths": "112;108;68;87", "wc_weaknesses": "136;22;20;192", "wc_questions": "98;134;64;163", "wc_limitations": "8;33;11;23", "wc_review": "385;375;211;575", "wc_reply_reviewers": "187;118;29;13", "wc_reply_authors": "796;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "3;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.75, 30.11125204969066 ], "wc_strengths_avg": [ 93.75, 17.640507362318125 ], "wc_weaknesses_avg": [ 92.5, 74.1940024530285 ], "wc_questions_avg": [ 114.75, 37.265097611572145 ], "wc_limitations_avg": [ 18.75, 9.959292143521045 ], "wc_review_avg": [ 386.5, 128.9059734845519 ], "wc_reply_reviewers_avg": [ 86.75, 70.3575688892105 ], "wc_reply_authors_avg": [ 199.0, 344.67811070620655 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9438798074485388, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14221190857928601304&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "imperial.ac.uk;meta.com;fb.com;facebook.com;meta.com;fb.com", "author_num": 6, "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Imperial College London;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.imperial.ac.uk;https://meta.com", "aff_unique_abbr": "ICL;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Towards Open Respiratory Acoustic Foundation Models: Pretraining and Benchmarking", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97457", "id": "vXnGXRbOfb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vXnGXRbOfb", "openreview": "https://openreview.net/forum?id=vXnGXRbOfb", "poster": "/media/PosterPDFs/NeurIPS%202024/97457.png?t=1731407304.1582787", "project": "", "author_site": "Yuwei Zhang, Tong Xia, Jing Han, Yu Wu, Georgios Rizos, Yang Liu, Mohammed Mosuily, J Ch, Cecilia Mascolo", "tldr": "", "abstract": "Respiratory audio, such as coughing and breathing sounds, has predictive power for a wide range of healthcare applications, yet is currently under-explored. The main problem for those applications arises from the difficulty in collecting large labeled task-specific data for model development. Generalizable respiratory acoustic foundation models pretrained with unlabeled data would offer appealing advantages and possibly unlock this impasse. However, given the safety-critical nature of healthcare applications, it is pivotal to also ensure openness and replicability for any proposed foundation model solution. To this end, we introduce OPERA, an OPEn Respiratory Acoustic foundation model pretraining and benchmarking system, as the first approach answering this need. We curate large-scale respiratory audio datasets ($\\sim$136K samples, over 400 hours), pretrain three pioneering foundation models, and build a benchmark consisting of 19 downstream respiratory health tasks for evaluation. Our pretrained models demonstrate superior performance (against existing acoustic models pretrained with general audio on 16 out of 19 tasks) and generalizability (to unseen datasets and new respiratory audio modalities). This highlights the great promise of respiratory acoustic foundation models and encourages more studies using OPERA as an open resource to accelerate research on respiratory audio for health. The system is accessible from https://github.com/evelyn0414/OPERA.", "keywords": "foundation model;respiratory audio;machine learning for health;self-supervised learning", "primary_area": "", "supplementary_material": "/attachment/68d90b0310bff48b1bc38ad1965089f5fff1694e.pdf", "author": "Yuwei Zhang;Tong Xia;Jing Han;Yu Wu;Georgios Rizos;Yang Liu;Mohammed Mosuily;Jagmohan Chauhan;Cecilia Mascolo", "authorids": "~Yuwei_Zhang2;~Tong_Xia2;~Jing_Han4;~Yu_Wu9;~Georgios_Rizos1;~Yang_Liu142;~Mohammed_Mosuily1;~Jagmohan_Chauhan1;~Cecilia_Mascolo1", "gender": "F;;;F;M;F;M;;F", "homepage": "https://evelyn0414.github.io;;;;https://georgiosrizos.github.io/;https://yangliu-cs.github.io/YangLiu-CS/;https://sites.google.com/view/mohammed-mosuily/bio;;http://www.cl.cam.ac.uk/users/cm542", "dblp": ";;;;178/3612;;;;21/6419", "google_scholar": "cQyahboAAAAJ;;;Ezx8coQAAAAJ;Cx5SSpIAAAAJ;;8W-hCuIAAAAJ;;https://scholar.google.com/citations?hl=en", "orcid": "0000-0003-2110-1858;;;0009-0004-3709-7472;0000-0003-2483-5574;;;;0000-0001-9614-4380", "linkedin": "yuwei-zhang-8b27271b6/;;;yu-wu-b31122192;georgiosrizos/;;mohammed-mosuily/;;", "or_profile": "~Yuwei_Zhang2;~Tong_Xia2;~Jing_Han4;~Yu_Wu9;~Georgios_Rizos1;~Yang_Liu142;~Mohammed_Mosuily1;~Jagmohan_Chauhan1;~Cecilia_Mascolo1", "aff": "Computer Laboratory;;;University of Cambridge;University of Cambridge;University of Cambridge;Imam Abdulrahman Bin Faisal University;;University of Cambridge", "aff_domain": "cl.cam.ac.uk;;;cam.ac.uk;cam.ac.uk;cam.ac.uk;iau.edu.sa;;cam.ac.uk", "position": "PhD student;;;PhD student;Postdoc;Postdoc;Lecturer;;Full Professor", "bibtex": "@inproceedings{\nzhang2024towards,\ntitle={Towards Open Respiratory Acoustic Foundation Models: Pretraining and Benchmarking},\nauthor={Yuwei Zhang and Tong Xia and Jing Han and Yu Wu and Georgios Rizos and Yang Liu and Mohammed Mosuily and Jagmohan Chauhan and Cecilia Mascolo},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vXnGXRbOfb}\n}", "github": "", "reviewers": "jXE3;Vs5k;KT7v;LooR", "pdf_size": 9310940, "rating": "5;6;7;7", "confidence": "3;3;3;5", "wc_summary_and_contributions": "42;49;24;46", "wc_strengths": "5;4;51;34", "wc_improvement": "5;4;158;111", "wc_limitations": "33;4;7;2", "wc_correctness": "10;4;26;12", "wc_clarity": "6;4;7;12", "wc_relation_to_prior_work": "5;4;5;7", "wc_documentation": "12;4;7;4", "wc_additional_feedback": "1;1;1;1", "wc_review": "119;78;286;229", "wc_reply_reviewers": "0;0;42;0", "wc_reply_authors": "66;0;28;0", "reply_reviewers": "0;0;1;0", "reply_authors": "4;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 40.25, 9.705024471890836 ], "wc_strengths_avg": [ 23.5, 19.93113142799475 ], "wc_improvement_avg": [ 69.5, 67.09135562797938 ], "wc_limitations_avg": [ 11.5, 12.539936203984453 ], "wc_correctness_avg": [ 13.0, 8.06225774829855 ], "wc_clarity_avg": [ 7.25, 2.947456530637899 ], "wc_relation_to_prior_work_avg": [ 5.25, 1.0897247358851685 ], "wc_documentation_avg": [ 6.75, 3.2691742076555053 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 178.0, 83.28565302619654 ], "wc_reply_reviewers_avg": [ 10.5, 18.186533479473212 ], "wc_reply_authors_avg": [ 23.5, 27.069355367278327 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4680953024156911489&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "cl.cam.ac.uk;;;cam.ac.uk;cam.ac.uk;cam.ac.uk;iau.edu.sa;;cam.ac.uk", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Cambridge;Imam Abdulrahman Bin Faisal University", "aff_unique_dep": "Computer Laboratory;", "aff_unique_url": "https://www.cl.cam.ac.uk;https://www.ibfu.edu.sa", "aff_unique_abbr": "CL;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United Kingdom;Saudi Arabia" }, { "title": "Curriculum Fine-tuning of Vision Foundation Model for Medical Image Classification Under Label Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93198", "id": "vYUx8j5KK2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vYUx8j5KK2", "openreview": "https://openreview.net/forum?id=vYUx8j5KK2", "poster": "/media/PosterPDFs/NeurIPS%202024/93198.png?t=1732951208.764191", "project": "", "author_site": "Yeonguk Yu, Minhwan Ko, Sungho Shin, Kangmin Kim, Kyoobin Lee", "tldr": "", "abstract": "Deep neural networks have demonstrated remarkable performance in various vision tasks, but their success heavily depends on the quality of the training data. Noisy labels are a critical issue in medical datasets and can significantly degrade model performance. Previous clean sample selection methods have not utilized the well pre-trained features of vision foundation models (VFMs) and assumed that training begins from scratch. In this paper, we propose CUFIT, a curriculum fine-tuning paradigm of VFMs for medical image classification under label noise. Our method is motivated by the fact that linear probing of VFMs is relatively unaffected by noisy samples, as it does not update the feature extractor of the VFM, thus robustly classifying the training samples. Subsequently, curriculum fine-tuning of two adapters is conducted, starting with clean sample selection from the linear probing phase. Our experimental results demonstrate that CUFIT outperforms previous methods across various medical image benchmarks. Specifically, our method surpasses previous baselines by 5.0\\%, 2.1\\%, 4.6\\%, and 5.8\\% at a 40\\% noise rate on the HAM10000, APTOS-2019, BloodMnist, and OrgancMnist datasets, respectively. Furthermore, we provide extensive analyses to demonstrate the impact of our method on noisy label detection. For instance, our method shows higher label precision and recall compared to previous approaches. Our work highlights the potential of leveraging VFMs in medical image classification under challenging conditions of noisy labels.", "keywords": "Learning with noisy label;medical image classification", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/ce53cc85637bcaefe46b51ee2e5002314c6d2f0a.zip", "author": "Yeonguk Yu;Minhwan Ko;Sungho Shin;Kangmin Kim;Kyoobin Lee", "authorids": "~Yeonguk_Yu1;~Minhwan_Ko1;~Sungho_Shin3;~Kangmin_Kim2;~Kyoobin_Lee2", "gender": "M;M;M;M;", "homepage": ";;;;https://ailab.gist.ac.kr", "dblp": "253/5861;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;-O_DjyEAAAAJ;;QVihy5MAAAAJ", "orcid": "0000-0003-2147-4718;0000-0002-6811-9513;;0000-0001-9974-9430;", "linkedin": ";;;;", "or_profile": "~Yeonguk_Yu1;~Minhwan_Ko1;~Sungho_Shin3;~Kangmin_Kim2;~Kyoobin_Lee2", "aff": "Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology;Gwangju Institute of Science and Technology", "aff_domain": "gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr", "position": "PhD student;PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nyu2024curriculum,\ntitle={Curriculum Fine-tuning of Vision Foundation Model for Medical Image Classification Under Label Noise},\nauthor={Yeonguk Yu and Minhwan Ko and Sungho Shin and Kangmin Kim and Kyoobin Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vYUx8j5KK2}\n}", "github": "", "reviewers": "tyjY;T7UR;8Ng4;1c7f", "pdf_size": 1255505, "rating": "4;6;6;7", "confidence": "4;5;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "43;44;80;49", "wc_strengths": "54;45;5;52", "wc_weaknesses": "308;30;111;35", "wc_questions": "61;299;111;14", "wc_limitations": "5;6;1;1", "wc_review": "471;424;308;151", "wc_reply_reviewers": "18;72;0;21", "wc_reply_authors": "22;38;0;19", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 54.0, 15.182226450688976 ], "wc_strengths_avg": [ 39.0, 19.912307751739878 ], "wc_weaknesses_avg": [ 121.0, 112.63436420560113 ], "wc_questions_avg": [ 121.25, 108.20437837721725 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 338.5, 123.44330682544113 ], "wc_reply_reviewers_avg": [ 27.75, 26.78035660703569 ], "wc_reply_authors_avg": [ 19.75, 13.497684986693088 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:i58Mzr1YPZ8J:scholar.google.com/&scioq=Curriculum+Fine-tuning+of+Vision+Foundation+Model+for+Medical+Image+Classification+Under+Label+Noise&hl=en&as_sdt=0,39", "gs_version_total": 5, "email": "gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr;gist.ac.kr", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Gwangju Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gist.ac.kr", "aff_unique_abbr": "GIST", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Gwangju", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "vYmvgxpgwH", "title": "An Empirical Analysis of Compute-Optimal Inference for Problem-Solving with Language Models", "track": "main", "status": "Reject", "tldr": "", "abstract": "The optimal training configurations of large language models (LLMs) with respect to model sizes and compute budgets have been extensively studied. But how to optimally configure LLMs during inference has not been explored in sufficient depth. We study compute-optimal inference: designing models and inference strategies that optimally trade off additional inference-time compute for improved performance. As a first step towards understanding and designing compute-optimal inference methods, we assessed the effectiveness and computational efficiency of multiple inference strategies such as Greedy Search, Majority Voting, Best-of-N, Weighted Voting, and their variants on two different Tree Search algorithms, involving different model sizes (e.g., 7B and 34B) and computational budgets. We found that a smaller language model with a novel tree search algorithm typically achieves a Pareto-optimal trade-off. These results highlight the potential benefits of deploying smaller models equipped with more sophisticated decoding algorithms in end-devices to enhance problem-solving accuracy. For instance, we show that the Llemma-7B model can achieve competitive accuracy to a Llemma-34B model on MATH500 while using 2\u00d7 less FLOPs. Our findings could potentially apply to any generation task with a well-defined measure of success.", "keywords": "Tree Search;Language Model Problem-Solving;Compute-Optimal Scaling", "primary_area": "generative_models", "supplementary_material": "", "author": "Yangzhen Wu;Zhiqing Sun;Shanda Li;Sean Welleck;Yiming Yang", "authorids": "~Yangzhen_Wu1;~Zhiqing_Sun1;~Shanda_Li1;~Sean_Welleck1;~Yiming_Yang1", "gender": "M;M;M;;F", "homepage": "https://thu-wyz.github.io/;https://www.cs.cmu.edu/~zhiqings/;https://lithiumda.github.io/;;http://www.cs.cmu.edu/~yiming/", "dblp": ";211/7692;295/9278;;25/1666", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;MlZq4XwAAAAJ", "orcid": ";;;;0000-0001-8322-607X", "linkedin": ";zhiqing-sun-5781b3100/;;;yiming-yang-24100924/", "or_profile": "~Yangzhen_Wu1;~Zhiqing_Sun1;~Shanda_Li1;~Sean_Welleck1;~Yiming_Yang1", "aff": "Computer Science Department, Stanford University;Carnegie Mellon University;Google;;School of Computer Science, Carnegie Mellon University", "aff_domain": "cs.stanford.edu;cs.cmu.edu;google.com;;cs.cmu.edu", "position": "Intern;PhD student;Intern;;Full Professor", "bibtex": "@misc{\nanonymous2024an,\ntitle={An Empirical Analysis of Compute-Optimal Inference for Problem-Solving with Language Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=vYmvgxpgwH}\n}", "github": "", "project": "", "reviewers": "PMgx;pgJ7;drh5", "site": "https://openreview.net/forum?id=vYmvgxpgwH", "pdf_size": 1691137, "rating": "4;5;6", "confidence": "4;2;2", "soundness": "2;3;3", "novelty": "2;2;2", "presentation": "2;2;3", "wc_summary": "42;138;116", "wc_strengths": "42;36;43", "wc_weaknesses": "285;126;72", "wc_questions": "2;56;1", "wc_limitations": "2;6;61", "wc_review": "373;362;293", "wc_reply_reviewers": "64;85;5", "wc_reply_authors": "783;110;0", "reply_reviewers": "1;1;1", "reply_authors": "3;3;1", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 98.66666666666667, 41.063636251825315 ], "wc_strengths_avg": [ 40.333333333333336, 3.091206165165235 ], "wc_weaknesses_avg": [ 161.0, 90.41017641836565 ], "wc_questions_avg": [ 19.666666666666668, 25.69478978746902 ], "wc_limitations_avg": [ 23.0, 26.919633479426622 ], "wc_review_avg": [ 342.6666666666667, 35.405586502069916 ], "wc_reply_reviewers_avg": [ 51.333333333333336, 33.86574801903671 ], "wc_reply_authors_avg": [ 297.6666666666667, 346.1082040184672 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8660254037844387, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=258232428087186881&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Stanford University;Carnegie Mellon University;Google", "aff_unique_dep": "Computer Science Department;;Google", "aff_unique_url": "https://www.stanford.edu;https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "Stanford;CMU;Google", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Stanford;;Mountain View;Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Template-free Articulated Gaussian Splatting for Real-time Reposable Dynamic View Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93197", "id": "vcGEV6m5m2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vcGEV6m5m2", "openreview": "https://openreview.net/forum?id=vcGEV6m5m2", "poster": "/media/PosterPDFs/NeurIPS%202024/93197.png?t=1731723868.5577202", "project": "", "author_site": "Diwen Wan, Yuxiang Wang, Ruijie Lu, Gang Zeng", "tldr": "", "abstract": "While novel view synthesis for dynamic scenes has made significant progress, capturing skeleton models of objects and re-posing them remains a challenging task. To tackle this problem, in this paper, we propose a novel approach to automatically discover the associated skeleton model for dynamic objects from videos without the need for object-specific templates. Our approach utilizes 3D Gaussian Splatting and superpoints to reconstruct dynamic objects. Treating superpoints as rigid parts, we can discover the underlying skeleton model through intuitive cues and optimize it using the kinematic model. Besides, an adaptive control strategy is applied to avoid the emergence of redundant superpoints. Extensive experiments demonstrate the effectiveness and efficiency of our method in obtaining re-posable 3D objects. Not only can our approach achieve excellent visual fidelity, but it also allows for the real-time rendering of high-resolution images.", "keywords": "Gaussian Splatting;View Synthesis;Skeleton Discovery", "primary_area": "machine_vision", "supplementary_material": "/attachment/d429c7e04316cca6211055e365343b99475db2fd.zip", "author": "Diwen Wan;Yuxiang Wang;Ruijie Lu;Gang Zeng", "authorids": "~Diwen_Wan1;~Yuxiang_Wang4;~Ruijie_Lu1;~Gang_Zeng1", "gender": "M;;;M", "homepage": ";https://github.com/AZUO-jpg;https://jason-aplp.github.io/Ruijie-Lu/;https://www.cis.pku.edu.cn/info/1177/1378.htm", "dblp": "227/6394;;125/9394;", "google_scholar": "gWWaiWYAAAAJ;;wxo8_VYAAAAJ;RuHyY6gAAAAJ", "orcid": "0000-0002-3640-0511;;;", "linkedin": ";;Ruijie122/;", "or_profile": "~Diwen_Wan1;~Yuxiang_Wang4;~Ruijie_Lu1;~Gang_Zeng1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;PhD student;Researcher", "bibtex": "@inproceedings{\nwan2024templatefree,\ntitle={Template-free Articulated Gaussian Splatting for Real-time Reposable Dynamic View Synthesis},\nauthor={Diwen Wan and Yuxiang Wang and Ruijie Lu and Gang Zeng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vcGEV6m5m2}\n}", "github": "", "reviewers": "57cM;m4oH;XcTH;xZeD", "pdf_size": 14489031, "rating": "5;6;7;7", "confidence": "4;4;4;4", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;4;4", "wc_summary": "35;105;55;271", "wc_strengths": "41;91;48;62", "wc_weaknesses": "90;170;246;133", "wc_questions": "47;115;174;105", "wc_limitations": "7;29;39;38", "wc_review": "220;510;562;609", "wc_reply_reviewers": "395;60;51;84", "wc_reply_authors": "779;0;0;0", "reply_reviewers": "3;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 116.5, 92.77257137753594 ], "wc_strengths_avg": [ 60.5, 19.1637678967368 ], "wc_weaknesses_avg": [ 159.75, 57.28165063962455 ], "wc_questions_avg": [ 110.25, 45.04095358670818 ], "wc_limitations_avg": [ 28.25, 12.871965661856 ], "wc_review_avg": [ 475.25, 151.4717382880384 ], "wc_reply_reviewers_avg": [ 147.5, 143.4024058375591 ], "wc_reply_authors_avg": [ 194.75, 337.3168947740389 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14199866367950383167&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "pku.edu.cn;stu.pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Homology Consistency Constrained Efficient Tuning for Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93196", "id": "veMnGKXvTx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=veMnGKXvTx", "openreview": "https://openreview.net/forum?id=veMnGKXvTx", "poster": "/media/PosterPDFs/NeurIPS%202024/93196.png?t=1731329127.1206052", "project": "", "author_site": "Huatian Zhang, Lei Zhang, Yongdong Zhang, Zhendong Mao", "tldr": "", "abstract": "Efficient transfer learning has shown remarkable performance in tuning large-scale vision-language models (VLMs) toward downstream tasks with limited data resources. The key challenge of efficient transfer lies in adjusting image-text alignment to be task-specific while preserving pre-trained general knowledge. However, existing methods adjust image-text alignment merely on a set of observed samples, e.g., data set and external knowledge base, which cannot guarantee to keep the correspondence of general concepts between image and text latent manifolds without being disrupted and thereby a weak generalization of the adjusted alignment. In this work, we propose a Homology Consistency (HC) constraint for efficient transfer on VLMs, which explicitly constrains the correspondence of image and text latent manifolds through structural equivalence based on persistent homology in downstream tuning. Specifically, we build simplicial complex on the top of data to mimic the topology of latent manifolds, then track the persistence of the homology classes of topological features across multiple scales, and guide the directions of persistence tracks in image and text manifolds to coincide each other, with a deviating perturbation additionally. For practical application, we tailor the implementation of our proposed HC constraint for two main paradigms of adapter tuning. Extensive experiments on few-shot learning over 11 datasets and domain generalization demonstrate the effectiveness and robustness of our method.", "keywords": "Efficient Transfer Learning;Vision-Language Models;Persistent Homology;Topological Data Analysis", "primary_area": "machine_vision", "supplementary_material": "", "author": "Huatian Zhang;Lei Zhang;Yongdong Zhang;Zhendong Mao", "authorids": "~Huatian_Zhang1;~Lei_Zhang54;~Yongdong_Zhang2;~Zhendong_Mao1", "gender": "M;;M;", "homepage": ";;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;", "dblp": "324/0507-1;;z/YongdongZhang;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;", "orcid": "0000-0002-9967-1992;;0000-0003-0066-3448;", "linkedin": ";;;", "or_profile": "~Huatian_Zhang1;~Lei_Zhang54;~Yongdong_Zhang2;~Zhendong_Mao1", "aff": "University of Science and Technology of China;;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;;ustc.edu.cn;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nzhang2024homology,\ntitle={Homology Consistency Constrained Efficient Tuning for Vision-Language Models},\nauthor={Huatian Zhang and Lei Zhang and Yongdong Zhang and Zhendong Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=veMnGKXvTx}\n}", "github": "", "reviewers": "yCSE;zhsp;KyVk;Ls33", "pdf_size": 2654727, "rating": "3;5;5;6", "confidence": "4;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;2;2", "presentation": "2;2;3;3", "wc_summary": "34;92;77;47", "wc_strengths": "24;37;90;23", "wc_weaknesses": "83;43;46;107", "wc_questions": "2;43;80;39", "wc_limitations": "8;7;1;15", "wc_review": "151;222;294;231", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 4.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 62.5, 23.092206477510977 ], "wc_strengths_avg": [ 43.5, 27.408940147331492 ], "wc_weaknesses_avg": [ 69.75, 26.65872277510684 ], "wc_questions_avg": [ 41.0, 27.613402542968153 ], "wc_limitations_avg": [ 7.75, 4.968651728587948 ], "wc_review_avg": [ 224.5, 50.69763308084511 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.2294157338705618, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Buxrwx2oZjEJ:scholar.google.com/&scioq=Homology+Consistency+Constrained+Efficient+Tuning+for+Vision-Language+Models&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "ustc.edu.cn;;ustc.edu.cn;", "author_num": 4, "aff_unique_index": "0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "MARVEL: Multidimensional Abstraction and Reasoning through Visual Evaluation and Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97456", "id": "vecFROHnL4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vecFROHnL4", "openreview": "https://openreview.net/forum?id=vecFROHnL4", "poster": "", "project": "", "author_site": "Yifan Jiang, jiarui zhang, Kexuan Sun, Zhivar Sourati Hassan Zadeh, Kian Ahrabian, Kaixin Ma, Filip Ilievski, Jay Pujara", "tldr": "", "abstract": "While multi-modal large language models (MLLMs) have shown significant progress across popular visual reasoning benchmarks, whether they possess abstract visual reasoning abilities remains an open question. Similar to the Sudoku puzzles, abstract visual reasoning (AVR) problems require finding high-level patterns (e.g., repetition constraints on numbers) that control the input shapes (e.g., digits) in a specific task configuration (e.g., matrix). However, existing AVR benchmarks only consider a limited set of patterns (addition, conjunction), input shapes (rectangle, square), and task configurations (3 \u00d7 3 matrices). And they fail to capture all abstract reasoning patterns in human cognition necessary for addressing real-world tasks, such as geometric properties and object boundary understanding in real-world navigation. To evaluate MLLMs\u2019 AVR abilities systematically, we introduce MARVEL founded on the core knowledge system in human cognition, a multi-dimensional AVR benchmark with 770 puzzles composed of six core knowledge patterns, geometric and abstract shapes, and five different task configurations. To inspect whether the model performance is grounded in perception or reasoning, MARVEL complements the standard AVR question with perception questions in a hierarchical evaluation framework. We conduct comprehensive experiments on MARVEL with ten representative MLLMs in zero-shot and few-shot settings. Our experiments reveal that all MLLMs show near-random performance on MARVEL, with significant performance gaps (40%) compared to humans across all patterns and task configurations. Further analysis of perception questions reveals that MLLMs struggle to comprehend the visual features (near-random performance). Although closed-source MLLMs, such as GPT-4V, show a promising understanding of reasoning patterns (on par with humans) after adding textual descriptions, this advantage is hindered by their weak perception abilities. We release our entire\ncode and dataset at https://github.com/1171-jpg/MARVEL_AVR.", "keywords": "Abstract Visual Reasoning;Benchmark;Multi-modal large language model;Evaluation", "primary_area": "", "supplementary_material": "/attachment/c5914fd5f87582fe6ddda9c87fb44f624dcf214a.zip", "author": "Yifan Jiang;Jiarui Zhang;Kexuan Sun;Zhivar Sourati;Kian Ahrabian;Kaixin Ma;Filip Ilievski;Jay Pujara", "authorids": "~Yifan_Jiang4;~Jiarui_Zhang2;~Kexuan_Sun1;~Zhivar_Sourati1;~Kian_Ahrabian1;~Kaixin_Ma1;~Filip_Ilievski1;~Jay_Pujara1", "gender": "M;M;F;M;M;;M;", "homepage": "https://yifanjiang-921.github.io//;https://saccharomycetes.github.io/;http://www.kianasun.com;https://zhpinkman.github.io/;;;http://www.ilievski.info;https://www.jaypujara.org", "dblp": ";194/0368-2;;317/2968;211/6774;203/9347;167/4770;65/10103", "google_scholar": "npRM7lYAAAAJ;rM4hgN8AAAAJ;;giqWNAwAAAAJ;pwUdiCYAAAAJ;gDIMQp4AAAAJ;4ZScBc0AAAAJ;yvdSr4AAAAAJ", "orcid": "0000-0003-2851-9210;0009-0002-7294-541X;;0000-0003-2129-6165;;;;0000-0001-6921-1744", "linkedin": "yifan-jiang-29199122a/;;;zhivarsourati/;kahrabian/;;;pujara", "or_profile": "~Yifan_Jiang4;~Jiarui_Zhang2;~Kexuan_Sun1;~Zhivar_Sourati1;~Kian_Ahrabian1;~Kaixin_Ma1;~Filip_Ilievski1;~Jay_Pujara1", "aff": "Information Sciences Institute, University of Southern California;University of Southern California;Amazon;University of Southern California;Microsoft;Tencent AI Lab;Vrije Universiteit Amsterdam;University of Southern California", "aff_domain": "isi.edu;usc.edu;amazon.com;usc.edu;microsoft.com;tencent.com;vu.nl;usc.edu", "position": "PhD student;PhD student;Applied Scientist;PhD student;Intern;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\njiang2024marvel,\ntitle={{MARVEL}: Multidimensional Abstraction and Reasoning through Visual Evaluation and Learning},\nauthor={Yifan Jiang and Jiarui Zhang and Kexuan Sun and Zhivar Sourati and Kian Ahrabian and Kaixin Ma and Filip Ilievski and Jay Pujara},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vecFROHnL4}\n}", "github": "", "reviewers": "haaJ;mxzQ;PeKX", "pdf_size": 1330297, "rating": "6;7;7", "confidence": "3;4;3", "wc_summary_and_contributions": "137;81;118", "wc_strengths": "154;70;151", "wc_improvement": "208;285;303", "wc_limitations": "56;22;5", "wc_correctness": "28;10;1", "wc_clarity": "32;12;22", "wc_relation_to_prior_work": "19;16;8", "wc_documentation": "9;5;1", "wc_additional_feedback": "1;1;1", "wc_review": "644;502;610", "wc_reply_reviewers": "0;142;217", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;1", "reply_authors": "1;3;4", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 112.0, 23.25224003546038 ], "wc_strengths_avg": [ 125.0, 38.91015291668744 ], "wc_improvement_avg": [ 265.3333333333333, 41.20140234938073 ], "wc_limitations_avg": [ 27.666666666666668, 21.202725191719 ], "wc_correctness_avg": [ 13.0, 11.224972160321824 ], "wc_clarity_avg": [ 22.0, 8.16496580927726 ], "wc_relation_to_prior_work_avg": [ 14.333333333333334, 4.642796092394707 ], "wc_documentation_avg": [ 5.0, 3.265986323710904 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 585.3333333333334, 60.53832578531462 ], "wc_reply_reviewers_avg": [ 119.66666666666667, 89.98641872835898 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14465606146703360057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "isi.edu;usc.edu;amazon.com;usc.edu;microsoft.com;tencent.com;vu.nl;usc.edu", "author_num": 8, "aff_unique_index": "0;0;1;0;2;3;4;0", "aff_unique_norm": "University of Southern California;Amazon;Microsoft;Tencent;Vrije Universiteit Amsterdam", "aff_unique_dep": "Information Sciences Institute;Amazon.com, Inc.;Microsoft Corporation;Tencent AI Lab;", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com;https://www.microsoft.com;https://ai.tencent.com;https://www.vu.nl", "aff_unique_abbr": "USC;Amazon;Microsoft;Tencent AI Lab;VU Amsterdam", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;1;2;0", "aff_country_unique": "United States;China;Netherlands" }, { "title": "ComBack: A Versatile Dataset for Enhancing Compiler Backend Development Efficiency", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97455", "id": "vfju5hjrJw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vfju5hjrJw", "openreview": "https://openreview.net/forum?id=vfju5hjrJw", "poster": "/media/PosterPDFs/NeurIPS%202024/97455.png?t=1728267172.1819987", "project": "", "author_site": "Ming Zhong, FANG LYU, Lulin Wang, Hongna Geng, Lei Qiu, Huimin Cui, Xiaobing Feng", "tldr": "", "abstract": "Compiler backends are tasked with generating executable machine code for processors. With the proliferation of diverse processors, it is imperative for programmers to tailor specific compiler backends to accommodate each one. Meanwhile, compiler backend development is a laborious and time-consuming task, lacking effective automation methods. Although language models have demonstrated strong abilities in code related tasks, the lack of appropriate datasets for compiler backend development limits the application of language models in this field.\n\nIn this paper, we introduce ComBack, the first public dataset designed for improving compiler backend development capabilities of language models. ComBack includes 178 backends for mainstream compilers and three tasks including statement-level completion, next-statement suggestion and code generation, representing common development scenarios. We conducted experiments by fine-tuning six pre-trained language models with ComBack, demonstrating its effectiveness in enhancing model accuracy across the three tasks. We further evaluated the top-performing model(CodeT5+) across the three tasks for new targets, comparing its accuracy with conventional methods (Fork-Flow), ChatGPT-3.5-Turbo, and Code-LLaMA-34B-Instruct. Remarkably, fine-tuned CodeT5+ with only 220M parameters on ComBack outperformed Fork-Flow methods significantly and surpassed ChatGPT and Code-LLaMA. This suggests potential efficiency improvements in compiler development. ComBack is avaliable at https://huggingface.co/datasets/docz1105/ComBack.", "keywords": "Compiler Backend; Code Dataset; Code Generation", "primary_area": "", "supplementary_material": "/attachment/d924ab7856f4fa638a6aec44b3a1f4256d297ac6.zip", "author": "Ming Zhong;FANG LYU;Lulin Wang;Hongna Geng;Lei Qiu;Huimin Cui;Xiaobing Feng", "authorids": "~Ming_Zhong9;~FANG_LYU1;~Lulin_Wang1;~Hongna_Geng2;~Lei_Qiu1;~Huimin_Cui1;~Xiaobing_Feng3", "gender": "M;F;M;F;F;F;M", "homepage": "https://huggingface.co/docz1105;https://lvfang1109.github.io/;https://github.com/MrLinWang;https://github.com/orangehn;https://github.com/ict-ql;https://cuihuimin.github.io/;", "dblp": ";;;;;97/2112.html;f/XiaobingFeng2.html", "google_scholar": "MA5nMrkAAAAJ;;;;;;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Ming_Zhong9;~FANG_LYU1;~Lulin_Wang1;~Hongna_Geng2;~Lei_Qiu1;~Huimin_Cui1;~Xiaobing_Feng3", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Chinese Academy of Sciences;University of Chinese Academy of Sciences;;ICT, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ucas.ac.cn;;ict.ac.cn", "position": "MS student;Associate Professor;Researcher;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nzhong2024comback,\ntitle={ComBack: A Versatile Dataset for Enhancing Compiler Backend Development Efficiency},\nauthor={Ming Zhong and FANG LYU and Lulin Wang and Hongna Geng and Lei Qiu and Huimin Cui and Xiaobing Feng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vfju5hjrJw}\n}", "github": "", "reviewers": "hMKy;HjXi;3S26;MbzQ", "pdf_size": 1132760, "rating": "7;7;7;8", "confidence": "4;4;4;3", "wc_summary_and_contributions": "92;195;309;106", "wc_strengths": "31;279;455;91", "wc_improvement": "186;113;588;143", "wc_limitations": "37;137;181;46", "wc_correctness": "63;39;136;7", "wc_clarity": "62;17;132;65", "wc_relation_to_prior_work": "51;160;97;7", "wc_documentation": "40;85;29;17", "wc_additional_feedback": "1;1;1;1", "wc_review": "563;1026;1928;483", "wc_reply_reviewers": "15;0;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 175.5, 86.60975695613052 ], "wc_strengths_avg": [ 214.0, 166.52627420320195 ], "wc_improvement_avg": [ 257.5, 192.57011710023963 ], "wc_limitations_avg": [ 100.25, 60.857928817862344 ], "wc_correctness_avg": [ 61.25, 47.50986739615256 ], "wc_clarity_avg": [ 69.0, 41.04266073246227 ], "wc_relation_to_prior_work_avg": [ 78.75, 56.684984784332435 ], "wc_documentation_avg": [ 42.75, 25.713566458194787 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 1000.0, 574.4819405342521 ], "wc_reply_reviewers_avg": [ 3.75, 6.49519052838329 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:bzTOewV8eZcJ:scholar.google.com/&scioq=ComBack:+A+Versatile+Dataset+for+Enhancing+Compiler+Backend+Development+Efficiency&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ucas.ac.cn;;ict.ac.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;http://www.ucas.ac.cn;http://www.ict.cas.cn", "aff_unique_abbr": "CAS;UCAS;ICT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Can We Leave Deepfake Data Behind in Training Deepfake Detector?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93195", "id": "vh9yEPLeyD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vh9yEPLeyD", "openreview": "https://openreview.net/forum?id=vh9yEPLeyD", "poster": "/media/PosterPDFs/NeurIPS%202024/93195.png?t=1732193174.2826505", "project": "", "author_site": "Jikang Cheng, Zhiyuan Yan, Ying Zhang, Yuhao Luo, Zhongyuan Wang, Chen Li", "tldr": "", "abstract": "The generalization ability of deepfake detectors is vital for their applications in real-world scenarios. One effective solution to enhance this ability is to train the models with manually-blended data, which we termed ''blendfake'', encouraging models to learn generic forgery artifacts like blending boundary. Interestingly, current SoTA methods utilize blendfake $\\textit{without}$ incorporating any deepfake data in their training process. This is likely because previous empirical observations suggest that vanilla hybrid training (VHT), which combines deepfake and blendfake data, results in inferior performance to methods using only blendfake data (so-called \"1+1<2\"). Therefore, a critical question arises: Can we leave deepfake behind and rely solely on blendfake data to train an effective deepfake detector? Intuitively, as deepfakes also contain additional informative forgery clues ($\\textit{e.g.,}$ deep generative artifacts), excluding all deepfake data in training deepfake detectors seems counter-intuitive. In this paper, we rethink the role of blendfake in detecting deepfakes and formulate the process from \"real to blendfake to deepfake\" to be a $\\textit{progressive transition}$. Specifically, blendfake and deepfake can be explicitly delineated as the oriented pivot anchors between \"real-to-fake\" transitions. The accumulation of forgery information should be oriented and progressively increasing during this transition process. To this end, we propose an $\\underline{O}$riented $\\underline{P}$rogressive $\\underline{R}$egularizor (OPR) to establish the constraints that compel the distribution of anchors to be discretely arranged. Furthermore, we introduce feature bridging to facilitate the smooth transition between adjacent anchors. Extensive experiments confirm that our design allows leveraging forgery information from both blendfake and deepfake effectively and comprehensively. Code is available at https://github.com/beautyremain/ProDet.", "keywords": "Deepfake Detection;Data Synthesis;Hybrid Training", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jikang Cheng;Zhiyuan Yan;Ying Zhang;Yuhao Luo;Zhongyuan Wang;Chen Li", "authorids": "~Jikang_Cheng1;~Zhiyuan_Yan3;~Ying_Zhang9;~Yuhao_Luo4;~Zhongyuan_Wang4;~Chen_Li11", "gender": "M;M;F;M;;", "homepage": ";https://yzy-stack.github.io/;https://yingzhangdut.github.io/;;;", "dblp": ";56/6499-2;13/6769-21;;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;R_psgxkAAAAJ;;;", "orcid": "0000-0001-6549-6148;0009-0002-7242-5828;;0009-0006-8828-3608;;", "linkedin": ";;;yuhao-luo-039982299;;", "or_profile": "~Jikang_Cheng1;~Zhiyuan_Yan3;~Ying_Zhang9;~Yuhao_Luo4;~Zhongyuan_Wang4;~Chen_Li11", "aff": "Wuhan University;Peking University;Tencent;The Chinese University of Hong Kong, Shenzhen;;", "aff_domain": "whu.edu.cn;stu.pku.edu.cn;tencent.com;cuhk.edu.hk;;", "position": "MS student;PhD student;Senior Researcher;MS student;;", "bibtex": "@inproceedings{\ncheng2024can,\ntitle={Can We Leave Deepfake Data Behind in Training Deepfake Detector?},\nauthor={Jikang Cheng and Zhiyuan Yan and Ying Zhang and Yuhao Luo and Zhongyuan Wang and Chen Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vh9yEPLeyD}\n}", "github": "", "reviewers": "6DHe;t2ao;Ssp1;LWpa", "pdf_size": 2190016, "rating": "4;4;5;7", "confidence": "5;3;5;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "42;87;63;153", "wc_strengths": "51;30;37;25", "wc_weaknesses": "258;27;292;99", "wc_questions": "23;53;4;3", "wc_limitations": "1;38;4;3", "wc_review": "375;235;400;283", "wc_reply_reviewers": "46;38;0;0", "wc_reply_authors": "74;154;64;64", "reply_reviewers": "1;1;0;0", "reply_authors": "2;3;2;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 41.69757187175291 ], "wc_strengths_avg": [ 35.75, 9.781998773256925 ], "wc_weaknesses_avg": [ 169.0, 109.67451846258547 ], "wc_questions_avg": [ 20.75, 20.253086184579377 ], "wc_limitations_avg": [ 11.5, 15.337861650177967 ], "wc_review_avg": [ 323.25, 67.03870150890454 ], "wc_reply_reviewers_avg": [ 21.0, 21.18962010041709 ], "wc_reply_authors_avg": [ 89.0, 37.749172176353746 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1387268034302380535&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "whu.edu.cn;stu.pku.edu.cn;tencent.com;cuhk.edu.hk;;", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Wuhan University;Peking University;Tencent;Chinese University of Hong Kong", "aff_unique_dep": ";;Tencent Holdings Limited;", "aff_unique_url": "http://www.whu.edu.cn/;http://www.pku.edu.cn;https://www.tencent.com;https://www.cuhk.edu.cn", "aff_unique_abbr": "WHU;Peking U;Tencent;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Improved off-policy training of diffusion samplers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93194", "id": "vieIamY2Gi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vieIamY2Gi", "openreview": "https://openreview.net/forum?id=vieIamY2Gi", "poster": "", "project": "", "author_site": "Marcin Sendera, Minsu Kim, Sarthak Mittal, Pablo Lemos, Luca Scimeca, Jarrid Rector-Brooks, Alexandre Adam, Yoshua Bengio, Nikolay Malkin", "tldr": "", "abstract": "We study the problem of training diffusion models to sample from a distribution with a given unnormalized density or energy function. We benchmark several diffusion-structured inference methods, including simulation-based variational approaches and off-policy methods (continuous generative flow networks). Our results shed light on the relative advantages of existing algorithms while bringing into question some claims from past work. We also propose a novel exploration strategy for off-policy methods, based on local search in the target space with the use of a replay buffer, and show that it improves the quality of samples on a variety of target distributions. Our code for the sampling methods and benchmarks studied is made public at [this link](https://github.com/GFNOrg/gfn-diffusion) as a base for future work on diffusion models for amortized inference.", "keywords": "diffusion models;amortized inference;stochastic control;GFlowNets", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Marcin Sendera;Minsu Kim;Sarthak Mittal;Pablo Lemos;Luca Scimeca;Jarrid Rector-Brooks;Alexandre Adam;Yoshua Bengio;Nikolay Malkin", "authorids": "~Marcin_Sendera1;~Minsu_Kim2;~Sarthak_Mittal1;~Pablo_Lemos1;~Luca_Scimeca1;~Jarrid_Rector-Brooks2;~Alexandre_Adam1;~Yoshua_Bengio1;~Nikolay_Malkin1", "gender": "M;M;M;M;M;M;;M;", "homepage": ";https://minsuukim.github.io/;https://sarthmit.github.io/;https://pablo-lemos.github.io;https://lucascimeca.com;;;http://yoshuabengio.org;", "dblp": "220/9876;;228/8275;313/2645;223/6396;230/4010;;56/953;", "google_scholar": "https://scholar.google.pl/citations?user=ScNBRmQAAAAJ;https://scholar.google.ca/citations?user=VvyLuhAAAAAJ;FGGgTrcAAAAJ;AklQTTsAAAAJ;fKJvAvMAAAAJ;gxRPZh4AAAAJ;Mmlh_CEAAAAJ;kukA0LcAAAAJ;", "orcid": "0000-0002-8741-6919;;;0000-0002-4728-8473;0000-0002-2821-0072;;0000-0001-8806-7936;;", "linkedin": "marcin-sendera-976516123/;;;;luca-scimeca/;;alexandre-adam-31b131160/;yoshuabengio/?originalSubdomain=ca;", "or_profile": "~Marcin_Sendera1;~Minsu_Kim2;~Sarthak_Mittal1;~Pablo_Lemos1;~Luca_Scimeca1;~Jarrid_Rector-Brooks2;~Alexandre_Adam1;~Yoshua_Bengio1;~Nikolay_Malkin1", "aff": "Jagiellonian University;Korea Advanced Institute of Science & Technology;University of Montreal;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;Universit\u00e9 de Montr\u00e9al;University of Montreal;", "aff_domain": "uj.edu.pl;kaist.ac.kr;umontreal.ca;umontreal.ca;mila.umontreal.ca;mila.umontreal.ca;umontreal.ca;umontreal.ca;", "position": "PhD student;PhD student;PhD student;Postdoc;Postdoc;PhD student;PhD student;Full Professor;", "bibtex": "@inproceedings{\nsendera2024improved,\ntitle={Improved off-policy training of diffusion samplers},\nauthor={Marcin Sendera and Minsu Kim and Sarthak Mittal and Pablo Lemos and Luca Scimeca and Jarrid Rector-Brooks and Alexandre Adam and Yoshua Bengio and Nikolay Malkin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vieIamY2Gi}\n}", "github": "", "reviewers": "fepi;6eYp;5SPJ;FAJh", "pdf_size": 2201244, "rating": "3;4;6;7", "confidence": "3;2;3;5", "soundness": "2;2;3;3", "novelty": "1;3;2;3", "presentation": "2;3;3;3", "wc_summary": "25;65;68;76", "wc_strengths": "16;87;64;115", "wc_weaknesses": "169;150;60;5", "wc_questions": "76;15;39;334", "wc_limitations": "3;7;2;4", "wc_review": "289;324;233;534", "wc_reply_reviewers": "115;0;79;417", "wc_reply_authors": "103;29;29;589", "reply_reviewers": "1;0;1;3", "reply_authors": "2;2;2;5", "rating_avg": [ 5.0, 1.5811388300841898 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.5, 19.75474626513841 ], "wc_strengths_avg": [ 70.5, 36.28015986734347 ], "wc_weaknesses_avg": [ 96.0, 66.74953183356419 ], "wc_questions_avg": [ 116.0, 127.72431248591633 ], "wc_limitations_avg": [ 4.0, 1.8708286933869707 ], "wc_review_avg": [ 345.0, 113.84419177103415 ], "wc_reply_reviewers_avg": [ 152.75, 158.13344839090811 ], "wc_reply_authors_avg": [ 187.5, 233.7664432719119 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.7254762501100116, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11529359833882667277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "uj.edu.pl;kaist.ac.kr;umontreal.ca;umontreal.ca;mila.umontreal.ca;mila.umontreal.ca;umontreal.ca;umontreal.ca;", "author_num": 9, "aff_unique_index": "0;1;2;3;2;2;3;2", "aff_unique_norm": "Jagiellonian University;Korea Advanced Institute of Science and Technology;University of Montreal;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uj.edu.pl;https://www.kaist.ac.kr;https://wwwumontreal.ca;https://www.umontreal.ca", "aff_unique_abbr": "UJ;KAIST;UM;UdeM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;2;2;2;2;2;2", "aff_country_unique": "Poland;South Korea;Canada" }, { "title": "Newton Losses: Using Curvature Information for Learning with Differentiable Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93193", "id": "vjAORqq71s", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vjAORqq71s", "openreview": "https://openreview.net/forum?id=vjAORqq71s", "poster": "/media/PosterPDFs/NeurIPS%202024/93193.png?t=1733861635.504515", "project": "", "author_site": "Felix Petersen, Christian Borgelt, Tobias Sutter, Hilde Kuehne, Oliver Deussen, Stefano Ermon", "tldr": "", "abstract": "When training neural networks with custom objectives, such as ranking losses and shortest-path losses, a common problem is that they are, per se, non-differentiable. A popular approach is to continuously relax the objectives to provide gradients, enabling learning. However, such differentiable relaxations are often non-convex and can exhibit vanishing and exploding gradients, making them (already in isolation) hard to optimize. Here, the loss function poses the bottleneck when training a deep neural network. We present Newton Losses, a method for improving the performance of existing hard to optimize losses by exploiting their second-order information via their empirical Fisher and Hessian matrices. Instead of training the neural network with second-order techniques, we only utilize the loss function's second-order information to replace it by a Newton Loss, while training the network with gradient descent. This makes our method computationally efficient. We apply Newton Losses to eight differentiable algorithms for sorting and shortest-paths, achieving significant improvements for less-optimized differentiable algorithms, and consistent improvements, even for well-optimized differentiable algorithms.", "keywords": "differentiable;empirical fisher;hessian;continuous;relaxations;accelerated training;newton's method;stochastic smoothing", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Felix Petersen;Christian Borgelt;Tobias Sutter;Hilde Kuehne;Oliver Deussen;Stefano Ermon", "authorids": "~Felix_Petersen1;~Christian_Borgelt1;~Tobias_Sutter1;~Hilde_Kuehne5;~Oliver_Deussen1;~Stefano_Ermon1", "gender": "Not Specified;M;M;F;M;M", "homepage": "http://www.petersen.ai/;https://www.borgelt.net/;https://sites.google.com/view/suttert/home;https://hildekuehne.github.io;https://graphics.uni-konstanz.de;http://cs.stanford.edu/~ermon/", "dblp": "230/3983;b/ChristianBorgelt.html;01/10961;45/4963;48/2158;47/8135", "google_scholar": "v8Kat6YAAAAJ;https://scholar.google.de/citations?user=T50Bxb8AAAAJ;https://scholar.google.ch/citations?user=11gxHJIAAAAJ;pxhCcH0AAAAJ;https://scholar.google.de/scholar?hl=en;", "orcid": ";;0000-0003-1226-6845;0000-0003-1079-4441;0000-0001-5803-2185;", "linkedin": ";christian-borgelt-a2429071/;;hilde-kuehne-8b9aa661;;", "or_profile": "~Felix_Petersen1;~Christian_Borgelt1;~Tobias_Sutter1;~Hilde_Kuehne5;~Oliver_Deussen1;~Stefano_Ermon1", "aff": "Stanford University;Paris-Lodron-University of Salzburg;Universit\u00e4t Konstanz;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn, Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;University of Konstanz;Stanford University", "aff_domain": "stanford.edu;sbg.ac.at;uni-konstanz.de;cs.uni-bonn.de;uni-konstanz.de;stanford.edu", "position": "Postdoc;Full Professor;Assistant Professor;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\npetersen2024newton,\ntitle={Newton Losses: Using Curvature Information for Learning with Differentiable Algorithms},\nauthor={Felix Petersen and Christian Borgelt and Tobias Sutter and Hilde Kuehne and Oliver Deussen and Stefano Ermon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vjAORqq71s}\n}", "github": "", "reviewers": "tok2;qoSU;nJzH", "pdf_size": 634157, "rating": "4;5;6", "confidence": "5;2;3", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;2;3", "wc_summary": "29;74;79", "wc_strengths": "20;37;69", "wc_weaknesses": "24;213;33", "wc_questions": "102;2;184", "wc_limitations": "1;1;12", "wc_review": "176;327;377", "wc_reply_reviewers": "183;136;200", "wc_reply_authors": "455;141;83", "reply_reviewers": "1;2;1", "reply_authors": "2;3;2", "rating_avg": [ 5.0, 0.816496580927726 ], "confidence_avg": [ 3.3333333333333335, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 60.666666666666664, 22.484562605386735 ], "wc_strengths_avg": [ 42.0, 20.314198646923455 ], "wc_weaknesses_avg": [ 90.0, 87.05170877128145 ], "wc_questions_avg": [ 96.0, 74.4222189044822 ], "wc_limitations_avg": [ 4.666666666666667, 5.185449728701348 ], "wc_review_avg": [ 293.3333333333333, 85.44133790046959 ], "wc_reply_reviewers_avg": [ 173.0, 27.067816067549053 ], "wc_reply_authors_avg": [ 226.33333333333334, 163.4163054560006 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6546536707079771, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17410264126483629220&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "stanford.edu;sbg.ac.at;uni-konstanz.de;cs.uni-bonn.de;uni-konstanz.de;stanford.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Stanford University;Paris-Lodron-University of Salzburg;Universit\u00e4t Konstanz;Rheinische Friedrich-Wilhelms-Universit\u00e4t Bonn;University of Konstanz", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.stanford.edu;https://www.uni-salzburg.at;https://www.uni-konstanz.de;https://www.uni-bonn.de;https://www.uni-konstanz.de", "aff_unique_abbr": "Stanford;PLUS;Uni Konstanz;Uni Bonn;Uni Konstanz", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;2;2;2;0", "aff_country_unique": "United States;Austria;Germany" }, { "title": "Bileve: Securing Text Provenance in Large Language Models Against Spoofing with Bi-level Signature", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93192", "id": "vjCFnYTg67", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vjCFnYTg67", "openreview": "https://openreview.net/forum?id=vjCFnYTg67", "poster": "/media/PosterPDFs/NeurIPS%202024/93192.png?t=1733341513.7743566", "project": "", "author_site": "Tong Zhou, Xuandong Zhao, Xiaolin Xu, Shaolei Ren", "tldr": "", "abstract": "Text watermarks for large language models (LLMs) have been commonly used to identify the origins of machine-generated content, which is promising for assessing liability when combating deepfake or harmful content. While existing watermarking techniques typically prioritize robustness against removal attacks, unfortunately, they are vulnerable to spoofing attacks: malicious actors can subtly alter the meanings of LLM-generated responses or even forge harmful content, potentially misattributing blame to the LLM developer. To overcome this, we introduce a bi-level signature scheme, Bileve, which embeds fine-grained signature bits for integrity checks (mitigating spoofing attacks) as well as a coarse-grained signal to trace text sources when the signature is invalid (enhancing detectability) via a novel rank-based sampling strategy. Compared to conventional watermark detectors that only output binary results, Bileve can differentiate 5 scenarios during detection, reliably tracing text provenance and regulating LLMs. The experiments conducted on OPT-1.3B and LLaMA-7B demonstrate the effectiveness of Bileve in defeating spoofing attacks with enhanced detectability.", "keywords": "Large language model;Text Provenance;Spoofing attacks", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/bf094d4b2a2681c4060d01552d47e3f093379cd8.zip", "author": "Tong Zhou;Xuandong Zhao;Xiaolin Xu;Shaolei Ren", "authorids": "~Tong_Zhou3;~Xuandong_Zhao1;~Xiaolin_Xu3;~Shaolei_Ren1", "gender": "F;M;;", "homepage": "https://tongzhou0101.github.io/;https://xuandongzhao.github.io/;;", "dblp": "30/97-2;244/8033;;", "google_scholar": "6ghv7EwAAAAJ;CxeH4uoAAAAJ;;", "orcid": "0000-0002-8645-5246;;;", "linkedin": "tong-zhou-292a931b5/;xuandong-zhao-a3270610b/;;", "or_profile": "~Tong_Zhou3;~Xuandong_Zhao1;~Xiaolin_Xu3;~Shaolei_Ren1", "aff": "Northeastern University;UC Santa Barbara;;", "aff_domain": "neu.edu;ucsb.edu;;", "position": "PhD student;PhD student;;", "bibtex": "@inproceedings{\nzhou2024bileve,\ntitle={Bileve: Securing Text Provenance in Large Language Models Against Spoofing with Bi-level Signature},\nauthor={Tong Zhou and Xuandong Zhao and Xiaolin Xu and Shaolei Ren},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vjCFnYTg67}\n}", "github": "", "reviewers": "4vVx;ggNL;6UZB;ezrE", "pdf_size": 1750755, "rating": "3;3;6;7", "confidence": "4;4;3;4", "soundness": "1;2;3;3", "novelty": "2;1;3;3", "presentation": "1;1;3;3", "wc_summary": "47;45;88;63", "wc_strengths": "15;29;52;71", "wc_weaknesses": "375;535;59;108", "wc_questions": "44;23;39;3", "wc_limitations": "32;13;15;3", "wc_review": "513;645;253;248", "wc_reply_reviewers": "281;153;12;27", "wc_reply_authors": "649;319;27;22", "reply_reviewers": "2;1;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 4.75, 1.7853571071357126 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.0, 1.0 ], "wc_summary_avg": [ 60.75, 17.210098779495716 ], "wc_strengths_avg": [ 41.75, 21.44032415799724 ], "wc_weaknesses_avg": [ 269.25, 194.94406248973064 ], "wc_questions_avg": [ 27.25, 16.005858302509115 ], "wc_limitations_avg": [ 15.75, 10.425329730996522 ], "wc_review_avg": [ 414.75, 170.76061460418794 ], "wc_reply_reviewers_avg": [ 118.25, 108.75517229079269 ], "wc_reply_authors_avg": [ 254.25, 257.6833085397655 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.4042260417272216, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4596148564248795133&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "neu.edu;ucsb.edu;;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Northeastern University;University of California, Santa Barbara", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;https://www.ucsb.edu", "aff_unique_abbr": "NEU;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "$\\epsilon$-Softmax: Approximating One-Hot Vectors for Mitigating Label Noise", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93191", "id": "vjsd8Bcipv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vjsd8Bcipv", "openreview": "https://openreview.net/forum?id=vjsd8Bcipv", "poster": "/media/PosterPDFs/NeurIPS%202024/93191.png?t=1729521755.7931905", "project": "", "author_site": "Jialiang Wang, Xiong Zhou, Deming Zhai, Junjun Jiang, Xiangyang Ji, Xianming Liu", "tldr": "", "abstract": "Noisy labels pose a common challenge for training accurate deep neural networks. To mitigate label noise, prior studies have proposed various robust loss functions to achieve noise tolerance in the presence of label noise, particularly symmetric losses. However, they usually suffer from the underfitting issue due to the overly strict symmetric condition. In this work, we propose a simple yet effective approach for relaxing the symmetric condition, namely **$\\epsilon$-softmax**, which simply modifies the outputs of the softmax layer to approximate one-hot vectors with a controllable error $\\epsilon$. Essentially, ***$\\epsilon$-softmax** not only acts as an alternative for the softmax layer, but also implicitly plays the crucial role in modifying the loss function.* We prove theoretically that **$\\epsilon$-softmax** can achieve noise-tolerant learning with controllable excess risk bound for almost any loss function. Recognizing that **$\\epsilon$-softmax**-enhanced losses may slightly reduce fitting ability on clean datasets, we further incorporate them with one symmetric loss, thereby achieving a better trade-off between robustness and effective learning. Extensive experiments demonstrate the superiority of our method in mitigating synthetic and real-world label noise.", "keywords": "Learning with Noisy Labels;Robust Loss Function;Excess Risk Bound", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/3463b6976fed01ff0ba9d8e36d7454d8cea41634.zip", "author": "Jialiang Wang;Xiong Zhou;Deming Zhai;Junjun Jiang;Xiangyang Ji;Xianming Liu", "authorids": "~Jialiang_Wang3;~Xiong_Zhou3;~Deming_Zhai2;~Junjun_Jiang2;~Xiangyang_Ji1;~Xianming_Liu5", "gender": "M;M;F;M;;M", "homepage": "https://cswjl.github.io/;https://hitcszx.github.io/;;http://homepage.hit.edu.cn/jiangjunjun;;http://homepage.hit.edu.cn/xmliu", "dblp": ";;69/8937;https://dblp.uni-trier.de/pers/hd/j/Jiang:Junjun;;89/58201.html", "google_scholar": ";BMGootgAAAAJ;;WNH2_rgAAAAJ;;", "orcid": ";0000-0002-0856-6696;;0000-0002-5694-505X;;0000-0002-8857-1785", "linkedin": ";;;;;", "or_profile": "~Jialiang_Wang3;~Xiong_Zhou3;~Deming_Zhai2;~Junjun_Jiang2;~Xiangyang_Ji1;~Xianming_Liu5", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;;hit.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nwang2024epsilonsoftmax,\ntitle={\\${\\textbackslash}epsilon\\$-Softmax: Approximating One-Hot Vectors for Mitigating Label Noise},\nauthor={Jialiang Wang and Xiong Zhou and Deming Zhai and Junjun Jiang and Xiangyang Ji and Xianming Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vjsd8Bcipv}\n}", "github": "", "reviewers": "4jLX;NkMU;5cpw;MvPF;d71F", "pdf_size": 6214248, "rating": "5;5;5;7;7", "confidence": "5;4;4;4;3", "soundness": "2;2;3;3;4", "novelty": "3;2;2;3;4", "presentation": "4;2;3;3;4", "wc_summary": "38;73;75;59;85", "wc_strengths": "18;88;121;176;69", "wc_weaknesses": "32;308;276;35;34", "wc_questions": "233;28;101;42;85", "wc_limitations": "10;12;1;3;7", "wc_review": "331;509;574;315;280", "wc_reply_reviewers": "29;21;63;16;25", "wc_reply_authors": "52;32;35;32;32", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.9797958971132712 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.2, 0.7483314773547882 ], "wc_summary_avg": [ 66.0, 16.272676485446393 ], "wc_strengths_avg": [ 94.4, 52.70521795799729 ], "wc_weaknesses_avg": [ 137.0, 126.96456198483102 ], "wc_questions_avg": [ 97.8, 72.71698563609468 ], "wc_limitations_avg": [ 6.6, 4.127953488110059 ], "wc_review_avg": [ 401.8, 117.06989365332147 ], "wc_reply_reviewers_avg": [ 30.8, 16.666133324799727 ], "wc_reply_authors_avg": [ 36.6, 7.787168933572715 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.6454972243679028, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:eM9yhhlBZcAJ:scholar.google.com/&scioq=%24%5Cepsilon%24-Softmax:+Approximating+One-Hot+Vectors+for+Mitigating+Label+Noise&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;;hit.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "PaDeLLM-NER: Parallel Decoding in Large Language Models for Named Entity Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93190", "id": "vjw4TIf8Bo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vjw4TIf8Bo", "openreview": "https://openreview.net/forum?id=vjw4TIf8Bo", "poster": "/media/PosterPDFs/NeurIPS%202024/93190.png?t=1733390928.3111498", "project": "", "author_site": "Jinghui Lu, Yanjie Wang, Ziwei Yang, Xuejing Liu, Brian Mac Namee, Can Huang", "tldr": "", "abstract": "In this study, we aim to reduce generation latency for Named Entity Recognition (NER) with Large Language Models (LLMs). The main cause of high latency in LLMs is the sequential decoding process, which autoregressively generates all labels and mentions for NER, significantly increase the sequence length. To this end, we introduce Parallel Decoding in LLM for NE} (PaDeLLM-NER), a approach that integrates seamlessly into existing generative model frameworks without necessitating additional modules or architectural modifications. PaDeLLM-NER allows for the simultaneous decoding of all mentions, thereby reducing generation latency. Experiments reveal that PaDeLLM-NER significantly increases inference speed that is 1.76 to 10.22 times faster than the autoregressive approach for both English and Chinese. Simultaneously it maintains the quality of predictions as evidenced by the performance that is on par with the state-of-the-art across various datasets. All resources are available at https://github.com/GeorgeLuImmortal/PaDeLLM_NER.", "keywords": "NER;inference speedup;LLM", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jinghui Lu;Yanjie Wang;Ziwei Yang;Xuejing Liu;Brian Mac Namee;Can Huang", "authorids": "~Jinghui_Lu2;~Yanjie_Wang2;~Ziwei_Yang4;~Xuejing_Liu1;~Brian_Mac_Namee1;~Can_Huang1", "gender": "M;M;F;;M;M", "homepage": "https://github.com/princewang1994;http://yangziwei.cn;https://gingl.github.io;https://people.ucd.ie/brian.macnamee;;https://georgeluimmortal.github.io/", "dblp": ";;;83/5307;;14/983", "google_scholar": ";;;rZ3_vb0AAAAJ;;ZzK_UdYAAAAJ", "orcid": ";;;0000-0003-2518-0274;0009-0006-9126-3069;0000-0001-7149-6961", "linkedin": ";;;brian-mac-namee-7960605/;https://www.linkedin.cn/incareer/in/can-huang-3175b256;jinghui-lu-2712aa105", "or_profile": "~Yanjie_Wang2;~Ziwei_Yang4;~Xuejing_Liu1;~Brian_Mac_Namee1;~Can_Huang1;~JINGHUI_LU1", "aff": "ByteDance Inc;;Sensetime;University College Dublin;Bytedance;ByteDance Inc.", "aff_domain": "bytedance.com;;sensetime.com;ucd.ie;bytedance.com;bytedance.com", "position": "Researcher;;Researcher;Associate Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nlu2024padellmner,\ntitle={PaDe{LLM}-{NER}: Parallel Decoding in Large Language Models for Named Entity Recognition},\nauthor={Jinghui Lu and Yanjie Wang and Ziwei Yang and Xuejing Liu and Brian Mac Namee and Can Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vjw4TIf8Bo}\n}", "github": "", "reviewers": "1fG5;XzjF;GKXR;14Sq", "pdf_size": 665510, "rating": "6;6;6;9", "confidence": "4;3;4;4", "soundness": "3;3;2;3", "novelty": "2;3;2;3", "presentation": "2;3;3;3", "wc_summary": "103;76;59;50", "wc_strengths": "33;82;37;21", "wc_weaknesses": "194;103;204;154", "wc_questions": "4;1;34;60", "wc_limitations": "6;1;29;1", "wc_review": "340;263;363;286", "wc_reply_reviewers": "121;0;191;0", "wc_reply_authors": "20;0;658;0", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;3;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.0, 20.18662923818635 ], "wc_strengths_avg": [ 43.25, 23.134119823325893 ], "wc_weaknesses_avg": [ 163.75, 39.75157229594824 ], "wc_questions_avg": [ 24.75, 24.097458372201828 ], "wc_limitations_avg": [ 9.25, 11.583932838203095 ], "wc_review_avg": [ 313.0, 40.18084120572888 ], "wc_reply_reviewers_avg": [ 78.0, 81.8321452731138 ], "wc_reply_authors_avg": [ 169.5, 282.15377013252896 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2899569886688355434&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 5, "email": "bytedance.com;;sensetime.com;ucd.ie;bytedance.com;bytedance.com", "author_num": 6, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "ByteDance;SenseTime;University College Dublin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;https://www.sensetime.com;https://www.ucd.ie", "aff_unique_abbr": "ByteDance;SenseTime;UCD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Ireland" }, { "title": "CALE: Continuous Arcade Learning Environment", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97454", "id": "vlUK2h1Nvw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vlUK2h1Nvw", "openreview": "https://openreview.net/forum?id=vlUK2h1Nvw", "poster": "", "project": "", "author_site": "Jesse Farebrother, Pablo Samuel Castro", "tldr": "", "abstract": "We introduce the Continuous Arcade Learning Environment (CALE), an extension of the well-known Arcade Learning Environment (ALE) [Bellemare et al., 2013]. The CALE uses the same underlying emulator of the Atari 2600 gaming system (Stella), but adds support for continuous actions. This enables the benchmarking and evaluation of continuous-control agents (such as PPO [Schulman et al., 2017] and SAC [Haarnoja et al., 2018]) and value-based agents (such as DQN [Mnih et al., 2015] and Rainbow [Hessel et al., 2018]) on the same environment suite. We provide a series of open questions and research directions that CALE enables, as well as initial baseline results using Soft Actor-Critic. CALE is available as part of the ALE athttps://github.com/Farama-Foundation/Arcade-Learning-Environment.", "keywords": "Deep reinforcement learning;Atari;Environment", "primary_area": "", "supplementary_material": "", "author": "Jesse Farebrother;Pablo Samuel Castro", "authorids": "~Jesse_Farebrother1;~Pablo_Samuel_Castro1", "gender": "M;M", "homepage": "https://brosa.ca;https://psc-g.github.io/", "dblp": "228/6862;05/5455", "google_scholar": "cA12XHcAAAAJ;https://scholar.google.ca/citations?user=jn5r6TsAAAAJ", "orcid": "0000-0002-5178-4947;", "linkedin": "jessefarebro/;pablo-samuel-castro-2113641b/", "or_profile": "~Jesse_Farebrother1;~Pablo_Samuel_Castro1", "aff": "Google DeepMind;Google", "aff_domain": "google.com;google.com", "position": "Student Researcher;Researcher", "bibtex": "@inproceedings{\nfarebrother2024cale,\ntitle={{CALE}: Continuous Arcade Learning Environment},\nauthor={Jesse Farebrother and Pablo Samuel Castro},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vlUK2h1Nvw}\n}", "github": "", "reviewers": "Bcrv;jHe1;JNAR", "pdf_size": 3264187, "rating": "6;6;7", "confidence": "4;5;4", "wc_summary_and_contributions": "20;124;63", "wc_strengths": "57;148;18", "wc_improvement": "322;32;14", "wc_limitations": "1;14;1", "wc_correctness": "6;87;1", "wc_clarity": "8;85;5", "wc_relation_to_prior_work": "8;60;7", "wc_documentation": "18;27;8", "wc_additional_feedback": "1;1;1", "wc_review": "441;578;118", "wc_reply_reviewers": "0;135;43", "wc_reply_authors": "0;63;18", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 69.0, 42.66927075386533 ], "wc_strengths_avg": [ 74.33333333333333, 54.46915538989832 ], "wc_improvement_avg": [ 122.66666666666667, 141.1413790810083 ], "wc_limitations_avg": [ 5.333333333333333, 6.128258770283412 ], "wc_correctness_avg": [ 31.333333333333332, 39.41516910474387 ], "wc_clarity_avg": [ 32.666666666666664, 37.025516726831626 ], "wc_relation_to_prior_work_avg": [ 25.0, 24.752104287649296 ], "wc_documentation_avg": [ 17.666666666666668, 7.760297817881877 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 379.0, 192.84363268375407 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 56.31064631922536 ], "wc_reply_authors_avg": [ 27.0, 26.49528259898354 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=837176122668934422&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "google.com;google.com", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Remix-DiT: Mixing Diffusion Transformers for Multi-Expert Denoising", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93189", "id": "vo5LONGAdo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vo5LONGAdo", "openreview": "https://openreview.net/forum?id=vo5LONGAdo", "poster": "", "project": "", "author_site": "Gongfan Fang, Xinyin Ma, Xinchao Wang", "tldr": "", "abstract": "Transformer-based diffusion models have achieved significant advancements across a variety of generative tasks. However, producing high-quality outputs typically necessitates large transformer models, which result in substantial training and inference overhead. In this work, we investigate an alternative approach involving multiple experts for denoising, and introduce RemixDiT, a novel method designed to enhance output quality at a low cost. The goal of RemixDiT is to craft N diffusion experts for different denoising timesteps, yet without the need for expensive training of N independent models. To achieve this, RemixDiT employs K basis models (where K < N) and utilizes learnable mixing coefficients to adaptively craft expert models. This design offers two significant advantages: first, although the total model size is increased, the model produced by the mixing operation shares the same architecture as a plain model, making the overall model as efficient as a standard diffusion transformer. Second, the learnable mixing adaptively allocates model capacity across timesteps, thereby effectively improving generation quality. Experiments conducted on the ImageNet dataset demonstrate that RemixDiT achieves promising results compared to standard diffusion transformers and other multiple-expert methods.", "keywords": "Diffusion Models;Multiple Experts", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Gongfan Fang;Xinyin Ma;Xinchao Wang", "authorids": "~Gongfan_Fang2;~Xinyin_Ma1;~Xinchao_Wang1", "gender": "M;F;M", "homepage": "https://fangggf.github.io/;https://horseee.github.io;https://sites.google.com/site/sitexinchaowang/", "dblp": "243/5768;267/2244;", "google_scholar": "489YZ_kAAAAJ;jFUKS0oAAAAJ;https://scholar.google.com.tw/citations?user=w69Buq0AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gongfan_Fang2;~Xinyin_Ma1;~Xinchao_WANG3", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;nus.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfang2024remixdit,\ntitle={Remix-DiT: Mixing Diffusion Transformers for Multi-Expert Denoising},\nauthor={Gongfan Fang and Xinyin Ma and Xinchao Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vo5LONGAdo}\n}", "github": "", "reviewers": "2dkC;KY1f;7vev;cagX", "pdf_size": 1690765, "rating": "5;6;7;7", "confidence": "4;5;4;4", "soundness": "3;3;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;3", "wc_summary": "130;84;80;78", "wc_strengths": "53;42;86;86", "wc_weaknesses": "167;125;239;113", "wc_questions": "157;50;127;5", "wc_limitations": "8;1;8;19", "wc_review": "515;302;540;301", "wc_reply_reviewers": "0;20;53;0", "wc_reply_authors": "0;0;17;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.0, 21.470910553583888 ], "wc_strengths_avg": [ 66.75, 19.638928178492836 ], "wc_weaknesses_avg": [ 161.0, 49.29503017546495 ], "wc_questions_avg": [ 84.75, 60.35882288447978 ], "wc_limitations_avg": [ 9.0, 6.442049363362563 ], "wc_review_avg": [ 414.5, 113.34571010849947 ], "wc_reply_reviewers_avg": [ 18.25, 21.660736367907717 ], "wc_reply_authors_avg": [ 4.25, 7.361215932167728 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QbvSZ7HG-YQJ:scholar.google.com/&scioq=Remix-DiT:+Mixing+Diffusion+Transformers+for+Multi-Expert+Denoising&hl=en&as_sdt=0,34", "gs_version_total": 4, "email": "u.nus.edu;u.nus.edu;nus.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "UltraPixel: Advancing Ultra High-Resolution Image Synthesis to New Peaks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93188", "id": "voJCpdlw53", "proceeding": "", "pdf": "https://openreview.net/pdf?id=voJCpdlw53", "openreview": "https://openreview.net/forum?id=voJCpdlw53", "poster": "/media/PosterPDFs/NeurIPS%202024/93188.png?t=1731601466.3476553", "project": "", "author_site": "Jingjing Ren, Wenbo Li, Haoyu Chen, Renjing Pei, Bin Shao, Yong Guo, Long Peng, Fenglong Song, Lei Zhu", "tldr": "", "abstract": "Ultra-high-resolution image generation poses great challenges, such as increased semantic planning complexity and detail synthesis difficulties, alongside substantial training resource demands. We present UltraPixel, a novel architecture utilizing cascade diffusion models to generate high-quality images at multiple resolutions (\\textit{e.g.}, 1K, 2K, and 4K) within a single model, while maintaining computational efficiency. UltraPixel leverages semantics-rich representations of lower-resolution images in a later denoising stage to guide the whole generation of highly detailed high-resolution images, significantly reducing complexity. Specifically, we introduce implicit neural representations for continuous upsampling and scale-aware normalization layers adaptable to various resolutions. Notably, both low- and high-resolution processes are performed in the most compact space, sharing the majority of parameters with less than 3$\\%$ additional parameters for high-resolution outputs, largely enhancing training and inference efficiency. Our model achieves fast training with reduced data requirements, producing photo-realistic high-resolution images and demonstrating state-of-the-art performance in extensive experiments.", "keywords": "Image generation;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/71676d77d8ab54565410aff68c91516abc3b9a95.zip", "author": "Jingjing Ren;Wenbo Li;Haoyu Chen;Renjing Pei;Bin Shao;Yong Guo;Long Peng;Fenglong Song;Lei Zhu", "authorids": "~Jingjing_Ren1;~Wenbo_Li6;~Haoyu_Chen2;~Renjing_Pei1;~Bin_Shao2;~Yong_Guo1;~Long_Peng1;~Fenglong_Song1;~Lei_Zhu1", "gender": ";M;M;F;M;M;;;M", "homepage": ";https://fenglinglwb.github.io/;https://haoyuchen.com;;;http://www.guoyongcs.com/;;;https://sites.google.com/site/indexlzhu/home?authuser=0", "dblp": ";51/3185-2.html;146/8170-3;181/9091;;;;;99/549-3", "google_scholar": ";foGn_TIAAAAJ;https://scholar.google.com.hk/citations?user=KWbcBucAAAAJ;;;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.hk/citations?user=AQtqhaYAAAAJ", "orcid": ";;0000-0001-7618-9733;0000-0001-7513-6576;;0000-0002-3444-4588;;;", "linkedin": ";;;;%E6%BB%A8-%E9%82%B5-6127151a3/;;;;", "or_profile": "~Jingjing_Ren1;~Wenbo_Li6;~Haoyu_Chen2;~Renjing_Pei1;~Bin_Shao2;~Yong_Guo1;~Long_Peng1;~Fenglong_Song1;~Lei_Zhu1", "aff": ";Huawei Technologies Ltd.;Hong Kong University of Science and Technology (Guangzhou);Huawei Technologies Ltd.;Huawei Technologies Ltd.;Saarland Informatics Campus, Max-Planck Institute;;;Hong Kong University of Science and Technology (Guangzhou) & HKUST", "aff_domain": ";huawei.com;connect.hkust-gz.edu.cn;huawei.com;huawei.com;mpi-inf.mpg.de;;;ust.hk", "position": ";Researcher;PhD student;Principal Engineer;Researcher;Postdoc;;;Assistant Professor", "bibtex": "@inproceedings{\nren2024ultrapixel,\ntitle={UltraPixel: Advancing Ultra High-Resolution Image Synthesis to New Peaks},\nauthor={Jingjing Ren and Wenbo Li and Haoyu Chen and Renjing Pei and Bin Shao and Yong Guo and Long Peng and Fenglong Song and Lei Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=voJCpdlw53}\n}", "github": "", "reviewers": "rwnV;z6xV;UnLF", "pdf_size": 50345074, "rating": "4;6;8", "confidence": "5;4;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "82;69;68", "wc_strengths": "39;93;74", "wc_weaknesses": "106;82;53", "wc_questions": "20;151;128", "wc_limitations": "10;60;2", "wc_review": "257;455;325", "wc_reply_reviewers": "0;0;38", "wc_reply_authors": "75;75;0", "reply_reviewers": "0;0;1", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 1.632993161855452 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 73.0, 6.377042156569663 ], "wc_strengths_avg": [ 68.66666666666667, 22.365648262955002 ], "wc_weaknesses_avg": [ 80.33333333333333, 21.66923061752668 ], "wc_questions_avg": [ 99.66666666666667, 57.11003025349886 ], "wc_limitations_avg": [ 24.0, 25.664502073226878 ], "wc_review_avg": [ 345.6666666666667, 82.14350586355293 ], "wc_reply_reviewers_avg": [ 12.666666666666666, 17.913371790059205 ], "wc_reply_authors_avg": [ 50.0, 35.35533905932738 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2749627882598037985&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";huawei.com;connect.hkust-gz.edu.cn;huawei.com;huawei.com;mpi-inf.mpg.de;;;ust.hk", "author_num": 9, "aff_unique_index": "0;1;0;0;2;1", "aff_unique_norm": "Huawei;Hong Kong University of Science and Technology;Max-Planck Institute", "aff_unique_dep": "Huawei Technologies;;Informatics", "aff_unique_url": "https://www.huawei.com;https://www.ust.hk;https://www.mpi-sws.org", "aff_unique_abbr": "Huawei;HKUST;MPI-SWS", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Hong Kong SAR;Saarland;Guangzhou", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Germany" }, { "title": "MoTE: Reconciling Generalization with Specialization for Visual-Language to Video Knowledge Transfer", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93187", "id": "vpEq2bzsS0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vpEq2bzsS0", "openreview": "https://openreview.net/forum?id=vpEq2bzsS0", "poster": "/media/PosterPDFs/NeurIPS%202024/93187.png?t=1731681730.6741846", "project": "", "author_site": "Minghao Zhu, Zhengpu Wang, Mengxian Hu, Ronghao Dang, Xiao Lin, Xun Zhou, Chengju Liu, Qijun Chen", "tldr": "", "abstract": "Transferring visual-language knowledge from large-scale foundation models for video recognition has proved to be effective. To bridge the domain gap, additional parametric modules are added to capture the temporal information. However, zero-shot generalization diminishes with the increase in the number of specialized parameters, making existing works a trade-off between zero-shot and close-set performance. In this paper, we present MoTE, a novel framework that enables generalization and specialization to be balanced in one unified model. Our approach tunes a mixture of temporal experts to learn multiple task views with various degrees of data fitting. To maximally preserve the knowledge of each expert, we propose Weight Merging Regularization, which regularizes the merging process of experts in weight space. Additionally with temporal feature modulation to regularize the contribution of temporal feature during test. We achieve a sound balance between zero-shot and close-set video recognition tasks and obtain state-of-the-art or competitive results on various datasets, including Kinetics-400 \\& 600, UCF, and HMDB. Code is available at https://github.com/ZMHH-H/MoTE.", "keywords": "Video Recognition;Vision-Language Model;Transfer Learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/5e13fb402a20c4d39b0ae911f1f589e529732d74.zip", "author": "Minghao Zhu;Zhengpu Wang;Mengxian Hu;Ronghao Dang;Xiao Lin;Xun Zhou;Chengju Liu;Qijun Chen", "authorids": "~Minghao_Zhu2;~Zhengpu_Wang1;~Mengxian_Hu1;~Ronghao_Dang1;~Xiao_Lin9;~Xun_Zhou4;~Chengju_Liu1;~Qijun_Chen2", "gender": "M;F;M;M;M;F;M;M", "homepage": "https://github.com/ZMHH-H;https://github.com/pupubushilulu;https://humengxian.github.io/homepage/;https://rh-dang.github.io/;;;http://rail.tongji.edu.cn;https://rail.tongji.edu.cn/main.htm", "dblp": ";;;318/1427;;81/;75;09", "google_scholar": ";;;https://scholar.google.com.hk/citations?hl=zh-CN;;;;", "orcid": "0009-0008-0291-5841;0009-0004-8113-3856;0009-0004-9844-5768;0000-0003-1176-3735;0000-0001-6675-9632;0000-0001-7543-0855;0000-0001-5644-1188;0009-0000-9989-8233", "linkedin": "%E9%93%AD%E7%9A%93-%E6%9C%B1-84339b269/;;;;;;;", "or_profile": "~Minghao_Zhu2;~Zhengpu_Wang1;~Mengxian_Hu1;~Ronghao_Dang1;~Xun_Zhou4;~Chengju_Liu1;~Qijun_Chen2;~Chris_Lin2", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "PhD student;MS student;PhD student;MS student;PhD student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhu2024mote,\ntitle={Mo{TE}: Reconciling Generalization with Specialization for Visual-Language to Video Knowledge Transfer},\nauthor={Minghao Zhu and Zhengpu Wang and Mengxian Hu and Ronghao Dang and Xiao Lin and Xun Zhou and Chengju Liu and Qijun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vpEq2bzsS0}\n}", "github": "", "reviewers": "XfMx;7UgE;s5Zs;KXzA", "pdf_size": 2148829, "rating": "6;6;6;6", "confidence": "3;4;4;4", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "71;122;177;125", "wc_strengths": "39;108;94;66", "wc_weaknesses": "107;190;241;155", "wc_questions": "65;150;5;117", "wc_limitations": "83;47;44;9", "wc_review": "365;617;561;472", "wc_reply_reviewers": "0;18;278;0", "wc_reply_authors": "0;0;709;0", "reply_reviewers": "0;1;4;0", "reply_authors": "1;1;5;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 123.75, 37.49249924984996 ], "wc_strengths_avg": [ 76.75, 26.52710877574109 ], "wc_weaknesses_avg": [ 173.25, 48.97129261107981 ], "wc_questions_avg": [ 84.25, 54.87884382892919 ], "wc_limitations_avg": [ 45.75, 26.185635375144138 ], "wc_review_avg": [ 503.75, 95.34509688494737 ], "wc_reply_reviewers_avg": [ 74.0, 118.0084742719776 ], "wc_reply_authors_avg": [ 177.25, 307.0060056415835 ], "reply_reviewers_avg": [ 1.25, 1.6393596310755 ], "reply_authors_avg": [ 2.0, 1.7320508075688772 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3940495650164892375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning Segmentation from Point Trajectories", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93186", "id": "vt2qkE1Oax", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vt2qkE1Oax", "openreview": "https://openreview.net/forum?id=vt2qkE1Oax", "poster": "", "project": "", "author_site": "Laurynas Karazija, Iro Laina, Christian Rupprecht, Andrea Vedaldi", "tldr": "", "abstract": "We consider the problem of segmenting objects in videos based on their motion and no other forms of supervision. Prior work has often approached this problem by using the principle of common fate, namely the fact that the motion of points that belong to the same object is strongly correlated. However, most authors have only considered instantaneous motion from optical flow. In this work, we present a way to train a segmentation network using long-term point trajectories as a supervisory signal to complement optical flow. The key difficulty is that long-term motion, unlike instantaneous motion, is difficult to model -- any parametric approximation is unlikely to capture complex motion patterns over long periods of time. We instead draw inspiration from subspace clustering approaches, proposing a loss function that seeks to group the trajectories into low-rank matrices where the motion of object points can be approximately explained as a linear combination of other point tracks. Our method outperforms the prior art on motion-based segmentation, which shows the utility of long-term motion and the effectiveness of our formulation.", "keywords": "unsupervised segmentation;motion segmentation;point tracking", "primary_area": "machine_vision", "supplementary_material": "/attachment/368bc3f67b3140b95741558037789ba7f6f4fb96.zip", "author": "Laurynas Karazija;Iro Laina;Christian Rupprecht;Andrea Vedaldi", "authorids": "~Laurynas_Karazija1;~Iro_Laina1;~Christian_Rupprecht1;~Andrea_Vedaldi1", "gender": "M;M;M;", "homepage": "https://karazijal.github.io;http://chrirupp.github.io;https://www.robots.ox.ac.uk/~vedaldi/;", "dblp": "206/6117;https://dblp.uni-trier.de/pid/76/744-1;99/2825;182/2070", "google_scholar": "Kyt9trwAAAAJ;https://scholar.google.de/citations?user=IrYlproAAAAJ;bRT7t28AAAAJ;n9nXAPcAAAAJ", "orcid": ";;0000-0003-1374-2858;0000-0001-8857-7709", "linkedin": "laurynas-karazija-b9591b103/;;;", "or_profile": "~Laurynas_Karazija1;~Christian_Rupprecht1;~Andrea_Vedaldi1;~Iro_Laina2", "aff": "University of Oxford;University of Oxford;Meta;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk;meta.com;ox.ac.uk", "position": "PhD student;Associate Professor;Researcher;Lecturer", "bibtex": "@inproceedings{\nkarazija2024learning,\ntitle={Learning Segmentation from Point Trajectories},\nauthor={Laurynas Karazija and Iro Laina and Christian Rupprecht and Andrea Vedaldi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vt2qkE1Oax}\n}", "github": "", "reviewers": "SSEv;ceYj;yFh7;8Nte;Nk1m", "pdf_size": 2444215, "rating": "5;6;6;6;7", "confidence": "4;3;4;5;4", "soundness": "4;3;3;3;3", "novelty": "3;2;3;2;3", "presentation": "3;3;2;3;4", "wc_summary": "67;76;100;61;66", "wc_strengths": "72;36;83;42;44", "wc_weaknesses": "107;61;183;155;21", "wc_questions": "53;101;91;118;2", "wc_limitations": "47;24;20;134;1", "wc_review": "346;298;477;510;134", "wc_reply_reviewers": "0;10;33;0;0", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;1;1;0;0", "reply_authors": "1;1;1;1;1", "rating_avg": [ 6.0, 0.6324555320336759 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 74.0, 13.870832707519762 ], "wc_strengths_avg": [ 55.4, 18.56448221739567 ], "wc_weaknesses_avg": [ 105.4, 59.29451913963044 ], "wc_questions_avg": [ 73.0, 41.41014368485094 ], "wc_limitations_avg": [ 45.2, 46.74783417442994 ], "wc_review_avg": [ 353.0, 134.9518432626987 ], "wc_reply_reviewers_avg": [ 8.6, 12.799999999999999 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6940060207683950192&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "ox.ac.uk;ox.ac.uk;meta.com;ox.ac.uk", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Oxford;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ox.ac.uk;https://meta.com", "aff_unique_abbr": "Oxford;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Average gradient outer product as a mechanism for deep neural collapse", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93185", "id": "vtRotUd539", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vtRotUd539", "openreview": "https://openreview.net/forum?id=vtRotUd539", "poster": "", "project": "", "author_site": "Daniel Beaglehole, Peter S\u00faken\u00edk, Marco Mondelli, Misha Belkin", "tldr": "", "abstract": "Deep Neural Collapse (DNC) refers to the surprisingly rigid structure of the data representations in the final layers of Deep Neural Networks (DNNs). Though the phenomenon has been measured in a variety of settings, its emergence is typically explained via data-agnostic approaches, such as the unconstrained features model. In this work, we introduce a data-dependent setting where DNC forms due to feature learning through the average gradient outer product (AGOP). The AGOP is defined with respect to a learned predictor and is equal to the uncentered covariance matrix of its input-output gradients averaged over the training dataset. Deep Recursive Feature Machines are a method that constructs a neural network by iteratively mapping the data with the AGOP and applying an untrained random feature map. We demonstrate theoretically and empirically that DNC occurs in Deep Recursive Feature Machines as a consequence of the projection with the AGOP matrix computed at each layer. We then provide evidence that this mechanism holds for neural networks more generally. We show that the right singular vectors and values of the weights can be responsible for the majority of within-class variability collapse for DNNs trained in the feature learning regime. As observed in recent work, this singular structure is highly correlated with that of the AGOP.", "keywords": "Theory of deep learning;neural collapse;average gradient outer product;kernel methods;feature learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Daniel Beaglehole;Peter S\u00faken\u00edk;Marco Mondelli;Mikhail Belkin", "authorids": "~Daniel_Beaglehole1;~Peter_S\u00faken\u00edk1;~Marco_Mondelli1;~Mikhail_Belkin1", "gender": "M;M;M;", "homepage": ";https://research-explorer.app.ist.ac.at/person/d64d6a8d-eb8e-11eb-b029-96fd216dec3c;http://marcomondelli.com;http://misha.belkin-wang.org/", "dblp": ";304/2274;120/7089;", "google_scholar": ";qEhrUDAAAAAJ;BHdSb5AAAAAJ;Iwd9DdkAAAAJ", "orcid": ";;;", "linkedin": "daniel-beaglehole-9271551a6/;;;", "or_profile": "~Daniel_Beaglehole1;~Peter_S\u00faken\u00edk1;~Marco_Mondelli1;~Misha_Belkin1", "aff": "University of California, San Diego;Institute of Science and Technology;Institute of Science and Technology;University of California, San Diego", "aff_domain": "ucsd.edu;ist.ac.at;ist.ac.at;ucsd.edu", "position": "PhD student;PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nbeaglehole2024average,\ntitle={Average gradient outer product as a mechanism for deep neural collapse},\nauthor={Daniel Beaglehole and Peter S{\\'u}ken{\\'\\i}k and Marco Mondelli and Mikhail Belkin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vtRotUd539}\n}", "github": "", "reviewers": "Ljpy;uXDR;1F1v;AtBe", "pdf_size": 6370456, "rating": "5;6;6;6", "confidence": "3;3;2;4", "soundness": "3;3;2;4", "novelty": "2;3;2;4", "presentation": "3;1;2;3", "wc_summary": "145;82;59;102", "wc_strengths": "82;67;47;80", "wc_weaknesses": "152;918;19;344", "wc_questions": "51;113;38;180", "wc_limitations": "7;51;2;30", "wc_review": "437;1231;165;736", "wc_reply_reviewers": "29;307;30;0", "wc_reply_authors": "0;306;0;0", "reply_reviewers": "1;2;1;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 97.0, 31.614869919074472 ], "wc_strengths_avg": [ 69.0, 13.946325680981353 ], "wc_weaknesses_avg": [ 358.25, 343.2028372551719 ], "wc_questions_avg": [ 95.5, 56.420297765963625 ], "wc_limitations_avg": [ 22.5, 19.551214796017153 ], "wc_review_avg": [ 642.25, 395.3829630876879 ], "wc_reply_reviewers_avg": [ 91.5, 125.00099999600003 ], "wc_reply_authors_avg": [ 76.5, 132.50188677901912 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12914200192885422995&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucsd.edu;ist.ac.at;ist.ac.at;ucsd.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of California, San Diego;Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;", "aff_unique_abbr": "UCSD;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "GREAT Score: Global Robustness Evaluation of Adversarial Perturbation using Generative Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93184", "id": "vunJCq9PwU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vunJCq9PwU", "openreview": "https://openreview.net/forum?id=vunJCq9PwU", "poster": "", "project": "", "author_site": "ZAITANG LI, Pin-Yu Chen, Tsung-Yi Ho", "tldr": "", "abstract": "Current studies on adversarial robustness mainly focus on aggregating \\textit{local} robustness results from a set of data samples to evaluate and rank different models. However, the local statistics may not well represent the true \\textit{global} robustness of the underlying unknown data distribution. To address this challenge, this paper makes the first attempt to present a new framework, called \\textit{GREAT Score}, for global robustness evaluation of adversarial perturbation using generative models. Formally, GREAT Score carries the physical meaning of a global statistic capturing a mean certified attack-proof perturbation level over all samples drawn from a generative model. For finite-sample evaluation, we also derive a probabilistic guarantee on the sample complexity and the difference between the sample mean and the true mean. GREAT Score has several advantages: (1) Robustness evaluations using GREAT Score are efficient and scalable to large models, by sparing the need of running adversarial attacks. In particular, we show high correlation and significantly reduced computation cost of GREAT Score when compared to the attack-based model ranking on RobustBench \\cite{croce2021robustbench}. (2) The use of generative models facilitates the approximation of the unknown data distribution. In our ablation study with different generative adversarial networks (GANs), we observe consistency between global robustness evaluation and the quality of GANs. (3) GREAT Score can be used for remote auditing of privacy-sensitive black-box models, as demonstrated by our robustness evaluation on several online facial recognition services.", "keywords": "Adversarial Robustness", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/a2a24d998ffc9934aafecc9a430c810763187870.zip", "author": "ZAITANG LI;Pin-Yu Chen;Tsung-Yi Ho", "authorids": "~ZAITANG_LI1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "gender": "M;M;M", "homepage": ";http://www.pinyuchen.com;https://www.cse.cuhk.edu.hk/people/faculty/tsung-yi-ho/", "dblp": "276/1334;39/8969;63/4181.html", "google_scholar": "nPGJNbgAAAAJ;jxwlCUUAAAAJ;TRDUYkAAAAAJ", "orcid": "0000-0001-8871-2755;0000-0003-1039-8369;0000-0001-7348-5625", "linkedin": ";pin-yu-chen-940062a2;", "or_profile": "~ZAITANG_LI1;~Pin-Yu_Chen1;~Tsung-Yi_Ho2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;International Business Machines;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;ibm.com;cse.cuhk.edu.hk", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nli2024great,\ntitle={{GREAT} Score: Global Robustness Evaluation of Adversarial Perturbation using Generative Models},\nauthor={ZAITANG LI and Pin-Yu Chen and Tsung-Yi Ho},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vunJCq9PwU}\n}", "github": "", "reviewers": "gkrf;D3uH;kqpq;P6PA", "pdf_size": 7585237, "rating": "3;6;6;7", "confidence": "4;3;3;5", "soundness": "2;3;3;3", "novelty": "2;3;3;4", "presentation": "3;3;3;2", "wc_summary": "76;98;64;107", "wc_strengths": "60;50;77;79", "wc_weaknesses": "212;176;324;216", "wc_questions": "8;84;121;133", "wc_limitations": "1;12;1;4", "wc_review": "357;420;587;539", "wc_reply_reviewers": "0;39;77;641", "wc_reply_authors": "0;77;490;382", "reply_reviewers": "0;1;2;2", "reply_authors": "1;2;3;3", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 86.25, 17.09349291397168 ], "wc_strengths_avg": [ 66.5, 12.05197079319395 ], "wc_weaknesses_avg": [ 232.0, 55.35341001239219 ], "wc_questions_avg": [ 86.5, 48.78780585351221 ], "wc_limitations_avg": [ 4.5, 4.5 ], "wc_review_avg": [ 475.75, 91.6334409481604 ], "wc_reply_reviewers_avg": [ 189.25, 262.23498527084445 ], "wc_reply_authors_avg": [ 237.25, 204.20746191067553 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.10050378152592121, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13847030510275551331&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "cse.cuhk.edu.hk;ibm.com;cse.cuhk.edu.hk", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;International Business Machines Corporation", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ibm.com", "aff_unique_abbr": "CUHK;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Low Precision Local Training is Enough for Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93183", "id": "vvpewjtnvm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vvpewjtnvm", "openreview": "https://openreview.net/forum?id=vvpewjtnvm", "poster": "/media/PosterPDFs/NeurIPS%202024/93183.png?t=1731686065.177167", "project": "", "author_site": "Zhiwei Li, Yiqiu LI, Binbin Lin, Zhongming Jin, Weizhong Zhang", "tldr": "", "abstract": "Federated Learning (FL) is a prevalent machine learning paradigm designed to address challenges posed by heterogeneous client data while preserving data privacy.\n Unlike distributed training, it typically orchestrates resource-constrained edge devices to communicate via a low-bandwidth communication network with a central server. This urges the development of more computation and communication efficient training algorithms. In this paper, we propose an efficient FL paradigm, where the local models in the clients are trained with low-precision operations and communicated with the server in low precision format, while only the model aggregation in the server is performed with high-precision computation. We surprisingly find that high precision models can be recovered from the low precision local models with proper aggregation in the server. \n In this way, both the workload in the client-side and the communication cost can be significantly reduced. We theoretically show that our proposed paradigm can converge to the optimal solution as the training goes on, which demonstrates that low precision local training is enough for FL. Our paradigm can be integrated with existing FL algorithms flexibly. Experiments across extensive benchmarks are conducted to showcase the effectiveness of our proposed method. Notably, the models trained by our method with the precision as low as 8 bits are comparable to those from the full precision training. As a by-product, we show that low precision local training can relieve the over-fitting issue in local training, which under heterogeneous client data can cause the client models drift further away from each other and lead to the failure in model aggregation. Code is released at https://github.com/digbangbang/LPT-FL.", "keywords": "Federated Learning;Low Precision Training", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/1b5c1599f62ae74be65a7b4211e330a93706ba48.zip", "author": "Zhiwei Li;Yiqiu LI;Binbin Lin;Zhongming Jin;WEIZHONG ZHANG", "authorids": "~Zhiwei_Li11;~Yiqiu_LI1;~Binbin_Lin3;~Zhongming_Jin1;~WEIZHONG_ZHANG2", "gender": "M;;M;M;M", "homepage": "https://digbangbang.github.io/;https://github.com/179784038/information.git;https://www.linkedin.com/in/binbin-lin-03598b31/;https://sites.google.com/site/zjuzhongmingjin/;https://facultyprofiles.ust.hk/profiles.php?profile=weizhong-zhang-weizhong", "dblp": ";;51/8073;;39/2330.html", "google_scholar": "6CzBrBQAAAAJ;;Zmvq4KYAAAAJ;fOC90nQAAAAJ;qd06pUgAAAAJ", "orcid": ";;0000-0002-0330-6406;;0000-0001-7311-0698", "linkedin": ";;;;", "or_profile": "~Zhiwei_Li11;~Yiqiu_LI1;~Binbin_Lin3;~Zhongming_Jin1;~Weizhong_Zhang1", "aff": "Fudan University;Fudan University;Zhejiang University;Alibaba Cloud Computing;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;zju.edu.cn;alibaba-inc.com;fudan.edu.cn", "position": "MS student;MS student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nli2024low,\ntitle={Low Precision Local Training is Enough for Federated Learning},\nauthor={Zhiwei Li and Yiqiu LI and Binbin Lin and Zhongming Jin and WEIZHONG ZHANG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vvpewjtnvm}\n}", "github": "", "reviewers": "vykk;1d4C;Zy16;5cLQ", "pdf_size": 3345363, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "1;3;3;3", "presentation": "3;3;3;3", "wc_summary": "234;63;116;60", "wc_strengths": "9;39;63;41", "wc_weaknesses": "124;159;94;154", "wc_questions": "38;12;37;9", "wc_limitations": "32;1;9;85", "wc_review": "437;274;319;349", "wc_reply_reviewers": "36;33;0;111", "wc_reply_authors": "54;167;55;354", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 118.25, 70.44279594110387 ], "wc_strengths_avg": [ 38.0, 19.209372712298546 ], "wc_weaknesses_avg": [ 132.75, 26.070817018267764 ], "wc_questions_avg": [ 24.0, 13.546217184144066 ], "wc_limitations_avg": [ 31.75, 32.78242669480098 ], "wc_review_avg": [ 344.75, 59.57505770034973 ], "wc_reply_reviewers_avg": [ 45.0, 40.6386515524322 ], "wc_reply_authors_avg": [ 157.5, 122.393831543914 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:t5EAe_P5vKIJ:scholar.google.com/&scioq=Low+Precision+Local+Training+is+Enough+for+Federated+Learning&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "fudan.edu.cn;fudan.edu.cn;zju.edu.cn;alibaba-inc.com;fudan.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Fudan University;Zhejiang University;Alibaba Group", "aff_unique_dep": ";;Cloud Computing", "aff_unique_url": "https://www.fudan.edu.cn;https://www.zju.edu.cn;https://www.alibabacloud.com", "aff_unique_abbr": "Fudan;ZJU;Alibaba Cloud", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Intrinsic Self-Supervision for Data Quality Audits", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97453", "id": "vvyUa3CDwt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vvyUa3CDwt", "openreview": "https://openreview.net/forum?id=vvyUa3CDwt", "poster": "/media/PosterPDFs/NeurIPS%202024/97453.png?t=1733218327.3380256", "project": "", "author_site": "Fabian Gr\u00f6ger, Simone Lionetti, Philippe Gottfrois, Alvaro Gonzalez-Jimenez, Ludovic Amruthalingam, Matthew Groh, Alexander Navarini, Marc Pouly", "tldr": "", "abstract": "Benchmark datasets in computer vision often contain off-topic images, near duplicates, and label errors, leading to inaccurate estimates of model performance.\nIn this paper, we revisit the task of data cleaning and formalize it as either a ranking problem, which significantly reduces human inspection effort, or a scoring problem, which allows for automated decisions based on score distributions.\nWe find that a specific combination of context-aware self-supervised representation learning and distance-based indicators is effective in finding issues without annotation biases.\nThis methodology, which we call SelfClean, surpasses state-of-the-art performance in detecting off-topic images, near duplicates, and label errors within widely-used image datasets, such as ImageNet-1k, Food-101N, and STL-10, both for synthetic issues and real contamination.\nWe apply the detailed method to multiple image benchmarks, identify up to 16% of issues, and confirm an improvement in evaluation reliability upon cleaning.\nThe official implementation can be found at: https://github.com/Digital-Dermatology/SelfClean.", "keywords": "Data Cleaning;Data-centric AI;Data-centric Machine Learning Research;Self-Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Fabian Gr\u00f6ger;Simone Lionetti;Philippe Gottfrois;Alvaro Gonzalez-Jimenez;Ludovic Amruthalingam;Matthew Groh;Alexander A. Navarini;Marc Pouly", "authorids": "~Fabian_Gr\u00f6ger1;~Simone_Lionetti1;~Philippe_Gottfrois1;~Alvaro_Gonzalez-Jimenez1;~Ludovic_Amruthalingam1;~Matthew_Groh1;~Alexander_A._Navarini1;~Marc_Pouly1", "gender": "M;;;M;M;M;;M", "homepage": "https://www.hslu.ch/en/lucerne-university-of-applied-sciences-and-arts/about-us/people-finder/profile/?pid=4915;https://www.hslu.ch/en/lucerne-university-of-applied-sciences-and-arts/about-us/people-finder/profile/?pid=4484;;;;https://mattgroh.com;;https://marcpouly.ch", "dblp": "278/9039;286/7318;;318/8116;273/6466;217/2239;;40/1674", "google_scholar": "uwyr5k0AAAAJ;WCvkBwkAAAAJ;;LbtKzVgAAAAJ;3GMhoS8AAAAJ;Zri-8PwAAAAJ;;", "orcid": "0000-0002-9699-688X;0000-0001-7305-8957;;0000-0002-1337-9430;0000-0001-5980-5469;0000-0002-9029-0157;;0000-0002-9520-4799", "linkedin": "fabian-gr%C3%B6ger-6a1822a4/;simonelionetti;;https://linkedin.com/in/alvarogonjim;;;;marcpouly", "or_profile": "~Fabian_Gr\u00f6ger1;~Simone_Lionetti1;~Philippe_Gottfrois1;~Alvaro_Gonzalez-Jimenez1;~Ludovic_Amruthalingam1;~Matthew_Groh1;~Alexander_A._Navarini1;~Marc_Pouly1", "aff": "University of Basel;HSLU - Lucerne University of Applied Sciences and Arts;;University of Basel;HSLU - Lucerne University of Applied Sciences and Arts;Northwestern University;;HSLU - Lucerne University of Applied Sciences and Arts", "aff_domain": "unibas.ch;hslu.ch;;unibas.ch;hslu.ch;northwestern.edu;;hslu.ch", "position": "PhD student;Postdoc;;PhD student;Researcher;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\ngr{\\\"o}ger2024intrinsic,\ntitle={Intrinsic Self-Supervision for Data Quality Audits},\nauthor={Fabian Gr{\\\"o}ger and Simone Lionetti and Philippe Gottfrois and Alvaro Gonzalez-Jimenez and Ludovic Amruthalingam and Matthew Groh and Alexander A. Navarini and Marc Pouly},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vvyUa3CDwt}\n}", "github": "", "reviewers": "qYrV;fiv4;tscY", "pdf_size": 13562451, "rating": "6;7;7", "confidence": "3;4;3", "wc_summary_and_contributions": "290;43;39", "wc_strengths": "422;102;10", "wc_improvement": "335;502;27", "wc_limitations": "1;6;15", "wc_correctness": "1;1;9", "wc_clarity": "1;1;7", "wc_relation_to_prior_work": "1;1;21", "wc_documentation": "1;1;2", "wc_additional_feedback": "1;1;1", "wc_review": "1053;658;131", "wc_reply_reviewers": "0;129;26", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;2;1", "reply_authors": "3;5;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 124.0, 117.39108427247218 ], "wc_strengths_avg": [ 178.0, 176.574818891785 ], "wc_improvement_avg": [ 288.0, 196.74518206722792 ], "wc_limitations_avg": [ 7.333333333333333, 5.792715732327589 ], "wc_correctness_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_clarity_avg": [ 3.0, 2.8284271247461903 ], "wc_relation_to_prior_work_avg": [ 7.666666666666667, 9.428090415820632 ], "wc_documentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 614.0, 377.688584241921 ], "wc_reply_reviewers_avg": [ 51.666666666666664, 55.70358057512003 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 3.3333333333333335, 1.247219128924647 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14141512434597728750&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "unibas.ch;hslu.ch;;unibas.ch;hslu.ch;northwestern.edu;;hslu.ch", "author_num": 8, "aff_unique_index": "0;1;0;1;2;1", "aff_unique_norm": "University of Basel;Lucerne University of Applied Sciences and Arts;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unibas.ch;https://www.hslu.ch;https://www.northwestern.edu", "aff_unique_abbr": "UniBas;HSLU;NU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Lucerne", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Rethinking Misalignment in Vision-Language Model Adaptation from a Causal Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93182", "id": "vwgWbCxeAQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vwgWbCxeAQ", "openreview": "https://openreview.net/forum?id=vwgWbCxeAQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93182.png?t=1730356606.6164515", "project": "", "author_site": "Yanan Zhang, Jiangmeng Li, Lixiang Liu, Wenwen Qiang", "tldr": "", "abstract": "Foundational Vision-Language models such as CLIP have exhibited impressive generalization in downstream tasks. However, CLIP suffers from a two-level misalignment issue, i.e., task misalignment and data misalignment, when adapting to specific tasks. Soft prompt tuning has mitigated the task misalignment, yet the data misalignment remains a challenge. To analyze the impacts of the data misalignment, we revisit the pre-training and adaptation processes of CLIP and develop a structural causal model. We discover that while we expect to capture task-relevant information for downstream tasks accurately, the task-irrelevant knowledge impacts the prediction results and hampers the modeling of the true relationships between the images and the predicted classes. As task-irrelevant knowledge is unobservable, we leverage the front-door adjustment and propose Causality-Guided Semantic Decoupling and Classification (CDC) to mitigate the interference of task-irrelevant knowledge. Specifically, we decouple semantics contained in the data of downstream tasks and perform classification based on each semantic. Furthermore, we employ the Dempster-Shafer evidence theory to evaluate the uncertainty of each prediction generated by diverse semantics. Experiments conducted in multiple different settings have consistently demonstrated the effectiveness of CDC.", "keywords": "causal;adaptation;foundational models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yanan Zhang;Jiangmeng Li;Lixiang Liu;Wenwen Qiang", "authorids": "~Yanan_Zhang3;~Jiangmeng_Li1;~Lixiang_Liu1;~Wenwen_Qiang1", "gender": "F;M;M;M", "homepage": ";https://jiangmengli.github.io/;https://people.ucas.ac.cn/~liulx;", "dblp": ";293/0997;;261/6913", "google_scholar": "https://scholar.google.com.au/citations?hl=en;https://scholar.google.com.sg/citations?user=-kU4VLcAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": "0000-0001-6937-0913;0000-0002-3376-1522;;0000-0002-7985-5743", "linkedin": ";jiangmeng-li-86aaa7125/;;", "or_profile": "~Yanan_Zhang3;~Jiangmeng_Li1;~Lixiang_Liu1;~Wenwen_Qiang1", "aff": "University of Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Software Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;iscas.ac.cn;ucas.ac.cn;iscas.ac.cn", "position": "PhD student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024rethinking,\ntitle={Rethinking Misalignment in Vision-Language Model Adaptation from a Causal Perspective},\nauthor={Yanan Zhang and Jiangmeng Li and Lixiang Liu and Wenwen Qiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vwgWbCxeAQ}\n}", "github": "", "reviewers": "JJqY;7jjJ;Ptf4", "pdf_size": 2075049, "rating": "5;6;7", "confidence": "3;4;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;2;4", "wc_summary": "107;66;75", "wc_strengths": "28;73;76", "wc_weaknesses": "110;156;86", "wc_questions": "2;2;21", "wc_limitations": "2;1;9", "wc_review": "249;298;267", "wc_reply_reviewers": "10;32;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 82.66666666666667, 17.594190960528863 ], "wc_strengths_avg": [ 59.0, 21.95449840010015 ], "wc_weaknesses_avg": [ 117.33333333333333, 29.044027881055953 ], "wc_questions_avg": [ 8.333333333333334, 8.956685895029603 ], "wc_limitations_avg": [ 4.0, 3.559026084010437 ], "wc_review_avg": [ 271.3333333333333, 20.237478982214054 ], "wc_reply_reviewers_avg": [ 14.0, 13.366625103842281 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7252960338798233932&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "ucas.ac.cn;iscas.ac.cn;ucas.ac.cn;iscas.ac.cn", "author_num": 4, "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Software", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ios.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Revive Re-weighting in Imbalanced Learning by Density Ratio Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93181", "id": "vx4NgdyyVG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vx4NgdyyVG", "openreview": "https://openreview.net/forum?id=vx4NgdyyVG", "poster": "/media/PosterPDFs/NeurIPS%202024/93181.png?t=1731416256.2783659", "project": "", "author_site": "JIAAN LUO, Feng Hong, Jiangchao Yao, Bo Han, Ya Zhang, Yanfeng Wang", "tldr": "", "abstract": "In deep learning, model performance often deteriorates when trained on highly imbalanced datasets, especially when evaluation metrics require robust generalization across underrepresented classes. To address the challenges posed by imbalanced data distributions, this study introduces a novel method utilizing density ratio estimation for dynamic class weight adjustment, termed as Re-weighting with Density Ratio (RDR). Our method adaptively adjusts the importance of each class during training, mitigates overfitting on dominant classes and enhances model adaptability across diverse datasets. Extensive experiments conducted on various large scale benchmark datasets validate the effectiveness of our method. Results demonstrate substantial improvements in generalization capabilities, particularly under severely imbalanced conditions.", "keywords": "machine learning;density ratio estimation;optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Jiaan Luo;Feng Hong;Jiangchao Yao;Bo Han;Ya Zhang;Yanfeng Wang", "authorids": "~Jiaan_Luo1;~Feng_Hong1;~Jiangchao_Yao1;~Bo_Han1;~Ya_Zhang1;~Yanfeng_Wang1", "gender": "M;M;M;F;M;M", "homepage": "https://github.com/GoodMorningPeter;;https://sunarker.github.io/;https://annzhanglion.github.io/;https://cmic.sjtu.edu.cn/wangyanfeng/;https://bhanml.github.io/", "dblp": ";68/1260-4;166/5900;85/3714-2;55/5407-1.html;241/0472-3", "google_scholar": ";DCTAaNQAAAAJ;w8oDh9QAAAAJ;pbjw9sMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;nTNjqHwAAAAJ", "orcid": ";;;0000-0002-5390-9053;0000-0002-3196-2347;", "linkedin": ";;;;;", "or_profile": "~Jiaan_Luo1;~Feng_Hong1;~Jiangchao_Yao1;~Ya_Zhang1;~Yanfeng_Wang1;~bo_han2", "aff": "Beijing Institute of Technology;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;Shanghai Jiaotong University;Shanghai Jiaotong University;MBZUAI", "aff_domain": "bit.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;mbzuai.ac.ae", "position": "Undergrad student;PhD student;Researcher;Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nluo2024revive,\ntitle={Revive Re-weighting in Imbalanced Learning by Density Ratio Estimation},\nauthor={Jiaan Luo and Feng Hong and Jiangchao Yao and Bo Han and Ya Zhang and Yanfeng Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vx4NgdyyVG}\n}", "github": "", "reviewers": "24eD;MG5X;xrua", "pdf_size": 787836, "rating": "6;6;6", "confidence": "4;3;3", "soundness": "3;4;3", "novelty": "3;2;3", "presentation": "2;2;1", "wc_summary": "45;129;73", "wc_strengths": "88;139;52", "wc_weaknesses": "112;154;109", "wc_questions": "24;2;34", "wc_limitations": "33;5;37", "wc_review": "302;429;305", "wc_reply_reviewers": "16;105;28", "wc_reply_authors": "119;80;111", "reply_reviewers": "1;2;1", "reply_authors": "3;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 82.33333333333333, 34.92213560989012 ], "wc_strengths_avg": [ 93.0, 35.6931365951495 ], "wc_weaknesses_avg": [ 125.0, 20.54263858417414 ], "wc_questions_avg": [ 20.0, 13.366625103842281 ], "wc_limitations_avg": [ 25.0, 14.236104336041748 ], "wc_review_avg": [ 345.3333333333333, 59.17394321452269 ], "wc_reply_reviewers_avg": [ 49.666666666666664, 39.43207943906698 ], "wc_reply_authors_avg": [ 103.33333333333333, 16.81930108205715 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18094552255394679854&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "bit.edu.cn;sjtu.edu.cn;pjlab.org.cn;sjtu.edu.cn;sjtu.edu.cn;mbzuai.ac.ae", "author_num": 6, "aff_unique_index": "0;1;2;1;1;3", "aff_unique_norm": "Beijing Institute of Technology;Shanghai Jiao Tong University;Shanghai Artificial Intelligence Laboratory;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.sjtu.edu.cn;http://www.shailab.org/;https://www.mbzuai.ac.ae", "aff_unique_abbr": "BIT;SJTU;Shanghai AI Lab;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Conditional Generative Models are Sufficient to Sample from Any Causal Effect Estimand", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93180", "id": "vymkuBMLlh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vymkuBMLlh", "openreview": "https://openreview.net/forum?id=vymkuBMLlh", "poster": "", "project": "", "author_site": "Md Musfiqur Rahman, Matt Jordan, Murat Kocaoglu", "tldr": "", "abstract": "Causal inference from observational data plays critical role in many applications in trustworthy machine learning.\nWhile sound and complete algorithms exist to compute causal effects, many of them assume access to conditional likelihoods,\n which is difficult to estimate for high-dimensional (particularly image) data. Researchers have alleviated this issue by simulating causal relations with neural models. However, when we have high-dimensional variables in the causal graph along with some unobserved confounders, no existing work can effectively sample from the un/conditional interventional distributions. In this work, we show how to sample from any identifiable interventional distribution given an arbitrary causal graph through a sequence of push-forward computations of conditional generative models, such as diffusion models. Our proposed algorithm follows the recursive steps of the existing likelihood-based identification algorithms to train a set of feed-forward models, and connect them in a specific way to sample from the desired distribution. We conduct experiments on a Colored MNIST dataset having both the treatment ($X$) and the target variables ($Y$) as images and sample from $P(y|do(x))$. Our algorithm also enables us to conduct a causal analysis to evaluate spurious correlations among input features of generative models pre-trained on the CelebA dataset. Finally, we generate high-dimensional interventional samples from the MIMIC-CXR dataset involving text and image variables.", "keywords": "causal inference;causal graphs;deep generative models", "primary_area": "causal_inference", "supplementary_material": "/attachment/79fda2fc144a5e9f898e1bcc80c9c098b1661f07.zip", "author": "Md Musfiqur Rahman;Matt Jordan;Murat Kocaoglu", "authorids": "~Md_Musfiqur_Rahman1;~Matt_Jordan1;~Murat_Kocaoglu1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/musfiqshohan/home;https://www.cs.utexas.edu/~mjordan/;https://www.muratkocaoglu.com", "dblp": "249/2369;236/5728;74/11343", "google_scholar": "vMGENI8AAAAJ;Zj7R8p0AAAAJ;7N7bzdwAAAAJ", "orcid": ";;", "linkedin": "md-musfiqur-rahman-861b58150/;;mkocaoglu/", "or_profile": "~Md_Musfiqur_Rahman1;~Matt_Jordan1;~Murat_Kocaoglu1", "aff": "Purdue University;University of Texas, Austin;Purdue University", "aff_domain": "purdue.edu;utexas.edu;purdue.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nrahman2024conditional,\ntitle={Conditional Generative Models are Sufficient to Sample from Any Causal Effect Estimand},\nauthor={Md Musfiqur Rahman and Matt Jordan and Murat Kocaoglu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=vymkuBMLlh}\n}", "github": "", "reviewers": "xD7P;fUEc;6o9W;5UTo", "pdf_size": 12136511, "rating": "5;5;5;7", "confidence": "2;3;2;3", "soundness": "3;4;3;3", "novelty": "2;2;4;3", "presentation": "1;3;3;3", "wc_summary": "62;72;76;113", "wc_strengths": "56;74;43;111", "wc_weaknesses": "155;103;370;40", "wc_questions": "93;46;43;60", "wc_limitations": "1;13;1;1", "wc_review": "367;308;533;325", "wc_reply_reviewers": "22;70;45;0", "wc_reply_authors": "27;266;56;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 2.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 80.75, 19.30511590226798 ], "wc_strengths_avg": [ 71.0, 25.583197610932064 ], "wc_weaknesses_avg": [ 167.0, 124.0745743494613 ], "wc_questions_avg": [ 60.5, 19.83053201505194 ], "wc_limitations_avg": [ 4.0, 5.196152422706632 ], "wc_review_avg": [ 383.25, 89.085282173881 ], "wc_reply_reviewers_avg": [ 34.25, 26.06122598804592 ], "wc_reply_authors_avg": [ 87.25, 105.08419243635076 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15455534211566406457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "purdue.edu;utexas.edu;purdue.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.utexas.edu", "aff_unique_abbr": "Purdue;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Mercury: A Code Efficiency Benchmark for Code Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97452", "id": "vyraA7xt4c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=vyraA7xt4c", "openreview": "https://openreview.net/forum?id=vyraA7xt4c", "poster": "/media/PosterPDFs/NeurIPS%202024/97452.png?t=1729680899.2127168", "project": "", "author_site": "Mingzhe Du, Anh Tuan Luu, Bin Ji, Qian Liu, See-Kiong Ng", "tldr": "", "abstract": "Amidst the recent strides in evaluating Large Language Models for Code (Code LLMs), existing benchmarks have mainly focused on the functional correctness of generated code, neglecting the importance of their computational efficiency. To fill the gap, we present Mercury, the first code efficiency benchmark for Code LLMs. It comprises 1,889 Python tasks, each accompanied by adequate solutions that serve as real-world efficiency baselines, enabling a comprehensive analysis of the runtime distribution. Based on the distribution, we introduce a new metric Beyond, which computes a runtime-percentile-weighted Pass score to reflect functional correctness and code efficiency simultaneously. On Mercury, leading Code LLMs can achieve 65% on Pass, while less than 50% on Beyond. Given that an ideal Beyond score would be aligned with the Pass score, it indicates that while Code LLMs exhibit impressive capabilities in generating functionally correct code, there remains a notable gap in their efficiency. Finally, our empirical experiments reveal that Direct Preference Optimization (DPO) serves as a robust baseline for enhancing code efficiency compared with Supervised Fine Tuning (SFT), which paves a promising avenue for future exploration of efficient code generation. Our code and data are available on GitHub: https://github.com/Elfsong/Mercury.", "keywords": "code generation;language model;computational efficiency;benchmark", "primary_area": "", "supplementary_material": "/attachment/a855b892e7a31aa47d0cb503fc579de642a679f5.pdf", "author": "Mingzhe Du;Anh Tuan Luu;Bin Ji;Qian Liu;See-Kiong Ng", "authorids": "~Mingzhe_Du1;~Anh_Tuan_Luu2;~Bin_Ji3;~Qian_Liu2;~See-Kiong_Ng1", "gender": "M;M;M;M;M", "homepage": "https://elfsong.github.io/;https://tuanluu.github.io/;https://jibin5167.github.io/;http://siviltaram.github.io/;https://www.comp.nus.edu.sg/~ngsk/", "dblp": ";81/8329.html;119/1943-2.html;;00/5480", "google_scholar": "CJHW1IgAAAAJ;https://scholar.google.com.sg/citations?hl=en;31ZXPVQAAAAJ;bcbeUo0AAAAJ;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ", "orcid": ";;0000-0002-5508-5051;;0000-0001-6565-7511", "linkedin": ";;bin-ji-3b89a3269/;;seekiong/?originalSubdomain=sg", "or_profile": "~Mingzhe_Du1;~Anh_Tuan_Luu2;~Bin_Ji3;~Qian_Liu2;~See-Kiong_Ng1", "aff": "National University of Singapore;Nanyang Technological University;National University of Singapore;Tiktok;National University of Singapore", "aff_domain": "nus.edu;ntu.edu.sg;nus.edu.sg;bytedance.com;nus.edu.sg", "position": "Researcher;Assistant Professor;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\ndu2024mercury,\ntitle={Mercury: A Code Efficiency Benchmark for Code Large Language Models},\nauthor={Mingzhe Du and Anh Tuan Luu and Bin Ji and Qian Liu and See-Kiong Ng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=vyraA7xt4c}\n}", "github": "", "reviewers": "myqn;yc6N;2jdy;akjd", "pdf_size": 4988502, "rating": "6;6;6;8", "confidence": "4;4;3;4", "wc_summary_and_contributions": "128;76;87;36", "wc_strengths": "135;131;102;80", "wc_improvement": "90;131;3;57", "wc_limitations": "1;13;3;10", "wc_correctness": "1;4;1;1", "wc_clarity": "1;8;3;1", "wc_relation_to_prior_work": "1;80;1;1", "wc_documentation": "1;7;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "359;451;202;188", "wc_reply_reviewers": "226;31;39;10", "wc_reply_authors": "102;56;391;55", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;3;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 81.75, 32.7595405950694 ], "wc_strengths_avg": [ 112.0, 22.438805672316875 ], "wc_improvement_avg": [ 70.25, 46.847491928597414 ], "wc_limitations_avg": [ 6.75, 4.9180788932265 ], "wc_correctness_avg": [ 1.75, 1.299038105676658 ], "wc_clarity_avg": [ 3.25, 2.8613807855648994 ], "wc_relation_to_prior_work_avg": [ 20.75, 34.208003449485325 ], "wc_documentation_avg": [ 2.5, 2.598076211353316 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 300.0, 110.03408562804528 ], "wc_reply_reviewers_avg": [ 76.5, 86.96119824381447 ], "wc_reply_authors_avg": [ 151.0, 139.85885742419035 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1845451707233703202&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 2, "email": "nus.edu;ntu.edu.sg;nus.edu.sg;bytedance.com;nus.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "National University of Singapore;Nanyang Technological University;TikTok", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ntu.edu.sg;https://www.tiktok.com", "aff_unique_abbr": "NUS;NTU;TikTok", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "High Rank Path Development: an approach to learning the filtration of stochastic processes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93179", "id": "w28i9oe9Xr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w28i9oe9Xr", "openreview": "https://openreview.net/forum?id=w28i9oe9Xr", "poster": "", "project": "", "author_site": "Jiajie Tao, Hao Ni, Chong Liu", "tldr": "", "abstract": "Since the weak convergence for stochastic processes does not account for the growth of information over time which is represented by the underlying filtration, a slightly erroneous stochastic model in weak topology may cause huge loss in multi-periods decision making problems. To address such discontinuities, Aldous introduced the extended weak convergence, which can fully characterise all essential properties, including the filtration, of stochastic processes; however, it was considered to be hard to find efficient numerical implementations. In this paper, we introduce a novel metric called High Rank PCF Distance (HRPCFD) for extended weak convergence based on the high rank path development method from rough path theory, which also defines the characteristic function for measure-valued processes. We then show that such HRPCFD admits many favourable analytic properties which allows us to design an efficient algorithm for training HRPCFD from data and construct the HRPCF-GAN by using HRPCFD as the discriminator for conditional time series generation. Our numerical experiments on both hypothesis testing and generative modelling validate the out-performance of our approach compared with several state-of-the-art methods, highlighting its potential in broad applications of synthetic time series generation and in addressing classic financial and economic challenges, such as optimal stopping or utility maximisation problems. Code is available at https://github.com/DeepIntoStreams/High-Rank-PCF-GAN.git.", "keywords": "adapted weak topology; stochastic process; synthetic time series generation; path development", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/5dcca4b22b94b2847cc0ece59e9e06e53c6636e1.zip", "author": "Jiajie Tao;Hao Ni;Chong Liu", "authorids": "~Jiajie_Tao1;~Hao_Ni2;~Chong_Liu8", "gender": "M;F;M", "homepage": ";https://iris.ucl.ac.uk/iris/browse/profile?upi=HNIXX56;https://sites.google.com/view/chongliu/home?pli=1", "dblp": ";;", "google_scholar": ";https://scholar.google.co.uk/citations?user=VTTtSLcAAAAJ;", "orcid": ";0000-0001-5485-4376;", "linkedin": "https://www.linkedin/in/jiajie-tao;;", "or_profile": "~Jiajie_Tao1;~Hao_Ni2;~Chong_Liu8", "aff": "University College London, University of London;University College London;ShanghaiTech University", "aff_domain": "ucl.ac.uk;ucl.ac.uk;shanghaitech.edu.cn", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntao2024high,\ntitle={High Rank Path Development: an approach to learning the filtration of stochastic processes},\nauthor={Jiajie Tao and Hao Ni and Chong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w28i9oe9Xr}\n}", "github": "", "reviewers": "zBhD;Yt11;gzqb;9Q6u", "pdf_size": 2689806, "rating": "4;6;7;9", "confidence": "4;1;4;4", "soundness": "3;3;3;4", "novelty": "1;3;3;4", "presentation": "3;3;3;4", "wc_summary": "41;99;53;110", "wc_strengths": "36;107;72;90", "wc_weaknesses": "115;2;146;123", "wc_questions": "2;2;109;1", "wc_limitations": "1;1;4;1", "wc_review": "195;211;384;325", "wc_reply_reviewers": "15;12;38;29", "wc_reply_authors": "186;27;14;4", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.5, 1.8027756377319946 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 29.32042803234632 ], "wc_strengths_avg": [ 76.25, 26.328454189336675 ], "wc_weaknesses_avg": [ 96.5, 55.73374202401988 ], "wc_questions_avg": [ 28.5, 46.47848964843845 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 278.75, 78.77301251063082 ], "wc_reply_reviewers_avg": [ 23.5, 10.547511554864494 ], "wc_reply_authors_avg": [ 57.75, 74.49286878621335 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.16012815380508713, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HGG4QLmpDvMJ:scholar.google.com/&scioq=High+Rank+Path+Development:+an+approach+to+learning+the+filtration+of+stochastic+processes&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "ucl.ac.uk;ucl.ac.uk;shanghaitech.edu.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University College London;ShanghaiTech University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.shanghaitech.edu.cn", "aff_unique_abbr": "UCL;ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;China" }, { "title": "Adversarially Robust Multi-task Representation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93178", "id": "w2L3Ll1jbV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w2L3Ll1jbV", "openreview": "https://openreview.net/forum?id=w2L3Ll1jbV", "poster": "", "project": "", "author_site": "Austin Watkins, Thanh Nguyen-Tang, Enayat Ullah, Raman Arora", "tldr": "", "abstract": "We study adversarially robust transfer learning, wherein, given labeled data on multiple (source) tasks, the goal is to train a model with small robust error on a previously unseen (target) task.\nIn particular, we consider a multi-task representation learning (MTRL) setting, i.e., we assume that the source and target tasks admit a simple (linear) predictor on top of a shared representation (e.g., the final hidden layer of a \ndeep neural network).\nIn this general setting, we provide rates on~the excess adversarial (transfer) risk for Lipschitz losses and smooth nonnegative losses.\nThese rates show that learning a representation using adversarial training on diverse tasks helps protect against inference-time attacks in data-scarce environments.\nAdditionally, we provide novel rates for the single-task setting.", "keywords": "Learning Theory;Multi-task and Transfer Learning;Adversarial Robustness", "primary_area": "learning_theory", "supplementary_material": "", "author": "Austin Watkins;Thanh Nguyen-Tang;Enayat Ullah;Raman Arora", "authorids": "~Austin_Watkins1;~Thanh_Nguyen-Tang1;~Enayat_Ullah1;~Raman_Arora1", "gender": "M;;M;M", "homepage": "http://austinwatkins.com;https://enayatullah.github.io;http://www.cs.jhu.edu/~raman/Home.html;https://thanhnguyentang.github.io/", "dblp": "334/0263;223/5999;;287/5102.html", "google_scholar": ";;Spe0xdkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-1917-2190", "linkedin": "austin-watkins-6599a858/;;;thanhnguyentang/", "or_profile": "~Austin_Watkins1;~Enayat_Ullah1;~Raman_Arora1;~Thanh_Tang_Nguyen2", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;jhu.edu;jhu.edu", "position": "PhD student;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nwatkins2024adversarially,\ntitle={Adversarially Robust Multi-task Representation Learning},\nauthor={Austin Watkins and Thanh Nguyen-Tang and Enayat Ullah and Raman Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w2L3Ll1jbV}\n}", "github": "", "reviewers": "evww;g9e1;4GkJ;6tUb", "pdf_size": 503387, "rating": "5;6;6;7", "confidence": "3;2;3;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "4;3;3;2", "wc_summary": "58;76;108;241", "wc_strengths": "100;39;11;166", "wc_weaknesses": "56;88;4;172", "wc_questions": "104;27;75;93", "wc_limitations": "1;13;13;11", "wc_review": "319;243;211;683", "wc_reply_reviewers": "0;11;12;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 120.75, 71.69858785220251 ], "wc_strengths_avg": [ 79.0, 59.65316420777694 ], "wc_weaknesses_avg": [ 80.0, 60.991802727907626 ], "wc_questions_avg": [ 74.75, 29.448047473474364 ], "wc_limitations_avg": [ 9.5, 4.9749371855331 ], "wc_review_avg": [ 364.0, 188.30560267819968 ], "wc_reply_reviewers_avg": [ 11.5, 8.139410298049853 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4229759270211545768&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "jhu.edu;jhu.edu;jhu.edu;jhu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Optimization Can Learn Johnson Lindenstrauss Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93177", "id": "w3JCTBRduf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w3JCTBRduf", "openreview": "https://openreview.net/forum?id=w3JCTBRduf", "poster": "/media/PosterPDFs/NeurIPS%202024/93177.png?t=1733856434.590126", "project": "", "author_site": "Nikos Tsikouras, Constantine Caramanis, Christos Tzamos", "tldr": "", "abstract": "Embeddings play a pivotal role across various disciplines, offering compact representations of complex data structures. Randomized methods like Johnson-Lindenstrauss (JL) provide state-of-the-art and essentially unimprovable theoretical guarantees for achieving such representations. These guarantees are worst-case and in particular, neither the analysis, ${\\textit{nor the algorithm}}$, takes into account any potential structural information of the data. The natural question is: must we randomize? Could we instead use an optimization-based approach, working directly with the data? A first answer is no: as we show, the distance-preserving objective of JL has a non-convex landscape over the space of projection matrices, with many bad stationary points. But this is not the final answer. \n\nWe present a novel method motivated by diffusion models, that circumvents this fundamental challenge: rather than performing optimization directly over the space of projection matrices, we use optimization over the larger space of $\\textit{random solution samplers}$, gradually reducing the variance of the sampler. We show that by moving through this larger space, our objective converges to a deterministic (zero variance) solution, avoiding bad stationary points. \n\nThis method can also be seen as an optimization-based derandomization approach, and is an idea and method that we believe can be applied to many other problems.", "keywords": "Optimization;Non-Convex Optimization;Embeddings;Projections;Derandomization;Gradient Descent;Dimensionality Reduction", "primary_area": "optimization", "supplementary_material": "/attachment/ee7fcb741e66d4fd07cf9aa64ad34b1ded2f142d.zip", "author": "Nikos Tsikouras;Constantine Caramanis;Christos Tzamos", "authorids": "~Nikos_Tsikouras1;~Constantine_Caramanis1;~Christos_Tzamos1", "gender": "M;M;", "homepage": "https://nikostsikouras.github.io/;http://users.ece.utexas.edu/~cmcaram/constantine_caramanis/Home.html;https://tzamos.com", "dblp": "395/8370;96/5760;79/8819", "google_scholar": "xZYPVCsAAAAJ;47YTUrEAAAAJ;wB01auEAAAAJ", "orcid": ";;", "linkedin": "nikos-tsikouras/;;", "or_profile": "~Nikos_Tsikouras1;~Constantine_Caramanis1;~Christos_Tzamos1", "aff": "University of Athens;University of Texas, Austin;University of Wisconsin, Madison", "aff_domain": "uoa.gr;utexas.edu;wisc.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntsikouras2024optimization,\ntitle={Optimization Can Learn Johnson Lindenstrauss Embeddings},\nauthor={Nikos Tsikouras and Constantine Caramanis and Christos Tzamos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w3JCTBRduf}\n}", "github": "", "reviewers": "XscG;y6Yv;pTrj;WAK9", "pdf_size": 430791, "rating": "5;5;7;8", "confidence": "3;3;3;3", "soundness": "2;2;4;4", "novelty": "3;3;4;4", "presentation": "4;3;3;4", "wc_summary": "44;96;110;206", "wc_strengths": "67;51;45;182", "wc_weaknesses": "48;25;20;142", "wc_questions": "119;112;53;155", "wc_limitations": "23;3;5;1", "wc_review": "301;287;233;686", "wc_reply_reviewers": "157;92;9;133", "wc_reply_authors": "440;79;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 114.0, 58.532042506647585 ], "wc_strengths_avg": [ 86.25, 55.86311394829329 ], "wc_weaknesses_avg": [ 58.75, 49.21064417379638 ], "wc_questions_avg": [ 109.75, 36.601741761834234 ], "wc_limitations_avg": [ 8.0, 8.774964387392123 ], "wc_review_avg": [ 376.75, 180.34186286051278 ], "wc_reply_reviewers_avg": [ 97.75, 56.264442590325196 ], "wc_reply_authors_avg": [ 129.75, 182.00326233339885 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:N4W1Lp65a2QJ:scholar.google.com/&scioq=Optimization+Can+Learn+Johnson+Lindenstrauss+Embeddings&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "uoa.gr;utexas.edu;wisc.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Athens;University of Texas at Austin;University of Wisconsin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uoa.gr;https://www.utexas.edu;https://www.wisc.edu", "aff_unique_abbr": "UoA;UT Austin;UW", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Madison", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Greece;United States" }, { "title": "Can Language Models Learn to Skip Steps?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93176", "id": "w4AnTVxAO9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w4AnTVxAO9", "openreview": "https://openreview.net/forum?id=w4AnTVxAO9", "poster": "", "project": "", "author_site": "Tengxiao Liu, Qipeng Guo, Xiangkun Hu, Cheng Jiayang, Yue Zhang, Xipeng Qiu, Zheng Zhang", "tldr": "", "abstract": "Trained on vast corpora of human language, language models demonstrate emergent human-like reasoning abilities. Yet they are still far from true intelligence, which opens up intriguing opportunities to explore the parallels of humans and model behaviors. In this work, we study the ability to skip steps in reasoning\u2014a hallmark of human expertise developed through practice. Unlike humans, who may skip steps to enhance efficiency or to reduce cognitive load, models do not inherently possess such motivations to minimize reasoning steps. To address this, we introduce a controlled framework that stimulates step-skipping behavior by iteratively refining models to generate shorter and accurate reasoning paths. Empirical results indicate that models can develop the step skipping ability under our guidance. Moreover, after fine-tuning on expanded datasets that include both complete and skipped reasoning sequences, the models can not only resolve tasks with increased efficiency without sacrificing accuracy, but also exhibit comparable and even enhanced generalization capabilities in out-of-domain scenarios. Our work presents the first exploration into human-like step-skipping ability and provides fresh perspectives on how such cognitive abilities can benefit AI models.", "keywords": "Large Language Models;Natural Language Processing;Reasoning;Human-like Abilities", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Tengxiao Liu;Qipeng Guo;Xiangkun Hu;Cheng Jiayang;Yue Zhang;Xipeng Qiu;Zheng Zhang", "authorids": "~Tengxiao_Liu1;~Qipeng_Guo1;~Xiangkun_Hu1;~Cheng_Jiayang1;~Yue_Zhang7;~Xipeng_Qiu1;~Zheng_Zhang1", "gender": ";M;M;M;M;M;M", "homepage": "https://tengxiaoliu.github.io;;;http://frcchang.github.io;https://xpqiu.github.io/;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;https://jiayangcheng.com", "dblp": "165/9039;172/1046;224/5990;47/722-4;69/1395;;289/7347", "google_scholar": ";k3mPGKgAAAAJ;_-0MpawAAAAJ;;Pq4Yp_kAAAAJ;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;btRJjVEAAAAJ", "orcid": "0000-0002-3339-9607;;;0000-0002-5214-2268;0000-0001-7163-5247;;", "linkedin": ";;;;;;jiayang-joey-cheng-1b87a5221/", "or_profile": "~Tengxiao_Liu1;~Qipeng_Guo1;~Xiangkun_Hu1;~Yue_Zhang7;~Xipeng_Qiu1;~Zheng_Zhang1;~Jiayang_Cheng1", "aff": "Amazon;Shanghai AI Laboratory;Amazon;Westlake University;Fudan University;Amazon;Department of Computer Science and Engineering, Hong Kong University of Science and Technology", "aff_domain": "amazon.com;pjlab.org.cn;amazon.com;westlake.edu.cn;fudan.edu.cn;amazon.com;cse.ust.hk", "position": "Intern;Researcher;Applied Scientist;Full Professor;Full Professor;Senior Principal Scientist;PhD student", "bibtex": "@inproceedings{\nliu2024can,\ntitle={Can Language Models Learn to Skip Steps?},\nauthor={Tengxiao Liu and Qipeng Guo and Xiangkun Hu and Cheng Jiayang and Yue Zhang and Xipeng Qiu and Zheng Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w4AnTVxAO9}\n}", "github": "", "reviewers": "vxGY;E6Qj;DBKK;rzDm", "pdf_size": 2184792, "rating": "4;5;6;7", "confidence": "4;4;4;4", "soundness": "3;2;3;3", "novelty": "3;2;2;3", "presentation": "3;3;3;3", "wc_summary": "58;79;85;129", "wc_strengths": "57;35;56;133", "wc_weaknesses": "64;103;119;192", "wc_questions": "1;2;127;164", "wc_limitations": "1;1;4;21", "wc_review": "181;220;391;639", "wc_reply_reviewers": "0;38;43;184", "wc_reply_authors": "0;695;479;87", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.75, 25.839649765428323 ], "wc_strengths_avg": [ 70.25, 37.278512577623054 ], "wc_weaknesses_avg": [ 119.5, 46.39234850705448 ], "wc_questions_avg": [ 73.5, 73.17957365276187 ], "wc_limitations_avg": [ 6.75, 8.317902379807062 ], "wc_review_avg": [ 357.75, 180.57045024034247 ], "wc_reply_reviewers_avg": [ 66.25, 69.98705237399273 ], "wc_reply_authors_avg": [ 315.25, 283.94750835321656 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12038014967124890297&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "amazon.com;pjlab.org.cn;amazon.com;westlake.edu.cn;fudan.edu.cn;amazon.com;cse.ust.hk", "author_num": 7, "aff_unique_index": "0;1;0;2;3;0;4", "aff_unique_norm": "Amazon;Shanghai AI Laboratory;Westlake University;Fudan University;Hong Kong University of Science and Technology", "aff_unique_dep": "Amazon.com, Inc.;;;;Department of Computer Science and Engineering", "aff_unique_url": "https://www.amazon.com;https://www.shanghai-ai-lab.com;https://www.westlake.edu.cn;https://www.fudan.edu.cn;https://www.ust.hk", "aff_unique_abbr": "Amazon;SAIL;WU;Fudan;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Discovery of the Hidden World with Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93175", "id": "w50ICQC6QJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w50ICQC6QJ", "openreview": "https://openreview.net/forum?id=w50ICQC6QJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93175.png?t=1730361557.6553507", "project": "", "author_site": "Chenxi Liu, Yongqiang Chen, Tongliang Liu, Mingming Gong, James Cheng, Bo Han, Kun Zhang", "tldr": "", "abstract": "Revealing the underlying causal mechanisms in the real world is the key to the development of science. Despite the progress in the past decades, traditional causal discovery approaches (CDs) mainly rely on high-quality measured variables, usually given by human experts, to find causal relations. The lack of well-defined high-level variables in many real-world applications has already been a longstanding roadblock to a broader application of CDs. To this end, this paper presents Causal representatiOn AssistanT (COAT) that introduces large language models (LLMs) to bridge the gap. LLMs are trained on massive observations of the world and have demonstrated great capability in extracting key information from unstructured data. Therefore, it is natural to employ LLMs to assist with proposing useful high-level factors and crafting their measurements. Meanwhile, COAT also adopts CDs to find causal relations among the identified variables as well as to provide feedback to LLMs to iteratively refine the proposed factors. We show that LLMs and CDs are mutually beneficial and the constructed feedback provably also helps with the factor proposal. We construct and curate several synthetic and real-world benchmarks including analysis of human reviews and diagnosis of neuropathic and brain tumors, to comprehensively evaluate COAT. Extensive empirical results confirm the effectiveness and reliability of COAT with significant improvements.", "keywords": "Causal Discovery;Large Language Models;Causal Representation Learning", "primary_area": "causal_inference", "supplementary_material": "", "author": "Chenxi Liu;Yongqiang Chen;Tongliang Liu;Mingming Gong;James Cheng;Bo Han;Kun Zhang", "authorids": "~Chenxi_Liu3;~Yongqiang_Chen1;~Tongliang_Liu1;~Mingming_Gong1;~James_Cheng2;~Bo_Han1;~Kun_Zhang1", "gender": "M;;M;M;M;M;M", "homepage": "https://chxliou.github.io;https://lfhase.win;https://tongliang-liu.github.io/;https://mingming-gong.github.io/;https://www.cse.cuhk.edu.hk/~jcheng/;http://www.andrew.cmu.edu/user/kunz1/;https://bhanml.github.io/", "dblp": ";76/5774-2;150/6667;98/8479;06/4171;96/3115-1;241/0472-3", "google_scholar": "cIGI2jAAAAAJ;huQ_Ig8AAAAJ;https://scholar.google.com.au/citations?user=EiLdZ_YAAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;;RGoypN4AAAAJ;nTNjqHwAAAAJ", "orcid": ";;;0000-0001-7147-5589;;;", "linkedin": "chenxi-liu-b79170147/;;;;;;", "or_profile": "~Chenxi_Liu3;~Yongqiang_Chen1;~Tongliang_Liu1;~Mingming_Gong1;~James_Cheng2;~Kun_Zhang1;~bo_han2", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Mohamed bin Zayed University of Artificial Intelligence;University of Melbourne;The Chinese University of Hong Kong;Carnegie Mellon University;MBZUAI", "aff_domain": "mbzuai.ac.ae;cse.cuhk.edu.hk;mbzuai.ac.ae;unimelb.edu.au;cuhk.edu.hk;cmu.edu;mbzuai.ac.ae", "position": "Intern;PhD student;Affiliated Associate Professor;Assistant Professor;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nliu2024discovery,\ntitle={Discovery of the Hidden World with Large Language Models},\nauthor={Chenxi Liu and Yongqiang Chen and Tongliang Liu and Mingming Gong and James Cheng and Bo Han and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w50ICQC6QJ}\n}", "github": "", "reviewers": "1cJQ;AG9s;PF9K;szEm", "pdf_size": 3870990, "rating": "5;5;6;8", "confidence": "4;4;2;4", "soundness": "2;2;3;3", "novelty": "2;3;2;4", "presentation": "2;2;3;3", "wc_summary": "398;68;83;117", "wc_strengths": "156;66;40;84", "wc_weaknesses": "213;201;75;60", "wc_questions": "95;388;15;4", "wc_limitations": "6;18;1;4", "wc_review": "868;741;214;269", "wc_reply_reviewers": "112;44;96;27", "wc_reply_authors": "1063;1130;566;43", "reply_reviewers": "4;1;2;1", "reply_authors": "7;3;5;2", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 166.5, 134.83044908328387 ], "wc_strengths_avg": [ 86.5, 43.066808565297706 ], "wc_weaknesses_avg": [ 137.25, 70.07986515397985 ], "wc_questions_avg": [ 125.5, 155.57072346685285 ], "wc_limitations_avg": [ 7.25, 6.456585785072479 ], "wc_review_avg": [ 523.0, 285.72101777783166 ], "wc_reply_reviewers_avg": [ 69.75, 35.23049105533444 ], "wc_reply_authors_avg": [ 700.5, 437.68510369899496 ], "reply_reviewers_avg": [ 2.0, 1.224744871391589 ], "reply_authors_avg": [ 4.25, 1.920286436967152 ], "replies_avg": [ 34, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "mbzuai.ac.ae;cse.cuhk.edu.hk;mbzuai.ac.ae;unimelb.edu.au;cuhk.edu.hk;cmu.edu;mbzuai.ac.ae", "author_num": 7, "aff_unique_index": "0;1;0;2;1;3;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Chinese University of Hong Kong;University of Melbourne;Carnegie Mellon University", "aff_unique_dep": ";Department of Computer Science and Engineering;;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.cuhk.edu.hk;https://www.unimelb.edu.au;https://www.cmu.edu", "aff_unique_abbr": "MBZUAI;CUHK;UniMelb;CMU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;2;1;3;0", "aff_country_unique": "United Arab Emirates;China;Australia;United States" }, { "title": "Fit for our purpose, not yours: Benchmark for a low-resource, Indigenous language", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97451", "id": "w5jfyvsRq3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w5jfyvsRq3", "openreview": "https://openreview.net/forum?id=w5jfyvsRq3", "poster": "/media/PosterPDFs/NeurIPS%202024/97451.png?t=1733964236.935066", "project": "", "author_site": "Suzanne Duncan, Gianna Leoni, Lee Steven, Keoni K Mahelona, Peter Lucas K Jones", "tldr": "", "abstract": "Influential and popular benchmarks in AI are largely irrelevant to developing NLP tools for low-resource, Indigenous languages. With the primary goal of measuring the performance of general-purpose AI systems, these benchmarks fail to give due consideration and care to individual language communities, especially low-resource languages. The datasets contain numerous grammatical and orthographic errors, poor pronunciation, limited vocabulary, and the content lacks cultural relevance to the language community. To overcome the issues with these benchmarks, we have created a dataset for te reo M\u0101ori (the Indigenous language of Aotearoa/New Zealand) to pursue NLP tools that are \u2018fit-for-our-purpose\u2019. This paper demonstrates how low-resourced, Indigenous languages can develop tailored, high-quality benchmarks that; i. Consider the impact of colonisation on their language; ii. Reflect the diversity of speakers in the language community; iii. Support the aspirations for the tools they are developing and their language revitalisation efforts.", "keywords": "Benchmarking;Indigenous languages;low-resource languages;NLP", "primary_area": "", "supplementary_material": "", "author": "Suzanne Duncan;Gianna Leoni;Lee Steven;Keoni Mahelona;Peter-Lucas Jones", "authorids": "~Suzanne_Duncan1;~Gianna_Leoni1;~Lee_Steven1;~Keoni_Mahelona1;~Peter-Lucas_Jones2", "gender": "F;F;M;;M", "homepage": "https://tehiku.nz/;;;https://kmahelona.net;https://www.whakaatamaori.co.nz/board-of-directors/peter-lucas-jones", "dblp": "343/5720;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;", "orcid": "0000-0001-7194-1164;0000-0001-7816-074X;;;", "linkedin": "suzanne-duncan-86b66817/;gianna-leoni/;lee-steven-nz/;;https://nz.linkedin.com/company/te-hiku-media", "or_profile": "~Suzanne_Duncan1;~Gianna_Leoni1;~Lee_Steven1;~Keoni_Mahelona1;~Peter-Lucas_Jones2", "aff": "Te Reo Irirangi o Te Hiku o Te Ika;Te Reo Irirangi o Te Hiku o te Ika;Te Reo Irirangi o Te Hiku o te Ika;Te Hiku Media;Te Reo Irirangi o Te Hiku o Te Ika", "aff_domain": "tehiku.nz;tehiku.nz;tehiku.nz;tehiku.nz;tehiku.nz", "position": "Chief Operating Officer ;Senior Advisor;Data Scientist;CTO;Data Scientist", "bibtex": "@inproceedings{\nduncan2024fit,\ntitle={Fit for our purpose, not yours: Benchmark for a low-resource, Indigenous language},\nauthor={Suzanne Duncan and Gianna Leoni and Lee Steven and Keoni Mahelona and Peter-Lucas Jones},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=w5jfyvsRq3}\n}", "github": "", "reviewers": "chc9;Xhje;f8XK;bjZD", "pdf_size": 11434987, "rating": "4;5;6;10", "confidence": "4;4;4;5", "wc_summary_and_contributions": "37;73;132;235", "wc_strengths": "37;77;132;298", "wc_improvement": "83;146;28;237", "wc_limitations": "11;2;1;70", "wc_correctness": "22;1;1;42", "wc_clarity": "14;2;1;18", "wc_relation_to_prior_work": "21;1;1;84", "wc_documentation": "7;10;11;112", "wc_additional_feedback": "1;1;1;1", "wc_review": "233;313;308;1097", "wc_reply_reviewers": "0;0;0;121", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 2.277608394786075 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 119.25, 74.94122697154084 ], "wc_strengths_avg": [ 136.0, 99.42585176904446 ], "wc_improvement_avg": [ 123.5, 77.69974259931625 ], "wc_limitations_avg": [ 21.0, 28.556960622587273 ], "wc_correctness_avg": [ 16.5, 17.03672503740082 ], "wc_clarity_avg": [ 8.75, 7.39509972887452 ], "wc_relation_to_prior_work_avg": [ 26.75, 34.046842731742395 ], "wc_documentation_avg": [ 35.0, 44.48033273256845 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 487.75, 353.17515130597735 ], "wc_reply_reviewers_avg": [ 30.25, 52.39453692895854 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.950586375786717, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5oy3Wk_JJ9QJ:scholar.google.com/&scioq=Fit+for+our+purpose,+not+yours:+Benchmark+for+a+low-resource,+Indigenous+language&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "tehiku.nz;tehiku.nz;tehiku.nz;tehiku.nz;tehiku.nz", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Te Reo Irirangi o Te Hiku o Te Ika;Te Hiku Media", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "New Zealand" }, { "title": "Unified Generative and Discriminative Training for Multi-modal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93174", "id": "w67vRHZF13", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w67vRHZF13", "openreview": "https://openreview.net/forum?id=w67vRHZF13", "poster": "/media/PosterPDFs/NeurIPS%202024/93174.png?t=1731169701.2617998", "project": "", "author_site": "Wei Chow, Juncheng Li, Qifan Yu, Kaihang Pan, Hao Fei, Zhiqi Ge, Shuaiyang, Siliang Tang, Hanwang Zhang, QIANRU SUN", "tldr": "", "abstract": "In recent times, Vision-Language Models (VLMs) have been trained under two predominant paradigms. Generative training has enabled Multimodal Large Language Models (MLLMs) to tackle various complex tasks, yet issues such as hallucinations and weak object discrimination persist. Discriminative training, exemplified by models like CLIP, excels in zero-shot image-text classification and retrieval, yet struggles with complex scenarios requiring fine-grained semantic differentiation. This paper addresses these challenges by proposing a unified approach that integrates the strengths of both paradigms. Considering interleaved image-text sequences as the general format of input samples, we introduce a structure-induced training strategy that imposes semantic relationships between input samples and the MLLM\u2019s hidden state. This approach enhances the MLLM\u2019s ability to capture global semantics and distinguish fine-grained semantics. By leveraging dynamic sequence alignment within the Dynamic Time Warping framework and integrating a novel kernel for fine-grained semantic differentiation, our method effectively balances generative and discriminative tasks. Extensive experiments demonstrate the effectiveness of our approach, achieving state-of-the-art results in multiple generative tasks, especially those requiring cognitive and discrimination abilities. Additionally, our method surpasses discriminative benchmarks in interleaved and fine-grained retrieval tasks. By employing a retrieval-augmented generation strategy, our approach further enhances performance in some generative tasks within one model, offering a promising direction for future research in vision-language modeling.", "keywords": "vision-language;multi-modal understanding", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wei Chow;Juncheng Li;Qifan Yu;Kaihang Pan;Hao Fei;Zhiqi Ge;Shuai Yang;Siliang Tang;Hanwang Zhang;Qianru Sun", "authorids": "~Wei_Chow1;~Juncheng_Li3;~Qifan_Yu1;~Kaihang_Pan1;~Hao_Fei1;~Zhiqi_Ge1;~Shuai_Yang18;~Siliang_Tang1;~Hanwang_Zhang3;~Qianru_Sun2", "gender": "M;M;M;M;M;M;M;M;M;F", "homepage": "http://none.com;;https://github.com/Yuqifan1117;https://github.com/1308024507pkh;https://haofei.vip/;;;https://person.zju.edu.cn/en/siliang;https://mreallab.github.io/index.html;https://qianrusun.com/", "dblp": ";182/7674-6;;344/0647.html;81/3569-1;305/0099;;44/5693;79/8116.html;127/6132.html", "google_scholar": ";lm9s-QgAAAAJ;uodH3cwAAAAJ;https://scholar.google.com.hk/citations?user=lMQADDUAAAAJ;YGDX46AAAAAJ;NOiYcWYAAAAJ;l5HWdWEAAAAJ;8e7H3PcAAAAJ;YG0DFyYAAAAJ;https://scholar.google.de/citations?user=fNfrGMIAAAAJ", "orcid": ";0000-0003-2258-1291;;;0000-0003-3026-6347;;0009-0008-7260-8169;0000-0002-7356-9711;;0000-0003-2689-317X", "linkedin": ";;;;;;;siliang-tang-4734272a/;;", "or_profile": "~Wei_Chow1;~Juncheng_Li3;~Qifan_Yu1;~Kaihang_Pan1;~Hao_Fei1;~Zhiqi_Ge1;~Shuai_Yang18;~Siliang_Tang1;~Hanwang_Zhang3;~Qianru_Sun2", "aff": "Zhejiang University;National University of Singapore;Zhejiang University;Zhejiang University;National University of Singapore;Zhejiang University;Zhejiang University;Zhejiang University;Nanyang Technological University;Singapore Management University", "aff_domain": "zju.edu.cn;nus.edu;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn;zju.edu.cn;ntu.edu.sg;smu.edu.sg", "position": "Undergrad student;Postdoc;PhD student;PhD student;Postdoc;PhD student;Undergrad student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchow2024unified,\ntitle={Unified Generative and Discriminative Training for Multi-modal Large Language Models},\nauthor={Wei Chow and Juncheng Li and Qifan Yu and Kaihang Pan and Hao Fei and Zhiqi Ge and Shuai Yang and Siliang Tang and Hanwang Zhang and Qianru Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w67vRHZF13}\n}", "github": "", "reviewers": "4VP3;DDMF;Lcdg", "pdf_size": 7246773, "rating": "5;6;6", "confidence": "4;2;4", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "96;116;90", "wc_strengths": "22;22;40", "wc_weaknesses": "156;39;53", "wc_questions": "9;52;28", "wc_limitations": "1;7;27", "wc_review": "284;236;238", "wc_reply_reviewers": "19;18;0", "wc_reply_authors": "551;699;722", "reply_reviewers": "1;1;0", "reply_authors": "7;6;6", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 100.66666666666667, 11.115554667022044 ], "wc_strengths_avg": [ 28.0, 8.48528137423857 ], "wc_weaknesses_avg": [ 82.66666666666667, 52.16853031814827 ], "wc_questions_avg": [ 29.666666666666668, 17.594190960528863 ], "wc_limitations_avg": [ 11.666666666666666, 11.115554667022044 ], "wc_review_avg": [ 252.66666666666666, 22.17105219775452 ], "wc_reply_reviewers_avg": [ 12.333333333333334, 8.73053390247253 ], "wc_reply_authors_avg": [ 657.3333333333334, 75.77305296446458 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 6.333333333333333, 0.4714045207910317 ], "replies_avg": [ 26, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15558786185812970610&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;nus.edu;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn;zju.edu.cn;ntu.edu.sg;smu.edu.sg", "author_num": 10, "aff_unique_index": "0;1;0;0;1;0;0;0;2;3", "aff_unique_norm": "Zhejiang University;National University of Singapore;Nanyang Technological University;Singapore Management University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg;https://www.ntu.edu.sg;https://www.smu.edu.sg", "aff_unique_abbr": "ZJU;NUS;NTU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0;0;0;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Training Dynamics of Transformers to Recognize Word Co-occurrence via Gradient Flow Analysis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93173", "id": "w6q46IslSR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w6q46IslSR", "openreview": "https://openreview.net/forum?id=w6q46IslSR", "poster": "/media/PosterPDFs/NeurIPS%202024/93173.png?t=1731375173.5818732", "project": "", "author_site": "Hongru Yang, Bhavya Kailkhura, Zhangyang "Atlas" Wang, Yingbin Liang", "tldr": "", "abstract": "Understanding the training dynamics of transformers is important to explain the impressive capabilities behind large language models. \nIn this work, we study the dynamics of training a shallow transformer on a task of recognizing co-occurrence of two designated words. In the literature of studying training dynamics of transformers, several simplifications are commonly adopted such as weight reparameterization, attention linearization, special initialization, and lazy regime. In contrast, we analyze the gradient flow dynamics of simultaneously training three attention matrices and a linear MLP layer from random initialization, and provide a framework of analyzing such dynamics via a coupled dynamical system. We establish near minimum loss and characterize the attention model after training. We discover that gradient flow serves as an inherent mechanism that naturally divide the training process into two phases. In Phase 1, the linear MLP quickly aligns with the two target signals for correct classification, whereas the softmax attention remains almost unchanged. In Phase 2, the attention matrices and the MLP evolve jointly to enlarge the classification margin and reduce the loss to a near minimum value. Technically, we prove a novel property of the gradient flow, termed \\textit{automatic balancing of gradients}, which enables the loss values of different samples to decrease almost at the same rate and further facilitates the proof of near minimum training loss. We also conduct experiments to verify our theoretical results.", "keywords": "Transformers;gradient flow dynamics;implicit bias", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hongru Yang;Bhavya Kailkhura;Zhangyang Wang;Yingbin Liang", "authorids": "~Hongru_Yang1;~Bhavya_Kailkhura1;~Zhangyang_Wang1;~Yingbin_Liang1", "gender": "M;M;M;F", "homepage": ";https://people.llnl.gov/kailkhura1;https://vita-group.github.io;https://sites.google.com/view/yingbinliang/home", "dblp": "234/7562;132/8938;119/4026;51/332", "google_scholar": "VwBcercAAAAJ;SQpJmOgAAAAJ;pxFyKAIAAAAJ;lGgLAiIAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Hongru_Yang1;~Bhavya_Kailkhura1;~Zhangyang_Wang1;~Yingbin_Liang1", "aff": "University of Texas at Austin;Lawrence Livermore National Laboratory;University of Texas at Austin;The Ohio State University", "aff_domain": "utexas.edu;llnl.gov;utexas.edu;osu.edu", "position": "PhD student;Research Staff;Associate Professor;Professor", "bibtex": "@inproceedings{\nyang2024training,\ntitle={Training Dynamics of Transformers to Recognize Word Co-occurrence via Gradient Flow Analysis},\nauthor={Hongru Yang and Bhavya Kailkhura and Zhangyang Wang and Yingbin Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w6q46IslSR}\n}", "github": "", "reviewers": "G3j5;cFaT;ritc", "pdf_size": 657202, "rating": "5;6;6", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "37;142;233", "wc_strengths": "45;58;48", "wc_weaknesses": "28;133;37", "wc_questions": "64;203;88", "wc_limitations": "29;1;5", "wc_review": "203;537;411", "wc_reply_reviewers": "62;93;50", "wc_reply_authors": "644;102;356", "reply_reviewers": "1;1;1", "reply_authors": "4;3;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 137.33333333333334, 80.08467740807572 ], "wc_strengths_avg": [ 50.333333333333336, 5.557777333511022 ], "wc_weaknesses_avg": [ 66.0, 47.51841748206689 ], "wc_questions_avg": [ 118.33333333333333, 60.664835137188184 ], "wc_limitations_avg": [ 11.666666666666666, 12.36482466066094 ], "wc_review_avg": [ 383.6666666666667, 137.7179081391459 ], "wc_reply_reviewers_avg": [ 68.33333333333333, 18.116904322268255 ], "wc_reply_authors_avg": [ 367.3333333333333, 221.41564734428522 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4138920972352167873&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "utexas.edu;llnl.gov;utexas.edu;osu.edu", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Texas at Austin;Lawrence Livermore National Laboratory;Ohio State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utexas.edu;https://www.llnl.gov;https://www.osu.edu", "aff_unique_abbr": "UT Austin;LLNL;OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Self-Calibrated Tuning of Vision-Language Models for Out-of-Distribution Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93172", "id": "w6vbfSC1y0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w6vbfSC1y0", "openreview": "https://openreview.net/forum?id=w6vbfSC1y0", "poster": "/media/PosterPDFs/NeurIPS%202024/93172.png?t=1732160763.0231981", "project": "", "author_site": "Geng Yu, Jianing Zhu, Jiangchao Yao, Bo Han", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is crucial for deploying reliable machine learning models in open-world applications. Recent advances in CLIP-based OOD detection have shown promising results via regularizing prompt tuning with OOD features extracted from ID data. However, the irrelevant context mined from ID data can be spurious due to the inaccurate foreground-background decomposition, thus limiting the OOD detection performance. In this work, we propose a novel framework, namely, \\textit{Self-Calibrated Tuning (SCT)}, to mitigate this problem for effective OOD detection with only the given few-shot ID data. Specifically, SCT introduces modulating factors respectively on the two components of the original learning objective. It adaptively directs the optimization process between the two tasks during training on data with different prediction uncertainty to calibrate the influence of OOD regularization, which is compatible with many prompt tuning based OOD detection methods. Extensive experiments and analyses have been conducted to characterize and demonstrate the effectiveness of the proposed SCT. The code is publicly available at: https://github.com/tmlr-group/SCT.", "keywords": "out-of-distribution detection;vision-language model;prompt-tuning", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/d88248e69c3d65a868af87a8adefc747838165bf.zip", "author": "Geng Yu;Jianing Zhu;Jiangchao Yao;Bo Han", "authorids": "~Geng_Yu1;~Jianing_Zhu2;~Jiangchao_Yao1;~Bo_Han1", "gender": "M;M;M;M", "homepage": "https://warriors-30.github.io/;https://zfancy.github.io/;https://sunarker.github.io/;https://bhanml.github.io/", "dblp": ";129/6807;166/5900;241/0472-3", "google_scholar": "SNp2hXIAAAAJ;82uNA3MAAAAJ;w8oDh9QAAAAJ;nTNjqHwAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Geng_Yu1;~Jianing_Zhu2;~Jiangchao_Yao1;~bo_han2", "aff": "Shanghai Jiaotong University;RIKEN;Shanghai Artificial Intelligence Laboratory;MBZUAI", "aff_domain": "sjtu.edu.cn;riken.jp;pjlab.org.cn;mbzuai.ac.ae", "position": "MS student;Research Intern;Researcher;Researcher", "bibtex": "@inproceedings{\nyu2024selfcalibrated,\ntitle={Self-Calibrated Tuning of Vision-Language Models for Out-of-Distribution Detection},\nauthor={Geng Yu and Jianing Zhu and Jiangchao Yao and Bo Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=w6vbfSC1y0}\n}", "github": "", "reviewers": "pBeU;fpY7;wD2g;pUkR;34fm", "pdf_size": 884997, "rating": "3;4;4;5;7", "confidence": "5;4;2;4;5", "soundness": "2;2;2;3;4", "novelty": "2;2;2;3;4", "presentation": "3;2;2;4;4", "wc_summary": "54;68;41;78;104", "wc_strengths": "49;23;9;80;98", "wc_weaknesses": "193;780;72;42;3", "wc_questions": "2;4;7;37;68", "wc_limitations": "1;13;1;25;4", "wc_review": "299;888;130;262;277", "wc_reply_reviewers": "55;241;151;0;0", "wc_reply_authors": "52;62;52;0;0", "reply_reviewers": "1;1;1;0;0", "reply_authors": "3;3;3;1;1", "rating_avg": [ 4.6, 1.3564659966250536 ], "confidence_avg": [ 4.0, 1.0954451150103321 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.6, 0.8 ], "presentation_avg": [ 3.0, 0.8944271909999159 ], "wc_summary_avg": [ 69.0, 21.52208168370337 ], "wc_strengths_avg": [ 51.8, 33.46281518342412 ], "wc_weaknesses_avg": [ 218.0, 288.09234630583296 ], "wc_questions_avg": [ 23.6, 25.601562452319193 ], "wc_limitations_avg": [ 8.8, 9.21737489744233 ], "wc_review_avg": [ 371.2, 265.0550131576462 ], "wc_reply_reviewers_avg": [ 89.4, 93.7498799999232 ], "wc_reply_authors_avg": [ 33.2, 27.352513595646013 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2691909510290828, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:TYQNmPOp1CYJ:scholar.google.com/&scioq=Self-Calibrated+Tuning+of+Vision-Language+Models+for+Out-of-Distribution+Detection&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "sjtu.edu.cn;riken.jp;pjlab.org.cn;mbzuai.ac.ae", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;RIKEN;Shanghai Artificial Intelligence Laboratory;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.riken.jp;http://www.shailab.org/;https://www.mbzuai.ac.ae", "aff_unique_abbr": "SJTU;RIKEN;Shanghai AI Lab;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "China;Japan;United Arab Emirates" }, { "title": "Humor in AI: Massive Scale Crowd-Sourced Preferences and Benchmarks for Cartoon Captioning", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97450", "id": "w90ZH5v34S", "proceeding": "", "pdf": "https://openreview.net/pdf?id=w90ZH5v34S", "openreview": "https://openreview.net/forum?id=w90ZH5v34S", "poster": "", "project": "", "author_site": "Jifan Zhang, Lalit Jain, Yang Guo, Jiayi Chen, Kuan Zhou, Siddharth Suresh, Andrew Wagenmaker, Scott Sievert, Timothy T Rogers, Kevin Jamieson, Bob Mankoff, Robert Nowak", "tldr": "", "abstract": "We present a novel multimodal preference dataset for creative tasks, consisting of over 250 million human votes on more than 2.2 million captions, collected through crowdsourcing rating data for The New Yorker's weekly cartoon caption contest over the past eight years. This unique dataset supports the development and evaluation of multimodal large language models and preference-based fine-tuning algorithms for humorous caption generation. We propose novel benchmarks for judging the quality of model-generated captions, utilizing both GPT4 and human judgments to establish ranking-based evaluation strategies. Our experimental results highlight the limitations of current fine-tuning methods, such as RLHF and DPO, when applied to creative tasks. Furthermore, we demonstrate that even state-of-the-art models like GPT4 and Claude currently underperform top human contestants in generating humorous captions. As we conclude this extensive data collection effort, we release the entire preference dataset to the research community, fostering further advancements in AI humor generation and evaluation.", "keywords": "Large Language Model;Humor;Reasoning;Preference-Based Alignment", "primary_area": "", "supplementary_material": "/attachment/32a36cf271afbc54368131938de22edfa75fdc92.pdf", "author": "Jifan Zhang;Lalit K Jain;Yang Guo;Jiayi Chen;Kuan Lok Zhou;Siddharth Suresh;Andrew Wagenmaker;Scott Sievert;Timothy T. Rogers;Kevin Jamieson;Bob Mankoff;Robert D Nowak", "authorids": "~Jifan_Zhang1;~Lalit_K_Jain1;~Yang_Guo4;~Jiayi_Chen13;~Kuan_Lok_Zhou1;~Siddharth_Suresh1;~Andrew_Wagenmaker1;~Scott_Sievert1;~Timothy_T._Rogers1;~Kevin_Jamieson1;~Bob_Mankoff1;~Robert_D_Nowak1", "gender": "M;;M;F;M;;M;;M;M;M;M", "homepage": "https://jifanz.github.io/;http://www.lalitjain.com;;;;https://www.sidsuresh.com/;https://wagenmaker.github.io;;http://concepts.psych.wisc.edu/;;https://www.bobmankoff.com/;http://nowak.ece.wisc.edu", "dblp": "277/6616;178/3228;;;;262/0748;195/1036;222/3229;25/7229;85/10260;209/4940;n/RobertDNowak", "google_scholar": "ZUOsJWcAAAAJ;hGMSFu4AAAAJ;BbQQEPcAAAAJ;;;xsyrntwAAAAJ;ym8AZSIAAAAJ;;7u_uyOsAAAAJ;;;fn13u8IAAAAJ", "orcid": ";;;;;;;;0000-0001-6304-755X;;;", "linkedin": ";;;jiayi-chen-7562a72aa/;david-zhou-822929310/;siddsuresh97/;;;;;;", "or_profile": "~Jifan_Zhang1;~Lalit_K_Jain1;~Yang_Guo4;~Jiayi_Chen13;~Kuan_Lok_Zhou1;~Siddharth_Suresh1;~Andrew_Wagenmaker1;~Scott_Sievert1;~Timothy_T._Rogers1;~Kevin_Jamieson1;~Bob_Mankoff1;~Robert_D_Nowak1", "aff": "University of Wisconsin, Madison;University of Washington;;University of Wisconsin - Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;University of Washington, Seattle;University of Wisconsin, Madison;University of Wisconsin - Madison;University of Washington;Cartoon Collections;University of Wisconsin - Madison", "aff_domain": "wisc.edu;uw.edu;;wisc.edu;wisc.edu;wisc.edu;uw.edu;wisc.edu;wisc.edu;washington.edu;cartooncollections.com;", "position": "PhD student;Assistant Professor;;Undergrad student;Undergrad student;MS student;PhD student;PhD student;Full Professor;Associate Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2024humor,\ntitle={Humor in {AI}: Massive Scale Crowd-Sourced Preferences and Benchmarks for Cartoon Captioning},\nauthor={Jifan Zhang and Lalit K Jain and Yang Guo and Jiayi Chen and Kuan Lok Zhou and Siddharth Suresh and Andrew Wagenmaker and Scott Sievert and Timothy T. Rogers and Kevin Jamieson and Bob Mankoff and Robert D Nowak},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=w90ZH5v34S}\n}", "github": "", "reviewers": "RQwh;9bsn;1BvT;DL7F", "pdf_size": 929234, "rating": "7;7;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "134;181;148;92", "wc_strengths": "88;109;87;120", "wc_improvement": "109;66;136;49", "wc_limitations": "39;1;102;9", "wc_correctness": "32;1;82;74", "wc_clarity": "71;1;34;30", "wc_relation_to_prior_work": "141;1;86;32", "wc_documentation": "14;1;27;104", "wc_additional_feedback": "1;1;1;1", "wc_review": "629;362;703;511", "wc_reply_reviewers": "416;22;0;0", "wc_reply_authors": "867;34;0;0", "reply_reviewers": "2;1;0;0", "reply_authors": "4;2;1;1", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 138.75, 31.932546093288583 ], "wc_strengths_avg": [ 101.0, 14.053469322555197 ], "wc_improvement_avg": [ 90.0, 34.40203482353915 ], "wc_limitations_avg": [ 37.75, 39.707524475847144 ], "wc_correctness_avg": [ 47.25, 32.7671710710583 ], "wc_clarity_avg": [ 34.0, 24.869660230891775 ], "wc_relation_to_prior_work_avg": [ 65.0, 53.39007398384086 ], "wc_documentation_avg": [ 36.5, 40.04060439104285 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 551.25, 128.94645206441317 ], "wc_reply_reviewers_avg": [ 109.5, 177.18563711542762 ], "wc_reply_authors_avg": [ 225.25, 370.77444288947424 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10062683978640058958&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "wisc.edu;uw.edu;;wisc.edu;wisc.edu;wisc.edu;uw.edu;wisc.edu;wisc.edu;washington.edu;cartooncollections.com;", "author_num": 12, "aff_unique_index": "0;1;2;2;2;1;0;2;1;3;2", "aff_unique_norm": "University of Wisconsin;University of Washington;University of Wisconsin-Madison;Cartoon Collections", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.wisc.edu;https://www.washington.edu;https://www.wisc.edu;", "aff_unique_abbr": "UW;UW;UW-Madison;", "aff_campus_unique_index": "0;0;0;0;2;0;0;0", "aff_campus_unique": "Madison;;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Energy-Based Modelling for Discrete and Mixed Data via Heat Equations on Structured Spaces", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93171", "id": "wAqdvcK1Fv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wAqdvcK1Fv", "openreview": "https://openreview.net/forum?id=wAqdvcK1Fv", "poster": "", "project": "", "author_site": "Tobias Schr\u00f6der, Zijing Ou, Yingzhen Li, Andrew Duncan", "tldr": "", "abstract": "Energy-based models (EBMs) offer a flexible framework for probabilistic modelling across various data domains. However, training EBMs on data in discrete or mixed state spaces poses significant challenges due to the lack of robust and fast sampling methods. In this work, we propose to train discrete EBMs with Energy Discrepancy, a loss function which only requires the evaluation of the energy function at data points and their perturbed counterparts, thus eliminating the need for Markov chain Monte Carlo. We introduce perturbations of the data distribution by simulating a diffusion process on the discrete state space endowed with a graph structure. This allows us to inform the choice of perturbation from the structure of the modelled discrete variable, while the continuous time parameter enables fine-grained control of the perturbation. Empirically, we demonstrate the efficacy of the proposed approaches in a wide range of applications, including the estimation of discrete densities with non-binary vocabulary and binary image modelling. We also introduce the first application of EBMs to tabular data sets with applications in synthetic data generation and calibrated classification.", "keywords": "Energy-based models;discrete probabilistic modelling;tabular data", "primary_area": "generative_models", "supplementary_material": "/attachment/4a9f159a87f431d7e18d6d9fae84cdf1bddadd30.zip", "author": "Tobias Schr\u00f6der;Zijing Ou;Yingzhen Li;Andrew B. Duncan", "authorids": "~Tobias_Schr\u00f6der2;~Zijing_Ou1;~Yingzhen_Li1;~Andrew_B._Duncan1", "gender": ";F;M;M", "homepage": "https://j-zin.github.io/;http://yingzhenli.net/home/en/;;https://tobias-schroeder.github.io", "dblp": "246/3072;117/9230;189/0076;", "google_scholar": "zZg3Cm0AAAAJ;https://scholar.google.se/citations?hl=en;https://scholar.google.co.uk/citations?user=3ZzC72cAAAAJ;", "orcid": ";;;", "linkedin": ";;andrew-duncan-404690140/;tobias-schroeder-3295b3215/", "or_profile": "~Zijing_Ou1;~Yingzhen_Li1;~Andrew_Duncan1;~Tobias_Schroeder1", "aff": "Imperial College London;Imperial College London;;Imperial College London", "aff_domain": "imperial.ac.uk;imperial.ac.uk;;ic.ac.uk", "position": "PhD student;Associate Professor;;PhD student", "bibtex": "@inproceedings{\nschr{\\\"o}der2024energybased,\ntitle={Energy-Based Modelling for Discrete and Mixed Data via Heat Equations on Structured Spaces},\nauthor={Tobias Schr{\\\"o}der and Zijing Ou and Yingzhen Li and Andrew B. Duncan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wAqdvcK1Fv}\n}", "github": "", "reviewers": "t2RA;FxWc;qwnm;9ZAB", "pdf_size": 3538352, "rating": "6;6;6;6", "confidence": "4;4;3;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;2", "wc_summary": "103;98;155;73", "wc_strengths": "57;104;108;50", "wc_weaknesses": "159;95;109;182", "wc_questions": "51;937;95;2", "wc_limitations": "8;66;2;2", "wc_review": "378;1300;469;309", "wc_reply_reviewers": "69;35;22;14", "wc_reply_authors": "542;1490;10;10", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 107.25, 29.819247140060394 ], "wc_strengths_avg": [ 79.75, 26.404308360568734 ], "wc_weaknesses_avg": [ 136.25, 35.54838252297846 ], "wc_questions_avg": [ 271.25, 385.7760846657035 ], "wc_limitations_avg": [ 19.5, 26.95830113341714 ], "wc_review_avg": [ 614.0, 400.10686072598156 ], "wc_reply_reviewers_avg": [ 35.0, 21.011901389450696 ], "wc_reply_authors_avg": [ 513.0, 604.4394097012537 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VDLz6VRHl_wJ:scholar.google.com/&scioq=Energy-Based+Modelling+for+Discrete+and+Mixed+Data+via+Heat+Equations+on+Structured+Spaces&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "imperial.ac.uk;imperial.ac.uk;;ic.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Structured Representations with Hyperbolic Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93170", "id": "wBtmN8SZ2B", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wBtmN8SZ2B", "openreview": "https://openreview.net/forum?id=wBtmN8SZ2B", "poster": "/media/PosterPDFs/NeurIPS%202024/93170.png?t=1731743482.698525", "project": "", "author_site": "Aditya Sinha, Siqi Zeng, Makoto Yamada, Han Zhao", "tldr": "", "abstract": "Most real-world datasets consist of a natural hierarchy between classes or an inherent label structure that is either already available or can be constructed cheaply. However, most existing representation learning methods ignore this hierarchy, treating labels as permutation invariant. Recent work [Zeng et al., 2022] proposes using this structured information explicitly, but the use of Euclidean distance may distort the underlying semantic context [Chen et al., 2013]. In this work, motivated by the advantage of hyperbolic spaces in modeling hierarchical relationships, we propose a novel approach HypStructure: a Hyperbolic Structured regularization approach to accurately embed the label hierarchy into the learned representations. HypStructure is a simple-yet-effective regularizer that consists of a hyperbolic tree-based representation loss along with a centering loss, and can be combined with any standard task loss to learn hierarchy-informed features. Extensive experiments on several large-scale vision benchmarks demonstrate the efficacy of HypStructure in reducing distortion and boosting generalization performance especially under low dimensional scenarios. For a better understanding of structured representation, we perform eigenvalue analysis that links the representation geometry to improved Out-of-Distribution (OOD) detection performance seen empirically.", "keywords": "hierarchical representation;representation learning;hyperbolic geometry", "primary_area": "other", "supplementary_material": "", "author": "Aditya Sinha;Siqi Zeng;Makoto Yamada;Han Zhao", "authorids": "~Aditya_Sinha1;~Siqi_Zeng1;~Makoto_Yamada3;~Han_Zhao1", "gender": "M;;M;M", "homepage": "https://adityaasinha28.github.io/;https://cindy2000sh.github.io/;https://groups.oist.jp/mlds;https://hanzhaoml.github.io/", "dblp": ";135/7166;56/4937;03/3520-2", "google_scholar": "5letoXIAAAAJ;5If-3u4AAAAJ;1cKNu1gAAAAJ;x942ipYAAAAJ", "orcid": ";0009-0008-2042-0754;;0000-0002-8579-1600", "linkedin": "adityaasinha28/;siqi-zeng-91b067175/;;", "or_profile": "~Aditya_Sinha1;~Siqi_Zeng1;~Makoto_Yamada3;~Han_Zhao1", "aff": "Department of Computer Science;University of Illinois Urbana-Champaign;Okinawa Institute of Science and Technology (OIST);University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;cs.illinois.edu;oist.jp;illinois.edu", "position": "MS student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nsinha2024learning,\ntitle={Learning Structured Representations with Hyperbolic Embeddings},\nauthor={Aditya Sinha and Siqi Zeng and Makoto Yamada and Han Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wBtmN8SZ2B}\n}", "github": "", "reviewers": "3c64;gWWe;dBDc;kf6R", "pdf_size": 14277826, "rating": "5;5;5;6", "confidence": "3;4;4;3", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "100;54;70;65", "wc_strengths": "110;49;86;109", "wc_weaknesses": "136;271;251;228", "wc_questions": "2;7;43;2", "wc_limitations": "62;80;10;46", "wc_review": "410;461;460;450", "wc_reply_reviewers": "62;45;78;194", "wc_reply_authors": "113;997;67;48", "reply_reviewers": "1;1;1;1", "reply_authors": "2;4;2;2", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 72.25, 17.03489066592445 ], "wc_strengths_avg": [ 88.5, 24.743686063317245 ], "wc_weaknesses_avg": [ 221.5, 51.655106233556424 ], "wc_questions_avg": [ 13.5, 17.153716798408443 ], "wc_limitations_avg": [ 49.5, 25.782746168707476 ], "wc_review_avg": [ 445.25, 20.801141795584204 ], "wc_reply_reviewers_avg": [ 94.75, 58.47809418919191 ], "wc_reply_authors_avg": [ 306.25, 399.5043022296506 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ScVa_IC4vC8J:scholar.google.com/&scioq=Learning+Structured+Representations+with+Hyperbolic+Embeddings&hl=en&as_sdt=0,21", "gs_version_total": 6, "email": "cs.illinois.edu;cs.illinois.edu;oist.jp;illinois.edu", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign;Okinawa Institute of Science and Technology", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": ";https://illinois.edu;https://www.oist.jp", "aff_unique_abbr": ";UIUC;OIST", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;2;1", "aff_country_unique": ";United States;Japan" }, { "title": "FactorSim: Generative Simulation via Factorized Representation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93169", "id": "wBzvYh3PRA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wBzvYh3PRA", "openreview": "https://openreview.net/forum?id=wBzvYh3PRA", "poster": "", "project": "", "author_site": "Fan-Yun Sun, Harini S I, Angela Yi, Yihan Zhou, Alex Zook, Jonathan Tremblay, Logan Cross, Jiajun Wu, Nick Haber", "tldr": "", "abstract": "Generating simulations to train intelligent agents in game-playing and robotics from natural language input, user input, or task documentation remains an open-ended challenge. Existing approaches focus on parts of this challenge, such as generating reward functions or task hyperparameters. Unlike previous work, we introduce FACTORSIM that generates full simulations in code from language input that can be used to train agents. Exploiting the structural modularity specific to coded simulations, we propose to use a factored partially observable Markov decision process representation that allows us to reduce context dependence during each step of the generation. For evaluation, we introduce a generative simulation benchmark that assesses the generated simulation code\u2019s accuracy and effectiveness in facilitating zero-shot transfers in reinforcement learning settings. We show that FACTORSIM outperforms existing methods in generating simulations regarding prompt alignment (i.e., accuracy), zero-shot transfer abilities, and human evaluation. We also demonstrate its effectiveness in generating robotic tasks.", "keywords": "generative simulation;POMDP;Large Language Models", "primary_area": "robotics", "supplementary_material": "", "author": "Fan-Yun Sun;Harini S I;Angela Yi;Yihan Zhou;Alex Zook;Jonathan Tremblay;Logan Cross;Jiajun Wu;Nick Haber", "authorids": "~Fan-Yun_Sun1;~Harini_S_I1;~Angela_Yi2;~Yihan_Zhou4;~Alex_Zook1;~Jonathan_Tremblay1;~Logan_Cross1;~Jiajun_Wu1;~Nick_Haber1", "gender": "M;F;F;F;;Not Specified;M;M;", "homepage": "https://sunfanyun.com/;https://harini-si.github.io/;;;;https://jtremblay.org/;https://locross93.github.io/;https://jiajunwu.com;", "dblp": "227/3016;356/3166;;;05/10531;17/8925;;117/4768;179/4983", "google_scholar": "TOw2RMMAAAAJ;https://scholar.google.com/citations?view_op=list_works;;;2nA9bVMAAAAJ;https://scholar.google.ca/citations?user=zeS5UJEAAAAJ;;2efgcS0AAAAJ;euNCoVYAAAAJ", "orcid": ";;;;0000-0002-0178-5060;;0000-0002-5248-9499;0000-0002-4176-343X;0000-0001-8804-7804", "linkedin": "sunfanyun/;harini-s-i-a140b61bb/;yiangela/;yz-mle/;alexander-zook-b75b7a31/;;;jiajunwu/;", "or_profile": "~Fan-Yun_Sun1;~Harini_S_I1;~Angela_Yi2;~Yihan_Zhou4;~Alex_Zook1;~Jonathan_Tremblay1;~Logan_Cross1;~Jiajun_Wu1;~Nick_Haber1", "aff": "Stanford University;Birla Institute of Technology and Science, Pilani ;Meta Facebook;Stanford University;NVIDIA;NVIDIA;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;bits-pilani.ac.in;meta.com;stanford.edu;nvidia.com;nvidia.com;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Undergrad student;Software Engineer;MS student;Researcher;Researcher;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nsun2024factorsim,\ntitle={FactorSim: Generative Simulation via Factorized Representation},\nauthor={Fan-Yun Sun and Harini S I and Angela Yi and Yihan Zhou and Alex Zook and Jonathan Tremblay and Logan Cross and Jiajun Wu and Nick Haber},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wBzvYh3PRA}\n}", "github": "", "reviewers": "p3c7;6Px5;yVST;EKr6", "pdf_size": 6117930, "rating": "4;6;6;6", "confidence": "3;3;4;3", "soundness": "2;3;2;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "78;66;128;39", "wc_strengths": "23;57;47;88", "wc_weaknesses": "224;31;142;279", "wc_questions": "43;185;10;39", "wc_limitations": "18;5;2;9", "wc_review": "386;344;329;454", "wc_reply_reviewers": "163;108;58;0", "wc_reply_authors": "374;24;16;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.75, 32.26743714644843 ], "wc_strengths_avg": [ 53.75, 23.31710745354149 ], "wc_weaknesses_avg": [ 169.0, 93.40503198436367 ], "wc_questions_avg": [ 69.25, 68.03078347336594 ], "wc_limitations_avg": [ 8.5, 6.020797289396148 ], "wc_review_avg": [ 378.25, 48.468417552051356 ], "wc_reply_reviewers_avg": [ 82.25, 60.28422264573045 ], "wc_reply_authors_avg": [ 103.5, 156.41211589899294 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-y2nnhlfp0kJ:scholar.google.com/&scioq=FactorSim:+Generative+Simulation+via+Factorized+Representation&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "stanford.edu;bits-pilani.ac.in;meta.com;stanford.edu;nvidia.com;nvidia.com;stanford.edu;stanford.edu;stanford.edu", "author_num": 9, "aff_unique_index": "0;1;2;0;3;3;0;0;0", "aff_unique_norm": "Stanford University;Birla Institute of Technology and Science;Meta;NVIDIA", "aff_unique_dep": ";;Meta Platforms, Inc.;NVIDIA Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.bits-pilani.ac.in;https://meta.com;https://www.nvidia.com", "aff_unique_abbr": "Stanford;BITS Pilani;Meta;NVIDIA", "aff_campus_unique_index": "0;1;0;0;0;0", "aff_campus_unique": "Stanford;Pilani;", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0", "aff_country_unique": "United States;India" }, { "title": "Learning Spatially-Aware Language and Audio Embeddings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93168", "id": "wDDvJzvvBR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wDDvJzvvBR", "openreview": "https://openreview.net/forum?id=wDDvJzvvBR", "poster": "/media/PosterPDFs/NeurIPS%202024/93168.png?t=1731714110.0397356", "project": "", "author_site": "Bhavika Devnani, Skyler Seto, Zakaria Aldeneh, Alessandro Toso, Elena Menyaylenko, Barry-John Theobald, Jonathan Sheaffer, Miguel Sarabia", "tldr": "", "abstract": "Humans can picture a sound scene given an imprecise natural language description. For example, it is easy to imagine an acoustic environment given a phrase like \"the lion roar came from right behind me!\". For a machine to have the same degree of comprehension, the machine must know what a lion is (semantic attribute), what the concept of \"behind\" is (spatial attribute) and how these pieces of linguistic information align with the semantic and spatial attributes of the sound (what a roar sounds like when its coming from behind). \nState-of-the-art audio foundation models, such as CLAP, which learn to map between audio scenes and natural textual descriptions, are trained on non-spatial audio and text pairs, and hence lack spatial awareness. In contrast, sound event localization and detection models are limited to recognizing sounds from a fixed number of classes, and they localize the source to absolute position (e.g., 0.2m) rather than a position described using natural language (e.g., \"next to me\"). To address these gaps, we present ELSA (Embeddings for Language and Spatial Audio), a spatially aware-audio and text embedding model trained using multimodal contrastive learning. ELSA supports non-spatial audio, spatial audio, and open vocabulary text captions describing both the spatial and semantic components of sound. To train ELSA: (a) we spatially augment the audio and captions of three open-source audio datasets totaling 4,738 hours and 890,038 samples of audio comprised from 8,972 simulated spatial configurations, and (b) we design an encoder to capture the semantics of non-spatial audio, and the semantics and spatial attributes of spatial audio using contrastive learning. ELSA is a single model that is competitive with state-of-the-art for both semantic retrieval and 3D source localization. In particular, ELSA achieves +2.8\\% mean audio-to-text and text-to-audio R@1 above the LAION-CLAP baseline, and outperforms by -11.6\u00b0 mean-absolute-error in 3D source localization over the SeldNET baseline on the TUT Sound Events 2018 benchmark. Moreover, we show that the representation-space of ELSA is structured, enabling swapping of direction of audio via vector arithmetic of two directional text embeddings.", "keywords": "multimodal embeddings;spatial audio;contrastive learning", "primary_area": "speech_and_audio", "supplementary_material": "", "author": "Bhavika Suresh Devnani;Skyler Seto;Zakaria Aldeneh;Alessandro Toso;YELENA MENYAYLENKO;Barry-John Theobald;Jonathan Sheaffer;Miguel Sarabia", "authorids": "~Bhavika_Suresh_Devnani1;~Skyler_Seto1;~Zakaria_Aldeneh1;~Alessandro_Toso2;~YELENA_MENYAYLENKO1;~Barry-John_Theobald1;~Jonathan_Sheaffer1;~Miguel_Sarabia1", "gender": "F;;;M;F;M;M;", "homepage": ";;;;;;https://www.apple.com;", "dblp": ";173/5386;;;;86/6624;;", "google_scholar": ";428y_sgAAAAJ;1AHzh04AAAAJ;;;DNrQd3IAAAAJ;gNqalHoAAAAJ;", "orcid": ";;;;;;0000-0001-9831-9631;", "linkedin": "bhavikadevnani/;;;alessandro-toso-2747991/;yelenamenyaylenko/;barry-john-theobald-392a0611a/;jsheaffer/;", "or_profile": "~Bhavika_Suresh_Devnani1;~Skyler_Seto1;~Zakaria_Aldeneh1;~Alessandro_Toso2;~YELENA_MENYAYLENKO1;~Barry-John_Theobald1;~Jonathan_Sheaffer1;~Miguel_Sarabia1", "aff": "Apple;Apple;Apple;Apple;Apple;Apple;Apple;", "aff_domain": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\ndevnani2024learning,\ntitle={Learning Spatially-Aware Language and Audio Embeddings},\nauthor={Bhavika Suresh Devnani and Skyler Seto and Zakaria Aldeneh and Alessandro Toso and YELENA MENYAYLENKO and Barry-John Theobald and Jonathan Sheaffer and Miguel Sarabia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wDDvJzvvBR}\n}", "github": "", "reviewers": "rKMo;SQBX;USXb;JMmV", "pdf_size": 1888014, "rating": "6;6;7;7", "confidence": "4;3;4;5", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "4;2;4;3", "wc_summary": "84;92;51;82", "wc_strengths": "76;59;84;117", "wc_weaknesses": "193;142;86;378", "wc_questions": "2;248;204;5", "wc_limitations": "28;69;74;4", "wc_review": "383;610;499;586", "wc_reply_reviewers": "34;38;0;78", "wc_reply_authors": "22;25;0;144", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;3", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 77.25, 15.610493265749165 ], "wc_strengths_avg": [ 84.0, 21.083168642308014 ], "wc_weaknesses_avg": [ 199.75, 109.65029639722822 ], "wc_questions_avg": [ 114.75, 112.33738246906059 ], "wc_limitations_avg": [ 43.75, 29.07210862665452 ], "wc_review_avg": [ 519.5, 88.97331060492242 ], "wc_reply_reviewers_avg": [ 37.5, 27.654113618049667 ], "wc_reply_authors_avg": [ 47.75, 56.40201680791211 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18182436082373407914&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;apple.com;", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Communication Efficient Distributed Training with Distributed Lion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93167", "id": "wDirCeTIoz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wDirCeTIoz", "openreview": "https://openreview.net/forum?id=wDirCeTIoz", "poster": "", "project": "", "author_site": "Bo Liu, Lemeng Wu, Lizhang Chen, Kaizhao Liang, Jiaxu Zhu, Chen Liang, Raghuraman Krishnamoorthi, Qiang Liu", "tldr": "", "abstract": "The Lion optimizer has been a promising competitor with the AdamW for training large AI models, with advantages in memory, computation, and sample efficiency. In this paper, we introduce Distributed Lion, an innovative adaptation of Lion for distributed training environments. Leveraging the sign operator in Lion, our Distributed Lion only requires to communicate binary or lower-precision vectors\nbetween workers to the center server, significantly reducing the communication cost. \nOur theoretical analysis confirms Distributed Lion's convergence properties. Empirical results demonstrate its robustness across a range of tasks, worker counts, and batch sizes, on both vision and language problems. Notably, Distributed Lion attains comparable performance to standard Lion or AdamW optimizers applied on aggregated gradients, but with significantly reduced communication bandwidth. This feature is particularly advantageous for training large models. In addition, we also demonstrate that \\mavolion{} presents a more favorable performance-bandwidth balance compared to existing efficient distributed methods such as deep gradient compression and ternary gradients.", "keywords": "Distributed Optimization", "primary_area": "infrastructure", "supplementary_material": "", "author": "Bo Liu;Lemeng Wu;Lizhang Chen;Kaizhao Liang;Jiaxu Zhu;Chen Liang;Raghuraman Krishnamoorthi;qiang liu", "authorids": "~Bo_Liu13;~Lemeng_Wu1;~Lizhang_Chen1;~Kaizhao_Liang1;~Jiaxu_Zhu2;~Chen_Liang1;~Raghuraman_Krishnamoorthi1;~qiang_liu4", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://cranial-xix.github.io/;https://sites.google.com/utexas.edu/wlm/home?authuser=1;https://l-z-chen.github.io/;https://kaizhaoliang.github.io/Portfolio/;;;http://crazydonkey200.github.io/;https://www.cs.utexas.edu/~lqiang/", "dblp": ";232/3021;225/1559;239/5146;;;35/3221;61/3234-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=PCDSl2sAAAAJ;;qKLmNfoAAAAJ;hYlDlh4AAAAJ;F1mr9C0AAAAJ;ILQ8_ekAAAAJ;https://scholar.google.com.tw/citations?user=2qDh4WUAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;;kaizhao-liang-427a42132/;jiaxuzhu/;raghuraman-krishnamoorthi-b8670a5/;;", "or_profile": "~Bo_Liu13;~Lemeng_Wu1;~Lizhang_Chen1;~Kaizhao_Liang1;~Jiaxu_Zhu2;~Raghuraman_Krishnamoorthi1;~Chen_Liang2;~Qiang_Liu1", "aff": "University of Texas, Austin;University of Texas, Austin;University of Texas at Austin;SambaNova Systems, Inc;Meta Facebook;Meta Facebook;Google DeepMind;University of Texas, Austin", "aff_domain": "cs.utexas.edu;cs.utexas.edu;utexas.edu;sambanovasystems.com;meta.com;meta.com;google.com;utexas.edu", "position": "PhD student;PhD student;PhD student;Principal Engineer;Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nliu2024communication,\ntitle={Communication Efficient Distributed Training with Distributed Lion},\nauthor={Bo Liu and Lemeng Wu and Lizhang Chen and Kaizhao Liang and Jiaxu Zhu and Chen Liang and Raghuraman Krishnamoorthi and qiang liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wDirCeTIoz}\n}", "github": "", "reviewers": "aXYH;gUvC;ZPCr;7ynU", "pdf_size": 880189, "rating": "5;6;6;7", "confidence": "5;2;4;3", "soundness": "2;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "84;90;59;71", "wc_strengths": "59;69;23;74", "wc_weaknesses": "176;80;147;63", "wc_questions": "41;11;25;76", "wc_limitations": "36;1;12;10", "wc_review": "396;251;266;294", "wc_reply_reviewers": "103;0;104;34", "wc_reply_authors": "334;0;0;0", "reply_reviewers": "2;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 76.0, 11.979148550710939 ], "wc_strengths_avg": [ 56.25, 19.942103700462496 ], "wc_weaknesses_avg": [ 116.5, 46.542990879400946 ], "wc_questions_avg": [ 38.25, 24.24226680820092 ], "wc_limitations_avg": [ 14.75, 12.94942083646987 ], "wc_review_avg": [ 301.75, 56.56136048575918 ], "wc_reply_reviewers_avg": [ 60.25, 44.890839822841365 ], "wc_reply_authors_avg": [ 83.5, 144.62624243200125 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.6324555320336759, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4766156975183010031&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 3, "email": "cs.utexas.edu;cs.utexas.edu;utexas.edu;sambanovasystems.com;meta.com;meta.com;google.com;utexas.edu", "author_num": 8, "aff_unique_index": "0;0;0;1;2;2;3;0", "aff_unique_norm": "University of Texas at Austin;SambaNova Systems;Meta;Google", "aff_unique_dep": ";;Meta Platforms, Inc.;Google DeepMind", "aff_unique_url": "https://www.utexas.edu;https://www.sambanova.com;https://meta.com;https://deepmind.com", "aff_unique_abbr": "UT Austin;SambaNova;Meta;DeepMind", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Decision Mamba: Reinforcement Learning via Hybrid Selective Sequence Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93166", "id": "wFzIMbTsY7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wFzIMbTsY7", "openreview": "https://openreview.net/forum?id=wFzIMbTsY7", "poster": "/media/PosterPDFs/NeurIPS%202024/93166.png?t=1729063095.2695296", "project": "", "author_site": "Sili Huang, Jifeng Hu, Zhejian Yang, Liwei Yang, Tao Luo, Hechang Chen, Lichao Sun, Bo Yang", "tldr": "", "abstract": "Recent works have shown the remarkable superiority of transformer models in reinforcement learning (RL), where the decision-making problem is formulated as sequential generation. Transformer-based agents could emerge with self-improvement in online environments by providing task contexts, such as multiple trajectories, called in-context RL. However, due to the quadratic computation complexity of attention in transformers, current in-context RL methods suffer from huge computational costs as the task horizon increases. In contrast, the Mamba model is renowned for its efficient ability to process long-term dependencies, which provides an opportunity for in-context RL to solve tasks that require long-term memory. To this end, we first implement Decision Mamba (DM) by replacing the backbone of Decision Transformer (DT). Then, we propose a Decision Mamba-Hybrid (DM-H) with the merits of transformers and Mamba in high-quality prediction and long-term memory. Specifically, DM-H first generates high-value sub-goals from long-term memory through the Mamba model. Then, we use sub-goals to prompt the transformer, establishing high-quality predictions. Experimental results demonstrate that DM-H achieves state-of-the-art in long and short-term tasks, such as D4RL, Grid World, and Tmaze benchmarks. Regarding efficiency, the online testing of DM-H in the long-term task is 28$\\times$ times faster than the transformer-based baselines.", "keywords": "In-context Reinforcement Learning;Mamba", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Sili Huang;Jifeng Hu;Zhejian Yang;Liwei Yang;Tao Luo;Hechang Chen;Lichao Sun;Bo Yang", "authorids": "~Sili_Huang1;~Jifeng_Hu1;~Zhejian_Yang1;~Liwei_Yang2;~Tao_Luo2;~Hechang_Chen2;~Lichao_Sun1;~Bo_Yang6", "gender": "M;;M;M;M;M;M;", "homepage": ";;https://github.com/Caster-Yang;;;http://sai.jlu.edu.cn/info/1094/2387.htm;https://lichao-sun.github.io/;http://ccst.jlu.edu.cn/info/1367/19045.htm", "dblp": "26/6752;;336/8382.html;;43/4720-14;145/1142;121/0780-1.html;46/999-2", "google_scholar": "ZMhi8A0AAAAJ;;JaMLCS8AAAAJ;;d4KZI8MAAAAJ;EezEcbgAAAAJ;WhGUE7AAAAAJ;", "orcid": "0000-0001-5387-7904;;;0000-0002-0327-714X;0000-0002-3415-3676;;;0000-0003-1927-8419", "linkedin": ";;;;;;lichao-sun-b273a290/;", "or_profile": "~Sili_Huang1;~Jifeng_Hu1;~Zhejian_Yang1;~Liwei_Yang2;~Tao_Luo2;~Hechang_Chen2;~Lichao_Sun1;~Bo_Yang6", "aff": "Jilin University;;Jilin University;Institute of High Performance Computing, Singapore, A*STAR;Institute of High Performance Computing, Singapore, A*STAR;Jilin University;Lehigh University;Jilin University", "aff_domain": "jlu.edu.cn;;mails.jlu.edu.cn;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;jlu.edu.cn;lehigh.edu;jlu.edu.cn", "position": "PhD student;;PhD student;Researcher;Researcher;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024decision,\ntitle={Decision Mamba: Reinforcement Learning via Hybrid Selective Sequence Modeling},\nauthor={Sili Huang and Jifeng Hu and Zhejian Yang and Liwei Yang and Tao Luo and Hechang Chen and Lichao Sun and Bo Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wFzIMbTsY7}\n}", "github": "", "reviewers": "yEub;YUg4;wVw1;UASH", "pdf_size": 652623, "rating": "3;4;7;10", "confidence": "4;4;5;5", "soundness": "2;2;4;4", "novelty": "2;2;3;4", "presentation": "2;3;4;4", "wc_summary": "66;44;146;86", "wc_strengths": "55;40;131;149", "wc_weaknesses": "332;421;99;95", "wc_questions": "97;18;86;74", "wc_limitations": "49;31;6;12", "wc_review": "599;554;468;416", "wc_reply_reviewers": "226;51;18;101", "wc_reply_authors": "546;623;40;75", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.0, 2.7386127875258306 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 85.5, 37.957212753309484 ], "wc_strengths_avg": [ 93.75, 46.98603515939603 ], "wc_weaknesses_avg": [ 236.75, 143.25567178998534 ], "wc_questions_avg": [ 68.75, 30.408674749156695 ], "wc_limitations_avg": [ 24.5, 16.889345754054535 ], "wc_review_avg": [ 509.25, 71.51005174099653 ], "wc_reply_reviewers_avg": [ 99.0, 79.05377916330123 ], "wc_reply_authors_avg": [ 321.0, 265.1914402841841 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.9128709291752768, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8655856541446042007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "jlu.edu.cn;;mails.jlu.edu.cn;ihpc.a-star.edu.sg;ihpc.a-star.edu.sg;jlu.edu.cn;lehigh.edu;jlu.edu.cn", "author_num": 8, "aff_unique_index": "0;0;1;1;0;2;0", "aff_unique_norm": "Jilin University;Institute of High Performance Computing;Lehigh University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.jlu.edu.cn;https://www.ihpc.a-star.edu.sg;https://www.lehigh.edu", "aff_unique_abbr": "JLU;IHPC;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;2;0", "aff_country_unique": "China;Singapore;United States" }, { "title": "Diffusion Models are Certifiably Robust Classifiers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93165", "id": "wGP1tBCP1E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wGP1tBCP1E", "openreview": "https://openreview.net/forum?id=wGP1tBCP1E", "poster": "", "project": "", "author_site": "Huanran Chen, Yinpeng Dong, Shitong Shao, Hao Zhongkai, Xiao Yang, Hang Su, Jun Zhu", "tldr": "", "abstract": "Generative learning, recognized for its effective modeling of data distributions, offers inherent advantages in handling out-of-distribution instances, especially for enhancing robustness to adversarial attacks. Among these, diffusion classifiers, utilizing powerful diffusion models, have demonstrated superior empirical robustness. However, a comprehensive theoretical understanding of their robustness is still lacking, raising concerns about their vulnerability to stronger future attacks. In this study, we prove that diffusion classifiers possess $O(1)$ Lipschitzness, and establish their certified robustness, demonstrating their inherent resilience. To achieve non-constant Lipschitzness, thereby obtaining much tighter certified robustness, we generalize diffusion classifiers to classify Gaussian-corrupted data. This involves deriving the evidence lower bounds (ELBOs) for these distributions, approximating the likelihood using the ELBO, and calculating classification probabilities via Bayes' theorem. Experimental results show the superior certified robustness of these Noised Diffusion Classifiers (NDCs). Notably, we achieve over 80\\% and 70\\% certified robustness on CIFAR-10 under adversarial perturbations with \\(\\ell_2\\) norms less than 0.25 and 0.5, respectively, using a single off-the-shelf diffusion model without any additional data.", "keywords": "certified robustness;diffusion classifier;adversarial robustness", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Huanran Chen;Yinpeng Dong;Shitong Shao;Zhongkai Hao;Xiao Yang;Hang Su;Jun Zhu", "authorids": "~Huanran_Chen1;~Yinpeng_Dong2;~Shitong_Shao1;~Zhongkai_Hao1;~Xiao_Yang4;~Hang_Su3;~Jun_Zhu2", "gender": "M;M;M;M;M;M;M", "homepage": "https://huanranchen.github.io/;https://dongyp13.github.io;https://shaoshitong.github.io/;https://ml.cs.tsinghua.edu.cn/~xiaoyang/;http://ml.cs.tsinghua.edu.cn/~jun;;https://haozhongkai.github.io/", "dblp": "329/6558;183/0980;329/2735;57/33851;50/2644-1;26/5371-6;270/0220.html", "google_scholar": "https://scholar.google.co.jp/citations?user=QYsKXccAAAAJ;6_4ad84AAAAJ;hmUOaNcAAAAJ;bwkwp0MAAAAJ;axsP38wAAAAJ;dxN1_X0AAAAJ;dfSzq27ZiVoC", "orcid": ";;;0000-0001-9502-9962;;;", "linkedin": ";;;;;;", "or_profile": "~Huanran_Chen1;~Yinpeng_Dong2;~Shitong_Shao1;~Xiao_Yang4;~Jun_Zhu2;~Hang_Su2;~Hao_Zhongkai1", "aff": ";Tsinghua University;Southeast University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": ";tsinghua.edu.cn;seu.edu.cn;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "position": ";Postdoc;MS student;Postdoc;Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nchen2024diffusion,\ntitle={Diffusion Models are Certifiably Robust Classifiers},\nauthor={Huanran Chen and Yinpeng Dong and Shitong Shao and Zhongkai Hao and Xiao Yang and Hang Su and Jun Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wGP1tBCP1E}\n}", "github": "", "reviewers": "TbjG;p9kw;LwbB;76VC", "pdf_size": 778199, "rating": "6;7;7;8", "confidence": "4;4;4;5", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "50;50;125;33", "wc_strengths": "35;44;69;72", "wc_weaknesses": "144;116;122;117", "wc_questions": "6;257;33;12", "wc_limitations": "5;39;26;6", "wc_review": "240;506;375;240", "wc_reply_reviewers": "62;0;14;27", "wc_reply_authors": "39;0;28;41", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.5, 35.61249780624774 ], "wc_strengths_avg": [ 55.0, 15.858751527153705 ], "wc_weaknesses_avg": [ 124.75, 11.344051304538427 ], "wc_questions_avg": [ 77.0, 104.40545962735857 ], "wc_limitations_avg": [ 19.0, 14.265342617687105 ], "wc_review_avg": [ 340.25, 110.43182285917406 ], "wc_reply_reviewers_avg": [ 25.75, 23.004075725836064 ], "wc_reply_authors_avg": [ 27.0, 16.355427233796124 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1310023140522702134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";tsinghua.edu.cn;seu.edu.cn;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn", "author_num": 7, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;Southeast University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.seu.edu.cn/", "aff_unique_abbr": "THU;SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Algorithmic Collective Action in Recommender Systems: Promoting Songs by Reordering Playlists", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93164", "id": "wGjSbaMsop", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wGjSbaMsop", "openreview": "https://openreview.net/forum?id=wGjSbaMsop", "poster": "", "project": "", "author_site": "Joachim Baumann, Celestine Mendler-D\u00fcnner", "tldr": "", "abstract": "We investigate algorithmic collective action in transformer-based recommender systems. Our use case is a collective of fans aiming to promote the visibility of an underrepresented artist by strategically placing one of their songs in the existing playlists they control. We introduce two easily implementable strategies to select the position at which to insert the song and boost recommendations at test time. The strategies exploit statistical properties of the learner to leverage discontinuities in the recommendations, and the long-tail nature of song distributions. We evaluate the efficacy of our strategies using a publicly available recommender system model released by a major music streaming platform. Our findings reveal that even small collectives (controlling less than 0.01\\% of the training data) can achieve up to $40\\times$ more test time recommendations than songs with similar training set occurrences, on average. Focusing on the externalities of the strategy, we find that the recommendations of other songs are largely preserved, and the newly gained recommendations are distributed across various artists. Together, our findings demonstrate how carefully designed collective action strategies can be effective while not necessarily being adversarial.", "keywords": "collective action;platform power;sequential recommender systems;transformer models;music recommendation", "primary_area": "fairness", "supplementary_material": "", "author": "Joachim Baumann;Celestine Mendler-D\u00fcnner", "authorids": "~Joachim_Baumann1;~Celestine_Mendler-D\u00fcnner1", "gender": "M;", "homepage": "https://www.ifi.uzh.ch/en/scg/people/Baumann.html;http://celestine.ai/", "dblp": ";176/5511", "google_scholar": "https://scholar.google.ch/citations?user=0Eunq_cAAAAJ;UqtDdZUAAAAJ", "orcid": "0000-0003-2019-4829;", "linkedin": "joachimbaumann/;", "or_profile": "~Joachim_Baumann1;~Celestine_Mendler-D\u00fcnner1", "aff": "Max-Planck-Institute for Intelligent Systems, Max-Planck Institute;Max Planck Institute for Intelligent Systems", "aff_domain": "is.mpg.de;tuebingen.mpg.de", "position": "PhD student;Group Lead", "bibtex": "@inproceedings{\nbaumann2024algorithmic,\ntitle={Algorithmic Collective Action in Recommender Systems: Promoting Songs by Reordering Playlists},\nauthor={Joachim Baumann and Celestine Mendler-D{\\\"u}nner},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wGjSbaMsop}\n}", "github": "", "reviewers": "fQFx;tKsL;1cCC;9a2T", "pdf_size": 4065037, "rating": "4;5;8;8", "confidence": "4;4;5;4", "soundness": "2;3;4;3", "novelty": "2;1;4;4", "presentation": "3;4;3;3", "wc_summary": "71;68;44;372", "wc_strengths": "45;33;67;106", "wc_weaknesses": "34;285;109;41", "wc_questions": "79;178;95;71", "wc_limitations": "196;635;7;42", "wc_review": "425;1199;322;632", "wc_reply_reviewers": "0;215;60;7", "wc_reply_authors": "0;212;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.25, 1.7853571071357126 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 1.299038105676658 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 138.75, 135.07289698529456 ], "wc_strengths_avg": [ 62.75, 27.788261910382232 ], "wc_weaknesses_avg": [ 117.25, 101.18392905990555 ], "wc_questions_avg": [ 105.75, 42.5991490525339 ], "wc_limitations_avg": [ 220.0, 249.92698933888673 ], "wc_review_avg": [ 644.5, 339.04756303504087 ], "wc_reply_reviewers_avg": [ 70.5, 86.59243615928588 ], "wc_reply_authors_avg": [ 53.0, 91.7986928011505 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.5659164584181102, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ADwwqikmrjAJ:scholar.google.com/&scioq=Algorithmic+Collective+Action+in+Recommender+Systems:+Promoting+Songs+by+Reordering+Playlists&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "is.mpg.de;tuebingen.mpg.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems;Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "wH36UKML4x", "title": "Trained Models Tell Us How to Make Them Robust to Spurious Correlation without Group Annotation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Classifiers trained with Empirical Risk Minimization (ERM) tend to rely on attributes that have high spurious correlation with the target. This can degrade the performance on underrepresented (or 'minority') groups that lack these attributes, posing significant challenges for both out-of-distribution generalization and fairness objectives. Many studies aim to improve robustness to spurious correlation, yet nearly all require group annotation for training and/or model selection. This constrains their applicability in situations where the nature of the spurious correlation is not known, or when group labels for certain spurious attributes are either insufficient or completely absent. To meet the demand for effectively enhancing the model robustness under minimal assumptions about group annotation, we propose Environment-based Validation and Loss-based Sampling (EVaLS). It uses the losses from a trained model to construct a balanced dataset of high-loss and low-loss samples in which the training data group imbalance is mitigated. This results in a significant robustness to group shifts when equipped with a simple mechanism of last layer retraining. Furthermore, by utilizing environment inference methods for creating diverse environments with correlation shifts, EVaLS can potentially eliminate the need for group annotation in the validation data. In such a context, the worst environment accuracy acts as a reliable surrogate throughout the retraining process for tuning hyperparameters and finding a model that performs well across diverse group shifts. EVaLS effectively achieves group robustness, showing that group annotation is not necessary even for validation. It is a fast, straightforward, and effective approach that reaches near-optimal worst group accuracy without needing group annotations, marking a new chapter in the robustness of trained models against spurious correlation.", "keywords": "Spurious Correlation;Group Robustness;Zero Group Annotation;Distribution Shift;Out-of-Distribution Generalization", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/1b9859575e787f8f19cca4065d8147e79520b1fa.zip", "author": "Mahdi Ghaznavi;Hesam Asadollahzadeh;Fahimeh Hosseini Noohdani;Soroush Vafaie Tabar;Hosein Hasani;Taha Akbari Alvanagh;Mohammad Hossein Rohban;Mahdieh Soleymani Baghshah", "authorids": "~Mahdi_Ghaznavi1;~Hesam_Asadollahzadeh1;~Fahimeh_Hosseini_Noohdani1;~Soroush_Vafaie_Tabar1;~Hosein_Hasani2;~Taha_Akbari_Alvanagh1;~Mohammad_Hossein_Rohban1;~Mahdieh_Soleymani_Baghshah1", "gender": "M;M;M;M;M;M;F;F", "homepage": "https://mahdighaznavi.github.io;http://HesamAsad.github.io;;http://ce.sharif.edu/~hasanih;;http://sharif.edu/~rohban/;;http://sharif.edu/~soleymani/", "dblp": ";;;217/2122;;43/8108;;21/473", "google_scholar": ";cL60wssAAAAJ;;U59qHq4AAAAJ;;pRyJ6FkAAAAJ;hWWXh3wAAAAJ;S1U0KlgAAAAJ", "orcid": ";;;;;;;", "linkedin": ";;soroush-vafaie-tabar-5248a5159;;https://www.linkedin.com/feed/;;fahimeh-hosseini-00902623b/;", "or_profile": "~Mahdi_Ghaznavi1;~Hesam_Asadollahzadeh1;~Soroush_Vafaie_Tabar1;~Hosein_Hasani2;~Taha_Akbari_Alvanagh1;~Mohammad_Hossein_Rohban1;~Fahimeh_Hosseini1;~Mahdieh_Baghshah1", "aff": "Sharif University of Technology;University of Tehran, University of Tehran;Sharif University of Technology, Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology;Sharif University of Technology", "aff_domain": "sharif.edu;ut.ac.ir;ce.sharif.edu;sharif.edu;sharif.ir;sharif.edu;sharif.edu;sharif.edu", "position": "MS student;Undergrad student;MS student;PhD student;Undergrad student;Associate Professor;MS student;Associate Professor", "bibtex": "@misc{\nanonymous2024trained,\ntitle={Trained Models Tell Us How to Make Them Robust to Spurious Correlation without Group Annotation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=wH36UKML4x}\n}", "github": "", "project": "", "reviewers": "7E4z;SN9B;RE7s", "site": "https://openreview.net/forum?id=wH36UKML4x", "pdf_size": 4412614, "rating": "5;6;6", "confidence": "4;3;4", "soundness": "2;2;3", "novelty": "2;2;2", "presentation": "3;3;2", "wc_summary": "97;53;84", "wc_strengths": "55;35;92", "wc_weaknesses": "223;177;259", "wc_questions": "49;2;112", "wc_limitations": "5;2;9", "wc_review": "429;269;556", "wc_reply_reviewers": "36;77;670", "wc_reply_authors": "81;294;2169", "reply_reviewers": "1;1;5", "reply_authors": "2;2;6", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 78.0, 18.457157599876172 ], "wc_strengths_avg": [ 60.666666666666664, 23.612614331233114 ], "wc_weaknesses_avg": [ 219.66666666666666, 33.559234529741914 ], "wc_questions_avg": [ 54.333333333333336, 45.065384597148274 ], "wc_limitations_avg": [ 5.333333333333333, 2.8674417556808756 ], "wc_review_avg": [ 418.0, 117.42515346665154 ], "wc_reply_reviewers_avg": [ 261.0, 289.6906395910415 ], "wc_reply_authors_avg": [ 848.0, 938.1268570934317 ], "reply_reviewers_avg": [ 2.3333333333333335, 1.8856180831641267 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1294678029261996527&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1;0;0;0;0;0;0", "aff_unique_norm": "Sharif University of Technology;University of Tehran", "aff_unique_dep": ";", "aff_unique_url": "https://www.sharif.edu;https://ut.ac.ir", "aff_unique_abbr": "SUT;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "Iran" }, { "title": "FasMe: Fast and Sample-efficient Meta Estimator for Precision Matrix Learning in Small Sample Settings", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93163", "id": "wHFaAH3E8z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wHFaAH3E8z", "openreview": "https://openreview.net/forum?id=wHFaAH3E8z", "poster": "/media/PosterPDFs/NeurIPS%202024/93163.png?t=1730968629.1478336", "project": "", "author_site": "Xiao Tan, Yiqin Wang, Yangyang Shen, Dian Shen, Meng Wang, Peibo Duan, Beilun Wang", "tldr": "", "abstract": "Precision matrix estimation is a ubiquitous task featuring numerous applications such as rare disease diagnosis and neural connectivity exploration. However, this task becomes challenging in small sample settings, where the number of samples is significantly less than the number of dimensions, leading to unreliable estimates. Previous approaches either fail to perform well in small sample settings or suffer from inefficient estimation processes, even when incorporating meta-learning techniques.\nTo this end, we propose a novel approach FasMe for Fast and Sample-efficient Meta Precision Matrix Learning, which first extracts meta-knowledge through a multi-task learning diagram. Then, meta-knowledge constraints are applied using a maximum determinant matrix completion algorithm for the novel task. As a result, we reduce the sample size requirements to $O(\\log p/K)$ per meta-training task and $O(\\log\\vert \\mathcal{G}\\vert)$ for the meta-testing task. Moreover, the hereby proposed model only needs $O(p \\log\\epsilon^{-1})$ time and $O(p)$ memory for converging to an $\\epsilon$-accurate solution. On multiple synthetic and biomedical datasets, FasMe is at least ten times faster than the four baselines while promoting prediction accuracy in small sample settings.", "keywords": "Precision Matrix Estimation;Meta Learning;Graphical Model;Small Sample", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Xiao Tan;Yiqin Wang;Yangyang Shen;Dian Shen;Meng Wang;Peibo Duan;Beilun Wang", "authorids": "~Xiao_Tan5;~Yiqin_Wang2;~Yangyang_Shen1;~Dian_Shen1;~Meng_Wang11;~Peibo_Duan1;~Beilun_Wang1", "gender": "F;M;M;M;;M;M", "homepage": "https://github.com/MahjongGod-Saki;;https://github.com/SYYatSEU;https://dianshenseu.github.io/en/;;https://scholar.google.com/citations?hl=zh-CN&user=wdIMVqsAAAAJ&view_op=list_works&sortby=pubdate;https://cse.seu.edu.cn/2019/0105/c23024a257533/pagem.htm", "dblp": "116/7143-5;;;139/4309;;;180/5592", "google_scholar": ";;;;;https://scholar.google.com/citations?hl=zh-CN;", "orcid": "0000-0002-3874-9557;0000-0002-1096-7827;;;;;0000-0002-2646-1492", "linkedin": ";;;;;;", "or_profile": "~Xiao_Tan5;~Yiqin_Wang2;~Yangyang_Shen1;~Dian_Shen1;~Meng_Wang11;~Peibo_Duan1;~Beilun_Wang1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University;;Monash University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;;monash.edu;seu.edu.cn", "position": "MS student;Undergrad student;MS student;Associate Professor;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\ntan2024fasme,\ntitle={FasMe: Fast and Sample-efficient Meta Estimator for Precision Matrix Learning in Small Sample Settings},\nauthor={Xiao Tan and Yiqin Wang and Yangyang Shen and Dian Shen and Meng Wang and Peibo Duan and Beilun Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wHFaAH3E8z}\n}", "github": "", "reviewers": "nam2;PtUj;gJdm", "pdf_size": 3258992, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "73;31;51", "wc_strengths": "38;47;46", "wc_weaknesses": "30;22;13", "wc_questions": "4;44;19", "wc_limitations": "1;1;1", "wc_review": "146;145;130", "wc_reply_reviewers": "10;13;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 51.666666666666664, 17.15290710702481 ], "wc_strengths_avg": [ 43.666666666666664, 4.027681991198191 ], "wc_weaknesses_avg": [ 21.666666666666668, 6.944222218666553 ], "wc_questions_avg": [ 22.333333333333332, 16.49915822768611 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 140.33333333333334, 7.318166133366716 ], "wc_reply_reviewers_avg": [ 7.666666666666667, 5.557777333511022 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:B4mq28MjEAIJ:scholar.google.com/&scioq=FasMe:+Fast+and+Sample-efficient+Meta+Estimator+for+Precision+Matrix+Learning+in+Small+Sample+Settings&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;;monash.edu;seu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Southeast University;Monash University", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.monash.edu", "aff_unique_abbr": "SEU;Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Bandits with Preference Feedback: A Stackelberg Game Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93162", "id": "wIE991zhXH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wIE991zhXH", "openreview": "https://openreview.net/forum?id=wIE991zhXH", "poster": "", "project": "", "author_site": "Barna P\u00e1sztor, Parnian Kassraie, Andreas Krause", "tldr": "", "abstract": "Bandits with preference feedback present a powerful tool for optimizing unknown target functions when only pairwise comparisons are allowed instead of direct value queries. This model allows for incorporating human feedback into online inference and optimization and has been employed in systems for tuning large language models.\nThe problem is fairly understood in toy settings with linear target functions or over finite small domains that limits practical interest.\nTaking the next step, we consider infinite domains and kernelized rewards. In this setting, selecting a pair of actions is quite challenging and requires balancing exploration and exploitation at two levels: within the pair, and along the iterations of the algorithm.\nWe propose MaxMinLCB, which emulates this trade-off as a zero-sum Stackelberg game and chooses action pairs that are informative and have favorable reward values. MaxMinLCB consistently outperforms algorithms in the literature and satisfies an anytime-valid rate-optimal regret guarantee. This is owed to our novel preference-based confidence sequences for kernelized logistic estimators, which are of independent interest.", "keywords": "Preference Feedback;Duelling Bandits;Reproducing Kernel Hilbert Space;Stackelberg Games;Exploration-exploitation dilemma", "primary_area": "bandits", "supplementary_material": "", "author": "Barna P\u00e1sztor;Parnian Kassraie;Andreas Krause", "authorids": "~Barna_P\u00e1sztor1;~Parnian_Kassraie1;~Andreas_Krause1", "gender": ";F;M", "homepage": ";https://pkassraie.github.io;https://las.inf.ethz.ch/krausea", "dblp": "273/3840;216/8534.html;87/1831-1.html", "google_scholar": "t2QJiCkAAAAJ;GFDOkb0AAAAJ;https://scholar.google.ch/citations?user=eDHv58AAAAAJ", "orcid": ";;0000-0001-7260-9673", "linkedin": ";parnian-kassraie/;krausea/", "or_profile": "~Barna_P\u00e1sztor1;~Parnian_Kassraie1;~Andreas_Krause1", "aff": "ETHZ - ETH Zurich;Google;ETH Zurich", "aff_domain": "ethz.ch;deepmind.com;ethz.ch", "position": "PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\np{\\'a}sztor2024bandits,\ntitle={Bandits with Preference Feedback: A Stackelberg Game Perspective},\nauthor={Barna P{\\'a}sztor and Parnian Kassraie and Andreas Krause},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wIE991zhXH}\n}", "github": "", "reviewers": "K7qo;Hhi1;kTTb;zRaD", "pdf_size": 3410759, "rating": "6;6;7;7", "confidence": "4;3;3;3", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;2;3;4", "wc_summary": "40;46;66;106", "wc_strengths": "41;58;148;104", "wc_weaknesses": "63;81;203;29", "wc_questions": "56;30;2;23", "wc_limitations": "130;3;1;17", "wc_review": "330;218;420;279", "wc_reply_reviewers": "25;18;19;16", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 64.5, 25.821502667350714 ], "wc_strengths_avg": [ 87.75, 41.72753886823425 ], "wc_weaknesses_avg": [ 94.0, 65.64297372910524 ], "wc_questions_avg": [ 27.75, 19.292161620720474 ], "wc_limitations_avg": [ 37.75, 53.6161123171011 ], "wc_review_avg": [ 311.75, 74.01477892961648 ], "wc_reply_reviewers_avg": [ 19.5, 3.3541019662496847 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3771512310095007554&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "ethz.ch;deepmind.com;ethz.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "ETH Zurich;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ethz.ch;https://www.google.com", "aff_unique_abbr": "ETHZ;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "S-MolSearch: 3D Semi-supervised Contrastive Learning for Bioactive Molecule Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93161", "id": "wJAF8TGVUG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wJAF8TGVUG", "openreview": "https://openreview.net/forum?id=wJAF8TGVUG", "poster": "/media/PosterPDFs/NeurIPS%202024/93161.png?t=1731657464.6325119", "project": "", "author_site": "Gengmo Zhou, Zhen Wang, Feng Yu, Guolin Ke, Zhewei Wei, Zhifeng Gao", "tldr": "", "abstract": "Virtual Screening is an essential technique in the early phases of drug discovery, aimed at identifying promising drug candidates from vast molecular libraries. \nRecently, ligand-based virtual screening has garnered significant attention due to its efficacy in conducting extensive database screenings without relying on specific protein-binding site information.\nObtaining binding affinity data for complexes is highly expensive, resulting in a limited amount of available data that covers a relatively small chemical space. Moreover, these datasets contain a significant amount of inconsistent noise. It is challenging to identify an inductive bias that consistently maintains the integrity of molecular activity during data augmentation. To tackle these challenges, we propose S-MolSearch, the first framework to our knowledge, that leverages molecular 3D information and affinity information in semi-supervised contrastive learning for ligand-based virtual screening. \n% S-MolSearch processes both labeled and unlabeled data, trains molecular structural encoders, and generates soft labels for unlabeled data, drawing on the principles of inverse optimal transport.\nDrawing on the principles of inverse optimal transport, S-MolSearch efficiently processes both labeled and unlabeled data, training molecular structural encoders while generating soft labels for the unlabeled data.\nThis design allows S-MolSearch to adaptively utilize unlabeled data within the learning process.\nEmpirically, S-MolSearch demonstrates superior performance on widely-used benchmarks LIT-PCBA and DUD-E. It surpasses both structure-based and ligand-based virtual screening methods for AUROC, BEDROC and EF.", "keywords": "semi-supervised learning; 3D molecule search; contrastive learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Gengmo Zhou;Zhen Wang;Feng Yu;Guolin Ke;Zhewei Wei;Zhifeng Gao", "authorids": "~Gengmo_Zhou1;~Zhen_Wang28;~Feng_Yu6;~Guolin_Ke3;~Zhewei_Wei1;~Zhifeng_Gao1", "gender": ";M;M;M;M;M", "homepage": "https://zhougengmo.github.io/;https://github.com/Wangzhencc;;http://weizhewei.com;;https://guolinke.github.io", "dblp": ";;;94/4260;71/6161;190/7810", "google_scholar": "z76EQ7YAAAAJ;;tdUUL90AAAAJ;https://scholar.google.com.hk/citations?user=qZ7dj4gAAAAJ;uBo3SJcAAAAJ;M2qJgtoAAAAJ", "orcid": ";;;0000-0003-3620-5086;;", "linkedin": ";;;;;", "or_profile": "~Gengmo_Zhou1;~Zhen_Wang28;~Feng_Yu6;~Zhewei_Wei1;~Zhifeng_Gao1;~guolin_ke1", "aff": "DP Technology;DP Technology;;Renmin University of China;DP Technology;DP Technology", "aff_domain": "dp.tech;dp.tech;;ruc.edu.cn;dp.tech;dp.tech", "position": "Intern;Researcher;;Full Professor;Researcher;Senior Researcher", "bibtex": "@inproceedings{\nzhou2024smolsearch,\ntitle={S-MolSearch: 3D Semi-supervised Contrastive Learning for Bioactive Molecule Search},\nauthor={Gengmo Zhou and Zhen Wang and Feng Yu and Guolin Ke and Zhewei Wei and Zhifeng Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wJAF8TGVUG}\n}", "github": "", "reviewers": "9wcM;KtU6;ZRdZ;YuaV", "pdf_size": 1368384, "rating": "6;6;6;7", "confidence": "3;4;3;4", "soundness": "3;3;4;3", "novelty": "3;3;3;3", "presentation": "2;2;4;3", "wc_summary": "58;22;101;82", "wc_strengths": "28;55;112;9", "wc_weaknesses": "329;47;82;57", "wc_questions": "4;3;93;155", "wc_limitations": "55;19;5;10", "wc_review": "474;146;393;313", "wc_reply_reviewers": "8;8;19;28", "wc_reply_authors": "0;0;0;24", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 65.75, 29.498940658945703 ], "wc_strengths_avg": [ 51.0, 38.82653731663435 ], "wc_weaknesses_avg": [ 128.75, 116.31503557150296 ], "wc_questions_avg": [ 63.75, 64.11464341318604 ], "wc_limitations_avg": [ 22.25, 19.562400159489634 ], "wc_review_avg": [ 331.5, 121.2858194514099 ], "wc_reply_reviewers_avg": [ 15.75, 8.37779804005802 ], "wc_reply_authors_avg": [ 6.0, 10.392304845413264 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15144339966458634288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "dp.tech;dp.tech;;ruc.edu.cn;dp.tech;dp.tech", "author_num": 6, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "DP Technology;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": ";http://www.ruc.edu.cn", "aff_unique_abbr": ";RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "title": "Sharpness-diversity tradeoff: improving flat ensembles with SharpBalance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93160", "id": "wJaCsnT9UE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wJaCsnT9UE", "openreview": "https://openreview.net/forum?id=wJaCsnT9UE", "poster": "/media/PosterPDFs/NeurIPS%202024/93160.png?t=1733723338.8552976", "project": "", "author_site": "Haiquan Lu, Xiaotian Liu, Yefan Zhou, Qunli Li, Kurt Keutzer, Michael Mahoney, Yujun Yan, Huanrui Yang, Yaoqing Yang", "tldr": "", "abstract": "Recent studies on deep ensembles have identified the sharpness of the local minima of individual learners and the diversity of the ensemble members as key factors in improving test-time performance. Building on this, our study investigates the interplay between sharpness and diversity within deep ensembles, illustrating their crucial role in robust generalization to both in-distribution (ID) and out-of-distribution (OOD) data. We discover a trade-off between sharpness and diversity: minimizing the sharpness in the loss landscape tends to diminish the diversity of individual members within the ensemble, adversely affecting the ensemble's improvement. The trade-off is justified through our rigorous theoretical analysis and verified empirically through extensive experiments. To address the issue of reduced diversity, we introduce SharpBalance, a novel training approach that balances sharpness and diversity within ensembles. Theoretically, we show that our training strategy achieves a better sharpness-diversity trade-off. Empirically, we conducted comprehensive evaluations in various data sets (CIFAR-10, CIFAR-100, TinyImageNet) and showed that SharpBalance not only effectively improves the sharpness-diversity trade-off but also significantly improves ensemble performance in ID and OOD scenarios.", "keywords": "Diversity;loss landscape;deep ensemble", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/0da00d42f8fe8ecc4ad08324d5b8c5d8f255afe6.zip", "author": "Haiquan Lu;Xiaotian Liu;Yefan Zhou;Qunli Li;Kurt Keutzer;Michael W. Mahoney;Yujun Yan;Huanrui Yang;Yaoqing Yang", "authorids": "~Haiquan_Lu1;~Xiaotian_Liu2;~Yefan_Zhou1;~Qunli_Li1;~Kurt_Keutzer1;~Michael_W._Mahoney1;~Yujun_Yan1;~Huanrui_Yang1;~Yaoqing_Yang1", "gender": "M;M;M;M;M;;F;M;M", "homepage": "https://github.com/haiquanlu;;https://yefanzhou.github.io/;;https://people.eecs.berkeley.edu/~keutzer/;;https://sites.google.com/umich.edu/yujunyan/home;https://sites.google.com/view/huanrui-yang;https://sites.google.com/site/yangyaoqingcmu/", "dblp": "236/2803;;237/4333;;k/KurtKeutzer.html;;219/1736;221/2845;04/4176", "google_scholar": "S1brcdYAAAAJ;32r5znUAAAAJ;TAeVaicAAAAJ;;ID9QePIAAAAJ;;5TQUP58AAAAJ;bjNCUt8AAAAJ;LYvugWgAAAAJ", "orcid": ";;;;0000-0003-3868-8501;;0000-0003-3776-4293;;0000-0001-9908-5531", "linkedin": ";xiaotian-liu-700051139/;yefan-zhou/;qunli-li-594385236/;kurtkeutzer/;;;;", "or_profile": "~Haiquan_Lu1;~Xiaotian_Liu2;~Yefan_Zhou1;~Qunli_Li1;~Kurt_Keutzer1;~Michael_W._Mahoney1;~Yujun_Yan1;~Huanrui_Yang1;~Yaoqing_Yang1", "aff": "Nankai University;Dartmouth College;Dartmouth College;University of California, San Diego;University of California, Berkeley;;Dartmouth College;University of California, Berkeley;Dartmouth College", "aff_domain": "nankai.edu.cn;dartmouth.edu;dartmouth.edu;ucsd.edu;berkeley.edu;;dartmouth.edu;berkeley.edu;dartmouth.edu", "position": "Undergrad student;PhD student;PhD student;MS student;Full Professor;;Assistant Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nlu2024sharpnessdiversity,\ntitle={Sharpness-diversity tradeoff: improving flat ensembles with SharpBalance},\nauthor={Haiquan Lu and Xiaotian Liu and Yefan Zhou and Qunli Li and Kurt Keutzer and Michael W. Mahoney and Yujun Yan and Huanrui Yang and Yaoqing Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wJaCsnT9UE}\n}", "github": "", "reviewers": "8MB6;b225;TwFd;jD9M", "pdf_size": 6319466, "rating": "5;6;6;6", "confidence": "2;4;4;3", "soundness": "3;3;3;3", "novelty": "3;2;3;2", "presentation": "3;3;3;3", "wc_summary": "70;30;55;20", "wc_strengths": "82;36;46;26", "wc_weaknesses": "89;35;134;364", "wc_questions": "67;22;5;39", "wc_limitations": "5;4;6;79", "wc_review": "313;127;246;528", "wc_reply_reviewers": "135;9;44;45", "wc_reply_authors": "706;9;28;27", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 43.75, 19.803724397193573 ], "wc_strengths_avg": [ 47.5, 21.13646138784825 ], "wc_weaknesses_avg": [ 155.5, 125.37643319220722 ], "wc_questions_avg": [ 33.25, 22.895141405983935 ], "wc_limitations_avg": [ 23.5, 32.05074102107469 ], "wc_review_avg": [ 303.5, 145.7300586701316 ], "wc_reply_reviewers_avg": [ 58.25, 46.622821664931436 ], "wc_reply_authors_avg": [ 192.5, 296.5657599926195 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10881773293033659751&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "nankai.edu.cn;dartmouth.edu;dartmouth.edu;ucsd.edu;berkeley.edu;;dartmouth.edu;berkeley.edu;dartmouth.edu", "author_num": 9, "aff_unique_index": "0;1;1;2;3;1;3;1", "aff_unique_norm": "Nankai University;Dartmouth College;University of California, San Diego;University of California, Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.nankai.edu.cn;https://www.dartmouth.edu;https://www.ucsd.edu;https://www.berkeley.edu", "aff_unique_abbr": "NKU;Dartmouth;UCSD;UC Berkeley", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";San Diego;Berkeley", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "CRAYM: Neural Field Optimization via Camera RAY Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93159", "id": "wK0Z49myyi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wK0Z49myyi", "openreview": "https://openreview.net/forum?id=wK0Z49myyi", "poster": "/media/PosterPDFs/NeurIPS%202024/93159.png?t=1731512921.138323", "project": "", "author_site": "Liqiang Lin, Wenpeng Wu, Chi-Wing Fu, Hao Zhang, Hui Huang", "tldr": "", "abstract": "We introduce camera ray matching (CRAYM) into the joint optimization of camera poses and neural fields from multi-view images. The optimized field, referred to as a feature volume, can be \u201cprobed\u201d by the camera rays for novel view synthesis (NVS) and 3D geometry reconstruction. One key reason for matching camera rays, instead of pixels as in prior works, is that the camera rays can be parameterized by the feature volume to carry both geometric and photometric information. Multi-view consistencies involving the camera rays and scene rendering can be naturally integrated into the joint optimization and network training, to impose physically meaningful constraints to improve the final quality of both the geometric reconstruction and photorealistic rendering. We formulate our per-ray optimization and matched ray coherence by focusing on camera rays passing through keypoints in the input images to elevate both the efficiency and accuracy of scene correspondences. Accumulated ray features along the feature volume provide a means to discount the coherence constraint amid erroneous ray matching. We demonstrate the effectiveness of CRAYM for both NVS and geometry reconstruction, over dense- or sparse-view settings, with qualitative and quantitative comparisons to state-of-the-art alternatives.", "keywords": "Neural Implicit Fields;Novel View Synthesis;3D Reconstruction", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Liqiang Lin;Wenpeng Wu;Chi-Wing Fu;Hao Zhang;Hui Huang", "authorids": "~Liqiang_Lin1;~Wenpeng_Wu2;~Chi-Wing_Fu2;~Hao_Zhang25;~Hui_Huang3", "gender": ";;;;", "homepage": "https://linxius.github.io/;;;;https://vcc.tech/~huihuang", "dblp": "281/6566;;;;33/5763-4", "google_scholar": "OWjgUJoAAAAJ;;;;https://scholar.google.com.hk/citations?user=wjzkl3YAAAAJ", "orcid": "0000-0003-2594-6495;;;;0000-0003-3212-0544", "linkedin": ";;;;", "or_profile": "~Liqiang_Lin1;~Wenpeng_Wu2;~Chi-Wing_Fu2;~Hao_Zhang25;~Hui_Huang3", "aff": "Shenzhen University;;;;Shenzhen University", "aff_domain": "szu.edu.cn;;;;szu.edu", "position": "PhD student;;;;Full Professor", "bibtex": "@inproceedings{\nlin2024craym,\ntitle={{CRAYM}: Neural Field Optimization via Camera {RAY} Matching},\nauthor={Liqiang Lin and Wenpeng Wu and Chi-Wing Fu and Hao Zhang and Hui Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wK0Z49myyi}\n}", "github": "", "reviewers": "Bu9t;67PC;GtQM;LukG", "pdf_size": 17950617, "rating": "5;5;5;8", "confidence": "3;5;4;5", "soundness": "3;2;2;4", "novelty": "2;3;2;4", "presentation": "4;2;2;4", "wc_summary": "93;107;74;175", "wc_strengths": "37;47;9;160", "wc_weaknesses": "371;146;59;159", "wc_questions": "8;433;16;82", "wc_limitations": "30;74;1;2", "wc_review": "539;807;159;578", "wc_reply_reviewers": "48;49;27;0", "wc_reply_authors": "43;121;61;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 5.75, 1.299038105676658 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 112.25, 38.07476198218447 ], "wc_strengths_avg": [ 63.25, 57.56898036269185 ], "wc_weaknesses_avg": [ 183.75, 114.74182977449854 ], "wc_questions_avg": [ 134.75, 174.57287160380906 ], "wc_limitations_avg": [ 26.75, 29.65952629426168 ], "wc_review_avg": [ 520.75, 232.60091895777197 ], "wc_reply_reviewers_avg": [ 31.0, 19.937402037376884 ], "wc_reply_authors_avg": [ 56.25, 43.45903243285566 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16417437056022970295&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "szu.edu.cn;;;;szu.edu", "author_num": 5, "aff_unique_index": "0;0", "aff_unique_norm": "Shenzhen University", "aff_unique_dep": "", "aff_unique_url": "https://www.szu.edu.cn", "aff_unique_abbr": "SZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Space-Time Continuous PDE Forecasting using Equivariant Neural Fields", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93158", "id": "wN5AgP0DJ0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wN5AgP0DJ0", "openreview": "https://openreview.net/forum?id=wN5AgP0DJ0", "poster": "", "project": "", "author_site": "David Knigge, David Wessels, Riccardo Valperga, Samuele Papa, Jan-jakob Sonke, Erik Bekkers, Efstratios Gavves", "tldr": "", "abstract": "Recently, Conditional Neural Fields (NeFs) have emerged as a powerful modelling paradigm for PDEs, by learning solutions as flows in the latent space of the Conditional NeF. Although benefiting from favourable properties of NeFs such as grid-agnosticity and space-time-continuous dynamics modelling, this approach limits the ability to impose known constraints of the PDE on the solutions -- such as symmetries or boundary conditions -- in favour of modelling flexibility. Instead, we propose a space-time continuous NeF-based solving framework that - by preserving geometric information in the latent space of the Conditional NeF - preserves known symmetries of the PDE. We show that modelling solutions as flows of pointclouds over the group of interest $G$ improves generalization and data-efficiency. Furthermore, we validate that our framework readily generalizes to unseen spatial and temporal locations, as well as geometric transformations of the initial conditions - where other NeF-based PDE forecasting methods fail -, and improve over baselines in a number of challenging geometries.", "keywords": "pde solving;neural fields;equivariance;attention", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/c8b58ed053a19cbaa62dbbbb9cb64e5e4383c707.zip", "author": "David M Knigge;David Wessels;Riccardo Valperga;Samuele Papa;Jan-Jakob Sonke;Erik J Bekkers;Stratis Gavves", "authorids": "~David_M_Knigge1;~David_Wessels1;~Riccardo_Valperga1;~Samuele_Papa1;~Jan-Jakob_Sonke2;~Erik_J_Bekkers1;~Stratis_Gavves1", "gender": "M;M;M;;M;;M", "homepage": "https://github.com/Dafidofff;;https://samuelepapa.github.io/;https://erikbekkers.bitbucket.io/;;;https://www.egavves.com", "dblp": ";;296/3702;43/5596;304/8106;20/4093;03/8693", "google_scholar": ";IK64D1wAAAAJ;;https://scholar.google.nl/citations?user=yeWrfR4AAAAJ;Csnj-pQAAAAJ;https://scholar.google.com/citations?hl=nl;https://scholar.google.nl/citations?user=QqfCvsgAAAAJ", "orcid": ";;;;0000-0001-5272-3313;0000-0001-5155-5274;", "linkedin": ";;samuelepapa/;;https://linkedin.com/in/david-knigge;;", "or_profile": "~David_Wessels1;~Riccardo_Valperga1;~Samuele_Papa1;~Erik_J_Bekkers1;~David_Mattanja_Knigge1;~Jan-jakob_Sonke1;~Efstratios_Gavves1", "aff": "University of Amsterdam, University of Amsterdam;University of Amsterdam;NXAI;University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "ivi.uva.nl;uva.nl;nx-ai.com;uva.nl;uva.nl;uva.nl;uva.nl", "position": "PhD student;PhD student;Intern;Assistant Professor;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nknigge2024spacetime,\ntitle={Space-Time Continuous {PDE} Forecasting using Equivariant Neural Fields},\nauthor={David M Knigge and David Wessels and Riccardo Valperga and Samuele Papa and Jan-Jakob Sonke and Erik J Bekkers and Stratis Gavves},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wN5AgP0DJ0}\n}", "github": "", "reviewers": "zC2e;kxDM;hu7A;i6aX;NkrS", "pdf_size": 3086414, "rating": "5;5;6;6;7", "confidence": "4;3;3;3;4", "soundness": "3;3;3;2;3", "novelty": "3;2;3;2;3", "presentation": "4;3;3;3;3", "wc_summary": "58;56;115;167;59", "wc_strengths": "52;62;31;66;63", "wc_weaknesses": "175;133;24;218;56", "wc_questions": "20;31;90;120;1", "wc_limitations": "33;1;1;2;79", "wc_review": "338;283;261;573;258", "wc_reply_reviewers": "25;29;16;19;12", "wc_reply_authors": "83;58;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;1;1;1", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.4, 0.4898979485566356 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.0, 44.02272140611028 ], "wc_strengths_avg": [ 54.8, 12.796874618437112 ], "wc_weaknesses_avg": [ 121.2, 72.25344282454644 ], "wc_questions_avg": [ 52.4, 45.01821853427788 ], "wc_limitations_avg": [ 23.2, 30.478845122477985 ], "wc_review_avg": [ 342.6, 118.72084905356768 ], "wc_reply_reviewers_avg": [ 20.2, 6.11228271597445 ], "wc_reply_authors_avg": [ 28.2, 35.431059820445675 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.21821789023599233, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9795923816411699493&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ivi.uva.nl;uva.nl;nx-ai.com;uva.nl;uva.nl;uva.nl;uva.nl", "author_num": 7, "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "University of Amsterdam;NXAI", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;", "aff_unique_abbr": "UvA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Netherlands;" }, { "title": "RWKU: Benchmarking Real-World Knowledge Unlearning for Large Language Models", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97449", "id": "wOmtZ5FgMH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wOmtZ5FgMH", "openreview": "https://openreview.net/forum?id=wOmtZ5FgMH", "poster": "/media/PosterPDFs/NeurIPS%202024/97449.png?t=1731681550.4137602", "project": "", "author_site": "Zhuoran Jin, Pengfei Cao, Chenhao Wang, Zhitao He, Hongbang Yuan, Jiachun Li, Yubo Chen, Kang Liu, Jun Zhao", "tldr": "", "abstract": "Large language models (LLMs) inevitably memorize sensitive, copyrighted, and harmful knowledge from the training corpus; therefore, it is crucial to erase this knowledge from the models. Machine unlearning is a promising solution for efficiently removing specific knowledge by post hoc modifying models. In this paper, we propose a Real-World Knowledge Unlearning benchmark (RWKU) for LLM unlearning. RWKU is designed based on the following three key factors: (1) For the task setting, we consider a more practical and challenging unlearning setting, where neither the forget corpus nor the retain corpus is accessible. (2) For the knowledge source, we choose 200 real-world famous people as the unlearning targets and show that such popular knowledge is widely present in various LLMs. (3) For the evaluation framework, we design the forget set and the retain set to evaluate the model\u2019s capabilities across various real-world applications. Regarding the forget set, we provide four four membership inference attack (MIA) methods and nine kinds of adversarial attack probes to rigorously test unlearning efficacy. Regarding the retain set, we assess locality and utility in terms of neighbor perturbation, general ability, reasoning ability, truthfulness, factuality, and fluency. We conduct extensive experiments across two unlearning scenarios, two models and six baseline methods and obtain some meaningful findings. We release our benchmark and code publicly at http://rwku-bench.github.io for future work.", "keywords": "Knowledge Unlearning;Large Language Models", "primary_area": "", "supplementary_material": "/attachment/c746d5b12935a063c775271403967e0c0de8fd1b.zip", "author": "Zhuoran Jin;Pengfei Cao;Chenhao Wang;Zhitao He;Hongbang Yuan;Jiachun Li;Yubo Chen;Kang Liu;Jun Zhao", "authorids": "~Zhuoran_Jin1;~Pengfei_Cao1;~Chenhao_Wang7;~Zhitao_He1;~Hongbang_Yuan1;~Jiachun_Li3;~Yubo_Chen1;~Kang_Liu1;~Jun_Zhao4", "gender": "M;;;M;M;;M;M;M", "homepage": "https://scholar.google.com/citations?user=Am8WsCkAAAAJ;https://cpf-nlpr.github.io/;;;;;http://www.nlpr.ia.ac.cn/cip/yubochen/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "320/9888;182/7941;;;320/9916.html;;https://dblp.uni-trier.de/pid/90/7879.html;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "Am8WsCkAAAAJ;lP5_LJIAAAAJ;;ULvoYXgAAAAJ;https://scholar.google.com/citations?hl=en;U2SEqCIAAAAJ;https://scholar.google.com.hk/citations?user=9z7GPxIAAAAJ;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "orcid": ";;;0009-0003-3317-1260;;0009-0009-4486-9975;;;", "linkedin": ";;;;;;;;", "or_profile": "~Zhuoran_Jin1;~Pengfei_Cao1;~Chenhao_Wang7;~Zhitao_He1;~Hongbang_Yuan1;~Jiachun_Li3;~Yubo_Chen1;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "nlpr.ia.ac.cn;ia.ac.cn;;ia.cas.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;Assistant Professor;;MS student;MS student;PhD student;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\njin2024rwku,\ntitle={{RWKU}: Benchmarking Real-World Knowledge Unlearning for Large Language Models},\nauthor={Zhuoran Jin and Pengfei Cao and Chenhao Wang and Zhitao He and Hongbang Yuan and Jiachun Li and Yubo Chen and Kang Liu and Jun Zhao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=wOmtZ5FgMH}\n}", "github": "", "reviewers": "nFJQ;nhq4;kTbb;7nBB", "pdf_size": 2595323, "rating": "4;7;7;7", "confidence": "4;4;3;3", "wc_summary_and_contributions": "50;49;82;88", "wc_strengths": "6;90;65;62", "wc_improvement": "6;70;186;165", "wc_limitations": "1;1;6;23", "wc_correctness": "14;1;74;13", "wc_clarity": "22;1;21;5", "wc_relation_to_prior_work": "1;1;66;7", "wc_documentation": "1;1;16;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "102;215;517;365", "wc_reply_reviewers": "0;0;16;133", "wc_reply_authors": "846;90;139;789", "reply_reviewers": "0;0;1;2", "reply_authors": "5;2;4;6", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 67.25, 17.879807045938723 ], "wc_strengths_avg": [ 55.75, 30.711357833869865 ], "wc_improvement_avg": [ 106.75, 72.75773154792554 ], "wc_limitations_avg": [ 7.75, 9.03811374126261 ], "wc_correctness_avg": [ 25.5, 28.464890654980568 ], "wc_clarity_avg": [ 12.25, 9.364160400164021 ], "wc_relation_to_prior_work_avg": [ 18.75, 27.38955092731533 ], "wc_documentation_avg": [ 4.75, 6.49519052838329 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.75, 156.31918468313478 ], "wc_reply_reviewers_avg": [ 37.25, 55.66585578251717 ], "wc_reply_authors_avg": [ 466.0, 352.50319147491416 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 4.25, 1.479019945774904 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5339119809034115317&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nlpr.ia.ac.cn;ia.ac.cn;;ia.cas.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Batched Energy-Entropy acquisition for Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93157", "id": "wQiJNyPENt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wQiJNyPENt", "openreview": "https://openreview.net/forum?id=wQiJNyPENt", "poster": "", "project": "", "author_site": "Felix Teufel, Carsten Stahlhut, Jesper Ferkinghoff-Borg", "tldr": "", "abstract": "Bayesian optimization (BO) is an attractive machine learning framework for performing sample-efficient global optimization of black-box functions. The optimization process is guided by an acquisition function that selects points to acquire in each round of BO. In batched BO, when multiple points are acquired in parallel, commonly used acquisition functions are often high-dimensional and intractable, leading to the use of sampling-based alternatives. We propose a statistical physics inspired acquisition function that can natively handle batches. Batched Energy-Entropy acquisition for BO (BEEBO) enables tight control of the explore-exploit trade-off of the optimization process and generalizes to heteroskedastic black-box problems. We demonstrate the applicability of BEEBO on a range of problems, showing competitive performance to existing acquisition functions.", "keywords": "bayesian optimization;gaussian processes;acquisition;batch;parallel", "primary_area": "active_learning", "supplementary_material": "", "author": "Felix Teufel;Carsten Stahlhut;Jesper Ferkinghoff-Borg", "authorids": "~Felix_Teufel1;~Carsten_Stahlhut1;~Jesper_Ferkinghoff-Borg1", "gender": ";M;", "homepage": ";;", "dblp": "348/2081;;", "google_scholar": "pO3M3xYAAAAJ;https://scholar.google.dk/citations?user=_ug9GaoAAAAJ;https://scholar.google.co.in/citations?user=54ndJAgAAAAJ", "orcid": "0000-0003-1275-8065;;", "linkedin": ";;https://dk.linkedin.com/in/jesper-ferkinghoff-borg-8829a677", "or_profile": "~Felix_Teufel1;~Carsten_Stahlhut1;~Jesper_Ferkinghoff-Borg1", "aff": "Copenhagen University;Novo Nordisk A/S;", "aff_domain": "ku.dk;novonordisk.com;", "position": "PhD student;Principal Researcher;", "bibtex": "@inproceedings{\nteufel2024batched,\ntitle={Batched Energy-Entropy acquisition for Bayesian Optimization},\nauthor={Felix Teufel and Carsten Stahlhut and Jesper Ferkinghoff-Borg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wQiJNyPENt}\n}", "github": "", "reviewers": "CMG8;4oKm;x4Ge;mXVT", "pdf_size": 26113201, "rating": "3;4;5;5", "confidence": "3;3;4;4", "soundness": "2;2;3;2", "novelty": "2;2;2;2", "presentation": "3;2;2;3", "wc_summary": "99;21;41;45", "wc_strengths": "30;16;87;28", "wc_weaknesses": "216;516;544;49", "wc_questions": "56;8;54;61", "wc_limitations": "1;7;1;2", "wc_review": "402;568;727;185", "wc_reply_reviewers": "8;312;81;157", "wc_reply_authors": "0;324;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 4.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 51.5, 28.892040426387332 ], "wc_strengths_avg": [ 40.25, 27.517040175135115 ], "wc_weaknesses_avg": [ 331.25, 207.57092161475796 ], "wc_questions_avg": [ 44.75, 21.370248009791556 ], "wc_limitations_avg": [ 2.75, 2.48746859276655 ], "wc_review_avg": [ 470.5, 200.93593506389044 ], "wc_reply_reviewers_avg": [ 139.5, 112.66876230792633 ], "wc_reply_authors_avg": [ 81.0, 140.29611541307906 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:72pUtc-rIe8J:scholar.google.com/&scioq=Batched+Energy-Entropy+acquisition+for+Bayesian+Optimization&hl=en&as_sdt=0,11", "gs_version_total": 4, "email": "ku.dk;novonordisk.com;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "University of Copenhagen;Novo Nordisk", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.novonordisk.com", "aff_unique_abbr": "UCPH;NN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Neural Collapse Inspired Feature Alignment for Out-of-Distribution Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93156", "id": "wQpNG9JnPK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wQpNG9JnPK", "openreview": "https://openreview.net/forum?id=wQpNG9JnPK", "poster": "/media/PosterPDFs/NeurIPS%202024/93156.png?t=1731559462.4830122", "project": "", "author_site": "Zhikang Chen, Min Zhang, Sen Cui, Haoxuan Li, Gang Niu, Mingming Gong, Changshui Zhang, Kun Zhang", "tldr": "", "abstract": "The spurious correlation between the background features of the image and its label arises due to that the samples labeled with the same class in the training set often co-occurs with a specific background, which will cause the encoder to extract non-semantic features for classification, resulting in poor out-of-distribution generalization performance. Although many studies have been proposed to address this challenge, the semantic and spurious features are still difficult to accurately decouple from the original image and fail to achieve high performance with deep learning models. This paper proposes a novel perspective inspired by neural collapse to solve the spurious correlation problem through the alternate execution of environment partitioning and learning semantic masks. Specifically, we propose to assign an environment to each sample by learning a local model for each environment and using maximum likelihood probability. At the same time, we require that the learned semantic mask neurally collapses to the same simplex equiangular tight frame (ETF) in each environment after being applied to the original input. We conduct extensive experiments on four datasets, and the results demonstrate that our method significantly improves out-of-distribution performance.", "keywords": "Spurious Correlation;Neural Collapse;Out of Distribution", "primary_area": "other", "supplementary_material": "", "author": "Zhikang Chen;Min Zhang;Sen Cui;Haoxuan Li;Gang Niu;Mingming Gong;Changshui Zhang;Kun Zhang", "authorids": "~Zhikang_Chen1;~Min_Zhang17;~Sen_Cui1;~Haoxuan_Li6;~Gang_Niu1;~Mingming_Gong1;~Changshui_Zhang2;~Kun_Zhang1", "gender": "M;;M;M;M;M;M;M", "homepage": ";;;https://haoxuanli-pku.github.io/;https://niug1984.github.io;https://mingming-gong.github.io/;http://bigeye.au.tsinghua.edu.cn/english/Introduction.html;http://www.andrew.cmu.edu/user/kunz1/", "dblp": ";;267/5483;145/4965-1.html;26/3367-1;98/8479;z/ChangshuiZhang;96/3115-1", "google_scholar": ";;UzQuG1UAAAAJ;gtDqiucAAAAJ;https://scholar.google.co.jp/citations?user=HOkcy00AAAAJ;https://scholar.google.com.au/citations?user=6BmiCJIAAAAJ;GL9M37YAAAAJ;RGoypN4AAAAJ", "orcid": "0009-0005-7498-5839;;;0000-0003-3620-3769;;0000-0001-7147-5589;;", "linkedin": ";;;;;;;", "or_profile": "~Zhikang_Chen1;~Min_Zhang17;~Sen_Cui1;~Haoxuan_Li6;~Gang_Niu1;~Mingming_Gong1;~Changshui_Zhang2;~Kun_Zhang1", "aff": "Tsinghua University;;Tsinghua University;Peking University;Southeast University;University of Melbourne;Tsinghua University;Carnegie Mellon University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;pku.edu.cn;seu.edu.cn;unimelb.edu.au;mail.tsinghua.edu.cn;cmu.edu", "position": "MS student;;PhD student;PhD student;Adjunct Full Professor;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2024neural,\ntitle={Neural Collapse Inspired Feature Alignment for Out-of-Distribution Generalization},\nauthor={Zhikang Chen and Min Zhang and Sen Cui and Haoxuan Li and Gang Niu and Mingming Gong and Changshui Zhang and Kun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wQpNG9JnPK}\n}", "github": "", "reviewers": "BNCX;F1YK;Tf9A", "pdf_size": 595866, "rating": "4;5;7", "confidence": "4;4;5", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "1;2;3", "wc_summary": "57;21;75", "wc_strengths": "12;37;43", "wc_weaknesses": "310;17;172", "wc_questions": "64;20;5", "wc_limitations": "4;1;11", "wc_review": "447;96;306", "wc_reply_reviewers": "132;0;83", "wc_reply_authors": "1323;920;0", "reply_reviewers": "3;0;1", "reply_authors": "5;4;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.0, 0.816496580927726 ], "wc_summary_avg": [ 51.0, 22.44994432064365 ], "wc_strengths_avg": [ 30.666666666666668, 13.424687043734847 ], "wc_weaknesses_avg": [ 166.33333333333334, 119.68384277847292 ], "wc_questions_avg": [ 29.666666666666668, 25.037749277618563 ], "wc_limitations_avg": [ 5.333333333333333, 4.189935029992179 ], "wc_review_avg": [ 283.0, 144.215117099422 ], "wc_reply_reviewers_avg": [ 71.66666666666667, 54.48139335793664 ], "wc_reply_authors_avg": [ 747.6666666666666, 553.6884402702381 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 3.3333333333333335, 1.699673171197595 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=361365695665372312&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;;tsinghua.edu.cn;pku.edu.cn;seu.edu.cn;unimelb.edu.au;mail.tsinghua.edu.cn;cmu.edu", "author_num": 8, "aff_unique_index": "0;0;1;2;3;0;4", "aff_unique_norm": "Tsinghua University;Peking University;Southeast University;University of Melbourne;Carnegie Mellon University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.pku.edu.cn;https://www.seu.edu.cn/;https://www.unimelb.edu.au;https://www.cmu.edu", "aff_unique_abbr": "THU;Peking U;SEU;UniMelb;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;2", "aff_country_unique": "China;Australia;United States" }, { "title": "Language Without Borders: A Dataset and Benchmark for Code-Switching Lip Reading", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97448", "id": "wSCfRAAr69", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wSCfRAAr69", "openreview": "https://openreview.net/forum?id=wSCfRAAr69", "poster": "", "project": "", "author_site": "Xueyi Zhang, Xueyi Zhang, Mingrui Lao, Peng Zhao, Jun Tang, Yanming Guo, Siqi Cai, Xianghu Yue, Haizhou Li", "tldr": "", "abstract": "Lip reading aims at transforming the videos of continuous lip movement into textual contents, and has achieved significant progress over the past decade. It serves as a critical yet practical assistance for speech-impaired individuals, with more practicability than speech recognition in noisy environments. With the increasing interpersonal communications in social media owing to globalization, the existing monolingual datasets for lip reading may not be sufficient to meet the exponential proliferation of bilingual and even multilingual users. However, to our best knowledge, research on code-switching is only explored in speech recognition, while the attempts in lip reading are seriously neglected. To bridge this gap, we have collected a bilingual code-switching lip reading benchmark composed of Chinese and English, dubbed CSLR. As the pioneering work, we recruited 62 speakers with proficient foundations in both\nspoken Chinese and English to express sentences containing both involved languages. Through rigorous criteria in data selection, CSLR benchmark has accumulated 85,560 video samples with a resolution of 1080x1920, totaling over 71.3 hours of high-quality code-switching lip movement data. To systematically evaluate the technical challenges in CSLR, we implement commonly-used lip reading backbones, as well as competitive solutions in code-switching speech for benchmark testing. Experiments show CSLR to be a challenging and under-explored lip reading task. We hope our proposed benchmark will extend the applicability of code-switching lip reading, and further contribute to the communities of cross-lingual communication and collaboration. Our dataset and benchmark are accessible at https://github.com/cslr-lipreading/CSLR.", "keywords": "Lip Reading;Code-Switching;Visual Speech Recognition;Multilingual", "primary_area": "", "supplementary_material": "/attachment/77662ed445229d4531cde19aab6db690f7616136.pdf", "author": "Xueyi Zhang;Chengwei Zhang;Mingrui Lao;Peng Zhao;Jun Tang;Yanming Guo;Siqi Cai;Xianghu Yue;Haizhou Li", "authorids": "~Xueyi_Zhang2;~Chengwei_Zhang4;~Mingrui_Lao1;~Peng_Zhao9;~Jun_Tang6;~Yanming_Guo2;~Siqi_Cai3;~Xianghu_Yue1;~Haizhou_Li3", "gender": "M;M;M;M;M;M;F;M;M", "homepage": "https://scholar.google.com/citations?user=-hFq7VAAAAAJ&hl=en;https://blog.csdn.net/qq_44697805;;;;https://dblp.uni-trier.de/pid/46/3423.html;;https://colips.org/~eleliha/;https://scholar.google.com/citations?user=jWNJCDIAAAAJ&hl=en", "dblp": ";;222/4779.html;;;;;36/4118;199/5796", "google_scholar": ";nwIQFuMAAAAJ;;rmzUvpkAAAAJ;;;https://scholar.google.com.hk/citations?user=ILplI3cAAAAJ;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ;jWNJCDIAAAAJ", "orcid": ";0000-0002-3375-2458;;;0000-0001-5524-9565;;;0000-0001-9158-9401;0000-0003-3527-6034", "linkedin": ";;;;;;;haizhou-li-4ba74b6/;", "or_profile": "~Xueyi_Zhang2;~Chengwei_Zhang4;~Mingrui_Lao1;~Peng_Zhao9;~Jun_Tang6;~Yanming_Guo2;~Siqi_Cai3;~Haizhou_Li3;~Yue_Xianghu1", "aff": "National University of Defense Technology;University of Chinese Academy of Sciences;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "nudt.edu.cn;ucas.ac.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nus.edu.sg;nus.edu.sg;u.nus.edu", "position": "PhD student;PhD student;Lecturer;MS student;Associate Professor;Associate Professor;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhang2024language,\ntitle={Language Without Borders: A Dataset and Benchmark for Code-Switching Lip Reading},\nauthor={Xueyi Zhang and Chengwei Zhang and Mingrui Lao and Peng Zhao and Jun Tang and Yanming Guo and Siqi Cai and Xianghu Yue and Haizhou Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=wSCfRAAr69}\n}", "github": "", "reviewers": "Rsda;o2C4;A5e4;8wJh", "pdf_size": 7651787, "rating": "6;6;7;9", "confidence": "4;3;4;5", "wc_summary_and_contributions": "72;87;150;19", "wc_strengths": "111;5;110;6", "wc_improvement": "164;4;70;61", "wc_limitations": "133;16;56;13", "wc_correctness": "57;3;111;4", "wc_clarity": "116;1;55;6", "wc_relation_to_prior_work": "153;10;65;7", "wc_documentation": "28;12;24;9", "wc_additional_feedback": "1;1;1;1", "wc_review": "835;139;642;126", "wc_reply_reviewers": "10;65;0;0", "wc_reply_authors": "103;37;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "6;5;2;2", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 82.0, 46.6851154009498 ], "wc_strengths_avg": [ 58.0, 52.502380898393554 ], "wc_improvement_avg": [ 74.75, 57.408078699778834 ], "wc_limitations_avg": [ 54.5, 48.396797414705034 ], "wc_correctness_avg": [ 43.75, 44.54983164951356 ], "wc_clarity_avg": [ 44.5, 46.360004314063644 ], "wc_relation_to_prior_work_avg": [ 58.75, 59.1116528275094 ], "wc_documentation_avg": [ 18.25, 7.949056547792323 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 435.5, 310.62235914370365 ], "wc_reply_reviewers_avg": [ 18.75, 27.012728481217888 ], "wc_reply_authors_avg": [ 35.0, 42.06542523260641 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.75, 1.7853571071357126 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.8660254037844386, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:krYpZftDoN0J:scholar.google.com/&scioq=Language+Without+Borders:+A+Dataset+and+Benchmark+for+Code-Switching+Lip+Reading&hl=en&as_sdt=0,7", "gs_version_total": 0, "email": "nudt.edu.cn;ucas.ac.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nus.edu.sg;nus.edu.sg;u.nus.edu", "author_num": 9, "aff_unique_index": "0;1;0;0;0;0;2;2;2", "aff_unique_norm": "National University of Defense Technology;University of Chinese Academy of Sciences;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nudt.edu.cn/;http://www.ucas.ac.cn;https://www.nus.edu.sg", "aff_unique_abbr": "NUDT;UCAS;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;1;1", "aff_country_unique": "China;Singapore" }, { "title": "Pretraining Codomain Attention Neural Operators for Solving Multiphysics PDEs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93155", "id": "wSpIdUXZYX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wSpIdUXZYX", "openreview": "https://openreview.net/forum?id=wSpIdUXZYX", "poster": "/media/PosterPDFs/NeurIPS%202024/93155.png?t=1731356507.3943808", "project": "", "author_site": "Md Ashiqur Rahman, Robert Joseph George, Mogab Elleithy, Daniel Leibovici, Zongyi Li, Boris Bonev, Colin White, Julius Berner, Raymond A. Yeh, Jean Kossaifi, Kamyar Azizzadenesheli, Animashree Anandkumar", "tldr": "", "abstract": "Existing neural operator architectures face challenges when solving multiphysics problems with coupled partial differential equations (PDEs) due to complex geometries, interactions between physical variables, and the limited amounts of high-resolution training data. \nTo address these issues, we propose *Codomain Attention Neural Operator* (CoDA-NO), which tokenizes functions along the codomain or channel space, enabling self-supervised learning or pretraining of multiple PDE systems. \nSpecifically, we extend positional encoding, self-attention, and normalization layers to function spaces. CoDA-NO can learn representations of different PDE systems with a single model. We evaluate CoDA-NO's potential as a backbone for learning multiphysics PDEs over multiple systems by considering few-shot learning settings. On complex downstream tasks with limited data, such as fluid flow simulations, fluid-structure interactions, and Rayleigh-B\u00e9nard convection, we found CoDA-NO to outperform existing methods by over 36%.", "keywords": "Neural Operator", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Md Ashiqur Rahman;Robert Joseph George;Mogab Elleithy;Daniel Leibovici;Zongyi Li;Boris Bonev;Colin White;Julius Berner;Raymond A. Yeh;Jean Kossaifi;Kamyar Azizzadenesheli;Anima Anandkumar", "authorids": "~Md_Ashiqur_Rahman2;~Robert_Joseph_George1;~Mogab_Elleithy1;~Daniel_Leibovici1;~Zongyi_Li1;~Boris_Bonev1;~Colin_White1;~Julius_Berner1;~Raymond_A._Yeh1;~Jean_Kossaifi1;~Kamyar_Azizzadenesheli1;~Anima_Anandkumar1", "gender": "M;M;Not Specified;M;M;M;M;M;;M;M;", "homepage": "https://ashiq24.github.io/;https://www.robertj1.com/;;;https://zongyi-li.github.io;https://bonevbs.github.io;https://crwhite.ml/;https://jberner.info/;;http://jeankossaifi.com/;https://kamyar.page/;", "dblp": "271/3154;331/8708;;;;199/1689;136/9162;227/2217;;155/6766;176/5584;", "google_scholar": "isCWj28AAAAJ;;;;;sYo-KS4AAAAJ;LS6HY-gAAAAJ;73-D2jgAAAAJ;;https://scholar.google.co.uk/citations?user=hJS2TXwAAAAJ;CxAS4SQAAAAJ;", "orcid": ";;;;;0000-0002-4041-238X;;0000-0002-5648-648X;;;;", "linkedin": ";robertljg/;mogab-elleithy-12a199129/;daniel-leibovici-63525789?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;;;;julius-berner/;;;;", "or_profile": "~Md_Ashiqur_Rahman2;~Robert_Joseph_George1;~Mogab_Elleithy1;~Daniel_Leibovici1;~Zongyi_Li1;~Boris_Bonev1;~Colin_White1;~Julius_Berner1;~Raymond_A._Yeh1;~Jean_Kossaifi1;~Kamyar_Azizzadenesheli1;~Anima_Anandkumar1", "aff": "Purdue University;California Institute of Technology;California Institute of Technology;California Institute of Technology;California Institute of Technology;NVIDIA;Abacus.AI;California Institute of Technology;;NVIDIA AI;NVIDIA;", "aff_domain": "purdue.edu;cms.caltech.edu;caltech.edu;caltech.edu;caltech.edu;nvidia.com;abacus.ai;caltech.edu;;nvidia.com;nvidia.com;", "position": "PhD student;PhD student;Researcher;PhD student;PhD student;Researcher;Head of Research;Postdoc;;Researcher;Researcher;", "bibtex": "@inproceedings{\nrahman2024pretraining,\ntitle={Pretraining Codomain Attention Neural Operators for Solving Multiphysics {PDE}s},\nauthor={Md Ashiqur Rahman and Robert Joseph George and Mogab Elleithy and Daniel Leibovici and Zongyi Li and Boris Bonev and Colin White and Julius Berner and Raymond A. Yeh and Jean Kossaifi and Kamyar Azizzadenesheli and Anima Anandkumar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wSpIdUXZYX}\n}", "github": "", "reviewers": "ZkA5;qRRk;Jm1K;rAy3", "pdf_size": 2679916, "rating": "5;6;6;7", "confidence": "4;4;3;3", "soundness": "3;2;3;3", "novelty": "2;3;2;3", "presentation": "3;3;1;3", "wc_summary": "52;56;41;56", "wc_strengths": "115;76;57;68", "wc_weaknesses": "361;196;191;33", "wc_questions": "35;84;25;76", "wc_limitations": "5;8;124;5", "wc_review": "568;420;438;238", "wc_reply_reviewers": "145;16;196;19", "wc_reply_authors": "1990;306;98;0", "reply_reviewers": "2;1;1;1", "reply_authors": "5;2;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 51.25, 6.139014578904337 ], "wc_strengths_avg": [ 79.0, 21.85177338341216 ], "wc_weaknesses_avg": [ 195.25, 115.99218723689971 ], "wc_questions_avg": [ 55.0, 25.406692031825003 ], "wc_limitations_avg": [ 35.5, 51.11017511220246 ], "wc_review_avg": [ 416.0, 117.56700217322886 ], "wc_reply_reviewers_avg": [ 94.0, 78.60343503944341 ], "wc_reply_authors_avg": [ 598.5, 810.9455900367176 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 1.5 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2754229560600706253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "purdue.edu;cms.caltech.edu;caltech.edu;caltech.edu;caltech.edu;nvidia.com;abacus.ai;caltech.edu;;nvidia.com;nvidia.com;", "author_num": 12, "aff_unique_index": "0;1;1;1;1;2;3;1;2;2", "aff_unique_norm": "Purdue University;California Institute of Technology;NVIDIA;Abacus.AI", "aff_unique_dep": ";;NVIDIA Corporation;", "aff_unique_url": "https://www.purdue.edu;https://www.caltech.edu;https://www.nvidia.com;https://www.abacus.ai", "aff_unique_abbr": "Purdue;Caltech;NVIDIA;Abacus.AI", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Theoretical Perspective for Speculative Decoding Algorithm", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93154", "id": "wSqpNeMVLU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wSqpNeMVLU", "openreview": "https://openreview.net/forum?id=wSqpNeMVLU", "poster": "", "project": "", "author_site": "Ming Yin, Minshuo Chen, Kaixuan Huang, Mengdi Wang", "tldr": "", "abstract": "Transformer-based autoregressive sampling has been the major bottleneck for slowing down large language model inferences. One effective way to accelerate inference is Speculative Decoding, which employs a small model to sample a sequence of draft tokens and a large model to validate. Given its empirical effectiveness, the theoretical understanding of Speculative Decoding is falling behind. This paper tackles this gap by conceptualizing the decoding problem via markov chain abstraction and studying the key properties, output quality and inference acceleration, from a theoretical perspective. Our analysis covers the theoretical limits of speculative decoding, batch algorithms, and output quality-inference acceleration tradeoffs. Our results reveal the fundamental connections between different components of LLMs via total variation distances and show how they jointly affect the efficiency of decoding algorithms.", "keywords": "probabilistic analysis;theory for large language models", "primary_area": "other", "supplementary_material": "/attachment/163a83c837eadac3fa32b5b9bdb52a5e2402832d.zip", "author": "Ming Yin;Minshuo Chen;Kaixuan Huang;Mengdi Wang", "authorids": "~Ming_Yin4;~Minshuo_Chen1;~Kaixuan_Huang1;~Mengdi_Wang1", "gender": "M;M;M;F", "homepage": "https://mingyin0312.github.io;https://minshuochen.github.io;https://hackyhuang.github.io/;http://mwang.princeton.edu", "dblp": "89/453.html;217/1509;;", "google_scholar": "ncBRYIUAAAAJ;qU9WvTgAAAAJ;EfxwV6oAAAAJ;", "orcid": "0000-0001-6458-0751;;;", "linkedin": ";;;", "or_profile": "~Ming_Yin4;~Minshuo_Chen1;~Kaixuan_Huang1;~Mengdi_Wang1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "position": "Postdoc;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nyin2024a,\ntitle={A Theoretical Perspective for Speculative Decoding Algorithm},\nauthor={Ming Yin and Minshuo Chen and Kaixuan Huang and Mengdi Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wSqpNeMVLU}\n}", "github": "", "reviewers": "Ceup;qBZ2;GGsr;XUMm", "pdf_size": 1338627, "rating": "5;6;6;6", "confidence": "4;2;5;2", "soundness": "2;3;3;3", "novelty": "2;3;4;3", "presentation": "3;2;4;3", "wc_summary": "81;30;44;169", "wc_strengths": "24;56;54;38", "wc_weaknesses": "175;68;107;122", "wc_questions": "3;1;2;13", "wc_limitations": "3;24;1;7", "wc_review": "286;179;208;349", "wc_reply_reviewers": "23;12;21;22", "wc_reply_authors": "35;21;127;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 81.0, 54.115616969595756 ], "wc_strengths_avg": [ 43.0, 13.0 ], "wc_weaknesses_avg": [ 118.0, 38.360135557633264 ], "wc_questions_avg": [ 4.75, 4.815340071064556 ], "wc_limitations_avg": [ 8.75, 9.065732182234372 ], "wc_review_avg": [ 255.5, 66.67270805959512 ], "wc_reply_reviewers_avg": [ 19.5, 4.387482193696061 ], "wc_reply_authors_avg": [ 45.75, 48.53542520674976 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16912459662196859413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Iterative Methods via Locally Evolving Set Process", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93153", "id": "wT2KhEb97a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wT2KhEb97a", "openreview": "https://openreview.net/forum?id=wT2KhEb97a", "poster": "/media/PosterPDFs/NeurIPS%202024/93153.png?t=1731699259.3470106", "project": "", "author_site": "Baojian Zhou, Yifan Sun, Reza Babanezhad Harikandeh, Xingzhi Guo, Deqing Yang, Yanghua Xiao", "tldr": "", "abstract": "Given the damping factor $\\alpha$ and precision tolerance $\\epsilon$, \\citet{andersen2006local} introduced Approximate Personalized PageRank (APPR), the \\textit{de facto local method} for approximating the PPR vector, with runtime bounded by $\\Theta(1/(\\alpha\\epsilon))$ independent of the graph size. Recently, Fountoulakis \\& Yang asked whether faster local algorithms could be developed using $\\tilde{\\mathcal{O}}(1/(\\sqrt{\\alpha}\\epsilon))$ operations. By noticing that APPR is a local variant of Gauss-Seidel, this paper explores the question of *whether standard iterative solvers can be effectively localized*. We propose to use the *locally evolving set process*, a novel framework to characterize the algorithm locality, and demonstrate that many standard solvers can be effectively localized. Let $\\overline{\\operatorname{vol}}{ (\\mathcal S_t)}$ and $\\overline{\\gamma_t}$ be the running average of volume and the residual ratio of active nodes $\\textstyle \\mathcal{S_t}$ during the process. We show $\\overline{\\operatorname{vol}}{ (\\mathcal S_t)}/\\overline{\\gamma_t} \\leq 1/\\epsilon$ and prove APPR admits a new runtime bound $\\tilde{\\mathcal{O}}(\\overline{\\operatorname{vol}}(\\mathcal S_t)/(\\alpha\\overline{\\gamma_t}))$ mirroring the actual performance. Furthermore, when the geometric mean of residual reduction is $\\Theta(\\sqrt{\\alpha})$, then there exists $c \\in (0,2)$ such that the local Chebyshev method has runtime $\\tilde{\\mathcal{O}}(\\overline{\\operatorname{vol}}(\\mathcal{S_t})/(\\sqrt{\\alpha}(2-c)))$ without the monotonicity assumption. Numerical results confirm the efficiency of this novel framework and show up to a hundredfold speedup over corresponding standard solvers on real-world graphs.", "keywords": "local computation;Personalized PageRank;graph clustering", "primary_area": "optimization", "supplementary_material": "/attachment/bbe9b3fd565b5d607aced0b1f15fafc9b5d76a21.zip", "author": "Baojian Zhou;Yifan Sun;Reza Babanezhad Harikandeh;Xingzhi Guo;Deqing Yang;Yanghua Xiao", "authorids": "~Baojian_Zhou2;~Yifan_Sun1;~Reza_Babanezhad_Harikandeh1;~Xingzhi_Guo1;~Deqing_Yang1;~Yanghua_Xiao1", "gender": "M;F;M;M;M;", "homepage": "https://baojian.github.io/;https://sites.google.com/site/yifansunwebsite/;http://babanezhad.ca;https://www.linkedin.com/in/xingzhi-guo;http://kw.fudan.edu.cn/people/yangdeqing/;", "dblp": "139/5761.html;https://dblp.uni-trier.de/pid/99/10261-1;37/8904.html;;01/2462.html;96/999", "google_scholar": "FWQHIYgAAAAJ;o3fSb1YAAAAJ;KLrwPsgAAAAJ;https://scholar.google.com/citations?hl=en;uZdQxkwAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;0000-0002-1390-3861;0000-0001-8403-9591", "linkedin": ";;;;;", "or_profile": "~Baojian_Zhou2;~Yifan_Sun1;~Reza_Babanezhad_Harikandeh1;~Xingzhi_Guo1;~Deqing_Yang1;~Yanghua_Xiao1", "aff": "Fudan University;State University of New York, Stony Brook;Samsung;Amazon;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;stonybrook.edu;samsung.com;amazon.com;fudan.edu.cn;fudan.edu.cn", "position": "Assistant Professor;Assistant Professor;Research Scientist;Applied Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024iterative,\ntitle={Iterative Methods via Locally Evolving Set Process},\nauthor={Baojian Zhou and Yifan Sun and Reza Babanezhad Harikandeh and Xingzhi Guo and Deqing Yang and Yanghua Xiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wT2KhEb97a}\n}", "github": "", "reviewers": "N8Gt;cyZp;oXbQ", "pdf_size": 1998577, "rating": "6;7;9", "confidence": "3;2;4", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "222;122;120", "wc_strengths": "62;71;47", "wc_weaknesses": "42;1;63", "wc_questions": "20;29;20", "wc_limitations": "4;1;14", "wc_review": "350;224;264", "wc_reply_reviewers": "13;0;57", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 154.66666666666666, 47.61885714247619 ], "wc_strengths_avg": [ 60.0, 9.899494936611665 ], "wc_weaknesses_avg": [ 35.333333333333336, 25.746628689770024 ], "wc_questions_avg": [ 23.0, 4.242640687119285 ], "wc_limitations_avg": [ 6.333333333333333, 5.557777333511022 ], "wc_review_avg": [ 279.3333333333333, 52.56953067657685 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 24.390344173235626 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6546536707079772, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6378282401607266563&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "fudan.edu.cn;stonybrook.edu;samsung.com;amazon.com;fudan.edu.cn;fudan.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Fudan University;State University of New York;Samsung;Amazon", "aff_unique_dep": ";;Samsung;Amazon.com, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://www.stonybrook.edu;https://www.samsung.com;https://www.amazon.com", "aff_unique_abbr": "Fudan;SUNY Stony Brook;Samsung;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "0;1;2;1;0;0", "aff_country_unique": "China;United States;South Korea" }, { "title": "Taming the Long Tail in Human Mobility Prediction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93152", "id": "wT2TIfHKp8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wT2TIfHKp8", "openreview": "https://openreview.net/forum?id=wT2TIfHKp8", "poster": "", "project": "", "author_site": "Xiaohang Xu, Renhe Jiang, Chuang Yang, zipei fan, Kaoru Sezaki", "tldr": "", "abstract": "With the popularity of location-based services, human mobility prediction plays a key role in enhancing personalized navigation, optimizing recommendation systems, and facilitating urban mobility and planning. This involves predicting a user's next POI (point-of-interest) visit using their past visit history. However, the uneven distribution of visitations over time and space, namely the long-tail problem in spatial distribution, makes it difficult for AI models to predict those POIs that are less visited by humans. In light of this issue, we propose the $\\underline{\\bf{Lo}}$ng-$\\underline{\\bf{T}}$ail Adjusted $\\underline{\\bf{Next}}$ POI Prediction (LoTNext) framework for mobility prediction, combining a Long-Tailed Graph Adjustment module to reduce the impact of the long-tailed nodes in the user-POI interaction graph and a novel Long-Tailed Loss Adjustment module to adjust loss by logit score and sample weight adjustment strategy. Also, we employ the auxiliary prediction task to enhance generalization and accuracy. Our experiments with two real-world trajectory datasets demonstrate that LoTNext significantly surpasses existing state-of-the-art works.", "keywords": "Human Mobility;Next POI Prediction;Long-Tail Learning", "primary_area": "machine_learning_for_social_sciences", "supplementary_material": "", "author": "Xiaohang Xu;Renhe Jiang;Chuang Yang;zipei fan;Kaoru Sezaki", "authorids": "~Xiaohang_Xu1;~Renhe_Jiang1;~Chuang_Yang3;~zipei_fan1;~Kaoru_Sezaki1", "gender": ";M;M;M;M", "homepage": "https://yukayo.github.io;https://www.renhejiang.com/;https://sustc-chuangyang.github.io/;https://fanzipei.github.io/;https://www.mcl.iis.u-tokyo.ac.jp/en/", "dblp": "171/2451-2;213/1173;61/2794;129/4043;46/2349.html", "google_scholar": "k3R7EH8AAAAJ;Yo2lwasAAAAJ;Zpk004QAAAAJ;OMny2bEAAAAJ;hXYgAmEAAAAJ", "orcid": "0000-0003-1266-9943;0000-0003-2593-4638;;0000-0002-1442-1530;", "linkedin": ";renhejiang/;;;", "or_profile": "~Xiaohang_Xu1;~Renhe_Jiang1;~Chuang_Yang3;~zipei_fan1;~Kaoru_Sezaki1", "aff": "The University of Tokyo;The University of Tokyo;The University of Tokyo;Jilin University;The University of Tokyo, Tokyo Institute of Technology", "aff_domain": "g.ecc.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;jlu.edu.cn;u-tokyo.ac.jp", "position": "PhD student;Lecturer;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2024taming,\ntitle={Taming the Long Tail in Human Mobility Prediction},\nauthor={Xiaohang Xu and Renhe Jiang and Chuang Yang and zipei fan and Kaoru Sezaki},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wT2TIfHKp8}\n}", "github": "", "reviewers": "7ppu;mZab;Et5T;JhGF", "pdf_size": 1436646, "rating": "5;6;6;7", "confidence": "4;3;5;5", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "60;112;84;92", "wc_strengths": "80;81;16;30", "wc_weaknesses": "189;60;79;41", "wc_questions": "90;29;61;66", "wc_limitations": "40;24;1;5", "wc_review": "459;306;241;234", "wc_reply_reviewers": "25;27;17;17", "wc_reply_authors": "64;58;91;28", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;4;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 18.627936010197157 ], "wc_strengths_avg": [ 51.75, 29.17511782324109 ], "wc_weaknesses_avg": [ 92.25, 57.451610073173754 ], "wc_questions_avg": [ 61.5, 21.73131381210073 ], "wc_limitations_avg": [ 17.5, 15.628499608087784 ], "wc_review_avg": [ 310.0, 90.49033097519315 ], "wc_reply_reviewers_avg": [ 21.5, 4.55521678957215 ], "wc_reply_authors_avg": [ 60.25, 22.38721733489895 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12646088770995920778&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "g.ecc.u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;jlu.edu.cn;u-tokyo.ac.jp", "author_num": 5, "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Tokyo;Jilin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;http://www.jlu.edu.cn", "aff_unique_abbr": "UTokyo;JLU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Japan;China" }, { "title": "Aligning Vision Models with Human Aesthetics in Retrieval: Benchmarks and Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93151", "id": "wT5AgMVkaJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wT5AgMVkaJ", "openreview": "https://openreview.net/forum?id=wT5AgMVkaJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93151.png?t=1730892001.9619167", "project": "", "author_site": "Miaosen Zhang, Yixuan Wei, Zhen Xing, Yifei Ma, Zuxuan Wu, Ji Li, Zheng Zhang, Qi Dai, Chong Luo, Xin Geng, Baining Guo", "tldr": "", "abstract": "Modern vision models are trained on very large noisy datasets. While these models acquire strong capabilities, they may not follow the user's intent to output the desired results in certain aspects, e.g., visual aesthetic, preferred style, and responsibility. In this paper, we target the realm of visual aesthetics and aim to align vision models with human aesthetic standards in a retrieval system. Advanced retrieval systems usually adopt a cascade of aesthetic models as re-rankers or filters, which are limited to low-level features like saturation and perform poorly when stylistic, cultural or knowledge contexts are involved. We find that utilizing the reasoning ability of large language models (LLMs) to rephrase the search query and extend the aesthetic expectations can make up for this shortcoming. Based on the above findings, we propose a preference-based reinforcement learning method that fine-tunes the vision models to distill the knowledge from both LLMs reasoning and the aesthetic models to better align the vision models with human aesthetics. Meanwhile, with rare benchmarks designed for evaluating retrieval systems, we leverage large multi-modality model (LMM) to evaluate the aesthetic performance with their strong abilities. As aesthetic assessment is one of the most subjective tasks, to validate the robustness of LMM, we further propose a novel dataset named HPIR to benchmark the alignment with human aesthetics. Experiments demonstrate that our method significantly enhances the aesthetic behaviors of the vision models, under several metrics. We believe the proposed algorithm can be a general practice for aligning vision models with human values.", "keywords": "Image retrieval;Alignment;Aesthetics;Vision-Language Models", "primary_area": "machine_vision", "supplementary_material": "", "author": "Miaosen Zhang;Yuxing Wei;Zhen Xing;Yifei Ma;Zuxuan Wu;Ji Li;Zheng Zhang;Qi Dai;Chong Luo;Xin Geng;Baining Guo", "authorids": "~Miaosen_Zhang1;~Yuxing_Wei1;~Zhen_Xing2;~Yifei_Ma2;~Zuxuan_Wu1;~Ji_Li7;~Zheng_Zhang4;~Qi_Dai4;~Chong_Luo1;~Xin_Geng1;~Baining_Guo1", "gender": "M;;M;;M;Not Specified;;M;F;M;M", "homepage": "http://palm.seu.edu.cn/homepage/zhangmiaosen/index.html;;https://chenhsing.github.io/;;https://zxwu.azurewebsites.net/;https://sites.google.com/view/ji-li-homepage/;;;https://www.microsoft.com/en-us/research/people/cluo/;http://palm.seu.edu.cn/xgeng/index.htm;https://www.microsoft.com/en-us/research/people/bainguo/", "dblp": "297/3016;;26/9542;;150/8447;98/2427-6.html;;35/5587-1.html;79/3712;;", "google_scholar": ";;yuiXa5EAAAAJ;;7t12hVkAAAAJ;https://scholar.google.com/citations?hl=en;nZ_PVbsAAAAJ;NSJY12IAAAAJ;01iBf38AAAAJ;ZOCxkIcAAAAJ;h4kYmRYAAAAJ", "orcid": ";;;;;0000-0003-4699-084X;;;0000-0003-0939-474X;;", "linkedin": ";;;yifei-ma-008b30126/;;uscjili/;;;;;", "or_profile": "~Miaosen_Zhang1;~Yuxing_Wei1;~Zhen_Xing2;~Yifei_Ma2;~Zuxuan_Wu1;~Ji_Li7;~Zheng_Zhang4;~Qi_Dai4;~Chong_Luo1;~Xin_Geng1;~Baining_Guo1", "aff": "Southeast University;;Fudan University;Microsoft;Fudan University;Microsoft;Microsoft;Microsoft Research Asia;Microsoft Research Asia;Southeast University, China;Microsoft Research", "aff_domain": "seu.edu.cn;;fudan.edu.cn;microsoft.com;fudan.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;seu.edu.cn;microsoft.com", "position": "PhD student;;PhD student;Researcher;Associate Professor;Principal Researcher;Researcher;Researcher;Principal Researcher;Professor;Researcher", "bibtex": "@inproceedings{\nzhang2024aligning,\ntitle={Aligning Vision Models with Human Aesthetics in Retrieval: Benchmarks and Algorithms},\nauthor={Miaosen Zhang and Yuxing Wei and Zhen Xing and Yifei Ma and Zuxuan Wu and Ji Li and Zheng Zhang and Qi Dai and Chong Luo and Xin Geng and Baining Guo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wT5AgMVkaJ}\n}", "github": "", "reviewers": "hvvN;9cZ7;CcyJ;amTt", "pdf_size": 4636407, "rating": "5;5;6;7", "confidence": "4;2;3;5", "soundness": "3;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "64;68;36;58", "wc_strengths": "42;71;101;82", "wc_weaknesses": "25;113;87;129", "wc_questions": "62;30;59;46", "wc_limitations": "13;38;46;7", "wc_review": "206;320;329;322", "wc_reply_reviewers": "0;29;0;25", "wc_reply_authors": "0;29;0;24", "reply_reviewers": "0;1;0;1", "reply_authors": "1;2;1;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 56.5, 12.359207094308275 ], "wc_strengths_avg": [ 74.0, 21.365860619221497 ], "wc_weaknesses_avg": [ 88.5, 39.607448794387146 ], "wc_questions_avg": [ 49.25, 12.636751956100111 ], "wc_limitations_avg": [ 26.0, 16.38596960817394 ], "wc_review_avg": [ 294.25, 51.06062573059598 ], "wc_reply_reviewers_avg": [ 13.5, 13.573871960498227 ], "wc_reply_authors_avg": [ 13.25, 13.36740438529485 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.674199862463242, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13992724091801588272&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "seu.edu.cn;;fudan.edu.cn;microsoft.com;fudan.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;seu.edu.cn;microsoft.com", "author_num": 11, "aff_unique_index": "0;1;2;1;2;2;2;2;0;2", "aff_unique_norm": "Southeast University;Fudan University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.seu.edu.cn/;https://www.fudan.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "SEU;Fudan;Microsoft", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;1;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Enhancing In-Context Learning Performance with just SVD-Based Weight Pruning: A Theoretical Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93150", "id": "wT6GHk5ShC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wT6GHk5ShC", "openreview": "https://openreview.net/forum?id=wT6GHk5ShC", "poster": "/media/PosterPDFs/NeurIPS%202024/93150.png?t=1729776438.2620609", "project": "", "author_site": "Xinhao Yao, Xiaolin Hu, Shenzhi Yang, Yong Liu", "tldr": "", "abstract": "Pre-trained large language models (LLMs) based on Transformer have demonstrated striking in-context learning (ICL) abilities. With a few demonstration input-label pairs, they can predict the label for an unseen input without any parameter updates. In this paper, we show an exciting phenomenon that SVD-based weight pruning can enhance ICL performance, and more surprising, pruning weights in deep layers often results in more stable performance improvements than in shallow layers. However, the underlying mechanism of those findings still remains an open question. To reveal those findings, we conduct an in-depth theoretical analysis by presenting the implicit gradient descent (GD) trajectories of ICL and giving the mutual information based generalization bounds of ICL via full implicit GD trajectories. This helps us reasonably explain the surprising experimental findings. Besides, based on all our experimental and theoretical insights, we intuitively propose a simple, model-compression and derivative-free algorithm for downstream tasks in enhancing ICL inference. Experiments on benchmark datasets and open source LLMs display the method effectiveness.", "keywords": "Large language models;In-Context Learning;SVD;Theoretical generalization bounds", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xinhao Yao;Xiaolin Hu;Shenzhi Yang;Yong Liu", "authorids": "~Xinhao_Yao1;~Xiaolin_Hu6;~Shenzhi_Yang2;~Yong_Liu7", "gender": "M;M;M;M", "homepage": "https://chen123ctrls.github.io/xhy.github.io/;https://www.xiaolinhu.art/;https://summerfall1819.github.io/;https://iie-liuyong.github.io", "dblp": "379/6132.html;60/6028-3.html;;29/4867-18", "google_scholar": "https://scholar.google.com.hk/citations?user=_RwlAhUAAAAJ;6CSzbVEAAAAJ;;vVhmzbAAAAAJ", "orcid": ";0009-0002-5493-5779;;0000-0002-6739-621X", "linkedin": ";;;", "or_profile": "~Xinhao_Yao1;~Xiaolin_Hu6;~Shenzhi_Yang2;~Yong_Liu7", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "Undergrad student;PhD student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nyao2024enhancing,\ntitle={Enhancing In-Context Learning Performance with just {SVD}-Based Weight Pruning: A Theoretical Perspective},\nauthor={Xinhao Yao and Xiaolin Hu and Shenzhi Yang and Yong Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wT6GHk5ShC}\n}", "github": "", "reviewers": "CqwV;Nxqr;Zh6a;itEx", "pdf_size": 1022027, "rating": "5;5;5;5", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;1;2;2", "wc_summary": "52;74;57;65", "wc_strengths": "57;48;43;110", "wc_weaknesses": "221;190;92;46", "wc_questions": "30;70;208;65", "wc_limitations": "5;8;65;1", "wc_review": "365;390;465;287", "wc_reply_reviewers": "435;59;83;20", "wc_reply_authors": "1148;203;202;51", "reply_reviewers": "2;2;1;1", "reply_authors": "4;4;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 1.75, 0.4330127018922193 ], "wc_summary_avg": [ 62.0, 8.336666000266533 ], "wc_strengths_avg": [ 64.5, 26.744158240632665 ], "wc_weaknesses_avg": [ 137.25, 71.0118828084427 ], "wc_questions_avg": [ 93.25, 68.01975815893496 ], "wc_limitations_avg": [ 19.75, 26.242856170775315 ], "wc_review_avg": [ 376.75, 63.55460250839431 ], "wc_reply_reviewers_avg": [ 149.25, 166.5028152915139 ], "wc_reply_authors_avg": [ 401.0, 435.69312594990527 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5523716100631685398&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Probabilistic Weather Forecasting with Hierarchical Graph Neural Networks", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93149", "id": "wTIzpqX121", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wTIzpqX121", "openreview": "https://openreview.net/forum?id=wTIzpqX121", "poster": "/media/PosterPDFs/NeurIPS%202024/93149.png?t=1732893654.897938", "project": "", "author_site": "Joel Oskarsson, Tomas Landelius, Marc Deisenroth, Fredrik Lindsten", "tldr": "", "abstract": "In recent years, machine learning has established itself as a powerful tool for high-resolution weather forecasting. While most current machine learning models focus on deterministic forecasts, accurately capturing the uncertainty in the chaotic weather system calls for probabilistic modeling. We propose a probabilistic weather forecasting model called Graph-EFM, combining a flexible latent-variable formulation with the successful graph-based forecasting framework. The use of a hierarchical graph construction allows for efficient sampling of spatially coherent forecasts. Requiring only a single forward pass per time step, Graph-EFM allows for fast generation of arbitrarily large ensembles. We experiment with the model on both global and limited area forecasting. Ensemble forecasts from Graph-EFM achieve equivalent or lower errors than comparable deterministic models, with the added benefit of accurately capturing forecast uncertainty.", "keywords": "weather forecasting;graph neural network;probabilistic;ensemble forecasting;latent variable model;earth system modeling", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Joel Oskarsson;Tomas Landelius;Marc Peter Deisenroth;Fredrik Lindsten", "authorids": "~Joel_Oskarsson1;~Tomas_Landelius1;~Marc_Peter_Deisenroth1;~Fredrik_Lindsten1", "gender": "M;M;M;M", "homepage": "https://joeloskarsson.github.io/;;https://lindsten.netlify.app/;https://deisenroth.cc", "dblp": "322/4002;;22/8369;76/5043", "google_scholar": "YQaxGpkAAAAJ;;RfTyvdsAAAAJ;https://scholar.google.co.uk/citations?user=GDabimYAAAAJ", "orcid": "0000-0002-8201-0282;0000-0002-3155-5696;;", "linkedin": ";;;", "or_profile": "~Joel_Oskarsson1;~Tomas_Landelius1;~Fredrik_Lindsten1;~Marc_Deisenroth1", "aff": "Link\u00f6ping University;SMHI;Link\u00f6ping University;Alan Turing Institute", "aff_domain": "liu.se;smhi.se;liu.se;turing.ac.uk", "position": "PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\noskarsson2024probabilistic,\ntitle={Probabilistic Weather Forecasting with Hierarchical Graph Neural Networks},\nauthor={Joel Oskarsson and Tomas Landelius and Marc Peter Deisenroth and Fredrik Lindsten},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wTIzpqX121}\n}", "github": "", "reviewers": "PLfK;i1dL;LjpL;568u", "pdf_size": 42525674, "rating": "5;6;7;10", "confidence": "4;5;3;5", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "2;2;3;4", "wc_summary": "120;39;73;46", "wc_strengths": "74;77;47;42", "wc_weaknesses": "713;29;43;8", "wc_questions": "61;162;41;43", "wc_limitations": "137;2;7;9", "wc_review": "1105;309;211;148", "wc_reply_reviewers": "810;69;4;0", "wc_reply_authors": "1852;144;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "5;2;1;1", "rating_avg": [ 7.0, 1.8708286933869707 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 69.5, 31.80015723231569 ], "wc_strengths_avg": [ 60.0, 15.636495771111889 ], "wc_weaknesses_avg": [ 198.25, 297.4519919247474 ], "wc_questions_avg": [ 76.75, 49.83159138538524 ], "wc_limitations_avg": [ 38.75, 56.78192934376217 ], "wc_review_avg": [ 443.25, 386.34464859759606 ], "wc_reply_reviewers_avg": [ 220.75, 341.3043912697286 ], "wc_reply_authors_avg": [ 499.0, 783.3639001128403 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.3223291856101521, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11914738598363813307&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "liu.se;smhi.se;liu.se;turing.ac.uk", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Link\u00f6ping University;Swedish Meteorological and Hydrological Institute;Alan Turing Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.liu.se;https://www.smhi.se;https://www.turing.ac.uk", "aff_unique_abbr": "LiU;SMHI;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Sweden;United Kingdom" }, { "title": "Neural Experts: Mixture of Experts for Implicit Neural Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93148", "id": "wWguwYhpAY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wWguwYhpAY", "openreview": "https://openreview.net/forum?id=wWguwYhpAY", "poster": "", "project": "", "author_site": "Yizhak Ben-Shabat, Chamin Hewa Koneputugodage, Sameera Ramasinghe, Stephen Gould", "tldr": "", "abstract": "Implicit neural representations (INRs) have proven effective in various tasks including image, shape, audio, and video reconstruction. These INRs typically learn the implicit field from sampled input points. This is often done using a single network for the entire domain, imposing many global constraints on a single function. \nIn this paper, we propose a mixture of experts (MoE) implicit neural representation approach that enables learning local piece-wise continuous functions that simultaneously learns to subdivide the domain and fit it locally. \nWe show that incorporating a mixture of experts architecture into existing INR formulations provides a boost in speed, accuracy, and memory requirements. Additionally, we introduce novel conditioning and pretraining methods for the gating network that improves convergence to the desired solution. \nWe evaluate the effectiveness of our approach on multiple reconstruction tasks, including surface reconstruction, image reconstruction, and audio signal reconstruction and show improved performance compared to non-MoE methods. Code is available at our project page https://sitzikbs.github.io/neural-experts-projectpage/ .", "keywords": "Implicit Neural Representation;Surface Reconstruction;Mixture of Experts", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/72038e325e14a2f62f6a366c62dc86c907ddec63.zip", "author": "Yizhak Ben-Shabat;Chamin P Hewa Koneputugodage;Sameera Ramasinghe;Stephen Gould", "authorids": "~Yizhak_Ben-Shabat1;~Chamin_P_Hewa_Koneputugodage1;~Sameera_Ramasinghe1;~Stephen_Gould1", "gender": "M;M;M;M", "homepage": "https://www.itzikbs.com;;;http://users.cecs.anu.edu.au/~sgould/", "dblp": "195/6247;248/8306;181/4514;89/1569.html", "google_scholar": "FNn1MBMAAAAJ;https://scholar.google.com.au/citations?user=ct40eqsAAAAJ;https://scholar.google.com.au/citations?user=-j0m9aMAAAAJ;YvdzeM8AAAAJ", "orcid": "0000-0001-7547-7493;0009-0000-1333-5967;;0000-0001-8929-7899", "linkedin": "yizhak-itzik-ben-shabat-67b3b1b7/;chamin-hewa-koneputugodage-b3ba17148/;;", "or_profile": "~Yizhak_Ben-Shabat1;~Chamin_P_Hewa_Koneputugodage1;~Sameera_Ramasinghe1;~Stephen_Gould1", "aff": "Technion - Israel Institute of Technology, Technion;Australian National University;Amazon;Australian National University", "aff_domain": "technion.ac.il;anu.edu.au;amazon.com;anu.edu.au", "position": "Researcher;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nben-shabat2024neural,\ntitle={Neural Experts: Mixture of Experts for Implicit Neural Representations},\nauthor={Yizhak Ben-Shabat and Chamin P Hewa Koneputugodage and Sameera Ramasinghe and Stephen Gould},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wWguwYhpAY}\n}", "github": "", "reviewers": "ekS9;ij4R;VtKc;aVob", "pdf_size": 35633215, "rating": "5;5;6;6", "confidence": "5;4;4;3", "soundness": "2;1;2;3", "novelty": "2;1;3;3", "presentation": "3;3;2;3", "wc_summary": "55;97;102;69", "wc_strengths": "54;53;70;56", "wc_weaknesses": "211;232;428;104", "wc_questions": "100;36;71;25", "wc_limitations": "16;9;7;7", "wc_review": "436;427;678;261", "wc_reply_reviewers": "91;0;54;110", "wc_reply_authors": "265;0;417;0", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.75, 19.472737352514155 ], "wc_strengths_avg": [ 58.25, 6.869315832017043 ], "wc_weaknesses_avg": [ 243.75, 116.92812963525928 ], "wc_questions_avg": [ 58.0, 29.605742686174924 ], "wc_limitations_avg": [ 9.75, 3.6996621467371855 ], "wc_review_avg": [ 450.5, 148.6850698624445 ], "wc_reply_reviewers_avg": [ 63.75, 41.95458854523543 ], "wc_reply_authors_avg": [ 170.5, 178.76870531499634 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FQLzDI5tuWsJ:scholar.google.com/&scioq=Neural+Experts:+Mixture+of+Experts+for+Implicit+Neural+Representations&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "technion.ac.il;anu.edu.au;amazon.com;anu.edu.au", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Australian National University;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.technion.ac.il;https://www.anu.edu.au;https://www.amazon.com", "aff_unique_abbr": "Technion;ANU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "Israel;Australia;United States" }, { "title": "Reflective Multi-Agent Collaboration based on Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93147", "id": "wWiAR5mqXq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wWiAR5mqXq", "openreview": "https://openreview.net/forum?id=wWiAR5mqXq", "poster": "/media/PosterPDFs/NeurIPS%202024/93147.png?t=1731132397.331683", "project": "", "author_site": "Xiaohe Bo, Zeyu Zhang, Quanyu Dai, Xueyang Feng, Lei Wang, Rui Li, Xu Chen, Ji-Rong Wen", "tldr": "", "abstract": "Benefiting from the powerful language expression and planning capabilities of Large Language Models (LLMs), LLM-based autonomous agents have achieved promising performance in various downstream tasks. Recently, based on the development of single-agent systems, researchers propose to construct LLM-based multi-agent systems to tackle more complicated tasks. In this paper, we propose a novel framework, named COPPER, to enhance the collaborative capabilities of LLM-based agents with the self-reflection mechanism. To improve the quality of reflections, we propose to fine-tune a shared reflector, which automatically tunes the prompts of actor models using our counterfactual PPO mechanism. On the one hand, we propose counterfactual rewards to assess the contribution of a single agent\u2019s reflection within the system, alleviating the credit assignment problem. On the other hand, we propose to train a shared reflector, which enables the reflector to generate personalized reflections according to agent roles, while reducing the computational resource requirements and improving training stability. We conduct experiments on three datasets to evaluate the performance of our model in multi-hop question answering, mathematics, and chess scenarios. Experimental results show that COPPER possesses stronger reflection capabilities and exhibits excellent generalization performance across different actor models.", "keywords": "Large Language Models;Multi-Agent Systems;Reflection Mechanism", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xiaohe Bo;Zeyu Zhang;Quanyu Dai;Xueyang Feng;Lei Wang;Rui Li;Xu Chen;Ji-Rong Wen", "authorids": "~Xiaohe_Bo1;~Zeyu_Zhang6;~Quanyu_Dai1;~Xueyang_Feng1;~Lei_Wang46;~Rui_Li16;~Xu_Chen13;~Ji-Rong_Wen1", "gender": "F;M;M;M;M;;M;M", "homepage": ";https://zeyu-zhang.cn;;https://github.com/XueyangFeng;https://paitesanshi.github.io/;https://github.com/rui9812;https://gsai.ruc.edu.cn/chenxu;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "353/7419.html;44/8352-200.html;210/1089;;181/2817-198;96/4282-86.html;83/6331-17;w/JRWen", "google_scholar": "jGxytu8AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;;E6NenUwAAAAJ;UlIbtTkAAAAJ;loPoqy0AAAAJ;tbxCHJgAAAAJ", "orcid": ";0000-0003-0048-1687;0000-0001-7578-2738;;0009-0002-7769-6918;0009-0005-0625-6802;0000-0003-0144-1775;0000-0002-9777-9676", "linkedin": ";;;;;;;", "or_profile": "~Xiaohe_Bo1;~Zeyu_Zhang6;~Quanyu_Dai1;~Xueyang_Feng1;~Lei_Wang46;~Rui_Li16;~Xu_Chen13;~Ji-Rong_Wen1", "aff": "Beijing Normal University;Renmin University of China;Huawei Technologies Ltd.;Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "bnu.edu.cn;ruc.edu.cn;huawei.com;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "Undergrad student;MS student;Researcher;PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbo2024reflective,\ntitle={Reflective Multi-Agent Collaboration based on Large Language Models},\nauthor={Xiaohe Bo and Zeyu Zhang and Quanyu Dai and Xueyang Feng and Lei Wang and Rui Li and Xu Chen and Ji-Rong Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wWiAR5mqXq}\n}", "github": "", "reviewers": "tdnV;ogh3;gdf9", "pdf_size": 1539508, "rating": "4;6;7", "confidence": "4;3;3", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "87;51;59", "wc_strengths": "31;35;76", "wc_weaknesses": "122;82;100", "wc_questions": "2;49;82", "wc_limitations": "1;18;9", "wc_review": "243;235;326", "wc_reply_reviewers": "16;66;14", "wc_reply_authors": "231;261;20", "reply_reviewers": "1;1;1", "reply_authors": "4;3;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.66666666666667, 15.4344492037203 ], "wc_strengths_avg": [ 47.333333333333336, 20.33606539022619 ], "wc_weaknesses_avg": [ 101.33333333333333, 16.35712552851373 ], "wc_questions_avg": [ 44.333333333333336, 32.826141344293816 ], "wc_limitations_avg": [ 9.333333333333334, 6.944222218666553 ], "wc_review_avg": [ 268.0, 41.14203041497426 ], "wc_reply_reviewers_avg": [ 32.0, 24.055491403558285 ], "wc_reply_authors_avg": [ 170.66666666666666, 107.23908905908435 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.0, 0.816496580927726 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.9449111825230683, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15131065470666892700&as_sdt=5,31&sciodt=0,31&hl=en", "gs_version_total": 2, "email": "bnu.edu.cn;ruc.edu.cn;huawei.com;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;1;1;1;1;1", "aff_unique_norm": "Beijing Normal University;Renmin University of China;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.bnu.edu.cn;http://www.ruc.edu.cn;https://www.huawei.com", "aff_unique_abbr": "BNU;RUC;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Sober Look at the Robustness of CLIPs to Spurious Features", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93146", "id": "wWyumwEYV8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wWyumwEYV8", "openreview": "https://openreview.net/forum?id=wWyumwEYV8", "poster": "/media/PosterPDFs/NeurIPS%202024/93146.png?t=1731323426.217694", "project": "", "author_site": "Qizhou Wang, Yong Lin, Yongqiang Chen, Ludwig Schmidt, Bo Han, Tong Zhang", "tldr": "", "abstract": "Large vision language models, such as CLIP, demonstrate impressive robustness to spurious features than single-modal models trained on ImageNet. However, existing test datasets are typically curated based on ImageNet-trained models, which aim to capture the spurious features inherited in ImageNet. Benchmarking CLIP models based on the ImageNet-oriented spurious features may not be sufficient to reflect the extent to which CLIP models are robust to spurious correlations within CLIP training data, e.g., LAION. To this end, we craft a new challenging dataset named CounterAnimal designed to reveal the reliance of CLIP models on realistic spurious features. Specifically, we split animal photos into groups according to the backgrounds, and then identify a pair of groups for each class where a CLIP model shows high-performance drops across the two groups. Our evaluations show that the spurious features captured by CounterAnimal are generically learned by CLIP models with different backbones and pre-train data, yet have limited influence for ImageNet models. We provide theoretical insights that the CLIP objective cannot offer additional robustness. Furthermore, we also re-evaluate strategies such as scaling up parameters and high-quality pre-trained data. We find that they still help mitigate the spurious features, providing a promising path for future developments.", "keywords": "CLIP;Distribution Shift", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qizhou Wang;Yong Lin;Yongqiang Chen;Ludwig Schmidt;Bo Han;Tong Zhang", "authorids": "~Qizhou_Wang1;~Yong_Lin2;~Yongqiang_Chen1;~Ludwig_Schmidt1;~Bo_Han1;~Tong_Zhang2", "gender": ";;;M;;M", "homepage": ";;https://lfhase.win;http://people.csail.mit.edu/ludwigs/;;http://tongzhang-ml.org", "dblp": ";;76/5774-2;141/2720;;07/4227-1", "google_scholar": ";;huQ_Ig8AAAAJ;SWMKy70AAAAJ;;LurWtuYAAAAJ", "orcid": ";;;;;0000-0002-5511-2558", "linkedin": ";;;ludwig-schmidt-87ba3612/;;", "or_profile": "~Qizhou_Wang1;~Yong_Lin2;~Yongqiang_Chen1;~Ludwig_Schmidt1;~Bo_Han1;~Tong_Zhang2", "aff": ";;Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Washington;;UIUC", "aff_domain": ";;cse.cuhk.edu.hk;washington.edu;;illinois.edu", "position": ";;PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nwang2024a,\ntitle={A Sober Look at the Robustness of {CLIP}s to Spurious Features},\nauthor={Qizhou Wang and Yong Lin and Yongqiang Chen and Ludwig Schmidt and Bo Han and Tong Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wWyumwEYV8}\n}", "github": "", "reviewers": "iLSC;2MXa;BaJ3;KCA4", "pdf_size": 3294639, "rating": "5;5;6;6", "confidence": "4;5;5;4", "soundness": "2;3;1;2", "novelty": "2;3;2;2", "presentation": "3;3;2;3", "wc_summary": "63;105;72;64", "wc_strengths": "44;91;44;38", "wc_weaknesses": "384;185;2122;6", "wc_questions": "5;59;25;235", "wc_limitations": "6;31;27;14", "wc_review": "502;471;2290;357", "wc_reply_reviewers": "476;217;825;147", "wc_reply_authors": "1944;195;850;235", "reply_reviewers": "2;1;2;1", "reply_authors": "5;2;4;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 76.0, 17.10263137648707 ], "wc_strengths_avg": [ 54.25, 21.358546298847212 ], "wc_weaknesses_avg": [ 674.25, 846.4851962674835 ], "wc_questions_avg": [ 81.0, 90.98351499035417 ], "wc_limitations_avg": [ 19.5, 10.012492197250394 ], "wc_review_avg": [ 905.0, 801.4508718567845 ], "wc_reply_reviewers_avg": [ 416.25, 265.9148124870068 ], "wc_reply_authors_avg": [ 806.0, 706.4598360841188 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 3.5, 1.118033988749895 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2141320546797681099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";;cse.cuhk.edu.hk;washington.edu;;illinois.edu", "author_num": 6, "aff_unique_index": "0;1;2", "aff_unique_norm": "Chinese University of Hong Kong;University of Washington;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Computer Science and Engineering;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.washington.edu;https://www illinois.edu", "aff_unique_abbr": "CUHK;UW;UIUC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Urbana-Champaign", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "wZ5kEOCTce", "title": "Rethinking Patch Dependence for Masked Autoencoders", "track": "main", "status": "Reject", "tldr": "", "abstract": "In this work, we examine the impact of inter-patch dependencies in the decoder of masked autoencoders (MAE) on representation learning. We decompose the decoding mechanism for masked reconstruction into self-attention between mask tokens and cross-attention between masked and visible tokens. Our findings reveal that MAE reconstructs coherent images from visible patches not through interactions between patches in the decoder but by learning a global representation within the encoder. This discovery leads us to propose a simple visual pretraining framework: cross-attention masked autoencoders (CrossMAE). This framework employs only cross-attention in the decoder to independently read out reconstructions for a small subset of masked patches from encoder outputs, yet it achieves comparable or superior performance to traditional MAE across models ranging from ViT-S to ViT-H. By its design, CrossMAE challenges the necessity of interaction between mask tokens for effective masked pretraining. Code is available [here](https://anonymous.4open.science/r/mae-cross-anon-11EB/README.md).", "keywords": "self-supervised learning;visual pre-training;representation learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Letian Fu;Long Lian;Renhao Wang;Baifeng Shi;Xudong Wang;Adam Yala;Trevor Darrell;Alexei A Efros;Ken Goldberg", "authorids": "~Letian_Fu1;~Long_Lian1;~Renhao_Wang1;~Baifeng_Shi1;~Xudong_Wang4;~Adam_Yala1;~Trevor_Darrell2;~Alexei_A_Efros1;~Ken_Goldberg1", "gender": "M;M;;;M;M;M;M;M", "homepage": "https://max-fu.github.io/;https://github.com/TonyLianLong;;https://bfshi.github.io;http://people.eecs.berkeley.edu/~xdwang/;http://adamyala.csail.mit.edu/;http://goldberg.berkeley.edu/;http://www.eecs.berkeley.edu/~efros/;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";276/0012;243/7150;261/9376;;177/9396;g/KennethYGoldberg;40/6158;d/TrevorDarrell", "google_scholar": "aWot7UgAAAAJ;eOLxyqUAAAAJ;q4RlE2oAAAAJ;LBEIm8gAAAAJ;Azf07WcAAAAJ;a4unsk4AAAAJ;https://scholar.google.com.tw/citations?user=8fztli4AAAAJ;https://scholar.google.com.tw/citations?user=d97bGd8AAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "orcid": ";0000-0001-6098-189X;;;;0000-0001-9576-2590;0000-0001-6747-9499;0000-0001-5720-8070;", "linkedin": ";longlian/;;baifeng-shi-09171b188/;;;goldbergken/;alexei-efros-890736a3/;", "or_profile": "~Letian_Fu1;~Long_Lian1;~Renhao_Wang1;~Baifeng_Shi1;~Xudong_Wang4;~Adam_Yala1;~Ken_Goldberg1;~Alyosha_Efros1;~trevor_darrell1", "aff": "University of California, Berkeley;NVIDIA;University of California, Berkeley;NVIDIA;Google DeepMind;University of California, San Francisco;University of California, Berkeley;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;nvidia.com;berkeley.edu;nvidia.com;google.com;ucsf.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu", "position": "PhD student;Intern;PhD student;Research Intern;Research Intern;Assistant Professor;Full Professor;Professor;Professor", "bibtex": "@misc{\nanonymous2024rethinking,\ntitle={Rethinking Patch Dependence for Masked Autoencoders},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=wZ5kEOCTce}\n}", "github": "", "project": "", "reviewers": "yYWM;cyeA;gKk3", "site": "https://openreview.net/forum?id=wZ5kEOCTce", "pdf_size": 6069369, "rating": "4;5;7", "confidence": "4;4;4", "soundness": "2;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "54;66;164", "wc_strengths": "39;73;103", "wc_weaknesses": "281;259;304", "wc_questions": "6;214;2", "wc_limitations": "44;10;22", "wc_review": "424;622;595", "wc_reply_reviewers": "0;38;27", "wc_reply_authors": "117;273;0", "reply_reviewers": "0;1;1", "reply_authors": "3;4;1", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 94.66666666666667, 49.27022991173834 ], "wc_strengths_avg": [ 71.66666666666667, 26.144895401503437 ], "wc_weaknesses_avg": [ 281.3333333333333, 18.372685039360892 ], "wc_questions_avg": [ 74.0, 99.00841715059718 ], "wc_limitations_avg": [ 25.333333333333332, 14.079141387961918 ], "wc_review_avg": [ 547.0, 87.66983517721475 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 15.965240019770729 ], "wc_reply_authors_avg": [ 130.0, 111.83022847155415 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.247219128924647 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6127487205818608225&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;0;1;2;3;0;0;4", "aff_unique_norm": "University of California, Berkeley;NVIDIA;Google;University of California, San Francisco;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";NVIDIA Corporation;Google DeepMind;;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://www.nvidia.com;https://deepmind.com;https://www.ucsf.edu;", "aff_unique_abbr": "UC Berkeley;NVIDIA;DeepMind;UCSF;", "aff_campus_unique_index": "0;0;2;0;0", "aff_campus_unique": "Berkeley;;San Francisco", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom;" }, { "title": "Incentivizing Quality Text Generation via Statistical Contracts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93145", "id": "wZgw4CrxwK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wZgw4CrxwK", "openreview": "https://openreview.net/forum?id=wZgw4CrxwK", "poster": "/media/PosterPDFs/NeurIPS%202024/93145.png?t=1733393262.9558287", "project": "", "author_site": "Eden Saig, Ohad Einav, Inbal Talgam-Cohen", "tldr": "", "abstract": "While the success of large language models (LLMs) increases demand for machine-generated text, current pay-per-token pricing schemes create a misalignment of incentives known in economics as moral hazard: Text-generating agents have strong incentive to cut costs by preferring a cheaper model over the cutting-edge one, and this can be done \u201cbehind the scenes\u201d since the agent performs inference internally. In this work, we approach this issue from an economic perspective, by proposing a pay-for-performance, contract-based framework for incentivizing quality. We study a principal-agent game where the agent generates text using costly inference, and the contract determines the principal\u2019s payment for the text according to an automated quality evaluation. Since standard contract theory is inapplicable when internal inference costs are unknown, we introduce cost-robust contracts. As our main theoretical contribution, we characterize optimal cost-robust contracts through a direct correspondence to optimal composite hypothesis tests from statistics, generalizing a result of Saig et al. (NeurIPS\u201923). We evaluate our framework empirically by deriving contracts for a range of objectives and LLM evaluation benchmarks, and find that cost-robust contracts sacrifice only a marginal increase in objective value compared to their cost-aware counterparts.", "keywords": "Contract Theory;Contract Design;Moral Hazard;Natural Language Generation;LLM evaluation;Hypothesis Testing", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/1d1c6a71a59af8f798d2cf33631637e3dd64117a.zip", "author": "Eden Saig;Ohad Einav;Inbal Talgam-Cohen", "authorids": "~Eden_Saig1;~Ohad_Einav1;~Inbal_Talgam-Cohen2", "gender": "M;M;F", "homepage": "https://edensaig.github.io/;;http://www.inbaltalgam.com/", "dblp": "209/3728;;07/8319", "google_scholar": "7DsqqK8AAAAJ;;R1YK5BsAAAAJ", "orcid": "0000-0002-0810-2218;;", "linkedin": "eden-saig/;ohadeinav/;", "or_profile": "~Eden_Saig1;~Ohad_Einav1;~Inbal_Talgam-Cohen2", "aff": "Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;Tel Aviv University", "aff_domain": "cs.technion.ac.il;campus.technion.ac.il;tau.ac.il", "position": "PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nsaig2024incentivizing,\ntitle={Incentivizing Quality Text Generation via Statistical Contracts},\nauthor={Eden Saig and Ohad Einav and Inbal Talgam-Cohen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wZgw4CrxwK}\n}", "github": "", "reviewers": "4RXi;RQsK;Bg3E", "pdf_size": 574621, "rating": "5;5;7", "confidence": "3;3;3", "soundness": "3;3;4", "novelty": "3;1;4", "presentation": "3;3;4", "wc_summary": "102;160;189", "wc_strengths": "25;79;42", "wc_weaknesses": "56;226;149", "wc_questions": "49;2;52", "wc_limitations": "1;4;7", "wc_review": "233;471;439", "wc_reply_reviewers": "172;42;0", "wc_reply_authors": "778;44;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 5.666666666666667, 0.9428090415820634 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 150.33333333333334, 36.16935473881477 ], "wc_strengths_avg": [ 48.666666666666664, 22.543784558547888 ], "wc_weaknesses_avg": [ 143.66666666666666, 69.50459617096476 ], "wc_questions_avg": [ 34.333333333333336, 22.89589968143253 ], "wc_limitations_avg": [ 4.0, 2.449489742783178 ], "wc_review_avg": [ 381.0, 105.46405390779678 ], "wc_reply_reviewers_avg": [ 71.33333333333333, 73.21809126772487 ], "wc_reply_authors_avg": [ 274.0, 356.8342285525124 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5099546735893119374&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "cs.technion.ac.il;campus.technion.ac.il;tau.ac.il", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Tel Aviv University", "aff_unique_dep": ";", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.tau.ac.il", "aff_unique_abbr": "Technion;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "RoPINN: Region Optimized Physics-Informed Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93144", "id": "wZigMVFURk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wZigMVFURk", "openreview": "https://openreview.net/forum?id=wZigMVFURk", "poster": "/media/PosterPDFs/NeurIPS%202024/93144.png?t=1729496720.7048914", "project": "", "author_site": "Haixu Wu, Huakun Luo, Yuezhou Ma, Jianmin Wang, Mingsheng Long", "tldr": "", "abstract": "Physics-informed neural networks (PINNs) have been widely applied to solve partial differential equations (PDEs) by enforcing outputs and gradients of deep models to satisfy target equations. Due to the limitation of numerical computation, PINNs are conventionally optimized on finite selected points. However, since PDEs are usually defined on continuous domains, solely optimizing models on scattered points may be insufficient to obtain an accurate solution for the whole domain. To mitigate this inherent deficiency of the default scatter-point optimization, this paper proposes and theoretically studies a new training paradigm as region optimization. Concretely, we propose to extend the optimization process of PINNs from isolated points to their continuous neighborhood regions, which can theoretically decrease the generalization error, especially for hidden high-order constraints of PDEs. A practical training algorithm, Region Optimized PINN (RoPINN), is seamlessly derived from this new paradigm, which is implemented by a straightforward but effective Monte Carlo sampling method. By calibrating the sampling process into trust regions, RoPINN finely balances optimization and generalization error. Experimentally, RoPINN consistently boosts the performance of diverse PINNs on a wide range of PDEs without extra backpropagation or gradient calculation. Code is available at this repository: https://github.com/thuml/RoPINN.", "keywords": "Physics-informed Neural Networks;PINN Training;Deep Learning", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Haixu Wu;Huakun Luo;Yuezhou Ma;Jianmin Wang;Mingsheng Long", "authorids": "~Haixu_Wu1;~Huakun_Luo1;~Yuezhou_Ma1;~Jianmin_Wang1;~Mingsheng_Long5", "gender": "M;M;M;M;M", "homepage": ";https://github.com/mayz20;https://www.thss.tsinghua.edu.cn/en/faculty/jianminwang.htm;https://luohk19.github.io;http://ise.thss.tsinghua.edu.cn/~mlong", "dblp": "286/8115;359/0553;06/3456-1.html;338/9930;74/9023", "google_scholar": "oLL_x0wAAAAJ;;https://scholar.google.com.tw/citations?user=MiovcboAAAAJ;;_MjXpXkAAAAJ", "orcid": ";;0000-0001-6841-7943;;0000-0002-5412-9120", "linkedin": ";;;;", "or_profile": "~Haixu_Wu1;~Yuezhou_Ma1;~Jianmin_Wang1;~\u534e\u5764_\u7f571;~Mingsheng_Long2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nwu2024ropinn,\ntitle={Ro{PINN}: Region Optimized Physics-Informed Neural Networks},\nauthor={Haixu Wu and Huakun Luo and Yuezhou Ma and Jianmin Wang and Mingsheng Long},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wZigMVFURk}\n}", "github": "", "reviewers": "zNBp;KCE6;mhfV;aU22", "pdf_size": 2139317, "rating": "4;6;6;6", "confidence": "3;4;4;4", "soundness": "2;3;4;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "53;94;18;90", "wc_strengths": "12;77;12;71", "wc_weaknesses": "87;57;6;147", "wc_questions": "89;26;88;103", "wc_limitations": "2;14;17;41", "wc_review": "243;268;141;452", "wc_reply_reviewers": "276;139;0;31", "wc_reply_authors": "1884;213;214;39", "reply_reviewers": "2;1;0;1", "reply_authors": "5;2;2;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 63.75, 30.873734791890662 ], "wc_strengths_avg": [ 43.0, 31.072495876578696 ], "wc_weaknesses_avg": [ 74.25, 51.0165414351071 ], "wc_questions_avg": [ 76.5, 29.75315109362368 ], "wc_limitations_avg": [ 18.5, 14.150971698084906 ], "wc_review_avg": [ 276.0, 112.19848483825439 ], "wc_reply_reviewers_avg": [ 111.5, 108.08445771710196 ], "wc_reply_authors_avg": [ 587.5, 751.9170499463355 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 1.299038105676658 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17646345421163357252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Stabilize the Latent Space for Image Autoregressive Modeling: A Unified Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93143", "id": "waQ5X4qc3W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=waQ5X4qc3W", "openreview": "https://openreview.net/forum?id=waQ5X4qc3W", "poster": "", "project": "", "author_site": "Yongxin Zhu, Bocheng Li, Hang Zhang, Xin Li, Linli Xu, Lidong Bing", "tldr": "", "abstract": "Latent-based image generative models, such as Latent Diffusion Models (LDMs) and Mask Image Models (MIMs), have achieved notable success in image generation tasks. These models typically leverage reconstructive autoencoders like VQGAN or VAE to encode pixels into a more compact latent space and learn the data distribution in the latent space instead of directly from pixels. However, this practice raises a pertinent question: Is it truly the optimal choice? In response, we begin with an intriguing observation: despite sharing the same latent space, autoregressive models significantly lag behind LDMs and MIMs in image generation. This finding contrasts sharply with the field of NLP, where the autoregressive model GPT has established a commanding presence. To address this discrepancy, we introduce a unified perspective on the relationship between latent space and generative models, emphasizing the stability of latent space in image generative modeling. Furthermore, we propose a simple but effective discrete image tokenizer to stabilize the latent space for image generative modeling by applying K-Means on the latent features of self-supervised learning models. Experimental results show that image autoregressive modeling with our tokenizer (DiGIT) benefits both image understanding and image generation with the next token prediction principle, which is inherently straightforward for GPT models but challenging for other generative models. Remarkably, for the first time, a GPT-style autoregressive model for images outperforms LDMs, which also exhibits substantial improvement akin to GPT when scaling up model size. Our findings underscore the potential of an optimized latent space and the integration of discrete tokenization in advancing the capabilities of image generative models. The code is available at \\url{https://github.com/DAMO-NLP-SG/DiGIT}.", "keywords": "Latent space;Image sequential modeling;Image tokenizer", "primary_area": "generative_models", "supplementary_material": "", "author": "Yongxin Zhu;Bocheng Li;Hang Zhang;Xin Li;Linli Xu;Lidong Bing", "authorids": "~Yongxin_Zhu1;~Bocheng_Li1;~Hang_Zhang6;~Xin_Li40;~Linli_Xu1;~Lidong_Bing2", "gender": "M;;M;M;;", "homepage": "https://youngsheen.github.io;https://sites.google.com/view/bochengli;;https://lixin4ever.github.io/;;", "dblp": "27/3343-3;169/2500.html;49/6156-29;09/1365-56.html;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=syD9lxQAAAAJ;;", "orcid": ";;;;;", "linkedin": ";;;;;", "or_profile": "~Yongxin_Zhu1;~Bocheng_Li1;~Hang_Zhang6;~Xin_Li40;~Linli_Xu1;~Lidong_Bing2", "aff": "University of Science and Technology of China;University of Science and Technology of China;Sichuan University;Alibaba Group;;", "aff_domain": "ustc.edu;ustc.edu.cn;scu.edu.cn;alibaba-inc.com;;", "position": "PhD student;MS student;PhD student;Researcher;;", "bibtex": "@inproceedings{\nzhu2024stabilize,\ntitle={Stabilize the Latent Space for Image Autoregressive Modeling: A Unified Perspective},\nauthor={Yongxin Zhu and Bocheng Li and Hang Zhang and Xin Li and Linli Xu and Lidong Bing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=waQ5X4qc3W}\n}", "github": "", "reviewers": "Sn2s;K39P;Vc7B;eGbB", "pdf_size": 12230171, "rating": "3;6;6;7", "confidence": "5;4;4;3", "soundness": "1;1;3;3", "novelty": "2;2;3;3", "presentation": "1;2;1;3", "wc_summary": "54;169;41;98", "wc_strengths": "26;59;49;96", "wc_weaknesses": "234;260;193;104", "wc_questions": "2;91;53;8", "wc_limitations": "35;40;1;8", "wc_review": "351;619;337;314", "wc_reply_reviewers": "0;263;25;0", "wc_reply_authors": "110;997;30;107", "reply_reviewers": "0;3;1;0", "reply_authors": "2;4;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.0, 1.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 1.75, 0.82915619758885 ], "wc_summary_avg": [ 90.5, 50.002499937503124 ], "wc_strengths_avg": [ 57.5, 25.243811122728676 ], "wc_weaknesses_avg": [ 197.75, 59.16238247400116 ], "wc_questions_avg": [ 38.5, 36.15591237958185 ], "wc_limitations_avg": [ 21.0, 16.777961735562517 ], "wc_review_avg": [ 405.25, 124.11360723143937 ], "wc_reply_reviewers_avg": [ 72.0, 110.74520305638525 ], "wc_reply_authors_avg": [ 311.0, 397.3581507909458 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9428090415820634, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8617383142404105724&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ustc.edu;ustc.edu.cn;scu.edu.cn;alibaba-inc.com;;", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Science and Technology of China;Sichuan University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.scu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "USTC;SCU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Constructing Semantics-Aware Adversarial Examples with a Probabilistic Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93142", "id": "wbE0QCBWji", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wbE0QCBWji", "openreview": "https://openreview.net/forum?id=wbE0QCBWji", "poster": "/media/PosterPDFs/NeurIPS%202024/93142.png?t=1732579531.0604975", "project": "", "author_site": "Andi Zhang, Mingtian Zhang, Damon Wischik", "tldr": "", "abstract": "We propose a probabilistic perspective on adversarial examples, allowing us to embed subjective understanding of semantics as a distribution into the process of generating adversarial examples, in a principled manner. Despite significant pixel-level modifications compared to traditional adversarial attacks, our method preserves the overall semantics of the image, making the changes difficult for humans to detect. This extensive pixel-level modification enhances our method's ability to deceive classifiers designed to defend against adversarial attacks. Our empirical findings indicate that the proposed methods achieve higher success rates in circumventing adversarial defense mechanisms, while remaining difficult for human observers to detect.", "keywords": "Adversarial Examples;Probabilistic Generative Models;Diffusion Models;Energy-based Models", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Andi Zhang;Mingtian Zhang;Damon Wischik", "authorids": "~Andi_Zhang2;~Mingtian_Zhang1;~Damon_Wischik1", "gender": "M;M;", "homepage": "http://andi.ac;http://tomo.wiki;https://www.cl.cam.ac.uk/~djw1005/", "dblp": "200/8255-1;230/8340;18/4263.html", "google_scholar": "qGAOAoYAAAAJ;;", "orcid": ";;", "linkedin": "zhangandi/;;", "or_profile": "~Andi_Zhang2;~Mingtian_Zhang1;~Damon_Wischik1", "aff": "University of Cambridge;;University of Cambridge", "aff_domain": "cam.ac.uk;;cam.ac.uk", "position": "PhD student;;Lecturer", "bibtex": "@inproceedings{\nzhang2024constructing,\ntitle={Constructing Semantics-Aware Adversarial Examples with Probabilistic Perspective},\nauthor={Andi Zhang and Mingtian Zhang and Damon Wischik},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wbE0QCBWji}\n}", "github": "", "reviewers": "aH4C;A1FN;ggd1", "pdf_size": 5115590, "rating": "6;6;7", "confidence": "4;2;4", "soundness": "2;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "74;132;116", "wc_strengths": "87;82;109", "wc_weaknesses": "209;104;152", "wc_questions": "40;7;5", "wc_limitations": "1;9;1", "wc_review": "411;334;383", "wc_reply_reviewers": "46;0;38", "wc_reply_authors": "16;0;21", "reply_reviewers": "1;0;1", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 107.33333333333333, 24.458581770458856 ], "wc_strengths_avg": [ 92.66666666666667, 11.728408057172787 ], "wc_weaknesses_avg": [ 155.0, 42.91852746774987 ], "wc_questions_avg": [ 17.333333333333332, 16.048537489614297 ], "wc_limitations_avg": [ 3.6666666666666665, 3.7712361663282534 ], "wc_review_avg": [ 376.0, 31.822423959633664 ], "wc_reply_reviewers_avg": [ 28.0, 20.06655592438988 ], "wc_reply_authors_avg": [ 12.333333333333334, 8.9566858950296 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7616503380374975058&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "email": "cam.ac.uk;;cam.ac.uk", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Real-Time Selection Under General Constraints via Predictive Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93141", "id": "wblxm5zdkE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wblxm5zdkE", "openreview": "https://openreview.net/forum?id=wblxm5zdkE", "poster": "/media/PosterPDFs/NeurIPS%202024/93141.png?t=1730699192.7257493", "project": "", "author_site": "Yuyang Huo, Lin Lu, Haojie Ren, Changliang Zou", "tldr": "", "abstract": "Real-time decision-making gets more attention in the big data era. Here, we consider the problem of sample selection in the online setting, where one encounters a possibly infinite sequence of individuals collected over time with covariate information available. The goal is to select samples of interest that are characterized by their unobserved responses until the user-specified stopping time. We derive a new decision rule that enables us to find more preferable samples that meet practical requirements by simultaneously controlling two types of general constraints: individual and interactive constraints, which include the widely utilized False Selection Rate (FSR), cost limitations, and diversity of selected samples. The key elements of our approach involve quantifying the uncertainty of response predictions via predictive inference and addressing individual and interactive constraints in a sequential manner. Theoretical and numerical results demonstrate the effectiveness of the proposed method in controlling both individual and interactive constraints.", "keywords": "Online multiple testing; Predictive inference; False selection rate; Individual and interactive constraints; Local false discovery rate.", "primary_area": "other", "supplementary_material": "/attachment/ce846f83458cfd12a3b676105f74908abaae3444.zip", "author": "Yuyang Huo;Lin Lu;Haojie Ren;Changliang Zou", "authorids": "~Yuyang_Huo1;~Lin_Lu2;~Haojie_Ren1;~Changliang_Zou2", "gender": "M;F;F;M", "homepage": ";;https://sites.google.com/view/haojieren;http://web.stat.nankai.edu.cn/chlzou/", "dblp": ";;;", "google_scholar": ";HNzgF2YAAAAJ;qfd5nS8AAAAJ;LPwSdmwAAAAJ", "orcid": "0000-0002-7521-1043;0000-0002-9317-1647;;", "linkedin": ";;;", "or_profile": "~Yuyang_Huo1;~Lin_Lu2;~Haojie_Ren1;~Changliang_Zou2", "aff": "Nankai University;Nankai University;Shanghai Jiaotong University;Nankai University", "aff_domain": "nku.nankai.edu.cn;nku.nankai.edu.cn;sjtu.edu.cn;nankai.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhuo2024realtime,\ntitle={Real-Time Selection Under General Constraints via Predictive Inference},\nauthor={Yuyang Huo and Lin Lu and Haojie Ren and Changliang Zou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wblxm5zdkE}\n}", "github": "", "reviewers": "MMvB;vWkU;78XN;vGU3", "pdf_size": 859467, "rating": "4;6;7;8", "confidence": "4;1;4;4", "soundness": "3;3;3;4", "novelty": "2;4;3;4", "presentation": "3;2;3;4", "wc_summary": "55;29;140;68", "wc_strengths": "19;45;56;80", "wc_weaknesses": "8;117;127;3", "wc_questions": "190;51;2;13", "wc_limitations": "7;7;13;30", "wc_review": "279;249;338;194", "wc_reply_reviewers": "0;5;19;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.479019945774904 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 73.0, 41.15215668710451 ], "wc_strengths_avg": [ 50.0, 21.920310216782973 ], "wc_weaknesses_avg": [ 63.75, 58.3839661208452 ], "wc_questions_avg": [ 64.0, 74.98333148106984 ], "wc_limitations_avg": [ 14.25, 9.41740410091868 ], "wc_review_avg": [ 265.0, 52.01442107723588 ], "wc_reply_reviewers_avg": [ 8.5, 7.0178344238090995 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.09759000729485333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4206332015866964299&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "nku.nankai.edu.cn;nku.nankai.edu.cn;sjtu.edu.cn;nankai.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nankai University;Shanghai Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "NKU;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "LiT: Unifying LiDAR \"Languages\" with LiDAR Translator", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93140", "id": "wcX04Wn34u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wcX04Wn34u", "openreview": "https://openreview.net/forum?id=wcX04Wn34u", "poster": "", "project": "", "author_site": "Yixing Lao, Tao Tang, Xiaoyang Wu, Peng Chen, Kaicheng Yu, Hengshuang Zhao", "tldr": "", "abstract": "LiDAR data exhibits significant domain gaps due to variations in sensors, vehicles, and driving environments, creating \u201clanguage barriers\u201d that limit the effective use of data across domains and the scalability of LiDAR perception models. To address these challenges, we introduce the LiDAR Translator (LiT), a framework that directly translates LiDAR data across domains, enabling both cross-domain adaptation and multi-domain joint learning. LiT integrates three key components: a scene modeling module for precise foreground and background reconstruction, a LiDAR modeling module that models LiDAR rays statistically and simulates ray-drop, and a fast, hardware-accelerated ray casting engine. LiT enables state-of-the-art zero-shot and unified domain detection across diverse LiDAR datasets, marking a step toward data-driven domain unification for autonomous driving systems. Source code and demos are available at: https://yxlao.github.io/lit.", "keywords": "LiDAR Translation;Domain Unification;3D Detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yixing Lao;Tao Tang;Xiaoyang Wu;Peng Chen;Kaicheng Yu;Hengshuang Zhao", "authorids": "~Yixing_Lao1;~Tao_Tang4;~Xiaoyang_Wu1;~Peng_Chen9;~Kaicheng_Yu1;~Hengshuang_Zhao2", "gender": "M;M;M;M;M;M", "homepage": "https://github.com/yxlao/;https://xywu.me;;https://www.yukaicheng.cn;https://hszhao.github.io;https://github.com/Trent-tangtao", "dblp": "213/7784;56/4409-2;;;185/7848;", "google_scholar": "2w9VSWIAAAAJ;Np1dTpQAAAAJ;YDmfyEYAAAAJ;j9OguiIAAAAJ;4uE10I0AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;;;0000-0001-8277-2706;0000-0001-8526-220X", "linkedin": ";;;;hengshuang-zhao-347b8391/?originalSubdomain=hk;", "or_profile": "~Yixing_Lao1;~Xiaoyang_Wu1;~Peng_Chen9;~Kaicheng_Yu1;~Hengshuang_Zhao2;~tang_tao1", "aff": "University of Hong Kong;the University of Hong Kong, University of Hong Kong;Alibaba Group;Westlake University;The University of Hong Kong;SUN YAT-SEN UNIVERSITY", "aff_domain": "hku.hk;cs.hku.hk;alibaba-inc.com;westlake.edu;hku.hk;sysu.edu.cn", "position": "PhD student;PhD student;Researcher;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nlao2024lit,\ntitle={LiT: Unifying Li{DAR} ''Languages'' with Li{DAR} Translator},\nauthor={Yixing Lao and Tao Tang and Xiaoyang Wu and Peng Chen and Kaicheng Yu and Hengshuang Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wcX04Wn34u}\n}", "github": "", "reviewers": "Kzeh;tU6b;omtq;Q1U8", "pdf_size": 11832275, "rating": "5;5;6;7", "confidence": "5;4;3;5", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "3;3;3;4", "wc_summary": "65;59;111;56", "wc_strengths": "58;58;94;54", "wc_weaknesses": "77;87;84;90", "wc_questions": "85;83;22;43", "wc_limitations": "34;14;1;15", "wc_review": "319;301;312;258", "wc_reply_reviewers": "17;0;104;6", "wc_reply_authors": "437;1434;292;108", "reply_reviewers": "1;0;1;1", "reply_authors": "3;4;3;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 22.320114247019436 ], "wc_strengths_avg": [ 66.0, 16.24807680927192 ], "wc_weaknesses_avg": [ 84.5, 4.8218253804964775 ], "wc_questions_avg": [ 58.25, 26.808347580557815 ], "wc_limitations_avg": [ 16.0, 11.76860229593982 ], "wc_review_avg": [ 297.5, 23.69071548096427 ], "wc_reply_reviewers_avg": [ 31.75, 42.156701721078704 ], "wc_reply_authors_avg": [ 567.75, 513.5398596993226 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0909090909090909, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LCDf6pHUenkJ:scholar.google.com/&scioq=LiT:+Unifying+LiDAR+%22Languages%22+with+LiDAR+Translator&hl=en&as_sdt=0,10", "gs_version_total": 2, "email": "hku.hk;cs.hku.hk;alibaba-inc.com;westlake.edu;hku.hk;sysu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "University of Hong Kong;Alibaba Group;Westlake University;Sun Yat-sen University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hku.hk;https://www.alibaba.com;https://www.westlake.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "HKU;Alibaba;WU;SYSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "GaussianMarker: Uncertainty-Aware Copyright Protection of 3D Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93139", "id": "wcxHbAY8B3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wcxHbAY8B3", "openreview": "https://openreview.net/forum?id=wcxHbAY8B3", "poster": "", "project": "", "author_site": "Xiufeng Huang, Ruiqi Li, Yiu-ming Cheung, Ka Chun Cheung, Simon See, Renjie Wan", "tldr": "", "abstract": "3D Gaussian Splatting (3DGS) has become a crucial method for acquiring 3D assets. To protect the copyright of these assets, digital watermarking techniques can be applied to embed ownership information discreetly within 3DGS mod- els. However, existing watermarking methods for meshes, point clouds, and implicit radiance fields cannot be directly applied to 3DGS models, as 3DGS models use explicit 3D Gaussians with distinct structures and do not rely on neural networks. Naively embedding the watermark on a pre-trained 3DGS can cause obvious distortion in rendered images. In our work, we propose an uncertainty- based method that constrains the perturbation of model parameters to achieve invisible watermarking for 3DGS. At the message decoding stage, the copyright messages can be reliably extracted from both 3D Gaussians and 2D rendered im- ages even under various forms of 3D and 2D distortions. We conduct extensive experiments on the Blender, LLFF, and MipNeRF-360 datasets to validate the effectiveness of our proposed method, demonstrating state-of-the-art performance on both message decoding accuracy and view synthesis quality.", "keywords": "3D Gaussian splatting; Uncertainty estimation; Digital watermarking;", "primary_area": "privacy", "supplementary_material": "/attachment/3a668841f53ebde2b56d6c6b00618ca5bddb4a8d.zip", "author": "Xiufeng Huang;Ruiqi Li;Yiu-ming Cheung;Ka Chun Cheung;Simon See;Renjie Wan", "authorids": "~Xiufeng_Huang1;~Ruiqi_Li7;~Yiu-ming_Cheung1;~Ka_Chun_Cheung1;~Simon_See1;~Renjie_Wan1", "gender": "M;M;;M;M;M", "homepage": "https://kevinhuangxf.github.io/;https://www.comp.hkbu.edu.hk/~csrqli/;;;;https://wanrenjie.github.io/", "dblp": ";;;165/1089;62/6547;191/2619", "google_scholar": "0l8-UK0AAAAJ;a4H_EcsAAAAJ;;NvbCXToAAAAJ;ebIHTEoAAAAJ;https://scholar.google.com.sg/citations?user=S8_ES4MAAAAJ", "orcid": "0009-0003-2249-3264;0000-0003-4535-7364;;;0000-0002-4958-9237;0000-0002-0161-0367", "linkedin": ";;;;simonsee/;", "or_profile": "~Xiufeng_Huang1;~Ruiqi_Li7;~Yiu-ming_Cheung1;~Ka_Chun_Cheung1;~Simon_See1;~Renjie_Wan1", "aff": "Hong Kong Baptist University;Hong Kong Baptist University;;NVIDIA;NVIDIA;Hong Kong Baptist University", "aff_domain": "hkbu.edu.hk;hkbu.edu.hk;;nvidia.com;nvidia.com;hkbu.edu.hk", "position": "PhD student;PhD student;;Senior Manager, Solution Architect;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024gaussianmarker,\ntitle={GaussianMarker: Uncertainty-Aware Copyright Protection of 3D Gaussian Splatting},\nauthor={Xiufeng Huang and Ruiqi Li and Yiu-ming Cheung and Ka Chun Cheung and Simon See and Renjie Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wcxHbAY8B3}\n}", "github": "", "reviewers": "BKx3;VdSM;fyQG;QvjQ", "pdf_size": 5886663, "rating": "4;5;5;7", "confidence": "5;5;4;3", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "86;103;69;90", "wc_strengths": "43;62;51;108", "wc_weaknesses": "131;222;189;127", "wc_questions": "274;43;2;3", "wc_limitations": "74;6;7;1", "wc_review": "608;436;318;329", "wc_reply_reviewers": "479;193;9;47", "wc_reply_authors": "1218;768;125;112", "reply_reviewers": "3;2;1;1", "reply_authors": "5;4;3;3", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 87.0, 12.144957801491119 ], "wc_strengths_avg": [ 66.0, 25.16942589730644 ], "wc_weaknesses_avg": [ 167.25, 40.01484099681017 ], "wc_questions_avg": [ 80.5, 112.93471565466484 ], "wc_limitations_avg": [ 22.0, 30.108138434649195 ], "wc_review_avg": [ 422.75, 116.46324527506522 ], "wc_reply_reviewers_avg": [ 182.0, 184.7187050625897 ], "wc_reply_authors_avg": [ 555.75, 465.31837219263116 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 3.75, 0.82915619758885 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18263296548236819851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "hkbu.edu.hk;hkbu.edu.hk;;nvidia.com;nvidia.com;hkbu.edu.hk", "author_num": 6, "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Hong Kong Baptist University;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.nvidia.com", "aff_unique_abbr": "HKBU;NVIDIA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "title": "Learning Cortico-Muscular Dependence through Orthonormal Decomposition of Density Ratios", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93138", "id": "wdGvRud1LS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wdGvRud1LS", "openreview": "https://openreview.net/forum?id=wdGvRud1LS", "poster": "", "project": "", "author_site": "Shihan Ma, Bo Hu, Tianyu Jia, Alexander Clarke, Blanka Zicher, Arnault Caillet, Dario Farina, Jose C Principe", "tldr": "", "abstract": "The cortico-spinal neural pathway is fundamental for motor control and movement execution, and in humans it is typically studied using concurrent electroencephalography (EEG) and electromyography (EMG) recordings. However, current approaches for capturing high-level and contextual connectivity between these recordings have important limitations. Here, we present a novel application of statistical dependence estimators based on orthonormal decomposition of density ratios to model the relationship between cortical and muscle oscillations. Our method extends from traditional scalar-valued measures by learning eigenvalues, eigenfunctions, and projection spaces of density ratios from realizations of the signal, addressing the interpretability, scalability, and local temporal dependence of cortico-muscular connectivity. We experimentally demonstrate that eigenfunctions learned from cortico-muscular connectivity can accurately classify movements and subjects. Moreover, they reveal channel and temporal dependencies that confirm the activation of specific EEG channels during movement.", "keywords": "EEG-EMG fusion;statistical dependence;orthonormal decomposition;cortico-muscular connectivity", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Shihan Ma;Bo Hu;Tianyu Jia;Alexander Kenneth Clarke;Blanka Zicher;Arnault H. Caillet;Dario Farina;Jose C Principe", "authorids": "~Shihan_Ma2;~Bo_Hu3;~Tianyu_Jia2;~Alexander_Kenneth_Clarke1;~Blanka_Zicher1;~Arnault_H._Caillet1;~Dario_Farina1;~Jose_C_Principe1", "gender": "F;M;M;M;F;M;M;M", "homepage": "https://shihan-ma.github.io/;http://www.cnel.ufl.edu/people/students.php;;https://github.com/AlexKClarke;;;https://www.imperial.ac.uk/people/d.farina;http://www.cnel.ufl.edu", "dblp": "219/6415;;;;;;;", "google_scholar": "RGmlbP8AAAAJ;;DPwvCNcAAAAJ;https://scholar.google.co.uk/citations?user=rXFknSUAAAAJ;AXU9XqAAAAAJ;https://scholar.google.fr/citations?user=Z72zPxwAAAAJ;0JDIQ0wAAAAJ;", "orcid": ";;0000-0001-6645-509X;0000-0003-0744-3982;0000-0002-3592-2158;;0000-0002-7883-2697;", "linkedin": "shihan-ma-6a36a4232/;;;;;;;", "or_profile": "~Shihan_Ma2;~Bo_Hu3;~Tianyu_Jia2;~Alexander_Kenneth_Clarke1;~Blanka_Zicher1;~Arnault_H._Caillet1;~Dario_Farina1;~Jose_C_Principe1", "aff": "Shanghai Jiaotong University;University of Florida;Imperial College London;Imperial College London;Imperial College London;Imperial College London;Imperial College London;", "aff_domain": "sjtu.edu.cn;ufl.edu;imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;ic.ac.uk;imperial.ac.uk;", "position": "PhD student;PhD student;Postdoc;PhD student;PhD student;Postdoc;Full Professor;", "bibtex": "@inproceedings{\nma2024learning,\ntitle={Learning Cortico-Muscular Dependence through Orthonormal Decomposition of Density Ratios},\nauthor={Shihan Ma and Bo Hu and Tianyu Jia and Alexander Kenneth Clarke and Blanka Zicher and Arnault H. Caillet and Dario Farina and Jose C Principe},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wdGvRud1LS}\n}", "github": "", "reviewers": "J9kE;MC3m;CLW7;fPCn", "pdf_size": 33201703, "rating": "5;5;7;7", "confidence": "4;4;4;2", "soundness": "3;2;4;3", "novelty": "2;1;4;3", "presentation": "2;3;3;3", "wc_summary": "88;184;120;166", "wc_strengths": "91;76;106;23", "wc_weaknesses": "134;336;12;53", "wc_questions": "93;362;29;32", "wc_limitations": "7;14;65;53", "wc_review": "413;972;332;327", "wc_reply_reviewers": "12;184;41;56", "wc_reply_authors": "264;161;38;192", "reply_reviewers": "1;2;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 139.5, 37.79880950506246 ], "wc_strengths_avg": [ 74.0, 31.29696470905765 ], "wc_weaknesses_avg": [ 133.75, 124.74849698493365 ], "wc_questions_avg": [ 129.0, 136.9251620411676 ], "wc_limitations_avg": [ 34.75, 24.742423082632794 ], "wc_review_avg": [ 511.0, 268.3384057491585 ], "wc_reply_reviewers_avg": [ 73.25, 65.86871412134899 ], "wc_reply_authors_avg": [ 163.75, 81.65284747025053 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:lP6g6VqQYDEJ:scholar.google.com/&scioq=Learning+Cortico-Muscular+Dependence+through+Orthonormal+Decomposition+of+Density+Ratios&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "sjtu.edu.cn;ufl.edu;imperial.ac.uk;imperial.ac.uk;imperial.ac.uk;ic.ac.uk;imperial.ac.uk;", "author_num": 8, "aff_unique_index": "0;1;2;2;2;2;2", "aff_unique_norm": "Shanghai Jiao Tong University;University of Florida;Imperial College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ufl.edu;https://www.imperial.ac.uk", "aff_unique_abbr": "SJTU;UF;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2;2;2", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Video Token Merging for Long Video Understanding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93137", "id": "wduRaBDRBS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wduRaBDRBS", "openreview": "https://openreview.net/forum?id=wduRaBDRBS", "poster": "/media/PosterPDFs/NeurIPS%202024/93137.png?t=1731437603.8045342", "project": "", "author_site": "Seon-Ho Lee, Jue Wang, Zhikang Zhang, David Fan, Xinyu Li", "tldr": "", "abstract": "As the scale of data and models for video understanding rapidly expand, handling long-form video input in transformer-based models presents a practical challenge. Rather than resorting to input sampling or token dropping, which may result in information loss, token merging shows promising results when used in collaboration with transformers. However, the application of token merging for long-form video processing is not trivial. We begin with the premise that token merging should not rely solely on the similarity of video tokens; the saliency of tokens should also be considered. To address this, we explore various video token merging strategies for long-form video classification, starting with a simple extension of image token merging, moving to region-concentrated merging, and finally proposing a learnable video token merging (VTM) algorithm that dynamically merges tokens based on their saliency. Extensive experimental results show that we achieve better or comparable performances on the LVU, COIN, and Breakfast datasets. Moreover, our approach significantly reduces memory costs by 84% and boosts throughput by approximately 6.89 times compared to baseline algorithms.", "keywords": "Long video understanding;token merging", "primary_area": "machine_vision", "supplementary_material": "", "author": "Seon-Ho Lee;Jue Wang;Zhikang Zhang;David Fan;Xinyu Li", "authorids": "~Seon-Ho_Lee1;~Jue_Wang8;~Zhikang_Zhang1;~David_Fan2;~Xinyu_Li4", "gender": "M;M;;M;M", "homepage": "https://uhseon.github.io/;https://3xwangdot.github.io/juewang.github.io/;;https://davidfan.io;https://www.arthurlxy.com", "dblp": "125/9915;69/393-10;;03/3063;88/2359-3.html", "google_scholar": "https://scholar.google.co.kr/citations?user=_LtQ4TcAAAAJ;Lt945BwAAAAJ;;VAiqiv4AAAAJ;xMnAUmkAAAAJ", "orcid": ";;;0000-0002-9217-5451;", "linkedin": "seonho-lee-604679198/;;;davidfan97;", "or_profile": "~Seon-Ho_Lee1;~Jue_Wang8;~Zhikang_Zhang1;~David_Fan2;~Xinyu_Li4", "aff": "Korea University;Amazon;;Amazon;Amazon", "aff_domain": "korea.ac.kr;amazon.com;;amazon.com;amazon.com", "position": "PhD student;Researcher;;Applied Scientist;Researcher", "bibtex": "@inproceedings{\nlee2024video,\ntitle={Video Token Merging for Long Video Understanding},\nauthor={Seon-Ho Lee and Jue Wang and Zhikang Zhang and David Fan and Xinyu Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wduRaBDRBS}\n}", "github": "", "reviewers": "7bwU;JTuZ;rFDe;sUxH", "pdf_size": 4191973, "rating": "4;5;5;6", "confidence": "3;3;4;4", "soundness": "3;3;3;4", "novelty": "3;2;2;3", "presentation": "3;3;2;4", "wc_summary": "97;294;97;148", "wc_strengths": "54;117;103;38", "wc_weaknesses": "67;210;525;385", "wc_questions": "36;65;152;17", "wc_limitations": "1;1;139;1", "wc_review": "255;687;1016;589", "wc_reply_reviewers": "52;176;255;0", "wc_reply_authors": "420;416;480;0", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 159.0, 80.67527502277262 ], "wc_strengths_avg": [ 78.0, 32.8709598277872 ], "wc_weaknesses_avg": [ 296.75, 173.3470146844185 ], "wc_questions_avg": [ 67.5, 51.69381007432128 ], "wc_limitations_avg": [ 35.5, 59.75575286112627 ], "wc_review_avg": [ 636.75, 271.2788003143629 ], "wc_reply_reviewers_avg": [ 120.75, 100.47729843103863 ], "wc_reply_authors_avg": [ 329.0, 191.63246071581924 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Kd--fHRO2TAJ:scholar.google.com/&scioq=Video+Token+Merging+for+Long+Video+Understanding&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "korea.ac.kr;amazon.com;;amazon.com;amazon.com", "author_num": 5, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Korea University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.korea.ac.kr;https://www.amazon.com", "aff_unique_abbr": "KU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "South Korea;United States" }, { "title": "Linear Causal Representation Learning from Unknown Multi-node Interventions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93136", "id": "weemASPtzg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=weemASPtzg", "openreview": "https://openreview.net/forum?id=weemASPtzg", "poster": "/media/PosterPDFs/NeurIPS%202024/93136.png?t=1732771537.9156206", "project": "", "author_site": "Burak Var\u0131c\u0131, Emre Acart\u00fcrk, Karthikeyan Shanmugam, Ali Tajer", "tldr": "", "abstract": "Despite the multifaceted recent advances in interventional causal representation learning (CRL), they primarily focus on the stylized assumption of single-node interventions. This assumption is not valid in a wide range of applications, and generally, the subset of nodes intervened in an interventional environment is *fully unknown*. This paper focuses on interventional CRL under unknown multi-node (UMN) interventional environments and establishes the first identifiability results for *general* latent causal models (parametric or nonparametric) under stochastic interventions (soft or hard) and linear transformation from the latent to observed space. Specifically, it is established that given sufficiently diverse interventional environments, (i) identifiability *up to ancestors* is possible using only *soft* interventions, and (ii) *perfect* identifiability is possible using *hard* interventions. Remarkably, these guarantees match the best-known results for more restrictive single-node interventions. Furthermore, CRL algorithms are also provided that achieve the identifiability guarantees. A central step in designing these algorithms is establishing the relationships between UMN interventional CRL and score functions associated with the statistical models of different interventional environments. Establishing these relationships also serves as constructive proof of the identifiability guarantees.", "keywords": "Causal representation learning;interventions;score-based methods;identifiability", "primary_area": "causal_inference", "supplementary_material": "", "author": "Burak Var\u0131c\u0131;Emre Acart\u00fcrk;Karthikeyan Shanmugam;Ali Tajer", "authorids": "~Burak_Var\u0131c\u01311;~Emre_Acart\u00fcrk1;~Karthikeyan_Shanmugam1;~Ali_Tajer1", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/corp/view/karthikeyan-shanmugam/;https://www.isg-rpi.com/;https://bvarici.github.io/", "dblp": "338/7569;;65/2830;289/8565", "google_scholar": "KLiI1JwAAAAJ;https://scholar.google.ca/citations?user=m4DyPcUAAAAJ;;v_SL5c4AAAAJ", "orcid": ";0009-0008-2879-5868;;", "linkedin": ";;;", "or_profile": "~Emre_Acart\u00fcrk1;~Karthikeyan_Shanmugam1;~Ali_Tajer1;~Burak_Varici1", "aff": "Rensselaer Polytechnic Institute;Google Research;Rensselaer Polytechnic Institute;Rensselaer Polytechnic Institute", "aff_domain": "rpi.edu;google.com;rpi.edu;rpi.edu", "position": "PhD student;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nvar{\\i}c{\\i}2024linear,\ntitle={Linear Causal Representation Learning from Unknown Multi-node Interventions},\nauthor={Burak Var{\\i}c{\\i} and Emre Acart{\\\"u}rk and Karthikeyan Shanmugam and Ali Tajer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=weemASPtzg}\n}", "github": "", "reviewers": "q4pN;8PAL;NxzW;GLEY;Vtz8", "pdf_size": 578355, "rating": "6;7;7;7;8", "confidence": "4;3;2;3;4", "soundness": "3;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "3;4;3;3;3", "wc_summary": "69;148;84;47;110", "wc_strengths": "12;33;28;53;127", "wc_weaknesses": "141;130;40;63;80", "wc_questions": "64;20;55;13;397", "wc_limitations": "41;12;71;12;6", "wc_review": "327;343;278;188;720", "wc_reply_reviewers": "12;16;22;14;67", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 3.2, 0.7483314773547882 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 91.6, 34.86316107297214 ], "wc_strengths_avg": [ 50.6, 40.381183736983246 ], "wc_weaknesses_avg": [ 90.8, 38.79896905846855 ], "wc_questions_avg": [ 109.8, 144.9267401137554 ], "wc_limitations_avg": [ 28.4, 24.54872705457454 ], "wc_review_avg": [ 371.2, 182.5698770334252 ], "wc_reply_reviewers_avg": [ 26.2, 20.67268729507608 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2206241039057423376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "rpi.edu;google.com;rpi.edu;rpi.edu", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.rpi.edu;https://research.google", "aff_unique_abbr": "RPI;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Optimal Control Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93135", "id": "wfU2CdgmWt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wfU2CdgmWt", "openreview": "https://openreview.net/forum?id=wfU2CdgmWt", "poster": "", "project": "", "author_site": "Carles Domingo i Enrich, Jiequn Han, Brandon Amos, Joan Bruna, Ricky T. Q. Chen", "tldr": "", "abstract": "Stochastic optimal control, which has the goal of driving the behavior of noisy systems, is broadly applicable in science, engineering and artificial intelligence. Our work introduces Stochastic Optimal Control Matching (SOCM), a novel Iterative Diffusion Optimization (IDO) technique for stochastic optimal control that stems from the same philosophy as the conditional score matching loss for diffusion models. That is, the control is learned via a least squares problem by trying to fit a matching vector field. The training loss, which is closely connected to the cross-entropy loss, is optimized with respect to both the control function and a family of reparameterization matrices which appear in the matching vector field. The optimization with respect to the reparameterization matrices aims at minimizing the variance of the matching vector field. Experimentally, our algorithm achieves lower error than all the existing IDO techniques for stochastic optimal control for three out of four control problems, in some cases by an order of magnitude. The key idea underlying SOCM is the path-wise reparameterization trick, a novel technique that may be of independent interest.", "keywords": "Stochastic Optimal Control;Diffusion", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/fdf69fe9eca55d989d7ec58b91bd0b134ef726f9.zip", "author": "Carles Domingo-Enrich;Jiequn Han;Brandon Amos;Joan Bruna;Ricky T. Q. Chen", "authorids": "~Carles_Domingo-Enrich1;~Jiequn_Han1;~Brandon_Amos1;~Joan_Bruna1;~Ricky_T._Q._Chen1", "gender": "M;M;;M;M", "homepage": "https://cdenrich.github.io;https://users.flatironinstitute.org/~jhan/;http://bamos.github.io;http://cims.nyu.edu/~bruna;http://www.rtqichen.com", "dblp": "216/7444.html;190/7087;133/4801.html;44/8776;228/6698", "google_scholar": "1ZHcGwIAAAAJ;el5gT4AAAAAJ;d8gdZR4AAAAJ;L4bNmsMAAAAJ;7MxQd6UAAAAJ", "orcid": ";;;;", "linkedin": ";;bdamos;;", "or_profile": "~Carles_Domingo-Enrich1;~Jiequn_Han1;~Brandon_Amos1;~Joan_Bruna1;~Tian_Qi_Chen2", "aff": "New York University;Simons Foundation;Meta;New York University;FAIR Labs, Meta AI", "aff_domain": "nyu.edu;simonsfoundation.org;meta.com;nyu.edu;meta.com", "position": "PhD student;Researcher;Research Scientist;Associate Professor;Researcher", "bibtex": "@inproceedings{\ndomingo-enrich2024stochastic,\ntitle={Stochastic Optimal Control Matching},\nauthor={Carles Domingo-Enrich and Jiequn Han and Brandon Amos and Joan Bruna and Ricky T. Q. Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wfU2CdgmWt}\n}", "github": "", "reviewers": "cXHg;Su1g;Uos4;gdJ8", "pdf_size": 7602096, "rating": "5;5;6;7", "confidence": "3;3;4;1", "soundness": "2;3;4;3", "novelty": "3;3;4;4", "presentation": "3;3;3;4", "wc_summary": "117;126;330;42", "wc_strengths": "92;183;133;46", "wc_weaknesses": "72;182;187;26", "wc_questions": "44;154;219;50", "wc_limitations": "21;1;27;27", "wc_review": "346;646;896;191", "wc_reply_reviewers": "0;20;172;5", "wc_reply_authors": "0;106;879;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 2.75, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 153.75, 106.85591934937437 ], "wc_strengths_avg": [ 113.5, 50.56925943693461 ], "wc_weaknesses_avg": [ 116.75, 69.69711256572973 ], "wc_questions_avg": [ 116.75, 73.46895602906032 ], "wc_limitations_avg": [ 19.0, 10.677078252031311 ], "wc_review_avg": [ 519.75, 271.92312792405136 ], "wc_reply_reviewers_avg": [ 49.25, 71.25087718758274 ], "wc_reply_authors_avg": [ 246.25, 367.8725152821287 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6225430174794673, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14948872992694906018&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "nyu.edu;simonsfoundation.org;meta.com;nyu.edu;meta.com", "author_num": 5, "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "New York University;Simons Foundation;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.nyu.edu;https://www.simonsfoundation.org;https://meta.com", "aff_unique_abbr": "NYU;Simons Foundation;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sparse-view Pose Estimation and Reconstruction via Analysis by Generative Synthesis", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93134", "id": "wgpmDyJgsg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wgpmDyJgsg", "openreview": "https://openreview.net/forum?id=wgpmDyJgsg", "poster": "/media/PosterPDFs/NeurIPS%202024/93134.png?t=1733906132.8686712", "project": "", "author_site": "Qitao Zhao, Shubham Tulsiani", "tldr": "", "abstract": "Inferring the 3D structure underlying a set of multi-view images typically requires solving two co-dependent tasks -- accurate 3D reconstruction requires precise camera poses, and predicting camera poses relies on (implicitly or explicitly) modeling the underlying 3D. The classical framework of analysis by synthesis casts this inference as a joint optimization seeking to explain the observed pixels, and recent instantiations learn expressive 3D representations (e.g., Neural Fields) with gradient-descent-based pose refinement of initial pose estimates. However, given a sparse set of observed views, the observations may not provide sufficient direct evidence to obtain complete and accurate 3D. Moreover, large errors in pose estimation may not be easily corrected and can further degrade the inferred 3D. To allow robust 3D reconstruction and pose estimation in this challenging setup, we propose SparseAGS, a method that adapts this analysis-by-synthesis approach by: a) including novel-view-synthesis-based generative priors in conjunction with photometric objectives to improve the quality of the inferred 3D, and b) explicitly reasoning about outliers and using a discrete search with a continuous optimization-based strategy to correct them. We validate our framework across real-world and synthetic datasets in combination with several off-the-shelf pose estimation systems as initialization. We find that it significantly improves the base systems' pose accuracy while yielding high-quality 3D reconstructions that outperform the results from current multi-view reconstruction baselines.", "keywords": "computer vision;3D vision;3D reconstruction;camera pose estimation;analysis-by-synthesis;generative model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qitao Zhao;Shubham Tulsiani", "authorids": "~Qitao_Zhao1;~Shubham_Tulsiani1", "gender": "M;M", "homepage": "https://qitaozhao.github.io;https://shubhtuls.github.io/", "dblp": ";135/6623", "google_scholar": "r9nmsasAAAAJ;06rffEkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Qitao_Zhao1;~Shubham_Tulsiani1", "aff": "Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nzhao2024sparseview,\ntitle={Sparse-view Pose Estimation and Reconstruction via Analysis by Generative Synthesis},\nauthor={Qitao Zhao and Shubham Tulsiani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wgpmDyJgsg}\n}", "github": "", "reviewers": "XaBT;mujy;uJqP;ePgG", "pdf_size": 16097932, "rating": "6;6;6;6", "confidence": "5;4;4;3", "soundness": "3;3;3;3", "novelty": "3;3;2;2", "presentation": "3;2;3;3", "wc_summary": "75;105;152;47", "wc_strengths": "43;52;40;99", "wc_weaknesses": "234;140;122;149", "wc_questions": "138;57;41;57", "wc_limitations": "5;6;5;15", "wc_review": "495;360;360;367", "wc_reply_reviewers": "120;53;0;24", "wc_reply_authors": "500;248;0;29", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.75, 38.899710795840114 ], "wc_strengths_avg": [ 58.5, 23.79600806858159 ], "wc_weaknesses_avg": [ 161.25, 43.112498187880504 ], "wc_questions_avg": [ 73.25, 37.94980237102691 ], "wc_limitations_avg": [ 7.75, 4.205650960315181 ], "wc_review_avg": [ 395.5, 57.51738867507808 ], "wc_reply_reviewers_avg": [ 49.25, 44.95205779494416 ], "wc_reply_authors_avg": [ 194.25, 200.88102822317492 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8710308994218202475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "andrew.cmu.edu;cmu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Parsimony or Capability? Decomposition Delivers Both in Long-term Time Series Forecasting", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93133", "id": "wiEHZSV15I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wiEHZSV15I", "openreview": "https://openreview.net/forum?id=wiEHZSV15I", "poster": "/media/PosterPDFs/NeurIPS%202024/93133.png?t=1730630856.8418543", "project": "", "author_site": "Jinliang Deng, Feiyang Ye, Du Yin, Xuan Song, Ivor Tsang, Hui Xiong", "tldr": "", "abstract": "Long-term time series forecasting (LTSF) represents a critical frontier in time series analysis, characterized by extensive input sequences, as opposed to the shorter spans typical of traditional approaches. While longer sequences inherently offer richer information for enhanced predictive precision, prevailing studies often respond by escalating model complexity. These intricate models can inflate into millions of parameters, resulting in prohibitive parameter scales. Our study demonstrates, through both theoretical and empirical evidence, that decomposition is key to containing excessive model inflation while achieving uniformly superior and robust results across various datasets. Remarkably, by tailoring decomposition to the intrinsic dynamics of time series data, our proposed model outperforms existing benchmarks, using over 99\\% fewer parameters than the majority of competing methods. Through this work, we aim to unleash the power of a restricted set of parameters by capitalizing on domain characteristics\u2014a timely reminder that in the realm of LTSF, bigger is not invariably better. The code is available at \\url{https://anonymous.4open.science/r/SSCNN-321D/}.", "keywords": "Time Series Forecasting;Spatial-temporal Prediction;Decomposition", "primary_area": "other", "supplementary_material": "/attachment/184d86b85600e98dcf3d662230d67da38288eca5.zip", "author": "Jinliang Deng;Feiyang Ye;Du Yin;Xuan Song;Ivor Tsang;Hui Xiong", "authorids": "~Jinliang_Deng1;~Feiyang_Ye4;~Du_Yin1;~Xuan_Song2;~Ivor_Tsang1;~Hui_Xiong1", "gender": "M;M;M;;;M", "homepage": "https://www.linkedin.com/in/jinliang-deng-a58714182/?originalSubdomain=hk;https://feiyang-ye.github.io/;https://github.com/lixus7;;;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "299/5312;285/4704;;;;262/1686-1.html", "google_scholar": "oaoJ2AYAAAAJ;3EX25cAAAAAJ;;;;cVDF1tkAAAAJ", "orcid": "0000-0002-0759-947X;;;;;0000-0001-6016-6465", "linkedin": ";;;;;", "or_profile": "~Jinliang_Deng1;~Feiyang_Ye4;~Du_Yin1;~Xuan_Song2;~Ivor_Tsang1;~Hui_Xiong1", "aff": "University of Technology Sydney;University of Technology Sydney;University of New South Wales;;;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "uts.edu.au;uts.edu.au;unsw.edu.au;;;hkust.edu", "position": "PhD student;PhD student;PhD student;;;Full Professor", "bibtex": "@inproceedings{\ndeng2024parsimony,\ntitle={Parsimony or Capability? Decomposition Delivers Both in Long-term Time Series Forecasting},\nauthor={Jinliang Deng and Feiyang Ye and Du Yin and Xuan Song and Ivor Tsang and Hui Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wiEHZSV15I}\n}", "github": "", "reviewers": "aS9R;a5mv;p9WT;aFLy;oTnm;UAfk", "pdf_size": 10010015, "rating": "6;6;7;7;7;7", "confidence": "4;2;4;4;4;4", "soundness": "3;2;3;3;3;4", "novelty": "3;3;3;3;3;3", "presentation": "3;2;3;3;4;3", "wc_summary": "70;189;14;77;68;79", "wc_strengths": "212;59;120;98;80;141", "wc_weaknesses": "179;209;173;65;135;22", "wc_questions": "2;105;2;97;13;41", "wc_limitations": "1;23;2;15;16;14", "wc_review": "464;585;311;352;312;297", "wc_reply_reviewers": "11;21;72;45;15;8", "wc_reply_authors": "34;34;77;81;19;19", "reply_reviewers": "1;1;2;2;1;1", "reply_authors": "2;2;3;3;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.7453559924999298 ], "soundness_avg": [ 3.0, 0.5773502691896257 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.5773502691896257 ], "wc_summary_avg": [ 82.83333333333333, 52.3495197897958 ], "wc_strengths_avg": [ 118.33333333333333, 49.48624949305502 ], "wc_weaknesses_avg": [ 130.5, 66.33689270182417 ], "wc_questions_avg": [ 43.333333333333336, 42.866718506967096 ], "wc_limitations_avg": [ 11.833333333333334, 7.861650943380503 ], "wc_review_avg": [ 386.8333333333333, 104.76866049645868 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 22.83759084394752 ], "wc_reply_authors_avg": [ 44.0, 25.521232990067965 ], "reply_reviewers_avg": [ 1.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 31, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6324555320336761, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13665882903510319239&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "uts.edu.au;uts.edu.au;unsw.edu.au;;;hkust.edu", "author_num": 6, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Technology Sydney;University of New South Wales;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uts.edu.au;https://www.unsw.edu.au;https://www.ust.hk", "aff_unique_abbr": "UTS;UNSW;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Australia;China" }, { "title": "MonoMAE: Enhancing Monocular 3D Detection through Depth-Aware Masked Autoencoders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93132", "id": "wiK6bwuxjE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wiK6bwuxjE", "openreview": "https://openreview.net/forum?id=wiK6bwuxjE", "poster": "/media/PosterPDFs/NeurIPS%202024/93132.png?t=1730120914.1638405", "project": "", "author_site": "Xueying Jiang, Sheng Jin, Xiaoqin Zhang, Ling Shao, Shijian Lu", "tldr": "", "abstract": "Monocular 3D object detection aims for precise 3D localization and identification of objects from a single-view image. Despite its recent progress, it often struggles while handling pervasive object occlusions that tend to complicate and degrade the prediction of object dimensions, depths, and orientations. We design MonoMAE, a monocular 3D detector inspired by Masked Autoencoders that addresses the object occlusion issue by masking and reconstructing objects in the feature space. MonoMAE consists of two novel designs. The first is depth-aware masking that selectively masks certain parts of non-occluded object queries in the feature space for simulating occluded object queries for network training. It masks non-occluded object queries by balancing the masked and preserved query portions adaptively according to the depth information. The second is lightweight query completion that works with the depth-aware masking to learn to reconstruct and complete the masked object queries. With the proposed feature-space occlusion and completion, MonoMAE learns enriched 3D representations that achieve superior monocular 3D detection performance qualitatively and quantitatively for both occluded and non-occluded objects. Additionally, MonoMAE learns generalizable representations that can work well in new domains.", "keywords": "Monocular 3D Object Detection;Masked Autoencoders", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xueying Jiang;Sheng Jin;Xiaoqin Zhang;Ling Shao;Shijian Lu", "authorids": "~Xueying_Jiang1;~Sheng_Jin3;~Xiaoqin_Zhang4;~Ling_Shao1;~Shijian_Lu1", "gender": ";M;;M;M", "homepage": ";;;;https://personal.ntu.edu.sg/shijian.lu/", "dblp": ";70/6780-2;;;42/2718", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;;z84rLjoAAAAJ;https://scholar.google.com.sg/scholar?hl=en", "orcid": ";0000-0001-7254-1664;;;", "linkedin": ";;;;", "or_profile": "~Xueying_Jiang1;~Sheng_Jin3;~Xiaoqin_Zhang4;~Ling_Shao1;~Shijian_Lu1", "aff": ";Nanyang Technological University;;University of Chinese Academy of Sciences;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;;ucas.ac.cn;ntu.edu.sg", "position": ";Postdoc;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\njiang2024monomae,\ntitle={Mono{MAE}: Enhancing Monocular 3D Detection through Depth-Aware Masked Autoencoders},\nauthor={Xueying Jiang and Sheng Jin and Xiaoqin Zhang and Ling Shao and Shijian Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wiK6bwuxjE}\n}", "github": "", "reviewers": "8dEd;bA8P;g4DX;xT7k", "pdf_size": 5072351, "rating": "5;6;6;6", "confidence": "4;5;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "45;98;61;83", "wc_strengths": "37;54;45;97", "wc_weaknesses": "161;20;62;186", "wc_questions": "5;112;55;31", "wc_limitations": "7;6;47;23", "wc_review": "255;290;270;420", "wc_reply_reviewers": "88;36;42;17", "wc_reply_authors": "50;15;38;15", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.75, 20.29008378494283 ], "wc_strengths_avg": [ 58.25, 23.166516786085904 ], "wc_weaknesses_avg": [ 107.25, 68.46668898084674 ], "wc_questions_avg": [ 50.75, 39.53716605929161 ], "wc_limitations_avg": [ 20.75, 16.588776326179094 ], "wc_review_avg": [ 308.75, 65.41932054064762 ], "wc_reply_reviewers_avg": [ 45.75, 26.080404521402652 ], "wc_reply_authors_avg": [ 29.5, 15.107944929738128 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2140243858649300245&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";ntu.edu.sg;;ucas.ac.cn;ntu.edu.sg", "author_num": 5, "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;University of Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;http://www.ucas.ac.cn", "aff_unique_abbr": "NTU;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Implicit Bias of Mirror Flow on Separable Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93131", "id": "wiMaws0FWB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wiMaws0FWB", "openreview": "https://openreview.net/forum?id=wiMaws0FWB", "poster": "", "project": "", "author_site": "Scott Pesme, Radu-Alexandru Dragomir, Nicolas Flammarion", "tldr": "", "abstract": "We examine the continuous-time counterpart of mirror descent, namely mirror flow, on classification problems which are linearly separable. Such problems are minimised \u2018at infinity\u2019 and have many possible solutions; we study which solution is preferred by the algorithm depending on the mirror potential. For exponential tailed losses and under mild assumptions on the potential, we show that the iterates converge in direction towards a $\\phi_\\infty$-maximum margin classifier. The function $\\phi_\\infty$ is the horizon function of the mirror potential and characterises its shape \u2018at infinity\u2019. When the potential is separable, a simple formula allows to compute this function. We analyse several examples of potentials and provide numerical experiments highlighting our results.", "keywords": "Implicit bias;Mirror descent;Classification", "primary_area": "optimization", "supplementary_material": "", "author": "Scott Pesme;Radu-Alexandru Dragomir;Nicolas Flammarion", "authorids": "~Scott_Pesme1;~Radu-Alexandru_Dragomir1;~Nicolas_Flammarion1", "gender": "M;M;M", "homepage": "https://scottpesme.github.io/;https://radualexandrudragomir.github.io;", "dblp": "268/7836;;164/7417", "google_scholar": "BwCLRb0AAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Scott_Pesme1;~Radu-Alexandru_Dragomir1;~Nicolas_Flammarion1", "aff": "Swiss Federal Institute of Technology Lausanne;T\u00e9l\u00e9com Paris;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;telecom-paris.fr;epfl.ch", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\npesme2024implicit,\ntitle={Implicit Bias of Mirror Flow on Separable Data},\nauthor={Scott Pesme and Radu-Alexandru Dragomir and Nicolas Flammarion},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wiMaws0FWB}\n}", "github": "", "reviewers": "j6HU;Jqcr;hMTV;DbGr", "pdf_size": 2045725, "rating": "5;6;7;7", "confidence": "3;4;4;2", "soundness": "3;3;4;4", "novelty": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "97;94;89;65", "wc_strengths": "100;82;48;84", "wc_weaknesses": "42;158;1;201", "wc_questions": "114;171;48;44", "wc_limitations": "1;2;1;4", "wc_review": "354;507;187;398", "wc_reply_reviewers": "10;16;10;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 86.25, 12.597122687344122 ], "wc_strengths_avg": [ 78.5, 18.940696924875812 ], "wc_weaknesses_avg": [ 100.5, 81.74503042999005 ], "wc_questions_avg": [ 94.25, 52.308579602202926 ], "wc_limitations_avg": [ 2.0, 1.224744871391589 ], "wc_review_avg": [ 361.5, 115.11841729280333 ], "wc_reply_reviewers_avg": [ 14.0, 4.242640687119285 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7451104421037838162&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "email": "epfl.ch;telecom-paris.fr;epfl.ch", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;T\u00e9l\u00e9com Paris", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.telecom-paris.fr", "aff_unique_abbr": "EPFL;T\u00e9l\u00e9com Paris", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;France" }, { "title": "$\\texttt{ConflictBank}$: A Benchmark for Evaluating the Influence of Knowledge Conflicts in LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97447", "id": "wjHVmgBDzc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wjHVmgBDzc", "openreview": "https://openreview.net/forum?id=wjHVmgBDzc", "poster": "", "project": "", "author_site": "Zhaochen Su, Jun Zhang, Xiaoye Qu, Tong Zhu, Yanshu Li, Jiashuo Sun, Juntao Li, Min Zhang, Yu Cheng", "tldr": "", "abstract": "Large language models (LLMs) have achieved\nimpressive advancements across numerous disciplines, yet the critical issue of knowledge conflicts, a major source of hallucinations, has rarely been studied. While a few research explored the conflicts between the inherent knowledge of LLMs and the retrieved contextual knowledge, a comprehensive assessment of knowledge conflict in LLMs is still missing. Motivated by this research gap, we firstly propose ConflictBank, the largest benchmark with 7.45M claim-evidence pairs and 553k QA pairs, addressing conflicts from misinformation, temporal discrepancies, and semantic divergences.\nUsing ConflictBank, we conduct the thorough and controlled experiments for a comprehensive understanding of LLM behavior in knowledge conflicts, focusing on three key aspects: (i) conflicts encountered in retrieved knowledge, (ii) conflicts within the models' encoded knowledge, and (iii) the interplay between these conflict forms.\nOur investigation delves into four model families and twelve LLM instances and provides insights into conflict types, model sizes, and the impact at different stages.\nWe believe that knowledge conflicts represent a critical bottleneck to achieving trustworthy artificial intelligence and hope our work will offer valuable guidance for future model training and development.\nResources are available at https://github.com/zhaochen0110/conflictbank.", "keywords": "knowledge conflict; hallucination; retrieval-augmented generation", "primary_area": "", "supplementary_material": "/attachment/36768cf3c1646ffc0f6d576cb3a10f5e80e64023.pdf", "author": "Zhaochen Su;Jun Zhang;Xiaoye Qu;Tong Zhu;Yanshu Li;Jiashuo Sun;Juntao Li;Min Zhang;Yu Cheng", "authorids": "~Zhaochen_Su1;~Jun_Zhang39;~Xiaoye_Qu1;~Tong_Zhu2;~Yanshu_Li1;~Jiashuo_Sun1;~Juntao_Li2;~Min_Zhang9;~Yu_Cheng1", "gender": "M;M;M;;M;M;M;M;M", "homepage": "https://zhaochen0110.github.io/;https://user.qzone.qq.com/2522247500;;;https://github.com/kaamava;https://github.com/gasolsun36;https://lijuntaopku.github.io/;https://zhangmin-nlp-ai.github.io/;https://ych133.github.io", "dblp": ";;229/8206;36/1469-2;;336/2528;;83/5342-5;96/3060-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;rT3hqdcAAAAJ;;;https://scholar.google.com.hk/citations?user=JCUiEM4AAAAJ;sZSygsYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-5433-8504;0009-0002-4054-606X;;0000-0002-6286-7529;;", "linkedin": "%E8%82%87%E8%BE%B0-%E8%8B%8F-4a103b295/;;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;yanshu-li-aa08282a6/;jiashuo-sun-b67857190/;;;chengyu05/", "or_profile": "~Zhaochen_Su1;~Jun_Zhang39;~Xiaoye_Qu1;~Tong_Zhu2;~Yanshu_Li1;~Jiashuo_Sun1;~Juntao_Li2;~Min_Zhang9;~Yu_Cheng1", "aff": "Soochow University;Soochow University;Shanghai Artificial Intelligence Laboratory;Soochow University, China;Suzhou University;Xiamen University;Soochow University, China;Harbin Institute of Technology, Shenzhen;The Chinese University of Hong Kong", "aff_domain": "suda.edu.cn;suda.edu.cn;pjlab.org.cn;suda.edu.cn;suda.edu.cn;xmu.edu.cn;suda.edu.cn;hit.edu.cn;cuhk.edu.hk", "position": "MS student;Undergrad student;Researcher;PhD student;Undergrad student;MS student;Associate Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsu2024textttconflictbank,\ntitle={\\${\\textbackslash}texttt\\{ConflictBank\\}\\$: A Benchmark for Evaluating the Influence of Knowledge Conflicts in {LLM}s},\nauthor={Zhaochen Su and Jun Zhang and Xiaoye Qu and Tong Zhu and Yanshu Li and Jiashuo Sun and Juntao Li and Min Zhang and Yu Cheng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=wjHVmgBDzc}\n}", "github": "", "reviewers": "xYZS;wkTi;Rqhs;MJhq", "pdf_size": 1040418, "rating": "4;5;7;8", "confidence": "4;4;3;3", "wc_summary_and_contributions": "101;26;33;57", "wc_strengths": "2;16;59;58", "wc_improvement": "2;3;103;85", "wc_limitations": "2;32;3;61", "wc_correctness": "2;1;8;1", "wc_clarity": "2;1;5;1", "wc_relation_to_prior_work": "2;1;1;1", "wc_documentation": "53;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "167;82;214;266", "wc_reply_reviewers": "0;35;129;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;2;0", "reply_authors": "5;1;3;3", "rating_avg": [ 6.0, 1.5811388300841898 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 54.25, 29.337476033224124 ], "wc_strengths_avg": [ 33.75, 25.24257316519059 ], "wc_improvement_avg": [ 48.25, 46.19185534269001 ], "wc_limitations_avg": [ 24.5, 24.27447218787671 ], "wc_correctness_avg": [ 3.0, 2.9154759474226504 ], "wc_clarity_avg": [ 2.25, 1.6393596310755 ], "wc_relation_to_prior_work_avg": [ 1.25, 0.4330127018922193 ], "wc_documentation_avg": [ 14.0, 22.516660498395403 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 182.25, 67.64752397538287 ], "wc_reply_reviewers_avg": [ 41.0, 52.77783625727754 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.948683298050514, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "email": "suda.edu.cn;suda.edu.cn;pjlab.org.cn;suda.edu.cn;suda.edu.cn;xmu.edu.cn;suda.edu.cn;hit.edu.cn;cuhk.edu.hk", "author_num": 9, "aff_unique_index": "0;0;1;0;2;3;0;4;5", "aff_unique_norm": "Soochow University;Shanghai Artificial Intelligence Laboratory;Suzhou University;Xiamen University;Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.soochow.edu.cn;http://www.shailab.org/;https://www.suda.edu.cn;https://www.xmu.edu.cn;http://en.hhit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "Soochow U;Shanghai AI Lab;Suda;XMU;HIT;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "TSDS: Data Selection for Task-Specific Model Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93130", "id": "wjbTHLUSzU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wjbTHLUSzU", "openreview": "https://openreview.net/forum?id=wjbTHLUSzU", "poster": "/media/PosterPDFs/NeurIPS%202024/93130.png?t=1733813665.883709", "project": "", "author_site": "Zifan Liu, Amin Karbasi, Theodoros Rekatsinas", "tldr": "", "abstract": "Finetuning foundation models for specific tasks is an emerging paradigm in modern machine learning. The efficacy of task-specific finetuning largely depends on the selection of appropriate training data. We present TSDS (Task-Specific Data Selection), a framework to select data for task-specific model finetuning, guided by a small but representative set of examples from the target task. To do so, we formulate data selection for task-specific finetuning as an optimization problem with a distribution alignment loss based on optimal transport to capture the discrepancy between the selected data and the target distribution. In addition, we add a regularizer to encourage the diversity of the selected data and incorporate kernel density estimation into the regularizer to reduce the negative effects of near-duplicates among the candidate data.\nWe connect our optimization problem to nearest neighbor search and design efficient algorithms to compute the optimal solution based on approximate nearest neighbor search techniques.\nWe evaluate our method on data selection for both continued pretraining and instruction tuning of language models.\nWe show that instruction tuning using data selected by our method with a 1\\% selection ratio often outperforms using the full dataset and beats the baseline selection methods by 1.5 points in F1 score on average.", "keywords": "data selection;finetuning;foundation model;large language model;optimal transport", "primary_area": "generative_models", "supplementary_material": "/attachment/ff8e18ce48f0707d2b3422190a6e6fc6a9117f67.zip", "author": "Zifan Liu;Amin Karbasi;Theodoros Rekatsinas", "authorids": "~Zifan_Liu2;~Amin_Karbasi3;~Theodoros_Rekatsinas2", "gender": "M;M;M", "homepage": "https://zifanl.github.io/;http://seas.yale.edu/faculty-research/faculty-directory/amin-karbasi;https://thodrek.github.io", "dblp": "30/2761;49/7411;51/11411", "google_scholar": "YKI_uJcAAAAJ;https://scholar.google.com.tw/citations?user=VusVB38AAAAJ;https://scholar.google.com.tw/citations?user=ZUzG5igAAAAJ", "orcid": ";;", "linkedin": "zifan-liu-cs/;;", "or_profile": "~Zifan_Liu2;~amin_karbasi1;~Theodoros_Rekatsinas1", "aff": "University of Wisconsin - Madison;Google;Apple", "aff_domain": "wisc.edu;google.com;apple.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nliu2024tsds,\ntitle={{TSDS}: Data Selection for Task-Specific Model Finetuning},\nauthor={Zifan Liu and Amin Karbasi and Theodoros Rekatsinas},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wjbTHLUSzU}\n}", "github": "", "reviewers": "CE8N;rjVY;mg1e;zKiE", "pdf_size": 574474, "rating": "3;5;5;8", "confidence": "5;3;3;2", "soundness": "2;2;3;4", "novelty": "2;3;3;3", "presentation": "3;2;3;4", "wc_summary": "68;55;142;88", "wc_strengths": "47;65;203;46", "wc_weaknesses": "167;221;206;71", "wc_questions": "6;2;196;23", "wc_limitations": "1;25;27;4", "wc_review": "289;368;774;232", "wc_reply_reviewers": "0;19;0;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;1;1;1", "rating_avg": [ 5.25, 1.7853571071357126 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 88.25, 33.1841453106751 ], "wc_strengths_avg": [ 90.25, 65.53386529116072 ], "wc_weaknesses_avg": [ 166.25, 58.41821205754247 ], "wc_questions_avg": [ 56.75, 80.78172751309543 ], "wc_limitations_avg": [ 14.25, 11.818946653572814 ], "wc_review_avg": [ 415.75, 212.39865230269234 ], "wc_reply_reviewers_avg": [ 4.75, 8.227241335952167 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9316142209946916, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16847222900372303302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "wisc.edu;google.com;apple.com", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Wisconsin-Madison;Google;Apple", "aff_unique_dep": ";Google;Apple Inc.", "aff_unique_url": "https://www.wisc.edu;https://www.google.com;https://www.apple.com", "aff_unique_abbr": "UW-Madison;Google;Apple", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Madison;Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Scaling White-Box Transformers for Vision", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93129", "id": "wkwGedn19x", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wkwGedn19x", "openreview": "https://openreview.net/forum?id=wkwGedn19x", "poster": "/media/PosterPDFs/NeurIPS%202024/93129.png?t=1733173685.784244", "project": "", "author_site": "Jinrui Yang, Xianhang Li, Druv Pai, Yuyin Zhou, Yi Ma, Yaodong Yu, Cihang Xie", "tldr": "", "abstract": "CRATE, a white-box transformer architecture designed to learn compressed and sparse representations, offers an intriguing alternative to standard vision transformers (ViTs) due to its inherent mathematical interpretability. Despite extensive investigations into the scaling behaviors of language and vision transformers, the scalability of CRATE remains an open question which this paper aims to address. \nSpecifically, we propose CRATE-$\\alpha$, featuring strategic yet minimal modifications to the sparse coding block in the CRATE architecture design, and a light training recipe designed to improve the scalability of CRATE.\nThrough extensive experiments, we demonstrate that CRATE-$\\alpha$ can effectively scale with larger model sizes and datasets. \nFor example, our CRATE-$\\alpha$-B substantially outperforms the prior best CRATE-B model accuracy on ImageNet classification by 3.7%, achieving an accuracy of 83.2%. Meanwhile, when scaling further, our CRATE-$\\alpha$-L obtains an ImageNet classification accuracy of 85.1%. More notably, these model performance improvements are achieved while preserving, and potentially even enhancing the interpretability of learned CRATE models, as we demonstrate through showing that the learned token representations of increasingly larger trained CRATE-$\\alpha$ models yield increasingly higher-quality unsupervised object segmentation of images.", "keywords": "white-box deep neural networks;representation learning;transformer;sparse coding;scaling", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/9607b591487ef91382d20dc2c8ff1691fb320a1f.zip", "author": "Jinrui Yang;Xianhang Li;Druv Pai;Yuyin Zhou;Yi Ma;Yaodong Yu;Cihang Xie", "authorids": "~Jinrui_Yang2;~Xianhang_Li1;~Druv_Pai1;~Yuyin_Zhou1;~Yi_Ma4;~Yaodong_Yu4;~Cihang_Xie3", "gender": "M;M;M;;M;M;M", "homepage": ";https://xhl-video.github.io/xianhangli/;https://druvpai.github.io;https://yuyinzhou.github.io/;http://people.eecs.berkeley.edu/~yima/;https://yaodongyu.github.io;https://cihangxie.github.io/", "dblp": "272/1122;268/5945;;192/1413;;;175/3366", "google_scholar": "8iBtvCQAAAAJ;YKpFz4YAAAAJ;https://scholar.google.com/citations?hl=en;eiqVLC0AAAAJ;https://scholar.google.com.hk/citations?user=XqLiBQMAAAAJ;bZ9oyW8AAAAJ;X3vVZPcAAAAJ", "orcid": ";;;;;;", "linkedin": ";;https://linkedin.com/in/druvpai;;;;", "or_profile": "~Jinrui_Yang2;~Xianhang_Li1;~Druv_Pai1;~Yuyin_Zhou1;~Yi_Ma4;~Yaodong_Yu4;~cihang_xie1", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;Electrical Engineering & Computer Science Department, University of California, Berkeley;University of California, Santa Cruz;University of California, Berkeley;Electrical Engineering & Computer Science Department, University of California Berkeley;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;eecs.berkeley.edu;ucsc.edu;berkeley.edu;eecs.berkeley.edu;ucsc.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2024scaling,\ntitle={Scaling White-Box Transformers for Vision},\nauthor={Jinrui Yang and Xianhang Li and Druv Pai and Yuyin Zhou and Yi Ma and Yaodong Yu and Cihang Xie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wkwGedn19x}\n}", "github": "", "reviewers": "yemz;cPqJ;8F4z;YH3U", "pdf_size": 5904281, "rating": "5;6;6;7", "confidence": "4;4;3;4", "soundness": "3;3;2;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "59;72;53;81", "wc_strengths": "58;106;77;122", "wc_weaknesses": "148;195;68;43", "wc_questions": "20;206;2;3", "wc_limitations": "1;6;1;1", "wc_review": "286;585;201;250", "wc_reply_reviewers": "51;591;10;0", "wc_reply_authors": "43;1662;0;0", "reply_reviewers": "1;3;1;0", "reply_authors": "2;5;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 10.940178243520533 ], "wc_strengths_avg": [ 90.75, 24.853319697778804 ], "wc_weaknesses_avg": [ 113.5, 60.97745485013293 ], "wc_questions_avg": [ 57.75, 85.89055535971345 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 330.5, 150.00083333101853 ], "wc_reply_reviewers_avg": [ 163.0, 247.84370074706356 ], "wc_reply_authors_avg": [ 426.25, 713.6765286178326 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 2.25, 1.6393596310755 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2830208699621022922&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ucsc.edu;ucsc.edu;eecs.berkeley.edu;ucsc.edu;berkeley.edu;eecs.berkeley.edu;ucsc.edu", "author_num": 7, "aff_unique_index": "0;0;1;0;1;1;0", "aff_unique_norm": "University of California, Santa Cruz;University of California, Berkeley", "aff_unique_dep": ";Electrical Engineering & Computer Science Department", "aff_unique_url": "https://www.ucsc.edu;https://www.berkeley.edu", "aff_unique_abbr": "UCSC;UC Berkeley", "aff_campus_unique_index": "0;0;1;0;1;1;0", "aff_campus_unique": "Santa Cruz;Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Infinitesimal Generators of Continuous Symmetries from Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93128", "id": "wl44W8xpc7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wl44W8xpc7", "openreview": "https://openreview.net/forum?id=wl44W8xpc7", "poster": "/media/PosterPDFs/NeurIPS%202024/93128.png?t=1731587004.1601503", "project": "", "author_site": "Gyeonghoon Ko, Hyunsu Kim, Juho Lee", "tldr": "", "abstract": "Exploiting symmetry inherent in data can significantly improve the sample efficiency of a learning procedure and the generalization of learned models. When data clearly reveals underlying symmetry, leveraging this symmetry can naturally inform the design of model architectures or learning strategies. Yet, in numerous real-world scenarios, identifying the specific symmetry within a given data distribution often proves ambiguous. To tackle this, some existing works learn symmetry in a data-driven manner, parameterizing and learning expected symmetry through data. However, these methods often rely on explicit knowledge, such as pre-defined Lie groups, which are typically restricted to linear or affine transformations. In this paper, we propose a novel symmetry learning algorithm based on transformations defined with one-parameter groups, continuously parameterized transformations flowing along the directions of vector fields called infinitesimal generators. Our method is built upon minimal inductive biases, encompassing not only commonly utilized symmetries rooted in Lie groups but also extending to symmetries derived from nonlinear generators. To learn these symmetries, we introduce a notion of a validity score that examine whether the transformed data is still valid for the given task. The validity score is designed to be fully differentiable and easily computable, enabling effective searches for transformations that achieve symmetries innate to the data. We apply our method mainly in two domains: image data and partial differential equations, and demonstrate its advantages. Our codes are available at \\url{https://github.com/kogyeonghoon/learning-symmetry-from-scratch.git}.", "keywords": "Symmetry Discovery;Geometric Deep Learning;Lie Point Symmetry;Neural PDE Solver", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/aeb7374cb0032635f43721b9aba30963b4ee8076.zip", "author": "Gyeonghoon Ko;Hyunsu Kim;Juho Lee", "authorids": "~Gyeonghoon_Ko1;~Hyunsu_Kim2;~Juho_Lee2", "gender": "M;M;M", "homepage": "https://github.com/kogyeonghoon;https://kim-hyunsu.github.io/;https://juho.lee.github.io", "dblp": "391/4998;;55/3410-1", "google_scholar": ";;Py4URJUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Gyeonghoon_Ko1;~Hyunsu_Kim2;~Juho_Lee2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nko2024learning,\ntitle={Learning Infinitesimal Generators of Continuous Symmetries from Data},\nauthor={Gyeonghoon Ko and Hyunsu Kim and Juho Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wl44W8xpc7}\n}", "github": "", "reviewers": "4Qv3;7EZa;UU8r", "pdf_size": 5905618, "rating": "5;6;6", "confidence": "3;4;3", "soundness": "3;2;2", "novelty": "2;3;3", "presentation": "3;3;2", "wc_summary": "46;203;118", "wc_strengths": "17;188;50", "wc_weaknesses": "3;797;49", "wc_questions": "565;179;181", "wc_limitations": "1;32;73", "wc_review": "632;1399;471", "wc_reply_reviewers": "0;197;0", "wc_reply_authors": "0;170;0", "reply_reviewers": "0;2;0", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 122.33333333333333, 64.1681818002938 ], "wc_strengths_avg": [ 85.0, 74.06753674856482 ], "wc_weaknesses_avg": [ 283.0, 363.9377236103268 ], "wc_questions_avg": [ 308.3333333333333, 181.49257713624422 ], "wc_limitations_avg": [ 35.333333333333336, 29.48822740612863 ], "wc_review_avg": [ 834.0, 404.885992183808 ], "wc_reply_reviewers_avg": [ 65.66666666666667, 92.86669059583322 ], "wc_reply_authors_avg": [ 56.666666666666664, 80.13876853447539 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.9428090415820634 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7064865455933249091&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Efficient LLM Scheduling by Learning to Rank", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93127", "id": "wlLjYl0Gi6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wlLjYl0Gi6", "openreview": "https://openreview.net/forum?id=wlLjYl0Gi6", "poster": "/media/PosterPDFs/NeurIPS%202024/93127.png?t=1731662562.651862", "project": "", "author_site": "Yichao Fu, Siqi Zhu, Runlong Su, Aurick Qiao, Ion Stoica, Hao Zhang", "tldr": "", "abstract": "In Large Language Model (LLM) inference, the output length of an LLM request is typically regarded as not known a priori. Consequently, most LLM serving systems employ a simple First-come-first-serve (FCFS) scheduling strategy, leading to Head-Of-Line (HOL) blocking and reduced throughput and service quality. \nIn this paper, we reexamine this assumption -- we show that, although predicting the exact generation length of each request is infeasible, it is possible to predict the relative ranks of output lengths in a batch of requests, using learning to rank. The ranking information offers valuable guidance for scheduling requests. Building on this insight, we develop a novel scheduler for LLM inference and serving that can approximate the shortest-job-first (SJF) schedule better than existing approaches. We integrate this scheduler with the state-of-the-art LLM serving system and show significant performance improvement in several important applications: 2.8x lower latency in chatbot serving and 6.5x higher throughput in synthetic data generation. Our code is available at https://github.com/hao-ai-lab/vllm-ltr.git", "keywords": "Large Language Models", "primary_area": "infrastructure", "supplementary_material": "", "author": "Yichao Fu;Siqi Zhu;Runlong Su;Aurick Qiao;Ion Stoica;Hao Zhang", "authorids": "~Yichao_Fu1;~Siqi_Zhu1;~Runlong_Su1;~Aurick_Qiao1;~Ion_Stoica1;~Hao_Zhang2", "gender": ";;M;M;M;M", "homepage": ";;;;http://people.eecs.berkeley.edu/~istoica/;https://cseweb.ucsd.edu/~haozhang/", "dblp": ";;;141/2530;s/IonStoica;55/2270-25", "google_scholar": ";;;DOM7ToIAAAAJ;vN-is70AAAAJ;H1d4BS8AAAAJ", "orcid": ";;;0009-0004-9119-8696;;", "linkedin": ";;runlong-su-b1173a292/;aurickq/;ionstoica;", "or_profile": "~Yichao_Fu1;~Siqi_Zhu1;~Runlong_Su1;~Aurick_Qiao1;~Ion_Stoica1;~Hao_Zhang2", "aff": ";;University of California, San Diego;Snowflake;University of California, Berkeley;Carnegie Mellon University", "aff_domain": ";;ucsd.edu;snowflake.com;berkeley.edu;cmu.edu", "position": ";;MS student;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nfu2024efficient,\ntitle={Efficient {LLM} Scheduling by Learning to Rank},\nauthor={Yichao Fu and Siqi Zhu and Runlong Su and Aurick Qiao and Ion Stoica and Hao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wlLjYl0Gi6}\n}", "github": "", "reviewers": "RLWW;JCjf;GSuM;GoTD", "pdf_size": 1233879, "rating": "4;6;6;6", "confidence": "4;4;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "3;3;3;3", "wc_summary": "70;24;45;69", "wc_strengths": "102;29;43;52", "wc_weaknesses": "180;90;110;25", "wc_questions": "15;5;35;57", "wc_limitations": "2;5;3;4", "wc_review": "369;153;236;207", "wc_reply_reviewers": "0;32;14;50", "wc_reply_authors": "0;37;35;345", "reply_reviewers": "0;1;1;2", "reply_authors": "1;2;2;3", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 52.0, 19.013153341831543 ], "wc_strengths_avg": [ 56.5, 27.518175811634027 ], "wc_weaknesses_avg": [ 101.25, 55.26922742358536 ], "wc_questions_avg": [ 28.0, 19.924858845171276 ], "wc_limitations_avg": [ 3.5, 1.118033988749895 ], "wc_review_avg": [ 241.25, 79.5436201087177 ], "wc_reply_reviewers_avg": [ 24.0, 18.81488772222678 ], "wc_reply_authors_avg": [ 104.25, 139.77370103134567 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3131110958694883622&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 4, "email": ";;ucsd.edu;snowflake.com;berkeley.edu;cmu.edu", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, San Diego;Snowflake Inc.;University of California, Berkeley;Carnegie Mellon University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsd.edu;https://www.snowflake.com;https://www.berkeley.edu;https://www.cmu.edu", "aff_unique_abbr": "UCSD;Snowflake;UC Berkeley;CMU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "San Diego;;Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Advancing Training Efficiency of Deep Spiking Neural Networks through Rate-based Backpropagation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93126", "id": "wlcm21C4nk", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wlcm21C4nk", "openreview": "https://openreview.net/forum?id=wlcm21C4nk", "poster": "/media/PosterPDFs/NeurIPS%202024/93126.png?t=1731363037.5928252", "project": "", "author_site": "Chengting Yu, Lei Liu, Gaoang Wang, Erping Li, Aili Wang", "tldr": "", "abstract": "Recent insights have revealed that rate-coding is a primary form of information representation captured by surrogate-gradient-based Backpropagation Through Time (BPTT) in training deep Spiking Neural Networks (SNNs). Motivated by these findings, we propose rate-based backpropagation, a training strategy specifically designed to exploit rate-based representations to reduce the complexity of BPTT. Our method minimizes reliance on detailed temporal derivatives by focusing on averaged dynamics, streamlining the computational graph to reduce memory and computational demands of SNNs training. We substantiate the rationality of the gradient approximation between BPTT and the proposed method through both theoretical analysis and empirical observations. Comprehensive experiments on CIFAR-10, CIFAR-100, ImageNet, and CIFAR10-DVS validate that our method achieves comparable performance to BPTT counterparts, and surpasses state-of-the-art efficient training techniques. By leveraging the inherent benefits of rate-coding, this work sets the stage for more scalable and efficient SNNs training within resource-constrained environments.", "keywords": "Spiking Neural Networks;Training Method;Training Efficiency Optimization", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/a4775862497eb748b75db7be1d6984317a1abd5b.zip", "author": "Chengting Yu;Lei Liu;Gaoang Wang;Erping Li;Aili Wang", "authorids": "~Chengting_Yu1;~Lei_Liu26;~Gaoang_Wang2;~Erping_Li1;~Aili_Wang2", "gender": "M;M;M;M;F", "homepage": ";https://github.com/linghushaoxia007;https://person.zju.edu.cn/en/gaoangwang;https://person.zju.edu.cn/en/liep;https://person.zju.edu.cn/en/ailiwang", "dblp": "305/8526;;176/7523;80/4276-1;95/3819-2", "google_scholar": "7oeTzcwAAAAJ;;GhsXNiwAAAAJ;;WurRhGMAAAAJ", "orcid": ";;;;0000-0002-1019-4019", "linkedin": ";;;;aili-wang-10b3b187/", "or_profile": "~Chengting_Yu1;~Lei_Liu26;~Gaoang_Wang2;~Erping_Li1;~Aili_Wang2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;intl.zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2024advancing,\ntitle={Advancing Training Efficiency of Deep Spiking Neural Networks through Rate-based Backpropagation},\nauthor={Chengting Yu and Lei Liu and Gaoang Wang and Erping Li and Aili Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wlcm21C4nk}\n}", "github": "", "reviewers": "YGtz;B3hA;TjVW;Fw27", "pdf_size": 5640885, "rating": "5;6;6;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;2;2;3", "presentation": "2;3;3;3", "wc_summary": "51;67;37;102", "wc_strengths": "47;38;23;25", "wc_weaknesses": "48;25;84;38", "wc_questions": "152;213;29;47", "wc_limitations": "4;13;3;11", "wc_review": "302;356;176;223", "wc_reply_reviewers": "0;0;29;14", "wc_reply_authors": "277;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 64.25, 24.24226680820092 ], "wc_strengths_avg": [ 33.25, 9.807522622966516 ], "wc_weaknesses_avg": [ 48.75, 21.924586655168667 ], "wc_questions_avg": [ 110.25, 75.66827274360107 ], "wc_limitations_avg": [ 7.75, 4.322904116447646 ], "wc_review_avg": [ 264.25, 69.52112988149717 ], "wc_reply_reviewers_avg": [ 10.75, 11.986972094736853 ], "wc_reply_authors_avg": [ 69.25, 119.94451842414475 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16386172436571067477&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;zju.edu.cn;intl.zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Reinforcement Learning with Lookahead Information", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93125", "id": "wlqfOvlTQz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wlqfOvlTQz", "openreview": "https://openreview.net/forum?id=wlqfOvlTQz", "poster": "/media/PosterPDFs/NeurIPS%202024/93125.png?t=1732870168.9353373", "project": "", "tldr": "", "abstract": "We study reinforcement learning (RL) problems in which agents observe the reward or transition realizations at their current state _before deciding which action to take_. Such observations are available in many applications, including transactions, navigation and more. When the environment is known, previous work shows that this lookahead information can drastically increase the collected reward. However, outside of specific applications, existing approaches for interacting with unknown environments are not well-adapted to these observations. In this work, we close this gap and design provably-efficient learning algorithms able to incorporate lookahead information. To achieve this, we perform planning using the empirical distribution of the reward and transition observations, in contrast to vanilla approaches that only rely on estimated expectations. We prove that our algorithms achieve tight regret versus a baseline that also has access to lookahead information -- linearly increasing the amount of collected reward compared to agents that cannot handle lookahead information.", "keywords": "Reinforcement Learning;Regret Minimization;Lookahead", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Nadav Merlis", "authorids": "~Nadav_Merlis1", "gender": "M", "homepage": "", "dblp": "227/2875", "google_scholar": "https://scholar.google.co.il/citations?user=pX2zzp0AAAAJ", "orcid": "0000-0002-9906-0577", "linkedin": "", "or_profile": "~Nadav_Merlis1", "aff": "Ecole Nationale de la Statistique et de l'Administration Economique", "aff_domain": "ensae.fr", "position": "Postdoc", "bibtex": "@inproceedings{\nmerlis2024reinforcement,\ntitle={Reinforcement Learning with Lookahead Information},\nauthor={Nadav Merlis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wlqfOvlTQz}\n}", "github": "", "reviewers": "CUjN;MArZ;Dj8h;V5fX;49N6", "pdf_size": 697420, "rating": "5;5;6;6;7", "confidence": "3;4;1;3;3", "soundness": "3;3;3;2;3", "novelty": "3;2;3;3;3", "presentation": "3;3;4;3;3", "wc_summary": "67;71;133;27;60", "wc_strengths": "24;62;70;26;112", "wc_weaknesses": "65;211;46;13;25", "wc_questions": "73;46;14;64;117", "wc_limitations": "1;5;28;17;6", "wc_review": "230;395;291;147;320", "wc_reply_reviewers": "0;0;0;13;115", "wc_reply_authors": "0;0;0;0;48", "reply_reviewers": "0;0;0;1;1", "reply_authors": "1;1;1;1;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.9797958971132712 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 71.6, 34.39534852272905 ], "wc_strengths_avg": [ 58.8, 32.41234332781263 ], "wc_weaknesses_avg": [ 72.0, 71.74398929527128 ], "wc_questions_avg": [ 62.8, 33.78401989106684 ], "wc_limitations_avg": [ 11.4, 9.850888284819803 ], "wc_review_avg": [ 276.6, 83.75822347686226 ], "wc_reply_reviewers_avg": [ 25.6, 44.98266332710859 ], "wc_reply_authors_avg": [ 9.6, 19.2 ], "reply_reviewers_avg": [ 0.4, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.3273268353539886, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YG5xWkku-EYJ:scholar.google.com/&scioq=Reinforcement+Learning+with+Lookahead+Information&hl=en&as_sdt=0,5", "gs_version_total": 8, "email": "ensae.fr", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": "", "aff_unique_url": "https://ensae.fr", "aff_unique_abbr": "ENSAE", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "An Analysis of Tokenization: Transformers under Markov Data", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93124", "id": "wm9JZq7RCe", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wm9JZq7RCe", "openreview": "https://openreview.net/forum?id=wm9JZq7RCe", "poster": "", "project": "", "author_site": "Nived Rajaraman, Jiantao Jiao, Kannan Ramchandran", "tldr": "", "abstract": "While there has been a large body of research attempting to circumvent tokenization for language modeling (Clark et al. 2022, Xue et al. 2022), the current consensus is that it is a necessary initial step for designing state-of-the-art performant language models. In this paper, we investigate tokenization from a theoretical point of view by studying the behavior of transformers on simple data generating processes. When trained on data drawn from certain simple $k^{\\text{th}}$-order Markov processes for $k > 1$, transformers exhibit a surprising phenomenon - in the absence of tokenization, they empirically are incredibly slow or fail to learn the right distribution and predict characters according to a unigram model (Makkuva et al. 2024). With the addition of tokenization, however, we empirically observe that transformers break through this barrier and are able to model the probabilities of sequences drawn from the source near-optimally, achieving small cross-entropy loss. With this observation as starting point, we study the end-to-end cross-entropy loss achieved by transformers with and without tokenization. With the appropriate tokenization, we show that even the simplest unigram models (over tokens) learnt by transformers are able to model the probability of sequences drawn from $k^{\\text{th}}$-order Markov sources near optimally. Our analysis provides a justification for the use of tokenization in practice through studying the behavior of transformers on Markovian data.", "keywords": "Tokenization;LLMs;interpretability", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/33138127e7d0632e5c20d0dc8606b9fbf98c1c41.zip", "author": "Nived Rajaraman;Jiantao Jiao;Kannan Ramchandran", "authorids": "~Nived_Rajaraman1;~Jiantao_Jiao1;~Kannan_Ramchandran1", "gender": "M;M;M", "homepage": "https://people.eecs.berkeley.edu/~nived.rajaraman/;https://scholar.google.com/citations?user=aO8KpGcAAAAJ&hl=en;https://www.eecs.berkeley.edu/~kannanr/", "dblp": "229/4215;43/8919;53/5765", "google_scholar": "7hb2BM8AAAAJ;aO8KpGcAAAAJ;https://scholar.google.com.tw/citations?user=DcV-5RAAAAAJ", "orcid": ";;0000-0002-4567-328X", "linkedin": ";;", "or_profile": "~Nived_Rajaraman1;~Jiantao_Jiao1;~Kannan_Ramchandran1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nrajaraman2024an,\ntitle={An Analysis of Tokenization: Transformers under Markov Data},\nauthor={Nived Rajaraman and Jiantao Jiao and Kannan Ramchandran},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wm9JZq7RCe}\n}", "github": "", "reviewers": "zrKY;veC3;RvTD;88v6", "pdf_size": 915706, "rating": "6;6;7;7", "confidence": "2;3;3;3", "soundness": "3;4;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;2", "wc_summary": "240;142;85;48", "wc_strengths": "104;56;36;78", "wc_weaknesses": "160;144;94;153", "wc_questions": "296;72;6;225", "wc_limitations": "4;29;1;50", "wc_review": "804;443;222;554", "wc_reply_reviewers": "32;23;31;33", "wc_reply_authors": "0;0;0;100", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 128.75, 72.43402170251214 ], "wc_strengths_avg": [ 68.5, 25.31304011769428 ], "wc_weaknesses_avg": [ 137.75, 25.8879798362097 ], "wc_questions_avg": [ 149.75, 115.93182263727246 ], "wc_limitations_avg": [ 21.0, 19.96246477767713 ], "wc_review_avg": [ 505.75, 209.60245108299665 ], "wc_reply_reviewers_avg": [ 29.75, 3.960744879438715 ], "wc_reply_authors_avg": [ 25.0, 43.30127018922193 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Fdm5Sd_TOEIJ:scholar.google.com/&scioq=An+Analysis+of+Tokenization:+Transformers+under+Markov+Data&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "berkeley.edu;berkeley.edu;berkeley.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "LLMCBench: Benchmarking Large Language Model Compression for Efficient Deployment", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97446", "id": "wmO7z57wNK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wmO7z57wNK", "openreview": "https://openreview.net/forum?id=wmO7z57wNK", "poster": "/media/PosterPDFs/NeurIPS%202024/97446.png?t=1731315805.5282753", "project": "", "author_site": "Jinyang Guo, Ge Yang, Changyi He, Jianyu Wu, Yifu Ding, Aishan Liu, Haotong Qin, Pengliang Ji, Xianglong Liu", "tldr": "", "abstract": "Although large language models (LLMs) have demonstrated their strong intelligence ability, the high demand for computation and storage hinders their practical application. To this end, many model compression techniques are proposed to increase the efficiency of LLMs. However, current researches only validate their methods on limited models, datasets, metrics, etc, and still lack a comprehensive evaluation under more general scenarios. So it is still a question of which model compression approach we should use under a specific case. To mitigate this gap, we present the Large Language Model Compression Benchmark (LLMCBench), a rigorously designed benchmark with an in-depth analysis for LLM compression algorithms. We first analyze the actual model production requirements and carefully design evaluation tracks and metrics. Then, we conduct extensive experiments and comparison using multiple mainstream LLM compression approaches. Finally, we perform an in-depth analysis based on the evaluation and provide useful insight for LLM compression design. We hope our LLMCBench can contribute insightful suggestions for LLM compression algorithm design and serve as a foundation for future research.", "keywords": "Model compression;benchmark", "primary_area": "", "supplementary_material": "/attachment/70cc15d5d7d277b3d10b2627c8543bfec48e8a1c.zip", "author": "Ge Yang;Changyi He;Jinyang Guo;Jianyu Wu;Yifu Ding;Aishan Liu;Haotong Qin;Pengliang Ji;Xianglong Liu", "authorids": "~Ge_Yang5;~Changyi_He1;~Jinyang_Guo1;~Jianyu_Wu2;~Yifu_Ding2;~Aishan_Liu1;~Haotong_Qin1;~Pengliang_Ji1;~Xianglong_Liu3", "gender": "M;M;M;M;F;M;M;;", "homepage": ";;https://jinyangguo.github.io/;https://uanu2002.github.io/;https://yifu-ding.github.io/;https://liuaishan.github.io/;https://htqin.github.io/;;", "dblp": ";;;;;177/5658;262/3626.html;;", "google_scholar": "OQ3u-S4AAAAJ;IUCo1rwAAAAJ;uJGeT1AAAAAJ;WmA0pzkAAAAJ;RCEI1r0AAAAJ;88tzr_sAAAAJ;mK6n-KgAAAAJ;;", "orcid": "0009-0001-0635-0197;;;0009-0006-9173-1766;0000-0002-3612-8757;;;;", "linkedin": ";;;;yifu-ding-253614186/;;;;", "or_profile": "~Ge_Yang5;~Changyi_He1;~Jinyang_Guo1;~Jianyu_Wu2;~Yifu_Ding2;~Aishan_Liu1;~Haotong_Qin1;~Pengliang_Ji1;~Xianglong_Liu3", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;Nanyang Technological University;Beihang University;ETHZ - ETH Zurich;;", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;ntu.edu.sg;buaa.edu.cn;ethz.ch;;", "position": "MS student;MS student;Assistant Professor;Undergrad student;PhD student;Assistant Professor;Postdoc;;", "bibtex": "@inproceedings{\nyang2024llmcbench,\ntitle={{LLMCB}ench: Benchmarking Large Language Model Compression for Efficient Deployment},\nauthor={Ge Yang and Changyi He and Jinyang Guo and Jianyu Wu and Yifu Ding and Aishan Liu and Haotong Qin and Pengliang Ji and Xianglong Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=wmO7z57wNK}\n}", "github": "", "reviewers": "7KTd;jkFp;bNyA", "pdf_size": 692842, "rating": "7;8;8", "confidence": "3;4;4", "wc_summary_and_contributions": "64;86;57", "wc_strengths": "61;66;63", "wc_improvement": "72;77;65", "wc_limitations": "1;4;7", "wc_correctness": "14;20;11", "wc_clarity": "23;1;4", "wc_relation_to_prior_work": "40;1;15", "wc_documentation": "7;29;5", "wc_additional_feedback": "1;1;1", "wc_review": "283;285;228", "wc_reply_reviewers": "23;29;39", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "6;5;6", "rating_avg": [ 7.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 69.0, 12.355835328567093 ], "wc_strengths_avg": [ 63.333333333333336, 2.0548046676563256 ], "wc_improvement_avg": [ 71.33333333333333, 4.9216076867444665 ], "wc_limitations_avg": [ 4.0, 2.449489742783178 ], "wc_correctness_avg": [ 15.0, 3.7416573867739413 ], "wc_clarity_avg": [ 9.333333333333334, 9.741092797468305 ], "wc_relation_to_prior_work_avg": [ 18.666666666666668, 16.131404843417148 ], "wc_documentation_avg": [ 13.666666666666666, 10.873004286866728 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 265.3333333333333, 26.411277052720408 ], "wc_reply_reviewers_avg": [ 30.333333333333332, 6.599663291074443 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 5.666666666666667, 0.4714045207910317 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2465675273067029026&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;ntu.edu.sg;buaa.edu.cn;ethz.ch;;", "author_num": 9, "aff_unique_index": "0;0;0;0;1;0;2", "aff_unique_norm": "Beihang University;Nanyang Technological University;ETH Zurich", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ntu.edu.sg;https://www.ethz.ch", "aff_unique_abbr": "BUAA;NTU;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;2", "aff_country_unique": "China;Singapore;Switzerland" }, { "title": "KFNN: K-Free Nearest Neighbor For Crowdsourcing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93123", "id": "wnPlJNiqfA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wnPlJNiqfA", "openreview": "https://openreview.net/forum?id=wnPlJNiqfA", "poster": "/media/PosterPDFs/NeurIPS%202024/93123.png?t=1731371842.2815003", "project": "", "author_site": "Wenjun Zhang, Liangxiao Jiang, Chaoqun Li", "tldr": "", "abstract": "To reduce annotation costs, it is common in crowdsourcing to collect only a few noisy labels from different crowd workers for each instance. However, the limited noisy labels restrict the performance of label integration algorithms in inferring the unknown true label for the instance. Recent works have shown that leveraging neighbor instances can help alleviate this problem. Yet, these works all assume that each instance has the same neighborhood size, which defies common sense. To address this gap, we propose a novel label integration algorithm called K-free nearest neighbor (KFNN). In KFNN, the neighborhood size of each instance is automatically determined based on its attributes and noisy labels. Specifically, KFNN initially estimates a Mahalanobis distance distribution from the attribute space to model the relationship between each instance and all classes. This distance distribution is then utilized to enhance the multiple noisy label distribution of each instance. Subsequently, a Kalman filter is designed to mitigate the impact of noise incurred by neighbor instances. Finally, KFNN determines the optimal neighborhood size by the max-margin learning. Extensive experimental results demonstrate that KFNN significantly outperforms all the other state-of-the-art algorithms and exhibits greater robustness in various crowdsourcing scenarios.", "keywords": "Crowdsourcing learning;Label integration;K-free nearest neighbor", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/718cbdf91b6ba3c8c630d1539fac3a6180e128b1.zip", "author": "Wenjun Zhang;Liangxiao Jiang;Chaoqun Li", "authorids": "~Wenjun_Zhang4;~Liangxiao_Jiang1;~Chaoqun_Li1", "gender": "M;M;F", "homepage": ";http://grzy.cug.edu.cn/jlx/en/index.htm;http://grzy.cug.edu.cn/lcq/", "dblp": "46/3359-12;46/4017;26/4644-1", "google_scholar": "https://scholar.google.com/citations?hl=en;S_PLKWEAAAAJ;", "orcid": "0000-0002-7269-0376;0000-0003-2201-3526;", "linkedin": ";;", "or_profile": "~Wenjun_Zhang4;~Liangxiao_Jiang1;~Chaoqun_Li1", "aff": "China University of Geosciences;China University of Geosciences;China University of Geosciences", "aff_domain": "cug.edu.cn;cug.edu.cn;cug.edu.cn", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2024kfnn,\ntitle={{KFNN}: K-Free Nearest Neighbor For Crowdsourcing},\nauthor={Wenjun Zhang and Liangxiao Jiang and Chaoqun Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wnPlJNiqfA}\n}", "github": "", "reviewers": "MQSy;4Kyj;qVhb;B2kv", "pdf_size": 764236, "rating": "3;7;7;8", "confidence": "4;4;4;5", "soundness": "1;3;3;4", "novelty": "2;3;3;4", "presentation": "2;3;4;3", "wc_summary": "96;76;101;91", "wc_strengths": "24;167;87;140", "wc_weaknesses": "53;152;99;61", "wc_questions": "579;5;19;2", "wc_limitations": "7;36;1;2", "wc_review": "759;436;307;296", "wc_reply_reviewers": "407;14;10;16", "wc_reply_authors": "805;0;0;0", "reply_reviewers": "2;1;1;1", "reply_authors": "4;1;1;1", "rating_avg": [ 6.25, 1.920286436967152 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 91.0, 9.354143466934854 ], "wc_strengths_avg": [ 104.5, 54.66488818245218 ], "wc_weaknesses_avg": [ 91.25, 39.14316670889058 ], "wc_questions_avg": [ 151.25, 247.04490988482235 ], "wc_limitations_avg": [ 11.5, 14.326548781894402 ], "wc_review_avg": [ 449.5, 186.97660281436285 ], "wc_reply_reviewers_avg": [ 111.75, 170.47635466539046 ], "wc_reply_authors_avg": [ 201.25, 348.57522502323656 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5261522196019801, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1075792375947863486&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 2, "email": "cug.edu.cn;cug.edu.cn;cug.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "China University of Geosciences", "aff_unique_dep": "", "aff_unique_url": "http://www.cug.edu.cn", "aff_unique_abbr": "CUG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Automated Multi-level Preference for MLLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93122", "id": "woENr7FJaI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=woENr7FJaI", "openreview": "https://openreview.net/forum?id=woENr7FJaI", "poster": "", "project": "", "author_site": "Mengxi Zhang, Wenhao Wu, Yu Lu, YuXin Song, KANG RONG, Huanjin Yao, Jianbo Zhao, Fanglong Liu, Haocheng Feng, Jingdong Wang, Yifan Sun", "tldr": "", "abstract": "Current multimodal Large Language Models (MLLMs) suffer from ''hallucination'', occasionally generating responses that are not grounded in the input images. To tackle this challenge, one promising path is to utilize reinforcement learning from human feedback (RLHF), which steers MLLMs towards learning superior responses while avoiding inferior ones. We rethink the common practice of using binary preferences (*i.e.*, superior, inferior), and find that adopting multi-level preferences (*e.g.*, superior, medium, inferior) is better for two benefits: 1) It narrows the gap between adjacent levels, thereby encouraging MLLMs to discern subtle differences. 2) It further integrates cross-level comparisons (beyond adjacent-level comparisons), thus providing a broader range of comparisons with hallucination examples. To verify our viewpoint, we present the Automated Multi-level Preference (**AMP**) framework for MLLMs. To facilitate this framework, we first develop an automated dataset generation pipeline that provides high-quality multi-level preference datasets without any human annotators. Furthermore, we design the Multi-level Direct Preference Optimization (MDPO) algorithm to robustly conduct complex multi-level preference learning. Additionally, we propose a new hallucination benchmark, MRHal-Bench. Extensive experiments across public hallucination and general benchmarks, as well as our MRHal-Bench, demonstrate the effectiveness of our proposed method. Code is available at https://github.com/takomc/amp.", "keywords": "Multimodal Large Language Models;Hallucinations;Reinforcement Learning from Human Feedback", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mengxi Zhang;Wenhao Wu;Yu Lu;YuXin Song;KANG RONG;Huanjin Yao;Jianbo Zhao;Fanglong Liu;Haocheng Feng;Jingdong Wang;Yifan Sun", "authorids": "~Mengxi_Zhang1;~Wenhao_Wu2;~Yu_Lu11;~YuXin_Song1;~KANG_RONG1;~Huanjin_Yao1;~Jianbo_Zhao3;~Fanglong_Liu1;~Haocheng_Feng1;~Jingdong_Wang1;~Yifan_Sun2", "gender": "M;M;M;M;M;M;M;M;M;M;M", "homepage": ";https://whwu95.github.io/;http://reler.net/people/yu_Lu/index.html;https://github.com/byrsongyuxin;https://github.com/rover5056;https://github.com/HJYao00;https://github.com/JerryLeolfl;https://jingdongwang2017.github.io/;https://yifansun-reid.github.io;;", "dblp": ";;;;;362/0783;290/8675;49/3441;99/10261-3.html;151/7248;", "google_scholar": ";Kn5d1ckAAAAJ;QQUmpsgAAAAJ;;;pDtsCBQAAAAJ;I3lfvcUAAAAJ;z5SPCmgAAAAJ;uUZEL7UAAAAJ;https://scholar.google.com.hk/citations?user=pnuQ5UsAAAAJ;", "orcid": "0000-0002-6011-1218;0000-0002-8511-743X;;;;;;0000-0002-4888-4445;0000-0003-3532-6521;;", "linkedin": ";wenhao-w-usyd/;;;;;;;;;%E5%81%A5%E5%8D%9A-%E8%B5%B5-894a58214/", "or_profile": "~Mengxi_Zhang1;~Wenhao_Wu2;~Yu_Lu11;~YuXin_Song1;~KANG_RONG1;~Huanjin_Yao1;~Fanglong_Liu1;~Jingdong_Wang1;~Yifan_Sun2;~Feng_Haocheng1;~jianbo_zhao2", "aff": "Tianjin University;The Chinese University of Hong Kong;University of Technology Sydney;Baidu;WeChat Vision, Tencent Inc.;Tsinghua University;Baidu;Baidu;Baidu;Baidu;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "tju.edu.cn;cuhk.edu.hk;uts.edu.au;baidu.com;tencent.com;tsinghua.edu.cn;baidu.com;baidu.com;baidu.com;baidu.com;ia.ac.cn", "position": "MS student;Honorary Research Assistant;PhD student;Researcher;Researcher;MS student;Researcher;Chief Scientist for Computer Vision;Senior Expert;Technical Manager;Undergrad student", "bibtex": "@inproceedings{\nzhang2024automated,\ntitle={Automated Multi-level Preference for {MLLM}s},\nauthor={Mengxi Zhang and Wenhao Wu and Yu Lu and YuXin Song and KANG RONG and Huanjin Yao and Jianbo Zhao and Fanglong Liu and Haocheng Feng and Jingdong Wang and Yifan Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=woENr7FJaI}\n}", "github": "", "reviewers": "iDRq;xo7T;2sCy", "pdf_size": 2556180, "rating": "5;6;6", "confidence": "5;4;4", "soundness": "3;4;2", "novelty": "3;4;2", "presentation": "3;3;3", "wc_summary": "74;36;141", "wc_strengths": "71;66;78", "wc_weaknesses": "118;64;296", "wc_questions": "95;2;174", "wc_limitations": "1;1;1", "wc_review": "359;169;690", "wc_reply_reviewers": "22;27;19", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.66666666666667, 43.40762861781581 ], "wc_strengths_avg": [ 71.66666666666667, 4.9216076867444665 ], "wc_weaknesses_avg": [ 159.33333333333334, 99.12057752499673 ], "wc_questions_avg": [ 90.33333333333333, 70.29619872763787 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 406.0, 215.27811469507685 ], "wc_reply_reviewers_avg": [ 22.666666666666668, 3.299831645537222 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16306476046355989654&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "tju.edu.cn;cuhk.edu.hk;uts.edu.au;baidu.com;tencent.com;tsinghua.edu.cn;baidu.com;baidu.com;baidu.com;baidu.com;ia.ac.cn", "author_num": 11, "aff_unique_index": "0;1;2;3;4;5;3;3;3;3;6", "aff_unique_norm": "Tianjin University;Chinese University of Hong Kong;University of Technology Sydney;Baidu;Tencent;Tsinghua University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Baidu, Inc.;WeChat Vision;;Institute of Automation", "aff_unique_url": "http://www.tju.edu.cn;https://www.cuhk.edu.hk;https://www.uts.edu.au;https://www.baidu.com;https://www.tencent.com;https://www.tsinghua.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "TJU;CUHK;UTS;Baidu;Tencent;THU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "Alignment at Pre-training! Towards Native Alignment for Arabic LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93121", "id": "woRFmNJiLp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=woRFmNJiLp", "openreview": "https://openreview.net/forum?id=woRFmNJiLp", "poster": "/media/PosterPDFs/NeurIPS%202024/93121.png?t=1730342211.8875399", "project": "", "author_site": "Juhao Liang, Zhenyang Cai, Jianqing Zhu, Huang Huang, Kewei Zong, Bang An, Mosen Alharthi, Juncai He, Lian Zhang, Haizhou Li, Benyou Wang, Jinchao Xu", "tldr": "", "abstract": "The alignment of large language models (LLMs) is critical for developing effective and safe language models. Traditional approaches focus on aligning models during the instruction tuning or reinforcement learning stages, referred to in this paper as `\\textit{post alignment}'. We argue that alignment during the pre-training phase, which we term 'native alignment', warrants investigation. Native alignment aims to prevent unaligned content from the beginning, rather than relying on post-hoc processing. This approach leverages extensively aligned pre-training data to enhance the effectiveness and usability of pre-trained models. Our study specifically explores the application of native alignment in the context of Arabic LLMs. We conduct comprehensive experiments and ablation studies to evaluate the impact of native alignment on model performance and alignment stability. Additionally, we release open-source Arabic LLMs that demonstrate state-of-the-art performance on various benchmarks, providing significant benefits to the Arabic LLM community.", "keywords": "Large language model;Safety of LLMs;LLM Pre-training", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/55552c496370e9475895fff1050c7ee629dd9e90.zip", "author": "Juhao Liang;Zhenyang Cai;Jianqing Zhu;Huang Huang;Kewei Zong;Bang An;Mosen Alharthi;Juncai He;Lian Zhang;Haizhou Li;Benyou Wang;Jinchao Xu", "authorids": "~Juhao_Liang1;~Zhenyang_Cai1;~Jianqing_Zhu2;~Huang_Huang2;~Kewei_Zong1;~Bang_An3;~Mosen_Alharthi1;~Juncai_He1;~Lian_Zhang2;~Haizhou_Li3;~Benyou_Wang2;~Jinchao_Xu1", "gender": "M;M;M;M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/JuhaoLiang1997;https://github.com/Eric3200C;;https://www.sribd.cn/en/teacher/1118;https://cemse.kaust.edu.sa/amcs/people/person/bang;https://scholar.google.com/citations?hl=en&user=eMfvcJ4AAAAJ;https://juncaihe.github.io;;https://colips.org/~eleliha/;https://wabyking.github.io/old.html;https://www.personal.psu.edu/jxx1/;", "dblp": "344/0709;369/9432.html;;;;;223/4286;;36/4118;169/1793;;", "google_scholar": ";;0JhMor8AAAAJ;;;;CG5GBW0AAAAJ;;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ;Jk4vJU8AAAAJ;pBHiYxcAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0006-0320-8490;;;;;;;0000-0001-9158-9401;0000-0002-1501-9914;;", "linkedin": "https://www.linkedin.cn/incareer/in/juhao-liang;;;;;;;lian-zhang-47027a187/;haizhou-li-4ba74b6/;;;", "or_profile": "~Juhao_Liang1;~Zhenyang_Cai1;~Huang_Huang2;~Kewei_Zong1;~Bang_An3;~Mosen_Alharthi1;~Juncai_He1;~Lian_Zhang2;~Haizhou_Li3;~Benyou_Wang2;~Jinchao_Xu1;~Zhu_Jianqing2", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong, Shenzhen;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong;;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;Shenzhen Research Institute of Big Data;National University of Singapore;The Chinese University of Hong Kong, Shenzhen;Pennsylvania State University;King Abdullah University of Science and Technology", "aff_domain": "cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;;kaust.edu.sa;kaust.edu.sa;sribd.cn;nus.edu.sg;cuhk.edu.cn;psu.edu;kaust.edu.sa", "position": "PhD student;MS student;Researcher;Intern;;PhD student;Researcher;Researcher;Full Professor;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nliang2024alignment,\ntitle={Alignment at Pre-training! Towards Native Alignment for Arabic {LLM}s},\nauthor={Juhao Liang and Zhenyang Cai and Jianqing Zhu and Huang Huang and Kewei Zong and Bang An and Mosen Alharthi and Juncai He and Lian Zhang and Haizhou Li and Benyou Wang and Jinchao Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=woRFmNJiLp}\n}", "github": "", "reviewers": "5nE6;c7RZ;6r3B;g6T4", "pdf_size": 697578, "rating": "3;6;7;7", "confidence": "5;4;4;4", "soundness": "2;2;4;4", "novelty": "2;2;4;3", "presentation": "3;3;4;4", "wc_summary": "67;87;72;133", "wc_strengths": "9;25;62;44", "wc_weaknesses": "285;137;52;71", "wc_questions": "13;27;142;157", "wc_limitations": "4;17;58;1", "wc_review": "378;293;386;406", "wc_reply_reviewers": "63;34;91;0", "wc_reply_authors": "1967;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "6;1;1;1", "rating_avg": [ 5.75, 1.6393596310755 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 89.75, 26.03243169586737 ], "wc_strengths_avg": [ 35.0, 19.912307751739878 ], "wc_weaknesses_avg": [ 136.25, 91.49146135022656 ], "wc_questions_avg": [ 84.75, 65.15510340717755 ], "wc_limitations_avg": [ 20.0, 22.74862633215465 ], "wc_review_avg": [ 365.75, 43.222534631832964 ], "wc_reply_reviewers_avg": [ 47.0, 33.80088756231114 ], "wc_reply_authors_avg": [ 491.75, 851.7359846219954 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 2.165063509461097 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": -0.9684959969581861, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:S9_kZHAPv2YJ:scholar.google.com/&scioq=Alignment+at+Pre-training!+Towards+Native+Alignment+for+Arabic+LLMs&hl=en&as_sdt=0,5", "gs_version_total": 6, "email": "cuhk.edu.cn;cuhk.edu.cn;sribd.cn;cuhk.edu.cn;;kaust.edu.sa;kaust.edu.sa;sribd.cn;nus.edu.sg;cuhk.edu.cn;psu.edu;kaust.edu.sa", "author_num": 12, "aff_unique_index": "0;0;1;0;2;2;1;3;0;4;2", "aff_unique_norm": "Chinese University of Hong Kong;Shenzhen Research Institute of Big Data;King Abdullah University of Science and Technology;National University of Singapore;Pennsylvania State University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cuhk.edu.hk;http://www.sribd.cn;https://www.kast.kau.edu.sa;https://www.nus.edu.sg;https://www.psu.edu", "aff_unique_abbr": "CUHK;;KAUST;NUS;PSU", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Hong Kong SAR;Shenzhen;", "aff_country_unique_index": "0;0;0;0;1;1;0;2;0;3;1", "aff_country_unique": "China;Saudi Arabia;Singapore;United States" }, { "title": "Human Expertise in Algorithmic Prediction", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93120", "id": "wpGJ2AX6SZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wpGJ2AX6SZ", "openreview": "https://openreview.net/forum?id=wpGJ2AX6SZ", "poster": "/media/PosterPDFs/NeurIPS%202024/93120.png?t=1733496399.7839956", "project": "", "author_site": "Rohan Alur, Manish Raghavan, Devavrat Shah", "tldr": "", "abstract": "We introduce a novel framework for incorporating human expertise into algorithmic predictions. Our approach leverages human judgment to distinguish inputs which are *algorithmically indistinguishable*, or \"look the same\" to predictive algorithms. We argue that this framing clarifies the problem of human-AI collaboration in prediction tasks, as experts often form judgments by drawing on information which is not encoded in an algorithm's training data. Algorithmic indistinguishability yields a natural test for assessing whether experts incorporate this kind of \"side information\", and further provides a simple but principled method for selectively incorporating human feedback into algorithmic predictions. We show that this method provably improves the performance of any feasible algorithmic predictor and precisely quantify this improvement. We find empirically that although algorithms often outperform their human counterparts *on average*, human judgment can improve algorithmic predictions on *specific* instances (which can be identified ex-ante). In an X-ray classification task, we find that this subset constitutes nearly 30% of the patient population. Our approach provides a natural way of uncovering this heterogeneity and thus enabling effective human-AI collaboration.", "keywords": "human/AI collaboration;human/AI complementarity;multicalibration;machine learning for healthcare;trustworthy machine learning", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/98758ad76148097ae1a4913defbb572f573f0e90.zip", "author": "Rohan Alur;Manish Raghavan;Devavrat Shah", "authorids": "~Rohan_Alur1;~Manish_Raghavan1;~Devavrat_Shah1", "gender": "M;M;M", "homepage": "https://sites.google.com/view/rohanalur;https://mraghavan.github.io/;http://devavrat.mit.edu", "dblp": ";143/9427;73/3881", "google_scholar": "MFi8ptoAAAAJ;WaGlwJ4AAAAJ;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Rohan_Alur1;~Manish_Raghavan1;~Devavrat_Shah1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu", "position": "PhD student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nalur2024human,\ntitle={Human Expertise in Algorithmic Prediction},\nauthor={Rohan Alur and Manish Raghavan and Devavrat Shah},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wpGJ2AX6SZ}\n}", "github": "", "reviewers": "WEMo;jnGw;XFzN;fseb", "pdf_size": 1097738, "rating": "7;7;8;8", "confidence": "4;3;4;3", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "4;3;4;4", "wc_summary": "125;70;80;22", "wc_strengths": "81;132;46;166", "wc_weaknesses": "35;43;70;14", "wc_questions": "1;28;51;13", "wc_limitations": "9;11;10;1", "wc_review": "251;284;257;216", "wc_reply_reviewers": "27;14;36;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.25, 36.594910848367974 ], "wc_strengths_avg": [ 106.25, 46.09975596464693 ], "wc_weaknesses_avg": [ 40.5, 20.05617112013158 ], "wc_questions_avg": [ 23.25, 18.659782956937093 ], "wc_limitations_avg": [ 7.75, 3.960744879438715 ], "wc_review_avg": [ 252.0, 24.21776207662467 ], "wc_reply_reviewers_avg": [ 19.25, 13.589977924926883 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18418297412412227098&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "mit.edu;mit.edu;mit.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Solving Inverse Problems via Diffusion Optimal Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93119", "id": "wqLC4G1GN3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wqLC4G1GN3", "openreview": "https://openreview.net/forum?id=wqLC4G1GN3", "poster": "", "project": "", "author_site": "Henry Li, Marcus Pereira", "tldr": "", "abstract": "Existing approaches to diffusion-based inverse problem solvers frame the signal recovery task as a probabilistic sampling episode, where the solution is drawn from the desired posterior distribution. This framework suffers from several critical drawbacks, including the intractability of the conditional likelihood function, strict dependence on the score network approximation, and poor $\\mathbf{x}_0$ prediction quality. We demonstrate that these limitations can be sidestepped by reframing the generative process as a discrete optimal control episode. We derive a diffusion-based optimal controller inspired by the iterative Linear Quadratic Regulator (iLQR) algorithm. This framework is fully general and able to handle any differentiable forward measurement operator, including super-resolution, inpainting, Gaussian deblurring, nonlinear deblurring, and even highly nonlinear neural classifiers. Furthermore, we show that the idealized posterior sampling equation can be recovered as a special case of our algorithm. We then evaluate our method against a selection of neural inverse problem solvers, and establish a new baseline in image reconstruction with inverse problems.", "keywords": "diffusion models;inverse problems;optimal control", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Henry Li;Marcus Aloysius Pereira", "authorids": "~Henry_Li2;~Marcus_Aloysius_Pereira1", "gender": ";M", "homepage": "https://hnry.li;", "dblp": "31/6498;", "google_scholar": "o7-TIlcAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Henry_Li2;~Marcus_Aloysius_Pereira1", "aff": "Yale University;", "aff_domain": "yale.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nli2024solving,\ntitle={Solving Inverse Problems via Diffusion Optimal Control},\nauthor={Henry Li and Marcus Aloysius Pereira},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wqLC4G1GN3}\n}", "github": "", "reviewers": "VcWe;L2Ru;KZ9Z;u6BH;d6zE;GqZF", "pdf_size": 9680369, "rating": "4;5;5;6;6;7", "confidence": "3;3;3;3;3;4", "soundness": "1;2;2;3;3;3", "novelty": "2;2;2;4;3;3", "presentation": "2;3;3;3;3;3", "wc_summary": "69;65;46;67;65;110", "wc_strengths": "63;65;41;94;40;74", "wc_weaknesses": "400;79;26;178;313;154", "wc_questions": "3;97;315;369;71;97", "wc_limitations": "3;56;14;17;7;46", "wc_review": "538;362;442;725;496;481", "wc_reply_reviewers": "525;12;131;27;29;25", "wc_reply_authors": "564;0;0;260;0;0", "reply_reviewers": "1;1;1;1;1;1", "reply_authors": "3;1;1;2;1;1", "rating_avg": [ 5.5, 0.9574271077563381 ], "confidence_avg": [ 3.1666666666666665, 0.3726779962499649 ], "soundness_avg": [ 2.3333333333333335, 0.7453559924999298 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 70.33333333333333, 19.3016982557379 ], "wc_strengths_avg": [ 62.833333333333336, 18.703089465528297 ], "wc_weaknesses_avg": [ 191.66666666666666, 129.0150723322236 ], "wc_questions_avg": [ 158.66666666666666, 134.29031569286332 ], "wc_limitations_avg": [ 23.833333333333332, 19.945063438243448 ], "wc_review_avg": [ 507.3333333333333, 111.45352793379351 ], "wc_reply_reviewers_avg": [ 124.83333333333333, 183.31431719559956 ], "wc_reply_authors_avg": [ 137.33333333333334, 213.12489817527705 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.7637626158259734 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.7006490497453708, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YODgoh3QTYYJ:scholar.google.com/&scioq=Solving+Inverse+Problems+via+Diffusion+Optimal+Control&hl=en&as_sdt=0,44", "gs_version_total": 3, "email": "yale.edu;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "ProG: A Graph Prompt Learning Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97445", "id": "wqo6xEMyk9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wqo6xEMyk9", "openreview": "https://openreview.net/forum?id=wqo6xEMyk9", "poster": "", "project": "", "author_site": "Chenyi Zi, Haihong Zhao, Xiangguo Sun, Yiqing Lin, Hong Cheng, Jia Li", "tldr": "", "abstract": "Artificial general intelligence on graphs has shown significant advancements across various applications, yet the traditional `Pre-train \\& Fine-tune' paradigm faces inefficiencies and negative transfer issues, particularly in complex and few-shot settings. Graph prompt learning emerges as a promising alternative, leveraging lightweight prompts to manipulate data and fill the task gap by reformulating downstream tasks to the pretext. However, several critical challenges still remain: how to unify diverse graph prompt models, how to evaluate the quality of graph prompts, and to improve their usability for practical comparisons and selection. In response to these challenges, we introduce the first comprehensive benchmark for graph prompt learning. Our benchmark integrates **SIX** pre-training methods and **FIVE** state-of-the-art graph prompt techniques, evaluated across **FIFTEEN** diverse datasets to assess performance, flexibility, and efficiency. We also present 'ProG', an easy-to-use open-source library that streamlines the execution of various graph prompt models, facilitating objective evaluations. Additionally, we propose a unified framework that categorizes existing graph prompt methods into two main approaches: prompts as graphs and prompts as tokens. This framework enhances the applicability and comparison of graph prompt techniques. The code is available at: https://github.com/sheldonresearch/ProG.", "keywords": "graph prompt leaning;benchmark;library", "primary_area": "", "supplementary_material": "", "author": "Chenyi Zi;Haihong Zhao;Xiangguo Sun;Yiqing Lin;Hong Cheng;Jia Li", "authorids": "~Chenyi_Zi2;~Haihong_Zhao2;~Xiangguo_Sun1;~Yiqing_Lin3;~Hong_Cheng1;~Jia_Li4", "gender": "M;M;F;F;M;M", "homepage": ";https://xgsun.mysxl.cn;;https://www1.se.cuhk.edu.hk/~hcheng/;https://sites.google.com/view/lijia;https://haihongzhao.com", "dblp": ";224/0873;;85/5637-1;23/6950-9;116/7210", "google_scholar": "uGbNoaQAAAAJ;rKfYQwEAAAAJ;;https://scholar.google.com.hk/citations?user=s3lQL7YAAAAJ;1gSbcYoAAAAJ;", "orcid": ";0000-0002-2224-4634;0009-0008-2129-0533;0000-0002-4673-2587;0000-0002-6362-4385;0000-0003-4188-6517", "linkedin": ";;;;;", "or_profile": "~Chenyi_Zi2;~Xiangguo_Sun1;~Yiqing_Lin3;~Hong_Cheng1;~Jia_Li4;~Haihong_ZHAO1", "aff": "Hong Kong University of Science and Technology;The Chinese University of Hong Kong;Tsinghua University;The Chinese University of Hong Kong;Hong Kong University of Science and Technology (Guangzhou);Hong Kong University of Science and Technology", "aff_domain": "hkust.edu;se.cuhk.edu.hk;mail.tsinghua.edu.cn;cuhk.edu.hk;ust.hk;hkust.edu", "position": "MS student;Postdoc;PhD student;Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzi2024prog,\ntitle={ProG: A Graph Prompt Learning Benchmark},\nauthor={Chenyi Zi and Haihong Zhao and Xiangguo Sun and Yiqing Lin and Hong Cheng and Jia Li},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=wqo6xEMyk9}\n}", "github": "", "reviewers": "AAD3;bbb5;v69Z;fofj", "pdf_size": 1076613, "rating": "5;6;7;8", "confidence": "5;4;2;5", "wc_summary_and_contributions": "61;135;75;244", "wc_strengths": "103;38;78;4", "wc_improvement": "192;36;54;13", "wc_limitations": "1;47;11;8", "wc_correctness": "1;1;34;5", "wc_clarity": "1;1;23;4", "wc_relation_to_prior_work": "1;1;20;15", "wc_documentation": "1;1;9;2", "wc_additional_feedback": "1;1;1;1", "wc_review": "362;261;305;296", "wc_reply_reviewers": "123;18;0;25", "wc_reply_authors": "237;205;73;106", "reply_reviewers": "1;1;0;1", "reply_authors": "8;7;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.0, 1.224744871391589 ], "wc_summary_and_contributions_avg": [ 128.75, 72.11232557614544 ], "wc_strengths_avg": [ 55.75, 37.81781987370504 ], "wc_improvement_avg": [ 73.75, 69.80105658226098 ], "wc_limitations_avg": [ 16.75, 17.83781096435322 ], "wc_correctness_avg": [ 10.25, 13.808964479641476 ], "wc_clarity_avg": [ 7.25, 9.175374651751284 ], "wc_relation_to_prior_work_avg": [ 9.25, 8.437268515343103 ], "wc_documentation_avg": [ 3.25, 3.344772040064913 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 306.0, 36.26982216664427 ], "wc_reply_reviewers_avg": [ 41.5, 47.92963592601137 ], "wc_reply_authors_avg": [ 155.25, 67.72877896433687 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.75, 2.7726341266023544 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.18257418583505533, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=627245598519501714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "hkust.edu;se.cuhk.edu.hk;mail.tsinghua.edu.cn;cuhk.edu.hk;ust.hk;hkust.edu", "author_num": 6, "aff_unique_index": "0;1;2;1;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Chinese University of Hong Kong;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.cuhk.edu.hk;https://www.tsinghua.edu.cn", "aff_unique_abbr": "HKUST;CUHK;THU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Corruption-Robust Linear Bandits: Minimax Optimality and Gap-Dependent Misspecification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93118", "id": "wqs2RMq4CW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wqs2RMq4CW", "openreview": "https://openreview.net/forum?id=wqs2RMq4CW", "poster": "", "project": "", "author_site": "Haolin Liu, Artin Tajdini, Andrew Wagenmaker, Chen-Yu Wei", "tldr": "", "abstract": "In linear bandits, how can a learner effectively learn when facing corrupted rewards? While significant work has explored this question, a holistic understanding across different adversarial models and corruption measures is lacking, as is a full characterization of the minimax regret bounds. In this work, we compare two types of corruptions commonly considered: strong corruption, where the corruption level depends on the learner\u2019s chosen action, and weak corruption, where the corruption level does not depend on the learner\u2019s chosen action. We provide a unified framework to analyze these corruptions. For stochastic linear bandits, we fully characterize the gap between the minimax regret under strong and weak corruptions. We also initiate the study of corrupted adversarial linear bandits, obtaining upper and lower bounds with matching dependencies on the corruption level. Next, we reveal a connection between corruption-robust learning and learning with gap-dependent misspecification\u2014a setting first studied by Liu et al. (2023a), where the misspecification level of an action or policy is proportional to its suboptimality. We present a general reduction that enables any corruption-robust algorithm to handle gap-dependent misspecification. This allows us to recover the results of Liu et al. (2023a) in a black-box manner and significantly generalize them to settings like linear MDPs, yielding the first results for gap-dependent misspecification in reinforcement learning. However, this general reduction does not attain the optimal rate for gap-dependent misspecification. Motivated by this, we develop a specialized algorithm that achieves optimal bounds for gap-dependent misspecification in linear bandits, thus answering an open question posed by Liu et al. (2023a).", "keywords": "corruption-robust;linear bandits;misspecification;reinforcement learning", "primary_area": "bandits", "supplementary_material": "", "author": "Haolin Liu;Artin Tajdini;Andrew Wagenmaker;Chen-Yu Wei", "authorids": "~Haolin_Liu8;~Artin_Tajdini1;~Andrew_Wagenmaker1;~Chen-Yu_Wei1", "gender": "M;;M;M", "homepage": "https://liuhl2000.github.io/;https://www.linkedin.com/in/artin-tajdini-692942ab;https://wagenmaker.github.io;https://bahh723.github.io/", "dblp": ";278/8700;195/1036;183/1729", "google_scholar": ";;ym8AZSIAAAAJ;2L2cR-kAAAAJ", "orcid": "0000-0002-8247-9742;;;", "linkedin": ";;;", "or_profile": "~Haolin_Liu8;~Artin_Tajdini1;~Andrew_Wagenmaker1;~Chen-Yu_Wei1", "aff": "University of Virginia, Charlottesville;University of Washington;University of Washington, Seattle;University of Virginia, Charlottesville", "aff_domain": "virginia.edu;uw.edu;uw.edu;virginia.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2024corruptionrobust,\ntitle={Corruption-Robust Linear Bandits: Minimax Optimality and Gap-Dependent Misspecification},\nauthor={Haolin Liu and Artin Tajdini and Andrew Wagenmaker and Chen-Yu Wei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wqs2RMq4CW}\n}", "github": "", "reviewers": "DxTV;TGCM;QWPG", "pdf_size": 626890, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;2", "wc_summary": "74;133;86", "wc_strengths": "40;56;2", "wc_weaknesses": "22;91;164", "wc_questions": "9;54;2", "wc_limitations": "4;26;1", "wc_review": "149;360;255", "wc_reply_reviewers": "10;21;329", "wc_reply_authors": "40;40;1039", "reply_reviewers": "1;1;3", "reply_authors": "2;2;5", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 97.66666666666667, 25.46020860523775 ], "wc_strengths_avg": [ 32.666666666666664, 22.64705033528404 ], "wc_weaknesses_avg": [ 92.33333333333333, 57.97892337354632 ], "wc_questions_avg": [ 21.666666666666668, 23.041026211713937 ], "wc_limitations_avg": [ 10.333333333333334, 11.14550233153366 ], "wc_review_avg": [ 254.66666666666666, 86.14071175827503 ], "wc_reply_reviewers_avg": [ 120.0, 147.8535311268103 ], "wc_reply_authors_avg": [ 373.0, 470.93311627024065 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 3.0, 1.4142135623730951 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6872261706073808698&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "virginia.edu;uw.edu;uw.edu;virginia.edu", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Virginia;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.virginia.edu;https://www.washington.edu", "aff_unique_abbr": "UVA;UW", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Charlottesville;;Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Quantum Algorithms for Non-smooth Non-convex Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93117", "id": "wsGzvhnoaX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wsGzvhnoaX", "openreview": "https://openreview.net/forum?id=wsGzvhnoaX", "poster": "", "project": "", "author_site": "Chengchang Liu, Chaowen Guan, Jianhao He, John C.S. Lui", "tldr": "", "abstract": "This paper considers the problem for finding the $(\\delta,\\epsilon)$-Goldstein stationary point of Lipschitz continuous objective, which is a rich function class to cover a great number of important applications. \nWe construct a novel zeroth-order quantum estimator for the gradient of the smoothed surrogate. \nBased on such estimator, we propose a novel quantum algorithm that achieves a query complexity of $\\tilde{\\mathcal{O}}(d^{3/2}\\delta^{-1}\\epsilon^{-3})$ on the stochastic function value oracle, where $d$ is the dimension of the problem. \nWe also enhance the query complexity to $\\tilde{\\mathcal{O}}(d^{3/2}\\delta^{-1}\\epsilon^{-7/3})$ by introducing a variance reduction variant. \nOur findings demonstrate the clear advantages of utilizing quantum techniques for non-convex non-smooth optimization, as they outperform the optimal classical methods on the dependency of $\\epsilon$ by a factor of $\\epsilon^{-2/3}$.", "keywords": "quantum computing;non-convex non-smooth optimization", "primary_area": "optimization", "supplementary_material": "", "author": "Chengchang Liu;Chaowen Guan;Jianhao He;John C.S. Lui", "authorids": "~Chengchang_Liu1;~Chaowen_Guan1;~Jianhao_He1;~John_C.S._Lui2", "gender": ";M;M;M", "homepage": "https://7ccliu.github.io;;;http://www.cse.cuhk.edu.hk/~cslui/Index.html", "dblp": "291/5180;133/7367;271/4370;l/JohnCSLui", "google_scholar": "jmrbA5wAAAAJ;;fvdQ0agAAAAJ;https://scholar.google.com.tw/citations?user=7LVjQ7MAAAAJ", "orcid": "0009-0003-6552-4892;;0000-0002-3201-0137;0000-0001-7466-0384", "linkedin": ";chaowen-guan-4a250b77/;;", "or_profile": "~Chengchang_Liu1;~Chaowen_Guan1;~Jianhao_He1;~John_C.S._Lui2", "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong;University of Cincinnati;Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;ceas.uc.edu;cuhk.hk;cse.cuhk.edu.hk", "position": "PhD student;Assistant Professor;Postdoc;Full Professor", "bibtex": "@inproceedings{\nliu2024quantum,\ntitle={Quantum Algorithms for Non-smooth Non-convex Optimization},\nauthor={Chengchang Liu and Chaowen Guan and Jianhao He and John C.S. Lui},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wsGzvhnoaX}\n}", "github": "", "reviewers": "RNco;mFHz;KzCk;hDMr", "pdf_size": 340600, "rating": "6;6;6;7", "confidence": "1;4;4;3", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;2;3", "wc_summary": "60;123;89;90", "wc_strengths": "33;46;65;96", "wc_weaknesses": "61;68;71;136", "wc_questions": "23;125;98;203", "wc_limitations": "3;1;61;1", "wc_review": "180;363;384;526", "wc_reply_reviewers": "0;13;190;99", "wc_reply_authors": "0;0;593;173", "reply_reviewers": "0;1;2;2", "reply_authors": "1;1;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.5, 22.299103120977758 ], "wc_strengths_avg": [ 60.0, 23.695991222145572 ], "wc_weaknesses_avg": [ 84.0, 30.240701050074883 ], "wc_questions_avg": [ 112.25, 64.35594378144104 ], "wc_limitations_avg": [ 16.5, 25.705057868053906 ], "wc_review_avg": [ 363.25, 122.98246826275687 ], "wc_reply_reviewers_avg": [ 75.5, 76.27089877535205 ], "wc_reply_authors_avg": [ 191.5, 242.32674223040263 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8929328941795497577&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 4, "email": "cse.cuhk.edu.hk;ceas.uc.edu;cuhk.hk;cse.cuhk.edu.hk", "author_num": 4, "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chinese University of Hong Kong;University of Cincinnati", "aff_unique_dep": "Department of Computer Science and Engineering;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.uc.edu", "aff_unique_abbr": "CUHK;UC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "The Feature Speed Formula: a flexible approach to scale hyper-parameters of deep neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93116", "id": "wsHMb4J2o9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wsHMb4J2o9", "openreview": "https://openreview.net/forum?id=wsHMb4J2o9", "poster": "/media/PosterPDFs/NeurIPS%202024/93116.png?t=1731343204.6232388", "project": "", "author_site": "L\u00e9na\u00efc Chizat, Praneeth Netrapalli", "tldr": "", "abstract": "Deep learning succeeds by doing hierarchical feature learning, yet tuning hyper-parameters (HP) such as initialization scales, learning rates etc., only give indirect control over this behavior. In this paper, we introduce a key notion to predict and control feature learning: the angle $\\theta_\\ell$ between the feature updates and the backward pass (at layer index $\\ell$). We show that the magnitude of feature updates after one GD step, at any training time, can be expressed via a simple and general *feature speed formula* in terms of this angle $\\theta_\\ell$, the loss decay, and the magnitude of the backward pass. This angle $\\theta_\\ell$ is controlled by the conditioning of the layer-to-layer Jacobians and at random initialization, it is determined by the spectrum of a certain kernel, which coincides with the Neural Tangent Kernel when $\\ell=\\text{depth}$. Given $\\theta_\\ell$, the feature speed formula provides us with rules to adjust HPs (scales and learning rates) so as to satisfy certain dynamical properties, such as feature learning and loss decay. We investigate the implications of our approach for ReLU MLPs and ResNets in the large width-then-depth limit. Relying on prior work, we show that in ReLU MLPs with iid initialization, the angle degenerates with depth as $\\cos(\\theta_\\ell)=\\Theta(1/\\sqrt{\\ell})$. In contrast, ResNets with branch scale $O(1/\\sqrt{\\text{depth}})$ maintain a non-degenerate angle $\\cos(\\theta_\\ell)=\\Theta(1)$. We use these insights to recover key properties of known HP scalings (such as $\\mu$P), and also introduce a new HP scaling for large depth ReLU MLPs with favorable theoretical properties.", "keywords": "Feature Learning;Deep Neural Networks;One SGD step;Hyperparameter scaling;dynamical isometry", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "L\u00e9na\u00efc Chizat;Praneeth Netrapalli", "authorids": "~L\u00e9na\u00efc_Chizat1;~Praneeth_Netrapalli1", "gender": "M;M", "homepage": "https://lchizat.github.io/;http://praneethnetrapalli.org/", "dblp": "192/1488;http://dblp.uni-trier.de/pers/hd/n/Netrapalli:Praneeth", "google_scholar": "https://scholar.google.fr/citations?user=jrJh9yIAAAAJ;https://scholar.google.co.in/citations?user=mim8FQkAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~L\u00e9na\u00efc_Chizat1;~Praneeth_Netrapalli1", "aff": "EPFL - EPF Lausanne;Google", "aff_domain": "epfl.ch;google.com", "position": "Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nchizat2024the,\ntitle={The Feature Speed Formula: a flexible approach to scale hyper-parameters of deep neural networks},\nauthor={L{\\'e}na{\\\"\\i}c Chizat and Praneeth Netrapalli},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wsHMb4J2o9}\n}", "github": "", "reviewers": "gz2f;jmiK;PezB;PDm8", "pdf_size": 709748, "rating": "3;3;6;8", "confidence": "4;4;4;4", "soundness": "3;1;3;4", "novelty": "1;2;2;4", "presentation": "2;3;3;4", "wc_summary": "72;69;61;94", "wc_strengths": "45;34;79;192", "wc_weaknesses": "410;172;184;88", "wc_questions": "5;14;69;141", "wc_limitations": "57;4;9;39", "wc_review": "589;293;402;554", "wc_reply_reviewers": "418;0;47;0", "wc_reply_authors": "605;0;80;0", "reply_reviewers": "3;0;2;0", "reply_authors": "4;1;2;1", "rating_avg": [ 5.0, 2.1213203435596424 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.75, 1.0897247358851685 ], "novelty_avg": [ 2.25, 1.0897247358851685 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 74.0, 12.227019260637483 ], "wc_strengths_avg": [ 87.5, 62.57195857570706 ], "wc_weaknesses_avg": [ 213.5, 119.32623349456732 ], "wc_questions_avg": [ 57.25, 54.205050502697624 ], "wc_limitations_avg": [ 27.25, 21.775846711436962 ], "wc_review_avg": [ 459.5, 119.0892522438528 ], "wc_reply_reviewers_avg": [ 116.25, 175.26890054998347 ], "wc_reply_authors_avg": [ 171.25, 252.54640662658417 ], "reply_reviewers_avg": [ 1.25, 1.299038105676658 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9827381658848855680&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "epfl.ch;google.com", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "EPFL;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.epfl.ch;https://www.google.com", "aff_unique_abbr": "EPFL;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Lausanne;Mountain View", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "On the Ability of Developers' Training Data Preservation of Learnware", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93115", "id": "wsqDJHPUHN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wsqDJHPUHN", "openreview": "https://openreview.net/forum?id=wsqDJHPUHN", "poster": "", "project": "", "author_site": "Hao-Yi Lei, Zhi-Hao Tan, Zhi-Hua Zhou", "tldr": "", "abstract": "The learnware paradigm aims to enable users to leverage numerous existing well-trained models instead of building machine learning models from scratch. In this paradigm, developers worldwide can submit their well-trained models spontaneously into a learnware dock system, and the system helps developers generate specification for each model to form a learnware. As the key component, a specification should characterize the capabilities of the model, enabling it to be adequately identified and reused, while preserving the developer's original data. Recently, the RKME (Reduced Kernel Mean Embedding) specification was proposed and most commonly utilized. This paper provides a theoretical analysis of RKME specification about its preservation ability for developer's training data. By modeling it as a geometric problem on manifolds and utilizing tools from geometric analysis, we prove that the RKME specification is able to disclose none of the developer's original data and possesses robust defense against common inference attacks, while preserving sufficient information for effective learnware identification.", "keywords": "Learnware;Model Specification;Reduced Kernel Mean Embedding;Data Preservation;Synthetic Data;Learnware Dock System", "primary_area": "learning_theory", "supplementary_material": "", "author": "Hao-Yi Lei;Zhi-Hao Tan;Zhi-Hua Zhou", "authorids": "~Hao-Yi_Lei1;~Zhi-Hao_Tan1;~Zhi-Hua_Zhou2", "gender": "M;M;M", "homepage": "http://www.lamda.nju.edu.cn/leihy/;http://www.lamda.nju.edu.cn/tanzh/;https://cs.nju.edu.cn/zhouzh/", "dblp": "396/6267.html;245/3420;z/ZhiHuaZhou", "google_scholar": ";_9uUbpgAAAAJ;https://scholar.google.com.tw/citations?user=rSVIHasAAAAJ", "orcid": ";0000-0003-4607-6089;0000-0003-0746-1494", "linkedin": ";;", "or_profile": "~Hao-Yi_Lei1;~Zhi-Hao_Tan1;~Zhi-hua_Zhou1", "aff": "Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlei2024on,\ntitle={On the Ability of Developers' Training Data Preservation of Learnware},\nauthor={Hao-Yi Lei and Zhi-Hao Tan and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wsqDJHPUHN}\n}", "github": "", "reviewers": "gkER;mMLd;4E2V", "pdf_size": 781333, "rating": "6;6;7", "confidence": "2;2;2", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;2", "wc_summary": "55;107;156", "wc_strengths": "77;77;116", "wc_weaknesses": "46;208;122", "wc_questions": "13;179;166", "wc_limitations": "8;1;6", "wc_review": "199;572;566", "wc_reply_reviewers": "0;55;43", "wc_reply_authors": "0;28;23", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 106.0, 41.23913998456644 ], "wc_strengths_avg": [ 90.0, 18.384776310850235 ], "wc_weaknesses_avg": [ 125.33333333333333, 66.17821057988465 ], "wc_questions_avg": [ 119.33333333333333, 75.37609405876346 ], "wc_limitations_avg": [ 5.0, 2.943920288775949 ], "wc_review_avg": [ 445.6666666666667, 174.43687173938378 ], "wc_reply_reviewers_avg": [ 32.666666666666664, 23.612614331233114 ], "wc_reply_authors_avg": [ 17.0, 12.192894105447921 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2923689094756884793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Solving Zero-Sum Markov Games with Continuous State via Spectral Dynamic Embedding", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93114", "id": "wvQHQgnpGN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wvQHQgnpGN", "openreview": "https://openreview.net/forum?id=wvQHQgnpGN", "poster": "/media/PosterPDFs/NeurIPS%202024/93114.png?t=1730437345.856516", "project": "", "author_site": "Chenhao Zhou, Zebang Shen, zhang chao, Hanbin Zhao, Hui Qian", "tldr": "", "abstract": "In this paper, we propose a provably efficient natural policy gradient algorithm called Spectral Dynamic Embedding Policy Optimization (\\SDEPO) for two-player zero-sum stochastic Markov games with continuous state space and finite action space.\n In the policy evaluation procedure of our algorithm, a novel kernel embedding method is employed to construct a finite-dimensional linear approximations to the state-action value function.\n We explicitly analyze the approximation error in policy evaluation, and show that \\SDEPO\\ achieves an $\\tilde{O}(\\frac{1}{(1-\\gamma)^3\\epsilon})$ last-iterate convergence to the $\\epsilon-$optimal Nash equilibrium, which is independent of the cardinality of the state space.\n The complexity result matches the best-known results for global convergence of policy gradient algorithms for single agent setting.\n Moreover, we also propose a practical variant of \\SDEPO\\ to deal with continuous action space and empirical results demonstrate the practical superiority of the proposed method.", "keywords": "zero-sum Markov game;reinforcement learning;dynamic programming", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/3d7a74447aa7babe4adf3163213b53f340c85edb.zip", "author": "Chenhao Zhou;Zebang Shen;Chao Zhang;Hanbin Zhao;Hui Qian", "authorids": "~Chenhao_Zhou2;~Zebang_Shen1;~Chao_Zhang19;~Hanbin_Zhao1;~Hui_Qian1", "gender": "M;M;M;M;M", "homepage": ";;;;", "dblp": "159/1092-3;165/3377;94/3019-29;222/7871;66/5293", "google_scholar": "WUqisOIAAAAJ;klqzFvgAAAAJ;;F2kiw10AAAAJ;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Chenhao_Zhou2;~Zebang_Shen1;~Chao_Zhang19;~Hanbin_Zhao1;~Hui_Qian1", "aff": "Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2024solving,\ntitle={Solving Zero-Sum Markov Games with Continous State via Spectral Dynamic Embedding},\nauthor={Chenhao Zhou and Zebang Shen and Chao Zhang and Hanbin Zhao and Hui Qian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wvQHQgnpGN}\n}", "github": "", "reviewers": "sWhd;DHon;SoJH", "pdf_size": 445359, "rating": "4;7;7", "confidence": "3;4;2", "soundness": "3;2;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "63;131;46", "wc_strengths": "51;65;74", "wc_weaknesses": "176;195;60", "wc_questions": "56;52;5", "wc_limitations": "1;40;1", "wc_review": "347;483;186", "wc_reply_reviewers": "0;30;0", "wc_reply_authors": "400;36;55", "reply_reviewers": "0;1;0", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 80.0, 36.72419729097788 ], "wc_strengths_avg": [ 63.333333333333336, 9.46337971105226 ], "wc_weaknesses_avg": [ 143.66666666666666, 59.667597758098346 ], "wc_questions_avg": [ 37.666666666666664, 23.156472577277874 ], "wc_limitations_avg": [ 14.0, 18.384776310850235 ], "wc_review_avg": [ 338.6666666666667, 121.39284254939508 ], "wc_reply_reviewers_avg": [ 10.0, 14.142135623730951 ], "wc_reply_authors_avg": [ 163.66666666666666, 167.2928237818015 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:e2gSURhkWyUJ:scholar.google.com/&scioq=Solving+Zero-Sum+Markov+Games+with+Continuous+State+via+Spectral+Dynamic+Embedding&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "A provable control of sensitivity of neural networks through a direct parameterization of the overall bi-Lipschitzness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93113", "id": "ww62xltEfB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ww62xltEfB", "openreview": "https://openreview.net/forum?id=ww62xltEfB", "poster": "/media/PosterPDFs/NeurIPS%202024/93113.png?t=1731597706.3532972", "project": "", "author_site": "Yuri Kinoshita, Taro Toyoizumi", "tldr": "", "abstract": "While neural networks can enjoy an outstanding flexibility and exhibit unprecedented performance, the mechanism behind their behavior is still not well-understood. To tackle this fundamental challenge, researchers have tried to restrict and manipulate some of their properties in order to gain new insights and better control on them. Especially, throughout the past few years, the concept of *bi-Lipschitzness* has been proved as a beneficial inductive bias in many areas. However, due to its complexity, the design and control of bi-Lipschitz architectures are falling behind, and a model that is precisely designed for bi-Lipschitzness realizing a direct and simple control of the constants along with solid theoretical analysis is lacking. In this work, we investigate and propose a novel framework for bi-Lipschitzness that can achieve such a clear and tight control based on convex neural networks and the Legendre-Fenchel duality. Its desirable properties are illustrated with concrete experiments to illustrate its broad range of applications.", "keywords": "bi-Lipschitzness;theoretical guarantee;tight control;direct parameterization;inductive bias;convex neural network;Legendre-Fenchel transformation", "primary_area": "learning_theory", "supplementary_material": "/attachment/2b16413a9557031aa286c0ea85b4fae8ed2a19ae.zip", "author": "Yuri Kinoshita;Taro Toyoizumi", "authorids": "~Yuri_Kinoshita1;~Taro_Toyoizumi1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": "yuri-kinoshita-2a9390239;", "or_profile": "~Yuri_Kinoshita1;~Taro_Toyoizumi1", "aff": "The University of Tokyo;", "aff_domain": "g.ecc.u-tokyo.ac.jp;", "position": "MS student;", "bibtex": "@inproceedings{\nkinoshita2024a,\ntitle={A provable control of sensitivity of neural networks through a direct parameterization of the overall bi-Lipschitzness},\nauthor={Yuri Kinoshita and Taro Toyoizumi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ww62xltEfB}\n}", "github": "", "reviewers": "Hqwz;XcLq;u3yR", "pdf_size": 1869047, "rating": "6;6;7", "confidence": "3;3;4", "soundness": "3;4;3", "novelty": "3;3;3", "presentation": "3;3;4", "wc_summary": "110;77;46", "wc_strengths": "81;176;108", "wc_weaknesses": "26;79;96", "wc_questions": "1;53;176", "wc_limitations": "24;18;4", "wc_review": "242;403;430", "wc_reply_reviewers": "18;13;84", "wc_reply_authors": "20;19;57", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 77.66666666666667, 26.132142830026183 ], "wc_strengths_avg": [ 121.66666666666667, 39.96943276499625 ], "wc_weaknesses_avg": [ 67.0, 29.81051268708183 ], "wc_questions_avg": [ 76.66666666666667, 73.37725957149273 ], "wc_limitations_avg": [ 15.333333333333334, 8.379870059984357 ], "wc_review_avg": [ 358.3333333333333, 82.99531445944538 ], "wc_reply_reviewers_avg": [ 38.333333333333336, 32.355662392986005 ], "wc_reply_authors_avg": [ 32.0, 17.682382946499793 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10150012619880413691&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 3, "email": "g.ecc.u-tokyo.ac.jp;", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "LoRANN: Low-Rank Matrix Factorization for Approximate Nearest Neighbor Search", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93112", "id": "wyYsCI3K7U", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wyYsCI3K7U", "openreview": "https://openreview.net/forum?id=wyYsCI3K7U", "poster": "/media/PosterPDFs/NeurIPS%202024/93112.png?t=1732898780.6878428", "project": "", "author_site": "Elias J\u00e4\u00e4saari, Ville Hyv\u00f6nen, Teemu Roos", "tldr": "", "abstract": "Approximate nearest neighbor (ANN) search is a key component in many modern machine learning pipelines; recent use cases include retrieval-augmented generation (RAG) and vector databases. Clustering-based ANN algorithms, that use score computation methods based on product quantization (PQ), are often used in industrial-scale applications due to their scalability and suitability for distributed and disk-based implementations. However, they have slower query times than the leading graph-based ANN algorithms. In this work, we propose a new supervised score computation method based on the observation that inner product approximation is a multivariate (multi-output) regression problem that can be solved efficiently by reduced-rank regression. Our experiments show that on modern high-dimensional data sets, the proposed reduced-rank regression (RRR) method is superior to PQ in both query latency and memory usage. We also introduce LoRANN, a clustering-based ANN library that leverages the proposed score computation method. LoRANN is competitive with the leading graph-based algorithms and outperforms the state-of-the-art GPU ANN methods on high-dimensional data sets.", "keywords": "Approximate nearest neighbor search;vector search;k-nn search;multivariate regression;vector databases", "primary_area": "infrastructure", "supplementary_material": "", "author": "Elias J\u00e4\u00e4saari;Ville Hyv\u00f6nen;Teemu Roos", "authorids": "~Elias_J\u00e4\u00e4saari1;~Ville_Hyv\u00f6nen1;~Teemu_Roos1", "gender": "M;M;M", "homepage": "https://eliasjaasaari.com/;http://www.cs.helsinki.fi/teemu.roos;", "dblp": "194/7765;27/267;168/8572", "google_scholar": "SEdhPaQAAAAJ;js9H6osAAAAJ;XgHEwekAAAAJ", "orcid": ";0000-0001-9470-3759;", "linkedin": ";teemu-roos-7b839013a/;", "or_profile": "~Elias_J\u00e4\u00e4saari1;~Teemu_Roos1;~Ville_Oskari_Hyv\u00f6nen1", "aff": "University of Helsinki;University of Helsinki;Aalto University", "aff_domain": "helsinki.fi;helsinki.fi;aalto.fi", "position": "PhD student;Professor;Postdoc", "bibtex": "@inproceedings{\nj{\\\"a}{\\\"a}saari2024lorann,\ntitle={Lo{RANN}: Low-Rank Matrix Factorization for Approximate Nearest Neighbor Search},\nauthor={Elias J{\\\"a}{\\\"a}saari and Ville Hyv{\\\"o}nen and Teemu Roos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wyYsCI3K7U}\n}", "github": "", "reviewers": "bJwE;wBA6;udkL;rz2s", "pdf_size": 1994285, "rating": "5;6;7;8", "confidence": "3;3;5;1", "soundness": "4;3;3;4", "novelty": "3;2;3;3", "presentation": "4;2;2;4", "wc_summary": "48;73;236;96", "wc_strengths": "26;27;38;10", "wc_weaknesses": "93;57;718;24", "wc_questions": "31;12;139;1", "wc_limitations": "1;11;72;1", "wc_review": "199;180;1203;132", "wc_reply_reviewers": "12;10;107;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.0, 1.4142135623730951 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 113.25, 72.87446397744549 ], "wc_strengths_avg": [ 25.25, 9.98436277385793 ], "wc_weaknesses_avg": [ 223.0, 286.82834587955216 ], "wc_questions_avg": [ 45.75, 54.89706276295664 ], "wc_limitations_avg": [ 21.25, 29.583568074186047 ], "wc_review_avg": [ 428.5, 447.8239051234313 ], "wc_reply_reviewers_avg": [ 32.25, 43.395708313150045 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3162277660168379, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qlcJeH9u-HIJ:scholar.google.com/&scioq=LoRANN:+Low-Rank+Matrix+Factorization+for+Approximate+Nearest+Neighbor+Search&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "helsinki.fi;helsinki.fi;aalto.fi", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Helsinki;Aalto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.helsinki.fi;https://www.aalto.fi", "aff_unique_abbr": "UH;Aalto", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Finland" }, { "title": "Focus On What Matters: Separated Models For Visual-Based RL Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93111", "id": "wz2KvvEk44", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wz2KvvEk44", "openreview": "https://openreview.net/forum?id=wz2KvvEk44", "poster": "/media/PosterPDFs/NeurIPS%202024/93111.png?t=1731555097.6739016", "project": "", "author_site": "Di Zhang, Bowen Lv, Hai Zhang, Feifan Yang, Junqiao Zhao, Hang Yu, Chang Huang, Hongtu Zhou, Chen Ye, changjun jiang", "tldr": "", "abstract": "A primary challenge for visual-based Reinforcement Learning (RL) is to generalize effectively across unseen environments. Although previous studies have explored different auxiliary tasks to enhance generalization, few adopt image reconstruction due to concerns about exacerbating overfitting to task-irrelevant features during training. Perceiving the pre-eminence of image reconstruction in representation learning, we propose SMG (\\blue{S}eparated \\blue{M}odels for \\blue{G}eneralization), a novel approach that exploits image reconstruction for generalization. SMG introduces two model branches to extract task-relevant and task-irrelevant representations separately from visual observations via cooperatively reconstruction. Built upon this architecture, we further emphasize the importance of task-relevant features for generalization. Specifically, SMG incorporates two additional consistency losses to guide the agent's focus toward task-relevant areas across different scenarios, thereby achieving free from overfitting. Extensive experiments in DMC demonstrate the SOTA performance of SMG in generalization, particularly excelling in video-background settings. Evaluations on robotic manipulation tasks further confirm the robustness of SMG in real-world applications. Source code is available at \\url{https://anonymous.4open.science/r/SMG/}.", "keywords": "Reinforcement Learning;Visual-based RL;Generalization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Di Zhang;Bowen Lv;Hai Zhang;Feifan Yang;Junqiao Zhao;Hang Yu;Chang Huang;Hongtu Zhou;Chen Ye;changjun jiang", "authorids": "~Di_Zhang5;~Bowen_Lv1;~Hai_Zhang2;~Feifan_Yang2;~Junqiao_Zhao1;~Hang_Yu14;~Chang_Huang5;~Hongtu_Zhou1;~Chen_Ye1;~changjun_jiang2", "gender": "M;M;M;M;M;M;F;M;M;M", "homepage": "https://github.com/DinoMax00;https://github.com/extreme1228;https://betray12138.github.io/resume/;https://github.com/0x79ff;http://cs1.tongji.edu.cn/~junqiao;https://tinyyukoala.com;https://github.com/huangchanger;https://github.com/HongtuZ;;https://cs.tongji.edu.cn/info/1033/2865.htm", "dblp": ";;;;;;;249/2223;;", "google_scholar": ";;YHqAzxUAAAAJ;;;;;z31IEggAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;;;;;;0000-0002-0609-4226;;0000-0002-2543-8928", "linkedin": ";;;;;;;;;", "or_profile": "~Di_Zhang5;~Bowen_Lv1;~Hai_Zhang2;~Feifan_Yang2;~Junqiao_Zhao1;~Hang_Yu14;~Chang_Huang5;~Hongtu_Zhou1;~Chen_Ye1;~changjun_jiang2", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "Undergrad student;Undergrad student;MS student;Undergrad student;Associate Professor;Undergrad student;MS student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024focus,\ntitle={Focus On What Matters: Separated Models For Visual-Based {RL} Generalization},\nauthor={Di Zhang and Bowen Lv and Hai Zhang and Feifan Yang and Junqiao Zhao and Hang Yu and Chang Huang and Hongtu Zhou and Chen Ye and changjun jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wz2KvvEk44}\n}", "github": "", "reviewers": "iwu6;jV8P;ZRY4;C7ia", "pdf_size": 17606474, "rating": "5;5;7;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;2;3;2", "presentation": "2;3;3;2", "wc_summary": "72;173;90;154", "wc_strengths": "39;114;73;29", "wc_weaknesses": "301;403;99;96", "wc_questions": "61;77;5;57", "wc_limitations": "10;10;10;14", "wc_review": "483;777;277;350", "wc_reply_reviewers": "268;50;0;45", "wc_reply_authors": "326;372;0;33", "reply_reviewers": "2;2;0;1", "reply_authors": "2;3;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 122.25, 42.2751404492049 ], "wc_strengths_avg": [ 63.75, 33.281939546847326 ], "wc_weaknesses_avg": [ 224.75, 132.26559454370587 ], "wc_questions_avg": [ 50.0, 27.03701166919155 ], "wc_limitations_avg": [ 11.0, 1.7320508075688772 ], "wc_review_avg": [ 471.75, 191.08555021246374 ], "wc_reply_reviewers_avg": [ 90.75, 104.171433224277 ], "wc_reply_authors_avg": [ 182.75, 167.45055240279143 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9149062433766138966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Tongji University", "aff_unique_dep": "", "aff_unique_url": "https://www.tongji.edu.cn", "aff_unique_abbr": "Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Hierarchical Selective Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93110", "id": "wzof7Y66xs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=wzof7Y66xs", "openreview": "https://openreview.net/forum?id=wzof7Y66xs", "poster": "/media/PosterPDFs/NeurIPS%202024/93110.png?t=1732963108.2895474", "project": "", "author_site": "Shani Goren, Ido Galil, Ran El-Yaniv", "tldr": "", "abstract": "Deploying deep neural networks for risk-sensitive tasks necessitates an uncertainty estimation mechanism. This paper introduces *hierarchical selective classification*, extending selective classification to a hierarchical setting. Our approach leverages the inherent structure of class relationships, enabling models to reduce the specificity of their predictions when faced with uncertainty. In this paper, we first formalize hierarchical risk and coverage, and introduce hierarchical risk-coverage curves. Next, we develop algorithms for hierarchical selective classification (which we refer to as \"inference rules\"), and propose an efficient algorithm that guarantees a target accuracy constraint with high probability. Lastly, we conduct extensive empirical studies on over a thousand ImageNet classifiers, revealing that training regimes such as CLIP, pretraining on ImageNet21k and knowledge distillation boost hierarchical selective performance.", "keywords": "Hierarchical Selective Classification;Hierarchical Uncertainty;Selective Classification;Non-Bayesian Uncertainty Estimation;CLIP", "primary_area": "other", "supplementary_material": "/attachment/fb32d0bcb881e6d92dbfbcac971ca6f95cab8087.zip", "author": "Shani Goren;Ido Galil;Ran El-Yaniv", "authorids": "~Shani_Goren1;~Ido_Galil1;~Ran_El-Yaniv1", "gender": "F;M;M", "homepage": ";https://idogalil.github.io/;http://www.cs.technion.ac.il/~rani/", "dblp": ";;04/1896", "google_scholar": ";;https://scholar.google.com.tw/citations?user=D9eVSd8AAAAJ", "orcid": ";;", "linkedin": "shani-goren-146aa81b8/;ido-galil/;", "or_profile": "~Shani_Goren1;~Ido_Galil1;~Ran_El-Yaniv1", "aff": "Amazon;Computer Science Departmen, Technion-Israel Institute of Technology;Deci", "aff_domain": "amazon.com;cs.technion.ac.il;deci.ai", "position": "Intern;PhD student;Chief Scientist", "bibtex": "@inproceedings{\ngoren2024hierarchical,\ntitle={Hierarchical Selective Classification},\nauthor={Shani Goren and Ido Galil and Ran El-Yaniv},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=wzof7Y66xs}\n}", "github": "", "reviewers": "XLCN;9FfT;vmdY;h9cA", "pdf_size": 745343, "rating": "5;6;6;6", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;2;3", "wc_summary": "30;19;50;143", "wc_strengths": "31;61;49;114", "wc_weaknesses": "46;156;143;86", "wc_questions": "115;44;98;20", "wc_limitations": "42;16;15;1", "wc_review": "264;296;355;364", "wc_reply_reviewers": "31;72;285;33", "wc_reply_authors": "0;0;489;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 60.5, 48.91063279083598 ], "wc_strengths_avg": [ 63.75, 30.914195768287424 ], "wc_weaknesses_avg": [ 107.75, 44.31915500096995 ], "wc_questions_avg": [ 69.25, 38.674119253061214 ], "wc_limitations_avg": [ 18.5, 14.807092894960848 ], "wc_review_avg": [ 319.75, 41.45102531904368 ], "wc_reply_reviewers_avg": [ 105.25, 105.05801968436299 ], "wc_reply_authors_avg": [ 122.25, 211.74321122529525 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4450469967248579126&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "amazon.com;cs.technion.ac.il;deci.ai", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;Technion-Israel Institute of Technology;Deci", "aff_unique_dep": "Amazon.com, Inc.;Computer Science Department;", "aff_unique_url": "https://www.amazon.com;https://www.technion.ac.il;https://www.deci.ai", "aff_unique_abbr": "Amazon;Technion;Deci", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Israel" }, { "title": "A Polar coordinate system represents syntax in large language models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93109", "id": "x2780VcMOI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x2780VcMOI", "openreview": "https://openreview.net/forum?id=x2780VcMOI", "poster": "", "project": "", "author_site": "Pablo J. Diego Simon, St\u00e9phane d'Ascoli, Emmanuel Chemla, Yair Lakretz, Jean-Remi King", "tldr": "", "abstract": "Originally formalized with symbolic representations, syntactic trees may also be effectively represented in the activations of large language models (LLMs). Indeed, a ''Structural Probe'' can find a subspace of neural activations, where syntactically-related words are relatively close to one-another. However, this syntactic code remains incomplete: the distance between the Structural Probe word embeddings can represent the \\emph{existence} but not the type and direction of syntactic relations. Here, we hypothesize that syntactic relations are, in fact, coded by the relative direction between nearby embeddings. To test this hypothesis, we introduce a ''Polar Probe'' trained to read syntactic relations from both the distance and the direction between word embeddings. Our approach reveals three main findings. First, our Polar Probe successfully recovers the type and direction of syntactic relations, and substantially outperforms the Structural Probe by nearly two folds. Second, we confirm that this polar coordinate system exists in a low-dimensional subspace of the intermediate layers of many LLMs and becomes increasingly precise in the latest frontier models. Third, we demonstrate with a new benchmark that similar syntactic relations are coded similarly across the nested levels of syntactic trees. Overall, this work shows that LLMs spontaneously learn a geometry of neural activations that explicitly represents the main symbolic structures of linguistic theory.", "keywords": "Natural Language Processing;Large Language Models;Interpretability;Syntax;Linguistics;Cognitive Science", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Pablo J. Diego Simon;St\u00e9phane d'Ascoli;Emmanuel Chemla;Yair Lakretz;Jean-Remi King", "authorids": "~Pablo_J._Diego_Simon1;~St\u00e9phane_d'Ascoli1;~Emmanuel_Chemla1;~Yair_Lakretz2;~Jean-Remi_King1", "gender": "M;M;;M;M", "homepage": "https://www.linkedin.com/in/pablo-j-diego-sim%C3%B3n-b3475a212/;https://sdascoli.github.io/;http://www.emmanuel.chemla.free.fr;https://yairlak.github.io/;https://kingjr.github.io/", "dblp": ";227/3250;85/8850;166/5196;", "google_scholar": ";2GcqQgYAAAAJ;;https://scholar.google.co.il/citations?user=cNnJ5YUAAAAJ;XZOgIwEAAAAJ", "orcid": ";0000-0002-3131-3371;0000-0002-8423-5880;0000-0001-8774-6427;", "linkedin": ";st%C3%A9phane-d-ascoli-182642130/;;;", "or_profile": "~Pablo_J._Diego_Simon1;~St\u00e9phane_d'Ascoli1;~Emmanuel_Chemla1;~Yair_Lakretz2;~Jean-Remi_King1", "aff": "ENS - PSL;Meta Facebook;CNRS;Ecole Normale Sup\u00e9rieure de Paris;CNRS", "aff_domain": "psl.eu;facebook.com;cnrs.fr;ens.fr;cnrs.fr", "position": "PhD student;Researcher;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsimon2024a,\ntitle={A Polar coordinate system represents syntax in large language models},\nauthor={Pablo J. Diego Simon and St{\\'e}phane d'Ascoli and Emmanuel Chemla and Yair Lakretz and Jean-Remi King},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x2780VcMOI}\n}", "github": "", "reviewers": "tm4o;9UbP;rWeD", "pdf_size": 1821283, "rating": "5;5;8", "confidence": "2;3;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "62;88;61", "wc_strengths": "42;84;117", "wc_weaknesses": "75;180;147", "wc_questions": "32;225;1", "wc_limitations": "1;5;1", "wc_review": "212;582;327", "wc_reply_reviewers": "82;201;101", "wc_reply_authors": "175;0;0", "reply_reviewers": "1;1;1", "reply_authors": "2;1;1", "rating_avg": [ 6.0, 1.4142135623730951 ], "confidence_avg": [ 3.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 70.33333333333333, 12.498888839501783 ], "wc_strengths_avg": [ 81.0, 30.692018506445613 ], "wc_weaknesses_avg": [ 134.0, 43.840620433565945 ], "wc_questions_avg": [ 86.0, 99.09927682211746 ], "wc_limitations_avg": [ 2.3333333333333335, 1.8856180831641267 ], "wc_review_avg": [ 373.6666666666667, 154.61421524412157 ], "wc_reply_reviewers_avg": [ 128.0, 52.19833969262496 ], "wc_reply_authors_avg": [ 58.333333333333336, 82.49579113843053 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13090516097648246206&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "psl.eu;facebook.com;cnrs.fr;ens.fr;cnrs.fr", "author_num": 5, "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "\u00c9cole Normale Sup\u00e9rieure - PSL;Meta;Centre National de la Recherche Scientifique;Ecole Normale Sup\u00e9rieure de Paris", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.ens.psl.eu;https://meta.com;https://www.cnrs.fr;https://www.ens.fr", "aff_unique_abbr": "ENS;Meta;CNRS;ENS Paris", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "France;United States" }, { "title": "Dynamic Model Predictive Shielding for Provably Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93108", "id": "x2zY4hZcmg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x2zY4hZcmg", "openreview": "https://openreview.net/forum?id=x2zY4hZcmg", "poster": "/media/PosterPDFs/NeurIPS%202024/93108.png?t=1733728857.0974078", "project": "", "author_site": "Arko Banerjee, Kia Rahmani, Joydeep Biswas, Isil Dillig", "tldr": "", "abstract": "Among approaches for provably safe reinforcement learning, Model Predictive Shielding (MPS) has proven effective at complex tasks in continuous, high-dimensional state spaces, by leveraging a *backup policy* to ensure safety when the learned policy attempts to take risky actions. However, while MPS can ensure safety both during and after training, it often hinders task progress due to the conservative and task-oblivious nature of backup policies.\nThis paper introduces *Dynamic Model Predictive Shielding* (DMPS), which optimizes reinforcement learning objectives while maintaining provable safety. DMPS employs a local planner to dynamically select safe recovery actions that maximize both short-term progress as well as long-term rewards. Crucially, the planner and the neural policy play a synergistic role in DMPS. When planning recovery actions for ensuring safety, the planner utilizes the neural policy to estimate long-term rewards, allowing it to *observe* beyond its short-term planning horizon. \nConversely, the neural policy under training learns from the recovery plans proposed by the planner, converging to policies that are both *high-performing* and *safe* in practice.\nThis approach guarantees safety during and after training, with bounded recovery regret that decreases exponentially with planning horizon depth. Experimental results demonstrate that DMPS converges to policies that rarely require shield interventions after training and achieve higher rewards compared to several state-of-the-art baselines.", "keywords": "Safe Reinforcement Learning;Model Predictive Shielding;Planning;MCTS", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/8fcfb7bfbfe2d328ca847d4c47888bf071c63929.zip", "author": "Arko Banerjee;Kia Rahmani;Joydeep Biswas;Isil Dillig", "authorids": "~Arko_Banerjee1;~Kia_Rahmani1;~Joydeep_Biswas1;~Isil_Dillig1", "gender": "M;M;M;F", "homepage": ";https://kiarahmani.github.io/;https://www.joydeepb.com/;https://www.cs.utexas.edu/~isil/", "dblp": ";220/1284;84/73;", "google_scholar": "J5ktGaEAAAAJ;XF7r9VQAAAAJ;https://scholar.google.com.tw/citations?user=f28F1YUAAAAJ;", "orcid": ";;0000-0002-1211-1731;", "linkedin": ";;;", "or_profile": "~Arko_Banerjee1;~Kia_Rahmani1;~Joydeep_Biswas1;~Isil_Dillig1", "aff": "University of Texas at Austin;University of Texas at Austin;The University of Texas at Austin;University of Texas, Austin", "aff_domain": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "position": "Undergrad student;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nbanerjee2024dynamic,\ntitle={Dynamic Model Predictive Shielding for Provably Safe Reinforcement Learning},\nauthor={Arko Banerjee and Kia Rahmani and Joydeep Biswas and Isil Dillig},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x2zY4hZcmg}\n}", "github": "", "reviewers": "d437;BFvm;xs2Y;jAJK;NRWz", "pdf_size": 1993009, "rating": "5;5;6;7;7", "confidence": "3;4;4;3;5", "soundness": "2;3;3;3;3", "novelty": "3;2;3;4;3", "presentation": "3;3;3;4;4", "wc_summary": "88;77;82;180;197", "wc_strengths": "31;85;120;140;170", "wc_weaknesses": "225;375;29;185;415", "wc_questions": "46;163;103;39;102", "wc_limitations": "25;124;12;4;2", "wc_review": "415;824;346;548;886", "wc_reply_reviewers": "92;193;25;183;58", "wc_reply_authors": "448;24;33;78;31", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 6.0, 0.8944271909999159 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 124.8, 52.403816654896424 ], "wc_strengths_avg": [ 109.2, 47.88068504104761 ], "wc_weaknesses_avg": [ 245.8, 138.8904604355533 ], "wc_questions_avg": [ 90.6, 45.11585087305791 ], "wc_limitations_avg": [ 33.4, 46.01564951187802 ], "wc_review_avg": [ 603.8, 216.02999791695598 ], "wc_reply_reviewers_avg": [ 110.2, 67.03849640318613 ], "wc_reply_authors_avg": [ 122.8, 163.71609572671832 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2988071523335984, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16236337683667480398&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "utexas.edu;utexas.edu;cs.utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Object Detection with Theoretical Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93107", "id": "x33oWJQyH0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x33oWJQyH0", "openreview": "https://openreview.net/forum?id=x33oWJQyH0", "poster": "/media/PosterPDFs/NeurIPS%202024/93107.png?t=1729949686.4324992", "project": "", "author_site": "Marian Longa, Jo\u00e3o Henriques", "tldr": "", "abstract": "Unsupervised object detection using deep neural networks is typically a difficult problem with few to no guarantees about the learned representation. In this work we present the first unsupervised object detection method that is theoretically guaranteed to recover the true object positions up to quantifiable small shifts. We develop an unsupervised object detection architecture and prove that the learned variables correspond to the true object positions up to small shifts related to the encoder and decoder receptive field sizes, the object sizes, and the widths of the Gaussians used in the rendering process. We perform detailed analysis of how the error depends on each of these variables and perform synthetic experiments validating our theoretical predictions up to a precision of individual pixels. We also perform experiments on CLEVR-based data and show that, unlike current SOTA object detection methods (SAM, CutLER), our method's prediction errors always lie within our theoretical bounds. We hope that this work helps open up an avenue of research into object detection methods with theoretical guarantees.", "keywords": "unsupervised object detection;object detection;unsupervised learning;representation learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Marian Longa;Joao F. Henriques", "authorids": "~Marian_Longa1;~Joao_F._Henriques1", "gender": ";M", "homepage": "http://marianlonga.com;http://www.robots.ox.ac.uk/~joao/", "dblp": ";31/8617.html", "google_scholar": ";aCQjyp0AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Marian_Longa1;~Joao_F._Henriques1", "aff": "University of Oxford;University of Oxford", "aff_domain": "ox.ac.uk;ox.ac.uk", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nlonga2024unsupervised,\ntitle={Unsupervised Object Detection with Theoretical Guarantees},\nauthor={Marian Longa and Joao F. Henriques},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x33oWJQyH0}\n}", "github": "", "reviewers": "x9ub;pKP5;YrtR;ghv9", "pdf_size": 1262635, "rating": "5;6;6;7", "confidence": "5;4;5;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;2;4", "wc_summary": "59;58;165;35", "wc_strengths": "33;79;44;34", "wc_weaknesses": "120;67;153;434", "wc_questions": "89;75;5;2", "wc_limitations": "73;1;1;2", "wc_review": "374;280;368;507", "wc_reply_reviewers": "65;16;27;64", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 79.25, 50.43002577829998 ], "wc_strengths_avg": [ 47.5, 18.688231591030757 ], "wc_weaknesses_avg": [ 193.5, 142.20144162419732 ], "wc_questions_avg": [ 42.75, 39.57508686029634 ], "wc_limitations_avg": [ 19.25, 31.03526220285564 ], "wc_review_avg": [ 382.25, 81.06902922818307 ], "wc_reply_reviewers_avg": [ 43.0, 21.85177338341216 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:tUPFgBSCSq0J:scholar.google.com/&scioq=Unsupervised+Object+Detection+with+Theoretical+Guarantees&hl=en&as_sdt=0,11", "gs_version_total": 7, "email": "ox.ac.uk;ox.ac.uk", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "DropBP: Accelerating Fine-Tuning of Large Language Models by Dropping Backward Propagation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93106", "id": "x4EoTQW7ka", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x4EoTQW7ka", "openreview": "https://openreview.net/forum?id=x4EoTQW7ka", "poster": "/media/PosterPDFs/NeurIPS%202024/93106.png?t=1731339823.407223", "project": "", "author_site": "Sunghyeon Woo, Baeseong Park, Byeongwook Kim, Minjung Jo, Se Jung Kwon, Dongsuk Jeon, Dongsoo Lee", "tldr": "", "abstract": "Large language models (LLMs) have achieved significant success across various domains. However, training these LLMs typically involves substantial memory and computational costs during both forward and backward propagation. While parameter-efficient fine-tuning (PEFT) considerably reduces the training memory associated with parameters, it does not address the significant computational costs and activation memory. In this paper, we propose Dropping Backward Propagation (DropBP), a novel approach designed to reduce computational costs and activation memory while maintaining accuracy. DropBP randomly drops layers during backward propagation, which is essentially equivalent to training shallow submodules generated by undropped layers and residual connections. Additionally, DropBP calculates the sensitivity of each layer to assign an appropriate drop rate, thereby stabilizing the training process. DropBP is not only applicable to full fine-tuning but can also be orthogonally integrated with all types of PEFT by dropping layers during backward propagation. Specifically, DropBP can reduce training time by 44% with comparable accuracy to the baseline, accelerate convergence to the same perplexity by 1.5$\\times$, and enable training with a sequence length 6.2$\\times$ larger on a single NVIDIA-A100 GPU. Furthermore, our DropBP enabled a throughput increase of 79% on a NVIDIA A100 GPU and 117% on an Intel Gaudi2 HPU. The code is available at [https://github.com/WooSunghyeon/dropbp](https://github.com/WooSunghyeon/dropbp).", "keywords": "Training Acceleration;Memory Efficient Fine-Tuning;Large Language Models;Backpropagation Optimization.", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Sunghyeon Woo;Baeseong park;Byeongwook Kim;Minjung Jo;Se Jung Kwon;Dongsuk Jeon;Dongsoo Lee", "authorids": "~Sunghyeon_Woo1;~Baeseong_park1;~Byeongwook_Kim1;~Minjung_Jo1;~Se_Jung_Kwon1;~Dongsuk_Jeon1;~Dongsoo_Lee1", "gender": "M;;F;M;M;M;M", "homepage": "http://mms.snu.ac.kr/;;https://www.linkedin.com/in/minjung-jo-781853207/;;http://mms.snu.ac.kr;;https://baeseong.tistory.com/", "dblp": "319/4560;220/5405;;119/5676;28/9878;11/9680;241/6925.html", "google_scholar": ";https://scholar.google.co.kr/citations?user=OjfC7gUAAAAJ;;https://scholar.google.co.kr/citations?user=8eTxKOkAAAAJ;_7GzTD4AAAAJ;ALiieEkAAAAJ;https://scholar.google.co.kr/citations?user=RMmyMJsAAAAJ", "orcid": ";;;;;;", "linkedin": ";;;se-jung-kwon-305503175/;;;baeseong-park/", "or_profile": "~Sunghyeon_Woo1;~Byeongwook_Kim1;~Minjung_Jo1;~Se_Jung_Kwon1;~Dongsuk_Jeon1;~Dongsoo_Lee1;~Bae_Seong_Park1", "aff": "NAVER;NAVER CLOUD;;NAVER Cloud;Seoul National University;NAVER CLOVA;NAVER Clova", "aff_domain": "navercorp.com;navercorp.com;;navercorp.com;snu.ac.kr;navercorp.com;navercorp.com", "position": "Intern;Researcher;;AI Researcher;Associate Professor;Executive Officer;Software Engineer", "bibtex": "@inproceedings{\nwoo2024dropbp,\ntitle={Drop{BP}: Accelerating Fine-Tuning of Large Language Models by Dropping Backward Propagation},\nauthor={Sunghyeon Woo and Baeseong park and Byeongwook Kim and Minjung Jo and Se Jung Kwon and Dongsuk Jeon and Dongsoo Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x4EoTQW7ka}\n}", "github": "", "reviewers": "NAjC;Ep4o;nmtJ", "pdf_size": 4047758, "rating": "6;6;6", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;3;2", "presentation": "2;3;3", "wc_summary": "69;92;32", "wc_strengths": "71;95;53", "wc_weaknesses": "189;72;164", "wc_questions": "20;91;47", "wc_limitations": "1;4;6", "wc_review": "350;354;302", "wc_reply_reviewers": "142;25;22", "wc_reply_authors": "84;50;50", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 64.33333333333333, 24.716166549222166 ], "wc_strengths_avg": [ 73.0, 17.204650534085253 ], "wc_weaknesses_avg": [ 141.66666666666666, 50.307940614667274 ], "wc_questions_avg": [ 52.666666666666664, 29.26127512980633 ], "wc_limitations_avg": [ 3.6666666666666665, 2.0548046676563256 ], "wc_review_avg": [ 335.3333333333333, 23.6267268622258 ], "wc_reply_reviewers_avg": [ 63.0, 55.87486017879597 ], "wc_reply_authors_avg": [ 61.333333333333336, 16.027753706895076 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11800679749634832740&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "navercorp.com;navercorp.com;;navercorp.com;snu.ac.kr;navercorp.com;navercorp.com", "author_num": 7, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "NAVER Corporation;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.naver.com;https://www.snu.ac.kr", "aff_unique_abbr": "NAVER;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "$\\text{ID}^3$: Identity-Preserving-yet-Diversified Diffusion Models for Synthetic Face Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93105", "id": "x4HMnqs6IE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x4HMnqs6IE", "openreview": "https://openreview.net/forum?id=x4HMnqs6IE", "poster": "/media/PosterPDFs/NeurIPS%202024/93105.png?t=1730969476.561054", "project": "", "author_site": "Jianqing Xu, Shen Li, Jiaying Wu, Miao Xiong, Ailin Deng, Jiazhen Ji, Yuge Huang, Guodong Mu, Wenjie Feng, Shouhong Ding, Bryan Hooi", "tldr": "", "abstract": "Synthetic face recognition (SFR) aims to generate synthetic face datasets that mimic the distribution of real face data, which allows for training face recognition models in a privacy-preserving manner. Despite the remarkable potential of diffusion models in image generation, current diffusion-based SFR models struggle with generalization to real-world faces. To address this limitation, we outline three key objectives for SFR: (1) promoting diversity across identities (inter-class diversity), (2) ensuring diversity within each identity by injecting various facial attributes (intra-class diversity), and (3) maintaining identity consistency within each identity group (intra-class identity preservation). Inspired by these goals, we introduce a diffusion-fueled SFR model termed $\\text{ID}^3$. $\\text{ID}^3$ employs an ID-preserving loss to generate diverse yet identity-consistent facial appearances. Theoretically, we show that minimizing this loss is equivalent to maximizing the lower bound of an adjusted conditional log-likelihood over ID-preserving data. This equivalence motivates an ID-preserving sampling algorithm, which operates over an adjusted gradient vector field, enabling the generation of fake face recognition datasets that approximate the distribution of real-world faces. Extensive experiments across five challenging benchmarks validate the advantages of $\\text{ID}^3$.", "keywords": "synthetic face recognition", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jianqing Xu;Shen Li;Jiaying Wu;Miao Xiong;Ailin Deng;Jiazhen Ji;Yuge Huang;Guodong Mu;Wenjie Feng;Shouhong Ding;Bryan Hooi", "authorids": "~Jianqing_Xu1;~Shen_Li2;~Jiaying_Wu2;~Miao_Xiong2;~Ailin_Deng1;~Jiazhen_Ji1;~Yuge_Huang1;~Guodong_Mu1;~Wenjie_Feng1;~Shouhong_Ding3;~Bryan_Hooi1", "gender": ";M;;F;;M;M;M;M;M;", "homepage": ";https://github.com/MathsShen;https://jiayingwu19.github.io/;https://miaoxiong2320.github.io/;https://d-ailin.github.io;https://www.baidu.com/;;https://muyouhang.github.io/;https://wenchieh.github.io;;http://bhooi.github.io", "dblp": ";;15/4299.html;;70/3580;;233/3678;220/7581;126/2373-1;119/6735;169/9975", "google_scholar": ";;mrfO62wAAAAJ;yQ4U_5IAAAAJ;;;R9w5G9cAAAAJ;ksJ15z4AAAAJ;EV1kntYAAAAJ;OGf40fkAAAAJ;", "orcid": ";;;;;0000-0003-2708-9319;;0000-0001-6246-1724;0000-0003-3636-0035;0000-0002-3175-3553;0000-0002-5645-1754", "linkedin": ";;;miao-xiong-9b1892187/;;;;;;;", "or_profile": "~Jianqing_Xu1;~Shen_Li2;~Jiaying_Wu2;~Miao_Xiong2;~Ailin_Deng1;~Jiazhen_Ji1;~Yuge_Huang1;~Guodong_Mu1;~Wenjie_Feng1;~Shouhong_Ding3;~Bryan_Hooi1", "aff": ";National University of Singapore;National University of Singapore;National University of Singapore;National University of Singapore;;Tencent Youtu Lab;Tencent Youtu Lab;National Universiasty of Singapore;Tencent Youtu Lab;National University of Singapore", "aff_domain": ";u.nus.edu;u.nus.edu;u.nus.edu;nus.edu.sg;;tencent.com;tencent.com;nus.edu.sg;tencent.com;nus.edu.sg", "position": ";Researcher;PhD student;PhD student;PhD student;;Researcher;Researcher;Postdoc;researcher;Assistant Professor", "bibtex": "@inproceedings{\nxu2024textid,\ntitle={\\${\\textbackslash}text\\{{ID}\\}{\\textasciicircum}3\\$: Identity-Preserving-yet-Diversified Diffusion Models for Synthetic Face Recognition},\nauthor={Jianqing Xu and Shen Li and Jiaying Wu and Miao Xiong and Ailin Deng and Jiazhen Ji and Yuge Huang and Guodong Mu and Wenjie Feng and Shouhong Ding and Bryan Hooi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x4HMnqs6IE}\n}", "github": "", "reviewers": "37yP;enp6;ThNX;qiUe", "pdf_size": 6207242, "rating": "4;5;5;8", "confidence": "4;4;4;5", "soundness": "2;2;3;4", "novelty": "2;2;3;4", "presentation": "3;3;3;4", "wc_summary": "57;50;46;69", "wc_strengths": "37;20;5;66", "wc_weaknesses": "208;128;5;38", "wc_questions": "106;69;153;63", "wc_limitations": "97;29;7;41", "wc_review": "505;296;216;277", "wc_reply_reviewers": "547;44;27;0", "wc_reply_authors": "947;82;0;0", "reply_reviewers": "2;1;1;0", "reply_authors": "3;2;1;1", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 55.5, 8.73212459828649 ], "wc_strengths_avg": [ 32.0, 22.66053838724932 ], "wc_weaknesses_avg": [ 94.75, 79.38316887098927 ], "wc_questions_avg": [ 97.75, 35.89829383132296 ], "wc_limitations_avg": [ 43.5, 33.20767983464066 ], "wc_review_avg": [ 323.5, 108.87722443192608 ], "wc_reply_reviewers_avg": [ 154.5, 227.15248182663555 ], "wc_reply_authors_avg": [ 257.25, 399.63194003983216 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12622584893490570385&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";u.nus.edu;u.nus.edu;u.nus.edu;nus.edu.sg;;tencent.com;tencent.com;nus.edu.sg;tencent.com;nus.edu.sg", "author_num": 11, "aff_unique_index": "0;0;0;0;1;1;0;1;0", "aff_unique_norm": "National University of Singapore;Tencent", "aff_unique_dep": ";Youtu Lab", "aff_unique_url": "https://www.nus.edu.sg;https://www.tencent.com", "aff_unique_abbr": "NUS;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Pard: Permutation-Invariant Autoregressive Diffusion for Graph Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93104", "id": "x4Kk4FxLs3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x4Kk4FxLs3", "openreview": "https://openreview.net/forum?id=x4Kk4FxLs3", "poster": "", "project": "", "author_site": "Lingxiao Zhao, Xueying Ding, Leman Akoglu", "tldr": "", "abstract": "Graph generation has been dominated by autoregressive models due to their simplicity and effectiveness, despite their sensitivity to ordering. Yet diffusion models have garnered increasing attention, as they offer comparable performance while being permutation-invariant. Current graph diffusion models generate graphs in a one-shot fashion, but they require extra features and thousands of denoising steps to achieve optimal performance. We introduce PARD, a Permutation-invariant Auto Regressive Diffusion model that integrates diffusion models with autoregressive methods. PARD harnesses the effectiveness and efficiency of the autoregressive model while maintaining permutation invariance without ordering sensitivity. Specifically, we show that contrary to sets, elements in a graph are not entirely un-ordered and there is a unique partial order for nodes and edges. With this partial order, PARD generates a graph in a block-by-block, autoregressive fashion, where each block\u2019s probability is conditionally modeled by a shared diffusion model with an equivariant network. To ensure efficiency while being expressive, we further propose a higher-order graph transformer, which integrates transformer with PPGN (Maronet al., 2019). Like GPT, we extend the higher-order graph transformer to support parallel training of all blocks. Without any extra features, PARD achieves state-of-the-art performance on molecular and non-molecular datasets, and scales to large datasets like MOSES containing 1.9M molecules.", "keywords": "diffusion;generative model;graph generation;graph generative model", "primary_area": "generative_models", "supplementary_material": "", "author": "Lingxiao Zhao;Xueying Ding;Leman Akoglu", "authorids": "~Lingxiao_Zhao1;~Xueying_Ding1;~Leman_Akoglu3", "gender": "M;F;F", "homepage": "http://lingxiaozhao.com/;;http://www.andrew.cmu.edu/user/lakoglu/", "dblp": ";;02/6979.html", "google_scholar": "QKslW6EAAAAJ;U9CMsh0AAAAJ;4ITkr_kAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lingxiao_Zhao1;~Xueying_Ding1;~Leman_Akoglu3", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhao2024pard,\ntitle={Pard: Permutation-Invariant Autoregressive Diffusion for Graph Generation},\nauthor={Lingxiao Zhao and Xueying Ding and Leman Akoglu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x4Kk4FxLs3}\n}", "github": "", "reviewers": "adow;KWRX;2orD;SjHr", "pdf_size": 1798574, "rating": "3;6;6;7", "confidence": "4;3;3;3", "soundness": "1;3;3;3", "novelty": "2;4;3;3", "presentation": "3;2;3;4", "wc_summary": "46;87;38;60", "wc_strengths": "27;40;60;64", "wc_weaknesses": "214;62;77;19", "wc_questions": "14;113;35;19", "wc_limitations": "24;10;1;14", "wc_review": "325;312;211;176", "wc_reply_reviewers": "126;78;11;0", "wc_reply_authors": "962;156;39;20", "reply_reviewers": "2;1;1;0", "reply_authors": "6;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 57.75, 18.632968094214082 ], "wc_strengths_avg": [ 47.75, 15.039531242695032 ], "wc_weaknesses_avg": [ 93.0, 73.030815413769 ], "wc_questions_avg": [ 45.25, 39.87715511417533 ], "wc_limitations_avg": [ 12.25, 8.257572282456872 ], "wc_review_avg": [ 256.0, 63.87879147260067 ], "wc_reply_reviewers_avg": [ 53.75, 51.295102105366745 ], "wc_reply_authors_avg": [ 294.25, 389.0272323372748 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.0, 1.7320508075688772 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9622504486493763, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8764349416503774372&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "andrew.cmu.edu;cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Reward Best Policy Identification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93103", "id": "x69O84Df2G", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x69O84Df2G", "openreview": "https://openreview.net/forum?id=x69O84Df2G", "poster": "/media/PosterPDFs/NeurIPS%202024/93103.png?t=1733434253.7252274", "project": "", "author_site": "Alessio Russo, Filippo Vannella", "tldr": "", "abstract": "Rewards are a critical aspect of formulating Reinforcement Learning (RL) problems; often, one may be interested in testing multiple reward functions, or the problem may naturally involve multiple rewards. \nIn this study, we investigate the _Multi-Reward Best Policy Identification_ (MR-BPI) problem, where the goal is to determine the best policy for all rewards in a given set $\\mathcal{R}$ with minimal sample complexity and a prescribed confidence level. We derive a fundamental instance-specific lower bound on the sample complexity required by any Probably Correct (PC) algorithm in this setting. This bound guides the design of an optimal exploration policy attaining minimal sample complexity. However, this lower bound involves solving a hard non-convex optimization problem. We address this challenge by devising a convex approximation, enabling the design of sample-efficient algorithms. We propose MR-NaS, a PC algorithm with competitive performance on hard-exploration tabular environments. Extending this approach to Deep RL (DRL), we also introduce DBMR-BPI, an efficient algorithm for model-free exploration in multi-reward settings.", "keywords": "multiple rewards;best policy identification;pure exploration;active exploration;reinforcement learning;sequential decision making", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/ca99df705ace592e9b3439b6ae89fd84c2742d0d.zip", "author": "Alessio Russo;Filippo Vannella", "authorids": "~Alessio_Russo1;~Filippo_Vannella1", "gender": ";M", "homepage": ";", "dblp": ";221/3638", "google_scholar": ";QedUtsAAAAAJ", "orcid": ";0000-0002-7668-0650", "linkedin": ";filippovannella/", "or_profile": "~Alessio_Russo1;~Filippo_Vannella1", "aff": ";Ericsson", "aff_domain": ";ericsson.com", "position": ";Researcher", "bibtex": "@inproceedings{\nrusso2024multireward,\ntitle={Multi-Reward Best Policy Identification},\nauthor={Alessio Russo and Filippo Vannella},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x69O84Df2G}\n}", "github": "", "reviewers": "N1yn;1UCp;YFyE", "pdf_size": 14230466, "rating": "4;6;7", "confidence": "3;3;2", "soundness": "2;3;4", "novelty": "2;2;3", "presentation": "2;4;2", "wc_summary": "66;59;165", "wc_strengths": "63;23;191", "wc_weaknesses": "482;145;206", "wc_questions": "167;8;559", "wc_limitations": "10;36;118", "wc_review": "788;271;1239", "wc_reply_reviewers": "118;11;61", "wc_reply_authors": "149;34;33", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.666666666666667, 1.247219128924647 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.9428090415820634 ], "wc_summary_avg": [ 96.66666666666667, 48.40339749324306 ], "wc_strengths_avg": [ 92.33333333333333, 71.6534871602368 ], "wc_weaknesses_avg": [ 277.6666666666667, 146.61590030491993 ], "wc_questions_avg": [ 244.66666666666666, 231.5517700692919 ], "wc_limitations_avg": [ 54.666666666666664, 46.02414825091522 ], "wc_review_avg": [ 766.0, 395.4904128631523 ], "wc_reply_reviewers_avg": [ 63.333333333333336, 43.71371511195186 ], "wc_reply_authors_avg": [ 72.0, 54.448752664011195 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.7559289460184545, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4856889655231324195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";ericsson.com", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "Ericsson", "aff_unique_dep": "", "aff_unique_url": "https://www.ericsson.com", "aff_unique_abbr": "Ericsson", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "title": "Limits of Transformer Language Models on Learning to Compose Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93102", "id": "x7AD0343Jz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x7AD0343Jz", "openreview": "https://openreview.net/forum?id=x7AD0343Jz", "poster": "/media/PosterPDFs/NeurIPS%202024/93102.png?t=1732818981.6281393", "project": "", "author_site": "Jonathan Thomm, Giacomo Camposampiero, Aleksandar Terzic, Michael Hersche, Bernhard Sch\u00f6lkopf, Abbas Rahimi", "tldr": "", "abstract": "We analyze the capabilities of Transformer language models in learning compositional discrete tasks. To this end, we evaluate training LLaMA models and prompting GPT-4 and Gemini on four tasks demanding to learn a composition of several discrete sub-tasks. In particular, we measure how well these models can reuse primitives observable in the sub-tasks to learn the composition task. Our results indicate that compositional learning in state-of-the-art Transformer language models is highly sample inefficient: LLaMA requires more data samples than relearning all sub-tasks from scratch to learn the compositional task; in-context prompting with few samples is unreliable and fails at executing the sub-tasks or correcting the errors in multi-round code generation. Further, by leveraging complexity theory, we support these findings with a theoretical analysis focused on the sample inefficiency of gradient descent in memorizing feedforward models. We open source our code at https://github.com/IBM/limitations-lm-algorithmic-compositional-learning.", "keywords": "Few-shot Compositional Learning;Compositionality;Sample Efficiency;Algorithmic Learning;Large Language Models;Transformers", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jonathan Thomm;Giacomo Camposampiero;Aleksandar Terzic;Michael Hersche;Bernhard Sch\u00f6lkopf;Abbas Rahimi", "authorids": "~Jonathan_Thomm1;~Giacomo_Camposampiero1;~Aleksandar_Terzic1;~Michael_Hersche1;~Bernhard_Sch\u00f6lkopf1;~Abbas_Rahimi1", "gender": "M;;M;M;;M", "homepage": ";;https://research.ibm.com/people/aleksandar-terzic--2;https://research.ibm.com/people/michael-hersche--1;;https://research.ibm.com/people/abbas-rahimi", "dblp": "352/5370.html;330/3568;;231/2484;;48/9350", "google_scholar": ";B_rpizsAAAAJ;;uhC6m3EAAAAJ;;yx0pEmYAAAAJ", "orcid": ";0009-0000-7315-9790;;0000-0003-3065-7639;;0000-0003-3141-4970", "linkedin": "jonathan-thomm/?locale=en_US;;;;;", "or_profile": "~Jonathan_Thomm1;~Giacomo_Camposampiero1;~Aleksandar_Terzic1;~Michael_Hersche1;~Bernhard_Sch\u00f6lkopf1;~Abbas_Rahimi1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;International Business Machines;International Business Machines;International Business Machines;;IBM Research - Zurich", "aff_domain": "inf.ethz.ch;ibm.com;ibm.com;ibm.com;;zurich.ibm.com", "position": "MS student;PhD student;PhD student;Postdoc;;Principal Researcher", "bibtex": "@inproceedings{\nthomm2024limits,\ntitle={Limits of Transformer Language Models on Learning to Compose Algorithms},\nauthor={Jonathan Thomm and Giacomo Camposampiero and Aleksandar Terzic and Michael Hersche and Bernhard Sch{\\\"o}lkopf and Abbas Rahimi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x7AD0343Jz}\n}", "github": "", "reviewers": "Fa35;qmuR;teuc;DGs4", "pdf_size": 1114847, "rating": "4;4;6;7", "confidence": "3;2;2;4", "soundness": "2;2;3;4", "novelty": "2;2;2;3", "presentation": "1;2;3;3", "wc_summary": "87;99;147;179", "wc_strengths": "65;72;288;181", "wc_weaknesses": "336;88;43;233", "wc_questions": "2;74;23;431", "wc_limitations": "12;24;30;20", "wc_review": "502;357;531;1044", "wc_reply_reviewers": "287;0;35;179", "wc_reply_authors": "1102;27;46;207", "reply_reviewers": "1;0;1;1", "reply_authors": "3;2;2;3", "rating_avg": [ 5.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 128.0, 37.027017163147235 ], "wc_strengths_avg": [ 151.5, 91.24828765516644 ], "wc_weaknesses_avg": [ 175.0, 116.4881968269747 ], "wc_questions_avg": [ 132.5, 174.31652245269237 ], "wc_limitations_avg": [ 21.5, 6.5383484153110105 ], "wc_review_avg": [ 608.5, 259.93316448656566 ], "wc_reply_reviewers_avg": [ 125.25, 114.98342271823361 ], "wc_reply_authors_avg": [ 345.5, 442.32821524293473 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2349413857326181256&as_sdt=5,24&sciodt=0,24&hl=en", "gs_version_total": 4, "email": "inf.ethz.ch;ibm.com;ibm.com;ibm.com;;zurich.ibm.com", "author_num": 6, "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "ETH Zurich;International Business Machines Corporation;IBM", "aff_unique_dep": "Department of Computer Science;;Research", "aff_unique_url": "https://www.ethz.ch;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "ETHZ;IBM;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "MeshFormer : High-Quality Mesh Generation with 3D-Guided Reconstruction Model", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93101", "id": "x7pjdDod6Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x7pjdDod6Z", "openreview": "https://openreview.net/forum?id=x7pjdDod6Z", "poster": "", "project": "", "author_site": "Minghua Liu, Chong Zeng, Xinyue Wei, Ruoxi Shi, Linghao Chen, Chao Xu, Mengqi Zhang, Zhaoning Wang, Xiaoshuai Zhang, Isabella Liu, Hongzhi Wu, Hao Su", "tldr": "", "abstract": "Open-world 3D reconstruction models have recently garnered significant attention. However, without sufficient 3D inductive bias, existing methods typically entail expensive training costs and struggle to extract high-quality 3D meshes. In this work, we introduce MeshFormer, a sparse-view reconstruction model that explicitly leverages 3D native structure, input guidance, and training supervision. Specifically, instead of using a triplane representation, we store features in 3D sparse voxels and combine transformers with 3D convolutions to leverage an explicit 3D structure and projective bias. In addition to sparse-view RGB input, we require the network to take input and generate corresponding normal maps. The input normal maps can be predicted by 2D diffusion models, significantly aiding in the guidance and refinement of the geometry's learning. Moreover, by combining Signed Distance Function (SDF) supervision with surface rendering, we directly learn to generate high-quality meshes without the need for complex multi-stage training processes. By incorporating these explicit 3D biases, MeshFormer can be trained efficiently and deliver high-quality textured meshes with fine-grained geometric details. It can also be integrated with 2D diffusion models to enable fast single-image-to-3D and text-to-3D tasks. **Videos are available at https://meshformer3d.github.io/**", "keywords": "sparse view 3D reconstruction;3D generation;3D AIGC;reconstruction model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Minghua Liu;Chong Zeng;Xinyue Wei;Ruoxi Shi;Linghao Chen;Chao Xu;Mengqi Zhang;Zhaoning Wang;Xiaoshuai Zhang;Isabella Liu;Hongzhi Wu;Hao Su", "authorids": "~Minghua_Liu1;~Chong_Zeng1;~Xinyue_Wei1;~Ruoxi_Shi1;~Linghao_Chen2;~Chao_Xu6;~Mengqi_Zhang2;~Zhaoning_Wang2;~Xiaoshuai_Zhang1;~Isabella_Liu1;~Hongzhi_Wu1;~Hao_Su1", "gender": "M;M;F;Not Specified;;M;F;M;M;;;M", "homepage": "https://cseweb.ucsd.edu//~mil070/;https://chong-zeng.com/;https://sites.google.com/view/xinyue-wei/;https://rshi.top/;https://ootts.github.io/;https://chaoxu.xyz;https://mq-zhang1.github.io/;https://www.zhaoningwang.com/;https://i.buriedjet.com;;https://svbrdf.github.io/;http://ai.ucsd.edu/~haosu", "dblp": "28/8907;260/2700-1;215/7941;190/7068;262/3716;79/1442-16;;;175/5693;;;09/4945-1", "google_scholar": "6U3IGtEAAAAJ;4dID7zIAAAAJ;UipTKqoAAAAJ;Z7zLvdkAAAAJ;;9Az3LhwAAAAJ;https://scholar.google.com/citations?hl=en;;cTGxuQQAAAAJ;;Ex5JDjkAAAAJ;1P8Zu04AAAAJ", "orcid": ";0009-0004-6373-6848;;;;0009-0001-0574-5357;;;;;0000-0002-4404-2275;", "linkedin": ";chongzeng/;;;;chaoxu/;mengqi-z-9a83341bb/;;;;;", "or_profile": "~Minghua_Liu1;~Chong_Zeng1;~Xinyue_Wei1;~Ruoxi_Shi1;~Linghao_Chen2;~Chao_Xu6;~Mengqi_Zhang2;~Zhaoning_Wang2;~Xiaoshuai_Zhang1;~Isabella_Liu1;~Hongzhi_Wu1;~Hao_Su1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego;Zhejiang University;University of California, Los Angeles;Georgia Institute of Technology;University of Central Florida;University of California, San Diego;;Zhejiang University;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;zju.edu.cn;ucla.edu;gatech.edu;ucf.edu;ucsd.edu;;zju.edu.cn;ucsd.edu", "position": "PhD student;Intern;PhD student;Intern;PhD student;PhD student;PhD student;MS student;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2024meshformer,\ntitle={MeshFormer : High-Quality Mesh Generation with 3D-Guided Reconstruction Model},\nauthor={Minghua Liu and Chong Zeng and Xinyue Wei and Ruoxi Shi and Linghao Chen and Chao Xu and Mengqi Zhang and Zhaoning Wang and Xiaoshuai Zhang and Isabella Liu and Hongzhi Wu and Hao Su},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x7pjdDod6Z}\n}", "github": "", "reviewers": "3423;mWQL;V11k;r7MY;bHQc", "pdf_size": 42194359, "rating": "7;7;7;7;8", "confidence": "4;5;5;5;5", "soundness": "4;3;4;3;3", "novelty": "3;3;3;3;3", "presentation": "3;4;4;3;3", "wc_summary": "75;146;48;89;69", "wc_strengths": "77;139;75;68;66", "wc_weaknesses": "85;186;57;186;46", "wc_questions": "23;120;8;185;64", "wc_limitations": "11;9;1;11;18", "wc_review": "271;600;189;539;263", "wc_reply_reviewers": "27;114;0;19;9", "wc_reply_authors": "0;21;0;0;0", "reply_reviewers": "1;1;0;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 7.2, 0.39999999999999997 ], "confidence_avg": [ 4.8, 0.39999999999999997 ], "soundness_avg": [ 3.4, 0.4898979485566356 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 85.4, 33.049054449408985 ], "wc_strengths_avg": [ 85.0, 27.313000567495326 ], "wc_weaknesses_avg": [ 112.0, 61.74463539450209 ], "wc_questions_avg": [ 80.0, 65.29012176432205 ], "wc_limitations_avg": [ 10.0, 5.440588203494177 ], "wc_review_avg": [ 372.4, 164.5862691721275 ], "wc_reply_reviewers_avg": [ 33.8, 41.12128402664489 ], "wc_reply_authors_avg": [ 4.2, 8.399999999999999 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11993994086816219712&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;zju.edu.cn;ucla.edu;gatech.edu;ucf.edu;ucsd.edu;;zju.edu.cn;ucsd.edu", "author_num": 12, "aff_unique_index": "0;0;0;0;1;2;3;4;0;1;0", "aff_unique_norm": "University of California, San Diego;Zhejiang University;University of California, Los Angeles;Georgia Institute of Technology;University of Central Florida", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ucsd.edu;https://www.zju.edu.cn;https://www.ucla.edu;https://www.gatech.edu;https://www.ucf.edu", "aff_unique_abbr": "UCSD;ZJU;UCLA;Georgia Tech;UCF", "aff_campus_unique_index": "0;0;0;0;2;0;0", "aff_campus_unique": "San Diego;;Los Angeles", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "On Convergence of Adam for Stochastic Optimization under Relaxed Assumptions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93100", "id": "x7usmidzxj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x7usmidzxj", "openreview": "https://openreview.net/forum?id=x7usmidzxj", "poster": "/media/PosterPDFs/NeurIPS%202024/93100.png?t=1730861644.2494907", "project": "", "author_site": "Yusu Hong, Junhong Lin", "tldr": "", "abstract": "In this paper, we study Adam in non-convex smooth scenarios with potential unbounded gradients and affine variance noise. We consider a general noise model which governs affine variance noise, bounded noise, and sub-Gaussian noise. We show that Adam with a specific hyper-parameter setup can find a stationary point with a $\\mathcal{O}(\\text{poly}(\\log T)/\\sqrt{T})$ rate in high probability under this general noise model where $T$ denotes total number iterations, matching the lower rate of stochastic first-order algorithms up to logarithm factors. We also provide a probabilistic convergence result for Adam under a generalized smooth condition which allows unbounded smoothness parameters and has been illustrated empirically to capture the smooth property of many practical objective functions more accurately.", "keywords": "Adam;optimal rate;high probability convergence;relaxed assumptions", "primary_area": "optimization", "supplementary_material": "", "author": "Yusu Hong;Junhong Lin", "authorids": "~Yusu_Hong1;~Junhong_Lin1", "gender": "M;M", "homepage": ";https://person.zju.edu.cn/en/junhong", "dblp": "360/0732;", "google_scholar": "OabD-60AAAAJ;", "orcid": ";", "linkedin": "%E9%92%B0%E6%BA%AF-%E6%B4%AA-0198b41ba/;", "or_profile": "~Yusu_Hong1;~Junhong_Lin1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhong2024on,\ntitle={On Convergence of Adam for Stochastic Optimization under Relaxed Assumptions},\nauthor={Yusu Hong and Junhong Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x7usmidzxj}\n}", "github": "", "reviewers": "4Xir;J5iU;hewP", "pdf_size": 615753, "rating": "6;6;7", "confidence": "5;3;5", "soundness": "3;4;3", "novelty": "3;3;2", "presentation": "3;4;3", "wc_summary": "31;66;29", "wc_strengths": "30;37;5", "wc_weaknesses": "99;56;187", "wc_questions": "225;98;113", "wc_limitations": "2;5;37", "wc_review": "387;262;371", "wc_reply_reviewers": "145;89;81", "wc_reply_authors": "57;59;496", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.9428090415820634 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 42.0, 16.990193249832878 ], "wc_strengths_avg": [ 24.0, 13.73559851869101 ], "wc_weaknesses_avg": [ 114.0, 54.5221667458903 ], "wc_questions_avg": [ 145.33333333333334, 56.66470584842816 ], "wc_limitations_avg": [ 14.666666666666666, 15.839472494022298 ], "wc_review_avg": [ 340.0, 55.53977553669682 ], "wc_reply_reviewers_avg": [ 105.0, 28.472208672083497 ], "wc_reply_authors_avg": [ 204.0, 206.47679449920435 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6059677302753771791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Benchmarking Uncertainty Disentanglement: Specialized Uncertainties for Specialized Tasks", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97444", "id": "x8RgF2xQTj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x8RgF2xQTj", "openreview": "https://openreview.net/forum?id=x8RgF2xQTj", "poster": "/media/PosterPDFs/NeurIPS%202024/97444.png?t=1733969569.9481277", "project": "", "author_site": "B\u00e1lint Mucs\u00e1nyi, Michael Kirchhof, Seong Joon Oh", "tldr": "", "abstract": "Uncertainty quantification, once a singular task, has evolved into a spectrum of tasks, including abstained prediction, out-of-distribution detection, and aleatoric uncertainty quantification. The latest goal is disentanglement: the construction of multiple estimators that are each tailored to one and only one source of uncertainty. This paper presents the first benchmark of uncertainty disentanglement. We reimplement and evaluate a comprehensive range of uncertainty estimators, from Bayesian over evidential to deterministic ones, across a diverse range of uncertainty tasks on ImageNet. We find that, despite recent theoretical endeavors, no existing approach provides pairs of disentangled uncertainty estimators in practice. We further find that specialized uncertainty tasks are harder than predictive uncertainty tasks, where we observe saturating performance. Our results provide both practical advice for which uncertainty estimators to use for which specific task, and reveal opportunities for future research toward task-centric and disentangled uncertainties. All our reimplementations and Weights & Biases logs are available at https://github.com/bmucsanyi/untangle.", "keywords": "Uncertainty Quantification;Uncertainty Disentanglement;Aleatoric Uncertainty;Epistemic Uncertainty;Abstained Prediction;Out-of-Distribution Detection", "primary_area": "", "supplementary_material": "", "author": "B\u00e1lint Mucs\u00e1nyi;Michael Kirchhof;Seong Joon Oh", "authorids": "~B\u00e1lint_Mucs\u00e1nyi1;~Michael_Kirchhof1;~Seong_Joon_Oh1", "gender": "M;M;M", "homepage": "https://bmucsanyi.github.io/;https://www.hci.uni-tuebingen.de/chair/team/michael-kirchhof;https://seongjoonoh.com", "dblp": ";65/6349;168/8835", "google_scholar": "NexA8EEAAAAJ;Xtgj8q0AAAAJ;https://scholar.google.de/citations?user=kmXOOdsAAAAJ", "orcid": "0000-0002-7075-9018;0000-0003-4521-9391;0000-0002-8985-7689", "linkedin": "b%C3%A1lint-mucs%C3%A1nyi-148a47222/;michael-kirchhof;seong-joon-oh-32113479/", "or_profile": "~B\u00e1lint_Mucs\u00e1nyi1;~Michael_Kirchhof1;~Seong_Joon_Oh1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nmucs{\\'a}nyi2024benchmarking,\ntitle={Benchmarking Uncertainty Disentanglement: Specialized Uncertainties for Specialized Tasks},\nauthor={B{\\'a}lint Mucs{\\'a}nyi and Michael Kirchhof and Seong Joon Oh},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=x8RgF2xQTj}\n}", "github": "", "reviewers": "B7nH;h1Bx;hBr9", "pdf_size": 6154135, "rating": "7;7;7", "confidence": "4;5;4", "wc_summary_and_contributions": "47;131;63", "wc_strengths": "30;175;40", "wc_improvement": "5;218;100", "wc_limitations": "29;82;7", "wc_correctness": "65;50;11", "wc_clarity": "165;7;10", "wc_relation_to_prior_work": "32;10;24", "wc_documentation": "16;47;11", "wc_additional_feedback": "1;1;1", "wc_review": "390;721;267", "wc_reply_reviewers": "0;57;0", "wc_reply_authors": "0;248;0", "reply_reviewers": "0;1;0", "reply_authors": "1;4;1", "rating_avg": [ 7.0, 0.0 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 80.33333333333333, 36.41733408999377 ], "wc_strengths_avg": [ 81.66666666666667, 66.12278141625791 ], "wc_improvement_avg": [ 107.66666666666667, 87.12570739390809 ], "wc_limitations_avg": [ 39.333333333333336, 31.47838764754143 ], "wc_correctness_avg": [ 42.0, 22.759613353482084 ], "wc_clarity_avg": [ 60.666666666666664, 73.78497287539126 ], "wc_relation_to_prior_work_avg": [ 22.0, 9.092121131323903 ], "wc_documentation_avg": [ 24.666666666666668, 15.923427883328248 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 459.3333333333333, 191.71912325645093 ], "wc_reply_reviewers_avg": [ 19.0, 26.870057685088806 ], "wc_reply_authors_avg": [ 82.66666666666667, 116.90832115617586 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9799185382141498852&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "T\u00fcbingen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "From Unstructured Data to In-Context Learning: Exploring What Tasks Can Be Learned and When", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93099", "id": "x9eFgahVBI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=x9eFgahVBI", "openreview": "https://openreview.net/forum?id=x9eFgahVBI", "poster": "", "project": "", "author_site": "Kevin Christian Wibisono, Yixin Wang", "tldr": "", "abstract": "Large language models (LLMs) like transformers demonstrate impressive in-context learning (ICL) capabilities, allowing them to make\npredictions for new tasks based on prompt exemplars without parameter updates. While existing ICL theories often assume structured training data resembling ICL tasks (e.g., x-y pairs for linear regression), LLMs are typically trained unsupervised on unstructured text, such as web content, which lacks clear parallels to tasks like word analogy. To address this gap, we examine what enables ICL in models trained on unstructured data, focusing on critical sequence model requirements and training data structure. We find that many ICL capabilities can\nemerge simply from co-occurrence of semantically related word pairs in unstructured data; word analogy completion, for example, can provably arise purely through co-occurrence modeling, using classical language models like continuous bag of words (CBOW), without needing positional information or attention mechanisms. However, positional information becomes crucial for logic reasoning tasks requiring generalization to unseen tokens. Finally, we identify two cases where ICL fails: one in logic reasoning tasks that require generalizing to new, unseen patterns, and another in analogy completion where relevant word pairs appear only in fixed training positions. These findings suggest that LLMs' ICL abilities depend heavily on the structural elements within their training data.", "keywords": "in-context learning;large language models;unstructured data;continuous bag of words;co-occurrence", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/0faf9530828b94ca355d666ecc5e8ffe651a967f.zip", "author": "Kevin Christian Wibisono;Yixin Wang", "authorids": "~Kevin_Christian_Wibisono1;~Yixin_Wang1", "gender": "M;", "homepage": "https://k-wib.github.io;", "dblp": ";", "google_scholar": ";gFLW9qcAAAAJ", "orcid": ";0000-0002-6617-4842", "linkedin": ";", "or_profile": "~Kevin_Christian_Wibisono1;~Yixin_Wang1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwibisono2024from,\ntitle={From Unstructured Data to In-Context Learning: Exploring What Tasks Can Be Learned and When},\nauthor={Kevin Christian Wibisono and Yixin Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=x9eFgahVBI}\n}", "github": "", "reviewers": "xEmA;xShA;nuMZ;cRrU", "pdf_size": 779980, "rating": "5;6;6;7", "confidence": "4;2;2;3", "soundness": "3;4;3;4", "novelty": "2;2;3;3", "presentation": "2;3;2;3", "wc_summary": "90;68;87;73", "wc_strengths": "92;132;33;37", "wc_weaknesses": "326;241;82;98", "wc_questions": "54;33;11;17", "wc_limitations": "1;9;6;2", "wc_review": "563;483;219;227", "wc_reply_reviewers": "560;49;43;28", "wc_reply_authors": "20;0;0;164", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 79.5, 9.233092656309694 ], "wc_strengths_avg": [ 73.5, 41.0396150079408 ], "wc_weaknesses_avg": [ 186.75, 101.46766726401076 ], "wc_questions_avg": [ 28.75, 16.64894891577243 ], "wc_limitations_avg": [ 4.5, 3.2015621187164243 ], "wc_review_avg": [ 373.0, 152.6695778470616 ], "wc_reply_reviewers_avg": [ 170.0, 225.2964713438717 ], "wc_reply_authors_avg": [ 46.0, 68.61486719363377 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4264014327112209, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13705327143127120687&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "umich.edu;umich.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Trading off Consistency and Dimensionality of Convex Surrogates for Multiclass Classification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93098", "id": "xCIbVuXwPM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xCIbVuXwPM", "openreview": "https://openreview.net/forum?id=xCIbVuXwPM", "poster": "/media/PosterPDFs/NeurIPS%202024/93098.png?t=1731617822.600772", "project": "", "author_site": "Enrique Nueve, Dhamma Kimpara, Bo Waggoner, Jessica Finocchiaro", "tldr": "", "abstract": "In multiclass classification over $n$ outcomes, we typically optimize some surrogate loss $L: \\mathbb{R}^d \\times\\mathcal{Y} \\to \\mathbb{R}$ assigning real-valued error to predictions in $\\mathbb{R}^d$. In this paradigm, outcomes must be embedded into the reals with dimension $d \\approx n$ in order to design a consistent surrogate loss. Consistent losses are well-motivated theoretically, yet for large $n$, such as in information retrieval and structured prediction tasks, their optimization may be computationally infeasible. In practice, outcomes are typically embedded into some $\\mathbb{R}^d$ for $d \\ll n$, with little known about their suitability for multiclass classification. We investigate two approaches for trading off consistency and dimensionality in multiclass classification while using a convex surrogate loss. We first formalize partial consistency when the optimized surrogate has dimension $d \\ll n$. \nWe then check if partial consistency holds under a given embedding and low-noise assumption, providing insight into when to use a particular embedding into $\\mathbb{R}^d$. Finally, we present a new method to construct (fully) consistent losses with $d \\ll n$ out of multiple problem instances. Our practical approach leverages parallelism to sidestep lower bounds on $d$.", "keywords": "Loss Functions;Consistency;Property Elicitation", "primary_area": "learning_theory", "supplementary_material": "/attachment/7b17f0f4a25b7d636031a7a95219ad031738cd25.zip", "author": "Enrique Nueve;Dhamma Kimpara;Bo Waggoner;Jessica Finocchiaro", "authorids": "~Enrique_Nueve1;~Dhamma_Kimpara2;~Bo_Waggoner1;~Jessica_Finocchiaro1", "gender": "M;M;;F", "homepage": "https://enriquenueve.github.io/;https://dkimpara.github.io/;https://www.bowaggoner.com;https://jessiefin.com", "dblp": ";;117/4968;188/5953", "google_scholar": ";hLjrxx8AAAAJ;;gM8Ls7MAAAAJ", "orcid": ";;;0000-0002-3222-0089", "linkedin": ";;;", "or_profile": "~Enrique_Nueve1;~Dhamma_Kimpara2;~Bo_Waggoner1;~Jessica_Finocchiaro1", "aff": "University of Colorado at Boulder;University of Colorado at Boulder;University of Colorado, Boulder;Harvard University", "aff_domain": "cs.colorado.edu;colorado.edu;colorado.edu;harvard.edu", "position": "PhD student;PhD student;Professor;Postdoc", "bibtex": "@inproceedings{\nnueve2024trading,\ntitle={Trading off Consistency and Dimensionality of Convex Surrogates for Multiclass Classification},\nauthor={Enrique Nueve and Dhamma Kimpara and Bo Waggoner and Jessica Finocchiaro},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xCIbVuXwPM}\n}", "github": "", "reviewers": "2eE5;zH7s;BgPT;vfJE", "pdf_size": 1616977, "rating": "6;7;7;7", "confidence": "2;4;4;4", "soundness": "2;4;3;3", "novelty": "2;4;3;3", "presentation": "2;2;3;3", "wc_summary": "49;78;93;106", "wc_strengths": "33;136;34;64", "wc_weaknesses": "43;227;43;44", "wc_questions": "39;599;305;4", "wc_limitations": "1;47;7;4", "wc_review": "165;1087;482;222", "wc_reply_reviewers": "54;17;26;51", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 81.5, 21.219095173922945 ], "wc_strengths_avg": [ 66.75, 41.87705218851967 ], "wc_weaknesses_avg": [ 89.25, 79.53104739659852 ], "wc_questions_avg": [ 236.75, 239.3536870407473 ], "wc_limitations_avg": [ 14.75, 18.73999733191016 ], "wc_review_avg": [ 489.0, 365.3484637986042 ], "wc_reply_reviewers_avg": [ 37.0, 15.858751527153705 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:oVYf1WR_rWYJ:scholar.google.com/&scioq=Trading+off+Consistency+and+Dimensionality+of+Convex+Surrogates+for+Multiclass+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "cs.colorado.edu;colorado.edu;colorado.edu;harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Colorado;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.colorado.edu;https://www.harvard.edu", "aff_unique_abbr": "CU;Harvard", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Boulder;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Med-Real2Sim: Non-Invasive Medical Digital Twins using Physics-Informed Self-Supervised Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93097", "id": "xCUXJqQySD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xCUXJqQySD", "openreview": "https://openreview.net/forum?id=xCUXJqQySD", "poster": "/media/PosterPDFs/NeurIPS%202024/93097.png?t=1731651177.9641936", "project": "", "author_site": "Keying Kuang, Frances Dean, Jack B. Jedlicki, David Ouyang, Anthony Philippakis, David Sontag, Ahmed Alaa", "tldr": "", "abstract": "A digital twin is a virtual replica of a real-world physical phenomena that uses mathematical modeling to characterize and simulate its defining features. By constructing digital twins for disease processes, we can perform in-silico simulations that mimic patients' health conditions and counterfactual outcomes under hypothetical interventions in a virtual setting. This eliminates the need for invasive procedures or uncertain treatment decisions. In this paper, we propose a method to identify digital twin model parameters using only noninvasive patient health data. We approach the digital twin modeling as a composite inverse problem, and observe that its structure resembles pretraining and finetuning in self-supervised learning (SSL). Leveraging this, we introduce a physics-informed SSL algorithm that initially pretrains a neural network on the pretext task of learning a differentiable simulator of a physiological process. Subsequently, the model is trained to reconstruct physiological measurements from noninvasive modalities while being constrained by the physical equations learned in pretraining. We apply our method to identify digital twins of cardiac hemodynamics using noninvasive echocardiogram videos, and demonstrate its utility in unsupervised disease detection and in-silico clinical trials.", "keywords": "Inverse Problems;Digital Twins;Personalized Medicine;Non-Invasive Imaging;Physics-Informed", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Keying Kuang;Frances Dean;Jack B. Jedlicki;David Ouyang;Anthony Philippakis;David Sontag;Ahmed Alaa", "authorids": "~Keying_Kuang1;~Frances_Dean1;~Jack_B._Jedlicki1;~David_Ouyang1;~Anthony_Philippakis1;~David_Sontag1;~Ahmed_Alaa1", "gender": "F;F;M;;M;M;M", "homepage": ";;;https://douyang.github.io;;http://people.csail.mit.edu/dsontag/;https://alaalab.berkeley.edu/", "dblp": "371/9273;;;;;12/673;140/7324", "google_scholar": "YI_onxkAAAAJ;Y04UhM4AAAAJ;;;Q-v0BgUAAAAJ;LfcroyAAAAAJ;https://scholar.google.com.eg/citations?user=_pv1sEcAAAAJ", "orcid": ";;;;;0000-0002-5034-7796;", "linkedin": "maggie-keying-kuang-a8454a1a0/;;jack-benarroch-jedlicki-2706a2224/;;;;", "or_profile": "~Keying_Kuang1;~Frances_Dean1;~Jack_B._Jedlicki1;~David_Ouyang1;~Anthony_Philippakis1;~David_Sontag1;~Ahmed_Alaa1", "aff": "University of California, Berkeley;University of California, Berkeley;Universitat de Barcelona;Cedars Sinai Medical Center;Broad Institute;Massachusetts Institute of Technology;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;ub.edu;cshs.org;broadinstitute.org;mit.edu;berkeley.edu", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nkuang2024medrealsim,\ntitle={Med-Real2Sim: Non-Invasive Medical Digital Twins using Physics-Informed Self-Supervised Learning},\nauthor={Keying Kuang and Frances Dean and Jack B. Jedlicki and David Ouyang and Anthony Philippakis and David Sontag and Ahmed Alaa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xCUXJqQySD}\n}", "github": "", "reviewers": "LjNF;32qb;8RJZ;CAet", "pdf_size": 5883690, "rating": "3;5;6;6", "confidence": "3;4;4;4", "soundness": "1;3;3;3", "novelty": "2;2;2;3", "presentation": "3;3;3;3", "wc_summary": "155;136;85;90", "wc_strengths": "25;59;44;98", "wc_weaknesses": "85;122;64;51", "wc_questions": "25;23;224;75", "wc_limitations": "28;14;4;54", "wc_review": "318;354;421;368", "wc_reply_reviewers": "67;18;12;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.5, 29.82029510249689 ], "wc_strengths_avg": [ 56.5, 26.818836663807772 ], "wc_weaknesses_avg": [ 80.5, 26.85609800399157 ], "wc_questions_avg": [ 86.75, 81.93404359605353 ], "wc_limitations_avg": [ 25.0, 18.788294228055936 ], "wc_review_avg": [ 365.25, 36.99577678600627 ], "wc_reply_reviewers_avg": [ 26.75, 23.42407949098534 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.9428090415820632, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:3nmGuIQ-m0YJ:scholar.google.com/&scioq=Med-Real2Sim:+Non-Invasive+Medical+Digital+Twins+using+Physics-Informed+Self-Supervised+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "berkeley.edu;berkeley.edu;ub.edu;cshs.org;broadinstitute.org;mit.edu;berkeley.edu", "author_num": 7, "aff_unique_index": "0;0;1;2;3;4;0", "aff_unique_norm": "University of California, Berkeley;University of Barcelona;Cedars-Sinai Medical Center;Broad Institute;Massachusetts Institute of Technology", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.berkeley.edu;https://www.ub.edu;https://www.cedars-sinai.org;https://www.broadinstitute.org;https://web.mit.edu", "aff_unique_abbr": "UC Berkeley;UB;CSMC;Broad;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;Spain" }, { "title": "Fast T2T: Optimization Consistency Speeds Up Diffusion-Based Training-to-Testing Solving for Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93096", "id": "xDrKZOZEOc", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xDrKZOZEOc", "openreview": "https://openreview.net/forum?id=xDrKZOZEOc", "poster": "/media/PosterPDFs/NeurIPS%202024/93096.png?t=1731316785.3774662", "project": "", "author_site": "Yang Li, Jinpei Guo, Runzhong Wang, Hongyuan Zha, Junchi Yan", "tldr": "", "abstract": "Diffusion models have recently advanced Combinatorial Optimization (CO) as a powerful backbone for neural solvers. However, their iterative sampling process requiring denoising across multiple noise levels incurs substantial overhead. We propose to learn direct mappings from different noise levels to the optimal solution for a given instance, facilitating high-quality generation with minimal shots. This is achieved through an optimization consistency training protocol, which, for a given instance, minimizes the difference among samples originating from varying generative trajectories and time steps relative to the optimal solution. The proposed model enables fast single-step solution generation while retaining the option of multi-step sampling to trade for sampling quality, which offers a more effective and efficient alternative backbone for neural solvers. In addition, within the training-to-testing (T2T) framework, to bridge the gap between training on historical instances and solving new instances, we introduce a novel consistency-based gradient search scheme during the test stage, enabling more effective exploration of the solution space learned during training. It is achieved by updating the latent solution probabilities under objective gradient guidance during the alternation of noise injection and denoising steps. We refer to this model as Fast T2T. Extensive experiments on two popular tasks, the Traveling Salesman Problem (TSP) and Maximal Independent Set (MIS), demonstrate the superiority of Fast T2T regarding both solution quality and efficiency, even outperforming LKH given limited time budgets. Notably, Fast T2T with merely one-step generation and one-step gradient search can mostly outperform the SOTA diffusion-based counterparts that require hundreds of steps, while achieving tens of times speedup.", "keywords": "Neural Combinatorial Optimization;Generative Modeling", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Yang Li;Jinpei Guo;Runzhong Wang;Hongyuan Zha;Junchi Yan", "authorids": "~Yang_Li32;~Jinpei_Guo1;~Runzhong_Wang1;~Hongyuan_Zha1;~Junchi_Yan2", "gender": "M;M;M;;M", "homepage": "https://yangco-le.github.io;https://jp-guo.github.io/;http://runzhong.wang;;http://thinklab.sjtu.edu.cn/", "dblp": ";;239/4351;z/HongyuanZha;60/7949.html", "google_scholar": "ecE0xDIAAAAJ;;uoM0g3cAAAAJ;n1DQMIsAAAAJ;ga230VoAAAAJ", "orcid": "0000-0002-5249-3471;;0000-0002-9566-738X;;0000-0001-9639-7679", "linkedin": ";;;;", "or_profile": "~Yang_Li32;~Jinpei_Guo1;~Runzhong_Wang1;~Hongyuan_Zha1;~Junchi_Yan1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Massachusetts Institute of Technology;The Chinese University of Hong Kong, Shenzhen;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;mit.edu;cuhk.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2024fast,\ntitle={Fast T2T: Optimization Consistency Speeds Up Diffusion-Based Training-to-Testing Solving for Combinatorial Optimization},\nauthor={Yang Li and Jinpei Guo and Runzhong Wang and Hongyuan Zha and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xDrKZOZEOc}\n}", "github": "", "reviewers": "rPB6;P6jy;rpwx;3pWn", "pdf_size": 1563777, "rating": "5;5;6;6", "confidence": "4;4;2;4", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "28;62;107;203", "wc_strengths": "20;30;150;89", "wc_weaknesses": "2;103;153;172", "wc_questions": "557;2;67;2", "wc_limitations": "2;6;1;17", "wc_review": "609;203;478;483", "wc_reply_reviewers": "1127;196;19;102", "wc_reply_authors": "2263;1212;54;334", "reply_reviewers": "2;2;1;2", "reply_authors": "7;3;2;4", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 100.0, 65.7381167968782 ], "wc_strengths_avg": [ 72.25, 52.05946119582875 ], "wc_weaknesses_avg": [ 107.5, 65.91851029870138 ], "wc_questions_avg": [ 157.0, 232.45967392216656 ], "wc_limitations_avg": [ 6.5, 6.34428877022476 ], "wc_review_avg": [ 443.25, 148.30774592043397 ], "wc_reply_reviewers_avg": [ 361.0, 446.6615049452997 ], "wc_reply_authors_avg": [ 965.75, 862.2489127276416 ], "reply_reviewers_avg": [ 1.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.8708286933869707 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16355910821680443955&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 3, "email": "sjtu.edu.cn;sjtu.edu.cn;mit.edu;cuhk.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Shanghai Jiao Tong University;Massachusetts Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://web.mit.edu;https://www.cuhk.edu.cn", "aff_unique_abbr": "SJTU;MIT;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Multi-modal Transfer Learning between Biological Foundation Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93095", "id": "xImeJtdUiw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xImeJtdUiw", "openreview": "https://openreview.net/forum?id=xImeJtdUiw", "poster": "", "project": "", "author_site": "Juan Jose Garau-Luis, Patrick Bordes, Liam Gonzalez, Ma\u0161a Roller, Bernardo de Almeida, Christopher Blum, Lorenz Hexemer, Stefan Laurent, Maren Lang, Thomas PIERROT, Guillaume Richard", "tldr": "", "abstract": "Biological sequences encode fundamental instructions for the building blocks of life, in the form of DNA, RNA, and proteins. Modeling these sequences is key to understand disease mechanisms and is an active research area in computational biology. Recently, Large Language Models have shown great promise in solving certain biological tasks but current approaches are limited to a single sequence modality (DNA, RNA, or protein). Key problems in genomics intrinsically involve multiple modalities, but it remains unclear how to adapt general-purpose sequence models to those cases. In this work we propose a multi-modal model that connects DNA, RNA, and proteins by leveraging information from different pre-trained modality-specific encoders. We demonstrate its capabilities by applying it to the largely unsolved problem of predicting how multiple \\rna transcript isoforms originate from the same gene (i.e. same DNA sequence) and map to different transcription expression levels across various human tissues. We show that our model, dubbed IsoFormer, is able to accurately predict differential transcript expression, outperforming existing methods and leveraging the use of multiple modalities. Our framework also achieves efficient transfer knowledge from the encoders pre-training as well as in between modalities. We open-source our model, paving the way for new multi-modal gene expression approaches.", "keywords": "Multi-Modal;Transformers;BioAI", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Juan Jose Garau-Luis;Patrick Philippe Bordes;Liam Gonzalez;Ma\u0161a Roller;Bernardo P de Almeida;Christopher F. Blum;Lorenz Hexemer;Stefan Laurent;Maren Lang;Thomas PIERROT;Guillaume Richard", "authorids": "~Juan_Jose_Garau-Luis1;~Patrick_Philippe_Bordes1;~Liam_Gonzalez1;~Ma\u0161a_Roller1;~Bernardo_P_de_Almeida1;~Christopher_F._Blum1;~Lorenz_Hexemer1;~Stefan_Laurent1;~Maren_Lang1;~Thomas_PIERROT1;~Guillaume_Richard1", "gender": "M;M;;;;Not Specified;Not Specified;M;F;M;M", "homepage": ";;;;https://bernardo-de-almeida.github.io/;;;;;;", "dblp": ";204/2882.html;;;;;;;;228/7739;258/3535", "google_scholar": "FpaKuysAAAAJ;;jakXZZ0AAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.de/citations?user=T_CFMLAAAAAJ;https://scholar.google.de/citations?user=WxNMQ8wAAAAJ;eql6LvUAAAAJ;https://scholar.google.fr/citations?user=0zBiyNUAAAAJ;viOjnmQAAAAJ", "orcid": ";;;;;0000-0001-9540-4953;0000-0001-6512-7190;0000-0003-4016-5427;0000-0003-3578-7053;0000-0002-5227-6194;", "linkedin": "juanjosegarau/;patrick-bordes-052b1a12b/?originalSubdomain=fr;liam-gonzalez-4a81b2147/;;;;https://de.linkedin.com/in/lorenz-hexemer-445a45162;;https://de.linkedin.com/in/maren-lang-1209b8108;thomas-pierrot-120a43128/;", "or_profile": "~Juan_Jose_Garau-Luis1;~Patrick_Philippe_Bordes1;~Liam_Gonzalez1;~Ma\u0161a_Roller1;~Bernardo_P_de_Almeida1;~Christopher_F._Blum1;~Lorenz_Hexemer1;~Stefan_Laurent1;~Maren_Lang1;~Thomas_PIERROT1;~Guillaume_Richard1", "aff": ";;InstaDeep;InstaDeep;InstaDeep;BioNTech SE;BioNTech SE;Max Planck Institut, Max-Planck Institute;Biontech SE;Universit\u00e9 Pierre et Marie Curie - Paris 6, Computer Science Lab - Pierre and Marie Curie University, Paris, France;InstaDeep", "aff_domain": ";;instadeep.com;instadeep.com;instadeep.com;biontech.de;biontech.de;mpip.mpg.de;biontech.de;isir.upmc.fr;instadeep.com", "position": ";;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Principal Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\ngarau-luis2024multimodal,\ntitle={Multi-modal Transfer Learning between Biological Foundation Models},\nauthor={Juan Jose Garau-Luis and Patrick Philippe Bordes and Liam Gonzalez and Ma{\\v{s}}a Roller and Bernardo P de Almeida and Christopher F. Blum and Lorenz Hexemer and Stefan Laurent and Maren Lang and Thomas PIERROT and Guillaume Richard},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xImeJtdUiw}\n}", "github": "", "reviewers": "pNzb;Z3Wd;p67W", "pdf_size": 2576555, "rating": "5;6;6", "confidence": "4;3;5", "soundness": "2;3;3", "novelty": "2;2;2", "presentation": "2;3;2", "wc_summary": "41;93;92", "wc_strengths": "23;82;52", "wc_weaknesses": "77;37;133", "wc_questions": "30;68;160", "wc_limitations": "38;47;33", "wc_review": "209;327;470", "wc_reply_reviewers": "17;58;17", "wc_reply_authors": "0;214;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 75.33333333333333, 24.280765135299085 ], "wc_strengths_avg": [ 52.333333333333336, 24.087802353519557 ], "wc_weaknesses_avg": [ 82.33333333333333, 39.37286149395574 ], "wc_questions_avg": [ 86.0, 54.57716250105594 ], "wc_limitations_avg": [ 39.333333333333336, 5.792715732327589 ], "wc_review_avg": [ 335.3333333333333, 106.71561376959897 ], "wc_reply_reviewers_avg": [ 30.666666666666668, 19.3275853524323 ], "wc_reply_authors_avg": [ 71.33333333333333, 100.88056744928079 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18305258770359139437&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": ";;instadeep.com;instadeep.com;instadeep.com;biontech.de;biontech.de;mpip.mpg.de;biontech.de;isir.upmc.fr;instadeep.com", "author_num": 11, "aff_unique_index": "0;0;0;1;1;2;1;3;0", "aff_unique_norm": "InstaDeep;BioNTech;Max Planck Institute;Universit\u00e9 Pierre et Marie Curie - Paris 6", "aff_unique_dep": ";;;Computer Science Lab", "aff_unique_url": "https://www.instadeep.com;https://www.biontech.de;https://www.mpg.de;https://www.upmc.fr", "aff_unique_abbr": "InstaDeep;BioNTech;MPI;UPMC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0;1;1;1;1;2;0", "aff_country_unique": "United Kingdom;Germany;France" }, { "title": "Regularized Adaptive Momentum Dual Averaging with an Efficient Inexact Subproblem Solver for Training Structured Neural Network", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93094", "id": "xL7Ve14AHA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xL7Ve14AHA", "openreview": "https://openreview.net/forum?id=xL7Ve14AHA", "poster": "", "project": "", "author_site": "Zih-Syuan Huang, Ching-pei Lee", "tldr": "", "abstract": "We propose a Regularized Adaptive Momentum Dual Averaging (RAMDA) algorithm for training structured neural networks. Similar to existing regularized adaptive methods, the subproblem for computing the update direction of RAMDA involves a nonsmooth regularizer and a diagonal preconditioner, and therefore does not possess a closed-form solution in general. We thus also carefully devise an implementable inexactness condition that retains convergence guarantees similar to the exact versions, and propose a companion efficient solver for the subproblems of both RAMDA and existing methods to make them practically feasible. We leverage the theory of manifold identification in variational analysis to show that, even in the presence of such inexactness, the iterates of RAMDA attain the ideal structure induced by the regularizer at the stationary point of asymptotic convergence. This structure is locally optimal near the point of convergence, so RAMDA is guaranteed to obtain the best structure possible among all methods converging to the same point, making it the first regularized adaptive method outputting models that possess outstanding predictive performance while being (locally) optimally structured. Extensive numerical experiments in large-scale modern computer vision, language modeling, and speech tasks show that the proposed RAMDA is efficient and consistently outperforms state of the art for training structured neural network. Implementation of our algorithm is available at https://www.github.com/ismoptgroup/RAMDA.", "keywords": "structured neural networks;variance reduction;manifold identification;proximal methods;adaptive methods;inexact subproblem solution", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Zih-Syuan Huang;Ching-pei Lee", "authorids": "~Zih-Syuan_Huang1;~Ching-pei_Lee2", "gender": ";Unspecified", "homepage": "https://github.com/zihsyuan1214;http://leepei.github.io", "dblp": ";", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";", "linkedin": ";", "or_profile": "~Zih-Syuan_Huang1;~Ching-Pei_Lee1", "aff": "Department of computer science and information engineering, National Taiwan University;Institute of Statistical Mathematics, Japan", "aff_domain": "csie.ntu.edu.tw;ism.ac.jp", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nhuang2024regularized,\ntitle={Regularized Adaptive Momentum Dual Averaging with an Efficient Inexact Subproblem Solver for Training Structured Neural Network},\nauthor={Zih-Syuan Huang and Ching-pei Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xL7Ve14AHA}\n}", "github": "", "reviewers": "4z2u;xuwn;X8bY", "pdf_size": 1132234, "rating": "5;7;7", "confidence": "4;3;3", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "68;77;95", "wc_strengths": "22;80;86", "wc_weaknesses": "246;133;84", "wc_questions": "4;7;165", "wc_limitations": "16;14;1", "wc_review": "356;311;431", "wc_reply_reviewers": "37;28;0", "wc_reply_authors": "77;0;64", "reply_reviewers": "2;1;0", "reply_authors": "2;1;2", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 80.0, 11.224972160321824 ], "wc_strengths_avg": [ 62.666666666666664, 28.85981442921782 ], "wc_weaknesses_avg": [ 154.33333333333334, 67.83476656962532 ], "wc_questions_avg": [ 58.666666666666664, 75.19899526515557 ], "wc_limitations_avg": [ 10.333333333333334, 6.649979114420002 ], "wc_review_avg": [ 366.0, 49.49747468305833 ], "wc_reply_reviewers_avg": [ 21.666666666666668, 15.755069730795299 ], "wc_reply_authors_avg": [ 47.0, 33.65511352924941 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:iPhcifNIQiIJ:scholar.google.com/&scioq=Regularized+Adaptive+Momentum+Dual+Averaging+with+an+Efficient+Inexact+Subproblem+Solver+for+Training+Structured+Neural+Network&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "csie.ntu.edu.tw;ism.ac.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "National Taiwan University;Institute of Statistical Mathematics", "aff_unique_dep": "Department of Computer Science and Information Engineering;", "aff_unique_url": "https://www.ntu.edu.tw;https://www.ism.ac.jp", "aff_unique_abbr": "NTU;ISM", "aff_campus_unique_index": "0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Japan" }, { "title": "Can an AI Agent Safely Run a Government? Existence of Probably Approximately Aligned Policies", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93093", "id": "xM5m7J6Lbl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xM5m7J6Lbl", "openreview": "https://openreview.net/forum?id=xM5m7J6Lbl", "poster": "/media/PosterPDFs/NeurIPS%202024/93093.png?t=1731695140.2983644", "project": "", "author_site": "Fr\u00e9d\u00e9ric Berdoz, Roger Wattenhofer", "tldr": "", "abstract": "While autonomous agents often surpass humans in their ability to handle vast and complex data, their potential misalignment (i.e., lack of transparency regarding their true objective) has thus far hindered their use in critical applications such as social decision processes. More importantly, existing alignment methods provide no formal guarantees on the safety of such models. Drawing from utility and social choice theory, we provide a novel quantitative definition of alignment in the context of social decision-making. Building on this definition, we introduce probably approximately aligned (i.e., near-optimal) policies, and we derive a sufficient condition for their existence. Lastly, recognizing the practical difficulty of satisfying this condition, we introduce the relaxed concept of safe (i.e., nondestructive) policies, and we propose a simple yet robust method to safeguard the black-box policy of any autonomous agent, ensuring all its actions are verifiably safe for the society.", "keywords": "Alignment;Planning;Social Choice;AI Safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Fr\u00e9d\u00e9ric Berdoz;Roger Wattenhofer", "authorids": "~Fr\u00e9d\u00e9ric_Berdoz1;~Roger_Wattenhofer1", "gender": "M;Not Specified", "homepage": ";https://disco.ethz.ch/members/wroger", "dblp": ";w/RogerWattenhofer", "google_scholar": "27Ud3mIAAAAJ;https://scholar.google.ch/citations?user=EG3VPm4AAAAJ", "orcid": ";", "linkedin": "frederic-berdoz;roger-wattenhofer-4466731/", "or_profile": "~Fr\u00e9d\u00e9ric_Berdoz1;~Roger_Wattenhofer1", "aff": "ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nberdoz2024can,\ntitle={Can an {AI} Agent Safely Run a Government? Existence of Probably Approximately Aligned Policies},\nauthor={Fr{\\'e}d{\\'e}ric Berdoz and Roger Wattenhofer},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xM5m7J6Lbl}\n}", "github": "", "reviewers": "1bQj;KXy4;Fxrx;nJtj", "pdf_size": 689837, "rating": "5;6;6;7", "confidence": "2;2;3;2", "soundness": "2;3;3;4", "novelty": "2;3;3;3", "presentation": "1;3;4;4", "wc_summary": "112;108;90;68", "wc_strengths": "28;137;123;70", "wc_weaknesses": "8;143;48;52", "wc_questions": "43;42;34;24", "wc_limitations": "1;4;45;37", "wc_review": "192;434;340;251", "wc_reply_reviewers": "0;36;9;19", "wc_reply_authors": "0;146;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 2.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.224744871391589 ], "wc_summary_avg": [ 94.5, 17.399712641305314 ], "wc_strengths_avg": [ 89.5, 43.41946568072896 ], "wc_weaknesses_avg": [ 62.75, 49.423552077931426 ], "wc_questions_avg": [ 35.75, 7.628073151196179 ], "wc_limitations_avg": [ 21.75, 19.48557158514987 ], "wc_review_avg": [ 304.25, 91.58158930702174 ], "wc_reply_reviewers_avg": [ 16.0, 13.360389215887388 ], "wc_reply_authors_avg": [ 36.5, 63.21985447626402 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:JNx5_t_48scJ:scholar.google.com/&scioq=Can+an+AI+Agent+Safely+Run+a+Government%3F+Existence+of+Probably+Approximately+Aligned+Policies&hl=en&as_sdt=0,5", "gs_version_total": 7, "email": "ethz.ch;ethz.ch", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Communication-Efficient Federated Group Distributionally Robust Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93092", "id": "xNZEjFe0mh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xNZEjFe0mh", "openreview": "https://openreview.net/forum?id=xNZEjFe0mh", "poster": "/media/PosterPDFs/NeurIPS%202024/93092.png?t=1732141165.9891374", "project": "", "author_site": "Zhishuai Guo, Tianbao Yang", "tldr": "", "abstract": "Federated learning faces challenges due to the heterogeneity in data volumes and distributions at different clients, which can compromise model generalization ability to various distributions. \nExisting approaches to address this issue based on group distributionally robust optimization (GDRO) often lead to high communication and sample complexity.\nTo this end, this work introduces algorithms tailored for communication-efficient Federated Group Distributionally Robust Optimization (FGDRO). Our contributions are threefold: Firstly, we introduce the FGDRO-CVaR algorithm, which optimizes the average top-K losses while reducing communication complexity to $O(1/\\epsilon^4)$, where $\\epsilon$ denotes the desired precision level. Secondly, our FGDRO-KL algorithm is crafted to optimize KL regularized FGDRO, cutting communication complexity to $O(1/\\epsilon^3)$. Lastly, we propose FGDRO-KL-Adam to utilize Adam-type local updates in FGDRO-KL, which not only maintains a communication cost of $O(1/\\epsilon^3)$ but also shows potential to surpass SGD-type local steps in practical applications.\nThe effectiveness of our algorithms has been demonstrated on a variety of real-world tasks, including natural language processing and computer vision.", "keywords": "Federated Learning;Group Distributional Robust Optimization;Communication Efficiency", "primary_area": "optimization", "supplementary_material": "", "author": "Zhishuai Guo;Tianbao Yang", "authorids": "~Zhishuai_Guo1;~Tianbao_Yang1", "gender": "M;M", "homepage": "https://zhishuaiguo.github.io;https://people.tamu.edu/~tianbao-yang/publications.html", "dblp": "221/2907;56/7047", "google_scholar": "sHow-tEAAAAJ;https://scholar.google.com.tw/citations?user=BCxFU0EAAAAJ", "orcid": ";", "linkedin": "zhishuai-guo-5850671b3/;", "or_profile": "~Zhishuai_Guo1;~Tianbao_Yang1", "aff": "Texas A&M University - College Station;Texas A&M University - College Station", "aff_domain": "tamu.edu;tamu.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nguo2024communicationefficient,\ntitle={Communication-Efficient Federated Group Distributionally Robust Optimization},\nauthor={Zhishuai Guo and Tianbao Yang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xNZEjFe0mh}\n}", "github": "", "reviewers": "sEsv;tWjT;rzqA;f6MF", "pdf_size": 942986, "rating": "4;5;5;6", "confidence": "5;3;2;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "4;2;2;3", "wc_summary": "86;26;82;58", "wc_strengths": "22;24;49;45", "wc_weaknesses": "231;49;242;31", "wc_questions": "56;143;36;203", "wc_limitations": "2;2;10;15", "wc_review": "397;244;419;352", "wc_reply_reviewers": "11;26;70;53", "wc_reply_authors": "0;113;32;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 63.0, 23.895606290697042 ], "wc_strengths_avg": [ 35.0, 12.103718436910205 ], "wc_weaknesses_avg": [ 138.25, 98.53267224631635 ], "wc_questions_avg": [ 109.5, 67.3219874929432 ], "wc_limitations_avg": [ 7.25, 5.539629951540085 ], "wc_review_avg": [ 353.0, 67.4054893907017 ], "wc_reply_reviewers_avg": [ 40.0, 22.94558781116753 ], "wc_reply_authors_avg": [ 36.25, 46.197267235194765 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.6488856845230502, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:xGlOY4LcZoQJ:scholar.google.com/&scioq=Communication-Efficient+Federated+Group+Distributionally+Robust+Optimization&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "tamu.edu;tamu.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Keeping LLMs Aligned After Fine-tuning: The Crucial Role of Prompt Templates", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93091", "id": "xNlQjS0dtO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xNlQjS0dtO", "openreview": "https://openreview.net/forum?id=xNlQjS0dtO", "poster": "", "project": "", "author_site": "Kaifeng Lyu, Haoyu Zhao, Xinran Gu, Dingli Yu, Anirudh Goyal, Sanjeev Arora", "tldr": "", "abstract": "Public LLMs such as the Llama 2-Chat underwent alignment training and were considered safe. Recently Qi et al. (2024) reported that even benign fine-tuning on seemingly safe datasets can give rise to unsafe behaviors in the models. The current paper is about methods and best practices to mitigate such loss of alignment. We focus on the setting where a public model is fine-tuned before serving users for specific usage, where the model should improve on the downstream task while maintaining alignment. Through extensive experiments on several chat models (Meta's Llama 2-Chat, Mistral AI's Mistral 7B Instruct v0.2, and OpenAI's GPT-3.5 Turbo), this paper uncovers that the prompt templates used during fine-tuning and inference play a crucial role in preserving safety alignment, and proposes the \u201cPure Tuning, Safe Testing\u201d (PTST) strategy --- fine-tune models without a safety prompt, but include it at test time. This seemingly counterintuitive strategy incorporates an intended distribution shift to encourage alignment preservation. Fine-tuning experiments on GSM8K, ChatDoctor, and OpenOrca show that PTST significantly reduces the rise of unsafe behaviors.", "keywords": "safety prompt;AI alignment", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Kaifeng Lyu;Haoyu Zhao;Xinran Gu;Dingli Yu;Anirudh Goyal;Sanjeev Arora", "authorids": "~Kaifeng_Lyu2;~Haoyu_Zhao1;~Xinran_Gu2;~Dingli_Yu1;~Anirudh_Goyal1;~Sanjeev_Arora1", "gender": "M;M;F;;M;", "homepage": "https://kaifeng.ac/;http://hyzhao.me;;https://dingliyu.net/;https://anirudh9119.github.io/;http://www.cs.princeton.edu/~arora/", "dblp": "220/3283;;;39/578;172/1039;a/SArora", "google_scholar": "843JJtgAAAAJ;1MjanHUAAAAJ;6iYQL8MAAAAJ;KJLJstYAAAAJ;krrh6OUAAAAJ;RUP4S68AAAAJ", "orcid": ";;;0000-0002-8824-8611;;", "linkedin": ";;;;;", "or_profile": "~Kaifeng_Lyu2;~Haoyu_Zhao1;~Xinran_Gu2;~Dingli_Yu1;~Anirudh_Goyal1;~Sanjeev_Arora1", "aff": "Princeton University;Princeton University;Tsinghua University;Princeton University;Google DeepMind;Princeton University", "aff_domain": "princeton.edu;princeton.edu;tsinghua.edu.cn;princeton.edu;google.com;princeton.edu", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nlyu2024keeping,\ntitle={Keeping {LLM}s Aligned After Fine-tuning: The Crucial Role of Prompt Templates},\nauthor={Kaifeng Lyu and Haoyu Zhao and Xinran Gu and Dingli Yu and Anirudh Goyal and Sanjeev Arora},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xNlQjS0dtO}\n}", "github": "", "reviewers": "1poN;SwBC;jeuL;JbMw", "pdf_size": 669665, "rating": "6;6;6;7", "confidence": "4;4;3;5", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "97;50;70;75", "wc_strengths": "31;46;30;81", "wc_weaknesses": "45;142;72;492", "wc_questions": "269;39;78;28", "wc_limitations": "9;3;27;98", "wc_review": "451;280;277;774", "wc_reply_reviewers": "56;145;27;145", "wc_reply_authors": "0;0;0;56", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 16.718253497300488 ], "wc_strengths_avg": [ 47.0, 20.627651344736268 ], "wc_weaknesses_avg": [ 187.75, 179.19036664954956 ], "wc_questions_avg": [ 103.5, 97.34089582493064 ], "wc_limitations_avg": [ 34.25, 37.85085864283663 ], "wc_review_avg": [ 445.5, 202.3147300618519 ], "wc_reply_reviewers_avg": [ 93.25, 52.75592383799188 ], "wc_reply_authors_avg": [ 14.0, 24.24871130596428 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.816496580927726, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15705932095166587&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "princeton.edu;princeton.edu;tsinghua.edu.cn;princeton.edu;google.com;princeton.edu", "author_num": 6, "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Princeton University;Tsinghua University;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.princeton.edu;https://www.tsinghua.edu.cn;https://deepmind.com", "aff_unique_abbr": "Princeton;THU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2;0", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "Universal Online Convex Optimization with $1$ Projection per Round", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93090", "id": "xNncVKbwwS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xNncVKbwwS", "openreview": "https://openreview.net/forum?id=xNncVKbwwS", "poster": "/media/PosterPDFs/NeurIPS%202024/93090.png?t=1731490619.5341961", "project": "", "author_site": "Wenhao Yang, Yibo Wang, Peng Zhao, Lijun Zhang", "tldr": "", "abstract": "To address the uncertainty in function types, recent progress in online convex optimization (OCO) has spurred the development of universal algorithms that simultaneously attain minimax rates for multiple types of convex functions. However, for a $T$-round online problem, state-of-the-art methods typically conduct $O(\\log T)$ projections onto the domain in each round, a process potentially time-consuming with complicated feasible sets. In this paper, inspired by the black-box reduction of Cutkosky and Orabona [2018], we employ a surrogate loss defined over simpler domains to develop universal OCO algorithms that only require $1$ projection. Embracing the framework of prediction with expert advice, we maintain a set of experts for each type of functions and aggregate their predictions via a meta-algorithm. The crux of our approach lies in a uniquely designed expert-loss for strongly convex functions, stemming from an innovative decomposition of the regret into the meta-regret and the expert-regret. Our analysis sheds new light on the surrogate loss, facilitating a rigorous examination of the discrepancy between the regret of the original loss and that of the surrogate loss, and carefully controlling meta-regret under the strong convexity condition. With only $1$ projection per round, we establish optimal regret bounds for general convex, exponentially concave, and strongly convex functions simultaneously. Furthermore, we enhance the expert-loss to exploit the smoothness property, and demonstrate that our algorithm can attain small-loss regret for multiple types of convex and smooth functions.", "keywords": "Online Convex Optimization;Universal Online Learning;Projection", "primary_area": "online_learning", "supplementary_material": "", "author": "Wenhao Yang;Yibo Wang;Peng Zhao;Lijun Zhang", "authorids": "~Wenhao_Yang3;~Yibo_Wang2;~Peng_Zhao1;~Lijun_Zhang1", "gender": "M;;;", "homepage": "http://www.lamda.nju.edu.cn/yangwh/;;;", "dblp": "233/4699;;;", "google_scholar": "ycccau7cWYIC;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Wenhao_Yang3;~Yibo_Wang2;~Peng_Zhao1;~Lijun_Zhang1", "aff": "Nanjing University;;;", "aff_domain": "nju.edu.cn;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nyang2024universal,\ntitle={Universal Online Convex Optimization with \\$1\\$ Projection per Round},\nauthor={Wenhao Yang and Yibo Wang and Peng Zhao and Lijun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xNncVKbwwS}\n}", "github": "", "reviewers": "uhFC;aw3E;VeB9", "pdf_size": 1063481, "rating": "5;6;7", "confidence": "3;5;4", "soundness": "3;4;3", "novelty": "2;3;3", "presentation": "3;4;2", "wc_summary": "50;184;80", "wc_strengths": "36;86;40", "wc_weaknesses": "40;96;139", "wc_questions": "2;84;102", "wc_limitations": "2;6;27", "wc_review": "130;456;388", "wc_reply_reviewers": "88;27;63", "wc_reply_authors": "0;20;23", "reply_reviewers": "1;1;1", "reply_authors": "1;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 104.66666666666667, 57.418541333691934 ], "wc_strengths_avg": [ 54.0, 22.686266036231405 ], "wc_weaknesses_avg": [ 91.66666666666667, 40.5325657822064 ], "wc_questions_avg": [ 62.666666666666664, 43.52266331719857 ], "wc_limitations_avg": [ 11.666666666666666, 10.96458946893235 ], "wc_review_avg": [ 324.6666666666667, 140.42158745086962 ], "wc_reply_reviewers_avg": [ 59.333333333333336, 25.037749277618563 ], "wc_reply_authors_avg": [ 14.333333333333334, 10.208928554075703 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6859047240907577234&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "email": "nju.edu.cn;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Infinite-Dimensional Feature Interaction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93089", "id": "xO9GHdmK76", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xO9GHdmK76", "openreview": "https://openreview.net/forum?id=xO9GHdmK76", "poster": "/media/PosterPDFs/NeurIPS%202024/93089.png?t=1733517688.4011977", "project": "", "author_site": "Chenhui Xu, FUXUN YU, Maoliang Li, Zihao Zheng, Zirui Xu, Jinjun Xiong, Xiang Chen", "tldr": "", "abstract": "The past neural network design has largely focused on feature \\textit{representation space} dimension and its capacity scaling (e.g., width, depth), but overlooked the feature \\textit{interaction space} scaling. \n Recent advancements have shown shifted focus towards element-wise multiplication to facilitate higher-dimensional feature interaction space for better information transformation. Despite this progress, multiplications predominantly capture low-order interactions, thus remaining confined to a finite-dimensional interaction space. To transcend this limitation, classic kernel methods emerge as a promising solution to engage features in an infinite-dimensional space. We introduce InfiNet, a model architecture that enables feature interaction within an infinite-dimensional space created by RBF kernel. Our experiments reveal that InfiNet achieves new state-of-the-art, owing to its capability to leverage infinite-dimensional interactions, significantly enhancing model performance.", "keywords": "deep learning;kernel method;feature interaction", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Chenhui Xu;Fuxun Yu;Maoliang Li;Zihao Zheng;Zirui Xu;Jinjun Xiong;Xiang Chen", "authorids": "~Chenhui_Xu1;~Fuxun_Yu1;~Maoliang_Li1;~Zihao_Zheng5;~Zirui_Xu1;~Jinjun_Xiong1;~Xiang_Chen1", "gender": "M;M;M;M;M;;M", "homepage": "https://minihuihui.github.io;https://scholar.google.com/citations?user=t8vayXEAAAAJ&hl=en;https://ridethepig.github.io/;https://2201210721.com;https://sites.google.com/view/ziruixu/home;https://www.xlab-ub.com;https://if-lab-pku.github.io/", "dblp": "174/1805;215/4440.html;;;;81/1130;64/3062-10.html", "google_scholar": "SjjiXpYAAAAJ;t8vayXEAAAAJ;;;CTDArowAAAAJ;tRt1xPYAAAAJ;QEdR90AAAAAJ", "orcid": "0009-0003-7517-5796;0000-0002-4880-6658;;;;0000-0002-2620-4859;0000-0003-2790-976X", "linkedin": ";;;;;jinjun-xiong-314774/;", "or_profile": "~Chenhui_Xu1;~Fuxun_Yu1;~Maoliang_Li1;~Zihao_Zheng5;~Zirui_Xu1;~Jinjun_Xiong1;~Xiang_Chen1", "aff": "George Mason University;Microsoft;Northwest Polytechnical University Xi'an;Peking University;CVS Health;State University of New York at Buffalo;Peking University", "aff_domain": "gmu.edu;microsoft.com;nwpu.edu.cn;pku.edu.cn;cvshealth.com;buffalo.edu;pku.edu.cn", "position": "PhD student;Principal Researcher;Undergrad student;MS student;Researcher;Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2024infinitedimensional,\ntitle={Infinite-Dimensional Feature Interaction},\nauthor={Chenhui Xu and Fuxun Yu and Maoliang Li and Zihao Zheng and Zirui Xu and Jinjun Xiong and Xiang Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xO9GHdmK76}\n}", "github": "", "reviewers": "u52R;Tii5;USts;Jbm2", "pdf_size": 1045201, "rating": "6;6;7;7", "confidence": "3;4;5;3", "soundness": "3;4;3;3", "novelty": "2;2;3;3", "presentation": "2;4;2;3", "wc_summary": "75;72;76;49", "wc_strengths": "57;70;53;25", "wc_weaknesses": "268;52;360;106", "wc_questions": "51;157;345;32", "wc_limitations": "84;1;102;17", "wc_review": "535;352;936;229", "wc_reply_reviewers": "103;40;47;12", "wc_reply_authors": "182;85;51;32", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 68.0, 11.067971810589327 ], "wc_strengths_avg": [ 51.25, 16.406934509529805 ], "wc_weaknesses_avg": [ 196.5, 123.40482162379232 ], "wc_questions_avg": [ 146.25, 124.24044228833057 ], "wc_limitations_avg": [ 51.0, 42.85440467443224 ], "wc_review_avg": [ 513.0, 267.39016436660495 ], "wc_reply_reviewers_avg": [ 50.5, 33.01893396219811 ], "wc_reply_authors_avg": [ 87.5, 57.768936289324216 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16999326984350028292&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "gmu.edu;microsoft.com;nwpu.edu.cn;pku.edu.cn;cvshealth.com;buffalo.edu;pku.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;3;4;5;3", "aff_unique_norm": "George Mason University;Microsoft;Northwest Polytechnical University;Peking University;CVS Health;State University of New York at Buffalo", "aff_unique_dep": ";Microsoft Corporation;;;;", "aff_unique_url": "https://www.gmu.edu;https://www.microsoft.com;http://www.nwpu.edu.cn;http://www.pku.edu.cn;https://www.cvshealth.com;https://www.buffalo.edu", "aff_unique_abbr": "GMU;Microsoft;NWPU;Peking U;CVS;SUNY Buffalo", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Xi'an;Buffalo", "aff_country_unique_index": "0;0;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Assembly Fuzzy Representation on Hypergraph for Open-Set 3D Object Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93088", "id": "xOCAURlVM9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xOCAURlVM9", "openreview": "https://openreview.net/forum?id=xOCAURlVM9", "poster": "/media/PosterPDFs/NeurIPS%202024/93088.png?t=1730861746.3067813", "project": "", "author_site": "Yang Xu, Yifan Feng, Jun Zhang, Jun-Hai Yong, Yue Gao", "tldr": "", "abstract": "The lack of object-level labels presents a significant challenge for 3D object retrieval in the open-set environment. However, part-level shapes of objects often share commonalities across categories but remain underexploited in existing retrieval methods. In this paper, we introduce the Hypergraph-Based Assembly Fuzzy Representation (HARF) framework, which navigates the intricacies of open-set 3D object retrieval through a bottom-up lens of Part Assembly. To tackle the challenge of assembly isomorphism and unification, we propose the Hypergraph Isomorphism Convolution (HIConv) for smoothing and adopt the Isomorphic Assembly Embedding (IAE) module to generate assembly embeddings with geometric-semantic consistency. To address the challenge of open-set category generalization, our method employs high-order correlations and fuzzy representation to mitigate distribution skew through the Structure Fuzzy Reconstruction (SFR) module, by constructing a leveraged hypergraph based on local certainty and global uncertainty correlations. We construct three open-set retrieval datasets for 3D objects with part-level annotations: OP-SHNP, OP-INTRA, and OP-COSEG. Extensive experiments and ablation studies on these three benchmarks show our method outperforms current state-of-the-art methods.", "keywords": "Hypergraph;3D Object Retrieval;Open-Set Learning;3D Part Assembly;Fuzzy Representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yang Xu;Yifan Feng;Jun Zhang;Jun-Hai Yong;Yue Gao", "authorids": "~Yang_Xu10;~Yifan_Feng1;~Jun_Zhang17;~Jun-Hai_Yong3;~Yue_Gao4", "gender": "M;M;M;M;M", "homepage": ";;https://junzhang.org;https://www.thss.tsinghua.edu.cn/person/yongjunhai;http://www.gaoyue.org", "dblp": ";225/5463;29/4190-18.html;;33/3099-2", "google_scholar": "ecK5NOIAAAAJ;https://scholar.google.com.hk/citations?user=WntYF-sAAAAJ;;;UTDfWocAAAAJ", "orcid": "0000-0002-8691-2726;0000-0003-0878-2986;0000-0001-5579-7094;;", "linkedin": ";;;;", "or_profile": "~Yang_Xu10;~Yifan_Feng1;~Jun_Zhang17;~Jun-Hai_Yong3;~Yue_Gao4", "aff": "School of Software, Tsinghua University;Tsinghua University;Tencent AI Lab;Tsinghua University;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxu2024assembly,\ntitle={Assembly Fuzzy Representation on Hypergraph for Open-Set 3D Object Retrieval},\nauthor={Yang Xu and Yifan Feng and Jun Zhang and Jun-Hai Yong and Yue Gao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xOCAURlVM9}\n}", "github": "", "reviewers": "7vQy;Kfog;zjgH;3mhr;ZtnM", "pdf_size": 0, "rating": "3;5;6;6;7", "confidence": "3;5;4;4;4", "soundness": "3;3;3;3;3", "novelty": "4;2;3;3;3", "presentation": "1;3;3;3;3", "wc_summary": "84;127;138;62;97", "wc_strengths": "40;44;112;39;49", "wc_weaknesses": "122;171;173;146;189", "wc_questions": "2;90;71;5;46", "wc_limitations": "6;41;1;22;13", "wc_review": "254;473;495;274;394", "wc_reply_reviewers": "12;21;38;9;0", "wc_reply_authors": "140;46;46;0;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "3;2;2;1;1", "rating_avg": [ 5.4, 1.3564659966250536 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.6, 0.8000000000000002 ], "wc_summary_avg": [ 101.6, 27.817979797246238 ], "wc_strengths_avg": [ 56.8, 27.82373087851448 ], "wc_weaknesses_avg": [ 160.2, 23.540603220818284 ], "wc_questions_avg": [ 42.8, 35.0051424793558 ], "wc_limitations_avg": [ 16.6, 14.093970341958293 ], "wc_review_avg": [ 378.0, 99.1584590440977 ], "wc_reply_reviewers_avg": [ 16.0, 12.884098726725126 ], "wc_reply_authors_avg": [ 46.4, 51.12181530423191 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4662524041201569, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MEHom4GZEi4J:scholar.google.com/&scioq=Assembly+Fuzzy+Representation+on+Hypergraph+for+Open-Set+3D+Object+Retrieval&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": "School of Software;Tencent AI Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "THU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Structural Inference of Dynamical Systems with Conjoined State Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93087", "id": "xQWJBeK5rh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xQWJBeK5rh", "openreview": "https://openreview.net/forum?id=xQWJBeK5rh", "poster": "/media/PosterPDFs/NeurIPS%202024/93087.png?t=1731344006.8042963", "project": "", "author_site": "Aoran Wang, Jun Pang", "tldr": "", "abstract": "This paper introduces SICSM, a novel structural inference framework that integrates Selective State Space Models (selective SSMs) with Generative Flow Networks (GFNs) to handle the challenges posed by dynamical systems with irregularly sampled trajectories and partial observations. \nBy utilizing the robust temporal modeling capabilities of selective SSMs, our approach learns input-dependent transition functions that adapt to non-uniform time intervals, thereby enhancing the accuracy of structural inference. \nBy aggregating dynamics across diverse temporal dependencies and channeling them into the GFN, the SICSM adeptly approximates the posterior distribution of the system's structure. \nThis process not only enables precise inference of complex interactions within partially observed systems but also ensures the seamless integration of prior knowledge, enhancing the model\u2019s accuracy and robustness.\nExtensive evaluations on sixteen diverse datasets demonstrate that SICSM outperforms existing methods, particularly in scenarios characterized by irregular sampling and incomplete observations, which highlight its potential as a reliable tool for scientific discovery and system diagnostics in disciplines that demand precise modeling of complex interactions.", "keywords": "Structural Inference;AI4Science", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "", "author": "Aoran Wang;Jun Pang", "authorids": "~Aoran_Wang1;~Jun_Pang1", "gender": ";M", "homepage": ";https://satoss.uni.lu/members/jun/", "dblp": ";p/JunPang", "google_scholar": ";0JjOM_gAAAAJ", "orcid": ";0000-0002-4521-4112", "linkedin": ";", "or_profile": "~Aoran_Wang1;~Jun_Pang1", "aff": ";University of Luxembourg", "aff_domain": ";uni.lu", "position": ";Principal Researcher", "bibtex": "@inproceedings{\nwang2024structural,\ntitle={Structural Inference of Dynamical Systems with Conjoined State Space Models},\nauthor={Aoran Wang and Jun Pang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xQWJBeK5rh}\n}", "github": "", "reviewers": "2uCG;3YzK;hoUX;uuWM", "pdf_size": 1905670, "rating": "5;6;6;7", "confidence": "2;4;3;4", "soundness": "3;3;2;4", "novelty": "3;2;3;4", "presentation": "3;2;2;3", "wc_summary": "245;56;98;87", "wc_strengths": "124;38;85;126", "wc_weaknesses": "141;117;177;22", "wc_questions": "212;467;229;71", "wc_limitations": "208;1;10;57", "wc_review": "930;679;599;363", "wc_reply_reviewers": "426;37;27;100", "wc_reply_authors": "995;80;44;78", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 121.5, 72.94689849472697 ], "wc_strengths_avg": [ 93.25, 35.84253757757673 ], "wc_weaknesses_avg": [ 114.25, 57.381944024231174 ], "wc_questions_avg": [ 244.75, 142.21880149966108 ], "wc_limitations_avg": [ 69.0, 83.02108166002175 ], "wc_review_avg": [ 642.75, 202.48503031088495 ], "wc_reply_reviewers_avg": [ 147.5, 163.20922155319533 ], "wc_reply_authors_avg": [ 299.25, 401.9461251212655 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8528028654224418, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8528160961396305661&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 0, "email": ";uni.lu", "author_num": 2, "aff_unique_index": "0", "aff_unique_norm": "University of Luxembourg", "aff_unique_dep": "", "aff_unique_url": "https://wwwen.uniluxembourg.lu", "aff_unique_abbr": "Uni Lu", "aff_country_unique_index": "0", "aff_country_unique": "Luxembourg" }, { "title": "The Implicit Bias of Adam on Separable Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93086", "id": "xRQxan3WkM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xRQxan3WkM", "openreview": "https://openreview.net/forum?id=xRQxan3WkM", "poster": "", "project": "", "author_site": "Chenyang Zhang, Difan Zou, Yuan Cao", "tldr": "", "abstract": "Adam has become one of the most favored optimizers in deep learning problems. Despite its success in practice, numerous mysteries persist regarding its theoretical understanding. In this paper, we study the implicit bias of Adam in linear logistic regression. Specifically, we show that when the training data are linearly separable, the iterates of Adam converge towards a linear classifier that achieves the maximum $\\ell_\\infty$-margin in direction. Notably, for a general class of diminishing learning rates, this convergence occurs within polynomial time. Our result shed light on the difference between Adam and (stochastic) gradient descent from a theoretical perspective.", "keywords": "Adam;implicit bias", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Chenyang Zhang;Difan Zou;Yuan Cao", "authorids": "~Chenyang_Zhang6;~Difan_Zou1;~Yuan_Cao1", "gender": "M;M;M", "homepage": "https://saasweb.hku.hk/student/2023phd.php#chyzhang;https://difanzou.github.io/;https://yuancaohku.github.io/", "dblp": ";161/8923;", "google_scholar": ";Cp4fcTQAAAAJ;-VGnHI4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Chenyang_Zhang6;~Difan_Zou1;~Yuan_Cao1", "aff": "The Univeristy of Hongkong;University of Hong Kong;University of Hong Kong", "aff_domain": "connect.hku.hk;hku.hk;hku.hk", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024the,\ntitle={The Implicit Bias of Adam on Separable Data},\nauthor={Chenyang Zhang and Difan Zou and Yuan Cao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xRQxan3WkM}\n}", "github": "", "reviewers": "Gj1V;CKHd;dtxf;qQ6s", "pdf_size": 560135, "rating": "6;6;7;7", "confidence": "4;4;3;3", "soundness": "3;3;3;4", "novelty": "3;3;4;4", "presentation": "3;3;4;3", "wc_summary": "77;94;103;53", "wc_strengths": "108;60;88;110", "wc_weaknesses": "373;172;19;420", "wc_questions": "39;40;73;94", "wc_limitations": "64;9;1;38", "wc_review": "661;375;284;715", "wc_reply_reviewers": "178;45;23;47", "wc_reply_authors": "164;0;0;23", "reply_reviewers": "2;1;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.75, 19.044356119333624 ], "wc_strengths_avg": [ 91.5, 20.11839953873071 ], "wc_weaknesses_avg": [ 246.0, 160.78712635033938 ], "wc_questions_avg": [ 61.5, 23.221757039466244 ], "wc_limitations_avg": [ 28.0, 24.92990172463582 ], "wc_review_avg": [ 508.75, 183.1124995733497 ], "wc_reply_reviewers_avg": [ 73.25, 61.206106721470206 ], "wc_reply_authors_avg": [ 46.75, 68.34242825653769 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=48747321388976623&as_sdt=8000005&sciodt=0,19&hl=en", "gs_version_total": 4, "email": "connect.hku.hk;hku.hk;hku.hk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Enhancing Semi-Supervised Learning via Representative and Diverse Sample Selection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93085", "id": "xRdpCOdghl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xRdpCOdghl", "openreview": "https://openreview.net/forum?id=xRdpCOdghl", "poster": "/media/PosterPDFs/NeurIPS%202024/93085.png?t=1730711720.0108669", "project": "", "author_site": "Qian Shao, Jiangrui Kang, Qiyuan Chen, Zepeng Li, Hongxia Xu, Yiwen Cao, JIAJUAN LIANG, Jian Wu", "tldr": "", "abstract": "Semi-Supervised Learning (SSL) has become a preferred paradigm in many deep learning tasks, which reduces the need for human labor. Previous studies primarily focus on effectively utilising the labelled and unlabeled data to improve performance. However, we observe that how to select samples for labelling also significantly impacts performance, particularly under extremely low-budget settings. The sample selection task in SSL has been under-explored for a long time. To fill in this gap, we propose a Representative and Diverse Sample Selection approach (RDSS). By adopting a modified Frank-Wolfe algorithm to minimise a novel criterion $\\alpha$-Maximum Mean Discrepancy ($\\alpha$-MMD), RDSS samples a representative and diverse subset for annotation from the unlabeled data. We demonstrate that minimizing $\\alpha$-MMD enhances the generalization ability of low-budget learning. Experimental results show that RDSS consistently improves the performance of several popular SSL frameworks and outperforms the state-of-the-art sample selection approaches used in Active Learning (AL) and Semi-Supervised Active Learning (SSAL), even with constrained annotation budgets. Our code is available at [RDSS](https://github.com/YanhuiAILab/RDSS).", "keywords": "Semi-supervised learning;Sample selection;Low-budget learning", "primary_area": "other", "supplementary_material": "/attachment/eac037c8809c8c76dfb5748e4b2c511278073120.zip", "author": "Qian Shao;Jiangrui Kang;Qiyuan Chen;Zepeng Li;Hongxia Xu;Yiwen Cao;JIAJUAN LIANG;Jian Wu", "authorids": "~Qian_Shao2;~Jiangrui_Kang1;~Qiyuan_Chen1;~Zepeng_Li2;~Hongxia_Xu1;~Yiwen_Cao2;~JIAJUAN_LIANG1;~Jian_Wu6", "gender": "M;M;M;F;M;M;M;M", "homepage": "https://abeier87.github.io/;;https://qiyuan-chen.github.io/;;https://www.uic.edu.cn/en/faculty.htm#/yiwencao/en;;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ;https://lzzppp.github.io", "dblp": "189/1524;387/4420;319/2575-1.html;57/101;;;96/2744-1;48/7448", "google_scholar": "https://scholar.google.com.hk/citations?user=dxBWovEAAAAJ;;;XlpKptAAAAAJ;;3qwbWyQAAAAJ;https://scholar.google.com/citations?hl=zh-TW;Lt9Q9h4AAAAJ", "orcid": "0000-0001-5768-6136;;0000-0002-2315-4972;0000-0001-5384-4627;;;;0000-0002-5105-4004", "linkedin": ";jiangrui-kang-7b4a44242/;;;;;;", "or_profile": "~Qian_Shao2;~Jiangrui_Kang1;~Qiyuan_Chen1;~Hongxia_Xu1;~Yiwen_Cao2;~JIAJUAN_LIANG1;~Jian_Wu6;~zepeng_Li1", "aff": "Zhejiang University;Illinois Institute of Technology;Central China Normal University;Zhejiang University;Beijing Normal University Hong Kong Baptist University United International College;;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;hawk.iit.edu;ccnu.edu.cn;zju.edu.cn;uic.edu.cn;;zju.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;Undergrad student;Researcher;Instructor;;Full Professor;PhD student", "bibtex": "@inproceedings{\nshao2024enhancing,\ntitle={Enhancing Semi-Supervised Learning via Representative and Diverse Sample Selection},\nauthor={Qian Shao and Jiangrui Kang and Qiyuan Chen and Zepeng Li and Hongxia Xu and Yiwen Cao and JIAJUAN LIANG and Jian Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xRdpCOdghl}\n}", "github": "", "reviewers": "CxrG;sn3m;GzC9;Qb3b", "pdf_size": 3182667, "rating": "4;5;6;6", "confidence": "5;3;2;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "60;75;42;92", "wc_strengths": "70;21;42;30", "wc_weaknesses": "408;34;27;253", "wc_questions": "35;161;5;4", "wc_limitations": "2;2;5;4", "wc_review": "575;293;121;383", "wc_reply_reviewers": "206;9;74;108", "wc_reply_authors": "400;70;63;73", "reply_reviewers": "1;1;2;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.25, 18.45772196128222 ], "wc_strengths_avg": [ 40.75, 18.45772196128222 ], "wc_weaknesses_avg": [ 180.5, 159.716154474117 ], "wc_questions_avg": [ 51.25, 64.57698893568823 ], "wc_limitations_avg": [ 3.25, 1.299038105676658 ], "wc_review_avg": [ 343.0, 163.71316379570703 ], "wc_reply_reviewers_avg": [ 99.25, 71.15959176386554 ], "wc_reply_authors_avg": [ 151.5, 143.51742054538187 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.899228803025897, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:vDEQGPvtPTcJ:scholar.google.com/&scioq=Enhancing+Semi-Supervised+Learning+via+Representative+and+Diverse+Sample+Selection&hl=en&as_sdt=0,47", "gs_version_total": 3, "email": "zju.edu.cn;hawk.iit.edu;ccnu.edu.cn;zju.edu.cn;uic.edu.cn;;zju.edu.cn;zju.edu.cn", "author_num": 8, "aff_unique_index": "0;1;2;0;3;0;0", "aff_unique_norm": "Zhejiang University;Illinois Institute of Technology;Central China Normal University;Beijing Normal University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.iit.edu;http://www.ccnu.edu.cn;https://www.bnu.edu.cn", "aff_unique_abbr": "ZJU;IIT;CCNU;BNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "On $f$-Divergence Principled Domain Adaptation: An Improved Framework", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93084", "id": "xSU27DgWEr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xSU27DgWEr", "openreview": "https://openreview.net/forum?id=xSU27DgWEr", "poster": "", "project": "", "author_site": "Ziqiao Wang, Yongyi Mao", "tldr": "", "abstract": "Unsupervised domain adaptation (UDA) plays a crucial role in addressing distribution shifts in machine learning. In this work, we improve the theoretical foundations of UDA proposed in Acuna et al. (2021) by refining their $f$-divergence-based discrepancy and additionally introducing a new measure, $f$-domain discrepancy ($f$-DD). By removing the absolute value function and incorporating a scaling parameter, $f$-DD obtains novel target error and sample complexity bounds, allowing us to recover previous KL-based results and bridging the gap between algorithms and theory presented in Acuna et al. (2021). Using a localization technique, we also develop a fast-rate generalization bound. Empirical results demonstrate the superior performance of $f$-DD-based learning algorithms over previous works in popular UDA benchmarks.", "keywords": "learning theory;unsupervised domain adaptation;f-divergence;generalization", "primary_area": "learning_theory", "supplementary_material": "/attachment/1caa0ef8404f4dadd4c615f26786c25c8acb2597.zip", "author": "Ziqiao Wang;Yongyi Mao", "authorids": "~Ziqiao_Wang1;~Yongyi_Mao2", "gender": "M;M", "homepage": "https://ziqiaowanggeothe.github.io;http://www.eecs.uottawa.ca/~yymao", "dblp": "222/9220;86/2933", "google_scholar": "iBL7APIAAAAJ;https://scholar.google.ca/citations?user=jM5l70wAAAAJ", "orcid": "0000-0003-0504-4830;0000-0001-5298-5778", "linkedin": "ziqiao-wang-987565155/?locale=en_US;", "or_profile": "~Ziqiao_Wang1;~Yongyi_Mao1", "aff": "University of Ottawa;University of Ottawa", "aff_domain": "uottawa.ca;eecs.uottawa.ca", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2024on,\ntitle={On \\$f\\$-Divergence Principled Domain Adaptation: An Improved Framework},\nauthor={Ziqiao Wang and Yongyi Mao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xSU27DgWEr}\n}", "github": "", "reviewers": "HkGQ;vWvG;CqSQ;EpWo;zRKu", "pdf_size": 1146094, "rating": "4;6;6;7;7", "confidence": "3;3;3;2;3", "soundness": "3;3;3;3;3", "novelty": "4;3;2;3;3", "presentation": "2;3;3;3;3", "wc_summary": "84;83;90;84;151", "wc_strengths": "30;64;73;49;113", "wc_weaknesses": "76;56;110;22;42", "wc_questions": "7;34;11;292;176", "wc_limitations": "5;2;1;15;15", "wc_review": "202;239;285;462;497", "wc_reply_reviewers": "0;17;31;61;0", "wc_reply_authors": "0;21;17;17;0", "reply_reviewers": "0;1;1;1;0", "reply_authors": "1;2;2;2;1", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 98.4, 26.41666140904259 ], "wc_strengths_avg": [ 65.8, 27.737339454244704 ], "wc_weaknesses_avg": [ 61.2, 30.109134826494103 ], "wc_questions_avg": [ 104.0, 112.68185301990734 ], "wc_limitations_avg": [ 7.6, 6.1838499334961226 ], "wc_review_avg": [ 337.0, 119.79816359193491 ], "wc_reply_reviewers_avg": [ 21.8, 22.7806935803105 ], "wc_reply_authors_avg": [ 11.0, 9.09945053286186 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4564354645876385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16825508037581706099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "uottawa.ca;eecs.uottawa.ca", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Ottawa", "aff_unique_dep": "", "aff_unique_url": "https://www.uottawa.ca", "aff_unique_abbr": "U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Implicit Optimization Bias of Next-token Prediction in Linear Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93083", "id": "xSziO6gQgG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xSziO6gQgG", "openreview": "https://openreview.net/forum?id=xSziO6gQgG", "poster": "", "project": "", "tldr": "", "abstract": "We initiate an investigation into the optimization properties of next-token prediction (NTP), the dominant training paradigm for modern language models. Specifically, we study the structural properties of the solutions selected by gradient-based optimizers among the many possible minimizers of the NTP objective. By framing NTP as cross-entropy minimization across \\emph{distinct} contexts, each tied with a \\emph{sparse} conditional probability distribution across a finite vocabulary of tokens, we introduce ``NTP-separability conditions'' that enable reaching the data-entropy lower bound. With this setup, and focusing on linear models with fixed context embeddings, we characterize the optimization bias of gradient descent (GD): Within the data subspace defined by the sparsity patterns of distinct contexts, GD selects parameters that equate the logits' differences of in-support tokens to their log-odds. In the orthogonal subspace, the GD parameters diverge in norm and select the direction that maximizes a margin specific to NTP. These findings extend previous research on implicit bias in one-hot classification to the NTP setting, highlighting key differences and prompting further research into the optimization and generalization properties of NTP, irrespective of the specific architecture used to generate the context embeddings.", "keywords": "entropy;gradient descent;SVM;next-token prediction;linear models;separability;language models;word embeddings", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Christos Thrampoulidis", "authorids": "~Christos_Thrampoulidis1", "gender": "", "homepage": "https://sites.google.com/view/cthrampo/home", "dblp": "127/6532", "google_scholar": "", "orcid": "", "linkedin": "", "or_profile": "~Christos_Thrampoulidis1", "aff": "University of British Columbia", "aff_domain": "ubc.ca", "position": "Assistant Professor", "bibtex": "@inproceedings{\nthrampoulidis2024implicit,\ntitle={Implicit Optimization Bias of Next-token Prediction in Linear Models},\nauthor={Christos Thrampoulidis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xSziO6gQgG}\n}", "github": "", "reviewers": "k4Ko;YBdf;kgdu;6D5d", "pdf_size": 2133027, "rating": "5;5;7;7", "confidence": "3;4;3;2", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;3", "wc_summary": "173;59;80;37", "wc_strengths": "45;20;76;34", "wc_weaknesses": "161;29;105;37", "wc_questions": "2;151;9;6", "wc_limitations": "2;35;9;1", "wc_review": "383;294;279;115", "wc_reply_reviewers": "0;116;91;44", "wc_reply_authors": "29;547;22;71", "reply_reviewers": "0;2;1;1", "reply_authors": "2;4;2;3", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 87.25, 51.78983973715308 ], "wc_strengths_avg": [ 43.75, 20.620075169601105 ], "wc_weaknesses_avg": [ 83.0, 53.85164807134504 ], "wc_questions_avg": [ 42.0, 62.98015560476173 ], "wc_limitations_avg": [ 11.75, 13.77270852083932 ], "wc_review_avg": [ 267.75, 96.73514097782666 ], "wc_reply_reviewers_avg": [ 62.75, 44.5049154588569 ], "wc_reply_authors_avg": [ 167.25, 220.04814814035586 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3772673923960238886&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "ubc.ca", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "A Benchmark Suite for Evaluating Neural Mutual Information Estimators on Unstructured Datasets", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97443", "id": "xT5pmUju8W", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xT5pmUju8W", "openreview": "https://openreview.net/forum?id=xT5pmUju8W", "poster": "/media/PosterPDFs/NeurIPS%202024/97443.png?t=1733068260.4050093", "project": "", "author_site": "Kyungeun Lee, Wonjong Rhee", "tldr": "", "abstract": "Mutual Information (MI) is a fundamental metric for quantifying dependency between two random variables. When we can access only the samples, but not the underlying distribution functions, we can evaluate MI using sample-based estimators. Assessment of such MI estimators, however, has almost always relied on analytical datasets including Gaussian multivariates. Such datasets allow analytical calculations of the true MI values, but they are limited in that they do not reflect the complexities of real-world datasets. This study introduces a comprehensive benchmark suite for evaluating neural MI estimators on unstructured datasets, specifically focusing on images and texts. By leveraging same-class sampling for positive pairing and introducing a binary symmetric channel trick, we show that we can accurately manipulate true MI values of real-world datasets. Using the benchmark suite, we investigate seven challenging scenarios, shedding light on the reliability of neural MI estimators for unstructured datasets.", "keywords": "Mutual information; Mutual information estimation", "primary_area": "", "supplementary_material": "", "author": "Kyungeun Lee;Wonjong Rhee", "authorids": "~Kyungeun_Lee1;~Wonjong_Rhee1", "gender": "F;", "homepage": "https://sites.google.com/view/cvkyungeunlee/;http://drl.snu.ac.kr", "dblp": "230/3844;37/711", "google_scholar": "ASy-_MEAAAAJ;https://scholar.google.co.kr/citations?user=htFuYWsAAAAJ", "orcid": "0000-0002-1674-7147;0000-0002-2590-8774", "linkedin": ";wonjong/", "or_profile": "~Kyungeun_Lee1;~Wonjong_Rhee1", "aff": "LG AI Research;Seoul National University", "aff_domain": "lgresearch.ai;snu.ac.kr", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nlee2024a,\ntitle={A Benchmark Suite for Evaluating Neural Mutual Information Estimators on Unstructured Datasets},\nauthor={Kyungeun Lee and Wonjong Rhee},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xT5pmUju8W}\n}", "github": "", "reviewers": "GBdp;7BAF;AxEX", "pdf_size": 5973872, "rating": "6;7;7", "confidence": "4;3;3", "wc_summary_and_contributions": "124;56;192", "wc_strengths": "82;82;73", "wc_improvement": "204;76;101", "wc_limitations": "11;22;26", "wc_correctness": "38;12;10", "wc_clarity": "14;9;14", "wc_relation_to_prior_work": "24;15;1", "wc_documentation": "26;15;1", "wc_additional_feedback": "1;1;1", "wc_review": "524;288;419", "wc_reply_reviewers": "70;12;11", "wc_reply_authors": "44;9;11", "reply_reviewers": "1;1;1", "reply_authors": "4;2;2", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 124.0, 55.52176750308537 ], "wc_strengths_avg": [ 79.0, 4.242640687119285 ], "wc_improvement_avg": [ 127.0, 55.395547354157145 ], "wc_limitations_avg": [ 19.666666666666668, 6.342099196813483 ], "wc_correctness_avg": [ 20.0, 12.754084313139327 ], "wc_clarity_avg": [ 12.333333333333334, 2.357022603955158 ], "wc_relation_to_prior_work_avg": [ 13.333333333333334, 9.463379711052259 ], "wc_documentation_avg": [ 14.0, 10.23067283548187 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 410.3333333333333, 96.54129801397028 ], "wc_reply_reviewers_avg": [ 31.0, 27.58018612458347 ], "wc_reply_authors_avg": [ 21.333333333333332, 16.048537489614297 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9805497901108052030&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 3, "email": "lgresearch.ai;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "LG;Seoul National University", "aff_unique_dep": "LG AI Research;", "aff_unique_url": "https://www.lgaires.com;https://www.snu.ac.kr", "aff_unique_abbr": "LG AI;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "ReVideo: Remake a Video with Motion and Content Control", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93082", "id": "xUjBZR6b1T", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xUjBZR6b1T", "openreview": "https://openreview.net/forum?id=xUjBZR6b1T", "poster": "/media/PosterPDFs/NeurIPS%202024/93082.png?t=1733491513.9521909", "project": "", "author_site": "Chong Mou, Mingdeng Cao, Xintao Wang, Zhaoyang Zhang, Ying Shan, Jian Zhang", "tldr": "", "abstract": "Despite significant advancements in video generation and editing using diffusion models, achieving accurate and localized video editing remains a substantial challenge. Additionally, most existing video editing methods primarily focus on altering visual content, with limited research dedicated to motion editing. In this paper, we present a novel attempt to Remake a Video (ReVideo) which stands out from existing methods by allowing precise video editing in specific areas through the specification of both content and motion. Content editing is facilitated by modifying the first frame, while the trajectory-based motion control offers an intuitive user interaction experience. ReVideo addresses a new task involving the coupling and training imbalance between content and motion control. To tackle this, we develop a three-stage training strategy that progressively decouples these two aspects from coarse to fine. Furthermore, we propose a spatiotemporal adaptive fusion module to integrate content and motion control across various sampling steps and spatial locations. Extensive experiments demonstrate that our ReVideo has promising performance on several accurate video editing applications, i.e., (1) locally changing video content while keeping the motion constant, (2) keeping content unchanged and customizing new motion trajectories, (3) modifying both content and motion trajectories. Our method can also seamlessly extend these applications to multi-area editing without specific training, demonstrating its flexibility and robustness.", "keywords": "Diffusion model;Video editing", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/b5aa2cd28380b8f65bb6360652f7e186ce530b04.zip", "author": "Chong Mou;Mingdeng Cao;Xintao Wang;Zhaoyang Zhang;Ying Shan;Jian Zhang", "authorids": "~Chong_Mou1;~Mingdeng_Cao1;~Xintao_Wang1;~Zhaoyang_Zhang1;~Ying_Shan2;~Jian_Zhang22", "gender": "M;M;;M;M;M", "homepage": ";https://github.com/ljzycmd;;https://zzyfd.github.io/#/;;http://jianzhang.tech/", "dblp": "276/3204.html;290/8525;;;68/5910;07/314-18", "google_scholar": "https://scholar.google.com.hk/citations?user=SYQoDk0AAAAJ;EcS0L5sAAAAJ;;Pf6o7uAAAAAJ;4oXBp9UAAAAJ;7brFI_4AAAAJ", "orcid": ";;;;0000-0001-7673-8325;0000-0001-5486-3125", "linkedin": ";;;;YingShanProfile/;", "or_profile": "~Chong_Mou1;~Mingdeng_Cao1;~Xintao_Wang1;~Zhaoyang_Zhang1;~Ying_Shan2;~Jian_Zhang22", "aff": "Peking University;The University of Tokyo ;;The Chinese University of Hong Kong;Tencent PCG ARC Lab;Peking University", "aff_domain": "pku.edu.cn;u-tokyo.ac.jp;;cuhk.edu.hk;arc.tencent.com;pku.edu.cn", "position": "PhD student;PhD student;;PhD student;Director;Assistant Professor", "bibtex": "@inproceedings{\nmou2024revideo,\ntitle={ReVideo: Remake a Video with Motion and Content Control},\nauthor={Chong Mou and Mingdeng Cao and Xintao Wang and Zhaoyang Zhang and Ying Shan and Jian Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xUjBZR6b1T}\n}", "github": "", "reviewers": "ZSh9;F4MH;YHcN", "pdf_size": 0, "rating": "5;6;7", "confidence": "3;3;5", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "67;60;65", "wc_strengths": "76;165;66", "wc_weaknesses": "129;954;312", "wc_questions": "29;6;60", "wc_limitations": "4;6;3", "wc_review": "305;1191;506", "wc_reply_reviewers": "721;166;40", "wc_reply_authors": "643;26;26", "reply_reviewers": "4;1;1", "reply_authors": "6;2;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 64.0, 2.943920288775949 ], "wc_strengths_avg": [ 102.33333333333333, 44.4996878890428 ], "wc_weaknesses_avg": [ 465.0, 353.75415191909764 ], "wc_questions_avg": [ 31.666666666666668, 22.125902367034783 ], "wc_limitations_avg": [ 4.333333333333333, 1.247219128924647 ], "wc_review_avg": [ 667.3333333333334, 379.27152396256815 ], "wc_reply_reviewers_avg": [ 309.0, 295.83441314356924 ], "wc_reply_authors_avg": [ 231.66666666666666, 290.8565893280666 ], "reply_reviewers_avg": [ 2.0, 1.4142135623730951 ], "reply_authors_avg": [ 3.3333333333333335, 1.8856180831641267 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11579581042511335674&as_sdt=20000005&sciodt=0,21&hl=en", "gs_version_total": 4, "email": "pku.edu.cn;u-tokyo.ac.jp;;cuhk.edu.hk;arc.tencent.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Peking University;University of Tokyo;Chinese University of Hong Kong;Tencent", "aff_unique_dep": ";;;PCG ARC Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.u-tokyo.ac.jp;https://www.cuhk.edu.hk;https://www.tencent.com", "aff_unique_abbr": "Peking U;UTokyo;CUHK;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Japan" }, { "title": "Interpreting Learned Feedback Patterns in Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93081", "id": "xUoNgR1Byy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xUoNgR1Byy", "openreview": "https://openreview.net/forum?id=xUoNgR1Byy", "poster": "/media/PosterPDFs/NeurIPS%202024/93081.png?t=1733191984.3715093", "project": "", "author_site": "Luke Marks, Amir Abdullah, Clement Neo, Rauno Arike, David Krueger, Philip Torr, Fazl Barez", "tldr": "", "abstract": "Reinforcement learning from human feedback (RLHF) is widely used to train large language models (LLMs). However, it is unclear whether LLMs accurately learn the underlying preferences in human feedback data. We coin the term **Learned Feedback Pattern** (LFP) for patterns in an LLM's activations learned during RLHF that improve its performance on the fine-tuning task. We hypothesize that LLMs with LFPs accurately aligned to the fine-tuning feedback exhibit consistent activation patterns for outputs that would have received similar feedback during RLHF. To test this, we train probes to estimate the feedback signal implicit in the activations of a fine-tuned LLM. We then compare these estimates to the true feedback, measuring how accurate the LFPs are to the fine-tuning feedback. Our probes are trained on a condensed, sparse and interpretable representation of LLM activations, making it easier to correlate features of the input with our probe's predictions. We validate our probes by comparing the neural features they correlate with positive feedback inputs against the features GPT-4 describes and classifies as related to LFPs. Understanding LFPs can help minimize discrepancies between LLM behavior and training objectives, which is essential for the **safety** and **alignment** of LLMs.", "keywords": "Interpretability;Reward Models;Safety", "primary_area": "interpretability_and_explainability", "supplementary_material": "/attachment/572c88aa15b170dd8077543010aef15a77d25918.zip", "author": "Luke Marks;Amir Abdullah;Clement Neo;Rauno Arike;David Krueger;Philip Torr;Fazl Barez", "authorids": "~Luke_Marks2;~Amir_Abdullah1;~Clement_Neo1;~Rauno_Arike1;~David_Krueger1;~Philip_Torr1;~Fazl_Barez1", "gender": "M;M;;M;M;;", "homepage": "https://lukemarks.bot/;;https://clementneo.com;;https://mila.umontreal.ca/en/person/david-scott-krueger/;http://www.robots.ox.ac.uk/~tvg/;", "dblp": "358/8955;358/8964;367/9292;;142/2741.html;;", "google_scholar": "xoQcqAQAAAAJ;jPEbq5wAAAAJ;Y2-g_2cAAAAJ;gJSs1EwAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ;;", "orcid": ";;;;;;", "linkedin": ";amirali-abdullah-23273314/;;rauno-arike/;;;", "or_profile": "~Luke_Marks2;~Amir_Abdullah1;~Clement_Neo1;~Rauno_Arike1;~David_Krueger1;~Philip_Torr1;~Fazl_Barez1", "aff": "University of Oxford;Cynch AI;Nanyang Technological University;Delft University of Technology;University of Cambridge;University of Oxford;", "aff_domain": "oxford.ac.uk;cynch.ai;ntu.edu.sg;tudelft.nl;cam.ac.uk;ox.ac.uk;", "position": "Intern;Applied Scientist;Undergrad student;Undergrad student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nmarks2024interpreting,\ntitle={Interpreting Learned Feedback Patterns in Large Language Models},\nauthor={Luke Marks and Amir Abdullah and Clement Neo and Rauno Arike and David Krueger and Philip Torr and Fazl Barez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xUoNgR1Byy}\n}", "github": "", "reviewers": "dzK7;d8nm;zoqc;81zn", "pdf_size": 1583673, "rating": "4;5;6;6", "confidence": "3;1;4;4", "soundness": "1;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;4;3", "wc_summary": "113;274;43;78", "wc_strengths": "89;15;32;92", "wc_weaknesses": "500;137;90;204", "wc_questions": "3;131;47;113", "wc_limitations": "73;12;1;1", "wc_review": "778;569;213;488", "wc_reply_reviewers": "403;0;32;62", "wc_reply_authors": "454;0;0;347", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;1;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 1.224744871391589 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 127.0, 88.40531658220561 ], "wc_strengths_avg": [ 57.0, 34.05143168796284 ], "wc_weaknesses_avg": [ 232.75, 159.5264476505385 ], "wc_questions_avg": [ 73.5, 51.32981589680602 ], "wc_limitations_avg": [ 21.75, 29.92803869283786 ], "wc_review_avg": [ 512.0, 202.47345505028554 ], "wc_reply_reviewers_avg": [ 124.25, 162.42286630890368 ], "wc_reply_authors_avg": [ 200.25, 203.7920202068766 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.49236596391733084, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:nqmstri4FeUJ:scholar.google.com/&scioq=Interpreting+Learned+Feedback+Patterns+in+Large+Language+Models&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "oxford.ac.uk;cynch.ai;ntu.edu.sg;tudelft.nl;cam.ac.uk;ox.ac.uk;", "author_num": 7, "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "University of Oxford;Cynch AI;Nanyang Technological University;Delft University of Technology;University of Cambridge", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.cynch.ai;https://www.ntu.edu.sg;https://www.tudelft.nl;https://www.cam.ac.uk", "aff_unique_abbr": "Oxford;Cynch AI;NTU;TU Delft;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;3;0;0", "aff_country_unique": "United Kingdom;United States;Singapore;Netherlands" }, { "title": "pFedClub: Controllable Heterogeneous Model Aggregation for Personalized Federated Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93080", "id": "xW6ga9i4eA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xW6ga9i4eA", "openreview": "https://openreview.net/forum?id=xW6ga9i4eA", "poster": "", "project": "", "author_site": "Jiaqi Wang, Qi Li, Lingjuan Lyu, Fenglong Ma", "tldr": "", "abstract": "Federated learning, a pioneering paradigm, enables collaborative model training without exposing users\u2019 data to central servers. Most existing federated learning systems necessitate uniform model structures across all clients, restricting their practicality. Several methods have emerged to aggregate diverse client models; however, they either lack the ability of personalization, raise privacy and security concerns, need prior knowledge, or ignore the capability and functionality of personalized models. In this paper, we present an innovative approach, named pFedClub, which addresses these challenges. pFedClub introduces personalized federated learning through the substitution of controllable neural network blocks/layers. Initially, pFedClub dissects heterogeneous client models into blocks and organizes them into functional groups on the server. Utilizing the designed CMSR (Controllable Model Searching and Reproduction) algorithm, pFedClub generates a range of personalized candidate models for each client. A model matching technique is then applied to select the optimal personalized model, serving as a teacher model to guide each client\u2019s training process. We conducted extensive experiments across three datasets, examining both IID and non-IID settings. The results demonstrate that pFedClub outperforms baseline approaches, achieving state-of-the-art performance. Moreover, our model insight analysis reveals that pFedClub generates personalized models of reasonable size in a controllable manner, significantly reducing computational costs.", "keywords": "Heterogenous federated learning", "primary_area": "infrastructure", "supplementary_material": "/attachment/8e5cf8e938968efb4d4463104e8f5c5c124fa92c.zip", "author": "Jiaqi Wang;Qi Li;Lingjuan Lyu;Fenglong Ma", "authorids": "~Jiaqi_Wang4;~Qi_Li14;~Lingjuan_Lyu1;~Fenglong_Ma1", "gender": ";F;F;M", "homepage": ";https://sites.google.com/iastate.edu/qili/;https://sites.google.com/view/lingjuan-lyu;https://fenglong-ma.github.io/", "dblp": ";181/2688-12;178/9876;85/10856", "google_scholar": ";Gvld0foAAAAJ;;DLJIxNMAAAAJ", "orcid": ";0000-0002-3136-2157;;0000-0002-4999-0303", "linkedin": ";;;fenglong-ma-69805832/", "or_profile": "~Jiaqi_Wang4;~Qi_Li14;~Lingjuan_Lyu1;~Fenglong_Ma1", "aff": ";Iowa State University;Sony;Pennsylvania State University", "aff_domain": ";iastate.edu;sony.com;psu.edu", "position": ";Assistant Professor;scientist;Assistant Professor", "bibtex": "@inproceedings{\nwang2024pfedclub,\ntitle={pFedClub: Controllable Heterogeneous Model Aggregation for Personalized Federated Learning},\nauthor={Jiaqi Wang and Qi Li and Lingjuan Lyu and Fenglong Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xW6ga9i4eA}\n}", "github": "", "reviewers": "AXhq;WRnm;PfmG;5tH7", "pdf_size": 1474020, "rating": "5;7;7;8", "confidence": "4;5;5;4", "soundness": "2;4;3;3", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "58;88;58;107", "wc_strengths": "26;104;105;54", "wc_weaknesses": "198;84;100;92", "wc_questions": "202;5;46;2", "wc_limitations": "1;29;45;2", "wc_review": "485;310;354;257", "wc_reply_reviewers": "58;15;21;33", "wc_reply_authors": "94;0;15;18", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 77.75, 20.8611480987984 ], "wc_strengths_avg": [ 72.25, 33.737034546622496 ], "wc_weaknesses_avg": [ 118.5, 46.24662149822406 ], "wc_questions_avg": [ 63.75, 81.68958011888664 ], "wc_limitations_avg": [ 19.25, 18.632968094214082 ], "wc_review_avg": [ 351.5, 84.38157381798469 ], "wc_reply_reviewers_avg": [ 31.75, 16.48294573187693 ], "wc_reply_authors_avg": [ 31.75, 36.5812451947716 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2294157338705618, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7920735482956940319&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": ";iastate.edu;sony.com;psu.edu", "author_num": 4, "aff_unique_index": "0;1;2", "aff_unique_norm": "Iowa State University;Sony Corporation;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iastate.edu;https://www.sony.com;https://www.psu.edu", "aff_unique_abbr": "ISU;Sony;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "SelfCodeAlign: Self-Alignment for Code Generation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93079", "id": "xXRnUU7xTL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xXRnUU7xTL", "openreview": "https://openreview.net/forum?id=xXRnUU7xTL", "poster": "", "project": "", "author_site": "Yuxiang Wei, Federico Cassano, Jiawei Liu, Yifeng Ding, Naman Jain, Zachary Mueller, Harm de Vries, Leandro Von Werra, Arjun Guha, LINGMING ZHANG", "tldr": "", "abstract": "Instruction tuning is a supervised fine-tuning approach that significantly improves the ability of large language models (LLMs) to follow human instructions. For programming tasks, most models are finetuned with costly human-annotated instruction-response pairs or those generated by large, proprietary LLMs, which may not be permitted. We propose SelfCodeAlign, the first fully transparent and permissive pipeline for self-aligning code LLMs without extensive human annotations or distillation. SelfCodeAlign employs the same base model for inference throughout the data generation process. It first extracts diverse coding concepts from high-quality seed snippets to generate new tasks. It then samples multiple responses per task, pairs each with test cases, and validates them in a sandbox environment. Finally, passing examples are selected for instruction tuning. In our primary experiments, we use SelfCodeAlign with CodeQwen1.5-7B to generate a dataset of 74k instruction-response pairs. Finetuning on this dataset leads to a model that achieves a 67.1 pass@1 on HumanEval+, surpassing CodeLlama-70B-Instruct despite being ten times smaller. Across all benchmarks, this finetuned model consistently outperforms the original version trained with OctoPack, the previous state-of-the-art method for instruction tuning without human annotations or distillation. Additionally, we show that SelfCodeAlign is effective across LLMs of various sizes, from 3B to 33B, and that the base models can benefit more from alignment with their own data distribution. We further validate each component\u2019s effectiveness in our pipeline, showing that SelfCodeAlign outperforms both direct distillation from GPT-4o and leading GPT-3.5-based distillation methods, such as OSS-Instruct and Evol-Instruct. SelfCodeAlign has also led to the creation of StarCoder2-Instruct, the first fully transparent, permissively licensed, and self-aligned code LLM that achieves state-of-the-art coding performance. Overall, SelfCodeAlign shows for the first time that a strong instruction-tuned code LLM can result from self-alignment rather than distillation.", "keywords": "Large language models;Code generation;Instruction tuning;Self-Alignment", "primary_area": "generative_models", "supplementary_material": "/attachment/bcec3bbed475d368a97007019676fbf3d48cb111.zip", "author": "Yuxiang Wei;Federico Cassano;Jiawei Liu;Yifeng Ding;Naman Jain;Zachary Mueller;Harm de Vries;Leandro Von Werra;Arjun Guha;LINGMING ZHANG", "authorids": "~Yuxiang_Wei2;~Federico_Cassano1;~Jiawei_Liu11;~Yifeng_Ding2;~Naman_Jain2;~Zachary_Mueller1;~Harm_de_Vries1;~Leandro_Von_Werra1;~Arjun_Guha3;~LINGMING_ZHANG2", "gender": "M;;M;M;M;M;M;M;M;M", "homepage": "https://yuxiang.cs.illinois.edu;https://federico.codes;https://jiawei-site.github.io/;https://yifeng-ding.com/;https://naman-ntc.github.io/;https://muellerzr.github.io;;https://github.com/lvwerra;https://khoury.northeastern.edu/~arjunguha;http://lingming.cs.illinois.edu/", "dblp": "301/1212;326/8944;12/8228-4;;;;;223/1855;15/2016;27/7057-1", "google_scholar": "Clrvw6kAAAAJ;n8tK15oAAAAJ;Vw6el1AAAAAJ;ipXUDHgAAAAJ;6oqV3v8AAAAJ;;LWrdpCsAAAAJ;https://scholar.google.com/citations?hl=en;yMU0f9EAAAAJ;zzbWQE4AAAAJ", "orcid": "0000-0002-4391-3753;0000-0002-9318-7454;0000-0001-7122-8625;;;;;;;", "linkedin": "yuxiang-wei-a94a63205/;;jiawei-liu-uiuc/;;;;;lvwerra/;;", "or_profile": "~Yuxiang_Wei2;~Federico_Cassano1;~Jiawei_Liu11;~Yifeng_Ding2;~Naman_Jain2;~Zachary_Mueller1;~Harm_de_Vries1;~Leandro_Von_Werra1;~Arjun_Guha3;~LINGMING_ZHANG2", "aff": "Snowflake;Northeastern University;Amazon;Amazon;University of California, Berkeley;Hugging Face;ServiceNow Research;Hugging Face;Roblox Research;University of Illinois Urbana-Champaign", "aff_domain": "snowflake.com;neu.edu;amazon.com;amazon.com;berkeley.edu;huggingface.co;elementai.com;hf.co;roblox.com;cs.illinois.edu", "position": "Intern;Undergrad student;Intern;Intern;PhD student;Researcher;Researcher;Researcher;Visiting Professor;Associate Professor", "bibtex": "@inproceedings{\nwei2024selfcodealign,\ntitle={SelfCodeAlign: Self-Alignment for Code Generation},\nauthor={Yuxiang Wei and Federico Cassano and Jiawei Liu and Yifeng Ding and Naman Jain and Zachary Mueller and Harm de Vries and Leandro Von Werra and Arjun Guha and LINGMING ZHANG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xXRnUU7xTL}\n}", "github": "", "reviewers": "qGfX;pw5h;4fJs;NEpV", "pdf_size": 1353923, "rating": "5;6;7;7", "confidence": "3;2;3;3", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "57;38;117;74", "wc_strengths": "90;18;157;59", "wc_weaknesses": "113;24;165;72", "wc_questions": "4;74;116;31", "wc_limitations": "4;4;58;1", "wc_review": "268;158;613;237", "wc_reply_reviewers": "23;14;0;0", "wc_reply_authors": "29;29;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "2;2;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 71.5, 29.19332115399 ], "wc_strengths_avg": [ 81.0, 50.76908508137605 ], "wc_weaknesses_avg": [ 93.5, 51.92542729723078 ], "wc_questions_avg": [ 56.25, 42.581539427315214 ], "wc_limitations_avg": [ 16.75, 23.84716964337697 ], "wc_review_avg": [ 319.0, 174.41473561600236 ], "wc_reply_reviewers_avg": [ 9.25, 9.781998773256925 ], "wc_reply_authors_avg": [ 14.5, 14.5 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3959590849963983054&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "snowflake.com;neu.edu;amazon.com;amazon.com;berkeley.edu;huggingface.co;elementai.com;hf.co;roblox.com;cs.illinois.edu", "author_num": 10, "aff_unique_index": "0;1;2;2;3;4;5;4;6;7", "aff_unique_norm": "Snowflake Inc.;Northeastern University;Amazon;University of California, Berkeley;Hugging Face;ServiceNow;Roblox Corporation;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;Amazon.com, Inc.;;;Research;Research;", "aff_unique_url": "https://www.snowflake.com;https://www.northeastern.edu;https://www.amazon.com;https://www.berkeley.edu;https://huggingface.co;https://www.servicenow.com;https://www.roblox.com;https://illinois.edu", "aff_unique_abbr": "Snowflake;NEU;Amazon;UC Berkeley;Hugging Face;ServiceNow;Roblox;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Berkeley;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Causal vs. Anticausal merging of predictors", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93078", "id": "xZKXGvLB0c", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xZKXGvLB0c", "openreview": "https://openreview.net/forum?id=xZKXGvLB0c", "poster": "", "project": "", "author_site": "Sergio Garrido Mejia, Patrick Bl\u00f6baum, Bernhard Sch\u00f6lkopf, Dominik Janzing", "tldr": "", "abstract": "We study the differences arising from merging predictors in the causal and anticausal directions using the same data.\nIn particular we study the asymmetries that arise in a simple model where we merge the predictors using one binary variable as target and two continuous variables as predictors.\nWe use Causal Maximum Entropy (CMAXENT) as inductive bias to merge the predictors, however, we expect similar differences to hold also when we use other merging methods that take into account asymmetries between cause and effect.\nWe show that if we observe all bivariate distributions, the CMAXENT solution reduces to a logistic regression in the causal direction and Linear Discriminant Analysis (LDA) in the anticausal direction.\nFurthermore, we study how the decision boundaries of these two solutions differ whenever we observe only some of the bivariate distributions implications for Out-Of-Variable (OOV) generalisation.", "keywords": "Causality;Merging of predictors;Causal vs Anticausal;Maximum Entropy", "primary_area": "causal_inference", "supplementary_material": "", "author": "Sergio Hernan Garrido Mejia;Patrick Bl\u00f6baum;Bernhard Sch\u00f6lkopf;Dominik Janzing", "authorids": "~Sergio_Hernan_Garrido_Mejia1;~Patrick_Bl\u00f6baum1;~Bernhard_Sch\u00f6lkopf1;~Dominik_Janzing3", "gender": "M;;;", "homepage": ";;;", "dblp": "334/1205;;;", "google_scholar": "GQl2G48AAAAJ;;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Sergio_Hernan_Garrido_Mejia1;~Patrick_Bl\u00f6baum1;~Bernhard_Sch\u00f6lkopf1;~Dominik_Janzing3", "aff": "Max Planck Institute for Intelligent Systems, Max-Planck Institute;;;", "aff_domain": "tuebingen.mpg.de;;;", "position": "PhD student;;;", "bibtex": "@inproceedings{\nmejia2024causal,\ntitle={Causal vs. Anticausal merging of predictors},\nauthor={Sergio Hernan Garrido Mejia and Patrick Bl{\\\"o}baum and Bernhard Sch{\\\"o}lkopf and Dominik Janzing},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xZKXGvLB0c}\n}", "github": "", "reviewers": "vvvY;546c;29XC;Wx67", "pdf_size": 439581, "rating": "4;6;6;6", "confidence": "4;4;3;4", "soundness": "3;4;3;3", "novelty": "2;4;3;3", "presentation": "2;3;4;3", "wc_summary": "76;49;77;17", "wc_strengths": "70;16;53;35", "wc_weaknesses": "248;240;118;136", "wc_questions": "69;93;50;22", "wc_limitations": "31;56;7;15", "wc_review": "494;454;305;225", "wc_reply_reviewers": "0;96;25;74", "wc_reply_authors": "50;97;0;0", "reply_reviewers": "0;2;1;1", "reply_authors": "2;2;1;1", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 54.75, 24.519125188309634 ], "wc_strengths_avg": [ 43.5, 20.130822139197395 ], "wc_weaknesses_avg": [ 185.5, 58.9130715546219 ], "wc_questions_avg": [ 58.5, 26.004807247891687 ], "wc_limitations_avg": [ 27.25, 18.713297411199342 ], "wc_review_avg": [ 369.5, 109.17989741706117 ], "wc_reply_reviewers_avg": [ 48.75, 38.114137796885814 ], "wc_reply_authors_avg": [ 36.75, 40.332214171800686 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:RMSwlGgvKSEJ:scholar.google.com/&scioq=Causal+vs.+Anticausal+merging+of+predictors&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "tuebingen.mpg.de;;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Intelligent Systems", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Dynamic 3D Gaussian Fields for Urban Areas", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93077", "id": "xZxXNhndXU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xZxXNhndXU", "openreview": "https://openreview.net/forum?id=xZxXNhndXU", "poster": "/media/PosterPDFs/NeurIPS%202024/93077.png?t=1733139460.9454374", "project": "", "author_site": "Tobias Fischer, Jonas Kulhanek, Samuel Rota Bul\u00f2, Lorenzo Porzi, Marc Pollefeys, Peter Kontschieder", "tldr": "", "abstract": "We present an efficient neural 3D scene representation for novel-view synthesis (NVS) in large-scale, dynamic urban areas. Existing works are not well suited for applications like mixed-reality or closed-loop simulation due to their limited visual quality and non-interactive rendering speeds. Recently, rasterization-based approaches have achieved high-quality NVS at impressive speeds. However, these methods are limited to small-scale, homogeneous data, i.e. they cannot handle severe appearance and geometry variations due to weather, season, and lighting and do not scale to larger, dynamic areas with thousands of images. We propose 4DGF, a neural scene representation that scales to large-scale dynamic urban areas, handles heterogeneous input data, and substantially improves rendering speeds. We use 3D Gaussians as an efficient geometry scaffold while relying on neural fields as a compact and flexible appearance model. We integrate scene dynamics via a scene graph at global scale while modeling articulated motions on a local level via deformations. This decomposed approach enables flexible scene composition suitable for real-world applications. In experiments, we surpass the state-of-the-art by over 3 dB in PSNR and more than 200x in rendering speed.", "keywords": "Neural Rendering;Gaussian Splatting;Dynamic Urban Areas", "primary_area": "machine_vision", "supplementary_material": "/attachment/6d99e721c19040d980b665c775bf0eb2301cf0af.zip", "author": "Tobias Fischer;Jonas Kulhanek;Samuel Rota Bul\u00f2;Lorenzo Porzi;Marc Pollefeys;Peter Kontschieder", "authorids": "~Tobias_Fischer3;~Jonas_Kulhanek1;~Samuel_Rota_Bul\u00f23;~Lorenzo_Porzi1;~Marc_Pollefeys2;~Peter_Kontschieder1", "gender": "M;M;M;M;;M", "homepage": "https://tobiasfshr.github.io;https://jkulhanek.com;;;;http://www.samuelrotabulo.it", "dblp": "249/9213;247/1194;154/1309;p/MarcPollefeys;93/8066.html;05/4139", "google_scholar": "Jp637I8AAAAJ;YDNzfN4AAAAJ;vW1gaVEAAAAJ;YYH0BjEAAAAJ;CxbDDRMAAAAJ;484sccEAAAAJ", "orcid": "0000-0001-8227-001X;0000-0002-8437-3626;0000-0001-9331-2908;;0000-0002-9809-664X;", "linkedin": ";;lorenzoporzi/;marc-pollefeys-30a7075/;;", "or_profile": "~Tobias_Fischer3;~Jonas_Kulhanek1;~Lorenzo_Porzi1;~Marc_Pollefeys2;~Peter_Kontschieder1;~Samuel_Rota_Bul\u00f21", "aff": "Meta Facebook;Department of Computer Science, ETHZ - ETH Zurich;Meta;Swiss Federal Institute of Technology;Meta Facebook;Meta", "aff_domain": "meta.com;inf.ethz.ch;meta.com;ethz.ch;meta.com;meta.com", "position": "Intern;Intern;Research Engineer;Full Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nfischer2024dynamic,\ntitle={Dynamic 3D Gaussian Fields for Urban Areas},\nauthor={Tobias Fischer and Jonas Kulhanek and Samuel Rota Bul{\\`o} and Lorenzo Porzi and Marc Pollefeys and Peter Kontschieder},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xZxXNhndXU}\n}", "github": "", "reviewers": "51SN;7pd2;6enk;f8M6", "pdf_size": 43800469, "rating": "5;6;7;8", "confidence": "5;5;5;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "2;4;4;4", "wc_summary": "63;100;58;81", "wc_strengths": "38;57;97;112", "wc_weaknesses": "314;96;130;105", "wc_questions": "25;33;15;76", "wc_limitations": "11;31;11;8", "wc_review": "451;317;311;382", "wc_reply_reviewers": "18;0;105;26", "wc_reply_authors": "0;0;32;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_avg": [ 75.5, 16.53027525481654 ], "wc_strengths_avg": [ 76.0, 29.75735203273302 ], "wc_weaknesses_avg": [ 161.25, 89.06563590970426 ], "wc_questions_avg": [ 37.25, 23.263436977368585 ], "wc_limitations_avg": [ 15.25, 9.175374651751284 ], "wc_review_avg": [ 365.25, 56.79953785023255 ], "wc_reply_reviewers_avg": [ 37.25, 40.23291562887283 ], "wc_reply_authors_avg": [ 8.0, 13.856406460551018 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16476360127884240647&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "meta.com;inf.ethz.ch;meta.com;ethz.ch;meta.com;meta.com", "author_num": 6, "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Meta;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Meta Platforms, Inc.;Department of Computer Science;", "aff_unique_url": "https://meta.com;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "Meta;ETHZ;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Co-occurrence is not Factual Association in Language Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93076", "id": "xabStWAUtr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xabStWAUtr", "openreview": "https://openreview.net/forum?id=xabStWAUtr", "poster": "", "project": "", "author_site": "Xiao Zhang, Miao Li, Ji Wu", "tldr": "", "abstract": "Pretrained language models can encode a large amount of knowledge and utilize it for various reasoning tasks, yet they can still struggle to learn novel factual knowledge effectively from finetuning on limited textual demonstrations. In this work, we show that the reason for this deficiency is that language models are biased to learn word co-occurrence statistics instead of true factual associations. We identify the differences between two forms of knowledge representation in language models: knowledge in the form of co-occurrence statistics is encoded in the middle layers of the transformer model and does not generalize well to reasoning scenarios beyond simple question answering, while true factual associations are encoded in the lower layers and can be freely utilized in various reasoning tasks. Based on these observations, we propose two strategies to improve the learning of factual associations in language models. We show that training on text with implicit rather than explicit factual associations can force the model to learn factual associations instead of co-occurrence statistics, significantly improving the generalization of newly learned knowledge. We also propose a simple training method to actively forget the learned co-occurrence statistics, which unblocks and enhances the learning of factual associations when training on plain narrative text. On both synthetic and real-world corpora, the two proposed strategies improve the generalization of the knowledge learned during finetuning to reasoning scenarios such as indirect and multi-hop question answering.", "keywords": "language model;knowledge learning;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xiao Zhang;Miao Li;Ji Wu", "authorids": "~Xiao_Zhang9;~Miao_Li10;~Ji_Wu3", "gender": ";M;M", "homepage": ";http://web.ee.tsinghua.edu.cn/limiao/zh_CN/index.htm;http://speech.tsinghua.edu.cn/en/", "dblp": ";;91/4957-2", "google_scholar": "https://scholar.google.com/citations?hl=en;NZkelOcAAAAJ;", "orcid": ";0009-0009-5672-7448;0000-0001-6170-726X", "linkedin": ";;", "or_profile": "~Xiao_Zhang9;~Miao_Li10;~Ji_Wu3", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2024cooccurrence,\ntitle={Co-occurrence is not Factual Association in Language Models},\nauthor={Xiao Zhang and Miao Li and Ji Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xabStWAUtr}\n}", "github": "", "reviewers": "iMfh;JNNz;hFpF;B3nU", "pdf_size": 818009, "rating": "5;7;7;7", "confidence": "4;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;4;3;4", "wc_summary": "54;141;109;132", "wc_strengths": "103;131;75;173", "wc_weaknesses": "175;62;77;426", "wc_questions": "2;45;117;141", "wc_limitations": "1;7;14;11", "wc_review": "335;386;392;883", "wc_reply_reviewers": "17;18;17;101", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 109.0, 33.830459648074545 ], "wc_strengths_avg": [ 120.5, 36.204281514760105 ], "wc_weaknesses_avg": [ 185.0, 145.75150084990548 ], "wc_questions_avg": [ 76.25, 55.54896488684555 ], "wc_limitations_avg": [ 8.25, 4.866980583482946 ], "wc_review_avg": [ 499.0, 222.8059693993857 ], "wc_reply_reviewers_avg": [ 38.25, 36.23102951890824 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6019575048070423716&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Beyond Redundancy: Information-aware Unsupervised Multiplex Graph Structure Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93075", "id": "xaqPAkJnAS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xaqPAkJnAS", "openreview": "https://openreview.net/forum?id=xaqPAkJnAS", "poster": "/media/PosterPDFs/NeurIPS%202024/93075.png?t=1730190130.4067755", "project": "", "author_site": "Zhixiang Shen, Shuo Wang, Zhao Kang", "tldr": "", "abstract": "Unsupervised Multiplex Graph Learning (UMGL) aims to learn node representations on various edge types without manual labeling. However, existing research overlooks a key factor: the reliability of the graph structure. Real-world data often exhibit a complex nature and contain abundant task-irrelevant noise, severely compromising UMGL's performance. Moreover, existing methods primarily rely on contrastive learning to maximize mutual information across different graphs, limiting them to multiplex graph redundant scenarios and failing to capture view-unique task-relevant information. In this paper, we focus on a more realistic and challenging task: to unsupervisedly learn a fused graph from multiple graphs that preserve sufficient task-relevant information while removing task-irrelevant noise. Specifically, our proposed Information-aware Unsupervised Multiplex Graph Fusion framework (InfoMGF) uses graph structure refinement to eliminate irrelevant noise and simultaneously maximizes view-shared and view-unique task-relevant information, thereby tackling the frontier of non-redundant multiplex graph. Theoretical analyses further guarantee the effectiveness of InfoMGF. Comprehensive experiments against various baselines on different downstream tasks demonstrate its superior performance and robustness. Surprisingly, our unsupervised method even beats the sophisticated supervised approaches. The source code and datasets are available at https://github.com/zxlearningdeep/InfoMGF.", "keywords": "Multiplex Graph Learning;Graph Structure Learning;Multi-view Graph Clustering;Self-supervised Graph Learning", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Zhixiang Shen;Shuo Wang;zhao kang", "authorids": "~Zhixiang_Shen1;~Shuo_Wang39;~zhao_kang1", "gender": "M;M;M", "homepage": ";https://github.com/SIRBabbage;https://sites.google.com/site/zhaokanghomepage/", "dblp": "139/9841;;166/1345", "google_scholar": "W_fgHnwAAAAJ;;T_yCaN4AAAAJ", "orcid": "0009-0004-3878-0177;;0000-0003-4103-0954", "linkedin": ";;", "or_profile": "~Zhixiang_Shen1;~Shuo_Wang39;~zhao_kang1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "Undergrad student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nshen2024beyond,\ntitle={Beyond Redundancy: Information-aware Unsupervised Multiplex Graph Structure Learning},\nauthor={Zhixiang Shen and Shuo Wang and zhao kang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xaqPAkJnAS}\n}", "github": "", "reviewers": "2Joh;gjsJ;ei6T;cF8g", "pdf_size": 4234664, "rating": "6;7;7;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "48;61;48;60", "wc_strengths": "95;98;97;102", "wc_weaknesses": "46;68;61;71", "wc_questions": "54;5;2;76", "wc_limitations": "1;6;1;5", "wc_review": "244;238;209;314", "wc_reply_reviewers": "0;0;18;0", "wc_reply_authors": "0;26;26;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 54.25, 6.2599920127744575 ], "wc_strengths_avg": [ 98.0, 2.5495097567963922 ], "wc_weaknesses_avg": [ 61.5, 9.656603957913983 ], "wc_questions_avg": [ 34.25, 31.736217480979047 ], "wc_limitations_avg": [ 3.25, 2.277608394786075 ], "wc_review_avg": [ 251.25, 38.57055223872222 ], "wc_reply_reviewers_avg": [ 4.5, 7.794228634059948 ], "wc_reply_authors_avg": [ 13.0, 13.0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13637097806482590113&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Feedback control guides credit assignment in recurrent neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93074", "id": "xavWvnJTST", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xavWvnJTST", "openreview": "https://openreview.net/forum?id=xavWvnJTST", "poster": "", "project": "", "author_site": "Klara Kaleb, Barbara Feulner, Juan Gallego, Claudia Clopath", "tldr": "", "abstract": "How do brain circuits learn to generate behaviour?\n While significant strides have been made in understanding learning in artificial neural networks, applying this knowledge to biological networks remains challenging.\n For instance, while backpropagation is known to perform accurate credit assignment of error in artificial neural networks, how a similarly powerful process can be realized within the constraints of biological circuits remains largely unclear.\n One of the major challenges is that the brain's extensive recurrent connectivity requires the propagation of error through both space and time, a problem that is notoriously difficult to solve in vanilla recurrent neural networks.\n Moreover, the extensive feedback connections in the brain are known to influence forward network activity, but the interaction between feedback-driven activity changes and local, synaptic plasticity-based learning is not fully understood.\n Building on our previous work modelling motor learning, this work investigates the mechanistic properties of pre-trained networks with feedback control on a standard motor task.\n We show that feedback control of the ongoing recurrent network dynamics approximates the optimal first-order gradient with respect to the network activities, allowing for rapid, ongoing movement correction.\n Moreover, we show that trial-by-trial adaptation to a persistent perturbation using a local, biologically plausible learning rule that integrates recent activity and error feedback is both more accurate and more efficient with feedback control during learning, due to the decoupling of the recurrent network dynamics and the injection of an adaptive, second-order gradient into the network dynamics.\n Thus, our results suggest that feedback control may guide credit assignment in biological recurrent neural networks, enabling both rapid and efficient learning in the brain.", "keywords": "biologically-plausible learning;RNNs;motor control;feedback control", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/921085fdcdf365f9677bbb04b2b50b7c97fd751c.zip", "author": "Klara Kaleb;Barbara Feulner;Juan A. Gallego;Claudia Clopath", "authorids": "~Klara_Kaleb1;~Barbara_Feulner1;~Juan_A._Gallego1;~Claudia_Clopath1", "gender": "F;;;", "homepage": ";;;", "dblp": ";;;37/6388", "google_scholar": "https://scholar.google.co.uk/citations?user=11qgBWAAAAAJ;https://scholar.google.de/citations?user=1GmLl64AAAAJ;;", "orcid": "0000-0003-2091-2114;;;", "linkedin": ";;;", "or_profile": "~Klara_Kaleb1;~Barbara_Feulner1;~Juan_A._Gallego1;~Claudia_Clopath1", "aff": "Imperial College London;Imperial College London ;;Imperial College London", "aff_domain": "imperial.ac.uk;ic.ac;;ic.ac.uk", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nkaleb2024feedback,\ntitle={Feedback control guides credit assignment in recurrent neural networks},\nauthor={Klara Kaleb and Barbara Feulner and Juan A. Gallego and Claudia Clopath},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xavWvnJTST}\n}", "github": "", "reviewers": "GeF2;9rgZ;HTCn;mASo", "pdf_size": 2997160, "rating": "5;6;7;7", "confidence": "2;4;3;4", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "4;4;3;3", "wc_summary": "118;89;80;32", "wc_strengths": "54;56;34;60", "wc_weaknesses": "448;194;703;52", "wc_questions": "109;108;27;35", "wc_limitations": "75;11;78;6", "wc_review": "804;458;922;185", "wc_reply_reviewers": "125;80;183;27", "wc_reply_authors": "0;16;70;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 79.75, 30.93844695520446 ], "wc_strengths_avg": [ 51.0, 10.04987562112089 ], "wc_weaknesses_avg": [ 349.25, 248.6718470193198 ], "wc_questions_avg": [ 69.75, 38.854697270729055 ], "wc_limitations_avg": [ 42.5, 34.0624426605022 ], "wc_review_avg": [ 592.25, 290.45169563973974 ], "wc_reply_reviewers_avg": [ 103.75, 57.41678761477343 ], "wc_reply_authors_avg": [ 21.5, 28.75326068465975 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7015897999154622705&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "imperial.ac.uk;ic.ac;;ic.ac.uk", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Customized Multiple Clustering via Multi-Modal Subspace Proxy Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93073", "id": "xbuaSTqAEz", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xbuaSTqAEz", "openreview": "https://openreview.net/forum?id=xbuaSTqAEz", "poster": "/media/PosterPDFs/NeurIPS%202024/93073.png?t=1731450595.8679855", "project": "", "author_site": "Jiawei Yao, Qi Qian, Juhua Hu", "tldr": "", "abstract": "Multiple clustering aims to discover various latent structures of data from different aspects. Deep multiple clustering methods have achieved remarkable performance by exploiting complex patterns and relationships in data. However, existing works struggle to flexibly adapt to diverse user-specific needs in data grouping, which may require manual understanding of each clustering. To address these limitations, we introduce Multi-Sub, a novel end-to-end multiple clustering approach that incorporates a multi-modal subspace proxy learning framework in this work. Utilizing the synergistic capabilities of CLIP and GPT-4, Multi-Sub aligns textual prompts expressing user preferences with their corresponding visual representations. This is achieved by automatically generating proxy words from large language models that act as subspace bases, thus allowing for the customized representation of data in terms specific to the user\u2019s interests. Our method consistently outperforms existing baselines across a broad set of datasets in visual multiple clustering tasks. Our code is available at https://github.com/Alexander-Yao/Multi-Sub.", "keywords": "Multiple Clustering;Multi-modal Model;Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Jiawei Yao;Qi Qian;Juhua Hu", "authorids": "~Jiawei_Yao3;~Qi_Qian1;~Juhua_Hu1", "gender": ";;F", "homepage": ";http://qi-qian.com;http://faculty.washington.edu/juhuah/", "dblp": ";05/2084-1;147/2228", "google_scholar": ";Rp_40_gAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;", "linkedin": ";;", "or_profile": "~Jiawei_Yao3;~Qi_Qian1;~Juhua_Hu1", "aff": ";Alibaba Group;University of Washington", "aff_domain": ";alibaba-inc.com;uw.edu", "position": ";Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyao2024customized,\ntitle={Customized Multiple Clustering via Multi-Modal Subspace Proxy Learning},\nauthor={Jiawei Yao and Qi Qian and Juhua Hu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xbuaSTqAEz}\n}", "github": "", "reviewers": "r2bm;Evc4;HfG7;vBFT", "pdf_size": 0, "rating": "5;5;6;7", "confidence": "4;4;4;5", "soundness": "3;3;2;4", "novelty": "3;3;3;4", "presentation": "3;3;3;4", "wc_summary": "157;58;33;100", "wc_strengths": "104;35;31;124", "wc_weaknesses": "136;161;45;145", "wc_questions": "5;16;36;126", "wc_limitations": "8;1;15;17", "wc_review": "410;271;160;512", "wc_reply_reviewers": "27;0;25;115", "wc_reply_authors": "24;0;16;28", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 87.0, 46.97339672623218 ], "wc_strengths_avg": [ 73.5, 41.13696634415329 ], "wc_weaknesses_avg": [ 121.75, 45.20716204319842 ], "wc_questions_avg": [ 45.75, 47.64648465521879 ], "wc_limitations_avg": [ 10.25, 6.299801584177076 ], "wc_review_avg": [ 338.25, 133.82147622859344 ], "wc_reply_reviewers_avg": [ 41.75, 43.6083420918521 ], "wc_reply_authors_avg": [ 17.0, 10.723805294763608 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14076183635103336146&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": ";alibaba-inc.com;uw.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "Alibaba Group;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.washington.edu", "aff_unique_abbr": "Alibaba;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "title": "SocialGPT: Prompting LLMs for Social Relation Reasoning via Greedy Segment Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93072", "id": "xcF2VbyZts", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xcF2VbyZts", "openreview": "https://openreview.net/forum?id=xcF2VbyZts", "poster": "/media/PosterPDFs/NeurIPS%202024/93072.png?t=1731404238.1373", "project": "", "author_site": "Wanhua Li, Zibin Meng, Jiawei Zhou, Donglai Wei, Chuang Gan, Hanspeter Pfister", "tldr": "", "abstract": "Social relation reasoning aims to identify relation categories such as friends, spouses, and colleagues from images. While current methods adopt the paradigm of training a dedicated network end-to-end using labeled image data, they are limited in terms of generalizability and interpretability. To address these issues, we first present a simple yet well-crafted framework named SocialGPT, which combines the perception capability of Vision Foundation Models (VFMs) and the reasoning capability of Large Language Models (LLMs) within a modular framework, providing a strong baseline for social relation recognition. Specifically, we instruct VFMs to translate image content into a textual social story, and then utilize LLMs for text-based reasoning. SocialGPT introduces systematic design principles to adapt VFMs and LLMs separately and bridge their gaps. Without additional model training, it achieves competitive zero-shot results on two databases while offering interpretable answers, as LLMs can generate language-based explanations for the decisions. The manual prompt design process for LLMs at the reasoning phase is tedious and an automated prompt optimization method is desired. As we essentially convert a visual classification task into a generative task of LLMs, automatic prompt optimization encounters a unique long prompt optimization issue. To address this issue, we further propose the Greedy Segment Prompt Optimization (GSPO), which performs a greedy search by utilizing gradient information at the segment level. Experimental results show that GSPO significantly improves performance, and our method also generalizes to different image styles. The code is available at https://github.com/Mengzibin/SocialGPT.", "keywords": "Social Relation Reasoning;Large Language Models;Foundation Models;Prompt Optimization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Wanhua Li;Zibin Meng;Jiawei Zhou;Donglai Wei;Chuang Gan;Hanspeter Pfister", "authorids": "~Wanhua_Li1;~Zibin_Meng2;~Jiawei_Zhou1;~Donglai_Wei1;~Chuang_Gan1;~Hanspeter_Pfister1", "gender": "M;;M;M;M;M", "homepage": "https://li-wanhua.github.io/;;https://joezhouai.com/;https://donglaiw.github.io/;http://people.csail.mit.edu/ganchuang/;https://vcg.seas.harvard.edu", "dblp": "189/8563-1;;126/4991-1;89/10116.html;139/6993;p/HanspeterPfister", "google_scholar": "I03QnrsAAAAJ;;https://scholar.google.com/citations?hl=en;xF2mhDoAAAAJ;PTeSCbIAAAAJ;tvBEoaMAAAAJ", "orcid": ";;0000-0001-5590-6270;0000-0002-2329-5484;;0000-0002-3620-2582", "linkedin": ";;jiawei-zhou/;;;hpfister/", "or_profile": "~Wanhua_Li1;~Zibin_Meng2;~Jiawei_Zhou1;~Donglai_Wei1;~Chuang_Gan1;~Hanspeter_Pfister1", "aff": "Harvard University;;Toyota Technological Institute at Chicago;Boston College;University of Massachusetts at Amherst;Harvard University", "aff_domain": "harvard.edu;;ttic.edu;bc.edu;umass.edu;harvard.edu", "position": "Postdoc;;Assistant Professor;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2024socialgpt,\ntitle={Social{GPT}: Prompting {LLM}s for Social Relation Reasoning via Greedy Segment Optimization},\nauthor={Wanhua Li and Zibin Meng and Jiawei Zhou and Donglai Wei and Chuang Gan and Hanspeter Pfister},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xcF2VbyZts}\n}", "github": "", "reviewers": "zGW2;9pM3;RdU3;aaEq", "pdf_size": 4264475, "rating": "4;4;5;7", "confidence": "4;3;3;4", "soundness": "3;2;3;4", "novelty": "2;2;3;3", "presentation": "3;3;3;4", "wc_summary": "41;81;74;133", "wc_strengths": "16;65;54;35", "wc_weaknesses": "240;110;62;138", "wc_questions": "73;78;6;57", "wc_limitations": "6;7;7;14", "wc_review": "376;341;203;377", "wc_reply_reviewers": "0;128;16;0", "wc_reply_authors": "80;420;73;36", "reply_reviewers": "0;1;1;0", "reply_authors": "3;4;3;2", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 32.96494350063413 ], "wc_strengths_avg": [ 42.5, 18.688231591030757 ], "wc_weaknesses_avg": [ 137.5, 65.12104114646817 ], "wc_questions_avg": [ 53.5, 28.5 ], "wc_limitations_avg": [ 8.5, 3.2015621187164243 ], "wc_review_avg": [ 324.25, 71.48907259155067 ], "wc_reply_reviewers_avg": [ 36.0, 53.51635264103861 ], "wc_reply_authors_avg": [ 152.25, 155.48693675032638 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.40824829046386296, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=69667796190112915&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "harvard.edu;;ttic.edu;bc.edu;umass.edu;harvard.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Harvard University;Toyota Technological Institute at Chicago;Boston College;University of Massachusetts Amherst", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.harvard.edu;https://www.tti-chicago.org;https://www.bostoncollege.edu;https://www.umass.edu", "aff_unique_abbr": "Harvard;TTI Chicago;BC;UMass Amherst", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chicago;Amherst", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Simplified and Generalized Masked Diffusion for Discrete Data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93071", "id": "xcqSOfHt4g", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xcqSOfHt4g", "openreview": "https://openreview.net/forum?id=xcqSOfHt4g", "poster": "", "project": "", "author_site": "Jiaxin Shi, Kehang Han, Zhe Wang, Arnaud Doucet, Michalis Titsias", "tldr": "", "abstract": "Masked (or absorbing) diffusion is actively explored as an alternative to autoregressive models for generative modeling of discrete data. However, existing work in this area has been hindered by unnecessarily complex model formulations and unclear relationships between different perspectives, leading to suboptimal parameterization, training objectives, and ad hoc adjustments to counteract these issues. In this work, we aim to provide a simple and general framework that unlocks the full potential of masked diffusion models. We show that the continuous-time variational objective of masked diffusion models is a simple weighted integral of cross-entropy losses. Our framework also enables training generalized masked diffusion models with state-dependent masking schedules. When evaluated by perplexity, our models trained on OpenWebText surpass prior diffusion language models at GPT-2 scale and demonstrate superior performance on 4 out of 5 zero-shot language modeling tasks. Furthermore, our models vastly outperform previous discrete diffusion models on pixel-level image modeling, achieving 2.75 (CIFAR-10) and 3.40 (ImageNet 64x64) bits per dimension that are better than autoregressive models of similar sizes.", "keywords": "diffusion;discrete;masked diffusion;absorbing diffusion;diffusion model", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Jiaxin Shi;Kehang Han;Zhe Wang;Arnaud Doucet;Michalis Titsias", "authorids": "~Jiaxin_Shi1;~Kehang_Han1;~Zhe_Wang13;~Arnaud_Doucet2;~Michalis_Titsias1", "gender": "M;;;;M", "homepage": "http://jiaxins.io;;;https://www.stats.ox.ac.uk/~doucet/;https://mtitsias.github.io/", "dblp": "151/7509;198/6522;;68/1628;19/5385", "google_scholar": "juZXbFoAAAAJ;;;W4SZGV8AAAAJ;https://scholar.google.gr/citations?user=B-SbkAwAAAAJ", "orcid": ";;;0000-0002-7662-419X;", "linkedin": ";;;;", "or_profile": "~Jiaxin_Shi1;~Kehang_Han1;~Zhe_Wang13;~Arnaud_Doucet2;~Michalis_Titsias1", "aff": "Google DeepMind;Google DeepMind;;University of Oxford;Google DeepMind", "aff_domain": "google.com;google.com;;ox.ac.uk;google.com", "position": "Research Scientist;Researcher;;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nshi2024simplified,\ntitle={Simplified and Generalized Masked Diffusion for Discrete Data},\nauthor={Jiaxin Shi and Kehang Han and Zhe Wang and Arnaud Doucet and Michalis Titsias},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xcqSOfHt4g}\n}", "github": "", "reviewers": "EzVa;Pt68;Y9JA;bny3", "pdf_size": 1825816, "rating": "6;6;7;7", "confidence": "3;5;4;5", "soundness": "4;2;3;3", "novelty": "4;3;3;3", "presentation": "4;2;3;4", "wc_summary": "196;42;85;46", "wc_strengths": "91;12;122;75", "wc_weaknesses": "100;384;94;306", "wc_questions": "3;70;101;62", "wc_limitations": "15;1;1;8", "wc_review": "405;509;403;497", "wc_reply_reviewers": "23;156;0;234", "wc_reply_authors": "9;127;0;289", "reply_reviewers": "1;1;0;2", "reply_authors": "2;2;1;4", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 92.25, 62.21083105054939 ], "wc_strengths_avg": [ 75.0, 40.10610926031095 ], "wc_weaknesses_avg": [ 221.0, 127.04723531033645 ], "wc_questions_avg": [ 59.0, 35.46124645299429 ], "wc_limitations_avg": [ 6.25, 5.80409338312195 ], "wc_review_avg": [ 453.5, 49.686517285879475 ], "wc_reply_reviewers_avg": [ 103.25, 96.14929796935597 ], "wc_reply_authors_avg": [ 106.25, 116.80619632536623 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14599860146187264346&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "google.com;google.com;;ox.ac.uk;google.com", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;University of Oxford", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.ox.ac.uk", "aff_unique_abbr": "DeepMind;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Combining Statistical Depth and Fermat Distance for Uncertainty Quantification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93070", "id": "xeXRhTUmcf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xeXRhTUmcf", "openreview": "https://openreview.net/forum?id=xeXRhTUmcf", "poster": "/media/PosterPDFs/NeurIPS%202024/93070.png?t=1731652575.9305713", "project": "", "author_site": "Hai Vy Nguyen, Fabrice Gamboa, Reda CHHAIBI, Sixin Zhang, Serge Gratton, Thierry Giaccone", "tldr": "", "abstract": "We measure the out-of-domain uncertainty in the prediction of Neural Networks using a statistical notion called \"Lens Depth'' (LD) combined with Fermat Distance, which is able to capture precisely the \"depth'' of a point with respect to a distribution in feature space, without any distributional assumption. Our method also has no trainable parameter. The method is applied directly in the feature space at test time and does not intervene in training process. As such, it does not impact the performance of the original model. The proposed method gives excellent qualitative results on toy datasets and can give competitive or better uncertainty estimation on standard deep learning datasets compared to strong baseline methods.", "keywords": "Out-of-distribution;In-distribution;Uncertainty quantification;Fermat distance;Lens depth;Novelty detection;Feature spaces;Deep learning", "primary_area": "other", "supplementary_material": "", "author": "Hai-Vy Nguyen;Fabrice Gamboa;Reda Chhaibi;Sixin Zhang;Serge Gratton;Thierry Giaccone", "authorids": "~Hai-Vy_Nguyen1;~Fabrice_Gamboa1;~Reda_Chhaibi2;~Sixin_Zhang2;~Serge_Gratton2;~Thierry_Giaccone1", "gender": "M;M;;M;M;", "homepage": "https://www.linkedin.com/in/hai-vy-nguyen-8a094a199/;https://www.math.univ-toulouse.fr/~gamboa/;;https://www.irit.fr/~Sixin.Zhang/;http://gratton.perso.enseeiht.fr/;", "dblp": "376/5050;02/3932;;116/3004;71/3633;", "google_scholar": ";wK29Q6IAAAAJ;;-cL9xWMAAAAJ;https://scholar.google.fr/citations?user=q9HdQc4AAAAJ;", "orcid": ";0000-0001-9779-4393;;;0000-0002-5021-2357;", "linkedin": ";fabrice-gamboa-88416514/;;;;", "or_profile": "~Hai-Vy_Nguyen1;~Fabrice_Gamboa1;~Reda_Chhaibi2;~Sixin_Zhang2;~Serge_Gratton2;~Thierry_Giaccone1", "aff": "Institut de math\u00e9matiques de Toulouse;ANITI;;Universtite Toulouse;University of Toulouse, IRIT;", "aff_domain": "math.univ-toulouse.fr;aniti.univ-toulouse.fr;;irit.fr;irit.fr;", "position": "PhD student;Principal Researcher;;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nnguyen2024combining,\ntitle={Combining Statistical Depth and Fermat Distance for Uncertainty Quantification},\nauthor={Hai-Vy Nguyen and Fabrice Gamboa and Reda Chhaibi and Sixin Zhang and Serge Gratton and Thierry Giaccone},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xeXRhTUmcf}\n}", "github": "", "reviewers": "TY6a;exfg;kN5D;NNtx", "pdf_size": 8075937, "rating": "6;6;7;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "3;3;4;3", "wc_summary": "61;72;123;47", "wc_strengths": "61;96;146;89", "wc_weaknesses": "54;277;162;767", "wc_questions": "20;125;351;132", "wc_limitations": "15;4;106;41", "wc_review": "211;574;888;1076", "wc_reply_reviewers": "49;26;35;77", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 28.682529525828087 ], "wc_strengths_avg": [ 98.0, 30.651264247988205 ], "wc_weaknesses_avg": [ 315.0, 272.61603034304494 ], "wc_questions_avg": [ 157.0, 120.47198844544735 ], "wc_limitations_avg": [ 41.5, 39.588508433635134 ], "wc_review_avg": [ 687.25, 328.27836891881867 ], "wc_reply_reviewers_avg": [ 46.75, 19.292161620720474 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MUGes4uj0KcJ:scholar.google.com/&scioq=Combining+Statistical+Depth+and+Fermat+Distance+for+Uncertainty+Quantification&hl=en&as_sdt=0,5", "gs_version_total": 11, "email": "math.univ-toulouse.fr;aniti.univ-toulouse.fr;;irit.fr;irit.fr;", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Institut de Math\u00e9matiques de Toulouse;ANITI;Universit\u00e9 Toulouse;University of Toulouse", "aff_unique_dep": "Department of Mathematics;;;Institut de Recherche en Informatique de Toulouse (IRIT)", "aff_unique_url": "https://www.imtoulouse.fr;https://www.aniti.fr;https://www.univ-toulouse.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "IMT;;UT;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "ReactZyme: A Benchmark for Enzyme-Reaction Prediction", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97442", "id": "xepxnDQoGq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xepxnDQoGq", "openreview": "https://openreview.net/forum?id=xepxnDQoGq", "poster": "", "project": "", "author_site": "Chenqing Hua, Bozitao Zhong, Sitao Luan, Liang Hong, Guy Wolf, Doina Precup, Shuangjia Zheng", "tldr": "", "abstract": "Enzymes, with their specific catalyzed reactions, are necessary for all aspects of life, enabling diverse biological processes and adaptations. Predicting enzyme functions is essential for understanding biological pathways, guiding drug development, enhancing bioproduct yields, and facilitating evolutionary studies.\nAddressing the inherent complexities, we introduce a new approach to annotating enzymes based on their catalyzed reactions. This method provides detailed insights into specific reactions and is adaptable to newly discovered reactions, diverging from traditional classifications by protein family or expert-derived reaction classes. We employ machine learning algorithms to analyze enzyme reaction datasets, delivering a much more refined view on the functionality of enzymes.\nOur evaluation leverages the largest enzyme-reaction dataset to date, derived from the SwissProt and Rhea databases with entries up to January 8, 2024. \nWe frame the enzyme-reaction prediction as a retrieval problem, aiming to rank enzymes by their catalytic ability for specific reactions. With our model, we can recruit proteins for novel reactions and predict reactions in novel proteins, facilitating enzyme discovery and function annotation https://github.com/WillHua127/ReactZyme.", "keywords": "enzyme-reaction prediction;protein representation;molecule representation", "primary_area": "", "supplementary_material": "/attachment/98523a3d48388a11e8543c9c634d630863b2f670.pdf", "author": "Chenqing Hua;Bozitao Zhong;Sitao Luan;Liang Hong;Guy Wolf;Doina Precup;Shuangjia Zheng", "authorids": "~Chenqing_Hua1;~Bozitao_Zhong1;~Sitao_Luan1;~Liang_Hong2;~Guy_Wolf1;~Doina_Precup1;~Shuangjia_Zheng2", "gender": "Non-Binary;M;M;M;M;F;M", "homepage": "https://willhua127.github.io/;;;https://ins.sjtu.edu.cn/people/lhong/index_english.html;http://guywolf.org;http://cs.mcgill.ca/~dprecup/;https://zhenglab.sjtu.edu.cn/", "dblp": "272/8791;310/9462;249/2879;;120/1308;p/DoinaPrecup;235/3743.html", "google_scholar": "Lxe71v4AAAAJ;6k_LhSoAAAAJ;Ouoi7yYAAAAJ;pcz1yA4AAAAJ;g0k3SjcAAAAJ;https://scholar.google.com.tw/citations?user=j54VcVEAAAAJ;_7z2_9kAAAAJ", "orcid": ";0000-0001-9363-6099;;0000-0003-0107-336X;0000-0002-6740-059X;;0000-0001-9747-4285", "linkedin": "willhua/;;;;;;", "or_profile": "~Chenqing_Hua1;~Bozitao_Zhong1;~Sitao_Luan1;~Liang_Hong2;~Guy_Wolf1;~Doina_Precup1;~SHUANGJIA_ZHENG1", "aff": "Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;The Chinese University of Hong Kong;McGill University;;University of Montreal;McGill University;Shanghai Jiaotong University", "aff_domain": "mila.umontreal.ca;link.cuhk.edu.hk;mcgill.ca;;umontreal.ca;mcgill.ca;sjtu.edu.cn", "position": "MS student;MS student;PhD student;;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhua2024reactzyme,\ntitle={ReactZyme: A Benchmark for Enzyme-Reaction Prediction},\nauthor={Chenqing Hua and Bozitao Zhong and Sitao Luan and Liang Hong and Guy Wolf and Doina Precup and Shuangjia Zheng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xepxnDQoGq}\n}", "github": "", "reviewers": "goyW;6BMJ;vyov", "pdf_size": 2498021, "rating": "5;6;8", "confidence": "3;3;4", "wc_summary_and_contributions": "45;44;66", "wc_strengths": "24;37;7", "wc_improvement": "189;19;38", "wc_limitations": "29;42;20", "wc_correctness": "93;16;1", "wc_clarity": "146;205;1", "wc_relation_to_prior_work": "36;32;1", "wc_documentation": "1;5;10", "wc_additional_feedback": "1;1;1", "wc_review": "564;401;145", "wc_reply_reviewers": "126;31;0", "wc_reply_authors": "547;233;233", "reply_reviewers": "2;1;0", "reply_authors": "3;2;2", "rating_avg": [ 6.333333333333333, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 51.666666666666664, 10.143416036468626 ], "wc_strengths_avg": [ 22.666666666666668, 12.283683848458853 ], "wc_improvement_avg": [ 82.0, 76.05699617173076 ], "wc_limitations_avg": [ 30.333333333333332, 9.030811456096044 ], "wc_correctness_avg": [ 36.666666666666664, 40.301640440833445 ], "wc_clarity_avg": [ 117.33333333333333, 85.7139947085007 ], "wc_relation_to_prior_work_avg": [ 23.0, 15.641824275533422 ], "wc_documentation_avg": [ 5.333333333333333, 3.6817870057290873 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 370.0, 172.45482500256892 ], "wc_reply_reviewers_avg": [ 52.333333333333336, 53.6055552676731 ], "wc_reply_authors_avg": [ 337.6666666666667, 148.02101952838396 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.944911182523068, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2425493781910502534&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "mila.umontreal.ca;link.cuhk.edu.hk;mcgill.ca;;umontreal.ca;mcgill.ca;sjtu.edu.cn", "author_num": 7, "aff_unique_index": "0;1;2;0;2;3", "aff_unique_norm": "University of Montreal;Chinese University of Hong Kong;McGill University;Shanghai Jiao Tong University", "aff_unique_dep": "Montreal Institute for Learning Algorithms;;;", "aff_unique_url": "https://www.mila.quebec;https://www.cuhk.edu.hk;https://www.mcgill.ca;https://www.sjtu.edu.cn", "aff_unique_abbr": "MILA;CUHK;McGill;SJTU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Montreal;Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "Canada;China" }, { "title": "FedGMark: Certifiably Robust Watermarking for Federated Graph Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93069", "id": "xeviQPXTMU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xeviQPXTMU", "openreview": "https://openreview.net/forum?id=xeviQPXTMU", "poster": "/media/PosterPDFs/NeurIPS%202024/93069.png?t=1730259776.0169036", "project": "", "author_site": "Yuxin Yang, Qiang Li, Yuan Hong, Binghui Wang", "tldr": "", "abstract": "Federated graph learning (FedGL) is an emerging learning paradigm to collaboratively train graph data from various clients. However, during the development and deployment of FedGL models, they are susceptible to illegal copying and model theft. Backdoor-based watermarking is a well-known method for mitigating these attacks, as it offers ownership verification to the model owner. We take the first step to protect the ownership of FedGL models via backdoor-based watermarking. Existing techniques have challenges in achieving the goal: 1) they either cannot be directly applied or yield unsatisfactory performance; 2) they are vulnerable to watermark removal attacks; and 3) they lack of formal guarantees. To address all the challenges, we propose FedGMark, the first certified robust backdoor-based watermarking for FedGL. FedGMark leverages the unique graph structure and client information in FedGL to learn customized and diverse watermarks. It also designs a novel GL architecture that facilitates defending against both the empirical and theoretically worst-case watermark removal attacks. Extensive experiments validate the promising empirical and provable watermarking performance of FedGMark. Source code is available at: https://github.com/Yuxin104/FedGMark.", "keywords": "Watermark;Federated Graph Learning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Yuxin Yang;Qiang Li;Yuan Hong;Binghui Wang", "authorids": "~Yuxin_Yang3;~Qiang_Li12;~Yuan_Hong1;~Binghui_Wang2", "gender": ";M;M;M", "homepage": ";https://ccst.jlu.edu.cn/info/1367/19705.htm;https://yhongcs.github.io/;https://wangbinghui.net", "dblp": "146/9561-3;72/872-8;79/5433-1;123/7149", "google_scholar": "7EMBUqkAAAAJ;;KJuZW2wAAAAJ;SoOztcEAAAAJ", "orcid": "0000-0002-9907-9980;0000-0001-7510-4718;;0000-0001-5616-060X", "linkedin": ";;;", "or_profile": "~Yuxin_Yang3;~Qiang_Li12;~Yuan_Hong1;~Binghui_Wang2", "aff": "Illinois Institute of Technology;Jilin University;University of Connecticut;Illinois Institute of Technology", "aff_domain": "iit.edu;jlu.edu.cn;uconn.edu;iit.edu", "position": "PhD student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2024fedgmark,\ntitle={Fed{GM}ark: Certifiably Robust Watermarking for Federated Graph Learning},\nauthor={Yuxin Yang and Qiang Li and Yuan Hong and Binghui Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xeviQPXTMU}\n}", "github": "", "reviewers": "yEMY;Y9Y7;djtK;ZEKF", "pdf_size": 2939470, "rating": "5;5;6;6", "confidence": "5;3;3;4", "soundness": "2;2;3;3", "novelty": "2;2;2;3", "presentation": "2;3;3;3", "wc_summary": "83;58;67;88", "wc_strengths": "31;56;57;92", "wc_weaknesses": "251;295;147;74", "wc_questions": "29;66;5;108", "wc_limitations": "34;4;5;163", "wc_review": "428;479;281;525", "wc_reply_reviewers": "196;19;0;33", "wc_reply_authors": "536;170;138;32", "reply_reviewers": "2;1;0;1", "reply_authors": "4;4;3;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 74.0, 12.062338081814818 ], "wc_strengths_avg": [ 59.0, 21.714050750608465 ], "wc_weaknesses_avg": [ 191.75, 86.65845313643672 ], "wc_questions_avg": [ 52.0, 38.95510236156491 ], "wc_limitations_avg": [ 51.5, 65.49236596734004 ], "wc_review_avg": [ 428.25, 91.67708274154452 ], "wc_reply_reviewers_avg": [ 62.0, 78.24640566824779 ], "wc_reply_authors_avg": [ 219.0, 190.01315743916263 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.25, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13078207973256430056&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "iit.edu;jlu.edu.cn;uconn.edu;iit.edu", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Illinois Institute of Technology;Jilin University;University of Connecticut", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iit.edu;http://www.jlu.edu.cn;https://www.uconn.edu", "aff_unique_abbr": "IIT;JLU;UConn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "RestoreAgent: Autonomous Image Restoration Agent via Multimodal Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93068", "id": "xgP5ynlZWf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xgP5ynlZWf", "openreview": "https://openreview.net/forum?id=xgP5ynlZWf", "poster": "/media/PosterPDFs/NeurIPS%202024/93068.png?t=1732285991.5628567", "project": "", "author_site": "Haoyu Chen, Wenbo Li, Jinjin Gu, Jingjing Ren, Sixiang Chen, Tian Ye, Renjing Pei, Kaiwen Zhou, Fenglong Song, Lei Zhu", "tldr": "", "abstract": "Natural images captured by mobile devices often suffer from multiple types of degradation, such as noise, blur, and low light. Traditional image restoration methods require manual selection of specific tasks, algorithms, and execution sequences, which is time-consuming and may yield suboptimal results. All-in-one models, though capable of handling multiple tasks, typically support only a limited range and often produce overly smooth, low-fidelity outcomes due to their broad data distribution fitting. To address these challenges, we first define a new pipeline for restoring images with multiple degradations, and then introduce RestoreAgent, an intelligent image restoration system leveraging multimodal large language models. RestoreAgent autonomously assesses the type and extent of degradation in input images and performs restoration through (1) determining the appropriate restoration tasks, (2) optimizing the task sequence, (3) selecting the most suitable models, and (4) executing the restoration. Experimental results demonstrate the superior performance of RestoreAgent in handling complex degradation, surpassing human experts. Furthermore, the system\u2019s modular design facilitates the fast integration of new tasks and models.", "keywords": "Image Restoration;Low level vision;Agent;Multimodal Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Haoyu Chen;Wenbo Li;Jinjin Gu;Jingjing Ren;Sixiang Chen;Tian Ye;Renjing Pei;Kaiwen Zhou;Fenglong Song;Lei Zhu", "authorids": "~Haoyu_Chen2;~Wenbo_Li6;~Jinjin_Gu1;~Jingjing_Ren1;~Sixiang_Chen2;~Tian_Ye3;~Renjing_Pei1;~Kaiwen_Zhou2;~Fenglong_Song1;~Lei_Zhu1", "gender": "M;M;M;;M;;F;M;;M", "homepage": "https://haoyuchen.com;https://fenglinglwb.github.io/;http://www.jasongt.com;;https://ephemeral182.github.io;;;https://jnhujnhu.github.io/;;https://sites.google.com/site/indexlzhu/home?authuser=0", "dblp": "146/8170-3;51/3185-2.html;209/5709;;;;181/9091;215/4936;;99/549-3", "google_scholar": "https://scholar.google.com.hk/citations?user=KWbcBucAAAAJ;foGn_TIAAAAJ;uMQ-G-QAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;;;nHmlZ5QAAAAJ;;https://scholar.google.com.hk/citations?user=AQtqhaYAAAAJ", "orcid": "0000-0001-7618-9733;;0000-0002-4389-6236;;0009-0003-6837-886X;;0000-0001-7513-6576;;;", "linkedin": ";;jinjingu;;;;;;;", "or_profile": "~Haoyu_Chen2;~Wenbo_Li6;~Jinjin_Gu1;~Jingjing_Ren1;~Sixiang_Chen2;~Tian_Ye3;~Renjing_Pei1;~Kaiwen_Zhou2;~Fenglong_Song1;~Lei_Zhu1", "aff": "Hong Kong University of Science and Technology (Guangzhou);Huawei Technologies Ltd.;University of Sydney;;HKUST(GZ);;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;;Hong Kong University of Science and Technology (Guangzhou) & HKUST", "aff_domain": "connect.hkust-gz.edu.cn;huawei.com;sydney.edu.au;;connect.hkust-gz.edu.cn;;huawei.com;huawei.com;;ust.hk", "position": "PhD student;Researcher;PhD student;;PhD student;;Principal Engineer;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nchen2024restoreagent,\ntitle={RestoreAgent: Autonomous Image Restoration Agent via Multimodal Large Language Models},\nauthor={Haoyu Chen and Wenbo Li and Jinjin Gu and Jingjing Ren and Sixiang Chen and Tian Ye and Renjing Pei and Kaiwen Zhou and Fenglong Song and Lei Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xgP5ynlZWf}\n}", "github": "", "reviewers": "Ux72;NhBR;4Dbc;DJ9b", "pdf_size": 7815827, "rating": "5;6;7;8", "confidence": "4;5;5;4", "soundness": "3;4;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;3", "wc_summary": "42;74;84;92", "wc_strengths": "30;82;97;88", "wc_weaknesses": "212;126;359;132", "wc_questions": "3;75;34;101", "wc_limitations": "7;6;6;2", "wc_review": "294;363;580;415", "wc_reply_reviewers": "31;9;93;54", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 73.0, 19.0 ], "wc_strengths_avg": [ 74.25, 26.099568961957974 ], "wc_weaknesses_avg": [ 207.25, 93.961095672624 ], "wc_questions_avg": [ 53.25, 37.5790832778023 ], "wc_limitations_avg": [ 5.25, 1.920286436967152 ], "wc_review_avg": [ 413.0, 105.53909228338095 ], "wc_reply_reviewers_avg": [ 46.75, 31.083556746292725 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11251803285804608403&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "connect.hkust-gz.edu.cn;huawei.com;sydney.edu.au;;connect.hkust-gz.edu.cn;;huawei.com;huawei.com;;ust.hk", "author_num": 10, "aff_unique_index": "0;1;2;0;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei;University of Sydney", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com;https://www.sydney.edu.au", "aff_unique_abbr": "HKUST;Huawei;USYD", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Hong Kong SAR;;Guangzhou", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "DDK: Distilling Domain Knowledge for Efficient Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93067", "id": "xgiurUq0ss", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xgiurUq0ss", "openreview": "https://openreview.net/forum?id=xgiurUq0ss", "poster": "/media/PosterPDFs/NeurIPS%202024/93067.png?t=1733294336.8387716", "project": "", "author_site": "Jiaheng Liu, Chenchen Zhang, Jinyang Guo, Yuanxing Zhang, Haoran Que, Ken Deng, ZhiqiBai zhiqi, Jie Liu, Ge Zhang, JiakaiWang, Yanan Wu, Congnan Liu, Jiamang Wang, Lin Qu, Wenbo Su, Bo Zheng", "tldr": "", "abstract": "Despite the advanced intelligence abilities of large language models (LLMs) in various applications, they still face significant computational and storage demands. Knowledge Distillation (KD) has emerged as an effective strategy to improve the performance of a smaller LLM (i.e., the student model) by transferring knowledge from a high-performing LLM (i.e., the teacher model). Prevailing techniques in LLM distillation typically use a black-box model API to generate high-quality pretrained and aligned datasets, or utilize white-box distillation by altering the loss function to better transfer knowledge from the teacher LLM. However, these methods ignore the knowledge differences between the student and teacher LLMs across domains. This results in excessive focus on domains with minimal performance gaps and insufficient attention to domains with large gaps, reducing overall performance. In this paper, we introduce a new LLM distillation framework called DDK, which dynamically adjusts the composition of the distillation dataset in a smooth manner according to the domain performance differences between the teacher and student models, making the distillation process more stable and effective. Extensive evaluations show that DDK significantly improves the performance of student models, outperforming both continuously pretrained baselines and existing knowledge distillation methods by a large margin.", "keywords": "Knowledge Distillation;Large Language Models;Model Acceralation", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Jiaheng Liu;Chenchen Zhang;Jinyang Guo;Yuanxing Zhang;Haoran Que;Ken Deng;ZhiqiBai;Jie Liu;Ge Zhang;JiakaiWang;Yanan Wu;Congnan Liu;Jiamang Wang;Lin Qu;Wenbo Su;Bo Zheng", "authorids": "~Jiaheng_Liu1;~Chenchen_Zhang3;~Jinyang_Guo1;~Yuanxing_Zhang3;~Haoran_Que1;~Ken_Deng1;~ZhiqiBai1;~Jie_Liu13;~Ge_Zhang5;~JiakaiWang1;~Yanan_Wu2;~Congnan_Liu2;~Jiamang_Wang1;~Lin_Qu2;~Wenbo_Su2;~Bo_Zheng5", "gender": "M;;M;M;M;F;M;;M;;;;M;M;M;M", "homepage": "https://liujiaheng.github.io/;;https://jinyangguo.github.io/;;;;https://github.com/BaiZhiqi;;;https://github.com/BlcDec;;;;https://www.linkedin.com/in/%E7%90%B3-%E6%9B%B2-05a54498/;;", "dblp": "225/1962;;;194/7059.html;358/4966;;;;;;135/9598;;;;;33/1610-7", "google_scholar": "yFI_RjUAAAAJ;;uJGeT1AAAAAJ;COdftTMAAAAJ;onEik5gAAAAJ;https://scholar.google.com/citations?hl=en;;;qyTrq4kAAAAJ;;aucxPZEAAAAJ;;;;;3gHhO9QAAAAJ", "orcid": ";;;0000-0003-1460-8124;;0000-0002-2976-2573;;;;;;;;;0009-0009-3800-7543;0000-0002-4037-6315", "linkedin": ";;;;;;;;ge-zhang-792797169/;;;;%E5%AE%B6%E5%BF%99-%E7%8E%8B-b78784114/;;https://www.linkedin.cn/incareer/in/wenbo-su-b923b0b1;bo-zheng-0315254/", "or_profile": "~Jiaheng_Liu1;~Chenchen_Zhang3;~Jinyang_Guo1;~Yuanxing_Zhang3;~Haoran_Que1;~Ken_Deng1;~ZhiqiBai1;~Jie_Liu13;~Ge_Zhang5;~JiakaiWang1;~Yanan_Wu2;~Congnan_Liu2;~Jiamang_Wang1;~Lin_Qu2;~Wenbo_Su2;~Bo_Zheng5", "aff": "Alibaba Group;;Beihang University;Alibaba Group;Beihang University;Alibaba Group;;;University of Waterloo;;;;Alibaba Group;;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;;buaa.edu.cn;alibaba-inc.com;buaa.edu.cn;alibaba-inc.com;;;cs.uwaterloo.ca;;;;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;;Assistant Professor;Researcher;MS student;Researcher;;;PhD student;;;;Researcher;;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2024ddk,\ntitle={{DDK}: Distilling Domain Knowledge for Efficient Large Language Models},\nauthor={Jiaheng Liu and Chenchen Zhang and Jinyang Guo and Yuanxing Zhang and Haoran Que and Ken Deng and ZhiqiBai and Jie Liu and Ge Zhang and JiakaiWang and Yanan Wu and Congnan Liu and Jiamang Wang and Lin Qu and Wenbo Su and Bo Zheng},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xgiurUq0ss}\n}", "github": "", "reviewers": "EYh3;foae;6kDi;oZHE", "pdf_size": 819864, "rating": "5;5;6;6", "confidence": "4;4;4;5", "soundness": "2;3;2;3", "novelty": "2;2;2;2", "presentation": "2;3;3;3", "wc_summary": "61;78;41;90", "wc_strengths": "53;49;26;20", "wc_weaknesses": "161;234;78;72", "wc_questions": "33;13;41;33", "wc_limitations": "1;161;1;1", "wc_review": "309;535;187;216", "wc_reply_reviewers": "40;0;18;17", "wc_reply_authors": "103;302;14;14", "reply_reviewers": "1;0;1;1", "reply_authors": "3;4;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 67.5, 18.445866745696716 ], "wc_strengths_avg": [ 37.0, 14.230249470757707 ], "wc_weaknesses_avg": [ 136.25, 66.49953007352758 ], "wc_questions_avg": [ 30.0, 10.344080432788601 ], "wc_limitations_avg": [ 41.0, 69.2820323027551 ], "wc_review_avg": [ 311.75, 136.5455510077132 ], "wc_reply_reviewers_avg": [ 18.75, 14.201672436723781 ], "wc_reply_authors_avg": [ 108.25, 117.6145718012866 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7118421578165913454&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "alibaba-inc.com;;buaa.edu.cn;alibaba-inc.com;buaa.edu.cn;alibaba-inc.com;;;cs.uwaterloo.ca;;;;alibaba-inc.com;;alibaba-inc.com;alibaba-inc.com", "author_num": 16, "aff_unique_index": "0;1;0;1;0;2;0;0;0", "aff_unique_norm": "Alibaba Group;Beihang University;University of Waterloo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;http://www.buaa.edu.cn/;https://uwaterloo.ca", "aff_unique_abbr": "Alibaba;BUAA;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0", "aff_country_unique": "China;Canada" }, { "title": "Reasons and Solutions for the Decline in Model Performance after Editing", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93066", "id": "xjXYgdFM5M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xjXYgdFM5M", "openreview": "https://openreview.net/forum?id=xjXYgdFM5M", "poster": "/media/PosterPDFs/NeurIPS%202024/93066.png?t=1730689653.9546402", "project": "", "author_site": "Xiusheng Huang, Jiaxiang Liu, Yequan Wang, Kang Liu", "tldr": "", "abstract": "Knowledge editing technology has received widespread attention for low-cost updates of incorrect or outdated knowledge in large-scale language models. However, recent research has found that edited models often exhibit varying degrees of performance degradation. The reasons behind this phenomenon and potential solutions have not yet been provided. In order to investigate the reasons for the performance decline of the edited model and optimize the editing method, this work explores the underlying reasons from both data and model perspectives. Specifically, 1) from a data perspective, to clarify the impact of data on the performance of editing models, this paper first constructs a **M**ulti-**Q**uestion **D**ataset (**MQD**) to evaluate the impact of different types of editing data on model performance. The performance of the editing model is mainly affected by the diversity of editing targets and sequence length, as determined through experiments. 2) From a model perspective, this article explores the factors that affect the performance of editing models. The results indicate a strong correlation between the L1-norm of the editing model layer and the editing accuracy, and clarify that this is an important factor leading to the bottleneck of editing performance. Finally, in order to improve the performance of the editing model, this paper further proposes a **D**ump **for** **S**equence (**D4S**) method, which successfully overcomes the previous editing bottleneck by reducing the L1-norm of the editing layer, allowing users to perform multiple effective edits and minimizing model damage. Our code is available at https://github.com/nlpkeg/D4S.", "keywords": "Knowledge editing;performance evaluation", "primary_area": "natural_language_processing", "supplementary_material": "/attachment/d86773b0e28a00af1ddb1a50ba96917b67189e8f.zip", "author": "Xiusheng Huang;Jiaxiang Liu;Yequan Wang;Kang Liu", "authorids": "~Xiusheng_Huang1;~Jiaxiang_Liu5;~Yequan_Wang1;~Kang_Liu1", "gender": "F;M;M;M", "homepage": "https://github.com/Huangxiusheng;;http://www.wangyequan.com;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html", "dblp": "298/0049;;188/9082;42/4903.html", "google_scholar": "4mY2E30AAAAJ;;7Gqp6FsAAAAJ;DtZCfl0AAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Xiusheng_Huang1;~Jiaxiang_Liu5;~Yequan_Wang1;~Kang_Liu1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Beijing Academy of Artificial Intelligence;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;baai.ac.cn;ia.ac.cn", "position": "PhD student;Undergrad student;Researcher;Professor", "bibtex": "@inproceedings{\nhuang2024reasons,\ntitle={Reasons and Solutions for the Decline in Model Performance after Editing},\nauthor={Xiusheng Huang and Jiaxiang Liu and Yequan Wang and Kang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xjXYgdFM5M}\n}", "github": "", "reviewers": "Kq3N;nwwK;rpPG;MFeE", "pdf_size": 3978515, "rating": "6;6;7;8", "confidence": "3;3;3;4", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;4", "wc_summary": "104;167;51;94", "wc_strengths": "123;85;41;119", "wc_weaknesses": "121;85;36;122", "wc_questions": "132;2;1;68", "wc_limitations": "116;18;1;7", "wc_review": "596;357;130;410", "wc_reply_reviewers": "58;15;28;198", "wc_reply_authors": "15;0;15;399", "reply_reviewers": "1;1;1;2", "reply_authors": "2;1;2;3", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 104.0, 41.46685423323066 ], "wc_strengths_avg": [ 92.0, 32.93933818400121 ], "wc_weaknesses_avg": [ 91.0, 35.078483433580764 ], "wc_questions_avg": [ 50.75, 54.2004381901106 ], "wc_limitations_avg": [ 35.5, 46.87483333303704 ], "wc_review_avg": [ 373.25, 166.13454637732636 ], "wc_reply_reviewers_avg": [ 74.75, 72.8470143519966 ], "wc_reply_authors_avg": [ 107.25, 168.55321859875593 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4706344586271279545&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ucas.ac.cn;ucas.ac.cn;baai.ac.cn;ia.ac.cn", "author_num": 4, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Beijing Academy of Artificial Intelligence;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;https://www.baaic.cn;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;BAAI;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unraveling Molecular Structure: A Multimodal Spectroscopic Dataset for Chemistry", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97441", "id": "xjxqWYyTfR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xjxqWYyTfR", "openreview": "https://openreview.net/forum?id=xjxqWYyTfR", "poster": "", "project": "", "author_site": "Marvin Alberts, Oliver Schilter, Federico Zipoli, Nina Hartrampf, Teodoro Laino", "tldr": "", "abstract": "Spectroscopic techniques are essential tools for determining the structure of molecules. Different spectroscopic techniques, such as Nuclear magnetic resonance (NMR), Infrared spectroscopy, and Mass Spectrometry, provide insight into the molecular structure, including the presence or absence of functional groups. Chemists leverage the complementary nature of the different methods to their advantage. However, the lack of a comprehensive multimodal dataset, containing spectra from a variety of spectroscopic techniques, has limited machine-learning approaches mostly to single-modality tasks for predicting molecular structures from spectra. \nHere we introduce a dataset comprising simulated $^1$H-NMR, $^{13}$C-NMR, HSQC-NMR, Infrared, and Mass spectra (positive and negative ion modes) for 790k molecules extracted from chemical reactions in patent data. This dataset enables the development of foundation models for integrating information from multiple spectroscopic modalities, emulating the approach employed by human experts. Additionally, we provide benchmarks for evaluating single-modality tasks such as structure elucidation, predicting the spectra for a target molecule, and functional group predictions. \nThis dataset has the potential automate structure elucidation, streamlining the molecular discovery pipeline from synthesis to structure determination. \nThe dataset and code for the benchmarks can be found at https://rxn4chemistry.github.io/multimodal-spectroscopic-dataset (Available upon submission of the supporting information).", "keywords": "Multimodal;Spectroscopy;Chemistry;Science", "primary_area": "", "supplementary_material": "", "author": "Marvin Alberts;Oliver Schilter;Federico Zipoli;Nina Hartrampf;Teodoro Laino", "authorids": "~Marvin_Alberts1;~Oliver_Schilter1;~Federico_Zipoli1;~Nina_Hartrampf1;~Teodoro_Laino1", "gender": ";M;;;", "homepage": ";;;https://www.hartrampf-lab.com/;https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwirz6e9wbDzAhUghf0HHV9QA2IQFnoECAgQAQ&url=https%3A%2F%2Fresearcher.watson.ibm.com%2Fresearcher%2Fview.php%3Fperson%3Dzurich-teo&usg=AOvVaw0filAdQFA8Tgg0EVfxtPje", "dblp": ";;;;", "google_scholar": "fP9kHqAAAAAJ;;;nhbLEo8AAAAJ;", "orcid": "0009-0003-9198-7866;;;0000-0003-0875-6390;", "linkedin": ";oliver-schilter-925a66156/;;nina-hartrampf-153137100/?original_referer=https%3A%2F%2Fwww%2Egoogle%2Ecom%2F&originalSubdomain=ch;", "or_profile": "~Marvin_Alberts1;~Oliver_Schilter1;~Federico_Zipoli1;~Nina_Hartrampf1;~Teodoro_Laino1", "aff": "IBM Research;EPFL - EPF Lausanne;;University of Zurich;International Business Machines", "aff_domain": "ibm.com;epfl.ch;;uzh.ch;ibm.com", "position": "PhD student;PhD student;;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nalberts2024unraveling,\ntitle={Unraveling Molecular Structure: A Multimodal Spectroscopic Dataset for Chemistry},\nauthor={Marvin Alberts and Oliver Schilter and Federico Zipoli and Nina Hartrampf and Teodoro Laino},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xjxqWYyTfR}\n}", "github": "", "reviewers": "qGKy;yKuK;vx3L;x5D8", "pdf_size": 12526496, "rating": "6;6;6;7", "confidence": "5;4;4;3", "wc_summary_and_contributions": "106;28;91;28", "wc_strengths": "24;14;35;50", "wc_improvement": "23;183;142;57", "wc_limitations": "1;142;1;58", "wc_correctness": "2;17;13;159", "wc_clarity": "8;2;1;50", "wc_relation_to_prior_work": "2;13;1;30", "wc_documentation": "4;13;1;24", "wc_additional_feedback": "1;1;1;1", "wc_review": "171;413;286;457", "wc_reply_reviewers": "160;0;21;21", "wc_reply_authors": "496;94;40;40", "reply_reviewers": "2;0;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 63.25, 35.64670391494843 ], "wc_strengths_avg": [ 30.75, 13.36740438529485 ], "wc_improvement_avg": [ 101.25, 64.07954041657915 ], "wc_limitations_avg": [ 50.5, 57.72564421468157 ], "wc_correctness_avg": [ 47.75, 64.46462208064203 ], "wc_clarity_avg": [ 15.25, 20.24073862288627 ], "wc_relation_to_prior_work_avg": [ 11.5, 11.672617529928752 ], "wc_documentation_avg": [ 10.5, 8.958236433584458 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 331.75, 112.05216419150501 ], "wc_reply_reviewers_avg": [ 50.5, 63.79851095441021 ], "wc_reply_authors_avg": [ 167.5, 190.93650777156265 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9719673591040923171&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ibm.com;epfl.ch;;uzh.ch;ibm.com", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "IBM;EPFL;University of Zurich;International Business Machines Corporation", "aff_unique_dep": "IBM Research;;;", "aff_unique_url": "https://www.ibm.com/research;https://www.epfl.ch;https://www.unizh.ch;https://www.ibm.com", "aff_unique_abbr": "IBM;EPFL;UZH;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Take A Shortcut Back: Mitigating the Gradient Vanishing for Training Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93065", "id": "xjyU6zmZD7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xjyU6zmZD7", "openreview": "https://openreview.net/forum?id=xjyU6zmZD7", "poster": "/media/PosterPDFs/NeurIPS%202024/93065.png?t=1730347627.9534833", "project": "", "author_site": "Yufei Guo, Yuanpei Chen, Zecheng Hao, Weihang Peng, Zhou Jie, Yuhan Zhang, Xiaode Liu, Zhe Ma", "tldr": "", "abstract": "The Spiking Neural Network (SNN) is a biologically inspired neural network infrastructure that has recently garnered significant attention. It utilizes binary spike activations to transmit information, thereby replacing multiplications with additions and resulting in high energy efficiency. However, training an SNN directly poses a challenge due to the undefined gradient of the firing spike process. Although prior works have employed various surrogate gradient training methods that use an alternative function to replace the firing process during back-propagation, these approaches ignore an intrinsic problem: gradient vanishing. To address this issue, we propose a shortcut back-propagation method in the paper, which advocates for transmitting the gradient directly from the loss to the shallow layers. This enables us to present the gradient to the shallow layers directly, thereby significantly mitigating the gradient vanishing problem. Additionally, this method does not introduce any burden during the inference phase.\nTo strike a balance between final accuracy and ease of training, we also propose an evolutionary training framework and implement it by inducing a balance coefficient that dynamically changes with the training epoch, which further improves the network's performance. Extensive experiments conducted over static and dynamic datasets using several popular network structures reveal that our method consistently outperforms state-of-the-art methods.", "keywords": "Spiking Neural Network;Training SNNs;Surrogate gradient", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/219213773c28852413f852e0755c33b1f50db66d.zip", "author": "Yufei Guo;Yuanpei Chen;Zecheng Hao;Weihang Peng;Zhou Jie;Yuhan Zhang;Xiaode Liu;Zhe Ma", "authorids": "~Yufei_Guo1;~Yuanpei_Chen1;~Zecheng_Hao1;~Weihang_Peng2;~Zhou_Jie3;~Yuhan_Zhang1;~Xiaode_Liu1;~Zhe_Ma2", "gender": "M;Non-Binary;;;F;F;M;M", "homepage": "https://github.com/yfguo91;;https://hzc1208.github.io/;;https://www.zhouejie11.com;https://github.com/hanbaobao950123;https://www.researchgate.net/profile/Xiaode-Liu;https://dblp.org/pid/22/6672", "dblp": "23/2981-1;;339/6969;;;;;22/6672-1", "google_scholar": ";;txTkX7YAAAAJ;;;;;", "orcid": ";0000-0002-4674-553X;0000-0001-9074-2857;;;0000-0001-8579-4943;0000-0003-3067-4543;", "linkedin": ";;;;;;;", "or_profile": "~Yufei_Guo1;~Yuanpei_Chen1;~Zecheng_Hao1;~Weihang_Peng2;~Zhou_Jie3;~Yuhan_Zhang1;~Xiaode_Liu1;~Zhe_Ma2", "aff": "Intelligent Science & Technology Academy of CASIC;Baidu;Peking University;;China Aerospace Science and Industry Corporation Limited;Intelligent\u00a0Science\u00a0&\u00a0Technology\u00a0Academy\u00a0of\u00a0CASIC;CASIC;Intelligent science and technology academy limited of CASIC", "aff_domain": "casic.cn;baidu.com;pku.edu.cn;;mail.tsinghua.edu.cn;casic.cn;pke.edu.cn;casic.com", "position": "Researcher;Researcher;PhD student;;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nguo2024take,\ntitle={Take A Shortcut Back: Mitigating the Gradient Vanishing for Training Spiking Neural Networks},\nauthor={Yufei Guo and Yuanpei Chen and Zecheng Hao and Weihang Peng and Zhou Jie and Yuhan Zhang and Xiaode Liu and Zhe Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xjyU6zmZD7}\n}", "github": "", "reviewers": "Jb1s;6ctb;runr", "pdf_size": 509833, "rating": "6;6;6", "confidence": "5;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;3;3", "wc_summary": "49;105;53", "wc_strengths": "40;80;23", "wc_weaknesses": "129;102;40", "wc_questions": "11;50;21", "wc_limitations": "7;1;1", "wc_review": "236;338;138", "wc_reply_reviewers": "51;19;0", "wc_reply_authors": "19;19;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.0, 25.508168626278653 ], "wc_strengths_avg": [ 47.666666666666664, 23.893281249943232 ], "wc_weaknesses_avg": [ 90.33333333333333, 37.2588542795162 ], "wc_questions_avg": [ 27.333333333333332, 16.539514973407037 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 237.33333333333334, 81.65510122188053 ], "wc_reply_reviewers_avg": [ 23.333333333333332, 21.044925490219462 ], "wc_reply_authors_avg": [ 12.666666666666666, 8.956685895029603 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4670872514201778655&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "casic.cn;baidu.com;pku.edu.cn;;mail.tsinghua.edu.cn;casic.cn;pke.edu.cn;casic.com", "author_num": 8, "aff_unique_index": "0;1;2;3;0;0;0", "aff_unique_norm": "China Aerospace Science and Industry Corporation;Baidu;Peking University;China Aerospace Science and Industry Corporation Limited", "aff_unique_dep": "Intelligent Science & Technology Academy;Baidu, Inc.;;", "aff_unique_url": "http://www.casic.com.cn/;https://www.baidu.com;http://www.pku.edu.cn;http://www.casic.com.cn", "aff_unique_abbr": "CASIC;Baidu;Peking U;CASIC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Classic GNNs are Strong Baselines: Reassessing GNNs for Node Classification", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97440", "id": "xkljKdGe4E", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xkljKdGe4E", "openreview": "https://openreview.net/forum?id=xkljKdGe4E", "poster": "/media/PosterPDFs/NeurIPS%202024/97440.png?t=1733564677.2140584", "project": "", "author_site": "Yuankai Luo, Lei Shi, Xiao-Ming Wu", "tldr": "", "abstract": "Graph Transformers (GTs) have recently emerged as popular alternatives to traditional message-passing Graph Neural Networks (GNNs), due to their theoretically superior expressiveness and impressive performance reported on standard node classification benchmarks, often significantly outperforming GNNs. In this paper, we conduct a thorough empirical analysis to reevaluate the performance of three classic GNN models (GCN, GAT, and GraphSAGE) against GTs. Our findings suggest that the previously reported superiority of GTs may have been overstated due to suboptimal hyperparameter configurations in GNNs. Remarkably, with slight hyperparameter tuning, these classic GNN models achieve state-of-the-art performance, matching or even exceeding that of recent GTs across 17 out of the 18 diverse datasets examined. Additionally, we conduct detailed ablation studies to investigate the influence of various GNN configurations\u2014such as normalization, dropout, residual connections, and network depth\u2014on node classification performance. Our study aims to promote a higher standard of empirical rigor in the field of graph machine learning, encouraging more accurate comparisons and evaluations of model capabilities. Our implementation is available at https://github.com/LUOyk1999/tunedGNN.", "keywords": "graph neural networks;node classification;graph transformers", "primary_area": "", "supplementary_material": "", "author": "Yuankai Luo;Lei Shi;Xiao-Ming Wu", "authorids": "~Yuankai_Luo2;~Lei_Shi13;~Xiao-Ming_Wu1", "gender": "M;M;F", "homepage": "https://luoyk1999.github.io/;https://leishidata.com/;http://www4.comp.polyu.edu.hk/~csxmwu/", "dblp": "299/6707;29/563-2;98/2898-3", "google_scholar": "33f_QqAAAAAJ;NmaU6U0AAAAJ;3KbaUFkAAAAJ", "orcid": "0000-0003-3844-7214;;", "linkedin": ";;", "or_profile": "~Yuankai_Luo2;~Lei_Shi13;~Xiao-Ming_Wu1", "aff": "Beihang University;Beihang University;Hong Kong Polytechnic University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;polyu.edu.hk", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nluo2024classic,\ntitle={Classic {GNN}s are Strong Baselines: Reassessing {GNN}s for Node Classification},\nauthor={Yuankai Luo and Lei Shi and Xiao-Ming Wu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xkljKdGe4E}\n}", "github": "", "reviewers": "PM1d;Ejkm;xHX5;madU", "pdf_size": 3807686, "rating": "5;7;7;8", "confidence": "5;3;3;3", "wc_summary_and_contributions": "112;59;51;31", "wc_strengths": "57;51;96;38", "wc_improvement": "311;5;85;10", "wc_limitations": "1;50;25;13", "wc_correctness": "3;255;9;10", "wc_clarity": "1;8;8;6", "wc_relation_to_prior_work": "1;49;6;13", "wc_documentation": "6;15;13;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "493;493;294;127", "wc_reply_reviewers": "619;53;0;21", "wc_reply_authors": "3621;364;0;44", "reply_reviewers": "3;2;0;1", "reply_authors": "11;6;1;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "wc_summary_and_contributions_avg": [ 63.25, 29.93639089803579 ], "wc_strengths_avg": [ 60.5, 21.615966321217286 ], "wc_improvement_avg": [ 102.75, 124.33900232831209 ], "wc_limitations_avg": [ 22.25, 18.129740759315894 ], "wc_correctness_avg": [ 69.25, 107.27622057101006 ], "wc_clarity_avg": [ 5.75, 2.8613807855648994 ], "wc_relation_to_prior_work_avg": [ 17.25, 18.819869818890883 ], "wc_documentation_avg": [ 9.75, 4.322904116447646 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 351.75, 153.09372129515958 ], "wc_reply_reviewers_avg": [ 173.25, 258.0449331027447 ], "wc_reply_authors_avg": [ 1007.25, 1515.5743754431849 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 5.0, 3.9370039370059056 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2344632619638318294&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "buaa.edu.cn;buaa.edu.cn;polyu.edu.hk", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Beihang University;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.polyu.edu.hk", "aff_unique_abbr": "BUAA;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Large Language Models' Expert-level Global History Knowledge Benchmark (HiST-LLM)", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97439", "id": "xlKeMuyoZ5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xlKeMuyoZ5", "openreview": "https://openreview.net/forum?id=xlKeMuyoZ5", "poster": "", "project": "", "author_site": "Jakob Hauser, D\u00e1niel Kondor, Jenny Reddish, Majid Benam, Enrico Cioni, Federica Villa, James Bennett, Daniel Hoyer, Pieter Francois, Peter Turchin, R. Maria del Rio-Chanona", "tldr": "", "abstract": "Large Language Models (LLMs) have the potential to transform humanities and social science research, yet their history knowledge and comprehension at a graduate level remains untested. Benchmarking LLMs in history is particularly challenging, given that human knowledge of history is inherently unbalanced, with more information available on Western history and recent periods. We introduce the History Seshat Test for LLMs (HiST-LLM), based on a subset of the Seshat Global History Databank, which provides a structured representation of human historical knowledge, containing 36,000 data points across 600 historical societies and over 2,700 scholarly references. This dataset covers every major world region from the Neolithic period to the Industrial Revolution and includes information reviewed and assembled by history experts and graduate research assistants. Using this dataset, we benchmark a total of seven models from the Gemini, OpenAI, and Llama families. We find that, in a four-choice format, LLMs have a balanced accuracy ranging from 33.6% (Llama-3.1-8B) to 46% (GPT-4-Turbo), outperforming random guessing (25%) but falling short of expert comprehension. \nLLMs perform better on earlier historical periods. Regionally, performance is more even but still better for the Americas and lowest in Oceania and Sub-Saharan Africa for the more advanced models. Our benchmark shows that while LLMs possess some expert-level historical knowledge, there is considerable room for improvement.", "keywords": "Large Language Models;Benchmark;History;Text dataset;Expert knowledge", "primary_area": "", "supplementary_material": "/attachment/69d59132edd2cd1dee9c02f7f8c5bed75910fd29.zip", "author": "Jakob Hauser;D\u00e1niel Kondor;Jenny Reddish;Majid Benam;Enrico Cioni;Federica Villa;James S Bennett;Daniel Hoyer;Pieter Francois;Peter Turchin;R. Maria del Rio-Chanona", "authorids": "~Jakob_Hauser2;~D\u00e1niel_Kondor1;~Jenny_Reddish1;~Majid_Benam1;~Enrico_Cioni1;~Federica_Villa1;~James_S_Bennett1;~Daniel_Hoyer1;~Pieter_Francois1;~Peter_Turchin1;~R._Maria_del_Rio-Chanona1", "gender": "M;M;F;M;Not Specified;F;M;;M;;F", "homepage": "https://csh.ac.at/jakob-hauser/;;https://csh.ac.at/jenny-reddish/;https://csh.ac.at/majid-benam/;;;;;https://www.rpc.ox.ac.uk/people/pieter-francois/;https://peterturchin.com/;https://mariadelriochanona.info", "dblp": ";;;;;;;;;;172/5035", "google_scholar": ";;;;;;;;;;", "orcid": ";0000-0003-3720-7462;0000-0002-5553-6916;;;0000-0002-6695-5163;0000-0003-3051-1672;0000-0003-2675-257X;;;0000-0002-0189-7919", "linkedin": ";;;;;federicavilla92/;jim-bennett-9b72961/;;;;", "or_profile": "~Jakob_Hauser2;~D\u00e1niel_Kondor1;~Jenny_Reddish1;~Majid_Benam1;~Enrico_Cioni1;~Federica_Villa1;~James_S_Bennett1;~Daniel_Hoyer1;~Pieter_Francois1;~Peter_Turchin1;~R._Maria_del_Rio-Chanona1", "aff": "Complexity Science Hub Vienna;Complexity Science Hub Vienna;Complexity Science Hub Vienna;Complexity Science Hub Vienna;Alan Turing Institute;University of Milano-Bicocca;Complexity Science Hub Vienna;Complexity Science Hub;University of Oxford/Alan Turing Institute;University of Connecticut;University College London, University of London", "aff_domain": "csh.ac.at;csh.ac.at;csh.ac.at;csh.ac.at;turing.ac.uk;campus.unimib.it;csh.ac.at;csh.ac.at;ox.ac.uk;uconn.edu;ucl.ac.uk", "position": "Researcher;Postdoc;Researcher;Researcher;Researcher;PhD student;Principal Researcher;Associate Professor;Full Professor;Emeritus;Lecturer", "bibtex": "@inproceedings{\nhauser2024large,\ntitle={Large Language Models' Expert-level Global History Knowledge Benchmark (Hi{ST}-{LLM})},\nauthor={Jakob Hauser and D{\\'a}niel Kondor and Jenny Reddish and Majid Benam and Enrico Cioni and Federica Villa and James S Bennett and Daniel Hoyer and Pieter Francois and Peter Turchin and R. Maria del Rio-Chanona},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xlKeMuyoZ5}\n}", "github": "", "reviewers": "YNxR;oaF3;5Gsk;SzfT", "pdf_size": 730729, "rating": "6;6;7;9", "confidence": "3;3;2;4", "wc_summary_and_contributions": "31;99;100;95", "wc_strengths": "37;87;59;63", "wc_improvement": "60;78;126;57", "wc_limitations": "1;96;6;92", "wc_correctness": "6;1;28;17", "wc_clarity": "1;1;4;6", "wc_relation_to_prior_work": "1;1;19;10", "wc_documentation": "1;1;4;5", "wc_additional_feedback": "1;1;1;1", "wc_review": "139;365;347;346", "wc_reply_reviewers": "0;0;30;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 81.25, 29.07210862665452 ], "wc_strengths_avg": [ 61.5, 17.741194999210173 ], "wc_improvement_avg": [ 80.25, 27.60774347895894 ], "wc_limitations_avg": [ 48.75, 45.30659444275193 ], "wc_correctness_avg": [ 13.0, 10.41633332799983 ], "wc_clarity_avg": [ 3.0, 2.1213203435596424 ], "wc_relation_to_prior_work_avg": [ 7.75, 7.46240577829965 ], "wc_documentation_avg": [ 2.75, 1.7853571071357126 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 299.25, 92.82880748991661 ], "wc_reply_reviewers_avg": [ 7.5, 12.99038105676658 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15136324898650294155&as_sdt=5,38&sciodt=0,38&hl=en", "gs_version_total": 2, "email": "csh.ac.at;csh.ac.at;csh.ac.at;csh.ac.at;turing.ac.uk;campus.unimib.it;csh.ac.at;csh.ac.at;ox.ac.uk;uconn.edu;ucl.ac.uk", "author_num": 11, "aff_unique_index": "0;0;0;0;1;2;0;3;4;5;6", "aff_unique_norm": "Complexity Science Hub Vienna;Alan Turing Institute;University of Milano-Bicocca;Complexity Science Hub;University of Oxford;University of Connecticut;University College London", "aff_unique_dep": "Complexity Science;;;;;;", "aff_unique_url": "https://www.complexitysciencehub.at;https://www.turing.ac.uk;https://www.unimib.it;;https://www.ox.ac.uk;https://www.uconn.edu;https://www.ucl.ac.uk", "aff_unique_abbr": ";ATI;UniMiB;;Oxford;UConn;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2;0;1;4;1", "aff_country_unique": "Austria;United Kingdom;Italy;;United States" }, { "title": "Hybrid Top-Down Global Causal Discovery with Local Search for Linear and Nonlinear Additive Noise Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93064", "id": "xnmm1jThkv", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xnmm1jThkv", "openreview": "https://openreview.net/forum?id=xnmm1jThkv", "poster": "", "project": "", "author_site": "Sujai Hiremath, Jacqueline Maasch, Mengxiao Gao, Promit Ghosal, Kyra Gan", "tldr": "", "abstract": "Learning the unique directed acyclic graph corresponding to an unknown causal model is a challenging task. Methods based on functional causal models can identify a unique graph, but either suffer from the curse of dimensionality or impose strong parametric assumptions. To address these challenges, we propose a novel hybrid approach for global causal discovery in observational data that leverages local causal substructures. We first present a topological sorting algorithm that leverages ancestral relationships in linear structural causal models to establish a compact top-down hierarchical ordering, encoding more causal information than linear orderings produced by existing methods. We demonstrate that this approach generalizes to nonlinear settings with arbitrary noise. We then introduce a nonparametric constraint-based algorithm that prunes spurious edges by searching for local conditioning sets, achieving greater accuracy than current methods. We provide theoretical guarantees for correctness and worst-case polynomial time complexities, with empirical validation on synthetic data.", "keywords": "global causal discovery;additive noise model;local structure", "primary_area": "probabilistic_methods", "supplementary_material": "", "author": "Sujai Hiremath;Jacqueline R. M. A. Maasch;Mengxiao Gao;Promit Ghosal;Kyra Gan", "authorids": "~Sujai_Hiremath1;~Jacqueline_R._M._A._Maasch1;~Mengxiao_Gao1;~Promit_Ghosal2;~Kyra_Gan1", "gender": "M;Non-Binary;;M;", "homepage": "https://sujai1.github.io/;https://jmaasch.github.io/;;https://www.promit-ghosal.com/;", "dblp": ";;;;", "google_scholar": ";5l9n9J8AAAAJ;;Gp90OAUAAAAJ;", "orcid": "0009-0002-9360-0859;0000-0003-0432-3547;;;", "linkedin": ";;gao-mengxiao-b25a29297/;promit-ghosal-84822439/;", "or_profile": "~Sujai_Hiremath1;~Jacqueline_R._M._A._Maasch1;~Mengxiao_Gao1;~Promit_Ghosal2;~Kyra_Gan1", "aff": "Cornell University;Cornell University;Tsinghua University;Brandeis University;", "aff_domain": "cornell.edu;cs.cornell.edu;mails.tsinghua.edu.cn;brandeis.edu;", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;", "bibtex": "@inproceedings{\nhiremath2024hybrid,\ntitle={Hybrid Top-Down Global Causal Discovery with Local Search for Linear and Nonlinear Additive Noise Models},\nauthor={Sujai Hiremath and Jacqueline R. M. A. Maasch and Mengxiao Gao and Promit Ghosal and Kyra Gan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xnmm1jThkv}\n}", "github": "", "reviewers": "LHcu;fR7W;rTf8;LEV3", "pdf_size": 2850957, "rating": "5;6;6;7", "confidence": "4;3;4;3", "soundness": "3;3;2;4", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "43;58;76;97", "wc_strengths": "41;67;75;72", "wc_weaknesses": "104;99;317;188", "wc_questions": "39;83;50;123", "wc_limitations": "1;7;19;1", "wc_review": "228;314;537;481", "wc_reply_reviewers": "18;19;89;16", "wc_reply_authors": "27;27;122;7", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 20.180436070610565 ], "wc_strengths_avg": [ 63.75, 13.442005058770064 ], "wc_weaknesses_avg": [ 177.0, 88.22414635461202 ], "wc_questions_avg": [ 73.75, 32.72136152423979 ], "wc_limitations_avg": [ 7.0, 7.3484692283495345 ], "wc_review_avg": [ 390.0, 124.40860098883839 ], "wc_reply_reviewers_avg": [ 35.5, 30.907118921051183 ], "wc_reply_authors_avg": [ 45.75, 44.77373672143079 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7071067811865476, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17885708640285802498&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "cornell.edu;cs.cornell.edu;mails.tsinghua.edu.cn;brandeis.edu;", "author_num": 5, "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Cornell University;Tsinghua University;Brandeis University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cornell.edu;https://www.tsinghua.edu.cn;https://www.brandeis.edu", "aff_unique_abbr": "Cornell;THU;Brandeis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Unified Lexical Representation for Interpretable Visual-Language Alignment", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93063", "id": "xoCFd1WKpf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xoCFd1WKpf", "openreview": "https://openreview.net/forum?id=xoCFd1WKpf", "poster": "/media/PosterPDFs/NeurIPS%202024/93063.png?t=1731571656.060214", "project": "", "author_site": "Yifan Li, Yikai Wang, Yanwei Fu, Dongyu Ru, Zheng Zhang, Tong He", "tldr": "", "abstract": "Visual-Language Alignment (VLA) has gained a lot of attention since CLIP's groundbreaking work. \nAlthough CLIP performs well, the typical direct latent feature alignment lacks clarity in its representation and similarity scores. \nOn the other hand, lexical representation, a vector whose element represents the similarity between the sample and a word from the vocabulary, is a natural sparse representation and interpretable, providing exact matches for individual words.\nHowever, lexical representations are difficult to learn due to no ground-truth supervision and false-discovery issues, and thus requires complex design to train effectively.\nIn this paper, we introduce LexVLA, a more interpretable VLA framework by learning a unified lexical representation for both modalities without complex design. \nWe use DINOv2 as our visual model for its local-inclined features and Llama 2, a generative language model, to leverage its in-context lexical prediction ability.\nTo avoid the false discovery, we propose an overuse penalty to refrain the lexical representation from falsely frequently activating meaningless words.\nWe demonstrate that these two pre-trained uni-modal models can be well-aligned by fine-tuning on the modest multi-modal dataset and avoid intricate training configurations. \nOn cross-modal retrieval benchmarks, LexVLA, trained on the CC-12M multi-modal dataset, outperforms baselines fine-tuned on larger datasets (e.g., YFCC15M) and those trained from scratch on even bigger datasets (e.g., 1.1B data, including CC-12M).\nWe conduct extensive experiments to analyze LexVLA. \nCodes are available at https://github.com/Clementine24/LexVLA.", "keywords": "Multi-modal;Alignment;Retrieval;Sparse Retrieval;Lexical Representation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yifan Li;Yikai Wang;Yanwei Fu;Dongyu Ru;Zheng Zhang;Tong He", "authorids": "~Yifan_Li4;~Yikai_Wang1;~Yanwei_Fu2;~Dongyu_Ru1;~Zheng_Zhang1;~Tong_He5", "gender": "F;M;M;;M;M", "homepage": "https://github.com/Clementine24;https://yikai-wang.github.io;http://yanweifu.github.io;;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;https://hetong007.github.io/", "dblp": ";85/9555-2;63/9065;;;02/1554-2", "google_scholar": ";x8HOE_cAAAAJ;https://scholar.google.co.uk/citations?user=Vg54TcsAAAAJ;;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;hV5D8GYAAAAJ", "orcid": ";0000-0001-6107-5063;0000-0002-6595-6893;;;", "linkedin": ";;;;;", "or_profile": "~Yifan_Li4;~Yikai_Wang1;~Yanwei_Fu2;~Dongyu_Ru1;~Zheng_Zhang1;~Tong_He5", "aff": "Fudan University;Fudan University;Fudan University,;;Amazon;Amazon", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;amazon.com;amazon.com", "position": "MS student;PhD student;Professor;;Senior Principal Scientist;Researcher", "bibtex": "@inproceedings{\nli2024unified,\ntitle={Unified Lexical Representation for Interpretable Visual-Language Alignment},\nauthor={Yifan Li and Yikai Wang and Yanwei Fu and Dongyu Ru and Zheng Zhang and Tong He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xoCFd1WKpf}\n}", "github": "", "reviewers": "CyPa;a8Qd;KFJh", "pdf_size": 9566604, "rating": "5;5;6", "confidence": "4;4;3", "soundness": "3;3;3", "novelty": "2;2;3", "presentation": "3;3;3", "wc_summary": "116;65;70", "wc_strengths": "34;52;61", "wc_weaknesses": "150;149;21", "wc_questions": "325;7;18", "wc_limitations": "1;1;17", "wc_review": "626;274;187", "wc_reply_reviewers": "40;98;12", "wc_reply_authors": "0;258;0", "reply_reviewers": "1;1;1", "reply_authors": "1;3;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.66666666666667, 22.954060400915758 ], "wc_strengths_avg": [ 49.0, 11.224972160321824 ], "wc_weaknesses_avg": [ 106.66666666666667, 60.576856600153455 ], "wc_questions_avg": [ 116.66666666666667, 147.382344789176 ], "wc_limitations_avg": [ 6.333333333333333, 7.542472332656507 ], "wc_review_avg": [ 362.3333333333333, 189.79345498608626 ], "wc_reply_reviewers_avg": [ 50.0, 35.81433604950212 ], "wc_reply_authors_avg": [ 86.0, 121.62236636408618 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3683391059127525330&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 6, "email": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;amazon.com;amazon.com", "author_num": 6, "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Fudan University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://www.amazon.com", "aff_unique_abbr": "Fudan;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Evaluate then Cooperate: Shapley-based View Cooperation Enhancement for Multi-view Clustering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93062", "id": "xoc4QOvbDs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xoc4QOvbDs", "openreview": "https://openreview.net/forum?id=xoc4QOvbDs", "poster": "", "project": "", "author_site": "Fangdi Wang, Jiaqi Jin, Jingtao Hu, Suyuan Liu, Xihong Yang, Siwei Wang, Xinwang Liu, En Zhu", "tldr": "", "abstract": "The fundamental goal of deep multi-view clustering is to achieve preferable task performance through inter-view cooperation. Although numerous DMVC approaches have been proposed, the collaboration role of individual views have not been well investigated in existing literature. Moreover, how to further enhance view cooperation for better fusion still needs to be explored. In this paper, we firstly consider DMVC as an unsupervised cooperative game where each view can be regarded as a participant. Then, we introduce the Shapley value and propose a novel MVC framework termed Shapley-based Cooperation Enhancing Multi-view Clustering (SCE-MVC), which evaluates view cooperation with game theory. Specially, we employ the optimal transport distance between fused cluster distributions and single view component as the utility function for computing shapley values. Afterwards, we apply shapley values to assess the contribution of each view and utilize these contributions to promote view cooperation. Comprehensive experimental results well support the effectiveness of our framework adopting to existing DMVC frameworks, demonstrating the importance and necessity of enhancing the cooperation among views.", "keywords": "Multi-view Clustering;Clustering Network", "primary_area": "learning_theory", "supplementary_material": "", "author": "Fangdi Wang;Jiaqi Jin;Jingtao Hu;Suyuan Liu;Xihong Yang;Siwei Wang;Xinwang Liu;En Zhu", "authorids": "~Fangdi_Wang1;~Jiaqi_Jin1;~Jingtao_Hu1;~Suyuan_Liu1;~Xihong_Yang1;~Siwei_Wang4;~Xinwang_Liu1;~En_Zhu1", "gender": "M;M;F;M;M;M;M;M", "homepage": ";https://jinjiaqi1998.github.io/;https://hujingtao.github.io/;https://tracesource.github.io//;https://github.com/xihongyang1999;https://wangsiwei2010.github.io/;https://xinwangliu.github.io/;https://www.researchgate.net/profile/En_Zhu", "dblp": ";198/6163.html;;227/5361;309/8286.html;51/8279-1;45/6569-2.html;30/1307", "google_scholar": ";;;https://scholar.google.com/citations?hl=zh-CN;Jx7E9xoAAAAJ;5o9hK3EAAAAJ;A56vWC4AAAAJ;", "orcid": "0009-0004-5179-862X;0009-0000-0510-4472;;0000-0003-1481-5393;;0000-0001-9517-262X;;", "linkedin": ";;;;;;;", "or_profile": "~Fangdi_Wang1;~Jiaqi_Jin1;~Jingtao_Hu1;~Suyuan_Liu1;~Xihong_Yang1;~Siwei_Wang4;~Xinwang_Liu1;~En_Zhu1", "aff": "National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;Intelligent Game and Decision Lab;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "position": "MS student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024evaluate,\ntitle={Evaluate then Cooperate: Shapley-based View Cooperation Enhancement for Multi-view Clustering},\nauthor={Fangdi Wang and Jiaqi Jin and Jingtao Hu and Suyuan Liu and Xihong Yang and Siwei Wang and Xinwang Liu and En Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xoc4QOvbDs}\n}", "github": "", "reviewers": "CDST;v5TM;URj2;zda3;TWtF", "pdf_size": 1218256, "rating": "6;6;6;6;7", "confidence": "4;4;5;5;5", "soundness": "4;3;3;2;3", "novelty": "3;3;3;3;4", "presentation": "3;2;3;3;3", "wc_summary": "65;39;32;56;59", "wc_strengths": "50;30;41;33;38", "wc_weaknesses": "83;147;50;43;29", "wc_questions": "27;63;42;37;62", "wc_limitations": "1;15;15;15;1", "wc_review": "226;294;180;184;189", "wc_reply_reviewers": "24;0;0;52;23", "wc_reply_authors": "22;0;0;0;17", "reply_reviewers": "1;0;0;2;1", "reply_authors": "2;1;1;1;2", "rating_avg": [ 6.2, 0.39999999999999997 ], "confidence_avg": [ 4.6, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.2, 0.39999999999999997 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 50.2, 12.544321424453376 ], "wc_strengths_avg": [ 38.4, 6.945502141674136 ], "wc_weaknesses_avg": [ 70.4, 42.20710840604933 ], "wc_questions_avg": [ 46.2, 14.161920773680384 ], "wc_limitations_avg": [ 9.4, 6.858571279792899 ], "wc_review_avg": [ 214.6, 42.94927240361587 ], "wc_reply_reviewers_avg": [ 19.8, 19.229144546755066 ], "wc_reply_authors_avg": [ 7.8, 9.682974749528162 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.4, 0.4898979485566356 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.408248290463863, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18042996102372286958&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "National University of Defense Technology;Intelligent Game and Decision Lab", "aff_unique_dep": ";Intelligent Game and Decision Lab", "aff_unique_url": "http://www.nudt.edu.cn/;", "aff_unique_abbr": "NUDT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "LLM-ESR: Large Language Models Enhancement for Long-tailed Sequential Recommendation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93061", "id": "xojbzSYIVS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xojbzSYIVS", "openreview": "https://openreview.net/forum?id=xojbzSYIVS", "poster": "/media/PosterPDFs/NeurIPS%202024/93061.png?t=1733110218.456418", "project": "", "author_site": "Qidong Liu, Xian Wu, Yejing Wang, Zijian Zhang, Feng Tian, Yefeng Zheng, Xiangyu Zhao", "tldr": "", "abstract": "Sequential recommender systems (SRS) aim to predict users' subsequent choices based on their historical interactions and have found applications in diverse fields such as e-commerce and social media. However, in real-world systems, most users interact with only a handful of items, while the majority of items are seldom consumed. These two issues, known as the long-tail user and long-tail item challenges, often pose difficulties for existing SRS. These challenges can adversely affect user experience and seller benefits, making them crucial to address. Though a few works have addressed the challenges, they still struggle with the seesaw or noisy issues due to the intrinsic scarcity of interactions. The advancements in large language models (LLMs) present a promising solution to these problems from a semantic perspective. As one of the pioneers in this field, we propose the Large Language Models Enhancement framework for Sequential Recommendation (LLM-ESR). This framework utilizes semantic embeddings derived from LLMs to enhance SRS without adding extra inference load. To address the long-tail item challenge, we design a dual-view modeling framework that combines semantics from LLMs and collaborative signals from conventional SRS. For the long-tail user challenge, we propose a retrieval augmented self-distillation method to enhance user preference representation using more informative interactions from similar users. To verify the effectiveness and versatility of our proposed enhancement framework, we conduct extensive experiments on three real-world datasets using three popular SRS models. The results consistently show that our method surpasses existing baselines. The implementation code is available in Supplementary Material.", "keywords": "Large Language Models;Recommender Systems;Sequential Recommendation;Long-tail", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/ad6992ebf420615a23e54af75c634dd6af73f8c0.zip", "author": "Qidong Liu;Xian Wu;Yejing Wang;Zijian Zhang;Feng Tian;Yefeng Zheng;Xiangyu Zhao", "authorids": "~Qidong_Liu2;~Xian_Wu1;~Yejing_Wang1;~Zijian_Zhang5;~Feng_Tian4;~Yefeng_Zheng3;~Xiangyu_Zhao1", "gender": "M;M;M;M;;;M", "homepage": "https://liuqidong07.github.io/;;;https://zhangzj2114.github.io/;;;https://zhaoxyai.github.io/", "dblp": "254/1779;03/5595;;43/6524-9.html;;;08/890-1.html", "google_scholar": "pX_HTHIAAAAJ;lslB5jkAAAAJ;;8_W0Xi4AAAAJ;;;", "orcid": "0000-0002-0751-2602;0000-0003-1118-9710;;0000-0003-1194-8334;;;0000-0003-2926-4416", "linkedin": ";;;;;;", "or_profile": "~Qidong_Liu2;~Xian_Wu1;~Yejing_Wang1;~Zijian_Zhang5;~Feng_Tian4;~Yefeng_Zheng3;~Xiangyu_Zhao1", "aff": "Xi'an Jiaotong University;Tencent;City University of Hong Kong;City University of Hong Kong;;;City University of Hong Kong", "aff_domain": "xjtu.edu.cn;tencent.com;cityu.edu.hk;cityu.edu.hk;;;cityu.edu.hk", "position": "PhD student;Principal Researcher;PhD student;PhD student;;;Assistant Professor", "bibtex": "@inproceedings{\nliu2024llmesr,\ntitle={{LLM}-{ESR}: Large Language Models Enhancement for Long-tailed Sequential Recommendation},\nauthor={Qidong Liu and Xian Wu and Yejing Wang and Zijian Zhang and Feng Tian and Yefeng Zheng and Xiangyu Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xojbzSYIVS}\n}", "github": "", "reviewers": "V8zP;Bjnq;wsRH", "pdf_size": 5923259, "rating": "6;7;7", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;3", "wc_summary": "113;63;74", "wc_strengths": "28;74;85", "wc_weaknesses": "41;46;69", "wc_questions": "87;36;61", "wc_limitations": "1;29;14", "wc_review": "270;248;303", "wc_reply_reviewers": "12;17;28", "wc_reply_authors": "14;24;75", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 83.33333333333333, 21.452790546272116 ], "wc_strengths_avg": [ 62.333333333333336, 24.689178916188272 ], "wc_weaknesses_avg": [ 52.0, 12.192894105447921 ], "wc_questions_avg": [ 61.333333333333336, 20.821996915655223 ], "wc_limitations_avg": [ 14.666666666666666, 11.440668201153676 ], "wc_review_avg": [ 273.6666666666667, 22.60285134421958 ], "wc_reply_reviewers_avg": [ 19.0, 6.683312551921141 ], "wc_reply_authors_avg": [ 37.666666666666664, 26.71246067953223 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15474415300954799686&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "xjtu.edu.cn;tencent.com;cityu.edu.hk;cityu.edu.hk;;;cityu.edu.hk", "author_num": 7, "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Xi'an Jiao Tong University;Tencent;City University of Hong Kong", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.tencent.com;https://www.cityu.edu.hk", "aff_unique_abbr": "XJTU;Tencent;CityU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MLLM-CompBench: A Comparative Reasoning Benchmark for Multimodal LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97438", "id": "xotfLEAF4u", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xotfLEAF4u", "openreview": "https://openreview.net/forum?id=xotfLEAF4u", "poster": "", "project": "", "author_site": "Jihyung Kil, Zheda Mai, Justin Lee, Zihe Wang, Kerrie Cheng, Lemeng Wang, Ye Liu, Arpita Chowdhury, Wei-Lun (Harry) Chao", "tldr": "", "abstract": "The ability to compare objects, scenes, or situations is crucial for effective decision-making and problem-solving in everyday life. For instance, comparing the freshness of apples enables better choices during grocery shopping, while comparing sofa designs helps optimize the aesthetics of our living space. Despite its significance, the comparative capability is largely unexplored in artificial general intelligence (AGI). In this paper, we introduce MLLM-CompBench, a benchmark designed to evaluate the comparative reasoning capability of multimodal large language models (MLLMs). MLLM-CompBench mines and pairs images through visually oriented questions covering eight dimensions of relative comparison: visual attribute, existence, state, emotion, temporality, spatiality, quantity, and quality. We curate a collection of around 40K image pairs using metadata from diverse vision datasets and CLIP similarity scores. These image pairs span a broad array of visual domains, including animals, fashion, sports, and both outdoor and indoor scenes. The questions are carefully crafted to discern relative characteristics between two images and are labeled by human annotators for accuracy and relevance. We use MLLM-CompBench to evaluate recent MLLMs, including GPT-4V(ision), Gemini-Pro, and LLaVA-1.6. Our results reveal notable shortcomings in their comparative abilities. We believe MLLM-CompBench not only sheds light on these limitations but also establishes a solid foundation for future enhancements in the comparative capability of MLLMs.", "keywords": "Benchmark;Comparison;Multimodal LLMs;Relativity;Vision-Language", "primary_area": "", "supplementary_material": "/attachment/38219c8ad8568d7c012375583226802c1cc4500c.pdf", "author": "Jihyung Kil;Zheda Mai;Justin Lee;Arpita Chowdhury;Zihe Wang;Kerrie Cheng;Lemeng Wang;Ye Liu;Wei-Lun Chao", "authorids": "~Jihyung_Kil1;~Zheda_Mai1;~Justin_Lee3;~Arpita_Chowdhury1;~Zihe_Wang4;~Kerrie_Cheng1;~Lemeng_Wang1;~Ye_Liu15;~Wei-Lun_Chao1", "gender": "M;M;M;F;M;F;F;;M", "homepage": "https://heendung.github.io/;https://zheda-mai.github.io/;;;;;https://lemeng-personal-website.web.app/;;https://sites.google.com/view/wei-lun-harry-chao", "dblp": "213/1319;270/0552;;;;;;;64/8842", "google_scholar": "C3O0uxcAAAAJ;FT3oT6EAAAAJ;;7eHA9IAAAAAJ;;;R_IqFp4AAAAJ;;PGKakWwAAAAJ", "orcid": ";;;;0009-0008-3277-3370;;;0009-0002-7093-192X;0000-0003-1269-7231", "linkedin": ";;justinhylee135/;arpita-chowdhury/;zihe-wang-a618552b9;kerriecheng/;lemeng-wang-8b3b98251/;;", "or_profile": "~Jihyung_Kil1;~Zheda_Mai1;~Justin_Lee3;~Arpita_Chowdhury1;~Zihe_Wang4;~Kerrie_Cheng1;~Lemeng_Wang1;~Ye_Liu15;~Wei-Lun_Chao1", "aff": "The Ohio State University;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University", "aff_domain": "osu.edu;osu.edu;ohio-state.edu;osu.edu;osu.edu;osu.edu;osu.edu;osu.edu;osu.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nkil2024mllmcompbench,\ntitle={{MLLM}-CompBench: A Comparative Reasoning Benchmark for Multimodal {LLM}s},\nauthor={Jihyung Kil and Zheda Mai and Justin Lee and Arpita Chowdhury and Zihe Wang and Kerrie Cheng and Lemeng Wang and Ye Liu and Wei-Lun Chao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xotfLEAF4u}\n}", "github": "", "reviewers": "xgQo;MVH6;V7ua;Z9Lw", "pdf_size": 19562624, "rating": "6;6;6;9", "confidence": "3;3;4;4", "wc_summary_and_contributions": "170;65;105;36", "wc_strengths": "25;104;41;36", "wc_improvement": "31;82;5;1", "wc_limitations": "1;1;18;40", "wc_correctness": "1;14;1;1", "wc_clarity": "1;5;1;1", "wc_relation_to_prior_work": "1;11;1;1", "wc_documentation": "1;1;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "232;284;174;118", "wc_reply_reviewers": "9;9;26;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 94.0, 50.25435304528355 ], "wc_strengths_avg": [ 51.5, 30.858548248418945 ], "wc_improvement_avg": [ 29.75, 32.29067202769246 ], "wc_limitations_avg": [ 15.0, 16.015617378046965 ], "wc_correctness_avg": [ 4.25, 5.629165124598851 ], "wc_clarity_avg": [ 2.0, 1.7320508075688772 ], "wc_relation_to_prior_work_avg": [ 3.5, 4.330127018922194 ], "wc_documentation_avg": [ 1.0, 0.0 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 202.0, 62.177166226839255 ], "wc_reply_reviewers_avg": [ 11.0, 9.40744386111339 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4115881218324546507&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 0, "email": "osu.edu;osu.edu;ohio-state.edu;osu.edu;osu.edu;osu.edu;osu.edu;osu.edu;osu.edu", "author_num": 9, "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Scene Graph Generation with Role-Playing Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93060", "id": "xpRUi8amtC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xpRUi8amtC", "openreview": "https://openreview.net/forum?id=xpRUi8amtC", "poster": "/media/PosterPDFs/NeurIPS%202024/93060.png?t=1729676888.693572", "project": "", "author_site": "Guikun Chen, Jin Li, Wenguan Wang", "tldr": "", "abstract": "Current approaches for open-vocabulary scene graph generation (OVSGG) use vision-language models such as CLIP and follow a standard zero-shot pipeline \u2013 computing similarity between the query image and the text embeddings for each category (i.e., text classifiers). In this work, we argue that the text classifiers adopted by existing OVSGG methods, i.e., category-/part-level prompts, are scene-agnostic as they remain unchanged across contexts. Using such fixed text classifiers not only struggles to model visual relations with high variance, but also falls short in adapting to distinct contexts. To plug these intrinsic shortcomings, we devise SDSGG, a scene-specific description based OVSGG framework where the weights of text classifiers are adaptively adjusted according to the visual content. In particular, to generate comprehensive and diverse descriptions oriented to the scene, an LLM is asked to play different roles (e.g., biologist and engineer) to analyze and discuss the descriptive features of a given scene from different views. Unlike previous efforts simply treating the generated descriptions as mutually equivalent text classifiers, SDSGG is equipped with an advanced renormalization mechanism to adjust the influence of each text classifier based on its relevance to the presented scene (this is what the term \u201cspecific\u201d means). Furthermore, to capture the complicated interplay between subjects and objects, we propose a new lightweight module called mutual visual adapter. It refines CLIP\u2019s ability to recognize relations by learning an interaction-aware semantic space. Extensive experiments on prevalent benchmarks show that SDSGG significantly outperforms top-leading methods.", "keywords": "Scene Graph Generation;Large Language Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Guikun Chen;Jin Li;Wenguan Wang", "authorids": "~Guikun_Chen1;~Jin_Li23;~Wenguan_Wang4", "gender": "M;M;M", "homepage": "https://guikunchen.github.io/;https://ln172.github.io/;https://sites.google.com/view/wenguanwang/", "dblp": "342/9515;;145/1078", "google_scholar": "I1TOdpkAAAAJ;;CqAQQkgAAAAJ", "orcid": ";;0000-0002-0802-9567", "linkedin": ";;wenguanwang", "or_profile": "~Guikun_Chen1;~Jin_Li23;~Wenguan_Wang4", "aff": "Zhejiang University;Changsha University of Science and Technology;Zhejiang University", "aff_domain": "zju.edu.cn;csust.edu.cn;zju.edu.cn", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nchen2024scene,\ntitle={Scene Graph Generation with Role-Playing Large Language Models},\nauthor={Guikun Chen and Jin Li and Wenguan Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xpRUi8amtC}\n}", "github": "", "reviewers": "xMLQ;RK4c;qa1m", "pdf_size": 2381087, "rating": "6;6;6", "confidence": "3;4;5", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;3;3", "wc_summary": "90;91;58", "wc_strengths": "51;55;64", "wc_weaknesses": "381;42;145", "wc_questions": "2;5;66", "wc_limitations": "7;1;1", "wc_review": "531;194;334", "wc_reply_reviewers": "19;12;26", "wc_reply_authors": "31;25;112", "reply_reviewers": "1;1;1", "reply_authors": "2;2;3", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 79.66666666666667, 15.326085243430198 ], "wc_strengths_avg": [ 56.666666666666664, 5.436502143433364 ], "wc_weaknesses_avg": [ 189.33333333333334, 141.90215721482963 ], "wc_questions_avg": [ 24.333333333333332, 29.48822740612863 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 353.0, 138.23410095438342 ], "wc_reply_reviewers_avg": [ 19.0, 5.715476066494082 ], "wc_reply_authors_avg": [ 56.0, 39.67366884975475 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=598298628589081407&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "zju.edu.cn;csust.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Zhejiang University;Changsha University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;http://www.csust.edu.cn", "aff_unique_abbr": "ZJU;CSUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Is Programming by Example Solved by LLMs?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93059", "id": "xqc8yyhScL", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xqc8yyhScL", "openreview": "https://openreview.net/forum?id=xqc8yyhScL", "poster": "", "project": "", "author_site": "Wen-Ding Li, Kevin Ellis", "tldr": "", "abstract": "Programming-by-Examples (PBE) aims to generate an algorithm from input-output examples.\nSuch systems are practically and theoretically important:\nfrom an end-user perspective, they are deployed to millions of people, and from an AI perspective, PBE corresponds to a very general form of few-shot inductive inference.\nGiven the success of Large Language Models (LLMs) in code-generation tasks, we investigate here the extent to which LLMs can be said to have \"solved\" PBE.\nWe experiment on classic domains such as lists and strings, and an uncommon graphics programming domain not well represented in typical pretraining data.\nWe find that pretrained models are not effective at PBE, but that they can be fine-tuned for much higher performance, provided the test problems are in-distribution.\nWe analyze empirically what causes these models to succeed and fail, and take steps toward understanding how to achieve better out-of-distribution generalization.\nCollectively these results suggest that LLMs make strong progress toward solving the typical suite of PBE tasks, potentially increasing the flexibility and applicability of PBE systems, while also identifying ways in which LLMs still fall short.", "keywords": "programming by example;program synthesis;LLM;code generation", "primary_area": "generative_models", "supplementary_material": "", "author": "Wen-Ding Li;Kevin Ellis", "authorids": "~Wen-Ding_Li1;~Kevin_Ellis1", "gender": ";M", "homepage": "https://www.cs.cornell.edu/~wdli/;https://www.cs.cornell.edu/~ellisk/", "dblp": "132/0674;", "google_scholar": "2G2mr9QAAAAJ;L7XI6asAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Wen-Ding_Li1;~Kevin_Ellis1", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2024is,\ntitle={Is Programming by Example solved by {LLM}s?},\nauthor={Wen-Ding Li and Kevin Ellis},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xqc8yyhScL}\n}", "github": "", "reviewers": "k14y;e1ch;mgYE;CppC", "pdf_size": 3204269, "rating": "5;5;6;7", "confidence": "4;3;5;3", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;3;4;3", "wc_summary": "55;45;139;97", "wc_strengths": "21;36;34;90", "wc_weaknesses": "118;138;541;51", "wc_questions": "31;81;287;72", "wc_limitations": "1;1;1;1", "wc_review": "226;301;1002;311", "wc_reply_reviewers": "16;0;834;0", "wc_reply_authors": "12;0;721;0", "reply_reviewers": "1;0;2;0", "reply_authors": "2;1;3;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 84.0, 37.26929030716845 ], "wc_strengths_avg": [ 45.25, 26.47050245084139 ], "wc_weaknesses_avg": [ 212.0, 192.66162046448173 ], "wc_questions_avg": [ 117.75, 99.51727237017703 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 460.0, 314.64344900219993 ], "wc_reply_reviewers_avg": [ 212.5, 358.8826409844867 ], "wc_reply_authors_avg": [ 183.25, 310.50875591519156 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.0909090909090909, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17300001981086537608&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "cornell.edu;cornell.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Data curation via joint example selection further accelerates multimodal learning", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97437", "id": "xqpkzMfmQ5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xqpkzMfmQ5", "openreview": "https://openreview.net/forum?id=xqpkzMfmQ5", "poster": "", "project": "", "author_site": "Talfan Evans, Nikhil Parthasarathy, Hamza Merzic, Olivier Henaff", "tldr": "", "abstract": "Data curation is an essential component of large-scale pretraining. In this work, we demonstrate that jointly prioritizing batches of data is more effective for learning than selecting examples independently. Multimodal contrastive objectives expose the dependencies between data and thus naturally yield criteria for measuring the joint learnability of a batch. We derive a simple and tractable algorithm for selecting such batches, which significantly accelerate training beyond individually-prioritized data points. As performance improves by selecting from large super-batches, we also leverage recent advances in model approximation to reduce the computational overhead of scoring. As a result, our approach\u2014multimodal contrastive learning with joint example selection (JEST)\u2014surpasses state-of-the-art pretraining methods with up to 13\u00d7 fewer iterations and 10\u00d7 less computation. Essential to the performance of JEST is the ability to steer the data selection process towards the distribution of smaller, well-curated datasets via pretrained reference models, exposing data curation as a new dimension for neural scaling laws.", "keywords": "Data curation;pretraining;contrastive learning;multimodal learning", "primary_area": "", "supplementary_material": "", "author": "Talfan Evans;Nikhil Parthasarathy;Hamza Merzic;Olivier J Henaff", "authorids": "~Talfan_Evans1;~Nikhil_Parthasarathy1;~Hamza_Merzic1;~Olivier_J_Henaff1", "gender": "M;M;M;", "homepage": "http://www.talfanevans.co.uk;;;https://www.olivierhenaff.com/", "dblp": ";209/4951;203/4563;156/0035.html", "google_scholar": ";X9mO4ckAAAAJ;;Sx75CVsAAAAJ", "orcid": ";;;0000-0001-8183-9489", "linkedin": ";nikparth/;;", "or_profile": "~Talfan_Evans1;~Nikhil_Parthasarathy1;~Hamza_Merzic1;~Olivier_J_Henaff1", "aff": "Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Research Engineer;Research Scientist", "bibtex": "@inproceedings{\nevans2024data,\ntitle={Data curation via joint example selection further accelerates multimodal learning},\nauthor={Talfan Evans and Nikhil Parthasarathy and Hamza Merzic and Olivier J Henaff},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=xqpkzMfmQ5}\n}", "github": "", "reviewers": "YMd9;dJ5D;fv2x;FvDN", "pdf_size": 1757071, "rating": "7;7;7;8", "confidence": "2;4;4;3", "wc_summary_and_contributions": "103;94;48;49", "wc_strengths": "62;4;37;16", "wc_improvement": "3;38;3;3", "wc_limitations": "20;1;51;9", "wc_correctness": "9;1;31;10", "wc_clarity": "6;1;9;5", "wc_relation_to_prior_work": "14;1;10;15", "wc_documentation": "12;1;22;12", "wc_additional_feedback": "1;1;1;1", "wc_review": "230;142;212;120", "wc_reply_reviewers": "14;16;23;26", "wc_reply_authors": "48;38;46;38", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 73.5, 25.20416632225712 ], "wc_strengths_avg": [ 29.75, 22.049659861322123 ], "wc_improvement_avg": [ 11.75, 15.155444566227676 ], "wc_limitations_avg": [ 20.25, 18.9917745353087 ], "wc_correctness_avg": [ 12.75, 11.098986440211556 ], "wc_clarity_avg": [ 5.25, 2.8613807855648994 ], "wc_relation_to_prior_work_avg": [ 10.0, 5.522680508593631 ], "wc_documentation_avg": [ 11.75, 7.428828979051813 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 176.0, 46.10856753359401 ], "wc_reply_reviewers_avg": [ 19.75, 4.9180788932265 ], "wc_reply_authors_avg": [ 42.5, 4.55521678957215 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12993419392714536115&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "deepmind.com;google.com;google.com;google.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Approximated Orthogonal Projection Unit: Stabilizing Regression Network Training Using Natural Gradient", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93058", "id": "xqrlhsbcwN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xqrlhsbcwN", "openreview": "https://openreview.net/forum?id=xqrlhsbcwN", "poster": "", "project": "", "author_site": "Shaoqi Wang, Chunjie Yang, Siwei Lou", "tldr": "", "abstract": "Neural networks (NN) are extensively studied in cutting-edge soft sensor models due to their feature extraction and function approximation capabilities. Current research into network-based methods primarily focuses on models' offline accuracy. Notably, in industrial soft sensor context, online optimizing stability and interpretability are prioritized, followed by accuracy. This requires a clearer understanding of network's training process. To bridge this gap, we propose a novel NN named the Approximated Orthogonal Projection Unit (AOPU) which has solid mathematical basis and presents superior training stability. AOPU truncates the gradient backpropagation at dual parameters, optimizes the trackable parameters updates, and enhances the robustness of training. We further prove that AOPU attains minimum variance estimation in NN, wherein the truncated gradient approximates the natural gradient. Empirical results on two chemical process datasets clearly show that AOPU outperforms other models in achieving stable convergence, marking a significant advancement in soft sensor field.", "keywords": "Neural networks;network's structure design;minimum variance estimation;online learning;training stability;natural gradient;soft sensor", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "ShaoQi Wang;Chunjie Yang;Siwei Lou", "authorids": "~ShaoQi_Wang2;~Chunjie_Yang1;~Siwei_Lou1", "gender": "M;M;M", "homepage": ";https://person.zju.edu.cn/cjyang;https://www.researchgate.net/profile/Siwei-Lou-2", "dblp": "156/4566;72/1298;213/0462", "google_scholar": ";https://scholar.google.com.hk/citations?user=Dji0R9YAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-2900-238X;;", "linkedin": ";;", "or_profile": "~ShaoQi_Wang2;~Chunjie_Yang1;~Siwei_Lou1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2024approximated,\ntitle={Approximated Orthogonal Projection Unit: Stabilizing Regression Network Training Using Natural Gradient},\nauthor={ShaoQi Wang and Chunjie Yang and Siwei Lou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xqrlhsbcwN}\n}", "github": "", "reviewers": "SnqU;8Lrh;PyNt", "pdf_size": 10056063, "rating": "5;5;6", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "74;94;77", "wc_strengths": "24;54;106", "wc_weaknesses": "83;105;73", "wc_questions": "138;9;149", "wc_limitations": "1;36;5", "wc_review": "320;298;410", "wc_reply_reviewers": "178;175;87", "wc_reply_authors": "1204;410;980", "reply_reviewers": "3;1;2", "reply_authors": "6;5;6", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 81.66666666666667, 8.806563209081938 ], "wc_strengths_avg": [ 61.333333333333336, 33.87558937576667 ], "wc_weaknesses_avg": [ 87.0, 13.366625103842281 ], "wc_questions_avg": [ 98.66666666666667, 63.56274240639891 ], "wc_limitations_avg": [ 14.0, 15.641824275533422 ], "wc_review_avg": [ 342.6666666666667, 48.45157949495099 ], "wc_reply_reviewers_avg": [ 146.66666666666666, 42.20847729491737 ], "wc_reply_authors_avg": [ 864.6666666666666, 334.2507375542432 ], "reply_reviewers_avg": [ 2.0, 0.816496580927726 ], "reply_authors_avg": [ 5.666666666666667, 0.4714045207910317 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13060007466886383359&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Inverse Factorized Soft Q-Learning for Cooperative Multi-agent Imitation Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93057", "id": "xrbgXJomJp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xrbgXJomJp", "openreview": "https://openreview.net/forum?id=xrbgXJomJp", "poster": "/media/PosterPDFs/NeurIPS%202024/93057.png?t=1731697393.379903", "project": "", "author_site": "The Viet Bui, Tien Mai, Thanh Nguyen", "tldr": "", "abstract": "This paper concerns imitation learning (IL) in cooperative multi-agent systems.\nThe learning problem under consideration poses several challenges, characterized by high-dimensional state and action spaces and intricate inter-agent dependencies. In a single-agent setting, IL was shown to be done efficiently via an inverse soft-Q learning process. However, extending this framework to a multi-agent context introduces the need to simultaneously learn both local value functions to capture local observations and individual actions, and a joint value function for exploiting centralized learning.\nIn this work, we introduce a new multi-agent IL algorithm designed to address these challenges. Our approach enables the\ncentralized learning by leveraging mixing networks to aggregate decentralized Q functions.\nWe further establish conditions for the mixing networks under which the multi-agent IL objective function exhibits convexity within the Q function space.\nWe present extensive experiments conducted on some challenging multi-agent game environments, including an advanced version of the Star-Craft multi-agent challenge (SMACv2), which demonstrates the effectiveness of our algorithm.", "keywords": "Multi-agent Imitation Learning;Inverse Q Learning;Centralized Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/34ad182a1b09569cf694f828ed41b57c8869734b.zip", "author": "The Viet Bui;Tien Anh Mai;Thanh Hong Nguyen", "authorids": "~The_Viet_Bui1;~Tien_Anh_Mai1;~Thanh_Hong_Nguyen1", "gender": "M;F;M", "homepage": "https://sites.google.com/view/tien-mai/;https://ix.cs.uoregon.edu/~thanhhng/;", "dblp": "229/2286.html;117/4935;", "google_scholar": ";6fpZnQIAAAAJ;rpPDGm4AAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Tien_Anh_Mai1;~Thanh_Hong_Nguyen1;~Viet_The_Bui1", "aff": "Singapore Management University;University of Oregon;Singapore Management University", "aff_domain": "smu.edu.sg;uoregon.edu;phdcs.smu.edu.sg", "position": "Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nbui2024inverse,\ntitle={Inverse Factorized Soft Q-Learning for Cooperative Multi-agent Imitation Learning},\nauthor={The Viet Bui and Tien Anh Mai and Thanh Hong Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xrbgXJomJp}\n}", "github": "", "reviewers": "GGqd;9wvM;v3qG;vZKY", "pdf_size": 1874374, "rating": "5;6;6;7", "confidence": "3;4;3;2", "soundness": "3;2;2;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "46;26;73;64", "wc_strengths": "34;24;79;63", "wc_weaknesses": "166;175;461;24", "wc_questions": "58;65;222;50", "wc_limitations": "6;44;1;1", "wc_review": "310;334;836;202", "wc_reply_reviewers": "11;67;133;0", "wc_reply_authors": "13;128;169;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;3;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 52.25, 18.005207580030838 ], "wc_strengths_avg": [ 50.0, 22.034064536530703 ], "wc_weaknesses_avg": [ 206.5, 158.67340671958866 ], "wc_questions_avg": [ 98.75, 71.35606141036654 ], "wc_limitations_avg": [ 13.0, 18.01388353465182 ], "wc_review_avg": [ 420.5, 244.98724456591611 ], "wc_reply_reviewers_avg": [ 52.75, 52.84115346962063 ], "wc_reply_authors_avg": [ 77.5, 72.6102609828666 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10806558197564431462&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "smu.edu.sg;uoregon.edu;phdcs.smu.edu.sg", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Singapore Management University;University of Oregon", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://www.uoregon.edu", "aff_unique_abbr": "SMU;UO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Toward Approaches to Scalability in 3D Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93056", "id": "xse8QMGnyM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xse8QMGnyM", "openreview": "https://openreview.net/forum?id=xse8QMGnyM", "poster": "/media/PosterPDFs/NeurIPS%202024/93056.png?t=1730787644.5316038", "project": "", "author_site": "Jun-Hui Kim, Seong-Whan Lee", "tldr": "", "abstract": "In the field of 3D Human Pose Estimation (HPE), scalability and generalization across diverse real-world scenarios remain significant challenges. This paper addresses two key bottlenecks to scalability: limited data diversity caused by 'popularity bias' and increased 'one-to-many' depth ambiguity arising from greater pose diversity. We introduce the Biomechanical Pose Generator (BPG), which leverages biomechanical principles, specifically the normal range of motion, to autonomously generate a wide array of plausible 3D poses without relying on a source dataset, thus overcoming the restrictions of popularity bias. To address depth ambiguity, we propose the Binary Depth Coordinates (BDC), which simplifies depth estimation into a binary classification of joint positions (front or back). This method decomposes a 3D pose into three core elements\u20142D pose, bone length, and binary depth decision\u2014substantially reducing depth ambiguity and enhancing model robustness and accuracy, particularly in complex poses. Our results demonstrate that these approaches increase the diversity and volume of pose data while consistently achieving performance gains, even amid the complexities introduced by increased pose diversity.", "keywords": "3D Human Pose Estimation;Data generation;Pose Decompression", "primary_area": "machine_vision", "supplementary_material": "/attachment/ffd6aa33f07118e67868116ac943421300451ddf.zip", "author": "Jun-Hee Kim;Seong-Whan Lee", "authorids": "~Jun-Hee_Kim1;~Seong-Whan_Lee3", "gender": "M;", "homepage": "https://github.com/JunHuiK;http://pr.korea.ac.kr/sub2_1.php?code=LSW", "dblp": ";l/SeongWhanLee", "google_scholar": ";https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-6249-4996", "linkedin": ";", "or_profile": "~JunHui_Kim1;~Seong-whan_Lee1", "aff": "Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nkim2024toward,\ntitle={Toward Approaches to Scalability in 3D Human Pose Estimation},\nauthor={Jun-Hee Kim and Seong-Whan Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xse8QMGnyM}\n}", "github": "", "reviewers": "696C;9ZcB;1aXZ;Gv4b", "pdf_size": 23838035, "rating": "5;5;5;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;2;3;2", "wc_summary": "84;82;200;76", "wc_strengths": "106;24;69;49", "wc_weaknesses": "218;133;169;60", "wc_questions": "49;154;10;57", "wc_limitations": "1;70;15;22", "wc_review": "458;463;463;264", "wc_reply_reviewers": "56;55;0;18", "wc_reply_authors": "0;0;86;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 5.25, 0.4330127018922193 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 110.5, 51.75664208582315 ], "wc_strengths_avg": [ 62.0, 29.991665508937647 ], "wc_weaknesses_avg": [ 145.0, 57.6064232529672 ], "wc_questions_avg": [ 67.5, 53.011791141216875 ], "wc_limitations_avg": [ 27.0, 25.95187854472196 ], "wc_review_avg": [ 412.0, 85.47221770844605 ], "wc_reply_reviewers_avg": [ 32.25, 24.107830678018296 ], "wc_reply_authors_avg": [ 21.5, 37.239092362730865 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7381840651996575495&as_sdt=5,48&sciodt=0,48&hl=en", "gs_version_total": 0, "email": "korea.ac.kr;korea.ac.kr", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Towards Human-AI Complementarity with Prediction Sets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93055", "id": "xtK3gZjQDC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xtK3gZjQDC", "openreview": "https://openreview.net/forum?id=xtK3gZjQDC", "poster": "/media/PosterPDFs/NeurIPS%202024/93055.png?t=1732279102.3098984", "project": "", "author_site": "Giovanni De Toni, Nastaran Okati, Suhas Thejaswi, Eleni Straitouri, Manuel Rodriguez", "tldr": "", "abstract": "Decision support systems based on prediction sets have proven to be effective at helping human experts solve classification tasks. Rather than providing single-label predictions, these systems provide sets of label predictions constructed using conformal prediction, namely prediction sets, and ask human experts to predict label values from these sets. In this paper, we first show that the prediction sets constructed using conformal prediction are, in general, suboptimal in terms of average accuracy. Then, we show that the problem of finding the optimal prediction sets under which the human experts achieve the highest average accuracy is NP-hard. More strongly, unless P = NP, we show that the problem is hard to approximate to any factor less than the size of the label set. However, we introduce a simple and efficient greedy algorithm that, for a large class of expert models and non-conformity scores, is guaranteed to find prediction sets that provably offer equal or greater performance than those constructed using conformal prediction. Further, using a simulation study with both synthetic and real expert predictions, we demonstrate that, in practice, our greedy algorithm finds near-optimal prediction sets offering greater performance than conformal prediction.", "keywords": "conformal prediction;decision support systems;human-ai complementarity", "primary_area": "human-AI_interaction", "supplementary_material": "/attachment/8974d4d930794c932d9d0ed6fe006e2a5dc3f70b.zip", "author": "Giovanni De Toni;Nastaran Okati;Suhas Thejaswi;Eleni Straitouri;Manuel Gomez Rodriguez", "authorids": "~Giovanni_De_Toni1;~Nastaran_Okati2;~Suhas_Thejaswi1;~Eleni_Straitouri1;~Manuel_Gomez_Rodriguez1", "gender": "M;F;;;M", "homepage": "http://detoni.me;https://www.mpi-sws.org/people/nastaran/;;https://people.mpi-sws.org/~estraitouri/;https://www.mpi-sws.org/~manuelgr/", "dblp": "267/3078;218/6109.html;;302/4619;73/8260", "google_scholar": "Z95oxK0AAAAJ;3ETK8Z8AAAAJ;;kphSqwwAAAAJ;https://scholar.google.com.tw/citations?user=UcuXmuwAAAAJ", "orcid": ";;;;", "linkedin": "giovannidetoni/;;;eleni-straitouri-919419205;", "or_profile": "~Giovanni_De_Toni1;~Nastaran_Okati2;~Suhas_Thejaswi1;~Eleni_Straitouri1;~Manuel_Gomez_Rodriguez1", "aff": "MPI-SWS;MPI-SWS;;MPI-SWS;MPI-SWS", "aff_domain": "mpi-sws.org;mpi-sws.org;;mpi-sws.org;mpi-sws.org", "position": "Intern;PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\ntoni2024towards,\ntitle={Towards Human-{AI} Complementarity with Prediction Sets},\nauthor={Giovanni De Toni and Nastaran Okati and Suhas Thejaswi and Eleni Straitouri and Manuel Gomez Rodriguez},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xtK3gZjQDC}\n}", "github": "", "reviewers": "QrrM;Dxna;9FF1;trEi", "pdf_size": 2167301, "rating": "5;6;6;7", "confidence": "3;4;4;3", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "35;72;108;80", "wc_strengths": "31;73;48;29", "wc_weaknesses": "164;392;36;200", "wc_questions": "53;2;66;42", "wc_limitations": "5;1;4;9", "wc_review": "288;540;262;360", "wc_reply_reviewers": "11;38;98;33", "wc_reply_authors": "0;0;108;0", "reply_reviewers": "1;1;2;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 73.75, 26.06122598804592 ], "wc_strengths_avg": [ 45.25, 17.640507362318125 ], "wc_weaknesses_avg": [ 198.0, 127.51470503436065 ], "wc_questions_avg": [ 40.75, 23.93089007956035 ], "wc_limitations_avg": [ 4.75, 2.8613807855648994 ], "wc_review_avg": [ 362.5, 108.58521998872591 ], "wc_reply_reviewers_avg": [ 45.0, 32.24127789030702 ], "wc_reply_authors_avg": [ 27.0, 46.76537180435969 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3392822528753432865&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mpi-sws.org;mpi-sws.org;;mpi-sws.org;mpi-sws.org", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Max Planck Institute for Software Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "xtpY1kQmW9", "title": "Double-Bayesian Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "Contemporary machine learning methods will try to approach the Bayes error, as it is the lowest possible error any model can achieve. This paper postulates that any decision is composed of not one but two Bayesian decisions and that decision-making is, therefore, a double-Bayesian process. The paper shows how this duality implies intrinsic uncertainty in decisions and how it incorporates explainability. The proposed approach understands that Bayesian learning is tantamount to finding a base for a logarithmic function measuring uncertainty, with solutions being fixed points. Furthermore, following this approach, the golden ratio describes possible solutions satisfying Bayes' theorem. The double-Bayesian framework suggests using a learning rate and momentum weight with values similar to those used in the literature to train neural networks with stochastic gradient descent.", "keywords": "Artificial Intelligence;Machine Learning;Learning Theory;Bayesian Inference;Neural Networks;Information Theory;Optimization;Stochastic Gradient Descent;Golden Ratio", "primary_area": "learning_theory", "supplementary_material": "/attachment/90949be9ee7b147f74b2a05944fcd6aaafb4f667.zip", "author": "Stefan Jaeger", "authorids": "~Stefan_Jaeger1", "gender": "M", "homepage": "https://lhncbc.nlm.nih.gov/LHC-personnel/staff/StefanJaeger.html", "dblp": "71/115-1", "google_scholar": "aKoHJp0AAAAJ", "orcid": "0000-0001-6877-4318", "linkedin": "", "or_profile": "~Stefan_Jaeger1", "aff": "National Institutes of Health", "aff_domain": "nih.gov", "position": "Staff Scientist", "bibtex": "@misc{\nanonymous2024doublebayesian,\ntitle={Double-Bayesian Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=xtpY1kQmW9}\n}", "github": "", "project": "", "reviewers": "cT9s;eiHq;RMti", "site": "https://openreview.net/forum?id=xtpY1kQmW9", "pdf_size": 1882358, "rating": "1;2;4", "confidence": "2;4;4", "soundness": "1;2;2", "novelty": "1;2;2", "presentation": "1;1;2", "wc_summary": "87;31;85", "wc_strengths": "23;35;25", "wc_weaknesses": "20;375;165", "wc_questions": "2;66;49", "wc_limitations": "2;13;1", "wc_review": "134;520;325", "wc_reply_reviewers": "0;35;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "0;0;0", "rating_avg": [ 2.3333333333333335, 1.247219128924647 ], "confidence_avg": [ 3.3333333333333335, 0.9428090415820634 ], "soundness_avg": [ 1.6666666666666667, 0.4714045207910317 ], "novelty_avg": [ 1.6666666666666667, 0.4714045207910317 ], "presentation_avg": [ 1.3333333333333333, 0.4714045207910317 ], "wc_summary_avg": [ 67.66666666666667, 25.94010194445829 ], "wc_strengths_avg": [ 27.666666666666668, 5.2493385826745405 ], "wc_weaknesses_avg": [ 186.66666666666666, 145.73568159132782 ], "wc_questions_avg": [ 39.0, 27.067816067549053 ], "wc_limitations_avg": [ 5.333333333333333, 5.436502143433364 ], "wc_review_avg": [ 326.3333333333333, 157.5866604619679 ], "wc_reply_reviewers_avg": [ 11.666666666666666, 16.49915822768611 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 5, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.7559289460184545, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:a6tYLqFS5vEJ:scholar.google.com/&scioq=Double-Bayesian+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "aff_unique_index": "0", "aff_unique_norm": "National Institutes of Health", "aff_unique_dep": "", "aff_unique_url": "https://www.nih.gov", "aff_unique_abbr": "NIH", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "CIFD: Controlled Information Flow to Enhance Knowledge Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93054", "id": "xutrKezbPF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xutrKezbPF", "openreview": "https://openreview.net/forum?id=xutrKezbPF", "poster": "/media/PosterPDFs/NeurIPS%202024/93054.png?t=1733856490.0271878", "project": "", "author_site": "Yashas Malur Saidutta, Rakshith Sharma Srinivasa, Jaejin Cho, Ching-Hua Lee, Chouchang Yang, Yilin Shen, Hongxia Jin", "tldr": "", "abstract": "Knowledge Distillation is the mechanism by which the insights gained from a larger teacher model are transferred to a smaller student model. However, the transfer suffers when the teacher model is significantly larger than the student. To overcome this, prior works have proposed training intermediately sized models, Teacher Assistants (TAs) to help the transfer process. However, training TAs is expensive, as training these models is a knowledge transfer task in itself. Further, these TAs are larger than the student model and training them especially in large data settings can be computationally intensive. In this paper, we propose a novel framework called Controlled Information Flow for Knowledge Distillation (CIFD) consisting of two components. First, we propose a significantly smaller alternatives to TAs, the Rate-Distortion Module (RDM) which uses the teacher's penultimate layer embedding and a information rate-constrained bottleneck layer to replace the Teacher Assistant model. RDMs are smaller and easier to train than TAs, especially in large data regimes, since they operate on the teacher embeddings and do not need to relearn low level input feature extractors. Also, by varying the information rate across the bottleneck, RDMs can replace TAs of different sizes. Secondly, we propose the use of Information Bottleneck Module in the student model, which is crucial for regularization in the presence of a large number of RDMs. We show comprehensive state-of-the-art results of the proposed method over large datasets like Imagenet. Further, we show the significant improvement in distilling CLIP like models over a huge 12M image-text dataset. It outperforms CLIP specialized distillation methods across five zero-shot classification datasets and two zero-shot image-text retrieval datasets.", "keywords": "Knowledge Distillation;Information Bottleneck;Rate-Distortion;Teaching Assistant;CLIP", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yashas Malur Saidutta;Rakshith Sharma Srinivasa;Jaejin Cho;Ching-Hua Lee;Chouchang Yang;Yilin Shen;Hongxia Jin", "authorids": "~Yashas_Malur_Saidutta1;~Rakshith_Sharma_Srinivasa1;~Jaejin_Cho2;~Ching-Hua_Lee2;~Chouchang_Yang2;~Yilin_Shen1;~Hongxia_Jin1", "gender": ";M;M;;;M;", "homepage": ";;https://www.researchgate.net/profile/Jaejin_Cho;;;;", "dblp": "229/0916;235/5670;225/4664;;;30/383;", "google_scholar": "QoTTPgoAAAAJ;https://scholar.google.com/citations?hl=en;r39ykjUAAAAJ;;;9PSFMzAAAAAJ;", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~Yashas_Malur_Saidutta1;~Rakshith_Sharma_Srinivasa1;~Jaejin_Cho2;~Ching-Hua_Lee2;~Chouchang_Yang2;~Yilin_Shen1;~Hongxia_Jin1", "aff": "Samsung;Samsung Research America ;Samsung Research America;;;Samsung Research America;", "aff_domain": "samsung.com;samsung.com;samsung.com;;;gmail.com;", "position": "Researcher;Researcher;Researcher;;;Principal Researcher;", "bibtex": "@inproceedings{\nsaidutta2024cifd,\ntitle={{CIFD}: Controlled Information Flow to Enhance Knowledge Distillation},\nauthor={Yashas Malur Saidutta and Rakshith Sharma Srinivasa and Jaejin Cho and Ching-Hua Lee and Chouchang Yang and Yilin Shen and Hongxia Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xutrKezbPF}\n}", "github": "", "reviewers": "vQtp;iLhT;sfSd", "pdf_size": 3063251, "rating": "5;5;5", "confidence": "4;4;4", "soundness": "2;3;2", "novelty": "3;3;2", "presentation": "3;3;2", "wc_summary": "78;37;55", "wc_strengths": "42;36;20", "wc_weaknesses": "176;153;386", "wc_questions": "88;39;2", "wc_limitations": "8;4;10", "wc_review": "392;269;473", "wc_reply_reviewers": "52;0;217", "wc_reply_authors": "153;261;835", "reply_reviewers": "1;0;2", "reply_authors": "2;2;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.666666666666664, 16.779617264870957 ], "wc_strengths_avg": [ 32.666666666666664, 9.285592184789412 ], "wc_weaknesses_avg": [ 238.33333333333334, 104.8374403011104 ], "wc_questions_avg": [ 43.0, 35.223098481914775 ], "wc_limitations_avg": [ 7.333333333333333, 2.494438257849294 ], "wc_review_avg": [ 378.0, 83.86894538504703 ], "wc_reply_reviewers_avg": [ 89.66666666666667, 92.5070567878773 ], "wc_reply_authors_avg": [ 416.3333333333333, 299.3073485380686 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Kz252yyaP08J:scholar.google.com/&scioq=CIFD:+Controlled+Information+Flow+to+Enhance+Knowledge+Distillation&hl=en&as_sdt=0,14", "gs_version_total": 2, "email": "samsung.com;samsung.com;samsung.com;;;gmail.com;", "author_num": 7, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "South Korea;United States" }, { "title": "On-Road Object Importance Estimation: A New Dataset and A Model with Multi-Fold Top-Down Guidance", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93053", "id": "xvTMc9Ovx3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xvTMc9Ovx3", "openreview": "https://openreview.net/forum?id=xvTMc9Ovx3", "poster": "/media/PosterPDFs/NeurIPS%202024/93053.png?t=1731646200.8963146", "project": "", "author_site": "Zhixiong Nan, Yilong Chen, Tianfei Zhou, Tao Xiang", "tldr": "", "abstract": "This paper addresses the problem of on-road object importance estimation, which utilizes video sequences captured from the driver's perspective as the input. Although this problem is significant for safer and smarter driving systems, the exploration of this problem remains limited. On one hand, publicly-available large-scale datasets are scarce in the community. To address this dilemma, this paper contributes a new large-scale dataset named Traffic Object Importance (TOI). On the other hand, existing methods often only consider either bottom-up feature or single-fold guidance, leading to limitations in handling highly dynamic and diverse traffic scenarios. Different from existing methods, this paper proposes a model that integrates multi-fold top-down guidance with the bottom-up feature. Specifically, three kinds of top-down guidance factors (i.e., driver intention, semantic context, and traffic rule) are integrated into our model. These factors are important for object importance estimation, but none of the existing methods simultaneously consider them. To our knowledge, this paper proposes the first on-road object importance estimation model that fuses multi-fold top-down guidance factors with bottom-up feature. Extensive experiments demonstrate that our model outperforms state-of-the-art methods by large margins, achieving 23.1% Average Precision (AP) improvement compared with the recently proposed model (i.e., Goal).", "keywords": "Object importance estimation;Autonomous vehicles", "primary_area": "robotics", "supplementary_material": "", "author": "Zhixiong Nan;Yilong Chen;Tianfei Zhou;Tao Xiang", "authorids": "~Zhixiong_Nan2;~Yilong_Chen3;~Tianfei_Zhou2;~Tao_Xiang2", "gender": "M;M;M;M", "homepage": ";https://github.com/BOB421-cyl;https://www.tfzhou.com/;", "dblp": "187/2031;;150/6710;22/4460-1.html", "google_scholar": "9ywMAekAAAAJ;;https://scholar.google.ae/citations?user=-_33ccMAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0009-0007-7095-1724;0000-0001-5475-1473;0000-0002-9439-4623", "linkedin": ";;;", "or_profile": "~Zhixiong_Nan2;~Yilong_Chen3;~Tianfei_Zhou2;~Tao_Xiang2", "aff": "Chongqing University;Chongqing University;Beijing Institute of Technology;Chongqing University", "aff_domain": "cqu.edu.cn;cqu.edu.cn;bit.edu.cn;cqu.edu.cn", "position": "Associate Professor;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nnan2024onroad,\ntitle={On-Road Object Importance Estimation: A New Dataset and A Model with Multi-Fold Top-Down Guidance},\nauthor={Zhixiong Nan and Yilong Chen and Tianfei Zhou and Tao Xiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xvTMc9Ovx3}\n}", "github": "", "reviewers": "Yrjv;8N7U;xrYx;mvxo", "pdf_size": 16382080, "rating": "6;6;6;7", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "3;3;2;3", "presentation": "3;3;3;4", "wc_summary": "79;52;85;49", "wc_strengths": "107;40;68;55", "wc_weaknesses": "94;12;153;181", "wc_questions": "57;197;129;38", "wc_limitations": "37;5;8;1", "wc_review": "374;306;443;324", "wc_reply_reviewers": "18;89;19;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.25, 15.927570436196476 ], "wc_strengths_avg": [ 67.5, 24.86463351831271 ], "wc_weaknesses_avg": [ 110.0, 64.71089552772392 ], "wc_questions_avg": [ 105.25, 62.914127984102265 ], "wc_limitations_avg": [ 12.75, 14.219265100559873 ], "wc_review_avg": [ 361.75, 53.11485197192966 ], "wc_reply_reviewers_avg": [ 31.5, 34.04776057246644 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kMTjylYVslsJ:scholar.google.com/&scioq=On-Road+Object+Importance+Estimation:+A+New+Dataset+and+A+Model+with+Multi-Fold+Top-Down+Guidance&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cqu.edu.cn;cqu.edu.cn;bit.edu.cn;cqu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chongqing University;Beijing Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cqu.edu.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "CQU;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "RCDN: Towards Robust Camera-Insensitivity Collaborative Perception via Dynamic Feature-based 3D Neural Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93052", "id": "xvVeSZoVJO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xvVeSZoVJO", "openreview": "https://openreview.net/forum?id=xvVeSZoVJO", "poster": "/media/PosterPDFs/NeurIPS%202024/93052.png?t=1731389969.1951804", "project": "", "author_site": "Tianhang Wang, Fan Lu, Zehan Zheng, Zhijun Li, Guang Chen, changjun jiang", "tldr": "", "abstract": "Collaborative perception is dedicated to tackling the constraints of single-agent perception, such as occlusions, based on the multiple agents' multi-view sensor inputs. However, most existing works assume an ideal condition that all agents' multi-view cameras are continuously available. In reality, cameras may be highly noisy, obscured or even failed during the collaboration. In this work, we introduce a new robust camera-insensitivity problem: how to overcome the issues caused by the failed camera perspectives, while stabilizing high collaborative performance with low calibration cost? To address above problems, we propose RCDN, a Robust Camera-insensitivity collaborative perception with a novel Dynamic feature-based 3D Neural modeling mechanism. The key intuition of RCDN is to construct collaborative neural rendering field representations to recover failed perceptual messages sent by multiple agents. To better model collaborative neural rendering field, RCDN first establishes a geometry BEV feature based time-invariant static field with other agents via fast hash grid modeling. Based on the static background field, the proposed time-varying dynamic field can model corresponding motion vector for foregrounds with appropriate positions. To validate RCDN, we create OPV2V-N, a new large-scale dataset with manual labelling under different camera failed scenarios. Extensive experiments conducted on OPV2V-N show that RCDN can be ported to other baselines and improve their robustness in extreme camera-insensitivity setting. Our code and datasets will be available soon.", "keywords": "collaborative perception", "primary_area": "machine_vision", "supplementary_material": "/attachment/e66fe30496ee76f50eb61921093237088446e61c.zip", "author": "Tianhang Wang;Fan Lu;Zehan Zheng;Zhijun Li;Guang Chen;changjun jiang", "authorids": "~Tianhang_Wang1;~Fan_Lu3;~Zehan_Zheng1;~Zhijun_Li2;~Guang_Chen4;~changjun_jiang2", "gender": "M;;M;M;M;M", "homepage": "https://www.wangtianhang.cn;https://fanlu97.github.io/;https://dyfcalid.github.io/;;;https://cs.tongji.edu.cn/info/1033/2865.htm", "dblp": ";35/6336-1;300/6688;;09/4891-1;", "google_scholar": ";DyEUPFUAAAAJ;Pig6X6MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=kBhIyv4AAAAJ;", "orcid": ";0000-0002-4932-3850;0000-0002-9733-6437;;0000-0002-7416-592X;0000-0002-2543-8928", "linkedin": ";;;;guang-chen-2879064a;", "or_profile": "~Tianhang_Wang1;~Fan_Lu3;~Zehan_Zheng1;~Zhijun_Li2;~Guang_Chen4;~changjun_jiang2", "aff": "Tongji University;Tongji University;Tongji University;University of Science and Technology of China, Tsinghua University;Tongji University;Tongji University", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;ustc.edu.cn;tongji.edu.cn;tongji.edu.cn", "position": "PhD student;PhD student;MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024rcdn,\ntitle={{RCDN}: Towards Robust Camera-Insensitivity Collaborative Perception via Dynamic Feature-based 3D Neural Modeling},\nauthor={Tianhang Wang and Fan Lu and Zehan Zheng and Zhijun Li and Guang Chen and changjun jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xvVeSZoVJO}\n}", "github": "", "reviewers": "ymZH;YPsN;y98a;qUQv;fQwk", "pdf_size": 6771894, "rating": "5;5;5;5;6", "confidence": "3;2;3;3;3", "soundness": "2;3;3;2;2", "novelty": "2;1;3;2;2", "presentation": "3;4;2;3;2", "wc_summary": "81;123;60;93;80", "wc_strengths": "113;98;47;109;81", "wc_weaknesses": "71;130;221;59;100", "wc_questions": "33;97;1;59;3", "wc_limitations": "9;5;1;5;31", "wc_review": "307;453;330;325;295", "wc_reply_reviewers": "31;0;30;0;8", "wc_reply_authors": "53;0;63;0;32", "reply_reviewers": "1;0;1;0;1", "reply_authors": "2;1;2;1;2", "rating_avg": [ 5.2, 0.39999999999999997 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.4, 0.4898979485566356 ], "novelty_avg": [ 2.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 87.4, 20.71328076379983 ], "wc_strengths_avg": [ 89.6, 24.013329631685817 ], "wc_weaknesses_avg": [ 116.2, 57.880566686928695 ], "wc_questions_avg": [ 38.6, 36.164070567346265 ], "wc_limitations_avg": [ 10.2, 10.703270528207721 ], "wc_review_avg": [ 342.0, 56.89991212646993 ], "wc_reply_reviewers_avg": [ 13.8, 13.948476619330155 ], "wc_reply_authors_avg": [ 29.6, 26.157981573508305 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.2500000000000001, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15845375989748862269&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;ustc.edu.cn;tongji.edu.cn;tongji.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Tongji University;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tongji.edu.cn;http://www.ustc.edu.cn/", "aff_unique_abbr": "Tongji;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Measuring Mutual Policy Divergence for Multi-Agent Sequential Exploration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93051", "id": "xvYI7TCiU6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xvYI7TCiU6", "openreview": "https://openreview.net/forum?id=xvYI7TCiU6", "poster": "/media/PosterPDFs/NeurIPS%202024/93051.png?t=1731293866.1863616", "project": "", "author_site": "Haowen Dou, Lujuan Dang, Zhirong Luan, Badong Chen", "tldr": "", "abstract": "Despite the success of Multi-Agent Reinforcement Learning (MARL) algorithms in cooperative tasks, previous works, unfortunately, face challenges in heterogeneous scenarios since they simply disable parameter sharing for agent specialization. Sequential updating scheme was thus proposed, naturally diversifying agents by encouraging agents to learn from preceding ones. However, the exploration strategy in sequential scheme has not been investigated. Benefiting from updating one-by-one, agents have the access to the information from preceding agents. Thus, in this work, we propose to exploit the preceding information to enhance exploration and heterogeneity sequentially. We present Multi-Agent Divergence Policy Optimization (MADPO), equipped with mutual policy divergence maximization framework. We quantify the policy discrepancies between episodes to enhance exploration and between agents to heterogenize agents, termed intra-agent and inter-agent policy divergence. To address the issue that traditional divergence measurements lack stability and directionality, we propose to employ the conditional Cauchy-Schwarz divergence to provide entropy-guided exploration incentives. Extensive experiments show that the proposed method outperforms state-of-the-art sequential updating approaches in two challenging multi-agent tasks with various heterogeneous scenarios. Source code is available at \\url{https://github.com/hwdou6677/MADPO}.", "keywords": "multi-agent reinforcement learning;sequential updating;exploration;Cauchy-Schwarz divergence", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Haowen Dou;Lujuan Dang;Zhirong Luan;Badong Chen", "authorids": "~Haowen_Dou1;~Lujuan_Dang1;~Zhirong_Luan1;~Badong_Chen1", "gender": "M;M;M;M", "homepage": ";;;http://gr.xjtu.edu.cn/web/chenbd/home", "dblp": "314/6932;;;95/6450", "google_scholar": ";;mJNCeucAAAAJ;mq6tPX4AAAAJ", "orcid": "0000-0002-9707-8036;0000-0002-8929-8127;;", "linkedin": ";;;", "or_profile": "~Haowen_Dou1;~Lujuan_Dang1;~Zhirong_Luan1;~Badong_Chen1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an University of Technology;Xi'an Jiaotong University", "aff_domain": "xjtu.edu.cn;xjtu.edu;xaut.edu.cn;xjtu.edu.cn", "position": "PhD student;Assistant Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\ndou2024measuring,\ntitle={Measuring Mutual Policy Divergence for Multi-Agent Sequential Exploration},\nauthor={Haowen Dou and Lujuan Dang and Zhirong Luan and Badong Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xvYI7TCiU6}\n}", "github": "", "reviewers": "sHA3;gpQ4;3yBc;jcAK", "pdf_size": 3478844, "rating": "5;6;7;7", "confidence": "4;3;3;4", "soundness": "3;2;3;3", "novelty": "2;2;3;3", "presentation": "3;3;3;3", "wc_summary": "92;82;70;65", "wc_strengths": "70;141;41;90", "wc_weaknesses": "82;85;49;205", "wc_questions": "79;99;58;133", "wc_limitations": "20;7;20;1", "wc_review": "343;414;238;494", "wc_reply_reviewers": "0;18;87;140", "wc_reply_authors": "0;17;154;121", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 77.25, 10.520812706250407 ], "wc_strengths_avg": [ 85.5, 36.47259244967377 ], "wc_weaknesses_avg": [ 105.25, 59.29744935492588 ], "wc_questions_avg": [ 92.25, 27.63489641739227 ], "wc_limitations_avg": [ 12.0, 8.276472678623424 ], "wc_review_avg": [ 372.25, 94.13388072314878 ], "wc_reply_reviewers_avg": [ 61.25, 55.87206368123519 ], "wc_reply_authors_avg": [ 73.0, 65.82172893505609 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:AyiUbGCG5fgJ:scholar.google.com/&scioq=Measuring+Mutual+Policy+Divergence+for+Multi-Agent+Sequential+Exploration&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "xjtu.edu.cn;xjtu.edu;xaut.edu.cn;xjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Xi'an Jiao Tong University;Xi'an University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;http://www.xaut.edu.cn", "aff_unique_abbr": "XJTU;XAUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "ManiPose: Manifold-Constrained Multi-Hypothesis 3D Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93050", "id": "xxY8d4rnSb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xxY8d4rnSb", "openreview": "https://openreview.net/forum?id=xxY8d4rnSb", "poster": "", "project": "", "author_site": "C\u00e9dric ROMMEL, Victor Letzelter, Nermin Samet, Renaud Marlet, Matthieu Cord, Patrick Perez, Eduardo Valle", "tldr": "", "abstract": "We propose ManiPose, a manifold-constrained multi-hypothesis model for human-pose 2D-to-3D lifting. We provide theoretical and empirical evidence that, due to the depth ambiguity inherent to monocular 3D human pose estimation, traditional regression models suffer from pose-topology consistency issues, which standard evaluation metrics (MPJPE, P-MPJPE and PCK) fail to assess. ManiPose addresses depth ambiguity by proposing multiple candidate 3D poses for each 2D input, each with its estimated plausibility. Unlike previous multi-hypothesis approaches, ManiPose forgoes generative models, greatly facilitating its training and usage. By constraining the outputs to lie on the human pose manifold, ManiPose guarantees the consistency of all hypothetical poses, in contrast to previous works. We showcase the performance of ManiPose on real-world datasets, where it outperforms state-of-the-art models in pose consistency by a large margin while being very competitive on the MPJPE metric.", "keywords": "human pose estimation;depth ambiguity;multiple choice learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "C\u00e9dric Rommel;Victor Letzelter;Nermin Samet;Renaud Marlet;Matthieu Cord;Patrick Perez;Eduardo Valle", "authorids": "~C\u00e9dric_Rommel1;~Victor_Letzelter1;~Nermin_Samet1;~Renaud_Marlet1;~Matthieu_Cord1;~Patrick_Perez1;~Eduardo_Valle1", "gender": "M;;F;M;M;M;M", "homepage": "https://cedricrommel.github.io/;https://victorletzelter.github.io;https://nerminsamet.github.io;http://imagine.enpc.fr/~marletr/;https://cord.isir.upmc.fr/;http://eduardovalle.com;https://ptrckprz.github.io/", "dblp": "295/9766;360/0588;119/1448;61/5462;68/3117;84/8260;71/1167", "google_scholar": "GBv4KYwAAAAJ;https://scholar.google.fr/citations?user=YhTdZh8AAAAJ;https://scholar.google.com.tr/citations?user=kEVeHU0AAAAJ;2rclwh4AAAAJ;SpAotDcAAAAJ;lxWPqWAAAAAJ;https://scholar.google.fr/citations?user=8Cph5uQAAAAJ", "orcid": ";;;0000-0003-1612-1758;;0000-0001-5396-9868;", "linkedin": "cedric-rommel/;victor-letzelter-3b832219b;;renaud-marlet-9914ab/;;;", "or_profile": "~C\u00e9dric_Rommel1;~Victor_Letzelter1;~Nermin_Samet1;~Renaud_Marlet1;~Matthieu_Cord1;~Eduardo_Valle1;~Patrick_Perez2", "aff": "Valeo;T\u00e9l\u00e9com ParisTech;Valeo;Ecole des Ponts ParisTech;Sorbonne Universit\u00e9;Universidade Estadual de Campinas;Kyutai", "aff_domain": "valeo.com;telecom-paristech.fr;valeo.com;enpc.fr;isir.upmc.fr;unicamp.br;kyutai.org", "position": "Researcher;PhD student;Researcher;Researcher;Full Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nrommel2024manipose,\ntitle={ManiPose: Manifold-Constrained Multi-Hypothesis 3D Human Pose Estimation},\nauthor={C{\\'e}dric Rommel and Victor Letzelter and Nermin Samet and Renaud Marlet and Matthieu Cord and Patrick Perez and Eduardo Valle},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xxY8d4rnSb}\n}", "github": "", "reviewers": "fpqy;tj82;B5U1;5BLY", "pdf_size": 2813190, "rating": "3;5;7;8", "confidence": "4;4;3;4", "soundness": "2;3;3;3", "novelty": "1;3;3;4", "presentation": "3;3;3;4", "wc_summary": "140;70;73;85", "wc_strengths": "45;34;43;102", "wc_weaknesses": "277;146;180;102", "wc_questions": "9;27;14;41", "wc_limitations": "1;22;4;1", "wc_review": "472;299;314;331", "wc_reply_reviewers": "52;229;52;90", "wc_reply_authors": "0;446;0;0", "reply_reviewers": "1;3;1;1", "reply_authors": "1;3;1;1", "rating_avg": [ 5.75, 1.920286436967152 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 1.0897247358851685 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.0, 28.275431031197385 ], "wc_strengths_avg": [ 56.0, 26.879360111431225 ], "wc_weaknesses_avg": [ 176.25, 64.4064243690022 ], "wc_questions_avg": [ 22.75, 12.417225938187643 ], "wc_limitations_avg": [ 7.0, 8.74642784226795 ], "wc_review_avg": [ 354.0, 69.0615667357757 ], "wc_reply_reviewers_avg": [ 105.75, 72.82985308237275 ], "wc_reply_authors_avg": [ 111.5, 193.1236650439298 ], "reply_reviewers_avg": [ 1.5, 0.8660254037844386 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3758230140014144, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mgCRWy7Mkv0J:scholar.google.com/&scioq=ManiPose:+Manifold-Constrained+Multi-Hypothesis+3D+Human+Pose+Estimation&hl=en&as_sdt=0,23", "gs_version_total": 5, "email": "valeo.com;telecom-paristech.fr;valeo.com;enpc.fr;isir.upmc.fr;unicamp.br;kyutai.org", "author_num": 7, "aff_unique_index": "0;1;0;2;3;4;5", "aff_unique_norm": "Valeo;T\u00e9l\u00e9com ParisTech;Ecole des Ponts ParisTech;Sorbonne Universit\u00e9;Universidade Estadual de Campinas;Kyushu University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.valeo.com;https://www.telecom-paristech.fr;https://www.ponts.org;https://www.sorbonne-universite.fr;https://www.unicamp.br;https://www.kyushu-u.ac.jp", "aff_unique_abbr": ";TP;ENPC;Sorbonne U;UNICAMP;Kyushu U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;2", "aff_country_unique": "France;Brazil;Japan" }, { "title": "On the Use of Anchoring for Training Vision Models", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93049", "id": "xymhWyiZOp", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xymhWyiZOp", "openreview": "https://openreview.net/forum?id=xymhWyiZOp", "poster": "", "project": "", "author_site": "Vivek Sivaraman Narayanaswamy, Kowshik Thopalli, Rushil Anirudh, Yamen Mubarka, Wesam Sakla, Jay Thiagarajan", "tldr": "", "abstract": "Anchoring is a recent, architecture-agnostic principle for training deep neural networks that has been shown to significantly improve uncertainty estimation, calibration, and extrapolation capabilities. In this paper, we systematically explore anchoring as a general protocol for training vision models, providing fundamental insights into its training and inference processes and their implications for generalization and safety. Despite its promise, we identify a critical problem in anchored training that can lead to an increased risk of learning undesirable shortcuts, thereby limiting its generalization capabilities. To address this, we introduce a new anchored training protocol that employs a simple regularizer to mitigate this issue and significantly enhances generalization. We empirically evaluate our proposed approach across datasets and architectures of varying scales and complexities, demonstrating substantial performance gains in generalization and safety metrics compared to the standard training protocol. The open-source code is available at https://software.llnl.gov/anchoring.", "keywords": "Anomaly Detection;OOD Generalization;ML Safety;Anchoring;Deep Neural Networks", "primary_area": "machine_vision", "supplementary_material": "", "author": "Vivek Narayanaswamy;Kowshik Thopalli;Rushil Anirudh;Yamen Mubarka;Wesam A. Sakla;Jayaraman J. Thiagarajan", "authorids": "~Vivek_Narayanaswamy1;~Kowshik_Thopalli1;~Rushil_Anirudh1;~Yamen_Mubarka1;~Wesam_A._Sakla1;~Jayaraman_J._Thiagarajan3", "gender": "M;M;M;M;M;M", "homepage": ";https://kowshikthopalli.github.io/;https://rushila.com/;;;https://jjthiagarajan.com", "dblp": "230/4531;224/0052;136/5391;;185/7912;16/7803", "google_scholar": "7h2Ui6YAAAAJ;https://scholar.google.com/citations?hl=en;WkoIlpQAAAAJ;;;cMz65_oAAAAJ", "orcid": ";;0000-0002-4186-3502;;;", "linkedin": ";;rushilanirudh/;yamen-mubarka;;", "or_profile": "~Vivek_Narayanaswamy1;~Kowshik_Thopalli1;~Rushil_Anirudh1;~Yamen_Mubarka1;~Wesam_A._Sakla1;~Jayaraman_J._Thiagarajan2", "aff": "Lawrence Livermore National Labs;Lawrence Livermore National Labs;Amazon;Lawrence Livermore National Labs;Lawrence Livermore National Laboratory;Lawrence Livermore National Labs", "aff_domain": "llnl.gov;llnl.gov;amazon.com;llnl.gov;llnl.gov;llnl.gov", "position": "Researcher;Postdoc;Applied Scientist;Data Scientist;Data Scientist;Computer Scientist", "bibtex": "@inproceedings{\nnarayanaswamy2024on,\ntitle={On the Use of Anchoring for Training Vision Models},\nauthor={Vivek Narayanaswamy and Kowshik Thopalli and Rushil Anirudh and Yamen Mubarka and Wesam A. Sakla and Jayaraman J. Thiagarajan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xymhWyiZOp}\n}", "github": "", "reviewers": "8YxY;Tq8Q;fpNo;VjYP", "pdf_size": 1101469, "rating": "7;7;8;8", "confidence": "2;4;3;4", "soundness": "3;3;4;4", "novelty": "3;3;4;4", "presentation": "3;3;4;4", "wc_summary": "55;121;93;112", "wc_strengths": "92;123;73;83", "wc_weaknesses": "73;79;3;79", "wc_questions": "32;267;43;62", "wc_limitations": "27;1;1;11", "wc_review": "279;591;213;347", "wc_reply_reviewers": "159;124;13;42", "wc_reply_authors": "253;12;0;14", "reply_reviewers": "2;1;1;1", "reply_authors": "4;2;1;2", "rating_avg": [ 7.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 95.25, 25.341418665891617 ], "wc_strengths_avg": [ 92.75, 18.713297411199342 ], "wc_weaknesses_avg": [ 58.5, 32.13642792844283 ], "wc_questions_avg": [ 101.0, 96.43909995432351 ], "wc_limitations_avg": [ 10.0, 10.63014581273465 ], "wc_review_avg": [ 357.5, 142.89419162443238 ], "wc_reply_reviewers_avg": [ 84.5, 59.222039816271106 ], "wc_reply_authors_avg": [ 69.75, 105.93482666243429 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 1.0897247358851685 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:GdZyxI48RNgJ:scholar.google.com/&scioq=On+the+Use+of+Anchoring+for+Training+Vision+Models&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "llnl.gov;llnl.gov;amazon.com;llnl.gov;llnl.gov;llnl.gov", "author_num": 6, "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Lawrence Livermore National Laboratory;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.llnl.gov;https://www.amazon.com", "aff_unique_abbr": "LLNL;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Random Function Descent", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93048", "id": "xzCuBjHQbS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=xzCuBjHQbS", "openreview": "https://openreview.net/forum?id=xzCuBjHQbS", "poster": "/media/PosterPDFs/NeurIPS%202024/93048.png?t=1732267561.4715352", "project": "", "author_site": "Felix Benning, Leif D\u00f6ring", "tldr": "", "abstract": "Classical worst-case optimization theory neither explains the success of optimization in machine learning, nor does it help with step size selection. In this paper we demonstrate the viability and advantages of replacing the classical 'convex function' framework with a 'random function' framework. With complexity $\\mathcal{O}(n^3d^3)$, where $n$ is the number of steps and $d$ the number of dimensions, Bayesian optimization with gradients has not been viable in large dimension so far. By bridging the gap between Bayesian optimization (i.e. random function optimization theory) and classical optimization we establish viability. Specifically, we use a 'stochastic Taylor approximation' to rediscover gradient descent, which is scalable in high dimension due to $\\mathcal{O}(nd)$ complexity. This rediscovery yields a specific step size schedule we call Random Function Descent (RFD). The advantage of this random function framework is that RFD is scale invariant and that it provides a theoretical foundation for common step size heuristics such as gradient clipping and gradual learning rate warmup.", "keywords": "optimization;hyperparameter tuning;Gaussian processes;random functions;random fields;average case analysis;bayesian optimization", "primary_area": "optimization", "supplementary_material": "/attachment/83538dd90d97da60b513f719f9a2b4e64c892263.zip", "author": "Felix Benning;Leif D\u00f6ring", "authorids": "~Felix_Benning1;~Leif_D\u00f6ring1", "gender": "M;M", "homepage": ";https://www.wim.uni-mannheim.de/doering/", "dblp": ";346/0412.html", "google_scholar": "https://scholar.google.com/citations?hl=en;", "orcid": "0009-0001-2354-7696;0000-0002-4569-5083", "linkedin": ";leif-d\u00f6ring-0b002496/?originalSubdomain=de", "or_profile": "~Felix_Benning1;~Leif_D\u00f6ring1", "aff": "Universit\u00e4t Mannheim;Universit\u00e4t Mannheim", "aff_domain": "uni-mannheim.de;uni-mannheim.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbenning2024random,\ntitle={Random Function Descent},\nauthor={Felix Benning and Leif D{\\\"o}ring},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=xzCuBjHQbS}\n}", "github": "", "reviewers": "BoUK;XTWd;1by3;Y4cG", "pdf_size": 4115521, "rating": "5;5;6;7", "confidence": "3;4;2;3", "soundness": "3;4;3;4", "novelty": "2;2;2;4", "presentation": "3;3;2;4", "wc_summary": "40;350;322;131", "wc_strengths": "107;205;8;387", "wc_weaknesses": "314;193;363;359", "wc_questions": "20;66;49;258", "wc_limitations": "5;1;8;7", "wc_review": "486;815;750;1142", "wc_reply_reviewers": "135;0;16;22", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 210.75, 129.69459317951538 ], "wc_strengths_avg": [ 176.75, 139.95066094877865 ], "wc_weaknesses_avg": [ 307.25, 68.71089797113702 ], "wc_questions_avg": [ 98.25, 93.68664525960997 ], "wc_limitations_avg": [ 5.25, 2.680951323690902 ], "wc_review_avg": [ 798.25, 233.5983465266824 ], "wc_reply_reviewers_avg": [ 43.25, 53.5787971122906 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13394637357290038424&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "uni-mannheim.de;uni-mannheim.de", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of Mannheim", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-mannheim.de", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Bench2Drive: Towards Multi-Ability Benchmarking of Closed-Loop End-To-End Autonomous Driving", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97436", "id": "y09S5rdaWY", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y09S5rdaWY", "openreview": "https://openreview.net/forum?id=y09S5rdaWY", "poster": "", "project": "", "author_site": "Xiaosong Jia, Zhenjie Yang, Qifeng Li, Zhiyuan Zhang, Junchi Yan", "tldr": "", "abstract": "In an era marked by the rapid scaling of foundation models, autonomous driving technologies are approaching a transformative threshold where end-to-end autonomous driving (E2E-AD) emerges due to its potential of scaling up in the data-driven manner. However, existing E2E-AD methods are mostly evaluated under the open-loop log-replay manner with L2 errors and collision rate as metrics (e.g., in nuScenes), which could not fully reflect the driving performance of algorithms as recently acknowledged in the community. For those E2E-AD methods evaluated under the closed-loop protocol, they are tested in fixed routes (e.g., Town05Long and Longest6 in CARLA) with the driving score as metrics, which is known for high variance due to the unsmoothed metric function and large randomness in the long route. Besides, these methods usually collect their own data for training, which makes algorithm-level fair comparison infeasible. \n\n To fulfill the paramount need of comprehensive, realistic, and fair testing environments for Full Self-Driving (FSD), we present Bench2Drive, the first benchmark for evaluating E2E-AD systems' multiple abilities in a closed-loop manner. Bench2Drive's official training data consists of 2 million fully annotated frames, collected from 10000 short clips uniformly distributed under 44 interactive scenarios (cut-in, overtaking, detour, etc), 23 weathers (sunny, foggy, rainy, etc), and 12 towns (urban, village, university, etc) in CARLA v2. Its evaluation protocol requires E2E-AD models to pass 44 interactive scenarios under different locations and weathers which sums up to 220 routes and thus provides a comprehensive and disentangled assessment about their driving capability under different situations. We implement state-of-the-art E2E-AD models and evaluate them in Bench2Drive, providing insights regarding current status and future directions.", "keywords": "Autonomous Driving", "primary_area": "", "supplementary_material": "", "author": "Xiaosong Jia;Zhenjie Yang;Qifeng Li;Zhiyuan Zhang;Junchi Yan", "authorids": "~Xiaosong_Jia1;~Zhenjie_Yang1;~Qifeng_Li2;~Zhiyuan_Zhang11;~Junchi_Yan2", "gender": "M;M;M;M;M", "homepage": "https://jiaxiaosong1002.github.io/;https://github.com/jayyoung0802;;http://thinklab.sjtu.edu.cn/;https://github.com/wdask", "dblp": "274/6360;;;60/7949.html;", "google_scholar": "JeFQwxUAAAAJ;jVlRiUEAAAAJ;;ga230VoAAAAJ;", "orcid": ";;0000-0003-3813-0778;0000-0001-9639-7679;", "linkedin": ";;;;", "or_profile": "~Xiaosong_Jia1;~Zhenjie_Yang1;~Qifeng_Li2;~Junchi_Yan1;~zhiyuan_zhang10", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Tsinghua University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Intern", "bibtex": "@inproceedings{\njia2024benchdrive,\ntitle={Bench2Drive: Towards Multi-Ability Benchmarking of Closed-Loop End-To-End Autonomous Driving},\nauthor={Xiaosong Jia and Zhenjie Yang and Qifeng Li and Zhiyuan Zhang and Junchi Yan},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=y09S5rdaWY}\n}", "github": "", "reviewers": "K6WS;XSJo;WLvp;9tDx", "pdf_size": 8022848, "rating": "6;6;6;8", "confidence": "4;4;4;4", "wc_summary_and_contributions": "184;53;54;46", "wc_strengths": "90;30;72;56", "wc_improvement": "217;85;39;30", "wc_limitations": "87;62;22;1", "wc_correctness": "39;8;20;1", "wc_clarity": "56;6;8;1", "wc_relation_to_prior_work": "44;5;26;1", "wc_documentation": "36;7;12;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "754;257;254;138", "wc_reply_reviewers": "99;166;47;32", "wc_reply_authors": "752;464;685;21", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;4;2", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "wc_summary_and_contributions_avg": [ 84.25, 57.67310898503739 ], "wc_strengths_avg": [ 62.0, 22.045407685048602 ], "wc_improvement_avg": [ 92.75, 74.70734568969775 ], "wc_limitations_avg": [ 43.0, 33.54847239443251 ], "wc_correctness_avg": [ 17.0, 14.404860290887934 ], "wc_clarity_avg": [ 17.75, 22.230328382639787 ], "wc_relation_to_prior_work_avg": [ 19.0, 17.277152543170995 ], "wc_documentation_avg": [ 14.0, 13.285330255586423 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 350.75, 237.7092499251975 ], "wc_reply_reviewers_avg": [ 86.0, 52.454742397613586 ], "wc_reply_authors_avg": [ 480.5, 285.8955228750531 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.7071067811865476 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=429223358631711409&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SJTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97435", "id": "y10DM6R2r3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y10DM6R2r3", "openreview": "https://openreview.net/forum?id=y10DM6R2r3", "poster": "/media/PosterPDFs/NeurIPS%202024/97435.png?t=1731333253.6925266", "project": "", "author_site": "Yubo Wang, Xueguang Ma, Ge Zhang, Yuansheng Ni, Abhranil Chandra, Shiguang Guo, Weiming Ren, Aaran Arulraj, Xuan He, Ziyan Jiang, Tianle Li, Max KU, Kai Wang, Alex Zhuang, Rongqi Fan, Xiang Yue, Wenhu Chen", "tldr": "", "abstract": "In the age of large-scale language models, benchmarks like the Massive Multitask Language Understanding (MMLU) have been pivotal in pushing the boundaries of what AI can achieve in language comprehension and reasoning across diverse domains. However, as models continue to improve, their performance on these benchmarks has begun to plateau, making it increasingly difficult to discern differences in model capabilities. This paper introduces MMLU-Pro, an enhanced dataset designed to extend the mostly knowledge-driven MMLU benchmark by integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. Additionally, MMLU-Pro eliminates part of the trivial and noisy questions in MMLU. Our experimental results show that MMLU-Pro not only raises the challenge, causing a significant drop in accuracy by 16\\% to 33\\% compared to MMLU, but also demonstrates greater stability under varying prompts. With 24 different prompt styles tested, the sensitivity of model scores to prompt variations decreased from 4-5\\% in MMLU to just 2\\% in MMLU-Pro. Additionally, we found that models utilizing Chain of Thought (CoT) reasoning achieved better performance on MMLU-Pro compared to direct answering, which is in stark contrast to the findings on the original MMLU, indicating that MMLU-Pro includes more complex reasoning questions. Our assessments confirm that MMLU-Pro is more discriminative benchmark to better track progress in the field.", "keywords": "Evaluation;Multi-task;Robustness;Language Understanding", "primary_area": "", "supplementary_material": "/attachment/1975be6fef1c19c72e8990b3625bd35645e3c978.pdf", "author": "Yubo Wang;Xueguang Ma;Ge Zhang;Yuansheng Ni;Abhranil Chandra;Shiguang Guo;Weiming Ren;Aaran Arulraj;Xuan He;Ziyan Jiang;Tianle Li;Max Ku;Kai Wang;Alex Zhuang;Rongqi Fan;Xiang Yue;Wenhu Chen", "authorids": "~Yubo_Wang9;~Xueguang_Ma1;~Ge_Zhang5;~Yuansheng_Ni1;~Abhranil_Chandra1;~Shiguang_Guo1;~Weiming_Ren1;~Aaran_Arulraj1;~Xuan_He6;~Ziyan_Jiang1;~Tianle_Li1;~Max_Ku1;~Kai_Wang39;~Alex_Zhuang1;~Rongqi_Fan1;~Xiang_Yue1;~Wenhu_Chen3", "gender": "M;M;M;;;;M;Not Specified;M;M;F;M;;M;;;", "homepage": ";;;;https://abhranilchandra.github.io/;;https://cs.uwaterloo.ca/~w2ren;;;https://xmhzz2018.github.io/;https://www.litianlephoebe.com/;https://kuwingfung.github.io/;;;https://richard5678.github.io;;", "dblp": ";44/9030;;;304/2731;;325/9916;;;02/10386;242/0053;348/0574.html;;;379/4566.html;;", "google_scholar": "s_ZW7voAAAAJ;4kvcmkQAAAAJ;qyTrq4kAAAAJ;;;;51MNpu0AAAAJ;;_-8U5bMAAAAJ;wDJjd2IAAAAJ;g213g7YAAAAJ;https://scholar.google.com.hk/citations?user=oCFgVhUAAAAJ;;jol39OkAAAAJ;Gx4qcdgAAAAJ;;", "orcid": ";;;;;;;;0000-0001-8983-7576;0009-0005-4955-0737;;;;;0009-0001-1427-5481;;", "linkedin": ";;ge-zhang-792797169/;;abhranil-chandra-462332136/;;;aaran-arulraj;;;;max-ku-650571172/;;;richard-fan2020/;;", "or_profile": "~Yubo_Wang9;~Xueguang_Ma1;~Ge_Zhang5;~Yuansheng_Ni1;~Abhranil_Chandra1;~Shiguang_Guo1;~Weiming_Ren1;~Aaran_Arulraj1;~Xuan_He6;~Ziyan_Jiang1;~Tianle_Li1;~Max_Ku1;~Kai_Wang39;~Alex_Zhuang1;~Rongqi_Fan1;~Xiang_Yue1;~Wenhu_Chen3", "aff": "University of Waterloo;University of Waterloo;University of Waterloo;;University of Waterloo;;University of Waterloo;University of Waterloo;Tsinghua University;Amazon;University of Waterloo;University of Waterloo;;, University of Waterloo;NVIDIA;;", "aff_domain": "uwaterloo.ca;uwaterloo.ca;cs.uwaterloo.ca;;uwaterloo.ca;;cs.uwaterloo.ca;uwaterloo.ca;mails.tsinghua.edu.cn;amazon.com;uwaterloo.ca;uwaterloo.ca;;cs.uwaterloo.ca;nvidia.com;;", "position": "PhD student;PhD student;PhD student;;MS student;;PhD student;Undergrad student;Undergrad student;Researcher;MS student;MS student;;Undergrad student;Intern;;", "bibtex": "@inproceedings{\nwang2024mmlupro,\ntitle={{MMLU}-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},\nauthor={Yubo Wang and Xueguang Ma and Ge Zhang and Yuansheng Ni and Abhranil Chandra and Shiguang Guo and Weiming Ren and Aaran Arulraj and Xuan He and Ziyan Jiang and Tianle Li and Max Ku and Kai Wang and Alex Zhuang and Rongqi Fan and Xiang Yue and Wenhu Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=y10DM6R2r3}\n}", "github": "", "reviewers": "6sZ1;ZrEH;WZvx;AG1t", "pdf_size": 892636, "rating": "6;6;7;7", "confidence": "4;4;3;3", "wc_summary_and_contributions": "85;102;92;50", "wc_strengths": "83;2;66;97", "wc_improvement": "7;2;13;25", "wc_limitations": "179;1;30;10", "wc_correctness": "1;1;51;8", "wc_clarity": "1;1;5;5", "wc_relation_to_prior_work": "1;1;24;5", "wc_documentation": "1;1;43;8", "wc_additional_feedback": "1;1;1;1", "wc_review": "359;112;325;209", "wc_reply_reviewers": "19;0;24;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 82.25, 19.57517560585345 ], "wc_strengths_avg": [ 62.0, 36.33868462121325 ], "wc_improvement_avg": [ 11.75, 8.584142356694699 ], "wc_limitations_avg": [ 55.0, 72.35675504056273 ], "wc_correctness_avg": [ 15.25, 20.837166314064877 ], "wc_clarity_avg": [ 3.0, 2.0 ], "wc_relation_to_prior_work_avg": [ 7.75, 9.522998477370455 ], "wc_documentation_avg": [ 13.25, 17.41228014936585 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 251.25, 97.75575430633226 ], "wc_reply_reviewers_avg": [ 10.75, 10.894379284750462 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -1.0, "gs_citation": 269, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2666986522064548248&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "email": "uwaterloo.ca;uwaterloo.ca;cs.uwaterloo.ca;;uwaterloo.ca;;cs.uwaterloo.ca;uwaterloo.ca;mails.tsinghua.edu.cn;amazon.com;uwaterloo.ca;uwaterloo.ca;;cs.uwaterloo.ca;nvidia.com;;", "author_num": 17, "aff_unique_index": "0;0;0;0;0;0;1;2;0;0;0;3", "aff_unique_norm": "University of Waterloo;Tsinghua University;Amazon;NVIDIA", "aff_unique_dep": ";;Amazon.com, Inc.;NVIDIA Corporation", "aff_unique_url": "https://uwaterloo.ca;https://www.tsinghua.edu.cn;https://www.amazon.com;https://www.nvidia.com", "aff_unique_abbr": "UW;THU;Amazon;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;2;0;0;0;2", "aff_country_unique": "Canada;China;United States" }, { "title": "Learning diffusion at lightspeed", "status": "Oral", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93047", "id": "y10avdRFNK", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y10avdRFNK", "openreview": "https://openreview.net/forum?id=y10avdRFNK", "poster": "/media/PosterPDFs/NeurIPS%202024/93047.png?t=1729499977.898014", "project": "", "author_site": "Antonio Terpin, Nicolas Lanzetti, Mart\u00edn Gadea, Florian Dorfler", "tldr": "", "abstract": "Diffusion regulates numerous natural processes and the dynamics of many successful generative models. Existing models to learn the diffusion terms from observational data rely on complex bilevel optimization problems and model only the drift of the system.\nWe propose a new simple model, JKOnet*, which bypasses the complexity of existing architectures while presenting significantly enhanced representational capabilities: JKOnet* recovers the potential, interaction, and internal energy components of the underlying diffusion process. JKOnet* minimizes a simple quadratic loss and outperforms other baselines in terms of sample efficiency, computational complexity, and accuracy. Additionally, JKOnet* provides a closed-form optimal solution for linearly parametrized functionals, and, when applied to predict the evolution of cellular processes from real-world data, it achieves state-of-the-art accuracy at a fraction of the computational cost of all existing methods.\nOur methodology is based on the interpretation of diffusion processes as energy-minimizing trajectories in the probability space via the so-called JKO scheme, which we study via its first-order optimality conditions.", "keywords": "optimal transport;diffusion processes;gradient flows", "primary_area": "optimization", "supplementary_material": "", "author": "Antonio Terpin;Nicolas Lanzetti;Mart\u00edn Gadea;Florian Dorfler", "authorids": "~Antonio_Terpin1;~Nicolas_Lanzetti1;~Mart\u00edn_Gadea1;~Florian_Dorfler1", "gender": "M;M;M;M", "homepage": "https://www.antonioterpin.com;http://people.ee.ethz.ch/~lnicolas/index.html;;http://people.ee.ethz.ch/~floriand/", "dblp": "303/1006;247/4363;;", "google_scholar": "IFx4XfoAAAAJ;gWJV1rQAAAAJ;;https://scholar.google.com/citations?view_op=list_works", "orcid": "0000-0002-8456-6786;0000-0002-9128-1412;;0000-0002-9649-5305", "linkedin": ";;mart%C3%ADn-gadea-p%C3%A9rez-057171243/;", "or_profile": "~Antonio_Terpin1;~Nicolas_Lanzetti1;~Mart\u00edn_Gadea1;~Florian_Dorfler1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich;", "aff_domain": "ethz.ch;ethz.ch;ethz.ch;", "position": "PhD student;PhD student;MS student;", "bibtex": "@inproceedings{\nterpin2024learning,\ntitle={Learning diffusion at lightspeed},\nauthor={Antonio Terpin and Nicolas Lanzetti and Mart{\\'\\i}n Gadea and Florian Dorfler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y10avdRFNK}\n}", "github": "", "reviewers": "CXdg;vNRb;vFxe;fc3q", "pdf_size": 5304087, "rating": "5;6;7;8", "confidence": "3;3;3;4", "soundness": "3;3;4;4", "novelty": "2;3;4;4", "presentation": "3;3;3;4", "wc_summary": "53;88;119;98", "wc_strengths": "56;94;114;51", "wc_weaknesses": "56;69;124;165", "wc_questions": "18;76;72;58", "wc_limitations": "1;7;7;4", "wc_review": "184;334;436;376", "wc_reply_reviewers": "0;14;17;23", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 89.5, 23.8589605808803 ], "wc_strengths_avg": [ 78.75, 26.280934153869037 ], "wc_weaknesses_avg": [ 103.5, 43.729280808172454 ], "wc_questions_avg": [ 56.0, 22.93468988235943 ], "wc_limitations_avg": [ 4.75, 2.48746859276655 ], "wc_review_avg": [ 332.5, 93.08463890460122 ], "wc_reply_reviewers_avg": [ 13.5, 8.440971508067067 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.7745966692414834, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11990975770044276023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "ethz.ch;ethz.ch;ethz.ch;", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "HEPrune: Fast Private Training of Deep Neural Networks With Encrypted Data Pruning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93046", "id": "y2fAmldTIf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y2fAmldTIf", "openreview": "https://openreview.net/forum?id=y2fAmldTIf", "poster": "/media/PosterPDFs/NeurIPS%202024/93046.png?t=1731633109.47164", "project": "", "author_site": "Yancheng Zhang, Mengxin Zheng, Yuzhang Shang, Xun Chen, Qian Lou", "tldr": "", "abstract": "Non-interactive cryptographic computing, Fully Homomorphic Encryption (FHE), provides a promising solution for private neural network training on encrypted data. One challenge of FHE-based private training is its large computational overhead, especially the multiple rounds of forward and backward execution on each encrypted data sample. Considering the existence of largely redundant data samples, pruning them will significantly speed up the training, as proven in plain non-FHE training. \nExecuting the data pruning of encrypted data on the server side is not trivial since the knowledge calculation of data pruning needs complex and expensive executions on encrypted data. There is a lack of FHE-based data pruning protocol for efficient, private training. In this paper, we propose, \\textit{HEPrune}, to construct a FHE data-pruning protocol and then design an FHE-friendly data-pruning algorithm under client-aided or non-client-aided settings, respectively. We also observed that data sample pruning may not always remove ciphertexts, leaving large empty slots and limiting the effects of data pruning. Thus, in HEPrune, we further propose ciphertext-wise pruning to reduce ciphertext computation numbers without hurting accuracy. Experimental results show that our work can achieve a $16\\times$ speedup with only a $0.6\\%$ accuracy drop over prior work. \nThe code is publicly available at \\href{https://github.com/UCF-Lou-Lab-PET/Private-Data-Prune}.", "keywords": "Confidential Training;Privacy;Pruning;Cryptographic Computing", "primary_area": "privacy", "supplementary_material": "", "author": "Yancheng Zhang;Mengxin Zheng;Yuzhang Shang;Xun Chen;Qian Lou", "authorids": "~Yancheng_Zhang1;~Mengxin_Zheng1;~Yuzhang_Shang1;~Xun_Chen1;~Qian_Lou1", "gender": "Not Specified;F;M;;M", "homepage": "https://yanchengzhang.com/;https://mxzheng.github.io/;https://42shawn.github.io/;;https://qlou.org", "dblp": "305/5393;327/9609;300/8483;;207/3962.html", "google_scholar": "Xi6wdOsAAAAJ;CwLrXMAAAAAJ;6ZPL5E0AAAAJ;;SBYgXLoAAAAJ", "orcid": "0009-0002-6345-8935;;;;", "linkedin": "yancheng-zhang-790ba6287/;mengxin-zheng-86bb91171/;;;", "or_profile": "~Yancheng_Zhang1;~Mengxin_Zheng1;~Yuzhang_Shang1;~Xun_Chen1;~Qian_Lou1", "aff": "University of Central Florida;University of Central Florida;Illinois Institute of Technology;;University of Central Florida", "aff_domain": "ucf.edu;ucf.edu;iit.edu;;ucf.edu", "position": "PhD student;Assistant Professor;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024heprune,\ntitle={{HEP}rune: Fast Private Training of Deep Neural Networks With Encrypted Data Pruning},\nauthor={Yancheng Zhang and Mengxin Zheng and Yuzhang Shang and Xun Chen and Qian Lou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y2fAmldTIf}\n}", "github": "", "reviewers": "FEVE;98ig;ADwT;LFsM", "pdf_size": 668390, "rating": "5;5;6;7", "confidence": "4;4;4;3", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "169;648;66;156", "wc_strengths": "112;79;96;47", "wc_weaknesses": "452;87;229;53", "wc_questions": "111;146;105;14", "wc_limitations": "4;47;5;1", "wc_review": "848;1007;501;271", "wc_reply_reviewers": "0;203;24;0", "wc_reply_authors": "0;229;21;0", "reply_reviewers": "0;1;1;0", "reply_authors": "1;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 259.75, 227.6382821495541 ], "wc_strengths_avg": [ 83.5, 24.088378940891808 ], "wc_weaknesses_avg": [ 205.25, 157.01333542091257 ], "wc_questions_avg": [ 94.0, 48.76986774638619 ], "wc_limitations_avg": [ 14.25, 18.965429075030176 ], "wc_review_avg": [ 656.75, 288.2328702629178 ], "wc_reply_reviewers_avg": [ 56.75, 85.00404402144642 ], "wc_reply_authors_avg": [ 62.5, 96.51036213795905 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7516716253256403765&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ucf.edu;ucf.edu;iit.edu;;ucf.edu", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Central Florida;Illinois Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucf.edu;https://www.iit.edu", "aff_unique_abbr": "UCF;IIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Towards Diverse Device Heterogeneous Federated Learning via Task Arithmetic Knowledge Integration", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93045", "id": "y6JotynERr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y6JotynERr", "openreview": "https://openreview.net/forum?id=y6JotynERr", "poster": "/media/PosterPDFs/NeurIPS%202024/93045.png?t=1731301532.8443182", "project": "", "author_site": "Mahdi Morafah, Vyacheslav Kungurtsev, Hojin Chang, Chen Chen, Bill Lin", "tldr": "", "abstract": "Federated Learning (FL) has emerged as a promising paradigm for collaborative machine learning, while preserving user data privacy. Despite its potential, standard FL algorithms lack support for diverse heterogeneous device prototypes, which vary significantly in model and dataset sizes---from small IoT devices to large workstations. This limitation is only partially addressed by existing knowledge distillation (KD) techniques, which often fail to transfer knowledge effectively across a broad spectrum of device prototypes with varied capabilities. This failure primarily stems from two issues: the dilution of informative logits from more capable devices by those from less capable ones, and the use of a single integrated logits as the distillation target across all devices, which neglects their individual learning capacities and and the unique contributions of each device. To address these challenges, we introduce TAKFL, a novel KD-based framework that treats the knowledge transfer from each device prototype's ensemble as a separate task, independently distilling each to preserve its unique contributions and avoid dilution. TAKFL also incorporates a KD-based self-regularization technique to mitigate the issues related to the noisy and unsupervised ensemble distillation process. To integrate the separately distilled knowledge, we introduce an adaptive task arithmetic knowledge integration process, allowing each student model to customize the knowledge integration for optimal performance. Additionally, we present theoretical results demonstrating the effectiveness of task arithmetic in transferring knowledge across heterogeneous device prototypes with varying capacities. Comprehensive evaluations of our method across both computer vision (CV) and natural language processing (NLP) tasks demonstrate that TAKFL achieves state-of-the-art results in a variety of datasets and settings, significantly outperforming existing KD-based methods. Our code is released at https://github.com/MMorafah/TAKFL and the project website is available at https://mmorafah.github.io/takflpage .", "keywords": "Federated Learning;Heterogeneous Device Prototypes;Knowledge Distillation;Task Arithmetic;Machine Learning", "primary_area": "other", "supplementary_material": "/attachment/fe17063e1e942daac661fb289ffbb1be693c72ca.zip", "author": "Mahdi Morafah;Vyacheslav Kungurtsev;Hojin Matthew Chang;Chen Chen;Bill Lin", "authorids": "~Mahdi_Morafah1;~Vyacheslav_Kungurtsev1;~Hojin_Matthew_Chang1;~Chen_Chen18;~Bill_Lin1", "gender": "M;M;M;M;M", "homepage": "https://mmorafah.github.io/mmorafah/;https://cs.fel.cvut.cz/en/people/kunguvya;;https://www.crcv.ucf.edu/chenchen/;", "dblp": "292/3101;149/2722.html;;65/4423-1;l/BillLin.html", "google_scholar": "citations?user=mAtf52wAAAAJ;https://scholar.google.cz/citations?hl=en;;TuEwcZ0AAAAJ;j3geh3QAAAAJ", "orcid": "0000-0002-2518-071X;0000-0003-2229-8824;0009-0007-1583-1375;0000-0003-3957-7061;", "linkedin": "mahdi-morafah-ab97a8106/;https://cz.linkedin.com/in/vyacheslav-kungurtsev-3694962a;;dennychen/;", "or_profile": "~Mahdi_Morafah1;~Vyacheslav_Kungurtsev1;~Hojin_Matthew_Chang1;~Chen_Chen18;~Bill_Lin1", "aff": "University of California, San Diego;Czech Technical Univeresity in Prague, Czech Technical University of Prague;University of California, San Diego;University of Central Florida;University of California, San Diego", "aff_domain": "ucsd.edu;fel.cvut.cz;ucsd.edu;ucf.edu;ucsd.edu", "position": "PhD student;Researcher;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmorafah2024towards,\ntitle={Towards Diverse Device Heterogeneous Federated Learning via Task Arithmetic Knowledge Integration},\nauthor={Mahdi Morafah and Vyacheslav Kungurtsev and Hojin Matthew Chang and Chen Chen and Bill Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y6JotynERr}\n}", "github": "", "reviewers": "ME5v;F7vu;RvJG;V7t1", "pdf_size": 7006698, "rating": "5;5;5;5", "confidence": "3;4;3;4", "soundness": "2;3;2;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "78;45;80;58", "wc_strengths": "51;26;160;56", "wc_weaknesses": "122;266;314;122", "wc_questions": "12;4;2;2", "wc_limitations": "1;4;1;1", "wc_review": "264;345;557;239", "wc_reply_reviewers": "0;933;0;80", "wc_reply_authors": "88;2803;88;489", "reply_reviewers": "0;6;0;1", "reply_authors": "2;8;2;3", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.25, 14.515078366994786 ], "wc_strengths_avg": [ 73.25, 51.35842189943145 ], "wc_weaknesses_avg": [ 206.0, 85.6971411425142 ], "wc_questions_avg": [ 5.0, 4.123105625617661 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 351.25, 125.08472128921262 ], "wc_reply_reviewers_avg": [ 253.25, 393.8104715469105 ], "wc_reply_authors_avg": [ 867.0, 1129.6749532498275 ], "reply_reviewers_avg": [ 1.75, 2.48746859276655 ], "reply_authors_avg": [ 3.75, 2.48746859276655 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3558337356725291677&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "ucsd.edu;fel.cvut.cz;ucsd.edu;ucf.edu;ucsd.edu", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "University of California, San Diego;Czech Technical University in Prague;University of Central Florida", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.ctu.cz;https://www.ucf.edu", "aff_unique_abbr": "UCSD;CTU;UCF", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "San Diego;Prague;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Czech Republic" }, { "title": "NeuroBOLT: Resting-state EEG-to-fMRI Synthesis with Multi-dimensional Feature Mapping", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93044", "id": "y6qhVtFG77", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y6qhVtFG77", "openreview": "https://openreview.net/forum?id=y6qhVtFG77", "poster": "/media/PosterPDFs/NeurIPS%202024/93044.png?t=1733617950.057914", "project": "", "author_site": "Yamin Li, Ange Lou, Ziyuan Xu, Shengchao Zhang, Shiyu Wang, Dario Englot, Soheil Kolouri, Daniel Moyer, Roza Bayrak, Catie Chang", "tldr": "", "abstract": "Functional magnetic resonance imaging (fMRI) is an indispensable tool in modern neuroscience, providing a non-invasive window into whole-brain dynamics at millimeter-scale spatial resolution. However, fMRI is constrained by issues such as high operation costs and immobility. With the rapid advancements in cross-modality synthesis and brain decoding, the use of deep neural networks has emerged as a promising solution for inferring whole-brain, high-resolution fMRI features directly from electroencephalography (EEG), a more widely accessible and portable neuroimaging modality. Nonetheless, the complex projection from neural activity to fMRI hemodynamic responses and the spatial ambiguity of EEG pose substantial challenges both in modeling and interpretability. Relatively few studies to date have developed approaches for EEG-fMRI translation, and although they have made significant strides, the inference of fMRI signals in a given study has been limited to a small set of brain areas and to a single condition (i.e., either resting-state or a specific task). The capability to predict fMRI signals in other brain areas, as well as to generalize across conditions, remain critical gaps in the field. To tackle these challenges, we introduce a novel and generalizable framework: NeuroBOLT, i.e., Neuro-to-BOLD Transformer, which leverages multi-dimensional representation learning from temporal, spatial, and spectral domains to translate raw EEG data to the corresponding fMRI activity signals across the brain. Our experiments demonstrate that NeuroBOLT effectively reconstructs unseen resting-state fMRI signals from primary sensory, high-level cognitive areas, and deep subcortical brain regions, achieving state-of-the-art accuracy with the potential to generalize across varying conditions and sites, which significantly advances the integration of these two modalities.", "keywords": "EEG-to-fMRI synthesis;EEG;fMRI", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yamin Li;Ange Lou;Ziyuan Xu;SHENGCHAO ZHANG;Shiyu Wang;Dario J. Englot;Soheil Kolouri;Daniel Moyer;Roza G Bayrak;Catie Chang", "authorids": "~Yamin_Li2;~Ange_Lou2;~Ziyuan_Xu3;~SHENGCHAO_ZHANG1;~Shiyu_Wang8;~Dario_J._Englot1;~Soheil_Kolouri1;~Daniel_Moyer3;~Roza_G_Bayrak2;~Catie_Chang1", "gender": "F;M;M;M;F;M;M;F;F;M", "homepage": ";;;;;http://www.englotlab.com;https://skolouri.github.io/;;https://www.cchanglab.net/;https://dcmoyer.github.io", "dblp": "09/2379;266/9729;;;;;143/9637;;43/8258.html;187/6201", "google_scholar": "lWarbfUAAAAJ;GwWK7IoAAAAJ;;;eE7BwNEAAAAJ;;yREBSy0AAAAJ;QHN1CZsAAAAJ;4ndpsi4AAAAJ;sKmoxSMAAAAJ", "orcid": "0000-0003-0010-0848;;;;;;0000-0001-8495-5362;;0000-0003-1541-9579;", "linkedin": "yamin-li-44a60a260/;;roger-xu/;shengchao-zhang-b86422129/;;;skolouri/;;;", "or_profile": "~Yamin_Li2;~Ange_Lou2;~Ziyuan_Xu3;~SHENGCHAO_ZHANG1;~Shiyu_Wang8;~Dario_J._Englot1;~Soheil_Kolouri1;~Roza_G_Bayrak2;~Catie_Chang1;~Daniel_Moyer2", "aff": "Vanderbilt University;Vanderbilt University;, Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University;Vanderbilt University", "aff_domain": "vanderbilt.edu;vanderbilt.edu;engineering.vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;PhD student;Associate Professor;Assistant Professor;Research Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024neurobolt,\ntitle={Neuro{BOLT}: Resting-state {EEG}-to-f{MRI} Synthesis with Multi-dimensional Feature Mapping},\nauthor={Yamin Li and Ange Lou and Ziyuan Xu and SHENGCHAO ZHANG and Shiyu Wang and Dario J. Englot and Soheil Kolouri and Daniel Moyer and Roza G Bayrak and Catie Chang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y6qhVtFG77}\n}", "github": "", "reviewers": "bwvZ;EkS2;rcEv", "pdf_size": 18344285, "rating": "4;5;8", "confidence": "5;5;4", "soundness": "2;3;3", "novelty": "2;3;4", "presentation": "3;3;3", "wc_summary": "53;67;48", "wc_strengths": "46;174;82", "wc_weaknesses": "337;726;71", "wc_questions": "6;74;41", "wc_limitations": "10;61;41", "wc_review": "452;1102;283", "wc_reply_reviewers": "332;52;16", "wc_reply_authors": "1356;209;23", "reply_reviewers": "1;1;1", "reply_authors": "3;2;2", "rating_avg": [ 5.666666666666667, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.816496580927726 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 56.0, 8.04155872120988 ], "wc_strengths_avg": [ 100.66666666666667, 53.897021150420635 ], "wc_weaknesses_avg": [ 378.0, 268.96963893098916 ], "wc_questions_avg": [ 40.333333333333336, 27.764885897278397 ], "wc_limitations_avg": [ 37.333333333333336, 20.98147330914162 ], "wc_review_avg": [ 612.3333333333334, 353.05366290252374 ], "wc_reply_reviewers_avg": [ 133.33333333333334, 141.24525557420404 ], "wc_reply_authors_avg": [ 529.3333333333334, 589.4530421406687 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.9707253433941508, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16291104561180934094&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "vanderbilt.edu;vanderbilt.edu;engineering.vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu;vanderbilt.edu", "author_num": 10, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Vanderbilt University", "aff_unique_dep": "", "aff_unique_url": "https://www.vanderbilt.edu", "aff_unique_abbr": "Vanderbilt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RobIR: Robust Inverse Rendering for High-Illumination Scenes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93043", "id": "y7oxY5pq4j", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y7oxY5pq4j", "openreview": "https://openreview.net/forum?id=y7oxY5pq4j", "poster": "/media/PosterPDFs/NeurIPS%202024/93043.png?t=1730442033.8277564", "project": "", "author_site": "Ziyi Yang, Chenyanzhen, Xinyu Gao, YazhenYuan, Wu Yu, Xiaowei Zhou, Xiaogang Jin", "tldr": "", "abstract": "Implicit representation has opened up new possibilities for inverse rendering. However, existing implicit neural inverse rendering methods struggle to handle strongly illuminated scenes with significant shadows and slight reflections. The existence of shadows and reflections can lead to an inaccurate understanding of the scene, making precise factorization difficult. To this end, we present RobIR, an implicit inverse rendering approach that uses ACES tone mapping and regularized visibility estimation to reconstruct accurate BRDF of the object. By accurately modeling the indirect radiance field, normal, visibility, and direct light simultaneously, we are able to accurately decouple environment lighting and the object's PBR materials without imposing strict constraints on the scene. Even in high-illumination scenes with shadows and specular reflections, our method can recover high-quality albedo and roughness with no shadow interference. RobIR outperforms existing methods in both quantitative and qualitative evaluations.", "keywords": "Inverse Rendering;BRDF Estimation;Illumination Estimation;NeRF;Inverse Graphics", "primary_area": "machine_vision", "supplementary_material": "/attachment/1a991de6eefb6a4b6003a333189ee406cdaef46a.zip", "author": "Ziyi Yang;Chenyanzhen;Xinyu Gao;YazhenYuan;Wu Yu;Xiaowei Zhou;Xiaogang Jin", "authorids": "~Ziyi_Yang4;~Chenyanzhen1;~Xinyu_Gao1;~YazhenYuan1;~Wu_Yu1;~Xiaowei_Zhou3;~Xiaogang_Jin1", "gender": "M;M;M;M;M;M;M", "homepage": "https://ingra14m.github.io/;https://github.com/Choconuts;;https://yuanyazhen.github.io/;https://www.linkedin.com/in/yu-wu-b3930092/;http://xzhou.me/;http://www.cad.zju.edu.cn/home/jin/", "dblp": ";;;138/3442.html;;;36/3676-1", "google_scholar": "B0IyfqQAAAAJ;;;4EhxgBkAAAAJ;;E1vVpg4AAAAJ;yryOvLwAAAAJ", "orcid": "0000-0002-9318-4704;;0009-0007-1079-6451;;;;", "linkedin": ";;;;yu-wu-b3930092/;;", "or_profile": "~Ziyi_Yang4;~Chenyanzhen1;~Xinyu_Gao1;~YazhenYuan1;~Wu_Yu1;~Xiaowei_Zhou3;~Xiaogang_Jin1", "aff": "Zhejiang University;;Zhejiang University;Tencent Games;Tencent IEG;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;tencent.com;tencent.com;zju.edu.cn;zju.edu.cn", "position": "MS student;;MS student;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2024robir,\ntitle={Rob{IR}: Robust Inverse Rendering for High-Illumination Scenes},\nauthor={Ziyi Yang and Chenyanzhen and Xinyu Gao and YazhenYuan and Wu Yu and Xiaowei Zhou and Xiaogang Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y7oxY5pq4j}\n}", "github": "", "reviewers": "QVta;GRUR;ySN8;ijRb", "pdf_size": 50696528, "rating": "5;5;6;7", "confidence": "5;5;5;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;3;3", "wc_summary": "49;85;97;98", "wc_strengths": "36;97;27;53", "wc_weaknesses": "36;204;396;56", "wc_questions": "68;161;75;43", "wc_limitations": "2;12;18;1", "wc_review": "191;559;613;251", "wc_reply_reviewers": "0;46;0;144", "wc_reply_authors": "0;247;0;468", "reply_reviewers": "0;1;0;2", "reply_authors": "1;2;1;4", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 82.25, 19.866743568083823 ], "wc_strengths_avg": [ 53.25, 26.929305598176867 ], "wc_weaknesses_avg": [ 173.0, 144.17697458332242 ], "wc_questions_avg": [ 86.75, 44.488060195967186 ], "wc_limitations_avg": [ 8.25, 7.084313657652377 ], "wc_review_avg": [ 403.5, 184.7180283567362 ], "wc_reply_reviewers_avg": [ 47.5, 58.794132360295954 ], "wc_reply_authors_avg": [ 178.75, 195.08123307996595 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Pi7J8moTc_MJ:scholar.google.com/&scioq=RobIR:+Robust+Inverse+Rendering+for+High-Illumination+Scenes&hl=en&as_sdt=0,48", "gs_version_total": 2, "email": "zju.edu.cn;;zju.edu.cn;tencent.com;tencent.com;zju.edu.cn;zju.edu.cn", "author_num": 7, "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Zhejiang University;Tencent", "aff_unique_dep": ";Tencent Games", "aff_unique_url": "https://www.zju.edu.cn;https://games.qq.com", "aff_unique_abbr": "ZJU;Tencent Games", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "ChronoEpilogi: Scalable Time Series Selection with Multiple Solutions", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93042", "id": "y8HUXkwAOg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y8HUXkwAOg", "openreview": "https://openreview.net/forum?id=y8HUXkwAOg", "poster": "/media/PosterPDFs/NeurIPS%202024/93042.png?t=1733538632.3280969", "project": "", "author_site": "Etienne Vareille, Michele Linardi, Ioannis Tsamardinos, Vassilis Christophides", "tldr": "", "abstract": "We consider the problem of selecting all the minimal-size subsets of multivariate time-series (TS) variables whose past leads to an optimal predictive model for the future (forecasting) of a given target variable (multiple feature selection problem for times-series). Identifying these subsets leads to gaining insights, domain intuition,and a better understanding of the data-generating mechanism; it is often the first step in causal modeling. While identifying a single solution to the feature selection problem suffices for forecasting purposes, identifying all such minimal-size, optimally predictive subsets is necessary for knowledge discovery and important to avoid misleading a practitioner. We develop the theory of multiple feature selection for time-series data, propose the ChronoEpilogi algorithm, and prove its soundness and completeness under two mild, broad, non-parametric distributional assumptions, namely Compositionality of the distribution and Interchangeability of time-series variables in solutions. Experiments on synthetic and real datasets demonstrate the scalability of ChronoEpilogi to hundreds of TS variables and its efficacy in identifying multiple solutions. In the real datasets, ChronoEpilogi is shown to reduce the number of TS variables by 96% (on average) by conserving or even improving forecasting performance. Furthermore, it is on par with GroupLasso performance, with the added benefit of providing multiple solutions.", "keywords": "Multivariate Time Series Causal Discovery;Forecasting;Explanations;Multiple Markov Boundaries", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Etienne Vareille;Michele Linardi;Ioannis Tsamardinos;Vassilis Christophides", "authorids": "~Etienne_Vareille1;~Michele_Linardi1;~Ioannis_Tsamardinos1;~Vassilis_Christophides1", "gender": "M;M;M;M", "homepage": ";;http://www.mensxmachina.org;https://www.etis-lab.fr/2022/01/13/vassilis-christophides/", "dblp": "359/9277;176/5473.html;16/4486.html;c/VassilisChristophides", "google_scholar": ";https://scholar.google.fr/citations?user=7LlyF3IAAAAJ;https://scholar.google.gr/citations?user=7fendUwAAAAJ;MdUjsa8AAAAJ", "orcid": ";;0000-0002-2492-959X;0000-0002-2076-1881", "linkedin": "https://fr.linkedin.com/in/etienne-vareille-a11146228;;ioannistsamardinos/;vassilis-christophides-137015248/", "or_profile": "~Etienne_Vareille1;~Michele_Linardi1;~Ioannis_Tsamardinos1;~Vassilis_Christophides1", "aff": "Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;Universit\u00e9 de Cergy-Pontoise;University of Crete;Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications", "aff_domain": "ensea.fr;u-cergy.fr;uoc.gr;ensea.fr", "position": "PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nvareille2024chronoepilogi,\ntitle={ChronoEpilogi: Scalable Time Series Selection with Multiple Solutions},\nauthor={Etienne Vareille and Michele Linardi and Ioannis Tsamardinos and Vassilis Christophides},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y8HUXkwAOg}\n}", "github": "", "reviewers": "5YNE;UZ5R;hgHX;fxYh;PVJ5", "pdf_size": 3507993, "rating": "5;6;6;7;7", "confidence": "2;3;3;3;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;3", "presentation": "3;3;3;3;4", "wc_summary": "38;104;239;69;86", "wc_strengths": "18;105;25;158;49", "wc_weaknesses": "132;109;76;135;22", "wc_questions": "207;20;44;132;37", "wc_limitations": "23;3;40;45;1", "wc_review": "418;341;424;539;195", "wc_reply_reviewers": "92;5;39;17;0", "wc_reply_authors": "273;0;0;0;0", "reply_reviewers": "2;1;1;1;0", "reply_authors": "3;1;1;1;1", "rating_avg": [ 6.2, 0.7483314773547882 ], "confidence_avg": [ 2.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.2, 0.39999999999999997 ], "wc_summary_avg": [ 107.2, 69.39567709879341 ], "wc_strengths_avg": [ 71.0, 53.16765934287497 ], "wc_weaknesses_avg": [ 94.8, 42.08277557386157 ], "wc_questions_avg": [ 88.0, 71.07460868692841 ], "wc_limitations_avg": [ 22.4, 18.194504664870653 ], "wc_review_avg": [ 383.4, 113.44531722376205 ], "wc_reply_reviewers_avg": [ 30.6, 33.51775648816609 ], "wc_reply_authors_avg": [ 54.6, 109.20000000000002 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 1.4, 0.8000000000000002 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8017837257372733, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:HztgscQ6T_kJ:scholar.google.com/&scioq=ChronoEpilogi:+Scalable+Time+Series+Selection+with+Multiple+Solutions&hl=en&as_sdt=0,10", "gs_version_total": 0, "email": "ensea.fr;u-cergy.fr;uoc.gr;ensea.fr", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Ecole Nationale Sup\u00e9rieure de l'Electronique et de ses Applications;Universite de Cergy-Pontoise;University of Crete", "aff_unique_dep": ";;", "aff_unique_url": "https://www.enssea.fr;https://www.u-cergy.fr;https://www.uoc.gr", "aff_unique_abbr": "ENSEA;UCP;UoC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "France;Greece" }, { "title": "Equivariant Machine Learning on Graphs with Nonlinear Spectral Filters", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93041", "id": "y8P633E5HQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y8P633E5HQ", "openreview": "https://openreview.net/forum?id=y8P633E5HQ", "poster": "/media/PosterPDFs/NeurIPS%202024/93041.png?t=1733015570.2760084", "project": "", "author_site": "Ya-Wei Eileen Lin, Ronen Talmon, Ron Levie", "tldr": "", "abstract": "Equivariant machine learning is an approach for designing deep learning models that respect the symmetries of the problem, with the aim of reducing model complexity and improving generalization. \nIn this paper, we focus on an extension of shift equivariance, which is the basis of convolution networks on images, to general graphs. Unlike images, graphs do not have a natural notion of domain translation. \nTherefore, we consider the graph functional shifts as the symmetry group: the unitary operators that commute with the graph shift operator. \nNotably, such symmetries operate in the signal space rather than directly in the spatial space.\nWe remark that each linear filter layer of a standard spectral graph neural network (GNN) commutes with graph functional shifts, but the activation function breaks this symmetry. Instead, we propose nonlinear spectral filters (NLSFs) that are fully equivariant to graph functional shifts and show that they have universal approximation properties. \nThe proposed NLSFs are based on a new form of spectral domain that is transferable between graphs. \nWe demonstrate the superior performance of NLSFs over existing spectral GNNs in node and graph classification benchmarks.", "keywords": "graph machine learning;graph signal processing;equivariant machine learning;geometric deep learning;spectral method;nonlinear method", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Ya-Wei Eileen Lin;Ronen Talmon;Ron Levie", "authorids": "~Ya-Wei_Eileen_Lin1;~Ronen_Talmon2;~Ron_Levie1", "gender": "F;;", "homepage": "https://ya-wei-eileen-lin.github.io/;http://ronentalmon.com/;", "dblp": "311/6484;54/7051;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Ya-Wei_Eileen_Lin1;~Ronen_Talmon2;~Ron_Levie1", "aff": "Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion;", "aff_domain": "technion.ac.il;technion.il;", "position": "PhD student;Associate Professor;", "bibtex": "@inproceedings{\nlin2024equivariant,\ntitle={Equivariant Machine Learning on Graphs with Nonlinear Spectral Filters},\nauthor={Ya-Wei Eileen Lin and Ronen Talmon and Ron Levie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y8P633E5HQ}\n}", "github": "", "reviewers": "fGo4;6hk5;GXWH;J5Wx;8cKX", "pdf_size": 2634009, "rating": "5;7;7;7;7", "confidence": "5;3;3;3;2", "soundness": "3;3;3;4;3", "novelty": "2;4;3;3;3", "presentation": "3;3;3;3;2", "wc_summary": "46;67;123;40;85", "wc_strengths": "62;47;26;50;53", "wc_weaknesses": "118;237;33;44;152", "wc_questions": "92;2;1;115;8", "wc_limitations": "31;9;1;5;1", "wc_review": "349;362;184;254;299", "wc_reply_reviewers": "29;19;0;17;20", "wc_reply_authors": "26;13;0;13;13", "reply_reviewers": "1;1;0;1;1", "reply_authors": "2;2;1;2;2", "rating_avg": [ 6.6, 0.7999999999999999 ], "confidence_avg": [ 3.2, 0.9797958971132712 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 72.2, 29.982661656364 ], "wc_strengths_avg": [ 47.6, 11.909659944767524 ], "wc_weaknesses_avg": [ 116.8, 74.84757845114297 ], "wc_questions_avg": [ 43.6, 49.503939237196064 ], "wc_limitations_avg": [ 9.4, 11.200000000000001 ], "wc_review_avg": [ 289.6, 65.23373360463127 ], "wc_reply_reviewers_avg": [ 17.0, 9.444575162494075 ], "wc_reply_authors_avg": [ 13.0, 8.221921916437786 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.9185586535436918, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ow4m_wbvmQYJ:scholar.google.com/&scioq=Equivariant+Machine+Learning+on+Graphs+with+Nonlinear+Spectral+Filters&hl=en&as_sdt=0,33", "gs_version_total": 5, "email": "technion.ac.il;technion.il;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Parallelizing Linear Transformers with the Delta Rule over Sequence Length", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93040", "id": "y8Rm4VNRPH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y8Rm4VNRPH", "openreview": "https://openreview.net/forum?id=y8Rm4VNRPH", "poster": "", "project": "", "author_site": "Songlin Yang, Bailin Wang, Yu Zhang, Yikang Shen, Yoon Kim", "tldr": "", "abstract": "Transformers with linear attention (i.e., linear transformers) and state-space models have recently been suggested as a viable linear-time alternative to transformers with softmax attention. However, these models still underperform transformers especially on tasks that require in-context retrieval. While more expressive variants of linear transformers which replace the additive update in linear transformers with the delta rule (DeltaNet) have been found to be more effective at associative recall, existing algorithms for training such models do not parallelize over sequence length and are thus inefficient to train on modern hardware. This work describes a hardware-efficient algorithm for training linear transformers with the delta rule, which exploits a memory-efficient representation for computing products of Householder matrices. This algorithm allows us to scale up DeltaNet to standard language modeling settings. We train a 1.3B model for 100B tokens and find that it outperforms recent linear-time baselines such as Mamba and GLA in terms of perplexity and zero-shot performance on downstream tasks. We also experiment with two hybrid models which combine DeltaNet layers with (1) sliding-window attention layers every other layer or (2) two global attention layers, and find that these hybrids outperform strong transformer baselines.", "keywords": "linear transformer;recurrent neural network;hardware-aware algorithm;state space model;sequence modeling;efficient training", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Songlin Yang;Bailin Wang;Yu Zhang;Yikang Shen;Yoon Kim", "authorids": "~Songlin_Yang1;~Bailin_Wang3;~Yu_Zhang36;~Yikang_Shen1;~Yoon_Kim1", "gender": "F;M;M;;M", "homepage": "https://sustcsonglin.github.io;https://yzhang.site;;https://people.csail.mit.edu/yoonkim/;https://berlino.github.io/", "dblp": ";50/671-92;152/8226;;218/7334", "google_scholar": "1chlis0AAAAJ;y3JK-1oAAAAJ;qff5rRYAAAAJ;n_ts4eYAAAAJ;", "orcid": ";0000-0002-8345-3835;;;", "linkedin": ";;;;", "or_profile": "~Songlin_Yang1;~Yu_Zhang36;~Yikang_Shen1;~Yoon_Kim1;~bailin_wang1", "aff": "Massachusetts Institute of Technology;Soochow University, China;International Business Machines;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;suda.edu.cn;ibm.com;mit.edu;mit.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nyang2024parallelizing,\ntitle={Parallelizing Linear Transformers with the Delta Rule over Sequence Length},\nauthor={Songlin Yang and Bailin Wang and Yu Zhang and Yikang Shen and Yoon Kim},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y8Rm4VNRPH}\n}", "github": "", "reviewers": "dem4;eUyw;pqA2", "pdf_size": 533319, "rating": "7;7;8", "confidence": "4;4;3", "soundness": "3;3;4", "novelty": "3;3;3", "presentation": "3;4;3", "wc_summary": "117;105;101", "wc_strengths": "34;205;78", "wc_weaknesses": "100;89;14", "wc_questions": "43;168;7", "wc_limitations": "1;3;1", "wc_review": "295;570;201", "wc_reply_reviewers": "15;160;16", "wc_reply_authors": "0;277;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 107.66666666666667, 6.79869268479038 ], "wc_strengths_avg": [ 105.66666666666667, 72.49980842886568 ], "wc_weaknesses_avg": [ 67.66666666666667, 38.21285414912399 ], "wc_questions_avg": [ 72.66666666666667, 68.99436369894444 ], "wc_limitations_avg": [ 1.6666666666666667, 0.9428090415820634 ], "wc_review_avg": [ 355.3333333333333, 156.5680540709233 ], "wc_reply_reviewers_avg": [ 63.666666666666664, 68.11917661145225 ], "wc_reply_authors_avg": [ 92.33333333333333, 130.5790522591158 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.9999999999999997, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9267043678662150440&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mit.edu;suda.edu.cn;ibm.com;mit.edu;mit.edu", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Soochow University;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.soochow.edu.cn;https://www.ibm.com", "aff_unique_abbr": "MIT;Soochow U;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "MomentumSMoE: Integrating Momentum into Sparse Mixture of Experts", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93039", "id": "y929esCZNJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y929esCZNJ", "openreview": "https://openreview.net/forum?id=y929esCZNJ", "poster": "", "project": "", "author_site": "Rachel S.Y. Teo, Tan Nguyen", "tldr": "", "abstract": "Sparse Mixture of Experts (SMoE) has become the key to unlocking unparalleled scalability in deep learning. SMoE has the potential to exponentially increase in parameter count while maintaining the efficiency of the model by only activating a small subset of these parameters for a given sample. However, it has been observed that SMoE suffers from unstable training and has difficulty adapting to new distributions, leading to the model's lack of robustness to data contamination. To overcome these limitations, we first establish a connection between the dynamics of the expert representations in SMoEs and gradient descent on a multi-objective optimization problem. Leveraging our framework, we then integrate momentum into SMoE and propose a new family of SMoEs, named MomentumSMoE. We theoretically prove and numerically validate that MomentumSMoE is more stable and robust than SMoE. In particular, we verify the advantages of MomentumSMoE over SMoE on a variety of practical tasks including ImageNet-1K object recognition and WikiText-103 language modeling. We demonstrate the applicability of MomentumSMoE to many types of SMoE models, including those in the Sparse MoE model for vision (V-MoE) and the Generalist Language Model (GLaM). We also show that other advanced momentum-based optimization methods, such as Adam, can be easily incorporated into the MomentumSMoE framework for designing new SMoE models with even better performance, almost negligible additional computation cost, and simple implementations.", "keywords": "Sparse Mixture of Experts;optimization;gradient descent;momentum;adam", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/174f42807904accb758129ab1cd95efc3a54efcd.zip", "author": "Rachel Teo;Tan Minh Nguyen", "authorids": "~Rachel_Teo1;~Tan_Minh_Nguyen1", "gender": "F;M", "homepage": "https://github.com/rachtsy;https://tanmnguyen89.github.io/", "dblp": "380/2055.html;255/4725", "google_scholar": ";OizOh88AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Rachel_Teo1;~Tan_Minh_Nguyen1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nteo2024momentumsmoe,\ntitle={Momentum{SM}oE: Integrating Momentum into Sparse Mixture of Experts},\nauthor={Rachel Teo and Tan Minh Nguyen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y929esCZNJ}\n}", "github": "", "reviewers": "oJG3;qEGD;GFpG;puL2", "pdf_size": 1749809, "rating": "5;6;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "91;40;67;64", "wc_strengths": "56;48;65;47", "wc_weaknesses": "160;47;297;13", "wc_questions": "19;4;37;64", "wc_limitations": "1;7;17;6", "wc_review": "327;146;483;194", "wc_reply_reviewers": "22;0;1438;0", "wc_reply_authors": "66;57;2222;57", "reply_reviewers": "1;0;5;0", "reply_authors": "3;2;11;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 65.5, 18.062391868188442 ], "wc_strengths_avg": [ 54.0, 7.245688373094719 ], "wc_weaknesses_avg": [ 129.25, 111.0908974668942 ], "wc_questions_avg": [ 31.0, 22.34949663862701 ], "wc_limitations_avg": [ 7.75, 5.80409338312195 ], "wc_review_avg": [ 287.5, 130.90550026641355 ], "wc_reply_reviewers_avg": [ 365.0, 619.5619420203278 ], "wc_reply_authors_avg": [ 600.5, 936.1806716654644 ], "reply_reviewers_avg": [ 1.5, 2.0615528128088303 ], "reply_authors_avg": [ 4.5, 3.774917217635375 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5222329678670935, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9583029310809681701&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "nus.edu.sg;nus.edu.sg", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Continuously Learning, Adapting, and Improving: A Dual-Process Approach to Autonomous Driving", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93038", "id": "y9huwsnGRJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y9huwsnGRJ", "openreview": "https://openreview.net/forum?id=y9huwsnGRJ", "poster": "/media/PosterPDFs/NeurIPS%202024/93038.png?t=1730899781.9106257", "project": "", "author_site": "Jianbiao Mei, Yukai Ma, Xuemeng Yang, Licheng Wen, Xinyu Cai, Xin Li, Daocheng Fu, Bo Zhang, Pinlong Cai, Min Dou, Botian Shi, Liang He, Yong Liu, Yu Qiao", "tldr": "", "abstract": "Autonomous driving has advanced significantly due to sensors, machine learning, and artificial intelligence improvements. However, prevailing methods struggle with intricate scenarios and causal relationships, hindering adaptability and interpretability in varied environments. To address the above problems, we introduce LeapAD, a novel paradigm for autonomous driving inspired by the human cognitive process. Specifically, LeapAD emulates human attention by selecting critical objects relevant to driving decisions, simplifying environmental interpretation, and mitigating decision-making complexities. Additionally, LeapAD incorporates an innovative dual-process decision-making module, which consists of an Analytic Process (System-II) for thorough analysis and reasoning, along with a Heuristic Process (System-I) for swift and empirical processing. The Analytic Process leverages its logical reasoning to accumulate linguistic driving experience, which is then transferred to the Heuristic Process by supervised fine-tuning. Through reflection mechanisms and a growing memory bank, LeapAD continuously improves itself from past mistakes in a closed-loop environment. Closed-loop testing in CARLA shows that LeapAD outperforms all methods relying solely on camera input, requiring 1-2 orders of magnitude less labeled data. Experiments also demonstrate that as the memory bank expands, the Heuristic Process with only 1.8B parameters can inherit the knowledge from a GPT-4 powered Analytic Process and achieve continuous performance improvement. Project page: https://pjlab-adg.github.io/LeapAD", "keywords": "Autonomous Driving;Dual-process System;Knowledge-Driven;Vision Language Model", "primary_area": "robotics", "supplementary_material": "", "author": "Jianbiao Mei;Yukai Ma;Xuemeng Yang;Licheng Wen;Xinyu Cai;Xin Li;Daocheng Fu;Bo Zhang;Pinlong Cai;Min Dou;Botian Shi;Liang He;Yong Liu;Yu Qiao", "authorids": "~Jianbiao_Mei1;~Yukai_Ma1;~Xuemeng_Yang1;~Licheng_Wen1;~Xinyu_Cai2;~Xin_Li50;~Daocheng_Fu1;~Bo_Zhang17;~Pinlong_Cai1;~Min_Dou1;~Botian_Shi1;~Liang_He2;~Yong_Liu11;~Yu_Qiao1", "gender": ";;F;M;;M;M;M;M;M;M;;M;", "homepage": "https://jianbiaomei.github.io;https://april.zju.edu.cn/team/yukai-ma/;;https://wenlc.cn;;https://sankin97.github.io/;;https://bobrown.github.io/boZhang.github.io/;https://pinlong-cai.github.io/;;;;https://person.zju.edu.cn/en/yongliu;", "dblp": "294/4308;;133/6494;;;;;36/2259-69;;;245/8742;;29/4867-7;", "google_scholar": "OUtPkg0AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;RNnjXTkAAAAJ;;https://scholar.google.com.hk/citations?user=7atts2cAAAAJ;vIU6eHYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=H6mQGfAAAAAJ;;K0PpvLkAAAAJ;;https://scholar.google.com.hk/citations?user=qYcgBbEAAAAJ;", "orcid": "0000-0003-3849-2736;0000-0001-8135-9012;0000-0002-7339-1650;;0000-0001-8500-9300;;;0000-0001-8052-782X;;;0000-0003-3677-7252;;0000-0003-4822-8939;", "linkedin": ";;;;;;;;;%E6%B0%91-%E7%AA%A6-a34b17a3;friskit/;;;", "or_profile": "~Jianbiao_Mei1;~Yukai_Ma1;~Xuemeng_Yang1;~Licheng_Wen1;~Xinyu_Cai2;~Xin_Li50;~Daocheng_Fu1;~Bo_Zhang17;~Pinlong_Cai1;~Min_Dou1;~Botian_Shi1;~Liang_He2;~Yong_Liu11;~Yu_Qiao1", "aff": "Zhejiang University;Zhejiang University;Shanghai AI Lab;Shanghai AI Lab;Shanghai Artificial Intelligence Laboratory;East China Normal University;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai Artificial Intelligence Laboratory;Shanghai AI Laboratory;Shanghai AI Lab;;Zhejiang University;", "aff_domain": "zju.edu.cn;zju.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ecnu.edu.cn;shlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;zju.edu.cn;", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;PhD student;Researcher;Researcher;Researcher;Researcher;Researcher;;Full Professor;", "bibtex": "@inproceedings{\nmei2024continuously,\ntitle={Continuously Learning, Adapting, and Improving: A Dual-Process Approach to Autonomous Driving},\nauthor={Jianbiao Mei and Yukai Ma and Xuemeng Yang and Licheng Wen and Xinyu Cai and Xin Li and Daocheng Fu and Bo Zhang and Pinlong Cai and Min Dou and Botian Shi and Liang He and Yong Liu and Yu Qiao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y9huwsnGRJ}\n}", "github": "", "reviewers": "SMxu;kf7r;eSfn;zRJR", "pdf_size": 36680144, "rating": "5;6;6;8", "confidence": "3;3;3;4", "soundness": "3;3;2;4", "novelty": "2;3;2;4", "presentation": "3;3;3;4", "wc_summary": "114;116;53;175", "wc_strengths": "48;135;20;48", "wc_weaknesses": "149;197;70;12", "wc_questions": "175;115;67;1", "wc_limitations": "1;8;2;1", "wc_review": "487;571;212;237", "wc_reply_reviewers": "126;9;4;10", "wc_reply_authors": "246;27;27;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 114.5, 43.142206712220926 ], "wc_strengths_avg": [ 62.75, 43.251445062564095 ], "wc_weaknesses_avg": [ 107.0, 71.16530053333577 ], "wc_questions_avg": [ 89.5, 63.83376849285964 ], "wc_limitations_avg": [ 3.0, 2.9154759474226504 ], "wc_review_avg": [ 376.75, 155.371128270345 ], "wc_reply_reviewers_avg": [ 37.25, 51.290228114134955 ], "wc_reply_authors_avg": [ 81.75, 94.82978171439603 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 14, 0 ], "corr_rating_confidence": 0.9271726499455306, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10723129017819638957&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;ecnu.edu.cn;shlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;pjlab.org.cn;;zju.edu.cn;", "author_num": 14, "aff_unique_index": "0;0;1;1;2;3;2;2;2;4;1;0", "aff_unique_norm": "Zhejiang University;Shanghai AI Lab;Shanghai Artificial Intelligence Laboratory;East China Normal University;Shanghai AI Laboratory", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.shanghaiailab.com;http://www.shailab.org/;http://www.ecnu.edu.cn;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "ZJU;SAIL;Shanghai AI Lab;ECNU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MC-DiT: Contextual Enhancement via Clean-to-Clean Reconstruction for Masked Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93037", "id": "y9sHKrdnRt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y9sHKrdnRt", "openreview": "https://openreview.net/forum?id=y9sHKrdnRt", "poster": "/media/PosterPDFs/NeurIPS%202024/93037.png?t=1731475072.755698", "project": "", "author_site": "Guanghao Zheng, Yuchen Liu, Wenrui Dai, Chenglin Li, Junni Zou, Hongkai Xiong", "tldr": "", "abstract": "Diffusion Transformer (DiT) is emerging as a cutting-edge trend in the landscape of generative diffusion models for image generation. Recently, masked-reconstruction strategies have been considered to improve the efficiency and semantic consistency in training DiT but suffer from deficiency in contextual information extraction. In this paper, we provide a new insight to reveal that noisy-to-noisy masked-reconstruction harms sufficient utilization of contextual information. We further demonstrate the insight with theoretical analysis and empirical study on the mutual information between unmasked and masked patches. Guided by such insight, we propose a novel training paradigm named MC-DiT for fully learning contextual information via diffusion denoising at different noise variances with clean-to-clean mask-reconstruction. Moreover, to avoid model collapse, we design two complementary branches of DiT decoders for enhancing the use of noisy patches and mitigating excessive reliance on clean patches in reconstruction. Extensive experimental results on 256$\\times$256 and 512$\\times$512 image generation on the ImageNet dataset demonstrate that the proposed MC-DiT achieves state-of-the-art performance in unconditional and conditional image generation with enhanced convergence speed.", "keywords": "Diffusion;Transformer;Masked Autoencoder", "primary_area": "machine_vision", "supplementary_material": "/attachment/94b98a0c9c66377060e2bc722ad7402f67603111.zip", "author": "Guanghao Zheng;Yuchen Liu;Wenrui Dai;Chenglin Li;Junni Zou;Hongkai Xiong", "authorids": "~Guanghao_Zheng1;~Yuchen_Liu4;~Wenrui_Dai1;~Chenglin_Li2;~Junni_Zou1;~Hongkai_Xiong1", "gender": "M;;;M;F;M", "homepage": ";https://min.sjtu.edu.cn/;;https://min.sjtu.edu.cn/En/FacultyShow/4?Vid=17;http://www.cs.sjtu.edu.cn/~zou-jn;http://min.sjtu.edu.cn", "dblp": "359/7717;69/10440-6;16/5135.html;;91/4613;21/3569", "google_scholar": "VBDzcnsAAAAJ;https://scholar.google.com.hk/citations?user=GRcH3nAAAAAJ;Xg8MhyAAAAAJ;ltW2JMcAAAAJ;https://scholar.google.com/citations?hl=zh-CN;bB16iN4AAAAJ", "orcid": ";0000-0002-3096-448X;;;;0000-0003-4552-0029", "linkedin": ";;;;;", "or_profile": "~Guanghao_Zheng1;~Yuchen_Liu4;~Wenrui_Dai1;~Chenglin_Li2;~Junni_Zou1;~Hongkai_Xiong1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2024mcdit,\ntitle={{MC}-DiT: Contextual Enhancement via Clean-to-Clean Reconstruction for Masked Diffusion Models},\nauthor={Guanghao Zheng and Yuchen Liu and Wenrui Dai and Chenglin Li and Junni Zou and Hongkai Xiong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y9sHKrdnRt}\n}", "github": "", "reviewers": "gqjf;TpTX;cs4r;Ngpd;YSTZ;RKJM;ZkfZ", "pdf_size": 38741584, "rating": "5;5;5;5;5;6;7", "confidence": "3;3;3;4;4;3;4", "soundness": "3;3;3;4;2;3;3", "novelty": "3;3;3;4;3;2;3", "presentation": "3;2;4;4;3;3;2", "wc_summary": "39;45;85;69;47;50;127", "wc_strengths": "31;42;72;109;14;32;55", "wc_weaknesses": "61;77;51;143;97;15;255", "wc_questions": "18;51;24;6;55;130;450", "wc_limitations": "1;8;11;9;69;23;3", "wc_review": "150;223;243;336;282;250;890", "wc_reply_reviewers": "10;42;13;0;28;0;21", "wc_reply_authors": "40;89;33;0;82;0;31", "reply_reviewers": "1;1;1;0;1;0;1", "reply_authors": "2;3;2;1;3;1;2", "rating_avg": [ 5.428571428571429, 0.7284313590846836 ], "confidence_avg": [ 3.4285714285714284, 0.49487165930539345 ], "soundness_avg": [ 3.0, 0.5345224838248488 ], "novelty_avg": [ 3.0, 0.5345224838248488 ], "presentation_avg": [ 3.0, 0.7559289460184544 ], "wc_summary_avg": [ 66.0, 28.97782403346196 ], "wc_strengths_avg": [ 50.714285714285715, 29.353579901968082 ], "wc_weaknesses_avg": [ 99.85714285714286, 73.24992164096945 ], "wc_questions_avg": [ 104.85714285714286, 145.89470623063212 ], "wc_limitations_avg": [ 17.714285714285715, 21.93962029697666 ], "wc_review_avg": [ 339.14285714285717, 230.8904678917372 ], "wc_reply_reviewers_avg": [ 16.285714285714285, 14.149349159330724 ], "wc_reply_authors_avg": [ 39.285714285714285, 32.713973797639646 ], "reply_reviewers_avg": [ 0.7142857142857143, 0.45175395145262565 ], "reply_authors_avg": [ 2.0, 0.7559289460184544 ], "replies_avg": [ 28, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.283069258536149, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Ac_cZPNbrM8J:scholar.google.com/&scioq=MC-DiT:+Contextual+Enhancement+via+Clean-to-Clean+Reconstruction+for+Masked+Diffusion+Models&hl=en&as_sdt=0,44", "gs_version_total": 0, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Causal Discovery from Event Sequences by Local Cause-Effect Attribution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93036", "id": "y9zIRxshzj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=y9zIRxshzj", "openreview": "https://openreview.net/forum?id=y9zIRxshzj", "poster": "/media/PosterPDFs/NeurIPS%202024/93036.png?t=1733328187.5333507", "project": "", "author_site": "Joscha C\u00fcppers, Sascha Xu, Ahmed Musa, Jilles Vreeken", "tldr": "", "abstract": "Sequences of events, such as crashes in the stock market or outages in a network, contain strong temporal dependencies, whose understanding is crucial to react to and influence future events. In this paper, we study the problem of discovering the underlying causal structure from event sequences. To this end, we introduce a new causal model, where individual events of the cause trigger events of the effect with dynamic delays. We show that in contrast to existing methods based on Granger causality, our model is identifiable for both instant and delayed effects.\n\nWe base our approach on the Algorithmic Markov Condition, by which we identify the true causal network as the one that minimizes the Kolmogorov complexity. As the Kolmogorov complexity is not computable, we instantiate our model using Minimum Description Length and show that the resulting score identifies the causal direction. To discover causal graphs, we introduce the Cascade algorithm, which adds edges in topological order. Extensive evaluation shows that Cascade outperforms existing methods in settings with instantaneous effects, noise, and multiple colliders, and discovers insightful causal graphs on real-world data.", "keywords": "causality;causal discovery;event sequences", "primary_area": "causal_inference", "supplementary_material": "/attachment/d51dac59a82ef68b26357bde7250dabe465005c3.zip", "author": "Joscha C\u00fcppers;Sascha Xu;Ahmed Musa;Jilles Vreeken", "authorids": "~Joscha_C\u00fcppers1;~Sascha_Xu1;~Ahmed_Musa1;~Jilles_Vreeken2", "gender": ";M;M;M", "homepage": "https://eda.rg.cispa.io/people/joscha/?q=joscha;;;https://vreeken.eu", "dblp": "285/3656;247/3300;;94/6462", "google_scholar": ";https://scholar.google.de/citations?user=82xDR9IAAAAJ;;p5HEQfIAAAAJ", "orcid": "0000-0001-6628-2192;0009-0008-5191-0342;;0000-0002-2310-2806", "linkedin": ";sascha-xu-36073216a/;ahmed-musa/;jilles-vreeken-b3b05b58/", "or_profile": "~Joscha_C\u00fcppers1;~Sascha_Xu1;~Ahmed_Musa1;~Jilles_Vreeken2", "aff": "CISPA Helmholtz Center for Information Security;CISPA, saarland university, saarland informatics campus;Saarland University, Universit\u00e4t des Saarlandes;CISPA Helmholtz Center for Information Security", "aff_domain": "cispa.de;cispa.saarland;cs.uni-saarland.de;cispa.de", "position": "PhD student;PhD student;MS student;Tenured Faculty", "bibtex": "@inproceedings{\nc{\\\"u}ppers2024causal,\ntitle={Causal Discovery from Event Sequences by Local Cause-Effect Attribution},\nauthor={Joscha C{\\\"u}ppers and Sascha Xu and Ahmed Musa and Jilles Vreeken},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=y9zIRxshzj}\n}", "github": "", "reviewers": "WfU1;hRgg;e9Hj;T6V6", "pdf_size": 649154, "rating": "5;5;6;6", "confidence": "3;3;3;4", "soundness": "3;3;3;2", "novelty": "3;3;3;3", "presentation": "3;2;3;3", "wc_summary": "71;43;51;109", "wc_strengths": "102;50;35;134", "wc_weaknesses": "117;136;43;285", "wc_questions": "1;67;110;169", "wc_limitations": "1;33;12;52", "wc_review": "292;329;251;749", "wc_reply_reviewers": "10;12;10;42", "wc_reply_authors": "0;35;35;16", "reply_reviewers": "1;1;1;1", "reply_authors": "1;2;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 68.5, 25.509802037648196 ], "wc_strengths_avg": [ 80.25, 39.76414842543469 ], "wc_weaknesses_avg": [ 145.25, 87.8475241540705 ], "wc_questions_avg": [ 86.75, 61.33667336920058 ], "wc_limitations_avg": [ 24.5, 19.60229578391266 ], "wc_review_avg": [ 405.25, 200.37262163279692 ], "wc_reply_reviewers_avg": [ 18.5, 13.592277219068187 ], "wc_reply_authors_avg": [ 21.5, 14.637281168304447 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1VVgLJWR45kJ:scholar.google.com/&scioq=Causal+Discovery+from+Event+Sequences+by+Local+Cause-Effect+Attribution&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "cispa.de;cispa.saarland;cs.uni-saarland.de;cispa.de", "author_num": 4, "aff_unique_index": "0;1;1;0", "aff_unique_norm": "CISPA Helmholtz Center for Information Security;Saarland University", "aff_unique_dep": ";CISPA", "aff_unique_url": "https://www.cispa.de/;https://www.uni-saarland.de", "aff_unique_abbr": "CISPA;Saarland U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland Informatics Campus", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Sketchy Moment Matching: Toward Fast and Provable Data Selection for Finetuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93035", "id": "yAAQWBMGiT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yAAQWBMGiT", "openreview": "https://openreview.net/forum?id=yAAQWBMGiT", "poster": "/media/PosterPDFs/NeurIPS%202024/93035.png?t=1729448678.6369214", "project": "", "author_site": "Yijun Dong, Viet Hoang Phan, Xiang Pan, Qi Lei", "tldr": "", "abstract": "We revisit data selection in a modern context of finetuning from a fundamental perspective. Extending the classical wisdom of variance minimization in low dimensions to high-dimensional finetuning, our generalization analysis unveils the importance of additionally reducing bias induced by low-rank approximation. Inspired by the variance-bias tradeoff in high dimensions from the theory, we introduce Sketchy Moment Matching (SkMM), a scalable data selection scheme with two stages. (i) First, the bias is controlled using gradient sketching that explores the finetuning parameter space for an informative low-dimensional subspace $\\mathcal{S}$; (ii) then the variance is reduced over $\\mathcal{S}$ via moment matching between the original and selected datasets. Theoretically, we show that gradient sketching is fast and provably accurate: selecting $n$ samples by reducing variance over $\\mathcal{S}$ preserves the fast-rate generalization $O(\\dim(\\mathcal{S})/n)$, independent of the parameter dimension. Empirically, we concretize the variance-bias balance via synthetic experiments and demonstrate the effectiveness of SkMM for finetuning in real vision tasks.", "keywords": "Data selection;Finetuning;Sketching;Johnson-Lindenstrauss transform", "primary_area": "learning_theory", "supplementary_material": "", "author": "Yijun Dong;Hoang Phan;Xiang Pan;Qi Lei", "authorids": "~Yijun_Dong1;~Hoang_Phan1;~Xiang_Pan3;~Qi_Lei1", "gender": "F;M;F;M", "homepage": "https://dyjdongyijun.github.io/;https://xiangpan.info;https://cecilialeiqi.github.io/;https://viethoang1512.github.io/", "dblp": "200/1432;59/749-1.html;;295/0299", "google_scholar": "l3bmbCkAAAAJ;SxU03foAAAAJ;kGOgaowAAAAJ;", "orcid": ";0000-0002-9828-5416;;", "linkedin": "yijun-dong-82638513b/;;;", "or_profile": "~Yijun_Dong1;~Xiang_Pan3;~Qi_Lei1;~Hoang_Viet_Phan1", "aff": "New York University;New York University;New York University;New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "position": "Postdoc;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ndong2024sketchy,\ntitle={Sketchy Moment Matching: Toward Fast and Provable Data Selection for Finetuning},\nauthor={Yijun Dong and Hoang Phan and Xiang Pan and Qi Lei},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yAAQWBMGiT}\n}", "github": "", "reviewers": "gV74;iMuZ;dwk6;NGFs", "pdf_size": 864258, "rating": "5;5;6;6", "confidence": "3;3;3;3", "soundness": "2;2;3;2", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "54;197;93;191", "wc_strengths": "28;82;51;60", "wc_weaknesses": "86;113;54;162", "wc_questions": "84;58;36;125", "wc_limitations": "6;1;1;1", "wc_review": "258;451;235;539", "wc_reply_reviewers": "12;61;23;21", "wc_reply_authors": "155;103;0;211", "reply_reviewers": "1;2;1;1", "reply_authors": "3;3;1;3", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 133.75, 61.844057919900436 ], "wc_strengths_avg": [ 55.25, 19.356846334049358 ], "wc_weaknesses_avg": [ 103.75, 39.58771905528279 ], "wc_questions_avg": [ 75.75, 33.12382073372575 ], "wc_limitations_avg": [ 2.25, 2.165063509461097 ], "wc_review_avg": [ 370.75, 128.34402011780682 ], "wc_reply_reviewers_avg": [ 29.25, 18.793283374652763 ], "wc_reply_authors_avg": [ 117.25, 77.72507639108501 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17672271266554918212&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "yAKuSbIwR7", "title": "Neural Synaptic Balance", "track": "main", "status": "Reject", "tldr": "", "abstract": "For a given additive cost function $R$ (regularizer), a neuron is said to be in balance if the total cost of its input weights is equal to the total cost of its output weights. The basic example is provided by feedforward layered networks of ReLU units trained with $L_2$ regularizers, which exhibit balance after proper training. We develop a general theory that extends this phenomenon in three broad directions in terms of: (1) activation functions; (2) regularizers, including all $L_p$ ($p>0$) regularizers; and (3) architectures (non-layered, recurrent, convolutional, mixed activations). Gradient descent on the error function alone does not converge in general to a balanced state where every neuron is in balance, even when starting from a balanced state. However, gradient descent on the regularized error function must converge to a balanced state, and thus network balance can be used to assess learning progress. The theory is based on two local neuronal operations: scaling which is commutative, and balancing which is not commutative. Finally, and most importantly, given any initial set of weights, when local balancing operations are applied to each neuron in a stochastic manner, global order always emerges through the convergence of the stochastic algorithm to the same unique set of balanced weights. The reason for this convergence is the existence of an underlying strictly convex optimization problem where the relevant variables are constrained to a linear, only architecture-dependent, manifold. The theory is corroborated through simulations carried out on benchmark data sets. Balancing operations are entirely local and thus physically plausible in biological and neuromorphic networks.", "keywords": "neural networks;deep learning;activation functions;regularizations;scaling;neural balance", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/6770be9b085d16afe2923a731602d90a3a1db1ae.zip", "author": "Pierre Baldi;Alireza Rahmansetayesh", "authorids": "~Pierre_Baldi1;~Alireza_Rahmansetayesh1", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "orcid": ";", "linkedin": ";", "or_profile": ";", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@misc{\nanonymous2024neural,\ntitle={Neural Synaptic Balance},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=yAKuSbIwR7}\n}", "github": "", "project": "", "reviewers": "QTyq;VcLX;cT2m;TDzF", "site": "https://openreview.net/forum?id=yAKuSbIwR7", "pdf_size": 2441499, "rating": "3;5;5;5", "confidence": "5;3;4;3", "soundness": "2;3;4;2", "novelty": "1;2;2;2", "presentation": "3;2;3;2", "wc_summary": "77;81;48;51", "wc_strengths": "25;16;31;55", "wc_weaknesses": "147;60;133;170", "wc_questions": "37;94;98;76", "wc_limitations": "33;3;1;31", "wc_review": "319;254;311;383", "wc_reply_reviewers": "0;0;115;72", "wc_reply_authors": "0;0;29;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 1.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 64.25, 14.85555451674558 ], "wc_strengths_avg": [ 31.75, 14.446020213193666 ], "wc_weaknesses_avg": [ 127.5, 41.14911906711977 ], "wc_questions_avg": [ 76.25, 24.128561913218117 ], "wc_limitations_avg": [ 17.0, 15.033296378372908 ], "wc_review_avg": [ 316.75, 45.72950360544055 ], "wc_reply_reviewers_avg": [ 46.75, 49.15981590689697 ], "wc_reply_authors_avg": [ 7.25, 12.55736835487436 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1 }, { "title": "Proving Theorems Recursively", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93034", "id": "yAa5l92TtQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yAa5l92TtQ", "openreview": "https://openreview.net/forum?id=yAa5l92TtQ", "poster": "", "project": "", "author_site": "Haiming Wang, Huajian Xin, Zhengying Liu, Wenda Li, Yinya Huang, Jianqiao Lu, Zhicheng Yang, Jing Tang, Jian Yin, Zhenguo Li, Xiaodan Liang", "tldr": "", "abstract": "Recent advances in automated theorem proving leverages language models to explore expanded search spaces by step-by-step proof generation. However, such approaches are usually based on short-sighted heuristics (e.g., log probability or value function scores) that potentially lead to suboptimal or even distracting subgoals, preventing us from finding longer proofs. To address this challenge, we propose POETRY (PrOvE Theorems RecursivelY), which proves theorems in a recursive, level-by-level manner in the Isabelle theorem prover. Unlike previous step-by-step methods, POETRY searches for a verifiable sketch of the proof at each level and focuses on solving the current level's theorem or conjecture. Detailed proofs of intermediate conjectures within the sketch are temporarily replaced by a placeholder tactic called sorry, deferring their proofs to subsequent levels. This approach allows the theorem to be tackled incrementally by outlining the overall theorem at the first level and then solving the intermediate conjectures at deeper levels. Experiments are conducted on the miniF2F and PISA datasets and significant performance gains are observed in our POETRY approach over state-of-the-art methods. POETRY on miniF2F achieves an average proving success rate improvement of 5.1%. Moreover, we observe a substantial increase in the maximum proof length found by POETRY, from 10 to 26.", "keywords": "Nerual Theorem Proving;Language Model", "primary_area": "machine_learning_for_other_sciences_and_fields", "supplementary_material": "/attachment/d9713b9fee3d90ce5089e2db9efa2325fcc37b0f.zip", "author": "Haiming Wang;Huajian Xin;Zhengying Liu;Wenda Li;Yinya Huang;Jianqiao Lu;Zhicheng YANG;Jing Tang;Jian Yin;Zhenguo Li;Xiaodan Liang", "authorids": "~Haiming_Wang1;~Huajian_Xin1;~Zhengying_Liu2;~Wenda_Li1;~Yinya_Huang1;~Jianqiao_Lu1;~Zhicheng_YANG5;~Jing_Tang5;~Jian_Yin3;~Zhenguo_Li1;~Xiaodan_Liang2", "gender": "M;M;M;M;;M;;M;M;M;F", "homepage": ";https://xinhuajian.wordpress.com/;;https://wenda302.github.io;https://eleanor-h.github.io/;https://jianqiaolu.github.io/;;https://sites.google.com/view/jtang;http://sai.sysu.edu.cn/teacher/teacher01/1385356.htm;http://www.ee.columbia.edu/~zgli/;https://www.sysu-hcp.net/", "dblp": "97/604;356/3551;241/1782;132/9868.html;282/1562;358/4791;;83/663-4;95/578-1;23/6479;", "google_scholar": "zDPqP6AAAAAJ;E5M9x8wAAAAJ;http:// DFme0joAAAAJ;ufYxQkEAAAAJ;dWStaRIAAAAJ;uIW6d6AAAAAJ;;https://scholar.google.com/citations?hl=en;;XboZC1AAAAAJ;voxznZAAAAAJ", "orcid": ";;;;0000-0002-0686-0832;;;0000-0002-0785-707X;;;", "linkedin": ";;;;;jianqiao-lu-308620201/;;;;;", "or_profile": "~Haiming_Wang1;~Huajian_Xin1;~Zhengying_Liu2;~Wenda_Li1;~Yinya_Huang1;~Jianqiao_Lu1;~Zhicheng_YANG5;~Jing_Tang5;~Jian_Yin3;~Zhenguo_Li1;~Xiaodan_Liang2", "aff": "SUN YAT-SEN UNIVERSITY;University of Edinburgh, University of Edinburgh;Huawei Technologies Ltd.;University of Edinburgh;City University of Hong Kong;University of Hong Kong;;Hong Kong University of Science and Technology;SUN YAT-SEN UNIVERSITY;Huawei Noah's Ark Lab;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;ed.ac.uk;huawei.com;ed.ac.uk;cityu.edu.hk;hku.hk;;ust.hk;sysu.edu.cn;huawei.com;sysu.edu.cn", "position": "PhD student;PhD student;Researcher;Lecturer;Postdoc;PhD student;;Assistant Professor;Full Professor;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2024proving,\ntitle={Proving Theorems Recursively},\nauthor={Haiming Wang and Huajian Xin and Zhengying Liu and Wenda Li and Yinya Huang and Jianqiao Lu and Zhicheng YANG and Jing Tang and Jian Yin and Zhenguo Li and Xiaodan Liang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yAa5l92TtQ}\n}", "github": "", "reviewers": "9pow;BhWC;p8XS;eTZY", "pdf_size": 640025, "rating": "5;5;7;7", "confidence": "4;4;4;3", "soundness": "3;2;4;4", "novelty": "3;3;4;3", "presentation": "3;3;4;4", "wc_summary": "166;57;112;112", "wc_strengths": "107;43;45;74", "wc_weaknesses": "326;156;66;158", "wc_questions": "230;41;50;5", "wc_limitations": "3;1;11;1", "wc_review": "832;298;284;350", "wc_reply_reviewers": "263;415;0;0", "wc_reply_authors": "319;510;0;0", "reply_reviewers": "1;1;0;0", "reply_authors": "3;3;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 111.75, 38.53813046840752 ], "wc_strengths_avg": [ 67.25, 26.022826518270456 ], "wc_weaknesses_avg": [ 176.5, 93.97207031879206 ], "wc_questions_avg": [ 81.5, 87.3741952752642 ], "wc_limitations_avg": [ 4.0, 4.123105625617661 ], "wc_review_avg": [ 441.0, 227.07928130941406 ], "wc_reply_reviewers_avg": [ 169.5, 177.81521307244776 ], "wc_reply_authors_avg": [ 207.25, 217.97405235486173 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.0, 1.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7654364614782925952&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "sysu.edu.cn;ed.ac.uk;huawei.com;ed.ac.uk;cityu.edu.hk;hku.hk;;ust.hk;sysu.edu.cn;huawei.com;sysu.edu.cn", "author_num": 11, "aff_unique_index": "0;1;2;1;3;4;5;0;2;0", "aff_unique_norm": "Sun Yat-sen University;University of Edinburgh;Huawei;City University of Hong Kong;University of Hong Kong;Hong Kong University of Science and Technology", "aff_unique_dep": ";;Huawei Technologies;;;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.ed.ac.uk;https://www.huawei.com;https://www.cityu.edu.hk;https://www.hku.hk;https://www.ust.hk", "aff_unique_abbr": "SYSU;Edinburgh;Huawei;CityU;HKU;HKUST", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;0;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "title": "In Pursuit of Causal Label Correlations for Multi-label Image Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93033", "id": "yBHbeSpwYS", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yBHbeSpwYS", "openreview": "https://openreview.net/forum?id=yBHbeSpwYS", "poster": "/media/PosterPDFs/NeurIPS%202024/93033.png?t=1728703034.623924", "project": "", "author_site": "Zhao-Min Chen, Xin Jin, YisuGe, Sixian Chan", "tldr": "", "abstract": "Multi-label image recognition aims to predict all objects present in an input image. A common belief is that modeling the correlations between objects is beneficial for multi-label recognition. However, this belief has been recently challenged as label correlations may mislead the classifier in testing, due to the possible contextual bias in training. Accordingly, a few of recent works not only discarded label correlation modeling, but also advocated to remove contextual information for multi-label image recognition. This work explicitly explores label correlations for multi-label image recognition based on a principled causal intervention approach. With causal intervention, we pursue causal label correlations and suppress spurious label correlations, as the former tend to convey useful contextual cues while the later may mislead the classifier. Specifically, we decouple label-specific features with a Transformer decoder attached to the backbone network, and model the confounders which may give rise to spurious correlations by clustering spatial features of all training images. Based on label-specific features and confounders, we employ a cross-attention module to implement causal intervention, quantifying the causal correlations from all object categories to each predicted object category. Finally, we obtain image labels by combining the predictions from decoupled features and causal label correlations. Extensive experiments clearly validate the effectiveness of our approach for multi-label image recognition in both common and cross-dataset settings.", "keywords": "Multi-label;Label correlation;Causal intervention", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Zhao-Min Chen;Xin Jin;YisuGe;Sixian Chan", "authorids": "~Zhao-Min_Chen3;~Xin_Jin15;~YisuGe1;~Sixian_Chan1", "gender": "M;M;M;M", "homepage": ";;;", "dblp": ";226/5163;176/6694;178/7243", "google_scholar": "ddsm5qYAAAAJ;b1CALvUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;MF8EAkcAAAAJ", "orcid": ";0000-0002-7620-5120;;0000-0001-9492-0796", "linkedin": ";;;%E9%92%8A%E6%B0%91-%E9%99%88-694a77187/", "or_profile": "~Xin_Jin15;~YisuGe1;~Sixian_Chan1;~Zhaomin_Chen1", "aff": "Samsung R&D Institute China-Nanjing (SRC-N);Wenzhou University;Zhejiang University of Technology;Wenzhou University", "aff_domain": "samsung.com;wzu.edu.cn;zjut.edu.cn;wzu.edu.cn", "position": "Researcher;Lecturer;Instructor;Associate Professor", "bibtex": "@inproceedings{\nchen2024in,\ntitle={In Pursuit of Causal Label Correlations for Multi-label Image Recognition},\nauthor={Zhao-Min Chen and Xin Jin and YisuGe and Sixian Chan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yBHbeSpwYS}\n}", "github": "", "reviewers": "wRCW;NEe5;BNe7", "pdf_size": 1375295, "rating": "4;4;8", "confidence": "3;4;5", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "3;2;4", "wc_summary": "49;74;62", "wc_strengths": "124;49;27", "wc_weaknesses": "194;137;57", "wc_questions": "223;156;32", "wc_limitations": "56;4;40", "wc_review": "646;420;218", "wc_reply_reviewers": "0;158;12", "wc_reply_authors": "46;347;9", "reply_reviewers": "0;2;1", "reply_authors": "2;4;2", "rating_avg": [ 5.333333333333333, 1.8856180831641267 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 61.666666666666664, 10.208928554075703 ], "wc_strengths_avg": [ 66.66666666666667, 41.523754914774045 ], "wc_weaknesses_avg": [ 129.33333333333334, 56.19213072624631 ], "wc_questions_avg": [ 137.0, 79.1243746684084 ], "wc_limitations_avg": [ 33.333333333333336, 21.74600857373345 ], "wc_review_avg": [ 428.0, 174.82181404695086 ], "wc_reply_reviewers_avg": [ 56.666666666666664, 71.82076456166575 ], "wc_reply_authors_avg": [ 134.0, 151.36930556313808 ], "reply_reviewers_avg": [ 1.0, 0.816496580927726 ], "reply_authors_avg": [ 2.6666666666666665, 0.9428090415820634 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.8660254037844387, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:5s_J_bwtl08J:scholar.google.com/&scioq=In+Pursuit+of+Causal+Label+Correlations+for+Multi-label+Image+Recognition&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "samsung.com;wzu.edu.cn;zjut.edu.cn;wzu.edu.cn", "author_num": 4, "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Samsung;Wenzhou University;Zhejiang University of Technology", "aff_unique_dep": "R&D;;", "aff_unique_url": "https://www.samsung.com/cn;https://www.wzu.edu.cn;https://www.zjut.edu.cn", "aff_unique_abbr": "SRC-N;WZU;ZJUT", "aff_campus_unique_index": "0", "aff_campus_unique": "Nanjing;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Text-DiFuse: An Interactive Multi-Modal Image Fusion Framework based on Text-modulated Diffusion Model", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93032", "id": "yBrxziByeG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yBrxziByeG", "openreview": "https://openreview.net/forum?id=yBrxziByeG", "poster": "/media/PosterPDFs/NeurIPS%202024/93032.png?t=1731142446.6737683", "project": "", "author_site": "Hao Zhang, Lei Cao, Jiayi Ma", "tldr": "", "abstract": "Existing multi-modal image fusion methods fail to address the compound degradations presented in source images, resulting in fusion images plagued by noise, color bias, improper exposure, etc. Additionally, these methods often overlook the specificity of foreground objects, weakening the salience of the objects of interest within the fused images. To address these challenges, this study proposes a novel interactive multi-modal image fusion framework based on the text-modulated diffusion model, called Text-DiFuse. First, this framework integrates feature-level information integration into the diffusion process, allowing adaptive degradation removal and multi-modal information fusion. This is the first attempt to deeply and explicitly embed information fusion within the diffusion process, effectively addressing compound degradation in image fusion. Second, by embedding the combination of the text and zero-shot location model into the diffusion fusion process, a text-controlled fusion re-modulation strategy is developed. This enables user-customized text control to improve fusion performance and highlight foreground objects in the fused images. Extensive experiments on diverse public datasets show that our Text-DiFuse achieves state-of-the-art fusion performance across various scenarios with complex degradation. Moreover, the semantic segmentation experiment validates the significant enhancement in semantic performance achieved by our text-controlled fusion re-modulation strategy. The code is publicly available at https://github.com/Leiii-Cao/Text-DiFuse.", "keywords": "Image fusion;multi-modal fusion;text;diffusion", "primary_area": "machine_vision", "supplementary_material": "/attachment/48520d51bcdb809bda8090cf60d05790fbc0702a.zip", "author": "Hao Zhang;Lei Cao;Jiayi Ma", "authorids": "~Hao_Zhang26;~Lei_Cao10;~Jiayi_Ma2", "gender": "M;M;M", "homepage": ";;https://sites.google.com/site/jiayima2013/home", "dblp": "55/2270-73;;96/9989", "google_scholar": "Yjt5n6AAAAAJ;https://scholar.google.com/citations?hl=zh-CN;73trMQkAAAAJ", "orcid": "0000-0001-5467-3428;;0000-0003-3264-3265", "linkedin": ";;", "or_profile": "~Hao_Zhang26;~Lei_Cao10;~Jiayi_Ma2", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nzhang2024textdifuse,\ntitle={Text-DiFuse: An Interactive Multi-Modal Image Fusion Framework based on Text-modulated Diffusion Model},\nauthor={Hao Zhang and Lei Cao and Jiayi Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yBrxziByeG}\n}", "github": "", "reviewers": "TtBn;DZyq;NcHo;RoRE", "pdf_size": 23547502, "rating": "6;6;7;7", "confidence": "5;2;5;5", "soundness": "4;3;4;3", "novelty": "4;2;4;3", "presentation": "3;2;4;2", "wc_summary": "69;150;86;92", "wc_strengths": "123;97;126;126", "wc_weaknesses": "219;98;237;228", "wc_questions": "55;112;6;7", "wc_limitations": "7;22;10;12", "wc_review": "473;479;465;465", "wc_reply_reviewers": "6;0;66;69", "wc_reply_authors": "203;72;133;356", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.25, 1.299038105676658 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 99.25, 30.49077729412617 ], "wc_strengths_avg": [ 118.0, 12.186057606953941 ], "wc_weaknesses_avg": [ 195.5, 56.65024271792664 ], "wc_questions_avg": [ 45.0, 43.45687517528153 ], "wc_limitations_avg": [ 12.75, 5.629165124598851 ], "wc_review_avg": [ 470.5, 5.894913061275798 ], "wc_reply_reviewers_avg": [ 35.25, 32.33709170596515 ], "wc_reply_authors_avg": [ 191.0, 105.94102132790678 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18330107385603297455&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Stepping Forward on the Last Mile", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93031", "id": "yCh1z6Dcto", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yCh1z6Dcto", "openreview": "https://openreview.net/forum?id=yCh1z6Dcto", "poster": "/media/PosterPDFs/NeurIPS%202024/93031.png?t=1730228944.9839957", "project": "", "author_site": "Chen Feng, Jay Zhuo, Parker Zhang, Ramchalam Kinattinkara Ramakrishnan, Zhaocong Yuan, Andrew Zou Li", "tldr": "", "abstract": "Continuously adapting pre-trained models to local data on resource constrained edge devices is the \\emph{last mile} for model deployment. However, as models increase in size and depth, backpropagation requires a large amount of memory, which becomes prohibitive for edge devices. In addition, most existing low power neural processing engines (e.g., NPUs, DSPs, MCUs, etc.) are designed as fixed-point inference accelerators, without training capabilities. Forward gradients, solely based on directional derivatives computed from two forward calls, have been recently used for model training, with substantial savings in computation and memory. However, the performance of quantized training with fixed-point forward gradients remains unclear. In this paper, we investigate the feasibility of on-device training using fixed-point forward gradients, by conducting comprehensive experiments across a variety of deep learning benchmark tasks in both vision and audio domains. We propose a series of algorithm enhancements that further reduce the memory footprint, and the accuracy gap compared to backpropagation. An empirical study on how training with forward gradients navigates in the loss landscape is further explored. Our results demonstrate that on the last mile of model customization on edge devices, training with fixed-point forward gradients is a feasible and practical approach.", "keywords": "On-device model adaptation;Fixed-point forward gradient learning;Low memory;Edge devices", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Chen Feng;Shaojie Zhuo;Xiaopeng Zhang;Ramchalam Kinattinkara Ramakrishnan;Zhaocong Yuan;Andrew Zou Li", "authorids": "~Chen_Feng9;~Shaojie_Zhuo3;~Xiaopeng_Zhang1;~Ramchalam_Kinattinkara_Ramakrishnan1;~Zhaocong_Yuan1;~Andrew_Zou_Li1", "gender": "M;M;M;M;;F", "homepage": ";;;https://justin-yuan.github.io/;;", "dblp": ";;;;;", "google_scholar": "td18GeEAAAAJ;;JgscZTcAAAAJ;https://scholar.google.ca/citations?user=94mfGXkAAAAJ;https://scholar.google.ca/citations?hl=en;", "orcid": ";;;;;", "linkedin": ";parker-xiaopeng-zhang-56433b6/;;justin-yuan/;;chen-feng-84603638/", "or_profile": "~Shaojie_Zhuo3;~Xiaopeng_Zhang1;~Ramchalam_Kinattinkara_Ramakrishnan1;~Zhaocong_Yuan1;~Andrew_Zou_Li1;~Feng_Chen2", "aff": "Qualcomm AI Research;Qualcomm Inc, QualComm;Qualcomm Inc, QualComm;;University of Toronto;Qualcomm Inc, QualComm", "aff_domain": "qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;;utoronto.ca;qti.qualcomm.com", "position": "Researcher;Researcher;Researcher;;Undergrad student;Researcher", "bibtex": "@inproceedings{\nfeng2024stepping,\ntitle={Stepping Forward on the Last Mile},\nauthor={Chen Feng and Shaojie Zhuo and Xiaopeng Zhang and Ramchalam Kinattinkara Ramakrishnan and Zhaocong Yuan and Andrew Zou Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yCh1z6Dcto}\n}", "github": "", "reviewers": "vPS7;oWX1;wcC5;uc7y", "pdf_size": 1348192, "rating": "5;5;5;7", "confidence": "3;3;4;3", "soundness": "3;2;3;3", "novelty": "3;2;3;3", "presentation": "2;3;3;3", "wc_summary": "95;119;88;633", "wc_strengths": "113;25;34;66", "wc_weaknesses": "260;28;89;220", "wc_questions": "57;44;56;615", "wc_limitations": "8;1;63;73", "wc_review": "533;217;330;1607", "wc_reply_reviewers": "140;162;0;28", "wc_reply_authors": "524;665;0;18", "reply_reviewers": "2;1;0;1", "reply_authors": "3;2;1;2", "rating_avg": [ 5.5, 0.8660254037844386 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 233.75, 230.7936036808646 ], "wc_strengths_avg": [ 59.5, 34.44198019858905 ], "wc_weaknesses_avg": [ 149.25, 94.34345499291406 ], "wc_questions_avg": [ 193.0, 243.6955067291968 ], "wc_limitations_avg": [ 36.25, 32.041964671349355 ], "wc_review_avg": [ 671.75, 551.7097855757137 ], "wc_reply_reviewers_avg": [ 82.5, 69.64732586395547 ], "wc_reply_authors_avg": [ 301.75, 297.0323004321247 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13465363324725123112&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "qualcomm.com;qti.qualcomm.com;qti.qualcomm.com;;utoronto.ca;qti.qualcomm.com", "author_num": 6, "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Qualcomm;Qualcomm Incorporated;University of Toronto", "aff_unique_dep": "Qualcomm AI Research;;", "aff_unique_url": "https://www.qualcomm.com/research;https://www.qualcomm.com;https://www.utoronto.ca", "aff_unique_abbr": "QAI;Qualcomm;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Transferable Adversarial Attacks on SAM and Its Downstream Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93030", "id": "yDjojeIWO9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yDjojeIWO9", "openreview": "https://openreview.net/forum?id=yDjojeIWO9", "poster": "/media/PosterPDFs/NeurIPS%202024/93030.png?t=1731139712.1890006", "project": "", "author_site": "Song Xia, Wenhan Yang, Yi Yu, Xun Lin, Henghui Ding, LINGYU DUAN, Xudong Jiang", "tldr": "", "abstract": "The utilization of large foundational models has a dilemma: while fine-tuning downstream tasks from them holds promise for making use of the well-generalized knowledge in practical applications, their open accessibility also poses threats of adverse usage.\nThis paper, for the first time, explores the feasibility of adversarial attacking various downstream models fine-tuned from the segment anything model (SAM), by solely utilizing the information from the open-sourced SAM. \nIn contrast to prevailing transfer-based adversarial attacks, we demonstrate the existence of adversarial dangers even without accessing the downstream task and dataset to train a similar surrogate model.\nTo enhance the effectiveness of the adversarial attack towards models fine-tuned on unknown datasets, we propose a universal meta-initialization (UMI) algorithm to extract the intrinsic vulnerability inherent in the foundation model, which is then utilized as the prior knowledge to guide the generation of adversarial perturbations.\nMoreover, by formulating the gradient difference in the attacking process between the open-sourced SAM and its fine-tuned downstream models, we theoretically demonstrate that a deviation occurs in the adversarial update direction by directly maximizing the distance of encoded feature embeddings in the open-sourced SAM.\nConsequently, we propose a gradient robust loss that simulates the associated uncertainty with gradient-based noise augmentation to enhance the robustness of generated adversarial examples (AEs) towards this deviation, thus improving the transferability.\nExtensive experiments demonstrate the effectiveness of the proposed universal meta-initialized and gradient robust adversarial attack (UMI-GRAT) toward SAMs and their downstream models.\nCode is available at https://github.com/xiasong0501/GRAT.", "keywords": "Segment anything;Security of large fundation model;Transfer-based adversarial attack;Fine-tuning", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Song Xia;Wenhan Yang;Yi Yu;Xun Lin;Henghui Ding;LINGYU DUAN;Xudong Jiang", "authorids": "~Song_Xia1;~Wenhan_Yang6;~Yi_Yu5;~Xun_Lin1;~Henghui_Ding2;~LINGYU_DUAN1;~Xudong_Jiang1", "gender": "M;M;;M;M;M;M", "homepage": ";https://flyywh.github.io/;https://github.com/yuyi-sd;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6096;https://personal.ntu.edu.sg/exdjiang/;https://henghuiding.github.io/", "dblp": ";156/2359.html;99/111-11.html;57/88;d/LingyuDuan;11/2494;230/1216", "google_scholar": "x_CkEE8AAAAJ;S8nAnakAAAAJ;https://scholar.google.com/citations?hl=en;nHrEsbcAAAAJ;;https://scholar.google.com.sg/citations?user=IL3mSioAAAAJ;WI_flSwAAAAJ", "orcid": "0009-0002-1224-470X;;0000-0003-2730-9553;0000-0001-8387-4245;;0000-0002-9104-2315;0000-0003-4868-6526", "linkedin": ";;%E7%9B%8A-%E4%BD%99-6b453a229;;;;", "or_profile": "~Song_Xia1;~Wenhan_Yang6;~Yi_Yu5;~Xun_Lin1;~LINGYU_DUAN1;~Xudong_Jiang1;~Henghui_Ding1", "aff": "Nanyang Technological University;Peng Cheng Laboratory;Nanyang Technological University;Beihang University;Peking University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;pcl.ac.cn;ntu.edu.sg;buaa.edu.cn;pku.edu.cn;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Researcher;PhD student;PhD student;Full Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nxia2024transferable,\ntitle={Transferable Adversarial Attacks on {SAM} and Its Downstream Models},\nauthor={Song Xia and Wenhan Yang and Yi Yu and Xun Lin and Henghui Ding and LINGYU DUAN and Xudong Jiang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yDjojeIWO9}\n}", "github": "", "reviewers": "iuQV;Qxcd;pDnZ;Q28B;aXPE", "pdf_size": 2037777, "rating": "4;5;5;7;8", "confidence": "3;4;3;4;4", "soundness": "3;3;3;3;3", "novelty": "2;2;2;3;3", "presentation": "2;3;2;3;3", "wc_summary": "112;57;92;109;128", "wc_strengths": "146;75;55;48;111", "wc_weaknesses": "381;2;120;34;61", "wc_questions": "38;186;27;68;56", "wc_limitations": "1;6;21;35;37", "wc_review": "678;326;315;294;393", "wc_reply_reviewers": "57;0;32;70;28", "wc_reply_authors": "52;0;52;115;52", "reply_reviewers": "1;0;1;2;1", "reply_authors": "2;1;2;3;2", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 3.6, 0.4898979485566356 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 99.6, 24.171057072457547 ], "wc_strengths_avg": [ 87.0, 36.73145790735783 ], "wc_weaknesses_avg": [ 119.6, 136.32402576215242 ], "wc_questions_avg": [ 75.0, 57.27826813024291 ], "wc_limitations_avg": [ 20.0, 14.642404174178502 ], "wc_review_avg": [ 401.2, 142.31148934643332 ], "wc_reply_reviewers_avg": [ 37.4, 24.344198487524704 ], "wc_reply_authors_avg": [ 54.2, 36.46587445818351 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.0, 0.6324555320336759 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.7222222222222223, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4876974520246678312&as_sdt=8005&sciodt=0,7&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;pcl.ac.cn;ntu.edu.sg;buaa.edu.cn;pku.edu.cn;ntu.edu.sg;ntu.edu.sg", "author_num": 7, "aff_unique_index": "0;1;0;2;3;0;0", "aff_unique_norm": "Nanyang Technological University;Pengcheng Laboratory;Beihang University;Peking University", "aff_unique_dep": ";Peng Cheng Laboratory;;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.pcl.ac.cn;http://www.buaa.edu.cn/;http://www.pku.edu.cn", "aff_unique_abbr": "NTU;PCL;BUAA;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;0;0", "aff_country_unique": "Singapore;China" }, { "title": "Diffusion Forcing: Next-token Prediction Meets Full-Sequence Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93029", "id": "yDo1ynArjj", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yDo1ynArjj", "openreview": "https://openreview.net/forum?id=yDo1ynArjj", "poster": "", "project": "", "author_site": "Boyuan Chen, Diego Mart\u00ed Mons\u00f3, Yilun Du, Max Simchowitz, Russ Tedrake, Vincent Sitzmann", "tldr": "", "abstract": "This paper presents Diffusion Forcing, a new training paradigm where a diffusion model is trained to denoise a set of tokens with independent per-token noise levels. We apply Diffusion Forcing to sequence generative modeling by training a causal next-token prediction model to generate one or several future tokens without fully diffusing past ones. Our approach is shown to combine the strengths of next-token prediction models, such as variable-length generation, with the strengths of full-sequence diffusion models, such as the ability to guide sampling to desirable trajectories. Our method offers a range of additional capabilities, such as (1) rolling-out sequences of continuous tokens, such as video, with lengths past the training horizon, where baselines diverge and (2) new sampling and guiding schemes that uniquely profit from Diffusion Forcing's variable-horizon and causal architecture, and which lead to marked performance gains in decision-making and planning tasks. In addition to its empirical success, our method is proven to optimize a variational lower bound on the likelihoods of all subsequences of tokens drawn from the true joint distribution. Project website: https://boyuan.space/diffusion-forcing/", "keywords": "diffusion;sequence modeling;decision making;planning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/4b1a61d84de34141d52b2030bc58797dd96b5905.zip", "author": "Boyuan Chen;Diego Mart\u00ed Mons\u00f3;Yilun Du;Max Simchowitz;Russ Tedrake;Vincent Sitzmann", "authorids": "~Boyuan_Chen2;~Diego_Mart\u00ed_Mons\u00f31;~Yilun_Du1;~Max_Simchowitz1;~Russ_Tedrake1;~Vincent_Sitzmann1", "gender": "M;M;;M;M;M", "homepage": "https://boyuan.space/;;https://yilundu.github.io;;http://people.csail.mit.edu/russt;https://vsitzmann.github.io", "dblp": "193/7174-3.html;;204/4379;176/5165;73/1296;192/1958", "google_scholar": "rEL4-fgAAAAJ;;;;nxNkEiYAAAAJ;X44QVV4AAAAJ", "orcid": "0009-0009-1960-9135;;;;;0000-0002-0107-5704", "linkedin": "boyuan99/;diego-marti/;;;;vincentsitzmann/", "or_profile": "~Boyuan_Chen2;~Diego_Mart\u00ed_Mons\u00f31;~Yilun_Du1;~Max_Simchowitz1;~Russ_Tedrake1;~Vincent_Sitzmann1", "aff": "Massachusetts Institute of Technology;Technische Universit\u00e4t M\u00fcnchen;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;tum.de;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;MS student;PhD student;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2024diffusion,\ntitle={Diffusion Forcing: Next-token Prediction Meets Full-Sequence Diffusion},\nauthor={Boyuan Chen and Diego Mart{\\'\\i} Mons{\\'o} and Yilun Du and Max Simchowitz and Russ Tedrake and Vincent Sitzmann},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yDo1ynArjj}\n}", "github": "", "reviewers": "a6Fo;2PA2;tz82;35x2", "pdf_size": 18779767, "rating": "6;7;7;8", "confidence": "4;4;4;4", "soundness": "3;3;3;4", "novelty": "3;3;3;4", "presentation": "2;3;3;4", "wc_summary": "58;293;130;551", "wc_strengths": "41;64;113;252", "wc_weaknesses": "181;108;151;526", "wc_questions": "164;186;50;259", "wc_limitations": "1;1;30;14", "wc_review": "445;652;474;1602", "wc_reply_reviewers": "37;40;26;45", "wc_reply_authors": "25;25;25;25", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 258.0, 189.3792491272473 ], "wc_strengths_avg": [ 117.5, 81.891696770796 ], "wc_weaknesses_avg": [ 241.5, 166.29266369867312 ], "wc_questions_avg": [ 164.75, 75.00458319329559 ], "wc_limitations_avg": [ 11.5, 11.926860441876563 ], "wc_review_avg": [ 793.25, 473.61026963105434 ], "wc_reply_reviewers_avg": [ 37.0, 6.96419413859206 ], "wc_reply_authors_avg": [ 25.0, 0.0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10028432692663331012&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "mit.edu;tum.de;mit.edu;mit.edu;mit.edu;mit.edu", "author_num": 6, "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Technische Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.tum.de", "aff_unique_abbr": "MIT;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Safe Time-Varying Optimization based on Gaussian Processes with Spatio-Temporal Kernel", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93028", "id": "yKvHJJE9le", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yKvHJJE9le", "openreview": "https://openreview.net/forum?id=yKvHJJE9le", "poster": "/media/PosterPDFs/NeurIPS%202024/93028.png?t=1730224622.6891475", "project": "", "author_site": "Jialin Li, Marta Zagorowska, Giulia De Pasquale, Alisa Rupenyan, John Lygeros", "tldr": "", "abstract": "Ensuring safety is a key aspect in sequential decision making problems, such as robotics or process control. The complexity of the underlying systems often makes finding the optimal decision challenging, especially when the safety-critical system is time-varying. Overcoming the problem of optimizing an unknown time-varying reward subject to unknown time-varying safety constraints, we propose TVSAFEOPT, a new algorithm built on Bayesian optimization with a spatio-temporal kernel. The algorithm is capable of safely tracking a time-varying safe region without the need for explicit change detection. Optimality guarantees are also provided for the algorithm when the optimization problem becomes stationary. We show that TVSAFEOPT compares favorably against SAFEOPT on synthetic data, both regarding safety and optimality. Evaluation on a realistic case study with gas compressors confirms that TVSAFEOPT ensures safety when solving time-varying optimization problems with unknown reward and safety functions.", "keywords": "Safe learning;Bayesian optimization;Time-varying optimization", "primary_area": "active_learning", "supplementary_material": "", "author": "Jialin Li;Marta Zagorowska;Giulia De Pasquale;Alisa Rupenyan;John Lygeros", "authorids": "~Jialin_Li6;~Marta_Zagorowska1;~Giulia_De_Pasquale1;~Alisa_Rupenyan2;~John_Lygeros1", "gender": "M;;F;F;M", "homepage": ";;https://sites.google.com/view/giuliadepasquale/home-page?authuser=2;https://alisa-rupenyan.github.io;https://control.ee.ethz.ch/people/profile.john-lygeros.html", "dblp": ";;;247/9348;51/2754", "google_scholar": ";;61JYIhYAAAAJ;8jUSNmsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-2170-8564;0000-0002-6159-1962", "linkedin": "jialin-li-48124918b;;giulia-de-pasquale-9189aa106/;arupenyan/;john-lygeros-662b73233/", "or_profile": "~Jialin_Li6;~Marta_Zagorowska1;~Giulia_De_Pasquale1;~Alisa_Rupenyan2;~John_Lygeros1", "aff": "ETHZ - ETH Zurich;;ETH Zurich;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;ETHZ - ETH Zurich", "aff_domain": "ethz.ch;;eth.ch;zhaw.ch;ethz.ch", "position": "MS student;;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2024safe,\ntitle={Safe Time-Varying Optimization based on Gaussian Processes with Spatio-Temporal Kernel},\nauthor={Jialin Li and Marta Zagorowska and Giulia De Pasquale and Alisa Rupenyan and John Lygeros},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yKvHJJE9le}\n}", "github": "", "reviewers": "g7A9;o5DK;yo4J", "pdf_size": 625681, "rating": "5;5;6", "confidence": "3;2;3", "soundness": "3;2;3", "novelty": "3;2;3", "presentation": "3;2;3", "wc_summary": "55;78;37", "wc_strengths": "28;40;47", "wc_weaknesses": "42;24;139", "wc_questions": "32;1;109", "wc_limitations": "5;1;45", "wc_review": "162;144;377", "wc_reply_reviewers": "19;11;52", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 56.666666666666664, 16.779617264870957 ], "wc_strengths_avg": [ 38.333333333333336, 7.845734863959881 ], "wc_weaknesses_avg": [ 68.33333333333333, 50.50632523643834 ], "wc_questions_avg": [ 47.333333333333336, 45.40435612973226 ], "wc_limitations_avg": [ 17.0, 19.86621923433512 ], "wc_review_avg": [ 227.66666666666666, 105.84999868787068 ], "wc_reply_reviewers_avg": [ 27.333333333333332, 17.745108872274887 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17941753819333744499&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "email": "ethz.ch;;eth.ch;zhaw.ch;ethz.ch", "author_num": 5, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ETH Zurich;Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.zhaw.ch", "aff_unique_abbr": "ETHZ;ZHAW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Lips Are Lying: Spotting the Temporal Inconsistency between Audio and Visual in Lip-Syncing DeepFakes", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93027", "id": "yMS7ansbr6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yMS7ansbr6", "openreview": "https://openreview.net/forum?id=yMS7ansbr6", "poster": "/media/PosterPDFs/NeurIPS%202024/93027.png?t=1731410509.5716941", "project": "", "author_site": "Weifeng Liu, Tianyi She, Jiawei Liu, Boheng Li, Dongyu Yao, \u5b50\u6e38 \u6881, Run Wang", "tldr": "", "abstract": "In recent years, DeepFake technology has achieved unprecedented success in high-quality video synthesis, but these methods also pose potential and severe security threats to humanity. DeepFake can be bifurcated into entertainment applications like face swapping and illicit uses such as lip-syncing fraud. However, lip-forgery videos, which neither change identity nor have discernible visual artifacts, present a formidable challenge to existing DeepFake detection methods. Our preliminary experiments have shown that the effectiveness of the existing methods often drastically decrease or even fail when tackling lip-syncing videos.\nIn this paper, for the first time, we propose a novel approach dedicated to lip-forgery identification that exploits the inconsistency between lip movements and audio signals. We also mimic human natural cognition by capturing subtle biological links between lips and head regions to boost accuracy. To better illustrate the effectiveness and advances of our proposed method, we create a high-quality LipSync dataset, AVLips, by employing the state-of-the-art lip generators. We hope this high-quality and diverse dataset could be well served the further research on this challenging and interesting field. Experimental results show that our approach gives an average accuracy of more than 95.3% in spotting lip-syncing videos, significantly outperforming the baselines. Extensive experiments demonstrate the capability to tackle deepfakes and the robustness in surviving diverse input transformations. Our method achieves an accuracy of up to 90.2% in real-world scenarios (e.g., WeChat video call) and shows its powerful capabilities in real scenario deployment.\nTo facilitate the progress of this research community, we release all resources at https://github.com/AaronComo/LipFD.", "keywords": "DeepFake Detection;LipSync Detection;AI security", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Weifeng Liu;Tianyi She;Jiawei Liu;Boheng Li;Dongyu Yao;Ziyou Liang;Run Wang", "authorids": "~Weifeng_Liu3;~Tianyi_She2;~Jiawei_Liu17;~Boheng_Li1;~Dongyu_Yao1;~Ziyou_Liang1;~Run_Wang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/AaronComo;https://ansonshe.github.io/;https://github.com/AWGiaGia;https://antigonerandy.github.io;https://rainjamesy.github.io;;http://wangrun.github.io/", "dblp": ";331/2401;;329/5678;;;01/1318-1", "google_scholar": "VCPVx3QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;_TOxPGYAAAAJ;;LpuwwNUAAAAJ", "orcid": "0009-0000-6522-3262;0000-0003-4623-1384;;0000-0001-9921-7215;;;0000-0002-2842-5137", "linkedin": ";https://www.linkedin.cn/incareer/in/%E6%B7%BB%E7%BF%BC-%E4%BD%98-306863245;;;rain-dongyu-yao/;https://www.linkedin.cn/injobs/in/%E5%AD%90%E6%B8%B8-%E6%A2%81-2849a2235;", "or_profile": "~Weifeng_Liu3;~Tianyi_She2;~Jiawei_Liu17;~Boheng_Li1;~Dongyu_Yao1;~Ziyou_Liang1;~Run_Wang1", "aff": "Wuhan University;Wuhan University;Wuhan University;Wuhan University;Wuhan University;;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn;;whu.edu.cn", "position": "Undergrad student;Undergrad student;Intern;Undergrad student;Undergrad student;;Associate Professor", "bibtex": "@inproceedings{\nliu2024lips,\ntitle={Lips Are Lying: Spotting the Temporal Inconsistency between Audio and Visual in Lip-Syncing DeepFakes},\nauthor={Weifeng Liu and Tianyi She and Jiawei Liu and Boheng Li and Dongyu Yao and Ziyou Liang and Run Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yMS7ansbr6}\n}", "github": "", "reviewers": "K6BS;Ze8K;9Sa1;Qots", "pdf_size": 5954870, "rating": "5;6;6;6", "confidence": "4;4;3;4", "soundness": "3;2;3;4", "novelty": "3;4;3;4", "presentation": "3;1;2;4", "wc_summary": "50;100;65;77", "wc_strengths": "62;24;34;46", "wc_weaknesses": "129;51;88;21", "wc_questions": "101;87;2;1", "wc_limitations": "1;22;11;11", "wc_review": "343;284;200;156", "wc_reply_reviewers": "30;32;23;0", "wc_reply_authors": "104;39;94;85", "reply_reviewers": "1;1;1;0", "reply_authors": "3;2;3;2", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 73.0, 18.289341158171883 ], "wc_strengths_avg": [ 41.5, 14.168627315304754 ], "wc_weaknesses_avg": [ 72.25, 40.45599461142934 ], "wc_questions_avg": [ 47.75, 46.51545442108461 ], "wc_limitations_avg": [ 11.25, 7.428828979051813 ], "wc_review_avg": [ 245.75, 72.57539183497393 ], "wc_reply_reviewers_avg": [ 21.25, 12.71563997602952 ], "wc_reply_authors_avg": [ 80.5, 24.88473427625861 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9639660399584723845&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn;whu.edu.cn;;whu.edu.cn", "author_num": 7, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Simple and Optimal Approach for Universal Online Learning with Gradient Variations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93026", "id": "yO5DVyCHZR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yO5DVyCHZR", "openreview": "https://openreview.net/forum?id=yO5DVyCHZR", "poster": "/media/PosterPDFs/NeurIPS%202024/93026.png?t=1732504652.077089", "project": "", "author_site": "Yu-Hu Yan, Peng Zhao, Zhi-Hua Zhou", "tldr": "", "abstract": "We investigate the problem of universal online learning with gradient-variation regret. Universal online learning aims to achieve regret guarantees without prior knowledge of the curvature of the online functions. Moreover, we study the problem-dependent gradient-variation regret as it plays a crucial role in bridging stochastic and adversarial optimization as well as game theory. In this work, we design a universal approach with the *optimal* gradient-variation regret simultaneously for strongly convex, exp-concave, and convex functions, thus addressing an open problem highlighted by [Yan et al. [2023]](https://openreview.net/forum?id=AA1xrgAP5z). Our approach is *simple* since it is algorithmically efficient-to-implement with a two-layer online ensemble structure and only $1$ gradient query per round, and theoretically easy-to-analyze with a novel and alternative analysis to the gradient-variation regret. Concretely, previous works on gradient variations require controlling the algorithmic stability, which is challenging and leads to sub-optimal regret and less efficient algorithm design. Our analysis overcomes this issue by using a Bregman divergence negative term from linearization and a useful smoothness property.", "keywords": "universal online learning;gradient-variation regret", "primary_area": "online_learning", "supplementary_material": "", "author": "Yu-Hu Yan;Peng Zhao;Zhi-Hua Zhou", "authorids": "~Yu-Hu_Yan1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "gender": "M;;", "homepage": "https://www.lamda.nju.edu.cn/yanyh;;", "dblp": "271/0054;;", "google_scholar": "NdaoylQAAAAJ;;", "orcid": ";;", "linkedin": ";;", "or_profile": "~Yu-Hu_Yan1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "aff": "Nanjing University;;", "aff_domain": "nju.edu.cn;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nyan2024a,\ntitle={A Simple and Optimal Approach for Universal Online Learning with Gradient Variations},\nauthor={Yu-Hu Yan and Peng Zhao and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yO5DVyCHZR}\n}", "github": "", "reviewers": "THmn;S4Zf;KThz;y66n", "pdf_size": 480063, "rating": "5;5;6;7", "confidence": "3;3;3;4", "soundness": "3;3;3;3", "novelty": "2;2;3;4", "presentation": "2;3;3;3", "wc_summary": "313;43;69;210", "wc_strengths": "80;31;80;32", "wc_weaknesses": "145;45;138;104", "wc_questions": "159;91;2;137", "wc_limitations": "6;12;2;1", "wc_review": "703;222;291;484", "wc_reply_reviewers": "43;25;107;20", "wc_reply_authors": "203;43;379;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 158.75, 109.39921160593434 ], "wc_strengths_avg": [ 55.75, 24.252577182641847 ], "wc_weaknesses_avg": [ 108.0, 39.54111783953509 ], "wc_questions_avg": [ 97.25, 60.21783373719118 ], "wc_limitations_avg": [ 5.25, 4.322904116447646 ], "wc_review_avg": [ 425.0, 187.03609277356068 ], "wc_reply_reviewers_avg": [ 48.75, 34.701404870696514 ], "wc_reply_authors_avg": [ 156.25, 149.20015918222072 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11882419425887916725&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;;", "author_num": 3, "aff_unique_index": "0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "AUC Maximization under Positive Distribution Shift", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93025", "id": "yOe6ajdslI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yOe6ajdslI", "openreview": "https://openreview.net/forum?id=yOe6ajdslI", "poster": "/media/PosterPDFs/NeurIPS%202024/93025.png?t=1730338651.6828773", "project": "", "author_site": "Atsutoshi Kumagai, Tomoharu Iwata, Hiroshi Takahashi, Taishi Nishiyama, Yasuhiro Fujiwara", "tldr": "", "abstract": "Maximizing the area under the receiver operating characteristic curve (AUC) is a popular approach to imbalanced binary classification problems. Existing AUC maximization methods usually assume that training and test distributions are identical. However, this assumption is often violated in practice due to {\\it a positive distribution shift}, where the negative-conditional density does not change but the positive-conditional density can vary. This shift often occurs in imbalanced classification since positive data are often more diverse and time-varying than negative data. To deal with this shift, we theoretically show that the AUC on the test distribution can be expressed by using the positive and marginal training densities and the marginal test density. Based on this result, we can maximize the AUC on the test distribution by using positive and unlabeled data in the training distribution and unlabeled data in the test distribution. The proposed method requires only positive labels in the training distribution as supervision. Moreover, the derived AUC has a simple form and thus is easy to implement. The effectiveness of the proposed method is shown with four real-world datasets.", "keywords": "AUC maximization;distribution shift;domain adaptation;PU learning;imbalanced data", "primary_area": "other", "supplementary_material": "", "author": "Atsutoshi Kumagai;Tomoharu Iwata;Hiroshi Takahashi;Taishi Nishiyama;Yasuhiro Fujiwara", "authorids": "~Atsutoshi_Kumagai2;~Tomoharu_Iwata1;~Hiroshi_Takahashi1;~Taishi_Nishiyama1;~Yasuhiro_Fujiwara1", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ&hl=ja;http://www.kecl.ntt.co.jp/as/members/iwata/;https://takahashihiroshi.github.io/;;http://www.linkedin.com/in/yasuhiro-fujiwara-8960b0180", "dblp": "178/8630;29/5953;54/2994;;02/2520", "google_scholar": "https://scholar.google.co.jp/citations?user=Q_d8GEIAAAAJ;S1F-gScAAAAJ;https://scholar.google.co.jp/citations?user=ncTryO4AAAAJ;https://scholar.google.jp/citations?user=xsRmpOIAAAAJ;https://scholar.google.co.jp/citations?user=kCaZaaMAAAAJ", "orcid": "0000-0002-2915-4615;;0000-0001-5102-2830;;0000-0001-9578-1118", "linkedin": ";tomoharu-iwata-025a493;;;", "or_profile": "~Atsutoshi_Kumagai2;~Tomoharu_Iwata1;~Hiroshi_Takahashi1;~Taishi_Nishiyama1;~Yasuhiro_Fujiwara1", "aff": "NTT;NTT;NTT;NTT;NTT", "aff_domain": "ntt.co.jp;hco.ntt.co.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp", "position": "Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nkumagai2024auc,\ntitle={{AUC} Maximization under Positive Distribution Shift},\nauthor={Atsutoshi Kumagai and Tomoharu Iwata and Hiroshi Takahashi and Taishi Nishiyama and Yasuhiro Fujiwara},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yOe6ajdslI}\n}", "github": "", "reviewers": "QCqD;4KuN;CKbq;dvdg", "pdf_size": 799764, "rating": "4;5;6;6", "confidence": "4;4;5;3", "soundness": "3;2;4;3", "novelty": "2;2;3;3", "presentation": "3;2;4;4", "wc_summary": "74;64;70;36", "wc_strengths": "84;34;111;32", "wc_weaknesses": "108;37;135;21", "wc_questions": "118;140;4;14", "wc_limitations": "56;7;1;4", "wc_review": "440;282;321;107", "wc_reply_reviewers": "0;276;23;209", "wc_reply_authors": "0;1336;39;582", "reply_reviewers": "0;4;1;3", "reply_authors": "1;6;2;4", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 61.0, 14.866068747318506 ], "wc_strengths_avg": [ 65.25, 33.640563312762765 ], "wc_weaknesses_avg": [ 75.25, 47.56245893559331 ], "wc_questions_avg": [ 69.0, 60.60528029800704 ], "wc_limitations_avg": [ 17.0, 22.616365755797283 ], "wc_review_avg": [ 287.5, 119.36184482488531 ], "wc_reply_reviewers_avg": [ 127.0, 118.1841782981123 ], "wc_reply_authors_avg": [ 489.25, 540.2959258591536 ], "reply_reviewers_avg": [ 2.0, 1.5811388300841898 ], "reply_authors_avg": [ 3.25, 1.920286436967152 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:UcEt866waWgJ:scholar.google.com/&scioq=AUC+Maximization+under+Positive+Distribution+Shift&hl=en&as_sdt=0,44", "gs_version_total": 2, "email": "ntt.co.jp;hco.ntt.co.jp;ntt.co.jp;ntt.co.jp;ntt.co.jp", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "NTT Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt.co.jp", "aff_unique_abbr": "NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Local Curvature Smoothing with Stein's Identity for Efficient Score Matching", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93024", "id": "yPPNi7vc7n", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yPPNi7vc7n", "openreview": "https://openreview.net/forum?id=yPPNi7vc7n", "poster": "", "project": "", "author_site": "GENKI OSADA, Makoto Shing, Takashi Nishide", "tldr": "", "abstract": "The training of score-based diffusion models (SDMs) is based on score matching. The challenge of score matching is that it includes a\u00a0computationally expensive Jacobian trace. While several methods have been proposed to avoid this computation, each has drawbacks, such as instability during training and approximating the learning as learning a denoising vector field rather than a true score.\nWe propose a novel score matching variant, local curvature smoothing with Stein's identity (LCSS). The LCSS bypasses the Jacobian trace by applying Stein's identity, enabling regularization effectiveness and efficient computation. We show that LCSS surpasses existing methods in sample generation performance and matches the performance of denoising score matching, widely adopted by most SDMs, in evaluations such as FID, Inception score, and bits per dimension. Furthermore, we show that LCSS enables realistic image generation even at a high resolution of $1024 \\times 1024$.", "keywords": "Score matching;score-based generative models;diffusion models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "GENKI OSADA;Makoto Shing;Takashi Nishide", "authorids": "~GENKI_OSADA1;~Makoto_Shing1;~Takashi_Nishide1", "gender": "M;M;", "homepage": "https://geosada.github.io/Notes/;https://github.com/mkshing;", "dblp": "205/2027.html;;", "google_scholar": "https://scholar.google.co.jp/citations?user=7g5bnIYAAAAJ;https://scholar.google.com/citations?hl=en;", "orcid": ";;", "linkedin": ";;", "or_profile": "~GENKI_OSADA1;~Makoto_Shing1;~Takashi_Nishide1", "aff": "LY Corporation;Stability AI;", "aff_domain": "lycorp.co.jp;stability.ai;", "position": "Researcher;Researcher;", "bibtex": "@inproceedings{\nosada2024local,\ntitle={Local Curvature Smoothing with Stein's Identity for Efficient Score Matching},\nauthor={GENKI OSADA and Makoto Shing and Takashi Nishide},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yPPNi7vc7n}\n}", "github": "", "reviewers": "j6aU;qUJ6;tfBV;AvFd", "pdf_size": 50072598, "rating": "6;6;6;6", "confidence": "3;3;2;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "67;91;52;23", "wc_strengths": "25;39;53;21", "wc_weaknesses": "92;21;32;75", "wc_questions": "2;137;30;4", "wc_limitations": "1;1;4;1", "wc_review": "187;289;171;124", "wc_reply_reviewers": "203;43;0;0", "wc_reply_authors": "721;0;0;0", "reply_reviewers": "3;1;0;0", "reply_authors": "3;1;1;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.25, 24.651318423159438 ], "wc_strengths_avg": [ 34.5, 12.599603168354152 ], "wc_weaknesses_avg": [ 55.0, 29.385370509830228 ], "wc_questions_avg": [ 43.25, 55.24208088043027 ], "wc_limitations_avg": [ 1.75, 1.299038105676658 ], "wc_review_avg": [ 192.75, 60.20122507059138 ], "wc_reply_reviewers_avg": [ 61.5, 83.5598587839879 ], "wc_reply_authors_avg": [ 180.25, 312.20215806429013 ], "reply_reviewers_avg": [ 1.0, 1.224744871391589 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VLwaGkNHIPwJ:scholar.google.com/&scioq=Local+Curvature+Smoothing+with+Stein%27s+Identity+for+Efficient+Score+Matching&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "lycorp.co.jp;stability.ai;", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "LY Corporation;Stability AI", "aff_unique_dep": ";", "aff_unique_url": ";https://stability.ai", "aff_unique_abbr": ";Stability AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "title": "Toward a Stable, Fair, and Comprehensive Evaluation of Object Hallucination in Large Vision-Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93023", "id": "yQL5tutdaH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yQL5tutdaH", "openreview": "https://openreview.net/forum?id=yQL5tutdaH", "poster": "/media/PosterPDFs/NeurIPS%202024/93023.png?t=1731727696.1003604", "project": "", "author_site": "Hongliang Wei, Xingtao Wang, Xianqi Zhang, Xiaopeng Fan, Debin Zhao", "tldr": "", "abstract": "Given different instructions, large vision-language models (LVLMs) exhibit different degrees of object hallucinations, posing a significant challenge to the evaluation of object hallucinations. Overcoming this challenge, existing object hallucination evaluation methods average the results obtained from a set of instructions. However, these methods fail to provide consistent evaluation across instruction sets that generate image descriptions of significantly different lengths. In this paper, we present the first systematic investigation of the effect of instructions on object hallucinations in LVLMs, with a specific focus on the role played by image description lengths. A valuable finding is that instructions indirectly affect hallucinations through the length of image descriptions. The longer the image description, the higher the object hallucination degree. Accordingly, we fit an informative length-hallucination curve, upon which a fine-grained evaluation framework named LeHaCE is introduced for evaluating object hallucinations at any given image description length. LeHaCE evaluates the object hallucination degree at a uniform image description length to mitigate the effect of description lengths, promoting stability and fairness. Moreover, LeHaCE incorporates the curve slope as an innovative hallucination evaluation metric, reflecting the extent to which the object hallucination degree is affected by the image description length, achieving a more comprehensive evaluation. Experimental results demonstrate that LeHaCE provides a more stable, fair, and comprehensive evaluation of object hallucinations in LVLMs compared to existing methods.", "keywords": "Large Vision-Language Models;Multimodal large language models;Multimodal;Object hallucination;Evaluation;Image caption", "primary_area": "other", "supplementary_material": "/attachment/cf926d128382eb1e41d016e813136a7829db8a55.zip", "author": "Hongliang Wei;Xingtao Wang;Xianqi Zhang;Xiaopeng Fan;Debin Zhao", "authorids": "~Hongliang_Wei1;~Xingtao_Wang1;~Xianqi_Zhang1;~Xiaopeng_Fan1;~Debin_Zhao1", "gender": "M;M;M;M;M", "homepage": ";https://blog.csdn.net/weixin_37594902?type=blog;https://github.com/Xianqi-Zhang;http://homepage.hit.edu.cn/xiaopengfan;http://homepage.hit.edu.cn/zhaodebin", "dblp": "240/4963;;;76/1458;16/3958.html", "google_scholar": ";;;;", "orcid": ";0000-0002-5763-2493;;;", "linkedin": ";;;;", "or_profile": "~Hongliang_Wei1;~Xingtao_Wang1;~Xianqi_Zhang1;~Xiaopeng_Fan1;~Debin_Zhao1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;Postdoc;PhD student;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nwei2024toward,\ntitle={Toward a Stable, Fair, and Comprehensive Evaluation of Object Hallucination in Large Vision-Language Models},\nauthor={Hongliang Wei and Xingtao Wang and Xianqi Zhang and Xiaopeng Fan and Debin Zhao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yQL5tutdaH}\n}", "github": "", "reviewers": "Pvzh;XTeq;onEV;ffbd", "pdf_size": 1874932, "rating": "5;5;7;7", "confidence": "4;4;5;5", "soundness": "4;2;3;3", "novelty": "2;2;4;3", "presentation": "3;3;3;3", "wc_summary": "141;101;95;116", "wc_strengths": "96;32;153;91", "wc_weaknesses": "211;78;177;253", "wc_questions": "137;122;118;122", "wc_limitations": "9;15;5;6", "wc_review": "594;348;548;588", "wc_reply_reviewers": "0;47;0;0", "wc_reply_authors": "0;17;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.25, 17.75352077758099 ], "wc_strengths_avg": [ 93.0, 42.81938813201328 ], "wc_weaknesses_avg": [ 179.75, 64.61955973232872 ], "wc_questions_avg": [ 124.75, 7.258615570478987 ], "wc_limitations_avg": [ 8.75, 3.897114317029974 ], "wc_review_avg": [ 519.5, 100.58205605375146 ], "wc_reply_reviewers_avg": [ 11.75, 20.351596988934308 ], "wc_reply_authors_avg": [ 4.25, 7.361215932167728 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4859016199393304881&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Generative Modeling of Molecular Dynamics Trajectories", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93022", "id": "yRRCH1OsGW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yRRCH1OsGW", "openreview": "https://openreview.net/forum?id=yRRCH1OsGW", "poster": "", "project": "", "author_site": "Bowen Jing, Hannes St\u00e4rk, Tommi Jaakkola, Bonnie Berger", "tldr": "", "abstract": "Molecular dynamics (MD) is a powerful technique for studying microscopic phenomena, but its computational cost has driven significant interest in the development of deep learning-based surrogate models. We introduce generative modeling of molecular trajectories as a paradigm for learning flexible multi-task surrogate models of MD from data. By conditioning on appropriately chosen frames of the trajectory, we show such generative models can be adapted to diverse tasks such as forward simulation, transition path sampling, and trajectory upsampling. By alternatively conditioning on part of the molecular system and inpainting the rest, we also demonstrate the first steps towards dynamics-conditioned molecular design. We validate the full set of these capabilities on tetrapeptide simulations and show preliminary results on scaling to protein monomers. Altogether, our work illustrates how generative modeling can unlock value from MD data towards diverse downstream tasks that are not straightforward to address with existing methods or even MD itself. Code is available at https://github.com/bjing2016/mdgen.", "keywords": "molecular dynamics;molecular simulation;transition paths;Boltzmann distribution;proteins", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Bowen Jing;Hannes Stark;Tommi Jaakkola;Bonnie Berger", "authorids": "~Bowen_Jing1;~Hannes_Stark1;~Tommi_S._Jaakkola1;~Bonnie_Berger1", "gender": ";;;F", "homepage": ";;;https://people.csail.mit.edu/bab/", "dblp": ";;;b/BonnieBerger", "google_scholar": ";;;bYjKaowAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Bowen_Jing1;~Hannes_Stark1;~Tommi_S._Jaakkola1;~Bonnie_Berger1", "aff": ";;;Massachusetts Institute of Technology", "aff_domain": ";;;mit.edu", "position": ";;;Full Professor", "bibtex": "@inproceedings{\njing2024generative,\ntitle={Generative Modeling of Molecular Dynamics Trajectories},\nauthor={Bowen Jing and Hannes Stark and Tommi Jaakkola and Bonnie Berger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yRRCH1OsGW}\n}", "github": "", "reviewers": "zaG3;TTLp;b3SL;b8Jp;hVZ7;QAiG", "pdf_size": 7445806, "rating": "4;5;6;7;7;7", "confidence": "4;3;3;2;4;4", "soundness": "3;3;3;3;3;4", "novelty": "2;2;3;2;3;4", "presentation": "3;3;3;2;3;3", "wc_summary": "48;68;79;33;152;58", "wc_strengths": "53;134;53;12;124;19", "wc_weaknesses": "44;128;180;219;62;158", "wc_questions": "55;276;1;60;187;44", "wc_limitations": "7;306;9;19;4;20", "wc_review": "207;912;322;343;529;299", "wc_reply_reviewers": "0;11;19;45;41;53", "wc_reply_authors": "0;0;0;0;0;0", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;1;1;1;1;1", "rating_avg": [ 6.0, 1.1547005383792515 ], "confidence_avg": [ 3.3333333333333335, 0.7453559924999298 ], "soundness_avg": [ 3.1666666666666665, 0.3726779962499649 ], "novelty_avg": [ 2.6666666666666665, 0.7453559924999298 ], "presentation_avg": [ 2.8333333333333335, 0.3726779962499649 ], "wc_summary_avg": [ 73.0, 38.19249489974001 ], "wc_strengths_avg": [ 65.83333333333333, 47.34770908455398 ], "wc_weaknesses_avg": [ 131.83333333333334, 62.19436380323292 ], "wc_questions_avg": [ 103.83333333333333, 95.75910168519522 ], "wc_limitations_avg": [ 60.833333333333336, 109.80196820741521 ], "wc_review_avg": [ 435.3333333333333, 233.83660011973222 ], "wc_reply_reviewers_avg": [ 28.166666666666668, 19.30817328375617 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.19364916731037085, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1773396763692470730&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 6, "email": ";;;mit.edu", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Diffusion-based Curriculum Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93021", "id": "yRhrVaDOWE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yRhrVaDOWE", "openreview": "https://openreview.net/forum?id=yRhrVaDOWE", "poster": "/media/PosterPDFs/NeurIPS%202024/93021.png?t=1731173079.3094592", "project": "", "author_site": "Erdi Sayar, Giovanni Iacca, Ozgur S. Oguz, Alois Knoll", "tldr": "", "abstract": "Curriculum Reinforcement Learning (CRL) is an approach to facilitate the learning process of agents by structuring tasks in a sequence of increasing complexity. Despite its potential, many existing CRL methods struggle to efficiently guide agents toward desired outcomes, particularly in the absence of domain knowledge. This paper introduces DiCuRL (Diffusion Curriculum Reinforcement Learning), a novel method that leverages conditional diffusion models to generate curriculum goals. To estimate how close an agent is to achieving its goal, our method uniquely incorporates a $Q$-function and a trainable reward function based on Adversarial Intrinsic Motivation within the diffusion model. Furthermore, it promotes exploration through the inherent noising and denoising mechanism present in the diffusion models and is environment-agnostic. This combination allows for the generation of challenging yet achievable goals, enabling agents to learn effectively without relying on domain knowledge. We demonstrate the effectiveness of DiCuRL in three different maze environments and two robotic manipulation tasks simulated in MuJoCo, where it outperforms or matches nine state-of-the-art CRL algorithms from the literature.", "keywords": "curriculum reinforcement learning;reinforcement learning;diffusion models", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Erdi Sayar;Giovanni Iacca;Ozgur S. Oguz;Alois Knoll", "authorids": "~Erdi_Sayar2;~Giovanni_Iacca1;~Ozgur_S._Oguz1;~Alois_Knoll1", "gender": "M;M;M;", "homepage": "https://www.ce.cit.tum.de/air/people/erdi-sayar-msc/;https://sites.google.com/site/giovanniiacca/;https://www.in.tum.de/i06/people/prof-dr-ing-habil-alois-knoll/;http://oz-oguz.github.io/", "dblp": ";69/9200;k/AloisKnoll;177/4848", "google_scholar": ";qSw6YfcAAAAJ;https://scholar.google.de/citations?user=-CA8QgwAAAAJ;https://scholar.google.de/citations?user=0tWX-EMAAAAJ", "orcid": ";0000-0001-9723-1830;0000-0003-4840-076X;0000-0001-8723-1837", "linkedin": ";giovanniiacca/;alois-knoll-505480166;", "or_profile": "~Erdi_Sayar2;~Giovanni_Iacca1;~Alois_Knoll1;~Ozgur_S_Oguz1", "aff": "Technische Universit\u00e4t M\u00fcnchen;University of Trento;Technical University Munich;Bilkent University", "aff_domain": "tum.edu;unitn.it;tum.de;cs.bilkent.edu.tr", "position": "PhD student;Associate Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsayar2024diffusionbased,\ntitle={Diffusion-based Curriculum Reinforcement Learning},\nauthor={Erdi Sayar and Giovanni Iacca and Ozgur S. Oguz and Alois Knoll},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yRhrVaDOWE}\n}", "github": "", "reviewers": "CrFc;7Mgp;c6He;FHis", "pdf_size": 8294811, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "140;121;83;114", "wc_strengths": "85;34;25;129", "wc_weaknesses": "348;553;253;174", "wc_questions": "2;29;213;65", "wc_limitations": "1;14;15;27", "wc_review": "576;751;589;509", "wc_reply_reviewers": "557;451;93;134", "wc_reply_authors": "967;797;116;53", "reply_reviewers": "3;2;1;2", "reply_authors": "4;3;4;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 114.5, 20.524375751773793 ], "wc_strengths_avg": [ 68.25, 41.87705218851967 ], "wc_weaknesses_avg": [ 332.0, 141.6880376037441 ], "wc_questions_avg": [ 77.25, 81.49961656351519 ], "wc_limitations_avg": [ 14.25, 9.202581159652981 ], "wc_review_avg": [ 606.25, 88.91393310387298 ], "wc_reply_reviewers_avg": [ 308.75, 199.34188596479166 ], "wc_reply_authors_avg": [ 483.25, 403.8690227041435 ], "reply_reviewers_avg": [ 2.0, 0.7071067811865476 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:kndiewrmda4J:scholar.google.com/&scioq=Diffusion-based+Curriculum+Reinforcement+Learning&hl=en&as_sdt=0,48", "gs_version_total": 4, "email": "tum.edu;unitn.it;tum.de;cs.bilkent.edu.tr", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;University of Trento;Technical University of Munich;Bilkent University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tum.de;https://www.unitn.it;https://www.tum.de;https://www.bilkent.edu.tr", "aff_unique_abbr": "TUM;UniTN;TUM;Bilkent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Germany;Italy;T\u00fcrkiye" }, { "title": "$SE(3)$ Equivariant Ray Embeddings for Implicit Multi-View Depth Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93020", "id": "yRuJqoWoCs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yRuJqoWoCs", "openreview": "https://openreview.net/forum?id=yRuJqoWoCs", "poster": "/media/PosterPDFs/NeurIPS%202024/93020.png?t=1733844686.5257313", "project": "", "author_site": "Yinshuang Xu, Dian Chen, Katherine Liu, Sergey Zakharov, Rare\u0219 Ambru\u0219, Kostas Daniilidis, Vitor Guizilini", "tldr": "", "abstract": "Incorporating inductive bias by embedding geometric entities (such as rays) as input has proven successful in multi-view learning. However, the methods adopting this technique typically lack equivariance, which is crucial for effective 3D learning. Equivariance serves as a valuable inductive prior, aiding in the generation of robust multi-view features for 3D scene understanding. In this paper, we explore the application of equivariant multi-view learning to depth estimation, not only recognizing its significance for computer vision and robotics but also addressing the limitations of previous research. Most prior studies have either overlooked equivariance in this setting or achieved only approximate equivariance through data augmentation, which often leads to inconsistencies across different reference frames. To address this issue, we propose to embed $SE(3)$ equivariance into the Perceiver IO architecture. We employ Spherical Harmonics for positional encoding to ensure 3D rotation equivariance, and develop a specialized equivariant encoder and decoder within the Perceiver IO architecture. To validate our model, we applied it to the task of stereo depth estimation, achieving state of the art results on real-world datasets without explicit geometric constraints or extensive data augmentation.", "keywords": "$SE(3)$ Equivariance;Stereo Depth Estimation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yinshuang Xu;Dian Chen;Katherine Liu;Sergey Zakharov;Rares Andrei Ambrus;Kostas Daniilidis;Vitor Campagnolo Guizilini", "authorids": "~Yinshuang_Xu1;~Dian_Chen5;~Katherine_Liu1;~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Kostas_Daniilidis1;~Vitor_Campagnolo_Guizilini2", "gender": "F;;F;M;M;M;M", "homepage": "https://xuyinxuyin.github.io/;https://dianchen.io;https://thekatherineliu.com;https://zakharos.github.io/;http://www.csc.kth.se/~raambrus/;http://www.cis.upenn.edu/~kostas;", "dblp": "239/4094;318/2996;226/6398;195/5832;25/76;d/KostasDaniilidis;", "google_scholar": ";zdAyna8AAAAJ;PhpQD2YAAAAJ;https://scholar.google.de/citations?user=3DK3I-8AAAAJ;2xjjS3oAAAAJ;dGs2BcIAAAAJ;UH9tP6QAAAAJ", "orcid": ";;;;0000-0002-3111-3812;0000-0003-0498-0758;", "linkedin": ";;;;rare%C8%99-ambru%C8%99-b04812125/;;vitorguizilini/", "or_profile": "~Yinshuang_Xu1;~Dian_Chen5;~Katherine_Liu1;~Sergey_Zakharov1;~Rares_Andrei_Ambrus1;~Kostas_Daniilidis1;~Vitor_Campagnolo_Guizilini2", "aff": "School of Engineering and Applied Science, University of Pennsylvania;Toyota Research Institute;Toyota Research Institute;Toyota Research Institute;Toyota Research Institute;University of Pennsylvania;Toyota Research Institute", "aff_domain": "seas.upenn.edu;tri.global;tri.global;tri.global;tri.global;upenn.edu;tri.global", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Full Professor;Staff Research Scientist", "bibtex": "@inproceedings{\nxu2024se,\ntitle={\\${SE}(3)\\$ Equivariant Ray Embeddings for Implicit Multi-View Depth Estimation},\nauthor={Yinshuang Xu and Dian Chen and Katherine Liu and Sergey Zakharov and Rares Andrei Ambrus and Kostas Daniilidis and Vitor Campagnolo Guizilini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yRuJqoWoCs}\n}", "github": "", "reviewers": "xRYu;KQ34;QCgu", "pdf_size": 10548494, "rating": "6;6;7", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "3;2;4", "wc_summary": "134;114;95", "wc_strengths": "165;164;15", "wc_weaknesses": "131;272;27", "wc_questions": "98;5;93", "wc_limitations": "98;10;2", "wc_review": "626;565;232", "wc_reply_reviewers": "65;12;4", "wc_reply_authors": "0;45;0", "reply_reviewers": "1;1;1", "reply_authors": "1;2;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 114.33333333333333, 15.923427883328248 ], "wc_strengths_avg": [ 114.66666666666667, 70.47615830115096 ], "wc_weaknesses_avg": [ 143.33333333333334, 100.40030987114643 ], "wc_questions_avg": [ 65.33333333333333, 42.71091455614386 ], "wc_limitations_avg": [ 36.666666666666664, 43.49201714746691 ], "wc_review_avg": [ 474.3333333333333, 173.15567818841197 ], "wc_reply_reviewers_avg": [ 27.0, 27.067816067549053 ], "wc_reply_authors_avg": [ 15.0, 21.213203435596427 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.3333333333333333, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1376217265500369332&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "seas.upenn.edu;tri.global;tri.global;tri.global;tri.global;upenn.edu;tri.global", "author_num": 7, "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "University of Pennsylvania;Toyota Research Institute", "aff_unique_dep": "School of Engineering and Applied Science;", "aff_unique_url": "https://www.upenn.edu;https://www.tri.global", "aff_unique_abbr": "UPenn;TRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "V-PETL Bench: A Unified Visual Parameter-Efficient Transfer Learning Benchmark", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97434", "id": "yS1dUkQFnu", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yS1dUkQFnu", "openreview": "https://openreview.net/forum?id=yS1dUkQFnu", "poster": "/media/PosterPDFs/NeurIPS%202024/97434.png?t=1731316189.7152448", "project": "", "author_site": "Yi Xin, Siqi Luo, Xuyang Liu, Haodi Zhou, Xinyu Cheng, Christina Lee, Junlong Du, Yuntao Du., Haozhe Wang, MingCai Chen, Ting Liu, Guimin Hu, Zhongwei Wan, rongchao zhang, Aoxue Li, Mingyang Yi, Xiaohong Liu", "tldr": "", "abstract": "Parameter-efficient transfer learning (PETL) methods show promise in adapting a pre-trained model to various downstream tasks while training only a few parameters. In the computer vision (CV) domain, numerous PETL algorithms have been proposed, but their direct employment or comparison remains inconvenient. To address this challenge, we construct a Unified Visual PETL Benchmark (V-PETL Bench) for the CV domain by selecting 30 diverse, challenging, and comprehensive datasets from image recognition, video action recognition, and dense prediction tasks. On these datasets, we systematically evaluate 25 dominant PETL algorithms and open-source a modular and extensible codebase for fair evaluation of these algorithms. V-PETL Bench runs on NVIDIA A800 GPUs and requires approximately 310 GPU days. We release all the benchmark, making it more efficient and friendly to researchers. Additionally, V-PETL Bench will be continuously updated for new PETL algorithms and CV tasks.", "keywords": "Parameter-Efficient Transfer Learning;Computer Vision Tasks.", "primary_area": "", "supplementary_material": "/attachment/ce67410bc0feb3dc15348b35e1fba5ac8f07ff44.pdf", "author": "Yi Xin;Siqi Luo;Xuyang Liu;Yuntao Du.;Haodi Zhou;Xinyu Cheng;Christina Luoluo Lee;Junlong Du;Haozhe Wang;MingCai Chen;Ting Liu;Guimin Hu;Zhongwei Wan;Rongchao Zhang;Aoxue Li;Mingyang Yi;Xiaohong Liu", "authorids": "~Yi_Xin1;~Siqi_Luo2;~Xuyang_Liu1;~Yuntao_Du.1;~Haodi_Zhou1;~Xinyu_Cheng2;~Christina_Luoluo_Lee1;~Junlong_Du1;~Haozhe_Wang2;~MingCai_Chen1;~Ting_Liu20;~Guimin_Hu1;~Zhongwei_Wan1;~Rongchao_Zhang2;~Aoxue_Li2;~Mingyang_Yi1;~Xiaohong_Liu2", "gender": "M;F;M;;M;M;F;M;;;F;F;M;;;M;M", "homepage": "https://synbol.github.io/;https://multimedia.sjtu.edu.cn/;https://xuyang-liu16.github.io/;;http://iip.nju.edu.cn/index.php/;https://mindofxin.github.io/xinyucheng.github.io/;https://www.luoluo.ai;;;https://chenmc1996.github.io/;https://github.com/liuting20;https://lemei.github.io/;https://people.engineering.osu.edu/people/wan.512;;;http://mingyangyi.github.io;https://jhc.sjtu.edu.cn/~xiaohongliu/", "dblp": "33/1127.html;;209/9753;;;;;246/5771;;138/3964;;275/9112;260/6958.html;;;;95/2454-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;9VhMC1QAAAAJ;;;;;https://scholar.google.com.tw/citations?user=vBVZ-N8AAAAJ;;dQwpk-8AAAAJ;XfFqozoAAAAJ;4OS5x9EAAAAJ;https://scholar.google.com/citations?hl=en;;;RlOZiPUAAAAJ;https://scholar.google.ca/citations?hl=en", "orcid": ";;0000-0002-0691-9371;;;;;;;;0000-0002-6950-8086;0000-0001-8364-3076;;;;;", "linkedin": ";;;;steven-zhou-b436b2255/;;;;;;;;;;;;xiaohong-liu/", "or_profile": "~Yi_Xin1;~Siqi_Luo2;~Xuyang_Liu1;~Yuntao_Du.1;~Haodi_Zhou1;~Xinyu_Cheng2;~Christina_Luoluo_Lee1;~Junlong_Du1;~Haozhe_Wang2;~MingCai_Chen1;~Ting_Liu20;~Guimin_Hu1;~Zhongwei_Wan1;~Rongchao_Zhang2;~Aoxue_Li2;~Mingyang_Yi1;~Xiaohong_Liu2", "aff": "Nanjing university;Nanjing University;Sichuan University;;Nanjing University;Nanjing university;Massachusetts Institute of Technology;Tencent;;Nanjing University;National University of Defense Technology;University of Copenhagen;Ohio State University, Columbus;;;Huawei Noah's ark Lab;Shanghai Jiaotong University", "aff_domain": "nju.edu.cn;nju.edu.cn;scu.edu.cn;;nju.edu.cn;nju.edu.cn;mit.edu;tencent.com;;nju.edu.cn;nudt.edu.cn;di.ku.dk;osu.edu;;;huawei.com;sjtu.edu.cn", "position": "PhD student;MS student;MS student;;MS student;MS student;Undergrad student;Researcher;;PhD student;PhD student;Postdoc;PhD student;;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nxin2024vpetl,\ntitle={V-{PETL} Bench: A Unified Visual Parameter-Efficient Transfer Learning Benchmark},\nauthor={Yi Xin and Siqi Luo and Xuyang Liu and Yuntao Du. and Haodi Zhou and Xinyu Cheng and Christina Luoluo Lee and Junlong Du and Haozhe Wang and MingCai Chen and Ting Liu and Guimin Hu and Zhongwei Wan and Rongchao Zhang and Aoxue Li and Mingyang Yi and Xiaohong Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=yS1dUkQFnu}\n}", "github": "", "reviewers": "sENS;y7Gu;Pejx;etT1", "pdf_size": 9252295, "rating": "5;7;7;8", "confidence": "5;4;4;3", "wc_summary_and_contributions": "18;53;177;67", "wc_strengths": "2;23;4;42", "wc_improvement": "2;14;4;17", "wc_limitations": "14;1;1;29", "wc_correctness": "1;1;1;8", "wc_clarity": "1;1;1;6", "wc_relation_to_prior_work": "1;1;1;16", "wc_documentation": "11;1;1;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "51;96;191;192", "wc_reply_reviewers": "0;0;0;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 78.75, 59.46585154523561 ], "wc_strengths_avg": [ 17.75, 16.223054582907622 ], "wc_improvement_avg": [ 9.25, 6.378675411086537 ], "wc_limitations_avg": [ 11.25, 11.54068888758379 ], "wc_correctness_avg": [ 2.75, 3.031088913245535 ], "wc_clarity_avg": [ 2.25, 2.165063509461097 ], "wc_relation_to_prior_work_avg": [ 4.75, 6.49519052838329 ], "wc_documentation_avg": [ 4.75, 4.14578098794425 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 132.5, 61.10851004565567 ], "wc_reply_reviewers_avg": [ 3.5, 6.06217782649107 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 17, 0 ], "corr_rating_confidence": -0.9733285267845754, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16428624973313046913&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "nju.edu.cn;nju.edu.cn;scu.edu.cn;;nju.edu.cn;nju.edu.cn;mit.edu;tencent.com;;nju.edu.cn;nudt.edu.cn;di.ku.dk;osu.edu;;;huawei.com;sjtu.edu.cn", "author_num": 17, "aff_unique_index": "0;0;1;0;0;2;3;0;4;5;6;7;8", "aff_unique_norm": "Nanjing University;Sichuan University;Massachusetts Institute of Technology;Tencent;National University of Defense Technology;University of Copenhagen;Ohio State University;Huawei;Shanghai Jiao Tong University", "aff_unique_dep": ";;;Tencent Holdings Limited;;;;Noah's ark Lab;", "aff_unique_url": "https://www.nju.edu.cn;https://www.scu.edu.cn;https://web.mit.edu;https://www.tencent.com;http://www.nudt.edu.cn/;https://www.ku.dk;https://www.osu.edu;https://www.huawei.com;https://www.sjtu.edu.cn", "aff_unique_abbr": "Nanjing U;SCU;MIT;Tencent;NUDT;UCPH;OSU;Huawei;SJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;2;1;0;0", "aff_country_unique": "China;United States;Denmark" }, { "title": "Exogenous Matching: Learning Good Proposals for Tractable Counterfactual Estimation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93019", "id": "yS9xU6ANiA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yS9xU6ANiA", "openreview": "https://openreview.net/forum?id=yS9xU6ANiA", "poster": "/media/PosterPDFs/NeurIPS%202024/93019.png?t=1730118867.2030852", "project": "", "author_site": "Yikang Chen, Dehui du, Lili Tian", "tldr": "", "abstract": "We propose an importance sampling method for tractable and efficient estimation of counterfactual expressions in general settings, named Exogenous Matching. By minimizing a common upper bound of counterfactual estimators, we transform the variance minimization problem into a conditional distribution learning problem, enabling its integration with existing conditional distribution modeling approaches. We validate the theoretical results through experiments under various types and settings of Structural Causal Models (SCMs) and demonstrate the outperformance on counterfactual estimation tasks compared to other existing importance sampling methods. We also explore the impact of injecting structural prior knowledge (counterfactual Markov boundaries) on the results. Finally, we apply this method to identifiable proxy SCMs and demonstrate the unbiasedness of the estimates, empirically illustrating the applicability of the method to practical scenarios.", "keywords": "causality;causal inference;counterfactual estimation;importance sampling;normalizing flows", "primary_area": "causal_inference", "supplementary_material": "/attachment/60a7dee54957af26b133d37ea7e39c27923e0de9.zip", "author": "Yikang Chen;Dehui du;Lili Tian", "authorids": "~Yikang_Chen1;~Dehui_du1;~Lili_Tian1", "gender": "M;F;F", "homepage": "https://cyisk.github.io/;https://faculty.ecnu.edu.cn/_s43/ddh/main.psp;", "dblp": ";81/6559.html;309/4577.html", "google_scholar": ";;0eF6QIMAAAAJhl=zh-CNoi=sra", "orcid": ";;0000-0003-2236-1694", "linkedin": ";;", "or_profile": "~Yikang_Chen1;~Dehui_du1;~Lili_Tian1", "aff": "East China Normal University;East China Normal University;East China Normal University", "aff_domain": "stu.ecnu.edu.cn;ecnu.edu.cn;stu.ecnu.edu.cn", "position": "MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2024exogenous,\ntitle={Exogenous Matching: Learning Good Proposals for Tractable Counterfactual Estimation},\nauthor={Yikang Chen and Dehui du and Lili Tian},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yS9xU6ANiA}\n}", "github": "", "reviewers": "6eu3;aexG;rY6N;8MDB", "pdf_size": 1229608, "rating": "5;5;6;7", "confidence": "3;3;4;3", "soundness": "3;2;3;4", "novelty": "3;2;3;4", "presentation": "2;2;2;4", "wc_summary": "67;85;90;63", "wc_strengths": "28;120;80;58", "wc_weaknesses": "162;224;70;127", "wc_questions": "43;101;312;6", "wc_limitations": "9;24;4;2", "wc_review": "309;554;556;256", "wc_reply_reviewers": "16;36;24;0", "wc_reply_authors": "20;142;27;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 76.25, 11.4755174175285 ], "wc_strengths_avg": [ 71.5, 33.53729267546801 ], "wc_weaknesses_avg": [ 145.75, 55.84968665981932 ], "wc_questions_avg": [ 115.5, 118.3944677761592 ], "wc_limitations_avg": [ 9.75, 8.613216588476108 ], "wc_review_avg": [ 418.75, 137.53431390020455 ], "wc_reply_reviewers_avg": [ 19.0, 13.076696830622021 ], "wc_reply_authors_avg": [ 47.25, 55.59395200918891 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QSEiAJ6GcDQJ:scholar.google.com/&scioq=Exogenous+Matching:+Learning+Good+Proposals+for+Tractable+Counterfactual+Estimation&hl=en&as_sdt=0,10", "gs_version_total": 3, "email": "stu.ecnu.edu.cn;ecnu.edu.cn;stu.ecnu.edu.cn", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "East China Normal University", "aff_unique_dep": "", "aff_unique_url": "http://www.ecnu.edu.cn", "aff_unique_abbr": "ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Aligning Large Language Models with Representation Editing: A Control Perspective", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93018", "id": "yTTomSJsSW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yTTomSJsSW", "openreview": "https://openreview.net/forum?id=yTTomSJsSW", "poster": "", "project": "", "author_site": "Lingkai Kong, Haorui Wang, Wenhao Mu, Yuanqi Du, Yuchen Zhuang, Yifei Zhou, Yue Song, Rongzhi Zhang, Kai Wang, Chao Zhang", "tldr": "", "abstract": "Aligning large language models (LLMs) with human objectives is crucial for real-world applications. However, fine-tuning LLMs for alignment often suffers from unstable training and requires substantial computing resources. Test-time alignment techniques, such as prompting and guided decoding, do not modify the underlying model, and their performance remains dependent on the original model's capabilities. To address these challenges, we propose aligning LLMs through representation editing. The core of our method is to view a pre-trained autoregressive LLM as a discrete-time stochastic dynamical system. To achieve alignment for specific objectives, we introduce external control signals into the state space of this language dynamical system. We train a value function directly on the hidden states according to the Bellman equation, enabling gradient-based optimization to obtain the optimal control signals at test time. Our experiments demonstrate that our method outperforms existing test-time alignment techniques while requiring significantly fewer resources compared to fine-tuning methods. Our code is available at [https://github.com/Lingkai-Kong/RE-Control](https://github.com/Lingkai-Kong/RE-Control).", "keywords": "Large language model;Alignment;Representation editing", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Lingkai Kong;Haorui Wang;Wenhao Mu;Yuanqi Du;Yuchen Zhuang;Yifei Zhou;Yue Song;Rongzhi Zhang;Kai Wang;Chao Zhang", "authorids": "~Lingkai_Kong1;~Haorui_Wang1;~Wenhao_Mu1;~Yuanqi_Du1;~Yuchen_Zhuang1;~Yifei_Zhou1;~Yue_Song1;~Rongzhi_Zhang2;~Kai_Wang5;~Chao_Zhang15", "gender": "M;M;M;M;M;M;M;M;M;", "homepage": "https://lingkai-kong.com/;;https://github.com/SSSScuderia;https://yuanqidu.github.io/;https://night-chen.github.io/;https://yifeizhou02.github.io/;https://kingjamessong.github.io/;https://rongzhizhang.org/;https://guaguakai.github.io/;http://chaozhang.org/", "dblp": "20/10253;;;266/2837;191/5231.html;50/7699;11/1346;130/7337;78/2022-40;94/3019-14", "google_scholar": "https://scholar.google.com/citations?hl=en;;;fAc_zZMAAAAJ;T-f6XlEAAAAJ;;Uza2i10AAAAJ;https://scholar.google.com/citations?hl=en;gGSsQmsAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6480-513X;;;;;;;;0000-0002-2446-987X;0000-0003-3009-598X", "linkedin": ";haorui-wang-6a1a92185/;wenhao-mu/;;;yifei-zhou-57aa9b222/;;;guaguakai/;", "or_profile": "~Lingkai_Kong1;~Haorui_Wang1;~Wenhao_Mu1;~Yuanqi_Du1;~Yuchen_Zhuang1;~Yifei_Zhou1;~Yue_Song1;~Rongzhi_Zhang2;~Kai_Wang5;~Chao_Zhang15", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Cornell University;Georgia Institute of Technology;University of California, Berkeley;University of Trento, Italy;Zhejiang University;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;cornell.edu;gatech.edu;berkeley.edu;unitn.it;zju.edu.cn;gatech.edu;gatech.edu", "position": "PhD student;PhD student;MS student;PhD student;PhD student;PhD student;PhD student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nkong2024aligning,\ntitle={Aligning Large Language Models with Representation Editing: A Control Perspective},\nauthor={Lingkai Kong and Haorui Wang and Wenhao Mu and Yuanqi Du and Yuchen Zhuang and Yifei Zhou and Yue Song and Rongzhi Zhang and Kai Wang and Chao Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yTTomSJsSW}\n}", "github": "", "reviewers": "XYhX;vtBo;F1xo;Xnva", "pdf_size": 1109597, "rating": "5;6;7;8", "confidence": "3;4;3;3", "soundness": "2;3;4;3", "novelty": "2;2;3;3", "presentation": "3;3;2;4", "wc_summary": "83;52;75;112", "wc_strengths": "60;75;72;83", "wc_weaknesses": "498;399;146;58", "wc_questions": "384;67;7;19", "wc_limitations": "99;5;4;71", "wc_review": "1124;598;304;343", "wc_reply_reviewers": "43;364;14;11", "wc_reply_authors": "94;420;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 80.5, 21.453437952924933 ], "wc_strengths_avg": [ 72.5, 8.261355820929152 ], "wc_weaknesses_avg": [ 275.25, 179.46778958910704 ], "wc_questions_avg": [ 119.25, 154.49332509852974 ], "wc_limitations_avg": [ 44.75, 41.45102531904368 ], "wc_review_avg": [ 592.25, 327.1103598176004 ], "wc_reply_reviewers_avg": [ 108.0, 148.32902615469436 ], "wc_reply_authors_avg": [ 128.5, 172.61735138739675 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.2581988897471611, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17978075635758413200&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "gatech.edu;gatech.edu;gatech.edu;cornell.edu;gatech.edu;berkeley.edu;unitn.it;zju.edu.cn;gatech.edu;gatech.edu", "author_num": 10, "aff_unique_index": "0;0;0;1;0;2;3;4;0;0", "aff_unique_norm": "Georgia Institute of Technology;Cornell University;University of California, Berkeley;University of Trento;Zhejiang University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.gatech.edu;https://www.cornell.edu;https://www.berkeley.edu;https://www.unitn.it;https://www.zju.edu.cn", "aff_unique_abbr": "Georgia Tech;Cornell;UC Berkeley;UniTN;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;0;0;1;2;0;0", "aff_country_unique": "United States;Italy;China" }, { "title": "On the Effects of Data Scale on UI Control Agents", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97433", "id": "yUEBXN3cvX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yUEBXN3cvX", "openreview": "https://openreview.net/forum?id=yUEBXN3cvX", "poster": "/media/PosterPDFs/NeurIPS%202024/97433.png?t=1731089809.755786", "project": "", "author_site": "WEI Li, William Bishop, Alice Li, Christopher Rawles, Folawiyo Campbell-Ajala, Divya Tyamagundlu, Oriana Riva", "tldr": "", "abstract": "Autonomous agents that control user interfaces to accomplish human tasks are emerging. Leveraging LLMs to power such agents has been of special interest, but unless fine-tuned on human-collected task demonstrations, performance is still relatively low. In this work we study whether fine-tuning alone is a viable approach for building real-world UI control agents. To this end we collect and release a new dataset, AndroidControl, consisting of 15,283 demonstrations of everyday tasks with Android apps. Compared to existing datasets, each AndroidControl task instance includes both high and low-level human-generated instructions, allowing us to explore the level of task complexity an agent can handle. Moreover, AndroidControl is the most diverse computer control dataset to date, including 14,548 unique tasks over 833 Android apps, thus allowing us to conduct in-depth analysis of the model performance in and out of the domain of the training data. Using the dataset, we find that when tested in domain fine-tuned models outperform zero and few-shot baselines and scale in such a way that robust performance might feasibly be obtained simply by collecting more data. Out of domain, performance scales significantly more slowly and suggests that in particular for high-level tasks, fine-tuning on more data alone may be insufficient for achieving robust out-of-domain performance.", "keywords": "computer control agents;autonomous UI agent;LLMs;dataset", "primary_area": "", "supplementary_material": "/attachment/890eadf6f992f83774fb1735ab66ec259db7d9a9.pdf", "author": "Wei Li;William E Bishop;Alice Li;Christopher Rawles;Folawiyo Campbell-Ajala;Divya Tyamagundlu;Oriana Riva", "authorids": "~Wei_Li91;~William_E_Bishop1;~Alice_Li2;~Christopher_Rawles1;~Folawiyo_Campbell-Ajala1;~Divya_Tyamagundlu1;~Oriana_Riva3", "gender": "M;;F;M;F;;", "homepage": ";;;;https://www.linkedin.com/in/folawiyo-campbell-ajala-88091349;;", "dblp": ";;;352/5268;;;53/3712", "google_scholar": "A9AiOcMAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;;Y27rs8YAAAAJ;Eo7-pZ4AAAAJ", "orcid": ";;;;;;", "linkedin": ";;alice-li-0a7b3b165;chris-rawles-50854826/;;divya-tyam-06ab102/;", "or_profile": "~Wei_Li91;~William_E_Bishop1;~Alice_Li2;~Christopher_Rawles1;~Folawiyo_Campbell-Ajala1;~Divya_Tyamagundlu1;~Oriana_Riva3", "aff": "Google;;Research, Google;Google;Google;Research, Google;Microsoft", "aff_domain": "google.com;;research.google.com;google.com;google.com;research.google.com;microsoft.com", "position": "Software Engineer;;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nli2024on,\ntitle={On the Effects of Data Scale on {UI} Control Agents},\nauthor={Wei Li and William E Bishop and Alice Li and Christopher Rawles and Folawiyo Campbell-Ajala and Divya Tyamagundlu and Oriana Riva},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=yUEBXN3cvX}\n}", "github": "", "reviewers": "CboS;SNrC;Lq7f", "pdf_size": 3909994, "rating": "7;7;8", "confidence": "5;3;4", "wc_summary_and_contributions": "85;115;110", "wc_strengths": "61;50;48", "wc_improvement": "70;113;50", "wc_limitations": "1;243;8", "wc_correctness": "1;14;16", "wc_clarity": "1;151;9", "wc_relation_to_prior_work": "1;56;17", "wc_documentation": "1;46;1", "wc_additional_feedback": "1;1;1", "wc_review": "222;789;260", "wc_reply_reviewers": "10;18;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 103.33333333333333, 13.12334645668635 ], "wc_strengths_avg": [ 53.0, 5.715476066494082 ], "wc_improvement_avg": [ 77.66666666666667, 26.284765338288427 ], "wc_limitations_avg": [ 84.0, 112.46629124616258 ], "wc_correctness_avg": [ 10.333333333333334, 6.649979114420002 ], "wc_clarity_avg": [ 53.666666666666664, 68.90250761442253 ], "wc_relation_to_prior_work_avg": [ 24.666666666666668, 23.098821518760552 ], "wc_documentation_avg": [ 16.0, 21.213203435596427 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 423.6666666666667, 258.79507122217166 ], "wc_reply_reviewers_avg": [ 9.333333333333334, 7.363574011458175 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15745359532645814520&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "google.com;;research.google.com;google.com;google.com;research.google.com;microsoft.com", "author_num": 7, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Google;Microsoft", "aff_unique_dep": "Google;Microsoft Corporation", "aff_unique_url": "https://www.google.com;https://www.microsoft.com", "aff_unique_abbr": "Google;Microsoft", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Temporally Consistent Atmospheric Turbulence Mitigation with Neural Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93017", "id": "yURca4wi2L", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yURca4wi2L", "openreview": "https://openreview.net/forum?id=yURca4wi2L", "poster": "/media/PosterPDFs/NeurIPS%202024/93017.png?t=1733343635.5594852", "project": "", "author_site": "Haoming Cai, Jingxi Chen, Brandon Feng, Weiyun Jiang, Mingyang Xie, Kevin Zhang, Cornelia Fermuller, Yiannis Aloimonos, Ashok Veeraraghavan, Chris Metzler", "tldr": "", "abstract": "Atmospheric turbulence, caused by random fluctuations in the atmosphere's refractive index, introduces complex spatio-temporal distortions in imagery captured at long range. Video Atmospheric Turbulence Mitigation (ATM) aims to restore videos affected by these distortions. However, existing video ATM methods, both supervised and self-supervised, struggle to maintain temporally consistent mitigation across frames, leading to visually incoherent results. This limitation arises from the stochastic nature of atmospheric turbulence, which varies across space and time. Inspired by the observation that atmospheric turbulence induces high-frequency temporal variations, we propose ConVRT, a novel framework for consistent video restoration through turbulence. ConVRT introduces a neural video representation that explicitly decouples spatial and temporal information into a spatial content field and a temporal deformation field, enabling targeted regularization of the network's temporal representation capability. By leveraging the low-pass filtering properties of the regularized temporal representations, ConVRT effectively mitigates turbulence-induced temporal frequency variations and promotes temporal consistency. Furthermore, our training framework seamlessly integrates supervised pre-training on synthetic turbulence data with self-supervised learning on real-world videos, significantly improving the temporally consistent mitigation of ATM methods on diverse real-world data. More information can be found on our project page: https://convrt-2024.github.io/", "keywords": "Atmospheric Turbulence Mitigation", "primary_area": "machine_vision", "supplementary_material": "/attachment/7349d5429098cce355e5c0e065007015968bd4c3.zip", "author": "Haoming Cai;Jingxi Chen;Brandon Y. Feng;Weiyun Jiang;Mingyang Xie;Kevin Zhang;Cornelia Fermuller;Yiannis Aloimonos;Ashok Veeraraghavan;Christopher Metzler", "authorids": "~Haoming_Cai2;~Jingxi_Chen1;~Brandon_Y._Feng1;~Weiyun_Jiang1;~Mingyang_Xie1;~Kevin_Zhang3;~Cornelia_Fermuller3;~Yiannis_Aloimonos1;~Ashok_Veeraraghavan1;~Christopher_Metzler1", "gender": "M;M;M;M;M;F;M;;M;", "homepage": "https://www.haomingcai.com;https://codingrex.github.io/;;https://www.linkedin.com/in/mingyangx/;https://kevinwzhang.com;http://users.umiacs.umd.edu/users/fer/;http://www.prg.cs.umd.edu;https://computationalimaging.rice.edu/;https://www.cs.umd.edu/~metzler/;https://brandonyfeng.github.io/", "dblp": "271/0165;;277/0585;164/8513;;f/CorneliaFermuller;a/YiannisAloimonos;84/858;147/4828;284/2193", "google_scholar": "mePn76IAAAAJ;https://scholar.google.com/citations?view_op=list_works;fsOwLQgAAAAJ;37dscrUAAAAJ;Sm16yZ4AAAAJ;0gEOJSEAAAAJ;https://scholar.google.com/citations?hl=en;tI-oUmsAAAAJ;on7GFpYAAAAJ;VCeYRsYAAAAJ", "orcid": ";0000-0002-1953-8041;;;;0000-0003-2044-2386;;;;0000-0001-7003-9128", "linkedin": ";jingxi-chen-262b16202/;;mingyangx/;;cornelia-fermuller-594b855/;yiannis-aloimonos-6374865/;;;", "or_profile": "~Haoming_Cai2;~Jingxi_Chen1;~Weiyun_Jiang1;~Mingyang_Xie1;~Kevin_Zhang3;~Cornelia_Fermuller3;~Yiannis_Aloimonos1;~Ashok_Veeraraghavan1;~Christopher_Metzler1;~Brandon_Yushan_Feng1", "aff": "University of Maryland, College Park;University of Maryland, College Park;Rice University;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;William Marsh Rice University;University of Maryland, College Park;Massachusetts Institute of Technology", "aff_domain": "umd.edu;umd.edu;rice.edu;umd.edu;umd.edu;umd.edu;umd.edu;rice.edu;umd.edu;mit.edu", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Research Scientist;Full Professor;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\ncai2024temporally,\ntitle={Temporally Consistent Atmospheric Turbulence Mitigation with Neural Representations},\nauthor={Haoming Cai and Jingxi Chen and Brandon Y. Feng and Weiyun Jiang and Mingyang Xie and Kevin Zhang and Cornelia Fermuller and Yiannis Aloimonos and Ashok Veeraraghavan and Christopher Metzler},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yURca4wi2L}\n}", "github": "", "reviewers": "VLLH;7Q3b;EL8T;DNNj", "pdf_size": 20799406, "rating": "4;5;5;6", "confidence": "5;4;4;4", "soundness": "3;3;2;3", "novelty": "3;3;2;3", "presentation": "3;3;2;3", "wc_summary": "68;104;21;122", "wc_strengths": "65;66;11;102", "wc_weaknesses": "107;281;17;361", "wc_questions": "6;54;154;239", "wc_limitations": "6;43;3;1", "wc_review": "252;548;206;825", "wc_reply_reviewers": "31;55;32;362", "wc_reply_authors": "47;77;208;465", "reply_reviewers": "1;1;2;2", "reply_authors": "2;2;3;3", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 78.75, 38.596470045847454 ], "wc_strengths_avg": [ 61.0, 32.48845948948642 ], "wc_weaknesses_avg": [ 191.5, 136.31856073183872 ], "wc_questions_avg": [ 113.25, 90.12040556943805 ], "wc_limitations_avg": [ 13.25, 17.268106439329124 ], "wc_review_avg": [ 457.75, 249.36356490072882 ], "wc_reply_reviewers_avg": [ 120.0, 140.04820598636744 ], "wc_reply_authors_avg": [ 199.25, 164.94298257276665 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.816496580927726, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13108359592887712195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "umd.edu;umd.edu;rice.edu;umd.edu;umd.edu;umd.edu;umd.edu;rice.edu;umd.edu;mit.edu", "author_num": 10, "aff_unique_index": "0;0;1;0;0;0;0;1;0;2", "aff_unique_norm": "University of Maryland;Rice University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www/umd.edu;https://www.rice.edu;https://web.mit.edu", "aff_unique_abbr": "UMD;Rice;MIT", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Bregman Divergences with Application to Robustness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93016", "id": "yUckuDjAE0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yUckuDjAE0", "openreview": "https://openreview.net/forum?id=yUckuDjAE0", "poster": "/media/PosterPDFs/NeurIPS%202024/93016.png?t=1731754323.3845835", "project": "", "author_site": "Mohamed-Hicham LEGHETTAS, Markus P\u00fcschel", "tldr": "", "abstract": "We propose a novel and general method to learn Bregman divergences from raw high-dimensional data that measure similarity between images in pixel space. As a prototypical application, we learn divergences that consider real-world corruptions of images (e.g., blur) as close to the original and noisy perturbations as far, even if in $L^p$-distance the opposite holds. We also show that the learned Bregman divergence excels on datasets of human perceptual similarity judgment, suggesting its utility in a range of applications. We then define adversarial attacks by replacing the projected gradient descent (PGD) with the mirror descent associated with the learned Bregman divergence, and use them to improve the state-of-the-art in robustness through adversarial training for common image corruptions. In particular, for the contrast corruption that was found problematic in prior work we achieve an accuracy that exceeds the $L^p$- and the LPIPS-based adversarially trained neural networks by a margin of 27.16\\% on the CIFAR-10-C corruption data set.", "keywords": "Bregman divergence;similarity and distance learning;mirror descent;corruption robustness.", "primary_area": "machine_vision", "supplementary_material": "", "author": "Mohamed-Hicham LEGHETTAS;Markus P\u00fcschel", "authorids": "~Mohamed-Hicham_LEGHETTAS1;~Markus_P\u00fcschel1", "gender": "M;M", "homepage": "https://acl.inf.ethz.ch/people/hichaml/;https://acl.inf.ethz.ch/", "dblp": ";37/6355", "google_scholar": ";az9ZryAAAAAJ", "orcid": ";0000-0001-8834-8551", "linkedin": ";", "or_profile": "~Mohamed-Hicham_LEGHETTAS1;~Markus_P\u00fcschel1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich", "aff_domain": "inf.ethz.ch;inf.ethz.ch", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nleghettas2024learning,\ntitle={Learning Bregman Divergences with Application to Robustness},\nauthor={Mohamed-Hicham LEGHETTAS and Markus P{\\\"u}schel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yUckuDjAE0}\n}", "github": "", "reviewers": "AVtS;DULG;zPfU", "pdf_size": 11098626, "rating": "4;5;7", "confidence": "4;4;4", "soundness": "2;4;3", "novelty": "1;2;3", "presentation": "3;4;3", "wc_summary": "57;120;97", "wc_strengths": "24;86;30", "wc_weaknesses": "279;161;3", "wc_questions": "105;135;383", "wc_limitations": "5;7;1", "wc_review": "470;509;514", "wc_reply_reviewers": "144;142;378", "wc_reply_authors": "547;417;239", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 5.333333333333333, 1.247219128924647 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.0, 0.816496580927726 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 91.33333333333333, 26.02989734047285 ], "wc_strengths_avg": [ 46.666666666666664, 27.920522121829233 ], "wc_weaknesses_avg": [ 147.66666666666666, 113.07028296103663 ], "wc_questions_avg": [ 207.66666666666666, 124.58285953087162 ], "wc_limitations_avg": [ 4.333333333333333, 2.494438257849294 ], "wc_review_avg": [ 497.6666666666667, 19.669491322575904 ], "wc_reply_reviewers_avg": [ 221.33333333333334, 110.78307131005572 ], "wc_reply_authors_avg": [ 401.0, 126.24843233350134 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:9cuSjqVBbhoJ:scholar.google.com/&scioq=Learning+Bregman+Divergences+with+Application+to+Robustness&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "inf.ethz.ch;inf.ethz.ch", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Zurich", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Class Distribution Shifts in Zero-Shot Learning: Learning Robust Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93015", "id": "yUqUBGioBG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yUqUBGioBG", "openreview": "https://openreview.net/forum?id=yUqUBGioBG", "poster": "", "project": "", "author_site": "Yuli Slavutsky, Yuval Benjamini", "tldr": "", "abstract": "Zero-shot learning methods typically assume that the new, unseen classes encountered during deployment come from the same distribution as the the classes in the training set. However, real-world scenarios often involve class distribution shifts (e.g., in age or gender for person identification), posing challenges for zero-shot classifiers that rely on learned representations from training classes. In this work, we propose and analyze a model that assumes that the attribute responsible for the shift is unknown in advance. We show that in this setting, standard training may lead to non-robust representations. To mitigate this, we develop an algorithm for learning robust representations in which (a) synthetic data environments are constructed via hierarchical sampling, and (b) environment balancing penalization, inspired by out-of-distribution problems, is applied. We show that our algorithm improves generalization to diverse class distributions in both simulations and experiments on real-world datasets.", "keywords": "Zero-Shot Learning;Distribution Shift;Out of Distribution Generalization;Robust Representation Learning", "primary_area": "evaluation", "supplementary_material": "/attachment/98526b4d3d8fc8cf29cbd36b03e4a727d45f19a9.zip", "author": "Yuli Slavutsky;Yuval Benjamini", "authorids": "~Yuli_Slavutsky1;~Yuval_Benjamini1", "gender": ";M", "homepage": "https://yulisl.github.io/;https://sites.google.com/view/yuvalbenjamini/home", "dblp": "277/5961;182/2094.html", "google_scholar": "c61ibm0AAAAJ;UDTmds0AAAAJ", "orcid": "0000-0002-5898-3332;", "linkedin": "yuli-slavutsky/;", "or_profile": "~Yuli_Slavutsky1;~Yuval_Benjamini1", "aff": "The Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;mail.huji.ac.il", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nslavutsky2024class,\ntitle={Class Distribution Shifts in Zero-Shot Learning: Learning Robust Representations},\nauthor={Yuli Slavutsky and Yuval Benjamini},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yUqUBGioBG}\n}", "github": "", "reviewers": "X2Y7;LBt4;YoYU;sFfJ", "pdf_size": 2438498, "rating": "6;6;7;7", "confidence": "2;3;4;5", "soundness": "3;3;3;3", "novelty": "3;3;4;3", "presentation": "3;3;3;3", "wc_summary": "64;115;81;19", "wc_strengths": "42;147;114;5", "wc_weaknesses": "57;281;135;64", "wc_questions": "36;28;2;2", "wc_limitations": "8;7;1;1", "wc_review": "207;578;333;91", "wc_reply_reviewers": "0;16;0;0", "wc_reply_authors": "0;129;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 69.75, 34.57871455100666 ], "wc_strengths_avg": [ 77.0, 56.298312585724986 ], "wc_weaknesses_avg": [ 134.25, 90.0538033622123 ], "wc_questions_avg": [ 17.0, 15.264337522473747 ], "wc_limitations_avg": [ 4.25, 3.2691742076555053 ], "wc_review_avg": [ 302.25, 180.75034578113537 ], "wc_reply_reviewers_avg": [ 4.0, 6.928203230275509 ], "wc_reply_authors_avg": [ 32.25, 55.858638544096294 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8944271909999159, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:jsfH4BPT334J:scholar.google.com/&scioq=Class+Distribution+Shifts+in+Zero-Shot+Learning:+Learning+Robust+Representations&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "huji.ac.il;mail.huji.ac.il", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "MAmmoTH2: Scaling Instructions from the Web", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93014", "id": "yVu5dnPlqA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yVu5dnPlqA", "openreview": "https://openreview.net/forum?id=yVu5dnPlqA", "poster": "", "project": "", "author_site": "Xiang Yue, Tianyu Zheng, Ge Zhang, Wenhu Chen", "tldr": "", "abstract": "Instruction tuning improves the reasoning abilities of large language models (LLMs), with data quality and scalability being the crucial factors. Most instruction tuning data come from human crowd-sourcing or GPT-4 distillation. We propose a paradigm to efficiently harvest 10 million naturally existing instruction data from the pre-training web corpus to enhance LLM reasoning. Our approach involves (1) recalling relevant documents, (2) extracting instruction-response pairs, and (3) refining the extracted pairs using open-source LLMs. Fine-tuning base LLMs on this dataset, we build MAmmoTH2 models, which significantly boost performance on reasoning benchmarks. Notably, MAmmoTH2-7B\u2019s (Mistral) performance increases from 11% to 36.7% on MATH and from 36% to 68.4% on GSM8K without training on any in-domain data. Further training MAmmoTH2 on public instruction tuning datasets yields MAmmoTH2-Plus, achieving state-of-the-art performance on several reasoning and chatbot benchmarks. Our work demonstrates how to harvest large-scale, high-quality instruction data without costly human annotation or GPT-4 distillation, providing a new paradigm for building better instruction tuning data.", "keywords": "large language models;instruction tuning;reasoning", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Xiang Yue;Tianyu Zheng;Ge Zhang;Wenhu Chen", "authorids": "~Xiang_Yue1;~Tianyu_Zheng1;~Ge_Zhang5;~Wenhu_Chen3", "gender": ";;M;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;qyTrq4kAAAAJ;", "orcid": ";;;", "linkedin": ";;ge-zhang-792797169/;", "or_profile": "~Xiang_Yue1;~Tianyu_Zheng1;~Ge_Zhang5;~Wenhu_Chen3", "aff": ";;University of Waterloo;", "aff_domain": ";;cs.uwaterloo.ca;", "position": ";;PhD student;", "bibtex": "@inproceedings{\nyue2024mammoth,\ntitle={{MA}mmo{TH}2: Scaling Instructions from the Web},\nauthor={Xiang Yue and Tianyu Zheng and Ge Zhang and Wenhu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yVu5dnPlqA}\n}", "github": "", "reviewers": "Mpo3;k2fV;yEcJ;Jcxm", "pdf_size": 1798194, "rating": "5;7;7;8", "confidence": "4;5;3;4", "soundness": "3;4;3;4", "novelty": "2;4;3;4", "presentation": "3;4;3;4", "wc_summary": "75;101;64;206", "wc_strengths": "36;65;68;147", "wc_weaknesses": "59;396;27;340", "wc_questions": "63;2;36;76", "wc_limitations": "53;21;15;21", "wc_review": "286;585;210;790", "wc_reply_reviewers": "9;302;17;291", "wc_reply_authors": "0;900;0;167", "reply_reviewers": "1;3;1;2", "reply_authors": "1;4;1;2", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 111.5, 56.189411813970786 ], "wc_strengths_avg": [ 79.0, 41.20072814890533 ], "wc_weaknesses_avg": [ 205.5, 164.09219969273371 ], "wc_questions_avg": [ 44.25, 28.340562803162538 ], "wc_limitations_avg": [ 27.5, 14.9248115565993 ], "wc_review_avg": [ 467.75, 232.94889460995518 ], "wc_reply_reviewers_avg": [ 154.75, 141.83154620887413 ], "wc_reply_authors_avg": [ 266.75, 371.9095152049756 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 29, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5101689991839690215&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": ";;cs.uwaterloo.ca;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "Excluding the Irrelevant: Focusing Reinforcement Learning through Continuous Action Masking", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93013", "id": "yVzWlFhpRW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yVzWlFhpRW", "openreview": "https://openreview.net/forum?id=yVzWlFhpRW", "poster": "/media/PosterPDFs/NeurIPS%202024/93013.png?t=1730313065.473966", "project": "", "author_site": "Roland Stolz, Hanna Krasowski, Jakob Thumm, Michael Eichelbeck, Philipp Gassert, Matthias Althoff", "tldr": "", "abstract": "Continuous action spaces in reinforcement learning (RL) are commonly defined as multidimensional intervals. While intervals usually reflect the action boundaries for tasks well, they can be challenging for learning because the typically large global action space leads to frequent exploration of irrelevant actions. Yet, little task knowledge can be sufficient to identify significantly smaller state-specific sets of relevant actions. Focusing learning on these relevant actions can significantly improve training efficiency and effectiveness. In this paper, we propose to focus learning on the set of relevant actions and introduce three continuous action masking methods for exactly mapping the action space to the state-dependent set of relevant actions. Thus, our methods ensure that only relevant actions are executed, enhancing the predictability of the RL agent and enabling its use in safety-critical applications. We further derive the implications of the proposed methods on the policy gradient. Using proximal policy optimization ( PPO), we evaluate our methods on four control tasks, where the relevant action set is computed based on the system dynamics and a relevant state set. Our experiments show that the three action masking methods achieve higher final rewards and converge faster than the baseline without action masking.", "keywords": "Reinforcement Learning;Policy Gradient;Action Masking;Robotics;Continuous Actions", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/f1d4803a68bdb2523684be6a035fa100b8f5e177.zip", "author": "Roland Stolz;Hanna Krasowski;Jakob Thumm;Michael Eichelbeck;Philipp Gassert;Matthias Althoff", "authorids": "~Roland_Stolz1;~Hanna_Krasowski1;~Jakob_Thumm1;~Michael_Eichelbeck1;~Philipp_Gassert1;~Matthias_Althoff1", "gender": "M;F;M;;;M", "homepage": ";https://hanna.krasowski.io;https://jakob-thumm.com;https://www.ce.cit.tum.de/air/people/michael-eichelbeck-msc/;;https://www.ce.cit.tum.de/cps/members/prof-dr-ing-matthias-althoff/", "dblp": ";;;;218/5804;67/1387", "google_scholar": ";BNUx83UAAAAJ;https://scholar.google.de/citations?user=sBm3vkcAAAAJ;;;https://scholar.google.com.tw/citations?user=E3zazJAAAAAJ", "orcid": "0009-0002-0653-572X;0000-0002-6730-3802;0000-0003-0282-2908;;;0000-0003-3733-842X", "linkedin": ";;;;philipp-gassert-01a633129;", "or_profile": "~Roland_Stolz1;~Hanna_Krasowski1;~Jakob_Thumm1;~Michael_Eichelbeck1;~Philipp_Gassert1;~Matthias_Althoff1", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich;Stanford University;Technische Universit\u00e4t M\u00fcnchen;Technical University Munich;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;tum.de;stanford.edu;tum.de;tum.de;tum.de", "position": "PhD student;PhD student;Intern;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nstolz2024excluding,\ntitle={Excluding the Irrelevant: Focusing Reinforcement Learning through Continuous Action Masking},\nauthor={Roland Stolz and Hanna Krasowski and Jakob Thumm and Michael Eichelbeck and Philipp Gassert and Matthias Althoff},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yVzWlFhpRW}\n}", "github": "", "reviewers": "Ke6H;T9Vb;PubC;8567", "pdf_size": 1534678, "rating": "5;6;6;6", "confidence": "2;3;4;4", "soundness": "3;3;3;3", "novelty": "3;2;4;2", "presentation": "3;3;4;3", "wc_summary": "82;62;42;78", "wc_strengths": "81;36;65;91", "wc_weaknesses": "138;63;296;531", "wc_questions": "2;95;392;3", "wc_limitations": "1;109;24;20", "wc_review": "304;365;819;723", "wc_reply_reviewers": "160;31;640;0", "wc_reply_authors": "868;182;1125;0", "reply_reviewers": "3;1;2;0", "reply_authors": "4;2;3;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 66.0, 15.748015748023622 ], "wc_strengths_avg": [ 68.25, 20.801141795584204 ], "wc_weaknesses_avg": [ 257.0, 179.1605425309937 ], "wc_questions_avg": [ 123.0, 159.8327250596698 ], "wc_limitations_avg": [ 38.5, 41.620307543313515 ], "wc_review_avg": [ 552.75, 221.92383265435913 ], "wc_reply_reviewers_avg": [ 207.75, 256.6713608878092 ], "wc_reply_authors_avg": [ 543.75, 466.239410067403 ], "reply_reviewers_avg": [ 1.5, 1.118033988749895 ], "reply_authors_avg": [ 2.5, 1.118033988749895 ], "replies_avg": [ 30, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.8703882797784891, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8348832851609767927&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 7, "email": "tum.de;tum.de;stanford.edu;tum.de;tum.de;tum.de", "author_num": 6, "aff_unique_index": "0;1;2;0;1;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tum.de;https://www.tum.de;https://www.stanford.edu", "aff_unique_abbr": "TUM;TUM;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Accelerating ERM for data-driven algorithm design using output-sensitive techniques", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93012", "id": "yW3tlSwusb", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yW3tlSwusb", "openreview": "https://openreview.net/forum?id=yW3tlSwusb", "poster": "/media/PosterPDFs/NeurIPS%202024/93012.png?t=1733421330.4641523", "project": "", "author_site": "Maria-Florina Balcan, Christopher Seiler, Dravyansh Sharma", "tldr": "", "abstract": "Data-driven algorithm design is a promising, learning-based approach for beyond worst-case analysis of algorithms with tunable parameters. An important open problem is the design of computationally efficient data-driven algorithms for combinatorial algorithm families with multiple parameters. As one fixes the problem instance and varies the parameters, the \u201cdual\u201d loss function typically has a piecewise-decomposable structure, i.e. is well-behaved except at certain sharp transition boundaries. Motivated by prior empirical work, we initiate the study of techniques to develop efficient ERM learning algorithms for data-driven algorithm design by enumerating the pieces of the sum dual loss functions for a collection of problem instances. The running time of our approach scales with the actual number of pieces that appear as opposed to worst case upper bounds on the number of pieces. Our approach involves two novel ingredients \u2013 an output-sensitive algorithm for enumerating polytopes induced by a set of hyperplanes using tools from computational geometry, and an execution graph which compactly represents all the states the algorithm could attain for all possible parameter values. We illustrate our techniques by giving algorithms for pricing problems, linkage-based clustering and dynamic-programming based sequence alignment.", "keywords": "Learning Theory;Data-driven Algorithm Design", "primary_area": "learning_theory", "supplementary_material": "", "author": "Maria Florina Balcan;Christopher Seiler;Dravyansh Sharma", "authorids": "~Maria_Florina_Balcan1;~Christopher_Seiler1;~Dravyansh_Sharma1", "gender": "M;M;F", "homepage": ";http://www.cs.cmu.edu/~dravyans/;http://www.cs.cmu.edu/~ninamf/", "dblp": ";164/7289;b/MariaFlorinaBalcan", "google_scholar": ";;https://scholar.google.com.tw/citations?user=LWlN_BUAAAAJ", "orcid": ";;", "linkedin": "cdseiler/;;", "or_profile": "~Christopher_Seiler1;~Dravyansh_Sharma1;~Nina_Balcan1", "aff": ";Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";cmu.edu;cmu.edu", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nbalcan2024accelerating,\ntitle={Accelerating {ERM} for data-driven algorithm design using output-sensitive techniques},\nauthor={Maria Florina Balcan and Christopher Seiler and Dravyansh Sharma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yW3tlSwusb}\n}", "github": "", "reviewers": "YVeC;swPv;gq34", "pdf_size": 804834, "rating": "5;6;7", "confidence": "2;4;2", "soundness": "2;3;3", "novelty": "3;2;3", "presentation": "2;2;3", "wc_summary": "188;453;55", "wc_strengths": "45;32;75", "wc_weaknesses": "35;98;32", "wc_questions": "56;1;3", "wc_limitations": "1;1;1", "wc_review": "325;585;166", "wc_reply_reviewers": "17;14;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 0.9428090415820634 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 232.0, 165.43478070426022 ], "wc_strengths_avg": [ 50.666666666666664, 18.00617178142601 ], "wc_weaknesses_avg": [ 55.0, 30.430248109405877 ], "wc_questions_avg": [ 20.0, 25.468935326524086 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 358.6666666666667, 172.70462903144457 ], "wc_reply_reviewers_avg": [ 13.666666666666666, 2.8674417556808756 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12665341377820111099&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": ";cmu.edu;cmu.edu", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "SeafloorAI: A Large-scale Vision-Language Dataset for Seafloor Geological Survey", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97432", "id": "yWMMKm81vZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yWMMKm81vZ", "openreview": "https://openreview.net/forum?id=yWMMKm81vZ", "poster": "/media/PosterPDFs/NeurIPS%202024/97432.png?t=1733993295.3878152", "project": "", "author_site": "Kien Nguyen, Fengchun Qiao, Arthur Trembanis, Xi Peng", "tldr": "", "abstract": "A major obstacle to the advancements of machine learning models in marine science, particularly in sonar imagery analysis, is the scarcity of AI-ready datasets.\n While there have been efforts to make AI-ready sonar image dataset publicly available, they suffer from limitations in terms of environment setting and scale.\n To bridge this gap, we introduce $\\texttt{SeafloorAI}$, the first extensive AI-ready datasets for seafloor mapping across 5 geological layers that is curated in collaboration with marine scientists. We further extend the dataset to $\\texttt{SeafloorGenAI}$ by incorporating the language component in order to facilitate the development of both $\\textit{vision}$- and $\\textit{language}$-capable machine learning models for sonar imagery.\n The dataset consists of 62 geo-distributed data surveys spanning 17,300 square kilometers, with 696K sonar images, 827K annotated segmentation masks, 696K detailed language descriptions and approximately 7M question-answer pairs. \n By making our data processing source code publicly available, we aim to engage the marine science community to enrich the data pool and inspire the machine learning community to develop more robust models. \n This collaborative approach will enhance the capabilities and applications of our datasets within both fields.", "keywords": "Underwater Geological Survey;Sonar Imagery Analysis;Vision-Language Model", "primary_area": "", "supplementary_material": "/attachment/a2a68a71115944f0d6598f3a4c0750b5c3b09973.pdf", "author": "Kien X Nguyen;Fengchun Qiao;Arthur Trembanis;Xi Peng", "authorids": "~Kien_X_Nguyen1;~Fengchun_Qiao1;~Arthur_Trembanis1;~Xi_Peng1", "gender": "M;;M;Not Specified", "homepage": "https://nyquixt.github.io/;https://joffery.github.io/joffery/;;https://deep-real.github.io/dr_xipeng.html", "dblp": ";215/3373;;149/7762-5", "google_scholar": "AIBgKwoAAAAJ;BY6zd_0AAAAJ;ksrqUkcAAAAJ;DWw4v0kAAAAJ", "orcid": ";0000-0003-2714-2036;;0000-0002-7772-001X", "linkedin": ";fengchun-qiao-9148ba157/;;xi-peng-74b540b6/", "or_profile": "~Kien_X_Nguyen1;~Fengchun_Qiao1;~Arthur_Trembanis1;~Xi_Peng1", "aff": "University of Delaware;University of Delaware;University of Delaware;University of Delaware", "aff_domain": "udel.edu;udel.edu;udel.edu;udel.edu", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2024seafloorai,\ntitle={Seafloor{AI}: A Large-scale Vision-Language Dataset for Seafloor Geological Survey},\nauthor={Kien X Nguyen and Fengchun Qiao and Arthur Trembanis and Xi Peng},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=yWMMKm81vZ}\n}", "github": "", "reviewers": "AnUV;KQU4;47bb;X3ie", "pdf_size": 1354486, "rating": "7;7;7;8", "confidence": "5;2;3;4", "wc_summary_and_contributions": "135;60;106;253", "wc_strengths": "162;115;46;315", "wc_improvement": "125;320;73;279", "wc_limitations": "31;108;1;114", "wc_correctness": "29;29;1;292", "wc_clarity": "16;16;1;312", "wc_relation_to_prior_work": "31;24;1;86", "wc_documentation": "53;76;1;93", "wc_additional_feedback": "1;1;1;1", "wc_review": "583;749;231;1745", "wc_reply_reviewers": "86;126;350;111", "wc_reply_authors": "165;164;807;86", "reply_reviewers": "1;1;2;1", "reply_authors": "3;4;3;4", "rating_avg": [ 7.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "wc_summary_and_contributions_avg": [ 138.5, 71.31093885232475 ], "wc_strengths_avg": [ 159.5, 98.80409910524968 ], "wc_improvement_avg": [ 199.25, 102.94749875543359 ], "wc_limitations_avg": [ 63.5, 48.71601379423403 ], "wc_correctness_avg": [ 87.75, 118.47652721108938 ], "wc_clarity_avg": [ 86.25, 130.48060200658182 ], "wc_relation_to_prior_work_avg": [ 35.5, 31.196954979612993 ], "wc_documentation_avg": [ 55.75, 34.65093793824346 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 827.0, 562.0409237769079 ], "wc_reply_reviewers_avg": [ 168.25, 105.90178232683338 ], "wc_reply_authors_avg": [ 305.5, 291.30954326969794 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.5, 0.5 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.2581988897471611, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:XunBJxvzVNAJ:scholar.google.com/&scioq=SeafloorAI:+A+Large-scale+Vision-Language+Dataset+for+Seafloor+Geological+Survey&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "udel.edu;udel.edu;udel.edu;udel.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Delaware", "aff_unique_dep": "", "aff_unique_url": "https://www.udel.edu", "aff_unique_abbr": "UD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Is Mamba Compatible with Trajectory Optimization in Offline Reinforcement Learning?", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93011", "id": "yWSxjlFsmX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yWSxjlFsmX", "openreview": "https://openreview.net/forum?id=yWSxjlFsmX", "poster": "/media/PosterPDFs/NeurIPS%202024/93011.png?t=1731499171.9237905", "project": "", "author_site": "Yang Dai, Oubo Ma, Longfei Zhang, Xingxing Liang, Shengchao Hu, Mengzhu Wang, Shouling Ji, Jincai Huang, Li Shen", "tldr": "", "abstract": "Transformer-based trajectory optimization methods have demonstrated exceptional performance in offline Reinforcement Learning (offline RL). Yet, it poses challenges due to substantial parameter size and limited scalability, which is particularly critical in sequential decision-making scenarios where resources are constrained such as in robots and drones with limited computational power. Mamba, a promising new linear-time sequence model, offers performance on par with transformers while delivering substantially fewer parameters on long sequences. As it remains unclear whether Mamba is compatible with trajectory optimization, this work aims to conduct comprehensive experiments to explore the potential of Decision Mamba (dubbed DeMa) in offline RL from the aspect of data structures and essential components with the following insights: (1) Long sequences impose a significant computational burden without contributing to performance improvements since DeMa's focus on sequences diminishes approximately exponentially. Consequently, we introduce a Transformer-like DeMa as opposed to an RNN-like DeMa. (2) For the components of DeMa, we identify the hidden attention mechanism as a critical factor in its success, which can also work well with other residual structures and does not require position embedding. Extensive evaluations demonstrate that our specially designed DeMa is compatible with trajectory optimization and surpasses previous methods, outperforming Decision Transformer (DT) with higher performance while using 30\\% fewer parameters in Atari, and exceeding DT with only a quarter of the parameters in MuJoCo.", "keywords": "Offline RL; Trajectory Optimization; Mamba", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Yang Dai;Oubo Ma;Longfei Zhang;Xingxing Liang;Shengchao Hu;Mengzhu Wang;Shouling Ji;Jincai Huang;Li Shen", "authorids": "~Yang_Dai2;~Oubo_Ma1;~Longfei_Zhang3;~Xingxing_Liang1;~Shengchao_Hu1;~Mengzhu_Wang1;~Shouling_Ji1;~Jincai_Huang1;~Li_Shen1", "gender": ";M;M;M;;;M;M;M", "homepage": "https://github.com/AndssY?tab=repositories;https://maoubo.github.io;https://github.com/longfeizhang617;;;;https://nesa.zju.edu.cn/;;https://sites.google.com/site/mathshenli/home", "dblp": ";;;;;;07/8388;;91/3680-8", "google_scholar": ";https://scholar.google.com.hk/citations?user=1UpZ_CkAAAAJ;;;;;https://scholar.google.com.vn/citations?hl=en;;yVhgENIAAAAJ", "orcid": "0009-0001-4813-8274;;;0000-0002-3594-2167;;;0000-0003-4268-372X;0000-0003-2937-3065;", "linkedin": ";;;;;;;;", "or_profile": "~Yang_Dai2;~Oubo_Ma1;~Longfei_Zhang3;~Xingxing_Liang1;~Shengchao_Hu1;~Mengzhu_Wang1;~Shouling_Ji1;~Jincai_Huang1;~Li_Shen1", "aff": "National University of Defense Technology;Zhejiang University;National University of Defense Technology;;;;Zhejiang University;National University of Defense Technology;JD Explore Academy", "aff_domain": "nudt.edu.cn;zju.edu.cn;nudt.edu.cn;;;;zju.edu.cn;nudt.edu.cn;jd.com", "position": "PhD student;PhD student;PhD student;;;;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\ndai2024is,\ntitle={Is Mamba Compatible with Trajectory Optimization in Offline Reinforcement Learning?},\nauthor={Yang Dai and Oubo Ma and Longfei Zhang and Xingxing Liang and Shengchao Hu and Mengzhu Wang and Shouling Ji and Jincai Huang and Li Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yWSxjlFsmX}\n}", "github": "", "reviewers": "BojN;85kG;qC8w", "pdf_size": 0, "rating": "5;6;6", "confidence": "3;3;3", "soundness": "2;3;3", "novelty": "2;3;2", "presentation": "3;2;3", "wc_summary": "48;56;34", "wc_strengths": "34;34;50", "wc_weaknesses": "45;143;76", "wc_questions": "18;2;3", "wc_limitations": "9;30;3", "wc_review": "154;265;166", "wc_reply_reviewers": "16;27;21", "wc_reply_authors": "122;34;86", "reply_reviewers": "1;1;1", "reply_authors": "3;2;3", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 46.0, 9.092121131323903 ], "wc_strengths_avg": [ 39.333333333333336, 7.542472332656507 ], "wc_weaknesses_avg": [ 88.0, 40.89824772122476 ], "wc_questions_avg": [ 7.666666666666667, 7.318166133366716 ], "wc_limitations_avg": [ 14.0, 11.575836902790225 ], "wc_review_avg": [ 195.0, 49.73932046178355 ], "wc_reply_reviewers_avg": [ 21.333333333333332, 4.496912521077347 ], "wc_reply_authors_avg": [ 80.66666666666667, 36.12324582438418 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6666666666666665, 0.4714045207910317 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15404112556803172543&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "nudt.edu.cn;zju.edu.cn;nudt.edu.cn;;;;zju.edu.cn;nudt.edu.cn;jd.com", "author_num": 9, "aff_unique_index": "0;1;0;1;0;2", "aff_unique_norm": "National University of Defense Technology;Zhejiang University;JD", "aff_unique_dep": ";;JD Explore Academy", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.zju.edu.cn;", "aff_unique_abbr": "NUDT;ZJU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "title": "User-Creator Feature Polarization in Recommender Systems with Dual Influence", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93010", "id": "yWq89o19wf", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yWq89o19wf", "openreview": "https://openreview.net/forum?id=yWq89o19wf", "poster": "/media/PosterPDFs/NeurIPS%202024/93010.png?t=1732068090.9892619", "project": "", "author_site": "Tao Lin, Kun Jin, Andrew Estornell, Xiaoying Zhang, Yiling Chen, Yang Liu", "tldr": "", "abstract": "Recommender systems serve the dual purpose of presenting relevant content to users and helping content creators reach their target audience. The dual nature of these systems naturally influences both users and creators: users' preferences are affected by the items they are recommended, while creators may be incentivized to alter their content to attract more users. We define a model, called user-creator feature dynamics, to capture the dual influence of recommender systems. We prove that a recommender system with dual influence is guaranteed to polarize, causing diversity loss in the system. We then investigate, both theoretically and empirically, approaches for mitigating polarization and promoting diversity in recommender systems. Unexpectedly, we find that common diversity-promoting approaches do not work in the presence of dual influence, while relevancy-optimizing methods like top-$k$ truncation can prevent polarization and improve diversity of the system.", "keywords": "recommender systems;performativity;preference dynamics;diversity;polarization", "primary_area": "algorithmic_game_theory", "supplementary_material": "/attachment/aec25873925b9e309d00954f94bd9e571d640026.zip", "author": "Tao Lin;Kun Jin;Andrew Estornell;Xiaoying Zhang;Yiling Chen;Yang Liu", "authorids": "~Tao_Lin2;~Kun_Jin1;~Andrew_Estornell1;~Xiaoying_Zhang3;~Yiling_Chen1;~Yang_Liu3", "gender": "M;M;;F;F;M", "homepage": "https://tao-l.github.io/;;;https://github.com/Xiaoyinggit;https://yiling.seas.harvard.edu/;http://www.yliuu.com", "dblp": "64/4492-13;;;46/7725;72/3762-1;51/3710-18", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?authuser=1;;lwKg4C4AAAAJ;x_7xA0UAAAAJ;jKrIVCIAAAAJ", "orcid": ";0000-0002-5293-2745;;;;0000-0001-8420-6011", "linkedin": ";kun-jin-1426b1126/;;;;", "or_profile": "~Tao_Lin2;~Kun_Jin1;~Andrew_Estornell1;~Xiaoying_Zhang3;~Yiling_Chen1;~Yang_Liu3", "aff": "Harvard University;ByteDance Inc.;;ByteDance AILab;Harvard University;University of California, Santa Cruz", "aff_domain": "g.harvard.edu;bytedance.com;;bytedance.com;fas.harvard.edu;ucsc.edu", "position": "PhD student;Researcher;;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2024usercreator,\ntitle={User-Creator Feature Polarization in Recommender Systems with Dual Influence},\nauthor={Tao Lin and Kun Jin and Andrew Estornell and Xiaoying Zhang and Yiling Chen and Yang Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yWq89o19wf}\n}", "github": "", "reviewers": "EsTG;JScD;Gmcw", "pdf_size": 2704430, "rating": "6;7;7", "confidence": "4;5;3", "soundness": "2;4;3", "novelty": "3;4;3", "presentation": "3;2;3", "wc_summary": "104;122;95", "wc_strengths": "92;112;39", "wc_weaknesses": "224;151;238", "wc_questions": "31;80;113", "wc_limitations": "6;10;1", "wc_review": "457;475;486", "wc_reply_reviewers": "365;15;33", "wc_reply_authors": "406;0;0", "reply_reviewers": "3;1;1", "reply_authors": "4;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 107.0, 11.224972160321824 ], "wc_strengths_avg": [ 81.0, 30.80043289739069 ], "wc_weaknesses_avg": [ 204.33333333333334, 38.14300576631172 ], "wc_questions_avg": [ 74.66666666666667, 33.68811198166037 ], "wc_limitations_avg": [ 5.666666666666667, 3.6817870057290873 ], "wc_review_avg": [ 472.6666666666667, 11.953614051360738 ], "wc_reply_reviewers_avg": [ 137.66666666666666, 160.91681771095966 ], "wc_reply_authors_avg": [ 135.33333333333334, 191.39023544115884 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 2.0, 1.4142135623730951 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:rHk3IaL1QeQJ:scholar.google.com/&scioq=User-Creator+Feature+Polarization+in+Recommender+Systems+with+Dual+Influence&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "g.harvard.edu;bytedance.com;;bytedance.com;fas.harvard.edu;ucsc.edu", "author_num": 6, "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Harvard University;ByteDance;University of California, Santa Cruz", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.bytedance.com;https://www.ucsc.edu", "aff_unique_abbr": "Harvard;ByteDance;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Controlled maximal variability along with reliable performance in recurrent neural networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93009", "id": "yXW2dCTQdi", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yXW2dCTQdi", "openreview": "https://openreview.net/forum?id=yXW2dCTQdi", "poster": "", "project": "", "author_site": "Chiara Mastrogiuseppe, Ruben Moreno Bote", "tldr": "", "abstract": "Natural behaviors, even stereotyped ones, exhibit variability. Despite its role in exploring and learning, the function and neural basis of this variability is still not well understood. Given the coupling between neural activity and behavior, we ask what type of neural variability does not compromise behavioral performance. While previous studies typically curtail variability to allow for high task performance in neural networks, our approach takes the reversed perspective. We investigate how to generate maximal neural variability while at the same time having high network performance. \nTo do so, we extend to neural activity the maximum occupancy principle (MOP) developed for behavior, and refer to this new neural principle as NeuroMOP. NeuroMOP posits that the goal of the nervous system is to maximize future action-state entropy, a reward-free, intrinsic motivation that entails creating all possible activity patterns while avoiding terminal or dangerous ones.\nWe show that this goal can be achieved through a neural network controller that injects currents (actions) into a recurrent neural network of fixed random weights to maximize future cumulative action-state entropy. \nHigh activity variability can be induced while adhering to an energy constraint or while avoiding terminal states defined by specific neurons' activities, also in a context-dependent manner. The network solves these tasks by flexibly switching between stochastic and deterministic modes as needed and projecting noise onto a null space. Based on future maximum entropy production, NeuroMOP contributes to a novel theory of neural variability that reconciles stochastic and deterministic behaviors within a single framework.", "keywords": "Reinforcement Learning;Computational Neuroscience;Neural Variability;Recurrent Neural Network;Maximum Occupancy Principle;Maximum Entropy Reinforcement Learning", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/8f8c00e81f8a6216c0d6ac14a152a196d05ae8ee.zip", "author": "Chiara Mastrogiuseppe;Rub\u00e9n Moreno-Bote", "authorids": "~Chiara_Mastrogiuseppe1;~Rub\u00e9n_Moreno-Bote1", "gender": "F;M", "homepage": "https://www.upf.edu/web/tcn/people;https://www.upf.edu/web/tcn", "dblp": "https://dblp.org/rec/journals/corr/abs-2104-06339;67/908", "google_scholar": ";https://scholar.google.es/citations?user=dk4k91QAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Chiara_Mastrogiuseppe1;~Rub\u00e9n_Moreno-Bote1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.edu;upf.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nmastrogiuseppe2024controlled,\ntitle={Controlled maximal variability along with reliable performance in recurrent neural networks},\nauthor={Chiara Mastrogiuseppe and Rub{\\'e}n Moreno-Bote},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yXW2dCTQdi}\n}", "github": "", "reviewers": "ivma;Hyea;TEkX;QFbP", "pdf_size": 5922256, "rating": "5;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;2;2;3", "presentation": "3;4;3;4", "wc_summary": "130;180;89;206", "wc_strengths": "34;47;20;127", "wc_weaknesses": "136;275;620;442", "wc_questions": "56;35;118;4", "wc_limitations": "7;35;23;19", "wc_review": "363;572;870;798", "wc_reply_reviewers": "0;545;10;138", "wc_reply_authors": "0;575;7;182", "reply_reviewers": "0;2;1;2", "reply_authors": "1;3;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 151.25, 45.14075209829805 ], "wc_strengths_avg": [ 57.0, 41.527099585692234 ], "wc_weaknesses_avg": [ 368.25, 181.2820661290024 ], "wc_questions_avg": [ 53.25, 41.709561253985875 ], "wc_limitations_avg": [ 21.0, 10.0 ], "wc_review_avg": [ 650.75, 199.22019852414564 ], "wc_reply_reviewers_avg": [ 173.25, 221.4197089240251 ], "wc_reply_authors_avg": [ 191.0, 233.38487525973056 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13711615804603656911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "upf.edu;upf.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Spain" }, { "title": "Binarized Diffusion Model for Image Super-Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93008", "id": "yXpfrLMIr2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yXpfrLMIr2", "openreview": "https://openreview.net/forum?id=yXpfrLMIr2", "poster": "/media/PosterPDFs/NeurIPS%202024/93008.png?t=1730175546.040975", "project": "", "author_site": "Zheng Chen, Haotong Qin, Yong Guo, Xiongfei Su, Xin Yuan, Linghe Kong, Yulun Zhang", "tldr": "", "abstract": "Advanced diffusion models (DMs) perform impressively in image super-resolution (SR), but the high memory and computational costs hinder their deployment. Binarization, an ultra-compression algorithm, offers the potential for effectively accelerating DMs. Nonetheless, due to the model structure and the multi-step iterative attribute of DMs, existing binarization methods result in significant performance degradation. In this paper, we introduce a novel binarized diffusion model, BI-DiffSR, for image SR. First, for the model structure, we design a UNet architecture optimized for binarization. We propose the consistent-pixel-downsample (CP-Down) and consistent-pixel-upsample (CP-Up) to maintain dimension consistent and facilitate the full-precision information transfer. Meanwhile, we design the channel-shuffle-fusion (CS-Fusion) to enhance feature fusion in skip connection. Second, for the activation difference across timestep, we design the timestep-aware redistribution (TaR) and activation function (TaA). The TaR and TaA dynamically adjust the distribution of activations based on different timesteps, improving the flexibility and representation alability of the binarized module. Comprehensive experiments demonstrate that our BI-DiffSR outperforms existing binarization methods. Code is released at: https://github.com/zhengchen1999/BI-DiffSR.", "keywords": "diffusion model;binarization;image super-resolution", "primary_area": "machine_vision", "supplementary_material": "/attachment/73026ac5b5480ff7b51165e63e432098f9fd9a63.zip", "author": "Zheng Chen;Haotong Qin;Yong Guo;Xiongfei Su;Xin Yuan;Linghe Kong;Yulun Zhang", "authorids": "~Zheng_Chen11;~Haotong_Qin1;~Yong_Guo1;~Xiongfei_Su1;~Xin_Yuan4;~Linghe_Kong1;~Yulun_Zhang1", "gender": "M;M;M;;M;M;M", "homepage": "https://zheng-chen.cn/;https://htqin.github.io/;http://www.guoyongcs.com/;;https://en.westlake.edu.cn/faculty/xin-yuan.html;https://www.cs.sjtu.edu.cn/~linghe.kong/;http://yulunzhang.com/", "dblp": "33/2592-14;262/3626.html;;;78/713-2;23/7909;166/2763-1.html", "google_scholar": "nLZtXdgAAAAJ;mK6n-KgAAAAJ;https://scholar.google.com/citations?hl=en;;cS9CbWkAAAAJ;https://scholar.google.com.tw/citations?user=-wm2X-8AAAAJ;ORmLjWoAAAAJ", "orcid": "0009-0004-3963-7938;;0000-0002-3444-4588;;0000-0002-8311-7524;0000-0001-9266-3044;0000-0002-2288-5079", "linkedin": "zheng-chen-290084313;;;;xin-yuan-0024bb31/;;yulun-zhang-1116b5b9/", "or_profile": "~Zheng_Chen11;~Haotong_Qin1;~Yong_Guo1;~Xiongfei_Su1;~Xin_Yuan4;~Linghe_Kong1;~Yulun_Zhang1", "aff": "Shanghai Jiaotong University;ETHZ - ETH Zurich;Saarland Informatics Campus, Max-Planck Institute;;Westlake University;Shanghai Jiaotong University;Swiss Federal Institute of Technology", "aff_domain": "sjtu.edu.cn;ethz.ch;mpi-inf.mpg.de;;westlake.edu.cn;sjtu.edu.cn;ethz.ch", "position": "MS student;Postdoc;Postdoc;;Associate Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nchen2024binarized,\ntitle={Binarized Diffusion Model for Image Super-Resolution},\nauthor={Zheng Chen and Haotong Qin and Yong Guo and Xiongfei Su and Xin Yuan and Linghe Kong and Yulun Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yXpfrLMIr2}\n}", "github": "", "reviewers": "mEsa;MRyP;Z33c;KWS7", "pdf_size": 2691926, "rating": "5;6;7;8", "confidence": "5;4;4;4", "soundness": "3;4;3;4", "novelty": "2;4;3;4", "presentation": "4;4;4;3", "wc_summary": "38;89;77;125", "wc_strengths": "28;67;270;228", "wc_weaknesses": "181;212;133;184", "wc_questions": "2;6;49;47", "wc_limitations": "3;9;4;6", "wc_review": "252;383;533;590", "wc_reply_reviewers": "136;29;25;34", "wc_reply_authors": "333;19;21;30", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.25, 0.82915619758885 ], "presentation_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_avg": [ 82.25, 31.059418861272984 ], "wc_strengths_avg": [ 148.25, 102.76763838874571 ], "wc_weaknesses_avg": [ 177.5, 28.394541729001368 ], "wc_questions_avg": [ 26.0, 22.056745000112777 ], "wc_limitations_avg": [ 5.5, 2.29128784747792 ], "wc_review_avg": [ 439.5, 132.04260676009088 ], "wc_reply_reviewers_avg": [ 56.0, 46.29794811868016 ], "wc_reply_authors_avg": [ 100.75, 134.1535966718746 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18039049883299260095&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": "sjtu.edu.cn;ethz.ch;mpi-inf.mpg.de;;westlake.edu.cn;sjtu.edu.cn;ethz.ch", "author_num": 7, "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Shanghai Jiao Tong University;ETH Zurich;Max-Planck Institute;Westlake University;Swiss Federal Institute of Technology", "aff_unique_dep": ";;Informatics;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ethz.ch;https://www.mpi-sws.org;https://www.westlake.edu.cn;https://www.ethz.ch", "aff_unique_abbr": "SJTU;ETHZ;MPI-SWS;WU;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0;1;2;0;0;1", "aff_country_unique": "China;Switzerland;Germany" }, { "id": "yaYJlpidX1", "title": "Metalearning to Continually Learn In Context", "track": "main", "status": "Reject", "tldr": "", "abstract": "General-purpose learning systems should improve themselves in open-ended fashion in ever-changing environments. Conventional learning algorithms for neural networks, however, suffer from catastrophic forgetting (CF)---previously acquired skills are forgotten when a new task is learned. Instead of hand-crafting new algorithms for avoiding CF, we propose Automated Continual Learning (ACL) to train self-referential neural networks to meta-learn their own in-context continual (meta-)learning algorithms. ACL encodes continual learning desiderata---good performance on both old and new tasks---into its meta-learning objectives. Our experiments demonstrate that, in general, in-context learning algorithms also suffer from CF but ACL effectively solves such \"in-context catastrophic forgetting\". Our ACL-learned algorithms outperform hand-crafted ones and existing meta-continual learning methods on the Split-MNIST benchmark in the replay-free setting, and enables continual learning of diverse tasks consisting of multiple few-shot and standard image classification datasets. Going beyond, we also highlight the limitations of in-context continual learning, by investigating the possibilities to extend ACL to the realm of state-of-the-art CL methods which leverage pre-trained models.", "keywords": "In-context learning;meta-learning;catastrophic forgetting;continual learning;self-reference;Transformers", "primary_area": "other", "supplementary_material": "/attachment/a611b6ad9958270c35c28554543ae8ecb85e1aa7.zip", "author": "Kazuki Irie;R\u00f3bert Csord\u00e1s;J\u00fcrgen Schmidhuber", "authorids": "~Kazuki_Irie1;~R\u00f3bert_Csord\u00e1s1;~J\u00fcrgen_Schmidhuber1", "gender": ";M;M", "homepage": "https://sites.harvard.edu/kazuki-irie/;https://robertcsordas.github.io/;http://people.idsia.ch/~juergen/", "dblp": "148/9667;166/4773.html;s/JurgenSchmidhuber", "google_scholar": "https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;av1lplwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "orcid": "0000-0003-0923-691X;;", "linkedin": ";robertcsordas/;", "or_profile": "~Kazuki_Irie1;~R\u00f3bert_Csord\u00e1s1;~J\u00fcrgen_Schmidhuber1", "aff": "Harvard University;IDSIA;IDSIA", "aff_domain": "fas.harvard.edu;idsia.ch;idsia.ch", "position": "Postpostdoc;Postdoc;Scientific Director", "bibtex": "@misc{\nanonymous2024metalearning,\ntitle={Metalearning to Continually Learn In Context},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=yaYJlpidX1}\n}", "github": "", "project": "", "reviewers": "vTAU;x6Jn;HvJH;d5XY", "site": "https://openreview.net/forum?id=yaYJlpidX1", "pdf_size": 579487, "rating": "5;5;5;5", "confidence": "4;4;3;4", "soundness": "3;4;3;3", "novelty": "3;3;3;2", "presentation": "3;4;2;3", "wc_summary": "60;149;53;194", "wc_strengths": "27;120;30;104", "wc_weaknesses": "66;265;147;220", "wc_questions": "25;1;113;51", "wc_limitations": "8;5;15;28", "wc_review": "186;540;358;597", "wc_reply_reviewers": "42;83;143;100", "wc_reply_authors": "185;93;88;24", "reply_reviewers": "1;1;1;1", "reply_authors": "4;2;2;2", "rating_avg": [ 5.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 114.0, 59.71180787750443 ], "wc_strengths_avg": [ 70.25, 42.14483954175173 ], "wc_weaknesses_avg": [ 174.5, 75.4801298356064 ], "wc_questions_avg": [ 47.5, 41.74625731727337 ], "wc_limitations_avg": [ 14.0, 8.860022573334675 ], "wc_review_avg": [ 420.25, 161.49980650143206 ], "wc_reply_reviewers_avg": [ 92.0, 36.21463792446364 ], "wc_reply_authors_avg": [ 97.5, 57.378131722808824 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:mhiFttVUT0cJ:scholar.google.com/&scioq=Metalearning+to+Continually+Learn+In+Context&hl=en&as_sdt=0,33", "gs_version_total": 0, "aff_unique_index": "0;1;1", "aff_unique_norm": "Harvard University;Institute of Digital Technologies", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.idsia.ch", "aff_unique_abbr": "Harvard;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Switzerland" }, { "title": "Large Spatial Model: End-to-end Unposed Images to Semantic 3D", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93007", "id": "ybHPzL7eYT", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ybHPzL7eYT", "openreview": "https://openreview.net/forum?id=ybHPzL7eYT", "poster": "/media/PosterPDFs/NeurIPS%202024/93007.png?t=1731217413.9704163", "project": "", "author_site": "Zhiwen Fan, Jian Zhang, Wenyan Cong, Peihao Wang, Renjie Li, Kairun Wen, Shijie Zhou, Achuta Kadambi, Zhangyang "Atlas" Wang, Danfei Xu, Boris Ivanovic, Marco Pavone, Yue Wang", "tldr": "", "abstract": "Reconstructing and understanding 3D structures from a limited number of images is a classical problem in computer vision. Traditional approaches typically decompose this task into multiple subtasks, involving several stages of complex mappings between different data representations. For example, dense reconstruction using Structure-from-Motion (SfM) requires transforming images into key points, optimizing camera parameters, and estimating structures. Following this, accurate sparse reconstructions are necessary for further dense modeling, which is then input into task-specific neural networks. This multi-stage paradigm leads to significant processing times and engineering complexity.\n\nIn this work, we introduce the Large Spatial Model (LSM), which directly processes unposed RGB images into semantic radiance fields. LSM simultaneously estimates geometry, appearance, and semantics in a single feed-forward pass and can synthesize versatile label maps by interacting through language at novel views. Built on a general Transformer-based framework, LSM predicts global geometry via pixel-aligned point maps. To improve spatial attribute regression, we adopt local context aggregation with multi-scale fusion, enhancing the accuracy of fine local details. To address the scarcity of labeled 3D semantic data and enable natural language-driven scene manipulation, we incorporate a pre-trained 2D language-based segmentation model into a 3D-consistent semantic feature field. An efficient decoder parameterizes a set of semantic anisotropic Gaussians, allowing supervised end-to-end learning. Comprehensive experiments on various tasks demonstrate that LSM unifies multiple 3D vision tasks directly from unposed images, achieving real-time semantic 3D reconstruction for the first time.", "keywords": "3D Reconstruction;3D Scene Understanding;Gaussian Splatting", "primary_area": "machine_vision", "supplementary_material": "/attachment/a56101d73213494352b81a74c2e7ef24fe402a86.zip", "author": "Zhiwen Fan;Jian Zhang;Wenyan Cong;Peihao Wang;Renjie Li;Kairun Wen;Shijie Zhou;Achuta Kadambi;Zhangyang Wang;Danfei Xu;Boris Ivanovic;Marco Pavone;Yue Wang", "authorids": "~Zhiwen_Fan2;~Jian_Zhang54;~Wenyan_Cong1;~Peihao_Wang1;~Renjie_Li3;~Kairun_Wen1;~Shijie_Zhou1;~Achuta_Kadambi2;~Zhangyang_Wang1;~Danfei_Xu1;~Boris_Ivanovic1;~Marco_Pavone1;~Yue_Wang2", "gender": ";M;F;M;;M;M;M;M;M;;M;M", "homepage": ";https://github.com/zjrandom951;https://wenyancong.com/;https://peihaowang.github.io/;https://shadowiterator.github.io/;https://kairunwen.github.io/;https://shijiezhou-ucla.github.io/;http://visual.ee.ucla.edu;https://vita-group.github.io;https://cs.stanford.edu/~danfei/;http://www.borisivanovic.com/;https://web.stanford.edu/~pavone/;https://yuewang.xyz", "dblp": ";;247/9471;239/4075;;321/5079;;;119/4026;135/8443;203/8356;91/3382-1.html;33/4822-41", "google_scholar": ";;uQV5aCsAAAAJ;fqf2tBsAAAAJ;;RzRhziMAAAAJ;rLGB0qcAAAAJ;;pxFyKAIAAAAJ;J5D4kcoAAAAJ;ey9AQcEAAAAJ;RhOpyXcAAAAJ;v-AEFIEAAAAJ", "orcid": ";;;;;0009-0006-7726-9691;0000-0002-9018-7539;;;;0000-0002-8698-202X;;", "linkedin": ";;;peihao-wang-25a411162/;;;shijie-zhou-ucla/;achuta-kadambi/;;;boris-ivanovic-a3103064;;", "or_profile": "~Zhiwen_Fan2;~Jian_Zhang54;~Wenyan_Cong1;~Peihao_Wang1;~Renjie_Li3;~Kairun_Wen1;~Shijie_Zhou1;~Achuta_Kadambi2;~Zhangyang_Wang1;~Danfei_Xu1;~Boris_Ivanovic1;~Marco_Pavone1;~Yue_Wang2", "aff": ";Xiamen University;University of Texas at Austin;University of Texas, Austin;Tsinghua University;Xiamen University;University of California, Los Angeles;University of California, Los Angeles;University of Texas at Austin;NVIDIA;NVIDIA;Stanford University;NVIDIA", "aff_domain": ";xmu.edu.cn;utexas.edu;utexas.edu;tsinghua.edu.cn;xmu.edu.cn;ucla.edu;ucla.edu;utexas.edu;nvidia.com;nvidia.com;stanford.edu;nvidia.com", "position": ";MS student;PhD student;PhD student;PhD student;MS student;PhD student;Assistant Professor;Associate Professor;Research Scientist;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nfan2024large,\ntitle={Large Spatial Model: End-to-end Unposed Images to Semantic 3D},\nauthor={Zhiwen Fan and Jian Zhang and Wenyan Cong and Peihao Wang and Renjie Li and Kairun Wen and Shijie Zhou and Achuta Kadambi and Zhangyang Wang and Danfei Xu and Boris Ivanovic and Marco Pavone and Yue Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ybHPzL7eYT}\n}", "github": "", "reviewers": "MCB6;K6pm;fir7;8Lhg", "pdf_size": 2301347, "rating": "5;5;6;7", "confidence": "4;3;4;4", "soundness": "3;1;3;3", "novelty": "3;3;3;3", "presentation": "2;2;3;3", "wc_summary": "51;101;61;125", "wc_strengths": "58;31;55;108", "wc_weaknesses": "165;197;142;209", "wc_questions": "55;63;131;23", "wc_limitations": "5;39;11;60", "wc_review": "334;431;400;525", "wc_reply_reviewers": "37;0;22;42", "wc_reply_authors": "84;192;14;14", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.8660254037844386 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 84.5, 29.94578434437809 ], "wc_strengths_avg": [ 63.0, 28.008927148321835 ], "wc_weaknesses_avg": [ 178.25, 26.394838510587633 ], "wc_questions_avg": [ 68.0, 39.331920878594275 ], "wc_limitations_avg": [ 28.75, 22.1401784093986 ], "wc_review_avg": [ 422.5, 68.76954267697292 ], "wc_reply_reviewers_avg": [ 25.25, 16.330569494050106 ], "wc_reply_authors_avg": [ 76.0, 72.81483365359011 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 13, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2926012101166379087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";xmu.edu.cn;utexas.edu;utexas.edu;tsinghua.edu.cn;xmu.edu.cn;ucla.edu;ucla.edu;utexas.edu;nvidia.com;nvidia.com;stanford.edu;nvidia.com", "author_num": 13, "aff_unique_index": "0;1;1;2;0;3;3;1;4;4;5;4", "aff_unique_norm": "Xiamen University;University of Texas at Austin;Tsinghua University;University of California, Los Angeles;NVIDIA;Stanford University", "aff_unique_dep": ";;;;NVIDIA Corporation;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.utexas.edu;https://www.tsinghua.edu.cn;https://www.ucla.edu;https://www.nvidia.com;https://www.stanford.edu", "aff_unique_abbr": "XMU;UT Austin;THU;UCLA;NVIDIA;Stanford", "aff_campus_unique_index": "1;1;2;2;1;3", "aff_campus_unique": ";Austin;Los Angeles;Stanford", "aff_country_unique_index": "0;1;1;0;0;1;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Predicting Ground State Properties: Constant Sample Complexity and Deep Learning Algorithms", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93006", "id": "ybLXvqJyQA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ybLXvqJyQA", "openreview": "https://openreview.net/forum?id=ybLXvqJyQA", "poster": "", "project": "", "author_site": "Marc Wanner, Laura Lewis, Chiranjib Bhattacharyya, Devdatt Dubhashi, Alexandru Gheorghiu", "tldr": "", "abstract": "A fundamental problem in quantum many-body physics is that of finding ground states of local\nHamiltonians. A number of recent works gave provably efficient machine learning (ML) algorithms\nfor learning ground states. Specifically, [Huang et al. Science 2022], introduced an approach for learning\nproperties of the ground state of an $n$-qubit gapped local Hamiltonian $H$ from only $n^{\\mathcal{O}(1)}$ data\npoints sampled from Hamiltonians in the same phase of matter. This was subsequently improved\nby [Lewis et al. Nature Communications 2024], to $\\mathcal{O}(\\log \ud835\udc5b)$ samples when the geometry of the $n$-qubit system is known.\nIn this work, we introduce two approaches that achieve a constant sample complexity, independent\nof system size $n$, for learning ground state properties. Our first algorithm consists of a simple\nmodification of the ML model used by Lewis et al. and applies to a property of interest known beforehand. Our second algorithm, which applies even if a description of\nthe property is not known, is a deep neural network model. While empirical results showing the\nperformance of neural networks have been demonstrated, to our knowledge, this is the first rigorous\nsample complexity bound on a neural network model for predicting ground state properties. We also perform numerical experiments that confirm the improved scaling of our approach compared to earlier results.", "keywords": "Deep Learning;Learning Theory;Quantum Many-Body Problems", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Marc Wanner;Laura Lewis;Chiranjib Bhattacharyya;Devdatt Dubhashi;Alexandru Gheorghiu", "authorids": "~Marc_Wanner1;~Laura_Lewis2;~Chiranjib_Bhattacharyya1;~Devdatt_Dubhashi1;~Alexandru_Gheorghiu1", "gender": "M;;M;M;", "homepage": "https://www.chalmers.se/en/persons/wanner/;;http://www.csa.iisc.ac.in/~chiru/;;https://agheorghiu.com/", "dblp": ";;b/CBhattacharyya;d/DPDubhashi.html;", "google_scholar": "Yv_sRL8AAAAJ;;;z8cbzqkAAAAJ;sOeTuIMAAAAJ", "orcid": ";0000-0001-7793-8345;;0000-0002-9928-2305;", "linkedin": ";;;;", "or_profile": "~Marc_Wanner1;~Laura_Lewis2;~Chiranjib_Bhattacharyya1;~Devdatt_Dubhashi1;~Alexandru_Gheorghiu1", "aff": "Chalmers University of Technology;University of Cambridge;Indian Institute of Science, Indian institute of science, Bangalore;Chalmers University;Chalmers University of Technology", "aff_domain": "chalmers.se;cam.ac.uk;iisc.ac.in;chalmers.se;chalmers.se", "position": "PhD student;MS student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwanner2024predicting,\ntitle={Predicting Ground State Properties: Constant Sample Complexity and Deep Learning Algorithms},\nauthor={Marc Wanner and Laura Lewis and Chiranjib Bhattacharyya and Devdatt Dubhashi and Alexandru Gheorghiu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ybLXvqJyQA}\n}", "github": "", "reviewers": "u4Mp;KHZF;cLpw;7tNg;MeLw;vDXc", "pdf_size": 1053798, "rating": "3;5;5;5;7;7", "confidence": "4;2;1;2;3;3", "soundness": "3;3;3;1;4;4", "novelty": "2;3;2;3;3;3", "presentation": "2;2;3;2;4;4", "wc_summary": "141;48;93;199;76;88", "wc_strengths": "78;70;30;16;42;79", "wc_weaknesses": "366;60;92;109;69;47", "wc_questions": "7;69;15;16;2;70", "wc_limitations": "1;6;6;6;2;12", "wc_review": "593;253;236;346;191;296", "wc_reply_reviewers": "0;12;14;15;19;17", "wc_reply_authors": "0;0;0;58;0;0", "reply_reviewers": "0;1;1;1;1;1", "reply_authors": "1;1;1;2;1;1", "rating_avg": [ 5.333333333333333, 1.3743685418725535 ], "confidence_avg": [ 2.5, 0.9574271077563381 ], "soundness_avg": [ 3.0, 1.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.8333333333333335, 0.8975274678557507 ], "wc_summary_avg": [ 107.5, 49.35838327984416 ], "wc_strengths_avg": [ 52.5, 24.520399670478458 ], "wc_weaknesses_avg": [ 123.83333333333333, 110.19893627234138 ], "wc_questions_avg": [ 29.833333333333332, 28.44536673851981 ], "wc_limitations_avg": [ 5.5, 3.547299442298794 ], "wc_review_avg": [ 319.1666666666667, 131.6325905271521 ], "wc_reply_reviewers_avg": [ 12.833333333333334, 6.148622248565572 ], "wc_reply_authors_avg": [ 9.666666666666666, 21.615323782497967 ], "reply_reviewers_avg": [ 0.8333333333333334, 0.372677996249965 ], "reply_authors_avg": [ 1.1666666666666667, 0.3726779962499649 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1266600992762247, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17923219460115222644&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "chalmers.se;cam.ac.uk;iisc.ac.in;chalmers.se;chalmers.se", "author_num": 5, "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Chalmers University of Technology;University of Cambridge;Indian Institute of Science", "aff_unique_dep": ";;", "aff_unique_url": "https://www.chalmers.se;https://www.cam.ac.uk;https://www.iisc.ac.in", "aff_unique_abbr": "Chalmers;Cambridge;IISc", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Bangalore", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Sweden;United Kingdom;India" }, { "title": "Auditing Local Explanations is Hard", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93005", "id": "ybMrn4tdn0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ybMrn4tdn0", "openreview": "https://openreview.net/forum?id=ybMrn4tdn0", "poster": "", "project": "", "author_site": "Robi Bhattacharjee, Ulrike Luxburg", "tldr": "", "abstract": "In sensitive contexts, providers of machine learning algorithms are increasingly required to give explanations for their algorithms' decisions. However, explanation receivers might not trust the provider, who potentially could output misleading or manipulated explanations. In this work, we investigate an auditing framework in which a third-party auditor or a collective of users attempts to sanity-check explanations: they can query model decisions and the corresponding local explanations, pool all the information received, and then check for basic consistency properties. We prove upper and lower bounds on the amount of queries that are needed for an auditor to succeed within this framework. Our results show that successful auditing requires a potentially exorbitant number of queries -- particularly in high dimensional cases. Our analysis also reveals that a key property is the ``locality'' of the provided explanations --- a quantity that so far has not been paid much attention to in the explainability literature. Looking forward, our results suggest that for complex high-dimensional settings, merely providing a pointwise prediction and explanation could be insufficient, as there is no way for the users to verify that the provided explanations are not completely made-up.", "keywords": "explanation;auditing;trust;regulation", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Robi Bhattacharjee;Ulrike von Luxburg", "authorids": "~Robi_Bhattacharjee1;~Ulrike_von_Luxburg1", "gender": "M;F", "homepage": ";", "dblp": "237/9976;06/1082", "google_scholar": ";mMifMdoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Robi_Bhattacharjee1;~Ulrike_von_Luxburg1", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of Tuebingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de", "position": "Postdoc;Professor", "bibtex": "@inproceedings{\nbhattacharjee2024auditing,\ntitle={Auditing Local Explanations is Hard},\nauthor={Robi Bhattacharjee and Ulrike von Luxburg},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ybMrn4tdn0}\n}", "github": "", "reviewers": "6haQ;cVCK;mYL9;KRVz", "pdf_size": 839598, "rating": "4;6;7;7", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "67;76;208;25", "wc_strengths": "33;81;101;38", "wc_weaknesses": "413;201;275;297", "wc_questions": "73;66;2;142", "wc_limitations": "63;14;1;7", "wc_review": "649;438;587;509", "wc_reply_reviewers": "201;24;45;16", "wc_reply_authors": "181;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 94.0, 68.57477670397476 ], "wc_strengths_avg": [ 63.25, 28.69124430902222 ], "wc_weaknesses_avg": [ 296.5, 76.08383533970931 ], "wc_questions_avg": [ 70.75, 49.57506933933628 ], "wc_limitations_avg": [ 21.25, 24.539508960042376 ], "wc_review_avg": [ 545.75, 79.56561757442721 ], "wc_reply_reviewers_avg": [ 71.5, 75.51324387152229 ], "wc_reply_authors_avg": [ 45.25, 78.3752990424917 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13403200573866149531&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "uni-tuebingen.de;uni-tuebingen.de", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of Tuebingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Policy Aggregation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93004", "id": "ybiUVIxJth", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ybiUVIxJth", "openreview": "https://openreview.net/forum?id=ybiUVIxJth", "poster": "/media/PosterPDFs/NeurIPS%202024/93004.png?t=1733840944.4129004", "project": "", "author_site": "Parand A. Alamdari, Soroush Ebadian, Ariel Procaccia", "tldr": "", "abstract": "We consider the challenge of AI value alignment with multiple individuals that have different reward functions and optimal policies in an underlying Markov decision process. We formalize this problem as one of *policy aggregation*, where the goal is to identify a desirable collective policy. We argue that an approach informed by social choice theory is especially suitable. Our key insight is that social choice methods can be reinterpreted by identifying ordinal preferences with volumes of subsets of the *state-action occupancy polytope*. Building on this insight, we demonstrate that a variety of methods \u2014 including approval voting, Borda count, the proportional veto core, and quantile fairness \u2014 can be practically applied to policy aggregation.", "keywords": "Markov decision process;reinforcement learning;AI alignment", "primary_area": "algorithmic_game_theory", "supplementary_material": "", "author": "Parand A. Alamdari;Soroush Ebadian;Ariel D. Procaccia", "authorids": "~Parand_A._Alamdari1;~Soroush_Ebadian1;~Ariel_D._Procaccia1", "gender": "M;M;F", "homepage": "https://ebadian.org/;http://procaccia.info/;http://praal.github.io", "dblp": "242/8319.html;p/ArielDProcaccia;266/1421", "google_scholar": "tN4kqvYAAAAJ;https://scholar.google.com.tw/citations?user=8ZpV-lkAAAAJ;WE3XiuoAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Soroush_Ebadian1;~Ariel_Procaccia1;~Parand_Alizadeh_Alamdari1", "aff": "University of Toronto;Harvard University;University of Toronto", "aff_domain": "cs.toronto.edu;harvard.edu;cs.toronto.edu", "position": "PhD student;Gordon McKay Professor of Computer Science;PhD student", "bibtex": "@inproceedings{\nalamdari2024policy,\ntitle={Policy Aggregation},\nauthor={Parand A. Alamdari and Soroush Ebadian and Ariel D. Procaccia},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ybiUVIxJth}\n}", "github": "", "reviewers": "61yS;7MP7;vrsi", "pdf_size": 504080, "rating": "6;6;7", "confidence": "3;3;3", "soundness": "3;3;4", "novelty": "2;3;3", "presentation": "3;3;4", "wc_summary": "85;117;237", "wc_strengths": "47;61;94", "wc_weaknesses": "103;52;84", "wc_questions": "74;14;10", "wc_limitations": "7;19;6", "wc_review": "316;263;431", "wc_reply_reviewers": "80;0;20", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 146.33333333333334, 65.42850211405492 ], "wc_strengths_avg": [ 67.33333333333333, 19.70335560817553 ], "wc_weaknesses_avg": [ 79.66666666666667, 21.044925490219462 ], "wc_questions_avg": [ 32.666666666666664, 29.272664533466862 ], "wc_limitations_avg": [ 10.666666666666666, 5.90668171555645 ], "wc_review_avg": [ 336.6666666666667, 70.12528470926557 ], "wc_reply_reviewers_avg": [ 33.333333333333336, 33.993463423951894 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 9, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7195267300444546807&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 9, "email": "cs.toronto.edu;harvard.edu;cs.toronto.edu", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Toronto;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.harvard.edu", "aff_unique_abbr": "U of T;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "yeFE8HCnZB", "title": "NerfBaselines: Consistent and Reproducible Evaluation of Novel View Synthesis Methods", "track": "Datasets & Benchmarks", "status": "Reject", "tldr": "", "abstract": "Novel view synthesis is an important problem with many applications, including AR/VR, gaming, and simulations for robotics. With the recent rapid development of Neural Radiance Fields (NeRFs) and 3D Gaussian Splatting (3DGS) methods, it is becoming difficult to keep track of the current state of the art (SoTA) due to methods using different evaluation protocols, codebases being difficult to install and use, and methods not generalizing well to novel 3D scenes. Our experiments support this claim by showing that tiny differences in evaluation protocols of various methods can lead to inconsistent reported metrics. To address these issues, we propose a framework called NerfBaselines, which simplifies the installation of various methods, provides consistent benchmarking tools, and ensures reproducibility. We validate our implementation experimentally by reproducing numbers reported in the original papers. To further improve the accessibility, we release a web platform where commonly used methods are compared on standard benchmarks. Web: https://jkulhanek.com/nerfbaselines", "keywords": "NeRF;3DGS;Benchmarks;Novel View Synthesis Evaluation", "primary_area": "", "supplementary_material": "/attachment/571620f7504ba31cd2839ce40b28356441ac4661.zip", "author": "Jonas Kulhanek;Torsten Sattler", "authorids": "~Jonas_Kulhanek1;~Torsten_Sattler1", "gender": "M;M", "homepage": "https://jkulhanek.com;https://tsattler.github.io/", "dblp": "247/1194;51/9054", "google_scholar": "YDNzfN4AAAAJ;jzx6_ZIAAAAJ", "orcid": "0000-0002-8437-3626;0000-0001-9760-4553", "linkedin": ";torsten-sattler-ba2ab0145", "or_profile": "~Jonas_Kulhanek1;~Torsten_Sattler1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;CIIRC, Czech Technical University in Prague", "aff_domain": "inf.ethz.ch;cvut.cz", "position": "Intern;Senior Researcher", "bibtex": "@misc{\nanonymous2024nerfbaselines,\ntitle={NerfBaselines: Consistent and Reproducible Evaluation of Novel View Synthesis Methods},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=yeFE8HCnZB}\n}", "github": "", "project": "", "reviewers": "KmxW;Zc83;mXZR;BETo", "site": "https://openreview.net/forum?id=yeFE8HCnZB", "pdf_size": 23995940, "rating": "5;5;6;6", "confidence": "3;2;4;4", "wc_summary_and_contributions": "50;61;106;60", "wc_strengths": "38;24;112;69", "wc_improvement": "108;28;132;162", "wc_limitations": "26;4;43;9", "wc_correctness": "17;1;21;15", "wc_clarity": "1;1;12;6", "wc_relation_to_prior_work": "28;1;31;13", "wc_documentation": "1;1;29;10", "wc_additional_feedback": "1;1;1;1", "wc_review": "270;122;487;345", "wc_reply_reviewers": "0;0;61;127", "wc_reply_authors": "50;0;15;363", "reply_reviewers": "0;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.25, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 69.25, 21.649191670822262 ], "wc_strengths_avg": [ 60.75, 33.77406549410361 ], "wc_improvement_avg": [ 107.5, 49.726753362752326 ], "wc_limitations_avg": [ 20.5, 15.337861650177967 ], "wc_correctness_avg": [ 13.5, 7.533259586659682 ], "wc_clarity_avg": [ 5.0, 4.527692569068709 ], "wc_relation_to_prior_work_avg": [ 18.25, 12.07010770457331 ], "wc_documentation_avg": [ 10.25, 11.431863365173676 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 306.0, 131.75166033109412 ], "wc_reply_reviewers_avg": [ 47.0, 52.47380298777667 ], "wc_reply_authors_avg": [ 107.0, 148.91104727319595 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.9045340337332909, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17657165895499969573&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Czech Technical University in Prague", "aff_unique_dep": "Department of Computer Science;CIIRC", "aff_unique_url": "https://www.ethz.ch;https://www.ciirc.cvut.cz/", "aff_unique_abbr": "ETHZ;CTU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Zurich;Prague", "aff_country_unique_index": "0;1", "aff_country_unique": "Switzerland;Czech Republic" }, { "title": "Learning 3D Garment Animation from Trajectories of A Piece of Cloth", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93003", "id": "yeFx5NQmr7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yeFx5NQmr7", "openreview": "https://openreview.net/forum?id=yeFx5NQmr7", "poster": "/media/PosterPDFs/NeurIPS%202024/93003.png?t=1731748465.646109", "project": "", "author_site": "YIDI SHAO, Chen Change Loy, Bo Dai", "tldr": "", "abstract": "Garment animation is ubiquitous in various applications, such as virtual reality, gaming, and film producing. Recently, learning-based approaches obtain compelling performance in animating diverse garments under versatile scenarios. Nevertheless, to mimic the deformations of the observed garments, data-driven methods require large scale of garment data, which are both resource-wise expensive and time-consuming. In addition, forcing models to match the dynamics of observed garment animation may hinder the potentials to generalize to unseen cases. In this paper, instead of using garment-wise supervised-learning we adopt a disentangled scheme to learn how to animate observed garments: 1). learning constitutive behaviors from the observed cloth; 2). dynamically animate various garments constrained by the learned constitutive laws. Specifically, we propose Energy Unit network (EUNet) to model the constitutive relations in the format of energy. Without the priors from analytical physics models and differentiable simulation engines, EUNet is able to directly capture the constitutive behaviors from the observed piece of cloth and uniformly describes the change of energy caused by deformations, such as stretching and bending. We further apply the pre-trained EUNet to animate various garments based on energy optimizations. The disentangled scheme alleviates the need of garment data and enables us to utilize the dynamics of a piece of cloth for animating garments. Experiments show that while EUNet effectively delivers the energy gradients due to the deformations, models constrained by EUNet achieve more stable and physically plausible performance comparing with those trained in garment-wise supervised manner.", "keywords": "Garment;Cloth;Simulation;Constitutive Model;3D", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "/attachment/1c7cbcf8bbd3cf3a0847668690be98d26aa50b61.zip", "author": "Yidi Shao;Chen Change Loy;Bo Dai", "authorids": "~Yidi_Shao1;~Chen_Change_Loy2;~Bo_Dai2", "gender": "M;M;M", "homepage": "https://github.com/ftbabi;https://www.mmlab-ntu.com/person/ccloy/index.html;http://daibo.info/", "dblp": "291/7118;01/5855;64/2903-2", "google_scholar": ";https://scholar.google.co.uk/citations?user=559LF80AAAAJ;https://scholar.google.com.hk/citations?user=KNWTvgEAAAAJ", "orcid": ";0000-0001-5345-1591;0000-0003-0777-9232", "linkedin": ";;", "or_profile": "~Yidi_Shao1;~Chen_Change_Loy2;~Bo_Dai2", "aff": "Nanyang Technological University;Nanyang Technological University;Shanghai AI Laboratory", "aff_domain": "ntu.edu.sg;ntu.edu.sg;pjlab.org.cn", "position": "PhD student;Full Professor;Scientist", "bibtex": "@inproceedings{\nshao2024learning,\ntitle={Learning 3D Garment Animation from Trajectories of A Piece of Cloth},\nauthor={Yidi Shao and Chen Change Loy and Bo Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yeFx5NQmr7}\n}", "github": "", "reviewers": "cPXz;Hyzo;QaVe;TX4H;CDgw", "pdf_size": 47076041, "rating": "4;4;5;5;6", "confidence": "4;4;4;4;2", "soundness": "2;1;2;3;3", "novelty": "2;1;3;3;3", "presentation": "3;2;3;2;3", "wc_summary": "81;42;82;78;81", "wc_strengths": "73;19;57;79;188", "wc_weaknesses": "121;204;343;41;119", "wc_questions": "43;150;4;41;2", "wc_limitations": "3;17;4;39;2", "wc_review": "321;432;490;278;392", "wc_reply_reviewers": "101;76;176;324;0", "wc_reply_authors": "511;203;309;90;0", "reply_reviewers": "1;1;1;1;0", "reply_authors": "2;2;2;2;1", "rating_avg": [ 4.8, 0.7483314773547882 ], "confidence_avg": [ 3.6, 0.8 ], "soundness_avg": [ 2.2, 0.7483314773547882 ], "novelty_avg": [ 2.4, 0.8 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 72.8, 15.458331087151677 ], "wc_strengths_avg": [ 83.2, 56.41418261394913 ], "wc_weaknesses_avg": [ 165.6, 102.59746585564382 ], "wc_questions_avg": [ 48.0, 53.907327887774215 ], "wc_limitations_avg": [ 13.0, 14.099645385611653 ], "wc_review_avg": [ 382.6, 75.89360974416752 ], "wc_reply_reviewers_avg": [ 135.4, 109.78451621244227 ], "wc_reply_authors_avg": [ 222.6, 177.85229827022195 ], "reply_reviewers_avg": [ 0.8, 0.4 ], "reply_authors_avg": [ 1.8, 0.4 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.8017837257372731, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17340195475789667522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "ntu.edu.sg;ntu.edu.sg;pjlab.org.cn", "author_num": 3, "aff_unique_index": "0;0;1", "aff_unique_norm": "Nanyang Technological University;Shanghai AI Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "NTU;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;China" }, { "title": "Color-Oriented Redundancy Reduction in Dataset Distillation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93002", "id": "yfQwyxiSJ7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yfQwyxiSJ7", "openreview": "https://openreview.net/forum?id=yfQwyxiSJ7", "poster": "/media/PosterPDFs/NeurIPS%202024/93002.png?t=1733810192.8979454", "project": "", "author_site": "Bowen Yuan, Zijian Wang, Mahsa Baktashmotlagh, Yadan Luo, Zi Huang", "tldr": "", "abstract": "Dataset Distillation (DD) is designed to generate condensed representations of extensive image datasets, enhancing training efficiency. Despite recent advances, there remains considerable potential for improvement, particularly in addressing the notable redundancy within the color space of distilled images. In this paper, we propose a two-fold optimization strategy to minimize color redundancy at the individual image and overall dataset levels, respectively. At the image level, we employ a palette network, a specialized neural network, to dynamically allocate colors from a reduced color space to each pixel. The palette network identifies essential areas in synthetic images for model training, and consequently assigns more unique colors to them. At the dataset level, we develop a color-guided initialization strategy to minimize redundancy among images. Representative images with the least replicated color patterns are selected based on the information gain. A comprehensive performance study involving various datasets and evaluation scenarios is conducted, demonstrating the superior performance of our proposed color-aware DD compared to existing DD methods.", "keywords": "Computer Vision;Data Distillation;Parameterization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Bowen Yuan;Zijian Wang;Mahsa Baktashmotlagh;Yadan Luo;Zi Huang", "authorids": "~Bowen_Yuan3;~Zijian_Wang2;~Mahsa_Baktashmotlagh1;~Yadan_Luo1;~Zi_Huang1", "gender": "M;M;F;F;F", "homepage": ";;;https://sites.google.com/view/yadanluo/home;https://staff.itee.uq.edu.au/huang/", "dblp": ";03/4540-9;119/1507;182/2414;70/6862", "google_scholar": ";OfTXHvsAAAAJ;https://scholar.google.com.au/citations?user=3kaiBBYAAAAJ;3IfL11AAAAAJ;https://scholar.google.com.au/citations?user=iAWMsgEAAAAJ", "orcid": "0009-0008-5187-4564;;;0000-0001-6272-2971;", "linkedin": "bowen-yuan-721156294/;;;;", "or_profile": "~Bowen_Yuan3;~Zijian_Wang2;~Mahsa_Baktashmotlagh1;~Yadan_Luo1;~Zi_Huang1", "aff": "University of Queensland;The University of Queensland;The University of Queensland;The University of Queensland;University of Queensland", "aff_domain": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "position": "PhD student;Postdoc;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyuan2024colororiented,\ntitle={Color-Oriented Redundancy Reduction in Dataset Distillation},\nauthor={Bowen Yuan and Zijian Wang and Mahsa Baktashmotlagh and Yadan Luo and Zi Huang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yfQwyxiSJ7}\n}", "github": "", "reviewers": "uhQ2;5XJK;VepF;Jnju", "pdf_size": 9844493, "rating": "5;5;6;7", "confidence": "2;3;3;5", "soundness": "3;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "86;52;35;79", "wc_strengths": "52;28;31;42", "wc_weaknesses": "57;35;137;71", "wc_questions": "2;43;44;34", "wc_limitations": "2;1;30;27", "wc_review": "199;159;277;253", "wc_reply_reviewers": "0;0;49;16", "wc_reply_authors": "72;64;49;0", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.25, 1.0897247358851685 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.0, 20.554804791094465 ], "wc_strengths_avg": [ 38.25, 9.496709956611289 ], "wc_weaknesses_avg": [ 75.0, 38.02630668366309 ], "wc_questions_avg": [ 30.75, 17.049560111627514 ], "wc_limitations_avg": [ 15.0, 13.546217184144066 ], "wc_review_avg": [ 222.0, 46.05431575867782 ], "wc_reply_reviewers_avg": [ 16.25, 20.004686950812303 ], "wc_reply_authors_avg": [ 46.25, 27.949731662397046 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.899228803025897, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17149738212130258725&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Bag of Tricks: Benchmarking of Jailbreak Attacks on LLMs", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97431", "id": "yg4Tt2QeU7", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yg4Tt2QeU7", "openreview": "https://openreview.net/forum?id=yg4Tt2QeU7", "poster": "", "project": "", "author_site": "Zhao Xu, Fan LIU, Hao Liu", "tldr": "", "abstract": "Although Large Language Models (LLMs) have demonstrated significant capabilities in executing complex tasks in a zero-shot manner, they are susceptible to jailbreak attacks and can be manipulated to produce harmful outputs. Recently, a growing body of research has categorized jailbreak attacks into token-level and prompt-level attacks. However, previous work primarily overlooks the diverse key factors of jailbreak attacks, with most studies concentrating on LLM vulnerabilities and lacking exploration of defense-enhanced LLMs. To address these issues, we introduced JailTrickBench to evaluate the impact of various attack settings on LLM performance and provide a baseline for jailbreak attacks, encouraging the adoption of a standardized evaluation framework. Specifically, we evaluate the eight key factors of implementing jailbreak attacks on LLMs from both target-level and attack-level perspectives. We further conduct seven representative jailbreak attacks on six defense methods across two widely used datasets, encompassing approximately 354 experiments with about 55,000 GPU hours on A800-80G. Our experimental results highlight the need for standardized benchmarking to evaluate these attacks on defense-enhanced LLMs. Our code is available at https://github.com/usail-hkust/JailTrickBench.", "keywords": "jailbreak attack;jailbreak defense;LLM;benchmark", "primary_area": "", "supplementary_material": "/attachment/df17b1523703ab239019493628dae42e0089cd1a.zip", "author": "Zhao Xu;Fan Liu;Hao Liu", "authorids": "~Zhao_Xu5;~Fan_Liu5;~Hao_Liu17", "gender": ";;", "homepage": ";https://luckyfan-cs.github.io/;https://raymondhliu.github.io/", "dblp": ";;09/3214-26", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "orcid": ";;0000-0003-4271-1567", "linkedin": ";;", "or_profile": "~Zhao_Xu5;~Fan_Liu5;~Hao_Liu17", "aff": ";Hong Kong University of Science and Technology (Guangzhou);The Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": ";ust.hk;hkust-gz.edu.cn", "position": ";PhD student;Assistant Professor", "bibtex": "@inproceedings{\nxu2024bag,\ntitle={Bag of Tricks: Benchmarking of Jailbreak Attacks on {LLM}s},\nauthor={Zhao Xu and Fan Liu and Hao Liu},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=yg4Tt2QeU7}\n}", "github": "", "reviewers": "DcAP;xgXx;cDmw;Duca", "pdf_size": 2021756, "rating": "6;6;7;7", "confidence": "5;3;3;4", "wc_summary_and_contributions": "56;27;103;28", "wc_strengths": "33;14;28;88", "wc_improvement": "106;5;236;18", "wc_limitations": "75;36;9;55", "wc_correctness": "1;1;18;1", "wc_clarity": "1;1;25;1", "wc_relation_to_prior_work": "1;1;38;24", "wc_documentation": "1;1;14;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "275;87;472;217", "wc_reply_reviewers": "30;26;35;7", "wc_reply_authors": "48;53;89;23", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "wc_summary_and_contributions_avg": [ 53.5, 30.858548248418945 ], "wc_strengths_avg": [ 40.75, 28.154706533721853 ], "wc_improvement_avg": [ 91.25, 92.16120387668555 ], "wc_limitations_avg": [ 43.75, 24.34517405975977 ], "wc_correctness_avg": [ 5.25, 7.361215932167728 ], "wc_clarity_avg": [ 7.0, 10.392304845413264 ], "wc_relation_to_prior_work_avg": [ 16.0, 15.795568998931314 ], "wc_documentation_avg": [ 4.25, 5.629165124598851 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 262.75, 138.66934592764184 ], "wc_reply_reviewers_avg": [ 24.5, 10.594810050208546 ], "wc_reply_authors_avg": [ 53.25, 23.562417108607512 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13477358374616651443&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": ";ust.hk;hkust-gz.edu.cn", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Hong Kong SAR;Guangzhou", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Optimal Algorithms for Learning Partitions with Faulty Oracles", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93001", "id": "ygDl8q02gA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ygDl8q02gA", "openreview": "https://openreview.net/forum?id=ygDl8q02gA", "poster": "", "project": "", "author_site": "Adela DePavia, Olga Medrano Martin del Campo, Erasmo Tani", "tldr": "", "abstract": "We consider a clustering problem where a learner seeks to partition a finite set by querying a faulty oracle. This models applications where learners crowdsource information from non-expert human workers or conduct noisy experiments to determine group structure. The learner aims to exactly recover a partition by submitting queries of the form ``are $u$ and $v$ in the same group?'' for any pair of elements $u$ and $v$ in the set. Moreover, because the learner only has access to faulty sources of information, they require an error-tolerant algorithm for this task: i.e. they must fully recover the correct partition, even if up to $\\ell$ answers are incorrect, for some error-tolerance parameter $\\ell$. We study the question: for any given error-tolerance $\\ell$, what is the minimum number of queries needed to learn a finite set partition of $n$ elements into $k$ groups? We design algorithms for this task and prove that they achieve optimal query complexity. To analyze our algorithms, we first highlight a connection between this task and correlation clustering. We then use this connection to build a R\u00e9nyi-Ulam style analytical framework for this problem, which yields matching lower bounds. Our analysis also reveals an inherent asymmetry between the query complexity necessary to be robust against false negative errors as opposed to false positive errors.", "keywords": "clustering; error tolerant; partitions; query complexity; oracle advice; graph learning; active learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Adela Frances DePavia;Olga Medrano Mart\u00edn del Campo;Erasmo Tani", "authorids": "~Adela_Frances_DePavia1;~Olga_Medrano_Mart\u00edn_del_Campo1;~Erasmo_Tani1", "gender": "F;;", "homepage": "https://cam.uchicago.edu/people/profile/adela-depavia/;;", "dblp": ";;", "google_scholar": "rxMmdJYAAAAJ;;", "orcid": ";0000-0003-3850-8662;", "linkedin": ";;", "or_profile": "~Adela_Frances_DePavia1;~Olga_Medrano_Mart\u00edn_del_Campo1;~Erasmo_Tani1", "aff": "University of Chicago;University of Chicago;", "aff_domain": "uchicago.edu;uchicago.edu;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\ndepavia2024optimal,\ntitle={Optimal Algorithms for Learning Partitions with Faulty Oracles},\nauthor={Adela Frances DePavia and Olga Medrano Mart{\\'\\i}n del Campo and Erasmo Tani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ygDl8q02gA}\n}", "github": "", "reviewers": "GCFN;BYtw;E2Nr;2QcX", "pdf_size": 555811, "rating": "6;6;6;8", "confidence": "4;4;4;4", "soundness": "4;3;3;4", "novelty": "3;3;3;3", "presentation": "3;3;3;4", "wc_summary": "184;287;132;43", "wc_strengths": "72;101;53;108", "wc_weaknesses": "170;106;126;169", "wc_questions": "56;43;99;55", "wc_limitations": "1;4;1;8", "wc_review": "483;541;411;383", "wc_reply_reviewers": "34;99;14;14", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 161.5, 88.27372202416753 ], "wc_strengths_avg": [ 83.5, 22.18670773233379 ], "wc_weaknesses_avg": [ 142.75, 27.67105888830422 ], "wc_questions_avg": [ 63.25, 21.26470079733077 ], "wc_limitations_avg": [ 3.5, 2.8722813232690143 ], "wc_review_avg": [ 454.5, 61.84456322103019 ], "wc_reply_reviewers_avg": [ 40.25, 34.888214342382156 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l6S8FO-X2HoJ:scholar.google.com/&scioq=Optimal+Algorithms+for+Learning+Partitions+with+Faulty+Oracles&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "uchicago.edu;uchicago.edu;", "author_num": 3, "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Avoiding Undesired Future with Minimal Cost in Non-Stationary Environments", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/93000", "id": "yhd2kHHNtB", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yhd2kHHNtB", "openreview": "https://openreview.net/forum?id=yhd2kHHNtB", "poster": "/media/PosterPDFs/NeurIPS%202024/93000.png?t=1731426468.0668082", "project": "", "author_site": "Wen-Bo Du, Tian Qin, Tian-Zuo Wang, Zhi-Hua Zhou", "tldr": "", "abstract": "Machine learning (ML) has achieved remarkable success in prediction tasks. In many real-world scenarios, rather than solely predicting an outcome using an ML model, the crucial concern is how to make decisions to prevent the occurrence of undesired outcomes, known as the *avoiding undesired future (AUF)* problem. To this end, a new framework called *rehearsal learning* has been proposed recently, which works effectively in stationary environments by leveraging the influence relations among variables. In real tasks, however, the environments are usually non-stationary, where the influence relations may be *dynamic*, leading to the failure of AUF by the existing method. In this paper, we introduce a novel sequential methodology that effectively updates the estimates of dynamic influence relations, which are crucial for rehearsal learning to prevent undesired outcomes in non-stationary environments. Meanwhile, we take the cost of decision actions into account and provide the formulation of AUF problem with minimal action cost under non-stationarity. We prove that in linear Gaussian cases, the problem can be transformed into the well-studied convex quadratically constrained quadratic program (QCQP). In this way, we establish the first polynomial-time rehearsal-based approach for addressing the AUF problem. Theoretical and experimental results validate the effectiveness and efficiency of our method under certain circumstances.", "keywords": "decision-making;structural rehearsal model;non-stationary environment;alteration cost", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/9b7caf1ecd079f3602cd8bc81b96064aca72974e.zip", "author": "Wen-Bo Du;Tian Qin;Tian-Zuo Wang;Zhi-Hua Zhou", "authorids": "~Wen-Bo_Du1;~Tian_Qin1;~Tian-Zuo_Wang1;~Zhi-Hua_Zhou2", "gender": "M;M;M;M", "homepage": "https://www.lamda.nju.edu.cn/duwb/;http://www.lamda.nju.edu.cn/qint/;http://www.lamda.nju.edu.cn/wangtz/;https://cs.nju.edu.cn/zhouzh/", "dblp": "35/7086-2;https://dblp.uni-trier.de/pid/133/4172;249/9504;z/ZhiHuaZhou", "google_scholar": "AQyRj3oAAAAJ;5tIqs3sAAAAJ;xUyl98AAAAAJ;https://scholar.google.com.tw/citations?user=rSVIHasAAAAJ", "orcid": ";;;0000-0003-0746-1494", "linkedin": ";;;", "or_profile": "~Wen-Bo_Du1;~Tian_Qin1;~Tian-Zuo_Wang1;~Zhi-hua_Zhou1", "aff": "Nanjing University;Nanjing University;Nanjing university;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\ndu2024avoiding,\ntitle={Avoiding Undesired Future with Minimal Cost in Non-Stationary Environments},\nauthor={Wen-Bo Du and Tian Qin and Tian-Zuo Wang and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yhd2kHHNtB}\n}", "github": "", "reviewers": "weAS;w1Mx;pTXc", "pdf_size": 1311231, "rating": "5;7;7", "confidence": "3;4;3", "soundness": "3;4;4", "novelty": "3;4;4", "presentation": "3;3;4", "wc_summary": "66;108;68", "wc_strengths": "27;65;117", "wc_weaknesses": "144;107;21", "wc_questions": "30;50;203", "wc_limitations": "2;2;2", "wc_review": "269;332;411", "wc_reply_reviewers": "15;15;0", "wc_reply_authors": "30;25;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.333333333333333, 0.9428090415820634 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 80.66666666666667, 19.344824171395878 ], "wc_strengths_avg": [ 69.66666666666667, 36.890227552685126 ], "wc_weaknesses_avg": [ 90.66666666666667, 51.525613910839425 ], "wc_questions_avg": [ 94.33333333333333, 77.27152702573497 ], "wc_limitations_avg": [ 2.0, 0.0 ], "wc_review_avg": [ 337.3333333333333, 58.09379389305616 ], "wc_reply_reviewers_avg": [ 10.0, 7.0710678118654755 ], "wc_reply_authors_avg": [ 18.333333333333332, 13.123346456686352 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.49999999999999983, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13807046422796720991&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "From Trojan Horses to Castle Walls: Unveiling Bilateral Data Poisoning Effects in Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92999", "id": "yiXZZC5qDI", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yiXZZC5qDI", "openreview": "https://openreview.net/forum?id=yiXZZC5qDI", "poster": "/media/PosterPDFs/NeurIPS%202024/92999.png?t=1729853234.7251022", "project": "", "author_site": "Zhuoshi Pan, Yuguang Yao, Gaowen Liu, Bingquan Shen, H. Vicky Zhao, Ramana Kompella, Sijia Liu", "tldr": "", "abstract": "While state-of-the-art diffusion models (DMs) excel in image generation, concerns regarding their security persist. Earlier research highlighted DMs' vulnerability to data poisoning attacks, but these studies placed stricter requirements than conventional methods like 'BadNets' in image classification. This is because the art necessitates modifications to the diffusion training and sampling procedures. Unlike the prior work, we investigate whether BadNets-like data poisoning methods can directly degrade the generation by DMs. In other words, if only the training dataset is contaminated (without manipulating the diffusion process), how will this affect the performance of learned DMs? In this setting, we uncover bilateral data poisoning effects that not only serve an adversarial purpose (compromising the functionality of DMs) but also offer a defensive advantage (which can be leveraged for defense in classification tasks against poisoning attacks). We show that a BadNets-like data poisoning attack remains effective in DMs for producing incorrect images (misaligned with the intended text conditions). Meanwhile, poisoned DMs exhibit an increased ratio of triggers, a phenomenon we refer to as 'trigger amplification', among the generated images. This insight can be then used to enhance the detection of poisoned training data. In addition, even under a low poisoning ratio, studying the poisoning effects of DMs is also valuable for designing robust image classifiers against such attacks. Last but not least, we establish a meaningful linkage between data poisoning and the phenomenon of data replications by exploring DMs' inherent data memorization tendencies. Code is available at https://github.com/OPTML-Group/BiBadDiff.", "keywords": "Diffusion model;data poisoning;data replication;diffusion classifier", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/3a0e2a13593ea6ef621c3e26114d5847789f70bf.zip", "author": "Zhuoshi Pan;Yuguang Yao;Gaowen Liu;Bingquan Shen;H. Vicky Zhao;Ramana Rao Kompella;Sijia Liu", "authorids": "~Zhuoshi_Pan2;~Yuguang_Yao1;~Gaowen_Liu4;~Bingquan_Shen1;~H._Vicky_Zhao1;~Ramana_Rao_Kompella1;~Sijia_Liu1", "gender": "M;M;F;;F;M;M", "homepage": "https://pzs19.github.io;https://www.cse.msu.edu/~yaoyugua/;;;;https://linkedin.com/en/rkompella;https://lsjxjtu.github.io/", "dblp": "359/9934;238/9467;136/1007;151/9308;;98/2327;128/6972-1", "google_scholar": ";-chIdAkAAAAJ;NIv_aeQAAAAJ;https://scholar.google.com.sg/citations?user=zrJdj6YAAAAJ;;uf9RZboAAAAJ;C7dO_UgAAAAJ", "orcid": ";;0009-0000-9194-1233;;;;", "linkedin": ";tonyyaomsu/;;;hong-vicky-zhao-08599110/;;", "or_profile": "~Zhuoshi_Pan2;~Yuguang_Yao1;~Gaowen_Liu4;~Bingquan_Shen1;~H._Vicky_Zhao1;~Ramana_Rao_Kompella1;~Sijia_Liu1", "aff": "Tsinghua University;Michigan State University;Cisco Systems;DSO National Labs;Tsinghua University;Cisco;Michigan State University", "aff_domain": "tsinghua.edu.cn;msu.edu;cisco.com;dso.org.sg;tsinghua.edu.cn;cisco.com;msu.edu", "position": "MS student;PhD student;Researcher;Researcher;Associate Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\npan2024from,\ntitle={From Trojan Horses to Castle Walls: Unveiling Bilateral Data Poisoning Effects in Diffusion Models},\nauthor={Zhuoshi Pan and Yuguang Yao and Gaowen Liu and Bingquan Shen and H. Vicky Zhao and Ramana Rao Kompella and Sijia Liu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yiXZZC5qDI}\n}", "github": "", "reviewers": "HqnS;jBz2;A3yL;NHzT", "pdf_size": 14890665, "rating": "6;6;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;3;3;3", "presentation": "1;3;3;2", "wc_summary": "113;255;92;15", "wc_strengths": "76;319;44;32", "wc_weaknesses": "494;132;16;88", "wc_questions": "141;138;35;2", "wc_limitations": "58;141;6;1", "wc_review": "882;985;193;138", "wc_reply_reviewers": "180;20;5;13", "wc_reply_authors": "115;106;25;38", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 118.75, 86.71324869937696 ], "wc_strengths_avg": [ 117.75, 117.2995630852903 ], "wc_weaknesses_avg": [ 182.5, 184.55012869136667 ], "wc_questions_avg": [ 79.0, 61.62385901580653 ], "wc_limitations_avg": [ 51.5, 56.28720991486432 ], "wc_review_avg": [ 549.5, 386.21270046439435 ], "wc_reply_reviewers_avg": [ 54.5, 72.65156570921235 ], "wc_reply_authors_avg": [ 71.0, 39.89360851063739 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8997359844608568134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "tsinghua.edu.cn;msu.edu;cisco.com;dso.org.sg;tsinghua.edu.cn;cisco.com;msu.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;0;2;1", "aff_unique_norm": "Tsinghua University;Michigan State University;Cisco Systems;DSO National Laboratories", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.msu.edu;https://www.cisco.com;https://www.dso.org.sg", "aff_unique_abbr": "THU;MSU;Cisco;DSO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2;0;1;1", "aff_country_unique": "China;United States;Singapore" }, { "title": "Paloma: A Benchmark for Evaluating Language Model Fit", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97430", "id": "yjj8ele147", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yjj8ele147", "openreview": "https://openreview.net/forum?id=yjj8ele147", "poster": "/media/PosterPDFs/NeurIPS%202024/97430.png?t=1731719020.0417001", "project": "", "author_site": "Ian Magnusson, Akshita Bhagia, Valentin Hofmann, Luca Soldaini, Ananya Harsh Jha, Oyvind Tafjord, Dustin Schwenk, Evan Walsh, Yanai Elazar, Kyle Lo, Dirk Groeneveld, Iz Beltagy, Hannaneh Hajishirzi, Noah Smith, Kyle Richardson, Jesse Dodge", "tldr": "", "abstract": "Evaluations of language models (LMs) commonly report perplexity on monolithic data held out from training. Implicitly or explicitly, this data is composed of domains\u2014varying distributions of language. We introduce Perplexity Analysis for Language Model Assessment (Paloma), a benchmark to measure LM fit to 546 English and code domains, instead of assuming perplexity on one distribution extrapolates to others. We include two new datasets of the top 100 subreddits (e.g., r/depression on Reddit) and programming languages (e.g., Java on GitHub), both sources common in contemporary LMs. With our benchmark, we release 6 baseline 1B LMs carefully controlled to provide fair comparisons about which pretraining corpus is best and code for others to apply those controls to their own experiments. Our case studies demonstrate how the fine-grained results from Paloma surface findings such as that models pretrained without data beyond Common Crawl exhibit anomalous gaps in LM fit to many domains or that loss is dominated by the most frequently occurring strings in the vocabulary.", "keywords": "Perplexity;Language Modeling;Evaluation;Domains;Benchmark Contamination", "primary_area": "", "supplementary_material": "/attachment/316e0ea2e70d1f1588c9b1e40290b95fe2c66795.zip", "author": "Ian Magnusson;Akshita Bhagia;Valentin Hofmann;Luca Soldaini;Ananya Harsh Jha;Oyvind Tafjord;Dustin Schwenk;Evan Pete Walsh;Yanai Elazar;Kyle Lo;Dirk Groeneveld;Iz Beltagy;Hannaneh Hajishirzi;Noah A. Smith;Kyle Richardson;Jesse Dodge", "authorids": "~Ian_Magnusson1;~Akshita_Bhagia1;~Valentin_Hofmann1;~Luca_Soldaini1;~Ananya_Harsh_Jha2;~Oyvind_Tafjord2;~Dustin_Schwenk1;~Evan_Pete_Walsh1;~Yanai_Elazar1;~Kyle_Lo1;~Dirk_Groeneveld1;~Iz_Beltagy1;~Hannaneh_Hajishirzi1;~Noah_A._Smith2;~Kyle_Richardson1;~Jesse_Dodge1", "gender": ";F;;Non-Binary;M;M;;M;M;;;M;F;;M;M", "homepage": ";https://akshitab.github.io/;https://valentinhofmann.github.io/;https://soldaini.net;;;;https://github.com/epwalsh/;https://yanaiela.github.io;https://kyleclo.github.io/;;http://beltagy.net/;https://homes.cs.washington.edu/~hannaneh/;;https://www.nlp-kyle.com/;http://www.cs.cmu.edu/~jessed/", "dblp": ";321/0726;264/4665;160/1741;;178/8640;208/4259;;223/4533;220/2020;185/7781;;52/1296;;38/9169;49/11425", "google_scholar": ";fzH3_G4AAAAJ;bbHOPKwAAAAJ;3KPvwcgAAAAJ;KK_RffoAAAAJ;https://scholar.google.com/citations?hl=en;4yiNcJyuYb4C;;https://scholar.google.co.il/citations?user=7p_Ce8kAAAAJ;VJS12uMAAAAJ;KEhvGNMAAAAJ;jkV6H1gAAAAJ;LOV6_WIAAAAJ;;LmJN-n4AAAAJ;nHy_1doAAAAJ", "orcid": ";0000-0003-4848-3884;;0000-0001-6998-9863;;0000-0003-4190-5618;;;;;0000-0002-8274-768X;;;;;", "linkedin": ";;;soldni/;ananyaharshjha/;;;;yanai-elazar-7b345b95/;kylelo/;mechanicaldirk/;beltagy/;;;;", "or_profile": "~Ian_Magnusson1;~Akshita_Bhagia1;~Valentin_Hofmann1;~Luca_Soldaini1;~Ananya_Harsh_Jha2;~Oyvind_Tafjord2;~Dustin_Schwenk1;~Evan_Pete_Walsh1;~Yanai_Elazar1;~Kyle_Lo1;~Dirk_Groeneveld1;~Iz_Beltagy1;~Hannaneh_Hajishirzi1;~Noah_A._Smith2;~Kyle_Richardson1;~Jesse_Dodge1", "aff": ";Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Department of Computer Science;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence", "aff_domain": ";allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;cs.washington.edu;allenai.org;allenai.org;allenai.org;uw.edu;;allenai.org;allenai.org", "position": ";Researcher;Postdoc;Researcher;Researcher;Researcher;Researcher;Researcher;Postdoc;Researcher;Principal Researcher;Research Scientist;Associate Professor;;Research Scientist;Researcher", "bibtex": "@inproceedings{\nmagnusson2024paloma,\ntitle={Paloma: A Benchmark for Evaluating Language Model Fit},\nauthor={Ian Magnusson and Akshita Bhagia and Valentin Hofmann and Luca Soldaini and Ananya Harsh Jha and Oyvind Tafjord and Dustin Schwenk and Evan Pete Walsh and Yanai Elazar and Kyle Lo and Dirk Groeneveld and Iz Beltagy and Hannaneh Hajishirzi and Noah A. Smith and Kyle Richardson and Jesse Dodge},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=yjj8ele147}\n}", "github": "", "reviewers": "Camw;5GQQ;zXDY;kc9d", "pdf_size": 4768680, "rating": "6;7;7;8", "confidence": "3;4;4;3", "wc_summary_and_contributions": "66;85;88;118", "wc_strengths": "17;79;2;2", "wc_improvement": "92;240;2;2", "wc_limitations": "1;30;1;7", "wc_correctness": "1;45;1;2", "wc_clarity": "18;8;1;1", "wc_relation_to_prior_work": "1;46;1;1", "wc_documentation": "1;10;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "198;544;98;135", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "857;530;241;547", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.5, 0.5 ], "wc_summary_and_contributions_avg": [ 89.25, 18.619546181365433 ], "wc_strengths_avg": [ 25.0, 31.77262973063451 ], "wc_improvement_avg": [ 84.0, 97.27281223445738 ], "wc_limitations_avg": [ 9.75, 11.94518731540029 ], "wc_correctness_avg": [ 12.25, 18.91262805640718 ], "wc_clarity_avg": [ 7.0, 6.96419413859206 ], "wc_relation_to_prior_work_avg": [ 12.25, 19.48557158514987 ], "wc_documentation_avg": [ 3.25, 3.897114317029974 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 243.75, 176.99770478737852 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 543.75, 217.9350533989427 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 16, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12341219371612362640&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": ";allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;allenai.org;cs.washington.edu;allenai.org;allenai.org;allenai.org;uw.edu;;allenai.org;allenai.org", "author_num": 16, "aff_unique_index": "0;0;0;0;0;0;0;1;0;0;0;2;0;0", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Unknown Institution;University of Washington", "aff_unique_dep": ";Department of Computer Science;", "aff_unique_url": "https://allenai.org;;https://www.washington.edu", "aff_unique_abbr": "AI2;;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Controlling Continuous Relaxation for Combinatorial Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92998", "id": "ykACV1IhjD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ykACV1IhjD", "openreview": "https://openreview.net/forum?id=ykACV1IhjD", "poster": "/media/PosterPDFs/NeurIPS%202024/92998.png?t=1731401464.0384605", "project": "", "tldr": "", "abstract": "Unsupervised learning (UL)-based solvers for combinatorial optimization (CO) train a neural network that generates a soft solution by directly optimizing the CO objective using a continuous relaxation strategy. These solvers offer several advantages over traditional methods and other learning-based methods, particularly for large-scale CO problems. However, UL-based solvers face two practical issues: (I) an optimization issue, where UL-based solvers are easily trapped at local optima, and (II) a rounding issue, where UL-based solvers require artificial post-learning rounding from the continuous space back to the original discrete space, undermining the robustness of the results. This study proposes a Continuous Relaxation Annealing (CRA) strategy, an effective rounding-free learning method for UL-based solvers. CRA introduces a penalty term that dynamically shifts from prioritizing continuous solutions, effectively smoothing the non-convexity of the objective function, to enforcing discreteness, eliminating artificial rounding. Experimental results demonstrate that CRA significantly enhances the performance of UL-based solvers, outperforming existing UL-based solvers and greedy algorithms in complex CO problems. Additionally, CRA effectively eliminates artificial rounding and accelerates the learning process.", "keywords": "Combinatorial Optimization;Unsupervised Learning for Combinatorial Optimization;Learning for Combinatorial Optimization;Graph Neual Networks", "primary_area": "optimization", "supplementary_material": "/attachment/ecb0ebc609709640d4a1077a6a77cec8af865aad.zip", "author": "Yuma Ichikawa", "authorids": "~Yuma_Ichikawa1", "gender": "M", "homepage": "https://ichikawa-laboratory.com/", "dblp": "334/5344", "google_scholar": "IJIMbBwAAAAJ", "orcid": "0009-0004-4216-7017", "linkedin": "ichikawayuma1111/", "or_profile": "~Yuma_Ichikawa1", "aff": "The University of Tokyo, Tokyo Institute of Technology", "aff_domain": "u-tokyo.ac.jp", "position": "PhD student", "bibtex": "@inproceedings{\nichikawa2024controlling,\ntitle={Controlling Continuous Relaxation for Combinatorial Optimization},\nauthor={Yuma Ichikawa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ykACV1IhjD}\n}", "github": "", "reviewers": "qMFs;fKtw;J6G3;BuGW", "pdf_size": 1889298, "rating": "4;4;6;6", "confidence": "4;1;4;4", "soundness": "3;2;4;2", "novelty": "2;2;2;3", "presentation": "3;1;3;2", "wc_summary": "67;61;32;47", "wc_strengths": "55;22;19;67", "wc_weaknesses": "110;153;53;672", "wc_questions": "1;83;101;2", "wc_limitations": "1;1;2;2", "wc_review": "234;320;207;790", "wc_reply_reviewers": "403;0;58;586", "wc_reply_authors": "1203;0;352;646", "reply_reviewers": "2;0;1;2", "reply_authors": "3;1;2;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.25, 1.299038105676658 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 51.75, 13.516193990913271 ], "wc_strengths_avg": [ 40.75, 20.716840975399702 ], "wc_weaknesses_avg": [ 247.0, 247.92438363339738 ], "wc_questions_avg": [ 46.75, 45.6966902521397 ], "wc_limitations_avg": [ 1.5, 0.5 ], "wc_review_avg": [ 387.75, 235.95802063078932 ], "wc_reply_reviewers_avg": [ 261.75, 242.44419460981118 ], "wc_reply_authors_avg": [ 550.25, 440.83124605681024 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12869485256444550301&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "u-tokyo.ac.jp", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "CemiFace: Center-based Semi-hard Synthetic Face Generation for Face Recognition", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92997", "id": "ykQnxko1cJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ykQnxko1cJ", "openreview": "https://openreview.net/forum?id=ykQnxko1cJ", "poster": "/media/PosterPDFs/NeurIPS%202024/92997.png?t=1731684743.6566968", "project": "", "author_site": "Zhonglin Sun, Siyang Song, Ioannis Patras, Georgios Tzimiropoulos", "tldr": "", "abstract": "Privacy issue is a main concern in developing face recognition techniques. Although synthetic face images can partially mitigate potential legal risks while maintaining effective face recognition (FR) performance, FR models trained by face images synthesized by existing generative approaches frequently suffer from performance degradation problems due to the insufficient discriminative quality of these synthesized samples. In this paper, we systematically investigate what contributes to solid face recognition model training, and reveal that face images with certain degree of similarities to their identity centers show great effectiveness in the performance of trained FR models. Inspired by this, we propose a novel diffusion-based approach (namely **Ce**nter-based Se**mi**-hard Synthetic Face\nGeneration (**CemiFace**) which produces facial samples with various levels of similarity to the subject center, thus allowing to generate face datasets containing effective discriminative samples for training face recognition. Experimental results show that with a modest degree of similarity, training on the generated dataset can produce competitive performance compared to previous generation methods. The code will be available at:https://github.com/szlbiubiubiu/CemiFace", "keywords": "synthetic face recognition;diffusion models;center-based semi-hard", "primary_area": "machine_vision", "supplementary_material": "/attachment/db17dfcbdd7ec13ce977fe3001b4a83cfea3d44d.zip", "author": "Zhonglin Sun;Siyang Song;Ioannis Patras;Georgios Tzimiropoulos", "authorids": "~Zhonglin_Sun1;~Siyang_Song1;~Ioannis_Patras2;~Georgios_Tzimiropoulos1", "gender": "M;M;M;M", "homepage": ";https://www.cst.cam.ac.uk/people/ss2796;http://www.eecs.qmul.ac.uk/~ioannisp/;https://ytzimiro.github.io/", "dblp": "04/8376;220/3096.html;18/1556;03/3273", "google_scholar": "-w2Y9dQAAAAJ;ZKSL1IcAAAAJ;https://scholar.google.com.tw/citations?user=OBYLxRkAAAAJ;https://scholar.google.co.uk/citations?user=D4JkWxf-8fwC", "orcid": ";0000-0003-2339-5685;0000-0003-3913-4738;", "linkedin": ";siyang-song-7a814412b/;ioannis-patras-1053767/;", "or_profile": "~Zhonglin_Sun1;~Siyang_Song1;~Ioannis_Patras2;~Georgios_Tzimiropoulos1", "aff": "Queen Mary university of London;University of Leicester;Queen Mary, University of London;Queen Mary University London", "aff_domain": "qmul.ac.uk;leicester.ac.uk;qmul.ac.uk;qmul.ac.uk", "position": "PhD student;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsun2024cemiface,\ntitle={CemiFace: Center-based Semi-hard Synthetic Face Generation for Face Recognition},\nauthor={Zhonglin Sun and Siyang Song and Ioannis Patras and Georgios Tzimiropoulos},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ykQnxko1cJ}\n}", "github": "", "reviewers": "5xKY;L3p9;fJPL;tciQ;Crof", "pdf_size": 1823848, "rating": "5;5;5;5;9", "confidence": "4;4;4;4;5", "soundness": "2;3;3;3;4", "novelty": "3;3;2;3;4", "presentation": "2;3;3;2;4", "wc_summary": "128;117;73;82;198", "wc_strengths": "58;53;13;50;218", "wc_weaknesses": "242;95;106;548;78", "wc_questions": "2;3;20;38;5", "wc_limitations": "7;3;18;257;45", "wc_review": "437;271;230;975;544", "wc_reply_reviewers": "12;11;36;44;50", "wc_reply_authors": "35;39;57;114;39", "reply_reviewers": "1;1;2;1;1", "reply_authors": "2;2;2;3;2", "rating_avg": [ 5.8, 1.6 ], "confidence_avg": [ 4.2, 0.39999999999999997 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 3.0, 0.6324555320336759 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 119.6, 44.29266304931326 ], "wc_strengths_avg": [ 78.4, 71.60055865703842 ], "wc_weaknesses_avg": [ 213.8, 177.00892632858944 ], "wc_questions_avg": [ 13.6, 13.836184445142381 ], "wc_limitations_avg": [ 66.0, 96.61883874276279 ], "wc_review_avg": [ 491.4, 267.02104785952736 ], "wc_reply_reviewers_avg": [ 30.6, 16.218507946170636 ], "wc_reply_authors_avg": [ 56.8, 29.6 ], "reply_reviewers_avg": [ 1.2, 0.4 ], "reply_authors_avg": [ 2.2, 0.39999999999999997 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8370310685290737621&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 6, "email": "qmul.ac.uk;leicester.ac.uk;qmul.ac.uk;qmul.ac.uk", "author_num": 4, "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Queen Mary University of London;University of Leicester;Queen Mary, University of London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.qmul.ac.uk;https://www.leicester.ac.uk;https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL;Leicester;QMUL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Towards Flexible 3D Perception: Object-Centric Occupancy Completion Augments 3D Object Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92996", "id": "yktQNqtepd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yktQNqtepd", "openreview": "https://openreview.net/forum?id=yktQNqtepd", "poster": "/media/PosterPDFs/NeurIPS%202024/92996.png?t=1731514203.6870816", "project": "", "author_site": "Chaoda Zheng, Feng Wang, Naiyan Wang, Shuguang Cui, Zhen Li", "tldr": "", "abstract": "While 3D object bounding box (bbox) representation has been widely used in autonomous driving perception, it lacks the ability to capture the precise details of an object's intrinsic geometry. Recently, occupancy has emerged as a promising alternative for 3D scene perception. However, constructing a high-resolution occupancy map remains infeasible for large scenes due to computational constraints. Recognizing that foreground objects only occupy a small portion of the scene, we introduce object-centric occupancy as a supplement to object bboxes. This representation not only provides intricate details for detected objects but also enables higher voxel resolution in practical applications. We advance the development of object-centric occupancy perception from both data and algorithm perspectives. On the data side, we construct the first object-centric occupancy dataset from scratch using an automated pipeline. From the algorithmic standpoint, we introduce a novel object-centric occupancy completion network equipped with an implicit shape decoder that manages dynamic-size occupancy generation. This network accurately predicts the complete object-centric occupancy volume for inaccurate object proposals by leveraging temporal information from long sequences. Our method demonstrates robust performance in completing object shapes under noisy detection and tracking conditions. Additionally, we show that our occupancy features significantly enhance the detection results of state-of-the-art 3D object detectors, especially for incomplete or distant objects in the Waymo Open Dataset.", "keywords": "Object Centric;Occupancy;LiDAR;Detection;Long Sequence", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chaoda Zheng;Feng Wang;Naiyan Wang;Shuguang Cui;Zhen Li", "authorids": "~Chaoda_Zheng1;~Feng_Wang1;~Naiyan_Wang1;~Shuguang_Cui1;~Zhen_Li6", "gender": "M;M;M;M;M", "homepage": ";http://happynear.wang/;http://winsty.net;https://sse.cuhk.edu.cn/en/content/1415;https://mypage.cuhk.edu.cn/academics/lizhen/", "dblp": "247/8254;90/4225-15;31/9922;48/4914;74/2397-26", "google_scholar": "3YuWG1QAAAAJ;GKGSZUoAAAAJ;yAWtq6QAAAAJ;https://scholar.google.com.hk/citations?user=1o_qvR0AAAAJ;https://scholar.google.com.hk/citations?user=0TTt3QsAAAAJ", "orcid": ";;;0000-0003-2608-775X;0000-0002-7669-2686", "linkedin": ";;;;", "or_profile": "~Chaoda_Zheng1;~Feng_Wang1;~Naiyan_Wang1;~Shuguang_Cui1;~Zhen_LI_Jason1", "aff": "The Chinese University of Hong Kong, Shenzhen;TuSimple;Tusimple;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen", "aff_domain": "cuhk.edu.cn;tusimple.com;tusimple.ai;cuhk.edu.cn;edu.cn", "position": "PhD student;Researcher;Chief Scientist;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzheng2024towards,\ntitle={Towards Flexible 3D Perception: Object-Centric Occupancy Completion Augments 3D Object Detection},\nauthor={Chaoda Zheng and Feng Wang and Naiyan Wang and Shuguang Cui and Zhen Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yktQNqtepd}\n}", "github": "", "reviewers": "aQAQ;nYQd;GRiU;fcuc", "pdf_size": 4102076, "rating": "5;6;6;6", "confidence": "4;4;5;4", "soundness": "2;2;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;3", "wc_summary": "46;81;70;106", "wc_strengths": "25;42;84;123", "wc_weaknesses": "84;140;76;125", "wc_questions": "2;42;32;26", "wc_limitations": "29;6;1;19", "wc_review": "186;311;263;399", "wc_reply_reviewers": "22;23;0;172", "wc_reply_authors": "0;0;0;261", "reply_reviewers": "1;1;0;3", "reply_authors": "1;1;1;3", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 75.75, 21.568205766822608 ], "wc_strengths_avg": [ 68.5, 38.095275297600885 ], "wc_weaknesses_avg": [ 106.25, 26.929305598176867 ], "wc_questions_avg": [ 25.5, 14.722431864335457 ], "wc_limitations_avg": [ 13.75, 10.985786271359915 ], "wc_review_avg": [ 289.75, 77.24433636196248 ], "wc_reply_reviewers_avg": [ 54.25, 68.60165814322566 ], "wc_reply_authors_avg": [ 65.25, 113.01631519386925 ], "reply_reviewers_avg": [ 1.25, 1.0897247358851685 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1279678784680885519&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cuhk.edu.cn;tusimple.com;tusimple.ai;cuhk.edu.cn;edu.cn", "author_num": 5, "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Chinese University of Hong Kong;TuSimple", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.tusimple.com", "aff_unique_abbr": "CUHK;TuSimple", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Fair Wasserstein Coresets", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92995", "id": "ylceJ2xIw5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ylceJ2xIw5", "openreview": "https://openreview.net/forum?id=ylceJ2xIw5", "poster": "/media/PosterPDFs/NeurIPS%202024/92995.png?t=1730150194.7940922", "project": "", "author_site": "Zikai Xiong, Niccolo Dalmasso, Shubham Sharma, Freddy Lecue, Daniele Magazzeni, Vamsi Potluru, Tucker Balch, Manuela Veloso", "tldr": "", "abstract": "Data distillation and coresets have emerged as popular approaches to generate a smaller representative set of samples for downstream learning tasks to handle large-scale datasets. At the same time, machine learning is being increasingly applied to decision-making processes at a societal level, making it imperative for modelers to address inherent biases towards subgroups present in the data. While current approaches focus on creating fair synthetic representative samples by optimizing local properties relative to the original samples, their impact on downstream learning processes has yet to be explored. In this work, we present fair Wasserstein coresets ($\\texttt{FWC}$), a novel coreset approach which generates fair synthetic representative samples along with sample-level weights to be used in downstream learning tasks. $\\texttt{FWC}$ uses an efficient majority minimization algorithm to minimize the Wasserstein distance between the original dataset and the weighted synthetic samples while enforcing demographic parity. We show that an unconstrained version of $\\texttt{FWC}$ is equivalent to Lloyd's algorithm for k-medians and k-means clustering. Experiments conducted on both synthetic and real datasets show that $\\texttt{FWC}$: (i) achieves a competitive fairness-performance tradeoff in downstream models compared to existing approaches, (ii) improves downstream fairness when added to the existing training data and (iii) can be used to reduce biases in predictions from large language models (GPT-3.5 and GPT-4).", "keywords": "Algorithmic Fairness;Nonconvex Optimization;Coresets", "primary_area": "fairness", "supplementary_material": "", "author": "Zikai Xiong;Niccolo Dalmasso;Shubham Sharma;Freddy Lecue;Daniele Magazzeni;Vamsi K. Potluru;Tucker Balch;Manuela Veloso", "authorids": "~Zikai_Xiong1;~Niccolo_Dalmasso1;~Shubham_Sharma4;~Freddy_Lecue1;~Daniele_Magazzeni1;~Vamsi_K._Potluru1;~Tucker_Balch2;~Manuela_Veloso1", "gender": "M;M;M;;M;Not Specified;M;F", "homepage": "https://zikaixiong.github.io/;https://www.niccolodalmasso.com/;;http://www-sop.inria.fr/members/Freddy.Lecue/;https://nms.kcl.ac.uk/daniele.magazzeni/;;;https://www.cs.cmu.edu/~mmv/", "dblp": "255/6961;259/3135;;02/3657.html;14/4672;21/4837;;v/ManuelaMVeloso", "google_scholar": "oejoCFAAAAAJ;hFh64VcAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=GLByS4gAAAAJ;;uC_8kekAAAAJ;jM1cT4QAAAAJ;https://scholar.google.com.tw/citations?user=2FbkAzYAAAAJ", "orcid": "0000-0003-3025-7846;0000-0002-8121-2720;;;;;0000-0002-5148-2033;", "linkedin": "zikai-xiong/;;;freddylecue/;;;;", "or_profile": "~Zikai_Xiong1;~Niccolo_Dalmasso1;~Shubham_Sharma4;~Freddy_Lecue1;~Daniele_Magazzeni1;~Vamsi_K._Potluru1;~Tucker_Balch2;~Manuela_Veloso1", "aff": "Massachusetts Institute of Technology;J.P. Morgan Chase;J.P. Morgan Chase;INRIA;;J.P. Morgan Chase;J.P. Morgan Chase;School of Computer Science, Carnegie Mellon University", "aff_domain": "mit.edu;jpmorgan.com;jpmorgan.com;inria.fr;;jpmorgan.com;jpmorgan.com;cs.cmu.edu", "position": "PhD student;Researcher;Researcher;Full Professor;;Researcher;Managing Director;Full Professor", "bibtex": "@inproceedings{\nxiong2024fair,\ntitle={Fair Wasserstein Coresets},\nauthor={Zikai Xiong and Niccolo Dalmasso and Shubham Sharma and Freddy Lecue and Daniele Magazzeni and Vamsi K. Potluru and Tucker Balch and Manuela Veloso},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ylceJ2xIw5}\n}", "github": "", "reviewers": "4tbZ;cAaY;xAdA;MJ1C", "pdf_size": 1032312, "rating": "6;6;6;7", "confidence": "3;3;4;4", "soundness": "3;2;3;4", "novelty": "3;2;3;3", "presentation": "3;3;3;4", "wc_summary": "73;104;257;73", "wc_strengths": "16;105;36;36", "wc_weaknesses": "263;229;24;83", "wc_questions": "24;2;52;2", "wc_limitations": "7;2;1;5", "wc_review": "383;442;370;199", "wc_reply_reviewers": "38;14;14;9", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 126.75, 76.25737669235679 ], "wc_strengths_avg": [ 48.25, 33.766662553471285 ], "wc_weaknesses_avg": [ 149.75, 99.21535919402803 ], "wc_questions_avg": [ 20.0, 20.54263858417414 ], "wc_limitations_avg": [ 3.75, 2.384848003542364 ], "wc_review_avg": [ 348.5, 90.47789785356422 ], "wc_reply_reviewers_avg": [ 18.75, 11.299889379989523 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14074688286571596774&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "mit.edu;jpmorgan.com;jpmorgan.com;inria.fr;;jpmorgan.com;jpmorgan.com;cs.cmu.edu", "author_num": 8, "aff_unique_index": "0;1;1;2;1;1;3", "aff_unique_norm": "Massachusetts Institute of Technology;JPMorgan Chase & Co.;INRIA;Carnegie Mellon University", "aff_unique_dep": ";;;School of Computer Science", "aff_unique_url": "https://web.mit.edu;https://www.jpmorganchase.com;https://www.inria.fr;https://www.cmu.edu", "aff_unique_abbr": "MIT;JPM;INRIA;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;France" }, { "title": "Information-theoretic Generalization Analysis for Expected Calibration Error", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92994", "id": "yltJAlwtW9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yltJAlwtW9", "openreview": "https://openreview.net/forum?id=yltJAlwtW9", "poster": "/media/PosterPDFs/NeurIPS%202024/92994.png?t=1733376914.9670393", "project": "", "author_site": "Futoshi Futami, Masahiro Fujisawa", "tldr": "", "abstract": "While the expected calibration error (ECE), which employs binning, is widely adopted to evaluate the calibration performance of machine learning models, theoretical understanding of its estimation bias is limited. In this paper, we present the first comprehensive analysis of the estimation bias in the two common binning strategies, uniform mass and uniform width binning.\nOur analysis establishes upper bounds on the bias, achieving an improved convergence rate. Moreover, our bounds reveal, for the first time, the optimal number of bins to minimize the estimation bias. We further extend our bias analysis to generalization error analysis based on the information-theoretic approach, deriving upper bounds that enable the numerical evaluation of how small the ECE is for unknown data. Experiments using deep learning models show that our bounds are nonvacuous thanks to this information-theoretic generalization analysis approach.", "keywords": "information thery;information-theoretic generalization error analysis;generalization error;expected calibration error;calibration error;binning", "primary_area": "learning_theory", "supplementary_material": "/attachment/b2543ac5e3637d8a5c66cd924fec0a9177dcec00.zip", "author": "Futoshi Futami;Masahiro Fujisawa", "authorids": "~Futoshi_Futami1;~Masahiro_Fujisawa1", "gender": "M;M", "homepage": ";https://msfuji0211.github.io/", "dblp": "209/4960;236/6307", "google_scholar": "https://scholar.google.co.jp/citations?user=WTOG0mMAAAAJ;gS24jX8AAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Futoshi_Futami1;~Masahiro_Fujisawa1", "aff": "Osaka University;RIKEN", "aff_domain": "osaka-u.ac.jp;riken.jp", "position": "Lecturer;Special Postdoctoral Researcher", "bibtex": "@inproceedings{\nfutami2024informationtheoretic,\ntitle={Information-theoretic Generalization Analysis for Expected Calibration Error},\nauthor={Futoshi Futami and Masahiro Fujisawa},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yltJAlwtW9}\n}", "github": "", "reviewers": "kktU;8W41;Ckaf;9r8F", "pdf_size": 1065616, "rating": "5;6;6;7", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "80;66;29;103", "wc_strengths": "30;59;90;30", "wc_weaknesses": "54;187;27;221", "wc_questions": "358;59;29;105", "wc_limitations": "10;1;8;8", "wc_review": "532;372;183;467", "wc_reply_reviewers": "113;13;10;70", "wc_reply_authors": "160;43;34;68", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 69.5, 26.85609800399157 ], "wc_strengths_avg": [ 52.25, 24.80297361204902 ], "wc_weaknesses_avg": [ 122.25, 83.17864810154106 ], "wc_questions_avg": [ 137.75, 130.01033612755563 ], "wc_limitations_avg": [ 6.75, 3.418698582794336 ], "wc_review_avg": [ 388.5, 131.58362360111533 ], "wc_reply_reviewers_avg": [ 51.5, 42.80478945164898 ], "wc_reply_authors_avg": [ 76.25, 49.93182852650201 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:MsmgRO9Li2MJ:scholar.google.com/&scioq=Information-theoretic+Generalization+Analysis+for+Expected+Calibration+Error&hl=en&as_sdt=0,14", "gs_version_total": 4, "email": "osaka-u.ac.jp;riken.jp", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Osaka University;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.osaka-u.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Osaka U;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "ReGS: Reference-based Controllable Scene Stylization with Gaussian Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92993", "id": "ynJr0RW6FR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ynJr0RW6FR", "openreview": "https://openreview.net/forum?id=ynJr0RW6FR", "poster": "/media/PosterPDFs/NeurIPS%202024/92993.png?t=1731729330.912768", "project": "", "author_site": "Yiqun Mei, Jiacong Xu, Vishal Patel", "tldr": "", "abstract": "Referenced-based scene stylization that edits the appearance based on a content-aligned reference image is an emerging research area. Starting with a pretrained neural radiance field (NeRF), existing methods typically learn a novel appearance that matches the given style. Despite their effectiveness, they inherently suffer from time-consuming volume rendering, and thus are impractical for many real-time applications. In this work, we propose ReGS, which adapts 3D Gaussian Splatting (3DGS) for reference-based stylization to enable real-time stylized view synthesis. Editing the appearance of a pretrained 3DGS is challenging as it uses discrete Gaussians as 3D representation, which tightly bind appearance with geometry. Simply optimizing the appearance as prior methods do is often insufficient for modeling continuous textures in the given reference image. To address this challenge, we propose a novel texture-guided control mechanism that adaptively adjusts local responsible Gaussians to a new geometric arrangement, serving for desired texture details. The proposed process is guided by texture clues for effective appearance editing, and regularized by scene depth for preserving original geometric structure. With these novel designs, we show ReGs can produce state-of-the-art stylization results that respect the reference texture while embracing real-time rendering speed for free-view navigation.", "keywords": "Gaussian Splatting;Appearance Editing", "primary_area": "machine_vision", "supplementary_material": "/attachment/03d46a6680a888891cb3451bb08578c96fbcc074.zip", "author": "Yiqun Mei;Jiacong Xu;Vishal M. Patel", "authorids": "~Yiqun_Mei1;~Jiacong_Xu1;~Vishal_M._Patel1", "gender": "M;M;M", "homepage": "https://yiqunmei.net;https://xujiacong.github.io;https://engineering.jhu.edu/vpatel36/", "dblp": "263/9774;322/0053;76/6100", "google_scholar": "TvB8_rAAAAAJ;PgTi9kIAAAAJ;AkEXTbIAAAAJ", "orcid": ";0000-0003-1141-9168;", "linkedin": ";;", "or_profile": "~Yiqun_Mei1;~Jiacong_Xu1;~Vishal_Patel2", "aff": "Netflix Eyeline Studios| ScanlineVFX;Honda Research Institute;Johns Hopkins University", "aff_domain": "scanlinevfx.com;honda-ri.de;jhu.edu", "position": "Intern;Intern;Assistant Professor", "bibtex": "@inproceedings{\nmei2024regs,\ntitle={Re{GS}: Reference-based Controllable Scene Stylization with Gaussian Splatting},\nauthor={Yiqun Mei and Jiacong Xu and Vishal M. Patel},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ynJr0RW6FR}\n}", "github": "", "reviewers": "JWsE;3qBC;hmWz;TJDG;BehR", "pdf_size": 24348339, "rating": "5;5;6;6;7", "confidence": "3;4;4;4;4", "soundness": "2;3;3;3;3", "novelty": "2;3;2;3;3", "presentation": "3;3;1;2;3", "wc_summary": "88;80;117;66;64", "wc_strengths": "77;66;51;43;119", "wc_weaknesses": "74;229;423;57;281", "wc_questions": "42;5;124;36;196", "wc_limitations": "14;8;9;7;18", "wc_review": "295;388;724;209;678", "wc_reply_reviewers": "68;55;78;12;102", "wc_reply_authors": "31;44;33;44;26", "reply_reviewers": "1;1;1;1;1", "reply_authors": "2;2;2;2;2", "rating_avg": [ 5.8, 0.7483314773547882 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.4, 0.8 ], "wc_summary_avg": [ 83.0, 19.183326093250876 ], "wc_strengths_avg": [ 71.2, 26.64132128855474 ], "wc_weaknesses_avg": [ 212.8, 136.1152452886891 ], "wc_questions_avg": [ 80.6, 69.85012526832003 ], "wc_limitations_avg": [ 11.2, 4.166533331199932 ], "wc_review_avg": [ 458.8, 206.21483942723424 ], "wc_reply_reviewers_avg": [ 63.0, 29.785902705810344 ], "wc_reply_authors_avg": [ 35.6, 7.227724399837061 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.5345224838248487, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14863858026548689285&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "scanlinevfx.com;honda-ri.de;jhu.edu", "author_num": 3, "aff_unique_index": "0;1;2", "aff_unique_norm": "Netflix;Honda Research Institute;Johns Hopkins University", "aff_unique_dep": "Eyeline Studios;;", "aff_unique_url": "https://www.netflix.com;https://www.honda-ri.com;https://www.jhu.edu", "aff_unique_abbr": "Netflix;HRI;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Japan" }, { "title": "PGN: The RNN's New Successor is Effective for Long-Range Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92992", "id": "ypEamFKu2O", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ypEamFKu2O", "openreview": "https://openreview.net/forum?id=ypEamFKu2O", "poster": "/media/PosterPDFs/NeurIPS%202024/92992.png?t=1731751112.5693805", "project": "", "author_site": "Yuxin Jia, Youfang Lin, Jing Yu, Shuo Wang, Tianhao Liu, Huaiyu Wan", "tldr": "", "abstract": "Due to the recurrent structure of RNN, the long information propagation path poses limitations in capturing long-term dependencies, gradient explosion/vanishing issues, and inefficient sequential execution. Based on this, we propose a novel paradigm called Parallel Gated Network (PGN) as the new successor to RNN. PGN directly captures information from previous time steps through the designed Historical Information Extraction (HIE) layer and leverages gated mechanisms to select and fuse it with the current time step information. This reduces the information propagation path to $\\mathcal{O}(1)$, effectively addressing the limitations of RNN. To enhance PGN's performance in long-range time series forecasting tasks, we propose a novel temporal modeling framework called Temporal PGN (TPGN). TPGN incorporates two branches to comprehensively capture the semantic information of time series. One branch utilizes PGN to capture long-term periodic patterns while preserving their local characteristics. The other branch employs patches to capture short-term information and aggregate the global representation of the series. TPGN achieves a theoretical complexity of $\\mathcal{O}(\\sqrt{L})$, ensuring efficiency in its operations. Experimental results on five benchmark datasets demonstrate the state-of-the-art (SOTA) performance and high efficiency of TPGN, further confirming the effectiveness of PGN as the new successor to RNN in long-range time series forecasting. The code is available in this repository: https://github.com/Water2sea/TPGN.", "keywords": "information propagation paths;the RNN's new successor;long-range time series forecasting;comprehensive semantic information", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Yuxin Jia;Youfang Lin;Jing Yu;Shuo Wang;Tianhao Liu;Huaiyu Wan", "authorids": "~Yuxin_Jia1;~Youfang_Lin1;~Jing_Yu8;~Shuo_Wang17;~Tianhao_Liu2;~Huaiyu_Wan1", "gender": ";M;F;M;;M", "homepage": ";https://faculty.bjtu.edu.cn/7443/;;;;https://faculty.bjtu.edu.cn/8793/", "dblp": ";12/4988;;;226/9361;07/9988", "google_scholar": ";e8xT-e0AAAAJ;;;;T5wVWIUAAAAJ", "orcid": "0009-0006-8031-334X;0000-0002-5143-3645;;0000-0001-6599-3638;0009-0008-3032-2488;0000-0002-0501-9363", "linkedin": ";youfang-lin-a1625091/;yu-jing-7b31b5307/;;;", "or_profile": "~Yuxin_Jia1;~Youfang_Lin1;~Jing_Yu8;~Shuo_Wang17;~Tianhao_Liu2;~Huaiyu_Wan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "position": "PhD student;Full Professor;MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\njia2024pgn,\ntitle={{PGN}: The {RNN}'s New Successor is Effective for Long-Range Time Series Forecasting},\nauthor={Yuxin Jia and Youfang Lin and Jing Yu and Shuo Wang and Tianhao Liu and Huaiyu Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ypEamFKu2O}\n}", "github": "", "reviewers": "fqFX;Bhsw;t9pC;K1qz", "pdf_size": 3240222, "rating": "5;6;7;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "4;2;3;3", "wc_summary": "25;56;81;60", "wc_strengths": "29;15;128;122", "wc_weaknesses": "21;337;92;56", "wc_questions": "224;2;44;15", "wc_limitations": "1;50;1;11", "wc_review": "300;460;346;264", "wc_reply_reviewers": "19;39;13;21", "wc_reply_authors": "79;30;7;8", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 55.5, 20.006249023742555 ], "wc_strengths_avg": [ 73.5, 51.780787943019945 ], "wc_weaknesses_avg": [ 126.5, 124.09774373452565 ], "wc_questions_avg": [ 71.25, 89.4912705240014 ], "wc_limitations_avg": [ 15.75, 20.191272867256288 ], "wc_review_avg": [ 342.5, 73.8021002411178 ], "wc_reply_reviewers_avg": [ 23.0, 9.695359714832659 ], "wc_reply_authors_avg": [ 31.0, 29.197602641312866 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16189600486747544076&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Decoupling Semantic Similarity from Spatial Alignment for Neural Networks.", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92991", "id": "ypFgcT147Z", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ypFgcT147Z", "openreview": "https://openreview.net/forum?id=ypFgcT147Z", "poster": "/media/PosterPDFs/NeurIPS%202024/92991.png?t=1731612003.726192", "project": "", "author_site": "Tassilo Wald, Constantin Ulrich, Priyank Jaini, Gregor Koehler, David Zimmerer, Stefan Denner, Fabian Isensee, Michael Baumgartner, Klaus Maier-Hein", "tldr": "", "abstract": "What representation do deep neural networks learn? How similar are images to each other for neural networks? Despite the overwhelming success of deep learning methods key questions about their internal workings still remain largely unanswered, due to their internal high dimensionality and complexity. To address this, one approach is to measure the similarity of activation responses to various inputs.\nRepresentational Similarity Matrices (RSMs) distill this similarity into scalar values for each input pair.\nThese matrices encapsulate the entire similarity structure of a system, indicating which input lead to similar responses.\nWhile the similarity between images is ambiguous, we argue that the spatial location of semantic objects does neither influence human perception nor deep learning classifiers. Thus this should be reflected in the definition of similarity between image responses for computer vision systems. Revisiting the established similarity calculations for RSMs we expose their sensitivity to spatial alignment. In this paper we propose to solve this through _semantic RSMs_, which are invariant to spatial permutation. We measure semantic similarity between input responses by formulating it as a set-matching problem. Further, we quantify the superiority of _semantic_ RSMs over _spatio-semantic_ RSMs through image retrieval and by comparing the similarity between representations to the similarity between predicted class probabilities.", "keywords": "Representational Similarity;Representational Similarity Analysis;Computer Vision", "primary_area": "other", "supplementary_material": "", "author": "Tassilo Wald;Constantin Ulrich;Priyank Jaini;Gregor Koehler;David Zimmerer;Stefan Denner;Fabian Isensee;Michael Baumgartner;Klaus Maier-Hein", "authorids": "~Tassilo_Wald1;~Constantin_Ulrich1;~Priyank_Jaini1;~Gregor_Koehler1;~David_Zimmerer1;~Stefan_Denner1;~Fabian_Isensee1;~Michael_Baumgartner2;~Klaus_Maier-Hein1", "gender": "M;M;M;M;M;;M;M;M", "homepage": "https://TaWald.github.io;;https://priyankjaini.github.io/;;;;;;http://www.dkfz.de/en/mic", "dblp": "289/0140;327/3310;184/4579;251/8923;192/4608;262/3952;;66/4721-1;133/0183", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=de;https://scholar.google.ca/citations?user=keg9BGEAAAAJ;b8U4UTAAAAAJ;https://scholar.google.de/citations?user=_G7n5uMAAAAJ;WeZ0bvgAAAAJ;PjerEe4AAAAJ;https://scholar.google.com/citations?hl=de;oCrBpVMAAAAJ", "orcid": "0009-0007-5222-2683;;;;;;0000-0002-3519-5886;;0000-0002-6626-2463", "linkedin": "tassilo-wald-342902217/;;;;;;;michael-baumgartner-/;", "or_profile": "~Tassilo_Wald1;~Constantin_Ulrich1;~Priyank_Jaini1;~Gregor_Koehler1;~David_Zimmerer1;~Stefan_Denner1;~Fabian_Isensee1;~Michael_Baumgartner2;~Klaus_Maier-Hein1", "aff": "German Cancer Research Center;Deutsches Krebsforschungszentrum;Google;German Cancer Research Center (DKFZ);German Cancer Research Center;Deutsches Krebsforschungszentrum;German Cancer Research Center;Deutsches Krebsforschungszentrum;German Cancer Research Center", "aff_domain": "dkfz.de;dkfz.de;google.com;dkfz.de;dkfz.de;dkfz-heidelberg.de;dkfz.de;dkfz.de;dkfz.de", "position": "PhD student;PhD student;Researcher;PhD student;Postdoc;PhD student;Principal Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nwald2024decoupling,\ntitle={Decoupling Semantic Similarity from Spatial Alignment for Neural Networks.},\nauthor={Tassilo Wald and Constantin Ulrich and Priyank Jaini and Gregor Koehler and David Zimmerer and Stefan Denner and Fabian Isensee and Michael Baumgartner and Klaus Maier-Hein},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ypFgcT147Z}\n}", "github": "", "reviewers": "sf44;Xdnj;zPy6;xk1Y", "pdf_size": 33099731, "rating": "5;5;6;6", "confidence": "3;2;3;3", "soundness": "2;2;3;3", "novelty": "2;3;3;3", "presentation": "2;2;3;3", "wc_summary": "85;30;66;72", "wc_strengths": "61;62;73;50", "wc_weaknesses": "175;121;172;37", "wc_questions": "47;25;164;22", "wc_limitations": "102;2;34;7", "wc_review": "470;240;509;188", "wc_reply_reviewers": "202;0;20;0", "wc_reply_authors": "729;0;0;0", "reply_reviewers": "1;0;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 2.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 63.25, 20.38841582860228 ], "wc_strengths_avg": [ 61.5, 8.139410298049853 ], "wc_weaknesses_avg": [ 126.25, 55.818343759018866 ], "wc_questions_avg": [ 64.5, 58.25160941982633 ], "wc_limitations_avg": [ 36.25, 39.86461463503692 ], "wc_review_avg": [ 351.75, 139.6538130521326 ], "wc_reply_reviewers_avg": [ 55.5, 84.9749963224477 ], "wc_reply_authors_avg": [ 182.25, 315.66625967942787 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1DkU3FlFQ9UJ:scholar.google.com/&scioq=Decoupling+Semantic+Similarity+from+Spatial+Alignment+for+Neural+Networks.&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "dkfz.de;dkfz.de;google.com;dkfz.de;dkfz.de;dkfz-heidelberg.de;dkfz.de;dkfz.de;dkfz.de", "author_num": 9, "aff_unique_index": "0;1;2;0;0;1;0;1;0", "aff_unique_norm": "German Cancer Research Center;Deutsches Krebsforschungszentrum;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.dkfz.de;https://www.dkfz.de;https://www.google.com", "aff_unique_abbr": "DKFZ;DKFZ;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "Germany;United States" }, { "title": "Neural Concept Binder", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92990", "id": "ypPzyflbYs", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ypPzyflbYs", "openreview": "https://openreview.net/forum?id=ypPzyflbYs", "poster": "/media/PosterPDFs/NeurIPS%202024/92990.png?t=1732716757.4761217", "project": "", "author_site": "Wolfgang Stammer, Antonia W\u00fcst, David Steinmann, Kristian Kersting", "tldr": "", "abstract": "The challenge in object-based visual reasoning lies in generating concept representations that are both descriptive and distinct. Achieving this in an unsupervised manner requires human users to understand the model's learned concepts and, if necessary, revise incorrect ones. To address this challenge, we introduce the Neural Concept Binder (NCB), a novel framework for deriving both discrete and continuous concept representations, which we refer to as \"concept-slot encodings\". NCB employs two types of binding: \"soft binding\", which leverages the recent SysBinder mechanism to obtain object-factor encodings, and subsequent \"hard binding\", achieved through hierarchical clustering and retrieval-based inference. This enables obtaining expressive, discrete representations from unlabeled images. Moreover, the structured nature of NCB's concept representations allows for intuitive inspection and the straightforward integration of external knowledge, such as human input or insights from other AI models like GPT-4. Additionally, we demonstrate that incorporating the hard binding mechanism preserves model performance while enabling seamless integration into both neural and symbolic modules for complex reasoning tasks. We validate the effectiveness of NCB through evaluations on our newly introduced CLEVR-Sudoku dataset.", "keywords": "Concept Discovery;Interpretable Artificial Intelligence;Interactive Machine Learning;Disentanglement", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Wolfgang Stammer;Antonia W\u00fcst;David Steinmann;Kristian Kersting", "authorids": "~Wolfgang_Stammer1;~Antonia_W\u00fcst1;~David_Steinmann1;~Kristian_Kersting1", "gender": "M;;M;M", "homepage": "https://ml-research.github.io/people/wstammer/;https://www.aiml.informatik.tu-darmstadt.de/people/awuest/index.html;;http://www.ml.informatik.tu-darmstadt.de/", "dblp": "256/5497;;;40/3793", "google_scholar": "66-aU5AAAAAJ;;;QY-earAAAAAJ", "orcid": "0000-0003-3793-8046;;0000-0001-5823-2945;0000-0002-2873-9152", "linkedin": "https://linkedin.com/in/wolfgang-stammer-7835a4207/en-us?trk=people-guest_people_search-card;;;", "or_profile": "~Wolfgang_Stammer1;~Antonia_W\u00fcst1;~David_Steinmann1;~Kristian_Kersting1", "aff": "CS Department, TU Darmstadt;Technische Universit\u00e4t Darmstadt;Technische Universit\u00e4t Darmstadt;TU Darmstadt", "aff_domain": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nstammer2024neural,\ntitle={Neural Concept Binder},\nauthor={Wolfgang Stammer and Antonia W{\\\"u}st and David Steinmann and Kristian Kersting},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ypPzyflbYs}\n}", "github": "", "reviewers": "U7bj;6FRw;edmj;VMaX", "pdf_size": 4565926, "rating": "4;5;6;7", "confidence": "5;4;3;3", "soundness": "3;3;3;3", "novelty": "1;3;2;4", "presentation": "3;1;2;4", "wc_summary": "47;72;84;50", "wc_strengths": "63;76;48;46", "wc_weaknesses": "143;693;144;76", "wc_questions": "44;52;97;123", "wc_limitations": "17;9;1;8", "wc_review": "314;902;374;303", "wc_reply_reviewers": "17;112;48;19", "wc_reply_authors": "278;657;11;13", "reply_reviewers": "1;1;1;1", "reply_authors": "3;6;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 1.118033988749895 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 63.25, 15.384651442265437 ], "wc_strengths_avg": [ 58.25, 12.173228823939851 ], "wc_weaknesses_avg": [ 264.0, 249.2117573470401 ], "wc_questions_avg": [ 79.0, 32.45766473423496 ], "wc_limitations_avg": [ 8.75, 5.673402858955108 ], "wc_review_avg": [ 473.25, 249.0094124727015 ], "wc_reply_reviewers_avg": [ 49.0, 38.38619543533847 ], "wc_reply_authors_avg": [ 239.75, 264.2455061112677 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 3.25, 1.6393596310755 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9438798074485388, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12091143707215472609&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cs.tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de;tu-darmstadt.de", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Federated Ensemble-Directed Offline Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92989", "id": "ypaqE8UwsC", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ypaqE8UwsC", "openreview": "https://openreview.net/forum?id=ypaqE8UwsC", "poster": "", "project": "", "author_site": "Desik Rengarajan, Nitin Ragothaman, Dileep Kalathil, Srinivas Shakkottai", "tldr": "", "abstract": "We consider the problem of federated offline reinforcement learning (RL), a scenario under which distributed learning agents must collaboratively learn a high-quality control policy only using small pre-collected datasets generated according to different unknown behavior policies. Na\\\"{i}vely combining a standard offline RL approach with a standard federated learning approach to solve this problem can lead to poorly performing policies. In response, we develop the Federated Ensemble-Directed Offline Reinforcement Learning Algorithm (FEDORA), which distills the collective wisdom of the clients using an ensemble learning approach. We develop the FEDORA codebase to utilize distributed compute resources on a federated learning platform. We show that FEDORA significantly outperforms other approaches, including offline RL over the combined data pool, in various complex continuous control environments and real-world datasets. Finally, we demonstrate the performance of FEDORA in the real-world on a mobile robot. We provide our code and a video of our experiments at \\url{https://github.com/DesikRengarajan/FEDORA}.", "keywords": "Deep Reinforcement Learning;Offline Reinforcement Learning;Federated Learning", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/b658741d56ebceb05dfff999f7938a326e2c17d8.zip", "author": "Desik Rengarajan;Nitin Ragothaman;Dileep Kalathil;Srinivas Shakkottai", "authorids": "~Desik_Rengarajan1;~Nitin_Ragothaman1;~Dileep_Kalathil1;~Srinivas_Shakkottai1", "gender": "M;;M;", "homepage": "https://sites.google.com/view/desik-rengarajan/home;https://rnitin.github.io/;http://people.tamu.edu/~dileep.kalathil/;https://cesg.tamu.edu/faculty/sshakkot/", "dblp": "218/1345;;44/8356;03/353.html", "google_scholar": "ygOY_E4AAAAJ;;S24XFwwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-8538-6023;;;0000-0002-5882-6433", "linkedin": "desik-rengarajan-109868100/;;;", "or_profile": "~Desik_Rengarajan1;~Nitin_Ragothaman1;~Dileep_Kalathil1;~Srinivas_Shakkottai1", "aff": "Hewlett Packard Labs ;Texas A&M University - College Station;Texas A&M University;Texas A&M", "aff_domain": "hpe.com;tamu.edu;tamu.edu;tamu.edu", "position": "Researcher;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nrengarajan2024federated,\ntitle={Federated Ensemble-Directed Offline Reinforcement Learning},\nauthor={Desik Rengarajan and Nitin Ragothaman and Dileep Kalathil and Srinivas Shakkottai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ypaqE8UwsC}\n}", "github": "", "reviewers": "EGVU;EkXX;hv6n", "pdf_size": 2376858, "rating": "6;7;8", "confidence": "4;4;4", "soundness": "3;3;4", "novelty": "2;4;4", "presentation": "2;4;4", "wc_summary": "32;38;99", "wc_strengths": "52;67;83", "wc_weaknesses": "72;95;47", "wc_questions": "72;87;5", "wc_limitations": "30;32;5", "wc_review": "258;319;239", "wc_reply_reviewers": "22;61;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;0", "reply_authors": "1;1;1", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.9428090415820634 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 56.333333333333336, 30.26916289265731 ], "wc_strengths_avg": [ 67.33333333333333, 12.657891697365017 ], "wc_weaknesses_avg": [ 71.33333333333333, 19.601587237318874 ], "wc_questions_avg": [ 54.666666666666664, 35.64952859280034 ], "wc_limitations_avg": [ 22.333333333333332, 12.283683848458853 ], "wc_review_avg": [ 272.0, 34.1272129929572 ], "wc_reply_reviewers_avg": [ 27.666666666666668, 25.223445883190152 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 10, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7069806738959863019&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "hpe.com;tamu.edu;tamu.edu;tamu.edu", "author_num": 4, "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Hewlett Packard Labs;Texas A&M University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hpl.hp.com;https://www.tamu.edu", "aff_unique_abbr": "HPL;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "GTBench: Uncovering the Strategic Reasoning Capabilities of LLMs via Game-Theoretic Evaluations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92988", "id": "ypggxVWIv2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ypggxVWIv2", "openreview": "https://openreview.net/forum?id=ypggxVWIv2", "poster": "/media/PosterPDFs/NeurIPS%202024/92988.png?t=1731729714.8035934", "project": "", "author_site": "Jinhao Duan, Renming Zhang, James Diffenderfer, Bhavya Kailkhura, Lichao Sun, Elias Stengel-Eskin, Mohit Bansal, Tianlong Chen, Kaidi Xu", "tldr": "", "abstract": "As Large Language Models (LLMs) are integrated into critical real-world applications, their strategic and logical reasoning abilities are increasingly crucial. This paper evaluates LLMs' reasoning abilities in competitive environments through game-theoretic tasks, e.g., board and card games that require pure logic and strategic reasoning to compete with opponents. We first propose GTBench, a language-driven environment composing 10 widely-recognized tasks, across a comprehensive game taxonomy: complete versus incomplete information, dynamic versus static, and probabilistic versus deterministic scenarios. Then, we (1) Characterize the game-theoretic reasoning of LLMs; and (2) Perform LLM-vs.-LLM competitions as reasoning evaluation. We observe that (1) LLMs have distinct behaviors regarding various gaming scenarios; for example, LLMs fail in complete and deterministic games yet they are competitive in probabilistic gaming scenarios; (2) Most open-source LLMs, e.g., CodeLlama-34b-Instruct and Llama-2-70b-chat, are less competitive than commercial LLMs, e.g., GPT-4, in complex games, yet the recently released Llama-3-70b-Instruct makes up for this shortcoming. In addition, code-pretraining greatly benefits strategic reasoning, while advanced reasoning methods such as Chain-of-Thought (CoT) and Tree-of-Thought (ToT) do not always help. We further characterize the game-theoretic properties of LLMs, such as equilibrium and Pareto Efficiency in repeated games. Detailed error profiles are provided for a better understanding of LLMs' behavior. We hope our research provides standardized protocols and serves as a foundation to spur further explorations in the strategic reasoning of LLMs.", "keywords": "Large Language Models;Game Theory;Strategic Reasoning;Benchmark", "primary_area": "evaluation", "supplementary_material": "/attachment/6e3a2c2e604838262bed558a247fee75307ea2a9.zip", "author": "Jinhao Duan;Renming Zhang;James Diffenderfer;Bhavya Kailkhura;Lichao Sun;Elias Stengel-Eskin;Mohit Bansal;Tianlong Chen;Kaidi Xu", "authorids": "~Jinhao_Duan1;~Renming_Zhang1;~James_Diffenderfer1;~Bhavya_Kailkhura1;~Lichao_Sun1;~Elias_Stengel-Eskin1;~Mohit_Bansal2;~Tianlong_Chen1;~Kaidi_Xu1", "gender": "M;M;;M;M;M;M;M;M", "homepage": "https://jinhaoduan.github.io;;;https://people.llnl.gov/kailkhura1;https://lichao-sun.github.io/;https://esteng.github.io;https://www.cs.unc.edu/~mbansal/;https://tianlong-chen.github.io;https://kaidixu.com/", "dblp": "282/2912;;188/4110;132/8938;121/0780-1.html;212/6138;32/5243.html;;195/8175", "google_scholar": "aWeTAXYAAAAJ;;nRr24_QAAAAJ;SQpJmOgAAAAJ;WhGUE7AAAAAJ;gr_ZVSQAAAAJ;DN8QtscAAAAJ;LE3ctn0AAAAJ;lYK0wlsAAAAJ", "orcid": ";;;;;0000-0002-6689-505X;;0000-0001-7774-8197;", "linkedin": ";renming-zhang/;;;lichao-sun-b273a290/;;;tianlong-chen-783862167/;", "or_profile": "~Jinhao_Duan1;~Renming_Zhang1;~James_Diffenderfer1;~Bhavya_Kailkhura1;~Lichao_Sun1;~Elias_Stengel-Eskin1;~Mohit_Bansal2;~Tianlong_Chen1;~Kaidi_Xu1", "aff": "Drexel University;Boston University, Boston University;Lawrence Livermore National Labs;Lawrence Livermore National Laboratory;Lehigh University;University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;Harvard University;Drexel University", "aff_domain": "drexel.edu;bu.edu;llnl.gov;llnl.gov;lehigh.edu;cs.unc.edu;unc.edu;harvard.edu;drexel.edu", "position": "PhD student;MS student;Researcher;Research Staff;Assistant Professor;Postdoc;Full Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nduan2024gtbench,\ntitle={{GTB}ench: Uncovering the Strategic Reasoning Capabilities of {LLM}s via Game-Theoretic Evaluations},\nauthor={Jinhao Duan and Renming Zhang and James Diffenderfer and Bhavya Kailkhura and Lichao Sun and Elias Stengel-Eskin and Mohit Bansal and Tianlong Chen and Kaidi Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ypggxVWIv2}\n}", "github": "", "reviewers": "L1Yq;VyBq;PA1v;HsvL", "pdf_size": 6237988, "rating": "4;6;7;7", "confidence": "4;4;4;4", "soundness": "3;3;4;3", "novelty": "1;3;3;3", "presentation": "3;3;4;4", "wc_summary": "41;100;55;45", "wc_strengths": "14;113;53;116", "wc_weaknesses": "14;199;10;62", "wc_questions": "175;12;65;2", "wc_limitations": "5;33;10;9", "wc_review": "249;457;193;234", "wc_reply_reviewers": "198;66;10;10", "wc_reply_authors": "307;83;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.8660254037844386 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 60.25, 23.509306667785847 ], "wc_strengths_avg": [ 74.0, 42.79602785306132 ], "wc_weaknesses_avg": [ 71.25, 76.54206359904337 ], "wc_questions_avg": [ 63.5, 68.6822393344888 ], "wc_limitations_avg": [ 14.25, 10.985786271359915 ], "wc_review_avg": [ 283.25, 102.38743819434102 ], "wc_reply_reviewers_avg": [ 71.0, 76.8049477572897 ], "wc_reply_authors_avg": [ 97.5, 125.61150425020791 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1103546201672317902&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "drexel.edu;bu.edu;llnl.gov;llnl.gov;lehigh.edu;cs.unc.edu;unc.edu;harvard.edu;drexel.edu", "author_num": 9, "aff_unique_index": "0;1;2;2;3;4;4;5;0", "aff_unique_norm": "Drexel University;Boston University;Lawrence Livermore National Laboratory;Lehigh University;University of North Carolina;Harvard University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.drexel.edu;https://www.bu.edu;https://www.llnl.gov;https://www.lehigh.edu;https://www.unc.edu;https://www.harvard.edu", "aff_unique_abbr": "Drexel;BU;LLNL;Lehigh;UNC;Harvard", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Boston;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MutaPLM: Protein Language Modeling for Mutation Explanation and Engineering", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92987", "id": "yppcLFeZgy", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yppcLFeZgy", "openreview": "https://openreview.net/forum?id=yppcLFeZgy", "poster": "/media/PosterPDFs/NeurIPS%202024/92987.png?t=1731315871.8583128", "project": "", "author_site": "Yizhen Luo, Zikun Nie, Massimo Hong, Suyuan Zhao, Hao Zhou, Zaiqing Nie", "tldr": "", "abstract": "Studying protein mutations within amino acid sequences holds tremendous significance in life sciences. Protein language models (PLMs) have demonstrated strong capabilities in broad biological applications. However, due to architectural design and lack of supervision, PLMs model mutations implicitly with evolutionary plausibility, which is not satisfactory to serve as explainable and engineerable tools in real-world studies. To address these issues, we present MutaPLM, a unified framework for interpreting and navigating protein mutations with protein language models. MutaPLM introduces a protein *delta* network that captures explicit protein mutation representations within a unified feature space, and a transfer learning pipeline with a chain-of-thought (CoT) strategy to harvest protein mutation knowledge from biomedical texts. We also construct MutaDescribe, the first large-scale protein mutation dataset with rich textual annotations, which provides cross-modal supervision signals. Through comprehensive experiments, we demonstrate that MutaPLM excels at providing human-understandable explanations for mutational effects and prioritizing novel mutations with desirable properties. Our code, model, and data are open-sourced at https://github.com/PharMolix/MutaPLM.", "keywords": "protein language modeling;mutation explanation;directed evolution", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/8bae49c0ebb45cf620ca63017ab769d8afe4b0eb.zip", "author": "YIZHEN LUO;Zikun Nie;Massimo Hong;Suyuan Zhao;Hao Zhou;Zaiqing Nie", "authorids": "~YIZHEN_LUO1;~Zikun_Nie1;~Massimo_Hong1;~Suyuan_Zhao1;~Hao_Zhou5;~Zaiqing_Nie2", "gender": "M;F;M;M;M;M", "homepage": "https://air.tsinghua.edu.cn/airtd/yjs.htm;https://air.tsinghua.edu.cn/airtd/yjs.htm;;https://github.com/toycat-I;https://zhouh.github.io/;https://air.tsinghua.edu.cn/en/info/1046/1192.htm", "dblp": "286/8497;;300/2422;348/9782;63/778-12;n/ZaiqingNie", "google_scholar": ";;;YjWpJGUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "orcid": ";;;;;0000-0002-1134-2343", "linkedin": ";;massimo-hong-86b1a7276;;;", "or_profile": "~YIZHEN_LUO1;~Zikun_Nie1;~Massimo_Hong1;~Suyuan_Zhao1;~Hao_Zhou5;~Zaiqing_Nie2", "aff": "Computer Science and Technology, Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nluo2024mutaplm,\ntitle={Muta{PLM}: Protein Language Modeling for Mutation Explanation and Engineering},\nauthor={YIZHEN LUO and Zikun Nie and Massimo Hong and Suyuan Zhao and Hao Zhou and Zaiqing Nie},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yppcLFeZgy}\n}", "github": "", "reviewers": "3bXf;eM8T;g15f", "pdf_size": 3254645, "rating": "5;6;6", "confidence": "5;4;4", "soundness": "3;3;2", "novelty": "3;3;2", "presentation": "3;4;2", "wc_summary": "44;44;65", "wc_strengths": "60;95;37", "wc_weaknesses": "144;103;188", "wc_questions": "1;166;2", "wc_limitations": "13;5;1", "wc_review": "262;413;293", "wc_reply_reviewers": "0;24;12", "wc_reply_authors": "0;32;32", "reply_reviewers": "0;1;1", "reply_authors": "1;2;2", "rating_avg": [ 5.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.816496580927726 ], "wc_summary_avg": [ 51.0, 9.899494936611665 ], "wc_strengths_avg": [ 64.0, 23.84673283002656 ], "wc_weaknesses_avg": [ 145.0, 34.708308323320324 ], "wc_questions_avg": [ 56.333333333333336, 77.54711829304526 ], "wc_limitations_avg": [ 6.333333333333333, 4.988876515698588 ], "wc_review_avg": [ 322.6666666666667, 65.11698873736579 ], "wc_reply_reviewers_avg": [ 12.0, 9.797958971132712 ], "wc_reply_authors_avg": [ 21.333333333333332, 15.084944665313014 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.9999999999999998, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18238051379477654217&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "cs.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 6, "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Computer Science and Technology", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Personalized Federated Learning with Mixture of Models for Adaptive Prediction and Model Fine-Tuning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92986", "id": "yvUHnBkCzd", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yvUHnBkCzd", "openreview": "https://openreview.net/forum?id=yvUHnBkCzd", "poster": "", "project": "", "author_site": "Pouya M. Ghari, Yanning Shen", "tldr": "", "abstract": "Federated learning is renowned for its efficacy in distributed model training, ensuring that users, called clients, retain data privacy by not disclosing their data to the central server that orchestrates collaborations. Most previous work on federated learning assumes that clients possess static batches of training data. However, clients may also need to make real-time predictions on streaming data in non-stationary environments. In such dynamic environments, employing pre-trained models may be inefficient, as they struggle to adapt to the constantly evolving data streams. To address this challenge, clients can fine-tune models online, leveraging their observed data to enhance performance. Despite the potential benefits of client participation in federated online model fine-tuning, existing analyses have not conclusively demonstrated its superiority over local model fine-tuning. To bridge this gap, the present paper develops a novel personalized federated learning algorithm, wherein each client constructs a personalized model by combining a locally fine-tuned model with multiple federated models learned by the server over time. Theoretical analysis and experiments on real datasets corroborate the effectiveness of this approach for real-time predictions and federated model fine-tuning.", "keywords": "Federated Learning;Personalized Models;Real-time Predictions", "primary_area": "other", "supplementary_material": "", "author": "Pouya M. Ghari;Yanning Shen", "authorids": "~Pouya_M._Ghari1;~Yanning_Shen1", "gender": "F;", "homepage": "https://sites.google.com/uci.edu/yanning-shen/home;", "dblp": "120/7392.html;280/1628", "google_scholar": "MfzntAIAAAAJ;", "orcid": ";", "linkedin": ";", "or_profile": "~Yanning_Shen1;~Pouya_M_Gari1", "aff": "University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;uci.edu", "position": "Assistant Professor;PhD student", "bibtex": "@inproceedings{\nghari2024personalized,\ntitle={Personalized Federated Learning with Mixture of Models for Adaptive Prediction and Model Fine-Tuning},\nauthor={Pouya M. Ghari and Yanning Shen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yvUHnBkCzd}\n}", "github": "", "reviewers": "rUKC;extd;Lvm5;NAbk", "pdf_size": 454120, "rating": "4;5;5;6", "confidence": "5;4;3;4", "soundness": "3;2;3;3", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "54;67;75;120", "wc_strengths": "84;46;48;94", "wc_weaknesses": "90;150;97;103", "wc_questions": "1;70;2;3", "wc_limitations": "1;5;1;3", "wc_review": "230;338;223;323", "wc_reply_reviewers": "0;17;0;0", "wc_reply_authors": "0;21;0;0", "reply_reviewers": "0;1;0;0", "reply_authors": "1;2;1;1", "rating_avg": [ 5.0, 0.7071067811865476 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 79.0, 24.829418035870273 ], "wc_strengths_avg": [ 68.0, 21.307275752662516 ], "wc_weaknesses_avg": [ 110.0, 23.547823678633232 ], "wc_questions_avg": [ 19.0, 29.45335295004628 ], "wc_limitations_avg": [ 2.5, 1.6583123951777 ], "wc_review_avg": [ 278.5, 52.32829062753722 ], "wc_reply_reviewers_avg": [ 4.25, 7.361215932167728 ], "wc_reply_authors_avg": [ 5.25, 9.093266739736606 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3855011256956316003&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 3, "email": "uci.edu;uci.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Multi-Domain Learning for Generalizable Video Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92985", "id": "ywEQkCmImh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ywEQkCmImh", "openreview": "https://openreview.net/forum?id=ywEQkCmImh", "poster": "/media/PosterPDFs/NeurIPS%202024/92985.png?t=1733799140.7117856", "project": "", "author_site": "MyeongAh Cho, Taeoh Kim, Minho Shim, Dongyoon Wee, Sangyoun Lee", "tldr": "", "abstract": "Most of the existing Video Anomaly Detection (VAD) studies have been conducted within single-domain learning, where training and evaluation are performed on a single dataset. However, the criteria for abnormal events differ across VAD datasets, making it problematic to apply a single-domain model to other domains. In this paper, we propose a new task called Multi-Domain learning forVAD (MDVAD) to explore various real-world abnormal events using multiple datasets for a general model. MDVAD involves training on datasets from multiple domains simultaneously, and we experimentally observe that Abnormal Conflicts between domains hinder learning and generalization. The task aims to address two key objectives: (i) better distinguishing between general normal and abnormal events across multiple domains, and (ii) being aware of ambiguous abnormal conflicts. This paper is the first to tackle abnormal conflict issue and introduces a new benchmark, baselines, and evaluation protocols for MDVAD. As baselines, we propose a framework with Null(Angular)-Multiple Instance Learning and an Abnormal Conflict classifier. Through experiments on a MDVAD benchmark composed of six VAD datasets and using four different evaluation protocols, we reveal abnormal conflicts and demonstrate that the proposed baseline effectively handles these conflicts, showing robustness and adaptability across multiple domains.", "keywords": "Video anomaly detection;Multi-domain learning;Domain generalization", "primary_area": "machine_vision", "supplementary_material": "", "author": "MyeongAh Cho;Taeoh Kim;Minho Shim;Dongyoon Wee;Sangyoun Lee", "authorids": "~MyeongAh_Cho1;~Taeoh_Kim2;~Minho_Shim1;~Dongyoon_Wee1;~Sangyoun_Lee1", "gender": "F;;;M;M", "homepage": "https://vslab.khu.ac.kr;https://taeoh-kim.github.io/;;;http://mvp.yonsei.ac.kr", "dblp": "254/7937;226/2517;;234/0087;65/1227", "google_scholar": "https://scholar.google.com/citations?hl=ko;mMgadisAAAAJ;;oEKX8h0AAAAJ;", "orcid": "0000-0001-9330-2785;0000-0001-7252-5525;;0000-0003-0359-146X;", "linkedin": "myeongah-cho-729075232/;;;;", "or_profile": "~MyeongAh_Cho1;~Taeoh_Kim2;~Minho_Shim1;~Dongyoon_Wee1;~Sangyoun_Lee1", "aff": "Kyung Hee University;NAVER Cloud;;NAVER;Yonsei University", "aff_domain": "khu.ac.kr;navercorp.com;;navercorp.com;yonsei.ac.kr", "position": "Assistant Professor;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\ncho2024towards,\ntitle={Towards Multi-Domain Learning for Generalizable Video Anomaly Detection},\nauthor={MyeongAh Cho and Taeoh Kim and Minho Shim and Dongyoon Wee and Sangyoun Lee},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ywEQkCmImh}\n}", "github": "", "reviewers": "2cHp;926M;beRv;SwSy", "pdf_size": 5656145, "rating": "4;5;5;5", "confidence": "4;4;5;4", "soundness": "2;2;3;3", "novelty": "2;3;2;3", "presentation": "2;2;3;3", "wc_summary": "71;67;109;87", "wc_strengths": "28;37;69;115", "wc_weaknesses": "134;156;181;157", "wc_questions": "76;4;8;64", "wc_limitations": "12;12;1;2", "wc_review": "321;276;368;425", "wc_reply_reviewers": "144;298;0;53", "wc_reply_authors": "352;707;0;0", "reply_reviewers": "1;4;0;1", "reply_authors": "2;4;1;1", "rating_avg": [ 4.75, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 83.5, 16.515144564913744 ], "wc_strengths_avg": [ 62.25, 34.05418476487141 ], "wc_weaknesses_avg": [ 157.0, 16.62828914831589 ], "wc_questions_avg": [ 38.0, 32.31098884280702 ], "wc_limitations_avg": [ 6.75, 5.261891294962297 ], "wc_review_avg": [ 347.5, 55.319526389874305 ], "wc_reply_reviewers_avg": [ 123.75, 113.01852724221813 ], "wc_reply_authors_avg": [ 264.75, 292.9943472150956 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 2.0, 1.224744871391589 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1550228494464315413&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "khu.ac.kr;navercorp.com;;navercorp.com;yonsei.ac.kr", "author_num": 5, "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Kyung Hee University;NAVER Corporation;Yonsei University", "aff_unique_dep": ";Cloud Division;", "aff_unique_url": "http://www.khu.ac.kr;https://www.naver.com;https://www.yonsei.ac.kr", "aff_unique_abbr": "KHU;NAVER;Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "AV-Cloud: Spatial Audio Rendering Through Audio-Visual Cloud Splatting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92984", "id": "yxOrSmS5wR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yxOrSmS5wR", "openreview": "https://openreview.net/forum?id=yxOrSmS5wR", "poster": "/media/PosterPDFs/NeurIPS%202024/92984.png?t=1733794174.5263877", "project": "", "author_site": "Mingfei Chen, Eli Shlizerman", "tldr": "", "abstract": "We propose a novel approach for rendering high-quality spatial audio for 3D scenes that is in synchrony with the visual stream but does not rely or explicitly conditioned on the visual rendering. We demonstrate that such an approach enables the experience of immersive virtual tourism - performing a real-time dynamic navigation within the scene, experiencing both audio and visual content. Current audio-visual rendering approaches typically rely on visual cues, such as images, and thus visual artifacts could cause inconsistency in the audio quality. Furthermore, when such approaches are incorporated with visual rendering, audio generation at each viewpoint occurs after the rendering of the image of the viewpoint and thus could lead to audio lag that affects the integration of audio and visual streams. Our proposed approach, AV-Cloud, overcomes these challenges by learning the representation of the audio-visual scene based on a set of sparse AV anchor points, that constitute the Audio-Visual Cloud, and are derived from the camera calibration. The Audio-Visual Cloud serves as an audio-visual representation from which the generation of spatial audio for arbitrary listener location can be generated. In particular, we propose a novel module Audio-Visual Cloud Splatting which decodes AV anchor points into a spatial audio transfer function for the arbitrary viewpoint of the target listener. This function, applied through the Spatial Audio Render Head module, transforms monaural input into viewpoint-specific spatial audio. As a result, AV-Cloud efficiently renders the spatial audio aligned with any visual viewpoint and eliminates the need for pre-rendered images. We show that AV-Cloud surpasses current state-of-the-art accuracy on audio reconstruction, perceptive quality, and acoustic effects on two real-world datasets. AV-Cloud also outperforms previous methods when tested on scenes \"in the wild\".", "keywords": "audio-visual;audio scenes reconstruction;spatial audio;point-based scene rendering", "primary_area": "speech_and_audio", "supplementary_material": "/attachment/5f0a6a5f67f28a0bc37758f85bcb0e82c5f5fa80.zip", "author": "Mingfei Chen;Eli Shlizerman", "authorids": "~Mingfei_Chen2;~Eli_Shlizerman1", "gender": "F;", "homepage": "https://www.mingfeichen.com/;http://faculty.washington.edu/shlizee/", "dblp": ";00/9501", "google_scholar": "uK7MW8QAAAAJ;oJnSO50AAAAJ", "orcid": ";0000-0002-3136-4531", "linkedin": "mingfei-chen-b85947153/;", "or_profile": "~Mingfei_Chen2;~Eli_Shlizerman1", "aff": "Meta Facebook;University of Washington", "aff_domain": "meta.com;u.washington.edu", "position": "Intern;Associate Professor", "bibtex": "@inproceedings{\nchen2024avcloud,\ntitle={{AV}-Cloud: Spatial Audio Rendering Through Audio-Visual Cloud Splatting},\nauthor={Mingfei Chen and Eli Shlizerman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yxOrSmS5wR}\n}", "github": "", "reviewers": "JmUt;RM9S;ka62", "pdf_size": 13009855, "rating": "5;6;7", "confidence": "4;3;1", "soundness": "2;3;3", "novelty": "2;3;3", "presentation": "1;4;3", "wc_summary": "64;80;166", "wc_strengths": "85;65;41", "wc_weaknesses": "284;189;17", "wc_questions": "5;2;9", "wc_limitations": "1;38;6", "wc_review": "439;374;239", "wc_reply_reviewers": "37;34;0", "wc_reply_authors": "94;85;0", "reply_reviewers": "1;1;0", "reply_authors": "2;2;1", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 2.6666666666666665, 1.247219128924647 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 103.33333333333333, 44.79087208597553 ], "wc_strengths_avg": [ 63.666666666666664, 17.98765008430939 ], "wc_weaknesses_avg": [ 163.33333333333334, 110.50289086213488 ], "wc_questions_avg": [ 5.333333333333333, 2.8674417556808756 ], "wc_limitations_avg": [ 15.0, 16.391054470858997 ], "wc_review_avg": [ 350.6666666666667, 83.29999333066533 ], "wc_reply_reviewers_avg": [ 23.666666666666668, 16.77961726487096 ], "wc_reply_authors_avg": [ 59.666666666666664, 42.35039026450117 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.9819805060619659, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:pgNJIDsDs48J:scholar.google.com/&scioq=AV-Cloud:+Spatial+Audio+Rendering+Through+Audio-Visual+Cloud+Splatting&hl=en&as_sdt=0,33", "gs_version_total": 2, "email": "meta.com;u.washington.edu", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Meta;University of Washington", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.washington.edu", "aff_unique_abbr": "Meta;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "REBEL: Reinforcement Learning via Regressing Relative Rewards", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92983", "id": "yxjWAJzUyV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yxjWAJzUyV", "openreview": "https://openreview.net/forum?id=yxjWAJzUyV", "poster": "/media/PosterPDFs/NeurIPS%202024/92983.png?t=1731625638.3198247", "project": "", "author_site": "Zhaolin Gao, Jonathan Chang, Wenhao Zhan, Owen Oertell, Gokul Swamy, Kiant\u00e9 Brantley, Thorsten Joachims, Drew Bagnell, Jason Lee, Wen Sun", "tldr": "", "abstract": "While originally developed for continuous control problems, Proximal Policy Optimization (PPO) has emerged as the work-horse of a variety of reinforcement learning (RL) applications, including the fine-tuning of generative models. Unfortunately, PPO requires multiple heuristics to enable stable convergence (e.g. value networks, clipping), and is notorious for its sensitivity to the precise implementation of these components. In response, we take a step back and ask what a *minimalist* RL algorithm for the era of generative models would look like. We propose REBEL, an algorithm that cleanly reduces the problem of policy optimization to regressing the *relative reward* between two completions to a prompt in terms of the policy, enabling strikingly lightweight implementation. In theory, we prove that fundamental RL algorithms like Natural Policy Gradient can be seen as variants of REBEL, which allows us to match the strongest known theoretical guarantees in terms of convergence and sample complexity in the RL literature. REBEL can also cleanly incorporate offline data and be extended to handle the intransitive preferences we frequently see in practice. Empirically, we find that REBEL provides a unified approach to language modeling and image generation with stronger or similar performance as PPO and DPO, all while being simpler to implement and more computationally efficient than PPO. When fine-tuning Llama-3-8B-Instruct, REBEL achieves strong performance in AlpacaEval 2.0, MT-Bench, and Open LLM Leaderboard. Implementation of REBEL can be found at , and models trained by REBEL can be found at .", "keywords": "Reinforcement Learning;Reinforcement Learning from Human Feedback", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Zhaolin Gao;Jonathan Daniel Chang;Wenhao Zhan;Owen Oertell;Gokul Swamy;Kiant\u00e9 Brantley;Thorsten Joachims;J. Andrew Bagnell;Jason D. Lee;Wen Sun", "authorids": "~Zhaolin_Gao1;~Jonathan_Daniel_Chang1;~Wenhao_Zhan1;~Owen_Oertell1;~Gokul_Swamy1;~Kiant\u00e9_Brantley2;~Thorsten_Joachims1;~J._Andrew_Bagnell1;~Jason_D._Lee1;~Wen_Sun1", "gender": "M;M;M;Not Specified;;;M;M;M;", "homepage": "https://zhaolingao.github.io/;https://www.cs.cornell.edu/people/phdstudents;;https://owenoertell.com;https://gokul.dev/;;http://www.joachims.org;http://www.ri.cmu.edu/person.html?person_id=689/;https://jasondlee88.github.io/;https://wensun.github.io", "dblp": "251/9132;;275/3558;368/5285;31/11509;;j/ThorstenJoachims;65/2021;88/3262;", "google_scholar": "4ANbX-YAAAAJ;_qY_t5kAAAAJ;;https://scholar.google.com/citations?hl=en;Sbpra_AAAAAJ;;5tk1PV8AAAAJ;https://scholar.google.com.tw/citations?user=7t4jbPQAAAAJ;GR_DsT0AAAAJ;iOLC30YAAAAJ", "orcid": "0000-0002-1647-4898;;;;;;0000-0003-3654-3683;;;", "linkedin": ";;;;;;thorsten-joachims-7224a35/;;;", "or_profile": "~Zhaolin_Gao1;~Jonathan_Daniel_Chang1;~Wenhao_Zhan1;~Owen_Oertell1;~Gokul_Swamy1;~Kiant\u00e9_Brantley2;~Thorsten_Joachims1;~J._Andrew_Bagnell1;~Jason_D._Lee1;~Wen_Sun1", "aff": "Cornell University;Cornell University;Princeton University;Cornell University;Carnegie Mellon University;;Amazon;Carnegie Mellon University;Princeton University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;princeton.edu;cornell.edu;cmu.edu;;amazon.com;;princeton.edu;cornell.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;;Amazon Scholar;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ngao2024rebel,\ntitle={{REBEL}: Reinforcement Learning via Regressing Relative Rewards},\nauthor={Zhaolin Gao and Jonathan Daniel Chang and Wenhao Zhan and Owen Oertell and Gokul Swamy and Kiant{\\'e} Brantley and Thorsten Joachims and J. Andrew Bagnell and Jason D. Lee and Wen Sun},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yxjWAJzUyV}\n}", "github": "", "reviewers": "ihUH;zZ2j;HFsB;dEG5", "pdf_size": 3655435, "rating": "5;7;7;8", "confidence": "4;3;3;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "150;53;67;116", "wc_strengths": "150;59;68;80", "wc_weaknesses": "380;67;59;2", "wc_questions": "579;1;240;105", "wc_limitations": "1;1;1;11", "wc_review": "1260;181;435;314", "wc_reply_reviewers": "169;20;22;24", "wc_reply_authors": "400;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;1;1", "rating_avg": [ 6.75, 1.0897247358851685 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 96.5, 38.74596753211875 ], "wc_strengths_avg": [ 89.25, 35.856484769146014 ], "wc_weaknesses_avg": [ 127.0, 148.20425095117886 ], "wc_questions_avg": [ 231.25, 217.9224345954312 ], "wc_limitations_avg": [ 3.5, 4.330127018922194 ], "wc_review_avg": [ 547.5, 421.0573001385916 ], "wc_reply_reviewers_avg": [ 58.75, 63.66857545131664 ], "wc_reply_authors_avg": [ 100.0, 173.20508075688772 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.9271726499455306, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14263649071676430654&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cornell.edu;cornell.edu;princeton.edu;cornell.edu;cmu.edu;;amazon.com;;princeton.edu;cornell.edu", "author_num": 10, "aff_unique_index": "0;0;1;0;2;3;2;1;0", "aff_unique_norm": "Cornell University;Princeton University;Carnegie Mellon University;Amazon", "aff_unique_dep": ";;;Amazon.com, Inc.", "aff_unique_url": "https://www.cornell.edu;https://www.princeton.edu;https://www.cmu.edu;https://www.amazon.com", "aff_unique_abbr": "Cornell;Princeton;CMU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Changing the Training Data Distribution to Reduce Simplicity Bias Improves In-distribution Generalization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92982", "id": "yySpldUsU2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yySpldUsU2", "openreview": "https://openreview.net/forum?id=yySpldUsU2", "poster": "/media/PosterPDFs/NeurIPS%202024/92982.png?t=1731802772.5315008", "project": "", "author_site": "Dang Nguyen, Paymon Haddad, Eric Gan, Baharan Mirzasoleiman", "tldr": "", "abstract": "Can we modify the training data distribution to encourage the underlying optimization method toward finding solutions with superior generalization performance on in-distribution data? In this work, we approach this question for the first time by comparing the inductive bias of gradient descent (GD) with that of sharpness-aware minimization (SAM). By studying a two-layer CNN, we rigorously prove that SAM learns different features more uniformly, particularly in early epochs. That is, SAM is less susceptible to simplicity bias compared to GD. We also show that examples constraining features that are learned early are separable from the rest based on the model\u2019s output. Based on this observation, we propose a method that (i) clusters examples based on the network output early in training, (ii) identifies a cluster of examples with similar network output, and (iii) upsamples the rest of examples only once to alleviate the simplicity bias. We show empirically that USEFUL effectively improves the generalization performance on the original data distribution when training with various gradient methods, including (S)GD and SAM. Notably, we demonstrate that our method can be combined with SAM variants and existing data augmentation strategies to achieve, to the best of our knowledge, state-of-the-art performance for training ResNet18 on CIFAR10, STL10, CINIC10, Tiny-ImageNet; ResNet34 on CIFAR100; and VGG19 and DenseNet121 on CIFAR10.", "keywords": "In-distribution generalization;Simplicity bias;Data modification;Sharpness-aware minimization", "primary_area": "optimization_for_deep_networks", "supplementary_material": "/attachment/f7b953c5405e1474bf7885c0feb466397f08fe12.zip", "author": "Dang Nguyen;Paymon Haddad;Eric Gan;Baharan Mirzasoleiman", "authorids": "~Dang_Nguyen2;~Paymon_Haddad1;~Eric_Gan1;~Baharan_Mirzasoleiman1", "gender": "M;M;M;F", "homepage": "https://hsgser.github.io/;https://sushipaypay.github.io/;;http://web.cs.ucla.edu/~baharan/", "dblp": ";;347/3322;52/10075", "google_scholar": "https://scholar.google.co.jp/citations?user=WIqAtrcAAAAJ;;;x63j7HEAAAAJ", "orcid": ";;;", "linkedin": "dang-nguyen-50b7a7a0/;paymonhaddad/;eric-gan-107889207/;", "or_profile": "~Dang_Nguyen2;~Paymon_Haddad1;~Eric_Gan1;~Baharan_Mirzasoleiman1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2024changing,\ntitle={Changing the Training Data Distribution to Reduce Simplicity Bias Improves In-distribution Generalization},\nauthor={Dang Nguyen and Paymon Haddad and Eric Gan and Baharan Mirzasoleiman},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yySpldUsU2}\n}", "github": "", "reviewers": "eKCz;Y69v;F4yx;EFb1", "pdf_size": 898837, "rating": "4;4;6;6", "confidence": "4;3;2;3", "soundness": "3;4;3;3", "novelty": "2;3;3;2", "presentation": "3;3;3;3", "wc_summary": "66;116;126;159", "wc_strengths": "42;33;50;136", "wc_weaknesses": "404;298;194;426", "wc_questions": "107;2;39;71", "wc_limitations": "5;1;70;6", "wc_review": "624;450;479;798", "wc_reply_reviewers": "0;0;37;131", "wc_reply_authors": "35;35;76;173", "reply_reviewers": "0;0;1;1", "reply_authors": "2;2;3;2", "rating_avg": [ 5.0, 1.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 116.75, 33.34197804570089 ], "wc_strengths_avg": [ 65.25, 41.287861412284364 ], "wc_weaknesses_avg": [ 330.5, 92.48107914595288 ], "wc_questions_avg": [ 54.75, 38.80963153651423 ], "wc_limitations_avg": [ 20.5, 28.64000698323937 ], "wc_review_avg": [ 587.75, 138.13105190361796 ], "wc_reply_reviewers_avg": [ 42.0, 53.558379363083795 ], "wc_reply_authors_avg": [ 79.75, 56.37985012395829 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7071067811865475, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17492839769310668355&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "ucla.edu;ucla.edu;ucla.edu;ucla.edu", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "ReLIZO: Sample Reusable Linear Interpolation-based Zeroth-order Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92981", "id": "yzviAnpvU6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=yzviAnpvU6", "openreview": "https://openreview.net/forum?id=yzviAnpvU6", "poster": "/media/PosterPDFs/NeurIPS%202024/92981.png?t=1731598834.896068", "project": "", "author_site": "Xiaoxing Wang, Xiaohan Qin, Xiaokang Yang, Junchi Yan", "tldr": "", "abstract": "Gradient estimation is critical in zeroth-order optimization methods, which aims to obtain the descent direction by sampling update directions and querying function evaluations. Extensive research has been conducted including smoothing and linear interpolation. The former methods smooth the objective function, causing a biased gradient estimation, while the latter often enjoys more accurate estimates, at the cost of large amounts of samples and queries at each iteration to update variables. This paper resorts to the linear interpolation strategy and proposes to reduce the complexity of gradient estimation by reusing queries in the prior iterations while maintaining the sample size unchanged. Specifically, we model the gradient estimation as a quadratically constrained linear program problem and manage to derive the analytical solution. It innovatively decouples the required sample size from the variable dimension without extra conditions required, making it able to leverage the queries in the prior iterations. Moreover, part of the intermediate variables that contribute to the gradient estimation can be directly indexed, significantly reducing the computation complexity. Experiments on both simulation functions and real scenarios (black-box adversarial attacks neural architecture search, and parameter-efficient fine-tuning for large language models), show its efficacy and efficiency. Our code is available at https://github.com/Thinklab-SJTU/ReLIZO.git.", "keywords": "Zero-order Optimization; Linear Interpolation; Reusing Strategy;", "primary_area": "optimization", "supplementary_material": "", "author": "Xiaoxing Wang;Xiaohan Qin;Xiaokang Yang;Junchi Yan", "authorids": "~Xiaoxing_Wang1;~Xiaohan_Qin1;~Xiaokang_Yang1;~Junchi_Yan2", "gender": "M;M;M;M", "homepage": "https://github.com/jianke0604;https://icne.sjtu.edu.cn/info/1064/1078.htm;http://thinklab.sjtu.edu.cn/;https://scholar.google.com/citations?user=n2ewxUIAAAAJ&hl=zh-CN", "dblp": ";06/3071-1.html;60/7949.html;78/885", "google_scholar": "e5ckbCoAAAAJ;yDEavdMAAAAJ;ga230VoAAAAJ;n2ewxUIAAAAJ", "orcid": ";0000-0003-4029-3322;0000-0001-9639-7679;0000-0002-7830-9521", "linkedin": ";;;", "or_profile": "~Xiaohan_Qin1;~Xiaokang_Yang1;~Junchi_Yan1;~Victor_Wang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "Undergrad student;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2024relizo,\ntitle={Re{LIZO}: Sample Reusable Linear Interpolation-based Zeroth-order Optimization},\nauthor={Xiaoxing Wang and Xiaohan Qin and Xiaokang Yang and Junchi Yan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=yzviAnpvU6}\n}", "github": "", "reviewers": "VWEi;YYyS;zPFv", "pdf_size": 908917, "rating": "6;6;7", "confidence": "3;3;4", "soundness": "4;3;3", "novelty": "4;3;3", "presentation": "3;3;3", "wc_summary": "82;71;108", "wc_strengths": "100;49;81", "wc_weaknesses": "54;95;52", "wc_questions": "42;2;21", "wc_limitations": "7;1;1", "wc_review": "285;218;263", "wc_reply_reviewers": "21;31;10", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;1;1", "reply_authors": "1;1;1", "rating_avg": [ 6.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 87.0, 15.513435037626794 ], "wc_strengths_avg": [ 76.66666666666667, 21.044925490219462 ], "wc_weaknesses_avg": [ 67.0, 19.8158185969358 ], "wc_questions_avg": [ 21.666666666666668, 16.33673433979046 ], "wc_limitations_avg": [ 3.0, 2.8284271247461903 ], "wc_review_avg": [ 255.33333333333334, 27.884683171152503 ], "wc_reply_reviewers_avg": [ 20.666666666666668, 8.576453553512405 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9999999999999997, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:Hskq7js8UV0J:scholar.google.com/&scioq=ReLIZO:+Sample+Reusable+Linear+Interpolation-based+Zeroth-order+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "DiffusionPDE: Generative PDE-Solving under Partial Observation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92980", "id": "z0I2SbjN0R", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z0I2SbjN0R", "openreview": "https://openreview.net/forum?id=z0I2SbjN0R", "poster": "/media/PosterPDFs/NeurIPS%202024/92980.png?t=1733459934.898936", "project": "", "author_site": "Jiahe Huang, Guandao Yang, Zichen Wang, Jeong Joon Park", "tldr": "", "abstract": "We introduce a general framework for solving partial differential equations (PDEs) using generative diffusion models. In particular, we focus on the scenarios where we do not have the full knowledge of the scene necessary to apply classical solvers. Most existing forward or inverse PDE approaches perform poorly when the observations on the data or the underlying coefficients are incomplete, which is a common assumption for real-world measurements. In this work, we propose DiffusionPDE that can simultaneously fill in the missing information and solve a PDE by modeling the joint distribution of the solution and coefficient spaces. We show that the learned generative priors lead to a versatile framework for accurately solving a wide range of PDEs under partial observation, significantly outperforming the state-of-the-art methods for both forward and inverse directions.", "keywords": "Guided Diffusion Model;Partial Differential Equation;Sparse Observation;Inverse Problem", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Jiahe Huang;Guandao Yang;Zichen Wang;Jeong Joon Park", "authorids": "~Jiahe_Huang1;~Guandao_Yang1;~Zichen_Wang10;~Jeong_Joon_Park2", "gender": "F;M;M;Not Specified", "homepage": "https://jhhuang.site;http://www.guandaoyang.com;https://zichenwang01.github.io/;https://jjparkcv.github.io/", "dblp": "368/8419;209/9624;;227/2840", "google_scholar": "-KXbo2EAAAAJ;_kElCmMAAAAJ;https://scholar.google.com/citations?view_op=list_works;aD5pXLoAAAAJ", "orcid": "0009-0008-9038-1551;0000-0002-2992-5803;0000-0001-8666-2061;", "linkedin": ";guandao-yang-349b83a6/;;", "or_profile": "~Jiahe_Huang1;~Guandao_Yang1;~Zichen_Wang10;~Jeong_Joon_Park2", "aff": "Shanghai Jiaotong University;Stanford University;Cornell University;University of Michigan - Ann Arbor", "aff_domain": "sjtu.edu.cn;stanford.edu;cornell.edu;umich.edu", "position": "Undergrad student;Postdoc;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2024diffusionpde,\ntitle={Diffusion{PDE}: Generative {PDE}-Solving under Partial Observation},\nauthor={Jiahe Huang and Guandao Yang and Zichen Wang and Jeong Joon Park},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z0I2SbjN0R}\n}", "github": "", "reviewers": "4PC9;y14J;AaET;nBsW", "pdf_size": 13020439, "rating": "4;5;6;8", "confidence": "3;4;4;5", "soundness": "2;3;3;4", "novelty": "2;2;2;3", "presentation": "3;3;3;4", "wc_summary": "70;46;65;87", "wc_strengths": "36;46;134;67", "wc_weaknesses": "211;184;220;175", "wc_questions": "355;61;228;63", "wc_limitations": "8;28;26;79", "wc_review": "680;365;673;471", "wc_reply_reviewers": "663;129;47;0", "wc_reply_authors": "554;118;27;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;2;2;1", "rating_avg": [ 5.75, 1.479019945774904 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 67.0, 14.611639196202457 ], "wc_strengths_avg": [ 70.75, 38.192767639960316 ], "wc_weaknesses_avg": [ 197.5, 18.553975315279473 ], "wc_questions_avg": [ 176.75, 123.2241352170913 ], "wc_limitations_avg": [ 35.25, 26.43269755435491 ], "wc_review_avg": [ 547.25, 134.596387395799 ], "wc_reply_reviewers_avg": [ 209.75, 265.7248341800217 ], "wc_reply_authors_avg": [ 174.75, 223.28163269736274 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9561828874675149, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9516161824482612704&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "sjtu.edu.cn;stanford.edu;cornell.edu;umich.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Shanghai Jiao Tong University;Stanford University;Cornell University;University of Michigan", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.stanford.edu;https://www.cornell.edu;https://www.umich.edu", "aff_unique_abbr": "SJTU;Stanford;Cornell;UM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Ann Arbor", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "XMask3D: Cross-modal Mask Reasoning for Open Vocabulary 3D Semantic Segmentation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92979", "id": "z1GwaNoGnr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z1GwaNoGnr", "openreview": "https://openreview.net/forum?id=z1GwaNoGnr", "poster": "", "project": "", "author_site": "Ziyi Wang, Yanbo Wang, Xumin Yu, Jie Zhou, Jiwen Lu", "tldr": "", "abstract": "Existing methodologies in open vocabulary 3D semantic segmentation primarily concentrate on establishing a unified feature space encompassing 3D, 2D, and textual modalities. Nevertheless, traditional techniques such as global feature alignment or vision-language model distillation tend to impose only approximate correspondence, struggling notably with delineating fine-grained segmentation boundaries. To address this gap, we propose a more meticulous mask-level alignment between 3D features and the 2D-text embedding space through a cross-modal mask reasoning framework, XMask3D. In our approach, we developed a mask generator based on the denoising UNet from a pre-trained diffusion model, leveraging its capability for precise textual control over dense pixel representations and enhancing the open-world adaptability of the generated masks. We further integrate 3D global features as implicit conditions into the pre-trained 2D denoising UNet, enabling the generation of segmentation masks with additional 3D geometry awareness. Subsequently, the generated 2D masks are employed to align mask-level 3D representations with the vision-language feature space, thereby augmenting the open vocabulary capability of 3D geometry embeddings. Finally, we fuse complementary 2D and 3D mask features, resulting in competitive performance across multiple benchmarks for 3D open vocabulary semantic segmentation. Code is available at https://github.com/wangzy22/XMask3D.", "keywords": "3D open vocabulary;semantic segmentation;cross-modal learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Ziyi Wang;Yanbo Wang;Xumin Yu;Jie Zhou;Jiwen Lu", "authorids": "~Ziyi_Wang3;~Yanbo_Wang6;~Xumin_Yu2;~Jie_Zhou3;~Jiwen_Lu1", "gender": "F;M;M;M;M", "homepage": "https://wangzy22.github.io;https://github.com/aofengbaobao;https://yuxumin.github.io/;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/", "dblp": "160/2171-7;;237/0070;00/5012-1;http://dblp.uni-trier.de/pers/hd/l/Lu:Jiwen", "google_scholar": "DYHPUXUAAAAJ;T49IInQAAAAJ;zfDZMZAAAAAJ;;TN8uDQoAAAAJ", "orcid": "0000-0002-9007-1210;;;;0000-0002-6121-5529", "linkedin": ";;;;", "or_profile": "~Ziyi_Wang3;~Yanbo_Wang6;~Xumin_Yu2;~Jie_Zhou3;~Jiwen_Lu1", "aff": "Tsinghua University;Tsinghua University;Department of Automation, Tsinghua University, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2024xmaskd,\ntitle={{XM}ask3D: Cross-modal Mask Reasoning for Open Vocabulary 3D Semantic Segmentation},\nauthor={Ziyi Wang and Yanbo Wang and Xumin Yu and Jie Zhou and Jiwen Lu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z1GwaNoGnr}\n}", "github": "", "reviewers": "tdkT;AnYT;4VWC;Xo8p", "pdf_size": 6451153, "rating": "5;5;6;7", "confidence": "3;4;4;4", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "64;80;270;61", "wc_strengths": "18;41;159;40", "wc_weaknesses": "36;67;60;83", "wc_questions": "318;8;46;23", "wc_limitations": "15;4;13;5", "wc_review": "451;200;548;212", "wc_reply_reviewers": "29;38;16;32", "wc_reply_authors": "42;33;27;27", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 118.75, 87.62241436984033 ], "wc_strengths_avg": [ 64.5, 55.328564051491526 ], "wc_weaknesses_avg": [ 61.5, 16.91892431568863 ], "wc_questions_avg": [ 98.75, 127.3054888840226 ], "wc_limitations_avg": [ 9.25, 4.815340071064556 ], "wc_review_avg": [ 352.75, 150.7636809712472 ], "wc_reply_reviewers_avg": [ 28.75, 8.042853971072706 ], "wc_reply_authors_avg": [ 32.25, 6.139014578904337 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.5222329678670935, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:thWyj5EdOAwJ:scholar.google.com/&scioq=XMask3D:+Cross-modal+Mask+Reasoning+for+Open+Vocabulary+3D+Semantic+Segmentation&hl=en&as_sdt=0,44", "gs_version_total": 5, "email": "tsinghua.edu.cn;mail.tsinghua.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "MMScan: A Multi-Modal 3D Scene Dataset with Hierarchical Grounded Language Annotations", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97429", "id": "z1nITsHKb4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z1nITsHKb4", "openreview": "https://openreview.net/forum?id=z1nITsHKb4", "poster": "/media/PosterPDFs/NeurIPS%202024/97429.png?t=1731469613.1663094", "project": "", "author_site": "Ruiyuan Lyu, Tai WANG, Jingli Lin, Shuaiyang, Xiaohan Mao, Yilun Chen, Runsen Xu, Haifeng Huang, Chenming Zhu, Dahua Lin, Jiangmiao Pang", "tldr": "", "abstract": "With the emergence of LLMs and their integration with other data modalities, multi-modal 3D perception attracts more attention due to its connectivity to the physical world and makes rapid progress. However, limited by existing datasets, previous works mainly focus on understanding object properties or inter-object spatial relationships in a 3D scene. To tackle this problem, this paper builds the first largest ever multi-modal 3D scene dataset and benchmark with hierarchical grounded language annotations, MMScan. It is constructed based on a top-down logic, from region to object level, from a single target to inter-target relationships, covering holistic aspects of spatial and attribute understanding. The overall pipeline incorporates powerful VLMs via carefully designed prompts to initialize the annotations efficiently and further involve humans' correction in the loop to ensure the annotations are natural, correct, and comprehensive. Built upon existing 3D scanning data, the resulting multi-modal 3D dataset encompasses 1.4M meta-annotated captions on 109k objects and 7.7k regions as well as over 3.04M diverse samples for 3D visual grounding and question-answering benchmarks. We evaluate representative baselines on our benchmarks, analyze their capabilities in different aspects, and showcase the key problems to be addressed in the future. Furthermore, we use this high-quality dataset to train state-of-the-art 3D visual grounding and LLMs and obtain remarkable performance improvement both on existing benchmarks and in-the-wild evaluation.", "keywords": "Multi-modal 3D perception;3D scene dataset;hierarchical grounded language annotations", "primary_area": "", "supplementary_material": "/attachment/018214304f243c1d6ea9e5e39ae434ad910afc6e.zip", "author": "Ruiyuan Lyu;Jingli Lin;Tai Wang;Shuai Yang;Xiaohan Mao;Yilun Chen;Runsen Xu;Haifeng Huang;Chenming Zhu;Dahua Lin;Jiangmiao Pang", "authorids": "~Ruiyuan_Lyu1;~Jingli_Lin1;~Tai_Wang2;~Shuai_Yang18;~Xiaohan_Mao1;~Yilun_Chen1;~Runsen_Xu1;~Haifeng_Huang3;~Chenming_Zhu1;~Dahua_Lin1;~Jiangmiao_Pang1", "gender": ";M;M;M;M;M;M;M;M;M;M", "homepage": "https://github.com/ruiyuanlyu;https://mail.sjtu.edu.cn/zimbra/mail#1;;;http://yilunchen.com/about/;;https://zzzzchs.github.io/;https://github.com/ZCMax;http://dahua.site;https://oceanpang.github.io/;https://tai-wang.github.io/", "dblp": ";67/8498;;262/3946;;289/6916;;251/1919;53/6088;231/7630;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;l5HWdWEAAAAJ;-zT1NKwAAAAJ;gKXC9Q8AAAAJ;MOobrCcAAAAJ;oUm2gZUAAAAJ;QabwS_wAAAAJ;GMzzRRUAAAAJ;https://scholar.google.com/citations?authuser=0;JmbbZWIAAAAJ", "orcid": ";;0009-0008-7260-8169;;0000-0003-3372-8703;;;;;0000-0002-6711-9319;", "linkedin": ";;;;yilunchen-cuhk/;runsen-xu-4262a3272/;haifeng-huang-784b2b249/;;;;%E6%B3%B0-%E7%8E%8B-2b2738147/", "or_profile": "~Ruiyuan_Lyu1;~Jingli_Lin1;~Shuai_Yang18;~Xiaohan_Mao1;~Yilun_Chen1;~Runsen_Xu1;~Haifeng_Huang3;~Chenming_Zhu1;~Dahua_Lin1;~Jiangmiao_Pang1;~Tai_WANG1", "aff": "Tsinghua University;Shanghai Jiaotong University;Zhejiang University;Shanghai Jiaotong University;Shanghai Artificial Intelligence Laboratory;The Chinese University of Hong Kong;Zhejiang University;University of Hong Kong;The Chinese University of Hong Kong;Shanghai AI Laboratory ;Shanghai AI Laboratory", "aff_domain": "tsinghua.edu.cn;sjtu.edu;zju.edu.cn;sjtu.edu.cn;pjlab.org.cn;ie.cuhk.edu;zju.edu.cn;hku.hk;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn", "position": "Undergrad student;Undergrad student;Undergrad student;PhD student;Researcher;PhD student;MS student;PhD student;Associate Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlyu2024mmscan,\ntitle={{MMS}can: A Multi-Modal 3D Scene Dataset with Hierarchical Grounded Language Annotations},\nauthor={Ruiyuan Lyu and Jingli Lin and Tai Wang and Shuai Yang and Xiaohan Mao and Yilun Chen and Runsen Xu and Haifeng Huang and Chenming Zhu and Dahua Lin and Jiangmiao Pang},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=z1nITsHKb4}\n}", "github": "", "reviewers": "bd7y;iBNL;qEsA;GsMX", "pdf_size": 9015471, "rating": "5;6;7;9", "confidence": "4;5;4;5", "wc_summary_and_contributions": "105;35;94;45", "wc_strengths": "80;19;129;31", "wc_improvement": "154;12;218;58", "wc_limitations": "1;13;8;20", "wc_correctness": "1;25;25;18", "wc_clarity": "1;1;43;9", "wc_relation_to_prior_work": "1;7;13;23", "wc_documentation": "10;9;8;33", "wc_additional_feedback": "1;1;1;1", "wc_review": "354;122;539;238", "wc_reply_reviewers": "0;60;17;137", "wc_reply_authors": "0;163;16;44", "reply_reviewers": "0;1;1;1", "reply_authors": "1;3;2;2", "rating_avg": [ 6.75, 1.479019945774904 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 69.75, 30.21071829665756 ], "wc_strengths_avg": [ 64.75, 43.56819367382587 ], "wc_improvement_avg": [ 110.5, 80.47825793343193 ], "wc_limitations_avg": [ 10.5, 6.946221994724902 ], "wc_correctness_avg": [ 17.25, 9.807522622966516 ], "wc_clarity_avg": [ 13.5, 17.342145196024624 ], "wc_relation_to_prior_work_avg": [ 11.0, 8.12403840463596 ], "wc_documentation_avg": [ 15.0, 10.41633332799983 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 313.25, 153.99898538626806 ], "wc_reply_reviewers_avg": [ 53.5, 52.936282453530865 ], "wc_reply_authors_avg": [ 55.75, 63.891998716584226 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.50709255283711, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5783728075965741220&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;sjtu.edu;zju.edu.cn;sjtu.edu.cn;pjlab.org.cn;ie.cuhk.edu;zju.edu.cn;hku.hk;cuhk.edu.hk;pjlab.org.cn;pjlab.org.cn", "author_num": 11, "aff_unique_index": "0;1;2;1;3;4;2;5;4;6;6", "aff_unique_norm": "Tsinghua University;Shanghai Jiao Tong University;Zhejiang University;Shanghai Artificial Intelligence Laboratory;Chinese University of Hong Kong;University of Hong Kong;Shanghai AI Laboratory", "aff_unique_dep": ";;;;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.sjtu.edu.cn;https://www.zju.edu.cn;http://www.shailab.org/;https://www.cuhk.edu.hk;https://www.hku.hk;https://www.shanghai-ai-lab.com", "aff_unique_abbr": "THU;SJTU;ZJU;Shanghai AI Lab;CUHK;HKU;SAIL", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Provably Efficient Reinforcement Learning with Multinomial Logit Function Approximation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92978", "id": "z2739hYuR3", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z2739hYuR3", "openreview": "https://openreview.net/forum?id=z2739hYuR3", "poster": "", "project": "", "author_site": "Long-Fei Li, Yu-Jie Zhang, Peng Zhao, Zhi-Hua Zhou", "tldr": "", "abstract": "We study a new class of MDPs that employs multinomial logit (MNL) function approximation to ensure valid probability distributions over the state space. Despite its significant benefits, incorporating the non-linear function raises substantial challenges in both *statistical* and *computational* efficiency. The best-known result of Hwang and Oh [2023] has achieved an $\\widetilde{\\mathcal{O}}(\\kappa^{-1}dH^2\\sqrt{K})$ regret upper bound, where $\\kappa$ is a problem-dependent quantity, $d$ is the feature dimension, $H$ is the episode length, and $K$ is the number of episodes. However, we observe that $\\kappa^{-1}$ exhibits polynomial dependence on the number of reachable states, which can be as large as the state space size in the worst case and thus undermines the motivation for function approximation. Additionally, their method requires storing all historical data and the time complexity scales linearly with the episode count, which is computationally expensive. In this work, we propose a statistically efficient algorithm that achieves a regret of $\\widetilde{\\mathcal{O}}(dH^2\\sqrt{K} + \\kappa^{-1}d^2H^2)$, eliminating the dependence on $\\kappa^{-1}$ in the dominant term for the first time. We then address the computational challenges by introducing an enhanced algorithm that achieves the same regret guarantee but with only constant cost. Finally, we establish the first lower bound for this problem, justifying the optimality of our results in $d$ and $K$.", "keywords": "MNL function approximation;regret analysis;reinforcement learning;Markov decision process", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Long-Fei Li;Yu-Jie Zhang;Peng Zhao;Zhi-Hua Zhou", "authorids": "~Long-Fei_Li1;~Yu-Jie_Zhang1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "gender": ";M;;", "homepage": ";https://yujie-zhang96.github.io/;;", "dblp": ";234/6681;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Long-Fei_Li1;~Yu-Jie_Zhang1;~Peng_Zhao1;~Zhi-Hua_Zhou2", "aff": ";The University of Tokyo;;", "aff_domain": ";u-tokyo.ac.jp;;", "position": ";PhD student;;", "bibtex": "@inproceedings{\nli2024provably,\ntitle={Provably Efficient Reinforcement Learning with Multinomial Logit Function Approximation},\nauthor={Long-Fei Li and Yu-Jie Zhang and Peng Zhao and Zhi-Hua Zhou},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z2739hYuR3}\n}", "github": "", "reviewers": "tHPh;Roix;xBZM;zUWe;qUhc", "pdf_size": 528421, "rating": "5;6;6;6;6", "confidence": "3;3;4;3;3", "soundness": "3;4;3;3;3", "novelty": "2;3;3;2;3", "presentation": "3;3;3;3;3", "wc_summary": "79;214;62;142;71", "wc_strengths": "21;38;44;48;117", "wc_weaknesses": "142;58;61;200;119", "wc_questions": "15;5;24;73;20", "wc_limitations": "49;1;6;1;29", "wc_review": "306;316;197;464;356", "wc_reply_reviewers": "189;28;33;357;0", "wc_reply_authors": "727;18;17;150;0", "reply_reviewers": "3;1;1;1;0", "reply_authors": "4;2;2;2;1", "rating_avg": [ 5.8, 0.39999999999999997 ], "confidence_avg": [ 3.2, 0.39999999999999997 ], "soundness_avg": [ 3.2, 0.39999999999999997 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 113.6, 57.552063386120224 ], "wc_strengths_avg": [ 53.6, 33.01272481937836 ], "wc_weaknesses_avg": [ 116.0, 53.16013544000805 ], "wc_questions_avg": [ 27.4, 23.66938951472978 ], "wc_limitations_avg": [ 17.2, 18.977881862842334 ], "wc_review_avg": [ 327.8, 86.10319390127174 ], "wc_reply_reviewers_avg": [ 121.4, 135.1689313414884 ], "wc_reply_authors_avg": [ 182.4, 277.59438034657694 ], "reply_reviewers_avg": [ 1.2, 0.9797958971132713 ], "reply_authors_avg": [ 2.2, 0.9797958971132712 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.25000000000000006, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14250985624259328829&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "email": ";u-tokyo.ac.jp;;", "author_num": 4, "aff_unique_index": "0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Guiding Neural Collapse: Optimising Towards the Nearest Simplex Equiangular Tight Frame", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92977", "id": "z4FaPUslma", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z4FaPUslma", "openreview": "https://openreview.net/forum?id=z4FaPUslma", "poster": "", "project": "", "author_site": "Evan Markou, Thalaiyasingam Ajanthan, Stephen Gould", "tldr": "", "abstract": "Neural Collapse (NC) is a recently observed phenomenon in neural networks that characterises the solution space of the final classifier layer when trained until zero training loss. Specifically, NC suggests that the final classifier layer converges to a Simplex Equiangular Tight Frame (ETF), which maximally separates the weights corresponding to each class. By duality, the penultimate layer feature means also converge to the same simplex ETF. Since this simple symmetric structure is optimal, our idea is to utilise this property to improve convergence speed. Specifically, we introduce the notion of \\textit{nearest simplex ETF geometry} for the penultimate layer features at any given training iteration, by formulating it as a Riemannian optimisation. Then, at each iteration, the classifier weights are implicitly set to the nearest simplex ETF by solving this inner-optimisation, which is encapsulated within a declarative node to allow backpropagation. Our experiments on synthetic and real-world architectures on classification tasks demonstrate that our approach accelerates convergence and enhances training stability.", "keywords": "neural collapse;equiangular tight frames;Riemannian optimisation;deep learning", "primary_area": "optimization_for_deep_networks", "supplementary_material": "", "author": "Evan Markou;Thalaiyasingam Ajanthan;Stephen Gould", "authorids": "~Evan_Markou1;~Thalaiyasingam_Ajanthan1;~Stephen_Gould1", "gender": "M;M;M", "homepage": ";https://tajanthan.github.io/;http://users.cecs.anu.edu.au/~sgould/", "dblp": "294/3940;154/6629;89/1569.html", "google_scholar": "https://scholar.google.com.au/citations?user=i-9KGy8AAAAJ;https://scholar.google.com.au/citations?user=Rza8c10AAAAJ;YvdzeM8AAAAJ", "orcid": ";;0000-0001-8929-7899", "linkedin": ";;", "or_profile": "~Evan_Markou1;~Thalaiyasingam_Ajanthan1;~Stephen_Gould1", "aff": "Australian National University;Amazon;Australian National University", "aff_domain": "anu.edu.au;amazon.com;anu.edu.au", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nmarkou2024guiding,\ntitle={Guiding Neural Collapse: Optimising Towards the Nearest Simplex Equiangular Tight Frame},\nauthor={Evan Markou and Thalaiyasingam Ajanthan and Stephen Gould},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z4FaPUslma}\n}", "github": "", "reviewers": "xmab;isxr;oHwR;m1UJ", "pdf_size": 17080501, "rating": "5;6;6;6", "confidence": "4;4;3;3", "soundness": "3;4;3;3", "novelty": "2;3;3;3", "presentation": "3;4;2;3", "wc_summary": "54;99;105;249", "wc_strengths": "118;64;54;125", "wc_weaknesses": "270;105;113;248", "wc_questions": "3;182;110;676", "wc_limitations": "94;1;24;6", "wc_review": "539;451;406;1304", "wc_reply_reviewers": "0;137;44;72", "wc_reply_authors": "0;236;0;0", "reply_reviewers": "0;1;1;1", "reply_authors": "1;2;1;1", "rating_avg": [ 5.75, 0.4330127018922193 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 126.75, 73.28156316564215 ], "wc_strengths_avg": [ 90.25, 31.546592525976557 ], "wc_weaknesses_avg": [ 184.0, 75.45528477184351 ], "wc_questions_avg": [ 242.75, 258.11758463924923 ], "wc_limitations_avg": [ 31.25, 37.22482370676858 ], "wc_review_avg": [ 675.0, 366.2901855087029 ], "wc_reply_reviewers_avg": [ 63.25, 49.71606883091221 ], "wc_reply_authors_avg": [ 59.0, 102.19099764656376 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:LdNFWM9BZMsJ:scholar.google.com/&scioq=Guiding+Neural+Collapse:+Optimising+Towards+the+Nearest+Simplex+Equiangular+Tight+Frame&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "anu.edu.au;amazon.com;anu.edu.au", "author_num": 3, "aff_unique_index": "0;1;0", "aff_unique_norm": "Australian National University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.anu.edu.au;https://www.amazon.com", "aff_unique_abbr": "ANU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "Gated Inference Network: Inference and Learning State-Space Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92976", "id": "z4duW3KzlD", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z4duW3KzlD", "openreview": "https://openreview.net/forum?id=z4duW3KzlD", "poster": "/media/PosterPDFs/NeurIPS%202024/92976.png?t=1731390088.065917", "project": "", "author_site": "Hamidreza Hashempoorikderi, Wan Choi", "tldr": "", "abstract": "This paper advances temporal reasoning within dynamically changing high-dimensional noisy observations, focusing on a latent space that characterizes the nonlinear dynamics of objects in their environment. We introduce the *Gated Inference Network* (GIN), an efficient approximate Bayesian inference algorithm for state space models (SSMs) with nonlinear state transitions and emissions. GIN disentangles two latent representations: one representing the object derived from a nonlinear mapping model, and another representing the latent state describing its dynamics. This disentanglement enables direct state estimation and missing data imputation as the world evolves. To infer the latent state, we utilize a deep extended Kalman filter (EKF) approach that integrates a novel compact RNN structure to compute both the Kalman Gain (KG) and smoothing gain (SG), completing the data flow. This design results in a computational cost per step that is linearly faster than EKF but introduces issues such as the exploding gradient problem. To mitigate the exploding gradients caused by the compact RNN structure in our model, we propose a specialized learning method that ensures stable training and inference. The model is then trained end-to-end on videos depicting a diverse range of simulated and real-world physical systems, and outperforms its ounterparts \u2014RNNs, autoregressive models, and variational approaches\u2014 in state estimation and missing data imputation tasks.", "keywords": "Time Series and Recurrent Networks", "primary_area": "other", "supplementary_material": "", "author": "Hamidreza Hashempoor;Wan Choi", "authorids": "~Hamidreza_Hashempoor1;~Wan_Choi1", "gender": "M;M", "homepage": "https://hamidreza-hashempoor.github.io/;http://wcisl.snu.ac.kr/index.php", "dblp": ";91/4197", "google_scholar": "Gx6JOqoAAAAJ;HEddD1cAAAAJ", "orcid": ";0000-0003-3930-7088", "linkedin": ";", "or_profile": "~Hamidreza_Hashempoor1;~Wan_Choi1", "aff": "Independent Researcher;Seoul National University", "aff_domain": "independent.research;snu.ac.kr", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nhashempoor2024gated,\ntitle={Gated Inference Network: Inference and Learning State-Space Models},\nauthor={Hamidreza Hashempoor and Wan Choi},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z4duW3KzlD}\n}", "github": "", "reviewers": "g5tG;254u;Db5x", "pdf_size": 5268052, "rating": "6;7;8", "confidence": "3;4;4", "soundness": "3;4;4", "novelty": "3;3;4", "presentation": "3;2;3", "wc_summary": "67;100;96", "wc_strengths": "13;108;93", "wc_weaknesses": "84;352;64", "wc_questions": "22;18;77", "wc_limitations": "4;1;23", "wc_review": "190;579;353", "wc_reply_reviewers": "55;20;18", "wc_reply_authors": "321;15;17", "reply_reviewers": "1;1;1", "reply_authors": "2;2;2", "rating_avg": [ 7.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 87.66666666666667, 14.70449666674185 ], "wc_strengths_avg": [ 71.33333333333333, 41.69998667732268 ], "wc_weaknesses_avg": [ 166.66666666666666, 131.30456537717524 ], "wc_questions_avg": [ 39.0, 26.919633479426622 ], "wc_limitations_avg": [ 9.333333333333334, 9.741092797468305 ], "wc_review_avg": [ 374.0, 159.50130615975115 ], "wc_reply_reviewers_avg": [ 31.0, 16.990193249832878 ], "wc_reply_authors_avg": [ 117.66666666666667, 143.78069720546247 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": 0.8660254037844385, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12730400128280500248&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 0, "email": "independent.research;snu.ac.kr", "author_num": 2, "aff_unique_index": "0;1", "aff_unique_norm": "Independent Researcher;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.snu.ac.kr", "aff_unique_abbr": ";SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";South Korea" }, { "title": "Unveiling the Hidden: Online Vectorized HD Map Construction with Clip-Level Token Interaction and Propagation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92975", "id": "z4eVwH484M", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z4eVwH484M", "openreview": "https://openreview.net/forum?id=z4eVwH484M", "poster": "", "project": "", "author_site": "Nayeon Kim, Hongje Seong, Daehyun Ji, Sujin Jang", "tldr": "", "abstract": "Predicting and constructing road geometric information (e.g., lane lines, road markers) is a crucial task for safe autonomous driving, while such static map elements can be repeatedly occluded by various dynamic objects on the road. Recent studies have shown significantly improved vectorized high-definition (HD) map construction performance, but there has been insufficient investigation of temporal information across adjacent input frames (i.e., clips), which may lead to inconsistent and suboptimal prediction results. To tackle this, we introduce a novel paradigm of clip-level vectorized HD map construction, MapUnveiler, which explicitly unveils the occluded map elements within a clip input by relating dense image representations with efficient clip tokens. Additionally, MapUnveiler associates inter-clip information through clip token propagation, effectively utilizing long- term temporal map information. MapUnveiler runs efficiently with the proposed clip-level pipeline by avoiding redundant computation with temporal stride while building a global map relationship. Our extensive experiments demonstrate that MapUnveiler achieves state-of-the-art performance on both the nuScenes and Argoverse2 benchmark datasets. We also showcase that MapUnveiler significantly outperforms state-of-the-art approaches in a challenging setting, achieving +10.7% mAP improvement in heavily occluded driving road scenes. The project page can be found at https://mapunveiler.github.io.", "keywords": "vectorized HD map;clip-level pipeline;clip-level token;interaction;propagation", "primary_area": "machine_vision", "supplementary_material": "/attachment/7fdf9cf2a030d124f39ce6c0a6575eea1927c98c.zip", "author": "Nayeon Kim;Hongje Seong;Daehyun Ji;Sujin Jang", "authorids": "~Nayeon_Kim5;~Hongje_Seong1;~Daehyun_Ji1;~Sujin_Jang2", "gender": "F;M;;M", "homepage": "https://ny2kim.github.io/;https://hongje.github.io;;https://sujinjang.github.io/", "dblp": ";231/5155;274/9684;146/6241", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;M25_eCsAAAAJ;https://scholar.google.co.kr/citations?user=WEZZefcAAAAJ;https://scholar.google.co.kr/citations?hl=en", "orcid": ";0000-0001-7221-409X;;0000-0002-2723-5606", "linkedin": "nayeon-kim-6b1291241?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app;hongje-seong;captainzone/;sujin-jang-7996b354", "or_profile": "~Nayeon_Kim5;~Hongje_Seong1;~Daehyun_Ji1;~Sujin_Jang2", "aff": "Samsung;Samsung Advanced Institute of Technology;Samsung;Samsung Advanced Institute of Technology (SAIT)", "aff_domain": "samsung.com;samsung.com;samsung.com;samsung.com", "position": "Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nkim2024unveiling,\ntitle={Unveiling the Hidden: Online Vectorized {HD} Map Construction with Clip-Level Token Interaction and Propagation},\nauthor={Nayeon Kim and Hongje Seong and Daehyun Ji and Sujin Jang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z4eVwH484M}\n}", "github": "", "reviewers": "Hway;JH5U;xabw;gpqV", "pdf_size": 6176567, "rating": "5;5;6;7", "confidence": "5;4;5;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "1;3;3;3", "wc_summary": "84;81;87;101", "wc_strengths": "101;60;59;52", "wc_weaknesses": "584;83;170;133", "wc_questions": "57;89;40;86", "wc_limitations": "11;13;10;15", "wc_review": "837;326;366;387", "wc_reply_reviewers": "192;0;25;100", "wc_reply_authors": "116;80;29;25", "reply_reviewers": "1;0;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 4.25, 0.82915619758885 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 88.25, 7.660776723022281 ], "wc_strengths_avg": [ 68.0, 19.300259065618782 ], "wc_weaknesses_avg": [ 242.5, 199.56765770033982 ], "wc_questions_avg": [ 68.0, 20.43281674170255 ], "wc_limitations_avg": [ 12.25, 1.920286436967152 ], "wc_review_avg": [ 479.0, 207.849705316125 ], "wc_reply_reviewers_avg": [ 79.25, 74.7775868827017 ], "wc_reply_authors_avg": [ 62.5, 37.73923687622737 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6363636363636364, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15662763178508099657&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "samsung.com;samsung.com;samsung.com;samsung.com", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung", "aff_unique_url": "https://www.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "GTSinger: A Global Multi-Technique Singing Corpus with Realistic Music Scores for All Singing Tasks", "status": "Spotlight", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97428", "id": "z64azPC6Nl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z64azPC6Nl", "openreview": "https://openreview.net/forum?id=z64azPC6Nl", "poster": "/media/PosterPDFs/NeurIPS%202024/97428.png?t=1729087809.5141168", "project": "", "author_site": "Yu Zhang, Changhao Pan, Wenxiang Guo, Ruiqi Li, Zhiyuan Zhu, Jialei Wang, Wenhao Xu, Jingyu Lu, Zhiqing Hong, Chuxin Wang, Lichao Zhang, Jinzheng He, Ziyue Jiang, Yuxin Chen, Chen Yang, Jiecheng Zhou, Xinyu Cheng, Zhou Zhao", "tldr": "", "abstract": "The scarcity of high-quality and multi-task singing datasets significantly hinders the development of diverse controllable and personalized singing tasks, as existing singing datasets suffer from low quality, limited diversity of languages and singers, absence of multi-technique information and realistic music scores, and poor task suitability.\nTo tackle these problems, we present GTSinger, a large Global, multi-Technique, free-to-use, high-quality singing corpus with realistic music scores, designed for all singing tasks, along with its benchmarks.\nParticularly,\n(1) we collect 80.59 hours of high-quality singing voices, forming the largest recorded singing dataset;\n(2) 20 professional singers across nine widely spoken languages offer diverse timbres and styles;\n(3) we provide controlled comparison and phoneme-level annotations of six commonly used singing techniques, helping technique modeling and control;\n(4) GTSinger offers realistic music scores, assisting real-world musical composition;\n(5) singing voices are accompanied by manual phoneme-to-audio alignments, global style labels, and 16.16 hours of paired speech for various singing tasks.\nMoreover, to facilitate the use of GTSinger, we conduct four benchmark experiments: technique-controllable singing voice synthesis, technique recognition, style transfer, and speech-to-singing conversion.", "keywords": "singing voice synthesis;singing technique;multi-lingual;realistic music score;technique control;style transfer;speech-to-singing conversion", "primary_area": "", "supplementary_material": "/attachment/c4d6a496b7d2a36dbf9d5e7434ebbd8d84e07cfa.pdf", "author": "Yu Zhang;Changhao Pan;Wenxiang Guo;Ruiqi Li;Zhiyuan Zhu;Jialei Wang;Wenhao Xu;Jingyu Lu;Zhiqing Hong;Chuxin Wang;Lichao Zhang;Jinzheng He;Ziyue Jiang;Yuxin Chen;Chen Yang;Jiecheng Zhou;Xinyu Cheng;Zhou Zhao", "authorids": "~Yu_Zhang65;~Changhao_Pan1;~Wenxiang_Guo1;~Ruiqi_Li2;~Zhiyuan_Zhu4;~Jialei_Wang2;~Wenhao_Xu6;~Jingyu_Lu1;~Zhiqing_Hong3;~Chuxin_Wang3;~Lichao_Zhang2;~Jinzheng_He1;~Ziyue_Jiang1;~Yuxin_Chen13;~Chen_Yang26;~Jiecheng_Zhou1;~Xinyu_Cheng3;~Zhou_Zhao3", "gender": "M;M;M;;M;M;M;M;F;M;;;M;M;;M;M;", "homepage": "https://aaronz345.github.io;https://david-pigeon.github.io/;https://gwx314.github.io/;;https://github.com/dieKarotte;https://github.com/Unc1eW4ng;https://github.com/abilitywuchen;https://myjokerml.github.io/;https://github.com/PeppaPiggeee;https://github.com/mingzhengdan;https://leach9.github.io/;;;https://github.com/Xyc-LCQ;;;;", "dblp": "50/671-126;382/3463;256/7852;;;;;;;;;272/8857;258/6865;;;;;", "google_scholar": "kA9A6LsAAAAJ;lAH4cq8AAAAJ;tFg-qdwAAAAJ;;;;;;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN;wDgSBssAAAAJ;;;;;", "orcid": "0009-0007-4594-0281;0009-0004-6023-1764;0009-0006-7997-4140;;;;;;;;;;;;;0009-0000-5155-0385;;", "linkedin": "yuzhang34;changhao-pan-4032b8317;;;;;;;;;;;;;;;xinyu-cheng-72aaa7309/;", "or_profile": "~Yu_Zhang65;~Changhao_Pan1;~Wenxiang_Guo1;~Ruiqi_Li2;~Zhiyuan_Zhu4;~Jialei_Wang2;~Wenhao_Xu6;~Jingyu_Lu1;~Zhiqing_Hong3;~Chuxin_Wang3;~Lichao_Zhang2;~Jinzheng_He1;~Ziyue_Jiang1;~Yuxin_Chen13;~Chen_Yang26;~Jiecheng_Zhou1;~Xinyu_Cheng3;~Zhou_Zhao3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;", "position": "PhD student;Undergrad student;PhD student;;Undergrad student;Undergrad student;Undergrad student;Undergrad student;MS student;Undergrad student;MS student;MS student;PhD student;Undergrad student;;Undergrad student;Undergrad student;", "bibtex": "@inproceedings{\nzhang2024gtsinger,\ntitle={{GTS}inger: A Global Multi-Technique Singing Corpus with Realistic Music Scores for All Singing Tasks},\nauthor={Yu Zhang and Changhao Pan and Wenxiang Guo and Ruiqi Li and Zhiyuan Zhu and Jialei Wang and Wenhao Xu and Jingyu Lu and Zhiqing Hong and Chuxin Wang and Lichao Zhang and Jinzheng He and Ziyue Jiang and Yuxin Chen and Chen Yang and Jiecheng Zhou and Xinyu Cheng and Zhou Zhao},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=z64azPC6Nl}\n}", "github": "", "reviewers": "A3kt;kj3W;pxTo", "pdf_size": 4735556, "rating": "7;9;9", "confidence": "5;3;4", "wc_summary_and_contributions": "31;63;74", "wc_strengths": "42;106;24", "wc_improvement": "67;81;65", "wc_limitations": "23;11;12", "wc_correctness": "28;3;1", "wc_clarity": "15;15;1", "wc_relation_to_prior_work": "83;19;1", "wc_documentation": "31;39;12", "wc_additional_feedback": "1;1;1", "wc_review": "321;338;191", "wc_reply_reviewers": "86;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "1;0;0", "reply_authors": "3;2;2", "rating_avg": [ 8.333333333333334, 0.9428090415820634 ], "confidence_avg": [ 4.0, 0.816496580927726 ], "wc_summary_and_contributions_avg": [ 56.0, 18.239152027072603 ], "wc_strengths_avg": [ 57.333333333333336, 35.188381921057726 ], "wc_improvement_avg": [ 71.0, 7.118052168020874 ], "wc_limitations_avg": [ 15.333333333333334, 5.436502143433364 ], "wc_correctness_avg": [ 10.666666666666666, 12.283683848458853 ], "wc_clarity_avg": [ 10.333333333333334, 6.599663291074444 ], "wc_relation_to_prior_work_avg": [ 34.333333333333336, 35.188381921057726 ], "wc_documentation_avg": [ 27.333333333333332, 11.32352516764202 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 283.3333333333333, 65.65735974655766 ], "wc_reply_reviewers_avg": [ 28.666666666666668, 40.54078878802872 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 18, 0 ], "corr_rating_confidence": -0.8660254037844385, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4877574257490327541&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;", "author_num": 18, "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Vision Model Pre-training on Interleaved Image-Text Data via Latent Compression Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92974", "id": "z6KNvOe9zQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z6KNvOe9zQ", "openreview": "https://openreview.net/forum?id=z6KNvOe9zQ", "poster": "/media/PosterPDFs/NeurIPS%202024/92974.png?t=1733585188.9238036", "project": "", "author_site": "CHENYU YANG, Xizhou Zhu, Jinguo Zhu, Weijie Su, Junjie Wang, Xuan Dong, Wenhai Wang, Bin Li, Jie Zhou, Yu Qiao, Jifeng Dai", "tldr": "", "abstract": "Recently, vision model pre-training has evolved from relying on manually annotated datasets to leveraging large-scale, web-crawled image-text data. Despite these advances, there is no pre-training method that effectively exploits the interleaved image-text data, which is very prevalent on the Internet. Inspired by the recent success of compression learning in natural language processing, we propose a novel vision model pre-training method called Latent Compression Learning (LCL) for interleaved image-text data. This method performs latent compression learning by maximizing the mutual information between the inputs and outputs of a causal attention model. The training objective can be decomposed into two basic tasks: 1) contrastive learning between visual representation and preceding context, and 2) generating subsequent text based on visual representation. Our experiments demonstrate that our method not only matches the performance of CLIP on paired pre-training datasets (e.g., LAION), but can also leverage interleaved pre-training data (e.g., MMC4) to learn robust visual representations from scratch, showcasing the potential of vision model pre-training with interleaved image-text data.", "keywords": "vision pre-training;compression learning;interleaved image-text data", "primary_area": "machine_vision", "supplementary_material": "", "author": "Chenyu Yang;Xizhou Zhu;Jinguo Zhu;Weijie Su;Junjie Wang;Xuan Dong;Wenhai Wang;Bin Li;Jie Zhou;Yu Qiao;Jifeng Dai", "authorids": "~Chenyu_Yang1;~Xizhou_Zhu1;~Jinguo_Zhu1;~Weijie_Su2;~Junjie_Wang8;~Xuan_Dong5;~Wenhai_Wang2;~Bin_Li8;~Jie_Zhou3;~Yu_Qiao1;~Jifeng_Dai1", "gender": "M;;M;M;M;M;;M;M;;M", "homepage": "https://github.com/Chenyu-Yang-2000;;https://lechatelia.github.io/;https://www.weijiesu.com/;;https://xsgldhy.github.io;;http://staff.ustc.edu.cn/~binli;https://www.tsinghua.edu.cn/publish/auen/1713/2011/20110506105532098625469/20110506105532098625469_.html;;https://jifengdai.org/", "dblp": ";170/1608;244/7280;57/10098-2;14/1915-9;;;89/6764-25;00/5012-1;;14/9399", "google_scholar": "LReV9sUAAAAJ;02RXI00AAAAJ;YfHg5lQAAAAJ;ECDe6IIAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;;;;;SH_-B_AAAAAJ", "orcid": "0000-0001-9997-4929;;;;;;;0000-0002-2332-3959;;;", "linkedin": ";;;weijie-su-abb163177/;;;;;;;", "or_profile": "~Chenyu_Yang1;~Xizhou_Zhu1;~Jinguo_Zhu1;~Weijie_Su2;~Junjie_Wang8;~Xuan_Dong5;~Wenhai_Wang2;~Bin_Li8;~Jie_Zhou3;~Yu_Qiao1;~Jifeng_Dai1", "aff": "Tsinghua University;Tsinghua University;Xi'an Jiaotong University;University of Science and Technology of China;Beijing University of Posts and Telecommunications;Tsinghua University;;University of Science and Technology of China;Tsinghua University;;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;xjtu.edu.cn;ustc.edu.cn;bupt.edu.cn;mails.tsinghua.edu.cn;;ustc.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "position": "PhD student;Postdoc;PhD student;PhD student;Undergrad student;Undergrad student;;Full Professor;Full Professor;;Associate Professor", "bibtex": "@inproceedings{\nyang2024vision,\ntitle={Vision Model Pre-training on Interleaved Image-Text Data via Latent Compression Learning},\nauthor={Chenyu Yang and Xizhou Zhu and Jinguo Zhu and Weijie Su and Junjie Wang and Xuan Dong and Wenhai Wang and Bin Li and Jie Zhou and Yu Qiao and Jifeng Dai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z6KNvOe9zQ}\n}", "github": "", "reviewers": "KZWZ;A7ig;ktxP;vQis", "pdf_size": 675512, "rating": "4;4;5;6", "confidence": "4;4;4;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;2;3;3", "wc_summary": "76;37;101;95", "wc_strengths": "20;54;114;76", "wc_weaknesses": "218;327;83;140", "wc_questions": "58;12;2;81", "wc_limitations": "21;8;22;9", "wc_review": "393;438;322;401", "wc_reply_reviewers": "172;89;150;24", "wc_reply_authors": "31;44;20;17", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 77.25, 25.00374971879218 ], "wc_strengths_avg": [ 66.0, 34.14674215792775 ], "wc_weaknesses_avg": [ 192.0, 91.49590154755568 ], "wc_questions_avg": [ 38.25, 32.48364973336586 ], "wc_limitations_avg": [ 15.0, 6.519202405202649 ], "wc_review_avg": [ 388.5, 41.97916149710473 ], "wc_reply_reviewers_avg": [ 108.75, 57.60805065266486 ], "wc_reply_authors_avg": [ 28.0, 10.606601717798213 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 11, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2513614060617440775&as_sdt=80005&sciodt=0,11&hl=en", "gs_version_total": 4, "email": "tsinghua.edu.cn;tsinghua.edu.cn;xjtu.edu.cn;ustc.edu.cn;bupt.edu.cn;mails.tsinghua.edu.cn;;ustc.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn", "author_num": 11, "aff_unique_index": "0;0;1;2;3;0;2;0;0", "aff_unique_norm": "Tsinghua University;Xi'an Jiao Tong University;University of Science and Technology of China;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.xjtu.edu.cn;http://www.ustc.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "THU;XJTU;USTC;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning diverse causally emergent representations from time series data", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92973", "id": "z6reLFqv6w", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z6reLFqv6w", "openreview": "https://openreview.net/forum?id=z6reLFqv6w", "poster": "", "project": "", "author_site": "David McSharry, Christos Kaplanis, Fernando Rosas, Pedro A.M Mediano", "tldr": "", "abstract": "Cognitive processes usually take place at a macroscopic scale in systems characterised by emergent properties, which make the whole \u2018more than the sum of its parts.\u2019 While recent proposals have provided quantitative, information-theoretic metrics to detect emergence in time series data, it is often highly non-trivial to identify the relevant macroscopic variables a priori. In this paper we leverage recent advances in representation learning and differentiable information estimators to put forward a data-driven method to find emergent variables. The proposed method successfully detects emergent variables and recovers the ground-truth emergence values in a synthetic dataset. Furthermore, we show the method can be extended to learn multiple independent features, extracting a diverse set of emergent quantities. We finally show that a modified method scales to real experimental data from primate brain activity, paving the ground for future analyses uncovering the emergent structure of cognitive representations in biological and artificial intelligence systems.", "keywords": "emergence;representation learning", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "David McSharry;Christos Kaplanis;Fernando E Rosas;Pedro A. M. Mediano", "authorids": "~David_McSharry1;~Christos_Kaplanis2;~Fernando_E_Rosas1;~Pedro_A._M._Mediano1", "gender": "M;M;Not Specified;M", "homepage": "https://www.linkedin.com/in/david-mcsharry-208612202/;https://profiles.sussex.ac.uk/p555273-fernando-rosas;https://www.doc.ic.ac.uk/~pam213;", "dblp": ";;190/7253;200/8168", "google_scholar": ";https://scholar.google.es/citations?user=OZNAs2wAAAAJ;I9-416MAAAAJ;https://scholar.google.co.uk/citations?user=jaNr8IoAAAAJ", "orcid": "0009-0003-3627-3836;;0000-0003-1789-5894;", "linkedin": "david-mcsharry-208612202/;;;", "or_profile": "~David_McSharry1;~Fernando_E_Rosas1;~Pedro_A._M._Mediano1;~Christos_Kaplanis1", "aff": "Imperial College London;University of Sussex;Imperial College London;Google", "aff_domain": "imperial.ac.uk;sussex.ac.uk;ic.ac.uk;google.com", "position": "MS student;Lecturer;Lecturer;Research Scientist", "bibtex": "@inproceedings{\nmcsharry2024learning,\ntitle={Learning diverse causally emergent representations from time series data},\nauthor={David McSharry and Christos Kaplanis and Fernando E Rosas and Pedro A. M. Mediano},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z6reLFqv6w}\n}", "github": "", "reviewers": "7mVS;PMxp;aLEM;pckS", "pdf_size": 872260, "rating": "5;6;7;7", "confidence": "3;4;3;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "2;4;2;4", "wc_summary": "116;88;227;59", "wc_strengths": "70;65;142;120", "wc_weaknesses": "138;103;171;158", "wc_questions": "90;1;1;182", "wc_limitations": "8;11;1;13", "wc_review": "422;268;542;532", "wc_reply_reviewers": "108;22;47;77", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 122.5, 63.610140700992005 ], "wc_strengths_avg": [ 99.25, 32.7366384957283 ], "wc_weaknesses_avg": [ 142.5, 25.656383221335 ], "wc_questions_avg": [ 68.5, 74.92829905983453 ], "wc_limitations_avg": [ 8.25, 4.548351349665063 ], "wc_review_avg": [ 441.0, 110.42191811411355 ], "wc_reply_reviewers_avg": [ 63.5, 32.23740063962974 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:99UJIAHNGr4J:scholar.google.com/&scioq=Learning+diverse+causally+emergent+representations+from+time+series+data&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "imperial.ac.uk;sussex.ac.uk;ic.ac.uk;google.com", "author_num": 4, "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Imperial College London;University of Sussex;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.imperial.ac.uk;https://www.sussex.ac.uk;https://www.google.com", "aff_unique_abbr": "ICL;Sussex;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "The Many Faces of Optimal Weak-to-Strong Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92972", "id": "z7h7zMgyPJ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z7h7zMgyPJ", "openreview": "https://openreview.net/forum?id=z7h7zMgyPJ", "poster": "/media/PosterPDFs/NeurIPS%202024/92972.png?t=1733587762.4100173", "project": "", "author_site": "Mikael M\u00f8ller H\u00f8gsgaard, Kasper Green Larsen, Markus Engelund Mathiasen", "tldr": "", "abstract": "Boosting is an extremely successful idea, allowing one to combine multiple low accuracy classifiers into a much more accurate voting classifier. In this work, we present a new and surprisingly simple Boosting algorithm that obtains a provably optimal sample complexity. Sample optimal Boosting algorithms have only recently been developed, and our new algorithm has the fastest runtime among all such algorithms and is the simplest to describe: Partition your training data into 5 disjoint pieces of equal size, run AdaBoost on each, and combine the resulting classifiers via a majority vote. In addition to this theoretical contribution, we also perform the first empirical comparison of the proposed sample optimal Boosting algorithms. Our pilot empirical study suggests that our new algorithm might outperform previous algorithms on large data sets.", "keywords": "Learning Theory;Weak to Strong Learning;Boosting;Large Margin Classifiers;Generalization Bounds;Sample Complexity", "primary_area": "learning_theory", "supplementary_material": "/attachment/9a9811aaf9480e6aee3ebb3c8c9c52b771282c85.zip", "author": "Mikael M\u00f8ller H\u00f8gsgaard;Kasper Green Larsen;Markus Engelund Mathiasen", "authorids": "~Mikael_M\u00f8ller_H\u00f8gsgaard1;~Kasper_Green_Larsen1;~Markus_Engelund_Mathiasen1", "gender": "M;;M", "homepage": "https://pure.au.dk/portal/da/persons/mikael-moeller-hoegsgaard(3b07133a-329d-4585-a864-d37c7cb9056b).html;;http://www.cs.au.dk/~larsen/", "dblp": "295/8599;;07/6242", "google_scholar": ";;https://scholar.google.com.tw/citations?user=ZluoxUcAAAAJ", "orcid": ";;", "linkedin": ";markus-mathiasen-5329a4303/;", "or_profile": "~Mikael_M\u00f8ller_H\u00f8gsgaard1;~Markus_Engelund_Mathiasen1;~Kasper_Larsen1", "aff": "Aarhus University;Aarhus University;Aarhus University", "aff_domain": "cs.au.dk;au.dk;au.dk", "position": "PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nh{\\o}gsgaard2024the,\ntitle={The Many Faces of Optimal Weak-to-Strong Learning},\nauthor={Mikael M{\\o}ller H{\\o}gsgaard and Kasper Green Larsen and Markus Engelund Mathiasen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z7h7zMgyPJ}\n}", "github": "", "reviewers": "KvEC;ynbn;p9UC", "pdf_size": 425308, "rating": "3;7;8", "confidence": "4;5;4", "soundness": "2;4;3", "novelty": "1;4;3", "presentation": "1;4;3", "wc_summary": "68;58;499", "wc_strengths": "12;19;103", "wc_weaknesses": "25;1;208", "wc_questions": "4;1;227", "wc_limitations": "3;1;11", "wc_review": "112;80;1048", "wc_reply_reviewers": "0;0;147", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;1", "reply_authors": "1;0;1", "rating_avg": [ 6.0, 2.160246899469287 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 2.6666666666666665, 1.247219128924647 ], "presentation_avg": [ 2.6666666666666665, 1.247219128924647 ], "wc_summary_avg": [ 208.33333333333334, 205.57291218013677 ], "wc_strengths_avg": [ 44.666666666666664, 41.34677200889515 ], "wc_weaknesses_avg": [ 78.0, 92.44457799135652 ], "wc_questions_avg": [ 77.33333333333333, 105.8374014966144 ], "wc_limitations_avg": [ 5.0, 4.320493798938574 ], "wc_review_avg": [ 413.3333333333333, 448.967209889047 ], "wc_reply_reviewers_avg": [ 49.0, 69.29646455628166 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 0.6666666666666666, 0.4714045207910317 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3273268353539885, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14308682075485092429&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "cs.au.dk;au.dk;au.dk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Aarhus University", "aff_unique_dep": "", "aff_unique_url": "https://au.dk", "aff_unique_abbr": "AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "PURE: Prompt Evolution with Graph ODE for Out-of-distribution Fluid Dynamics Modeling", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92971", "id": "z86knmjoUq", "proceeding": "", "pdf": "https://openreview.net/pdf?id=z86knmjoUq", "openreview": "https://openreview.net/forum?id=z86knmjoUq", "poster": "", "project": "", "author_site": "Hao Wu, Changhu Wang, Fan Xu, Jinbao Xue, Chong Chen, Xian-Sheng Hua, Xiao Luo", "tldr": "", "abstract": "This work studies the problem of out-of-distribution fluid dynamics modeling. Previous works usually design effective neural operators to learn from mesh-based data structures. However, in real-world applications, they would suffer from distribution shifts from the variance of system parameters and \ntemporal evolution of the dynamical system. In this paper, we propose a novel approach named \\underline{P}rompt Evol\\underline{u}tion with G\\underline{r}aph OD\\underline{E} (\\method{}) for out-of-distribution fluid dynamics modeling. The core of our \\method{} is to learn time-evolving prompts using a graph ODE to adapt spatio-temporal forecasting models to different scenarios. In particular, our \\method{} first learns from historical observations and system parameters in the frequency domain to explore multi-view context information, which could effectively initialize prompt embeddings. More importantly, we incorporate the interpolation of observation sequences into a graph ODE, which can capture the temporal evolution of prompt embeddings for model adaptation. These time-evolving prompt embeddings are then incorporated into basic forecasting models to overcome temporal distribution shifts. We also minimize the mutual information between prompt embeddings and observation embeddings to enhance the robustness of our model to different distributions. Extensive experiments on various benchmark datasets validate the superiority of the proposed \\method{} in comparison to various baselines.", "keywords": "AI for Science", "primary_area": "machine_learning_for_physical_sciences", "supplementary_material": "", "author": "Hao Wu;Changhu Wang;Fan Xu;Jinbao Xue;Chong Chen;Xian-Sheng Hua;Xiao Luo", "authorids": "~Hao_Wu39;~Changhu_Wang4;~Fan_Xu5;~Jinbao_Xue2;~Chong_Chen2;~Xian-Sheng_Hua1;~Xiao_Luo3", "gender": "M;M;;M;;M;M", "homepage": "https://easylearningscores.github.io/;;;https://www.google.com/;;;http://luoxiao12.github.io", "dblp": "111;;;https://dblp.org;;56/5807-1;50/1585-1", "google_scholar": "HdXMhfcAAAAJ;;;;;https://scholar.google.co.uk/citations?user=6G-l4o0AAAAJ;https://scholar.google.com.hk/citations?", "orcid": "0009-0008-4084-1409;0009-0002-8567-0961;;0000-0000-0000-0000;;;", "linkedin": ";;;https://www.linkedin;;xshua;%E9%9C%84-%E7%BD%97-303548214/", "or_profile": "~Hao_Wu39;~Changhu_Wang4;~Fan_Xu5;~Jinbao_Xue2;~Chong_Chen2;~Xian-Sheng_Hua1;~Xiao_Luo3", "aff": "University of Science and Technology of China;Peking University;;Shandong University of Science and Technology;;Terminus Group;University of California, Los Angeles", "aff_domain": "ustc.edu.cn;pku.edu.cn;;sdust.edu.cn;;tslsmart.com;cs.ucla.edu", "position": "MS student;PhD student;;MS student;;Principal Researcher;Postdoc", "bibtex": "@inproceedings{\nwu2024pure,\ntitle={{PURE}: Prompt Evolution with Graph {ODE} for Out-of-distribution Fluid Dynamics Modeling},\nauthor={Hao Wu and Changhu Wang and Fan Xu and Jinbao Xue and Chong Chen and Xian-Sheng Hua and Xiao Luo},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=z86knmjoUq}\n}", "github": "", "reviewers": "vRkX;MFD5;MBSt;Sh2w", "pdf_size": 11025018, "rating": "4;5;5;7", "confidence": "3;4;5;2", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "3;2;3;3", "wc_summary": "103;132;34;52", "wc_strengths": "34;76;24;65", "wc_weaknesses": "194;168;108;51", "wc_questions": "1;37;150;7", "wc_limitations": "1;29;17;7", "wc_review": "333;442;333;182", "wc_reply_reviewers": "118;0;0;9", "wc_reply_authors": "1378;92;105;25", "reply_reviewers": "1;0;0;1", "reply_authors": "3;2;2;2", "rating_avg": [ 5.25, 1.0897247358851685 ], "confidence_avg": [ 3.5, 1.118033988749895 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 80.25, 39.15593824696326 ], "wc_strengths_avg": [ 49.75, 21.405314760591587 ], "wc_weaknesses_avg": [ 130.25, 55.37316588384666 ], "wc_questions_avg": [ 48.75, 60.02655662288151 ], "wc_limitations_avg": [ 13.5, 10.618380290797651 ], "wc_review_avg": [ 322.5, 92.52161909521472 ], "wc_reply_reviewers_avg": [ 31.75, 49.93182852650201 ], "wc_reply_authors_avg": [ 400.0, 565.4639687902315 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": -0.5129891760425771, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12304922257988481896&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "ustc.edu.cn;pku.edu.cn;;sdust.edu.cn;;tslsmart.com;cs.ucla.edu", "author_num": 7, "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Science and Technology of China;Peking University;Shandong University of Science and Technology;Terminus Group;University of California, Los Angeles", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn;http://www.sdstu.edu.cn/;;https://www.ucla.edu", "aff_unique_abbr": "USTC;Peking U;SDUST;;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;2", "aff_country_unique": "China;;United States" }, { "title": "On Differentially Private U Statistics", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92970", "id": "zApFYcLg6K", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zApFYcLg6K", "openreview": "https://openreview.net/forum?id=zApFYcLg6K", "poster": "", "project": "", "author_site": "Kamalika Chaudhuri, Po-Ling Loh, Shourya Pandey, Purnamrita Sarkar", "tldr": "", "abstract": "We consider the problem of privately estimating a parameter $\\mathbb{E}[h(X_1,\\dots,X_k)]$, where $X_1$, $X_2$, $\\dots$, $X_k$ are i.i.d. data from some distribution and $h$ is a permutation-invariant function. Without privacy constraints, the standard estimators for this task are U-statistics, which commonly arise in a wide range of problems, including nonparametric signed rank tests, symmetry testing, uniformity testing, and subgraph counts in random networks, and are the unique minimum variance unbiased estimators under mild conditions. Despite the recent outpouring of interest in private mean estimation, privatizing U-statistics has received little attention. While existing private mean estimation algorithms can be applied in a black-box manner to obtain confidence intervals, we show that they can lead to suboptimal private error, e.g., constant-factor inflation in the leading term, or even $\\Theta(1/n)$ rather than $O(1/n^2)$ in degenerate settings. To remedy this, we propose a new thresholding-based approach that reweights different subsets of the data using _local H\u00e1jek projections_. This leads to nearly optimal private error for non-degenerate U-statistics and a strong indication of near-optimality for degenerate U-statistics.", "keywords": "Differential Privacy;Statistics;Mean Estimation", "primary_area": "privacy", "supplementary_material": "", "author": "Kamalika Chaudhuri;Po-Ling Loh;Shourya Pandey;Purnamrita Sarkar", "authorids": "~Kamalika_Chaudhuri1;~Po-Ling_Loh2;~Shourya_Pandey1;~Purnamrita_Sarkar1", "gender": "F;;M;F", "homepage": "http://cseweb.ucsd.edu/users/kamalika;;;https://psarkar.github.io/", "dblp": "56/6435;02/10264;;25/6929", "google_scholar": "I-DJ7EsAAAAJ;;19evEwcAAAAJ;KfT3_0AAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kamalika_Chaudhuri1;~Po-Ling_Loh2;~Shourya_Pandey1;~Purnamrita_Sarkar1", "aff": "University of California, San Diego;University of Cambridge;The University of Texas at Austin;University of Texas, Austin", "aff_domain": "ucsd.edu;cam.ac.uk;cs.utexas.edu;utexas.edu", "position": "Associate Professor;Associate Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchaudhuri2024on,\ntitle={On Differentially Private U Statistics},\nauthor={Kamalika Chaudhuri and Po-Ling Loh and Shourya Pandey and Purnamrita Sarkar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zApFYcLg6K}\n}", "github": "", "reviewers": "8Ha8;MPw8;B5Tq;vFH3", "pdf_size": 611845, "rating": "5;6;7;7", "confidence": "3;4;3;3", "soundness": "2;3;4;4", "novelty": "2;3;3;3", "presentation": "2;3;4;3", "wc_summary": "33;42;405;400", "wc_strengths": "61;24;249;74", "wc_weaknesses": "165;8;129;65", "wc_questions": "24;43;22;18", "wc_limitations": "79;1;31;1", "wc_review": "362;118;836;558", "wc_reply_reviewers": "19;32;34;48", "wc_reply_authors": "249;56;28;283", "reply_reviewers": "1;1;1;1", "reply_authors": "3;3;2;2", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.82915619758885 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 220.0, 182.53629776019892 ], "wc_strengths_avg": [ 102.0, 86.83029425263973 ], "wc_weaknesses_avg": [ 91.75, 60.172148873045906 ], "wc_questions_avg": [ 26.75, 9.627434756984853 ], "wc_limitations_avg": [ 28.0, 31.89043743820395 ], "wc_review_avg": [ 468.5, 263.27694543958836 ], "wc_reply_reviewers_avg": [ 33.25, 10.280442597476044 ], "wc_reply_authors_avg": [ 154.0, 113.07740711565684 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.17407765595569782, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:4cDuf3VAfj4J:scholar.google.com/&scioq=On+Differentially+Private+U+Statistics&hl=en&as_sdt=0,33", "gs_version_total": 3, "email": "ucsd.edu;cam.ac.uk;cs.utexas.edu;utexas.edu", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of California, San Diego;University of Cambridge;University of Texas at Austin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.cam.ac.uk;https://www.utexas.edu", "aff_unique_abbr": "UCSD;Cambridge;UT Austin", "aff_campus_unique_index": "0;1;2;2", "aff_campus_unique": "San Diego;Cambridge;Austin", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Multi-Label Learning with Stronger Consistency Guarantees", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92969", "id": "zAuerb1KGx", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zAuerb1KGx", "openreview": "https://openreview.net/forum?id=zAuerb1KGx", "poster": "", "project": "", "author_site": "Anqi Mao, Mehryar Mohri, Yutao Zhong", "tldr": "", "abstract": "We present a detailed study of surrogate losses and algorithms for multi-label learning, supported by $H$-consistency bounds. We first show that, for the simplest form of multi-label loss (the popular Hamming loss), the well-known consistent binary relevance surrogate suffers from a sub-optimal dependency on the number of labels in terms of $H$-consistency bounds, when using smooth losses such as logistic losses. Furthermore, this loss function fails to account for label correlations. To address these drawbacks, we introduce a novel surrogate loss, *multi-label logistic loss*, that accounts for label correlations and benefits from label-independent $H$-consistency bounds. We then broaden our analysis to cover a more extensive family of multi-label losses, including all common ones and a new extension defined based on linear-fractional functions with respect to the confusion matrix. We also extend our multi-label logistic losses to more comprehensive multi-label comp-sum losses, adapting comp-sum losses from standard classification to the multi-label learning. We prove that this family of surrogate losses benefits from $H$-consistency bounds, and thus Bayes-consistency, across any general multi-label loss. Our work thus proposes a unified surrogate loss framework benefiting from strong consistency guarantees for any multi-label loss, significantly expanding upon previous work which only established Bayes-consistency and for specific loss functions. Additionally, we adapt constrained losses from standard classification to multi-label constrained losses in a similar way, which also benefit from $H$-consistency bounds and thus Bayes-consistency for any multi-label loss. We further describe efficient gradient computation algorithms for minimizing the multi-label logistic loss.", "keywords": "multi-label learning;consistency;surrogate loss;hamming loss;learning theory", "primary_area": "learning_theory", "supplementary_material": "", "author": "Anqi Mao;Mehryar Mohri;Yutao Zhong", "authorids": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "gender": "F;M;", "homepage": "https://anqi-mao.github.io;https://cs.nyu.edu/~mohri/;", "dblp": "241/6864;03/5448;51/3178-2", "google_scholar": "nkjIZ-oAAAAJ;ktwwLjsAAAAJ;", "orcid": ";;", "linkedin": ";mehryar-mohri-3737b981/;", "or_profile": "~Anqi_Mao1;~Mehryar_Mohri2;~Yutao_Zhong1", "aff": "Courant Institute of Mathematical Sciences, NYU;Google Research;Google", "aff_domain": "cims.nyu.edu;google.com;google.com", "position": "PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nmao2024multilabel,\ntitle={Multi-Label Learning with Stronger Consistency Guarantees},\nauthor={Anqi Mao and Mehryar Mohri and Yutao Zhong},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zAuerb1KGx}\n}", "github": "", "reviewers": "73SN;9NfQ;sTKD;Abmm", "pdf_size": 435142, "rating": "5;5;7;8", "confidence": "1;2;4;4", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "63;40;79;52", "wc_strengths": "89;39;192;83", "wc_weaknesses": "35;336;55;64", "wc_questions": "4;191;66;3", "wc_limitations": "1;18;7;8", "wc_review": "192;624;399;210", "wc_reply_reviewers": "26;172;19;45", "wc_reply_authors": "13;0;14;28", "reply_reviewers": "1;1;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 2.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 58.5, 14.361406616345072 ], "wc_strengths_avg": [ 100.75, 56.108711444837155 ], "wc_weaknesses_avg": [ 122.5, 123.71034718244064 ], "wc_questions_avg": [ 66.0, 76.54737095420064 ], "wc_limitations_avg": [ 8.5, 6.103277807866851 ], "wc_review_avg": [ 356.25, 174.55998252749683 ], "wc_reply_reviewers_avg": [ 65.5, 62.219369974309444 ], "wc_reply_authors_avg": [ 13.75, 9.908960591303208 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9259259259259257, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6986778131429995874&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "cims.nyu.edu;google.com;google.com", "author_num": 3, "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Google", "aff_unique_dep": "Courant Institute of Mathematical Sciences;Google Research", "aff_unique_url": "https://www.courant.nyu.edu;https://research.google", "aff_unique_abbr": "NYU;Google Research", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "New York;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Amortized Bayesian Experimental Design for Decision-Making", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92968", "id": "zBG7WogAvm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zBG7WogAvm", "openreview": "https://openreview.net/forum?id=zBG7WogAvm", "poster": "/media/PosterPDFs/NeurIPS%202024/92968.png?t=1731676332.7464342", "project": "", "author_site": "Daolang Huang, Yujia Guo, Luigi Acerbi, Samuel Kaski", "tldr": "", "abstract": "Many critical decisions, such as personalized medical diagnoses and product pricing, are made based on insights gained from designing, observing, and analyzing a series of experiments. This highlights the crucial role of experimental design, which goes beyond merely collecting information on system parameters as in traditional Bayesian experimental design (BED), but also plays a key part in facilitating downstream decision-making. Most recent BED methods use an amortized policy network to rapidly design experiments. However, the information gathered through these methods is suboptimal for down-the-line decision-making, as the experiments are not inherently designed with downstream objectives in mind. In this paper, we present an amortized decision-aware BED framework that prioritizes maximizing downstream decision utility. We introduce a novel architecture, the Transformer Neural Decision Process (TNDP), capable of instantly proposing the next experimental design, whilst inferring the downstream decision, thus effectively amortizing both tasks within a unified workflow. We demonstrate the performance of our method across several tasks, showing that it can deliver informative designs and facilitate accurate decision-making.", "keywords": "Bayesian experimental design;amortized inference;Bayesian decision theory;neural processes", "primary_area": "probabilistic_methods", "supplementary_material": "/attachment/ae909fba2c1e3b7eb4782a71b9b094cfc7c180b9.zip", "author": "Daolang Huang;Yujia Guo;Luigi Acerbi;Samuel Kaski", "authorids": "~Daolang_Huang1;~Yujia_Guo1;~Luigi_Acerbi1;~Samuel_Kaski1", "gender": "M;F;M;M", "homepage": "https://www.huangdaolang.com;https://www.linkedin.com/in/yujia-guo-6738a0203/;http://luigiacerbi.com/;https://people.aalto.fi/samuel.kaski", "dblp": "277/8410;204/7435;72/1450;64/5826", "google_scholar": "2togGHoAAAAJ;;https://scholar.google.co.uk/citations?user=QYBZoGwAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;0000-0001-7471-7336;0000-0003-1925-9154", "linkedin": "daolanghuang/?originalSubdomain=fi;;luigi-acerbi-719b492/;samuel-kaski-27790/", "or_profile": "~Daolang_Huang1;~Yujia_Guo1;~Luigi_Acerbi1;~Samuel_Kaski1", "aff": "Aalto University;Aalto University;University of Helsinki;Aalto University", "aff_domain": "aalto.fi;aalto.fi;helsinki.fi;aalto.fi", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2024amortized,\ntitle={Amortized Bayesian Experimental Design for Decision-Making},\nauthor={Daolang Huang and Yujia Guo and Luigi Acerbi and Samuel Kaski},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zBG7WogAvm}\n}", "github": "", "reviewers": "Ctfm;WHLG;Zp5w;7z19", "pdf_size": 4753144, "rating": "6;6;6;7", "confidence": "5;4;4;4", "soundness": "2;3;3;3", "novelty": "3;3;3;4", "presentation": "3;3;3;2", "wc_summary": "45;104;188;43", "wc_strengths": "87;90;107;50", "wc_weaknesses": "820;499;141;305", "wc_questions": "111;71;66;225", "wc_limitations": "132;10;115;23", "wc_review": "1195;774;617;646", "wc_reply_reviewers": "42;17;0;20", "wc_reply_authors": "14;17;0;14", "reply_reviewers": "2;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 95.0, 59.02118263810036 ], "wc_strengths_avg": [ 83.5, 20.79062288629179 ], "wc_weaknesses_avg": [ 441.25, 252.73541006356825 ], "wc_questions_avg": [ 118.25, 64.05222478571685 ], "wc_limitations_avg": [ 70.0, 54.032397688794084 ], "wc_review_avg": [ 808.0, 231.11144497839132 ], "wc_reply_reviewers_avg": [ 19.75, 14.939461168328663 ], "wc_reply_authors_avg": [ 11.25, 6.609652033201143 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10656860825083842356&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "aalto.fi;aalto.fi;helsinki.fi;aalto.fi", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Aalto University;University of Helsinki", "aff_unique_dep": ";", "aff_unique_url": "https://www.aalto.fi;https://www.helsinki.fi", "aff_unique_abbr": "Aalto;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Finland" }, { "title": "FedNE: Surrogate-Assisted Federated Neighbor Embedding for Dimensionality Reduction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92967", "id": "zBMKodNgKX", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zBMKodNgKX", "openreview": "https://openreview.net/forum?id=zBMKodNgKX", "poster": "/media/PosterPDFs/NeurIPS%202024/92967.png?t=1733711140.4136357", "project": "", "author_site": "Ziwei Li, Xiaoqi Wang, Hong-You Chen, Han Wei Shen, Wei-Lun (Harry) Chao", "tldr": "", "abstract": "Federated learning (FL) has rapidly evolved as a promising paradigm that enables collaborative model training across distributed participants without exchanging their local data. Despite its broad applications in fields such as computer vision, graph learning, and natural language processing, the development of a data projection model that can be effectively used to visualize data in the context of FL is crucial yet remains heavily under-explored. Neighbor embedding (NE) is an essential technique for visualizing complex high-dimensional data, but collaboratively learning a joint NE model is difficult. The key challenge lies in the objective function, as effective visualization algorithms like NE require computing loss functions among pairs of data. \nIn this paper, we introduce \\textsc{FedNE}, a novel approach that integrates the \\textsc{FedAvg} framework with the contrastive NE technique, without any requirements of shareable data. To address the lack of inter-client repulsion which is crucial for the alignment in the global embedding space, we develop a surrogate loss function that each client learns and shares with each other. Additionally, we propose a data-mixing strategy to augment the local data, aiming to relax the problems of invisible neighbors and false neighbors constructed by the local $k$NN graphs. We conduct comprehensive experiments on both synthetic and real-world datasets. The results demonstrate that our \\textsc{FedNE} can effectively preserve the neighborhood data structures and enhance the alignment in the global embedding space compared to several baseline methods.", "keywords": "Federated Learning;Dimensionality Reduction;Unsupervised Learning;Representation Learning", "primary_area": "other", "supplementary_material": "", "author": "Ziwei Li;Xiaoqi Wang;Hong-You Chen;Han Wei Shen;Wei-Lun Chao", "authorids": "~Ziwei_Li3;~Xiaoqi_Wang2;~Hong-You_Chen1;~Han_Wei_Shen1;~Wei-Lun_Chao1", "gender": "F;F;;M;M", "homepage": ";;https://sites.google.com/view/hongyouc/%E9%A6%96%E9%A0%81;http://www.cse.ohio-state.edu/~hwshen;https://sites.google.com/view/wei-lun-harry-chao", "dblp": ";;228/5569;61/6829;64/8842", "google_scholar": "j0j6c0MAAAAJ;i__pLDEAAAAJ;uxlU7J8AAAAJ;https://scholar.google.com/citations?hl=en;PGKakWwAAAAJ", "orcid": ";;;0000-0002-1211-2320;0000-0003-1269-7231", "linkedin": "ziwei-li-03458513b/;xiaoqiwang2582/;;;", "or_profile": "~Ziwei_Li3;~Xiaoqi_Wang2;~Hong-You_Chen1;~Han_Wei_Shen1;~Wei-Lun_Chao1", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;Apple AI/ML;Ohio State University, Columbus;Ohio State University", "aff_domain": "osu.edu;osu.edu;apple.com;osu.edu;osu.edu", "position": "PhD student;PhD student;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2024fedne,\ntitle={Fed{NE}: Surrogate-Assisted Federated Neighbor Embedding for Dimensionality Reduction},\nauthor={Ziwei Li and Xiaoqi Wang and Hong-You Chen and Han Wei Shen and Wei-Lun Chao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zBMKodNgKX}\n}", "github": "", "reviewers": "mP1Q;y2ZN;4ZYS;BJv7", "pdf_size": 4673579, "rating": "3;5;5;5", "confidence": "5;4;3;3", "soundness": "2;2;3;3", "novelty": "2;3;3;2", "presentation": "3;2;2;3", "wc_summary": "119;103;96;85", "wc_strengths": "32;80;33;38", "wc_weaknesses": "211;135;78;203", "wc_questions": "260;103;38;2", "wc_limitations": "3;10;1;39", "wc_review": "625;431;246;367", "wc_reply_reviewers": "72;35;10;402", "wc_reply_authors": "310;40;39;880", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;4", "rating_avg": [ 4.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 100.75, 12.336429791475327 ], "wc_strengths_avg": [ 45.75, 19.904459299363044 ], "wc_weaknesses_avg": [ 156.75, 54.214273950685715 ], "wc_questions_avg": [ 100.75, 98.81137333323528 ], "wc_limitations_avg": [ 13.25, 15.237699957670777 ], "wc_review_avg": [ 417.25, 137.11377574846372 ], "wc_reply_reviewers_avg": [ 129.75, 158.72361985539519 ], "wc_reply_authors_avg": [ 317.25, 343.1584000137546 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.5, 0.8660254037844386 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.8703882797784891, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:PZ8oz5krr4IJ:scholar.google.com/&scioq=FedNE:+Surrogate-Assisted+Federated+Neighbor+Embedding+for+Dimensionality+Reduction&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "osu.edu;osu.edu;apple.com;osu.edu;osu.edu", "author_num": 5, "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Ohio State University;Apple", "aff_unique_dep": ";AI/ML", "aff_unique_url": "https://www.osu.edu;https://www.apple.com", "aff_unique_abbr": "OSU;Apple", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "zDYXdR3ClP", "title": "UIR-LoRA: Achieving Universal Image Restoration through Multiple Low-Rank Adaptation", "track": "main", "status": "Reject", "tldr": "", "abstract": "Existing unified methods typically treat multi-degradation image restoration as a multi-task learning problem. Despite performing effectively compared to single degradation restoration methods, they overlook the utilization of commonalities and specificities within multi-task restoration, thereby impeding the model's performance. Inspired by the success of deep generative models and fine-tuning techniques, we proposed a universal image restoration framework based on multiple low-rank adapters (LoRA) from multi-domain transfer learning. Our framework leverages the pre-trained generative model as the shared component for multi-degradation restoration and transfers it to specific degradation image restoration tasks using low-rank adaptation. Additionally, we introduce a LoRA composing strategy based on the degradation similarity, which adaptively combines trained LoRAs and enables our model to be applicable for mixed degradation restoration. Extensive experiments on multiple and mixed degradations demonstrate that the proposed universal image restoration method not only achieves higher fidelity and perceptual image quality but also has better generalization ability than other unified image restoration models.", "keywords": "Image Restoration;Transferring Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Cheng Zhang;Dong Gong;Jiumei He;Yu Zhu;Jinqiu Sun;Yanning Zhang", "authorids": "~Cheng_Zhang14;~Dong_Gong1;~Jiumei_He1;~Yu_Zhu9;~Jinqiu_Sun1;~Yanning_Zhang1", "gender": "M;M;F;M;F;F", "homepage": ";https://donggong1.github.io;https://github.com/hjm0001;http://teacher.nwpu.edu.cn/yuzhu;http://jszy.nwpu.edu.cn/en/sunjinqiu.html;http://teacher.nwpu.edu.cn/ynzhang", "dblp": ";125/5032;;38/5267-4;53/1738;14/6655", "google_scholar": "L_Av9NcAAAAJ;https://scholar.google.com.au/citations?user=e2u6hRoAAAAJ;;;;", "orcid": ";0000-0002-2668-9630;;0000-0002-2480-0569;0000-0002-0551-6351;", "linkedin": ";;;;;", "or_profile": "~Cheng_Zhang14;~Dong_Gong1;~Jiumei_He1;~Yu_Zhu9;~Jinqiu_Sun1;~Yanning_Zhang1", "aff": "Northwest Polytechnical University Xi'an;University of New South Wales;Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Northwest Polytechnical University Xi'an;Northwestern Polytechnical University", "aff_domain": "nwpu.edu.cn;unsw.edu.au;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn;nwpu.edu.cn", "position": "PhD student;Assistant Professor;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@misc{\nanonymous2024uirlora,\ntitle={{UIR}-Lo{RA}: Achieving Universal Image Restoration through Multiple Low-Rank Adaptation},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=zDYXdR3ClP}\n}", "github": "", "project": "", "reviewers": "xL9P;Siin;4LsM;SkW8;nZDS", "site": "https://openreview.net/forum?id=zDYXdR3ClP", "pdf_size": 3041963, "rating": "3;4;5;6;6", "confidence": "5;5;3;4;5", "soundness": "1;4;2;2;3", "novelty": "2;3;2;3;3", "presentation": "2;4;2;3;3", "wc_summary": "54;58;104;73;71", "wc_strengths": "24;18;52;38;77", "wc_weaknesses": "280;61;295;151;106", "wc_questions": "49;23;36;28;62", "wc_limitations": "1;37;24;2;5", "wc_review": "408;197;511;292;321", "wc_reply_reviewers": "138;157;592;25;0", "wc_reply_authors": "119;634;856;13;0", "reply_reviewers": "2;2;3;1;0", "reply_authors": "2;3;4;2;1", "rating_avg": [ 4.8, 1.16619037896906 ], "confidence_avg": [ 4.4, 0.7999999999999999 ], "soundness_avg": [ 2.4, 1.019803902718557 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.7483314773547882 ], "wc_summary_avg": [ 72.0, 17.58408371226661 ], "wc_strengths_avg": [ 41.8, 21.16978979583879 ], "wc_weaknesses_avg": [ 178.6, 93.48069319383549 ], "wc_questions_avg": [ 39.6, 14.235167719419396 ], "wc_limitations_avg": [ 13.8, 14.3024473430249 ], "wc_review_avg": [ 345.8, 106.5934331936072 ], "wc_reply_reviewers_avg": [ 182.4, 213.74433325821764 ], "wc_reply_authors_avg": [ 324.4, 352.9433948949888 ], "reply_reviewers_avg": [ 1.6, 1.019803902718557 ], "reply_authors_avg": [ 2.4, 1.019803902718557 ], "replies_avg": [ 27, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.3429971702850176, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:KL1Aq8saVe4J:scholar.google.com/&scioq=UIR-LoRA:+Achieving+Universal+Image+Restoration+through+Multiple+Low-Rank+Adaptation&hl=en&as_sdt=0,33", "gs_version_total": 4, "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Northwest Polytechnical University;University of New South Wales;Northwestern Polytechnical University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nwpu.edu.cn;https://www.unsw.edu.au;https://www.nwpu.edu.cn", "aff_unique_abbr": "NWPU;UNSW;NWPU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Australia" }, { "title": "A teacher-teacher framework for clinical language representation learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92966", "id": "zDaD8zv8tG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zDaD8zv8tG", "openreview": "https://openreview.net/forum?id=zDaD8zv8tG", "poster": "/media/PosterPDFs/NeurIPS%202024/92966.png?t=1733192248.119484", "project": "", "author_site": "Feiqing Huang, Shenghan Zhang, Sara Sweet, Tianxi Cai", "tldr": "", "abstract": "In recent years, there has been a proliferation of ready-to-use large language models (LLMs) designed for various applications, both general-purpose and domain-specific. Instead of advocating for the development of a new model or continuous pretraining of an existing one, this paper introduces a pragmatic teacher-teacher framework to facilitate mutual learning between two pre-existing models.\nBy leveraging two teacher models possessing complementary knowledge, we introduce a LIghtweight kNowledge alignmEnt (LINE) module aimed at harmonizing their knowledge within a unified representation space. This framework is particularly valuable in clinical settings, where stringent regulations and privacy considerations dictate the handling of detailed clinical notes. Our trained LINE module excels in capturing critical information from clinical notes, leveraging highly de-identified data. Validation and downstream tasks further demonstrate the effectiveness of the proposed framework.", "keywords": "clinical language models;teacher-teacher framework;knowledge alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Feiqing Huang;Shenghan Zhang;Sara Morini Sweet;Tianxi Cai", "authorids": "~Feiqing_Huang1;~Shenghan_Zhang2;~Sara_Morini_Sweet1;~Tianxi_Cai1", "gender": ";M;F;F", "homepage": ";;;https://celehs.hms.harvard.edu/tcai/index.html", "dblp": ";;;", "google_scholar": ";cpzTTJYAAAAJ;;", "orcid": ";;;", "linkedin": ";;sara-sweet-b28290b8/;", "or_profile": "~Feiqing_Huang1;~Shenghan_Zhang2;~Sara_Morini_Sweet1;~Tianxi_Cai1", "aff": ";Harvard Medical School, Harvard University;Harvard Medical School, Harvard University;Harvard T.H. Chan School of Public Health", "aff_domain": ";hms.harvard.edu;hms.harvard.edu;hsph.harvard.edu", "position": ";MS student;Research Associate;Full Professor", "bibtex": "@inproceedings{\nhuang2024a,\ntitle={A teacher-teacher framework for clinical language representation learning},\nauthor={Feiqing Huang and Shenghan Zhang and Sara Morini Sweet and Tianxi Cai},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zDaD8zv8tG}\n}", "github": "", "reviewers": "TNnd;Hfn8;Yopj;j7j8;Yayo", "pdf_size": 804072, "rating": "3;4;5;6;7", "confidence": "4;4;3;4;4", "soundness": "3;3;2;3;3", "novelty": "3;2;3;3;3", "presentation": "3;3;3;3;3", "wc_summary": "73;24;80;81;44", "wc_strengths": "66;8;199;136;31", "wc_weaknesses": "101;39;131;83;59", "wc_questions": "135;1;82;13;32", "wc_limitations": "27;4;39;31;2", "wc_review": "402;76;531;344;168", "wc_reply_reviewers": "0;0;27;9;11", "wc_reply_authors": "0;346;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;2;1;1;1", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 3.8, 0.39999999999999997 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 60.4, 22.63271967749347 ], "wc_strengths_avg": [ 88.0, 70.36760618352737 ], "wc_weaknesses_avg": [ 82.6, 32.0599438552222 ], "wc_questions_avg": [ 52.6, 49.61693259362171 ], "wc_limitations_avg": [ 20.6, 14.894294209528693 ], "wc_review_avg": [ 304.2, 163.22548820573337 ], "wc_reply_reviewers_avg": [ 9.4, 9.891410415102591 ], "wc_reply_authors_avg": [ 69.2, 138.4 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.2, 0.4000000000000001 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:1jOspvncXHIJ:scholar.google.com/&scioq=A+teacher-teacher+framework+for+clinical+language+representation+learning&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": ";hms.harvard.edu;hms.harvard.edu;hsph.harvard.edu", "author_num": 4, "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "Harvard Medical School", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "zFHJUSTZka", "title": "Direct Language Model Alignment from Online AI Feedback", "track": "main", "status": "Reject", "tldr": "", "abstract": "Direct alignment from preferences (DAP) methods, such as DPO, have recently emerged as efficient alternatives to reinforcement learning from human feedback (RLHF), that do not require a separate reward model. However, the preference datasets used in DAP methods are usually collected ahead of training and never updated, thus the feedback is purely offline. Moreover, responses in these datasets are often sampled from a language model distinct from the one being aligned, and since the model evolves over training, the alignment phase is inevitably off-policy. In this study, we posit that online feedback is key and improves DAP methods. Our method, online AI feedback (OAIF), uses an LLM as annotator: on each training iteration, we sample two responses from the current model and prompt the LLM annotator to choose which one is preferred, thus providing online feedback. Despite its simplicity, we demonstrate via human evaluation in several tasks that OAIF outperforms both offline DAP and RLHF methods. We further show that the feedback leveraged in OAIF is easily controllable, via instruction prompts to the LLM annotator.", "keywords": "LLM Alignment; AI Feedback; On-policy Learning; Online Feedback", "primary_area": "generative_models", "supplementary_material": "", "author": "Shangmin Guo;Biao Zhang;Tianlin Liu;Tianqi Liu;Misha Khalman;Felipe Llinares-L\u00f3pez;Alexandre Rame;Thomas Mesnard;Yao Zhao;Bilal Piot;Johan Ferret;Mathieu Blondel", "authorids": "~Shangmin_Guo1;~Biao_Zhang2;~Tianlin_Liu2;~Tianqi_Liu1;~Misha_Khalman1;~Felipe_Llinares-L\u00f3pez1;~Alexandre_Rame1;~Thomas_Mesnard2;~Yao_Zhao5;~Bilal_Piot1;~Johan_Ferret1;~Mathieu_Blondel1", "gender": "M;M;M;M;M;;;M;M;;M;M", "homepage": ";;http://www.tianlinliu.com;;https://alexrame.github.io/;https://thomasmesnard.github.io/;;;https://ferretj.github.io;http://www.mblondel.org;;", "dblp": "183/0949;https://dblp.uni-trier.de/pers/hd/z/Zhang_0002:Biao;20/7667;134/5653-2;;;;;;05/8614.html;157/7706;", "google_scholar": "cpOrbSoAAAAJ;gqPKjaIAAAAJ;;pUKhiMIAAAAJ;7znwivwAAAAJ;;p7L3HrMAAAAJ;https://scholar.google.fr/citations?user=fqxNUREAAAAJ;uyUnqjMAAAAJ;C0EKzrUAAAAJ;zzjTWUUAAAAJ;", "orcid": "0000-0003-1716-0994;;;;;;;;;;;", "linkedin": ";;;;alexandre-ram%C3%A9-05259587;;;;;;;khalman/", "or_profile": "~Shangmin_Guo1;~Biao_Zhang2;~Tianlin_Liu2;~Tianqi_Liu1;~Alexandre_Rame1;~Thomas_Mesnard2;~Yao_Zhao5;~Bilal_Piot1;~Johan_Ferret1;~Mathieu_Blondel1;~Felipe_Llinares-Lopez1;~Mikhail_Khalman1", "aff": "University of Edinburgh;Google DeepMind;University of Basel;Google DeepMind;Google;Google DeepMind;Google;University Lille;Google;Google;Google LLC;Google", "aff_domain": "ed.ac.uk;google.com;unibas.ch;google.com;google.com;google.com;google.com;univ-lille1.fr;google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;PhD student;Software Engineer;research scientist;PhD student;Researcher;Associate Professor;Researcher;Research scientist;Research Scientist;Researcher", "bibtex": "@misc{\nanonymous2024direct,\ntitle={Direct Language Model Alignment from Online {AI} Feedback},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=zFHJUSTZka}\n}", "github": "", "project": "", "reviewers": "5Zpk;wraF;wFW3;1r3m", "site": "https://openreview.net/forum?id=zFHJUSTZka", "pdf_size": 1188580, "rating": "3;4;4;5", "confidence": "4;3;4;4", "soundness": "2;2;3;3", "novelty": "2;2;2;2", "presentation": "3;3;3;3", "wc_summary": "85;67;43;41", "wc_strengths": "58;44;26;33", "wc_weaknesses": "241;194;139;118", "wc_questions": "1;3;1;75", "wc_limitations": "1;7;2;13", "wc_review": "386;315;211;280", "wc_reply_reviewers": "0;0;166;10", "wc_reply_authors": "0;0;150;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;2;1", "rating_avg": [ 4.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 59.0, 18.16590212458495 ], "wc_strengths_avg": [ 40.25, 12.090802289343747 ], "wc_weaknesses_avg": [ 173.0, 48.07806152498247 ], "wc_questions_avg": [ 20.0, 31.76476034853718 ], "wc_limitations_avg": [ 5.75, 4.763139720814412 ], "wc_review_avg": [ 298.0, 63.09912836165013 ], "wc_reply_reviewers_avg": [ 44.0, 70.55494312945054 ], "wc_reply_authors_avg": [ 37.5, 64.9519052838329 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 12, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7597996581921883428&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff_unique_index": "0;1;2;1;1;1;1;3;1;1;1;1", "aff_unique_norm": "University of Edinburgh;Google;University of Basel;University of Lille", "aff_unique_dep": ";Google DeepMind;;", "aff_unique_url": "https://www.ed.ac.uk;https://deepmind.com;https://www.unibas.ch;https://www.univ-lille.fr", "aff_unique_abbr": "Edinburgh;DeepMind;UniBas;ULille", "aff_campus_unique_index": "1;1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;2;0;2;3;2;2;2;2", "aff_country_unique": "United Kingdom;Switzerland;United States;France" }, { "title": "Scene Graph Disentanglement and Composition for Generalizable Complex Image Generation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92965", "id": "zGN0YWy2he", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zGN0YWy2he", "openreview": "https://openreview.net/forum?id=zGN0YWy2he", "poster": "/media/PosterPDFs/NeurIPS%202024/92965.png?t=1731318959.3633285", "project": "", "author_site": "Yunnan Wang, Ziqiang Li, Wenyao Zhang, Zequn Zhang, Baao Xie, Xihui Liu, Wenjun Zeng, Xin Jin", "tldr": "", "abstract": "There has been exciting progress in generating images from natural language or layout conditions. However, these methods struggle to faithfully reproduce complex scenes due to the insufficient modeling of multiple objects and their relationships. To address this issue, we leverage the scene graph, a powerful structured representation, for complex image generation. Different from the previous works that directly use scene graphs for generation, we employ the generative capabilities of variational autoencoders and diffusion models in a generalizable manner, compositing diverse disentangled visual clues from scene graphs. Specifically, we first propose a Semantics-Layout Variational AutoEncoder (SL-VAE) to jointly derive (layouts, semantics) from the input scene graph, which allows a more diverse and reasonable generation in a one-to-many mapping. We then develop a Compositional Masked Attention (CMA) integrated with a diffusion model, incorporating (layouts, semantics) with fine-grained attributes as generation guidance. To further achieve graph manipulation while keeping the visual content consistent, we introduce a Multi-Layered Sampler (MLS) for an \"isolated\" image editing effect. Extensive experiments demonstrate that our method outperforms recent competitors based on text, layout, or scene graph, in terms of generation rationality and controllability.", "keywords": "Scene Graph; Disentanglement; Diffusion Model; Compositional Image Generation", "primary_area": "generative_models", "supplementary_material": "/attachment/4b7d2dd027fefb29263efc4e8b2ca9cc76f24486.zip", "author": "Yunnan Wang;Ziqiang Li;Wenyao Zhang;Zequn Zhang;Baao Xie;Xihui Liu;Wenjun Zeng;Xin Jin", "authorids": "~Yunnan_Wang1;~Ziqiang_Li3;~Wenyao_Zhang1;~Zequn_Zhang1;~Baao_Xie3;~Xihui_Liu1;~Wenjun_Zeng3;~Xin_Jin8", "gender": "M;M;M;M;M;F;M;M", "homepage": "https://wangyunnan.github.io/;https://github.com/ZiQ-Li;;;;https://xh-liu.github.io/;https://www.eias.ac.cn/h-col-187.html;http://home.ustc.edu.cn/~jinxustc/", "dblp": "284/4034;;;;;184/3911;57/145;68/3340-14", "google_scholar": "cxDT_WwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;w_Szx5MAAAAJ;ElVJU4MAAAAJ;https://scholar.google.co.uk/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=4YL23GMAAAAJ;_cUfvYQAAAAJ;byaSC-kAAAAJ", "orcid": "0000-0002-5480-4587;0000-0002-4260-182X;;0000-0001-5566-761X;;0000-0003-1831-9952;;0000-0002-1820-8358", "linkedin": ";;;;;;;", "or_profile": "~Yunnan_Wang1;~Ziqiang_Li3;~Wenyao_Zhang1;~Zequn_Zhang1;~Baao_Xie3;~Xihui_Liu1;~Wenjun_Zeng3;~Xin_Jin8", "aff": "\tEastern Institute of Technology;Shanghai Jiaotong University;Shanghai Jiaotong University;University of Science and Technology of China;Ningbo Institute of Digital Twin;University of Hong Kong;Eastern Institute for Advanced Study;Eastern Institute of Technology, Ningbo", "aff_domain": "eitech.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ustc.edu.cn;idt.eitech.edu.cn;hku.hk;eias.ac.cn;eitech.edu.cn", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Assistant Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024scene,\ntitle={Scene Graph Disentanglement and Composition for Generalizable Complex Image Generation},\nauthor={Yunnan Wang and Ziqiang Li and Wenyao Zhang and Zequn Zhang and Baao Xie and Xihui Liu and Wenjun Zeng and Xin Jin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zGN0YWy2he}\n}", "github": "", "reviewers": "3rPJ;SBq5;anRy;Knts", "pdf_size": 4118408, "rating": "5;6;6;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;3;3;3", "presentation": "3;3;3;3", "wc_summary": "81;82;83;79", "wc_strengths": "36;62;43;77", "wc_weaknesses": "101;119;117;35", "wc_questions": "4;3;53;1", "wc_limitations": "4;81;67;1", "wc_review": "226;347;363;193", "wc_reply_reviewers": "16;20;0;47", "wc_reply_authors": "0;0;52;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;2;1", "rating_avg": [ 6.25, 1.0897247358851685 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 81.25, 1.479019945774904 ], "wc_strengths_avg": [ 54.5, 16.101242188104617 ], "wc_weaknesses_avg": [ 93.0, 34.20526275297414 ], "wc_questions_avg": [ 15.25, 21.821720830401986 ], "wc_limitations_avg": [ 38.25, 36.10661296770994 ], "wc_review_avg": [ 282.25, 73.89646473275971 ], "wc_reply_reviewers_avg": [ 20.75, 16.90229274388537 ], "wc_reply_authors_avg": [ 13.0, 22.516660498395403 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.6622661785325219, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15418300821180328409&as_sdt=5,28&sciodt=0,28&hl=en", "gs_version_total": 6, "email": "eitech.edu.cn;sjtu.edu.cn;sjtu.edu.cn;ustc.edu.cn;idt.eitech.edu.cn;hku.hk;eias.ac.cn;eitech.edu.cn", "author_num": 8, "aff_unique_index": "0;1;1;2;3;4;5;0", "aff_unique_norm": "Eastern Institute of Technology;Shanghai Jiao Tong University;University of Science and Technology of China;Ningbo Institute of Digital Twin;University of Hong Kong;Eastern Institute for Advanced Study", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.eit.ac.nz;https://www.sjtu.edu.cn;http://www.ustc.edu.cn;;https://www.hku.hk;", "aff_unique_abbr": "EIT;SJTU;USTC;;HKU;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Ningbo", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "New Zealand;China;" }, { "title": "$E^3$: Exploring Embodied Emotion Through A Large-Scale Egocentric Video Dataset", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97427", "id": "zGfKPqunJG", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zGfKPqunJG", "openreview": "https://openreview.net/forum?id=zGfKPqunJG", "poster": "", "project": "", "author_site": "wang lin, Yueying Feng, WenKang Han, Tao Jin, Zhou Zhao, Fei Wu, Chang Yao, Jingyuan Chen", "tldr": "", "abstract": "Understanding human emotions is fundamental to enhancing human-computer interaction, especially for embodied agents that mimic human behavior. Traditional emotion analysis often takes a third-person perspective, limiting the ability of agents to interact naturally and empathetically. To address this gap, this paper presents $E^3$ for Exploring Embodied Emotion, the first massive first-person view video dataset. $E^3$ contains more than $50$ hours of video, capturing $8$ different emotion types in diverse scenarios and languages. The dataset features videos recorded by individuals in their daily lives, capturing a wide range of real-world emotions conveyed through visual, acoustic, and textual modalities. By leveraging this dataset, we define $4$ core benchmark tasks - emotion recognition, emotion classification, emotion localization, and emotion reasoning - supported by more than $80$k manually crafted annotations, providing a comprehensive resource for training and evaluating emotion analysis models. We further present Emotion-LlaMa, which complements visual modality with acoustic modality to enhance the understanding of emotion in first-person videos. The results of comparison experiments with a large number of baselines demonstrate the superiority of Emotion-LlaMa and set a new benchmark for embodied emotion analysis. We expect that $E^3$ can promote advances in multimodal understanding, robotics, and augmented reality, and provide a solid foundation for the development of more empathetic and context-aware embodied agents.", "keywords": "emotion analysis; egocentric datasets ; video understanding;", "primary_area": "", "supplementary_material": "/attachment/d86f0798861735580286fc78a6592071f5a0f1b3.pdf", "author": "Wang Lin;Yueying Feng;WenKang Han;Tao Jin;Zhou Zhao;Fei Wu;Chang Yao;Jingyuan Chen", "authorids": "~Wang_Lin2;~Yueying_Feng2;~WenKang_Han1;~Tao_Jin2;~Zhou_Zhao3;~Fei_Wu1;~Chang_Yao2;~Jingyuan_Chen3", "gender": ";F;;M;;M;;", "homepage": ";;;https://hugddygff.github.io/;;https://person.zju.edu.cn/wufei;;", "dblp": ";377/3161;;88/4850-4.html;;84/3254-1;;", "google_scholar": ";https://scholar.google.com.hk/citations?user=WIXoltUAAAAJ;;;;XJLn4MYAAAAJ;;", "orcid": ";0009-0006-0619-9198;;0000-0003-3564-1628;;;;", "linkedin": ";;;;;;;", "or_profile": "~Wang_Lin2;~Yueying_Feng2;~WenKang_Han1;~Tao_Jin2;~Zhou_Zhao3;~Fei_Wu1;~Chang_Yao2;~Jingyuan_Chen3", "aff": ";Zhejiang University;;Zhejiang University;;Zhejiang University;;", "aff_domain": ";zju.edu.cn;;zju.edu.cn;;zju.edu.cn;;", "position": ";MS student;;Assistant Professor;;Full Professor;;", "bibtex": "@inproceedings{\nlin2024e,\ntitle={\\$E{\\textasciicircum}3\\$: Exploring Embodied Emotion Through A Large-Scale Egocentric Video Dataset},\nauthor={Wang Lin and Yueying Feng and WenKang Han and Tao Jin and Zhou Zhao and Fei Wu and Chang Yao and Jingyuan Chen},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=zGfKPqunJG}\n}", "github": "", "reviewers": "12eZ;se4o;gGZu;AVha", "pdf_size": 7246463, "rating": "6;6;7;8", "confidence": "4;5;4;5", "wc_summary_and_contributions": "72;60;114;54", "wc_strengths": "35;22;22;35", "wc_improvement": "49;19;6;34", "wc_limitations": "20;25;2;1", "wc_correctness": "40;1;1;1", "wc_clarity": "4;1;5;1", "wc_relation_to_prior_work": "6;1;1;1", "wc_documentation": "22;11;8;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "249;141;160;129", "wc_reply_reviewers": "0;0;0;11", "wc_reply_authors": "56;56;56;56", "reply_reviewers": "0;0;0;1", "reply_authors": "2;2;2;2", "rating_avg": [ 6.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "wc_summary_and_contributions_avg": [ 75.0, 23.430749027719962 ], "wc_strengths_avg": [ 28.5, 6.5 ], "wc_improvement_avg": [ 27.0, 16.109003693587013 ], "wc_limitations_avg": [ 12.0, 10.653637876331258 ], "wc_correctness_avg": [ 10.75, 16.887495373796554 ], "wc_clarity_avg": [ 2.75, 1.7853571071357126 ], "wc_relation_to_prior_work_avg": [ 2.25, 2.165063509461097 ], "wc_documentation_avg": [ 10.5, 7.566372975210778 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 169.75, 47.07108985353961 ], "wc_reply_reviewers_avg": [ 2.75, 4.763139720814412 ], "wc_reply_authors_avg": [ 56.0, 0.0 ], "reply_reviewers_avg": [ 0.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7214839528372931058&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": ";zju.edu.cn;;zju.edu.cn;;zju.edu.cn;;", "author_num": 8, "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Bridging Model-Based Optimization and Generative Modeling via Conservative Fine-Tuning of Diffusion Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92964", "id": "zIr2QjU4hl", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zIr2QjU4hl", "openreview": "https://openreview.net/forum?id=zIr2QjU4hl", "poster": "", "project": "", "author_site": "Masatoshi Uehara, Yulai Zhao, Ehsan Hajiramezanali, Gabriele Scalia, Gokcen Eraslan, Avantika Lal, Sergey Levine, Tommaso Biancalani", "tldr": "", "abstract": "AI-driven design problems, such as DNA/protein sequence design, are commonly tackled from two angles: generative modeling, which efficiently captures the feasible design space (e.g., natural images or biological sequences), and model-based optimization, which utilizes reward models for extrapolation. To combine the strengths of both approaches, we adopt a hybrid method that fine-tunes cutting-edge diffusion models by optimizing reward models through RL. Although prior work has explored similar avenues, they primarily focus on scenarios where accurate reward models are accessible. In contrast, we concentrate on an offline setting where a reward model is unknown, and we must learn from static offline datasets, a common scenario in scientific domains. In offline scenarios, existing approaches tend to suffer from overoptimization, as they may be misled by the reward model in out-of-distribution regions. To address this, we introduce a conservative fine-tuning approach, BRAID, by optimizing a conservative reward model, which includes additional penalization outside of offline data distributions. Through empirical and theoretical analysis, we demonstrate the capability of our approach to outperform the best designs in offline data, leveraging the extrapolation capabilities of reward models while avoiding the generation of invalid designs through pre-trained diffusion models.", "keywords": "Diffusion models;Reinforcement learning", "primary_area": "diffusion_based_models", "supplementary_material": "/attachment/285152ee7a6a7419534ed874d06583c7bb56f9f4.zip", "author": "Masatoshi Uehara;Yulai Zhao;Ehsan Hajiramezanali;Gabriele Scalia;G\u00f6kcen Eraslan;Avantika Lal;Sergey Levine;Tommaso Biancalani", "authorids": "~Masatoshi_Uehara1;~Yulai_Zhao1;~Ehsan_Hajiramezanali1;~Gabriele_Scalia1;~G\u00f6kcen_Eraslan1;~Avantika_Lal1;~Sergey_Levine1;~Tommaso_Biancalani1", "gender": "M;M;M;;M;F;M;M", "homepage": "https://www.masatoshiuehara.com/;https://yulaizhao.com/;http://ehsanhajiramezanali.github.io/;;;https://avantikalal.github.io/;https://people.eecs.berkeley.edu/~svlevine/;", "dblp": "225/6517;64/6357-2;225/3486;201/9258;;311/7916;80/7594;", "google_scholar": "https://scholar.google.co.jp/citations?user=xuLKJboAAAAJ;r-mWYj0AAAAJ;20I_DMoAAAAJ;MxeFvewAAAAJ;https://scholar.google.com/citations?hl=en;CLgOCOAAAAAJ;8R35rCwAAAAJ;https://scholar.google.it/citations?user=s_qd9x0AAAAJ", "orcid": "0000-0001-9017-3105;0000-0002-6930-3590;;0000-0003-3305-9220;;;;", "linkedin": ";yulaizhao/;ehsan-hajiramezanali-978a3b52/;gabriele-scalia;;avantikalal;;", "or_profile": "~Masatoshi_Uehara1;~Yulai_Zhao1;~Ehsan_Hajiramezanali1;~Gabriele_Scalia1;~G\u00f6kcen_Eraslan1;~Avantika_Lal1;~Sergey_Levine1;~Tommaso_Biancalani1", "aff": "Genentech ;Princeton University;Genentech;Genentech;Genentech;Genentech;Google;Genentech", "aff_domain": "gene.com;princeton.edu;gene.come;gene.com;gene.com;gene.com;google.com;gene.com", "position": "Researcher;PhD student;Principal Researcher;Researcher;Principal Researcher;Principal Researcher;Research Scientist;Director", "bibtex": "@inproceedings{\nuehara2024bridging,\ntitle={Bridging Model-Based Optimization and Generative Modeling via Conservative Fine-Tuning of Diffusion Models},\nauthor={Masatoshi Uehara and Yulai Zhao and Ehsan Hajiramezanali and Gabriele Scalia and G{\\\"o}kcen Eraslan and Avantika Lal and Sergey Levine and Tommaso Biancalani},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zIr2QjU4hl}\n}", "github": "", "reviewers": "dLuW;HnkA;hthV;wUpc", "pdf_size": 12698533, "rating": "5;6;7;7", "confidence": "4;2;3;3", "soundness": "2;3;4;3", "novelty": "2;3;3;3", "presentation": "2;4;4;2", "wc_summary": "81;127;130;34", "wc_strengths": "47;99;269;17", "wc_weaknesses": "195;186;105;25", "wc_questions": "63;112;105;1", "wc_limitations": "7;71;48;46", "wc_review": "393;595;657;123", "wc_reply_reviewers": "11;54;145;0", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 1.0 ], "wc_summary_avg": [ 93.0, 39.2109678533953 ], "wc_strengths_avg": [ 108.0, 97.47307320486001 ], "wc_weaknesses_avg": [ 127.75, 68.90346507977665 ], "wc_questions_avg": [ 70.25, 44.15526582413472 ], "wc_limitations_avg": [ 43.0, 22.9891278651453 ], "wc_review_avg": [ 442.0, 208.4442371474923 ], "wc_reply_reviewers_avg": [ 52.5, 57.089841478147406 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.42640143271122083, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9614735492083256162&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "gene.com;princeton.edu;gene.come;gene.com;gene.com;gene.com;google.com;gene.com", "author_num": 8, "aff_unique_index": "0;1;0;0;0;0;2;0", "aff_unique_norm": "Genentech;Princeton University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.genentech.com;https://www.princeton.edu;https://www.google.com", "aff_unique_abbr": "Genentech;Princeton;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Slicing Vision Transformer for Flexible Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92963", "id": "zJNSbgl4UA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zJNSbgl4UA", "openreview": "https://openreview.net/forum?id=zJNSbgl4UA", "poster": "/media/PosterPDFs/NeurIPS%202024/92963.png?t=1731290885.9911199", "project": "", "author_site": "Yitian Zhang, n n, Xu Ma, Huan Wang, Ke Ma, Stephen Chen, Derek Hu, Yun Fu", "tldr": "", "abstract": "Vision Transformers (ViT) is known for its scalability. In this work, we target to scale down a ViT to fit in an environment with dynamic-changing resource constraints. We observe that smaller ViTs are intrinsically the sub-networks of a larger ViT with different widths. Thus, we propose a general framework, named Scala, to enable a single network to represent multiple smaller ViTs with flexible inference capability, which aligns with the inherent design of ViT to vary from widths. Concretely, Scala activates several subnets during training, introduces Isolated Activation to disentangle the smallest sub-network from other subnets, and leverages Scale Coordination to ensure each sub-network receives simplified, steady, and accurate learning objectives. Comprehensive empirical validations on different tasks demonstrate that with only one-shot training, Scala learns slimmable representation without modifying the original ViT structure and matches the performance of Separate Training. Compared with the prior art, Scala achieves an average improvement of 1.6% on ImageNet-1K with fewer parameters.", "keywords": "Vision Transformer;Flexible Inference", "primary_area": "machine_vision", "supplementary_material": "", "author": "Yitian Zhang;Huseyin Coskun;Xu Ma;Huan Wang;Ke Ma;Stephen Xi Chen;Derek Hao Hu;Yun Fu", "authorids": "~Yitian_Zhang1;~Huseyin_Coskun1;~Xu_Ma2;~Huan_Wang3;~Ke_Ma3;~Stephen_Xi_Chen1;~Derek_Hao_Hu1;~Yun_Fu1", "gender": ";M;M;M;M;;M;M", "homepage": ";;https://ma-xu.github.io/;https://huanwang.tech/;;;;http://www1.ece.neu.edu/~yunfu/", "dblp": ";;77/9370-5;70/6155-14;;;h/DerekHaoHu;00/5815-1", "google_scholar": ";https://scholar.google.de/citations?user=nwjxmycAAAAJ;Ya7frcEAAAAJ;0-On0y4AAAAJ;ovZamhQAAAAJ;;Ks81aO0AAAAJ;https://scholar.google.com.tw/citations?user=h-JEcQ8AAAAJ", "orcid": ";;;0000-0001-6951-901X;;;;0000-0002-5098-2853", "linkedin": ";;;huanwang-zju/;;;;furaymond/", "or_profile": "~Yitian_Zhang1;~Huseyin_Coskun1;~Xu_Ma2;~Huan_Wang3;~Ke_Ma3;~Stephen_Xi_Chen1;~Derek_Hao_Hu1;~Yun_Fu1", "aff": ";Technical University Munich;Adobe Systems;Northeastern University;Snap Inc.;;Snap Inc.;Northeastern University", "aff_domain": ";tum.de;adobe.com;neu.edu;snapchat.com;;snap.com;northeastern.edu", "position": ";PhD student;Intern;PhD student;Researcher;;Senior Manager, Machine Learning Engineering;Full Professor", "bibtex": "@inproceedings{\nzhang2024slicing,\ntitle={Slicing Vision Transformer for Flexibile Inference},\nauthor={Yitian Zhang and Huseyin Coskun and Xu Ma and Huan Wang and Ke Ma and Stephen Xi Chen and Derek Hao Hu and Yun Fu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zJNSbgl4UA}\n}", "github": "", "reviewers": "7ef6;bSkR;rLHb;V4fU", "pdf_size": 723091, "rating": "4;5;6;7", "confidence": "5;3;3;3", "soundness": "2;3;3;3", "novelty": "2;3;2;3", "presentation": "2;3;2;4", "wc_summary": "103;138;264;73", "wc_strengths": "13;87;192;79", "wc_weaknesses": "157;124;553;76", "wc_questions": "4;4;207;67", "wc_limitations": "14;7;11;50", "wc_review": "291;360;1227;345", "wc_reply_reviewers": "96;0;343;13", "wc_reply_authors": "144;0;118;32", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.5, 0.8660254037844386 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 144.5, 72.72723011362388 ], "wc_strengths_avg": [ 92.75, 64.09514412184436 ], "wc_weaknesses_avg": [ 227.5, 190.12167156849847 ], "wc_questions_avg": [ 70.5, 82.89903497628907 ], "wc_limitations_avg": [ 20.5, 17.211914478058507 ], "wc_review_avg": [ 555.75, 388.395014772332 ], "wc_reply_reviewers_avg": [ 113.0, 137.8023947542277 ], "wc_reply_authors_avg": [ 73.5, 59.31905258852336 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:YTj04v7hVwoJ:scholar.google.com/&scioq=Slicing+Vision+Transformer+for+Flexible+Inference&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": ";tum.de;adobe.com;neu.edu;snapchat.com;;snap.com;northeastern.edu", "author_num": 8, "aff_unique_index": "0;1;2;3;3;2", "aff_unique_norm": "Technical University of Munich;Adobe;Northeastern University;Snap Inc.", "aff_unique_dep": ";Adobe Systems Incorporated;;", "aff_unique_url": "https://www.tum.de;https://www.adobe.com;https://www.northeastern.edu;https://www.snapinc.com", "aff_unique_abbr": "TUM;Adobe;NEU;Snap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Marginal Causal Flows for Validation and Inference", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92962", "id": "zJremsKVyh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zJremsKVyh", "openreview": "https://openreview.net/forum?id=zJremsKVyh", "poster": "", "project": "", "author_site": "Daniel de Vassimon Manela, Battaglia, Robin Evans", "tldr": "", "abstract": "Investigating the marginal causal effect of an intervention on an outcome from complex data remains challenging due to the inflexibility of employed models and the lack of complexity in causal benchmark datasets, which often fail to reproduce intricate real-world data patterns. In this paper we introduce Frugal Flows, a likelihood-based machine learning model that uses normalising flows to flexibly learn the data-generating process, while also directly targeting the marginal causal quantities inferred from observational data. We provide a novel algorithm for fitting a model to observational data with a parametrically specified causal distribution, and propose that these models are exceptionally well suited for synthetic data generation to validate causal methods. Unlike existing data generation methods, Frugal Flows generate synthetic data that closely resembles the empirical dataset, while also automatically and exactly satisfying a user-defined average treatment effect. To our knowledge, Frugal Flows are the first generative model to both learn flexible data representations and also \\textit{exactly} parameterise quantities such as the average treatment effect and the degree of unobserved confounding. We demonstrate the above with experiments on both simulated and real-world datasets.", "keywords": "Causal Inference;Normalising Flows;Synthetic Data;Marginal Structural Models", "primary_area": "causal_inference", "supplementary_material": "/attachment/1df1843e8db7c9355b9ad25ef25954693c495f16.zip", "author": "Daniel de Vassimon Manela;Laura Battaglia;Robin J. Evans", "authorids": "~Daniel_de_Vassimon_Manela1;~Laura_Battaglia1;~Robin_J._Evans2", "gender": ";;M", "homepage": ";https://www.stats.ox.ac.uk/people/daniel-de-vassimon-manela;http://www.stats.ox.ac.uk/~evans/", "dblp": ";;88/1856-2", "google_scholar": "IR5S8U4AAAAJ;ZinuBB4AAAAJ;P9lBQjUAAAAJ", "orcid": ";;0000-0002-9341-1313", "linkedin": ";;", "or_profile": "~Laura_Battaglia1;~Dan_Manela1;~Robin_Evans1", "aff": "University of Oxford;University of Oxford;University of Oxford", "aff_domain": "oxford.ac.uk;ox.ac.uk;stats.ox.ac.uk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmanela2024marginal,\ntitle={Marginal Causal Flows for Validation and Inference},\nauthor={Daniel de Vassimon Manela and Laura Battaglia and Robin J. Evans},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zJremsKVyh}\n}", "github": "", "reviewers": "SZi8;J42v;Cwuj;PATR", "pdf_size": 1513339, "rating": "6;7;7;7", "confidence": "3;3;4;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;4;2", "wc_summary": "26;133;120;105", "wc_strengths": "31;73;155;82", "wc_weaknesses": "2;235;210;187", "wc_questions": "131;190;113;44", "wc_limitations": "2;72;11;6", "wc_review": "192;703;609;424", "wc_reply_reviewers": "6;17;17;122", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 3.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 96.0, 41.611296543126365 ], "wc_strengths_avg": [ 85.25, 44.633927678392816 ], "wc_weaknesses_avg": [ 158.5, 91.93611912627159 ], "wc_questions_avg": [ 119.5, 52.06966487312935 ], "wc_limitations_avg": [ 22.75, 28.612715704735194 ], "wc_review_avg": [ 482.0, 195.2139851547527 ], "wc_reply_reviewers_avg": [ 40.5, 47.26785376976619 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1907586793183700156&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "oxford.ac.uk;ox.ac.uk;stats.ox.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Improving Sparse Decomposition of Language Model Activations with Gated Sparse Autoencoders", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92961", "id": "zLBlin2zvW", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zLBlin2zvW", "openreview": "https://openreview.net/forum?id=zLBlin2zvW", "poster": "/media/PosterPDFs/NeurIPS%202024/92961.png?t=1733760973.5067425", "project": "", "author_site": "Senthooran Rajamanoharan, Arthur Conmy, Lewis Smith, Tom Lieberum, Vikrant Varma, Janos Kramar, Rohin Shah, Neel Nanda", "tldr": "", "abstract": "Recent work has found that sparse autoencoders (SAEs) are an effective technique for unsupervised discovery of interpretable features in language models' (LMs) activations, by finding sparse, linear reconstructions of those activations. We introduce the Gated Sparse Autoencoder (Gated SAE), which achieves a Pareto improvement over training with prevailing methods. In SAEs, the L1 penalty used to encourage sparsity introduces many undesirable biases, such as shrinkage -- systematic underestimation of feature activations. The key insight of Gated SAEs is to separate the functionality of (a) determining which directions to use and (b) estimating the magnitudes of those directions: this enables us to apply the L1 penalty only to the former, limiting the scope of undesirable side effects. Through training SAEs on LMs of up to 7B parameters we find that, in typical hyper-parameter ranges, Gated SAEs solve shrinkage, are similarly interpretable, and require half as many firing features to achieve comparable reconstruction fidelity.", "keywords": "Mechanistic Interpretability;Sparse Autoencoders;Science of Deep Learning", "primary_area": "interpretability_and_explainability", "supplementary_material": "", "author": "Senthooran Rajamanoharan;Arthur Conmy;Lewis Smith;Tom Lieberum;Vikrant Varma;Janos Kramar;Rohin Shah;Neel Nanda", "authorids": "~Senthooran_Rajamanoharan1;~Arthur_Conmy1;~Lewis_Smith3;~Tom_Lieberum2;~Vikrant_Varma1;~Janos_Kramar1;~Rohin_Shah1;~Neel_Nanda1", "gender": "M;M;M;;;M;M;M", "homepage": ";https://arthurconmy.github.io/;https://lsgos.onl;;;;http://rohinshah.com/;https://neelnanda.io", "dblp": "118/5915;;;;281/7099;49/9013;145/1009;285/6389", "google_scholar": "W4JthzgAAAAJ;;;SyeX5d8AAAAJ;EPYHbToAAAAJ;;odFQXSYAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;;;;;", "linkedin": ";;;;;;rohin-shah-76405832/;https://linkedin.com/in/neel-nanda-993580151", "or_profile": "~Senthooran_Rajamanoharan1;~Arthur_Conmy1;~Lewis_Smith3;~Tom_Lieberum2;~Vikrant_Varma1;~Janos_Kramar1;~Rohin_Shah1;~Neel_Nanda1", "aff": "Google DeepMind;Google DeepMind;Google;Google;Google DeepMind;Google DeepMind;Google DeepMind;Google DeepMind", "aff_domain": "deepmind.com;google.com;google.com;deepmind.com;deepmind.com;deepmind.com;deepmind.com;deepmind.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nrajamanoharan2024improving,\ntitle={Improving Sparse Decomposition of Language Model Activations with Gated Sparse Autoencoders},\nauthor={Senthooran Rajamanoharan and Arthur Conmy and Lewis Smith and Tom Lieberum and Vikrant Varma and Janos Kramar and Rohin Shah and Neel Nanda},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zLBlin2zvW}\n}", "github": "", "reviewers": "58d7;1pM6;Qtwo;Hix2", "pdf_size": 1138801, "rating": "6;6;7;7", "confidence": "3;4;5;3", "soundness": "3;4;3;4", "novelty": "3;3;3;3", "presentation": "3;3;4;4", "wc_summary": "110;35;194;98", "wc_strengths": "50;82;118;59", "wc_weaknesses": "94;219;385;46", "wc_questions": "108;1;55;85", "wc_limitations": "1;1;2;104", "wc_review": "363;338;754;392", "wc_reply_reviewers": "160;20;369;0", "wc_reply_authors": "179;0;304;0", "reply_reviewers": "2;1;1;0", "reply_authors": "2;1;2;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 3.75, 0.82915619758885 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 109.25, 56.6187910503218 ], "wc_strengths_avg": [ 77.25, 26.261902063635834 ], "wc_weaknesses_avg": [ 186.0, 131.10491981615334 ], "wc_questions_avg": [ 62.25, 40.04606722263748 ], "wc_limitations_avg": [ 27.0, 44.45784520194383 ], "wc_review_avg": [ 461.75, 169.80926800384012 ], "wc_reply_reviewers_avg": [ 137.25, 147.3183203135306 ], "wc_reply_authors_avg": [ 120.75, 128.583387340667 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.5, 0.5 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.30151134457776363, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12181351122635816345&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "deepmind.com;google.com;google.com;deepmind.com;deepmind.com;deepmind.com;deepmind.com;deepmind.com", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;1;0;0;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Logarithmic Smoothing for Pessimistic Off-Policy Evaluation, Selection and Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92960", "id": "zLClygeRK8", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zLClygeRK8", "openreview": "https://openreview.net/forum?id=zLClygeRK8", "poster": "/media/PosterPDFs/NeurIPS%202024/92960.png?t=1730898774.8469748", "project": "", "author_site": "Otmane Sakhi, Imad Aouali, Pierre Alquier, Nicolas Chopin", "tldr": "", "abstract": "This work investigates the offline formulation of the contextual bandit problem, where the goal is to leverage past interactions collected under a behavior policy to evaluate, select, and learn new, potentially better-performing, policies. Motivated by critical applications, we move beyond point estimators. Instead, we adopt the principle of _pessimism_ where we construct upper bounds that assess a policy's worst-case performance, enabling us to confidently select and learn improved policies. Precisely, we introduce novel, fully empirical concentration bounds for a broad class of importance weighting risk estimators. These bounds are general enough to cover most existing estimators and pave the way for the development of new ones. In particular, our pursuit of the tightest bound within this class motivates a novel estimator (LS), that _logarithmically smoothes_ large importance weights. The bound for LS is provably tighter than its competitors, and naturally results in improved policy selection and learning strategies. Extensive policy evaluation, selection, and learning experiments highlight the versatility and favorable performance of LS.", "keywords": "offline contextual bandit;off-policy evaluation;off-policy selection;off-policy learning;pessimism", "primary_area": "reinforcement_learning", "supplementary_material": "/attachment/cba8c7c3041eaa14a49145883f724a603d37df7e.zip", "author": "Otmane Sakhi;Imad Aouali;Pierre Alquier;Nicolas Chopin", "authorids": "~Otmane_Sakhi1;~Imad_Aouali2;~Pierre_Alquier1;~Nicolas_Chopin1", "gender": "M;M;M;M", "homepage": ";https://pierrealquier.github.io/;https://nchopin.github.io/;https://www.iaouali.com/", "dblp": "249/9312;15/8421;24/7601;298/2200", "google_scholar": ";https://scholar.google.fr/citations?user=ngkCAJYAAAAJ;https://scholar.google.co.uk/citations?user=pXG4LfoAAAAJ;cG9L1BwAAAAJ", "orcid": ";0000-0003-4249-7337;0000-0002-0628-5815;", "linkedin": "otmane-sakhi/;pierre-alquier-601453159;;imad-aouali/", "or_profile": "~Otmane_Sakhi1;~Pierre_Alquier1;~Nicolas_Chopin1;~Imad_AOUALI1", "aff": "Criteo;ESSEC Business School, Asia-Pacific campus;Ecole Nationale de la Statistique et de l'Administration Economique;Ecole Nationale de la Statistique et de l'Administration Economique", "aff_domain": "criteo.com;essec.edu;ensae.fr;ensae.fr", "position": "Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nsakhi2024logarithmic,\ntitle={Logarithmic Smoothing for Pessimistic Off-Policy Evaluation, Selection and Learning},\nauthor={Otmane Sakhi and Imad Aouali and Pierre Alquier and Nicolas Chopin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zLClygeRK8}\n}", "github": "", "reviewers": "PPYn;J96t;j8sh;VVmk", "pdf_size": 733432, "rating": "3;6;7;10", "confidence": "5;3;2;5", "soundness": "2;3;3;4", "novelty": "2;3;3;4", "presentation": "1;2;3;4", "wc_summary": "47;87;113;140", "wc_strengths": "51;100;67;7", "wc_weaknesses": "298;185;180;28", "wc_questions": "128;51;2;15", "wc_limitations": "16;12;1;1", "wc_review": "540;435;363;191", "wc_reply_reviewers": "103;42;41;0", "wc_reply_authors": "218;0;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "2;1;1;1", "rating_avg": [ 6.5, 2.5 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.5, 1.118033988749895 ], "wc_summary_avg": [ 96.75, 34.29559009552103 ], "wc_strengths_avg": [ 56.25, 33.47667098144617 ], "wc_weaknesses_avg": [ 172.75, 95.97232674057663 ], "wc_questions_avg": [ 49.0, 49.01530373260988 ], "wc_limitations_avg": [ 7.5, 6.652067347825035 ], "wc_review_avg": [ 382.25, 127.09715771802294 ], "wc_reply_reviewers_avg": [ 46.5, 36.7593525514256 ], "wc_reply_authors_avg": [ 54.5, 94.39676901250381 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.03849001794597505, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12783951444993271112&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 12, "email": "criteo.com;essec.edu;ensae.fr;ensae.fr", "author_num": 4, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Criteo;ESSEC Business School;Ecole Nationale de la Statistique et de l'Administration Economique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.criteo.com;https://www.essec.edu;https://ensae.fr", "aff_unique_abbr": "Criteo;ESSEC;ENSAE", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia-Pacific", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "DART-Math: Difficulty-Aware Rejection Tuning for Mathematical Problem-Solving", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92959", "id": "zLU21oQjD5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zLU21oQjD5", "openreview": "https://openreview.net/forum?id=zLU21oQjD5", "poster": "/media/PosterPDFs/NeurIPS%202024/92959.png?t=1733440586.0256467", "project": "", "author_site": "Yuxuan Tong, Xiwen Zhang, Rui Wang, Ruidong Wu, Junxian He", "tldr": "", "abstract": "Solving mathematical problems requires advanced reasoning abilities and presents notable challenges for large language models. Previous works usually synthesize data from proprietary models to augment existing datasets, followed by instruction tuning to achieve top-tier results. However, our analysis of these datasets reveals severe biases towards easy queries, with frequent failures to generate any correct response for the most challenging queries.\nHypothesizing that difficult queries are crucial to learning complex reasoning, we propose *Difficulty-Aware Rejection Tuning* (`DART`), a method that allocates difficult queries more trials during the synthesis phase, enabling more extensive training on difficult samples.\nUtilizing `DART`, we have created new datasets for mathematical problem-solving that focus more on difficult queries and are substantially smaller than previous ones. Remarkably, our synthesis process solely relies on a 7B-sized open-weight model, without reliance on the commonly used proprietary GPT-4.\nWe fine-tune various base models on our datasets ranging from 7B to 70B in size, resulting in a series of strong models called `DART-Math`.\nIn comprehensive in-domain and out-of-domain evaluation on 6 mathematical benchmarks, `DART-Math` outperforms vanilla rejection tuning significantly, being superior or comparable to previous arts, despite using much smaller datasets and no proprietary models. Furthermore, our results position our synthetic datasets as the most effective and cost-efficient publicly available resources for advancing mathematical problem-solving. Our datasets, models and code are publicly available at https://github.com/hkust-nlp/dart-math.", "keywords": "Large Language Models;Mathematical Reasoning;Synthetic Data", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Yuxuan Tong;Xiwen Zhang;Rui Wang;Ruidong Wu;Junxian He", "authorids": "~Yuxuan_Tong2;~Xiwen_Zhang2;~Rui_Wang1;~Ruidong_Wu1;~Junxian_He1", "gender": ";M;M;M;M", "homepage": ";https://xiwen1995.github.io/;https://www.ruiwang1998.com;;https://jxhe.github.io", "dblp": ";;06/2293;224/4293;188/6127.html", "google_scholar": ";9ArsuzwAAAAJ;;lNeJlFYAAAAJ;BIFGeoUAAAAJ", "orcid": ";;;0009-0002-6402-4717;", "linkedin": ";;;ruidong-wu-0b7182224/;", "or_profile": "~Yuxuan_Tong2;~Xiwen_Zhang2;~Rui_Wang1;~Ruidong_Wu1;~Junxian_He1", "aff": ";Helixon Research;Helixon;Helixon AI;Hong Kong University of Science and Technology", "aff_domain": ";helixon.com;helixon.com;helixon.com;ust.hk", "position": ";Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ntong2024dartmath,\ntitle={{DART}-Math: Difficulty-Aware Rejection Tuning for Mathematical Problem-Solving},\nauthor={Yuxuan Tong and Xiwen Zhang and Rui Wang and Ruidong Wu and Junxian He},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zLU21oQjD5}\n}", "github": "", "reviewers": "quxz;M9p3;neCQ;dSs8", "pdf_size": 571797, "rating": "4;4;5;6", "confidence": "5;4;5;4", "soundness": "2;2;2;3", "novelty": "1;2;2;3", "presentation": "3;3;3;3", "wc_summary": "37;87;47;97", "wc_strengths": "21;63;22;56", "wc_weaknesses": "122;96;85;246", "wc_questions": "85;38;26;207", "wc_limitations": "1;8;1;6", "wc_review": "266;292;181;612", "wc_reply_reviewers": "287;305;20;0", "wc_reply_authors": "785;696;158;0", "reply_reviewers": "2;2;1;0", "reply_authors": "3;3;2;1", "rating_avg": [ 4.75, 0.82915619758885 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 67.0, 25.495097567963924 ], "wc_strengths_avg": [ 40.5, 19.1637678967368 ], "wc_weaknesses_avg": [ 137.25, 64.2081575814164 ], "wc_questions_avg": [ 89.0, 71.60656394493455 ], "wc_limitations_avg": [ 4.0, 3.082207001484488 ], "wc_review_avg": [ 337.75, 163.5731869836863 ], "wc_reply_reviewers_avg": [ 153.0, 143.31608423341743 ], "wc_reply_authors_avg": [ 409.75, 336.9067934904252 ], "reply_reviewers_avg": [ 1.25, 0.82915619758885 ], "reply_authors_avg": [ 2.25, 0.82915619758885 ], "replies_avg": [ 21, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.30151134457776363, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8372923690243322085&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "email": ";helixon.com;helixon.com;helixon.com;ust.hk", "author_num": 5, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Helixon Research;Helixon;Helixon AI;Hong Kong University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": ";;;https://www.ust.hk", "aff_unique_abbr": ";;Helixon AI;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;2", "aff_country_unique": "United States;;China" }, { "title": "Improved Few-Shot Jailbreaking Can Circumvent Aligned Language Models and Their Defenses", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92958", "id": "zMNd0JuceF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zMNd0JuceF", "openreview": "https://openreview.net/forum?id=zMNd0JuceF", "poster": "", "project": "", "author_site": "Xiaosen Zheng, Tianyu Pang, Chao Du, Qian Liu, Jing Jiang, Min Lin", "tldr": "", "abstract": "Recently, Anil et al. (2024) show that many-shot (up to hundreds of) demonstrations can jailbreak state-of-the-art LLMs by exploiting their long-context capability. Nevertheless, is it possible to use few-shot demonstrations to efficiently jailbreak LLMs within limited context sizes? While the vanilla few-shot jailbreaking may be inefficient, we propose improved techniques such as injecting special system tokens like [/INST] and employing demo-level random search from a collected demo pool. These simple techniques result in surprisingly effective jailbreaking against aligned LLMs (even with advanced defenses). For example, our method achieves >80% (mostly >95%) ASRs on Llama-2-7B and Llama-3-8B without multiple restarts, even if the models are enhanced by strong defenses such as perplexity detection and/or SmoothLLM, which is challenging for suffix-based jailbreaking. In addition, we conduct comprehensive and elaborate (e.g., making sure to use correct system prompts) evaluations against other aligned LLMs and advanced defenses, where our method consistently achieves nearly 100% ASRs. Our code is available at https://github.com/sail-sg/I-FSJ.", "keywords": "Jailbreaking Attacks;Large Language Models;Alignment;Jailbreaking Defenses", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/69e4cd26f91ae3e0f17993b1780b4eb5b9998c1c.zip", "author": "Xiaosen Zheng;Tianyu Pang;Chao Du;Qian Liu;Jing Jiang;Min Lin", "authorids": "~Xiaosen_Zheng1;~Tianyu_Pang1;~Chao_Du1;~Qian_Liu2;~Jing_Jiang1;~Min_Lin1", "gender": "M;M;M;M;F;M", "homepage": "https://xszheng2020.github.io;https://p2333.github.io/;https://duchao0726.github.io/;http://siviltaram.github.io/;http://www.mysmu.edu/faculty/jingjiang/;https://linmin.me", "dblp": "219/6063;202/2550;75/7523;;68/1974-1;", "google_scholar": "https://scholar.google.com/citations?hl=en;wYDbtFsAAAAJ;QOp7xW0AAAAJ;bcbeUo0AAAAJ;https://scholar.google.com.sg/citations?user=hVTK2YwAAAAJ;BGONmkIAAAAJ", "orcid": ";0000-0003-0639-6176;0000-0003-1244-6336;;0000-0002-3035-0074;", "linkedin": ";%E5%A4%A9%E5%AE%87-%E5%BA%9E-b3999017a/;duchao/;;;min-lin-08a3a422/", "or_profile": "~Xiaosen_Zheng1;~Tianyu_Pang1;~Chao_Du1;~Qian_Liu2;~Jing_Jiang1;~Min_Lin1", "aff": "Sea AI Lab;Sea AI Lab;Sea AI Lab;Tiktok;Singapore Management University;Sea AI Lab", "aff_domain": "sea.com;sea.com;sea.com;bytedance.com;smu.edu.sg;sea.com", "position": "Research Intern;Senior Research Scientist;Senior Research Scientist;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nzheng2024improved,\ntitle={Improved Few-Shot Jailbreaking Can Circumvent Aligned Language Models and Their Defenses},\nauthor={Xiaosen Zheng and Tianyu Pang and Chao Du and Qian Liu and Jing Jiang and Min Lin},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zMNd0JuceF}\n}", "github": "", "reviewers": "P1po;XP1a;PZVT;PMJL;TSPr", "pdf_size": 638058, "rating": "3;6;6;7;7", "confidence": "4;4;4;4;4", "soundness": "2;3;3;3;4", "novelty": "2;3;3;3;3", "presentation": "2;3;3;4;3", "wc_summary": "28;105;44;43;93", "wc_strengths": "46;206;13;156;48", "wc_weaknesses": "167;397;87;225;178", "wc_questions": "2;63;5;208;4", "wc_limitations": "1;13;9;1;15", "wc_review": "244;784;158;633;338", "wc_reply_reviewers": "478;77;55;179;43", "wc_reply_authors": "531;36;41;36;24", "reply_reviewers": "1;1;1;1;1", "reply_authors": "5;2;2;2;2", "rating_avg": [ 5.8, 1.469693845669907 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 62.6, 30.493277947770718 ], "wc_strengths_avg": [ 93.8, 73.9848633167623 ], "wc_weaknesses_avg": [ 210.8, 103.14339532902726 ], "wc_questions_avg": [ 56.4, 79.21262525633145 ], "wc_limitations_avg": [ 7.8, 5.878775382679628 ], "wc_review_avg": [ 431.4, 238.14247836116934 ], "wc_reply_reviewers_avg": [ 166.4, 163.0234338983203 ], "wc_reply_authors_avg": [ 133.6, 198.77887211673178 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.6, 1.2000000000000002 ], "replies_avg": [ 25, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5135405495683394869&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 5, "email": "sea.com;sea.com;sea.com;bytedance.com;smu.edu.sg;sea.com", "author_num": 6, "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Sea AI Lab;TikTok;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.tiktok.com;https://www.smu.edu.sg", "aff_unique_abbr": ";TikTok;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;2", "aff_country_unique": ";China;Singapore" }, { "title": "Continuous Spatiotemporal Events Decoupling through Spike-based Bayesian Computation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92957", "id": "zNIhPZnqhh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zNIhPZnqhh", "openreview": "https://openreview.net/forum?id=zNIhPZnqhh", "poster": "", "project": "", "author_site": "Yajing Zheng, Jiyuan Zhang, Tiejun Huang, Zhaofei Yu", "tldr": "", "abstract": "Numerous studies have demonstrated that the cognitive processes of the human brain can be modeled using the Bayesian theorem for probabilistic inference of the external world. Spiking neural networks (SNNs), capable of performing Bayesian computation with greater physiological interpretability, offer a novel approach to distributed information processing in the cortex. However, applying these models to real-world scenarios to harness the advantages of brain-like computation remains a challenge. \nRecently, bio-inspired sensors with high dynamic range and ultra-high temporal resolution have been widely used in extreme vision scenarios. Event streams, generated by various types of motion, represent spatiotemporal data. Inferring motion targets from these streams without prior knowledge remains a difficult task. The Bayesian inference-based Expectation-Maximization (EM) framework has proven effective for motion segmentation in event streams, allowing for decoupling without prior information about the motion or its source. \nThis work demonstrates that Bayesian computation based on spiking neural networks can decouple event streams of different motions. The Winner-Take-All (WTA) circuits in the constructed network implement an equivalent E-step, while STDP achieves an equivalent optimization in M-step. Through theoretical analysis and experiments, we show that STDP-based learning can maximize the contrast of warped events under mixed motion models. Experimental results show that the constructed spiking network can effectively segment the motion contained in event streams.", "keywords": "Bayesian Computation;Spiking Neural Network;Event Cameras;Motion Segmentation;Winner-Take-All;Spike-timing-dependent plasticity", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yajing Zheng;Jiyuan Zhang;Zhaofei Yu;Tiejun Huang", "authorids": "~Yajing_Zheng1;~Jiyuan_Zhang3;~Zhaofei_Yu1;~Tiejun_Huang1", "gender": "F;M;M;M", "homepage": "https://zyj061.github.io;;https://yuzhaofei.github.io;https://idm.pku.edu.cn/~tjhuang/", "dblp": "230/4398;;166/0573;h/TiejunHuang", "google_scholar": "_bUM0NcAAAAJ;ukHrw0IAAAAJ;qaUgD50AAAAJ;https://scholar.google.com.tw/citations?user=knvEK4AAAAAJ", "orcid": ";;;0000-0002-4234-6099", "linkedin": ";jiyuanzhang-leo;;", "or_profile": "~Yajing_Zheng1;~Jiyuan_Zhang3;~Zhaofei_Yu1;~Tiejun_Huang1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Postdoc;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzheng2024continuous,\ntitle={Continuous Spatiotemporal Events Decoupling through Spike-based Bayesian Computation},\nauthor={Yajing Zheng and Jiyuan Zhang and Tiejun Huang and Zhaofei Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zNIhPZnqhh}\n}", "github": "", "reviewers": "pCA1;WNgv;LG4A;wrk7", "pdf_size": 12096447, "rating": "5;5;7;7", "confidence": "2;3;5;5", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;1;3;3", "wc_summary": "37;31;83;70", "wc_strengths": "37;46;73;49", "wc_weaknesses": "20;119;56;38", "wc_questions": "29;567;3;37", "wc_limitations": "24;3;16;6", "wc_review": "147;766;231;200", "wc_reply_reviewers": "153;78;113;19", "wc_reply_authors": "493;190;34;20", "reply_reviewers": "2;1;1;1", "reply_authors": "3;2;2;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 3.75, 1.299038105676658 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.82915619758885 ], "wc_summary_avg": [ 55.25, 21.84462176372024 ], "wc_strengths_avg": [ 51.25, 13.311179511974137 ], "wc_weaknesses_avg": [ 58.25, 37.31202889149825 ], "wc_questions_avg": [ 159.0, 235.8940440112891 ], "wc_limitations_avg": [ 12.25, 8.317902379807062 ], "wc_review_avg": [ 336.0, 250.07098992086227 ], "wc_reply_reviewers_avg": [ 90.75, 49.19540120783649 ], "wc_reply_authors_avg": [ 184.25, 190.33703659561374 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9622504486493763, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:SotdtUSI954J:scholar.google.com/&scioq=Continuous+Spatiotemporal+Events+Decoupling+through+Spike-based+Bayesian+Computation&hl=en&as_sdt=0,5", "gs_version_total": 0, "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "ResAD: A Simple Framework for Class Generalizable Anomaly Detection", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92956", "id": "zNiJZUAlxg", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zNiJZUAlxg", "openreview": "https://openreview.net/forum?id=zNiJZUAlxg", "poster": "/media/PosterPDFs/NeurIPS%202024/92956.png?t=1730729842.5964487", "project": "", "author_site": "Xincheng Yao, Zixin Chen, Chao Gao, Guangtao Zhai, Chongyang Zhang", "tldr": "", "abstract": "This paper explores the problem of class-generalizable anomaly detection, where the objective is to train one unified AD model that can generalize to detect anomalies in diverse classes from different domains without any retraining or fine-tuning on the target data. Because normal feature representations vary significantly across classes, this will cause the widely studied one-for-one AD models to be poorly classgeneralizable (i.e., performance drops dramatically when used for new classes). In this work, we propose a simple but effective framework (called ResAD) that can be directly applied to detect anomalies in new classes. Our main insight is to learn the residual feature distribution rather than the initial feature distribution. In this way, we can significantly reduce feature variations. Even in new classes, the distribution of normal residual features would not remarkably shift from the learned distribution. Therefore, the learned model can be directly adapted to new classes. ResAD consists of three components: (1) a Feature Converter that converts initial features into residual features; (2) a simple and shallow Feature Constraintor that constrains normal residual features into a spatial hypersphere for further reducing feature variations and maintaining consistency in feature scales among different classes; (3) a Feature Distribution Estimator that estimates the normal residual feature distribution, anomalies can be recognized as out-of-distribution. Despite the simplicity, ResAD can achieve remarkable anomaly detection results when directly used in new classes. The code is available at https://github.com/xcyao00/ResAD.", "keywords": "class-generalizable anomaly detection", "primary_area": "machine_vision", "supplementary_material": "", "author": "Xincheng Yao;Zixin Chen;Chao Gao;Guangtao Zhai;Chongyang Zhang", "authorids": "~Xincheng_Yao2;~Zixin_Chen1;~Chao_Gao6;~Guangtao_Zhai1;~Chongyang_Zhang1", "gender": "M;M;M;M;M", "homepage": ";https://github.com/ZixinChen-S;https://github.com/jsyzgaochao;https://faculty.sjtu.edu.cn/zhaiguangtao/en/index.htm;https://ee.sjtu.edu.cn/FacultyDetail.aspx?id=96&infoid=66&flag=66", "dblp": "310/4056;;;19/3230;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;E6zbSYgAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": "0000-0001-6946-9171;0000-0001-7775-9478;;;0000-0001-7292-0445", "linkedin": ";;;;", "or_profile": "~Xincheng_Yao2;~Zixin_Chen1;~Chao_Gao6;~Guangtao_Zhai1;~Chongyang_Zhang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu;;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;MS student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyao2024resad,\ntitle={Res{AD}: A Simple Framework for Class Generalizable Anomaly Detection},\nauthor={Xincheng Yao and Zixin Chen and Chao Gao and Guangtao Zhai and Chongyang Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zNiJZUAlxg}\n}", "github": "", "reviewers": "fL7E;6bzj;CQaQ;226V", "pdf_size": 2544729, "rating": "5;6;6;7", "confidence": "5;5;4;5", "soundness": "2;3;3;4", "novelty": "2;2;4;3", "presentation": "2;3;3;4", "wc_summary": "46;54;63;72", "wc_strengths": "24;36;56;41", "wc_weaknesses": "139;139;75;131", "wc_questions": "75;270;29;255", "wc_limitations": "1;14;7;16", "wc_review": "285;513;230;515", "wc_reply_reviewers": "86;163;15;110", "wc_reply_authors": "1095;2135;0;24", "reply_reviewers": "1;2;1;1", "reply_authors": "3;5;1;2", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 4.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.75, 0.82915619758885 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 58.75, 9.730750228014282 ], "wc_strengths_avg": [ 39.25, 11.4755174175285 ], "wc_weaknesses_avg": [ 121.0, 26.758176320519304 ], "wc_questions_avg": [ 157.25, 106.63108130371745 ], "wc_limitations_avg": [ 9.5, 5.937171043518958 ], "wc_review_avg": [ 385.75, 129.71772238210167 ], "wc_reply_reviewers_avg": [ 93.5, 53.20009398487939 ], "wc_reply_authors_avg": [ 813.5, 881.8584070019405 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.75, 1.479019945774904 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:FpnnaP36EQoJ:scholar.google.com/&scioq=ResAD:+A+Simple+Framework+for+Class+Generalizable+Anomaly+Detection&hl=en&as_sdt=0,44", "gs_version_total": 4, "email": "sjtu.edu.cn;sjtu.edu;;sjtu.edu.cn;sjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Correlated Prompting for Visual Recognition with Missing Modalities", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92955", "id": "zO55ovdLJw", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zO55ovdLJw", "openreview": "https://openreview.net/forum?id=zO55ovdLJw", "poster": "/media/PosterPDFs/NeurIPS%202024/92955.png?t=1729063642.0270283", "project": "", "author_site": "lianyu hu, Tongkai Shi, Wei Feng, Fanhua Shang, Liang Wan", "tldr": "", "abstract": "Large-scale multimodal models have shown excellent performance over a series of tasks powered by the large corpus of paired multimodal training data. Generally, they are always assumed to receive modality-complete inputs. However, this simple assumption may not always hold in the real world due to privacy constraints or collection difficulty, where models pretrained on modality-complete data easily demonstrate degraded performance on missing-modality cases. To handle this issue, we refer to prompt learning to adapt large pretrained multimodal models to handle missing-modality scenarios by regarding different missing cases as different types of input. Instead of only prepending independent prompts to the intermediate layers, we present to leverage the correlations between prompts and input features and excavate the relationships between different layers of prompts to carefully design the instructions. We also incorporate the complementary semantics of different modalities to guide the prompting design for each modality. Extensive experiments on three commonly-used datasets consistently demonstrate the superiority of our method compared to the previous approaches upon different missing scenarios. Plentiful ablations are further given to show the generalizability and reliability of our method upon different modality-missing ratios and types.", "keywords": "Multimodal prompting;Mutltimodal models;Missing modalities", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lianyu Hu;Tongkai Shi;Wei Feng;Fanhua Shang;Liang Wan", "authorids": "~Lianyu_Hu1;~Tongkai_Shi1;~Wei_Feng1;~Fanhua_Shang2;~Liang_Wan1", "gender": "M;M;M;M;F", "homepage": "https://hulianyuyy.github.io/;https://stk2666.github.io/;;https://sites.google.com/site/fanhua217/home;http://cic.tju.edu.cn/faculty/lwan/index.html", "dblp": "324/5573;375/3639;17/1152-5;66/9057;", "google_scholar": "UNn2NIwAAAAJ;YqWiWT8AAAAJ;https://scholar.google.co.jp/citations?user=7ory1i8AAAAJ;rk_HZTkAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";;;0000-0002-1040-352X;", "linkedin": ";;;;", "or_profile": "~Lianyu_Hu1;~Tongkai_Shi1;~Wei_Feng1;~Fanhua_Shang2;~Liang_Wan1", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "PhD student;MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2024deep,\ntitle={Deep Correlated Prompting for Visual Recognition with Missing Modalities},\nauthor={Lianyu Hu and Tongkai Shi and Wei Feng and Fanhua Shang and Liang Wan},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zO55ovdLJw}\n}", "github": "", "reviewers": "xVBv;YNgk;64zT;BZuF;nnhg", "pdf_size": 784749, "rating": "4;5;5;6;6", "confidence": "5;4;5;4;4", "soundness": "3;3;4;3;2", "novelty": "2;2;3;3;2", "presentation": "3;3;3;3;2", "wc_summary": "147;51;114;86;81", "wc_strengths": "53;17;117;61;27", "wc_weaknesses": "269;80;208;44;302", "wc_questions": "98;37;213;48;135", "wc_limitations": "7;1;30;25;4", "wc_review": "574;186;682;264;549", "wc_reply_reviewers": "0;49;0;72;241", "wc_reply_authors": "68;0;69;0;119", "reply_reviewers": "0;1;0;1;2", "reply_authors": "2;1;2;1;3", "rating_avg": [ 5.2, 0.7483314773547882 ], "confidence_avg": [ 4.4, 0.48989794855663565 ], "soundness_avg": [ 3.0, 0.6324555320336759 ], "novelty_avg": [ 2.4, 0.4898979485566356 ], "presentation_avg": [ 2.8, 0.39999999999999997 ], "wc_summary_avg": [ 95.8, 32.480147782915026 ], "wc_strengths_avg": [ 55.0, 34.962837413459454 ], "wc_weaknesses_avg": [ 180.6, 102.0619419764292 ], "wc_questions_avg": [ 106.2, 63.998124972533375 ], "wc_limitations_avg": [ 13.4, 11.774548823628022 ], "wc_review_avg": [ 451.0, 191.46174552635833 ], "wc_reply_reviewers_avg": [ 72.4, 88.83377735974081 ], "wc_reply_authors_avg": [ 51.2, 45.69201243105845 ], "reply_reviewers_avg": [ 0.8, 0.7483314773547883 ], "reply_authors_avg": [ 1.8, 0.7483314773547883 ], "replies_avg": [ 20, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.7637626158259732, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:VLYeAiyvwSoJ:scholar.google.com/&scioq=Deep+Correlated+Prompting+for+Visual+Recognition+with+Missing+Modalities&hl=en&as_sdt=0,5", "gs_version_total": 5, "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "kGym: A Platform and Dataset to Benchmark Large Language Models on Linux Kernel Crash Resolution", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97426", "id": "zQ3qU0xWZ5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zQ3qU0xWZ5", "openreview": "https://openreview.net/forum?id=zQ3qU0xWZ5", "poster": "", "project": "", "author_site": "Alex Mathai, Chenxi Huang, Petros Maniatis, Aleksandr Nogikh, Franjo Ivan\u010di\u0107, Junfeng Yang, Baishakhi Ray", "tldr": "", "abstract": "Large Language Models (LLMs) are consistently improving at increasingly realistic software engineering (SE) tasks. In real-world software stacks, significant SE effort is spent developing foundational system software like the Linux kernel. \nUnlike application-level software, a systems codebase like Linux is multilingual (low-level C/Assembly/Bash/Rust); gigantic (>20 million lines); critical (impacting billions of devices worldwide), and highly concurrent (involving complex multi-threading). To evaluate if machine learning (ML) models are useful while developing such large-scale systems-level software, we introduce kGym (a platform) and kBench (a dataset). The kGym platform provides a SE environment for large-scale experiments on the Linux kernel, including compiling and running kernels in parallel across several virtual machines, detecting operations and crashes, inspecting logs, and querying and patching the code base. We use kGym to facilitate evaluation on kBench, a crash resolution benchmark drawn from real-world Linux kernel bugs. An example bug in kBench contains crashing stack traces, a bug-reproducer file, a developer-written fix, and other associated data. To understand current performance, we conduct baseline experiments by prompting LLMs to resolve Linux kernel crashes. Our initial evaluations reveal that the best performing LLM achieves 0.72\\% and 5.38\\% in the unassisted and assisted (i.e., buggy files disclosed to the model) settings, respectively. These results highlight the need for further research to enhance model performance in SE tasks. Improving performance on kBench requires models to master new learning skills, including understanding the cause of crashes and repairing faults, writing memory-safe and hardware-aware code, and understanding concurrency. As a result, this work opens up multiple avenues of research at the intersection of machine learning and systems software.", "keywords": "Benchmarks;Datasets;Natural Language Processing;Linux Kernel;Code LLMs", "primary_area": "", "supplementary_material": "/attachment/677043df6e032055ab94bf270c60696a100b28db.zip", "author": "Alex Mathai;Chenxi Huang;Petros Maniatis;Aleksandr Nogikh;Franjo Ivancic;Junfeng Yang;Baishakhi Ray", "authorids": "~Alex_Mathai1;~Chenxi_Huang4;~Petros_Maniatis1;~Aleksandr_Nogikh1;~Franjo_Ivancic1;~Junfeng_Yang1;~Baishakhi_Ray2", "gender": "M;M;M;M;M;M;F", "homepage": "https://alexmathai.online;https://chenxi-huang.com;https://ai.google/research/people/PetrosManiatis;;https://www.franjo-ivancic.info;https://www.cs.columbia.edu/~junfeng/;http://rayb.info/", "dblp": "278/2636;;m/PetrosManiatis;;;71/3724.html;74/1969", "google_scholar": "uG9zSdUAAAAJ;;https://scholar.google.com/citations?hl=en;;;JJ9AvbAAAAAJ;https://scholar.google.com.tw/citations?user=VaAEb5YAAAAJ", "orcid": ";;;;;0009-0000-2277-6545;", "linkedin": ";;;aleksandr-nogikh/;;;", "or_profile": "~Alex_Mathai1;~Chenxi_Huang4;~Petros_Maniatis1;~Aleksandr_Nogikh1;~Franjo_Ivancic1;~Junfeng_Yang1;~Baishakhi_Ray2", "aff": "Columbia University;University of Minnesota - Twin Cities;Google DeepMind;Google;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;umn.edu;google.com;google.com;columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;Undergrad student;Research Scientist;Researcher;Lecturer;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nmathai2024kgym,\ntitle={kGym: A Platform and Dataset to Benchmark Large Language Models on Linux Kernel Crash Resolution},\nauthor={Alex Mathai and Chenxi Huang and Petros Maniatis and Aleksandr Nogikh and Franjo Ivancic and Junfeng Yang and Baishakhi Ray},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=zQ3qU0xWZ5}\n}", "github": "", "reviewers": "KpsL;BMph;kfnU", "pdf_size": 1739260, "rating": "8;8;8", "confidence": "3;4;4", "wc_summary_and_contributions": "72;59;275", "wc_strengths": "53;47;174", "wc_improvement": "53;134;310", "wc_limitations": "48;34;148", "wc_correctness": "14;11;81", "wc_clarity": "1;24;133", "wc_relation_to_prior_work": "1;20;100", "wc_documentation": "24;51;44", "wc_additional_feedback": "1;1;1", "wc_review": "267;381;1266", "wc_reply_reviewers": "0;12;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;1;0", "reply_authors": "1;1;1", "rating_avg": [ 8.0, 0.0 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "wc_summary_and_contributions_avg": [ 135.33333333333334, 98.90174697929028 ], "wc_strengths_avg": [ 91.33333333333333, 58.50546033396275 ], "wc_improvement_avg": [ 165.66666666666666, 107.28259670401138 ], "wc_limitations_avg": [ 76.66666666666667, 50.76306618880393 ], "wc_correctness_avg": [ 35.333333333333336, 32.31442746239244 ], "wc_clarity_avg": [ 52.666666666666664, 57.575071766250446 ], "wc_relation_to_prior_work_avg": [ 40.333333333333336, 42.897811391983886 ], "wc_documentation_avg": [ 39.666666666666664, 11.440668201153676 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 638.0, 446.49524073611354 ], "wc_reply_reviewers_avg": [ 4.0, 5.656854249492381 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 8, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5052073590740845555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "columbia.edu;umn.edu;google.com;google.com;columbia.edu;columbia.edu;columbia.edu", "author_num": 7, "aff_unique_index": "0;1;2;2;0;0;0", "aff_unique_norm": "Columbia University;University of Minnesota;Google", "aff_unique_dep": ";;Google DeepMind", "aff_unique_url": "https://www.columbia.edu;https://www.minnesota.edu;https://deepmind.com", "aff_unique_abbr": "Columbia;UMN;DeepMind", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Twin Cities;Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Towards Understanding the Working Mechanism of Text-to-Image Diffusion Model", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92954", "id": "zTu0QEpvtZ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zTu0QEpvtZ", "openreview": "https://openreview.net/forum?id=zTu0QEpvtZ", "poster": "/media/PosterPDFs/NeurIPS%202024/92954.png?t=1733146031.1185253", "project": "", "author_site": "Mingyang Yi, Aoxue Li, Yi Xin, Zhenguo Li", "tldr": "", "abstract": "Recently, the strong latent Diffusion Probabilistic Model (DPM) has been applied to high-quality Text-to-Image (T2I) generation (e.g., Stable Diffusion), by injecting the encoded target text prompt into the gradually denoised diffusion image generator. Despite the success of DPM in practice, the mechanism behind it remains to be explored. To fill this blank, we begin by examining the intermediate statuses during the gradual denoising generation process in DPM. The empirical observations indicate, the shape of image is reconstructed after the first few denoising steps, and then the image is filled with details (e.g., texture). The phenomenon is because the low-frequency signal (shape relevant) of the noisy image is not corrupted until the final stage in the forward process (initial stage of generation) of adding noise in DPM. Inspired by the observations, we proceed to explore the influence of each token in the text prompt during the two stages. After a series of experiments of T2I generations conditioned on a set of text prompts. We conclude that in the earlier generation stage, the image is mostly decided by the special token [\\texttt{EOS}] in the text prompt, and the information in the text prompt is already conveyed in this stage. After that, the diffusion model completes the details of generated images by information from themselves. Finally, we propose to apply this observation to accelerate the process of T2I generation by properly removing text guidance, which finally accelerates the sampling up to 25\\%+.", "keywords": "text-to-image generation; working mechanism;", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Mingyang Yi;Aoxue Li;Yi Xin;Zhenguo Li", "authorids": "~Mingyang_Yi1;~Aoxue_Li2;~Yi_Xin1;~Zhenguo_Li1", "gender": "M;;M;M", "homepage": "http://mingyangyi.github.io;;https://synbol.github.io/;http://www.ee.columbia.edu/~zgli/", "dblp": ";;33/1127.html;23/6479", "google_scholar": "RlOZiPUAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;XboZC1AAAAAJ", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Mingyang_Yi1;~Aoxue_Li2;~Yi_Xin1;~Zhenguo_Li1", "aff": "Huawei Noah's ark Lab;;Nanjing university;Huawei Noah's Ark Lab", "aff_domain": "huawei.com;;nju.edu.cn;huawei.com", "position": "Researcher;;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nyi2024towards,\ntitle={Towards Understanding the Working Mechanism of Text-to-Image Diffusion Model},\nauthor={Mingyang Yi and Aoxue Li and Yi Xin and Zhenguo Li},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zTu0QEpvtZ}\n}", "github": "", "reviewers": "zfAn;KEXx;xbNR;P41E", "pdf_size": 13641364, "rating": "6;6;7;7", "confidence": "4;4;5;5", "soundness": "3;3;4;4", "novelty": "2;2;2;3", "presentation": "3;2;3;3", "wc_summary": "137;101;57;81", "wc_strengths": "114;98;41;44", "wc_weaknesses": "156;135;31;47", "wc_questions": "118;19;19;124", "wc_limitations": "12;1;28;29", "wc_review": "537;354;176;325", "wc_reply_reviewers": "100;28;30;0", "wc_reply_authors": "0;167;0;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;4;1;1", "rating_avg": [ 6.5, 0.5 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 3.5, 0.5 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 94.0, 29.30870177950569 ], "wc_strengths_avg": [ 74.25, 32.26743714644843 ], "wc_weaknesses_avg": [ 92.25, 54.061885834661744 ], "wc_questions_avg": [ 70.0, 51.044098581520664 ], "wc_limitations_avg": [ 17.5, 11.672617529928752 ], "wc_review_avg": [ 348.0, 128.32575735213877 ], "wc_reply_reviewers_avg": [ 39.5, 36.888345042845174 ], "wc_reply_authors_avg": [ 41.75, 72.31312121600062 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 16, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 1.0, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8396469560101847964&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 3, "email": "huawei.com;;nju.edu.cn;huawei.com", "author_num": 4, "aff_unique_index": "0;1;0", "aff_unique_norm": "Huawei;Nanjing University", "aff_unique_dep": "Noah's ark Lab;", "aff_unique_url": "https://www.huawei.com;https://www.nju.edu.cn", "aff_unique_abbr": "Huawei;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Neural Gaffer: Relighting Any Object via Diffusion", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92953", "id": "zV2GDsZb5a", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zV2GDsZb5a", "openreview": "https://openreview.net/forum?id=zV2GDsZb5a", "poster": "/media/PosterPDFs/NeurIPS%202024/92953.png?t=1733693683.2693996", "project": "", "author_site": "Haian Jin, Yuan Li, Fujun Luan, Yuanbo Xiangli, Sai Bi, Kai Zhang, Zexiang Xu, Jin Sun, Noah Snavely", "tldr": "", "abstract": "Single-image relighting is a challenging task that involves reasoning about the complex interplay between geometry, materials, and lighting. Many prior methods either support only specific categories of images, such as portraits, or require special capture conditions, like using a flashlight. Alternatively, some methods explicitly decompose a scene into intrinsic components, such as normals and BRDFs, which can be inaccurate or under-expressive. In this work, we propose a novel end-to-end 2D relighting diffusion model, called Neural Gaffer, that takes a single image of any object and can synthesize an accurate, high-quality relit image under any novel environmental lighting condition, simply by conditioning an image generator on a target environment map, without an explicit scene decomposition. Our method builds on a pre-trained diffusion model, and fine-tunes it on a synthetic relighting dataset, revealing and harnessing the inherent understanding of lighting present in the diffusion model. We evaluate our model on both synthetic and in-the-wild Internet imagery and demonstrate its advantages in terms of generalization and accuracy. Moreover, by combining with other generative methods, our model enables many downstream 2D tasks, such as text-based relighting and object insertion. Our model can also operate as a strong relighting prior for 3D tasks, such as relighting a radiance field.", "keywords": "relighting;single-image relighting;diffusion models;3D relighting.", "primary_area": "machine_vision", "supplementary_material": "/attachment/6d6fe18c77b16819b0e040bdc736f39a1a1bbda1.zip", "author": "Haian Jin;Yuan Li;Fujun Luan;Yuanbo Xiangli;Sai Bi;Kai Zhang;Zexiang Xu;Jin Sun;Noah Snavely", "authorids": "~Haian_Jin1;~Yuan_Li13;~Fujun_Luan2;~Yuanbo_Xiangli1;~Sai_Bi1;~Kai_Zhang7;~Zexiang_Xu1;~Jin_Sun2;~Noah_Snavely1", "gender": "M;;M;;M;M;M;M;M", "homepage": "https://haian-jin.github.io/;;https://luanfujun.com/;https://kam1107.github.io/;https://sai-bi.github.io/;https://kai-46.github.io/website/;https://cseweb.ucsd.edu/~zex014/;https://jinsungit.github.io/;http://www.cs.cornell.edu/~snavely/", "dblp": "345/8396;;183/9337;186/4450;165/9898;55/957-45;154/0366;93/1520-11;33/4636", "google_scholar": "VZvmpKoAAAAJ;;NLxrmYQAAAAJ;S6tTC-oAAAAJ;-q4nE1kAAAAJ;6B7FPMoAAAAJ;_RRIYvEAAAAJ;https://scholar.google.com.tw/citations?user=scholar.google.com/citations?user=Gw10rFEAAAAJ;Db4BCX8AAAAJ", "orcid": ";;;;;;;0009-0004-2926-4023;", "linkedin": ";;luanfujun/;;;;;;", "or_profile": "~Haian_Jin1;~Yuan_Li13;~Fujun_Luan2;~Yuanbo_Xiangli1;~Sai_Bi1;~Kai_Zhang7;~Zexiang_Xu1;~Jin_Sun2;~Noah_Snavely1", "aff": "Cornell University;;Adobe Systems;Cornell University;Adobe Systems;Adobe Systems;Adobe Research;University of Georgia;Google", "aff_domain": "cornell.edu;;adobe.com;cornell.edu;adobe.com;adobe.com;adobe.com;uga.edu;google.com", "position": "PhD student;;Researcher;Postdoc;Researcher;Researcher;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\njin2024neural,\ntitle={Neural Gaffer: Relighting Any Object via Diffusion},\nauthor={Haian Jin and Yuan Li and Fujun Luan and Yuanbo Xiangli and Sai Bi and Kai Zhang and Zexiang Xu and Jin Sun and Noah Snavely},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zV2GDsZb5a}\n}", "github": "", "reviewers": "C74A;rkJr;NcDc;YxGp", "pdf_size": 7218811, "rating": "4;5;6;7", "confidence": "4;4;4;3", "soundness": "3;3;3;3", "novelty": "2;3;3;3", "presentation": "3;2;3;3", "wc_summary": "127;51;78;107", "wc_strengths": "272;92;50;84", "wc_weaknesses": "211;674;139;113", "wc_questions": "18;290;80;136", "wc_limitations": "30;8;1;6", "wc_review": "658;1115;348;446", "wc_reply_reviewers": "0;529;0;0", "wc_reply_authors": "104;2669;104;115", "reply_reviewers": "0;2;0;0", "reply_authors": "2;8;2;2", "rating_avg": [ 5.5, 1.118033988749895 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 90.75, 28.81297450802329 ], "wc_strengths_avg": [ 124.5, 86.60687039721502 ], "wc_weaknesses_avg": [ 284.25, 227.86769735967405 ], "wc_questions_avg": [ 131.0, 100.84145972763385 ], "wc_limitations_avg": [ 11.25, 11.121488209767612 ], "wc_review_avg": [ 641.75, 295.3120171953725 ], "wc_reply_reviewers_avg": [ 132.25, 229.06371930098402 ], "wc_reply_authors_avg": [ 748.0, 1109.0989586146045 ], "reply_reviewers_avg": [ 0.5, 0.8660254037844386 ], "reply_authors_avg": [ 3.5, 2.598076211353316 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 9, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13356085482619303075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "cornell.edu;;adobe.com;cornell.edu;adobe.com;adobe.com;adobe.com;uga.edu;google.com", "author_num": 9, "aff_unique_index": "0;1;0;1;1;1;2;3", "aff_unique_norm": "Cornell University;Adobe;University of Georgia;Google", "aff_unique_dep": ";Adobe Systems Incorporated;;Google", "aff_unique_url": "https://www.cornell.edu;https://www.adobe.com;https://www.uga.edu;https://www.google.com", "aff_unique_abbr": "Cornell;Adobe;UGA;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Rethinking No-reference Image Exposure Assessment from Holism to Pixel: Models, Datasets and Benchmarks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92952", "id": "zVrQeoPIoQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zVrQeoPIoQ", "openreview": "https://openreview.net/forum?id=zVrQeoPIoQ", "poster": "", "project": "", "author_site": "Shuai He, Shuntian Zheng, Anlong Ming, Banyu Wu, Huadong Ma", "tldr": "", "abstract": "The past decade has witnessed an increasing demand for enhancing image quality through exposure, and as a crucial prerequisite in this endeavor, Image Exposure Assessment (IEA) is now being accorded serious attention. However, IEA encounters two persistent challenges that remain unresolved over the long term: the accuracy and generalizability of No-reference IEA are inadequate for practical applications; the scope of IEA is confined to qualitative and quantitative analysis of the entire image or subimage, such as providing only a score to evaluate the exposure level, thereby lacking intuitive and precise fine-grained evaluation for complex exposure conditions. The objective of this paper is to address the persistent bottleneck challenges from three perspectives: model, dataset, and benchmark. 1) Model-level: we propose a Pixel-level IEA Network (P-IEANet) that utilizes Haar discrete wavelet transform (DWT) to analyze, decompose, and assess exposure from both lightness and structural perspectives, capable of generating pixel-level assessment results under no-reference scenarios. 2) Dataset-level: we elaborately build an exposure-oriented dataset, IEA40K, containing 40K images, covering 17 typical lighting scenarios, 27 devices, and 50+ scenes, with each image densely annotated by more than 10 experts with pixel-level labels. 3) Benchmark-level: we develop a comprehensive benchmark of 19 methods based on IEA40K. Our P-IEANet not only achieves state-of-the-art (SOTA) performance on all metrics but also seamlessly integrates with existing exposure correction and lighting enhancement methods. To our knowledge, this is the first work that explicitly emphasizes assessing complex image exposure problems at a pixel level, providing a significant boost to the IEA and exposure-related community. The code and dataset are available in \\href{https://github.com/mRobotit/Pixel-level-No-reference-Image-Exposure-Assessment}{\\textcolor{red} {here}}.", "keywords": "Image Exposure Assessment;Image Quality Assessment", "primary_area": "machine_vision", "supplementary_material": "/attachment/4c033c87c975961e7194fda4f3ecc8ff4de81590.zip", "author": "Shuai He;Shuntian Zheng;Anlong Ming;Banyu Wu;Huadong Ma", "authorids": "~Shuai_He2;~Shuntian_Zheng1;~Anlong_Ming1;~Banyu_Wu1;~Huadong_Ma1", "gender": "M;M;M;M;M", "homepage": "https://github.com/woshidandan;;https://teacher.bupt.edu.cn/mal/en/index.htm;https://github.com/Alley-wu;https://scs.bupt.edu.cn/", "dblp": ";;52/3276;;04/6217", "google_scholar": ";;y5kFLCwAAAAJ;;", "orcid": "0000-0001-8817-0685;0000-0002-5717-5851;0000-0003-2952-7757;;", "linkedin": ";;;;", "or_profile": "~Shuai_He2;~Shuntian_Zheng1;~Anlong_Ming1;~Banyu_Wu1;~Huadong_Ma1", "aff": "China, Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nhe2024rethinking,\ntitle={Rethinking No-reference Image Exposure Assessment from Holism to Pixel: Models, Datasets and Benchmarks},\nauthor={Shuai He and Shuntian Zheng and Anlong Ming and Banyu Wu and Huadong Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zVrQeoPIoQ}\n}", "github": "", "reviewers": "k7zi;B8CP;TsXB;Wvdf", "pdf_size": 5976674, "rating": "5;5;7;7", "confidence": "5;4;5;4", "soundness": "2;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;2", "wc_summary": "113;69;81;154", "wc_strengths": "99;97;55;93", "wc_weaknesses": "25;131;70;56", "wc_questions": "497;74;18;9", "wc_limitations": "23;7;1;2", "wc_review": "757;378;225;314", "wc_reply_reviewers": "25;10;10;52", "wc_reply_authors": "0;0;0;172", "reply_reviewers": "1;1;1;2", "reply_authors": "1;1;1;2", "rating_avg": [ 6.0, 1.0 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 104.25, 32.91940916845258 ], "wc_strengths_avg": [ 86.0, 18.027756377319946 ], "wc_weaknesses_avg": [ 70.5, 38.53894134508627 ], "wc_questions_avg": [ 149.5, 202.16886506086934 ], "wc_limitations_avg": [ 8.25, 8.814051281902097 ], "wc_review_avg": [ 418.5, 202.84538446807213 ], "wc_reply_reviewers_avg": [ 24.25, 17.151894939043906 ], "wc_reply_authors_avg": [ 43.0, 74.47818472546173 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:8xCmZiryL5gJ:scholar.google.com/&scioq=Rethinking+No-reference+Image+Exposure+Assessment+from+Holism+to+Pixel:+Models,+Datasets+and+Benchmarks&hl=en&as_sdt=0,33", "gs_version_total": 0, "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "author_num": 5, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "InstructG2I: Synthesizing Images from Multimodal Attributed Graphs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92951", "id": "zWnW4zqkuM", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zWnW4zqkuM", "openreview": "https://openreview.net/forum?id=zWnW4zqkuM", "poster": "/media/PosterPDFs/NeurIPS%202024/92951.png?t=1731107425.2122135", "project": "", "author_site": "Bowen Jin, Ziqi Pang, Bingjun Guo, Yu-Xiong Wang, Jiaxuan You, Jiawei Han", "tldr": "", "abstract": "In this paper, we approach an overlooked yet critical task Graph2Image: generating images from multimodal attributed graphs (MMAGs). This task poses significant challenges due to the explosion in graph size, dependencies among graph entities, and the need for controllability in graph conditions. To address these challenges, we propose a graph context-conditioned diffusion model called InstructG2I. InstructG2I first exploits the graph structure and multimodal information to conduct informative neighbor sampling by combining personalized page rank and re-ranking based on vision-language features. Then, a graph QFormer encoder adaptively encodes the graph nodes into an auxiliary set of graph prompts to guide the denoising process of diffusion. Finally, we propose graph classifier-free guidance, enabling controllable generation by varying the strength of graph guidance and multiple connected edges to a node. Extensive experiments conducted on three datasets from different domains demonstrate the effectiveness and controllability of our approach. The code is available at https://github.com/PeterGriffinJin/InstructG2I.", "keywords": "learning on graphs;image generation;diffusion model;graph neural network", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Bowen Jin;Ziqi Pang;Bingjun Guo;Yu-Xiong Wang;Jiaxuan You;Jiawei Han", "authorids": "~Bowen_Jin1;~Ziqi_Pang1;~Bingjun_Guo1;~Yu-Xiong_Wang1;~Jiaxuan_You2;~Jiawei_Han1", "gender": "M;M;M;;M;M", "homepage": "https://peterjin.me/;https://ziqipang.github.io/;https://bingjung.github.io;https://yxw.cs.illinois.edu/;http://hanj.cs.illinois.edu/;https://cs.stanford.edu/~jiaxuan/", "dblp": "235/8066;255/9210;;35/10700;h/JiaweiHan.html;192/4727", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Uh5k1vcAAAAJ;;T_Q-xDkAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ;NDbMl7oAAAAJ", "orcid": "0000-0003-1295-2829;;;;0000-0002-3629-2696;", "linkedin": "bowen-peter-jin/;;;;;jiaxuan-you-5859b37b/", "or_profile": "~Bowen_Jin1;~Ziqi_Pang1;~Bingjun_Guo1;~Yu-Xiong_Wang1;~Jiawei_Han1;~Jiaxuan_You1", "aff": "University of Illinois, Urbana Champaign;UIUC;University of Illinois, Urbana Champaign;Department of Computer Science, University of Illinois Urbana-Champaign;University of Illinois at Urbana-Champaign (UIUC);NVIDIA", "aff_domain": "illinois.edu;cs.illinois.edu;illinois.edu;cs.illinois.edu;illinois.edu;nvidia.com", "position": "PhD student;PhD student;Undergrad student;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\njin2024instructgi,\ntitle={InstructG2I: Synthesizing Images from Multimodal Attributed Graphs},\nauthor={Bowen Jin and Ziqi Pang and Bingjun Guo and Yu-Xiong Wang and Jiaxuan You and Jiawei Han},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zWnW4zqkuM}\n}", "github": "", "reviewers": "7dPX;TQRx;nWct;5RsF", "pdf_size": 3646907, "rating": "5;5;6;7", "confidence": "3;3;3;3", "soundness": "3;2;3;3", "novelty": "2;3;3;4", "presentation": "3;3;2;3", "wc_summary": "27;63;78;61", "wc_strengths": "45;15;56;105", "wc_weaknesses": "59;90;118;30", "wc_questions": "450;32;30;19", "wc_limitations": "3;1;7;9", "wc_review": "584;201;289;224", "wc_reply_reviewers": "80;0;17;29", "wc_reply_authors": "343;0;33;24", "reply_reviewers": "1;0;1;1", "reply_authors": "2;1;2;2", "rating_avg": [ 5.75, 0.82915619758885 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 57.25, 18.659782956937093 ], "wc_strengths_avg": [ 55.25, 32.40659655070245 ], "wc_weaknesses_avg": [ 74.25, 32.98768709685479 ], "wc_questions_avg": [ 132.75, 183.23124051318322 ], "wc_limitations_avg": [ 5.0, 3.1622776601683795 ], "wc_review_avg": [ 324.5, 153.25876810153474 ], "wc_reply_reviewers_avg": [ 31.5, 29.837057495671385 ], "wc_reply_authors_avg": [ 100.0, 140.81370671919692 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2H5aUDYsuxAJ:scholar.google.com/&scioq=InstructG2I:+Synthesizing+Images+from+Multimodal+Attributed+Graphs&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "illinois.edu;cs.illinois.edu;illinois.edu;cs.illinois.edu;illinois.edu;nvidia.com", "author_num": 6, "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://illinois.edu;https://www.nvidia.com", "aff_unique_abbr": "UIUC;NVIDIA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FLAME : Factuality-Aware Alignment for Large Language Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92950", "id": "zWuHSIALBh", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zWuHSIALBh", "openreview": "https://openreview.net/forum?id=zWuHSIALBh", "poster": "/media/PosterPDFs/NeurIPS%202024/92950.png?t=1731565000.9867644", "project": "", "author_site": "Sheng-Chieh Lin, Luyu Gao, Barlas Oguz, Wenhan Xiong, Jimmy Lin, Scott Yih, Xilun Chen", "tldr": "", "abstract": "Alignment is a procedure to fine-tune pre-trained large language models (LLMs) to follow natural language instructions and serve as helpful AI assistants. \nWe have observed, however, that the conventional alignment process fails to enhance the factual accuracy of LLMs, and often leads to the generation of more false facts (i.e., *hallucination*). \nIn this paper, we study how to make the LLM alignment process more factual, by first identifying factors that lead to hallucination in both alignment steps: supervised fine-tuning (SFT) and reinforcement learning (RL).\nIn particular, we find that training the LLM on new or unfamiliar knowledge can encourage hallucination.\nThis makes SFT less factual as it trains on human-labeled data that may be novel to the LLM. \nFurthermore, reward functions used in standard RL often inadequately capture factuality and favor longer and more detailed responses, which inadvertently promote hallucination.\nBased on these observations, we propose *FactuaLity-aware AlignMEnt*, comprised of *factuality-aware SFT* and *factuality-aware RL* through direct preference optimization. \nExperiments show that our proposed *FLAME* guides LLMs to output more factual responses while maintaining their instruction-following capability.", "keywords": "large language models;factuality;alignment", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Sheng-Chieh Lin;Luyu Gao;Barlas Oguz;Wenhan Xiong;Jimmy Lin;Wen-tau Yih;Xilun Chen", "authorids": "~Sheng-Chieh_Lin1;~Luyu_Gao1;~Barlas_Oguz1;~Wenhan_Xiong1;~Jimmy_Lin2;~Wen-tau_Yih1;~Xilun_Chen1", "gender": "M;M;;M;;M;", "homepage": "https://jacklin64.github.io/about_me/;https://luyug.github.io/;;https://xwhan.github.io;https://cs.uwaterloo.ca/~jimmylin/;http://scottyih.org;https://xilunchen.com", "dblp": "61/10361;;https://dblp.org/pers/hd/o/Oguz:Barlas;203/8542;00/7739;07/7129;96/10207-2.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;iPmTQZMAAAAJ;;;8rDNIMsAAAAJ;eUk_hy8AAAAJ", "orcid": "0000-0002-7989-9703;;;;;0000-0003-4263-395X;", "linkedin": "jack-lin-716a61127/;;barlas-o%C4%9Fuz-25465050;;;scottyih/;", "or_profile": "~Sheng-Chieh_Lin1;~Luyu_Gao1;~Barlas_Oguz1;~Wenhan_Xiong1;~Jimmy_Lin2;~Wen-tau_Yih1;~Xilun_Chen1", "aff": "Meta Platforms, Inc.;Carnegie Mellon University;Meta;Meta Facebook;University of Waterloo;Meta Platforms, Inc.;Meta FAIR", "aff_domain": "meta.com;cmu.edu;meta.com;fb.com;waterloo.ca;meta.com;meta.com", "position": "Intern;PhD student;Research Scientist;Researcher;Full Professor;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlin2024flame,\ntitle={{FLAME} : Factuality-Aware Alignment for Large Language Models},\nauthor={Sheng-Chieh Lin and Luyu Gao and Barlas Oguz and Wenhan Xiong and Jimmy Lin and Wen-tau Yih and Xilun Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zWuHSIALBh}\n}", "github": "", "reviewers": "XBcd;6wxd;b1G9;HVe5", "pdf_size": 1638012, "rating": "4;5;6;6", "confidence": "4;4;5;3", "soundness": "3;3;3;3", "novelty": "2;2;3;3", "presentation": "3;4;3;3", "wc_summary": "75;64;123;110", "wc_strengths": "45;50;62;155", "wc_weaknesses": "140;211;138;212", "wc_questions": "33;5;23;44", "wc_limitations": "15;13;9;9", "wc_review": "308;343;355;530", "wc_reply_reviewers": "23;120;28;72", "wc_reply_authors": "18;285;12;114", "reply_reviewers": "1;1;1;2", "reply_authors": "2;2;2;3", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 93.0, 24.259018941416407 ], "wc_strengths_avg": [ 78.0, 44.883181705400524 ], "wc_weaknesses_avg": [ 175.25, 36.25861966484659 ], "wc_questions_avg": [ 26.25, 14.341809509263467 ], "wc_limitations_avg": [ 11.5, 2.598076211353316 ], "wc_review_avg": [ 384.0, 86.04359360231301 ], "wc_reply_reviewers_avg": [ 60.75, 39.162322454113976 ], "wc_reply_authors_avg": [ 107.25, 110.31630659154611 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14797496204644076539&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 4, "email": "meta.com;cmu.edu;meta.com;fb.com;waterloo.ca;meta.com;meta.com", "author_num": 7, "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "Meta;Carnegie Mellon University;University of Waterloo", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://www.meta.com;https://www.cmu.edu;https://uwaterloo.ca", "aff_unique_abbr": "Meta;CMU;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Neural Conditional Probability for Uncertainty Quantification", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92949", "id": "zXfhHJnMB2", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zXfhHJnMB2", "openreview": "https://openreview.net/forum?id=zXfhHJnMB2", "poster": "", "project": "", "author_site": "Vladimir Kostic, Gr\u00e9goire Pacreau, Giacomo Turri, Pietro Novelli, Karim Lounici, Massimiliano Pontil", "tldr": "", "abstract": "We introduce Neural Conditional Probability (NCP), an operator-theoretic approach to learning conditional distributions with \na focus on statistical inference tasks. NCP can be used to build conditional confidence regions and extract key statistics such as \nconditional quantiles, mean, and covariance. It offers streamlined learning via a single unconditional training phase, allowing \nefficient inference without the need for retraining even when conditioning changes. By leveraging the approximation \ncapabilities of neural networks, NCP efficiently handles a wide variety of complex probability distributions. \nWe provide theoretical guarantees that ensure both optimization consistency and statistical accuracy. \nIn experiments, we show that NCP with a 2-hidden-layer network matches or outperforms leading methods. \nThis demonstrates that a a minimalistic architecture with a theoretically grounded loss can achieve \ncompetitive results, even in the face of more complex architectures.", "keywords": "Operator learning;conditional density estimation;statistical inference;deep learning;theoretical guarantees", "primary_area": "learning_theory", "supplementary_material": "", "author": "Vladimir R Kostic;gregoire pacreau;Giacomo Turri;Pietro Novelli;Karim Lounici;Massimiliano Pontil", "authorids": "~Vladimir_R_Kostic1;~gregoire_pacreau1;~Giacomo_Turri1;~Pietro_Novelli1;~Karim_Lounici1;~Massimiliano_Pontil3", "gender": "M;M;;M;;", "homepage": "https://vladi-iit.github.io/;;;;;", "dblp": "94/879;;211/5333.html;318/3513;;", "google_scholar": "66gV7SAAAAAJ;l7hggicAAAAJ;https://scholar.google.com/citations?hl=it;;;", "orcid": ";;0000-0002-3405-9292;0000-0003-1623-5659;;", "linkedin": "vladimir-kostic-77500652/;gr%C3%A9goire-pacreau-3aa053147/;giacomoturri/;;;", "or_profile": "~Vladimir_R_Kostic1;~gregoire_pacreau1;~Giacomo_Turri1;~Pietro_Novelli1;~Karim_Lounici1;~Massimiliano_Pontil3", "aff": "University of Novi Sad;\u00c9cole Polytechnique;Istituto Italiano di Tecnologia;Istituto Italiano di Tecnologia;;", "aff_domain": "uns.ac.rs;polytechnique.edu;iit.it;iit.it;;", "position": "Associate Professor;PhD student;Postdoc;Postdoc;;", "bibtex": "@inproceedings{\nkostic2024neural,\ntitle={Neural Conditional Probability for Uncertainty Quantification},\nauthor={Vladimir R Kostic and gregoire pacreau and Giacomo Turri and Pietro Novelli and Karim Lounici and Massimiliano Pontil},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zXfhHJnMB2}\n}", "github": "", "reviewers": "n6sh;6sLX;vE36;BaSA", "pdf_size": 3228574, "rating": "6;7;7;7", "confidence": "2;2;1;3", "soundness": "3;3;3;4", "novelty": "2;3;3;4", "presentation": "3;3;3;3", "wc_summary": "119;72;8;147", "wc_strengths": "49;37;8;49", "wc_weaknesses": "297;100;8;63", "wc_questions": "53;3;8;114", "wc_limitations": "60;9;8;8", "wc_review": "578;221;40;381", "wc_reply_reviewers": "116;13;0;16", "wc_reply_authors": "60;39;0;39", "reply_reviewers": "1;1;0;1", "reply_authors": "2;2;1;2", "rating_avg": [ 6.75, 0.4330127018922193 ], "confidence_avg": [ 2.0, 0.7071067811865476 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.7071067811865476 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 86.5, 52.652160449501025 ], "wc_strengths_avg": [ 35.75, 16.753730927766508 ], "wc_weaknesses_avg": [ 117.0, 108.95641330366928 ], "wc_questions_avg": [ 44.5, 44.60100895719737 ], "wc_limitations_avg": [ 21.25, 22.37604746151563 ], "wc_review_avg": [ 305.0, 198.48551584435575 ], "wc_reply_reviewers_avg": [ 36.25, 46.434766070262484 ], "wc_reply_authors_avg": [ 34.5, 21.68524844220144 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:EGAtPIROLbAJ:scholar.google.com/&scioq=Neural+Conditional+Probability+for+Uncertainty+Quantification&hl=en&as_sdt=0,14", "gs_version_total": 0, "email": "uns.ac.rs;polytechnique.edu;iit.it;iit.it;;", "author_num": 6, "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Novi Sad;Ecole Polytechnique;Istituto Italiano di Tecnologia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uns.ac.rs;https://www.polytechnique.edu;https://www.iit.it", "aff_unique_abbr": "UNS;X;IIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "Serbia;France;Italy" }, { "title": "Semantic Feature Learning for Universal Unsupervised Cross-Domain Retrieval", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92948", "id": "zZVqZRXSao", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zZVqZRXSao", "openreview": "https://openreview.net/forum?id=zZVqZRXSao", "poster": "/media/PosterPDFs/NeurIPS%202024/92948.png?t=1731563651.232014", "project": "", "author_site": "Lixu Wang, Xinyu Du, Qi Zhu", "tldr": "", "abstract": "Cross-domain retrieval (CDR) is finding increasingly broad applications across various domains. However, existing efforts have several major limitations, with the most critical being their reliance on accurate supervision. Recent studies thus focus on achieving unsupervised CDR, but they typically assume that the category spaces across domains are identical, an assumption that is often unrealistic in real-world scenarios. This is because only through dedicated and comprehensive analysis can the category composition of a data domain be obtained, which contradicts the premise of unsupervised scenarios. Therefore, in this work, we introduce the problem of **U**niversal **U**nsupervised **C**ross-**D**omain **R**etrieval (U^2CDR) for the first time and design a two-stage semantic feature learning framework to address it. In the first stage, a cross-domain unified prototypical structure is established under the guidance of an instance-prototype-mixed contrastive loss and a semantic-enhanced loss, to counteract category space differences. In the second stage, through a modified adversarial training mechanism, we ensure minimal changes for the established prototypical structure during domain alignment, enabling more accurate nearest-neighbor searching. Extensive experiments across multiple datasets and scenarios, including close-set, partial, and open-set CDR, demonstrate that our approach significantly outperforms existing state-of-the-art CDR methods and other related methods in solving U^2CDR challenges.", "keywords": "Image Retrieval;Domain Adaptation", "primary_area": "machine_vision", "supplementary_material": "", "author": "Lixu Wang;Xinyu Du;Qi Zhu", "authorids": "~Lixu_Wang1;~Xinyu_Du1;~Qi_Zhu2", "gender": ";M;", "homepage": ";;http://zhulab.ece.northwestern.edu/", "dblp": ";;66/5923-2.html", "google_scholar": ";https://scholar.google.com/;TN09YMcAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Lixu_Wang1;~Xinyu_Du1;~Qi_Zhu2", "aff": ";General Motors;Northwestern University", "aff_domain": ";gm.com;northwestern.edu", "position": ";Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2024semantic,\ntitle={Semantic Feature Learning for Universal Unsupervised Cross-Domain Retrieval},\nauthor={Lixu Wang and Xinyu Du and Qi Zhu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zZVqZRXSao}\n}", "github": "", "reviewers": "gZ1R;tjhd;ssYY", "pdf_size": 4201564, "rating": "5;6;7", "confidence": "4;3;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "2;3;3", "wc_summary": "22;153;77", "wc_strengths": "18;19;60", "wc_weaknesses": "101;180;87", "wc_questions": "3;26;16", "wc_limitations": "1;2;11", "wc_review": "145;380;251", "wc_reply_reviewers": "0;13;13", "wc_reply_authors": "103;213;44", "reply_reviewers": "0;1;1", "reply_authors": "2;3;2", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 84.0, 53.70909296075169 ], "wc_strengths_avg": [ 32.333333333333336, 19.567546828585563 ], "wc_weaknesses_avg": [ 122.66666666666667, 40.94169295745136 ], "wc_questions_avg": [ 15.0, 9.41629792788369 ], "wc_limitations_avg": [ 4.666666666666667, 4.496912521077347 ], "wc_review_avg": [ 258.6666666666667, 96.09139168289508 ], "wc_reply_reviewers_avg": [ 8.666666666666666, 6.128258770283412 ], "wc_reply_authors_avg": [ 120.0, 70.03332540060244 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15598413862958412648&as_sdt=40005&sciodt=0,10&hl=en", "gs_version_total": 4, "email": ";gm.com;northwestern.edu", "author_num": 3, "aff_unique_index": "0;1", "aff_unique_norm": "General Motors;Northwestern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gm.com;https://www.northwestern.edu", "aff_unique_abbr": "GM;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GenRL: Multimodal-foundation world models for generalization in embodied agents", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92947", "id": "za9Jx8yqUA", "proceeding": "", "pdf": "https://openreview.net/pdf?id=za9Jx8yqUA", "openreview": "https://openreview.net/forum?id=za9Jx8yqUA", "poster": "", "project": "", "author_site": "Pietro Mazzaglia, Tim Verbelen, Bart Dhoedt, Aaron Courville, Sai Rajeswar Mudumba", "tldr": "", "abstract": "Learning generalist embodied agents, able to solve multitudes of tasks in different domains is a long-standing problem. Reinforcement learning (RL) is hard to scale up as it requires a complex reward design for each task. In contrast, language can specify tasks in a more natural way. Current foundation vision-language models (VLMs) generally require fine-tuning or other adaptations to be adopted in embodied contexts, due to the significant domain gap. However, the lack of multimodal data in such domains represents an obstacle to developing foundation models for embodied applications. In this work, we overcome these problems by presenting multimodal-foundation world models, able to connect and align the representation of foundation VLMs with the latent space of generative world models for RL, without any language annotations. The resulting agent learning framework, GenRL, allows one to specify tasks through vision and/or language prompts, ground them in the embodied domain\u2019s dynamics, and learn the corresponding behaviors in imagination.\nAs assessed through large-scale multi-task benchmarking in locomotion and manipulation domains, GenRL enables multi-task generalization from language and visual prompts. Furthermore, by introducing a data-free policy learning strategy, our approach lays the groundwork for foundational policy learning using generative world models. \nWebsite, code and data: https://mazpie.github.io/genrl/", "keywords": "world models;foundations models;reinforcement learning;multitask generalization", "primary_area": "reinforcement_learning", "supplementary_material": "", "author": "Pietro Mazzaglia;Tim Verbelen;Bart Dhoedt;Aaron Courville;Sai Rajeswar", "authorids": "~Pietro_Mazzaglia1;~Tim_Verbelen1;~Bart_Dhoedt1;~Aaron_Courville3;~Sai_Rajeswar2", "gender": ";M;M;;M", "homepage": "https://mazpie.github.io/;https://scholar.google.be/citations?user=G86XVPEAAAAJ;;;https://sairajeswar.com/", "dblp": "266/6084;71/8853;39/211;56/1688;159/2116", "google_scholar": "c-PYVTgAAAAJ;https://scholar.google.be/citations?user=G86XVPEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;https://scholar.google.ca/citations?user=h-sqIigAAAAJ", "orcid": "0000-0003-3319-5986;;0000-0002-7271-7479;;", "linkedin": "pietromazzaglia/;;;;sairajeswar/", "or_profile": "~Pietro_Mazzaglia1;~Tim_Verbelen1;~Bart_Dhoedt1;~Aaron_Courville3;~sai_rajeswar_mudumba1", "aff": "Ghent University;VERSES;Ghent University;Universit\u00e9 de Montr\u00e9al;ServiceNow", "aff_domain": "ugent.be;verses.ai;ugent.be; ;servicenow.com", "position": "PhD student;Researcher;Full Professor;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nmazzaglia2024genrl,\ntitle={Gen{RL}: Multimodal-foundation world models for generalization in embodied agents},\nauthor={Pietro Mazzaglia and Tim Verbelen and Bart Dhoedt and Aaron Courville and Sai Rajeswar},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=za9Jx8yqUA}\n}", "github": "", "reviewers": "jfoT;Yb6H;X1kr;bBQY", "pdf_size": 1365638, "rating": "3;5;5;7", "confidence": "4;5;4;3", "soundness": "1;3;3;2", "novelty": "3;3;3;3", "presentation": "3;3;1;3", "wc_summary": "162;75;89;104", "wc_strengths": "98;104;56;99", "wc_weaknesses": "547;684;280;118", "wc_questions": "2;209;225;240", "wc_limitations": "1;5;7;11", "wc_review": "810;1077;657;572", "wc_reply_reviewers": "102;1044;10;310", "wc_reply_authors": "483;1532;58;812", "reply_reviewers": "1;2;1;2", "reply_authors": "2;3;2;3", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 2.25, 0.82915619758885 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.5, 0.8660254037844386 ], "wc_summary_avg": [ 107.5, 33.094561486745825 ], "wc_strengths_avg": [ 89.25, 19.330998422223306 ], "wc_weaknesses_avg": [ 407.25, 221.3474361721861 ], "wc_questions_avg": [ 169.0, 97.03865209286452 ], "wc_limitations_avg": [ 6.0, 3.605551275463989 ], "wc_review_avg": [ 779.0, 192.02734180319217 ], "wc_reply_reviewers_avg": [ 366.5, 405.97136598533643 ], "wc_reply_authors_avg": [ 721.25, 539.0303215033455 ], "reply_reviewers_avg": [ 1.5, 0.5 ], "reply_authors_avg": [ 2.5, 0.5 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.5, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16644401298703339372&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": "ugent.be;verses.ai;ugent.be; ;servicenow.com", "author_num": 5, "aff_unique_index": "0;0;2;3", "aff_unique_norm": "Ghent University;;Universit\u00e9 de Montr\u00e9al;ServiceNow", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ugent.be/en;;https://www.umontreal.ca;https://www.servicenow.com", "aff_unique_abbr": "UGent;;UdeM;ServiceNow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;3", "aff_country_unique": "Belgium;;Canada;United States" }, { "title": "Mesa-Extrapolation: A Weave Position Encoding Method for Enhanced Extrapolation in LLMs", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92946", "id": "zaXuMqOAF4", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zaXuMqOAF4", "openreview": "https://openreview.net/forum?id=zaXuMqOAF4", "poster": "/media/PosterPDFs/NeurIPS%202024/92946.png?t=1731405085.0858166", "project": "", "author_site": "Xin Ma, Yang Liu, Jingjing Liu, Xiaoxu Ma", "tldr": "", "abstract": "Large language models (LLMs), although having revolutionized many fields, still suffer from the challenging extrapolation problem, where the inference ability of LLMs sharply declines beyond their max training lengths. In this work, we conduct a theoretical analysis to better understand why No Position Encoding (NoPE) fails outside its effective range, as well as examining the power of Position Encoding (PE) in this context. Our findings reveal that with meticulous weave position, PE can indeed be extended beyond effective range. Our theorems establish that LLMs equipped with weave PE can achieve improved extrapolation performance without additional cost. Furthermore, we introduce a novel weave PE method, Mesa-Extrapolation, which utilizes a chunk-based triangular attention matrix and applies Stair PE to manage the final chunk. This method not only retains competitive performance but also offers substantial benefits such as significantly reduced memory demand and faster inference speed. Extensive experiments validate the effectiveness of Mesa-Extrapolation, demonstrating its potential as a scalable solution to enhancing LLMs\u2019 applicative reach.", "keywords": "Large language models;extrapolation;position encoding;transformers;self-attention", "primary_area": "generative_models", "supplementary_material": "/attachment/5f3abdf1177202c764db483292a5af98703f34b1.zip", "author": "Xin Ma;Yang Liu;Jingjing Liu;Xiaoxu Ma", "authorids": "~Xin_Ma5;~Yang_Liu59;~Jingjing_Liu2;~Xiaoxu_Ma1", "gender": "M;F;;M", "homepage": ";;https://air.tsinghua.edu.cn/en/info/1046/1194.htm#:~:text=Jingjing%20Liu%20is%20Professor%2C%20Principal,CVPR%2C%20ACL%2C%20etc.);", "dblp": ";;30/3008-1;21/2791", "google_scholar": ";JEieoFsAAAAJ;BzJ_GboAAAAJ;", "orcid": "0000-0002-6046-7222;;;", "linkedin": ";;jingjing-liu-65703431/;", "or_profile": "~Xin_Ma5;~Yang_Liu59;~Jingjing_Liu2;~Xiaoxu_Ma1", "aff": "ENN Group;Tsinghua University;Tsinghua University;", "aff_domain": "enn.group.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "position": "Researcher;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nma2024mesaextrapolation,\ntitle={Mesa-Extrapolation: A Weave Position Encoding Method for Enhanced Extrapolation in {LLM}s},\nauthor={Xin Ma and Yang Liu and Jingjing Liu and Xiaoxu Ma},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zaXuMqOAF4}\n}", "github": "", "reviewers": "PZAX;vMAr;hjGM;LfLf;bJoG", "pdf_size": 5613840, "rating": "4;5;6;6;8", "confidence": "4;5;4;4;3", "soundness": "2;3;3;3;3", "novelty": "2;2;3;3;4", "presentation": "2;3;3;2;3", "wc_summary": "83;61;43;85;119", "wc_strengths": "45;67;35;112;40", "wc_weaknesses": "56;357;40;87;251", "wc_questions": "41;9;117;92;2", "wc_limitations": "1;18;6;27;13", "wc_review": "226;512;241;403;425", "wc_reply_reviewers": "207;303;34;62;135", "wc_reply_authors": "809;984;324;118;306", "reply_reviewers": "2;2;1;1;1", "reply_authors": "5;4;3;3;3", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 2.6, 0.4898979485566356 ], "wc_summary_avg": [ 78.2, 25.568730903195018 ], "wc_strengths_avg": [ 59.8, 28.294169010592977 ], "wc_weaknesses_avg": [ 158.2, 124.59438189581422 ], "wc_questions_avg": [ 52.2, 45.37576445637032 ], "wc_limitations_avg": [ 13.0, 9.09945053286186 ], "wc_review_avg": [ 361.4, 110.71151701607201 ], "wc_reply_reviewers_avg": [ 148.2, 98.04774347224927 ], "wc_reply_authors_avg": [ 508.2, 329.82928917850825 ], "reply_reviewers_avg": [ 1.4, 0.4898979485566356 ], "reply_authors_avg": [ 3.6, 0.8 ], "replies_avg": [ 32, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.7150969419341942, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13864642004372843653&as_sdt=20005&sciodt=0,9&hl=en", "gs_version_total": 3, "email": "enn.group.cn;tsinghua.edu.cn;tsinghua.edu.cn;", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "ENN Group;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": ";https://www.tsinghua.edu.cn", "aff_unique_abbr": ";THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";China" }, { "title": "Inference of Neural Dynamics Using Switching Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92945", "id": "zb8jLAh2VN", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zb8jLAh2VN", "openreview": "https://openreview.net/forum?id=zb8jLAh2VN", "poster": "", "project": "", "author_site": "Yongxu Zhang, Shreya Saxena", "tldr": "", "abstract": "Neural population activity often exhibits distinct dynamical features across time, which may correspond to distinct internal processes or behavior. Linear methods and variations thereof, such as Hidden Markov Model (HMM) and Switching Linear Dynamical System (SLDS), are often employed to identify discrete states with evolving neural dynamics. However, these techniques may not be able to capture the underlying nonlinear dynamics associated with neural propagation. Recurrent Neural Networks (RNNs) are commonly used to model neural dynamics thanks to their nonlinear characteristics. In our work, we develop Switching Recurrent Neural Networks (SRNN), RNNs with weights that switch across time, to reconstruct switching dynamics of neural time-series data. We apply these models to simulated data as well as cortical neural activity across mice and monkeys, which allows us to automatically detect discrete states that lead to the identification of varying neural dynamics. In a monkey reaching dataset with electrophysiology recordings, a mouse self-initiated lever pull dataset with widefield calcium recordings, and a mouse self-initiated decision making dataset with widefield calcium recording, SRNNs are able to automatically identify discrete states with distinct nonlinear neural dynamics. The inferred switches are aligned with the behavior, and the reconstructions show that the recovered neural dynamics are distinct across different stages of the behavior. We show that the neural dynamics have behaviorally-relevant switches across time and we are able to use SRNNs to successfully capture these switches and the corresponding dynamical features.", "keywords": "Neural Dynamics;Variational Inference;Recurrent Neural Networks", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "", "author": "Yongxu Zhang;Shreya Saxena", "authorids": "~Yongxu_Zhang1;~Shreya_Saxena2", "gender": ";F", "homepage": ";https://saxena.ece.ufl.edu/pi/", "dblp": ";115/8750", "google_scholar": "_aOqZf0AAAAJ;x-YACVoAAAAJ", "orcid": ";", "linkedin": ";", "or_profile": "~Yongxu_Zhang1;~Shreya_Saxena1", "aff": "Yale University;Yale University", "aff_domain": "yale.edu;yale.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2024inference,\ntitle={Inference of Neural Dynamics Using Switching Recurrent Neural Networks},\nauthor={Yongxu Zhang and Shreya Saxena},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zb8jLAh2VN}\n}", "github": "", "reviewers": "hFy4;uaif;Kpfw;8VtM", "pdf_size": 20427701, "rating": "3;5;6;6", "confidence": "4;4;4;3", "soundness": "2;3;3;3", "novelty": "2;2;3;2", "presentation": "2;2;3;3", "wc_summary": "22;133;37;47", "wc_strengths": "16;94;17;46", "wc_weaknesses": "193;332;223;334", "wc_questions": "76;240;199;152", "wc_limitations": "31;23;5;34", "wc_review": "338;822;481;613", "wc_reply_reviewers": "190;77;66;182", "wc_reply_authors": "981;562;743;918", "reply_reviewers": "2;1;1;1", "reply_authors": "3;3;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.4330127018922193 ], "presentation_avg": [ 2.5, 0.5 ], "wc_summary_avg": [ 59.75, 43.21675022488387 ], "wc_strengths_avg": [ 43.25, 31.68102744546016 ], "wc_weaknesses_avg": [ 270.5, 63.3975551579081 ], "wc_questions_avg": [ 166.75, 60.94823623370901 ], "wc_limitations_avg": [ 23.25, 11.277743568639961 ], "wc_review_avg": [ 563.5, 178.1354821477181 ], "wc_reply_reviewers_avg": [ 128.75, 57.451610073173754 ], "wc_reply_authors_avg": [ 801.0, 163.22836763259014 ], "reply_reviewers_avg": [ 1.25, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 0.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 2, 0 ], "corr_rating_confidence": -0.4714045207910316, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9426863689487473676&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "email": "yale.edu;yale.edu", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Bridging Geometric States via Geometric Diffusion Bridge", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92944", "id": "zcEPOB9rCR", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zcEPOB9rCR", "openreview": "https://openreview.net/forum?id=zcEPOB9rCR", "poster": "", "project": "", "author_site": "Shengjie Luo, Yixian Xu, Di He, Shuxin Zheng, Tie-Yan Liu, Liwei Wang", "tldr": "", "abstract": "The accurate prediction of geometric state evolution in complex systems is critical for advancing scientific domains such as quantum chemistry and material modeling. Traditional experimental and computational methods face challenges in terms of environmental constraints and computational demands, while current deep learning approaches still fall short in terms of precision and generality. In this work, we introduce the Geometric Diffusion Bridge (GDB), a novel generative modeling framework that accurately bridges initial and target geometric states. GDB leverages a probabilistic approach to evolve geometric state distributions, employing an equivariant diffusion bridge derived by a modified version of Doob's $h$-transform for connecting geometric states. This tailored diffusion process is anchored by initial and target geometric states as fixed endpoints and governed by equivariant transition kernels. Moreover, trajectory data can be seamlessly leveraged in our GDB framework by using a chain of equivariant diffusion bridges, providing a more detailed and accurate characterization of evolution dynamics. Theoretically, we conduct a thorough examination to confirm our framework's ability to preserve joint distributions of geometric states and capability to completely model the underlying dynamics inducing trajectory distributions with negligible error. Experimental evaluations across various real-world scenarios show that GDB surpasses existing state-of-the-art approaches, opening up a new pathway for accurately bridging geometric states and tackling crucial scientific challenges with improved accuracy and applicability.", "keywords": "Bridging Geometric States;Generative Modeling;Geometric Deep Learning;Diffusion Bridge;Doob's h-transform;Equivariance", "primary_area": "generative_models", "supplementary_material": "", "author": "Shengjie Luo;Yixian Xu;Di He;Shuxin Zheng;Tie-Yan Liu;Liwei Wang", "authorids": "~Shengjie_Luo1;~Yixian_Xu1;~Di_He1;~Shuxin_Zheng1;~Tie-Yan_Liu1;~Liwei_Wang1", "gender": "M;M;M;M;M;M", "homepage": "https://lsj2408.github.io;https://github.com/xyx050;https://dihe-pku.github.io/;https://www.microsoft.com/en-us/research/people/shuz/;http://member.acm.org/~tieyanliu;http://www.liweiwang-pku.com/", "dblp": "274/2110;;74/184;186/8255;l/TieYanLiu;", "google_scholar": "ImWO7WYAAAAJ;;https://scholar.google.co.jp/citations?user=orVoz4IAAAAJ;https://scholar.google.co.jp/citations?user=rPhGUw0AAAAJ;Nh832fgAAAAJ;VZHxoh8AAAAJ", "orcid": ";;;;0000-0002-0476-8020;", "linkedin": "shengjie-luo-ba6137193/;;;;;", "or_profile": "~Shengjie_Luo1;~Yixian_Xu1;~Di_He1;~Shuxin_Zheng1;~Tie-Yan_Liu1;~Liwei_Wang1", "aff": "Microsoft;Peking University;Microsoft;Microsoft;Microsoft;Peking University", "aff_domain": "microsoft.com;pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn", "position": "Intern;Undergrad student;Senior Researcher;Senior Researcher;Distinguished Scientist;Full Professor", "bibtex": "@inproceedings{\nluo2024bridging,\ntitle={Bridging Geometric States via Geometric Diffusion Bridge},\nauthor={Shengjie Luo and Yixian Xu and Di He and Shuxin Zheng and Tie-Yan Liu and Liwei Wang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zcEPOB9rCR}\n}", "github": "", "reviewers": "znC7;U4NU;dV6w;Sjdg", "pdf_size": 599148, "rating": "3;5;6;6", "confidence": "4;3;3;2", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;3;4;3", "wc_summary": "80;49;63;110", "wc_strengths": "32;75;45;95", "wc_weaknesses": "79;251;68;69", "wc_questions": "207;48;2;2", "wc_limitations": "27;48;16;10", "wc_review": "425;471;194;286", "wc_reply_reviewers": "158;0;18;20", "wc_reply_authors": "812;319;115;115", "reply_reviewers": "1;0;1;1", "reply_authors": "6;4;3;3", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 3.0, 0.7071067811865476 ], "wc_summary_avg": [ 75.5, 22.74313083108832 ], "wc_strengths_avg": [ 61.75, 24.732316915323562 ], "wc_weaknesses_avg": [ 116.75, 77.62852246436229 ], "wc_questions_avg": [ 64.75, 84.24777445131711 ], "wc_limitations_avg": [ 25.25, 14.48059045757458 ], "wc_review_avg": [ 344.0, 110.1748610164769 ], "wc_reply_reviewers_avg": [ 49.0, 63.41135544995076 ], "wc_reply_authors_avg": [ 340.25, 284.81342577203066 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 4.0, 1.224744871391589 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.8660254037844386, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17359303208106259705&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "microsoft.com;pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;pku.edu.cn", "author_num": 6, "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Microsoft;Peking University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;http://www.pku.edu.cn", "aff_unique_abbr": "Microsoft;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "United States;China" }, { "id": "zeNwOAcb4q", "title": "Estimating Transition Matrix with Diffusion Models for Instance-Dependent Label Noise", "track": "main", "status": "Reject", "tldr": "", "abstract": "Learning with noisy labels is a common problem in weakly supervised learning, where the transition matrix approach is a prevalent method for dealing with label noise. It estimates the transition probabilities from a clean label distribution to a noisy label distribution and has garnered continuous attention. However, existing transition matrix methods predominantly focus on class-dependent noise, making it challenging to incorporate feature information for learning instance-dependent label noise. This paper proposes the idea of using diffusion models for estimating transition matrix in the context of instance-dependent label noise. Specifically, we first estimate grouped transition matrices through clustering. Then, we introduce a process of adding noise and denoising with the transition matrix, incorporating features extracted by unsupervised pre-trained models. The proposed method enables the estimation of instance-dependent transition matrix and extends the application of transition matrix method to a broader range of noisy label data. Experimental results demonstrate the significant effectiveness of our approach on both synthetic and real-world datasets with instance-dependent noise. The code will be open sourced upon acceptance of the paper.", "keywords": "Transition Matrix;Label Noise;Diffusion Models", "primary_area": "diffusion_based_models", "supplementary_material": "", "author": "Haixin Yang;Ruirui Li;Xiangzhong Fang;Yukun Yang;Naihao Wang", "authorids": "~Haixin_Yang1;~Ruirui_Li2;~Xiangzhong_Fang2;~Yukun_Yang3;~Naihao_Wang2", "gender": "M;F;M;M;M", "homepage": "https://github.com/DrawFlatbread/xin;;https://www.math.pku.edu.cn/jsdw/js_20180628175159671361/f_20180628175159671361/69902.htm;https://github.com/Tyrantyyk;https://github.com/wangnaihao", "dblp": ";;;;", "google_scholar": ";Tg_SltkAAAAJ;;;", "orcid": ";;;;", "linkedin": ";;;;", "or_profile": "~Haixin_Yang1;~Ruirui_Li2;~Xiangzhong_Fang2;~Yukun_Yang3;~NaiHao_Wang1", "aff": "Peking University;Beijing University of Chemical Technology;School of mathematical Science, Peking University, Peking University;Beijing University of Chemical Technology;Beijing University of Chemical Technology", "aff_domain": "pku.edu.cn;buct.edu.cn;math.pku.edu.cn;buct.edu.cn;buct.edu.cn", "position": "PhD student;Full Professor;Full Professor;Undergrad student;MS student", "bibtex": "@misc{\nanonymous2024estimating,\ntitle={Estimating Transition Matrix with Diffusion Models for Instance-Dependent Label Noise},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=zeNwOAcb4q}\n}", "github": "", "project": "", "reviewers": "87Qq;YAFq;s31f", "site": "https://openreview.net/forum?id=zeNwOAcb4q", "pdf_size": 361380, "rating": "2;3;3", "confidence": "4;4;5", "soundness": "1;1;1", "novelty": "1;2;1", "presentation": "1;2;2", "wc_summary": "75;94;82", "wc_strengths": "9;33;11", "wc_weaknesses": "262;291;21", "wc_questions": "5;12;165", "wc_limitations": "22;5;4", "wc_review": "373;435;283", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 2.6666666666666665, 0.4714045207910317 ], "confidence_avg": [ 4.333333333333333, 0.4714045207910317 ], "soundness_avg": [ 1.0, 0.0 ], "novelty_avg": [ 1.3333333333333333, 0.4714045207910317 ], "presentation_avg": [ 1.6666666666666667, 0.4714045207910317 ], "wc_summary_avg": [ 83.66666666666667, 7.845734863959881 ], "wc_strengths_avg": [ 17.666666666666668, 10.873004286866728 ], "wc_weaknesses_avg": [ 191.33333333333334, 121.02433180517416 ], "wc_questions_avg": [ 60.666666666666664, 73.8301353709235 ], "wc_limitations_avg": [ 10.333333333333334, 8.259674462242577 ], "wc_review_avg": [ 363.6666666666667, 62.403703593794994 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 4, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.4999999999999999, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:DhdkKqGR10EJ:scholar.google.com/&scioq=Estimating+Transition+Matrix+with+Diffusion+Models+for+Instance-Dependent+Label+Noise&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Peking University;Beijing University of Chemical Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.buct.edu.cn", "aff_unique_abbr": "Peking U;BUCT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Peking", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Exploring Context Window of Large Language Models via Decomposed Positional Vectors", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92943", "id": "zeYyq0GpXO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zeYyq0GpXO", "openreview": "https://openreview.net/forum?id=zeYyq0GpXO", "poster": "/media/PosterPDFs/NeurIPS%202024/92943.png?t=1730896535.2910097", "project": "", "author_site": "Zican Dong, Junyi Li, Xin Men, Xin Zhao, Bingning Wang, Zhen Tian, weipeng chen, Ji-Rong Wen", "tldr": "", "abstract": "Transformer-based large language models (LLMs) typically have a limited context window, resulting in significant performance degradation when processing text beyond the length of the context window. Extensive studies have been proposed to extend the context window and achieve length extrapolation of LLMs, but there is still a lack of in-depth interpretation of these approaches. In this study, we explore the positional information within and beyond the context window for deciphering the underlying mechanism of LLMs. By using a mean-based decomposition method, we disentangle positional vectors from hidden states of LLMs and analyze their formation and effect on attention. Furthermore, when texts exceed the context window, we analyze the change of positional vectors in two settings, i.e., direct extrapolation and context window extension. Based on our findings, we design two training-free context window extension methods, positional vector replacement and attention window extension. Experimental results show that our methods can effectively extend the context window length.", "keywords": "Positional Vector;Context Window;Large Language Model;Length Extrapolation;Context Window Extension", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "zican Dong;Junyi Li;Xin Men;Xin Zhao;Bingning Wang;Zhen Tian;weipeng chen;Ji-Rong Wen", "authorids": "~zican_Dong1;~Junyi_Li4;~Xin_Men1;~Xin_Zhao10;~Bingning_Wang3;~Zhen_Tian1;~weipeng_chen2;~Ji-Rong_Wen1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";http://lijunyi.tech;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;;https://www.tianzhen.xyz;;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "336/7105;;;https://dblp.uni-trier.de/pid/52/8700.html;183/0924;84/8525-1;;w/JRWen", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;zeWrn-4AAAAJ;;JNhNacoAAAAJ;wW00MTkAAAAJ;MBDadZUAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;tbxCHJgAAAAJ", "orcid": ";;;0000-0002-8333-6196;;0000-0001-5569-2591;;0000-0002-9777-9676", "linkedin": ";;;;;;;", "or_profile": "~zican_Dong1;~Junyi_Li4;~Xin_Men1;~Xin_Zhao10;~Bingning_Wang3;~Zhen_Tian1;~weipeng_chen2;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;;Renmin University of China;Beijing Baichuan Intelligence Technology Co., Ltd.;Renmin University of China;Beijing Baichuan Intelligence Technology Co., Ltd.;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;baichuan-ai.com;ruc.edu.cn;baichuan-ai.com;ruc.edu.cn", "position": "PhD student;PhD student;;Full Professor;Researcher;MS student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ndong2024exploring,\ntitle={Exploring Context Window of Large Language Models via Decomposed Positional Vectors},\nauthor={zican Dong and Junyi Li and Xin Men and Xin Zhao and Bingning Wang and Zhen Tian and weipeng chen and Ji-Rong Wen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zeYyq0GpXO}\n}", "github": "", "reviewers": "yDA4;1sVW;R24u", "pdf_size": 11771636, "rating": "6;7;7", "confidence": "4;3;4", "soundness": "2;3;4", "novelty": "3;3;4", "presentation": "3;4;3", "wc_summary": "49;40;58", "wc_strengths": "29;54;74", "wc_weaknesses": "16;15;356", "wc_questions": "159;9;5", "wc_limitations": "2;10;5", "wc_review": "255;128;498", "wc_reply_reviewers": "262;4;0", "wc_reply_authors": "665;0;0", "reply_reviewers": "3;1;0", "reply_authors": "3;1;1", "rating_avg": [ 6.666666666666667, 0.4714045207910317 ], "confidence_avg": [ 3.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.816496580927726 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 49.0, 7.3484692283495345 ], "wc_strengths_avg": [ 52.333333333333336, 18.408935028645434 ], "wc_weaknesses_avg": [ 129.0, 160.51375849648113 ], "wc_questions_avg": [ 57.666666666666664, 71.67209281783873 ], "wc_limitations_avg": [ 5.666666666666667, 3.2998316455372216 ], "wc_review_avg": [ 293.6666666666667, 153.50642404219514 ], "wc_reply_reviewers_avg": [ 88.66666666666667, 122.5760534887989 ], "wc_reply_authors_avg": [ 221.66666666666666, 313.4840063260361 ], "reply_reviewers_avg": [ 1.3333333333333333, 1.247219128924647 ], "reply_authors_avg": [ 1.6666666666666667, 0.9428090415820634 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12798926848599562176&as_sdt=80000005&sciodt=0,23&hl=en", "gs_version_total": 5, "email": "ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;baichuan-ai.com;ruc.edu.cn;baichuan-ai.com;ruc.edu.cn", "author_num": 8, "aff_unique_index": "0;0;0;1;0;1;0", "aff_unique_norm": "Renmin University of China;Beijing Baichuan Intelligence Technology Co., Ltd.", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;", "aff_unique_abbr": "RUC;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "SeeClear: Semantic Distillation Enhances Pixel Condensation for Video Super-Resolution", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92942", "id": "zeaBrGv7Ll", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zeaBrGv7Ll", "openreview": "https://openreview.net/forum?id=zeaBrGv7Ll", "poster": "/media/PosterPDFs/NeurIPS%202024/92942.png?t=1731847800.0847738", "project": "", "author_site": "Qi Tang, Yao Zhao, Meiqin Liu, Chao Yao", "tldr": "", "abstract": "Diffusion-based Video Super-Resolution (VSR) is renowned for generating perceptually realistic videos, yet it grapples with maintaining detail consistency across frames due to stochastic fluctuations. The traditional approach of pixel-level alignment is ineffective for diffusion-processed frames because of iterative disruptions. To overcome this, we introduce SeeClear--a novel VSR framework leveraging conditional video generation, orchestrated by instance-centric and channel-wise semantic controls. This framework integrates a Semantic Distiller and a Pixel Condenser, which synergize to extract and upscale semantic details from low-resolution frames. The Instance-Centric Alignment Module (InCAM) utilizes video-clip-wise tokens to dynamically relate pixels within and across frames, enhancing coherency. Additionally, the Channel-wise Texture Aggregation Memory (CaTeGory) infuses extrinsic knowledge, capitalizing on long-standing semantic textures. Our method also innovates the blurring diffusion process with the ResShift mechanism, finely balancing between sharpness and diffusion effects. Comprehensive experiments confirm our framework's advantage over state-of-the-art diffusion-based VSR techniques.", "keywords": "Video Super-Resolution;Diffusion Model", "primary_area": "machine_vision", "supplementary_material": "", "author": "Qi Tang;Yao Zhao;Meiqin Liu;Chao Yao", "authorids": "~Qi_Tang3;~Yao_Zhao1;~Meiqin_Liu1;~Chao_Yao1", "gender": "M;M;F;M", "homepage": "http://tang5618.com;http://mepro.bjtu.edu.cn;http://faculty.bjtu.edu.cn/8002/;https://yaochao1986.github.io/", "dblp": ";45/2091-1.html;;", "google_scholar": "TZQUv1MAAAAJ;474TbQYAAAAJ;;5FpD9m0AAAAJ", "orcid": ";;;0000-0001-5483-3225", "linkedin": ";;;", "or_profile": "~Qi_Tang3;~Yao_Zhao1;~Meiqin_Liu1;~Chao_Yao1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;University of Science and Technology Beijing", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;ustb.edu.cn", "position": "MS student;Full Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ntang2024seeclear,\ntitle={SeeClear: Semantic Distillation Enhances Pixel Condensation for Video Super-Resolution},\nauthor={Qi Tang and Yao Zhao and Meiqin Liu and Chao Yao},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zeaBrGv7Ll}\n}", "github": "", "reviewers": "NXC1;srYj;ERJ1;vLVk", "pdf_size": 30999271, "rating": "3;5;6;6", "confidence": "5;4;4;4", "soundness": "2;2;2;3", "novelty": "2;3;3;3", "presentation": "2;3;3;3", "wc_summary": "97;70;52;72", "wc_strengths": "59;59;91;12", "wc_weaknesses": "65;91;106;431", "wc_questions": "99;36;72;185", "wc_limitations": "6;7;9;10", "wc_review": "326;263;330;710", "wc_reply_reviewers": "0;12;62;103", "wc_reply_authors": "0;0;0;1204", "reply_reviewers": "0;1;1;2", "reply_authors": "1;1;1;4", "rating_avg": [ 5.0, 1.224744871391589 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 72.75, 16.021469970012117 ], "wc_strengths_avg": [ 55.25, 28.181332473820326 ], "wc_weaknesses_avg": [ 173.25, 149.53323209240145 ], "wc_questions_avg": [ 98.0, 54.97726802961384 ], "wc_limitations_avg": [ 8.0, 1.5811388300841898 ], "wc_review_avg": [ 407.25, 176.8012655497692 ], "wc_reply_reviewers_avg": [ 44.25, 41.12405014100629 ], "wc_reply_authors_avg": [ 301.0, 521.347293078232 ], "reply_reviewers_avg": [ 1.0, 0.7071067811865476 ], "reply_authors_avg": [ 1.75, 1.299038105676658 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.9428090415820632, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16737891935691686009&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "email": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;ustb.edu.cn", "author_num": 4, "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Beijing Jiao Tong University;University of Science and Technology Beijing", "aff_unique_dep": ";", "aff_unique_url": "http://www.njtu.edu.cn/en;http://www.ustb.edu.cn", "aff_unique_abbr": "BJTU;USTB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "XLand-MiniGrid: Scalable Meta-Reinforcement Learning Environments in JAX", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97425", "id": "zg8dpAGl1I", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zg8dpAGl1I", "openreview": "https://openreview.net/forum?id=zg8dpAGl1I", "poster": "/media/PosterPDFs/NeurIPS%202024/97425.png?t=1733819927.1396108", "project": "", "author_site": "Alexander Nikulin, Vladislav Kurenkov, Ilya Zisman, Artem Agarkov, Viacheslav Sinii, Sergey Kolesnikov", "tldr": "", "abstract": "Inspired by the diversity and depth of XLand and the simplicity and minimalism of MiniGrid, we present XLand-MiniGrid, a suite of tools and grid-world environments for meta-reinforcement learning research. Written in JAX, XLand-MiniGrid is designed to be highly scalable and can potentially run on GPU or TPU accelerators, democratizing large-scale experimentation with limited resources. Along with the environments, XLand-MiniGrid provides pre-sampled benchmarks with millions of unique tasks of varying difficulty and easy-to-use baselines that allow users to quickly start training adaptive agents. In addition, we have conducted a preliminary analysis of scaling and generalization, showing that our baselines are capable of reaching millions of steps per second during training and validating that the proposed benchmarks are challenging. XLand-MiniGrid is open-source and available at \\url{https://github.com/corl-team/xland-minigrid}.", "keywords": "reinforcement learning;meta-reinforcement learning;jax accelerated environments;xland", "primary_area": "", "supplementary_material": "", "author": "Alexander Nikulin;Vladislav Kurenkov;Ilya Zisman;Artem Sergeevich Agarkov;Viacheslav Sinii;Sergey Kolesnikov", "authorids": "~Alexander_Nikulin1;~Vladislav_Kurenkov1;~Ilya_Zisman1;~Artem_Sergeevich_Agarkov2;~Viacheslav_Sinii1;~Sergey_Kolesnikov1", "gender": "M;M;Not Specified;M;M;M", "homepage": "https://howuhh.github.io/;https://vkurenkov.me;https://zis.mn/;;https://t.me/identiki_t;https://scitator.com", "dblp": "314/6349;251/9126;;;351/7957;191/1945", "google_scholar": "yACvnqUAAAAJ;w09vtVsAAAAJ;tmh78sQAAAAJ;;IO-blf8AAAAJ;iukbpVEAAAAJ", "orcid": ";0000-0003-4078-1086;;;;", "linkedin": ";;suessmann/;artem-agarkov/;;scitator/", "or_profile": "~Alexander_Nikulin1;~Vladislav_Kurenkov1;~Ilya_Zisman1;~Artem_Sergeevich_Agarkov2;~Viacheslav_Sinii1;~Sergey_Kolesnikov1", "aff": "Moscow Institute of Physics and Technology;Tinkoff;Higher School of Economics;Moscow Institute of Physics and Technology;Innopolis University;Tinkoff", "aff_domain": "mipt.edu;tinkoff.ai;hse.ru;mipt.ru;innopolis.ru;tinkoff.ru", "position": "PhD student;Researcher;MS student;Undergrad student;Undergrad student;Principal Researcher", "bibtex": "@inproceedings{\nnikulin2024xlandminigrid,\ntitle={{XL}and-MiniGrid: Scalable Meta-Reinforcement Learning Environments in {JAX}},\nauthor={Alexander Nikulin and Vladislav Kurenkov and Ilya Zisman and Artem Sergeevich Agarkov and Viacheslav Sinii and Sergey Kolesnikov},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=zg8dpAGl1I}\n}", "github": "", "reviewers": "vqDw;788o;R62J;Q9K1", "pdf_size": 1345645, "rating": "5;6;7;8", "confidence": "5;4;4;4", "wc_summary_and_contributions": "53;44;82;52", "wc_strengths": "2;28;75;4", "wc_improvement": "2;66;266;4", "wc_limitations": "2;40;66;67", "wc_correctness": "2;11;58;42", "wc_clarity": "2;8;7;4", "wc_relation_to_prior_work": "2;16;92;13", "wc_documentation": "1;9;24;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "67;223;671;188", "wc_reply_reviewers": "0;316;410;43", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;4;1;1", "reply_authors": "2;3;1;1", "rating_avg": [ 6.5, 1.118033988749895 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 57.75, 14.428704030508076 ], "wc_strengths_avg": [ 27.25, 29.40556920040828 ], "wc_improvement_avg": [ 84.5, 107.9015755213982 ], "wc_limitations_avg": [ 43.75, 26.423237878806603 ], "wc_correctness_avg": [ 28.25, 22.69774217846348 ], "wc_clarity_avg": [ 5.25, 2.384848003542364 ], "wc_relation_to_prior_work_avg": [ 30.75, 35.74475485997911 ], "wc_documentation_avg": [ 8.75, 9.390819985496474 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 287.25, 228.99385908796768 ], "wc_reply_reviewers_avg": [ 192.25, 174.61726002889864 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.5, 1.5 ], "reply_authors_avg": [ 1.75, 0.82915619758885 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.7745966692414834, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5901798189621569908&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "email": "mipt.edu;tinkoff.ai;hse.ru;mipt.ru;innopolis.ru;tinkoff.ru", "author_num": 6, "aff_unique_index": "0;1;2;0;3;1", "aff_unique_norm": "Moscow Institute of Physics and Technology;Tinkoff Bank;Higher School of Economics;Innopolis University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.mipt.ru/en;https://www.tinkoff.ru;https://www.hse.ru;https://innopolis.ru/en", "aff_unique_abbr": "MIPT;Tinkoff;HSE;Innopolis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Point Cloud Matters: Rethinking the Impact of Different Observation Spaces on Robot Learning", "status": "Poster", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97424", "id": "zgSnSZ0Re6", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zgSnSZ0Re6", "openreview": "https://openreview.net/forum?id=zgSnSZ0Re6", "poster": "/media/PosterPDFs/NeurIPS%202024/97424.png?t=1731585324.6755955", "project": "", "author_site": "Haoyi Zhu, Yating Wang, Di Huang, Weicai Ye, Wanli Ouyang, Tong He", "tldr": "", "abstract": "In robot learning, the observation space is crucial due to the distinct characteristics of different modalities, which can potentially become a bottleneck alongside policy design. In this study, we explore the influence of various observation spaces on robot learning, focusing on three predominant modalities: RGB, RGB-D, and point cloud. We introduce OBSBench, a benchmark comprising two simulators and 125 tasks, along with standardized pipelines for various encoders and policy baselines. Extensive experiments on diverse contact-rich manipulation tasks reveal a notable trend: point cloud-based methods, even those with the simplest designs, frequently outperform their RGB and RGB-D counterparts. This trend persists in both scenarios: training from scratch and utilizing pre-training. Furthermore, our findings demonstrate that point cloud observations often yield better policy performance and significantly stronger generalization capabilities across various geometric and visual conditions. These outcomes suggest that the 3D point cloud is a valuable observation modality for intricate robotic tasks. We also suggest that incorporating both appearance and coordinate information can enhance the performance of point cloud methods. We hope our work provides valuable insights and guidance for designing more generalizable and robust robotic models.", "keywords": "Point Cloud;RGB-D images;Robot Learning;Observation Space;Pre-trained Visual Representations;Robot Manipulation", "primary_area": "", "supplementary_material": "", "author": "Haoyi Zhu;Yating Wang;Di Huang;Weicai Ye;Wanli Ouyang;Tong He", "authorids": "~Haoyi_Zhu1;~Yating_Wang3;~Di_Huang6;~Weicai_Ye3;~Wanli_Ouyang1;~Tong_He2", "gender": "M;F;;M;;M", "homepage": "https://www.haoyizhu.site/;https://github.com/xiaoxiao0406;;https://ywcmaike.github.io/;;http://tonghe90.github.io/", "dblp": ";;;02/10372;;02/1554-1", "google_scholar": "pD1NOyUAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN;;kWADCMUAAAAJ", "orcid": ";;;0000-0001-6215-1347;;0000-0003-2772-9320", "linkedin": ";;;weicai-ye-b9b36b129/;;", "or_profile": "~Haoyi_Zhu1;~Yating_Wang3;~Di_Huang6;~Weicai_Ye3;~Wanli_Ouyang1;~Tong_He2", "aff": "University of Science and Technology of China;Northwest Polytechnical University Xi'an;;Zhejiang University;;Shanghai AI lab", "aff_domain": "ustc.edu.cn;nwpu.edu.cn;;zju.edu.cn;;pjlab.org.cn", "position": "PhD student;Undergrad student;;PhD student;;Researcher", "bibtex": "@inproceedings{\nzhu2024point,\ntitle={Point Cloud Matters: Rethinking the Impact of Different Observation Spaces on Robot Learning},\nauthor={Haoyi Zhu and Yating Wang and Di Huang and Weicai Ye and Wanli Ouyang and Tong He},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=zgSnSZ0Re6}\n}", "github": "", "reviewers": "sM5n;jcCF;r3vY;EBqC", "pdf_size": 0, "rating": "6;7;7;8", "confidence": "4;3;4;4", "wc_summary_and_contributions": "126;59;64;29", "wc_strengths": "121;5;57;94", "wc_improvement": "322;5;74;106", "wc_limitations": "9;5;21;2", "wc_correctness": "4;5;1;1", "wc_clarity": "1;5;6;1", "wc_relation_to_prior_work": "1;5;1;1", "wc_documentation": "1;5;1;1", "wc_additional_feedback": "1;1;1;1", "wc_review": "586;95;226;236", "wc_reply_reviewers": "14;38;44;0", "wc_reply_authors": "89;62;89;32", "reply_reviewers": "1;1;1;0", "reply_authors": "4;2;4;2", "rating_avg": [ 7.0, 0.7071067811865476 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "wc_summary_and_contributions_avg": [ 69.5, 35.259750424527965 ], "wc_strengths_avg": [ 69.25, 43.49928160326329 ], "wc_improvement_avg": [ 126.75, 118.48918727040034 ], "wc_limitations_avg": [ 9.25, 7.224091638399945 ], "wc_correctness_avg": [ 2.75, 1.7853571071357126 ], "wc_clarity_avg": [ 3.25, 2.277608394786075 ], "wc_relation_to_prior_work_avg": [ 2.0, 1.7320508075688772 ], "wc_documentation_avg": [ 2.0, 1.7320508075688772 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 285.75, 182.05819811258158 ], "wc_reply_reviewers_avg": [ 24.0, 17.832554500127006 ], "wc_reply_authors_avg": [ 68.0, 23.526580712037184 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 3.0, 1.0 ], "replies_avg": [ 24, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5282509461129368320&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 5, "email": "ustc.edu.cn;nwpu.edu.cn;;zju.edu.cn;;pjlab.org.cn", "author_num": 6, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Science and Technology of China;Northwest Polytechnical University;Zhejiang University;Shanghai AI Lab", "aff_unique_dep": ";;;AI Research", "aff_unique_url": "http://www.ustc.edu.cn;http://www.nwpu.edu.cn;https://www.zju.edu.cn;https://www.shanghaiailab.com", "aff_unique_abbr": "USTC;NWPU;ZJU;Shanghai AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Xi'an", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Learning the Optimal Policy for Balancing Short-Term and Long-Term Rewards", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92941", "id": "zgh0ChWocO", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zgh0ChWocO", "openreview": "https://openreview.net/forum?id=zgh0ChWocO", "poster": "/media/PosterPDFs/NeurIPS%202024/92941.png?t=1731330259.967758", "project": "", "author_site": "Qinwei Yang, Xueqing Liu, Yan Zeng, Ruocheng Guo, Yang Liu, Peng Wu", "tldr": "", "abstract": "Learning the optimal policy to balance multiple short-term and long-term rewards has extensive applications across various domains. Yet, there is a noticeable scarcity of research addressing policy learning strategies in this context. In this paper, we aim to learn the optimal policy capable of effectively balancing multiple short-term and long-term rewards, especially in scenarios where the long-term outcomes are often missing due to data collection challenges over extended periods. Towards this goal, the conventional linear weighting method, which aggregates multiple rewards into a single surrogate reward through weighted summation, can only achieve sub-optimal policies when multiple rewards are related. Motivated by this, we propose a novel decomposition-based policy learning (DPPL) method that converts the whole problem into subproblems. The DPPL method is capable of obtaining optimal policies even when multiple rewards are interrelated. Nevertheless, the DPPL method requires a set of preference vectors specified in advance, posing challenges in practical applications where selecting suitable preferences is non-trivial. To mitigate this, we further theoretically transform the optimization problem in DPPL into an $\\varepsilon$-constraint problem, where $\\varepsilon$ represents the minimum acceptable levels of other rewards while maximizing one reward. This transformation provides intuitive into the selection of preference vectors. Extensive experiments are conducted on the proposed method and the results validate the effectiveness of the method.", "keywords": "Policy Learning;Short-Term and Long-Term Rewards;Causal Inference;Decision Making", "primary_area": "causal_inference", "supplementary_material": "/attachment/bae89354816bc00b1a6aa0b9272a42402039a7d1.zip", "author": "Qinwei Yang;Xueqing Liu;Yan Zeng;Ruocheng Guo;Yang Liu;Peng Wu", "authorids": "~Qinwei_Yang2;~Xueqing_Liu6;~Yan_Zeng2;~Ruocheng_Guo1;~Yang_Liu3;~Peng_Wu5", "gender": "M;;M;M;M;", "homepage": ";https://scholar.google.com/citations?user=XyxLHCAAAAAJ&hl=zh-CN;https://rguo12.github.io;http://www.yliuu.com;https://pengwu.site/;", "dblp": ";83/4665-2;167/4378;51/3710-18;15/6146-12;", "google_scholar": ";XyxLHCAAAAAJ;8Nuj8NwAAAAJ;jKrIVCIAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "orcid": "0009-0004-4754-8994;0000-0001-7721-2560;;0000-0001-8420-6011;0000-0001-7154-8880;0009-0005-7155-7011", "linkedin": ";;;;;", "or_profile": "~Qinwei_Yang2;~Yan_Zeng2;~Ruocheng_Guo1;~Yang_Liu3;~Peng_Wu5;~XueqingLiu2", "aff": "Beijing Technology and Business University;Beijing Technology and Business University;Bytedance Research;University of California, Santa Cruz;Beijing Technology and Business University;Beijing Technology and Business University", "aff_domain": "btbu.edu.cn;btbu.edu.cn;bytedance.com;ucsc.edu;btbu.edu.cn;btbu.edu.cn", "position": "MS student;Lecturer;Researcher;Assistant Professor;Associate Professor;MS student", "bibtex": "@inproceedings{\nyang2024learning,\ntitle={Learning the Optimal Policy for Balancing Short-Term and Long-Term Rewards},\nauthor={Qinwei Yang and Xueqing Liu and Yan Zeng and Ruocheng Guo and Yang Liu and Peng Wu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zgh0ChWocO}\n}", "github": "", "reviewers": "hVvL;xruw;B23s;ouKL", "pdf_size": 649847, "rating": "4;5;6;6", "confidence": "2;1;3;3", "soundness": "3;3;3;3", "novelty": "3;3;3;3", "presentation": "3;3;3;3", "wc_summary": "109;60;104;99", "wc_strengths": "70;38;75;101", "wc_weaknesses": "347;37;115;93", "wc_questions": "5;4;92;47", "wc_limitations": "1;1;20;2", "wc_review": "532;140;406;342", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "57;57;57;57", "reply_reviewers": "0;0;0;0", "reply_authors": "2;2;2;2", "rating_avg": [ 5.25, 0.82915619758885 ], "confidence_avg": [ 2.25, 0.82915619758885 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 93.0, 19.3778223750761 ], "wc_strengths_avg": [ 71.0, 22.39419567655869 ], "wc_weaknesses_avg": [ 148.0, 118.35962149314267 ], "wc_questions_avg": [ 37.0, 36.18701424544446 ], "wc_limitations_avg": [ 6.0, 8.093207028119323 ], "wc_review_avg": [ 355.0, 141.70744511139844 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 57.0, 0.0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.6363636363636364, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:QdE7b3bxs0YJ:scholar.google.com/&scioq=Learning+the+Optimal+Policy+for+Balancing+Short-Term+and+Long-Term+Rewards&hl=en&as_sdt=0,5", "gs_version_total": 2, "email": "btbu.edu.cn;btbu.edu.cn;bytedance.com;ucsc.edu;btbu.edu.cn;btbu.edu.cn", "author_num": 6, "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Beijing Technology and Business University;ByteDance;University of California, Santa Cruz", "aff_unique_dep": ";Bytedance Research;", "aff_unique_url": "http://www.btbu.edu.cn;https://www.bytedance.com;https://www.ucsc.edu", "aff_unique_abbr": "BTBU;Bytedance;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Entrywise error bounds for low-rank approximations of kernel matrices", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92940", "id": "ziYC4FHRNr", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ziYC4FHRNr", "openreview": "https://openreview.net/forum?id=ziYC4FHRNr", "poster": "", "project": "", "tldr": "", "abstract": "In this paper, we derive *entrywise* error bounds for low-rank approximations of kernel matrices obtained using the truncated eigen-decomposition (or singular value decomposition). While this approximation is well-known to be optimal with respect to the spectral and Frobenius norm error, little is known about the statistical behaviour of individual entries. Our error bounds fill this gap. A key technical innovation is a delocalisation result for the eigenvectors of the kernel matrix corresponding to small eigenvalues, which takes inspiration from the field of Random Matrix Theory. Finally, we validate our theory with an empirical study of a collection of synthetic and real-world datasets.", "keywords": "low-rank approximation;kernel methods;SVD;theory;error bounds", "primary_area": "learning_theory", "supplementary_material": "", "author": "Alexander Modell", "authorids": "~Alexander_Modell1", "gender": "M", "homepage": "http://alexandermodell.github.io/", "dblp": "", "google_scholar": "https://scholar.google.com/citations?hl=en", "orcid": "0000-0002-0074-8145", "linkedin": "", "or_profile": "~Alexander_Modell1", "aff": "Imperial College London", "aff_domain": "imperial.ac.uk", "position": "Postdoc", "bibtex": "@inproceedings{\nmodell2024entrywise,\ntitle={Entrywise error bounds for low-rank approximations of kernel matrices},\nauthor={Alexander Modell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ziYC4FHRNr}\n}", "github": "", "reviewers": "3p4Z;tJPJ;2JjA", "pdf_size": 541598, "rating": "5;5;6", "confidence": "3;4;3", "soundness": "3;2;3", "novelty": "2;3;3", "presentation": "3;4;3", "wc_summary": "48;80;97", "wc_strengths": "62;26;76", "wc_weaknesses": "192;117;114", "wc_questions": "73;86;328", "wc_limitations": "9;11;6", "wc_review": "384;320;621", "wc_reply_reviewers": "62;52;10", "wc_reply_authors": "315;117;0", "reply_reviewers": "1;1;1", "reply_authors": "2;2;1", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 3.3333333333333335, 0.4714045207910317 ], "soundness_avg": [ 2.6666666666666665, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.4714045207910317 ], "wc_summary_avg": [ 75.0, 20.314198646923455 ], "wc_strengths_avg": [ 54.666666666666664, 21.06075866524175 ], "wc_weaknesses_avg": [ 141.0, 36.08323710533743 ], "wc_questions_avg": [ 162.33333333333334, 117.26418416928884 ], "wc_limitations_avg": [ 8.666666666666666, 2.0548046676563256 ], "wc_review_avg": [ 441.6666666666667, 129.47157560209456 ], "wc_reply_reviewers_avg": [ 41.333333333333336, 22.528993664954402 ], "wc_reply_authors_avg": [ 144.0, 130.0076920801227 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6666666666666667, 0.4714045207910317 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 1, 0 ], "corr_rating_confidence": -0.4999999999999999, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13906514978757452966&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "imperial.ac.uk", "author_num": 1, "aff_unique_index": "0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Enhancing Robustness of Graph Neural Networks on Social Media with Explainable Inverse Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92939", "id": "ziehA15y8k", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ziehA15y8k", "openreview": "https://openreview.net/forum?id=ziehA15y8k", "poster": "/media/PosterPDFs/NeurIPS%202024/92939.png?t=1731628917.6664598", "project": "", "author_site": "Yuefei Lyu, Chaozhuo Li, Sihong Xie, Xi Zhang", "tldr": "", "abstract": "Adversarial attacks against graph neural networks (GNNs) through perturbations of the graph structure are increasingly common in social network tasks like rumor detection. Social media platforms capture diverse attack sequence samples through both machine and manual screening processes. Investigating effective ways to leverage these adversarial samples to enhance robustness is imperative. We improve the maximum entropy inverse reinforcement learning (IRL) method with the mixture-of-experts approach to address multi-source graph adversarial attacks. This method reconstructs the attack policy, integrating various attack models and providing feature-level explanations, subsequently generating additional adversarial samples to fortify the robustness of detection models. We develop precise sample guidance and a bidirectional update mechanism to reduce the deviation caused by imprecise feature representation and negative sampling within the large action space of social graphs, while also accelerating policy learning. We take rumor detector as an example targeted GNN model on real-world rumor datasets. By utilizing a small subset of samples generated by various graph adversarial attack methods, we reconstruct the attack policy, closely approximating the performance of the original attack method. We validate that samples generated by the learned policy enhance model robustness through adversarial training and data augmentation.", "keywords": "graph adverisarial attack;reinforcement learning;inverse reinforcement learning;graph neural networks", "primary_area": "graph_neural_networks", "supplementary_material": "", "author": "Yuefei Lyu;Chaozhuo Li;Sihong Xie;Xi Zhang", "authorids": "~Yuefei_Lyu1;~Chaozhuo_Li1;~Sihong_Xie1;~Xi_Zhang12", "gender": "F;;M;M", "homepage": "https://www.researchgate.net/profile/Yuefei-Lyu;https://scss.bupt.edu.cn/info/1063/5534.htm;https://sihongxie.github.io/index.html;https://www.linkedin.com/in/xi-zhang-a1128b51/", "dblp": ";316/1269.html;67/1229;87/1222-8", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;qRp1xZwAAAAJ;6sRtx0cAAAAJ", "orcid": ";0000-0002-8179-7503;0000-0003-1060-8506;0000-0002-2111-7385", "linkedin": ";;;", "or_profile": "~Yuefei_Lyu1;~Chaozhuo_Li1;~Sihong_Xie1;~Xi_Zhang12", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;HKUST-GZ;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;hkust-gz.edu.cn;bupt.edu.cn", "position": "PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlyu2024enhancing,\ntitle={Enhancing Robustness of Graph Neural Networks on Social Media with Explainable Inverse Reinforcement Learning},\nauthor={Yuefei Lyu and Chaozhuo Li and Sihong Xie and Xi Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ziehA15y8k}\n}", "github": "", "reviewers": "QAYk;sELL;M7DL;E98Q", "pdf_size": 910317, "rating": "5;7;8;8", "confidence": "5;3;4;4", "soundness": "3;3;3;3", "novelty": "3;3;4;4", "presentation": "2;2;4;3", "wc_summary": "67;73;94;99", "wc_strengths": "29;87;95;124", "wc_weaknesses": "220;216;76;84", "wc_questions": "6;27;55;57", "wc_limitations": "7;34;18;8", "wc_review": "329;437;338;372", "wc_reply_reviewers": "31;55;27;23", "wc_reply_authors": "142;96;57;0", "reply_reviewers": "1;1;1;1", "reply_authors": "3;2;2;1", "rating_avg": [ 7.0, 1.224744871391589 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.5, 0.5 ], "presentation_avg": [ 2.75, 0.82915619758885 ], "wc_summary_avg": [ 83.25, 13.534677683639163 ], "wc_strengths_avg": [ 83.75, 34.47734763580284 ], "wc_weaknesses_avg": [ 149.0, 69.0724257573165 ], "wc_questions_avg": [ 36.25, 21.111312133545844 ], "wc_limitations_avg": [ 16.75, 10.848386976873567 ], "wc_review_avg": [ 369.0, 42.40872551728005 ], "wc_reply_reviewers_avg": [ 34.0, 12.449899597988733 ], "wc_reply_authors_avg": [ 73.75, 52.136239795366905 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6814492235552088842&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 2, "email": "bupt.edu.cn;bupt.edu.cn;hkust-gz.edu.cn;bupt.edu.cn", "author_num": 4, "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Hong Kong University of Science and Technology (Guangzhou)", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.ust.hk", "aff_unique_abbr": "BUPT;HKUST", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Beijing;Guangzhou", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "TPR: Topology-Preserving Reservoirs for Generalized Zero-Shot Learning", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92938", "id": "zkfCa4oESF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zkfCa4oESF", "openreview": "https://openreview.net/forum?id=zkfCa4oESF", "poster": "/media/PosterPDFs/NeurIPS%202024/92938.png?t=1729994466.9256206", "project": "", "author_site": "Hui Chen, Yanbin Liu, Yongqiang Ma, Nanning Zheng, Xin Yu", "tldr": "", "abstract": "Pre-trained vision-language models (VLMs) such as CLIP have shown excellent performance for zero-shot classification. Based on CLIP, recent methods design various learnable prompts to evaluate the zero-shot generalization capability on a base-to-novel setting. This setting assumes test samples are already divided into either base or novel classes, limiting its application to realistic scenarios. In this paper, we focus on a more challenging and practical setting: generalized zero-shot learning (GZSL), i.e., testing with no information about the base/novel division. To address this challenging zero-shot problem, we introduce two unique designs that enable us to classify an image without the need of knowing whether it comes from seen or unseen classes. Firstly, most existing methods only adopt a single latent space to align visual and linguistic features, which has a limited ability to represent complex visual-linguistic patterns, especially for fine-grained tasks. Instead, we propose a dual-space feature alignment module that effectively augments the latent space with a novel attribute space induced by a well-devised attribute reservoir. In particular, the attribute reservoir consists of a static vocabulary and learnable tokens complementing each other for flexible control over feature granularity. Secondly, finetuning CLIP models (e.g., prompt learning) on seen base classes usually sacrifices the model's original generalization capability on unseen novel classes. To mitigate this issue, we present a new topology-preserving objective that can enforce feature topology structures of the combined base and novel classes to resemble the topology of CLIP. In this manner, our model will inherit the generalization ability of CLIP through maintaining the pairwise class angles in the attribute space. Extensive experiments on twelve object recognition datasets demonstrate that our model, termed Topology-Preserving Reservoir (TPR), outperforms strong baselines including both prompt learning and conventional generative-based zero-shot methods.", "keywords": "Generalized Zero-shot Learning;Vision-Language Models;Contrastive Learning", "primary_area": "machine_vision", "supplementary_material": "", "author": "Hui Chen;Yanbin Liu;Yongqiang Ma;Nanning Zheng;Xin Yu", "authorids": "~Hui_Chen2;~Yanbin_Liu1;~Yongqiang_Ma3;~Nanning_Zheng1;~Xin_Yu1", "gender": "M;M;M;M;M", "homepage": ";https://csyanbin.github.io/;;https://sites.google.com/view/xinyus-homepage/Home;http://gr.xjtu.edu.cn/web/musaqy", "dblp": "12/417;;07/256-1;54/1184-2;", "google_scholar": "dK1jEiAAAAAJ;https://scholar.google.com.hk/citations?user=znb27R8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;oxdtuSEAAAAJ;p1SknbIAAAAJ", "orcid": "0000-0002-7108-4067;0000-0003-4724-8065;;0000-0002-0269-5649;0000-0002-6063-5601", "linkedin": ";;;;", "or_profile": "~Hui_Chen2;~Yanbin_Liu1;~Nanning_Zheng1;~Xin_Yu1;~YONGQIANG_MA1", "aff": "Xi'an Jiaotong University;Australian National University;Xi'an Jiaotong University;University of Queensland;Xi'an Jiaotong University", "aff_domain": "xjtu.edu;anu.edu.au;xjtu.edu.cn;uq.edu.au;xjtu.edu.cn", "position": "PhD student;Postdoc;Full Professor;Senior Lecturer;Assistant Professor", "bibtex": "@inproceedings{\nchen2024tpr,\ntitle={{TPR}: Topology-Preserving Reservoirs for Generalized Zero-Shot Learning},\nauthor={Hui Chen and Yanbin Liu and Yongqiang Ma and Nanning Zheng and Xin Yu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zkfCa4oESF}\n}", "github": "", "reviewers": "k9s8;8m4c;TcJo;BqNV", "pdf_size": 19502067, "rating": "4;6;6;7", "confidence": "5;3;5;5", "soundness": "3;3;3;4", "novelty": "2;3;3;1", "presentation": "3;3;3;4", "wc_summary": "70;73;125;103", "wc_strengths": "55;28;66;68", "wc_weaknesses": "105;80;203;3", "wc_questions": "1;29;6;72", "wc_limitations": "1;1;37;3", "wc_review": "232;211;437;249", "wc_reply_reviewers": "0;0;0;0", "wc_reply_authors": "67;0;0;0", "reply_reviewers": "0;0;0;0", "reply_authors": "2;1;1;1", "rating_avg": [ 5.75, 1.0897247358851685 ], "confidence_avg": [ 4.5, 0.8660254037844386 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 92.75, 22.65364209128413 ], "wc_strengths_avg": [ 54.25, 15.943258763502524 ], "wc_weaknesses_avg": [ 97.75, 71.45409365459757 ], "wc_questions_avg": [ 27.0, 28.044607324760317 ], "wc_limitations_avg": [ 10.5, 15.321553446044563 ], "wc_review_avg": [ 282.25, 90.35312667528446 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 16.75, 29.011851026778693 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 11, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.13245323570650439, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u-QxoVoBRLQJ:scholar.google.com/&scioq=TPR:+Topology-Preserving+Reservoirs+for+Generalized+Zero-Shot+Learning&hl=en&as_sdt=0,5", "gs_version_total": 3, "email": "xjtu.edu;anu.edu.au;xjtu.edu.cn;uq.edu.au;xjtu.edu.cn", "author_num": 5, "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Xi'an Jiao Tong University;Australian National University;University of Queensland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.anu.edu.au;https://www.uq.edu.au", "aff_unique_abbr": "XJTU;ANU;UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "China;Australia" }, { "title": "Unsupervised Homography Estimation on Multimodal Image Pair via Alternating Optimization", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92937", "id": "zkhyrxlwqH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zkhyrxlwqH", "openreview": "https://openreview.net/forum?id=zkhyrxlwqH", "poster": "/media/PosterPDFs/NeurIPS%202024/92937.png?t=1731417280.9906673", "project": "", "author_site": "Sanghyeob Song, Jaihyun Lew, Hyemi Jang, Sungroh Yoon", "tldr": "", "abstract": "Estimating the homography between two images is crucial for mid- or high-level vision tasks, such as image stitching and fusion. However, using supervised learning methods is often challenging or costly due to the difficulty of collecting ground-truth data. In response, unsupervised learning approaches have emerged. Most early methods, though, assume that the given image pairs are from the same camera or have minor lighting differences. Consequently, while these methods perform effectively under such conditions, they generally fail when input image pairs come from different domains, referred to as multimodal image pairs.\nTo address these limitations, we propose AltO, an unsupervised learning framework for estimating homography in multimodal image pairs. Our method employs a two-phase alternating optimization framework, similar to Expectation-Maximization (EM), where one phase reduces the geometry gap and the other addresses the modality gap. To handle these gaps, we use Barlow Twins loss for the modality gap and propose an extended version, Geometry Barlow Twins, for the geometry gap. As a result, we demonstrate that our method, AltO, can be trained on multimodal datasets without any ground-truth data. It not only outperforms other unsupervised methods but is also compatible with various architectures of homography estimators.\nThe source code can be found at: https://github.com/songsang7/AltO", "keywords": "Homography Estimation;Unsupervised Learning;Cross Domain;Image Registration;Image Alignment;Multimodal;Alternating Optimization", "primary_area": "machine_vision", "supplementary_material": "", "author": "Sanghyeob Song;Jaihyun Lew;Hyemi Jang;Sungroh Yoon", "authorids": "~Sanghyeob_Song1;~Jaihyun_Lew1;~Hyemi_Jang1;~Sungroh_Yoon1", "gender": ";M;;", "homepage": ";;http://data.snu.ac.kr;http://ailab.snu.ac.kr", "dblp": ";306/8963;224/0270;99/1474", "google_scholar": "zTF9o24AAAAJ;LeF2vTkAAAAJ;;Bphl_fIAAAAJ", "orcid": ";0000-0003-3934-2879;0000-0002-7736-0528;0000-0002-2367-197X", "linkedin": ";;;", "or_profile": "~Sanghyeob_Song1;~Jaihyun_Lew1;~Hyemi_Jang1;~Sungroh_Yoon1", "aff": "Seoul National University;Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsong2024unsupervised,\ntitle={Unsupervised Homography Estimation on Multimodal Image Pair via Alternating Optimization},\nauthor={Sanghyeob Song and Jaihyun Lew and Hyemi Jang and Sungroh Yoon},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zkhyrxlwqH}\n}", "github": "", "reviewers": "EsVS;PrYW;sFim", "pdf_size": 1546154, "rating": "3;6;7", "confidence": "5;5;4", "soundness": "2;3;2", "novelty": "2;3;3", "presentation": "2;3;3", "wc_summary": "51;68;77", "wc_strengths": "21;112;72", "wc_weaknesses": "461;80;360", "wc_questions": "5;42;54", "wc_limitations": "69;58;6", "wc_review": "607;360;569", "wc_reply_reviewers": "1199;105;211", "wc_reply_authors": "1454;369;409", "reply_reviewers": "3;1;1", "reply_authors": "5;4;3", "rating_avg": [ 5.333333333333333, 1.699673171197595 ], "confidence_avg": [ 4.666666666666667, 0.4714045207910317 ], "soundness_avg": [ 2.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 65.33333333333333, 10.780641085864152 ], "wc_strengths_avg": [ 68.33333333333333, 37.2409571424915 ], "wc_weaknesses_avg": [ 300.3333333333333, 161.16313336354426 ], "wc_questions_avg": [ 33.666666666666664, 20.8539897594894 ], "wc_limitations_avg": [ 44.333333333333336, 27.475241379993168 ], "wc_review_avg": [ 512.0, 108.59404526338756 ], "wc_reply_reviewers_avg": [ 505.0, 492.6364447203096 ], "wc_reply_authors_avg": [ 744.0, 502.3113244459721 ], "reply_reviewers_avg": [ 1.6666666666666667, 0.9428090415820634 ], "reply_authors_avg": [ 4.0, 0.816496580927726 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": -0.6933752452815364, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:ZITWUifnR6sJ:scholar.google.com/&scioq=Unsupervised+Homography+Estimation+on+Multimodal+Image+Pair+via+Alternating+Optimization&hl=en&as_sdt=0,5", "gs_version_total": 4, "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "author_num": 4, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Rethinking Deep Thinking: Stable Learning of Algorithms using Lipschitz Constraints", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92936", "id": "zlgfRk2CQa", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zlgfRk2CQa", "openreview": "https://openreview.net/forum?id=zlgfRk2CQa", "poster": "/media/PosterPDFs/NeurIPS%202024/92936.png?t=1732804445.7012978", "project": "", "author_site": "Jay Bear, Adam Prugel-Bennett, Jonathon Hare", "tldr": "", "abstract": "Iterative algorithms solve problems by taking steps until a solution is reached. Models in the form of Deep Thinking (DT) networks have been demonstrated to learn iterative algorithms in a way that can scale to different sized problems at inference time using recurrent computation and convolutions. However, they are often unstable during training, and have no guarantees of convergence/termination at the solution. This paper addresses the problem of instability by analyzing the growth in intermediate representations, allowing us to build models (referred to as Deep Thinking with Lipschitz Constraints (DT-L)) with many fewer parameters and providing more reliable solutions. Additionally our DT-L formulation provides guarantees of convergence of the learned iterative procedure to a unique solution at inference time. We demonstrate DT-L is capable of robustly learning algorithms which extrapolate to harder problems than in the training set. We benchmark on the traveling salesperson problem to evaluate the capabilities of the modified system in an NP-hard problem where DT fails to learn.", "keywords": "machine learning;iterative algorithms;deep thinking;lipschitz;traveling salesperson problem;contraction mapping", "primary_area": "deep_learning_architectures", "supplementary_material": "/attachment/1f64b053929cd11eefbd93bd1f5868d37f18f7bb.zip", "author": "Jay Bear;Adam Prugel-Bennett;Jonathon Hare", "authorids": "~Jay_Bear1;~Adam_Prugel-Bennett1;~Jonathon_Hare1", "gender": "Non-Binary;M;M", "homepage": ";;http://users.soton.ac.uk/jsh2", "dblp": ";p/AdamPrugelBennett.html;13/905", "google_scholar": ";https://scholar.google.co.uk/citations?user=oQgxYjkAAAAJ;https://scholar.google.co.uk/citations?user=UFeON5oAAAAJ", "orcid": "0009-0002-7459-8810;0000-0002-1329-5077;0000-0003-2921-4283", "linkedin": ";;jonathonhare/", "or_profile": "~Jay_Bear1;~Adam_Prugel-Bennett1;~Jonathon_Hare1", "aff": "University of Southampton;University of Southampton;University of Southampton", "aff_domain": "soton.ac.uk;soton.ac.uk;soton.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbear2024rethinking,\ntitle={Rethinking Deep Thinking: Stable Learning of Algorithms using Lipschitz Constraints},\nauthor={Jay Bear and Adam Prugel-Bennett and Jonathon Hare},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zlgfRk2CQa}\n}", "github": "", "reviewers": "vZtn;x7iE;ipKY;9kC6", "pdf_size": 835878, "rating": "6;6;6;8", "confidence": "3;4;4;4", "soundness": "3;3;3;4", "novelty": "2;1;3;3", "presentation": "3;3;3;4", "wc_summary": "79;51;101;94", "wc_strengths": "49;18;96;115", "wc_weaknesses": "108;76;56;106", "wc_questions": "76;13;90;66", "wc_limitations": "5;31;1;6", "wc_review": "317;189;344;387", "wc_reply_reviewers": "11;70;0;10", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;0;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.5, 0.8660254037844386 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.25, 0.82915619758885 ], "presentation_avg": [ 3.25, 0.4330127018922193 ], "wc_summary_avg": [ 81.25, 19.188212527486765 ], "wc_strengths_avg": [ 69.5, 38.2262998470948 ], "wc_weaknesses_avg": [ 86.5, 21.696773953747133 ], "wc_questions_avg": [ 61.25, 29.13224158900238 ], "wc_limitations_avg": [ 10.75, 11.840080236214618 ], "wc_review_avg": [ 309.25, 73.77796080131247 ], "wc_reply_reviewers_avg": [ 22.75, 27.616797424755827 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 13, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.3333333333333333, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16361422963570820904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "email": "soton.ac.uk;soton.ac.uk;soton.ac.uk", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southampton", "aff_unique_dep": "", "aff_unique_url": "https://www.southampton.ac.uk", "aff_unique_abbr": "Southampton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Segment, Shuffle, and Stitch: A Simple Layer for Improving Time-Series Representations", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92935", "id": "zm1LcgRpHm", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zm1LcgRpHm", "openreview": "https://openreview.net/forum?id=zm1LcgRpHm", "poster": "/media/PosterPDFs/NeurIPS%202024/92935.png?t=1733452236.330268", "project": "", "author_site": "Shivam Grover, Amin Jalali, Ali Etemad", "tldr": "", "abstract": "Existing approaches for learning representations of time-series keep the temporal arrangement of the time-steps intact with the presumption that the original order is the most optimal for learning. However, non-adjacent sections of real-world time-series may have strong dependencies. Accordingly, we raise the question: Is there an alternative arrangement for time-series which could enable more effective representation learning? To address this, we propose a simple plug-and-play neural network layer called Segment, Shuffle, and Stitch (S3) designed to improve representation learning in time-series models. S3 works by creating non-overlapping segments from the original sequence and shuffling them in a learned manner that is optimal for the task at hand. It then re-attaches the shuffled segments back together and performs a learned weighted sum with the original input to capture both the newly shuffled sequence along with the original sequence. S3 is modular and can be stacked to achieve different levels of granularity, and can be added to many forms of neural architectures including CNNs or Transformers with negligible computation overhead. Through extensive experiments on several datasets and state-of-the-art baselines, we show that incorporating S3 results in significant improvements for the tasks of time-series classification, forecasting, and anomaly detection, improving performance on certain datasets by up to 68\\%. We also show that S3 makes the learning more stable with a smoother training loss curve and loss landscape compared to the original baseline. The code is available at https://github.com/shivam-grover/S3-TimeSeries.", "keywords": "Time Series;Deep Learning;Representation Learning;Temporal Mechanism", "primary_area": "deep_learning_architectures", "supplementary_material": "", "author": "Shivam Grover;Amin Jalali;Ali Etemad", "authorids": "~Shivam_Grover1;~Amin_Jalali3;~Ali_Etemad1", "gender": "M;M;M", "homepage": "https://shivam-grover.github.io/;;http://www.aiimlab.com", "dblp": "306/0858;;15/8931", "google_scholar": "ZJI9ALMAAAAJ;eu2vqNEAAAAJ;https://scholar.google.com/citations?hl=en", "orcid": ";0000-0002-4095-1308;0000-0001-7128-0220", "linkedin": "shivam-grover/;amin-jalali-14b02647/;ali-etemad-6b3379b/", "or_profile": "~Shivam_Grover1;~Amin_Jalali3;~Ali_Etemad1", "aff": "Queen's University;Queen's University;Queen's University", "aff_domain": "queensu.ca;queensu.ca;queensu.ca", "position": "MS student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ngrover2024segment,\ntitle={Segment, Shuffle, and Stitch: A Simple Layer for Improving Time-Series Representations},\nauthor={Shivam Grover and Amin Jalali and Ali Etemad},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zm1LcgRpHm}\n}", "github": "", "reviewers": "2e8Y;dMD7;xbmW;FZgx;5XMz", "pdf_size": 3927744, "rating": "4;5;6;6;8", "confidence": "3;4;4;4;5", "soundness": "2;2;2;3;4", "novelty": "2;3;2;3;4", "presentation": "3;3;3;4;4", "wc_summary": "87;60;97;66;38", "wc_strengths": "18;47;30;50;42", "wc_weaknesses": "157;111;116;71;168", "wc_questions": "14;104;121;2;224", "wc_limitations": "2;25;1;20;12", "wc_review": "278;347;365;209;484", "wc_reply_reviewers": "0;142;21;30;83", "wc_reply_authors": "0;634;32;32;176", "reply_reviewers": "0;2;1;1;1", "reply_authors": "1;3;2;2;3", "rating_avg": [ 5.8, 1.32664991614216 ], "confidence_avg": [ 4.0, 0.6324555320336759 ], "soundness_avg": [ 2.6, 0.8 ], "novelty_avg": [ 2.8, 0.7483314773547882 ], "presentation_avg": [ 3.4, 0.4898979485566356 ], "wc_summary_avg": [ 69.6, 20.771133815947554 ], "wc_strengths_avg": [ 37.4, 11.859173664298874 ], "wc_weaknesses_avg": [ 124.6, 34.82872377793938 ], "wc_questions_avg": [ 93.0, 80.73165426274876 ], "wc_limitations_avg": [ 12.0, 9.528903399657276 ], "wc_review_avg": [ 336.6, 92.04042590079644 ], "wc_reply_reviewers_avg": [ 55.2, 51.30068225667179 ], "wc_reply_authors_avg": [ 174.8, 237.573062446061 ], "reply_reviewers_avg": [ 1.0, 0.6324555320336759 ], "reply_authors_avg": [ 2.2, 0.7483314773547882 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.9534625892455921, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7955337952339478600&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "queensu.ca;queensu.ca;queensu.ca", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "Queen's University", "aff_unique_dep": "", "aff_unique_url": "https://www.queensu.ca", "aff_unique_abbr": "Queen's", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "GraphCroc: Cross-Correlation Autoencoder for Graph Structural Reconstruction", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92934", "id": "zn6s6VQYb0", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zn6s6VQYb0", "openreview": "https://openreview.net/forum?id=zn6s6VQYb0", "poster": "/media/PosterPDFs/NeurIPS%202024/92934.png?t=1731361362.0282958", "project": "", "author_site": "Shijin Duan, Ruyi Ding, Jiaxing He, Aidong Ding, Yunsi Fei, Xiaolin Xu", "tldr": "", "abstract": "Graph-structured data is integral to many applications, prompting the development of various graph representation methods. Graph autoencoders (GAEs), in particular, reconstruct graph structures from node embeddings. Current GAE models primarily utilize self-correlation to represent graph structures and focus on node-level tasks, often overlooking multi-graph scenarios. Our theoretical analysis indicates that self-correlation generally falls short in accurately representing specific graph features such as islands, symmetrical structures, and directional edges, particularly in smaller or multiple graph contexts.To address these limitations, we introduce a cross-correlation mechanism that significantly enhances the GAE representational capabilities. Additionally, we propose the GraphCroc, a new GAE that supports flexible encoder architectures tailored for various downstream tasks and ensures robust structural reconstruction, through a mirrored encoding-decoding process. This model also tackles the challenge of representation bias during optimization by implementing a loss-balancing strategy. Both theoretical analysis and numerical evaluations demonstrate that our methodology significantly outperforms existing self-correlation-based GAEs in graph structure reconstruction.", "keywords": "Graph Neural Network;Graph Representation;Auto-Encoder", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/11730d9a19e1603c2107c5a477f06920f0827738.zip", "author": "Shijin Duan;Ruyi Ding;Jiaxing He;Aidong Adam Ding;Yunsi Fei;Xiaolin Xu", "authorids": "~Shijin_Duan1;~Ruyi_Ding1;~Jiaxing_He1;~Aidong_Adam_Ding1;~Yunsi_Fei1;~Xiaolin_Xu3", "gender": "M;M;M;;F;", "homepage": "https://scholar.google.com/citations?user=RJ5CUeAAAAAJ;;;https://cos.northeastern.edu/people/aidong-ding/;https://coe.northeastern.edu/people/fei-yunsi/;", "dblp": "300/5409;;;;67/5667.html;", "google_scholar": "RJ5CUeAAAAAJ;https://scholar.google.com/citations?hl=en;SKnkYV4AAAAJ;;Ja64KW4AAAAJ;", "orcid": "0000-0002-4317-1489;;;;0000-0002-5169-4044;", "linkedin": ";;jiaxing-he-43b91523b/;;;", "or_profile": "~Shijin_Duan1;~Ruyi_Ding1;~Jiaxing_He1;~Aidong_Adam_Ding1;~Yunsi_Fei1;~Xiaolin_Xu3", "aff": "Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;", "aff_domain": "neu.edu;neu.edu;neu.edu;northeastern.edu;northeastern.edu;", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nduan2024graphcroc,\ntitle={GraphCroc: Cross-Correlation Autoencoder for Graph Structural Reconstruction},\nauthor={Shijin Duan and Ruyi Ding and Jiaxing He and Aidong Adam Ding and Yunsi Fei and Xiaolin Xu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zn6s6VQYb0}\n}", "github": "", "reviewers": "QrYC;Msno;Xxf1", "pdf_size": 1049537, "rating": "5;5;6", "confidence": "4;4;4", "soundness": "3;3;3", "novelty": "3;3;3", "presentation": "3;2;3", "wc_summary": "35;48;88", "wc_strengths": "25;38;32", "wc_weaknesses": "97;79;141", "wc_questions": "2;12;3", "wc_limitations": "1;1;1", "wc_review": "160;178;265", "wc_reply_reviewers": "0;0;45", "wc_reply_authors": "38;38;61", "reply_reviewers": "0;0;1", "reply_authors": "2;2;3", "rating_avg": [ 5.333333333333333, 0.4714045207910317 ], "confidence_avg": [ 4.0, 0.0 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.6666666666666665, 0.4714045207910317 ], "wc_summary_avg": [ 57.0, 22.55363976538303 ], "wc_strengths_avg": [ 31.666666666666668, 5.312459150169742 ], "wc_weaknesses_avg": [ 105.66666666666667, 26.042699979499478 ], "wc_questions_avg": [ 5.666666666666667, 4.4969125210773475 ], "wc_limitations_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 201.0, 45.8475735453906 ], "wc_reply_reviewers_avg": [ 15.0, 21.213203435596427 ], "wc_reply_authors_avg": [ 45.666666666666664, 10.842303978193728 ], "reply_reviewers_avg": [ 0.3333333333333333, 0.4714045207910317 ], "reply_authors_avg": [ 2.3333333333333335, 0.4714045207910317 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:cw0_2i7-D3QJ:scholar.google.com/&scioq=GraphCroc:+Cross-Correlation+Autoencoder+for+Graph+Structural+Reconstruction&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "neu.edu;neu.edu;neu.edu;northeastern.edu;northeastern.edu;", "author_num": 6, "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "znBiAp5ISn", "title": "TAS-GNN: Topology-Aware Spiking Graph Neural Networks for Graph Classification", "track": "main", "status": "Reject", "tldr": "", "abstract": "The recent integration of spiking neurons into graph neural networks has been gaining much attraction due to its superior energy efficiency. \nEspecially because the irregular connection among graph nodes fits the nature of the spiking neural networks, spiking graph neural networks are considered strong alternatives to vanilla graph neural networks. \nHowever, there is still a large performance gap for graph tasks between the spiking neural networks and artificial neural networks. \nThe gaps are especially large when they are adapted to graph classification tasks, where none of the nodes in the testset graphs are connected to the training set graphs.\nWe diagnose the problem as the existence of neurons under starvation, caused by the irregular connections among the nodes and the neurons. To alleviate the problem, we propose TAS-GNN.\nBased on a set of observations on spiking neurons on graph classification tasks, we devise several techniques to utilize more neurons to deliver meaningful information to the connected neurons. \nExperiments on diverse datasets show up to 27.20% improvement, demonstrating the effectiveness of the TAS-GNN.", "keywords": "Spiking Neural Network;Graph Neural Network;Graph Classification", "primary_area": "neuroscience_and_cognitive_science", "supplementary_material": "/attachment/2f7b7eeeb09dd927e4819ebd2d5daae7842c7c98.zip", "author": "SunJong Park;Hyeyoon Lee;Kanghyun Choi;Dain Kwon;Jongkil Park;Seongsik Park;Jinho Lee", "authorids": "~SunJong_Park1;~Hyeyoon_Lee1;~Kanghyun_Choi1;~Dain_Kwon1;~Jongkil_Park1;~Seongsik_Park1;~Jinho_Lee2", "gender": ";;M;F;M;M;M", "homepage": ";https://aisys.snu.ac.kr/members/HyeyoonLee.html;https://aisys.snu.ac.kr/kanghyun.html;https://github.com/meowrowan;https://www.kist.re.kr;https://seongsikpark.github.io;http://acsys.snu.ac.kr/people.html", "dblp": ";276/0074;229/7353;380/6008;;93/11156;", "google_scholar": ";lYXg5nsAAAAJ;n9e6qnsAAAAJ;;;j8qTMtoAAAAJ;https://scholar.google.com/citations?hl=ko", "orcid": ";;;;;;", "linkedin": ";;;;;;", "or_profile": "~SunJong_Park1;~Hyeyoon_Lee1;~Kanghyun_Choi1;~Dain_Kwon1;~Jongkil_Park1;~Seongsik_Park1;~Jinho_Lee2", "aff": ";Seoul National University;Seoul National University;Yonsei University;Korea Institute of Science and Technology;Korea Institute of Science and Technology;Seoul National University", "aff_domain": ";snu.ac.kr;snu.ac.kr;yonsei.ac.kr;kist.re.kr;kist.re.kr;snu.ac.kr", "position": ";PhD student;PhD student;Undergrad student;Principal Researcher;Researcher;Associate Professor", "bibtex": "@misc{\nanonymous2024tasgnn,\ntitle={{TAS}-{GNN}: Topology-Aware Spiking Graph Neural Networks for Graph Classification},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=znBiAp5ISn}\n}", "github": "", "project": "", "reviewers": "4pSD;5K64;MTu6;icJz", "site": "https://openreview.net/forum?id=znBiAp5ISn", "pdf_size": 2251299, "rating": "3;6;6;7", "confidence": "4;4;4;5", "soundness": "2;3;3;3", "novelty": "2;2;3;3", "presentation": "2;2;3;2", "wc_summary": "85;116;76;55", "wc_strengths": "71;58;37;114", "wc_weaknesses": "87;162;93;78", "wc_questions": "7;30;33;214", "wc_limitations": "21;1;7;118", "wc_review": "271;367;246;579", "wc_reply_reviewers": "51;49;26;27", "wc_reply_authors": "159;46;19;29", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;2", "rating_avg": [ 5.5, 1.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.25, 0.4330127018922193 ], "wc_summary_avg": [ 83.0, 21.94310825749169 ], "wc_strengths_avg": [ 70.0, 28.151376520518493 ], "wc_weaknesses_avg": [ 105.0, 33.3391661563393 ], "wc_questions_avg": [ 71.0, 83.17150954503592 ], "wc_limitations_avg": [ 36.75, 47.46775221136977 ], "wc_review_avg": [ 365.75, 131.14376653123853 ], "wc_reply_reviewers_avg": [ 38.25, 11.776565713313877 ], "wc_reply_authors_avg": [ 63.25, 56.117622009490034 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.0 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 7, 0 ], "corr_rating_confidence": 0.5773502691896258, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:u_W3zpx3cIAJ:scholar.google.com/&scioq=TAS-GNN:+Topology-Aware+Spiking+Graph+Neural+Networks+for+Graph+Classification&hl=en&as_sdt=0,5", "gs_version_total": 0, "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Seoul National University;Yonsei University;Korea Institute of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.yonsei.ac.kr;https://www.kist.re.kr", "aff_unique_abbr": "SNU;Yonsei;KIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "DevBench: A multimodal developmental benchmark for language learning", "status": "Oral", "track": "Datasets & Benchmarks", "site": "https://neurips.cc/virtual/2024/poster/97423", "id": "zogaeVpbaE", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zogaeVpbaE", "openreview": "https://openreview.net/forum?id=zogaeVpbaE", "poster": "/media/PosterPDFs/NeurIPS%202024/97423.png?t=1733513525.6622586", "project": "", "author_site": "Alvin Tan, Chunhua Yu, Bria Long, Wanjing Ma, Tonya Murray, Rebecca Silverman, Jason Yeatman, Michael C Frank", "tldr": "", "abstract": "How (dis)similar are the learning trajectories of vision\u2013language models and children? Recent modeling work has attempted to understand the gap between models\u2019 and humans\u2019 data efficiency by constructing models trained on less data, especially multimodal naturalistic data. However, such models are often evaluated on adult-level benchmarks, with limited breadth in language abilities tested, and without direct comparison to behavioral data. We introduce DevBench, a multimodal benchmark comprising seven language evaluation tasks spanning the domains of lexical, syntactic, and semantic ability, with behavioral data from both children and adults. We evaluate a set of vision\u2013language models on these tasks, comparing models and humans on their response patterns, not their absolute performance. Across tasks, models exhibit variation in their closeness to human response patterns, and models that perform better on a task also more closely resemble human behavioral responses. We also examine the developmental trajectory of OpenCLIP over training, finding that greater training results in closer approximations to adult response patterns. DevBench thus provides a benchmark for comparing models to human language development. These comparisons highlight ways in which model and human language learning processes diverge, providing insight into entry points for improving language models.", "keywords": "multimodal;developmental;language;evaluation", "primary_area": "", "supplementary_material": "", "author": "Alvin Wei Ming Tan;Chunhua Yu;Bria Lorelle Long;Wanjing Anya Ma;Tonya Murray;Rebecca D. Silverman;Jason D Yeatman;Michael Frank", "authorids": "~Alvin_Wei_Ming_Tan1;~Chunhua_Yu1;~Bria_Lorelle_Long1;~Wanjing_Anya_Ma1;~Tonya_Murray1;~Rebecca_D._Silverman1;~Jason_D_Yeatman1;~Michael_Frank1", "gender": "M;F;F;;;M;M;F", "homepage": ";;http://Brialong.com;;;https://web.stanford.edu/~mcfrank/;https://profiles.stanford.edu/jason-yeatman;", "dblp": ";;;;;;;", "google_scholar": ";;HXUagwoAAAAJ;;;dErAioMAAAAJ;;nAWSAbQAAAAJ", "orcid": "0000-0001-5551-7507;;;;0000-0002-9785-0313;0000-0002-7551-4378;;0000-0001-5761-8707", "linkedin": ";https://www.linkedin/sunny-yu-992421242;;tonya-murray/;;;;", "or_profile": "~Alvin_Wei_Ming_Tan1;~Chunhua_Yu1;~Bria_Lorelle_Long1;~Tonya_Murray1;~Rebecca_D._Silverman1;~Michael_Frank1;~Jason_Yeatman1;~Wanjing_Ma1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University;;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu", "position": "PhD student;Undergrad student;Postdoc;Research Coordinator;Associate Professor;Full Professor;;PhD student", "bibtex": "@inproceedings{\ntan2024devbench,\ntitle={DevBench: A multimodal developmental benchmark for language learning},\nauthor={Alvin Wei Ming Tan and Chunhua Yu and Bria Lorelle Long and Wanjing Anya Ma and Tonya Murray and Rebecca D. Silverman and Jason D Yeatman and Michael Frank},\nbooktitle={The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track},\nyear={2024},\nurl={https://openreview.net/forum?id=zogaeVpbaE}\n}", "github": "", "reviewers": "bnQw;6bM6;k3Ee;fso1", "pdf_size": 4195608, "rating": "5;6;7;7", "confidence": "4;4;5;3", "wc_summary_and_contributions": "25;86;64;179", "wc_strengths": "42;117;117;151", "wc_improvement": "173;186;129;3", "wc_limitations": "7;11;16;5", "wc_correctness": "10;59;13;7", "wc_clarity": "20;11;15;6", "wc_relation_to_prior_work": "66;19;6;2", "wc_documentation": "10;16;78;6", "wc_additional_feedback": "1;1;1;1", "wc_review": "354;506;439;360", "wc_reply_reviewers": "32;18;13;73", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "1;1;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 0.82915619758885 ], "confidence_avg": [ 4.0, 0.7071067811865476 ], "wc_summary_and_contributions_avg": [ 88.5, 56.632587791835896 ], "wc_strengths_avg": [ 106.75, 39.87715511417533 ], "wc_improvement_avg": [ 122.75, 72.29237511660548 ], "wc_limitations_avg": [ 9.75, 4.205650960315181 ], "wc_correctness_avg": [ 22.25, 21.323402636539974 ], "wc_clarity_avg": [ 13.0, 5.1478150704935 ], "wc_relation_to_prior_work_avg": [ 23.25, 25.469344318219107 ], "wc_documentation_avg": [ 27.5, 29.37260628544903 ], "wc_additional_feedback_avg": [ 1.0, 0.0 ], "wc_review_avg": [ 414.75, 62.45548414671045 ], "wc_reply_reviewers_avg": [ 34.0, 23.569047498785352 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 14, 0 ], "authors#_avg": [ 8, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18233515719461213973&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;;stanford.edu", "author_num": 8, "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "RashomonGB: Analyzing the Rashomon Effect and Mitigating Predictive Multiplicity in Gradient Boosting", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92933", "id": "zpw6NmhvKU", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zpw6NmhvKU", "openreview": "https://openreview.net/forum?id=zpw6NmhvKU", "poster": "/media/PosterPDFs/NeurIPS%202024/92933.png?t=1732204062.028944", "project": "", "author_site": "Hsiang Hsu, Ivan Brugere, Shubham Sharma, Freddy Lecue, Richard Chen", "tldr": "", "abstract": "The Rashomon effect is a mixed blessing in responsible machine learning. It enhances the prospects of finding models that perform well in accuracy while adhering to ethical standards, such as fairness or interpretability. Conversely, it poses a risk to the credibility of machine decisions through predictive multiplicity. While recent studies have explored the Rashomon effect across various machine learning algorithms, its impact on gradient boosting---an algorithm widely applied to tabular datasets---remains unclear. This paper addresses this gap by systematically analyzing the Rashomon effect and predictive multiplicity in gradient boosting algorithms. We provide rigorous theoretical derivations to examine the Rashomon effect in the context of gradient boosting and offer an information-theoretic characterization of the Rashomon set. Additionally, we introduce a novel inference technique called RashomonGB to efficiently inspect the Rashomon effect in practice. On more than 20 datasets, our empirical results show that RashomonGB outperforms existing baselines in terms of improving the estimation of predictive multiplicity metrics and model selection with group fairness constraints. Lastly, we propose a framework to mitigate predictive multiplicity in gradient boosting and empirically demonstrate its effectiveness.", "keywords": "Rashomon effects; predictive multiplicity; gradient boosting; tabular data", "primary_area": "safety_in_machine_learning", "supplementary_material": "", "author": "Hsiang Hsu;Ivan Brugere;Shubham Sharma;Freddy Lecue;Chun-Fu Chen", "authorids": "~Hsiang_Hsu1;~Ivan_Brugere1;~Shubham_Sharma4;~Freddy_Lecue1;~Chun-Fu_Chen1", "gender": "M;M;M;;M", "homepage": "https://hsianghsu.github.io;;;http://www-sop.inria.fr/members/Freddy.Lecue/;", "dblp": ";50/10346;;02/3657.html;48/915", "google_scholar": "https://scholar.google.com.tw/citations?user=JRl3iYIAAAAJ;JGlGUcsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=GLByS4gAAAAJ;9gqd5cYAAAAJ", "orcid": "0000-0001-8084-3929;0000-0002-2953-3746;;;", "linkedin": ";ivanbrugere/;;freddylecue/;", "or_profile": "~Hsiang_Hsu1;~Ivan_Brugere1;~Shubham_Sharma4;~Freddy_Lecue1;~Chun-Fu_Chen1", "aff": "JP Morgan & Chase Bank;J.P. Morgan;J.P. Morgan Chase;INRIA;JPMorganChase, GTAR", "aff_domain": "jpmchase.com;jpmchase.com;jpmorgan.com;inria.fr;jpmchase.com", "position": "Researcher;Researcher;Researcher;Full Professor;Executive Director", "bibtex": "@inproceedings{\nhsu2024rashomongb,\ntitle={Rashomon{GB}: Analyzing the Rashomon Effect and Mitigating Predictive Multiplicity in Gradient Boosting},\nauthor={Hsiang Hsu and Ivan Brugere and Shubham Sharma and Freddy Lecue and Chun-Fu Chen},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zpw6NmhvKU}\n}", "github": "", "reviewers": "XEPy;E8Ec;HS14;JGsC", "pdf_size": 6535140, "rating": "5;6;6;7", "confidence": "2;4;2;4", "soundness": "3;2;3;3", "novelty": "3;3;3;3", "presentation": "2;3;3;3", "wc_summary": "86;88;78;102", "wc_strengths": "111;50;82;110", "wc_weaknesses": "56;157;107;261", "wc_questions": "12;179;57;288", "wc_limitations": "6;1;1;118", "wc_review": "271;475;325;879", "wc_reply_reviewers": "0;0;17;510", "wc_reply_authors": "0;0;0;517", "reply_reviewers": "0;0;1;2", "reply_authors": "1;1;1;3", "rating_avg": [ 6.0, 0.7071067811865476 ], "confidence_avg": [ 3.0, 1.0 ], "soundness_avg": [ 2.75, 0.4330127018922193 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 88.5, 8.645808232895291 ], "wc_strengths_avg": [ 88.25, 24.963723680572976 ], "wc_weaknesses_avg": [ 145.25, 75.7706242550502 ], "wc_questions_avg": [ 134.0, 107.8818798501398 ], "wc_limitations_avg": [ 31.5, 49.98249693642766 ], "wc_review_avg": [ 487.5, 238.06879257895184 ], "wc_reply_reviewers_avg": [ 131.75, 218.49299187845818 ], "wc_reply_authors_avg": [ 129.25, 223.8675668782774 ], "reply_reviewers_avg": [ 0.75, 0.82915619758885 ], "reply_authors_avg": [ 1.5, 0.8660254037844386 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": 0.7071067811865476, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2071083622869526902&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "email": "jpmchase.com;jpmchase.com;jpmorgan.com;inria.fr;jpmchase.com", "author_num": 5, "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "JPMorgan Chase & Co.;J.P. Morgan;INRIA;JPMorgan Chase", "aff_unique_dep": ";;;Global Technology, Analytics, and Research (GTAR)", "aff_unique_url": "https://www.jpmorganchase.com;https://www.jpmorganchase.com;https://www.inria.fr;https://www.jpmorganchase.com", "aff_unique_abbr": "JPM;JPM;INRIA;JPM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;France" }, { "title": "Generative Semi-supervised Graph Anomaly Detection", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92932", "id": "zqLAMwVLkt", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zqLAMwVLkt", "openreview": "https://openreview.net/forum?id=zqLAMwVLkt", "poster": "/media/PosterPDFs/NeurIPS%202024/92932.png?t=1733463039.4534194", "project": "", "author_site": "Hezhe Qiao, Qingsong Wen, Xiaoli Li, Ee-peng Lim, Guansong Pang", "tldr": "", "abstract": "This work considers a practical semi-supervised graph anomaly detection (GAD) scenario, where part of the nodes in a graph are known to be normal, contrasting to the extensively explored unsupervised setting with a fully unlabeled graph. We reveal that having access to the normal nodes, even just a small percentage of normal nodes, helps enhance the detection performance of existing unsupervised GAD methods when they are adapted to the semi-supervised setting. However, their utilization of these normal nodes is limited. In this paper, we propose a novel Generative GAD approach (namely GGAD) for the semi-supervised scenario to better exploit the normal nodes. The key idea is to generate pseudo anomaly nodes, referred to as 'outlier nodes', for providing effective negative node samples in training a discriminative one-class classifier. The main challenge here lies in the lack of ground truth information about real anomaly nodes. To address this challenge, GGAD is designed to leverage two important priors about the anomaly nodes -- asymmetric local affinity and egocentric closeness -- to generate reliable outlier nodes that assimilate anomaly nodes in both graph structure and feature representations. Comprehensive experiments on six real-world GAD datasets are performed to establish a benchmark for semi-supervised GAD and show that GGAD substantially outperforms state-of-the-art unsupervised and semi-supervised GAD methods with varying numbers of training normal nodes.", "keywords": "Anomaly Detection;Graph Neural Network;Graph Anomaly Detection", "primary_area": "graph_neural_networks", "supplementary_material": "/attachment/909949c36e711c24253839184c0f149002910c81.zip", "author": "Hezhe Qiao;Qingsong Wen;Xiaoli Li;Ee-Peng Lim;Guansong Pang", "authorids": "~Hezhe_Qiao1;~Qingsong_Wen2;~Xiaoli_Li1;~Ee-Peng_Lim1;~Guansong_Pang1", "gender": "M;M;M;;M", "homepage": "https://hezheqiao2022.github.io/;https://personal.ntu.edu.sg/xlli/;https://sis.smu.edu.sg/faculty/profile/9626;http://guansongpang.com/;https://sites.google.com/site/qingsongwen8/", "dblp": "300/2321;l/XiaoliLi.html;l/EePengLim.html;07/11150;27/561", "google_scholar": "bMjKCuEAAAAJ;E3yQKloAAAAJ;https://scholar.google.com.tw/citations?user=r0wOAikAAAAJ;https://scholar.google.com.tw/citations?hl=en;vjPJvwYAAAAJ", "orcid": "0000-0003-3511-0528;0000-0002-0762-6562;0000-0003-0065-8665;0000-0002-9877-2716;0000-0003-4516-2524", "linkedin": "hezhe-qiao-83761b247/;li-xiaoli-41027ba/;;guansong-pang-5587b21b/;qingsong-wen-22814156/", "or_profile": "~Hezhe_Qiao1;~Xiaoli_Li1;~Ee-Peng_Lim1;~Guansong_Pang1;~Qingsong_Wen1", "aff": "Singapore Management University;A*STAR;Singapore Management University;Singapore Management University;Squirrel Ai Learning", "aff_domain": "smu.edu.sg;a-star.edu.sg;smu.edu.sg;smu.edu.sg;squirrelai.com", "position": "PhD student;Principal Researcher;Full Professor;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nqiao2024generative,\ntitle={Generative Semi-supervised Graph Anomaly Detection},\nauthor={Hezhe Qiao and Qingsong Wen and Xiaoli Li and Ee-Peng Lim and Guansong Pang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zqLAMwVLkt}\n}", "github": "", "reviewers": "7mdh;JEU8;yDqR;R8Ff;FpxG", "pdf_size": 3809575, "rating": "5;5;5;6;7", "confidence": "4;5;3;3;4", "soundness": "3;2;2;3;4", "novelty": "3;2;2;3;3", "presentation": "3;3;3;3;3", "wc_summary": "69;164;132;83;140", "wc_strengths": "42;32;52;67;140", "wc_weaknesses": "59;170;98;192;66", "wc_questions": "359;69;26;2;3", "wc_limitations": "77;5;7;19;1", "wc_review": "606;440;315;363;350", "wc_reply_reviewers": "497;564;69;0;6", "wc_reply_authors": "784;1831;299;0;23", "reply_reviewers": "2;5;2;0;1", "reply_authors": "3;6;4;1;2", "rating_avg": [ 5.6, 0.8 ], "confidence_avg": [ 3.8, 0.7483314773547882 ], "soundness_avg": [ 2.8, 0.7483314773547882 ], "novelty_avg": [ 2.6, 0.4898979485566356 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 117.6, 35.83629445129616 ], "wc_strengths_avg": [ 66.6, 38.47908522821196 ], "wc_weaknesses_avg": [ 117.0, 54.3323108288245 ], "wc_questions_avg": [ 91.8, 135.79013218934577 ], "wc_limitations_avg": [ 21.8, 28.24464551025557 ], "wc_review_avg": [ 414.8, 103.95652937646581 ], "wc_reply_reviewers_avg": [ 227.2, 249.72096427813185 ], "wc_reply_authors_avg": [ 587.4, 682.9279903474451 ], "reply_reviewers_avg": [ 2.0, 1.6733200530681511 ], "reply_authors_avg": [ 3.2, 1.7204650534085253 ], "replies_avg": [ 33, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.1336306209562122, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18434209808684818892&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "smu.edu.sg;a-star.edu.sg;smu.edu.sg;smu.edu.sg;squirrelai.com", "author_num": 5, "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Singapore Management University;Agency for Science, Technology and Research;Squirrel Ai Learning", "aff_unique_dep": ";;", "aff_unique_url": "https://www.smu.edu.sg;https://www.a-star.edu.sg;https://www.squirrelai.com/", "aff_unique_abbr": "SMU;A*STAR;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Singapore;China" }, { "title": "G2D: From Global to Dense Radiography Representation Learning via Vision-Language Pre-training", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92931", "id": "zsXbGJJ7Oo", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zsXbGJJ7Oo", "openreview": "https://openreview.net/forum?id=zsXbGJJ7Oo", "poster": "/media/PosterPDFs/NeurIPS%202024/92931.png?t=1729849308.046768", "project": "", "author_site": "Che Liu, Cheng Ouyang, Sibo Cheng, Anand Shah, Wenjia Bai, Rossella Arcucci", "tldr": "", "abstract": "Medical imaging tasks require an understanding of subtle and localized visual features due to the inherently detailed and area-specific nature of pathological patterns, which are crucial for clinical diagnosis. Although recent advances in medical vision-language pre-training (VLP) enable models to learn clinically relevant visual features by leveraging both medical images and their associated radiology reports, current medical VLP methods primarily focus on aligning images with entire reports. This focus hinders the learning of dense (pixel-level) visual features and is suboptimal for dense prediction tasks (e.g., medical image segmentation).\n\nTo address this challenge, we propose a novel medical VLP framework, named **Global to Dense level representation learning (G2D)**, which aims to learn global and dense visual features simultaneously using only image-text pairs without extra annotations. In particular, G2D designs a **Pseudo Segmentation (PS)** task, which enables the model to learn dense visual features during VLP. Notably, generating PS masks can be performed on the fly during VLP, which does not incur extra trainable parameters. With this simple yet effective idea, G2D achieves superior performance across 5 medical imaging tasks and 25 diseases. Particularly, in the segmentation task which requires dense visual features, **G2D surpasses existing models even with just 1% of the training data for finetuning, compared to 100% used by other models**. The code can be found in https://github.com/cheliu-computation/G2D-NeurIPS24/tree/main.", "keywords": "Medical Vision-Language Pre-training;dense visual learning", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "", "author": "Che Liu;Cheng Ouyang;Sibo Cheng;Anand Shah;Wenjia Bai;Rossella Arcucci", "authorids": "~Che_Liu3;~Cheng_Ouyang2;~Sibo_Cheng1;~Anand_Shah1;~Wenjia_Bai1;~Rossella_Arcucci1", "gender": ";;M;M;M;F", "homepage": ";;;https://www.bing.com/ck/a?!&&p=ebd74ca33ca49e50JmltdHM9MTY2NzUyMDAwMCZpZ3VpZD0wNTY1MTYwZS0wOTFiLTZkOTMtMWIxMi0xOTM1MDg1YjZjNDAmaW5zaWQ9NTE3NQ&ptn=3&hsh=3&fclid=0565160e-091b-6d93-1b12-1935085b6c40&psq=imperial+URL+anand+shah&u=a1aHR0cHM6Ly93d3cuaW1wZXJpYWwuYWMudWsvcGVvcGxlL3MuYW5hbmQ&ntb=1;http://www.doc.ic.ac.uk/~wbai;https://www.imperial.ac.uk/people/r.arcucci", "dblp": ";;195/5965;;20/5519;130/5772", "google_scholar": ";;Jj1UrwQAAAAJ;;https://scholar.google.co.uk/citations?user=IA1QFM4AAAAJ;oxy2ZQoAAAAJ", "orcid": ";;0000-0002-8707-2589;;;0000-0002-9471-0585", "linkedin": ";;sibo-cheng-23a52711b/;;;https://www.linkedin.com/public-profile/settings?trk=d_flagship3_profile_self_view_public_profile", "or_profile": "~Che_Liu3;~Cheng_Ouyang2;~Sibo_Cheng1;~Anand_Shah1;~Wenjia_Bai1;~Rossella_Arcucci1", "aff": ";;Imperial College London;Imperial College London;Imperial College London;Imperial College London ", "aff_domain": ";;ic.ac.uk;ic.ac.uk;imperial.ac.uk;imperial.ac.uk", "position": ";;Postdoc;Lecturer;Associate Professor;Senior Lecturer", "bibtex": "@inproceedings{\nliu2024gd,\ntitle={G2D: From Global to Dense Radiography Representation Learning via Vision-Language Pre-training},\nauthor={Che Liu and Cheng Ouyang and Sibo Cheng and Anand Shah and Wenjia Bai and Rossella Arcucci},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zsXbGJJ7Oo}\n}", "github": "", "reviewers": "t3s2;SZ6L;bXf3;iPQX", "pdf_size": 12260886, "rating": "5;5;6;6", "confidence": "5;4;4;4", "soundness": "3;3;4;3", "novelty": "2;3;3;3", "presentation": "3;3;4;4", "wc_summary": "104;67;30;102", "wc_strengths": "142;36;50;203", "wc_weaknesses": "125;23;51;140", "wc_questions": "370;220;14;14", "wc_limitations": "80;1;1;6", "wc_review": "821;347;146;465", "wc_reply_reviewers": "26;7;16;101", "wc_reply_authors": "612;0;24;53", "reply_reviewers": "1;1;1;1", "reply_authors": "3;1;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.75, 30.235533731025818 ], "wc_strengths_avg": [ 107.75, 68.42651167493489 ], "wc_weaknesses_avg": [ 84.75, 49.052905112745364 ], "wc_questions_avg": [ 154.5, 150.17573039609297 ], "wc_limitations_avg": [ 22.0, 33.54847239443251 ], "wc_review_avg": [ 444.75, 245.3470755888482 ], "wc_reply_reviewers_avg": [ 37.5, 37.27264412407577 ], "wc_reply_authors_avg": [ 172.25, 254.58237861250333 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.0, 0.7071067811865476 ], "replies_avg": [ 17, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": -0.5773502691896257, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5328770949863411666&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "email": ";;ic.ac.uk;ic.ac.uk;imperial.ac.uk;imperial.ac.uk", "author_num": 6, "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Imperial College London", "aff_unique_dep": "", "aff_unique_url": "https://www.imperial.ac.uk", "aff_unique_abbr": "ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "OxonFair: A Flexible Toolkit for Algorithmic Fairness", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92930", "id": "ztwl4ubnXV", "proceeding": "", "pdf": "https://openreview.net/pdf?id=ztwl4ubnXV", "openreview": "https://openreview.net/forum?id=ztwl4ubnXV", "poster": "/media/PosterPDFs/NeurIPS%202024/92930.png?t=1731681123.3428457", "project": "", "author_site": "Eoin Delaney, Zihao Fu, Sandra Wachter, Brent Mittelstadt, Chris Russell", "tldr": "", "abstract": "We present OxonFair, a new open source toolkit for enforcing fairness in binary classification. Compared to existing toolkits: (i) We support NLP and Computer Vision classification as well as standard tabular problems. (ii) We support enforcing fairness on validation data, making us robust to a wide range of overfitting challenges. (iii) Our approach can optimize any measure based on True Positives, False Positive, False Negatives, and True Negatives. This makes it easily extensible and much more expressive than existing toolkits. It supports all 9 and all 10 of the decision-based group metrics of two popular review articles. (iv) We jointly optimize a performance objective alongside fairness constraints. This minimizes degradation while enforcing fairness, and even improves the performance of inadequately tuned unfair baselines. OxonFair is compatible with standard ML toolkits, including sklearn, Autogluon, and PyTorch and is available at https://github.com/oxfordinternetinstitute/oxonfair.", "keywords": "Fairness Toolkit;Algorithmic Fairness;Trustworthy AI", "primary_area": "infrastructure", "supplementary_material": "/attachment/f13b1845f29e53481fdce123227ac8b54be53db5.zip", "author": "Eoin D. Delaney;Zihao Fu;Sandra Wachter;Brent Mittelstadt;Chris Russell", "authorids": "~Eoin_D._Delaney1;~Zihao_Fu1;~Sandra_Wachter1;~Brent_Mittelstadt1;~Chris_Russell3", "gender": ";M;F;M;M", "homepage": "https://e-delaney.github.io/;https://fuzihaofzh.github.io/;https://www.oii.ox.ac.uk/people/profiles/sandra-wachter/;https://www.oii.ox.ac.uk/people/profiles/brent-mittelstadt/;https://www.oii.ox.ac.uk/people/profiles/chris-russell/", "dblp": "275/3311.html;;209/9828;179/6075;57/9988-1", "google_scholar": "I11ceKoAAAAJ;64CHB2YAAAAJ;ZXBJVqYAAAAJ;tP685zYAAAAJ;https://scholar.google.co.uk/citations?user=RM2sHhYAAAAJ", "orcid": "0000-0002-7282-8494;;0000-0003-3800-0113;0000-0002-4709-6404;0000-0003-1665-1759", "linkedin": ";;;;", "or_profile": "~Eoin_D._Delaney1;~Zihao_Fu1;~Sandra_Wachter1;~Brent_Mittelstadt1;~Chris_Russell3", "aff": "University of Oxford;University of Cambridge;University of Oxford;University of Oxford;University of Oxford", "aff_domain": "oxford.ac.uk;cam.ac.uk;oxford.ac.uk;ox.ac.uk;oxford.ac.uk", "position": "Postdoc;Postdoc;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ndelaney2024oxonfair,\ntitle={OxonFair: A Flexible Toolkit for Algorithmic Fairness},\nauthor={Eoin D. Delaney and Zihao Fu and Sandra Wachter and Brent Mittelstadt and Chris Russell},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=ztwl4ubnXV}\n}", "github": "", "reviewers": "AKgy;AiSk;9LRe;aniZ;JNWw", "pdf_size": 871049, "rating": "4;6;6;7;7", "confidence": "4;4;3;3;1", "soundness": "3;3;3;3;3", "novelty": "2;3;3;3;3", "presentation": "2;3;2;2;2", "wc_summary": "81;86;113;147;36", "wc_strengths": "105;35;46;83;7", "wc_weaknesses": "125;81;50;256;58", "wc_questions": "68;61;40;129;16", "wc_limitations": "81;1;19;102;9", "wc_review": "460;264;268;717;126", "wc_reply_reviewers": "91;16;15;62;15", "wc_reply_authors": "0;0;38;38;38", "reply_reviewers": "1;1;1;1;1", "reply_authors": "1;1;2;2;2", "rating_avg": [ 6.0, 1.0954451150103321 ], "confidence_avg": [ 3.0, 1.0954451150103321 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.8, 0.39999999999999997 ], "presentation_avg": [ 2.2, 0.39999999999999997 ], "wc_summary_avg": [ 92.6, 36.761936836896936 ], "wc_strengths_avg": [ 55.2, 34.83905854066668 ], "wc_weaknesses_avg": [ 114.0, 75.63861447699846 ], "wc_questions_avg": [ 62.8, 37.74334378403694 ], "wc_limitations_avg": [ 42.4, 41.034619530342916 ], "wc_review_avg": [ 367.0, 204.76327795774318 ], "wc_reply_reviewers_avg": [ 39.8, 31.339432030590473 ], "wc_reply_authors_avg": [ 22.8, 18.616122045152153 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.6, 0.4898979485566356 ], "replies_avg": [ 22, 0 ], "authors#_avg": [ 5, 0 ], "corr_rating_confidence": -0.6666666666666669, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4214094681626302016&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "email": "oxford.ac.uk;cam.ac.uk;oxford.ac.uk;ox.ac.uk;oxford.ac.uk", "author_num": 5, "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Oxford;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "Oxford;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "zuWgB7GerW", "title": "How DNNs break the Curse of Dimensionality: Compositionality and Symmetry Learning", "track": "main", "status": "Reject", "tldr": "", "abstract": "We show that deep neural networks (DNNs) can efficiently learn any\ncomposition of functions with bounded $F_{1}$-norm, which allows\nDNNs to break the curse of dimensionality in ways that shallow networks\ncannot. More specifically, we derive a generalization bound that combines\na covering number argument for compositionality, and the $F_{1}$-norm\n(or the related Barron norm) for large width adaptivity. We show that\nthe global minimizer of the regularized loss of DNNs can fit for example\nthe composition of two functions $f^{*}=h\\circ g$ from a small number\nof observations, assuming $g$ is smooth/regular and reduces the dimensionality\n(e.g. $g$ could be the modulo map of the symmetries of $f^{*}$),\nso that $h$ can be learned in spite of its low regularity. The measures\nof reguarity we consider is the Sobolev norm with different levels\nof differentiability, which is well adapted to the $F_{1}$ norm.\nWe compute scaling laws empirically, and observe phase transitions\ndepending on whether $g$ or $h$ is harder to learn, as predicted\nby our theory.", "keywords": "Generalization;Rademacher complexity;Compositionality;Feature Learning", "primary_area": "learning_theory", "supplementary_material": "", "author": "Arthur Jacot;Seok Hoan Choi;Yuxiao Wen", "authorids": "~Arthur_Jacot1;~Seok_Hoan_Choi1;~Yuxiao_Wen1", "gender": "M;M;M", "homepage": ";;", "dblp": "222/2747;;298/1362", "google_scholar": "https://scholar.google.ch/citations?user=G6OhFawAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "orcid": ";;", "linkedin": ";kevin-seok-hoan-choi/;yuxiao-wen-4b3162161/", "or_profile": "~Arthur_Jacot1;~Seok_Hoan_Choi1;~Yuxiao_Wen1", "aff": "NYU, New York University;New York University;New York University", "aff_domain": "cims.nyu.edu;nyu.edu;nyu.edu", "position": "Assistant Professor;MS student;PhD student", "bibtex": "@misc{\nanonymous2024how,\ntitle={How {DNN}s break the Curse of Dimensionality: Compositionality and Symmetry Learning},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=zuWgB7GerW}\n}", "github": "", "project": "", "reviewers": "vTqY;GvvC;eK3L", "site": "https://openreview.net/forum?id=zuWgB7GerW", "pdf_size": 1881758, "rating": "5;6;7", "confidence": "3;3;3", "soundness": "3;3;4", "novelty": "3;3;4", "presentation": "4;2;4", "wc_summary": "78;143;68", "wc_strengths": "58;96;155", "wc_weaknesses": "106;160;408", "wc_questions": "55;280;88", "wc_limitations": "2;15;9", "wc_review": "299;694;728", "wc_reply_reviewers": "0;0;0", "wc_reply_authors": "0;0;0", "reply_reviewers": "0;0;0", "reply_authors": "0;0;0", "rating_avg": [ 6.0, 0.816496580927726 ], "confidence_avg": [ 3.0, 0.0 ], "soundness_avg": [ 3.3333333333333335, 0.4714045207910317 ], "novelty_avg": [ 3.3333333333333335, 0.4714045207910317 ], "presentation_avg": [ 3.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 96.33333333333333, 33.2498955721 ], "wc_strengths_avg": [ 103.0, 39.9082280572148 ], "wc_weaknesses_avg": [ 224.66666666666666, 131.4973595003168 ], "wc_questions_avg": [ 141.0, 99.20685460188726 ], "wc_limitations_avg": [ 8.666666666666666, 5.312459150169743 ], "wc_review_avg": [ 573.6666666666666, 194.71403533272982 ], "wc_reply_reviewers_avg": [ 0, 0 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0, 0 ], "reply_authors_avg": [ 0, 0 ], "replies_avg": [ 6, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:l5BnnkB2m5gJ:scholar.google.com/&scioq=How+DNNs+break+the+Curse+of+Dimensionality:+Compositionality+and+Symmetry+Learning&hl=en&as_sdt=0,23", "gs_version_total": 6, "aff_unique_index": "0;0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "New York;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Separation in Heavy-Tailed Sampling: Gaussian vs. Stable Oracles for Proximal Samplers", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92929", "id": "zuwLGhgxtQ", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zuwLGhgxtQ", "openreview": "https://openreview.net/forum?id=zuwLGhgxtQ", "poster": "", "project": "", "author_site": "Ye He, Alireza Mousavi-Hosseini, Krishnakumar Balasubramanian, Murat Erdogdu", "tldr": "", "abstract": "We study the complexity of heavy-tailed sampling and present a separation result in terms of obtaining high-accuracy versus low-accuracy guarantees i.e., samplers that require only $\\mathcal{O}(\\log(1/\\varepsilon))$ versus $\\Omega(\\text{poly}(1/\\varepsilon))$ iterations to output a sample which is $\\varepsilon$-close to the target in $\\chi^2$-divergence. Our results are presented for proximal samplers that are based on Gaussian versus stable oracles. We show that proximal samplers based on the Gaussian oracle have a fundamental barrier in that they necessarily achieve only low-accuracy guarantees when sampling from a class of heavy-tailed targets. In contrast, proximal samplers based on the stable oracle exhibit high-accuracy guarantees, thereby overcoming the aforementioned limitation. We also prove lower bounds for samplers under the stable oracle and show that our upper bounds cannot be fundamentally improved.", "keywords": "Proximal samplers;Complexity of heavy-tailed sampling;Restricted Gaussian oracle;Restricted Stable oracle", "primary_area": "learning_theory", "supplementary_material": "", "author": "Ye He;Alireza Mousavi-Hosseini;Krishna Balasubramanian;Murat A Erdogdu", "authorids": "~Ye_He1;~Alireza_Mousavi-Hosseini1;~Krishna_Balasubramanian1;~Murat_A_Erdogdu1", "gender": "M;M;M;M", "homepage": "https://yeleohe.github.io/;http://www.cs.toronto.edu/~erdogdu/;https://www.cs.toronto.edu/~mousavi/;https://sites.google.com/view/kriznakumar/", "dblp": "72/7636-3;139/1292;296/4041;22/6780-2.html", "google_scholar": "PC25rDIAAAAJ;Lqc4cdAAAAAJ;;", "orcid": "0000-0003-4686-8449;;;", "linkedin": ";;;", "or_profile": "~Ye_He1;~Murat_A_Erdogdu1;~Alireza_Mousavi1;~Krishnakumar_Balasubramanian1", "aff": "Georgia Institute of Technology;Vector Institute;Department of Computer Science, University of Toronto;University of California, Davis", "aff_domain": "gatech.edu;vectorinstitute.ai;cs.toronto.edu;ucdavis.edu", "position": "Postdoc;Faculty;PhD student;Associate Professor", "bibtex": "@inproceedings{\nhe2024a,\ntitle={A Separation in Heavy-Tailed Sampling: Gaussian vs. Stable Oracles for Proximal Samplers},\nauthor={Ye He and Alireza Mousavi-Hosseini and Krishna Balasubramanian and Murat A Erdogdu},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zuwLGhgxtQ}\n}", "github": "", "reviewers": "nmvc;kBax;YAjf;vTyM;MHrz", "pdf_size": 1052367, "rating": "6;7;7;7;8", "confidence": "2;3;1;1;3", "soundness": "3;3;3;2;3", "novelty": "3;3;3;3;3", "presentation": "3;4;3;2;3", "wc_summary": "40;89;108;106;64", "wc_strengths": "63;39;25;26;92", "wc_weaknesses": "19;9;51;66;85", "wc_questions": "34;25;37;1;6", "wc_limitations": "1;4;16;1;26", "wc_review": "157;166;237;200;273", "wc_reply_reviewers": "0;0;29;18;17", "wc_reply_authors": "0;0;0;0;0", "reply_reviewers": "0;0;1;1;1", "reply_authors": "1;1;1;1;1", "rating_avg": [ 7.0, 0.6324555320336759 ], "confidence_avg": [ 2.0, 0.8944271909999159 ], "soundness_avg": [ 2.8, 0.39999999999999997 ], "novelty_avg": [ 3.0, 0.0 ], "presentation_avg": [ 3.0, 0.6324555320336759 ], "wc_summary_avg": [ 81.4, 26.027677575995906 ], "wc_strengths_avg": [ 49.0, 25.495097567963924 ], "wc_weaknesses_avg": [ 46.0, 28.439409276565502 ], "wc_questions_avg": [ 20.6, 14.595889832415152 ], "wc_limitations_avg": [ 9.6, 9.891410415102591 ], "wc_review_avg": [ 206.6, 43.555022672477165 ], "wc_reply_reviewers_avg": [ 12.8, 11.26765281680262 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.6, 0.48989794855663565 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.35355339059327373, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7546727298764410928&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": "gatech.edu;vectorinstitute.ai;cs.toronto.edu;ucdavis.edu", "author_num": 4, "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Georgia Institute of Technology;Vector Institute;University of Toronto;University of California, Davis", "aff_unique_dep": ";;Department of Computer Science;", "aff_unique_url": "https://www.gatech.edu;https://vectorinstitute.ai/;https://www.utoronto.ca;https://www.ucdavis.edu", "aff_unique_abbr": "Georgia Tech;Vector Institute;U of T;UC Davis", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Toronto;Davis", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Canada" }, { "title": "Procedure-Aware Surgical Video-language Pretraining with Hierarchical Knowledge Augmentation", "status": "Spotlight", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92928", "id": "zuwpeRkJNH", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zuwpeRkJNH", "openreview": "https://openreview.net/forum?id=zuwpeRkJNH", "poster": "", "project": "", "author_site": "Kun Yuan, vinkle srivastav, Nassir Navab, Nicolas Padoy", "tldr": "", "abstract": "Surgical video-language pretraining (VLP) faces unique challenges due to the knowledge domain gap and the scarcity of multi-modal data. This study aims to bridge the gap by addressing issues regarding textual information loss in surgical lecture videos and the spatial-temporal challenges of surgical VLP. To tackle these issues, we propose a hierarchical knowledge augmentation approach and a novel Procedure-Encoded Surgical Knowledge-Augmented Video-Language Pretraining (PeskaVLP) framework. The proposed knowledge augmentation approach uses large language models (LLM) to refine and enrich surgical concepts, thus providing comprehensive language supervision and reducing the risk of overfitting. The PeskaVLP framework combines language supervision with visual self-supervision, constructing hard negative samples and employing a Dynamic Time Warping (DTW) based loss function to effectively comprehend the cross-modal procedural alignment. Extensive experiments on multiple public surgical scene understanding and cross-modal retrieval datasets show that our proposed method significantly improves zero-shot transferring performance and offers a generalist visual repre- sentation for further advancements in surgical scene understanding. The source code will be available at https://github.com/CAMMA-public/PeskaVLP.", "keywords": "Surgical Data Science;Video-language Pretraining;Multi-modal;Surgical Foundation Model", "primary_area": "machine_learning_for_healthcare", "supplementary_material": "/attachment/1231365d3818f4ad2ba8ea7068f8ec75882da969.zip", "author": "Kun yuan;Vinkle Srivastav;Nassir Navab;Nicolas Padoy", "authorids": "~Kun_yuan6;~Vinkle_Srivastav1;~Nassir_Navab1;~Nicolas_Padoy1", "gender": "M;;M;", "homepage": "https://flaick.github.io/;;https://www.cs.cit.tum.de/camp/members/cv-nassir-navab/nassir-navab/;", "dblp": ";;n/NassirNavab.html;", "google_scholar": "zId4EqoAAAAJ;;https://scholar.google.com.tw/citations?user=kzoVUPYAAAAJ;", "orcid": ";;;", "linkedin": ";;;", "or_profile": "~Kun_yuan6;~Vinkle_Srivastav1;~Nassir_Navab1;~Nicolas_Padoy1", "aff": "Technische Universit\u00e4t M\u00fcnchen;;Technical University of Munich;", "aff_domain": "tum.de;;tum.de;", "position": "PhD student;;Full Professor;", "bibtex": "@inproceedings{\nyuan2024procedureaware,\ntitle={Procedure-Aware Surgical Video-language Pretraining with Hierarchical Knowledge Augmentation},\nauthor={Kun yuan and Vinkle Srivastav and Nassir Navab and Nicolas Padoy},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zuwpeRkJNH}\n}", "github": "", "reviewers": "HH5E;smvC;usyA;EZYT", "pdf_size": 8201907, "rating": "5;6;8;8", "confidence": "4;4;5;5", "soundness": "2;2;3;4", "novelty": "2;2;4;4", "presentation": "3;3;4;4", "wc_summary": "67;64;70;100", "wc_strengths": "41;87;123;119", "wc_weaknesses": "37;71;140;31", "wc_questions": "270;154;81;27", "wc_limitations": "7;36;82;12", "wc_review": "422;412;496;289", "wc_reply_reviewers": "23;33;161;0", "wc_reply_authors": "0;0;462;0", "reply_reviewers": "1;1;1;0", "reply_authors": "1;1;2;1", "rating_avg": [ 6.75, 1.299038105676658 ], "confidence_avg": [ 4.5, 0.5 ], "soundness_avg": [ 2.75, 0.82915619758885 ], "novelty_avg": [ 3.0, 1.0 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 75.25, 14.446020213193666 ], "wc_strengths_avg": [ 92.5, 32.844329799829985 ], "wc_weaknesses_avg": [ 69.75, 43.33229165414633 ], "wc_questions_avg": [ 133.0, 91.03570728016562 ], "wc_limitations_avg": [ 34.25, 29.667954091915405 ], "wc_review_avg": [ 404.75, 74.28786913083455 ], "wc_reply_reviewers_avg": [ 54.25, 62.782859922115684 ], "wc_reply_authors_avg": [ 115.5, 200.05186827420533 ], "reply_reviewers_avg": [ 0.75, 0.4330127018922193 ], "reply_authors_avg": [ 1.25, 0.4330127018922193 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.9622504486493761, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=279261836162342009&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "email": "tum.de;;tum.de;", "author_num": 4, "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Technical University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.tum.de;https://www.tum.de", "aff_unique_abbr": "TUM;TUM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "IDGen: Item Discrimination Induced Prompt Generation for LLM Evaluation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92927", "id": "zv4UISZzp5", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zv4UISZzp5", "openreview": "https://openreview.net/forum?id=zv4UISZzp5", "poster": "/media/PosterPDFs/NeurIPS%202024/92927.png?t=1731319283.0287871", "project": "", "author_site": "Fan Lin, Shuyi Xie, Yong Dai, Wenlin Yao, TianJiao Lang, Yu Zhang", "tldr": "", "abstract": "As Large Language Models (LLMs) become more capable of handling increasingly complex tasks, the evaluation set must keep pace with these advancements to ensure it remains sufficiently discriminative. Item Discrimination (ID) theory, which is widely used in educational assessment, measures the ability of individual test items to differentiate between high and low performers. Inspired by this theory, we propose an ID-induced prompt synthesis framework for evaluating LLMs so that the evaluation set continually updates and refines according to model abilities. \nOur data synthesis framework prioritizes both breadth and specificity. It can generate prompts that comprehensively evaluate the capabilities of LLMs while revealing meaningful performance differences between models, allowing for effective discrimination of their relative strengths and weaknesses across various tasks and domains.\nTo produce high-quality data, we incorporate a self-correct mechanism into our generalization framework and develop two models to predict prompt discrimination and difficulty score to facilitate our data synthesis framework, contributing valuable tools to evaluation data synthesis research. We apply our generated data to evaluate five SOTA models. Our data achieves an average score of 51.92, accompanied by a variance of 10.06. By contrast, previous works (i.e., SELF-INSTRUCT and WizardLM) obtain an average score exceeding 67, with a variance below 3.2.\nThe results demonstrate that the data generated by our framework is more challenging and discriminative compared to previous works.\nWe will release a dataset of over 3,000 carefully crafted prompts to facilitate evaluation research of LLMs.", "keywords": "LLM;Data Generalization;Discrimination Indexes", "primary_area": "natural_language_processing", "supplementary_material": "", "author": "Fan Lin;Shuyi Xie;Yong Dai;Wenlin Yao;TianJiao Lang;Yu Zhang", "authorids": "~Fan_Lin2;~Shuyi_Xie1;~Yong_Dai1;~Wenlin_Yao1;~TianJiao_Lang1;~Yu_Zhang21", "gender": "M;M;M;M;;", "homepage": "https://palm.seu.edu.cn/homepage/linfan/index.html;;https://daiyongya.github.io/;https://wenlinyao.github.io/;;", "dblp": ";;;203/8711;;", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;qwo2A24AAAAJ;;", "orcid": ";;0000-0002-3041-5851;;;", "linkedin": ";shuyi-xie-7b37081a/;yong-dai-9255311a6/;;https://www.linkedin.com/feed/?trk=onboarding-landing;", "or_profile": "~Fan_Lin2;~Shuyi_Xie1;~Yong_Dai1;~Wenlin_Yao1;~TianJiao_Lang1;~Yu_Zhang21", "aff": "Southeast University;Tencent Big Data;Tencent AI Lab;Tencent AI Lab;Tencent Big Data;", "aff_domain": "seu.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;", "position": "MS student;Researcher;Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\nlin2024idgen,\ntitle={{IDG}en: Item Discrimination Induced Prompt Generation for {LLM} Evaluation},\nauthor={Fan Lin and Shuyi Xie and Yong Dai and Wenlin Yao and TianJiao Lang and Yu Zhang},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zv4UISZzp5}\n}", "github": "", "reviewers": "K8YF;UD21;VhvU", "pdf_size": 757306, "rating": "4;4;7", "confidence": "3;2;3", "soundness": "3;3;3", "novelty": "2;3;3", "presentation": "1;3;3", "wc_summary": "47;88;43", "wc_strengths": "51;37;70", "wc_weaknesses": "54;83;104", "wc_questions": "325;7;148", "wc_limitations": "23;18;9", "wc_review": "500;233;374", "wc_reply_reviewers": "80;0;54", "wc_reply_authors": "2146;0;20", "reply_reviewers": "1;0;1", "reply_authors": "5;1;2", "rating_avg": [ 5.0, 1.4142135623730951 ], "confidence_avg": [ 2.6666666666666665, 0.4714045207910317 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.6666666666666665, 0.4714045207910317 ], "presentation_avg": [ 2.3333333333333335, 0.9428090415820634 ], "wc_summary_avg": [ 59.333333333333336, 20.33606539022619 ], "wc_strengths_avg": [ 52.666666666666664, 13.523641850067197 ], "wc_weaknesses_avg": [ 80.33333333333333, 20.49932248202906 ], "wc_questions_avg": [ 160.0, 130.09996156801893 ], "wc_limitations_avg": [ 16.666666666666668, 5.792715732327588 ], "wc_review_avg": [ 369.0, 109.0596167240652 ], "wc_reply_reviewers_avg": [ 44.666666666666664, 33.319997332266134 ], "wc_reply_authors_avg": [ 722.0, 1006.9531601155371 ], "reply_reviewers_avg": [ 0.6666666666666666, 0.4714045207910317 ], "reply_authors_avg": [ 2.6666666666666665, 1.699673171197595 ], "replies_avg": [ 15, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:qjxEKXg-sj4J:scholar.google.com/&scioq=IDGen:+Item+Discrimination+Induced+Prompt+Generation+for+LLM+Evaluation&hl=en&as_sdt=0,33", "gs_version_total": 4, "email": "seu.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;", "author_num": 6, "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Southeast University;Tencent", "aff_unique_dep": ";Big Data", "aff_unique_url": "https://www.seu.edu.cn/;https://www.tencent.com", "aff_unique_abbr": "SEU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Toward Global Convergence of Gradient EM for Over-Paramterized Gaussian Mixture Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92926", "id": "zv9gYC3xgF", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zv9gYC3xgF", "openreview": "https://openreview.net/forum?id=zv9gYC3xgF", "poster": "", "project": "", "author_site": "Weihang Xu, Maryam Fazel, Simon Du", "tldr": "", "abstract": "We study the gradient Expectation-Maximization (EM) algorithm for Gaussian Mixture Models (GMM) in the over-parameterized setting, where a general GMM with $n>1$ components learns from data that are generated by a single ground truth Gaussian distribution. \nWhile results for the special case of 2-Gaussian mixtures are well-known, a general global convergence analysis for arbitrary $n$ remains unresolved and faces several new technical barriers since the convergence becomes sub-linear and non-monotonic. To address these challenges, we construct a novel likelihood-based convergence analysis framework and rigorously prove that gradient EM converges globally with a sublinear rate $O(1/\\sqrt{t})$. This is the first global convergence result for Gaussian mixtures with more than $2$ components. The sublinear convergence rate is due to the algorithmic nature of learning over-parameterized GMM with gradient EM. We also identify a new emerging technical challenge for learning general over-parameterized GMM: the existence of bad local regions that can trap gradient EM for an exponential number of steps.", "keywords": "over-parameterization;global convergence;non-convex optimization", "primary_area": "learning_theory", "supplementary_material": "", "author": "Weihang Xu;Maryam Fazel;Simon Shaolei Du", "authorids": "~Weihang_Xu2;~Maryam_Fazel1;~Simon_Shaolei_Du1", "gender": ";F;M", "homepage": "https://weihangxu.com;;http://simonshaoleidu.com", "dblp": ";10/2309;176/5602", "google_scholar": ";vlN_kRoAAAAJ;OttawxUAAAAJ", "orcid": ";;", "linkedin": ";;", "or_profile": "~Weihang_Xu2;~Maryam_Fazel1;~Simon_Shaolei_Du1", "aff": "University of Washington;University of Washington, Seattle;University of Washington", "aff_domain": "cs.washington.edu;uw.edu;washington.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2024toward,\ntitle={Toward Global Convergence of Gradient {EM} for Over-Paramterized Gaussian Mixture Models},\nauthor={Weihang Xu and Maryam Fazel and Simon Shaolei Du},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zv9gYC3xgF}\n}", "github": "", "reviewers": "JsCq;DbMw;DCG2;6yVv", "pdf_size": 658076, "rating": "6;6;6;6", "confidence": "3;3;2;4", "soundness": "3;4;2;3", "novelty": "2;2;2;2", "presentation": "3;4;2;4", "wc_summary": "288;29;82;97", "wc_strengths": "45;19;46;66", "wc_weaknesses": "92;109;42;110", "wc_questions": "1;69;1;93", "wc_limitations": "1;9;1;1", "wc_review": "427;235;172;367", "wc_reply_reviewers": "15;80;31;30", "wc_reply_authors": "18;176;17;0", "reply_reviewers": "1;1;1;1", "reply_authors": "2;2;2;1", "rating_avg": [ 6.0, 0.0 ], "confidence_avg": [ 3.0, 0.7071067811865476 ], "soundness_avg": [ 3.0, 0.7071067811865476 ], "novelty_avg": [ 2.0, 0.0 ], "presentation_avg": [ 3.25, 0.82915619758885 ], "wc_summary_avg": [ 124.0, 97.99744894638839 ], "wc_strengths_avg": [ 44.0, 16.688319268278637 ], "wc_weaknesses_avg": [ 88.25, 27.643941470058138 ], "wc_questions_avg": [ 41.0, 40.890096600521744 ], "wc_limitations_avg": [ 3.0, 3.4641016151377544 ], "wc_review_avg": [ 300.25, 101.52185725251483 ], "wc_reply_reviewers_avg": [ 39.0, 24.50510150968569 ], "wc_reply_authors_avg": [ 52.75, 71.51704342322884 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 1.75, 0.4330127018922193 ], "replies_avg": [ 18, 0 ], "authors#_avg": [ 3, 0 ], "corr_rating_confidence": 0.0, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8187223223645839466&as_sdt=5,44&sciodt=0,44&hl=en", "gs_version_total": 4, "email": "cs.washington.edu;uw.edu;washington.edu", "author_num": 3, "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PERIA: Perceive, Reason, Imagine, Act via Holistic Language and Vision Planning for Manipulation", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92925", "id": "zw2K6LfFI9", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zw2K6LfFI9", "openreview": "https://openreview.net/forum?id=zw2K6LfFI9", "poster": "", "project": "", "author_site": "Fei Ni, Jianye Hao, Shiguang Wu, Longxin Kou, Yifu Yuan, Zibin Dong, Jinyi Liu, MingZhi Li, Yuzheng Zhuang, YAN ZHENG", "tldr": "", "abstract": "Long-horizon manipulation tasks with general instructions often implicitly encapsulate multiple sub-tasks, posing significant challenges in instruction following.\nWhile language planning is a common approach to decompose general instructions into stepwise sub-instructions, text-only guidance may lack expressiveness and lead to potential ambiguity. Considering that humans often imagine and visualize sub-instructions reasoning out before acting, the imagined subgoal images can provide more intuitive guidance and enhance the reliability of decomposition. Inspired by this, we propose **PERIA**(**PE**rceive, **R**eason, **I**magine, **A**ct), a novel framework that integrates holistic language planning and vision planning for long-horizon manipulation tasks with complex instructions, leveraging both logical and intuitive aspects of task decomposition.\nSpecifically, we first perform a lightweight multimodal alignment on the encoding side to empower the MLLM to perceive visual details and language instructions. \nThe MLLM is then jointly instruction-tuned with a pretrained image-editing model to unlock capabilities of simultaneous reasoning of language instructions and generation of imagined subgoals. Furthermore, we introduce a consistency alignment loss to encourage coherent subgoal images and align with their corresponding instructions, mitigating potential hallucinations and semantic conflicts between the two planning manners.\nComprehensive evaluations across three task domains demonstrate that PERIA, benefiting from holistic language and vision planning, significantly outperforms competitive baselines in both instruction following accuracy and task success rate on complex manipulation tasks.", "keywords": "robotics manipulation;multi-modal LLM;image generation;instruction following", "primary_area": "generative_models", "supplementary_material": "", "author": "Fei Ni;Jianye HAO;Shiguang Wu;Longxin Kou;Yifu Yuan;Zibin Dong;Jinyi Liu;MingZhi Li;Yuzheng Zhuang;YAN ZHENG", "authorids": "~Fei_Ni1;~Jianye_HAO1;~Shiguang_Wu4;~Longxin_Kou1;~Yifu_Yuan1;~Zibin_Dong1;~Jinyi_Liu1;~MingZhi_Li1;~Yuzheng_Zhuang1;~YAN_ZHENG1", "gender": "M;M;M;M;M;;M;F;M;M", "homepage": "https://fei-ni.github.io/;http://www.icdai.org/jianye.html;;https://yifu-yuan.github.io/;https://github.com/GrandpaDZB;;https://github.com/turn0the0table;;https://yanzzzzz.github.io;https://longxinkou.github.io/index.html", "dblp": "11/579-1;21/7664.html;275/7661-1.html;261/3688;358/5885;192/6688-2;;;10/2381-2;", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN;83JhosMAAAAJ;JQ6881QAAAAJ;kaQS7NAAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=tJuhd1kAAAAJ;", "orcid": "0009-0007-5623-2782;0000-0002-0422-8235;0000-0001-9091-5236;0009-0009-2194-942X;0000-0002-2986-6022;;;;;", "linkedin": ";;;;;\u91d1\u6bc5-\u5218-5b7447118;;;;", "or_profile": "~Fei_Ni1;~Jianye_HAO1;~Shiguang_Wu4;~Yifu_Yuan1;~Zibin_Dong1;~Jinyi_Liu1;~MingZhi_Li1;~Yuzheng_Zhuang1;~YAN_ZHENG1;~LongxinKou1", "aff": "Tianjin University;Tianjin University;Huawei Technologies Ltd.;Tianjin University;Tianjin University;Tianjin University;Hebei University of Technology;Huawei Technologies Ltd.;Tianjin Unibersity, China;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;huawei.com;tju.edu.cn;tju.edu.cn;tju.edu.cn;hebut.edu.cn;huawei.com;tju.edu.cn;tju.edu.cn", "position": "PhD student;Associate Professor;Researcher;PhD student;MS student;PhD student;Undergrad student;Research Engineer;Associate Professor;MS student", "bibtex": "@inproceedings{\nni2024peria,\ntitle={{PERIA}: Perceive, Reason, Imagine, Act via Holistic Language and Vision Planning for Manipulation},\nauthor={Fei Ni and Jianye HAO and Shiguang Wu and Longxin Kou and Yifu Yuan and Zibin Dong and Jinyi Liu and MingZhi Li and Yuzheng Zhuang and YAN ZHENG},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zw2K6LfFI9}\n}", "github": "", "reviewers": "WmQe;tWdi;HJPE;C2qN", "pdf_size": 4520336, "rating": "6;6;6;7", "confidence": "4;4;5;4", "soundness": "3;3;3;3", "novelty": "3;2;3;3", "presentation": "3;3;3;3", "wc_summary": "64;83;99;168", "wc_strengths": "30;15;65;98", "wc_weaknesses": "7;171;148;74", "wc_questions": "145;4;96;36", "wc_limitations": "9;7;12;1", "wc_review": "255;280;420;377", "wc_reply_reviewers": "19;29;30;45", "wc_reply_authors": "44;157;67;44", "reply_reviewers": "1;1;1;1", "reply_authors": "2;3;2;2", "rating_avg": [ 6.25, 0.4330127018922193 ], "confidence_avg": [ 4.25, 0.4330127018922193 ], "soundness_avg": [ 3.0, 0.0 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.0, 0.0 ], "wc_summary_avg": [ 103.5, 39.24601890638081 ], "wc_strengths_avg": [ 52.0, 32.16364407215078 ], "wc_weaknesses_avg": [ 100.0, 64.55617708631762 ], "wc_questions_avg": [ 70.25, 54.343237849800595 ], "wc_limitations_avg": [ 7.25, 4.02336923485777 ], "wc_review_avg": [ 333.0, 67.819613682179 ], "wc_reply_reviewers_avg": [ 30.75, 9.283722313813572 ], "wc_reply_authors_avg": [ 78.0, 46.56715580750021 ], "reply_reviewers_avg": [ 1.0, 0.0 ], "reply_authors_avg": [ 2.25, 0.4330127018922193 ], "replies_avg": [ 19, 0 ], "authors#_avg": [ 10, 0 ], "corr_rating_confidence": -0.3333333333333333, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2506804364194928933&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "email": "tju.edu.cn;tju.edu.cn;huawei.com;tju.edu.cn;tju.edu.cn;tju.edu.cn;hebut.edu.cn;huawei.com;tju.edu.cn;tju.edu.cn", "author_num": 10, "aff_unique_index": "0;0;1;0;0;0;2;1;0;0", "aff_unique_norm": "Tianjin University;Huawei;Hebei University of Technology", "aff_unique_dep": ";Huawei Technologies;", "aff_unique_url": "http://www.tju.edu.cn;https://www.huawei.com;http://www.hbut.edu.cn", "aff_unique_abbr": "TJU;Huawei;HUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Cooperative Hardware-Prompt Learning for Snapshot Compressive Imaging", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92924", "id": "zxSWIdyW3A", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zxSWIdyW3A", "openreview": "https://openreview.net/forum?id=zxSWIdyW3A", "poster": "/media/PosterPDFs/NeurIPS%202024/92924.png?t=1731729571.0265477", "project": "", "author_site": "JIAMIAN WANG, Zongliang Wu, Yulun Zhang, Xin Yuan, Tao Lin, Zhiqiang Tao", "tldr": "", "abstract": "Existing reconstruction models in snapshot compressive imaging systems (SCI) are trained with a single well-calibrated hardware instance, making their perfor- mance vulnerable to hardware shifts and limited in adapting to multiple hardware configurations. To facilitate cross-hardware learning, previous efforts attempt to directly collect multi-hardware data and perform centralized training, which is impractical due to severe user data privacy concerns and hardware heterogeneity across different platforms/institutions. In this study, we explicitly consider data privacy and heterogeneity in cooperatively optimizing SCI systems by proposing a Federated Hardware-Prompt learning (FedHP) framework. Rather than mitigating the client drift by rectifying the gradients, which only takes effect on the learning manifold but fails to solve the heterogeneity rooted in the input data space, FedHP learns a hardware-conditioned prompter to align inconsistent data distribution across clients, serving as an indicator of the data inconsistency among different hardware (e.g., coded apertures). Extensive experimental results demonstrate that the proposed FedHP coordinates the pre-trained model to multiple hardware con- figurations, outperforming prevalent FL frameworks for 0.35dB under challenging heterogeneous settings. Moreover, a Snapshot Spectral Heterogeneous Dataset has been built upon multiple practical SCI systems. Data and code are aveilable at https://github.com/Jiamian-Wang/FedHP-Snapshot-Compressive-Imaging.git", "keywords": "snapshot compressive imaging;hyperpectral imaging;prompt learning;federated learning", "primary_area": "machine_vision", "supplementary_material": "/attachment/795e5ddcb1cd4711f49418eded4f618ffe25e568.zip", "author": "Jiamian Wang;Zongliang Wu;Yulun Zhang;Xin Yuan;Tao Lin;ZHIQIANG TAO", "authorids": "~Jiamian_Wang1;~Zongliang_Wu1;~Yulun_Zhang1;~Xin_Yuan4;~Tao_Lin1;~ZHIQIANG_TAO2", "gender": "M;M;M;M;M;", "homepage": "https://jiamian-wang.github.io/;;http://yulunzhang.com/;https://en.westlake.edu.cn/faculty/xin-yuan.html;https://lins-lab.github.io/;http://ztao.cc/", "dblp": "291/6309;;166/2763-1.html;78/713-2;64/4492-4.html;135/5229.html", "google_scholar": "MGSkEscAAAAJ;;ORmLjWoAAAAJ;cS9CbWkAAAAJ;QE9pa_cAAAAJ;sEKglOkAAAAJ", "orcid": ";0000-0003-0750-0246;0000-0002-2288-5079;0000-0002-8311-7524;0000-0002-3246-6935;", "linkedin": "%E5%8A%A0%E5%86%95-%E7%8E%8B-5928b81ba/;;yulun-zhang-1116b5b9/;xin-yuan-0024bb31/;;", "or_profile": "~Jiamian_Wang1;~Zongliang_Wu1;~Yulun_Zhang1;~Xin_Yuan4;~Tao_Lin1;~ZHIQIANG_TAO2", "aff": "Rochester Institute of Technology;Zhejiang University;Swiss Federal Institute of Technology;Westlake University;Westlake University;Rochester Institute of Technology", "aff_domain": "rit.edu;zju.edu.cn;ethz.ch;westlake.edu.cn;westlake.edu;rit.edu", "position": "PhD student;PhD student;Postdoc;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2024cooperative,\ntitle={Cooperative Hardware-Prompt Learning for Snapshot Compressive Imaging},\nauthor={Jiamian Wang and Zongliang Wu and Yulun Zhang and Xin Yuan and Tao Lin and ZHIQIANG TAO},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zxSWIdyW3A}\n}", "github": "", "reviewers": "aeby;1X5M;Jxiy;BdgG", "pdf_size": 17441517, "rating": "5;5;6;6", "confidence": "3;4;4;4", "soundness": "2;2;3;3", "novelty": "2;2;3;3", "presentation": "2;3;3;3", "wc_summary": "122;48;114;53", "wc_strengths": "19;63;66;64", "wc_weaknesses": "80;160;68;134", "wc_questions": "37;50;25;126", "wc_limitations": "105;104;44;1", "wc_review": "363;425;317;378", "wc_reply_reviewers": "554;98;21;14", "wc_reply_authors": "553;292;13;13", "reply_reviewers": "3;2;1;1", "reply_authors": "4;3;2;2", "rating_avg": [ 5.5, 0.5 ], "confidence_avg": [ 3.75, 0.4330127018922193 ], "soundness_avg": [ 2.5, 0.5 ], "novelty_avg": [ 2.5, 0.5 ], "presentation_avg": [ 2.75, 0.4330127018922193 ], "wc_summary_avg": [ 84.25, 33.914414339628514 ], "wc_strengths_avg": [ 53.0, 19.6596032513375 ], "wc_weaknesses_avg": [ 110.5, 37.87809393303734 ], "wc_questions_avg": [ 59.5, 39.39860403618382 ], "wc_limitations_avg": [ 63.5, 43.729280808172454 ], "wc_review_avg": [ 370.75, 38.55110244856819 ], "wc_reply_reviewers_avg": [ 171.75, 223.13939029225656 ], "wc_reply_authors_avg": [ 217.75, 224.5833642547907 ], "reply_reviewers_avg": [ 1.75, 0.82915619758885 ], "reply_authors_avg": [ 2.75, 0.82915619758885 ], "replies_avg": [ 23, 0 ], "authors#_avg": [ 6, 0 ], "corr_rating_confidence": 0.5773502691896257, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17681974547602435562&as_sdt=5,47&sciodt=0,47&hl=en", "gs_version_total": 4, "email": "rit.edu;zju.edu.cn;ethz.ch;westlake.edu.cn;westlake.edu;rit.edu", "author_num": 6, "aff_unique_index": "0;1;2;3;3;0", "aff_unique_norm": "Rochester Institute of Technology;Zhejiang University;Swiss Federal Institute of Technology;Westlake University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.rit.edu;https://www.zju.edu.cn;https://www.ethz.ch;https://www.westlake.edu.cn", "aff_unique_abbr": "RIT;ZJU;ETH Zurich;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;1;0", "aff_country_unique": "United States;China;Switzerland" }, { "title": "Stress-Testing Capability Elicitation With Password-Locked Models", "status": "Poster", "track": "main", "site": "https://neurips.cc/virtual/2024/poster/92923", "id": "zzOOqD6R1b", "proceeding": "", "pdf": "https://openreview.net/pdf?id=zzOOqD6R1b", "openreview": "https://openreview.net/forum?id=zzOOqD6R1b", "poster": "", "project": "", "author_site": "Ryan Greenblatt, Fabien Roger, Dmitrii Krasheninnikov, David Krueger", "tldr": "", "abstract": "To determine the safety of large language models (LLMs), AI developers must be able to assess their dangerous capabilities. But simple prompting strategies often fail to elicit an LLM\u2019s full capabilities. One way to elicit capabilities more robustly is to fine-tune the LLM to complete the task. In this paper, we investigate the conditions under which fine-tuning-based elicitation suffices to elicit capabilities. To do this, we introduce password-locked models, LLMs fine-tuned such that some of their capabilities are deliberately hidden. Specifically, these LLMs are trained to exhibit these capabilities only when a password is present in the prompt, and to imitate a much weaker LLM otherwise. Password-locked models enable a novel method of evaluating capabilities elicitation methods, by testing whether these password-locked capabilities can be elicited without using the password. We find that a few high-quality demonstrations are often sufficient to fully elicit password-locked capabilities. More surprisingly, fine-tuning can elicit other capabilities that have been locked using the same password, or even different passwords. Furthermore, when only evaluations, and not demonstrations, are available, approaches like reinforcement learning are still often able to elicit capabilities. Overall, our findings suggest that fine-tuning is an effective method of eliciting hidden capabilities of current models but may be unreliable when high-quality demonstrations are not available, e.g., as may be the case when models\u2019 (hidden) capabilities exceed those of human demonstrators.", "keywords": "LLMs;Elicitation;Fine-tuning;Sandbagging;Red-teaming;Safety", "primary_area": "safety_in_machine_learning", "supplementary_material": "/attachment/da6619534d79b7246746540de2464bd411225e1e.zip", "author": "Ryan Greenblatt;Fabien Roger;Dmitrii Krasheninnikov;David Krueger", "authorids": "~Ryan_Greenblatt1;~Fabien_Roger1;~Dmitrii_Krasheninnikov1;~David_Krueger1", "gender": ";M;M;M", "homepage": ";;https://krasheninnikov.github.io/about/;https://mila.umontreal.ca/en/person/david-scott-krueger/", "dblp": ";336/6227;;142/2741.html", "google_scholar": ";La75jqEAAAAJ;BIQflKQAAAAJ;https://scholar.google.ca/citations?user=5Uz70IoAAAAJ", "orcid": ";;;", "linkedin": "ryan-greenblatt-4b9907134/;;;", "or_profile": "~Ryan_Greenblatt1;~Fabien_Roger1;~Dmitrii_Krasheninnikov1;~David_Krueger1", "aff": ";Redwood Research;University of Cambridge;University of Cambridge", "aff_domain": ";redwoodresearch.org;cam.ac.uk;cam.ac.uk", "position": ";Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngreenblatt2024stresstesting,\ntitle={Stress-Testing Capability Elicitation With Password-Locked Models},\nauthor={Ryan Greenblatt and Fabien Roger and Dmitrii Krasheninnikov and David Krueger},\nbooktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},\nyear={2024},\nurl={https://openreview.net/forum?id=zzOOqD6R1b}\n}", "github": "", "reviewers": "JNQU;Vgeo;VH7G;Y88j", "pdf_size": 1114184, "rating": "5;5;7;8", "confidence": "3;4;3;4", "soundness": "3;3;3;4", "novelty": "3;2;3;3", "presentation": "3;4;3;4", "wc_summary": "123;87;131;102", "wc_strengths": "147;18;221;73", "wc_weaknesses": "166;151;157;214", "wc_questions": "168;77;212;17", "wc_limitations": "1;4;17;8", "wc_review": "605;337;738;414", "wc_reply_reviewers": "0;0;13;20", "wc_reply_authors": "0;0;0;0", "reply_reviewers": "0;0;1;1", "reply_authors": "1;1;1;1", "rating_avg": [ 6.25, 1.299038105676658 ], "confidence_avg": [ 3.5, 0.5 ], "soundness_avg": [ 3.25, 0.4330127018922193 ], "novelty_avg": [ 2.75, 0.4330127018922193 ], "presentation_avg": [ 3.5, 0.5 ], "wc_summary_avg": [ 110.75, 17.32591988899868 ], "wc_strengths_avg": [ 114.75, 76.53879735140865 ], "wc_weaknesses_avg": [ 172.0, 24.829418035870273 ], "wc_questions_avg": [ 118.5, 76.18562856602287 ], "wc_limitations_avg": [ 7.5, 6.020797289396148 ], "wc_review_avg": [ 523.5, 157.6586502542756 ], "wc_reply_reviewers_avg": [ 8.25, 8.613216588476108 ], "wc_reply_authors_avg": [ 0, 0 ], "reply_reviewers_avg": [ 0.5, 0.5 ], "reply_authors_avg": [ 1.0, 0.0 ], "replies_avg": [ 12, 0 ], "authors#_avg": [ 4, 0 ], "corr_rating_confidence": 0.19245008972987526, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10499569071228246819&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "email": ";redwoodresearch.org;cam.ac.uk;cam.ac.uk", "author_num": 4, "aff_unique_index": "0;1;1", "aff_unique_norm": "Redwood Research;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.redwoodresearch.org;https://www.cam.ac.uk", "aff_unique_abbr": "Redwood Research;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" } ]